Merge "Revert "Make dchecked_vector C++11 compatible.""
diff --git a/Android.mk b/Android.mk
index fcf70ff..4dc84c4 100644
--- a/Android.mk
+++ b/Android.mk
@@ -77,6 +77,7 @@
 # product rules
 
 include $(art_path)/runtime/Android.mk
+include $(art_path)/runtime/simulator/Android.mk
 include $(art_path)/compiler/Android.mk
 include $(art_path)/dexdump/Android.mk
 include $(art_path)/dexlist/Android.mk
@@ -85,29 +86,25 @@
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/imgdiag/Android.mk
 include $(art_path)/patchoat/Android.mk
+include $(art_path)/profman/Android.mk
 include $(art_path)/dalvikvm/Android.mk
 include $(art_path)/tools/Android.mk
 include $(art_path)/tools/ahat/Android.mk
 include $(art_path)/tools/dexfuzz/Android.mk
 include $(art_path)/tools/dmtracedump/Android.mk
 include $(art_path)/sigchainlib/Android.mk
+include $(art_path)/libart_fake/Android.mk
 
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
 ART_HOST_DEPENDENCIES := \
-	$(ART_HOST_EXECUTABLES) \
-	$(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar \
-	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_DEX_DEPENDENCIES) \
+  $(ART_HOST_SHARED_LIBRARY_DEPENDENCIES)
 ART_TARGET_DEPENDENCIES := \
-	$(ART_TARGET_EXECUTABLES) \
-	$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar \
-	$(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
-ifdef TARGET_2ND_ARCH
-ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
-endif
-ifdef HOST_2ND_ARCH
-ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libjavacore.so
-endif
+  $(ART_TARGET_EXECUTABLES) \
+  $(ART_TARGET_DEX_DEPENDENCIES) \
+  $(ART_TARGET_SHARED_LIBRARY_DEPENDENCIES)
 
 ########################################################################
 # test rules
@@ -122,6 +119,16 @@
 include $(art_path)/test/Android.run-test.mk
 include $(art_path)/benchmark/Android.mk
 
+TEST_ART_ADB_ROOT_AND_REMOUNT := \
+    (adb root && \
+     adb wait-for-device remount && \
+     ((adb shell touch /system/testfile && \
+       (adb shell rm /system/testfile || true)) || \
+      (adb disable-verity && \
+       adb reboot && \
+       adb wait-for-device root && \
+       adb wait-for-device remount)))
+
 # Sync test files to the target; this depends on everything that must be pushed to the target.
 .PHONY: test-art-target-sync
 # Check if we need to sync. In case ART_TEST_ANDROID_ROOT is not empty,
@@ -130,14 +137,16 @@
 ifneq ($(ART_TEST_NO_SYNC),true)
 ifeq ($(ART_TEST_ANDROID_ROOT),)
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 else
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT)
-	adb push $(ANDROID_PRODUCT_OUT)/data /data
+# Push the contents of the `data` dir into `/data` on the device.  If
+# `/data` already exists on the device, it is not overwritten, but its
+# contents are updated.
+	adb push $(ANDROID_PRODUCT_OUT)/data /
 endif
 endif
 
@@ -246,9 +255,17 @@
 test-art-host-dexdump: $(addprefix $(HOST_OUT_EXECUTABLES)/, dexdump2 dexlist)
 	ANDROID_HOST_OUT=$(realpath $(HOST_OUT)) art/test/dexdump/run-all-tests
 
-# Valgrind. Currently only 32b gtests.
+# Valgrind.
 .PHONY: valgrind-test-art-host
-valgrind-test-art-host: valgrind-test-art-host-gtest32
+valgrind-test-art-host: valgrind-test-art-host-gtest
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-host32
+valgrind-test-art-host32: valgrind-test-art-host-gtest32
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-host64
+valgrind-test-art-host64: valgrind-test-art-host-gtest64
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 ########################################################################
@@ -327,6 +344,19 @@
 
 endif  # art_test_bother
 
+# Valgrind.
+.PHONY: valgrind-test-art-target
+valgrind-test-art-target: valgrind-test-art-target-gtest
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-target32
+valgrind-test-art-target32: valgrind-test-art-target-gtest32
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-target64
+valgrind-test-art-target64: valgrind-test-art-target-gtest64
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
 ########################################################################
 # oat-target and oat-target-sync rules
 
@@ -374,10 +404,38 @@
 
 .PHONY: oat-target-sync
 oat-target-sync: oat-target
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 
+####################################################################################################
+# Fake packages to ensure generation of libopenjdkd when one builds with mm/mmm/mmma.
+#
+# The library is required for starting a runtime in debug mode, but libartd does not depend on it
+# directly (a direct dependency would create a cycle).
+#
+# Note: * As the package is phony and exists only to create a dependency, its name is irrelevant.
+#       * We explicitly set MULTILIB to "both" to state that we want both libraries on
+#         64-bit systems, even though that is the default.
+
+# ART on the host.
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+include $(CLEAR_VARS)
+LOCAL_MODULE := art-libartd-libopenjdkd-host-dependency
+LOCAL_MULTILIB := both
+LOCAL_REQUIRED_MODULES := libopenjdkd
+LOCAL_IS_HOST_MODULE := true
+include $(BUILD_PHONY_PACKAGE)
+endif
+
+# ART on the target.
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+include $(CLEAR_VARS)
+LOCAL_MODULE := art-libartd-libopenjdkd-target-dependency
+LOCAL_MULTILIB := both
+LOCAL_REQUIRED_MODULES := libopenjdkd
+include $(BUILD_PHONY_PACKAGE)
+endif
+
 ########################################################################
 # "m build-art" for quick minimal build
 .PHONY: build-art
@@ -393,10 +451,10 @@
 # Rules for building all dependencies for tests.
 
 .PHONY: build-art-host-tests
-build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES)
+build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 
 .PHONY: build-art-target-tests
-build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS)
+build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 
 ########################################################################
 # targets to switch back and forth from libdvm to libart
@@ -496,3 +554,13 @@
 art_dont_bother :=
 art_test_bother :=
 TEST_ART_TARGET_SYNC_DEPS :=
+
+include $(art_path)/runtime/openjdkjvm/Android.mk
+include $(art_path)/runtime/openjdkjvmti/Android.mk
+
+# Helper target that depends on boot image creation.
+#
+# Can be used, for example, to dump initialization failures:
+#   m art-boot-image ART_BOOT_IMAGE_EXTRA_ARGS=--dump-init-failures=fails.txt
+.PHONY: art-boot-image
+art-boot-image: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME)
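
Note: TEST_ART_ADB_ROOT_AND_REMOUNT above packs a verity-aware remount fallback
into a single shell command. A rough shell equivalent of its behavior (a sketch
for readability, not part of the change) is:

    adb root
    adb wait-for-device remount
    # Probe whether /system actually became writable.
    if adb shell touch /system/testfile; then
        adb shell rm /system/testfile || true
    else
        # The remount silently failed (e.g. dm-verity is enabled):
        # disable verity, reboot, and remount again.
        adb disable-verity
        adb reboot
        adb wait-for-device root
        adb wait-for-device remount
    fi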
diff --git a/NOTICE b/NOTICE
index d27f6a6..d79b004 100644
--- a/NOTICE
+++ b/NOTICE
@@ -262,5 +262,3 @@
 pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
 ARM contributions   llvm/lib/Target/ARM/LICENSE.TXT
 md5 contributions   llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
-
--------------------------------------------------------------------
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index a4a603a..2360bcc 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -45,7 +45,7 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
@@ -56,8 +56,8 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
+    LOCAL_LDLIBS := -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/benchmark/jni-perf/src/JniPerfBenchmark.java b/benchmark/jni-perf/src/JniPerfBenchmark.java
index b1b21ce..1e7cc2b 100644
--- a/benchmark/jni-perf/src/JniPerfBenchmark.java
+++ b/benchmark/jni-perf/src/JniPerfBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class JniPerfBenchmark extends SimpleBenchmark {
+public class JniPerfBenchmark {
   private static final String MSG = "ABCDE";
 
   native void perfJniEmptyCall();
diff --git a/benchmark/jobject-benchmark/src/JObjectBenchmark.java b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
index f4c059c..90a53b3 100644
--- a/benchmark/jobject-benchmark/src/JObjectBenchmark.java
+++ b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class JObjectBenchmark extends SimpleBenchmark {
+public class JObjectBenchmark {
   public JObjectBenchmark() {
     // Make sure to link methods before benchmark starts.
     System.loadLibrary("artbenchmark");
diff --git a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
index be276fe..0ad9c36 100644
--- a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
+++ b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark; 
-
-public class ScopedPrimitiveArrayBenchmark extends SimpleBenchmark {
+public class ScopedPrimitiveArrayBenchmark {
   // Measure adds the first and last element of the array by using ScopedPrimitiveArray.
   static native long measureByteArray(int reps, byte[] arr);
   static native long measureShortArray(int reps, short[] arr);
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 6952d69..6befec5 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -63,7 +63,11 @@
     ART_TARGET_ARCH_64 := $(TARGET_ARCH)
   else
     # TODO: ???
-    $(error Do not know what to do with this multi-target configuration!)
+    $(warning Do not know what to do with this multi-target configuration!)
+    ART_PHONY_TEST_TARGET_SUFFIX := 32
+    2ND_ART_PHONY_TEST_TARGET_SUFFIX :=
+    ART_TARGET_ARCH_32 := $(TARGET_ARCH)
+    ART_TARGET_ARCH_64 :=
   endif
 else
   ifneq ($(filter %64,$(TARGET_ARCH)),)
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index cd9d18d..c8e3654 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -34,7 +34,7 @@
 ART_BUILD_HOST_NDEBUG ?= true
 ART_BUILD_HOST_DEBUG ?= true
 
-# Set this to change what opt level Art is built at.
+# Set this to change what opt level ART is built at.
 ART_DEBUG_OPT_FLAG ?= -O2
 ART_NDEBUG_OPT_FLAG ?= -O3
 
@@ -76,135 +76,20 @@
 endif
 
 #
-# Used to enable JIT
-#
-ART_JIT := false
-ifneq ($(wildcard art/JIT_ART),)
-$(info Enabling ART_JIT because of existence of art/JIT_ART)
-ART_JIT := true
-endif
-ifeq ($(WITH_ART_JIT), true)
-ART_JIT := true
-endif
-
-#
 # Used to change the default GC. Valid values are CMS, SS, GSS. The default is CMS.
 #
 ART_DEFAULT_GC_TYPE ?= CMS
 art_default_gc_type_cflags := -DART_DEFAULT_GC_TYPE_IS_$(ART_DEFAULT_GC_TYPE)
 
-ART_HOST_CFLAGS :=
-ART_TARGET_CFLAGS :=
-
-ART_HOST_ASFLAGS :=
-ART_TARGET_ASFLAGS :=
-
-# Clang build support.
-
-# Host.
-ART_HOST_CLANG := false
-ifneq ($(WITHOUT_HOST_CLANG),true)
-  # By default, host builds use clang for better warnings.
-  ART_HOST_CLANG := true
-endif
-
-# Clang on the target. Target builds use GCC by default.
-ifneq ($(USE_CLANG_PLATFORM_BUILD),)
-ART_TARGET_CLANG := $(USE_CLANG_PLATFORM_BUILD)
-else
-ART_TARGET_CLANG := false
-endif
-# b/25130937
-ART_TARGET_CLANG_arm := false
-ART_TARGET_CLANG_arm64 :=
-ART_TARGET_CLANG_mips :=
-ART_TARGET_CLANG_mips64 :=
-ART_TARGET_CLANG_x86 :=
-ART_TARGET_CLANG_x86_64 :=
-
-define set-target-local-clang-vars
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    $(foreach arch,$(ART_TARGET_SUPPORTED_ARCH),
-      ifneq ($$(ART_TARGET_CLANG_$(arch)),)
-        LOCAL_CLANG_$(arch) := $$(ART_TARGET_CLANG_$(arch))
-      endif)
-endef
-
-ART_TARGET_CLANG_CFLAGS :=
-ART_TARGET_CLANG_CFLAGS_arm :=
-ART_TARGET_CLANG_CFLAGS_arm64 :=
-ART_TARGET_CLANG_CFLAGS_mips :=
-ART_TARGET_CLANG_CFLAGS_mips64 :=
-ART_TARGET_CLANG_CFLAGS_x86 :=
-ART_TARGET_CLANG_CFLAGS_x86_64 :=
-
-# Warn about thread safety violations with clang.
-art_clang_cflags := -Wthread-safety -Wthread-safety-negative
-
-# Warn if switch fallthroughs aren't annotated.
-art_clang_cflags += -Wimplicit-fallthrough
-
-# Enable float equality warnings.
-art_clang_cflags += -Wfloat-equal
-
-# Enable warning of converting ints to void*.
-art_clang_cflags += -Wint-to-void-pointer-cast
-
-# Enable warning of wrong unused annotations.
-art_clang_cflags += -Wused-but-marked-unused
-
-# Enable warning for deprecated language features.
-art_clang_cflags += -Wdeprecated
-
-# Enable warning for unreachable break & return.
-art_clang_cflags += -Wunreachable-code-break -Wunreachable-code-return
-
-# Enable missing-noreturn only on non-Mac. As lots of things are not implemented for Apple, it's
-# a pain.
-ifneq ($(HOST_OS),darwin)
-  art_clang_cflags += -Wmissing-noreturn
-endif
-
-
-# GCC-only warnings.
-art_gcc_cflags := -Wunused-but-set-parameter
-# Suggest const: too many false positives, but good for a trial run.
-#                  -Wsuggest-attribute=const
-# Useless casts: too many, as we need to be 32/64 agnostic, but the compiler knows.
-#                  -Wuseless-cast
-# Zero-as-null: Have to convert all NULL and "diagnostic ignore" all includes like libnativehelper
-# that are still stuck pre-C++11.
-#                  -Wzero-as-null-pointer-constant \
-# Suggest final: Have to move to a more recent GCC.
-#                  -Wsuggest-final-types
-
-ART_TARGET_CLANG_CFLAGS := $(art_clang_cflags)
-ifeq ($(ART_HOST_CLANG),true)
-  # Bug: 15446488. We don't omit the frame pointer to work around
-  # clang/libunwind bugs that cause SEGVs in run-test-004-ThreadStress.
-  ART_HOST_CFLAGS += $(art_clang_cflags) -fno-omit-frame-pointer
-else
-  ART_HOST_CFLAGS += $(art_gcc_cflags)
-endif
-ifneq ($(ART_TARGET_CLANG),true)
-  ART_TARGET_CFLAGS += $(art_gcc_cflags)
-else
-  # TODO: if we ever want to support GCC/Clang mix for multi-target products, this needs to be
-  #       split up.
-  ifeq ($(ART_TARGET_CLANG_$(TARGET_ARCH)),false)
-    ART_TARGET_CFLAGS += $(art_gcc_cflags)
-  endif
-endif
-
-# Clear local variables now their use has ended.
-art_clang_cflags :=
-art_gcc_cflags :=
+ART_HOST_CLANG := true
+ART_TARGET_CLANG := true
 
 ART_CPP_EXTENSION := .cc
 
 ART_C_INCLUDES := \
   external/gtest/include \
   external/icu/icu4c/source/common \
+  external/lz4/lib \
   external/valgrind/include \
   external/valgrind \
   external/vixl/src \
@@ -215,8 +100,41 @@
 # Note: technically we only need this on device, but this avoids duplicating the includes.
 ART_C_INCLUDES += bionic/libc/private
 
+art_cflags :=
+
+# Warn about thread safety violations with clang.
+art_cflags += -Wthread-safety -Wthread-safety-negative
+
+# Warn if switch fallthroughs aren't annotated.
+art_cflags += -Wimplicit-fallthrough
+
+# Enable float equality warnings.
+art_cflags += -Wfloat-equal
+
+# Enable warning of converting ints to void*.
+art_cflags += -Wint-to-void-pointer-cast
+
+# Enable warning of wrong unused annotations.
+art_cflags += -Wused-but-marked-unused
+
+# Enable warning for deprecated language features.
+art_cflags += -Wdeprecated
+
+# Enable warning for unreachable break & return.
+art_cflags += -Wunreachable-code-break -Wunreachable-code-return
+
+# Bug: http://b/29823425  Disable -Wconstant-conversion and
+# -Wundefined-var-template for Clang update to r271374
+art_cflags += -Wno-constant-conversion -Wno-undefined-var-template
+
+# Enable missing-noreturn only on non-Mac: too many functions are left unimplemented for Apple,
+# which makes the warning a pain there.
+ifneq ($(HOST_OS),darwin)
+  art_cflags += -Wmissing-noreturn
+endif
+
 # Base set of cflags used by all things ART.
-art_cflags := \
+art_cflags += \
   -fno-rtti \
   -std=gnu++11 \
   -ggdb3 \
@@ -234,24 +152,19 @@
 
 # The architectures the compiled tools are able to run on. Setting this to 'all' will cause all
 # architectures to be included.
-ART_TARGET_CODEGEN_ARCHS ?= all
+ART_TARGET_CODEGEN_ARCHS ?= svelte
 ART_HOST_CODEGEN_ARCHS ?= all
 
 ifeq ($(ART_TARGET_CODEGEN_ARCHS),all)
   ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
-  # We need to handle the fact that some compiler tests mix code from different architectures.
-  ART_TARGET_COMPILER_TESTS ?= true
 else
-  ART_TARGET_COMPILER_TESTS := false
   ifeq ($(ART_TARGET_CODEGEN_ARCHS),svelte)
     ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_ARCH_64) $(ART_TARGET_ARCH_32))
   endif
 endif
 ifeq ($(ART_HOST_CODEGEN_ARCHS),all)
   ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
-  ART_HOST_COMPILER_TESTS ?= true
 else
-  ART_HOST_COMPILER_TESTS := false
   ifeq ($(ART_HOST_CODEGEN_ARCHS),svelte)
     ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_CODEGEN_ARCHS) $(ART_HOST_ARCH_64) $(ART_HOST_ARCH_32))
   endif
@@ -296,8 +209,8 @@
 ifdef ART_IMT_SIZE
   art_cflags += -DIMT_SIZE=$(ART_IMT_SIZE)
 else
-  # Default is 64
-  art_cflags += -DIMT_SIZE=64
+  # Default is 43
+  art_cflags += -DIMT_SIZE=43
 endif
 
 ifeq ($(ART_HEAP_POISONING),true)
@@ -316,6 +229,10 @@
   art_cflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1
   art_asflags += -DART_USE_READ_BARRIER=1
   art_asflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1
+
+  # Temporarily override -fstack-protector-strong with -fstack-protector to avoid a major
+  # slowdown with the read barrier config. b/26744236.
+  art_cflags += -fstack-protector
 endif
 
 ifeq ($(ART_USE_TLAB),true)
@@ -333,48 +250,108 @@
   -DVIXL_DEBUG \
   -UNDEBUG
 
+# Assembler flags for non-debug ART and ART tools.
+art_non_debug_asflags :=
+
+# Assembler flags for debug ART and ART tools.
+art_debug_asflags := -UNDEBUG
+
 art_host_non_debug_cflags := $(art_non_debug_cflags)
 art_target_non_debug_cflags := $(art_non_debug_cflags)
 
+###
+# Frame size
+###
+
+# Size of the stack-overflow gap.
+ART_STACK_OVERFLOW_GAP_arm := 8192
+ART_STACK_OVERFLOW_GAP_arm64 := 8192
+ART_STACK_OVERFLOW_GAP_mips := 16384
+ART_STACK_OVERFLOW_GAP_mips64 := 16384
+ART_STACK_OVERFLOW_GAP_x86 := 8192
+ART_STACK_OVERFLOW_GAP_x86_64 := 8192
+ART_COMMON_STACK_OVERFLOW_DEFINES := \
+  -DART_STACK_OVERFLOW_GAP_arm=$(ART_STACK_OVERFLOW_GAP_arm) \
+  -DART_STACK_OVERFLOW_GAP_arm64=$(ART_STACK_OVERFLOW_GAP_arm64) \
+  -DART_STACK_OVERFLOW_GAP_mips=$(ART_STACK_OVERFLOW_GAP_mips) \
+  -DART_STACK_OVERFLOW_GAP_mips64=$(ART_STACK_OVERFLOW_GAP_mips64) \
+  -DART_STACK_OVERFLOW_GAP_x86=$(ART_STACK_OVERFLOW_GAP_x86) \
+  -DART_STACK_OVERFLOW_GAP_x86_64=$(ART_STACK_OVERFLOW_GAP_x86_64) \
+
+# Keep these as small as possible. We keep separate host and target values because some code is
+# host- or target-specific (and the compilers previously differed: GCC vs Clang).
+ART_HOST_FRAME_SIZE_LIMIT := 1736
+ART_TARGET_FRAME_SIZE_LIMIT := 1736
+
+# Frame size adaptations for instrumented builds.
+ifdef SANITIZE_TARGET
+  ART_TARGET_FRAME_SIZE_LIMIT := 6400
+endif
+
+# Add frame-size checks for non-debug builds.
 ifeq ($(HOST_OS),linux)
-  # Larger frame-size for host clang builds today
   ifneq ($(ART_COVERAGE),true)
     ifneq ($(NATIVE_COVERAGE),true)
-      art_host_non_debug_cflags += -Wframe-larger-than=2700
-      ifdef SANITIZE_TARGET
-        art_target_non_debug_cflags += -Wframe-larger-than=6400
-      else
-        art_target_non_debug_cflags += -Wframe-larger-than=1728
-      endif
+      art_host_non_debug_cflags += -Wframe-larger-than=$(ART_HOST_FRAME_SIZE_LIMIT)
+      art_target_non_debug_cflags += -Wframe-larger-than=$(ART_TARGET_FRAME_SIZE_LIMIT)
     endif
   endif
 endif
 
+
+ART_HOST_CFLAGS := $(art_cflags)
+ART_TARGET_CFLAGS := $(art_cflags)
+
+ART_HOST_ASFLAGS := $(art_asflags)
+ART_TARGET_ASFLAGS := $(art_asflags)
+
+# Bug: 15446488. We don't omit the frame pointer to work around
+# clang/libunwind bugs that cause SEGVs in run-test-004-ThreadStress.
+ART_HOST_CFLAGS += -fno-omit-frame-pointer
+
 ifndef LIBART_IMG_HOST_BASE_ADDRESS
   $(error LIBART_IMG_HOST_BASE_ADDRESS unset)
 endif
-ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
-ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
-ART_HOST_ASFLAGS += $(art_asflags)
+ART_HOST_CFLAGS += -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
+ART_HOST_CFLAGS += $(art_host_cflags)
 
-# The latest clang update trips over many of the files in art and never finishes
-# compiling for aarch64 with -O3 (or -O2). Drop back to -O1 while we investigate
-# to stop punishing the build server.
-# Bug: http://b/23256622
-ART_TARGET_CLANG_CFLAGS_arm64 += -O1
+ART_HOST_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_HOST_FRAME_SIZE_LIMIT) \
+                   $(ART_COMMON_STACK_OVERFLOW_DEFINES)
+
 
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
-ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
+
+ART_TARGET_CFLAGS += -DART_TARGET \
+                     -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS) \
+
+ART_TARGET_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_TARGET_FRAME_SIZE_LIMIT) \
+                     $(ART_COMMON_STACK_OVERFLOW_DEFINES)
+
+ifeq ($(ART_TARGET_LINUX),true)
+# Setting ART_TARGET_LINUX to true compiles art/ assuming that the target device
+# will be running Linux rather than Android.
+ART_TARGET_CFLAGS += -DART_TARGET_LINUX
+else
+# The ART_TARGET_ANDROID macro is passed to target builds, which check
+# against it instead of against __ANDROID__ (which is provided by target
+# toolchains).
+ART_TARGET_CFLAGS += -DART_TARGET_ANDROID
+endif
+
 ART_TARGET_CFLAGS += $(art_target_cflags)
-ART_TARGET_ASFLAGS += $(art_asflags)
 
 ART_HOST_NON_DEBUG_CFLAGS := $(art_host_non_debug_cflags)
 ART_TARGET_NON_DEBUG_CFLAGS := $(art_target_non_debug_cflags)
 ART_HOST_DEBUG_CFLAGS := $(art_debug_cflags)
 ART_TARGET_DEBUG_CFLAGS := $(art_debug_cflags)
 
+ART_HOST_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_TARGET_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_HOST_DEBUG_ASFLAGS := $(art_debug_asflags)
+ART_TARGET_DEBUG_ASFLAGS := $(art_debug_asflags)
+
 ifndef LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA
   LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA=-0x1000000
 endif
@@ -403,35 +380,28 @@
 art_target_cflags :=
 art_debug_cflags :=
 art_non_debug_cflags :=
+art_debug_asflags :=
+art_non_debug_asflags :=
 art_host_non_debug_cflags :=
 art_target_non_debug_cflags :=
 art_default_gc_type_cflags :=
 
-ART_HOST_LDLIBS :=
-ifneq ($(ART_HOST_CLANG),true)
-  # GCC lacks libc++ assumed atomic operations, grab via libatomic.
-  ART_HOST_LDLIBS += -latomic
-endif
-
 ART_TARGET_LDFLAGS :=
 
 # $(1): ndebug_or_debug
 define set-target-local-cflags-vars
   LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
-  LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86)
   LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS)
   LOCAL_LDFLAGS += $(ART_TARGET_LDFLAGS)
   art_target_cflags_ndebug_or_debug := $(1)
   ifeq ($$(art_target_cflags_ndebug_or_debug),debug)
     LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_DEBUG_ASFLAGS)
   else
     LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_NON_DEBUG_ASFLAGS)
   endif
 
-  LOCAL_CLANG_CFLAGS := $(ART_TARGET_CLANG_CFLAGS)
-  $(foreach arch,$(ART_TARGET_SUPPORTED_ARCH),
-    LOCAL_CLANG_CFLAGS_$(arch) += $$(ART_TARGET_CLANG_CFLAGS_$(arch)))
-
   # Clear locally used variables.
   art_target_cflags_ndebug_or_debug :=
 endef
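
Note: a typical consumer of set-target-local-cflags-vars looks like the
following sketch (hypothetical module name; it mirrors the pattern used in
benchmark/Android.mk and Android.gtest.mk in this change):

    include $(CLEAR_VARS)
    LOCAL_MODULE := art-example-tool    # hypothetical module
    LOCAL_CLANG := $(ART_TARGET_CLANG)
    $(eval $(call set-target-local-cflags-vars,debug))
    include $(BUILD_EXECUTABLE)

The call appends ART_TARGET_CFLAGS, ART_TARGET_ASFLAGS, and ART_TARGET_LDFLAGS,
plus the debug (or non-debug) cflags/asflags variants, to the module's LOCAL_*
variables.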
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index c53479c..86bb475 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -38,7 +38,7 @@
 ifneq ($(TMPDIR),)
 ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
 else
-ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID)
+ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID)
 endif
 
 # core.oat location on the device.
@@ -80,20 +80,37 @@
 TARGET_CORE_IMG_LOCATION := $(ART_TARGET_TEST_OUT)/core.art
 
 # Jar files for core.art.
-TARGET_CORE_JARS := core-libart conscrypt okhttp bouncycastle
+TARGET_CORE_JARS := core-oj core-libart conscrypt okhttp bouncycastle apache-xml
 HOST_CORE_JARS := $(addsuffix -hostdex,$(TARGET_CORE_JARS))
 
 HOST_CORE_DEX_LOCATIONS   := $(foreach jar,$(HOST_CORE_JARS),  $(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)
+ifeq ($(ART_TEST_ANDROID_ROOT),)
 TARGET_CORE_DEX_LOCATIONS := $(foreach jar,$(TARGET_CORE_JARS),/$(DEXPREOPT_BOOT_JAR_DIR)/$(jar).jar)
+else
+TARGET_CORE_DEX_LOCATIONS := $(foreach jar,$(TARGET_CORE_JARS),$(ART_TEST_ANDROID_ROOT)/framework/$(jar).jar)
+endif
 
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
-ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
 # Classpath for Jack compilation: we only need core-oj and core-libart.
-HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
-HOST_JACK_CLASSPATH                := $(foreach dep,$(HOST_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
-TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
-TARGET_JACK_CLASSPATH              := $(foreach dep,$(TARGET_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
+HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
+HOST_JACK_CLASSPATH                := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack)
+TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
+TARGET_JACK_CLASSPATH              := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack)
+
+ART_HOST_DEX_DEPENDENCIES := $(foreach jar,$(HOST_CORE_JARS),$(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)
+ART_TARGET_DEX_DEPENDENCIES := $(foreach jar,$(TARGET_CORE_JARS),$(TARGET_OUT_JAVA_LIBRARIES)/$(jar).jar)
+
+ART_CORE_SHARED_LIBRARIES := libjavacore libopenjdk libopenjdkjvm libopenjdkjvmti
+ART_HOST_SHARED_LIBRARY_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(ART_HOST_OUT_SHARED_LIBRARIES)/$(lib)$(ART_HOST_SHLIB_EXTENSION))
+ifdef HOST_2ND_ARCH
+ART_HOST_SHARED_LIBRARY_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(2ND_HOST_OUT_SHARED_LIBRARIES)/$(lib).so)
 endif
+
+ART_TARGET_SHARED_LIBRARY_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
+ifdef TARGET_2ND_ARCH
+ART_TARGET_SHARED_LIBRARY_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(2ND_TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
+endif
+
 endif # ART_ANDROID_COMMON_PATH_MK
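
Note: with the ART_TEST_ANDROID_ROOT branch above, the boot-classpath dex
locations follow the test root. A hypothetical example:

    # With ART_TEST_ANDROID_ROOT := /data/local/art, core-oj resolves to
    #   /data/local/art/framework/core-oj.jar
    # instead of the default
    #   /system/framework/core-oj.jar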
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 2f43f5f..93e310e 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -27,6 +27,10 @@
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
 ART_TEST_KNOWN_BROKEN :=
 
+# List of run-tests to skip running in any configuration. This needs to be the full name of the
+# run-test such as '457-regs'.
+ART_TEST_RUN_TEST_SKIP ?=
+
 # Failing valgrind tests.
 # Note: *all* 64b tests involving the runtime do not work currently. b/15170219.
 
@@ -40,8 +44,8 @@
 # Do you want all tests, even those that are time consuming?
 ART_TEST_FULL ?= false
 
-# Do you want default compiler tests run?
-ART_TEST_DEFAULT_COMPILER ?= true
+# Do you want run-test to be quieter? Run-tests will only show output if they fail.
+ART_TEST_QUIET ?= true
 
 # Do you want interpreter tests run?
 ART_TEST_INTERPRETER ?= $(ART_TEST_FULL)
@@ -51,7 +55,10 @@
 ART_TEST_JIT ?= $(ART_TEST_FULL)
 
 # Do you want optimizing compiler tests run?
-ART_TEST_OPTIMIZING ?= $(ART_TEST_FULL)
+ART_TEST_OPTIMIZING ?= true
+
+# Do you want to test the optimizing compiler with graph coloring register allocation?
+ART_TEST_OPTIMIZING_GRAPH_COLOR ?= $(ART_TEST_FULL)
 
 # Do we want to test a PIC-compiled core image?
 ART_TEST_PIC_IMAGE ?= $(ART_TEST_FULL)
@@ -107,6 +114,9 @@
 # Do you want run-tests with the --debuggable flag
 ART_TEST_RUN_TEST_DEBUGGABLE ?= $(ART_TEST_FULL)
 
+# Do you want to test multi-part boot-image functionality?
+ART_TEST_RUN_TEST_MULTI_IMAGE ?= $(ART_TEST_FULL)
+
 # Define the command run on test failure. $(1) is the name of the test. Executed by the shell.
 define ART_TEST_FAILED
   ( [ -f $(ART_HOST_TEST_DIR)/skipped/$(1) ] || \
@@ -116,12 +126,25 @@
         || (echo -e "$(1) \e[91mFAILED\e[0m" >&2 )))
 endef
 
+ifeq ($(ART_TEST_QUIET),true)
+  ART_TEST_ANNOUNCE_PASS := ( true )
+  ART_TEST_ANNOUNCE_RUN := ( true )
+  ART_TEST_ANNOUNCE_SKIP_FAILURE := ( true )
+  ART_TEST_ANNOUNCE_SKIP_BROKEN := ( true )
+else
+  # Note: the use of '=' and not ':=' is intentional, since these are actually functions.
+  ART_TEST_ANNOUNCE_PASS = ( echo -e "$(1) \e[92mPASSED\e[0m" )
+  ART_TEST_ANNOUNCE_RUN = ( echo -e "$(1) \e[95mRUNNING\e[0m")
+  ART_TEST_ANNOUNCE_SKIP_FAILURE = ( echo -e "$(1) \e[93mSKIPPING DUE TO EARLIER FAILURE\e[0m" )
+  ART_TEST_ANNOUNCE_SKIP_BROKEN = ( echo -e "$(1) \e[93mSKIPPING BROKEN TEST\e[0m" )
+endif
+
 # Define the command run on test success. $(1) is the name of the test. Executed by the shell.
 # The command prints "PASSED", then checks whether this was a top-level make target (e.g.
 # "mm test-art-host-oat-HelloWorld32"); if it was, it does nothing, otherwise it creates a file
 # to be printed in the passing test summary.
 define ART_TEST_PASSED
-  ( echo -e "$(1) \e[92mPASSED\e[0m" && \
+  ( $(call ART_TEST_ANNOUNCE_PASS,$(1)) && \
     (echo $(MAKECMDGOALS) | grep -q $(1) || \
       (mkdir -p $(ART_HOST_TEST_DIR)/passed/ && touch $(ART_HOST_TEST_DIR)/passed/$(1))))
 endef
@@ -150,11 +173,11 @@
 define ART_TEST_SKIP
   ((echo $(ART_TEST_KNOWN_BROKEN) | grep -q -v $(1) \
      && ([ ! -d $(ART_HOST_TEST_DIR)/failed/ ] || [ $(ART_TEST_KEEP_GOING) = true ])\
-     && echo -e "$(1) \e[95mRUNNING\e[0m") \
+     && $(call ART_TEST_ANNOUNCE_RUN,$(1)) ) \
    || ((mkdir -p $(ART_HOST_TEST_DIR)/skipped/ && touch $(ART_HOST_TEST_DIR)/skipped/$(1) \
      && ([ -d $(ART_HOST_TEST_DIR)/failed/ ] \
-       && echo -e "$(1) \e[93mSKIPPING DUE TO EARLIER FAILURE\e[0m") \
-     || echo -e "$(1) \e[93mSKIPPING BROKEN TEST\e[0m") && false))
+       && $(call ART_TEST_ANNOUNCE_SKIP_FAILURE,$(1)) ) \
+     || $(call ART_TEST_ANNOUNCE_SKIP_BROKEN,$(1)) ) && false))
 endef
 
 # Create a build rule to create the dex file for a test.
@@ -181,7 +204,6 @@
     LOCAL_MODULE_PATH := $(3)
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
-      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_JAVA_LIBRARY)
@@ -197,7 +219,6 @@
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
-      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
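
Note: the ART_TEST_ANNOUNCE_* definitions above rely on recursive '='
assignment so that $(call ...) substitutes $(1) at use time. A minimal sketch
(hypothetical variable name):

    ANNOUNCE = $(info $(1) PASSED)
    $(call ANNOUNCE,my-test)    # prints: my-test PASSED

With ':=' the $(info ...) would run once at definition time, while $(1) is
still empty, and the later $(call ...) would print nothing.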
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 79f8f5e..03791f3 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -16,9 +16,18 @@
 
 include art/build/Android.common_build.mk
 
-ART_CPPLINT := art/tools/cpplint.py
+ART_CPPLINT := $(LOCAL_PATH)/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
-ART_CPPLINT_SRC := $(shell find art -name "*.h" -o -name "*$(ART_CPP_EXTENSION)" | grep -v art/compiler/llvm/generated/ | grep -v art/runtime/elf\.h)
+ART_CPPLINT_FLAGS := --quiet
+ART_CPPLINT_IGNORED := \
+    runtime/elf.h \
+    runtime/openjdkjvmti/jvmti.h
+
+# This:
+#  1) Gets a list of all .h & .cc files in the art directory.
+#  2) Prepends 'art/' to each of them to make the full name.
+#  3) Removes the files listed in ART_CPPLINT_IGNORED (e.g. art/runtime/elf.h) from the list.
+ART_CPPLINT_SRC := $(filter-out $(patsubst %,$(LOCAL_PATH)/%,$(ART_CPPLINT_IGNORED)), $(addprefix $(LOCAL_PATH)/, $(call all-subdir-named-files,*.h) $(call all-subdir-named-files,*$(ART_CPP_EXTENSION))))
 
 # "mm cpplint-art" to verify we aren't regressing
 .PHONY: cpplint-art
@@ -39,8 +48,8 @@
 art_cpplint_touch := $$(OUT_CPPLINT)/$$(subst /,__,$$(art_cpplint_file))
 
 $$(art_cpplint_touch): $$(art_cpplint_file) $(ART_CPPLINT) art/build/Android.cpplint.mk
-	$(hide) $(ART_CPPLINT) $(ART_CPPLINT_FILTER) $$<
-	@mkdir -p $$(dir $$@)
+	$(hide) $(ART_CPPLINT) $(ART_CPPLINT_FLAGS) $(ART_CPPLINT_FILTER) $$<
+	$(hide) mkdir -p $$(dir $$@)
 	$(hide) touch $$@
 
 ART_CPPLINT_TARGETS += $$(art_cpplint_touch)
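
Note: the ART_CPPLINT_SRC construction above is just a prefix step plus
filter-out. A reduced sketch with hypothetical file names:

    FILES   := a.h b.cc runtime/elf.h
    IGNORED := runtime/elf.h
    SRC     := $(filter-out $(patsubst %,art/%,$(IGNORED)), $(addprefix art/,$(FILES)))
    # SRC is now: art/a.h art/b.cc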
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 3b2d1cc..2db16af 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -80,18 +80,19 @@
   endif
 
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,$(6))
     LOCAL_SHARED_LIBRARIES += libdl
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     ifeq ($$(art_static_or_shared),static)
@@ -201,6 +202,9 @@
 # $(5): library dependencies (host only)
 # $(6): extra include directories
 # $(7): multilib (default: empty), valid values: {,32,64,both})
+# $(8): host prefer 32-bit: {true, false} (default: false).  If argument
+#       `multilib` is explicitly set to 64, ignore the "host prefer 32-bit"
+#       setting and only build a 64-bit executable on host.
 define build-art-multi-executable
   $(foreach debug_flavor,ndebug debug,
     $(foreach target_flavor,host target,
@@ -211,6 +215,7 @@
       art-multi-lib-dependencies-host := $(5)
       art-multi-include-extra := $(6)
       art-multi-multilib := $(7)
+      art-multi-host-prefer-32-bit := $(8)
 
       # Add either -host or -target specific lib dependencies to the lib dependencies.
       art-multi-lib-dependencies += $$(art-multi-lib-dependencies-$(target_flavor))
@@ -223,6 +228,14 @@
       # Build the env guard var name, e.g. ART_BUILD_HOST_NDEBUG.
       art-multi-env-guard := $$(call art-string-to-uppercase,ART_BUILD_$(target_flavor)_$(debug_flavor))
 
+      ifeq ($(target_flavor),host)
+        ifeq ($$(art-multi-host-prefer-32-bit),true)
+          ifneq ($$(art-multi-multilib),64)
+            art-multi-multilib := 32
+          endif
+        endif
+      endif
+
       # Build the art executable only if the corresponding env guard was set.
       ifeq ($$($$(art-multi-env-guard)),true)
         $$(eval $$(call build-art-executable,$$(art-multi-binary-name),$$(art-multi-source-files),$$(art-multi-lib-dependencies),$$(art-multi-include-extra),$(target_flavor),$(debug_flavor),$$(art-multi-multilib)))
@@ -236,6 +249,7 @@
       art-multi-lib-dependencies-host :=
       art-multi-include-extra :=
       art-multi-multilib :=
+      art-multi-host-prefer-32-bit :=
       art-multi-env-guard :=
     )
   )
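
Note: the host prefer-32-bit resolution added above yields the following host
multilib values (sketch):

    #   multilib argument   prefer-32-bit   resulting host multilib
    #   (empty)             true            32
    #   both                true            32
    #   64                  true            64   (explicit 64 wins)
    #   (any)               false           unchanged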
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 6295e15..c538c4f 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -29,6 +29,7 @@
   GetMethodSignature \
   Instrumentation \
   Interfaces \
+  Lookup \
   Main \
   MultiDex \
   MultiDexModifiedSecondary \
@@ -38,6 +39,7 @@
   NonStaticLeafMethods \
   ProtoCompare \
   ProtoCompare2 \
+  ProfileTestMultiDex \
   StaticLeafMethods \
   Statics \
   StaticsFromCode \
@@ -63,35 +65,53 @@
 	$(call dexpreopt-remove-classes.dex,$@)
 
 # Dex file dependencies for each gtest.
+ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
+
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
-ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
+ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
+ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
-ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
+ART_GTEST_oat_file_assistant_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
 ART_GTEST_oat_file_test_DEX_DEPS := Main MultiDex
+ART_GTEST_oat_test_DEX_DEPS := Main
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
+ART_GTEST_profile_assistant_test_DEX_DEPS := ProfileTestMultiDex
+ART_GTEST_profile_compilation_info_test_DEX_DEPS := ProfileTestMultiDex
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
+ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
 
 # The elf writer test has dependencies on core.oat.
 ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
 ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) $(TARGET_CORE_IMAGE_default_no-pic_32)
 
-ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
+ART_GTEST_dex2oat_environment_tests_HOST_DEPS := \
   $(HOST_CORE_IMAGE_default_no-pic_64) \
   $(HOST_CORE_IMAGE_default_no-pic_32) \
   $(HOST_OUT_EXECUTABLES)/patchoatd
-ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
+ART_GTEST_dex2oat_environment_tests_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_default_no-pic_64) \
   $(TARGET_CORE_IMAGE_default_no-pic_32) \
   $(TARGET_OUT_EXECUTABLES)/patchoatd
 
+ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
+  $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS)
+ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
+  $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS)
+
+
+ART_GTEST_dex2oat_test_HOST_DEPS := \
+  $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS)
+ART_GTEST_dex2oat_test_TARGET_DEPS := \
+  $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS)
+
 # TODO: document why this is needed.
 ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
 
@@ -138,6 +158,12 @@
   $(TARGET_CORE_IMAGE_default_no-pic_32) \
   oatdump
 
+# Profile assistant tests require the profman utility.
+ART_GTEST_profile_assistant_test_HOST_DEPS := \
+  $(HOST_OUT_EXECUTABLES)/profmand
+ART_GTEST_profile_assistant_test_TARGET_DEPS := \
+  profman
+
 # The path that all the source files are relative to (not actually the current directory).
 LOCAL_PATH := art
 
@@ -145,8 +171,10 @@
   cmdline/cmdline_parser_test.cc \
   dexdump/dexdump_test.cc \
   dexlist/dexlist_test.cc \
+  dex2oat/dex2oat_test.cc \
   imgdiag/imgdiag_test.cc \
   oatdump/oatdump_test.cc \
+  profman/profile_assistant_test.cc \
   runtime/arch/arch_test.cc \
   runtime/arch/instruction_set_test.cc \
   runtime/arch/instruction_set_features_test.cc \
@@ -159,6 +187,7 @@
   runtime/arch/x86/instruction_set_features_x86_test.cc \
   runtime/arch/x86_64/instruction_set_features_x86_64_test.cc \
   runtime/barrier_test.cc \
+  runtime/base/arena_allocator_test.cc \
   runtime/base/bit_field_test.cc \
   runtime/base/bit_utils_test.cc \
   runtime/base/bit_vector_test.cc \
@@ -173,6 +202,7 @@
   runtime/base/variant_map_test.cc \
   runtime/base/unix_file/fd_file_test.cc \
   runtime/class_linker_test.cc \
+  runtime/compiler_filter_test.cc \
   runtime/dex_file_test.cc \
   runtime/dex_file_verifier_test.cc \
   runtime/dex_instruction_test.cc \
@@ -181,19 +211,18 @@
   runtime/entrypoints/math_entrypoints_test.cc \
   runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \
   runtime/entrypoints_order_test.cc \
-  runtime/exception_test.cc \
   runtime/gc/accounting/card_table_test.cc \
   runtime/gc/accounting/mod_union_table_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
+  runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
-  runtime/gc/space/dlmalloc_space_base_test.cc \
   runtime/gc/space/dlmalloc_space_static_test.cc \
   runtime/gc/space/dlmalloc_space_random_test.cc \
-  runtime/gc/space/rosalloc_space_base_test.cc \
+  runtime/gc/space/large_object_space_test.cc \
   runtime/gc/space/rosalloc_space_static_test.cc \
   runtime/gc/space/rosalloc_space_random_test.cc \
-  runtime/gc/space/large_object_space_test.cc \
+  runtime/gc/space/space_create_test.cc \
   runtime/gc/task_processor_test.cc \
   runtime/gtest_test.cc \
   runtime/handle_scope_test.cc \
@@ -204,8 +233,7 @@
   runtime/interpreter/safe_math_test.cc \
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
-  runtime/lambda/closure_test.cc \
-  runtime/lambda/shorty_field_type_test.cc \
+  runtime/jit/profile_compilation_info_test.cc \
   runtime/leb128_test.cc \
   runtime/mem_map_test.cc \
   runtime/memory_region_test.cc \
@@ -220,6 +248,7 @@
   runtime/reference_table_test.cc \
   runtime/thread_pool_test.cc \
   runtime/transaction_test.cc \
+  runtime/type_lookup_table_test.cc \
   runtime/utf_test.cc \
   runtime/utils_test.cc \
   runtime/verifier/method_verifier_test.cc \
@@ -230,17 +259,16 @@
   runtime/jni_internal_test.cc \
   runtime/proxy_test.cc \
   runtime/reflection_test.cc \
-  compiler/dex/gvn_dead_code_elimination_test.cc \
-  compiler/dex/global_value_numbering_test.cc \
-  compiler/dex/local_value_numbering_test.cc \
-  compiler/dex/mir_graph_test.cc \
-  compiler/dex/mir_optimization_test.cc \
-  compiler/dex/type_inference_test.cc \
-  compiler/dwarf/dwarf_test.cc \
+  compiler/compiled_method_test.cc \
+  compiler/debug/dwarf/dwarf_test.cc \
+  compiler/driver/compiled_method_storage_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
+  compiler/exception_test.cc \
   compiler/image_test.cc \
   compiler/jni/jni_compiler_test.cc \
+  compiler/linker/multi_oat_relative_patcher_test.cc \
+  compiler/linker/output_stream_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/bounds_check_elimination_test.cc \
   compiler/optimizing/dominator_test.cc \
@@ -255,27 +283,23 @@
   compiler/optimizing/nodes_test.cc \
   compiler/optimizing/parallel_move_test.cc \
   compiler/optimizing/pretty_printer_test.cc \
+  compiler/optimizing/reference_type_propagation_test.cc \
   compiler/optimizing/side_effects_test.cc \
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/output_stream_test.cc \
-  compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
+  compiler/utils/intrusive_forward_list_test.cc \
+  compiler/utils/string_reference_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
+  compiler/utils/transform_array_ref_test.cc \
+  compiler/utils/transform_iterator_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_all := \
-  compiler/dex/quick/quick_cfi_test.cc \
   compiler/jni/jni_cfi_test.cc \
   compiler/optimizing/codegen_test.cc \
-  compiler/optimizing/constant_folding_test.cc \
-  compiler/optimizing/dead_code_elimination_test.cc \
-  compiler/optimizing/linearize_test.cc \
-  compiler/optimizing/liveness_test.cc \
-  compiler/optimizing/live_ranges_test.cc \
   compiler/optimizing/optimizing_cfi_test.cc \
-  compiler/optimizing/register_allocator_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_arm := \
   compiler/linker/arm/relative_patcher_thumb2_test.cc \
@@ -286,6 +310,8 @@
   compiler/utils/arm64/managed_register_arm64_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips := \
+  compiler/linker/mips/relative_patcher_mips_test.cc \
+  compiler/linker/mips/relative_patcher_mips32r6_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \
 
@@ -293,6 +319,16 @@
   compiler/linker/x86/relative_patcher_x86_test.cc \
   compiler/utils/x86/managed_register_x86_test.cc \
 
+# These tests exercise architecture-independent functionality, but happen
+# to use x86 codegen as part of the test.
+COMPILER_GTEST_COMMON_SRC_FILES_x86 += \
+  compiler/optimizing/constant_folding_test.cc \
+  compiler/optimizing/dead_code_elimination_test.cc \
+  compiler/optimizing/linearize_test.cc \
+  compiler/optimizing/live_ranges_test.cc \
+  compiler/optimizing/liveness_test.cc \
+  compiler/optimizing/register_allocator_test.cc \
+
 COMPILER_GTEST_COMMON_SRC_FILES_x86_64 := \
   compiler/linker/x86_64/relative_patcher_x86_64_test.cc \
 
@@ -327,9 +363,7 @@
   $(COMPILER_GTEST_COMMON_SRC_FILES_x86_64) \
 
 $(foreach arch,$(ART_TARGET_CODEGEN_ARCHS),$(eval COMPILER_GTEST_TARGET_SRC_FILES += $$(COMPILER_GTEST_TARGET_SRC_FILES_$(arch))))
-ifeq (true,$(ART_TARGET_COMPILER_TESTS))
-  COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
-endif
+COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
@@ -349,6 +383,7 @@
 COMPILER_GTEST_HOST_SRC_FILES_mips := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips) \
   compiler/utils/mips/assembler_mips_test.cc \
+  compiler/utils/mips/assembler_mips32r6_test.cc \
 
 COMPILER_GTEST_HOST_SRC_FILES_mips64 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
@@ -356,7 +391,6 @@
 
 COMPILER_GTEST_HOST_SRC_FILES_x86 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_x86) \
-  compiler/dex/quick/x86/quick_assemble_x86_test.cc \
   compiler/utils/x86/assembler_x86_test.cc \
 
 COMPILER_GTEST_HOST_SRC_FILES_x86_64 := \
@@ -364,9 +398,7 @@
   compiler/utils/x86_64/assembler_x86_64_test.cc
 
 $(foreach arch,$(ART_HOST_CODEGEN_ARCHS),$(eval COMPILER_GTEST_HOST_SRC_FILES += $$(COMPILER_GTEST_HOST_SRC_FILES_$(arch))))
-ifeq (true,$(ART_HOST_COMPILER_TESTS))
-  COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
-endif
+COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
 
 ART_TEST_CFLAGS :=
 
@@ -375,14 +407,14 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
-LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler libdl
 LOCAL_STATIC_LIBRARIES += libgtest
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
-$(eval $(call set-target-local-clang-vars))
+$(eval LOCAL_CLANG := $(ART_TARGET_CLANG))
 $(eval $(call set-target-local-cflags-vars,debug))
-LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn # gtest issue
+LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn # gtest issue
 include $(BUILD_SHARED_LIBRARY)
 
 include $(CLEAR_VARS)
@@ -392,13 +424,13 @@
 LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
-LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler
 LOCAL_STATIC_LIBRARIES := libgtest_host
 LOCAL_LDLIBS += -ldl -lpthread
 LOCAL_MULTILIB := both
 LOCAL_CLANG := $(ART_HOST_CLANG)
-LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
 include $(BUILD_HOST_SHARED_LIBRARY)
@@ -413,6 +445,9 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_TEST_HOST_GTEST_DEPENDENCIES :=
 
 ART_GTEST_TARGET_ANDROID_ROOT := '/system'
@@ -420,6 +455,28 @@
   ART_GTEST_TARGET_ANDROID_ROOT := $(ART_TEST_ANDROID_ROOT)
 endif
 
+ART_VALGRIND_TARGET_DEPENDENCIES := \
+  $(TARGET_OUT_EXECUTABLES)/valgrind \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/memcheck-$(TARGET_ARCH)-linux \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_core-$(TARGET_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_memcheck-$(TARGET_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/default.supp
+
+ifdef TARGET_2ND_ARCH
+ART_VALGRIND_TARGET_DEPENDENCIES += \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/memcheck-$(TARGET_2ND_ARCH)-linux \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_core-$(TARGET_2ND_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_memcheck-$(TARGET_2ND_ARCH)-linux.so
+endif
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := valgrind-target-suppressions.txt
+LOCAL_MODULE_CLASS := ETC
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := test/valgrind-target-suppressions.txt
+LOCAL_MODULE_PATH := $(ART_TARGET_TEST_OUT)
+include $(BUILD_PREBUILT)
+
 # Define a make rule for a target device gtest.
 # $(1): gtest name - the name of the test we're building such as leb128_test.
 # $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
@@ -434,7 +491,10 @@
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_TARGET_GTEST_$(file)_DEX)) \
     $$(ART_TARGET_NATIVETEST_OUT)/$$(TARGET_$(2)ARCH)/$(1) \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
-    $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar
+    $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdkd.so \
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar \
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar \
+    $$(ART_TARGET_TEST_OUT)/valgrind-target-suppressions.txt
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): test-art-target-sync
@@ -453,7 +513,27 @@
   ART_TEST_TARGET_GTEST_RULES += $$(gtest_rule)
   ART_TEST_TARGET_GTEST_$(1)_RULES += $$(gtest_rule)
 
+.PHONY: valgrind-$$(gtest_rule)
+valgrind-$$(gtest_rule): $(ART_VALGRIND_TARGET_DEPENDENCIES) test-art-target-sync
+	$(hide) adb shell touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
+	$(hide) adb shell rm $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
+	$(hide) adb shell chmod 755 $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1)
+	$(hide) $$(call ART_TEST_SKIP,$$@) && \
+	  (adb shell "$(GCOV_ENV) LD_LIBRARY_PATH=$(3) ANDROID_ROOT=$(ART_GTEST_TARGET_ANDROID_ROOT) \
+	    valgrind --leak-check=full --error-exitcode=1 --workaround-gcc296-bugs=yes \
+	    --suppressions=$(ART_TARGET_TEST_DIR)/valgrind-target-suppressions.txt \
+	    $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1) && touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID" \
+	  && (adb pull $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID /tmp/ \
+	      && $$(call ART_TEST_PASSED,$$@)) \
+	  || $$(call ART_TEST_FAILED,$$@))
+	$(hide) rm -f /tmp/$$@-$$$$PPID
+
+  ART_TEST_TARGET_VALGRIND_GTEST$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += valgrind-$$(gtest_rule)
+  ART_TEST_TARGET_VALGRIND_GTEST_RULES += valgrind-$$(gtest_rule)
+  ART_TEST_TARGET_VALGRIND_GTEST_$(1)_RULES += valgrind-$$(gtest_rule)
+
   # Clear locally defined variables.
+  valgrind_gtest_rule :=
   gtest_rule :=
 endef  # define-art-gtest-rule-target
 
@@ -476,6 +556,7 @@
   # Dependencies for all host gtests.
   gtest_deps := $$(HOST_CORE_DEX_LOCATIONS) \
     $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$$(ART_HOST_SHLIB_EXTENSION) \
+    $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$$(ART_HOST_SHLIB_EXTENSION) \
     $$(gtest_exe) \
     $$(ART_GTEST_$(1)_HOST_DEPS) \
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX))
@@ -496,7 +577,8 @@
 valgrind-$$(gtest_rule): $$(gtest_exe) $$(gtest_deps) $(ART_VALGRIND_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  VALGRIND_LIB=$(HOST_OUT)/lib64/valgrind \
-	  $(HOST_OUT_EXECUTABLES)/valgrind --leak-check=full --error-exitcode=1 $$< && \
+	  $(HOST_OUT_EXECUTABLES)/valgrind --leak-check=full --error-exitcode=1 \
+	    --suppressions=art/test/valgrind-suppressions.txt $$< && \
 	    $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 
   ART_TEST_HOST_VALGRIND_GTEST$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += valgrind-$$(gtest_rule)
@@ -535,7 +617,7 @@
   endif
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   LOCAL_SRC_FILES := $$(art_gtest_filename)
-  LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes)
+  LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime art/cmdline $$(art_gtest_extra_c_includes)
   LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libartd-disassembler
   LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
 
@@ -550,13 +632,13 @@
 
   LOCAL_CFLAGS := $$(ART_TEST_CFLAGS)
   ifeq ($$(art_target_or_host),target)
-    $$(eval $$(call set-target-local-clang-vars))
+    $$(eval LOCAL_CLANG := $$(ART_TARGET_CLANG))
     $$(eval $$(call set-target-local-cflags-vars,debug))
-    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixld
+    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixld-arm64
     LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
-    LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+    LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
     include $$(BUILD_EXECUTABLE)
     library_path :=
     2nd_library_path :=
@@ -574,6 +656,7 @@
     endif
 
     ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES :=
+    ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES :=
     ifdef TARGET_2ND_ARCH
       $$(eval $$(call define-art-gtest-rule-target,$$(art_gtest_name),2ND_,$$(2nd_library_path)))
     endif
@@ -584,19 +667,24 @@
 test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES)
 	$$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
 
+.PHONY: valgrind-test-art-target-gtest-$$(art_gtest_name)
+valgrind-test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES)
+	$$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
+
     # Clear locally defined variables.
     ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES :=
+    ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES :=
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
-    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
+    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) $$(ART_HOST_DEBUG_ASFLAGS)
+    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld-arm64
+    LOCAL_LDLIBS := -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     LOCAL_MODULE_STEM_32 := $$(art_gtest_name)32
     LOCAL_MODULE_STEM_64 := $$(art_gtest_name)64
-    LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+    LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
     include $$(BUILD_HOST_EXECUTABLE)
 
     ART_TEST_HOST_GTEST_$$(art_gtest_name)_RULES :=
@@ -633,11 +721,11 @@
 
 ifeq ($(ART_BUILD_TARGET),true)
   $(foreach file,$(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),,libbacktrace)))
-  $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),art/compiler,libartd-compiler libbacktrace)))
+  $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),art/compiler,libartd-compiler libbacktrace libnativeloader)))
 endif
 ifeq ($(ART_BUILD_HOST),true)
   $(foreach file,$(RUNTIME_GTEST_HOST_SRC_FILES), $(eval $(call define-art-gtest,host,$(file),,libbacktrace)))
-  $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call define-art-gtest,host,$(file),art/compiler,libartd-compiler libbacktrace)))
+  $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call define-art-gtest,host,$(file),art/compiler,libartd-compiler libbacktrace libnativeloader)))
 endif
 
 # Used outside the art project to get a list of the current tests
@@ -668,9 +756,6 @@
 
   rule_name := $(3)test-art-$(1)-gtest$(4)
   ifeq ($(3),valgrind-)
-    ifneq ($(1),host)
-      $$(error valgrind tests only wired up for the host)
-    endif
     dependencies := $$(ART_TEST_$(2)_VALGRIND_GTEST$(4)_RULES)
   else
     dependencies := $$(ART_TEST_$(2)_GTEST$(4)_RULES)
@@ -686,9 +771,12 @@
 endef  # define-test-art-gtest-combination
 
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,))
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,$(ART_PHONY_TEST_TARGET_SUFFIX)))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,$(ART_PHONY_TEST_TARGET_SUFFIX)))
 ifdef TARGET_2ND_ARCH
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
 endif
 $(eval $(call define-test-art-gtest-combination,host,HOST,,))
 $(eval $(call define-test-art-gtest-combination,host,HOST,valgrind-,))
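
With the host-only guard removed above, the valgrind- combinations now exist for the target as well. Assuming the standard AOSP `m` wrapper and a 64-bit primary arch (the 64 suffix is an assumption about ART_PHONY_TEST_TARGET_SUFFIX), the new umbrella targets would be driven like:

    m valgrind-test-art-target-gtest      # all target gtests under valgrind
    m valgrind-test-art-target-gtest64    # primary-arch subset only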
@@ -720,6 +808,9 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_GTEST_TARGET_ANDROID_ROOT :=
 ART_GTEST_class_linker_test_DEX_DEPS :=
 ART_GTEST_compiler_driver_test_DEX_DEPS :=
@@ -732,12 +823,17 @@
 ART_GTEST_oat_file_assistant_test_DEX_DEPS :=
 ART_GTEST_oat_file_assistant_test_HOST_DEPS :=
 ART_GTEST_oat_file_assistant_test_TARGET_DEPS :=
+ART_GTEST_dex2oat_test_DEX_DEPS :=
+ART_GTEST_dex2oat_test_HOST_DEPS :=
+ART_GTEST_dex2oat_test_TARGET_DEPS :=
 ART_GTEST_object_test_DEX_DEPS :=
 ART_GTEST_proxy_test_DEX_DEPS :=
 ART_GTEST_reflection_test_DEX_DEPS :=
 ART_GTEST_stub_test_DEX_DEPS :=
 ART_GTEST_transaction_test_DEX_DEPS :=
+ART_GTEST_dex2oat_environment_tests_DEX_DEPS :=
 ART_VALGRIND_DEPENDENCIES :=
+ART_VALGRIND_TARGET_DEPENDENCIES :=
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_TARGET_GTEST_$(dir)_DEX :=))
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_HOST_GTEST_$(dir)_DEX :=))
 ART_TEST_HOST_GTEST_MainStripped_DEX :=
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 592843e..884f698 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -42,6 +42,7 @@
 # $(3): 2ND_ or undefined, 2ND_ for 32-bit host builds.
 # $(4): wrapper, e.g., valgrind.
 # $(5): dex2oat suffix, e.g., valgrind requires 32 right now.
+# $(6): multi-image.
 # NB: we depend on HOST_CORE_DEX_LOCATIONS so we are sure to have the dex files in frameworks for
 # run-test --no-image
 define create-core-oat-host-rules
@@ -92,14 +93,25 @@
     $$(error found $(2) expected pic or no-pic)
   endif
 
-  core_image_name := $($(3)HOST_CORE_IMG_OUT_BASE)$$(core_infix)$$(core_pic_infix)$(4)$(CORE_IMG_SUFFIX)
-  core_oat_name := $($(3)HOST_CORE_OAT_OUT_BASE)$$(core_infix)$$(core_pic_infix)$(4)$(CORE_OAT_SUFFIX)
+  # If $(6) is true, generate a multi-image.
+  ifeq ($(6),true)
+    core_multi_infix := -multi
+    core_multi_param := --multi-image --no-inline-from=core-oj-hostdex.jar
+    core_multi_group := _multi
+  else
+    core_multi_infix :=
+    core_multi_param :=
+    core_multi_group :=
+  endif
+
+  core_image_name := $($(3)HOST_CORE_IMG_OUT_BASE)$$(core_infix)$$(core_pic_infix)$$(core_multi_infix)$(4)$(CORE_IMG_SUFFIX)
+  core_oat_name := $($(3)HOST_CORE_OAT_OUT_BASE)$$(core_infix)$$(core_pic_infix)$$(core_multi_infix)$(4)$(CORE_OAT_SUFFIX)
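
On the two new dex2oat knobs, as I read them: --multi-image requests a separate image/oat pair per boot-classpath dex file rather than one monolithic core image, and --no-inline-from=core-oj-hostdex.jar keeps methods from that jar from being inlined across the resulting image boundaries. The -multi infix then keeps both variants apart on disk; for, say, an interpreter/pic configuration the image names would come out roughly as (assuming core_infix expands to -interpreter):

    core-interpreter-pic.art          # multi-image = false
    core-interpreter-pic-multi.art    # multi-image = true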
 
   # Using the bitness suffix makes it easier to add as a dependency for the run-test mk.
   ifeq ($(3),)
-    $(4)HOST_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name)
+    $(4)HOST_CORE_IMAGE_$(1)_$(2)$$(core_multi_group)_64 := $$(core_image_name)
   else
-    $(4)HOST_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
+    $(4)HOST_CORE_IMAGE_$(1)_$(2)$$(core_multi_group)_32 := $$(core_image_name)
   endif
   $(4)HOST_CORE_IMG_OUTS += $$(core_image_name)
   $(4)HOST_CORE_OAT_OUTS += $$(core_oat_name)
@@ -111,8 +123,9 @@
 $$(core_image_name): PRIVATE_CORE_COMPILE_OPTIONS := $$(core_compile_options)
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
+$$(core_image_name): PRIVATE_CORE_MULTI_PARAM := $$(core_multi_param)
 $$(core_image_name): $$(HOST_CORE_DEX_LOCATIONS) $$(core_dex2oat_dependency)
-	@echo "host dex2oat: $$@ ($$?)"
+	@echo "host dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $(4) $$(DEX2OAT)$(5) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) \
 	  --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
@@ -122,7 +135,7 @@
 	  --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(3)ART_HOST_ARCH) \
 	  $$(LOCAL_$(3)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES_OPTION) \
 	  --host --android-root=$$(HOST_OUT) --include-patch-information --generate-debug-info \
-	  $$(PRIVATE_CORE_COMPILE_OPTIONS)
+	  $$(PRIVATE_CORE_MULTI_PARAM) $$(PRIVATE_CORE_COMPILE_OPTIONS)
 
 $$(core_oat_name): $$(core_image_name)
 
@@ -138,32 +151,40 @@
 # $(1): compiler - default, optimizing, jit, interpreter or interpreter-access-checks.
 # $(2): wrapper.
 # $(3): dex2oat suffix.
+# $(4): multi-image.
 define create-core-oat-host-rule-combination
-  $(call create-core-oat-host-rules,$(1),no-pic,,$(2),$(3))
-  $(call create-core-oat-host-rules,$(1),pic,,$(2),$(3))
+  $(call create-core-oat-host-rules,$(1),no-pic,,$(2),$(3),$(4))
+  $(call create-core-oat-host-rules,$(1),pic,,$(2),$(3),$(4))
 
   ifneq ($(HOST_PREFER_32_BIT),true)
-    $(call create-core-oat-host-rules,$(1),no-pic,2ND_,$(2),$(3))
-    $(call create-core-oat-host-rules,$(1),pic,2ND_,$(2),$(3))
+    $(call create-core-oat-host-rules,$(1),no-pic,2ND_,$(2),$(3),$(4))
+    $(call create-core-oat-host-rules,$(1),pic,2ND_,$(2),$(3),$(4))
   endif
 endef
 
-$(eval $(call create-core-oat-host-rule-combination,default,,))
-$(eval $(call create-core-oat-host-rule-combination,optimizing,,))
-$(eval $(call create-core-oat-host-rule-combination,interpreter,,))
-$(eval $(call create-core-oat-host-rule-combination,interp-ac,,))
-$(eval $(call create-core-oat-host-rule-combination,jit,,))
+$(eval $(call create-core-oat-host-rule-combination,default,,,false))
+$(eval $(call create-core-oat-host-rule-combination,optimizing,,,false))
+$(eval $(call create-core-oat-host-rule-combination,interpreter,,,false))
+$(eval $(call create-core-oat-host-rule-combination,interp-ac,,,false))
+$(eval $(call create-core-oat-host-rule-combination,jit,,,false))
+$(eval $(call create-core-oat-host-rule-combination,default,,,true))
+$(eval $(call create-core-oat-host-rule-combination,optimizing,,,true))
+$(eval $(call create-core-oat-host-rule-combination,interpreter,,,true))
+$(eval $(call create-core-oat-host-rule-combination,interp-ac,,,true))
+$(eval $(call create-core-oat-host-rule-combination,jit,,,true))
 
 valgrindHOST_CORE_IMG_OUTS :=
 valgrindHOST_CORE_OAT_OUTS :=
-$(eval $(call create-core-oat-host-rule-combination,default,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,interp-ac,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,jit,valgrind,32))
+$(eval $(call create-core-oat-host-rule-combination,default,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,interp-ac,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,jit,valgrind,32,false))
 
 valgrind-test-art-host-dex2oat-host: $(valgrindHOST_CORE_IMG_OUTS)
 
+test-art-host-dex2oat-host: $(HOST_CORE_IMG_OUTS)
+
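
The plain test-art-host-dex2oat-host target added here gives a non-valgrind way to build every host core image in one go, now including the -multi variants:

    m test-art-host-dex2oat-host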
 define create-core-oat-target-rules
   core_compile_options :=
   core_image_name :=
@@ -238,7 +259,7 @@
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
 $$(core_image_name): $$(TARGET_CORE_DEX_FILES) $$(core_dex2oat_dependency)
-	@echo "target dex2oat: $$@ ($$?)"
+	@echo "target dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $(4) $$(DEX2OAT)$(5) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) \
 	  --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index 2e9f208..4dcaf80 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -80,8 +80,7 @@
   }
 }
 
-static Runtime* StartRuntime(const char* boot_image_location,
-                             InstructionSet instruction_set) {
+static Runtime* StartRuntime(const char* boot_image_location, InstructionSet instruction_set) {
   CHECK(boot_image_location != nullptr);
 
   RuntimeOptions options;
@@ -196,6 +195,7 @@
         "  --boot-image=<file.art>: provide the image location for the boot class path.\n"
         "      Do not include the arch as part of the name, it is added automatically.\n"
         "      Example: --boot-image=/system/framework/boot.art\n"
+        "               (specifies /system/framework/<arch>/boot.art as the image file)\n"
         "\n";
     usage += StringPrintf(  // Optional.
         "  --instruction-set=(arm|arm64|mips|mips64|x86|x86_64): for locating the image\n"
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index f34b5ed..5809dcd 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -30,18 +30,15 @@
   bool UsuallyEquals(double expected, double actual);
 
   // This has a gtest dependency, which is why it's only in the gtest.
-  bool operator==(const TestProfilerOptions& lhs, const TestProfilerOptions& rhs) {
+  bool operator==(const ProfileSaverOptions& lhs, const ProfileSaverOptions& rhs) {
     return lhs.enabled_ == rhs.enabled_ &&
-        lhs.output_file_name_ == rhs.output_file_name_ &&
-        lhs.period_s_ == rhs.period_s_ &&
-        lhs.duration_s_ == rhs.duration_s_ &&
-        lhs.interval_us_ == rhs.interval_us_ &&
-        UsuallyEquals(lhs.backoff_coefficient_, rhs.backoff_coefficient_) &&
-        UsuallyEquals(lhs.start_immediately_, rhs.start_immediately_) &&
-        UsuallyEquals(lhs.top_k_threshold_, rhs.top_k_threshold_) &&
-        UsuallyEquals(lhs.top_k_change_threshold_, rhs.top_k_change_threshold_) &&
-        lhs.profile_type_ == rhs.profile_type_ &&
-        lhs.max_stack_depth_ == rhs.max_stack_depth_;
+        lhs.min_save_period_ms_ == rhs.min_save_period_ms_ &&
+        lhs.save_resolved_classes_delay_ms_ == rhs.save_resolved_classes_delay_ms_ &&
+        lhs.startup_method_samples_ == rhs.startup_method_samples_ &&
+        lhs.min_methods_to_save_ == rhs.min_methods_to_save_ &&
+        lhs.min_classes_to_save_ == rhs.min_classes_to_save_ &&
+        lhs.min_notification_before_wake_ == rhs.min_notification_before_wake_ &&
+        lhs.max_notification_before_wake_ == rhs.max_notification_before_wake_;
   }
 
   bool UsuallyEquals(double expected, double actual) {
@@ -243,8 +240,8 @@
 TEST_F(CmdlineParserTest, TestLogVerbosity) {
   {
     const char* log_args = "-verbose:"
-        "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,startup,third-party-jni,"
-        "threads,verifier";
+        "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,simulator,startup,"
+        "third-party-jni,threads,verifier";
 
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.class_linker = true;
@@ -256,6 +253,7 @@
     log_verbosity.monitor = true;
     log_verbosity.profiler = true;
     log_verbosity.signals = true;
+    log_verbosity.simulator = true;
     log_verbosity.startup = true;
     log_verbosity.third_party_jni = true;
     log_verbosity.threads = true;
@@ -290,6 +288,13 @@
   }
 
   {
+    const char* log_args = "-verbose:collector";
+    LogVerbosity log_verbosity = LogVerbosity();
+    log_verbosity.collector = true;
+    EXPECT_SINGLE_PARSE_VALUE(log_verbosity, log_args, M::Verbose);
+  }
+
+  {
     const char* log_args = "-verbose:oat";
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.oat = true;
@@ -453,12 +458,14 @@
   * Test successes
   */
   {
-    EXPECT_SINGLE_PARSE_VALUE(true, "-Xusejit:true", M::UseJIT);
-    EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJIT);
+    EXPECT_SINGLE_PARSE_VALUE(true, "-Xusejit:true", M::UseJitCompilation);
+    EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJitCompilation);
   }
   {
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * KB), "-Xjitcodecachesize:16K", M::JITCodeCacheCapacity);
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * MB), "-Xjitcodecachesize:16M", M::JITCodeCacheCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold);
@@ -466,73 +473,26 @@
 }  // TEST_F
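
These renames replace the old single -Xjitcodecachesize knob with separate initial and maximum capacities. A command line exercising exactly the values the test expects (dalvikvm as the launcher is an assumption; the flags are taken verbatim from the test):

    dalvikvm -Xusejit:true -Xjitinitialsize:16K -Xjitmaxsize:16M \
        -Xjitthreshold:12345 ...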
 
 /*
-* -X-profile-*
+* -Xps-*
 */
-TEST_F(CmdlineParserTest, TestProfilerOptions) {
- /*
-  * Test successes
-  */
+TEST_F(CmdlineParserTest, ProfileSaverOptions) {
+  ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7);
 
-  {
-    TestProfilerOptions opt;
-    opt.enabled_ = true;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xenable-profiler",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt;
-    // also need to test 'enabled'
-    opt.output_file_name_ = "hello_world.txt";
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-filename:hello_world.txt ",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt = TestProfilerOptions();
-    // also need to test 'enabled'
-    opt.output_file_name_ = "output.txt";
-    opt.period_s_ = 123u;
-    opt.duration_s_ = 456u;
-    opt.interval_us_ = 789u;
-    opt.backoff_coefficient_ = 2.0;
-    opt.start_immediately_ = true;
-    opt.top_k_threshold_ = 50.0;
-    opt.top_k_change_threshold_ = 60.0;
-    opt.profile_type_ = kProfilerMethod;
-    opt.max_stack_depth_ = 1337u;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-filename:output.txt "
-                              "-Xprofile-period:123 "
-                              "-Xprofile-duration:456 "
-                              "-Xprofile-interval:789 "
-                              "-Xprofile-backoff:2.0 "
-                              "-Xprofile-start-immediately "
-                              "-Xprofile-top-k-threshold:50.0 "
-                              "-Xprofile-top-k-change-threshold:60.0 "
-                              "-Xprofile-type:method "
-                              "-Xprofile-max-stack-depth:1337",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt = TestProfilerOptions();
-    opt.profile_type_ = kProfilerBoundedStack;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-type:stack",
-                              M::ProfilerOpts);
-  }
+  EXPECT_SINGLE_PARSE_VALUE(opt,
+                            "-Xjitsaveprofilinginfo "
+                            "-Xps-min-save-period-ms:1 "
+                            "-Xps-save-resolved-classes-delay-ms:2 "
+                            "-Xps-startup-method-samples:3 "
+                            "-Xps-min-methods-to-save:4 "
+                            "-Xps-min-classes-to-save:5 "
+                            "-Xps-min-notification-before-wake:6 "
+                            "-Xps-max-notification-before-wake:7",
+                            M::ProfileSaverOpts);
 }  // TEST_F
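
For anyone tracing the expectation: the positional arguments in ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7) correspond one-to-one to -Xjitsaveprofilinginfo (enabled_) followed by the seven -Xps- suboptions, in the order they appear in the option string.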
 
 /* -Xexperimental:_ */
 TEST_F(CmdlineParserTest, TestExperimentalFlags) {
-  // Off by default
+  // Default
   EXPECT_SINGLE_PARSE_DEFAULT_VALUE(ExperimentalFlags::kNone,
                                     "",
                                     M::Experimental);
@@ -541,21 +501,6 @@
   EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kNone,
                             "-Xexperimental:none",
                             M::Experimental);
-
-  // Enabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kLambdas,
-                            "-Xexperimental:lambdas",
-                            M::Experimental);
-  // Enabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kDefaultMethods,
-                            "-Xexperimental:default-methods",
-                            M::Experimental);
-
-  // Enabled both
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kDefaultMethods | ExperimentalFlags::kLambdas,
-                            "-Xexperimental:default-methods "
-                            "-Xexperimental:lambdas",
-                            M::Experimental);
 }
 
 // -Xverify:_
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index c594adb..b57383b 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -18,20 +18,22 @@
 
 #define CMDLINE_NDEBUG 1  // Do not output any debugging information for parsing.
 
-#include "cmdline/memory_representation.h"
-#include "cmdline/detail/cmdline_debug_detail.h"
+#include "memory_representation.h"
+#include "detail/cmdline_debug_detail.h"
 #include "cmdline_type_parser.h"
 
 // Includes for the types that are being specialized
 #include <string>
-#include "unit.h"
-#include "jdwp/jdwp.h"
-#include "runtime/base/logging.h"
-#include "runtime/base/time_utils.h"
-#include "runtime/experimental_flags.h"
+#include "base/logging.h"
+#include "base/time_utils.h"
+#include "experimental_flags.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "profiler_options.h"
+#include "jdwp/jdwp.h"
+#include "jit/profile_saver_options.h"
+#include "plugin.h"
+#include "ti/agent.h"
+#include "unit.h"
 
 namespace art {
 
@@ -381,6 +383,38 @@
 };
 
 template <>
+struct CmdlineType<std::vector<Plugin>> : CmdlineTypeParser<std::vector<Plugin>> {
+  Result Parse(const std::string& args) {
+    assert(false && "Use AppendValues() for a Plugin vector type");
+    return Result::Failure("Unconditional failure: Plugin vector must be appended: " + args);
+  }
+
+  Result ParseAndAppend(const std::string& args,
+                        std::vector<Plugin>& existing_value) {
+    existing_value.push_back(Plugin::Create(args));
+    return Result::SuccessNoValue();
+  }
+
+  static const char* Name() { return "std::vector<Plugin>"; }
+};
+
+template <>
+struct CmdlineType<std::vector<ti::Agent>> : CmdlineTypeParser<std::vector<ti::Agent>> {
+  Result Parse(const std::string& args) {
+    assert(false && "Use AppendValues() for an Agent vector type");
+    return Result::Failure("Unconditional failure: Agent vector must be appended: " + args);
+  }
+
+  Result ParseAndAppend(const std::string& args,
+                        std::vector<ti::Agent>& existing_value) {
+    existing_value.push_back(ti::Agent::Create(args));
+    return Result::SuccessNoValue();
+  }
+
+  static const char* Name() { return "std::vector<ti::Agent>"; }
+};
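
Both new vector parsers follow the same accumulate-only pattern as the string-vector specialization below: Parse() is deliberately unreachable, and ParseAndAppend() pushes one element per occurrence of the owning option. Hypothetically, if the parser definition binds these to -Xplugin:-style arguments (the option name is an assumption; the binding lives elsewhere), then

    dalvikvm -Xplugin:libfoo.so -Xplugin:libbar.so ...

would produce a two-element std::vector<Plugin>.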
+
+template <>
 struct CmdlineType<std::vector<std::string>> : CmdlineTypeParser<std::vector<std::string>> {
   Result Parse(const std::string& args) {
     assert(false && "Use AppendValues() for a string vector type");
@@ -462,7 +496,7 @@
 struct XGcOption {
   // These defaults are used when the command line arguments for -Xgc:
   // are either omitted completely or partially.
-  gc::CollectorType collector_type_ =  kUseReadBarrier ?
+  gc::CollectorType collector_type_ = kUseReadBarrier ?
                                            // If RB is enabled (currently a build-time decision),
                                            // use CC as the default GC.
                                            gc::kCollectorTypeCC :
@@ -473,6 +507,7 @@
   bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
   bool verify_pre_sweeping_rosalloc_ = false;
   bool verify_post_gc_rosalloc_ = false;
+  bool measure_ = kIsDebugBuild;
   bool gcstress_ = false;
 };
 
@@ -515,6 +550,8 @@
         xgc.gcstress_ = true;
       } else if (gc_option == "nogcstress") {
         xgc.gcstress_ = false;
+      } else if (gc_option == "measure") {
+        xgc.measure_ = true;
       } else if ((gc_option == "precise") ||
                  (gc_option == "noprecise") ||
                  (gc_option == "verifycardtable") ||
@@ -584,6 +621,8 @@
     for (size_t j = 0; j < verbose_options.size(); ++j) {
       if (verbose_options[j] == "class") {
         log_verbosity.class_linker = true;
+      } else if (verbose_options[j] == "collector") {
+        log_verbosity.collector = true;
       } else if (verbose_options[j] == "compiler") {
         log_verbosity.compiler = true;
       } else if (verbose_options[j] == "deopt") {
@@ -606,6 +645,8 @@
         log_verbosity.profiler = true;
       } else if (verbose_options[j] == "signals") {
         log_verbosity.signals = true;
+      } else if (verbose_options[j] == "simulator") {
+        log_verbosity.simulator = true;
       } else if (verbose_options[j] == "startup") {
         log_verbosity.startup = true;
       } else if (verbose_options[j] == "third-party-jni") {
@@ -614,6 +655,12 @@
         log_verbosity.threads = true;
       } else if (verbose_options[j] == "verifier") {
         log_verbosity.verifier = true;
+      } else if (verbose_options[j] == "image") {
+        log_verbosity.image = true;
+      } else if (verbose_options[j] == "systrace-locks") {
+        log_verbosity.systrace_lock_logging = true;
+      } else if (verbose_options[j] == "agents") {
+        log_verbosity.agents = true;
       } else {
         return Result::Usage(std::string("Unknown -verbose option ") + verbose_options[j]);
       }
@@ -625,84 +672,17 @@
   static const char* Name() { return "LogVerbosity"; }
 };
 
-// TODO: Replace with art::ProfilerOptions for the real thing.
-struct TestProfilerOptions {
-  // Whether or not the applications should be profiled.
-  bool enabled_;
-  // Destination file name where the profiling data will be saved into.
-  std::string output_file_name_;
-  // Generate profile every n seconds.
-  uint32_t period_s_;
-  // Run profile for n seconds.
-  uint32_t duration_s_;
-  // Microseconds between samples.
-  uint32_t interval_us_;
-  // Coefficient to exponential backoff.
-  double backoff_coefficient_;
-  // Whether the profile should start upon app startup or be delayed by some random offset.
-  bool start_immediately_;
-  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
-  double top_k_threshold_;
-  // How much the top K% samples needs to change in order for the app to be recompiled.
-  double top_k_change_threshold_;
-  // The type of profile data dumped to the disk.
-  ProfileDataType profile_type_;
-  // The max depth of the stack collected by the profiler
-  uint32_t max_stack_depth_;
-
-  TestProfilerOptions() :
-    enabled_(false),
-    output_file_name_(),
-    period_s_(0),
-    duration_s_(0),
-    interval_us_(0),
-    backoff_coefficient_(0),
-    start_immediately_(0),
-    top_k_threshold_(0),
-    top_k_change_threshold_(0),
-    profile_type_(ProfileDataType::kProfilerMethod),
-    max_stack_depth_(0) {
-  }
-
-  TestProfilerOptions(const TestProfilerOptions&) = default;
-  TestProfilerOptions(TestProfilerOptions&&) = default;
-};
-
-static inline std::ostream& operator<<(std::ostream& stream, const TestProfilerOptions& options) {
-  stream << "TestProfilerOptions {" << std::endl;
-
-#define PRINT_TO_STREAM(field) \
-  stream << #field << ": '" << options.field << "'" << std::endl;
-
-  PRINT_TO_STREAM(enabled_);
-  PRINT_TO_STREAM(output_file_name_);
-  PRINT_TO_STREAM(period_s_);
-  PRINT_TO_STREAM(duration_s_);
-  PRINT_TO_STREAM(interval_us_);
-  PRINT_TO_STREAM(backoff_coefficient_);
-  PRINT_TO_STREAM(start_immediately_);
-  PRINT_TO_STREAM(top_k_threshold_);
-  PRINT_TO_STREAM(top_k_change_threshold_);
-  PRINT_TO_STREAM(profile_type_);
-  PRINT_TO_STREAM(max_stack_depth_);
-
-  stream << "}";
-
-  return stream;
-#undef PRINT_TO_STREAM
-}
-
 template <>
-struct CmdlineType<TestProfilerOptions> : CmdlineTypeParser<TestProfilerOptions> {
-  using Result = CmdlineParseResult<TestProfilerOptions>;
+struct CmdlineType<ProfileSaverOptions> : CmdlineTypeParser<ProfileSaverOptions> {
+  using Result = CmdlineParseResult<ProfileSaverOptions>;
 
  private:
   using StringResult = CmdlineParseResult<std::string>;
   using DoubleResult = CmdlineParseResult<double>;
 
   template <typename T>
-  static Result ParseInto(TestProfilerOptions& options,
-                          T TestProfilerOptions::*pField,
+  static Result ParseInto(ProfileSaverOptions& options,
+                          T ProfileSaverOptions::*pField,
                           CmdlineParseResult<T>&& result) {
     assert(pField != nullptr);
 
@@ -714,36 +694,6 @@
     return Result::CastError(result);
   }
 
-  template <typename T>
-  static Result ParseIntoRangeCheck(TestProfilerOptions& options,
-                                    T TestProfilerOptions::*pField,
-                                    CmdlineParseResult<T>&& result,
-                                    T min,
-                                    T max) {
-    if (result.IsSuccess()) {
-      const T& value = result.GetValue();
-
-      if (value < min || value > max) {
-        CmdlineParseResult<T> out_of_range = CmdlineParseResult<T>::OutOfRange(value, min, max);
-        return Result::CastError(out_of_range);
-      }
-    }
-
-    return ParseInto(options, pField, std::forward<CmdlineParseResult<T>>(result));
-  }
-
-  static StringResult ParseStringAfterChar(const std::string& s, char c) {
-    std::string parsed_value;
-
-    std::string::size_type colon = s.find(c);
-    if (colon == std::string::npos) {
-      return StringResult::Usage(std::string() + "Missing char " + c + " in option " + s);
-    }
-    // Add one to remove the char we were trimming until.
-    parsed_value = s.substr(colon + 1);
-    return StringResult::Success(parsed_value);
-  }
-
   static std::string RemovePrefix(const std::string& source) {
     size_t prefix_idx = source.find(":");
 
@@ -755,87 +705,64 @@
   }
 
  public:
-  Result ParseAndAppend(const std::string& option, TestProfilerOptions& existing) {
+  Result ParseAndAppend(const std::string& option, ProfileSaverOptions& existing) {
     // Special case which doesn't include a wildcard argument definition.
     // We pass it through as-is.
-    if (option == "-Xenable-profiler") {
+    if (option == "-Xjitsaveprofilinginfo") {
       existing.enabled_ = true;
       return Result::SuccessNoValue();
     }
 
-    // The rest of these options are always the wildcard from '-Xprofile-*'
+    // The rest of these options are always the wildcard from '-Xps-*'
     std::string suffix = RemovePrefix(option);
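
A worked example of the dispatch that follows: for the argument -Xps-min-save-period-ms:40000, the wildcard machinery hands this function option = "min-save-period-ms:40000"; RemovePrefix() drops everything up to and including the first ':', so suffix = "40000", which the unsigned-int parser then consumes (40000 is an illustrative value).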
 
-    if (StartsWith(option, "filename:")) {
-      CmdlineType<std::string> type_parser;
-
-      return ParseInto(existing,
-                       &TestProfilerOptions::output_file_name_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "period:")) {
+    if (StartsWith(option, "min-save-period-ms:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::period_s_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "duration:")) {
+                       &ProfileSaverOptions::min_save_period_ms_,
+                       type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "save-resolved-classes-delay-ms:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::duration_s_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "interval:")) {
+                       &ProfileSaverOptions::save_resolved_classes_delay_ms_,
+                       type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "startup-method-samples:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::interval_us_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "backoff:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::backoff_coefficient_,
-                                 type_parser.Parse(suffix),
-                                 1.0,
-                                 10.0);
-
-    } else if (option == "start-immediately") {
-      existing.start_immediately_ = true;
-      return Result::SuccessNoValue();
-    } else if (StartsWith(option, "top-k-threshold:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::top_k_threshold_,
-                                 type_parser.Parse(suffix),
-                                 0.0,
-                                 100.0);
-    } else if (StartsWith(option, "top-k-change-threshold:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::top_k_change_threshold_,
-                                 type_parser.Parse(suffix),
-                                 0.0,
-                                 100.0);
-    } else if (option == "type:method") {
-      existing.profile_type_ = kProfilerMethod;
-      return Result::SuccessNoValue();
-    } else if (option == "type:stack") {
-      existing.profile_type_ = kProfilerBoundedStack;
-      return Result::SuccessNoValue();
-    } else if (StartsWith(option, "max-stack-depth:")) {
+                       &ProfileSaverOptions::startup_method_samples_,
+                       type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-methods-to-save:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::max_stack_depth_,
-                       type_parser.Parse(suffix));
+                       &ProfileSaverOptions::min_methods_to_save_,
+                       type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-classes-to-save:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+                       &ProfileSaverOptions::min_classes_to_save_,
+                       type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-notification-before-wake:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+                       &ProfileSaverOptions::min_notification_before_wake_,
+                       type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "max-notification-before-wake:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+                       &ProfileSaverOptions::max_notification_before_wake_,
+                       type_parser.Parse(suffix));
     } else {
       return Result::Failure(std::string("Invalid suboption '") + option + "'");
     }
   }
 
-  static const char* Name() { return "TestProfilerOptions"; }
+  static const char* Name() { return "ProfileSaverOptions"; }
   static constexpr bool kCanParseBlankless = true;
 };
 
@@ -843,11 +770,11 @@
 struct CmdlineType<ExperimentalFlags> : CmdlineTypeParser<ExperimentalFlags> {
   Result ParseAndAppend(const std::string& option, ExperimentalFlags& existing) {
     if (option == "none") {
-      existing = existing | ExperimentalFlags::kNone;
-    } else if (option == "lambdas") {
-      existing = existing | ExperimentalFlags::kLambdas;
-    } else if (option == "default-methods") {
-      existing = existing | ExperimentalFlags::kDefaultMethods;
+      existing = ExperimentalFlags::kNone;
+    } else if (option == "agents") {
+      existing = existing | ExperimentalFlags::kAgents;
+    } else if (option == "runtime-plugins") {
+      existing = existing | ExperimentalFlags::kRuntimePlugins;
     } else {
       return Result::Failure(std::string("Unknown option '") + option + "'");
     }
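
Two behaviors worth noting, both visible in this hunk: repeated options still accumulate via the OR, so -Xexperimental:agents -Xexperimental:runtime-plugins enables both flags, while "none" now assigns kNone outright rather than OR-ing it in, making it an effective reset of anything parsed earlier.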
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 3009b32..84beff5 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -25,10 +25,10 @@
 #include <numeric>
 #include <memory>
 
-#include "cmdline/cmdline_parse_result.h"
-#include "cmdline/token_range.h"
-#include "cmdline/unit.h"
-#include "cmdline/cmdline_types.h"
+#include "cmdline_parse_result.h"
+#include "cmdline_types.h"
+#include "token_range.h"
+#include "unit.h"
 
 namespace art {
   // Implementation details for the parser. Do not look inside if you hate templates.
@@ -497,7 +497,7 @@
       std::function<void(TArg&)> save_argument_;
       std::function<TArg&(void)> load_argument_;
     };
-  } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2]
+  }  // namespace detail  // NOLINT [readability/namespace] [5]
 }  // namespace art
 
 #endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
diff --git a/cmdline/detail/cmdline_parser_detail.h b/cmdline/detail/cmdline_parser_detail.h
index 9b43bb0..24dbca2 100644
--- a/cmdline/detail/cmdline_parser_detail.h
+++ b/cmdline/detail/cmdline_parser_detail.h
@@ -35,7 +35,7 @@
      private:
       template <typename TStream, typename T>
       static std::true_type InsertionOperatorTest(TStream& os, const T& value,
-                                                  std::remove_reference<decltype(os << value)>* = 0); // NOLINT [whitespace/operators] [3]
+                                                  std::remove_reference<decltype(os << value)>* = 0);  // NOLINT [whitespace/operators] [3]
 
       template <typename TStream, typename ... T>
       static std::false_type InsertionOperatorTest(TStream& os, const T& ... args);
@@ -53,7 +53,7 @@
      private:
       template <typename TL, typename TR>
       static std::true_type EqualityOperatorTest(const TL& left, const TR& right,
-                                                 std::remove_reference<decltype(left == right)>* = 0); // NOLINT [whitespace/operators] [3]
+                                                 std::remove_reference<decltype(left == right)>* = 0);  // NOLINT [whitespace/operators] [3]
 
       template <typename TL, typename ... T>
       static std::false_type EqualityOperatorTest(const TL& left, const T& ... args);
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 17f9d12..16c6a7b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -20,53 +20,31 @@
 
 LIBART_COMPILER_SRC_FILES := \
 	compiled_method.cc \
-	dex/global_value_numbering.cc \
-	dex/gvn_dead_code_elimination.cc \
-	dex/local_value_numbering.cc \
-	dex/type_inference.cc \
-	dex/quick/codegen_util.cc \
-	dex/quick/dex_file_method_inliner.cc \
-	dex/quick/dex_file_to_method_inliner_map.cc \
-	dex/quick/gen_common.cc \
-	dex/quick/gen_invoke.cc \
-	dex/quick/gen_loadstore.cc \
-	dex/quick/lazy_debug_frame_opcode_writer.cc \
-	dex/quick/local_optimizations.cc \
-	dex/quick/mir_to_lir.cc \
-	dex/quick/quick_compiler.cc \
-	dex/quick/ralloc_util.cc \
-	dex/quick/resource_mask.cc \
+	debug/elf_debug_writer.cc \
 	dex/dex_to_dex_compiler.cc \
-	dex/bb_optimizations.cc \
-	dex/compiler_ir.cc \
-	dex/mir_analysis.cc \
-	dex/mir_dataflow.cc \
-	dex/mir_field_info.cc \
-	dex/mir_graph.cc \
-	dex/mir_method_info.cc \
-	dex/mir_optimization.cc \
-	dex/post_opt_passes.cc \
-	dex/pass_driver_me_opts.cc \
-	dex/pass_driver_me_post_opt.cc \
-	dex/pass_manager.cc \
-	dex/ssa_transformation.cc \
 	dex/verified_method.cc \
 	dex/verification_results.cc \
-	dex/vreg_analysis.cc \
 	dex/quick_compiler_callbacks.cc \
+	dex/quick/dex_file_method_inliner.cc \
+	dex/quick/dex_file_to_method_inliner_map.cc \
+	driver/compiled_method_storage.cc \
 	driver/compiler_driver.cc \
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
+	linker/buffered_output_stream.cc \
+	linker/file_output_stream.cc \
+	linker/multi_oat_relative_patcher.cc \
+	linker/output_stream.cc \
+	linker/vector_output_stream.cc \
 	linker/relative_patcher.cc \
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
-	optimizing/boolean_simplifier.cc \
+	optimizing/block_builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
 	optimizing/code_generator_utils.cc \
-	optimizing/constant_area_fixups_x86.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
 	optimizing/graph_checker.cc \
@@ -75,6 +53,7 @@
 	optimizing/induction_var_analysis.cc \
 	optimizing/induction_var_range.cc \
 	optimizing/inliner.cc \
+	optimizing/instruction_builder.cc \
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
 	optimizing/licm.cc \
@@ -85,9 +64,13 @@
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
 	optimizing/prepare_for_register_allocation.cc \
-	optimizing/primitive_type_propagation.cc \
 	optimizing/reference_type_propagation.cc \
+	optimizing/register_allocation_resolver.cc \
 	optimizing/register_allocator.cc \
+	optimizing/register_allocator_graph_color.cc \
+	optimizing/register_allocator_linear_scan.cc \
+	optimizing/select_generator.cc \
+	optimizing/sharpening.cc \
 	optimizing/side_effects_analysis.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
@@ -95,33 +78,27 @@
 	optimizing/stack_map_stream.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
+	utils/jni_macro_assembler.cc \
 	utils/swap_space.cc \
-	buffered_output_stream.cc \
 	compiler.cc \
 	elf_writer.cc \
-	elf_writer_debug.cc \
 	elf_writer_quick.cc \
-	file_output_stream.cc \
 	image_writer.cc \
-	oat_writer.cc \
-	output_stream.cc \
-	vector_output_stream.cc
+	oat_writer.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
-	dex/quick/arm/assemble_arm.cc \
-	dex/quick/arm/call_arm.cc \
-	dex/quick/arm/fp_arm.cc \
-	dex/quick/arm/int_arm.cc \
-	dex/quick/arm/target_arm.cc \
-	dex/quick/arm/utility_arm.cc \
 	jni/quick/arm/calling_convention_arm.cc \
 	linker/arm/relative_patcher_arm_base.cc \
 	linker/arm/relative_patcher_thumb2.cc \
 	optimizing/code_generator_arm.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
+	optimizing/instruction_simplifier_arm.cc \
+	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm.cc \
 	utils/arm/assembler_arm.cc \
 	utils/arm/assembler_arm32.cc \
 	utils/arm/assembler_thumb2.cc \
+	utils/arm/jni_macro_assembler_arm.cc \
 	utils/arm/managed_register_arm.cc \
 
 # TODO We should really separate out those files that are actually needed for both variants of an
@@ -130,29 +107,24 @@
 # 32bit one.
 LIBART_COMPILER_SRC_FILES_arm64 := \
     $(LIBART_COMPILER_SRC_FILES_arm) \
-	dex/quick/arm64/assemble_arm64.cc \
-	dex/quick/arm64/call_arm64.cc \
-	dex/quick/arm64/fp_arm64.cc \
-	dex/quick/arm64/int_arm64.cc \
-	dex/quick/arm64/target_arm64.cc \
-	dex/quick/arm64/utility_arm64.cc \
 	jni/quick/arm64/calling_convention_arm64.cc \
 	linker/arm64/relative_patcher_arm64.cc \
+	optimizing/nodes_arm64.cc \
 	optimizing/code_generator_arm64.cc \
 	optimizing/instruction_simplifier_arm64.cc \
+	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm64.cc \
 	utils/arm64/assembler_arm64.cc \
+	utils/arm64/jni_macro_assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
 
 LIBART_COMPILER_SRC_FILES_mips := \
-	dex/quick/mips/assemble_mips.cc \
-	dex/quick/mips/call_mips.cc \
-	dex/quick/mips/fp_mips.cc \
-	dex/quick/mips/int_mips.cc \
-	dex/quick/mips/target_mips.cc \
-	dex/quick/mips/utility_mips.cc \
 	jni/quick/mips/calling_convention_mips.cc \
+	linker/mips/relative_patcher_mips.cc \
 	optimizing/code_generator_mips.cc \
+	optimizing/dex_cache_array_fixups_mips.cc \
+	optimizing/intrinsics_mips.cc \
+	optimizing/pc_relative_fixups_mips.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 
@@ -166,18 +138,15 @@
 
 
 LIBART_COMPILER_SRC_FILES_x86 := \
-	dex/quick/x86/assemble_x86.cc \
-	dex/quick/x86/call_x86.cc \
-	dex/quick/x86/fp_x86.cc \
-	dex/quick/x86/int_x86.cc \
-	dex/quick/x86/target_x86.cc \
-	dex/quick/x86/utility_x86.cc \
 	jni/quick/x86/calling_convention_x86.cc \
 	linker/x86/relative_patcher_x86.cc \
 	linker/x86/relative_patcher_x86_base.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/intrinsics_x86.cc \
+	optimizing/pc_relative_fixups_x86.cc \
+	optimizing/x86_memory_gen.cc \
 	utils/x86/assembler_x86.cc \
+	utils/x86/jni_macro_assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 
 LIBART_COMPILER_SRC_FILES_x86_64 := \
@@ -187,36 +156,32 @@
 	optimizing/intrinsics_x86_64.cc \
 	optimizing/code_generator_x86_64.cc \
 	utils/x86_64/assembler_x86_64.cc \
+	utils/x86_64/jni_macro_assembler_x86_64.cc \
 	utils/x86_64/managed_register_x86_64.cc \
 
 
 LIBART_COMPILER_CFLAGS :=
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \
-  dex/quick/resource_mask.h \
-  dex/compiler_enums.h \
+  compiled_method.h \
   dex/dex_to_dex_compiler.h \
-  dex/global_value_numbering.h \
-  dex/pass_me.h \
   driver/compiler_driver.h \
   driver/compiler_options.h \
   image_writer.h \
   optimizing/locations.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm := \
-  dex/quick/arm/arm_lir.h \
   utils/arm/constants_arm.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm64 := \
-  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm) \
-  dex/quick/arm64/arm64_lir.h
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_arm)
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \
-  dex/quick/mips/mips_lir.h \
   utils/mips/assembler_mips.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \
-  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips)
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) \
+  utils/mips64/assembler_mips64.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 :=
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \
@@ -252,9 +217,9 @@
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libart
+      LOCAL_STATIC_LIBRARIES += libart liblz4 liblzma
     else
-      LOCAL_SHARED_LIBRARIES += libart
+      LOCAL_SHARED_LIBRARIES += libart liblz4 liblzma
     endif
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
@@ -262,9 +227,9 @@
   else # debug
     LOCAL_MODULE := libartd-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libartd
+      LOCAL_STATIC_LIBRARIES += libartd liblz4 liblzma
     else
-      LOCAL_SHARED_LIBRARIES += libartd
+      LOCAL_SHARED_LIBRARIES += libartd liblz4 liblzma
     endif
   endif
 
@@ -294,20 +259,21 @@
 
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
     ifeq ($$(art_static_or_shared),static)
       LOCAL_LDFLAGS += -static
     endif
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
@@ -319,18 +285,18 @@
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-  # Vixl assembly support for ARM64 targets.
+  # VIXL assembly support for ARM64 targets.
   ifeq ($$(art_ndebug_or_debug),debug)
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLESTATIC_LIBRARIES += libvixld
+      LOCAL_WHOLE_STATIC_LIBRARIES += libvixld-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixld
+      LOCAL_SHARED_LIBRARIES += libvixld-arm64
     endif
   else
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl
+      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixl
+      LOCAL_SHARED_LIBRARIES += libvixl-arm64
     endif
   endif
 
@@ -349,28 +315,6 @@
     endif
   endif
 
-  ifeq ($$(art_target_or_host),target)
-    ifeq ($$(art_ndebug_or_debug),debug)
-      $(TARGET_OUT_EXECUTABLES)/dex2oatd: $$(LOCAL_INSTALLED_MODULE)
-    else
-      $(TARGET_OUT_EXECUTABLES)/dex2oat: $$(LOCAL_INSTALLED_MODULE)
-    endif
-  else # host
-    ifeq ($$(art_ndebug_or_debug),debug)
-      ifeq ($$(art_static_or_shared),static)
-        $(HOST_OUT_EXECUTABLES)/dex2oatds: $$(LOCAL_INSTALLED_MODULE)
-      else
-        $(HOST_OUT_EXECUTABLES)/dex2oatd: $$(LOCAL_INSTALLED_MODULE)
-      endif
-    else
-      ifeq ($$(art_static_or_shared),static)
-        $(HOST_OUT_EXECUTABLES)/dex2oats: $$(LOCAL_INSTALLED_MODULE)
-      else
-        $(HOST_OUT_EXECUTABLES)/dex2oat: $$(LOCAL_INSTALLED_MODULE)
-      endif
-    endif
-  endif
-
   # Clear locally defined variables.
   art_target_or_host :=
   art_ndebug_or_debug :=
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
deleted file mode 100644
index 0940a80..0000000
--- a/compiler/buffered_output_stream.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "buffered_output_stream.h"
-
-#include <string.h>
-
-namespace art {
-
-BufferedOutputStream::BufferedOutputStream(OutputStream* out)
-    : OutputStream(out->GetLocation()), out_(out), used_(0) {}
-
-bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
-  if (byte_count > kBufferSize) {
-    Flush();
-    return out_->WriteFully(buffer, byte_count);
-  }
-  if (used_ + byte_count > kBufferSize) {
-    bool success = Flush();
-    if (!success) {
-      return false;
-    }
-  }
-  const uint8_t* src = reinterpret_cast<const uint8_t*>(buffer);
-  memcpy(&buffer_[used_], src, byte_count);
-  used_ += byte_count;
-  return true;
-}
-
-bool BufferedOutputStream::Flush() {
-  bool success = true;
-  if (used_ > 0) {
-    success = out_->WriteFully(&buffer_[0], used_);
-    used_ = 0;
-  }
-  return success;
-}
-
-off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
-  if (!Flush()) {
-    return -1;
-  }
-  return out_->Seek(offset, whence);
-}
-
-}  // namespace art
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
deleted file mode 100644
index 15fc033..0000000
--- a/compiler/buffered_output_stream.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
-#define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
-
-#include "output_stream.h"
-
-#include "globals.h"
-
-namespace art {
-
-class BufferedOutputStream FINAL : public OutputStream {
- public:
-  explicit BufferedOutputStream(OutputStream* out);
-
-  virtual ~BufferedOutputStream() {
-    Flush();
-    delete out_;
-  }
-
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
-
-  virtual off_t Seek(off_t offset, Whence whence);
-
- private:
-  static const size_t kBufferSize = 8 * KB;
-
-  bool Flush();
-
-  OutputStream* const out_;
-
-  uint8_t buffer_[kBufferSize];
-
-  size_t used_;
-
-  DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 5e345db..c754e55 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -22,11 +22,13 @@
 #include <sstream>
 
 #include "arch/instruction_set.h"
-#include "dwarf/dwarf_constants.h"
-#include "dwarf/dwarf_test.h"
-#include "dwarf/headers.h"
+#include "base/enums.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/dwarf_test.h"
+#include "debug/dwarf/headers.h"
 #include "disassembler/disassembler.h"
 #include "gtest/gtest.h"
+#include "thread.h"
 
 namespace art {
 
@@ -48,14 +50,22 @@
     // Pretty-print CFI opcodes.
     constexpr bool is64bit = false;
     dwarf::DebugFrameOpCodeWriter<> initial_opcodes;
-    dwarf::WriteDebugFrameCIE(is64bit, dwarf::DW_EH_PE_absptr, dwarf::Reg(8),
-                              initial_opcodes, kCFIFormat, &debug_frame_data_);
+    dwarf::WriteCIE(is64bit, dwarf::Reg(8),
+                    initial_opcodes, kCFIFormat, &debug_frame_data_);
     std::vector<uintptr_t> debug_frame_patches;
-    dwarf::WriteDebugFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi,
-                              kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+    dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
+                    kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
-    auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
+    const uint8_t* asm_base = actual_asm.data();
+    const uint8_t* asm_end = asm_base + actual_asm.size();
+    auto* opts = new DisassemblerOptions(false,
+                                         asm_base,
+                                         asm_end,
+                                         true,
+                                         is64bit
+                                             ? &Thread::DumpThreadOffset<PointerSize::k64>
+                                             : &Thread::DumpThreadOffset<PointerSize::k32>);
     std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts));
     std::stringstream stream;
     const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0);
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 58a2f96..06a39b2 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -19,9 +19,9 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "compiled_method.h"
-#include "dex/pass_manager.h"
 #include "dex/quick_compiler_callbacks.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/verification_results.h"
@@ -54,48 +54,37 @@
                                                             method->GetDexMethodIndex()));
   }
   if (compiled_method != nullptr) {
-    const SwapVector<uint8_t>* code = compiled_method->GetQuickCode();
-    uint32_t code_size = code->size();
+    ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
+    uint32_t code_size = code.size();
     CHECK_NE(0u, code_size);
-    const SwapVector<uint8_t>* vmap_table = compiled_method->GetVmapTable();
-    uint32_t vmap_table_offset = vmap_table->empty() ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table->size();
-    const SwapVector<uint8_t>* mapping_table = compiled_method->GetMappingTable();
-    bool mapping_table_used = mapping_table != nullptr && !mapping_table->empty();
-    size_t mapping_table_size = mapping_table_used ? mapping_table->size() : 0U;
-    uint32_t mapping_table_offset = !mapping_table_used ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table->size() + mapping_table_size;
-    const SwapVector<uint8_t>* gc_map = compiled_method->GetGcMap();
-    bool gc_map_used = gc_map != nullptr && !gc_map->empty();
-    size_t gc_map_size = gc_map_used ? gc_map->size() : 0U;
-    uint32_t gc_map_offset = !gc_map_used ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table->size() + mapping_table_size + gc_map_size;
-    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
+    ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
+    uint32_t vmap_table_offset = vmap_table.empty() ? 0u
+        : sizeof(OatQuickMethodHeader) + vmap_table.size();
+    OatQuickMethodHeader method_header(vmap_table_offset,
                                        compiled_method->GetFrameSizeInBytes(),
                                        compiled_method->GetCoreSpillMask(),
-                                       compiled_method->GetFpSpillMask(), code_size);
+                                       compiled_method->GetFpSpillMask(),
+                                       code_size);
 
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
-    size_t size = sizeof(method_header) + code_size + vmap_table->size() + mapping_table_size +
-        gc_map_size;
-    size_t code_offset = compiled_method->AlignCode(size - code_size);
-    size_t padding = code_offset - (size - code_size);
-    chunk->reserve(padding + size);
+    const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
+    const size_t size = vmap_table.size() + sizeof(method_header) + code_size;
+    chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
-    chunk->insert(chunk->begin(), vmap_table->begin(), vmap_table->end());
-    if (mapping_table_used) {
-      chunk->insert(chunk->begin(), mapping_table->begin(), mapping_table->end());
-    }
-    if (gc_map_used) {
-      chunk->insert(chunk->begin(), gc_map->begin(), gc_map->end());
-    }
+    chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
+    chunk->insert(chunk->end(), code.begin(), code.end());
+    CHECK_EQ(chunk->size(), size);
+    const void* unaligned_code_ptr = chunk->data() + (size - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = compiled_method->AlignCode(offset) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(chunk->capacity(), chunk->size() + padding);
     chunk->insert(chunk->begin(), padding, 0);
-    chunk->insert(chunk->end(), code->begin(), code->end());
-    CHECK_EQ(padding + size, chunk->size());
-    const void* code_ptr = &(*chunk)[code_offset];
-    MakeExecutable(code_ptr, code->size());
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr, static_cast<const void*>(chunk->data() + (chunk->size() - code_size)));
+    MakeExecutable(code_ptr, code.size());
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
     LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
@@ -117,16 +106,7 @@
   int result = mprotect(reinterpret_cast<void*>(base), len, PROT_READ | PROT_WRITE | PROT_EXEC);
   CHECK_EQ(result, 0);
 
-  // Flush instruction cache
-  // Only uses __builtin___clear_cache if GCC >= 4.3.3
-#if GCC_VERSION >= 40303
-  __builtin___clear_cache(reinterpret_cast<void*>(base), reinterpret_cast<void*>(base + len));
-#else
-  // Only warn if not Intel as Intel doesn't have cache flush instructions.
-#if !defined(__i386__) && !defined(__x86_64__)
-  UNIMPLEMENTED(WARNING) << "cache flush";
-#endif
-#endif
+  FlushInstructionCache(reinterpret_cast<char*>(base), reinterpret_cast<char*>(base + len));
 }
 
 void CommonCompilerTest::MakeExecutable(mirror::ClassLoader* class_loader, const char* class_name) {
@@ -136,11 +116,8 @@
   Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
-  size_t pointer_size = class_linker_->GetImagePointerSize();
-  for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    MakeExecutable(&m);
-  }
-  for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+  PointerSize pointer_size = class_linker_->GetImagePointerSize();
+  for (auto& m : klass->GetMethods(pointer_size)) {
     MakeExecutable(&m);
   }
 }
@@ -166,6 +143,12 @@
   return nullptr;
 }
 
+// Get the ProfileCompilationInfo that should be passed to the driver.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+  // Null means profile information will not be taken into account.
+  return nullptr;
+}
+
 void CommonCompilerTest::SetUp() {
   CommonRuntimeTest::SetUp();
   {
@@ -184,17 +167,30 @@
     }
 
     timer_.reset(new CumulativeLogger("Compilation times"));
-    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                              verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              compiler_kind_, instruction_set,
-                                              instruction_set_features_.get(),
-                                              true,
-                                              GetImageClasses(),
-                                              GetCompiledClasses(),
-                                              GetCompiledMethods(),
-                                              2, true, true, "", false, timer_.get(), -1, ""));
+    CreateCompilerDriver(compiler_kind_, instruction_set);
   }
+}
+
+void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind,
+                                              InstructionSet isa,
+                                              size_t number_of_threads) {
+  compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                            verification_results_.get(),
+                                            method_inliner_map_.get(),
+                                            kind,
+                                            isa,
+                                            instruction_set_features_.get(),
+                                            /* boot_image */ true,
+                                            /* app_image */ false,
+                                            GetImageClasses(),
+                                            GetCompiledClasses(),
+                                            GetCompiledMethods(),
+                                            number_of_threads,
+                                            /* dump_stats */ true,
+                                            /* dump_passes */ true,
+                                            timer_.get(),
+                                            /* swap_fd */ -1,
+                                            GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
@@ -218,6 +214,11 @@
   compiler_kind_ = compiler_kind;
 }
 
+InstructionSet CommonCompilerTest::GetInstructionSet() const {
+  DCHECK(compiler_driver_.get() != nullptr);
+  return compiler_driver_->GetInstructionSet();
+}
+
 void CommonCompilerTest::TearDown() {
   timer_.reset();
   compiler_driver_.reset();
@@ -237,10 +238,7 @@
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
   auto pointer_size = class_linker_->GetImagePointerSize();
-  for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    CompileMethod(&m);
-  }
-  for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+  for (auto& m : klass->GetMethods(pointer_size)) {
     CompileMethod(&m);
   }
 }
@@ -289,7 +287,7 @@
   MemMap::Init();
   image_reservation_.reset(MemMap::MapAnonymous("image reservation",
                                                 reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS),
-                                                (size_t)100 * 1024 * 1024,  // 100MB
+                                                (size_t)120 * 1024 * 1024,  // 120MB
                                                 PROT_NONE,
                                                 false /* no need for 4gb flag with fixed mmap*/,
                                                 false /* not reusing existing reservation */,
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 67b4428..c942375 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
+#include "jit/offline_profiling_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -61,6 +62,8 @@
   Compiler::Kind GetCompilerKind() const;
   void SetCompilerKind(Compiler::Kind compiler_kind);
 
+  InstructionSet GetInstructionSet() const;
+
   // Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetImageClasses();
@@ -73,6 +76,8 @@
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetCompiledMethods();
 
+  virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
   virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
@@ -88,6 +93,8 @@
                             const char* method_name, const char* signature)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa, size_t number_of_threads = 2U);
+
   void ReserveImageSpace();
 
   void UnreserveImageSpace();
@@ -108,18 +115,23 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
-// TODO: When heap reference poisoning works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK() \
-  if (kPoisonHeapReferences && GetCompilerKind() == Compiler::kQuick) { \
-    printf("WARNING: TEST DISABLED FOR HEAP REFERENCE POISONING WITH QUICK\n"); \
-    return; \
-  }
-
-// TODO: When non-PIC works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING() \
-  if (GetCompilerKind() == Compiler::kOptimizing) { \
-    printf("WARNING: TEST DISABLED FOR NON-PIC COMPILING WITH OPTIMIZING\n"); \
-    return; \
+// TODO: When read barrier works with all Optimizing back ends, get rid of this.
+#define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
+  if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
+    switch (GetInstructionSet()) {                                                        \
+      case kArm64:                                                                        \
+      case kThumb2:                                                                       \
+      case kX86:                                                                          \
+      case kX86_64:                                                                       \
+        /* Instruction set has read barrier support. */                                   \
+        break;                                                                            \
+                                                                                          \
+      default:                                                                            \
+        /* Instruction set does not have read barrier support. */                         \
+        printf("WARNING: TEST DISABLED FOR READ BARRIER WITH OPTIMIZING "                 \
+               "FOR THIS INSTRUCTION SET\n");                                             \
+        return;                                                                           \
+    }                                                                                     \
   }
 
 }  // namespace art
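
The pair of compiler-specific guards above collapses into the single read-barrier macro. A minimal sketch of how a test would use it, assuming a hypothetical fixture derived from CommonCompilerTest (the macro and the GetCompilerKind()/GetInstructionSet() accessors it relies on are the ones declared in this header):

    class ExampleCompilerTest : public CommonCompilerTest {};  // Hypothetical fixture.

    TEST_F(ExampleCompilerTest, GuardedCompile) {
      // Returns early only when read barriers are enabled, the compiler is
      // Optimizing, and the ISA is not one of kArm64/kThumb2/kX86/kX86_64.
      TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
      // Actual compilation work would go here.
    }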
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 74ef35e..f06d90c 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -15,27 +15,22 @@
  */
 
 #include "compiled_method.h"
+
+#include "driver/compiled_method_storage.h"
 #include "driver/compiler_driver.h"
+#include "utils/swap_space.h"
 
 namespace art {
 
 CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-                           const ArrayRef<const uint8_t>& quick_code, bool owns_code_array)
-    : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
-      owns_code_array_(owns_code_array), quick_code_(nullptr) {
-  if (owns_code_array_) {
-    // If we are supposed to own the code, don't deduplicate it.
-    quick_code_ = new SwapVector<uint8_t>(quick_code.begin(), quick_code.end(),
-                                          compiler_driver_->GetSwapSpaceAllocator());
-  } else {
-    quick_code_ = compiler_driver_->DeduplicateCode(quick_code);
-  }
+                           const ArrayRef<const uint8_t>& quick_code)
+    : compiler_driver_(compiler_driver),
+      instruction_set_(instruction_set),
+      quick_code_(compiler_driver_->GetCompiledMethodStorage()->DeduplicateCode(quick_code)) {
 }
 
 CompiledCode::~CompiledCode() {
-  if (owns_code_array_) {
-    delete quick_code_;
-  }
+  compiler_driver_->GetCompiledMethodStorage()->ReleaseCode(quick_code_);
 }
 
 bool CompiledCode::operator==(const CompiledCode& rhs) const {
@@ -104,59 +99,24 @@
   }
 }
 
-const std::vector<uint32_t>& CompiledCode::GetOatdataOffsetsToCompliledCodeOffset() const {
-  CHECK_NE(0U, oatdata_offsets_to_compiled_code_offset_.size());
-  return oatdata_offsets_to_compiled_code_offset_;
-}
-
-void CompiledCode::AddOatdataOffsetToCompliledCodeOffset(uint32_t offset) {
-  oatdata_offsets_to_compiled_code_offset_.push_back(offset);
-}
-
 CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const ArrayRef<const uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
-                               DefaultSrcMap* src_mapping_table,
-                               const ArrayRef<const uint8_t>& mapping_table,
+                               const ArrayRef<const SrcMapElem>& src_mapping_table,
                                const ArrayRef<const uint8_t>& vmap_table,
-                               const ArrayRef<const uint8_t>& native_gc_map,
                                const ArrayRef<const uint8_t>& cfi_info,
                                const ArrayRef<const LinkerPatch>& patches)
-    : CompiledCode(driver, instruction_set, quick_code, !driver->DedupeEnabled()),
-      owns_arrays_(!driver->DedupeEnabled()),
+    : CompiledCode(driver, instruction_set, quick_code),
       frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask),
       fp_spill_mask_(fp_spill_mask),
-      patches_(patches.begin(), patches.end(), driver->GetSwapSpaceAllocator()) {
-  if (owns_arrays_) {
-    if (src_mapping_table == nullptr) {
-      src_mapping_table_ = new SwapSrcMap(driver->GetSwapSpaceAllocator());
-    } else {
-      src_mapping_table_ = new SwapSrcMap(src_mapping_table->begin(), src_mapping_table->end(),
-                                          driver->GetSwapSpaceAllocator());
-    }
-    mapping_table_ = mapping_table.empty() ?
-        nullptr : new SwapVector<uint8_t>(mapping_table.begin(), mapping_table.end(),
-                                          driver->GetSwapSpaceAllocator());
-    vmap_table_ = new SwapVector<uint8_t>(vmap_table.begin(), vmap_table.end(),
-                                          driver->GetSwapSpaceAllocator());
-    gc_map_ = native_gc_map.empty() ? nullptr :
-        new SwapVector<uint8_t>(native_gc_map.begin(), native_gc_map.end(),
-                                driver->GetSwapSpaceAllocator());
-    cfi_info_ = cfi_info.empty() ? nullptr :
-        new SwapVector<uint8_t>(cfi_info.begin(), cfi_info.end(), driver->GetSwapSpaceAllocator());
-  } else {
-    src_mapping_table_ = src_mapping_table == nullptr ?
-        driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) :
-        driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(*src_mapping_table));
-    mapping_table_ = mapping_table.empty() ?
-        nullptr : driver->DeduplicateMappingTable(mapping_table);
-    vmap_table_ = driver->DeduplicateVMapTable(vmap_table);
-    gc_map_ = native_gc_map.empty() ? nullptr : driver->DeduplicateGCMap(native_gc_map);
-    cfi_info_ = cfi_info.empty() ? nullptr : driver->DeduplicateCFIInfo(cfi_info);
-  }
+      src_mapping_table_(
+          driver->GetCompiledMethodStorage()->DeduplicateSrcMappingTable(src_mapping_table)),
+      vmap_table_(driver->GetCompiledMethodStorage()->DeduplicateVMapTable(vmap_table)),
+      cfi_info_(driver->GetCompiledMethodStorage()->DeduplicateCFIInfo(cfi_info)),
+      patches_(driver->GetCompiledMethodStorage()->DeduplicateLinkerPatches(patches)) {
 }
 
 CompiledMethod* CompiledMethod::SwapAllocCompiledMethod(
@@ -166,36 +126,37 @@
     const size_t frame_size_in_bytes,
     const uint32_t core_spill_mask,
     const uint32_t fp_spill_mask,
-    DefaultSrcMap* src_mapping_table,
-    const ArrayRef<const uint8_t>& mapping_table,
+    const ArrayRef<const SrcMapElem>& src_mapping_table,
     const ArrayRef<const uint8_t>& vmap_table,
-    const ArrayRef<const uint8_t>& native_gc_map,
     const ArrayRef<const uint8_t>& cfi_info,
     const ArrayRef<const LinkerPatch>& patches) {
-  SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
+  SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator());
   CompiledMethod* ret = alloc.allocate(1);
-  alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
-                  fp_spill_mask, src_mapping_table, mapping_table, vmap_table, native_gc_map,
+  alloc.construct(ret,
+                  driver,
+                  instruction_set,
+                  quick_code,
+                  frame_size_in_bytes,
+                  core_spill_mask,
+                  fp_spill_mask,
+                  src_mapping_table,
+                  vmap_table,
                   cfi_info, patches);
   return ret;
 }
 
-
-
 void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) {
-  SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
+  SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator());
   alloc.destroy(m);
   alloc.deallocate(m, 1);
 }
 
 CompiledMethod::~CompiledMethod() {
-  if (owns_arrays_) {
-    delete src_mapping_table_;
-    delete mapping_table_;
-    delete vmap_table_;
-    delete gc_map_;
-    delete cfi_info_;
-  }
+  CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
+  storage->ReleaseLinkerPatches(patches_);
+  storage->ReleaseCFIInfo(cfi_info_);
+  storage->ReleaseVMapTable(vmap_table_);
+  storage->ReleaseSrcMappingTable(src_mapping_table_);
 }
 
 }  // namespace art
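
With ownership gone, every CompiledMethod constructor call deduplicates through CompiledMethodStorage and every destructor releases through it. That class is not part of this hunk, so the following is only a sketch of the refcounted dedup pattern the Deduplicate*/Release* pairing implies, not ART's implementation (which also allocates from swap space and synchronizes access):

    #include <cstdint>
    #include <map>
    #include <vector>

    // Illustrative only: maps byte arrays to refcounts so identical tables
    // are stored once and freed when the last CompiledMethod releases them.
    class DedupStore {
     public:
      const std::vector<uint8_t>* Deduplicate(const std::vector<uint8_t>& data) {
        auto it = table_.find(data);
        if (it == table_.end()) {
          it = table_.emplace(data, 0u).first;
        }
        ++it->second;  // One more CompiledMethod references this array.
        return &it->first;
      }

      void Release(const std::vector<uint8_t>* data) {
        auto it = table_.find(*data);
        if (it != table_.end() && --it->second == 0u) {
          table_.erase(it);  // Last reference gone; drop the array.
        }
      }

     private:
      std::map<std::vector<uint8_t>, uint32_t> table_;  // data -> refcount.
    };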
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index a4d2387..2a81804 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -18,24 +18,26 @@
 #define ART_COMPILER_COMPILED_METHOD_H_
 
 #include <memory>
+#include <iosfwd>
 #include <string>
 #include <vector>
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/length_prefixed_array.h"
 #include "method_reference.h"
 #include "utils/array_ref.h"
-#include "utils/swap_space.h"
 
 namespace art {
 
 class CompilerDriver;
+class CompiledMethodStorage;
 
 class CompiledCode {
  public:
   // For Quick to supply a code blob
   CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-               const ArrayRef<const uint8_t>& quick_code, bool owns_code_array);
+               const ArrayRef<const uint8_t>& quick_code);
 
   virtual ~CompiledCode();
 
@@ -43,8 +45,8 @@
     return instruction_set_;
   }
 
-  const SwapVector<uint8_t>* GetQuickCode() const {
-    return quick_code_;
+  ArrayRef<const uint8_t> GetQuickCode() const {
+    return GetArray(quick_code_);
   }
 
   bool operator==(const CompiledCode& rhs) const;
@@ -66,41 +68,46 @@
   static const void* CodePointer(const void* code_pointer,
                                  InstructionSet instruction_set);
 
-  const std::vector<uint32_t>& GetOatdataOffsetsToCompliledCodeOffset() const;
-  void AddOatdataOffsetToCompliledCodeOffset(uint32_t offset);
+ protected:
+  template <typename T>
+  static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array) {
+    if (array == nullptr) {
+      return ArrayRef<const T>();
+    }
+    DCHECK_NE(array->size(), 0u);
+    return ArrayRef<const T>(&array->At(0), array->size());
+  }
+
+  CompilerDriver* GetCompilerDriver() {
+    return compiler_driver_;
+  }
 
  private:
   CompilerDriver* const compiler_driver_;
 
   const InstructionSet instruction_set_;
 
-  // If we own the code array (means that we free in destructor).
-  const bool owns_code_array_;
-
   // Used to store the PIC code for Quick.
-  SwapVector<uint8_t>* quick_code_;
-
-  // There are offsets from the oatdata symbol to where the offset to
-  // the compiled method will be found. These are computed by the
-  // OatWriter and then used by the ElfWriter to add relocations so
-  // that MCLinker can update the values to the location in the linked .so.
-  std::vector<uint32_t> oatdata_offsets_to_compiled_code_offset_;
+  const LengthPrefixedArray<uint8_t>* const quick_code_;
 };
 
 class SrcMapElem {
  public:
   uint32_t from_;
   int32_t to_;
-
-  // Lexicographical compare.
-  bool operator<(const SrcMapElem& other) const {
-    if (from_ != other.from_) {
-      return from_ < other.from_;
-    }
-    return to_ < other.to_;
-  }
 };
 
+inline bool operator<(const SrcMapElem& lhs, const SrcMapElem& rhs) {
+  if (lhs.from_ != rhs.from_) {
+    return lhs.from_ < rhs.from_;
+  }
+  return lhs.to_ < rhs.to_;
+}
+
+inline bool operator==(const SrcMapElem& lhs, const SrcMapElem& rhs) {
+  return lhs.from_ == rhs.from_ && lhs.to_ == rhs.to_;
+}
+
 template <class Allocator>
 class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> {
  public:
@@ -151,23 +158,36 @@
 };
 
 using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
-using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>;
-
-
-enum LinkerPatchType {
-  kLinkerPatchMethod,
-  kLinkerPatchCall,
-  kLinkerPatchCallRelative,  // NOTE: Actual patching is instruction_set-dependent.
-  kLinkerPatchType,
-  kLinkerPatchDexCacheArray,  // NOTE: Actual patching is instruction_set-dependent.
-};
 
 class LinkerPatch {
  public:
+  // Note: We explicitly specify the underlying type of the enum because GCC
+  // would otherwise select a bigger underlying type and then complain that
+  //     'art::LinkerPatch::patch_type_' is too small to hold all
+  //     values of 'enum class art::LinkerPatch::Type'
+  // which is ridiculous given we have only a handful of values here. If we
+  // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
+  // patch_type_ as an uintN_t and do explicit static_cast<>s.
+  enum class Type : uint8_t {
+    kRecordPosition,   // Just record patch position for patchoat.
+    kMethod,
+    kCall,
+    kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
+    kType,
+    kTypeRelative,     // NOTE: Actual patching is instruction_set-dependent.
+    kString,
+    kStringRelative,   // NOTE: Actual patching is instruction_set-dependent.
+    kDexCacheArray,    // NOTE: Actual patching is instruction_set-dependent.
+  };
+
+  static LinkerPatch RecordPosition(size_t literal_offset) {
+    return LinkerPatch(literal_offset, Type::kRecordPosition, /* target_dex_file */ nullptr);
+  }
+
   static LinkerPatch MethodPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchMethod, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file);
     patch.method_idx_ = target_method_idx;
     return patch;
   }
@@ -175,7 +195,7 @@
   static LinkerPatch CodePatch(size_t literal_offset,
                                const DexFile* target_dex_file,
                                uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchCall, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kCall, target_dex_file);
     patch.method_idx_ = target_method_idx;
     return patch;
   }
@@ -183,7 +203,7 @@
   static LinkerPatch RelativeCodePatch(size_t literal_offset,
                                        const DexFile* target_dex_file,
                                        uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchCallRelative, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kCallRelative, target_dex_file);
     patch.method_idx_ = target_method_idx;
     return patch;
   }
@@ -191,17 +211,45 @@
   static LinkerPatch TypePatch(size_t literal_offset,
                                const DexFile* target_dex_file,
                                uint32_t target_type_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchType, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kType, target_dex_file);
     patch.type_idx_ = target_type_idx;
     return patch;
   }
 
+  static LinkerPatch RelativeTypePatch(size_t literal_offset,
+                                       const DexFile* target_dex_file,
+                                       uint32_t pc_insn_offset,
+                                       uint32_t target_type_idx) {
+    LinkerPatch patch(literal_offset, Type::kTypeRelative, target_dex_file);
+    patch.type_idx_ = target_type_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
+  static LinkerPatch StringPatch(size_t literal_offset,
+                                 const DexFile* target_dex_file,
+                                 uint32_t target_string_idx) {
+    LinkerPatch patch(literal_offset, Type::kString, target_dex_file);
+    patch.string_idx_ = target_string_idx;
+    return patch;
+  }
+
+  static LinkerPatch RelativeStringPatch(size_t literal_offset,
+                                         const DexFile* target_dex_file,
+                                         uint32_t pc_insn_offset,
+                                         uint32_t target_string_idx) {
+    LinkerPatch patch(literal_offset, Type::kStringRelative, target_dex_file);
+    patch.string_idx_ = target_string_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch DexCacheArrayPatch(size_t literal_offset,
                                         const DexFile* target_dex_file,
                                         uint32_t pc_insn_offset,
                                         size_t element_offset) {
     DCHECK(IsUint<32>(element_offset));
-    LinkerPatch patch(literal_offset, kLinkerPatchDexCacheArray, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file);
     patch.pc_insn_offset_ = pc_insn_offset;
     patch.element_offset_ = element_offset;
     return patch;
@@ -214,47 +262,68 @@
     return literal_offset_;
   }
 
-  LinkerPatchType Type() const {
+  Type GetType() const {
     return patch_type_;
   }
 
   bool IsPcRelative() const {
-    return Type() == kLinkerPatchCallRelative || Type() == kLinkerPatchDexCacheArray;
+    switch (GetType()) {
+      case Type::kCallRelative:
+      case Type::kTypeRelative:
+      case Type::kStringRelative:
+      case Type::kDexCacheArray:
+        return true;
+      default:
+        return false;
+    }
   }
 
   MethodReference TargetMethod() const {
-    DCHECK(patch_type_ == kLinkerPatchMethod ||
-           patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative);
+    DCHECK(patch_type_ == Type::kMethod ||
+           patch_type_ == Type::kCall ||
+           patch_type_ == Type::kCallRelative);
     return MethodReference(target_dex_file_, method_idx_);
   }
 
   const DexFile* TargetTypeDexFile() const {
-    DCHECK(patch_type_ == kLinkerPatchType);
+    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
     return target_dex_file_;
   }
 
   uint32_t TargetTypeIndex() const {
-    DCHECK(patch_type_ == kLinkerPatchType);
+    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
     return type_idx_;
   }
 
+  const DexFile* TargetStringDexFile() const {
+    DCHECK(patch_type_ == Type::kString || patch_type_ == Type::kStringRelative);
+    return target_dex_file_;
+  }
+
+  uint32_t TargetStringIndex() const {
+    DCHECK(patch_type_ == Type::kString || patch_type_ == Type::kStringRelative);
+    return string_idx_;
+  }
+
   const DexFile* TargetDexCacheDexFile() const {
-    DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+    DCHECK(patch_type_ == Type::kDexCacheArray);
     return target_dex_file_;
   }
 
   size_t TargetDexCacheElementOffset() const {
-    DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+    DCHECK(patch_type_ == Type::kDexCacheArray);
     return element_offset_;
   }
 
   uint32_t PcInsnOffset() const {
-    DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+    DCHECK(patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kStringRelative ||
+           patch_type_ == Type::kDexCacheArray);
     return pc_insn_offset_;
   }
 
  private:
-  LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, const DexFile* target_dex_file)
+  LinkerPatch(size_t literal_offset, Type patch_type, const DexFile* target_dex_file)
       : target_dex_file_(target_dex_file),
         literal_offset_(literal_offset),
         patch_type_(patch_type) {
@@ -267,24 +336,32 @@
 
   const DexFile* target_dex_file_;
   uint32_t literal_offset_ : 24;  // Method code size up to 16MiB.
-  LinkerPatchType patch_type_ : 8;
+  Type patch_type_ : 8;
   union {
     uint32_t cmp1_;             // Used for relational operators.
     uint32_t method_idx_;       // Method index for Call/Method patches.
     uint32_t type_idx_;         // Type index for Type patches.
+    uint32_t string_idx_;       // String index for String patches.
     uint32_t element_offset_;   // Element offset in the dex cache arrays.
+    static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
+    static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
+    static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators");
+    static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators");
   };
   union {
-    uint32_t cmp2_;             // Used for relational operators.
+    // Note: To avoid uninitialized padding on 64-bit systems, we use `size_t` for `cmp2_`.
+    // This allows a hashing function to treat an array of linker patches as raw memory.
+    size_t cmp2_;             // Used for relational operators.
     // Literal offset of the insn loading PC (same as literal_offset if it's the same insn,
     // may be different if the PC-relative addressing needs multiple insns).
     uint32_t pc_insn_offset_;
-    static_assert(sizeof(pc_insn_offset_) == sizeof(cmp2_), "needed by relational operators");
+    static_assert(sizeof(pc_insn_offset_) <= sizeof(cmp2_), "needed by relational operators");
   };
 
   friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
   friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs);
 };
+std::ostream& operator<<(std::ostream& os, const LinkerPatch::Type& type);
 
 inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) {
   return lhs.literal_offset_ == rhs.literal_offset_ &&
@@ -313,10 +390,8 @@
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
-                 DefaultSrcMap* src_mapping_table,
-                 const ArrayRef<const uint8_t>& mapping_table,
+                 const ArrayRef<const SrcMapElem>& src_mapping_table,
                  const ArrayRef<const uint8_t>& vmap_table,
-                 const ArrayRef<const uint8_t>& native_gc_map,
                  const ArrayRef<const uint8_t>& cfi_info,
                  const ArrayRef<const LinkerPatch>& patches);
 
@@ -329,10 +404,8 @@
       const size_t frame_size_in_bytes,
       const uint32_t core_spill_mask,
       const uint32_t fp_spill_mask,
-      DefaultSrcMap* src_mapping_table,
-      const ArrayRef<const uint8_t>& mapping_table,
+      const ArrayRef<const SrcMapElem>& src_mapping_table,
       const ArrayRef<const uint8_t>& vmap_table,
-      const ArrayRef<const uint8_t>& native_gc_map,
       const ArrayRef<const uint8_t>& cfi_info,
       const ArrayRef<const LinkerPatch>& patches);
 
@@ -350,35 +423,23 @@
     return fp_spill_mask_;
   }
 
-  const SwapSrcMap& GetSrcMappingTable() const {
-    DCHECK(src_mapping_table_ != nullptr);
-    return *src_mapping_table_;
+  ArrayRef<const SrcMapElem> GetSrcMappingTable() const {
+    return GetArray(src_mapping_table_);
   }
 
-  SwapVector<uint8_t> const* GetMappingTable() const {
-    return mapping_table_;
+  ArrayRef<const uint8_t> GetVmapTable() const {
+    return GetArray(vmap_table_);
   }
 
-  const SwapVector<uint8_t>* GetVmapTable() const {
-    DCHECK(vmap_table_ != nullptr);
-    return vmap_table_;
-  }
-
-  SwapVector<uint8_t> const* GetGcMap() const {
-    return gc_map_;
-  }
-
-  const SwapVector<uint8_t>* GetCFIInfo() const {
-    return cfi_info_;
+  ArrayRef<const uint8_t> GetCFIInfo() const {
+    return GetArray(cfi_info_);
   }
 
   ArrayRef<const LinkerPatch> GetPatches() const {
-    return ArrayRef<const LinkerPatch>(patches_);
+    return GetArray(patches_);
   }
 
  private:
-  // Whether or not the arrays are owned by the compiled method or dedupe sets.
-  const bool owns_arrays_;
   // For quick code, the size of the activation used by the code.
   const size_t frame_size_in_bytes_;
   // For quick code, a bit mask describing spilled GPR callee-save registers.
@@ -386,19 +447,13 @@
   // For quick code, a bit mask describing spilled FPR callee-save registers.
   const uint32_t fp_spill_mask_;
   // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset.
-  SwapSrcMap* src_mapping_table_;
-  // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to
-  // native PC offset. Size prefixed.
-  SwapVector<uint8_t>* mapping_table_;
+  const LengthPrefixedArray<SrcMapElem>* const src_mapping_table_;
   // For quick code, a uleb128 encoded map from GPR/FPR register to dex register. Size prefixed.
-  SwapVector<uint8_t>* vmap_table_;
-  // For quick code, a map keyed by native PC indices to bitmaps describing what dalvik registers
-  // are live.
-  SwapVector<uint8_t>* gc_map_;
+  const LengthPrefixedArray<uint8_t>* const vmap_table_;
   // For quick code, a FDE entry for the debug_frame section.
-  SwapVector<uint8_t>* cfi_info_;
+  const LengthPrefixedArray<uint8_t>* const cfi_info_;
   // For quick code, linker patches needed by the method.
-  const SwapVector<LinkerPatch> patches_;
+  const LengthPrefixedArray<LinkerPatch>* const patches_;
 };
 
 }  // namespace art
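
The factory methods and accessors above compose as in this short illustrative snippet (the dummy DexFile pointer follows the convention of compiled_method_test.cc below; all values are arbitrary):

    const DexFile* dex_file = reinterpret_cast<const DexFile*>(1);  // Dummy, test-style.
    LinkerPatch method_patch =
        LinkerPatch::MethodPatch(/* literal_offset */ 16u, dex_file, /* method_idx */ 1000u);
    LinkerPatch string_patch =
        LinkerPatch::RelativeStringPatch(/* literal_offset */ 32u, dex_file,
                                         /* pc_insn_offset */ 28u, /* string_idx */ 7u);
    CHECK(method_patch.GetType() == LinkerPatch::Type::kMethod);
    CHECK(!method_patch.IsPcRelative());
    CHECK(string_patch.IsPcRelative());          // kStringRelative is PC-relative.
    CHECK_EQ(string_patch.PcInsnOffset(), 28u);  // Offset of the PC-loading insn.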
diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc
new file mode 100644
index 0000000..99ee875
--- /dev/null
+++ b/compiler/compiled_method_test.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "compiled_method.h"
+
+namespace art {
+
+TEST(CompiledMethod, SrcMapElemOperators) {
+  SrcMapElem elems[] = {
+      { 1u, -1 },
+      { 1u, 0 },
+      { 1u, 1 },
+      { 2u, -1 },
+      { 2u, 0 },    // Index 4.
+      { 2u, 1 },
+      { 2u, 0u },   // Index 6: arbitrarily add an element identical to the one at index 4.
+  };
+
+  for (size_t i = 0; i != arraysize(elems); ++i) {
+    for (size_t j = 0; j != arraysize(elems); ++j) {
+      bool expected = (i != 6u ? i : 4u) == (j != 6u ? j : 4u);
+      EXPECT_EQ(expected, elems[i] == elems[j]) << i << " " << j;
+    }
+  }
+
+  for (size_t i = 0; i != arraysize(elems); ++i) {
+    for (size_t j = 0; j != arraysize(elems); ++j) {
+      bool expected = (i != 6u ? i : 4u) < (j != 6u ? j : 4u);
+      EXPECT_EQ(expected, elems[i] < elems[j]) << i << " " << j;
+    }
+  }
+}
+
+TEST(CompiledMethod, LinkerPatchOperators) {
+  const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1);
+  const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2);
+  LinkerPatch patches[] = {
+      LinkerPatch::MethodPatch(16u, dex_file1, 1000u),
+      LinkerPatch::MethodPatch(16u, dex_file1, 1001u),
+      LinkerPatch::MethodPatch(16u, dex_file2, 1000u),
+      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // Index 3.
+      LinkerPatch::CodePatch(16u, dex_file1, 1000u),
+      LinkerPatch::CodePatch(16u, dex_file1, 1001u),
+      LinkerPatch::CodePatch(16u, dex_file2, 1000u),
+      LinkerPatch::CodePatch(16u, dex_file2, 1001u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file1, 1000u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file1, 1001u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file2, 1000u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file2, 1001u),
+      LinkerPatch::TypePatch(16u, dex_file1, 1000u),
+      LinkerPatch::TypePatch(16u, dex_file1, 1001u),
+      LinkerPatch::TypePatch(16u, dex_file2, 1000u),
+      LinkerPatch::TypePatch(16u, dex_file2, 1001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u),
+      LinkerPatch::MethodPatch(32u, dex_file1, 1000u),
+      LinkerPatch::MethodPatch(32u, dex_file1, 1001u),
+      LinkerPatch::MethodPatch(32u, dex_file2, 1000u),
+      LinkerPatch::MethodPatch(32u, dex_file2, 1001u),
+      LinkerPatch::CodePatch(32u, dex_file1, 1000u),
+      LinkerPatch::CodePatch(32u, dex_file1, 1001u),
+      LinkerPatch::CodePatch(32u, dex_file2, 1000u),
+      LinkerPatch::CodePatch(32u, dex_file2, 1001u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file1, 1000u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file1, 1001u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file2, 1000u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file2, 1001u),
+      LinkerPatch::TypePatch(32u, dex_file1, 1000u),
+      LinkerPatch::TypePatch(32u, dex_file1, 1001u),
+      LinkerPatch::TypePatch(32u, dex_file2, 1000u),
+      LinkerPatch::TypePatch(32u, dex_file2, 1001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u),
+      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // Identical to the patch at index 3.
+  };
+  constexpr size_t last_index = arraysize(patches) - 1u;
+
+  for (size_t i = 0; i != arraysize(patches); ++i) {
+    for (size_t j = 0; j != arraysize(patches); ++j) {
+      bool expected = (i != last_index ? i : 3u) == (j != last_index ? j : 3u);
+      EXPECT_EQ(expected, patches[i] == patches[j]) << i << " " << j;
+    }
+  }
+
+  for (size_t i = 0; i != arraysize(patches); ++i) {
+    for (size_t j = 0; j != arraysize(patches); ++j) {
+      bool expected = (i != last_index ? i : 3u) < (j != last_index ? j : 3u);
+      EXPECT_EQ(expected, patches[i] < patches[j]) << i << " " << j;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index 223affa..1626317 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -17,7 +17,6 @@
 #include "compiler.h"
 
 #include "base/logging.h"
-#include "dex/quick/quick_compiler_factory.h"
 #include "driver/compiler_driver.h"
 #include "optimizing/optimizing_compiler.h"
 #include "utils.h"
@@ -27,8 +26,7 @@
 Compiler* Compiler::Create(CompilerDriver* driver, Compiler::Kind kind) {
   switch (kind) {
     case kQuick:
-      return CreateQuickCompiler(driver);
-
+      // TODO: Remove Quick from the compiler options; kQuick now falls through to Optimizing.
     case kOptimizing:
       return CreateOptimizingCompiler(driver);
 
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 8788dc1..a955f3c 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -22,9 +22,11 @@
 
 namespace art {
 
+namespace jit {
+  class JitCodeCache;
+}
+
 class ArtMethod;
-class Backend;
-struct CompilationUnit;
 class CompilerDriver;
 class CompiledMethod;
 class OatWriter;
@@ -36,14 +38,18 @@
     kOptimizing
   };
 
+  enum JniOptimizationFlags {
+    kNone,
+    kFastNative,
+  };
+
   static Compiler* Create(CompilerDriver* driver, Kind kind);
 
   virtual void Init() = 0;
 
   virtual void UnInit() const = 0;
 
-  virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu)
-      const = 0;
+  virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const = 0;
 
   virtual CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                                   uint32_t access_flags,
@@ -56,7 +62,16 @@
 
   virtual CompiledMethod* JniCompile(uint32_t access_flags,
                                      uint32_t method_idx,
-                                     const DexFile& dex_file) const = 0;
+                                     const DexFile& dex_file,
+                                     JniOptimizationFlags optimization_flags) const = 0;
+
+  virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
+                          jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
+                          ArtMethod* method ATTRIBUTE_UNUSED,
+                          bool osr ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return false;
+  }
 
   virtual uintptr_t GetEntryPointOf(ArtMethod* method) const
      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
@@ -65,8 +80,6 @@
     return maximum_compilation_time_before_warning_;
   }
 
-  virtual void InitCompilationUnit(CompilationUnit& cu) const = 0;
-
   virtual ~Compiler() {}
 
   /*
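
Note the two behavioral changes in this header: JniCompile now takes a JniOptimizationFlags argument, and JitCompile gains a default implementation returning false, so only JIT-capable back ends need to override it. A hedged caller-side sketch (everything except the Compiler API shown above is a placeholder assumed to exist in the surrounding code):

    CompiledMethod* jni_method = compiler->JniCompile(
        access_flags, method_idx, dex_file,
        is_fast_native ? Compiler::kFastNative : Compiler::kNone);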
diff --git a/compiler/debug/dwarf/debug_abbrev_writer.h b/compiler/debug/dwarf/debug_abbrev_writer.h
new file mode 100644
index 0000000..0fc843c
--- /dev/null
+++ b/compiler/debug/dwarf/debug_abbrev_writer.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_ABBREV_WRITER_H_
+#define ART_COMPILER_DEBUG_DWARF_DEBUG_ABBREV_WRITER_H_
+
+#include <cstdint>
+#include <type_traits>
+#include <unordered_map>
+
+#include "base/casts.h"
+#include "base/stl_util.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/writer.h"
+#include "leb128.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for the .debug_abbrev section.
+//
+// Abbreviations specify the format of entries in .debug_info.
+// Each entry specifies an abbreviation code, which in turn
+// determines all the attributes and their format.
+// It is possible to think of them as type definitions.
+template <typename Vector = std::vector<uint8_t>>
+class DebugAbbrevWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  explicit DebugAbbrevWriter(Vector* buffer)
+      : Writer<Vector>(buffer),
+        current_abbrev_(buffer->get_allocator()) {
+    this->PushUint8(0);  // Add abbrev table terminator.
+  }
+
+  // Start abbreviation declaration.
+  void StartAbbrev(Tag tag) {
+    DCHECK(current_abbrev_.empty());
+    EncodeUnsignedLeb128(&current_abbrev_, tag);
+    has_children_offset_ = current_abbrev_.size();
+    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
+  }
+
+  // Add attribute specification.
+  void AddAbbrevAttribute(Attribute name, Form type) {
+    EncodeUnsignedLeb128(&current_abbrev_, name);
+    EncodeUnsignedLeb128(&current_abbrev_, type);
+  }
+
+  // End abbreviation declaration and return its code.
+  // This will deduplicate abbreviations.
+  uint32_t EndAbbrev(Children has_children) {
+    DCHECK(!current_abbrev_.empty());
+    current_abbrev_[has_children_offset_] = has_children;
+    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), NextAbbrevCode()));
+    uint32_t abbrev_code = it.first->second;
+    if (UNLIKELY(it.second)) {  // Inserted new entry.
+      const Vector& abbrev = it.first->first;
+      this->Pop();  // Remove abbrev table terminator.
+      this->PushUleb128(abbrev_code);
+      this->PushData(abbrev.data(), abbrev.size());
+      this->PushUint8(0);  // Attribute list end.
+      this->PushUint8(0);  // Attribute list end.
+      this->PushUint8(0);  // Add abbrev table terminator.
+    }
+    current_abbrev_.clear();
+    return abbrev_code;
+  }
+
+  // Get the next free abbrev code.
+  uint32_t NextAbbrevCode() {
+    return dchecked_integral_cast<uint32_t>(1 + abbrev_codes_.size());
+  }
+
+ private:
+  Vector current_abbrev_;
+  size_t has_children_offset_ = 0;
+  std::unordered_map<Vector, uint32_t, FNVHash<Vector> > abbrev_codes_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_DEBUG_ABBREV_WRITER_H_
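
A minimal sketch of driving the writer; the DW_* constants are assumed to be the standard DWARF values from debug/dwarf/dwarf_constants.h:

    std::vector<uint8_t> buffer;
    dwarf::DebugAbbrevWriter<> abbrevs(&buffer);
    abbrevs.StartAbbrev(dwarf::DW_TAG_compile_unit);
    abbrevs.AddAbbrevAttribute(dwarf::DW_AT_producer, dwarf::DW_FORM_strp);
    uint32_t code = abbrevs.EndAbbrev(dwarf::DW_CHILDREN_yes);
    // Emitting the same declaration again returns the same code: EndAbbrev()
    // deduplicates, so .debug_info entries can share one abbreviation.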
diff --git a/compiler/debug/dwarf/debug_frame_opcode_writer.h b/compiler/debug/dwarf/debug_frame_opcode_writer.h
new file mode 100644
index 0000000..7c75c9b
--- /dev/null
+++ b/compiler/debug/dwarf/debug_frame_opcode_writer.h
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
+#define ART_COMPILER_DEBUG_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
+
+#include "base/bit_utils.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/register.h"
+#include "debug/dwarf/writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for .debug_frame opcodes (DWARF-3).
+// See the DWARF specification for the precise meaning of the opcodes.
+// The writer is very lightweight; however, it will do the following for you:
+//  * Choose the most compact encoding of a given opcode.
+//  * Keep track of current state and convert absolute values to deltas.
+//  * Divide by header-defined factors as appropriate.
+template<typename Vector = std::vector<uint8_t> >
+class DebugFrameOpCodeWriter : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  // To save space, DWARF divides most offsets by header-defined factors.
+  // They are used in integer divisions, so we make them constants.
+// We usually subtract from the stack base pointer, so making the factor
+  // negative makes the encoded values positive and thus easier to encode.
+  static constexpr int kDataAlignmentFactor = -4;
+  static constexpr int kCodeAlignmentFactor = 1;
+
+  // Explicitly advance the program counter to the given location.
+  void ALWAYS_INLINE AdvancePC(int absolute_pc) {
+    DCHECK_GE(absolute_pc, current_pc_);
+    if (UNLIKELY(enabled_)) {
+      int delta = FactorCodeOffset(absolute_pc - current_pc_);
+      if (delta != 0) {
+        if (delta <= 0x3F) {
+          this->PushUint8(DW_CFA_advance_loc | delta);
+        } else if (delta <= UINT8_MAX) {
+          this->PushUint8(DW_CFA_advance_loc1);
+          this->PushUint8(delta);
+        } else if (delta <= UINT16_MAX) {
+          this->PushUint8(DW_CFA_advance_loc2);
+          this->PushUint16(delta);
+        } else {
+          this->PushUint8(DW_CFA_advance_loc4);
+          this->PushUint32(delta);
+        }
+      }
+      current_pc_ = absolute_pc;
+    }
+  }
+
+  // Override this method to automatically advance the PC before each opcode.
+  virtual void ImplicitlyAdvancePC() { }
+
+  // Common alias in assemblers - spill relative to current stack pointer.
+  void ALWAYS_INLINE RelOffset(Reg reg, int offset) {
+    Offset(reg, offset - current_cfa_offset_);
+  }
+
+  // Common alias in assemblers - increase stack frame size.
+  void ALWAYS_INLINE AdjustCFAOffset(int delta) {
+    DefCFAOffset(current_cfa_offset_ + delta);
+  }
+
+  // Custom alias - spill many registers based on bitmask.
+  void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset,
+                                      uint32_t reg_mask, int reg_size) {
+    DCHECK(reg_size == 4 || reg_size == 8);
+    if (UNLIKELY(enabled_)) {
+      for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
+        // Skip zero bits and go to the set bit.
+        int num_zeros = CTZ(reg_mask);
+        i += num_zeros;
+        reg_mask >>= num_zeros;
+        RelOffset(Reg(reg_base.num() + i), offset);
+        offset += reg_size;
+      }
+    }
+  }
+
+  // Custom alias - unspill many registers based on bitmask.
+  void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) {
+    if (UNLIKELY(enabled_)) {
+      for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
+        // Skip zero bits and go to the set bit.
+        int num_zeros = CTZ(reg_mask);
+        i += num_zeros;
+        reg_mask >>= num_zeros;
+        Restore(Reg(reg_base.num() + i));
+      }
+    }
+  }
+
+  void ALWAYS_INLINE Nop() {
+    if (UNLIKELY(enabled_)) {
+      this->PushUint8(DW_CFA_nop);
+    }
+  }
+
+  void ALWAYS_INLINE Offset(Reg reg, int offset) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      int factored_offset = FactorDataOffset(offset);  // May change sign.
+      if (factored_offset >= 0) {
+        if (0 <= reg.num() && reg.num() <= 0x3F) {
+          this->PushUint8(DW_CFA_offset | reg.num());
+          this->PushUleb128(factored_offset);
+        } else {
+          this->PushUint8(DW_CFA_offset_extended);
+          this->PushUleb128(reg.num());
+          this->PushUleb128(factored_offset);
+        }
+      } else {
+        uses_dwarf3_features_ = true;
+        this->PushUint8(DW_CFA_offset_extended_sf);
+        this->PushUleb128(reg.num());
+        this->PushSleb128(factored_offset);
+      }
+    }
+  }
+
+  void ALWAYS_INLINE Restore(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      if (0 <= reg.num() && reg.num() <= 0x3F) {
+        this->PushUint8(DW_CFA_restore | reg.num());
+      } else {
+        this->PushUint8(DW_CFA_restore_extended);
+        this->PushUleb128(reg.num());
+      }
+    }
+  }
+
+  void ALWAYS_INLINE Undefined(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_undefined);
+      this->PushUleb128(reg.num());
+    }
+  }
+
+  void ALWAYS_INLINE SameValue(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_same_value);
+      this->PushUleb128(reg.num());
+    }
+  }
+
+  // The previous value of "reg" is stored in register "new_reg".
+  void ALWAYS_INLINE Register(Reg reg, Reg new_reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_register);
+      this->PushUleb128(reg.num());
+      this->PushUleb128(new_reg.num());
+    }
+  }
+
+  void ALWAYS_INLINE RememberState() {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_remember_state);
+    }
+  }
+
+  void ALWAYS_INLINE RestoreState() {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_restore_state);
+    }
+  }
+
+  void ALWAYS_INLINE DefCFA(Reg reg, int offset) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      if (offset >= 0) {
+        this->PushUint8(DW_CFA_def_cfa);
+        this->PushUleb128(reg.num());
+        this->PushUleb128(offset);  // Non-factored.
+      } else {
+        uses_dwarf3_features_ = true;
+        this->PushUint8(DW_CFA_def_cfa_sf);
+        this->PushUleb128(reg.num());
+        this->PushSleb128(FactorDataOffset(offset));
+      }
+    }
+    current_cfa_offset_ = offset;
+  }
+
+  void ALWAYS_INLINE DefCFARegister(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_def_cfa_register);
+      this->PushUleb128(reg.num());
+    }
+  }
+
+  void ALWAYS_INLINE DefCFAOffset(int offset) {
+    if (UNLIKELY(enabled_)) {
+      if (current_cfa_offset_ != offset) {
+        ImplicitlyAdvancePC();
+        if (offset >= 0) {
+          this->PushUint8(DW_CFA_def_cfa_offset);
+          this->PushUleb128(offset);  // Non-factored.
+        } else {
+          uses_dwarf3_features_ = true;
+          this->PushUint8(DW_CFA_def_cfa_offset_sf);
+          this->PushSleb128(FactorDataOffset(offset));
+        }
+      }
+    }
+    // Unconditional so that the user can still get and check the value.
+    current_cfa_offset_ = offset;
+  }
+
+  void ALWAYS_INLINE ValOffset(Reg reg, int offset) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      int factored_offset = FactorDataOffset(offset);  // May change sign.
+      if (factored_offset >= 0) {
+        this->PushUint8(DW_CFA_val_offset);
+        this->PushUleb128(reg.num());
+        this->PushUleb128(factored_offset);
+      } else {
+        this->PushUint8(DW_CFA_val_offset_sf);
+        this->PushUleb128(reg.num());
+        this->PushSleb128(factored_offset);
+      }
+    }
+  }
+
+  void ALWAYS_INLINE DefCFAExpression(uint8_t* expr, int expr_size) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      this->PushUint8(DW_CFA_def_cfa_expression);
+      this->PushUleb128(expr_size);
+      this->PushData(expr, expr_size);
+    }
+  }
+
+  void ALWAYS_INLINE Expression(Reg reg, uint8_t* expr, int expr_size) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      this->PushUint8(DW_CFA_expression);
+      this->PushUleb128(reg.num());
+      this->PushUleb128(expr_size);
+      this->PushData(expr, expr_size);
+    }
+  }
+
+  void ALWAYS_INLINE ValExpression(Reg reg, uint8_t* expr, int expr_size) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      this->PushUint8(DW_CFA_val_expression);
+      this->PushUleb128(reg.num());
+      this->PushUleb128(expr_size);
+      this->PushData(expr, expr_size);
+    }
+  }
+
+  bool IsEnabled() const { return enabled_; }
+
+  void SetEnabled(bool value) {
+    enabled_ = value;
+    if (enabled_ && opcodes_.capacity() == 0u) {
+      opcodes_.reserve(kDefaultCapacity);
+    }
+  }
+
+  int GetCurrentPC() const { return current_pc_; }
+
+  int GetCurrentCFAOffset() const { return current_cfa_offset_; }
+
+  void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; }
+
+  using Writer<Vector>::data;
+
+  explicit DebugFrameOpCodeWriter(bool enabled = true,
+                                  const typename Vector::allocator_type& alloc =
+                                      typename Vector::allocator_type())
+      : Writer<Vector>(&opcodes_),
+        enabled_(false),
+        opcodes_(alloc),
+        current_cfa_offset_(0),
+        current_pc_(0),
+        uses_dwarf3_features_(false) {
+    SetEnabled(enabled);
+  }
+
+  virtual ~DebugFrameOpCodeWriter() { }
+
+ protected:
+  // Best guess based on a couple of observed outputs.
+  static constexpr size_t kDefaultCapacity = 32u;
+
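+  // Offsets are stored factored by the alignment constants; for example, with
+  // a data alignment factor of -4, an offset of -40000 is factored to 10000.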
+  int FactorDataOffset(int offset) const {
+    DCHECK_EQ(offset % kDataAlignmentFactor, 0);
+    return offset / kDataAlignmentFactor;
+  }
+
+  int FactorCodeOffset(int offset) const {
+    DCHECK_EQ(offset % kCodeAlignmentFactor, 0);
+    return offset / kCodeAlignmentFactor;
+  }
+
+  bool enabled_;  // If disabled all writes are no-ops.
+  Vector opcodes_;
+  int current_cfa_offset_;
+  int current_pc_;
+  bool uses_dwarf3_features_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DebugFrameOpCodeWriter);
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
diff --git a/compiler/debug/dwarf/debug_info_entry_writer.h b/compiler/debug/dwarf/debug_info_entry_writer.h
new file mode 100644
index 0000000..85f021e
--- /dev/null
+++ b/compiler/debug/dwarf/debug_info_entry_writer.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
+#define ART_COMPILER_DEBUG_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
+
+#include <cstdint>
+#include <unordered_map>
+
+#include "base/casts.h"
+#include "debug/dwarf/debug_abbrev_writer.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/expression.h"
+#include "debug/dwarf/writer.h"
+#include "leb128.h"
+
+namespace art {
+namespace dwarf {
+
+/*
+ * Writer for debug information entries (DIE).
+ *
+ * Usage:
+ *   StartTag(DW_TAG_compile_unit);
+ *     WriteStrp(DW_AT_producer, "Compiler name", debug_str);
+ *     StartTag(DW_TAG_subprogram);
+ *       WriteStrp(DW_AT_name, "Foo", debug_str);
+ *     EndTag();
+ *   EndTag();
+ */
+template <typename Vector = std::vector<uint8_t>>
+class DebugInfoEntryWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  static constexpr size_t kCompilationUnitHeaderSize = 11;
+
+  // Start debugging information entry.
+  // Returns offset of the entry in compilation unit.
+  size_t StartTag(Tag tag) {
+    if (inside_entry_) {
+      // Write abbrev code for the previous entry.
+      // Parent entry is finalized before any children are written.
+      this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_yes));
+      inside_entry_ = false;
+    }
+    debug_abbrev_->StartAbbrev(tag);
+    // Abbrev code placeholder of sufficient size.
+    abbrev_code_offset_ = this->data()->size();
+    this->PushUleb128(debug_abbrev_->NextAbbrevCode());
+    depth_++;
+    inside_entry_ = true;
+    return abbrev_code_offset_ + kCompilationUnitHeaderSize;
+  }
+
+  // End debugging information entry.
+  void EndTag() {
+    DCHECK_GT(depth_, 0);
+    if (inside_entry_) {
+      // Write abbrev code for this entry.
+      this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_no));
+      inside_entry_ = false;
+      // This entry has no children and so there is no terminator.
+    } else {
+      // The entry has already been finalized, so it must be a parent entry
+      // and we need to write the terminator required by DW_CHILDREN_yes.
+      this->PushUint8(0);
+    }
+    depth_--;
+  }
+
+  void WriteAddr(Attribute attrib, uint64_t value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_addr);
+    patch_locations_.push_back(this->data()->size());
+    if (is64bit_) {
+      this->PushUint64(value);
+    } else {
+      this->PushUint32(value);
+    }
+  }
+
+  void WriteBlock(Attribute attrib, const uint8_t* ptr, size_t num_bytes) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_block);
+    this->PushUleb128(num_bytes);
+    this->PushData(ptr, num_bytes);
+  }
+
+  void WriteExprLoc(Attribute attrib, const Expression& expr) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_exprloc);
+    this->PushUleb128(dchecked_integral_cast<uint32_t>(expr.size()));
+    this->PushData(expr.data());
+  }
+
+  void WriteData1(Attribute attrib, uint8_t value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data1);
+    this->PushUint8(value);
+  }
+
+  void WriteData2(Attribute attrib, uint16_t value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data2);
+    this->PushUint16(value);
+  }
+
+  void WriteData4(Attribute attrib, uint32_t value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data4);
+    this->PushUint32(value);
+  }
+
+  void WriteData8(Attribute attrib, uint64_t value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data8);
+    this->PushUint64(value);
+  }
+
+  void WriteSecOffset(Attribute attrib, uint32_t offset) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
+    this->PushUint32(offset);
+  }
+
+  void WriteSdata(Attribute attrib, int value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sdata);
+    this->PushSleb128(value);
+  }
+
+  void WriteUdata(Attribute attrib, int value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata);
+    this->PushUleb128(value);
+  }
+
+  void WriteUdata(Attribute attrib, uint32_t value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata);
+    this->PushUleb128(value);
+  }
+
+  void WriteFlag(Attribute attrib, bool value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag);
+    this->PushUint8(value ? 1 : 0);
+  }
+
+  void WriteFlagPresent(Attribute attrib) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag_present);
+  }
+
+  void WriteRef4(Attribute attrib, uint32_t cu_offset) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref4);
+    this->PushUint32(cu_offset);
+  }
+
+  void WriteRef(Attribute attrib, uint32_t cu_offset) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
+    this->PushUleb128(cu_offset);
+  }
+
+  void WriteString(Attribute attrib, const char* value) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_string);
+    this->PushString(value);
+  }
+
+  void WriteStrp(Attribute attrib, size_t debug_str_offset) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp);
+    this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
+  }
+
+  void WriteStrp(Attribute attrib, const char* str, size_t len,
+                 std::vector<uint8_t>* debug_str) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp);
+    this->PushUint32(debug_str->size());
+    debug_str->insert(debug_str->end(), str, str + len);
+    debug_str->push_back(0);
+  }
+
+  void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) {
+    WriteStrp(attrib, str, strlen(str), debug_str);
+  }
+
+  bool Is64bit() const { return is64bit_; }
+
+  const std::vector<uintptr_t>& GetPatchLocations() const {
+    return patch_locations_;
+  }
+
+  int Depth() const { return depth_; }
+
+  using Writer<Vector>::data;
+  using Writer<Vector>::size;
+  using Writer<Vector>::UpdateUint32;
+
+  DebugInfoEntryWriter(bool is64bitArch,
+                       DebugAbbrevWriter<Vector>* debug_abbrev,
+                       const typename Vector::allocator_type& alloc =
+                           typename Vector::allocator_type())
+      : Writer<Vector>(&entries_),
+        debug_abbrev_(debug_abbrev),
+        entries_(alloc),
+        is64bit_(is64bitArch) {
+  }
+
+  ~DebugInfoEntryWriter() {
+    DCHECK(!inside_entry_);
+    DCHECK_EQ(depth_, 0);
+  }
+
+ private:
+  DebugAbbrevWriter<Vector>* debug_abbrev_;
+  Vector entries_;
+  bool is64bit_;
+  int depth_ = 0;
+  size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
+  bool inside_entry_ = false;  // Entry ends at first child (if any).
+  std::vector<uintptr_t> patch_locations_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
diff --git a/compiler/debug/dwarf/debug_line_opcode_writer.h b/compiler/debug/dwarf/debug_line_opcode_writer.h
new file mode 100644
index 0000000..b4a4d63
--- /dev/null
+++ b/compiler/debug/dwarf/debug_line_opcode_writer.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
+#define ART_COMPILER_DEBUG_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
+
+#include <cstdint>
+
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for the .debug_line opcodes (DWARF-3).
+// The writer is very lightweight; however, it will do the following for you:
+//  * Choose the most compact encoding of a given opcode.
+//  * Keep track of current state and convert absolute values to deltas.
+//  * Divide by header-defined factors as appropriate.
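+//
+// A minimal usage sketch (the addresses and line numbers are only illustrative):
+//   DebugLineOpCodeWriter<> opcodes(/* use64bitAddress */ false, /* codeFactorBits */ 1);
+//   opcodes.SetAddress(0x01000000);  // Long encoding; the linker can relocate it.
+//   opcodes.AddRow(0x01000010, 42);  // Uses a compact special opcode when possible.
+//   opcodes.EndSequence();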
+template<typename Vector = std::vector<uint8_t>>
+class DebugLineOpCodeWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  static constexpr int kOpcodeBase = 13;
+  static constexpr bool kDefaultIsStmt = false;
+  static constexpr int kLineBase = -5;
+  static constexpr int kLineRange = 14;
+
+  void AddRow() {
+    this->PushUint8(DW_LNS_copy);
+  }
+
+  void AdvancePC(uint64_t absolute_address) {
+    DCHECK_NE(current_address_, 0u);  // Use SetAddress for the first advance.
+    DCHECK_GE(absolute_address, current_address_);
+    if (absolute_address != current_address_) {
+      uint64_t delta = FactorCodeOffset(absolute_address - current_address_);
+      if (delta <= INT32_MAX) {
+        this->PushUint8(DW_LNS_advance_pc);
+        this->PushUleb128(static_cast<int>(delta));
+        current_address_ = absolute_address;
+      } else {
+        SetAddress(absolute_address);
+      }
+    }
+  }
+
+  void AdvanceLine(int absolute_line) {
+    int delta = absolute_line - current_line_;
+    if (delta != 0) {
+      this->PushUint8(DW_LNS_advance_line);
+      this->PushSleb128(delta);
+      current_line_ = absolute_line;
+    }
+  }
+
+  void SetFile(int file) {
+    if (current_file_ != file) {
+      this->PushUint8(DW_LNS_set_file);
+      this->PushUleb128(file);
+      current_file_ = file;
+    }
+  }
+
+  void SetColumn(int column) {
+    this->PushUint8(DW_LNS_set_column);
+    this->PushUleb128(column);
+  }
+
+  void SetIsStmt(bool is_stmt) {
+    if (is_stmt_ != is_stmt) {
+      this->PushUint8(DW_LNS_negate_stmt);
+      is_stmt_ = is_stmt;
+    }
+  }
+
+  void SetBasicBlock() {
+    this->PushUint8(DW_LNS_set_basic_block);
+  }
+
+  void SetPrologueEnd() {
+    uses_dwarf3_features_ = true;
+    this->PushUint8(DW_LNS_set_prologue_end);
+  }
+
+  void SetEpilogueBegin() {
+    uses_dwarf3_features_ = true;
+    this->PushUint8(DW_LNS_set_epilogue_begin);
+  }
+
+  void SetISA(int isa) {
+    uses_dwarf3_features_ = true;
+    this->PushUint8(DW_LNS_set_isa);
+    this->PushUleb128(isa);
+  }
+
+  void EndSequence() {
+    this->PushUint8(0);
+    this->PushUleb128(1);
+    this->PushUint8(DW_LNE_end_sequence);
+    current_address_ = 0;
+    current_file_ = 1;
+    current_line_ = 1;
+    is_stmt_ = kDefaultIsStmt;
+  }
+
+  // Unconditionally set the address using the long encoding.
+  // This gives the linker an opportunity to relocate the address.
+  void SetAddress(uint64_t absolute_address) {
+    DCHECK_GE(absolute_address, current_address_);
+    FactorCodeOffset(absolute_address);  // Check if it is factorable.
+    this->PushUint8(0);
+    if (use_64bit_address_) {
+      this->PushUleb128(1 + 8);
+      this->PushUint8(DW_LNE_set_address);
+      patch_locations_.push_back(this->data()->size());
+      this->PushUint64(absolute_address);
+    } else {
+      this->PushUleb128(1 + 4);
+      this->PushUint8(DW_LNE_set_address);
+      patch_locations_.push_back(this->data()->size());
+      this->PushUint32(absolute_address);
+    }
+    current_address_ = absolute_address;
+  }
+
+  void DefineFile(const char* filename,
+                  int directory_index,
+                  int modification_time,
+                  int file_size) {
+    int size = 1 +
+               strlen(filename) + 1 +
+               UnsignedLeb128Size(directory_index) +
+               UnsignedLeb128Size(modification_time) +
+               UnsignedLeb128Size(file_size);
+    this->PushUint8(0);
+    this->PushUleb128(size);
+    size_t start = data()->size();
+    this->PushUint8(DW_LNE_define_file);
+    this->PushString(filename);
+    this->PushUleb128(directory_index);
+    this->PushUleb128(modification_time);
+    this->PushUleb128(file_size);
+    DCHECK_EQ(start + size, data()->size());
+  }
+
+  // Compact address and line opcode.
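+  // For example, with kOpcodeBase = 13, kLineBase = -5 and kLineRange = 14, a
+  // factored address delta of 2 and a line delta of +1 are encoded as the
+  // single special opcode 13 + (1 - (-5)) + 2 * 14 = 47.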
+  void AddRow(uint64_t absolute_address, int absolute_line) {
+    DCHECK_GE(absolute_address, current_address_);
+
+    // If the address is definitely too far, use the long encoding.
+    uint64_t delta_address = FactorCodeOffset(absolute_address - current_address_);
+    if (delta_address > UINT8_MAX) {
+      AdvancePC(absolute_address);
+      delta_address = 0;
+    }
+
+    // If the line is definitely too far, use the long encoding.
+    int delta_line = absolute_line - current_line_;
+    if (!(kLineBase <= delta_line && delta_line < kLineBase + kLineRange)) {
+      AdvanceLine(absolute_line);
+      delta_line = 0;
+    }
+
+    // Both address and line should be reasonable now.  Use the short encoding.
+    int opcode = kOpcodeBase + (delta_line - kLineBase) +
+                 (static_cast<int>(delta_address) * kLineRange);
+    if (opcode > UINT8_MAX) {
+      // If the address is still too far, try to increment it by a constant amount.
+      int const_advance = (0xFF - kOpcodeBase) / kLineRange;
+      opcode -= (kLineRange * const_advance);
+      if (opcode <= UINT8_MAX) {
+        this->PushUint8(DW_LNS_const_add_pc);
+      } else {
+        // Give up and use long encoding for address.
+        AdvancePC(absolute_address);
+        // Still use the opcode to do line advance and copy.
+        opcode = kOpcodeBase + (delta_line - kLineBase);
+      }
+    }
+    DCHECK(kOpcodeBase <= opcode && opcode <= 0xFF);
+    this->PushUint8(opcode);  // Special opcode.
+    current_line_ = absolute_line;
+    current_address_ = absolute_address;
+  }
+
+  int GetCodeFactorBits() const {
+    return code_factor_bits_;
+  }
+
+  uint64_t CurrentAddress() const {
+    return current_address_;
+  }
+
+  int CurrentFile() const {
+    return current_file_;
+  }
+
+  int CurrentLine() const {
+    return current_line_;
+  }
+
+  const std::vector<uintptr_t>& GetPatchLocations() const {
+    return patch_locations_;
+  }
+
+  using Writer<Vector>::data;
+
+  DebugLineOpCodeWriter(bool use64bitAddress,
+                        int codeFactorBits,
+                        const typename Vector::allocator_type& alloc =
+                            typename Vector::allocator_type())
+      : Writer<Vector>(&opcodes_),
+        opcodes_(alloc),
+        uses_dwarf3_features_(false),
+        use_64bit_address_(use64bitAddress),
+        code_factor_bits_(codeFactorBits),
+        current_address_(0),
+        current_file_(1),
+        current_line_(1),
+        is_stmt_(kDefaultIsStmt) {
+  }
+
+ private:
+  uint64_t FactorCodeOffset(uint64_t offset) const {
+    DCHECK_GE(code_factor_bits_, 0);
+    DCHECK_EQ((offset >> code_factor_bits_) << code_factor_bits_, offset);
+    return offset >> code_factor_bits_;
+  }
+
+  Vector opcodes_;
+  bool uses_dwarf3_features_;
+  bool use_64bit_address_;
+  int code_factor_bits_;
+  uint64_t current_address_;
+  int current_file_;
+  int current_line_;
+  bool is_stmt_;
+  std::vector<uintptr_t> patch_locations_;
+
+  DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter);
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
diff --git a/compiler/debug/dwarf/dwarf_constants.h b/compiler/debug/dwarf/dwarf_constants.h
new file mode 100644
index 0000000..96f805e
--- /dev/null
+++ b/compiler/debug/dwarf/dwarf_constants.h
@@ -0,0 +1,689 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_DWARF_CONSTANTS_H_
+#define ART_COMPILER_DEBUG_DWARF_DWARF_CONSTANTS_H_
+
+namespace art {
+namespace dwarf {
+
+// Based on the Dwarf 4 specification at dwarfstd.org and on issues marked
+// for inclusion in Dwarf 5 on the same site. Values not specified in the
+// Dwarf 4 standard might change or be removed in the future, and they may
+// differ from the values currently used by other implementations for the
+// same trait; use at your own risk.
+
+enum Tag {
+  DW_TAG_array_type = 0x01,
+  DW_TAG_class_type = 0x02,
+  DW_TAG_entry_point = 0x03,
+  DW_TAG_enumeration_type = 0x04,
+  DW_TAG_formal_parameter = 0x05,
+  DW_TAG_imported_declaration = 0x08,
+  DW_TAG_label = 0x0a,
+  DW_TAG_lexical_block = 0x0b,
+  DW_TAG_member = 0x0d,
+  DW_TAG_pointer_type = 0x0f,
+  DW_TAG_reference_type = 0x10,
+  DW_TAG_compile_unit = 0x11,
+  DW_TAG_string_type = 0x12,
+  DW_TAG_structure_type = 0x13,
+  DW_TAG_subroutine_type = 0x15,
+  DW_TAG_typedef = 0x16,
+  DW_TAG_union_type = 0x17,
+  DW_TAG_unspecified_parameters = 0x18,
+  DW_TAG_variant = 0x19,
+  DW_TAG_common_block = 0x1a,
+  DW_TAG_common_inclusion = 0x1b,
+  DW_TAG_inheritance = 0x1c,
+  DW_TAG_inlined_subroutine = 0x1d,
+  DW_TAG_module = 0x1e,
+  DW_TAG_ptr_to_member_type = 0x1f,
+  DW_TAG_set_type = 0x20,
+  DW_TAG_subrange_type = 0x21,
+  DW_TAG_with_stmt = 0x22,
+  DW_TAG_access_declaration = 0x23,
+  DW_TAG_base_type = 0x24,
+  DW_TAG_catch_block = 0x25,
+  DW_TAG_const_type = 0x26,
+  DW_TAG_constant = 0x27,
+  DW_TAG_enumerator = 0x28,
+  DW_TAG_file_type = 0x29,
+  DW_TAG_friend = 0x2a,
+  DW_TAG_namelist = 0x2b,
+  DW_TAG_namelist_item = 0x2c,
+  DW_TAG_packed_type = 0x2d,
+  DW_TAG_subprogram = 0x2e,
+  DW_TAG_template_type_parameter = 0x2f,
+  DW_TAG_template_value_parameter = 0x30,
+  DW_TAG_thrown_type = 0x31,
+  DW_TAG_try_block = 0x32,
+  DW_TAG_variant_part = 0x33,
+  DW_TAG_variable = 0x34,
+  DW_TAG_volatile_type = 0x35,
+  DW_TAG_dwarf_procedure = 0x36,
+  DW_TAG_restrict_type = 0x37,
+  DW_TAG_interface_type = 0x38,
+  DW_TAG_namespace = 0x39,
+  DW_TAG_imported_module = 0x3a,
+  DW_TAG_unspecified_type = 0x3b,
+  DW_TAG_partial_unit = 0x3c,
+  DW_TAG_imported_unit = 0x3d,
+  DW_TAG_condition = 0x3f,
+  DW_TAG_shared_type = 0x40,
+  DW_TAG_type_unit = 0x41,
+  DW_TAG_rvalue_reference_type = 0x42,
+  DW_TAG_template_alias = 0x43,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final values are not yet specified. The
+  // values listed may differ from those used by other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_TAG_coarray_type = 0x44,
+  DW_TAG_call_site = 0x45,
+  DW_TAG_call_site_parameter = 0x46,
+  DW_TAG_generic_subrange = 0x47,
+  DW_TAG_atomic_type = 0x48,
+  DW_TAG_dynamic_type = 0x49,
+  DW_TAG_aligned_type = 0x50,
+#endif
+  DW_TAG_lo_user = 0x4080,
+  DW_TAG_hi_user = 0xffff
+};
+
+enum Children : uint8_t {
+  DW_CHILDREN_no = 0x00,
+  DW_CHILDREN_yes = 0x01
+};
+
+enum Attribute {
+  DW_AT_sibling = 0x01,
+  DW_AT_location = 0x02,
+  DW_AT_name = 0x03,
+  DW_AT_ordering = 0x09,
+  DW_AT_byte_size = 0x0b,
+  DW_AT_bit_offset = 0x0c,
+  DW_AT_bit_size = 0x0d,
+  DW_AT_stmt_list = 0x10,
+  DW_AT_low_pc = 0x11,
+  DW_AT_high_pc = 0x12,
+  DW_AT_language = 0x13,
+  DW_AT_discr = 0x15,
+  DW_AT_discr_value = 0x16,
+  DW_AT_visibility = 0x17,
+  DW_AT_import = 0x18,
+  DW_AT_string_length = 0x19,
+  DW_AT_common_reference = 0x1a,
+  DW_AT_comp_dir = 0x1b,
+  DW_AT_const_value = 0x1c,
+  DW_AT_containing_type = 0x1d,
+  DW_AT_default_value = 0x1e,
+  DW_AT_inline = 0x20,
+  DW_AT_is_optional = 0x21,
+  DW_AT_lower_bound = 0x22,
+  DW_AT_producer = 0x25,
+  DW_AT_prototyped = 0x27,
+  DW_AT_return_addr = 0x2a,
+  DW_AT_start_scope = 0x2c,
+  DW_AT_bit_stride = 0x2e,
+  DW_AT_upper_bound = 0x2f,
+  DW_AT_abstract_origin = 0x31,
+  DW_AT_accessibility = 0x32,
+  DW_AT_address_class = 0x33,
+  DW_AT_artificial = 0x34,
+  DW_AT_base_types = 0x35,
+  DW_AT_calling_convention = 0x36,
+  DW_AT_count = 0x37,
+  DW_AT_data_member_location = 0x38,
+  DW_AT_decl_column = 0x39,
+  DW_AT_decl_file = 0x3a,
+  DW_AT_decl_line = 0x3b,
+  DW_AT_declaration = 0x3c,
+  DW_AT_discr_list = 0x3d,
+  DW_AT_encoding = 0x3e,
+  DW_AT_external = 0x3f,
+  DW_AT_frame_base = 0x40,
+  DW_AT_friend = 0x41,
+  DW_AT_identifier_case = 0x42,
+  DW_AT_macro_info = 0x43,
+  DW_AT_namelist_item = 0x44,
+  DW_AT_priority = 0x45,
+  DW_AT_segment = 0x46,
+  DW_AT_specification = 0x47,
+  DW_AT_static_link = 0x48,
+  DW_AT_type = 0x49,
+  DW_AT_use_location = 0x4a,
+  DW_AT_variable_parameter = 0x4b,
+  DW_AT_virtuality = 0x4c,
+  DW_AT_vtable_elem_location = 0x4d,
+  DW_AT_allocated = 0x4e,
+  DW_AT_associated = 0x4f,
+  DW_AT_data_location = 0x50,
+  DW_AT_byte_stride = 0x51,
+  DW_AT_entry_pc = 0x52,
+  DW_AT_use_UTF8 = 0x53,
+  DW_AT_extension = 0x54,
+  DW_AT_ranges = 0x55,
+  DW_AT_trampoline = 0x56,
+  DW_AT_call_column = 0x57,
+  DW_AT_call_file = 0x58,
+  DW_AT_call_line = 0x59,
+  DW_AT_description = 0x5a,
+  DW_AT_binary_scale = 0x5b,
+  DW_AT_decimal_scale = 0x5c,
+  DW_AT_small = 0x5d,
+  DW_AT_decimal_sign = 0x5e,
+  DW_AT_digit_count = 0x5f,
+  DW_AT_picture_string = 0x60,
+  DW_AT_mutable = 0x61,
+  DW_AT_threads_scaled = 0x62,
+  DW_AT_explicit = 0x63,
+  DW_AT_object_pointer = 0x64,
+  DW_AT_endianity = 0x65,
+  DW_AT_elemental = 0x66,
+  DW_AT_pure = 0x67,
+  DW_AT_recursive = 0x68,
+  DW_AT_signature = 0x69,
+  DW_AT_main_subprogram = 0x6a,
+  DW_AT_data_bit_offset = 0x6b,
+  DW_AT_const_expr = 0x6c,
+  DW_AT_enum_class = 0x6d,
+  DW_AT_linkage_name = 0x6e,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final values are not yet specified. The
+  // values listed may differ from those used by other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_AT_call_site_value = 0x6f,
+  DW_AT_call_site_data_value = 0x70,
+  DW_AT_call_site_target = 0x71,
+  DW_AT_call_site_target_clobbered = 0x72,
+  DW_AT_tail_call = 0x73,
+  DW_AT_all_tail_call_sites = 0x74,
+  DW_AT_all_call_sites = 0x75,
+  DW_AT_all_source_call_sites = 0x76,
+  DW_AT_call_site_parameter = 0x77,
+  DW_AT_rank = 0x7c,
+  DW_AT_string_bitsize = 0x7d,
+  DW_AT_string_byte_size = 0x7e,
+  DW_AT_reference = 0x7f,
+  DW_AT_rvalue_reference = 0x80,
+  DW_AT_noreturn = 0x81,
+  DW_AT_alignment = 0x82,
+#endif
+  DW_AT_lo_user = 0x2000,
+  DW_AT_hi_user = 0xffff
+};
+
+enum Form : uint8_t {
+  DW_FORM_addr = 0x01,
+  DW_FORM_block2 = 0x03,
+  DW_FORM_block4 = 0x04,
+  DW_FORM_data2 = 0x05,
+  DW_FORM_data4 = 0x06,
+  DW_FORM_data8 = 0x07,
+  DW_FORM_string = 0x08,
+  DW_FORM_block = 0x09,
+  DW_FORM_block1 = 0x0a,
+  DW_FORM_data1 = 0x0b,
+  DW_FORM_flag = 0x0c,
+  DW_FORM_sdata = 0x0d,
+  DW_FORM_strp = 0x0e,
+  DW_FORM_udata = 0x0f,
+  DW_FORM_ref_addr = 0x10,
+  DW_FORM_ref1 = 0x11,
+  DW_FORM_ref2 = 0x12,
+  DW_FORM_ref4 = 0x13,
+  DW_FORM_ref8 = 0x14,
+  DW_FORM_ref_udata = 0x15,
+  DW_FORM_indirect = 0x16,
+  DW_FORM_sec_offset = 0x17,
+  DW_FORM_exprloc = 0x18,
+  DW_FORM_flag_present = 0x19,
+  DW_FORM_ref_sig8 = 0x20
+};
+
+enum Operation : uint16_t {
+  DW_OP_addr = 0x03,
+  DW_OP_deref = 0x06,
+  DW_OP_const1u = 0x08,
+  DW_OP_const1s = 0x09,
+  DW_OP_const2u = 0x0a,
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu = 0x10,
+  DW_OP_consts = 0x11,
+  DW_OP_dup = 0x12,
+  DW_OP_drop = 0x13,
+  DW_OP_over = 0x14,
+  DW_OP_pick = 0x15,
+  DW_OP_swap = 0x16,
+  DW_OP_rot = 0x17,
+  DW_OP_xderef = 0x18,
+  DW_OP_abs = 0x19,
+  DW_OP_and = 0x1a,
+  DW_OP_div = 0x1b,
+  DW_OP_minus = 0x1c,
+  DW_OP_mod = 0x1d,
+  DW_OP_mul = 0x1e,
+  DW_OP_neg = 0x1f,
+  DW_OP_not = 0x20,
+  DW_OP_or = 0x21,
+  DW_OP_plus = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl = 0x24,
+  DW_OP_shr = 0x25,
+  DW_OP_shra = 0x26,
+  DW_OP_xor = 0x27,
+  DW_OP_skip = 0x2f,
+  DW_OP_bra = 0x28,
+  DW_OP_eq = 0x29,
+  DW_OP_ge = 0x2a,
+  DW_OP_gt = 0x2b,
+  DW_OP_le = 0x2c,
+  DW_OP_lt = 0x2d,
+  DW_OP_ne = 0x2e,
+  DW_OP_lit0 = 0x30,
+  DW_OP_lit1 = 0x31,
+  DW_OP_lit2 = 0x32,
+  DW_OP_lit3 = 0x33,
+  DW_OP_lit4 = 0x34,
+  DW_OP_lit5 = 0x35,
+  DW_OP_lit6 = 0x36,
+  DW_OP_lit7 = 0x37,
+  DW_OP_lit8 = 0x38,
+  DW_OP_lit9 = 0x39,
+  DW_OP_lit10 = 0x3a,
+  DW_OP_lit11 = 0x3b,
+  DW_OP_lit12 = 0x3c,
+  DW_OP_lit13 = 0x3d,
+  DW_OP_lit14 = 0x3e,
+  DW_OP_lit15 = 0x3f,
+  DW_OP_lit16 = 0x40,
+  DW_OP_lit17 = 0x41,
+  DW_OP_lit18 = 0x42,
+  DW_OP_lit19 = 0x43,
+  DW_OP_lit20 = 0x44,
+  DW_OP_lit21 = 0x45,
+  DW_OP_lit22 = 0x46,
+  DW_OP_lit23 = 0x47,
+  DW_OP_lit24 = 0x48,
+  DW_OP_lit25 = 0x49,
+  DW_OP_lit26 = 0x4a,
+  DW_OP_lit27 = 0x4b,
+  DW_OP_lit28 = 0x4c,
+  DW_OP_lit29 = 0x4d,
+  DW_OP_lit30 = 0x4e,
+  DW_OP_lit31 = 0x4f,
+  DW_OP_reg0 = 0x50,
+  DW_OP_reg1 = 0x51,
+  DW_OP_reg2 = 0x52,
+  DW_OP_reg3 = 0x53,
+  DW_OP_reg4 = 0x54,
+  DW_OP_reg5 = 0x55,
+  DW_OP_reg6 = 0x56,
+  DW_OP_reg7 = 0x57,
+  DW_OP_reg8 = 0x58,
+  DW_OP_reg9 = 0x59,
+  DW_OP_reg10 = 0x5a,
+  DW_OP_reg11 = 0x5b,
+  DW_OP_reg12 = 0x5c,
+  DW_OP_reg13 = 0x5d,
+  DW_OP_reg14 = 0x5e,
+  DW_OP_reg15 = 0x5f,
+  DW_OP_reg16 = 0x60,
+  DW_OP_reg17 = 0x61,
+  DW_OP_reg18 = 0x62,
+  DW_OP_reg19 = 0x63,
+  DW_OP_reg20 = 0x64,
+  DW_OP_reg21 = 0x65,
+  DW_OP_reg22 = 0x66,
+  DW_OP_reg23 = 0x67,
+  DW_OP_reg24 = 0x68,
+  DW_OP_reg25 = 0x69,
+  DW_OP_reg26 = 0x6a,
+  DW_OP_reg27 = 0x6b,
+  DW_OP_reg28 = 0x6c,
+  DW_OP_reg29 = 0x6d,
+  DW_OP_reg30 = 0x6e,
+  DW_OP_reg31 = 0x6f,
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg1 = 0x71,
+  DW_OP_breg2 = 0x72,
+  DW_OP_breg3 = 0x73,
+  DW_OP_breg4 = 0x74,
+  DW_OP_breg5 = 0x75,
+  DW_OP_breg6 = 0x76,
+  DW_OP_breg7 = 0x77,
+  DW_OP_breg8 = 0x78,
+  DW_OP_breg9 = 0x79,
+  DW_OP_breg10 = 0x7a,
+  DW_OP_breg11 = 0x7b,
+  DW_OP_breg12 = 0x7c,
+  DW_OP_breg13 = 0x7d,
+  DW_OP_breg14 = 0x7e,
+  DW_OP_breg15 = 0x7f,
+  DW_OP_breg16 = 0x80,
+  DW_OP_breg17 = 0x81,
+  DW_OP_breg18 = 0x82,
+  DW_OP_breg19 = 0x83,
+  DW_OP_breg20 = 0x84,
+  DW_OP_breg21 = 0x85,
+  DW_OP_breg22 = 0x86,
+  DW_OP_breg23 = 0x87,
+  DW_OP_breg24 = 0x88,
+  DW_OP_breg25 = 0x89,
+  DW_OP_breg26 = 0x8a,
+  DW_OP_breg27 = 0x8b,
+  DW_OP_breg28 = 0x8c,
+  DW_OP_breg29 = 0x8d,
+  DW_OP_breg30 = 0x8e,
+  DW_OP_breg31 = 0x8f,
+  DW_OP_regx = 0x90,
+  DW_OP_fbreg = 0x91,
+  DW_OP_bregx = 0x92,
+  DW_OP_piece = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop = 0x96,
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2 = 0x98,
+  DW_OP_call4 = 0x99,
+  DW_OP_call_ref = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+  DW_OP_implicit_value = 0x9e,
+  DW_OP_stack_value = 0x9f,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final values are not yet specified. The
+  // values listed may differ from those used by other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_OP_entry_value = 0xa0,
+  DW_OP_const_type = 0xa1,
+  DW_OP_regval_type = 0xa2,
+  DW_OP_deref_type = 0xa3,
+  DW_OP_xderef_type = 0xa4,
+  DW_OP_convert = 0xa5,
+  DW_OP_reinterpret = 0xa6,
+#endif
+  DW_OP_lo_user = 0xe0,
+  DW_OP_hi_user = 0xff
+};
+
+enum BaseTypeEncoding : uint8_t {
+  DW_ATE_address = 0x01,
+  DW_ATE_boolean = 0x02,
+  DW_ATE_complex_float = 0x03,
+  DW_ATE_float = 0x04,
+  DW_ATE_signed = 0x05,
+  DW_ATE_signed_char = 0x06,
+  DW_ATE_unsigned = 0x07,
+  DW_ATE_unsigned_char = 0x08,
+  DW_ATE_imaginary_float = 0x09,
+  DW_ATE_packed_decimal = 0x0a,
+  DW_ATE_numeric_string = 0x0b,
+  DW_ATE_edited = 0x0c,
+  DW_ATE_signed_fixed = 0x0d,
+  DW_ATE_unsigned_fixed = 0x0e,
+  DW_ATE_decimal_float = 0x0f,
+  DW_ATE_UTF = 0x10,
+  DW_ATE_lo_user = 0x80,
+  DW_ATE_hi_user = 0xff
+};
+
+enum DecimalSign : uint8_t {
+  DW_DS_unsigned = 0x01,
+  DW_DS_leading_overpunch = 0x02,
+  DW_DS_trailing_overpunch = 0x03,
+  DW_DS_leading_separate = 0x04,
+  DW_DS_trailing_separate = 0x05
+};
+
+enum Endianity : uint8_t {
+  DW_END_default = 0x00,
+  DW_END_big = 0x01,
+  DW_END_little = 0x02,
+  DW_END_lo_user = 0x40,
+  DW_END_hi_user = 0xff
+};
+
+enum Accessibility : uint8_t {
+  DW_ACCESS_public = 0x01,
+  DW_ACCESS_protected = 0x02,
+  DW_ACCESS_private = 0x03
+};
+
+enum Visibility : uint8_t {
+  DW_VIS_local = 0x01,
+  DW_VIS_exported = 0x02,
+  DW_VIS_qualified = 0x03
+};
+
+enum Virtuality : uint8_t {
+  DW_VIRTUALITY_none = 0x00,
+  DW_VIRTUALITY_virtual = 0x01,
+  DW_VIRTUALITY_pure_virtual = 0x02
+};
+
+enum Language {
+  DW_LANG_C89 = 0x01,
+  DW_LANG_C = 0x02,
+  DW_LANG_Ada83 = 0x03,
+  DW_LANG_C_plus_plus = 0x04,
+  DW_LANG_Cobol74 = 0x05,
+  DW_LANG_Cobol85 = 0x06,
+  DW_LANG_Fortran77 = 0x07,
+  DW_LANG_Fortran90 = 0x08,
+  DW_LANG_Pascal83 = 0x09,
+  DW_LANG_Modula2 = 0x0a,
+  DW_LANG_Java = 0x0b,
+  DW_LANG_C99 = 0x0c,
+  DW_LANG_Ada95 = 0x0d,
+  DW_LANG_Fortran95 = 0x0e,
+  DW_LANG_PLI = 0x0f,
+  DW_LANG_ObjC = 0x10,
+  DW_LANG_ObjC_plus_plus = 0x11,
+  DW_LANG_UPC = 0x12,
+  DW_LANG_D = 0x13,
+  DW_LANG_Python = 0x14,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final values are not yet specified. The
+  // values listed may differ from those used by other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_LANG_OpenCL = 0x15,
+  DW_LANG_Go = 0x16,
+  DW_LANG_Modula3 = 0x17,
+  DW_LANG_Haskell = 0x18,
+  DW_LANG_C_plus_plus_03 = 0x19,
+  DW_LANG_C_plus_plus_11 = 0x1a,
+  DW_LANG_OCaml = 0x1b,
+  DW_LANG_Rust = 0x1c,
+  DW_LANG_C11 = 0x1d,
+  DW_LANG_Swift = 0x1e,
+  DW_LANG_Julia = 0x1f,
+#endif
+  DW_LANG_lo_user = 0x8000,
+  DW_LANG_hi_user = 0xffff
+};
+
+enum Identifier : uint8_t {
+  DW_ID_case_sensitive = 0x00,
+  DW_ID_up_case = 0x01,
+  DW_ID_down_case = 0x02,
+  DW_ID_case_insensitive = 0x03
+};
+
+enum CallingConvention : uint8_t {
+  DW_CC_normal = 0x01,
+  DW_CC_program = 0x02,
+  DW_CC_nocall = 0x03,
+  DW_CC_lo_user = 0x40,
+  DW_CC_hi_user = 0xff
+};
+
+enum Inline : uint8_t {
+  DW_INL_not_inlined = 0x00,
+  DW_INL_inlined = 0x01,
+  DW_INL_declared_not_inlined = 0x02,
+  DW_INL_declared_inlined = 0x03
+};
+
+enum ArrayOrdering : uint8_t {
+  DW_ORD_row_major = 0x00,
+  DW_ORD_col_major = 0x01
+};
+
+enum DiscriminantList : uint8_t {
+  DW_DSC_label = 0x00,
+  DW_DSC_range = 0x01
+};
+
+enum LineNumberOpcode : uint8_t {
+  DW_LNS_copy = 0x01,
+  DW_LNS_advance_pc = 0x02,
+  DW_LNS_advance_line = 0x03,
+  DW_LNS_set_file = 0x04,
+  DW_LNS_set_column = 0x05,
+  DW_LNS_negate_stmt = 0x06,
+  DW_LNS_set_basic_block = 0x07,
+  DW_LNS_const_add_pc = 0x08,
+  DW_LNS_fixed_advance_pc = 0x09,
+  DW_LNS_set_prologue_end = 0x0a,
+  DW_LNS_set_epilogue_begin = 0x0b,
+  DW_LNS_set_isa = 0x0c
+};
+
+enum LineNumberExtendedOpcode : uint8_t {
+  DW_LNE_end_sequence = 0x01,
+  DW_LNE_set_address = 0x02,
+  DW_LNE_define_file = 0x03,
+  DW_LNE_set_discriminator = 0x04,
+  DW_LNE_lo_user = 0x80,
+  DW_LNE_hi_user = 0xff
+};
+
+#ifdef INCLUDE_DWARF5_VALUES
+enum LineNumberFormat : uint8_t {
+  // Values to be added in Dwarf 5. Final values are not yet specified. The
+  // values listed may differ from those used by other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_LNF_path = 0x1,
+  DW_LNF_include_index = 0x2,
+  DW_LNF_timestamp = 0x3,
+  DW_LNF_size = 0x4,
+  DW_LNF_MD5 = 0x5,
+  DW_LNF_lo_user = 0x2000,
+  DW_LNF_hi_user = 0x3fff
+};
+#endif
+
+enum MacroInfo : uint8_t {
+  DW_MACINFO_define = 0x01,
+  DW_MACINFO_undef = 0x02,
+  DW_MACINFO_start_file = 0x03,
+  DW_MACINFO_end_file = 0x04,
+  DW_MACINFO_vendor_ext = 0xff
+};
+
+#ifdef INCLUDE_DWARF5_VALUES
+enum Macro : uint8_t {
+  // Values to be added in Dwarf 5. Final values are not yet specified. The
+  // values listed may differ from those used by other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_MACRO_define = 0x01,
+  DW_MACRO_undef = 0x02,
+  DW_MACRO_start_file = 0x03,
+  DW_MACRO_end_file = 0x04,
+  DW_MACRO_define_indirect = 0x05,
+  DW_MACRO_undef_indirect = 0x06,
+  DW_MACRO_transparent_include = 0x07,
+  DW_MACRO_define_indirectx = 0x0b,
+  DW_MACRO_undef_indirectx = 0x0c,
+  DW_MACRO_lo_user = 0xe0,
+  DW_MACRO_hi_user = 0xff
+};
+#endif
+
+const uint32_t CIE_ID_32 = 0xffffffff;
+const uint64_t CIE_ID_64 = 0xffffffffffffffff;
+
+enum CallFrameInstruction : uint8_t {
+  DW_CFA_advance_loc = 0x40,
+  DW_CFA_offset = 0x80,
+  DW_CFA_restore = 0xc0,
+  DW_CFA_nop = 0x00,
+  DW_CFA_set_loc = 0x01,
+  DW_CFA_advance_loc1 = 0x02,
+  DW_CFA_advance_loc2 = 0x03,
+  DW_CFA_advance_loc4 = 0x04,
+  DW_CFA_offset_extended = 0x05,
+  DW_CFA_restore_extended = 0x06,
+  DW_CFA_undefined = 0x07,
+  DW_CFA_same_value = 0x08,
+  DW_CFA_register = 0x09,
+  DW_CFA_remember_state = 0x0a,
+  DW_CFA_restore_state = 0x0b,
+  DW_CFA_def_cfa = 0x0c,
+  DW_CFA_def_cfa_register = 0x0d,
+  DW_CFA_def_cfa_offset = 0x0e,
+  DW_CFA_def_cfa_expression = 0x0f,
+  DW_CFA_expression = 0x10,
+  DW_CFA_offset_extended_sf = 0x11,
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+  DW_CFA_lo_user = 0x1c,
+  DW_CFA_hi_user = 0x3f
+};
+
+enum ExceptionHeaderValueFormat : uint8_t  {
+  DW_EH_PE_native = 0x00,
+  DW_EH_PE_uleb128 = 0x01,
+  DW_EH_PE_udata2 = 0x02,
+  DW_EH_PE_udata4 = 0x03,
+  DW_EH_PE_udata8 = 0x04,
+  DW_EH_PE_sleb128 = 0x09,
+  DW_EH_PE_sdata2 = 0x0A,
+  DW_EH_PE_sdata4 = 0x0B,
+  DW_EH_PE_sdata8 = 0x0C,
+  DW_EH_PE_omit = 0xFF,
+};
+
+enum ExceptionHeaderValueApplication : uint8_t {
+  DW_EH_PE_absptr = 0x00,
+  DW_EH_PE_pcrel = 0x10,
+  DW_EH_PE_textrel = 0x20,
+  DW_EH_PE_datarel = 0x30,
+  DW_EH_PE_funcrel = 0x40,
+  DW_EH_PE_aligned = 0x50,
+};
+
+enum CFIFormat : uint8_t {
+  // This is the original format as defined by the specification.
+  // It is used for the .debug_frame section.
+  DW_DEBUG_FRAME_FORMAT,
+  // Slightly modified format used for the .eh_frame section.
+  DW_EH_FRAME_FORMAT
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_DWARF_CONSTANTS_H_
diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc
new file mode 100644
index 0000000..866bf43
--- /dev/null
+++ b/compiler/debug/dwarf/dwarf_test.cc
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dwarf_test.h"
+
+#include "debug/dwarf/debug_frame_opcode_writer.h"
+#include "debug/dwarf/debug_info_entry_writer.h"
+#include "debug/dwarf/debug_line_opcode_writer.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/headers.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace dwarf {
+
+// Run the tests only on the host, since we need objdump.
+#ifndef ART_TARGET_ANDROID
+
+constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT;
+
+TEST_F(DwarfTest, DebugFrame) {
+  const bool is64bit = false;
+
+  // Pick an offset value which would catch Uleb vs Sleb errors.
+  const int offset = 40000;
+  ASSERT_EQ(UnsignedLeb128Size(offset / 4), 2u);
+  ASSERT_EQ(SignedLeb128Size(offset / 4), 3u);
+  DW_CHECK("Data alignment factor: -4");
+  const Reg reg(6);
+
+  // Test the opcodes in the order mentioned in the spec.
+  // There are usually several encoding variations of each opcode.
+  DebugFrameOpCodeWriter<> opcodes;
+  DW_CHECK("FDE");
+  int pc = 0;
+  for (int i : {0, 1, 0x3F, 0x40, 0xFF, 0x100, 0xFFFF, 0x10000}) {
+    pc += i;
+    opcodes.AdvancePC(pc);
+  }
+  DW_CHECK_NEXT("DW_CFA_advance_loc: 1 to 01000001");
+  DW_CHECK_NEXT("DW_CFA_advance_loc: 63 to 01000040");
+  DW_CHECK_NEXT("DW_CFA_advance_loc1: 64 to 01000080");
+  DW_CHECK_NEXT("DW_CFA_advance_loc1: 255 to 0100017f");
+  DW_CHECK_NEXT("DW_CFA_advance_loc2: 256 to 0100027f");
+  DW_CHECK_NEXT("DW_CFA_advance_loc2: 65535 to 0101027e");
+  DW_CHECK_NEXT("DW_CFA_advance_loc4: 65536 to 0102027e");
+  opcodes.DefCFA(reg, offset);
+  DW_CHECK_NEXT("DW_CFA_def_cfa: r6 (esi) ofs 40000");
+  opcodes.DefCFA(reg, -offset);
+  DW_CHECK_NEXT("DW_CFA_def_cfa_sf: r6 (esi) ofs -40000");
+  opcodes.DefCFARegister(reg);
+  DW_CHECK_NEXT("DW_CFA_def_cfa_register: r6 (esi)");
+  opcodes.DefCFAOffset(offset);
+  DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 40000");
+  opcodes.DefCFAOffset(-offset);
+  DW_CHECK_NEXT("DW_CFA_def_cfa_offset_sf: -40000");
+  uint8_t expr[] = { 0 };
+  opcodes.DefCFAExpression(expr, arraysize(expr));
+  DW_CHECK_NEXT("DW_CFA_def_cfa_expression");
+  opcodes.Undefined(reg);
+  DW_CHECK_NEXT("DW_CFA_undefined: r6 (esi)");
+  opcodes.SameValue(reg);
+  DW_CHECK_NEXT("DW_CFA_same_value: r6 (esi)");
+  opcodes.Offset(Reg(0x3F), -offset);
+  // Bad register likely means that it does not exist on x86,
+  // but we want to test high register numbers anyway.
+  DW_CHECK_NEXT("DW_CFA_offset: bad register: r63 at cfa-40000");
+  opcodes.Offset(Reg(0x40), -offset);
+  DW_CHECK_NEXT("DW_CFA_offset_extended: bad register: r64 at cfa-40000");
+  opcodes.Offset(Reg(0x40), offset);
+  DW_CHECK_NEXT("DW_CFA_offset_extended_sf: bad register: r64 at cfa+40000");
+  opcodes.ValOffset(reg, -offset);
+  DW_CHECK_NEXT("DW_CFA_val_offset: r6 (esi) at cfa-40000");
+  opcodes.ValOffset(reg, offset);
+  DW_CHECK_NEXT("DW_CFA_val_offset_sf: r6 (esi) at cfa+40000");
+  opcodes.Register(reg, Reg(1));
+  DW_CHECK_NEXT("DW_CFA_register: r6 (esi) in r1 (ecx)");
+  opcodes.Expression(reg, expr, arraysize(expr));
+  DW_CHECK_NEXT("DW_CFA_expression: r6 (esi)");
+  opcodes.ValExpression(reg, expr, arraysize(expr));
+  DW_CHECK_NEXT("DW_CFA_val_expression: r6 (esi)");
+  opcodes.Restore(Reg(0x3F));
+  DW_CHECK_NEXT("DW_CFA_restore: bad register: r63");
+  opcodes.Restore(Reg(0x40));
+  DW_CHECK_NEXT("DW_CFA_restore_extended: bad register: r64");
+  opcodes.Restore(reg);
+  DW_CHECK_NEXT("DW_CFA_restore: r6 (esi)");
+  opcodes.RememberState();
+  DW_CHECK_NEXT("DW_CFA_remember_state");
+  opcodes.RestoreState();
+  DW_CHECK_NEXT("DW_CFA_restore_state");
+  opcodes.Nop();
+  DW_CHECK_NEXT("DW_CFA_nop");
+
+  // Also test helpers.
+  opcodes.DefCFA(Reg(4), 100);  // ESP
+  DW_CHECK_NEXT("DW_CFA_def_cfa: r4 (esp) ofs 100");
+  opcodes.AdjustCFAOffset(8);
+  DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 108");
+  opcodes.RelOffset(Reg(0), 0);  // push R0
+  DW_CHECK_NEXT("DW_CFA_offset: r0 (eax) at cfa-108");
+  opcodes.RelOffset(Reg(1), 4);  // push R1
+  DW_CHECK_NEXT("DW_CFA_offset: r1 (ecx) at cfa-104");
+  opcodes.RelOffsetForMany(Reg(2), 8, 1 | (1 << 3), 4);  // push R2 and R5
+  DW_CHECK_NEXT("DW_CFA_offset: r2 (edx) at cfa-100");
+  DW_CHECK_NEXT("DW_CFA_offset: r5 (ebp) at cfa-96");
+  opcodes.RestoreMany(Reg(2), 1 | (1 << 3));  // pop R2 and R5
+  DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)");
+  DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
+
+  DebugFrameOpCodeWriter<> initial_opcodes;
+  WriteCIE(is64bit, Reg(is64bit ? 16 : 8),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
+  std::vector<uintptr_t> debug_frame_patches;
+  std::vector<uintptr_t> expected_patches { 28 };  // NOLINT
+  WriteFDE(is64bit, 0, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
+
+  EXPECT_EQ(expected_patches, debug_frame_patches);
+  CheckObjdumpOutput(is64bit, "-W");
+}
+
+TEST_F(DwarfTest, DebugFrame64) {
+  constexpr bool is64bit = true;
+  DebugFrameOpCodeWriter<> initial_opcodes;
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
+  DebugFrameOpCodeWriter<> opcodes;
+  std::vector<uintptr_t> debug_frame_patches;
+  std::vector<uintptr_t> expected_patches { 32 };  // NOLINT
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+                     kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
+  DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
+
+  EXPECT_EQ(expected_patches, debug_frame_patches);
+  CheckObjdumpOutput(is64bit, "-W");
+}
+
+// Test x86_64 register mapping. It is the only non-trivial architecture.
+// ARM, X86, and Mips have: dwarf_reg = art_reg + constant.
+TEST_F(DwarfTest, x86_64_RegisterMapping) {
+  constexpr bool is64bit = true;
+  DebugFrameOpCodeWriter<> opcodes;
+  for (int i = 0; i < 16; i++) {
+    opcodes.RelOffset(Reg::X86_64Core(i), 0);
+  }
+  DW_CHECK("FDE");
+  DW_CHECK_NEXT("DW_CFA_offset: r0 (rax)");
+  DW_CHECK_NEXT("DW_CFA_offset: r2 (rcx)");
+  DW_CHECK_NEXT("DW_CFA_offset: r1 (rdx)");
+  DW_CHECK_NEXT("DW_CFA_offset: r3 (rbx)");
+  DW_CHECK_NEXT("DW_CFA_offset: r7 (rsp)");
+  DW_CHECK_NEXT("DW_CFA_offset: r6 (rbp)");
+  DW_CHECK_NEXT("DW_CFA_offset: r4 (rsi)");
+  DW_CHECK_NEXT("DW_CFA_offset: r5 (rdi)");
+  DW_CHECK_NEXT("DW_CFA_offset: r8 (r8)");
+  DW_CHECK_NEXT("DW_CFA_offset: r9 (r9)");
+  DW_CHECK_NEXT("DW_CFA_offset: r10 (r10)");
+  DW_CHECK_NEXT("DW_CFA_offset: r11 (r11)");
+  DW_CHECK_NEXT("DW_CFA_offset: r12 (r12)");
+  DW_CHECK_NEXT("DW_CFA_offset: r13 (r13)");
+  DW_CHECK_NEXT("DW_CFA_offset: r14 (r14)");
+  DW_CHECK_NEXT("DW_CFA_offset: r15 (r15)");
+  DebugFrameOpCodeWriter<> initial_opcodes;
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
+  std::vector<uintptr_t> debug_frame_patches;
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+                     kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
+
+  CheckObjdumpOutput(is64bit, "-W");
+}
+
+TEST_F(DwarfTest, DebugLine) {
+  const bool is64bit = false;
+  const int code_factor_bits = 1;
+  DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits);
+
+  std::vector<std::string> include_directories;
+  include_directories.push_back("/path/to/source");
+  DW_CHECK("/path/to/source");
+
+  std::vector<FileEntry> files {
+    { "file0.c", 0, 1000, 2000 },
+    { "file1.c", 1, 1000, 2000 },
+    { "file2.c", 1, 1000, 2000 },
+  };
+  DW_CHECK("1\t0\t1000\t2000\tfile0.c");
+  DW_CHECK_NEXT("2\t1\t1000\t2000\tfile1.c");
+  DW_CHECK_NEXT("3\t1\t1000\t2000\tfile2.c");
+
+  DW_CHECK("Line Number Statements");
+  opcodes.SetAddress(0x01000000);
+  DW_CHECK_NEXT("Extended opcode 2: set Address to 0x1000000");
+  opcodes.AddRow();
+  DW_CHECK_NEXT("Copy");
+  opcodes.AdvancePC(0x01000100);
+  DW_CHECK_NEXT("Advance PC by 256 to 0x1000100");
+  opcodes.SetFile(2);
+  DW_CHECK_NEXT("Set File Name to entry 2 in the File Name Table");
+  opcodes.AdvanceLine(3);
+  DW_CHECK_NEXT("Advance Line by 2 to 3");
+  opcodes.SetColumn(4);
+  DW_CHECK_NEXT("Set column to 4");
+  opcodes.SetIsStmt(true);
+  DW_CHECK_NEXT("Set is_stmt to 1");
+  opcodes.SetIsStmt(false);
+  DW_CHECK_NEXT("Set is_stmt to 0");
+  opcodes.SetBasicBlock();
+  DW_CHECK_NEXT("Set basic block");
+  opcodes.SetPrologueEnd();
+  DW_CHECK_NEXT("Set prologue_end to true");
+  opcodes.SetEpilogueBegin();
+  DW_CHECK_NEXT("Set epilogue_begin to true");
+  opcodes.SetISA(5);
+  DW_CHECK_NEXT("Set ISA to 5");
+  opcodes.EndSequence();
+  DW_CHECK_NEXT("Extended opcode 1: End of Sequence");
+  opcodes.DefineFile("file.c", 0, 1000, 2000);
+  DW_CHECK_NEXT("Extended opcode 3: define new File Table entry");
+  DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName");
+  DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c");
+
+  std::vector<uintptr_t> debug_line_patches;
+  std::vector<uintptr_t> expected_patches { 87 };  // NOLINT
+  WriteDebugLineTable(include_directories, files, opcodes,
+                      0, &debug_line_data_, &debug_line_patches);
+
+  EXPECT_EQ(expected_patches, debug_line_patches);
+  CheckObjdumpOutput(is64bit, "-W");
+}
+
+// DWARF has special one byte codes which advance PC and line at the same time.
+TEST_F(DwarfTest, DebugLineSpecialOpcodes) {
+  const bool is64bit = false;
+  const int code_factor_bits = 1;
+  uint32_t pc = 0x01000000;
+  int line = 1;
+  DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits);
+  opcodes.SetAddress(pc);
+  size_t num_rows = 0;
+  DW_CHECK("Line Number Statements:");
+  DW_CHECK("Special opcode");
+  DW_CHECK("Advance PC by constant");
+  DW_CHECK("Decoded dump of debug contents of section .debug_line:");
+  DW_CHECK("Line number    Starting address");
+  for (int addr_delta = 0; addr_delta < 80; addr_delta += 2) {
+    for (int line_delta = 16; line_delta >= -16; --line_delta) {
+      pc += addr_delta;
+      line += line_delta;
+      opcodes.AddRow(pc, line);
+      num_rows++;
+      ASSERT_EQ(opcodes.CurrentAddress(), pc);
+      ASSERT_EQ(opcodes.CurrentLine(), line);
+      char expected[1024];
+      snprintf(expected, sizeof(expected), "%i           0x%x", line, pc);
+      DW_CHECK_NEXT(expected);
+    }
+  }
+  EXPECT_LT(opcodes.data()->size(), num_rows * 3);
+
+  std::vector<std::string> directories;
+  std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } };  // NOLINT
+  std::vector<uintptr_t> debug_line_patches;
+  WriteDebugLineTable(directories, files, opcodes,
+                      0, &debug_line_data_, &debug_line_patches);
+
+  CheckObjdumpOutput(is64bit, "-W -WL");
+}
+
+TEST_F(DwarfTest, DebugInfo) {
+  constexpr bool is64bit = false;
+  DebugAbbrevWriter<> debug_abbrev(&debug_abbrev_data_);
+  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev);
+  DW_CHECK("Contents of the .debug_info section:");
+  info.StartTag(dwarf::DW_TAG_compile_unit);
+  DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
+  info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
+  DW_CHECK_NEXT("DW_AT_producer    : (indirect string, offset: 0x0): Compiler name");
+  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000);
+  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1000000");
+  info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
+  DW_CHECK_NEXT("DW_AT_high_pc     : 0x2000000");
+  info.StartTag(dwarf::DW_TAG_subprogram);
+  DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
+  info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
+  DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0xe): Foo");
+  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000);
+  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1010000");
+  info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
+  DW_CHECK_NEXT("DW_AT_high_pc     : 0x1020000");
+  info.EndTag();  // DW_TAG_subprogram
+  info.StartTag(dwarf::DW_TAG_subprogram);
+  DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
+  info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
+  DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0x12): Bar");
+  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000);
+  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1020000");
+  info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000);
+  DW_CHECK_NEXT("DW_AT_high_pc     : 0x1030000");
+  info.EndTag();  // DW_TAG_subprogram
+  info.EndTag();  // DW_TAG_compile_unit
+  // Test that the previous sibling list was terminated and that empty children work.
+  info.StartTag(dwarf::DW_TAG_compile_unit);
+  info.EndTag();  // DW_TAG_compile_unit
+
+  // The abbrev table is just a side product, but check it as well.
+  DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)");
+  DW_CHECK("Contents of the .debug_abbrev section:");
+  DW_CHECK("1      DW_TAG_compile_unit    [has children]");
+  DW_CHECK_NEXT("DW_AT_producer     DW_FORM_strp");
+  DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
+  DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
+  DW_CHECK("2      DW_TAG_subprogram    [no children]");
+  DW_CHECK_NEXT("DW_AT_name         DW_FORM_strp");
+  DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
+  DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
+  DW_CHECK("3      DW_TAG_compile_unit    [no children]");
+
+  std::vector<uintptr_t> debug_info_patches;
+  std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 };  // NOLINT
+  dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info,
+                          0, &debug_info_data_, &debug_info_patches);
+
+  EXPECT_EQ(expected_patches, debug_info_patches);
+  CheckObjdumpOutput(is64bit, "-W");
+}
+
+#endif  // ART_TARGET_ANDROID
+
+}  // namespace dwarf
+}  // namespace art
diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h
new file mode 100644
index 0000000..e2f0a65
--- /dev/null
+++ b/compiler/debug/dwarf/dwarf_test.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_DWARF_TEST_H_
+#define ART_COMPILER_DEBUG_DWARF_DWARF_TEST_H_
+
+#include <cstring>
+#include <dirent.h>
+#include <memory>
+#include <set>
+#include <stdio.h>
+#include <string>
+#include <sys/types.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "elf_builder.h"
+#include "gtest/gtest.h"
+#include "linker/file_output_stream.h"
+#include "os.h"
+
+namespace art {
+namespace dwarf {
+
+#define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__)
+#define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__)
+
+class DwarfTest : public CommonRuntimeTest {
+ public:
+  static constexpr bool kPrintObjdumpOutput = false;  // debugging.
+
+  struct ExpectedLine {
+    std::string substring;
+    bool next;
+    const char* at_file;
+    int at_line;
+  };
+
+  // Check that the objdump output contains the given substring.
+  // If next is true, it must be the next line.  Otherwise lines are skipped.
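+  // For example (as used by the tests in dwarf_test.cc):
+  //   opcodes.Nop();
+  //   DW_CHECK_NEXT("DW_CFA_nop");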
+  void Check(const char* substr, bool next, const char* at_file, int at_line) {
+    expected_lines_.push_back(ExpectedLine {substr, next, at_file, at_line});
+  }
+
+  // Pretty-print the generated DWARF data using objdump.
+  template<typename ElfTypes>
+  std::vector<std::string> Objdump(const char* args) {
+    // Write simple elf file with just the DWARF sections.
+    InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
+    ScratchFile file;
+    FileOutputStream output_stream(file.GetFile());
+    ElfBuilder<ElfTypes> builder(isa, nullptr, &output_stream);
+    builder.Start();
+    if (!debug_info_data_.empty()) {
+      builder.WriteSection(".debug_info", &debug_info_data_);
+    }
+    if (!debug_abbrev_data_.empty()) {
+      builder.WriteSection(".debug_abbrev", &debug_abbrev_data_);
+    }
+    if (!debug_str_data_.empty()) {
+      builder.WriteSection(".debug_str", &debug_str_data_);
+    }
+    if (!debug_line_data_.empty()) {
+      builder.WriteSection(".debug_line", &debug_line_data_);
+    }
+    if (!debug_frame_data_.empty()) {
+      builder.WriteSection(".debug_frame", &debug_frame_data_);
+    }
+    builder.End();
+    EXPECT_TRUE(builder.Good());
+
+    // Read the elf file back using objdump.
+    std::vector<std::string> lines;
+    std::string cmd = GetAndroidHostToolsDir();
+    cmd = cmd + "objdump " + args + " " + file.GetFilename() + " 2>&1";
+    FILE* output = popen(cmd.data(), "r");
+    char buffer[1024];
+    const char* line;
+    while ((line = fgets(buffer, sizeof(buffer), output)) != nullptr) {
+      if (kPrintObjdumpOutput) {
+        printf("%s", line);
+      }
+      if (line[0] != '\0' && line[0] != '\n') {
+        EXPECT_TRUE(strstr(line, "objdump: Error:") == nullptr) << line;
+        EXPECT_TRUE(strstr(line, "objdump: Warning:") == nullptr) << line;
+        std::string str(line);
+        if (str.back() == '\n') {
+          str.pop_back();
+        }
+        lines.push_back(str);
+      }
+    }
+    pclose(output);
+    return lines;
+  }
+
+  std::vector<std::string> Objdump(bool is64bit, const char* args) {
+    if (is64bit) {
+      return Objdump<ElfTypes64>(args);
+    } else {
+      return Objdump<ElfTypes32>(args);
+    }
+  }
+
+  // Compare objdump output to the recorded checks.
+  void CheckObjdumpOutput(bool is64bit, const char* args) {
+    std::vector<std::string> actual_lines = Objdump(is64bit, args);
+    auto actual_line = actual_lines.begin();
+    for (const ExpectedLine& expected_line : expected_lines_) {
+      const std::string& substring = expected_line.substring;
+      if (actual_line == actual_lines.end()) {
+        ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
+            "Expected '" << substring << "'.\n" <<
+            "Seen end of output.";
+      } else if (expected_line.next) {
+        if (actual_line->find(substring) == std::string::npos) {
+          ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
+            "Expected '" << substring << "'.\n" <<
+            "Seen '" << actual_line->data() << "'.";
+        } else {
+          // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data());
+        }
+        actual_line++;
+      } else {
+        bool found = false;
+        for (auto it = actual_line; it < actual_lines.end(); it++) {
+          if (it->find(substring) != std::string::npos) {
+            actual_line = it;
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
+            "Expected '" << substring << "'.\n" <<
+            "Not found anywhere in the rest of the output.";
+        } else {
+          // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data());
+          actual_line++;
+        }
+      }
+    }
+  }
+
+  // Buffers which are going to be assembled into an ELF file and passed to objdump.
+  std::vector<uint8_t> debug_frame_data_;
+  std::vector<uint8_t> debug_info_data_;
+  std::vector<uint8_t> debug_abbrev_data_;
+  std::vector<uint8_t> debug_str_data_;
+  std::vector<uint8_t> debug_line_data_;
+
+  // The expected output of objdump.
+  std::vector<ExpectedLine> expected_lines_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_DWARF_TEST_H_
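
The Objdump() / CheckObjdumpOutput() pair above is a golden-output test: capture a
tool's stdout through popen() and assert that a list of expected substrings appears
in order. Below is a standalone C++ sketch of the same pattern under simplified
assumptions (a POSIX shell with printf stands in for objdump; RunAndCapture and
MatchInOrder are hypothetical helpers, not ART code):

#include <cstdio>
#include <string>
#include <vector>

// Run a shell command and capture its stdout line by line.
static std::vector<std::string> RunAndCapture(const std::string& cmd) {
  std::vector<std::string> lines;
  FILE* out = popen(cmd.c_str(), "r");
  if (out == nullptr) {
    return lines;  // Could not start the tool.
  }
  char buffer[1024];
  while (fgets(buffer, sizeof(buffer), out) != nullptr) {
    std::string line(buffer);
    if (!line.empty() && line.back() == '\n') {
      line.pop_back();
    }
    lines.push_back(line);
  }
  pclose(out);
  return lines;
}

// Check that every expected substring occurs, in order, in the output.
static bool MatchInOrder(const std::vector<std::string>& actual,
                         const std::vector<std::string>& expected) {
  size_t pos = 0;
  for (const std::string& want : expected) {
    while (pos < actual.size() && actual[pos].find(want) == std::string::npos) {
      ++pos;
    }
    if (pos == actual.size()) {
      return false;  // Output exhausted before 'want' was found.
    }
    ++pos;  // Resume the scan on the following line.
  }
  return true;
}

int main() {
  std::vector<std::string> lines = RunAndCapture("printf 'a\\nb\\nc\\n'");
  return MatchInOrder(lines, {"a", "c"}) ? 0 : 1;
}

The ++pos after each match mirrors the handling of non-"next" expected lines above:
once a line matches, the search continues from the following line of output.
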
diff --git a/compiler/debug/dwarf/expression.h b/compiler/debug/dwarf/expression.h
new file mode 100644
index 0000000..fafc046
--- /dev/null
+++ b/compiler/debug/dwarf/expression.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_EXPRESSION_H_
+#define ART_COMPILER_DEBUG_DWARF_EXPRESSION_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for DWARF expressions which are used in .debug_info and .debug_loc sections.
+// See the DWARF specification for the precise meaning of the opcodes.
+// If multiple equivalent encodings are possible, it will choose the most compact one.
+// The writer is not exhaustive - it only implements opcodes we have needed so far.
+class Expression : private Writer<> {
+ public:
+  using Writer<>::data;
+  using Writer<>::size;
+
+  // Push signed integer on the stack.
+  void WriteOpConsts(int32_t value) {
+    if (0 <= value && value < 32) {
+      PushUint8(DW_OP_lit0 + value);
+    } else {
+      PushUint8(DW_OP_consts);
+      PushSleb128(value);
+    }
+  }
+
+  // Push unsigned integer on the stack.
+  void WriteOpConstu(uint32_t value) {
+    if (value < 32) {
+      PushUint8(DW_OP_lit0 + value);
+    } else {
+      PushUint8(DW_OP_constu);
+      PushUleb128(value);
+    }
+  }
+
+  // Variable is stored in given register.
+  void WriteOpReg(uint32_t dwarf_reg_num) {
+    if (dwarf_reg_num < 32) {
+      PushUint8(DW_OP_reg0 + dwarf_reg_num);
+    } else {
+      PushUint8(DW_OP_regx);
+      PushUleb128(dwarf_reg_num);
+    }
+  }
+
+  // Variable is stored on stack.  Also see DW_AT_frame_base.
+  void WriteOpFbreg(int32_t stack_offset) {
+    PushUint8(DW_OP_fbreg);
+    PushSleb128(stack_offset);
+  }
+
+  // The variable is stored in multiple locations (pieces).
+  void WriteOpPiece(uint32_t num_bytes) {
+    PushUint8(DW_OP_piece);
+    PushUleb128(num_bytes);
+  }
+
+  // Loads 32-bit or 64-bit value depending on architecture.
+  void WriteOpDeref() { PushUint8(DW_OP_deref); }
+
+  // Loads value of given byte size.
+  void WriteOpDerefSize(uint8_t num_bytes) {
+    PushUint8(DW_OP_deref_size);
+    PushUint8(num_bytes);
+  }
+
+  // Pop two values and push their sum.
+  void WriteOpPlus() { PushUint8(DW_OP_plus); }
+
+  // Add constant value to value on top of stack.
+  void WriteOpPlusUconst(uint32_t offset) {
+    PushUint8(DW_OP_plus_uconst);
+    PushUleb128(offset);
+  }
+
+  // Negate top of stack.
+  void WriteOpNeg() { PushUint8(DW_OP_neg); }
+
+  // Pop two values and push their bitwise-AND.
+  void WriteOpAnd() { PushUint8(DW_OP_and); }
+
+  // Push stack base pointer as determined from .debug_frame.
+  void WriteOpCallFrameCfa() { PushUint8(DW_OP_call_frame_cfa); }
+
+  // Push address of the variable we are working with.
+  void WriteOpPushObjectAddress() { PushUint8(DW_OP_push_object_address); }
+
+  // Returns the top of stack as the value of the variable.
+  // Otherwise, the top of stack is the variable's location.
+  void WriteOpStackValue() { PushUint8(DW_OP_stack_value); }
+
+  explicit Expression(std::vector<uint8_t>* buffer) : Writer<>(buffer) {
+    buffer->clear();
+  }
+};
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_EXPRESSION_H_
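
WriteOpConsts() above quietly chooses between a single DW_OP_lit<n> byte (for
0..31) and DW_OP_consts followed by a SLEB128 payload. A minimal standalone
sketch of that choice, assuming the opcode values from the DWARF specification
(0x30 for DW_OP_lit0, 0x11 for DW_OP_consts) rather than ART's dwarf_constants.h:

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint8_t kDwOpLit0 = 0x30;    // DW_OP_lit0 (DWARF spec value).
constexpr uint8_t kDwOpConsts = 0x11;  // DW_OP_consts (DWARF spec value).

static void EncodeSleb128(std::vector<uint8_t>* out, int32_t value) {
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7f;
    value >>= 7;  // Arithmetic shift keeps the sign on mainstream compilers.
    // Stop once the remaining bits and the sign bit are fully encoded.
    if ((value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40) != 0)) {
      more = false;
    } else {
      byte |= 0x80;  // Continuation bit.
    }
    out->push_back(byte);
  }
}

static std::vector<uint8_t> WriteOpConsts(int32_t value) {
  std::vector<uint8_t> expr;
  if (0 <= value && value < 32) {
    expr.push_back(kDwOpLit0 + value);  // One byte total.
  } else {
    expr.push_back(kDwOpConsts);
    EncodeSleb128(&expr, value);        // Opcode plus SLEB128 payload.
  }
  return expr;
}

int main() {
  for (int32_t v : {5, 31, 32, -2, 100000}) {
    std::vector<uint8_t> e = WriteOpConsts(v);
    printf("%7d -> %zu byte(s):", v, e.size());
    for (uint8_t b : e) printf(" %02x", b);
    printf("\n");
  }
  return 0;
}

Running it shows that 5 and 31 cost one byte while 32, -2 and 100000 pay the
opcode-plus-SLEB128 price, which is exactly the trade-off the branch in
WriteOpConsts encodes.
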
diff --git a/compiler/debug/dwarf/headers.h b/compiler/debug/dwarf/headers.h
new file mode 100644
index 0000000..146d9fd
--- /dev/null
+++ b/compiler/debug/dwarf/headers.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_HEADERS_H_
+#define ART_COMPILER_DEBUG_DWARF_HEADERS_H_
+
+#include <cstdint>
+
+#include "debug/dwarf/debug_frame_opcode_writer.h"
+#include "debug/dwarf/debug_info_entry_writer.h"
+#include "debug/dwarf/debug_line_opcode_writer.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/register.h"
+#include "debug/dwarf/writer.h"
+#include "utils/array_ref.h"
+
+namespace art {
+namespace dwarf {
+
+// Note that all headers start with 32-bit length.
+// DWARF also supports 64-bit lengths, but we never use that.
+// It is intended to support very large debug sections (>4GB),
+// and compilers are expected *not* to use it by default.
+// In particular, it is not related to machine architecture.
+
+// Write common information entry (CIE) to .debug_frame or .eh_frame section.
+template<typename Vector>
+void WriteCIE(bool is64bit,
+              Reg return_address_register,
+              const DebugFrameOpCodeWriter<Vector>& opcodes,
+              CFIFormat format,
+              std::vector<uint8_t>* buffer) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+  Writer<> writer(buffer);
+  size_t cie_header_start_ = writer.data()->size();
+  writer.PushUint32(0);  // Length placeholder.
+  writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
+  writer.PushUint8(1);   // Version.
+  writer.PushString("zR");
+  writer.PushUleb128(DebugFrameOpCodeWriter<Vector>::kCodeAlignmentFactor);
+  writer.PushSleb128(DebugFrameOpCodeWriter<Vector>::kDataAlignmentFactor);
+  writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
+  writer.PushUleb128(1);  // z: Augmentation data size.
+  if (is64bit) {
+    if (format == DW_EH_FRAME_FORMAT) {
+      writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8);   // R: Pointer encoding.
+    } else {
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+      writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8);  // R: Pointer encoding.
+    }
+  } else {
+    if (format == DW_EH_FRAME_FORMAT) {
+      writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);   // R: Pointer encoding.
+    } else {
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+      writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
+    }
+  }
+  writer.PushData(opcodes.data());
+  writer.Pad(is64bit ? 8 : 4);
+  writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4);
+}
+
+// Write frame description entry (FDE) to .debug_frame or .eh_frame section.
+inline
+void WriteFDE(bool is64bit,
+              uint64_t section_address,  // Absolute address of the section.
+              uint64_t cie_address,  // Absolute address of last CIE.
+              uint64_t code_address,
+              uint64_t code_size,
+              const ArrayRef<const uint8_t>& opcodes,
+              CFIFormat format,
+              uint64_t buffer_address,  // Address of buffer in linked application.
+              std::vector<uint8_t>* buffer,
+              std::vector<uintptr_t>* patch_locations) {
+  CHECK_GE(cie_address, section_address);
+  CHECK_GE(buffer_address, section_address);
+
+  Writer<> writer(buffer);
+  size_t fde_header_start = writer.data()->size();
+  writer.PushUint32(0);  // Length placeholder.
+  if (format == DW_EH_FRAME_FORMAT) {
+    uint32_t cie_pointer = (buffer_address + buffer->size()) - cie_address;
+    writer.PushUint32(cie_pointer);
+  } else {
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    uint32_t cie_pointer = cie_address - section_address;
+    writer.PushUint32(cie_pointer);
+  }
+  if (format == DW_EH_FRAME_FORMAT) {
+    // .eh_frame encodes the location as relative address.
+    code_address -= buffer_address + buffer->size();
+  } else {
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    // Relocate code_address if it has absolute value.
+    patch_locations->push_back(buffer_address + buffer->size() - section_address);
+  }
+  if (is64bit) {
+    writer.PushUint64(code_address);
+    writer.PushUint64(code_size);
+  } else {
+    writer.PushUint32(code_address);
+    writer.PushUint32(code_size);
+  }
+  writer.PushUleb128(0);  // Augmentation data size.
+  writer.PushData(opcodes.data(), opcodes.size());
+  writer.Pad(is64bit ? 8 : 4);
+  writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4);
+}
+
+// Write compilation unit (CU) to .debug_info section.
+template<typename Vector>
+void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
+                      const DebugInfoEntryWriter<Vector>& entries,
+                      size_t debug_info_offset,  // offset from start of .debug_info.
+                      std::vector<uint8_t>* debug_info,
+                      std::vector<uintptr_t>* debug_info_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+  Writer<> writer(debug_info);
+  size_t start = writer.data()->size();
+  writer.PushUint32(0);  // Length placeholder.
+  writer.PushUint16(4);  // Version.
+  writer.PushUint32(debug_abbrev_offset);
+  writer.PushUint8(entries.Is64bit() ? 8 : 4);
+  size_t entries_offset = writer.data()->size();
+  DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize);
+  writer.PushData(entries.data());
+  writer.UpdateUint32(start, writer.data()->size() - start - 4);
+  // Copy patch locations and make them relative to .debug_info section.
+  for (uintptr_t patch_location : entries.GetPatchLocations()) {
+    debug_info_patches->push_back(debug_info_offset + entries_offset + patch_location);
+  }
+}
+
+struct FileEntry {
+  std::string file_name;
+  int directory_index;
+  int modification_time;
+  int file_size;
+};
+
+// Write line table to .debug_line section.
+template<typename Vector>
+void WriteDebugLineTable(const std::vector<std::string>& include_directories,
+                         const std::vector<FileEntry>& files,
+                         const DebugLineOpCodeWriter<Vector>& opcodes,
+                         size_t debug_line_offset,  // offset from start of .debug_line.
+                         std::vector<uint8_t>* debug_line,
+                         std::vector<uintptr_t>* debug_line_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+  Writer<> writer(debug_line);
+  size_t header_start = writer.data()->size();
+  writer.PushUint32(0);  // Section-length placeholder.
+  writer.PushUint16(3);  // .debug_line version.
+  size_t header_length_pos = writer.data()->size();
+  writer.PushUint32(0);  // Header-length placeholder.
+  writer.PushUint8(1 << opcodes.GetCodeFactorBits());
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kDefaultIsStmt ? 1 : 0);
+  writer.PushInt8(DebugLineOpCodeWriter<Vector>::kLineBase);
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kLineRange);
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kOpcodeBase);
+  static const int opcode_lengths[DebugLineOpCodeWriter<Vector>::kOpcodeBase] = {
+      0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
+  for (int i = 1; i < DebugLineOpCodeWriter<Vector>::kOpcodeBase; i++) {
+    writer.PushUint8(opcode_lengths[i]);
+  }
+  for (const std::string& directory : include_directories) {
+    writer.PushData(directory.data(), directory.size() + 1);
+  }
+  writer.PushUint8(0);  // Terminate include_directories list.
+  for (const FileEntry& file : files) {
+    writer.PushData(file.file_name.data(), file.file_name.size() + 1);
+    writer.PushUleb128(file.directory_index);
+    writer.PushUleb128(file.modification_time);
+    writer.PushUleb128(file.file_size);
+  }
+  writer.PushUint8(0);  // Terminate file list.
+  writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4);
+  size_t opcodes_offset = writer.data()->size();
+  writer.PushData(opcodes.data());
+  writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
+  // Copy patch locations and make them relative to .debug_line section.
+  for (uintptr_t patch_location : opcodes.GetPatchLocations()) {
+    debug_line_patches->push_back(debug_line_offset + opcodes_offset + patch_location);
+  }
+}
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_HEADERS_H_
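
All four writers above share the same framing trick: push a 32-bit zero as a
length placeholder, emit the unit body, then backpatch the length, which per
DWARF convention excludes the length field itself. A standalone sketch of just
that pattern (PushUint32/UpdateUint32 are re-implemented locally as stand-ins
for ART's Writer<>, not the real thing):

#include <cstdint>
#include <cstdio>
#include <vector>

static void PushUint32(std::vector<uint8_t>* buf, uint32_t v) {
  for (int shift = 0; shift < 32; shift += 8) {
    buf->push_back((v >> shift) & 0xff);  // Little-endian.
  }
}

static void UpdateUint32(std::vector<uint8_t>* buf, size_t offset, uint32_t v) {
  for (int i = 0; i < 4; ++i) {
    (*buf)[offset + i] = (v >> (i * 8)) & 0xff;
  }
}

int main() {
  std::vector<uint8_t> buf;
  size_t start = buf.size();
  PushUint32(&buf, 0);                     // Length placeholder.
  buf.insert(buf.end(), {4, 0, 1, 2, 3});  // Fake unit body (version + payload).
  // The length covers everything after the 4-byte length field itself.
  UpdateUint32(&buf, start, buf.size() - start - 4);
  uint32_t length = 0;
  for (int i = 3; i >= 0; --i) {
    length = (length << 8) | buf[start + i];  // Read back little-endian.
  }
  printf("unit length = %u, total bytes = %zu\n", length, buf.size());
  return 0;
}
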
diff --git a/compiler/debug/dwarf/register.h b/compiler/debug/dwarf/register.h
new file mode 100644
index 0000000..24bacac
--- /dev/null
+++ b/compiler/debug/dwarf/register.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_REGISTER_H_
+#define ART_COMPILER_DEBUG_DWARF_REGISTER_H_
+
+namespace art {
+namespace dwarf {
+
+// Represents DWARF register.
+class Reg {
+ public:
+  explicit Reg(int reg_num) : num_(reg_num) { }
+  int num() const { return num_; }
+
+  // TODO: Arm S0-S31 register mapping is obsolescent.
+  //   We should use VFP-v3/Neon D0-D31 mapping instead.
+  //   However, D0 is aliased to pair of S0 and S1, so using that
+  //   mapping we cannot easily say S0 is spilled and S1 is not.
+  //   There are ways around this in DWARF but they are complex.
+  //   It would be much simpler to always spill whole D registers.
+  //   Arm64 mapping is correct since we already do this there.
+  //   libunwind might struggle with the new mapping as well.
+
+  static Reg ArmCore(int num) { return Reg(num); }  // R0-R15.
+  static Reg ArmFp(int num) { return Reg(64 + num); }  // S0-S31.
+  static Reg ArmDp(int num) { return Reg(256 + num); }  // D0-D31.
+  static Reg Arm64Core(int num) { return Reg(num); }  // X0-X31.
+  static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
+  static Reg MipsCore(int num) { return Reg(num); }
+  static Reg Mips64Core(int num) { return Reg(num); }
+  static Reg MipsFp(int num) { return Reg(32 + num); }
+  static Reg Mips64Fp(int num) { return Reg(32 + num); }
+  static Reg X86Core(int num) { return Reg(num); }
+  static Reg X86Fp(int num) { return Reg(21 + num); }
+  static Reg X86_64Core(int num) {
+    static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5};
+    return Reg(num < 8 ? map[num] : num);
+  }
+  static Reg X86_64Fp(int num) { return Reg(17 + num); }
+
+ private:
+  int num_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_REGISTER_H_
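
The lookup table in X86_64Core() above exists because the x86-64 machine
encoding order and the DWARF register numbering disagree for the first eight
GPRs (DWARF swaps RCX/RDX and reorders RSP/RBP/RSI/RDI), while R8-R15 coincide.
A standalone sketch that prints the mapping (register names follow the x86-64
psABI; the function name is illustrative, not ART's):

#include <cstdio>

static int X86_64CoreToDwarf(int num) {
  static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5};
  return num < 8 ? map[num] : num;  // R8-R15 already match.
}

int main() {
  static const char* const kMachineOrder[8] = {
      "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"};
  for (int i = 0; i < 8; ++i) {
    printf("machine %d (%s) -> DWARF %d\n", i, kMachineOrder[i],
           X86_64CoreToDwarf(i));
  }
  return 0;
}
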
diff --git a/compiler/debug/dwarf/writer.h b/compiler/debug/dwarf/writer.h
new file mode 100644
index 0000000..95912ad
--- /dev/null
+++ b/compiler/debug/dwarf/writer.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_DWARF_WRITER_H_
+#define ART_COMPILER_DEBUG_DWARF_WRITER_H_
+
+#include <type_traits>
+#include <vector>
+#include "base/bit_utils.h"
+#include "base/logging.h"
+#include "leb128.h"
+
+namespace art {
+namespace dwarf {
+
+// The base class for all DWARF writers.
+template <typename Vector = std::vector<uint8_t>>
+class Writer {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  void PushUint8(int value) {
+    DCHECK_GE(value, 0);
+    DCHECK_LE(value, UINT8_MAX);
+    data_->push_back(value & 0xff);
+  }
+
+  void PushUint16(int value) {
+    DCHECK_GE(value, 0);
+    DCHECK_LE(value, UINT16_MAX);
+    data_->push_back((value >> 0) & 0xff);
+    data_->push_back((value >> 8) & 0xff);
+  }
+
+  void PushUint32(uint32_t value) {
+    data_->push_back((value >> 0) & 0xff);
+    data_->push_back((value >> 8) & 0xff);
+    data_->push_back((value >> 16) & 0xff);
+    data_->push_back((value >> 24) & 0xff);
+  }
+
+  void PushUint32(int value) {
+    DCHECK_GE(value, 0);
+    PushUint32(static_cast<uint32_t>(value));
+  }
+
+  void PushUint32(uint64_t value) {
+    DCHECK_LE(value, UINT32_MAX);
+    PushUint32(static_cast<uint32_t>(value));
+  }
+
+  void PushUint64(uint64_t value) {
+    data_->push_back((value >> 0) & 0xff);
+    data_->push_back((value >> 8) & 0xff);
+    data_->push_back((value >> 16) & 0xff);
+    data_->push_back((value >> 24) & 0xff);
+    data_->push_back((value >> 32) & 0xff);
+    data_->push_back((value >> 40) & 0xff);
+    data_->push_back((value >> 48) & 0xff);
+    data_->push_back((value >> 56) & 0xff);
+  }
+
+  void PushInt8(int value) {
+    DCHECK_GE(value, INT8_MIN);
+    DCHECK_LE(value, INT8_MAX);
+    PushUint8(static_cast<uint8_t>(value));
+  }
+
+  void PushInt16(int value) {
+    DCHECK_GE(value, INT16_MIN);
+    DCHECK_LE(value, INT16_MAX);
+    PushUint16(static_cast<uint16_t>(value));
+  }
+
+  void PushInt32(int value) {
+    PushUint32(static_cast<uint32_t>(value));
+  }
+
+  void PushInt64(int64_t value) {
+    PushUint64(static_cast<uint64_t>(value));
+  }
+
+  // Variable-length encoders.
+
+  void PushUleb128(uint32_t value) {
+    EncodeUnsignedLeb128(data_, value);
+  }
+
+  void PushUleb128(int value) {
+    DCHECK_GE(value, 0);
+    EncodeUnsignedLeb128(data_, value);
+  }
+
+  void PushSleb128(int value) {
+    EncodeSignedLeb128(data_, value);
+  }
+
+  // Miscellaneous functions.
+
+  void PushString(const char* value) {
+    data_->insert(data_->end(), value, value + strlen(value) + 1);
+  }
+
+  void PushData(const uint8_t* ptr, size_t num_bytes) {
+    data_->insert(data_->end(), ptr, ptr + num_bytes);
+  }
+
+  void PushData(const char* ptr, size_t num_bytes) {
+    data_->insert(data_->end(), ptr, ptr + num_bytes);
+  }
+
+  void PushData(const Vector* buffer) {
+    data_->insert(data_->end(), buffer->begin(), buffer->end());
+  }
+
+  void UpdateUint32(size_t offset, uint32_t value) {
+    DCHECK_LT(offset + 3, data_->size());
+    (*data_)[offset + 0] = (value >> 0) & 0xFF;
+    (*data_)[offset + 1] = (value >> 8) & 0xFF;
+    (*data_)[offset + 2] = (value >> 16) & 0xFF;
+    (*data_)[offset + 3] = (value >> 24) & 0xFF;
+  }
+
+  void UpdateUint64(size_t offset, uint64_t value) {
+    DCHECK_LT(offset + 7, data_->size());
+    (*data_)[offset + 0] = (value >> 0) & 0xFF;
+    (*data_)[offset + 1] = (value >> 8) & 0xFF;
+    (*data_)[offset + 2] = (value >> 16) & 0xFF;
+    (*data_)[offset + 3] = (value >> 24) & 0xFF;
+    (*data_)[offset + 4] = (value >> 32) & 0xFF;
+    (*data_)[offset + 5] = (value >> 40) & 0xFF;
+    (*data_)[offset + 6] = (value >> 48) & 0xFF;
+    (*data_)[offset + 7] = (value >> 56) & 0xFF;
+  }
+
+  void UpdateUleb128(size_t offset, uint32_t value) {
+    DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size());
+    UpdateUnsignedLeb128(data_->data() + offset, value);
+  }
+
+  void Pop() {
+    return data_->pop_back();
+  }
+
+  void Pad(int alignment) {
+    DCHECK_NE(alignment, 0);
+    data_->resize(RoundUp(data_->size(), alignment), 0);
+  }
+
+  const Vector* data() const {
+    return data_;
+  }
+
+  size_t size() const {
+    return data_->size();
+  }
+
+  explicit Writer(Vector* buffer) : data_(buffer) { }
+
+ private:
+  Vector* const data_;
+
+  DISALLOW_COPY_AND_ASSIGN(Writer);
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_DWARF_WRITER_H_
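
PushUleb128()/PushSleb128() above delegate to ART's leb128.h. For reference,
here is a standalone sketch of the unsigned variant - 7 payload bits per byte
with 0x80 as the continuation bit - plus a decoder to show the round trip
(written from the standard LEB128 definition, not copied from ART):

#include <cstdint>
#include <cstdio>
#include <vector>

static void EncodeUleb128(std::vector<uint8_t>* out, uint32_t value) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    out->push_back(value != 0 ? (byte | 0x80) : byte);
  } while (value != 0);
}

static uint32_t DecodeUleb128(const uint8_t** ptr) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*ptr)++;
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

int main() {
  std::vector<uint8_t> buf;
  for (uint32_t v : {0u, 127u, 128u, 300u, 0xFFFFFFFFu}) {
    buf.clear();
    EncodeUleb128(&buf, v);
    const uint8_t* p = buf.data();
    printf("%10u -> %zu byte(s), round-trips to %u\n",
           v, buf.size(), DecodeUleb128(&p));
  }
  return 0;
}

The one-byte case for values under 128 is why DWARF leans on LEB128 so heavily:
most operands (register numbers, small offsets, sizes) fit in a single byte.
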
diff --git a/compiler/debug/elf_compilation_unit.h b/compiler/debug/elf_compilation_unit.h
new file mode 100644
index 0000000..b1d89eb
--- /dev/null
+++ b/compiler/debug/elf_compilation_unit.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_COMPILATION_UNIT_H_
+#define ART_COMPILER_DEBUG_ELF_COMPILATION_UNIT_H_
+
+#include <vector>
+
+#include "debug/method_debug_info.h"
+
+namespace art {
+namespace debug {
+
+struct ElfCompilationUnit {
+  std::vector<const MethodDebugInfo*> methods;
+  size_t debug_line_offset = 0;
+  bool is_code_address_text_relative = false;  // Is the address offset from the start of the .text section?
+  uint64_t code_address = std::numeric_limits<uint64_t>::max();
+  uint64_t code_end = 0;
+};
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_COMPILATION_UNIT_H_
+
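
The initializers above (code_address at the uint64_t maximum, code_end at zero)
are chosen so that a plain min/max fold over the unit's methods yields the
covered code range. A minimal sketch, with a hypothetical MethodInfo standing
in for ART's MethodDebugInfo:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

struct MethodInfo {  // Hypothetical stand-in.
  uint64_t code_address;
  uint64_t code_size;
};

int main() {
  std::vector<MethodInfo> methods = {{0x2000, 0x80}, {0x1000, 0x40}};
  uint64_t cu_begin = std::numeric_limits<uint64_t>::max();
  uint64_t cu_end = 0;
  for (const MethodInfo& m : methods) {
    cu_begin = std::min(cu_begin, m.code_address);
    cu_end = std::max(cu_end, m.code_address + m.code_size);
  }
  printf("CU covers [0x%llx, 0x%llx)\n",
         (unsigned long long)cu_begin, (unsigned long long)cu_end);
  return 0;
}
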
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
new file mode 100644
index 0000000..f9d33c1
--- /dev/null
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_DEBUG_FRAME_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_DEBUG_FRAME_WRITER_H_
+
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "debug/dwarf/debug_frame_opcode_writer.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/dwarf/headers.h"
+#include "debug/method_debug_info.h"
+#include "elf_builder.h"
+
+namespace art {
+namespace debug {
+
+static void WriteCIE(InstructionSet isa,
+                     dwarf::CFIFormat format,
+                     std::vector<uint8_t>* buffer) {
+  using Reg = dwarf::Reg;
+  // Scratch registers should be marked as undefined.  This tells the
+  // debugger that their values in the previous frame are not recoverable.
+  bool is64bit = Is64BitInstructionSet(isa);
+  switch (isa) {
+    case kArm:
+    case kThumb2: {
+      dwarf::DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::ArmCore(13), 0);  // R13(SP).
+      // core registers.
+      for (int reg = 0; reg < 13; reg++) {
+        if (reg < 4 || reg == 12) {
+          opcodes.Undefined(Reg::ArmCore(reg));
+        } else {
+          opcodes.SameValue(Reg::ArmCore(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 16) {
+          opcodes.Undefined(Reg::ArmFp(reg));
+        } else {
+          opcodes.SameValue(Reg::ArmFp(reg));
+        }
+      }
+      auto return_reg = Reg::ArmCore(14);  // R14(LR).
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
+      return;
+    }
+    case kArm64: {
+      dwarf::DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::Arm64Core(31), 0);  // R31(SP).
+      // core registers.
+      for (int reg = 0; reg < 30; reg++) {
+        if (reg < 8 || reg == 16 || reg == 17) {
+          opcodes.Undefined(Reg::Arm64Core(reg));
+        } else {
+          opcodes.SameValue(Reg::Arm64Core(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 8 || reg >= 16) {
+          opcodes.Undefined(Reg::Arm64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::Arm64Fp(reg));
+        }
+      }
+      auto return_reg = Reg::Arm64Core(30);  // R30(LR).
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
+      return;
+    }
+    case kMips:
+    case kMips64: {
+      dwarf::DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::MipsCore(29), 0);  // R29(SP).
+      // core registers.
+      for (int reg = 1; reg < 26; reg++) {
+        if (reg < 16 || reg == 24 || reg == 25) {  // AT, V*, A*, T*.
+          opcodes.Undefined(Reg::MipsCore(reg));
+        } else {
+          opcodes.SameValue(Reg::MipsCore(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 24) {
+          opcodes.Undefined(Reg::Mips64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::Mips64Fp(reg));
+        }
+      }
+      auto return_reg = Reg::MipsCore(31);  // R31(RA).
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
+      return;
+    }
+    case kX86: {
+      // FIXME: Add fp registers once libunwind adds support for them. Bug: 20491296
+      constexpr bool generate_opcodes_for_x86_fp = false;
+      dwarf::DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::X86Core(4), 4);   // R4(ESP).
+      opcodes.Offset(Reg::X86Core(8), -4);  // R8(EIP).
+      // core registers.
+      for (int reg = 0; reg < 8; reg++) {
+        if (reg <= 3) {
+          opcodes.Undefined(Reg::X86Core(reg));
+        } else if (reg == 4) {
+          // Stack pointer.
+        } else {
+          opcodes.SameValue(Reg::X86Core(reg));
+        }
+      }
+      // fp registers.
+      if (generate_opcodes_for_x86_fp) {
+        for (int reg = 0; reg < 8; reg++) {
+          opcodes.Undefined(Reg::X86Fp(reg));
+        }
+      }
+      auto return_reg = Reg::X86Core(8);  // R8(EIP).
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
+      return;
+    }
+    case kX86_64: {
+      dwarf::DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::X86_64Core(4), 8);  // R4(RSP).
+      opcodes.Offset(Reg::X86_64Core(16), -8);  // R16(RIP).
+      // core registers.
+      for (int reg = 0; reg < 16; reg++) {
+        if (reg == 4) {
+          // Stack pointer.
+        } else if (reg < 12 && reg != 3 && reg != 5) {  // except RBX and RBP.
+          opcodes.Undefined(Reg::X86_64Core(reg));
+        } else {
+          opcodes.SameValue(Reg::X86_64Core(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 16; reg++) {
+        if (reg < 12) {
+          opcodes.Undefined(Reg::X86_64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::X86_64Fp(reg));
+        }
+      }
+      auto return_reg = Reg::X86_64Core(16);  // R16(RIP).
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
+      return;
+    }
+    case kNone:
+      break;
+  }
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
+  UNREACHABLE();
+}
+
+template<typename ElfTypes>
+void WriteCFISection(ElfBuilder<ElfTypes>* builder,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
+                     dwarf::CFIFormat format,
+                     bool write_oat_patches) {
+  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT || format == dwarf::DW_EH_FRAME_FORMAT);
+  typedef typename ElfTypes::Addr Elf_Addr;
+
+  // The methods can be written in any order.
+  // Let's therefore sort them in the lexicographical order of the opcodes.
+  // This has no effect on its own. However, if the final .debug_frame section is
+  // compressed it reduces the size since similar opcode sequences are grouped.
+  std::vector<const MethodDebugInfo*> sorted_method_infos;
+  sorted_method_infos.reserve(method_infos.size());
+  for (size_t i = 0; i < method_infos.size(); i++) {
+    if (!method_infos[i].cfi.empty() && !method_infos[i].deduped) {
+      sorted_method_infos.push_back(&method_infos[i]);
+    }
+  }
+  if (sorted_method_infos.empty()) {
+    return;
+  }
+  std::stable_sort(
+      sorted_method_infos.begin(),
+      sorted_method_infos.end(),
+      [](const MethodDebugInfo* lhs, const MethodDebugInfo* rhs) {
+        ArrayRef<const uint8_t> l = lhs->cfi;
+        ArrayRef<const uint8_t> r = rhs->cfi;
+        return std::lexicographical_compare(l.begin(), l.end(), r.begin(), r.end());
+      });
+
+  std::vector<uint32_t> binary_search_table;
+  std::vector<uintptr_t> patch_locations;
+  if (format == dwarf::DW_EH_FRAME_FORMAT) {
+    binary_search_table.reserve(2 * sorted_method_infos.size());
+  } else {
+    patch_locations.reserve(sorted_method_infos.size());
+  }
+
+  // Write .eh_frame/.debug_frame section.
+  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+                       ? builder->GetDebugFrame()
+                       : builder->GetEhFrame());
+  {
+    cfi_section->Start();
+    const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
+    const Elf_Addr cfi_address = cfi_section->GetAddress();
+    const Elf_Addr cie_address = cfi_address;
+    Elf_Addr buffer_address = cfi_address;
+    std::vector<uint8_t> buffer;  // Small temporary buffer.
+    WriteCIE(builder->GetIsa(), format, &buffer);
+    cfi_section->WriteFully(buffer.data(), buffer.size());
+    buffer_address += buffer.size();
+    buffer.clear();
+    for (const MethodDebugInfo* mi : sorted_method_infos) {
+      DCHECK(!mi->deduped);
+      DCHECK(!mi->cfi.empty());
+      const Elf_Addr code_address = mi->code_address +
+          (mi->is_code_address_text_relative ? builder->GetText()->GetAddress() : 0);
+      if (format == dwarf::DW_EH_FRAME_FORMAT) {
+        binary_search_table.push_back(dchecked_integral_cast<uint32_t>(code_address));
+        binary_search_table.push_back(dchecked_integral_cast<uint32_t>(buffer_address));
+      }
+      WriteFDE(is64bit, cfi_address, cie_address,
+               code_address, mi->code_size,
+               mi->cfi, format, buffer_address, &buffer,
+               &patch_locations);
+      cfi_section->WriteFully(buffer.data(), buffer.size());
+      buffer_address += buffer.size();
+      buffer.clear();
+    }
+    cfi_section->End();
+  }
+
+  if (format == dwarf::DW_EH_FRAME_FORMAT) {
+    auto* header_section = builder->GetEhFrameHdr();
+    header_section->Start();
+    uint32_t header_address = dchecked_integral_cast<uint32_t>(header_section->GetAddress());
+    // Write .eh_frame_hdr section.
+    std::vector<uint8_t> buffer;
+    dwarf::Writer<> header(&buffer);
+    header.PushUint8(1);  // Version.
+    // Encoding of .eh_frame pointer - libunwind does not honor datarel here,
+    // so we have to use pcrel which means relative to the pointer's location.
+    header.PushUint8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+    // Encoding of binary search table size.
+    header.PushUint8(dwarf::DW_EH_PE_udata4);
+    // Encoding of binary search table addresses - libunwind supports only this
+    // specific combination, which means relative to the start of .eh_frame_hdr.
+    header.PushUint8(dwarf::DW_EH_PE_datarel | dwarf::DW_EH_PE_sdata4);
+    // .eh_frame pointer
+    header.PushInt32(cfi_section->GetAddress() - (header_address + 4u));
+    // Binary search table size (number of entries).
+    header.PushUint32(dchecked_integral_cast<uint32_t>(binary_search_table.size()/2));
+    header_section->WriteFully(buffer.data(), buffer.size());
+    // Binary search table.
+    for (size_t i = 0; i < binary_search_table.size(); i++) {
+      // Make addresses section-relative since we know the header address now.
+      binary_search_table[i] -= header_address;
+    }
+    header_section->WriteFully(binary_search_table.data(),
+                               binary_search_table.size() * sizeof(binary_search_table[0]));
+    header_section->End();
+  } else {
+    if (write_oat_patches) {
+      builder->WritePatches(".debug_frame.oat_patches",
+                            ArrayRef<const uintptr_t>(patch_locations));
+    }
+  }
+}
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_DEBUG_FRAME_WRITER_H_
+
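
The binary_search_table built above is what gives .eh_frame_hdr its value: an
unwinder can find the FDE covering a PC with one binary search over
(code address, FDE address) pairs instead of walking every CFI record in
.eh_frame. A standalone sketch of that lookup; the table must be sorted by
code address, and the types and addresses here are illustrative, not
libunwind's:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct TableEntry {
  uint32_t code_address;
  uint32_t fde_address;
};

static const TableEntry* FindFde(const std::vector<TableEntry>& table,
                                 uint32_t pc) {
  // First entry with code_address > pc, then step back one.
  auto it = std::upper_bound(
      table.begin(), table.end(), pc,
      [](uint32_t v, const TableEntry& e) { return v < e.code_address; });
  return it == table.begin() ? nullptr : &*(it - 1);
}

int main() {
  std::vector<TableEntry> table = {
      {0x1000, 0x50}, {0x1040, 0x80}, {0x10c0, 0xb0}};  // Sorted by PC.
  for (uint32_t pc : {0x1004u, 0x10ffu, 0x0fffu}) {
    const TableEntry* e = FindFde(table, pc);
    if (e != nullptr) {
      printf("pc 0x%x -> FDE at 0x%x\n", pc, e->fde_address);
    } else {
      printf("pc 0x%x -> no covering entry\n", pc);
    }
  }
  return 0;
}

A real consumer would additionally check that the PC falls inside the function
the FDE describes; the sketch only finds the nearest preceding entry.
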
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
new file mode 100644
index 0000000..e8e278d
--- /dev/null
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_DEBUG_INFO_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_DEBUG_INFO_WRITER_H_
+
+#include <map>
+#include <unordered_set>
+#include <vector>
+
+#include "debug/dwarf/debug_abbrev_writer.h"
+#include "debug/dwarf/debug_info_entry_writer.h"
+#include "debug/elf_compilation_unit.h"
+#include "debug/elf_debug_loc_writer.h"
+#include "debug/method_debug_info.h"
+#include "dex_file-inl.h"
+#include "dex_file.h"
+#include "elf_builder.h"
+#include "linear_alloc.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
+
+namespace art {
+namespace debug {
+
+typedef std::vector<DexFile::LocalInfo> LocalInfos;
+
+static void LocalInfoCallback(void* ctx, const DexFile::LocalInfo& entry) {
+  static_cast<LocalInfos*>(ctx)->push_back(entry);
+}
+
+static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
+  std::vector<const char*> names;
+  if (mi->code_item != nullptr) {
+    DCHECK(mi->dex_file != nullptr);
+    const uint8_t* stream = mi->dex_file->GetDebugInfoStream(mi->code_item);
+    if (stream != nullptr) {
+      DecodeUnsignedLeb128(&stream);  // line.
+      uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+      for (uint32_t i = 0; i < parameters_size; ++i) {
+        uint32_t id = DecodeUnsignedLeb128P1(&stream);
+        names.push_back(mi->dex_file->StringDataByIdx(id));
+      }
+    }
+  }
+  return names;
+}
+
+// Helper class to write .debug_info and its supporting sections.
+template<typename ElfTypes>
+class ElfDebugInfoWriter {
+  using Elf_Addr = typename ElfTypes::Addr;
+
+ public:
+  explicit ElfDebugInfoWriter(ElfBuilder<ElfTypes>* builder)
+      : builder_(builder),
+        debug_abbrev_(&debug_abbrev_buffer_) {
+  }
+
+  void Start() {
+    builder_->GetDebugInfo()->Start();
+  }
+
+  void End(bool write_oat_patches) {
+    builder_->GetDebugInfo()->End();
+    if (write_oat_patches) {
+      builder_->WritePatches(".debug_info.oat_patches",
+                             ArrayRef<const uintptr_t>(debug_info_patches_));
+    }
+    builder_->WriteSection(".debug_abbrev", &debug_abbrev_buffer_);
+    if (!debug_loc_.empty()) {
+      builder_->WriteSection(".debug_loc", &debug_loc_);
+    }
+    if (!debug_ranges_.empty()) {
+      builder_->WriteSection(".debug_ranges", &debug_ranges_);
+    }
+  }
+
+ private:
+  ElfBuilder<ElfTypes>* builder_;
+  std::vector<uintptr_t> debug_info_patches_;
+  std::vector<uint8_t> debug_abbrev_buffer_;
+  dwarf::DebugAbbrevWriter<> debug_abbrev_;
+  std::vector<uint8_t> debug_loc_;
+  std::vector<uint8_t> debug_ranges_;
+
+  std::unordered_set<const char*> defined_dex_classes_;  // For CHECKs only.
+
+  template<typename ElfTypes2>
+  friend class ElfCompilationUnitWriter;
+};
+
+// Helper class to write one compilation unit.
+// It holds helper methods and temporary state.
+template<typename ElfTypes>
+class ElfCompilationUnitWriter {
+  using Elf_Addr = typename ElfTypes::Addr;
+
+ public:
+  explicit ElfCompilationUnitWriter(ElfDebugInfoWriter<ElfTypes>* owner)
+    : owner_(owner),
+      info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &owner->debug_abbrev_) {
+  }
+
+  void Write(const ElfCompilationUnit& compilation_unit) {
+    CHECK(!compilation_unit.methods.empty());
+    const Elf_Addr base_address = compilation_unit.is_code_address_text_relative
+        ? owner_->builder_->GetText()->GetAddress()
+        : 0;
+    const uint64_t cu_size = compilation_unit.code_end - compilation_unit.code_address;
+    using namespace dwarf;  // NOLINT. For easy access to DWARF constants.
+
+    info_.StartTag(DW_TAG_compile_unit);
+    info_.WriteString(DW_AT_producer, "Android dex2oat");
+    info_.WriteData1(DW_AT_language, DW_LANG_Java);
+    info_.WriteString(DW_AT_comp_dir, "$JAVA_SRC_ROOT");
+    info_.WriteAddr(DW_AT_low_pc, base_address + compilation_unit.code_address);
+    info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
+    info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset);
+
+    const char* last_dex_class_desc = nullptr;
+    for (auto mi : compilation_unit.methods) {
+      DCHECK(mi->dex_file != nullptr);
+      const DexFile* dex = mi->dex_file;
+      const DexFile::CodeItem* dex_code = mi->code_item;
+      const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index);
+      const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
+      const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
+      const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method);
+      const bool is_static = (mi->access_flags & kAccStatic) != 0;
+
+      // Enclose the method in the correct class definition.
+      if (last_dex_class_desc != dex_class_desc) {
+        if (last_dex_class_desc != nullptr) {
+          EndClassTag();
+        }
+        // Write reference tag for the class we are about to declare.
+        size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+        type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+        size_t type_attrib_offset = info_.size();
+        info_.WriteRef4(DW_AT_type, 0);
+        info_.EndTag();
+        // Declare the class that owns this method.
+        size_t class_offset = StartClassTag(dex_class_desc);
+        info_.UpdateUint32(type_attrib_offset, class_offset);
+        info_.WriteFlagPresent(DW_AT_declaration);
+        // Check that each class is defined only once.
+        bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
+        CHECK(unique) << "Redefinition of " << dex_class_desc;
+        last_dex_class_desc = dex_class_desc;
+      }
+
+      int start_depth = info_.Depth();
+      info_.StartTag(DW_TAG_subprogram);
+      WriteName(dex->GetMethodName(dex_method));
+      info_.WriteAddr(DW_AT_low_pc, base_address + mi->code_address);
+      info_.WriteUdata(DW_AT_high_pc, mi->code_size);
+      std::vector<uint8_t> expr_buffer;
+      Expression expr(&expr_buffer);
+      expr.WriteOpCallFrameCfa();
+      info_.WriteExprLoc(DW_AT_frame_base, expr);
+      WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
+
+      // Decode dex register locations for all stack maps.
+      // It might be expensive, so do it just once and reuse the result.
+      std::vector<DexRegisterMap> dex_reg_maps;
+      if (mi->code_info != nullptr) {
+        const CodeInfo code_info(mi->code_info);
+        CodeInfoEncoding encoding = code_info.ExtractEncoding();
+        for (size_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); ++s) {
+          const StackMap& stack_map = code_info.GetStackMapAt(s, encoding);
+          dex_reg_maps.push_back(code_info.GetDexRegisterMapOf(
+              stack_map, encoding, dex_code->registers_size_));
+        }
+      }
+
+      // Write parameters. DecodeDebugLocalInfo returns them as well, but it does not
+      // guarantee order or uniqueness, so it is safer to iterate over them manually.
+      // DecodeDebugLocalInfo might also not be available if there is no debug info.
+      std::vector<const char*> param_names = GetParamNames(mi);
+      uint32_t arg_reg = 0;
+      if (!is_static) {
+        info_.StartTag(DW_TAG_formal_parameter);
+        WriteName("this");
+        info_.WriteFlagPresent(DW_AT_artificial);
+        WriteLazyType(dex_class_desc);
+        if (dex_code != nullptr) {
+          // Write the stack location of the parameter.
+          const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+          const bool is64bitValue = false;
+          WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.code_address);
+        }
+        arg_reg++;
+        info_.EndTag();
+      }
+      if (dex_params != nullptr) {
+        for (uint32_t i = 0; i < dex_params->Size(); ++i) {
+          info_.StartTag(DW_TAG_formal_parameter);
+          // Parameter names may not always be available.
+          if (i < param_names.size()) {
+            WriteName(param_names[i]);
+          }
+          // Write the type.
+          const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_);
+          WriteLazyType(type_desc);
+          const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J';
+          if (dex_code != nullptr) {
+            // Write the stack location of the parameter.
+            const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+            WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.code_address);
+          }
+          arg_reg += is64bitValue ? 2 : 1;
+          info_.EndTag();
+        }
+        if (dex_code != nullptr) {
+          DCHECK_EQ(arg_reg, dex_code->ins_size_);
+        }
+      }
+
+      // Write local variables.
+      LocalInfos local_infos;
+      if (dex->DecodeDebugLocalInfo(dex_code,
+                                    is_static,
+                                    mi->dex_method_index,
+                                    LocalInfoCallback,
+                                    &local_infos)) {
+        for (const DexFile::LocalInfo& var : local_infos) {
+          if (var.reg_ < dex_code->registers_size_ - dex_code->ins_size_) {
+            info_.StartTag(DW_TAG_variable);
+            WriteName(var.name_);
+            WriteLazyType(var.descriptor_);
+            bool is64bitValue = var.descriptor_[0] == 'D' || var.descriptor_[0] == 'J';
+            WriteRegLocation(mi,
+                             dex_reg_maps,
+                             var.reg_,
+                             is64bitValue,
+                             compilation_unit.code_address,
+                             var.start_address_,
+                             var.end_address_);
+            info_.EndTag();
+          }
+        }
+      }
+
+      info_.EndTag();
+      CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
+    }
+    if (last_dex_class_desc != nullptr) {
+      EndClassTag();
+    }
+    FinishLazyTypes();
+    CloseNamespacesAboveDepth(0);
+    info_.EndTag();  // DW_TAG_compile_unit
+    CHECK_EQ(info_.Depth(), 0);
+    std::vector<uint8_t> buffer;
+    buffer.reserve(info_.data()->size() + KB);
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    // All compilation units share a single table, which is at the start of .debug_abbrev.
+    const size_t debug_abbrev_offset = 0;
+    WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+    owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  }
+
+  void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    using namespace dwarf;  // NOLINT. For easy access to DWARF constants.
+
+    info_.StartTag(DW_TAG_compile_unit);
+    info_.WriteString(DW_AT_producer, "Android dex2oat");
+    info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+    // Base class references to be patched at the end.
+    std::map<size_t, mirror::Class*> base_class_references;
+
+    // Already written declarations or definitions.
+    std::map<mirror::Class*, size_t> class_declarations;
+
+    std::vector<uint8_t> expr_buffer;
+    for (mirror::Class* type : types) {
+      if (type->IsPrimitive()) {
+        // For primitive types the definition and the declaration are the same.
+        if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+          WriteTypeDeclaration(type->GetDescriptor(nullptr));
+        }
+      } else if (type->IsArrayClass()) {
+        mirror::Class* element_type = type->GetComponentType();
+        uint32_t component_size = type->GetComponentSize();
+        uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+        uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+        CloseNamespacesAboveDepth(0);  // Declare in root namespace.
+        info_.StartTag(DW_TAG_array_type);
+        std::string descriptor_string;
+        WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+        WriteLinkageName(type);
+        info_.WriteUdata(DW_AT_data_member_location, data_offset);
+        info_.StartTag(DW_TAG_subrange_type);
+        Expression count_expr(&expr_buffer);
+        count_expr.WriteOpPushObjectAddress();
+        count_expr.WriteOpPlusUconst(length_offset);
+        count_expr.WriteOpDerefSize(4);  // Array length is always 32-bit wide.
+        info_.WriteExprLoc(DW_AT_count, count_expr);
+        info_.EndTag();  // DW_TAG_subrange_type.
+        info_.EndTag();  // DW_TAG_array_type.
+      } else if (type->IsInterface()) {
+        // Skip.  Variables cannot have an interface as a dynamic type.
+        // We do not expose the interface information to the debugger in any way.
+      } else {
+        std::string descriptor_string;
+        const char* desc = type->GetDescriptor(&descriptor_string);
+        size_t class_offset = StartClassTag(desc);
+        class_declarations.emplace(type, class_offset);
+
+        if (!type->IsVariableSize()) {
+          info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+        }
+
+        WriteLinkageName(type);
+
+        if (type->IsObjectClass()) {
+          // Generate an artificial member which is used to get the dynamic type of a variable.
+          // The run-time value of this field will correspond to linkage name of some type.
+          // We need to do it only once in j.l.Object since all other types inherit it.
+          info_.StartTag(DW_TAG_member);
+          WriteName(".dynamic_type");
+          WriteLazyType(sizeof(uintptr_t) == 8 ? "J" : "I");
+          info_.WriteFlagPresent(DW_AT_artificial);
+          // Create a DWARF expression that yields the location of the methods_ field.
+          Expression expr(&expr_buffer);
+          // The address of the object has been implicitly pushed on the stack.
+          // Dereference the klass_ field of Object (32-bit; possibly poisoned).
+          DCHECK_EQ(type->ClassOffset().Uint32Value(), 0u);
+          DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Class>), 4u);
+          expr.WriteOpDerefSize(4);
+          if (kPoisonHeapReferences) {
+            expr.WriteOpNeg();
+            // DWARF stack is pointer sized. Ensure that the high bits are clear.
+            expr.WriteOpConstu(0xFFFFFFFF);
+            expr.WriteOpAnd();
+          }
+          // Add offset to the methods_ field.
+          expr.WriteOpPlusUconst(mirror::Class::MethodsOffset().Uint32Value());
+          // Top of stack holds the location of the field now.
+          info_.WriteExprLoc(DW_AT_data_member_location, expr);
+          info_.EndTag();  // DW_TAG_member.
+        }
+
+        // Base class.
+        mirror::Class* base_class = type->GetSuperClass();
+        if (base_class != nullptr) {
+          info_.StartTag(DW_TAG_inheritance);
+          base_class_references.emplace(info_.size(), base_class);
+          info_.WriteRef4(DW_AT_type, 0);
+          info_.WriteUdata(DW_AT_data_member_location, 0);
+          info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          info_.EndTag();  // DW_TAG_inheritance.
+        }
+
+        // Member variables.
+        for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+          ArtField* field = type->GetInstanceField(i);
+          info_.StartTag(DW_TAG_member);
+          WriteName(field->GetName());
+          WriteLazyType(field->GetTypeDescriptor());
+          info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+          uint32_t access_flags = field->GetAccessFlags();
+          if (access_flags & kAccPublic) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          } else if (access_flags & kAccProtected) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+          } else if (access_flags & kAccPrivate) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+          }
+          info_.EndTag();  // DW_TAG_member.
+        }
+
+        if (type->IsStringClass()) {
+          // Emit debug info about an artificial class member for java.lang.String which represents
+          // the first element of the data stored in a string instance. Consumers of the debug
+          // info will be able to read the content of java.lang.String based on the count (real
+          // field) and based on the location of this data member.
+          info_.StartTag(DW_TAG_member);
+          WriteName("value");
+          // We don't support fields with C-like array types, so we just say its type is Java char.
+          WriteLazyType("C");  // char.
+          info_.WriteUdata(DW_AT_data_member_location,
+                           mirror::String::ValueOffset().Uint32Value());
+          info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+          info_.EndTag();  // DW_TAG_member.
+        }
+
+        EndClassTag();
+      }
+    }
+
+    // Write base class declarations.
+    for (const auto& base_class_reference : base_class_references) {
+      size_t reference_offset = base_class_reference.first;
+      mirror::Class* base_class = base_class_reference.second;
+      const auto& it = class_declarations.find(base_class);
+      if (it != class_declarations.end()) {
+        info_.UpdateUint32(reference_offset, it->second);
+      } else {
+        // Declare the base class.  We cannot use the standard WriteLazyType
+        // since we want to avoid the DW_TAG_reference_type wrapping.
+        std::string tmp_storage;
+        const char* base_class_desc = base_class->GetDescriptor(&tmp_storage);
+        size_t base_class_declaration_offset = StartClassTag(base_class_desc);
+        info_.WriteFlagPresent(DW_AT_declaration);
+        WriteLinkageName(base_class);
+        EndClassTag();
+        class_declarations.emplace(base_class, base_class_declaration_offset);
+        info_.UpdateUint32(reference_offset, base_class_declaration_offset);
+      }
+    }
+
+    FinishLazyTypes();
+    CloseNamespacesAboveDepth(0);
+    info_.EndTag();  // DW_TAG_compile_unit.
+    CHECK_EQ(info_.Depth(), 0);
+    std::vector<uint8_t> buffer;
+    buffer.reserve(info_.data()->size() + KB);
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    // All compilation units share a single table, which is at the start of .debug_abbrev.
+    const size_t debug_abbrev_offset = 0;
+    WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+    owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  }
+
+  // Write table into .debug_loc which describes location of dex register.
+  // The dex register might be valid only at some points and it might
+  // move between machine registers and stack.
+  void WriteRegLocation(const MethodDebugInfo* method_info,
+                        const std::vector<DexRegisterMap>& dex_register_maps,
+                        uint16_t vreg,
+                        bool is64bitValue,
+                        uint64_t compilation_unit_code_address,
+                        uint32_t dex_pc_low = 0,
+                        uint32_t dex_pc_high = 0xFFFFFFFF) {
+    WriteDebugLocEntry(method_info,
+                       dex_register_maps,
+                       vreg,
+                       is64bitValue,
+                       compilation_unit_code_address,
+                       dex_pc_low,
+                       dex_pc_high,
+                       owner_->builder_->GetIsa(),
+                       &info_,
+                       &owner_->debug_loc_,
+                       &owner_->debug_ranges_);
+  }
+
+  // The linkage name uniquely identifies a type.
+  // It is used to determine the dynamic type of objects.
+  // We use the methods_ field of the class since it is unique and not moved by the GC.
+  void WriteLinkageName(mirror::Class* type) SHARED_REQUIRES(Locks::mutator_lock_) {
+    auto* methods_ptr = type->GetMethodsPtr();
+    if (methods_ptr == nullptr) {
+      // Some types might have no methods.  Allocate empty array instead.
+      LinearAlloc* allocator = Runtime::Current()->GetLinearAlloc();
+      void* storage = allocator->Alloc(Thread::Current(), sizeof(LengthPrefixedArray<ArtMethod>));
+      methods_ptr = new (storage) LengthPrefixedArray<ArtMethod>(0);
+      type->SetMethodsPtr(methods_ptr, 0, 0);
+      DCHECK(type->GetMethodsPtr() != nullptr);
+    }
+    char name[32];
+    snprintf(name, sizeof(name), "0x%" PRIXPTR, reinterpret_cast<uintptr_t>(methods_ptr));
+    info_.WriteString(dwarf::DW_AT_linkage_name, name);
+  }
+
+  // Some types are difficult to define as we go since they need
+  // to be enclosed in the right set of namespaces. Therefore we
+  // just define all types lazily at the end of the compilation unit.
+  void WriteLazyType(const char* type_descriptor) {
+    if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
+      lazy_types_.emplace(std::string(type_descriptor), info_.size());
+      info_.WriteRef4(dwarf::DW_AT_type, 0);
+    }
+  }
+
+  void FinishLazyTypes() {
+    for (const auto& lazy_type : lazy_types_) {
+      info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
+    }
+    lazy_types_.clear();
+  }
+
+ private:
+  void WriteName(const char* name) {
+    if (name != nullptr) {
+      info_.WriteString(dwarf::DW_AT_name, name);
+    }
+  }
+
+  // Convert dex type descriptor to DWARF.
+  // Returns offset in the compilation unit.
+  size_t WriteTypeDeclaration(const std::string& desc) {
+    using namespace dwarf;  // NOLINT. For easy access to DWARF constants.
+
+    DCHECK(!desc.empty());
+    const auto& it = type_cache_.find(desc);
+    if (it != type_cache_.end()) {
+      return it->second;
+    }
+
+    size_t offset;
+    if (desc[0] == 'L') {
+      // Class type. For example: Lpackage/name;
+      size_t class_offset = StartClassTag(desc.c_str());
+      info_.WriteFlagPresent(DW_AT_declaration);
+      EndClassTag();
+      // Reference to the class type.
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef(DW_AT_type, class_offset);
+      info_.EndTag();
+    } else if (desc[0] == '[') {
+      // Array type.
+      size_t element_type = WriteTypeDeclaration(desc.substr(1));
+      CloseNamespacesAboveDepth(0);  // Declare in root namespace.
+      size_t array_type = info_.StartTag(DW_TAG_array_type);
+      info_.WriteFlagPresent(DW_AT_declaration);
+      info_.WriteRef(DW_AT_type, element_type);
+      info_.EndTag();
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef4(DW_AT_type, array_type);
+      info_.EndTag();
+    } else {
+      // Primitive types.
+      DCHECK_EQ(desc.size(), 1u);
+
+      const char* name;
+      uint32_t encoding;
+      uint32_t byte_size;
+      switch (desc[0]) {
+      case 'B':
+        name = "byte";
+        encoding = DW_ATE_signed;
+        byte_size = 1;
+        break;
+      case 'C':
+        name = "char";
+        encoding = DW_ATE_UTF;
+        byte_size = 2;
+        break;
+      case 'D':
+        name = "double";
+        encoding = DW_ATE_float;
+        byte_size = 8;
+        break;
+      case 'F':
+        name = "float";
+        encoding = DW_ATE_float;
+        byte_size = 4;
+        break;
+      case 'I':
+        name = "int";
+        encoding = DW_ATE_signed;
+        byte_size = 4;
+        break;
+      case 'J':
+        name = "long";
+        encoding = DW_ATE_signed;
+        byte_size = 8;
+        break;
+      case 'S':
+        name = "short";
+        encoding = DW_ATE_signed;
+        byte_size = 2;
+        break;
+      case 'Z':
+        name = "boolean";
+        encoding = DW_ATE_boolean;
+        byte_size = 1;
+        break;
+      case 'V':
+        LOG(FATAL) << "Void type should not be encoded";
+        UNREACHABLE();
+      default:
+        LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
+        UNREACHABLE();
+      }
+      CloseNamespacesAboveDepth(0);  // Declare in root namespace.
+      offset = info_.StartTag(DW_TAG_base_type);
+      WriteName(name);
+      info_.WriteData1(DW_AT_encoding, encoding);
+      info_.WriteData1(DW_AT_byte_size, byte_size);
+      info_.EndTag();
+    }
+
+    type_cache_.emplace(desc, offset);
+    return offset;
+  }
+
+  // Start DW_TAG_class_type tag nested in DW_TAG_namespace tags.
+  // Returns offset of the class tag in the compilation unit.
+  size_t StartClassTag(const char* desc) {
+    std::string name = SetNamespaceForClass(desc);
+    size_t offset = info_.StartTag(dwarf::DW_TAG_class_type);
+    WriteName(name.c_str());
+    return offset;
+  }
+
+  void EndClassTag() {
+    info_.EndTag();
+  }
+
+  // Set the current namespace nesting to one required by the given class.
+  // Returns the class name with namespaces, 'L', and ';' stripped.
+  std::string SetNamespaceForClass(const char* desc) {
+    DCHECK(desc != nullptr && desc[0] == 'L');
+    desc++;  // Skip the initial 'L'.
+    size_t depth = 0;
+    for (const char* end; (end = strchr(desc, '/')) != nullptr; desc = end + 1, ++depth) {
+      // Check whether the name at this depth is already what we need.
+      if (depth < current_namespace_.size()) {
+        const std::string& name = current_namespace_[depth];
+        if (name.compare(0, name.size(), desc, end - desc) == 0) {
+          continue;
+        }
+      }
+      // Otherwise we need to open a new namespace tag at this depth.
+      CloseNamespacesAboveDepth(depth);
+      info_.StartTag(dwarf::DW_TAG_namespace);
+      std::string name(desc, end - desc);
+      WriteName(name.c_str());
+      current_namespace_.push_back(std::move(name));
+    }
+    CloseNamespacesAboveDepth(depth);
+    return std::string(desc, strchr(desc, ';') - desc);
+  }
+
+  // Close namespace tags to reach the given nesting depth.
+  void CloseNamespacesAboveDepth(size_t depth) {
+    DCHECK_LE(depth, current_namespace_.size());
+    while (current_namespace_.size() > depth) {
+      info_.EndTag();
+      current_namespace_.pop_back();
+    }
+  }
+
+  // For access to the ELF sections.
+  ElfDebugInfoWriter<ElfTypes>* owner_;
+  // Temporary buffer to create and store the entries.
+  dwarf::DebugInfoEntryWriter<> info_;
+  // Cache of already translated type descriptors.
+  std::map<std::string, size_t> type_cache_;  // type_desc -> definition_offset.
+  // 32-bit references which need to be resolved to a type later.
+  // A given type may be used multiple times, so we need a multimap.
+  std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
+  // The current set of open namespace tags which are active and not closed yet.
+  std::vector<std::string> current_namespace_;
+};
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_DEBUG_INFO_WRITER_H_
+
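
A minimal standalone sketch of the descriptor handling above, showing how SetNamespaceForClass-style splitting turns a descriptor such as Ljava/lang/String; into namespace components plus a stripped class name. The SplitDescriptor helper and main() below are illustrative only, not part of this patch:

// Illustrative sketch; SplitDescriptor is a made-up helper name.
#include <cassert>
#include <cstring>
#include <string>
#include <vector>

// Split "Ljava/lang/String;" into {"java", "lang"} and return "String".
static std::string SplitDescriptor(const char* desc,
                                   std::vector<std::string>* namespaces) {
  assert(desc != nullptr && desc[0] == 'L');
  desc++;  // Skip the initial 'L'.
  for (const char* end; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
    namespaces->push_back(std::string(desc, end - desc));
  }
  return std::string(desc, strchr(desc, ';') - desc);
}

int main() {
  std::vector<std::string> ns;
  std::string name = SplitDescriptor("Ljava/lang/String;", &ns);
  assert(ns.size() == 2 && ns[0] == "java" && ns[1] == "lang");
  assert(name == "String");
  return 0;
}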
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
new file mode 100644
index 0000000..3db7306
--- /dev/null
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
+
+#include <unordered_set>
+#include <vector>
+
+#include "compiled_method.h"
+#include "debug/dwarf/debug_line_opcode_writer.h"
+#include "debug/dwarf/headers.h"
+#include "debug/elf_compilation_unit.h"
+#include "dex_file-inl.h"
+#include "elf_builder.h"
+#include "stack_map.h"
+
+namespace art {
+namespace debug {
+
+typedef std::vector<DexFile::PositionInfo> PositionInfos;
+
+static bool PositionInfoCallback(void* ctx, const DexFile::PositionInfo& entry) {
+  static_cast<PositionInfos*>(ctx)->push_back(entry);
+  return false;
+}
+
+template<typename ElfTypes>
+class ElfDebugLineWriter {
+  using Elf_Addr = typename ElfTypes::Addr;
+
+ public:
+  explicit ElfDebugLineWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
+  }
+
+  void Start() {
+    builder_->GetDebugLine()->Start();
+  }
+
+  // Write a line table for the given set of methods.
+  // Returns the number of bytes written.
+  size_t WriteCompilationUnit(ElfCompilationUnit& compilation_unit) {
+    const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
+    const Elf_Addr base_address = compilation_unit.is_code_address_text_relative
+        ? builder_->GetText()->GetAddress()
+        : 0;
+
+    compilation_unit.debug_line_offset = builder_->GetDebugLine()->GetSize();
+
+    std::vector<dwarf::FileEntry> files;
+    std::unordered_map<std::string, size_t> files_map;
+    std::vector<std::string> directories;
+    std::unordered_map<std::string, size_t> directories_map;
+    int code_factor_bits_ = 0;
+    int dwarf_isa = -1;
+    switch (builder_->GetIsa()) {
+      case kArm:  // arm actually means thumb2.
+      case kThumb2:
+        code_factor_bits_ = 1;  // 16-bit instructions
+        dwarf_isa = 1;  // DW_ISA_ARM_thumb.
+        break;
+      case kArm64:
+      case kMips:
+      case kMips64:
+        code_factor_bits_ = 2;  // 32-bit instructions
+        break;
+      case kNone:
+      case kX86:
+      case kX86_64:
+        break;
+    }
+    std::unordered_set<uint64_t> seen_addresses(compilation_unit.methods.size());
+    dwarf::DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
+    for (const MethodDebugInfo* mi : compilation_unit.methods) {
+      // Ignore the function if we have already generated a line table for the same address.
+      // Duplicates would confuse the debugger and the DWARF specification forbids them.
+      // We do allow a method's line table to be replicated in different compilation units.
+      // This ensures that each compilation unit contains the line tables for all its methods.
+      if (!seen_addresses.insert(mi->code_address).second) {
+        continue;
+      }
+
+      uint32_t prologue_end = std::numeric_limits<uint32_t>::max();
+      std::vector<SrcMapElem> pc2dex_map;
+      if (mi->code_info != nullptr) {
+        // Use stack maps to create mapping table from pc to dex.
+        const CodeInfo code_info(mi->code_info);
+        const CodeInfoEncoding encoding = code_info.ExtractEncoding();
+        pc2dex_map.reserve(code_info.GetNumberOfStackMaps(encoding));
+        for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); s++) {
+          StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+          DCHECK(stack_map.IsValid());
+          const uint32_t pc = stack_map.GetNativePcOffset(encoding.stack_map_encoding);
+          const int32_t dex = stack_map.GetDexPc(encoding.stack_map_encoding);
+          pc2dex_map.push_back({pc, dex});
+          if (stack_map.HasDexRegisterMap(encoding.stack_map_encoding)) {
+            // Guess that the first map with local variables is the end of the prologue.
+            prologue_end = std::min(prologue_end, pc);
+          }
+        }
+        std::sort(pc2dex_map.begin(), pc2dex_map.end());
+      }
+
+      if (pc2dex_map.empty()) {
+        continue;
+      }
+
+      // Compensate for compiler's off-by-one-instruction error.
+      //
+      // The compiler generates stackmap with PC *after* the branch instruction
+      // (because this is the PC which is easier to obtain when unwinding).
+      //
+      // However, the debugger is more clever and it will ask us for the line-number
+      // mapping at the location of the branch instruction (since the following
+      // instruction could belong to another line, this is the correct thing to do).
+      //
+      // So we really want to just decrement the PC by one instruction so that the
+      // branch instruction is covered as well. However, we do not know the size
+      // of the previous instruction, and we cannot subtract just a fixed amount
+      // (the debugger would trust us that the PC is valid; it might try to set a
+      // breakpoint there at some point, and setting a breakpoint mid-instruction
+      // would make the process crash in a spectacular way).
+      //
+      // Therefore, we say that the PC which the compiler gave us for the stackmap
+      // is the end of its associated address range, and we use the PC from the
+      // previous stack map as the start of the range. This ensures that the PC is
+      // valid and that the branch instruction is covered.
+      //
+      // This ensures we have correct line number mapping at call sites (which is
+      // important for backtraces), but there is nothing we can do for non-call
+      // sites (so stepping through optimized code in a debugger is not possible).
+      //
+      // We do not adjust the stackmaps if the code was compiled as debuggable.
+      // In that case, the stackmaps should accurately cover all instructions.
+      if (!mi->is_native_debuggable) {
+        for (size_t i = pc2dex_map.size() - 1; i > 0; --i) {
+          pc2dex_map[i].from_ = pc2dex_map[i - 1].from_;
+        }
+        pc2dex_map[0].from_ = 0;
+      }
+
+      Elf_Addr method_address = base_address + mi->code_address;
+
+      PositionInfos dex2line_map;
+      DCHECK(mi->dex_file != nullptr);
+      const DexFile* dex = mi->dex_file;
+      if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &dex2line_map)) {
+        continue;
+      }
+
+      if (dex2line_map.empty()) {
+        continue;
+      }
+
+      opcodes.SetAddress(method_address);
+      if (dwarf_isa != -1) {
+        opcodes.SetISA(dwarf_isa);
+      }
+
+      // Get and deduplicate directory and filename.
+      int file_index = 0;  // 0 - primary source file of the compilation.
+      auto& dex_class_def = dex->GetClassDef(mi->class_def_index);
+      const char* source_file = dex->GetSourceFile(dex_class_def);
+      if (source_file != nullptr) {
+        std::string file_name(source_file);
+        size_t file_name_slash = file_name.find_last_of('/');
+        std::string class_name(dex->GetClassDescriptor(dex_class_def));
+        size_t class_name_slash = class_name.find_last_of('/');
+        std::string full_path(file_name);
+
+        // Guess directory from package name.
+        int directory_index = 0;  // 0 - current directory of the compilation.
+        if (file_name_slash == std::string::npos &&  // Just filename.
+            class_name.front() == 'L' &&  // Type descriptor for a class.
+            class_name_slash != std::string::npos) {  // Has package name.
+          std::string package_name = class_name.substr(1, class_name_slash - 1);
+          auto it = directories_map.find(package_name);
+          if (it == directories_map.end()) {
+            directory_index = 1 + directories.size();
+            directories_map.emplace(package_name, directory_index);
+            directories.push_back(package_name);
+          } else {
+            directory_index = it->second;
+          }
+          full_path = package_name + "/" + file_name;
+        }
+
+        // Add file entry.
+        auto it2 = files_map.find(full_path);
+        if (it2 == files_map.end()) {
+          file_index = 1 + files.size();
+          files_map.emplace(full_path, file_index);
+          files.push_back(dwarf::FileEntry {
+            file_name,
+            directory_index,
+            0,  // Modification time - NA.
+            0,  // File size - NA.
+          });
+        } else {
+          file_index = it2->second;
+        }
+      }
+      opcodes.SetFile(file_index);
+
+      // Generate mapping opcodes from PC to Java lines.
+      if (file_index != 0) {
+        // If the method was not compiled as native-debuggable, we still generate all available
+        // lines, but we try to prevent the debugger from stepping and setting breakpoints since
+        // the information is too inaccurate for that (breakpoints would be set after the calls).
+        const bool default_is_stmt = mi->is_native_debuggable;
+        bool first = true;
+        for (SrcMapElem pc2dex : pc2dex_map) {
+          uint32_t pc = pc2dex.from_;
+          int dex_pc = pc2dex.to_;
+          // Find the mapping whose address is greater than our dex pc; then go back one step.
+          auto dex2line = std::upper_bound(
+              dex2line_map.begin(),
+              dex2line_map.end(),
+              dex_pc,
+              [](uint32_t address, const DexFile::PositionInfo& entry) {
+                  return address < entry.address_;
+              });
+          // Look for the first valid mapping after the prologue.
+          if (dex2line != dex2line_map.begin() && pc >= prologue_end) {
+            int line = (--dex2line)->line_;
+            if (first) {
+              first = false;
+              if (pc > 0) {
+                // Assume that any preceding code is the prologue.
+                int first_line = dex2line_map.front().line_;
+                // The prologue is not a sensible place for a breakpoint.
+                opcodes.SetIsStmt(false);
+                opcodes.AddRow(method_address, first_line);
+                opcodes.SetPrologueEnd();
+              }
+              opcodes.SetIsStmt(default_is_stmt);
+              opcodes.AddRow(method_address + pc, line);
+            } else if (line != opcodes.CurrentLine()) {
+              opcodes.SetIsStmt(default_is_stmt);
+              opcodes.AddRow(method_address + pc, line);
+            }
+          }
+        }
+      } else {
+        // Line 0 - the instruction cannot be attributed to any source line.
+        opcodes.AddRow(method_address, 0);
+      }
+
+      opcodes.AdvancePC(method_address + mi->code_size);
+      opcodes.EndSequence();
+    }
+    std::vector<uint8_t> buffer;
+    buffer.reserve(opcodes.data()->size() + KB);
+    size_t offset = builder_->GetDebugLine()->GetSize();
+    WriteDebugLineTable(directories, files, opcodes, offset, &buffer, &debug_line_patches_);
+    builder_->GetDebugLine()->WriteFully(buffer.data(), buffer.size());
+    return buffer.size();
+  }
+
+  void End(bool write_oat_patches) {
+    builder_->GetDebugLine()->End();
+    if (write_oat_patches) {
+      builder_->WritePatches(".debug_line.oat_patches",
+                             ArrayRef<const uintptr_t>(debug_line_patches_));
+    }
+  }
+
+ private:
+  ElfBuilder<ElfTypes>* builder_;
+  std::vector<uintptr_t> debug_line_patches_;
+};
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
+
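
The off-by-one compensation above is easiest to see on concrete numbers. A minimal sketch, assuming a simplified Elem type standing in for SrcMapElem: after the shift, each stack-map PC marks the end of its range and the previous PC marks the start:

#include <cassert>
#include <cstdint>
#include <vector>

struct Elem {
  uint32_t from_;  // Native pc.
  int32_t to_;     // Dex pc.
};

// Shift PCs down by one entry, as in the non-debuggable case above.
static void ShiftRanges(std::vector<Elem>* map) {
  if (map->empty()) {
    return;  // The writer above only runs this on a non-empty map.
  }
  for (size_t i = map->size() - 1; i > 0; --i) {
    (*map)[i].from_ = (*map)[i - 1].from_;
  }
  (*map)[0].from_ = 0;
}

int main() {
  // Stack maps at native PCs 4, 12, 20 (the PCs *after* each branch).
  std::vector<Elem> map = {{4, 0}, {12, 1}, {20, 2}};
  ShiftRanges(&map);
  // Each entry now starts where the previous one ended.
  assert(map[0].from_ == 0 && map[1].from_ == 4 && map[2].from_ == 12);
  return 0;
}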
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
new file mode 100644
index 0000000..9645643
--- /dev/null
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
+
+#include <cstring>
+#include <map>
+
+#include "arch/instruction_set.h"
+#include "compiled_method.h"
+#include "debug/dwarf/debug_info_entry_writer.h"
+#include "debug/dwarf/register.h"
+#include "debug/method_debug_info.h"
+#include "stack_map.h"
+
+namespace art {
+namespace debug {
+using Reg = dwarf::Reg;
+
+static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmCore(machine_reg);
+    case kArm64:
+      return Reg::Arm64Core(machine_reg);
+    case kX86:
+      return Reg::X86Core(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Core(machine_reg);
+    case kMips:
+      return Reg::MipsCore(machine_reg);
+    case kMips64:
+      return Reg::Mips64Core(machine_reg);
+    case kNone:
+      LOG(FATAL) << "No instruction set";
+  }
+  UNREACHABLE();
+}
+
+static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmFp(machine_reg);
+    case kArm64:
+      return Reg::Arm64Fp(machine_reg);
+    case kX86:
+      return Reg::X86Fp(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Fp(machine_reg);
+    case kMips:
+      return Reg::MipsFp(machine_reg);
+    case kMips64:
+      return Reg::Mips64Fp(machine_reg);
+    case kNone:
+      LOG(FATAL) << "No instruction set";
+  }
+  UNREACHABLE();
+}
+
+struct VariableLocation {
+  uint32_t low_pc;  // Relative to compilation unit.
+  uint32_t high_pc;  // Relative to compilation unit.
+  DexRegisterLocation reg_lo;  // May be None if the location is unknown.
+  DexRegisterLocation reg_hi;  // Most significant bits of 64-bit value.
+};
+
+// Get the location of the given dex register (e.g. stack or machine register).
+// Note that the location might differ based on the current pc.
+// The result will cover all ranges where the variable is in scope.
+// PCs corresponding to stack maps with a dex register map are accurate;
+// all other PCs are best-effort only.
+std::vector<VariableLocation> GetVariableLocations(
+    const MethodDebugInfo* method_info,
+    const std::vector<DexRegisterMap>& dex_register_maps,
+    uint16_t vreg,
+    bool is64bitValue,
+    uint64_t compilation_unit_code_address,
+    uint32_t dex_pc_low,
+    uint32_t dex_pc_high) {
+  std::vector<VariableLocation> variable_locations;
+
+  // Get stack maps sorted by pc (they might not be sorted internally).
+  // TODO(dsrbecky) Remove this once stackmaps get sorted by pc.
+  const CodeInfo code_info(method_info->code_info);
+  const CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  std::map<uint32_t, uint32_t> stack_maps;  // low_pc -> stack_map_index.
+  for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); s++) {
+    StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+    DCHECK(stack_map.IsValid());
+    if (!stack_map.HasDexRegisterMap(encoding.stack_map_encoding)) {
+      // The compiler creates stackmaps without register maps at the start of
+      // basic blocks in order to keep instruction-accurate line number mapping.
+    // However, we never stop at those (breakpoint locations always have a map).
+      // Therefore, for the purpose of local variables, we ignore them.
+      // The main reason for this is to save space by avoiding undefined gaps.
+      continue;
+    }
+    const uint32_t pc_offset = stack_map.GetNativePcOffset(encoding.stack_map_encoding);
+    DCHECK_LE(pc_offset, method_info->code_size);
+    DCHECK_LE(compilation_unit_code_address, method_info->code_address);
+    const uint32_t low_pc = dchecked_integral_cast<uint32_t>(
+        method_info->code_address + pc_offset - compilation_unit_code_address);
+    stack_maps.emplace(low_pc, s);
+  }
+
+  // Create entries for the requested register based on stack map data.
+  for (auto it = stack_maps.begin(); it != stack_maps.end(); it++) {
+    const uint32_t low_pc = it->first;
+    const uint32_t stack_map_index = it->second;
+    const StackMap& stack_map = code_info.GetStackMapAt(stack_map_index, encoding);
+    auto next_it = it;
+    next_it++;
+    const uint32_t high_pc = next_it != stack_maps.end()
+      ? next_it->first
+      : method_info->code_address + method_info->code_size - compilation_unit_code_address;
+    DCHECK_LE(low_pc, high_pc);
+    if (low_pc == high_pc) {
+      continue;  // Ignore if the address range is empty.
+    }
+
+    // Check that the stack map is in the requested range.
+    uint32_t dex_pc = stack_map.GetDexPc(encoding.stack_map_encoding);
+    if (!(dex_pc_low <= dex_pc && dex_pc < dex_pc_high)) {
+      // The variable is not in scope at this PC, so omit the entry.
+      // Note that this is different from a None() entry, which means in scope but at an unknown location.
+      continue;
+    }
+
+    // Find the location of the dex register.
+    DexRegisterLocation reg_lo = DexRegisterLocation::None();
+    DexRegisterLocation reg_hi = DexRegisterLocation::None();
+    DCHECK_LT(stack_map_index, dex_register_maps.size());
+    DexRegisterMap dex_register_map = dex_register_maps[stack_map_index];
+    DCHECK(dex_register_map.IsValid());
+    reg_lo = dex_register_map.GetDexRegisterLocation(
+        vreg, method_info->code_item->registers_size_, code_info, encoding);
+    if (is64bitValue) {
+      reg_hi = dex_register_map.GetDexRegisterLocation(
+          vreg + 1, method_info->code_item->registers_size_, code_info, encoding);
+    }
+
+    // Add location entry for this address range.
+    if (!variable_locations.empty() &&
+        variable_locations.back().reg_lo == reg_lo &&
+        variable_locations.back().reg_hi == reg_hi &&
+        variable_locations.back().high_pc == low_pc) {
+      // Merge with the previous entry (extend its range).
+      variable_locations.back().high_pc = high_pc;
+    } else {
+      variable_locations.push_back({low_pc, high_pc, reg_lo, reg_hi});
+    }
+  }
+
+  return variable_locations;
+}
+
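
The merge step at the end of the loop above (extend the previous entry when the location is unchanged and the ranges touch) can be sketched in isolation. The Loc struct and Append helper below are illustrative stand-ins, with a single reg field standing in for the reg_lo/reg_hi pair:

#include <cassert>
#include <cstdint>
#include <vector>

struct Loc {
  uint32_t low_pc;
  uint32_t high_pc;
  int reg;  // Stand-in for reg_lo/reg_hi.
};

static void Append(std::vector<Loc>* locs, Loc next) {
  if (!locs->empty() &&
      locs->back().reg == next.reg &&
      locs->back().high_pc == next.low_pc) {
    locs->back().high_pc = next.high_pc;  // Extend the previous range.
  } else {
    locs->push_back(next);
  }
}

int main() {
  std::vector<Loc> locs;
  Append(&locs, {0, 8, 5});
  Append(&locs, {8, 16, 5});   // Same register, contiguous: merged.
  Append(&locs, {16, 24, 6});  // Different register: new entry.
  assert(locs.size() == 2);
  assert(locs[0].high_pc == 16 && locs[1].reg == 6);
  return 0;
}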
+// Write a table into .debug_loc which describes the location of a dex register.
+// The dex register might be valid only at some points, and it might
+// move between machine registers and the stack.
+static void WriteDebugLocEntry(const MethodDebugInfo* method_info,
+                               const std::vector<DexRegisterMap>& dex_register_maps,
+                               uint16_t vreg,
+                               bool is64bitValue,
+                               uint64_t compilation_unit_code_address,
+                               uint32_t dex_pc_low,
+                               uint32_t dex_pc_high,
+                               InstructionSet isa,
+                               dwarf::DebugInfoEntryWriter<>* debug_info,
+                               std::vector<uint8_t>* debug_loc_buffer,
+                               std::vector<uint8_t>* debug_ranges_buffer) {
+  using Kind = DexRegisterLocation::Kind;
+  if (method_info->code_info == nullptr || dex_register_maps.empty()) {
+    return;
+  }
+
+  std::vector<VariableLocation> variable_locations = GetVariableLocations(
+      method_info,
+      dex_register_maps,
+      vreg,
+      is64bitValue,
+      compilation_unit_code_address,
+      dex_pc_low,
+      dex_pc_high);
+
+  // Write .debug_loc entries.
+  dwarf::Writer<> debug_loc(debug_loc_buffer);
+  const size_t debug_loc_offset = debug_loc.size();
+  const bool is64bit = Is64BitInstructionSet(isa);
+  std::vector<uint8_t> expr_buffer;
+  for (const VariableLocation& variable_location : variable_locations) {
+    // Translate the dex register location to a DWARF expression.
+    // Note that a 64-bit value might be split across two distinct locations
+    // (for example, two 32-bit machine registers, or even the stack and a register).
+    dwarf::Expression expr(&expr_buffer);
+    DexRegisterLocation reg_lo = variable_location.reg_lo;
+    DexRegisterLocation reg_hi = variable_location.reg_hi;
+    for (int piece = 0; piece < (is64bitValue ? 2 : 1); piece++) {
+      DexRegisterLocation reg_loc = (piece == 0 ? reg_lo : reg_hi);
+      const Kind kind = reg_loc.GetKind();
+      const int32_t value = reg_loc.GetValue();
+      if (kind == Kind::kInStack) {
+        // The stack offset is relative to SP. Make it relative to CFA.
+        expr.WriteOpFbreg(value - method_info->frame_size_in_bytes);
+        if (piece == 0 && reg_hi.GetKind() == Kind::kInStack &&
+            reg_hi.GetValue() == value + 4) {
+          break;  // the high word is correctly implied by the low word.
+        }
+      } else if (kind == Kind::kInRegister) {
+        expr.WriteOpReg(GetDwarfCoreReg(isa, value).num());
+        if (piece == 0 && reg_hi.GetKind() == Kind::kInRegisterHigh &&
+            reg_hi.GetValue() == value) {
+          break;  // the high word is correctly implied by the low word.
+        }
+      } else if (kind == Kind::kInFpuRegister) {
+        if ((isa == kArm || isa == kThumb2) &&
+            piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegister &&
+            reg_hi.GetValue() == value + 1 && value % 2 == 0) {
+          // Translate S register pair to D register (e.g. S4+S5 to D2).
+          expr.WriteOpReg(Reg::ArmDp(value / 2).num());
+          break;
+        }
+        expr.WriteOpReg(GetDwarfFpReg(isa, value).num());
+        if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
+            reg_hi.GetValue() == reg_lo.GetValue()) {
+          break;  // the high word is correctly implied by the low word.
+        }
+      } else if (kind == Kind::kConstant) {
+        expr.WriteOpConsts(value);
+        expr.WriteOpStackValue();
+      } else if (kind == Kind::kNone) {
+        break;
+      } else {
+        // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind().
+        // kInRegisterHigh and kInFpuRegisterHigh should be handled by
+        // the special cases above and they should not occur alone.
+        LOG(ERROR) << "Unexpected register location kind: " << kind;
+        break;
+      }
+      if (is64bitValue) {
+        // Write the marker which is needed by split 64-bit values.
+        // This code is skipped by the special cases.
+        expr.WriteOpPiece(4);
+      }
+    }
+
+    if (expr.size() > 0) {
+      if (is64bit) {
+        debug_loc.PushUint64(variable_location.low_pc);
+        debug_loc.PushUint64(variable_location.high_pc);
+      } else {
+        debug_loc.PushUint32(variable_location.low_pc);
+        debug_loc.PushUint32(variable_location.high_pc);
+      }
+      // Write the expression.
+      debug_loc.PushUint16(expr.size());
+      debug_loc.PushData(expr.data());
+    } else {
+      // Do not generate .debug_loc if the location is not known.
+    }
+  }
+  // Write end-of-list entry.
+  if (is64bit) {
+    debug_loc.PushUint64(0);
+    debug_loc.PushUint64(0);
+  } else {
+    debug_loc.PushUint32(0);
+    debug_loc.PushUint32(0);
+  }
+
+  // Write .debug_ranges entries.
+  // This includes ranges where the variable is in scope but the location is not known.
+  dwarf::Writer<> debug_ranges(debug_ranges_buffer);
+  size_t debug_ranges_offset = debug_ranges.size();
+  for (size_t i = 0; i < variable_locations.size(); i++) {
+    uint32_t low_pc = variable_locations[i].low_pc;
+    uint32_t high_pc = variable_locations[i].high_pc;
+    while (i + 1 < variable_locations.size() && variable_locations[i+1].low_pc == high_pc) {
+      // Merge address range with the next entry.
+      high_pc = variable_locations[++i].high_pc;
+    }
+    if (is64bit) {
+      debug_ranges.PushUint64(low_pc);
+      debug_ranges.PushUint64(high_pc);
+    } else {
+      debug_ranges.PushUint32(low_pc);
+      debug_ranges.PushUint32(high_pc);
+    }
+  }
+  // Write end-of-list entry.
+  if (is64bit) {
+    debug_ranges.PushUint64(0);
+    debug_ranges.PushUint64(0);
+  } else {
+    debug_ranges.PushUint32(0);
+    debug_ranges.PushUint32(0);
+  }
+
+  // Simple de-duplication - check whether this entry is the same as the last one (or a tail of it).
+  size_t debug_ranges_entry_size = debug_ranges.size() - debug_ranges_offset;
+  if (debug_ranges_offset >= debug_ranges_entry_size) {
+    size_t previous_offset = debug_ranges_offset - debug_ranges_entry_size;
+    if (memcmp(debug_ranges_buffer->data() + previous_offset,
+               debug_ranges_buffer->data() + debug_ranges_offset,
+               debug_ranges_entry_size) == 0) {
+      // Remove what we have just written and use the last entry instead.
+      debug_ranges_buffer->resize(debug_ranges_offset);
+      debug_ranges_offset = previous_offset;
+    }
+  }
+
+  // Write attributes to .debug_info.
+  debug_info->WriteSecOffset(dwarf::DW_AT_location, debug_loc_offset);
+  debug_info->WriteSecOffset(dwarf::DW_AT_start_scope, debug_ranges_offset);
+}
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
+
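
The tail de-duplication near the end of WriteDebugLocEntry is a plain byte-wise comparison against the previous ranges entry. A self-contained sketch of the trick, with made-up buffer contents:

#include <cassert>
#include <cstddef>
#include <cstring>
#include <vector>

int main() {
  std::vector<unsigned char> buffer = {1, 2, 3, 4};  // Previous entry.
  size_t offset = buffer.size();
  buffer.insert(buffer.end(), {1, 2, 3, 4});         // New, identical entry.
  size_t entry_size = buffer.size() - offset;
  if (offset >= entry_size &&
      memcmp(buffer.data() + offset - entry_size,
             buffer.data() + offset,
             entry_size) == 0) {
    buffer.resize(offset);  // Remove what was just written...
    offset -= entry_size;   // ...and point at the earlier copy instead.
  }
  assert(buffer.size() == 4 && offset == 0);
  return 0;
}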
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
new file mode 100644
index 0000000..b7e000a
--- /dev/null
+++ b/compiler/debug/elf_debug_writer.cc
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "elf_debug_writer.h"
+
+#include <vector>
+
+#include "debug/dwarf/dwarf_constants.h"
+#include "debug/elf_compilation_unit.h"
+#include "debug/elf_debug_frame_writer.h"
+#include "debug/elf_debug_info_writer.h"
+#include "debug/elf_debug_line_writer.h"
+#include "debug/elf_debug_loc_writer.h"
+#include "debug/elf_gnu_debugdata_writer.h"
+#include "debug/elf_symtab_writer.h"
+#include "debug/method_debug_info.h"
+#include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "utils/array_ref.h"
+
+namespace art {
+namespace debug {
+
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    dwarf::CFIFormat cfi_format,
+                    bool write_oat_patches) {
+  // Write .strtab and .symtab.
+  WriteDebugSymbols(builder, method_infos, true /* with_signature */);
+
+  // Write .debug_frame.
+  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
+
+  // Group the methods into compilation units based on source file.
+  std::vector<ElfCompilationUnit> compilation_units;
+  const char* last_source_file = nullptr;
+  for (const MethodDebugInfo& mi : method_infos) {
+    if (mi.dex_file != nullptr) {
+      auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index);
+      const char* source_file = mi.dex_file->GetSourceFile(dex_class_def);
+      if (compilation_units.empty() || source_file != last_source_file) {
+        compilation_units.push_back(ElfCompilationUnit());
+      }
+      ElfCompilationUnit& cu = compilation_units.back();
+      cu.methods.push_back(&mi);
+      // All methods must have the same addressing mode; otherwise the min/max below does not work.
+      DCHECK_EQ(cu.methods.front()->is_code_address_text_relative, mi.is_code_address_text_relative);
+      cu.is_code_address_text_relative = mi.is_code_address_text_relative;
+      cu.code_address = std::min(cu.code_address, mi.code_address);
+      cu.code_end = std::max(cu.code_end, mi.code_address + mi.code_size);
+      last_source_file = source_file;
+    }
+  }
+
+  // Write .debug_line section.
+  if (!compilation_units.empty()) {
+    ElfDebugLineWriter<ElfTypes> line_writer(builder);
+    line_writer.Start();
+    for (auto& compilation_unit : compilation_units) {
+      line_writer.WriteCompilationUnit(compilation_unit);
+    }
+    line_writer.End(write_oat_patches);
+  }
+
+  // Write .debug_info section.
+  if (!compilation_units.empty()) {
+    ElfDebugInfoWriter<ElfTypes> info_writer(builder);
+    info_writer.Start();
+    for (const auto& compilation_unit : compilation_units) {
+      ElfCompilationUnitWriter<ElfTypes> cu_writer(&info_writer);
+      cu_writer.Write(compilation_unit);
+    }
+    info_writer.End(write_oat_patches);
+  }
+}
+
+std::vector<uint8_t> MakeMiniDebugInfo(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    size_t rodata_size,
+    size_t text_size,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
+  if (Is64BitInstructionSet(isa)) {
+    return MakeMiniDebugInfoInternal<ElfTypes64>(isa,
+                                                 features,
+                                                 rodata_size,
+                                                 text_size,
+                                                 method_infos);
+  } else {
+    return MakeMiniDebugInfoInternal<ElfTypes32>(isa,
+                                                 features,
+                                                 rodata_size,
+                                                 text_size,
+                                                 method_infos);
+  }
+}
+
+template <typename ElfTypes>
+static std::vector<uint8_t> WriteDebugElfFileForMethodsInternal(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, features, &out));
+  // No program headers since the ELF file is not linked and has no allocated sections.
+  builder->Start(false /* write_program_headers */);
+  WriteDebugInfo(builder.get(),
+                 method_infos,
+                 dwarf::DW_DEBUG_FRAME_FORMAT,
+                 false /* write_oat_patches */);
+  builder->End();
+  CHECK(builder->Good());
+  return buffer;
+}
+
+std::vector<uint8_t> WriteDebugElfFileForMethods(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForMethodsInternal<ElfTypes64>(isa, features, method_infos);
+  } else {
+    return WriteDebugElfFileForMethodsInternal<ElfTypes32>(isa, features, method_infos);
+  }
+}
+
+template <typename ElfTypes>
+static std::vector<uint8_t> WriteDebugElfFileForClassesInternal(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<mirror::Class*>& types)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, features, &out));
+  // No program headers since the ELF file is not linked and has no allocated sections.
+  builder->Start(false /* write_program_headers */);
+  ElfDebugInfoWriter<ElfTypes> info_writer(builder.get());
+  info_writer.Start();
+  ElfCompilationUnitWriter<ElfTypes> cu_writer(&info_writer);
+  cu_writer.Write(types);
+  info_writer.End(false /* write_oat_patches */);
+
+  builder->End();
+  CHECK(builder->Good());
+  return buffer;
+}
+
+std::vector<uint8_t> WriteDebugElfFileForClasses(InstructionSet isa,
+                                                 const InstructionSetFeatures* features,
+                                                 const ArrayRef<mirror::Class*>& types) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, features, types);
+  } else {
+    return WriteDebugElfFileForClassesInternal<ElfTypes32>(isa, features, types);
+  }
+}
+
+std::vector<MethodDebugInfo> MakeTrampolineInfos(const OatHeader& header) {
+  std::map<const char*, uint32_t> trampolines = {
+    { "interpreterToInterpreterBridge", header.GetInterpreterToInterpreterBridgeOffset() },
+    { "interpreterToCompiledCodeBridge", header.GetInterpreterToCompiledCodeBridgeOffset() },
+    { "jniDlsymLookup", header.GetJniDlsymLookupOffset() },
+    { "quickGenericJniTrampoline", header.GetQuickGenericJniTrampolineOffset() },
+    { "quickImtConflictTrampoline", header.GetQuickImtConflictTrampolineOffset() },
+    { "quickResolutionTrampoline", header.GetQuickResolutionTrampolineOffset() },
+    { "quickToInterpreterBridge", header.GetQuickToInterpreterBridgeOffset() },
+  };
+  std::vector<MethodDebugInfo> result;
+  for (const auto& it : trampolines) {
+    if (it.second != 0) {
+      MethodDebugInfo info = MethodDebugInfo();
+      info.trampoline_name = it.first;
+      info.isa = header.GetInstructionSet();
+      info.is_code_address_text_relative = true;
+      info.code_address = it.second - header.GetExecutableOffset();
+      info.code_size = 0;  // The symbol lasts until the next symbol.
+      result.push_back(std::move(info));
+    }
+  }
+  return result;
+}
+
+// Explicit instantiations
+template void WriteDebugInfo<ElfTypes32>(
+    ElfBuilder<ElfTypes32>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    dwarf::CFIFormat cfi_format,
+    bool write_oat_patches);
+template void WriteDebugInfo<ElfTypes64>(
+    ElfBuilder<ElfTypes64>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    dwarf::CFIFormat cfi_format,
+    bool write_oat_patches);
+
+}  // namespace debug
+}  // namespace art
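
The compilation-unit grouping in WriteDebugInfo above puts consecutive methods that share a source file into one unit. A simplified sketch of that grouping (note: the real code compares source-file pointers interned by the dex file, whereas this stand-in compares strings):

#include <cassert>
#include <string>
#include <vector>

struct Method { std::string source_file; };           // Stand-in for MethodDebugInfo.
struct Unit { std::vector<const Method*> methods; };  // Stand-in for ElfCompilationUnit.

static std::vector<Unit> GroupBySourceFile(const std::vector<Method>& methods) {
  std::vector<Unit> units;
  const std::string* last_source_file = nullptr;
  for (const Method& m : methods) {
    // Start a new unit whenever the source file changes.
    if (units.empty() || m.source_file != *last_source_file) {
      units.push_back(Unit());
    }
    units.back().methods.push_back(&m);
    last_source_file = &m.source_file;
  }
  return units;
}

int main() {
  std::vector<Method> methods = {{"A.java"}, {"A.java"}, {"B.java"}};
  std::vector<Unit> units = GroupBySourceFile(methods);
  assert(units.size() == 2);
  assert(units[0].methods.size() == 2 && units[1].methods.size() == 1);
  return 0;
}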
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
new file mode 100644
index 0000000..6f52249
--- /dev/null
+++ b/compiler/debug/elf_debug_writer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "debug/dwarf/dwarf_constants.h"
+#include "elf_builder.h"
+#include "utils/array_ref.h"
+
+namespace art {
+class OatHeader;
+namespace mirror {
+class Class;
+}
+namespace debug {
+struct MethodDebugInfo;
+
+template <typename ElfTypes>
+void WriteDebugInfo(
+    ElfBuilder<ElfTypes>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    dwarf::CFIFormat cfi_format,
+    bool write_oat_patches);
+
+std::vector<uint8_t> MakeMiniDebugInfo(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    size_t rodata_section_size,
+    size_t text_section_size,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
+
+std::vector<uint8_t> WriteDebugElfFileForMethods(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
+
+std::vector<uint8_t> WriteDebugElfFileForClasses(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<mirror::Class*>& types)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+std::vector<MethodDebugInfo> MakeTrampolineInfos(const OatHeader& oat_header);
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
diff --git a/compiler/debug/elf_gnu_debugdata_writer.h b/compiler/debug/elf_gnu_debugdata_writer.h
new file mode 100644
index 0000000..fb63d62
--- /dev/null
+++ b/compiler/debug/elf_gnu_debugdata_writer.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_GNU_DEBUGDATA_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_GNU_DEBUGDATA_WRITER_H_
+
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+
+// liblzma.
+#include "7zCrc.h"
+#include "XzCrc64.h"
+#include "XzEnc.h"
+
+namespace art {
+namespace debug {
+
+static void XzCompress(const std::vector<uint8_t>* src, std::vector<uint8_t>* dst) {
+  // Configure the compression library.
+  CrcGenerateTable();
+  Crc64GenerateTable();
+  CLzma2EncProps lzma2Props;
+  Lzma2EncProps_Init(&lzma2Props);
+  lzma2Props.lzmaProps.level = 1;  // Fast compression.
+  Lzma2EncProps_Normalize(&lzma2Props);
+  CXzProps props;
+  XzProps_Init(&props);
+  props.lzma2Props = &lzma2Props;
+  // Implement the required interface for communication (the interface is plain C, so no virtual methods).
+  struct XzCallbacks : public ISeqInStream, public ISeqOutStream, public ICompressProgress {
+    static SRes ReadImpl(void* p, void* buf, size_t* size) {
+      auto* ctx = static_cast<XzCallbacks*>(reinterpret_cast<ISeqInStream*>(p));
+      *size = std::min(*size, ctx->src_->size() - ctx->src_pos_);
+      memcpy(buf, ctx->src_->data() + ctx->src_pos_, *size);
+      ctx->src_pos_ += *size;
+      return SZ_OK;
+    }
+    static size_t WriteImpl(void* p, const void* buf, size_t size) {
+      auto* ctx = static_cast<XzCallbacks*>(reinterpret_cast<ISeqOutStream*>(p));
+      const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buf);
+      ctx->dst_->insert(ctx->dst_->end(), buffer, buffer + size);
+      return size;
+    }
+    static SRes ProgressImpl(void* , UInt64, UInt64) {
+      return SZ_OK;
+    }
+    size_t src_pos_;
+    const std::vector<uint8_t>* src_;
+    std::vector<uint8_t>* dst_;
+  };
+  XzCallbacks callbacks;
+  callbacks.Read = XzCallbacks::ReadImpl;
+  callbacks.Write = XzCallbacks::WriteImpl;
+  callbacks.Progress = XzCallbacks::ProgressImpl;
+  callbacks.src_pos_ = 0;
+  callbacks.src_ = src;
+  callbacks.dst_ = dst;
+  // Compress.
+  SRes res = Xz_Encode(&callbacks, &callbacks, &props, &callbacks);
+  CHECK_EQ(res, SZ_OK);
+}
+
+template <typename ElfTypes>
+static std::vector<uint8_t> MakeMiniDebugInfoInternal(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    size_t rodata_section_size,
+    size_t text_section_size,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Mini-debug-info ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, features, &out));
+  builder->Start();
+  // Mirror .rodata and .text as NOBITS sections.
+  // This is needed to detect relocations after compression.
+  builder->GetRoData()->WriteNoBitsSection(rodata_section_size);
+  builder->GetText()->WriteNoBitsSection(text_section_size);
+  WriteDebugSymbols(builder.get(), method_infos, false /* with_signature */);
+  WriteCFISection(builder.get(),
+                  method_infos,
+                  dwarf::DW_DEBUG_FRAME_FORMAT,
+                  false /* write_oat_patches */);
+  builder->End();
+  CHECK(builder->Good());
+  std::vector<uint8_t> compressed_buffer;
+  compressed_buffer.reserve(buffer.size() / 4);
+  XzCompress(&buffer, &compressed_buffer);
+  return compressed_buffer;
+}
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_GNU_DEBUGDATA_WRITER_H_
+
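
The XzCallbacks struct above bridges liblzma's C function-pointer interfaces to C++ state. A self-contained sketch of the same adapter pattern, with an invented CSeqOutStream interface standing in for liblzma's ISeqOutStream:

#include <cassert>
#include <cstddef>
#include <vector>

// Invented C-style interface; liblzma's real ISeqOutStream is analogous.
struct CSeqOutStream {
  size_t (*Write)(void* p, const void* buf, size_t size);
};

struct VectorOutStream : public CSeqOutStream {
  static size_t WriteImpl(void* p, const void* buf, size_t size) {
    // The callback receives the interface pointer and casts it back.
    auto* self = static_cast<VectorOutStream*>(static_cast<CSeqOutStream*>(p));
    const unsigned char* bytes = static_cast<const unsigned char*>(buf);
    self->dst_->insert(self->dst_->end(), bytes, bytes + size);
    return size;
  }
  std::vector<unsigned char>* dst_;
};

int main() {
  std::vector<unsigned char> out;
  VectorOutStream stream;
  stream.Write = VectorOutStream::WriteImpl;
  stream.dst_ = &out;
  unsigned char data[4] = {1, 2, 3, 4};
  // A C library would invoke the callback like this:
  size_t written = stream.Write(&stream, data, sizeof(data));
  assert(written == 4 && out.size() == 4);
  return 0;
}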
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
new file mode 100644
index 0000000..045eddd
--- /dev/null
+++ b/compiler/debug/elf_symtab_writer.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_SYMTAB_WRITER_H_
+#define ART_COMPILER_DEBUG_ELF_SYMTAB_WRITER_H_
+
+#include <unordered_set>
+
+#include "debug/method_debug_info.h"
+#include "elf_builder.h"
+#include "utils.h"
+
+namespace art {
+namespace debug {
+
+// The ARM specification defines three special mapping symbols
+// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
+// These symbols can be used by tools, for example, to pretty
+// print instructions correctly.  Objdump will use them if they
+// exist, but it will still work well without them.
+// However, these extra symbols take space, so let's just generate
+// one symbol which marks the whole .text section as code.
+constexpr bool kGenerateSingleArmMappingSymbol = true;
+
+template <typename ElfTypes>
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const MethodDebugInfo>& method_infos,
+                              bool with_signature) {
+  uint64_t mapping_symbol_address = std::numeric_limits<uint64_t>::max();
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_infos.empty()) {
+    return;
+  }
+
+  // Find all addresses which contain deduped methods.
+  // The first instance of a method is not marked deduped_, but the rest are.
+  std::unordered_set<uint64_t> deduped_addresses;
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped) {
+      deduped_addresses.insert(info.code_address);
+    }
+  }
+
+  strtab->Start();
+  strtab->Write("");  // strtab should start with empty string.
+  std::string last_name;
+  size_t last_name_offset = 0;
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped) {
+      continue;  // Add symbol only for the first instance.
+    }
+    size_t name_offset;
+    if (info.trampoline_name != nullptr) {
+      name_offset = strtab->Write(info.trampoline_name);
+    } else {
+      DCHECK(info.dex_file != nullptr);
+      std::string name = PrettyMethod(info.dex_method_index, *info.dex_file, with_signature);
+      if (deduped_addresses.find(info.code_address) != deduped_addresses.end()) {
+        name += " [DEDUPED]";
+      }
+      // If we write method names without a signature, we might see the same name multiple times.
+      name_offset = (name == last_name ? last_name_offset : strtab->Write(name));
+      last_name = std::move(name);
+      last_name_offset = name_offset;
+    }
+
+    const auto* text = info.is_code_address_text_relative ? builder->GetText() : nullptr;
+    uint64_t address = info.code_address + (text != nullptr ? text->GetAddress() : 0);
+    // Add in the code delta, e.g., the Thumb bit (bit 0) for Thumb2 code.
+    address += CompiledMethod::CodeDelta(info.isa);
+    symtab->Add(name_offset, text, address, info.code_size, STB_GLOBAL, STT_FUNC);
+
+    // Conforming to AAELF, add a $t mapping symbol to indicate the start of a sequence of Thumb2
+    // instructions, so that disassembler tools can disassemble correctly.
+    // Note that even if we generate just a single mapping symbol, ARM's Streamline
+    // requires it to match a function symbol.  Address 0 alone does not work.
+    if (info.isa == kThumb2) {
+      if (address < mapping_symbol_address || !kGenerateSingleArmMappingSymbol) {
+        symtab->Add(strtab->Write("$t"), text, address & ~1, 0, STB_LOCAL, STT_NOTYPE);
+        mapping_symbol_address = address;
+      }
+    }
+  }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
+}
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_ELF_SYMTAB_WRITER_H_
+
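
The two-pass [DEDUPED] marking above can be sketched standalone; the Info struct is an illustrative stand-in for MethodDebugInfo:

#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_set>
#include <vector>

struct Info {
  uint64_t code_address;
  bool deduped;
  std::string name;
};

int main() {
  std::vector<Info> infos = {
    {0x1000, false, "A.foo"},  // First instance at 0x1000.
    {0x1000, true,  "B.foo"},  // Deduplicated against A.foo.
    {0x2000, false, "C.bar"},
  };
  // Pass 1: collect addresses that have at least one deduped instance.
  std::unordered_set<uint64_t> deduped_addresses;
  for (const Info& info : infos) {
    if (info.deduped) {
      deduped_addresses.insert(info.code_address);
    }
  }
  // Pass 2: emit a symbol only for first instances, tagging shared code.
  std::vector<std::string> symbols;
  for (const Info& info : infos) {
    if (info.deduped) {
      continue;  // Symbol only for the first instance.
    }
    std::string name = info.name;
    if (deduped_addresses.count(info.code_address) != 0) {
      name += " [DEDUPED]";
    }
    symbols.push_back(name);
  }
  assert(symbols.size() == 2);
  assert(symbols[0] == "A.foo [DEDUPED]" && symbols[1] == "C.bar");
  return 0;
}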
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
new file mode 100644
index 0000000..ed1da2c
--- /dev/null
+++ b/compiler/debug/method_debug_info.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_METHOD_DEBUG_INFO_H_
+#define ART_COMPILER_DEBUG_METHOD_DEBUG_INFO_H_
+
+#include "compiled_method.h"
+#include "dex_file.h"
+
+namespace art {
+namespace debug {
+
+struct MethodDebugInfo {
+  const char* trampoline_name;
+  const DexFile* dex_file;  // Native methods (trampolines) do not reference a dex file.
+  size_t class_def_index;
+  uint32_t dex_method_index;
+  uint32_t access_flags;
+  const DexFile::CodeItem* code_item;
+  InstructionSet isa;
+  bool deduped;
+  bool is_native_debuggable;
+  bool is_optimized;
+  bool is_code_address_text_relative;  // Is the address offset from start of .text section?
+  uint64_t code_address;
+  uint32_t code_size;
+  uint32_t frame_size_in_bytes;
+  const void* code_info;
+  ArrayRef<const uint8_t> cfi;
+};
+
+}  // namespace debug
+}  // namespace art
+
+#endif  // ART_COMPILER_DEBUG_METHOD_DEBUG_INFO_H_
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
deleted file mode 100644
index 11a7e44..0000000
--- a/compiler/dex/bb_optimizations.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "bb_optimizations.h"
-#include "dataflow_iterator.h"
-#include "dataflow_iterator-inl.h"
-
-namespace art {
-
-/*
- * Code Layout pass implementation start.
- */
-bool CodeLayout::Worker(PassDataHolder* data) const {
-  DCHECK(data != nullptr);
-  PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-  CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-  DCHECK(c_unit != nullptr);
-  BasicBlock* bb = pass_me_data_holder->bb;
-  DCHECK(bb != nullptr);
-  c_unit->mir_graph->LayoutBlocks(bb);
-  // No need of repeating, so just return false.
-  return false;
-}
-
-/*
- * BasicBlock Combine pass implementation start.
- */
-bool BBCombine::Worker(PassDataHolder* data) const {
-  DCHECK(data != nullptr);
-  PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-  CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-  DCHECK(c_unit != nullptr);
-  BasicBlock* bb = pass_me_data_holder->bb;
-  DCHECK(bb != nullptr);
-  c_unit->mir_graph->CombineBlocks(bb);
-
-  // No need of repeating, so just return false.
-  return false;
-}
-
-/*
- * MethodUseCount pass implementation start.
- */
-bool MethodUseCount::Gate(const PassDataHolder* data) const {
-  DCHECK(data != nullptr);
-  CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-  DCHECK(c_unit != nullptr);
-  // First initialize the data.
-  c_unit->mir_graph->InitializeMethodUses();
-
-  // Now check if the pass is to be ignored.
-  bool res = ((c_unit->disable_opt & (1 << kPromoteRegs)) == 0);
-
-  return res;
-}
-
-bool MethodUseCount::Worker(PassDataHolder* data) const {
-  DCHECK(data != nullptr);
-  PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-  CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-  DCHECK(c_unit != nullptr);
-  BasicBlock* bb = pass_me_data_holder->bb;
-  DCHECK(bb != nullptr);
-  c_unit->mir_graph->CountUses(bb);
-  // No need of repeating, so just return false.
-  return false;
-}
-
-}  // namespace art
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
deleted file mode 100644
index 02d5327..0000000
--- a/compiler/dex/bb_optimizations.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
-#define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
-
-#include "base/casts.h"
-#include "compiler_ir.h"
-#include "dex_flags.h"
-#include "pass_me.h"
-#include "mir_graph.h"
-
-namespace art {
-
-/**
- * @class String Change
- * @brief Converts calls to String.<init> to StringFactory instead.
- */
-class StringChange : public PassME {
- public:
-  StringChange() : PassME("StringChange", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->StringChange();
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->HasInvokes();
-  }
-};
-
-/**
- * @class CacheFieldLoweringInfo
- * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns.
- */
-class CacheFieldLoweringInfo : public PassME {
- public:
-  CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->DoCacheFieldLoweringInfo();
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->HasFieldAccess();
-  }
-};
-
-/**
- * @class CacheMethodLoweringInfo
- * @brief Cache the lowering info for methods called by INVOKEs.
- */
-class CacheMethodLoweringInfo : public PassME {
- public:
-  CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->DoCacheMethodLoweringInfo();
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->HasInvokes();
-  }
-};
-
-/**
- * @class SpecialMethodInliner
- * @brief Performs method inlining pass on special kinds of methods.
- * @details Special methods are methods that fall into one of the following categories:
- * empty, instance getter, instance setter, argument return, and constant return.
- */
-class SpecialMethodInliner : public PassME {
- public:
-  SpecialMethodInliner() : PassME("SpecialMethodInliner") {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->InlineSpecialMethodsGate();
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->InlineSpecialMethodsStart();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    c_unit->mir_graph->InlineSpecialMethods(bb);
-    // No need to repeat, so just return false.
-    return false;
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->InlineSpecialMethodsEnd();
-  }
-};
-
-/**
- * @class CodeLayout
- * @brief Perform the code layout pass.
- */
-class CodeLayout : public PassME {
- public:
-  CodeLayout() : PassME("CodeLayout", kAllNodes, kOptimizationBasicBlockChange, "2_post_layout_cfg") {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->VerifyDataflow();
-    c_unit->mir_graph->ClearAllVisitedFlags();
-  }
-
-  bool Worker(PassDataHolder* data) const;
-};
-
-/**
- * @class NullCheckElimination
- * @brief Null check elimination pass.
- */
-class NullCheckElimination : public PassME {
- public:
-  NullCheckElimination()
-    : PassME("NCE", kRepeatingPreOrderDFSTraversal, "3_post_nce_cfg") {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->EliminateNullChecksGate();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    return c_unit->mir_graph->EliminateNullChecks(bb);
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->EliminateNullChecksEnd();
-  }
-};
-
-class ClassInitCheckElimination : public PassME {
- public:
-  ClassInitCheckElimination()
-    : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->EliminateClassInitChecksGate();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    return c_unit->mir_graph->EliminateClassInitChecks(bb);
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->EliminateClassInitChecksEnd();
-  }
-};
-
-/**
- * @class GlobalValueNumberingPass
- * @brief Performs the global value numbering pass.
- */
-class GlobalValueNumberingPass : public PassME {
- public:
-  GlobalValueNumberingPass()
-    : PassME("GVN", kLoopRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") {
-  }
-
-  bool Gate(const PassDataHolder* data) const OVERRIDE {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->ApplyGlobalValueNumberingGate();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    return c_unit->mir_graph->ApplyGlobalValueNumbering(bb);
-  }
-
-  void End(PassDataHolder* data) const OVERRIDE {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->ApplyGlobalValueNumberingEnd();
-  }
-};
-
-/**
- * @class DeadCodeEliminationPass
- * @brief Performs the GVN-based dead code elimination pass.
- */
-class DeadCodeEliminationPass : public PassME {
- public:
-  DeadCodeEliminationPass() : PassME("DCE", kPreOrderDFSTraversal, "4_post_dce_cfg") {
-  }
-
-  bool Gate(const PassDataHolder* data) const OVERRIDE {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->EliminateDeadCodeGate();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    return c_unit->mir_graph->EliminateDeadCode(bb);
-  }
-
-  void End(PassDataHolder* data) const OVERRIDE {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->EliminateDeadCodeEnd();
-  }
-};
-
-/**
- * @class GlobalValueNumberingCleanupPass
- * @brief Performs the cleanup after global value numbering pass and the dependent
- *        dead code elimination pass that needs the GVN data.
- */
-class GlobalValueNumberingCleanupPass : public PassME {
- public:
-  GlobalValueNumberingCleanupPass()
-    : PassME("GVNCleanup", kNoNodes, "") {
-  }
-
-  void Start(PassDataHolder* data) const OVERRIDE {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->GlobalValueNumberingCleanup();
-  }
-};
-
-/**
- * @class BBCombine
- * @brief Perform the basic block combination pass.
- */
-class BBCombine : public PassME {
- public:
-  BBCombine() : PassME("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->HasTryCatchBlocks() ||
-        ((c_unit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
-  }
-
-  bool Worker(PassDataHolder* data) const;
-};
-
-/**
- * @class ConstantPropagation
- * @brief Perform a constant propagation pass.
- */
-class ConstantPropagation : public PassME {
- public:
-  ConstantPropagation() : PassME("ConstantPropagation") {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->InitializeConstantPropagation();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = down_cast<PassMEDataHolder*>(data)->bb;
-    DCHECK(bb != nullptr);
-    c_unit->mir_graph->DoConstantPropagation(bb);
-    // No need to repeat, so just return false.
-    return false;
-  }
-};
-
-/**
- * @class MethodUseCount
- * @brief Count the register uses of the method.
- */
-class MethodUseCount : public PassME {
- public:
-  MethodUseCount() : PassME("UseCount") {
-  }
-
-  bool Worker(PassDataHolder* data) const;
-
-  bool Gate(const PassDataHolder* data) const;
-};
-
-/**
- * @class BBOptimizations
- * @brief Any simple BasicBlock optimization can be put here.
- */
-class BBOptimizations : public PassME {
- public:
-  BBOptimizations()
-      : PassME("BBOptimizations", kNoNodes, kOptimizationBasicBlockChange, "5_post_bbo_cfg") {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return ((c_unit->disable_opt & (1 << kBBOpt)) == 0);
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->BasicBlockOptimizationStart();
-
-    /*
-     * This pass has a different ordering depending on whether exception edges
-     * are suppressed, so do the pass here for now:
-     *   - Later, Start() should just change the ordering, and we can move the
-     *     extended-block creation into the pass driver's main job with a new iterator.
-     */
-    c_unit->mir_graph->BasicBlockOptimization();
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->BasicBlockOptimizationEnd();
-    down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->DfsOrdersUpToDate();
-  }
-};
-
-/**
- * @class SuspendCheckElimination
- * @brief Performs the suspend check elimination pass.
- */
-class SuspendCheckElimination : public PassME {
- public:
-  SuspendCheckElimination()
-    : PassME("SuspendCheckElimination", kTopologicalSortTraversal, "6_post_sce_cfg") {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return c_unit->mir_graph->EliminateSuspendChecksGate();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    return c_unit->mir_graph->EliminateSuspendChecks(bb);
-  }
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
deleted file mode 100644
index b78b3d7..0000000
--- a/compiler/dex/compiler_enums.h
+++ /dev/null
@@ -1,676 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_COMPILER_ENUMS_H_
-#define ART_COMPILER_DEX_COMPILER_ENUMS_H_
-
-#include "dex_instruction.h"
-
-namespace art {
-
-enum RegisterClass {
-  kInvalidRegClass,
-  kCoreReg,
-  kFPReg,
-  kRefReg,
-  kAnyReg,
-};
-std::ostream& operator<<(std::ostream& os, const RegisterClass& rhs);
-
-enum BitsUsed {
-  kSize32Bits,
-  kSize64Bits,
-  kSize128Bits,
-  kSize256Bits,
-  kSize512Bits,
-  kSize1024Bits,
-};
-std::ostream& operator<<(std::ostream& os, const BitsUsed& rhs);
-
-enum SpecialTargetRegister {
-  kSelf,            // Thread pointer.
-  kSuspend,         // Used to reduce suspend checks for some targets.
-  kLr,
-  kPc,
-  kSp,
-  kArg0,
-  kArg1,
-  kArg2,
-  kArg3,
-  kArg4,
-  kArg5,
-  kArg6,
-  kArg7,
-  kFArg0,
-  kFArg1,
-  kFArg2,
-  kFArg3,
-  kFArg4,
-  kFArg5,
-  kFArg6,
-  kFArg7,
-  kFArg8,
-  kFArg9,
-  kFArg10,
-  kFArg11,
-  kFArg12,
-  kFArg13,
-  kFArg14,
-  kFArg15,
-  kRet0,
-  kRet1,
-  kInvokeTgt,
-  kHiddenArg,
-  kHiddenFpArg,
-  kCount
-};
-std::ostream& operator<<(std::ostream& os, const SpecialTargetRegister& code);
-
-enum RegLocationType {
-  kLocDalvikFrame = 0,  // Normal Dalvik register
-  kLocPhysReg,
-  kLocCompilerTemp,
-  kLocInvalid
-};
-std::ostream& operator<<(std::ostream& os, const RegLocationType& rhs);
-
-enum BBType {
-  kNullBlock,
-  kEntryBlock,
-  kDalvikByteCode,
-  kExitBlock,
-  kExceptionHandling,
-  kDead,
-};
-std::ostream& operator<<(std::ostream& os, const BBType& code);
-
-// Shared pseudo opcodes - must be < 0.
-enum LIRPseudoOpcode {
-  kPseudoPrologueBegin = -18,
-  kPseudoPrologueEnd = -17,
-  kPseudoEpilogueBegin = -16,
-  kPseudoEpilogueEnd = -15,
-  kPseudoExportedPC = -14,
-  kPseudoSafepointPC = -13,
-  kPseudoIntrinsicRetry = -12,
-  kPseudoSuspendTarget = -11,
-  kPseudoThrowTarget = -10,
-  kPseudoCaseLabel = -9,
-  kPseudoBarrier = -8,
-  kPseudoEntryBlock = -7,
-  kPseudoExitBlock = -6,
-  kPseudoTargetLabel = -5,
-  kPseudoDalvikByteCodeBoundary = -4,
-  kPseudoPseudoAlign4 = -3,
-  kPseudoEHBlockLabel = -2,
-  kPseudoNormalBlockLabel = -1,
-};
-std::ostream& operator<<(std::ostream& os, const LIRPseudoOpcode& rhs);
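A note on the "must be < 0" invariant above: real LIR opcodes are non-negative machine-specific values, so a plain sign test distinguishes pseudo ops. A minimal sketch (ART's quick backend has a similar predicate, but treat the exact name here as an assumption):

static inline bool IsPseudoLirOp(int opcode) {
  // Real LIR opcodes are non-negative; pseudo opcodes share the negative range.
  return opcode < 0;
}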
-
-enum ExtendedMIROpcode {
-  kMirOpFirst = kNumPackedOpcodes,
-  kMirOpPhi = kMirOpFirst,
-
-  // @brief Copy from one VR to another.
-  // @details
-  // vA: destination VR
-  // vB: source VR
-  kMirOpCopy,
-
-  // @brief Used to do float comparison with less-than bias.
-  // @details Unlike cmpl-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmplFloat,
-
-  // @brief Used to do float comparison with greater-than bias.
-  // @details Unlike cmpg-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmpgFloat,
-
-  // @brief Used to do double comparison with less-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmplDouble,
-
-  // @brief Used to do double comparison with greater-than bias.
-  // @details Unlike cmpg-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpgDouble,
-
-  // @brief Used to do comparison of 64-bit long integers.
-  // @details Unlike cmp-long, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpLong,
-
-  // @brief This represents a no-op.
-  kMirOpNop,
-
-  // @brief Do a null check on the object register.
-  // @details The backends may implement this implicitly or explicitly. This MIR is guaranteed
-  // to have the correct offset as an exception thrower.
-  // vA: object register
-  kMirOpNullCheck,
-
-  kMirOpRangeCheck,
-  kMirOpDivZeroCheck,
-  kMirOpCheck,
-  kMirOpSelect,
-
-  // Vector opcodes:
-  // TypeSize is an encoded field giving the element type and the vector size.
-  // It is encoded as OpSize << 16 | (number of bits in vector); see the sketch after this enum.
-  //
-  // Destination and source are integers that will be interpreted by the
-  // backend that supports Vector operations.  Backends are permitted to support only
-  // certain vector register sizes.
-  //
-  // At this point, only two operand instructions are supported.  Three operand instructions
-  // could be supported by using a bit in TypeSize and arg[0] where needed.
-
-  // @brief MIR to move constant data to a vector register
-  // vA: destination
-  // vB: number of bits in register
-  // args[0]~args[3]: up to 128 bits of data for initialization
-  kMirOpConstVector,
-
-  // @brief MIR to move a vectorized register to another
-  // vA: destination
-  // vB: source
-  // vC: TypeSize
-  kMirOpMoveVector,
-
-  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedMultiply,
-
-  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAddition,
-
-  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedSubtract,
-
-  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedShiftLeft,
-
-  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedSignedShiftRight,
-
-  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedUnsignedShiftRight,
-
-  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAnd,
-
-  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedOr,
-
-  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedXor,
-
-  // @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-  // @details The instruction does a horizontal addition of the packed elements and then adds the result to the VR.
-  // vA: destination and source VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  kMirOpPackedAddReduce,
-
-  // @brief Extract a packed element into a single VR.
-  // vA: destination VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  // arg[0]: The index to use for extraction from vector register (which packed element)
-  kMirOpPackedReduce,
-
-  // @brief Create a vector value, with all TypeSize values equal to vC
-  // vA: destination vector register
-  // vB: source VR (not vector register)
-  // vC: TypeSize
-  kMirOpPackedSet,
-
-  // @brief Reserve a range of vector registers.
-  // vA: Start vector register to reserve.
-  // vB: Inclusive end vector register to reserve.
-  // @note: The backend may choose to map vector numbers used in vector opcodes.
-  //  Reserved registers are removed from the backend's temporary register pool.
-  kMirOpReserveVectorRegisters,
-
-  // @brief Free a range of reserved vector registers
-  // vA: Start vector register to unreserve.
-  // vB: Inclusive end vector register to unreserve.
-  // @note: All currently reserved vector registers are returned to the temporary pool.
-  kMirOpReturnVectorRegisters,
-
-  // @brief Create a memory barrier.
-  // vA: a constant defined by enum MemBarrierKind.
-  kMirOpMemBarrier,
-
-  // @brief Used to fill a vector register with array values.
-  // @details Just as with normal arrays, an access on a null object register must throw NullPointerException
-  // and an invalid index must throw ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at the same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: destination vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayGet,
-
-  // @brief Used to store a vector register into array.
-  // @details Just as with normal arrays, an access on a null object register must throw NullPointerException
-  // and an invalid index must throw ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aput it replaced and must happen at the same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: source vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayPut,
-
-  // @brief Multiply-add integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddInt,
-
-  // @brief Multiply-subtract integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubInt,
-
-  // @brief Multiply-add long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddLong,
-
-  // @brief Multiply-subtract long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubLong,
-
-  kMirOpLast,
-};
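As promised in the TypeSize comment inside the enum above, a minimal sketch of the encoding; the helper names are invented for illustration, and OpSize is the enum declared later in this file:

// TypeSize = OpSize << 16 | (number of bits in the vector).
constexpr uint32_t EncodeTypeSize(OpSize op_size, uint32_t vector_bits) {
  return (static_cast<uint32_t>(op_size) << 16) | vector_bits;
}
constexpr OpSize DecodeOpSize(uint32_t type_size) {
  return static_cast<OpSize>(type_size >> 16);
}
constexpr uint32_t DecodeVectorBits(uint32_t type_size) {
  return type_size & 0xffffu;
}
// Example: EncodeTypeSize(k32, 128) describes four packed 32-bit lanes in a
// 128-bit vector register.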
-
-enum MIROptimizationFlagPositions {
-  kMIRIgnoreNullCheck = 0,
-  kMIRIgnoreRangeCheck,
-  kMIRIgnoreCheckCast,
-  kMIRStoreNonNullValue,              // Storing non-null value, always mark GC card.
-  kMIRClassIsInitialized,
-  kMIRClassIsInDexCache,
-  kMirIgnoreDivZeroCheck,
-  kMIRInlined,                        // Invoke is inlined (ie dead).
-  kMIRInlinedPred,                    // Invoke is inlined via prediction.
-  kMIRCallee,                         // Instruction is inlined from callee.
-  kMIRIgnoreSuspendCheck,
-  kMIRDup,
-  kMIRMark,                           // Temporary node mark can be used by
-                                      // opt passes for their private needs.
-  kMIRStoreNonTemporal,
-  kMIRLastMIRFlag,
-};
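These enumerators are bit positions rather than masks, so a flag test shifts first. A hedged sketch of such tests; treating the flag word as a plain uint32_t is an assumption here:

// Illustrative flag operations built from the positions above.
inline bool IgnoresNullCheck(uint32_t optimization_flags) {
  return (optimization_flags & (1u << kMIRIgnoreNullCheck)) != 0u;
}
inline void MarkInlined(uint32_t* optimization_flags) {
  *optimization_flags |= (1u << kMIRInlined);
}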
-
-// For successor_block_list.
-enum BlockListType {
-  kNotUsed = 0,
-  kCatch,
-  kPackedSwitch,
-  kSparseSwitch,
-};
-std::ostream& operator<<(std::ostream& os, const BlockListType& rhs);
-
-enum AssemblerStatus {
-  kSuccess,
-  kRetryAll,
-};
-std::ostream& operator<<(std::ostream& os, const AssemblerStatus& rhs);
-
-enum OpSize {
-  kWord,            // Natural word size of target (32/64).
-  k32,
-  k64,
-  kReference,       // Object reference; compressed on 64-bit targets.
-  kSingle,
-  kDouble,
-  kUnsignedHalf,
-  kSignedHalf,
-  kUnsignedByte,
-  kSignedByte,
-};
-std::ostream& operator<<(std::ostream& os, const OpSize& kind);
-
-enum OpKind {
-  kOpMov,
-  kOpCmov,
-  kOpMvn,
-  kOpCmp,
-  kOpLsl,
-  kOpLsr,
-  kOpAsr,
-  kOpRor,
-  kOpNot,
-  kOpAnd,
-  kOpOr,
-  kOpXor,
-  kOpNeg,
-  kOpAdd,
-  kOpAdc,
-  kOpSub,
-  kOpSbc,
-  kOpRsub,
-  kOpMul,
-  kOpDiv,
-  kOpRem,
-  kOpBic,
-  kOpCmn,
-  kOpTst,
-  kOpRev,
-  kOpRevsh,
-  kOpBkpt,
-  kOpBlx,
-  kOpPush,
-  kOpPop,
-  kOp2Char,
-  kOp2Short,
-  kOp2Byte,
-  kOpCondBr,
-  kOpUncondBr,
-  kOpBx,
-  kOpInvalid,
-};
-std::ostream& operator<<(std::ostream& os, const OpKind& rhs);
-
-enum MoveType {
-  kMov8GP,      // Move 8-bit general purpose register.
-  kMov16GP,     // Move 16-bit general purpose register.
-  kMov32GP,     // Move 32-bit general purpose register.
-  kMov64GP,     // Move 64-bit general purpose register.
-  kMov32FP,     // Move 32-bit FP register.
-  kMov64FP,     // Move 64-bit FP register.
-  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
-  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
-  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
-  kMov128FP = kMovU128FP,
-  kMovA128FP,   // Move 128-bit FP register to/from a region guaranteed to be 16-byte aligned.
-  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
-  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
-};
-std::ostream& operator<<(std::ostream& os, const MoveType& kind);
-
-enum ConditionCode {
-  kCondEq,  // equal
-  kCondNe,  // not equal
-  kCondCs,  // carry set
-  kCondCc,  // carry clear
-  kCondUlt,  // unsigned less than
-  kCondUge,  // unsigned greater than or same
-  kCondMi,  // minus
-  kCondPl,  // plus, positive or zero
-  kCondVs,  // overflow
-  kCondVc,  // no overflow
-  kCondHi,  // unsigned greater than
-  kCondLs,  // unsigned lower or same
-  kCondGe,  // signed greater than or equal
-  kCondLt,  // signed less than
-  kCondGt,  // signed greater than
-  kCondLe,  // signed less than or equal
-  kCondAl,  // always
-  kCondNv,  // never
-};
-std::ostream& operator<<(std::ostream& os, const ConditionCode& kind);
-
-// Target specific condition encodings
-enum ArmConditionCode {
-  kArmCondEq = 0x0,  // 0000
-  kArmCondNe = 0x1,  // 0001
-  kArmCondCs = 0x2,  // 0010
-  kArmCondCc = 0x3,  // 0011
-  kArmCondMi = 0x4,  // 0100
-  kArmCondPl = 0x5,  // 0101
-  kArmCondVs = 0x6,  // 0110
-  kArmCondVc = 0x7,  // 0111
-  kArmCondHi = 0x8,  // 1000
-  kArmCondLs = 0x9,  // 1001
-  kArmCondGe = 0xa,  // 1010
-  kArmCondLt = 0xb,  // 1011
-  kArmCondGt = 0xc,  // 1100
-  kArmCondLe = 0xd,  // 1101
-  kArmCondAl = 0xe,  // 1110
-  kArmCondNv = 0xf,  // 1111
-};
-std::ostream& operator<<(std::ostream& os, const ArmConditionCode& kind);
-
-enum X86ConditionCode {
-  kX86CondO   = 0x0,    // overflow
-  kX86CondNo  = 0x1,    // not overflow
-
-  kX86CondB   = 0x2,    // below
-  kX86CondNae = kX86CondB,  // not-above-equal
-  kX86CondC   = kX86CondB,  // carry
-
-  kX86CondNb  = 0x3,    // not-below
-  kX86CondAe  = kX86CondNb,  // above-equal
-  kX86CondNc  = kX86CondNb,  // not-carry
-
-  kX86CondZ   = 0x4,    // zero
-  kX86CondEq  = kX86CondZ,  // equal
-
-  kX86CondNz  = 0x5,    // not-zero
-  kX86CondNe  = kX86CondNz,  // not-equal
-
-  kX86CondBe  = 0x6,    // below-equal
-  kX86CondNa  = kX86CondBe,  // not-above
-
-  kX86CondNbe = 0x7,    // not-below-equal
-  kX86CondA   = kX86CondNbe,  // above
-
-  kX86CondS   = 0x8,    // sign
-  kX86CondNs  = 0x9,    // not-sign
-
-  kX86CondP   = 0xa,    // 8-bit parity even
-  kX86CondPE  = kX86CondP,
-
-  kX86CondNp  = 0xb,    // 8-bit parity odd
-  kX86CondPo  = kX86CondNp,
-
-  kX86CondL   = 0xc,    // less-than
-  kX86CondNge = kX86CondL,  // not-greater-equal
-
-  kX86CondNl  = 0xd,    // not-less-than
-  kX86CondGe  = kX86CondNl,  // greater-equal
-
-  kX86CondLe  = 0xe,    // less-than-equal
-  kX86CondNg  = kX86CondLe,  // not-greater
-
-  kX86CondNle = 0xf,    // not-less-than-equal
-  kX86CondG   = kX86CondNle,  // greater
-};
-std::ostream& operator<<(std::ostream& os, const X86ConditionCode& kind);
-
-enum DividePattern {
-  DivideNone,
-  Divide3,
-  Divide5,
-  Divide7,
-};
-std::ostream& operator<<(std::ostream& os, const DividePattern& pattern);
-
-/**
- * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
- * @details We define the combined barrier types that are actually required
- * by the Java Memory Model, rather than using exactly the terminology from
- * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
- * primitives.  Note that the JSR-133 cookbook generally does not deal with
- * store atomicity issues, and the recipes there are not always entirely sufficient.
- * The current recipe is as follows:
- * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
- * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
- * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
- * -# Use StoreStore barrier after all stores but before return from any constructor whose
- *    class has final fields.
- * -# Use NTStoreStore to order non-temporal stores with respect to all later
- *    store-to-memory instructions.  Only generated together with non-temporal stores.
- */
-enum MemBarrierKind {
-  kAnyStore,
-  kLoadAny,
-  kStoreStore,
-  kAnyAny,
-  kNTStoreStore,
-};
-std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
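To make the recipe above concrete, here is roughly where a backend would emit barriers around a volatile access. This is a sketch only; GenMemBarrier, EmitStore, EmitLoad, and FieldRef are stand-ins for the backend's real lowering hooks:

void LowerVolatileStore(FieldRef field, int32_t value) {
  GenMemBarrier(kAnyStore);  // Release: (LoadStore | StoreStore) before the store.
  EmitStore(field, value);
  GenMemBarrier(kAnyAny);    // Full barrier after the volatile store.
}

int32_t LowerVolatileLoad(FieldRef field) {
  int32_t value = EmitLoad(field);
  GenMemBarrier(kLoadAny);   // Acquire: (LoadLoad | LoadStore) after the load.
  return value;
}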
-
-enum OpFeatureFlags {
-  kIsBranch = 0,
-  kNoOperand,
-  kIsUnaryOp,
-  kIsBinaryOp,
-  kIsTertiaryOp,
-  kIsQuadOp,
-  kIsQuinOp,
-  kIsSextupleOp,
-  kIsIT,
-  kIsMoveOp,
-  kMemLoad,
-  kMemStore,
-  kMemVolatile,
-  kMemScaledx0,
-  kMemScaledx2,
-  kMemScaledx4,
-  kPCRelFixup,  // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
-  kRegDef0,
-  kRegDef1,
-  kRegDef2,
-  kRegDefA,
-  kRegDefD,
-  kRegDefFPCSList0,
-  kRegDefFPCSList2,
-  kRegDefList0,
-  kRegDefList1,
-  kRegDefList2,
-  kRegDefLR,
-  kRegDefSP,
-  kRegUse0,
-  kRegUse1,
-  kRegUse2,
-  kRegUse3,
-  kRegUse4,
-  kRegUseA,
-  kRegUseC,
-  kRegUseD,
-  kRegUseB,
-  kRegUseFPCSList0,
-  kRegUseFPCSList2,
-  kRegUseList0,
-  kRegUseList1,
-  kRegUseLR,
-  kRegUsePC,
-  kRegUseSP,
-  kSetsCCodes,
-  kUsesCCodes,
-  kUseFpStack,
-  kUseHi,
-  kUseLo,
-  kDefHi,
-  kDefLo
-};
-std::ostream& operator<<(std::ostream& os, const OpFeatureFlags& rhs);
-
-enum SelectInstructionKind {
-  kSelectNone,
-  kSelectConst,
-  kSelectMove,
-  kSelectGoto
-};
-std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
-
-// LIR fixup kinds for Arm and X86.
-enum FixupKind {
-  kFixupNone,
-  kFixupLabel,             // For labels we just adjust the offset.
-  kFixupLoad,              // Mostly for immediates.
-  kFixupVLoad,             // FP load which *may* be pc-relative.
-  kFixupCBxZ,              // Cbz, Cbnz.
-  kFixupTBxZ,              // Tbz, Tbnz.
-  kFixupCondBranch,        // Conditional branch
-  kFixupT1Branch,          // Thumb1 Unconditional branch
-  kFixupT2Branch,          // Thumb2 Unconditional branch
-  kFixupBlx1,              // Blx1 (start of Blx1/Blx2 pair).
-  kFixupBl1,               // Bl1 (start of Bl1/Bl2 pair).
-  kFixupAdr,               // Adr.
-  kFixupMovImmLST,         // kThumb2MovImm16LST.
-  kFixupMovImmHST,         // kThumb2MovImm16HST.
-  kFixupAlign4,            // Align to 4-byte boundary.
-  kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
-  kFixupSwitchTable,       // X86_64 packed switch table.
-};
-std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
-
-enum VolatileKind {
-  kNotVolatile,      // Load/Store is not volatile
-  kVolatile          // Load/Store is volatile
-};
-std::ostream& operator<<(std::ostream& os, const VolatileKind& kind);
-
-enum WideKind {
-  kNotWide,      // Non-wide view
-  kWide,         // Wide view
-  kRef           // Ref width
-};
-std::ostream& operator<<(std::ostream& os, const WideKind& kind);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
diff --git a/compiler/dex/compiler_ir.cc b/compiler/dex/compiler_ir.cc
deleted file mode 100644
index 6e1853b..0000000
--- a/compiler/dex/compiler_ir.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiler_ir.h"
-
-#include "arch/instruction_set_features.h"
-#include "base/dumpable.h"
-#include "dex_flags.h"
-#include "dex/quick/mir_to_lir.h"
-#include "driver/compiler_driver.h"
-#include "mir_graph.h"
-#include "utils.h"
-
-namespace art {
-
-CompilationUnit::CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver,
-                                 ClassLinker* linker)
-  : compiler_driver(driver),
-    class_linker(linker),
-    dex_file(nullptr),
-    class_loader(nullptr),
-    class_def_idx(0),
-    method_idx(0),
-    access_flags(0),
-    invoke_type(kDirect),
-    shorty(nullptr),
-    disable_opt(0),
-    enable_debug(0),
-    verbose(false),
-    instruction_set(isa),
-    target64(Is64BitInstructionSet(isa)),
-    arena(pool),
-    arena_stack(pool),
-    mir_graph(nullptr),
-    cg(nullptr),
-    timings("QuickCompiler", true, false),
-    print_pass(false) {
-}
-
-CompilationUnit::~CompilationUnit() {
-  overridden_pass_options.clear();
-}
-
-void CompilationUnit::StartTimingSplit(const char* label) {
-  if (compiler_driver->GetDumpPasses()) {
-    timings.StartTiming(label);
-  }
-}
-
-void CompilationUnit::NewTimingSplit(const char* label) {
-  if (compiler_driver->GetDumpPasses()) {
-    timings.EndTiming();
-    timings.StartTiming(label);
-  }
-}
-
-void CompilationUnit::EndTiming() {
-  if (compiler_driver->GetDumpPasses()) {
-    timings.EndTiming();
-    if (enable_debug & (1 << kDebugTimings)) {
-      LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-      LOG(INFO) << Dumpable<TimingLogger>(timings);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
deleted file mode 100644
index 5203355..0000000
--- a/compiler/dex/compiler_ir.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_COMPILER_IR_H_
-#define ART_COMPILER_DEX_COMPILER_IR_H_
-
-#include "jni.h"
-#include <string>
-#include <vector>
-
-#include "arch/instruction_set.h"
-#include "base/arena_allocator.h"
-#include "base/scoped_arena_allocator.h"
-#include "base/timing_logger.h"
-#include "invoke_type.h"
-#include "safe_map.h"
-
-namespace art {
-
-class ClassLinker;
-class CompilerDriver;
-class DexFile;
-class Mir2Lir;
-class MIRGraph;
-
-constexpr size_t kOptionStringMaxLength = 2048;
-
-/**
- * Structure abstracting pass option values, which can be of type string or integer.
- */
-struct OptionContent {
-  OptionContent(const OptionContent& option) :
-    type(option.type), container(option.container, option.type) {}
-
-  explicit OptionContent(const char* value) :
-    type(kString), container(value) {}
-
-  explicit OptionContent(int value) :
-    type(kInteger), container(value) {}
-
-  explicit OptionContent(int64_t value) :
-    type(kInteger), container(value) {}
-
-  ~OptionContent() {
-    if (type == kString) {
-      container.StringDelete();
-    }
-  }
-
-  /**
-   * Allows for a transparent display of the option content.
-   */
-  friend std::ostream& operator<<(std::ostream& out, const OptionContent& option) {
-    if (option.type == kString) {
-      out << option.container.s;
-    } else {
-      out << option.container.i;
-    }
-
-    return out;
-  }
-
-  inline const char* GetString() const {
-    return container.s;
-  }
-
-  inline int64_t GetInteger() const {
-    return container.i;
-  }
-
-  /**
-   * @brief Used to compare a string option value to a given @p value.
-   * @details Will return whether the internal string option is equal to
-   * the parameter @p value. It will return false if the type of the
-   * object is not a string.
-   * @param value The string to compare to.
-   * @return Returns whether the internal string option is equal to the
-   * parameter @p value.
-   */
-  inline bool Equals(const char* value) const {
-    DCHECK(value != nullptr);
-    if (type != kString) {
-      return false;
-    }
-    return !strncmp(container.s, value, kOptionStringMaxLength);
-  }
-
-  /**
-   * @brief Used to compare an integer option value to a given @p value.
-   * @details Will return whether the internal integer option is equal to
-   * the parameter @p value. It will return false if the type of the
-   * object is not an integer.
-   * @param value The integer to compare to.
-   * @return Returns whether the internal integer option is equal to the
-   * parameter @p value.
-   */
-  inline bool Equals(int64_t value) const {
-    if (type != kInteger) {
-      return false;
-    }
-    return container.i == value;
-  }
-
-  /**
-   * Describes the type of parameters allowed as option values.
-   */
-  enum OptionType {
-    kString = 0,
-    kInteger
-  };
-
-  OptionType type;
-
- private:
-  /**
-   * Union containing the option value of either type.
-   */
-  union OptionContainer {
-    OptionContainer(const OptionContainer& c, OptionType t) {
-      if (t == kString) {
-        DCHECK(c.s != nullptr);
-        s = strndup(c.s, kOptionStringMaxLength);
-      } else {
-        i = c.i;
-      }
-    }
-
-    explicit OptionContainer(const char* value) {
-      DCHECK(value != nullptr);
-      s = strndup(value, kOptionStringMaxLength);
-    }
-
-    explicit OptionContainer(int64_t value) : i(value) {}
-    ~OptionContainer() {}
-
-    void StringDelete() {
-      if (s != nullptr) {
-        free(s);
-      }
-    }
-
-    char* s;
-    int64_t i;
-  };
-
-  OptionContainer container;
-};
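A short usage sketch of OptionContent as specified above; the option values are made up, and CHECK is assumed from base/logging.h:

void OptionContentDemo() {
  OptionContent name("ifcvt");      // kString: the value is strndup'ed and freed by the dtor.
  OptionContent limit(INT64_C(8));  // kInteger.
  CHECK(name.Equals("ifcvt"));
  CHECK(limit.Equals(INT64_C(8)));
  CHECK(!name.Equals(INT64_C(8)));  // Type mismatch: Equals returns false.
}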
-
-struct CompilationUnit {
-  CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver, ClassLinker* linker);
-  ~CompilationUnit();
-
-  void StartTimingSplit(const char* label);
-  void NewTimingSplit(const char* label);
-  void EndTiming();
-
-  /*
-   * Fields needed/generated by common frontend and generally used throughout
-   * the compiler.
-  */
-  CompilerDriver* const compiler_driver;
-  ClassLinker* const class_linker;        // Linker to resolve fields and methods.
-  const DexFile* dex_file;                // DexFile containing the method being compiled.
-  jobject class_loader;                   // compiling method's class loader.
-  uint16_t class_def_idx;                 // compiling method's defining class definition index.
-  uint32_t method_idx;                    // compiling method's index into method_ids of DexFile.
-  uint32_t access_flags;                  // compiling method's access flags.
-  InvokeType invoke_type;                 // compiling method's invocation type.
-  const char* shorty;                     // compiling method's shorty.
-  uint32_t disable_opt;                   // opt_control_vector flags.
-  uint32_t enable_debug;                  // debugControlVector flags.
-  bool verbose;
-  const InstructionSet instruction_set;
-  const bool target64;
-
-  // TODO: move memory management to mir_graph, or just switch to using standard containers.
-  ArenaAllocator arena;
-  ArenaStack arena_stack;  // Arenas for ScopedArenaAllocator.
-
-  std::unique_ptr<MIRGraph> mir_graph;   // MIR container.
-  std::unique_ptr<Mir2Lir> cg;           // Target-specific codegen.
-  TimingLogger timings;
-  bool print_pass;                 // Do we want to print a pass or not?
-
-  /**
-   * @brief Holds pass options for current pass being applied to compilation unit.
-   * @details This is updated for every pass to contain the overridden pass options
-   * that were specified by user. The pass itself will check this to see if the
-   * default settings have been changed. The key is simply the option string without
-   * the pass name.
-   */
-  SafeMap<const std::string, const OptionContent> overridden_pass_options;
-};
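A hedged sketch of how a pass might consult overridden_pass_options as described above; the key "max_inline_depth" and the default are invented for illustration, and SafeMap is assumed to expose map-style find/end:

int64_t GetPassOptionOrDefault(const CompilationUnit* c_unit, int64_t def) {
  auto it = c_unit->overridden_pass_options.find("max_inline_depth");
  if (it == c_unit->overridden_pass_options.end() ||
      it->second.type != OptionContent::kInteger) {
    return def;  // The user did not override this option for the current pass.
  }
  return it->second.GetInteger();
}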
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_COMPILER_IR_H_
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
deleted file mode 100644
index e9402e3..0000000
--- a/compiler/dex/dataflow_iterator-inl.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_DATAFLOW_ITERATOR_INL_H_
-#define ART_COMPILER_DEX_DATAFLOW_ITERATOR_INL_H_
-
-#include "dataflow_iterator.h"
-
-namespace art {
-
-// Single forward pass over the nodes.
-inline BasicBlock* DataflowIterator::ForwardSingleNext() {
-  BasicBlock* res = nullptr;
-
-  // Are we not yet at the end?
-  if (idx_ < end_idx_) {
-    // Get the next index.
-    BasicBlockId bb_id = (*block_id_list_)[idx_];
-    res = mir_graph_->GetBasicBlock(bb_id);
-    idx_++;
-  }
-
-  return res;
-}
-
-// Repeat full forward passes over all nodes until no change occurs during a complete pass.
-inline BasicBlock* DataflowIterator::ForwardRepeatNext() {
-  BasicBlock* res = nullptr;
-
-  // Are we at the end and have we changed something?
-  if ((idx_ >= end_idx_) && changed_) {
-    // Reset the index.
-    idx_ = start_idx_;
-    repeats_++;
-    changed_ = false;
-  }
-
-  // Are we not yet at the end?
-  if (idx_ < end_idx_) {
-    // Get the BasicBlockId.
-    BasicBlockId bb_id = (*block_id_list_)[idx_];
-    res = mir_graph_->GetBasicBlock(bb_id);
-    idx_++;
-  }
-
-  return res;
-}
-
-// Single reverse pass over the nodes.
-inline BasicBlock* DataflowIterator::ReverseSingleNext() {
-  BasicBlock* res = nullptr;
-
-  // Are we not yet at the end?
-  if (idx_ >= 0) {
-    // Get the BasicBlockId.
-    BasicBlockId bb_id = (*block_id_list_)[idx_];
-    res = mir_graph_->GetBasicBlock(bb_id);
-    idx_--;
-  }
-
-  return res;
-}
-
-// Repeat full backwards passes over all nodes until no change occurs during a complete pass.
-inline BasicBlock* DataflowIterator::ReverseRepeatNext() {
-  BasicBlock* res = nullptr;
-
-  // Are we done and we changed something during the last iteration?
-  if ((idx_ < 0) && changed_) {
-    // Reset the index.
-    idx_ = start_idx_;
-    repeats_++;
-    changed_ = false;
-  }
-
-  // Are we not yet done?
-  if (idx_ >= 0) {
-    // Get the BasicBlockId.
-    BasicBlockId bb_id = (*block_id_list_)[idx_];
-    res = mir_graph_->GetBasicBlock(bb_id);
-    idx_--;
-  }
-
-  return res;
-}
-
-// AllNodes uses the existing block list, and should be considered unordered.
-inline BasicBlock* AllNodesIterator::Next(bool had_change) {
-  // Update changed: if had_change is true, we remember it for the whole iteration.
-  changed_ |= had_change;
-
-  BasicBlock* res = nullptr;
-  while (idx_ != end_idx_) {
-    BasicBlock* bb = mir_graph_->GetBlockList()[idx_++];
-    DCHECK(bb != nullptr);
-    if (!bb->hidden) {
-      res = bb;
-      break;
-    }
-  }
-
-  return res;
-}
-
-inline BasicBlock* TopologicalSortIterator::Next(bool had_change) {
-  // Update changed: if had_change is true, we remember it for the whole iteration.
-  changed_ |= had_change;
-
-  while (loop_head_stack_->size() != 0u &&
-      (*loop_ends_)[loop_head_stack_->back().first] == idx_) {
-    loop_head_stack_->pop_back();
-  }
-
-  if (idx_ == end_idx_) {
-    return nullptr;
-  }
-
-  // Get next block and return it.
-  BasicBlockId idx = idx_;
-  idx_ += 1;
-  BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx]);
-  DCHECK(bb != nullptr);
-  if ((*loop_ends_)[idx] != 0u) {
-    loop_head_stack_->push_back(std::make_pair(idx, false));  // Not recalculating.
-  }
-  return bb;
-}
-
-inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) {
-  if (idx_ != 0) {
-    // Mark last processed block visited.
-    BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx_ - 1]);
-    bb->visited = true;
-    if (had_change) {
-      // If we had a change we need to revisit the children.
-      ChildBlockIterator iter(bb, mir_graph_);
-      for (BasicBlock* child_bb = iter.Next(); child_bb != nullptr; child_bb = iter.Next()) {
-        child_bb->visited = false;
-      }
-    }
-  }
-
-  while (true) {
-    // Pop loops we have left and check if we need to recalculate one of them.
-    // NOTE: We need to do this even if idx_ == end_idx_.
-    while (loop_head_stack_->size() != 0u &&
-        (*loop_ends_)[loop_head_stack_->back().first] == idx_) {
-      auto top = loop_head_stack_->back();
-      uint16_t loop_head_idx = top.first;
-      bool recalculated = top.second;
-      loop_head_stack_->pop_back();
-      BasicBlock* loop_head = mir_graph_->GetBasicBlock((*block_id_list_)[loop_head_idx]);
-      DCHECK(loop_head != nullptr);
-      if (!recalculated || !loop_head->visited) {
-        // Recalculating this loop.
-        loop_head_stack_->push_back(std::make_pair(loop_head_idx, true));
-        idx_ = loop_head_idx + 1;
-        return loop_head;
-      }
-    }
-
-    if (idx_ == end_idx_) {
-      return nullptr;
-    }
-
-    // Get next block and return it if unvisited.
-    BasicBlockId idx = idx_;
-    idx_ += 1;
-    BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx]);
-    DCHECK(bb != nullptr);
-    if ((*loop_ends_)[idx] != 0u) {
-      // If bb->visited is false, the loop needs to be processed from scratch.
-      // Otherwise we mark it as recalculating; for a natural loop we will not
-      // need to recalculate any block in the loop anyway, and for unnatural
-      // loops we will recalculate the loop head only if one of its predecessors
-      // actually changes.
-      bool recalculating = bb->visited;
-      loop_head_stack_->push_back(std::make_pair(idx, recalculating));
-    }
-    if (!bb->visited) {
-      return bb;
-    }
-  }
-}
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_INL_H_
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
deleted file mode 100644
index 097c2a4..0000000
--- a/compiler/dex/dataflow_iterator.h
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
-#define ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
-
-#include "base/logging.h"
-#include "mir_graph.h"
-
-namespace art {
-
-  /*
-   * This class supports iterating over lists of basic blocks in various
-   * interesting orders.  Note that for efficiency, the visit orders have been pre-computed.
-   * The order itself will not change during the iteration.  However, for some uses,
-   * auxiliary data associated with the basic blocks may be changed during the iteration,
-   * necessitating another pass over the list.  If this behavior is required, use the
-   * "Repeating" variant.  For the repeating variant, the caller must tell the iterator
-   * whether a change has been made that necessitates another pass.  Note that calling Next(true)
-   * does not affect the iteration order or short-circuit the current pass - it simply tells
-   * the iterator that once it has finished walking through the block list it should reset and
-   * do another full pass through the list.
-   */
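The "Repeating" contract described above shows up as a canonical driver loop; a minimal sketch, where DoPerBlockWork is a hypothetical transform that reports whether it changed anything:

void RunToFixpoint(MIRGraph* mir_graph) {
  RepeatingPreOrderDfsIterator iter(mir_graph);
  bool change = false;
  for (BasicBlock* bb = iter.Next(false); bb != nullptr; bb = iter.Next(change)) {
    change = DoPerBlockWork(bb);  // Reporting true forces another full pass.
  }
}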
-  /**
-   * @class DataflowIterator
-   * @brief The main iterator class; all other iterators derive from this one to define an iteration order.
-   */
-  class DataflowIterator {
-    public:
-      virtual ~DataflowIterator() {}
-
-      /**
-       * @brief How many times have we repeated the iterator across the BasicBlocks?
-       * @return the number of iteration repetitions.
-       */
-      int32_t GetRepeatCount() { return repeats_; }
-
-      /**
-       * @brief Has the user of the iterator reported a change yet?
-       * @details Does not mean there was or was not a change; it only records whether the user passed true to a Next() call.
-       * @return whether the user of the iterator reported a change yet.
-       */
-      int32_t GetChanged() { return changed_; }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) = 0;
-
-    protected:
-      /**
-       * @param mir_graph the MIRGraph we are interested in.
-       * @param start_idx the first index we want to iterate across.
-       * @param end_idx the last index we want to iterate (not included).
-       */
-      DataflowIterator(MIRGraph* mir_graph, int32_t start_idx, int32_t end_idx)
-          : mir_graph_(mir_graph),
-            start_idx_(start_idx),
-            end_idx_(end_idx),
-            block_id_list_(nullptr),
-            idx_(0),
-            repeats_(0),
-            changed_(false) {}
-
-      /**
-       * @brief Get the next BasicBlock iterating forward.
-       * @return the next BasicBlock iterating forward.
-       */
-      virtual BasicBlock* ForwardSingleNext() ALWAYS_INLINE;
-
-      /**
-       * @brief Get the next BasicBlock iterating backward.
-       * @return the next BasicBlock iterating backward.
-       */
-      virtual BasicBlock* ReverseSingleNext() ALWAYS_INLINE;
-
-      /**
-       * @brief Get the next BasicBlock iterating forward, restart if a BasicBlock was reported changed during the last iteration.
-       * @return the next BasicBlock iterating forward, with chance of repeating the iteration.
-       */
-      virtual BasicBlock* ForwardRepeatNext() ALWAYS_INLINE;
-
-      /**
-       * @brief Get the next BasicBlock iterating backward, restart if a BasicBlock was reported changed during the last iteration.
-       * @return the next BasicBlock iterating backward, with chance of repeating the iteration.
-       */
-      virtual BasicBlock* ReverseRepeatNext() ALWAYS_INLINE;
-
-      MIRGraph* const mir_graph_;                       /**< @brief the MIRGraph */
-      const int32_t start_idx_;                         /**< @brief the start index for the iteration */
-      const int32_t end_idx_;                           /**< @brief the last index for the iteration */
-      const ArenaVector<BasicBlockId>* block_id_list_;  /**< @brief the list of BasicBlocks we want to iterate on */
-      int32_t idx_;                                     /**< @brief Current index for the iterator */
-      int32_t repeats_;                                 /**< @brief Number of repeats over the iteration */
-      bool changed_;                                    /**< @brief Has something changed during the current iteration? */
-  };  // DataflowIterator
-
-  /**
-   * @class PreOrderDfsIterator
-   * @brief Used to perform a Pre-order Depth-First-Search Iteration of a MIRGraph.
-   */
-  class PreOrderDfsIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit PreOrderDfsIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
-        // Extra setup for the PreOrderDfsIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetDfsOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) {
-        // Update changed: if had_change is true, we remember it for the whole iteration.
-        changed_ |= had_change;
-
-        return ForwardSingleNext();
-      }
-  };
-
-  /**
-   * @class RepeatingPreOrderDfsIterator
-   * @brief Used to perform a Repeating Pre-order Depth-First-Search Iteration of a MIRGraph.
-   * @details If there is a change during an iteration, the iteration starts over at the end of the iteration.
-   */
-  class RepeatingPreOrderDfsIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit RepeatingPreOrderDfsIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
-        // Extra setup for the RepeatingPreOrderDfsIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetDfsOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) {
-        // Update changed: if had_change is true, we remember it for the whole iteration.
-        changed_ |= had_change;
-
-        return ForwardRepeatNext();
-      }
-  };
-
-  /**
-   * @class RepeatingPostOrderDfsIterator
-   * @brief Used to perform a Repeating Post-order Depth-First-Search Iteration of a MIRGraph.
-   * @details If any BasicBlock was reported changed during a pass, a new pass starts once the current one finishes.
-   */
-  class RepeatingPostOrderDfsIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit RepeatingPostOrderDfsIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
-        // Extra setup for the RepeatingPostOrderDfsIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetDfsPostOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) {
-        // Update changed: if had_change is true, we remember it for the whole iteration.
-        changed_ |= had_change;
-
-        return ForwardRepeatNext();
-      }
-  };
-
-  /**
-   * @class ReversePostOrderDfsIterator
-   * @brief Used to perform a Reverse Post-order Depth-First-Search Iteration of a MIRGraph.
-   */
-  class ReversePostOrderDfsIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit ReversePostOrderDfsIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() - 1, 0) {
-        // Extra setup for the ReversePostOrderDfsIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetDfsPostOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) {
-        // Update changed: if had_change is true, we remember it for the whole iteration.
-        changed_ |= had_change;
-
-        return ReverseSingleNext();
-      }
-  };
-
-  /**
-   * @class RepeatingReversePostOrderDfsIterator
-   * @brief Used to perform a Repeating Reverse Post-order Depth-First-Search Iteration of a MIRGraph.
-   * @details If any BasicBlock was reported changed during a pass, a new pass starts once the current one finishes.
-   */
-  class RepeatingReversePostOrderDfsIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit RepeatingReversePostOrderDfsIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() - 1, 0) {
-        // Extra setup for the RepeatingReversePostOrderDfsIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetDfsPostOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) {
-        // Update changed: if had_change is true, we remember it for the whole iteration.
-        changed_ |= had_change;
-
-        return ReverseRepeatNext();
-      }
-  };
-
-  /**
-   * @class PostOrderDOMIterator
-   * @brief Used to perform a Post-order Domination Iteration of a MIRGraph.
-   */
-  class PostOrderDOMIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit PostOrderDOMIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) {
-        // Extra setup for the PostOrderDOMIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetDomPostOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) {
-        // Update changed: if had_change is true, we remember it for the whole iteration.
-        changed_ |= had_change;
-
-        return ForwardSingleNext();
-      }
-  };
-
-  /**
-   * @class AllNodesIterator
-   * @brief Used to perform an iteration on all the BasicBlocks of a MIRGraph.
-   */
-  class AllNodesIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the blocks of the MIRGraph, reachable or not.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit AllNodesIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetBlockList().size()) {
-      }
-
-      /**
-       * @brief Reset the iterator.
-       */
-      void Reset() {
-        idx_ = 0;
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) ALWAYS_INLINE;
-  };
-
-  /**
-   * @class TopologicalSortIterator
-   * @brief Used to perform a Topological Sort Iteration of a MIRGraph.
-   */
-  class TopologicalSortIterator : public DataflowIterator {
-    public:
-      /**
-       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-       * @param mir_graph The MIRGraph considered.
-       */
-      explicit TopologicalSortIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder().size()),
-            loop_ends_(&mir_graph->GetTopologicalSortOrderLoopEnds()),
-            loop_head_stack_(mir_graph_->GetTopologicalSortOrderLoopHeadStack()) {
-        // Extra setup for TopologicalSortIterator.
-        idx_ = start_idx_;
-        block_id_list_ = &mir_graph->GetTopologicalSortOrder();
-      }
-
-      /**
-       * @brief Get the next BasicBlock depending on iteration order.
-       * @param had_change whether the user of the iterator changed the previous BasicBlock.
-       * @return the next BasicBlock following the iteration order, nullptr if finished.
-       */
-      virtual BasicBlock* Next(bool had_change = false) OVERRIDE;
-
-    private:
-     const ArenaVector<BasicBlockId>* const loop_ends_;
-     ArenaVector<std::pair<uint16_t, bool>>* const loop_head_stack_;
-  };
-
-  /**
-   * @class LoopRepeatingTopologicalSortIterator
-   * @brief Used to perform a Topological Sort Iteration of a MIRGraph, repeating loops as needed.
-   * @details The iterator uses the visited flags to keep track of the blocks that need
-   * recalculation and keeps a stack of loop heads in the MIRGraph. At the end of a loop
-   * it returns to the loop head if that head needs to be recalculated. Due to the use of
-   * the visited flags and the loop head stack in the MIRGraph, it's not possible to use
-   * two iterators at the same time or modify this data during iteration (though inspection
-   * of this data is allowed and sometimes even expected).
-   *
-   * NOTE: This iterator is not suitable for passes that need to propagate changes to
-   * predecessors, such as type inference.
-   */
-  class LoopRepeatingTopologicalSortIterator : public DataflowIterator {
-    public:
-     /**
-      * @brief The constructor, using all of the reachable blocks of the MIRGraph.
-      * @param mir_graph The MIRGraph considered.
-      */
-     explicit LoopRepeatingTopologicalSortIterator(MIRGraph* mir_graph)
-         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder().size()),
-           loop_ends_(&mir_graph->GetTopologicalSortOrderLoopEnds()),
-           loop_head_stack_(mir_graph_->GetTopologicalSortOrderLoopHeadStack()) {
-       // Extra setup for the LoopRepeatingTopologicalSortIterator.
-       idx_ = start_idx_;
-       block_id_list_ = &mir_graph->GetTopologicalSortOrder();
-       // Clear visited flags and check that the loop head stack is empty.
-       mir_graph->ClearAllVisitedFlags();
-       DCHECK_EQ(loop_head_stack_->size(), 0u);
-     }
-
-     ~LoopRepeatingTopologicalSortIterator() {
-       DCHECK_EQ(loop_head_stack_->size(), 0u);
-     }
-
-     /**
-      * @brief Get the next BasicBlock depending on iteration order.
-      * @param had_change whether the user of the iterator changed the previous BasicBlock.
-      * @return the next BasicBlock following the iteration order, nullptr if finished.
-      */
-     virtual BasicBlock* Next(bool had_change = false) OVERRIDE;
-
-    private:
-     const ArenaVector<BasicBlockId>* const loop_ends_;
-     ArenaVector<std::pair<uint16_t, bool>>* const loop_head_stack_;
-  };
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
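All the repeating iterators share one driver idiom, visible again in DoPerformGVN() in the deleted test file further down: the caller feeds back whether it changed the current block, and the iterator keeps re-running the order until a full pass reports no change. A sketch of that loop, where DoSomePass() is a hypothetical optimization pass returning true when it modified the block:

    RepeatingPreOrderDfsIterator iterator(mir_graph);
    bool change = false;
    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
      change = DoSomePass(bb);  // Hypothetical pass; true means bb was modified.
    }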
diff --git a/compiler/dex/dex_flags.h b/compiler/dex/dex_flags.h
deleted file mode 100644
index e8eb40c..0000000
--- a/compiler/dex/dex_flags.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_DEX_FLAGS_H_
-#define ART_COMPILER_DEX_DEX_FLAGS_H_
-
-namespace art {
-
-// Suppress optimization if corresponding bit set.
-enum OptControlVector {
-  kLoadStoreElimination = 0,
-  kLoadHoisting,
-  kSuppressLoads,
-  kNullCheckElimination,
-  kClassInitCheckElimination,
-  kGlobalValueNumbering,
-  kGvnDeadCodeElimination,
-  kLocalValueNumbering,
-  kPromoteRegs,
-  kTrackLiveTemps,
-  kSafeOptimizations,
-  kBBOpt,
-  kSuspendCheckElimination,
-  kMatch,
-  kPromoteCompilerTemps,
-  kBranchFusing,
-  kSuppressExceptionEdges,
-  kSuppressMethodInlining,
-};
-
-// Force code generation paths for testing.
-enum DebugControlVector {
-  kDebugVerbose,
-  kDebugDumpCFG,
-  kDebugSlowFieldPath,
-  kDebugSlowInvokePath,
-  kDebugSlowStringPath,
-  kDebugSlowTypePath,
-  kDebugSlowestFieldPath,
-  kDebugSlowestStringPath,
-  kDebugExerciseResolveMethod,
-  kDebugVerifyDataflow,
-  kDebugShowMemoryUsage,
-  kDebugShowNops,
-  kDebugCountOpcodes,
-  kDebugDumpCheckStats,
-  kDebugShowSummaryMemoryUsage,
-  kDebugShowFilterStats,
-  kDebugTimings,
-  kDebugCodegenDump
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_DEX_FLAGS_H_
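These enumerators are bit positions, not masks: a pass tests its bit in CompilationUnit::disable_opt, exactly as GlobalValueNumbering::Skip() does further down in this diff. For example:

    // Suppress basic-block optimizations for this compilation unit.
    cu->disable_opt |= (1u << kBBOpt);
    // Check whether GVN is suppressed (mirrors GlobalValueNumbering::Skip()).
    bool gvn_disabled = (cu->disable_opt & (1u << kGlobalValueNumbering)) != 0u;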
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index ff7ddc1..3ce786e 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -327,10 +327,16 @@
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    art::DexCompilationUnit unit(nullptr, class_loader, class_linker,
-                                 dex_file, code_item, class_def_idx, method_idx, access_flags,
-                                 driver->GetVerifiedMethod(&dex_file, method_idx),
-                                 hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
+    art::DexCompilationUnit unit(
+        class_loader,
+        class_linker,
+        dex_file,
+        code_item,
+        class_def_idx,
+        method_idx,
+        access_flags,
+        driver->GetVerifiedMethod(&dex_file, method_idx),
+        hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
     art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level);
     dex_compiler.Compile();
     if (dex_compiler.GetQuickenedInfo().empty()) {
@@ -356,10 +362,8 @@
         0,
         0,
         0,
-        nullptr,                                     // src_mapping_table
-        ArrayRef<const uint8_t>(),                   // mapping_table
+        ArrayRef<const SrcMapElem>(),                // src_mapping_table
         ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
-        ArrayRef<const uint8_t>(),                   // gc_map
         ArrayRef<const uint8_t>(),                   // cfi data
         ArrayRef<const LinkerPatch>());
   }
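The hunk above replaces a raw src_mapping_table pointer and an untyped mapping_table byte array with a typed, empty ArrayRef<const SrcMapElem>, and drops the gc_map argument entirely. ArrayRef<T> is a non-owning (pointer, length) view, so a default-constructed one is the idiomatic "no data" argument. A sketch of the semantics, using a hypothetical local vector:

    std::vector<uint8_t> vmap_data = {1, 2, 3};                        // Owning storage.
    ArrayRef<const uint8_t> vmap(vmap_data.data(), vmap_data.size());  // View; no copy.
    ArrayRef<const SrcMapElem> no_src_map;                             // Default: empty view.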
diff --git a/compiler/dex/dex_types.h b/compiler/dex/dex_types.h
deleted file mode 100644
index f485c1c..0000000
--- a/compiler/dex/dex_types.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_DEX_TYPES_H_
-#define ART_COMPILER_DEX_DEX_TYPES_H_
-
-namespace art {
-
-typedef uint32_t DexOffset;          // Dex offset in code units.
-typedef uint16_t NarrowDexOffset;    // For use in structs, Dex offsets range from 0 .. 0xffff.
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_DEX_TYPES_H_
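The narrow form earns its keep in packed structs: dex offsets are measured in 16-bit code units and, per the comment above, stay within 0..0xffff inside a single method, so the conversion is lossless there. For example:

    DexOffset offset = 0x1234u;                                     // Full-width offset.
    NarrowDexOffset narrow = static_cast<NarrowDexOffset>(offset);  // Lossless for <= 0xffff.
    static_assert(sizeof(NarrowDexOffset) == 2, "half the width of DexOffset");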
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
deleted file mode 100644
index 94ba4fa..0000000
--- a/compiler/dex/global_value_numbering.cc
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "global_value_numbering.h"
-
-#include "base/bit_vector-inl.h"
-#include "base/stl_util.h"
-#include "local_value_numbering.h"
-
-namespace art {
-
-GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator,
-                                           Mode mode)
-    : cu_(cu),
-      mir_graph_(cu->mir_graph.get()),
-      allocator_(allocator),
-      bbs_processed_(0u),
-      max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()),
-      last_value_(kNullValue),
-      modifications_allowed_(true),
-      mode_(mode),
-      global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
-      array_location_map_(ArrayLocationComparator(), allocator->Adapter()),
-      array_location_reverse_map_(allocator->Adapter()),
-      ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()),
-      lvns_(mir_graph_->GetNumBlocks(), nullptr, allocator->Adapter()),
-      work_lvn_(nullptr),
-      merge_lvns_(allocator->Adapter()) {
-}
-
-GlobalValueNumbering::~GlobalValueNumbering() {
-  STLDeleteElements(&lvns_);
-}
-
-LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb,
-                                                             ScopedArenaAllocator* allocator) {
-  if (UNLIKELY(!Good())) {
-    return nullptr;
-  }
-  if (bb->block_type != kDalvikByteCode && bb->block_type != kEntryBlock) {
-    DCHECK(bb->first_mir_insn == nullptr);
-    return nullptr;
-  }
-  if (mode_ == kModeGvn && UNLIKELY(bbs_processed_ == max_bbs_to_process_)) {
-    // If we're still trying to converge, stop now. Otherwise, proceed to apply optimizations.
-    last_value_ = kNoValue;  // Make Good() return false.
-    return nullptr;
-  }
-  if (mode_ == kModeGvnPostProcessing &&
-      mir_graph_->GetTopologicalSortOrderLoopHeadStack()->empty()) {
-    // Modifications outside loops are performed during the main phase.
-    return nullptr;
-  }
-  if (allocator == nullptr) {
-    allocator = allocator_;
-  }
-  DCHECK(work_lvn_.get() == nullptr);
-  work_lvn_.reset(new (allocator) LocalValueNumbering(this, bb->id, allocator));
-  if (bb->block_type == kEntryBlock) {
-    work_lvn_->PrepareEntryBlock();
-    DCHECK(bb->first_mir_insn == nullptr);  // modifications_allowed_ is irrelevant.
-  } else {
-    // To avoid repeated allocation on the ArenaStack, reuse a single vector kept as a member.
-    DCHECK(merge_lvns_.empty());
-    // If we're running the full GVN, the RepeatingTopologicalSortIterator keeps the loop
-    // head stack in the MIRGraph up to date and for a loop head we need to check whether
-    // we're making the initial computation and need to merge only preceding blocks in the
-    // topological order, or we're recalculating a loop head and need to merge all incoming
-    // LVNs. When we're not at a loop head (including having an empty loop head stack) all
-    // predecessors should be preceding blocks and we shall merge all of them anyway.
-    bool use_all_predecessors = true;
-    uint16_t loop_head_idx = 0u;  // Used only if !use_all_predecessors.
-    if (mode_ == kModeGvn && mir_graph_->GetTopologicalSortOrderLoopHeadStack()->size() != 0) {
-      // Full GVN inside a loop, see if we're at the loop head for the first time.
-      modifications_allowed_ = false;
-      auto top = mir_graph_->GetTopologicalSortOrderLoopHeadStack()->back();
-      loop_head_idx = top.first;
-      bool recalculating = top.second;
-      use_all_predecessors = recalculating ||
-          loop_head_idx != mir_graph_->GetTopologicalSortOrderIndexes()[bb->id];
-    } else {
-      modifications_allowed_ = true;
-    }
-    for (BasicBlockId pred_id : bb->predecessors) {
-      DCHECK_NE(pred_id, NullBasicBlockId);
-      if (lvns_[pred_id] != nullptr &&
-          (use_all_predecessors ||
-              mir_graph_->GetTopologicalSortOrderIndexes()[pred_id] < loop_head_idx)) {
-        merge_lvns_.push_back(lvns_[pred_id]);
-      }
-    }
-    // Determine merge type.
-    LocalValueNumbering::MergeType merge_type = LocalValueNumbering::kNormalMerge;
-    if (bb->catch_entry) {
-      merge_type = LocalValueNumbering::kCatchMerge;
-    } else if (bb->last_mir_insn != nullptr &&
-        IsInstructionReturn(bb->last_mir_insn->dalvikInsn.opcode) &&
-        bb->GetFirstNonPhiInsn() == bb->last_mir_insn) {
-      merge_type = LocalValueNumbering::kReturnMerge;
-    }
-    // At least one predecessor must have been processed before this bb.
-    CHECK(!merge_lvns_.empty());
-    if (merge_lvns_.size() == 1u) {
-      work_lvn_->MergeOne(*merge_lvns_[0], merge_type);
-    } else {
-      work_lvn_->Merge(merge_type);
-    }
-  }
-  return work_lvn_.get();
-}
-
-bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) {
-  DCHECK(work_lvn_ != nullptr);
-  DCHECK_EQ(bb->id, work_lvn_->Id());
-  ++bbs_processed_;
-  merge_lvns_.clear();
-
-  bool change = false;
-  if (mode_ == kModeGvn) {
-    change = (lvns_[bb->id] == nullptr) || !lvns_[bb->id]->Equals(*work_lvn_);
-    // In GVN mode, keep the latest LVN even if Equals() indicates no change. This is
-    // to keep the correct values of fields that do not contribute to Equals() as long
-    // as they depend only on predecessor LVNs' fields that do contribute to Equals().
-    // Currently, that's LVN::merge_map_ used by LVN::GetStartingVregValueNumberImpl().
-    std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]);
-    lvns_[bb->id] = work_lvn_.release();
-  } else {
-    DCHECK_EQ(mode_, kModeGvnPostProcessing);  // kModeLvn doesn't use FinishBasicBlock().
-    DCHECK(lvns_[bb->id] != nullptr);
-    DCHECK(lvns_[bb->id]->Equals(*work_lvn_));
-    work_lvn_.reset();
-  }
-  return change;
-}
-
-uint16_t GlobalValueNumbering::GetArrayLocation(uint16_t base, uint16_t index) {
-  auto cmp = array_location_map_.key_comp();
-  ArrayLocation key = { base, index };
-  auto lb = array_location_map_.lower_bound(key);
-  if (lb != array_location_map_.end() && !cmp(key, lb->first)) {
-    return lb->second;
-  }
-  uint16_t location = static_cast<uint16_t>(array_location_reverse_map_.size());
-  DCHECK_EQ(location, array_location_reverse_map_.size());  // No overflow.
-  auto it = array_location_map_.PutBefore(lb, key, location);
-  array_location_reverse_map_.push_back(&*it);
-  return location;
-}
-
-bool GlobalValueNumbering::NullCheckedInAllPredecessors(
-    const ScopedArenaVector<uint16_t>& merge_names) const {
-  // Implicit parameters:
-  //   - *work_lvn_: the LVN for which we're checking predecessors.
-  //   - merge_lvns_: the predecessor LVNs.
-  DCHECK_EQ(merge_lvns_.size(), merge_names.size());
-  for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
-    const LocalValueNumbering* pred_lvn = merge_lvns_[i];
-    uint16_t value_name = merge_names[i];
-    if (!pred_lvn->IsValueNullChecked(value_name)) {
-      // Check if the predecessor has an IF_EQZ/IF_NEZ as the last insn.
-      const BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_lvn->Id());
-      if (!HasNullCheckLastInsn(pred_bb, work_lvn_->Id())) {
-        return false;
-      }
-      // IF_EQZ/IF_NEZ checks some sreg, see if that sreg contains the value_name.
-      int s_reg = pred_bb->last_mir_insn->ssa_rep->uses[0];
-      if (pred_lvn->GetSregValue(s_reg) != value_name) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-bool GlobalValueNumbering::DivZeroCheckedInAllPredecessors(
-    const ScopedArenaVector<uint16_t>& merge_names) const {
-  // Implicit parameters:
-  //   - *work_lvn_: the LVN for which we're checking predecessors.
-  //   - merge_lvns_: the predecessor LVNs.
-  DCHECK_EQ(merge_lvns_.size(), merge_names.size());
-  for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
-    const LocalValueNumbering* pred_lvn = merge_lvns_[i];
-    uint16_t value_name = merge_names[i];
-    if (!pred_lvn->IsValueDivZeroChecked(value_name)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool GlobalValueNumbering::IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_id) {
-  DCHECK_NE(cond, kNoValue);
-  BasicBlock* bb = mir_graph_->GetBasicBlock(bb_id);
-  if (bb->predecessors.size() == 1u) {
-    BasicBlockId pred_id = bb->predecessors[0];
-    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id);
-    if (pred_bb->BranchesToSuccessorOnlyIfNotZero(bb_id)) {
-      DCHECK(lvns_[pred_id] != nullptr);
-      uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]);
-      if (operand == cond) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-bool GlobalValueNumbering::IsTrueInBlock(uint16_t cond, BasicBlockId bb_id) {
-  // We're not doing proper value propagation, so just see if the condition is used
-  // with if-nez/if-eqz to branch/fall-through to this bb or one of its dominators.
-  DCHECK_NE(cond, kNoValue);
-  if (IsBlockEnteredOnTrue(cond, bb_id)) {
-    return true;
-  }
-  BasicBlock* bb = mir_graph_->GetBasicBlock(bb_id);
-  for (uint32_t dom_id : bb->dominators->Indexes()) {
-    if (IsBlockEnteredOnTrue(cond, dom_id)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-}  // namespace art
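PrepareBasicBlock()/FinishBasicBlock() are meant to be driven to a fixed point, as the deleted test file's DoPerformGVN() shows further down. A condensed sketch of that driver, assuming cu, allocator, and mir_graph are in scope:

    GlobalValueNumbering gvn(cu, allocator, GlobalValueNumbering::kModeGvn);
    LoopRepeatingTopologicalSortIterator iterator(mir_graph);
    bool change = false;
    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
      LocalValueNumbering* lvn = gvn.PrepareBasicBlock(bb);
      if (lvn != nullptr) {
        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
          lvn->GetValueNumber(mir);  // Assign value names within the block.
        }
      }
      change = (lvn != nullptr) && gvn.FinishBasicBlock(bb);  // Did names change?
    }
    // Once gvn.Good() holds, StartPostProcessing() allows code modifications.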
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
deleted file mode 100644
index c514f75..0000000
--- a/compiler/dex/global_value_numbering.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
-#define ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
-
-#include "base/arena_object.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "mir_graph.h"
-#include "compiler_ir.h"
-#include "dex_flags.h"
-
-namespace art {
-
-class LocalValueNumbering;
-class MirFieldInfo;
-
-class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
- public:
-  static constexpr uint16_t kNoValue = 0xffffu;
-  static constexpr uint16_t kNullValue = 1u;
-
-  enum Mode {
-    kModeGvn,
-    kModeGvnPostProcessing,
-    kModeLvn
-  };
-
-  static bool Skip(CompilationUnit* cu) {
-    return (cu->disable_opt & (1u << kGlobalValueNumbering)) != 0u ||
-        cu->mir_graph->GetMaxNestedLoops() > kMaxAllowedNestedLoops;
-  }
-
-  // Instance and static field id map is held by MIRGraph to avoid multiple recalculations
-  // when doing LVN.
-  template <typename Container>  // Container of MirIFieldLoweringInfo or MirSFieldLoweringInfo.
-  static uint16_t* PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
-                                      const Container& field_infos);
-
-  GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator, Mode mode);
-  ~GlobalValueNumbering();
-
-  CompilationUnit* GetCompilationUnit() const {
-    return cu_;
-  }
-
-  MIRGraph* GetMirGraph() const {
-    return mir_graph_;
-  }
-
-  // Prepare LVN for the basic block.
-  LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb,
-                                         ScopedArenaAllocator* allocator = nullptr);
-
-  // Finish processing the basic block.
-  bool FinishBasicBlock(BasicBlock* bb);
-
-  // Checks that the value names didn't overflow.
-  bool Good() const {
-    return last_value_ < kNoValue;
-  }
-
-  // Allow modifications.
-  void StartPostProcessing();
-
-  bool CanModify() const {
-    return modifications_allowed_ && Good();
-  }
-
-  // Retrieve the LVN with GVN results for a given BasicBlock.
-  const LocalValueNumbering* GetLvn(BasicBlockId bb_id) const;
-
- private:
-  // Allocate a new value name.
-  uint16_t NewValueName();
-
-  // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name.
-  typedef ScopedArenaSafeMap<uint64_t, uint16_t> ValueMap;
-
-  static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
-    return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 |
-            static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier));
-  }
-
-  // Look up a value in the global value map, adding a new entry if there was none before.
-  uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
-    uint16_t res;
-    uint64_t key = BuildKey(op, operand1, operand2, modifier);
-    auto lb = global_value_map_.lower_bound(key);
-    if (lb != global_value_map_.end() && lb->first == key) {
-      res = lb->second;
-    } else {
-      res = NewValueName();
-      global_value_map_.PutBefore(lb, key, res);
-    }
-    return res;
-  }
-
-  // Look up a value in the global value map, don't add a new entry if there was none before.
-  uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const {
-    uint16_t res;
-    uint64_t key = BuildKey(op, operand1, operand2, modifier);
-    auto lb = global_value_map_.lower_bound(key);
-    if (lb != global_value_map_.end() && lb->first == key) {
-      res = lb->second;
-    } else {
-      res = kNoValue;
-    }
-    return res;
-  }
-
-  // Get an instance field id.
-  uint16_t GetIFieldId(MIR* mir) {
-    return GetMirGraph()->GetGvnIFieldId(mir);
-  }
-
-  // Get a static field id.
-  uint16_t GetSFieldId(MIR* mir) {
-    return GetMirGraph()->GetGvnSFieldId(mir);
-  }
-
-  // Get an instance field type based on field id.
-  uint16_t GetIFieldType(uint16_t field_id) {
-    return static_cast<uint16_t>(GetMirGraph()->GetIFieldLoweringInfo(field_id).MemAccessType());
-  }
-
-  // Get a static field type based on field id.
-  uint16_t GetSFieldType(uint16_t field_id) {
-    return static_cast<uint16_t>(GetMirGraph()->GetSFieldLoweringInfo(field_id).MemAccessType());
-  }
-
-  struct ArrayLocation {
-    uint16_t base;
-    uint16_t index;
-  };
-
-  struct ArrayLocationComparator {
-    bool operator()(const ArrayLocation& lhs, const ArrayLocation& rhs) const {
-      if (lhs.base != rhs.base) {
-        return lhs.base < rhs.base;
-      }
-      return lhs.index < rhs.index;
-    }
-  };
-
-  typedef ScopedArenaSafeMap<ArrayLocation, uint16_t, ArrayLocationComparator> ArrayLocationMap;
-
-  // Get an array location.
-  uint16_t GetArrayLocation(uint16_t base, uint16_t index);
-
-  // Get the array base from an array location.
-  uint16_t GetArrayLocationBase(uint16_t location) const {
-    return array_location_reverse_map_[location]->first.base;
-  }
-
-  // Get the array index from an array location.
-  uint16_t GetArrayLocationIndex(uint16_t location) const {
-    return array_location_reverse_map_[location]->first.index;
-  }
-
-  // A set of value names.
-  typedef ScopedArenaSet<uint16_t> ValueNameSet;
-
-  // A map from a set of references to the set id.
-  typedef ScopedArenaSafeMap<ValueNameSet, uint16_t> RefSetIdMap;
-
-  uint16_t GetRefSetId(const ValueNameSet& ref_set) {
-    uint16_t res = kNoValue;
-    auto lb = ref_set_map_.lower_bound(ref_set);
-    if (lb != ref_set_map_.end() && !ref_set_map_.key_comp()(ref_set, lb->first)) {
-      res = lb->second;
-    } else {
-      res = NewValueName();
-      ref_set_map_.PutBefore(lb, ref_set, res);
-    }
-    return res;
-  }
-
-  const BasicBlock* GetBasicBlock(uint16_t bb_id) const {
-    return mir_graph_->GetBasicBlock(bb_id);
-  }
-
-  static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id) {
-    return pred_bb->BranchesToSuccessorOnlyIfNotZero(succ_id);
-  }
-
-  bool NullCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
-
-  bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
-
-  bool IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_id);
-  bool IsTrueInBlock(uint16_t cond, BasicBlockId bb_id);
-
-  ScopedArenaAllocator* Allocator() const {
-    return allocator_;
-  }
-
-  CompilationUnit* const cu_;
-  MIRGraph* const mir_graph_;
-  ScopedArenaAllocator* const allocator_;
-
-  // The maximum number of nested loops that we accept for GVN.
-  static constexpr size_t kMaxAllowedNestedLoops = 6u;
-
-  // The number of BBs that we need to process grows exponentially with the number
-  // of nested loops. Don't allow excessive processing for too many nested loops or
-  // otherwise expensive methods.
-  static constexpr uint32_t kMaxBbsToProcessMultiplyFactor = 20u;
-
-  uint32_t bbs_processed_;
-  uint32_t max_bbs_to_process_;  // Doesn't apply after the main GVN has converged.
-
-  // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good().
-  // We usually don't check Good() until the end of LVN unless we're about to modify code.
-  uint32_t last_value_;
-
-  // Marks whether code modifications are allowed. The initial GVN is done without code
-  // modifications to settle the value names. Afterwards, we allow modifications and rerun
-  // LVN once for each BasicBlock.
-  bool modifications_allowed_;
-
-  // Specifies the mode of operation.
-  Mode mode_;
-
-  ValueMap global_value_map_;
-  ArrayLocationMap array_location_map_;
-  ScopedArenaVector<const ArrayLocationMap::value_type*> array_location_reverse_map_;
-  RefSetIdMap ref_set_map_;
-
-  ScopedArenaVector<const LocalValueNumbering*> lvns_;        // Owning.
-  std::unique_ptr<LocalValueNumbering> work_lvn_;
-  ScopedArenaVector<const LocalValueNumbering*> merge_lvns_;  // Not owning.
-
-  friend class LocalValueNumbering;
-  friend class GlobalValueNumberingTest;
-
-  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumbering);
-};
-std::ostream& operator<<(std::ostream& os, const GlobalValueNumbering::Mode& rhs);
-
-inline const LocalValueNumbering* GlobalValueNumbering::GetLvn(BasicBlockId bb_id) const {
-  DCHECK_EQ(mode_, kModeGvnPostProcessing);
-  DCHECK_LT(bb_id, lvns_.size());
-  DCHECK(lvns_[bb_id] != nullptr);
-  return lvns_[bb_id];
-}
-
-inline void GlobalValueNumbering::StartPostProcessing() {
-  DCHECK(Good());
-  DCHECK_EQ(mode_, kModeGvn);
-  mode_ = kModeGvnPostProcessing;
-}
-
-inline uint16_t GlobalValueNumbering::NewValueName() {
-  DCHECK_NE(mode_, kModeGvnPostProcessing);
-  ++last_value_;
-  return last_value_;
-}
-
-template <typename Container>  // Container of MirIFieldLoweringInfo or MirSFieldLoweringInfo.
-uint16_t* GlobalValueNumbering::PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
-                                                   const Container& field_infos) {
-  size_t size = field_infos.size();
-  uint16_t* field_ids = allocator->AllocArray<uint16_t>(size, kArenaAllocMisc);
-  for (size_t i = 0u; i != size; ++i) {
-    size_t idx = i;
-    const MirFieldInfo& cur_info = field_infos[i];
-    if (cur_info.IsResolved()) {
-      for (size_t j = 0; j != i; ++j) {
-        const MirFieldInfo& prev_info = field_infos[j];
-        if (prev_info.IsResolved() &&
-            prev_info.DeclaringDexFile() == cur_info.DeclaringDexFile() &&
-            prev_info.DeclaringFieldIndex() == cur_info.DeclaringFieldIndex()) {
-          DCHECK_EQ(cur_info.MemAccessType(), prev_info.MemAccessType());
-          idx = j;
-          break;
-        }
-      }
-    }
-    field_ids[i] = idx;
-  }
-  return field_ids;
-}
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
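BuildKey() concatenates four 16-bit fields into one uint64_t, so the global value map needs only a single flat lookup per (op, operand1, operand2, modifier) tuple. Worked example:

    // op = 1, operand1 = 2, operand2 = 3, modifier = 4:
    uint64_t key = (static_cast<uint64_t>(1) << 48) |
                   (static_cast<uint64_t>(2) << 32) |
                   (static_cast<uint64_t>(3) << 16) | 4u;
    // key == 0x0001000200030004; equal tuples always map to the same value name.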
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
deleted file mode 100644
index f2c2e22..0000000
--- a/compiler/dex/global_value_numbering_test.cc
+++ /dev/null
@@ -1,2428 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/logging.h"
-#include "dataflow_iterator-inl.h"
-#include "dex/mir_field_info.h"
-#include "global_value_numbering.h"
-#include "local_value_numbering.h"
-#include "gtest/gtest.h"
-
-namespace art {
-
-class GlobalValueNumberingTest : public testing::Test {
- protected:
-  static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
-
-  struct IFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_field_idx;
-    bool is_volatile;
-    DexMemAccessType type;
-  };
-
-  struct SFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_field_idx;
-    bool is_volatile;
-    DexMemAccessType type;
-  };
-
-  struct BBDef {
-    static constexpr size_t kMaxSuccessors = 4;
-    static constexpr size_t kMaxPredecessors = 4;
-
-    BBType type;
-    size_t num_successors;
-    BasicBlockId successors[kMaxPredecessors];
-    size_t num_predecessors;
-    BasicBlockId predecessors[kMaxPredecessors];
-  };
-
-  struct MIRDef {
-    static constexpr size_t kMaxSsaDefs = 2;
-    static constexpr size_t kMaxSsaUses = 4;
-
-    BasicBlockId bbid;
-    Instruction::Code opcode;
-    int64_t value;
-    uint32_t field_info;
-    size_t num_uses;
-    int32_t uses[kMaxSsaUses];
-    size_t num_defs;
-    int32_t defs[kMaxSsaDefs];
-  };
-
-#define DEF_SUCC0() \
-    0u, { }
-#define DEF_SUCC1(s1) \
-    1u, { s1 }
-#define DEF_SUCC2(s1, s2) \
-    2u, { s1, s2 }
-#define DEF_SUCC3(s1, s2, s3) \
-    3u, { s1, s2, s3 }
-#define DEF_SUCC4(s1, s2, s3, s4) \
-    4u, { s1, s2, s3, s4 }
-#define DEF_PRED0() \
-    0u, { }
-#define DEF_PRED1(p1) \
-    1u, { p1 }
-#define DEF_PRED2(p1, p2) \
-    2u, { p1, p2 }
-#define DEF_PRED3(p1, p2, p3) \
-    3u, { p1, p2, p3 }
-#define DEF_PRED4(p1, p2, p3, p4) \
-    4u, { p1, p2, p3, p4 }
-#define DEF_BB(type, succ, pred) \
-    { type, succ, pred }
-
-#define DEF_CONST(bb, opcode, reg, value) \
-    { bb, opcode, value, 0u, 0, { }, 1, { reg } }
-#define DEF_CONST_WIDE(bb, opcode, reg, value) \
-    { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_CONST_STRING(bb, opcode, reg, index) \
-    { bb, opcode, index, 0u, 0, { }, 1, { reg } }
-#define DEF_IGET(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } }
-#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } }
-#define DEF_IPUT(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } }
-#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } }
-#define DEF_SGET(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 0, { }, 1, { reg } }
-#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_SPUT(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 1, { reg }, 0, { } }
-#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } }
-#define DEF_AGET(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } }
-#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } }
-#define DEF_APUT(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } }
-#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } }
-#define DEF_INVOKE1(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
-#define DEF_UNIQUE_REF(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 0, { }, 1, { reg } }  // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
-#define DEF_IFZ(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
-#define DEF_MOVE(bb, opcode, reg, src) \
-    { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } }
-#define DEF_MOVE_WIDE(bb, opcode, reg, src) \
-    { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
-#define DEF_PHI2(bb, reg, src1, src2) \
-    { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
-#define DEF_BINOP(bb, opcode, result, src1, src2) \
-    { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
-#define DEF_UNOP(bb, opcode, result, src) DEF_MOVE(bb, opcode, result, src)
-
-  void DoPrepareIFields(const IFieldDef* defs, size_t count) {
-    cu_.mir_graph->ifield_lowering_infos_.clear();
-    cu_.mir_graph->ifield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx, def->type, false);
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
-      }
-      cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareIFields(const IFieldDef (&defs)[count]) {
-    DoPrepareIFields(defs, count);
-  }
-
-  void DoPrepareSFields(const SFieldDef* defs, size_t count) {
-    cu_.mir_graph->sfield_lowering_infos_.clear();
-    cu_.mir_graph->sfield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
-      // Mark even unresolved fields as initialized.
-      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
-      // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN.
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
-      }
-      cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareSFields(const SFieldDef (&defs)[count]) {
-    DoPrepareSFields(defs, count);
-  }
-
-  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
-    cu_.mir_graph->block_id_map_.clear();
-    cu_.mir_graph->block_list_.clear();
-    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
-    ASSERT_EQ(kNullBlock, defs[0].type);
-    ASSERT_EQ(kEntryBlock, defs[1].type);
-    ASSERT_EQ(kExitBlock, defs[2].type);
-    for (size_t i = 0u; i != count; ++i) {
-      const BBDef* def = &defs[i];
-      BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type);
-      if (def->num_successors <= 2) {
-        bb->successor_block_list_type = kNotUsed;
-        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
-        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
-      } else {
-        bb->successor_block_list_type = kPackedSwitch;
-        bb->fall_through = 0u;
-        bb->taken = 0u;
-        bb->successor_blocks.reserve(def->num_successors);
-        for (size_t j = 0u; j != def->num_successors; ++j) {
-          SuccessorBlockInfo* successor_block_info =
-              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessors));
-          successor_block_info->block = j;
-          successor_block_info->key = 0u;  // Not used by class init check elimination.
-          bb->successor_blocks.push_back(successor_block_info);
-        }
-      }
-      bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors);
-      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
-        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
-            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
-        bb->data_flow_info->live_in_v = live_in_v_;
-      }
-    }
-    ASSERT_EQ(count, cu_.mir_graph->block_list_.size());
-    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1];
-    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
-    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2];
-    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
-  }
-
-  template <size_t count>
-  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
-    DoPrepareBasicBlocks(defs, count);
-  }
-
-  void DoPrepareMIRs(const MIRDef* defs, size_t count) {
-    mir_count_ = count;
-    mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
-    ssa_reps_.resize(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const MIRDef* def = &defs[i];
-      MIR* mir = &mirs_[i];
-      ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
-      BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
-      bb->AppendMIR(mir);
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
-      mir->dalvikInsn.vB_wide = def->value;
-      if (IsInstructionIGetOrIPut(def->opcode)) {
-        ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
-        mir->meta.ifield_lowering_info = def->field_info;
-        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
-                  IGetOrIPutMemAccessType(def->opcode));
-      } else if (IsInstructionSGetOrSPut(def->opcode)) {
-        ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->field_info;
-        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
-                  SGetOrSPutMemAccessType(def->opcode));
-      } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
-        mir->meta.phi_incoming =
-            allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo);
-        ASSERT_EQ(def->num_uses, bb->predecessors.size());
-        std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming);
-      }
-      mir->ssa_rep = &ssa_reps_[i];
-      mir->ssa_rep->num_uses = def->num_uses;
-      mir->ssa_rep->uses = const_cast<int32_t*>(def->uses);  // Not modified by LVN.
-      mir->ssa_rep->num_defs = def->num_defs;
-      mir->ssa_rep->defs = const_cast<int32_t*>(def->defs);  // Not modified by LVN.
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->offset = i;  // LVN uses offset only for debug output
-      mir->optimization_flags = 0u;
-    }
-    DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
-        cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
-    code_item->insns_size_in_code_units_ = 2u * count;
-    cu_.mir_graph->current_code_item_ = code_item;
-  }
-
-  template <size_t count>
-  void PrepareMIRs(const MIRDef (&defs)[count]) {
-    DoPrepareMIRs(defs, count);
-  }
-
-  void DoPrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t* map, size_t count) {
-    BasicBlock* bb = cu_.mir_graph->GetBasicBlock(bb_id);
-    ASSERT_TRUE(bb != nullptr);
-    ASSERT_TRUE(bb->data_flow_info != nullptr);
-    bb->data_flow_info->vreg_to_ssa_map_exit =
-        cu_.arena.AllocArray<int32_t>(count, kArenaAllocDFInfo);
-    std::copy_n(map, count, bb->data_flow_info->vreg_to_ssa_map_exit);
-  }
-
-  template <size_t count>
-  void PrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t (&map)[count]) {
-    DoPrepareVregToSsaMapExit(bb_id, map, count);
-  }
-
-  template <size_t count>
-  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
-    for (int32_t sreg : sregs) {
-      cu_.mir_graph->reg_location_[sreg].wide = true;
-      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
-      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
-    }
-  }
-
-  void PerformGVN() {
-    DoPerformGVN<LoopRepeatingTopologicalSortIterator>();
-  }
-
-  void PerformPreOrderDfsGVN() {
-    DoPerformGVN<RepeatingPreOrderDfsIterator>();
-  }
-
-  template <typename IteratorType>
-  void DoPerformGVN() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-    cu_.mir_graph->temp_.gvn.ifield_ids =  GlobalValueNumbering::PrepareGvnFieldIds(
-        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
-    cu_.mir_graph->temp_.gvn.sfield_ids =  GlobalValueNumbering::PrepareGvnFieldIds(
-        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
-    ASSERT_TRUE(gvn_ == nullptr);
-    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
-                                                           GlobalValueNumbering::kModeGvn));
-    value_names_.resize(mir_count_, 0xffffu);
-    IteratorType iterator(cu_.mir_graph.get());
-    bool change = false;
-    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
-      LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
-      if (lvn != nullptr) {
-        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          value_names_[mir - mirs_] = lvn->GetValueNumber(mir);
-        }
-      }
-      change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
-      ASSERT_TRUE(gvn_->Good());
-    }
-  }
-
-  void PerformGVNCodeModifications() {
-    ASSERT_TRUE(gvn_ != nullptr);
-    ASSERT_TRUE(gvn_->Good());
-    gvn_->StartPostProcessing();
-    TopologicalSortIterator iterator(cu_.mir_graph.get());
-    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
-      LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
-      if (lvn != nullptr) {
-        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          uint16_t value_name = lvn->GetValueNumber(mir);
-          ASSERT_EQ(value_name, value_names_[mir - mirs_]);
-        }
-      }
-      bool change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
-      ASSERT_FALSE(change);
-      ASSERT_TRUE(gvn_->Good());
-    }
-  }
-
-  GlobalValueNumberingTest()
-      : pool_(),
-        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
-        mir_count_(0u),
-        mirs_(nullptr),
-        ssa_reps_(),
-        allocator_(),
-        gvn_(),
-        value_names_(),
-        live_in_v_(new (&cu_.arena) ArenaBitVector(&cu_.arena, kMaxSsaRegs, false, kBitMapMisc)) {
-    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
-    cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
-    allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references, and the values are all narrow.
-    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
-    cu_.mir_graph->reg_location_ =
-        cu_.arena.AllocArray<RegLocation>(kMaxSsaRegs, kArenaAllocRegAlloc);
-    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
-    // Bind all possible sregs to live vregs for test purposes.
-    live_in_v_->SetInitialBits(kMaxSsaRegs);
-    cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
-    cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs);
-    for (unsigned int i = 0; i < kMaxSsaRegs; i++) {
-      cu_.mir_graph->ssa_base_vregs_.push_back(i);
-      cu_.mir_graph->ssa_subscripts_.push_back(0);
-    }
-    // Set shorty for a void-returning method without arguments.
-    cu_.shorty = "V";
-  }
-
-  static constexpr size_t kMaxSsaRegs = 16384u;
-
-  ArenaPool pool_;
-  CompilationUnit cu_;
-  size_t mir_count_;
-  MIR* mirs_;
-  std::vector<SSARepresentation> ssa_reps_;
-  std::unique_ptr<ScopedArenaAllocator> allocator_;
-  std::unique_ptr<GlobalValueNumbering> gvn_;
-  std::vector<uint16_t> value_names_;
-  ArenaBitVector* live_in_v_;
-};
-
-constexpr uint16_t GlobalValueNumberingTest::kNoValue;
-
-class GlobalValueNumberingTestDiamond : public GlobalValueNumberingTest {
- public:
-  GlobalValueNumberingTestDiamond();
-
- private:
-  static const BBDef kDiamondBbs[];
-};
-
-const GlobalValueNumberingTest::BBDef GlobalValueNumberingTestDiamond::kDiamondBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // Block #3, top of the diamond.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #4, left side.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #5, right side.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // Block #6, bottom.
-};
-
-GlobalValueNumberingTestDiamond::GlobalValueNumberingTestDiamond()
-    : GlobalValueNumberingTest() {
-  PrepareBasicBlocks(kDiamondBbs);
-}
-
-class GlobalValueNumberingTestLoop : public GlobalValueNumberingTest {
- public:
-  GlobalValueNumberingTestLoop();
-
- private:
-  static const BBDef kLoopBbs[];
-};
-
-const GlobalValueNumberingTest::BBDef GlobalValueNumberingTestLoop::kLoopBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-};
-
-GlobalValueNumberingTestLoop::GlobalValueNumberingTestLoop()
-    : GlobalValueNumberingTest() {
-  PrepareBasicBlocks(kLoopBbs);
-}
-
-class GlobalValueNumberingTestCatch : public GlobalValueNumberingTest {
- public:
-  GlobalValueNumberingTestCatch();
-
- private:
-  static const BBDef kCatchBbs[];
-};
-
-const GlobalValueNumberingTest::BBDef GlobalValueNumberingTestCatch::kCatchBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),     // The top.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // The throwing insn.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Catch handler.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // The merged block.
-};
-
-GlobalValueNumberingTestCatch::GlobalValueNumberingTestCatch()
-    : GlobalValueNumberingTest() {
-  PrepareBasicBlocks(kCatchBbs);
-  // Mark catch handler.
-  BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
-  catch_handler->catch_entry = true;
-  // Add successor block info to the check block.
-  BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
-  check_bb->successor_block_list_type = kCatch;
-  SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
-  successor_block_info->block = catch_handler->id;
-  check_bb->successor_blocks.push_back(successor_block_info);
-}
-
-class GlobalValueNumberingTestTwoConsecutiveLoops : public GlobalValueNumberingTest {
- public:
-  GlobalValueNumberingTestTwoConsecutiveLoops();
-
- private:
-  static const BBDef kTwoConsecutiveLoopsBbs[];
-};
-
-const GlobalValueNumberingTest::BBDef
-GlobalValueNumberingTestTwoConsecutiveLoops::kTwoConsecutiveLoopsBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(9)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED2(3, 5)),  // "taken" skips over the loop.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(4)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(6, 8)),  // "taken" skips over the loop.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(7)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(7)),
-};
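-
-// CFG sketch of kTwoConsecutiveLoopsBbs: two loop heads [4] and [7] with
-// single-block bodies [5] and [8].
-//
-//   [1] -> [3] -> [4] -> [6] -> [7] -> [9] -> [2]
-//                 |  ^          |  ^
-//                 v  |          v  |
-//                 [5]           [8]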
-
-GlobalValueNumberingTestTwoConsecutiveLoops::GlobalValueNumberingTestTwoConsecutiveLoops()
-    : GlobalValueNumberingTest() {
-  PrepareBasicBlocks(kTwoConsecutiveLoopsBbs);
-}
-
-class GlobalValueNumberingTestTwoNestedLoops : public GlobalValueNumberingTest {
- public:
-  GlobalValueNumberingTestTwoNestedLoops();
-
- private:
-  static const BBDef kTwoNestedLoopsBbs[];
-};
-
-const GlobalValueNumberingTest::BBDef
-GlobalValueNumberingTestTwoNestedLoops::kTwoNestedLoopsBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(8)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED2(3, 7)),  // "taken" skips over the loop.
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),  // "taken" skips over the loop.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(5)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-};
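-
-// CFG sketch of kTwoNestedLoopsBbs: [4] heads the outer loop
-// ([4] -> [5] -> [7] -> [4]), [5] heads the inner loop ([5] -> [6] -> [5]),
-// and [4] -> [8] -> [2] is the exit path.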
-
-GlobalValueNumberingTestTwoNestedLoops::GlobalValueNumberingTestTwoNestedLoops()
-    : GlobalValueNumberingTest() {
-  PrepareBasicBlocks(kTwoNestedLoopsBbs);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, NonAliasingIFields) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-      { 4u, 1u, 4u, false, kDexMemAccessShort },
-      { 5u, 1u, 5u, false, kDexMemAccessChar },
-      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
-      { 7u, 1u, 7u, false, kDexMemAccessWord },
-      { 8u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved.
-      { 9u, 1u, 9u, false, kDexMemAccessWord },
-      { 10u, 1u, 10u, false, kDexMemAccessWord },
-      { 11u, 1u, 11u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
-      DEF_IGET(3, Instruction::IGET, 1u, 100u, 0u),
-      DEF_IGET(6, Instruction::IGET, 2u, 100u, 0u),   // Same as at the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
-      DEF_IGET(4, Instruction::IGET, 4u, 200u, 1u),
-      DEF_IGET(6, Instruction::IGET, 5u, 200u, 1u),   // Same as at the left side.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 300u),
-      DEF_IGET(3, Instruction::IGET, 7u, 300u, 2u),
-      DEF_CONST(5, Instruction::CONST, 8u, 1000),
-      DEF_IPUT(5, Instruction::IPUT, 8u, 300u, 2u),
-      DEF_IGET(6, Instruction::IGET, 10u, 300u, 2u),  // Differs from the top and the CONST.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 400u),
-      DEF_IGET(3, Instruction::IGET, 12u, 400u, 3u),
-      DEF_CONST(3, Instruction::CONST, 13u, 2000),
-      DEF_IPUT(4, Instruction::IPUT, 13u, 400u, 3u),
-      DEF_IPUT(5, Instruction::IPUT, 13u, 400u, 3u),
-      DEF_IGET(6, Instruction::IGET, 16u, 400u, 3u),  // Differs from the top, equals the CONST.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 500u),
-      DEF_IGET(3, Instruction::IGET_SHORT, 18u, 500u, 4u),
-      DEF_IGET(3, Instruction::IGET_CHAR, 19u, 500u, 5u),
-      DEF_IPUT(4, Instruction::IPUT_SHORT, 20u, 500u, 6u),  // Clobbers field #4, not #5.
-      DEF_IGET(6, Instruction::IGET_SHORT, 21u, 500u, 4u),  // Differs from the top.
-      DEF_IGET(6, Instruction::IGET_CHAR, 22u, 500u, 5u),   // Same as the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 600u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 601u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 602u),
-      DEF_IGET(3, Instruction::IGET, 26u, 600u, 7u),
-      DEF_IGET(3, Instruction::IGET, 27u, 601u, 7u),
-      DEF_IPUT(4, Instruction::IPUT, 28u, 602u, 8u),  // Doesn't clobber field #7 for other refs.
-      DEF_IGET(6, Instruction::IGET, 29u, 600u, 7u),  // Same as the top.
-      DEF_IGET(6, Instruction::IGET, 30u, 601u, 7u),  // Same as the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 700u),
-      DEF_CONST(4, Instruction::CONST, 32u, 3000),
-      DEF_IPUT(4, Instruction::IPUT, 32u, 700u, 9u),
-      DEF_IPUT(4, Instruction::IPUT, 32u, 700u, 10u),
-      DEF_CONST(5, Instruction::CONST, 35u, 3001),
-      DEF_IPUT(5, Instruction::IPUT, 35u, 700u, 9u),
-      DEF_IPUT(5, Instruction::IPUT, 35u, 700u, 10u),
-      DEF_IGET(6, Instruction::IGET, 38u, 700u, 9u),
-      DEF_IGET(6, Instruction::IGET, 39u, 700u, 10u),  // Same value as read from field #9.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 800u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 801u),
-      DEF_CONST(4, Instruction::CONST, 42u, 3000),
-      DEF_IPUT(4, Instruction::IPUT, 42u, 800u, 11u),
-      DEF_IPUT(4, Instruction::IPUT, 42u, 801u, 11u),
-      DEF_CONST(5, Instruction::CONST, 45u, 3001),
-      DEF_IPUT(5, Instruction::IPUT, 45u, 800u, 11u),
-      DEF_IPUT(5, Instruction::IPUT, 45u, 801u, 11u),
-      DEF_IGET(6, Instruction::IGET, 48u, 800u, 11u),
-      DEF_IGET(6, Instruction::IGET, 49u, 801u, 11u),  // Same value as read from ref 800u.
-
-      // Invoke doesn't interfere with non-aliasing refs. There's one test above where a reference
-      // escapes in the left BB (we let a reference escape if we use it to store to an unresolved
-      // field) and the INVOKE in the right BB shouldn't interfere with that either.
-      DEF_INVOKE1(5, Instruction::INVOKE_STATIC, 48u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[1], value_names_[2]);
-
-  EXPECT_EQ(value_names_[4], value_names_[5]);
-
-  EXPECT_NE(value_names_[7], value_names_[10]);
-  EXPECT_NE(value_names_[8], value_names_[10]);
-
-  EXPECT_NE(value_names_[12], value_names_[16]);
-  EXPECT_EQ(value_names_[13], value_names_[16]);
-
-  EXPECT_NE(value_names_[18], value_names_[21]);
-  EXPECT_EQ(value_names_[19], value_names_[22]);
-
-  EXPECT_EQ(value_names_[26], value_names_[29]);
-  EXPECT_EQ(value_names_[27], value_names_[30]);
-
-  EXPECT_EQ(value_names_[38], value_names_[39]);
-
-  EXPECT_EQ(value_names_[48], value_names_[49]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsSingleObject) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-      { 4u, 1u, 4u, false, kDexMemAccessShort },
-      { 5u, 1u, 5u, false, kDexMemAccessChar },
-      { 6u, 0u, 0u, false, kDexMemAccessShort },  // Unresolved.
-      { 7u, 1u, 7u, false, kDexMemAccessWord },
-      { 8u, 1u, 8u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
-      DEF_IGET(6, Instruction::IGET, 1u, 100u, 0u),   // Same as at the top.
-
-      DEF_IGET(4, Instruction::IGET, 2u, 100u, 1u),
-      DEF_IGET(6, Instruction::IGET, 3u, 100u, 1u),   // Same as at the left side.
-
-      DEF_IGET(3, Instruction::IGET, 4u, 100u, 2u),
-      DEF_CONST(5, Instruction::CONST, 5u, 1000),
-      DEF_IPUT(5, Instruction::IPUT, 5u, 100u, 2u),
-      DEF_IGET(6, Instruction::IGET, 7u, 100u, 2u),   // Differs from the top and the CONST.
-
-      DEF_IGET(3, Instruction::IGET, 8u, 100u, 3u),
-      DEF_CONST(3, Instruction::CONST, 9u, 2000),
-      DEF_IPUT(4, Instruction::IPUT, 9u, 100u, 3u),
-      DEF_IPUT(5, Instruction::IPUT, 9u, 100u, 3u),
-      DEF_IGET(6, Instruction::IGET, 12u, 100u, 3u),  // Differs from the top, equals the CONST.
-
-      DEF_IGET(3, Instruction::IGET_SHORT, 13u, 100u, 4u),
-      DEF_IGET(3, Instruction::IGET_CHAR, 14u, 100u, 5u),
-      DEF_IPUT(4, Instruction::IPUT_SHORT, 15u, 100u, 6u),  // Clobbers field #4, not #5.
-      DEF_IGET(6, Instruction::IGET_SHORT, 16u, 100u, 4u),  // Differs from the top.
-      DEF_IGET(6, Instruction::IGET_CHAR, 17u, 100u, 5u),   // Same as the top.
-
-      DEF_CONST(4, Instruction::CONST, 18u, 3000),
-      DEF_IPUT(4, Instruction::IPUT, 18u, 100u, 7u),
-      DEF_IPUT(4, Instruction::IPUT, 18u, 100u, 8u),
-      DEF_CONST(5, Instruction::CONST, 21u, 3001),
-      DEF_IPUT(5, Instruction::IPUT, 21u, 100u, 7u),
-      DEF_IPUT(5, Instruction::IPUT, 21u, 100u, 8u),
-      DEF_IGET(6, Instruction::IGET, 24u, 100u, 7u),
-      DEF_IGET(6, Instruction::IGET, 25u, 100u, 8u),  // Same value as read from field #7.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  EXPECT_EQ(value_names_[2], value_names_[3]);
-
-  EXPECT_NE(value_names_[4], value_names_[7]);
-  EXPECT_NE(value_names_[5], value_names_[7]);
-
-  EXPECT_NE(value_names_[8], value_names_[12]);
-  EXPECT_EQ(value_names_[9], value_names_[12]);
-
-  EXPECT_NE(value_names_[13], value_names_[16]);
-  EXPECT_EQ(value_names_[14], value_names_[17]);
-
-  EXPECT_EQ(value_names_[24], value_names_[25]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, AliasingIFieldsTwoObjects) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-      { 4u, 1u, 4u, false, kDexMemAccessShort },
-      { 5u, 1u, 5u, false, kDexMemAccessChar },
-      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
-      { 7u, 1u, 7u, false, kDexMemAccessWord },
-      { 8u, 1u, 8u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
-      DEF_IPUT(4, Instruction::IPUT, 1u, 101u, 0u),   // May alias with the IGET at the top.
-      DEF_IGET(6, Instruction::IGET, 2u, 100u, 0u),   // Differs from the top.
-
-      DEF_IGET(3, Instruction::IGET, 3u, 100u, 1u),
-      DEF_IPUT(5, Instruction::IPUT, 3u, 101u, 1u),   // If aliasing, stores the same value.
-      DEF_IGET(6, Instruction::IGET, 5u, 100u, 1u),   // Same as the top.
-
-      DEF_IGET(3, Instruction::IGET, 6u, 100u, 2u),
-      DEF_CONST(5, Instruction::CONST, 7u, 1000),
-      DEF_IPUT(5, Instruction::IPUT, 7u, 101u, 2u),
-      DEF_IGET(6, Instruction::IGET, 9u, 100u, 2u),   // Differs from the top and the CONST.
-
-      DEF_IGET(3, Instruction::IGET, 10u, 100u, 3u),
-      DEF_CONST(3, Instruction::CONST, 11u, 2000),
-      DEF_IPUT(4, Instruction::IPUT, 11u, 101u, 3u),
-      DEF_IPUT(5, Instruction::IPUT, 11u, 101u, 3u),
-      DEF_IGET(6, Instruction::IGET, 14u, 100u, 3u),  // Differs from the top and the CONST.
-
-      DEF_IGET(3, Instruction::IGET_SHORT, 15u, 100u, 4u),
-      DEF_IGET(3, Instruction::IGET_CHAR, 16u, 100u, 5u),
-      DEF_IPUT(4, Instruction::IPUT_SHORT, 17u, 101u, 6u),  // Clobbers field #4, not #5.
-      DEF_IGET(6, Instruction::IGET_SHORT, 18u, 100u, 4u),  // Differs from the top.
-      DEF_IGET(6, Instruction::IGET_CHAR, 19u, 100u, 5u),   // Same as the top.
-
-      DEF_CONST(4, Instruction::CONST, 20u, 3000),
-      DEF_IPUT(4, Instruction::IPUT, 20u, 100u, 7u),
-      DEF_IPUT(4, Instruction::IPUT, 20u, 101u, 8u),
-      DEF_CONST(5, Instruction::CONST, 23u, 3001),
-      DEF_IPUT(5, Instruction::IPUT, 23u, 100u, 7u),
-      DEF_IPUT(5, Instruction::IPUT, 23u, 101u, 8u),
-      DEF_IGET(6, Instruction::IGET, 26u, 100u, 7u),
-      DEF_IGET(6, Instruction::IGET, 27u, 101u, 8u),  // Same value as read from field #7.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[2]);
-
-  EXPECT_EQ(value_names_[3], value_names_[5]);
-
-  EXPECT_NE(value_names_[6], value_names_[9]);
-  EXPECT_NE(value_names_[7], value_names_[9]);
-
-  EXPECT_NE(value_names_[10], value_names_[14]);
-  EXPECT_NE(value_names_[11], value_names_[14]);
-
-  EXPECT_NE(value_names_[15], value_names_[18]);
-  EXPECT_EQ(value_names_[16], value_names_[19]);
-
-  EXPECT_EQ(value_names_[26], value_names_[27]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, SFields) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-      { 4u, 1u, 4u, false, kDexMemAccessShort },
-      { 5u, 1u, 5u, false, kDexMemAccessChar },
-      { 6u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
-      { 7u, 1u, 7u, false, kDexMemAccessWord },
-      { 8u, 1u, 8u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_SGET(3, Instruction::SGET, 0u, 0u),
-      DEF_SGET(6, Instruction::SGET, 1u, 0u),         // Same as at the top.
-
-      DEF_SGET(4, Instruction::SGET, 2u, 1u),
-      DEF_SGET(6, Instruction::SGET, 3u, 1u),         // Same as at the left side.
-
-      DEF_SGET(3, Instruction::SGET, 4u, 2u),
-      DEF_CONST(5, Instruction::CONST, 5u, 100),
-      DEF_SPUT(5, Instruction::SPUT, 5u, 2u),
-      DEF_SGET(6, Instruction::SGET, 7u, 2u),         // Differs from the top and the CONST.
-
-      DEF_SGET(3, Instruction::SGET, 8u, 3u),
-      DEF_CONST(3, Instruction::CONST, 9u, 200),
-      DEF_SPUT(4, Instruction::SPUT, 9u, 3u),
-      DEF_SPUT(5, Instruction::SPUT, 9u, 3u),
-      DEF_SGET(6, Instruction::SGET, 12u, 3u),        // Differs from the top, equals the CONST.
-
-      DEF_SGET(3, Instruction::SGET_SHORT, 13u, 4u),
-      DEF_SGET(3, Instruction::SGET_CHAR, 14u, 5u),
-      DEF_SPUT(4, Instruction::SPUT_SHORT, 15u, 6u),  // Clobbers field #4, not #5.
-      DEF_SGET(6, Instruction::SGET_SHORT, 16u, 4u),  // Differs from the top.
-      DEF_SGET(6, Instruction::SGET_CHAR, 17u, 5u),   // Same as the top.
-
-      DEF_CONST(4, Instruction::CONST, 18u, 300),
-      DEF_SPUT(4, Instruction::SPUT, 18u, 7u),
-      DEF_SPUT(4, Instruction::SPUT, 18u, 8u),
-      DEF_CONST(5, Instruction::CONST, 21u, 301),
-      DEF_SPUT(5, Instruction::SPUT, 21u, 7u),
-      DEF_SPUT(5, Instruction::SPUT, 21u, 8u),
-      DEF_SGET(6, Instruction::SGET, 24u, 7u),
-      DEF_SGET(6, Instruction::SGET, 25u, 8u),        // Same value as read from field #7.
-  };
-
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  EXPECT_EQ(value_names_[2], value_names_[3]);
-
-  EXPECT_NE(value_names_[4], value_names_[7]);
-  EXPECT_NE(value_names_[5], value_names_[7]);
-
-  EXPECT_NE(value_names_[8], value_names_[12]);
-  EXPECT_EQ(value_names_[9], value_names_[12]);
-
-  EXPECT_NE(value_names_[13], value_names_[16]);
-  EXPECT_EQ(value_names_[14], value_names_[17]);
-
-  EXPECT_EQ(value_names_[24], value_names_[25]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, NonAliasingArrays) {
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 100u),
-      DEF_AGET(3, Instruction::AGET, 1u, 100u, 101u),
-      DEF_AGET(6, Instruction::AGET, 2u, 100u, 101u),   // Same as at the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
-      DEF_AGET(4, Instruction::AGET, 4u, 200u, 201u),
-      DEF_AGET(6, Instruction::AGET, 5u, 200u, 201u),   // Same as at the left side.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 300u),
-      DEF_AGET(3, Instruction::AGET, 7u, 300u, 301u),
-      DEF_CONST(5, Instruction::CONST, 8u, 1000),
-      DEF_APUT(5, Instruction::APUT, 8u, 300u, 301u),
-      DEF_AGET(6, Instruction::AGET, 10u, 300u, 301u),  // Differs from the top and the CONST.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 400u),
-      DEF_AGET(3, Instruction::AGET, 12u, 400u, 401u),
-      DEF_CONST(3, Instruction::CONST, 13u, 2000),
-      DEF_APUT(4, Instruction::APUT, 13u, 400u, 401u),
-      DEF_APUT(5, Instruction::APUT, 13u, 400u, 401u),
-      DEF_AGET(6, Instruction::AGET, 16u, 400u, 401u),  // Differs from the top, equals the CONST.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 500u),
-      DEF_AGET(3, Instruction::AGET, 18u, 500u, 501u),
-      DEF_APUT(4, Instruction::APUT, 19u, 500u, 502u),  // Clobbers value at index 501u.
-      DEF_AGET(6, Instruction::AGET, 20u, 500u, 501u),  // Differs from the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 600u),
-      DEF_CONST(4, Instruction::CONST, 22u, 3000),
-      DEF_APUT(4, Instruction::APUT, 22u, 600u, 601u),
-      DEF_APUT(4, Instruction::APUT, 22u, 600u, 602u),
-      DEF_CONST(5, Instruction::CONST, 25u, 3001),
-      DEF_APUT(5, Instruction::APUT, 25u, 600u, 601u),
-      DEF_APUT(5, Instruction::APUT, 25u, 600u, 602u),
-      DEF_AGET(6, Instruction::AGET, 28u, 600u, 601u),
-      DEF_AGET(6, Instruction::AGET, 29u, 600u, 602u),  // Same value as read from index 601u.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 700u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 701u),
-      DEF_AGET(3, Instruction::AGET, 32u, 700u, 702u),
-      DEF_APUT(4, Instruction::APUT, 33u, 701u, 702u),  // Doesn't interfere with unrelated array.
-      DEF_AGET(6, Instruction::AGET, 34u, 700u, 702u),  // Same value as at the top.
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[1], value_names_[2]);
-
-  EXPECT_EQ(value_names_[4], value_names_[5]);
-
-  EXPECT_NE(value_names_[7], value_names_[10]);
-  EXPECT_NE(value_names_[8], value_names_[10]);
-
-  EXPECT_NE(value_names_[12], value_names_[16]);
-  EXPECT_EQ(value_names_[13], value_names_[16]);
-
-  EXPECT_NE(value_names_[18], value_names_[20]);
-
-  EXPECT_NE(value_names_[28], value_names_[22]);
-  EXPECT_NE(value_names_[28], value_names_[25]);
-  EXPECT_EQ(value_names_[28], value_names_[29]);
-
-  EXPECT_EQ(value_names_[32], value_names_[34]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, AliasingArrays) {
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      // NOTE: We're also testing that these tests really do not interfere with each other.
-
-      DEF_AGET(3, Instruction::AGET_BOOLEAN, 0u, 100u, 101u),
-      DEF_AGET(6, Instruction::AGET_BOOLEAN, 1u, 100u, 101u),  // Same as at the top.
-
-      DEF_AGET(4, Instruction::AGET_OBJECT, 2u, 200u, 201u),
-      DEF_AGET(6, Instruction::AGET_OBJECT, 3u, 200u, 201u),  // Same as at the left side.
-
-      DEF_AGET(3, Instruction::AGET_WIDE, 4u, 300u, 301u),
-      DEF_CONST(5, Instruction::CONST_WIDE, 6u, 1000),
-      DEF_APUT(5, Instruction::APUT_WIDE, 6u, 300u, 301u),
-      DEF_AGET(6, Instruction::AGET_WIDE, 8u, 300u, 301u),  // Differs from the top and the CONST.
-
-      DEF_AGET(3, Instruction::AGET_SHORT, 10u, 400u, 401u),
-      DEF_CONST(3, Instruction::CONST, 11u, 2000),
-      DEF_APUT(4, Instruction::APUT_SHORT, 11u, 400u, 401u),
-      DEF_APUT(5, Instruction::APUT_SHORT, 11u, 400u, 401u),
-      DEF_AGET(6, Instruction::AGET_SHORT, 12u, 400u, 401u),  // Differs from the top, == CONST.
-
-      DEF_AGET(3, Instruction::AGET_CHAR, 13u, 500u, 501u),
-      DEF_APUT(4, Instruction::APUT_CHAR, 14u, 500u, 502u),  // Clobbers value at index 501u.
-      DEF_AGET(6, Instruction::AGET_CHAR, 15u, 500u, 501u),  // Differs from the top.
-
-      DEF_AGET(3, Instruction::AGET_BYTE, 16u, 600u, 602u),
-      DEF_APUT(4, Instruction::APUT_BYTE, 17u, 601u, 602u),  // Clobbers values in array 600u.
-      DEF_AGET(6, Instruction::AGET_BYTE, 18u, 600u, 602u),  // Differs from the top.
-
-      DEF_CONST(4, Instruction::CONST, 19u, 3000),
-      DEF_APUT(4, Instruction::APUT, 19u, 700u, 701u),
-      DEF_APUT(4, Instruction::APUT, 19u, 700u, 702u),
-      DEF_CONST(5, Instruction::CONST, 22u, 3001),
-      DEF_APUT(5, Instruction::APUT, 22u, 700u, 701u),
-      DEF_APUT(5, Instruction::APUT, 22u, 700u, 702u),
-      DEF_AGET(6, Instruction::AGET, 25u, 700u, 701u),
-      DEF_AGET(6, Instruction::AGET, 26u, 700u, 702u),  // Same value as read from index 701u.
-  };
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 4, 6, 8 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  EXPECT_EQ(value_names_[2], value_names_[3]);
-
-  EXPECT_NE(value_names_[4], value_names_[7]);
-  EXPECT_NE(value_names_[5], value_names_[7]);
-
-  EXPECT_NE(value_names_[8], value_names_[12]);
-  EXPECT_EQ(value_names_[9], value_names_[12]);
-
-  EXPECT_NE(value_names_[13], value_names_[15]);
-
-  EXPECT_NE(value_names_[16], value_names_[18]);
-
-  EXPECT_NE(value_names_[25], value_names_[19]);
-  EXPECT_NE(value_names_[25], value_names_[22]);
-  EXPECT_EQ(value_names_[25], value_names_[26]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, Phi) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000),
-      DEF_CONST(4, Instruction::CONST, 1u, 2000),
-      DEF_CONST(5, Instruction::CONST, 2u, 3000),
-      DEF_MOVE(4, Instruction::MOVE, 3u, 0u),
-      DEF_MOVE(4, Instruction::MOVE, 4u, 1u),
-      DEF_MOVE(5, Instruction::MOVE, 5u, 0u),
-      DEF_MOVE(5, Instruction::MOVE, 6u, 2u),
-      DEF_PHI2(6, 7u, 3u, 5u),    // Same as CONST 0u (1000).
-      DEF_PHI2(6, 8u, 3u, 0u),    // Same as CONST 0u (1000).
-      DEF_PHI2(6, 9u, 0u, 5u),    // Same as CONST 0u (1000).
-      DEF_PHI2(6, 10u, 4u, 5u),   // Merge 1u (2000) and 0u (1000).
-      DEF_PHI2(6, 11u, 1u, 5u),   // Merge 1u (2000) and 0u (1000).
-      DEF_PHI2(6, 12u, 4u, 0u),   // Merge 1u (2000) and 0u (1000).
-      DEF_PHI2(6, 13u, 1u, 0u),   // Merge 1u (2000) and 0u (1000).
-      DEF_PHI2(6, 14u, 3u, 6u),   // Merge 0u (1000) and 2u (3000).
-      DEF_PHI2(6, 15u, 0u, 6u),   // Merge 0u (1000) and 2u (3000).
-      DEF_PHI2(6, 16u, 3u, 2u),   // Merge 0u (1000) and 2u (3000).
-      DEF_PHI2(6, 17u, 0u, 2u),   // Merge 0u (1000) and 2u (3000).
-      DEF_PHI2(6, 18u, 4u, 6u),   // Merge 1u (2000) and 2u (3000).
-      DEF_PHI2(6, 19u, 1u, 6u),   // Merge 1u (2000) and 2u (3000).
-      DEF_PHI2(6, 20u, 4u, 2u),   // Merge 1u (2000) and 2u (3000).
-      DEF_PHI2(6, 21u, 1u, 2u),   // Merge 1u (2000) and 2u (3000).
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
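-  // A phi whose operands carry the same value name gets that name; phis
-  // merging the same pair of distinct values share a common new name. The
-  // checks below verify both properties.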
-  EXPECT_EQ(value_names_[0], value_names_[7]);
-  EXPECT_EQ(value_names_[0], value_names_[8]);
-  EXPECT_EQ(value_names_[0], value_names_[9]);
-  EXPECT_NE(value_names_[10], value_names_[0]);
-  EXPECT_NE(value_names_[10], value_names_[1]);
-  EXPECT_NE(value_names_[10], value_names_[2]);
-  EXPECT_EQ(value_names_[10], value_names_[11]);
-  EXPECT_EQ(value_names_[10], value_names_[12]);
-  EXPECT_EQ(value_names_[10], value_names_[13]);
-  EXPECT_NE(value_names_[14], value_names_[0]);
-  EXPECT_NE(value_names_[14], value_names_[1]);
-  EXPECT_NE(value_names_[14], value_names_[2]);
-  EXPECT_NE(value_names_[14], value_names_[10]);
-  EXPECT_EQ(value_names_[14], value_names_[15]);
-  EXPECT_EQ(value_names_[14], value_names_[16]);
-  EXPECT_EQ(value_names_[14], value_names_[17]);
-  EXPECT_NE(value_names_[18], value_names_[0]);
-  EXPECT_NE(value_names_[18], value_names_[1]);
-  EXPECT_NE(value_names_[18], value_names_[2]);
-  EXPECT_NE(value_names_[18], value_names_[10]);
-  EXPECT_NE(value_names_[18], value_names_[14]);
-  EXPECT_EQ(value_names_[18], value_names_[19]);
-  EXPECT_EQ(value_names_[18], value_names_[20]);
-  EXPECT_EQ(value_names_[18], value_names_[21]);
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, PhiWide) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000),
-      DEF_CONST_WIDE(4, Instruction::CONST_WIDE, 2u, 2000),
-      DEF_CONST_WIDE(5, Instruction::CONST_WIDE, 4u, 3000),
-      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 6u, 0u),
-      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 8u, 2u),
-      DEF_MOVE_WIDE(5, Instruction::MOVE_WIDE, 10u, 0u),
-      DEF_MOVE_WIDE(5, Instruction::MOVE_WIDE, 12u, 4u),
-      DEF_PHI2(6, 14u, 6u, 10u),    // Same as CONST_WIDE 0u (1000).
-      DEF_PHI2(6, 15u, 7u, 11u),    // Same as CONST_WIDE 0u (1000), high word.
-      DEF_PHI2(6, 16u, 6u,  0u),    // Same as CONST_WIDE 0u (1000).
-      DEF_PHI2(6, 17u, 7u,  1u),    // Same as CONST_WIDE 0u (1000), high word.
-      DEF_PHI2(6, 18u, 0u, 10u),    // Same as CONST_WIDE 0u (1000).
-      DEF_PHI2(6, 19u, 1u, 11u),    // Same as CONST_WIDE 0u (1000), high word.
-      DEF_PHI2(6, 20u, 8u, 10u),    // Merge 2u (2000) and 0u (1000).
-      DEF_PHI2(6, 21u, 9u, 11u),    // Merge 2u (2000) and 0u (1000), high word.
-      DEF_PHI2(6, 22u, 2u, 10u),    // Merge 2u (2000) and 0u (1000).
-      DEF_PHI2(6, 23u, 3u, 11u),    // Merge 2u (2000) and 0u (1000), high word.
-      DEF_PHI2(6, 24u, 8u,  0u),    // Merge 2u (2000) and 0u (1000).
-      DEF_PHI2(6, 25u, 9u,  1u),    // Merge 2u (2000) and 0u (1000), high word.
-      DEF_PHI2(6, 26u, 2u,  0u),    // Merge 2u (2000) and 0u (1000).
-      DEF_PHI2(6, 27u, 3u,  1u),    // Merge 2u (2000) and 0u (1000), high word.
-      DEF_PHI2(6, 28u, 6u, 12u),    // Merge 0u (1000) and 4u (3000).
-      DEF_PHI2(6, 29u, 7u, 13u),    // Merge 0u (1000) and 4u (3000), high word.
-      DEF_PHI2(6, 30u, 0u, 12u),    // Merge 0u (1000) and 4u (3000).
-      DEF_PHI2(6, 31u, 1u, 13u),    // Merge 0u (1000) and 4u (3000), high word.
-      DEF_PHI2(6, 32u, 6u,  4u),    // Merge 0u (1000) and 4u (3000).
-      DEF_PHI2(6, 33u, 7u,  5u),    // Merge 0u (1000) and 4u (3000), high word.
-      DEF_PHI2(6, 34u, 0u,  4u),    // Merge 0u (1000) and 4u (3000).
-      DEF_PHI2(6, 35u, 1u,  5u),    // Merge 0u (1000) and 4u (3000), high word.
-      DEF_PHI2(6, 36u, 8u, 12u),    // Merge 2u (2000) and 4u (3000).
-      DEF_PHI2(6, 37u, 9u, 13u),    // Merge 2u (2000) and 4u (3000), high word.
-      DEF_PHI2(6, 38u, 2u, 12u),    // Merge 2u (2000) and 4u (3000).
-      DEF_PHI2(6, 39u, 3u, 13u),    // Merge 2u (2000) and 4u (3000), high word.
-      DEF_PHI2(6, 40u, 8u,  4u),    // Merge 2u (2000) and 4u (3000).
-      DEF_PHI2(6, 41u, 9u,  5u),    // Merge 2u (2000) and 4u (3000), high word.
-      DEF_PHI2(6, 42u, 2u,  4u),    // Merge 2u (2000) and 4u (3000).
-      DEF_PHI2(6, 43u, 3u,  5u),    // Merge 2u (2000) and 4u (3000), high word.
-  };
-
-  PrepareMIRs(mirs);
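-  // Wide values in this test occupy consecutive even/odd sreg pairs, so every
-  // even-numbered def is the low word of a pair and is marked wide here.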
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    if ((mirs_[i].ssa_rep->defs[0] % 2) == 0) {
-      const int32_t wide_sregs[] = { mirs_[i].ssa_rep->defs[0] };
-      MarkAsWideSRegs(wide_sregs);
-    }
-  }
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[7]);
-  EXPECT_EQ(value_names_[0], value_names_[9]);
-  EXPECT_EQ(value_names_[0], value_names_[11]);
-  EXPECT_NE(value_names_[13], value_names_[0]);
-  EXPECT_NE(value_names_[13], value_names_[1]);
-  EXPECT_NE(value_names_[13], value_names_[2]);
-  EXPECT_EQ(value_names_[13], value_names_[15]);
-  EXPECT_EQ(value_names_[13], value_names_[17]);
-  EXPECT_EQ(value_names_[13], value_names_[19]);
-  EXPECT_NE(value_names_[21], value_names_[0]);
-  EXPECT_NE(value_names_[21], value_names_[1]);
-  EXPECT_NE(value_names_[21], value_names_[2]);
-  EXPECT_NE(value_names_[21], value_names_[13]);
-  EXPECT_EQ(value_names_[21], value_names_[23]);
-  EXPECT_EQ(value_names_[21], value_names_[25]);
-  EXPECT_EQ(value_names_[21], value_names_[27]);
-  EXPECT_NE(value_names_[29], value_names_[0]);
-  EXPECT_NE(value_names_[29], value_names_[1]);
-  EXPECT_NE(value_names_[29], value_names_[2]);
-  EXPECT_NE(value_names_[29], value_names_[13]);
-  EXPECT_NE(value_names_[29], value_names_[21]);
-  EXPECT_EQ(value_names_[29], value_names_[31]);
-  EXPECT_EQ(value_names_[29], value_names_[33]);
-  EXPECT_EQ(value_names_[29], value_names_[35]);
-  // High words should get kNoValue.
-  EXPECT_EQ(value_names_[8], kNoValue);
-  EXPECT_EQ(value_names_[10], kNoValue);
-  EXPECT_EQ(value_names_[12], kNoValue);
-  EXPECT_EQ(value_names_[14], kNoValue);
-  EXPECT_EQ(value_names_[16], kNoValue);
-  EXPECT_EQ(value_names_[18], kNoValue);
-  EXPECT_EQ(value_names_[20], kNoValue);
-  EXPECT_EQ(value_names_[22], kNoValue);
-  EXPECT_EQ(value_names_[24], kNoValue);
-  EXPECT_EQ(value_names_[26], kNoValue);
-  EXPECT_EQ(value_names_[28], kNoValue);
-  EXPECT_EQ(value_names_[30], kNoValue);
-  EXPECT_EQ(value_names_[32], kNoValue);
-  EXPECT_EQ(value_names_[34], kNoValue);
-  EXPECT_EQ(value_names_[36], kNoValue);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, NonAliasingIFields) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-      { 4u, 1u, 4u, false, kDexMemAccessWord },
-      { 5u, 1u, 5u, false, kDexMemAccessShort },
-      { 6u, 1u, 6u, false, kDexMemAccessChar },
-      { 7u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
-      { 8u, 1u, 8u, false, kDexMemAccessWord },
-      { 9u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved.
-      { 10u, 1u, 10u, false, kDexMemAccessWord },
-      { 11u, 1u, 11u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
-      DEF_IGET(3, Instruction::IGET, 1u, 100u, 0u),
-      DEF_IGET(4, Instruction::IGET, 2u, 100u, 0u),   // Same as at the top.
-      DEF_IGET(5, Instruction::IGET, 3u, 100u, 0u),   // Same as at the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
-      DEF_IGET(3, Instruction::IGET, 5u, 200u, 1u),
-      DEF_IGET(4, Instruction::IGET, 6u, 200u, 1u),   // Differs from top...
-      DEF_IPUT(4, Instruction::IPUT, 7u, 200u, 1u),   // Because of this IPUT.
-      DEF_IGET(5, Instruction::IGET, 8u, 200u, 1u),   // Differs from top and the loop IGET.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 300u),
-      DEF_IGET(3, Instruction::IGET, 10u, 300u, 2u),
-      DEF_IPUT(4, Instruction::IPUT, 11u, 300u, 2u),  // Because of this IPUT...
-      DEF_IGET(4, Instruction::IGET, 12u, 300u, 2u),  // Differs from top.
-      DEF_IGET(5, Instruction::IGET, 13u, 300u, 2u),  // Differs from top but same as the loop IGET.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 400u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 401u),
-      DEF_CONST(3, Instruction::CONST, 16u, 3000),
-      DEF_IPUT(3, Instruction::IPUT, 16u, 400u, 3u),
-      DEF_IPUT(3, Instruction::IPUT, 16u, 400u, 4u),
-      DEF_IPUT(3, Instruction::IPUT, 16u, 401u, 3u),
-      DEF_IGET(4, Instruction::IGET, 20u, 400u, 3u),  // Differs from 16u and 23u.
-      DEF_IGET(4, Instruction::IGET, 21u, 400u, 4u),  // Same as 20u.
-      DEF_IGET(4, Instruction::IGET, 22u, 401u, 3u),  // Same as 20u.
-      DEF_CONST(4, Instruction::CONST, 23u, 4000),
-      DEF_IPUT(4, Instruction::IPUT, 23u, 400u, 3u),
-      DEF_IPUT(4, Instruction::IPUT, 23u, 400u, 4u),
-      DEF_IPUT(4, Instruction::IPUT, 23u, 401u, 3u),
-      DEF_IGET(5, Instruction::IGET, 27u, 400u, 3u),  // Differs from 16u and 20u...
-      DEF_IGET(5, Instruction::IGET, 28u, 400u, 4u),  // and same as the CONST 23u
-      DEF_IGET(5, Instruction::IGET, 29u, 401u, 3u),  // and same as the CONST 23u.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 500u),
-      DEF_IGET(3, Instruction::IGET_SHORT, 31u, 500u, 5u),
-      DEF_IGET(3, Instruction::IGET_CHAR, 32u, 500u, 6u),
-      DEF_IPUT(4, Instruction::IPUT_SHORT, 33u, 500u, 7u),  // Clobbers field #5, not #6.
-      DEF_IGET(5, Instruction::IGET_SHORT, 34u, 500u, 5u),  // Differs from the top.
-      DEF_IGET(5, Instruction::IGET_CHAR, 35u, 500u, 6u),   // Same as the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 600u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 601u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 602u),
-      DEF_IGET(3, Instruction::IGET, 39u, 600u, 8u),
-      DEF_IGET(3, Instruction::IGET, 40u, 601u, 8u),
-      DEF_IPUT(4, Instruction::IPUT, 41u, 602u, 9u),  // Doesn't clobber field #8 for other refs.
-      DEF_IGET(5, Instruction::IGET, 42u, 600u, 8u),  // Same as the top.
-      DEF_IGET(5, Instruction::IGET, 43u, 601u, 8u),  // Same as the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 700u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 701u),
-      DEF_CONST(3, Instruction::CONST, 46u, 3000),
-      DEF_IPUT(3, Instruction::IPUT, 46u, 700u, 10u),
-      DEF_IPUT(3, Instruction::IPUT, 46u, 700u, 11u),
-      DEF_IPUT(3, Instruction::IPUT, 46u, 701u, 10u),
-      DEF_IGET(4, Instruction::IGET, 50u, 700u, 10u),  // Differs from the CONSTs 46u and 53u.
-      DEF_IGET(4, Instruction::IGET, 51u, 700u, 11u),  // Same as 50u.
-      DEF_IGET(4, Instruction::IGET, 52u, 701u, 10u),  // Same as 50u.
-      DEF_CONST(4, Instruction::CONST, 53u, 3001),
-      DEF_IPUT(4, Instruction::IPUT, 53u, 700u, 10u),
-      DEF_IPUT(4, Instruction::IPUT, 53u, 700u, 11u),
-      DEF_IPUT(4, Instruction::IPUT, 53u, 701u, 10u),
-      DEF_IGET(5, Instruction::IGET, 57u, 700u, 10u),  // Same as the CONST 53u.
-      DEF_IGET(5, Instruction::IGET, 58u, 700u, 11u),  // Same as the CONST 53u.
-      DEF_IGET(5, Instruction::IGET, 59u, 701u, 10u),  // Same as the CONST 53u.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[1], value_names_[2]);
-  EXPECT_EQ(value_names_[1], value_names_[3]);
-
-  EXPECT_NE(value_names_[5], value_names_[6]);
-  EXPECT_NE(value_names_[5], value_names_[8]);
-  EXPECT_NE(value_names_[6], value_names_[8]);
-
-  EXPECT_NE(value_names_[10], value_names_[12]);
-  EXPECT_EQ(value_names_[12], value_names_[13]);
-
-  EXPECT_NE(value_names_[20], value_names_[16]);
-  EXPECT_NE(value_names_[20], value_names_[23]);
-  EXPECT_EQ(value_names_[20], value_names_[21]);
-  EXPECT_EQ(value_names_[20], value_names_[22]);
-  EXPECT_NE(value_names_[27], value_names_[16]);
-  EXPECT_NE(value_names_[27], value_names_[20]);
-  EXPECT_EQ(value_names_[27], value_names_[28]);
-  EXPECT_EQ(value_names_[27], value_names_[29]);
-
-  EXPECT_NE(value_names_[31], value_names_[34]);
-  EXPECT_EQ(value_names_[32], value_names_[35]);
-
-  EXPECT_EQ(value_names_[39], value_names_[42]);
-  EXPECT_EQ(value_names_[40], value_names_[43]);
-
-  EXPECT_NE(value_names_[50], value_names_[46]);
-  EXPECT_NE(value_names_[50], value_names_[53]);
-  EXPECT_EQ(value_names_[50], value_names_[51]);
-  EXPECT_EQ(value_names_[50], value_names_[52]);
-  EXPECT_EQ(value_names_[57], value_names_[53]);
-  EXPECT_EQ(value_names_[58], value_names_[53]);
-  EXPECT_EQ(value_names_[59], value_names_[53]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsSingleObject) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-      { 4u, 1u, 4u, false, kDexMemAccessWord },
-      { 5u, 1u, 5u, false, kDexMemAccessShort },
-      { 6u, 1u, 6u, false, kDexMemAccessChar },
-      { 7u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
-      DEF_IGET(4, Instruction::IGET, 1u, 100u, 0u),   // Same as at the top.
-      DEF_IGET(5, Instruction::IGET, 2u, 100u, 0u),   // Same as at the top.
-
-      DEF_IGET(3, Instruction::IGET, 3u, 100u, 1u),
-      DEF_IGET(4, Instruction::IGET, 4u, 100u, 1u),   // Differs from top...
-      DEF_IPUT(4, Instruction::IPUT, 5u, 100u, 1u),   // Because of this IPUT.
-      DEF_IGET(5, Instruction::IGET, 6u, 100u, 1u),   // Differs from top and the loop IGET.
-
-      DEF_IGET(3, Instruction::IGET, 7u, 100u, 2u),
-      DEF_IPUT(4, Instruction::IPUT, 8u, 100u, 2u),   // Because of this IPUT...
-      DEF_IGET(4, Instruction::IGET, 9u, 100u, 2u),   // Differs from top.
-      DEF_IGET(5, Instruction::IGET, 10u, 100u, 2u),  // Differs from top but same as the loop IGET.
-
-      DEF_CONST(3, Instruction::CONST, 11u, 3000),
-      DEF_IPUT(3, Instruction::IPUT, 11u, 100u, 3u),
-      DEF_IPUT(3, Instruction::IPUT, 11u, 100u, 4u),
-      DEF_IGET(4, Instruction::IGET, 14u, 100u, 3u),  // Differs from 11u and 16u.
-      DEF_IGET(4, Instruction::IGET, 15u, 100u, 4u),  // Same as 14u.
-      DEF_CONST(4, Instruction::CONST, 16u, 4000),
-      DEF_IPUT(4, Instruction::IPUT, 16u, 100u, 3u),
-      DEF_IPUT(4, Instruction::IPUT, 16u, 100u, 4u),
-      DEF_IGET(5, Instruction::IGET, 19u, 100u, 3u),  // Differs from 11u and 14u...
-      DEF_IGET(5, Instruction::IGET, 20u, 100u, 4u),  // and same as the CONST 16u.
-
-      DEF_IGET(3, Instruction::IGET_SHORT, 21u, 100u, 5u),
-      DEF_IGET(3, Instruction::IGET_CHAR, 22u, 100u, 6u),
-      DEF_IPUT(4, Instruction::IPUT_SHORT, 23u, 100u, 7u),  // Clobbers field #5, not #6.
-      DEF_IGET(5, Instruction::IGET_SHORT, 24u, 100u, 5u),  // Differs from the top.
-      DEF_IGET(5, Instruction::IGET_CHAR, 25u, 100u, 6u),   // Same as the top.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  EXPECT_NE(value_names_[3], value_names_[4]);
-  EXPECT_NE(value_names_[3], value_names_[6]);
-  EXPECT_NE(value_names_[4], value_names_[6]);
-
-  EXPECT_NE(value_names_[7], value_names_[9]);
-  EXPECT_EQ(value_names_[9], value_names_[10]);
-
-  EXPECT_NE(value_names_[14], value_names_[11]);
-  EXPECT_NE(value_names_[14], value_names_[16]);
-  EXPECT_EQ(value_names_[14], value_names_[15]);
-  EXPECT_NE(value_names_[19], value_names_[11]);
-  EXPECT_NE(value_names_[19], value_names_[14]);
-  EXPECT_EQ(value_names_[19], value_names_[16]);
-  EXPECT_EQ(value_names_[19], value_names_[20]);
-
-  EXPECT_NE(value_names_[21], value_names_[24]);
-  EXPECT_EQ(value_names_[22], value_names_[25]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, AliasingIFieldsTwoObjects) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-      { 3u, 1u, 3u, false, kDexMemAccessShort },
-      { 4u, 1u, 4u, false, kDexMemAccessChar },
-      { 5u, 0u, 0u, false, kDexMemAccessShort },   // Unresolved.
-      { 6u, 1u, 6u, false, kDexMemAccessWord },
-      { 7u, 1u, 7u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
-      DEF_IPUT(4, Instruction::IPUT, 1u, 101u, 0u),   // May alias with the IGET at the top.
-      DEF_IGET(5, Instruction::IGET, 2u, 100u, 0u),   // Differs from the top.
-
-      DEF_IGET(3, Instruction::IGET, 3u, 100u, 1u),
-      DEF_IPUT(4, Instruction::IPUT, 3u, 101u, 1u),   // If aliasing, stores the same value.
-      DEF_IGET(5, Instruction::IGET, 5u, 100u, 1u),   // Same as the top.
-
-      DEF_IGET(3, Instruction::IGET, 6u, 100u, 2u),
-      DEF_CONST(4, Instruction::CONST, 7u, 1000),
-      DEF_IPUT(4, Instruction::IPUT, 7u, 101u, 2u),
-      DEF_IGET(5, Instruction::IGET, 9u, 100u, 2u),   // Differs from the top and the CONST.
-
-      DEF_IGET(3, Instruction::IGET_SHORT, 10u, 100u, 3u),
-      DEF_IGET(3, Instruction::IGET_CHAR, 11u, 100u, 4u),
-      DEF_IPUT(4, Instruction::IPUT_SHORT, 12u, 101u, 5u),  // Clobbers field #3, not #4.
-      DEF_IGET(5, Instruction::IGET_SHORT, 13u, 100u, 3u),  // Differs from the top.
-      DEF_IGET(5, Instruction::IGET_CHAR, 14u, 100u, 4u),   // Same as the top.
-
-      DEF_CONST(3, Instruction::CONST, 15u, 3000),
-      DEF_IPUT(3, Instruction::IPUT, 15u, 100u, 6u),
-      DEF_IPUT(3, Instruction::IPUT, 15u, 100u, 7u),
-      DEF_IPUT(3, Instruction::IPUT, 15u, 101u, 6u),
-      DEF_IGET(4, Instruction::IGET, 19u, 100u, 6u),  // Differs from CONSTs 15u and 22u.
-      DEF_IGET(4, Instruction::IGET, 20u, 100u, 7u),  // Same value as 19u.
-      DEF_IGET(4, Instruction::IGET, 21u, 101u, 6u),  // Same value as read from field #7.
-      DEF_CONST(4, Instruction::CONST, 22u, 3001),
-      DEF_IPUT(4, Instruction::IPUT, 22u, 100u, 6u),
-      DEF_IPUT(4, Instruction::IPUT, 22u, 100u, 7u),
-      DEF_IPUT(4, Instruction::IPUT, 22u, 101u, 6u),
-      DEF_IGET(5, Instruction::IGET, 26u, 100u, 6u),  // Same as CONST 22u.
-      DEF_IGET(5, Instruction::IGET, 27u, 100u, 7u),  // Same as CONST 22u.
-      DEF_IGET(5, Instruction::IGET, 28u, 101u, 6u),  // Same as CONST 22u.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[2]);
-
-  EXPECT_EQ(value_names_[3], value_names_[5]);
-
-  EXPECT_NE(value_names_[6], value_names_[9]);
-  EXPECT_NE(value_names_[7], value_names_[9]);
-
-  EXPECT_NE(value_names_[10], value_names_[13]);
-  EXPECT_EQ(value_names_[11], value_names_[14]);
-
-  EXPECT_NE(value_names_[19], value_names_[15]);
-  EXPECT_NE(value_names_[19], value_names_[22]);
-  EXPECT_EQ(value_names_[22], value_names_[26]);
-  EXPECT_EQ(value_names_[22], value_names_[27]);
-  EXPECT_EQ(value_names_[22], value_names_[28]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, IFieldToBaseDependency) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // For the IGET that loads sreg 3u using base 2u, the following IPUT creates a dependency
-      // from the field value to the base. However, this dependency does not result in an
-      // infinite loop since the merge of the field value for base 0u gets assigned a value name
-      // based only on the base 0u, not on the actual value, and breaks the dependency cycle.
-      DEF_IGET(3, Instruction::IGET, 0u, 100u, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_IGET(4, Instruction::IGET, 2u, 0u, 0u),
-      DEF_IGET(4, Instruction::IGET, 3u, 2u, 0u),
-      DEF_IPUT(4, Instruction::IPUT, 3u, 0u, 0u),
-      DEF_IGET(5, Instruction::IGET, 5u, 0u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[1], value_names_[2]);
-  EXPECT_EQ(value_names_[3], value_names_[5]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, SFields) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_SGET(3, Instruction::SGET, 0u, 0u),
-      DEF_SGET(4, Instruction::SGET, 1u, 0u),         // Same as at the top.
-      DEF_SGET(5, Instruction::SGET, 2u, 0u),         // Same as at the top.
-
-      DEF_SGET(3, Instruction::SGET, 3u, 1u),
-      DEF_SGET(4, Instruction::SGET, 4u, 1u),         // Differs from top...
-      DEF_SPUT(4, Instruction::SPUT, 5u, 1u),         // Because of this SPUT.
-      DEF_SGET(5, Instruction::SGET, 6u, 1u),         // Differs from top and the loop SGET.
-
-      DEF_SGET(3, Instruction::SGET, 7u, 2u),
-      DEF_SPUT(4, Instruction::SPUT, 8u, 2u),         // Because of this SPUT...
-      DEF_SGET(4, Instruction::SGET, 9u, 2u),         // Differs from top.
-      DEF_SGET(5, Instruction::SGET, 10u, 2u),        // Differs from top but same as the loop SGET.
-  };
-
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  EXPECT_NE(value_names_[3], value_names_[4]);
-  EXPECT_NE(value_names_[3], value_names_[6]);
-  EXPECT_NE(value_names_[4], value_names_[6]);
-
-  EXPECT_NE(value_names_[7], value_names_[9]);
-  EXPECT_EQ(value_names_[9], value_names_[10]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, NonAliasingArrays) {
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 100u),
-      DEF_AGET(3, Instruction::AGET, 1u, 100u, 101u),
-      DEF_AGET(4, Instruction::AGET, 2u, 100u, 101u),   // Same as at the top.
-      DEF_AGET(5, Instruction::AGET, 3u, 100u, 101u),   // Same as at the top.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
-      DEF_AGET(3, Instruction::AGET, 5u, 200u, 201u),
-      DEF_AGET(4, Instruction::AGET, 6u, 200u, 201u),  // Differs from top...
-      DEF_APUT(4, Instruction::APUT, 7u, 200u, 201u),  // Because of this APUT.
-      DEF_AGET(5, Instruction::AGET, 8u, 200u, 201u),  // Differs from top and the loop AGET.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 300u),
-      DEF_AGET(3, Instruction::AGET, 10u, 300u, 301u),
-      DEF_APUT(4, Instruction::APUT, 11u, 300u, 301u),  // Because of this APUT...
-      DEF_AGET(4, Instruction::AGET, 12u, 300u, 301u),  // Differs from top.
-      DEF_AGET(5, Instruction::AGET, 13u, 300u, 301u),  // Differs from top but == the loop AGET.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 400u),
-      DEF_CONST(3, Instruction::CONST, 15u, 3000),
-      DEF_APUT(3, Instruction::APUT, 15u, 400u, 401u),
-      DEF_APUT(3, Instruction::APUT, 15u, 400u, 402u),
-      DEF_AGET(4, Instruction::AGET, 18u, 400u, 401u),  // Differs from 15u and 20u.
-      DEF_AGET(4, Instruction::AGET, 19u, 400u, 402u),  // Same as 18u.
-      DEF_CONST(4, Instruction::CONST, 20u, 4000),
-      DEF_APUT(4, Instruction::APUT, 20u, 400u, 401u),
-      DEF_APUT(4, Instruction::APUT, 20u, 400u, 402u),
-      DEF_AGET(5, Instruction::AGET, 23u, 400u, 401u),  // Differs from 15u and 18u...
-      DEF_AGET(5, Instruction::AGET, 24u, 400u, 402u),  // and same as the CONST 20u.
-
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 500u),
-      DEF_AGET(3, Instruction::AGET, 26u, 500u, 501u),
-      DEF_APUT(4, Instruction::APUT, 27u, 500u, 502u),  // Clobbers element at index 501u.
-      DEF_AGET(5, Instruction::AGET, 28u, 500u, 501u),  // Differs from the top.
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[1], value_names_[2]);
-  EXPECT_EQ(value_names_[1], value_names_[3]);
-
-  EXPECT_NE(value_names_[5], value_names_[6]);
-  EXPECT_NE(value_names_[5], value_names_[8]);
-  EXPECT_NE(value_names_[6], value_names_[8]);
-
-  EXPECT_NE(value_names_[10], value_names_[12]);
-  EXPECT_EQ(value_names_[12], value_names_[13]);
-
-  EXPECT_NE(value_names_[18], value_names_[15]);
-  EXPECT_NE(value_names_[18], value_names_[20]);
-  EXPECT_EQ(value_names_[18], value_names_[19]);
-  EXPECT_NE(value_names_[23], value_names_[15]);
-  EXPECT_NE(value_names_[23], value_names_[18]);
-  EXPECT_EQ(value_names_[23], value_names_[20]);
-  EXPECT_EQ(value_names_[23], value_names_[24]);
-
-  EXPECT_NE(value_names_[26], value_names_[28]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, AliasingArrays) {
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_AGET(3, Instruction::AGET_WIDE, 0u, 100u, 101u),
-      DEF_AGET(4, Instruction::AGET_WIDE, 2u, 100u, 101u),   // Same as at the top.
-      DEF_AGET(5, Instruction::AGET_WIDE, 4u, 100u, 101u),   // Same as at the top.
-
-      DEF_AGET(3, Instruction::AGET_BYTE, 6u, 200u, 201u),
-      DEF_AGET(4, Instruction::AGET_BYTE, 7u, 200u, 201u),  // Differs from top...
-      DEF_APUT(4, Instruction::APUT_BYTE, 8u, 200u, 201u),  // Because of this APUT.
-      DEF_AGET(5, Instruction::AGET_BYTE, 9u, 200u, 201u),  // Differs from top and the loop AGET.
-
-      DEF_AGET(3, Instruction::AGET, 10u, 300u, 301u),
-      DEF_APUT(4, Instruction::APUT, 11u, 300u, 301u),  // Because of this APUT...
-      DEF_AGET(4, Instruction::AGET, 12u, 300u, 301u),  // Differs from top.
-      DEF_AGET(5, Instruction::AGET, 13u, 300u, 301u),  // Differs from top but == the loop AGET.
-
-      DEF_CONST(3, Instruction::CONST, 14u, 3000),
-      DEF_APUT(3, Instruction::APUT_CHAR, 14u, 400u, 401u),
-      DEF_APUT(3, Instruction::APUT_CHAR, 14u, 400u, 402u),
-      DEF_AGET(4, Instruction::AGET_CHAR, 15u, 400u, 401u),  // Differs from 11u and 16u.
-      DEF_AGET(4, Instruction::AGET_CHAR, 16u, 400u, 402u),  // Same as 14u.
-      DEF_CONST(4, Instruction::CONST, 17u, 4000),
-      DEF_APUT(4, Instruction::APUT_CHAR, 17u, 400u, 401u),
-      DEF_APUT(4, Instruction::APUT_CHAR, 17u, 400u, 402u),
-      DEF_AGET(5, Instruction::AGET_CHAR, 19u, 400u, 401u),  // Differs from 11u and 14u...
-      DEF_AGET(5, Instruction::AGET_CHAR, 20u, 400u, 402u),  // and same as the CONST 16u.
-
-      DEF_AGET(3, Instruction::AGET_SHORT, 21u, 500u, 501u),
-      DEF_APUT(4, Instruction::APUT_SHORT, 22u, 500u, 502u),  // Clobbers element at index 501u.
-      DEF_AGET(5, Instruction::AGET_SHORT, 23u, 500u, 501u),  // Differs from the top.
-
-      DEF_AGET(3, Instruction::AGET_OBJECT, 24u, 600u, 601u),
-      DEF_APUT(4, Instruction::APUT_OBJECT, 25u, 601u, 602u),  // Clobbers 600u/601u.
-      DEF_AGET(5, Instruction::AGET_OBJECT, 26u, 600u, 601u),  // Differs from the top.
-
-      DEF_AGET(3, Instruction::AGET_BOOLEAN, 27u, 700u, 701u),
-      DEF_APUT(4, Instruction::APUT_BOOLEAN, 27u, 701u, 702u),  // Storing the same value.
-      DEF_AGET(5, Instruction::AGET_BOOLEAN, 29u, 700u, 701u),  // Same as the top.
-  };
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 0, 2, 4 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  EXPECT_NE(value_names_[3], value_names_[4]);
-  EXPECT_NE(value_names_[3], value_names_[6]);
-  EXPECT_NE(value_names_[4], value_names_[6]);
-
-  EXPECT_NE(value_names_[7], value_names_[9]);
-  EXPECT_EQ(value_names_[9], value_names_[10]);
-
-  EXPECT_NE(value_names_[14], value_names_[11]);
-  EXPECT_NE(value_names_[14], value_names_[16]);
-  EXPECT_EQ(value_names_[14], value_names_[15]);
-  EXPECT_NE(value_names_[19], value_names_[11]);
-  EXPECT_NE(value_names_[19], value_names_[14]);
-  EXPECT_EQ(value_names_[19], value_names_[16]);
-  EXPECT_EQ(value_names_[19], value_names_[20]);
-
-  EXPECT_NE(value_names_[21], value_names_[23]);
-
-  EXPECT_NE(value_names_[24], value_names_[26]);
-
-  EXPECT_EQ(value_names_[27], value_names_[29]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, Phi) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000),
-      DEF_PHI2(4, 1u, 0u, 6u),                     // Merge CONST 0u (1000) with the same.
-      DEF_PHI2(4, 2u, 0u, 7u),                     // Merge CONST 0u (1000) with the Phi itself.
-      DEF_PHI2(4, 3u, 0u, 8u),                     // Merge CONST 0u (1000) and CONST 4u (2000).
-      DEF_PHI2(4, 4u, 0u, 9u),                     // Merge CONST 0u (1000) and Phi 3u.
-      DEF_CONST(4, Instruction::CONST, 5u, 2000),
-      DEF_MOVE(4, Instruction::MOVE, 6u, 0u),
-      DEF_MOVE(4, Instruction::MOVE, 7u, 2u),
-      DEF_MOVE(4, Instruction::MOVE, 8u, 5u),
-      DEF_MOVE(4, Instruction::MOVE, 9u, 3u),
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[1], value_names_[0]);
-  EXPECT_EQ(value_names_[2], value_names_[0]);
-
-  EXPECT_NE(value_names_[3], value_names_[0]);
-  EXPECT_NE(value_names_[3], value_names_[5]);
-  EXPECT_NE(value_names_[4], value_names_[0]);
-  EXPECT_NE(value_names_[4], value_names_[5]);
-  EXPECT_NE(value_names_[4], value_names_[3]);
-}
-
-TEST_F(GlobalValueNumberingTestLoop, IFieldLoopVariable) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 0),
-      DEF_IPUT(3, Instruction::IPUT, 0u, 100u, 0u),
-      DEF_IGET(4, Instruction::IGET, 2u, 100u, 0u),
-      DEF_BINOP(4, Instruction::ADD_INT, 3u, 2u, 101u),
-      DEF_IPUT(4, Instruction::IPUT, 3u, 100u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[2], value_names_[0]);
-  EXPECT_NE(value_names_[3], value_names_[0]);
-  EXPECT_NE(value_names_[3], value_names_[2]);
-
-  // Set up vreg_to_ssa_map_exit for prologue and loop and set post-processing mode
-  // as needed for GetStartingVregValueNumber().
-  const int32_t prologue_vreg_to_ssa_map_exit[] = { 0 };
-  const int32_t loop_vreg_to_ssa_map_exit[] = { 3 };
-  PrepareVregToSsaMapExit(3, prologue_vreg_to_ssa_map_exit);
-  PrepareVregToSsaMapExit(4, loop_vreg_to_ssa_map_exit);
-  gvn_->StartPostProcessing();
-
-  // Check that vreg 0 has the same value number as the result of IGET 2u.
-  const LocalValueNumbering* loop = gvn_->GetLvn(4);
-  EXPECT_EQ(value_names_[2], loop->GetStartingVregValueNumber(0));
-}
-
-TEST_F(GlobalValueNumberingTestCatch, IFields) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 200u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 201u),
-      DEF_IGET(3, Instruction::IGET, 2u, 100u, 0u),
-      DEF_IGET(3, Instruction::IGET, 3u, 200u, 0u),
-      DEF_IGET(3, Instruction::IGET, 4u, 201u, 0u),
-      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 201u),     // Clobbering catch, 201u escapes.
-      DEF_IGET(4, Instruction::IGET, 6u, 100u, 0u),         // Differs from IGET 2u.
-      DEF_IPUT(4, Instruction::IPUT, 6u, 100u, 1u),
-      DEF_IPUT(4, Instruction::IPUT, 6u, 101u, 0u),
-      DEF_IPUT(4, Instruction::IPUT, 6u, 200u, 0u),
-      DEF_IGET(5, Instruction::IGET, 10u, 100u, 0u),        // Differs from IGETs 2u and 6u.
-      DEF_IGET(5, Instruction::IGET, 11u, 200u, 0u),        // Same as the top.
-      DEF_IGET(5, Instruction::IGET, 12u, 201u, 0u),        // Differs from the top, 201u escaped.
-      DEF_IPUT(5, Instruction::IPUT, 10u, 100u, 1u),
-      DEF_IPUT(5, Instruction::IPUT, 10u, 101u, 0u),
-      DEF_IPUT(5, Instruction::IPUT, 10u, 200u, 0u),
-      DEF_IGET(6, Instruction::IGET, 16u, 100u, 0u),        // Differs from IGETs 2u, 6u and 10u.
-      DEF_IGET(6, Instruction::IGET, 17u, 100u, 1u),        // Same as IGET 16u.
-      DEF_IGET(6, Instruction::IGET, 18u, 101u, 0u),        // Same as IGET 16u.
-      DEF_IGET(6, Instruction::IGET, 19u, 200u, 0u),        // Same as IGET 16u.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[2], value_names_[6]);
-  EXPECT_NE(value_names_[2], value_names_[10]);
-  EXPECT_NE(value_names_[6], value_names_[10]);
-  EXPECT_EQ(value_names_[3], value_names_[11]);
-  EXPECT_NE(value_names_[4], value_names_[12]);
-
-  EXPECT_NE(value_names_[2], value_names_[16]);
-  EXPECT_NE(value_names_[6], value_names_[16]);
-  EXPECT_NE(value_names_[10], value_names_[16]);
-  EXPECT_EQ(value_names_[16], value_names_[17]);
-  EXPECT_EQ(value_names_[16], value_names_[18]);
-  EXPECT_EQ(value_names_[16], value_names_[19]);
-}
-
-TEST_F(GlobalValueNumberingTestCatch, SFields) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(3, Instruction::SGET, 0u, 0u),
-      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 100u),     // Clobbering catch.
-      DEF_SGET(4, Instruction::SGET, 2u, 0u),               // Differs from SGET 0u.
-      DEF_SPUT(4, Instruction::SPUT, 2u, 1u),
-      DEF_SGET(5, Instruction::SGET, 4u, 0u),               // Differs from SGETs 0u and 2u.
-      DEF_SPUT(5, Instruction::SPUT, 4u, 1u),
-      DEF_SGET(6, Instruction::SGET, 6u, 0u),               // Differs from SGETs 0u, 2u and 4u.
-      DEF_SGET(6, Instruction::SGET, 7u, 1u),               // Same as SGET 6u.
-  };
-
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_NE(value_names_[0], value_names_[4]);
-  EXPECT_NE(value_names_[2], value_names_[4]);
-  EXPECT_NE(value_names_[0], value_names_[6]);
-  EXPECT_NE(value_names_[2], value_names_[6]);
-  EXPECT_NE(value_names_[4], value_names_[6]);
-  EXPECT_EQ(value_names_[6], value_names_[7]);
-}
-
-TEST_F(GlobalValueNumberingTestCatch, Arrays) {
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 201u),
-      DEF_AGET(3, Instruction::AGET, 2u, 100u, 101u),
-      DEF_AGET(3, Instruction::AGET, 3u, 200u, 202u),
-      DEF_AGET(3, Instruction::AGET, 4u, 200u, 203u),
-      DEF_AGET(3, Instruction::AGET, 5u, 201u, 202u),
-      DEF_AGET(3, Instruction::AGET, 6u, 201u, 203u),
-      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 201u),     // Clobbering catch, 201u escapes.
-      DEF_AGET(4, Instruction::AGET, 8u, 100u, 101u),       // Differs from AGET 2u.
-      DEF_APUT(4, Instruction::APUT, 8u, 100u, 102u),
-      DEF_APUT(4, Instruction::APUT, 8u, 200u, 202u),
-      DEF_APUT(4, Instruction::APUT, 8u, 200u, 203u),
-      DEF_APUT(4, Instruction::APUT, 8u, 201u, 202u),
-      DEF_APUT(4, Instruction::APUT, 8u, 201u, 203u),
-      DEF_AGET(5, Instruction::AGET, 14u, 100u, 101u),      // Differs from AGETs 2u and 8u.
-      DEF_AGET(5, Instruction::AGET, 15u, 200u, 202u),      // Same as AGET 3u.
-      DEF_AGET(5, Instruction::AGET, 16u, 200u, 203u),      // Same as AGET 4u.
-      DEF_AGET(5, Instruction::AGET, 17u, 201u, 202u),      // Differs from AGET 5u.
-      DEF_AGET(5, Instruction::AGET, 18u, 201u, 203u),      // Differs from AGET 6u.
-      DEF_APUT(5, Instruction::APUT, 14u, 100u, 102u),
-      DEF_APUT(5, Instruction::APUT, 14u, 200u, 202u),
-      DEF_APUT(5, Instruction::APUT, 14u, 200u, 203u),
-      DEF_APUT(5, Instruction::APUT, 14u, 201u, 202u),
-      DEF_APUT(5, Instruction::APUT, 14u, 201u, 203u),
-      DEF_AGET(6, Instruction::AGET, 24u, 100u, 101u),      // Differs from AGETs 2u, 8u and 14u.
-      DEF_AGET(6, Instruction::AGET, 25u, 100u, 101u),      // Same as AGET 24u.
-      DEF_AGET(6, Instruction::AGET, 26u, 200u, 202u),      // Same as AGET 24u.
-      DEF_AGET(6, Instruction::AGET, 27u, 200u, 203u),      // Same as AGET 24u.
-      DEF_AGET(6, Instruction::AGET, 28u, 201u, 202u),      // Same as AGET 24u.
-      DEF_AGET(6, Instruction::AGET, 29u, 201u, 203u),      // Same as AGET 24u.
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[2], value_names_[8]);
-  EXPECT_NE(value_names_[2], value_names_[14]);
-  EXPECT_NE(value_names_[8], value_names_[14]);
-  EXPECT_EQ(value_names_[3], value_names_[15]);
-  EXPECT_EQ(value_names_[4], value_names_[16]);
-  EXPECT_NE(value_names_[5], value_names_[17]);
-  EXPECT_NE(value_names_[6], value_names_[18]);
-  EXPECT_NE(value_names_[2], value_names_[24]);
-  EXPECT_NE(value_names_[8], value_names_[24]);
-  EXPECT_NE(value_names_[14], value_names_[24]);
-  EXPECT_EQ(value_names_[24], value_names_[25]);
-  EXPECT_EQ(value_names_[24], value_names_[26]);
-  EXPECT_EQ(value_names_[24], value_names_[27]);
-  EXPECT_EQ(value_names_[24], value_names_[28]);
-  EXPECT_EQ(value_names_[24], value_names_[29]);
-}
-
-TEST_F(GlobalValueNumberingTestCatch, Phi) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000),
-      DEF_CONST(3, Instruction::CONST, 1u, 2000),
-      DEF_MOVE(3, Instruction::MOVE, 2u, 1u),
-      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 100u),     // Clobbering catch.
-      DEF_CONST(5, Instruction::CONST, 4u, 1000),
-      DEF_CONST(5, Instruction::CONST, 5u, 3000),
-      DEF_MOVE(5, Instruction::MOVE, 6u, 5u),
-      DEF_PHI2(6, 7u, 0u, 4u),
-      DEF_PHI2(6, 8u, 0u, 5u),
-      DEF_PHI2(6, 9u, 0u, 6u),
-      DEF_PHI2(6, 10u, 1u, 4u),
-      DEF_PHI2(6, 11u, 1u, 5u),
-      DEF_PHI2(6, 12u, 1u, 6u),
-      DEF_PHI2(6, 13u, 2u, 4u),
-      DEF_PHI2(6, 14u, 2u, 5u),
-      DEF_PHI2(6, 15u, 2u, 6u),
-  };
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  ASSERT_EQ(value_names_[4], value_names_[0]);  // Both CONSTs are 1000.
-  EXPECT_EQ(value_names_[7], value_names_[0]);  // Merging CONST 0u and CONST 4u, both 1000.
-  EXPECT_NE(value_names_[8], value_names_[0]);
-  EXPECT_NE(value_names_[8], value_names_[5]);
-  EXPECT_EQ(value_names_[9], value_names_[8]);
-  EXPECT_NE(value_names_[10], value_names_[1]);
-  EXPECT_NE(value_names_[10], value_names_[4]);
-  EXPECT_NE(value_names_[10], value_names_[8]);
-  EXPECT_NE(value_names_[11], value_names_[1]);
-  EXPECT_NE(value_names_[11], value_names_[5]);
-  EXPECT_NE(value_names_[11], value_names_[8]);
-  EXPECT_NE(value_names_[11], value_names_[10]);
-  EXPECT_EQ(value_names_[12], value_names_[11]);
-  EXPECT_EQ(value_names_[13], value_names_[10]);
-  EXPECT_EQ(value_names_[14], value_names_[11]);
-  EXPECT_EQ(value_names_[15], value_names_[11]);
-}
-
-TEST_F(GlobalValueNumberingTest, NullCheckIFields) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },  // Object.
-      { 1u, 1u, 1u, false, kDexMemAccessObject },  // Object.
-  };
-  static const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // 4 is fall-through, 5 is taken.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET(3, Instruction::IGET_OBJECT, 0u, 100u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 100u, 1u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 101u, 0u),
-      DEF_IFZ(3, Instruction::IF_NEZ, 0u),            // Null-check for field #0 for taken.
-      DEF_UNIQUE_REF(4, Instruction::NEW_ARRAY, 4u),
-      DEF_IPUT(4, Instruction::IPUT_OBJECT, 4u, 100u, 0u),
-      DEF_IPUT(4, Instruction::IPUT_OBJECT, 4u, 100u, 1u),
-      DEF_IPUT(4, Instruction::IPUT_OBJECT, 4u, 101u, 0u),
-      DEF_IGET(5, Instruction::IGET_OBJECT, 8u, 100u, 0u),   // 100u/#0, IF_NEZ/NEW_ARRAY.
-      DEF_IGET(5, Instruction::IGET_OBJECT, 9u, 100u, 1u),   // 100u/#1, -/NEW_ARRAY.
-      DEF_IGET(5, Instruction::IGET_OBJECT, 10u, 101u, 0u),  // 101u/#0, -/NEW_ARRAY.
-      DEF_CONST(5, Instruction::CONST, 11u, 0),
-      DEF_AGET(5, Instruction::AGET, 12u, 8u, 11u),   // Null-check eliminated.
-      DEF_AGET(5, Instruction::AGET, 13u, 9u, 11u),   // Null-check kept.
-      DEF_AGET(5, Instruction::AGET, 14u, 10u, 11u),  // Null-check kept.
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, true, false, false,                      // BB #3; unimportant.
-      false, true, true, true,                        // BB #4; unimportant.
-      true, true, true, false, true, false, false,    // BB #5; only the last three are important.
-  };
-
-  PrepareIFields(ifields);
-  PrepareBasicBlocks(bbs);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTest, NullCheckSFields) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-      { 1u, 1u, 1u, false, kDexMemAccessObject },
-  };
-  static const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // 4 is fall-through, 5 is taken.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(3, Instruction::SGET_OBJECT, 0u, 0u),
-      DEF_SGET(3, Instruction::SGET_OBJECT, 1u, 1u),
-      DEF_IFZ(3, Instruction::IF_NEZ, 0u),            // Null-check for field #0 for taken.
-      DEF_UNIQUE_REF(4, Instruction::NEW_ARRAY, 3u),
-      DEF_SPUT(4, Instruction::SPUT_OBJECT, 3u, 0u),
-      DEF_SPUT(4, Instruction::SPUT_OBJECT, 3u, 1u),
-      DEF_SGET(5, Instruction::SGET_OBJECT, 6u, 0u),  // Field #0 is null-checked, IF_NEZ/NEW_ARRAY.
-      DEF_SGET(5, Instruction::SGET_OBJECT, 7u, 1u),  // Field #1 is not null-checked, -/NEW_ARRAY.
-      DEF_CONST(5, Instruction::CONST, 8u, 0),
-      DEF_AGET(5, Instruction::AGET, 9u, 6u, 8u),     // Null-check eliminated.
-      DEF_AGET(5, Instruction::AGET, 10u, 7u, 8u),    // Null-check kept.
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, false, false, false, false, false, false, false, false, true, false
-  };
-
-  PrepareSFields(sfields);
-  PrepareBasicBlocks(bbs);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTest, NullCheckArrays) {
-  static const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // 4 is fall-through, 5 is taken.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
-  };
-  static const MIRDef mirs[] = {
-      DEF_AGET(3, Instruction::AGET_OBJECT, 0u, 100u, 102u),
-      DEF_AGET(3, Instruction::AGET_OBJECT, 1u, 100u, 103u),
-      DEF_AGET(3, Instruction::AGET_OBJECT, 2u, 101u, 102u),
-      DEF_IFZ(3, Instruction::IF_NEZ, 0u),            // Null-check for field #0 for taken.
-      DEF_UNIQUE_REF(4, Instruction::NEW_ARRAY, 4u),
-      DEF_APUT(4, Instruction::APUT_OBJECT, 4u, 100u, 102u),
-      DEF_APUT(4, Instruction::APUT_OBJECT, 4u, 100u, 103u),
-      DEF_APUT(4, Instruction::APUT_OBJECT, 4u, 101u, 102u),
-      DEF_AGET(5, Instruction::AGET_OBJECT, 8u, 100u, 102u),   // Null-checked, IF_NEZ/NEW_ARRAY.
-      DEF_AGET(5, Instruction::AGET_OBJECT, 9u, 100u, 103u),   // Not null-checked, -/NEW_ARRAY.
-      DEF_AGET(5, Instruction::AGET_OBJECT, 10u, 101u, 102u),  // Not null-checked, -/NEW_ARRAY.
-      DEF_CONST(5, Instruction::CONST, 11u, 0),
-      DEF_AGET(5, Instruction::AGET, 12u, 8u, 11u),    // Null-check eliminated.
-      DEF_AGET(5, Instruction::AGET, 13u, 9u, 11u),    // Null-check kept.
-      DEF_AGET(5, Instruction::AGET, 14u, 10u, 11u),   // Null-check kept.
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, true, false, false,                      // BB #3; unimportant.
-      false, true, true, true,                        // BB #4; unimportant.
-      true, true, true, false, true, false, false,    // BB #5; only the last three are important.
-  };
-
-  PrepareBasicBlocks(bbs);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, RangeCheckArrays) {
-  // NOTE: We don't merge range checks when we merge value names for Phis or memory locations.
-  static const MIRDef mirs[] = {
-      DEF_AGET(4, Instruction::AGET, 0u, 100u, 101u),
-      DEF_AGET(5, Instruction::AGET, 1u, 100u, 101u),
-      DEF_APUT(6, Instruction::APUT, 2u, 100u, 101u),
-
-      DEF_AGET(4, Instruction::AGET, 3u, 200u, 201u),
-      DEF_AGET(5, Instruction::AGET, 4u, 200u, 202u),
-      DEF_APUT(6, Instruction::APUT, 5u, 200u, 201u),
-
-      DEF_AGET(4, Instruction::AGET, 6u, 300u, 302u),
-      DEF_AGET(5, Instruction::AGET, 7u, 301u, 302u),
-      DEF_APUT(6, Instruction::APUT, 8u, 300u, 302u),
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, false, true,
-      false, false, true,
-      false, false, false,
-  };
-  static const bool expected_ignore_range_check[] = {
-      false, false, true,
-      false, false, false,
-      false, false, false,
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  ASSERT_EQ(arraysize(expected_ignore_range_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-    EXPECT_EQ(expected_ignore_range_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, MergeSameValueInDifferentMemoryLocations) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 100u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_ARRAY, 200u),
-      DEF_CONST(4, Instruction::CONST, 2u, 1000),
-      DEF_IPUT(4, Instruction::IPUT, 2u, 100u, 0u),
-      DEF_IPUT(4, Instruction::IPUT, 2u, 100u, 1u),
-      DEF_IPUT(4, Instruction::IPUT, 2u, 101u, 0u),
-      DEF_APUT(4, Instruction::APUT, 2u, 200u, 202u),
-      DEF_APUT(4, Instruction::APUT, 2u, 200u, 203u),
-      DEF_APUT(4, Instruction::APUT, 2u, 201u, 202u),
-      DEF_APUT(4, Instruction::APUT, 2u, 201u, 203u),
-      DEF_SPUT(4, Instruction::SPUT, 2u, 0u),
-      DEF_SPUT(4, Instruction::SPUT, 2u, 1u),
-      DEF_CONST(5, Instruction::CONST, 12u, 2000),
-      DEF_IPUT(5, Instruction::IPUT, 12u, 100u, 0u),
-      DEF_IPUT(5, Instruction::IPUT, 12u, 100u, 1u),
-      DEF_IPUT(5, Instruction::IPUT, 12u, 101u, 0u),
-      DEF_APUT(5, Instruction::APUT, 12u, 200u, 202u),
-      DEF_APUT(5, Instruction::APUT, 12u, 200u, 203u),
-      DEF_APUT(5, Instruction::APUT, 12u, 201u, 202u),
-      DEF_APUT(5, Instruction::APUT, 12u, 201u, 203u),
-      DEF_SPUT(5, Instruction::SPUT, 12u, 0u),
-      DEF_SPUT(5, Instruction::SPUT, 12u, 1u),
-      DEF_PHI2(6, 22u, 2u, 12u),
-      DEF_IGET(6, Instruction::IGET, 23u, 100u, 0u),
-      DEF_IGET(6, Instruction::IGET, 24u, 100u, 1u),
-      DEF_IGET(6, Instruction::IGET, 25u, 101u, 0u),
-      DEF_AGET(6, Instruction::AGET, 26u, 200u, 202u),
-      DEF_AGET(6, Instruction::AGET, 27u, 200u, 203u),
-      DEF_AGET(6, Instruction::AGET, 28u, 201u, 202u),
-      DEF_AGET(6, Instruction::AGET, 29u, 201u, 203u),
-      DEF_SGET(6, Instruction::SGET, 30u, 0u),
-      DEF_SGET(6, Instruction::SGET, 31u, 1u),
-  };
-  PrepareIFields(ifields);
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[2], value_names_[12]);
-  EXPECT_NE(value_names_[2], value_names_[22]);
-  EXPECT_NE(value_names_[12], value_names_[22]);
-  for (size_t i = 23; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(value_names_[22], value_names_[i]) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTest, InfiniteLocationLoop) {
-  // This is a pattern that led to an infinite loop during GVN development. It has been
-  // fixed by rewriting the merging of AliasingValues to merge only locations read from or
-  // written to in each incoming LVN, rather than merging all locations read from or written
-  // to in any incoming LVN. The loop also showed up only when the GVN used the DFS ordering
-  // instead of the "topological" ordering. However, since the "topological" ordering is not
-  // really topological when there are cycles, and an optimizing Java compiler (or a tool
-  // like ProGuard) could theoretically create any sort of flow graph, this could have shown
-  // up in real code.
-  //
-  // While we were merging all the locations:
-  // The first time around, the Phi evaluates to the same value name as CONST 0u. After the
-  // second evaluation, once BB #9 has been processed, the Phi receives its own value name.
-  // However, the index from the first evaluation keeps disappearing and reappearing in the
-  // LVN's aliasing_array_value_map_'s load_value_map for BBs #9, #4, #5 and #7 because of
-  // the DFS ordering of LVN evaluation.
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-  };
-  static const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(4)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 2), DEF_PRED2(3, 9)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED1(4)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED1(7)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED3(6, 7, 8)),
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 0),
-      DEF_PHI2(4, 1u, 0u, 10u),
-      DEF_INVOKE1(6, Instruction::INVOKE_STATIC, 100u),
-      DEF_IGET(6, Instruction::IGET_OBJECT, 3u, 100u, 0u),
-      DEF_CONST(6, Instruction::CONST, 4u, 1000),
-      DEF_APUT(6, Instruction::APUT, 4u, 3u, 1u),            // Index is Phi 1u.
-      DEF_INVOKE1(8, Instruction::INVOKE_STATIC, 100u),
-      DEF_IGET(8, Instruction::IGET_OBJECT, 7u, 100u, 0u),
-      DEF_CONST(8, Instruction::CONST, 8u, 2000),
-      DEF_APUT(8, Instruction::APUT, 9u, 7u, 1u),            // Index is Phi 1u.
-      DEF_CONST(9, Instruction::CONST, 10u, 3000),
-  };
-  PrepareIFields(ifields);
-  PrepareBasicBlocks(bbs);
-  PrepareMIRs(mirs);
-  // Using DFS order for this test. The GVN result should not depend on the ordering used
-  // once the GVN actually converges. But creating a test for this convergence issue with
-  // the topological ordering would be a very challenging task.
-  PerformPreOrderDfsGVN();
-}
-
-TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, IFieldAndPhi) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-  };
-  static const MIRDef mirs[] = {
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
-      DEF_IPUT(3, Instruction::IPUT_OBJECT, 0u, 200u, 0u),
-      DEF_PHI2(4, 2u, 0u, 3u),
-      DEF_MOVE(5, Instruction::MOVE_OBJECT, 3u, 300u),
-      DEF_IPUT(5, Instruction::IPUT_OBJECT, 3u, 200u, 0u),
-      DEF_MOVE(6, Instruction::MOVE_OBJECT, 5u, 2u),
-      DEF_IGET(6, Instruction::IGET_OBJECT, 6u, 200u, 0u),
-      DEF_MOVE(7, Instruction::MOVE_OBJECT, 7u, 5u),
-      DEF_IGET(7, Instruction::IGET_OBJECT, 8u, 200u, 0u),
-      DEF_MOVE(8, Instruction::MOVE_OBJECT, 9u, 5u),
-      DEF_IGET(8, Instruction::IGET_OBJECT, 10u, 200u, 0u),
-      DEF_MOVE(9, Instruction::MOVE_OBJECT, 11u, 5u),
-      DEF_IGET(9, Instruction::IGET_OBJECT, 12u, 200u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[3]);
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_NE(value_names_[3], value_names_[2]);
-  EXPECT_EQ(value_names_[2], value_names_[5]);
-  EXPECT_EQ(value_names_[5], value_names_[6]);
-  EXPECT_EQ(value_names_[5], value_names_[7]);
-  EXPECT_EQ(value_names_[5], value_names_[8]);
-  EXPECT_EQ(value_names_[5], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[5], value_names_[11]);
-  EXPECT_EQ(value_names_[5], value_names_[12]);
-}
-
-TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, NullCheck) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-  };
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-  };
-  static const MIRDef mirs[] = {
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 200u, 0u),
-      DEF_SGET(3, Instruction::SGET_OBJECT, 2u, 0u),
-      DEF_AGET(3, Instruction::AGET_OBJECT, 3u, 300u, 201u),
-      DEF_PHI2(4, 4u, 0u, 8u),
-      DEF_IGET(5, Instruction::IGET_OBJECT, 5u, 200u, 0u),
-      DEF_SGET(5, Instruction::SGET_OBJECT, 6u, 0u),
-      DEF_AGET(5, Instruction::AGET_OBJECT, 7u, 300u, 201u),
-      DEF_MOVE(5, Instruction::MOVE_OBJECT, 8u, 400u),
-      DEF_IPUT(5, Instruction::IPUT_OBJECT, 4u, 200u, 0u),          // PUT the Phi 4u.
-      DEF_SPUT(5, Instruction::SPUT_OBJECT, 4u, 0u),                // PUT the Phi 4u.
-      DEF_APUT(5, Instruction::APUT_OBJECT, 4u, 300u, 201u),        // PUT the Phi 4u.
-      DEF_MOVE(6, Instruction::MOVE_OBJECT, 12u, 4u),
-      DEF_IGET(6, Instruction::IGET_OBJECT, 13u, 200u, 0u),
-      DEF_SGET(6, Instruction::SGET_OBJECT, 14u, 0u),
-      DEF_AGET(6, Instruction::AGET_OBJECT, 15u, 300u, 201u),
-      DEF_AGET(6, Instruction::AGET_OBJECT, 16u, 12u, 600u),
-      DEF_AGET(6, Instruction::AGET_OBJECT, 17u, 13u, 600u),
-      DEF_AGET(6, Instruction::AGET_OBJECT, 18u, 14u, 600u),
-      DEF_AGET(6, Instruction::AGET_OBJECT, 19u, 15u, 600u),
-      DEF_MOVE(8, Instruction::MOVE_OBJECT, 20u, 12u),
-      DEF_IGET(8, Instruction::IGET_OBJECT, 21u, 200u, 0u),
-      DEF_SGET(8, Instruction::SGET_OBJECT, 22u, 0u),
-      DEF_AGET(8, Instruction::AGET_OBJECT, 23u, 300u, 201u),
-      DEF_AGET(8, Instruction::AGET_OBJECT, 24u, 12u, 600u),
-      DEF_AGET(8, Instruction::AGET_OBJECT, 25u, 13u, 600u),
-      DEF_AGET(8, Instruction::AGET_OBJECT, 26u, 14u, 600u),
-      DEF_AGET(8, Instruction::AGET_OBJECT, 27u, 15u, 600u),
-      DEF_MOVE(9, Instruction::MOVE_OBJECT, 28u, 12u),
-      DEF_IGET(9, Instruction::IGET_OBJECT, 29u, 200u, 0u),
-      DEF_SGET(9, Instruction::SGET_OBJECT, 30u, 0u),
-      DEF_AGET(9, Instruction::AGET_OBJECT, 31u, 300u, 201u),
-      DEF_AGET(9, Instruction::AGET_OBJECT, 32u, 12u, 600u),
-      DEF_AGET(9, Instruction::AGET_OBJECT, 33u, 13u, 600u),
-      DEF_AGET(9, Instruction::AGET_OBJECT, 34u, 14u, 600u),
-      DEF_AGET(9, Instruction::AGET_OBJECT, 35u, 15u, 600u),
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, false, false, false,                                   // BB #3.
-      false, true, false, true, false, true, false, true,           // BBs #4 and #5.
-      false, true, false, true, false, false, false, false,         // BB #6.
-      false, true, false, true, true, true, true, true,             // BB #8.
-      false, true, false, true, true, true, true, true,             // BB #9.
-  };
-  static const bool expected_ignore_range_check[] = {
-      false, false, false, false,                                   // BB #3.
-      false, false, false, true, false, false, false, true,         // BBs #4 and #5.
-      false, false, false, true, false, false, false, false,        // BB #6.
-      false, false, false, true, true, true, true, true,            // BB #8.
-      false, false, false, true, true, true, true, true,            // BB #9.
-  };
-
-  PrepareIFields(ifields);
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[4]);
-  EXPECT_NE(value_names_[1], value_names_[5]);
-  EXPECT_NE(value_names_[2], value_names_[6]);
-  EXPECT_NE(value_names_[3], value_names_[7]);
-  EXPECT_NE(value_names_[4], value_names_[8]);
-  EXPECT_EQ(value_names_[4], value_names_[12]);
-  EXPECT_EQ(value_names_[5], value_names_[13]);
-  EXPECT_EQ(value_names_[6], value_names_[14]);
-  EXPECT_EQ(value_names_[7], value_names_[15]);
-  EXPECT_EQ(value_names_[12], value_names_[20]);
-  EXPECT_EQ(value_names_[13], value_names_[21]);
-  EXPECT_EQ(value_names_[14], value_names_[22]);
-  EXPECT_EQ(value_names_[15], value_names_[23]);
-  EXPECT_EQ(value_names_[12], value_names_[28]);
-  EXPECT_EQ(value_names_[13], value_names_[29]);
-  EXPECT_EQ(value_names_[14], value_names_[30]);
-  EXPECT_EQ(value_names_[15], value_names_[31]);
-  PerformGVNCodeModifications();
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-    EXPECT_EQ(expected_ignore_range_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTestTwoNestedLoops, IFieldAndPhi) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-  };
-  static const MIRDef mirs[] = {
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 0u, 100u),
-      DEF_IPUT(3, Instruction::IPUT_OBJECT, 0u, 200u, 0u),
-      DEF_PHI2(4, 2u, 0u, 11u),
-      DEF_MOVE(4, Instruction::MOVE_OBJECT, 3u, 2u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 4u, 200u, 0u),
-      DEF_MOVE(5, Instruction::MOVE_OBJECT, 5u, 3u),
-      DEF_IGET(5, Instruction::IGET_OBJECT, 6u, 200u, 0u),
-      DEF_MOVE(6, Instruction::MOVE_OBJECT, 7u, 3u),
-      DEF_IGET(6, Instruction::IGET_OBJECT, 8u, 200u, 0u),
-      DEF_MOVE(7, Instruction::MOVE_OBJECT, 9u, 3u),
-      DEF_IGET(7, Instruction::IGET_OBJECT, 10u, 200u, 0u),
-      DEF_MOVE(7, Instruction::MOVE_OBJECT, 11u, 300u),
-      DEF_IPUT(7, Instruction::IPUT_OBJECT, 11u, 200u, 0u),
-      DEF_MOVE(8, Instruction::MOVE_OBJECT, 13u, 3u),
-      DEF_IGET(8, Instruction::IGET_OBJECT, 14u, 200u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN();
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[11]);
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_NE(value_names_[11], value_names_[2]);
-  EXPECT_EQ(value_names_[2], value_names_[3]);
-  EXPECT_EQ(value_names_[3], value_names_[4]);
-  EXPECT_EQ(value_names_[3], value_names_[5]);
-  EXPECT_EQ(value_names_[3], value_names_[6]);
-  EXPECT_EQ(value_names_[3], value_names_[7]);
-  EXPECT_EQ(value_names_[3], value_names_[8]);
-  EXPECT_EQ(value_names_[3], value_names_[9]);
-  EXPECT_EQ(value_names_[3], value_names_[10]);
-  EXPECT_EQ(value_names_[3], value_names_[13]);
-  EXPECT_EQ(value_names_[3], value_names_[14]);
-}
-
-TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) {
-  // When there's an empty catch block, all the exception paths lead to the next block in
-  // the normal path and we can also have normal "taken" or "fall-through" branches to that
-  // path. Check that LocalValueNumbering::PruneNonAliasingRefsForCatch() can handle it.
-  static const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
-  };
-  static const MIRDef mirs[] = {
-      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 100u),
-  };
-  PrepareBasicBlocks(bbs);
-  BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
-  catch_handler->catch_entry = true;
-  // Add successor block info to the check block.
-  BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
-  check_bb->successor_block_list_type = kCatch;
-  SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
-  successor_block_info->block = catch_handler->id;
-  check_bb->successor_blocks.push_back(successor_block_info);
-  BasicBlock* merge_block = cu_.mir_graph->GetBasicBlock(4u);
-  std::swap(merge_block->taken, merge_block->fall_through);
-  PrepareMIRs(mirs);
-  PerformGVN();
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) {
-  static const MIRDef mirs[] = {
-      DEF_BINOP(3u, Instruction::DIV_INT, 1u, 20u, 21u),
-      DEF_BINOP(3u, Instruction::DIV_INT, 2u, 24u, 21u),
-      DEF_BINOP(3u, Instruction::DIV_INT, 3u, 20u, 23u),
-      DEF_BINOP(4u, Instruction::DIV_INT, 4u, 24u, 22u),
-      DEF_BINOP(4u, Instruction::DIV_INT, 9u, 24u, 25u),
-      DEF_BINOP(5u, Instruction::DIV_INT, 5u, 24u, 21u),
-      DEF_BINOP(5u, Instruction::DIV_INT, 10u, 24u, 26u),
-      DEF_PHI2(6u, 27u, 25u, 26u),
-      DEF_BINOP(6u, Instruction::DIV_INT, 12u, 20u, 27u),
-      DEF_BINOP(6u, Instruction::DIV_INT, 6u, 24u, 21u),
-      DEF_BINOP(6u, Instruction::DIV_INT, 7u, 20u, 23u),
-      DEF_BINOP(6u, Instruction::DIV_INT, 8u, 20u, 22u),
-  };
-
-  static const bool expected_ignore_div_zero_check[] = {
-      false,  // New divisor seen.
-      true,   // Eliminated since it has the same divisor as the first one.
-      false,  // New divisor seen.
-      false,  // New divisor seen.
-      false,  // New divisor seen.
-      true,   // Eliminated in dominating block.
-      false,  // New divisor seen.
-      false,  // Phi node.
-      true,   // Eliminated on both sides of diamond and merged via phi.
-      true,   // Eliminated in dominating block.
-      true,   // Eliminated in dominating block.
-      false,  // Only eliminated on one path of diamond.
-  };
-
-  PrepareMIRs(mirs);
-  PerformGVN();
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_div_zero_check), mir_count_);
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTestDiamond, CheckCastDiamond) {
-  static const MIRDef mirs[] = {
-      DEF_UNOP(3u, Instruction::INSTANCE_OF, 0u, 100u),
-      DEF_UNOP(3u, Instruction::INSTANCE_OF, 1u, 200u),
-      DEF_IFZ(3u, Instruction::IF_NEZ, 0u),
-      DEF_INVOKE1(4u, Instruction::CHECK_CAST, 100u),
-      DEF_INVOKE1(5u, Instruction::CHECK_CAST, 100u),
-      DEF_INVOKE1(5u, Instruction::CHECK_CAST, 200u),
-      DEF_INVOKE1(5u, Instruction::CHECK_CAST, 100u),
-      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u),
-  };
-
-  static const bool expected_ignore_check_cast[] = {
-      false,  // instance-of
-      false,  // instance-of
-      false,  // if-nez
-      false,  // Not eliminated, fall-through branch.
-      true,   // Eliminated.
-      false,  // Not eliminated, different value.
-      false,  // Not eliminated, different type.
-      false,  // Not eliminated, bottom block.
-  };
-
-  PrepareMIRs(mirs);
-  mirs_[0].dalvikInsn.vC = 1234;  // type for instance-of
-  mirs_[1].dalvikInsn.vC = 1234;  // type for instance-of
-  mirs_[3].dalvikInsn.vB = 1234;  // type for check-cast
-  mirs_[4].dalvikInsn.vB = 1234;  // type for check-cast
-  mirs_[5].dalvikInsn.vB = 1234;  // type for check-cast
-  mirs_[6].dalvikInsn.vB = 4321;  // type for check-cast
-  mirs_[7].dalvikInsn.vB = 1234;  // type for check-cast
-  PerformGVN();
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_check_cast), mir_count_);
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = expected_ignore_check_cast[i] ? MIR_IGNORE_CHECK_CAST : 0u;
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(GlobalValueNumberingTest, CheckCastDominators) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // Block #3, top of the diamond.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(3)),     // Block #4, left side.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #5, right side.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(5)),     // Block #6, right side.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 6)),  // Block #7, bottom.
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNOP(3u, Instruction::INSTANCE_OF, 0u, 100u),
-      DEF_UNOP(3u, Instruction::INSTANCE_OF, 1u, 200u),
-      DEF_IFZ(3u, Instruction::IF_NEZ, 0u),
-      DEF_INVOKE1(4u, Instruction::CHECK_CAST, 100u),
-      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u),
-      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 200u),
-      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u),
-      DEF_INVOKE1(7u, Instruction::CHECK_CAST, 100u),
-  };
-
-  static const bool expected_ignore_check_cast[] = {
-      false,  // instance-of
-      false,  // instance-of
-      false,  // if-nez
-      false,  // Not eliminated, fall-through branch.
-      true,   // Eliminated.
-      false,  // Not eliminated, different value.
-      false,  // Not eliminated, different type.
-      false,  // Not eliminated, bottom block.
-  };
-
-  PrepareBasicBlocks(bbs);
-  PrepareMIRs(mirs);
-  mirs_[0].dalvikInsn.vC = 1234;  // type for instance-of
-  mirs_[1].dalvikInsn.vC = 1234;  // type for instance-of
-  mirs_[3].dalvikInsn.vB = 1234;  // type for check-cast
-  mirs_[4].dalvikInsn.vB = 1234;  // type for check-cast
-  mirs_[5].dalvikInsn.vB = 1234;  // type for check-cast
-  mirs_[6].dalvikInsn.vB = 4321;  // type for check-cast
-  mirs_[7].dalvikInsn.vB = 1234;  // type for check-cast
-  PerformGVN();
-  PerformGVNCodeModifications();
-  ASSERT_EQ(arraysize(expected_ignore_check_cast), mir_count_);
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = expected_ignore_check_cast[i] ? MIR_IGNORE_CHECK_CAST : 0u;
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
deleted file mode 100644
index 445859c..0000000
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ /dev/null
@@ -1,1473 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <sstream>
-
-#include "gvn_dead_code_elimination.h"
-
-#include "base/arena_bit_vector.h"
-#include "base/bit_vector-inl.h"
-#include "base/macros.h"
-#include "base/allocator.h"
-#include "compiler_enums.h"
-#include "dataflow_iterator-inl.h"
-#include "dex_instruction.h"
-#include "dex/mir_graph.h"
-#include "local_value_numbering.h"
-
-namespace art {
-
-constexpr uint16_t GvnDeadCodeElimination::kNoValue;
-constexpr uint16_t GvnDeadCodeElimination::kNPos;
-
-inline uint16_t GvnDeadCodeElimination::MIRData::PrevChange(int v_reg) const {
-  DCHECK(has_def);
-  DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1);
-  return (v_reg == vreg_def) ? prev_value.change : prev_value_high.change;
-}
-
-inline void GvnDeadCodeElimination::MIRData::SetPrevChange(int v_reg, uint16_t change) {
-  DCHECK(has_def);
-  DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1);
-  if (v_reg == vreg_def) {
-    prev_value.change = change;
-  } else {
-    prev_value_high.change = change;
-  }
-}
-
-inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData* prev_data) {
-  DCHECK_NE(PrevChange(v_reg), kNPos);
-  DCHECK(v_reg == prev_data->vreg_def || v_reg == prev_data->vreg_def + 1);
-  if (vreg_def == v_reg) {
-    if (prev_data->vreg_def == v_reg) {
-      prev_value = prev_data->prev_value;
-      low_def_over_high_word = prev_data->low_def_over_high_word;
-    } else {
-      prev_value = prev_data->prev_value_high;
-      low_def_over_high_word = !prev_data->high_def_over_low_word;
-    }
-  } else {
-    if (prev_data->vreg_def == v_reg) {
-      prev_value_high = prev_data->prev_value;
-      high_def_over_low_word = !prev_data->low_def_over_high_word;
-    } else {
-      prev_value_high = prev_data->prev_value_high;
-      high_def_over_low_word = prev_data->high_def_over_low_word;
-    }
-  }
-}
-
-GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc)
-    : num_vregs_(num_vregs),
-      vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)),
-      vreg_high_words_(false, Allocator::GetNoopAllocator(),
-                       BitVector::BitsToWords(num_vregs),
-                       alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs))),
-      mir_data_(alloc->Adapter()) {
-  mir_data_.reserve(100);
-}
-
-inline void GvnDeadCodeElimination::VRegChains::Reset() {
-  DCHECK(mir_data_.empty());
-  std::fill_n(vreg_data_, num_vregs_, VRegValue());
-  vreg_high_words_.ClearAllBits();
-}
-
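-// Record a MIR that defines v_reg (and v_reg + 1 for a wide definition). The previous
-// value/change of each affected vreg is saved in the MIRData so that the definition can
-// later be undone, and vreg_high_words_ is updated to track high words of wide values.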
-void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide,
-                                                       uint16_t new_value) {
-  uint16_t pos = mir_data_.size();
-  mir_data_.emplace_back(mir);
-  MIRData* data = &mir_data_.back();
-  data->has_def = true;
-  data->wide_def = wide;
-  data->vreg_def = v_reg;
-
-  DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-  data->prev_value = vreg_data_[v_reg];
-  data->low_def_over_high_word =
-      (vreg_data_[v_reg].change != kNPos)
-      ? GetMIRData(vreg_data_[v_reg].change)->vreg_def + 1 == v_reg
-      : vreg_high_words_.IsBitSet(v_reg);
-  vreg_data_[v_reg].value = new_value;
-  vreg_data_[v_reg].change = pos;
-  vreg_high_words_.ClearBit(v_reg);
-
-  if (wide) {
-    DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
-    data->prev_value_high = vreg_data_[v_reg + 1];
-    data->high_def_over_low_word =
-        (vreg_data_[v_reg + 1].change != kNPos)
-        ? GetMIRData(vreg_data_[v_reg + 1].change)->vreg_def == v_reg + 1
-        : !vreg_high_words_.IsBitSet(v_reg + 1);
-    vreg_data_[v_reg + 1].value = new_value;
-    vreg_data_[v_reg + 1].change = pos;
-    vreg_high_words_.SetBit(v_reg + 1);
-  }
-}
-
-inline void GvnDeadCodeElimination::VRegChains::AddMIRWithoutDef(MIR* mir) {
-  mir_data_.emplace_back(mir);
-}
-
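-// Undo the registration of the most recently added MIR, restoring the previous
-// value/change and high-word marking for any vreg it defined.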
-void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() {
-  MIRData* data = LastMIRData();
-  if (data->has_def) {
-    DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u);
-    vreg_data_[data->vreg_def] = data->prev_value;
-    DCHECK(!vreg_high_words_.IsBitSet(data->vreg_def));
-    if (data->low_def_over_high_word) {
-      vreg_high_words_.SetBit(data->vreg_def);
-    }
-    if (data->wide_def) {
-      DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u);
-      vreg_data_[data->vreg_def + 1] = data->prev_value_high;
-      DCHECK(vreg_high_words_.IsBitSet(data->vreg_def + 1));
-      if (data->high_def_over_low_word) {
-        vreg_high_words_.ClearBit(data->vreg_def + 1);
-      }
-    }
-  }
-  mir_data_.pop_back();
-}
-
-void GvnDeadCodeElimination::VRegChains::RemoveTrailingNops() {
-  // There's at least one NOP to drop. There may be more.
-  MIRData* last_data = LastMIRData();
-  DCHECK(!last_data->must_keep && !last_data->has_def);
-  do {
-    DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop));
-    mir_data_.pop_back();
-    if (mir_data_.empty()) {
-      break;
-    }
-    last_data = LastMIRData();
-  } while (!last_data->must_keep && !last_data->has_def);
-}
-
-inline size_t GvnDeadCodeElimination::VRegChains::NumMIRs() const {
-  return mir_data_.size();
-}
-
-inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::GetMIRData(size_t pos) {
-  DCHECK_LT(pos, mir_data_.size());
-  return &mir_data_[pos];
-}
-
-inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::LastMIRData() {
-  DCHECK(!mir_data_.empty());
-  return &mir_data_.back();
-}
-
-uint32_t GvnDeadCodeElimination::VRegChains::NumVRegs() const {
-  return num_vregs_;
-}
-
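-// Record `value` as the pre-block value of the high word held in v_reg. If the vreg has
-// recorded changes, walk back to the oldest one and attach the value as its previous
-// value; otherwise store it directly in vreg_data_.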
-void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint16_t value) {
-  DCHECK_NE(value, kNoValue);
-  DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-  uint16_t change = vreg_data_[v_reg].change;
-  if (change == kNPos) {
-    vreg_data_[v_reg].value = value;
-    vreg_high_words_.SetBit(v_reg);
-  } else {
-    while (true) {
-      MIRData* data = &mir_data_[change];
-      DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg);
-      if (data->vreg_def == v_reg) {  // Low word, use prev_value.
-        if (data->prev_value.change == kNPos) {
-          DCHECK_EQ(data->prev_value.value, kNoValue);
-          data->prev_value.value = value;
-          data->low_def_over_high_word = true;
-          break;
-        }
-        change = data->prev_value.change;
-      } else {  // High word, use prev_value_high.
-        if (data->prev_value_high.change == kNPos) {
-          DCHECK_EQ(data->prev_value_high.value, kNoValue);
-          data->prev_value_high.value = value;
-          break;
-        }
-        change = data->prev_value_high.change;
-      }
-    }
-  }
-}
-
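-// Lazily initialize the value a vreg (or vreg pair) had on entry to the basic block from
-// the LVN, also recording the high word if the old value turns out to be wide.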
-void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool wide,
-                                                                const LocalValueNumbering* lvn) {
-  DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-  if (!wide) {
-    if (vreg_data_[v_reg].value == kNoValue) {
-      uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg);
-      if (old_value == kNoValue) {
-        // Maybe there was a wide value in v_reg before. Do not check for a wide value in
-        // v_reg-1; that will be done only if we see a definition of v_reg-1, as it is
-        // unnecessary otherwise.
-        old_value = lvn->GetStartingVregValueNumberWide(v_reg);
-        if (old_value != kNoValue) {
-          InsertInitialValueHigh(v_reg + 1, old_value);
-        }
-      }
-      vreg_data_[v_reg].value = old_value;
-      DCHECK(!vreg_high_words_.IsBitSet(v_reg));  // Keep marked as low word.
-    }
-  } else {
-    DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
-    bool check_high = true;
-    if (vreg_data_[v_reg].value == kNoValue) {
-      uint16_t old_value = lvn->GetStartingVregValueNumberWide(v_reg);
-      if (old_value != kNoValue) {
-        InsertInitialValueHigh(v_reg + 1, old_value);
-        check_high = false;  // High word has been processed.
-      } else {
-        // Maybe there was a narrow value before. Do not check for a wide value in v_reg-1;
-        // that will be done only if we see a definition of v_reg-1, as it is unnecessary
-        // otherwise.
-        old_value = lvn->GetStartingVregValueNumber(v_reg);
-      }
-      vreg_data_[v_reg].value = old_value;
-      DCHECK(!vreg_high_words_.IsBitSet(v_reg));  // Keep marked as low word.
-    }
-    if (check_high && vreg_data_[v_reg + 1].value == kNoValue) {
-      uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1);
-      if (old_value == kNoValue && static_cast<size_t>(v_reg + 2) < num_vregs_) {
-        // Maybe there was a wide value before.
-        old_value = lvn->GetStartingVregValueNumberWide(v_reg + 1);
-        if (old_value != kNoValue) {
-          InsertInitialValueHigh(v_reg + 2, old_value);
-        }
-      }
-      vreg_data_[v_reg + 1].value = old_value;
-      DCHECK(!vreg_high_words_.IsBitSet(v_reg + 1));  // Keep marked as low word.
-    }
-  }
-}
-
-inline uint16_t GvnDeadCodeElimination::VRegChains::LastChange(int v_reg) {
-  DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-  return vreg_data_[v_reg].change;
-}
-
-inline uint16_t GvnDeadCodeElimination::VRegChains::CurrentValue(int v_reg) {
-  DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-  return vreg_data_[v_reg].value;
-}
-
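-// Search backwards, no further than `cutoff`, for the oldest change whose previous value
-// for v_reg matches the vreg's current value, i.e. the head of a chain of changes that
-// could be killed without altering v_reg. Returns kNPos if there is no such change.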
-uint16_t GvnDeadCodeElimination::VRegChains::FindKillHead(int v_reg, uint16_t cutoff) {
-  uint16_t current_value = this->CurrentValue(v_reg);
-  DCHECK_NE(current_value, kNoValue);
-  uint16_t change = LastChange(v_reg);
-  DCHECK_LT(change, mir_data_.size());
-  DCHECK_GE(change, cutoff);
-  bool match_high_word = (mir_data_[change].vreg_def != v_reg);
-  do {
-    MIRData* data = &mir_data_[change];
-    DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg);
-    if (data->vreg_def == v_reg) {  // Low word, use prev_value.
-      if (data->prev_value.value == current_value &&
-          match_high_word == data->low_def_over_high_word) {
-        break;
-      }
-      change = data->prev_value.change;
-    } else {  // High word, use prev_value_high.
-      if (data->prev_value_high.value == current_value &&
-          match_high_word != data->high_def_over_low_word) {
-        break;
-      }
-      change = data->prev_value_high.change;
-    }
-    if (change < cutoff) {
-      change = kNPos;
-    }
-  } while (change != kNPos);
-  return change;
-}
-
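-// Return the first change to v_reg after `change`, or kNPos if there is none.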
-uint16_t GvnDeadCodeElimination::VRegChains::FindFirstChangeAfter(int v_reg,
-                                                                  uint16_t change) const {
-  DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-  DCHECK_LT(change, mir_data_.size());
-  uint16_t result = kNPos;
-  uint16_t search_change = vreg_data_[v_reg].change;
-  while (search_change != kNPos && search_change > change) {
-    result = search_change;
-    search_change = mir_data_[search_change].PrevChange(v_reg);
-  }
-  return result;
-}
-
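-// Redirect links that point to old_change so that they point to new_change instead,
-// whether they are chain heads in vreg_data_ or PrevChange links of later MIRs.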
-void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint16_t new_change) {
-  const MIRData* old_data = GetMIRData(old_change);
-  DCHECK(old_data->has_def);
-  int count = old_data->wide_def ? 2 : 1;
-  for (int v_reg = old_data->vreg_def, end = old_data->vreg_def + count; v_reg != end; ++v_reg) {
-    uint16_t next_change = FindFirstChangeAfter(v_reg, old_change);
-    if (next_change == kNPos) {
-      DCHECK_EQ(vreg_data_[v_reg].change, old_change);
-      vreg_data_[v_reg].change = new_change;
-      DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == old_data->vreg_def + 1);
-      // No change in vreg_high_words_.
-    } else {
-      DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change);
-      mir_data_[next_change].SetPrevChange(v_reg, new_change);
-    }
-  }
-}
-
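-// Unlink `change` from the per-vreg chains, restoring the previous value for each vreg
-// whose latest change it was and fixing up the high-word bits.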
-void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) {
-  MIRData* data = &mir_data_[change];
-  DCHECK(data->has_def);
-  int count = data->wide_def ? 2 : 1;
-  for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) {
-    uint16_t next_change = FindFirstChangeAfter(v_reg, change);
-    if (next_change == kNPos) {
-      DCHECK_EQ(vreg_data_[v_reg].change, change);
-      vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high;
-      DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == data->vreg_def + 1);
-      if (data->vreg_def == v_reg && data->low_def_over_high_word) {
-        vreg_high_words_.SetBit(v_reg);
-      } else if (data->vreg_def != v_reg && data->high_def_over_low_word) {
-        vreg_high_words_.ClearBit(v_reg);
-      }
-    } else {
-      DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change);
-      mir_data_[next_change].RemovePrevChange(v_reg, data);
-    }
-  }
-}
-
-inline bool GvnDeadCodeElimination::VRegChains::IsTopChange(uint16_t change) const {
-  DCHECK_LT(change, mir_data_.size());
-  const MIRData* data = &mir_data_[change];
-  DCHECK(data->has_def);
-  DCHECK_LT(data->wide_def ? data->vreg_def + 1u : data->vreg_def, num_vregs_);
-  return vreg_data_[data->vreg_def].change == change &&
-      (!data->wide_def || vreg_data_[data->vreg_def + 1u].change == change);
-}
-
-bool GvnDeadCodeElimination::VRegChains::IsSRegUsed(uint16_t first_change, uint16_t last_change,
-                                                    int s_reg) const {
-  DCHECK_LE(first_change, last_change);
-  DCHECK_LE(last_change, mir_data_.size());
-  for (size_t c = first_change; c != last_change; ++c) {
-    SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
-    for (int i = 0; i != ssa_rep->num_uses; ++i) {
-      if (ssa_rep->uses[i] == s_reg) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-bool GvnDeadCodeElimination::VRegChains::IsVRegUsed(uint16_t first_change, uint16_t last_change,
-                                                    int v_reg, MIRGraph* mir_graph) const {
-  DCHECK_LE(first_change, last_change);
-  DCHECK_LE(last_change, mir_data_.size());
-  for (size_t c = first_change; c != last_change; ++c) {
-    SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
-    for (int i = 0; i != ssa_rep->num_uses; ++i) {
-      if (mir_graph->SRegToVReg(ssa_rep->uses[i]) == v_reg) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-void GvnDeadCodeElimination::VRegChains::RenameSRegUses(uint16_t first_change, uint16_t last_change,
-                                                        int old_s_reg, int new_s_reg, bool wide) {
-  for (size_t c = first_change; c != last_change; ++c) {
-    SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
-    for (int i = 0; i != ssa_rep->num_uses; ++i) {
-      if (ssa_rep->uses[i] == old_s_reg) {
-        ssa_rep->uses[i] = new_s_reg;
-        if (wide) {
-          ++i;
-          DCHECK_LT(i, ssa_rep->num_uses);
-          ssa_rep->uses[i] = new_s_reg + 1;
-        }
-      }
-    }
-  }
-}
-
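-// Rewrite uses of old_s_reg (in vreg old_v_reg) to new_s_reg (in vreg new_v_reg) within
-// the given range of changes. A binop_2ADDR whose first use is being renamed to a
-// different vreg is first rewritten to the plain three-operand form.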
-void GvnDeadCodeElimination::VRegChains::RenameVRegUses(uint16_t first_change, uint16_t last_change,
-                                                        int old_s_reg, int old_v_reg,
-                                                        int new_s_reg, int new_v_reg) {
-  for (size_t c = first_change; c != last_change; ++c) {
-    MIR* mir = mir_data_[c].mir;
-    if (IsInstructionBinOp2Addr(mir->dalvikInsn.opcode) &&
-        mir->ssa_rep->uses[0] == old_s_reg && old_v_reg != new_v_reg) {
-      // Rewrite binop_2ADDR with plain binop before doing the register rename.
-      ChangeBinOp2AddrToPlainBinOp(mir);
-    }
-    uint64_t df_attr = MIRGraph::GetDataFlowAttributes(mir);
-    size_t use = 0u;
-#define REPLACE_VREG(REG) \
-    if ((df_attr & DF_U##REG) != 0) {                                         \
-      if (mir->ssa_rep->uses[use] == old_s_reg) {                             \
-        DCHECK_EQ(mir->dalvikInsn.v##REG, static_cast<uint32_t>(old_v_reg));  \
-        mir->dalvikInsn.v##REG = new_v_reg;                                   \
-        mir->ssa_rep->uses[use] = new_s_reg;                                  \
-        if ((df_attr & DF_##REG##_WIDE) != 0) {                               \
-          DCHECK_EQ(mir->ssa_rep->uses[use + 1], old_s_reg + 1);              \
-          mir->ssa_rep->uses[use + 1] = new_s_reg + 1;                        \
-        }                                                                     \
-      }                                                                       \
-      use += ((df_attr & DF_##REG##_WIDE) != 0) ? 2 : 1;                      \
-    }
-    REPLACE_VREG(A)
-    REPLACE_VREG(B)
-    REPLACE_VREG(C)
-#undef REPLACE_VREG
-    // We may encounter an out-of-order Phi, which we need to ignore; otherwise we should
-    // only be asked to rename registers specified by DF_UA, DF_UB and DF_UC.
-    DCHECK_EQ(use,
-              static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi
-              ? 0u
-              : static_cast<size_t>(mir->ssa_rep->num_uses));
-  }
-}
-
-GvnDeadCodeElimination::GvnDeadCodeElimination(const GlobalValueNumbering* gvn,
-                                               ScopedArenaAllocator* alloc)
-    : gvn_(gvn),
-      mir_graph_(gvn_->GetMirGraph()),
-      vreg_chains_(mir_graph_->GetNumOfCodeAndTempVRs(), alloc),
-      bb_(nullptr),
-      lvn_(nullptr),
-      no_uses_all_since_(0u),
-      unused_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)),
-      vregs_to_kill_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)),
-      kill_heads_(alloc->AllocArray<uint16_t>(vreg_chains_.NumVRegs(), kArenaAllocMisc)),
-      changes_to_kill_(alloc->Adapter()),
-      dependent_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)) {
-  changes_to_kill_.reserve(16u);
-}
-
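-// Apply dead code elimination to a single basic block: a forward pass records vreg
-// definition chains and kills simple overwritten values, then a backward pass removes
-// the remaining MIRs whose results are never used.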
-void GvnDeadCodeElimination::Apply(BasicBlock* bb) {
-  bb_ = bb;
-  lvn_ = gvn_->GetLvn(bb->id);
-
-  RecordPass();
-  BackwardPass();
-
-  DCHECK_EQ(no_uses_all_since_, 0u);
-  lvn_ = nullptr;
-  bb_ = nullptr;
-}
-
-void GvnDeadCodeElimination::RecordPass() {
-  // Record MIRs with vreg definition data, eliminate single instructions.
-  vreg_chains_.Reset();
-  DCHECK_EQ(no_uses_all_since_, 0u);
-  for (MIR* mir = bb_->first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (RecordMIR(mir)) {
-      RecordPassTryToKillOverwrittenMoveOrMoveSrc();
-      RecordPassTryToKillLastMIR();
-    }
-  }
-}
-
-void GvnDeadCodeElimination::BackwardPass() {
-  // Now process MIRs in reverse order, trying to eliminate them.
-  unused_vregs_->ClearAllBits();  // Implicitly depend on all vregs at the end of BB.
-  while (vreg_chains_.NumMIRs() != 0u) {
-    if (BackwardPassTryToKillLastMIR()) {
-      continue;
-    }
-    BackwardPassProcessLastMIR();
-  }
-}
-
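-// Kill a recorded MIR: turn it into a NOP and clear the MIRData flags that marked it
-// as a definition or move.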
-void GvnDeadCodeElimination::KillMIR(MIRData* data) {
-  DCHECK(!data->must_keep);
-  DCHECK(!data->uses_all_vregs);
-  DCHECK(data->has_def);
-  DCHECK(data->mir->ssa_rep->num_defs == 1 || data->mir->ssa_rep->num_defs == 2);
-
-  KillMIR(data->mir);
-  data->has_def = false;
-  data->is_move = false;
-  data->is_move_src = false;
-}
-
-void GvnDeadCodeElimination::KillMIR(MIR* mir) {
-  mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-  mir->ssa_rep->num_uses = 0;
-  mir->ssa_rep->num_defs = 0;
-}
-
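-// Rewrite e.g. ADD_INT_2ADDR vA, vB as ADD_INT vA, vA, vB so that the first source
-// operand no longer needs to share a register with the destination.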
-void GvnDeadCodeElimination::ChangeBinOp2AddrToPlainBinOp(MIR* mir) {
-  mir->dalvikInsn.vC = mir->dalvikInsn.vB;
-  mir->dalvikInsn.vB = mir->dalvikInsn.vA;
-  mir->dalvikInsn.opcode = static_cast<Instruction::Code>(
-      mir->dalvikInsn.opcode - Instruction::ADD_INT_2ADDR + Instruction::ADD_INT);
-}
-
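-// Create a Phi for s_reg at the top of the current basic block, taking its inputs from
-// each predecessor's vreg_to_ssa_map_exit.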
-MIR* GvnDeadCodeElimination::CreatePhi(int s_reg) {
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  MIR* phi = mir_graph_->NewMIR();
-  phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
-  phi->dalvikInsn.vA = v_reg;
-  phi->offset = bb_->start_offset;
-  phi->m_unit_index = 0;  // Arbitrarily assign all Phi nodes to outermost method.
-
-  phi->ssa_rep = static_cast<SSARepresentation*>(mir_graph_->GetArena()->Alloc(
-      sizeof(SSARepresentation), kArenaAllocDFInfo));
-
-  mir_graph_->AllocateSSADefData(phi, 1);
-  phi->ssa_rep->defs[0] = s_reg;
-
-  size_t num_uses = bb_->predecessors.size();
-  mir_graph_->AllocateSSAUseData(phi, num_uses);
-  size_t idx = 0u;
-  for (BasicBlockId pred_id : bb_->predecessors) {
-    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id);
-    DCHECK(pred_bb != nullptr);
-    phi->ssa_rep->uses[idx] = pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
-    DCHECK_NE(phi->ssa_rep->uses[idx], INVALID_SREG);
-    idx++;
-  }
-
-  phi->meta.phi_incoming = static_cast<BasicBlockId*>(mir_graph_->GetArena()->Alloc(
-      sizeof(BasicBlockId) * num_uses, kArenaAllocDFInfo));
-  std::copy(bb_->predecessors.begin(), bb_->predecessors.end(), phi->meta.phi_incoming);
-  bb_->PrependMIR(phi);
-  return phi;
-}
-
-MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change,
-                                                      MIR* mir_to_kill) {
-  DCHECK(mir_to_kill->ssa_rep->num_defs == 1 || mir_to_kill->ssa_rep->num_defs == 2);
-  bool wide = (mir_to_kill->ssa_rep->num_defs != 1);
-  int new_s_reg = mir_to_kill->ssa_rep->defs[0];
-
-  // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the
-  // same dalvik reg to keep consistency with subsequent instructions. However, if there's no
-  // defining MIR for that dalvik reg, the preserved values must come from its predecessors
-  // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor).
-  if (def_change == kNPos) {
-    if (wide) {
-      DCHECK_EQ(new_s_reg + 1, mir_to_kill->ssa_rep->defs[1]);
-      DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1));
-      CreatePhi(new_s_reg + 1);  // High word Phi.
-    }
-    MIR* phi = CreatePhi(new_s_reg);
-    // If this is a degenerate Phi with all inputs being the same SSA reg, we need to
-    // rename its uses.
-    DCHECK_NE(phi->ssa_rep->num_uses, 0u);
-    int old_s_reg = phi->ssa_rep->uses[0];
-    bool all_same = true;
-    for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) {
-      if (phi->ssa_rep->uses[i] != old_s_reg) {
-        all_same = false;
-        break;
-      }
-    }
-    if (all_same) {
-      vreg_chains_.RenameSRegUses(0u, last_change, old_s_reg, new_s_reg, wide);
-    }
-    return phi;
-  } else {
-    DCHECK_LT(def_change, last_change);
-    DCHECK_LE(last_change, vreg_chains_.NumMIRs());
-    MIRData* def_data = vreg_chains_.GetMIRData(def_change);
-    DCHECK(def_data->has_def);
-    int old_s_reg = def_data->mir->ssa_rep->defs[0];
-    DCHECK_NE(old_s_reg, new_s_reg);
-    DCHECK_EQ(mir_graph_->SRegToVReg(old_s_reg), mir_graph_->SRegToVReg(new_s_reg));
-    def_data->mir->ssa_rep->defs[0] = new_s_reg;
-    if (wide) {
-      if (static_cast<int>(def_data->mir->dalvikInsn.opcode) == kMirOpPhi) {
-        // Currently the high word Phi is always located after the low word Phi.
-        MIR* phi_high = def_data->mir->next;
-        DCHECK(phi_high != nullptr && static_cast<int>(phi_high->dalvikInsn.opcode) == kMirOpPhi);
-        DCHECK_EQ(phi_high->ssa_rep->defs[0], old_s_reg + 1);
-        phi_high->ssa_rep->defs[0] = new_s_reg + 1;
-      } else {
-        DCHECK_EQ(def_data->mir->ssa_rep->defs[1], old_s_reg + 1);
-        def_data->mir->ssa_rep->defs[1] = new_s_reg + 1;
-      }
-    }
-    vreg_chains_.RenameSRegUses(def_change + 1u, last_change, old_s_reg, new_s_reg, wide);
-    return nullptr;
-  }
-}
-
-
-void GvnDeadCodeElimination::BackwardPassProcessLastMIR() {
-  MIRData* data = vreg_chains_.LastMIRData();
-  if (data->uses_all_vregs) {
-    DCHECK(data->must_keep);
-    unused_vregs_->ClearAllBits();
-    DCHECK_EQ(no_uses_all_since_, vreg_chains_.NumMIRs());
-    --no_uses_all_since_;
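-    // Rewind no_uses_all_since_ to just past the previous change with uses_all_vregs set.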
-    while (no_uses_all_since_ != 0u &&
-        !vreg_chains_.GetMIRData(no_uses_all_since_ - 1u)->uses_all_vregs) {
-      --no_uses_all_since_;
-    }
-  } else {
-    if (data->has_def) {
-      unused_vregs_->SetBit(data->vreg_def);
-      if (data->wide_def) {
-        unused_vregs_->SetBit(data->vreg_def + 1);
-      }
-    }
-    for (int i = 0, num_uses = data->mir->ssa_rep->num_uses; i != num_uses; ++i) {
-      int v_reg = mir_graph_->SRegToVReg(data->mir->ssa_rep->uses[i]);
-      unused_vregs_->ClearBit(v_reg);
-    }
-  }
-  vreg_chains_.RemoveLastMIRData();
-}
-
-void GvnDeadCodeElimination::RecordPassKillMoveByRenamingSrcDef(uint16_t src_change,
-                                                                uint16_t move_change) {
-  DCHECK_LT(src_change, move_change);
-  MIRData* src_data = vreg_chains_.GetMIRData(src_change);
-  MIRData* move_data = vreg_chains_.GetMIRData(move_change);
-  DCHECK(src_data->is_move_src);
-  DCHECK_EQ(src_data->wide_def, move_data->wide_def);
-  DCHECK(move_data->prev_value.change == kNPos || move_data->prev_value.change <= src_change);
-  DCHECK(!move_data->wide_def || move_data->prev_value_high.change == kNPos ||
-         move_data->prev_value_high.change <= src_change);
-
-  int old_s_reg = src_data->mir->ssa_rep->defs[0];
-  // NOTE: old_s_reg may differ from move_data->mir->ssa_rep->uses[0]; value names must match.
-  int new_s_reg = move_data->mir->ssa_rep->defs[0];
-  DCHECK_NE(old_s_reg, new_s_reg);
-
-  if (IsInstructionBinOp2Addr(src_data->mir->dalvikInsn.opcode) &&
-      src_data->vreg_def != move_data->vreg_def) {
-    // Rewrite binop_2ADDR with plain binop before doing the register rename.
-    ChangeBinOp2AddrToPlainBinOp(src_data->mir);
-  }
-  // Remove src_change from the vreg chain(s).
-  vreg_chains_.RemoveChange(src_change);
-  // Replace the move_change with the src_change, copying all necessary data.
-  src_data->is_move_src = move_data->is_move_src;
-  src_data->low_def_over_high_word = move_data->low_def_over_high_word;
-  src_data->high_def_over_low_word = move_data->high_def_over_low_word;
-  src_data->vreg_def = move_data->vreg_def;
-  src_data->prev_value = move_data->prev_value;
-  src_data->prev_value_high = move_data->prev_value_high;
-  src_data->mir->dalvikInsn.vA = move_data->vreg_def;
-  src_data->mir->ssa_rep->defs[0] = new_s_reg;
-  if (move_data->wide_def) {
-    DCHECK_EQ(src_data->mir->ssa_rep->defs[1], old_s_reg + 1);
-    src_data->mir->ssa_rep->defs[1] = new_s_reg + 1;
-  }
-  vreg_chains_.ReplaceChange(move_change, src_change);
-
-  // Rename uses and kill the move.
-  vreg_chains_.RenameVRegUses(src_change + 1u, vreg_chains_.NumMIRs(),
-                              old_s_reg, mir_graph_->SRegToVReg(old_s_reg),
-                              new_s_reg, mir_graph_->SRegToVReg(new_s_reg));
-  KillMIR(move_data);
-}
-
-void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change) {
-  MIRData* data = vreg_chains_.GetMIRData(check_change);
-  DCHECK(data->is_move || data->is_move_src);
-  int32_t dest_s_reg = data->mir->ssa_rep->defs[0];
-
-  if (data->is_move) {
-    // Check if source vreg has changed since the MOVE.
-    int32_t src_s_reg = data->mir->ssa_rep->uses[0];
-    uint32_t src_v_reg = mir_graph_->SRegToVReg(src_s_reg);
-    uint16_t src_change = vreg_chains_.FindFirstChangeAfter(src_v_reg, check_change);
-    bool wide = data->wide_def;
-    if (wide) {
-      uint16_t src_change_high = vreg_chains_.FindFirstChangeAfter(src_v_reg + 1, check_change);
-      if (src_change_high != kNPos && (src_change == kNPos || src_change_high < src_change)) {
-        src_change = src_change_high;
-      }
-    }
-    if (src_change == kNPos ||
-        !vreg_chains_.IsSRegUsed(src_change + 1u, vreg_chains_.NumMIRs(), dest_s_reg)) {
-      // We can simply change all uses of dest to src.
-      size_t rename_end = (src_change != kNPos) ? src_change + 1u : vreg_chains_.NumMIRs();
-      vreg_chains_.RenameVRegUses(check_change + 1u, rename_end,
-                                  dest_s_reg, mir_graph_->SRegToVReg(dest_s_reg),
-                                  src_s_reg,  mir_graph_->SRegToVReg(src_s_reg));
-
-      // Now, remove the MOVE from the vreg chain(s) and kill it.
-      vreg_chains_.RemoveChange(check_change);
-      KillMIR(data);
-      return;
-    }
-  }
-
-  if (data->is_move_src) {
-    // Try to find a MOVE to a vreg that wasn't changed since check_change.
-    uint16_t value_name =
-        data->wide_def ? lvn_->GetSregValueWide(dest_s_reg) : lvn_->GetSregValue(dest_s_reg);
-    uint32_t dest_v_reg = mir_graph_->SRegToVReg(dest_s_reg);
-    for (size_t c = check_change + 1u, size = vreg_chains_.NumMIRs(); c != size; ++c) {
-      MIRData* d = vreg_chains_.GetMIRData(c);
-      if (d->is_move && d->wide_def == data->wide_def &&
-          (d->prev_value.change == kNPos || d->prev_value.change <= check_change) &&
-          (!d->wide_def ||
-           d->prev_value_high.change == kNPos || d->prev_value_high.change <= check_change)) {
-        // Compare value names to find a MOVE of the same value.
-        int32_t src_s_reg = d->mir->ssa_rep->uses[0];
-        uint16_t src_name =
-            (d->wide_def ? lvn_->GetSregValueWide(src_s_reg) : lvn_->GetSregValue(src_s_reg));
-        if (value_name == src_name) {
-          // Check if the move's destination vreg is unused between check_change and the move.
-          uint32_t new_dest_v_reg = mir_graph_->SRegToVReg(d->mir->ssa_rep->defs[0]);
-          if (!vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg, mir_graph_) &&
-              (!d->wide_def ||
-               !vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg + 1, mir_graph_))) {
-            // If the move's destination vreg changed, check if the vreg we're trying
-            // to rename is unused after that change.
-            uint16_t dest_change = vreg_chains_.FindFirstChangeAfter(new_dest_v_reg, c);
-            if (d->wide_def) {
-              uint16_t dest_change_high = vreg_chains_.FindFirstChangeAfter(new_dest_v_reg + 1, c);
-              if (dest_change_high != kNPos &&
-                  (dest_change == kNPos || dest_change_high < dest_change)) {
-                dest_change = dest_change_high;
-              }
-            }
-            if (dest_change == kNPos ||
-                !vreg_chains_.IsVRegUsed(dest_change + 1u, size, dest_v_reg, mir_graph_)) {
-              RecordPassKillMoveByRenamingSrcDef(check_change, c);
-              return;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc() {
-  // Check if we're overwriting the result of a move or the definition of a move's source.
-  // For MOVE_WIDE, we may be overwriting partially; if that's the case, check that the other
-  // word wasn't previously overwritten - we would have tried to rename back then.
-  MIRData* data = vreg_chains_.LastMIRData();
-  if (!data->has_def) {
-    return;
-  }
-  // NOTE: Instructions such as new-array implicitly use all vregs (if they throw) but they can
-  // define a move source which can be renamed. Therefore we allow the checked change to be the
-  // change before no_uses_all_since_. This has no effect on moves as they never use all vregs.
-  if (data->prev_value.change != kNPos && data->prev_value.change + 1u >= no_uses_all_since_) {
-    MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value.change);
-    bool try_to_kill = false;
-    if (!check_data->is_move && !check_data->is_move_src) {
-      DCHECK(!try_to_kill);
-    } else if (!check_data->wide_def) {
-      // Narrow move; always fully overwritten by the last MIR.
-      try_to_kill = true;
-    } else if (data->low_def_over_high_word) {
-      // Overwriting only the high word; is the low word still valid?
-      DCHECK_EQ(check_data->vreg_def + 1u, data->vreg_def);
-      if (vreg_chains_.LastChange(check_data->vreg_def) == data->prev_value.change) {
-        try_to_kill = true;
-      }
-    } else if (!data->wide_def) {
-      // Overwriting only the low word; is the high word still valid?
-      if (vreg_chains_.LastChange(data->vreg_def + 1) == data->prev_value.change) {
-        try_to_kill = true;
-      }
-    } else {
-      // Overwriting both words; was the high word still from the same move?
-      if (data->prev_value_high.change == data->prev_value.change) {
-        try_to_kill = true;
-      }
-    }
-    if (try_to_kill) {
-      RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value.change);
-    }
-  }
-  if (data->wide_def && data->high_def_over_low_word &&
-      data->prev_value_high.change != kNPos &&
-      data->prev_value_high.change + 1u >= no_uses_all_since_) {
-    MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value_high.change);
-    bool try_to_kill = false;
-    if (!check_data->is_move && !check_data->is_move_src) {
-      DCHECK(!try_to_kill);
-    } else if (!check_data->wide_def) {
-      // Narrow move; always fully overwritten by the last MIR.
-      try_to_kill = true;
-    } else if (vreg_chains_.LastChange(check_data->vreg_def + 1) ==
-        data->prev_value_high.change) {
-      // High word is still valid.
-      try_to_kill = true;
-    }
-    if (try_to_kill) {
-      RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value_high.change);
-    }
-  }
-}
-
-void GvnDeadCodeElimination::RecordPassTryToKillLastMIR() {
-  MIRData* last_data = vreg_chains_.LastMIRData();
-  if (last_data->must_keep) {
-    return;
-  }
-  if (UNLIKELY(!last_data->has_def)) {
-    // Must be an eliminated MOVE. Drop its data and the data of all eliminated MIRs before it.
-    vreg_chains_.RemoveTrailingNops();
-    return;
-  }
-
-  // Try to kill a sequence of consecutive definitions of the same vreg. Allow mixing
-  // wide and non-wide defs; consider high word dead if low word has been overwritten.
-  uint16_t current_value = vreg_chains_.CurrentValue(last_data->vreg_def);
-  uint16_t change = vreg_chains_.NumMIRs() - 1u;
-  MIRData* data = last_data;
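-  // Walk back through consecutive defs of the same vreg; the sequence can only be killed
-  // if it starts at a def whose previous value matches the vreg's current value name.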
-  while (data->prev_value.value != current_value) {
-    --change;
-    if (data->prev_value.change == kNPos || data->prev_value.change != change) {
-      return;
-    }
-    data = vreg_chains_.GetMIRData(data->prev_value.change);
-    if (data->must_keep || !data->has_def || data->vreg_def != last_data->vreg_def) {
-      return;
-    }
-  }
-
-  bool wide = last_data->wide_def;
-  if (wide) {
-    // Check that the low word is valid.
-    if (data->low_def_over_high_word) {
-      return;
-    }
-    // Check that the high word is valid.
-    MIRData* high_data = data;
-    if (!high_data->wide_def) {
-      uint16_t high_change = vreg_chains_.FindFirstChangeAfter(data->vreg_def + 1, change);
-      DCHECK_NE(high_change, kNPos);
-      high_data = vreg_chains_.GetMIRData(high_change);
-      DCHECK_EQ(high_data->vreg_def, data->vreg_def);
-    }
-    if (high_data->prev_value_high.value != current_value || high_data->high_def_over_low_word) {
-      return;
-    }
-  }
-
-  MIR* phi = RenameSRegDefOrCreatePhi(data->prev_value.change, change, last_data->mir);
-  for (size_t i = 0, count = vreg_chains_.NumMIRs() - change; i != count; ++i) {
-    KillMIR(vreg_chains_.LastMIRData()->mir);
-    vreg_chains_.RemoveLastMIRData();
-  }
-  if (phi != nullptr) {
-    // Though the Phi has been added to the beginning, we can put the MIRData at the end.
-    vreg_chains_.AddMIRWithDef(phi, phi->dalvikInsn.vA, wide, current_value);
-    // Reset the previous value to avoid eventually eliminating the Phi itself (unless unused).
-    last_data = vreg_chains_.LastMIRData();
-    last_data->prev_value.value = kNoValue;
-    last_data->prev_value_high.value = kNoValue;
-  }
-}
-
-uint16_t GvnDeadCodeElimination::FindChangesToKill(uint16_t first_change, uint16_t last_change) {
-  // Process dependencies for changes in range [first_change, last_change) and record all
-  // changes that we need to kill. Return kNPos if there's a dependent change that must be
-  // kept unconditionally; otherwise return the end of the range processed before
-  // encountering a change that defines a dalvik reg we need to keep (last_change on
-  // full success).
-  changes_to_kill_.clear();
-  dependent_vregs_->ClearAllBits();
-  for (size_t change = first_change; change != last_change; ++change) {
-    MIRData* data = vreg_chains_.GetMIRData(change);
-    DCHECK(!data->uses_all_vregs);
-    bool must_not_depend = data->must_keep;
-    bool depends = false;
-    // Check if the MIR defines a vreg we're trying to eliminate.
-    if (data->has_def && vregs_to_kill_->IsBitSet(data->vreg_def)) {
-      if (change < kill_heads_[data->vreg_def]) {
-        must_not_depend = true;
-      } else {
-        depends = true;
-      }
-    }
-    if (data->has_def && data->wide_def && vregs_to_kill_->IsBitSet(data->vreg_def + 1)) {
-      if (change < kill_heads_[data->vreg_def + 1]) {
-        must_not_depend = true;
-      } else {
-        depends = true;
-      }
-    }
-    if (!depends) {
-      // Check for dependency through SSA reg uses.
-      SSARepresentation* ssa_rep = data->mir->ssa_rep;
-      for (int i = 0; i != ssa_rep->num_uses; ++i) {
-        if (dependent_vregs_->IsBitSet(mir_graph_->SRegToVReg(ssa_rep->uses[i]))) {
-          depends = true;
-          break;
-        }
-      }
-    }
-    // Now check if we can eliminate the insn if we need to.
-    if (depends && must_not_depend) {
-      return kNPos;
-    }
-    if (depends && data->has_def &&
-        vreg_chains_.IsTopChange(change) && !vregs_to_kill_->IsBitSet(data->vreg_def) &&
-        !unused_vregs_->IsBitSet(data->vreg_def) &&
-        (!data->wide_def || !unused_vregs_->IsBitSet(data->vreg_def + 1))) {
-      // This is a top change but neither unnecessary nor one of the top kill changes.
-      return change;
-    }
-    // Finally, update the data.
-    if (depends) {
-      changes_to_kill_.push_back(change);
-      if (data->has_def) {
-        dependent_vregs_->SetBit(data->vreg_def);
-        if (data->wide_def) {
-          dependent_vregs_->SetBit(data->vreg_def + 1);
-        }
-      }
-    } else {
-      if (data->has_def) {
-        dependent_vregs_->ClearBit(data->vreg_def);
-        if (data->wide_def) {
-          dependent_vregs_->ClearBit(data->vreg_def + 1);
-        }
-      }
-    }
-  }
-  return last_change;
-}
-
-void GvnDeadCodeElimination::BackwardPassTryToKillRevertVRegs() {
-}
-
-bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() {
-  MIRData* last_data = vreg_chains_.LastMIRData();
-  if (last_data->must_keep) {
-    return false;
-  }
-  DCHECK(!last_data->uses_all_vregs);
-  if (!last_data->has_def) {
-    // Previously eliminated.
-    DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop));
-    vreg_chains_.RemoveTrailingNops();
-    return true;
-  }
-  if (unused_vregs_->IsBitSet(last_data->vreg_def) ||
-      (last_data->wide_def && unused_vregs_->IsBitSet(last_data->vreg_def + 1))) {
-    if (last_data->wide_def) {
-      // For wide defs, one of the vregs may still be considered needed; fix that.
-      unused_vregs_->SetBit(last_data->vreg_def);
-      unused_vregs_->SetBit(last_data->vreg_def + 1);
-    }
-    KillMIR(last_data->mir);
-    vreg_chains_.RemoveLastMIRData();
-    return true;
-  }
-
-  vregs_to_kill_->ClearAllBits();
-  size_t num_mirs = vreg_chains_.NumMIRs();
-  DCHECK_NE(num_mirs, 0u);
-  uint16_t kill_change = num_mirs - 1u;
-  uint16_t start = num_mirs;
-  size_t num_killed_top_changes = 0u;
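-  // Try to revert up to kMaxNumTopChangesToKill top changes. Each iteration adds the
-  // candidate change's vregs to the kill set and re-runs the dependency scan;
-  // FindChangesToKill() returns num_mirs on success, kNPos on failure, or another
-  // top change that must also be reverted.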
-  while (num_killed_top_changes != kMaxNumTopChangesToKill &&
-      kill_change != kNPos && kill_change != num_mirs) {
-    ++num_killed_top_changes;
-
-    DCHECK(vreg_chains_.IsTopChange(kill_change));
-    MIRData* data = vreg_chains_.GetMIRData(kill_change);
-    int count = data->wide_def ? 2 : 1;
-    for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) {
-      uint16_t kill_head = vreg_chains_.FindKillHead(v_reg, no_uses_all_since_);
-      if (kill_head == kNPos) {
-        return false;
-      }
-      kill_heads_[v_reg] = kill_head;
-      vregs_to_kill_->SetBit(v_reg);
-      start = std::min(start, kill_head);
-    }
-    DCHECK_LT(start, vreg_chains_.NumMIRs());
-
-    kill_change = FindChangesToKill(start, num_mirs);
-  }
-
-  if (kill_change != num_mirs) {
-    return false;
-  }
-
-  // Kill all MIRs marked as dependent.
-  for (uint32_t v_reg : vregs_to_kill_->Indexes()) {
-    // Rename s_regs or create Phi only once for each MIR (only for low word).
-    MIRData* data = vreg_chains_.GetMIRData(vreg_chains_.LastChange(v_reg));
-    DCHECK(data->has_def);
-    if (data->vreg_def == v_reg) {
-      MIRData* kill_head_data = vreg_chains_.GetMIRData(kill_heads_[v_reg]);
-      RenameSRegDefOrCreatePhi(kill_head_data->PrevChange(v_reg), num_mirs, data->mir);
-    } else {
-      DCHECK_EQ(data->vreg_def + 1u, v_reg);
-      DCHECK_EQ(vreg_chains_.GetMIRData(kill_heads_[v_reg - 1u])->PrevChange(v_reg - 1u),
-                vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg));
-    }
-  }
-  for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) {
-    MIRData* data = vreg_chains_.GetMIRData(*it);
-    DCHECK(!data->must_keep);
-    DCHECK(data->has_def);
-    vreg_chains_.RemoveChange(*it);
-    KillMIR(data);
-  }
-
-  // Each dependent register not in vregs_to_kill_ is either already marked unused or
-  // it's one word of a wide register where the other word has been overwritten.
-  unused_vregs_->UnionIfNotIn(dependent_vregs_, vregs_to_kill_);
-
-  vreg_chains_.RemoveTrailingNops();
-  return true;
-}
-
-bool GvnDeadCodeElimination::RecordMIR(MIR* mir) {
-  bool must_keep = false;
-  bool uses_all_vregs = false;
-  bool is_move = false;
-  uint16_t opcode = mir->dalvikInsn.opcode;
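-  // Classify the instruction: must_keep marks insns with side effects that cannot be
-  // eliminated; uses_all_vregs marks implicit dependencies on all vregs, typically for
-  // potentially throwing insns where all vregs must remain valid.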
-  switch (opcode) {
-    case kMirOpPhi: {
-      // Determine if this Phi is merging wide regs.
-      RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
-      if (raw_dest.high_word) {
-        // This is the high part of a wide reg. Ignore the Phi.
-        return false;
-      }
-      bool wide = raw_dest.wide;
-      // Record the value.
-      DCHECK_EQ(mir->ssa_rep->num_defs, 1);
-      int s_reg = mir->ssa_rep->defs[0];
-      uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg);
-
-      int v_reg = mir_graph_->SRegToVReg(s_reg);
-      DCHECK_EQ(vreg_chains_.CurrentValue(v_reg), kNoValue);  // No previous def for v_reg.
-      if (wide) {
-        DCHECK_EQ(vreg_chains_.CurrentValue(v_reg + 1), kNoValue);
-      }
-      vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value);
-      return true;  // Avoid the common processing.
-    }
-
-    case kMirOpNop:
-    case Instruction::NOP:
-      // Don't record NOPs.
-      return false;
-
-    case kMirOpCheck:
-      must_keep = true;
-      uses_all_vregs = true;
-      break;
-
-    case Instruction::RETURN_VOID:
-    case Instruction::RETURN:
-    case Instruction::RETURN_OBJECT:
-    case Instruction::RETURN_WIDE:
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32:
-    case Instruction::PACKED_SWITCH:
-    case Instruction::SPARSE_SWITCH:
-    case Instruction::IF_EQ:
-    case Instruction::IF_NE:
-    case Instruction::IF_LT:
-    case Instruction::IF_GE:
-    case Instruction::IF_GT:
-    case Instruction::IF_LE:
-    case Instruction::IF_EQZ:
-    case Instruction::IF_NEZ:
-    case Instruction::IF_LTZ:
-    case Instruction::IF_GEZ:
-    case Instruction::IF_GTZ:
-    case Instruction::IF_LEZ:
-    case kMirOpFusedCmplFloat:
-    case kMirOpFusedCmpgFloat:
-    case kMirOpFusedCmplDouble:
-    case kMirOpFusedCmpgDouble:
-    case kMirOpFusedCmpLong:
-      must_keep = true;
-      uses_all_vregs = true;  // Keep the implicit dependencies on all vregs.
-      break;
-
-    case Instruction::CONST_CLASS:
-    case Instruction::CONST_STRING:
-    case Instruction::CONST_STRING_JUMBO:
-      // NOTE: We currently treat CONST_CLASS, CONST_STRING and CONST_STRING_JUMBO as
-      // throwing, but we could conceivably try to eliminate those exceptions if we're
-      // retrieving the class/string repeatedly.
-      must_keep = true;
-      uses_all_vregs = true;
-      break;
-
-    case Instruction::MONITOR_ENTER:
-    case Instruction::MONITOR_EXIT:
-      // We can actually try to optimize across the acquire operation of MONITOR_ENTER;
-      // the value names provided by GVN reflect the possible changes to memory visibility.
-      // NOTE: In ART, MONITOR_ENTER and MONITOR_EXIT can throw only NPE.
-      must_keep = true;
-      uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0;
-      break;
-
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::THROW:
-    case Instruction::FILLED_NEW_ARRAY:
-    case Instruction::FILLED_NEW_ARRAY_RANGE:
-    case Instruction::FILL_ARRAY_DATA:
-      must_keep = true;
-      uses_all_vregs = true;
-      break;
-
-    case Instruction::NEW_INSTANCE:
-    case Instruction::NEW_ARRAY:
-      must_keep = true;
-      uses_all_vregs = true;
-      break;
-
-    case Instruction::CHECK_CAST:
-      DCHECK_EQ(mir->ssa_rep->num_uses, 1);
-      must_keep = true;  // Keep for type information even if MIR_IGNORE_CHECK_CAST.
-      uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) == 0;
-      break;
-
-    case kMirOpNullCheck:
-      DCHECK_EQ(mir->ssa_rep->num_uses, 1);
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
-        mir->ssa_rep->num_uses = 0;
-        mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-        return false;
-      }
-      must_keep = true;
-      uses_all_vregs = true;
-      break;
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_OBJECT:
-    case Instruction::MOVE_RESULT_WIDE:
-      break;
-
-    case Instruction::INSTANCE_OF:
-      break;
-
-    case Instruction::MOVE_EXCEPTION:
-      must_keep = true;
-      break;
-
-    case kMirOpCopy:
-    case Instruction::MOVE:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_16:
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_FROM16:
-    case Instruction::MOVE_WIDE_16:
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_OBJECT_FROM16:
-    case Instruction::MOVE_OBJECT_16: {
-      is_move = true;
-      // If the MIR defining src vreg is known, allow renaming all uses of src vreg to dest vreg
-      // while updating the defining MIR to directly define dest vreg. However, changing a Phi's
-      // def this way doesn't work without changing MIRs in other BBs.
-      int src_v_reg = mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]);
-      int src_change = vreg_chains_.LastChange(src_v_reg);
-      if (src_change != kNPos) {
-        MIRData* src_data = vreg_chains_.GetMIRData(src_change);
-        if (static_cast<int>(src_data->mir->dalvikInsn.opcode) != kMirOpPhi) {
-          src_data->is_move_src = true;
-        }
-      }
-      break;
-    }
-
-    case Instruction::CONST_4:
-    case Instruction::CONST_16:
-    case Instruction::CONST:
-    case Instruction::CONST_HIGH16:
-    case Instruction::CONST_WIDE_16:
-    case Instruction::CONST_WIDE_32:
-    case Instruction::CONST_WIDE:
-    case Instruction::CONST_WIDE_HIGH16:
-    case Instruction::CMPL_FLOAT:
-    case Instruction::CMPG_FLOAT:
-    case Instruction::CMPL_DOUBLE:
-    case Instruction::CMPG_DOUBLE:
-    case Instruction::CMP_LONG:
-    case Instruction::NEG_INT:
-    case Instruction::NOT_INT:
-    case Instruction::NEG_LONG:
-    case Instruction::NOT_LONG:
-    case Instruction::NEG_FLOAT:
-    case Instruction::NEG_DOUBLE:
-    case Instruction::INT_TO_LONG:
-    case Instruction::INT_TO_FLOAT:
-    case Instruction::INT_TO_DOUBLE:
-    case Instruction::LONG_TO_INT:
-    case Instruction::LONG_TO_FLOAT:
-    case Instruction::LONG_TO_DOUBLE:
-    case Instruction::FLOAT_TO_INT:
-    case Instruction::FLOAT_TO_LONG:
-    case Instruction::FLOAT_TO_DOUBLE:
-    case Instruction::DOUBLE_TO_INT:
-    case Instruction::DOUBLE_TO_LONG:
-    case Instruction::DOUBLE_TO_FLOAT:
-    case Instruction::INT_TO_BYTE:
-    case Instruction::INT_TO_CHAR:
-    case Instruction::INT_TO_SHORT:
-    case Instruction::ADD_INT:
-    case Instruction::SUB_INT:
-    case Instruction::MUL_INT:
-    case Instruction::AND_INT:
-    case Instruction::OR_INT:
-    case Instruction::XOR_INT:
-    case Instruction::SHL_INT:
-    case Instruction::SHR_INT:
-    case Instruction::USHR_INT:
-    case Instruction::ADD_LONG:
-    case Instruction::SUB_LONG:
-    case Instruction::MUL_LONG:
-    case Instruction::AND_LONG:
-    case Instruction::OR_LONG:
-    case Instruction::XOR_LONG:
-    case Instruction::SHL_LONG:
-    case Instruction::SHR_LONG:
-    case Instruction::USHR_LONG:
-    case Instruction::ADD_FLOAT:
-    case Instruction::SUB_FLOAT:
-    case Instruction::MUL_FLOAT:
-    case Instruction::DIV_FLOAT:
-    case Instruction::REM_FLOAT:
-    case Instruction::ADD_DOUBLE:
-    case Instruction::SUB_DOUBLE:
-    case Instruction::MUL_DOUBLE:
-    case Instruction::DIV_DOUBLE:
-    case Instruction::REM_DOUBLE:
-    case Instruction::ADD_INT_2ADDR:
-    case Instruction::SUB_INT_2ADDR:
-    case Instruction::MUL_INT_2ADDR:
-    case Instruction::AND_INT_2ADDR:
-    case Instruction::OR_INT_2ADDR:
-    case Instruction::XOR_INT_2ADDR:
-    case Instruction::SHL_INT_2ADDR:
-    case Instruction::SHR_INT_2ADDR:
-    case Instruction::USHR_INT_2ADDR:
-    case Instruction::ADD_LONG_2ADDR:
-    case Instruction::SUB_LONG_2ADDR:
-    case Instruction::MUL_LONG_2ADDR:
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::OR_LONG_2ADDR:
-    case Instruction::XOR_LONG_2ADDR:
-    case Instruction::SHL_LONG_2ADDR:
-    case Instruction::SHR_LONG_2ADDR:
-    case Instruction::USHR_LONG_2ADDR:
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT_2ADDR:
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE_2ADDR:
-    case Instruction::ADD_INT_LIT16:
-    case Instruction::RSUB_INT:
-    case Instruction::MUL_INT_LIT16:
-    case Instruction::AND_INT_LIT16:
-    case Instruction::OR_INT_LIT16:
-    case Instruction::XOR_INT_LIT16:
-    case Instruction::ADD_INT_LIT8:
-    case Instruction::RSUB_INT_LIT8:
-    case Instruction::MUL_INT_LIT8:
-    case Instruction::AND_INT_LIT8:
-    case Instruction::OR_INT_LIT8:
-    case Instruction::XOR_INT_LIT8:
-    case Instruction::SHL_INT_LIT8:
-    case Instruction::SHR_INT_LIT8:
-    case Instruction::USHR_INT_LIT8:
-      break;
-
-    case Instruction::DIV_INT:
-    case Instruction::REM_INT:
-    case Instruction::DIV_LONG:
-    case Instruction::REM_LONG:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::REM_INT_2ADDR:
-    case Instruction::DIV_LONG_2ADDR:
-    case Instruction::REM_LONG_2ADDR:
-      if ((mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-        must_keep = true;
-        uses_all_vregs = true;
-      }
-      break;
-
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::REM_INT_LIT16:
-    case Instruction::DIV_INT_LIT8:
-    case Instruction::REM_INT_LIT8:
-      if (mir->dalvikInsn.vC == 0) {  // Explicit division by 0?
-        must_keep = true;
-        uses_all_vregs = true;
-      }
-      break;
-
-    case Instruction::ARRAY_LENGTH:
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
-        must_keep = true;
-        uses_all_vregs = true;
-      }
-      break;
-
-    case Instruction::AGET_OBJECT:
-    case Instruction::AGET:
-    case Instruction::AGET_WIDE:
-    case Instruction::AGET_BOOLEAN:
-    case Instruction::AGET_BYTE:
-    case Instruction::AGET_CHAR:
-    case Instruction::AGET_SHORT:
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
-          (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) {
-        must_keep = true;
-        uses_all_vregs = true;
-      }
-      break;
-
-    case Instruction::APUT_OBJECT:
-    case Instruction::APUT:
-    case Instruction::APUT_WIDE:
-    case Instruction::APUT_BYTE:
-    case Instruction::APUT_BOOLEAN:
-    case Instruction::APUT_SHORT:
-    case Instruction::APUT_CHAR:
-      must_keep = true;
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
-          (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) {
-        uses_all_vregs = true;
-      }
-      break;
-
-    case Instruction::IGET_OBJECT:
-    case Instruction::IGET:
-    case Instruction::IGET_WIDE:
-    case Instruction::IGET_BOOLEAN:
-    case Instruction::IGET_BYTE:
-    case Instruction::IGET_CHAR:
-    case Instruction::IGET_SHORT: {
-      const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir);
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
-          !info.IsResolved() || !info.FastGet()) {
-        must_keep = true;
-        uses_all_vregs = true;
-      } else if (info.IsVolatile()) {
-        must_keep = true;
-      }
-      break;
-    }
-
-    case Instruction::IPUT_OBJECT:
-    case Instruction::IPUT:
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_SHORT: {
-      must_keep = true;
-      const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir);
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
-          !info.IsResolved() || !info.FastPut()) {
-        uses_all_vregs = true;
-      }
-      break;
-    }
-
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT: {
-      const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir);
-      if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 ||
-          !info.IsResolved() || !info.FastGet()) {
-        must_keep = true;
-        uses_all_vregs = true;
-      } else if (info.IsVolatile()) {
-        must_keep = true;
-      }
-      break;
-    }
-
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT: {
-      must_keep = true;
-      const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir);
-      if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 ||
-          !info.IsResolved() || !info.FastPut()) {
-        uses_all_vregs = true;
-      }
-      break;
-    }
-
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-      UNREACHABLE();
-  }
-
-  if (mir->ssa_rep->num_defs != 0) {
-    DCHECK(mir->ssa_rep->num_defs == 1 || mir->ssa_rep->num_defs == 2);
-    bool wide = (mir->ssa_rep->num_defs == 2);
-    int s_reg = mir->ssa_rep->defs[0];
-    int v_reg = mir_graph_->SRegToVReg(s_reg);
-    uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg);
-    DCHECK_NE(new_value, kNoValue);
-
-    vreg_chains_.UpdateInitialVRegValue(v_reg, wide, lvn_);
-    vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value);
-    if (is_move) {
-      // Allow renaming all uses of dest vreg to src vreg.
-      vreg_chains_.LastMIRData()->is_move = true;
-    }
-  } else {
-    vreg_chains_.AddMIRWithoutDef(mir);
-    DCHECK(!is_move) << opcode;
-  }
-
-  if (must_keep) {
-    MIRData* last_data = vreg_chains_.LastMIRData();
-    last_data->must_keep = true;
-    if (uses_all_vregs) {
-      last_data->uses_all_vregs = true;
-      no_uses_all_since_ = vreg_chains_.NumMIRs();
-    }
-  } else {
-    DCHECK_NE(mir->ssa_rep->num_defs, 0) << opcode;
-    DCHECK(!uses_all_vregs) << opcode;
-  }
-  return true;
-}
-
-}  // namespace art
diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h
deleted file mode 100644
index 06022db..0000000
--- a/compiler/dex/gvn_dead_code_elimination.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_
-#define ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_
-
-#include "base/arena_object.h"
-#include "base/scoped_arena_containers.h"
-#include "global_value_numbering.h"
-
-namespace art {
-
-class ArenaBitVector;
-class BasicBlock;
-class LocalValueNumbering;
-class MIR;
-class MIRGraph;
-
-/**
- * @class GvnDeadCodeElimination
- * @details Eliminate dead code based on the results of global value numbering.
- * Also get rid of MOVE insns when we can use the source instead of destination
- * without affecting the vreg values at safepoints; this is useful in methods
- * with a large number of vregs that frequently move values to and from low vregs
- * to accommodate insns that can work only with the low 16 or 256 vregs.
- */
-class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> {
- public:
-  GvnDeadCodeElimination(const GlobalValueNumbering* gvn, ScopedArenaAllocator* alloc);
-
-  // Apply the DCE to a basic block.
-  void Apply(BasicBlock* bb);
-
- private:
-  static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
-  static constexpr uint16_t kNPos = 0xffffu;
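-  // Limit on how many top changes BackwardPassTryToKillLastMIR() tries to revert at once.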
-  static constexpr size_t kMaxNumTopChangesToKill = 2;
-
-  struct VRegValue {
-    VRegValue() : value(kNoValue), change(kNPos) { }
-
-    // Value name as reported by GVN, kNoValue if not available.
-    uint16_t value;
-    // Index of the change in mir_data_ that defined the value, kNPos if initial value for the BB.
-    uint16_t change;
-  };
-
-  struct MIRData {
-    explicit MIRData(MIR* m)
-        : mir(m), uses_all_vregs(false), must_keep(false), is_move(false), is_move_src(false),
-          has_def(false), wide_def(false),
-          low_def_over_high_word(false), high_def_over_low_word(false), vreg_def(0u),
-          prev_value(), prev_value_high() {
-    }
-
-    uint16_t PrevChange(int v_reg) const;
-    void SetPrevChange(int v_reg, uint16_t change);
-    void RemovePrevChange(int v_reg, MIRData* prev_data);
-
-    MIR* mir;
-    bool uses_all_vregs : 1;  // If mir uses all vregs, uses in mir->ssa_rep are irrelevant.
-    bool must_keep : 1;
-    bool is_move : 1;
-    bool is_move_src : 1;
-    bool has_def : 1;
-    bool wide_def : 1;
-    bool low_def_over_high_word : 1;
-    bool high_def_over_low_word : 1;
-    uint16_t vreg_def;
-    VRegValue prev_value;
-    VRegValue prev_value_high;   // For wide defs.
-  };
-
-  class VRegChains {
-   public:
-    VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc);
-
-    void Reset();
-
-    void AddMIRWithDef(MIR* mir, int v_reg, bool wide, uint16_t new_value);
-    void AddMIRWithoutDef(MIR* mir);
-    void RemoveLastMIRData();
-    void RemoveTrailingNops();
-
-    size_t NumMIRs() const;
-    MIRData* GetMIRData(size_t pos);
-    MIRData* LastMIRData();
-
-    uint32_t NumVRegs() const;
-    void InsertInitialValueHigh(int v_reg, uint16_t value);
-    void UpdateInitialVRegValue(int v_reg, bool wide, const LocalValueNumbering* lvn);
-    uint16_t LastChange(int v_reg);
-    uint16_t CurrentValue(int v_reg);
-
-    uint16_t FindKillHead(int v_reg, uint16_t cutoff);
-    uint16_t FindFirstChangeAfter(int v_reg, uint16_t change) const;
-    void ReplaceChange(uint16_t old_change, uint16_t new_change);
-    void RemoveChange(uint16_t change);
-    bool IsTopChange(uint16_t change) const;
-    bool IsSRegUsed(uint16_t first_change, uint16_t last_change, int s_reg) const;
-    bool IsVRegUsed(uint16_t first_change, uint16_t last_change, int v_reg,
-                    MIRGraph* mir_graph) const;
-    void RenameSRegUses(uint16_t first_change, uint16_t last_change,
-                        int old_s_reg, int new_s_reg, bool wide);
-    void RenameVRegUses(uint16_t first_change, uint16_t last_change,
-                        int old_s_reg, int old_v_reg, int new_s_reg, int new_v_reg);
-
-   private:
-    const uint32_t num_vregs_;
-    VRegValue* const vreg_data_;
-    BitVector vreg_high_words_;
-    ScopedArenaVector<MIRData> mir_data_;
-  };
-
-  void RecordPass();
-  void BackwardPass();
-
-  void KillMIR(MIRData* data);
-  static void KillMIR(MIR* mir);
-  static void ChangeBinOp2AddrToPlainBinOp(MIR* mir);
-  MIR* CreatePhi(int s_reg);
-  MIR* RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change, MIR* mir_to_kill);
-
-  // Update state variables going backwards through a MIR.
-  void BackwardPassProcessLastMIR();
-
-  uint16_t FindChangesToKill(uint16_t first_change, uint16_t last_change);
-  void BackwardPassTryToKillRevertVRegs();
-  bool BackwardPassTryToKillLastMIR();
-
-  void RecordPassKillMoveByRenamingSrcDef(uint16_t src_change, uint16_t move_change);
-  void RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change);
-  void RecordPassTryToKillOverwrittenMoveOrMoveSrc();
-  void RecordPassTryToKillLastMIR();
-
-  bool RecordMIR(MIR* mir);
-
-  const GlobalValueNumbering* const gvn_;
-  MIRGraph* const mir_graph_;
-
-  VRegChains vreg_chains_;
-  BasicBlock* bb_;
-  const LocalValueNumbering* lvn_;
-  size_t no_uses_all_since_;  // The change index after the last change with uses_all_vregs set.
-
-  // Data used when processing MIRs in reverse order.
-  ArenaBitVector* unused_vregs_;              // vregs that are not needed later.
-  ArenaBitVector* vregs_to_kill_;             // vregs that revert to a previous value.
-  uint16_t* kill_heads_;  // For each vreg in vregs_to_kill_, the first change to kill.
-  ScopedArenaVector<uint16_t> changes_to_kill_;
-  ArenaBitVector* dependent_vregs_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
deleted file mode 100644
index 28c61a8..0000000
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ /dev/null
@@ -1,2201 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dataflow_iterator-inl.h"
-#include "dex/mir_field_info.h"
-#include "global_value_numbering.h"
-#include "gvn_dead_code_elimination.h"
-#include "local_value_numbering.h"
-#include "gtest/gtest.h"
-
-namespace art {
-
-class GvnDeadCodeEliminationTest : public testing::Test {
- protected:
-  static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
-
-  struct IFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_field_idx;
-    bool is_volatile;
-    DexMemAccessType type;
-  };
-
-  struct SFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_field_idx;
-    bool is_volatile;
-    DexMemAccessType type;
-  };
-
-  struct BBDef {
-    static constexpr size_t kMaxSuccessors = 4;
-    static constexpr size_t kMaxPredecessors = 4;
-
-    BBType type;
-    size_t num_successors;
-    BasicBlockId successors[kMaxPredecessors];
-    size_t num_predecessors;
-    BasicBlockId predecessors[kMaxPredecessors];
-  };
-
-  struct MIRDef {
-    static constexpr size_t kMaxSsaDefs = 2;
-    static constexpr size_t kMaxSsaUses = 4;
-
-    BasicBlockId bbid;
-    Instruction::Code opcode;
-    int64_t value;
-    uint32_t field_info;
-    size_t num_uses;
-    int32_t uses[kMaxSsaUses];
-    size_t num_defs;
-    int32_t defs[kMaxSsaDefs];
-  };
-
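-// Helper macros for concisely building the BBDef and MIRDef tables used in the tests.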
-#define DEF_SUCC0() \
-    0u, { }
-#define DEF_SUCC1(s1) \
-    1u, { s1 }
-#define DEF_SUCC2(s1, s2) \
-    2u, { s1, s2 }
-#define DEF_SUCC3(s1, s2, s3) \
-    3u, { s1, s2, s3 }
-#define DEF_SUCC4(s1, s2, s3, s4) \
-    4u, { s1, s2, s3, s4 }
-#define DEF_PRED0() \
-    0u, { }
-#define DEF_PRED1(p1) \
-    1u, { p1 }
-#define DEF_PRED2(p1, p2) \
-    2u, { p1, p2 }
-#define DEF_PRED3(p1, p2, p3) \
-    3u, { p1, p2, p3 }
-#define DEF_PRED4(p1, p2, p3, p4) \
-    4u, { p1, p2, p3, p4 }
-#define DEF_BB(type, succ, pred) \
-    { type, succ, pred }
-
-#define DEF_CONST(bb, opcode, reg, value) \
-    { bb, opcode, value, 0u, 0, { }, 1, { reg } }
-#define DEF_CONST_WIDE(bb, opcode, reg, value) \
-    { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_CONST_STRING(bb, opcode, reg, index) \
-    { bb, opcode, index, 0u, 0, { }, 1, { reg } }
-#define DEF_IGET(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } }
-#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } }
-#define DEF_IPUT(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } }
-#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } }
-#define DEF_SGET(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 0, { }, 1, { reg } }
-#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_SPUT(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 1, { reg }, 0, { } }
-#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } }
-#define DEF_AGET(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } }
-#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } }
-#define DEF_APUT(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } }
-#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } }
-#define DEF_INVOKE1(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
-#define DEF_UNIQUE_REF(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 0, { }, 1, { reg } }  // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
-#define DEF_IFZ(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
-#define DEF_MOVE(bb, opcode, reg, src) \
-    { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } }
-#define DEF_MOVE_WIDE(bb, opcode, reg, src) \
-    { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
-#define DEF_PHI2(bb, reg, src1, src2) \
-    { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
-#define DEF_UNOP(bb, opcode, result, src1) \
-    { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } }
-#define DEF_BINOP(bb, opcode, result, src1, src2) \
-    { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
-#define DEF_BINOP_WIDE(bb, opcode, result, src1, src2) \
-    { bb, opcode, 0u, 0u, 4, { src1, src1 + 1, src2, src2 + 1 }, 2, { result, result + 1 } }
-
-  void DoPrepareIFields(const IFieldDef* defs, size_t count) {
-    cu_.mir_graph->ifield_lowering_infos_.clear();
-    cu_.mir_graph->ifield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx, def->type, false);
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ =
-            MirIFieldLoweringInfo::kFlagFastGet | MirIFieldLoweringInfo::kFlagFastPut |
-            (field_info.flags_ & ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile));
-      }
-      cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareIFields(const IFieldDef (&defs)[count]) {
-    DoPrepareIFields(defs, count);
-  }
-
-  void DoPrepareSFields(const SFieldDef* defs, size_t count) {
-    cu_.mir_graph->sfield_lowering_infos_.clear();
-    cu_.mir_graph->sfield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
-      // Mark even unresolved fields as initialized.
-      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
-      // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN.
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ =
-            MirSFieldLoweringInfo::kFlagFastGet | MirSFieldLoweringInfo::kFlagFastPut |
-            (field_info.flags_ & ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile));
-      }
-      cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareSFields(const SFieldDef (&defs)[count]) {
-    DoPrepareSFields(defs, count);
-  }
-
-  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
-    cu_.mir_graph->block_id_map_.clear();
-    cu_.mir_graph->block_list_.clear();
-    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
-    ASSERT_EQ(kNullBlock, defs[0].type);
-    ASSERT_EQ(kEntryBlock, defs[1].type);
-    ASSERT_EQ(kExitBlock, defs[2].type);
-    for (size_t i = 0u; i != count; ++i) {
-      const BBDef* def = &defs[i];
-      BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type);
-      if (def->num_successors <= 2) {
-        bb->successor_block_list_type = kNotUsed;
-        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
-        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
-      } else {
-        bb->successor_block_list_type = kPackedSwitch;
-        bb->fall_through = 0u;
-        bb->taken = 0u;
-        bb->successor_blocks.reserve(def->num_successors);
-        for (size_t j = 0u; j != def->num_successors; ++j) {
-          SuccessorBlockInfo* successor_block_info =
-              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessors));
-          successor_block_info->block = j;
-          successor_block_info->key = 0u;  // Not used by class init check elimination.
-          bb->successor_blocks.push_back(successor_block_info);
-        }
-      }
-      bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors);
-      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
-        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
-            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
-        bb->data_flow_info->live_in_v = live_in_v_;
-        bb->data_flow_info->vreg_to_ssa_map_exit = nullptr;
-      }
-    }
-    ASSERT_EQ(count, cu_.mir_graph->block_list_.size());
-    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1];
-    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
-    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2];
-    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
-  }
-
-  template <size_t count>
-  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
-    DoPrepareBasicBlocks(defs, count);
-  }
-
-  int SRegToVReg(int32_t s_reg, bool wide) {
-    int v_reg = cu_.mir_graph->SRegToVReg(s_reg);
-    CHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
-    if (wide) {
-      CHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
-    }
-    return v_reg;
-  }
-
-  int SRegToVReg(int32_t* uses, size_t* use, bool wide) {
-    int v_reg = SRegToVReg(uses[*use], wide);
-    if (wide) {
-      CHECK_EQ(uses[*use] + 1, uses[*use + 1]);
-      *use += 2u;
-    } else {
-      *use += 1u;
-    }
-    return v_reg;
-  }
-
-  void DoPrepareMIRs(const MIRDef* defs, size_t count) {
-    mir_count_ = count;
-    mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR));
-    ssa_reps_.resize(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const MIRDef* def = &defs[i];
-      MIR* mir = &mirs_[i];
-      ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
-      BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
-      bb->AppendMIR(mir);
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
-      mir->dalvikInsn.vB_wide = def->value;
-      if (IsInstructionIGetOrIPut(def->opcode)) {
-        ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
-        mir->meta.ifield_lowering_info = def->field_info;
-        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
-                  IGetOrIPutMemAccessType(def->opcode));
-      } else if (IsInstructionSGetOrSPut(def->opcode)) {
-        ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->field_info;
-        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
-                  SGetOrSPutMemAccessType(def->opcode));
-      } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
-        mir->meta.phi_incoming =
-            allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo);
-        ASSERT_EQ(def->num_uses, bb->predecessors.size());
-        std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming);
-      }
-      mir->ssa_rep = &ssa_reps_[i];
-      cu_.mir_graph->AllocateSSAUseData(mir, def->num_uses);
-      std::copy_n(def->uses, def->num_uses, mir->ssa_rep->uses);
-      // Keep mir->ssa_rep->fp_use[.] zero-initialized (false). Not used by DCE, only copied.
-      cu_.mir_graph->AllocateSSADefData(mir, def->num_defs);
-      std::copy_n(def->defs, def->num_defs, mir->ssa_rep->defs);
-      // Keep mir->ssa_rep->fp_def[.] zero-initialized (false). Not used by DCE, only copied.
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->offset = i;  // LVN uses offset only for debug output.
-      mir->optimization_flags = 0u;
-      uint64_t df_attrs = MIRGraph::GetDataFlowAttributes(mir);
-      if ((df_attrs & DF_DA) != 0) {
-        CHECK_NE(def->num_defs, 0u);
-        mir->dalvikInsn.vA = SRegToVReg(def->defs[0], (df_attrs & DF_A_WIDE) != 0);
-        bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA] = def->defs[0];
-        if ((df_attrs & DF_A_WIDE) != 0) {
-          CHECK_EQ(def->defs[0] + 1, def->defs[1]);
-          bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA + 1u] = def->defs[0] + 1;
-        }
-      }
-      if ((df_attrs & (DF_UA | DF_UB | DF_UC)) != 0) {
-        size_t use = 0;
-        if ((df_attrs & DF_UA) != 0) {
-          mir->dalvikInsn.vA = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_A_WIDE) != 0);
-        }
-        if ((df_attrs & DF_UB) != 0) {
-          mir->dalvikInsn.vB = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_B_WIDE) != 0);
-        }
-        if ((df_attrs & DF_UC) != 0) {
-          mir->dalvikInsn.vC = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_C_WIDE) != 0);
-        }
-        DCHECK_EQ(def->num_uses, use);
-      }
-    }
-    DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
-        cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
-    code_item->insns_size_in_code_units_ = 2u * count;
-    code_item->registers_size_ = kMaxVRegs;
-    cu_.mir_graph->current_code_item_ = code_item;
-  }
-
-  template <size_t count>
-  void PrepareMIRs(const MIRDef (&defs)[count]) {
-    DoPrepareMIRs(defs, count);
-  }
-
-  template <size_t count>
-  void PrepareSRegToVRegMap(const int (&map)[count]) {
-    cu_.mir_graph->ssa_base_vregs_.assign(map, map + count);
-    num_vregs_ = *std::max_element(map, map + count) + 1u;
-    AllNodesIterator iterator(cu_.mir_graph.get());
-    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
-      if (bb->data_flow_info != nullptr) {
-        bb->data_flow_info->vreg_to_ssa_map_exit = static_cast<int32_t*>(
-            cu_.arena.Alloc(sizeof(int32_t) * num_vregs_, kArenaAllocDFInfo));
-        std::fill_n(bb->data_flow_info->vreg_to_ssa_map_exit, num_vregs_, INVALID_SREG);
-      }
-    }
-  }
-
-  void PerformGVN() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-    cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
-        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
-    cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
-        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
-    ASSERT_TRUE(gvn_ == nullptr);
-    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
-                                                           GlobalValueNumbering::kModeGvn));
-    value_names_.resize(mir_count_, 0xffffu);
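-    // Visit the blocks in topological order, revisiting loop heads until no block
-    // reports a change, i.e. until the value names stabilize.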
-    LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
-    bool change = false;
-    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
-      LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
-      if (lvn != nullptr) {
-        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          value_names_[mir - mirs_] = lvn->GetValueNumber(mir);
-        }
-      }
-      change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
-      ASSERT_TRUE(gvn_->Good());
-    }
-  }
-
-  void PerformGVNCodeModifications() {
-    ASSERT_TRUE(gvn_ != nullptr);
-    ASSERT_TRUE(gvn_->Good());
-    gvn_->StartPostProcessing();
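-    // Re-run LVN over all blocks in post-processing mode; the value names must
-    // match the first pass and no block may report a further change.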
-    TopologicalSortIterator iterator(cu_.mir_graph.get());
-    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
-      LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
-      if (lvn != nullptr) {
-        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          uint16_t value_name = lvn->GetValueNumber(mir);
-          ASSERT_EQ(value_name, value_names_[mir - mirs_]);
-        }
-      }
-      bool change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
-      ASSERT_FALSE(change);
-      ASSERT_TRUE(gvn_->Good());
-    }
-  }
-
-  void FillVregToSsaRegExitMaps() {
-    // Fill in vreg_to_ssa_map_exit for each BB.
-    PreOrderDfsIterator iterator(cu_.mir_graph.get());
-    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
-      if (bb->block_type == kDalvikByteCode) {
-        CHECK(!bb->predecessors.empty());
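-        // Inherit entries that this BB did not define from its first predecessor.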
-        BasicBlock* pred_bb = cu_.mir_graph->GetBasicBlock(bb->predecessors[0]);
-        for (size_t v_reg = 0; v_reg != num_vregs_; ++v_reg) {
-          if (bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] == INVALID_SREG) {
-            bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] =
-                pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
-          }
-        }
-      }
-    }
-  }
-
-  template <size_t count>
-  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
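-    // A wide value occupies a consecutive sreg pair; mark both halves and flag
-    // the second sreg as the high word.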
-    for (int32_t sreg : sregs) {
-      cu_.mir_graph->reg_location_[sreg].wide = true;
-      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
-      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
-    }
-  }
-
-  void PerformDCE() {
-    FillVregToSsaRegExitMaps();
-    cu_.mir_graph->GetNumOfCodeAndTempVRs();
-    dce_.reset(new (allocator_.get()) GvnDeadCodeElimination(gvn_.get(), allocator_.get()));
-    PreOrderDfsIterator iterator(cu_.mir_graph.get());
-    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
-      if (bb->block_type == kDalvikByteCode) {
-        dce_->Apply(bb);
-      }
-    }
-  }
-
-  void PerformGVN_DCE() {
-    PerformGVN();
-    PerformGVNCodeModifications();  // Eliminate null/range checks.
-    PerformDCE();
-  }
-
-  template <size_t count>
-  void ExpectValueNamesNE(const size_t (&indexes)[count]) {
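-    // Every pair of the given MIR indexes must have distinct value names.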
-    for (size_t i1 = 0; i1 != count; ++i1) {
-      size_t idx1 = indexes[i1];
-      for (size_t i2 = i1 + 1; i2 != count; ++i2) {
-        size_t idx2 = indexes[i2];
-        EXPECT_NE(value_names_[idx1], value_names_[idx2]) << idx1 << " " << idx2;
-      }
-    }
-  }
-
-  template <size_t count>
-  void ExpectNoNullCheck(const size_t (&indexes)[count]) {
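-    // Exactly the MIRs at the given indexes must have their null check marked
-    // as eliminated, and no others.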
-    for (size_t i = 0; i != count; ++i) {
-      size_t idx = indexes[i];
-      EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[idx].optimization_flags & MIR_IGNORE_NULL_CHECK)
-          << idx;
-    }
-    size_t num_no_null_ck = 0u;
-    for (size_t i = 0; i != mir_count_; ++i) {
-      if ((mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
-        ++num_no_null_ck;
-      }
-    }
-    EXPECT_EQ(count, num_no_null_ck);
-  }
-
-  GvnDeadCodeEliminationTest()
-      : pool_(),
-        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
-        num_vregs_(0u),
-        mir_count_(0u),
-        mirs_(nullptr),
-        ssa_reps_(),
-        allocator_(),
-        gvn_(),
-        dce_(),
-        value_names_(),
-        live_in_v_(new (&cu_.arena) ArenaBitVector(&cu_.arena, kMaxSsaRegs, false, kBitMapMisc)) {
-    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
-    cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
-    allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references, and the values are all narrow.
-    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
-    cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
-        kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
-    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
-    // Bind all possible sregs to live vregs for test purposes.
-    live_in_v_->SetInitialBits(kMaxSsaRegs);
-    cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
-    cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs);
-    for (unsigned int i = 0; i < kMaxSsaRegs; i++) {
-      cu_.mir_graph->ssa_base_vregs_.push_back(i);
-      cu_.mir_graph->ssa_subscripts_.push_back(0);
-    }
-    // Set shorty for a void-returning method without arguments.
-    cu_.shorty = "V";
-  }
-
-  static constexpr size_t kMaxSsaRegs = 16384u;
-  static constexpr size_t kMaxVRegs = 256u;
-
-  ArenaPool pool_;
-  CompilationUnit cu_;
-  size_t num_vregs_;
-  size_t mir_count_;
-  MIR* mirs_;
-  std::vector<SSARepresentation> ssa_reps_;
-  std::unique_ptr<ScopedArenaAllocator> allocator_;
-  std::unique_ptr<GlobalValueNumbering> gvn_;
-  std::unique_ptr<GvnDeadCodeElimination> dce_;
-  std::vector<uint16_t> value_names_;
-  ArenaBitVector* live_in_v_;
-};
-
-constexpr uint16_t GvnDeadCodeEliminationTest::kNoValue;
-
-class GvnDeadCodeEliminationTestSimple : public GvnDeadCodeEliminationTest {
- public:
-  GvnDeadCodeEliminationTestSimple();
-
- private:
-  static const BBDef kSimpleBbs[];
-};
-
-const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestSimple::kSimpleBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(3)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1)),
-};
-
-GvnDeadCodeEliminationTestSimple::GvnDeadCodeEliminationTestSimple()
-    : GvnDeadCodeEliminationTest() {
-  PrepareBasicBlocks(kSimpleBbs);
-}
-
-class GvnDeadCodeEliminationTestDiamond : public GvnDeadCodeEliminationTest {
- public:
-  GvnDeadCodeEliminationTestDiamond();
-
- private:
-  static const BBDef kDiamondBbs[];
-};
-
-const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestDiamond::kDiamondBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // Block #3, top of the diamond.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #4, left side.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #5, right side.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // Block #6, bottom.
-};
-
-GvnDeadCodeEliminationTestDiamond::GvnDeadCodeEliminationTestDiamond()
-    : GvnDeadCodeEliminationTest() {
-  PrepareBasicBlocks(kDiamondBbs);
-}
-
-class GvnDeadCodeEliminationTestLoop : public GvnDeadCodeEliminationTest {
- public:
-  GvnDeadCodeEliminationTestLoop();
-
- private:
-  static const BBDef kLoopBbs[];
-};
-
-const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestLoop::kLoopBbs[] = {
-    DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-    DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-    DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-    DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
-    DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-};
-
-GvnDeadCodeEliminationTestLoop::GvnDeadCodeEliminationTestLoop()
-    : GvnDeadCodeEliminationTest() {
-  PrepareBasicBlocks(kLoopBbs);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename1) {
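-  // The MOVE_OBJECT only copies the NEW_INSTANCE reference into another vreg, so
-  // DCE can kill it and rename the second IGET's use back to the original sreg.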
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u),
-      DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 3 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  const size_t no_null_ck_indexes[] = { 1, 3 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0].
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
-  EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]);
-  EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename2) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u),
-      DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u),
-      DEF_CONST(3, Instruction::CONST, 4u, 1000),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 3, 4 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  const size_t no_null_ck_indexes[] = { 1, 3 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, true, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0].
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
-  EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]);
-  EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename3) {
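-  // Unlike Rename1, the move's source vreg is overwritten later, so the move is
-  // killed by renaming the NEW_INSTANCE def (and the first IGET's use) to the
-  // move's destination instead.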
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u),
-      DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 3 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  const size_t no_null_ck_indexes[] = { 1, 3 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move.
-  ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
-  EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]);
-  EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA);
-  // Check that the first IGET is using the s_reg 2, v_reg 2.
-  ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses);
-  EXPECT_EQ(2, mirs_[1].ssa_rep->uses[0]);
-  EXPECT_EQ(2u, mirs_[1].dalvikInsn.vB);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename4) {
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 1u, 0u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 1u),
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 3u, 1000u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0, 1 /* high word */ };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 3 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 3 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  static const bool eliminated[] = {
-      false, true, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move 2u.
-  ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
-  EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]);
-  EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename5) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 3u),
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 1000u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 3, 0, 1 /* high word */ };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 5 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 5 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-  EXPECT_EQ(value_names_[0], value_names_[4]);
-
-  static const bool eliminated[] = {
-      false, false, false, true, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the NEW_INSTANCE defines the s_reg 4, v_reg 3, originally defined by the move 4u.
-  ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
-  EXPECT_EQ(4, mirs_[0].ssa_rep->defs[0]);
-  EXPECT_EQ(3u, mirs_[0].dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename6) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1 /* high word */, 1, 2 /* high word */ };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 0, 2 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  static const bool eliminated[] = {
-      false, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the CONST_WIDE defines the s_reg 2, v_reg 1, originally defined by the move 2u.
-  ASSERT_EQ(2, mirs_[0].ssa_rep->num_defs);
-  EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]);
-  EXPECT_EQ(3, mirs_[0].ssa_rep->defs[1]);
-  EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename7) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 2u, 0u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  static const bool eliminated[] = {
-      false, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u.
-  ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
-  EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]);
-  EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA);
-  // Check that the ADD_INT inputs are both s_reg 1, v_reg 1.
-  ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses);
-  EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]);
-  EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]);
-  EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB);
-  EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename8) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 2u, 0u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  static const bool eliminated[] = {
-      false, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u.
-  ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
-  EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]);
-  EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA);
-  // Check that the ADD_INT_2ADDR was replaced by ADD_INT and inputs are both s_reg 1, v_reg 1.
-  EXPECT_EQ(Instruction::ADD_INT, mirs_[2].dalvikInsn.opcode);
-  ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses);
-  EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]);
-  EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]);
-  EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB);
-  EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Rename9) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 1u, 0u, 0u),
-      DEF_MOVE(3, Instruction::MOVE, 2u, 1u),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 0, 1, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 3 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[1], value_names_[2]);
-
-  static const bool eliminated[] = {
-      false, false, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the ADD_INT_2ADDR was replaced by ADD_INT and output is in s_reg 2, v_reg 1.
-  EXPECT_EQ(Instruction::ADD_INT, mirs_[1].dalvikInsn.opcode);
-  ASSERT_EQ(2, mirs_[1].ssa_rep->num_uses);
-  EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]);
-  EXPECT_EQ(0, mirs_[1].ssa_rep->uses[1]);
-  EXPECT_EQ(0u, mirs_[1].dalvikInsn.vB);
-  EXPECT_EQ(0u, mirs_[1].dalvikInsn.vC);
-  ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs);
-  EXPECT_EQ(2, mirs_[1].ssa_rep->defs[0]);
-  EXPECT_EQ(1u, mirs_[1].dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, NoRename1) {
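-  // Neither renaming direction works: v0 is overwritten by the CONST before the
-  // last IGET, and v1 is defined by the first IGET between the NEW_INSTANCE and
-  // the move, so the MOVE_OBJECT must be kept.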
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u),
-      DEF_CONST(3, Instruction::CONST, 4u, 1000),
-      DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 0, 1 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-
-  const size_t no_null_ck_indexes[] = { 1, 5 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, NoRename2) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 2u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u),
-      DEF_CONST(3, Instruction::CONST, 4u, 1000),
-      DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u),
-      DEF_CONST(3, Instruction::CONST, 6u, 2000),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 0, 3, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 4, 5, 6 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-
-  const size_t no_null_ck_indexes[] = { 1, 5 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, NoRename3) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
-      DEF_IGET(3, Instruction::IGET, 2u, 0u, 2u),
-      DEF_BINOP(3, Instruction::ADD_INT, 3u, 1u, 2u),
-      DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 0u),
-      DEF_IGET(3, Instruction::IGET, 5u, 4u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 5 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[4]);
-
-  const size_t no_null_ck_indexes[] = { 1, 2, 5 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, NoRename4) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 1u),
-      DEF_CONST(3, Instruction::CONST, 2u, 100u),
-      DEF_CONST(3, Instruction::CONST, 3u, 200u),
-      DEF_BINOP(3, Instruction::OR_INT_2ADDR, 4u, 2u, 3u),   // 3. Find definition of the move src.
-      DEF_MOVE(3, Instruction::MOVE, 5u, 0u),                // 4. Uses move dest vreg.
-      DEF_MOVE(3, Instruction::MOVE, 6u, 4u),                // 2. Find overwritten move src.
-      DEF_CONST(3, Instruction::CONST, 7u, 2000u),           // 1. Overwrites 4u, look for moves.
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 4, 0, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 7 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[5]);
-  EXPECT_EQ(value_names_[4], value_names_[6]);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Simple1) {
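-  // The second IGET chain reloads exactly the values of the first, so GVN lets
-  // DCE kill the reloads and rename the original defs to the later sregs.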
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-      { 1u, 1u, 1u, false, kDexMemAccessObject },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 1u),
-      DEF_IGET(3, Instruction::IGET, 3u, 2u, 2u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 0u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 5u, 4u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_NE(value_names_[0], value_names_[1]);
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_NE(value_names_[0], value_names_[3]);
-  EXPECT_NE(value_names_[1], value_names_[2]);
-  EXPECT_NE(value_names_[1], value_names_[3]);
-  EXPECT_NE(value_names_[2], value_names_[3]);
-  EXPECT_EQ(value_names_[1], value_names_[4]);
-  EXPECT_EQ(value_names_[2], value_names_[5]);
-
-  EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[4].optimization_flags & MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[5].optimization_flags & MIR_IGNORE_NULL_CHECK);
-
-  static const bool eliminated[] = {
-      false, false, false, false, true, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs);
-  EXPECT_EQ(4, mirs_[1].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses);
-  EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]);
-  ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs);
-  EXPECT_EQ(5, mirs_[2].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[2].ssa_rep->num_uses);
-  EXPECT_EQ(4, mirs_[2].ssa_rep->uses[0]);
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs);
-  EXPECT_EQ(3, mirs_[3].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
-  EXPECT_EQ(5, mirs_[3].ssa_rep->uses[0]);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Simple2) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_IGET(3, Instruction::IGET, 2u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 3u, 2u, 1u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 4u, 3u),
-      DEF_IGET(3, Instruction::IGET, 5u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 6u, 5u, 1u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 3, 2, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[2], value_names_[5]);
-  EXPECT_EQ(value_names_[3], value_names_[6]);
-
-  const size_t no_null_ck_indexes[] = { 2, 5 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, true, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs);
-  EXPECT_EQ(6, mirs_[3].ssa_rep->defs[0]);
-  ASSERT_EQ(2, mirs_[3].ssa_rep->num_uses);
-  EXPECT_EQ(2, mirs_[3].ssa_rep->uses[0]);
-  EXPECT_EQ(1, mirs_[3].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs);
-  EXPECT_EQ(4, mirs_[4].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses);
-  EXPECT_EQ(6, mirs_[4].ssa_rep->uses[0]);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Simple3) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),  // Simple elimination of ADD+MUL
-      DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),  // allows simple elimination of IGET+SUB.
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 5, 5, 4 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[6], value_names_[11]);
-  EXPECT_EQ(value_names_[7], value_names_[12]);
-
-  const size_t no_null_ck_indexes[] = { 4, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false, false, true, true, true, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs);
-  EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]);  // 6 -> 11
-  ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses);
-  EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]);
-  EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
-  EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]);  // 7 -> 12
-  ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
-  EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]);  // 6 -> 11
-  EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
-  EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
-  EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]);  // 7 -> 12
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Simple4) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 1u, INT64_C(1)),
-      DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 3u, 1u, 2u),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 5u, 4u),
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 6u, INT64_C(1)),
-      DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 8u, 6u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3, 1, 2, 1, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 1, 6 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[1], value_names_[5]);
-  EXPECT_EQ(value_names_[2], value_names_[6]);
-  EXPECT_EQ(value_names_[3], value_names_[7]);
-
-  const size_t no_null_ck_indexes[] = { 3, 7 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      // Simple elimination of CONST_WIDE+LONG_TO_FLOAT allows simple elimination of IGET.
-      false, false, false, false, false, true, true, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs);
-  EXPECT_EQ(8, mirs_[2].ssa_rep->defs[0]);   // 3 -> 8
-  ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses);
-  EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]);
-  EXPECT_EQ(2, mirs_[2].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs);
-  EXPECT_EQ(9, mirs_[3].ssa_rep->defs[0]);   // 4 -> 9
-  ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
-  EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]);
-  ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs);
-  EXPECT_EQ(5, mirs_[4].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses);
-  EXPECT_EQ(9, mirs_[4].ssa_rep->uses[0]);   // 4 -> 9
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, KillChain1) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 5 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[6], value_names_[11]);
-  EXPECT_EQ(value_names_[7], value_names_[12]);
-
-  const size_t no_null_ck_indexes[] = { 4, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false, false, true, true, true, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs);
-  EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]);  // 6 -> 11
-  ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses);
-  EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]);
-  EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
-  EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]);  // 7 -> 12
-  ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
-  EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]);  // 6 -> 11
-  EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
-  EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
-  EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]);   // 7 -> 12
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, KillChain2) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),
-      DEF_CONST(3, Instruction::CONST, 13u, 4000),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4, 7 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[6], value_names_[11]);
-  EXPECT_EQ(value_names_[7], value_names_[12]);
-
-  const size_t no_null_ck_indexes[] = { 4, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false, false, true, true, true, true, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
-  EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]);  // 7 -> 12
-  ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
-  EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]);
-  EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
-  EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
-  EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]);   // 7 -> 12
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, KillChain3) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
-      DEF_CONST(3, Instruction::CONST, 12u, 4000),
-      DEF_BINOP(3, Instruction::SUB_INT, 13u, 11u, 3u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 4, 7, 4 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 12 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[6], value_names_[11]);
-  EXPECT_EQ(value_names_[7], value_names_[13]);
-
-  const size_t no_null_ck_indexes[] = { 4, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false, false, true, true, true, false, true
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the sregs have been renamed correctly.
-  ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
-  EXPECT_EQ(13, mirs_[7].ssa_rep->defs[0]);  // 7 -> 13
-  ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
-  EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]);
-  EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
-  EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
-  ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
-  EXPECT_EQ(13, mirs_[8].ssa_rep->uses[0]);   // 7 -> 13
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain1) {
-  // KillChain2 without the final CONST.
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[6], value_names_[11]);
-  EXPECT_EQ(value_names_[7], value_names_[12]);
-
-  const size_t no_null_ck_indexes[] = { 4, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false, false, false, false, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain2) {
-  // KillChain1 with MIRs in the middle of the chain.
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1000),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000),
-      DEF_CONST(3, Instruction::CONST, 3u, 3000),
-      DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
-      DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
-      DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
-      DEF_CONST(3, Instruction::CONST, 11u, 4000),
-      DEF_UNOP(3, Instruction::INT_TO_FLOAT, 12u, 11u),
-      DEF_BINOP(3, Instruction::MUL_INT, 13u, 10u, 2u),
-      DEF_BINOP(3, Instruction::SUB_INT, 14u, 13u, 3u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 7, 4, 5 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-  EXPECT_EQ(value_names_[5], value_names_[10]);
-  EXPECT_EQ(value_names_[6], value_names_[13]);
-  EXPECT_EQ(value_names_[7], value_names_[14]);
-
-  const size_t no_null_ck_indexes[] = { 4, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, false, false,
-      false, false, false, false, false, false
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) {
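-  // The CONST in block #4 computes the same value as the one in block #3, so DCE
-  // replaces it with a single-input Phi instead of keeping the instruction.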
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000),
-      DEF_CONST(4, Instruction::CONST, 1u, 1000),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-
-  static const bool eliminated[] = {
-      false, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that we've created a single-input Phi to replace the CONST 1u.
-  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
-  MIR* phi = bb4->first_mir_insn;
-  ASSERT_TRUE(phi != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
-  ASSERT_EQ(1, phi->ssa_rep->num_uses);
-  EXPECT_EQ(0, phi->ssa_rep->uses[0]);
-  ASSERT_EQ(1, phi->ssa_rep->num_defs);
-  EXPECT_EQ(1, phi->ssa_rep->defs[0]);
-  EXPECT_EQ(0u, phi->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000),
-      DEF_MOVE(4, Instruction::MOVE, 1u, 0u),
-      DEF_CONST(4, Instruction::CONST, 2u, 1000),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  static const bool eliminated[] = {
-      false, false, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that we've created a single-input Phi to replace the CONST 2u.
-  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
-  MIR* phi = bb4->first_mir_insn;
-  ASSERT_TRUE(phi != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
-  ASSERT_EQ(1, phi->ssa_rep->num_uses);
-  EXPECT_EQ(0, phi->ssa_rep->uses[0]);
-  ASSERT_EQ(1, phi->ssa_rep->num_defs);
-  EXPECT_EQ(2, phi->ssa_rep->defs[0]);
-  EXPECT_EQ(0u, phi->dalvikInsn.vA);
-  MIR* move = phi->next;
-  ASSERT_TRUE(move != nullptr);
-  ASSERT_EQ(Instruction::MOVE, move->dalvikInsn.opcode);
-  ASSERT_EQ(1, move->ssa_rep->num_uses);
-  EXPECT_EQ(2, move->ssa_rep->uses[0]);
-  ASSERT_EQ(1, move->ssa_rep->num_defs);
-  EXPECT_EQ(1, move->ssa_rep->defs[0]);
-  EXPECT_EQ(1u, move->dalvikInsn.vA);
-  EXPECT_EQ(0u, move->dalvikInsn.vB);
-}
-
-TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi3) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(4, Instruction::CONST, 1u, 1000),
-      DEF_IPUT(4, Instruction::IPUT, 1u, 0u, 0u),
-      DEF_CONST(5, Instruction::CONST, 3u, 2000),
-      DEF_IPUT(5, Instruction::IPUT, 3u, 0u, 0u),
-      DEF_IGET(6, Instruction::IGET, 5u, 0u, 0u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2 /* dummy */, 1, 2 /* dummy */, 1 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 3, 5 };
-  ExpectValueNamesNE(diff_indexes);
-
-  const size_t no_null_ck_indexes[] = { 2, 4, 5 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that we've created a two-input Phi to replace the IGET 5u.
-  BasicBlock* bb6 = cu_.mir_graph->GetBasicBlock(6);
-  MIR* phi = bb6->first_mir_insn;
-  ASSERT_TRUE(phi != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
-  ASSERT_EQ(2, phi->ssa_rep->num_uses);
-  EXPECT_EQ(1, phi->ssa_rep->uses[0]);
-  EXPECT_EQ(3, phi->ssa_rep->uses[1]);
-  ASSERT_EQ(1, phi->ssa_rep->num_defs);
-  EXPECT_EQ(5, phi->ssa_rep->defs[0]);
-  EXPECT_EQ(1u, phi->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock1) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },  // linked list
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u),
-      DEF_IFZ(3, Instruction::IF_NEZ, 4u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 9u, 8u, 0u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[1], value_names_[6]);
-  EXPECT_EQ(value_names_[2], value_names_[7]);
-  EXPECT_EQ(value_names_[3], value_names_[8]);
-  EXPECT_EQ(value_names_[4], value_names_[9]);
-
-  const size_t no_null_ck_indexes[] = { 1, 6, 7, 8, 9 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, true, true, true, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that we've created two single-input Phis to replace the IGET 8u and IGET 9u;
-  // the IGET 6u and IGET 7u were killed without a replacement.
-  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
-  MIR* phi1 = bb4->first_mir_insn;
-  ASSERT_TRUE(phi1 != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi1->dalvikInsn.opcode));
-  MIR* phi2 = phi1->next;
-  ASSERT_TRUE(phi2 != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi2->dalvikInsn.opcode));
-  ASSERT_TRUE(phi2->next == &mirs_[6]);
-  if (phi1->dalvikInsn.vA == 2u) {
-    std::swap(phi1, phi2);
-  }
-  ASSERT_EQ(1, phi1->ssa_rep->num_uses);
-  EXPECT_EQ(3, phi1->ssa_rep->uses[0]);
-  ASSERT_EQ(1, phi1->ssa_rep->num_defs);
-  EXPECT_EQ(8, phi1->ssa_rep->defs[0]);
-  EXPECT_EQ(1u, phi1->dalvikInsn.vA);
-  ASSERT_EQ(1, phi2->ssa_rep->num_uses);
-  EXPECT_EQ(4, phi2->ssa_rep->uses[0]);
-  ASSERT_EQ(1, phi2->ssa_rep->num_defs);
-  EXPECT_EQ(9, phi2->ssa_rep->defs[0]);
-  EXPECT_EQ(2u, phi2->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock2) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },  // linked list
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u),
-      DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u),
-      DEF_IFZ(3, Instruction::IF_NEZ, 4u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u),
-      DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u),
-      DEF_CONST(4, Instruction::CONST, 9u, 1000),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 9 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[1], value_names_[6]);
-  EXPECT_EQ(value_names_[2], value_names_[7]);
-  EXPECT_EQ(value_names_[3], value_names_[8]);
-
-  const size_t no_null_ck_indexes[] = { 1, 6, 7, 8 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, true, true, true, false,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that we've created a single-input Phi to replace the IGET 8u;
-  // the IGET 6u and IGET 7u were killed without a replacement.
-  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
-  MIR* phi = bb4->first_mir_insn;
-  ASSERT_TRUE(phi != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
-  ASSERT_TRUE(phi->next == &mirs_[6]);
-  ASSERT_EQ(1, phi->ssa_rep->num_uses);
-  EXPECT_EQ(3, phi->ssa_rep->uses[0]);
-  ASSERT_EQ(1, phi->ssa_rep->num_defs);
-  EXPECT_EQ(8, phi->ssa_rep->defs[0]);
-  EXPECT_EQ(1u, phi->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
-      DEF_CONST(3, Instruction::CONST, 1u, 1),
-      DEF_CONST(3, Instruction::CONST, 2u, 0),
-      DEF_IPUT(3, Instruction::IPUT, 2u, 0u, 0u),
-      DEF_IGET(4, Instruction::IGET, 4u, 0u, 0u),
-      DEF_BINOP(4, Instruction::ADD_INT, 5u, 4u, 1u),
-      DEF_IPUT(4, Instruction::IPUT, 5u, 0u, 0u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3 /* dummy */, 2, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 };
-  ExpectValueNamesNE(diff_indexes);
-
-  const size_t no_null_ck_indexes[] = { 3, 4, 6 };
-  ExpectNoNullCheck(no_null_ck_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false, false, true, false, false,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that we've created a two-input Phi to replace the IGET 4u.
-  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
-  MIR* phi = bb4->first_mir_insn;
-  ASSERT_TRUE(phi != nullptr);
-  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
-  ASSERT_TRUE(phi->next == &mirs_[4]);
-  ASSERT_EQ(2, phi->ssa_rep->num_uses);
-  EXPECT_EQ(2, phi->ssa_rep->uses[0]);
-  EXPECT_EQ(5, phi->ssa_rep->uses[1]);
-  ASSERT_EQ(1, phi->ssa_rep->num_defs);
-  EXPECT_EQ(4, phi->ssa_rep->defs[0]);
-  EXPECT_EQ(2u, phi->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestDiamond, LongOverlaps1) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 2u, 1000u),
-      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 4u, 0u),
-      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 6u, 2u),
-      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 8u, 4u),
-      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 10u, 6u),
-  };
-
-  // The last insn should overlap the first and second.
-  static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 0, 2, 4, 6, 8, 10 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-  EXPECT_EQ(value_names_[0], value_names_[4]);
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, LongOverlaps2) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u),
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 4u, 2u),
-  };
-
-  // The last insn should overlap the first and second.
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 0, 2, 4 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  static const bool eliminated[] = {
-      false, true, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the CONST_WIDE registers have been correctly renamed.
-  MIR* const_wide = &mirs_[0];
-  ASSERT_EQ(2u, const_wide->ssa_rep->num_defs);
-  EXPECT_EQ(4, const_wide->ssa_rep->defs[0]);
-  EXPECT_EQ(5, const_wide->ssa_rep->defs[1]);
-  EXPECT_EQ(1u, const_wide->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, LongOverlaps3) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u),
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 4u, 2u),
-  };
-
-  // The last insn should overlap the first and second.
-  static const int32_t sreg_to_vreg_map[] = { 2, 3, 0, 1, 1, 2 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 0, 2, 4 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[2]);
-
-  static const bool eliminated[] = {
-      false, true, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check that the CONST_WIDE registers have been correctly renamed.
-  MIR* const_wide = &mirs_[0];
-  ASSERT_EQ(2u, const_wide->ssa_rep->num_defs);
-  EXPECT_EQ(4, const_wide->ssa_rep->defs[0]);
-  EXPECT_EQ(5, const_wide->ssa_rep->defs[1]);
-  EXPECT_EQ(1u, const_wide->dalvikInsn.vA);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
-      DEF_CONST(3, Instruction::CONST, 2u, 2000u),
-      { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u } },
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 5u, 3u),
-      DEF_CONST(3, Instruction::CONST, 7u, 3000u),
-      DEF_CONST(3, Instruction::CONST, 8u, 4000u),
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 1, 2, 0, 0, 1, 3, 4, 0, 1 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 3, 5 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 2, 3, 5, 6 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[3], value_names_[4]);
-
-  static const bool eliminated[] = {
-      false, true, false, false, true, false, false,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-  // Check renamed registers in CONST.
-  MIR* cst = &mirs_[0];
-  ASSERT_EQ(Instruction::CONST, cst->dalvikInsn.opcode);
-  ASSERT_EQ(0, cst->ssa_rep->num_uses);
-  ASSERT_EQ(1, cst->ssa_rep->num_defs);
-  EXPECT_EQ(1, cst->ssa_rep->defs[0]);
-  EXPECT_EQ(2u, cst->dalvikInsn.vA);
-  // Check renamed registers in INT_TO_LONG.
-  MIR* int_to_long = &mirs_[3];
-  ASSERT_EQ(Instruction::INT_TO_LONG, int_to_long->dalvikInsn.opcode);
-  ASSERT_EQ(1, int_to_long->ssa_rep->num_uses);
-  EXPECT_EQ(2, int_to_long->ssa_rep->uses[0]);
-  ASSERT_EQ(2, int_to_long->ssa_rep->num_defs);
-  EXPECT_EQ(5, int_to_long->ssa_rep->defs[0]);
-  EXPECT_EQ(6, int_to_long->ssa_rep->defs[1]);
-  EXPECT_EQ(3u, int_to_long->dalvikInsn.vA);
-  EXPECT_EQ(0u, int_to_long->dalvikInsn.vB);
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs1) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_CONST(3, Instruction::CONST, 1u, 2000u),
-      DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u),
-      DEF_CONST(3, Instruction::CONST, 3u, 1000u),            // NOT killed (b/21702651).
-      DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u),         // Killed (RecordPass)
-      DEF_CONST(3, Instruction::CONST, 5u, 2000u),            // Killed with 9u (BackwardPass)
-      DEF_BINOP(3, Instruction::ADD_INT, 6u, 5u, 0u),         // Killed (RecordPass)
-      DEF_CONST(3, Instruction::CONST, 7u, 4000u),
-      DEF_MOVE(3, Instruction::MOVE, 8u, 0u),                 // Killed with 6u (BackwardPass)
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 0, 3, 0, 3, 4, 0 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 7 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-  EXPECT_EQ(value_names_[2], value_names_[4]);
-  EXPECT_EQ(value_names_[1], value_names_[5]);
-  EXPECT_EQ(value_names_[2], value_names_[6]);
-  EXPECT_EQ(value_names_[0], value_names_[8]);
-
-  static const bool eliminated[] = {
-      false, false, false, false, true, true, true, false, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs2) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
-      DEF_CONST(3, Instruction::CONST, 1u, 2000u),
-      DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u),
-      DEF_CONST(3, Instruction::CONST, 3u, 1000u),            // Killed (BackwardPass; b/21702651)
-      DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u),         // Killed (RecordPass)
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 4000u),
-      { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 5u, 6u }, 1, { 7u } },
-      DEF_BINOP(3, Instruction::ADD_INT, 8u, 7u, 0u),
-      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 9u, 4000u),  // Killed with 12u (BackwardPass)
-      DEF_CONST(3, Instruction::CONST, 11u, 6000u),
-      { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 9u, 10u }, 1, { 12u } },  // Killed with 9u (BP)
-  };
-
-  static const int32_t sreg_to_vreg_map[] = {
-      2, 3, 4, 1, 4, 5, 6 /* high word */, 0, 7, 0, 1 /* high word */, 8, 0
-  };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 5, 9 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2, 5, 6, 7, 9 };
-  ExpectValueNamesNE(diff_indexes);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-  EXPECT_EQ(value_names_[2], value_names_[4]);
-  EXPECT_EQ(value_names_[5], value_names_[8]);
-  EXPECT_EQ(value_names_[6], value_names_[10]);
-
-  static const bool eliminated[] = {
-      false, false, false, true, true, false, false, false, true, false, true,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, ArrayLengthThrows) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3, Instruction::CONST, 0u, 0),              // null
-      DEF_UNOP(3, Instruction::ARRAY_LENGTH, 1u, 0u),       // null.length
-      DEF_CONST(3, Instruction::CONST, 2u, 1000u),          // Overwrite the array-length dest.
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 1 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  PerformGVN_DCE();
-
-  ASSERT_EQ(arraysize(mirs), value_names_.size());
-  static const size_t diff_indexes[] = { 0, 1, 2 };
-  ExpectValueNamesNE(diff_indexes);
-
-  static const bool eliminated[] = {
-      false, false, false,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-TEST_F(GvnDeadCodeEliminationTestSimple, Dependency) {
-  static const MIRDef mirs[] = {
-      DEF_MOVE(3, Instruction::MOVE, 5u, 1u),                 // move v5,v1
-      DEF_MOVE(3, Instruction::MOVE, 6u, 1u),                 // move v12,v1
-      DEF_MOVE(3, Instruction::MOVE, 7u, 0u),                 // move v13,v0
-      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 8u, 2u),       // move v0_1,v2_3
-      DEF_MOVE(3, Instruction::MOVE, 10u, 6u),                // move v3,v12
-      DEF_MOVE(3, Instruction::MOVE, 11u, 4u),                // move v2,v4
-      DEF_MOVE(3, Instruction::MOVE, 12u, 7u),                // move v4,v13
-      DEF_MOVE(3, Instruction::MOVE, 13u, 11u),               // move v12,v2
-      DEF_MOVE(3, Instruction::MOVE, 14u, 10u),               // move v2,v3
-      DEF_MOVE(3, Instruction::MOVE, 15u, 5u),                // move v3,v5
-      DEF_MOVE(3, Instruction::MOVE, 16u, 12u),               // move v5,v4
-  };
-
-  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 12, 13, 0, 1, 3, 2, 4, 12, 2, 3, 5 };
-  PrepareSRegToVRegMap(sreg_to_vreg_map);
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 2, 8 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformGVN_DCE();
-
-  static const bool eliminated[] = {
-      false, false, false, false, false, false, false, true, true, false, false,
-  };
-  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(eliminated); ++i) {
-    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
-    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
deleted file mode 100644
index 38f7d1e..0000000
--- a/compiler/dex/local_value_numbering.cc
+++ /dev/null
@@ -1,2038 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "local_value_numbering.h"
-
-#include "base/bit_utils.h"
-#include "global_value_numbering.h"
-#include "mir_field_info.h"
-#include "mir_graph.h"
-#include "utils.h"
-
-namespace art {
-
-namespace {  // anonymous namespace
-
-// Operations used as value map keys in place of the actual opcodes. Each kind of key
-// gets a distinct instruction so that tuples for different kinds of values cannot collide.
-static constexpr uint16_t kInvokeMemoryVersionBumpOp = Instruction::INVOKE_VIRTUAL;
-static constexpr uint16_t kUnresolvedSFieldOp = Instruction::SGET;
-static constexpr uint16_t kResolvedSFieldOp = Instruction::SGET_WIDE;
-static constexpr uint16_t kUnresolvedIFieldOp = Instruction::IGET;
-static constexpr uint16_t kNonAliasingIFieldLocOp = Instruction::IGET_WIDE;
-static constexpr uint16_t kNonAliasingIFieldInitialOp = Instruction::IGET_OBJECT;
-static constexpr uint16_t kAliasingIFieldOp = Instruction::IGET_BOOLEAN;
-static constexpr uint16_t kAliasingIFieldStartVersionOp = Instruction::IGET_BYTE;
-static constexpr uint16_t kAliasingIFieldBumpVersionOp = Instruction::IGET_CHAR;
-static constexpr uint16_t kNonAliasingArrayOp = Instruction::AGET;
-static constexpr uint16_t kNonAliasingArrayStartVersionOp = Instruction::AGET_WIDE;
-static constexpr uint16_t kNonAliasingArrayBumpVersionOp = Instruction::AGET_OBJECT;
-static constexpr uint16_t kAliasingArrayOp = Instruction::AGET_BOOLEAN;
-static constexpr uint16_t kAliasingArrayStartVersionOp = Instruction::AGET_BYTE;
-static constexpr uint16_t kAliasingArrayBumpVersionOp = Instruction::AGET_CHAR;
-static constexpr uint16_t kMergeBlockMemoryVersionBumpOp = Instruction::INVOKE_VIRTUAL_RANGE;
-static constexpr uint16_t kMergeBlockAliasingIFieldVersionBumpOp = Instruction::IPUT;
-static constexpr uint16_t kMergeBlockAliasingIFieldMergeLocationOp = Instruction::IPUT_WIDE;
-static constexpr uint16_t kMergeBlockNonAliasingArrayVersionBumpOp = Instruction::APUT;
-static constexpr uint16_t kMergeBlockNonAliasingArrayMergeLocationOp = Instruction::APUT_WIDE;
-static constexpr uint16_t kMergeBlockAliasingArrayVersionBumpOp = Instruction::APUT_OBJECT;
-static constexpr uint16_t kMergeBlockAliasingArrayMergeLocationOp = Instruction::APUT_BOOLEAN;
-static constexpr uint16_t kMergeBlockNonAliasingIFieldVersionBumpOp = Instruction::APUT_BYTE;
-static constexpr uint16_t kMergeBlockSFieldVersionBumpOp = Instruction::APUT_CHAR;
-
-}  // anonymous namespace
-
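-// Memory-version policy for instance fields that may alias. The static methods below plug
-// into the templated aliasing-values helpers to key value names for aliasing IGET/IPUT
-// locations and to merge them across predecessors.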
-class LocalValueNumbering::AliasingIFieldVersions {
- public:
-  static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
-                                     uint16_t field_id) {
-    uint16_t type = gvn->GetIFieldType(field_id);
-    return gvn->LookupValue(kAliasingIFieldStartVersionOp, field_id,
-                            lvn->global_memory_version_, lvn->unresolved_ifield_version_[type]);
-  }
-
-  static uint16_t BumpMemoryVersion(GlobalValueNumbering* gvn, uint16_t old_version,
-                                    uint16_t store_ref_set_id, uint16_t stored_value) {
-    return gvn->LookupValue(kAliasingIFieldBumpVersionOp, old_version,
-                            store_ref_set_id, stored_value);
-  }
-
-  static uint16_t LookupGlobalValue(GlobalValueNumbering* gvn,
-                                    uint16_t field_id, uint16_t base, uint16_t memory_version) {
-    return gvn->LookupValue(kAliasingIFieldOp, field_id, base, memory_version);
-  }
-
-  static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
-                                   uint16_t field_id, uint16_t base) {
-    // If the base/field_id is non-aliasing in lvn, use the non-aliasing value.
-    uint16_t type = gvn->GetIFieldType(field_id);
-    if (lvn->IsNonAliasingIField(base, field_id, type)) {
-      uint16_t loc = gvn->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
-      auto lb = lvn->non_aliasing_ifield_value_map_.find(loc);
-      return (lb != lvn->non_aliasing_ifield_value_map_.end())
-          ? lb->second
-          : gvn->LookupValue(kNonAliasingIFieldInitialOp, loc, kNoValue, kNoValue);
-    }
-    return AliasingValuesMergeGet<AliasingIFieldVersions>(
-        gvn, lvn, &lvn->aliasing_ifield_value_map_, field_id, base);
-  }
-
-  static bool HasNewBaseVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
-                                uint16_t field_id) {
-    uint16_t type = gvn->GetIFieldType(field_id);
-    return lvn->unresolved_ifield_version_[type] == lvn->merge_new_memory_version_ ||
-        lvn->global_memory_version_ == lvn->merge_new_memory_version_;
-  }
-
-  static uint16_t LookupMergeBlockValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
-                                        uint16_t field_id) {
-    return gvn->LookupValue(kMergeBlockAliasingIFieldVersionBumpOp, field_id, kNoValue, lvn_id);
-  }
-
-  static uint16_t LookupMergeLocationValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
-                                           uint16_t field_id, uint16_t base) {
-    return gvn->LookupValue(kMergeBlockAliasingIFieldMergeLocationOp, field_id, base, lvn_id);
-  }
-};
-
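-// Memory-version policy for arrays known not to alias, keyed by the array's value name.
-// Note that these are unaffected by the global memory version (see HasNewBaseVersion()).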
-class LocalValueNumbering::NonAliasingArrayVersions {
- public:
-  static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn,
-                                     const LocalValueNumbering* lvn ATTRIBUTE_UNUSED,
-                                     uint16_t array) {
-    return gvn->LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, kNoValue);
-  }
-
-  static uint16_t BumpMemoryVersion(GlobalValueNumbering* gvn, uint16_t old_version,
-                                    uint16_t store_ref_set_id, uint16_t stored_value) {
-    return gvn->LookupValue(kNonAliasingArrayBumpVersionOp, old_version,
-                            store_ref_set_id, stored_value);
-  }
-
-  static uint16_t LookupGlobalValue(GlobalValueNumbering* gvn,
-                                    uint16_t array, uint16_t index, uint16_t memory_version) {
-    return gvn->LookupValue(kNonAliasingArrayOp, array, index, memory_version);
-  }
-
-  static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
-                                   uint16_t array, uint16_t index) {
-    return AliasingValuesMergeGet<NonAliasingArrayVersions>(
-        gvn, lvn, &lvn->non_aliasing_array_value_map_, array, index);
-  }
-
-  static bool HasNewBaseVersion(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED,
-                                const LocalValueNumbering* lvn ATTRIBUTE_UNUSED,
-                                uint16_t array ATTRIBUTE_UNUSED) {
-    return false;  // Not affected by global_memory_version_.
-  }
-
-  static uint16_t LookupMergeBlockValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
-                                        uint16_t array) {
-    return gvn->LookupValue(kMergeBlockNonAliasingArrayVersionBumpOp, array, kNoValue, lvn_id);
-  }
-
-  static uint16_t LookupMergeLocationValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
-                                           uint16_t array, uint16_t index) {
-    return gvn->LookupValue(kMergeBlockNonAliasingArrayMergeLocationOp, array, index, lvn_id);
-  }
-};
-
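-// Memory-version policy for potentially aliasing arrays, keyed by component type. Merge
-// lookups fall back to the non-aliasing policy when the array is known to be non-aliasing.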
-class LocalValueNumbering::AliasingArrayVersions {
- public:
-  static uint16_t StartMemoryVersion(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn,
-                                     uint16_t type) {
-    return gvn->LookupValue(kAliasingArrayStartVersionOp, type, lvn->global_memory_version_,
-                            kNoValue);
-  }
-
-  static uint16_t BumpMemoryVersion(GlobalValueNumbering* gvn, uint16_t old_version,
-                                    uint16_t store_ref_set_id, uint16_t stored_value) {
-    return gvn->LookupValue(kAliasingArrayBumpVersionOp, old_version,
-                            store_ref_set_id, stored_value);
-  }
-
-  static uint16_t LookupGlobalValue(GlobalValueNumbering* gvn,
-                                    uint16_t type, uint16_t location, uint16_t memory_version) {
-    return gvn->LookupValue(kAliasingArrayOp, type, location, memory_version);
-  }
-
-  static uint16_t LookupMergeValue(GlobalValueNumbering* gvn,
-                                   const LocalValueNumbering* lvn,
-                                   uint16_t type, uint16_t location) {
-    // If the location is non-aliasing in lvn, use the non-aliasing value.
-    uint16_t array = gvn->GetArrayLocationBase(location);
-    if (lvn->IsNonAliasingArray(array, type)) {
-      uint16_t index = gvn->GetArrayLocationIndex(location);
-      return NonAliasingArrayVersions::LookupMergeValue(gvn, lvn, array, index);
-    }
-    return AliasingValuesMergeGet<AliasingArrayVersions>(
-        gvn, lvn, &lvn->aliasing_array_value_map_, type, location);
-  }
-
-  static bool HasNewBaseVersion(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED,
-                                const LocalValueNumbering* lvn,
-                                uint16_t type ATTRIBUTE_UNUSED) {
-    return lvn->global_memory_version_ == lvn->merge_new_memory_version_;
-  }
-
-  static uint16_t LookupMergeBlockValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
-                                        uint16_t type) {
-    return gvn->LookupValue(kMergeBlockAliasingArrayVersionBumpOp, type, kNoValue, lvn_id);
-  }
-
-  static uint16_t LookupMergeLocationValue(GlobalValueNumbering* gvn, uint16_t lvn_id,
-                                           uint16_t type, uint16_t location) {
-    return gvn->LookupValue(kMergeBlockAliasingArrayMergeLocationOp, type, location, lvn_id);
-  }
-};
-
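-// Find the AliasingValues for the given key, default-constructing an empty entry
-// (using this LVN's allocator) if the key is not in the map yet.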
-template <typename Map>
-LocalValueNumbering::AliasingValues* LocalValueNumbering::GetAliasingValues(
-    Map* map, const typename Map::key_type& key) {
-  auto lb = map->lower_bound(key);
-  if (lb == map->end() || map->key_comp()(key, lb->first)) {
-    lb = map->PutBefore(lb, key, AliasingValues(this));
-  }
-  return &lb->second;
-}
-
-template <typename Versions, typename KeyType>
-void LocalValueNumbering::UpdateAliasingValuesLoadVersion(const KeyType& key,
-                                                          AliasingValues* values) {
-  if (values->last_load_memory_version == kNoValue) {
-    // Get the start version that accounts for aliasing with unresolved fields of the same
-    // type and make it unique for the field by including the field_id.
-    uint16_t memory_version = values->memory_version_before_stores;
-    if (memory_version == kNoValue) {
-      memory_version = Versions::StartMemoryVersion(gvn_, this, key);
-    }
-    if (!values->store_loc_set.empty()) {
-      uint16_t ref_set_id = gvn_->GetRefSetId(values->store_loc_set);
-      memory_version = Versions::BumpMemoryVersion(gvn_, memory_version, ref_set_id,
-                                                   values->last_stored_value);
-    }
-    values->last_load_memory_version = memory_version;
-  }
-}
-
-template <typename Versions, typename Map>
-uint16_t LocalValueNumbering::AliasingValuesMergeGet(GlobalValueNumbering* gvn,
-                                                     const LocalValueNumbering* lvn,
-                                                     Map* map, const typename Map::key_type& key,
-                                                     uint16_t location) {
-  // Retrieve the value name that we would get from
-  //   const_cast<LocalValueNumbering*>(lvn)->HandleAliasingValuesGet(map, key, location)
-  // but don't modify the map.
-  uint16_t value_name;
-  auto it = map->find(key);
-  if (it == map->end()) {
-    uint16_t start_version = Versions::StartMemoryVersion(gvn, lvn, key);
-    value_name = Versions::LookupGlobalValue(gvn, key, location, start_version);
-  } else if (it->second.store_loc_set.count(location) != 0u) {
-    value_name = it->second.last_stored_value;
-  } else {
-    auto load_it = it->second.load_value_map.find(location);
-    if (load_it != it->second.load_value_map.end()) {
-      value_name = load_it->second;
-    } else {
-      value_name = Versions::LookupGlobalValue(gvn, key, location,
-                                               it->second.last_load_memory_version);
-    }
-  }
-  return value_name;
-}
-
-template <typename Versions, typename Map>
-uint16_t LocalValueNumbering::HandleAliasingValuesGet(Map* map, const typename Map::key_type& key,
-                                                      uint16_t location) {
-  // Retrieve the value name for an IGET/SGET/AGET, updating the map with a new value if needed.
-  uint16_t res;
-  AliasingValues* values = GetAliasingValues(map, key);
-  if (values->store_loc_set.count(location) != 0u) {
-    res = values->last_stored_value;
-  } else {
-    UpdateAliasingValuesLoadVersion<Versions>(key, values);
-    auto lb = values->load_value_map.lower_bound(location);
-    if (lb != values->load_value_map.end() && lb->first == location) {
-      res = lb->second;
-    } else {
-      res = Versions::LookupGlobalValue(gvn_, key, location, values->last_load_memory_version);
-      values->load_value_map.PutBefore(lb, location, res);
-    }
-  }
-  return res;
-}
-
-template <typename Versions, typename Map>
-bool LocalValueNumbering::HandleAliasingValuesPut(Map* map, const typename Map::key_type& key,
-                                                  uint16_t location, uint16_t value) {
-  AliasingValues* values = GetAliasingValues(map, key);
-  auto load_values_it = values->load_value_map.find(location);
-  if (load_values_it != values->load_value_map.end() && load_values_it->second == value) {
-    // This insn can be eliminated; it stores the same value that's already in the field.
-    return false;
-  }
-  if (value == values->last_stored_value) {
-    auto store_loc_lb = values->store_loc_set.lower_bound(location);
-    if (store_loc_lb != values->store_loc_set.end() && *store_loc_lb == location) {
-      // This insn can be eliminated; it stores the same value that's already in the field.
-      return false;
-    }
-    values->store_loc_set.emplace_hint(store_loc_lb, location);
-  } else {
-    UpdateAliasingValuesLoadVersion<Versions>(key, values);
-    values->memory_version_before_stores = values->last_load_memory_version;
-    values->last_stored_value = value;
-    values->store_loc_set.clear();
-    values->store_loc_set.insert(location);
-  }
-  // Clear the last load memory version and remove all potentially overwritten values.
-  values->last_load_memory_version = kNoValue;
-  auto it = values->load_value_map.begin(), end = values->load_value_map.end();
-  while (it != end) {
-    if (it->second == value) {
-      ++it;
-    } else {
-      it = values->load_value_map.erase(it);
-    }
-  }
-  return true;
-}
-
-template <typename K>
-void LocalValueNumbering::CopyAliasingValuesMap(ScopedArenaSafeMap<K, AliasingValues>* dest,
-                                                const ScopedArenaSafeMap<K, AliasingValues>& src) {
-  // We need each new AliasingValues (or rather its map members) to be constructed
-  // with our allocator, rather than the allocator of the source.
-  for (const auto& entry : src) {
-    auto it = dest->PutBefore(dest->end(), entry.first, AliasingValues(this));
-    it->second = entry.second;  // Map assignments preserve current allocator.
-  }
-}
-
-LocalValueNumbering::LocalValueNumbering(GlobalValueNumbering* gvn, uint16_t id,
-                                         ScopedArenaAllocator* allocator)
-    : gvn_(gvn),
-      id_(id),
-      sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      sfield_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      non_aliasing_ifield_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      aliasing_ifield_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      non_aliasing_array_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      aliasing_array_value_map_(std::less<uint16_t>(), allocator->Adapter()),
-      global_memory_version_(0u),
-      non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()),
-      escaped_refs_(std::less<uint16_t>(), allocator->Adapter()),
-      escaped_ifield_clobber_set_(EscapedIFieldClobberKeyComparator(), allocator->Adapter()),
-      escaped_array_clobber_set_(EscapedArrayClobberKeyComparator(), allocator->Adapter()),
-      range_checked_(RangeCheckKeyComparator(), allocator->Adapter()),
-      null_checked_(std::less<uint16_t>(), allocator->Adapter()),
-      div_zero_checked_(std::less<uint16_t>(), allocator->Adapter()),
-      merge_names_(allocator->Adapter()),
-      merge_map_(std::less<ScopedArenaVector<BasicBlockId>>(), allocator->Adapter()),
-      merge_new_memory_version_(kNoValue) {
-  std::fill_n(unresolved_sfield_version_, arraysize(unresolved_sfield_version_), 0u);
-  std::fill_n(unresolved_ifield_version_, arraysize(unresolved_ifield_version_), 0u);
-}
-
-bool LocalValueNumbering::Equals(const LocalValueNumbering& other) const {
-  DCHECK(gvn_ == other.gvn_);
-  // Compare the maps/sets and memory versions.
-  return sreg_value_map_ == other.sreg_value_map_ &&
-      sreg_wide_value_map_ == other.sreg_wide_value_map_ &&
-      sfield_value_map_ == other.sfield_value_map_ &&
-      non_aliasing_ifield_value_map_ == other.non_aliasing_ifield_value_map_ &&
-      aliasing_ifield_value_map_ == other.aliasing_ifield_value_map_ &&
-      non_aliasing_array_value_map_ == other.non_aliasing_array_value_map_ &&
-      aliasing_array_value_map_ == other.aliasing_array_value_map_ &&
-      SameMemoryVersion(other) &&
-      non_aliasing_refs_ == other.non_aliasing_refs_ &&
-      escaped_refs_ == other.escaped_refs_ &&
-      escaped_ifield_clobber_set_ == other.escaped_ifield_clobber_set_ &&
-      escaped_array_clobber_set_ == other.escaped_array_clobber_set_ &&
-      range_checked_ == other.range_checked_ &&
-      null_checked_ == other.null_checked_ &&
-      div_zero_checked_ == other.div_zero_checked_;
-}
-
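-// Merge the state of a single predecessor into this LVN. A return merge needs only the
-// live s-reg value maps; a catch merge additionally clobbers all memory versions.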
-void LocalValueNumbering::MergeOne(const LocalValueNumbering& other, MergeType merge_type) {
-  CopyLiveSregValues(&sreg_value_map_, other.sreg_value_map_);
-  CopyLiveSregValues(&sreg_wide_value_map_, other.sreg_wide_value_map_);
-
-  if (merge_type == kReturnMerge) {
-    // RETURN or PHI+RETURN. We need only sreg value maps.
-    return;
-  }
-
-  non_aliasing_ifield_value_map_ = other.non_aliasing_ifield_value_map_;
-  CopyAliasingValuesMap(&non_aliasing_array_value_map_, other.non_aliasing_array_value_map_);
-  non_aliasing_refs_ = other.non_aliasing_refs_;
-  range_checked_ = other.range_checked_;
-  null_checked_ = other.null_checked_;
-  div_zero_checked_ = other.div_zero_checked_;
-
-  const BasicBlock* pred_bb = gvn_->GetBasicBlock(other.Id());
-  if (GlobalValueNumbering::HasNullCheckLastInsn(pred_bb, Id())) {
-    int s_reg = pred_bb->last_mir_insn->ssa_rep->uses[0];
-    null_checked_.insert(other.GetOperandValue(s_reg));
-  }
-
-  if (merge_type == kCatchMerge) {
-    // Memory is clobbered. Use new memory version and don't merge aliasing locations.
-    global_memory_version_ = NewMemoryVersion(&merge_new_memory_version_);
-    std::fill_n(unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
-                global_memory_version_);
-    std::fill_n(unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
-                global_memory_version_);
-    PruneNonAliasingRefsForCatch();
-    return;
-  }
-
-  DCHECK(merge_type == kNormalMerge);
-  global_memory_version_ = other.global_memory_version_;
-  std::copy_n(other.unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
-              unresolved_ifield_version_);
-  std::copy_n(other.unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
-              unresolved_sfield_version_);
-  sfield_value_map_ = other.sfield_value_map_;
-  CopyAliasingValuesMap(&aliasing_ifield_value_map_, other.aliasing_ifield_value_map_);
-  CopyAliasingValuesMap(&aliasing_array_value_map_, other.aliasing_array_value_map_);
-  escaped_refs_ = other.escaped_refs_;
-  escaped_ifield_clobber_set_ = other.escaped_ifield_clobber_set_;
-  escaped_array_clobber_set_ = other.escaped_array_clobber_set_;
-}
-
-bool LocalValueNumbering::SameMemoryVersion(const LocalValueNumbering& other) const {
-  return
-      global_memory_version_ == other.global_memory_version_ &&
-      std::equal(unresolved_ifield_version_,
-                 unresolved_ifield_version_ + arraysize(unresolved_ifield_version_),
-                 other.unresolved_ifield_version_) &&
-      std::equal(unresolved_sfield_version_,
-                 unresolved_sfield_version_ + arraysize(unresolved_sfield_version_),
-                 other.unresolved_sfield_version_);
-}
-
-uint16_t LocalValueNumbering::NewMemoryVersion(uint16_t* new_version) {
-  if (*new_version == kNoValue) {
-    *new_version = gvn_->LookupValue(kMergeBlockMemoryVersionBumpOp, 0u, 0u, id_);
-  }
-  return *new_version;
-}
-
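-// Compute the merged global and unresolved field memory versions across all merged
-// predecessor LVNs, bumping to a fresh version wherever the predecessors disagree.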
-void LocalValueNumbering::MergeMemoryVersions(bool clobbered_catch) {
-  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
-  const LocalValueNumbering* cmp = gvn_->merge_lvns_[0];
-  // Check if the global version has changed.
-  bool new_global_version = clobbered_catch;
-  if (!new_global_version) {
-    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-      if (lvn->global_memory_version_ != cmp->global_memory_version_) {
-        // Use a new version for everything.
-        new_global_version = true;
-        break;
-      }
-    }
-  }
-  if (new_global_version) {
-    global_memory_version_ = NewMemoryVersion(&merge_new_memory_version_);
-    std::fill_n(unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
-                merge_new_memory_version_);
-    std::fill_n(unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
-                merge_new_memory_version_);
-  } else {
-    // Initialize with a copy of memory versions from the comparison LVN.
-    global_memory_version_ = cmp->global_memory_version_;
-    std::copy_n(cmp->unresolved_ifield_version_, arraysize(unresolved_ifield_version_),
-                unresolved_ifield_version_);
-    std::copy_n(cmp->unresolved_sfield_version_, arraysize(unresolved_sfield_version_),
-                unresolved_sfield_version_);
-    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-      if (lvn == cmp) {
-        continue;
-      }
-      for (size_t i = 0; i != kDexMemAccessTypeCount; ++i) {
-        if (lvn->unresolved_ifield_version_[i] != cmp->unresolved_ifield_version_[i]) {
-          unresolved_ifield_version_[i] = NewMemoryVersion(&merge_new_memory_version_);
-        }
-        if (lvn->unresolved_sfield_version_[i] != cmp->unresolved_sfield_version_[i]) {
-          unresolved_sfield_version_[i] = NewMemoryVersion(&merge_new_memory_version_);
-        }
-      }
-    }
-  }
-}
-
-void LocalValueNumbering::PruneNonAliasingRefsForCatch() {
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    const BasicBlock* bb = gvn_->GetBasicBlock(lvn->Id());
-    if (UNLIKELY(bb->taken == id_) || UNLIKELY(bb->fall_through == id_)) {
-      // Non-exceptional path to a catch handler means that the catch block was actually
-      // empty and all exceptional paths lead to the shared path after that empty block.
-      continue;
-    }
-    DCHECK_EQ(bb->taken, kNullBlock);
-    DCHECK_NE(bb->fall_through, kNullBlock);
-    const BasicBlock* fall_through_bb = gvn_->GetBasicBlock(bb->fall_through);
-    const MIR* mir = fall_through_bb->first_mir_insn;
-    DCHECK(mir != nullptr);
-    // Only INVOKEs can leak and clobber non-aliasing references if they throw.
-    if ((mir->dalvikInsn.FlagsOf() & Instruction::kInvoke) != 0) {
-      HandleInvokeArgs(mir, lvn);
-    }
-  }
-}
-
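-// Intersect the given set member across all merged LVNs: keep a key only if every
-// predecessor contains it. Iterates the smallest set and probes the others.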
-template <typename Set, Set LocalValueNumbering::* set_ptr>
-void LocalValueNumbering::IntersectSets() {
-  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
-
-  // Find the LVN with the fewest entries in the set.
-  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    if ((lvn->*set_ptr).size() < (least_entries_lvn->*set_ptr).size()) {
-      least_entries_lvn = lvn;
-    }
-  }
-
-  // For each key check if it's in all the LVNs.
-  for (const auto& key : least_entries_lvn->*set_ptr) {
-    bool checked = true;
-    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-      if (lvn != least_entries_lvn && (lvn->*set_ptr).count(key) == 0u) {
-        checked = false;
-        break;
-      }
-    }
-    if (checked) {
-      (this->*set_ptr).emplace_hint((this->*set_ptr).end(), key);
-    }
-  }
-}
-
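-// Copy s-reg values from src, skipping s-regs whose v-regs are not live on entry
-// to this block.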
-void LocalValueNumbering::CopyLiveSregValues(SregValueMap* dest, const SregValueMap& src) {
-  auto dest_end = dest->end();
-  ArenaBitVector* live_in_v = gvn_->GetMirGraph()->GetBasicBlock(id_)->data_flow_info->live_in_v;
-  DCHECK(live_in_v != nullptr);
-  for (const auto& entry : src) {
-    bool live = live_in_v->IsBitSet(gvn_->GetMirGraph()->SRegToVReg(entry.first));
-    if (live) {
-      dest->PutBefore(dest_end, entry.first, entry.second);
-    }
-  }
-}
-
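-// Intersect an s-reg value map across all merged LVNs: keep an entry only if its v-reg
-// is live on entry to this block and all predecessors agree on the value.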
-template <LocalValueNumbering::SregValueMap LocalValueNumbering::* map_ptr>
-void LocalValueNumbering::IntersectSregValueMaps() {
-  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
-
-  // Find the LVN with the fewest entries in the map.
-  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    if ((lvn->*map_ptr).size() < (least_entries_lvn->*map_ptr).size()) {
-      least_entries_lvn = lvn;
-    }
-  }
-
-  // For each key check if it's in all the LVNs.
-  ArenaBitVector* live_in_v = gvn_->GetMirGraph()->GetBasicBlock(id_)->data_flow_info->live_in_v;
-  DCHECK(live_in_v != nullptr);
-  for (const auto& entry : least_entries_lvn->*map_ptr) {
-    bool live_and_same = live_in_v->IsBitSet(gvn_->GetMirGraph()->SRegToVReg(entry.first));
-    if (live_and_same) {
-      for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-        if (lvn != least_entries_lvn) {
-          auto it = (lvn->*map_ptr).find(entry.first);
-          if (it == (lvn->*map_ptr).end() || !(it->second == entry.second)) {
-            live_and_same = false;
-            break;
-          }
-        }
-      }
-    }
-    if (live_and_same) {
-      (this->*map_ptr).PutBefore((this->*map_ptr).end(), entry.first, entry.second);
-    }
-  }
-}
-
-// Intersect maps as sets. The value type must be equality-comparable.
-template <typename Map>
-void LocalValueNumbering::InPlaceIntersectMaps(Map* work_map, const Map& other_map) {
-  auto work_it = work_map->begin(), work_end = work_map->end();
-  auto cmp = work_map->value_comp();
-  for (const auto& entry : other_map) {
-    while (work_it != work_end &&
-        (cmp(*work_it, entry) ||
-         (!cmp(entry, *work_it) && !(work_it->second == entry.second)))) {
-      work_it = work_map->erase(work_it);
-    }
-    if (work_it == work_end) {
-      return;
-    }
-    ++work_it;
-  }
-}
-
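-// Generic ordered-set merge: walk each predecessor's set in order and invoke MergeFn
-// for every key not already present in this LVN's set, passing an insertion hint.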
-template <typename Set, Set LocalValueNumbering::*set_ptr, void (LocalValueNumbering::*MergeFn)(
-    const typename Set::value_type& entry, typename Set::iterator hint)>
-void LocalValueNumbering::MergeSets() {
-  auto cmp = (this->*set_ptr).value_comp();
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    auto my_it = (this->*set_ptr).begin(), my_end = (this->*set_ptr).end();
-    for (const auto& entry : lvn->*set_ptr) {
-      while (my_it != my_end && cmp(*my_it, entry)) {
-        ++my_it;
-      }
-      if (my_it != my_end && !cmp(entry, *my_it)) {
-        // Already handled.
-        ++my_it;
-      } else {
-        // Merge values for this key.
-        (this->*MergeFn)(entry, my_it);  // my_it remains valid across inserts to std::set/SafeMap.
-      }
-    }
-  }
-}
-
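-// Trim work_values->load_value_map down to the locations that were also loaded from or
-// stored to according to *values; the caller recalculates the merged values afterwards.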
-void LocalValueNumbering::IntersectAliasingValueLocations(AliasingValues* work_values,
-                                                          const AliasingValues* values) {
-  auto cmp = work_values->load_value_map.key_comp();
-  auto work_it = work_values->load_value_map.begin(), work_end = work_values->load_value_map.end();
-  auto store_it = values->store_loc_set.begin(), store_end = values->store_loc_set.end();
-  auto load_it = values->load_value_map.begin(), load_end = values->load_value_map.end();
-  while (store_it != store_end || load_it != load_end) {
-    uint16_t loc;
-    if (store_it != store_end && (load_it == load_end || *store_it < load_it->first)) {
-      loc = *store_it;
-      ++store_it;
-    } else {
-      loc = load_it->first;
-      ++load_it;
-      DCHECK(store_it == store_end || cmp(loc, *store_it));
-    }
-    while (work_it != work_end && cmp(work_it->first, loc)) {
-      work_it = work_values->load_value_map.erase(work_it);
-    }
-    if (work_it != work_end && !cmp(loc, work_it->first)) {
-      // The location matches, keep it.
-      ++work_it;
-    }
-  }
-  while (work_it != work_end) {
-    work_it = work_values->load_value_map.erase(work_it);
-  }
-}
-
-void LocalValueNumbering::MergeEscapedRefs(const ValueNameSet::value_type& entry,
-                                           ValueNameSet::iterator hint) {
-  // See if the ref is either escaped or non-aliasing in each predecessor.
-  bool is_escaped = true;
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    if (lvn->non_aliasing_refs_.count(entry) == 0u &&
-        lvn->escaped_refs_.count(entry) == 0u) {
-      is_escaped = false;
-      break;
-    }
-  }
-  if (is_escaped) {
-    escaped_refs_.emplace_hint(hint, entry);
-  }
-}
-
-void LocalValueNumbering::MergeEscapedIFieldTypeClobberSets(
-    const EscapedIFieldClobberSet::value_type& entry, EscapedIFieldClobberSet::iterator hint) {
-  // Insert only type-clobber entries (field_id == kNoValue) of escaped refs.
-  if (entry.field_id == kNoValue && escaped_refs_.count(entry.base) != 0u) {
-    escaped_ifield_clobber_set_.emplace_hint(hint, entry);
-  }
-}
-
-void LocalValueNumbering::MergeEscapedIFieldClobberSets(
-    const EscapedIFieldClobberSet::value_type& entry, EscapedIFieldClobberSet::iterator hint) {
-  // Insert only those entries of escaped refs that are not overridden by a type clobber.
-  if (!(hint != escaped_ifield_clobber_set_.end() &&
-        hint->base == entry.base && hint->type == entry.type) &&
-      escaped_refs_.count(entry.base) != 0u) {
-    escaped_ifield_clobber_set_.emplace_hint(hint, entry);
-  }
-}
-
-void LocalValueNumbering::MergeEscapedArrayClobberSets(
-    const EscapedArrayClobberSet::value_type& entry, EscapedArrayClobberSet::iterator hint) {
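-  // Insert only those clobber entries whose base reference is still escaped.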
-  if (escaped_refs_.count(entry.base) != 0u) {
-    escaped_array_clobber_set_.emplace_hint(hint, entry);
-  }
-}
-
-void LocalValueNumbering::MergeNullChecked() {
-  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
-
-  // Find the LVN with the least entries in the set.
-  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    if (lvn->null_checked_.size() < least_entries_lvn->null_checked_.size()) {
-      least_entries_lvn = lvn;
-    }
-  }
-
-  // For each null-checked value name check if it's null-checked in all the LVNs.
-  for (const auto& value_name : least_entries_lvn->null_checked_) {
-    // Merge null_checked_ for this ref.
-    merge_names_.clear();
-    merge_names_.resize(gvn_->merge_lvns_.size(), value_name);
-    if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
-      null_checked_.insert(null_checked_.end(), value_name);
-    }
-  }
-
-  // Now check if the least_entries_lvn has a null-check as the last insn.
-  const BasicBlock* least_entries_bb = gvn_->GetBasicBlock(least_entries_lvn->Id());
-  if (gvn_->HasNullCheckLastInsn(least_entries_bb, id_)) {
-    int s_reg = least_entries_bb->last_mir_insn->ssa_rep->uses[0];
-    uint32_t value_name = least_entries_lvn->GetOperandValue(s_reg);
-    merge_names_.clear();
-    merge_names_.resize(gvn_->merge_lvns_.size(), value_name);
-    if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
-      null_checked_.insert(value_name);
-    }
-  }
-}
-
-void LocalValueNumbering::MergeDivZeroChecked() {
-  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
-
-  // Find the LVN with the fewest entries in the set.
-  const LocalValueNumbering* least_entries_lvn = gvn_->merge_lvns_[0];
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    if (lvn->div_zero_checked_.size() < least_entries_lvn->div_zero_checked_.size()) {
-      least_entries_lvn = lvn;
-    }
-  }
-
-  // For each div-zero value name check if it's div-zero checked in all the LVNs.
-  for (const auto& value_name : least_entries_lvn->div_zero_checked_) {
-    // Merge div_zero_checked_ for this value name.
-    merge_names_.clear();
-    merge_names_.resize(gvn_->merge_lvns_.size(), value_name);
-    if (gvn_->DivZeroCheckedInAllPredecessors(merge_names_)) {
-      div_zero_checked_.insert(div_zero_checked_.end(), value_name);
-    }
-  }
-}
-
-void LocalValueNumbering::MergeSFieldValues(const SFieldToValueMap::value_type& entry,
-                                            SFieldToValueMap::iterator hint) {
-  uint16_t field_id = entry.first;
-  merge_names_.clear();
-  uint16_t value_name = kNoValue;
-  bool same_values = true;
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    // Get the value name as in HandleSGet() but don't modify *lvn.
-    auto it = lvn->sfield_value_map_.find(field_id);
-    if (it != lvn->sfield_value_map_.end()) {
-      value_name = it->second;
-    } else {
-      uint16_t type = gvn_->GetSFieldType(field_id);
-      value_name = gvn_->LookupValue(kResolvedSFieldOp, field_id,
-                                     lvn->unresolved_sfield_version_[type],
-                                     lvn->global_memory_version_);
-    }
-
-    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
-    merge_names_.push_back(value_name);
-  }
-  if (same_values) {
-    // value_name already contains the result.
-  } else {
-    auto lb = merge_map_.lower_bound(merge_names_);
-    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
-      value_name = lb->second;
-    } else {
-      value_name = gvn_->LookupValue(kMergeBlockSFieldVersionBumpOp, field_id, id_, kNoValue);
-      merge_map_.PutBefore(lb, merge_names_, value_name);
-      if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
-        null_checked_.insert(value_name);
-      }
-    }
-  }
-  sfield_value_map_.PutBefore(hint, field_id, value_name);
-}
-
-void LocalValueNumbering::MergeNonAliasingIFieldValues(const IFieldLocToValueMap::value_type& entry,
-                                                       IFieldLocToValueMap::iterator hint) {
-  uint16_t field_loc = entry.first;
-  merge_names_.clear();
-  uint16_t value_name = kNoValue;
-  bool same_values = true;
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    // Get the value name as in HandleIGet() but don't modify *lvn.
-    auto it = lvn->non_aliasing_ifield_value_map_.find(field_loc);
-    if (it != lvn->non_aliasing_ifield_value_map_.end()) {
-      value_name = it->second;
-    } else {
-      value_name = gvn_->LookupValue(kNonAliasingIFieldInitialOp, field_loc, kNoValue, kNoValue);
-    }
-
-    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
-    merge_names_.push_back(value_name);
-  }
-  if (same_values) {
-    // value_name already contains the result.
-  } else {
-    auto lb = merge_map_.lower_bound(merge_names_);
-    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
-      value_name = lb->second;
-    } else {
-      value_name = gvn_->LookupValue(kMergeBlockNonAliasingIFieldVersionBumpOp, field_loc,
-                                     id_, kNoValue);
-      merge_map_.PutBefore(lb, merge_names_, value_name);
-      if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
-        null_checked_.insert(value_name);
-      }
-    }
-  }
-  non_aliasing_ifield_value_map_.PutBefore(hint, field_loc, value_name);
-}
-
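-// Merge an aliasing value map entry across all predecessors. If they agree on the memory
-// version and stores, copy those and intersect the load values; otherwise bump the memory
-// version for this merge and recompute merged values for the intersected locations.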
-template <typename Map, Map LocalValueNumbering::*map_ptr, typename Versions>
-void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& entry,
-                                              typename Map::iterator hint) {
-  const typename Map::key_type& key = entry.first;
-
-  auto it = (this->*map_ptr).PutBefore(hint, key, AliasingValues(this));
-  AliasingValues* my_values = &it->second;
-
-  const AliasingValues* cmp_values = nullptr;
-  bool same_version = !Versions::HasNewBaseVersion(gvn_, this, key);
-  uint16_t load_memory_version_for_same_version = kNoValue;
-  if (same_version) {
-    // Find the first predecessor that actually has AliasingValues for this key.
-    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-      auto value = (lvn->*map_ptr).find(key);
-      if (value != (lvn->*map_ptr).end()) {
-        cmp_values = &value->second;
-        break;
-      }
-    }
-    DCHECK(cmp_values != nullptr);  // At least one predecessor must have the values.
-
-    // Check if we have identical memory versions, i.e. the global memory version, unresolved
-    // field version and the values' memory_version_before_stores, last_stored_value
-    // and store_loc_set are identical.
-    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-      auto value = (lvn->*map_ptr).find(key);
-      if (value == (lvn->*map_ptr).end()) {
-        if (cmp_values->memory_version_before_stores != kNoValue) {
-          same_version = false;
-          break;
-        }
-      } else if (cmp_values->last_stored_value != value->second.last_stored_value ||
-          cmp_values->memory_version_before_stores != value->second.memory_version_before_stores ||
-          cmp_values->store_loc_set != value->second.store_loc_set) {
-        same_version = false;
-        break;
-      } else if (value->second.last_load_memory_version != kNoValue) {
-        DCHECK(load_memory_version_for_same_version == kNoValue ||
-               load_memory_version_for_same_version == value->second.last_load_memory_version);
-        load_memory_version_for_same_version = value->second.last_load_memory_version;
-      }
-    }
-  }
-
-  if (same_version) {
-    // Copy the identical values.
-    my_values->memory_version_before_stores = cmp_values->memory_version_before_stores;
-    my_values->last_stored_value = cmp_values->last_stored_value;
-    my_values->store_loc_set = cmp_values->store_loc_set;
-    my_values->last_load_memory_version = load_memory_version_for_same_version;
-    // Merge load values seen in all incoming arcs (i.e. an intersection).
-    if (!cmp_values->load_value_map.empty()) {
-      my_values->load_value_map = cmp_values->load_value_map;
-      for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-        auto value = (lvn->*map_ptr).find(key);
-        if (value == (lvn->*map_ptr).end() || value->second.load_value_map.empty()) {
-          my_values->load_value_map.clear();
-          break;
-        }
-        InPlaceIntersectMaps(&my_values->load_value_map, value->second.load_value_map);
-        if (my_values->load_value_map.empty()) {
-          break;
-        }
-      }
-    }
-  } else {
-    // Bump version number for the merge.
-    my_values->memory_version_before_stores = my_values->last_load_memory_version =
-        Versions::LookupMergeBlockValue(gvn_, id_, key);
-
-    // Calculate the locations that have been either read from or written to in each incoming LVN.
-    bool first_lvn = true;
-    for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-      auto value = (lvn->*map_ptr).find(key);
-      if (value == (lvn->*map_ptr).end()) {
-        my_values->load_value_map.clear();
-        break;
-      }
-      if (first_lvn) {
-        first_lvn = false;
-        // Copy the first LVN's locations. Values will be overwritten later.
-        my_values->load_value_map = value->second.load_value_map;
-        for (uint16_t location : value->second.store_loc_set) {
-          my_values->load_value_map.Put(location, 0u);
-        }
-      } else {
-        IntersectAliasingValueLocations(my_values, &value->second);
-      }
-    }
-    // Calculate merged values for the intersection.
-    for (auto& load_value_entry : my_values->load_value_map) {
-      uint16_t location = load_value_entry.first;
-      merge_names_.clear();
-      uint16_t value_name = kNoValue;
-      bool same_values = true;
-      for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-        value_name = Versions::LookupMergeValue(gvn_, lvn, key, location);
-        same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
-        merge_names_.push_back(value_name);
-      }
-      if (same_values) {
-        // value_name already contains the result.
-      } else {
-        auto lb = merge_map_.lower_bound(merge_names_);
-        if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
-          value_name = lb->second;
-        } else {
-          // NOTE: In addition to the key and id_ which don't change on an LVN recalculation
-          // during GVN, we also add location which can actually change on recalculation, so the
-          // value_name below may change. This could lead to an infinite loop if the location
-          // value name always changed when the referenced value name changes. However, given that
-          // we assign unique value names for other merges, such as Phis, such a dependency is
-          // not possible in a well-formed SSA graph.
-          value_name = Versions::LookupMergeLocationValue(gvn_, id_, key, location);
-          merge_map_.PutBefore(lb, merge_names_, value_name);
-          if (gvn_->NullCheckedInAllPredecessors(merge_names_)) {
-            null_checked_.insert(value_name);
-          }
-        }
-      }
-      load_value_entry.second = value_name;
-    }
-  }
-}
-
-void LocalValueNumbering::Merge(MergeType merge_type) {
-  DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
-
-  // Always reserve space in merge_names_. Even if we don't use it in Merge() we may need it
-  // in GetStartingVregValueNumberImpl() when merge_names_'s allocator is not the top one.
-  merge_names_.reserve(gvn_->merge_lvns_.size());
-
-  IntersectSregValueMaps<&LocalValueNumbering::sreg_value_map_>();
-  IntersectSregValueMaps<&LocalValueNumbering::sreg_wide_value_map_>();
-  if (merge_type == kReturnMerge) {
-    // RETURN or PHI+RETURN. We need only sreg value maps.
-    return;
-  }
-
-  MergeMemoryVersions(merge_type == kCatchMerge);
-
-  // Merge non-aliasing maps/sets.
-  IntersectSets<ValueNameSet, &LocalValueNumbering::non_aliasing_refs_>();
-  if (!non_aliasing_refs_.empty() && merge_type == kCatchMerge) {
-    PruneNonAliasingRefsForCatch();
-  }
-  if (!non_aliasing_refs_.empty()) {
-    MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_,
-              &LocalValueNumbering::MergeNonAliasingIFieldValues>();
-    MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
-              &LocalValueNumbering::MergeAliasingValues<
-                  NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
-                  NonAliasingArrayVersions>>();
-  }
-
-  // We won't do anything complicated for range checks, just calculate the intersection.
-  IntersectSets<RangeCheckSet, &LocalValueNumbering::range_checked_>();
-
-  // Merge null_checked_. We may later insert more, such as merged object field values.
-  MergeNullChecked();
-
-  // Now merge the div_zero_checked_.
-  MergeDivZeroChecked();
-
-  if (merge_type == kCatchMerge) {
-    // Memory is clobbered. New memory version already created, don't merge aliasing locations.
-    return;
-  }
-
-  DCHECK(merge_type == kNormalMerge);
-
-  // Merge escaped refs and clobber sets.
-  MergeSets<ValueNameSet, &LocalValueNumbering::escaped_refs_,
-            &LocalValueNumbering::MergeEscapedRefs>();
-  if (!escaped_refs_.empty()) {
-    MergeSets<EscapedIFieldClobberSet, &LocalValueNumbering::escaped_ifield_clobber_set_,
-              &LocalValueNumbering::MergeEscapedIFieldTypeClobberSets>();
-    MergeSets<EscapedIFieldClobberSet, &LocalValueNumbering::escaped_ifield_clobber_set_,
-              &LocalValueNumbering::MergeEscapedIFieldClobberSets>();
-    MergeSets<EscapedArrayClobberSet, &LocalValueNumbering::escaped_array_clobber_set_,
-              &LocalValueNumbering::MergeEscapedArrayClobberSets>();
-  }
-
-  MergeSets<SFieldToValueMap, &LocalValueNumbering::sfield_value_map_,
-            &LocalValueNumbering::MergeSFieldValues>();
-  MergeSets<AliasingIFieldValuesMap, &LocalValueNumbering::aliasing_ifield_value_map_,
-            &LocalValueNumbering::MergeAliasingValues<
-                AliasingIFieldValuesMap, &LocalValueNumbering::aliasing_ifield_value_map_,
-                AliasingIFieldVersions>>();
-  MergeSets<AliasingArrayValuesMap, &LocalValueNumbering::aliasing_array_value_map_,
-            &LocalValueNumbering::MergeAliasingValues<
-                AliasingArrayValuesMap, &LocalValueNumbering::aliasing_array_value_map_,
-                AliasingArrayVersions>>();
-}
-
-void LocalValueNumbering::PrepareEntryBlock() {
-  uint32_t vreg = gvn_->GetMirGraph()->GetFirstInVR();
-  CompilationUnit* cu = gvn_->GetCompilationUnit();
-  const char* shorty = cu->shorty;
-  ++shorty;  // Skip return value.
-  if ((cu->access_flags & kAccStatic) == 0) {
-    // If this is a non-static method, mark "this" as non-null.
-    uint16_t value_name = GetOperandValue(vreg);
-    ++vreg;
-    null_checked_.insert(value_name);
-  }
-  for ( ; *shorty != 0; ++shorty, ++vreg) {
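-    // Wide arguments ('J' and 'D') occupy two vregs; record the wide value name for the pair.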
-    if (*shorty == 'J' || *shorty == 'D') {
-      uint16_t value_name = GetOperandValueWide(vreg);
-      SetOperandValueWide(vreg, value_name);
-      ++vreg;
-    }
-  }
-}
-
-uint16_t LocalValueNumbering::MarkNonAliasingNonNull(MIR* mir) {
-  uint16_t res = GetOperandValue(mir->ssa_rep->defs[0]);
-  DCHECK(null_checked_.find(res) == null_checked_.end());
-  null_checked_.insert(res);
-  non_aliasing_refs_.insert(res);
-  return res;
-}
-
-bool LocalValueNumbering::IsNonAliasing(uint16_t reg) const {
-  return non_aliasing_refs_.find(reg) != non_aliasing_refs_.end();
-}
-
-bool LocalValueNumbering::IsNonAliasingIField(uint16_t reg, uint16_t field_id,
-                                              uint16_t type) const {
-  if (IsNonAliasing(reg)) {
-    return true;
-  }
-  if (escaped_refs_.find(reg) == escaped_refs_.end()) {
-    return false;
-  }
-  // Check for IPUTs to unresolved fields.
-  EscapedIFieldClobberKey key1 = { reg, type, kNoValue };
-  if (escaped_ifield_clobber_set_.find(key1) != escaped_ifield_clobber_set_.end()) {
-    return false;
-  }
-  // Check for aliased IPUTs to the same field.
-  EscapedIFieldClobberKey key2 = { reg, type, field_id };
-  return escaped_ifield_clobber_set_.find(key2) == escaped_ifield_clobber_set_.end();
-}
-
-bool LocalValueNumbering::IsNonAliasingArray(uint16_t reg, uint16_t type) const {
-  if (IsNonAliasing(reg)) {
-    return true;
-  }
-  if (escaped_refs_.count(reg) == 0u) {
-    return false;
-  }
-  // Check for aliased APUTs.
-  EscapedArrayClobberKey key = { reg, type };
-  return escaped_array_clobber_set_.find(key) == escaped_array_clobber_set_.end();
-}
-
-void LocalValueNumbering::HandleNullCheck(MIR* mir, uint16_t reg) {
-  auto lb = null_checked_.lower_bound(reg);
-  if (lb != null_checked_.end() && *lb == reg) {
-    if (LIKELY(gvn_->CanModify())) {
-      if (gvn_->GetCompilationUnit()->verbose) {
-        LOG(INFO) << "Removing null check for 0x" << std::hex << mir->offset;
-      }
-      mir->optimization_flags |= MIR_IGNORE_NULL_CHECK;
-    }
-  } else {
-    null_checked_.insert(lb, reg);
-  }
-}
-
-void LocalValueNumbering::HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index) {
-  RangeCheckKey key = { array, index };
-  auto lb = range_checked_.lower_bound(key);
-  if (lb != range_checked_.end() && !RangeCheckKeyComparator()(key, *lb)) {
-    if (LIKELY(gvn_->CanModify())) {
-      if (gvn_->GetCompilationUnit()->verbose) {
-        LOG(INFO) << "Removing range check for 0x" << std::hex << mir->offset;
-      }
-      mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
-    }
-  } else {
-    // Mark range check completed.
-    range_checked_.insert(lb, key);
-  }
-}
-
-void LocalValueNumbering::HandleDivZeroCheck(MIR* mir, uint16_t reg) {
-  auto lb = div_zero_checked_.lower_bound(reg);
-  if (lb != div_zero_checked_.end() && *lb == reg) {
-    if (LIKELY(gvn_->CanModify())) {
-      if (gvn_->GetCompilationUnit()->verbose) {
-        LOG(INFO) << "Removing div zero check for 0x" << std::hex << mir->offset;
-      }
-      mir->optimization_flags |= MIR_IGNORE_DIV_ZERO_CHECK;
-    }
-  } else {
-    div_zero_checked_.insert(lb, reg);
-  }
-}
-
-void LocalValueNumbering::HandlePutObject(MIR* mir) {
-  // If we're storing a non-aliasing reference, stop tracking it as non-aliasing now.
-  uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]);
-  HandleEscapingRef(base);
-  if (gvn_->CanModify() && null_checked_.count(base) != 0u) {
-    if (gvn_->GetCompilationUnit()->verbose) {
-      LOG(INFO) << "Removing GC card mark value null check for 0x" << std::hex << mir->offset;
-    }
-    mir->optimization_flags |= MIR_STORE_NON_NULL_VALUE;
-  }
-}
-
-void LocalValueNumbering::HandleEscapingRef(uint16_t base) {
-  auto it = non_aliasing_refs_.find(base);
-  if (it != non_aliasing_refs_.end()) {
-    non_aliasing_refs_.erase(it);
-    escaped_refs_.insert(base);
-  }
-}
-
-void LocalValueNumbering::HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn) {
-  const int32_t* uses = mir->ssa_rep->uses;
-  const int32_t* uses_end = uses + mir->ssa_rep->num_uses;
-  while (uses != uses_end) {
-    uint16_t sreg = *uses;
-    ++uses;
-    // Avoid LookupValue() so that we don't store new values in the global value map.
-    auto local_it = mir_lvn->sreg_value_map_.find(sreg);
-    if (local_it != mir_lvn->sreg_value_map_.end()) {
-      non_aliasing_refs_.erase(local_it->second);
-    } else {
-      uint16_t value_name = gvn_->FindValue(kNoValue, sreg, kNoValue, kNoValue);
-      if (value_name != kNoValue) {
-        non_aliasing_refs_.erase(value_name);
-      }
-    }
-  }
-}
-
-uint16_t LocalValueNumbering::HandlePhi(MIR* mir) {
-  if (gvn_->merge_lvns_.empty()) {
-    // Running LVN without a full GVN?
-    return kNoValue;
-  }
-  // Determine if this Phi is merging wide regs.
-  RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
-  if (raw_dest.high_word) {
-    // This is the high part of a wide reg. Ignore the Phi.
-    return kNoValue;
-  }
-  bool wide = raw_dest.wide;
-  // Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN.
-  merge_names_.clear();
-  uint16_t value_name = kNoValue;
-  bool same_values = true;
-  BasicBlockId* incoming = mir->meta.phi_incoming;
-  int32_t* uses = mir->ssa_rep->uses;
-  int16_t pos = 0;
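-  // For each predecessor LVN, find the Phi use whose incoming block id matches that LVN.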
-  for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
-    DCHECK_LT(pos, mir->ssa_rep->num_uses);
-    while (incoming[pos] != lvn->Id()) {
-      ++pos;
-      DCHECK_LT(pos, mir->ssa_rep->num_uses);
-    }
-    int s_reg = uses[pos];
-    ++pos;
-    value_name = wide ? lvn->GetOperandValueWide(s_reg) : lvn->GetOperandValue(s_reg);
-
-    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
-    merge_names_.push_back(value_name);
-  }
-  if (same_values) {
-    // value_name already contains the result.
-  } else {
-    auto lb = merge_map_.lower_bound(merge_names_);
-    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
-      value_name = lb->second;
-    } else {
-      value_name = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
-      merge_map_.PutBefore(lb, merge_names_, value_name);
-      if (!wide && gvn_->NullCheckedInAllPredecessors(merge_names_)) {
-        null_checked_.insert(value_name);
-      }
-      if (gvn_->DivZeroCheckedInAllPredecessors(merge_names_)) {
-        div_zero_checked_.insert(value_name);
-      }
-    }
-  }
-  if (wide) {
-    SetOperandValueWide(mir->ssa_rep->defs[0], value_name);
-  } else {
-    SetOperandValue(mir->ssa_rep->defs[0], value_name);
-  }
-  return value_name;
-}
-
-uint16_t LocalValueNumbering::HandleConst(MIR* mir, uint32_t value) {
-  RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
-  uint16_t res;
-  if (value == 0u && raw_dest.ref) {
-    res = GlobalValueNumbering::kNullValue;
-  } else {
-    Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST;
-    res = gvn_->LookupValue(op, Low16Bits(value), High16Bits(value), 0);
-  }
-  SetOperandValue(mir->ssa_rep->defs[0], res);
-  return res;
-}
-
-uint16_t LocalValueNumbering::HandleConstWide(MIR* mir, uint64_t value) {
-  RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
-  Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST;
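-  // Value name operands are 16-bit, so build the 64-bit constant's name from four 16-bit
-  // halves: one name per 32-bit word, then one name tying the two words together.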
-  uint32_t low_word = Low32Bits(value);
-  uint32_t high_word = High32Bits(value);
-  uint16_t low_res = gvn_->LookupValue(op, Low16Bits(low_word), High16Bits(low_word), 1);
-  uint16_t high_res = gvn_->LookupValue(op, Low16Bits(high_word), High16Bits(high_word), 2);
-  uint16_t res = gvn_->LookupValue(op, low_res, high_res, 3);
-  SetOperandValueWide(mir->ssa_rep->defs[0], res);
-  return res;
-}
-
-uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) {
-  uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]);
-  HandleNullCheck(mir, array);
-  uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]);
-  HandleRangeCheck(mir, array, index);
-  uint16_t type = AGetMemAccessType(static_cast<Instruction::Code>(opcode));
-  // Establish value number for loaded register.
-  uint16_t res;
-  if (IsNonAliasingArray(array, type)) {
-    res = HandleAliasingValuesGet<NonAliasingArrayVersions>(&non_aliasing_array_value_map_,
-                                                            array, index);
-  } else {
-    uint16_t location = gvn_->GetArrayLocation(array, index);
-    res = HandleAliasingValuesGet<AliasingArrayVersions>(&aliasing_array_value_map_,
-                                                         type, location);
-  }
-  if (opcode == Instruction::AGET_WIDE) {
-    SetOperandValueWide(mir->ssa_rep->defs[0], res);
-  } else {
-    SetOperandValue(mir->ssa_rep->defs[0], res);
-  }
-  return res;
-}
-
-void LocalValueNumbering::HandleAPut(MIR* mir, uint16_t opcode) {
-  int array_idx = (opcode == Instruction::APUT_WIDE) ? 2 : 1;
-  int index_idx = array_idx + 1;
-  uint16_t array = GetOperandValue(mir->ssa_rep->uses[array_idx]);
-  HandleNullCheck(mir, array);
-  uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]);
-  HandleRangeCheck(mir, array, index);
-
-  uint16_t type = APutMemAccessType(static_cast<Instruction::Code>(opcode));
-  uint16_t value = (opcode == Instruction::APUT_WIDE)
-                   ? GetOperandValueWide(mir->ssa_rep->uses[0])
-                   : GetOperandValue(mir->ssa_rep->uses[0]);
-  if (IsNonAliasing(array)) {
-    bool put_is_live = HandleAliasingValuesPut<NonAliasingArrayVersions>(
-        &non_aliasing_array_value_map_, array, index, value);
-    if (!put_is_live) {
-      // This APUT can be eliminated; it stores the same value that's already in the array element.
-      // TODO: Eliminate the APUT.
-      return;
-    }
-  } else {
-    uint16_t location = gvn_->GetArrayLocation(array, index);
-    bool put_is_live = HandleAliasingValuesPut<AliasingArrayVersions>(
-        &aliasing_array_value_map_, type, location, value);
-    if (!put_is_live) {
-      // This APUT can be eliminated; it stores the same value that's already in the array element.
-      // TODO: Eliminate the APUT.
-      return;
-    }
-
-    // Clobber all escaped array refs for this type.
-    for (uint16_t escaped_array : escaped_refs_) {
-      EscapedArrayClobberKey clobber_key = { escaped_array, type };
-      escaped_array_clobber_set_.insert(clobber_key);
-    }
-  }
-}
-
-uint16_t LocalValueNumbering::HandleIGet(MIR* mir, uint16_t opcode) {
-  uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]);
-  HandleNullCheck(mir, base);
-  const MirFieldInfo& field_info = gvn_->GetMirGraph()->GetIFieldLoweringInfo(mir);
-  uint16_t res;
-  if (!field_info.IsResolved() || field_info.IsVolatile()) {
-    // Unresolved fields may be volatile, so handle them as such to be safe.
-    HandleInvokeOrClInitOrAcquireOp(mir);  // Volatile GETs have acquire semantics.
-    // Volatile fields always get a new memory version; field id is irrelevant.
-    // Use result s_reg - will be unique.
-    res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
-  } else {
-    uint16_t type = IGetMemAccessType(static_cast<Instruction::Code>(opcode));
-    uint16_t field_id = gvn_->GetIFieldId(mir);
-    if (IsNonAliasingIField(base, field_id, type)) {
-      uint16_t loc = gvn_->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
-      auto lb = non_aliasing_ifield_value_map_.lower_bound(loc);
-      if (lb != non_aliasing_ifield_value_map_.end() && lb->first == loc) {
-        res = lb->second;
-      } else {
-        res = gvn_->LookupValue(kNonAliasingIFieldInitialOp, loc, kNoValue, kNoValue);
-        non_aliasing_ifield_value_map_.PutBefore(lb, loc, res);
-      }
-    } else {
-      res = HandleAliasingValuesGet<AliasingIFieldVersions>(&aliasing_ifield_value_map_,
-                                                            field_id, base);
-    }
-  }
-  if (opcode == Instruction::IGET_WIDE) {
-    SetOperandValueWide(mir->ssa_rep->defs[0], res);
-  } else {
-    SetOperandValue(mir->ssa_rep->defs[0], res);
-  }
-  return res;
-}
-
-void LocalValueNumbering::HandleIPut(MIR* mir, uint16_t opcode) {
-  int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1;
-  uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]);
-  HandleNullCheck(mir, base);
-  uint16_t type = IPutMemAccessType(static_cast<Instruction::Code>(opcode));
-  const MirFieldInfo& field_info = gvn_->GetMirGraph()->GetIFieldLoweringInfo(mir);
-  if (!field_info.IsResolved()) {
-    // Unresolved fields always alias with everything of the same type.
-    // Use mir->offset as modifier; without elaborate inlining, it will be unique.
-    unresolved_ifield_version_[type] =
-        gvn_->LookupValue(kUnresolvedIFieldOp, kNoValue, kNoValue, mir->offset);
-
-    // For simplicity, treat base as escaped now.
-    HandleEscapingRef(base);
-
-    // Clobber all fields of escaped references of the same type.
-    for (uint16_t escaped_ref : escaped_refs_) {
-      EscapedIFieldClobberKey clobber_key = { escaped_ref, type, kNoValue };
-      escaped_ifield_clobber_set_.insert(clobber_key);
-    }
-
-    // Aliasing fields of the same type may have been overwritten.
-    auto it = aliasing_ifield_value_map_.begin(), end = aliasing_ifield_value_map_.end();
-    while (it != end) {
-      if (gvn_->GetIFieldType(it->first) != type) {
-        ++it;
-      } else {
-        it = aliasing_ifield_value_map_.erase(it);
-      }
-    }
-  } else if (field_info.IsVolatile()) {
-    // Nothing to do; resolved volatile fields always get a new memory version anyway and
-    // can't alias with resolved non-volatile fields.
-  } else {
-    uint16_t field_id = gvn_->GetIFieldId(mir);
-    uint16_t value = (opcode == Instruction::IPUT_WIDE)
-                     ? GetOperandValueWide(mir->ssa_rep->uses[0])
-                     : GetOperandValue(mir->ssa_rep->uses[0]);
-    if (IsNonAliasing(base)) {
-      uint16_t loc = gvn_->LookupValue(kNonAliasingIFieldLocOp, base, field_id, type);
-      auto lb = non_aliasing_ifield_value_map_.lower_bound(loc);
-      if (lb != non_aliasing_ifield_value_map_.end() && lb->first == loc) {
-        if (lb->second == value) {
-          // This IPUT can be eliminated; it stores the same value that's already in the field.
-          // TODO: Eliminate the IPUT.
-          return;
-        }
-        lb->second = value;  // Overwrite.
-      } else {
-        non_aliasing_ifield_value_map_.PutBefore(lb, loc, value);
-      }
-    } else {
-      bool put_is_live = HandleAliasingValuesPut<AliasingIFieldVersions>(
-          &aliasing_ifield_value_map_, field_id, base, value);
-      if (!put_is_live) {
-        // This IPUT can be eliminated; it stores the same value that's already in the field.
-        // TODO: Eliminate the IPUT.
-        return;
-      }
-
-      // Clobber this field in all escaped references.
-      for (uint16_t escaped_ref : escaped_refs_) {
-        EscapedIFieldClobberKey clobber_key = { escaped_ref, type, field_id };
-        escaped_ifield_clobber_set_.insert(clobber_key);
-      }
-    }
-  }
-}
-
-uint16_t LocalValueNumbering::HandleSGet(MIR* mir, uint16_t opcode) {
-  const MirSFieldLoweringInfo& field_info = gvn_->GetMirGraph()->GetSFieldLoweringInfo(mir);
-  if (!field_info.IsResolved() || field_info.IsVolatile() ||
-      (!field_info.IsClassInitialized() &&
-       (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0)) {
-    // Volatile SGETs (and unresolved fields are potentially volatile) have acquire semantics
-    // and class initialization can call arbitrary functions, so we need to wipe aliasing values.
-    HandleInvokeOrClInitOrAcquireOp(mir);
-  }
-  uint16_t res;
-  if (!field_info.IsResolved() || field_info.IsVolatile()) {
-    // Unresolved fields may be volatile, so handle them as such to be safe.
-    // Volatile fields always get a new memory version; field id is irrelevant.
-    // Use result s_reg - will be unique.
-    res = gvn_->LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue);
-  } else {
-    uint16_t type = SGetMemAccessType(static_cast<Instruction::Code>(opcode));
-    uint16_t field_id = gvn_->GetSFieldId(mir);
-    auto lb = sfield_value_map_.lower_bound(field_id);
-    if (lb != sfield_value_map_.end() && lb->first == field_id) {
-      res = lb->second;
-    } else {
-      // Resolved non-volatile static fields can alias with non-resolved fields of the same type,
-      // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_
-      // to determine the version of the field.
-      res = gvn_->LookupValue(kResolvedSFieldOp, field_id,
-                              unresolved_sfield_version_[type], global_memory_version_);
-      sfield_value_map_.PutBefore(lb, field_id, res);
-    }
-  }
-  if (opcode == Instruction::SGET_WIDE) {
-    SetOperandValueWide(mir->ssa_rep->defs[0], res);
-  } else {
-    SetOperandValue(mir->ssa_rep->defs[0], res);
-  }
-  return res;
-}
-
-void LocalValueNumbering::HandleSPut(MIR* mir, uint16_t opcode) {
-  const MirSFieldLoweringInfo& field_info = gvn_->GetMirGraph()->GetSFieldLoweringInfo(mir);
-  if (!field_info.IsClassInitialized() &&
-      (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
-    // Class initialization can call arbitrary functions, we need to wipe aliasing values.
-    HandleInvokeOrClInitOrAcquireOp(mir);
-  }
-  uint16_t type = SPutMemAccessType(static_cast<Instruction::Code>(opcode));
-  if (!field_info.IsResolved()) {
-    // Unresolved fields always alias with everything of the same type.
-    // Use mir->offset as modifier; without elaborate inlining, it will be unique.
-    unresolved_sfield_version_[type] =
-        gvn_->LookupValue(kUnresolvedSFieldOp, kNoValue, kNoValue, mir->offset);
-    RemoveSFieldsForType(type);
-  } else if (field_info.IsVolatile()) {
-    // Nothing to do; resolved volatile fields always get a new memory version anyway and
-    // can't alias with resolved non-volatile fields.
-  } else {
-    uint16_t field_id = gvn_->GetSFieldId(mir);
-    uint16_t value = (opcode == Instruction::SPUT_WIDE)
-                     ? GetOperandValueWide(mir->ssa_rep->uses[0])
-                     : GetOperandValue(mir->ssa_rep->uses[0]);
-    // Resolved non-volatile static fields can alias with non-resolved fields of the same type,
-    // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_
-    // to determine the version of the field.
-    auto lb = sfield_value_map_.lower_bound(field_id);
-    if (lb != sfield_value_map_.end() && lb->first == field_id) {
-      if (lb->second == value) {
-        // This SPUT can be eliminated; it stores the same value that's already in the field.
-        // TODO: Eliminate the SPUT.
-        return;
-      }
-      lb->second = value;  // Overwrite.
-    } else {
-      sfield_value_map_.PutBefore(lb, field_id, value);
-    }
-  }
-}
-
-void LocalValueNumbering::RemoveSFieldsForType(uint16_t type) {
-  // Erase all static fields of this type from the sfield_value_map_.
-  for (auto it = sfield_value_map_.begin(), end = sfield_value_map_.end(); it != end; ) {
-    if (gvn_->GetSFieldType(it->first) == type) {
-      it = sfield_value_map_.erase(it);
-    } else {
-      ++it;
-    }
-  }
-}
-
-void LocalValueNumbering::HandleInvokeOrClInitOrAcquireOp(MIR* mir) {
-  // Use mir->offset as modifier; without elaborate inlining, it will be unique.
-  global_memory_version_ =
-      gvn_->LookupValue(kInvokeMemoryVersionBumpOp, 0u, 0u, mir->offset);
-  // All static fields, as well as instance fields and array elements of aliasing
-  // references (including escaped references), may have been modified.
-  sfield_value_map_.clear();
-  aliasing_ifield_value_map_.clear();
-  aliasing_array_value_map_.clear();
-  escaped_refs_.clear();
-  escaped_ifield_clobber_set_.clear();
-  escaped_array_clobber_set_.clear();
-}
-
-uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) {
-  uint16_t res = kNoValue;
-  uint16_t opcode = mir->dalvikInsn.opcode;
-  switch (opcode) {
-    case Instruction::NOP:
-    case Instruction::RETURN_VOID:
-    case Instruction::RETURN:
-    case Instruction::RETURN_OBJECT:
-    case Instruction::RETURN_WIDE:
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32:
-    case Instruction::THROW:
-    case Instruction::FILL_ARRAY_DATA:
-    case Instruction::PACKED_SWITCH:
-    case Instruction::SPARSE_SWITCH:
-    case Instruction::IF_EQ:
-    case Instruction::IF_NE:
-    case Instruction::IF_LT:
-    case Instruction::IF_GE:
-    case Instruction::IF_GT:
-    case Instruction::IF_LE:
-    case Instruction::IF_EQZ:
-    case Instruction::IF_NEZ:
-    case Instruction::IF_LTZ:
-    case Instruction::IF_GEZ:
-    case Instruction::IF_GTZ:
-    case Instruction::IF_LEZ:
-    case kMirOpFusedCmplFloat:
-    case kMirOpFusedCmpgFloat:
-    case kMirOpFusedCmplDouble:
-    case kMirOpFusedCmpgDouble:
-    case kMirOpFusedCmpLong:
-      // Nothing defined - take no action.
-      break;
-
-    case Instruction::MONITOR_ENTER:
-      HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
-      HandleInvokeOrClInitOrAcquireOp(mir);  // Acquire operation.
-      break;
-
-    case Instruction::MONITOR_EXIT:
-      HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
-      // When running GVN with CanModify(), an uneliminated null check indicates a bytecode error.
-      if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 &&
-          gvn_->work_lvn_ != nullptr && gvn_->CanModify()) {
-        LOG(WARNING) << "Bytecode error: MONITOR_EXIT is still null checked at 0x" << std::hex
-            << mir->offset << " in " << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file);
-      }
-      break;
-
-    case Instruction::FILLED_NEW_ARRAY:
-    case Instruction::FILLED_NEW_ARRAY_RANGE:
-      // Nothing defined but the result will be unique and non-null.
-      if (mir->next != nullptr && mir->next->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
-        uint16_t array = MarkNonAliasingNonNull(mir->next);
-        // Do not SetOperandValue(), we'll do that when we process the MOVE_RESULT_OBJECT.
-        if (kLocalValueNumberingEnableFilledNewArrayTracking && mir->ssa_rep->num_uses != 0u) {
-          AliasingValues* values = GetAliasingValues(&non_aliasing_array_value_map_, array);
-          // Clear the value if we got a merged version in a loop.
-          *values = AliasingValues(this);
-          for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) {
-            DCHECK_EQ(High16Bits(i), 0u);
-            uint16_t index = gvn_->LookupValue(Instruction::CONST, i, 0u, 0);
-            uint16_t value = GetOperandValue(mir->ssa_rep->uses[i]);
-            values->load_value_map.Put(index, value);
-            RangeCheckKey key = { array, index };
-            range_checked_.insert(key);
-          }
-        }
-        // The MOVE_RESULT_OBJECT will be processed next and we'll return the value name then.
-      }
-      // All args escaped (if references).
-      for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) {
-        uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]);
-        HandleEscapingRef(reg);
-      }
-      break;
-
-    case kMirOpNullCheck:
-      HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
-      break;
-
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_INTERFACE_RANGE: {
-        // Nothing defined but handle the null check.
-        uint16_t reg = GetOperandValue(mir->ssa_rep->uses[0]);
-        HandleNullCheck(mir, reg);
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-      // Make ref args aliasing.
-      HandleInvokeArgs(mir, this);
-      HandleInvokeOrClInitOrAcquireOp(mir);
-      break;
-
-    case Instruction::INSTANCE_OF: {
-        uint16_t operand = GetOperandValue(mir->ssa_rep->uses[0]);
-        uint16_t type = mir->dalvikInsn.vC;
-        res = gvn_->LookupValue(Instruction::INSTANCE_OF, operand, type, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-    case Instruction::CHECK_CAST:
-      if (gvn_->CanModify()) {
-        // Check if there was an instance-of operation on the same value and if we are
-        // in a block where its result is true. If so, we can eliminate the check-cast.
-        uint16_t operand = GetOperandValue(mir->ssa_rep->uses[0]);
-        uint16_t type = mir->dalvikInsn.vB;
-        uint16_t cond = gvn_->FindValue(Instruction::INSTANCE_OF, operand, type, kNoValue);
-        if (cond != kNoValue && gvn_->IsTrueInBlock(cond, Id())) {
-          if (gvn_->GetCompilationUnit()->verbose) {
-            LOG(INFO) << "Removing check-cast at 0x" << std::hex << mir->offset;
-          }
-          // Don't use kMirOpNop. Keep the check-cast as it defines the type of the register.
-          mir->optimization_flags |= MIR_IGNORE_CHECK_CAST;
-        }
-      }
-      break;
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_OBJECT:
-      // 1 result, treat as unique each time, use result s_reg - will be unique.
-      res = GetOperandValue(mir->ssa_rep->defs[0]);
-      SetOperandValue(mir->ssa_rep->defs[0], res);
-      break;
-    case Instruction::MOVE_EXCEPTION:
-    case Instruction::NEW_INSTANCE:
-    case Instruction::NEW_ARRAY:
-      // 1 result, treat as unique each time, use result s_reg - will be unique.
-      res = MarkNonAliasingNonNull(mir);
-      SetOperandValue(mir->ssa_rep->defs[0], res);
-      break;
-    case Instruction::CONST_CLASS:
-      DCHECK_EQ(Low16Bits(mir->dalvikInsn.vB), mir->dalvikInsn.vB);
-      res = gvn_->LookupValue(Instruction::CONST_CLASS, mir->dalvikInsn.vB, 0, 0);
-      SetOperandValue(mir->ssa_rep->defs[0], res);
-      null_checked_.insert(res);
-      non_aliasing_refs_.insert(res);
-      break;
-    case Instruction::CONST_STRING:
-    case Instruction::CONST_STRING_JUMBO:
-      // These strings are internalized, so assign value based on the string pool index.
-      res = gvn_->LookupValue(Instruction::CONST_STRING, Low16Bits(mir->dalvikInsn.vB),
-                              High16Bits(mir->dalvikInsn.vB), 0);
-      SetOperandValue(mir->ssa_rep->defs[0], res);
-      null_checked_.insert(res);  // May already be there.
-      // NOTE: Hacking the contents of an internalized string via reflection is possible
-      // but the behavior is undefined. Therefore, we consider the string constant and
-      // the reference non-aliasing.
-      // TUNING: We could keep this property even if the reference "escapes".
-      non_aliasing_refs_.insert(res);  // May already be there.
-      break;
-    case Instruction::MOVE_RESULT_WIDE:
-      // 1 wide result, treat as unique each time, use result s_reg - will be unique.
-      res = GetOperandValueWide(mir->ssa_rep->defs[0]);
-      SetOperandValueWide(mir->ssa_rep->defs[0], res);
-      break;
-
-    case kMirOpPhi:
-      res = HandlePhi(mir);
-      break;
-
-    case Instruction::MOVE:
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_16:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_OBJECT_FROM16:
-    case kMirOpCopy:
-      // Just copy value number of source to value number of result.
-      res = GetOperandValue(mir->ssa_rep->uses[0]);
-      SetOperandValue(mir->ssa_rep->defs[0], res);
-      break;
-
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_16:
-    case Instruction::MOVE_WIDE_FROM16:
-      // Just copy value number of source to value number of result.
-      res = GetOperandValueWide(mir->ssa_rep->uses[0]);
-      SetOperandValueWide(mir->ssa_rep->defs[0], res);
-      break;
-
-    case Instruction::CONST_HIGH16:
-      res = HandleConst(mir, mir->dalvikInsn.vB << 16);
-      break;
-    case Instruction::CONST:
-    case Instruction::CONST_4:
-    case Instruction::CONST_16:
-      res = HandleConst(mir, mir->dalvikInsn.vB);
-      break;
-
-    case Instruction::CONST_WIDE_16:
-    case Instruction::CONST_WIDE_32:
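-      // Sign-extend the 32-bit vB to 64 bits before value numbering the wide constant.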
-      res = HandleConstWide(
-          mir,
-          mir->dalvikInsn.vB +
-              ((mir->dalvikInsn.vB & 0x80000000) != 0 ? UINT64_C(0xffffffff00000000) : 0u));
-      break;
-
-    case Instruction::CONST_WIDE:
-      res = HandleConstWide(mir, mir->dalvikInsn.vB_wide);
-      break;
-
-    case Instruction::CONST_WIDE_HIGH16:
-      res = HandleConstWide(mir, static_cast<uint64_t>(mir->dalvikInsn.vB) << 48);
-      break;
-
-    case Instruction::ARRAY_LENGTH: {
-        // Handle the null check.
-        uint16_t reg = GetOperandValue(mir->ssa_rep->uses[0]);
-        HandleNullCheck(mir, reg);
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::NEG_INT:
-    case Instruction::NOT_INT:
-    case Instruction::NEG_FLOAT:
-    case Instruction::INT_TO_BYTE:
-    case Instruction::INT_TO_SHORT:
-    case Instruction::INT_TO_CHAR:
-    case Instruction::INT_TO_FLOAT:
-    case Instruction::FLOAT_TO_INT: {
-        // res = op + 1 operand
-        uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::LONG_TO_FLOAT:
-    case Instruction::LONG_TO_INT:
-    case Instruction::DOUBLE_TO_FLOAT:
-    case Instruction::DOUBLE_TO_INT: {
-        // res = op + 1 wide operand
-        uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::DOUBLE_TO_LONG:
-    case Instruction::LONG_TO_DOUBLE:
-    case Instruction::NEG_LONG:
-    case Instruction::NOT_LONG:
-    case Instruction::NEG_DOUBLE: {
-        // wide res = op + 1 wide operand
-        uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
-        SetOperandValueWide(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::FLOAT_TO_DOUBLE:
-    case Instruction::FLOAT_TO_LONG:
-    case Instruction::INT_TO_DOUBLE:
-    case Instruction::INT_TO_LONG: {
-        // wide res = op + 1 operand
-        uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        res = gvn_->LookupValue(opcode, operand1, kNoValue, kNoValue);
-        SetOperandValueWide(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::CMPL_DOUBLE:
-    case Instruction::CMPG_DOUBLE:
-    case Instruction::CMP_LONG: {
-        // res = op + 2 wide operands
-        uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]);
-        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
-      HandleDivZeroCheck(mir, GetOperandValue(mir->ssa_rep->uses[1]));
-      FALLTHROUGH_INTENDED;
-
-    case Instruction::CMPG_FLOAT:
-    case Instruction::CMPL_FLOAT:
-    case Instruction::ADD_INT:
-    case Instruction::ADD_INT_2ADDR:
-    case Instruction::MUL_INT:
-    case Instruction::MUL_INT_2ADDR:
-    case Instruction::AND_INT:
-    case Instruction::AND_INT_2ADDR:
-    case Instruction::OR_INT:
-    case Instruction::OR_INT_2ADDR:
-    case Instruction::XOR_INT:
-    case Instruction::XOR_INT_2ADDR:
-    case Instruction::SUB_INT:
-    case Instruction::SUB_INT_2ADDR:
-    case Instruction::SHL_INT:
-    case Instruction::SHL_INT_2ADDR:
-    case Instruction::SHR_INT:
-    case Instruction::SHR_INT_2ADDR:
-    case Instruction::USHR_INT:
-    case Instruction::USHR_INT_2ADDR: {
-        // res = op + 2 operands
-        uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]);
-        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::DIV_LONG:
-    case Instruction::REM_LONG:
-    case Instruction::DIV_LONG_2ADDR:
-    case Instruction::REM_LONG_2ADDR:
-      HandleDivZeroCheck(mir, GetOperandValueWide(mir->ssa_rep->uses[2]));
-      FALLTHROUGH_INTENDED;
-
-    case Instruction::ADD_LONG:
-    case Instruction::SUB_LONG:
-    case Instruction::MUL_LONG:
-    case Instruction::AND_LONG:
-    case Instruction::OR_LONG:
-    case Instruction::XOR_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-    case Instruction::SUB_LONG_2ADDR:
-    case Instruction::MUL_LONG_2ADDR:
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::OR_LONG_2ADDR:
-    case Instruction::XOR_LONG_2ADDR:
-    case Instruction::ADD_DOUBLE:
-    case Instruction::SUB_DOUBLE:
-    case Instruction::MUL_DOUBLE:
-    case Instruction::DIV_DOUBLE:
-    case Instruction::REM_DOUBLE:
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE_2ADDR: {
-        // wide res = op + 2 wide operands
-        uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]);
-        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
-        SetOperandValueWide(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::SHL_LONG:
-    case Instruction::SHR_LONG:
-    case Instruction::USHR_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-    case Instruction::SHR_LONG_2ADDR:
-    case Instruction::USHR_LONG_2ADDR: {
-        // wide res = op + 1 wide operand + 1 operand
-        uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[2]);
-        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
-        SetOperandValueWide(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::ADD_FLOAT:
-    case Instruction::SUB_FLOAT:
-    case Instruction::MUL_FLOAT:
-    case Instruction::DIV_FLOAT:
-    case Instruction::REM_FLOAT:
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT_2ADDR: {
-        // res = op + 2 operands
-        uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]);
-        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::RSUB_INT:
-    case Instruction::ADD_INT_LIT16:
-    case Instruction::MUL_INT_LIT16:
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::REM_INT_LIT16:
-    case Instruction::AND_INT_LIT16:
-    case Instruction::OR_INT_LIT16:
-    case Instruction::XOR_INT_LIT16:
-    case Instruction::ADD_INT_LIT8:
-    case Instruction::RSUB_INT_LIT8:
-    case Instruction::MUL_INT_LIT8:
-    case Instruction::DIV_INT_LIT8:
-    case Instruction::REM_INT_LIT8:
-    case Instruction::AND_INT_LIT8:
-    case Instruction::OR_INT_LIT8:
-    case Instruction::XOR_INT_LIT8:
-    case Instruction::SHL_INT_LIT8:
-    case Instruction::SHR_INT_LIT8:
-    case Instruction::USHR_INT_LIT8: {
-        // Same as res = op + 2 operands, except use vC as operand 2
-        uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]);
-        uint16_t operand2 = gvn_->LookupValue(Instruction::CONST, mir->dalvikInsn.vC, 0, 0);
-        res = gvn_->LookupValue(opcode, operand1, operand2, kNoValue);
-        SetOperandValue(mir->ssa_rep->defs[0], res);
-      }
-      break;
-
-    case Instruction::AGET_OBJECT:
-    case Instruction::AGET:
-    case Instruction::AGET_WIDE:
-    case Instruction::AGET_BOOLEAN:
-    case Instruction::AGET_BYTE:
-    case Instruction::AGET_CHAR:
-    case Instruction::AGET_SHORT:
-      res = HandleAGet(mir, opcode);
-      break;
-
-    case Instruction::APUT_OBJECT:
-      HandlePutObject(mir);
-      FALLTHROUGH_INTENDED;
-    case Instruction::APUT:
-    case Instruction::APUT_WIDE:
-    case Instruction::APUT_BYTE:
-    case Instruction::APUT_BOOLEAN:
-    case Instruction::APUT_SHORT:
-    case Instruction::APUT_CHAR:
-      HandleAPut(mir, opcode);
-      break;
-
-    case Instruction::IGET_OBJECT:
-    case Instruction::IGET:
-    case Instruction::IGET_WIDE:
-    case Instruction::IGET_BOOLEAN:
-    case Instruction::IGET_BYTE:
-    case Instruction::IGET_CHAR:
-    case Instruction::IGET_SHORT:
-      res = HandleIGet(mir, opcode);
-      break;
-
-    case Instruction::IPUT_OBJECT:
-      HandlePutObject(mir);
-      FALLTHROUGH_INTENDED;
-    case Instruction::IPUT:
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_SHORT:
-      HandleIPut(mir, opcode);
-      break;
-
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT:
-      res = HandleSGet(mir, opcode);
-      break;
-
-    case Instruction::SPUT_OBJECT:
-      HandlePutObject(mir);
-      FALLTHROUGH_INTENDED;
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT:
-      HandleSPut(mir, opcode);
-      break;
-  }
-  return res;
-}
-
-uint16_t LocalValueNumbering::GetEndingVregValueNumberImpl(int v_reg, bool wide) const {
-  const BasicBlock* bb = gvn_->GetBasicBlock(Id());
-  DCHECK(bb != nullptr);
-  int s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
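-  // INVALID_SREG means the v_reg has no SSA name at the block exit.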
-  if (s_reg == INVALID_SREG) {
-    return kNoValue;
-  }
-  if (gvn_->GetMirGraph()->GetRegLocation(s_reg).wide != wide) {
-    return kNoValue;
-  }
-  if (wide) {
-    int high_s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg + 1];
-    if (high_s_reg != s_reg + 1) {
-      return kNoValue;  // High word has been overwritten.
-    }
-    return GetSregValueWide(s_reg);
-  } else {
-    return GetSregValue(s_reg);
-  }
-}
-
-uint16_t LocalValueNumbering::GetStartingVregValueNumberImpl(int v_reg, bool wide) const {
-  DCHECK_EQ(gvn_->mode_, GlobalValueNumbering::kModeGvnPostProcessing);
-  DCHECK(gvn_->CanModify());
-  const BasicBlock* bb = gvn_->GetBasicBlock(Id());
-  DCHECK(bb != nullptr);
-  DCHECK_NE(bb->predecessors.size(), 0u);
-  if (bb->predecessors.size() == 1u) {
-    return gvn_->GetLvn(bb->predecessors[0])->GetEndingVregValueNumberImpl(v_reg, wide);
-  }
-  merge_names_.clear();
-  uint16_t value_name = kNoValue;
-  bool same_values = true;
-  for (BasicBlockId pred_id : bb->predecessors) {
-    value_name = gvn_->GetLvn(pred_id)->GetEndingVregValueNumberImpl(v_reg, wide);
-    if (value_name == kNoValue) {
-      return kNoValue;
-    }
-    same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
-    merge_names_.push_back(value_name);
-  }
-  if (same_values) {
-    // value_name already contains the result.
-  } else {
-    auto lb = merge_map_.lower_bound(merge_names_);
-    if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
-      value_name = lb->second;
-    } else {
-      value_name = kNoValue;  // We never assigned a value name to this set of merged names.
-    }
-  }
-  return value_name;
-}
-
-}    // namespace art
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
deleted file mode 100644
index dff5e27..0000000
--- a/compiler/dex/local_value_numbering.h
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
-#define ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
-
-#include <memory>
-
-#include "base/arena_object.h"
-#include "base/logging.h"
-#include "dex_instruction_utils.h"
-#include "global_value_numbering.h"
-
-namespace art {
-
-class DexFile;
-
-// Enable/disable tracking values stored in the FILLED_NEW_ARRAY result.
-static constexpr bool kLocalValueNumberingEnableFilledNewArrayTracking = true;
-
-class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
- private:
-  static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
-
- public:
-  LocalValueNumbering(GlobalValueNumbering* gvn, BasicBlockId id, ScopedArenaAllocator* allocator);
-
-  BasicBlockId Id() const {
-    return id_;
-  }
-
-  bool Equals(const LocalValueNumbering& other) const;
-
-  bool IsValueNullChecked(uint16_t value_name) const {
-    return null_checked_.find(value_name) != null_checked_.end();
-  }
-
-  bool IsValueDivZeroChecked(uint16_t value_name) const {
-    return div_zero_checked_.find(value_name) != div_zero_checked_.end();
-  }
-
-  uint16_t GetSregValue(uint16_t s_reg) const {
-    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
-    return GetSregValueImpl(s_reg, &sreg_value_map_);
-  }
-
-  uint16_t GetSregValueWide(uint16_t s_reg) const {
-    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
-    return GetSregValueImpl(s_reg, &sreg_wide_value_map_);
-  }
-
-  // Get the starting value number for a given dalvik register.
-  uint16_t GetStartingVregValueNumber(int v_reg) const {
-    return GetStartingVregValueNumberImpl(v_reg, false);
-  }
-
-  // Get the starting value number for a given wide dalvik register.
-  uint16_t GetStartingVregValueNumberWide(int v_reg) const {
-    return GetStartingVregValueNumberImpl(v_reg, true);
-  }
-
-  enum MergeType {
-    kNormalMerge,
-    kCatchMerge,
-    kReturnMerge,  // RETURN or PHI+RETURN. Merge only sreg maps.
-  };
-
-  void MergeOne(const LocalValueNumbering& other, MergeType merge_type);
-  void Merge(MergeType merge_type);  // Merge gvn_->merge_lvns_.
-  void PrepareEntryBlock();
-
-  uint16_t GetValueNumber(MIR* mir);
-
- private:
-  // A set of value names.
-  typedef GlobalValueNumbering::ValueNameSet ValueNameSet;
-
-  // Key is s_reg, value is value name.
-  typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap;
-
-  uint16_t GetEndingVregValueNumberImpl(int v_reg, bool wide) const;
-  uint16_t GetStartingVregValueNumberImpl(int v_reg, bool wide) const;
-
-  uint16_t GetSregValueImpl(int s_reg, const SregValueMap* map) const {
-    uint16_t res = kNoValue;
-    auto lb = map->find(s_reg);
-    if (lb != map->end()) {
-      res = lb->second;
-    } else {
-      res = gvn_->FindValue(kNoValue, s_reg, kNoValue, kNoValue);
-    }
-    return res;
-  }
-
-  void SetOperandValueImpl(uint16_t s_reg, uint16_t value, SregValueMap* map) {
-    DCHECK_EQ(map->count(s_reg), 0u);
-    map->Put(s_reg, value);
-  }
-
-  uint16_t GetOperandValueImpl(int s_reg, const SregValueMap* map) const {
-    uint16_t res = kNoValue;
-    auto lb = map->find(s_reg);
-    if (lb != map->end()) {
-      res = lb->second;
-    } else {
-      // Using the original value; s_reg refers to an input reg.
-      res = gvn_->LookupValue(kNoValue, s_reg, kNoValue, kNoValue);
-    }
-    return res;
-  }
-
-  void SetOperandValue(uint16_t s_reg, uint16_t value) {
-    DCHECK_EQ(sreg_wide_value_map_.count(s_reg), 0u);
-    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
-    SetOperandValueImpl(s_reg, value, &sreg_value_map_);
-  }
-
-  uint16_t GetOperandValue(int s_reg) const {
-    DCHECK_EQ(sreg_wide_value_map_.count(s_reg), 0u);
-    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
-    return GetOperandValueImpl(s_reg, &sreg_value_map_);
-  }
-
-  void SetOperandValueWide(uint16_t s_reg, uint16_t value) {
-    DCHECK_EQ(sreg_value_map_.count(s_reg), 0u);
-    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
-    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).high_word);
-    SetOperandValueImpl(s_reg, value, &sreg_wide_value_map_);
-  }
-
-  uint16_t GetOperandValueWide(int s_reg) const {
-    DCHECK_EQ(sreg_value_map_.count(s_reg), 0u);
-    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
-    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).high_word);
-    return GetOperandValueImpl(s_reg, &sreg_wide_value_map_);
-  }
-
-  struct RangeCheckKey {
-    uint16_t array;
-    uint16_t index;
-
-    // NOTE: Can't define this at namespace scope for a private struct.
-    bool operator==(const RangeCheckKey& other) const {
-      return array == other.array && index == other.index;
-    }
-  };
-
-  struct RangeCheckKeyComparator {
-    bool operator()(const RangeCheckKey& lhs, const RangeCheckKey& rhs) const {
-      if (lhs.array != rhs.array) {
-        return lhs.array < rhs.array;
-      }
-      return lhs.index < rhs.index;
-    }
-  };
-
-  typedef ScopedArenaSet<RangeCheckKey, RangeCheckKeyComparator> RangeCheckSet;
-
-  // Maps instance field "location" (derived from base, field_id and type) to value name.
-  typedef ScopedArenaSafeMap<uint16_t, uint16_t> IFieldLocToValueMap;
-
-  // Maps static field id to value name.
-  typedef ScopedArenaSafeMap<uint16_t, uint16_t> SFieldToValueMap;
-
-  struct EscapedIFieldClobberKey {
-    uint16_t base;      // Or array.
-    uint16_t type;
-    uint16_t field_id;  // None (kNoValue) for arrays and unresolved instance field stores.
-
-    // NOTE: Can't define this at namespace scope for a private struct.
-    bool operator==(const EscapedIFieldClobberKey& other) const {
-      return base == other.base && type == other.type && field_id == other.field_id;
-    }
-  };
-
-  struct EscapedIFieldClobberKeyComparator {
-    bool operator()(const EscapedIFieldClobberKey& lhs, const EscapedIFieldClobberKey& rhs) const {
-      // Compare base first. This makes sequential iteration respect the order of base.
-      if (lhs.base != rhs.base) {
-        return lhs.base < rhs.base;
-      }
-      // Compare type second. This makes the type-clobber entries (field_id == kNoValue) sort
-      // last for a given base and type, and makes it easy to prune unnecessary entries when
-      // merging escaped_ifield_clobber_set_ from multiple LVNs.
-      if (lhs.type != rhs.type) {
-        return lhs.type < rhs.type;
-      }
-      return lhs.field_id < rhs.field_id;
-    }
-  };
-
-  typedef ScopedArenaSet<EscapedIFieldClobberKey, EscapedIFieldClobberKeyComparator>
-      EscapedIFieldClobberSet;
-
-  struct EscapedArrayClobberKey {
-    uint16_t base;
-    uint16_t type;
-
-    // NOTE: Can't define this at namespace scope for a private struct.
-    bool operator==(const EscapedArrayClobberKey& other) const {
-      return base == other.base && type == other.type;
-    }
-  };
-
-  struct EscapedArrayClobberKeyComparator {
-    bool operator()(const EscapedArrayClobberKey& lhs, const EscapedArrayClobberKey& rhs) const {
-      // Compare base first. This makes sequential iteration respect the order of base.
-      if (lhs.base != rhs.base) {
-        return lhs.base < rhs.base;
-      }
-      return lhs.type < rhs.type;
-    }
-  };
-
-  // Clobber set for previously non-aliasing array refs that escaped.
-  typedef ScopedArenaSet<EscapedArrayClobberKey, EscapedArrayClobberKeyComparator>
-      EscapedArrayClobberSet;
-
-  // Known location values for an aliasing set. The set can be tied to one of:
-  //   1. Instance field. The locations are aliasing references used to access the field.
-  //   2. Non-aliasing array reference. The locations are indexes to the array.
-  //   3. Aliasing array type. The locations are (reference, index) pair ids assigned by GVN.
-  // In each case we keep track of the last stored value, if any, and the set of locations
-  // where it was stored. We also keep track of all values known for the current write state
-  // (load_value_map), which can be known either because they have been loaded since the last
-  // store or because they contained the last_stored_value before the store and thus could not
-  // have changed as a result.
-  struct AliasingValues {
-    explicit AliasingValues(LocalValueNumbering* lvn)
-        : memory_version_before_stores(kNoValue),
-          last_stored_value(kNoValue),
-          store_loc_set(std::less<uint16_t>(), lvn->null_checked_.get_allocator()),
-          last_load_memory_version(kNoValue),
-          load_value_map(std::less<uint16_t>(), lvn->null_checked_.get_allocator()) {
-    }
-
-    uint16_t memory_version_before_stores;  // kNoValue if start version for the field.
-    uint16_t last_stored_value;             // Last stored value name, kNoValue if none.
-    ValueNameSet store_loc_set;             // Where was last_stored_value stored.
-
-    // Maps refs (other than those in store_loc_set) to currently known values for this field.
-    // On write, anything that differs from the written value is removed as it may be overwritten.
-    uint16_t last_load_memory_version;    // kNoValue if not known.
-    ScopedArenaSafeMap<uint16_t, uint16_t> load_value_map;
-
-    // NOTE: Can't define this at namespace scope for a private struct.
-    bool operator==(const AliasingValues& other) const {
-      return memory_version_before_stores == other.memory_version_before_stores &&
-          last_load_memory_version == other.last_load_memory_version &&
-          last_stored_value == other.last_stored_value &&
-          store_loc_set == other.store_loc_set &&
-          load_value_map == other.load_value_map;
-    }
-  };
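-
-  // A worked sketch of the bookkeeping above (illustrative, not from the
-  // original code), for one aliasing instance field F, assuming v1 != v2:
-  //   v1 = o1.F;  // load_value_map[o1] = v1; last_load_memory_version set.
-  //   o2.F = v2;  // last_stored_value = v2; store_loc_set = {o2};
-  //               // load_value_map entries that differ from v2 are removed.
-  //   v3 = o1.F;  // o1 is not in store_loc_set and its old entry was pruned,
-  //               // so v3 becomes a fresh value name.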
-
-  // Maps instance field id to AliasingValues, locations are object refs.
-  typedef ScopedArenaSafeMap<uint16_t, AliasingValues> AliasingIFieldValuesMap;
-
-  // Maps non-aliasing array reference to AliasingValues, locations are array indexes.
-  typedef ScopedArenaSafeMap<uint16_t, AliasingValues> NonAliasingArrayValuesMap;
-
-  // Maps aliasing array type to AliasingValues, locations are (array, index) pair ids.
-  typedef ScopedArenaSafeMap<uint16_t, AliasingValues> AliasingArrayValuesMap;
-
-  // Helper classes defining versions for updating and merging the AliasingValues maps above.
-  class AliasingIFieldVersions;
-  class NonAliasingArrayVersions;
-  class AliasingArrayVersions;
-
-  template <typename Map>
-  AliasingValues* GetAliasingValues(Map* map, const typename Map::key_type& key);
-
-  template <typename Versions, typename KeyType>
-  void UpdateAliasingValuesLoadVersion(const KeyType& key, AliasingValues* values);
-
-  template <typename Versions, typename Map>
-  static uint16_t AliasingValuesMergeGet(GlobalValueNumbering* gvn,
-                                         const LocalValueNumbering* lvn,
-                                         Map* map, const typename Map::key_type& key,
-                                         uint16_t location);
-
-  template <typename Versions, typename Map>
-  uint16_t HandleAliasingValuesGet(Map* map, const typename Map::key_type& key,
-                                   uint16_t location);
-
-  template <typename Versions, typename Map>
-  bool HandleAliasingValuesPut(Map* map, const typename Map::key_type& key,
-                               uint16_t location, uint16_t value);
-
-  template <typename K>
-  void CopyAliasingValuesMap(ScopedArenaSafeMap<K, AliasingValues>* dest,
-                             const ScopedArenaSafeMap<K, AliasingValues>& src);
-
-  uint16_t MarkNonAliasingNonNull(MIR* mir);
-  bool IsNonAliasing(uint16_t reg) const;
-  bool IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type) const;
-  bool IsNonAliasingArray(uint16_t reg, uint16_t type) const;
-  void HandleNullCheck(MIR* mir, uint16_t reg);
-  void HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index);
-  void HandleDivZeroCheck(MIR* mir, uint16_t reg);
-  void HandlePutObject(MIR* mir);
-  void HandleEscapingRef(uint16_t base);
-  void HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn);
-  uint16_t HandlePhi(MIR* mir);
-  uint16_t HandleConst(MIR* mir, uint32_t value);
-  uint16_t HandleConstWide(MIR* mir, uint64_t value);
-  uint16_t HandleAGet(MIR* mir, uint16_t opcode);
-  void HandleAPut(MIR* mir, uint16_t opcode);
-  uint16_t HandleIGet(MIR* mir, uint16_t opcode);
-  void HandleIPut(MIR* mir, uint16_t opcode);
-  uint16_t HandleSGet(MIR* mir, uint16_t opcode);
-  void HandleSPut(MIR* mir, uint16_t opcode);
-  void RemoveSFieldsForType(uint16_t type);
-  void HandleInvokeOrClInitOrAcquireOp(MIR* mir);
-
-  bool SameMemoryVersion(const LocalValueNumbering& other) const;
-
-  uint16_t NewMemoryVersion(uint16_t* new_version);
-  void MergeMemoryVersions(bool clobbered_catch);
-
-  void PruneNonAliasingRefsForCatch();
-
-  template <typename Set, Set LocalValueNumbering::* set_ptr>
-  void IntersectSets();
-
-  void CopyLiveSregValues(SregValueMap* dest, const SregValueMap& src);
-
-  // Intersect SSA reg value maps as sets, ignore dead regs.
-  template <SregValueMap LocalValueNumbering::* map_ptr>
-  void IntersectSregValueMaps();
-
-  // Intersect maps as sets. The value type must be equality-comparable.
-  template <typename Map>
-  static void InPlaceIntersectMaps(Map* work_map, const Map& other_map);
-
-  template <typename Set, Set LocalValueNumbering::*set_ptr, void (LocalValueNumbering::*MergeFn)(
-      const typename Set::value_type& entry, typename Set::iterator hint)>
-  void MergeSets();
-
-  void IntersectAliasingValueLocations(AliasingValues* work_values, const AliasingValues* values);
-
-  void MergeEscapedRefs(const ValueNameSet::value_type& entry, ValueNameSet::iterator hint);
-  void MergeEscapedIFieldTypeClobberSets(const EscapedIFieldClobberSet::value_type& entry,
-                                         EscapedIFieldClobberSet::iterator hint);
-  void MergeEscapedIFieldClobberSets(const EscapedIFieldClobberSet::value_type& entry,
-                                     EscapedIFieldClobberSet::iterator hint);
-  void MergeEscapedArrayClobberSets(const EscapedArrayClobberSet::value_type& entry,
-                                    EscapedArrayClobberSet::iterator hint);
-  void MergeSFieldValues(const SFieldToValueMap::value_type& entry,
-                         SFieldToValueMap::iterator hint);
-  void MergeNonAliasingIFieldValues(const IFieldLocToValueMap::value_type& entry,
-                                    IFieldLocToValueMap::iterator hint);
-  void MergeNullChecked();
-  void MergeDivZeroChecked();
-
-  template <typename Map, Map LocalValueNumbering::*map_ptr, typename Versions>
-  void MergeAliasingValues(const typename Map::value_type& entry, typename Map::iterator hint);
-
-  GlobalValueNumbering* gvn_;
-
-  // We're using the block id as a 16-bit operand value for some lookups.
-  static_assert(sizeof(BasicBlockId) == sizeof(uint16_t), "BasicBlockId must be 16 bit");
-  BasicBlockId id_;
-
-  SregValueMap sreg_value_map_;
-  SregValueMap sreg_wide_value_map_;
-
-  SFieldToValueMap sfield_value_map_;
-  IFieldLocToValueMap non_aliasing_ifield_value_map_;
-  AliasingIFieldValuesMap aliasing_ifield_value_map_;
-  NonAliasingArrayValuesMap non_aliasing_array_value_map_;
-  AliasingArrayValuesMap aliasing_array_value_map_;
-
-  // Data for dealing with memory clobbering and store/load aliasing.
-  uint16_t global_memory_version_;
-  uint16_t unresolved_sfield_version_[kDexMemAccessTypeCount];
-  uint16_t unresolved_ifield_version_[kDexMemAccessTypeCount];
-  // Value names of references to objects that cannot be reached through a different value name.
-  ValueNameSet non_aliasing_refs_;
-  // Previously non-aliasing refs that escaped but can still be used for non-aliasing AGET/IGET.
-  ValueNameSet escaped_refs_;
-  // Blacklists for cases where escaped_refs_ can't be used.
-  EscapedIFieldClobberSet escaped_ifield_clobber_set_;
-  EscapedArrayClobberSet escaped_array_clobber_set_;
-
-  // Range check and null check elimination.
-  RangeCheckSet range_checked_;
-  ValueNameSet null_checked_;
-  ValueNameSet div_zero_checked_;
-
-  // Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack.
-  mutable ScopedArenaVector<uint16_t> merge_names_;
-  // Map to identify when different locations merge the same values.
-  ScopedArenaSafeMap<ScopedArenaVector<uint16_t>, uint16_t> merge_map_;
-  // New memory version for merge, kNoValue if all memory versions matched.
-  uint16_t merge_new_memory_version_;
-
-  DISALLOW_COPY_AND_ASSIGN(LocalValueNumbering);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
deleted file mode 100644
index f98969e..0000000
--- a/compiler/dex/local_value_numbering_test.cc
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dex/mir_field_info.h"
-#include "global_value_numbering.h"
-#include "local_value_numbering.h"
-#include "gtest/gtest.h"
-
-namespace art {
-
-class LocalValueNumberingTest : public testing::Test {
- protected:
-  struct IFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_field_idx;
-    bool is_volatile;
-    DexMemAccessType type;
-  };
-
-  struct SFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_field_idx;
-    bool is_volatile;
-    DexMemAccessType type;
-  };
-
-  struct MIRDef {
-    static constexpr size_t kMaxSsaDefs = 2;
-    static constexpr size_t kMaxSsaUses = 4;
-
-    Instruction::Code opcode;
-    int64_t value;
-    uint32_t field_info;
-    size_t num_uses;
-    int32_t uses[kMaxSsaUses];
-    size_t num_defs;
-    int32_t defs[kMaxSsaDefs];
-  };
-
-#define DEF_CONST(opcode, reg, value) \
-    { opcode, value, 0u, 0, { }, 1, { reg } }
-#define DEF_CONST_WIDE(opcode, reg, value) \
-    { opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_CONST_STRING(opcode, reg, index) \
-    { opcode, index, 0u, 0, { }, 1, { reg } }
-#define DEF_IGET(opcode, reg, obj, field_info) \
-    { opcode, 0u, field_info, 1, { obj }, 1, { reg } }
-#define DEF_IGET_WIDE(opcode, reg, obj, field_info) \
-    { opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } }
-#define DEF_IPUT(opcode, reg, obj, field_info) \
-    { opcode, 0u, field_info, 2, { reg, obj }, 0, { } }
-#define DEF_IPUT_WIDE(opcode, reg, obj, field_info) \
-    { opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } }
-#define DEF_SGET(opcode, reg, field_info) \
-    { opcode, 0u, field_info, 0, { }, 1, { reg } }
-#define DEF_SGET_WIDE(opcode, reg, field_info) \
-    { opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_SPUT(opcode, reg, field_info) \
-    { opcode, 0u, field_info, 1, { reg }, 0, { } }
-#define DEF_SPUT_WIDE(opcode, reg, field_info) \
-    { opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } }
-#define DEF_AGET(opcode, reg, obj, idx) \
-    { opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } }
-#define DEF_AGET_WIDE(opcode, reg, obj, idx) \
-    { opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } }
-#define DEF_APUT(opcode, reg, obj, idx) \
-    { opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } }
-#define DEF_APUT_WIDE(opcode, reg, obj, idx) \
-    { opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } }
-#define DEF_INVOKE1(opcode, reg) \
-    { opcode, 0u, 0u, 1, { reg }, 0, { } }
-#define DEF_UNIQUE_REF(opcode, reg) \
-    { opcode, 0u, 0u, 0, { }, 1, { reg } }  // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
-#define DEF_DIV_REM(opcode, result, dividend, divisor) \
-    { opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } }
-#define DEF_DIV_REM_WIDE(opcode, result, dividend, divisor) \
-    { opcode, 0u, 0u, 4, { dividend, dividend + 1, divisor, divisor + 1 }, 2, { result, result + 1 } }
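-
-// For example (illustrative), DEF_IGET(Instruction::IGET, 0u, 10u, 0u) expands
-// to the MIRDef initializer
-//   { Instruction::IGET, 0u, 0u, 1, { 10u }, 1, { 0u } }
-// i.e. an IGET of ifield_lowering_infos_[0] with one use (object in sreg 10)
-// and one def (sreg 0).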
-
-  void DoPrepareIFields(const IFieldDef* defs, size_t count) {
-    cu_.mir_graph->ifield_lowering_infos_.clear();
-    cu_.mir_graph->ifield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx, def->type, false);
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ &= ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile);
-      }
-      cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareIFields(const IFieldDef (&defs)[count]) {
-    DoPrepareIFields(defs, count);
-  }
-
-  void DoPrepareSFields(const SFieldDef* defs, size_t count) {
-    cu_.mir_graph->sfield_lowering_infos_.clear();
-    cu_.mir_graph->sfield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
-      // Mark even unresolved fields as initialized.
-      field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
-      // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by LVN.
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        field_info.flags_ &= ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile);
-      }
-      cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareSFields(const SFieldDef (&defs)[count]) {
-    DoPrepareSFields(defs, count);
-  }
-
-  void DoPrepareMIRs(const MIRDef* defs, size_t count) {
-    mir_count_ = count;
-    mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
-    ssa_reps_.resize(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const MIRDef* def = &defs[i];
-      MIR* mir = &mirs_[i];
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
-      mir->dalvikInsn.vB_wide = def->value;
-      if (IsInstructionIGetOrIPut(def->opcode)) {
-        ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
-        mir->meta.ifield_lowering_info = def->field_info;
-        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
-                  IGetOrIPutMemAccessType(def->opcode));
-      } else if (IsInstructionSGetOrSPut(def->opcode)) {
-        ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->field_info;
-        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
-                  SGetOrSPutMemAccessType(def->opcode));
-      }
-      mir->ssa_rep = &ssa_reps_[i];
-      mir->ssa_rep->num_uses = def->num_uses;
-      mir->ssa_rep->uses = const_cast<int32_t*>(def->uses);  // Not modified by LVN.
-      mir->ssa_rep->num_defs = def->num_defs;
-      mir->ssa_rep->defs = const_cast<int32_t*>(def->defs);  // Not modified by LVN.
-      mir->offset = i;  // LVN uses offset only for debug output.
-      mir->optimization_flags = 0u;
-
-      if (i != 0u) {
-        mirs_[i - 1u].next = mir;
-      }
-    }
-    mirs_[count - 1u].next = nullptr;
-  }
-
-  template <size_t count>
-  void PrepareMIRs(const MIRDef (&defs)[count]) {
-    DoPrepareMIRs(defs, count);
-  }
-
-  void MakeSFieldUninitialized(uint32_t sfield_index) {
-    CHECK_LT(sfield_index, cu_.mir_graph->sfield_lowering_infos_.size());
-    cu_.mir_graph->sfield_lowering_infos_[sfield_index].flags_ &=
-        ~MirSFieldLoweringInfo::kFlagClassIsInitialized;
-  }
-
-  template <size_t count>
-  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
-    for (int32_t sreg : sregs) {
-      cu_.mir_graph->reg_location_[sreg].wide = true;
-      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
-      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
-    }
-  }
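-
-  // For example, with `static const int32_t wide_sregs[] = { 3 };` a call to
-  // MarkAsWideSRegs(wide_sregs) marks the sreg pair (3, 4) as one wide value,
-  // flagging sreg 4 as the high word.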
-
-  void PerformLVN() {
-    cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
-        allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
-    cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
-        allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
-    gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
-                                                           GlobalValueNumbering::kModeLvn));
-    lvn_.reset(new (allocator_.get()) LocalValueNumbering(gvn_.get(), 0u, allocator_.get()));
-    value_names_.resize(mir_count_);
-    for (size_t i = 0; i != mir_count_; ++i) {
-      value_names_[i] = lvn_->GetValueNumber(&mirs_[i]);
-    }
-    EXPECT_TRUE(gvn_->Good());
-  }
-
-  LocalValueNumberingTest()
-      : pool_(),
-        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
-        mir_count_(0u),
-        mirs_(nullptr),
-        ssa_reps_(),
-        allocator_(),
-        gvn_(),
-        lvn_(),
-        value_names_() {
-    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
-    allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references, and the values are all narrow.
-    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
-    cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
-        kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
-    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
-  }
-
-  static constexpr size_t kMaxSsaRegs = 16384u;
-
-  ArenaPool pool_;
-  CompilationUnit cu_;
-  size_t mir_count_;
-  MIR* mirs_;
-  std::vector<SSARepresentation> ssa_reps_;
-  std::unique_ptr<ScopedArenaAllocator> allocator_;
-  std::unique_ptr<GlobalValueNumbering> gvn_;
-  std::unique_ptr<LocalValueNumbering> lvn_;
-  std::vector<uint16_t> value_names_;
-};
-
-TEST_F(LocalValueNumberingTest, IGetIGetInvokeIGet) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
-      DEF_IGET(Instruction::IGET, 1u, 10u, 0u),
-      DEF_INVOKE1(Instruction::INVOKE_VIRTUAL, 11u),
-      DEF_IGET(Instruction::IGET, 2u, 10u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 4u);
-  EXPECT_EQ(value_names_[0], value_names_[1]);
-  EXPECT_NE(value_names_[0], value_names_[3]);
-  EXPECT_EQ(mirs_[0].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[2].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK);
-}
-
-TEST_F(LocalValueNumberingTest, IGetIPutIGetIGetIGet) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessObject },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET(Instruction::IGET_OBJECT, 0u, 10u, 0u),
-      DEF_IPUT(Instruction::IPUT_OBJECT, 1u, 11u, 0u),  // May alias.
-      DEF_IGET(Instruction::IGET_OBJECT, 2u, 10u, 0u),
-      DEF_IGET(Instruction::IGET, 3u,  0u, 1u),
-      DEF_IGET(Instruction::IGET, 4u,  2u, 1u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 5u);
-  EXPECT_NE(value_names_[0], value_names_[2]);
-  EXPECT_NE(value_names_[3], value_names_[4]);
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    EXPECT_EQ((i == 2u) ? MIR_IGNORE_NULL_CHECK : 0,
-              mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UniquePreserve1) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 10u),
-      DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
-      DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u),  // No aliasing since 10u is unique.
-      DEF_IGET(Instruction::IGET, 2u, 10u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 4u);
-  EXPECT_EQ(value_names_[1], value_names_[3]);
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    EXPECT_EQ((i == 1u || i == 3u) ? MIR_IGNORE_NULL_CHECK : 0,
-              mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UniquePreserve2) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 11u),
-      DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
-      DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u),  // No aliasing since 11u is unique.
-      DEF_IGET(Instruction::IGET, 2u, 10u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 4u);
-  EXPECT_EQ(value_names_[1], value_names_[3]);
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    EXPECT_EQ((i == 2u || i == 3u) ? MIR_IGNORE_NULL_CHECK : 0,
-              mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UniquePreserveAndEscape) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 10u),
-      DEF_IGET(Instruction::IGET, 0u, 10u, 0u),
-      DEF_INVOKE1(Instruction::INVOKE_VIRTUAL, 11u),  // 10u still unique.
-      DEF_IGET(Instruction::IGET, 2u, 10u, 0u),
-      DEF_INVOKE1(Instruction::INVOKE_VIRTUAL, 10u),  // 10u not unique anymore.
-      DEF_IGET(Instruction::IGET, 3u, 10u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 6u);
-  EXPECT_EQ(value_names_[1], value_names_[3]);
-  EXPECT_NE(value_names_[1], value_names_[5]);
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    EXPECT_EQ((i == 1u || i == 3u || i == 4u || i == 5u) ? MIR_IGNORE_NULL_CHECK : 0,
-              mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, Volatile) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, true, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET(Instruction::IGET, 0u, 10u, 1u),  // Volatile.
-      DEF_IGET(Instruction::IGET, 1u,  0u, 0u),  // Non-volatile.
-      DEF_IGET(Instruction::IGET, 2u, 10u, 1u),  // Volatile.
-      DEF_IGET(Instruction::IGET, 3u,  2u, 1u),  // Non-volatile.
-      DEF_IGET(Instruction::IGET, 4u,  0u, 0u),  // Non-volatile.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 5u);
-  EXPECT_NE(value_names_[0], value_names_[2]);  // A volatile load always gets a new value name.
-  EXPECT_NE(value_names_[1], value_names_[3]);  // Different bases because of the volatile load.
-  EXPECT_NE(value_names_[1], value_names_[4]);  // Not guaranteed to be the same after "acquire".
-
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    EXPECT_EQ((i == 2u || i == 4u) ? MIR_IGNORE_NULL_CHECK : 0,
-              mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UnresolvedIField) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
-      { 2u, 1u, 2u, false, kDexMemAccessWide },  // Resolved field #2.
-      { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 30u),
-      DEF_IGET(Instruction::IGET, 1u, 30u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 2u, 31u, 0u),             // Resolved field #1.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 31u, 1u),   // Resolved field #2.
-      DEF_IGET(Instruction::IGET, 5u, 32u, 2u),             // Unresolved IGET can be "acquire".
-      DEF_IGET(Instruction::IGET, 6u, 30u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 7u, 31u, 0u),             // Resolved field #1.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 8u, 31u, 1u),   // Resolved field #2.
-      DEF_IPUT(Instruction::IPUT, 10u, 32u, 2u),            // IPUT clobbers field #1 (#2 is wide).
-      DEF_IGET(Instruction::IGET, 11u, 30u, 0u),            // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 12u, 31u, 0u),            // Resolved field #1, new value name.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 13u, 31u, 1u),  // Resolved field #2.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 15u, 30u, 1u),  // Resolved field #2, unique object.
-      DEF_IPUT(Instruction::IPUT, 17u, 30u, 2u),            // IPUT clobbers field #1 (#2 is wide).
-      DEF_IGET(Instruction::IGET, 18u, 30u, 0u),            // Resolved field #1, unique object.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 19u, 30u, 1u),  // Resolved field #2, unique object.
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 3, 8, 13, 15, 19 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 16u);
-  // Unresolved field is potentially volatile, so we need to adhere to the volatile semantics.
-  EXPECT_EQ(value_names_[1], value_names_[5]);    // Unique object.
-  EXPECT_NE(value_names_[2], value_names_[6]);    // Not guaranteed to be the same after "acquire".
-  EXPECT_NE(value_names_[3], value_names_[7]);    // Not guaranteed to be the same after "acquire".
-  EXPECT_EQ(value_names_[1], value_names_[9]);    // Unique object.
-  EXPECT_NE(value_names_[6], value_names_[10]);   // May alias with the unresolved IPUT.
-  EXPECT_EQ(value_names_[7], value_names_[11]);   // Still the same after "release".
-  EXPECT_EQ(value_names_[12], value_names_[15]);  // Still the same after "release".
-  EXPECT_NE(value_names_[1], value_names_[14]);   // May alias with the unresolved IPUT.
-  EXPECT_EQ(mirs_[0].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[2].optimization_flags, 0u);
-  EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK);
-  EXPECT_EQ(mirs_[4].optimization_flags, 0u);
-  for (size_t i = 5u; i != mir_count_; ++i) {
-    EXPECT_EQ((i == 1u || i == 3u || i >= 5u) ? MIR_IGNORE_NULL_CHECK : 0,
-              mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UnresolvedSField) {
-  static const SFieldDef sfields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
-      { 2u, 1u, 2u, false, kDexMemAccessWide },  // Resolved field #2.
-      { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(Instruction::SGET, 0u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 1u, 1u),  // Resolved field #2.
-      DEF_SGET(Instruction::SGET, 3u, 2u),            // Unresolved SGET can be "acquire".
-      DEF_SGET(Instruction::SGET, 4u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 5u, 1u),  // Resolved field #2.
-      DEF_SPUT(Instruction::SPUT, 7u, 2u),            // SPUT clobbers field #1 (#2 is wide).
-      DEF_SGET(Instruction::SGET, 8u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 9u, 1u),  // Resolved field #2.
-  };
-
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 1, 5, 9 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 8u);
-  // Unresolved field is potentially volatile, so we need to adhere to the volatile semantics.
-  EXPECT_NE(value_names_[0], value_names_[3]);  // Not guaranteed to be the same after "acquire".
-  EXPECT_NE(value_names_[1], value_names_[4]);  // Not guaranteed to be the same after "acquire".
-  EXPECT_NE(value_names_[3], value_names_[6]);  // May alias with the unresolved SPUT.
-  EXPECT_EQ(value_names_[4], value_names_[7]);  // Still the same after "release".
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    EXPECT_EQ(0, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UninitializedSField) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
-  };
-  static const SFieldDef sfields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },  // Resolved field #1.
-      { 2u, 1u, 2u, false, kDexMemAccessWord },  // Resolved field #2; uninitialized.
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 200u),
-      DEF_IGET(Instruction::IGET, 1u, 100u, 0u),
-      DEF_IGET(Instruction::IGET, 2u, 200u, 0u),
-      DEF_SGET(Instruction::SGET, 3u, 0u),
-      DEF_SGET(Instruction::SGET, 4u, 1u),            // Can call <clinit>().
-      DEF_IGET(Instruction::IGET, 5u, 100u, 0u),      // Differs from 1u.
-      DEF_IGET(Instruction::IGET, 6u, 200u, 0u),      // Same as 2u.
-      DEF_SGET(Instruction::SGET, 7u, 0u),            // Differs from 3u.
-  };
-
-  PrepareIFields(ifields);
-  PrepareSFields(sfields);
-  MakeSFieldUninitialized(1u);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 8u);
-  EXPECT_NE(value_names_[1], value_names_[5]);
-  EXPECT_EQ(value_names_[2], value_names_[6]);
-  EXPECT_NE(value_names_[3], value_names_[7]);
-}
-
-TEST_F(LocalValueNumberingTest, ConstString) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_STRING(Instruction::CONST_STRING, 0u, 0u),
-      DEF_CONST_STRING(Instruction::CONST_STRING, 1u, 0u),
-      DEF_CONST_STRING(Instruction::CONST_STRING, 2u, 2u),
-      DEF_CONST_STRING(Instruction::CONST_STRING, 3u, 0u),
-      DEF_INVOKE1(Instruction::INVOKE_DIRECT, 2u),
-      DEF_CONST_STRING(Instruction::CONST_STRING, 4u, 2u),
-  };
-
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 6u);
-  EXPECT_EQ(value_names_[1], value_names_[0]);
-  EXPECT_NE(value_names_[2], value_names_[0]);
-  EXPECT_EQ(value_names_[3], value_names_[0]);
-  EXPECT_EQ(value_names_[5], value_names_[2]);
-}
-
-TEST_F(LocalValueNumberingTest, SameValueInDifferentMemoryLocations) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const SFieldDef sfields[] = {
-      { 3u, 1u, 3u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 201u),
-      DEF_IGET(Instruction::IGET, 0u, 100u, 0u),
-      DEF_IPUT(Instruction::IPUT, 0u, 100u, 1u),
-      DEF_IPUT(Instruction::IPUT, 0u, 101u, 1u),
-      DEF_APUT(Instruction::APUT, 0u, 200u, 300u),
-      DEF_APUT(Instruction::APUT, 0u, 200u, 301u),
-      DEF_APUT(Instruction::APUT, 0u, 201u, 300u),
-      DEF_APUT(Instruction::APUT, 0u, 201u, 301u),
-      DEF_SPUT(Instruction::SPUT, 0u, 0u),
-      DEF_IGET(Instruction::IGET, 9u, 100u, 0u),
-      DEF_IGET(Instruction::IGET, 10u, 100u, 1u),
-      DEF_IGET(Instruction::IGET, 11u, 101u, 1u),
-      DEF_AGET(Instruction::AGET, 12u, 200u, 300u),
-      DEF_AGET(Instruction::AGET, 13u, 200u, 301u),
-      DEF_AGET(Instruction::AGET, 14u, 201u, 300u),
-      DEF_AGET(Instruction::AGET, 15u, 201u, 301u),
-      DEF_SGET(Instruction::SGET, 16u, 0u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 17u);
-  for (size_t i = 9; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(value_names_[1], value_names_[i]) << i;
-  }
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    int expected_flags =
-        ((i == 2u || (i >= 5u && i <= 7u) || (i >= 9u && i <= 15u)) ? MIR_IGNORE_NULL_CHECK : 0) |
-        ((i >= 12u && i <= 15u) ? MIR_IGNORE_RANGE_CHECK : 0);
-    EXPECT_EQ(expected_flags, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, UniqueArrayAliasing) {
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 20u),
-      DEF_AGET(Instruction::AGET, 1u, 20u, 40u),
-      DEF_APUT(Instruction::APUT, 2u, 20u, 41u),  // May alias with index for sreg 40u.
-      DEF_AGET(Instruction::AGET, 3u, 20u, 40u),
-  };
-
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 4u);
-  EXPECT_NE(value_names_[1], value_names_[3]);
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    int expected_flags =
-        ((i >= 1u) ? MIR_IGNORE_NULL_CHECK : 0) |
-        ((i == 3u) ? MIR_IGNORE_RANGE_CHECK : 0);
-    EXPECT_EQ(expected_flags, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, EscapingRefs) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },    // Field #1.
-      { 2u, 1u, 2u, false, kDexMemAccessWord },    // Field #2.
-      { 3u, 1u, 3u, false, kDexMemAccessObject },  // For storing escaping refs.
-      { 4u, 1u, 4u, false, kDexMemAccessWide },    // Wide.
-      { 5u, 0u, 0u, false, kDexMemAccessWord },    // Unresolved field, int.
-      { 6u, 0u, 0u, false, kDexMemAccessWide },    // Unresolved field, wide.
-  };
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
-      DEF_IGET(Instruction::IGET, 1u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 2u, 20u, 1u),
-      DEF_IPUT(Instruction::IPUT_OBJECT, 20u, 30u, 2u),      // Ref escapes.
-      DEF_IGET(Instruction::IGET, 4u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 5u, 20u, 1u),
-      DEF_IPUT(Instruction::IPUT, 6u, 31u, 0u),              // May alias with field #1.
-      DEF_IGET(Instruction::IGET, 7u, 20u, 0u),              // New value.
-      DEF_IGET(Instruction::IGET, 8u, 20u, 1u),              // Still the same.
-      DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 9u, 31u, 3u),    // No aliasing, different type.
-      DEF_IGET(Instruction::IGET, 11u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 12u, 20u, 1u),
-      DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 13u, 31u, 5u),   // No aliasing, different type.
-      DEF_IGET(Instruction::IGET, 15u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 16u, 20u, 1u),
-      DEF_IPUT(Instruction::IPUT, 17u, 31u, 4u),             // Aliasing, same type.
-      DEF_IGET(Instruction::IGET, 18u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 19u, 20u, 1u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 9, 13 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 18u);
-  EXPECT_EQ(value_names_[1], value_names_[4]);
-  EXPECT_EQ(value_names_[2], value_names_[5]);
-  EXPECT_NE(value_names_[4], value_names_[7]);  // New value.
-  EXPECT_EQ(value_names_[5], value_names_[8]);
-  EXPECT_EQ(value_names_[7], value_names_[10]);
-  EXPECT_EQ(value_names_[8], value_names_[11]);
-  EXPECT_EQ(value_names_[10], value_names_[13]);
-  EXPECT_EQ(value_names_[11], value_names_[14]);
-  EXPECT_NE(value_names_[13], value_names_[16]);  // New value.
-  EXPECT_NE(value_names_[14], value_names_[17]);  // New value.
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected =
-        ((i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0) |
-        ((i == 3u) ? MIR_STORE_NON_NULL_VALUE : 0);
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, EscapingArrayRefs) {
-  static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 20u),
-      DEF_AGET(Instruction::AGET, 1u, 20u, 40u),
-      DEF_AGET(Instruction::AGET, 2u, 20u, 41u),
-      DEF_APUT(Instruction::APUT_OBJECT, 20u, 30u, 42u),    // Array ref escapes.
-      DEF_AGET(Instruction::AGET, 4u, 20u, 40u),
-      DEF_AGET(Instruction::AGET, 5u, 20u, 41u),
-      DEF_APUT_WIDE(Instruction::APUT_WIDE, 6u, 31u, 43u),  // No aliasing, different type.
-      DEF_AGET(Instruction::AGET, 8u, 20u, 40u),
-      DEF_AGET(Instruction::AGET, 9u, 20u, 41u),
-      DEF_APUT(Instruction::APUT, 10u, 32u, 40u),           // May alias with all elements.
-      DEF_AGET(Instruction::AGET, 11u, 20u, 40u),           // New value (same index name).
-      DEF_AGET(Instruction::AGET, 12u, 20u, 41u),           // New value (different index name).
-  };
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 6 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 12u);
-  EXPECT_EQ(value_names_[1], value_names_[4]);
-  EXPECT_EQ(value_names_[2], value_names_[5]);
-  EXPECT_EQ(value_names_[4], value_names_[7]);
-  EXPECT_EQ(value_names_[5], value_names_[8]);
-  EXPECT_NE(value_names_[7], value_names_[10]);  // New value.
-  EXPECT_NE(value_names_[8], value_names_[11]);  // New value.
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected =
-        ((i != 0u && i != 3u && i != 6u && i != 9u) ? MIR_IGNORE_NULL_CHECK : 0u) |
-        ((i >= 4 && i != 6u && i != 9u) ? MIR_IGNORE_RANGE_CHECK : 0u) |
-        ((i == 3u) ? MIR_STORE_NON_NULL_VALUE : 0);
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, StoringSameValueKeepsMemoryVersion) {
-  static const IFieldDef ifields[] = {
-      { 1u, 1u, 1u, false, kDexMemAccessWord },
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const SFieldDef sfields[] = {
-      { 2u, 1u, 2u, false, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET(Instruction::IGET, 0u, 30u, 0u),
-      DEF_IGET(Instruction::IGET, 1u, 31u, 0u),
-      DEF_IPUT(Instruction::IPUT, 1u, 31u, 0u),            // Store the same value.
-      DEF_IGET(Instruction::IGET, 3u, 30u, 0u),
-      DEF_AGET(Instruction::AGET, 4u, 32u, 40u),
-      DEF_AGET(Instruction::AGET, 5u, 33u, 40u),
-      DEF_APUT(Instruction::APUT, 5u, 33u, 40u),           // Store the same value.
-      DEF_AGET(Instruction::AGET, 7u, 32u, 40u),
-      DEF_SGET(Instruction::SGET, 8u, 0u),
-      DEF_SPUT(Instruction::SPUT, 8u, 0u),                 // Store the same value.
-      DEF_SGET(Instruction::SGET, 10u, 0u),
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 50u),      // Test with unique references.
-      { Instruction::FILLED_NEW_ARRAY, 0, 0u, 2, { 12u, 13u }, 0, { } },
-      DEF_UNIQUE_REF(Instruction::MOVE_RESULT_OBJECT, 51u),
-      DEF_IGET(Instruction::IGET, 14u, 50u, 0u),
-      DEF_IGET(Instruction::IGET, 15u, 50u, 1u),
-      DEF_IPUT(Instruction::IPUT, 15u, 50u, 1u),           // Store the same value.
-      DEF_IGET(Instruction::IGET, 17u, 50u, 0u),
-      DEF_AGET(Instruction::AGET, 18u, 51u, 40u),
-      DEF_AGET(Instruction::AGET, 19u, 51u, 41u),
-      DEF_APUT(Instruction::APUT, 19u, 51u, 41u),          // Store the same value.
-      DEF_AGET(Instruction::AGET, 21u, 51u, 40u),
-  };
-
-  PrepareIFields(ifields);
-  PrepareSFields(sfields);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 22u);
-  EXPECT_NE(value_names_[0], value_names_[1]);
-  EXPECT_EQ(value_names_[0], value_names_[3]);
-  EXPECT_NE(value_names_[4], value_names_[5]);
-  EXPECT_EQ(value_names_[4], value_names_[7]);
-  EXPECT_EQ(value_names_[8], value_names_[10]);
-  EXPECT_NE(value_names_[14], value_names_[15]);
-  EXPECT_EQ(value_names_[14], value_names_[17]);
-  EXPECT_NE(value_names_[18], value_names_[19]);
-  EXPECT_EQ(value_names_[18], value_names_[21]);
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected =
-        ((i == 2u || i == 3u || i == 6u || i == 7u || (i >= 14u)) ? MIR_IGNORE_NULL_CHECK : 0u) |
-        ((i == 6u || i == 7u || i >= 20u) ? MIR_IGNORE_RANGE_CHECK : 0u);
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, FilledNewArrayTracking) {
-  if (!kLocalValueNumberingEnableFilledNewArrayTracking) {
-    // Feature disabled.
-    return;
-  }
-  static const MIRDef mirs[] = {
-      DEF_CONST(Instruction::CONST, 0u, 100),
-      DEF_CONST(Instruction::CONST, 1u, 200),
-      { Instruction::FILLED_NEW_ARRAY, 0, 0u, 2, { 0u, 1u }, 0, { } },
-      DEF_UNIQUE_REF(Instruction::MOVE_RESULT_OBJECT, 10u),
-      DEF_CONST(Instruction::CONST, 20u, 0),
-      DEF_CONST(Instruction::CONST, 21u, 1),
-      DEF_AGET(Instruction::AGET, 6u, 10u, 20u),
-      DEF_AGET(Instruction::AGET, 7u, 10u, 21u),
-  };
-
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 8u);
-  EXPECT_EQ(value_names_[0], value_names_[6]);
-  EXPECT_EQ(value_names_[1], value_names_[7]);
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = (i == 6u || i == 7u) ? (MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK) : 0u;
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-TEST_F(LocalValueNumberingTest, ClInitOnSget) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, false, kDexMemAccessObject },
-      { 1u, 2u, 1u, false, kDexMemAccessObject },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(Instruction::SGET_OBJECT, 0u, 0u),
-      DEF_AGET(Instruction::AGET, 1u, 0u, 100u),
-      DEF_SGET(Instruction::SGET_OBJECT, 2u, 1u),
-      DEF_SGET(Instruction::SGET_OBJECT, 3u, 0u),
-      DEF_AGET(Instruction::AGET, 4u, 3u, 100u),
-  };
-
-  PrepareSFields(sfields);
-  MakeSFieldUninitialized(1u);
-  PrepareMIRs(mirs);
-  PerformLVN();
-  ASSERT_EQ(value_names_.size(), 5u);
-  EXPECT_NE(value_names_[0], value_names_[3]);
-}
-
-TEST_F(LocalValueNumberingTest, DivZeroCheck) {
-  static const MIRDef mirs[] = {
-      DEF_DIV_REM(Instruction::DIV_INT, 1u, 10u, 20u),
-      DEF_DIV_REM(Instruction::DIV_INT, 2u, 20u, 20u),
-      DEF_DIV_REM(Instruction::DIV_INT_2ADDR, 3u, 10u, 1u),
-      DEF_DIV_REM(Instruction::REM_INT, 4u, 30u, 20u),
-      DEF_DIV_REM_WIDE(Instruction::REM_LONG, 5u, 12u, 14u),
-      DEF_DIV_REM_WIDE(Instruction::DIV_LONG_2ADDR, 7u, 16u, 14u),
-  };
-
-  static const bool expected_ignore_div_zero_check[] = {
-      false, true, false, true, false, true,
-  };
-
-  PrepareMIRs(mirs);
-  static const int32_t wide_sregs[] = { 5, 7, 12, 14, 16 };
-  MarkAsWideSRegs(wide_sregs);
-  PerformLVN();
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
-    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
-  }
-}
-
-static constexpr int64_t shift_minus_1(size_t by) {
-  return static_cast<int64_t>(static_cast<uint64_t>(INT64_C(-1)) << by);
-}
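-
-// For instance (worked example), shift_minus_1(16) == -1 << 16 == -65536,
-// i.e. the bit pattern 0xFFFFFFFFFFFF0000. The shift goes through uint64_t
-// because left-shifting a negative signed value is undefined behavior.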
-
-TEST_F(LocalValueNumberingTest, ConstWide) {
-  static const MIRDef mirs[] = {
-      // Core reg constants.
-      DEF_CONST(Instruction::CONST_WIDE_16, 0u, 0),
-      DEF_CONST(Instruction::CONST_WIDE_16, 2u, 1),
-      DEF_CONST(Instruction::CONST_WIDE_16, 4u, -1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 6u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 8u, shift_minus_1(16)),
-      DEF_CONST(Instruction::CONST_WIDE_32, 10u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 12u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 14u, -(1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 16u, -(1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 18u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 20u, shift_minus_1(32)),
-      DEF_CONST(Instruction::CONST_WIDE, 22u, (INT64_C(1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 24u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 26u, shift_minus_1(32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 28u, shift_minus_1(32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 30u, 1),       // Effectively 1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 32u, 0xffff),  // Effectively -1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 36u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 38u, shift_minus_1(48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 40u, shift_minus_1(48) - 1),
-      // FP reg constants.
-      DEF_CONST(Instruction::CONST_WIDE_16, 42u, 0),
-      DEF_CONST(Instruction::CONST_WIDE_16, 44u, 1),
-      DEF_CONST(Instruction::CONST_WIDE_16, 46u, -1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 48u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 50u, shift_minus_1(16)),
-      DEF_CONST(Instruction::CONST_WIDE_32, 52u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 54u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 56u, -(1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 58u, -(1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 60u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 62u, shift_minus_1(32)),
-      DEF_CONST(Instruction::CONST_WIDE, 64u, (INT64_C(1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 66u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 68u, shift_minus_1(32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 70u, shift_minus_1(32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 72u, 1),       // Effectively 1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 74u, 0xffff),  // Effectively -1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE, 76u, (INT64_C(1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 78u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 80u, shift_minus_1(48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 82u, shift_minus_1(48) - 1),
-  };
-
-  PrepareMIRs(mirs);
-  for (size_t i = 0; i != arraysize(mirs); ++i) {
-    const int32_t wide_sregs[] = { mirs_[i].ssa_rep->defs[0] };
-    MarkAsWideSRegs(wide_sregs);
-  }
-  for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs); ++i) {
-    cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true;
-  }
-  PerformLVN();
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    for (size_t j = i + 1u; j != mir_count_; ++j) {
-      EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j;
-    }
-  }
-}
-
-TEST_F(LocalValueNumberingTest, Const) {
-  static const MIRDef mirs[] = {
-      // Core reg constants.
-      DEF_CONST(Instruction::CONST_4, 0u, 0),
-      DEF_CONST(Instruction::CONST_4, 1u, 1),
-      DEF_CONST(Instruction::CONST_4, 2u, -1),
-      DEF_CONST(Instruction::CONST_16, 3u, 1 << 4),
-      DEF_CONST(Instruction::CONST_16, 4u, shift_minus_1(4)),
-      DEF_CONST(Instruction::CONST_16, 5u, (1 << 4) + 1),
-      DEF_CONST(Instruction::CONST_16, 6u, (1 << 4) - 1),
-      DEF_CONST(Instruction::CONST_16, 7u, -(1 << 4) + 1),
-      DEF_CONST(Instruction::CONST_16, 8u, -(1 << 4) - 1),
-      DEF_CONST(Instruction::CONST_HIGH16, 9u, 1),       // Effectively 1 << 16.
-      DEF_CONST(Instruction::CONST_HIGH16, 10u, 0xffff),  // Effectively -1 << 16.
-      DEF_CONST(Instruction::CONST, 11u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST, 12u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST, 13u, shift_minus_1(16) + 1),
-      DEF_CONST(Instruction::CONST, 14u, shift_minus_1(16) - 1),
-      // FP reg constants.
-      DEF_CONST(Instruction::CONST_4, 15u, 0),
-      DEF_CONST(Instruction::CONST_4, 16u, 1),
-      DEF_CONST(Instruction::CONST_4, 17u, -1),
-      DEF_CONST(Instruction::CONST_16, 18u, 1 << 4),
-      DEF_CONST(Instruction::CONST_16, 19u, shift_minus_1(4)),
-      DEF_CONST(Instruction::CONST_16, 20u, (1 << 4) + 1),
-      DEF_CONST(Instruction::CONST_16, 21u, (1 << 4) - 1),
-      DEF_CONST(Instruction::CONST_16, 22u, -(1 << 4) + 1),
-      DEF_CONST(Instruction::CONST_16, 23u, -(1 << 4) - 1),
-      DEF_CONST(Instruction::CONST_HIGH16, 24u, 1),       // Effectively 1 << 16.
-      DEF_CONST(Instruction::CONST_HIGH16, 25u, 0xffff),  // Effectively -1 << 16.
-      DEF_CONST(Instruction::CONST, 26u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST, 27u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST, 28u, shift_minus_1(16) + 1),
-      DEF_CONST(Instruction::CONST, 29u, shift_minus_1(16) - 1),
-      // null reference constant.
-      DEF_CONST(Instruction::CONST_4, 30u, 0),
-  };
-
-  PrepareMIRs(mirs);
-  static_assert((arraysize(mirs) & 1) != 0, "missing null or unmatched fp/core");
-  cu_.mir_graph->reg_location_[arraysize(mirs) - 1].ref = true;
-  for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs) - 1; ++i) {
-    cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true;
-  }
-  PerformLVN();
-  for (size_t i = 0u; i != mir_count_; ++i) {
-    for (size_t j = i + 1u; j != mir_count_; ++j) {
-      EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j;
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
deleted file mode 100644
index 39f8ee8..0000000
--- a/compiler/dex/mir_analysis.cc
+++ /dev/null
@@ -1,1437 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <memory>
-
-#include "base/logging.h"
-#include "base/scoped_arena_containers.h"
-#include "dataflow_iterator-inl.h"
-#include "compiler_ir.h"
-#include "dex_flags.h"
-#include "dex_instruction-inl.h"
-#include "dex/mir_field_info.h"
-#include "dex/verified_method.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "driver/dex_compilation_unit.h"
-#include "scoped_thread_state_change.h"
-#include "utils.h"
-
-namespace art {
-
-enum InstructionAnalysisAttributeOps : uint8_t {
-  kUninterestingOp = 0,
-  kArithmeticOp,
-  kFpOp,
-  kSingleOp,
-  kDoubleOp,
-  kIntOp,
-  kLongOp,
-  kBranchOp,
-  kInvokeOp,
-  kArrayOp,
-  kHeavyweightOp,
-  kSimpleConstOp,
-  kMoveOp,
-  kSwitch
-};
-
-enum InstructionAnalysisAttributeMasks : uint16_t {
-  kAnNone = 1 << kUninterestingOp,
-  kAnMath = 1 << kArithmeticOp,
-  kAnFp = 1 << kFpOp,
-  kAnLong = 1 << kLongOp,
-  kAnInt = 1 << kIntOp,
-  kAnSingle = 1 << kSingleOp,
-  kAnDouble = 1 << kDoubleOp,
-  kAnFloatMath = 1 << kFpOp,
-  kAnBranch = 1 << kBranchOp,
-  kAnInvoke = 1 << kInvokeOp,
-  kAnArrayOp = 1 << kArrayOp,
-  kAnHeavyWeight = 1 << kHeavyweightOp,
-  kAnSimpleConst = 1 << kSimpleConstOp,
-  kAnMove = 1 << kMoveOp,
-  kAnSwitch = 1 << kSwitch,
-  kAnComputational = kAnMath | kAnArrayOp | kAnMove | kAnSimpleConst,
-};
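-
-// Usage sketch (hypothetical, for illustration only; `first_mir` is assumed):
-//   uint16_t attrs = 0u;
-//   for (MIR* mir = first_mir; mir != nullptr; mir = mir->next) {
-//     attrs |= kAnalysisAttributes[mir->dalvikInsn.opcode];
-//   }
-//   bool computational = (attrs & kAnComputational) != 0u;
-// i.e. attributes are ORed over a method's instructions and the accumulated
-// bits are tested against masks such as kAnComputational.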
-
-// Instruction characteristics used to statically identify computation-intensive methods.
-static const uint16_t kAnalysisAttributes[kMirOpLast] = {
-  // 00 NOP
-  kAnNone,
-
-  // 01 MOVE vA, vB
-  kAnMove,
-
-  // 02 MOVE_FROM16 vAA, vBBBB
-  kAnMove,
-
-  // 03 MOVE_16 vAAAA, vBBBB
-  kAnMove,
-
-  // 04 MOVE_WIDE vA, vB
-  kAnMove,
-
-  // 05 MOVE_WIDE_FROM16 vAA, vBBBB
-  kAnMove,
-
-  // 06 MOVE_WIDE_16 vAAAA, vBBBB
-  kAnMove,
-
-  // 07 MOVE_OBJECT vA, vB
-  kAnMove,
-
-  // 08 MOVE_OBJECT_FROM16 vAA, vBBBB
-  kAnMove,
-
-  // 09 MOVE_OBJECT_16 vAAAA, vBBBB
-  kAnMove,
-
-  // 0A MOVE_RESULT vAA
-  kAnMove,
-
-  // 0B MOVE_RESULT_WIDE vAA
-  kAnMove,
-
-  // 0C MOVE_RESULT_OBJECT vAA
-  kAnMove,
-
-  // 0D MOVE_EXCEPTION vAA
-  kAnMove,
-
-  // 0E RETURN_VOID
-  kAnBranch,
-
-  // 0F RETURN vAA
-  kAnBranch,
-
-  // 10 RETURN_WIDE vAA
-  kAnBranch,
-
-  // 11 RETURN_OBJECT vAA
-  kAnBranch,
-
-  // 12 CONST_4 vA, #+B
-  kAnSimpleConst,
-
-  // 13 CONST_16 vAA, #+BBBB
-  kAnSimpleConst,
-
-  // 14 CONST vAA, #+BBBBBBBB
-  kAnSimpleConst,
-
-  // 15 CONST_HIGH16 vAA, #+BBBB0000
-  kAnSimpleConst,
-
-  // 16 CONST_WIDE_16 vAA, #+BBBB
-  kAnSimpleConst,
-
-  // 17 CONST_WIDE_32 vAA, #+BBBBBBBB
-  kAnSimpleConst,
-
-  // 18 CONST_WIDE vAA, #+BBBBBBBBBBBBBBBB
-  kAnSimpleConst,
-
-  // 19 CONST_WIDE_HIGH16 vAA, #+BBBB000000000000
-  kAnSimpleConst,
-
-  // 1A CONST_STRING vAA, string@BBBB
-  kAnNone,
-
-  // 1B CONST_STRING_JUMBO vAA, string@BBBBBBBB
-  kAnNone,
-
-  // 1C CONST_CLASS vAA, type@BBBB
-  kAnNone,
-
-  // 1D MONITOR_ENTER vAA
-  kAnNone,
-
-  // 1E MONITOR_EXIT vAA
-  kAnNone,
-
-  // 1F CHECK_CAST vAA, type@BBBB
-  kAnNone,
-
-  // 20 INSTANCE_OF vA, vB, type@CCCC
-  kAnNone,
-
-  // 21 ARRAY_LENGTH vA, vB
-  kAnArrayOp,
-
-  // 22 NEW_INSTANCE vAA, type@BBBB
-  kAnHeavyWeight,
-
-  // 23 NEW_ARRAY vA, vB, type@CCCC
-  kAnHeavyWeight,
-
-  // 24 FILLED_NEW_ARRAY {vD, vE, vF, vG, vA}
-  kAnHeavyWeight,
-
-  // 25 FILLED_NEW_ARRAY_RANGE {vCCCC .. vNNNN}, type@BBBB
-  kAnHeavyWeight,
-
-  // 26 FILL_ARRAY_DATA vAA, +BBBBBBBB
-  kAnNone,
-
-  // 27 THROW vAA
-  kAnHeavyWeight | kAnBranch,
-
-  // 28 GOTO
-  kAnBranch,
-
-  // 29 GOTO_16
-  kAnBranch,
-
-  // 2A GOTO_32
-  kAnBranch,
-
-  // 2B PACKED_SWITCH vAA, +BBBBBBBB
-  kAnSwitch,
-
-  // 2C SPARSE_SWITCH vAA, +BBBBBBBB
-  kAnSwitch,
-
-  // 2D CMPL_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // 2E CMPG_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // 2F CMPL_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // 30 CMPG_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // 31 CMP_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // 32 IF_EQ vA, vB, +CCCC
-  kAnMath | kAnBranch | kAnInt,
-
-  // 33 IF_NE vA, vB, +CCCC
-  kAnMath | kAnBranch | kAnInt,
-
-  // 34 IF_LT vA, vB, +CCCC
-  kAnMath | kAnBranch | kAnInt,
-
-  // 35 IF_GE vA, vB, +CCCC
-  kAnMath | kAnBranch | kAnInt,
-
-  // 36 IF_GT vA, vB, +CCCC
-  kAnMath | kAnBranch | kAnInt,
-
-  // 37 IF_LE vA, vB, +CCCC
-  kAnMath | kAnBranch | kAnInt,
-
-  // 38 IF_EQZ vAA, +BBBB
-  kAnMath | kAnBranch | kAnInt,
-
-  // 39 IF_NEZ vAA, +BBBB
-  kAnMath | kAnBranch | kAnInt,
-
-  // 3A IF_LTZ vAA, +BBBB
-  kAnMath | kAnBranch | kAnInt,
-
-  // 3B IF_GEZ vAA, +BBBB
-  kAnMath | kAnBranch | kAnInt,
-
-  // 3C IF_GTZ vAA, +BBBB
-  kAnMath | kAnBranch | kAnInt,
-
-  // 3D IF_LEZ vAA, +BBBB
-  kAnMath | kAnBranch | kAnInt,
-
-  // 3E UNUSED_3E
-  kAnNone,
-
-  // 3F UNUSED_3F
-  kAnNone,
-
-  // 40 UNUSED_40
-  kAnNone,
-
-  // 41 UNUSED_41
-  kAnNone,
-
-  // 42 UNUSED_42
-  kAnNone,
-
-  // 43 UNUSED_43
-  kAnNone,
-
-  // 44 AGET vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 45 AGET_WIDE vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 46 AGET_OBJECT vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 47 AGET_BOOLEAN vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 48 AGET_BYTE vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 49 AGET_CHAR vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 4A AGET_SHORT vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 4B APUT vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 4C APUT_WIDE vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 4D APUT_OBJECT vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 4E APUT_BOOLEAN vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 4F APUT_BYTE vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 50 APUT_CHAR vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 51 APUT_SHORT vAA, vBB, vCC
-  kAnArrayOp,
-
-  // 52 IGET vA, vB, field@CCCC
-  kAnNone,
-
-  // 53 IGET_WIDE vA, vB, field@CCCC
-  kAnNone,
-
-  // 54 IGET_OBJECT vA, vB, field@CCCC
-  kAnNone,
-
-  // 55 IGET_BOOLEAN vA, vB, field@CCCC
-  kAnNone,
-
-  // 56 IGET_BYTE vA, vB, field@CCCC
-  kAnNone,
-
-  // 57 IGET_CHAR vA, vB, field@CCCC
-  kAnNone,
-
-  // 58 IGET_SHORT vA, vB, field@CCCC
-  kAnNone,
-
-  // 59 IPUT vA, vB, field@CCCC
-  kAnNone,
-
-  // 5A IPUT_WIDE vA, vB, field@CCCC
-  kAnNone,
-
-  // 5B IPUT_OBJECT vA, vB, field@CCCC
-  kAnNone,
-
-  // 5C IPUT_BOOLEAN vA, vB, field@CCCC
-  kAnNone,
-
-  // 5D IPUT_BYTE vA, vB, field@CCCC
-  kAnNone,
-
-  // 5E IPUT_CHAR vA, vB, field@CCCC
-  kAnNone,
-
-  // 5F IPUT_SHORT vA, vB, field@CCCC
-  kAnNone,
-
-  // 60 SGET vAA, field@BBBB
-  kAnNone,
-
-  // 61 SGET_WIDE vAA, field@BBBB
-  kAnNone,
-
-  // 62 SGET_OBJECT vAA, field@BBBB
-  kAnNone,
-
-  // 63 SGET_BOOLEAN vAA, field@BBBB
-  kAnNone,
-
-  // 64 SGET_BYTE vAA, field@BBBB
-  kAnNone,
-
-  // 65 SGET_CHAR vAA, field@BBBB
-  kAnNone,
-
-  // 66 SGET_SHORT vAA, field@BBBB
-  kAnNone,
-
-  // 67 SPUT vAA, field@BBBB
-  kAnNone,
-
-  // 68 SPUT_WIDE vAA, field@BBBB
-  kAnNone,
-
-  // 69 SPUT_OBJECT vAA, field@BBBB
-  kAnNone,
-
-  // 6A SPUT_BOOLEAN vAA, field@BBBB
-  kAnNone,
-
-  // 6B SPUT_BYTE vAA, field@BBBB
-  kAnNone,
-
-  // 6C SPUT_CHAR vAA, field@BBBB
-  kAnNone,
-
-  // 6D SPUT_SHORT vAA, field@BBBB
-  kAnNone,
-
-  // 6E INVOKE_VIRTUAL {vD, vE, vF, vG, vA}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 6F INVOKE_SUPER {vD, vE, vF, vG, vA}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 70 INVOKE_DIRECT {vD, vE, vF, vG, vA}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 71 INVOKE_STATIC {vD, vE, vF, vG, vA}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 73 RETURN_VOID_NO_BARRIER
-  kAnBranch,
-
-  // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 75 INVOKE_SUPER_RANGE {vCCCC .. vNNNN}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 76 INVOKE_DIRECT_RANGE {vCCCC .. vNNNN}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 77 INVOKE_STATIC_RANGE {vCCCC .. vNNNN}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 78 INVOKE_INTERFACE_RANGE {vCCCC .. vNNNN}
-  kAnInvoke | kAnHeavyWeight,
-
-  // 79 UNUSED_79
-  kAnNone,
-
-  // 7A UNUSED_7A
-  kAnNone,
-
-  // 7B NEG_INT vA, vB
-  kAnMath | kAnInt,
-
-  // 7C NOT_INT vA, vB
-  kAnMath | kAnInt,
-
-  // 7D NEG_LONG vA, vB
-  kAnMath | kAnLong,
-
-  // 7E NOT_LONG vA, vB
-  kAnMath | kAnLong,
-
-  // 7F NEG_FLOAT vA, vB
-  kAnMath | kAnFp | kAnSingle,
-
-  // 80 NEG_DOUBLE vA, vB
-  kAnMath | kAnFp | kAnDouble,
-
-  // 81 INT_TO_LONG vA, vB
-  kAnMath | kAnInt | kAnLong,
-
-  // 82 INT_TO_FLOAT vA, vB
-  kAnMath | kAnFp | kAnInt | kAnSingle,
-
-  // 83 INT_TO_DOUBLE vA, vB
-  kAnMath | kAnFp | kAnInt | kAnDouble,
-
-  // 84 LONG_TO_INT vA, vB
-  kAnMath | kAnInt | kAnLong,
-
-  // 85 LONG_TO_FLOAT vA, vB
-  kAnMath | kAnFp | kAnLong | kAnSingle,
-
-  // 86 LONG_TO_DOUBLE vA, vB
-  kAnMath | kAnFp | kAnLong | kAnDouble,
-
-  // 87 FLOAT_TO_INT vA, vB
-  kAnMath | kAnFp | kAnInt | kAnSingle,
-
-  // 88 FLOAT_TO_LONG vA, vB
-  kAnMath | kAnFp | kAnLong | kAnSingle,
-
-  // 89 FLOAT_TO_DOUBLE vA, vB
-  kAnMath | kAnFp | kAnSingle | kAnDouble,
-
-  // 8A DOUBLE_TO_INT vA, vB
-  kAnMath | kAnFp | kAnInt | kAnDouble,
-
-  // 8B DOUBLE_TO_LONG vA, vB
-  kAnMath | kAnFp | kAnLong | kAnDouble,
-
-  // 8C DOUBLE_TO_FLOAT vA, vB
-  kAnMath | kAnFp | kAnSingle | kAnDouble,
-
-  // 8D INT_TO_BYTE vA, vB
-  kAnMath | kAnInt,
-
-  // 8E INT_TO_CHAR vA, vB
-  kAnMath | kAnInt,
-
-  // 8F INT_TO_SHORT vA, vB
-  kAnMath | kAnInt,
-
-  // 90 ADD_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 91 SUB_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 92 MUL_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 93 DIV_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 94 REM_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 95 AND_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 96 OR_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 97 XOR_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 98 SHL_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 99 SHR_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 9A USHR_INT vAA, vBB, vCC
-  kAnMath | kAnInt,
-
-  // 9B ADD_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // 9C SUB_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // 9D MUL_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // 9E DIV_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // 9F REM_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A0 AND_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A1 OR_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A2 XOR_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A3 SHL_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A4 SHR_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A5 USHR_LONG vAA, vBB, vCC
-  kAnMath | kAnLong,
-
-  // A6 ADD_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // A7 SUB_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // A8 MUL_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // A9 DIV_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // AA REM_FLOAT vAA, vBB, vCC
-  kAnMath | kAnFp | kAnSingle,
-
-  // AB ADD_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // AC SUB_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // AD MUL_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // AE DIV_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // AF REM_DOUBLE vAA, vBB, vCC
-  kAnMath | kAnFp | kAnDouble,
-
-  // B0 ADD_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B1 SUB_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B2 MUL_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B3 DIV_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B4 REM_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B5 AND_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B6 OR_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B7 XOR_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B8 SHL_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // B9 SHR_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // BA USHR_INT_2ADDR vA, vB
-  kAnMath | kAnInt,
-
-  // BB ADD_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // BC SUB_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // BD MUL_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // BE DIV_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // BF REM_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C0 AND_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C1 OR_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C2 XOR_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C3 SHL_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C4 SHR_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C5 USHR_LONG_2ADDR vA, vB
-  kAnMath | kAnLong,
-
-  // C6 ADD_FLOAT_2ADDR vA, vB
-  kAnMath | kAnFp | kAnSingle,
-
-  // C7 SUB_FLOAT_2ADDR vA, vB
-  kAnMath | kAnFp | kAnSingle,
-
-  // C8 MUL_FLOAT_2ADDR vA, vB
-  kAnMath | kAnFp | kAnSingle,
-
-  // C9 DIV_FLOAT_2ADDR vA, vB
-  kAnMath | kAnFp | kAnSingle,
-
-  // CA REM_FLOAT_2ADDR vA, vB
-  kAnMath | kAnFp | kAnSingle,
-
-  // CB ADD_DOUBLE_2ADDR vA, vB
-  kAnMath | kAnFp | kAnDouble,
-
-  // CC SUB_DOUBLE_2ADDR vA, vB
-  kAnMath | kAnFp | kAnDouble,
-
-  // CD MUL_DOUBLE_2ADDR vA, vB
-  kAnMath | kAnFp | kAnDouble,
-
-  // CE DIV_DOUBLE_2ADDR vA, vB
-  kAnMath | kAnFp | kAnDouble,
-
-  // CF REM_DOUBLE_2ADDR vA, vB
-  kAnMath | kAnFp | kAnDouble,
-
-  // D0 ADD_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D1 RSUB_INT vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D2 MUL_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D3 DIV_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D4 REM_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D5 AND_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D6 OR_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D7 XOR_INT_LIT16 vA, vB, #+CCCC
-  kAnMath | kAnInt,
-
-  // D8 ADD_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // D9 RSUB_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // DA MUL_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // DB DIV_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // DC REM_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // DD AND_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // DE OR_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // DF XOR_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // E0 SHL_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // E1 SHR_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // E2 USHR_INT_LIT8 vAA, vBB, #+CC
-  kAnMath | kAnInt,
-
-  // E3 IGET_QUICK
-  kAnNone,
-
-  // E4 IGET_WIDE_QUICK
-  kAnNone,
-
-  // E5 IGET_OBJECT_QUICK
-  kAnNone,
-
-  // E6 IPUT_QUICK
-  kAnNone,
-
-  // E7 IPUT_WIDE_QUICK
-  kAnNone,
-
-  // E8 IPUT_OBJECT_QUICK
-  kAnNone,
-
-  // E9 INVOKE_VIRTUAL_QUICK
-  kAnInvoke | kAnHeavyWeight,
-
-  // EA INVOKE_VIRTUAL_RANGE_QUICK
-  kAnInvoke | kAnHeavyWeight,
-
-  // EB IPUT_BOOLEAN_QUICK
-  kAnNone,
-
-  // EC IPUT_BYTE_QUICK
-  kAnNone,
-
-  // ED IPUT_CHAR_QUICK
-  kAnNone,
-
-  // EE IPUT_SHORT_QUICK
-  kAnNone,
-
-  // EF IGET_BOOLEAN_QUICK
-  kAnNone,
-
-  // F0 IGET_BYTE_QUICK
-  kAnNone,
-
-  // F1 IGET_CHAR_QUICK
-  kAnNone,
-
-  // F2 IGET_SHORT_QUICK
-  kAnNone,
-
-  // F3 UNUSED_F3
-  kAnNone,
-
-  // F4 UNUSED_F4
-  kAnNone,
-
-  // F5 UNUSED_F5
-  kAnNone,
-
-  // F6 UNUSED_F6
-  kAnNone,
-
-  // F7 UNUSED_F7
-  kAnNone,
-
-  // F8 UNUSED_F8
-  kAnNone,
-
-  // F9 UNUSED_F9
-  kAnNone,
-
-  // FA UNUSED_FA
-  kAnNone,
-
-  // FB UNUSED_FB
-  kAnNone,
-
-  // FC UNUSED_FC
-  kAnNone,
-
-  // FD UNUSED_FD
-  kAnNone,
-
-  // FE UNUSED_FE
-  kAnNone,
-
-  // FF UNUSED_FF
-  kAnNone,
-
-  // Beginning of extended MIR opcodes
-  // 100 MIR_PHI
-  kAnNone,
-
-  // 101 MIR_COPY
-  kAnNone,
-
-  // 102 MIR_FUSED_CMPL_FLOAT
-  kAnNone,
-
-  // 103 MIR_FUSED_CMPG_FLOAT
-  kAnNone,
-
-  // 104 MIR_FUSED_CMPL_DOUBLE
-  kAnNone,
-
-  // 105 MIR_FUSED_CMPG_DOUBLE
-  kAnNone,
-
-  // 106 MIR_FUSED_CMP_LONG
-  kAnNone,
-
-  // 107 MIR_NOP
-  kAnNone,
-
-  // 108 MIR_NULL_CHECK
-  kAnNone,
-
-  // 109 MIR_RANGE_CHECK
-  kAnNone,
-
-  // 10A MIR_DIV_ZERO_CHECK
-  kAnNone,
-
-  // 10B MIR_CHECK
-  kAnNone,
-
-  // 10C MIR_CHECKPART2
-  kAnNone,
-
-  // 10D MIR_SELECT
-  kAnNone,
-
-  // 10E MirOpConstVector
-  kAnNone,
-
-  // 10F MirOpMoveVector
-  kAnNone,
-
-  // 110 MirOpPackedMultiply
-  kAnNone,
-
-  // 111 MirOpPackedAddition
-  kAnNone,
-
-  // 112 MirOpPackedSubtract
-  kAnNone,
-
-  // 113 MirOpPackedShiftLeft
-  kAnNone,
-
-  // 114 MirOpPackedSignedShiftRight
-  kAnNone,
-
-  // 115 MirOpPackedUnsignedShiftRight
-  kAnNone,
-
-  // 116 MirOpPackedAnd
-  kAnNone,
-
-  // 117 MirOpPackedOr
-  kAnNone,
-
-  // 118 MirOpPackedXor
-  kAnNone,
-
-  // 119 MirOpPackedAddReduce
-  kAnNone,
-
-  // 11A MirOpPackedReduce
-  kAnNone,
-
-  // 11B MirOpPackedSet
-  kAnNone,
-
-  // 11C MirOpReserveVectorRegisters
-  kAnNone,
-
-  // 11D MirOpReturnVectorRegisters
-  kAnNone,
-
-  // 11E MirOpMemBarrier
-  kAnNone,
-
-  // 11F MirOpPackedArrayGet
-  kAnArrayOp,
-
-  // 120 MirOpPackedArrayPut
-  kAnArrayOp,
-
-  // 121 MirOpMaddInt
-  kAnMath | kAnInt,
-
-  // 122 MirOpMsubInt
-  kAnMath | kAnInt,
-
-  // 123 MirOpMaddLong
-  kAnMath | kAnLong,
-
-  // 124 MirOpMsubLong
-  kAnMath | kAnLong,
-};
-
-struct MethodStats {
-  int dex_instructions;
-  int math_ops;
-  int fp_ops;
-  int array_ops;
-  int branch_ops;
-  int heavyweight_ops;
-  bool has_computational_loop;
-  bool has_switch;
-  float math_ratio;
-  float fp_ratio;
-  float array_ratio;
-  float branch_ratio;
-  float heavyweight_ratio;
-};
-
-void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) {
-  if (bb->visited || (bb->block_type != kDalvikByteCode)) {
-    return;
-  }
-  bool computational_block = true;
-  bool has_math = false;
-  /*
-   * For the purposes of this scan, we want to treat the set of basic blocks broken
-   * by an exception edge as a single basic block.  We'll scan forward along the fallthrough
-   * edges until we reach an explicit branch or return.
-   */
-  BasicBlock* ending_bb = bb;
-  if (ending_bb->last_mir_insn != nullptr) {
-    uint32_t ending_flags = kAnalysisAttributes[ending_bb->last_mir_insn->dalvikInsn.opcode];
-    while ((ending_flags & kAnBranch) == 0) {
-      ending_bb = GetBasicBlock(ending_bb->fall_through);
-      ending_flags = kAnalysisAttributes[ending_bb->last_mir_insn->dalvikInsn.opcode];
-    }
-  }
-  /*
-   * Ideally, we'd weight the operations by loop nesting level, but to do so we'd
-   * first need to do some expensive loop detection - and the point of this is to make
-   * an informed guess before investing in computation.  However, we can cheaply detect
-   * many simple loop forms without having to do full dataflow analysis.
-   */
-  int loop_scale_factor = 1;
-  // Simple for and while loops
-  if ((ending_bb->taken != NullBasicBlockId) && (ending_bb->fall_through == NullBasicBlockId)) {
-    if ((GetBasicBlock(ending_bb->taken)->taken == bb->id) ||
-        (GetBasicBlock(ending_bb->taken)->fall_through == bb->id)) {
-      loop_scale_factor = 25;
-    }
-  }
-  // Simple do-while loop
-  if ((ending_bb->taken != NullBasicBlockId) && (ending_bb->taken == bb->id)) {
-    loop_scale_factor = 25;
-  }
-
-  BasicBlock* tbb = bb;
-  bool done = false;
-  while (!done) {
-    tbb->visited = true;
-    for (MIR* mir = tbb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
-        // Skip any MIR pseudo-op.
-        continue;
-      }
-      uint16_t flags = kAnalysisAttributes[mir->dalvikInsn.opcode];
-      stats->dex_instructions += loop_scale_factor;
-      if ((flags & kAnBranch) == 0) {
-        computational_block &= ((flags & kAnComputational) != 0);
-      } else {
-        stats->branch_ops += loop_scale_factor;
-      }
-      if ((flags & kAnMath) != 0) {
-        stats->math_ops += loop_scale_factor;
-        has_math = true;
-      }
-      if ((flags & kAnFp) != 0) {
-        stats->fp_ops += loop_scale_factor;
-      }
-      if ((flags & kAnArrayOp) != 0) {
-        stats->array_ops += loop_scale_factor;
-      }
-      if ((flags & kAnHeavyWeight) != 0) {
-        stats->heavyweight_ops += loop_scale_factor;
-      }
-      if ((flags & kAnSwitch) != 0) {
-        stats->has_switch = true;
-      }
-    }
-    if (tbb == ending_bb) {
-      done = true;
-    } else {
-      tbb = GetBasicBlock(tbb->fall_through);
-    }
-  }
-  if (has_math && computational_block && (loop_scale_factor > 1)) {
-    stats->has_computational_loop = true;
-  }
-}
-
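-// Illustrative sketch only, not part of the original file: the effect of the
-// loop_scale_factor heuristic above. An instruction in straight-line code
-// contributes 1 to each weighted count, while the same instruction inside a
-// detected simple loop contributes 25, approximating hotness without full
-// loop analysis. The helper name is hypothetical.
-static int WeightedInsnCount(int insn_count, bool in_simple_loop) {
-  constexpr int kLoopScaleFactor = 25;  // Mirrors loop_scale_factor above.
-  return insn_count * (in_simple_loop ? kLoopScaleFactor : 1);
-}
-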
-bool MIRGraph::ComputeSkipCompilation(MethodStats* stats, bool skip_default,
-                                      std::string* skip_message) {
-  float count = stats->dex_instructions;
-  stats->math_ratio = stats->math_ops / count;
-  stats->fp_ratio = stats->fp_ops / count;
-  stats->branch_ratio = stats->branch_ops / count;
-  stats->array_ratio = stats->array_ops / count;
-  stats->heavyweight_ratio = stats->heavyweight_ops / count;
-
-  if (cu_->enable_debug & (1 << kDebugShowFilterStats)) {
-    LOG(INFO) << "STATS " << stats->dex_instructions << ", math:"
-              << stats->math_ratio << ", fp:"
-              << stats->fp_ratio << ", br:"
-              << stats->branch_ratio << ", hw:"
-              << stats->heavyweight_ratio << ", arr:"
-              << stats->array_ratio << ", hot:"
-              << stats->has_computational_loop << ", "
-              << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-
-  // Computation intensive?
-  if (stats->has_computational_loop && (stats->heavyweight_ratio < 0.04)) {
-    return false;
-  }
-
-  // Complex, logic-intensive?
-  if (cu_->compiler_driver->GetCompilerOptions().IsSmallMethod(GetNumDalvikInsns()) &&
-      stats->branch_ratio > 0.3) {
-    return false;
-  }
-
-  // Significant floating point?
-  if (stats->fp_ratio > 0.05) {
-    return false;
-  }
-
-  // Significant generic math?
-  if (stats->math_ratio > 0.3) {
-    return false;
-  }
-
-  // If array-intensive, compiling is probably worthwhile.
-  if (stats->array_ratio > 0.1) {
-    return false;
-  }
-
-  // Switch operations benefit greatly from compilation, so go ahead and spend the cycles.
-  if (stats->has_switch) {
-    return false;
-  }
-
-  // If significant in size and high proportion of expensive operations, skip.
-  if (cu_->compiler_driver->GetCompilerOptions().IsSmallMethod(GetNumDalvikInsns()) &&
-      (stats->heavyweight_ratio > 0.3)) {
-    *skip_message = "Is a small method with heavyweight ratio " +
-                    std::to_string(stats->heavyweight_ratio);
-    return true;
-  }
-
-  return skip_default;
-}
-
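-// Worked example, illustrative only: if the scan above yields
-// dex_instructions = 200 and math_ops = 70, then math_ratio = 0.35 > 0.3 and
-// the method is compiled regardless of skip_default. If instead
-// heavyweight_ops = 80 on a small method, heavyweight_ratio = 0.4 > 0.3 and
-// compilation is skipped with the "heavyweight ratio" message.
-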
-/*
- * Will eventually want this to be a bit more sophisticated and happen at verification time.
- */
-bool MIRGraph::SkipCompilation(std::string* skip_message) {
-  const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
-  CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
-  if (compiler_filter == CompilerOptions::kEverything) {
-    return false;
-  }
-
-  // Contains a pattern we don't want to compile?
-  if (PuntToInterpreter()) {
-    *skip_message = "Punt to interpreter set";
-    return true;
-  }
-
-  DCHECK(compiler_options.IsCompilationEnabled());
-
-  // Set up compilation cutoffs based on current filter mode.
-  size_t small_cutoff;
-  size_t default_cutoff;
-  switch (compiler_filter) {
-    case CompilerOptions::kBalanced:
-      small_cutoff = compiler_options.GetSmallMethodThreshold();
-      default_cutoff = compiler_options.GetLargeMethodThreshold();
-      break;
-    case CompilerOptions::kSpace:
-      small_cutoff = compiler_options.GetTinyMethodThreshold();
-      default_cutoff = compiler_options.GetSmallMethodThreshold();
-      break;
-    case CompilerOptions::kSpeed:
-    case CompilerOptions::kTime:
-      small_cutoff = compiler_options.GetHugeMethodThreshold();
-      default_cutoff = compiler_options.GetHugeMethodThreshold();
-      break;
-    default:
-      LOG(FATAL) << "Unexpected compiler_filter_: " << compiler_filter;
-      UNREACHABLE();
-  }
-
-  // If size < cutoff, assume we'll compile - but allow removal.
-  bool skip_compilation = (GetNumDalvikInsns() >= default_cutoff);
-  if (skip_compilation) {
-    *skip_message = "#Insns >= default_cutoff: " + std::to_string(GetNumDalvikInsns());
-  }
-
-  /*
-   * Filter 1: Huge methods are likely to be machine generated, but some aren't.
-   * If huge, assume we won't compile, but allow further analysis to turn it back on.
-   */
-  if (compiler_options.IsHugeMethod(GetNumDalvikInsns())) {
-    skip_compilation = true;
-    *skip_message = "Huge method: " + std::to_string(GetNumDalvikInsns());
-    // If we've got a huge number of basic blocks, don't bother with further analysis.
-    if (static_cast<size_t>(GetNumBlocks()) > (compiler_options.GetHugeMethodThreshold() / 2)) {
-      return true;
-    }
-  } else if (compiler_options.IsLargeMethod(GetNumDalvikInsns()) &&
-             // If it's large and contains no branches, it's likely to be
-             // machine-generated initialization.
-             (GetBranchCount() == 0)) {
-    *skip_message = "Large method with no branches";
-    return true;
-  } else if (compiler_filter == CompilerOptions::kSpeed) {
-    // If not huge, compile.
-    return false;
-  }
-
-  // Filter 2: Skip class initializers.
-  if (((cu_->access_flags & kAccConstructor) != 0) && ((cu_->access_flags & kAccStatic) != 0)) {
-    *skip_message = "Class initializer";
-    return true;
-  }
-
-  // Filter 3: if this method is a special pattern, go ahead and emit the canned pattern.
-  if (cu_->compiler_driver->GetMethodInlinerMap() != nullptr &&
-      cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
-          ->IsSpecial(cu_->method_idx)) {
-    return false;
-  }
-
-  // Filter 4: if small, just compile.
-  if (GetNumDalvikInsns() < small_cutoff) {
-    return false;
-  }
-
-  // Analyze graph for:
-  //  o floating point computation
-  //  o basic blocks contained in loop with heavy arithmetic.
-  //  o proportion of conditional branches.
-
-  MethodStats stats;
-  memset(&stats, 0, sizeof(stats));
-
-  ClearAllVisitedFlags();
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    AnalyzeBlock(bb, &stats);
-  }
-
-  return ComputeSkipCompilation(&stats, skip_compilation, skip_message);
-}
-
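-// Cutoff summary, derived from the switch above (illustrative note):
-//   kBalanced    : small_cutoff = SmallMethodThreshold, default_cutoff = LargeMethodThreshold
-//   kSpace       : small_cutoff = TinyMethodThreshold,  default_cutoff = SmallMethodThreshold
-//   kSpeed/kTime : small_cutoff = HugeMethodThreshold,  default_cutoff = HugeMethodThreshold
-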
-void MIRGraph::DoCacheFieldLoweringInfo() {
-  static constexpr uint32_t kFieldIndexFlagQuickened = 0x80000000;
-  // All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN.
-  const uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 2u;
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  auto* field_idxs = allocator.AllocArray<uint32_t>(max_refs, kArenaAllocMisc);
-  DexMemAccessType* field_types = allocator.AllocArray<DexMemAccessType>(
-      max_refs, kArenaAllocMisc);
-  // Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end.
-  size_t ifield_pos = 0u;
-  size_t sfield_pos = max_refs;
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    if (bb->block_type != kDalvikByteCode) {
-      continue;
-    }
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      // Get field index and try to find it among existing indexes. If found, it's usually among
-      // the last few added, so we'll start the search from ifield_pos/sfield_pos. Though this
-      // is a linear search, it actually performs much better than a map-based approach.
-      const bool is_iget_or_iput = IsInstructionIGetOrIPut(mir->dalvikInsn.opcode);
-      const bool is_iget_or_iput_quick = IsInstructionIGetQuickOrIPutQuick(mir->dalvikInsn.opcode);
-      if (is_iget_or_iput || is_iget_or_iput_quick) {
-        uint32_t field_idx;
-        DexMemAccessType access_type;
-        if (is_iget_or_iput) {
-          field_idx = mir->dalvikInsn.vC;
-          access_type = IGetOrIPutMemAccessType(mir->dalvikInsn.opcode);
-        } else {
-          DCHECK(is_iget_or_iput_quick);
-          // Set kFieldIndexFlagQuickened so that we don't deduplicate against non-quickened field
-          // indexes.
-          field_idx = mir->offset | kFieldIndexFlagQuickened;
-          access_type = IGetQuickOrIPutQuickMemAccessType(mir->dalvikInsn.opcode);
-        }
-        size_t i = ifield_pos;
-        while (i != 0u && field_idxs[i - 1] != field_idx) {
-          --i;
-        }
-        if (i != 0u) {
-          mir->meta.ifield_lowering_info = i - 1;
-          DCHECK_EQ(field_types[i - 1], access_type);
-        } else {
-          mir->meta.ifield_lowering_info = ifield_pos;
-          field_idxs[ifield_pos] = field_idx;
-          field_types[ifield_pos] = access_type;
-          ++ifield_pos;
-        }
-      } else if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
-        auto field_idx = mir->dalvikInsn.vB;
-        size_t i = sfield_pos;
-        while (i != max_refs && field_idxs[i] != field_idx) {
-          ++i;
-        }
-        if (i != max_refs) {
-          mir->meta.sfield_lowering_info = max_refs - i - 1u;
-          DCHECK_EQ(field_types[i], SGetOrSPutMemAccessType(mir->dalvikInsn.opcode));
-        } else {
-          mir->meta.sfield_lowering_info = max_refs - sfield_pos;
-          --sfield_pos;
-          field_idxs[sfield_pos] = field_idx;
-          field_types[sfield_pos] = SGetOrSPutMemAccessType(mir->dalvikInsn.opcode);
-        }
-      }
-      DCHECK_LE(ifield_pos, sfield_pos);
-    }
-  }
-
-  if (ifield_pos != 0u) {
-    // Resolve instance field infos.
-    DCHECK_EQ(ifield_lowering_infos_.size(), 0u);
-    ifield_lowering_infos_.reserve(ifield_pos);
-    for (size_t pos = 0u; pos != ifield_pos; ++pos) {
-      const uint32_t field_idx = field_idxs[pos];
-      const bool is_quickened = (field_idx & kFieldIndexFlagQuickened) != 0;
-      const uint32_t masked_field_idx = field_idx & ~kFieldIndexFlagQuickened;
-      CHECK_LT(masked_field_idx, 1u << 16);
-      ifield_lowering_infos_.push_back(
-          MirIFieldLoweringInfo(masked_field_idx, field_types[pos], is_quickened));
-    }
-    ScopedObjectAccess soa(Thread::Current());
-    MirIFieldLoweringInfo::Resolve(soa,
-                                   cu_->compiler_driver,
-                                   GetCurrentDexCompilationUnit(),
-                                   ifield_lowering_infos_.data(),
-                                   ifield_pos);
-  }
-
-  if (sfield_pos != max_refs) {
-    // Resolve static field infos.
-    DCHECK_EQ(sfield_lowering_infos_.size(), 0u);
-    sfield_lowering_infos_.reserve(max_refs - sfield_pos);
-    for (size_t pos = max_refs; pos != sfield_pos;) {
-      --pos;
-      sfield_lowering_infos_.push_back(MirSFieldLoweringInfo(field_idxs[pos], field_types[pos]));
-    }
-    MirSFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(),
-                                   sfield_lowering_infos_.data(), max_refs - sfield_pos);
-  }
-}
-
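-// Layout sketch, illustrative only: field_idxs is filled from both ends.
-// With max_refs = 8, after caching three instance fields and two static
-// fields the array looks like:
-//
-//   index:    0    1    2    3    4    5    6    7
-//           [ iF0, iF1, iF2, ---, ---, ---, sF1, sF0 ]
-//                          ifield_pos = 3   sfield_pos = 6
-//
-// Static field lowering indexes count from the back: max_refs - i - 1u.
-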
-void MIRGraph::DoCacheMethodLoweringInfo() {
-  static constexpr uint16_t invoke_types[] = { kVirtual, kSuper, kDirect, kStatic, kInterface };
-  static constexpr uint32_t kMethodIdxFlagQuickened = 0x80000000;
-
-  // Embed the map value in the entry to avoid extra padding in 64-bit builds.
-  struct MapEntry {
-    // Map key: target_method_idx, invoke_type, devirt_target. Ordered to avoid padding.
-    const MethodReference* devirt_target;
-    uint32_t target_method_idx;
-    uint32_t vtable_idx;
-    uint16_t invoke_type;
-    // Map value.
-    uint32_t lowering_info_index;
-  };
-
-  struct MapEntryComparator {
-    bool operator()(const MapEntry& lhs, const MapEntry& rhs) const {
-      if (lhs.target_method_idx != rhs.target_method_idx) {
-        return lhs.target_method_idx < rhs.target_method_idx;
-      }
-      if (lhs.invoke_type != rhs.invoke_type) {
-        return lhs.invoke_type < rhs.invoke_type;
-      }
-      if (lhs.vtable_idx != rhs.vtable_idx) {
-        return lhs.vtable_idx < rhs.vtable_idx;
-      }
-      if (lhs.devirt_target != rhs.devirt_target) {
-        if (lhs.devirt_target == nullptr) {
-          return true;
-        }
-        if (rhs.devirt_target == nullptr) {
-          return false;
-        }
-        return devirt_cmp(*lhs.devirt_target, *rhs.devirt_target);
-      }
-      return false;
-    }
-    MethodReferenceComparator devirt_cmp;
-  };
-
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-
-  // All INVOKE instructions take 3 code units and there must also be a RETURN.
-  const uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 3u;
-
-  // Map invoke key (see MapEntry) to lowering info index and vice versa.
-  // The invoke_map and sequential entries are essentially equivalent to Boost.MultiIndex's
-  // multi_index_container with one ordered index and one sequential index.
-  ScopedArenaSet<MapEntry, MapEntryComparator> invoke_map(MapEntryComparator(),
-                                                          allocator.Adapter());
-  const MapEntry** sequential_entries =
-      allocator.AllocArray<const MapEntry*>(max_refs, kArenaAllocMisc);
-
-  // Find INVOKE insns and their devirtualization targets.
-  const VerifiedMethod* verified_method = GetCurrentDexCompilationUnit()->GetVerifiedMethod();
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    if (bb->block_type != kDalvikByteCode) {
-      continue;
-    }
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      const bool is_quick_invoke = IsInstructionQuickInvoke(mir->dalvikInsn.opcode);
-      const bool is_invoke = IsInstructionInvoke(mir->dalvikInsn.opcode);
-      if (is_quick_invoke || is_invoke) {
-        uint32_t vtable_index = 0;
-        uint32_t target_method_idx = 0;
-        uint32_t invoke_type_idx = 0;  // Default to virtual (in case of quickened).
-        DCHECK_EQ(invoke_types[invoke_type_idx], kVirtual);
-        if (is_quick_invoke) {
-          // We need to store the vtable index since we can't necessarily recreate it at the resolve
-          // phase if the dequickening resolved to an interface method.
-          vtable_index = mir->dalvikInsn.vB;
-          // Fake up the method index by storing the mir offset so that we can read the dequicken
-          // info during resolution.
-          target_method_idx = mir->offset | kMethodIdxFlagQuickened;
-        } else {
-          DCHECK(is_invoke);
-          // Decode target method index and invoke type.
-          invoke_type_idx = InvokeInstructionType(mir->dalvikInsn.opcode);
-          target_method_idx = mir->dalvikInsn.vB;
-        }
-        // Find devirtualization target.
-        // TODO: The devirt map is ordered by the dex pc here. Is there a way to get INVOKEs
-        // ordered by dex pc as well? That would allow us to keep an iterator to devirt targets
-        // and increment it as needed instead of making O(log n) lookups.
-        const MethodReference* devirt_target = verified_method->GetDevirtTarget(mir->offset);
-        // Try to insert a new entry. If the insertion fails, we will have found an old one.
-        MapEntry entry = {
-            devirt_target,
-            target_method_idx,
-            vtable_index,
-            invoke_types[invoke_type_idx],
-            static_cast<uint32_t>(invoke_map.size())
-        };
-        auto it = invoke_map.insert(entry).first;  // Iterator to either the old or the new entry.
-        mir->meta.method_lowering_info = it->lowering_info_index;
-        // If we didn't actually insert, this just overwrites the existing entry with the same pointer.
-        sequential_entries[it->lowering_info_index] = &*it;
-      }
-    }
-  }
-  if (invoke_map.empty()) {
-    return;
-  }
-  // Prepare unique method infos, set method info indexes for their MIRs.
-  const size_t count = invoke_map.size();
-  method_lowering_infos_.reserve(count);
-  for (size_t pos = 0u; pos != count; ++pos) {
-    const MapEntry* entry = sequential_entries[pos];
-    const bool is_quick = (entry->target_method_idx & kMethodIdxFlagQuickened) != 0;
-    const uint32_t masked_method_idx = entry->target_method_idx & ~kMethodIdxFlagQuickened;
-    MirMethodLoweringInfo method_info(masked_method_idx,
-                                      static_cast<InvokeType>(entry->invoke_type), is_quick);
-    if (entry->devirt_target != nullptr) {
-      method_info.SetDevirtualizationTarget(*entry->devirt_target);
-    }
-    if (is_quick) {
-      method_info.SetVTableIndex(entry->vtable_idx);
-    }
-    method_lowering_infos_.push_back(method_info);
-  }
-  MirMethodLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(),
-                                 method_lowering_infos_.data(), count);
-}
-
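-// Dedup sketch, illustrative only: invoke_map relies on std::set::insert
-// returning an iterator to the already-present element when the key (method
-// idx, invoke type, vtable idx, devirt target) matches, so repeated invokes
-// of one target share a single lowering_info_index:
-//
-//   std::set<int> s;
-//   auto it1 = s.insert(42).first;  // Inserted: *it1 == 42.
-//   auto it2 = s.insert(42).first;  // Not inserted: it2 == it1.
-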
-bool MIRGraph::SkipCompilationByName(const std::string& methodname) {
-  return cu_->compiler_driver->SkipCompilation(methodname);
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
deleted file mode 100644
index a7ba061..0000000
--- a/compiler/dex/mir_dataflow.cc
+++ /dev/null
@@ -1,1453 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "local_value_numbering.h"
-#include "dataflow_iterator-inl.h"
-
-namespace art {
-
-/*
- * Main table containing data flow attributes for each bytecode. The
- * first kNumPackedOpcodes entries are for Dalvik bytecode
- * instructions; extended opcodes at the MIR level are appended
- * afterwards.
- *
- * TODO - many optimization flags are incomplete - they will only limit the
- * scope of optimizations but will not cause mis-optimizations.
- */
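-// Reading the table, illustrative note: each entry is a bitwise OR of DF_*
-// attributes. For example, MOVE_WIDE's
-//   DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_IS_MOVE
-// says the instruction defines the register pair starting at vA, uses the
-// pair starting at vB, and marks it as a move for copy-propagation-style
-// optimizations.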
-const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
-  // 00 NOP
-  DF_NOP,
-
-  // 01 MOVE vA, vB
-  DF_DA | DF_UB | DF_IS_MOVE,
-
-  // 02 MOVE_FROM16 vAA, vBBBB
-  DF_DA | DF_UB | DF_IS_MOVE,
-
-  // 03 MOVE_16 vAAAA, vBBBB
-  DF_DA | DF_UB | DF_IS_MOVE,
-
-  // 04 MOVE_WIDE vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_IS_MOVE,
-
-  // 05 MOVE_WIDE_FROM16 vAA, vBBBB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_IS_MOVE,
-
-  // 06 MOVE_WIDE_16 vAAAA, vBBBB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_IS_MOVE,
-
-  // 07 MOVE_OBJECT vA, vB
-  DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_REF_A | DF_REF_B,
-
-  // 08 MOVE_OBJECT_FROM16 vAA, vBBBB
-  DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_REF_A | DF_REF_B,
-
-  // 09 MOVE_OBJECT_16 vAAAA, vBBBB
-  DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_REF_A | DF_REF_B,
-
-  // 0A MOVE_RESULT vAA
-  DF_DA,
-
-  // 0B MOVE_RESULT_WIDE vAA
-  DF_DA | DF_A_WIDE,
-
-  // 0C MOVE_RESULT_OBJECT vAA
-  DF_DA | DF_REF_A,
-
-  // 0D MOVE_EXCEPTION vAA
-  DF_DA | DF_REF_A | DF_NON_NULL_DST,
-
-  // 0E RETURN_VOID
-  DF_NOP,
-
-  // 0F RETURN vAA
-  DF_UA,
-
-  // 10 RETURN_WIDE vAA
-  DF_UA | DF_A_WIDE,
-
-  // 11 RETURN_OBJECT vAA
-  DF_UA | DF_REF_A,
-
-  // 12 CONST_4 vA, #+B
-  DF_DA | DF_SETS_CONST,
-
-  // 13 CONST_16 vAA, #+BBBB
-  DF_DA | DF_SETS_CONST,
-
-  // 14 CONST vAA, #+BBBBBBBB
-  DF_DA | DF_SETS_CONST,
-
-  // 15 CONST_HIGH16 vAA, #+BBBB0000
-  DF_DA | DF_SETS_CONST,
-
-  // 16 CONST_WIDE_16 vAA, #+BBBB
-  DF_DA | DF_A_WIDE | DF_SETS_CONST,
-
-  // 17 CONST_WIDE_32 vAA, #+BBBBBBBB
-  DF_DA | DF_A_WIDE | DF_SETS_CONST,
-
-  // 18 CONST_WIDE vAA, #+BBBBBBBBBBBBBBBB
-  DF_DA | DF_A_WIDE | DF_SETS_CONST,
-
-  // 19 CONST_WIDE_HIGH16 vAA, #+BBBB000000000000
-  DF_DA | DF_A_WIDE | DF_SETS_CONST,
-
-  // 1A CONST_STRING vAA, string@BBBB
-  DF_DA | DF_REF_A | DF_NON_NULL_DST,
-
-  // 1B CONST_STRING_JUMBO vAA, string@BBBBBBBB
-  DF_DA | DF_REF_A | DF_NON_NULL_DST,
-
-  // 1C CONST_CLASS vAA, type@BBBB
-  DF_DA | DF_REF_A | DF_NON_NULL_DST,
-
-  // 1D MONITOR_ENTER vAA
-  DF_UA | DF_NULL_CHK_A | DF_REF_A,
-
-  // 1E MONITOR_EXIT vAA
-  DF_UA | DF_NULL_CHK_A | DF_REF_A,
-
-  // 1F CHK_CAST vAA, type@BBBB
-  DF_UA | DF_REF_A | DF_CHK_CAST | DF_UMS,
-
-  // 20 INSTANCE_OF vA, vB, type@CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_REF_B | DF_UMS,
-
-  // 21 ARRAY_LENGTH vA, vB
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_CORE_A | DF_REF_B,
-
-  // 22 NEW_INSTANCE vAA, type@BBBB
-  DF_DA | DF_NON_NULL_DST | DF_REF_A | DF_UMS,
-
-  // 23 NEW_ARRAY vA, vB, type@CCCC
-  DF_DA | DF_UB | DF_NON_NULL_DST | DF_REF_A | DF_CORE_B | DF_UMS,
-
-  // 24 FILLED_NEW_ARRAY {vD, vE, vF, vG, vA}
-  DF_FORMAT_35C | DF_NON_NULL_RET | DF_UMS,
-
-  // 25 FILLED_NEW_ARRAY_RANGE {vCCCC .. vNNNN}, type@BBBB
-  DF_FORMAT_3RC | DF_NON_NULL_RET | DF_UMS,
-
-  // 26 FILL_ARRAY_DATA vAA, +BBBBBBBB
-  DF_UA | DF_REF_A | DF_UMS,
-
-  // 27 THROW vAA
-  DF_UA | DF_REF_A | DF_UMS,
-
-  // 28 GOTO
-  DF_NOP,
-
-  // 29 GOTO_16
-  DF_NOP,
-
-  // 2A GOTO_32
-  DF_NOP,
-
-  // 2B PACKED_SWITCH vAA, +BBBBBBBB
-  DF_UA | DF_CORE_A,
-
-  // 2C SPARSE_SWITCH vAA, +BBBBBBBB
-  DF_UA | DF_CORE_A,
-
-  // 2D CMPL_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A,
-
-  // 2E CMPG_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A,
-
-  // 2F CMPL_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A,
-
-  // 30 CMPG_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A,
-
-  // 31 CMP_LONG vAA, vBB, vCC
-  DF_DA | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 32 IF_EQ vA, vB, +CCCC
-  DF_UA | DF_UB | DF_SAME_TYPE_AB,
-
-  // 33 IF_NE vA, vB, +CCCC
-  DF_UA | DF_UB | DF_SAME_TYPE_AB,
-
-  // 34 IF_LT vA, vB, +CCCC
-  DF_UA | DF_UB | DF_SAME_TYPE_AB,
-
-  // 35 IF_GE vA, vB, +CCCC
-  DF_UA | DF_UB | DF_SAME_TYPE_AB,
-
-  // 36 IF_GT vA, vB, +CCCC
-  DF_UA | DF_UB | DF_SAME_TYPE_AB,
-
-  // 37 IF_LE vA, vB, +CCCC
-  DF_UA | DF_UB | DF_SAME_TYPE_AB,
-
-  // 38 IF_EQZ vAA, +BBBB
-  DF_UA,
-
-  // 39 IF_NEZ vAA, +BBBB
-  DF_UA,
-
-  // 3A IF_LTZ vAA, +BBBB
-  DF_UA,
-
-  // 3B IF_GEZ vAA, +BBBB
-  DF_UA,
-
-  // 3C IF_GTZ vAA, +BBBB
-  DF_UA,
-
-  // 3D IF_LEZ vAA, +BBBB
-  DF_UA,
-
-  // 3E UNUSED_3E
-  DF_NOP,
-
-  // 3F UNUSED_3F
-  DF_NOP,
-
-  // 40 UNUSED_40
-  DF_NOP,
-
-  // 41 UNUSED_41
-  DF_NOP,
-
-  // 42 UNUSED_42
-  DF_NOP,
-
-  // 43 UNUSED_43
-  DF_NOP,
-
-  // 44 AGET vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 45 AGET_WIDE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 46 AGET_OBJECT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_A | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 47 AGET_BOOLEAN vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 48 AGET_BYTE vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 49 AGET_CHAR vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 4A AGET_SHORT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 4B APUT vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 4C APUT_WIDE vAA, vBB, vCC
-  DF_UA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 4D APUT_OBJECT vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_A | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 4E APUT_BOOLEAN vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 4F APUT_BYTE vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 50 APUT_CHAR vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 51 APUT_SHORT vAA, vBB, vCC
-  DF_UA | DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 52 IGET vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 53 IGET_WIDE vA, vB, field@CCCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 54 IGET_OBJECT vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_A | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 55 IGET_BOOLEAN vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 56 IGET_BYTE vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 57 IGET_CHAR vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 58 IGET_SHORT vA, vB, field@CCCC
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 59 IPUT vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 5A IPUT_WIDE vA, vB, field@CCCC
-  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 5B IPUT_OBJECT vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_A | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 5C IPUT_BOOLEAN vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 5D IPUT_BYTE vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 5E IPUT_CHAR vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 5F IPUT_SHORT vA, vB, field@CCCC
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // 60 SGET vAA, field@BBBB
-  DF_DA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 61 SGET_WIDE vAA, field@BBBB
-  DF_DA | DF_A_WIDE | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 62 SGET_OBJECT vAA, field@BBBB
-  DF_DA | DF_REF_A | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 63 SGET_BOOLEAN vAA, field@BBBB
-  DF_DA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 64 SGET_BYTE vAA, field@BBBB
-  DF_DA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 65 SGET_CHAR vAA, field@BBBB
-  DF_DA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 66 SGET_SHORT vAA, field@BBBB
-  DF_DA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 67 SPUT vAA, field@BBBB
-  DF_UA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 68 SPUT_WIDE vAA, field@BBBB
-  DF_UA | DF_A_WIDE | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 69 SPUT_OBJECT vAA, field@BBBB
-  DF_UA | DF_REF_A | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 6A SPUT_BOOLEAN vAA, field@BBBB
-  DF_UA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 6B SPUT_BYTE vAA, field@BBBB
-  DF_UA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 6C SPUT_CHAR vAA, field@BBBB
-  DF_UA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 6D SPUT_SHORT vAA, field@BBBB
-  DF_UA | DF_SFIELD | DF_CLINIT | DF_UMS,
-
-  // 6E INVOKE_VIRTUAL {vD, vE, vF, vG, vA}
-  DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 6F INVOKE_SUPER {vD, vE, vF, vG, vA}
-  DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 70 INVOKE_DIRECT {vD, vE, vF, vG, vA}
-  DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 71 INVOKE_STATIC {vD, vE, vF, vG, vA}
-  DF_FORMAT_35C | DF_CLINIT | DF_UMS,
-
-  // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA}
-  DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 73 RETURN_VOID_NO_BARRIER
-  DF_NOP,
-
-  // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN}
-  DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 75 INVOKE_SUPER_RANGE {vCCCC .. vNNNN}
-  DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 76 INVOKE_DIRECT_RANGE {vCCCC .. vNNNN}
-  DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 77 INVOKE_STATIC_RANGE {vCCCC .. vNNNN}
-  DF_FORMAT_3RC | DF_CLINIT | DF_UMS,
-
-  // 78 INVOKE_INTERFACE_RANGE {vCCCC .. vNNNN}
-  DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // 79 UNUSED_79
-  DF_NOP,
-
-  // 7A UNUSED_7A
-  DF_NOP,
-
-  // 7B NEG_INT vA, vB
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // 7C NOT_INT vA, vB
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // 7D NEG_LONG vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // 7E NOT_LONG vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // 7F NEG_FLOAT vA, vB
-  DF_DA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // 80 NEG_DOUBLE vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // 81 INT_TO_LONG vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // 82 INT_TO_FLOAT vA, vB
-  DF_DA | DF_UB | DF_FP_A | DF_CORE_B,
-
-  // 83 INT_TO_DOUBLE vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_FP_A | DF_CORE_B,
-
-  // 84 LONG_TO_INT vA, vB
-  DF_DA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // 85 LONG_TO_FLOAT vA, vB
-  DF_DA | DF_UB | DF_B_WIDE | DF_FP_A | DF_CORE_B,
-
-  // 86 LONG_TO_DOUBLE vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_FP_A | DF_CORE_B,
-
-  // 87 FLOAT_TO_INT vA, vB
-  DF_DA | DF_UB | DF_FP_B | DF_CORE_A,
-
-  // 88 FLOAT_TO_LONG vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_FP_B | DF_CORE_A,
-
-  // 89 FLOAT_TO_DOUBLE vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_FP_A | DF_FP_B,
-
-  // 8A DOUBLE_TO_INT vA, vB
-  DF_DA | DF_UB | DF_B_WIDE | DF_FP_B | DF_CORE_A,
-
-  // 8B DOUBLE_TO_LONG vA, vB
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_FP_B | DF_CORE_A,
-
-  // 8C DOUBLE_TO_FLOAT vA, vB
-  DF_DA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // 8D INT_TO_BYTE vA, vB
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // 8E INT_TO_CHAR vA, vB
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // 8F INT_TO_SHORT vA, vB
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // 90 ADD_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 91 SUB_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 92 MUL_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 93 DIV_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 94 REM_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 95 AND_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 96 OR_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 97 XOR_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 98 SHL_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 99 SHR_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 9A USHR_INT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 9B ADD_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 9C SUB_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 9D MUL_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 9E DIV_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // 9F REM_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A0 AND_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A1 OR_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A2 XOR_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A3 SHL_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A4 SHR_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A5 USHR_LONG vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
-
-  // A6 ADD_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // A7 SUB_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // A8 MUL_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // A9 DIV_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // AA REM_FLOAT vAA, vBB, vCC
-  DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // AB ADD_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // AC SUB_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // AD MUL_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // AE DIV_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // AF REM_DOUBLE vAA, vBB, vCC
-  DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C,
-
-  // B0 ADD_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B1 SUB_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B2 MUL_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B3 DIV_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B4 REM_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B5 AND_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B6 OR_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B7 XOR_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B8 SHL_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // B9 SHR_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // BA USHR_INT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // BB ADD_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // BC SUB_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // BD MUL_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // BE DIV_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // BF REM_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // C0 AND_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // C1 OR_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // C2 XOR_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // C3 SHL_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // C4 SHR_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // C5 USHR_LONG_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // C6 ADD_FLOAT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // C7 SUB_FLOAT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // C8 MUL_FLOAT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // C9 DIV_FLOAT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // CA REM_FLOAT_2ADDR vA, vB
-  DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // CB ADD_DOUBLE_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // CC SUB_DOUBLE_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // CD MUL_DOUBLE_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // CE DIV_DOUBLE_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // CF REM_DOUBLE_2ADDR vA, vB
-  DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // D0 ADD_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D1 RSUB_INT vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D2 MUL_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D3 DIV_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D4 REM_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D5 AND_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D6 OR_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D7 XOR_INT_LIT16 vA, vB, #+CCCC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D8 ADD_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // D9 RSUB_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // DA MUL_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // DB DIV_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // DC REM_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // DD AND_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // DE OR_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // DF XOR_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // E0 SHL_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // E1 SHR_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // E2 USHR_INT_LIT8 vAA, vBB, #+CC
-  DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
-
-  // E3 IGET_QUICK
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // E4 IGET_WIDE_QUICK
-  DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // E5 IGET_OBJECT_QUICK
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_A | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // E6 IPUT_QUICK
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // E7 IPUT_WIDE_QUICK
-  DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // E8 IPUT_OBJECT_QUICK
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_A | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // E9 INVOKE_VIRTUAL_QUICK
-  DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // EA INVOKE_VIRTUAL_RANGE_QUICK
-  DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS,
-
-  // EB IPUT_BOOLEAN_QUICK vA, vB, index
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // EC IPUT_BYTE_QUICK vA, vB, index
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // ED IPUT_CHAR_QUICK vA, vB, index
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // EE IPUT_SHORT_QUICK vA, vB, index
-  DF_UA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // EF IGET_BOOLEAN_QUICK vA, vB, index
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // F0 IGET_BYTE_QUICK vA, vB, index
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // F1 IGET_CHAR_QUICK vA, vB, index
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // F2 IGET_SHORT_QUICK vA, vB, index
-  DF_DA | DF_UB | DF_NULL_CHK_B | DF_REF_B | DF_IFIELD | DF_LVN,
-
-  // F3 UNUSED_F3
-  DF_NOP,
-
-  // F4 UNUSED_F4
-  DF_NOP,
-
-  // F5 UNUSED_F5
-  DF_NOP,
-
-  // F6 UNUSED_F6
-  DF_NOP,
-
-  // F7 UNUSED_F7
-  DF_NOP,
-
-  // F8 UNUSED_F8
-  DF_NOP,
-
-  // F9 UNUSED_F9
-  DF_NOP,
-
-  // FA UNUSED_FA
-  DF_NOP,
-
-  // FB UNUSED_FB
-  DF_NOP,
-
-  // FC UNUSED_FC
-  DF_NOP,
-
-  // FD UNUSED_FD
-  DF_NOP,
-
-  // FE UNUSED_FE
-  DF_NOP,
-
-  // FF UNUSED_FF
-  DF_NOP,
-
-  // Beginning of extended MIR opcodes
-  // 100 MIR_PHI
-  DF_DA | DF_NULL_TRANSFER_N,
-
-  // 101 MIR_COPY
-  DF_DA | DF_UB | DF_IS_MOVE,
-
-  // 102 MIR_FUSED_CMPL_FLOAT
-  DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // 103 MIR_FUSED_CMPG_FLOAT
-  DF_UA | DF_UB | DF_FP_A | DF_FP_B,
-
-  // 104 MIR_FUSED_CMPL_DOUBLE
-  DF_UA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // 105 MIR_FUSED_CMPG_DOUBLE
-  DF_UA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B,
-
-  // 106 MIR_FUSED_CMP_LONG
-  DF_UA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B,
-
-  // 107 MIR_NOP
-  DF_NOP,
-
-  // 108 MIR_NULL_CHECK
-  DF_UA | DF_REF_A | DF_NULL_CHK_A | DF_LVN,
-
-  // 109 MIR_RANGE_CHECK
-  0,
-
-  // 10A MIR_DIV_ZERO_CHECK
-  0,
-
-  // 10B MIR_CHECK
-  0,
-
-  // 10C MIR_CHECKPART2
-  0,
-
-  // 10D MIR_SELECT
-  DF_DA | DF_UB,
-
-  // 10E MirOpConstVector
-  0,
-
-  // 10F MirOpMoveVector
-  0,
-
-  // 110 MirOpPackedMultiply
-  0,
-
-  // 111 MirOpPackedAddition
-  0,
-
-  // 112 MirOpPackedSubtract
-  0,
-
-  // 113 MirOpPackedShiftLeft
-  0,
-
-  // 114 MirOpPackedSignedShiftRight
-  0,
-
-  // 115 MirOpPackedUnsignedShiftRight
-  0,
-
-  // 116 MirOpPackedAnd
-  0,
-
-  // 117 MirOpPackedOr
-  0,
-
-  // 118 MirOpPackedXor
-  0,
-
-  // 119 MirOpPackedAddReduce
-  DF_FORMAT_EXTENDED,
-
-  // 11A MirOpPackedReduce
-  DF_FORMAT_EXTENDED,
-
-  // 11B MirOpPackedSet
-  DF_FORMAT_EXTENDED,
-
-  // 11C MirOpReserveVectorRegisters
-  0,
-
-  // 11D MirOpReturnVectorRegisters
-  0,
-
-  // 11E MirOpMemBarrier
-  0,
-
-  // 11F MirOpPackedArrayGet
-  DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 120 MirOpPackedArrayPut
-  DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN,
-
-  // 121 MirOpMaddInt
-  DF_FORMAT_EXTENDED,
-
-  // 122 MirOpMsubInt
-  DF_FORMAT_EXTENDED,
-
-  // 123 MirOpMaddLong
-  DF_FORMAT_EXTENDED,
-
-  // 124 MirOpMsubLong
-  DF_FORMAT_EXTENDED,
-};
-
-/* Any register that is used before being defined is considered live-in */
-void MIRGraph::HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v,
-                               ArenaBitVector* live_in_v, int dalvik_reg_id) {
-  use_v->SetBit(dalvik_reg_id);
-  if (!def_v->IsBitSet(dalvik_reg_id)) {
-    live_in_v->SetBit(dalvik_reg_id);
-  }
-}
-
-/* Mark a reg as being defined */
-void MIRGraph::HandleDef(ArenaBitVector* def_v, int dalvik_reg_id) {
-  def_v->SetBit(dalvik_reg_id);
-}
-
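-// Example, illustrative only: in a block containing
-//
-//   v0 = v1 + v2   // ADD_INT
-//   v1 = v0 + 1    // ADD_INT_LIT8
-//
-// v1 and v2 are used before any local definition and become live-in, while
-// v0 is defined by the first insn before its use in the second, so it does
-// not.
-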
-void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
-                              ArenaBitVector* live_in_v,
-                              const MIR::DecodedInstruction& d_insn) {
-  // For vector MIRs, vC contains type information
-  bool is_vector_type_wide = false;
-  int type_size = d_insn.vC >> 16;
-  if (type_size == k64 || type_size == kDouble) {
-    is_vector_type_wide = true;
-  }
-
-  switch (static_cast<int>(d_insn.opcode)) {
-    case kMirOpPackedAddReduce:
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vA);
-      if (is_vector_type_wide) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vA + 1);
-      }
-      HandleDef(def_v, d_insn.vA);
-      if (is_vector_type_wide) {
-        HandleDef(def_v, d_insn.vA + 1);
-      }
-      break;
-    case kMirOpPackedReduce:
-      HandleDef(def_v, d_insn.vA);
-      if (is_vector_type_wide) {
-        HandleDef(def_v, d_insn.vA + 1);
-      }
-      break;
-    case kMirOpPackedSet:
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB);
-      if (is_vector_type_wide) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB + 1);
-      }
-      break;
-    case kMirOpMaddInt:
-    case kMirOpMsubInt:
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vC);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.arg[0]);
-      HandleDef(def_v, d_insn.vA);
-      break;
-    case kMirOpMaddLong:
-    case kMirOpMsubLong:
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB + 1);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vC);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vC + 1);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.arg[0]);
-      HandleLiveInUse(use_v, def_v, live_in_v, d_insn.arg[0] + 1);
-      HandleDef(def_v, d_insn.vA);
-      HandleDef(def_v, d_insn.vA + 1);
-      break;
-    default:
-      LOG(ERROR) << "Unexpected Extended Opcode " << d_insn.opcode;
-      break;
-  }
-}
-
-/*
- * Compute the local use/def/live-in sets for a basic block. Any Dalvik
- * register that is read before being written in the block is live-in.
- */
-bool MIRGraph::FindLocalLiveIn(BasicBlock* bb) {
-  MIR* mir;
-  ArenaBitVector *use_v, *def_v, *live_in_v;
-
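-  // Only entry, exit and Dalvik code blocks carry data-flow info (see
-  // InitializeBasicBlockDataFlow below); all other blocks are skipped.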
-  if (bb->data_flow_info == nullptr) return false;
-
-  use_v = bb->data_flow_info->use_v =
-      new (arena_) ArenaBitVector(arena_, GetNumOfCodeAndTempVRs(), false, kBitMapUse);
-  def_v = bb->data_flow_info->def_v =
-      new (arena_) ArenaBitVector(arena_, GetNumOfCodeAndTempVRs(), false, kBitMapDef);
-  live_in_v = bb->data_flow_info->live_in_v =
-      new (arena_) ArenaBitVector(arena_, GetNumOfCodeAndTempVRs(), false, kBitMapLiveIn);
-
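-  // use_v: vregs read in this block; def_v: vregs written in this block;
-  // live_in_v: vregs read before any write here (upward-exposed uses).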
-  for (mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    uint64_t df_attributes = GetDataFlowAttributes(mir);
-    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
-
-    if (df_attributes & DF_HAS_USES) {
-      if (df_attributes & DF_UA) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vA);
-        if (df_attributes & DF_A_WIDE) {
-          HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vA+1);
-        }
-      }
-      if (df_attributes & DF_UB) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vB);
-        if (df_attributes & DF_B_WIDE) {
-          HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vB+1);
-        }
-      }
-      if (df_attributes & DF_UC) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vC);
-        if (df_attributes & DF_C_WIDE) {
-          HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vC+1);
-        }
-      }
-    }
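-    // Format 35c lists up to five argument registers explicitly in arg[0..vA-1].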
-    if (df_attributes & DF_FORMAT_35C) {
-      for (unsigned int i = 0; i < d_insn->vA; i++) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn->arg[i]);
-      }
-    }
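-    // Format 3rc uses a contiguous range of vA registers starting at vC.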
-    if (df_attributes & DF_FORMAT_3RC) {
-      for (unsigned int i = 0; i < d_insn->vA; i++) {
-        HandleLiveInUse(use_v, def_v, live_in_v, d_insn->vC+i);
-      }
-    }
-    if (df_attributes & DF_HAS_DEFS) {
-      HandleDef(def_v, d_insn->vA);
-      if (df_attributes & DF_A_WIDE) {
-        HandleDef(def_v, d_insn->vA+1);
-      }
-    }
-    if (df_attributes & DF_FORMAT_EXTENDED) {
-      HandleExtended(use_v, def_v, live_in_v, mir->dalvikInsn);
-    }
-  }
-  return true;
-}
-
-int MIRGraph::AddNewSReg(int v_reg) {
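-  // Each new definition of a vreg receives the next subscript; the pair
-  // (vreg, subscript) uniquely names the resulting SSA register.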
-  int subscript = ++ssa_last_defs_[v_reg];
-  uint32_t ssa_reg = GetNumSSARegs();
-  SetNumSSARegs(ssa_reg + 1);
-  ssa_base_vregs_.push_back(v_reg);
-  ssa_subscripts_.push_back(subscript);
-  DCHECK_EQ(ssa_base_vregs_.size(), ssa_subscripts_.size());
-  // If we are expanding very late, update use counts too.
-  if (ssa_reg > 0 && use_counts_.size() == ssa_reg) {
-    // Need to expand the counts.
-    use_counts_.push_back(0);
-    raw_use_counts_.push_back(0);
-  }
-  return ssa_reg;
-}
-
-/* Find out the latest SSA register for a given Dalvik register */
-void MIRGraph::HandleSSAUse(int* uses, int dalvik_reg, int reg_index) {
-  DCHECK((dalvik_reg >= 0) && (dalvik_reg < static_cast<int>(GetNumOfCodeAndTempVRs())));
-  uses[reg_index] = vreg_to_ssa_map_[dalvik_reg];
-}
-
-/* Set up a new SSA register for a given Dalvik register */
-void MIRGraph::HandleSSADef(int* defs, int dalvik_reg, int reg_index) {
-  DCHECK((dalvik_reg >= 0) && (dalvik_reg < static_cast<int>(GetNumOfCodeAndTempVRs())));
-  int ssa_reg = AddNewSReg(dalvik_reg);
-  vreg_to_ssa_map_[dalvik_reg] = ssa_reg;
-  defs[reg_index] = ssa_reg;
-}
-
-void MIRGraph::AllocateSSAUseData(MIR *mir, int num_uses) {
-  mir->ssa_rep->num_uses = num_uses;
-
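-  // Only (re)allocate the uses array when the existing one is too small.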
-  if (mir->ssa_rep->num_uses_allocated < num_uses) {
-    mir->ssa_rep->uses = arena_->AllocArray<int32_t>(num_uses, kArenaAllocDFInfo);
-  }
-}
-
-void MIRGraph::AllocateSSADefData(MIR *mir, int num_defs) {
-  mir->ssa_rep->num_defs = num_defs;
-
-  if (mir->ssa_rep->num_defs_allocated < num_defs) {
-    mir->ssa_rep->defs = arena_->AllocArray<int32_t>(num_defs, kArenaAllocDFInfo);
-  }
-}
-
-/* Look up new SSA names for format_35c instructions */
-void MIRGraph::DataFlowSSAFormat35C(MIR* mir) {
-  MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
-  int num_uses = d_insn->vA;
-  int i;
-
-  AllocateSSAUseData(mir, num_uses);
-
-  for (i = 0; i < num_uses; i++) {
-    HandleSSAUse(mir->ssa_rep->uses, d_insn->arg[i], i);
-  }
-}
-
-/* Look up new SSA names for format_3rc instructions */
-void MIRGraph::DataFlowSSAFormat3RC(MIR* mir) {
-  MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
-  int num_uses = d_insn->vA;
-  int i;
-
-  AllocateSSAUseData(mir, num_uses);
-
-  for (i = 0; i < num_uses; i++) {
-    HandleSSAUse(mir->ssa_rep->uses, d_insn->vC+i, i);
-  }
-}
-
-void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) {
-  const MIR::DecodedInstruction& d_insn = mir->dalvikInsn;
-  // For vector MIRs, vC contains type information
-  bool is_vector_type_wide = false;
-  int type_size = d_insn.vC >> 16;
-  if (type_size == k64 || type_size == kDouble) {
-    is_vector_type_wide = true;
-  }
-
-  switch (static_cast<int>(mir->dalvikInsn.opcode)) {
-    case kMirOpPackedAddReduce:
-      // We have one use, plus one more for wide
-      AllocateSSAUseData(mir, is_vector_type_wide ? 2 : 1);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vA, 0);
-      if (is_vector_type_wide) {
-        HandleSSAUse(mir->ssa_rep->uses, d_insn.vA + 1, 1);
-      }
-
-      // We have a def, plus one more for wide
-      AllocateSSADefData(mir, is_vector_type_wide ? 2 : 1);
-      HandleSSADef(mir->ssa_rep->defs, d_insn.vA, 0);
-      if (is_vector_type_wide) {
-        HandleSSADef(mir->ssa_rep->defs, d_insn.vA + 1, 1);
-      }
-      break;
-    case kMirOpPackedReduce:
-      // We have a def, plus one more for wide
-      AllocateSSADefData(mir, is_vector_type_wide ? 2 : 1);
-      HandleSSADef(mir->ssa_rep->defs, d_insn.vA, 0);
-      if (is_vector_type_wide) {
-        HandleSSADef(mir->ssa_rep->defs, d_insn.vA + 1, 1);
-      }
-      break;
-    case kMirOpPackedSet:
-      // We have one use, plus one more for wide
-      AllocateSSAUseData(mir, is_vector_type_wide ? 2 : 1);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vB, 0);
-      if (is_vector_type_wide) {
-        HandleSSAUse(mir->ssa_rep->uses, d_insn.vB + 1, 1);
-      }
-      break;
-    case kMirOpMaddInt:
-    case kMirOpMsubInt:
-      AllocateSSAUseData(mir, 3);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vB, 0);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vC, 1);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.arg[0], 2);
-      AllocateSSADefData(mir, 1);
-      HandleSSADef(mir->ssa_rep->defs, d_insn.vA, 0);
-      break;
-    case kMirOpMaddLong:
-    case kMirOpMsubLong:
-      AllocateSSAUseData(mir, 6);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vB, 0);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vB + 1, 1);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vC, 2);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.vC + 1, 3);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.arg[0], 4);
-      HandleSSAUse(mir->ssa_rep->uses, d_insn.arg[0] + 1, 5);
-      AllocateSSADefData(mir, 2);
-      HandleSSADef(mir->ssa_rep->defs, d_insn.vA, 0);
-      HandleSSADef(mir->ssa_rep->defs, d_insn.vA + 1, 1);
-      break;
-    default:
-      LOG(ERROR) << "Missing case for extended MIR: " << mir->dalvikInsn.opcode;
-      break;
-  }
-}
-
-/* Entry function to convert a block into SSA representation */
-bool MIRGraph::DoSSAConversion(BasicBlock* bb) {
-  if (bb->data_flow_info == nullptr) return false;
-
-  /*
-   * Pruned SSA form: Insert phi nodes for each dalvik register marked in phi_node_blocks
-   * only if the dalvik register is in the live-in set.
-   */
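-  // phi_node_blocks was filled in by the earlier phi-node placement pass;
-  // inserting phis only for live-in registers is what makes the SSA form "pruned".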
-  BasicBlockId bb_id = bb->id;
-  for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) {
-    if (temp_.ssa.phi_node_blocks[dalvik_reg]->IsBitSet(bb_id)) {
-      if (!bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) {
-        /* Variable will be clobbered before being used - no need for phi */
-        vreg_to_ssa_map_[dalvik_reg] = INVALID_SREG;
-        continue;
-      }
-      MIR *phi = NewMIR();
-      phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
-      phi->dalvikInsn.vA = dalvik_reg;
-      phi->offset = bb->start_offset;
-      phi->m_unit_index = 0;  // Arbitrarily assign all Phi nodes to outermost method.
-      bb->PrependMIR(phi);
-    }
-  }
-
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    mir->ssa_rep =
-        static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation),
-                                                              kArenaAllocDFInfo));
-    memset(mir->ssa_rep, 0, sizeof(*mir->ssa_rep));
-
-    uint64_t df_attributes = GetDataFlowAttributes(mir);
-
-    // If not a pseudo-op, check for an invoke and clear the method's leaf flag.
-    if (!MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
-      int flags = mir->dalvikInsn.FlagsOf();
-
-      if ((flags & Instruction::kInvoke) != 0) {
-        attributes_ &= ~METHOD_IS_LEAF;
-      }
-    }
-
-    int num_uses = 0;
-
-    if (df_attributes & DF_FORMAT_35C) {
-      DataFlowSSAFormat35C(mir);
-      continue;
-    }
-
-    if (df_attributes & DF_FORMAT_3RC) {
-      DataFlowSSAFormat3RC(mir);
-      continue;
-    }
-
-    if (df_attributes & DF_FORMAT_EXTENDED) {
-      DataFlowSSAFormatExtended(mir);
-      continue;
-    }
-
-    if (df_attributes & DF_HAS_USES) {
-      if (df_attributes & DF_UA) {
-        num_uses++;
-        if (df_attributes & DF_A_WIDE) {
-          num_uses++;
-        }
-      }
-      if (df_attributes & DF_UB) {
-        num_uses++;
-        if (df_attributes & DF_B_WIDE) {
-          num_uses++;
-        }
-      }
-      if (df_attributes & DF_UC) {
-        num_uses++;
-        if (df_attributes & DF_C_WIDE) {
-          num_uses++;
-        }
-      }
-    }
-
-    AllocateSSAUseData(mir, num_uses);
-
-    int num_defs = 0;
-
-    if (df_attributes & DF_HAS_DEFS) {
-      num_defs++;
-      if (df_attributes & DF_A_WIDE) {
-        num_defs++;
-      }
-    }
-
-    AllocateSSADefData(mir, num_defs);
-
-    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
-
-    if (df_attributes & DF_HAS_USES) {
-      num_uses = 0;
-      if (df_attributes & DF_UA) {
-        HandleSSAUse(mir->ssa_rep->uses, d_insn->vA, num_uses++);
-        if (df_attributes & DF_A_WIDE) {
-          HandleSSAUse(mir->ssa_rep->uses, d_insn->vA+1, num_uses++);
-        }
-      }
-      if (df_attributes & DF_UB) {
-        HandleSSAUse(mir->ssa_rep->uses, d_insn->vB, num_uses++);
-        if (df_attributes & DF_B_WIDE) {
-          HandleSSAUse(mir->ssa_rep->uses, d_insn->vB+1, num_uses++);
-        }
-      }
-      if (df_attributes & DF_UC) {
-        HandleSSAUse(mir->ssa_rep->uses, d_insn->vC, num_uses++);
-        if (df_attributes & DF_C_WIDE) {
-          HandleSSAUse(mir->ssa_rep->uses, d_insn->vC+1, num_uses++);
-        }
-      }
-    }
-    if (df_attributes & DF_HAS_DEFS) {
-      HandleSSADef(mir->ssa_rep->defs, d_insn->vA, 0);
-      if (df_attributes & DF_A_WIDE) {
-        HandleSSADef(mir->ssa_rep->defs, d_insn->vA+1, 1);
-      }
-    }
-  }
-
-  /*
-   * Take a snapshot of Dalvik->SSA mapping at the end of each block. The
-   * input to PHI nodes can be derived from the snapshot of all
-   * predecessor blocks.
-   */
-  bb->data_flow_info->vreg_to_ssa_map_exit =
-      arena_->AllocArray<int32_t>(GetNumOfCodeAndTempVRs(), kArenaAllocDFInfo);
-
-  memcpy(bb->data_flow_info->vreg_to_ssa_map_exit, vreg_to_ssa_map_,
-         sizeof(int) * GetNumOfCodeAndTempVRs());
-  return true;
-}
-
-void MIRGraph::InitializeBasicBlockDataFlow() {
-  /*
-   * Allocate the BasicBlockDataFlow structure for the entry, exit and code blocks.
-   */
-  for (BasicBlock* bb : block_list_) {
-    if (bb->hidden) continue;
-    if (bb->block_type == kDalvikByteCode ||
-        bb->block_type == kEntryBlock ||
-        bb->block_type == kExitBlock) {
-      bb->data_flow_info =
-          static_cast<BasicBlockDataFlow*>(arena_->Alloc(sizeof(BasicBlockDataFlow),
-                                                         kArenaAllocDFInfo));
-    }
-  }
-}
-
-/* Set up the basic data structures for SSA conversion */
-void MIRGraph::CompilerInitializeSSAConversion() {
-  size_t num_reg = GetNumOfCodeAndTempVRs();
-
-  ssa_base_vregs_.clear();
-  ssa_base_vregs_.reserve(num_reg + GetDefCount() + 128);
-  ssa_subscripts_.clear();
-  ssa_subscripts_.reserve(num_reg + GetDefCount() + 128);
-
-  /*
-   * Initial number of SSA registers is equal to the number of Dalvik
-   * registers.
-   */
-  SetNumSSARegs(num_reg);
-
-  /*
-   * Initialize the SSA-to-Dalvik maps. For the first num_reg elements the
-   * subscript is 0, so SSA register i simply maps to Dalvik register i.
-   */
-  for (unsigned int i = 0; i < num_reg; i++) {
-    ssa_base_vregs_.push_back(i);
-    ssa_subscripts_.push_back(0);
-  }
-
-  /*
-   * Initialize the DalvikToSSAMap map. There is one entry for each
-   * Dalvik register, and the SSA names for those are the same.
-   */
-  vreg_to_ssa_map_ = arena_->AllocArray<int32_t>(num_reg, kArenaAllocDFInfo);
-  /* Keep track of the highest def for each dalvik reg */
-  ssa_last_defs_ = arena_->AllocArray<int>(num_reg, kArenaAllocDFInfo);
-
-  for (unsigned int i = 0; i < num_reg; i++) {
-    vreg_to_ssa_map_[i] = i;
-    ssa_last_defs_[i] = 0;
-  }
-
-  // Create a compiler temporary for Method*. This is done after SSA initialization.
-  CompilerTemp* method_temp = GetNewCompilerTemp(kCompilerTempSpecialMethodPtr, false);
-  // The MIR graph keeps track of the sreg for method pointer specially, so record that now.
-  method_sreg_ = method_temp->s_reg_low;
-
-  InitializeBasicBlockDataFlow();
-}
-
-uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const {
-  // Weight uses by loop nesting: 100 per level, capped at 3 levels, minimum 1.
-  uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
-  uint32_t weight = std::max(1U, depth * 100);
-  return weight;
-}
-
-/*
- * Count uses, weighting by loop nesting depth.  This code only
- * counts explicitly used s_regs.  A later phase will add implicit
- * counts for things such as Method*, null-checked references, etc.
- */
-void MIRGraph::CountUses(BasicBlock* bb) {
-  if (bb->block_type != kDalvikByteCode) {
-    return;
-  }
-  uint32_t weight = GetUseCountWeight(bb);
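-  // raw_use_counts_ counts each use once; use_counts_ scales each use by the
-  // nesting-depth weight computed above.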
-  for (MIR* mir = bb->first_mir_insn; (mir != nullptr); mir = mir->next) {
-    if (mir->ssa_rep == nullptr) {
-      continue;
-    }
-    for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
-      int s_reg = mir->ssa_rep->uses[i];
-      raw_use_counts_[s_reg] += 1u;
-      use_counts_[s_reg] += weight;
-    }
-  }
-}
-
-/* Verify that every claimed predecessor actually lists this block as a successor */
-bool MIRGraph::VerifyPredInfo(BasicBlock* bb) {
-  for (BasicBlockId pred_id : bb->predecessors) {
-    BasicBlock* pred_bb = GetBasicBlock(pred_id);
-    DCHECK(pred_bb != nullptr);
-    bool found = false;
-    if (pred_bb->taken == bb->id) {
-      found = true;
-    } else if (pred_bb->fall_through == bb->id) {
-      found = true;
-    } else if (pred_bb->successor_block_list_type != kNotUsed) {
-      for (SuccessorBlockInfo* successor_block_info : pred_bb->successor_blocks) {
-        BasicBlockId succ_bb = successor_block_info->block;
-        if (succ_bb == bb->id) {
-          found = true;
-          break;
-        }
-      }
-    }
-    if (!found) {
-      char block_name1[BLOCK_NAME_LEN], block_name2[BLOCK_NAME_LEN];
-      GetBlockName(bb, block_name1);
-      GetBlockName(pred_bb, block_name2);
-      DumpCFG("/sdcard/cfg/", false);
-      LOG(FATAL) << "Successor " << block_name1 << " not found from "
-                 << block_name2;
-    }
-  }
-  return true;
-}
-
-void MIRGraph::VerifyDataflow() {
-  /* Verify that all blocks are connected as claimed */
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    VerifyPredInfo(bb);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
deleted file mode 100644
index 13bbc3e..0000000
--- a/compiler/dex/mir_field_info.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_field_info.h"
-
-#include <string.h>
-
-#include "base/logging.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_driver-inl.h"
-#include "mirror/class_loader.h"  // Only to allow casts in Handle<ClassLoader>.
-#include "mirror/dex_cache.h"     // Only to allow casts in Handle<DexCache>.
-#include "scoped_thread_state_change.h"
-#include "handle_scope-inl.h"
-
-namespace art {
-
-void MirIFieldLoweringInfo::Resolve(const ScopedObjectAccess& soa,
-                                    CompilerDriver* compiler_driver,
-                                    const DexCompilationUnit* mUnit,
-                                    MirIFieldLoweringInfo* field_infos, size_t count) {
-  if (kIsDebugBuild) {
-    DCHECK(field_infos != nullptr);
-    DCHECK_NE(count, 0u);
-    for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-      MirIFieldLoweringInfo unresolved(it->field_idx_, it->MemAccessType(), it->IsQuickened());
-      unresolved.field_offset_ = it->field_offset_;
-      unresolved.CheckEquals(*it);
-    }
-  }
-
-  // We're going to resolve fields and check access in a tight loop. It's better to hold
-  // the lock and the needed references once than to re-acquire them again and again.
-  StackHandleScope<3> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(compiler_driver->GetDexCache(mUnit)));
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(compiler_driver->GetClassLoader(soa, mUnit)));
-  Handle<mirror::Class> referrer_class(hs.NewHandle(
-      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit)));
-  const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod();
-  // Even if the referrer class is unresolved (i.e. we're compiling a method without a
-  // class definition), we still want to resolve fields and record all available info.
-  for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-    uint32_t field_idx;
-    ArtField* resolved_field;
-    if (!it->IsQuickened()) {
-      field_idx = it->field_idx_;
-      resolved_field = compiler_driver->ResolveField(soa, dex_cache, class_loader, mUnit,
-                                                     field_idx, false);
-    } else {
-      const auto mir_offset = it->field_idx_;
-      // For quickened instructions, it->field_idx_ actually contains the MIR offset.
-      // We need to use the de-quickening info to get the dex file / field index.
-      auto* field_idx_ptr = verified_method->GetDequickenIndex(mir_offset);
-      CHECK(field_idx_ptr != nullptr);
-      field_idx = field_idx_ptr->index;
-      StackHandleScope<1> hs2(soa.Self());
-      auto h_dex_cache = hs2.NewHandle(compiler_driver->FindDexCache(field_idx_ptr->dex_file));
-      resolved_field = compiler_driver->ResolveFieldWithDexFile(
-          soa, h_dex_cache, class_loader, field_idx_ptr->dex_file, field_idx, false);
-      // Since we don't have a valid field index we can't go slow path later.
-      CHECK(resolved_field != nullptr);
-    }
-    if (UNLIKELY(resolved_field == nullptr)) {
-      continue;
-    }
-    compiler_driver->GetResolvedFieldDexFileLocation(resolved_field,
-        &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_field_idx_);
-    bool is_volatile = compiler_driver->IsFieldVolatile(resolved_field);
-    it->field_offset_ = compiler_driver->GetFieldOffset(resolved_field);
-    std::pair<bool, bool> fast_path = compiler_driver->IsFastInstanceField(
-        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx);
-    it->flags_ = 0u |  // Without kFlagIsStatic.
-        (it->flags_ & (kMemAccessTypeMask << kBitMemAccessTypeBegin)) |
-        (is_volatile ? kFlagIsVolatile : 0u) |
-        (fast_path.first ? kFlagFastGet : 0u) |
-        (fast_path.second ? kFlagFastPut : 0u);
-  }
-}
-
-void MirSFieldLoweringInfo::Resolve(CompilerDriver* compiler_driver,
-                                    const DexCompilationUnit* mUnit,
-                                    MirSFieldLoweringInfo* field_infos, size_t count) {
-  if (kIsDebugBuild) {
-    DCHECK(field_infos != nullptr);
-    DCHECK_NE(count, 0u);
-    for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-      MirSFieldLoweringInfo unresolved(it->field_idx_, it->MemAccessType());
-      // In 64-bit builds, there's padding after storage_index_; don't include it in memcmp.
-      size_t size = OFFSETOF_MEMBER(MirSFieldLoweringInfo, storage_index_) +
-          sizeof(it->storage_index_);
-      DCHECK_EQ(memcmp(&unresolved, &*it, size), 0);
-    }
-  }
-
-  // We're going to resolve fields and check access in a tight loop. It's better to hold
-  // the lock and the needed references once than to re-acquire them again and again.
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(compiler_driver->GetDexCache(mUnit)));
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(compiler_driver->GetClassLoader(soa, mUnit)));
-  Handle<mirror::Class> referrer_class_handle(hs.NewHandle(
-      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit)));
-  // Even if the referrer class is unresolved (i.e. we're compiling a method without a
-  // class definition), we still want to resolve fields and record all available info.
-
-  for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
-    uint32_t field_idx = it->field_idx_;
-    ArtField* resolved_field =
-        compiler_driver->ResolveField(soa, dex_cache, class_loader, mUnit, field_idx, true);
-    if (UNLIKELY(resolved_field == nullptr)) {
-      continue;
-    }
-    compiler_driver->GetResolvedFieldDexFileLocation(resolved_field,
-        &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_field_idx_);
-    bool is_volatile = compiler_driver->IsFieldVolatile(resolved_field);
-
-    mirror::Class* referrer_class = referrer_class_handle.Get();
-    std::pair<bool, bool> fast_path = compiler_driver->IsFastStaticField(
-        dex_cache.Get(), referrer_class, resolved_field, field_idx, &it->storage_index_);
-    uint16_t flags = kFlagIsStatic |
-        (it->flags_ & (kMemAccessTypeMask << kBitMemAccessTypeBegin)) |
-        (is_volatile ? kFlagIsVolatile : 0u) |
-        (fast_path.first ? kFlagFastGet : 0u) |
-        (fast_path.second ? kFlagFastPut : 0u);
-    if (fast_path.first) {
-      it->field_offset_ = compiler_driver->GetFieldOffset(resolved_field);
-      bool is_referrers_class =
-          compiler_driver->IsStaticFieldInReferrerClass(referrer_class, resolved_field);
-      bool is_class_initialized =
-          compiler_driver->IsStaticFieldsClassInitialized(referrer_class, resolved_field);
-      bool is_class_in_dex_cache = !is_referrers_class &&  // If referrer's class, we don't care.
-          compiler_driver->CanAssumeTypeIsPresentInDexCache(*dex_cache->GetDexFile(),
-                                                            it->storage_index_);
-      flags |= (is_referrers_class ? kFlagIsReferrersClass : 0u) |
-          (is_class_initialized ? kFlagClassIsInitialized : 0u) |
-          (is_class_in_dex_cache ? kFlagClassIsInDexCache : 0u);
-    }
-    it->flags_ = flags;
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h
deleted file mode 100644
index b6dc27d..0000000
--- a/compiler/dex/mir_field_info.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_MIR_FIELD_INFO_H_
-#define ART_COMPILER_DEX_MIR_FIELD_INFO_H_
-
-#include "base/macros.h"
-#include "dex_file.h"
-#include "dex_instruction_utils.h"
-#include "offsets.h"
-
-namespace art {
-
-class CompilerDriver;
-class DexCompilationUnit;
-class ScopedObjectAccess;
-
-/*
- * Field info is calculated from the perspective of the compilation unit that accesses
- * the field and stored in that unit's MIRGraph. Therefore it does not need to reference the
- * dex file or method for which it has been calculated. However, we do store the declaring
- * field index, class index and dex file of the resolved field to help distinguish between fields.
- */
-
-class MirFieldInfo {
- public:
-  uint16_t FieldIndex() const {
-    return field_idx_;
-  }
-  void SetFieldIndex(uint16_t field_idx) {
-    field_idx_ = field_idx;
-  }
-
-  bool IsStatic() const {
-    return (flags_ & kFlagIsStatic) != 0u;
-  }
-
-  bool IsResolved() const {
-    return declaring_dex_file_ != nullptr;
-  }
-
-  const DexFile* DeclaringDexFile() const {
-    return declaring_dex_file_;
-  }
-  void SetDeclaringDexFile(const DexFile* dex_file) {
-    declaring_dex_file_ = dex_file;
-  }
-
-  uint16_t DeclaringClassIndex() const {
-    return declaring_class_idx_;
-  }
-
-  uint16_t DeclaringFieldIndex() const {
-    return declaring_field_idx_;
-  }
-
-  bool IsVolatile() const {
-    return (flags_ & kFlagIsVolatile) != 0u;
-  }
-
-  // IGET_QUICK, IGET_BYTE_QUICK, ...
-  bool IsQuickened() const {
-    return (flags_ & kFlagIsQuickened) != 0u;
-  }
-
-  DexMemAccessType MemAccessType() const {
-    return static_cast<DexMemAccessType>((flags_ >> kBitMemAccessTypeBegin) & kMemAccessTypeMask);
-  }
-
-  void CheckEquals(const MirFieldInfo& other) const {
-    CHECK_EQ(field_idx_, other.field_idx_);
-    CHECK_EQ(flags_, other.flags_);
-    CHECK_EQ(declaring_field_idx_, other.declaring_field_idx_);
-    CHECK_EQ(declaring_class_idx_, other.declaring_class_idx_);
-    CHECK_EQ(declaring_dex_file_, other.declaring_dex_file_);
-  }
-
- protected:
-  enum {
-    kBitIsStatic = 0,
-    kBitIsVolatile,
-    kBitIsQuickened,
-    kBitMemAccessTypeBegin,
-    kBitMemAccessTypeEnd = kBitMemAccessTypeBegin + 3,  // 3 bits for raw type.
-    kFieldInfoBitEnd = kBitMemAccessTypeEnd
-  };
-  static constexpr uint16_t kFlagIsVolatile = 1u << kBitIsVolatile;
-  static constexpr uint16_t kFlagIsStatic = 1u << kBitIsStatic;
-  static constexpr uint16_t kFlagIsQuickened = 1u << kBitIsQuickened;
-  static constexpr uint16_t kMemAccessTypeMask = 7u;
-  static_assert((1u << (kBitMemAccessTypeEnd - kBitMemAccessTypeBegin)) - 1u == kMemAccessTypeMask,
-                "Invalid raw type mask");
-
-  MirFieldInfo(uint16_t field_idx, uint16_t flags, DexMemAccessType type)
-      : field_idx_(field_idx),
-        flags_(flags | static_cast<uint16_t>(type) << kBitMemAccessTypeBegin),
-        declaring_field_idx_(0u),
-        declaring_class_idx_(0u),
-        declaring_dex_file_(nullptr) {
-  }
-
-  // Make copy-ctor/assign/dtor protected to avoid slicing.
-  MirFieldInfo(const MirFieldInfo& other) = default;
-  MirFieldInfo& operator=(const MirFieldInfo& other) = default;
-  ~MirFieldInfo() = default;
-
-  // The field index in the compiling method's dex file.
-  uint16_t field_idx_;
-  // Flags, for volatility and derived class data.
-  uint16_t flags_;
-  // The field index in the dex file that defines the field, 0 if unresolved.
-  uint16_t declaring_field_idx_;
-  // The type index of the class declaring the field, 0 if unresolved.
-  uint16_t declaring_class_idx_;
-  // The dex file that defines the class containing the field and the field, null if unresolved.
-  const DexFile* declaring_dex_file_;
-};
-
-class MirIFieldLoweringInfo : public MirFieldInfo {
- public:
-  // For each requested instance field retrieve the field's declaring location (dex file, class
-  // index and field index) and volatility and compute whether we can fast path the access
-  // with IGET/IPUT. For fast path fields, retrieve the field offset.
-  static void Resolve(const ScopedObjectAccess& soa,
-                      CompilerDriver* compiler_driver,
-                      const DexCompilationUnit* mUnit,
-                      MirIFieldLoweringInfo* field_infos,
-                      size_t count)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Construct an unresolved instance field lowering info.
-  MirIFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type, bool is_quickened)
-      : MirFieldInfo(field_idx,
-                     kFlagIsVolatile | (is_quickened ? kFlagIsQuickened : 0u),
-                     type),  // Without kFlagIsStatic.
-        field_offset_(0u) {
-  }
-
-  bool FastGet() const {
-    return (flags_ & kFlagFastGet) != 0u;
-  }
-
-  bool FastPut() const {
-    return (flags_ & kFlagFastPut) != 0u;
-  }
-
-  MemberOffset FieldOffset() const {
-    return field_offset_;
-  }
-
-  void CheckEquals(const MirIFieldLoweringInfo& other) const {
-    MirFieldInfo::CheckEquals(other);
-    CHECK_EQ(field_offset_.Uint32Value(), other.field_offset_.Uint32Value());
-  }
-
- private:
-  enum {
-    kBitFastGet = kFieldInfoBitEnd,
-    kBitFastPut,
-    kIFieldLoweringInfoBitEnd
-  };
-  static_assert(kIFieldLoweringInfoBitEnd <= 16, "Too many flags");
-  static constexpr uint16_t kFlagFastGet = 1u << kBitFastGet;
-  static constexpr uint16_t kFlagFastPut = 1u << kBitFastPut;
-
-  // The member offset of the field, 0u if unresolved.
-  MemberOffset field_offset_;
-
-  friend class NullCheckEliminationTest;
-  friend class GlobalValueNumberingTest;
-  friend class GvnDeadCodeEliminationTest;
-  friend class LocalValueNumberingTest;
-  friend class TypeInferenceTest;
-};
-
-class MirSFieldLoweringInfo : public MirFieldInfo {
- public:
-  // For each requested static field retrieve the field's declaring location (dex file, class
-  // index and field index) and volatility and compute whether we can fast path the access with
-  // IGET/IPUT. For fast path fields (at least for IGET), retrieve the information needed for
-  // the field access, i.e. the field offset, whether the field is in the same class as the
-  // method being compiled, whether the declaring class can be safely assumed to be initialized
-  // and the type index of the declaring class in the compiled method's dex file.
-  static void Resolve(CompilerDriver* compiler_driver, const DexCompilationUnit* mUnit,
-                      MirSFieldLoweringInfo* field_infos, size_t count)
-      REQUIRES(!Locks::mutator_lock_);
-
-  // Construct an unresolved static field lowering info.
-  MirSFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type)
-      : MirFieldInfo(field_idx, kFlagIsVolatile | kFlagIsStatic, type),
-        field_offset_(0u),
-        storage_index_(DexFile::kDexNoIndex) {
-  }
-
-  bool FastGet() const {
-    return (flags_ & kFlagFastGet) != 0u;
-  }
-
-  bool FastPut() const {
-    return (flags_ & kFlagFastPut) != 0u;
-  }
-
-  bool IsReferrersClass() const {
-    return (flags_ & kFlagIsReferrersClass) != 0u;
-  }
-
-  bool IsClassInitialized() const {
-    return (flags_ & kFlagClassIsInitialized) != 0u;
-  }
-
-  bool IsClassInDexCache() const {
-    return (flags_ & kFlagClassIsInDexCache) != 0u;
-  }
-
-  MemberOffset FieldOffset() const {
-    return field_offset_;
-  }
-
-  uint32_t StorageIndex() const {
-    return storage_index_;
-  }
-
- private:
-  enum {
-    kBitFastGet = kFieldInfoBitEnd,
-    kBitFastPut,
-    kBitIsReferrersClass,
-    kBitClassIsInitialized,
-    kBitClassIsInDexCache,
-    kSFieldLoweringInfoBitEnd
-  };
-  static_assert(kSFieldLoweringInfoBitEnd <= 16, "Too many flags");
-  static constexpr uint16_t kFlagFastGet = 1u << kBitFastGet;
-  static constexpr uint16_t kFlagFastPut = 1u << kBitFastPut;
-  static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass;
-  static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized;
-  static constexpr uint16_t kFlagClassIsInDexCache = 1u << kBitClassIsInDexCache;
-
-  // The member offset of the field, 0u if unresolved.
-  MemberOffset field_offset_;
-  // The type index of the declaring class in the compiling method's dex file,
-  // -1 if the field is unresolved or there's no appropriate TypeId in that dex file.
-  uint32_t storage_index_;
-
-  friend class ClassInitCheckEliminationTest;
-  friend class GlobalValueNumberingTest;
-  friend class GvnDeadCodeEliminationTest;
-  friend class LocalValueNumberingTest;
-  friend class TypeInferenceTest;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_MIR_FIELD_INFO_H_
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
deleted file mode 100644
index b0972d9..0000000
--- a/compiler/dex/mir_graph.cc
+++ /dev/null
@@ -1,2589 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_graph.h"
-
-#include <inttypes.h>
-#include <queue>
-#include <unistd.h>
-
-#include "base/bit_vector-inl.h"
-#include "base/logging.h"
-#include "base/stl_util.h"
-#include "base/stringprintf.h"
-#include "base/scoped_arena_containers.h"
-#include "compiler_ir.h"
-#include "dex_file-inl.h"
-#include "dex_flags.h"
-#include "dex_instruction-inl.h"
-#include "driver/compiler_driver.h"
-#include "driver/dex_compilation_unit.h"
-#include "dex/quick/quick_compiler.h"
-#include "leb128.h"
-#include "pass_driver_me_post_opt.h"
-#include "stack.h"
-#include "utils.h"
-
-namespace art {
-
-#define MAX_PATTERN_LEN 5
-
-const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = {
-  "Phi",
-  "Copy",
-  "FusedCmplFloat",
-  "FusedCmpgFloat",
-  "FusedCmplDouble",
-  "FusedCmpgDouble",
-  "FusedCmpLong",
-  "Nop",
-  "OpNullCheck",
-  "OpRangeCheck",
-  "OpDivZeroCheck",
-  "Check",
-  "Select",
-  "ConstVector",
-  "MoveVector",
-  "PackedMultiply",
-  "PackedAddition",
-  "PackedSubtract",
-  "PackedShiftLeft",
-  "PackedSignedShiftRight",
-  "PackedUnsignedShiftRight",
-  "PackedAnd",
-  "PackedOr",
-  "PackedXor",
-  "PackedAddReduce",
-  "PackedReduce",
-  "PackedSet",
-  "ReserveVectorRegisters",
-  "ReturnVectorRegisters",
-  "MemBarrier",
-  "PackedArrayGet",
-  "PackedArrayPut",
-  "MaddInt",
-  "MsubInt",
-  "MaddLong",
-  "MsubLong",
-};
-
-MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
-    : reg_location_(nullptr),
-      block_id_map_(std::less<unsigned int>(), arena->Adapter()),
-      cu_(cu),
-      ssa_base_vregs_(arena->Adapter(kArenaAllocSSAToDalvikMap)),
-      ssa_subscripts_(arena->Adapter(kArenaAllocSSAToDalvikMap)),
-      vreg_to_ssa_map_(nullptr),
-      ssa_last_defs_(nullptr),
-      is_constant_v_(nullptr),
-      constant_values_(nullptr),
-      use_counts_(arena->Adapter()),
-      raw_use_counts_(arena->Adapter()),
-      num_reachable_blocks_(0),
-      max_num_reachable_blocks_(0),
-      dfs_orders_up_to_date_(false),
-      domination_up_to_date_(false),
-      mir_ssa_rep_up_to_date_(false),
-      topological_order_up_to_date_(false),
-      dfs_order_(arena->Adapter(kArenaAllocDfsPreOrder)),
-      dfs_post_order_(arena->Adapter(kArenaAllocDfsPostOrder)),
-      dom_post_order_traversal_(arena->Adapter(kArenaAllocDomPostOrder)),
-      topological_order_(arena->Adapter(kArenaAllocTopologicalSortOrder)),
-      topological_order_loop_ends_(arena->Adapter(kArenaAllocTopologicalSortOrder)),
-      topological_order_indexes_(arena->Adapter(kArenaAllocTopologicalSortOrder)),
-      topological_order_loop_head_stack_(arena->Adapter(kArenaAllocTopologicalSortOrder)),
-      max_nested_loops_(0u),
-      i_dom_list_(nullptr),
-      temp_scoped_alloc_(),
-      block_list_(arena->Adapter(kArenaAllocBBList)),
-      try_block_addr_(nullptr),
-      entry_block_(nullptr),
-      exit_block_(nullptr),
-      current_code_item_(nullptr),
-      m_units_(arena->Adapter()),
-      method_stack_(arena->Adapter()),
-      current_method_(kInvalidEntry),
-      current_offset_(kInvalidEntry),
-      def_count_(0),
-      opcode_count_(nullptr),
-      num_ssa_regs_(0),
-      extended_basic_blocks_(arena->Adapter()),
-      method_sreg_(0),
-      attributes_(METHOD_IS_LEAF),  // Start with leaf assumption, change on encountering invoke.
-      checkstats_(nullptr),
-      arena_(arena),
-      backward_branches_(0),
-      forward_branches_(0),
-      num_non_special_compiler_temps_(0),
-      max_available_special_compiler_temps_(1),  // We only need the method ptr as a special temp for now.
-      requested_backend_temp_(false),
-      compiler_temps_committed_(false),
-      punt_to_interpreter_(false),
-      merged_df_flags_(0u),
-      ifield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
-      sfield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
-      method_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
-      suspend_checks_in_loops_(nullptr) {
-  memset(&temp_, 0, sizeof(temp_));
-  use_counts_.reserve(256);
-  raw_use_counts_.reserve(256);
-  block_list_.reserve(100);
-  try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
-
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-    // X86 requires a temp to keep track of the method address.
-    // TODO For x86_64, addressing can be done with RIP. When that is implemented,
-    // this needs to be updated to reserve 0 temps for BE.
-    max_available_non_special_compiler_temps_ = cu_->target64 ? 2 : 1;
-    reserved_temps_for_backend_ = max_available_non_special_compiler_temps_;
-  } else {
-    // Other architectures do not have a known lower bound for non-special temps.
-    // We allow the max to be updated at the BE initialization stage and simply set 0 for now.
-    max_available_non_special_compiler_temps_ = 0;
-    reserved_temps_for_backend_ = 0;
-  }
-}
-
-MIRGraph::~MIRGraph() {
-  STLDeleteElements(&block_list_);
-  STLDeleteElements(&m_units_);
-}
-
-/*
- * Parse an instruction and return its length in code units.
- */
-int MIRGraph::ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction) {
-  const Instruction* inst = Instruction::At(code_ptr);
-  decoded_instruction->opcode = inst->Opcode();
-  decoded_instruction->vA = inst->HasVRegA() ? inst->VRegA() : 0;
-  decoded_instruction->vB = inst->HasVRegB() ? inst->VRegB() : 0;
-  decoded_instruction->vB_wide = inst->HasWideVRegB() ? inst->WideVRegB() : 0;
-  decoded_instruction->vC = inst->HasVRegC() ? inst->VRegC() : 0;
-  if (inst->HasVarArgs35c()) {
-    inst->GetVarArgs(decoded_instruction->arg);
-  }
-  return inst->SizeInCodeUnits();
-}
-
-/* Split an existing block from the specified code offset into two */
-BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset,
-                                 BasicBlock* orig_block, BasicBlock** immed_pred_block_p) {
-  DCHECK_GT(code_offset, orig_block->start_offset);
-  MIR* insn = orig_block->first_mir_insn;
-  MIR* prev = nullptr;  // Will be set to instruction before split.
-  while (insn) {
-    if (insn->offset == code_offset) break;
-    prev = insn;
-    insn = insn->next;
-  }
-  if (insn == nullptr) {
-    LOG(FATAL) << "Break split failed";
-  }
-  // Now insn is the instruction where we want to split: it will become the
-  // first instruction of the "bottom" block, while prev will be the last
-  // instruction of the "top" block.
-
-  BasicBlock* bottom_block = CreateNewBB(kDalvikByteCode);
-
-  bottom_block->start_offset = code_offset;
-  bottom_block->first_mir_insn = insn;
-  bottom_block->last_mir_insn = orig_block->last_mir_insn;
-
-  /* If this block was terminated by a return, conditional branch or throw,
-   * the flag needs to go with the bottom block
-   */
-  bottom_block->terminated_by_return = orig_block->terminated_by_return;
-  orig_block->terminated_by_return = false;
-
-  bottom_block->conditional_branch = orig_block->conditional_branch;
-  orig_block->conditional_branch = false;
-
-  bottom_block->explicit_throw = orig_block->explicit_throw;
-  orig_block->explicit_throw = false;
-
-  /* Handle the taken path */
-  bottom_block->taken = orig_block->taken;
-  if (bottom_block->taken != NullBasicBlockId) {
-    orig_block->taken = NullBasicBlockId;
-    BasicBlock* bb_taken = GetBasicBlock(bottom_block->taken);
-    bb_taken->ErasePredecessor(orig_block->id);
-    bb_taken->predecessors.push_back(bottom_block->id);
-  }
-
-  /* Handle the fallthrough path */
-  bottom_block->fall_through = orig_block->fall_through;
-  orig_block->fall_through = bottom_block->id;
-  bottom_block->predecessors.push_back(orig_block->id);
-  if (bottom_block->fall_through != NullBasicBlockId) {
-    BasicBlock* bb_fall_through = GetBasicBlock(bottom_block->fall_through);
-    bb_fall_through->ErasePredecessor(orig_block->id);
-    bb_fall_through->predecessors.push_back(bottom_block->id);
-  }
-
-  /* Handle the successor list */
-  if (orig_block->successor_block_list_type != kNotUsed) {
-    bottom_block->successor_block_list_type = orig_block->successor_block_list_type;
-    bottom_block->successor_blocks.swap(orig_block->successor_blocks);
-    orig_block->successor_block_list_type = kNotUsed;
-    DCHECK(orig_block->successor_blocks.empty());  // Empty after the swap() above.
-    for (SuccessorBlockInfo* successor_block_info : bottom_block->successor_blocks) {
-      BasicBlock* bb = GetBasicBlock(successor_block_info->block);
-      if (bb != nullptr) {
-        bb->ErasePredecessor(orig_block->id);
-        bb->predecessors.push_back(bottom_block->id);
-      }
-    }
-  }
-
-  orig_block->last_mir_insn = prev;
-  prev->next = nullptr;
-
-  /*
-   * Update the immediate predecessor block pointer so that outgoing edges
-   * can be applied to the proper block.
-   */
-  if (immed_pred_block_p) {
-    DCHECK_EQ(*immed_pred_block_p, orig_block);
-    *immed_pred_block_p = bottom_block;
-  }
-
-  // Associate dex instructions in the bottom block with the new container.
-  DCHECK(insn != nullptr);
-  DCHECK(insn != orig_block->first_mir_insn);
-  DCHECK(insn == bottom_block->first_mir_insn);
-  DCHECK_EQ(insn->offset, bottom_block->start_offset);
-  // Scan the "bottom" instructions, remapping them to the
-  // newly created "bottom" block.
-  MIR* p = insn;
-  p->bb = bottom_block->id;
-  while (p != bottom_block->last_mir_insn) {
-    p = p->next;
-    DCHECK(p != nullptr);
-    p->bb = bottom_block->id;
-  }
-
-  return bottom_block;
-}
-
-/*
- * Given a code offset, find out the block that starts with it. If the offset
- * is in the middle of an existing block, split it into two.  If immed_pred_block_p
- * is non-null and is the block being split, update *immed_pred_block_p to
- * point to the bottom block so that outgoing edges can be set up properly
- * (by the caller).
- * Utilizes a map for fast lookup of the typical cases.
- */
-BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create,
-                                BasicBlock** immed_pred_block_p,
-                                ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  if (UNLIKELY(code_offset >= current_code_item_->insns_size_in_code_units_)) {
-    // There can be a fall-through out of the method code. We shall record such a block
-    // here (assuming create==true) and check that it's dead at the end of InlineMethod().
-    // Though we're only aware of the cases where code_offset is exactly the same as
-    // insns_size_in_code_units_, treat greater code_offset the same just in case.
-    code_offset = current_code_item_->insns_size_in_code_units_;
-  }
-
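-  // dex_pc_to_block_map gives, for each dex pc, the id of the block currently
-  // containing it, so the typical lookup is O(1) instead of a block-list scan.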
-  int block_id = (*dex_pc_to_block_map)[code_offset];
-  BasicBlock* bb = GetBasicBlock(block_id);
-
-  if ((bb != nullptr) && (bb->start_offset == code_offset)) {
-    // Does this containing block start with the desired instruction?
-    return bb;
-  }
-
-  // No direct hit.
-  if (!create) {
-    return nullptr;
-  }
-
-  if (bb != nullptr) {
-    // The target exists somewhere in an existing block.
-    BasicBlock* bottom_block = SplitBlock(code_offset, bb,
-                                          bb == *immed_pred_block_p ? immed_pred_block_p : nullptr);
-    DCHECK(bottom_block != nullptr);
-    MIR* p = bottom_block->first_mir_insn;
-    BasicBlock* orig_block = bb;
-    DCHECK_EQ((*dex_pc_to_block_map)[p->offset], orig_block->id);
-    // Scan the "bottom" instructions, remapping them to the
-    // newly created "bottom" block.
-    (*dex_pc_to_block_map)[p->offset] = bottom_block->id;
-    while (p != bottom_block->last_mir_insn) {
-      p = p->next;
-      DCHECK(p != nullptr);
-      int opcode = p->dalvikInsn.opcode;
-      /*
-       * Some messiness here to ensure that we only enter real opcodes and only the
-       * first half of a potentially throwing instruction that has been split into
-       * CHECK and work portions. Since the 2nd half of a split operation is always
-       * the first in a BasicBlock, we can't hit it here.
-       */
-      if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
-        BasicBlockId mapped_id = (*dex_pc_to_block_map)[p->offset];
-        // At first glance the instructions should all be mapped to orig_block.
-        // However, multiple instructions may correspond to the same dex pc, so an
-        // earlier instruction may have already moved the mapping to bottom_block.
-        DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id));
-        (*dex_pc_to_block_map)[p->offset] = bottom_block->id;
-      }
-    }
-    return bottom_block;
-  }
-
-  // Create a new block.
-  bb = CreateNewBB(kDalvikByteCode);
-  bb->start_offset = code_offset;
-  (*dex_pc_to_block_map)[bb->start_offset] = bb->id;
-  return bb;
-}
-
-
-/* Identify code ranges in try blocks and set up the empty catch blocks */
-void MIRGraph::ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  int tries_size = current_code_item_->tries_size_;
-  DexOffset offset;
-
-  if (tries_size == 0) {
-    return;
-  }
-
-  for (int i = 0; i < tries_size; i++) {
-    const DexFile::TryItem* pTry =
-        DexFile::GetTryItems(*current_code_item_, i);
-    DexOffset start_offset = pTry->start_addr_;
-    DexOffset end_offset = start_offset + pTry->insn_count_;
-    for (offset = start_offset; offset < end_offset; offset++) {
-      try_block_addr_->SetBit(offset);
-    }
-  }
-
-  // Iterate over each of the handlers to enqueue the empty Catch blocks.
-  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(*current_code_item_, 0);
-  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
-  for (uint32_t idx = 0; idx < handlers_size; idx++) {
-    CatchHandlerIterator iterator(handlers_ptr);
-    for (; iterator.HasNext(); iterator.Next()) {
-      uint32_t address = iterator.GetHandlerAddress();
-      FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map);
-    }
-    handlers_ptr = iterator.EndDataPointer();
-  }
-}
-
-bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset,
-                                     NarrowDexOffset catch_offset) {
-  // Catches for monitor-exit during stack unwinding have the pattern
-  //   move-exception (move)* (goto)? monitor-exit throw
-  // In the currently generated dex bytecode we see these catching a bytecode range including
-  // either its own or an identical monitor-exit, http://b/15745363 . This function checks if
-  // it's the case for a given monitor-exit and catch block so that we can ignore it.
-  // (We don't want to ignore all monitor-exit catches since one could enclose a synchronized
-  // block in a try-block and catch the NPE, Error or Throwable and we should let it through;
-  // even though a throwing monitor-exit certainly indicates a bytecode error.)
-  const Instruction* monitor_exit = Instruction::At(current_code_item_->insns_ + monitor_exit_offset);
-  DCHECK(monitor_exit->Opcode() == Instruction::MONITOR_EXIT);
-  int monitor_reg = monitor_exit->VRegA_11x();
-  const Instruction* check_insn = Instruction::At(current_code_item_->insns_ + catch_offset);
-  if (check_insn->Opcode() == Instruction::MOVE_EXCEPTION) {
-    if (check_insn->VRegA_11x() == monitor_reg) {
-      // Unexpected move-exception to the same register. Probably not the pattern we're looking for.
-      return false;
-    }
-    check_insn = check_insn->Next();
-  }
-  while (true) {
-    int dest = -1;
-    bool wide = false;
-    switch (check_insn->Opcode()) {
-      case Instruction::MOVE_WIDE:
-        wide = true;
-        FALLTHROUGH_INTENDED;
-      case Instruction::MOVE_OBJECT:
-      case Instruction::MOVE:
-        dest = check_insn->VRegA_12x();
-        break;
-
-      case Instruction::MOVE_WIDE_FROM16:
-        wide = true;
-        FALLTHROUGH_INTENDED;
-      case Instruction::MOVE_OBJECT_FROM16:
-      case Instruction::MOVE_FROM16:
-        dest = check_insn->VRegA_22x();
-        break;
-
-      case Instruction::MOVE_WIDE_16:
-        wide = true;
-        FALLTHROUGH_INTENDED;
-      case Instruction::MOVE_OBJECT_16:
-      case Instruction::MOVE_16:
-        dest = check_insn->VRegA_32x();
-        break;
-
-      case Instruction::GOTO:
-      case Instruction::GOTO_16:
-      case Instruction::GOTO_32:
-        check_insn = check_insn->RelativeAt(check_insn->GetTargetOffset());
-        FALLTHROUGH_INTENDED;
-      default:
-        return check_insn->Opcode() == Instruction::MONITOR_EXIT &&
-            check_insn->VRegA_11x() == monitor_reg;
-    }
-
-    if (dest == monitor_reg || (wide && dest + 1 == monitor_reg)) {
-      return false;
-    }
-
-    check_insn = check_insn->Next();
-  }
-}
-
-/* Process instructions with the kBranch flag */
-BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
-                                       int width, int flags, const uint16_t* code_ptr,
-                                       const uint16_t* code_end,
-                                       ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  DexOffset target = cur_offset;
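-  // Dex branch targets are signed offsets relative to the branch instruction,
-  // so start from cur_offset and add the offset operand below.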
-  switch (insn->dalvikInsn.opcode) {
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32:
-      target += insn->dalvikInsn.vA;
-      break;
-    case Instruction::IF_EQ:
-    case Instruction::IF_NE:
-    case Instruction::IF_LT:
-    case Instruction::IF_GE:
-    case Instruction::IF_GT:
-    case Instruction::IF_LE:
-      cur_block->conditional_branch = true;
-      target += insn->dalvikInsn.vC;
-      break;
-    case Instruction::IF_EQZ:
-    case Instruction::IF_NEZ:
-    case Instruction::IF_LTZ:
-    case Instruction::IF_GEZ:
-    case Instruction::IF_GTZ:
-    case Instruction::IF_LEZ:
-      cur_block->conditional_branch = true;
-      target += insn->dalvikInsn.vB;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
-  }
-  CountBranch(target);
-  BasicBlock* taken_block = FindBlock(target, /* create */ true,
-                                      /* immed_pred_block_p */ &cur_block,
-                                      dex_pc_to_block_map);
-  DCHECK(taken_block != nullptr);
-  cur_block->taken = taken_block->id;
-  taken_block->predecessors.push_back(cur_block->id);
-
-  /* Always terminate the current block for conditional branches */
-  if (flags & Instruction::kContinue) {
-    BasicBlock* fallthrough_block = FindBlock(cur_offset + width,
-                                              /* create */ true,
-                                              /* immed_pred_block_p */ &cur_block,
-                                              dex_pc_to_block_map);
-    DCHECK(fallthrough_block != nullptr);
-    cur_block->fall_through = fallthrough_block->id;
-    fallthrough_block->predecessors.push_back(cur_block->id);
-  } else if (code_ptr < code_end) {
-    FindBlock(cur_offset + width, /* create */ true,
-              /* immed_pred_block_p */ nullptr, dex_pc_to_block_map);
-  }
-  return cur_block;
-}
-
-/* Process instructions with the kSwitch flag */
-BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
-                                       int width, int flags ATTRIBUTE_UNUSED,
-                                       ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
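-  // vB holds the signed offset, in 16-bit code units, from the switch opcode
-  // to its data payload table.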
-  const uint16_t* switch_data =
-      reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset +
-          static_cast<int32_t>(insn->dalvikInsn.vB));
-  int size;
-  const int* keyTable;
-  const int* target_table;
-  int i;
-  int first_key;
-
-  /*
-   * Packed switch data format:
-   *  ushort ident = 0x0100   magic value
-   *  ushort size             number of entries in the table
-   *  int first_key           first (and lowest) switch case value
-   *  int targets[size]       branch targets, relative to switch opcode
-   *
-   * Total size is (4+size*2) 16-bit code units.
-   */
-  if (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) {
-    DCHECK_EQ(static_cast<int>(switch_data[0]),
-              static_cast<int>(Instruction::kPackedSwitchSignature));
-    size = switch_data[1];
-    first_key = switch_data[2] | (switch_data[3] << 16);
-    target_table = reinterpret_cast<const int*>(&switch_data[4]);
-    keyTable = nullptr;        // Make the compiler happy.
-  /*
-   * Sparse switch data format:
-   *  ushort ident = 0x0200   magic value
-   *  ushort size             number of entries in the table; > 0
-   *  int keys[size]          keys, sorted low-to-high; 32-bit aligned
-   *  int targets[size]       branch targets, relative to switch opcode
-   *
-   * Total size is (2+size*4) 16-bit code units.
-   */
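-  /*
-   * Illustrative example (values made up): keys[] = {-1, 10, 256} with
-   * targets[] = {0x05, 0x09, 0x0d} sends case -1, 10 or 256 to
-   * cur_offset + 0x05, + 0x09 or + 0x0d respectively; any other value
-   * falls through past the switch.
-   */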
-  } else {
-    DCHECK_EQ(static_cast<int>(switch_data[0]),
-              static_cast<int>(Instruction::kSparseSwitchSignature));
-    size = switch_data[1];
-    key_table = reinterpret_cast<const int*>(&switch_data[2]);
-    target_table = reinterpret_cast<const int*>(&switch_data[2 + size * 2]);
-    first_key = 0;  // Make the compiler happy.
-  }
-
-  if (cur_block->successor_block_list_type != kNotUsed) {
-    LOG(FATAL) << "Successor block list already in use: "
-               << static_cast<int>(cur_block->successor_block_list_type);
-  }
-  cur_block->successor_block_list_type =
-      (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? kPackedSwitch : kSparseSwitch;
-  cur_block->successor_blocks.reserve(size);
-
-  for (int i = 0; i < size; i++) {
-    BasicBlock* case_block = FindBlock(cur_offset + target_table[i],  /* create */ true,
-                                       /* immed_pred_block_p */ &cur_block,
-                                       dex_pc_to_block_map);
-    DCHECK(case_block != nullptr);
-    SuccessorBlockInfo* successor_block_info =
-        static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
-                                                       kArenaAllocSuccessors));
-    successor_block_info->block = case_block->id;
-    successor_block_info->key =
-        (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ?
-        first_key + i : key_table[i];
-    cur_block->successor_blocks.push_back(successor_block_info);
-    case_block->predecessors.push_back(cur_block->id);
-  }
-
-  /* Fall-through case */
-  BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* create */ true,
-                                            /* immed_pred_block_p */ nullptr,
-                                            dex_pc_to_block_map);
-  DCHECK(fallthrough_block != nullptr);
-  cur_block->fall_through = fallthrough_block->id;
-  fallthrough_block->predecessors.push_back(cur_block->id);
-  return cur_block;
-}
-
-/* Process instructions with the kThrow flag */
-BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block,
-                                      MIR* insn,
-                                      DexOffset cur_offset,
-                                      int width,
-                                      int flags ATTRIBUTE_UNUSED,
-                                      ArenaBitVector* try_block_addr,
-                                      const uint16_t* code_ptr,
-                                      const uint16_t* code_end,
-                                      ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  bool in_try_block = try_block_addr->IsBitSet(cur_offset);
-  bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
-
-  /* In try block */
-  if (in_try_block) {
-    CatchHandlerIterator iterator(*current_code_item_, cur_offset);
-
-    if (cur_block->successor_block_list_type != kNotUsed) {
-      LOG(INFO) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-      LOG(FATAL) << "Successor block list already in use: "
-                 << static_cast<int>(cur_block->successor_block_list_type);
-    }
-
-    for (; iterator.HasNext(); iterator.Next()) {
-      BasicBlock* catch_block = FindBlock(iterator.GetHandlerAddress(), /* create */ false,
-                                          /* immed_pred_block_p */ nullptr,
-                                          dex_pc_to_block_map);
-      if (insn->dalvikInsn.opcode == Instruction::MONITOR_EXIT &&
-          IsBadMonitorExitCatch(insn->offset, catch_block->start_offset)) {
-        // Don't allow monitor-exit to catch its own exception, http://b/15745363 .
-        continue;
-      }
-      if (cur_block->successor_block_list_type == kNotUsed) {
-        cur_block->successor_block_list_type = kCatch;
-      }
-      catch_block->catch_entry = true;
-      if (kIsDebugBuild) {
-        catches_.insert(catch_block->start_offset);
-      }
-      SuccessorBlockInfo* successor_block_info = static_cast<SuccessorBlockInfo*>(
-          arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
-      successor_block_info->block = catch_block->id;
-      successor_block_info->key = iterator.GetHandlerTypeIndex();
-      cur_block->successor_blocks.push_back(successor_block_info);
-      catch_block->predecessors.push_back(cur_block->id);
-    }
-    in_try_block = (cur_block->successor_block_list_type != kNotUsed);
-  }
-  bool build_all_edges =
-      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
-  if (!in_try_block && build_all_edges) {
-    BasicBlock* eh_block = CreateNewBB(kExceptionHandling);
-    cur_block->taken = eh_block->id;
-    eh_block->start_offset = cur_offset;
-    eh_block->predecessors.push_back(cur_block->id);
-  }
-
-  if (is_throw) {
-    cur_block->explicit_throw = true;
-    if (code_ptr < code_end) {
-      // Force creation of new block following THROW via side-effect.
-      FindBlock(cur_offset + width, /* create */ true,
-                /* immed_pred_block_p */ nullptr, dex_pc_to_block_map);
-    }
-    if (!in_try_block) {
-      // Don't split a THROW that can't rethrow - we're done.
-      return cur_block;
-    }
-  }
-
-  if (!build_all_edges) {
-    /*
-     * Even though there is an exception edge here, control cannot return to this
-     * method.  Thus, for the purposes of dataflow analysis and optimization, we can
-     * ignore the edge.  Doing this reduces compile time, and increases the scope
-     * of the basic-block level optimization pass.
-     */
-    return cur_block;
-  }
-
-  /*
-   * Split the potentially-throwing instruction into two parts.
-   * The first half will be a pseudo-op that captures the exception
-   * edges and terminates the basic block.  It always falls through.
-   * Then, create a new basic block that begins with the throwing instruction
-   * (minus exceptions).  Note: this new basic block must NOT be entered into
-   * the block_map.  If the potentially-throwing instruction is the target of a
-   * future branch, we need to find the check pseudo half.  The new
-   * basic block containing the work portion of the instruction should
-   * only be entered via fallthrough from the block containing the
-   * pseudo exception edge MIR.  Note also that this new block is
-   * not automatically terminated after the work portion, and may
-   * contain following instructions.
-   *
-   * Note also that the dex_pc_to_block_map entry for the potentially
-   * throwing instruction will refer to the original basic block.
-   */
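-  /*
-   * Sketch of the resulting shape (illustrative only):
-   *
-   *   [cur_block: ..., kMirOpCheck]  -- exception/successor edges --> EH or catch blocks
-   *          |
-   *     fall-through
-   *          v
-   *   [new_block: work half of the throwing insn, following insns ...]
-   */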
-  BasicBlock* new_block = CreateNewBB(kDalvikByteCode);
-  new_block->start_offset = insn->offset;
-  cur_block->fall_through = new_block->id;
-  new_block->predecessors.push_back(cur_block->id);
-  MIR* new_insn = NewMIR();
-  *new_insn = *insn;
-  insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck);
-  // Associate the two halves.
-  insn->meta.throw_insn = new_insn;
-  new_block->AppendMIR(new_insn);
-  return new_block;
-}
-
-/* Parse a Dex method and insert it into the MIRGraph at the current insert point. */
-void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
-                           InvokeType invoke_type ATTRIBUTE_UNUSED, uint16_t class_def_idx,
-                           uint32_t method_idx, jobject class_loader, const DexFile& dex_file,
-                           Handle<mirror::DexCache> dex_cache) {
-  current_code_item_ = code_item;
-  method_stack_.push_back(std::make_pair(current_method_, current_offset_));
-  current_method_ = m_units_.size();
-  current_offset_ = 0;
-  // TODO: will need to snapshot stack image and use that as the mir context identification.
-  m_units_.push_back(new (arena_) DexCompilationUnit(
-      cu_, class_loader, Runtime::Current()->GetClassLinker(), dex_file, current_code_item_,
-      class_def_idx, method_idx, access_flags,
-      cu_->compiler_driver->GetVerifiedMethod(&dex_file, method_idx), dex_cache));
-  const uint16_t* code_ptr = current_code_item_->insns_;
-  const uint16_t* code_end =
-      current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
-
-  // TODO: need to rework expansion of block list & try_block_addr when inlining activated.
-  // TUNING: use better estimate of basic blocks for following resize.
-  block_list_.reserve(block_list_.size() + current_code_item_->insns_size_in_code_units_);
-  // FindBlock lookup cache.
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaVector<uint16_t> dex_pc_to_block_map(allocator.Adapter());
-  dex_pc_to_block_map.resize(current_code_item_->insns_size_in_code_units_ +
-                             1 /* Fall-through on last insn; dead or punt to interpreter. */);
-
-  // TODO: replace with explicit resize routine.  Using automatic extension side effect for now.
-  try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
-  try_block_addr_->ClearBit(current_code_item_->insns_size_in_code_units_);
-
-  // If this is the first method, set up default entry and exit blocks.
-  if (current_method_ == 0) {
-    DCHECK(entry_block_ == nullptr);
-    DCHECK(exit_block_ == nullptr);
-    DCHECK_EQ(GetNumBlocks(), 0U);
-    // Use id 0 to represent a null block.
-    BasicBlock* null_block = CreateNewBB(kNullBlock);
-    DCHECK_EQ(null_block->id, NullBasicBlockId);
-    null_block->hidden = true;
-    entry_block_ = CreateNewBB(kEntryBlock);
-    exit_block_ = CreateNewBB(kExitBlock);
-  } else {
-    UNIMPLEMENTED(FATAL) << "Nested inlining not implemented.";
-    /*
-     * Will need to manage storage for ins & outs, push previous state and update
-     * insert point.
-     */
-  }
-
-  /* Current block to record parsed instructions */
-  BasicBlock* cur_block = CreateNewBB(kDalvikByteCode);
-  DCHECK_EQ(current_offset_, 0U);
-  cur_block->start_offset = current_offset_;
-  // TODO: for inlining support, insert at the insert point rather than entry block.
-  entry_block_->fall_through = cur_block->id;
-  cur_block->predecessors.push_back(entry_block_->id);
-
-  /* Identify code range in try blocks and set up the empty catch blocks */
-  ProcessTryCatchBlocks(&dex_pc_to_block_map);
-
-  uint64_t merged_df_flags = 0u;
-
-  /* Parse all instructions and put them into containing basic blocks */
-  while (code_ptr < code_end) {
-    MIR* insn = NewMIR();
-    insn->offset = current_offset_;
-    insn->m_unit_index = current_method_;
-    int width = ParseInsn(code_ptr, &insn->dalvikInsn);
-    Instruction::Code opcode = insn->dalvikInsn.opcode;
-    if (opcode_count_ != nullptr) {
-      opcode_count_[static_cast<int>(opcode)]++;
-    }
-
-    int flags = insn->dalvikInsn.FlagsOf();
-    int verify_flags = Instruction::VerifyFlagsOf(insn->dalvikInsn.opcode);
-
-    uint64_t df_flags = GetDataFlowAttributes(insn);
-    merged_df_flags |= df_flags;
-
-    if (df_flags & DF_HAS_DEFS) {
-      def_count_ += (df_flags & DF_A_WIDE) ? 2 : 1;
-    }
-
-    if (df_flags & DF_LVN) {
-      cur_block->use_lvn = true;  // Run local value numbering on this basic block.
-    }
-
-    // Check for inline data block signatures.
-    if (opcode == Instruction::NOP) {
-      // A simple NOP will have a width of 1 at this point, embedded data NOP > 1.
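-      /*
-       * Illustrative layout (made up): "nop; packed-switch-payload", where the
-       * nop sits at an odd code-unit offset purely to 4-byte align the payload;
-       * the pair is consumed as one unit so the payload is never parsed as code.
-       */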
-      if ((width == 1) && ((current_offset_ & 0x1) == 0x1) && ((code_end - code_ptr) > 1)) {
-        // Could be an aligning nop.  If an embedded data NOP follows, treat pair as single unit.
-        uint16_t following_raw_instruction = code_ptr[1];
-        if ((following_raw_instruction == Instruction::kSparseSwitchSignature) ||
-            (following_raw_instruction == Instruction::kPackedSwitchSignature) ||
-            (following_raw_instruction == Instruction::kArrayDataSignature)) {
-          width += Instruction::At(code_ptr + 1)->SizeInCodeUnits();
-        }
-      }
-      if (width == 1) {
-        // It is a simple nop - treat normally.
-        cur_block->AppendMIR(insn);
-      } else {
-        DCHECK(cur_block->fall_through == NullBasicBlockId);
-        DCHECK(cur_block->taken == NullBasicBlockId);
-        // Unreachable instruction, mark for no continuation and end basic block.
-        flags &= ~Instruction::kContinue;
-        FindBlock(current_offset_ + width, /* create */ true,
-                  /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map);
-      }
-    } else {
-      cur_block->AppendMIR(insn);
-    }
-
-    // Associate the starting dex_pc for this opcode with its containing basic block.
-    dex_pc_to_block_map[insn->offset] = cur_block->id;
-
-    code_ptr += width;
-
-    if (flags & Instruction::kBranch) {
-      cur_block = ProcessCanBranch(cur_block, insn, current_offset_,
-                                   width, flags, code_ptr, code_end, &dex_pc_to_block_map);
-    } else if (flags & Instruction::kReturn) {
-      cur_block->terminated_by_return = true;
-      cur_block->fall_through = exit_block_->id;
-      exit_block_->predecessors.push_back(cur_block->id);
-      /*
-       * Terminate the current block if there are instructions
-       * afterwards.
-       */
-      if (code_ptr < code_end) {
-        /*
-         * Create a fallthrough block for real instructions
-         * (incl. NOP).
-         */
-        FindBlock(current_offset_ + width, /* create */ true,
-                  /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map);
-      }
-    } else if (flags & Instruction::kThrow) {
-      cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
-                                  code_ptr, code_end, &dex_pc_to_block_map);
-    } else if (flags & Instruction::kSwitch) {
-      cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width,
-                                   flags, &dex_pc_to_block_map);
-    }
-    if (verify_flags & Instruction::kVerifyVarArgRange ||
-        verify_flags & Instruction::kVerifyVarArgRangeNonZero) {
-      /*
-       * The Quick backend's runtime model includes a gap between a method's
-       * argument ("in") vregs and the rest of its vregs.  Handling a range instruction
-       * which spans the gap is somewhat complicated, and should not happen
-       * in normal usage of dx.  Punt to the interpreter.
-       */
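-      /*
-       * Illustrative example (made-up numbers): with registers_size = 16 and
-       * ins_size = 4 the ins are v12..v15, so an invoke-virtual/range
-       * {v10 .. v13} starts below the gap and ends inside the ins;
-       * IsInVReg() then differs for the first and last register and we punt.
-       */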
-      int first_reg_in_range = insn->dalvikInsn.vC;
-      int last_reg_in_range = first_reg_in_range + insn->dalvikInsn.vA - 1;
-      if (IsInVReg(first_reg_in_range) != IsInVReg(last_reg_in_range)) {
-        punt_to_interpreter_ = true;
-      }
-    }
-    current_offset_ += width;
-    BasicBlock* next_block = FindBlock(current_offset_, /* create */ false,
-                                       /* immed_pred_block_p */ nullptr,
-                                       &dex_pc_to_block_map);
-    if (next_block) {
-      /*
-       * The next instruction could be the target of a previously parsed
-       * forward branch so a block is already created. If the current
-       * instruction is not an unconditional branch, connect them through
-       * the fall-through link.
-       */
-      DCHECK(cur_block->fall_through == NullBasicBlockId ||
-             GetBasicBlock(cur_block->fall_through) == next_block ||
-             GetBasicBlock(cur_block->fall_through) == exit_block_);
-
-      if ((cur_block->fall_through == NullBasicBlockId) && (flags & Instruction::kContinue)) {
-        cur_block->fall_through = next_block->id;
-        next_block->predecessors.push_back(cur_block->id);
-      }
-      cur_block = next_block;
-    }
-  }
-  merged_df_flags_ = merged_df_flags;
-
-  if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
-    DumpCFG("/sdcard/1_post_parse_cfg/", true);
-  }
-
-  if (cu_->verbose) {
-    DumpMIRGraph();
-  }
-
-  // Check if there's been a fall-through out of the method code.
-  BasicBlockId out_bb_id = dex_pc_to_block_map[current_code_item_->insns_size_in_code_units_];
-  if (UNLIKELY(out_bb_id != NullBasicBlockId)) {
-    // Eagerly calculate DFS order to determine if the block is dead.
-    DCHECK(!DfsOrdersUpToDate());
-    ComputeDFSOrders();
-    BasicBlock* out_bb = GetBasicBlock(out_bb_id);
-    DCHECK(out_bb != nullptr);
-    if (out_bb->block_type != kDead) {
-      LOG(WARNING) << "Live fall-through out of method in " << PrettyMethod(method_idx, dex_file);
-      SetPuntToInterpreter(true);
-    }
-  }
-}
-
-void MIRGraph::ShowOpcodeStats() {
-  DCHECK(opcode_count_ != nullptr);
-  LOG(INFO) << "Opcode Count";
-  for (int i = 0; i < kNumPackedOpcodes; i++) {
-    if (opcode_count_[i] != 0) {
-      LOG(INFO) << "-C- " << Instruction::Name(static_cast<Instruction::Code>(i))
-                << " " << opcode_count_[i];
-    }
-  }
-}
-
-uint64_t MIRGraph::GetDataFlowAttributes(Instruction::Code opcode) {
-  DCHECK_LT(static_cast<size_t>(opcode),
-            sizeof(oat_data_flow_attributes_) / sizeof(oat_data_flow_attributes_[0]));
-  return oat_data_flow_attributes_[opcode];
-}
-
-uint64_t MIRGraph::GetDataFlowAttributes(MIR* mir) {
-  DCHECK(mir != nullptr);
-  Instruction::Code opcode = mir->dalvikInsn.opcode;
-  return GetDataFlowAttributes(opcode);
-}
-
-// The path can easily surpass FS limits because of parameters etc. Use pathconf to get FS
-// restrictions here. Note that a successful invocation will return an actual value. If the path
-// is too long for some reason, the return will be ENAMETOOLONG. Then cut off part of the name.
-//
-// It's possible the path is not valid, or some other errors appear. In that case return false.
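-// Illustrative example (made-up limit): with _PC_NAME_MAX = 255 and a mangled
-// method name longer than that, the name is rebuilt from its first
-// 255 - strlen("_<nr>.dot") characters plus the "_<nr>.dot" suffix.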
-static bool CreateDumpFile(std::string& fname, const char* dir_prefix, NarrowDexOffset start_offset,
-                           const char *suffix, int nr, std::string* output) {
-  std::string dir = StringPrintf("./%s", dir_prefix);
-  int64_t max_name_length = pathconf(dir.c_str(), _PC_NAME_MAX);
-  if (max_name_length <= 0) {
-    PLOG(ERROR) << "Could not get file name restrictions for " << dir;
-    return false;
-  }
-
-  std::string name = StringPrintf("%s%x%s_%d.dot", fname.c_str(), start_offset,
-                                  suffix == nullptr ? "" : suffix, nr);
-  if (static_cast<int64_t>(name.size()) > max_name_length) {
-    std::string suffix_str = StringPrintf("_%d.dot", nr);
-    name = name.substr(0, static_cast<size_t>(max_name_length) - suffix_str.size()) + suffix_str;
-  }
-  // Sanity check.
-  DCHECK_LE(name.size(), static_cast<size_t>(max_name_length));
-
-  *output = StringPrintf("%s%s", dir_prefix, name.c_str());
-  return true;
-}
-
-// TODO: use a configurable base prefix, and adjust callers to supply pass name.
-/* Dump the CFG into a DOT graph */
-void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) {
-  FILE* file;
-  static AtomicInteger cnt(0);
-
-  // Increment counter to get a unique file number.
-  cnt++;
-  int nr = cnt.LoadRelaxed();
-
-  std::string fname(PrettyMethod(cu_->method_idx, *cu_->dex_file));
-  ReplaceSpecialChars(fname);
-  std::string fpath;
-  if (!CreateDumpFile(fname, dir_prefix, GetBasicBlock(GetEntryBlock()->fall_through)->start_offset,
-                      suffix, nr, &fpath)) {
-    LOG(ERROR) << "Could not create dump file name for " << fname;
-    return;
-  }
-  file = fopen(fpath.c_str(), "w");
-  if (file == nullptr) {
-    PLOG(ERROR) << "Could not open " << fpath << " for DumpCFG.";
-    return;
-  }
-  fprintf(file, "digraph G {\n");
-
-  fprintf(file, "  rankdir=TB\n");
-
-  int num_blocks = all_blocks ? GetNumBlocks() : num_reachable_blocks_;
-
-  for (int idx = 0; idx < num_blocks; idx++) {
-    int block_idx = all_blocks ? idx : dfs_order_[idx];
-    BasicBlock* bb = GetBasicBlock(block_idx);
-    if (bb == nullptr) continue;
-    if (bb->block_type == kDead) continue;
-    if (bb->hidden) continue;
-    if (bb->block_type == kEntryBlock) {
-      fprintf(file, "  entry_%d [shape=Mdiamond];\n", bb->id);
-    } else if (bb->block_type == kExitBlock) {
-      fprintf(file, "  exit_%d [shape=Mdiamond];\n", bb->id);
-    } else if (bb->block_type == kDalvikByteCode) {
-      fprintf(file, "  block%04x_%d [shape=record,label = \"{ \\\n",
-              bb->start_offset, bb->id);
-      fprintf(file, "    {block id %d\\l}%s\\\n", bb->id,
-              bb->first_mir_insn ? " | " : " ");
-      for (const MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-        int opcode = mir->dalvikInsn.opcode;
-        fprintf(file, "    {%04x %s %s %s %s %s %s %s %s %s\\l}%s\\\n", mir->offset,
-                mir->ssa_rep ? GetDalvikDisassembly(mir) :
-                !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
-                    Instruction::Name(mir->dalvikInsn.opcode) :
-                    extended_mir_op_names_[opcode - kMirOpFirst],
-                (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
-                (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
-                (mir->optimization_flags & MIR_IGNORE_SUSPEND_CHECK) != 0 ? " no_suspendcheck" : " ",
-                (mir->optimization_flags & MIR_STORE_NON_TEMPORAL) != 0 ? " non_temporal" : " ",
-                (mir->optimization_flags & MIR_CALLEE) != 0 ? " inlined" : " ",
-                (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0 ? " cl_inited" : " ",
-                (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0 ? " cl_in_cache" : " ",
-                (mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) != 0 ? " no_div_check" : " ",
-                mir->next ? " | " : " ");
-      }
-      fprintf(file, "  }\"];\n\n");
-    } else if (bb->block_type == kExceptionHandling) {
-      char block_name[BLOCK_NAME_LEN];
-
-      GetBlockName(bb, block_name);
-      fprintf(file, "  %s [shape=invhouse];\n", block_name);
-    }
-
-    char block_name1[BLOCK_NAME_LEN], block_name2[BLOCK_NAME_LEN];
-
-    if (bb->taken != NullBasicBlockId) {
-      GetBlockName(bb, block_name1);
-      GetBlockName(GetBasicBlock(bb->taken), block_name2);
-      fprintf(file, "  %s:s -> %s:n [style=dotted]\n",
-              block_name1, block_name2);
-    }
-    if (bb->fall_through != NullBasicBlockId) {
-      GetBlockName(bb, block_name1);
-      GetBlockName(GetBasicBlock(bb->fall_through), block_name2);
-      fprintf(file, "  %s:s -> %s:n\n", block_name1, block_name2);
-    }
-
-    if (bb->successor_block_list_type != kNotUsed) {
-      fprintf(file, "  succ%04x_%d [shape=%s,label = \"{ \\\n",
-              bb->start_offset, bb->id,
-              (bb->successor_block_list_type == kCatch) ?  "Mrecord" : "record");
-
-      int last_succ_id = static_cast<int>(bb->successor_blocks.size() - 1u);
-      int succ_id = 0;
-      for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-        BasicBlock* dest_block = GetBasicBlock(successor_block_info->block);
-        fprintf(file, "    {<f%d> %04x: %04x\\l}%s\\\n",
-                succ_id,
-                successor_block_info->key,
-                dest_block->start_offset,
-                (succ_id != last_succ_id) ? " | " : " ");
-        ++succ_id;
-      }
-      fprintf(file, "  }\"];\n\n");
-
-      GetBlockName(bb, block_name1);
-      fprintf(file, "  %s:s -> succ%04x_%d:n [style=dashed]\n",
-              block_name1, bb->start_offset, bb->id);
-
-      // Link the successor pseudo-block with all of its potential targets.
-      succ_id = 0;
-      for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-        BasicBlock* dest_block = GetBasicBlock(successor_block_info->block);
-
-        GetBlockName(dest_block, block_name2);
-        fprintf(file, "  succ%04x_%d:f%d:e -> %s:n\n", bb->start_offset,
-                bb->id, succ_id++, block_name2);
-      }
-    }
-    fprintf(file, "\n");
-
-    if (cu_->verbose) {
-      /* Display the dominator tree */
-      GetBlockName(bb, block_name1);
-      fprintf(file, "  cfg%s [label=\"%s\", shape=none];\n",
-              block_name1, block_name1);
-      if (bb->i_dom) {
-        GetBlockName(GetBasicBlock(bb->i_dom), block_name2);
-        fprintf(file, "  cfg%s:s -> cfg%s:n\n\n", block_name2, block_name1);
-      }
-    }
-  }
-  fprintf(file, "}\n");
-  fclose(file);
-}
-
-/* Insert a MIR instruction at the end of a basic block. */
-void BasicBlock::AppendMIR(MIR* mir) {
-  // Insert it after the last MIR.
-  InsertMIRListAfter(last_mir_insn, mir, mir);
-}
-
-void BasicBlock::AppendMIRList(MIR* first_list_mir, MIR* last_list_mir) {
-  // Insert it after the last MIR.
-  InsertMIRListAfter(last_mir_insn, first_list_mir, last_list_mir);
-}
-
-void BasicBlock::AppendMIRList(const std::vector<MIR*>& insns) {
-  for (MIR* mir : insns) {
-    // Append each MIR as a single-element list.
-    InsertMIRListAfter(last_mir_insn, mir, mir);
-  }
-}
-
-/* Insert a MIR instruction after the specified MIR. */
-void BasicBlock::InsertMIRAfter(MIR* current_mir, MIR* new_mir) {
-  InsertMIRListAfter(current_mir, new_mir, new_mir);
-}
-
-void BasicBlock::InsertMIRListAfter(MIR* insert_after, MIR* first_list_mir, MIR* last_list_mir) {
-  // If no MIR, we are done.
-  if (first_list_mir == nullptr || last_list_mir == nullptr) {
-    return;
-  }
-
-  // If insert_after is null, assume BB is empty.
-  if (insert_after == nullptr) {
-    first_mir_insn = first_list_mir;
-    last_mir_insn = last_list_mir;
-    last_list_mir->next = nullptr;
-  } else {
-    MIR* after_list = insert_after->next;
-    insert_after->next = first_list_mir;
-    last_list_mir->next = after_list;
-    if (after_list == nullptr) {
-      last_mir_insn = last_list_mir;
-    }
-  }
-
-  // Set this BB to be the basic block of the MIRs.
-  MIR* last = last_list_mir->next;
-  for (MIR* mir = first_list_mir; mir != last; mir = mir->next) {
-    mir->bb = id;
-  }
-}
-
-/* Insert a MIR instruction at the head of a basic block. */
-void BasicBlock::PrependMIR(MIR* mir) {
-  InsertMIRListBefore(first_mir_insn, mir, mir);
-}
-
-void BasicBlock::PrependMIRList(MIR* first_list_mir, MIR* last_list_mir) {
-  // Insert it before the first MIR.
-  InsertMIRListBefore(first_mir_insn, first_list_mir, last_list_mir);
-}
-
-void BasicBlock::PrependMIRList(const std::vector<MIR*>& to_add) {
-  for (MIR* mir : to_add) {
-    InsertMIRListBefore(first_mir_insn, mir, mir);
-  }
-}
-
-/* Insert a MIR instruction before the specified MIR. */
-void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) {
-  // Insert as a single element list.
-  return InsertMIRListBefore(current_mir, new_mir, new_mir);
-}
-
-MIR* BasicBlock::FindPreviousMIR(MIR* mir) {
-  MIR* current = first_mir_insn;
-
-  while (current != nullptr) {
-    MIR* next = current->next;
-
-    if (next == mir) {
-      return current;
-    }
-
-    current = next;
-  }
-
-  return nullptr;
-}
-
-void BasicBlock::InsertMIRListBefore(MIR* insert_before, MIR* first_list_mir, MIR* last_list_mir) {
-  // If no MIR, we are done.
-  if (first_list_mir == nullptr || last_list_mir == nullptr) {
-    return;
-  }
-
-  // If insert_before is null, assume BB is empty.
-  if (insert_before == nullptr) {
-    first_mir_insn = first_list_mir;
-    last_mir_insn = last_list_mir;
-    last_list_mir->next = nullptr;
-  } else {
-    if (first_mir_insn == insert_before) {
-      last_list_mir->next = first_mir_insn;
-      first_mir_insn = first_list_mir;
-    } else {
-      // Find the preceding MIR.
-      MIR* before_list = FindPreviousMIR(insert_before);
-      DCHECK(before_list != nullptr);
-      before_list->next = first_list_mir;
-      last_list_mir->next = insert_before;
-    }
-  }
-
-  // Set this BB to be the basic block of the MIRs.
-  for (MIR* mir = first_list_mir; mir != last_list_mir->next; mir = mir->next) {
-    mir->bb = id;
-  }
-}
-
-bool BasicBlock::RemoveMIR(MIR* mir) {
-  // Remove as a single element list.
-  return RemoveMIRList(mir, mir);
-}
-
-bool BasicBlock::RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir) {
-  if (first_list_mir == nullptr) {
-    return false;
-  }
-
-  // Try to find the MIR.
-  MIR* before_list = nullptr;
-  MIR* after_list = nullptr;
-
-  // If we are removing from the beginning of the MIR list.
-  if (first_mir_insn == first_list_mir) {
-    before_list = nullptr;
-  } else {
-    before_list = FindPreviousMIR(first_list_mir);
-    if (before_list == nullptr) {
-      // We did not find the mir.
-      return false;
-    }
-  }
-
-  // Remove the BB information and also find the after_list.
-  for (MIR* mir = first_list_mir; mir != last_list_mir->next; mir = mir->next) {
-    mir->bb = NullBasicBlockId;
-  }
-
-  after_list = last_list_mir->next;
-
-  // If there is nothing before the list, after_list becomes first_mir_insn.
-  if (before_list == nullptr) {
-    first_mir_insn = after_list;
-  } else {
-    before_list->next = after_list;
-  }
-
-  // If there is nothing after the list, before_list becomes last_mir_insn.
-  if (after_list == nullptr) {
-    last_mir_insn = before_list;
-  }
-
-  return true;
-}
-
-MIR* BasicBlock::GetFirstNonPhiInsn() {
-  MIR* mir = first_mir_insn;
-  while (mir != nullptr && static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi) {
-    mir = mir->next;
-  }
-  return mir;
-}
-
-MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) {
-  MIR* next_mir = nullptr;
-
-  if (current != nullptr) {
-    next_mir = current->next;
-  }
-
-  if (next_mir == nullptr) {
-    // Only look for next MIR that follows unconditionally.
-    if ((taken == NullBasicBlockId) && (fall_through != NullBasicBlockId)) {
-      next_mir = mir_graph->GetBasicBlock(fall_through)->first_mir_insn;
-    }
-  }
-
-  return next_mir;
-}
-
-static void FillTypeSizeString(uint32_t type_size, std::string* decoded_mir) {
-  DCHECK(decoded_mir != nullptr);
-  OpSize type = static_cast<OpSize>(type_size >> 16);
-  uint16_t vect_size = (type_size & 0xFFFF);
-
-  // Now print the type and vector size.
-  std::stringstream ss;
-  ss << " (type:";
-  ss << type;
-  ss << " vectsize:";
-  ss << vect_size;
-  ss << ")";
-
-  decoded_mir->append(ss.str());
-}
-
-void MIRGraph::DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir) {
-  DCHECK(decoded_mir != nullptr);
-  int opcode = mir->dalvikInsn.opcode;
-  SSARepresentation* ssa_rep = mir->ssa_rep;
-  int defs = (ssa_rep != nullptr) ? ssa_rep->num_defs : 0;
-  int uses = (ssa_rep != nullptr) ? ssa_rep->num_uses : 0;
-
-  if (opcode < kMirOpFirst) {
-    return;  // It is not an extended instruction.
-  }
-
-  decoded_mir->append(extended_mir_op_names_[opcode - kMirOpFirst]);
-
-  switch (opcode) {
-    case kMirOpPhi: {
-      if (defs > 0 && uses > 0) {
-        BasicBlockId* incoming = mir->meta.phi_incoming;
-        decoded_mir->append(StringPrintf(" %s = (%s",
-                           GetSSANameWithConst(ssa_rep->defs[0], true).c_str(),
-                           GetSSANameWithConst(ssa_rep->uses[0], true).c_str()));
-        decoded_mir->append(StringPrintf(":%d", incoming[0]));
-        for (int i = 1; i < uses; i++) {
-          decoded_mir->append(StringPrintf(", %s:%d", GetSSANameWithConst(ssa_rep->uses[i], true).c_str(), incoming[i]));
-        }
-        decoded_mir->append(")");
-      }
-      break;
-    }
-    case kMirOpCopy:
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(" ");
-        decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[0], false));
-        if (defs > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[1], false));
-        }
-        decoded_mir->append(" = ");
-        decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[0], false));
-        if (uses > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[1], false));
-        }
-      } else {
-        decoded_mir->append(StringPrintf(" v%d = v%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      }
-      break;
-    case kMirOpFusedCmplFloat:
-    case kMirOpFusedCmpgFloat:
-    case kMirOpFusedCmplDouble:
-    case kMirOpFusedCmpgDouble:
-    case kMirOpFusedCmpLong:
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(" ");
-        decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[0], false));
-        for (int i = 1; i < uses; i++) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[i], false));
-        }
-      } else {
-        decoded_mir->append(StringPrintf(" v%d, v%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      }
-      break;
-    case kMirOpMoveVector:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedAddition:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d + vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedMultiply:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d * vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedSubtract:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d - vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedAnd:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d & vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedOr:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d \\| vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedXor:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d ^ vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedShiftLeft:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d \\<\\< %d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedUnsignedShiftRight:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d \\>\\>\\> %d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedSignedShiftRight:
-      decoded_mir->append(StringPrintf(" vect%d = vect%d \\>\\> %d", mir->dalvikInsn.vA, mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpConstVector:
-      decoded_mir->append(StringPrintf(" vect%d = %x, %x, %x, %x", mir->dalvikInsn.vA, mir->dalvikInsn.arg[0],
-                                      mir->dalvikInsn.arg[1], mir->dalvikInsn.arg[2], mir->dalvikInsn.arg[3]));
-      break;
-    case kMirOpPackedSet:
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(StringPrintf(" vect%d = %s", mir->dalvikInsn.vA,
-              GetSSANameWithConst(ssa_rep->uses[0], false).c_str()));
-        if (uses > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[1], false));
-        }
-      } else {
-        decoded_mir->append(StringPrintf(" vect%d = v%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      }
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedAddReduce:
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(" ");
-        decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[0], false));
-        if (defs > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[1], false));
-        }
-        decoded_mir->append(StringPrintf(" = vect%d + %s", mir->dalvikInsn.vB,
-            GetSSANameWithConst(ssa_rep->uses[0], false).c_str()));
-        if (uses > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[1], false));
-        }
-      } else {
-        decoded_mir->append(StringPrintf(" v%d = vect%d + v%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB, mir->dalvikInsn.vA));
-      }
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpPackedReduce:
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(" ");
-        decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[0], false));
-        if (defs > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[1], false));
-        }
-        decoded_mir->append(StringPrintf(" = vect%d (extr_idx:%d)", mir->dalvikInsn.vB, mir->dalvikInsn.arg[0]));
-      } else {
-        decoded_mir->append(StringPrintf(" v%d = vect%d (extr_idx:%d)", mir->dalvikInsn.vA,
-                                         mir->dalvikInsn.vB, mir->dalvikInsn.arg[0]));
-      }
-      FillTypeSizeString(mir->dalvikInsn.vC, decoded_mir);
-      break;
-    case kMirOpReserveVectorRegisters:
-    case kMirOpReturnVectorRegisters:
-      decoded_mir->append(StringPrintf(" vect%d - vect%d", mir->dalvikInsn.vA, mir->dalvikInsn.vB));
-      break;
-    case kMirOpMemBarrier: {
-      decoded_mir->append(" type:");
-      std::stringstream ss;
-      ss << static_cast<MemBarrierKind>(mir->dalvikInsn.vA);
-      decoded_mir->append(ss.str());
-      break;
-    }
-    case kMirOpPackedArrayGet:
-    case kMirOpPackedArrayPut:
-      decoded_mir->append(StringPrintf(" vect%d", mir->dalvikInsn.vA));
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(StringPrintf(", %s[%s]",
-                                        GetSSANameWithConst(ssa_rep->uses[0], false).c_str(),
-                                        GetSSANameWithConst(ssa_rep->uses[1], false).c_str()));
-      } else {
-        decoded_mir->append(StringPrintf(", v%d[v%d]", mir->dalvikInsn.vB, mir->dalvikInsn.vC));
-      }
-      FillTypeSizeString(mir->dalvikInsn.arg[0], decoded_mir);
-      break;
-    case kMirOpMaddInt:
-    case kMirOpMsubInt:
-    case kMirOpMaddLong:
-    case kMirOpMsubLong:
-      if (ssa_rep != nullptr) {
-        decoded_mir->append(" ");
-        decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[0], false));
-        if (defs > 1) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[1], false));
-        }
-        for (int i = 0; i < uses; i++) {
-          decoded_mir->append(", ");
-          decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[i], false));
-        }
-      } else {
-        decoded_mir->append(StringPrintf(" v%d, v%d, v%d, v%d",
-                                         mir->dalvikInsn.vA, mir->dalvikInsn.vB,
-                                         mir->dalvikInsn.vC, mir->dalvikInsn.arg[0]));
-      }
-      break;
-    default:
-      break;
-  }
-}
-
-char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
-  MIR::DecodedInstruction insn = mir->dalvikInsn;
-  std::string str;
-  int flags = 0;
-  int opcode = insn.opcode;
-  char* ret;
-  bool nop = false;
-  SSARepresentation* ssa_rep = mir->ssa_rep;
-  Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format.
-
-  // Handle special cases that recover the original dalvik instruction.
-  if (opcode == kMirOpCheck) {
-    str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
-    str.append(": ");
-    // Recover the original Dex instruction.
-    insn = mir->meta.throw_insn->dalvikInsn;
-    ssa_rep = mir->meta.throw_insn->ssa_rep;
-    opcode = insn.opcode;
-  } else if (opcode == kMirOpNop) {
-    str.append("[");
-    if (mir->offset < current_code_item_->insns_size_in_code_units_) {
-      // Recover original opcode.
-      insn.opcode = Instruction::At(current_code_item_->insns_ + mir->offset)->Opcode();
-      opcode = insn.opcode;
-    }
-    nop = true;
-  }
-  int defs = (ssa_rep != nullptr) ? ssa_rep->num_defs : 0;
-  int uses = (ssa_rep != nullptr) ? ssa_rep->num_uses : 0;
-
-  if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
-    // Note that this does not check the MIR's opcode in all cases. When the original
-    // dalvik instruction was recovered above, its opcode is used instead of the extended one.
-    DisassembleExtendedInstr(mir, &str);
-  } else {
-    dalvik_format = Instruction::FormatOf(insn.opcode);
-    flags = insn.FlagsOf();
-    str.append(Instruction::Name(insn.opcode));
-
-    // For invokes-style formats, treat wide regs as a pair of singles.
-    bool show_singles = ((dalvik_format == Instruction::k35c) ||
-                         (dalvik_format == Instruction::k3rc));
-    if (defs != 0) {
-      str.append(" ");
-      str.append(GetSSANameWithConst(ssa_rep->defs[0], false));
-      if (defs > 1) {
-        str.append(", ");
-        str.append(GetSSANameWithConst(ssa_rep->defs[1], false));
-      }
-      if (uses != 0) {
-        str.append(", ");
-      }
-    }
-    for (int i = 0; i < uses; i++) {
-      str.append(" ");
-      str.append(GetSSANameWithConst(ssa_rep->uses[i], show_singles));
-      if (!show_singles && (reg_location_ != nullptr) && reg_location_[ssa_rep->uses[i]].wide) {
-        // For the listing, skip the high sreg.
-        i++;
-      }
-      if (i != (uses - 1)) {
-        str.append(",");
-      }
-    }
-
-    switch (dalvik_format) {
-      case Instruction::k11n:  // Add one immediate from vB.
-      case Instruction::k21s:
-      case Instruction::k31i:
-      case Instruction::k21h:
-        str.append(StringPrintf(", #0x%x", insn.vB));
-        break;
-      case Instruction::k51l:  // Add one wide immediate.
-        str.append(StringPrintf(", #%" PRId64, insn.vB_wide));
-        break;
-      case Instruction::k21c:  // One register, one string/type/method index.
-      case Instruction::k31c:
-        str.append(StringPrintf(", index #0x%x", insn.vB));
-        break;
-      case Instruction::k22c:  // Two registers, one string/type/method index.
-        str.append(StringPrintf(", index #0x%x", insn.vC));
-        break;
-      case Instruction::k22s:  // Add one immediate from vC.
-      case Instruction::k22b:
-        str.append(StringPrintf(", #0x%x", insn.vC));
-        break;
-      default:
-        // Nothing left to print.
-        break;
-    }
-
-    if ((flags & Instruction::kBranch) != 0) {
-      // For branches, decode the instructions to print out the branch targets.
-      int offset = 0;
-      switch (dalvik_format) {
-        case Instruction::k21t:
-          offset = insn.vB;
-          break;
-        case Instruction::k22t:
-          offset = insn.vC;
-          break;
-        case Instruction::k10t:
-        case Instruction::k20t:
-        case Instruction::k30t:
-          offset = insn.vA;
-          break;
-        default:
-          LOG(FATAL) << "Unexpected branch format " << dalvik_format << " from " << insn.opcode;
-          break;
-      }
-      str.append(StringPrintf(", 0x%x (%c%x)", mir->offset + offset,
-                              offset > 0 ? '+' : '-', offset > 0 ? offset : -offset));
-    }
-
-    if (nop) {
-      str.append("]--optimized away");
-    }
-  }
-  int length = str.length() + 1;
-  ret = arena_->AllocArray<char>(length, kArenaAllocDFInfo);
-  strncpy(ret, str.c_str(), length);
-  return ret;
-}
-
-/* Turn method name into a legal Linux file name */
-void MIRGraph::ReplaceSpecialChars(std::string& str) {
-  static const struct { const char before; const char after; } match[] = {
-    {'/', '-'}, {';', '#'}, {' ', '#'}, {'$', '+'},
-    {'(', '@'}, {')', '@'}, {'<', '='}, {'>', '='}
-  };
-  for (unsigned int i = 0; i < sizeof(match)/sizeof(match[0]); i++) {
-    std::replace(str.begin(), str.end(), match[i].before, match[i].after);
-  }
-}
-
-std::string MIRGraph::GetSSAName(int ssa_reg) {
-  // TODO: This value is needed for debugging. Currently, we compute this and then copy to the
-  //       arena. We should be smarter and just place straight into the arena, or compute the
-  //       value more lazily.
-  int vreg = SRegToVReg(ssa_reg);
-  if (vreg >= static_cast<int>(GetFirstTempVR())) {
-    return StringPrintf("t%d_%d", vreg, GetSSASubscript(ssa_reg));
-  } else {
-    return StringPrintf("v%d_%d", vreg, GetSSASubscript(ssa_reg));
-  }
-}
-
-// Similar to GetSSAName, but if the SSA name represents an immediate, show that as well.
-std::string MIRGraph::GetSSANameWithConst(int ssa_reg, bool singles_only) {
-  if (reg_location_ == nullptr) {
-    // Pre-SSA - just use the standard name.
-    return GetSSAName(ssa_reg);
-  }
-  if (IsConst(reg_location_[ssa_reg])) {
-    if (!singles_only && reg_location_[ssa_reg].wide &&
-        !reg_location_[ssa_reg].high_word) {
-      return StringPrintf("v%d_%d#0x%" PRIx64, SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
-                          ConstantValueWide(reg_location_[ssa_reg]));
-    } else {
-      return StringPrintf("v%d_%d#0x%x", SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
-                          ConstantValue(reg_location_[ssa_reg]));
-    }
-  } else {
-    int vreg = SRegToVReg(ssa_reg);
-    if (vreg >= static_cast<int>(GetFirstTempVR())) {
-      return StringPrintf("t%d_%d", vreg, GetSSASubscript(ssa_reg));
-    } else {
-      return StringPrintf("v%d_%d", vreg, GetSSASubscript(ssa_reg));
-    }
-  }
-}
-
-void MIRGraph::GetBlockName(BasicBlock* bb, char* name) {
-  switch (bb->block_type) {
-    case kEntryBlock:
-      snprintf(name, BLOCK_NAME_LEN, "entry_%d", bb->id);
-      break;
-    case kExitBlock:
-      snprintf(name, BLOCK_NAME_LEN, "exit_%d", bb->id);
-      break;
-    case kDalvikByteCode:
-      snprintf(name, BLOCK_NAME_LEN, "block%04x_%d", bb->start_offset, bb->id);
-      break;
-    case kExceptionHandling:
-      snprintf(name, BLOCK_NAME_LEN, "exception%04x_%d", bb->start_offset,
-               bb->id);
-      break;
-    default:
-      snprintf(name, BLOCK_NAME_LEN, "_%d", bb->id);
-      break;
-  }
-}
-
-const char* MIRGraph::GetShortyFromMethodReference(const MethodReference& target_method) {
-  const DexFile::MethodId& method_id =
-      target_method.dex_file->GetMethodId(target_method.dex_method_index);
-  return target_method.dex_file->GetShorty(method_id.proto_idx_);
-}
-
-/* Debug Utility - dump a compilation unit */
-void MIRGraph::DumpMIRGraph() {
-  const char* block_type_names[] = {
-    "Null Block",
-    "Entry Block",
-    "Code Block",
-    "Exit Block",
-    "Exception Handling",
-    "Catch Block"
-  };
-
-  LOG(INFO) << "Compiling " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  LOG(INFO) << GetInsns(0) << " insns";
-  LOG(INFO) << GetNumBlocks() << " blocks in total";
-
-  for (BasicBlock* bb : block_list_) {
-    LOG(INFO) << StringPrintf("Block %d (%s) (insn %04x - %04x%s)",
-        bb->id,
-        block_type_names[bb->block_type],
-        bb->start_offset,
-        bb->last_mir_insn ? bb->last_mir_insn->offset : bb->start_offset,
-        bb->last_mir_insn ? "" : " empty");
-    if (bb->taken != NullBasicBlockId) {
-      LOG(INFO) << "  Taken branch: block " << bb->taken
-                << " (0x" << std::hex << GetBasicBlock(bb->taken)->start_offset << ")";
-    }
-    if (bb->fall_through != NullBasicBlockId) {
-      LOG(INFO) << "  Fallthrough : block " << bb->fall_through
-                << " (0x" << std::hex << GetBasicBlock(bb->fall_through)->start_offset << ")";
-    }
-  }
-}
-
-/*
- * Build an array of location records for the incoming arguments.
- * Note: one location record per word of arguments, with dummy
- * high-word loc for wide arguments.  Also pull up any following
- * MOVE_RESULT and incorporate it into the invoke.
- */
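-/*
- * Illustrative example (made-up signature): for an invoke-virtual whose shorty
- * is "VJI" (void return, long arg, int arg) plus the implicit "this",
- * num_arg_words is 4: one loc for this, two for the wide long (low word plus
- * dummy high word) and one for the int.
- */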
-CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range) {
-  CallInfo* info = static_cast<CallInfo*>(arena_->Alloc(sizeof(CallInfo),
-                                                        kArenaAllocMisc));
-  MIR* move_result_mir = FindMoveResult(bb, mir);
-  if (move_result_mir == nullptr) {
-    info->result.location = kLocInvalid;
-  } else {
-    info->result = GetRawDest(move_result_mir);
-    move_result_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-  }
-  info->num_arg_words = mir->ssa_rep->num_uses;
-  info->args = (info->num_arg_words == 0) ? nullptr :
-      arena_->AllocArray<RegLocation>(info->num_arg_words, kArenaAllocMisc);
-  for (size_t i = 0; i < info->num_arg_words; i++) {
-    info->args[i] = GetRawSrc(mir, i);
-  }
-  info->opt_flags = mir->optimization_flags;
-  info->type = type;
-  info->is_range = is_range;
-  if (IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) {
-    const auto& method_info = GetMethodLoweringInfo(mir);
-    info->method_ref = method_info.GetTargetMethod();
-  } else {
-    info->method_ref = MethodReference(GetCurrentDexCompilationUnit()->GetDexFile(),
-                                       mir->dalvikInsn.vB);
-  }
-  info->index = mir->dalvikInsn.vB;
-  info->offset = mir->offset;
-  info->mir = mir;
-  return info;
-}
-
-// Allocate a new MIR.
-MIR* MIRGraph::NewMIR() {
-  MIR* mir = new (arena_) MIR();
-  return mir;
-}
-
-// Allocate a new basic block.
-BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) {
-  BasicBlock* bb = new (arena_) BasicBlock(block_id, block_type, arena_);
-
-  // TUNING: better estimate of the exit block predecessors?
-  bb->predecessors.reserve((block_type == kExitBlock) ? 2048 : 2);
-  block_id_map_.Put(block_id, block_id);
-  return bb;
-}
-
-void MIRGraph::InitializeConstantPropagation() {
-  is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false);
-  constant_values_ = arena_->AllocArray<int>(GetNumSSARegs(), kArenaAllocDFInfo);
-}
-
-void MIRGraph::InitializeMethodUses() {
-  // The gate starts by initializing the use counts.
-  int num_ssa_regs = GetNumSSARegs();
-  use_counts_.clear();
-  use_counts_.reserve(num_ssa_regs + 32);
-  use_counts_.resize(num_ssa_regs, 0u);
-  raw_use_counts_.clear();
-  raw_use_counts_.reserve(num_ssa_regs + 32);
-  raw_use_counts_.resize(num_ssa_regs, 0u);
-}
-
-void MIRGraph::SSATransformationStart() {
-  DCHECK(temp_scoped_alloc_.get() == nullptr);
-  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_.ssa.num_vregs = GetNumOfCodeAndTempVRs();
-  temp_.ssa.work_live_vregs = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_.ssa.num_vregs, false, kBitMapRegisterV);
-}
-
-void MIRGraph::SSATransformationEnd() {
-  // Verify the dataflow information after the pass.
-  if (cu_->enable_debug & (1 << kDebugVerifyDataflow)) {
-    VerifyDataflow();
-  }
-
-  temp_.ssa.num_vregs = 0u;
-  temp_.ssa.work_live_vregs = nullptr;
-  DCHECK(temp_.ssa.def_block_matrix == nullptr);
-  temp_.ssa.phi_node_blocks = nullptr;
-  DCHECK(temp_scoped_alloc_.get() != nullptr);
-  temp_scoped_alloc_.reset();
-
-  // Update the maximum number of reachable blocks.
-  max_num_reachable_blocks_ = num_reachable_blocks_;
-
-  // Mark MIR SSA representations as up to date.
-  mir_ssa_rep_up_to_date_ = true;
-}
-
-size_t MIRGraph::GetNumDalvikInsns() const {
-  size_t cumulative_size = 0u;
-  bool counted_current_item = false;
-  const uint8_t size_for_null_code_item = 2u;
-
-  for (auto it : m_units_) {
-    const DexFile::CodeItem* code_item = it->GetCodeItem();
-    // Even if the code item is null, we still count a non-zero value so that
-    // each m_unit is counted as having an impact.
-    cumulative_size += (code_item == nullptr ?
-        size_for_null_code_item : code_item->insns_size_in_code_units_);
-    if (code_item == current_code_item_) {
-      counted_current_item = true;
-    }
-  }
-
-  // If the current code item was not counted yet, count it now.
-  // This can happen for example in unit tests where some fields like m_units_
-  // are not initialized.
-  if (!counted_current_item) {
-    cumulative_size += (current_code_item_ == nullptr ?
-        size_for_null_code_item : current_code_item_->insns_size_in_code_units_);
-  }
-
-  return cumulative_size;
-}
-
-static BasicBlock* SelectTopologicalSortOrderFallBack(
-    MIRGraph* mir_graph, const ArenaBitVector* current_loop,
-    const ScopedArenaVector<size_t>* visited_cnt_values, ScopedArenaAllocator* allocator,
-    ScopedArenaVector<BasicBlockId>* tmp_stack) {
-  // No true loop head has been found, but there may be true loop heads beyond the
-  // irreducible region we still need to resolve. To avoid taking one of those, pick
-  // the candidate with the highest number of reachable unvisited nodes. That candidate
-  // will surely be a part of a loop.
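-  // Illustrative intuition (made-up example): in an irreducible region such as
-  // A -> B -> C -> B with a side entry A -> C, neither B nor C is a true loop
-  // head; the candidate reaching more unvisited blocks is taken first so the
-  // remaining blocks can still be ordered inside the induced loop.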
-  BasicBlock* fall_back = nullptr;
-  size_t fall_back_num_reachable = 0u;
-  // Reuse the same bit vector for each candidate to mark reachable unvisited blocks.
-  ArenaBitVector candidate_reachable(allocator, mir_graph->GetNumBlocks(), false, kBitMapMisc);
-  AllNodesIterator iter(mir_graph);
-  for (BasicBlock* candidate = iter.Next(); candidate != nullptr; candidate = iter.Next()) {
-    if (candidate->hidden ||                            // Hidden, or
-        candidate->visited ||                           // already processed, or
-        (*visited_cnt_values)[candidate->id] == 0u ||   // no processed predecessors, or
-        (current_loop != nullptr &&                     // outside current loop.
-         !current_loop->IsBitSet(candidate->id))) {
-      continue;
-    }
-    DCHECK(tmp_stack->empty());
-    tmp_stack->push_back(candidate->id);
-    candidate_reachable.ClearAllBits();
-    size_t num_reachable = 0u;
-    while (!tmp_stack->empty()) {
-      BasicBlockId current_id = tmp_stack->back();
-      tmp_stack->pop_back();
-      BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id);
-      DCHECK(current_bb != nullptr);
-      ChildBlockIterator child_iter(current_bb, mir_graph);
-      for (BasicBlock* child_bb = child_iter.Next(); child_bb != nullptr;
-           child_bb = child_iter.Next()) {
-        DCHECK(!child_bb->hidden);
-        if (child_bb->visited ||                            // Already processed, or
-            (current_loop != nullptr &&                     // outside current loop.
-             !current_loop->IsBitSet(child_bb->id))) {
-          continue;
-        }
-        if (!candidate_reachable.IsBitSet(child_bb->id)) {
-          candidate_reachable.SetBit(child_bb->id);
-          tmp_stack->push_back(child_bb->id);
-          num_reachable += 1u;
-        }
-      }
-    }
-    if (fall_back_num_reachable < num_reachable) {
-      fall_back_num_reachable = num_reachable;
-      fall_back = candidate;
-    }
-  }
-  return fall_back;
-}
-
-// Compute from which unvisited blocks is bb_id reachable through unvisited blocks.
-static void ComputeUnvisitedReachableFrom(MIRGraph* mir_graph, BasicBlockId bb_id,
-                                          ArenaBitVector* reachable,
-                                          ScopedArenaVector<BasicBlockId>* tmp_stack) {
-  // NOTE: Loop heads indicated by the "visited" flag.
-  DCHECK(tmp_stack->empty());
-  reachable->ClearAllBits();
-  tmp_stack->push_back(bb_id);
-  while (!tmp_stack->empty()) {
-    BasicBlockId current_id = tmp_stack->back();
-    tmp_stack->pop_back();
-    BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id);
-    DCHECK(current_bb != nullptr);
-    for (BasicBlockId pred_id : current_bb->predecessors) {
-      BasicBlock* pred_bb = mir_graph->GetBasicBlock(pred_id);
-      DCHECK(pred_bb != nullptr);
-      if (!pred_bb->visited && !reachable->IsBitSet(pred_bb->id)) {
-        reachable->SetBit(pred_bb->id);
-        tmp_stack->push_back(pred_bb->id);
-      }
-    }
-  }
-}
-
-void MIRGraph::ComputeTopologicalSortOrder() {
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  unsigned int num_blocks = GetNumBlocks();
-
-  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
-  ScopedArenaVector<size_t> visited_cnt_values(num_blocks, 0u, allocator.Adapter());
-  ScopedArenaVector<BasicBlockId> loop_head_stack(allocator.Adapter());
-  size_t max_nested_loops = 0u;
-  ArenaBitVector loop_exit_blocks(&allocator, num_blocks, false, kBitMapMisc);
-  loop_exit_blocks.ClearAllBits();
-
-  // Count the number of blocks to process and add the entry block(s).
-  unsigned int num_blocks_to_process = 0u;
-  for (BasicBlock* bb : block_list_) {
-    if (bb->hidden) {
-      continue;
-    }
-
-    num_blocks_to_process += 1u;
-
-    if (bb->predecessors.size() == 0u) {
-      // Add entry block to the queue.
-      q.push(bb);
-    }
-  }
-
-  // Clear the topological order arrays.
-  topological_order_.clear();
-  topological_order_.reserve(num_blocks);
-  topological_order_loop_ends_.clear();
-  topological_order_loop_ends_.resize(num_blocks, 0u);
-  topological_order_indexes_.clear();
-  topological_order_indexes_.resize(num_blocks, static_cast<uint16_t>(-1));
-
-  // Mark all blocks as unvisited.
-  ClearAllVisitedFlags();
-
-  // For loop heads, keep track of the blocks from which they are reachable without going
-  // through other loop heads. Excluding other loop heads lets us detect the heads of nested
-  // loops. The children in this set go into the loop body; the other children jump over the loop.
-  ScopedArenaVector<ArenaBitVector*> loop_head_reachable_from(allocator.Adapter());
-  loop_head_reachable_from.resize(num_blocks, nullptr);
-  // Reuse the same temp stack whenever calculating a loop_head_reachable_from[loop_head_id].
-  ScopedArenaVector<BasicBlockId> tmp_stack(allocator.Adapter());
-
-  while (num_blocks_to_process != 0u) {
-    BasicBlock* bb = nullptr;
-    if (!q.empty()) {
-      num_blocks_to_process -= 1u;
-      // Get top.
-      bb = q.front();
-      q.pop();
-      if (bb->visited) {
-        // Loop head: it was already processed, mark end and copy exit blocks to the queue.
-        DCHECK(q.empty()) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-        uint16_t idx = static_cast<uint16_t>(topological_order_.size());
-        topological_order_loop_ends_[topological_order_indexes_[bb->id]] = idx;
-        DCHECK_EQ(loop_head_stack.back(), bb->id);
-        loop_head_stack.pop_back();
-        ArenaBitVector* reachable =
-            loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()];
-        for (BasicBlockId candidate_id : loop_exit_blocks.Indexes()) {
-          if (reachable == nullptr || reachable->IsBitSet(candidate_id)) {
-            q.push(GetBasicBlock(candidate_id));
-            // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again,
-            // so clearing the bit has no effect on the iterator.
-            loop_exit_blocks.ClearBit(candidate_id);
-          }
-        }
-        continue;
-      }
-    } else {
-      // Find the new loop head.
-      AllNodesIterator iter(this);
-      while (true) {
-        BasicBlock* candidate = iter.Next();
-        if (candidate == nullptr) {
-          // We did not find a true loop head, fall back to a reachable block in any loop.
-          ArenaBitVector* current_loop =
-              loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()];
-          bb = SelectTopologicalSortOrderFallBack(this, current_loop, &visited_cnt_values,
-                                                  &allocator, &tmp_stack);
-          DCHECK(bb != nullptr) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-          if (kIsDebugBuild && cu_->dex_file != nullptr) {
-            LOG(INFO) << "Topological sort order: Using fall-back in "
-                << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " BB #" << bb->id
-                << " @0x" << std::hex << bb->start_offset
-                << ", num_blocks = " << std::dec << num_blocks;
-          }
-          break;
-        }
-        if (candidate->hidden ||                            // Hidden, or
-            candidate->visited ||                           // already processed, or
-            visited_cnt_values[candidate->id] == 0u ||      // no processed predecessors, or
-            (!loop_head_stack.empty() &&                    // outside current loop.
-             !loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(candidate->id))) {
-          continue;
-        }
-
-        for (BasicBlockId pred_id : candidate->predecessors) {
-          BasicBlock* pred_bb = GetBasicBlock(pred_id);
-          DCHECK(pred_bb != nullptr);
-          if (pred_bb != candidate && !pred_bb->visited &&
-              !pred_bb->dominators->IsBitSet(candidate->id)) {
-            candidate = nullptr;  // Set candidate to null to indicate failure.
-            break;
-          }
-        }
-        if (candidate != nullptr) {
-          bb = candidate;
-          break;
-        }
-      }
-      // Compute blocks from which the loop head is reachable and process those blocks first.
-      ArenaBitVector* reachable =
-          new (&allocator) ArenaBitVector(&allocator, num_blocks, false, kBitMapMisc);
-      loop_head_reachable_from[bb->id] = reachable;
-      ComputeUnvisitedReachableFrom(this, bb->id, reachable, &tmp_stack);
-      // Now mark as loop head. (Even if it's only a fall back when we don't find a true loop.)
-      loop_head_stack.push_back(bb->id);
-      max_nested_loops = std::max(max_nested_loops, loop_head_stack.size());
-    }
-
-    DCHECK_EQ(bb->hidden, false);
-    DCHECK_EQ(bb->visited, false);
-    bb->visited = true;
-    bb->nesting_depth = loop_head_stack.size();
-
-    // Now add the basic block.
-    uint16_t idx = static_cast<uint16_t>(topological_order_.size());
-    topological_order_indexes_[bb->id] = idx;
-    topological_order_.push_back(bb->id);
-
-    // Update visited_cnt_values for children.
-    ChildBlockIterator succIter(bb, this);
-    BasicBlock* successor = succIter.Next();
-    for ( ; successor != nullptr; successor = succIter.Next()) {
-      if (successor->hidden) {
-        continue;
-      }
-
-      // One more predecessor was visited.
-      visited_cnt_values[successor->id] += 1u;
-      if (visited_cnt_values[successor->id] == successor->predecessors.size()) {
-        if (loop_head_stack.empty() ||
-            loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(successor->id)) {
-          q.push(successor);
-        } else {
-          DCHECK(!loop_exit_blocks.IsBitSet(successor->id));
-          loop_exit_blocks.SetBit(successor->id);
-        }
-      }
-    }
-  }
-
-  // Prepare the loop head stack for iteration.
-  topological_order_loop_head_stack_.clear();
-  topological_order_loop_head_stack_.reserve(max_nested_loops);
-  max_nested_loops_ = max_nested_loops;
-  topological_order_up_to_date_ = true;
-}
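-
-// Illustrative sketch (not part of the original sources) of how a pass might walk
-// the order computed above, assuming the MIRGraph accessors declared in mir_graph.h:
-//
-//   void VisitInTopologicalOrder(MIRGraph* mir_graph) {
-//     for (BasicBlockId bb_id : mir_graph->GetTopologicalSortOrder()) {
-//       BasicBlock* bb = mir_graph->GetBasicBlock(bb_id);
-//       // Process bb here; loop bodies can be re-visited until a fixed point by
-//       // consulting GetTopologicalSortOrderLoopEnds() for each loop's end index.
-//     }
-//   }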
-
-bool BasicBlock::IsExceptionBlock() const {
-  return block_type == kExceptionHandling;
-}
-
-ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
-    : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
-      visited_taken_(false), have_successors_(false) {
-  // Check if we actually do have successors.
-  if (basic_block_ != nullptr && basic_block_->successor_block_list_type != kNotUsed) {
-    have_successors_ = true;
-    successor_iter_ = basic_block_->successor_blocks.cbegin();
-  }
-}
-
-BasicBlock* ChildBlockIterator::Next() {
-  // If we don't have a basic block, we cannot get the next child.
-  if (basic_block_ == nullptr) {
-    return nullptr;
-  }
-
-  // If we haven't visited fallthrough, return that.
-  if (!visited_fallthrough_) {
-    visited_fallthrough_ = true;
-
-    BasicBlock* result = mir_graph_->GetBasicBlock(basic_block_->fall_through);
-    if (result != nullptr) {
-      return result;
-    }
-  }
-
-  // If we haven't visited taken, return that.
-  if (!visited_taken_) {
-    visited_taken_ = true;
-
-    BasicBlock* result = mir_graph_->GetBasicBlock(basic_block_->taken);
-    if (result != nullptr) {
-      return result;
-    }
-  }
-
-  // We visited both taken and fallthrough. Now check if we have successors we need to visit.
-  if (have_successors_) {
-    // Get information about next successor block.
-    auto end = basic_block_->successor_blocks.cend();
-    while (successor_iter_ != end) {
-      SuccessorBlockInfo* successor_block_info = *successor_iter_;
-      ++successor_iter_;
-      // If block was replaced by zero block, take next one.
-      if (successor_block_info->block != NullBasicBlockId) {
-        return mir_graph_->GetBasicBlock(successor_block_info->block);
-      }
-    }
-  }
-
-  // We do not have anything.
-  return nullptr;
-}
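-
-// Typical usage (illustrative sketch, mirroring the call sites in this file):
-//
-//   ChildBlockIterator iter(bb, mir_graph);
-//   for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
-//     // Children arrive in order: fall-through, taken, then the successor_blocks
-//     // list, with null/replaced entries skipped.
-//   }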
-
-BasicBlock* BasicBlock::Copy(CompilationUnit* c_unit) {
-  MIRGraph* mir_graph = c_unit->mir_graph.get();
-  return Copy(mir_graph);
-}
-
-BasicBlock* BasicBlock::Copy(MIRGraph* mir_graph) {
-  BasicBlock* result_bb = mir_graph->CreateNewBB(block_type);
-
-  // We don't do a memcpy style copy here because it would lead to a lot of things
-  // to clean up. Let us do it by hand instead.
-  // Copy in taken and fallthrough.
-  result_bb->fall_through = fall_through;
-  result_bb->taken = taken;
-
-  // Copy successor links if needed.
-  ArenaAllocator* arena = mir_graph->GetArena();
-
-  result_bb->successor_block_list_type = successor_block_list_type;
-  if (result_bb->successor_block_list_type != kNotUsed) {
-    result_bb->successor_blocks.reserve(successor_blocks.size());
-    for (SuccessorBlockInfo* sbi_old : successor_blocks) {
-      SuccessorBlockInfo* sbi_new = static_cast<SuccessorBlockInfo*>(
-          arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
-      memcpy(sbi_new, sbi_old, sizeof(SuccessorBlockInfo));
-      result_bb->successor_blocks.push_back(sbi_new);
-    }
-  }
-
-  // Copy offset, method.
-  result_bb->start_offset = start_offset;
-
-  // Now copy instructions.
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    // Get a copy first.
-    MIR* copy = mir->Copy(mir_graph);
-
-    // Append it.
-    result_bb->AppendMIR(copy);
-  }
-
-  return result_bb;
-}
-
-MIR* MIR::Copy(MIRGraph* mir_graph) {
-  MIR* res = mir_graph->NewMIR();
-  *res = *this;
-
-  // Remove links
-  res->next = nullptr;
-  res->bb = NullBasicBlockId;
-  res->ssa_rep = nullptr;
-
-  return res;
-}
-
-MIR* MIR::Copy(CompilationUnit* c_unit) {
-  return Copy(c_unit->mir_graph.get());
-}
-
-uint32_t SSARepresentation::GetStartUseIndex(Instruction::Code opcode) {
-  // Default result.
-  int res = 0;
-
-  // For store instructions, skip past the VR(s) holding the value being stored so that the
-  // remaining uses line up with their *GET counterparts.
-  switch (opcode) {
-    case Instruction::IPUT:
-    case Instruction::IPUT_OBJECT:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_SHORT:
-    case Instruction::IPUT_QUICK:
-    case Instruction::IPUT_OBJECT_QUICK:
-    case Instruction::IPUT_BOOLEAN_QUICK:
-    case Instruction::IPUT_BYTE_QUICK:
-    case Instruction::IPUT_CHAR_QUICK:
-    case Instruction::IPUT_SHORT_QUICK:
-    case Instruction::APUT:
-    case Instruction::APUT_OBJECT:
-    case Instruction::APUT_BOOLEAN:
-    case Instruction::APUT_BYTE:
-    case Instruction::APUT_CHAR:
-    case Instruction::APUT_SHORT:
-    case Instruction::SPUT:
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT:
-      // Skip the VR containing what to store.
-      res = 1;
-      break;
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_WIDE_QUICK:
-    case Instruction::APUT_WIDE:
-    case Instruction::SPUT_WIDE:
-      // Skip the two VRs containing what to store.
-      res = 2;
-      break;
-    default:
-      // Do nothing in the general case.
-      break;
-  }
-
-  return res;
-}
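-
-// Illustrative example (not from the original sources): for "IPUT v0, v1, field@N"
-// the SSA uses are { v0 (value to store), v1 (object) }, so GetStartUseIndex()
-// returns 1 and a pass interested only in the object operand starts at uses[1];
-// for "IPUT_WIDE v0, v2, field@N" the first two uses hold the wide value, so it
-// returns 2.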
-
-/**
- * @brief Checks whether the decoded instruction sets a constant and, if it does,
- * provides more information about the constant being set.
- * @param ptr_value Pointer to a 64-bit holder for the constant.
- * @param wide Updated by the function to indicate whether the bytecode sets a wide constant.
- * @return Returns false if the decoded instruction does not represent a constant bytecode.
- */
-bool MIR::DecodedInstruction::GetConstant(int64_t* ptr_value, bool* wide) const {
-  bool sets_const = true;
-  int64_t value = vB;
-
-  DCHECK(ptr_value != nullptr);
-  DCHECK(wide != nullptr);
-
-  switch (opcode) {
-    case Instruction::CONST_4:
-    case Instruction::CONST_16:
-    case Instruction::CONST:
-      *wide = false;
-      value <<= 32;      // Shift up and back down to sign-extend.
-      value >>= 32;
-      break;
-    case Instruction::CONST_HIGH16:
-      *wide = false;
-      value <<= 48;      // Shift up and back down to sign-extend vB << 16.
-      value >>= 32;
-      break;
-    case Instruction::CONST_WIDE_16:
-    case Instruction::CONST_WIDE_32:
-      *wide = true;
-      value <<= 32;      // Shift up and back down to sign-extend.
-      value >>= 32;
-      break;
-    case Instruction::CONST_WIDE:
-      *wide = true;
-      value = vB_wide;
-      break;
-    case Instruction::CONST_WIDE_HIGH16:
-      *wide = true;
-      value <<= 48;      // Shift vB into the top 16 bits of the wide constant.
-      break;
-    default:
-      sets_const = false;
-      break;
-  }
-
-  if (sets_const) {
-    *ptr_value = value;
-  }
-
-  return sets_const;
-}
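-
-// Worked example (illustrative): for CONST_HIGH16 with vB == 0xFFFE, value starts
-// as 0x000000000000FFFE; "value <<= 48" yields 0xFFFE000000000000 and the
-// arithmetic "value >>= 32" yields 0xFFFFFFFFFFFE0000, i.e. the non-wide constant
-// 0xFFFE0000 sign-extended to 64 bits.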
-
-void BasicBlock::ResetOptimizationFlags(uint16_t reset_flags) {
-  // Reset flags for all MIRs in bb.
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    mir->optimization_flags &= (~reset_flags);
-  }
-}
-
-void BasicBlock::Kill(MIRGraph* mir_graph) {
-  for (BasicBlockId pred_id : predecessors) {
-    BasicBlock* pred_bb = mir_graph->GetBasicBlock(pred_id);
-    DCHECK(pred_bb != nullptr);
-
-    // Sadly we have to go through the children by hand here.
-    pred_bb->ReplaceChild(id, NullBasicBlockId);
-  }
-  predecessors.clear();
-
-  // Mark as dead and hidden.
-  block_type = kDead;
-  hidden = true;
-
-  // Detach the block from its MIRs so we don't generate code for them. The detached MIRs
-  // are also updated so they no longer reference a parent block.
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    mir->bb = NullBasicBlockId;
-  }
-  first_mir_insn = nullptr;
-  last_mir_insn = nullptr;
-
-  data_flow_info = nullptr;
-
-  // Erase this bb from all children's predecessors and kill unreachable children.
-  ChildBlockIterator iter(this, mir_graph);
-  for (BasicBlock* succ_bb = iter.Next(); succ_bb != nullptr; succ_bb = iter.Next()) {
-    succ_bb->ErasePredecessor(id);
-  }
-
-  // Remove links to children.
-  fall_through = NullBasicBlockId;
-  taken = NullBasicBlockId;
-  successor_block_list_type = kNotUsed;
-
-  if (kIsDebugBuild) {
-    if (catch_entry) {
-      DCHECK_EQ(mir_graph->catches_.count(start_offset), 1u);
-      mir_graph->catches_.erase(start_offset);
-    }
-  }
-}
-
-bool BasicBlock::IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg) {
-  // In order to determine if the ssa reg is live out, we scan all the MIRs. We remember
-  // the last SSA number of the same dalvik register. At the end, if it is different from
-  // ssa_reg, then ssa_reg is not live out of this BB.
-  int dalvik_reg = c_unit->mir_graph->SRegToVReg(ssa_reg);
-
-  int last_ssa_reg = -1;
-
-  // Walk through the MIRs in order; the last def of the register wins.
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    // Get ssa rep.
-    SSARepresentation *ssa_rep = mir->ssa_rep;
-
-    // Go through the defines for this MIR.
-    for (int i = 0; i < ssa_rep->num_defs; i++) {
-      DCHECK(ssa_rep->defs != nullptr);
-
-      // Get the ssa reg.
-      int def_ssa_reg = ssa_rep->defs[i];
-
-      // Get dalvik reg.
-      int def_dalvik_reg = c_unit->mir_graph->SRegToVReg(def_ssa_reg);
-
-      // Compare dalvik regs.
-      if (dalvik_reg == def_dalvik_reg) {
-        // We found a def of the register that we are being asked about.
-        // Remember it.
-        last_ssa_reg = def_ssa_reg;
-      }
-    }
-  }
-
-  if (last_ssa_reg == -1) {
-    // If we get to this point, we couldn't find a def of the register the caller asked
-    // about. Be conservative and assume that it is live out.
-    return true;
-  }
-
-  // If it is not -1, we found a match, is it ssa_reg?
-  return (ssa_reg == last_ssa_reg);
-}
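-
-// Illustrative example (not from the original sources): if a block defines Dalvik
-// register v1 twice, first as SSA reg 5 and later as SSA reg 9, then
-// IsSSALiveOut(c_unit, 5) returns false (a later def shadows it) while
-// IsSSALiveOut(c_unit, 9) returns true.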
-
-bool BasicBlock::ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb) {
-  // We need to check taken, fall_through, and successor_blocks to replace.
-  bool found = false;
-  if (taken == old_bb) {
-    taken = new_bb;
-    found = true;
-  }
-
-  if (fall_through == old_bb) {
-    fall_through = new_bb;
-    found = true;
-  }
-
-  if (successor_block_list_type != kNotUsed) {
-    for (SuccessorBlockInfo* successor_block_info : successor_blocks) {
-      if (successor_block_info->block == old_bb) {
-        successor_block_info->block = new_bb;
-        found = true;
-      }
-    }
-  }
-
-  return found;
-}
-
-void BasicBlock::ErasePredecessor(BasicBlockId old_pred) {
-  auto pos = std::find(predecessors.begin(), predecessors.end(), old_pred);
-  DCHECK(pos != predecessors.end());
-  // It's faster to move the back() to *pos than erase(pos).
-  *pos = predecessors.back();
-  predecessors.pop_back();
-  size_t idx = std::distance(predecessors.begin(), pos);
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (static_cast<int>(mir->dalvikInsn.opcode) != kMirOpPhi) {
-      break;
-    }
-    DCHECK_EQ(mir->ssa_rep->num_uses - 1u, predecessors.size());
-    DCHECK_EQ(mir->meta.phi_incoming[idx], old_pred);
-    mir->meta.phi_incoming[idx] = mir->meta.phi_incoming[predecessors.size()];
-    mir->ssa_rep->uses[idx] = mir->ssa_rep->uses[predecessors.size()];
-    mir->ssa_rep->num_uses = predecessors.size();
-  }
-}
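-
-// Illustrative example (not from the original sources): with predecessors {3, 7, 9}
-// and old_pred == 7, block 9 is moved into slot 1 and the vector shrinks to {3, 9};
-// each Phi's incoming edge and use at index 1 are overwritten the same way, keeping
-// the phi_incoming and uses arrays aligned with the predecessor list.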
-
-void BasicBlock::UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred) {
-  DCHECK_NE(new_pred, NullBasicBlockId);
-  auto pos = std::find(predecessors.begin(), predecessors.end(), old_pred);
-  DCHECK(pos != predecessors.end());
-  *pos = new_pred;
-  size_t idx = std::distance(predecessors.begin(), pos);
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (static_cast<int>(mir->dalvikInsn.opcode) != kMirOpPhi) {
-      break;
-    }
-    DCHECK_EQ(mir->meta.phi_incoming[idx], old_pred);
-    mir->meta.phi_incoming[idx] = new_pred;
-  }
-}
-
-// Create a new basic block with block_id as num_blocks_ that is
-// post-incremented.
-BasicBlock* MIRGraph::CreateNewBB(BBType block_type) {
-  BasicBlockId id = static_cast<BasicBlockId>(block_list_.size());
-  BasicBlock* res = NewMemBB(block_type, id);
-  block_list_.push_back(res);
-  return res;
-}
-
-void MIRGraph::CalculateBasicBlockInformation(const PassManager* const post_opt_pass_manager) {
-  /* Create the pass driver and launch it */
-  PassDriverMEPostOpt driver(post_opt_pass_manager, cu_);
-  driver.Launch();
-}
-
-int MIR::DecodedInstruction::FlagsOf() const {
-  // Calculate new index.
-  int idx = static_cast<int>(opcode) - kNumPackedOpcodes;
-
-  // Check if it is an extended or not.
-  if (idx < 0) {
-    return Instruction::FlagsOf(opcode);
-  }
-
-  // For extended, we use a switch.
-  switch (static_cast<int>(opcode)) {
-    case kMirOpPhi:
-    case kMirOpCopy:
-    case kMirOpNop:
-    case kMirOpSelect:
-    case kMirOpConstVector:
-    case kMirOpMoveVector:
-    case kMirOpPackedMultiply:
-    case kMirOpPackedAddition:
-    case kMirOpPackedSubtract:
-    case kMirOpPackedShiftLeft:
-    case kMirOpPackedSignedShiftRight:
-    case kMirOpPackedUnsignedShiftRight:
-    case kMirOpPackedAnd:
-    case kMirOpPackedOr:
-    case kMirOpPackedXor:
-    case kMirOpPackedAddReduce:
-    case kMirOpPackedReduce:
-    case kMirOpPackedSet:
-    case kMirOpReserveVectorRegisters:
-    case kMirOpReturnVectorRegisters:
-    case kMirOpMemBarrier:
-    case kMirOpMaddInt:
-    case kMirOpMsubInt:
-    case kMirOpMaddLong:
-    case kMirOpMsubLong:
-      return Instruction::kContinue;
-    case kMirOpFusedCmplFloat:
-    case kMirOpFusedCmpgFloat:
-    case kMirOpFusedCmplDouble:
-    case kMirOpFusedCmpgDouble:
-    case kMirOpFusedCmpLong:
-      return Instruction::kContinue | Instruction::kBranch;
-    case kMirOpNullCheck:
-    case kMirOpRangeCheck:
-    case kMirOpDivZeroCheck:
-    case kMirOpCheck:
-    case kMirOpPackedArrayGet:
-    case kMirOpPackedArrayPut:
-      return Instruction::kContinue | Instruction::kThrow;
-    default:
-      LOG(WARNING) << "ExtendedFlagsOf: Unhandled case: " << static_cast<int>(opcode);
-      return 0;
-  }
-}
-
-const uint16_t* MIRGraph::GetInsns(int m_unit_index) const {
-  return m_units_[m_unit_index]->GetCodeItem()->insns_;
-}
-
-void MIRGraph::SetPuntToInterpreter(bool val) {
-  punt_to_interpreter_ = val;
-  if (val) {
-    // Disable all subsequent optimizations. They may not be safe to run. (For example,
-    // LVN/GVN assumes there are no conflicts found by the type inference pass.)
-    cu_->disable_opt = ~static_cast<decltype(cu_->disable_opt)>(0);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
deleted file mode 100644
index 2da8a98..0000000
--- a/compiler/dex/mir_graph.h
+++ /dev/null
@@ -1,1493 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_MIR_GRAPH_H_
-#define ART_COMPILER_DEX_MIR_GRAPH_H_
-
-#include <stdint.h>
-
-#include "base/arena_bit_vector.h"
-#include "base/arena_containers.h"
-#include "base/bit_utils.h"
-#include "base/scoped_arena_containers.h"
-#include "dex_file.h"
-#include "dex_instruction.h"
-#include "dex_types.h"
-#include "invoke_type.h"
-#include "mir_field_info.h"
-#include "mir_method_info.h"
-#include "reg_location.h"
-#include "reg_storage.h"
-
-namespace art {
-
-struct CompilationUnit;
-class DexCompilationUnit;
-class DexFileMethodInliner;
-class GlobalValueNumbering;
-class GvnDeadCodeElimination;
-class PassManager;
-class TypeInference;
-
-// Forward declaration.
-class MIRGraph;
-
-enum DataFlowAttributePos {
-  kUA = 0,
-  kUB,
-  kUC,
-  kAWide,
-  kBWide,
-  kCWide,
-  kDA,
-  kIsMove,
-  kSetsConst,
-  kFormat35c,
-  kFormat3rc,
-  kFormatExtended,       // Extended format for extended MIRs.
-  kNullCheckA,           // Null check of A.
-  kNullCheckB,           // Null check of B.
-  kNullCheckOut0,        // Null check of outgoing arg0.
-  kDstNonNull,           // May assume dst is non-null.
-  kRetNonNull,           // May assume retval is non-null.
-  kNullTransferSrc0,     // Object copy src[0] -> dst.
-  kNullTransferSrcN,     // Phi null check state transfer.
-  kRangeCheckC,          // Range check of C.
-  kCheckCastA,           // Check cast of A.
-  kFPA,
-  kFPB,
-  kFPC,
-  kCoreA,
-  kCoreB,
-  kCoreC,
-  kRefA,
-  kRefB,
-  kRefC,
-  kSameTypeAB,           // A and B have the same type but it can be core/ref/fp (IF_cc).
-  kUsesMethodStar,       // Implicit use of Method*.
-  kUsesIField,           // Accesses an instance field (IGET/IPUT).
-  kUsesSField,           // Accesses a static field (SGET/SPUT).
-  kCanInitializeClass,   // Can trigger class initialization (SGET/SPUT/INVOKE_STATIC).
-  kDoLVN,                // Worth computing local value numbers.
-};
-
-#define DF_NOP                  UINT64_C(0)
-#define DF_UA                   (UINT64_C(1) << kUA)
-#define DF_UB                   (UINT64_C(1) << kUB)
-#define DF_UC                   (UINT64_C(1) << kUC)
-#define DF_A_WIDE               (UINT64_C(1) << kAWide)
-#define DF_B_WIDE               (UINT64_C(1) << kBWide)
-#define DF_C_WIDE               (UINT64_C(1) << kCWide)
-#define DF_DA                   (UINT64_C(1) << kDA)
-#define DF_IS_MOVE              (UINT64_C(1) << kIsMove)
-#define DF_SETS_CONST           (UINT64_C(1) << kSetsConst)
-#define DF_FORMAT_35C           (UINT64_C(1) << kFormat35c)
-#define DF_FORMAT_3RC           (UINT64_C(1) << kFormat3rc)
-#define DF_FORMAT_EXTENDED      (UINT64_C(1) << kFormatExtended)
-#define DF_NULL_CHK_A           (UINT64_C(1) << kNullCheckA)
-#define DF_NULL_CHK_B           (UINT64_C(1) << kNullCheckB)
-#define DF_NULL_CHK_OUT0        (UINT64_C(1) << kNullCheckOut0)
-#define DF_NON_NULL_DST         (UINT64_C(1) << kDstNonNull)
-#define DF_NON_NULL_RET         (UINT64_C(1) << kRetNonNull)
-#define DF_NULL_TRANSFER_0      (UINT64_C(1) << kNullTransferSrc0)
-#define DF_NULL_TRANSFER_N      (UINT64_C(1) << kNullTransferSrcN)
-#define DF_RANGE_CHK_C          (UINT64_C(1) << kRangeCheckC)
-#define DF_CHK_CAST             (UINT64_C(1) << kCheckCastA)
-#define DF_FP_A                 (UINT64_C(1) << kFPA)
-#define DF_FP_B                 (UINT64_C(1) << kFPB)
-#define DF_FP_C                 (UINT64_C(1) << kFPC)
-#define DF_CORE_A               (UINT64_C(1) << kCoreA)
-#define DF_CORE_B               (UINT64_C(1) << kCoreB)
-#define DF_CORE_C               (UINT64_C(1) << kCoreC)
-#define DF_REF_A                (UINT64_C(1) << kRefA)
-#define DF_REF_B                (UINT64_C(1) << kRefB)
-#define DF_REF_C                (UINT64_C(1) << kRefC)
-#define DF_SAME_TYPE_AB         (UINT64_C(1) << kSameTypeAB)
-#define DF_UMS                  (UINT64_C(1) << kUsesMethodStar)
-#define DF_IFIELD               (UINT64_C(1) << kUsesIField)
-#define DF_SFIELD               (UINT64_C(1) << kUsesSField)
-#define DF_CLINIT               (UINT64_C(1) << kCanInitializeClass)
-#define DF_LVN                  (UINT64_C(1) << kDoLVN)
-
-#define DF_HAS_USES             (DF_UA | DF_UB | DF_UC)
-
-#define DF_HAS_DEFS             (DF_DA)
-
-#define DF_HAS_NULL_CHKS        (DF_NULL_CHK_A | \
-                                 DF_NULL_CHK_B | \
-                                 DF_NULL_CHK_OUT0)
-
-#define DF_HAS_RANGE_CHKS       (DF_RANGE_CHK_C)
-
-#define DF_HAS_NR_CHKS          (DF_HAS_NULL_CHKS | \
-                                 DF_HAS_RANGE_CHKS)
-
-#define DF_A_IS_REG             (DF_UA | DF_DA)
-#define DF_B_IS_REG             (DF_UB)
-#define DF_C_IS_REG             (DF_UC)
-#define DF_USES_FP              (DF_FP_A | DF_FP_B | DF_FP_C)
-#define DF_NULL_TRANSFER        (DF_NULL_TRANSFER_0 | DF_NULL_TRANSFER_N)
-#define DF_IS_INVOKE            (DF_FORMAT_35C | DF_FORMAT_3RC)
-
-enum OatMethodAttributes {
-  kIsLeaf,            // Method is leaf.
-};
-
-#define METHOD_IS_LEAF          (1 << kIsLeaf)
-
-// Minimum field size to contain Dalvik v_reg number.
-#define VREG_NUM_WIDTH 16
-
-#define INVALID_VREG (0xFFFFU)
-#define INVALID_OFFSET (0xDEADF00FU)
-
-#define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
-#define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
-#define MIR_IGNORE_CHECK_CAST           (1 << kMIRIgnoreCheckCast)
-#define MIR_STORE_NON_NULL_VALUE        (1 << kMIRStoreNonNullValue)
-#define MIR_CLASS_IS_INITIALIZED        (1 << kMIRClassIsInitialized)
-#define MIR_CLASS_IS_IN_DEX_CACHE       (1 << kMIRClassIsInDexCache)
-#define MIR_IGNORE_DIV_ZERO_CHECK       (1 << kMirIgnoreDivZeroCheck)
-#define MIR_INLINED                     (1 << kMIRInlined)
-#define MIR_INLINED_PRED                (1 << kMIRInlinedPred)
-#define MIR_CALLEE                      (1 << kMIRCallee)
-#define MIR_IGNORE_SUSPEND_CHECK        (1 << kMIRIgnoreSuspendCheck)
-#define MIR_DUP                         (1 << kMIRDup)
-#define MIR_MARK                        (1 << kMIRMark)
-#define MIR_STORE_NON_TEMPORAL          (1 << kMIRStoreNonTemporal)
-
-#define BLOCK_NAME_LEN 80
-
-typedef uint16_t BasicBlockId;
-static const BasicBlockId NullBasicBlockId = 0;
-
-// Leaf optimization is basically the removal of suspend checks from leaf methods.
-// This is incompatible with SuspendCheckElimination (SCE) which eliminates suspend
-// checks from loops that call any non-intrinsic method, since a loop that calls
-// only a leaf method would end up without any suspend checks at all. So turning
-// this on automatically disables the SCE in MIRGraph::EliminateSuspendChecksGate().
-//
-// Since the Optimizing compiler is actually applying the same optimization, Quick
-// must not run SCE anyway, so we enable this optimization as a way to disable SCE
-// while keeping a consistent behavior across the backends, b/22657404.
-static constexpr bool kLeafOptimization = true;
-
-/*
- * In general, vreg/sreg describe Dalvik registers that originated with dx.  However,
- * it is useful to have compiler-generated temporary registers and have them treated
- * in the same manner as dx-generated virtual registers.  This struct records the SSA
- * name of compiler-introduced temporaries.
- */
-struct CompilerTemp {
-  int32_t v_reg;      // Virtual register number for temporary.
-  int32_t s_reg_low;  // SSA name for low Dalvik word.
-};
-
-enum CompilerTempType {
-  kCompilerTempVR,                // A virtual register temporary.
-  kCompilerTempSpecialMethodPtr,  // Temporary that keeps track of current method pointer.
-  kCompilerTempBackend,           // Temporary that is used by backend.
-};
-
-// When debug option enabled, records effectiveness of null and range check elimination.
-struct Checkstats {
-  int32_t null_checks;
-  int32_t null_checks_eliminated;
-  int32_t range_checks;
-  int32_t range_checks_eliminated;
-};
-
-// Dataflow attributes of a basic block.
-struct BasicBlockDataFlow {
-  ArenaBitVector* use_v;
-  ArenaBitVector* def_v;
-  ArenaBitVector* live_in_v;
-  int32_t* vreg_to_ssa_map_exit;
-};
-
-/*
- * Normalized use/def for a MIR operation using SSA names rather than vregs.  Note that
- * uses/defs retain the Dalvik convention that long operations operate on a pair of 32-bit
- * vregs.  For example, "ADD_LONG v0, v2, v4" would have 2 defs (v0/v1) and 4 uses (v2/v3, v4/v5).
- * Following SSA renaming, this is the primary struct used by code generators to locate
- * operand and result registers.  This is a somewhat confusing and unhelpful convention that
- * we may want to revisit in the future.
- *
- * TODO:
- *  1. Add accessors for uses/defs and make data private
- *  2. Change fp_use/fp_def to a bit array (could help memory usage)
- *  3. Combine array storage into internal array and handled via accessors from 1.
- */
-struct SSARepresentation {
-  int32_t* uses;
-  int32_t* defs;
-  uint16_t num_uses_allocated;
-  uint16_t num_defs_allocated;
-  uint16_t num_uses;
-  uint16_t num_defs;
-
-  static uint32_t GetStartUseIndex(Instruction::Code opcode);
-};
-
-/*
- * The Midlevel Intermediate Representation node, which may be largely considered a
- * wrapper around a Dalvik byte code.
- */
-class MIR : public ArenaObject<kArenaAllocMIR> {
- public:
-  /*
-   * TODO: remove embedded DecodedInstruction to save space, keeping only opcode.  Recover
-   * additional fields on as-needed basis.  Question: how to support MIR Pseudo-ops; probably
-   * need to carry aux data pointer.
-   */
-  struct DecodedInstruction {
-    uint32_t vA;
-    uint32_t vB;
-    uint64_t vB_wide;        /* for k51l */
-    uint32_t vC;
-    uint32_t arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
-    Instruction::Code opcode;
-
-    DecodedInstruction() : vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) {
-    }
-
-    /*
-     * Given a decoded instruction representing a const bytecode, updates the
-     * out arguments with the proper values as dictated by the constant bytecode.
-     */
-    bool GetConstant(int64_t* ptr_value, bool* wide) const;
-
-    static bool IsPseudoMirOp(Instruction::Code opcode) {
-      return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
-    }
-
-    static bool IsPseudoMirOp(int opcode) {
-      return opcode >= static_cast<int>(kMirOpFirst);
-    }
-
-    bool IsInvoke() const {
-      return ((FlagsOf() & Instruction::kInvoke) == Instruction::kInvoke);
-    }
-
-    bool IsStore() const {
-      return ((FlagsOf() & Instruction::kStore) == Instruction::kStore);
-    }
-
-    bool IsLoad() const {
-      return ((FlagsOf() & Instruction::kLoad) == Instruction::kLoad);
-    }
-
-    bool IsConditionalBranch() const {
-      return (FlagsOf() == (Instruction::kContinue | Instruction::kBranch));
-    }
-
-    /**
-     * @brief Is the register C component of the decoded instruction a constant?
-     */
-    bool IsCFieldOrConstant() const {
-      return ((FlagsOf() & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant);
-    }
-
-    /**
-     * @brief Is the register B component of the decoded instruction a constant?
-     */
-    bool IsBFieldOrConstant() const {
-      return ((FlagsOf() & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant);
-    }
-
-    bool IsCast() const {
-      return ((FlagsOf() & Instruction::kCast) == Instruction::kCast);
-    }
-
-    /**
-     * @brief Does the instruction clobber memory?
-     * @details Clobber means that the instruction may change memory in a non-local way.
-     *          Therefore any assumption about memory aliasing or memory contents should be
-     *          disregarded when crossing such an instruction.
-     */
-    bool Clobbers() const {
-      return ((FlagsOf() & Instruction::kClobber) == Instruction::kClobber);
-    }
-
-    bool IsLinear() const {
-      return (FlagsOf() & (Instruction::kAdd | Instruction::kSubtract)) != 0;
-    }
-
-    int FlagsOf() const;
-  } dalvikInsn;
-
-  NarrowDexOffset offset;         // Offset of the instruction in code units.
-  uint16_t optimization_flags;
-  int16_t m_unit_index;           // Index of the method from which this MIR was included.
-  BasicBlockId bb;
-  MIR* next;
-  SSARepresentation* ssa_rep;
-  union {
-    // Incoming edges for phi node.
-    BasicBlockId* phi_incoming;
-    // Establish link from check instruction (kMirOpCheck) to the actual throwing instruction.
-    MIR* throw_insn;
-    // Branch condition for fused cmp or select.
-    ConditionCode ccode;
-    // IGET/IPUT lowering info index, points to MIRGraph::ifield_lowering_infos_. Due to limit on
-    // the number of code points (64K) and size of IGET/IPUT insn (2), this will never exceed 32K.
-    uint32_t ifield_lowering_info;
-    // SGET/SPUT lowering info index, points to MIRGraph::sfield_lowering_infos_. Due to limit on
-    // the number of code points (64K) and size of SGET/SPUT insn (2), this will never exceed 32K.
-    uint32_t sfield_lowering_info;
-    // INVOKE data index, points to MIRGraph::method_lowering_infos_. Also used for inlined
-    // CONST and MOVE insn (with MIR_CALLEE) to remember the invoke for type inference.
-    uint32_t method_lowering_info;
-  } meta;
-
-  MIR() : offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId),
-                 next(nullptr), ssa_rep(nullptr) {
-    memset(&meta, 0, sizeof(meta));
-  }
-
-  uint32_t GetStartUseIndex() const {
-    return SSARepresentation::GetStartUseIndex(dalvikInsn.opcode);
-  }
-
-  MIR* Copy(CompilationUnit *c_unit);
-  MIR* Copy(MIRGraph* mir_graph);
-};
-
-struct SuccessorBlockInfo;
-
-class BasicBlock : public DeletableArenaObject<kArenaAllocBasicBlock> {
- public:
-  BasicBlock(BasicBlockId block_id, BBType type, ArenaAllocator* allocator)
-      : id(block_id),
-        dfs_id(), start_offset(), fall_through(), taken(), i_dom(), nesting_depth(),
-        block_type(type),
-        successor_block_list_type(kNotUsed),
-        visited(), hidden(), catch_entry(), explicit_throw(), conditional_branch(),
-        terminated_by_return(), dominates_return(), use_lvn(), first_mir_insn(),
-        last_mir_insn(), data_flow_info(), dominators(), i_dominated(), dom_frontier(),
-        predecessors(allocator->Adapter(kArenaAllocBBPredecessors)),
-        successor_blocks(allocator->Adapter(kArenaAllocSuccessors)) {
-  }
-  BasicBlockId id;
-  BasicBlockId dfs_id;
-  NarrowDexOffset start_offset;     // Offset in code units.
-  BasicBlockId fall_through;
-  BasicBlockId taken;
-  BasicBlockId i_dom;               // Immediate dominator.
-  uint16_t nesting_depth;
-  BBType block_type:4;
-  BlockListType successor_block_list_type:4;
-  bool visited:1;
-  bool hidden:1;
-  bool catch_entry:1;
-  bool explicit_throw:1;
-  bool conditional_branch:1;
-  bool terminated_by_return:1;  // Block ends with a Dalvik return opcode.
-  bool dominates_return:1;      // Is a member of return extended basic block.
-  bool use_lvn:1;               // Run local value numbering on this block.
-  MIR* first_mir_insn;
-  MIR* last_mir_insn;
-  BasicBlockDataFlow* data_flow_info;
-  ArenaBitVector* dominators;
-  ArenaBitVector* i_dominated;      // Set nodes being immediately dominated.
-  ArenaBitVector* dom_frontier;     // Dominance frontier.
-  ArenaVector<BasicBlockId> predecessors;
-  ArenaVector<SuccessorBlockInfo*> successor_blocks;
-
-  void AppendMIR(MIR* mir);
-  void AppendMIRList(MIR* first_list_mir, MIR* last_list_mir);
-  void AppendMIRList(const std::vector<MIR*>& insns);
-  void PrependMIR(MIR* mir);
-  void PrependMIRList(MIR* first_list_mir, MIR* last_list_mir);
-  void PrependMIRList(const std::vector<MIR*>& to_add);
-  void InsertMIRAfter(MIR* current_mir, MIR* new_mir);
-  void InsertMIRListAfter(MIR* insert_after, MIR* first_list_mir, MIR* last_list_mir);
-  MIR* FindPreviousMIR(MIR* mir);
-  void InsertMIRBefore(MIR* insert_before, MIR* list);
-  void InsertMIRListBefore(MIR* insert_before, MIR* first_list_mir, MIR* last_list_mir);
-  bool RemoveMIR(MIR* mir);
-  bool RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir);
-
-  BasicBlock* Copy(CompilationUnit* c_unit);
-  BasicBlock* Copy(MIRGraph* mir_graph);
-
-  /**
-   * @brief Reset the optimization_flags field of each MIR.
-   */
-  void ResetOptimizationFlags(uint16_t reset_flags);
-
-  /**
-   * @brief Kill the BasicBlock.
-   * @details Unlink predecessors and successors, remove all MIRs, set the block type to kDead
-   *          and set hidden to true.
-   */
-  void Kill(MIRGraph* mir_graph);
-
-  /**
-   * @brief Is ssa_reg the last SSA definition of that VR in the block?
-   */
-  bool IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg);
-
-  /**
-   * @brief Replace the edge going to old_bb to now go towards new_bb.
-   */
-  bool ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb);
-
-  /**
-   * @brief Erase the predecessor old_pred.
-   */
-  void ErasePredecessor(BasicBlockId old_pred);
-
-  /**
-   * @brief Update the predecessor array from old_pred to new_pred.
-   */
-  void UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred);
-
-  /**
-   * @brief Return first non-Phi insn.
-   */
-  MIR* GetFirstNonPhiInsn();
-
-  /**
-   * @brief Checks whether the block ends with if-nez or if-eqz that branches to
-   *        the given successor only if the register is not zero.
-   */
-  bool BranchesToSuccessorOnlyIfNotZero(BasicBlockId succ_id) const {
-    if (last_mir_insn == nullptr) {
-      return false;
-    }
-    Instruction::Code last_opcode = last_mir_insn->dalvikInsn.opcode;
-    return ((last_opcode == Instruction::IF_EQZ && fall_through == succ_id) ||
-        (last_opcode == Instruction::IF_NEZ && taken == succ_id)) &&
-        // Make sure the other successor isn't the same (empty if), b/21614284.
-        (fall_through != taken);
-  }
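-
-  // Illustrative example (not from the original sources): for a block ending in
-  // "if-eqz v0, :L" with taken == :L, the fall-through successor is reached only
-  // when v0 != 0, so this returns true when succ_id is the fall-through block
-  // (provided fall_through != taken).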
-
-  /**
-   * @brief Used to obtain the next MIR that follows unconditionally.
-   * @details Even if this method returns nullptr, a following MIR may still exist;
-   * the implementation does not guarantee its absence.
-   * @param mir_graph the MIRGraph.
-   * @param current The MIR for which to find an unconditional follower.
-   * @return Returns the following MIR if one can be found.
-   */
-  MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current);
-  bool IsExceptionBlock() const;
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(BasicBlock);
-};
-
-/*
- * The "blocks" field in "successor_block_list" points to an array of elements with the type
- * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For switch
- * blocks, key is the case value.
- */
-struct SuccessorBlockInfo {
-  BasicBlockId block;
-  int key;
-};
-
-/**
- * @class ChildBlockIterator
- * @brief Enable an easy iteration of the children.
- */
-class ChildBlockIterator {
- public:
-  /**
-   * @brief Constructs a child iterator.
-   * @param bb The basic block whose children we need to iterate through.
-   * @param mir_graph The MIRGraph used to get the basic block during iteration.
-   */
-  ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph);
-  BasicBlock* Next();
-
- private:
-  BasicBlock* basic_block_;
-  MIRGraph* mir_graph_;
-  bool visited_fallthrough_;
-  bool visited_taken_;
-  bool have_successors_;
-  ArenaVector<SuccessorBlockInfo*>::const_iterator successor_iter_;
-};
-
-/*
- * Collection of information describing an invoke, and the destination of
- * the subsequent MOVE_RESULT (if applicable).  Collected as a unit to enable
- * more efficient invoke code generation.
- */
-struct CallInfo {
-  size_t num_arg_words;   // Note: word count, not arg count.
-  RegLocation* args;      // One for each word of arguments.
-  RegLocation result;     // Eventual target of MOVE_RESULT.
-  int opt_flags;
-  InvokeType type;
-  uint32_t dex_idx;
-  MethodReference method_ref;
-  uint32_t index;         // Method idx for invokes, type idx for FilledNewArray.
-  uintptr_t direct_code;
-  uintptr_t direct_method;
-  RegLocation target;     // Target of following move_result.
-  bool skip_this;
-  bool is_range;
-  DexOffset offset;       // Offset in code units.
-  MIR* mir;
-  int32_t string_init_offset;
-};
-
-
-const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, RegStorage(), INVALID_SREG,
-                             INVALID_SREG};
-
-class MIRGraph {
- public:
-  MIRGraph(CompilationUnit* cu, ArenaAllocator* arena);
-  virtual ~MIRGraph();
-
-  /*
-   * Examine the graph to determine whether it's worthwhile to spend the time compiling
-   * this method.
-   */
-  bool SkipCompilation(std::string* skip_message);
-
-  /*
-   * Should we skip the compilation of this method based on its name?
-   */
-  bool SkipCompilationByName(const std::string& methodname);
-
-  /*
-   * Parse dex method and add MIR at current insert point.  Returns id (which is
-   * actually the index of the method in the m_units_ array).
-   */
-  void InlineMethod(const DexFile::CodeItem* code_item,
-                    uint32_t access_flags,
-                    InvokeType invoke_type,
-                    uint16_t class_def_idx,
-                    uint32_t method_idx,
-                    jobject class_loader,
-                    const DexFile& dex_file,
-                    Handle<mirror::DexCache> dex_cache);
-
-  /* Find existing block */
-  BasicBlock* FindBlock(DexOffset code_offset,
-                        ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-    return FindBlock(code_offset, false, nullptr, dex_pc_to_block_map);
-  }
-
-  const uint16_t* GetCurrentInsns() const {
-    return current_code_item_->insns_;
-  }
-
-  /**
-   * @brief Used to obtain the raw dex bytecode instruction pointer.
-   * @param m_unit_index The method index in MIRGraph (multiple methods may be present due to inlining).
-   * This is guaranteed to contain index 0 which is the base method being compiled.
-   * @return Returns the raw instruction pointer.
-   */
-  const uint16_t* GetInsns(int m_unit_index) const;
-
-  /**
-   * @brief Used to obtain the raw data table.
-   * @param mir sparse switch, packed switch, or fill-array-data
-   * @param table_offset The table offset from start of method.
-   * @return Returns the raw table pointer.
-   */
-  const uint16_t* GetTable(MIR* mir, uint32_t table_offset) const {
-    return GetInsns(mir->m_unit_index) + mir->offset + static_cast<int32_t>(table_offset);
-  }
-
-  unsigned int GetNumBlocks() const {
-    return block_list_.size();
-  }
-
-  /**
-   * @brief Provides the total size in code units of all instructions in MIRGraph.
-   * @details Includes the sizes of all methods in compilation unit.
-   * @return Returns the cumulative sum of all insn sizes (in code units).
-   */
-  size_t GetNumDalvikInsns() const;
-
-  ArenaBitVector* GetTryBlockAddr() const {
-    return try_block_addr_;
-  }
-
-  BasicBlock* GetEntryBlock() const {
-    return entry_block_;
-  }
-
-  BasicBlock* GetExitBlock() const {
-    return exit_block_;
-  }
-
-  BasicBlock* GetBasicBlock(unsigned int block_id) const {
-    DCHECK_LT(block_id, block_list_.size());  // NOTE: NullBasicBlockId is 0.
-    return (block_id == NullBasicBlockId) ? nullptr : block_list_[block_id];
-  }
-
-  size_t GetBasicBlockListCount() const {
-    return block_list_.size();
-  }
-
-  const ArenaVector<BasicBlock*>& GetBlockList() {
-    return block_list_;
-  }
-
-  const ArenaVector<BasicBlockId>& GetDfsOrder() {
-    return dfs_order_;
-  }
-
-  const ArenaVector<BasicBlockId>& GetDfsPostOrder() {
-    return dfs_post_order_;
-  }
-
-  const ArenaVector<BasicBlockId>& GetDomPostOrder() {
-    return dom_post_order_traversal_;
-  }
-
-  int GetDefCount() const {
-    return def_count_;
-  }
-
-  ArenaAllocator* GetArena() const {
-    return arena_;
-  }
-
-  void EnableOpcodeCounting() {
-    opcode_count_ = arena_->AllocArray<int>(kNumPackedOpcodes, kArenaAllocMisc);
-  }
-
-  void ShowOpcodeStats();
-
-  DexCompilationUnit* GetCurrentDexCompilationUnit() const {
-    return m_units_[current_method_];
-  }
-
-  /**
-   * @brief Dump a CFG into a dot file format.
-   * @param dir_prefix the directory the file will be created in.
-   * @param all_blocks whether the dumper uses all the basic blocks or only the reachable ones.
-   * @param suffix optional suffix for the filename (default = nullptr).
-   */
-  void DumpCFG(const char* dir_prefix, bool all_blocks, const char* suffix = nullptr);
-
-  bool HasCheckCast() const {
-    return (merged_df_flags_ & DF_CHK_CAST) != 0u;
-  }
-
-  bool HasFieldAccess() const {
-    return (merged_df_flags_ & (DF_IFIELD | DF_SFIELD)) != 0u;
-  }
-
-  bool HasStaticFieldAccess() const {
-    return (merged_df_flags_ & DF_SFIELD) != 0u;
-  }
-
-  bool HasInvokes() const {
-    // NOTE: These formats include the rare filled-new-array/range.
-    return (merged_df_flags_ & (DF_FORMAT_35C | DF_FORMAT_3RC)) != 0u;
-  }
-
-  void DoCacheFieldLoweringInfo();
-
-  const MirIFieldLoweringInfo& GetIFieldLoweringInfo(MIR* mir) const {
-    return GetIFieldLoweringInfo(mir->meta.ifield_lowering_info);
-  }
-
-  const MirIFieldLoweringInfo& GetIFieldLoweringInfo(uint32_t lowering_info) const {
-    DCHECK_LT(lowering_info, ifield_lowering_infos_.size());
-    return ifield_lowering_infos_[lowering_info];
-  }
-
-  size_t GetIFieldLoweringInfoCount() const {
-    return ifield_lowering_infos_.size();
-  }
-
-  const MirSFieldLoweringInfo& GetSFieldLoweringInfo(MIR* mir) const {
-    return GetSFieldLoweringInfo(mir->meta.sfield_lowering_info);
-  }
-
-  const MirSFieldLoweringInfo& GetSFieldLoweringInfo(uint32_t lowering_info) const {
-    DCHECK_LT(lowering_info, sfield_lowering_infos_.size());
-    return sfield_lowering_infos_[lowering_info];
-  }
-
-  size_t GetSFieldLoweringInfoCount() const {
-    return sfield_lowering_infos_.size();
-  }
-
-  void DoCacheMethodLoweringInfo();
-
-  const MirMethodLoweringInfo& GetMethodLoweringInfo(MIR* mir) const {
-    return GetMethodLoweringInfo(mir->meta.method_lowering_info);
-  }
-
-  const MirMethodLoweringInfo& GetMethodLoweringInfo(uint32_t lowering_info) const {
-    DCHECK_LT(lowering_info, method_lowering_infos_.size());
-    return method_lowering_infos_[lowering_info];
-  }
-
-  size_t GetMethodLoweringInfoCount() const {
-    return method_lowering_infos_.size();
-  }
-
-  void ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, MIR* iget_or_iput);
-
-  void InitRegLocations();
-
-  void RemapRegLocations();
-
-  void DumpRegLocTable(RegLocation* table, int count);
-
-  void BasicBlockOptimizationStart();
-  void BasicBlockOptimization();
-  void BasicBlockOptimizationEnd();
-
-  void StringChange();
-
-  const ArenaVector<BasicBlockId>& GetTopologicalSortOrder() {
-    DCHECK(!topological_order_.empty());
-    return topological_order_;
-  }
-
-  const ArenaVector<BasicBlockId>& GetTopologicalSortOrderLoopEnds() {
-    DCHECK(!topological_order_loop_ends_.empty());
-    return topological_order_loop_ends_;
-  }
-
-  const ArenaVector<BasicBlockId>& GetTopologicalSortOrderIndexes() {
-    DCHECK(!topological_order_indexes_.empty());
-    return topological_order_indexes_;
-  }
-
-  ArenaVector<std::pair<uint16_t, bool>>* GetTopologicalSortOrderLoopHeadStack() {
-    DCHECK(!topological_order_.empty());  // Checking the main array, not the stack.
-    return &topological_order_loop_head_stack_;
-  }
-
-  size_t GetMaxNestedLoops() const {
-    return max_nested_loops_;
-  }
-
-  bool IsLoopHead(BasicBlockId bb_id) {
-    return topological_order_loop_ends_[topological_order_indexes_[bb_id]] != 0u;
-  }
-
-  bool IsConst(int32_t s_reg) const {
-    return is_constant_v_->IsBitSet(s_reg);
-  }
-
-  bool IsConst(RegLocation loc) const {
-    return loc.orig_sreg < 0 ? false : IsConst(loc.orig_sreg);
-  }
-
-  int32_t ConstantValue(RegLocation loc) const {
-    DCHECK(IsConst(loc));
-    return constant_values_[loc.orig_sreg];
-  }
-
-  int32_t ConstantValue(int32_t s_reg) const {
-    DCHECK(IsConst(s_reg));
-    return constant_values_[s_reg];
-  }
-
-  /**
-   * @brief Used to obtain 64-bit value of a pair of ssa registers.
-   * @param s_reg_low The ssa register representing the low bits.
-   * @param s_reg_high The ssa register representing the high bits.
-   * @return Returns the 64-bit constant value.
-   */
-  int64_t ConstantValueWide(int32_t s_reg_low, int32_t s_reg_high) const {
-    DCHECK(IsConst(s_reg_low));
-    DCHECK(IsConst(s_reg_high));
-    return (static_cast<int64_t>(constant_values_[s_reg_high]) << 32) |
-        Low32Bits(static_cast<int64_t>(constant_values_[s_reg_low]));
-  }
-
-  int64_t ConstantValueWide(RegLocation loc) const {
-    DCHECK(IsConst(loc));
-    DCHECK(!loc.high_word);  // Do not allow asking for the high partner.
-    DCHECK_LT(loc.orig_sreg + 1, GetNumSSARegs());
-    return (static_cast<int64_t>(constant_values_[loc.orig_sreg + 1]) << 32) |
-        Low32Bits(static_cast<int64_t>(constant_values_[loc.orig_sreg]));
-  }
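-
-  // Worked example (illustrative): with constant_values_[s_reg_low] == 0xDDCCBBAA
-  // and constant_values_[s_reg_high] == 0x11223344, both overloads yield
-  // 0x11223344DDCCBBAA; Low32Bits() strips the sign extension of the low half
-  // before the two halves are combined.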
-
-  /**
-   * @brief Used to mark ssa register as being constant.
-   * @param ssa_reg The ssa register.
-   * @param value The constant value of ssa register.
-   */
-  void SetConstant(int32_t ssa_reg, int32_t value);
-
-  /**
-   * @brief Used to mark ssa register and its wide counterpart as being constant.
-   * @param ssa_reg The ssa register.
-   * @param value The 64-bit constant value of ssa register and its pair.
-   */
-  void SetConstantWide(int32_t ssa_reg, int64_t value);
-
-  bool IsConstantNullRef(RegLocation loc) const {
-    return loc.ref && loc.is_const && (ConstantValue(loc) == 0);
-  }
-
-  int GetNumSSARegs() const {
-    return num_ssa_regs_;
-  }
-
-  void SetNumSSARegs(int new_num) {
-     /*
-      * TODO: It's theoretically possible to exceed 32767, though any cases which did
-      * would be filtered out with current settings.  When orig_sreg field is removed
-      * from RegLocation, expand s_reg_low to handle all possible cases and remove the CHECK().
-      */
-    CHECK_EQ(new_num, static_cast<int16_t>(new_num));
-    num_ssa_regs_ = new_num;
-  }
-
-  unsigned int GetNumReachableBlocks() const {
-    return num_reachable_blocks_;
-  }
-
-  uint32_t GetUseCount(int sreg) const {
-    DCHECK_LT(static_cast<size_t>(sreg), use_counts_.size());
-    return use_counts_[sreg];
-  }
-
-  uint32_t GetRawUseCount(int sreg) const {
-    DCHECK_LT(static_cast<size_t>(sreg), raw_use_counts_.size());
-    return raw_use_counts_[sreg];
-  }
-
-  int GetSSASubscript(int ssa_reg) const {
-    DCHECK_LT(static_cast<size_t>(ssa_reg), ssa_subscripts_.size());
-    return ssa_subscripts_[ssa_reg];
-  }
-
-  RegLocation GetRawSrc(MIR* mir, int num) {
-    DCHECK(num < mir->ssa_rep->num_uses);
-    RegLocation res = reg_location_[mir->ssa_rep->uses[num]];
-    return res;
-  }
-
-  RegLocation GetRawDest(MIR* mir) {
-    DCHECK_GT(mir->ssa_rep->num_defs, 0);
-    RegLocation res = reg_location_[mir->ssa_rep->defs[0]];
-    return res;
-  }
-
-  RegLocation GetDest(MIR* mir) {
-    RegLocation res = GetRawDest(mir);
-    DCHECK(!res.wide);
-    return res;
-  }
-
-  RegLocation GetSrc(MIR* mir, int num) {
-    RegLocation res = GetRawSrc(mir, num);
-    DCHECK(!res.wide);
-    return res;
-  }
-
-  RegLocation GetDestWide(MIR* mir) {
-    RegLocation res = GetRawDest(mir);
-    DCHECK(res.wide);
-    return res;
-  }
-
-  RegLocation GetSrcWide(MIR* mir, int low) {
-    RegLocation res = GetRawSrc(mir, low);
-    DCHECK(res.wide);
-    return res;
-  }
-
-  RegLocation GetBadLoc() {
-    return bad_loc;
-  }
-
-  int GetMethodSReg() const {
-    return method_sreg_;
-  }
-
-  /**
-   * @brief Used to obtain the number of compiler temporaries being used.
-   * @return Returns the number of compiler temporaries.
-   */
-  size_t GetNumUsedCompilerTemps() const {
-    // Assume that the special temps will always be used.
-    return GetNumNonSpecialCompilerTemps() + max_available_special_compiler_temps_;
-  }
-
-  /**
-   * @brief Used to obtain number of bytes needed for special temps.
-   * @details This space is always needed because temps have special location on stack.
-   * @return Returns number of bytes for the special temps.
-   */
-  size_t GetNumBytesForSpecialTemps() const;
-
-  /**
-   * @brief Used by backend as a hint for maximum number of bytes for non-special temps.
-   * @details Returns 4 bytes for each temp because that is the maximum amount needed
-   * for storing each temp. The BE could be smarter though and allocate a smaller
-   * spill region.
-   * @return Returns the maximum number of bytes needed for non-special temps.
-   */
-  size_t GetMaximumBytesForNonSpecialTemps() const {
-    return GetNumNonSpecialCompilerTemps() * sizeof(uint32_t);
-  }
-
-  /**
-   * @brief Used to obtain the number of non-special compiler temporaries being used.
-   * @return Returns the number of non-special compiler temporaries.
-   */
-  size_t GetNumNonSpecialCompilerTemps() const {
-    return num_non_special_compiler_temps_;
-  }
-
-  /**
-   * @brief Used to set the total number of available non-special compiler temporaries.
-   * @details Can fail to set the new max if more temps are already in use than the new max allows.
-   * @param new_max The new maximum number of non-special compiler temporaries.
-   * @return Returns true if the max was set and false if failed to set.
-   */
-  bool SetMaxAvailableNonSpecialCompilerTemps(size_t new_max) {
-    // Make sure that enough temps still exist for backend and also that the
-    // new max can still keep around all of the already requested temps.
-    if (new_max < (GetNumNonSpecialCompilerTemps() + reserved_temps_for_backend_)) {
-      return false;
-    } else {
-      max_available_non_special_compiler_temps_ = new_max;
-      return true;
-    }
-  }
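
The guard above is worth spelling out: a new cap is rejected unless it still covers every temp already handed out plus the temps reserved for the backend. A minimal standalone sketch of the same check, with illustrative free variables in place of the MIRGraph members (all names hypothetical):

    #include <cstddef>

    size_t used_non_special_temps = 5;   // Hypothetical: temps already allocated.
    size_t reserved_for_backend = 2;     // Hypothetical: BE-only reservation.
    size_t max_non_special_temps = 10;

    bool SetMaxNonSpecialTemps(size_t new_max) {
      // Reject caps that would strand temps already in use or reserved.
      if (new_max < used_non_special_temps + reserved_for_backend) {
        return false;
      }
      max_non_special_temps = new_max;
      return true;
    }

With the values above, SetMaxNonSpecialTemps(6) is rejected (5 + 2 > 6) while SetMaxNonSpecialTemps(7) succeeds.
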
-
-  /**
-   * @brief Provides the number of non-special compiler temps available for use by ME.
-   * @details Even if this returns zero, special compiler temps are guaranteed to be available.
-   * Additionally, this makes sure to not use any temps reserved for BE only.
-   * @return Returns the number of available temps.
-   */
-  size_t GetNumAvailableVRTemps();
-
-  /**
-   * @brief Used to obtain the maximum number of compiler temporaries that can be requested.
-   * @return Returns the maximum number of compiler temporaries, whether used or not.
-   */
-  size_t GetMaxPossibleCompilerTemps() const {
-    return max_available_special_compiler_temps_ + max_available_non_special_compiler_temps_;
-  }
-
-  /**
-   * @brief Used to signal that the compiler temps have been committed.
-   * @details This should be used once the number of temps can no longer change,
-   * such as after frame size is committed and cannot be changed.
-   */
-  void CommitCompilerTemps() {
-    compiler_temps_committed_ = true;
-  }
-
-  /**
-   * @brief Used to obtain a new unique compiler temporary.
-   * @details Two things are done for convenience when allocating a new compiler
-   * temporary. The ssa register is automatically requested and the information
-   * about reg location is filled. This helps when the temp is requested post
-   * ssa initialization, such as when temps are requested by the backend.
-   * @warning If the temp requested will be used for ME and have multiple versions,
-   * the sreg provided by the temp will be invalidated on next ssa recalculation.
-   * @param ct_type Type of compiler temporary requested.
-   * @param wide Whether we should allocate a wide temporary.
-   * @return Returns the newly created compiler temporary.
-   */
-  CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
-
-  /**
-   * @brief Used to remove the last created compiler temporary when it's not needed.
-   * @param ct_type Type of the compiler temporary to remove.
-   * @param wide Whether the temporary is wide.
-   * @param temp The temporary to remove.
-   */
-  void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp);
-
-  bool MethodIsLeaf() {
-    return attributes_ & METHOD_IS_LEAF;
-  }
-
-  RegLocation GetRegLocation(int index) {
-    DCHECK((index >= 0) && (index < num_ssa_regs_));
-    return reg_location_[index];
-  }
-
-  RegLocation GetMethodLoc() {
-    return reg_location_[method_sreg_];
-  }
-
-  bool IsBackEdge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
-    DCHECK_NE(target_bb_id, NullBasicBlockId);
-    DCHECK_LT(target_bb_id, topological_order_indexes_.size());
-    DCHECK_LT(branch_bb->id, topological_order_indexes_.size());
-    return topological_order_indexes_[target_bb_id] <= topological_order_indexes_[branch_bb->id];
-  }
-
-  bool IsSuspendCheckEdge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
-    if (!IsBackEdge(branch_bb, target_bb_id)) {
-      return false;
-    }
-    if (suspend_checks_in_loops_ == nullptr) {
-      // We didn't run suspend check elimination.
-      return true;
-    }
-    uint16_t target_depth = GetBasicBlock(target_bb_id)->nesting_depth;
-    return (suspend_checks_in_loops_[branch_bb->id] & (1u << (target_depth - 1u))) == 0;
-  }
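
IsSuspendCheckEdge() packs the elimination result into one bit per enclosing loop: bit (depth - 1) of suspend_checks_in_loops_[bb->id] is set when a suspend check is guaranteed on the path from the loop head at that nesting depth to the branch block, so the back-edge check can be skipped. A minimal sketch of the bit test under that assumption (values hypothetical):

    #include <cstdint>

    // Hypothetical mask for one branch block: only the outermost loop (depth 1)
    // is known to contain a suspend check on every path to this block.
    uint32_t checks_in_loops = 0b01;

    bool NeedsBackEdgeSuspendCheck(uint16_t target_loop_depth) {
      // Depth is 1-based; a set bit means the explicit check can be elided.
      return (checks_in_loops & (1u << (target_loop_depth - 1u))) == 0;
    }

Here NeedsBackEdgeSuspendCheck(1) is false and NeedsBackEdgeSuspendCheck(2) is true.
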
-
-  void CountBranch(DexOffset target_offset) {
-    if (target_offset <= current_offset_) {
-      backward_branches_++;
-    } else {
-      forward_branches_++;
-    }
-  }
-
-  int GetBranchCount() {
-    return backward_branches_ + forward_branches_;
-  }
-
-  // Is this vreg in the in set?
-  bool IsInVReg(uint32_t vreg) {
-    return (vreg >= GetFirstInVR()) && (vreg < GetFirstTempVR());
-  }
-
-  uint32_t GetNumOfCodeVRs() const {
-    return current_code_item_->registers_size_;
-  }
-
-  uint32_t GetNumOfCodeAndTempVRs() const {
-    // Include all of the possible temps so that no structures overflow when initialized.
-    return GetNumOfCodeVRs() + GetMaxPossibleCompilerTemps();
-  }
-
-  uint32_t GetNumOfLocalCodeVRs() const {
-    // This also refers to the first "in" VR.
-    return GetNumOfCodeVRs() - current_code_item_->ins_size_;
-  }
-
-  uint32_t GetNumOfInVRs() const {
-    return current_code_item_->ins_size_;
-  }
-
-  uint32_t GetNumOfOutVRs() const {
-    return current_code_item_->outs_size_;
-  }
-
-  uint32_t GetFirstInVR() const {
-    return GetNumOfLocalCodeVRs();
-  }
-
-  uint32_t GetFirstTempVR() const {
-    // Temp VRs immediately follow code VRs.
-    return GetNumOfCodeVRs();
-  }
-
-  uint32_t GetFirstSpecialTempVR() const {
-    // Special temps appear first in the ordering, before non-special temps.
-    return GetFirstTempVR();
-  }
-
-  uint32_t GetFirstNonSpecialTempVR() const {
-    // We always leave space for all the special temps before the non-special ones.
-    return GetFirstSpecialTempVR() + max_available_special_compiler_temps_;
-  }
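
Taken together, the accessors above define a fixed virtual-register layout: locals first, then the method's ins, then special compiler temps, then non-special ones. A small sketch computing the boundaries for a hypothetical method (numbers invented for illustration):

    #include <cstdio>

    int main() {
      unsigned registers_size = 10;     // All code VRs (locals + ins).
      unsigned ins_size = 3;            // Incoming arguments.
      unsigned max_special_temps = 2;   // Cap on special compiler temps.

      unsigned first_in_vr = registers_size - ins_size;        // GetFirstInVR()
      unsigned first_temp_vr = registers_size;                 // GetFirstTempVR()
      unsigned first_non_special_vr = first_temp_vr + max_special_temps;

      // Prints: locals=[0,7) ins=[7,10) special=[10,12) non-special=[12,...)
      printf("locals=[0,%u) ins=[%u,%u) special=[%u,%u) non-special=[%u,...)\n",
             first_in_vr, first_in_vr, first_temp_vr,
             first_temp_vr, first_non_special_vr, first_non_special_vr);
      return 0;
    }
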
-
-  bool HasTryCatchBlocks() const {
-    return current_code_item_->tries_size_ != 0;
-  }
-
-  void DumpCheckStats();
-  MIR* FindMoveResult(BasicBlock* bb, MIR* mir);
-
-  /* Return the base virtual register for an SSA name */
-  int SRegToVReg(int ssa_reg) const {
-    return ssa_base_vregs_[ssa_reg];
-  }
-
-  void VerifyDataflow();
-  void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb);
-  bool EliminateNullChecksGate();
-  bool EliminateNullChecks(BasicBlock* bb);
-  void EliminateNullChecksEnd();
-  void InferTypesStart();
-  bool InferTypes(BasicBlock* bb);
-  void InferTypesEnd();
-  bool EliminateClassInitChecksGate();
-  bool EliminateClassInitChecks(BasicBlock* bb);
-  void EliminateClassInitChecksEnd();
-  bool ApplyGlobalValueNumberingGate();
-  bool ApplyGlobalValueNumbering(BasicBlock* bb);
-  void ApplyGlobalValueNumberingEnd();
-  bool EliminateDeadCodeGate();
-  bool EliminateDeadCode(BasicBlock* bb);
-  void EliminateDeadCodeEnd();
-  void GlobalValueNumberingCleanup();
-  bool EliminateSuspendChecksGate();
-  bool EliminateSuspendChecks(BasicBlock* bb);
-
-  uint16_t GetGvnIFieldId(MIR* mir) const {
-    DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
-    DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size());
-    DCHECK(temp_.gvn.ifield_ids != nullptr);
-    return temp_.gvn.ifield_ids[mir->meta.ifield_lowering_info];
-  }
-
-  uint16_t GetGvnSFieldId(MIR* mir) const {
-    DCHECK(IsInstructionSGetOrSPut(mir->dalvikInsn.opcode));
-    DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size());
-    DCHECK(temp_.gvn.sfield_ids != nullptr);
-    return temp_.gvn.sfield_ids[mir->meta.sfield_lowering_info];
-  }
-
-  bool PuntToInterpreter() {
-    return punt_to_interpreter_;
-  }
-
-  void SetPuntToInterpreter(bool val);
-
-  void DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir);
-  char* GetDalvikDisassembly(const MIR* mir);
-  void ReplaceSpecialChars(std::string& str);
-  std::string GetSSAName(int ssa_reg);
-  std::string GetSSANameWithConst(int ssa_reg, bool singles_only);
-  void GetBlockName(BasicBlock* bb, char* name);
-  const char* GetShortyFromMethodReference(const MethodReference& target_method);
-  void DumpMIRGraph();
-  CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range);
-  BasicBlock* NewMemBB(BBType block_type, int block_id);
-  MIR* NewMIR();
-  MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir);
-  BasicBlock* NextDominatedBlock(BasicBlock* bb);
-  bool LayoutBlocks(BasicBlock* bb);
-  void ComputeTopologicalSortOrder();
-  BasicBlock* CreateNewBB(BBType block_type);
-
-  bool InlineSpecialMethodsGate();
-  void InlineSpecialMethodsStart();
-  void InlineSpecialMethods(BasicBlock* bb);
-  void InlineSpecialMethodsEnd();
-
-  /**
-   * @brief Perform the initial preparation for the Method Uses.
-   */
-  void InitializeMethodUses();
-
-  /**
-   * @brief Perform the initial preparation for the Constant Propagation.
-   */
-  void InitializeConstantPropagation();
-
-  /**
-   * @brief Perform the initial preparation for the SSA Transformation.
-   */
-  void SSATransformationStart();
-
-  /**
-   * @brief Insert the operands for the Phi nodes.
-   * @param bb the considered BasicBlock.
-   * @return true
-   */
-  bool InsertPhiNodeOperands(BasicBlock* bb);
-
-  /**
-   * @brief Perform the cleanup after the SSA Transformation.
-   */
-  void SSATransformationEnd();
-
-  /**
-   * @brief Perform constant propagation on a BasicBlock.
-   * @param bb the considered BasicBlock.
-   */
-  void DoConstantPropagation(BasicBlock* bb);
-
-  /**
-   * @brief Get use count weight for a given block.
-   * @param bb the BasicBlock.
-   */
-  uint32_t GetUseCountWeight(BasicBlock* bb) const;
-
-  /**
-   * @brief Count the uses in the BasicBlock
-   * @param bb the BasicBlock
-   */
-  void CountUses(BasicBlock* bb);
-
-  static uint64_t GetDataFlowAttributes(Instruction::Code opcode);
-  static uint64_t GetDataFlowAttributes(MIR* mir);
-
-  /**
-   * @brief Combine BasicBlocks.
-   * @param bb The BasicBlock we are considering.
-   */
-  void CombineBlocks(BasicBlock* bb);
-
-  void ClearAllVisitedFlags();
-
-  void AllocateSSAUseData(MIR *mir, int num_uses);
-  void AllocateSSADefData(MIR *mir, int num_defs);
-  void CalculateBasicBlockInformation(const PassManager* const post_opt);
-  void ComputeDFSOrders();
-  void ComputeDefBlockMatrix();
-  void ComputeDominators();
-  void CompilerInitializeSSAConversion();
-  virtual void InitializeBasicBlockDataFlow();
-  void FindPhiNodeBlocks();
-  void DoDFSPreOrderSSARename(BasicBlock* block);
-
-  bool DfsOrdersUpToDate() const {
-    return dfs_orders_up_to_date_;
-  }
-
-  bool DominationUpToDate() const {
-    return domination_up_to_date_;
-  }
-
-  bool MirSsaRepUpToDate() const {
-    return mir_ssa_rep_up_to_date_;
-  }
-
-  bool TopologicalOrderUpToDate() const {
-    return topological_order_up_to_date_;
-  }
-
-  /*
-   * IsDebugBuild sanity check: keep track of the Dex PCs for catch entries so that later on
-   * we can verify that all catch entries have native PC entries.
-   */
-  std::set<uint32_t> catches_;
-
-  // TODO: make these private.
-  RegLocation* reg_location_;                               // Map SSA names to location.
-  ArenaSafeMap<unsigned int, unsigned int> block_id_map_;   // Block collapse lookup cache.
-
-  static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst];
-
-  void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
-
- protected:
-  int FindCommonParent(int block1, int block2);
-  void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
-                         const ArenaBitVector* src2);
-  void HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v,
-                       ArenaBitVector* live_in_v, int dalvik_reg_id);
-  void HandleDef(ArenaBitVector* def_v, int dalvik_reg_id);
-  void HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
-                      ArenaBitVector* live_in_v,
-                      const MIR::DecodedInstruction& d_insn);
-  bool DoSSAConversion(BasicBlock* bb);
-  int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction);
-  bool ContentIsInsn(const uint16_t* code_ptr);
-  BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block,
-                         BasicBlock** immed_pred_block_p);
-  BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p,
-                        ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
-  void ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
-  bool IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, NarrowDexOffset catch_offset);
-  BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                               int flags, const uint16_t* code_ptr, const uint16_t* code_end,
-                               ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
-  BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                               int flags,
-                               ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
-  BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                              int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr,
-                              const uint16_t* code_end,
-                              ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
-  int AddNewSReg(int v_reg);
-  void HandleSSAUse(int* uses, int dalvik_reg, int reg_index);
-  void DataFlowSSAFormat35C(MIR* mir);
-  void DataFlowSSAFormat3RC(MIR* mir);
-  void DataFlowSSAFormatExtended(MIR* mir);
-  bool FindLocalLiveIn(BasicBlock* bb);
-  bool VerifyPredInfo(BasicBlock* bb);
-  BasicBlock* NeedsVisit(BasicBlock* bb);
-  BasicBlock* NextUnvisitedSuccessor(BasicBlock* bb);
-  void MarkPreOrder(BasicBlock* bb);
-  void RecordDFSOrders(BasicBlock* bb);
-  void ComputeDomPostOrderTraversal(BasicBlock* bb);
-  int GetSSAUseCount(int s_reg);
-  bool BasicBlockOpt(BasicBlock* bb);
-  void MultiplyAddOpt(BasicBlock* bb);
-
-  /**
-   * @brief Check whether the given MIR can throw an exception.
-   * @param mir The mir to check.
-   * @return Returns 'true' if the given MIR might throw an exception.
-   */
-  bool CanThrow(MIR* mir) const;
-
-  /**
-   * @brief Combine multiply and add/sub MIRs into corresponding extended MAC MIR.
-   * @param mul_mir The multiply MIR to be combined.
-   * @param add_mir The add/sub MIR to be combined.
-   * @param mul_is_first_addend 'true' if multiply product is the first addend of add operation.
-   * @param is_wide 'true' if the operations are long type.
-   * @param is_sub 'true' if it is a multiply-subtract operation.
-   */
-  void CombineMultiplyAdd(MIR* mul_mir, MIR* add_mir, bool mul_is_first_addend,
-                          bool is_wide, bool is_sub);
-  /**
-   * @brief Check whether the first MIR anti-depends on the second MIR.
-   * @details Checks whether one of the vregs used by the first MIR is redefined by the
-   * second MIR, i.e. whether there is a write-after-read dependency.
-   * @param first The first MIR.
-   * @param second The second MIR.
-   * @return Returns true if there is a write-after-read dependency.
-   */
-  bool HasAntiDependency(MIR* first, MIR* second);
-
-  bool BuildExtendedBBList(class BasicBlock* bb);
-  bool FillDefBlockMatrix(BasicBlock* bb);
-  void InitializeDominationInfo(BasicBlock* bb);
-  bool ComputeblockIDom(BasicBlock* bb);
-  bool ComputeBlockDominators(BasicBlock* bb);
-  bool SetDominators(BasicBlock* bb);
-  bool ComputeBlockLiveIns(BasicBlock* bb);
-  bool ComputeDominanceFrontier(BasicBlock* bb);
-
-  void CountChecks(BasicBlock* bb);
-  void AnalyzeBlock(BasicBlock* bb, struct MethodStats* stats);
-  bool ComputeSkipCompilation(struct MethodStats* stats, bool skip_default,
-                              std::string* skip_message);
-
-  CompilationUnit* const cu_;
-  ArenaVector<int> ssa_base_vregs_;
-  ArenaVector<int> ssa_subscripts_;
-  // Map original Dalvik virtual reg i to the current SSA name.
-  int32_t* vreg_to_ssa_map_;        // length == method->registers_size
-  int* ssa_last_defs_;              // length == method->registers_size
-  ArenaBitVector* is_constant_v_;   // length == num_ssa_reg
-  int* constant_values_;            // length == num_ssa_reg
-  // Use counts of ssa names.
-  ArenaVector<uint32_t> use_counts_;      // Weighted by nesting depth
-  ArenaVector<uint32_t> raw_use_counts_;  // Not weighted
-  unsigned int num_reachable_blocks_;
-  unsigned int max_num_reachable_blocks_;
-  bool dfs_orders_up_to_date_;
-  bool domination_up_to_date_;
-  bool mir_ssa_rep_up_to_date_;
-  bool topological_order_up_to_date_;
-  ArenaVector<BasicBlockId> dfs_order_;
-  ArenaVector<BasicBlockId> dfs_post_order_;
-  ArenaVector<BasicBlockId> dom_post_order_traversal_;
-  ArenaVector<BasicBlockId> topological_order_;
-  // Indexes in topological_order_ need to be only as big as the BasicBlockId.
-  static_assert(sizeof(BasicBlockId) == sizeof(uint16_t), "Assuming 16 bit BasicBlockId");
-  // For each loop head, remember the past-the-end topological index of the loop; 0 if not a loop head.
-  ArenaVector<uint16_t> topological_order_loop_ends_;
-  // Map BB ids to topological_order_ indexes. 0xffff if not included (hidden or null block).
-  ArenaVector<uint16_t> topological_order_indexes_;
-  // Stack of the loop head indexes and recalculation flags for RepeatingTopologicalSortIterator.
-  ArenaVector<std::pair<uint16_t, bool>> topological_order_loop_head_stack_;
-  size_t max_nested_loops_;
-  int* i_dom_list_;
-  std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
-  // Union of temporaries used by different passes.
-  union {
-    // Class init check elimination.
-    struct {
-      size_t num_class_bits;  // 2 bits per class: class initialized and class in dex cache.
-      ArenaBitVector* work_classes_to_check;
-      ArenaBitVector** ending_classes_to_check_matrix;  // num_blocks_ x num_class_bits.
-      uint16_t* indexes;
-    } cice;
-    // Null check elimination.
-    struct {
-      size_t num_vregs;
-      ArenaBitVector* work_vregs_to_check;
-      ArenaBitVector** ending_vregs_to_check_matrix;  // num_blocks_ x num_vregs.
-    } nce;
-    // Special method inlining.
-    struct {
-      size_t num_indexes;
-      ArenaBitVector* processed_indexes;
-      uint16_t* lowering_infos;
-    } smi;
-    // SSA transformation.
-    struct {
-      size_t num_vregs;
-      ArenaBitVector* work_live_vregs;
-      ArenaBitVector** def_block_matrix;  // num_vregs x num_blocks_.
-      ArenaBitVector** phi_node_blocks;  // num_vregs x num_blocks_.
-      TypeInference* ti;
-    } ssa;
-    // Global value numbering.
-    struct {
-      GlobalValueNumbering* gvn;
-      uint16_t* ifield_ids;  // Part of GVN/LVN but cached here for LVN to avoid recalculation.
-      uint16_t* sfield_ids;  // Ditto.
-      GvnDeadCodeElimination* dce;
-    } gvn;
-  } temp_;
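
The temp_ union above relies on the passes running strictly one at a time, so the scratch state of class-init-check elimination, null-check elimination, inlining, SSA transformation and GVN can overlay the same storage. A stripped-down sketch of the pattern (field names invented):

    #include <cstddef>

    struct PassScratch {
      union {
        struct { size_t num_vregs; void* work_set; } nce;          // Null checks.
        struct { size_t num_class_bits; void* class_bits; } cice;  // Class init checks.
        struct { size_t num_vregs; void* live_vregs; } ssa;        // SSA transformation.
      } u;  // Only the currently running pass may touch its member.
    };
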
-  static const int kInvalidEntry = -1;
-  ArenaVector<BasicBlock*> block_list_;
-  ArenaBitVector* try_block_addr_;
-  BasicBlock* entry_block_;
-  BasicBlock* exit_block_;
-  const DexFile::CodeItem* current_code_item_;
-  ArenaVector<DexCompilationUnit*> m_units_;     // List of methods included in this graph
-  typedef std::pair<int, int> MIRLocation;       // Insert point, (m_unit_ index, offset)
-  ArenaVector<MIRLocation> method_stack_;        // Include stack
-  int current_method_;
-  DexOffset current_offset_;                     // Offset in code units
-  int def_count_;                                // Used to estimate size of ssa name storage.
-  int* opcode_count_;                            // Dex opcode coverage stats.
-  int num_ssa_regs_;                             // Number of names following SSA transformation.
-  ArenaVector<BasicBlockId> extended_basic_blocks_;  // Heads of block "traces".
-  int method_sreg_;
-  unsigned int attributes_;
-  Checkstats* checkstats_;
-  ArenaAllocator* const arena_;
-  int backward_branches_;
-  int forward_branches_;
-  size_t num_non_special_compiler_temps_;  // Keeps track of allocated non-special compiler temps. These are VRs that are in compiler temp region on stack.
-  size_t max_available_non_special_compiler_temps_;  // Keeps track of maximum available non-special temps.
-  size_t max_available_special_compiler_temps_;      // Keeps track of maximum available special temps.
-  bool requested_backend_temp_;            // Keeps track whether BE temps have been requested.
-  size_t reserved_temps_for_backend_;      // Keeps track of the remaining temps that are reserved for BE.
-  bool compiler_temps_committed_;          // Keeps track whether number of temps has been frozen (for example post frame size calculation).
-  bool punt_to_interpreter_;               // Difficult or not worthwhile - just interpret.
-  uint64_t merged_df_flags_;
-  ArenaVector<MirIFieldLoweringInfo> ifield_lowering_infos_;
-  ArenaVector<MirSFieldLoweringInfo> sfield_lowering_infos_;
-  ArenaVector<MirMethodLoweringInfo> method_lowering_infos_;
-
-  // In the suspend check elimination pass we determine for each basic block and enclosing
-  // loop whether there's guaranteed to be a suspend check on the path from the loop head
-  // to this block. If so, we can eliminate the back-edge suspend check.
-  // The bb->id is index into suspend_checks_in_loops_ and the loop head's depth is bit index
-  // in a suspend_checks_in_loops_[bb->id].
-  uint32_t* suspend_checks_in_loops_;
-
-  static const uint64_t oat_data_flow_attributes_[kMirOpLast];
-
-  friend class MirOptimizationTest;
-  friend class ClassInitCheckEliminationTest;
-  friend class SuspendCheckEliminationTest;
-  friend class NullCheckEliminationTest;
-  friend class GlobalValueNumberingTest;
-  friend class GvnDeadCodeEliminationTest;
-  friend class LocalValueNumberingTest;
-  friend class TopologicalSortOrderTest;
-  friend class TypeInferenceTest;
-  friend class QuickCFITest;
-  friend class QuickAssembleX86TestBase;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_MIR_GRAPH_H_
diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc
deleted file mode 100644
index 7858681..0000000
--- a/compiler/dex/mir_graph_test.cc
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiler_ir.h"
-#include "dataflow_iterator-inl.h"
-#include "mir_graph.h"
-#include "gtest/gtest.h"
-
-namespace art {
-
-class TopologicalSortOrderTest : public testing::Test {
- protected:
-  struct BBDef {
-    static constexpr size_t kMaxSuccessors = 4;
-    static constexpr size_t kMaxPredecessors = 4;
-
-    BBType type;
-    size_t num_successors;
-    BasicBlockId successors[kMaxSuccessors];
-    size_t num_predecessors;
-    BasicBlockId predecessors[kMaxPredecessors];
-  };
-
-#define DEF_SUCC0() \
-    0u, { }
-#define DEF_SUCC1(s1) \
-    1u, { s1 }
-#define DEF_SUCC2(s1, s2) \
-    2u, { s1, s2 }
-#define DEF_SUCC3(s1, s2, s3) \
-    3u, { s1, s2, s3 }
-#define DEF_SUCC4(s1, s2, s3, s4) \
-    4u, { s1, s2, s3, s4 }
-#define DEF_PRED0() \
-    0u, { }
-#define DEF_PRED1(p1) \
-    1u, { p1 }
-#define DEF_PRED2(p1, p2) \
-    2u, { p1, p2 }
-#define DEF_PRED3(p1, p2, p3) \
-    3u, { p1, p2, p3 }
-#define DEF_PRED4(p1, p2, p3, p4) \
-    4u, { p1, p2, p3, p4 }
-#define DEF_BB(type, succ, pred) \
-    { type, succ, pred }
-
-  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
-    cu_.mir_graph->block_id_map_.clear();
-    cu_.mir_graph->block_list_.clear();
-    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
-    ASSERT_EQ(kNullBlock, defs[0].type);
-    ASSERT_EQ(kEntryBlock, defs[1].type);
-    ASSERT_EQ(kExitBlock, defs[2].type);
-    for (size_t i = 0u; i != count; ++i) {
-      const BBDef* def = &defs[i];
-      BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type);
-      if (def->num_successors <= 2) {
-        bb->successor_block_list_type = kNotUsed;
-        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
-        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
-      } else {
-        bb->successor_block_list_type = kPackedSwitch;
-        bb->fall_through = 0u;
-        bb->taken = 0u;
-        bb->successor_blocks.reserve(def->num_successors);
-        for (size_t j = 0u; j != def->num_successors; ++j) {
-          SuccessorBlockInfo* successor_block_info =
-              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessors));
-          successor_block_info->block = def->successors[j];
-          successor_block_info->key = 0u;  // Not used by class init check elimination.
-          bb->successor_blocks.push_back(successor_block_info);
-        }
-      }
-      bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors);
-      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
-        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
-            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
-      }
-    }
-    ASSERT_EQ(count, cu_.mir_graph->block_list_.size());
-    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1];
-    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
-    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2];
-    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
-
-    DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(cu_.arena.Alloc(sizeof(DexFile::CodeItem),
-                                                                                   kArenaAllocMisc));
-    cu_.mir_graph->current_code_item_ = code_item;
-  }
-
-  template <size_t count>
-  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
-    DoPrepareBasicBlocks(defs, count);
-  }
-
-  void ComputeTopologicalSortOrder() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-    ASSERT_FALSE(cu_.mir_graph->topological_order_.empty());
-    ASSERT_FALSE(cu_.mir_graph->topological_order_loop_ends_.empty());
-    ASSERT_FALSE(cu_.mir_graph->topological_order_indexes_.empty());
-    ASSERT_EQ(cu_.mir_graph->GetNumBlocks(), cu_.mir_graph->topological_order_indexes_.size());
-    for (size_t i = 0, size = cu_.mir_graph->GetTopologicalSortOrder().size(); i != size; ++i) {
-      ASSERT_LT(cu_.mir_graph->topological_order_[i], cu_.mir_graph->GetNumBlocks());
-      BasicBlockId id = cu_.mir_graph->topological_order_[i];
-      EXPECT_EQ(i, cu_.mir_graph->topological_order_indexes_[id]);
-    }
-  }
-
-  void DoCheckOrder(const BasicBlockId* ids, size_t count) {
-    ASSERT_EQ(count, cu_.mir_graph->GetTopologicalSortOrder().size());
-    for (size_t i = 0; i != count; ++i) {
-      EXPECT_EQ(ids[i], cu_.mir_graph->GetTopologicalSortOrder()[i]) << i;
-    }
-  }
-
-  template <size_t count>
-  void CheckOrder(const BasicBlockId (&ids)[count]) {
-    DoCheckOrder(ids, count);
-  }
-
-  void DoCheckLoopEnds(const uint16_t* ends, size_t count) {
-    for (size_t i = 0; i != count; ++i) {
-      ASSERT_LT(i, cu_.mir_graph->GetTopologicalSortOrderLoopEnds().size());
-      EXPECT_EQ(ends[i], cu_.mir_graph->GetTopologicalSortOrderLoopEnds()[i]) << i;
-    }
-  }
-
-  template <size_t count>
-  void CheckLoopEnds(const uint16_t (&ends)[count]) {
-    DoCheckLoopEnds(ends, count);
-  }
-
-  TopologicalSortOrderTest()
-      : pool_(),
-        cu_(&pool_, kRuntimeISA, nullptr, nullptr) {
-    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
-  }
-
-  ArenaPool pool_;
-  CompilationUnit cu_;
-};
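
The BBDef table plus the DEF_* macros give each test a compact adjacency-list description of a CFG; blocks 0-2 are always the null, entry and exit blocks. As a reading aid, here is how a plain if/else diamond would be written in this notation (illustrative only, not one of the tests below):

    const BBDef kDiamond[] = {
        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
        DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // Branch.
        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Then arm.
        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Else arm.
        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // Join.
    };
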
-
-TEST_F(TopologicalSortOrderTest, DoWhile) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 0, 3, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
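
The loop_ends array is indexed by topological position, not block id: a non-zero entry at index i marks the block at position i as a loop head whose body occupies positions [i, loop_ends[i]). In DoWhile the order is {1, 3, 4, 5, 2} and loop_ends[2] == 3, i.e. block 4 heads a loop whose body is just itself. A small helper expressing that membership rule (illustrative, not part of the tests):

    #include <cstddef>
    #include <cstdint>

    // True iff the block at topological position block_idx lies inside the
    // loop headed at position head_idx.
    bool InLoop(const uint16_t* loop_ends, size_t head_idx, size_t block_idx) {
      return loop_ends[head_idx] != 0u &&
             block_idx >= head_idx &&
             block_idx < loop_ends[head_idx];
    }
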
-
-TEST_F(TopologicalSortOrderTest, While) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED2(1, 4)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(3)),     // Loops to 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 3, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, WhileWithTwoBackEdges) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED3(1, 4, 5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED1(3)),     // Loops to 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)),        // Loops to 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 6, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 4, 0, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, NestedLoop) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED2(1, 6)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED2(3, 5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to 4.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)),            // Loops to 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 6, 7, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 5, 4, 0, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, NestedLoopHeadLoops) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 4)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED2(3, 5)),      // Nested head, loops to 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to 4.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 6, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 4, 4, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, NestedLoopSameBackBranchBlock) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 3), DEF_PRED1(4)),         // Loops to 4 and 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 6, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 4, 4, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, TwoReorderedInnerLoops) {
-  // This is a simplified version of a real code graph, where the branch from 8 to 5 must
-  // prevent block 5 from being considered a loop head before the loop 7-8 is processed.
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(9)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 9), DEF_PRED2(1, 5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 7), DEF_PRED1(3)),         // Branch over loop in 5.
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 3), DEF_PRED3(4, 6, 8)),   // Loops to 4; inner loop.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),            // Loops to 5.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED2(4, 8)),         // Loop head.
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 5), DEF_PRED1(7)),         // Loops to 7; branches to 5.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 7, 8, 5, 6, 9, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 7, 0, 5, 0, 7, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, NestedLoopWithBackEdgeAfterOuterLoopBackEdge) {
-  // This is a simplified version of a real code graph. The back-edge from 7 to the inner
-  // loop head 4 comes after the back-edge from 6 to the outer loop head 3. To make this
-  // appear a bit more complex, there's also a back-edge from 5 to 4.
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED2(1, 6)),         // Outer loop head.
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED3(3, 5, 7)),   // Inner loop head.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to inner loop head 4.
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 3), DEF_PRED1(4)),         // Loops to outer loop head 3.
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(2, 4), DEF_PRED1(6)),         // Loops to inner loop head 4.
-  };
-  const BasicBlockId expected_order[] = {
-      // NOTE: The 5 goes before 6 only because 5 is a "fall-through" from 4 while 6 is "taken".
-      1, 3, 4, 5, 6, 7, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 6, 6, 0, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, LoopWithTwoEntryPoints) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 6)),  // Fall-back block is chosen as
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)),  // the earlier of these two.
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(6)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 6, 7, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 0, 5, 0, 0, 0, 0
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-}
-
-TEST_F(TopologicalSortOrderTest, UnnaturalLoops) {
-  const BBDef bbs[] = {
-      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(11, 3)),  // Unnatural loop head (top-level).
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(9, 7), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED1(6)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED2(10, 7)),  // Unnatural loop head (nested).
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(10), DEF_PRED2(6, 8)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 11), DEF_PRED1(9)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 2), DEF_PRED1(10)),
-  };
-  const BasicBlockId expected_order[] = {
-      1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2
-  };
-  const uint16_t loop_ends[] = {
-      0, 0, 10, 0, 0, 0, 9, 0, 0, 0, 0,
-  };
-
-  PrepareBasicBlocks(bbs);
-  ComputeTopologicalSortOrder();
-  CheckOrder(expected_order);
-  CheckLoopEnds(loop_ends);
-
-  const std::pair<BasicBlockId, bool> expected_and_change[] = {
-      { 1, false },
-      { 3, false },
-      { 4, true },    // Initial run of the outer loop.
-      { 5, true },
-      { 6, true },
-      { 7, true },
-      { 8, true },    // Initial run of the inner loop.
-      { 9, true },
-      { 10, true },
-      { 8, true },    // Recalculation of the inner loop - changed.
-      { 9, true },
-      { 10, true },
-      { 8, false },   // Recalculation of the inner loop - unchanged.
-      { 11, true },
-      { 4, true },    // Recalculation of the outer loop - changed.
-      { 5, true },
-      { 6, true },
-      { 7, false },   // No change: skip inner loop head because inputs are unchanged.
-      { 9, true },
-      { 10, true },
-      { 8, true },    // Recalculation of the inner loop - changed.
-      { 9, true },
-      { 10, true },
-      { 8, false },   // Recalculation of the inner loop - unchanged.
-      { 11, true },
-      { 4, false },   // Recalculation of the outer loop - unchanged.
-      { 2, false },
-  };
-  size_t pos = 0;
-  LoopRepeatingTopologicalSortIterator iter(cu_.mir_graph.get());
-  bool change = false;
-  for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) {
-    ASSERT_NE(arraysize(expected_and_change), pos);
-    ASSERT_EQ(expected_and_change[pos].first, bb->id) << pos;
-    change = expected_and_change[pos].second;
-    ++pos;
-  }
-  ASSERT_EQ(arraysize(expected_and_change), pos);
-}
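
The expected_and_change walk above is the interesting part: LoopRepeatingTopologicalSortIterator revisits a loop's blocks until a whole pass over the loop reports no change, and skips a nested head whose inputs are unchanged. The generic fixpoint shape, under the assumption that visit() returns whether the analysis state changed:

    #include <vector>

    template <typename Node, typename Fn>
    void IterateToFixpoint(const std::vector<Node>& loop_blocks, Fn visit) {
      bool changed = true;
      while (changed) {
        changed = false;
        for (const Node& n : loop_blocks) {
          // Re-run the per-block transfer function; remember if anything moved.
          changed |= visit(n);
        }
      }
    }
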
-
-}  // namespace art
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
deleted file mode 100644
index 658e7d6..0000000
--- a/compiler/dex/mir_method_info.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_method_info.h"
-
-#include "dex/compiler_ir.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver.h"
-#include "driver/dex_compilation_unit.h"
-#include "driver/compiler_driver-inl.h"
-#include "driver/compiler_options.h"
-#include "mirror/class_loader.h"  // Only to allow casts in Handle<ClassLoader>.
-#include "mirror/dex_cache.h"     // Only to allow casts in Handle<DexCache>.
-#include "scoped_thread_state_change.h"
-#include "handle_scope-inl.h"
-
-namespace art {
-
-void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
-                                    const DexCompilationUnit* mUnit,
-                                    MirMethodLoweringInfo* method_infos, size_t count) {
-  if (kIsDebugBuild) {
-    DCHECK(method_infos != nullptr);
-    DCHECK_NE(count, 0u);
-    for (auto it = method_infos, end = method_infos + count; it != end; ++it) {
-      MirMethodLoweringInfo unresolved(it->MethodIndex(), it->GetInvokeType(), it->IsQuickened());
-      unresolved.declaring_dex_file_ = it->declaring_dex_file_;
-      unresolved.vtable_idx_ = it->vtable_idx_;
-      if (it->target_dex_file_ != nullptr) {
-        unresolved.target_dex_file_ = it->target_dex_file_;
-        unresolved.target_method_idx_ = it->target_method_idx_;
-      }
-      if (kIsDebugBuild) {
-        unresolved.CheckEquals(*it);
-      }
-    }
-  }
-
-  // We're going to resolve methods and check access in a tight loop. It's better to hold
-  // the lock and the needed references once than to re-acquire them again and again.
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(compiler_driver->GetDexCache(mUnit)));
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(compiler_driver->GetClassLoader(soa, mUnit)));
-  Handle<mirror::Class> referrer_class(hs.NewHandle(
-      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit)));
-  auto current_dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
-  // Even if the referrer class is unresolved (i.e. we're compiling a method without class
-  // definition) we still want to resolve methods and record all available info.
-  Runtime* const runtime = Runtime::Current();
-  const DexFile* const dex_file = mUnit->GetDexFile();
-  const bool use_jit = runtime->UseJit();
-  const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod();
-  DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap();
-  DexFileMethodInliner* default_inliner =
-      (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr;
-
-  for (auto it = method_infos, end = method_infos + count; it != end; ++it) {
-    // For quickened invokes, the dex method idx is actually the mir offset.
-    if (it->IsQuickened()) {
-      const auto* dequicken_ref = verified_method->GetDequickenIndex(it->method_idx_);
-      CHECK(dequicken_ref != nullptr);
-      it->target_dex_file_ = dequicken_ref->dex_file;
-      it->target_method_idx_ = dequicken_ref->index;
-    }
-    // Remember devirtualized invoke target and set the called method to the default.
-    MethodReference devirt_ref(it->target_dex_file_, it->target_method_idx_);
-    MethodReference* devirt_target = (it->target_dex_file_ != nullptr) ? &devirt_ref : nullptr;
-    InvokeType invoke_type = it->GetInvokeType();
-    ArtMethod* resolved_method = nullptr;
-
-    bool string_init = false;
-    if (default_inliner->IsStringInitMethodIndex(it->MethodIndex())) {
-      string_init = true;
-      invoke_type = kDirect;
-    }
-
-    if (!it->IsQuickened()) {
-      it->target_dex_file_ = dex_file;
-      it->target_method_idx_ = it->MethodIndex();
-      current_dex_cache.Assign(dex_cache.Get());
-      resolved_method = compiler_driver->ResolveMethod(soa, dex_cache, class_loader, mUnit,
-                                                       it->target_method_idx_, invoke_type, true);
-    } else {
-      // The method index is actually the dex PC in this case.
-      // Calculate the proper dex file and target method idx.
-      CHECK(use_jit);
-      CHECK_EQ(invoke_type, kVirtual);
-      // Don't devirt if we are in a different dex file since we can't have direct invokes in
-      // another dex file unless we always put a direct / patch pointer.
-      devirt_target = nullptr;
-      current_dex_cache.Assign(runtime->GetClassLinker()->FindDexCache(
-          soa.Self(), *it->target_dex_file_));
-      CHECK(current_dex_cache.Get() != nullptr);
-      DexCompilationUnit cu(
-          mUnit->GetCompilationUnit(), mUnit->GetClassLoader(), mUnit->GetClassLinker(),
-          *it->target_dex_file_, nullptr /* code_item not used */, 0u /* class_def_idx not used */,
-          it->target_method_idx_, 0u /* access_flags not used */,
-          nullptr /* verified_method not used */,
-          current_dex_cache);
-      resolved_method = compiler_driver->ResolveMethod(soa, current_dex_cache, class_loader, &cu,
-                                                       it->target_method_idx_, invoke_type, false);
-      if (resolved_method == nullptr) {
-        // If the method is null then it should be a miranda method; in this case try
-        // re-loading it, this time as an interface method. The actual miranda method is in the
-        // vtable, but it will resolve to an interface method.
-        resolved_method = compiler_driver->ResolveMethod(
-            soa, current_dex_cache, class_loader, &cu, it->target_method_idx_, kInterface, false);
-        CHECK(resolved_method != nullptr);
-      }
-      if (resolved_method != nullptr) {
-        // Since this was a dequickened virtual, it is guaranteed to be resolved. However, it may be
-        // resolved to an interface method. If this is the case then change the invoke type to
-        // interface with the assumption that sharp_type will be kVirtual.
-        if (resolved_method->GetInvokeType() == kInterface) {
-          it->flags_ = (it->flags_ & ~(kInvokeTypeMask << kBitInvokeTypeBegin)) |
-              (static_cast<uint16_t>(kInterface) << kBitInvokeTypeBegin);
-        }
-      }
-    }
-    if (UNLIKELY(resolved_method == nullptr)) {
-      continue;
-    }
-
-    compiler_driver->GetResolvedMethodDexFileLocation(resolved_method,
-        &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_);
-    if (!it->IsQuickened()) {
-      // For quickened invoke virtuals we may have desharpened to an interface method, which
-      // won't give us the right method index; in this case we dispatch blindly, or else we
-      // can't compile the method. Converting the invoke to interface dispatch doesn't work
-      // since we have no way to get the dex method index for quickened invoke virtuals in
-      // the interface trampolines.
-      it->vtable_idx_ =
-          compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type);
-    }
-
-    MethodReference target_method(it->target_dex_file_, it->target_method_idx_);
-    int fast_path_flags = compiler_driver->IsFastInvoke(
-        soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method,
-        &invoke_type, &target_method, devirt_target, &it->direct_code_, &it->direct_method_);
-    const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass();
-    const bool is_class_initialized =
-        compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method);
-
-    // Check if the target method is intrinsic or special.
-    InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags;
-    if (inliner_map != nullptr) {
-      auto* inliner = (target_method.dex_file == dex_file)
-          ? default_inliner
-          : inliner_map->GetMethodInliner(target_method.dex_file);
-      is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index);
-    }
-
-    uint16_t other_flags = it->flags_ &
-        ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
-            (kInvokeTypeMask << kBitSharpTypeBegin));
-    it->flags_ = other_flags |
-        // String init path is a special always-fast path.
-        (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) |
-        ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
-        ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
-        (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
-        (is_referrers_class ? kFlagIsReferrersClass : 0u) |
-        (is_class_initialized ? kFlagClassIsInitialized : 0u);
-    it->target_dex_file_ = target_method.dex_file;
-    it->target_method_idx_ = target_method.dex_method_index;
-    it->stats_flags_ = fast_path_flags;
-    if (string_init) {
-      it->direct_code_ = 0;
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h
deleted file mode 100644
index 4512f35..0000000
--- a/compiler/dex/mir_method_info.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_MIR_METHOD_INFO_H_
-#define ART_COMPILER_DEX_MIR_METHOD_INFO_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "invoke_type.h"
-#include "method_reference.h"
-
-namespace art {
-
-class CompilerDriver;
-class DexCompilationUnit;
-class DexFile;
-
-class MirMethodInfo {
- public:
-  uint16_t MethodIndex() const {
-    return method_idx_;
-  }
-
-  bool IsStatic() const {
-    return (flags_ & kFlagIsStatic) != 0u;
-  }
-
-  bool IsResolved() const {
-    return declaring_dex_file_ != nullptr;
-  }
-
-  const DexFile* DeclaringDexFile() const {
-    return declaring_dex_file_;
-  }
-  void SetDeclaringDexFile(const DexFile* dex_file) {
-    declaring_dex_file_ = dex_file;
-  }
-
-  uint16_t DeclaringClassIndex() const {
-    return declaring_class_idx_;
-  }
-
-  uint16_t DeclaringMethodIndex() const {
-    return declaring_method_idx_;
-  }
-
- protected:
-  enum {
-    kBitIsStatic = 0,
-    kMethodInfoBitEnd
-  };
-  static_assert(kMethodInfoBitEnd <= 16, "Too many flags");
-  static constexpr uint16_t kFlagIsStatic = 1u << kBitIsStatic;
-
-  MirMethodInfo(uint16_t method_idx, uint16_t flags)
-      : method_idx_(method_idx),
-        flags_(flags),
-        declaring_method_idx_(0u),
-        declaring_class_idx_(0u),
-        declaring_dex_file_(nullptr) {
-  }
-
-  // Make copy-ctor/assign/dtor protected to avoid slicing.
-  MirMethodInfo(const MirMethodInfo& other) = default;
-  MirMethodInfo& operator=(const MirMethodInfo& other) = default;
-  ~MirMethodInfo() = default;
-
-  // The method index in the compiling method's dex file.
-  uint16_t method_idx_;
-  // Flags, for volatility and derived class data.
-  uint16_t flags_;
-  // The method index in the dex file that defines the method, 0 if unresolved.
-  uint16_t declaring_method_idx_;
-  // The type index of the class declaring the method, 0 if unresolved.
-  uint16_t declaring_class_idx_;
-  // The dex file that defines the class containing the method and the method,
-  // null if unresolved.
-  const DexFile* declaring_dex_file_;
-};
-
-class MirMethodLoweringInfo : public MirMethodInfo {
- public:
-  // For each requested method retrieve the method's declaring location (dex file, class
-  // index and method index) and compute whether we can fast path the method call. For fast
-  // path methods, retrieve the method's vtable index and direct code and method when applicable.
-  static void Resolve(CompilerDriver* compiler_driver, const DexCompilationUnit* mUnit,
-                      MirMethodLoweringInfo* method_infos, size_t count)
-      REQUIRES(!Locks::mutator_lock_);
-
-  MirMethodLoweringInfo(uint16_t method_idx, InvokeType type, bool is_quickened)
-      : MirMethodInfo(method_idx,
-                      ((type == kStatic) ? kFlagIsStatic : 0u) |
-                      (static_cast<uint16_t>(type) << kBitInvokeTypeBegin) |
-                      (static_cast<uint16_t>(type) << kBitSharpTypeBegin) |
-                      (is_quickened ? kFlagQuickened : 0u)),
-        direct_code_(0u),
-        direct_method_(0u),
-        target_dex_file_(nullptr),
-        target_method_idx_(0u),
-        vtable_idx_(0u),
-        stats_flags_(0) {
-  }
-
-  void SetDevirtualizationTarget(const MethodReference& ref) {
-    DCHECK(target_dex_file_ == nullptr);
-    DCHECK_EQ(target_method_idx_, 0u);
-    DCHECK_LE(ref.dex_method_index, 0xffffu);
-    target_dex_file_ = ref.dex_file;
-    target_method_idx_ = ref.dex_method_index;
-  }
-
-  bool FastPath() const {
-    return (flags_ & kFlagFastPath) != 0u;
-  }
-
-  bool IsIntrinsic() const {
-    return (flags_ & kFlagIsIntrinsic) != 0u;
-  }
-
-  bool IsSpecial() const {
-    return (flags_ & kFlagIsSpecial) != 0u;
-  }
-
-  bool IsReferrersClass() const {
-    return (flags_ & kFlagIsReferrersClass) != 0;
-  }
-
-  bool IsClassInitialized() const {
-    return (flags_ & kFlagClassIsInitialized) != 0u;
-  }
-
-  // Returns true iff the method invoke is INVOKE_VIRTUAL_QUICK or INVOKE_VIRTUAL_RANGE_QUICK.
-  bool IsQuickened() const {
-    return (flags_ & kFlagQuickened) != 0u;
-  }
-
-  InvokeType GetInvokeType() const {
-    return static_cast<InvokeType>((flags_ >> kBitInvokeTypeBegin) & kInvokeTypeMask);
-  }
-
-  art::InvokeType GetSharpType() const {
-    return static_cast<InvokeType>((flags_ >> kBitSharpTypeBegin) & kInvokeTypeMask);
-  }
-
-  MethodReference GetTargetMethod() const {
-    return MethodReference(target_dex_file_, target_method_idx_);
-  }
-
-  uint16_t VTableIndex() const {
-    return vtable_idx_;
-  }
-  void SetVTableIndex(uint16_t index) {
-    vtable_idx_ = index;
-  }
-
-  uintptr_t DirectCode() const {
-    return direct_code_;
-  }
-
-  uintptr_t DirectMethod() const {
-    return direct_method_;
-  }
-
-  int StatsFlags() const {
-    return stats_flags_;
-  }
-
-  void CheckEquals(const MirMethodLoweringInfo& info) const {
-    CHECK_EQ(method_idx_, info.method_idx_);
-    CHECK_EQ(flags_, info.flags_);
-    CHECK_EQ(declaring_method_idx_, info.declaring_method_idx_);
-    CHECK_EQ(declaring_class_idx_, info.declaring_class_idx_);
-    CHECK_EQ(declaring_dex_file_, info.declaring_dex_file_);
-    CHECK_EQ(direct_code_, info.direct_code_);
-    CHECK_EQ(direct_method_, info.direct_method_);
-    CHECK_EQ(target_dex_file_, info.target_dex_file_);
-    CHECK_EQ(target_method_idx_, info.target_method_idx_);
-    CHECK_EQ(vtable_idx_, info.vtable_idx_);
-    CHECK_EQ(stats_flags_, info.stats_flags_);
-  }
-
- private:
-  enum {
-    kBitFastPath = kMethodInfoBitEnd,
-    kBitIsIntrinsic,
-    kBitIsSpecial,
-    kBitInvokeTypeBegin,
-    kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3,  // 3 bits for invoke type.
-    kBitSharpTypeBegin = kBitInvokeTypeEnd,
-    kBitSharpTypeEnd = kBitSharpTypeBegin + 3,  // 3 bits for sharp type.
-    kBitIsReferrersClass = kBitSharpTypeEnd,
-    kBitClassIsInitialized,
-    kBitQuickened,
-    kMethodLoweringInfoBitEnd
-  };
-  static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags");
-  static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath;
-  static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic;
-  static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial;
-  static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass;
-  static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized;
-  static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened;
-  static constexpr uint16_t kInvokeTypeMask = 7u;
-  static_assert((1u << (kBitInvokeTypeEnd - kBitInvokeTypeBegin)) - 1u == kInvokeTypeMask,
-                "assert invoke type bits failed");
-  static_assert((1u << (kBitSharpTypeEnd - kBitSharpTypeBegin)) - 1u == kInvokeTypeMask,
-                "assert sharp type bits failed");
-
-  uintptr_t direct_code_;
-  uintptr_t direct_method_;
-  // Before Resolve(), target_dex_file_ and target_method_idx_ hold the verification-based
-  // devirtualized invoke target if available, null and 0u otherwise.
-  // After Resolve() they hold the actual target method that will be called; it will be either
-  // a devirtualized target method or the compilation unit's dex file and MethodIndex().
-  const DexFile* target_dex_file_;
-  uint16_t target_method_idx_;
-  uint16_t vtable_idx_;
-  int stats_flags_;
-
-  friend class MirOptimizationTest;
-  friend class TypeInferenceTest;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_MIR_METHOD_INFO_H_
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
deleted file mode 100644
index eb4915b..0000000
--- a/compiler/dex/mir_optimization.cc
+++ /dev/null
@@ -1,2038 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/bit_vector-inl.h"
-#include "base/logging.h"
-#include "base/scoped_arena_containers.h"
-#include "class_linker-inl.h"
-#include "dataflow_iterator-inl.h"
-#include "dex/verified_method.h"
-#include "dex_flags.h"
-#include "driver/compiler_driver.h"
-#include "driver/dex_compilation_unit.h"
-#include "global_value_numbering.h"
-#include "gvn_dead_code_elimination.h"
-#include "local_value_numbering.h"
-#include "mir_field_info.h"
-#include "mirror/string.h"
-#include "quick/dex_file_method_inliner.h"
-#include "quick/dex_file_to_method_inliner_map.h"
-#include "stack.h"
-#include "thread-inl.h"
-#include "type_inference.h"
-#include "utils.h"
-
-namespace art {
-
-static unsigned int Predecessors(BasicBlock* bb) {
-  return bb->predecessors.size();
-}
-
-/* Set up a constant value for opcodes that have the DF_SETS_CONST attribute */
-void MIRGraph::SetConstant(int32_t ssa_reg, int32_t value) {
-  is_constant_v_->SetBit(ssa_reg);
-  constant_values_[ssa_reg] = value;
-  reg_location_[ssa_reg].is_const = true;
-}
-
-void MIRGraph::SetConstantWide(int32_t ssa_reg, int64_t value) {
-  is_constant_v_->SetBit(ssa_reg);
-  is_constant_v_->SetBit(ssa_reg + 1);
-  constant_values_[ssa_reg] = Low32Bits(value);
-  constant_values_[ssa_reg + 1] = High32Bits(value);
-  reg_location_[ssa_reg].is_const = true;
-  reg_location_[ssa_reg + 1].is_const = true;
-}
-
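-/*
- * Propagate constants through a basic block: record values defined by CONST-style
- * opcodes and forward them through moves whose inputs are all known constants.
- */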
-void MIRGraph::DoConstantPropagation(BasicBlock* bb) {
-  MIR* mir;
-
-  for (mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    // Skip pass if BB has MIR without SSA representation.
-    if (mir->ssa_rep == nullptr) {
-       return;
-    }
-
-    uint64_t df_attributes = GetDataFlowAttributes(mir);
-
-    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
-
-    if (!(df_attributes & DF_HAS_DEFS)) continue;
-
-    /* Handle instructions that set up constants directly */
-    if (df_attributes & DF_SETS_CONST) {
-      if (df_attributes & DF_DA) {
-        int32_t vB = static_cast<int32_t>(d_insn->vB);
-        switch (d_insn->opcode) {
-          case Instruction::CONST_4:
-          case Instruction::CONST_16:
-          case Instruction::CONST:
-            SetConstant(mir->ssa_rep->defs[0], vB);
-            break;
-          case Instruction::CONST_HIGH16:
-            SetConstant(mir->ssa_rep->defs[0], vB << 16);
-            break;
-          case Instruction::CONST_WIDE_16:
-          case Instruction::CONST_WIDE_32:
-            SetConstantWide(mir->ssa_rep->defs[0], static_cast<int64_t>(vB));
-            break;
-          case Instruction::CONST_WIDE:
-            SetConstantWide(mir->ssa_rep->defs[0], d_insn->vB_wide);
-            break;
-          case Instruction::CONST_WIDE_HIGH16:
-            SetConstantWide(mir->ssa_rep->defs[0], static_cast<int64_t>(vB) << 48);
-            break;
-          default:
-            break;
-        }
-      }
-    } else if (df_attributes & DF_IS_MOVE) {
-      int i;
-
-      for (i = 0; i < mir->ssa_rep->num_uses; i++) {
-        if (!is_constant_v_->IsBitSet(mir->ssa_rep->uses[i])) break;
-      }
-      /* Move a register holding a constant to another register */
-      if (i == mir->ssa_rep->num_uses) {
-        SetConstant(mir->ssa_rep->defs[0], constant_values_[mir->ssa_rep->uses[0]]);
-        if (df_attributes & DF_A_WIDE) {
-          SetConstant(mir->ssa_rep->defs[1], constant_values_[mir->ssa_rep->uses[1]]);
-        }
-      }
-    }
-  }
-  /* TODO: implement code to handle arithmetic operations */
-}
-
-/* Advance to next strictly dominated MIR node in an extended basic block */
-MIR* MIRGraph::AdvanceMIR(BasicBlock** p_bb, MIR* mir) {
-  BasicBlock* bb = *p_bb;
-  if (mir != nullptr) {
-    mir = mir->next;
-    while (mir == nullptr) {
-      bb = GetBasicBlock(bb->fall_through);
-      if ((bb == nullptr) || Predecessors(bb) != 1) {
-        // mir is null and we cannot proceed further.
-        break;
-      } else {
-        *p_bb = bb;
-        mir = bb->first_mir_insn;
-      }
-    }
-  }
-  return mir;
-}
-
-/*
- * To be used at an invoke mir.  If the logically next mir node represents
- * a move-result, return it.  Else, return nullptr.  If a move-result exists,
- * it is required to immediately follow the invoke with no intervening
- * opcodes or incoming arcs.  However, if the result of the invoke is not
- * used, a move-result may not be present.
- */
-MIR* MIRGraph::FindMoveResult(BasicBlock* bb, MIR* mir) {
-  BasicBlock* tbb = bb;
-  mir = AdvanceMIR(&tbb, mir);
-  while (mir != nullptr) {
-    if ((mir->dalvikInsn.opcode == Instruction::MOVE_RESULT) ||
-        (mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) ||
-        (mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE)) {
-      break;
-    }
-    // Keep going if it's a pseudo op, otherwise terminate.
-    if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
-      mir = AdvanceMIR(&tbb, mir);
-    } else {
-      mir = nullptr;
-    }
-  }
-  return mir;
-}
-
-BasicBlock* MIRGraph::NextDominatedBlock(BasicBlock* bb) {
-  if (bb->block_type == kDead) {
-    return nullptr;
-  }
-  DCHECK((bb->block_type == kEntryBlock) || (bb->block_type == kDalvikByteCode)
-      || (bb->block_type == kExitBlock));
-  BasicBlock* bb_taken = GetBasicBlock(bb->taken);
-  BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through);
-  if (((bb_fall_through == nullptr) && (bb_taken != nullptr)) &&
-      ((bb_taken->block_type == kDalvikByteCode) || (bb_taken->block_type == kExitBlock))) {
-    // Follow simple unconditional branches.
-    bb = bb_taken;
-  } else {
-    // Follow simple fallthrough.
-    bb = (bb_taken != nullptr) ? nullptr : bb_fall_through;
-  }
-  if (bb == nullptr || (Predecessors(bb) != 1)) {
-    return nullptr;
-  }
-  DCHECK((bb->block_type == kDalvikByteCode) || (bb->block_type == kExitBlock));
-  return bb;
-}
-
-static MIR* FindPhi(BasicBlock* bb, int ssa_name) {
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi) {
-      for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
-        if (mir->ssa_rep->uses[i] == ssa_name) {
-          return mir;
-        }
-      }
-    }
-  }
-  return nullptr;
-}
-
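-// Classify a MIR as a move, const or goto for the select pattern matching below.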
-static SelectInstructionKind SelectKind(MIR* mir) {
-  // Work with the case when mir is null.
-  if (mir == nullptr) {
-    return kSelectNone;
-  }
-  switch (mir->dalvikInsn.opcode) {
-    case Instruction::MOVE:
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_16:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_OBJECT_FROM16:
-      return kSelectMove;
-    case Instruction::CONST:
-    case Instruction::CONST_4:
-    case Instruction::CONST_16:
-      return kSelectConst;
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32:
-      return kSelectGoto;
-    default:
-      return kSelectNone;
-  }
-}
-
-static constexpr ConditionCode kIfCcZConditionCodes[] = {
-    kCondEq, kCondNe, kCondLt, kCondGe, kCondGt, kCondLe
-};
-
-static_assert(arraysize(kIfCcZConditionCodes) == Instruction::IF_LEZ - Instruction::IF_EQZ + 1,
-              "if_ccz_ccodes_size1");
-
-static constexpr ConditionCode ConditionCodeForIfCcZ(Instruction::Code opcode) {
-  return kIfCcZConditionCodes[opcode - Instruction::IF_EQZ];
-}
-
-static_assert(ConditionCodeForIfCcZ(Instruction::IF_EQZ) == kCondEq, "if_eqz ccode");
-static_assert(ConditionCodeForIfCcZ(Instruction::IF_NEZ) == kCondNe, "if_nez ccode");
-static_assert(ConditionCodeForIfCcZ(Instruction::IF_LTZ) == kCondLt, "if_ltz ccode");
-static_assert(ConditionCodeForIfCcZ(Instruction::IF_GEZ) == kCondGe, "if_gez ccode");
-static_assert(ConditionCodeForIfCcZ(Instruction::IF_GTZ) == kCondGt, "if_gtz ccode");
-static_assert(ConditionCodeForIfCcZ(Instruction::IF_LEZ) == kCondLe, "if_lez ccode");
-
-int MIRGraph::GetSSAUseCount(int s_reg) {
-  DCHECK_LT(static_cast<size_t>(s_reg), ssa_subscripts_.size());
-  return raw_use_counts_[s_reg];
-}
-
-size_t MIRGraph::GetNumBytesForSpecialTemps() const {
-  // This logic is written with the assumption that Method* is the only special temp.
-  DCHECK_EQ(max_available_special_compiler_temps_, 1u);
-  return InstructionSetPointerSize(cu_->instruction_set);
-}
-
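-// Number of VR temps still available to the middle-end after accounting for
-// temps reserved for the backend and those already allocated.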
-size_t MIRGraph::GetNumAvailableVRTemps() {
-  // First take into account all temps reserved for backend.
-  if (max_available_non_special_compiler_temps_ < reserved_temps_for_backend_) {
-    return 0;
-  }
-
-  // Calculate remaining ME temps available.
-  size_t remaining_me_temps = max_available_non_special_compiler_temps_ -
-      reserved_temps_for_backend_;
-
-  if (num_non_special_compiler_temps_ >= remaining_me_temps) {
-    return 0;
-  } else {
-    return remaining_me_temps - num_non_special_compiler_temps_;
-  }
-}
-
-// FIXME - will probably need to revisit all uses of this, as type not defined.
-static const RegLocation temp_loc = {kLocCompilerTemp,
-                                     0, 1 /*defined*/, 0, 0, 0, 0, 0, 1 /*home*/,
-                                     RegStorage(), INVALID_SREG, INVALID_SREG};
-
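-// Request a new compiler temp of the given type; assigns a virtual register and
-// an SSA name, and returns nullptr when not enough temps of that kind remain.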
-CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) {
-  // Once the compiler temps have been committed, new ones cannot be requested anymore.
-  DCHECK_EQ(compiler_temps_committed_, false);
-  // Make sure that reserved for BE set is sane.
-  DCHECK_LE(reserved_temps_for_backend_, max_available_non_special_compiler_temps_);
-
-  bool verbose = cu_->verbose;
-  const char* ct_type_str = nullptr;
-
-  if (verbose) {
-    switch (ct_type) {
-      case kCompilerTempBackend:
-        ct_type_str = "backend";
-        break;
-      case kCompilerTempSpecialMethodPtr:
-        ct_type_str = "method*";
-        break;
-      case kCompilerTempVR:
-        ct_type_str = "VR";
-        break;
-      default:
-        ct_type_str = "unknown";
-        break;
-    }
-    LOG(INFO) << "CompilerTemps: A compiler temp of type " << ct_type_str << " that is "
-        << (wide ? "wide is being requested." : "not wide is being requested.");
-  }
-
-  CompilerTemp *compiler_temp = static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp),
-                                                            kArenaAllocRegAlloc));
-
-  // Create the type of temp requested. Special temps need special handling because
-  // they have a specific virtual register assignment.
-  if (ct_type == kCompilerTempSpecialMethodPtr) {
-    // This has a special location on stack which is 32-bit or 64-bit depending
-    // on mode. However, we don't want to overlap with non-special section
-    // and thus even for 64-bit, we allow only a non-wide temp to be requested.
-    DCHECK_EQ(wide, false);
-
-    // The vreg is always the first special temp for method ptr.
-    compiler_temp->v_reg = GetFirstSpecialTempVR();
-
-    CHECK(reg_location_ == nullptr);
-  } else if (ct_type == kCompilerTempBackend) {
-    requested_backend_temp_ = true;
-
-    // Make sure that we are not exceeding temps reserved for BE.
-    // Since VR temps cannot be requested once the BE temps are requested, we
-    // allow reservation of VR temps as well for BE.
-    size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps();
-    size_t needed_temps = wide ? 2u : 1u;
-    if (available_temps < needed_temps) {
-      if (verbose) {
-        LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str
-            << " are available.";
-      }
-      return nullptr;
-    }
-
-    // Update the remaining reserved temps since we have now used them.
-    // Note that the code below is actually subtracting to remove them from the
-    // reserve once they have been claimed. It is careful not to go below zero.
-    reserved_temps_for_backend_ =
-        std::max(reserved_temps_for_backend_, needed_temps) - needed_temps;
-
-    // The new non-special compiler temp must receive a unique v_reg.
-    compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_;
-    num_non_special_compiler_temps_++;
-  } else if (ct_type == kCompilerTempVR) {
-    // Once we start giving out BE temps, we don't allow any more ME temps to be requested.
-    // This is done in order to prevent problems with ssa since these structures are allocated
-    // and managed by the ME.
-    DCHECK_EQ(requested_backend_temp_, false);
-
-    // There is a limit to the number of non-special temps so check to make sure it wasn't exceeded.
-    size_t available_temps = GetNumAvailableVRTemps();
-    if (available_temps <= 0 || (available_temps <= 1 && wide)) {
-      if (verbose) {
-        LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str
-            << " are available.";
-      }
-      return nullptr;
-    }
-
-    // The new non-special compiler temp must receive a unique v_reg.
-    compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_;
-    num_non_special_compiler_temps_++;
-  } else {
-    UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << ct_type_str << ".";
-  }
-
-  // We allocate an sreg as well to make developer life easier.
-  // However, if this is requested from an ME pass that will recalculate ssa afterwards,
-  // this sreg is no longer valid. The caller should be aware of this.
-  compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
-
-  if (verbose) {
-    LOG(INFO) << "CompilerTemps: New temp of type " << ct_type_str << " with v"
-        << compiler_temp->v_reg << " and s" << compiler_temp->s_reg_low << " has been created.";
-  }
-
-  if (wide) {
-    // Only non-special temps are handled as wide for now.
-    // Note that the number of non-special temps is incremented below.
-    DCHECK(ct_type == kCompilerTempBackend || ct_type == kCompilerTempVR);
-
-    // Ensure that the two registers are consecutive.
-    int ssa_reg_low = compiler_temp->s_reg_low;
-    int ssa_reg_high = AddNewSReg(compiler_temp->v_reg + 1);
-    num_non_special_compiler_temps_++;
-
-    if (verbose) {
-      LOG(INFO) << "CompilerTemps: The wide part of temp of type " << ct_type_str << " is v"
-          << compiler_temp->v_reg + 1 << " and s" << ssa_reg_high << ".";
-    }
-
-    if (reg_location_ != nullptr) {
-      reg_location_[ssa_reg_high] = temp_loc;
-      reg_location_[ssa_reg_high].high_word = true;
-      reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
-      reg_location_[ssa_reg_high].wide = true;
-    }
-  }
-
-  // If the register locations have already been allocated, add the information
-  // about the temp. We will not overflow because they have been initialized
-  // to support the maximum number of temps. For ME temps that have multiple
-  // ssa versions, the structures below will be expanded on the post pass cleanup.
-  if (reg_location_ != nullptr) {
-    int ssa_reg_low = compiler_temp->s_reg_low;
-    reg_location_[ssa_reg_low] = temp_loc;
-    reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
-    reg_location_[ssa_reg_low].wide = wide;
-  }
-
-  return compiler_temp;
-}
-
-void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) {
-  // Once the compiler temps have been committed, it's too late for any modifications.
-  DCHECK_EQ(compiler_temps_committed_, false);
-
-  size_t used_temps = wide ? 2u : 1u;
-
-  if (ct_type == kCompilerTempBackend) {
-    DCHECK(requested_backend_temp_);
-
-    // Make the temps available to backend again.
-    reserved_temps_for_backend_ += used_temps;
-  } else if (ct_type == kCompilerTempVR) {
-    DCHECK(!requested_backend_temp_);
-  } else {
-    UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type);
-  }
-
-  // Reduce the number of non-special compiler temps.
-  DCHECK_LE(used_temps, num_non_special_compiler_temps_);
-  num_non_special_compiler_temps_ -= used_temps;
-
-  // Check that this was really the last temp.
-  DCHECK_EQ(static_cast<size_t>(temp->v_reg),
-            GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_);
-
-  if (cu_->verbose) {
-    LOG(INFO) << "Last temporary has been removed.";
-  }
-}
-
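-// Statically evaluate a conditional branch whose operands are compile-time
-// constants (src2 is 0 for the IF_xxZ forms).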
-static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
-  bool is_taken;
-  switch (opcode) {
-    case Instruction::IF_EQ: is_taken = (src1 == src2); break;
-    case Instruction::IF_NE: is_taken = (src1 != src2); break;
-    case Instruction::IF_LT: is_taken = (src1 < src2); break;
-    case Instruction::IF_GE: is_taken = (src1 >= src2); break;
-    case Instruction::IF_GT: is_taken = (src1 > src2); break;
-    case Instruction::IF_LE: is_taken = (src1 <= src2); break;
-    case Instruction::IF_EQZ: is_taken = (src1 == 0); break;
-    case Instruction::IF_NEZ: is_taken = (src1 != 0); break;
-    case Instruction::IF_LTZ: is_taken = (src1 < 0); break;
-    case Instruction::IF_GEZ: is_taken = (src1 >= 0); break;
-    case Instruction::IF_GTZ: is_taken = (src1 > 0); break;
-    case Instruction::IF_LEZ: is_taken = (src1 <= 0); break;
-    default:
-      LOG(FATAL) << "Unexpected opcode " << opcode;
-      UNREACHABLE();
-  }
-  return is_taken;
-}
-
-/* Do some MIR-level extended basic block optimizations */
-bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
-  if (bb->block_type == kDead) {
-    return true;
-  }
-  // Currently, multiply-accumulate backend support is only available on arm32 and arm64.
-  if (cu_->instruction_set == kArm64 || cu_->instruction_set == kThumb2) {
-    MultiplyAddOpt(bb);
-  }
-  bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1u << kLocalValueNumbering)) == 0u;
-  std::unique_ptr<ScopedArenaAllocator> allocator;
-  std::unique_ptr<GlobalValueNumbering> global_valnum;
-  std::unique_ptr<LocalValueNumbering> local_valnum;
-  if (use_lvn) {
-    allocator.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-    global_valnum.reset(new (allocator.get()) GlobalValueNumbering(cu_, allocator.get(),
-                                                                   GlobalValueNumbering::kModeLvn));
-    local_valnum.reset(new (allocator.get()) LocalValueNumbering(global_valnum.get(), bb->id,
-                                                                 allocator.get()));
-  }
-  while (bb != nullptr) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      // TUNING: use the returned value number for CSE.
-      if (use_lvn) {
-        local_valnum->GetValueNumber(mir);
-      }
-      // Look for interesting opcodes, skip otherwise
-      Instruction::Code opcode = mir->dalvikInsn.opcode;
-      switch (opcode) {
-        case Instruction::IF_EQ:
-        case Instruction::IF_NE:
-        case Instruction::IF_LT:
-        case Instruction::IF_GE:
-        case Instruction::IF_GT:
-        case Instruction::IF_LE:
-          if (!IsConst(mir->ssa_rep->uses[1])) {
-            break;
-          }
-          FALLTHROUGH_INTENDED;
-        case Instruction::IF_EQZ:
-        case Instruction::IF_NEZ:
-        case Instruction::IF_LTZ:
-        case Instruction::IF_GEZ:
-        case Instruction::IF_GTZ:
-        case Instruction::IF_LEZ:
-          // Result known at compile time?
-          if (IsConst(mir->ssa_rep->uses[0])) {
-            int32_t rhs = (mir->ssa_rep->num_uses == 2) ? ConstantValue(mir->ssa_rep->uses[1]) : 0;
-            bool is_taken = EvaluateBranch(opcode, ConstantValue(mir->ssa_rep->uses[0]), rhs);
-            BasicBlockId edge_to_kill = is_taken ? bb->fall_through : bb->taken;
-            if (is_taken) {
-              // Replace with GOTO.
-              bb->fall_through = NullBasicBlockId;
-              mir->dalvikInsn.opcode = Instruction::GOTO;
-              mir->dalvikInsn.vA =
-                  IsInstructionIfCc(opcode) ? mir->dalvikInsn.vC : mir->dalvikInsn.vB;
-            } else {
-              // Make NOP.
-              bb->taken = NullBasicBlockId;
-              mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-            }
-            mir->ssa_rep->num_uses = 0;
-            BasicBlock* successor_to_unlink = GetBasicBlock(edge_to_kill);
-            successor_to_unlink->ErasePredecessor(bb->id);
-            // We have changed the graph structure.
-            dfs_orders_up_to_date_ = false;
-            domination_up_to_date_ = false;
-            topological_order_up_to_date_ = false;
-            // Keep MIR SSA rep, the worst that can happen is a Phi with just 1 input.
-          }
-          break;
-        case Instruction::CMPL_FLOAT:
-        case Instruction::CMPL_DOUBLE:
-        case Instruction::CMPG_FLOAT:
-        case Instruction::CMPG_DOUBLE:
-        case Instruction::CMP_LONG:
-          if ((cu_->disable_opt & (1 << kBranchFusing)) != 0) {
-            // Bitcode doesn't allow this optimization.
-            break;
-          }
-          if (mir->next != nullptr) {
-            MIR* mir_next = mir->next;
-            // Make sure result of cmp is used by next insn and nowhere else
-            if (IsInstructionIfCcZ(mir_next->dalvikInsn.opcode) &&
-                (mir->ssa_rep->defs[0] == mir_next->ssa_rep->uses[0]) &&
-                (GetSSAUseCount(mir->ssa_rep->defs[0]) == 1)) {
-              mir_next->meta.ccode = ConditionCodeForIfCcZ(mir_next->dalvikInsn.opcode);
-              switch (opcode) {
-                case Instruction::CMPL_FLOAT:
-                  mir_next->dalvikInsn.opcode =
-                      static_cast<Instruction::Code>(kMirOpFusedCmplFloat);
-                  break;
-                case Instruction::CMPL_DOUBLE:
-                  mir_next->dalvikInsn.opcode =
-                      static_cast<Instruction::Code>(kMirOpFusedCmplDouble);
-                  break;
-                case Instruction::CMPG_FLOAT:
-                  mir_next->dalvikInsn.opcode =
-                      static_cast<Instruction::Code>(kMirOpFusedCmpgFloat);
-                  break;
-                case Instruction::CMPG_DOUBLE:
-                  mir_next->dalvikInsn.opcode =
-                      static_cast<Instruction::Code>(kMirOpFusedCmpgDouble);
-                  break;
-                case Instruction::CMP_LONG:
-                  mir_next->dalvikInsn.opcode =
-                      static_cast<Instruction::Code>(kMirOpFusedCmpLong);
-                  break;
-                default: LOG(ERROR) << "Unexpected opcode: " << opcode;
-              }
-              mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-              // Clear use count of temp VR.
-              use_counts_[mir->ssa_rep->defs[0]] = 0;
-              raw_use_counts_[mir->ssa_rep->defs[0]] = 0;
-              // Copy the SSA information that is relevant.
-              mir_next->ssa_rep->num_uses = mir->ssa_rep->num_uses;
-              mir_next->ssa_rep->uses = mir->ssa_rep->uses;
-              mir_next->ssa_rep->num_defs = 0;
-              mir->ssa_rep->num_uses = 0;
-              mir->ssa_rep->num_defs = 0;
-              // Copy in the decoded instruction information for potential SSA re-creation.
-              mir_next->dalvikInsn.vA = mir->dalvikInsn.vB;
-              mir_next->dalvikInsn.vB = mir->dalvikInsn.vC;
-            }
-          }
-          break;
-        default:
-          break;
-      }
-      // Is this the select pattern?
-      // TODO: flesh out support for Mips.  NOTE: llvm's select op doesn't quite work here.
-      // TUNING: expand to support IF_xx compare & branches
-      if ((cu_->instruction_set == kArm64 || cu_->instruction_set == kThumb2 ||
-           cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) &&
-          IsInstructionIfCcZ(mir->dalvikInsn.opcode)) {
-        BasicBlock* ft = GetBasicBlock(bb->fall_through);
-        DCHECK(ft != nullptr);
-        BasicBlock* ft_ft = GetBasicBlock(ft->fall_through);
-        BasicBlock* ft_tk = GetBasicBlock(ft->taken);
-
-        BasicBlock* tk = GetBasicBlock(bb->taken);
-        DCHECK(tk != nullptr);
-        BasicBlock* tk_ft = GetBasicBlock(tk->fall_through);
-        BasicBlock* tk_tk = GetBasicBlock(tk->taken);
-
-        /*
-         * In the select pattern, the taken edge goes to a block that unconditionally
-         * transfers to the rejoin block and the fall_through edge goes to a block that
-         * unconditionally falls through to the rejoin block.
-         */
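-        // As an illustrative (assumed) example, `x = cond ? a : b;` typically
-        // compiles to exactly this diamond: one MOVE or CONST per arm, rejoining
-        // at a Phi in the merge block.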
-        if ((tk_ft == nullptr) && (ft_tk == nullptr) && (tk_tk == ft_ft) &&
-            (Predecessors(tk) == 1) && (Predecessors(ft) == 1)) {
-          /*
-           * Okay - we have the basic diamond shape.
-           */
-
-          // TODO: Add logic for LONG.
-          // Are the block bodies something we can handle?
-          if ((ft->first_mir_insn == ft->last_mir_insn) &&
-              (tk->first_mir_insn != tk->last_mir_insn) &&
-              (tk->first_mir_insn->next == tk->last_mir_insn) &&
-              ((SelectKind(ft->first_mir_insn) == kSelectMove) ||
-              (SelectKind(ft->first_mir_insn) == kSelectConst)) &&
-              (SelectKind(ft->first_mir_insn) == SelectKind(tk->first_mir_insn)) &&
-              (SelectKind(tk->last_mir_insn) == kSelectGoto)) {
-            // Almost there.  Are the instructions targeting the same vreg?
-            MIR* if_true = tk->first_mir_insn;
-            MIR* if_false = ft->first_mir_insn;
-            // It's possible that the target of the select isn't used - skip those (rare) cases.
-            MIR* phi = FindPhi(tk_tk, if_true->ssa_rep->defs[0]);
-            if ((phi != nullptr) && (if_true->dalvikInsn.vA == if_false->dalvikInsn.vA)) {
-              /*
-               * We'll convert the IF_EQZ/IF_NEZ to a SELECT.  We need to find the
-               * Phi node in the merge block and delete it (while using the SSA name
-               * of the merge as the target of the SELECT).  Delete both taken and
-               * fallthrough blocks, and set fallthrough to merge block.
-               * NOTE: not updating other dataflow info (no longer used at this point).
-               * If this changes, need to update i_dom, etc. here (and in CombineBlocks).
-               */
-              mir->meta.ccode = ConditionCodeForIfCcZ(mir->dalvikInsn.opcode);
-              mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpSelect);
-              bool const_form = (SelectKind(if_true) == kSelectConst);
-              if ((SelectKind(if_true) == kSelectMove)) {
-                if (IsConst(if_true->ssa_rep->uses[0]) &&
-                    IsConst(if_false->ssa_rep->uses[0])) {
-                    const_form = true;
-                    if_true->dalvikInsn.vB = ConstantValue(if_true->ssa_rep->uses[0]);
-                    if_false->dalvikInsn.vB = ConstantValue(if_false->ssa_rep->uses[0]);
-                }
-              }
-              if (const_form) {
-                /*
-                 * TODO: If both constants are the same value, then instead of generating
-                 * a select, we should simply generate a const bytecode. This should be
-                 * considered after inlining which can lead to CFG of this form.
-                 */
-                // "true" set val in vB
-                mir->dalvikInsn.vB = if_true->dalvikInsn.vB;
-                // "false" set val in vC
-                mir->dalvikInsn.vC = if_false->dalvikInsn.vB;
-              } else {
-                DCHECK_EQ(SelectKind(if_true), kSelectMove);
-                DCHECK_EQ(SelectKind(if_false), kSelectMove);
-                int32_t* src_ssa = arena_->AllocArray<int32_t>(3, kArenaAllocDFInfo);
-                src_ssa[0] = mir->ssa_rep->uses[0];
-                src_ssa[1] = if_true->ssa_rep->uses[0];
-                src_ssa[2] = if_false->ssa_rep->uses[0];
-                mir->ssa_rep->uses = src_ssa;
-                mir->ssa_rep->num_uses = 3;
-              }
-              AllocateSSADefData(mir, 1);
-              /*
-               * There is usually a Phi node in the join block for our two cases.  If the
-               * Phi node only contains our two cases as input, we will use the result
-               * SSA name of the Phi node as our select result and delete the Phi.  If
-               * the Phi node has more than two operands, we will arbitrarily use the SSA
-               * name of the "false" path, delete the SSA name of the "true" path from the
-               * Phi node (and fix up the incoming arc list).
-               */
-              if (phi->ssa_rep->num_uses == 2) {
-                mir->ssa_rep->defs[0] = phi->ssa_rep->defs[0];
-                // Rather than changing the Phi to kMirOpNop, remove it completely.
-                // This avoids leaving other Phis after kMirOpNop (i.e. a non-Phi) insn.
-                tk_tk->RemoveMIR(phi);
-                int dead_false_def = if_false->ssa_rep->defs[0];
-                raw_use_counts_[dead_false_def] = use_counts_[dead_false_def] = 0;
-              } else {
-                int live_def = if_false->ssa_rep->defs[0];
-                mir->ssa_rep->defs[0] = live_def;
-              }
-              int dead_true_def = if_true->ssa_rep->defs[0];
-              raw_use_counts_[dead_true_def] = use_counts_[dead_true_def] = 0;
-              // Update ending vreg->sreg map for GC maps generation.
-              int def_vreg = SRegToVReg(mir->ssa_rep->defs[0]);
-              bb->data_flow_info->vreg_to_ssa_map_exit[def_vreg] = mir->ssa_rep->defs[0];
-              // We want to remove ft and tk and link bb directly to ft_ft. First, we need
-              // to update all Phi inputs correctly with UpdatePredecessor(ft->id, bb->id)
-              // since the live_def above comes from ft->first_mir_insn (if_false).
-              DCHECK(if_false == ft->first_mir_insn);
-              ft_ft->UpdatePredecessor(ft->id, bb->id);
-              // Correct the rest of the links between bb, ft and ft_ft.
-              ft->ErasePredecessor(bb->id);
-              ft->fall_through = NullBasicBlockId;
-              bb->fall_through = ft_ft->id;
-              // Now we can kill tk and ft.
-              tk->Kill(this);
-              ft->Kill(this);
-              // NOTE: DFS order, domination info and topological order are still usable
-              // despite the newly dead blocks.
-            }
-          }
-        }
-      }
-    }
-    bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) :
-        nullptr;
-  }
-  if (use_lvn && UNLIKELY(!global_valnum->Good())) {
-    LOG(WARNING) << "LVN overflow in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  }
-
-  return true;
-}
-
-/* Collect stats on number of checks removed */
-void MIRGraph::CountChecks(class BasicBlock* bb) {
-  if (bb->data_flow_info != nullptr) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (mir->ssa_rep == nullptr) {
-        continue;
-      }
-      uint64_t df_attributes = GetDataFlowAttributes(mir);
-      if (df_attributes & DF_HAS_NULL_CHKS) {
-        checkstats_->null_checks++;
-        if (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) {
-          checkstats_->null_checks_eliminated++;
-        }
-      }
-      if (df_attributes & DF_HAS_RANGE_CHKS) {
-        checkstats_->range_checks++;
-        if (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) {
-          checkstats_->range_checks_eliminated++;
-        }
-      }
-    }
-  }
-}
-
-/* Try to make the common case the fallthrough path. */
-bool MIRGraph::LayoutBlocks(BasicBlock* bb) {
-  // TODO: For now, just looking for direct throws.  Consider generalizing for profile feedback.
-  if (!bb->explicit_throw) {
-    return false;
-  }
-
-  // If we visited it, we are done.
-  if (bb->visited) {
-    return false;
-  }
-  bb->visited = true;
-
-  BasicBlock* walker = bb;
-  while (true) {
-    // Check termination conditions.
-    if ((walker->block_type == kEntryBlock) || (Predecessors(walker) != 1)) {
-      break;
-    }
-    DCHECK(!walker->predecessors.empty());
-    BasicBlock* prev = GetBasicBlock(walker->predecessors[0]);
-
-    // If we visited the predecessor, we are done.
-    if (prev->visited) {
-      return false;
-    }
-    prev->visited = true;
-
-    if (prev->conditional_branch) {
-      if (GetBasicBlock(prev->fall_through) == walker) {
-        // Already done - return.
-        break;
-      }
-      DCHECK_EQ(walker, GetBasicBlock(prev->taken));
-      // Got one.  Flip it and exit.
-      Instruction::Code opcode = prev->last_mir_insn->dalvikInsn.opcode;
-      switch (opcode) {
-        case Instruction::IF_EQ: opcode = Instruction::IF_NE; break;
-        case Instruction::IF_NE: opcode = Instruction::IF_EQ; break;
-        case Instruction::IF_LT: opcode = Instruction::IF_GE; break;
-        case Instruction::IF_GE: opcode = Instruction::IF_LT; break;
-        case Instruction::IF_GT: opcode = Instruction::IF_LE; break;
-        case Instruction::IF_LE: opcode = Instruction::IF_GT; break;
-        case Instruction::IF_EQZ: opcode = Instruction::IF_NEZ; break;
-        case Instruction::IF_NEZ: opcode = Instruction::IF_EQZ; break;
-        case Instruction::IF_LTZ: opcode = Instruction::IF_GEZ; break;
-        case Instruction::IF_GEZ: opcode = Instruction::IF_LTZ; break;
-        case Instruction::IF_GTZ: opcode = Instruction::IF_LEZ; break;
-        case Instruction::IF_LEZ: opcode = Instruction::IF_GTZ; break;
-        default: LOG(FATAL) << "Unexpected opcode " << opcode;
-      }
-      prev->last_mir_insn->dalvikInsn.opcode = opcode;
-      BasicBlockId t_bb = prev->taken;
-      prev->taken = prev->fall_through;
-      prev->fall_through = t_bb;
-      break;
-    }
-    walker = prev;
-  }
-  return false;
-}
-
-/* Combine any basic blocks terminated by instructions that we now know can't throw */
-void MIRGraph::CombineBlocks(class BasicBlock* bb) {
-  // Loop here to allow combining a sequence of blocks
-  while ((bb->block_type == kDalvikByteCode) &&
-      (bb->last_mir_insn != nullptr) &&
-      (static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) == kMirOpCheck)) {
-    MIR* mir = bb->last_mir_insn;
-    DCHECK(bb->first_mir_insn != nullptr);
-
-    // Get the paired insn and check if it can still throw.
-    MIR* throw_insn = mir->meta.throw_insn;
-    if (CanThrow(throw_insn)) {
-      break;
-    }
-
-    // OK - got one.  Combine
-    BasicBlock* bb_next = GetBasicBlock(bb->fall_through);
-    DCHECK(!bb_next->catch_entry);
-    DCHECK_EQ(bb_next->predecessors.size(), 1u);
-
-    // Now move instructions from bb_next to bb. Start off by doing a sanity check
-    // that kMirOpCheck's throw instruction is the first one in bb_next.
-    DCHECK_EQ(bb_next->first_mir_insn, throw_insn);
-    // Now move all instructions (throw instruction to last one) from bb_next to bb.
-    MIR* last_to_move = bb_next->last_mir_insn;
-    bb_next->RemoveMIRList(throw_insn, last_to_move);
-    bb->InsertMIRListAfter(bb->last_mir_insn, throw_insn, last_to_move);
-    // The kMirOpCheck instruction is not needed anymore.
-    mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-    bb->RemoveMIR(mir);
-
-    // Before we overwrite successors, remove their predecessor links to bb.
-    bb_next->ErasePredecessor(bb->id);
-    if (bb->taken != NullBasicBlockId) {
-      DCHECK_EQ(bb->successor_block_list_type, kNotUsed);
-      BasicBlock* bb_taken = GetBasicBlock(bb->taken);
-      // bb->taken will be overwritten below.
-      DCHECK_EQ(bb_taken->block_type, kExceptionHandling);
-      DCHECK_EQ(bb_taken->predecessors.size(), 1u);
-      DCHECK_EQ(bb_taken->predecessors[0], bb->id);
-      bb_taken->predecessors.clear();
-      bb_taken->block_type = kDead;
-      DCHECK(bb_taken->data_flow_info == nullptr);
-    } else {
-      DCHECK_EQ(bb->successor_block_list_type, kCatch);
-      for (SuccessorBlockInfo* succ_info : bb->successor_blocks) {
-        if (succ_info->block != NullBasicBlockId) {
-          BasicBlock* succ_bb = GetBasicBlock(succ_info->block);
-          DCHECK(succ_bb->catch_entry);
-          succ_bb->ErasePredecessor(bb->id);
-        }
-      }
-    }
-    // Use the successor info from the next block
-    bb->successor_block_list_type = bb_next->successor_block_list_type;
-    bb->successor_blocks.swap(bb_next->successor_blocks);  // Swap instead of copying.
-    bb_next->successor_block_list_type = kNotUsed;
-    // Use the ending block linkage from the next block
-    bb->fall_through = bb_next->fall_through;
-    bb_next->fall_through = NullBasicBlockId;
-    bb->taken = bb_next->taken;
-    bb_next->taken = NullBasicBlockId;
-    /*
-     * If lower-half of pair of blocks to combine contained
-     * a return or a conditional branch or an explicit throw,
-     * move the flag to the newly combined block.
-     */
-    bb->terminated_by_return = bb_next->terminated_by_return;
-    bb->conditional_branch = bb_next->conditional_branch;
-    bb->explicit_throw = bb_next->explicit_throw;
-    // Merge the use_lvn flag.
-    bb->use_lvn |= bb_next->use_lvn;
-
-    // Kill the unused block.
-    bb_next->data_flow_info = nullptr;
-
-    /*
-     * NOTE: we aren't updating all dataflow info here.  Should either make sure this pass
-     * happens after uses of i_dominated, dom_frontier or update the dataflow info here.
-     * NOTE: GVN uses bb->data_flow_info->live_in_v which is unaffected by the block merge.
-     */
-
-    // Kill bb_next and remap now-dead id to parent.
-    bb_next->block_type = kDead;
-    bb_next->data_flow_info = nullptr;  // Must be null for dead blocks. (Relied on by the GVN.)
-    block_id_map_.Overwrite(bb_next->id, bb->id);
-    // Update predecessors in children.
-    ChildBlockIterator iter(bb, this);
-    for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
-      child->UpdatePredecessor(bb_next->id, bb->id);
-    }
-
-    // DFS orders, domination and topological order are not up to date anymore.
-    dfs_orders_up_to_date_ = false;
-    domination_up_to_date_ = false;
-    topological_order_up_to_date_ = false;
-
-    // Now, loop back and see if we can keep going
-  }
-}
-
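-// Gate for the null check elimination pass: skip it when the pass is disabled or
-// no MIR in the method carries a null check; otherwise allocate the per-block
-// bit vectors used by the dataflow analysis and clear MIR_MARK on all insns.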
-bool MIRGraph::EliminateNullChecksGate() {
-  if ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 ||
-      (merged_df_flags_ & DF_HAS_NULL_CHKS) == 0) {
-    return false;
-  }
-
-  DCHECK(temp_scoped_alloc_.get() == nullptr);
-  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_.nce.num_vregs = GetNumOfCodeAndTempVRs();
-  temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck);
-  temp_.nce.ending_vregs_to_check_matrix =
-      temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc);
-  std::fill_n(temp_.nce.ending_vregs_to_check_matrix, GetNumBlocks(), nullptr);
-
-  // reset MIR_MARK
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      mir->optimization_flags &= ~MIR_MARK;
-    }
-  }
-
-  return true;
-}
-
-/*
- * Eliminate unnecessary null checks for a basic block.
- */
-bool MIRGraph::EliminateNullChecks(BasicBlock* bb) {
-  if (bb->block_type != kDalvikByteCode && bb->block_type != kEntryBlock) {
-    // Ignore the kExitBlock as well.
-    DCHECK(bb->first_mir_insn == nullptr);
-    return false;
-  }
-
-  ArenaBitVector* vregs_to_check = temp_.nce.work_vregs_to_check;
-  /*
-   * Set initial state. Catch blocks don't need any special treatment.
-   */
-  if (bb->block_type == kEntryBlock) {
-    vregs_to_check->ClearAllBits();
-    // Assume all ins are objects.
-    for (uint16_t in_reg = GetFirstInVR();
-         in_reg < GetNumOfCodeVRs(); in_reg++) {
-      vregs_to_check->SetBit(in_reg);
-    }
-    if ((cu_->access_flags & kAccStatic) == 0) {
-      // If non-static method, mark "this" as non-null.
-      int this_reg = GetFirstInVR();
-      vregs_to_check->ClearBit(this_reg);
-    }
-  } else {
-    DCHECK_EQ(bb->block_type, kDalvikByteCode);
-    // Starting state is union of all incoming arcs.
-    bool copied_first = false;
-    for (BasicBlockId pred_id : bb->predecessors) {
-      if (temp_.nce.ending_vregs_to_check_matrix[pred_id] == nullptr) {
-        continue;
-      }
-      BasicBlock* pred_bb = GetBasicBlock(pred_id);
-      DCHECK(pred_bb != nullptr);
-      MIR* null_check_insn = nullptr;
-      // Check to see if predecessor had an explicit null-check.
-      if (pred_bb->BranchesToSuccessorOnlyIfNotZero(bb->id)) {
-        // Remember the null check insn if there's no other predecessor requiring null check.
-        if (!copied_first || !vregs_to_check->IsBitSet(pred_bb->last_mir_insn->dalvikInsn.vA)) {
-          null_check_insn = pred_bb->last_mir_insn;
-          DCHECK(null_check_insn != nullptr);
-        }
-      }
-      if (!copied_first) {
-        copied_first = true;
-        vregs_to_check->Copy(temp_.nce.ending_vregs_to_check_matrix[pred_id]);
-      } else {
-        vregs_to_check->Union(temp_.nce.ending_vregs_to_check_matrix[pred_id]);
-      }
-      if (null_check_insn != nullptr) {
-        vregs_to_check->ClearBit(null_check_insn->dalvikInsn.vA);
-      }
-    }
-    DCHECK(copied_first);  // At least one predecessor must have been processed before this bb.
-  }
-  // At this point, vregs_to_check shows which sregs have an object definition with
-  // no intervening uses.
-
-  // Walk through the instructions in the block, updating as necessary.
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    uint64_t df_attributes = GetDataFlowAttributes(mir);
-
-    if ((df_attributes & DF_NULL_TRANSFER_N) != 0u) {
-      // The algorithm was written in a phi agnostic way.
-      continue;
-    }
-
-    // Might need a null check?
-    if (df_attributes & DF_HAS_NULL_CHKS) {
-      int src_vreg;
-      if (df_attributes & DF_NULL_CHK_OUT0) {
-        DCHECK_NE(df_attributes & DF_IS_INVOKE, 0u);
-        src_vreg = mir->dalvikInsn.vC;
-      } else if (df_attributes & DF_NULL_CHK_B) {
-        DCHECK_NE(df_attributes & DF_REF_B, 0u);
-        src_vreg = mir->dalvikInsn.vB;
-      } else {
-        DCHECK_NE(df_attributes & DF_NULL_CHK_A, 0u);
-        DCHECK_NE(df_attributes & DF_REF_A, 0u);
-        src_vreg = mir->dalvikInsn.vA;
-      }
-      if (!vregs_to_check->IsBitSet(src_vreg)) {
-        // Eliminate the null check.
-        mir->optimization_flags |= MIR_MARK;
-      } else {
-        // Do the null check.
-        mir->optimization_flags &= ~MIR_MARK;
-        // Mark src_vreg as null-checked.
-        vregs_to_check->ClearBit(src_vreg);
-      }
-    }
-
-    if ((df_attributes & DF_A_WIDE) ||
-        (df_attributes & (DF_REF_A | DF_SETS_CONST | DF_NULL_TRANSFER)) == 0) {
-      continue;
-    }
-
-    /*
-     * First, mark all object definitions as requiring null check.
-     * Note: we can't tell if a CONST definition might be used as an object, so treat
-     * them all as object definitions.
-     */
-    if ((df_attributes & (DF_DA | DF_REF_A)) == (DF_DA | DF_REF_A) ||
-        (df_attributes & DF_SETS_CONST))  {
-      vregs_to_check->SetBit(mir->dalvikInsn.vA);
-    }
-
-    // Then, remove mark from all object definitions we know are non-null.
-    if (df_attributes & DF_NON_NULL_DST) {
-      // Mark target of NEW* as non-null
-      DCHECK_NE(df_attributes & DF_REF_A, 0u);
-      vregs_to_check->ClearBit(mir->dalvikInsn.vA);
-    }
-
-    // Mark non-null returns from invoke-style NEW*
-    if (df_attributes & DF_NON_NULL_RET) {
-      MIR* next_mir = mir->next;
-      // Next should be a MOVE_RESULT_OBJECT.
-      if (UNLIKELY(next_mir == nullptr)) {
-        // The MethodVerifier makes sure there's no MOVE_RESULT at the catch entry or branch
-        // target, so the MOVE_RESULT cannot be broken away into another block.
-        LOG(WARNING) << "Unexpected end of block following new";
-      } else if (UNLIKELY(next_mir->dalvikInsn.opcode != Instruction::MOVE_RESULT_OBJECT)) {
-        LOG(WARNING) << "Unexpected opcode following new: " << next_mir->dalvikInsn.opcode;
-      } else {
-        // Mark as null checked.
-        vregs_to_check->ClearBit(next_mir->dalvikInsn.vA);
-      }
-    }
-
-    // Propagate null check state on register copies.
-    if (df_attributes & DF_NULL_TRANSFER_0) {
-      DCHECK_EQ(df_attributes | ~(DF_DA | DF_REF_A | DF_UB | DF_REF_B), static_cast<uint64_t>(-1));
-      if (vregs_to_check->IsBitSet(mir->dalvikInsn.vB)) {
-        vregs_to_check->SetBit(mir->dalvikInsn.vA);
-      } else {
-        vregs_to_check->ClearBit(mir->dalvikInsn.vA);
-      }
-    }
-  }
-
-  // Did anything change?
-  bool nce_changed = false;
-  ArenaBitVector* old_ending_ssa_regs_to_check = temp_.nce.ending_vregs_to_check_matrix[bb->id];
-  if (old_ending_ssa_regs_to_check == nullptr) {
-    DCHECK(temp_scoped_alloc_.get() != nullptr);
-    nce_changed = vregs_to_check->GetHighestBitSet() != -1;
-    temp_.nce.ending_vregs_to_check_matrix[bb->id] = vregs_to_check;
-    // Create a new vregs_to_check for next BB.
-    temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
-        temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck);
-  } else if (!vregs_to_check->SameBitsSet(old_ending_ssa_regs_to_check)) {
-    nce_changed = true;
-    temp_.nce.ending_vregs_to_check_matrix[bb->id] = vregs_to_check;
-    temp_.nce.work_vregs_to_check = old_ending_ssa_regs_to_check;  // Reuse for next BB.
-  }
-  return nce_changed;
-}
-
-void MIRGraph::EliminateNullChecksEnd() {
-  // Clean up temporaries.
-  temp_.nce.num_vregs = 0u;
-  temp_.nce.work_vregs_to_check = nullptr;
-  temp_.nce.ending_vregs_to_check_matrix = nullptr;
-  DCHECK(temp_scoped_alloc_.get() != nullptr);
-  temp_scoped_alloc_.reset();
-
-  // converge MIR_MARK with MIR_IGNORE_NULL_CHECK
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      constexpr int kMarkToIgnoreNullCheckShift = kMIRMark - kMIRIgnoreNullCheck;
-      static_assert(kMarkToIgnoreNullCheckShift > 0, "Not a valid right-shift");
-      uint16_t mirMarkAdjustedToIgnoreNullCheck =
-          (mir->optimization_flags & MIR_MARK) >> kMarkToIgnoreNullCheckShift;
-      mir->optimization_flags |= mirMarkAdjustedToIgnoreNullCheck;
-    }
-  }
-}
-
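-// Set up the TypeInference helper on the scoped allocator; InferTypes() applies
-// it per basic block and InferTypesEnd() finalizes and tears it down.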
-void MIRGraph::InferTypesStart() {
-  DCHECK(temp_scoped_alloc_ != nullptr);
-  temp_.ssa.ti = new (temp_scoped_alloc_.get()) TypeInference(this, temp_scoped_alloc_.get());
-}
-
-/*
- * Perform type and size inference for a basic block.
- */
-bool MIRGraph::InferTypes(BasicBlock* bb) {
-  if (bb->data_flow_info == nullptr) return false;
-
-  DCHECK(temp_.ssa.ti != nullptr);
-  return temp_.ssa.ti->Apply(bb);
-}
-
-void MIRGraph::InferTypesEnd() {
-  DCHECK(temp_.ssa.ti != nullptr);
-  temp_.ssa.ti->Finish();
-  delete temp_.ssa.ti;
-  temp_.ssa.ti = nullptr;
-}
-
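-// Gate for the class init check elimination pass: assign a compact index to each
-// class that may still need an initialization check (keyed by declaring dex file
-// and class index) and allocate the bit vectors used to track them per block.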
-bool MIRGraph::EliminateClassInitChecksGate() {
-  if ((cu_->disable_opt & (1 << kClassInitCheckElimination)) != 0 ||
-      (merged_df_flags_ & DF_CLINIT) == 0) {
-    return false;
-  }
-
-  DCHECK(temp_scoped_alloc_.get() == nullptr);
-  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-
-  // Each insn we use here has at least 2 code units, offset/2 will be a unique index.
-  const size_t end = (GetNumDalvikInsns() + 1u) / 2u;
-  temp_.cice.indexes = temp_scoped_alloc_->AllocArray<uint16_t>(end, kArenaAllocGrowableArray);
-  std::fill_n(temp_.cice.indexes, end, 0xffffu);
-
-  uint32_t unique_class_count = 0u;
-  {
-    // Get unique_class_count and store indexes in temp_.cice.indexes using a map
-    // on a nested ScopedArenaAllocator.
-
-    // Embed the map value in the entry to save space.
-    struct MapEntry {
-      // Map key: the class identified by the declaring dex file and type index.
-      const DexFile* declaring_dex_file;
-      uint16_t declaring_class_idx;
-      // Map value: index into bit vectors of classes requiring initialization checks.
-      uint16_t index;
-    };
-    struct MapEntryComparator {
-      bool operator()(const MapEntry& lhs, const MapEntry& rhs) const {
-        if (lhs.declaring_class_idx != rhs.declaring_class_idx) {
-          return lhs.declaring_class_idx < rhs.declaring_class_idx;
-        }
-        return lhs.declaring_dex_file < rhs.declaring_dex_file;
-      }
-    };
-
-    ScopedArenaAllocator allocator(&cu_->arena_stack);
-    ScopedArenaSet<MapEntry, MapEntryComparator> class_to_index_map(MapEntryComparator(),
-                                                                    allocator.Adapter());
-
-    // First, find all SGET/SPUTs that may need class initialization checks, record INVOKE_STATICs.
-    AllNodesIterator iter(this);
-    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-      if (bb->block_type == kDalvikByteCode) {
-        for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-          if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
-            const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir);
-            if (!field_info.IsReferrersClass()) {
-              DCHECK_LT(class_to_index_map.size(), 0xffffu);
-              MapEntry entry = {
-                  // Treat unresolved fields as if each had its own class.
-                  field_info.IsResolved() ? field_info.DeclaringDexFile()
-                                          : nullptr,
-                  field_info.IsResolved() ? field_info.DeclaringClassIndex()
-                                          : field_info.FieldIndex(),
-                  static_cast<uint16_t>(class_to_index_map.size())
-              };
-              uint16_t index = class_to_index_map.insert(entry).first->index;
-              // Using offset/2 for index into temp_.cice.indexes.
-              temp_.cice.indexes[mir->offset / 2u] = index;
-            }
-          } else if (IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
-            const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
-            DCHECK(method_info.IsStatic());
-            if (method_info.FastPath() && !method_info.IsReferrersClass()) {
-              MapEntry entry = {
-                  method_info.DeclaringDexFile(),
-                  method_info.DeclaringClassIndex(),
-                  static_cast<uint16_t>(class_to_index_map.size())
-              };
-              uint16_t index = class_to_index_map.insert(entry).first->index;
-              // Using offset/2 for index into temp_.cice.indexes.
-              temp_.cice.indexes[mir->offset / 2u] = index;
-            }
-          }
-        }
-      }
-    }
-    unique_class_count = static_cast<uint32_t>(class_to_index_map.size());
-  }
-
-  if (unique_class_count == 0u) {
-    // All SGET/SPUTs refer to initialized classes. Nothing to do.
-    temp_.cice.indexes = nullptr;
-    temp_scoped_alloc_.reset();
-    return false;
-  }
-
-  // 2 bits for each class: is class initialized, is class in dex cache.
-  temp_.cice.num_class_bits = 2u * unique_class_count;
-  temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck);
-  temp_.cice.ending_classes_to_check_matrix =
-      temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc);
-  std::fill_n(temp_.cice.ending_classes_to_check_matrix, GetNumBlocks(), nullptr);
-  DCHECK_GT(temp_.cice.num_class_bits, 0u);
-  return true;
-}
-
-/*
- * Eliminate unnecessary class initialization checks for a basic block.
- */
-bool MIRGraph::EliminateClassInitChecks(BasicBlock* bb) {
-  DCHECK_EQ((cu_->disable_opt & (1 << kClassInitCheckElimination)), 0u);
-  if (bb->block_type != kDalvikByteCode && bb->block_type != kEntryBlock) {
-    // Ignore the kExitBlock as well.
-    DCHECK(bb->first_mir_insn == nullptr);
-    return false;
-  }
-
-  /*
-   * Set initial state.  Catch blocks don't need any special treatment.
-   */
-  ArenaBitVector* classes_to_check = temp_.cice.work_classes_to_check;
-  DCHECK(classes_to_check != nullptr);
-  if (bb->block_type == kEntryBlock) {
-    classes_to_check->SetInitialBits(temp_.cice.num_class_bits);
-  } else {
-    // Starting state is union of all incoming arcs.
-    bool copied_first = false;
-    for (BasicBlockId pred_id : bb->predecessors) {
-      if (temp_.cice.ending_classes_to_check_matrix[pred_id] == nullptr) {
-        continue;
-      }
-      if (!copied_first) {
-        copied_first = true;
-        classes_to_check->Copy(temp_.cice.ending_classes_to_check_matrix[pred_id]);
-      } else {
-        classes_to_check->Union(temp_.cice.ending_classes_to_check_matrix[pred_id]);
-      }
-    }
-    DCHECK(copied_first);  // At least one predecessor must have been processed before this bb.
-  }
-  // At this point, classes_to_check shows which classes need clinit checks.
-
-  // Walk through the instructions in the block, updating as necessary.
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    uint16_t index = temp_.cice.indexes[mir->offset / 2u];
-    if (index != 0xffffu) {
-      bool check_initialization = false;
-      bool check_dex_cache = false;
-
-      // NOTE: index != 0xffff does not guarantee that this is an SGET/SPUT/INVOKE_STATIC.
-      // Dex instructions with width 1 can have the same offset/2.
-
-      if (IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)) {
-        check_initialization = true;
-        check_dex_cache = true;
-      } else if (IsInstructionInvokeStatic(mir->dalvikInsn.opcode)) {
-        check_initialization = true;
-        // NOTE: INVOKE_STATIC doesn't guarantee that the type will be in the dex cache.
-      }
-
-      if (check_dex_cache) {
-        uint32_t check_dex_cache_index = 2u * index + 1u;
-        if (!classes_to_check->IsBitSet(check_dex_cache_index)) {
-          // Eliminate the class init check.
-          mir->optimization_flags |= MIR_CLASS_IS_IN_DEX_CACHE;
-        } else {
-          // Do the class init check.
-          mir->optimization_flags &= ~MIR_CLASS_IS_IN_DEX_CACHE;
-        }
-        classes_to_check->ClearBit(check_dex_cache_index);
-      }
-      if (check_initialization) {
-        uint32_t check_clinit_index = 2u * index;
-        if (!classes_to_check->IsBitSet(check_clinit_index)) {
-          // Eliminate the class init check.
-          mir->optimization_flags |= MIR_CLASS_IS_INITIALIZED;
-        } else {
-          // Do the class init check.
-          mir->optimization_flags &= ~MIR_CLASS_IS_INITIALIZED;
-        }
-        // Mark the class as initialized.
-        classes_to_check->ClearBit(check_clinit_index);
-      }
-    }
-  }
-
-  // Did anything change?
-  bool changed = false;
-  ArenaBitVector* old_ending_classes_to_check = temp_.cice.ending_classes_to_check_matrix[bb->id];
-  if (old_ending_classes_to_check == nullptr) {
-    DCHECK(temp_scoped_alloc_.get() != nullptr);
-    changed = classes_to_check->GetHighestBitSet() != -1;
-    temp_.cice.ending_classes_to_check_matrix[bb->id] = classes_to_check;
-    // Create a new classes_to_check for next BB.
-    temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
-        temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck);
-  } else if (!classes_to_check->Equal(old_ending_classes_to_check)) {
-    changed = true;
-    temp_.cice.ending_classes_to_check_matrix[bb->id] = classes_to_check;
-    temp_.cice.work_classes_to_check = old_ending_classes_to_check;  // Reuse for next BB.
-  }
-  return changed;
-}
-
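-// Editorial sketch, not part of the original sources: the pass above packs
-// two bits per class index into classes_to_check. Hypothetical helpers
-// illustrating the layout:
-constexpr uint32_t ExampleClInitCheckBit(uint32_t class_index) {
-  return 2u * class_index;        // Cleared once the class is initialized.
-}
-constexpr uint32_t ExampleDexCacheBit(uint32_t class_index) {
-  return 2u * class_index + 1u;   // Cleared once the class is in the dex cache.
-}
-// An SGET/SPUT clears both bits; INVOKE_STATIC clears only the clinit bit,
-// since it does not guarantee a dex cache entry for the type.
-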
-void MIRGraph::EliminateClassInitChecksEnd() {
-  // Clean up temporaries.
-  temp_.cice.num_class_bits = 0u;
-  temp_.cice.work_classes_to_check = nullptr;
-  temp_.cice.ending_classes_to_check_matrix = nullptr;
-  DCHECK(temp_.cice.indexes != nullptr);
-  temp_.cice.indexes = nullptr;
-  DCHECK(temp_scoped_alloc_.get() != nullptr);
-  temp_scoped_alloc_.reset();
-}
-
-static void DisableGVNDependentOptimizations(CompilationUnit* cu) {
-  cu->disable_opt |= (1u << kGvnDeadCodeElimination);
-}
-
-bool MIRGraph::ApplyGlobalValueNumberingGate() {
-  if (GlobalValueNumbering::Skip(cu_)) {
-    DisableGVNDependentOptimizations(cu_);
-    return false;
-  }
-
-  DCHECK(temp_scoped_alloc_ == nullptr);
-  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_.gvn.ifield_ids =
-      GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
-  temp_.gvn.sfield_ids =
-      GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
-  DCHECK(temp_.gvn.gvn == nullptr);
-  temp_.gvn.gvn = new (temp_scoped_alloc_.get()) GlobalValueNumbering(
-      cu_, temp_scoped_alloc_.get(), GlobalValueNumbering::kModeGvn);
-  return true;
-}
-
-bool MIRGraph::ApplyGlobalValueNumbering(BasicBlock* bb) {
-  DCHECK(temp_.gvn.gvn != nullptr);
-  LocalValueNumbering* lvn = temp_.gvn.gvn->PrepareBasicBlock(bb);
-  if (lvn != nullptr) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      lvn->GetValueNumber(mir);
-    }
-  }
-  bool change = (lvn != nullptr) && temp_.gvn.gvn->FinishBasicBlock(bb);
-  return change;
-}
-
-void MIRGraph::ApplyGlobalValueNumberingEnd() {
-  // Perform modifications.
-  DCHECK(temp_.gvn.gvn != nullptr);
-  if (temp_.gvn.gvn->Good()) {
-    temp_.gvn.gvn->StartPostProcessing();
-    if (max_nested_loops_ != 0u) {
-      TopologicalSortIterator iter(this);
-      for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-        ScopedArenaAllocator allocator(&cu_->arena_stack);  // Reclaim memory after each LVN.
-        LocalValueNumbering* lvn = temp_.gvn.gvn->PrepareBasicBlock(bb, &allocator);
-        if (lvn != nullptr) {
-          for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-            lvn->GetValueNumber(mir);
-          }
-          bool change = temp_.gvn.gvn->FinishBasicBlock(bb);
-          DCHECK(!change) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-        }
-      }
-    }
-    // GVN was successful; running the LVN would be useless.
-    cu_->disable_opt |= (1u << kLocalValueNumbering);
-  } else {
-    LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    DisableGVNDependentOptimizations(cu_);
-  }
-}
-
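-// Editorial sketch, not part of the original sources: the core idea of value
-// numbering is that syntactically identical computations get the same value
-// name. A simplified, hypothetical illustration (assumes <map> and <tuple>):
-namespace example {
-struct ValueNumberingSketch {
-  std::map<std::tuple<int, uint16_t, uint16_t>, uint16_t> value_map;
-  uint16_t next_name = 0u;
-  uint16_t GetValueNumber(int op, uint16_t lhs, uint16_t rhs) {
-    auto result = value_map.emplace(std::make_tuple(op, lhs, rhs), next_name);
-    if (result.second) {
-      ++next_name;                // New expression: allocate a fresh name.
-    }
-    return result.first->second;  // Equal expressions share one name.
-  }
-};
-}  // namespace example
-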
-bool MIRGraph::EliminateDeadCodeGate() {
-  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0 || temp_.gvn.gvn == nullptr) {
-    return false;
-  }
-  DCHECK(temp_scoped_alloc_ != nullptr);
-  temp_.gvn.dce = new (temp_scoped_alloc_.get()) GvnDeadCodeElimination(temp_.gvn.gvn,
-                                                                        temp_scoped_alloc_.get());
-  return true;
-}
-
-bool MIRGraph::EliminateDeadCode(BasicBlock* bb) {
-  DCHECK(temp_scoped_alloc_ != nullptr);
-  DCHECK(temp_.gvn.gvn != nullptr);
-  if (bb->block_type != kDalvikByteCode) {
-    return false;
-  }
-  DCHECK(temp_.gvn.dce != nullptr);
-  temp_.gvn.dce->Apply(bb);
-  return false;  // No need to repeat.
-}
-
-void MIRGraph::EliminateDeadCodeEnd() {
-  if (kIsDebugBuild) {
-    // DCE can make some previously dead vregs alive again. Make sure the obsolete
-    // live-in information is not used anymore.
-    AllNodesIterator iter(this);
-    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-      if (bb->data_flow_info != nullptr) {
-        bb->data_flow_info->live_in_v = nullptr;
-      }
-    }
-  }
-}
-
-void MIRGraph::GlobalValueNumberingCleanup() {
-  // If the GVN didn't run, these pointers should be null and everything is effectively a no-op.
-  delete temp_.gvn.dce;
-  temp_.gvn.dce = nullptr;
-  delete temp_.gvn.gvn;
-  temp_.gvn.gvn = nullptr;
-  temp_.gvn.ifield_ids = nullptr;
-  temp_.gvn.sfield_ids = nullptr;
-  temp_scoped_alloc_.reset();
-}
-
-void MIRGraph::ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, MIR* iget_or_iput) {
-  uint32_t method_index = invoke->meta.method_lowering_info;
-  if (temp_.smi.processed_indexes->IsBitSet(method_index)) {
-    iget_or_iput->meta.ifield_lowering_info = temp_.smi.lowering_infos[method_index];
-    DCHECK_EQ(field_idx, GetIFieldLoweringInfo(iget_or_iput).FieldIndex());
-    return;
-  }
-
-  const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(invoke);
-  MethodReference target = method_info.GetTargetMethod();
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(
-      hs.NewHandle(cu_->class_linker->FindDexCache(hs.Self(), *target.dex_file)));
-  DexCompilationUnit inlined_unit(cu_,
-                                  cu_->class_loader,
-                                  cu_->class_linker,
-                                  *target.dex_file,
-                                  nullptr /* code_item not used */,
-                                  0u /* class_def_idx not used */,
-                                  target.dex_method_index,
-                                  0u /* access_flags not used */,
-                                  nullptr /* verified_method not used */,
-                                  dex_cache);
-  DexMemAccessType type = IGetOrIPutMemAccessType(iget_or_iput->dalvikInsn.opcode);
-  MirIFieldLoweringInfo inlined_field_info(field_idx, type, false);
-  MirIFieldLoweringInfo::Resolve(soa, cu_->compiler_driver, &inlined_unit, &inlined_field_info, 1u);
-  DCHECK(inlined_field_info.IsResolved());
-
-  uint32_t field_info_index = ifield_lowering_infos_.size();
-  ifield_lowering_infos_.push_back(inlined_field_info);
-  temp_.smi.processed_indexes->SetBit(method_index);
-  temp_.smi.lowering_infos[method_index] = field_info_index;
-  iget_or_iput->meta.ifield_lowering_info = field_info_index;
-}
-
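-// Editorial sketch, not part of the original sources: the function above is
-// a memoized resolution keyed by the invoke's method_lowering_info index.
-// Reduced to its core (hypothetical names, assumes <vector>):
-template <typename T, typename ResolveFn>
-T ExampleGetOrResolve(std::vector<bool>* done, std::vector<T>* cache,
-                      size_t index, ResolveFn resolve) {
-  if (!(*done)[index]) {
-    (*cache)[index] = resolve(index);  // Resolve only on first use.
-    (*done)[index] = true;
-  }
-  return (*cache)[index];
-}
-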
-bool MIRGraph::InlineSpecialMethodsGate() {
-  if ((cu_->disable_opt & (1 << kSuppressMethodInlining)) != 0 ||
-      method_lowering_infos_.size() == 0u) {
-    return false;
-  }
-  if (cu_->compiler_driver->GetMethodInlinerMap() == nullptr) {
-    // This isn't the Quick compiler.
-    return false;
-  }
-  return true;
-}
-
-void MIRGraph::InlineSpecialMethodsStart() {
-  // Prepare for inlining getters/setters. Since we're inlining at most 1 IGET/IPUT from
-  // each INVOKE, we can index the data by the MIR::meta::method_lowering_info index.
-
-  DCHECK(temp_scoped_alloc_.get() == nullptr);
-  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-  temp_.smi.num_indexes = method_lowering_infos_.size();
-  temp_.smi.processed_indexes = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), temp_.smi.num_indexes, false, kBitMapMisc);
-  temp_.smi.processed_indexes->ClearAllBits();
-  temp_.smi.lowering_infos =
-      temp_scoped_alloc_->AllocArray<uint16_t>(temp_.smi.num_indexes, kArenaAllocGrowableArray);
-}
-
-void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
-  if (bb->block_type != kDalvikByteCode) {
-    return;
-  }
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
-      continue;
-    }
-    if (!(mir->dalvikInsn.FlagsOf() & Instruction::kInvoke)) {
-      continue;
-    }
-    const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
-    if (!method_info.FastPath() || !method_info.IsSpecial()) {
-      continue;
-    }
-
-    InvokeType sharp_type = method_info.GetSharpType();
-    if ((sharp_type != kDirect) && (sharp_type != kStatic)) {
-      continue;
-    }
-
-    if (sharp_type == kStatic) {
-      bool needs_clinit = !method_info.IsClassInitialized() &&
-          ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0);
-      if (needs_clinit) {
-        continue;
-      }
-    }
-
-    DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-    MethodReference target = method_info.GetTargetMethod();
-    if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(target.dex_file)
-            ->GenInline(this, bb, mir, target.dex_method_index)) {
-      if (cu_->verbose || cu_->print_pass) {
-        LOG(INFO) << "SpecialMethodInliner: Inlined " << method_info.GetInvokeType() << " ("
-            << sharp_type << ") call to \"" << PrettyMethod(target.dex_method_index,
-                                                            *target.dex_file)
-            << "\" from \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-            << "\" @0x" << std::hex << mir->offset;
-      }
-    }
-  }
-}
-
-void MIRGraph::InlineSpecialMethodsEnd() {
-  // Clean up temporaries.
-  DCHECK(temp_.smi.lowering_infos != nullptr);
-  temp_.smi.lowering_infos = nullptr;
-  temp_.smi.num_indexes = 0u;
-  DCHECK(temp_.smi.processed_indexes != nullptr);
-  temp_.smi.processed_indexes = nullptr;
-  DCHECK(temp_scoped_alloc_.get() != nullptr);
-  temp_scoped_alloc_.reset();
-}
-
-void MIRGraph::DumpCheckStats() {
-  Checkstats* stats =
-      static_cast<Checkstats*>(arena_->Alloc(sizeof(Checkstats), kArenaAllocDFInfo));
-  checkstats_ = stats;
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    CountChecks(bb);
-  }
-  if (stats->null_checks > 0) {
-    float eliminated = static_cast<float>(stats->null_checks_eliminated);
-    float checks = static_cast<float>(stats->null_checks);
-    LOG(INFO) << "Null Checks: " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-              << stats->null_checks_eliminated << " of " << stats->null_checks << " -> "
-              << (eliminated/checks) * 100.0 << "%";
-  }
-  if (stats->range_checks > 0) {
-    float eliminated = static_cast<float>(stats->range_checks_eliminated);
-    float checks = static_cast<float>(stats->range_checks);
-    LOG(INFO) << "Range Checks: " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-              << stats->range_checks_eliminated << " of " << stats->range_checks << " -> "
-              << (eliminated/checks) * 100.0 << "%";
-  }
-}
-
-bool MIRGraph::BuildExtendedBBList(class BasicBlock* bb) {
-  if (bb->visited) return false;
-  if (!((bb->block_type == kEntryBlock) || (bb->block_type == kDalvikByteCode) ||
-        (bb->block_type == kExitBlock))) {
-    // Ignore special blocks.
-    bb->visited = true;
-    return false;
-  }
-  // Must be head of extended basic block.
-  BasicBlock* start_bb = bb;
-  extended_basic_blocks_.push_back(bb->id);
-  bool terminated_by_return = false;
-  bool do_local_value_numbering = false;
-  // Visit blocks strictly dominated by this head.
-  while (bb != nullptr) {
-    bb->visited = true;
-    terminated_by_return |= bb->terminated_by_return;
-    do_local_value_numbering |= bb->use_lvn;
-    bb = NextDominatedBlock(bb);
-  }
-  if (terminated_by_return || do_local_value_numbering) {
-    // Do lvn for all blocks in this extended set.
-    bb = start_bb;
-    while (bb != nullptr) {
-      bb->use_lvn = do_local_value_numbering;
-      bb->dominates_return = terminated_by_return;
-      bb = NextDominatedBlock(bb);
-    }
-  }
-  return false;  // Not iterative - return value will be ignored
-}
-
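-// Editorial note, not part of the original sources: an extended basic block
-// here is a chain of blocks linked by NextDominatedBlock(), i.e. blocks that
-// execute strictly in sequence with no join points in between. For a chain
-// head B1 with B1 -> B2 -> B3 where B2 and B3 have no other predecessors,
-// the two loops above first gather the chain's properties (termination by
-// return, LVN requests) and then write them back to every block in the same
-// chain.
-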
-void MIRGraph::BasicBlockOptimizationStart() {
-  if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) {
-    temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
-    temp_.gvn.ifield_ids =
-        GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
-    temp_.gvn.sfield_ids =
-        GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
-  }
-}
-
-void MIRGraph::BasicBlockOptimization() {
-  if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
-    ClearAllVisitedFlags();
-    PreOrderDfsIterator iter2(this);
-    for (BasicBlock* bb = iter2.Next(); bb != nullptr; bb = iter2.Next()) {
-      BuildExtendedBBList(bb);
-    }
-    // Perform extended basic block optimizations.
-    for (unsigned int i = 0; i < extended_basic_blocks_.size(); i++) {
-      BasicBlockOpt(GetBasicBlock(extended_basic_blocks_[i]));
-    }
-  } else {
-    PreOrderDfsIterator iter(this);
-    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-      BasicBlockOpt(bb);
-    }
-  }
-}
-
-void MIRGraph::BasicBlockOptimizationEnd() {
-  // Clean up after LVN.
-  temp_.gvn.ifield_ids = nullptr;
-  temp_.gvn.sfield_ids = nullptr;
-  temp_scoped_alloc_.reset();
-}
-
-void MIRGraph::StringChange() {
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      // Look for new-instance opcodes; skip otherwise.
-      Instruction::Code opcode = mir->dalvikInsn.opcode;
-      if (opcode == Instruction::NEW_INSTANCE) {
-        uint32_t type_idx = mir->dalvikInsn.vB;
-        if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) {
-          // Change NEW_INSTANCE into CONST_4 of 0
-          mir->dalvikInsn.opcode = Instruction::CONST_4;
-          mir->dalvikInsn.vB = 0;
-        }
-      } else if ((opcode == Instruction::INVOKE_DIRECT) ||
-                 (opcode == Instruction::INVOKE_DIRECT_RANGE)) {
-        uint32_t method_idx = mir->dalvikInsn.vB;
-        DexFileMethodInliner* inliner =
-            cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
-        if (inliner->IsStringInitMethodIndex(method_idx)) {
-          bool is_range = (opcode == Instruction::INVOKE_DIRECT_RANGE);
-          uint32_t orig_this_reg = is_range ? mir->dalvikInsn.vC : mir->dalvikInsn.arg[0];
-          // Remove this pointer from string init and change to static call.
-          mir->dalvikInsn.vA--;
-          if (!is_range) {
-            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC;
-            for (uint32_t i = 0; i < mir->dalvikInsn.vA; i++) {
-              mir->dalvikInsn.arg[i] = mir->dalvikInsn.arg[i + 1];
-            }
-          } else {
-            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC_RANGE;
-            mir->dalvikInsn.vC++;
-          }
-          // Insert a move-result instruction writing to the original "this" pointer reg.
-          MIR* move_result_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
-          move_result_mir->dalvikInsn.opcode = Instruction::MOVE_RESULT_OBJECT;
-          move_result_mir->dalvikInsn.vA = orig_this_reg;
-          move_result_mir->offset = mir->offset;
-          move_result_mir->m_unit_index = mir->m_unit_index;
-          bb->InsertMIRAfter(mir, move_result_mir);
-          // Add additional moves if this pointer was copied to other registers.
-          const VerifiedMethod* verified_method =
-              cu_->compiler_driver->GetVerifiedMethod(cu_->dex_file, cu_->method_idx);
-          DCHECK(verified_method != nullptr);
-          const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
-              verified_method->GetStringInitPcRegMap();
-          auto map_it = string_init_map.find(mir->offset);
-          if (map_it != string_init_map.end()) {
-            const std::set<uint32_t>& reg_set = map_it->second;
-            for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
-              MIR* move_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
-              move_mir->dalvikInsn.opcode = Instruction::MOVE_OBJECT;
-              move_mir->dalvikInsn.vA = *set_it;
-              move_mir->dalvikInsn.vB = orig_this_reg;
-              move_mir->offset = mir->offset;
-              move_mir->m_unit_index = mir->m_unit_index;
-              bb->InsertMIRAfter(move_result_mir, move_mir);
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
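-// Editorial sketch, not part of the original sources: the rewrite above
-// replaces instance-style String construction with a factory-style call.
-// Schematically, with hypothetical register numbers:
-//
-//   Before:  new-instance v0, Ljava/lang/String;
-//            invoke-direct {v0, v1}, Ljava/lang/String;-><init>(...)V
-//
-//   After:   const/4 v0, #0          // NEW_INSTANCE neutralized.
-//            invoke-static {v1}, ... // <init> becomes a static call.
-//            move-result-object v0   // Result lands in the old "this" reg.
-//
-// Extra move-object copies are appended for any other registers the verifier
-// recorded as holding the uninitialized "this".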
-
-bool MIRGraph::EliminateSuspendChecksGate() {
-  if (kLeafOptimization ||           // Incompatible (could create loops without suspend checks).
-      (cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
-      GetMaxNestedLoops() == 0u ||   // Nothing to do.
-      GetMaxNestedLoops() >= 32u ||  // Only 32 bits in suspend_checks_in_loops_[.].
-                                     // Exclude 32 as well to keep bit shifts well-defined.
-      !HasInvokes()) {               // No invokes to actually eliminate any suspend checks.
-    return false;
-  }
-  suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc);
-  return true;
-}
-
-bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) {
-  if (bb->block_type != kDalvikByteCode) {
-    return false;
-  }
-  DCHECK_EQ(GetTopologicalSortOrderLoopHeadStack()->size(), bb->nesting_depth);
-  if (bb->nesting_depth == 0u) {
-    // Out of loops.
-    DCHECK_EQ(suspend_checks_in_loops_[bb->id], 0u);  // The array was zero-initialized.
-    return false;
-  }
-  uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u;  // Start with all loop heads.
-  bool found_invoke = false;
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    if ((IsInstructionInvoke(mir->dalvikInsn.opcode) ||
-        IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) &&
-        !GetMethodLoweringInfo(mir).IsIntrinsic()) {
-      // Non-intrinsic invoke, rely on a suspend point in the invoked method.
-      found_invoke = true;
-      break;
-    }
-  }
-  if (!found_invoke) {
-    // Intersect suspend checks from predecessors.
-    uint16_t bb_topo_idx = topological_order_indexes_[bb->id];
-    uint32_t pred_mask_union = 0u;
-    for (BasicBlockId pred_id : bb->predecessors) {
-      uint16_t pred_topo_idx = topological_order_indexes_[pred_id];
-      if (pred_topo_idx < bb_topo_idx) {
-        // Determine the loop depth of the predecessor relative to this block.
-        size_t pred_loop_depth = topological_order_loop_head_stack_.size();
-        while (pred_loop_depth != 0u &&
-            pred_topo_idx < topological_order_loop_head_stack_[pred_loop_depth - 1].first) {
-          --pred_loop_depth;
-        }
-        DCHECK_LE(pred_loop_depth, GetBasicBlock(pred_id)->nesting_depth);
-        uint32_t pred_mask = (1u << pred_loop_depth) - 1u;
-        // Intersect pred_mask bits in suspend_checks_in_loops with
-        // suspend_checks_in_loops_[pred_id].
-        uint32_t pred_loops_without_checks = pred_mask & ~suspend_checks_in_loops_[pred_id];
-        suspend_checks_in_loops = suspend_checks_in_loops & ~pred_loops_without_checks;
-        pred_mask_union |= pred_mask;
-      }
-    }
-    // DCHECK_EQ() may not hold for unnatural loop heads, so use DCHECK_GE().
-  DCHECK_GE(((1u << (IsLoopHead(bb->id) ? bb->nesting_depth - 1u : bb->nesting_depth)) - 1u),
-              pred_mask_union);
-    suspend_checks_in_loops &= pred_mask_union;
-  }
-  suspend_checks_in_loops_[bb->id] = suspend_checks_in_loops;
-  if (suspend_checks_in_loops == 0u) {
-    return false;
-  }
-  // Apply MIR_IGNORE_SUSPEND_CHECK if appropriate.
-  if (bb->taken != NullBasicBlockId) {
-    DCHECK(bb->last_mir_insn != nullptr);
-    DCHECK(IsInstructionIfCc(bb->last_mir_insn->dalvikInsn.opcode) ||
-           IsInstructionIfCcZ(bb->last_mir_insn->dalvikInsn.opcode) ||
-           IsInstructionGoto(bb->last_mir_insn->dalvikInsn.opcode) ||
-           (static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) >= kMirOpFusedCmplFloat &&
-            static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) <= kMirOpFusedCmpLong));
-    if (!IsSuspendCheckEdge(bb, bb->taken) &&
-        (bb->fall_through == NullBasicBlockId || !IsSuspendCheckEdge(bb, bb->fall_through))) {
-      bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
-    }
-  } else if (bb->fall_through != NullBasicBlockId && IsSuspendCheckEdge(bb, bb->fall_through)) {
-    // We've got a fall-through suspend edge. Add an artificial GOTO to force suspend check.
-    MIR* mir = NewMIR();
-    mir->dalvikInsn.opcode = Instruction::GOTO;
-    mir->dalvikInsn.vA = 0;  // Branch offset.
-    mir->offset = GetBasicBlock(bb->fall_through)->start_offset;
-    mir->m_unit_index = current_method_;
-    mir->ssa_rep = reinterpret_cast<SSARepresentation*>(
-        arena_->Alloc(sizeof(SSARepresentation), kArenaAllocDFInfo));  // Zero-initialized.
-    bb->AppendMIR(mir);
-    std::swap(bb->fall_through, bb->taken);  // The fall-through has become taken.
-  }
-  return true;
-}
-
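-// Editorial sketch, not part of the original sources: the bookkeeping above
-// keeps one bit per enclosing loop, bit i standing for the loop at nesting
-// depth i + 1. A hypothetical helper for the recurring mask:
-constexpr uint32_t ExampleLoopMask(uint32_t nesting_depth) {
-  return (1u << nesting_depth) - 1u;  // One bit per enclosing loop.
-}
-// E.g. ExampleLoopMask(3) == 0b111: all three enclosing loops still need a
-// suspend point. The gate above rejects nesting depths >= 32 precisely so
-// that this shift stays well-defined.
-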
-bool MIRGraph::CanThrow(MIR* mir) const {
-  if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) {
-    return false;
-  }
-  const int opt_flags = mir->optimization_flags;
-  uint64_t df_attributes = GetDataFlowAttributes(mir);
-
-  // First, check if the insn can still throw NPE.
-  if (((df_attributes & DF_HAS_NULL_CHKS) != 0) && ((opt_flags & MIR_IGNORE_NULL_CHECK) == 0)) {
-    return true;
-  }
-
-  // Now process specific instructions.
-  if ((df_attributes & DF_IFIELD) != 0) {
-    // The IGET/IPUT family. We have processed the IGET/IPUT null check above.
-    DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0);
-    // If not fast, weird things can happen and the insn can throw.
-    const MirIFieldLoweringInfo& field_info = GetIFieldLoweringInfo(mir);
-    bool fast = (df_attributes & DF_DA) != 0 ? field_info.FastGet() : field_info.FastPut();
-    return !fast;
-  } else if ((df_attributes & DF_SFIELD) != 0) {
-    // The SGET/SPUT family. Check for potentially throwing class initialization.
-    // Also, if not fast, weird things can happen and the insn can throw.
-    const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir);
-    bool fast = (df_attributes & DF_DA) != 0 ? field_info.FastGet() : field_info.FastPut();
-    bool is_class_initialized = field_info.IsClassInitialized() ||
-        ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0);
-    return !(fast && is_class_initialized);
-  } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) {
-    // Only AGET/APUT have range checks. We have processed the AGET/APUT null check above.
-    DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0);
-    // Non-throwing only if range check has been eliminated.
-    return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0);
-  } else if (mir->dalvikInsn.opcode == Instruction::CHECK_CAST &&
-      (opt_flags & MIR_IGNORE_CHECK_CAST) != 0) {
-    return false;
-  } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH ||
-      static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) {
-    // No more checks for these (null check was processed above).
-    return false;
-  }
-  return true;
-}
-
-bool MIRGraph::HasAntiDependency(MIR* first, MIR* second) {
-  DCHECK(first->ssa_rep != nullptr);
-  DCHECK(second->ssa_rep != nullptr);
-  if ((second->ssa_rep->num_defs > 0) && (first->ssa_rep->num_uses > 0)) {
-    int vreg0 = SRegToVReg(second->ssa_rep->defs[0]);
-    int vreg1 = (second->ssa_rep->num_defs == 2) ?
-        SRegToVReg(second->ssa_rep->defs[1]) : INVALID_VREG;
-    for (int i = 0; i < first->ssa_rep->num_uses; i++) {
-      int32_t use = SRegToVReg(first->ssa_rep->uses[i]);
-      if (use == vreg0 || use == vreg1) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
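-// Editorial example, not part of the original sources: HasAntiDependency()
-// guards the multiply-add fusion below. With hypothetical vregs,
-//
-//   first:   mul-int v3, v0, v1   // uses v0 and v1
-//   second:  add-int v1, v4, v5   // defines v1
-//
-// the second MIR overwrites v1, one of the multiply's inputs, so folding the
-// multiply into a later add would read the clobbered value; the candidate
-// must be dropped.
-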
-void MIRGraph::CombineMultiplyAdd(MIR* mul_mir, MIR* add_mir, bool mul_is_first_addend,
-                                  bool is_wide, bool is_sub) {
-  if (is_wide) {
-    if (is_sub) {
-      add_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpMsubLong);
-    } else {
-      add_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpMaddLong);
-    }
-  } else {
-    if (is_sub) {
-      add_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpMsubInt);
-    } else {
-      add_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpMaddInt);
-    }
-  }
-  add_mir->ssa_rep->num_uses = is_wide ? 6 : 3;
-  int32_t addend0 = INVALID_SREG;
-  int32_t addend1 = INVALID_SREG;
-  if (is_wide) {
-    addend0 = mul_is_first_addend ? add_mir->ssa_rep->uses[2] : add_mir->ssa_rep->uses[0];
-    addend1 = mul_is_first_addend ? add_mir->ssa_rep->uses[3] : add_mir->ssa_rep->uses[1];
-  } else {
-    addend0 = mul_is_first_addend ? add_mir->ssa_rep->uses[1] : add_mir->ssa_rep->uses[0];
-  }
-
-  AllocateSSAUseData(add_mir, add_mir->ssa_rep->num_uses);
-  add_mir->ssa_rep->uses[0] = mul_mir->ssa_rep->uses[0];
-  add_mir->ssa_rep->uses[1] = mul_mir->ssa_rep->uses[1];
-  // Clear the original multiply product ssa use count, as it is not used anymore.
-  raw_use_counts_[mul_mir->ssa_rep->defs[0]] = 0;
-  use_counts_[mul_mir->ssa_rep->defs[0]] = 0;
-  if (is_wide) {
-    DCHECK_EQ(add_mir->ssa_rep->num_uses, 6);
-    add_mir->ssa_rep->uses[2] = mul_mir->ssa_rep->uses[2];
-    add_mir->ssa_rep->uses[3] = mul_mir->ssa_rep->uses[3];
-    add_mir->ssa_rep->uses[4] = addend0;
-    add_mir->ssa_rep->uses[5] = addend1;
-    raw_use_counts_[mul_mir->ssa_rep->defs[1]] = 0;
-    use_counts_[mul_mir->ssa_rep->defs[1]] = 0;
-  } else {
-    DCHECK_EQ(add_mir->ssa_rep->num_uses, 3);
-    add_mir->ssa_rep->uses[2] = addend0;
-  }
-  // Copy in the decoded instruction information.
-  add_mir->dalvikInsn.vB = SRegToVReg(add_mir->ssa_rep->uses[0]);
-  if (is_wide) {
-    add_mir->dalvikInsn.vC = SRegToVReg(add_mir->ssa_rep->uses[2]);
-    add_mir->dalvikInsn.arg[0] = SRegToVReg(add_mir->ssa_rep->uses[4]);
-  } else {
-    add_mir->dalvikInsn.vC = SRegToVReg(add_mir->ssa_rep->uses[1]);
-    add_mir->dalvikInsn.arg[0] = SRegToVReg(add_mir->ssa_rep->uses[2]);
-  }
-  // The original multiply MIR becomes a nop.
-  mul_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-}
-
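-// Editorial example, not part of the original sources: in the non-wide case
-// the fused instruction ends up with uses = { a, b, c }, computing a * b + c
-// (or c - a * b for msub). Schematically, with hypothetical vregs:
-//
-//   mul-int v3, v0, v1               // v3 = v0 * v1  (becomes a nop)
-//   add-int v4, v3, v2               // v4 = v3 + v2
-// =>
-//   kMirOpMaddInt v4, v0, v1, v2     // v4 = v0 * v1 + v2 in one insn
-//
-// The wide variant carries register pairs, hence six uses instead of three.
-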
-void MIRGraph::MultiplyAddOpt(BasicBlock* bb) {
-  if (bb->block_type == kDead) {
-    return;
-  }
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaSafeMap<uint32_t, MIR*> ssa_mul_map(std::less<uint32_t>(), allocator.Adapter());
-  ScopedArenaSafeMap<uint32_t, MIR*>::iterator map_it;
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    Instruction::Code opcode = mir->dalvikInsn.opcode;
-    bool is_sub = true;
-    bool is_candidate_multiply = false;
-    switch (opcode) {
-      case Instruction::MUL_INT:
-      case Instruction::MUL_INT_2ADDR:
-        is_candidate_multiply = true;
-        break;
-      case Instruction::MUL_LONG:
-      case Instruction::MUL_LONG_2ADDR:
-        if (cu_->target64) {
-          is_candidate_multiply = true;
-        }
-        break;
-      case Instruction::ADD_INT:
-      case Instruction::ADD_INT_2ADDR:
-        is_sub = false;
-        FALLTHROUGH_INTENDED;
-      case Instruction::SUB_INT:
-      case Instruction::SUB_INT_2ADDR:
-        if (((map_it = ssa_mul_map.find(mir->ssa_rep->uses[0])) != ssa_mul_map.end()) && !is_sub) {
-          // a*b+c
-          CombineMultiplyAdd(map_it->second, mir, true /* product is the first addend */,
-                             false /* is_wide */, false /* is_sub */);
-          ssa_mul_map.erase(mir->ssa_rep->uses[0]);
-        } else if ((map_it = ssa_mul_map.find(mir->ssa_rep->uses[1])) != ssa_mul_map.end()) {
-          // c+a*b or c-a*b
-          CombineMultiplyAdd(map_it->second, mir, false /* product is the second addend */,
-                             false /* is_wide */, is_sub);
-          ssa_mul_map.erase(map_it);
-        }
-        break;
-      case Instruction::ADD_LONG:
-      case Instruction::ADD_LONG_2ADDR:
-        is_sub = false;
-        FALLTHROUGH_INTENDED;
-      case Instruction::SUB_LONG:
-      case Instruction::SUB_LONG_2ADDR:
-        if (!cu_->target64) {
-          break;
-        }
-        if ((map_it = ssa_mul_map.find(mir->ssa_rep->uses[0])) != ssa_mul_map.end() && !is_sub) {
-          // a*b+c
-          CombineMultiplyAdd(map_it->second, mir, true /* product is the first addend */,
-                             true /* is_wide */, false /* is_sub */);
-          ssa_mul_map.erase(map_it);
-        } else if ((map_it = ssa_mul_map.find(mir->ssa_rep->uses[2])) != ssa_mul_map.end()) {
-          // c+a*b or c-a*b
-          CombineMultiplyAdd(map_it->second, mir, false /* product is the second addend */,
-                             true /* is_wide */, is_sub);
-          ssa_mul_map.erase(map_it);
-        }
-        break;
-      default:
-        if (!ssa_mul_map.empty() && CanThrow(mir)) {
-          // Do not combine multiply and add MIRs across a potential exception.
-          ssa_mul_map.clear();
-        }
-        break;
-    }
-
-    // Drop any candidate multiply whose inputs are overwritten by the current MIR. The
-    // register allocator may assign the multiply's input vreg and the clobbering def the
-    // same physical register, so once the multiplier has been updated, the stale product
-    // must not be folded into a multiply-add insn.
-    if (!ssa_mul_map.empty()) {
-      for (auto it = ssa_mul_map.begin(); it != ssa_mul_map.end();) {
-        MIR* mul = it->second;
-        if (HasAntiDependency(mul, mir)) {
-          it = ssa_mul_map.erase(it);
-        } else {
-          ++it;
-        }
-      }
-    }
-
-    if (is_candidate_multiply &&
-        (GetRawUseCount(mir->ssa_rep->defs[0]) == 1) && (mir->next != nullptr)) {
-      ssa_mul_map.Put(mir->ssa_rep->defs[0], mir);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
deleted file mode 100644
index a0cedff..0000000
--- a/compiler/dex/mir_optimization_test.cc
+++ /dev/null
@@ -1,1186 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vector>
-
-#include "base/logging.h"
-#include "dataflow_iterator.h"
-#include "dataflow_iterator-inl.h"
-#include "dex/compiler_ir.h"
-#include "dex/mir_field_info.h"
-#include "gtest/gtest.h"
-
-namespace art {
-
-class MirOptimizationTest : public testing::Test {
- protected:
-  struct BBDef {
-    static constexpr size_t kMaxSuccessors = 4;
-    static constexpr size_t kMaxPredecessors = 4;
-
-    BBType type;
-    size_t num_successors;
-    BasicBlockId successors[kMaxSuccessors];
-    size_t num_predecessors;
-    BasicBlockId predecessors[kMaxPredecessors];
-  };
-
-  struct MethodDef {
-    uint16_t method_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_class_idx;
-    uint16_t declaring_method_idx;
-    InvokeType invoke_type;
-    InvokeType sharp_type;
-    bool is_referrers_class;
-    bool is_initialized;
-  };
-
-  struct MIRDef {
-    BasicBlockId bbid;
-    Instruction::Code opcode;
-    uint32_t field_or_method_info;
-    uint32_t vA;
-    uint32_t vB;
-    uint32_t vC;
-  };
-
-#define DEF_SUCC0() \
-    0u, { }
-#define DEF_SUCC1(s1) \
-    1u, { s1 }
-#define DEF_SUCC2(s1, s2) \
-    2u, { s1, s2 }
-#define DEF_SUCC3(s1, s2, s3) \
-    3u, { s1, s2, s3 }
-#define DEF_SUCC4(s1, s2, s3, s4) \
-    4u, { s1, s2, s3, s4 }
-#define DEF_PRED0() \
-    0u, { }
-#define DEF_PRED1(p1) \
-    1u, { p1 }
-#define DEF_PRED2(p1, p2) \
-    2u, { p1, p2 }
-#define DEF_PRED3(p1, p2, p3) \
-    3u, { p1, p2, p3 }
-#define DEF_PRED4(p1, p2, p3, p4) \
-    4u, { p1, p2, p3, p4 }
-#define DEF_BB(type, succ, pred) \
-    { type, succ, pred }
-
-#define DEF_SGET_SPUT(bb, opcode, vA, field_info) \
-    { bb, opcode, field_info, vA, 0u, 0u }
-#define DEF_IGET_IPUT(bb, opcode, vA, vB, field_info) \
-    { bb, opcode, field_info, vA, vB, 0u }
-#define DEF_AGET_APUT(bb, opcode, vA, vB, vC) \
-    { bb, opcode, 0u, vA, vB, vC }
-#define DEF_INVOKE(bb, opcode, vC, method_info) \
-    { bb, opcode, method_info, 0u, 0u, vC }
-#define DEF_OTHER0(bb, opcode) \
-    { bb, opcode, 0u, 0u, 0u, 0u }
-#define DEF_OTHER1(bb, opcode, vA) \
-    { bb, opcode, 0u, vA, 0u, 0u }
-#define DEF_OTHER2(bb, opcode, vA, vB) \
-    { bb, opcode, 0u, vA, vB, 0u }
-
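-  // Editorial note, not part of the original file: the macros above build
-  // brace-initializers for the BBDef/MIRDef tables below. For example,
-  //
-  //   DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1))
-  //
-  // expands to { kDalvikByteCode, 1u, { 2 }, 1u, { 1 } }: a bytecode block
-  // with one successor (block 2) and one predecessor (block 1).
-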
-  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
-    cu_.mir_graph->block_id_map_.clear();
-    cu_.mir_graph->block_list_.clear();
-    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
-    ASSERT_EQ(kNullBlock, defs[0].type);
-    ASSERT_EQ(kEntryBlock, defs[1].type);
-    ASSERT_EQ(kExitBlock, defs[2].type);
-    for (size_t i = 0u; i != count; ++i) {
-      const BBDef* def = &defs[i];
-      BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type);
-      if (def->num_successors <= 2) {
-        bb->successor_block_list_type = kNotUsed;
-        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
-        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
-      } else {
-        bb->successor_block_list_type = kPackedSwitch;
-        bb->fall_through = 0u;
-        bb->taken = 0u;
-        bb->successor_blocks.reserve(def->num_successors);
-        for (size_t j = 0u; j != def->num_successors; ++j) {
-          SuccessorBlockInfo* successor_block_info =
-              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessors));
-          successor_block_info->block = def->successors[j];
-          successor_block_info->key = 0u;  // Not used by class init check elimination.
-          bb->successor_blocks.push_back(successor_block_info);
-        }
-      }
-      bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors);
-      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
-        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
-            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
-      }
-    }
-    ASSERT_EQ(count, cu_.mir_graph->block_list_.size());
-    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1];
-    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
-    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2];
-    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
-  }
-
-  template <size_t count>
-  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
-    DoPrepareBasicBlocks(defs, count);
-  }
-
-  void PrepareSingleBlock() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(3)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareDiamond() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareLoop() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareNestedLoopsWhile_While() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(8)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED2(3, 7)),  // Outer while loop head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),  // Inner while loop head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),        // "taken" loops to inner head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(5)),        // "taken" loops to outer head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareNestedLoopsWhile_WhileWhile() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 10), DEF_PRED2(3, 9)),   // Outer while loop head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),    // Inner while loop head 1.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),          // Loops to inner head 1.
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(5, 8)),    // Inner while loop head 2.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(7)),          // Loops to inner head 2.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(7)),          // Loops to outer head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge() {
-    // Extra edge from the first inner loop body to second inner loop body (6u->8u).
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 10), DEF_PRED2(3, 9)),   // Outer while loop head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),    // Inner while loop head 1.
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED1(5)),       // Loops to inner head 1.
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(5, 8)),    // Inner while loop head 2.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED2(7, 6)),       // Loops to inner head 2.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(7)),          // Loops to outer head.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareCatch() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),     // The top.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // The throwing insn.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Catch handler.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // The merged block.
-    };
-    PrepareBasicBlocks(bbs);
-    BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
-    catch_handler->catch_entry = true;
-    // Add successor block info to the check block.
-    BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
-    check_bb->successor_block_list_type = kCatch;
-    SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-        (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
-    successor_block_info->block = catch_handler->id;
-    check_bb->successor_blocks.push_back(successor_block_info);
-  }
-
-  void DoPrepareMethods(const MethodDef* defs, size_t count) {
-    cu_.mir_graph->method_lowering_infos_.clear();
-    cu_.mir_graph->method_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const MethodDef* def = &defs[i];
-      MirMethodLoweringInfo method_info(def->method_idx, def->invoke_type, false);
-      if (def->declaring_dex_file != 0u) {
-        method_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        method_info.declaring_class_idx_ = def->declaring_class_idx;
-        method_info.declaring_method_idx_ = def->declaring_method_idx;
-      }
-      ASSERT_EQ(def->invoke_type != kStatic, def->sharp_type != kStatic);
-      method_info.flags_ =
-          ((def->invoke_type == kStatic) ? MirMethodLoweringInfo::kFlagIsStatic : 0u) |
-          MirMethodLoweringInfo::kFlagFastPath |
-          (static_cast<uint16_t>(def->invoke_type) << MirMethodLoweringInfo::kBitInvokeTypeBegin) |
-          (static_cast<uint16_t>(def->sharp_type) << MirMethodLoweringInfo::kBitSharpTypeBegin) |
-          ((def->is_referrers_class) ? MirMethodLoweringInfo::kFlagIsReferrersClass : 0u) |
-          ((def->is_initialized) ? MirMethodLoweringInfo::kFlagClassIsInitialized : 0u);
-      ASSERT_EQ(def->declaring_dex_file != 0u, method_info.IsResolved());
-      cu_.mir_graph->method_lowering_infos_.push_back(method_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareMethods(const MethodDef (&defs)[count]) {
-    DoPrepareMethods(defs, count);
-  }
-
-  void DoPrepareMIRs(const MIRDef* defs, size_t count) {
-    mir_count_ = count;
-    mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
-    uint64_t merged_df_flags = 0u;
-    for (size_t i = 0u; i != count; ++i) {
-      const MIRDef* def = &defs[i];
-      MIR* mir = &mirs_[i];
-      mir->dalvikInsn.opcode = def->opcode;
-      ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
-      BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
-      bb->AppendMIR(mir);
-      if (IsInstructionIGetOrIPut(def->opcode)) {
-        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->ifield_lowering_infos_.size());
-        mir->meta.ifield_lowering_info = def->field_or_method_info;
-        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_or_method_info].MemAccessType(),
-                  IGetOrIPutMemAccessType(def->opcode));
-      } else if (IsInstructionSGetOrSPut(def->opcode)) {
-        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->field_or_method_info;
-        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_or_method_info].MemAccessType(),
-                  SGetOrSPutMemAccessType(def->opcode));
-      } else if (IsInstructionInvoke(def->opcode)) {
-        ASSERT_LT(def->field_or_method_info, cu_.mir_graph->method_lowering_infos_.size());
-        mir->meta.method_lowering_info = def->field_or_method_info;
-      }
-      mir->dalvikInsn.vA = def->vA;
-      mir->dalvikInsn.vB = def->vB;
-      mir->dalvikInsn.vC = def->vC;
-      mir->ssa_rep = nullptr;
-      mir->offset = 2 * i;  // All insns need to be at least 2 code units long.
-      mir->optimization_flags = 0u;
-      merged_df_flags |= MIRGraph::GetDataFlowAttributes(def->opcode);
-    }
-    cu_.mir_graph->merged_df_flags_ = merged_df_flags;
-
-    code_item_ = static_cast<DexFile::CodeItem*>(
-        cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
-    memset(code_item_, 0, sizeof(DexFile::CodeItem));
-    code_item_->insns_size_in_code_units_ = 2u * count;
-    cu_.mir_graph->current_code_item_ = code_item_;
-  }
-
-  template <size_t count>
-  void PrepareMIRs(const MIRDef (&defs)[count]) {
-    DoPrepareMIRs(defs, count);
-  }
-
-  MirOptimizationTest()
-      : pool_(),
-        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
-        mir_count_(0u),
-        mirs_(nullptr),
-        code_item_(nullptr) {
-    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
-    cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
-  }
-
-  ArenaPool pool_;
-  CompilationUnit cu_;
-  size_t mir_count_;
-  MIR* mirs_;
-  DexFile::CodeItem* code_item_;
-};
-
-class ClassInitCheckEliminationTest : public MirOptimizationTest {
- protected:
-  struct SFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_class_idx;
-    uint16_t declaring_field_idx;
-    DexMemAccessType type;
-  };
-
-  void DoPrepareSFields(const SFieldDef* defs, size_t count) {
-    cu_.mir_graph->sfield_lowering_infos_.clear();
-    cu_.mir_graph->sfield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const SFieldDef* def = &defs[i];
-      MirSFieldLoweringInfo field_info(def->field_idx, def->type);
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_class_idx_ = def->declaring_class_idx;
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        // We don't care about the volatile flag in these tests.
-      }
-      ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved());
-      ASSERT_FALSE(field_info.IsClassInitialized());
-      cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareSFields(const SFieldDef (&defs)[count]) {
-    DoPrepareSFields(defs, count);
-  }
-
-  void PerformClassInitCheckElimination() {
-    cu_.mir_graph->ComputeDFSOrders();
-    bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate();
-    ASSERT_TRUE(gate_result);
-    RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get());
-    bool change = false;
-    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
-      change = cu_.mir_graph->EliminateClassInitChecks(bb);
-    }
-    cu_.mir_graph->EliminateClassInitChecksEnd();
-  }
-
-  ClassInitCheckEliminationTest()
-      : MirOptimizationTest() {
-  }
-};
-
-class NullCheckEliminationTest : public MirOptimizationTest {
- protected:
-  struct IFieldDef {
-    uint16_t field_idx;
-    uintptr_t declaring_dex_file;
-    uint16_t declaring_class_idx;
-    uint16_t declaring_field_idx;
-    DexMemAccessType type;
-  };
-
-  void DoPrepareIFields(const IFieldDef* defs, size_t count) {
-    cu_.mir_graph->ifield_lowering_infos_.clear();
-    cu_.mir_graph->ifield_lowering_infos_.reserve(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const IFieldDef* def = &defs[i];
-      MirIFieldLoweringInfo field_info(def->field_idx, def->type, false);
-      if (def->declaring_dex_file != 0u) {
-        field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
-        field_info.declaring_class_idx_ = def->declaring_class_idx;
-        field_info.declaring_field_idx_ = def->declaring_field_idx;
-        // We don't care about the volatile flag in these tests.
-      }
-      ASSERT_EQ(def->declaring_dex_file != 0u, field_info.IsResolved());
-      cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
-    }
-  }
-
-  template <size_t count>
-  void PrepareIFields(const IFieldDef (&defs)[count]) {
-    DoPrepareIFields(defs, count);
-  }
-
-  void PerformNullCheckElimination() {
-    // Make vregs in range [100, 1000) input registers, i.e. requiring a null check.
-    code_item_->registers_size_ = 1000;
-    code_item_->ins_size_ = 900;
-
-    cu_.mir_graph->ComputeDFSOrders();
-    bool gate_result = cu_.mir_graph->EliminateNullChecksGate();
-    ASSERT_TRUE(gate_result);
-    RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get());
-    bool change = false;
-    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
-      change = cu_.mir_graph->EliminateNullChecks(bb);
-    }
-    cu_.mir_graph->EliminateNullChecksEnd();
-  }
-
-  NullCheckEliminationTest()
-      : MirOptimizationTest() {
-    static const MethodDef methods[] = {
-        { 0u, 1u, 0u, 0u, kDirect, kDirect, false, false },  // Dummy.
-    };
-    PrepareMethods(methods);
-  }
-};
-
-class SuspendCheckEliminationTest : public MirOptimizationTest {
- protected:
-  bool IsBackEdge(BasicBlockId branch_bb, BasicBlockId target_bb) {
-    BasicBlock* branch = cu_.mir_graph->GetBasicBlock(branch_bb);
-    return target_bb != NullBasicBlockId && cu_.mir_graph->IsBackEdge(branch, target_bb);
-  }
-
-  bool IsSuspendCheckEdge(BasicBlockId branch_bb, BasicBlockId target_bb) {
-    BasicBlock* branch = cu_.mir_graph->GetBasicBlock(branch_bb);
-    return cu_.mir_graph->IsSuspendCheckEdge(branch, target_bb);
-  }
-
-  void PerformSuspendCheckElimination() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-
-    bool gate_result = cu_.mir_graph->EliminateSuspendChecksGate();
-    ASSERT_NE(gate_result, kLeafOptimization);
-    if (kLeafOptimization) {
-      // Even with kLeafOptimization on and Gate() refusing to allow SCE, we want
-      // to run the SCE test to avoid bitrot, so we need to initialize explicitly.
-      cu_.mir_graph->suspend_checks_in_loops_ =
-          cu_.mir_graph->arena_->AllocArray<uint32_t>(cu_.mir_graph->GetNumBlocks(),
-                                                      kArenaAllocMisc);
-    }
-
-    TopologicalSortIterator iterator(cu_.mir_graph.get());
-    bool change = false;
-    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
-      change = cu_.mir_graph->EliminateSuspendChecks(bb);
-    }
-  }
-
-  SuspendCheckEliminationTest()
-      : MirOptimizationTest() {
-    static const MethodDef methods[] = {
-        { 0u, 1u, 0u, 0u, kDirect, kDirect, false, false },  // Dummy.
-    };
-    PrepareMethods(methods);
-  }
-};
-
-TEST_F(ClassInitCheckEliminationTest, SingleBlock) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
-      { 3u, 1u, 3u, 3u, kDexMemAccessWord },  // Same declaring class as sfield[4].
-      { 4u, 1u, 3u, 4u, kDexMemAccessWord },  // Same declaring class as sfield[3].
-      { 5u, 0u, 0u, 0u, kDexMemAccessWord },  // Unresolved.
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 5u),  // Unresolved.
-      DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 0u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 1u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 2u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 5u),  // Unresolved.
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 1u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 2u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 5u),  // Unresolved.
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 3u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 4u),
-  };
-  static const bool expected_ignore_clinit_check[] = {
-      false, false, false, false, true, true, true, true, true, false, true
-  };
-
-  PrepareSFields(sfields);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_clinit_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(ClassInitCheckEliminationTest, SingleBlockWithInvokes) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
-  };
-  static const MethodDef methods[] = {
-      { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
-      { 1u, 1u, 1u, 1u, kStatic, kStatic, false, false },
-      { 2u, 1u, 2u, 2u, kStatic, kStatic, false, false },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 0u),
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 1u),
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 1u),
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 2u),
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 2u),
-  };
-  static const bool expected_class_initialized[] = {
-      false, true, false, true, false, true
-  };
-  static const bool expected_class_in_dex_cache[] = {
-      false, false, false, false, false, false
-  };
-
-  PrepareSFields(sfields);
-  PrepareMethods(methods);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_class_initialized), mir_count_);
-  ASSERT_EQ(arraysize(expected_class_in_dex_cache), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_class_initialized[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_class_in_dex_cache[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(ClassInitCheckEliminationTest, Diamond) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
-      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
-      { 4u, 1u, 4u, 4u, kDexMemAccessWord },
-      { 5u, 1u, 5u, 5u, kDexMemAccessWord },
-      { 6u, 1u, 6u, 6u, kDexMemAccessWord },
-      { 7u, 1u, 7u, 7u, kDexMemAccessWord },
-      { 8u, 1u, 8u, 8u, kDexMemAccessWord },   // Same declaring class as sfield[9].
-      { 9u, 1u, 8u, 9u, kDexMemAccessWord },   // Same declaring class as sfield[8].
-      { 10u, 0u, 0u, 0u, kDexMemAccessWord },  // Unresolved.
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 10u),  // Unresolved.
-      DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 10u),  // Unresolved.
-      DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 0u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 0u),  // Eliminated (BB #3 dominates #6).
-      DEF_SGET_SPUT(4u, Instruction::SPUT, 0u, 1u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 1u),  // Not eliminated (BB #4 doesn't dominate #6).
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 2u),
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 2u),  // Eliminated (BB #3 dominates #4).
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 3u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 3u),  // Eliminated (BB #3 dominates #5).
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 4u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 4u),  // Eliminated (BB #3 dominates #6).
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 5u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 5u),  // Not eliminated (BB #4 doesn't dominate #6).
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 6u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 6u),  // Not eliminated (BB #5 doesn't dominate #6).
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 7u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 7u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 7u),  // Eliminated (initialized in both #3 and #4).
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 8u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 9u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 8u),  // Eliminated (with sfield[9] in BB #5).
-      DEF_SGET_SPUT(6u, Instruction::SPUT, 0u, 9u),  // Eliminated (with sfield[8] in BB #4).
-  };
-  static const bool expected_ignore_clinit_check[] = {
-      false, true,          // Unresolved: sfield[10]
-      false, true,          // sfield[0]
-      false, false,         // sfield[1]
-      false, true,          // sfield[2]
-      false, true,          // sfield[3]
-      false, true,          // sfield[4]
-      false, false,         // sfield[5]
-      false, false,         // sfield[6]
-      false, false, true,   // sfield[7]
-      false, false, true, true,  // sfield[8], sfield[9]
-  };
-
-  PrepareSFields(sfields);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_clinit_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(ClassInitCheckEliminationTest, DiamondWithInvokes) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
-      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
-      { 4u, 1u, 4u, 4u, kDexMemAccessWord },
-  };
-  static const MethodDef methods[] = {
-      { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
-      { 1u, 1u, 1u, 1u, kStatic, kStatic, false, false },
-      { 2u, 1u, 2u, 2u, kStatic, kStatic, false, false },
-      { 3u, 1u, 3u, 3u, kStatic, kStatic, false, false },
-      { 4u, 1u, 4u, 4u, kStatic, kStatic, false, false },
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_SGET_SPUT(3u, Instruction::SPUT, 0u, 0u),
-      DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u /* dummy */, 0u),
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 1u),
-      DEF_SGET_SPUT(6u, Instruction::SPUT, 0u, 1u),
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 2u),
-      DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u /* dummy */, 2u),
-      DEF_SGET_SPUT(6u, Instruction::SPUT, 0u, 2u),
-      DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u /* dummy */, 3u),
-      DEF_SGET_SPUT(5u, Instruction::SPUT, 0u, 3u),
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 3u),
-      DEF_SGET_SPUT(4u, Instruction::SPUT, 0u, 4u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 4u),
-      DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u /* dummy */, 4u),
-  };
-  static const bool expected_class_initialized[] = {
-      false, true,    // BB #3 SPUT, BB #6 INVOKE_STATIC
-      false, true,    // BB #3 INVOKE_STATIC, BB #6 SPUT
-      false, false, true,   // BB #4 SGET, BB #5 INVOKE_STATIC, BB #6 SPUT
-      false, false, true,   // BB #4 INVOKE_STATIC, BB #5 SPUT, BB #6 SGET
-      false, false, true,   // BB #4 SPUT, BB #5 SGET, BB #6 INVOKE_STATIC
-  };
-  static const bool expected_class_in_dex_cache[] = {
-      false, false,   // BB #3 SPUT, BB #6 INVOKE_STATIC
-      false, false,   // BB #3 INVOKE_STATIC, BB #6 SPUT
-      false, false, false,  // BB #4 SGET, BB #5 INVOKE_STATIC, BB #6 SPUT
-      false, false, false,  // BB #4 INVOKE_STATIC, BB #5 SPUT, BB #6 SGET
-      false, false, false,  // BB #4 SPUT, BB #5 SGET, BB #6 INVOKE_STATIC
-  };
-
-  PrepareSFields(sfields);
-  PrepareMethods(methods);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_class_initialized), mir_count_);
-  ASSERT_EQ(arraysize(expected_class_in_dex_cache), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_class_initialized[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_class_in_dex_cache[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(ClassInitCheckEliminationTest, Loop) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 0u),  // Eliminated.
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 1u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 1u),  // Eliminated.
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 2u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 2u),  // Eliminated.
-  };
-  static const bool expected_ignore_clinit_check[] = {
-      false, true, false, true, false, true,
-  };
-
-  PrepareSFields(sfields);
-  PrepareLoop();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_clinit_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(ClassInitCheckEliminationTest, LoopWithInvokes) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-  };
-  static const MethodDef methods[] = {
-      { 0u, 1u, 0u, 0u, kStatic, kStatic, false, false },
-      { 1u, 1u, 1u, 1u, kStatic, kStatic, false, false },
-      { 2u, 1u, 2u, 2u, kStatic, kStatic, false, false },
-  };
-  static const MIRDef mirs[] = {
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 0u),
-      DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u /* dummy */, 0u),
-      DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u /* dummy */, 1u),
-      DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u /* dummy */, 1u),
-      DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u /* dummy */, 2u),
-      DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u /* dummy */, 2u),
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 0u),
-  };
-  static const bool expected_class_initialized[] = {
-      false, true, false, true, false, true, true,
-  };
-  static const bool expected_class_in_dex_cache[] = {
-      false, false, false, false, false, false, false,
-  };
-
-  PrepareSFields(sfields);
-  PrepareMethods(methods);
-  PrepareLoop();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_class_initialized), mir_count_);
-  ASSERT_EQ(arraysize(expected_class_in_dex_cache), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_class_initialized[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_class_in_dex_cache[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(ClassInitCheckEliminationTest, Catch) {
-  static const SFieldDef sfields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 2u, 2u, kDexMemAccessWord },
-      { 3u, 1u, 3u, 3u, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 0u),  // Before the exception edge.
-      DEF_SGET_SPUT(3u, Instruction::SGET, 0u, 1u),  // Before the exception edge.
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 2u),  // After the exception edge.
-      DEF_SGET_SPUT(4u, Instruction::SGET, 0u, 3u),  // After the exception edge.
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 0u),  // In catch handler; eliminated.
-      DEF_SGET_SPUT(5u, Instruction::SGET, 0u, 2u),  // In catch handler; not eliminated.
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 0u),  // Class init check eliminated.
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 1u),  // Class init check eliminated.
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 2u),  // Class init check eliminated.
-      DEF_SGET_SPUT(6u, Instruction::SGET, 0u, 3u),  // Class init check not eliminated.
-  };
-  static const bool expected_ignore_clinit_check[] = {
-      false, false, false, false, true, false, true, true, true, false
-  };
-
-  PrepareSFields(sfields);
-  PrepareCatch();
-  PrepareMIRs(mirs);
-  PerformClassInitCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_clinit_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0) << i;
-    EXPECT_EQ(expected_ignore_clinit_check[i],
-              (mirs_[i].optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) != 0) << i;
-  }
-}
-
-TEST_F(NullCheckEliminationTest, SingleBlock) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 0u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 0u, 2u, kDexMemAccessObject },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET_IPUT(3u, Instruction::IGET_OBJECT, 0u, 100u, 2u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 1u, 0u, 1u),
-      DEF_IGET_IPUT(3u, Instruction::IGET_OBJECT, 2u, 100u, 2u),  // Differs from 0u (no LVN here).
-      DEF_IGET_IPUT(3u, Instruction::IGET, 3u, 2u, 1u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 4u, 101u, 0u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 5u, 102u, 0u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 6u, 103u, 0u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 7u, 103u, 1u),
-      DEF_IGET_IPUT(3u, Instruction::IPUT, 8u, 104u, 0u),
-      DEF_IGET_IPUT(3u, Instruction::IPUT, 9u, 104u, 1u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 10u, 105u, 0u),
-      DEF_IGET_IPUT(3u, Instruction::IPUT, 11u, 105u, 1u),
-      DEF_IGET_IPUT(3u, Instruction::IPUT, 12u, 106u, 0u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 13u, 106u, 1u),
-      DEF_INVOKE(3u, Instruction::INVOKE_DIRECT, 107u, 0u /* dummy */),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 15u, 107u, 1u),
-      DEF_IGET_IPUT(3u, Instruction::IGET, 16u, 108u, 0u),
-      DEF_INVOKE(3u, Instruction::INVOKE_DIRECT, 108u, 0u /* dummy */),
-      DEF_AGET_APUT(3u, Instruction::AGET, 18u, 109u, 110u),
-      DEF_AGET_APUT(3u, Instruction::APUT, 19u, 109u, 111u),
-      DEF_OTHER2(3u, Instruction::ARRAY_LENGTH, 20u, 112u),
-      DEF_AGET_APUT(3u, Instruction::AGET, 21u, 112u, 113u),
-      DEF_OTHER1(3u, Instruction::MONITOR_ENTER, 114u),
-      DEF_OTHER1(3u, Instruction::MONITOR_EXIT, 114u),
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, false, true, false /* Not doing LVN. */,
-      false, true /* Set before running NCE. */,
-      false, true,  // IGET, IGET
-      false, true,  // IPUT, IPUT
-      false, true,  // IGET, IPUT
-      false, true,  // IPUT, IGET
-      false, true,  // INVOKE, IGET
-      false, true,  // IGET, INVOKE
-      false, true,  // AGET, APUT
-      false, true,  // ARRAY_LENGTH, AGET
-      false, true,  // MONITOR_ENTER, MONITOR_EXIT
-  };
-
-  PrepareIFields(ifields);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-
-  // Mark IGET 5u as null-checked to test that NCE doesn't clear this flag.
-  mirs_[5u].optimization_flags |= MIR_IGNORE_NULL_CHECK;
-
-  PerformNullCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(NullCheckEliminationTest, Diamond) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 0u, 1u, kDexMemAccessWord },
-      { 2u, 1u, 0u, 2u, kDexMemAccessObject },  // int[].
-  };
-  static const MIRDef mirs[] = {
-      // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
-      DEF_IGET_IPUT(3u, Instruction::IPUT, 0u, 100u, 0u),
-      DEF_IGET_IPUT(6u, Instruction::IGET, 1u, 100u, 1u),  // Eliminated (BB #3 dominates #6).
-      DEF_IGET_IPUT(3u, Instruction::IGET, 2u, 101u, 0u),
-      DEF_IGET_IPUT(4u, Instruction::IPUT, 3u, 101u, 0u),  // Eliminated (BB #3 dominates #4).
-      DEF_IGET_IPUT(3u, Instruction::IGET, 4u, 102u, 0u),
-      DEF_IGET_IPUT(5u, Instruction::IPUT, 5u, 102u, 1u),  // Eliminated (BB #3 dominates #5).
-      DEF_IGET_IPUT(4u, Instruction::IPUT, 6u, 103u, 0u),
-      DEF_IGET_IPUT(6u, Instruction::IPUT, 7u, 103u, 1u),  // Not eliminated (going through BB #5).
-      DEF_IGET_IPUT(5u, Instruction::IGET, 8u, 104u, 1u),
-      DEF_IGET_IPUT(6u, Instruction::IGET, 9u, 104u, 0u),  // Not eliminated (going through BB #4).
-      DEF_INVOKE(4u, Instruction::INVOKE_DIRECT, 105u, 0u /* dummy */),
-      DEF_IGET_IPUT(5u, Instruction::IGET, 11u, 105u, 1u),
-      DEF_IGET_IPUT(6u, Instruction::IPUT, 12u, 105u, 0u),  // Eliminated.
-      DEF_IGET_IPUT(3u, Instruction::IGET_OBJECT, 13u, 106u, 2u),
-      DEF_OTHER1(3u, Instruction::IF_EQZ, 13u),            // Last insn in BB #3.
-      DEF_OTHER2(5u, Instruction::NEW_ARRAY, 13u, 107u),
-      DEF_AGET_APUT(6u, Instruction::AGET, 16u, 13u, 108u),  // Eliminated.
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, true,   // BB #3 IPUT, BB #6 IGET
-      false, true,   // BB #3 IGET, BB #4 IPUT
-      false, true,   // BB #3 IGET, BB #5 IPUT
-      false, false,  // BB #4 IPUT, BB #6 IPUT
-      false, false,  // BB #5 IGET, BB #6 IGET
-      false, false, true,  // BB #4 INVOKE, BB #5 IGET, BB #6 IPUT
-      false, false,  // BB #3 IGET_OBJECT & IF_EQZ
-      false, true,   // BB #5 NEW_ARRAY, BB #6 AGET
-  };
-
-  PrepareIFields(ifields);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformNullCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(NullCheckEliminationTest, Loop) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET_IPUT(3u, Instruction::IGET, 0u, 100u, 0u),
-      DEF_IGET_IPUT(4u, Instruction::IGET, 1u, 101u, 0u),
-      DEF_IGET_IPUT(5u, Instruction::IGET, 2u, 100u, 1u),  // Eliminated.
-      DEF_IGET_IPUT(5u, Instruction::IGET, 3u, 101u, 1u),  // Eliminated.
-      DEF_IGET_IPUT(3u, Instruction::IGET, 4u, 102u, 0u),
-      DEF_IGET_IPUT(4u, Instruction::IGET, 5u, 102u, 1u),  // Not eliminated (MOVE_OBJECT_16).
-      DEF_OTHER2(4u, Instruction::MOVE_OBJECT_16, 102u, 103u),
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, false, true, true,
-      false, false, false,
-  };
-
-  PrepareIFields(ifields);
-  PrepareLoop();
-  PrepareMIRs(mirs);
-  PerformNullCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(NullCheckEliminationTest, Catch) {
-  static const IFieldDef ifields[] = {
-      { 0u, 1u, 0u, 0u, kDexMemAccessWord },
-      { 1u, 1u, 1u, 1u, kDexMemAccessWord },
-  };
-  static const MIRDef mirs[] = {
-      DEF_IGET_IPUT(3u, Instruction::IGET, 0u, 100u, 0u),  // Before the exception edge.
-      DEF_IGET_IPUT(3u, Instruction::IGET, 1u, 101u, 0u),  // Before the exception edge.
-      DEF_IGET_IPUT(4u, Instruction::IGET, 2u, 102u, 0u),  // After the exception edge.
-      DEF_IGET_IPUT(4u, Instruction::IGET, 3u, 103u, 0u),  // After the exception edge.
-      DEF_IGET_IPUT(5u, Instruction::IGET, 4u, 100u, 1u),  // In catch handler; eliminated.
-      DEF_IGET_IPUT(5u, Instruction::IGET, 5u, 102u, 1u),  // In catch handler; not eliminated.
-      DEF_IGET_IPUT(6u, Instruction::IGET, 6u, 100u, 0u),  // Null check eliminated.
-      DEF_IGET_IPUT(6u, Instruction::IGET, 7u, 101u, 1u),  // Null check eliminated.
-      DEF_IGET_IPUT(6u, Instruction::IGET, 8u, 102u, 0u),  // Null check eliminated.
-      DEF_IGET_IPUT(6u, Instruction::IGET, 9u, 103u, 1u),  // Null check not eliminated.
-  };
-  static const bool expected_ignore_null_check[] = {
-      false, false, false, false, true, false, true, true, true, false
-  };
-
-  PrepareIFields(ifields);
-  PrepareCatch();
-  PrepareMIRs(mirs);
-  PerformNullCheckElimination();
-  ASSERT_EQ(arraysize(expected_ignore_null_check), mir_count_);
-  for (size_t i = 0u; i != arraysize(mirs); ++i) {
-    EXPECT_EQ(expected_ignore_null_check[i],
-              (mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) << i;
-  }
-}
-
-TEST_F(SuspendCheckEliminationTest, LoopNoElimination) {
-  static const MIRDef mirs[] = {
-    DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u, 0u),  // Force the pass to run.
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge back.
-  };
-
-  PrepareLoop();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(4u, 4u));
-  EXPECT_TRUE(IsSuspendCheckEdge(4u, 4u));  // Suspend point on loop to self.
-}
-
-TEST_F(SuspendCheckEliminationTest, LoopElimination) {
-  static const MIRDef mirs[] = {
-    DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in the loop.
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge back.
-  };
-
-  PrepareLoop();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(4u, 4u));
-  EXPECT_FALSE(IsSuspendCheckEdge(4u, 4u));  // No suspend point on loop to self.
-}
-
-TEST_F(SuspendCheckEliminationTest, While_While_NoElimination) {
-  static const MIRDef mirs[] = {
-    DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u, 0u),  // Force the pass to run.
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_While();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(7u, 4u));
-  EXPECT_TRUE(IsSuspendCheckEdge(7u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_While_InvokeInOuterLoopHead) {
-  static const MIRDef mirs[] = {
-    DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in outer loop head.
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_While();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(7u, 4u));
-  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_While_InvokeInOuterLoopBody) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_INVOKE(7u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in outer loop body.
-    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_While();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(7u, 4u));
-  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_While_InvokeInInnerLoopHead) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in inner loop head.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_While();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(7u, 4u));
-  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_While_InvokeInInnerLoopBody) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
-    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in inner loop body.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_While();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(7u, 4u));
-  EXPECT_TRUE(IsSuspendCheckEdge(7u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_WhileWhile_InvokeInFirstInnerLoopHead) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop head.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
-    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
-    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_WhileWhile();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(8u, 7u));
-  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));
-  ASSERT_TRUE(IsBackEdge(9u, 4u));
-  EXPECT_FALSE(IsSuspendCheckEdge(9u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_WhileWhile_InvokeInFirstInnerLoopBody) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
-    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop body.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
-    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
-    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_WhileWhile();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(8u, 7u));
-  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));
-  ASSERT_TRUE(IsBackEdge(9u, 4u));
-  EXPECT_TRUE(IsSuspendCheckEdge(9u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_WhileWhile_WithExtraEdge_InvokeInFirstInnerLoopBody) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
-    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop body.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
-    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
-    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(8u, 7u));
-  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));  // Unaffected by the extra edge.
-  ASSERT_TRUE(IsBackEdge(9u, 4u));
-  EXPECT_TRUE(IsSuspendCheckEdge(9u, 4u));
-}
-
-TEST_F(SuspendCheckEliminationTest, While_WhileWhile_WithExtraEdge_InvokeInSecondInnerLoopHead) {
-  static const MIRDef mirs[] = {
-    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
-    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
-    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
-    DEF_INVOKE(7u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in second inner loop head.
-    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
-    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
-    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
-  };
-
-  PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge();
-  PrepareMIRs(mirs);
-  PerformSuspendCheckElimination();
-  ASSERT_TRUE(IsBackEdge(6u, 5u));
-  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
-  ASSERT_TRUE(IsBackEdge(8u, 7u));
-  EXPECT_FALSE(IsSuspendCheckEdge(8u, 7u));  // Unaffected by the extra edge.
-  ASSERT_TRUE(IsBackEdge(9u, 4u));
-  EXPECT_FALSE(IsSuspendCheckEdge(9u, 4u));
-}
-
-}  // namespace art
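Aside: every gtest above follows the same pattern — a Perform*Elimination() run sets per-MIR optimization flags, and the expectation arrays are simply masked against them. A minimal sketch of that check, assuming the MIR type and MIR_* flag constants from compiler/dex/mir_graph.h:

// Sketch only: mirrors how the expectations above read a MIR's flags.
inline bool HasOptFlag(const MIR& mir, uint32_t flag) {
  return (mir.optimization_flags & flag) != 0;
}
// Usage, as in the verification loops above:
//   EXPECT_EQ(expected_ignore_null_check[i],
//             HasOptFlag(mirs_[i], MIR_IGNORE_NULL_CHECK)) << i;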
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
deleted file mode 100644
index 16414ef..0000000
--- a/compiler/dex/pass.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_H_
-#define ART_COMPILER_DEX_PASS_H_
-
-#include <string>
-
-#include "base/logging.h"
-
-namespace art {
-
-// Forward declarations.
-class BasicBlock;
-class Pass;
-
-// Empty pass data class; can be extended by any pass extending the base Pass class.
-class PassDataHolder {
-};
-
-/**
- * @class Pass
- * @brief Base Pass class; can be extended to define how the pass performs its work.
- */
-class Pass {
- public:
-  explicit Pass(const char* name)
-    : pass_name_(name) {
-  }
-
-  virtual ~Pass() {
-  }
-
-  virtual const char* GetName() const {
-    return pass_name_;
-  }
-
-  /**
-   * @brief Gate for the pass: determines whether to execute the pass for a given CompilationUnit.
-   * @param data the PassDataHolder.
-   * @return whether or not to execute the pass.
-   */
-  virtual bool Gate(const PassDataHolder* data ATTRIBUTE_UNUSED) const {
-    // Base class says yes.
-    return true;
-  }
-
-  /**
-   * @brief Start of the pass: called before the Worker function.
-   */
-  virtual void Start(PassDataHolder* data ATTRIBUTE_UNUSED) const {
-  }
-
-  /**
-   * @brief End of the pass: called after the WalkBasicBlocks function.
-   */
-  virtual void End(PassDataHolder* data ATTRIBUTE_UNUSED) const {
-  }
-
-  /**
-   * @param data the object containing data necessary for the pass.
-   * @return whether or not there was a change when walking the BasicBlock.
-   */
-  virtual bool Worker(PassDataHolder* data ATTRIBUTE_UNUSED) const {
-    // Passes that do all their work in Start() or End() should not allow useless node iteration.
-    LOG(FATAL) << "Unsupported default Worker() used for " << GetName();
-    UNREACHABLE();
-  }
-
- protected:
-  /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */
-  const char* const pass_name_;
-
- private:
-  // Make all passes non-copyable.
-  DISALLOW_COPY_AND_ASSIGN(Pass);
-};
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_H_
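For orientation, a concrete pass built on the interface deleted above needs little more than a name and a Worker(). A hypothetical subclass might look like the following (CountBlocksPass is illustrative only, not a pass that existed in the tree):

// Hypothetical subclass of the Pass interface above.
class CountBlocksPass : public Pass {
 public:
  CountBlocksPass() : Pass("CountBlocks") {}

  bool Gate(const PassDataHolder* data ATTRIBUTE_UNUSED) const OVERRIDE {
    return true;  // Always run; a real pass would inspect the data holder.
  }

  bool Worker(PassDataHolder* data ATTRIBUTE_UNUSED) const OVERRIDE {
    // Invoked once per BasicBlock by the driver's traversal.
    return false;  // Nothing changed, so no re-iteration is requested.
  }
};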
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
deleted file mode 100644
index 34a6f63..0000000
--- a/compiler/dex/pass_driver.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_DRIVER_H_
-#define ART_COMPILER_DEX_PASS_DRIVER_H_
-
-#include <vector>
-
-#include "base/logging.h"
-#include "pass.h"
-#include "pass_manager.h"
-
-namespace art {
-
-class Pass;
-class PassDataHolder;
-class PassDriver;
-class PassManager;
-
-// Empty holder for the constructor.
-class PassDriverDataHolder {
-};
-
-/**
- * @class PassDriver
- * @brief PassDriver is the wrapper around all Pass instances in order to execute them
- */
-class PassDriver {
- public:
-  explicit PassDriver(const PassManager* const pass_manager) : pass_manager_(pass_manager) {
-    pass_list_ = *pass_manager_->GetDefaultPassList();
-    DCHECK(!pass_list_.empty());
-  }
-
-  virtual ~PassDriver() {
-  }
-
-  /**
-   * @brief Insert a Pass: it is an error for two passes to share the same name.
-   */
-  void InsertPass(const Pass* new_pass) {
-    DCHECK(new_pass != nullptr);
-    DCHECK(new_pass->GetName() != nullptr);
-    DCHECK_NE(new_pass->GetName()[0], 0);
-
-    // It is an error to override an existing pass.
-    DCHECK(GetPass(new_pass->GetName()) == nullptr)
-        << "Pass name " << new_pass->GetName() << " already used.";
-    // Now add to the list.
-    pass_list_.push_back(new_pass);
-  }
-
-  /**
-   * @brief Run a pass using the name as key.
-   * @return whether the pass was applied.
-   */
-  virtual bool RunPass(const char* pass_name) {
-    // Paranoid: we need a non-empty pass name.
-    DCHECK(pass_name != nullptr);
-    DCHECK_NE(pass_name[0], 0);
-
-    const Pass* cur_pass = GetPass(pass_name);
-
-    if (cur_pass != nullptr) {
-      return RunPass(cur_pass);
-    }
-
-    // Return false, we did not find the pass.
-    return false;
-  }
-
-  /**
-   * @brief Runs all the passes in pass_list_.
-   */
-  void Launch() {
-    for (const Pass* cur_pass : pass_list_) {
-      RunPass(cur_pass);
-    }
-  }
-
-  /**
-   * @brief Searches for a particular pass.
-   * @param name the name of the pass to search for.
-   */
-  const Pass* GetPass(const char* name) const {
-    for (const Pass* cur_pass : pass_list_) {
-      if (strcmp(name, cur_pass->GetName()) == 0) {
-        return cur_pass;
-      }
-    }
-    return nullptr;
-  }
-
-  /**
-   * @brief Run a pass using the Pass itself.
-   * @param time_split do we want a time split request (default: false)?
-   * @return whether the pass was applied.
-   */
-  virtual bool RunPass(const Pass* pass, bool time_split = false) = 0;
-
- protected:
-  /**
-   * @brief Apply a pass: perform the start/work/end functions.
-   */
-  virtual void ApplyPass(PassDataHolder* data, const Pass* pass) {
-    pass->Start(data);
-    DispatchPass(pass);
-    pass->End(data);
-  }
-
-  /**
-   * @brief Dispatch a pass.
-   * Gives the ability to add logic when running the pass.
-   */
-  virtual void DispatchPass(const Pass* pass ATTRIBUTE_UNUSED) {
-  }
-
-  /** @brief List of passes: provides the order to execute the passes.
-   *  Passes are owned by pass_manager_. */
-  std::vector<const Pass*> pass_list_;
-
-  const PassManager* const pass_manager_;
-};
-
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_DRIVER_H_
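A driver, then, is just a named and ordered pass list. Assuming a concrete subclass (ConcreteDriver here is a hypothetical stand-in for something like the PassDriverME deleted below) that implements the pure virtual RunPass(const Pass*, bool), typical use reduces to:

// Sketch of driving passes through the abstract PassDriver above.
void RunAllPasses(const PassManager* manager) {
  ConcreteDriver driver(manager);  // Copies the manager's default pass list.
  const Pass* layout = driver.GetPass("CodeLayout");
  if (layout != nullptr) {
    driver.RunPass(layout);        // Run a single pass by pointer...
  }
  driver.Launch();                 // ...or every pass in list order.
}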
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
deleted file mode 100644
index d0af71c..0000000
--- a/compiler/dex/pass_driver_me.h
+++ /dev/null
@@ -1,316 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_H_
-#define ART_COMPILER_DEX_PASS_DRIVER_ME_H_
-
-#include <cstdlib>
-#include <cstring>
-
-#include "bb_optimizations.h"
-#include "dataflow_iterator.h"
-#include "dataflow_iterator-inl.h"
-#include "dex_flags.h"
-#include "pass_driver.h"
-#include "pass_manager.h"
-#include "pass_me.h"
-#include "safe_map.h"
-
-namespace art {
-
-class PassManager;
-class PassManagerOptions;
-
-class PassDriverME: public PassDriver {
- public:
-  PassDriverME(const PassManager* const pass_manager, CompilationUnit* cu)
-      : PassDriver(pass_manager), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") {
-    pass_me_data_holder_.bb = nullptr;
-    pass_me_data_holder_.c_unit = cu;
-  }
-
-  ~PassDriverME() {
-  }
-
-  void DispatchPass(const Pass* pass) {
-    VLOG(compiler) << "Dispatching " << pass->GetName();
-    const PassME* me_pass = down_cast<const PassME*>(pass);
-
-    DataFlowAnalysisMode mode = me_pass->GetTraversal();
-
-    switch (mode) {
-      case kPreOrderDFSTraversal:
-        DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kRepeatingPreOrderDFSTraversal:
-        DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kRepeatingPostOrderDFSTraversal:
-        DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kReversePostOrderDFSTraversal:
-        DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kRepeatingReversePostOrderDFSTraversal:
-        DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kPostOrderDOMTraversal:
-        DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kTopologicalSortTraversal:
-        DoWalkBasicBlocks<TopologicalSortIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kLoopRepeatingTopologicalSortTraversal:
-        DoWalkBasicBlocks<LoopRepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kAllNodes:
-        DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
-        break;
-      case kNoNodes:
-        break;
-      default:
-        LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode;
-        break;
-    }
-  }
-
-  bool RunPass(const Pass* pass, bool time_split) OVERRIDE {
-    // Paranoid: c_unit and pass cannot be null, and the pass should have a name.
-    DCHECK(pass != nullptr);
-    DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
-    CompilationUnit* c_unit = pass_me_data_holder_.c_unit;
-    DCHECK(c_unit != nullptr);
-
-    // Do we perform a time split?
-    if (time_split) {
-      c_unit->NewTimingSplit(pass->GetName());
-    }
-
-    // First, work on determining pass verbosity.
-    bool old_print_pass = c_unit->print_pass;
-    c_unit->print_pass = pass_manager_->GetOptions().GetPrintAllPasses();
-    auto* const options = &pass_manager_->GetOptions();
-    const std::string& print_pass_list = options->GetPrintPassList();
-    if (!print_pass_list.empty() && strstr(print_pass_list.c_str(), pass->GetName()) != nullptr) {
-      c_unit->print_pass = true;
-    }
-
-    // Next, check if there are any overridden settings for the pass that change the default
-    // configuration.
-    c_unit->overridden_pass_options.clear();
-    FillOverriddenPassSettings(options, pass->GetName(), c_unit->overridden_pass_options);
-    if (c_unit->print_pass) {
-      for (auto setting_it : c_unit->overridden_pass_options) {
-        LOG(INFO) << "Overridden option \"" << setting_it.first << ":"
-          << setting_it.second << "\" for pass \"" << pass->GetName() << "\"";
-      }
-    }
-
-    // Check the pass gate first.
-    bool should_apply_pass = pass->Gate(&pass_me_data_holder_);
-    if (should_apply_pass) {
-      // Applying the pass: first start, doWork, and end calls.
-      this->ApplyPass(&pass_me_data_holder_, pass);
-
-      bool should_dump = (c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0;
-
-      const std::string& dump_pass_list = pass_manager_->GetOptions().GetDumpPassList();
-      if (!dump_pass_list.empty()) {
-        const bool found = strstr(dump_pass_list.c_str(), pass->GetName()) != nullptr;
-        should_dump = should_dump || found;
-      }
-
-      if (should_dump) {
-        // Do we want to log it?
-        if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) {
-          // Do we have a pass folder?
-          const PassME* me_pass = (down_cast<const PassME*>(pass));
-          const char* passFolder = me_pass->GetDumpCFGFolder();
-          DCHECK(passFolder != nullptr);
-
-          if (passFolder[0] != 0) {
-            // Create directory prefix.
-            std::string prefix = GetDumpCFGFolder();
-            prefix += passFolder;
-            prefix += "/";
-
-            c_unit->mir_graph->DumpCFG(prefix.c_str(), false);
-          }
-        }
-      }
-    }
-
-    // Before wrapping up with this pass, restore old pass verbosity flag.
-    c_unit->print_pass = old_print_pass;
-
-    // If the pass gate passed, we can declare success.
-    return should_apply_pass;
-  }
-
-  static void PrintPassOptions(PassManager* manager) {
-    for (const auto* pass : *manager->GetDefaultPassList()) {
-      const PassME* me_pass = down_cast<const PassME*>(pass);
-      if (me_pass->HasOptions()) {
-        LOG(INFO) << "Pass options for \"" << me_pass->GetName() << "\" are:";
-        SafeMap<const std::string, const OptionContent> overridden_settings;
-        FillOverriddenPassSettings(&manager->GetOptions(), me_pass->GetName(),
-                                   overridden_settings);
-        me_pass->PrintPassOptions(overridden_settings);
-      }
-    }
-  }
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
-  }
-
- protected:
-  /** @brief The data holder that contains data needed for the PassDriverME. */
-  PassMEDataHolder pass_me_data_holder_;
-
-  /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */
-  const char* dump_cfg_folder_;
-
-  static void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass,
-                                DataflowIterator* iterator) {
-    // Paranoid: Check the iterator before walking the BasicBlocks.
-    DCHECK(iterator != nullptr);
-    bool change = false;
-    for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) {
-      data->bb = bb;
-      change = pass->Worker(data);
-    }
-  }
-
-  template <typename Iterator>
-  inline static void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = data->c_unit;
-    DCHECK(c_unit != nullptr);
-    Iterator iterator(c_unit->mir_graph.get());
-    DoWalkBasicBlocks(data, pass, &iterator);
-  }
-
-  /**
-   * @brief Fills settings_to_fill with all of the applicable options found in the
-   * overridden pass options list.
-   * @param options The pass manager options that hold the overridden-options string.
-   * @param pass_name The pass name for which to fill settings.
-   * @param settings_to_fill Filled with a mapping from option name to its overridden value.
-   */
-  static void FillOverriddenPassSettings(
-      const PassManagerOptions* options, const char* pass_name,
-      SafeMap<const std::string, const OptionContent>& settings_to_fill) {
-    const std::string& settings = options->GetOverriddenPassOptions();
-    const size_t settings_len = settings.size();
-
-    // Before anything, check if we care about anything right now.
-    if (settings_len == 0) {
-      return;
-    }
-
-    const size_t pass_name_len = strlen(pass_name);
-    const size_t min_setting_size = 4;  // 2 delimiters, 1 setting name, 1 setting
-    size_t search_pos = 0;
-
-    // If there is no room for pass options, exit early.
-    if (settings_len < pass_name_len + min_setting_size) {
-      return;
-    }
-
-    do {
-      search_pos = settings.find(pass_name, search_pos);
-
-      // Check if we found this pass name in the rest of the string.
-      if (search_pos == std::string::npos) {
-        // No more settings for this pass.
-        break;
-      }
-
-      // The string contains the pass name. Now check that there is
-      // room for the settings: at least one char for setting name,
-      // two chars for two delimiters, and at least one char for the setting.
-      if (search_pos + pass_name_len + min_setting_size >= settings_len) {
-        // No more settings for this pass.
-        break;
-      }
-
-      // Update the current search position to not include the pass name.
-      search_pos += pass_name_len;
-
-      // The format must be "PassName:SettingName:#" where # is the setting.
-      // Thus look for the first ":" which must exist.
-      if (settings[search_pos] != ':') {
-        // Missing delimiter right after pass name.
-        continue;
-      } else {
-        search_pos += 1;
-      }
-
-      // Now look for the actual setting by finding the next ":" delimiter.
-      const size_t setting_name_pos = search_pos;
-      size_t setting_pos = settings.find(':', setting_name_pos);
-
-      if (setting_pos == std::string::npos) {
-        // Missing a delimiter that would capture where the setting starts.
-        continue;
-      } else if (setting_pos == setting_name_pos) {
-        // Missing setting: the position did not move past the setting name.
-        continue;
-      } else {
-        // Skip the delimiter.
-        setting_pos += 1;
-      }
-
-      // Look for the terminating delimiter which must be a comma.
-      size_t next_configuration_separator = settings.find(',', setting_pos);
-      if (next_configuration_separator == std::string::npos) {
-        next_configuration_separator = settings_len;
-      }
-
-      // Prevent end of string errors.
-      if (next_configuration_separator == setting_pos) {
-        continue;
-      }
-
-      // Get the actual setting itself.
-      std::string setting_string =
-          settings.substr(setting_pos, next_configuration_separator - setting_pos);
-
-      std::string setting_name =
-          settings.substr(setting_name_pos, setting_pos - setting_name_pos - 1);
-
-      // We attempt to convert the option value to an integer. strtoll is used
-      // because it reports conversion failure without throwing.
-      char* end_ptr = nullptr;
-      const char* setting_ptr = setting_string.c_str();
-      DCHECK(setting_ptr != nullptr);  // Paranoid: setting_ptr must be a valid pointer.
-      int64_t int_value = strtoll(setting_ptr, &end_ptr, 0);
-      DCHECK(end_ptr != nullptr);  // Paranoid: end_ptr must be set by the strtoll call.
-
-      // If the strtoll call succeeded, the option is treated as an integer.
-      if (*setting_ptr != '\0' && end_ptr != setting_ptr && *end_ptr == '\0') {
-        settings_to_fill.Put(setting_name, OptionContent(int_value));
-      } else {
-        // Otherwise, it is considered as a string.
-        settings_to_fill.Put(setting_name, OptionContent(setting_string.c_str()));
-      }
-      search_pos = next_configuration_separator;
-    } while (true);
-  }
-};
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_DRIVER_ME_H_
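The overridden-options string parsed by FillOverriddenPassSettings() above is a flat list of comma-separated PassName:OptionName:Value triples. A worked example of the parsing rules (the option names here are invented; only the format comes from the code):

// Hypothetical input:
//   "ClassInitCheckElimination:Verbose:1,CodeLayout:Mode:fast"
//
// For pass_name == "ClassInitCheckElimination", settings_to_fill gets:
//   "Verbose" -> OptionContent(1)       // strtoll succeeds: stored as integer.
// For pass_name == "CodeLayout", settings_to_fill gets:
//   "Mode" -> OptionContent("fast")     // strtoll fails: stored as string.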
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
deleted file mode 100644
index 375003b..0000000
--- a/compiler/dex/pass_driver_me_opts.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "pass_driver_me_opts.h"
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "bb_optimizations.h"
-#include "dataflow_iterator.h"
-#include "dataflow_iterator-inl.h"
-#include "pass_driver_me_opts.h"
-#include "pass_manager.h"
-#include "post_opt_passes.h"
-
-namespace art {
-
-void PassDriverMEOpts::SetupPasses(PassManager* pass_manager) {
-  /*
-   * Create the pass list. These passes are immutable and are shared across the threads.
-   *
-   * Advantage is that there will be no race conditions here.
-   * Disadvantage is that the passes can't change their internal states depending on the CompilationUnit:
-   *   - This is not yet an issue: no current pass would require it.
-   */
-  pass_manager->AddPass(new StringChange);
-  pass_manager->AddPass(new CacheFieldLoweringInfo);
-  pass_manager->AddPass(new CacheMethodLoweringInfo);
-  pass_manager->AddPass(new CalculatePredecessors);
-  pass_manager->AddPass(new DFSOrders);
-  pass_manager->AddPass(new ClassInitCheckElimination);
-  pass_manager->AddPass(new SpecialMethodInliner);
-  pass_manager->AddPass(new NullCheckElimination);
-  pass_manager->AddPass(new BBCombine);
-  pass_manager->AddPass(new CodeLayout);
-  pass_manager->AddPass(new GlobalValueNumberingPass);
-  pass_manager->AddPass(new DeadCodeEliminationPass);
-  pass_manager->AddPass(new GlobalValueNumberingCleanupPass);
-  pass_manager->AddPass(new ConstantPropagation);
-  pass_manager->AddPass(new MethodUseCount);
-  pass_manager->AddPass(new BBOptimizations);
-  pass_manager->AddPass(new SuspendCheckElimination);
-}
-
-void PassDriverMEOpts::ApplyPass(PassDataHolder* data, const Pass* pass) {
-  const PassME* const pass_me = down_cast<const PassME*>(pass);
-  DCHECK(pass_me != nullptr);
-  PassMEDataHolder* const pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-  // Set to dirty.
-  pass_me_data_holder->dirty = true;
-  // First call the base class' version.
-  PassDriver::ApplyPass(data, pass);
-  // If the pass changed basic blocks or def-uses and left the data dirty,
-  // recalculate the basic block information.
-  if (pass_me->GetFlag(kOptimizationBasicBlockChange) ||
-      pass_me->GetFlag(kOptimizationDefUsesChange)) {
-    if (pass_me_data_holder->dirty) {
-      CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-      c_unit->mir_graph->CalculateBasicBlockInformation(post_opt_pass_manager_);
-    }
-  }
-}
-
-}  // namespace art
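Putting the pieces together, a compile step owns two managers — one for the optimization passes above and one for the post-opt passes — and hands both to the driver. A minimal sketch of the intended wiring (the function name is illustrative; manager construction happens in the surrounding compiler driver):

// Sketch, assuming both managers were already populated via
// SetupPasses() and CreateDefaultPassList().
void RunMEOptimizations(const PassManager* opt_manager,
                        const PassManager* post_opt_manager,
                        CompilationUnit* cu) {
  PassDriverMEOpts driver(opt_manager, post_opt_manager, cu);
  driver.Launch();  // ApplyPass() recomputes BB info after dirtying passes.
}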
diff --git a/compiler/dex/pass_driver_me_opts.h b/compiler/dex/pass_driver_me_opts.h
deleted file mode 100644
index c8093d0..0000000
--- a/compiler/dex/pass_driver_me_opts.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_
-#define ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_
-
-#include "pass_driver_me.h"
-
-namespace art {
-
-// Forward Declarations.
-struct CompilationUnit;
-class Pass;
-class PassDataHolder;
-class PassManager;
-
-class PassDriverMEOpts : public PassDriverME {
- public:
-  PassDriverMEOpts(const PassManager* const manager,
-                   const PassManager* const post_opt_pass_manager,
-                   CompilationUnit* cu)
-      : PassDriverME(manager, cu), post_opt_pass_manager_(post_opt_pass_manager) {
-  }
-
-  ~PassDriverMEOpts() {
-  }
-
-  /**
-   * @brief Create and register this driver's passes with the pass manager.
-   */
-  static void SetupPasses(PassManager* pass_manager);
-
-  /**
-   * @brief Apply a pass: perform the start/work/end functions.
-   */
-  virtual void ApplyPass(PassDataHolder* data, const Pass* pass) OVERRIDE;
-
-  const PassManager* const post_opt_pass_manager_;
-};
-
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_
diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc
deleted file mode 100644
index b35bc3d..0000000
--- a/compiler/dex/pass_driver_me_post_opt.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "pass_driver_me_post_opt.h"
-
-#include "base/macros.h"
-#include "post_opt_passes.h"
-#include "pass_manager.h"
-
-namespace art {
-
-void PassDriverMEPostOpt::SetupPasses(PassManager* pass_manager) {
-  /*
-   * Create the pass list. These passes are immutable and are shared across the threads.
-   *
-   * Advantage is that there will be no race conditions here.
-   * Disadvantage is that the passes can't change their internal states depending on the CompilationUnit:
-   *   - This is not yet an issue: no current pass would require it.
-   */
-  // The initial list of passes to be used by the PassDriverMEPostOpt.
-  pass_manager->AddPass(new DFSOrders);
-  pass_manager->AddPass(new BuildDomination);
-  pass_manager->AddPass(new TopologicalSortOrders);
-  pass_manager->AddPass(new InitializeSSATransformation);
-  pass_manager->AddPass(new ClearPhiInstructions);
-  pass_manager->AddPass(new DefBlockMatrix);
-  pass_manager->AddPass(new FindPhiNodeBlocksPass);
-  pass_manager->AddPass(new SSAConversion);
-  pass_manager->AddPass(new PhiNodeOperands);
-  pass_manager->AddPass(new PerformInitRegLocations);
-  pass_manager->AddPass(new TypeInferencePass);
-  pass_manager->AddPass(new FinishSSATransformation);
-}
-
-}  // namespace art
diff --git a/compiler/dex/pass_driver_me_post_opt.h b/compiler/dex/pass_driver_me_post_opt.h
deleted file mode 100644
index 94176db..0000000
--- a/compiler/dex/pass_driver_me_post_opt.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_
-#define ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_
-
-#include "pass_driver_me.h"
-
-namespace art {
-
-// Forward Declarations.
-struct CompilationUnit;
-class Pass;
-class PassDataHolder;
-
-class PassDriverMEPostOpt : public PassDriverME {
- public:
-  PassDriverMEPostOpt(const PassManager* const manager, CompilationUnit* cu)
-      : PassDriverME(manager, cu) {
-  }
-
-  ~PassDriverMEPostOpt() {
-  }
-
-  /**
-   * @brief Create and register this driver's passes with the pass manager.
-   */
-  static void SetupPasses(PassManager* pass_manager);
-};
-
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_
diff --git a/compiler/dex/pass_manager.cc b/compiler/dex/pass_manager.cc
deleted file mode 100644
index 6377a6c..0000000
--- a/compiler/dex/pass_manager.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "pass_manager.h"
-
-#include "base/stl_util.h"
-#include "pass_me.h"
-
-namespace art {
-
-PassManager::PassManager(const PassManagerOptions& options) : options_(options) {
-}
-
-PassManager::~PassManager() {
-  STLDeleteElements(&passes_);
-}
-
-void PassManager::CreateDefaultPassList() {
-  default_pass_list_.clear();
-  // Add each pass which isn't disabled into default_pass_list_.
-  for (const auto* pass : passes_) {
-    if (options_.GetDisablePassList().find(pass->GetName()) != std::string::npos) {
-      VLOG(compiler) << "Skipping disabled pass " << pass->GetName();
-    } else {
-      default_pass_list_.push_back(pass);
-    }
-  }
-}
-
-void PassManager::PrintPassNames() const {
-  LOG(INFO) << "Loop Passes are:";
-  for (const Pass* cur_pass : default_pass_list_) {
-    LOG(INFO) << "\t-" << cur_pass->GetName();
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/pass_manager.h b/compiler/dex/pass_manager.h
deleted file mode 100644
index 68e488d..0000000
--- a/compiler/dex/pass_manager.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_MANAGER_H_
-#define ART_COMPILER_DEX_PASS_MANAGER_H_
-
-#include <string>
-#include <vector>
-
-#include "base/logging.h"
-
-namespace art {
-
-class Pass;
-
-class PassManagerOptions {
- public:
-  PassManagerOptions()
-     : default_print_passes_(false),
-       print_pass_names_(false),
-       print_pass_options_(false) {
-  }
-  PassManagerOptions(const PassManagerOptions&) = default;
-
-  void SetPrintPassNames(bool b) {
-    print_pass_names_ = b;
-  }
-
-  void SetPrintAllPasses() {
-    default_print_passes_ = true;
-  }
-  bool GetPrintAllPasses() const {
-    return default_print_passes_;
-  }
-
-  void SetDisablePassList(const std::string& list) {
-    disable_pass_list_ = list;
-  }
-  const std::string& GetDisablePassList() const {
-    return disable_pass_list_;
-  }
-
-  void SetPrintPassList(const std::string& list) {
-    print_pass_list_ = list;
-  }
-  const std::string& GetPrintPassList() const {
-    return print_pass_list_;
-  }
-
-  void SetDumpPassList(const std::string& list) {
-    dump_pass_list_ = list;
-  }
-  const std::string& GetDumpPassList() const {
-    return dump_pass_list_;
-  }
-
-  /**
-   * @brief Used to set a string that contains the overridden pass options.
-   * @details An overridden pass option means that the pass uses this option
-   * instead of using its default option.
-   * @param list The string passed by the user with overridden options. The string is in the format
-   * Pass1Name:Pass1Option:Pass1Setting,Pass2Name:Pass2Option:Pass2Setting
-   */
-  void SetOverriddenPassOptions(const std::string& list) {
-    overridden_pass_options_list_ = list;
-  }
-  const std::string& GetOverriddenPassOptions() const {
-    return overridden_pass_options_list_;
-  }
-
-  void SetPrintPassOptions(bool b) {
-    print_pass_options_ = b;
-  }
-  bool GetPrintPassOptions() const {
-    return print_pass_options_;
-  }
-
- private:
-  /** @brief Do we print the pass log messages by default? */
-  bool default_print_passes_;
-
-  /** @brief Which passes should print their log messages? */
-  std::string print_pass_list_;
-
-  /** @brief Which passes should dump the CFG? */
-  std::string dump_pass_list_;
-
-  /** @brief String of all options that should be overridden for selected passes. */
-  std::string overridden_pass_options_list_;
-
-  /** @brief String listing all passes that should be disabled. */
-  std::string disable_pass_list_;
-
-  /** @brief Whether or not we print all the passes when we create the pass manager */
-  bool print_pass_names_;
-
-  /** @brief Whether or not we print all the pass options when we create the pass manager */
-  bool print_pass_options_;
-};
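The override string documented above has the shape Pass1Name:Pass1Option:Pass1Setting,Pass2Name:Pass2Option:Pass2Setting. A hedged parsing sketch for that format using only the standard library (ART's real parsing of this string lives elsewhere in the compiler):

#include <sstream>
#include <string>
#include <tuple>
#include <vector>

// Splits "Pass:Option:Setting,Pass:Option:Setting,..." into triples.
std::vector<std::tuple<std::string, std::string, std::string>>
ParseOverrides(const std::string& list) {
  std::vector<std::tuple<std::string, std::string, std::string>> result;
  std::istringstream stream(list);
  std::string entry;
  while (std::getline(stream, entry, ',')) {
    std::istringstream fields(entry);
    std::string pass, option, setting;
    if (std::getline(fields, pass, ':') &&
        std::getline(fields, option, ':') &&
        std::getline(fields, setting)) {
      result.emplace_back(pass, option, setting);  // malformed entries dropped
    }
  }
  return result;
}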
-
-/**
- * @class PassManager
- * @brief Owns passes
- */
-class PassManager {
- public:
-  explicit PassManager(const PassManagerOptions& options);
-  virtual ~PassManager();
-  void CreateDefaultPassList();
-  void AddPass(const Pass* pass) {
-    passes_.push_back(pass);
-  }
-  /**
-   * @brief Print the pass names of all the passes available.
-   */
-  void PrintPassNames() const;
-  const std::vector<const Pass*>* GetDefaultPassList() const {
-    return &default_pass_list_;
-  }
-  const PassManagerOptions& GetOptions() const {
-    return options_;
-  }
-
- private:
-  /** @brief The set of possible passes.  */
-  std::vector<const Pass*> passes_;
-
-  /** @brief The default pass list is used to initialize pass_list_. */
-  std::vector<const Pass*> default_pass_list_;
-
-  /** @brief Pass manager options. */
-  PassManagerOptions options_;
-
-  DISALLOW_COPY_AND_ASSIGN(PassManager);
-};
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_MANAGER_H_
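PassManager above owns its raw Pass pointers and frees them in the destructor via STLDeleteElements. The same ownership contract, sketched with std::unique_ptr so no manual destructor is needed (stand-in types, not the ART classes):

#include <memory>
#include <vector>

struct Pass {
  virtual ~Pass() = default;
};

class OwningManager {
 public:
  void AddPass(std::unique_ptr<const Pass> pass) {
    passes_.push_back(std::move(pass));
  }
  // No destructor required: each unique_ptr releases its pass automatically,
  // which is what PassManager::~PassManager does by hand.

 private:
  std::vector<std::unique_ptr<const Pass>> passes_;
};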
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
deleted file mode 100644
index d3cf393..0000000
--- a/compiler/dex/pass_me.h
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_PASS_ME_H_
-#define ART_COMPILER_DEX_PASS_ME_H_
-
-#include <string>
-
-#include "base/logging.h"
-#include "pass.h"
-#include "compiler_ir.h"
-#include "safe_map.h"
-
-namespace art {
-
-// Forward declarations.
-class BasicBlock;
-struct CompilationUnit;
-
-/**
- * @brief OptimizationFlag enumerates additional tasks to perform for a given pass.
- * @details Each enumerator must be a power of 2 so the flags can be combined as a bit mask.
- */
-enum OptimizationFlag {
-  kOptimizationBasicBlockChange = 1,  /// @brief Has there been a change to a BasicBlock?
-  kOptimizationDefUsesChange = 2,     /// @brief Has there been a change to a def-use?
-  kLoopStructureChange = 4,           /// @brief Has there been a loop structural change?
-};
-std::ostream& operator<<(std::ostream& os, const OptimizationFlag& rhs);
-
-// Data holder class.
-class PassMEDataHolder: public PassDataHolder {
- public:
-  CompilationUnit* c_unit;
-  BasicBlock* bb;
-  void* data;               /**< @brief Any data the pass wants to use */
-  bool dirty;               /**< @brief Has the pass rendered the CFG dirty, requiring post-opt? */
-};
-
-enum DataFlowAnalysisMode {
-  kAllNodes = 0,                           /// @brief All nodes.
-  kPreOrderDFSTraversal,                   /// @brief Depth-First-Search / Pre-Order.
-  kRepeatingPreOrderDFSTraversal,          /// @brief Depth-First-Search / Repeating Pre-Order.
-  kReversePostOrderDFSTraversal,           /// @brief Depth-First-Search / Reverse Post-Order.
-  kRepeatingPostOrderDFSTraversal,         /// @brief Depth-First-Search / Repeating Post-Order.
-  kRepeatingReversePostOrderDFSTraversal,  /// @brief Depth-First-Search / Repeating Reverse Post-Order.
-  kPostOrderDOMTraversal,                  /// @brief Dominator tree / Post-Order.
-  kTopologicalSortTraversal,               /// @brief Topological Order traversal.
-  kLoopRepeatingTopologicalSortTraversal,  /// @brief Loop-repeating Topological Order traversal.
-  kNoNodes,                                /// @brief Skip BasicBlock traversal.
-};
-std::ostream& operator<<(std::ostream& os, const DataFlowAnalysisMode& rhs);
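Each DataFlowAnalysisMode above selects the order in which a pass visits the basic blocks. A reduced sketch of that choice, treating a traversal as an ordered list of block indices and covering only two of the modes (the real iterators live in dataflow_iterator-inl.h):

#include <algorithm>
#include <cstddef>
#include <vector>

enum class Traversal { kAllNodes, kReversePostOrder };

// post_order is assumed to be a permutation of the block indices 0..n-1.
std::vector<size_t> OrderBlocks(Traversal mode,
                                const std::vector<size_t>& post_order) {
  std::vector<size_t> order = post_order;
  switch (mode) {
    case Traversal::kAllNodes:
      std::sort(order.begin(), order.end());     // plain index order
      break;
    case Traversal::kReversePostOrder:
      std::reverse(order.begin(), order.end());  // classic forward DFA order
      break;
  }
  return order;
}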
-
-/**
- * @class PassME
- * @brief PassME is the Pass structure for the middle-end optimizations.
- * @details The following structure carries the data shared by the different optimization passes we are going to run.
- */
-class PassME : public Pass {
- public:
-  explicit PassME(const char* name, DataFlowAnalysisMode type = kAllNodes,
-          unsigned int flags = 0u, const char* dump = "")
-    : Pass(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
-  }
-
-  PassME(const char* name, DataFlowAnalysisMode type, const char* dump)
-    : Pass(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
-  }
-
-  PassME(const char* name, const char* dump)
-    : Pass(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
-  }
-
-  ~PassME() {
-    default_options_.clear();
-  }
-
-  virtual DataFlowAnalysisMode GetTraversal() const {
-    return traversal_type_;
-  }
-
-  /**
-   * @return Returns whether the pass has any configurable options.
-   */
-  bool HasOptions() const {
-    return !default_options_.empty();
-  }
-
-  /**
-   * @brief Prints the pass options along with default settings if there are any.
-   * @details The printing is done using LOG(INFO).
-   */
-  void PrintPassDefaultOptions() const {
-    for (const auto& option : default_options_) {
-      LOG(INFO) << "\t" << option.first << ":" << option.second;
-    }
-  }
-
-  /**
-   * @brief Prints the pass options along with either default or overridden setting.
-   * @param overridden_options The overridden settings for this pass.
-   */
-  void PrintPassOptions(SafeMap<const std::string, const OptionContent>& overridden_options) const {
-    // We walk through the default options only to get the pass names. We use GetPassOption to
-    // also consider the overridden ones.
-    for (const auto& option : default_options_) {
-      LOG(INFO) << "\t" << option.first << ":"
-                << GetPassOption(option.first, overridden_options);
-    }
-  }
-
-  /**
-   * @brief Used to obtain the option structure for a pass.
-   * @details Will return the overridden option if it exists or the default one otherwise.
-   * @param option_name The name of option whose setting to look for.
-   * @param c_unit The compilation unit currently being handled.
-   * @return Returns the option structure containing the option value.
-  */
-  const OptionContent& GetPassOption(const char* option_name, CompilationUnit* c_unit) const {
-    return GetPassOption(option_name, c_unit->overridden_pass_options);
-  }
-
-  /**
-   * @brief Used to obtain the option for a pass as a string.
-   * @details Will return the overridden option if it exists or the default one otherwise.
-   * It will return nullptr if the required option value is not a string.
-   * @param option_name The name of option whose setting to look for.
-   * @param c_unit The compilation unit currently being handled.
-   * @return Returns the overridden option if it exists or the default one otherwise.
-  */
-  const char* GetStringPassOption(const char* option_name, CompilationUnit* c_unit) const {
-    return GetStringPassOption(option_name, c_unit->overridden_pass_options);
-  }
-
-  /**
-    * @brief Used to obtain the pass option value as an integer.
-    * @details Will return the overridden option if it exists or the default one otherwise.
-    * It will return 0 if the required option value is not an integer.
-    * @param option_name The name of the option whose setting to look for.
-    * @param c_unit The compilation unit currently being handled.
-    * @return Returns the overridden option if it exists or the default one otherwise.
-   */
-  int64_t GetIntegerPassOption(const char* option_name, CompilationUnit* c_unit) const {
-    return GetIntegerPassOption(option_name, c_unit->overridden_pass_options);
-  }
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
-  }
-
-  bool GetFlag(OptimizationFlag flag) const {
-    return (flags_ & flag);
-  }
-
- protected:
-  const OptionContent& GetPassOption(const char* option_name,
-        const SafeMap<const std::string, const OptionContent>& overridden_options) const {
-    DCHECK(option_name != nullptr);
-
-    // First check if there are any overridden settings.
-    auto overridden_it = overridden_options.find(std::string(option_name));
-    if (overridden_it != overridden_options.end()) {
-      return overridden_it->second;
-    } else {
-      // Otherwise, there must be a default value for this option name.
-      auto default_it = default_options_.find(option_name);
-      // An invalid option is being requested.
-      if (default_it == default_options_.end()) {
-        LOG(FATAL) << "Fatal: Cannot find an option named \"" << option_name << "\"";
-      }
-
-      return default_it->second;
-    }
-  }
-
-  const char* GetStringPassOption(const char* option_name,
-        const SafeMap<const std::string, const OptionContent>& overridden_options) const {
-    const OptionContent& option_content = GetPassOption(option_name, overridden_options);
-    if (option_content.type != OptionContent::kString) {
-      return nullptr;
-    }
-
-    return option_content.GetString();
-  }
-
-  int64_t GetIntegerPassOption(const char* option_name,
-          const SafeMap<const std::string, const OptionContent>& overridden_options) const {
-    const OptionContent& option_content = GetPassOption(option_name, overridden_options);
-    if (option_content.type != OptionContent::kInteger) {
-      return 0;
-    }
-
-    return option_content.GetInteger();
-  }
-
-  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
-  const DataFlowAnalysisMode traversal_type_;
-
-  /** @brief Flags for additional directives: used to determine if a particular
-    * post-optimization pass is necessary. */
-  const unsigned int flags_;
-
-  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
-  const char* const dump_cfg_folder_;
-
-  /**
-   * @brief Contains a map of options with the default settings.
-   * @details The constructor of the specific pass instance should fill this
-   * with default options.
-   * */
-  SafeMap<const char*, const OptionContent> default_options_;
-};
-}  // namespace art
-#endif  // ART_COMPILER_DEX_PASS_ME_H_
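The GetPassOption overloads above implement an override-then-default lookup: an entry in the overridden map wins, and a missing default is fatal. The same logic restated over standard containers (std::map stands in for SafeMap, std::string for OptionContent):

#include <map>
#include <stdexcept>
#include <string>

const std::string& LookupOption(
    const std::string& name,
    const std::map<std::string, std::string>& overridden,
    const std::map<std::string, std::string>& defaults) {
  auto it = overridden.find(name);
  if (it != overridden.end()) {
    return it->second;  // An override always wins over the default.
  }
  auto def = defaults.find(name);
  if (def == defaults.end()) {
    // The real code reports this with LOG(FATAL); a throw stands in here.
    throw std::runtime_error("Cannot find an option named \"" + name + "\"");
  }
  return def->second;
}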
diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc
deleted file mode 100644
index 9262440..0000000
--- a/compiler/dex/post_opt_passes.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "post_opt_passes.h"
-
-#include "dataflow_iterator-inl.h"
-
-namespace art {
-
-bool ClearPhiInstructions::Worker(PassDataHolder* data) const {
-  DCHECK(data != nullptr);
-  PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-  CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-  DCHECK(c_unit != nullptr);
-  BasicBlock* bb = pass_me_data_holder->bb;
-  DCHECK(bb != nullptr);
-  MIR* mir = bb->first_mir_insn;
-
-  while (mir != nullptr) {
-    MIR* next = mir->next;
-
-    Instruction::Code opcode = mir->dalvikInsn.opcode;
-
-    if (opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
-      bb->RemoveMIR(mir);
-    }
-
-    mir = next;
-  }
-
-  // We do not care about reporting whether or not the MIR changed.
-  return false;
-}
-
-void CalculatePredecessors::Start(PassDataHolder* data) const {
-  DCHECK(data != nullptr);
-  CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-  DCHECK(c_unit != nullptr);
-  // First get the MIRGraph here to factor out the repeated lookups below.
-  MIRGraph* mir_graph = c_unit->mir_graph.get();
-
-  // First clear all predecessors.
-  AllNodesIterator first(mir_graph);
-  for (BasicBlock* bb = first.Next(); bb != nullptr; bb = first.Next()) {
-    bb->predecessors.clear();
-  }
-
-  // Now calculate all predecessors.
-  AllNodesIterator second(mir_graph);
-  for (BasicBlock* bb = second.Next(); bb != nullptr; bb = second.Next()) {
-    // We only care about non-hidden blocks.
-    if (bb->hidden) {
-      continue;
-    }
-
-    // Create iterator for visiting children.
-    ChildBlockIterator child_iter(bb, mir_graph);
-
-    // Now iterate through the children to set the predecessor bits.
-    for (BasicBlock* child = child_iter.Next(); child != nullptr; child = child_iter.Next()) {
-      child->predecessors.push_back(bb->id);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h
deleted file mode 100644
index e9fa0eb..0000000
--- a/compiler/dex/post_opt_passes.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_POST_OPT_PASSES_H_
-#define ART_COMPILER_DEX_POST_OPT_PASSES_H_
-
-#include "base/casts.h"
-#include "base/logging.h"
-#include "compiler_ir.h"
-#include "dex_flags.h"
-#include "mir_graph.h"
-#include "pass_me.h"
-
-namespace art {
-
-/**
- * @class PassMEMirSsaRep
- * @brief Convenience class for passes that check MIRGraph::MirSsaRepUpToDate().
- */
-class PassMEMirSsaRep : public PassME {
- public:
-  PassMEMirSsaRep(const char* name, DataFlowAnalysisMode type = kAllNodes)
-      : PassME(name, type) {
-  }
-
-  bool Gate(const PassDataHolder* data) const OVERRIDE {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return !c_unit->mir_graph->MirSsaRepUpToDate();
-  }
-};
-
-/**
- * @class InitializeSSATransformation
- * @brief There is some data that needs to be initialized before performing
- * the post optimization passes.
- */
-class InitializeSSATransformation : public PassMEMirSsaRep {
- public:
-  InitializeSSATransformation() : PassMEMirSsaRep("InitializeSSATransformation", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    // New blocks may have been inserted so the first thing we do is ensure that
-    // the c_unit's number of blocks matches the actual count of basic blocks.
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->SSATransformationStart();
-    c_unit->mir_graph->CompilerInitializeSSAConversion();
-  }
-};
-
-/**
- * @class ClearPhiInstructions
- * @brief Clear the PHI nodes from the CFG.
- */
-class ClearPhiInstructions : public PassMEMirSsaRep {
- public:
-  ClearPhiInstructions() : PassMEMirSsaRep("ClearPhiInstructions") {
-  }
-
-  bool Worker(PassDataHolder* data) const;
-};
-
-/**
- * @class CalculatePredecessors
- * @brief Calculate the predecessor list of each BasicBlock.
- */
-class CalculatePredecessors : public PassME {
- public:
-  CalculatePredecessors() : PassME("CalculatePredecessors", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const;
-};
-
-/**
- * @class DFSOrders
- * @brief Compute the DFS order of the MIR graph.
- */
-class DFSOrders : public PassME {
- public:
-  DFSOrders() : PassME("DFSOrders", kNoNodes) {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return !c_unit->mir_graph->DfsOrdersUpToDate();
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph.get()->ComputeDFSOrders();
-  }
-};
-
-/**
- * @class BuildDomination
- * @brief Build the domination information of the MIR graph.
- */
-class BuildDomination : public PassME {
- public:
-  BuildDomination() : PassME("BuildDomination", kNoNodes) {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return !c_unit->mir_graph->DominationUpToDate();
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->ComputeDominators();
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    // Verify the dataflow information after the pass.
-    if (c_unit->enable_debug & (1 << kDebugVerifyDataflow)) {
-      c_unit->mir_graph->VerifyDataflow();
-    }
-  }
-};
-
-/**
- * @class TopologicalSortOrders
- * @brief Compute the topological sort order of the MIR graph.
- */
-class TopologicalSortOrders : public PassME {
- public:
-  TopologicalSortOrders() : PassME("TopologicalSortOrders", kNoNodes) {
-  }
-
-  bool Gate(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    return !c_unit->mir_graph->TopologicalOrderUpToDate();
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph.get()->ComputeTopologicalSortOrder();
-  }
-};
-
-/**
- * @class DefBlockMatrix
- * @brief Calculate the matrix of definitions per basic block.
- */
-class DefBlockMatrix : public PassMEMirSsaRep {
- public:
-  DefBlockMatrix() : PassMEMirSsaRep("DefBlockMatrix", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph.get()->ComputeDefBlockMatrix();
-  }
-};
-
-/**
- * @class FindPhiNodeBlocksPass
- * @brief Pass to find out where we need to insert the phi nodes for the SSA conversion.
- */
-class FindPhiNodeBlocksPass : public PassMEMirSsaRep {
- public:
-  FindPhiNodeBlocksPass() : PassMEMirSsaRep("FindPhiNodeBlocks", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph.get()->FindPhiNodeBlocks();
-  }
-};
-
-/**
- * @class SSAConversion
- * @brief Pass for SSA conversion of MIRs
- */
-class SSAConversion : public PassMEMirSsaRep {
- public:
-  SSAConversion() : PassMEMirSsaRep("SSAConversion", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    MIRGraph* mir_graph = c_unit->mir_graph.get();
-    mir_graph->ClearAllVisitedFlags();
-    mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock());
-  }
-};
-
-/**
- * @class PhiNodeOperands
- * @brief Pass to insert the Phi node operands to basic blocks
- */
-class PhiNodeOperands : public PassMEMirSsaRep {
- public:
-  PhiNodeOperands() : PassMEMirSsaRep("PhiNodeOperands", kPreOrderDFSTraversal) {
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = down_cast<PassMEDataHolder*>(data)->bb;
-    DCHECK(bb != nullptr);
-    c_unit->mir_graph->InsertPhiNodeOperands(bb);
-    // No need to repeat, so just return false.
-    return false;
-  }
-};
-
-/**
- * @class PerformInitRegLocations
- * @brief Initialize Register Locations.
- */
-class PerformInitRegLocations : public PassMEMirSsaRep {
- public:
-  PerformInitRegLocations() : PassMEMirSsaRep("PerformInitRegLocation", kNoNodes) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->InitRegLocations();
-  }
-};
-
-/**
- * @class TypeInferencePass
- * @brief Type inference pass.
- */
-class TypeInferencePass : public PassMEMirSsaRep {
- public:
-  TypeInferencePass() : PassMEMirSsaRep("TypeInference", kRepeatingPreOrderDFSTraversal) {
-  }
-
-  void Start(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->InferTypesStart();
-  }
-
-  bool Worker(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
-    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
-    DCHECK(c_unit != nullptr);
-    BasicBlock* bb = pass_me_data_holder->bb;
-    DCHECK(bb != nullptr);
-    return c_unit->mir_graph->InferTypes(bb);
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph.get()->InferTypesEnd();
-  }
-};
-
-/**
- * @class FinishSSATransformation
- * @brief There is some data that needs to be freed after performing the post optimization passes.
- */
-class FinishSSATransformation : public PassMEMirSsaRep {
- public:
-  FinishSSATransformation() : PassMEMirSsaRep("FinishSSATransformation", kNoNodes) {
-  }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph.get()->SSATransformationEnd();
-  }
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_POST_OPT_PASSES_H_
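All of the passes above plug into the same four-phase contract: Gate decides whether the pass runs at all, Start and End bracket the work, and Worker is invoked once per visited basic block. A stand-alone sketch of how a driver exercises that contract (illustrative types, not the ART pass framework):

#include <cstddef>

struct DataHolder {
  // Per-compilation state (the ART analogue is PassMEDataHolder).
};

struct LifecyclePass {
  virtual ~LifecyclePass() = default;
  virtual bool Gate(const DataHolder&) const { return true; }  // run at all?
  virtual void Start(DataHolder&) const {}                     // once, before
  virtual bool Worker(DataHolder&) const { return false; }     // per block
  virtual void End(DataHolder&) const {}                       // once, after
};

void RunPass(const LifecyclePass& pass, DataHolder& data, size_t num_blocks) {
  if (!pass.Gate(data)) {
    return;  // e.g. skip when MirSsaRepUpToDate() already holds.
  }
  pass.Start(data);
  for (size_t bb = 0; bb < num_blocks; ++bb) {
    // In repeating traversal modes, a true return from Worker requests
    // another iteration over the blocks; this sketch runs a single sweep.
    pass.Worker(data);
  }
  pass.End(data);
}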
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
deleted file mode 100644
index 9717459..0000000
--- a/compiler/dex/quick/arm/arm_lir.h
+++ /dev/null
@@ -1,605 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_
-#define ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_
-
-#include "dex/compiler_enums.h"
-#include "dex/reg_location.h"
-#include "dex/reg_storage.h"
-
-namespace art {
-
-/*
- * Runtime register usage conventions.
- *
- * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
- *        However, for Dalvik->Dalvik calls we'll pass the target's Method*
- *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
- *        registers.
- * r0-r1: As in C/C++, r0 is the 32-bit return register and r0/r1 the 64-bit pair.
- * r4   : If ARM_R4_SUSPEND_FLAG is set then reserved as a suspend check/debugger
- *        assist flag, otherwise a callee save promotion target.
- * r5   : Callee save (promotion target)
- * r6   : Callee save (promotion target)
- * r7   : Callee save (promotion target)
- * r8   : Callee save (promotion target)
- * r9   : (rARM_SELF) is reserved (pointer to thread-local storage)
- * r10  : Callee save (promotion target)
- * r11  : Callee save (promotion target)
- * r12  : Scratch, may be trashed by linkage stubs
- * r13  : (sp) is reserved
- * r14  : (lr) is reserved
- * r15  : (pc) is reserved
- *
- * 5 core temps that codegen can use (r0, r1, r2, r3, r12)
- * 7 core registers that can be used for promotion
- *
- * Floating pointer registers
- * s0-s31
- * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
- *
- * s16-s31 (d8-d15) preserved across C calls
- * s0-s15 (d0-d7) trashed across C calls
- *
- * s0-s15/d0-d7 used as codegen temp/scratch
- * s16-s31/d8-d15 can be used for promotion.
- *
- * Calling convention
- *     o On a call to a Dalvik method, pass target's Method* in r0
- *     o r1-r3 will be used for up to the first 3 words of arguments
- *     o Arguments past the first 3 words will be placed in appropriate
- *       out slots by the caller.
- *     o If a 64-bit argument would span the register/memory argument
- *       boundary, it will instead be fully passed in the frame.
- *     o Maintain a 16-byte stack alignment
- *
- *  Stack frame diagram (stack grows down, higher addresses at top):
- *
- * +------------------------+
- * | IN[ins-1]              |  {Note: resides in caller's frame}
- * |       .                |
- * | IN[0]                  |
- * | caller's Method*       |
- * +========================+  {Note: start of callee's frame}
- * | spill region           |  {variable sized - will include lr if non-leaf.}
- * +------------------------+
- * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
- * +------------------------+
- * | V[locals-1]            |
- * | V[locals-2]            |
- * |      .                 |
- * |      .                 |
- * | V[1]                   |
- * | V[0]                   |
- * +------------------------+
- * |  0 to 3 words padding  |
- * +------------------------+
- * | OUT[outs-1]            |
- * | OUT[outs-2]            |
- * |       .                |
- * | OUT[0]                 |
- * | cur_method*            | <<== sp w/ 16-byte alignment
- * +========================+
- */
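One concrete consequence of the 16-byte alignment rule in the diagram above: whatever the raw frame size works out to, it is rounded up to the next multiple of 16. A generic helper showing just that arithmetic (a sketch, not ART's actual frame-size computation):

#include <cstdint>

// Rounds a byte count up to the next multiple of 16.
constexpr uint32_t AlignFrameSize(uint32_t raw_bytes) {
  return (raw_bytes + 15u) & ~15u;
}

static_assert(AlignFrameSize(52) == 64, "52-byte frame pads up to 64");
static_assert(AlignFrameSize(64) == 64, "aligned frames are unchanged");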
-
-// First FP callee save.
-#define ARM_FP_CALLEE_SAVE_BASE 16
-// Flag for using R4 to do suspend check
-// #define ARM_R4_SUSPEND_FLAG
-
-enum ArmResourceEncodingPos {
-  kArmGPReg0   = 0,
-  kArmRegSP    = 13,
-  kArmRegLR    = 14,
-  kArmRegPC    = 15,
-  kArmFPReg0   = 16,
-  kArmFPReg16  = 32,
-  kArmRegEnd   = 48,
-};
-
-enum ArmNativeRegisterPool {  // private marker to keep generate-operator-out.py from processing this.
-  r0           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
-  r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
-  r2           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
-  r3           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
-#ifdef ARM_R4_SUSPEND_FLAG
-  rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
-#else
-  r4           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
-#endif
-  r5           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
-  r6           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
-  r7           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
-  r8           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  8,
-  rARM_SELF    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  9,
-  r10          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
-  r11          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
-  r12          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
-  r13sp        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
-  rARM_SP      = r13sp,
-  r14lr        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
-  rARM_LR      = r14lr,
-  r15pc        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
-  rARM_PC      = r15pc,
-
-  fr0          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  0,
-  fr1          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  1,
-  fr2          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  2,
-  fr3          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  3,
-  fr4          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  4,
-  fr5          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  5,
-  fr6          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  6,
-  fr7          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  7,
-  fr8          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  8,
-  fr9          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  9,
-  fr10         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
-  fr11         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
-  fr12         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
-  fr13         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
-  fr14         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
-  fr15         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-  fr16         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
-  fr17         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
-  fr18         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
-  fr19         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
-  fr20         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
-  fr21         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
-  fr22         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
-  fr23         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
-  fr24         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
-  fr25         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
-  fr26         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
-  fr27         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
-  fr28         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
-  fr29         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
-  fr30         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
-  fr31         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
-
-  dr0          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  dr1          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
-  dr2          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  dr3          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
-  dr4          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  dr5          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
-  dr6          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  dr7          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
-  dr8          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  dr9          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
-  dr10         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  dr11         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  dr12         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  dr13         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  dr14         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  dr15         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-#if 0
-  // Enable when def/use and runtime able to handle these.
-  dr16         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
-  dr17         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17,
-  dr18         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
-  dr19         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19,
-  dr20         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
-  dr21         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21,
-  dr22         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
-  dr23         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23,
-  dr24         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
-  dr25         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25,
-  dr26         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
-  dr27         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27,
-  dr28         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
-  dr29         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29,
-  dr30         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
-  dr31         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31,
-#endif
-};
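Every entry in the register pool above is composed the same way: a shape constant ORed with a kind constant ORed with the register number in the low bits. A self-contained sketch of that packing scheme with invented field positions (the real RegStorage layout differs):

#include <cstdint>

namespace sketch {

constexpr uint16_t k32BitSolo    = 1u << 8;  // illustrative shape bit
constexpr uint16_t kCoreRegister = 1u << 9;  // illustrative kind bit
constexpr uint16_t kNumberMask   = 0xFFu;    // low byte holds the number

constexpr uint16_t MakeCoreReg(uint8_t n) {
  return static_cast<uint16_t>(k32BitSolo | kCoreRegister | n);
}

constexpr uint8_t RegNumber(uint16_t reg) {
  return static_cast<uint8_t>(reg & kNumberMask);
}

static_assert(RegNumber(MakeCoreReg(13)) == 13, "sp is register 13");

}  // namespace sketch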
-
-constexpr RegStorage rs_r0(RegStorage::kValid | r0);
-constexpr RegStorage rs_r1(RegStorage::kValid | r1);
-constexpr RegStorage rs_r2(RegStorage::kValid | r2);
-constexpr RegStorage rs_r3(RegStorage::kValid | r3);
-#ifdef ARM_R4_SUSPEND_FLAG
-constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
-#else
-constexpr RegStorage rs_r4(RegStorage::kValid | r4);
-#endif
-constexpr RegStorage rs_r5(RegStorage::kValid | r5);
-constexpr RegStorage rs_r6(RegStorage::kValid | r6);
-constexpr RegStorage rs_r7(RegStorage::kValid | r7);
-constexpr RegStorage rs_r8(RegStorage::kValid | r8);
-constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF);
-constexpr RegStorage rs_r10(RegStorage::kValid | r10);
-constexpr RegStorage rs_r11(RegStorage::kValid | r11);
-constexpr RegStorage rs_r12(RegStorage::kValid | r12);
-constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp);
-constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP);
-constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr);
-constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR);
-constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc);
-constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC);
-constexpr RegStorage rs_invalid(RegStorage::kInvalid);
-
-constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
-constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
-constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
-constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
-constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
-constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
-constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
-constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
-constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
-constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
-constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
-constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
-constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
-constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
-constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
-constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
-constexpr RegStorage rs_fr16(RegStorage::kValid | fr16);
-constexpr RegStorage rs_fr17(RegStorage::kValid | fr17);
-constexpr RegStorage rs_fr18(RegStorage::kValid | fr18);
-constexpr RegStorage rs_fr19(RegStorage::kValid | fr19);
-constexpr RegStorage rs_fr20(RegStorage::kValid | fr20);
-constexpr RegStorage rs_fr21(RegStorage::kValid | fr21);
-constexpr RegStorage rs_fr22(RegStorage::kValid | fr22);
-constexpr RegStorage rs_fr23(RegStorage::kValid | fr23);
-constexpr RegStorage rs_fr24(RegStorage::kValid | fr24);
-constexpr RegStorage rs_fr25(RegStorage::kValid | fr25);
-constexpr RegStorage rs_fr26(RegStorage::kValid | fr26);
-constexpr RegStorage rs_fr27(RegStorage::kValid | fr27);
-constexpr RegStorage rs_fr28(RegStorage::kValid | fr28);
-constexpr RegStorage rs_fr29(RegStorage::kValid | fr29);
-constexpr RegStorage rs_fr30(RegStorage::kValid | fr30);
-constexpr RegStorage rs_fr31(RegStorage::kValid | fr31);
-
-constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
-constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
-constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
-constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
-constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
-constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
-constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
-constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
-constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
-constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
-constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
-constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
-constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
-constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
-constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
-constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
-#if 0
-constexpr RegStorage rs_dr16(RegStorage::kValid | dr16);
-constexpr RegStorage rs_dr17(RegStorage::kValid | dr17);
-constexpr RegStorage rs_dr18(RegStorage::kValid | dr18);
-constexpr RegStorage rs_dr19(RegStorage::kValid | dr19);
-constexpr RegStorage rs_dr20(RegStorage::kValid | dr20);
-constexpr RegStorage rs_dr21(RegStorage::kValid | dr21);
-constexpr RegStorage rs_dr22(RegStorage::kValid | dr22);
-constexpr RegStorage rs_dr23(RegStorage::kValid | dr23);
-constexpr RegStorage rs_dr24(RegStorage::kValid | dr24);
-constexpr RegStorage rs_dr25(RegStorage::kValid | dr25);
-constexpr RegStorage rs_dr26(RegStorage::kValid | dr26);
-constexpr RegStorage rs_dr27(RegStorage::kValid | dr27);
-constexpr RegStorage rs_dr28(RegStorage::kValid | dr28);
-constexpr RegStorage rs_dr29(RegStorage::kValid | dr29);
-constexpr RegStorage rs_dr30(RegStorage::kValid | dr30);
-constexpr RegStorage rs_dr31(RegStorage::kValid | dr31);
-#endif
-
-// RegisterLocation templates return values (r0, r0/r1, s0, or d0).
-// Note: The return locations are shared between quick code and the quick helpers, following the
-// quick ABI. The quick helper assembly routines need to handle any ABI differences.
-const RegLocation arm_loc_c_return =
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_wide =
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage::MakeRegPair(rs_r0, rs_r1), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_float = kArm32QuickCodeUseSoftFloat
-    ? arm_loc_c_return
-    : RegLocation({kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, rs_fr0, INVALID_SREG, INVALID_SREG});
-const RegLocation arm_loc_c_return_double = kArm32QuickCodeUseSoftFloat
-    ? arm_loc_c_return_wide
-    : RegLocation({kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, rs_dr0, INVALID_SREG, INVALID_SREG});
-
-enum ArmShiftEncodings {
-  kArmLsl = 0x0,
-  kArmLsr = 0x1,
-  kArmAsr = 0x2,
-  kArmRor = 0x3
-};
-
-/*
- * The following enum defines the list of Thumb instructions supported by the
- * assembler. Their corresponding EncodingMap positions are defined in
- * Assemble.cc.
- */
-enum ArmOpcode {
-  kArmFirst = 0,
-  kArm16BitData = kArmFirst,  // DATA   [0] rd[15..0].
-  kThumbAdcRR,       // adc   [0100000101] rm[5..3] rd[2..0].
-  kThumbAddRRI3,     // add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0].
-  kThumbAddRI8,      // add(2)  [00110] rd[10..8] imm_8[7..0].
-  kThumbAddRRR,      // add(3)  [0001100] rm[8..6] rn[5..3] rd[2..0].
-  kThumbAddRRLH,     // add(4)  [01000100] H12[01] rm[5..3] rd[2..0].
-  kThumbAddRRHL,     // add(4)  [01001000] H12[10] rm[5..3] rd[2..0].
-  kThumbAddRRHH,     // add(4)  [01001100] H12[11] rm[5..3] rd[2..0].
-  kThumbAddPcRel,    // add(5)  [10100] rd[10..8] imm_8[7..0].
-  kThumbAddSpRel,    // add(6)  [10101] rd[10..8] imm_8[7..0].
-  kThumbAddSpI7,     // add(7)  [101100000] imm_7[6..0].
-  kThumbAndRR,       // and   [0100000000] rm[5..3] rd[2..0].
-  kThumbAsrRRI5,     // asr(1)  [00010] imm_5[10..6] rm[5..3] rd[2..0].
-  kThumbAsrRR,       // asr(2)  [0100000100] rs[5..3] rd[2..0].
-  kThumbBCond,       // b(1)  [1101] cond[11..8] offset_8[7..0].
-  kThumbBUncond,     // b(2)  [11100] offset_11[10..0].
-  kThumbBicRR,       // bic   [0100001110] rm[5..3] rd[2..0].
-  kThumbBkpt,        // bkpt  [10111110] imm_8[7..0].
-  kThumbBlx1,        // blx(1)  [111] H[10] offset_11[10..0].
-  kThumbBlx2,        // blx(1)  [111] H[01] offset_11[10..0].
-  kThumbBl1,         // bl(1)   [111] H[10] offset_11[10..0].
-  kThumbBl2,         // bl(1)   [111] H[11] offset_11[10..0].
-  kThumbBlxR,        // blx(2)  [010001111] rm[6..3] [000].
-  kThumbBx,          // bx    [010001110] H2[6..6] rm[5..3] SBZ[000].
-  kThumbCmnRR,       // cmn   [0100001011] rm[5..3] rd[2..0].
-  kThumbCmpRI8,      // cmp(1)  [00101] rn[10..8] imm_8[7..0].
-  kThumbCmpRR,       // cmp(2)  [0100001010] rm[5..3] rd[2..0].
-  kThumbCmpLH,       // cmp(3)  [01000101] H12[01] rm[5..3] rd[2..0].
-  kThumbCmpHL,       // cmp(3)  [01000110] H12[10] rm[5..3] rd[2..0].
-  kThumbCmpHH,       // cmp(3)  [01000111] H12[11] rm[5..3] rd[2..0].
-  kThumbEorRR,       // eor   [0100000001] rm[5..3] rd[2..0].
-  kThumbLdmia,       // ldmia   [11001] rn[10..8] reglist [7..0].
-  kThumbLdrRRI5,     // ldr(1)  [01101] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbLdrRRR,      // ldr(2)  [0101100] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrPcRel,    // ldr(3)  [01001] rd[10..8] imm_8[7..0].
-  kThumbLdrSpRel,    // ldr(4)  [10011] rd[10..8] imm_8[7..0].
-  kThumbLdrbRRI5,    // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbLdrbRRR,     // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrhRRI5,    // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbLdrhRRR,     // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrsbRRR,    // ldrsb   [0101011] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrshRRR,    // ldrsh   [0101111] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLslRRI5,     // lsl(1)  [00000] imm_5[10..6] rm[5..3] rd[2..0].
-  kThumbLslRR,       // lsl(2)  [0100000010] rs[5..3] rd[2..0].
-  kThumbLsrRRI5,     // lsr(1)  [00001] imm_5[10..6] rm[5..3] rd[2..0].
-  kThumbLsrRR,       // lsr(2)  [0100000011] rs[5..3] rd[2..0].
-  kThumbMovImm,      // mov(1)  [00100] rd[10..8] imm_8[7..0].
-  kThumbMovRR,       // mov(2)  [0001110000] rn[5..3] rd[2..0].
-  kThumbMovRR_H2H,   // mov(3)  [01000111] H12[11] rm[5..3] rd[2..0].
-  kThumbMovRR_H2L,   // mov(3)  [01000110] H12[01] rm[5..3] rd[2..0].
-  kThumbMovRR_L2H,   // mov(3)  [01000101] H12[10] rm[5..3] rd[2..0].
-  kThumbMul,         // mul   [0100001101] rm[5..3] rd[2..0].
-  kThumbMvn,         // mvn   [0100001111] rm[5..3] rd[2..0].
-  kThumbNeg,         // neg   [0100001001] rm[5..3] rd[2..0].
-  kThumbOrr,         // orr   [0100001100] rm[5..3] rd[2..0].
-  kThumbPop,         // pop   [1011110] r[8..8] rl[7..0].
-  kThumbPush,        // push  [1011010] r[8..8] rl[7..0].
-  kThumbRev,         // rev   [1011101000] rm[5..3] rd[2..0]
-  kThumbRevsh,       // revsh   [1011101011] rm[5..3] rd[2..0]
-  kThumbRorRR,       // ror   [0100000111] rs[5..3] rd[2..0].
-  kThumbSbc,         // sbc   [0100000110] rm[5..3] rd[2..0].
-  kThumbStmia,       // stmia   [11000] rn[10..8] reglist [7.. 0].
-  kThumbStrRRI5,     // str(1)  [01100] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbStrRRR,      // str(2)  [0101000] rm[8..6] rn[5..3] rd[2..0].
-  kThumbStrSpRel,    // str(3)  [10010] rd[10..8] imm_8[7..0].
-  kThumbStrbRRI5,    // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbStrbRRR,     // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0].
-  kThumbStrhRRI5,    // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbStrhRRR,     // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0].
-  kThumbSubRRI3,     // sub(1)  [0001111] imm_3[8..6] rn[5..3] rd[2..0].
-  kThumbSubRI8,      // sub(2)  [00111] rd[10..8] imm_8[7..0].
-  kThumbSubRRR,      // sub(3)  [0001101] rm[8..6] rn[5..3] rd[2..0].
-  kThumbSubSpI7,     // sub(4)  [101100001] imm_7[6..0].
-  kThumbSwi,         // swi   [11011111] imm_8[7..0].
-  kThumbTst,         // tst   [0100001000] rm[5..3] rn[2..0].
-  kThumb2Vldrs,      // vldr low  sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0].
-  kThumb2Vldrd,      // vldr low  dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0].
-  kThumb2Vmuls,      // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0].
-  kThumb2Vmuld,      // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0].
-  kThumb2Vstrs,      // vstr low  sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0].
-  kThumb2Vstrd,      // vstr low  dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0].
-  kThumb2Vsubs,      // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100100] rm[3..0].
-  kThumb2Vsubd,      // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110100] rm[3..0].
-  kThumb2Vadds,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0].
-  kThumb2Vaddd,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
-  kThumb2Vdivs,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
-  kThumb2Vdivd,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
-  kThumb2VmlaF64,    // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
-  kThumb2VcvtIF,     // vcvt.F32.S32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
-  kThumb2VcvtFI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
-  kThumb2VcvtDI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
-  kThumb2VcvtFd,     // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
-  kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
-  kThumb2VcvtF64S32,  // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
-  kThumb2VcvtF64U32,  // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
-  kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
-  kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
-  kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
-  kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
-  kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
-  kThumb2LdrRRI12,   // ldr(Imm,T3) rd,[rn,#imm12] [111110001101] rn[19..16] rt[15..12] imm12[11..0].
-  kThumb2StrRRI8Predec,  // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
-  kThumb2LdrRRI8Predec,  // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
-  kThumb2Cbnz,       // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
-  kThumb2Cbz,        // cbz rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
-  kThumb2AddRRI12,   // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MovRR,      // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
-  kThumb2Vmovs,      // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0].
-  kThumb2Vmovd,      // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0].
-  kThumb2Ldmia,      // ldmia  [111010001001] rn[19..16] mask[15..0].
-  kThumb2Stmia,      // stmia  [111010001000] rn[19..16] mask[15..0].
-  kThumb2AddRRR,     // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2SubRRR,     // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2SbcRRR,     // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2CmpRR,      // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MvnI8M,     // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
-  kThumb2Sel,        // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
-  kThumb2Ubfx,       // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
-  kThumb2Sbfx,       // sbfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
-  kThumb2LdrRRR,     // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrhRRR,    // ldrh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrshRRR,   // ldrsh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrbRRR,    // ldrb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrsbRRR,   // ldrsb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2StrRRR,     // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2StrhRRR,    // strh rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2StrbRRR,    // strb rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrhRRI12,  // ldrh rt,[rn,#imm12] [111110001011] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2LdrshRRI12,  // ldrsh rt,[rn,#imm12] [111110011011] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2LdrbRRI12,  // ldrb rt,[rn,#imm12] [111110001001] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2LdrsbRRI12,  // ldrsb rt,[rn,#imm12] [111110011001] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2StrhRRI12,  // strh rt,[rn,#imm12] [111110001010] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2Pop,        // pop   [1110100010111101] list[15-0].
-  kThumb2Push,       // push  [1110100100101101] list[15-0].
-  kThumb2CmpRI8M,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
-  kThumb2CmnRI8M,    // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
-  kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2CmnRR,      // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2EorRRR,     // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2MulRRR,     // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
-  kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
-  kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
-  kThumb2RsubRRI8M,  // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2NegRR,      // actually rsub rd, rn, #0.
-  kThumb2OrrRRR,     // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2TstRR,      // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2LslRRR,     // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2LsrRRR,     // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2AsrRRR,     // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2RorRRR,     // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2LslRRI5,    // lsl [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [00] rm[3..0].
-  kThumb2LsrRRI5,    // lsr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [01] rm[3..0].
-  kThumb2AsrRRI5,    // asr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [10] rm[3..0].
-  kThumb2RorRRI5,    // ror [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [11] rm[3..0].
-  kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2OrnRRI8M,   // orn rd, rn, #<const> [11110] i [000110] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2SubRRI8M,   // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2SbcRRI8M,   // sbc rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0].
-  kThumb2RevshRR,    // revsh [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0].
-  kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
-  kThumb2Fmstat,     // fmstat [11101110111100011111101000010000].
-  kThumb2Vcmpd,      // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0].
-  kThumb2Vcmps,      // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0].
-  kThumb2LdrPcRel12,  // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0].
-  kThumb2BCond,      // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0].
-  kThumb2Fmrs,       // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000].
-  kThumb2Fmsr,       // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000].
-  kThumb2Fmrrd,      // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
-  kThumb2Fmdrr,      // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
-  kThumb2Vabsd,      // vabs.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0].
-  kThumb2Vabss,      // vabs.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
-  kThumb2Vnegd,      // vneg.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0].
-  kThumb2Vnegs,      // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
-  kThumb2Vmovs_IMM8,  // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0].
-  kThumb2Vmovd_IMM8,  // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
-  kThumb2Mla,        // mla [111110110000] rn[19-16] ra[15-12] rd[11-8] [0000] rm[3-0].
-  kThumb2Mls,        // mls [111110110000] rn[19-16] ra[15-12] rd[11-8] [0001] rm[3-0].
-  kThumb2Umull,      // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
-  kThumb2Ldrex,      // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
-  kThumb2Ldrexd,     // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111].
-  kThumb2Strex,      // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0].
-  kThumb2Strexd,     // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0].
-  kThumb2Clrex,      // clrex [11110011101111111000111100101111].
-  kThumb2Bfi,        // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
-  kThumb2Bfc,        // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
-  kThumb2Dmb,        // dmb [1111001110111111100011110101] option[3-0].
-  kThumb2LdrPcReln12,  // ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0].
-  kThumb2Stm,        // stm <list> [111010010000] rn[19-16] 000 rl[12-0].
-  kThumbUndefined,   // undefined [11011110xxxxxxxx].
-  kThumb2VPopCS,     // vpop <list of callee save fp singles (s16+)>.
-  kThumb2VPushCS,    // vpush <list of callee save fp singles (s16+)>.
-  kThumb2Vldms,      // vldms rd, <list>.
-  kThumb2Vstms,      // vstms rd, <list>.
-  kThumb2BUncond,    // b <label>.
-  kThumb2Bl,         // bl with linker fixup. [11110] S imm10 [11] J1 [1] J2 imm11.
-  kThumb2MovImm16H,  // similar to kThumb2MovImm16, but target high hw.
-  kThumb2AddPCR,     // Thumb2 2-operand add with hard-coded PC target.
-  kThumb2Adr,        // Special purpose encoding of ADR for switch tables.
-  kThumb2MovImm16LST,  // Special purpose version for switch table use.
-  kThumb2MovImm16HST,  // Special purpose version for switch table use.
-  kThumb2LdmiaWB,    // ldmia [111010011001] rn[19..16] mask[15..0].
-  kThumb2OrrRRRs,    // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2Push1,      // t3 encoding of push.
-  kThumb2Pop1,       // t3 encoding of pop.
-  kThumb2RsubRRR,    // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2Smull,      // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
-  kThumb2LdrdPcRel8,  // ldrd rt, rt2, pc +/- 1024.
-  kThumb2LdrdI8,     // ldrd rt, rt2, [rn +/- 1024].
-  kThumb2StrdI8,     // strd rt, rt2, [rn +/- 1024].
-  kArmLast,
-};
-std::ostream& operator<<(std::ostream& os, const ArmOpcode& rhs);
-
-enum ArmOpDmbOptions {
-  kSY = 0xf,
-  kST = 0xe,
-  kISH = 0xb,
-  kISHST = 0xa,
-  kNSH = 0x7,
-  kNSHST = 0x6
-};
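-
-// Illustration (added here, not in the original header): these values fill
-// the option[3-0] field of the kThumb2Dmb skeleton documented above, so
-//   0xf3bf8f50 | kISH == 0xf3bf8f5b   // dmb ish
-//   0xf3bf8f50 | kSY  == 0xf3bf8f5f   // dmb sy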
-
-// Instruction assembly field_loc kind.
-enum ArmEncodingKind {
-  kFmtUnused,      // Unused field and marks end of formats.
-  kFmtBitBlt,      // Bit string using end/start.
-  kFmtLdmRegList,  // Load multiple register list using [15,14,12..0].
-  kFmtStmRegList,  // Store multiple register list using [14,12..0].
-  kFmtDfp,         // Double FP reg.
-  kFmtSfp,         // Single FP reg.
-  kFmtModImm,      // Shifted 8-bit immed using [26,14..12,7..0].
-  kFmtImm16,       // Zero-extended immed using [26,19..16,14..12,7..0].
-  kFmtImm6,        // Encoded branch target using [9,7..3]:0.
-  kFmtImm12,       // Zero-extended immediate using [26,14..12,7..0].
-  kFmtShift,       // Shift descriptor, [14..12,7..4].
-  kFmtLsb,         // Least significant bit using [14..12][7..6].
-  kFmtBWidth,      // Bit-field width, encoded as width-1.
-  kFmtShift5,      // Shift count, [14..12,7..6].
-  kFmtBrOffset,    // Sign-extended [26,11,13,21-16,10-0]:0.
-  kFmtFPImm,       // Encoded floating point immediate.
-  kFmtOff24,       // 24-bit Thumb2 unconditional branch encoding.
-  kFmtSkip,        // Unused field, but continue to next.
-};
-std::ostream& operator<<(std::ostream& os, const ArmEncodingKind& rhs);
-
-// Struct describing the encoding skeleton and operand bit-field positions for each Thumb opcode.
-struct ArmEncodingMap {
-  uint32_t skeleton;
-  struct {
-    ArmEncodingKind kind;
-    int end;   // end for kFmtBitBlt, 1-bit slice end for FP regs.
-    int start;  // start for kFmtBitBlt, 4-bit slice end for FP regs.
-  } field_loc[4];
-  ArmOpcode opcode;
-  uint64_t flags;
-  const char* name;
-  const char* fmt;
-  int size;   // Note: size is in bytes.
-  FixupKind fixup;
-};
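-
-// Sketch (illustrative, not from the original header) of how a kFmtBitBlt
-// field_loc entry is applied when assembling: the operand value is masked
-// to the field width and OR-ed into the skeleton between bits start..end.
-//
-//   uint32_t InsertBitBlt(uint32_t skeleton, int end, int start, uint32_t value) {
-//     uint32_t width = static_cast<uint32_t>(end - start + 1);
-//     uint32_t mask = (width == 32u) ? ~0u : (1u << width) - 1u;
-//     return skeleton | ((value & mask) << start);
-//   }
-//
-// The helper name is hypothetical; the real bit-insertion logic lives in
-// assemble_arm.cc.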
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
deleted file mode 100644
index 5f911db..0000000
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ /dev/null
@@ -1,1687 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm.h"
-
-#include "arm_lir.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir-inl.h"
-
-namespace art {
-
-/*
- * opcode: ArmOpcode enum
- * skeleton: pre-designated bit-pattern for this opcode
- * k0: key to applying ds/de
- * ds: dest start bit position
- * de: dest end bit position
- * k1: key to applying s1s/s1e
- * s1s: src1 start bit position
- * s1e: src1 end bit position
- * k2: key to applying s2s/s2e
- * s2s: src2 start bit position
- * s2e: src2 end bit position
- * k3: key to applying k3s/k3e
- * k3s: extra operand start bit position
- * k3e: extra operand end bit position
- * flags: attribute flags for the instruction (IS_* / REG_* / SETS_CCODES, ...)
- * name: mnemonic name
- * fmt: format string for pretty-printing (see key below)
- * size: instruction size in bytes
- * fixup: FixupKind to apply at assembly time
- */
-#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
-                     k3, k3s, k3e, flags, name, fmt, size, fixup) \
-        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
-                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup}
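-
-/* Illustration (not part of the original file): a table entry such as
- *   ENCODING_MAP(kThumbMovImm, 0x2000,
- *                kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- *                kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
- *                "movs", "!0C, #!1d", 2, kFixupNone)
- * expands to the aggregate initializer
- *   {0x2000, {{kFmtBitBlt, 10, 8}, {kFmtBitBlt, 7, 0}, {kFmtUnused, -1, -1},
- *             {kFmtUnused, -1, -1}}, kThumbMovImm,
- *    IS_BINARY_OP | REG_DEF0 | SETS_CCODES, "movs", "!0C, #!1d", 2, kFixupNone}
- */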
-
-/* Instruction dump string format keys: !pf, where "!" is the start
- * of the key, "p" is which numeric operand to use and "f" is the
- * print format.
- *
- * [p]ositions:
- *     0 -> operands[0] (dest)
- *     1 -> operands[1] (src1)
- *     2 -> operands[2] (src2)
- *     3 -> operands[3] (extra)
- *
- * [f]ormats:
- *     h -> 4-digit hex
- *     d -> decimal
- *     E -> decimal*4
- *     F -> decimal*2
- *     c -> branch condition (beq, bne, etc.)
- *     t -> pc-relative target
- *     u -> 1st half of bl[x] target
- *     v -> 2nd half of bl[x] target
- *     R -> register list
- *     s -> single precision floating point register
- *     S -> double precision floating point register
- *     m -> Thumb2 modified immediate
- *     n -> complemented Thumb2 modified immediate
- *     M -> Thumb2 16-bit zero-extended immediate
- *     b -> 4-digit binary
- *     B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
- *     H -> operand shift
- *     C -> core register name
- *     P -> fp cs register list (base of s16)
- *     Q -> fp cs register list (base of s0)
- *
- *  [!] escape.  To insert "!", use "!!"
- */
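-/* Worked example (added for illustration): with operands[] = {r0, r1, 7},
- * the kThumbAddRRI3 entry below (name "adds", fmt "!0C, !1C, #!2d")
- * pretty-prints as "adds r0, r1, #7" -- !0C and !1C select the core register
- * names of operands 0 and 1, and !2d prints operand 2 in decimal.
- */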
-/* NOTE: must be kept in sync with enum ArmOpcode from arm_lir.h */
-const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = {
-    ENCODING_MAP(kArm16BitData,    0x0000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone),
-    ENCODING_MAP(kThumbAdcRR,        0x4140,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRI3,      0x1c00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRI8,       0x3000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "adds", "!0C, !0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRR,       0x1800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRLH,     0x4440,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRHL,     0x4480,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRHH,     0x44c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddPcRel,    0xa000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | NEEDS_FIXUP,
-                 "add", "!0C, pc, #!1E", 2, kFixupLoad),
-    ENCODING_MAP(kThumbAddSpRel,    0xa800,
-                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP,
-                 "add", "!0C, sp, #!2E", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddSpI7,      0xb000,
-                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "sp, #!0d*4", 2, kFixupNone),
-    ENCODING_MAP(kThumbAndRR,        0x4000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "ands", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAsrRRI5,      0x1000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "asrs", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbAsrRR,        0x4100,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "asrs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbBCond,        0xd000,
-                 kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
-                 NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch),
-    ENCODING_MAP(kThumbBUncond,      0xe000,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "b", "!0t", 2, kFixupT1Branch),
-    ENCODING_MAP(kThumbBicRR,        0x4380,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "bics", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbBkpt,          0xbe00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bkpt", "!0d", 2, kFixupNone),
-    ENCODING_MAP(kThumbBlx1,         0xf000,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1),
-    ENCODING_MAP(kThumbBlx2,         0xe800,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel),
-    ENCODING_MAP(kThumbBl1,          0xf000,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
-                 "bl_1", "!0u", 2, kFixupBl1),
-    ENCODING_MAP(kThumbBl2,          0xf800,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
-                 "bl_2", "!0v", 2, kFixupLabel),
-    ENCODING_MAP(kThumbBlxR,         0x4780,
-                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
-                 "blx", "!0C", 2, kFixupNone),
-    ENCODING_MAP(kThumbBx,            0x4700,
-                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
-                 "bx", "!0C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmnRR,        0x42c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpRI8,       0x2800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpRR,        0x4280,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpLH,        0x4540,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpHL,        0x4580,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpHH,        0x45c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbEorRR,        0x4040,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "eors", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdmia,         0xc800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrRRI5,      0x6800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF4,
-                 "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrRRR,       0x5800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrPcRel,    0x4800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
-                 | IS_LOAD_OFF4 | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
-    ENCODING_MAP(kThumbLdrSpRel,    0x9800,
-                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
-                 | IS_LOAD_OFF4, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF2,
-                 "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrsbRRR,     0x5600,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrshRRR,     0x5e00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLslRRI5,      0x0000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsls", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbLslRR,        0x4080,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsls", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbLsrRRI5,      0x0800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsrs", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbLsrRR,        0x40c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsrs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovImm,       0x2000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
-                 "movs", "!0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR,        0x1c00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES | IS_MOVE,
-                 "movs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "mov", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "mov", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "mov", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMul,           0x4340,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "muls", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMvn,           0x43c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "mvns", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbNeg,           0x4240,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "negs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbOrr,           0x4300,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "orrs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbPop,           0xbc00,
-                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD, "pop", "<!0R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbPush,          0xb400,
-                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE, "push", "<!0R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbRev,           0xba00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1,
-                 "rev", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbRevsh,         0xbac0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1,
-                 "rev", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbRorRR,        0x41c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "rors", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbSbc,           0x4180,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbStmia,         0xc000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrRRI5,      0x6000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF4,
-                 "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrRRR,       0x5000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrSpRel,    0x9000,
-                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
-                 | IS_STORE_OFF4, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrbRRI5,     0x7000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrbRRR,      0x5400,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrhRRI5,     0x8000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF2,
-                 "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrhRRR,      0x5200,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubRRI3,      0x1e00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubRI8,       0x3800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "subs", "!0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubRRR,       0x1a00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubSpI7,      0xb080,
-                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "sub", "sp, #!0d*4", 2, kFixupNone),
-    ENCODING_MAP(kThumbSwi,           0xdf00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "swi", "!0d", 2, kFixupNone),
-    ENCODING_MAP(kThumbTst,           0x4200,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C", 2, kFixupNone),
-    /*
-     * Note: The encoding map entries for vldrd and vldrs include REG_DEF_LR, even though
-     * these instructions don't define lr.  The reason is that these instructions
-     * are used for loading values from the literal pool, and the displacement may be found
- * to be insufficient at assembly time.  In that case, we need to materialize a new base
-     * register - and will use lr as the temp register.  This works because lr is used as
-     * a temp register in very limited situations, and never in conjunction with a floating
-     * point constant load.  However, it is possible that during instruction scheduling,
-     * another use of lr could be moved across a vldrd/vldrs.  By setting REG_DEF_LR, we
-     * prevent that from happening.  Note that we set REG_DEF_LR on all vldrd/vldrs - even those
-     * not used in a pc-relative case.  It is really only needed on the pc-relative loads, but
-     * the case we're handling is rare enough that it seemed not worth the trouble to distinguish.
-     */
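-    /* Sketch of the hazard described above (illustrative, not in the
-     * original file): if "vldrs s0, [pc, #disp]" turns out to have an
-     * out-of-range displacement at assembly time, it is rewritten roughly as
-     *   <materialize new base in lr>
-     *   vldrs s0, [lr, #small_disp]
-     * so no unrelated definition of lr may be scheduled in between -- which
-     * is exactly what marking vldrs/vldrd with REG_DEF_LR guarantees.
-     */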
-    ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
-                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF4 |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
-    ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
-                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF4 |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
-    ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuls", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmuld,        0xee200b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
-                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF4,
-                 "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
-                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF4,
-                 "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsubd,        0xee300b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vadds,        0xee300a00,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vaddd,        0xee300b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vdivs,        0xee800a00,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivs", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vdivd,        0xee800b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VmlaF64,     0xee000b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
-                 "vmla", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32.s32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtFI,       0xeebd0ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtDI,       0xeebd0bc0,
-                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtFd,       0xeeb70ac0,
-                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtDF,       0xeeb70bc0,
-                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtF64S32,   0xeeb80bc0,
-                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtF64U32,   0xeeb80b40,
-                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsqrtd,       0xeeb10bc0,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovImm16,       0xf2400000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1M", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
-                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ),
-    ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
-                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ),
-    ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1,  /* Note: doesn't affect flags */
-                 "add", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "mov", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
-                 kFmtBitBlt, 19, 16, kFmtLdmRegList, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Stmia,         0xe8800000,
-                 kFmtBitBlt, 19, 16, kFmtStmRegList, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRR,  0xeb100000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRR,       0xebb00000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRR,       0xeb700000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmpRR,       0xebb00f00,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI12,       0xf2a00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1,  /* Note: doesn't affect flags */
-                 "sub", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MvnI8M,  0xf06f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mvn", "!0C, #!1n", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "sel", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ubfx,       0xf3c00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
-                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Sbfx,       0xf3400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
-                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
-                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
-                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
-                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD, "pop", "<!0R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Push,          0xe92d0000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE, "push", "<!0R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
-                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
-                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmn", "!0C, #!1m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRR,  0xea000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "and", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRR,  0xea200000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "bic", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmnRR,  0xeb000000,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "cmn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRR,  0xea800000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "eor", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MulRRR,  0xfb00f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SdivRRR,  0xfb90f0f0,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sdiv", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2UdivRRR,  0xfbb0f0f0,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "udiv", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRI8M,       0xf1d00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "neg", "!0C,!1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRR,  0xea400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "orr", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2TstRR,       0xea100f00,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LslRRR,  0xfa00f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsl", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LsrRRR,  0xfa20f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsr", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AsrRRR,  0xfa40f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "asr", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RorRRR,  0xfa60f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "ror", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LslRRI5,  0xea4f0000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsl", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LsrRRI5,  0xea4f0010,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsr", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AsrRRI5,  0xea4f0020,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "asr", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RorRRI5,  0xea4f0030,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "ror", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRI8M,  0xf0200000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "bic", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRI8M,  0xf0000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "and", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRI8M,  0xf0400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "orr", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrnRRI8M,  0xf0600000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "orn", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "eor", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRI8M,  0xf1100000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRI8M,  0xf1500000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI8M,  0xf1b00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRI8M,  0xf1700000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RevRR, 0xfa90f080,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
-                 "rev", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
-                 "revsh", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2It,  0xbf00,
-                 kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
-                 "it:!1b", "!0c", 2, kFixupNone),
-    ENCODING_MAP(kThumb2Fmstat,  0xeef1fa10,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES | USES_CCODES,
-                 "fmstat", "", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vcmpd,        0xeeb40b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "vcmp.f64", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vcmps,        0xeeb40a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "vcmp.f32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
-                 "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
-    ENCODING_MAP(kThumb2BCond,        0xf0008000,
-                 kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
-                 "b!1c", "!0t", 4, kFixupCondBranch),
-    ENCODING_MAP(kThumb2Fmrs,       0xee100a10,
-                 kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmrs", "!0C, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Fmsr,       0xee000a10,
-                 kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmsr", "!0s, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Fmrrd,       0xec500b10,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
-                 "fmrrd", "!0C, !1C, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Fmdrr,       0xec400b10,
-                 kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmdrr", "!0S, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vabsd,       0xeeb00bc0,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f64", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vabss,       0xeeb00ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vnegd,       0xeeb10b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f64", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vnegs,       0xeeb10a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovs_IMM8,       0xeeb00a00,
-                 kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f32", "!0s, #0x!1h", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovd_IMM8,       0xeeb00b00,
-                 kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Mla,  0xfb000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
-                 "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Mls,  0xfb000010,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
-                 "mls", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Umull,  0xfba00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 3, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOADX,
-                 "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ldrexd,      0xe8d0007f,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOADX,
-                 "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Strex,       0xe8400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STOREX,
-                 "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Strexd,      0xe8c00070,
-                 kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8,
-                 kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STOREX,
-                 "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "clrex", "", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Bfi,         0xf3600000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
-                 kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Bfc,         0xf36f0000,
-                 kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
-                 "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
-                 kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_VOLATILE,
-                 "dmb", "#!0B", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF,
-                 "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Stm,          0xe9000000,
-                 kFmtBitBlt, 19, 16, kFmtStmRegList, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stm", "!0C, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumbUndefined,       0xde00,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "undefined", "", 2, kFixupNone),
-    // NOTE: vpop, vpush hard-encoded for s16+ reg list
-    ENCODING_MAP(kThumb2VPopCS,       0xecbd8a00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
-                 | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VPushCS,      0xed2d8a00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
-                 | IS_STORE, "vpush", "<!0P>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vldms,        0xec900a00,
-                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
-                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vstms,        0xec800a00,
-                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
-                 | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BUncond,      0xf0009000,
-                 kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
-                 "b", "!0t", 4, kFixupT2Branch),
-    ENCODING_MAP(kThumb2Bl,           0xf000d000,
-                 kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
-                 "bl", "!0T", 4, kFixupLabel),
-    ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
-                 "movt", "!0C, #!1M", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddPCR,      0x4487,
-                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
-                 "add", "rPC, !0C", 2, kFixupLabel),
-    ENCODING_MAP(kThumb2Adr,         0xf20f0000,
-                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 /* Note: doesn't affect flags */
-                 IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "adr", "!0C,#!1d", 4, kFixupAdr),
-    ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "mov", "!0C, #!1M", 4, kFixupMovImmLST),
-    ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
-                 "movt", "!0C, #!1M", 4, kFixupMovImmHST),
-    ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
-                 kFmtBitBlt, 19, 16, kFmtLdmRegList, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRRs,  0xea500000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Push1,    0xf84d0d04,
-                 kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0
-                 | IS_STORE, "push1", "!0C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Pop1,    0xf85d0b04,
-                 kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
-                 | IS_LOAD, "pop1", "!0C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRR,  0xebd00000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Smull,  0xfb800000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 3, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD_OFF4 | NEEDS_FIXUP,
-                 "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
-    ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD_OFF4,
-                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0,
-                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE_OFF4,
-                 "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
-};
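
// The table above drives a generic encoder: each entry pairs a fixed bit
// skeleton with up to four operand field descriptors, and the encoder ORs
// shifted operand bits into the skeleton. A minimal sketch of that idea,
// using hypothetical names (FieldSpec, EncodeBitBlt) that are not ART's:
#include <cstdint>

struct FieldSpec {
  int start;  // lowest instruction bit the operand occupies
  int end;    // highest instruction bit the operand occupies (assumed < 31)
};

// Mirrors the common kFmtBitBlt case: shift the operand into place, then
// mask off anything above the field's top bit before ORing it in.
inline uint32_t EncodeBitBlt(uint32_t skeleton, FieldSpec f, uint32_t operand) {
  return skeleton | ((operand << f.start) & ((1u << (f.end + 1)) - 1));
}
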
-
-// new_lir replaces orig_lir in the pcrel_fixup list.
-void ArmMir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
-  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
-  if (UNLIKELY(prev_lir == nullptr)) {
-    first_fixup_ = new_lir;
-  } else {
-    prev_lir->u.a.pcrel_next = new_lir;
-  }
-  orig_lir->flags.fixup = kFixupNone;
-}
-
-// new_lir is inserted before orig_lir in the pcrel_fixup list.
-void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
-  new_lir->u.a.pcrel_next = orig_lir;
-  if (UNLIKELY(prev_lir == nullptr)) {
-    first_fixup_ = new_lir;
-  } else {
-    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
-    prev_lir->u.a.pcrel_next = new_lir;
-  }
-}
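
// Both helpers above edit a singly linked list threaded through pcrel_next,
// so callers must pass the predecessor (or nullptr when the node is the
// head). A stand-alone model of ReplaceFixup's splice, with hypothetical
// names (Node, Replace); not ART code:
struct Node {
  Node* next = nullptr;
};

void Replace(Node** head, Node* prev, Node* old_node, Node* new_node) {
  new_node->next = old_node->next;
  if (prev == nullptr) {
    *head = new_node;     // old_node was the list head
  } else {
    prev->next = new_node;
  }
}
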
-
-/*
- * A fake NOP that moves r0 to r0 will actually incur data stalls if r0 is
- * not ready. Since r5FP is updated infrequently, it is less likely to
- * generate unnecessary stall cycles.
- * TUNING: No longer true - find new NOP pattern.
- */
-#define PADDING_MOV_R5_R5               0x1C2D
-
-uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
-  uint8_t* const write_buffer = write_pos;
-  for (; lir != nullptr; lir = NEXT_LIR(lir)) {
-    lir->offset = (write_pos - write_buffer);
-    if (!lir->flags.is_nop) {
-      int opcode = lir->opcode;
-      if (IsPseudoLirOp(opcode)) {
-        if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
-          // Note: size for this opcode will be either 0 or 2 depending on final alignment.
-          if (lir->offset & 0x2) {
-            write_pos[0] = (PADDING_MOV_R5_R5 & 0xff);
-            write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
-            write_pos += 2;
-          }
-        }
-      } else if (LIKELY(!lir->flags.is_nop)) {
-        const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
-        uint32_t bits = encoder->skeleton;
-        for (int i = 0; i < 4; i++) {
-          uint32_t operand;
-          uint32_t value;
-          operand = lir->operands[i];
-          ArmEncodingKind kind = encoder->field_loc[i].kind;
-          if (LIKELY(kind == kFmtBitBlt)) {
-            value = (operand << encoder->field_loc[i].start) &
-                ((1 << (encoder->field_loc[i].end + 1)) - 1);
-            bits |= value;
-          } else {
-            switch (encoder->field_loc[i].kind) {
-              case kFmtLdmRegList:
-                value = (operand << encoder->field_loc[i].start) &
-                    ((1 << (encoder->field_loc[i].end + 1)) - 1);
-                bits |= value;
-                DCHECK_EQ((bits & (1 << 13)), 0u);
-                break;
-              case kFmtStmRegList:
-                value = (operand << encoder->field_loc[i].start) &
-                    ((1 << (encoder->field_loc[i].end + 1)) - 1);
-                bits |= value;
-                DCHECK_EQ((bits & (1 << 13)), 0u);
-                DCHECK_EQ((bits & (1 << 15)), 0u);
-                break;
-              case kFmtSkip:
-                break;  // Nothing to do, but continue to next.
-              case kFmtUnused:
-                i = 4;  // Done, break out of the enclosing loop.
-                break;
-              case kFmtFPImm:
-                value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
-                value |= (operand & 0x0F) << encoder->field_loc[i].start;
-                bits |= value;
-                break;
-              case kFmtBrOffset:
-                value = ((operand  & 0x80000) >> 19) << 26;
-                value |= ((operand & 0x40000) >> 18) << 11;
-                value |= ((operand & 0x20000) >> 17) << 13;
-                value |= ((operand & 0x1f800) >> 11) << 16;
-                value |= (operand  & 0x007ff);
-                bits |= value;
-                break;
-              case kFmtShift5:
-                value = ((operand & 0x1c) >> 2) << 12;
-                value |= (operand & 0x03) << 6;
-                bits |= value;
-                break;
-              case kFmtShift:
-                value = ((operand & 0x70) >> 4) << 12;
-                value |= (operand & 0x0f) << 4;
-                bits |= value;
-                break;
-              case kFmtBWidth:
-                value = operand - 1;
-                bits |= value;
-                break;
-              case kFmtLsb:
-                value = ((operand & 0x1c) >> 2) << 12;
-                value |= (operand & 0x03) << 6;
-                bits |= value;
-                break;
-              case kFmtImm6:
-                value = ((operand & 0x20) >> 5) << 9;
-                value |= (operand & 0x1f) << 3;
-                bits |= value;
-                break;
-              case kFmtDfp: {
-                DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
-                uint32_t reg_num = RegStorage::RegNum(operand);
-                /* Snag the 1-bit slice and position it */
-                value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end;
-                /* Extract and position the 4-bit slice */
-                value |= (reg_num & 0x0f) << encoder->field_loc[i].start;
-                bits |= value;
-                break;
-              }
-              case kFmtSfp: {
-                DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
-                uint32_t reg_num = RegStorage::RegNum(operand);
-                /* Snag the 1-bit slice and position it */
-                value = (reg_num & 0x1) << encoder->field_loc[i].end;
-                /* Extract and position the 4-bit slice */
-                value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start;
-                bits |= value;
-                break;
-              }
-              case kFmtImm12:
-              case kFmtModImm:
-                value = ((operand & 0x800) >> 11) << 26;
-                value |= ((operand & 0x700) >> 8) << 12;
-                value |= operand & 0x0ff;
-                bits |= value;
-                break;
-              case kFmtImm16:
-                value = ((operand & 0x0800) >> 11) << 26;
-                value |= ((operand & 0xf000) >> 12) << 16;
-                value |= ((operand & 0x0700) >> 8) << 12;
-                value |= operand & 0x0ff;
-                bits |= value;
-                break;
-              case kFmtOff24: {
-                uint32_t signbit = (operand >> 31) & 0x1;
-                uint32_t i1 = (operand >> 22) & 0x1;
-                uint32_t i2 = (operand >> 21) & 0x1;
-                uint32_t imm10 = (operand >> 11) & 0x03ff;
-                uint32_t imm11 = operand & 0x07ff;
-                uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
-                uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
-                value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
-                    imm11;
-                bits |= value;
-                }
-                break;
-              default:
-                LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
-            }
-          }
-        }
-        if (encoder->size == 4) {
-          write_pos[0] = ((bits >> 16) & 0xff);
-          write_pos[1] = ((bits >> 24) & 0xff);
-          write_pos[2] = (bits & 0xff);
-          write_pos[3] = ((bits >> 8) & 0xff);
-          write_pos += 4;
-        } else {
-          DCHECK_EQ(encoder->size, 2);
-          write_pos[0] = (bits & 0xff);
-          write_pos[1] = ((bits >> 8) & 0xff);
-          write_pos += 2;
-        }
-      }
-    }
-  }
-  return write_pos;
-}
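
// EncodeLIRs writes a 32-bit Thumb2 encoding as two consecutive little-endian
// 16-bit halfwords, high halfword first, which is why the bytes above go out
// in the order bits>>16, bits>>24, bits, bits>>8. A sketch of just that step
// (EmitThumb2 is an illustrative name, not ART's):
#include <cstdint>

inline uint8_t* EmitThumb2(uint8_t* out, uint32_t bits) {
  out[0] = static_cast<uint8_t>(bits >> 16);  // low byte of the high halfword
  out[1] = static_cast<uint8_t>(bits >> 24);  // high byte of the high halfword
  out[2] = static_cast<uint8_t>(bits);        // low byte of the low halfword
  out[3] = static_cast<uint8_t>(bits >> 8);   // high byte of the low halfword
  return out + 4;
}
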
-
-// Assemble the LIR into binary instruction format.
-void ArmMir2Lir::AssembleLIR() {
-  LIR* lir;
-  LIR* prev_lir;
-  cu_->NewTimingSplit("Assemble");
-  int assembler_retries = 0;
-  CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = RoundUp(starting_offset, 4);
-  int32_t offset_adjustment;
-  AssignDataOffsets();
-
-  /*
-   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for
-   * non-visited nodes).  Start at zero here; the bit will be flipped to 1 on entry to the loop.
-   */
-  int generation = 0;
-  while (true) {
-    offset_adjustment = 0;
-    AssemblerStatus res = kSuccess;  // Assume success
-    generation ^= 1;
-    // Note: nodes requiring possible fixup are linked in ascending order.
-    lir = first_fixup_;
-    prev_lir = nullptr;
-    while (lir != nullptr) {
-      /*
-       * NOTE: the lir being considered here will be encoded following the switch (so long as
-       * we're not in a retry situation).  However, any new non-pc_rel instructions inserted
-       * due to retry must be explicitly encoded at the time of insertion.  Note that
-       * inserted instructions don't need use/def flags, but do need size and pc-rel status
-       * properly updated.
-       */
-      lir->offset += offset_adjustment;
-      // During the pass, this lets us tell whether a node has had offset_adjustment applied yet.
-      lir->flags.generation = generation;
-      switch (static_cast<FixupKind>(lir->flags.fixup)) {
-        case kFixupLabel:
-        case kFixupNone:
-          break;
-        case kFixupVLoad:
-          if (lir->operands[1] != rs_r15pc.GetReg()) {
-            break;
-          }
-          FALLTHROUGH_INTENDED;
-        case kFixupLoad: {
-          /*
-           * PC-relative loads are mostly used to load immediates
-           * that are too large to materialize directly in one shot.
-           * However, if the load displacement exceeds the limit,
-           * we revert to a multiple-instruction materialization sequence.
-           */
-          LIR *lir_target = lir->target;
-          CodeOffset pc = (lir->offset + 4) & ~3;
-          CodeOffset target = lir_target->offset +
-              ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          if (res != kSuccess) {
-            /*
-             * In this case, we're just estimating and will do it again for real.  Ensure offset
-             * is legal.
-             */
-            delta &= ~0x3;
-          }
-          DCHECK_ALIGNED(delta, 4);
-          // First, a sanity check for cases we shouldn't see now
-          if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
-              ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
-            // Shouldn't happen in current codegen.
-            LOG(FATAL) << "Unexpected pc-rel offset " << delta;
-          }
-          // Now, check for the difficult cases
-          if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
-              ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
-              ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
-              ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
-            /*
-             * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we
-             * sometimes have to use it to fix up out-of-range accesses.  This is where that
-             * happens.
-             */
-            int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
-                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] :
-                            rs_rARM_LR.GetReg();
-
-            // Add new Adr to generate the address.
-            LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
-                       base_reg, 0, 0, 0, 0, lir->target);
-            new_adr->offset = lir->offset;
-            new_adr->flags.fixup = kFixupAdr;
-            new_adr->flags.size = EncodingMap[kThumb2Adr].size;
-            InsertLIRBefore(lir, new_adr);
-            lir->offset += new_adr->flags.size;
-            offset_adjustment += new_adr->flags.size;
-
-            // lir no longer pcrel, unlink and link in new_adr.
-            ReplaceFixup(prev_lir, lir, new_adr);
-
-            // Convert to normal load.
-            offset_adjustment -= lir->flags.size;
-            if (lir->opcode == kThumb2LdrPcRel12) {
-              lir->opcode = kThumb2LdrRRI12;
-            } else if (lir->opcode == kThumb2LdrdPcRel8) {
-              lir->opcode = kThumb2LdrdI8;
-            }
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            offset_adjustment += lir->flags.size;
-            // Change the load to be relative to the new Adr base.
-            if (lir->opcode == kThumb2LdrdI8) {
-              lir->operands[3] = 0;
-              lir->operands[2] = base_reg;
-            } else {
-              lir->operands[2] = 0;
-              lir->operands[1] = base_reg;
-            }
-            prev_lir = new_adr;  // Continue scan with new_adr;
-            lir = new_adr->u.a.pcrel_next;
-            res = kRetryAll;
-            continue;
-          } else {
-            if ((lir->opcode == kThumb2Vldrs) ||
-                (lir->opcode == kThumb2Vldrd) ||
-                (lir->opcode == kThumb2LdrdPcRel8)) {
-              lir->operands[2] = delta >> 2;
-            } else {
-              lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
-                  delta >> 2;
-            }
-          }
-          break;
-        }
-        case kFixupCBxZ: {
-          LIR *target_lir = lir->target;
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          if (delta > 126 || delta < 0) {
-            /*
-             * Convert to cmp rx,#0 / b[eq/ne] tgt pair
-             * Make new branch instruction and insert after
-             */
-            LIR* new_inst =
-              RawLIR(lir->dalvik_offset, kThumbBCond, 0,
-                     (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
-                     0, 0, 0, lir->target);
-            InsertLIRAfter(lir, new_inst);
-
-            /* Convert the cb[n]z to a cmp rx, #0 */
-            // Subtract the old size.
-            offset_adjustment -= lir->flags.size;
-            lir->opcode = kThumbCmpRI8;
-            /* operand[0] is src1 in both cb[n]z & CmpRI8 */
-            lir->operands[1] = 0;
-            lir->target = 0;
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            // Add back the new size.
-            offset_adjustment += lir->flags.size;
-            // Set up the new following inst.
-            new_inst->offset = lir->offset + lir->flags.size;
-            new_inst->flags.fixup = kFixupCondBranch;
-            new_inst->flags.size = EncodingMap[new_inst->opcode].size;
-            offset_adjustment += new_inst->flags.size;
-
-            // lir no longer pcrel, unlink and link in new_inst.
-            ReplaceFixup(prev_lir, lir, new_inst);
-            prev_lir = new_inst;  // Continue with the new instruction.
-            lir = new_inst->u.a.pcrel_next;
-            res = kRetryAll;
-            continue;
-          } else {
-            lir->operands[1] = delta >> 1;
-          }
-          break;
-        }
-        case kFixupCondBranch: {
-          LIR *target_lir = lir->target;
-          int32_t delta = 0;
-          DCHECK(target_lir);
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          delta = target - pc;
-          if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
-            offset_adjustment -= lir->flags.size;
-            lir->opcode = kThumb2BCond;
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            // Fixup kind remains the same.
-            offset_adjustment += lir->flags.size;
-            res = kRetryAll;
-          }
-          lir->operands[0] = delta >> 1;
-          break;
-        }
-        case kFixupT2Branch: {
-          LIR *target_lir = lir->target;
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          lir->operands[0] = delta >> 1;
-          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
-            // Useless branch
-            offset_adjustment -= lir->flags.size;
-            lir->flags.is_nop = true;
-            // Don't unlink - just set to do-nothing.
-            lir->flags.fixup = kFixupNone;
-            res = kRetryAll;
-          }
-          break;
-        }
-        case kFixupT1Branch: {
-          LIR *target_lir = lir->target;
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          if (delta > 2046 || delta < -2048) {
-            // Out of range for the 16-bit form; convert to the 32-bit kThumb2BUncond.
-            offset_adjustment -= lir->flags.size;
-            lir->opcode = kThumb2BUncond;
-            lir->operands[0] = 0;
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            lir->flags.fixup = kFixupT2Branch;
-            offset_adjustment += lir->flags.size;
-            res = kRetryAll;
-          } else {
-            lir->operands[0] = delta >> 1;
-            if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) {
-              // Useless branch
-              offset_adjustment -= lir->flags.size;
-              lir->flags.is_nop = true;
-              // Don't unlink - just set to do-nothing.
-              lir->flags.fixup = kFixupNone;
-              res = kRetryAll;
-            }
-          }
-          break;
-        }
-        case kFixupBlx1: {
-          DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
-          /* cur_pc is Thumb */
-          CodeOffset cur_pc = (lir->offset + 4) & ~3;
-          CodeOffset target = lir->operands[1];
-
-          /* Match bit[1] in target with base */
-          if (cur_pc & 0x2) {
-            target |= 0x2;
-          }
-          int32_t delta = target - cur_pc;
-          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-          lir->operands[0] = (delta >> 12) & 0x7ff;
-          NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-          break;
-        }
-        case kFixupBl1: {
-          DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
-          /* Both cur_pc and target are Thumb */
-          CodeOffset cur_pc = lir->offset + 4;
-          CodeOffset target = lir->operands[1];
-
-          int32_t delta = target - cur_pc;
-          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-          lir->operands[0] = (delta >> 12) & 0x7ff;
-          NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-          break;
-        }
-        case kFixupAdr: {
-          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[2]);
-          LIR* target = lir->target;
-          int32_t target_disp = (tab_rec != nullptr) ?  tab_rec->offset + offset_adjustment
-              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
-              offset_adjustment);
-          int32_t disp = target_disp - ((lir->offset + 4) & ~3);
-          if (disp < 4096) {
-            lir->operands[1] = disp;
-          } else {
-            // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
-            // TUNING: if this case fires often, it can be improved.  Not expected to be common.
-            LIR *new_mov16L =
-                RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0,
-                       WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
-            new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size;
-            new_mov16L->flags.fixup = kFixupMovImmLST;
-            new_mov16L->offset = lir->offset;
-            // Link the new instruction, retaining lir.
-            InsertLIRBefore(lir, new_mov16L);
-            lir->offset += new_mov16L->flags.size;
-            offset_adjustment += new_mov16L->flags.size;
-            InsertFixupBefore(prev_lir, lir, new_mov16L);
-            prev_lir = new_mov16L;   // Now we've got a new prev.
-            LIR *new_mov16H =
-                RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0,
-                       WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
-            new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size;
-            new_mov16H->flags.fixup = kFixupMovImmHST;
-            new_mov16H->offset = lir->offset;
-            // Link the new instruction, retaining lir.
-            InsertLIRBefore(lir, new_mov16H);
-            lir->offset += new_mov16H->flags.size;
-            offset_adjustment += new_mov16H->flags.size;
-            InsertFixupBefore(prev_lir, lir, new_mov16H);
-            prev_lir = new_mov16H;  // Now we've got a new prev.
-
-            offset_adjustment -= lir->flags.size;
-            if (RegStorage::RegNum(lir->operands[0]) < 8) {
-              lir->opcode = kThumbAddRRLH;
-            } else {
-              lir->opcode = kThumbAddRRHH;
-            }
-            lir->operands[1] = rs_rARM_PC.GetReg();
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            offset_adjustment += lir->flags.size;
-            // Must stay in fixup list and have offset updated; will be used by LST/HST pair.
-            lir->flags.fixup = kFixupNone;
-            res = kRetryAll;
-          }
-          break;
-        }
-        case kFixupMovImmLST: {
-          // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
-          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
-          // If tab_rec is null, this is a literal load. Use target
-          LIR* target = lir->target;
-          int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
-          lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
-          break;
-        }
-        case kFixupMovImmHST: {
-          // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
-          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
-          // If tab_rec is null, this is a literal load. Use target
-          LIR* target = lir->target;
-          int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
-          lir->operands[1] =
-              ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
-          break;
-        }
-        case kFixupAlign4: {
-          int32_t required_size = lir->offset & 0x2;
-          if (lir->flags.size != required_size) {
-            offset_adjustment += required_size - lir->flags.size;
-            lir->flags.size = required_size;
-            res = kRetryAll;
-          }
-          break;
-        }
-        default:
-          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
-      }
-      prev_lir = lir;
-      lir = lir->u.a.pcrel_next;
-    }
-
-    if (res == kSuccess) {
-      break;
-    } else {
-      assembler_retries++;
-      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
-        CodegenDump();
-        LOG(FATAL) << "Assembler error - too many retries";
-      }
-      starting_offset += offset_adjustment;
-      data_offset_ = RoundUp(starting_offset, 4);
-      AssignDataOffsets();
-    }
-  }
-
-  // Build the CodeBuffer.
-  DCHECK_LE(data_offset_, total_size_);
-  code_buffer_.reserve(total_size_);
-  code_buffer_.resize(starting_offset);
-  uint8_t* write_pos = &code_buffer_[0];
-  write_pos = EncodeLIRs(write_pos, first_lir_insn_);
-  DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
-
-  DCHECK_EQ(data_offset_, RoundUp(code_buffer_.size(), 4));
-
-  // Install literals
-  InstallLiteralPools();
-
-  // Install switch tables
-  InstallSwitchTables();
-
-  // Install fill array data
-  InstallFillArrayData();
-
-  // Create the mapping table and native offset to reference map.
-  cu_->NewTimingSplit("PcMappingTable");
-  CreateMappingTables();
-
-  cu_->NewTimingSplit("GcMap");
-  CreateNativeGcMap();
-}
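
// AssembleLIR is a classic relaxation loop: assume short encodings, walk the
// fixup chain computing pc-relative deltas, widen anything out of range, and
// retry until a pass makes no changes. A simplified stand-alone model (the
// real code also shifts branch *targets* via offset_adjustment and the
// generation bit; Insn and Relax are hypothetical names, not ART's):
#include <cstdlib>
#include <vector>

struct Insn {
  int offset;  // current code offset
  int target;  // branch target offset (assumed already final here)
  int size;    // 2 for the short form, 4 for the wide form
};

static bool Fits(const Insn& insn) {
  int delta = insn.target - (insn.offset + 4);
  return insn.size == 4 || (delta >= -256 && delta <= 254);
}

void Relax(std::vector<Insn>& insns, int max_retries) {
  for (int retry = 0; retry <= max_retries; ++retry) {
    int adjustment = 0;
    bool changed = false;
    for (Insn& insn : insns) {
      insn.offset += adjustment;      // carry forward growth from earlier insns
      if (!Fits(insn)) {
        adjustment += 4 - insn.size;  // widening pushes later code down
        insn.size = 4;
        changed = true;
      }
    }
    if (!changed) return;
  }
  std::abort();  // analogous to "Assembler error - too many retries"
}
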
-
-size_t ArmMir2Lir::GetInsnSize(LIR* lir) {
-  DCHECK(!IsPseudoLirOp(lir->opcode));
-  return EncodingMap[lir->opcode].size;
-}
-
-// Assign offsets and sizes, and link instructions that may need fixup into the pcrel chain.
-uint32_t ArmMir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
-  LIR* end_lir = tail_lir->next;
-
-  LIR* last_fixup = nullptr;
-  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
-    if (!lir->flags.is_nop) {
-      if (lir->flags.fixup != kFixupNone) {
-        if (!IsPseudoLirOp(lir->opcode)) {
-          lir->flags.size = EncodingMap[lir->opcode].size;
-          lir->flags.fixup = EncodingMap[lir->opcode].fixup;
-        } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
-          lir->flags.size = (offset & 0x2);
-          lir->flags.fixup = kFixupAlign4;
-        } else {
-          lir->flags.size = 0;
-          lir->flags.fixup = kFixupLabel;
-        }
-        // Link into the fixup chain.
-        lir->flags.use_def_invalid = true;
-        lir->u.a.pcrel_next = nullptr;
-        if (first_fixup_ == nullptr) {
-          first_fixup_ = lir;
-        } else {
-          last_fixup->u.a.pcrel_next = lir;
-        }
-        last_fixup = lir;
-        lir->offset = offset;
-      }
-      offset += lir->flags.size;
-    }
-  }
-  return offset;
-}
-
-void ArmMir2Lir::AssignDataOffsets() {
-  /* Set up offsets for literals */
-  CodeOffset offset = data_offset_;
-
-  offset = AssignLiteralOffset(offset);
-
-  offset = AssignSwitchTablesOffset(offset);
-
-  total_size_ = AssignFillArrayDataOffset(offset);
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm/backend_arm.h b/compiler/dex/quick/arm/backend_arm.h
deleted file mode 100644
index 42a9bca..0000000
--- a/compiler/dex/quick/arm/backend_arm.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_ARM_BACKEND_ARM_H_
-#define ART_COMPILER_DEX_QUICK_ARM_BACKEND_ARM_H_
-
-namespace art {
-
-struct CompilationUnit;
-class Mir2Lir;
-class MIRGraph;
-class ArenaAllocator;
-
-Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_ARM_BACKEND_ARM_H_
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
deleted file mode 100644
index 868d9a4..0000000
--- a/compiler/dex/quick/arm/call_arm.cc
+++ /dev/null
@@ -1,763 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the Thumb2 ISA. */
-
-#include "codegen_arm.h"
-
-#include "arm_lir.h"
-#include "art_method.h"
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "gc/accounting/card_table.h"
-#include "mirror/object_array-inl.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-
-namespace art {
-
-/*
- * The sparse table in the literal pool is an array of <key,displacement>
- * pairs.  For each set, we'll load them as a pair using ldmia.
- * This means that the register number of the temp we use for the key
- * must be lower than the reg for the displacement.
- *
- * The test loop will look something like:
- *
- *   adr   r_base, <table>
- *   ldr   r_val, [rARM_SP, v_reg_off]
- *   mov   r_idx, #table_size
- * lp:
- *   ldmia r_base!, {r_key, r_disp}
- *   sub   r_idx, #1
- *   cmp   r_val, r_key
- *   ifeq
- *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
- *   cbnz  r_idx, lp
- */
-void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later
-  SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  uint32_t size = table[1];
-  switch_tables_.push_back(tab_rec);
-
-  // Get the switch value
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage r_base = AllocTemp();
-  /* Allocate key and disp temps */
-  RegStorage r_key = AllocTemp();
-  RegStorage r_disp = AllocTemp();
-  // Make sure r_key's register number is less than r_disp's number for ldmia
-  if (r_key.GetReg() > r_disp.GetReg()) {
-    RegStorage tmp = r_disp;
-    r_disp = r_key;
-    r_key = tmp;
-  }
-  // Materialize a pointer to the switch table
-  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
-  // Set up r_idx
-  RegStorage r_idx = AllocTemp();
-  LoadConstant(r_idx, size);
-  // Establish loop branch target
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  // Load next key/disp
-  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum()));
-  OpRegReg(kOpCmp, r_key, rl_src.reg);
-  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
-  LIR* it = OpIT(kCondEq, "");
-  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
-  OpEndIT(it);
-  tab_rec->anchor = switch_branch;
-  // Needs to use setflags encoding here
-  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-  OpCondBranch(kCondNe, target);
-}
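
// What the generated loop computes, restated in plain C++: a linear scan of
// the <key, displacement> pairs, yielding the matching displacement or
// falling through when no key matches (illustrative model, not ART code):
#include <cstdint>

struct SparseEntry {
  int32_t key;
  int32_t disp;
};

int32_t SparseSwitchLookup(const SparseEntry* table, uint32_t size, int32_t value) {
  for (uint32_t i = 0; i < size; ++i) {
    if (table[i].key == value) {
      return table[i].disp;  // taken-branch displacement
    }
  }
  return 0;  // no match: execution falls through past the loop
}
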
-
-
-void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later
-  SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  uint32_t size = table[1];
-  switch_tables_.push_back(tab_rec);
-
-  // Get the switch value
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage table_base = AllocTemp();
-  // Materialize a pointer to the switch table
-  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
-  int low_key = s4FromSwitchData(&table[2]);
-  RegStorage keyReg;
-  // Remove the bias, if necessary
-  if (low_key == 0) {
-    keyReg = rl_src.reg;
-  } else {
-    keyReg = AllocTemp();
-    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
-  }
-  // Bounds check - if < 0 or >= size continue following switch
-  OpRegImm(kOpCmp, keyReg, size-1);
-  LIR* branch_over = OpCondBranch(kCondHi, nullptr);
-
-  // Load the displacement from the switch table
-  RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
-
-  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
-  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
-  tab_rec->anchor = switch_branch;
-
-  /* branch_over target here */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
-}
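
// The packed variant restated the same way: bias the key by low_key, do one
// unsigned comparison (which folds the < 0 and >= size checks together, as
// the kCondHi branch above does), then index the displacement table directly
// (illustrative model, not ART code):
#include <cstdint>

int32_t PackedSwitchLookup(const int32_t* disps, uint32_t size, int32_t low_key,
                           int32_t value) {
  uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low_key);
  if (index >= size) {
    return 0;  // out of range: fall through
  }
  return disps[index];
}
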
-
-/*
- * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
- * details see monitor.cc.
- */
-void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  // FIXME: need separate LoadValues for object references.
-  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
-  if (kArchVariantHasGoodBranchPredictor) {
-    LIR* null_check_branch = nullptr;
-    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
-      null_check_branch = nullptr;  // No null check.
-    } else {
-      // If the null-check fails, it's handled by the slow-path to reduce exception-related metadata.
-      if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, nullptr);
-      }
-    }
-    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
-    MarkPossibleNullPointerException(opt_flags);
-    // Zero out the read barrier bits.
-    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
-    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r3, 0, nullptr);
-    // r1 is zero except for the rb bits here. Copy the read barrier bits into r2.
-    OpRegRegReg(kOpOr, rs_r2, rs_r2, rs_r1);
-    NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
-    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, nullptr);
-
-
-    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
-    not_unlocked_branch->target = slow_path_target;
-    if (null_check_branch != nullptr) {
-      null_check_branch->target = slow_path_target;
-    }
-    // TODO: move to a slow path.
-    // Go expensive route - artLockObjectFromCode(obj);
-    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
-    MarkSafepointPC(call_inst);
-
-    LIR* success_target = NewLIR0(kPseudoTargetLabel);
-    lock_success_branch->target = success_target;
-    GenMemBarrier(kLoadAny);
-  } else {
-    // Explicit null-check as slow-path is entered using an IT.
-    GenNullCheck(rs_r0, opt_flags);
-    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
-    MarkPossibleNullPointerException(opt_flags);
-    // Zero out the read barrier bits.
-    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
-    // If the following cmp-and-branch takes the eq path (where r2 is used),
-    // r1 is zero except for the rb bits. Copy the read barrier bits into r2.
-    OpRegRegReg(kOpOr, rs_r2, rs_r2, rs_r1);
-    OpRegImm(kOpCmp, rs_r3, 0);
-
-    LIR* it = OpIT(kCondEq, "");
-    NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
-    OpEndIT(it);
-    OpRegImm(kOpCmp, rs_r1, 0);
-    it = OpIT(kCondNe, "T");
-    // Go expensive route - artLockObjectFromCode(self, obj);
-    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(),
-                       rs_rARM_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
-    OpEndIT(it);
-    MarkSafepointPC(call_inst);
-    GenMemBarrier(kLoadAny);
-  }
-}
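
// A rough C++ model of the inline fast path above, with a compare-exchange
// standing in for the ldrex/strex pair: if the lock word is unlocked apart
// from the read barrier bits, install the thread id while keeping those
// bits. The mask is a placeholder; ART's real bit layout lives in LockWord.
#include <atomic>
#include <cstdint>

constexpr uint32_t kRbStateMask = 0x30000000;  // assumed placement, for illustration

bool TryThinLockEnter(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  uint32_t old_word = lock_word.load(std::memory_order_relaxed);
  if ((old_word & ~kRbStateMask) != 0) {
    return false;  // already locked (or inflated): take the slow path
  }
  uint32_t new_word = (old_word & kRbStateMask) | thread_id;
  // Acquire on success plays the role of the GenMemBarrier(kLoadAny) above.
  return lock_word.compare_exchange_strong(old_word, new_word,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed);
}
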
-
-/*
- * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
- * details see monitor.cc. Note the non-read-barrier code below doesn't need ldrex/strex, as the
- * thread holds the lock and can only give away ownership if it's suspended.
- */
-void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  LIR* null_check_branch = nullptr;
-  Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
-  if (kArchVariantHasGoodBranchPredictor) {
-    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
-      null_check_branch = nullptr;  // No null check.
-    } else {
-      // If the null-check fails, it's handled by the slow-path to reduce exception-related metadata.
-      if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, nullptr);
-      }
-    }
-    if (!kUseReadBarrier) {
-      Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
-    } else {
-      NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
-              mirror::Object::MonitorOffset().Int32Value() >> 2);
-    }
-    MarkPossibleNullPointerException(opt_flags);
-    // Zero out the read barrier bits.
-    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
-    // Zero out except the read barrier bits.
-    OpRegRegImm(kOpAnd, rs_r1, rs_r1, LockWord::kReadBarrierStateMaskShifted);
-    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r3, rs_r2, nullptr);
-    GenMemBarrier(kAnyStore);
-    LIR* unlock_success_branch;
-    if (!kUseReadBarrier) {
-      Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
-      unlock_success_branch = OpUnconditionalBranch(nullptr);
-    } else {
-      NewLIR4(kThumb2Strex, rs_r2.GetReg(), rs_r1.GetReg(), rs_r0.GetReg(),
-              mirror::Object::MonitorOffset().Int32Value() >> 2);
-      unlock_success_branch = OpCmpImmBranch(kCondEq, rs_r2, 0, nullptr);
-    }
-    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
-    slow_unlock_branch->target = slow_path_target;
-    if (null_check_branch != nullptr) {
-      null_check_branch->target = slow_path_target;
-    }
-    // TODO: move to a slow path.
-    // Go expensive route - artUnlockObjectFromCode(obj);
-    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
-    MarkSafepointPC(call_inst);
-
-    LIR* success_target = NewLIR0(kPseudoTargetLabel);
-    unlock_success_branch->target = success_target;
-  } else {
-    // Explicit null-check as slow-path is entered using an IT.
-    GenNullCheck(rs_r0, opt_flags);
-    if (!kUseReadBarrier) {
-      Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
-    } else {
-      // If we use read barriers, we need to use atomic instructions.
-      NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
-              mirror::Object::MonitorOffset().Int32Value() >> 2);
-    }
-    MarkPossibleNullPointerException(opt_flags);
-    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    // Zero out the read barrier bits.
-    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
-    // Zero out except the read barrier bits.
-    OpRegRegImm(kOpAnd, rs_r1, rs_r1, LockWord::kReadBarrierStateMaskShifted);
-    // Is lock unheld on lock or held by us (==thread_id) on unlock?
-    OpRegReg(kOpCmp, rs_r3, rs_r2);
-    if (!kUseReadBarrier) {
-      LIR* it = OpIT(kCondEq, "EE");
-      if (GenMemBarrier(kAnyStore)) {
-        UpdateIT(it, "TEE");
-      }
-      Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
-      // Go expensive route - UnlockObjectFromCode(obj);
-      LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
-                         rs_rARM_LR);
-      ClobberCallerSave();
-      LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
-      OpEndIT(it);
-      MarkSafepointPC(call_inst);
-    } else {
-      // If we use read barriers, we need to use atomic instructions.
-      LIR* it = OpIT(kCondEq, "");
-      if (GenMemBarrier(kAnyStore)) {
-        UpdateIT(it, "T");
-      }
-      NewLIR4/*eq*/(kThumb2Strex, rs_r2.GetReg(), rs_r1.GetReg(), rs_r0.GetReg(),
-                    mirror::Object::MonitorOffset().Int32Value() >> 2);
-      OpEndIT(it);
-      // Since we know r2 wasn't zero before the above it instruction,
-      // if r2 is zero here, we know r3 was equal to r2 and the strex
-      // succeeded (we're done). Otherwise (either r3 wasn't equal to r2
-      // or the strex failed), call the entrypoint.
-      OpRegImm(kOpCmp, rs_r2, 0);
-      LIR* it2 = OpIT(kCondNe, "T");
-      // Go expensive route - UnlockObjectFromCode(obj);
-      LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
-                         rs_rARM_LR);
-      ClobberCallerSave();
-      LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
-      OpEndIT(it2);
-      MarkSafepointPC(call_inst);
-    }
-  }
-}
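
// Companion sketch for the unlock fast path, reusing kRbStateMask and the
// includes from the enter sketch above: the lock word minus the read barrier
// bits must equal our thin-lock thread id, and a release store hands the
// lock back while preserving the read barrier state. This models the
// non-read-barrier path; with read barriers the store becomes a CAS, as the
// strex above does. Illustrative only, not ART code.
bool TryThinLockExit(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  uint32_t old_word = lock_word.load(std::memory_order_relaxed);
  if ((old_word & ~kRbStateMask) != thread_id) {
    return false;  // not thin-locked by us: take the slow path
  }
  lock_word.store(old_word & kRbStateMask, std::memory_order_release);
  return true;
}
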
-
-void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-  RegStorage reset_reg = AllocTempRef();
-  LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg, kNotVolatile);
-  LoadConstant(reset_reg, 0);
-  StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg, kNotVolatile);
-  FreeTemp(reset_reg);
-  StoreValue(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
-  RegStorage reg_card_base = AllocTemp();
-  RegStorage reg_card_no = AllocTemp();
-  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
-  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
-  FreeTemp(reg_card_base);
-  FreeTemp(reg_card_no);
-}
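
// The card mark above, spelled out: shift the stored-into address right by
// the card shift and store one byte at that index off the card table base.
// Constants here are placeholders; ART's real values live in
// gc::accounting::CardTable, and the value it stores is derived from the
// table base's low byte rather than a fixed dirty marker.
#include <cstdint>

constexpr int kCardShiftSketch = 7;         // assumed card granularity, for illustration
constexpr uint8_t kCardDirtySketch = 0x70;  // assumed dirty value, for illustration

void MarkCard(uint8_t* card_table_base, uintptr_t stored_into_addr) {
  card_table_base[stored_into_addr >> kCardShiftSketch] = kCardDirtySketch;
}
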
-
-static dwarf::Reg DwarfCoreReg(int num) {
-  return dwarf::Reg::ArmCore(num);
-}
-
-static dwarf::Reg DwarfFpReg(int num) {
-  return dwarf::Reg::ArmFp(num);
-}
-
-void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
-  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
-  int spill_count = num_core_spills_ + num_fp_spills_;
-  /*
-   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
-   * mechanism know so it doesn't try to use any of them when
-   * expanding the frame or flushing.  This leaves the utility
-   * code with a single temp: r12.  This should be enough.
-   */
-  LockTemp(rs_r0);
-  LockTemp(rs_r1);
-  LockTemp(rs_r2);
-  LockTemp(rs_r3);
-
-  /*
-   * We can safely skip the stack overflow check if we're
-   * a leaf *and* our frame size < fudge factor.
-   */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm);
-  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm);
-  bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
-  bool generate_explicit_stack_overflow_check = large_frame ||
-    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
-  if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
-      if (!large_frame) {
-        /* Load stack limit */
-        LockTemp(rs_r12);
-        Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
-      }
-    } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      //
-      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
-      // we need to make sure that it's loadable in an immediate field of
-      // a sub instruction.  Otherwise we will get a temp allocation and the
-      // code size will increase.
-      //
-      // This is done before the callee save instructions to avoid any possibility
-      // of these overflowing.  This uses r12 and that's never saved in a callee
-      // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, GetStackOverflowReservedBytes(kArm));
-      Load32Disp(rs_r12, 0, rs_r12);
-      MarkPossibleStackOverflowException();
-    }
-  }
-  /* Spill core callee saves */
-  if (core_spill_mask_ != 0u) {
-    if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
-      // Spilling only low regs and/or LR, use 16-bit PUSH.
-      constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
-      NewLIR1(kThumbPush,
-              (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
-              ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
-    } else if (IsPowerOfTwo(core_spill_mask_)) {
-      // kThumb2Push cannot be used to spill a single register.
-      NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
-    } else {
-      NewLIR1(kThumb2Push, core_spill_mask_);
-    }
-    cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize);
-    cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize);
-  }
-  /* Need to spill any FP regs? */
-  if (num_fp_spills_ != 0u) {
-    /*
-     * NOTE: fp spills are a little different from core spills in that
-     * they are pushed as a contiguous block.  When promoting from
-     * the fp set, we must allocate all singles from s16..highest-promoted
-     */
-    NewLIR1(kThumb2VPushCS, num_fp_spills_);
-    cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize);
-    cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize);
-  }
-
-  const int spill_size = spill_count * 4;
-  const int frame_size_without_spills = frame_size_ - spill_size;
-  if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
-      class StackOverflowSlowPath : public LIRSlowPath {
-       public:
-        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
-            : LIRSlowPath(m2l, branch), restore_lr_(restore_lr),
-              sp_displace_(sp_displace) {
-        }
-        void Compile() OVERRIDE {
-          m2l_->ResetRegPool();
-          m2l_->ResetDefTracking();
-          GenerateTargetLabel(kPseudoThrowTarget);
-          if (restore_lr_) {
-            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
-          }
-          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
-          m2l_->cfi().AdjustCFAOffset(-sp_displace_);
-          m2l_->ClobberCallerSave();
-          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
-          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
-          // codegen and target are in thumb2 mode.
-          // NOTE: native pointer.
-          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
-          m2l_->cfi().AdjustCFAOffset(sp_displace_);
-        }
-
-       private:
-        const bool restore_lr_;
-        const size_t sp_displace_;
-      };
-      if (large_frame) {
-        // Note: may need a temp reg, and we only have r12 free at this point.
-        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
-        Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
-        // Need to restore LR since we used it as a temp.
-        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
-        OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
-        cfi_.AdjustCFAOffset(frame_size_without_spills);
-      } else {
-        /*
-         * If the frame is small enough, we are guaranteed to have enough remaining
-         * space to handle signals on the user stack.  However, we may not have any
-         * free temp registers at this point, so we'll temporarily add LR to the temp pool.
-         */
-        DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
-        MarkTemp(rs_rARM_LR);
-        FreeTemp(rs_rARM_LR);
-        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
-        cfi_.AdjustCFAOffset(frame_size_without_spills);
-        Clobber(rs_rARM_LR);
-        UnmarkTemp(rs_rARM_LR);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
-        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
-      }
-    } else {
-      // Implicit stack overflow check has already been done.  Just make room on the
-      // stack for the frame now.
-      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
-      cfi_.AdjustCFAOffset(frame_size_without_spills);
-    }
-  } else {
-    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
-    cfi_.AdjustCFAOffset(frame_size_without_spills);
-  }
-
-  FlushIns(ArgLocs, rl_method);
-
-  // We can promote a PC-relative reference to the dex cache arrays to a register
-  // if it's used at least twice. Rather than working out where to load the
-  // reference lazily, we simply load it after flushing the inputs.
-  if (dex_cache_arrays_base_reg_.Valid()) {
-    OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_,
-                             dex_cache_arrays_base_reg_);
-  }
-
-  FreeTemp(rs_r0);
-  FreeTemp(rs_r1);
-  FreeTemp(rs_r2);
-  FreeTemp(rs_r3);
-  FreeTemp(rs_r12);
-}
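-
-// For a concrete feel (a sketch, not generated output): a small method with
-// frame_size_ == 32 spilling {r5, r6, lr} and a skipped overflow check would
-// reduce to roughly
-//   push {r5, r6, lr}        // 16-bit PUSH; CFA offset becomes 12
-//   sub  sp, sp, #20         // frame_size - spill_size; CFA offset 32
-// followed by the FlushIns() stores of the incoming arguments.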
-
-void ArmMir2Lir::GenExitSequence() {
-  cfi_.RememberState();
-  int spill_count = num_core_spills_ + num_fp_spills_;
-
-  /*
-   * In the exit path, r0/r1 are live - make sure they aren't
-   * allocated by the register utilities as temps.
-   */
-  LockTemp(rs_r0);
-  LockTemp(rs_r1);
-
-  int adjust = frame_size_ - (spill_count * kArmPointerSize);
-  OpRegImm(kOpAdd, rs_rARM_SP, adjust);
-  cfi_.AdjustCFAOffset(-adjust);
-  /* Need to restore any FP callee saves? */
-  if (num_fp_spills_) {
-    NewLIR1(kThumb2VPopCS, num_fp_spills_);
-    cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize);
-    cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_);
-  }
-  bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0;
-  uint32_t core_unspill_mask = core_spill_mask_;
-  if (unspill_LR_to_PC) {
-    core_unspill_mask &= ~(1 << rs_rARM_LR.GetRegNum());
-    core_unspill_mask |= (1 << rs_rARM_PC.GetRegNum());
-  }
-  if (core_unspill_mask != 0u) {
-    if ((core_unspill_mask & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
-      // Unspilling only low regs and/or PC, use 16-bit POP.
-      constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
-      NewLIR1(kThumbPop,
-              (core_unspill_mask & ~(1u << rs_rARM_PC.GetRegNum())) |
-              ((core_unspill_mask & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
-    } else if (IsPowerOfTwo(core_unspill_mask)) {
-      // kThumb2Pop cannot be used to unspill a single register.
-      NewLIR1(kThumb2Pop1, CTZ(core_unspill_mask));
-    } else {
-      NewLIR1(kThumb2Pop, core_unspill_mask);
-    }
-    // If we pop to PC, there is no further epilogue code.
-    if (!unspill_LR_to_PC) {
-      cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize);
-      cfi_.RestoreMany(DwarfCoreReg(0), core_unspill_mask);
-      DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
-    }
-  }
-  if (!unspill_LR_to_PC) {
-    /* We didn't pop to rARM_PC, so must do a bx rARM_LR */
-    NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
-  }
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size_);
-}
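-
-// The matching epilogue for the sketch above would be simply
-//   add sp, sp, #20
-//   pop {r5, r6, pc}         // popping LR's slot into PC returns directly
-// which is why no bx lr and no further epilogue code is emitted on that path.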
-
-void ArmMir2Lir::GenSpecialExitSequence() {
-  NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
-}
-
-void ArmMir2Lir::GenSpecialEntryForSuspend() {
-  // Keep 16-byte stack alignment - push r0, i.e. ArtMethod*, r5, r6, lr.
-  DCHECK(!IsTemp(rs_r5));
-  DCHECK(!IsTemp(rs_r6));
-  core_spill_mask_ =
-      (1u << rs_r5.GetRegNum()) | (1u << rs_r6.GetRegNum()) | (1u << rs_rARM_LR.GetRegNum());
-  num_core_spills_ = 3u;
-  fp_spill_mask_ = 0u;
-  num_fp_spills_ = 0u;
-  frame_size_ = 16u;
-  core_vmap_table_.clear();
-  fp_vmap_table_.clear();
-  NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) |                 // ArtMethod*
-          (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |  // Spills other than LR.
-          (1u << 8));                                             // LR encoded for 16-bit push.
-  cfi_.AdjustCFAOffset(frame_size_);
-  // Do not generate CFI for scratch register r0.
-  cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize);
-}
-
-void ArmMir2Lir::GenSpecialExitForSuspend() {
-  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
-  NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_);  // 32-bit because of LR.
-  cfi_.AdjustCFAOffset(-frame_size_);
-  cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_);
-}
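-
-// Sanity check on the suspend frame above: the entry pushes
-// {r0, r5, r6, lr} = 4 registers * 4 bytes = 16 bytes, matching
-// frame_size_ = 16u and preserving the required 16-byte stack alignment.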
-
-static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
-  // Emit relative calls only within a dex file due to the limited range of the BL insn.
-  return cu->dex_file == target_method.dex_file;
-}
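-
-// Context for the restriction above: a Thumb2 BL encodes roughly a +/-16 MiB
-// signed offset, so it can only reach targets laid out near the call site;
-// code from another dex file has no such locality guarantee, hence the
-// fallback to an indirect call through the loaded entrypoint.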
-
-/*
- * Bit of a hack here - in the absence of a real scheduling pass,
- * emit the next instruction in static & direct invoke sequences.
- */
-int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info,
-                                  int state, const MethodReference& target_method,
-                                  uint32_t unused_idx ATTRIBUTE_UNUSED,
-                                  uintptr_t direct_code, uintptr_t direct_method,
-                                  InvokeType type) {
-  ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get());
-  if (info->string_init_offset != 0) {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0: {  // Grab target method* from thread pointer
-      cg->LoadRefDisp(rs_rARM_SELF, info->string_init_offset, arg0_ref, kNotVolatile);
-      break;
-    }
-    case 1:  // Grab the code from the method*
-      if (direct_code == 0) {
-        // kInvokeTgt := arg0_ref->entrypoint
-        cg->LoadWordDisp(arg0_ref,
-                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                             kArmPointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else if (direct_code != 0 && direct_method != 0) {
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      if (direct_code != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
-      } else if (ArmUseRelativeCall(cu, target_method)) {
-        // Defer to linker patch.
-      } else {
-        cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-      }
-      if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
-      } else {
-        cg->LoadMethodAddress(target_method, type, kArg0);
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else {
-    bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      // TUNING: we can save a reg copy if Method* has been promoted.
-      if (!use_pc_rel) {
-        cg->LoadCurrMethodDirect(arg0_ref);
-        break;
-      }
-      ++state;
-      FALLTHROUGH_INTENDED;
-    case 1:  // Get method->dex_cache_resolved_methods_
-      if (!use_pc_rel) {
-        cg->LoadBaseDisp(arg0_ref,
-                         ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value(),
-                         arg0_ref,
-                         k32,
-                         kNotVolatile);
-      }
-      // Set up direct code if known.
-      if (direct_code != 0) {
-        if (direct_code != static_cast<uintptr_t>(-1)) {
-          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
-        } else if (ArmUseRelativeCall(cu, target_method)) {
-          // Defer to linker patch.
-        } else {
-          CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-        }
-      }
-      if (!use_pc_rel || direct_code != 0) {
-        break;
-      }
-      ++state;
-      FALLTHROUGH_INTENDED;
-    case 2:  // Grab target method*
-      CHECK_EQ(cu->dex_file, target_method.dex_file);
-      if (!use_pc_rel) {
-        cg->LoadRefDisp(arg0_ref,
-                        cg->GetCachePointerOffset(target_method.dex_method_index,
-                                                  kArmPointerSize),
-                        arg0_ref,
-                        kNotVolatile);
-      } else {
-        size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
-        cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref, false);
-      }
-      break;
-    case 3:  // Grab the code from the method*
-      if (direct_code == 0) {
-        // kInvokeTgt := arg0_ref->entrypoint
-        cg->LoadWordDisp(arg0_ref,
-                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                             kArmPointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
-      }
-      break;
-    default:
-      return -1;
-    }
-  }
-  return state + 1;
-}
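-
-// A hypothetical driver loop for the state machine above (illustrative only;
-// the real caller interleaves argument moves between steps):
-//   int state = 0;
-//   do {
-//     state = ArmNextSDCallInsn(cu, info, state, target_method, 0,
-//                               direct_code, direct_method, type);
-//     // ... set up the next batch of call arguments here ...
-//   } while (state != -1);
-// Each step emits roughly one instruction, so argument set-up can be
-// scheduled into the gaps.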
-
-NextCallInsn ArmMir2Lir::GetNextSDCallInsn() {
-  return ArmNextSDCallInsn;
-}
-
-LIR* ArmMir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
-  // For ARM, just generate a relative BL instruction that will be filled in at 'link time'.
-  // If the target turns out to be too far, the linker will generate a thunk for dispatch.
-  int target_method_idx = target_method.dex_method_index;
-  const DexFile* target_dex_file = target_method.dex_file;
-
-  // Generate the call instruction and save index, dex_file, and type.
-  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
-  // as a placeholder for the offset.
-  LIR* call = RawLIR(current_dalvik_offset_, kThumb2Bl, 0,
-                     target_method_idx, WrapPointer(target_dex_file), type);
-  AppendLIR(call);
-  call_method_insns_.push_back(call);
-  return call;
-}
-
-LIR* ArmMir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
-  LIR* call_insn;
-  if (method_info.FastPath() && ArmUseRelativeCall(cu_, method_info.GetTargetMethod()) &&
-      (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
-      method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
-    call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
-  } else {
-    call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
-  }
-  return call_insn;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
deleted file mode 100644
index b94e707..0000000
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
-#define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
-
-#include "arm_lir.h"
-#include "base/arena_containers.h"
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir.h"
-
-namespace art {
-
-struct CompilationUnit;
-
-class ArmMir2Lir FINAL : public Mir2Lir {
- protected:
-  // Inherited class for ARM backend.
-  class InToRegStorageArmMapper FINAL : public InToRegStorageMapper {
-   public:
-    InToRegStorageArmMapper()
-        : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) {
-    }
-
-    RegStorage GetNextReg(ShortyArg arg) OVERRIDE;
-
-    virtual void Reset() OVERRIDE {
-      cur_core_reg_ = 0;
-      cur_fp_reg_ = 0;
-      cur_fp_double_reg_ = 0;
-    }
-
-   private:
-    size_t cur_core_reg_;
-    size_t cur_fp_reg_;
-    size_t cur_fp_double_reg_;
-  };
-
-  InToRegStorageArmMapper in_to_reg_storage_arm_mapper_;
-  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
-    in_to_reg_storage_arm_mapper_.Reset();
-    return &in_to_reg_storage_arm_mapper_;
-  }
-
-  public:
-    ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
-
-    // Required for target - codegen helpers.
-    bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                            RegLocation rl_dest, int lit);
-    bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-    void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                    int32_t constant) OVERRIDE;
-    void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                     int64_t constant) OVERRIDE;
-    LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                      OpSize size, VolatileKind is_volatile) OVERRIDE;
-    LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size) OVERRIDE;
-    LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
-    LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                       OpSize size, VolatileKind is_volatile) OVERRIDE;
-    LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size) OVERRIDE;
-
-    /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
-    void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
-
-    bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
-    void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
-                                  bool wide) OVERRIDE;
-
-    // Required for target - register utilities.
-    RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
-    RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE {
-      if (wide_kind == kWide) {
-        DCHECK((kArg0 <= reg && reg < kArg3) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg));
-        RegStorage ret_reg = RegStorage::MakeRegPair(TargetReg(reg),
-            TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
-        if (ret_reg.IsFloat()) {
-          // Treat the pair as a double view, to be consistent with register allocation.
-          ret_reg = As64BitFloatReg(ret_reg);
-        }
-        return ret_reg;
-      } else {
-        return TargetReg(reg);
-      }
-    }
-
-    RegLocation GetReturnAlt() OVERRIDE;
-    RegLocation GetReturnWideAlt() OVERRIDE;
-    RegLocation LocCReturn() OVERRIDE;
-    RegLocation LocCReturnRef() OVERRIDE;
-    RegLocation LocCReturnDouble() OVERRIDE;
-    RegLocation LocCReturnFloat() OVERRIDE;
-    RegLocation LocCReturnWide() OVERRIDE;
-    ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
-    void AdjustSpillMask();
-    void ClobberCallerSave();
-    void FreeCallTemps();
-    void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
-    void CompilerInitializeRegAlloc();
-
-    // Required for target - miscellaneous.
-    void AssembleLIR();
-    uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
-    int AssignInsnOffsets();
-    void AssignOffsets();
-    static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
-    void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
-    void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                  ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
-    const char* GetTargetInstFmt(int opcode);
-    const char* GetTargetInstName(int opcode);
-    std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-    ResourceMask GetPCUseDefEncoding() const OVERRIDE;
-    uint64_t GetTargetInstFlags(int opcode);
-    size_t GetInsnSize(LIR* lir) OVERRIDE;
-    bool IsUnconditionalBranch(LIR* lir);
-
-    // Get the register class for load/store of a field.
-    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
-
-    // Required for target - Dalvik-level generators.
-    void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2, int flags) OVERRIDE;
-    void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2, int flags);
-    void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_dest, int scale);
-    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                     RegLocation rl_src, int scale, bool card_mark);
-    void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_shift, int flags);
-    void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                          RegLocation rl_src2);
-    void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2);
-    void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
-    void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
-    bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
-    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
-    bool GenInlinedSqrt(CallInfo* info);
-    bool GenInlinedPeek(CallInfo* info, OpSize size);
-    bool GenInlinedPoke(CallInfo* info, OpSize size);
-    bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
-    RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
-    void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenDivZeroCheckWide(RegStorage reg);
-    void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
-    void GenExitSequence();
-    void GenSpecialExitSequence() OVERRIDE;
-    void GenSpecialEntryForSuspend() OVERRIDE;
-    void GenSpecialExitForSuspend() OVERRIDE;
-    void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
-    void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
-    void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                          int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                          RegisterClass dest_reg_class) OVERRIDE;
-    bool GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
-    void GenMonitorExit(int opt_flags, RegLocation rl_src);
-    void GenMoveException(RegLocation rl_dest);
-    void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                       int first_bit, int second_bit);
-    void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
-    void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-    void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        RegLocation rl_src3, bool is_sub);
-
-    // Required for target - single operation generators.
-    LIR* OpUnconditionalBranch(LIR* target);
-    LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
-    LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
-    LIR* OpCondBranch(ConditionCode cc, LIR* target);
-    LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
-    LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
-    LIR* OpIT(ConditionCode cond, const char* guide);
-    void UpdateIT(LIR* it, const char* new_guide);
-    void OpEndIT(LIR* it);
-    LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    void OpPcRelLoad(RegStorage reg, LIR* target);
-    LIR* OpReg(OpKind op, RegStorage r_dest_src);
-    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
-    LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
-    LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
-    LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
-    LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
-    LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
-    LIR* OpTestSuspend(LIR* target);
-    LIR* OpVldm(RegStorage r_base, int count);
-    LIR* OpVstm(RegStorage r_base, int count);
-    void OpRegCopyWide(RegStorage dest, RegStorage src);
-
-    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
-    LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
-                          int shift);
-    LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
-    static const ArmEncodingMap EncodingMap[kArmLast];
-    int EncodeShift(int code, int amount);
-    int ModifiedImmediate(uint32_t value);
-    ArmConditionCode ArmConditionEncoding(ConditionCode code);
-    bool InexpensiveConstantInt(int32_t value) OVERRIDE;
-    bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE;
-    bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
-    bool InexpensiveConstantLong(int64_t value) OVERRIDE;
-    bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
-    RegStorage AllocPreservedDouble(int s_reg);
-    RegStorage AllocPreservedSingle(int s_reg);
-
-    bool WideGPRsAreAliases() const OVERRIDE {
-      return false;  // Wide GPRs are formed by pairing.
-    }
-    bool WideFPRsAreAliases() const OVERRIDE {
-      return false;  // Wide FPRs are formed by pairing.
-    }
-
-    NextCallInsn GetNextSDCallInsn() OVERRIDE;
-
-    /*
-     * @brief Generate a relative call to the method that will be patched at link time.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @returns Call instruction
-     */
-    LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
-
-    /*
-     * @brief Generate the actual call insn based on the method info.
-     * @param method_info the lowering info for the method call.
-     * @returns Call instruction
-     */
-    LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
-
-    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
-    void DoPromotion() OVERRIDE;
-
-    /*
-     * @brief Handle ARM specific literals.
-     */
-    void InstallLiteralPools() OVERRIDE;
-
-    LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
-    size_t GetInstructionOffset(LIR* lir);
-
-    void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE;
-
-    bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                          RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-
-  private:
-    void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                    RegLocation rl_src2);
-    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
-                                  ConditionCode ccode);
-    LIR* LoadFPConstantValue(int r_dest, int value);
-    LIR* LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
-                                              int displacement, RegStorage r_src_dest,
-                                              RegStorage r_work = RegStorage::InvalidReg());
-    void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
-    void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
-    void AssignDataOffsets();
-    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                          bool is_div, int flags) OVERRIDE;
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
-    struct EasyMultiplyOp {
-      OpKind op;
-      uint32_t shift;
-    };
-    bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op);
-    bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops);
-    void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops);
-
-    static constexpr ResourceMask GetRegMaskArm(RegStorage reg);
-    static constexpr ResourceMask EncodeArmRegList(int reg_list);
-    static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list);
-
-    ArenaVector<LIR*> call_method_insns_;
-
-    // Instructions needing patching with PC relative code addresses.
-    ArenaVector<LIR*> dex_cache_access_insns_;
-
-    // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_,
-    // if promoted.
-    RegStorage dex_cache_arrays_base_reg_;
-
-    /**
-     * @brief Given float register pair, returns Solo64 float register.
-     * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3).
-     * @return A Solo64 float mapping to the register pair (e.g. @c d1).
-     */
-    static RegStorage As64BitFloatReg(RegStorage reg) {
-      DCHECK(reg.IsFloat());
-
-      RegStorage low = reg.GetLow();
-      RegStorage high = reg.GetHigh();
-      DCHECK((low.GetRegNum() % 2 == 0) && (low.GetRegNum() + 1 == high.GetRegNum()));
-
-      return RegStorage::FloatSolo64(low.GetRegNum() / 2);
-    }
-
-    /**
-     * @brief Given Solo64 float register, returns float register pair.
-     * @param reg #RegStorage containing a Solo64 float register (e.g. @c d1).
-     * @return A float register pair mapping to the Solo64 float register (e.g. @c s2 and @c s3).
-     */
-    static RegStorage As64BitFloatRegPair(RegStorage reg) {
-      DCHECK(reg.IsDouble() && reg.Is64BitSolo());
-
-      int reg_num = reg.GetRegNum();
-      return RegStorage::MakeRegPair(RegStorage::FloatSolo32(reg_num * 2),
-                                     RegStorage::FloatSolo32(reg_num * 2 + 1));
-    }
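-
-    // Both helpers above encode the VFP register file layout: dN overlaps
-    // s(2N) and s(2N+1), e.g. d1 aliases {s2, s3} and d15 aliases {s30, s31}.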
-
-    int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
-
-    static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
-                                 int state, const MethodReference& target_method,
-                                 uint32_t unused_idx ATTRIBUTE_UNUSED,
-                                 uintptr_t direct_code, uintptr_t direct_method,
-                                 InvokeType type);
-
-    void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
deleted file mode 100644
index 1a5c108..0000000
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm.h"
-
-#include "arm_lir.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-
-namespace art {
-
-void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
-                                 RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kThumbBkpt;
-  RegLocation rl_result;
-
-  /*
-   * Don't attempt to optimize register usage since these opcodes call out to
-   * the handlers.
-   */
-  switch (opcode) {
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::ADD_FLOAT:
-      op = kThumb2Vadds;
-      break;
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT:
-      op = kThumb2Vsubs;
-      break;
-    case Instruction::DIV_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT:
-      op = kThumb2Vdivs;
-      break;
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT:
-      op = kThumb2Vmuls;
-      break;
-    case Instruction::REM_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT:
-      FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
-      rl_result = GetReturn(kFPReg);
-      StoreValue(rl_dest, rl_result);
-      return;
-    case Instruction::NEG_FLOAT:
-      GenNegFloat(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  rl_src1 = LoadValue(rl_src1, kFPReg);
-  rl_src2 = LoadValue(rl_src2, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode,
-                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kThumbBkpt;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::ADD_DOUBLE:
-      op = kThumb2Vaddd;
-      break;
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE:
-      op = kThumb2Vsubd;
-      break;
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE:
-      op = kThumb2Vdivd;
-      break;
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE:
-      op = kThumb2Vmuld;
-      break;
-    case Instruction::REM_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE:
-      FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(kQuickFmod, rl_src1, rl_src2, false);
-      rl_result = GetReturnWide(kFPReg);
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    case Instruction::NEG_DOUBLE:
-      GenNegDouble(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-
-  rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  DCHECK(rl_src1.wide);
-  rl_src2 = LoadValueWide(rl_src2, kFPReg);
-  DCHECK(rl_src2.wide);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_result.wide);
-  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                            int32_t constant) {
-  RegLocation rl_result;
-  RegStorage r_tmp = AllocTempSingle();
-  LoadConstantNoClobber(r_tmp, constant);
-  rl_src1 = LoadValue(rl_src1, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR3(kThumb2Vmuls, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                             int64_t constant) {
-  RegLocation rl_result;
-  RegStorage r_tmp = AllocTempDouble();
-  DCHECK(r_tmp.IsDouble());
-  LoadConstantWide(r_tmp, constant);
-  rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  DCHECK(rl_src1.wide);
-  rl_result = EvalLocWide(rl_dest, kFPReg, true);
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_result.wide);
-  NewLIR3(kThumb2Vmuld, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
-  int op = kThumbBkpt;
-  int src_reg;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::INT_TO_FLOAT:
-      op = kThumb2VcvtIF;
-      break;
-    case Instruction::FLOAT_TO_INT:
-      op = kThumb2VcvtFI;
-      break;
-    case Instruction::DOUBLE_TO_FLOAT:
-      op = kThumb2VcvtDF;
-      break;
-    case Instruction::FLOAT_TO_DOUBLE:
-      op = kThumb2VcvtFd;
-      break;
-    case Instruction::INT_TO_DOUBLE:
-      op = kThumb2VcvtF64S32;
-      break;
-    case Instruction::DOUBLE_TO_INT:
-      op = kThumb2VcvtDI;
-      break;
-    case Instruction::LONG_TO_DOUBLE: {
-      rl_src = LoadValueWide(rl_src, kFPReg);
-      RegisterInfo* info = GetRegInfo(rl_src.reg);
-      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
-      DCHECK(src_low.Valid());
-      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
-      DCHECK(src_high.Valid());
-      rl_result = EvalLoc(rl_dest, kFPReg, true);
-      RegStorage tmp1 = AllocTempDouble();
-      RegStorage tmp2 = AllocTempDouble();
-
-      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
-      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
-      LoadConstantWide(tmp2, 0x41f0000000000000LL);
-      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
-      FreeTemp(tmp1);
-      FreeTemp(tmp2);
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    }
-    case Instruction::FLOAT_TO_LONG:
-      CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
-      GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
-      return;
-    case Instruction::LONG_TO_FLOAT: {
-      CheckEntrypointTypes<kQuickL2f, float, int64_t>();  // float -> kFPReg
-      GenConversionCall(kQuickL2f, rl_dest, rl_src, kFPReg);
-      return;
-    }
-    case Instruction::DOUBLE_TO_LONG:
-      CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
-      GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  if (rl_src.wide) {
-    rl_src = LoadValueWide(rl_src, kFPReg);
-    src_reg = rl_src.reg.GetReg();
-  } else {
-    rl_src = LoadValue(rl_src, kFPReg);
-    src_reg = rl_src.reg.GetReg();
-  }
-  if (rl_dest.wide) {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
-    StoreValue(rl_dest, rl_result);
-  }
-}
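-
-// A portable C sketch of the LONG_TO_DOUBLE sequence above (names are
-// illustrative).  It relies on the identity
-//   (double)x == (double)(int32_t)(x >> 32) * 2^32 + (double)(uint32_t)x,
-// where 0x41f0000000000000 is exactly the IEEE-754 bit pattern of 2^32
-// (exponent 32 + 1023 = 1055 = 0x41f, zero mantissa):
-//   double LongToDouble(int64_t x) {
-//     double hi = static_cast<double>(static_cast<int32_t>(x >> 32));
-//     double lo = static_cast<double>(static_cast<uint32_t>(x));
-//     return hi * 4294967296.0 + lo;  // the VMLA fuses this multiply-add
-//   }
-// (The fused VMLA avoids the double rounding that a separate multiply and add
-// could introduce for very large magnitudes.)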
-
-void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
-                                     bool is_double) {
-  LIR* target = &block_label_list_[bb->taken];
-  RegLocation rl_src1;
-  RegLocation rl_src2;
-  if (is_double) {
-    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
-    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-    rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  } else {
-    rl_src1 = mir_graph_->GetSrc(mir, 0);
-    rl_src2 = mir_graph_->GetSrc(mir, 1);
-    rl_src1 = LoadValue(rl_src1, kFPReg);
-    rl_src2 = LoadValue(rl_src2, kFPReg);
-    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  }
-  NewLIR0(kThumb2Fmstat);
-  ConditionCode ccode = mir->meta.ccode;
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      break;
-    case kCondLt:
-      if (gt_bias) {
-        ccode = kCondMi;
-      }
-      break;
-    case kCondLe:
-      if (gt_bias) {
-        ccode = kCondLs;
-      }
-      break;
-    case kCondGt:
-      if (gt_bias) {
-        ccode = kCondHi;
-      }
-      break;
-    case kCondGe:
-      if (gt_bias) {
-        ccode = kCondUge;
-      }
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCondBranch(ccode, target);
-}
-
-
-void ArmMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
-                          RegLocation rl_src1, RegLocation rl_src2) {
-  bool is_double = false;
-  int default_result = -1;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::CMPL_FLOAT:
-      is_double = false;
-      default_result = -1;
-      break;
-    case Instruction::CMPG_FLOAT:
-      is_double = false;
-      default_result = 1;
-      break;
-    case Instruction::CMPL_DOUBLE:
-      is_double = true;
-      default_result = -1;
-      break;
-    case Instruction::CMPG_DOUBLE:
-      is_double = true;
-      default_result = 1;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  if (is_double) {
-    rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
-    ClobberSReg(rl_dest.s_reg_low);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    LoadConstant(rl_result.reg, default_result);
-    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  } else {
-    rl_src1 = LoadValue(rl_src1, kFPReg);
-    rl_src2 = LoadValue(rl_src2, kFPReg);
-    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
-    ClobberSReg(rl_dest.s_reg_low);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    LoadConstant(rl_result.reg, default_result);
-    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  }
-  DCHECK(!rl_result.reg.IsFloat());
-  NewLIR0(kThumb2Fmstat);
-
-  LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
-  NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
-          ModifiedImmediate(-default_result));  // Must not alter ccodes
-  OpEndIT(it);
-
-  it = OpIT(kCondEq, "");
-  LoadConstant(rl_result.reg, 0);
-  OpEndIT(it);
-
-  StoreValue(rl_dest, rl_result);
-}
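-
-// Net effect of the sequence above, in sketch form: for CMPG the result
-// register is seeded with +1, for CMPL with -1; the first IT block overwrites
-// it with the negated default when the opposite ordered comparison holds, and
-// the second zeroes it on equality.  Anything left untouched, including the
-// unordered (NaN) case, keeps the seeded bias, which is exactly the cmpl/cmpg
-// semantics the dex opcodes require.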
-
-void ArmMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  rl_src = LoadValue(rl_src, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
-  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
-  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
-      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
-    return kCoreReg;
-  }
-  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
-  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
-    return kFPReg;
-  }
-  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
-  // is faster using a core reg or fp reg depends on the particular CPU. Without further
-  // investigation and testing we prefer core register. (If the result is subsequently used in
-  // another fp operation, the dalvik reg will probably get promoted and that should be handled
-  // by the cases above.)
-  return kCoreReg;
-}
-
-bool ArmMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
-  if (info->result.location == kLocInvalid) {
-    return true;  // Result is unused: inlining successful, no code generated.
-  }
-  RegLocation rl_dest = info->result;
-  RegLocation rl_src = UpdateLoc(info->args[0]);
-  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
-  rl_src = LoadValue(rl_src, reg_class);
-  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
-  if (reg_class == kFPReg) {
-    NewLIR2(kThumb2Vabss, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else {
-    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool ArmMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  if (info->result.location == kLocInvalid) {
-    return true;  // Result is unused: inlining successful, no code generated.
-  }
-  RegLocation rl_dest = info->result;
-  RegLocation rl_src = UpdateLocWide(info->args[0]);
-  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
-  rl_src = LoadValueWide(rl_src, reg_class);
-  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
-  if (reg_class == kFPReg) {
-    NewLIR2(kThumb2Vabsd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else if (rl_result.reg.GetLow().GetReg() != rl_src.reg.GetHigh().GetReg()) {
-    // No inconvenient overlap.
-    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
-    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x7fffffff);
-  } else {
-    // Inconvenient overlap, use a temp register to preserve the high word of the source.
-    RegStorage rs_tmp = AllocTemp();
-    OpRegCopy(rs_tmp, rl_src.reg.GetHigh());
-    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
-    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rs_tmp, 0x7fffffff);
-    FreeTemp(rs_tmp);
-  }
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
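-
-// Both abs() inlines above reduce to clearing the IEEE-754 sign bit; the
-// core-register path is, in sketch form:
-//   uint64_t bits;
-//   memcpy(&bits, &x, sizeof(bits));
-//   bits &= 0x7fffffffffffffffULL;   // only the high word's bit 31 changes
-//   memcpy(&x, &bits, sizeof(bits));
-// so no floating-point comparison, branch, or NaN special case is needed.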
-
-bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = InlineTargetWide(info);  // double-wide location for the result
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
deleted file mode 100644
index b2bd6fa..0000000
--- a/compiler/dex/quick/arm/int_arm.cc
+++ /dev/null
@@ -1,1736 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the Thumb2 ISA. */
-
-#include "codegen_arm.h"
-
-#include "arch/instruction_set_features.h"
-#include "arm_lir.h"
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "driver/compiler_driver.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mirror/array-inl.h"
-
-namespace art {
-
-LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
-  OpRegReg(kOpCmp, src1, src2);
-  return OpCondBranch(cond, target);
-}
-
-/*
- * Generate a Thumb2 IT instruction, which can nullify up to
- * four subsequent instructions based on a condition and its
- * inverse.  The condition applies to the first instruction, which
- * is executed if the condition is met.  The string "guide" consists
- * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
- * A "T" means the instruction is executed if the condition is
- * met, and an "E" means the instruction is executed if the condition
- * is not met.
- */
-LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
-  int mask;
-  int mask3 = 0;
-  int mask2 = 0;
-  int mask1 = 0;
-  ArmConditionCode code = ArmConditionEncoding(ccode);
-  int cond_bit = code & 1;
-  int alt_bit = cond_bit ^ 1;
-
-  switch (strlen(guide)) {
-    case 3:
-      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
-      FALLTHROUGH_INTENDED;
-    case 2:
-      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
-      FALLTHROUGH_INTENDED;
-    case 1:
-      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
-      break;
-    case 0:
-      break;
-    default:
-      LOG(FATAL) << "OAT: bad case in OpIT";
-      UNREACHABLE();
-  }
-  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
-       (1 << (3 - strlen(guide)));
-  return NewLIR2(kThumb2It, code, mask);
-}
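-
-// Worked example: OpIT(kCondHi, "E"), the ITE block used by GenCmpLong below.
-// ARM cond HI = 0b1000, so cond_bit = 0 and alt_bit = 1.  With the single
-// guide char 'E', mask3 = alt_bit = 1 and
-//   mask = (1 << 3) | (1 << (3 - 1)) = 0b1100,
-// matching the architectural ITE encoding, where the position of the lowest
-// set bit marks the length of the IT shadow (here: two instructions).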
-
-void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) {
-  int mask;
-  int mask3 = 0;
-  int mask2 = 0;
-  int mask1 = 0;
-  ArmConditionCode code = static_cast<ArmConditionCode>(it->operands[0]);
-  int cond_bit = code & 1;
-  int alt_bit = cond_bit ^ 1;
-
-  switch (strlen(new_guide)) {
-    case 3:
-      mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit;
-      FALLTHROUGH_INTENDED;
-    case 2:
-      mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit;
-      FALLTHROUGH_INTENDED;
-    case 1:
-      mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit;
-      break;
-    case 0:
-      break;
-    default:
-      LOG(FATAL) << "OAT: bad case in UpdateIT";
-      UNREACHABLE();
-  }
-  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
-      (1 << (3 - strlen(new_guide)));
-  it->operands[1] = mask;
-}
-
-void ArmMir2Lir::OpEndIT(LIR* it) {
-  // TODO: use the 'it' pointer to do some checks with the LIR, for example
-  //       we could check that the number of instructions matches the mask
-  //       in the IT instruction.
-  CHECK(it != nullptr);
-  GenBarrier();
-}
-
-/*
- * 64-bit 3way compare function.
- *     mov   rX, #-1
- *     cmp   op1hi, op2hi
- *     blt   done
- *     bgt   flip
- *     sub   rX, op1lo, op2lo (treat as unsigned)
- *     beq   done
- *     ite   hi
- *     mov(hi)   rX, #-1
- *     mov(!hi)  rX, #1
- * flip:
- *     neg   rX
- * done:
- */
-void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  LIR* target1;
-  LIR* target2;
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  RegStorage t_reg = AllocTemp();
-  LoadConstant(t_reg, -1);
-  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
-  LIR* branch1 = OpCondBranch(kCondLt, nullptr);
-  LIR* branch2 = OpCondBranch(kCondGt, nullptr);
-  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
-  LIR* branch3 = OpCondBranch(kCondEq, nullptr);
-
-  LIR* it = OpIT(kCondHi, "E");
-  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
-  LoadConstant(t_reg, 1);
-  OpEndIT(it);
-
-  target2 = NewLIR0(kPseudoTargetLabel);
-  OpRegReg(kOpNeg, t_reg, t_reg);
-
-  target1 = NewLIR0(kPseudoTargetLabel);
-
-  RegLocation rl_temp = LocCReturn();  // Just used as a template; the register is replaced below.
-  rl_temp.reg.SetReg(t_reg.GetReg());
-  StoreValue(rl_dest, rl_temp);
-  FreeTemp(t_reg);
-
-  branch1->target = target1;
-  branch2->target = target2;
-  branch3->target = branch1->target;
-}
-
-void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
-                                          int64_t val, ConditionCode ccode) {
-  int32_t val_lo = Low32Bits(val);
-  int32_t val_hi = High32Bits(val);
-  DCHECK_GE(ModifiedImmediate(val_lo), 0);
-  DCHECK_GE(ModifiedImmediate(val_hi), 0);
-  LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegStorage low_reg = rl_src1.reg.GetLow();
-  RegStorage high_reg = rl_src1.reg.GetHigh();
-
-  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
-    FreeTemp(t_reg);
-    OpCondBranch(ccode, taken);
-    return;
-  }
-
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
-}
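-
-// E.g. a signed 64-bit (lhs < val) decomposes into three branches (sketch):
-//   if ((int32_t)lhs_hi < val_hi) goto taken;
-//   if ((int32_t)lhs_hi > val_hi) goto not_taken;
-//   if ((uint32_t)lhs_lo < (uint32_t)val_lo) goto taken;  // note: unsigned
-// i.e. the high words decide unless they are equal, in which case the low
-// words are compared unsigned (kCondUlt).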
-
-void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                  RegisterClass dest_reg_class ATTRIBUTE_UNUSED) {
-  // TODO: Generalize the IT below to accept loads longer than a single instruction.
-  DCHECK(InexpensiveConstantInt(true_val));
-  DCHECK(InexpensiveConstantInt(false_val));
-
-  if ((true_val == 0 && code == kCondEq) ||
-      (false_val == 0 && code == kCondNe)) {
-    OpRegRegReg(kOpSub, rs_dest, left_op, right_op);
-    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-    LIR* it = OpIT(kCondNe, "");
-    LoadConstant(rs_dest, code == kCondEq ? false_val : true_val);
-    OpEndIT(it);
-    return;
-  }
-
-  OpRegReg(kOpCmp, left_op, right_op);  // Same?
-  LIR* it = OpIT(code, "E");   // if-convert the test
-  LoadConstant(rs_dest, true_val);      // .eq case - load true
-  LoadConstant(rs_dest, false_val);     // .ne case - load false
-  OpEndIT(it);
-}
-
-void ArmMir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
-  RegLocation rl_result;
-  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
-  RegLocation rl_dest = mir_graph_->GetDest(mir);
-  // Avoid using float regs here.
-  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
-  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
-  rl_src = LoadValue(rl_src, src_reg_class);
-  ConditionCode ccode = mir->meta.ccode;
-  if (mir->ssa_rep->num_uses == 1) {
-    // CONST case
-    int true_val = mir->dalvikInsn.vB;
-    int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, result_reg_class, true);
-    // Change kCondNe to kCondEq for the special cases below.
-    if (ccode == kCondNe) {
-      ccode = kCondEq;
-      std::swap(true_val, false_val);
-    }
-    bool cheap_false_val = InexpensiveConstantInt(false_val);
-    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
-      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
-      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-      LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
-      LoadConstant(rl_result.reg, false_val);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
-    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
-      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
-      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-      LIR* it = OpIT(kCondLs, "");
-      LoadConstant(rl_result.reg, false_val);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
-    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
-      OpRegImm(kOpCmp, rl_src.reg, 0);
-      LIR* it = OpIT(ccode, "E");
-      LoadConstant(rl_result.reg, true_val);
-      LoadConstant(rl_result.reg, false_val);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
-    } else {
-      // Unlikely case - could be tuned.
-      RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
-      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
-      LoadConstant(t_reg1, true_val);
-      LoadConstant(t_reg2, false_val);
-      OpRegImm(kOpCmp, rl_src.reg, 0);
-      LIR* it = OpIT(ccode, "E");
-      OpRegCopy(rl_result.reg, t_reg1);
-      OpRegCopy(rl_result.reg, t_reg2);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
-    }
-  } else {
-    // MOVE case
-    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
-    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-    rl_true = LoadValue(rl_true, result_reg_class);
-    rl_false = LoadValue(rl_false, result_reg_class);
-    rl_result = EvalLoc(rl_dest, result_reg_class, true);
-    OpRegImm(kOpCmp, rl_src.reg, 0);
-    LIR* it = nullptr;
-    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
-      it = OpIT(NegateComparison(ccode), "");
-      OpRegCopy(rl_result.reg, rl_false.reg);
-    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
-      it = OpIT(ccode, "");
-      OpRegCopy(rl_result.reg, rl_true.reg);
-    } else {  // Normal - select between the two.
-      it = OpIT(ccode, "E");
-      OpRegCopy(rl_result.reg, rl_true.reg);
-      OpRegCopy(rl_result.reg, rl_false.reg);
-    }
-    OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
-  }
-  StoreValue(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
-  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
-  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-  // Normalize such that if either operand is constant, src2 will be constant.
-  ConditionCode ccode = mir->meta.ccode;
-  if (rl_src1.is_const) {
-    std::swap(rl_src1, rl_src2);
-    ccode = FlipComparisonOrder(ccode);
-  }
-  if (rl_src2.is_const) {
-    rl_src2 = UpdateLocWide(rl_src2);
-    // Do special compare/branch against simple const operand if not already in registers.
-    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    if ((rl_src2.location != kLocPhysReg) &&
-        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
-      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
-      return;
-    }
-  }
-  LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
-  switch (ccode) {
-    case kCondEq:
-      OpCondBranch(kCondNe, not_taken);
-      break;
-    case kCondNe:
-      OpCondBranch(kCondNe, taken);
-      break;
-    case kCondLt:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
-  OpCondBranch(ccode, taken);
-}
-
-/*
- * Generate a register comparison to an immediate and branch.  Caller
- * is responsible for setting branch target field.
- */
-LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
-  LIR* branch = nullptr;
-  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
-  /*
-   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
-   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
-   * branch forward to a slow path, they will frequently not reach - and thus have to
-   * be converted to a long form during assembly (which will trigger another assembly
-   * pass).  Here we estimate the branch distance for checks and, if it is large,
-   * directly generate the long form in an attempt to avoid an extra assembly pass.
-   * TODO: consider interspersing slowpaths in code following unconditional branches.
-   */
-  bool skip = ((target != nullptr) && (target->opcode == kPseudoThrowTarget));
-  skip &= ((mir_graph_->GetNumDalvikInsns() - current_dalvik_offset_) > 64);
-  if (!skip && reg.Low8() && (check_value == 0)) {
-    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
-      branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
-                       reg.GetReg(), 0);
-    } else if (arm_cond == kArmCondLs) {
-      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
-      // This case happens for a bounds check of array[0].
-      branch = NewLIR2(kThumb2Cbz, reg.GetReg(), 0);
-    }
-  }
-
-  if (branch == nullptr) {
-    OpRegImm(kOpCmp, reg, check_value);
-    branch = NewLIR2(kThumbBCond, 0, arm_cond);
-  }
-
-  branch->target = target;
-  return branch;
-}
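-
-// Note on reach: CBZ/CBNZ encode only a forward offset of at most 126 bytes,
-// which is why the heuristic above skips them for distant throw targets.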
-
-LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
-  LIR* res;
-  int opcode;
-  // If src or dest is a pair, we'll be using low reg.
-  if (r_dest.IsPair()) {
-    r_dest = r_dest.GetLow();
-  }
-  if (r_src.IsPair()) {
-    r_src = r_src.GetLow();
-  }
-  if (r_dest.IsFloat() || r_src.IsFloat())
-    return OpFpRegCopy(r_dest, r_src);
-  if (r_dest.Low8() && r_src.Low8())
-    opcode = kThumbMovRR;
-  else if (!r_dest.Low8() && !r_src.Low8())
-    opcode = kThumbMovRR_H2H;
-  else if (r_dest.Low8())
-    opcode = kThumbMovRR_H2L;
-  else
-    opcode = kThumbMovRR_L2H;
-  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
-}
-
-void ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
-    AppendLIR(res);
-  }
-}
-
-void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    bool dest_fp = r_dest.IsFloat();
-    bool src_fp = r_src.IsFloat();
-    DCHECK(r_dest.Is64Bit());
-    DCHECK(r_src.Is64Bit());
-    // Note: if the register comes from the register allocator, it should never be a pair.
-    // But some functions in mir_2_lir assume 64-bit registers are 32-bit register pairs.
-    // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
-    if (dest_fp && r_dest.IsPair()) {
-      r_dest = As64BitFloatReg(r_dest);
-    }
-    if (src_fp && r_src.IsPair()) {
-      r_src = As64BitFloatReg(r_src);
-    }
-    if (dest_fp) {
-      if (src_fp) {
-        OpRegCopy(r_dest, r_src);
-      } else {
-        NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
-      }
-    } else {
-      if (src_fp) {
-        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
-      } else {
-        // Handle overlap
-        if (r_src.GetHighReg() != r_dest.GetLowReg()) {
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-        } else if (r_src.GetLowReg() != r_dest.GetHighReg()) {
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-        } else {
-          RegStorage r_tmp = AllocTemp();
-          OpRegCopy(r_tmp, r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_tmp);
-          FreeTemp(r_tmp);
-        }
-      }
-    }
-  }
-}
-
-// Table of magic divisors
-struct MagicTable {
-  uint32_t magic;
-  uint32_t shift;
-  DividePattern pattern;
-};
-
-static const MagicTable magic_table[] = {
-  {0, 0, DivideNone},        // 0
-  {0, 0, DivideNone},        // 1
-  {0, 0, DivideNone},        // 2
-  {0x55555556, 0, Divide3},  // 3
-  {0, 0, DivideNone},        // 4
-  {0x66666667, 1, Divide5},  // 5
-  {0x2AAAAAAB, 0, Divide3},  // 6
-  {0x92492493, 2, Divide7},  // 7
-  {0, 0, DivideNone},        // 8
-  {0x38E38E39, 1, Divide5},  // 9
-  {0x66666667, 2, Divide5},  // 10
-  {0x2E8BA2E9, 1, Divide5},  // 11
-  {0x2AAAAAAB, 1, Divide5},  // 12
-  {0x4EC4EC4F, 2, Divide5},  // 13
-  {0x92492493, 3, Divide7},  // 14
-  {0x88888889, 3, Divide7},  // 15
-};
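-
-// Worked example, as a rough C++ sketch of the Divide3 pattern below (the
-// SMULL instruction supplies the high 32 bits of the 64-bit product):
-//   int32_t DivideBy3(int32_t n) {
-//     int64_t product = static_cast<int64_t>(n) * 0x55555556LL;  // magic for 3
-//     int32_t hi = static_cast<int32_t>(product >> 32);          // SMULL's high word
-//     return hi - (n >> 31);                                     // sign correction
-//   }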
-
-// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
-bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED, bool is_div,
-                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
-  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
-    return false;
-  }
-  DividePattern pattern = magic_table[lit].pattern;
-  if (pattern == DivideNone) {
-    return false;
-  }
-
-  RegStorage r_magic = AllocTemp();
-  LoadConstant(r_magic, magic_table[lit].magic);
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage r_hi = AllocTemp();
-  RegStorage r_lo = AllocTemp();
-
-  // rl_dest and rl_src might overlap.
-  // Reuse r_hi to hold the div result for the remainder case.
-  RegStorage r_div_result = is_div ? rl_result.reg : r_hi;
-
-  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
-  switch (pattern) {
-    case Divide3:
-      OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
-      break;
-    case Divide5:
-      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
-                       EncodeShift(kArmAsr, magic_table[lit].shift));
-      break;
-    case Divide7:
-      OpRegReg(kOpAdd, r_hi, rl_src.reg);
-      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
-                       EncodeShift(kArmAsr, magic_table[lit].shift));
-      break;
-    default:
-      LOG(FATAL) << "Unexpected pattern: " << pattern;
-  }
-
-  if (!is_div) {
-    // div_result = src / lit
-    // tmp1 = div_result * lit
-    // dest = src - tmp1
-    RegStorage tmp1 = r_lo;
-    EasyMultiplyOp ops[2];
-
-    bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
-    DCHECK_NE(canEasyMultiply, false);
-
-    GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
-    OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
-  }
-
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-// Try to convert a multiply by lit into a single RegRegRegShift/RegRegShift operation.
-bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) {
-  if (lit == 0) {
-    // Special case for *divide-by-zero*. The ops won't actually be used to generate code, as
-    // GenArithOpIntLit will directly generate exception-throwing code, and multiply-by-zero will
-    // have been optimized away earlier.
-    op->op = kOpInvalid;
-    op->shift = 0;
-    return true;
-  }
-
-  if (IsPowerOfTwo(lit)) {
-    op->op = kOpLsl;
-    op->shift = CTZ(lit);
-    return true;
-  }
-
-  // At this point lit != 1 (which is a power of two).
-  DCHECK_NE(lit, 1);
-  if (IsPowerOfTwo(lit - 1)) {
-    op->op = kOpAdd;
-    op->shift = CTZ(lit - 1);
-    return true;
-  }
-
-  if (lit == -1) {
-    // Can be created as neg.
-    op->op = kOpNeg;
-    op->shift = 0;
-    return true;
-  } else if (IsPowerOfTwo(lit + 1)) {
-    op->op = kOpRsub;
-    op->shift = CTZ(lit + 1);
-    return true;
-  }
-
-  op->op = kOpInvalid;
-  op->shift = 0;
-  return false;
-}
-
-// Try to convert a multiply by lit into one or two RegRegRegShift/RegRegShift operations.
-bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
-  DCHECK_NE(lit, 1);           // A case of "1" should have been folded.
-  DCHECK_NE(lit, -1);          // A case of "-1" should have been folded.
-  if (GetEasyMultiplyOp(lit, &ops[0])) {
-    ops[1].op = kOpInvalid;
-    ops[1].shift = 0;
-    return true;
-  }
-
-  DCHECK_NE(lit, 0);           // Should be handled above.
-  DCHECK(!IsPowerOfTwo(lit));  // Same.
-
-  int lit1 = lit;              // With the DCHECKs, it's clear we don't get "0", "1" or "-1" for
-  uint32_t shift = CTZ(lit1);  // lit1.
-  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
-    ops[1].op = kOpLsl;
-    ops[1].shift = shift;
-    return true;
-  }
-
-  lit1 = lit - 1;              // With the DCHECKs, it's clear we don't get "0" or "1" for lit1.
-  shift = CTZ(lit1);
-  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
-    ops[1].op = kOpAdd;
-    ops[1].shift = shift;
-    return true;
-  }
-
-  lit1 = lit + 1;              // With the DCHECKs, it's clear we don't get "0" here.
-  shift = CTZ(lit1);
-  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
-    ops[1].op = kOpRsub;
-    ops[1].shift = shift;
-    return true;
-  }
-
-  ops[1].op = kOpInvalid;
-  ops[1].shift = 0;
-
-  return false;
-}
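-
-// Worked example: for lit == 10, CTZ(10) == 1 and 10 >> 1 == 5 == 4 + 1, so
-// the decomposition is
-//   ops[0] = {kOpAdd, 2}   // tmp  = (src << 2) + src   -> src * 5
-//   ops[1] = {kOpLsl, 1}   // dest = tmp << 1           -> src * 10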
-
-// Generate the instructions for the multiply.
-// An additional temporary register is required if two instructions
-// are needed and src/dest overlap.
-void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
-  // tmp1 = (src << shift1) [+/- src], or -src, or src << shift1
-  // dest = (tmp1 << shift2) [+/- src], or tmp1 << shift2
-
-  RegStorage r_tmp1;
-  if (ops[1].op == kOpInvalid) {
-    r_tmp1 = r_dest;
-  } else if (r_dest.GetReg() != r_src.GetReg()) {
-    r_tmp1 = r_dest;
-  } else {
-    r_tmp1 = AllocTemp();
-  }
-
-  switch (ops[0].op) {
-    case kOpLsl:
-      OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
-      break;
-    case kOpAdd:
-      OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
-      break;
-    case kOpRsub:
-      OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
-      break;
-    case kOpNeg:
-      OpRegReg(kOpNeg, r_tmp1, r_src);
-      break;
-    default:
-      DCHECK_EQ(ops[0].op, kOpInvalid);
-      break;
-  }
-
-  switch (ops[1].op) {
-    case kOpInvalid:
-      return;
-    case kOpLsl:
-      OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
-      break;
-    case kOpAdd:
-      OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
-      break;
-    case kOpRsub:
-      OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
-      break;
-    // No negation allowed in second op.
-    default:
-      LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
-      break;
-  }
-}
-
-bool ArmMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
-  EasyMultiplyOp ops[2];
-
-  if (!GetEasyMultiplyTwoOps(lit, ops)) {
-    return false;
-  }
-
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                  RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                  RegLocation rl_src2 ATTRIBUTE_UNUSED,
-                                  bool is_div ATTRIBUTE_UNUSED,
-                                  int flags ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
-  UNREACHABLE();
-}
-
-RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                     RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                     int lit ATTRIBUTE_UNUSED,
-                                     bool is_div ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
-  UNREACHABLE();
-}
-
-RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  // Put the literal in a temp.
-  RegStorage lit_temp = AllocTemp();
-  LoadConstant(lit_temp, lit);
-  // Use the generic case for div/rem with arg2 in a register.
-  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
-  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
-  FreeTemp(lit_temp);
-
-  return rl_result;
-}
-
-RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
-                                  bool is_div) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    // Simple case, use sdiv instruction.
-    OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
-  } else {
-    // Remainder case, use the following code:
-    // temp = reg1 / reg2      - integer division
-    // temp = temp * reg2
-    // dest = reg1 - temp
-
-    RegStorage temp = AllocTemp();
-    OpRegRegReg(kOpDiv, temp, reg1, reg2);
-    OpRegReg(kOpMul, temp, reg2);
-    OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
-    FreeTemp(temp);
-  }
-
-  return rl_result;
-}
-
-bool ArmMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
-  if (is_long) {
-    return false;
-  }
-  RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = info->args[1];
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
-  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
-  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
-  OpEndIT(it);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
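-
-// Illustrative emitted sequence for the min case (register names assumed):
-//   cmp   r_src1, r_src2
-//   ite   gt
-//   movgt r_dest, r_src2   @ src1 > src2 -> take src2
-//   movle r_dest, r_src1   @ otherwise   -> take src1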
-
-bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == k64) {
-    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
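-    // Order the two loads so the address register is not clobbered before the
-    // second load uses it.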
-    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
-      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
-      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
-    } else {
-      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
-      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
-    }
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
-
-bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
-  RegLocation rl_src_value = info->args[2];  // [size] value
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  if (size == k64) {
-    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
-    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32, kNotVolatile);
-    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32, kNotVolatile);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
-    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-  }
-  return true;
-}
-
-// Generate a CAS with memory_order_seq_cst semantics.
-bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
-  // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
-  RegLocation rl_src_offset = info->args[2];  // long low
-  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
-  RegLocation rl_src_expected = info->args[4];  // int, long or Object
-  // If is_long, high half is in info->args[5]
-  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
-  // If is_long, high half is in info->args[7]
-  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
-
-  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
-  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
-  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
-  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
-  // into the same temps, reducing the number of required temps down to 5. We shall work
-  // around the potentially locked temp by using LR for r_ptr, unconditionally.
-  // TODO: Pass information about the need for more temps to the stack frame generation
-  // code so that we can rely on being able to allocate enough temps.
-  DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
-  MarkTemp(rs_rARM_LR);
-  FreeTemp(rs_rARM_LR);
-  LockTemp(rs_rARM_LR);
-  bool load_early = true;
-  if (is_long) {
-    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
-        rl_src_expected.reg;
-    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
-        rl_src_new_value.reg;
-    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
-    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
-    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
-    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
-
-    if (!expected_is_good_reg && !new_value_is_good_reg) {
-      // Neither expected nor new_value is in a non-temp register; load both late.
-      load_early = false;
-      // Make sure they are not in the temp regs and the load will not be skipped.
-      if (expected_is_core_reg) {
-        FlushRegWide(rl_src_expected.reg);
-        ClobberSReg(rl_src_expected.s_reg_low);
-        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
-        rl_src_expected.location = kLocDalvikFrame;
-      }
-      if (new_value_is_core_reg) {
-        FlushRegWide(rl_src_new_value.reg);
-        ClobberSReg(rl_src_new_value.s_reg_low);
-        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
-        rl_src_new_value.location = kLocDalvikFrame;
-      }
-    }
-  }
-
-  // Prevent reordering with prior memory operations.
-  GenMemBarrier(kAnyStore);
-
-  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-  RegLocation rl_new_value;
-  if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
-  } else if (load_early) {
-    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
-  }
-
-  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
-    // Mark card for object assuming new value is stored.
-    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
-  }
-
-  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-
-  RegStorage r_ptr = rs_rARM_LR;
-  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
-
-  // Free now unneeded rl_object and rl_offset to give more temps.
-  ClobberSReg(rl_object.s_reg_low);
-  FreeTemp(rl_object.reg);
-  ClobberSReg(rl_offset.s_reg_low);
-  FreeTemp(rl_offset.reg);
-
-  RegLocation rl_expected;
-  if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
-  } else if (load_early) {
-    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
-  } else {
-    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
-    RegStorage low_reg = AllocTemp();
-    RegStorage high_reg = AllocTemp();
-    rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
-    rl_expected = rl_new_value;
-  }
-
-  // do {
-  //   tmp = [r_ptr] - expected;
-  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
-  // result = tmp != 0;
-
-  RegStorage r_tmp = AllocTemp();
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-
-  LIR* it = nullptr;
-  if (is_long) {
-    RegStorage r_tmp_high = AllocTemp();
-    if (!load_early) {
-      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
-    }
-    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
-    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
-    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
-    if (!load_early) {
-      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
-    }
-    // Make sure we use an ORR that sets the condition codes.
-    if (r_tmp.Low8() && r_tmp_high.Low8()) {
-      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
-    } else {
-      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
-    }
-    FreeTemp(r_tmp_high);  // Now unneeded
-
-    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-    it = OpIT(kCondEq, "T");
-    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
-
-  } else {
-    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
-    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
-    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-    it = OpIT(kCondEq, "T");
-    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
-  }
-
-  // One conditional slot is still left over from OpIT(kCondEq, "T") on either path.
-  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
-  OpEndIT(it);
-
-  OpCondBranch(kCondEq, target);
-
-  if (!load_early) {
-    FreeTemp(rl_expected.reg);  // Now unneeded.
-  }
-
-  // Prevent reordering with subsequent memory operations.
-  GenMemBarrier(kLoadAny);
-
-  // result := (r_tmp != 0) ? 0 : 1;
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
-  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-  it = OpIT(kCondUlt, "");
-  LoadConstant(rl_result.reg, 0); /* cc */
-  FreeTemp(r_tmp);  // Now unneeded.
-  OpEndIT(it);     // Barrier to terminate OpIT.
-
-  StoreValue(rl_dest, rl_result);
-
-  // Now, restore lr to its non-temp status.
-  Clobber(rs_rARM_LR);
-  UnmarkTemp(rs_rARM_LR);
-  return true;
-}
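-
-// Illustrative core of the emitted loop for the 32-bit case (sketch; the
-// label and register names are assumed):
-//   retry:
-//     ldrex   r_tmp, [r_ptr]
-//     subs    r_tmp, r_tmp, r_expected     @ Z set iff the values match
-//     itt     eq
-//     strexeq r_tmp, r_new_value, [r_ptr]  @ 0 on success, 1 on failure
-//     cmpeq   r_tmp, #1                    @ Z remains set only on strex failure
-//     beq     retry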
-
-bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
-  constexpr int kLargeArrayThreshold = 256;
-
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_src_pos = info->args[1];
-  RegLocation rl_dst = info->args[2];
-  RegLocation rl_dst_pos = info->args[3];
-  RegLocation rl_length = info->args[4];
-  // Compile-time check; let the out-of-line method handle the exception cases to
-  // reduce the associated metadata.
-  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
-      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
-      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
-    return false;
-  }
-
-  ClobberCallerSave();
-  LockCallTemps();  // Prepare for explicit register usage.
-  LockTemp(rs_r12);
-  RegStorage rs_src = rs_r0;
-  RegStorage rs_dst = rs_r1;
-  LoadValueDirectFixed(rl_src, rs_src);
-  LoadValueDirectFixed(rl_dst, rs_dst);
-
-  // Handle null pointer exception in slow-path.
-  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
-  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
-  // Handle potential overlapping in slow-path.
-  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
-  // Handle exception or big length in slow-path.
-  RegStorage rs_length = rs_r2;
-  LoadValueDirectFixed(rl_length, rs_length);
-  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
-  // Src bounds check.
-  RegStorage rs_pos = rs_r3;
-  RegStorage rs_arr_length = rs_r12;
-  LoadValueDirectFixed(rl_src_pos, rs_pos);
-  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
-  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
-  OpRegReg(kOpSub, rs_arr_length, rs_pos);
-  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
-  // Dst bounds check.
-  LoadValueDirectFixed(rl_dst_pos, rs_pos);
-  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
-  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
-  OpRegReg(kOpSub, rs_arr_length, rs_pos);
-  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
-
-  // Everything is checked now.
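-  // Compute the raw data addresses. The position register is added twice
-  // because each char element is two bytes wide (byte offset = 2 * pos).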
-  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
-  OpRegReg(kOpAdd, rs_dst, rs_pos);
-  OpRegReg(kOpAdd, rs_dst, rs_pos);
-  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
-  LoadValueDirectFixed(rl_src_pos, rs_pos);
-  OpRegReg(kOpAdd, rs_src, rs_pos);
-  OpRegReg(kOpAdd, rs_src, rs_pos);
-
-  RegStorage rs_tmp = rs_pos;
-  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
-
-  // Copy one element.
-  OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2);
-  LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
-  OpRegImm(kOpSub, rs_length, 2);
-  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
-  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
-
-  // Copy two elements.
-  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
-  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
-  OpRegImm(kOpSub, rs_length, 4);
-  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
-  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
-  OpUnconditionalBranch(begin_loop);
-
-  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
-  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
-  LIR* return_point = NewLIR0(kPseudoTargetLabel);
-
-  src_check_branch->target = check_failed;
-  dst_check_branch->target = check_failed;
-  src_dst_same->target = check_failed;
-  len_neg_or_too_big->target = check_failed;
-  src_pos_negative->target = check_failed;
-  src_bad_len->target = check_failed;
-  dst_pos_negative->target = check_failed;
-  dst_bad_len->target = check_failed;
-  jmp_to_begin_loop->target = begin_loop;
-  jmp_to_ret->target = return_point;
-
-  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
-  ClobberCallerSave();  // We must clobber everything because slow path will return here
-
-  return true;
-}
-
-void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0);
-  lir->target = target;
-}
-
-bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
-  return dex_cache_arrays_layout_.Valid();
-}
-
-void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) {
-  LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0);
-  LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0);
-  ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH;
-  LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg());
-  add_pc->flags.fixup = kFixupLabel;
-  movw->operands[2] = WrapPointer(dex_file);
-  movw->operands[3] = offset;
-  movw->operands[4] = WrapPointer(add_pc);
-  movt->operands[2] = movw->operands[2];
-  movt->operands[3] = movw->operands[3];
-  movt->operands[4] = movw->operands[4];
-  dex_cache_access_insns_.push_back(movw);
-  dex_cache_access_insns_.push_back(movt);
-}
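-
-// Schematically, the pattern above is patched at assembly time into
-// (offsets shown are placeholders):
-//   movw r_dest, #lo16(element_offset)
-//   movt r_dest, #hi16(element_offset)
-//   add  r_dest, pc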
-
-void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
-                                          bool wide) {
-  DCHECK(!wide) << "Unsupported";
-  if (dex_cache_arrays_base_reg_.Valid()) {
-    LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_,
-                r_dest, kNotVolatile);
-  } else {
-    OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest);
-    LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
-  }
-}
-
-LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
-  return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
-}
-
-LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
-  return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
-}
-
-void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                                RegLocation rl_src3, bool is_sub) {
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
-  rl_src3 = LoadValue(rl_src3, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR4(is_sub ? kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
-          rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
-                                               RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
-                                               int first_bit, int second_bit) {
-  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
-                   EncodeShift(kArmLsl, second_bit - first_bit));
-  if (first_bit != 0) {
-    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
-  }
-}
-
-void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
-  RegStorage t_reg = AllocTemp();
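-  // ORRS of the two halves sets the Z flag exactly when the full 64-bit value
-  // is zero; the OR result in t_reg is discarded, only the flags are used.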
-  NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
-  FreeTemp(t_reg);
-  GenDivZeroCheck(kCondEq);
-}
-
-// Test suspend flag, return target of taken suspend branch
-LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
-#ifdef ARM_R4_SUSPEND_FLAG
-  NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
-  return OpCondBranch((target == nullptr) ? kCondEq : kCondNe, target);
-#else
-  RegStorage t_reg = AllocTemp();
-  LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
-    t_reg, kUnsignedHalf, kNotVolatile);
-  LIR* cmp_branch = OpCmpImmBranch((target == nullptr) ? kCondNe : kCondEq, t_reg,
-    0, target);
-  FreeTemp(t_reg);
-  return cmp_branch;
-#endif
-}
-
-// Decrement register and branch on condition
-LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
-  // Combine sub & test using sub setflags encoding here
-  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-  return OpCondBranch(c_code, target);
-}
-
-bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
-    return false;
-  }
-  // Start off using the last LIR as the barrier; if it does not suffice, generate a new one.
-  LIR* barrier = last_lir_insn_;
-
-  int dmb_flavor;
-  // TODO: revisit Arm barrier kinds
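-  // DMB ISH is a full barrier over the inner-shareable domain; DMB ISHST
-  // orders only store-store within that domain.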
-  switch (barrier_kind) {
-    case kAnyStore: dmb_flavor = kISH; break;
-    case kLoadAny: dmb_flavor = kISH; break;
-    case kStoreStore: dmb_flavor = kISHST; break;
-    case kAnyAny: dmb_flavor = kISH; break;
-    default:
-      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
-      dmb_flavor = kSY;  // quiet gcc.
-      break;
-  }
-
-  bool ret = false;
-
-  // If the same barrier already exists, don't generate another.
-  if (barrier == nullptr
-      || barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor) {
-    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
-    ret = true;
-  }
-
-  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
-  DCHECK(!barrier->flags.use_def_invalid);
-  barrier->u.m.def_mask = &kEncodeAll;
-  return ret;
-}
-
-void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage z_reg = AllocTemp();
-  LoadConstantNoClobber(z_reg, 0);
-  // Check for destructive overlap
-  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
-    RegStorage t_reg = AllocTemp();
-    OpRegCopy(t_reg, rl_result.reg.GetLow());
-    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
-    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
-    FreeTemp(t_reg);
-  } else {
-    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
-    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
-  }
-  FreeTemp(z_reg);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void ArmMir2Lir::GenMulLong(Instruction::Code opcode ATTRIBUTE_UNUSED, RegLocation rl_dest,
-                            RegLocation rl_src1, RegLocation rl_src2) {
-  /*
-   * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
-   * dest     = src1.lo * src2.lo;
-   * tmp1    += src1.lo * src2.hi;
-   * dest.hi += tmp1;
-   *
-   * To pull off inline multiply, we have a worst-case requirement of 7 temporary
-   * registers.  Normally for Arm, we get 5.  We can get to 6 by including
-   * lr in the temp set.  The only problematic case is when all operands and the
-   * result are distinct and none have been promoted.  In that case, we can
-   * succeed by aggressively freeing operand temp registers once they are no
-   * longer needed.  All other cases
-   * can proceed normally.  We'll just punt on the case of the result having a misaligned
-   * overlap with either operand and send that case to a runtime handler.
-   */
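-  // Schematically, keeping only the low 64 bits of the product:
-  //   result = umull(lo1, lo2) + ((hi1 * lo2 + lo1 * hi2) << 32)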
-  RegLocation rl_result;
-  if (PartiallyIntersects(rl_src1, rl_dest) || (PartiallyIntersects(rl_src2, rl_dest))) {
-    FlushAllRegs();
-    CallRuntimeHelperRegLocationRegLocation(kQuickLmul, rl_src1, rl_src2, false);
-    rl_result = GetReturnWide(kCoreReg);
-    StoreValueWide(rl_dest, rl_result);
-    return;
-  }
-
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-
-  int reg_status = 0;
-  RegStorage res_lo;
-  RegStorage res_hi;
-  bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
-      !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
-  bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
-  bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
-  // Check if rl_dest is *not* either operand and we have enough temp registers.
-  if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
-      (dest_promoted || src1_promoted || src2_promoted)) {
-    // In this case, we do not need to manually allocate temp registers for result.
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    res_lo = rl_result.reg.GetLow();
-    res_hi = rl_result.reg.GetHigh();
-  } else {
-    res_lo = AllocTemp();
-    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
-      // In this case, we have enough temp registers to allocate for the result.
-      res_hi = AllocTemp();
-      reg_status = 1;
-    } else {
-      // In this case, all temps are now allocated.
-      // res_hi will be allocated after we can free src1_hi.
-      reg_status = 2;
-    }
-  }
-
-  // Temporarily add LR to the temp pool, and assign it to tmp1
-  MarkTemp(rs_rARM_LR);
-  FreeTemp(rs_rARM_LR);
-  RegStorage tmp1 = rs_rARM_LR;
-  LockTemp(rs_rARM_LR);
-
-  if (rl_src1.reg == rl_src2.reg) {
-    DCHECK(res_hi.Valid());
-    DCHECK(res_lo.Valid());
-    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
-    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
-            rl_src1.reg.GetLowReg());
-    OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
-  } else {
-    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
-    if (reg_status == 2) {
-      DCHECK(!res_hi.Valid());
-      DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
-      DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
-      // We will force-free src1_hi below, so we must clobber it.
-      Clobber(rl_src1.reg);
-      FreeTemp(rl_src1.reg.GetHigh());
-      res_hi = AllocTemp();
-    }
-    DCHECK(res_hi.Valid());
-    DCHECK(res_lo.Valid());
-    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
-            rl_src1.reg.GetLowReg());
-    NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
-            tmp1.GetReg());
-    NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
-    if (reg_status == 2) {
-      FreeTemp(rl_src1.reg.GetLow());
-    }
-  }
-
-  if (reg_status != 0) {
-    // We had manually allocated registers for rl_result.
-    // Now construct a RegLocation.
-    rl_result = GetReturnWide(kCoreReg);  // Just using as a template.
-    rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
-  }
-
-  // Free tmp1 but keep LR as temp for StoreValueWide() if needed.
-  FreeTemp(tmp1);
-
-  StoreValueWide(rl_dest, rl_result);
-
-  // Now, restore lr to its non-temp status.
-  Clobber(rs_rARM_LR);
-  UnmarkTemp(rs_rARM_LR);
-}
-
-void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                RegLocation rl_src2, int flags) {
-  switch (opcode) {
-    case Instruction::MUL_LONG:
-    case Instruction::MUL_LONG_2ADDR:
-      GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::NEG_LONG:
-      GenNegLong(rl_dest, rl_src2);
-      return;
-
-    default:
-      break;
-  }
-
-  // Fallback for all other ops.
-  Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-}
-
-/*
- * Generate array load
- */
-void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = RegClassBySize(size);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
-  RegLocation rl_result;
-  bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kRefReg);
-  if (!constant_index) {
-    rl_index = LoadValue(rl_index, kCoreReg);
-  }
-
-  if (rl_dest.wide) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  // If index is constant, just fold it into the data offset
-  if (constant_index) {
-    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
-  }
-
-  /* null object? */
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  RegStorage reg_len;
-  if (needs_range_check) {
-    reg_len = AllocTemp();
-    /* Get len */
-    Load32Disp(rl_array.reg, len_offset, reg_len);
-    MarkPossibleNullPointerException(opt_flags);
-  } else {
-    ForceImplicitNullCheck(rl_array.reg, opt_flags);
-  }
-  if (rl_dest.wide || rl_dest.fp || constant_index) {
-    RegStorage reg_ptr;
-    if (constant_index) {
-      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
-    } else {
-      // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTempRef();
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
-      FreeTemp(rl_index.reg);
-    }
-    rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    if (needs_range_check) {
-      if (constant_index) {
-        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
-      } else {
-        GenArrayBoundsCheck(rl_index.reg, reg_len);
-      }
-      FreeTemp(reg_len);
-    }
-    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
-    if (!constant_index) {
-      FreeTemp(reg_ptr);
-    }
-    if (rl_dest.wide) {
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      StoreValue(rl_dest, rl_result);
-    }
-  } else {
-    // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTempRef();
-    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-    FreeTemp(rl_array.reg);
-    rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
-    FreeTemp(reg_ptr);
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-/*
- * Generate array store
- */
-void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = RegClassBySize(size);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  bool constant_index = rl_index.is_const;
-
-  int data_offset;
-  if (size == k64 || size == kDouble) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  // If index is constant, just fold it into the data offset.
-  if (constant_index) {
-    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
-  }
-
-  rl_array = LoadValue(rl_array, kRefReg);
-  if (!constant_index) {
-    rl_index = LoadValue(rl_index, kCoreReg);
-  }
-
-  RegStorage reg_ptr;
-  bool allocated_reg_ptr_temp = false;
-  if (constant_index) {
-    reg_ptr = rl_array.reg;
-  } else if (IsTemp(rl_array.reg) && !card_mark) {
-    Clobber(rl_array.reg);
-    reg_ptr = rl_array.reg;
-  } else {
-    allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTempRef();
-  }
-
-  /* null object? */
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  RegStorage reg_len;
-  if (needs_range_check) {
-    reg_len = AllocTemp();
-    // NOTE: max live temps (4) here.
-    /* Get len */
-    Load32Disp(rl_array.reg, len_offset, reg_len);
-    MarkPossibleNullPointerException(opt_flags);
-  } else {
-    ForceImplicitNullCheck(rl_array.reg, opt_flags);
-  }
-  /* at this point, reg_ptr points to array, 2 live temps */
-  if (rl_src.wide || rl_src.fp || constant_index) {
-    if (rl_src.wide) {
-      rl_src = LoadValueWide(rl_src, reg_class);
-    } else {
-      rl_src = LoadValue(rl_src, reg_class);
-    }
-    if (!constant_index) {
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
-    }
-    if (needs_range_check) {
-      if (constant_index) {
-        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
-      } else {
-        GenArrayBoundsCheck(rl_index.reg, reg_len);
-      }
-      FreeTemp(reg_len);
-    }
-
-    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
-  } else {
-    /* reg_ptr -> array data */
-    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-    rl_src = LoadValue(rl_src, reg_class);
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
-  }
-  if (allocated_reg_ptr_temp) {
-    FreeTemp(reg_ptr);
-  }
-  if (card_mark) {
-    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
-  }
-}
-
-
-void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
-                                   int flags ATTRIBUTE_UNUSED) {
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  // Per spec, we only care about low 6 bits of shift amount.
-  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
-  if (shift_amount == 0) {
-    StoreValueWide(rl_dest, rl_src);
-    return;
-  }
-  if (PartiallyIntersects(rl_src, rl_dest)) {
-    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
-    return;
-  }
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      if (shift_amount == 1) {
-        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
-        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
-      } else if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else if (shift_amount > 31) {
-        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else {
-        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
-                         EncodeShift(kArmLsr, 32 - shift_amount));
-        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
-      }
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
-      } else if (shift_amount > 31) {
-        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
-        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
-      } else {
-        RegStorage t_reg = AllocTemp();
-        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
-                         EncodeShift(kArmLsl, 32 - shift_amount));
-        FreeTemp(t_reg);
-        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-      }
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else if (shift_amount > 31) {
-        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else {
-        RegStorage t_reg = AllocTemp();
-        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
-                         EncodeShift(kArmLsl, 32 - shift_amount));
-        FreeTemp(t_reg);
-        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-      }
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case";
-  }
-  StoreValueWide(rl_dest, rl_result);
-}
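-
-// For reference, with 0 < n < 32 the decompositions used above are (lsr =
-// logical, asr = arithmetic right shift):
-//   SHL:  hi = (hi << n) | lsr(lo, 32 - n);    lo = lo << n;
-//   SHR:  lo = lsr(lo, n) | (hi << (32 - n));  hi = asr(hi, n);
-//   USHR: lo = lsr(lo, n) | (hi << (32 - n));  hi = lsr(hi, n);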
-
-void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                                   int flags) {
-  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
-    if (!rl_src2.is_const) {
-      // Don't bother with special handling for subtract from immediate.
-      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-      return;
-    }
-  } else {
-    // Normalize
-    if (!rl_src2.is_const) {
-      DCHECK(rl_src1.is_const);
-      std::swap(rl_src1, rl_src2);
-    }
-  }
-  if (PartiallyIntersects(rl_src1, rl_dest)) {
-    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-    return;
-  }
-  DCHECK(rl_src2.is_const);
-  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-  uint32_t val_lo = Low32Bits(val);
-  uint32_t val_hi = High32Bits(val);
-  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
-  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
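-  // A negative value from ModifiedImmediate() means the constant has no
-  // Thumb2 modified-immediate encoding (an 8-bit pattern, rotated or
-  // replicated), so the add/sub cases below fall back to the generic path.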
-
-  // Only a subset of add/sub immediate instructions set carry, so bail if the constants don't fit.
-  switch (opcode) {
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
-        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-        return;
-      }
-      break;
-    default:
-      break;
-  }
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
-  switch (opcode) {
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
-      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
-      break;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
-        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
-      }
-      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
-        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
-      }
-      break;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
-      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
-      break;
-    case Instruction::AND_LONG:
-    case Instruction::AND_LONG_2ADDR:
-      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
-        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
-      }
-      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
-        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
-      }
-      break;
-    case Instruction::SUB_LONG_2ADDR:
-    case Instruction::SUB_LONG:
-      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
-      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode " << opcode;
-  }
-  StoreValueWide(rl_dest, rl_result);
-}
-
-bool ArmMir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                  RegLocation rl_src, RegLocation rl_dest, int lit) {
-  if (lit < 2) {
-    return false;
-  }
-
-  // ARM either does not support a division instruction or it is potentially expensive. Look for
-  // more special cases.
-  if (!IsPowerOfTwo(lit)) {
-    return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
-  }
-
-  return Mir2Lir::HandleEasyDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
deleted file mode 100644
index 355485e..0000000
--- a/compiler/dex/quick/arm/target_arm.cc
+++ /dev/null
@@ -1,1015 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm.h"
-
-#include <inttypes.h>
-
-#include <string>
-#include <sstream>
-
-#include "backend_arm.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-
-namespace art {
-
-#ifdef ARM_R4_SUSPEND_FLAG
-static constexpr RegStorage core_regs_arr[] =
-    {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
-     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-#else
-static constexpr RegStorage core_regs_arr[] =
-    {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
-     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-#endif
-static constexpr RegStorage sp_regs_arr[] =
-    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
-     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
-     rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30,
-     rs_fr31};
-static constexpr RegStorage dp_regs_arr[] =
-    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
-     rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
-#ifdef ARM_R4_SUSPEND_FLAG
-static constexpr RegStorage reserved_regs_arr[] =
-    {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
-#else
-static constexpr RegStorage reserved_regs_arr[] =
-    {rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r12};
-#endif
-static constexpr RegStorage sp_temps_arr[] =
-    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
-     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
-static constexpr RegStorage dp_temps_arr[] =
-    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
-
-static constexpr ArrayRef<const RegStorage> empty_pool;
-static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr);
-static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr);
-static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr);
-static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr);
-static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr);
-static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr);
-static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr);
-
-RegLocation ArmMir2Lir::LocCReturn() {
-  return arm_loc_c_return;
-}
-
-RegLocation ArmMir2Lir::LocCReturnRef() {
-  return arm_loc_c_return;
-}
-
-RegLocation ArmMir2Lir::LocCReturnWide() {
-  return arm_loc_c_return_wide;
-}
-
-RegLocation ArmMir2Lir::LocCReturnFloat() {
-  return arm_loc_c_return_float;
-}
-
-RegLocation ArmMir2Lir::LocCReturnDouble() {
-  return arm_loc_c_return_double;
-}
-
-// Return a target-dependent special register.
-RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) {
-  RegStorage res_reg;
-  switch (reg) {
-    case kSelf: res_reg = rs_rARM_SELF; break;
-#ifdef ARM_R4_SUSPEND_FLAG
-    case kSuspend: res_reg =  rs_rARM_SUSPEND; break;
-#else
-    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
-#endif
-    case kLr: res_reg =  rs_rARM_LR; break;
-    case kPc: res_reg =  rs_rARM_PC; break;
-    case kSp: res_reg =  rs_rARM_SP; break;
-    case kArg0: res_reg = rs_r0; break;
-    case kArg1: res_reg = rs_r1; break;
-    case kArg2: res_reg = rs_r2; break;
-    case kArg3: res_reg = rs_r3; break;
-    case kFArg0: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r0 : rs_fr0; break;
-    case kFArg1: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r1 : rs_fr1; break;
-    case kFArg2: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r2 : rs_fr2; break;
-    case kFArg3: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r3 : rs_fr3; break;
-    case kFArg4: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr4; break;
-    case kFArg5: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr5; break;
-    case kFArg6: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr6; break;
-    case kFArg7: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr7; break;
-    case kFArg8: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr8; break;
-    case kFArg9: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr9; break;
-    case kFArg10: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr10; break;
-    case kFArg11: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr11; break;
-    case kFArg12: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr12; break;
-    case kFArg13: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr13; break;
-    case kFArg14: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr14; break;
-    case kFArg15: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr15; break;
-    case kRet0: res_reg = rs_r0; break;
-    case kRet1: res_reg = rs_r1; break;
-    case kInvokeTgt: res_reg = rs_rARM_LR; break;
-    case kHiddenArg: res_reg = rs_r12; break;
-    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
-    case kCount: res_reg = RegStorage::InvalidReg(); break;
-    default: res_reg = RegStorage::InvalidReg();
-  }
-  return res_reg;
-}
-
-/*
- * Decode the register id.
- */
-ResourceMask ArmMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
-  return GetRegMaskArm(reg);
-}
-
-constexpr ResourceMask ArmMir2Lir::GetRegMaskArm(RegStorage reg) {
-  return reg.IsDouble()
-      /* Each double register is equal to a pair of single-precision FP registers */
-      ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kArmFPReg0)
-      : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kArmFPReg0 : reg.GetRegNum());
-}
-
-constexpr ResourceMask ArmMir2Lir::EncodeArmRegList(int reg_list) {
-  return ResourceMask::RawMask(static_cast<uint64_t>(reg_list), 0u);
-}
-
-constexpr ResourceMask ArmMir2Lir::EncodeArmRegFpcsList(int reg_list) {
-  return ResourceMask::RawMask(static_cast<uint64_t>(reg_list) << kArmFPReg16, 0u);
-}
-
-ResourceMask ArmMir2Lir::GetPCUseDefEncoding() const {
-  return ResourceMask::Bit(kArmRegPC);
-}
-
-// Thumb2-specific setup.  TODO: inline?
-void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                          ResourceMask* use_mask, ResourceMask* def_mask) {
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
-  DCHECK(!lir->flags.use_def_invalid);
-
-  int opcode = lir->opcode;
-
-  // These flags are somewhat uncommon - bypass if we can.
-  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
-                REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
-                REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
-    if (flags & REG_DEF_SP) {
-      def_mask->SetBit(kArmRegSP);
-    }
-
-    if (flags & REG_USE_SP) {
-      use_mask->SetBit(kArmRegSP);
-    }
-
-    if (flags & REG_DEF_LIST0) {
-      def_mask->SetBits(EncodeArmRegList(lir->operands[0]));
-    }
-
-    if (flags & REG_DEF_LIST1) {
-      def_mask->SetBits(EncodeArmRegList(lir->operands[1]));
-    }
-
-    if (flags & REG_DEF_FPCS_LIST0) {
-      def_mask->SetBits(EncodeArmRegFpcsList(lir->operands[0]));
-    }
-
-    if (flags & REG_DEF_FPCS_LIST2) {
-      for (int i = 0; i < lir->operands[2]; i++) {
-        SetupRegMask(def_mask, lir->operands[1] + i);
-      }
-    }
-
-    if (flags & REG_USE_PC) {
-      use_mask->SetBit(kArmRegPC);
-    }
-
-    /* Conservatively treat the IT block */
-    if (flags & IS_IT) {
-      *def_mask = kEncodeAll;
-    }
-
-    if (flags & REG_USE_LIST0) {
-      use_mask->SetBits(EncodeArmRegList(lir->operands[0]));
-    }
-
-    if (flags & REG_USE_LIST1) {
-      use_mask->SetBits(EncodeArmRegList(lir->operands[1]));
-    }
-
-    if (flags & REG_USE_FPCS_LIST0) {
-      use_mask->SetBits(EncodeArmRegFpcsList(lir->operands[0]));
-    }
-
-    if (flags & REG_USE_FPCS_LIST2) {
-      for (int i = 0; i < lir->operands[2]; i++) {
-        SetupRegMask(use_mask, lir->operands[1] + i);
-      }
-    }
-    /* Fixup for kThumbPush/lr and kThumbPop/pc */
-    if (opcode == kThumbPush || opcode == kThumbPop) {
-      constexpr ResourceMask r8Mask = GetRegMaskArm(rs_r8);
-      if ((opcode == kThumbPush) && (use_mask->Intersects(r8Mask))) {
-        use_mask->ClearBits(r8Mask);
-        use_mask->SetBit(kArmRegLR);
-      } else if ((opcode == kThumbPop) && (def_mask->Intersects(r8Mask))) {
-        def_mask->ClearBits(r8Mask);
-        def_mask->SetBit(kArmRegPC);
-      }
-    }
-    if (flags & REG_DEF_LR) {
-      def_mask->SetBit(kArmRegLR);
-    }
-  }
-}
-
-ArmConditionCode ArmMir2Lir::ArmConditionEncoding(ConditionCode ccode) {
-  ArmConditionCode res;
-  switch (ccode) {
-    case kCondEq: res = kArmCondEq; break;
-    case kCondNe: res = kArmCondNe; break;
-    case kCondCs: res = kArmCondCs; break;
-    case kCondCc: res = kArmCondCc; break;
-    case kCondUlt: res = kArmCondCc; break;
-    case kCondUge: res = kArmCondCs; break;
-    case kCondMi: res = kArmCondMi; break;
-    case kCondPl: res = kArmCondPl; break;
-    case kCondVs: res = kArmCondVs; break;
-    case kCondVc: res = kArmCondVc; break;
-    case kCondHi: res = kArmCondHi; break;
-    case kCondLs: res = kArmCondLs; break;
-    case kCondGe: res = kArmCondGe; break;
-    case kCondLt: res = kArmCondLt; break;
-    case kCondGt: res = kArmCondGt; break;
-    case kCondLe: res = kArmCondLe; break;
-    case kCondAl: res = kArmCondAl; break;
-    case kCondNv: res = kArmCondNv; break;
-    default:
-      LOG(FATAL) << "Bad condition code " << ccode;
-      res = static_cast<ArmConditionCode>(0);  // Quiet gcc
-  }
-  return res;
-}
-
-static const char* core_reg_names[16] = {
-  "r0",
-  "r1",
-  "r2",
-  "r3",
-  "r4",
-  "r5",
-  "r6",
-  "r7",
-  "r8",
-  "rSELF",
-  "r10",
-  "r11",
-  "r12",
-  "sp",
-  "lr",
-  "pc",
-};
-
-
-static const char* shift_names[4] = {
-  "lsl",
-  "lsr",
-  "asr",
-  "ror"};
-
-/* Decode and print an ARM register list. */
-static char* DecodeRegList(int opcode, int vector, char* buf, size_t buf_size) {
-  int i;
-  bool printed = false;
-  buf[0] = 0;
-  for (i = 0; i < 16; i++, vector >>= 1) {
-    if (vector & 0x1) {
-      int reg_id = i;
-      if (opcode == kThumbPush && i == 8) {
-        reg_id = rs_rARM_LR.GetRegNum();
-      } else if (opcode == kThumbPop && i == 8) {
-        reg_id = rs_rARM_PC.GetRegNum();
-      }
-      if (printed) {
-        snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id);
-      } else {
-        printed = true;
-        snprintf(buf, buf_size, "r%d", reg_id);
-      }
-    }
-  }
-  return buf;
-}
-
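A hypothetical use of the helper above (a fragment, assuming DecodeRegList and the opcode constants are in scope): bit 8 in a Thumb push/pop bit vector stands in for lr/pc, matching the fixup in SetupTargetResourceMasks. The register number 14 for LR is an assumption for illustration.

    char buf[64];
    // Bits {0,1,2,3,8}; for kThumbPush, bit 8 denotes LR (register number 14).
    DecodeRegList(kThumbPush, 0x010f, buf, sizeof(buf));  // buf == "r0, r1, r2, r3, r14"
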
-static char* DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) {
-  snprintf(buf, buf_size, "s%d", base);
-  for (int i = 1; i < count; i++) {
-    snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i);
-  }
-  return buf;
-}
-
-static int32_t ExpandImmediate(int value) {
-  int32_t mode = (value & 0xf00) >> 8;
-  uint32_t bits = value & 0xff;
-  switch (mode) {
-    case 0:
-      return bits;
-    case 1:
-      return (bits << 16) | bits;
-    case 2:
-      return (bits << 24) | (bits << 8);
-    case 3:
-      return (bits << 24) | (bits << 16) | (bits << 8) | bits;
-    default:
-      break;
-  }
-  bits = (bits | 0x80) << 24;
-  return bits >> (((value & 0xf80) >> 7) - 8);
-}
-
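Some sample decodings of the helper above (a hypothetical fragment needing <cstdio>, assuming ExpandImmediate is in scope): the low 8 bits are the base byte, and bits 11:8 select a replication mode or, beyond mode 3, a rotation.

    #include <cstdio>

    int main() {
      printf("%08x\n", ExpandImmediate(0x0ab));  // 000000ab - byte as-is
      printf("%08x\n", ExpandImmediate(0x1ab));  // 00ab00ab - replicated halfwords
      printf("%08x\n", ExpandImmediate(0x2ab));  // ab00ab00 - replicated odd bytes
      printf("%08x\n", ExpandImmediate(0x3ab));  // abababab - replicated in all four bytes
      printf("%08x\n", ExpandImmediate(0xd7f));  // 00003fc0 - 0xff rotated into place
      return 0;
    }
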
-const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
-                         "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
-/*
- * Interpret a format string and build a display string for the LIR.
- * See the format key in assemble_arm.cc.
- */
-std::string ArmMir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
-  std::string buf;
-  int i;
-  const char* fmt_end = &fmt[strlen(fmt)];
-  char tbuf[256];
-  const char* name;
-  char nc;
-  while (fmt < fmt_end) {
-    int operand;
-    if (*fmt == '!') {
-      fmt++;
-      DCHECK_LT(fmt, fmt_end);
-      nc = *fmt++;
-      if (nc == '!') {
-        strcpy(tbuf, "!");
-      } else {
-         DCHECK_LT(fmt, fmt_end);
-         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
-         operand = lir->operands[nc-'0'];
-         switch (*fmt++) {
-           case 'H':
-             if (operand != 0) {
-               snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2);
-             } else {
-               strcpy(tbuf, "");
-             }
-             break;
-           case 'B':
-             switch (operand) {
-               case kSY:
-                 name = "sy";
-                 break;
-               case kST:
-                 name = "st";
-                 break;
-               case kISH:
-                 name = "ish";
-                 break;
-               case kISHST:
-                 name = "ishst";
-                 break;
-               case kNSH:
-                 name = "nsh";
-                 break;
-               case kNSHST:
-                 name = "shst";
-                 break;
-               default:
-                 name = "DecodeError2";
-                 break;
-             }
-             strcpy(tbuf, name);
-             break;
-           case 'b':
-             strcpy(tbuf, "0000");
-             for (i = 3; i >= 0; i--) {
-               tbuf[i] += operand & 1;
-               operand >>= 1;
-             }
-             break;
-           case 'n':
-             operand = ~ExpandImmediate(operand);
-             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
-             break;
-           case 'm':
-             operand = ExpandImmediate(operand);
-             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
-             break;
-           case 's':
-             snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand));
-             break;
-           case 'S':
-             snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand));
-             break;
-           case 'h':
-             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
-             break;
-           case 'M':
-           case 'd':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand);
-             break;
-           case 'C':
-             operand = RegStorage::RegNum(operand);
-             DCHECK_LT(operand, static_cast<int>(
-                 sizeof(core_reg_names)/sizeof(core_reg_names[0])));
-             snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]);
-             break;
-           case 'E':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
-             break;
-           case 'F':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
-             break;
-           case 'c':
-             strcpy(tbuf, cc_names[operand]);
-             break;
-           case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
-                 lir->target);
-             break;
-           case 'T':
-             snprintf(tbuf, arraysize(tbuf), "%s", PrettyMethod(
-                 static_cast<uint32_t>(lir->operands[1]),
-                 *UnwrapPointer<DexFile>(lir->operands[2])).c_str());
-             break;
-           case 'u': {
-             int offset_1 = lir->operands[0];
-             int offset_2 = NEXT_LIR(lir)->operands[0];
-             uintptr_t target =
-                 (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) &
-                 ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
-                 0xfffffffc;
-             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target));
-             break;
-          }
-
-           /* Nothing to print for BLX_2 */
-           case 'v':
-             strcpy(tbuf, "see above");
-             break;
-           case 'R':
-             DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf));
-             break;
-           case 'P':
-             DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf));
-             break;
-           case 'Q':
-             DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf));
-             break;
-           default:
-             strcpy(tbuf, "DecodeError1");
-             break;
-        }
-        buf += tbuf;
-      }
-    } else {
-       buf += *fmt++;
-    }
-  }
-  // Dump thread offset.
-  std::string fmt_str = GetTargetInstFmt(lir->opcode);
-  if (std::string::npos != fmt_str.find(", [!1C, #!2") && rARM_SELF == lir->operands[1] &&
-      std::string::npos != buf.find(", [")) {
-    int offset = lir->operands[2];
-    if (std::string::npos != fmt_str.find("#!2d")) {
-    } else if (std::string::npos != fmt_str.find("#!2E")) {
-      offset *= 4;
-    } else if (std::string::npos != fmt_str.find("#!2F")) {
-      offset *= 2;
-    } else {
-      LOG(FATAL) << "Should not reach here";
-    }
-    std::ostringstream tmp_stream;
-    Thread::DumpThreadOffset<4>(tmp_stream, offset);
-    buf += "  ; ";
-    buf += tmp_stream.str();
-  }
-  return buf;
-}
-
-void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) {
-  char buf[256];
-  buf[0] = 0;
-
-  if (mask.Equals(kEncodeAll)) {
-    strcpy(buf, "all");
-  } else {
-    char num[8];
-    int i;
-
-    for (i = 0; i < kArmRegEnd; i++) {
-      if (mask.HasBit(i)) {
-        snprintf(num, arraysize(num), "%d ", i);
-        strcat(buf, num);
-      }
-    }
-
-    if (mask.HasBit(ResourceMask::kCCode)) {
-      strcat(buf, "cc ");
-    }
-    if (mask.HasBit(ResourceMask::kFPStatus)) {
-      strcat(buf, "fpcc ");
-    }
-
-    /* Memory bits */
-    if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) {
-      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
-               DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
-               DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
-    }
-    if (mask.HasBit(ResourceMask::kLiteral)) {
-      strcat(buf, "lit ");
-    }
-
-    if (mask.HasBit(ResourceMask::kHeapRef)) {
-      strcat(buf, "heap ");
-    }
-    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
-      strcat(buf, "noalias ");
-    }
-  }
-  if (buf[0]) {
-    LOG(INFO) << prefix << ": " << buf;
-  }
-}
-
-bool ArmMir2Lir::IsUnconditionalBranch(LIR* lir) {
-  return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond));
-}
-
-RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
-  if (UNLIKELY(is_volatile)) {
-    // On arm, atomic 64-bit load/store requires a core register pair.
-    // Smaller aligned load/store is atomic for both core and fp registers.
-    if (size == k64 || size == kDouble) {
-      return kCoreReg;
-    }
-  }
-  return RegClassBySize(size);
-}
-
-ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena),
-      call_method_insns_(arena->Adapter()),
-      dex_cache_access_insns_(arena->Adapter()),
-      dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) {
-  call_method_insns_.reserve(100);
-  // Sanity check - make sure encoding map lines up.
-  for (int i = 0; i < kArmLast; i++) {
-    DCHECK_EQ(ArmMir2Lir::EncodingMap[i].opcode, i)
-        << "Encoding order for " << ArmMir2Lir::EncodingMap[i].name
-        << " is wrong: expecting " << i << ", seeing "
-        << static_cast<int>(ArmMir2Lir::EncodingMap[i].opcode);
-  }
-}
-
-Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena) {
-  return new ArmMir2Lir(cu, mir_graph, arena);
-}
-
-void ArmMir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */,
-                                            sp_regs, dp_regs,
-                                            reserved_regs, empty_pool /* reserved64 */,
-                                            core_temps, empty_pool /* core64_temps */,
-                                            sp_temps, dp_temps));
-
-  // Target-specific adjustments.
-
-  // Alias single precision floats to appropriate half of overlapping double.
-  for (RegisterInfo* info : reg_pool_->sp_regs_) {
-    int sp_reg_num = info->GetReg().GetRegNum();
-    int dp_reg_num = sp_reg_num >> 1;
-    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
-    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-    // Double precision register's master storage should refer to itself.
-    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
-    // Redirect the single precision register's master storage to the backing double.
-    info->SetMaster(dp_reg_info);
-    // Singles should show a single 32-bit mask bit, at first referring to the low half.
-    DCHECK_EQ(info->StorageMask(), RegisterInfo::kLowSingleStorageMask);
-    if (sp_reg_num & 1) {
-      // For odd singles, change to use the high word of the backing double.
-      info->SetStorageMask(RegisterInfo::kHighSingleStorageMask);
-    }
-  }
-
-#ifdef ARM_R4_SUSPEND_FLAG
-  // TODO: re-enable this when we can safely save r4 over the suspension code path.
-  bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
-  if (no_suspend) {
-    GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
-  }
-#endif
-
-  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
-  // TODO: adjust when we roll to hard float calling convention.
-  reg_pool_->next_core_reg_ = 2;
-  reg_pool_->next_sp_reg_ = 0;
-  reg_pool_->next_dp_reg_ = 0;
-}
-
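The single/double aliasing set up above, in isolation: single sN shares storage with double d(N >> 1) and occupies the high half when N is odd. A standalone illustration:

    #include <cstdio>

    int main() {
      for (int s = 0; s < 6; ++s) {
        printf("s%d aliases d%d (%s half)\n", s, s >> 1, (s & 1) ? "high" : "low");
      }
      return 0;
    }
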
-/*
- * TUNING: is this a true leaf?  We can't just use METHOD_IS_LEAF to decide, as
- * some instructions might call out to C/assembly helper functions.  Until that
- * machinery is in place, always spill lr.
- */
-
-void ArmMir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum());
-  num_core_spills_++;
-}
-
-/*
- * Mark a callee-save fp register as promoted.  Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask.  Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
- */
-void ArmMir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
-  DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE);
-  int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE;
-  // Ensure fp_vmap_table is large enough
-  int table_size = fp_vmap_table_.size();
-  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
-    fp_vmap_table_.push_back(INVALID_VREG);
-  }
-  // Add the current mapping
-  fp_vmap_table_[adjusted_reg_num] = v_reg;
-  // Size of fp_vmap_table is high-water mark, use to set mask
-  num_fp_spills_ = fp_vmap_table_.size();
-  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE;
-}
-
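A standalone sketch of the high-water-mark bookkeeping above, assuming ARM_FP_CALLEE_SAVE_BASE is 16 (s16 as the first AAPCS callee-save single is an assumption for illustration). Promoting s17 and then s19 leaves holes at s16 and s18, yet the mask must cover all four registers so vpush/vpop see a contiguous list:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    constexpr int kFpCalleeSaveBase = 16;     // mirrors ARM_FP_CALLEE_SAVE_BASE (assumed value)
    constexpr uint16_t kInvalidVreg = 0xffffu;

    int main() {
      std::vector<uint16_t> fp_vmap_table;
      auto mark = [&](uint16_t v_reg, int s_reg_num) {
        int adjusted = s_reg_num - kFpCalleeSaveBase;
        while (static_cast<int>(fp_vmap_table.size()) < adjusted + 1) {
          fp_vmap_table.push_back(kInvalidVreg);  // hole, kept so the list stays contiguous
        }
        fp_vmap_table[adjusted] = v_reg;
      };
      mark(5, 17);  // promote v5 to s17
      mark(9, 19);  // promote v9 to s19
      uint32_t num_fp_spills = fp_vmap_table.size();                  // 4: s16..s19
      uint32_t fp_spill_mask = ((1u << num_fp_spills) - 1) << kFpCalleeSaveBase;
      printf("spills=%u mask=%08x\n", num_fp_spills, fp_spill_mask);  // spills=4 mask=000f0000
      return 0;
    }
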
-void ArmMir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
-  // TEMP: perform as 2 singles.
-  int reg_num = reg.GetRegNum() << 1;
-  RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num);
-  RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1);
-  MarkPreservedSingle(v_reg, lo);
-  MarkPreservedSingle(v_reg + 1, hi);
-}
-
-/* Clobber all regs that might be used by an external C call */
-void ArmMir2Lir::ClobberCallerSave() {
-  // TODO: rework this - it's gotten even more ugly.
-  Clobber(rs_r0);
-  Clobber(rs_r1);
-  Clobber(rs_r2);
-  Clobber(rs_r3);
-  Clobber(rs_r12);
-  Clobber(rs_r14lr);
-  Clobber(rs_fr0);
-  Clobber(rs_fr1);
-  Clobber(rs_fr2);
-  Clobber(rs_fr3);
-  Clobber(rs_fr4);
-  Clobber(rs_fr5);
-  Clobber(rs_fr6);
-  Clobber(rs_fr7);
-  Clobber(rs_fr8);
-  Clobber(rs_fr9);
-  Clobber(rs_fr10);
-  Clobber(rs_fr11);
-  Clobber(rs_fr12);
-  Clobber(rs_fr13);
-  Clobber(rs_fr14);
-  Clobber(rs_fr15);
-  Clobber(rs_dr0);
-  Clobber(rs_dr1);
-  Clobber(rs_dr2);
-  Clobber(rs_dr3);
-  Clobber(rs_dr4);
-  Clobber(rs_dr5);
-  Clobber(rs_dr6);
-  Clobber(rs_dr7);
-}
-
-RegLocation ArmMir2Lir::GetReturnWideAlt() {
-  RegLocation res = LocCReturnWide();
-  res.reg.SetLowReg(rs_r2.GetReg());
-  res.reg.SetHighReg(rs_r3.GetReg());
-  Clobber(rs_r2);
-  Clobber(rs_r3);
-  MarkInUse(rs_r2);
-  MarkInUse(rs_r3);
-  MarkWide(res.reg);
-  return res;
-}
-
-RegLocation ArmMir2Lir::GetReturnAlt() {
-  RegLocation res = LocCReturn();
-  res.reg.SetReg(rs_r1.GetReg());
-  Clobber(rs_r1);
-  MarkInUse(rs_r1);
-  return res;
-}
-
-/* To be used when explicitly managing register use */
-void ArmMir2Lir::LockCallTemps() {
-  LockTemp(rs_r0);
-  LockTemp(rs_r1);
-  LockTemp(rs_r2);
-  LockTemp(rs_r3);
-  if (!kArm32QuickCodeUseSoftFloat) {
-    LockTemp(rs_fr0);
-    LockTemp(rs_fr1);
-    LockTemp(rs_fr2);
-    LockTemp(rs_fr3);
-    LockTemp(rs_fr4);
-    LockTemp(rs_fr5);
-    LockTemp(rs_fr6);
-    LockTemp(rs_fr7);
-    LockTemp(rs_fr8);
-    LockTemp(rs_fr9);
-    LockTemp(rs_fr10);
-    LockTemp(rs_fr11);
-    LockTemp(rs_fr12);
-    LockTemp(rs_fr13);
-    LockTemp(rs_fr14);
-    LockTemp(rs_fr15);
-    LockTemp(rs_dr0);
-    LockTemp(rs_dr1);
-    LockTemp(rs_dr2);
-    LockTemp(rs_dr3);
-    LockTemp(rs_dr4);
-    LockTemp(rs_dr5);
-    LockTemp(rs_dr6);
-    LockTemp(rs_dr7);
-  }
-}
-
-/* To be used when explicitly managing register use */
-void ArmMir2Lir::FreeCallTemps() {
-  FreeTemp(rs_r0);
-  FreeTemp(rs_r1);
-  FreeTemp(rs_r2);
-  FreeTemp(rs_r3);
-  FreeTemp(TargetReg(kHiddenArg));
-  if (!kArm32QuickCodeUseSoftFloat) {
-    FreeTemp(rs_fr0);
-    FreeTemp(rs_fr1);
-    FreeTemp(rs_fr2);
-    FreeTemp(rs_fr3);
-    FreeTemp(rs_fr4);
-    FreeTemp(rs_fr5);
-    FreeTemp(rs_fr6);
-    FreeTemp(rs_fr7);
-    FreeTemp(rs_fr8);
-    FreeTemp(rs_fr9);
-    FreeTemp(rs_fr10);
-    FreeTemp(rs_fr11);
-    FreeTemp(rs_fr12);
-    FreeTemp(rs_fr13);
-    FreeTemp(rs_fr14);
-    FreeTemp(rs_fr15);
-    FreeTemp(rs_dr0);
-    FreeTemp(rs_dr1);
-    FreeTemp(rs_dr2);
-    FreeTemp(rs_dr3);
-    FreeTemp(rs_dr4);
-    FreeTemp(rs_dr5);
-    FreeTemp(rs_dr6);
-    FreeTemp(rs_dr7);
-  }
-}
-
-RegStorage ArmMir2Lir::LoadHelper(QuickEntrypointEnum trampoline) {
-  LoadWordDisp(rs_rARM_SELF, GetThreadOffset<4>(trampoline).Int32Value(), rs_rARM_LR);
-  return rs_rARM_LR;
-}
-
-LIR* ArmMir2Lir::CheckSuspendUsingLoad() {
-  RegStorage tmp = rs_r0;
-  Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
-  LIR* load2 = Load32Disp(tmp, 0, tmp);
-  return load2;
-}
-
-uint64_t ArmMir2Lir::GetTargetInstFlags(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return ArmMir2Lir::EncodingMap[opcode].flags;
-}
-
-const char* ArmMir2Lir::GetTargetInstName(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return ArmMir2Lir::EncodingMap[opcode].name;
-}
-
-const char* ArmMir2Lir::GetTargetInstFmt(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return ArmMir2Lir::EncodingMap[opcode].fmt;
-}
-
-/*
- * Somewhat messy code here.  We want to allocate a pair of contiguous
- * physical single-precision floating point registers starting with
- * an even numbered reg.  It is possible that the paired s_reg (s_reg+1)
- * has already been allocated - try to fit if possible.  Fail to
- * allocate if we can't satisfy the pairing requirement: s_reg must map
- * to an even-numbered sX and (s_reg+1) to sX+1.
- */
-// TODO: needs rewrite to support non-backed 64-bit float regs.
-RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) {
-  RegStorage res;
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int p_map_idx = SRegToPMap(s_reg);
-  if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
-    // Upper reg is already allocated.  Can we fit?
-    int high_reg = promotion_map_[p_map_idx+1].fp_reg;
-    if ((high_reg & 1) == 0) {
-      // High reg is even - fail.
-      return res;  // Invalid.
-    }
-    // Is the low reg of the pair free?
-    // FIXME: rework.
-    RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1));
-    if (p->InUse() || p->IsTemp()) {
-      // Already allocated or not preserved - fail.
-      return res;  // Invalid.
-    }
-    // OK - good to go.
-    res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1);
-    p->MarkInUse();
-    MarkPreservedSingle(v_reg, p->GetReg());
-  } else {
-    /*
-     * TODO: until runtime support is in, make sure we avoid promoting the same vreg to
-     * different underlying physical registers.
-     */
-    for (RegisterInfo* info : reg_pool_->dp_regs_) {
-      if (!info->IsTemp() && !info->InUse()) {
-        res = info->GetReg();
-        info->MarkInUse();
-        MarkPreservedDouble(v_reg, info->GetReg());
-        break;
-      }
-    }
-  }
-  if (res.Valid()) {
-    RegisterInfo* info = GetRegInfo(res);
-    promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx].fp_reg =
-        info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg().GetReg();
-    promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx+1].fp_reg =
-        info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg().GetReg();
-  }
-  return res;
-}
-
-// Reserve a callee-save single-precision (sp) register.
-RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) {
-  RegStorage res;
-  for (RegisterInfo* info : reg_pool_->sp_regs_) {
-    if (!info->IsTemp() && !info->InUse()) {
-      res = info->GetReg();
-      int p_map_idx = SRegToPMap(s_reg);
-      int v_reg = mir_graph_->SRegToVReg(s_reg);
-      GetRegInfo(res)->MarkInUse();
-      MarkPreservedSingle(v_reg, res);
-      promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-      promotion_map_[p_map_idx].fp_reg = res.GetReg();
-      break;
-    }
-  }
-  return res;
-}
-
-void ArmMir2Lir::InstallLiteralPools() {
-  patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size());
-
-  // PC-relative calls to methods.
-  for (LIR* p : call_method_insns_) {
-    DCHECK_EQ(p->opcode, kThumb2Bl);
-    uint32_t target_method_idx = p->operands[1];
-    const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
-    patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
-                                                      target_dex_file, target_method_idx));
-  }
-
-  // PC-relative dex cache array accesses.
-  for (LIR* p : dex_cache_access_insns_) {
-    DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H);
-    const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]);
-    DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH);
-    const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]);
-    uint32_t offset = p->operands[3];
-    DCHECK(!p->flags.is_nop);
-    DCHECK(!add_pc->flags.is_nop);
-    patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset,
-                                                       dex_file, add_pc->offset, offset));
-  }
-
-  // And do the normal processing.
-  Mir2Lir::InstallLiteralPools();
-}
-
-RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(ShortyArg arg) {
-  const RegStorage coreArgMappingToPhysicalReg[] =
-      {rs_r1, rs_r2, rs_r3};
-  const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const RegStorage fpArgMappingToPhysicalReg[] =
-      {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-       rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
-  constexpr uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
-  static_assert(fpArgMappingToPhysicalRegSize % 2 == 0, "Number of FP Arg regs is not even");
-
-  RegStorage result = RegStorage::InvalidReg();
-  // Regard double as long, float as int for kArm32QuickCodeUseSoftFloat.
-  if (arg.IsFP() && !kArm32QuickCodeUseSoftFloat) {
-    if (arg.IsWide()) {
-      cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2));
-      if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) {
-        result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_],
-                                         fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]);
-        result = As64BitFloatReg(result);
-        cur_fp_double_reg_ += 2;
-      }
-    } else {
-      if (cur_fp_reg_ % 2 == 0) {
-        cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_);
-      }
-      if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-        result = fpArgMappingToPhysicalReg[cur_fp_reg_];
-        cur_fp_reg_++;
-      }
-    }
-  } else {
-    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      if (!kArm32QuickCodeUseSoftFloat && arg.IsWide() && cur_core_reg_ == 0) {
-        // Skip r1, and use r2-r3 for the register pair.
-        cur_core_reg_++;
-      }
-      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
-      if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-        result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
-      }
-    }
-  }
-  return result;
-}
-
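The FP mapping above back-fills: a double aligns the double cursor up to an even single, and a later float can still claim the odd single that was skipped. A standalone trace of the two cursors for the argument sequence float, double, float (a sketch mirroring the counter logic above, not the real mapper):

    #include <algorithm>
    #include <cstdio>

    int main() {
      int cur_fp_reg = 0, cur_fp_double_reg = 0;
      // float: even cursor, nothing skipped yet -> s0.
      int f1 = cur_fp_reg++;
      // double: round the single cursor up to an even pair -> s2/s3 (d1).
      cur_fp_double_reg = std::max(cur_fp_double_reg, (cur_fp_reg + 1) & ~1);
      int d_lo = cur_fp_double_reg;
      cur_fp_double_reg += 2;
      // float: the single cursor is odd, so it back-fills the skipped s1.
      if (cur_fp_reg % 2 == 0) {
        cur_fp_reg = std::max(cur_fp_double_reg, cur_fp_reg);
      }
      int f2 = cur_fp_reg++;
      printf("f1=s%d  d=s%d/s%d  f2=s%d\n", f1, d_lo, d_lo + 1, f2);  // f1=s0  d=s2/s3  f2=s1
      return 0;
    }
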
-int ArmMir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
-  if (kArm32QuickCodeUseSoftFloat) {
-    return Mir2Lir::GenDalvikArgsBulkCopy(info, first, count);
-  }
-  /*
-   * TODO: Improve by adding block copy for large number of arguments.  For now, just
-   * copy a Dalvik vreg at a time.
-   */
-  return count;
-}
-
-void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
-  DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
-  RegLocation rl_src[3];
-  RegLocation rl_dest = mir_graph_->GetBadLoc();
-  rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
-  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
-    case kMirOpMaddInt:
-      rl_dest = mir_graph_->GetDest(mir);
-      rl_src[0] = mir_graph_->GetSrc(mir, 0);
-      rl_src[1] = mir_graph_->GetSrc(mir, 1);
-      rl_src[2] = mir_graph_->GetSrc(mir, 2);
-      GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], false);
-      break;
-    case kMirOpMsubInt:
-      rl_dest = mir_graph_->GetDest(mir);
-      rl_src[0] = mir_graph_->GetSrc(mir, 0);
-      rl_src[1] = mir_graph_->GetSrc(mir, 1);
-      rl_src[2] = mir_graph_->GetSrc(mir, 2);
-      GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], true);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << mir->dalvikInsn.opcode;
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
deleted file mode 100644
index c31f46b..0000000
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ /dev/null
@@ -1,1314 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm.h"
-
-#include "arch/arm/instruction_set_features_arm.h"
-#include "arm_lir.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "driver/compiler_driver.h"
-
-namespace art {
-
-/* This file contains codegen for the Thumb ISA. */
-
-/*
- * Determine whether value can be encoded as a Thumb2 single-precision
- * floating point immediate.  If not, return -1.  If so, return the
- * encoded 8-bit value.
- */
-static int32_t EncodeImmSingle(int32_t value) {
-  int32_t res;
-  int32_t bit_a =  (value & 0x80000000) >> 31;
-  int32_t not_bit_b = (value & 0x40000000) >> 30;
-  int32_t bit_b =  (value & 0x20000000) >> 29;
-  int32_t b_smear =  (value & 0x3e000000) >> 25;
-  int32_t slice =   (value & 0x01f80000) >> 19;
-  int32_t zeroes =  (value & 0x0007ffff);
-  if (zeroes != 0)
-    return -1;
-  if (bit_b) {
-    if ((not_bit_b != 0) || (b_smear != 0x1f))
-      return -1;
-  } else {
-    if ((not_bit_b != 1) || (b_smear != 0x0))
-      return -1;
-  }
-  res = (bit_a << 7) | (bit_b << 6) | slice;
-  return res;
-}
-
-/*
- * Determine whether value can be encoded as a Thumb2 double-precision
- * floating point immediate.  If not, return -1.  If so, return the
- * encoded 8-bit value.
- */
-static int32_t EncodeImmDouble(int64_t value) {
-  int32_t res;
-  int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63;
-  int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62;
-  int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61;
-  int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54;
-  int32_t slice =  (value & INT64_C(0x003f000000000000)) >> 48;
-  uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff));
-  if (zeroes != 0ull)
-    return -1;
-  if (bit_b) {
-    if ((not_bit_b != 0) || (b_smear != 0xff))
-      return -1;
-  } else {
-    if ((not_bit_b != 1) || (b_smear != 0x0))
-      return -1;
-  }
-  res = (bit_a << 7) | (bit_b << 6) | slice;
-  return res;
-}
-
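A quick check of the encoders above (a hypothetical fragment, runnable if pasted below them in this file; assumes IEEE-754 floats). Note that +2.0f encodes to imm8 == 0, which is what the "+2.0 - +2.0" trick in LoadFPConstantValue below relies on:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      const float samples[] = {2.0f, 1.0f, 0.5f};
      for (float f : samples) {
        int32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));
        printf("%g -> imm8 = %#x\n", f, EncodeImmSingle(bits));  // 2 -> 0, 1 -> 0x70, 0.5 -> 0x60
      }
      return 0;
    }
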
-LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) {
-  DCHECK(RegStorage::IsSingle(r_dest));
-  if (value == 0) {
-    // TODO: we need better info about the target CPU.  A vector exclusive or
-    //       would probably be better here if we could rely on its existence.
-    // Load an immediate +2.0 (which encodes to 0)
-    NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0);
-    // +0.0 = +2.0 - +2.0
-    return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest);
-  } else {
-    int encoded_imm = EncodeImmSingle(value);
-    if (encoded_imm >= 0) {
-      return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm);
-    }
-  }
-  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
-  if (data_target == nullptr) {
-    data_target = AddWordData(&literal_list_, value);
-  }
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
-                          r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
-  AppendLIR(load_pc_rel);
-  return load_pc_rel;
-}
-
-/*
- * Determine whether value can be encoded as a Thumb2 modified
- * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
- */
-int ArmMir2Lir::ModifiedImmediate(uint32_t value) {
-  uint32_t b0 = value & 0xff;
-
-  /* Note: case of value==0 must use 0:000:0:0000000 encoding */
-  if (value <= 0xFF)
-    return b0;  // 0:000:a:bcdefgh
-  if (value == ((b0 << 16) | b0))
-    return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
-  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
-    return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
-  b0 = (value >> 8) & 0xff;
-  if (value == ((b0 << 24) | (b0 << 8)))
-    return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
-  /* Can we do it with rotation? */
-  int z_leading = CLZ(value);
-  int z_trailing = CTZ(value);
-  /* A run of eight or fewer active bits? */
-  if ((z_leading + z_trailing) < 24)
-    return -1;  /* No - bail */
-  /* left-justify the constant, discarding msb (known to be 1) */
-  value <<= z_leading + 1;
-  /* Create bcdefgh */
-  value >>= 25;
-  /* Put it all together */
-  return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
-}
-
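Some sample encodings for the helper above (a hypothetical fragment needing <cstdio>, assuming ModifiedImmediate is in scope). The 12-bit result is in i:imm3:a:bcdefgh form, and -1 means the constant needs a different strategy:

    #include <cstdio>

    int main() {
      printf("%x\n", ModifiedImmediate(0x000000abu));  // ab  - plain byte
      printf("%x\n", ModifiedImmediate(0x00ab00abu));  // 1ab - halfword replication
      printf("%x\n", ModifiedImmediate(0xab00ab00u));  // 2ab - odd-byte replication
      printf("%x\n", ModifiedImmediate(0xababababu));  // 3ab - all four bytes
      printf("%x\n", ModifiedImmediate(0x00003fc0u));  // d7f - 0xff rotated into place
      printf("%d\n", ModifiedImmediate(0x12345678u));  // -1  - needs movw/movt or a literal
      return 0;
    }
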
-bool ArmMir2Lir::InexpensiveConstantInt(int32_t value) {
-  return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
-}
-
-bool ArmMir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
-  switch (opcode) {
-    case Instruction::ADD_INT:
-    case Instruction::ADD_INT_2ADDR:
-    case Instruction::SUB_INT:
-    case Instruction::SUB_INT_2ADDR:
-      if ((value >> 12) == (value >> 31)) {  // Signed 12-bit, RRI12 versions of ADD/SUB.
-        return true;
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::IF_EQ:
-    case Instruction::IF_NE:
-    case Instruction::IF_LT:
-    case Instruction::IF_GE:
-    case Instruction::IF_GT:
-    case Instruction::IF_LE:
-      return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(-value) >= 0);
-    case Instruction::SHL_INT:
-    case Instruction::SHL_INT_2ADDR:
-    case Instruction::SHR_INT:
-    case Instruction::SHR_INT_2ADDR:
-    case Instruction::USHR_INT:
-    case Instruction::USHR_INT_2ADDR:
-      return true;
-    case Instruction::CONST:
-    case Instruction::CONST_4:
-    case Instruction::CONST_16:
-      if ((value >> 16) == 0) {
-        return true;  // movw, 16-bit unsigned.
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::AND_INT:
-    case Instruction::AND_INT_2ADDR:
-    case Instruction::AND_INT_LIT16:
-    case Instruction::AND_INT_LIT8:
-    case Instruction::OR_INT:
-    case Instruction::OR_INT_2ADDR:
-    case Instruction::OR_INT_LIT16:
-    case Instruction::OR_INT_LIT8:
-      return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
-    case Instruction::XOR_INT:
-    case Instruction::XOR_INT_2ADDR:
-    case Instruction::XOR_INT_LIT16:
-    case Instruction::XOR_INT_LIT8:
-      return (ModifiedImmediate(value) >= 0);
-    case Instruction::MUL_INT:
-    case Instruction::MUL_INT_2ADDR:
-    case Instruction::MUL_INT_LIT8:
-    case Instruction::MUL_INT_LIT16:
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::DIV_INT_LIT8:
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
-    case Instruction::REM_INT_LIT8:
-    case Instruction::REM_INT_LIT16: {
-      EasyMultiplyOp ops[2];
-      return GetEasyMultiplyTwoOps(value, ops);
-    }
-    default:
-      return false;
-  }
-}
-
-bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) {
-  return EncodeImmSingle(value) >= 0;
-}
-
-bool ArmMir2Lir::InexpensiveConstantLong(int64_t value) {
-  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
-}
-
-bool ArmMir2Lir::InexpensiveConstantDouble(int64_t value) {
-  return EncodeImmDouble(value) >= 0;
-}
-
-/*
- * Load an immediate using a shortcut if possible; otherwise
- * grab from the per-translation literal pool.
- *
- * No additional register clobbering operation is performed. Use this version when
- * 1) r_dest is freshly returned from AllocTemp, or
- * 2) the codegen is under fixed register usage.
- */
-LIR* ArmMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
-  LIR* res;
-  int mod_imm;
-
-  if (r_dest.IsFloat()) {
-    return LoadFPConstantValue(r_dest.GetReg(), value);
-  }
-
-  /* See if the value can be constructed cheaply */
-  if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
-    return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
-  }
-  /* Check Modified immediate special cases */
-  mod_imm = ModifiedImmediate(value);
-  if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm);
-    return res;
-  }
-  mod_imm = ModifiedImmediate(~value);
-  if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm);
-    return res;
-  }
-  /* 16-bit immediate? */
-  if ((value & 0xffff) == value) {
-    res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value);
-    return res;
-  }
-  /* Do a low/high pair */
-  res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value));
-  NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value));
-  return res;
-}
-
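The cascade above, summarized as a standalone strategy picker. This is a sketch only: it inlines a compact copy of the ModifiedImmediate logic from earlier in this file, relies on GCC/Clang __builtin_clz/__builtin_ctz, and ignores the Low8() constraint on the Thumb1 form:

    #include <cstdint>
    #include <cstdio>

    // Compact copy of the ModifiedImmediate logic defined earlier in this file.
    static int ModImm(uint32_t v) {
      uint32_t b0 = v & 0xff;
      if (v <= 0xff) return b0;
      if (v == ((b0 << 16) | b0)) return (0x1 << 8) | b0;
      if (v == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0)) return (0x3 << 8) | b0;
      b0 = (v >> 8) & 0xff;
      if (v == ((b0 << 24) | (b0 << 8))) return (0x2 << 8) | b0;
      int zl = __builtin_clz(v), zt = __builtin_ctz(v);
      if (zl + zt < 24) return -1;
      v <<= zl + 1;
      v >>= 25;
      return v | ((0x8 + zl) << 7);
    }

    static const char* Strategy(int32_t value) {
      if (value >= 0 && value <= 255) return "movs (Thumb1, needs a low reg)";
      if (ModImm(value) >= 0) return "mov.w, modified immediate";
      if (ModImm(~value) >= 0) return "mvn.w, modified immediate";
      if ((value & 0xffff) == value) return "movw, 16-bit";
      return "movw + movt pair";
    }

    int main() {
      const int32_t samples[] = {7, 0x00ff00ff, -2, 0xabcd, 0x12345678};
      for (int32_t v : samples) printf("%08x: %s\n", v, Strategy(v));
      return 0;
    }
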
-LIR* ArmMir2Lir::OpUnconditionalBranch(LIR* target) {
-  LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */);
-  res->target = target;
-  return res;
-}
-
-LIR* ArmMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
-  LIR* branch = NewLIR2(kThumbBCond, 0 /* offset to be patched */,
-                        ArmConditionEncoding(cc));
-  branch->target = target;
-  return branch;
-}
-
-LIR* ArmMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
-  ArmOpcode opcode = kThumbBkpt;
-  switch (op) {
-    case kOpBlx:
-      opcode = kThumbBlxR;
-      break;
-    case kOpBx:
-      opcode = kThumbBx;
-      break;
-    default:
-      LOG(FATAL) << "Bad opcode " << op;
-  }
-  return NewLIR1(opcode, r_dest_src.GetReg());
-}
-
-LIR* ArmMir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
-                               int shift) {
-  bool thumb_form =
-      ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
-  ArmOpcode opcode = kThumbBkpt;
-  switch (op) {
-    case kOpAdc:
-      opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR;
-      break;
-    case kOpAnd:
-      opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR;
-      break;
-    case kOpBic:
-      opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR;
-      break;
-    case kOpCmn:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR;
-      break;
-    case kOpCmp:
-      if (thumb_form)
-        opcode = kThumbCmpRR;
-      else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
-        opcode = kThumbCmpHH;
-      else if ((shift == 0) && r_dest_src1.Low8())
-        opcode = kThumbCmpLH;
-      else if (shift == 0)
-        opcode = kThumbCmpHL;
-      else
-        opcode = kThumb2CmpRR;
-      break;
-    case kOpXor:
-      opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR;
-      break;
-    case kOpMov:
-      DCHECK_EQ(shift, 0);
-      if (r_dest_src1.Low8() && r_src2.Low8())
-        opcode = kThumbMovRR;
-      else if (!r_dest_src1.Low8() && !r_src2.Low8())
-        opcode = kThumbMovRR_H2H;
-      else if (r_dest_src1.Low8())
-        opcode = kThumbMovRR_H2L;
-      else
-        opcode = kThumbMovRR_L2H;
-      break;
-    case kOpMul:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR;
-      break;
-    case kOpMvn:
-      opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR;
-      break;
-    case kOpNeg:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR;
-      break;
-    case kOpOr:
-      opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR;
-      break;
-    case kOpSbc:
-      opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR;
-      break;
-    case kOpTst:
-      opcode = (thumb_form) ? kThumbTst : kThumb2TstRR;
-      break;
-    case kOpLsl:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR;
-      break;
-    case kOpLsr:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR;
-      break;
-    case kOpAsr:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR;
-      break;
-    case kOpRor:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR;
-      break;
-    case kOpAdd:
-      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
-      break;
-    case kOpSub:
-      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
-      break;
-    case kOpRev:
-      DCHECK_EQ(shift, 0);
-      if (!thumb_form) {
-        // Binary, but rm is encoded twice.
-        return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
-      }
-      opcode = kThumbRev;
-      break;
-    case kOpRevsh:
-      DCHECK_EQ(shift, 0);
-      if (!thumb_form) {
-        // Binary, but rm is encoded twice.
-        return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
-      }
-      opcode = kThumbRevsh;
-      break;
-    case kOp2Byte:
-      DCHECK_EQ(shift, 0);
-      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8);
-    case kOp2Short:
-      DCHECK_EQ(shift, 0);
-      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
-    case kOp2Char:
-      DCHECK_EQ(shift, 0);
-      return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-  }
-  DCHECK(!IsPseudoLirOp(opcode));
-  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
-    return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
-  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
-    if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) {
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
-    } else {
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg());
-    }
-  } else if (EncodingMap[opcode].flags & IS_QUAD_OP) {
-    return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
-  } else {
-    LOG(FATAL) << "Unexpected encoding operand count";
-    return nullptr;
-  }
-}
-
-LIR* ArmMir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  return OpRegRegShift(op, r_dest_src1, r_src2, 0);
-}
-
-LIR* ArmMir2Lir::OpMovRegMem(RegStorage r_dest ATTRIBUTE_UNUSED,
-                             RegStorage r_base ATTRIBUTE_UNUSED,
-                             int offset ATTRIBUTE_UNUSED,
-                             MoveType move_type ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-  UNREACHABLE();
-}
-
-LIR* ArmMir2Lir::OpMovMemReg(RegStorage r_base ATTRIBUTE_UNUSED,
-                             int offset ATTRIBUTE_UNUSED,
-                             RegStorage r_src ATTRIBUTE_UNUSED,
-                             MoveType move_type ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-  UNREACHABLE();
-}
-
-LIR* ArmMir2Lir::OpCondRegReg(OpKind op ATTRIBUTE_UNUSED,
-                              ConditionCode cc ATTRIBUTE_UNUSED,
-                              RegStorage r_dest ATTRIBUTE_UNUSED,
-                              RegStorage r_src ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
-  UNREACHABLE();
-}
-
-LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
-                                  RegStorage r_src2, int shift) {
-  ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
-  switch (op) {
-    case kOpAdd:
-      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
-      break;
-    case kOpSub:
-      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
-      break;
-    case kOpRsub:
-      opcode = kThumb2RsubRRR;
-      break;
-    case kOpAdc:
-      opcode = kThumb2AdcRRR;
-      break;
-    case kOpAnd:
-      opcode = kThumb2AndRRR;
-      break;
-    case kOpBic:
-      opcode = kThumb2BicRRR;
-      break;
-    case kOpXor:
-      opcode = kThumb2EorRRR;
-      break;
-    case kOpMul:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2MulRRR;
-      break;
-    case kOpDiv:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2SdivRRR;
-      break;
-    case kOpOr:
-      opcode = kThumb2OrrRRR;
-      break;
-    case kOpSbc:
-      opcode = kThumb2SbcRRR;
-      break;
-    case kOpLsl:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2LslRRR;
-      break;
-    case kOpLsr:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2LsrRRR;
-      break;
-    case kOpAsr:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2AsrRRR;
-      break;
-    case kOpRor:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2RorRRR;
-      break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-  }
-  DCHECK(!IsPseudoLirOp(opcode));
-  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
-    return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
-  } else {
-    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
-    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
-  }
-}
-
-LIR* ArmMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
-  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0);
-}
-
-LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
-  bool neg = (value < 0);
-  int32_t abs_value = (neg) ? -value : value;
-  ArmOpcode opcode = kThumbBkpt;
-  ArmOpcode alt_opcode = kThumbBkpt;
-  bool all_low_regs = r_dest.Low8() && r_src1.Low8();
-  int32_t mod_imm = ModifiedImmediate(value);
-
-  switch (op) {
-    case kOpLsl:
-      if (all_low_regs)
-        return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-      else
-        return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-    case kOpLsr:
-      if (all_low_regs)
-        return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-      else
-        return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-    case kOpAsr:
-      if (all_low_regs)
-        return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-      else
-        return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-    case kOpRor:
-      return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-    case kOpAdd:
-      if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
-        return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
-      } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
-          (value <= 1020) && ((value & 0x3) == 0)) {
-        return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
-      }
-      FALLTHROUGH_INTENDED;
-    case kOpSub:
-      if (all_low_regs && ((abs_value & 0x7) == abs_value)) {
-        if (op == kOpAdd)
-          opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
-        else
-          opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
-        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
-      }
-      if (mod_imm < 0) {
-        mod_imm = ModifiedImmediate(-value);
-        if (mod_imm >= 0) {
-          op = (op == kOpAdd) ? kOpSub : kOpAdd;
-        }
-      }
-      if (mod_imm < 0 && (abs_value >> 12) == 0) {
-        // Deliberately used only when the modified immediate encoding is inadequate: the
-        // RRI12 forms do not set flags, and we sometimes rely on the flags even for small
-        // values that are not in low regs.
-        if (op == kOpAdd)
-          opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
-        else
-          opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
-        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
-      }
-      if (op == kOpSub) {
-        opcode = kThumb2SubRRI8M;
-        alt_opcode = kThumb2SubRRR;
-      } else {
-        opcode = kThumb2AddRRI8M;
-        alt_opcode = kThumb2AddRRR;
-      }
-      break;
-    case kOpRsub:
-      opcode = kThumb2RsubRRI8M;
-      alt_opcode = kThumb2RsubRRR;
-      break;
-    case kOpAdc:
-      opcode = kThumb2AdcRRI8M;
-      alt_opcode = kThumb2AdcRRR;
-      break;
-    case kOpSbc:
-      opcode = kThumb2SbcRRI8M;
-      alt_opcode = kThumb2SbcRRR;
-      break;
-    case kOpOr:
-      opcode = kThumb2OrrRRI8M;
-      alt_opcode = kThumb2OrrRRR;
-      if (mod_imm < 0) {
-        mod_imm = ModifiedImmediate(~value);
-        if (mod_imm >= 0) {
-          opcode = kThumb2OrnRRI8M;
-        }
-      }
-      break;
-    case kOpAnd:
-      if (mod_imm < 0) {
-        mod_imm = ModifiedImmediate(~value);
-        if (mod_imm >= 0) {
-          return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
-        }
-      }
-      opcode = kThumb2AndRRI8M;
-      alt_opcode = kThumb2AndRRR;
-      break;
-    case kOpXor:
-      opcode = kThumb2EorRRI8M;
-      alt_opcode = kThumb2EorRRR;
-      break;
-    case kOpMul:
-      // TUNING: power of 2, shift & add
-      mod_imm = -1;
-      alt_opcode = kThumb2MulRRR;
-      break;
-    case kOpCmp: {
-      LIR* res;
-      if (mod_imm >= 0) {
-        res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm);
-      } else {
-        mod_imm = ModifiedImmediate(-value);
-        if (mod_imm >= 0) {
-          res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm);
-        } else {
-          RegStorage r_tmp = AllocTemp();
-          res = LoadConstant(r_tmp, value);
-          OpRegReg(kOpCmp, r_src1, r_tmp);
-          FreeTemp(r_tmp);
-        }
-      }
-      return res;
-    }
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-  }
-
-  if (mod_imm >= 0) {
-    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
-  } else {
-    RegStorage r_scratch = AllocTemp();
-    LoadConstant(r_scratch, value);
-    LIR* res;
-    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
-    else
-      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
-    FreeTemp(r_scratch);
-    return res;
-  }
-}
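
A minimal sketch of the Thumb2 modified-immediate rule that the ModifiedImmediate() calls above keep probing - illustrative only, not ART's implementation: a 32-bit constant is encodable if it is a replicated byte in one of three fixed patterns, or an 8-bit value with bit 7 set rotated right by 8..31 bits. This is also why the add/sub path retries with -value (flipping the operation) and the or/and paths retry with ~value (switching to ORN/BIC).

    #include <cstdint>

    bool IsThumb2ModifiedImmediate(uint32_t value) {
      if (value <= 0xffu) return true;             // 0x000000ab
      uint32_t b0 = value & 0xffu;
      uint32_t b1 = (value >> 8) & 0xffu;
      if (value == b0 * 0x00010001u) return true;  // 0x00ab00ab
      if (value == b1 * 0x01000100u) return true;  // 0xab00ab00
      if (value == b0 * 0x01010101u) return true;  // 0xabababab
      for (int rot = 8; rot <= 31; ++rot) {
        // Rotate left by |rot| to undo a ROR #rot encoding.
        uint32_t imm8 = (value << rot) | (value >> (32 - rot));
        if (imm8 <= 0xffu && (imm8 & 0x80u) != 0) return true;
      }
      return false;
    }
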
-
-/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */
-LIR* ArmMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
-  bool neg = (value < 0);
-  int32_t abs_value = (neg) ? -value : value;
-  bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
-  ArmOpcode opcode = kThumbBkpt;
-  switch (op) {
-    case kOpAdd:
-      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
-        DCHECK_EQ((value & 0x3), 0);
-        return NewLIR1(kThumbAddSpI7, value >> 2);
-      } else if (short_form) {
-        opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
-      }
-      break;
-    case kOpSub:
-      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
-        DCHECK_EQ((value & 0x3), 0);
-        return NewLIR1(kThumbSubSpI7, value >> 2);
-      } else if (short_form) {
-        opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
-      }
-      break;
-    case kOpCmp:
-      if (!neg && short_form) {
-        opcode = kThumbCmpRI8;
-      } else {
-        short_form = false;
-      }
-      break;
-    default:
-      /* Punt to OpRegRegImm - if it's a bad case, catch it there. */
-      short_form = false;
-      break;
-  }
-  if (short_form) {
-    return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value);
-  } else {
-    return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
-  }
-}
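
A worked instance of the SP special case above (values illustrative): "add sp, sp, #256" qualifies because 256 <= 508 and is 4-byte aligned, so it is emitted pre-scaled as NewLIR1(kThumbAddSpI7, 256 >> 2), i.e. with immediate 64; anything larger or misaligned punts to OpRegRegImm.
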
-
-LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
-  LIR* res = nullptr;
-  int32_t val_lo = Low32Bits(value);
-  int32_t val_hi = High32Bits(value);
-  if (r_dest.IsFloat()) {
-    DCHECK(!r_dest.IsPair());
-    if ((val_lo == 0) && (val_hi == 0)) {
-      // TODO: we need better info about the target CPU.  A vector exclusive-or
-      //       would probably be better here if we could rely on its existence.
-      // Load an immediate +2.0 (which encodes to 0)
-      NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
-      // +0.0 = +2.0 - +2.0
-      res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
-    } else {
-      int encoded_imm = EncodeImmDouble(value);
-      if (encoded_imm >= 0) {
-        res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
-      }
-    }
-  } else {
-    // NOTE: Arm32 assumption here.
-    DCHECK(r_dest.IsPair());
-    if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
-      res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
-      LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
-    }
-  }
-  if (res == nullptr) {
-    // No short form - load from the literal pool.
-    LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
-    if (data_target == nullptr) {
-      data_target = AddWideData(&literal_list_, val_lo, val_hi);
-    }
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-    if (r_dest.IsFloat()) {
-      res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
-                   r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
-    } else {
-      DCHECK(r_dest.IsPair());
-      res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
-                   r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
-    }
-    AppendLIR(res);
-  }
-  return res;
-}
-
-int ArmMir2Lir::EncodeShift(int code, int amount) {
-  return ((amount & 0x1f) << 2) | code;
-}
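
EncodeShift() packs the shift amount and shift-type code into the single extra operand used by the RRR encodings. Assuming the conventional ARM shift codes (kArmLsl = 0, kArmLsr = 1, kArmAsr = 2, kArmRor = 3; the enum is defined elsewhere), a word-scaled index comes out as:

    EncodeShift(kArmLsl, 2) == ((2 & 0x1f) << 2) | 0 == 8  // ", lsl #2"
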
-
-LIR* ArmMir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                 int scale, OpSize size) {
-  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
-  LIR* load;
-  ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (all_low_regs && (scale == 0));
-  RegStorage reg_ptr;
-
-  if (r_dest.IsFloat()) {
-    if (r_dest.IsSingle()) {
-      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
-      opcode = kThumb2Vldrs;
-      size = kSingle;
-    } else {
-      DCHECK(r_dest.IsDouble());
-      DCHECK((size == k64) || (size == kDouble));
-      opcode = kThumb2Vldrd;
-      size = kDouble;
-    }
-  } else {
-    if (size == kSingle)
-      size = k32;
-  }
-
-  switch (size) {
-    case kDouble:
-    // Intentional fall-through.
-    case kSingle:
-      reg_ptr = AllocTemp();
-      if (scale) {
-        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                EncodeShift(kArmLsl, scale));
-      } else {
-        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
-      }
-      load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
-      FreeTemp(reg_ptr);
-      return load;
-    case k32:
-    // Intentional fall-through.
-    case kReference:
-      opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
-      break;
-    case kUnsignedHalf:
-      opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR;
-      break;
-    case kSignedHalf:
-      opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR;
-      break;
-    case kUnsignedByte:
-      opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR;
-      break;
-    case kSignedByte:
-      opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-  if (thumb_form)
-    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
-  else
-    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
-
-  return load;
-}
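
For the FP path above, the generated pair for a scaled single-precision load is roughly (register names illustrative):

    add  r_ptr, r_base, r_index, lsl #2   // kThumb2AddRRR with EncodeShift(kArmLsl, scale)
    vldr s0, [r_ptr]                      // kThumb2Vldrs at zero displacement
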
-
-LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                  int scale, OpSize size) {
-  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
-  LIR* store = nullptr;
-  ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (all_low_regs && (scale == 0));
-  RegStorage reg_ptr;
-
-  if (r_src.IsFloat()) {
-    if (r_src.IsSingle()) {
-      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
-      opcode = kThumb2Vstrs;
-      size = kSingle;
-    } else {
-      DCHECK(r_src.IsDouble());
-      DCHECK((size == k64) || (size == kDouble));
-      DCHECK_EQ((r_src.GetReg() & 0x1), 0);
-      opcode = kThumb2Vstrd;
-      size = kDouble;
-    }
-  } else {
-    if (size == kSingle)
-      size = k32;
-  }
-
-  switch (size) {
-    case kDouble:
-    // Intentional fall-through.
-    case kSingle:
-      reg_ptr = AllocTemp();
-      if (scale) {
-        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                EncodeShift(kArmLsl, scale));
-      } else {
-        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
-      }
-      store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
-      FreeTemp(reg_ptr);
-      return store;
-    case k32:
-    // Intentional fall-through.
-    case kReference:
-      opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
-      break;
-    case kUnsignedHalf:
-    // Intentional fall-through.
-    case kSignedHalf:
-      opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
-      break;
-    case kUnsignedByte:
-    // Intentional fall-through.
-    case kSignedByte:
-      opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-  if (thumb_form)
-    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
-  else
-    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
-
-  return store;
-}
-
-// Helper function for LoadBaseDispBody()/StoreBaseDispBody().
-LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
-                                                      int displacement, RegStorage r_src_dest,
-                                                      RegStorage r_work) {
-  DCHECK_ALIGNED(displacement, 4);
-  constexpr int kOffsetMask = 0xff << 2;
-  int encoded_disp = (displacement & kOffsetMask) >> 2;  // Within range of the instruction.
-  RegStorage r_ptr = r_base;
-  if ((displacement & ~kOffsetMask) != 0) {
-    r_ptr = r_work.Valid() ? r_work : AllocTemp();
-    // Add displacement & ~kOffsetMask to the base; a single instruction covers up to +-256KiB.
-    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~kOffsetMask);
-  }
-  LIR* lir = nullptr;
-  if (!r_src_dest.IsPair()) {
-    lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
-  } else {
-    lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(),
-                  encoded_disp);
-  }
-  if ((displacement & ~kOffsetMask) != 0 && !r_work.Valid()) {
-    FreeTemp(r_ptr);
-  }
-  return lir;
-}
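
A worked example of the displacement split (displacement chosen for illustration):

    // displacement = 0x1234, 4-byte aligned:
    //   kOffsetMask  = 0xff << 2             = 0x3fc
    //   encoded_disp = (0x1234 & 0x3fc) >> 2 = 0x8d    // fits the 8-bit field
    //   residue      = 0x1234 & ~0x3fc       = 0x1000  // folded into one kOpAdd
    // i.e. roughly: add r_ptr, r_base, #0x1000, then the imm8<<2 access off r_ptr.
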
-
-/*
- * Load value from base + displacement.  Optionally perform null check
- * on base (which must have an associated s_reg and MIR).  If not
- * performing null check, incoming MIR can be null.
- */
-LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  OpSize size) {
-  LIR* load = nullptr;
-  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
-  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
-  bool short_form = false;
-  bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
-  int scale = 0;  // Used for opcode16 and some indexed loads.
-  bool already_generated = false;
-  switch (size) {
-    case kDouble:
-    // Intentional fall-through.
-    case k64:
-      if (r_dest.IsFloat()) {
-        DCHECK(!r_dest.IsPair());
-        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrd, r_base, displacement, r_dest);
-      } else {
-        DCHECK(r_dest.IsPair());
-        // Use the r_dest.GetLow() for the temporary pointer if needed.
-        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2LdrdI8, r_base, displacement, r_dest,
-                                                    r_dest.GetLow());
-      }
-      already_generated = true;
-      break;
-    case kSingle:
-    // Intentional fall-through.
-    case k32:
-    // Intentional fall-through.
-    case kReference:
-      if (r_dest.IsFloat()) {
-        DCHECK(r_dest.IsSingle());
-        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrs, r_base, displacement, r_dest);
-        already_generated = true;
-        break;
-      }
-      DCHECK_ALIGNED(displacement, 4);
-      scale = 2;
-      if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
-          (displacement >= 0)) {
-        short_form = true;
-        opcode16 = kThumbLdrPcRel;
-      } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
-                 (displacement >= 0)) {
-        short_form = true;
-        opcode16 = kThumbLdrSpRel;
-      } else {
-        short_form = all_low && (displacement >> (5 + scale)) == 0;
-        opcode16 = kThumbLdrRRI5;
-        opcode32 = kThumb2LdrRRI12;
-      }
-      break;
-    case kUnsignedHalf:
-      DCHECK_ALIGNED(displacement, 2);
-      scale = 1;
-      short_form = all_low && (displacement >> (5 + scale)) == 0;
-      opcode16 = kThumbLdrhRRI5;
-      opcode32 = kThumb2LdrhRRI12;
-      break;
-    case kSignedHalf:
-      DCHECK_ALIGNED(displacement, 2);
-      scale = 1;
-      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
-      opcode32 = kThumb2LdrshRRI12;
-      break;
-    case kUnsignedByte:
-      DCHECK_EQ(scale, 0);  // Keep scale = 0.
-      short_form = all_low && (displacement >> (5 + scale)) == 0;
-      opcode16 = kThumbLdrbRRI5;
-      opcode32 = kThumb2LdrbRRI12;
-      break;
-    case kSignedByte:
-      DCHECK_EQ(scale, 0);  // Keep scale = 0.
-      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
-      opcode32 = kThumb2LdrsbRRI12;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-
-  if (!already_generated) {
-    if (short_form) {
-      load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale);
-    } else if ((displacement >> 12) == 0) {  // Thumb2 form.
-      load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement);
-    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
-        InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
-      // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can
-      // actually do it in two because we know that the kOpAdd is a single insn. On the
-      // other hand, we introduce an extra dependency, so this is not necessarily faster.
-      if (opcode16 != kThumbBkpt && r_dest.Low8() &&
-          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
-        // We can use the 16-bit Thumb opcode for the load.
-        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale));
-        load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f);
-      } else {
-        DCHECK_NE(opcode32, kThumbBkpt);
-        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff);
-        load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff);
-      }
-    } else {
-      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
-          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
-        scale = 0;  // Prefer unscaled indexing if the same number of insns.
-      }
-      RegStorage reg_offset = AllocTemp();
-      LoadConstant(reg_offset, displacement >> scale);
-      DCHECK(!r_dest.IsFloat());
-      load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size);
-      FreeTemp(reg_offset);
-    }
-  }
-
-  // TODO: in the future we may need to differentiate Dalvik accesses from spills.
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, rs_rARM_SP);
-    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
-  }
-  return load;
-}
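
To make the two-instruction fallback concrete (displacement chosen for illustration): for displacement = 0x12344, the residue 0x12344 & ~0xfff = 0x12000 is itself a modified immediate (0x90 rotated right by 23), so instead of the three-instruction movw/movt/ldr route the code can emit:

    add r_dest, r_base, #0x12000    // single-instruction kOpAdd
    ldr r_dest, [r_dest, #0x344]    // opcode32 with the low 12 bits
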
-
-LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size, VolatileKind is_volatile) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
-  LIR* load;
-  if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
-      !cu_->compiler_driver->GetInstructionSetFeatures()->
-          AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
-    // Only 64-bit loads need special handling.
-    // If the CPU supports LPAE, an aligned LDRD is atomic - fall through to LoadBaseDispBody().
-    DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadSave().
-    // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.)
-    RegStorage r_ptr = AllocTemp();
-    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-    load = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
-    FreeTemp(r_ptr);
-  } else {
-    load = LoadBaseDispBody(r_base, displacement, r_dest, size);
-  }
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    GenMemBarrier(kLoadAny);
-  }
-
-  return load;
-}
-
-
-LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
-                                   OpSize size) {
-  LIR* store = nullptr;
-  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
-  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
-  bool short_form = false;
-  bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
-  int scale = 0;  // Used for opcode16 and some indexed stores.
-  bool already_generated = false;
-  switch (size) {
-    case kDouble:
-    // Intentional fall-through.
-    case k64:
-      if (r_src.IsFloat()) {
-        // Note: If the register is retrieved by the register allocator, it should never be a pair.
-        // But some functions in mir2lir assume 64-bit registers are 32-bit register pairs.
-        // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
-        if (r_src.IsPair()) {
-          r_src = As64BitFloatReg(r_src);
-        }
-        DCHECK(!r_src.IsPair());
-        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src);
-      } else {
-        DCHECK(r_src.IsPair());
-        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2StrdI8, r_base, displacement, r_src);
-      }
-      already_generated = true;
-      break;
-    case kSingle:
-    // Intentional fall-through.
-    case k32:
-    // Intentional fall-through.
-    case kReference:
-      if (r_src.IsFloat()) {
-        DCHECK(r_src.IsSingle());
-        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrs, r_base, displacement, r_src);
-        already_generated = true;
-        break;
-      }
-      DCHECK_ALIGNED(displacement, 4);
-      scale = 2;
-      if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
-        short_form = true;
-        opcode16 = kThumbStrSpRel;
-      } else {
-        short_form = all_low && (displacement >> (5 + scale)) == 0;
-        opcode16 = kThumbStrRRI5;
-        opcode32 = kThumb2StrRRI12;
-      }
-      break;
-    case kUnsignedHalf:
-    case kSignedHalf:
-      DCHECK_ALIGNED(displacement, 2);
-      scale = 1;
-      short_form = all_low && (displacement >> (5 + scale)) == 0;
-      opcode16 = kThumbStrhRRI5;
-      opcode32 = kThumb2StrhRRI12;
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      DCHECK_EQ(scale, 0);  // Keep scale = 0.
-      short_form = all_low && (displacement >> (5 + scale)) == 0;
-      opcode16 = kThumbStrbRRI5;
-      opcode32 = kThumb2StrbRRI12;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-  if (!already_generated) {
-    if (short_form) {
-      store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale);
-    } else if ((displacement >> 12) == 0) {
-      store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement);
-    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
-        InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
-      // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can
-      // actually do it in two because we know that the kOpAdd is a single insn. On the
-      // other hand, we introduce an extra dependency, so this is not necessarily faster.
-      RegStorage r_scratch = AllocTemp();
-      if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() &&
-          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
-        // We can use the 16-bit Thumb opcode for the store.
-        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale));
-        store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(),
-                        (displacement >> scale) & 0x1f);
-      } else {
-        DCHECK_NE(opcode32, kThumbBkpt);
-        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff);
-        store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff);
-      }
-      FreeTemp(r_scratch);
-    } else {
-      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
-          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
-        scale = 0;  // Prefer unscaled indexing if the same number of insns.
-      }
-      RegStorage r_scratch = AllocTemp();
-      LoadConstant(r_scratch, displacement >> scale);
-      DCHECK(!r_src.IsFloat());
-      store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size);
-      FreeTemp(r_scratch);
-    }
-  }
-
-  // TODO: In the future, we may need to differentiate Dalvik & spill accesses.
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, rs_rARM_SP);
-    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
-  }
-  return store;
-}
-
-LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                               OpSize size, VolatileKind is_volatile) {
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // Ensure that prior accesses become visible to other threads first.
-    GenMemBarrier(kAnyStore);
-  }
-
-  LIR* null_ck_insn;
-  if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
-      !cu_->compiler_driver->GetInstructionSetFeatures()->
-          AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
-    // Only 64-bit stores need special handling.
-    // If the CPU supports LPAE, an aligned STRD is atomic - fall through to StoreBaseDispBody().
-    // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.)
-    DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadSave().
-    RegStorage r_ptr = AllocTemp();
-    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-    LIR* fail_target = NewLIR0(kPseudoTargetLabel);
-    // We have only 5 temporary registers available and if r_base, r_src and r_ptr already
-    // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr
-    // in LDREXD and recalculate it from r_base.
-    RegStorage r_temp = AllocTemp();
-    RegStorage r_temp_high = AllocTemp(false);  // We may not have another temp.
-    if (r_temp_high.Valid()) {
-      null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
-      FreeTemp(r_temp_high);
-      FreeTemp(r_temp);
-    } else {
-      // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
-      null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
-      FreeTemp(r_temp);  // May need the temp for kOpAdd.
-      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-    }
-    NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg());
-    OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
-    FreeTemp(r_ptr);
-  } else {
-    // TODO: base this on target.
-    if (size == kWord) {
-      size = k32;
-    }
-
-    null_ck_insn = StoreBaseDispBody(r_base, displacement, r_src, size);
-  }
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // Preserve order with respect to any subsequent volatile loads.
-    // We need StoreLoad, but that generally requires the most expensive barrier.
-    GenMemBarrier(kAnyAny);
-  }
-
-  return null_ck_insn;
-}
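
The non-LPAE volatile 64-bit path thus boils down to a retry loop like the following (register names illustrative; the exact barrier instruction is whatever GenMemBarrier() emits):

           dmb                                   // kAnyStore barrier
           add    r_ptr, r_base, #displacement
    retry: ldrexd r_t0, r_t1, [r_ptr]            // claim the exclusive monitor; value unused
           strexd r_st, r_lo, r_hi, [r_ptr]
           cmp    r_st, #0
           bne    retry                          // exclusivity lost, try again
           dmb                                   // kAnyAny barrier
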
-
-LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  int opcode;
-  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
-  if (r_dest.IsDouble()) {
-    opcode = kThumb2Vmovd;
-  } else {
-    if (r_dest.IsSingle()) {
-      opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
-    } else {
-      DCHECK(r_src.IsSingle());
-      opcode = kThumb2Fmrs;
-    }
-  }
-  LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
-}
-
-LIR* ArmMir2Lir::OpMem(OpKind op ATTRIBUTE_UNUSED,
-                       RegStorage r_base ATTRIBUTE_UNUSED,
-                       int disp ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpMem for Arm";
-  UNREACHABLE();
-}
-
-LIR* ArmMir2Lir::InvokeTrampoline(OpKind op,
-                                  RegStorage r_tgt,
-                                  // The address of the trampoline is already loaded into r_tgt.
-                                  QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
-  return OpReg(op, r_tgt);
-}
-
-size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) {
-  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
-  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
-  size_t offset = (check_flags & IS_TERTIARY_OP) ? lir->operands[2] : 0;
-
-  if (check_flags & SCALED_OFFSET_X2) {
-    offset = offset * 2;
-  } else if (check_flags & SCALED_OFFSET_X4) {
-    offset = offset * 4;
-  }
-  return offset;
-}
-
-void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
-  // Start with the default counts.
-  Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
-
-  if (pc_rel_temp_ != nullptr) {
-    // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
-    // avoid the promotion; otherwise boost the weight by a factor of 3, because the full
-    // PC-relative load sequence is 4 instructions long and promoting the PC base saves up to
-    // 3 instructions per use.
-    int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
-    if (core_counts[p_map_idx].count == 1) {
-      core_counts[p_map_idx].count = 0;
-    } else {
-      core_counts[p_map_idx].count *= 3;
-    }
-  }
-}
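
Worked numbers for the weighting (illustrative): with 5 uses of the dex cache array base outside loops, the default count becomes 5 x 3 = 15, matching the payoff of up to 3 saved instructions x 5 uses = 15; a single use (weight 1) is zeroed so the promotion can never win on it.
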
-
-void ArmMir2Lir::DoPromotion() {
-  if (CanUseOpPcRelDexCacheArrayLoad()) {
-    pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
-  }
-
-  Mir2Lir::DoPromotion();
-
-  if (pc_rel_temp_ != nullptr) {
-    // Now, if the dex cache array base temp is promoted, remember the register but
-    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
-    dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
-    DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat());
-    mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
-    pc_rel_temp_ = nullptr;
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
deleted file mode 100644
index 2253d10..0000000
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
-#define ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
-
-#include "dex/compiler_enums.h"
-#include "dex/reg_location.h"
-#include "dex/reg_storage.h"
-
-namespace art {
-
-/*
- * Runtime register usage conventions.
- *
- * r0     : As in C/C++, w0 is the 32-bit return register and x0 the 64-bit one.
- * r0-r7  : Argument registers in both Dalvik and C/C++ conventions.
- *          However, for Dalvik->Dalvik calls we'll pass the target's Method*
- *          pointer in x0 as a hidden arg0. Otherwise used as codegen scratch
- *          registers.
- * r8-r15 : Caller save registers (used as temporary registers).
- * r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
- *          the linker, by the trampolines and other stubs (the backend uses
- *          these as temporary registers).
- * r18    : Caller save register (used as temporary register).
- * r19    : (rxSELF) is reserved (pointer to thread-local storage).
- * r20-r29: Callee save registers (promotion targets).
- * r30    : (lr) is reserved (the link register).
- * rsp    : (sp) is reserved (the stack pointer).
- * rzr    : (zr) is reserved (the zero register).
- *
- * 19 core temps that codegen can use (r0-r18).
- * 9 core registers that can be used for promotion.
- *
- * Floating-point registers
- * v0-v31
- *
- * v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
- *          This is analogous to the C/C++ (hard-float) calling convention.
- * v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
- *          Also used as temporary and codegen scratch registers.
- *
- * v0-v7 and v16-v31 : trashed across C calls.
- * v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
- *
- * v16-v31: Used as codegen temp/scratch.
- * v8-v15 : Can be used for promotion.
- *
- * Calling convention (Hard-float)
- *     o On a call to a Dalvik method, pass target's Method* in x0
- *     o r1-r7, v0-v7 will be used for the first 7+8 arguments
- *     o Arguments which cannot be put in registers are placed in appropriate
- *       out slots by the caller.
- *     o Maintain a 16-byte stack alignment
- *
- *  Stack frame diagram (stack grows down, higher addresses at top):
- *
- * +--------------------------------------------+
- * | IN[ins-1]                                  |  {Note: resides in caller's frame}
- * |       .                                    |
- * | IN[0]                                      |
- * | caller's method ArtMethod*                 |  {Pointer sized reference}
- * +============================================+  {Note: start of callee's frame}
- * | spill region                               |  {variable sized - will include lr if non-leaf}
- * +--------------------------------------------+
- * |   ...filler word...                        |  {Note: used as 2nd word of V[locals-1] if long}
- * +--------------------------------------------+
- * | V[locals-1]                                |
- * | V[locals-2]                                |
- * |      .                                     |
- * |      .                                     |
- * | V[1]                                       |
- * | V[0]                                       |
- * +--------------------------------------------+
- * |   0 to 3 words padding                     |
- * +--------------------------------------------+
- * | OUT[outs-1]                                |
- * | OUT[outs-2]                                |
- * |       .                                    |
- * | OUT[0]                                     |
- * | current method ArtMethod*                  | <<== sp w/ 16-byte alignment
- * +============================================+
- */
-
-// First FP callee save.
-#define A64_FP_CALLEE_SAVE_BASE 8
-
-// Temporary macros, used to mark code which wants to distinguish between zr/sp.
-#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
-#define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr)
-#define A64_REGSTORAGE_IS_SP_OR_ZR(rs) (((rs).GetRegNum() & 0x1f) == 0x1f)
-
-enum A64ResourceEncodingPos {
-  kA64GPReg0   = 0,
-  kA64RegLR    = 30,
-  kA64RegSP    = 31,
-  kA64FPReg0   = 32,
-  kA64RegEnd   = 64,
-};
-
-#define IS_SIGNED_IMM(size, value) \
-  ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1)))
-#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
-#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
-#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
-#define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value)
-#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
-#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
-#define IS_SIGNED_IMM26(value) IS_SIGNED_IMM(26, value)
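
These expand to a plain two-sided range check; for instance the 9-bit form used by the ldur/stur and post-index encodings below accepts exactly [-256, 255]:

    static_assert(IS_SIGNED_IMM9(-256), "lower bound included");
    static_assert(IS_SIGNED_IMM9(255), "upper bound included");
    static_assert(!IS_SIGNED_IMM9(256), "2^8 needs a tenth bit");
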
-
-// Quick macro used to define the registers.
-#define A64_REGISTER_CODE_LIST(R) \
-  R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7) \
-  R(8)  R(9)  R(10) R(11) R(12) R(13) R(14) R(15) \
-  R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
-  R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
-
-// Registers (integer) values.
-enum A64NativeRegisterPool {  // private marker to avoid generate-operator-out.py from processing.
-#  define A64_DEFINE_REGISTERS(nr) \
-    rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
-    rx##nr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | nr, \
-    rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \
-    rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr,
-  A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
-#undef A64_DEFINE_REGISTERS
-
-  rxzr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0x3f,
-  rwzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
-  rsp = rx31,
-  rwsp = rw31,
-
-  // Aliases which are not defined in "ARM Architecture Reference, register names".
-  rxIP0 = rx16,
-  rxIP1 = rx17,
-  rxSELF = rx19,
-  rxLR = rx30,
-  /*
-   * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use
-   * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being
-   * allocated as 32-bit temp registers.
-   */
-  rwIP0 = rw16,
-  rwIP1 = rw17,
-  rwSELF = rw19,
-  rwLR = rw30,
-};
-
-#define A64_DEFINE_REGSTORAGES(nr) \
-  constexpr RegStorage rs_w##nr(RegStorage::kValid | rw##nr); \
-  constexpr RegStorage rs_x##nr(RegStorage::kValid | rx##nr); \
-  constexpr RegStorage rs_f##nr(RegStorage::kValid | rf##nr); \
-  constexpr RegStorage rs_d##nr(RegStorage::kValid | rd##nr);
-A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
-#undef A64_DEFINE_REGSTORAGES
-
-constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
-constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
-constexpr RegStorage rs_xIP0(RegStorage::kValid | rxIP0);
-constexpr RegStorage rs_wIP0(RegStorage::kValid | rwIP0);
-constexpr RegStorage rs_xIP1(RegStorage::kValid | rxIP1);
-constexpr RegStorage rs_wIP1(RegStorage::kValid | rwIP1);
-// Reserved registers.
-constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF);
-constexpr RegStorage rs_sp(RegStorage::kValid | rsp);
-constexpr RegStorage rs_xLR(RegStorage::kValid | rxLR);
-// TODO: eliminate the need for these.
-constexpr RegStorage rs_wSELF(RegStorage::kValid | rwSELF);
-constexpr RegStorage rs_wsp(RegStorage::kValid | rwsp);
-constexpr RegStorage rs_wLR(RegStorage::kValid | rwLR);
-
-// RegisterLocation templates return values (following the hard-float calling convention).
-const RegLocation a64_loc_c_return =
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
-const RegLocation a64_loc_c_return_ref =
-    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
-const RegLocation a64_loc_c_return_wide =
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
-const RegLocation a64_loc_c_return_float =
-    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
-const RegLocation a64_loc_c_return_double =
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
-
-/**
- * @brief Shift-type to be applied to a register via EncodeShift().
- */
-enum A64ShiftEncodings {
-  kA64Lsl = 0x0,
-  kA64Lsr = 0x1,
-  kA64Asr = 0x2,
-  kA64Ror = 0x3
-};
-
-/**
- * @brief Extend-type to be applied to a register via EncodeExtend().
- */
-enum A64RegExtEncodings {
-  kA64Uxtb = 0x0,
-  kA64Uxth = 0x1,
-  kA64Uxtw = 0x2,
-  kA64Uxtx = 0x3,
-  kA64Sxtb = 0x4,
-  kA64Sxth = 0x5,
-  kA64Sxtw = 0x6,
-  kA64Sxtx = 0x7
-};
-
-#define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
-#define ENCODE_NO_EXTEND (EncodeExtend(kA64Uxtx, 0))
-/*
- * The following enum defines the list of supported A64 instructions by the
- * assembler. Their corresponding EncodingMap positions will be defined in
- * assemble_arm64.cc.
- */
-enum A64Opcode {
-  kA64First = 0,
-  kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
-  kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Add4rrro,      // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
-  kA64Add4RRre,      // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
-  kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
-  kA64Adrp2xd,       // adrp [1] immlo[30-29] [10000] immhi[23-5] rd[4-0].
-  kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
-  kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
-  kA64Asr3rrd,       // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
-  kA64Asr3rrr,       // asr alias of "sbfm arg0, arg1, arg2, {#31/#63}".
-  kA64B2ct,          // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
-  kA64Blr1x,         // blr [1101011000111111000000] rn[9-5] [00000].
-  kA64Br1x,          // br  [1101011000011111000000] rn[9-5] [00000].
-  kA64Bl1t,          // bl  [100101] imm26[25-0].
-  kA64Brk1d,         // brk [11010100001] imm_16[20-5] [00000].
-  kA64B1t,           // b   [00010100] offset_26[25-0].
-  kA64Cbnz2rt,       // cbnz[00110101] imm_19[23-5] rt[4-0].
-  kA64Cbz2rt,        // cbz [00110100] imm_19[23-5] rt[4-0].
-  kA64Cmn3rro,       // cmn [s0101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
-  kA64Cmn3Rre,       // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
-  kA64Cmn3RdT,       // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
-  kA64Cmp3rro,       // cmp [s1101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
-  kA64Cmp3Rre,       // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
-  kA64Cmp3RdT,       // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
-  kA64Csel4rrrc,     // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
-  kA64Csinc4rrrc,    // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
-  kA64Csinv4rrrc,    // csinv [s1011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
-  kA64Csneg4rrrc,    // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
-  kA64Dmb1B,         // dmb [11010101000000110011] CRm[11-8] [10111111].
-  kA64Eor3Rrl,       // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
-  kA64Eor4rrro,      // eor [s1001010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
-  kA64Extr4rrrd,     // extr[s00100111N0] rm[20-16] imm_s[15-10] rn[9-5] rd[4-0].
-  kA64Fabs2ff,       // fabs[000111100s100000110000] rn[9-5] rd[4-0].
-  kA64Fadd3fff,      // fadd[000111100s1] rm[20-16] [001010] rn[9-5] rd[4-0].
-  kA64Fcmp1f,        // fcmp[000111100s100000001000] rn[9-5] [01000].
-  kA64Fcmp2ff,       // fcmp[000111100s1] rm[20-16] [001000] rn[9-5] [00000].
-  kA64Fcvtzs2wf,     // fcvtzs [000111100s111000000000] rn[9-5] rd[4-0].
-  kA64Fcvtzs2xf,     // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0].
-  kA64Fcvt2Ss,       // fcvt   [0001111000100010110000] rn[9-5] rd[4-0].
-  kA64Fcvt2sS,       // fcvt   [0001111001100010010000] rn[9-5] rd[4-0].
-  kA64Fcvtms2ws,     // fcvtms [0001111000110000000000] rn[9-5] rd[4-0].
-  kA64Fcvtms2xS,     // fcvtms [1001111001110000000000] rn[9-5] rd[4-0].
-  kA64Fdiv3fff,      // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
-  kA64Fmax3fff,      // fmax[000111100s1] rm[20-16] [010010] rn[9-5] rd[4-0].
-  kA64Fmin3fff,      // fmin[000111100s1] rm[20-16] [010110] rn[9-5] rd[4-0].
-  kA64Fmov2ff,       // fmov[000111100s100000010000] rn[9-5] rd[4-0].
-  kA64Fmov2fI,       // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
-  kA64Fmov2sw,       // fmov[0001111000100111000000] rn[9-5] rd[4-0].
-  kA64Fmov2Sx,       // fmov[1001111001100111000000] rn[9-5] rd[4-0].
-  kA64Fmov2ws,       // fmov[0001111001101110000000] rn[9-5] rd[4-0].
-  kA64Fmov2xS,       // fmov[1001111001101111000000] rn[9-5] rd[4-0].
-  kA64Fmul3fff,      // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0].
-  kA64Fneg2ff,       // fneg[000111100s100001010000] rn[9-5] rd[4-0].
-  kA64Frintp2ff,     // frintp [000111100s100100110000] rn[9-5] rd[4-0].
-  kA64Frintm2ff,     // frintm [000111100s100101010000] rn[9-5] rd[4-0].
-  kA64Frintn2ff,     // frintn [000111100s100100010000] rn[9-5] rd[4-0].
-  kA64Frintz2ff,     // frintz [000111100s100101110000] rn[9-5] rd[4-0].
-  kA64Fsqrt2ff,      // fsqrt[000111100s100001110000] rn[9-5] rd[4-0].
-  kA64Fsub3fff,      // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0].
-  kA64Ldrb3wXd,      // ldrb[0011100101] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Ldrb3wXx,      // ldrb[00111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Ldrsb3rXd,     // ldrsb[001110011s] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Ldrsb3rXx,     // ldrsb[001110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Ldrh3wXF,      // ldrh[0111100101] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Ldrh4wXxd,     // ldrh[01111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Ldrsh3rXF,     // ldrsh[011110011s] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Ldrsh4rXxd,    // ldrsh[011110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Ldr2fp,        // ldr [0s011100] imm_19[23-5] rt[4-0].
-  kA64Ldr2rp,        // ldr [0s011000] imm_19[23-5] rt[4-0].
-  kA64Ldr3fXD,       // ldr [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Ldr3rXD,       // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
-  kA64Ldr4fXxG,      // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Ldr4rXxG,      // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64LdrPost3rXd,   // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
-  kA64Ldp4ffXD,      // ldp [0s10110101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64Ldp4rrXD,      // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64LdpPost4rrXD,  // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64Ldur3fXd,      // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
-  kA64Ldur3rXd,      // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0].
-  kA64Ldxr2rX,       // ldxr[1s00100001011111011111] rn[9-5] rt[4-0].
-  kA64Ldaxr2rX,      // ldaxr[1s00100001011111111111] rn[9-5] rt[4-0].
-  kA64Lsl3rrr,       // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
-  kA64Lsr3rrd,       // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
-  kA64Lsr3rrr,       // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
-  kA64Madd4rrrr,     // madd[s0011011000] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
-  kA64Movk3rdM,      // mov [010100101] hw[22-21] imm_16[20-5] rd[4-0].
-  kA64Movn3rdM,      // mov [000100101] hw[22-21] imm_16[20-5] rd[4-0].
-  kA64Movz3rdM,      // mov [011100101] hw[22-21] imm_16[20-5] rd[4-0].
-  kA64Mov2rr,        // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0].
-  kA64Mvn2rr,        // mov [00101010001] rm[20-16] [000000] [11111] rd[4-0].
-  kA64Mul3rrr,       // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
-  kA64Msub4rrrr,     // msub[s0011011000] rm[20-16] [1] ra[14-10] rn[9-5] rd[4-0].
-  kA64Neg3rro,       // neg alias of "sub arg0, rzr, arg1, arg2".
-  kA64Nop0,          // nop alias of "hint #0" [11010101000000110010000000011111].
-  kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
-  kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
-  kA64Ret,           // ret [11010110010111110000001111000000].
-  kA64Rbit2rr,       // rbit [s101101011000000000000] rn[9-5] rd[4-0].
-  kA64Rev2rr,        // rev [s10110101100000000001x] rn[9-5] rd[4-0].
-  kA64Rev162rr,      // rev16[s101101011000000000001] rn[9-5] rd[4-0].
-  kA64Ror3rrr,       // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
-  kA64Sbc3rrr,       // sbc [s0011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
-  kA64Sbfm4rrdd,     // sbfm[0001001100] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
-  kA64Scvtf2fw,      // scvtf  [000111100s100010000000] rn[9-5] rd[4-0].
-  kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
-  kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
-  kA64Smull3xww,     // smull [10011011001] rm[20-16] [011111] rn[9-5] rd[4-0].
-  kA64Smulh3xxx,     // smulh [10011011010] rm[20-16] [011111] rn[9-5] rd[4-0].
-  kA64Stp4ffXD,      // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64Stp4rrXD,      // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64StpPost4rrXD,  // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64StpPre4ffXD,   // stp [0s10110110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64StpPre4rrXD,   // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
-  kA64Str3fXD,       // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Str4fXxG,      // str [1s111100001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Str3rXD,       // str [1s11100100] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Str4rXxG,      // str [1s111000001] rm[20-16] option[15-13] S[12-12] [10] rn[9-5] rt[4-0].
-  kA64Strb3wXd,      // strb[0011100100] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Strb3wXx,      // strb[00111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64Strh3wXF,      // strh[0111100100] imm_12[21-10] rn[9-5] rt[4-0].
-  kA64Strh4wXxd,     // strh[01111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
-  kA64StrPost3rXd,   // str [1s111000000] imm_9[20-12] [01] rn[9-5] rt[4-0].
-  kA64Stur3fXd,      // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0].
-  kA64Stur3rXd,      // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
-  kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
-  kA64Stlxr3wrX,     // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0].
-  kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
-  kA64Sub4RRre,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
-  kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Tst2rl,        // tst alias of "ands rzr, rn, #imm".
-  kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
-  kA64Tbnz3rht,      // tbnz imm_6_b5[31] [0110111] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
-  kA64Tbz3rht,       // tbz imm_6_b5[31] [0110110] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
-  kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
-  kA64Last,
-  kA64NotWide = kA64First,  // 0 - Flag used to select the first instruction variant.
-  kA64Wide = 0x1000         // Flag used to select the second instruction variant.
-};
-std::ostream& operator<<(std::ostream& os, const A64Opcode& rhs);
-
-/*
- * The A64 instruction set provides two variants for many instructions. For example, "mov wN, wM"
- * and "mov xN, xM" or - for floating point instructions - "mov sN, sM" and "mov dN, dM".
- * It definitely makes sense to exploit these symmetries of the instruction set. We do this via the
- * WIDE, UNWIDE macros. For opcodes that allow it, the wide variant can be obtained by applying the
- * WIDE macro to the non-wide opcode. E.g. WIDE(kA64Sub4RRdT).
- */
-
-// Return the wide and no-wide variants of the given opcode.
-#define WIDE(op) ((A64Opcode)((op) | kA64Wide))
-#define UNWIDE(op) ((A64Opcode)((op) & ~kA64Wide))
-
-// Whether the given opcode is wide.
-#define IS_WIDE(op) (((op) & kA64Wide) != 0)
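
A usage sketch (PickVariant is a hypothetical helper; the wide flag sits above the table index, so EncodingMap lookups presumably go through UNWIDE()):

    A64Opcode PickVariant(A64Opcode base, bool is_64bit) {
      A64Opcode op = is_64bit ? WIDE(base) : base;  // e.g. WIDE(kA64Sub4RRdT)
      DCHECK_EQ(UNWIDE(op), base);                  // the base opcode indexes EncodingMap
      return op;
    }
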
-
-enum A64OpDmbOptions {
-  kSY = 0xf,
-  kST = 0xe,
-  kISH = 0xb,
-  kISHST = 0xa,
-  kISHLD = 0x9,
-  kNSH = 0x7,
-  kNSHST = 0x6
-};
-
-// Instruction assembly field_loc kind.
-enum A64EncodingKind {
-  // All the formats below are encoded in the same way (as a kFmtBitBlt).
-  // These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
-  kFmtRegW = 0,   // Word register (w) or wzr.
-  kFmtRegX,       // Extended word register (x) or xzr.
-  kFmtRegR,       // Register with same width as the instruction or zr.
-  kFmtRegWOrSp,   // Word register (w) or wsp.
-  kFmtRegXOrSp,   // Extended word register (x) or sp.
-  kFmtRegROrSp,   // Register with same width as the instruction or sp.
-  kFmtRegS,       // Single FP reg.
-  kFmtRegD,       // Double FP reg.
-  kFmtRegF,       // Single/double FP reg depending on the instruction width.
-  kFmtBitBlt,     // Bit string using end/start.
-
-  // Less likely formats.
-  kFmtUnused,     // Unused field and marks end of formats.
-  kFmtImm6Shift,  // Shift immediate, 6-bit at [31, 23..19].
-  kFmtImm21,      // Sign-extended immediate using [23..5,30..29].
-  kFmtShift,      // Register shift, 9-bit at [23..21, 15..10].
-  kFmtExtend,     // Register extend, 9-bit at [23..21, 15..10].
-  kFmtSkip,       // Unused field, but continue to next.
-};
-std::ostream& operator<<(std::ostream& os, const A64EncodingKind & rhs);
-
-// Struct used to define the snippet positions for each A64 opcode.
-struct A64EncodingMap {
-  uint32_t wskeleton;
-  uint32_t xskeleton;
-  struct {
-    A64EncodingKind kind;
-    int end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
-    int start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
-  } field_loc[4];
-  A64Opcode opcode;  // can be WIDE()-ned to indicate it has a wide variant.
-  uint64_t flags;
-  const char* name;
-  const char* fmt;
-  int size;          // Note: size is in bytes.
-  FixupKind fixup;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
deleted file mode 100644
index 25c69d1..0000000
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ /dev/null
@@ -1,1152 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm64.h"
-
-#include "arch/arm64/instruction_set_features_arm64.h"
-#include "arm64_lir.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "driver/compiler_driver.h"
-
-namespace art {
-
-// The macros below are exclusively used in the encoding map.
-
-// Most generic way of providing two variants for one instruction.
-#define CUSTOM_VARIANTS(variant1, variant2) variant1, variant2
-
-// Used for instructions which do not have a wide variant.
-#define NO_VARIANTS(variant) \
-  CUSTOM_VARIANTS(variant, 0)
-
-// Used for instructions which have a wide variant with the sf bit set to 1.
-#define SF_VARIANTS(sf0_skeleton) \
-  CUSTOM_VARIANTS(sf0_skeleton, (sf0_skeleton | 0x80000000))
-
-// Used for instructions which have a wide variant with the size bits set to either x0 or x1.
-#define SIZE_VARIANTS(sizex0_skeleton) \
-  CUSTOM_VARIANTS(sizex0_skeleton, (sizex0_skeleton | 0x40000000))
-
-// Used for instructions which have a wide variant with the sf and n bits set to 1.
-#define SF_N_VARIANTS(sf0_n0_skeleton) \
-  CUSTOM_VARIANTS(sf0_n0_skeleton, (sf0_n0_skeleton | 0x80400000))
-
-// Used for FP instructions which have single- and double-precision variants, with the type bits
-// set to either 00 or 01.
-#define FLOAT_VARIANTS(type00_skeleton) \
-  CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000))
-
-/*
- * opcode: A64Opcode enum
- * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros.
- * a{n}k: key to applying argument {n}    \
- * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3
- * a{n}e: argument {n} end bit position   /
- * flags: instruction attributes (used in optimization)
- * name: mnemonic name
- * fmt: for pretty-printing
- * fixup: used for second-pass fixes (e.g. address fixups in branch instructions).
- */
-#define ENCODING_MAP(opcode, variants, a0k, a0s, a0e, a1k, a1s, a1e, a2k, a2s, a2e, \
-                     a3k, a3s, a3e, flags, name, fmt, fixup) \
-        {variants, {{a0k, a0s, a0e}, {a1k, a1s, a1e}, {a2k, a2s, a2e}, \
-                    {a3k, a3s, a3e}}, opcode, flags, name, fmt, 4, fixup}
-
-/* Instruction dump string format keys: !pf, where "!" is the start
- * of the key, "p" is which numeric operand to use and "f" is the
- * print format.
- *
- * [p]ositions:
- *     0 -> operands[0] (dest)
- *     1 -> operands[1] (src1)
- *     2 -> operands[2] (src2)
- *     3 -> operands[3] (extra)
- *
- * [f]ormats:
- *     d -> decimal
- *     D -> decimal*4 or decimal*8 depending on the instruction width
- *     E -> decimal*4
- *     F -> decimal*2
- *     G -> ", lsl #2" or ", lsl #3" depending on the instruction width
- *     c -> branch condition (eq, ne, etc.)
- *     t -> pc-relative target
- *     p -> pc-relative address
- *     s -> single precision floating point register
- *     S -> double precision floating point register
- *     f -> single or double precision register (depending on instruction width)
- *     I -> 8-bit immediate floating point number
- *     l -> logical immediate
- *     M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
- *     B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
- *     H -> operand shift
- *     h -> 6-bit shift immediate
- *     T -> register shift (either ", lsl #0" or ", lsl #12")
- *     e -> register extend (e.g. uxtb #1)
- *     o -> register shift (e.g. lsl #1) for Word registers
- *     w -> word (32-bit) register wn, or wzr
- *     W -> word (32-bit) register wn, or wsp
- *     x -> extended (64-bit) register xn, or xzr
- *     X -> extended (64-bit) register xn, or sp
- *     r -> register with same width as instruction, r31 -> wzr, xzr
- *     R -> register with same width as instruction, r31 -> wsp, sp
- *
- *  [!] escape.  To insert "!", use "!!"
- */
-/* NOTE: must be kept in sync with enum A64Opcode from arm64_lir.h */
-const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
-    ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "adc", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000),
-                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
-                 "add", "!0R, !1R, #!2d!3T", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "add", "!0r, !1r, !2r!3o", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Add4RRre), SF_VARIANTS(0x0b200000),
-                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
-                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "add", "!0r, !1r, !2r!3e", kFixupNone),
-    // Note: adr is binary, but declared as tertiary. The third argument is used while doing the
-    //   fixups and contains information to identify the adr label.
-    ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
-                 kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "adr", "!0x, #!1d", kFixupAdr),
-    ENCODING_MAP(kA64Adrp2xd, NO_VARIANTS(0x90000000),
-                 kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "adrp", "!0x, #!1d", kFixupLabel),
-    ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
-                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "and", "!0R, !1r, #!2l", kFixupNone),
-    ENCODING_MAP(WIDE(kA64And4rrro), SF_VARIANTS(0x0a000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "and", "!0r, !1r, !2r!3o", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Asr3rrd), CUSTOM_VARIANTS(0x13007c00, 0x9340fc00),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "asr", "!0r, !1r, #!2d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Asr3rrr), SF_VARIANTS(0x1ac02800),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "asr", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(kA64B2ct, NO_VARIANTS(0x54000000),
-                 kFmtBitBlt, 3, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
-                 NEEDS_FIXUP, "b.!0c", "!1t", kFixupCondBranch),
-    ENCODING_MAP(kA64Blr1x, NO_VARIANTS(0xd63f0000),
-                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
-                 "blr", "!0x", kFixupNone),
-    ENCODING_MAP(kA64Br1x, NO_VARIANTS(0xd61f0000),
-                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
-                 "br", "!0x", kFixupNone),
-    ENCODING_MAP(kA64Bl1t, NO_VARIANTS(0x94000000),
-                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
-                 "bl", "!0T", kFixupLabel),
-    ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
-                 kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "brk", "!0d", kFixupNone),
-    ENCODING_MAP(kA64B1t, NO_VARIANTS(0x14000000),
-                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "b", "!0t", kFixupT1Branch),
-    ENCODING_MAP(WIDE(kA64Cbnz2rt), SF_VARIANTS(0x35000000),
-                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
-                 "cbnz", "!0r, !1t", kFixupCBxZ),
-    ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000),
-                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
-                 "cbz", "!0r, !1t", kFixupCBxZ),
-    ENCODING_MAP(WIDE(kA64Cmn3rro), SF_VARIANTS(0x2b00001f),
-                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0r, !1r!2o", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmn3Rre), SF_VARIANTS(0x2b20001f),
-                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0R, !1r!2e", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
-                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmn", "!0R, #!1d!2T", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmp3rro), SF_VARIANTS(0x6b00001f),
-                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0r, !1r!2o", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmp3Rre), SF_VARIANTS(0x6b20001f),
-                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0R, !1r!2e", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
-                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0R, #!1d!2T", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Csel4rrrc), SF_VARIANTS(0x1a800000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "csel", "!0r, !1r, !2r, !3c", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Csinc4rrrc), SF_VARIANTS(0x1a800400),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "csinc", "!0r, !1r, !2r, !3c", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Csinv4rrrc), SF_VARIANTS(0x5a800000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "csinv", "!0r, !1r, !2r, !3c", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
-    ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
-                 kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_VOLATILE,
-                 "dmb", "#!0B", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
-                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "eor", "!0R, !1r, #!2l", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Eor4rrro), SF_VARIANTS(0x4a000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "eor", "!0r, !1r, !2r!3o", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Extr4rrrd), SF_N_VARIANTS(0x13800000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12,
-                 "extr", "!0r, !1r, !2r, #!3d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fabs", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fadd", "!0f, !1f, !2f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008),
-                 kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES,
-                 "fcmp", "!0f, #0", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000),
-                 kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "fcmp", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000),
-                 kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fcvtzs", "!0w, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000),
-                 kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fcvtzs", "!0x, !1f", kFixupNone),
-    ENCODING_MAP(kA64Fcvt2Ss, NO_VARIANTS(0x1e22c000),
-                 kFmtRegD, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fcvt", "!0S, !1s", kFixupNone),
-    ENCODING_MAP(kA64Fcvt2sS, NO_VARIANTS(0x1e624000),
-                 kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fcvt", "!0s, !1S", kFixupNone),
-    ENCODING_MAP(kA64Fcvtms2ws, NO_VARIANTS(0x1e300000),
-                 kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fcvtms", "!0w, !1s", kFixupNone),
-    ENCODING_MAP(kA64Fcvtms2xS, NO_VARIANTS(0x9e700000),
-                 kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fcvtms", "!0x, !1S", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fdiv", "!0f, !1f, !2f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fmax3fff), FLOAT_VARIANTS(0x1e204800),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmax", "!0f, !1f, !2f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fmin3fff), FLOAT_VARIANTS(0x1e205800),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmin", "!0f, !1f, !2f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "fmov", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
-                 kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "fmov", "!0f, #!1I", kFixupNone),
-    ENCODING_MAP(kA64Fmov2sw, NO_VARIANTS(0x1e270000),
-                 kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmov", "!0s, !1w", kFixupNone),
-    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000),
-                 kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmov", "!0S, !1x", kFixupNone),
-    ENCODING_MAP(kA64Fmov2ws, NO_VARIANTS(0x1e260000),
-                 kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmov", "!0w, !1s", kFixupNone),
-    ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e660000),
-                 kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmov", "!0x, !1S", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmul", "!0f, !1f, !2f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fneg", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Frintp2ff), FLOAT_VARIANTS(0x1e24c000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "frintp", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Frintm2ff), FLOAT_VARIANTS(0x1e254000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "frintm", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Frintn2ff), FLOAT_VARIANTS(0x1e244000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "frintn", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "frintz", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e61c000),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fsqrt", "!0f, !1f", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800),
-                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fsub", "!0f, !1f, !2f", kFixupNone),
-    ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0w, [!1X, !2x]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
-    ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
-    ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
-                 "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
-                 kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldr", "!0f, !1p", kFixupLoad),
-    ENCODING_MAP(WIDE(kA64Ldr2rp), SIZE_VARIANTS(0x18000000),
-                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldr", "!0r, !1p", kFixupLoad),
-    ENCODING_MAP(WIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
-                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldr", "!0f, [!1X, #!2D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
-                 "ldr", "!0r, [!1X, #!2D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
-                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0f, [!1X, !2x!3G]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldr4rXxG), SIZE_VARIANTS(0xb8606800),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0r, [!1X, !2x!3G]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64LdrPost3rXd), SIZE_VARIANTS(0xb8400400),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
-                 "ldr", "!0r, [!1X], #!2d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
-                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
-                 "ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
-                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
-                 "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
-                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
-                 "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000),
-                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldur", "!0f, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldur3rXd), SIZE_VARIANTS(0xb8400000),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldur", "!0r, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
-                 "ldxr", "!0r, [!1X]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
-                 "ldaxr", "!0r, [!1X]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsl", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Lsr3rrd), CUSTOM_VARIANTS(0x53007c00, 0xd340fc00),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsr", "!0r, !1r, #!2d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Lsr3rrr), SF_VARIANTS(0x1ac02400),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsr", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Madd4rrrr), SF_VARIANTS(0x1b000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtRegR, 14, 10, IS_QUAD_OP | REG_DEF0_USE123 | NEEDS_FIXUP,
-                 "madd", "!0r, !1r, !2r, !3r", kFixupA53Erratum835769),
-    ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000),
-                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0,
-                 "movk", "!0r, #!1d!2M", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Movn3rdM), SF_VARIANTS(0x12800000),
-                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
-                 "movn", "!0r, #!1d!2M", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Movz3rdM), SF_VARIANTS(0x52800000),
-                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
-                 "movz", "!0r, #!1d!2M", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
-                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
-                 "mov", "!0r, !1r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
-                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mvn", "!0r, !1r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Mul3rrr), SF_VARIANTS(0x1b007c00),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Msub4rrrr), SF_VARIANTS(0x1b008000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtRegR, 14, 10, IS_QUAD_OP | REG_DEF0_USE123 | NEEDS_FIXUP,
-                 "msub", "!0r, !1r, !2r, !3r", kFixupA53Erratum835769),
-    ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
-                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "neg", "!0r, !1r!2o", kFixupNone),
-    ENCODING_MAP(kA64Nop0, NO_VARIANTS(0xd503201f),
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "nop", "", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000),
-                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "orr", "!0R, !1r, #!2l", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Orr4rrro), SF_VARIANTS(0x2a000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "orr", "!0r, !1r, !2r!3o", kFixupNone),
-    ENCODING_MAP(kA64Ret, NO_VARIANTS(0xd65f03c0),
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
-                 "ret", "", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Rbit2rr), SF_VARIANTS(0x5ac00000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "rbit", "!0r, !1r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "rev", "!0r, !1r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0x5ac00400),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "rev16", "!0r, !1r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "ror", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Sbc3rrr), SF_VARIANTS(0x5a000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "sbc", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Sbfm4rrdd), SF_N_VARIANTS(0x13000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
-                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
-                 "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000),
-                 kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "scvtf", "!0f, !1w", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000),
-                 kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "scvtf", "!0f, !1x", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Sdiv3rrr), SF_VARIANTS(0x1ac00c00),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sdiv", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(kA64Smull3xww, NO_VARIANTS(0x9b207c00),
-                 kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "smull", "!0x, !1w, !2w", kFixupNone),
-    ENCODING_MAP(kA64Smulh3xxx, NO_VARIANTS(0x9b407c00),
-                 kFmtRegX, 4, 0, kFmtRegX, 9, 5, kFmtRegX, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "smulh", "!0x, !1x, !2x", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
-                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
-                 "stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
-                 "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
-                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
-                 "stp", "!0r, !1r, [!2X], #!3D", kFixupNone),
-    ENCODING_MAP(WIDE(kA64StpPre4ffXD), CUSTOM_VARIANTS(0x2d800000, 0x6d800000),
-                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
-                 "stp", "!0f, !1f, [!2X, #!3D]!!", kFixupNone),
-    ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000),
-                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
-                 "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
-                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "str", "!0f, [!1X, #!2D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
-                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "str", "!0f, [!1X, !2x!3G]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "str", "!0r, [!1X, #!2D]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "str", "!0r, [!1X, !2x!3G]", kFixupNone),
-    ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "strb", "!0w, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0w, [!1X, !2x]", kFixupNone),
-    ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
-                 "strh", "!0w, [!1X, #!2F]", kFixupNone),
-    ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
-                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64StrPost3rXd), SIZE_VARIANTS(0xb8000400),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE,
-                 "str", "!0r, [!1X], #!2d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000),
-                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "stur", "!0f, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Stur3rXd), SIZE_VARIANTS(0xb8000000),
-                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "stur", "!0r, [!1X, #!2d]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
-                 kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
-                 "stxr", "!0w, !1r, [!2X]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00),
-                 kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
-                 "stlxr", "!0w, !1r, [!2X]", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
-                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
-                 "sub", "!0R, !1R, #!2d!3T", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Sub4rrro), SF_VARIANTS(0x4b000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "sub", "!0r, !1r, !2r!3o", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Sub4RRre), SF_VARIANTS(0x4b200000),
-                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
-                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "sub", "!0r, !1r, !2r!3e", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
-                 kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0r, !1R, #!2d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Tst2rl), SF_VARIANTS(0x7200001f),
-                 kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
-                 "tst", "!0r, !1l", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
-                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0r, !1r!2o", kFixupNone),
-    // NOTE: Tbz/Tbnz does not require SETS_CCODES, but it may be replaced by other LIRs that
-    // require SETS_CCODES during the fix-up stage.
-    ENCODING_MAP(WIDE(kA64Tbnz3rht), CUSTOM_VARIANTS(0x37000000, 0x37000000),
-                 kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
-                 "tbnz", "!0r, #!1h, !2t", kFixupTBxZ),
-    ENCODING_MAP(WIDE(kA64Tbz3rht), CUSTOM_VARIANTS(0x36000000, 0x36000000),
-                 kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
-                 "tbz", "!0r, #!1h, !2t", kFixupTBxZ),
-    ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
-                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
-                 "ubfm", "!0r, !1r, !2d, !3d", kFixupNone),
-};
-
-// new_lir replaces orig_lir in the pcrel_fixup list.
-void Arm64Mir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
-  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
-  if (UNLIKELY(prev_lir == nullptr)) {
-    first_fixup_ = new_lir;
-  } else {
-    prev_lir->u.a.pcrel_next = new_lir;
-  }
-  orig_lir->flags.fixup = kFixupNone;
-}
-
-// new_lir is inserted before orig_lir in the pcrel_fixup list.
-void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
-  new_lir->u.a.pcrel_next = orig_lir;
-  if (UNLIKELY(prev_lir == nullptr)) {
-    first_fixup_ = new_lir;
-  } else {
-    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
-    prev_lir->u.a.pcrel_next = new_lir;
-  }
-}
-
-/* Nop, used for aligning code. Nop is an alias for hint #0. */
-#define PADDING_NOP (UINT32_C(0xd503201f))
-
-uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
-  uint8_t* const write_buffer = write_pos;
-  for (; lir != nullptr; lir = NEXT_LIR(lir)) {
-    lir->offset = (write_pos - write_buffer);
-    bool opcode_is_wide = IS_WIDE(lir->opcode);
-    A64Opcode opcode = UNWIDE(lir->opcode);
-
-    if (UNLIKELY(IsPseudoLirOp(opcode))) {
-      continue;
-    }
-
-    if (LIKELY(!lir->flags.is_nop)) {
-      const A64EncodingMap *encoder = &EncodingMap[opcode];
-
-      // Select the right variant of the skeleton.
-      uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
-      DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode));
-
-      for (int i = 0; i < 4; i++) {
-        A64EncodingKind kind = encoder->field_loc[i].kind;
-        uint32_t operand = lir->operands[i];
-        uint32_t value;
-
-        if (LIKELY(static_cast<unsigned>(kind) <= kFmtBitBlt)) {
-          // Note: this will handle kFmtReg* and kFmtBitBlt.
-
-          if (static_cast<unsigned>(kind) < kFmtBitBlt) {
-            bool is_zero = A64_REG_IS_ZR(operand);
-
-            if (kIsDebugBuild && (kFailOnSizeError || kReportSizeError)) {
-              // Register usage checks: First establish register usage requirements based on the
-              // format in `kind'.
-              bool want_float = false;     // Want a float (rather than core) register.
-              bool want_64_bit = false;    // Want a 64-bit (rather than 32-bit) register.
-              bool want_var_size = true;   // Want register with variable size (kFmtReg{R,F}).
-              bool want_zero = false;      // Want the zero (rather than sp) register.
-              switch (kind) {
-                case kFmtRegX:
-                  want_64_bit = true;
-                  FALLTHROUGH_INTENDED;
-                case kFmtRegW:
-                  want_var_size = false;
-                  FALLTHROUGH_INTENDED;
-                case kFmtRegR:
-                  want_zero = true;
-                  break;
-                case kFmtRegXOrSp:
-                  want_64_bit = true;
-                  FALLTHROUGH_INTENDED;
-                case kFmtRegWOrSp:
-                  want_var_size = false;
-                  break;
-                case kFmtRegROrSp:
-                  break;
-                case kFmtRegD:
-                  want_64_bit = true;
-                  FALLTHROUGH_INTENDED;
-                case kFmtRegS:
-                  want_var_size = false;
-                  FALLTHROUGH_INTENDED;
-                case kFmtRegF:
-                  want_float = true;
-                  break;
-                default:
-                  LOG(FATAL) << "Bad fmt for arg n. " << i << " of " << encoder->name
-                             << " (" << kind << ")";
-                  break;
-              }
-
-              // want_var_size == true means kind == kFmtReg{R,F}. In these two cases, we want
-              // the register size to be consistent with the instruction width.
-              if (want_var_size) {
-                want_64_bit = opcode_is_wide;
-              }
-
-              // Now check that the requirements are satisfied.
-              RegStorage reg(operand | RegStorage::kValid);
-              const char *expected = nullptr;
-              if (want_float) {
-                if (!reg.IsFloat()) {
-                  expected = "float register";
-                } else if (reg.IsDouble() != want_64_bit) {
-                  expected = (want_64_bit) ? "double register" : "single register";
-                }
-              } else {
-                if (reg.IsFloat()) {
-                  expected = "core register";
-                } else if (reg.Is64Bit() != want_64_bit) {
-                  expected = (want_64_bit) ? "x-register" : "w-register";
-                } else if (A64_REGSTORAGE_IS_SP_OR_ZR(reg) && is_zero != want_zero) {
-                  expected = (want_zero) ? "zero-register" : "sp-register";
-                }
-              }
-
-              // Fail if `expected' records an unsatisfied requirement.
-              if (expected != nullptr) {
-                LOG(WARNING) << "Method: " << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-                             << " @ 0x" << std::hex << lir->dalvik_offset;
-                if (kFailOnSizeError) {
-                  LOG(FATAL) << "Bad argument n. " << i << " of " << encoder->name
-                             << "(" << UNWIDE(encoder->opcode) << ", " << encoder->fmt << ")"
-                             << ". Expected " << expected << ", got 0x" << std::hex << operand;
-                } else {
-                  LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name
-                               << ". Expected " << expected << ", got 0x" << std::hex << operand;
-                }
-              }
-            }
-
-            // In the lines below, we rely on (operand & 0x1f) == 31 to be true for register sp
-            // and zr. This means that these two registers do not need any special treatment, as
-            // their bottom 5 bits are correctly set to 31 == 0b11111, which is the right
-            // value for encoding both sp and zr.
-            static_assert((rxzr & 0x1f) == 0x1f, "rzr register number must be 31");
-            static_assert((rsp & 0x1f) == 0x1f, "rsp register number must be 31");
-          }
-
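-          // Generic bit-blit: shift the operand to the field's start bit and mask off anything
-          // above the field's end bit (e.g. an Rn field with end 9, start 5 lands in bits [9:5]).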
-          value = (operand << encoder->field_loc[i].start) &
-              ((1 << (encoder->field_loc[i].end + 1)) - 1);
-          bits |= value;
-        } else {
-          switch (kind) {
-            case kFmtSkip:
-              break;  // Nothing to do, but continue to next.
-            case kFmtUnused:
-              i = 4;  // Done, break out of the enclosing loop.
-              break;
-            case kFmtShift:
-              // Intentional fallthrough.
-            case kFmtExtend:
-              DCHECK_EQ((operand & (1 << 6)) == 0, kind == kFmtShift);
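-              // The low 6 bits of the operand hold the shift/extend amount (instruction bits
-              // [15:10]); operand bits [8:6] select the shift/extend type (bits [23:21]).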
-              value = (operand & 0x3f) << 10;
-              value |= ((operand & 0x1c0) >> 6) << 21;
-              bits |= value;
-              break;
-            case kFmtImm21:
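-              // adr/adrp-style split immediate: the low 2 bits become immlo (bits [30:29]) and
-              // the remaining 19 bits become immhi (bits [23:5]).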
-              value = (operand & 0x3) << 29;
-              value |= ((operand & 0x1ffffc) >> 2) << 5;
-              bits |= value;
-              break;
-            case kFmtImm6Shift:
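-              // tbz/tbnz bit number: the low 5 bits go to b40 (bits [23:19]) and bit 5 goes to
-              // b5 (bit 31).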
-              value = (operand & 0x1f) << 19;
-              value |= ((operand & 0x20) >> 5) << 31;
-              bits |= value;
-              break;
-            default:
-              LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
-                         << " (" << kind << ")";
-          }
-        }
-      }
-
-      DCHECK_EQ(encoder->size, 4);
-      write_pos[0] = (bits & 0xff);
-      write_pos[1] = ((bits >> 8) & 0xff);
-      write_pos[2] = ((bits >> 16) & 0xff);
-      write_pos[3] = ((bits >> 24) & 0xff);
-      write_pos += 4;
-    }
-  }
-
-  return write_pos;
-}
-
-// Align the data offset on an 8-byte boundary: it will only contain double-word items, as word
-// immediates are better set directly from the code (they require no more than 2 instructions).
-#define ALIGNED_DATA_OFFSET(offset) (((offset) + 0x7) & ~0x7)
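-// E.g. ALIGNED_DATA_OFFSET(13) == 16 and ALIGNED_DATA_OFFSET(16) == 16.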
-
-/*
- * Get the LIR which emits the instruction preceding the given LIR.
- * Returns nullptr if no previous emitting instruction is found.
- */
-static LIR* GetPrevEmittingLIR(LIR* lir) {
-  DCHECK(lir != nullptr);
-  LIR* prev_lir = lir->prev;
-  while ((prev_lir != nullptr) &&
-         (prev_lir->flags.is_nop || Mir2Lir::IsPseudoLirOp(prev_lir->opcode))) {
-    prev_lir = prev_lir->prev;
-  }
-  return prev_lir;
-}
-
-// Assemble the LIR into binary instruction format.
-void Arm64Mir2Lir::AssembleLIR() {
-  LIR* lir;
-  LIR* prev_lir;
-  cu_->NewTimingSplit("Assemble");
-  int assembler_retries = 0;
-  CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
-  int32_t offset_adjustment;
-  AssignDataOffsets();
-
-  /*
-   * Note: generation must be 1 on the first pass (to distinguish from the initialized state of
-   * 0 for non-visited nodes). Start at zero here; the bit is flipped to 1 on entry to the loop.
-   */
-  int generation = 0;
-  while (true) {
-    offset_adjustment = 0;
-    AssemblerStatus res = kSuccess;  // Assume success
-    generation ^= 1;
-    // Note: nodes requiring possible fixup are linked in ascending order.
-    lir = first_fixup_;
-    prev_lir = nullptr;
-    while (lir != nullptr) {
-      // NOTE: Any new non-pc_rel instructions inserted due to retry must be explicitly encoded at
-      // the time of insertion.  Note that inserted instructions don't need use/def flags, but do
-      // need size and pc-rel status properly updated.
-      lir->offset += offset_adjustment;
-      // During the pass, this lets us tell whether a node has already been updated with
-      // offset_adjustment.
-      lir->flags.generation = generation;
-      switch (static_cast<FixupKind>(lir->flags.fixup)) {
-        case kFixupLabel:
-        case kFixupNone:
-        case kFixupVLoad:
-          break;
-        case kFixupT1Branch: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir);
-          CodeOffset pc = lir->offset;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          DCHECK_ALIGNED(delta, 4);
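-          // An unconditional branch encodes a signed 26-bit word offset, giving a range of
-          // roughly +/-128 MiB.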
-          if (!IS_SIGNED_IMM26(delta >> 2)) {
-            LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
-          }
-          lir->operands[0] = delta >> 2;
-          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 1) {
-            // Useless branch.
-            offset_adjustment -= lir->flags.size;
-            lir->flags.is_nop = true;
-            // Don't unlink - just set to do-nothing.
-            lir->flags.fixup = kFixupNone;
-            res = kRetryAll;
-          }
-          break;
-        }
-        case kFixupLoad:
-        case kFixupCBxZ:
-        case kFixupCondBranch: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir);
-          CodeOffset pc = lir->offset;
-          CodeOffset target = target_lir->offset +
-            ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          DCHECK_ALIGNED(delta, 4);
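-          // These encodings hold a signed 19-bit word offset, i.e. a range of roughly +/-1 MiB.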
-          if (!IS_SIGNED_IMM19(delta >> 2)) {
-            LOG(FATAL) << "Invalid jump range in kFixupLoad";
-          }
-          lir->operands[1] = delta >> 2;
-          break;
-        }
-        case kFixupTBxZ: {
-          int16_t opcode = lir->opcode;
-          RegStorage reg(lir->operands[0] | RegStorage::kValid);
-          int32_t imm = lir->operands[1];
-          DCHECK_EQ(IS_WIDE(opcode), reg.Is64Bit());
-          DCHECK_LT(imm, 64);
-          if (imm >= 32) {
-            DCHECK(IS_WIDE(opcode));
-          } else if (kIsDebugBuild && IS_WIDE(opcode)) {
-            // "tbz/tbnz x0, #imm(<32)" is the same with "tbz/tbnz w0, #imm(<32)", but GCC/oatdump
-            // will disassemble it as "tbz/tbnz w0, #imm(<32)". So unwide the LIR to make the
-            // compiler log behave the same with those disassembler in debug build.
-            // This will also affect tst instruction if it need to be replaced, but there is no
-            // performance difference between "tst Xt" and "tst Wt".
-            lir->opcode = UNWIDE(opcode);
-            lir->operands[0] = As32BitReg(reg).GetReg();
-          }
-
-          // Fix-up branch offset.
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir);
-          CodeOffset pc = lir->offset;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          DCHECK_ALIGNED(delta, 4);
-          // Check if branch offset can be encoded in tbz/tbnz.
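-          // (tbz/tbnz only encode a signed 14-bit word offset, roughly +/-32 KiB.)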
-          if (!IS_SIGNED_IMM14(delta >> 2)) {
-            DexOffset dalvik_offset = lir->dalvik_offset;
-            // "tbz/tbnz Rt, #imm, label" -> "tst Rt, #(1<<imm)".
-            offset_adjustment -= lir->flags.size;
-            int32_t encodedImm =
-                EncodeLogicalImmediate(IS_WIDE(opcode), UINT64_C(1) << lir->operands[1]);
-            DCHECK_NE(encodedImm, -1);
-            lir->opcode = IS_WIDE(opcode) ? WIDE(kA64Tst2rl) : kA64Tst2rl;
-            lir->operands[1] = encodedImm;
-            lir->target = nullptr;
-            lir->flags.fixup = EncodingMap[kA64Tst2rl].fixup;
-            lir->flags.size = EncodingMap[kA64Tst2rl].size;
-            offset_adjustment += lir->flags.size;
-            // Insert "beq/bneq label".
-            opcode = UNWIDE(opcode);
-            DCHECK(opcode == kA64Tbz3rht || opcode == kA64Tbnz3rht);
-            LIR* new_lir = RawLIR(dalvik_offset, kA64B2ct,
-                opcode == kA64Tbz3rht ? kArmCondEq : kArmCondNe, 0, 0, 0, 0, target_lir);
-            InsertLIRAfter(lir, new_lir);
-            new_lir->offset = lir->offset + lir->flags.size;
-            new_lir->flags.generation = generation;
-            new_lir->flags.fixup = EncodingMap[kA64B2ct].fixup;
-            new_lir->flags.size = EncodingMap[kA64B2ct].size;
-            offset_adjustment += new_lir->flags.size;
-            // lir no longer pcrel, unlink and link in new_lir.
-            ReplaceFixup(prev_lir, lir, new_lir);
-            prev_lir = new_lir;  // Continue with the new instruction.
-            lir = new_lir->u.a.pcrel_next;
-            res = kRetryAll;
-            continue;
-          }
-          lir->operands[2] = delta >> 2;
-          break;
-        }
-        case kFixupAdr: {
-          LIR* target_lir = lir->target;
-          int32_t delta;
-          if (target_lir) {
-            CodeOffset target_offs = ((target_lir->flags.generation == lir->flags.generation) ?
-                                      0 : offset_adjustment) + target_lir->offset;
-            delta = target_offs - lir->offset;
-          } else if (lir->operands[2] >= 0) {
-            const EmbeddedData* tab = UnwrapPointer<EmbeddedData>(lir->operands[2]);
-            delta = tab->offset + offset_adjustment - lir->offset;
-          } else {
-            // No fixup: this usage allows retrieving the current PC.
-            delta = lir->operands[1];
-          }
-          if (!IS_SIGNED_IMM21(delta)) {
-            LOG(FATAL) << "Jump range above 1MB in kFixupAdr";
-          }
-          lir->operands[1] = delta;
-          break;
-        }
-        case kFixupA53Erratum835769:
-          // Avoid emitting code that could trigger Cortex A53's erratum 835769.
-          // This fixup should be carried out for all multiply-accumulate instructions: madd, msub,
-          // smaddl, smsubl, umaddl and umsubl.
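-          // The check below separates a 64-bit multiply-accumulate from an immediately
-          // preceding load or store by inserting a NOP between them.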
-          if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()
-              ->NeedFixCortexA53_835769()) {
-            // Check that this is a 64-bit multiply-accumulate.
-            if (IS_WIDE(lir->opcode)) {
-              LIR* prev_insn = GetPrevEmittingLIR(lir);
-              if (prev_insn == nullptr) {
-                break;
-              }
-              uint64_t prev_insn_flags = EncodingMap[UNWIDE(prev_insn->opcode)].flags;
-              // Check that the instruction preceding the multiply-accumulate is a load or store.
-              if ((prev_insn_flags & IS_LOAD) != 0 || (prev_insn_flags & IS_STORE) != 0) {
-                // insert a NOP between the load/store and the multiply-accumulate.
-                LIR* new_lir = RawLIR(lir->dalvik_offset, kA64Nop0, 0, 0, 0, 0, 0, nullptr);
-                new_lir->offset = lir->offset;
-                new_lir->flags.fixup = kFixupNone;
-                new_lir->flags.size = EncodingMap[kA64Nop0].size;
-                InsertLIRBefore(lir, new_lir);
-                lir->offset += new_lir->flags.size;
-                offset_adjustment += new_lir->flags.size;
-                res = kRetryAll;
-              }
-            }
-          }
-          break;
-        default:
-          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
-      }
-      prev_lir = lir;
-      lir = lir->u.a.pcrel_next;
-    }
-
-    if (res == kSuccess) {
-      DCHECK_EQ(offset_adjustment, 0);
-      break;
-    } else {
-      assembler_retries++;
-      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
-        CodegenDump();
-        LOG(FATAL) << "Assembler error - too many retries";
-      }
-      starting_offset += offset_adjustment;
-      data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
-      AssignDataOffsets();
-    }
-  }
-
-  // Build the CodeBuffer.
-  DCHECK_LE(data_offset_, total_size_);
-  code_buffer_.reserve(total_size_);
-  code_buffer_.resize(starting_offset);
-  uint8_t* write_pos = &code_buffer_[0];
-  write_pos = EncodeLIRs(write_pos, first_lir_insn_);
-  DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
-
-  DCHECK_EQ(data_offset_, ALIGNED_DATA_OFFSET(code_buffer_.size()));
-
-  // Install literals
-  InstallLiteralPools();
-
-  // Install switch tables
-  InstallSwitchTables();
-
-  // Install fill array data
-  InstallFillArrayData();
-
-  // Create the mapping table and native offset to reference map.
-  cu_->NewTimingSplit("PcMappingTable");
-  CreateMappingTables();
-
-  cu_->NewTimingSplit("GcMap");
-  CreateNativeGcMap();
-}
-
-size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) {
-  A64Opcode opcode = UNWIDE(lir->opcode);
-  DCHECK(!IsPseudoLirOp(opcode));
-  return EncodingMap[opcode].size;
-}
-
-// Link instructions that may need fixup into the fixup chain and assign offsets.
-uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
-  LIR* end_lir = tail_lir->next;
-
-  LIR* last_fixup = nullptr;
-  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
-    A64Opcode opcode = UNWIDE(lir->opcode);
-    if (!lir->flags.is_nop) {
-      if (lir->flags.fixup != kFixupNone) {
-        if (!IsPseudoLirOp(opcode)) {
-          lir->flags.size = EncodingMap[opcode].size;
-          lir->flags.fixup = EncodingMap[opcode].fixup;
-        } else {
-          DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4);
-          lir->flags.size = 0;
-          lir->flags.fixup = kFixupLabel;
-        }
-        // Link into the fixup chain.
-        lir->flags.use_def_invalid = true;
-        lir->u.a.pcrel_next = nullptr;
-        if (first_fixup_ == nullptr) {
-          first_fixup_ = lir;
-        } else {
-          last_fixup->u.a.pcrel_next = lir;
-        }
-        last_fixup = lir;
-        lir->offset = offset;
-      }
-      offset += lir->flags.size;
-    }
-  }
-  return offset;
-}
-
-void Arm64Mir2Lir::AssignDataOffsets() {
-  /* Set up offsets for literals */
-  CodeOffset offset = data_offset_;
-
-  offset = AssignLiteralOffset(offset);
-
-  offset = AssignSwitchTablesOffset(offset);
-
-  total_size_ = AssignFillArrayDataOffset(offset);
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm64/backend_arm64.h b/compiler/dex/quick/arm64/backend_arm64.h
deleted file mode 100644
index 53650c4..0000000
--- a/compiler/dex/quick/arm64/backend_arm64.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_ARM64_BACKEND_ARM64_H_
-#define ART_COMPILER_DEX_QUICK_ARM64_BACKEND_ARM64_H_
-
-namespace art {
-
-struct CompilationUnit;
-class Mir2Lir;
-class MIRGraph;
-class ArenaAllocator;
-
-Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                            ArenaAllocator* const arena);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_ARM64_BACKEND_ARM64_H_
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
deleted file mode 100644
index 036da2e..0000000
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ /dev/null
@@ -1,595 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the Arm64 ISA. */
-
-#include "codegen_arm64.h"
-
-#include "arm64_lir.h"
-#include "art_method.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "gc/accounting/card_table.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mirror/object_array-inl.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-
-namespace art {
-
-/*
- * The sparse table in the literal pool is an array of <key, displacement>
- * pairs.  Each iteration of the loop below loads one pair with ldp.
- * The test loop will look something like:
- *
- *   adr   r_base, <table>
- *   ldr   r_val, [rA64_SP, v_reg_off]
- *   mov   r_idx, #table_size
- * loop:
- *   cbz   r_idx, quit
- *   ldp   r_key, r_disp, [r_base], #8
- *   sub   r_idx, #1
- *   cmp   r_val, r_key
- *   b.ne  loop
- *   adr   r_base, #0        ; This is the instruction from which we compute displacements
- *   add   r_base, r_disp
- *   br    r_base
- * quit:
- */
-void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later.
-  SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  uint32_t size = table[1];
-  switch_tables_.push_back(tab_rec);
-
-  // Get the switch value
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage r_base = AllocTempWide();
-  // Allocate key and disp temps.
-  RegStorage r_key = AllocTemp();
-  RegStorage r_disp = AllocTemp();
-  // Materialize a pointer to the switch table
-  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
-  // Set up r_idx
-  RegStorage r_idx = AllocTemp();
-  LoadConstant(r_idx, size);
-
-  // Entry of loop.
-  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
-  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);
-
-  // Load next key/disp.
-  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
-  OpRegRegImm(kOpSub, r_idx, r_idx, 1);
-
-  // Go to next case, if key does not match.
-  OpRegReg(kOpCmp, r_key, rl_src.reg);
-  OpCondBranch(kCondNe, loop_entry);
-
-  // Key does match: branch to case label.
-  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
-  tab_rec->anchor = switch_label;
-
-  // Add displacement to base branch address and go!
-  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
-  NewLIR1(kA64Br1x, r_base.GetReg());
-
-  // Loop exit label.
-  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
-  branch_out->target = loop_exit;
-}
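
For reference, the loop above encodes the following lookup; this is a minimal sketch with illustrative names (`SparseEntry`, `SparseSwitchLookup`), not code from the backend:

```cpp
#include <cstddef>
#include <cstdint>

// Each sparse-switch entry pairs a case key with a branch displacement,
// mirroring the <key, displacement> layout that ldp loads pairwise.
struct SparseEntry {
  int32_t key;
  int32_t displacement;
};

// Linear scan, as in the emitted loop: returns the matching displacement,
// or 0 to fall through when no key matches.
int32_t SparseSwitchLookup(const SparseEntry* table, std::size_t size, int32_t value) {
  for (std::size_t i = 0; i < size; ++i) {
    if (table[i].key == value) {
      return table[i].displacement;  // Target = anchor address + displacement.
    }
  }
  return 0;
}
```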
-
-
-void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later.
-  SwitchTable *tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  uint32_t size = table[1];
-  switch_tables_.push_back(tab_rec);
-
-  // Get the switch value
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage table_base = AllocTempWide();
-  // Materialize a pointer to the switch table
-  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
-  int low_key = s4FromSwitchData(&table[2]);
-  RegStorage key_reg;
-  // Remove the bias, if necessary
-  if (low_key == 0) {
-    key_reg = rl_src.reg;
-  } else {
-    key_reg = AllocTemp();
-    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
-  }
-  // Bounds check: if the key is < 0 or >= size, fall through past the switch.
-  OpRegImm(kOpCmp, key_reg, size - 1);
-  LIR* branch_over = OpCondBranch(kCondHi, nullptr);
-
-  // Load the displacement from the switch table
-  RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, As64BitReg(key_reg), disp_reg, 2, k32);
-
-  // Get base branch address.
-  RegStorage branch_reg = AllocTempWide();
-  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
-  tab_rec->anchor = switch_label;
-
-  // Add displacement to base branch address and go!
-  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
-  NewLIR1(kA64Br1x, branch_reg.GetReg());
-
-  // branch_over target here
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
-}
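
The packed variant reduces to a bias subtraction, one bounds check, and a direct table index; a minimal sketch under the same illustrative naming:

```cpp
#include <cstdint>

// Packed switch: keys are contiguous from low_key, so after removing the bias
// the table is indexed directly. The single unsigned comparison folds the
// "< 0" and ">= size" checks into one, like the kCondHi branch above.
int32_t PackedSwitchLookup(const int32_t* displacements, uint32_t size,
                           int32_t low_key, int32_t value) {
  uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low_key);
  if (index >= size) {
    return 0;  // Out of range: fall through past the switch.
  }
  return displacements[index];  // Target = anchor address + displacement.
}
```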
-
-/*
- * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
- * details see monitor.cc.
- */
-void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  // x0/w0 = object
-  // w1    = thin lock thread id
-  // x2    = address of lock word
-  // w3    = lock word / store failure
-  // TUNING: how much performance do we gain by inlining this, given that we
-  // have already flushed all registers?
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetReg(kArg0, kRef)
-  LockCallTemps();  // Prepare for explicit register usage
-  LIR* null_check_branch = nullptr;
-  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
-    null_check_branch = nullptr;  // No null check.
-  } else {
-    // If the null check fails, it is handled by the slow path to reduce exception-related metadata.
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, nullptr);
-    }
-  }
-  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
-  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
-  NewLIR2(kA64Ldxr2rX, rw3, rx2);
-  MarkPossibleNullPointerException(opt_flags);
-  // Zero out the read barrier bits.
-  OpRegRegImm(kOpAnd, rs_w2, rs_w3, LockWord::kReadBarrierStateMaskShiftedToggled);
-  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w2, 0, nullptr);
-  // w3 is zero except for the rb bits here. Copy the read barrier bits into w1.
-  OpRegRegReg(kOpOr, rs_w1, rs_w1, rs_w3);
-  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
-  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
-  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, nullptr);
-
-  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
-  not_unlocked_branch->target = slow_path_target;
-  if (null_check_branch != nullptr) {
-    null_check_branch->target = slow_path_target;
-  }
-  // TODO: move to a slow path.
-  // Go expensive route - artLockObjectFromCode(obj);
-  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_xLR);
-  ClobberCallerSave();
-  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
-  MarkSafepointPC(call_inst);
-
-  LIR* success_target = NewLIR0(kPseudoTargetLabel);
-  lock_success_branch->target = success_target;
-  GenMemBarrier(kLoadAny);
-}
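
Conceptually, the inline fast path above is a compare-and-swap on the lock word; a simplified model (read-barrier bits and recursive locking ignored, names illustrative):

```cpp
#include <atomic>
#include <cstdint>

// Simplified model of the inline thin-lock acquire: only an unlocked word (0)
// may be thin-locked, and the ldxr/stxr pair is modeled by one compare-exchange.
bool TryThinLockAcquire(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  uint32_t expected = 0;
  return lock_word.compare_exchange_strong(expected, thread_id,
                                           std::memory_order_acquire);
  // On failure the generated code falls back to artLockObjectFromCode.
}
```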
-
-/*
- * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
- * details see monitor.cc. Note that the code below doesn't need ldxr/stxr, as the thread holds
- * the lock and can only give away ownership if it is suspended.
- */
-void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  // x0/w0 = object
-  // w1    = thin lock thread id
-  // w2    = lock word
-  // TUNING: how much performance do we gain by inlining this, given that we
-  // have already flushed all registers?
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  LIR* null_check_branch = nullptr;
-  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
-    null_check_branch = nullptr;  // No null check.
-  } else {
-    // If the null check fails, it is handled by the slow path to reduce exception-related metadata.
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, nullptr);
-    }
-  }
-  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
-  if (!kUseReadBarrier) {
-    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
-  } else {
-    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
-    NewLIR2(kA64Ldxr2rX, rw2, rx3);
-  }
-  MarkPossibleNullPointerException(opt_flags);
-  // Zero out the read barrier bits.
-  OpRegRegImm(kOpAnd, rs_w3, rs_w2, LockWord::kReadBarrierStateMaskShiftedToggled);
-  // Zero out except the read barrier bits.
-  OpRegRegImm(kOpAnd, rs_w2, rs_w2, LockWord::kReadBarrierStateMaskShifted);
-  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w3, rs_w1, nullptr);
-  GenMemBarrier(kAnyStore);
-  LIR* unlock_success_branch;
-  if (!kUseReadBarrier) {
-    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
-    unlock_success_branch = OpUnconditionalBranch(nullptr);
-  } else {
-    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
-    NewLIR3(kA64Stxr3wrX, rw1, rw2, rx3);
-    unlock_success_branch = OpCmpImmBranch(kCondEq, rs_w1, 0, nullptr);
-  }
-  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
-  slow_unlock_branch->target = slow_path_target;
-  if (null_check_branch != nullptr) {
-    null_check_branch->target = slow_path_target;
-  }
-  // TODO: move to a slow path.
-  // Go expensive route - artUnlockObjectFromCode(obj);
-  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_xLR);
-  ClobberCallerSave();
-  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
-  MarkSafepointPC(call_inst);
-
-  LIR* success_target = NewLIR0(kPseudoTargetLabel);
-  unlock_success_branch->target = success_target;
-}
-
-void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-  LoadRefDisp(rs_xSELF, ex_offset, rl_result.reg, kNotVolatile);
-  StoreRefDisp(rs_xSELF, ex_offset, rs_xzr, kNotVolatile);
-  StoreValue(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
-  RegStorage reg_card_base = AllocTempWide();
-  RegStorage reg_card_no = AllocTempWide();  // Needs to be wide, as the address is a 64-bit reference.
-  LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
-  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
-  StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base),
-                   0, kUnsignedByte);
-  FreeTemp(reg_card_base);
-  FreeTemp(reg_card_no);
-}
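
The card mark itself is just a shift plus an indexed byte store - the lsr + strb pair above; a sketch with an assumed `kCardShift` value (the real constant lives in `gc::accounting::CardTable`):

```cpp
#include <cstdint>

// Illustrative only; the actual shift is defined by the runtime's card table.
constexpr unsigned kCardShift = 10;

// The byte written is the low byte of the card table base itself, which is
// exactly the value the strb above stores from As32BitReg(reg_card_base).
inline void MarkCard(uint8_t* card_table_base, uintptr_t target_address) {
  card_table_base[target_address >> kCardShift] =
      static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table_base));
}
```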
-
-static dwarf::Reg DwarfCoreReg(int num) {
-  return dwarf::Reg::Arm64Core(num);
-}
-
-void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
-  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
-
-  /*
-   * On entry, x0 to x7 are live.  Let the register allocation
-   * mechanism know so it doesn't try to use any of them when
-   * expanding the frame or flushing.
-   * Reserve xIP0 & xIP1 for temporaries.
-   */
-  LockTemp(rs_x0);
-  LockTemp(rs_x1);
-  LockTemp(rs_x2);
-  LockTemp(rs_x3);
-  LockTemp(rs_x4);
-  LockTemp(rs_x5);
-  LockTemp(rs_x6);
-  LockTemp(rs_x7);
-  LockTemp(rs_xIP0);
-  LockTemp(rs_xIP1);
-
-  /* TUNING:
-   * Use AllocTemp() and reuse LR if possible to give us the freedom on adjusting the number
-   * of temp registers.
-   */
-
-  /*
-   * We can safely skip the stack overflow check if we're
-   * a leaf *and* our frame size < fudge factor.
-   */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
-    !FrameNeedsStackCheck(frame_size_, kArm64);
-
-  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64);
-  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
-  bool generate_explicit_stack_overflow_check = large_frame ||
-    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
-  const int spill_count = num_core_spills_ + num_fp_spills_;
-  const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
-  const int frame_size_without_spills = frame_size_ - spill_size;
-
-  if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
-      // Load stack limit
-      LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
-    } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-framesize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-
-      // TODO: If the frame size is small enough, is it possible to make this a pre-indexed load,
-      //       so that we can avoid the following "sub sp" when spilling?
-      OpRegRegImm(kOpSub, rs_x8, rs_sp, GetStackOverflowReservedBytes(kArm64));
-      Load32Disp(rs_x8, 0, rs_wzr);
-      MarkPossibleStackOverflowException();
-    }
-  }
-
-  int spilled_already = 0;
-  if (spill_size > 0) {
-    spilled_already = SpillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);
-    DCHECK(spill_size == spilled_already || frame_size_ == spilled_already);
-  }
-
-  if (spilled_already != frame_size_) {
-    OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
-    cfi_.AdjustCFAOffset(frame_size_without_spills);
-  }
-
-  if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
-      class StackOverflowSlowPath: public LIRSlowPath {
-      public:
-        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-            : LIRSlowPath(m2l, branch),
-              sp_displace_(sp_displace) {
-        }
-        void Compile() OVERRIDE {
-          m2l_->ResetRegPool();
-          m2l_->ResetDefTracking();
-          GenerateTargetLabel(kPseudoThrowTarget);
-          // Unwinds stack.
-          m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
-          m2l_->cfi().AdjustCFAOffset(-sp_displace_);
-          m2l_->ClobberCallerSave();
-          ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
-          m2l_->LockTemp(rs_xIP0);
-          m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
-          m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
-          m2l_->FreeTemp(rs_xIP0);
-          m2l_->cfi().AdjustCFAOffset(sp_displace_);
-        }
-
-      private:
-        const size_t sp_displace_;
-      };
-
-      LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr);
-      AddSlowPath(new (arena_) StackOverflowSlowPath(this, branch, frame_size_));
-    }
-  }
-
-  FlushIns(ArgLocs, rl_method);
-
-  FreeTemp(rs_x0);
-  FreeTemp(rs_x1);
-  FreeTemp(rs_x2);
-  FreeTemp(rs_x3);
-  FreeTemp(rs_x4);
-  FreeTemp(rs_x5);
-  FreeTemp(rs_x6);
-  FreeTemp(rs_x7);
-  FreeTemp(rs_xIP0);
-  FreeTemp(rs_xIP1);
-}
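
The spill-size computation above rounds up to a multiple of 16 bytes, since AArch64 requires a 16-byte-aligned stack pointer; a self-checking sketch of the same arithmetic:

```cpp
// Mirrors "(spill_count * kArm64PointerSize + 15) & ~0xf" above.
constexpr int AlignSpillSize(int spill_count, int pointer_size) {
  return (spill_count * pointer_size + 15) & ~0xf;
}

static_assert(AlignSpillSize(5, 8) == 48, "40 bytes round up to 48");
static_assert(AlignSpillSize(4, 8) == 32, "32 bytes are already aligned");
```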
-
-void Arm64Mir2Lir::GenExitSequence() {
-  cfi_.RememberState();
-  /*
-   * In the exit path, r0/r1 are live - make sure they aren't
-   * allocated by the register utilities as temps.
-   */
-  LockTemp(rs_x0);
-  LockTemp(rs_x1);
-  UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);
-
-  // Finally return.
-  NewLIR0(kA64Ret);
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size_);
-}
-
-void Arm64Mir2Lir::GenSpecialExitSequence() {
-  NewLIR0(kA64Ret);
-}
-
-void Arm64Mir2Lir::GenSpecialEntryForSuspend() {
-  // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr.
-  core_spill_mask_ = (1u << rs_xLR.GetRegNum());
-  num_core_spills_ = 1u;
-  fp_spill_mask_ = 0u;
-  num_fp_spills_ = 0u;
-  frame_size_ = 16u;
-  core_vmap_table_.clear();
-  fp_vmap_table_.clear();
-  NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
-  cfi_.AdjustCFAOffset(frame_size_);
-  // Do not generate CFI for scratch register x0.
-  cfi_.RelOffset(DwarfCoreReg(rxLR), 8);
-}
-
-void Arm64Mir2Lir::GenSpecialExitForSuspend() {
-  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
-  NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
-  cfi_.AdjustCFAOffset(-frame_size_);
-  cfi_.Restore(DwarfCoreReg(rxLR));
-}
-
-static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
-  // Emit relative calls anywhere within the image; otherwise only within the same dex file.
-  return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
-}
-
-/*
- * Bit of a hack here - in the absence of a real scheduling pass,
- * emit the next instruction in static & direct invoke sequences.
- */
-int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
-                                      int state, const MethodReference& target_method,
-                                      uint32_t unused_idx ATTRIBUTE_UNUSED,
-                                      uintptr_t direct_code, uintptr_t direct_method,
-                                      InvokeType type) {
-  Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
-  if (info->string_init_offset != 0) {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0: {  // Grab target method* from thread pointer
-      cg->LoadWordDisp(rs_xSELF, info->string_init_offset, arg0_ref);
-      break;
-    }
-    case 1:  // Grab the code from the method*
-      if (direct_code == 0) {
-        // kInvokeTgt := arg0_ref->entrypoint
-        cg->LoadWordDisp(arg0_ref,
-                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else if (direct_code != 0 && direct_method != 0) {
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      if (direct_code != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
-      } else if (Arm64UseRelativeCall(cu, target_method)) {
-        // Defer to linker patch.
-      } else {
-        cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-      }
-      if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
-      } else {
-        cg->LoadMethodAddress(target_method, type, kArg0);
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else {
-    bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
-    RegStorage arg0_ref = cg->TargetPtrReg(kArg0);
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      // TUNING: we can save a reg copy if Method* has been promoted.
-      if (!use_pc_rel) {
-        cg->LoadCurrMethodDirect(arg0_ref);
-        break;
-      }
-      ++state;
-      FALLTHROUGH_INTENDED;
-    case 1:  // Get method->dex_cache_resolved_methods_
-      if (!use_pc_rel) {
-        cg->LoadBaseDisp(arg0_ref,
-                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value(),
-                         arg0_ref,
-                         k64,
-                         kNotVolatile);
-      }
-      // Set up direct code if known.
-      if (direct_code != 0) {
-        if (direct_code != static_cast<uintptr_t>(-1)) {
-          cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
-        } else if (Arm64UseRelativeCall(cu, target_method)) {
-          // Defer to linker patch.
-        } else {
-          CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-        }
-      }
-      if (!use_pc_rel || direct_code != 0) {
-        break;
-      }
-      ++state;
-      FALLTHROUGH_INTENDED;
-    case 2:  // Grab target method*
-      CHECK_EQ(cu->dex_file, target_method.dex_file);
-      if (!use_pc_rel) {
-        cg->LoadWordDisp(arg0_ref,
-                         cg->GetCachePointerOffset(target_method.dex_method_index,
-                                                   kArm64PointerSize),
-                         arg0_ref);
-      } else {
-        size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
-        cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref, true);
-      }
-      break;
-    case 3:  // Grab the code from the method*
-      if (direct_code == 0) {
-        // kInvokeTgt := arg0_ref->entrypoint
-        cg->LoadWordDisp(arg0_ref,
-                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
-      }
-      break;
-    default:
-      return -1;
-    }
-  }
-  return state + 1;
-}
-
-NextCallInsn Arm64Mir2Lir::GetNextSDCallInsn() {
-  return Arm64NextSDCallInsn;
-}
-
-LIR* Arm64Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
-  // For ARM64, just generate a relative BL instruction that will be filled in at 'link time'.
-  // If the target turns out to be too far, the linker will generate a thunk for dispatch.
-  int target_method_idx = target_method.dex_method_index;
-  const DexFile* target_dex_file = target_method.dex_file;
-
-  // Generate the call instruction and save index, dex_file, and type.
-  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
-  // as a placeholder for the offset.
-  LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
-                     target_method_idx, WrapPointer(target_dex_file), type);
-  AppendLIR(call);
-  call_method_insns_.push_back(call);
-  return call;
-}
-
-LIR* Arm64Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
-  LIR* call_insn;
-  if (method_info.FastPath() && Arm64UseRelativeCall(cu_, method_info.GetTargetMethod()) &&
-      (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
-      method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
-    call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
-  } else {
-    call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
-  }
-  return call_insn;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
deleted file mode 100644
index ca2e012..0000000
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
-#define ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
-
-#include "arm64_lir.h"
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir.h"
-
-#include <map>
-
-namespace art {
-
-class Arm64Mir2Lir FINAL : public Mir2Lir {
- protected:
-  class InToRegStorageArm64Mapper : public InToRegStorageMapper {
-   public:
-    InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
-    virtual ~InToRegStorageArm64Mapper() {}
-    virtual RegStorage GetNextReg(ShortyArg arg);
-    virtual void Reset() OVERRIDE {
-      cur_core_reg_ = 0;
-      cur_fp_reg_ = 0;
-    }
-   private:
-    size_t cur_core_reg_;
-    size_t cur_fp_reg_;
-  };
-
-  InToRegStorageArm64Mapper in_to_reg_storage_arm64_mapper_;
-  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
-    in_to_reg_storage_arm64_mapper_.Reset();
-    return &in_to_reg_storage_arm64_mapper_;
-  }
-
- public:
-  Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
-
-  // Required for target - codegen helpers.
-  bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                          RegLocation rl_dest, int lit) OVERRIDE;
-  bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                        RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-  bool HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
-                          RegLocation rl_src, RegLocation rl_dest, int64_t lit);
-  bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-  void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                  int32_t constant) OVERRIDE;
-  void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                   int64_t constant) OVERRIDE;
-  LIR* CheckSuspendUsingLoad() OVERRIDE;
-  RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
-  LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                    OpSize size, VolatileKind is_volatile) OVERRIDE;
-  LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                       OpSize size) OVERRIDE;
-  LIR* LoadConstantNoClobber(RegStorage r_dest, int value) OVERRIDE;
-  LIR* LoadConstantWide(RegStorage r_dest, int64_t value) OVERRIDE;
-  LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
-                     VolatileKind is_volatile) OVERRIDE;
-  LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                        OpSize size) OVERRIDE;
-
-  /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
-  void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
-
-  bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
-  void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest, bool wide)
-      OVERRIDE;
-
-  LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                         int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
-
-  // Required for target - register utilities.
-  RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
-  RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE {
-    if (wide_kind == kWide || wide_kind == kRef) {
-      return As64BitReg(TargetReg(symbolic_reg));
-    } else {
-      return Check32BitReg(TargetReg(symbolic_reg));
-    }
-  }
-  RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
-    return As64BitReg(TargetReg(symbolic_reg));
-  }
-  RegLocation GetReturnAlt() OVERRIDE;
-  RegLocation GetReturnWideAlt() OVERRIDE;
-  RegLocation LocCReturn() OVERRIDE;
-  RegLocation LocCReturnRef() OVERRIDE;
-  RegLocation LocCReturnDouble() OVERRIDE;
-  RegLocation LocCReturnFloat() OVERRIDE;
-  RegLocation LocCReturnWide() OVERRIDE;
-  ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
-  void AdjustSpillMask() OVERRIDE;
-  void ClobberCallerSave() OVERRIDE;
-  void FreeCallTemps() OVERRIDE;
-  void LockCallTemps() OVERRIDE;
-  void CompilerInitializeRegAlloc() OVERRIDE;
-
-  // Required for target - miscellaneous.
-  void AssembleLIR() OVERRIDE;
-  void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
-  void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
-  const char* GetTargetInstFmt(int opcode) OVERRIDE;
-  const char* GetTargetInstName(int opcode) OVERRIDE;
-  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) OVERRIDE;
-  ResourceMask GetPCUseDefEncoding() const OVERRIDE;
-  uint64_t GetTargetInstFlags(int opcode) OVERRIDE;
-  size_t GetInsnSize(LIR* lir) OVERRIDE;
-  bool IsUnconditionalBranch(LIR* lir) OVERRIDE;
-
-  // Get the register class for load/store of a field.
-  RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
-
-  // Required for target - Dalvik-level generators.
-  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation lr_shift) OVERRIDE;
-  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2, int flags) OVERRIDE;
-  void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                   RegLocation rl_dest, int scale) OVERRIDE;
-  void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                   RegLocation rl_src, int scale, bool card_mark) OVERRIDE;
-  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_shift, int flags) OVERRIDE;
-  void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2) OVERRIDE;
-  void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                       RegLocation rl_src2) OVERRIDE;
-  void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                RegLocation rl_src2) OVERRIDE;
-  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  bool GenInlinedReverseBits(CallInfo* info, OpSize size) OVERRIDE;
-  bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
-  bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
-  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) OVERRIDE;
-  bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) OVERRIDE;
-  bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) OVERRIDE;
-  bool GenInlinedSqrt(CallInfo* info) OVERRIDE;
-  bool GenInlinedCeil(CallInfo* info) OVERRIDE;
-  bool GenInlinedFloor(CallInfo* info) OVERRIDE;
-  bool GenInlinedRint(CallInfo* info) OVERRIDE;
-  bool GenInlinedRound(CallInfo* info, bool is_double) OVERRIDE;
-  bool GenInlinedPeek(CallInfo* info, OpSize size) OVERRIDE;
-  bool GenInlinedPoke(CallInfo* info, OpSize size) OVERRIDE;
-  bool GenInlinedAbsInt(CallInfo* info) OVERRIDE;
-  bool GenInlinedAbsLong(CallInfo* info) OVERRIDE;
-  bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
-  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2, int flags) OVERRIDE;
-  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div)
-      OVERRIDE;
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div)
-      OVERRIDE;
-  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)  OVERRIDE;
-  void GenDivZeroCheckWide(RegStorage reg) OVERRIDE;
-  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
-  void GenExitSequence() OVERRIDE;
-  void GenSpecialExitSequence() OVERRIDE;
-  void GenSpecialEntryForSuspend() OVERRIDE;
-  void GenSpecialExitForSuspend() OVERRIDE;
-  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
-  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
-  void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
-  void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                        int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                        RegisterClass dest_reg_class) OVERRIDE;
-
-  bool GenMemBarrier(MemBarrierKind barrier_kind) OVERRIDE;
-  void GenMonitorEnter(int opt_flags, RegLocation rl_src) OVERRIDE;
-  void GenMonitorExit(int opt_flags, RegLocation rl_src) OVERRIDE;
-  void GenMoveException(RegLocation rl_dest) OVERRIDE;
-  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                     int first_bit, int second_bit) OVERRIDE;
-  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
-  void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
-  void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                      RegLocation rl_src3, bool is_sub);
-  void GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                       RegLocation rl_src3, bool is_sub);
-
-  // Required for target - single operation generators.
-  LIR* OpUnconditionalBranch(LIR* target) OVERRIDE;
-  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) OVERRIDE;
-  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) OVERRIDE;
-  LIR* OpCondBranch(ConditionCode cc, LIR* target) OVERRIDE;
-  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) OVERRIDE;
-  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
-  void OpEndIT(LIR* it) OVERRIDE;
-  LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
-  LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
-  void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) OVERRIDE;
-  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OVERRIDE;
-  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) OVERRIDE;
-  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) OVERRIDE;
-  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) OVERRIDE;
-  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) OVERRIDE;
-  LIR* OpTestSuspend(LIR* target) OVERRIDE;
-  LIR* OpVldm(RegStorage r_base, int count) OVERRIDE;
-  LIR* OpVstm(RegStorage r_base, int count) OVERRIDE;
-  void OpRegCopyWide(RegStorage dest, RegStorage src) OVERRIDE;
-
-  bool InexpensiveConstantInt(int32_t value) OVERRIDE;
-  bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE;
-  bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
-  bool InexpensiveConstantLong(int64_t value) OVERRIDE;
-  bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
-
-  void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE;
-
-  bool WideGPRsAreAliases() const OVERRIDE {
-    return true;  // 64b architecture.
-  }
-  bool WideFPRsAreAliases() const OVERRIDE {
-    return true;  // 64b architecture.
-  }
-
-  size_t GetInstructionOffset(LIR* lir) OVERRIDE;
-
-  NextCallInsn GetNextSDCallInsn() OVERRIDE;
-
-  /**
-   * @brief Generate a relative call to the method that will be patched at link time.
-   * @param target_method The MethodReference of the method to be invoked.
-   * @param type How the method will be invoked.
-   * @returns The call instruction.
-   */
-  LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
-
-  /**
-   * @brief Generate the actual call insn based on the method info.
-   * @param method_info the lowering info for the method call.
-   * @returns The call instruction.
-   */
-  virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
-
-  /**
-   * @brief Handle Arm64-specific literals.
-   */
-  void InstallLiteralPools() OVERRIDE;
-
-  LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
-
- private:
-  /**
-   * @brief Given register xNN (dNN), returns register wNN (sNN).
-   * @param reg #RegStorage containing a Solo64 input register (e.g. @c x1 or @c d2).
-   * @return A Solo32 with the same register number as the @p reg (e.g. @c w1 or @c s2).
-   * @see As64BitReg
-   */
-  RegStorage As32BitReg(RegStorage reg) {
-    DCHECK(!reg.IsPair());
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Expected 64b register";
-      } else {
-        LOG(WARNING) << "Expected 64b register";
-        return reg;
-      }
-    }
-    RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,
-                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
-    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
-              ->GetReg().GetReg(),
-              ret_val.GetReg());
-    return ret_val;
-  }
-
-  RegStorage Check32BitReg(RegStorage reg) {
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Checked for 32b register";
-      } else {
-        LOG(WARNING) << "Checked for 32b register";
-        return As32BitReg(reg);
-      }
-    }
-    return reg;
-  }
-
-  /**
-   * @brief Given register wNN (sNN), returns register xNN (dNN).
-   * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2).
-   * @return A Solo64 with the same register number as the @p reg (e.g. @c x1 or @c d2).
-   * @see As32BitReg
-   */
-  RegStorage As64BitReg(RegStorage reg) {
-    DCHECK(!reg.IsPair());
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Expected 32b register";
-      } else {
-        LOG(WARNING) << "Expected 32b register";
-        return reg;
-      }
-    }
-    RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,
-                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
-    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
-              ->GetReg().GetReg(),
-              ret_val.GetReg());
-    return ret_val;
-  }
-
-  RegStorage Check64BitReg(RegStorage reg) {
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Checked for 64b register";
-      } else {
-        LOG(WARNING) << "Checked for 64b register";
-        return As64BitReg(reg);
-      }
-    }
-    return reg;
-  }
-
-  int32_t EncodeImmSingle(uint32_t bits);
-  int32_t EncodeImmDouble(uint64_t bits);
-  LIR* LoadFPConstantValue(RegStorage r_dest, int32_t value);
-  LIR* LoadFPConstantValueWide(RegStorage r_dest, int64_t value);
-  void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
-  void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
-  void AssignDataOffsets();
-  RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        bool is_div, int flags) OVERRIDE;
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
-  size_t GetLoadStoreSize(LIR* lir);
-
-  bool SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                            RegLocation rl_dest, int64_t lit);
-
-  uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
-  int AssignInsnOffsets();
-  void AssignOffsets();
-  uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
-
-  // Spill core and FP registers. Returns the SP difference: either spill size, or whole
-  // frame size.
-  int SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size);
-
-  // Unspill core and FP registers.
-  void UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size);
-
-  void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-
-  LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value);
-  LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value);
-
-  LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
-  LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
-                        int shift);
-  int EncodeShift(int code, int amount);
-
-  LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
-                      A64RegExtEncodings ext, uint8_t amount);
-  LIR* OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
-                         A64RegExtEncodings ext, uint8_t amount);
-  int EncodeExtend(int extend_type, int amount);
-  bool IsExtendEncoding(int encoded_value);
-
-  LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
-  LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-
-  int EncodeLogicalImmediate(bool is_wide, uint64_t value);
-  uint64_t DecodeLogicalImmediate(bool is_wide, int value);
-  ArmConditionCode ArmConditionEncoding(ConditionCode code);
-
-  // Helper used in the two GenSelect variants.
-  void GenSelect(int32_t left, int32_t right, ConditionCode code, RegStorage rs_dest,
-                 int result_reg_class);
-
-  void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                     RegLocation rl_src2, bool is_div, int flags);
-
-  static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
-                                 int state, const MethodReference& target_method,
-                                 uint32_t unused_idx,
-                                 uintptr_t direct_code, uintptr_t direct_method,
-                                 InvokeType type);
-
-  static const A64EncodingMap EncodingMap[kA64Last];
-
-  ArenaVector<LIR*> call_method_insns_;
-  ArenaVector<LIR*> dex_cache_access_insns_;
-
-  int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
deleted file mode 100644
index 3b88021..0000000
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm64.h"
-
-#include "arm64_lir.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-
-namespace art {
-
-void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kA64Brk1d;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::ADD_FLOAT:
-      op = kA64Fadd3fff;
-      break;
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT:
-      op = kA64Fsub3fff;
-      break;
-    case Instruction::DIV_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT:
-      op = kA64Fdiv3fff;
-      break;
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT:
-      op = kA64Fmul3fff;
-      break;
-    case Instruction::REM_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT:
-      FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
-      rl_result = GetReturn(kFPReg);
-      StoreValue(rl_dest, rl_result);
-      return;
-    case Instruction::NEG_FLOAT:
-      GenNegFloat(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  rl_src1 = LoadValue(rl_src1, kFPReg);
-  rl_src2 = LoadValue(rl_src2, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
-                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kA64Brk1d;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::ADD_DOUBLE:
-      op = kA64Fadd3fff;
-      break;
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE:
-      op = kA64Fsub3fff;
-      break;
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE:
-      op = kA64Fdiv3fff;
-      break;
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE:
-      op = kA64Fmul3fff;
-      break;
-    case Instruction::REM_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE:
-      FlushAllRegs();   // Send everything to home location
-      {
-        RegStorage r_tgt = CallHelperSetup(kQuickFmod);
-        LoadValueDirectWideFixed(rl_src1, rs_d0);
-        LoadValueDirectWideFixed(rl_src2, rs_d1);
-        ClobberCallerSave();
-        CallHelper(r_tgt, kQuickFmod, false);
-      }
-      rl_result = GetReturnWide(kFPReg);
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    case Instruction::NEG_DOUBLE:
-      GenNegDouble(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-
-  rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  DCHECK(rl_src1.wide);
-  rl_src2 = LoadValueWide(rl_src2, kFPReg);
-  DCHECK(rl_src2.wide);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_result.wide);
-  NewLIR3(WIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                              int32_t constant) {
-  RegLocation rl_result;
-  RegStorage r_tmp = AllocTempSingle();
-  LoadConstantNoClobber(r_tmp, constant);
-  rl_src1 = LoadValue(rl_src1, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR3(kA64Fmul3fff, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                               int64_t constant) {
-  RegLocation rl_result;
-  RegStorage r_tmp = AllocTempDouble();
-  DCHECK(r_tmp.IsDouble());
-  LoadConstantWide(r_tmp, constant);
-  rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  DCHECK(rl_src1.wide);
-  rl_result = EvalLocWide(rl_dest, kFPReg, true);
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_result.wide);
-  NewLIR3(WIDE(kA64Fmul3fff), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
-                                 RegLocation rl_dest, RegLocation rl_src) {
-  int op = kA64Brk1d;
-  RegLocation rl_result;
-  RegisterClass src_reg_class = kInvalidRegClass;
-  RegisterClass dst_reg_class = kInvalidRegClass;
-
-  switch (opcode) {
-    case Instruction::INT_TO_FLOAT:
-      op = kA64Scvtf2fw;
-      src_reg_class = kCoreReg;
-      dst_reg_class = kFPReg;
-      break;
-    case Instruction::FLOAT_TO_INT:
-      op = kA64Fcvtzs2wf;
-      src_reg_class = kFPReg;
-      dst_reg_class = kCoreReg;
-      break;
-    case Instruction::DOUBLE_TO_FLOAT:
-      op = kA64Fcvt2sS;
-      src_reg_class = kFPReg;
-      dst_reg_class = kFPReg;
-      break;
-    case Instruction::FLOAT_TO_DOUBLE:
-      op = kA64Fcvt2Ss;
-      src_reg_class = kFPReg;
-      dst_reg_class = kFPReg;
-      break;
-    case Instruction::INT_TO_DOUBLE:
-      op = WIDE(kA64Scvtf2fw);
-      src_reg_class = kCoreReg;
-      dst_reg_class = kFPReg;
-      break;
-    case Instruction::DOUBLE_TO_INT:
-      op = WIDE(kA64Fcvtzs2wf);
-      src_reg_class = kFPReg;
-      dst_reg_class = kCoreReg;
-      break;
-    case Instruction::LONG_TO_DOUBLE:
-      op = WIDE(kA64Scvtf2fx);
-      src_reg_class = kCoreReg;
-      dst_reg_class = kFPReg;
-      break;
-    case Instruction::FLOAT_TO_LONG:
-      op = kA64Fcvtzs2xf;
-      src_reg_class = kFPReg;
-      dst_reg_class = kCoreReg;
-      break;
-    case Instruction::LONG_TO_FLOAT:
-      op = kA64Scvtf2fx;
-      src_reg_class = kCoreReg;
-      dst_reg_class = kFPReg;
-      break;
-    case Instruction::DOUBLE_TO_LONG:
-      op = WIDE(kA64Fcvtzs2xf);
-      src_reg_class = kFPReg;
-      dst_reg_class = kCoreReg;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-
-  DCHECK_NE(src_reg_class, kInvalidRegClass);
-  DCHECK_NE(dst_reg_class, kInvalidRegClass);
-  DCHECK_NE(op, kA64Brk1d);
-
-  if (rl_src.wide) {
-    rl_src = LoadValueWide(rl_src, src_reg_class);
-  } else {
-    rl_src = LoadValue(rl_src, src_reg_class);
-  }
-
-  rl_result = EvalLoc(rl_dest, dst_reg_class, true);
-  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-
-  if (rl_dest.wide) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
-                                     bool is_double) {
-  LIR* target = &block_label_list_[bb->taken];
-  RegLocation rl_src1;
-  RegLocation rl_src2;
-  if (is_double) {
-    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
-    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-    rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    NewLIR2(WIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  } else {
-    rl_src1 = mir_graph_->GetSrc(mir, 0);
-    rl_src2 = mir_graph_->GetSrc(mir, 1);
-    rl_src1 = LoadValue(rl_src1, kFPReg);
-    rl_src2 = LoadValue(rl_src2, kFPReg);
-    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  }
-  ConditionCode ccode = mir->meta.ccode;
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      break;
-    case kCondLt:
-      if (gt_bias) {
-        ccode = kCondMi;
-      }
-      break;
-    case kCondLe:
-      if (gt_bias) {
-        ccode = kCondLs;
-      }
-      break;
-    case kCondGt:
-      if (gt_bias) {
-        ccode = kCondHi;
-      }
-      break;
-    case kCondGe:
-      if (gt_bias) {
-        ccode = kCondUge;
-      }
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCondBranch(ccode, target);
-}
-
-
-void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
-                            RegLocation rl_src1, RegLocation rl_src2) {
-  bool is_double = false;
-  int default_result = -1;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::CMPL_FLOAT:
-      is_double = false;
-      default_result = -1;
-      break;
-    case Instruction::CMPG_FLOAT:
-      is_double = false;
-      default_result = 1;
-      break;
-    case Instruction::CMPL_DOUBLE:
-      is_double = true;
-      default_result = -1;
-      break;
-    case Instruction::CMPG_DOUBLE:
-      is_double = true;
-      default_result = 1;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  if (is_double) {
-    rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
-    ClobberSReg(rl_dest.s_reg_low);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    LoadConstant(rl_result.reg, default_result);
-    NewLIR2(WIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  } else {
-    rl_src1 = LoadValue(rl_src1, kFPReg);
-    rl_src2 = LoadValue(rl_src2, kFPReg);
-    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
-    ClobberSReg(rl_dest.s_reg_low);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    LoadConstant(rl_result.reg, default_result);
-    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  }
-  DCHECK(!rl_result.reg.IsFloat());
-
-  // TODO(Arm64): should we rather do this?
-  // csinc wD, wzr, wzr, eq
-  // csneg wD, wD, wD, le
-  // (which requires 2 instructions rather than 3)
-
-  // Rd = if cond then Rd else -Rd.
-  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
-          rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe);
-  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(),
-          kArmCondEq);
-  StoreValue(rl_dest, rl_result);
-}
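
For reference, the semantics the csneg/csel pair implements are those of the Dalvik `cmpl`/`cmpg` bytecodes; a plain C++ sketch, where `nan_result` plays the role of `default_result` above:

```cpp
#include <cmath>

// cmpl and cmpg differ only in what they return when an operand is NaN:
// -1 for cmpl (CMPL_FLOAT/CMPL_DOUBLE), +1 for cmpg (CMPG_FLOAT/CMPG_DOUBLE).
int CompareFloat(float a, float b, int nan_result) {
  if (std::isnan(a) || std::isnan(b)) {
    return nan_result;
  }
  if (a == b) {
    return 0;
  }
  return (a < b) ? -1 : 1;
}
```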
-
-void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  rl_src = LoadValue(rl_src, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(WIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
-  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
-  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
-      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
-    return kCoreReg;
-  }
-  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
-  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
-    return kFPReg;
-  }
-  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
-  // is faster using a core reg or fp reg depends on the particular CPU. For example, on A53
-  // it's faster using core reg while on A57 it's faster with fp reg, the difference being
-  // bigger on the A53. Without further investigation and testing we prefer core register.
-  // (If the result is subsequently used in another fp operation, the dalvik reg will probably
-  // get promoted and that should be handled by the cases above.)
-  return kCoreReg;
-}
-
-bool Arm64Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
-  if (info->result.location == kLocInvalid) {
-    return true;  // Result is unused: inlining successful, no code generated.
-  }
-  RegLocation rl_dest = info->result;
-  RegLocation rl_src = UpdateLoc(info->args[0]);
-  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
-  rl_src = LoadValue(rl_src, reg_class);
-  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
-  if (reg_class == kFPReg) {
-    NewLIR2(kA64Fabs2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else {
-    // Clear the sign bit in an integer register.
-    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  if (info->result.location == kLocInvalid) {
-    return true;  // Result is unused: inlining successful, no code generated.
-  }
-  RegLocation rl_dest = info->result;
-  RegLocation rl_src = UpdateLocWide(info->args[0]);
-  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
-  rl_src = LoadValueWide(rl_src, reg_class);
-  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
-  if (reg_class == kFPReg) {
-    NewLIR2(WIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else {
-    // Clear the sign bit in an integer register.
-    OpRegRegImm64(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffffffffffff);
-  }
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
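
The integer-register path in both inlines relies on abs() of an IEEE-754 value being a sign-bit clear; a sketch of the same bit manipulation:

```cpp
#include <cstdint>
#include <cstring>

// Clearing the sign bit is the single AND with 0x7fffffff (or the 64-bit
// mask) emitted above; exponent and mantissa are untouched.
float AbsViaBitmask(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits &= 0x7fffffffu;
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}
```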
-
-bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(WIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedCeil(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = InlineTargetWide(info);
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(WIDE(kA64Frintp2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedFloor(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = InlineTargetWide(info);
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(WIDE(kA64Frintm2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedRint(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = InlineTargetWide(info);
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(WIDE(kA64Frintn2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
-  int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
-  A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
-  rl_src = (is_double) ? LoadValueWide(rl_src, kFPReg) : LoadValue(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage r_imm_point5 = (is_double) ? AllocTempDouble() : AllocTempSingle();
-  RegStorage r_tmp = (is_double) ? AllocTempDouble() : AllocTempSingle();
-  // 0.5f and 0.5d are encoded in the same way.
-  NewLIR2(kA64Fmov2fI | wide, r_imm_point5.GetReg(), encoded_imm);
-  NewLIR3(kA64Fadd3fff | wide, r_tmp.GetReg(), rl_src.reg.GetReg(), r_imm_point5.GetReg());
-  NewLIR2((is_double) ? kA64Fcvtms2xS : kA64Fcvtms2ws, rl_result.reg.GetReg(), r_tmp.GetReg());
-  (is_double) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
-  DCHECK_EQ(cu_->instruction_set, kArm64);
-  int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff;
-  A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
-  RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = (is_double) ? info->args[2] : info->args[1];
-  rl_src1 = (is_double) ? LoadValueWide(rl_src1, kFPReg) : LoadValue(rl_src1, kFPReg);
-  rl_src2 = (is_double) ? LoadValueWide(rl_src2, kFPReg) : LoadValue(rl_src2, kFPReg);
-  RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR3(op | wide, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  (is_double) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
deleted file mode 100644
index d92dea2..0000000
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ /dev/null
@@ -1,1798 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the Arm64 ISA. */
-
-#include "codegen_arm64.h"
-
-#include "arch/instruction_set_features.h"
-#include "arm64_lir.h"
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "driver/compiler_driver.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mirror/array-inl.h"
-
-namespace art {
-
-LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
-  OpRegReg(kOpCmp, src1, src2);
-  return OpCondBranch(cond, target);
-}
-
-LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode ATTRIBUTE_UNUSED, const char* guide ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
-  UNREACHABLE();
-}
-
-void Arm64Mir2Lir::OpEndIT(LIR* it ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
-}
-
-/*
- * 64-bit 3way compare function.
- *     cmp   xA, xB
- *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
- *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
- */
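-// E.g. (illustrative): xA = 5, xB = 7 sets "lt", so csinc produces wC = 1
-// and csneg flips it to -1 because "ge" is false; equal inputs leave wC = 0.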
-void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
-                              RegLocation rl_src2) {
-  RegLocation rl_result;
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
-  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
-          rl_result.reg.GetReg(), kArmCondGe);
-  StoreValue(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                             RegLocation rl_src1, RegLocation rl_shift) {
-  OpKind op = kOpBkpt;
-  switch (opcode) {
-  case Instruction::SHL_LONG:
-  case Instruction::SHL_LONG_2ADDR:
-    op = kOpLsl;
-    break;
-  case Instruction::SHR_LONG:
-  case Instruction::SHR_LONG_2ADDR:
-    op = kOpAsr;
-    break;
-  case Instruction::USHR_LONG:
-  case Instruction::USHR_LONG_2ADDR:
-    op = kOpLsr;
-    break;
-  default:
-    LOG(FATAL) << "Unexpected case: " << opcode;
-  }
-  rl_shift = LoadValue(rl_shift, kCoreReg);
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
-  StoreValueWide(rl_dest, rl_result);
-}
-
-static constexpr bool kUseDeltaEncodingInGenSelect = false;
-
-void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
-                             RegStorage rs_dest, int result_reg_class) {
-  if (false_val == 0 ||               // 0 is better as first operand.
-      true_val == 1 ||                // Potentially Csinc.
-      true_val == -1 ||               // Potentially Csinv.
-      true_val == false_val + 1) {    // Potentially Csinc.
-    ccode = NegateComparison(ccode);
-    std::swap(true_val, false_val);
-  }
-
-  ArmConditionCode code = ArmConditionEncoding(ccode);
-
-  int opcode;                                      // The opcode.
-  RegStorage left_op = RegStorage::InvalidReg();   // The operands.
-  RegStorage right_op = RegStorage::InvalidReg();  // The operands.
-
-  bool is_wide = rs_dest.Is64Bit();
-
-  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;
-
-  if (true_val == 0) {
-    left_op = zero_reg;
-  } else {
-    left_op = rs_dest;
-    LoadConstantNoClobber(rs_dest, true_val);
-  }
-  if (false_val == 1) {
-    right_op = zero_reg;
-    opcode = kA64Csinc4rrrc;
-  } else if (false_val == -1) {
-    right_op = zero_reg;
-    opcode = kA64Csinv4rrrc;
-  } else if (false_val == true_val + 1) {
-    right_op = left_op;
-    opcode = kA64Csinc4rrrc;
-  } else if (false_val == -true_val) {
-    right_op = left_op;
-    opcode = kA64Csneg4rrrc;
-  } else if (false_val == ~true_val) {
-    right_op = left_op;
-    opcode = kA64Csinv4rrrc;
-  } else if (true_val == 0) {
-    // left_op is zero_reg.
-    right_op = rs_dest;
-    LoadConstantNoClobber(rs_dest, false_val);
-    opcode = kA64Csel4rrrc;
-  } else {
-    // Generic case.
-    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
-    if (is_wide) {
-      if (t_reg2.Is32Bit()) {
-        t_reg2 = As64BitReg(t_reg2);
-      }
-    } else {
-      if (t_reg2.Is64Bit()) {
-        t_reg2 = As32BitReg(t_reg2);
-      }
-    }
-
-    if (kUseDeltaEncodingInGenSelect) {
-      int32_t delta = false_val - true_val;
-      uint32_t abs_val = delta < 0 ? -delta : delta;
-
-      if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
-        // Can encode as immediate to an add.
-        right_op = t_reg2;
-        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
-      }
-    }
-
-    // Load as constant.
-    if (!right_op.Valid()) {
-      LoadConstantNoClobber(t_reg2, false_val);
-      right_op = t_reg2;
-    }
-
-    opcode = kA64Csel4rrrc;
-  }
-
-  DCHECK(left_op.Valid() && right_op.Valid());
-  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
-      code);
-}
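-
-// E.g. (illustrative): selecting 1 vs 0 negates the condition so that 0
-// becomes the "true" operand, and then encodes as
-//   csinc wDst, wzr, wzr, cond   // wDst = cond ? 0 : 0 + 1
-// with no constant materialization at all.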
-
-void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                    RegisterClass dest_reg_class) {
-  DCHECK(rs_dest.Valid());
-  OpRegReg(kOpCmp, left_op, right_op);
-  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
-}
-
-void Arm64Mir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
-  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
-  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
-  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
-  OpRegImm(kOpCmp, rl_src.reg, 0);
-
-  RegLocation rl_dest = mir_graph_->GetDest(mir);
-
-  // The kMirOpSelect has two variants, one for constants and one for moves.
-  if (mir->ssa_rep->num_uses == 1) {
-    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
-    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
-              rl_dest.ref ? kRefReg : kCoreReg);
-    StoreValue(rl_dest, rl_result);
-  } else {
-    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
-    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-
-    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
-    rl_true = LoadValue(rl_true, result_reg_class);
-    rl_false = LoadValue(rl_false, result_reg_class);
-    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);
-
-    bool is_wide = rl_dest.ref || rl_dest.wide;
-    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
-    NewLIR4(opcode, rl_result.reg.GetReg(),
-            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
-  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
-  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-  LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
-  // Normalize such that if either operand is constant, src2 will be constant.
-  ConditionCode ccode = mir->meta.ccode;
-  if (rl_src1.is_const) {
-    std::swap(rl_src1, rl_src2);
-    ccode = FlipComparisonOrder(ccode);
-  }
-
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-
-  if (rl_src2.is_const) {
-    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
-
-    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    // Special handling using cbz & cbnz.
-    if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-      OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
-      OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
-      return;
-    }
-
-    // Only handle Imm if src2 is not already in a register.
-    rl_src2 = UpdateLocWide(rl_src2);
-    if (rl_src2.location != kLocPhysReg) {
-      OpRegImm64(kOpCmp, rl_src1.reg, val);
-      OpCondBranch(ccode, taken);
-      OpCondBranch(NegateComparison(ccode), not_taken);
-      return;
-    }
-  }
-
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  OpCondBranch(ccode, taken);
-  OpCondBranch(NegateComparison(ccode), not_taken);
-}
-
-/*
- * Generate a register comparison to an immediate and branch.  Caller
- * is responsible for setting branch target field.
- */
-LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
-                                  LIR* target) {
-  LIR* branch = nullptr;
-  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
-  if (check_value == 0) {
-    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
-      A64Opcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
-      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
-      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
-    } else if (arm_cond == kArmCondLs) {
-      // kArmCondLs is unsigned less-or-equal. Since the comparison is unsigned, r <= 0 can
-      // only hold when r == 0, so it is equivalent to cbz.
-      // This case happens for a bounds check of array[0].
-      A64Opcode opcode = kA64Cbz2rt;
-      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
-      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
-    } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
-      A64Opcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
-      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
-      int value = reg.Is64Bit() ? 63 : 31;
-      branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
-    }
-  }
-
-  if (branch == nullptr) {
-    OpRegImm(kOpCmp, reg, check_value);
-    branch = NewLIR2(kA64B2ct, arm_cond, 0);
-  }
-
-  branch->target = target;
-  return branch;
-}
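-
-// Note on the tbz/tbnz case above: for a signed compare with zero, "lt" is
-// exactly "sign bit set", so testing bit 31 (or 63 for wide registers)
-// replaces the cmp + conditional branch pair with a single instruction.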
-
-LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
-                                     RegStorage base_reg, int offset, int check_value,
-                                     LIR* target, LIR** compare) {
-  DCHECK(compare == nullptr);
-  // It is possible that the temp register is 64-bit (ArgReg or RefReg).
-  // Always compare 32-bit value no matter what temp_reg is.
-  if (temp_reg.Is64Bit()) {
-    temp_reg = As32BitReg(temp_reg);
-  }
-  Load32Disp(base_reg, offset, temp_reg);
-  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
-  return branch;
-}
-
-LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
-  bool dest_is_fp = r_dest.IsFloat();
-  bool src_is_fp = r_src.IsFloat();
-  A64Opcode opcode = kA64Brk1d;
-  LIR* res;
-
-  if (LIKELY(dest_is_fp == src_is_fp)) {
-    if (LIKELY(!dest_is_fp)) {
-      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
-
-      // Core/core copy.
-      // Copies involving the sp register require a different instruction.
-      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
-
-      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2-arg instruction.
-      //   This currently works because the other arguments are set to 0 by default. We should
-      //   instead introduce an alias kA64Mov2RR.
-
-      // core/core copy. Do a x/x copy only if both registers are x.
-      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
-        opcode = WIDE(opcode);
-      }
-    } else {
-      // Float/float copy.
-      bool dest_is_double = r_dest.IsDouble();
-      bool src_is_double = r_src.IsDouble();
-
-      // We do not do float/double or double/float casts here.
-      DCHECK_EQ(dest_is_double, src_is_double);
-
-      // Homogeneous float/float copy.
-      opcode = (dest_is_double) ? WIDE(kA64Fmov2ff) : kA64Fmov2ff;
-    }
-  } else {
-    // Inhomogeneous register copy.
-    if (dest_is_fp) {
-      if (r_dest.IsDouble()) {
-        opcode = kA64Fmov2Sx;
-      } else {
-        r_src = Check32BitReg(r_src);
-        opcode = kA64Fmov2sw;
-      }
-    } else {
-      if (r_src.IsDouble()) {
-        opcode = kA64Fmov2xS;
-      } else {
-        r_dest = Check32BitReg(r_dest);
-        opcode = kA64Fmov2ws;
-      }
-    }
-  }
-
-  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-
-  return res;
-}
-
-void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
-    AppendLIR(res);
-  }
-}
-
-void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  OpRegCopy(r_dest, r_src);
-}
-
-// Table of magic divisors
-struct MagicTable {
-  int magic64_base;
-  int magic64_eor;
-  uint64_t magic64;
-  uint32_t magic32;
-  uint32_t shift;
-  DividePattern pattern;
-};
-
-static const MagicTable magic_table[] = {
-  {   0,      0,                  0,          0, 0, DivideNone},  // 0
-  {   0,      0,                  0,          0, 0, DivideNone},  // 1
-  {   0,      0,                  0,          0, 0, DivideNone},  // 2
-  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
-  {   0,      0,                  0,          0, 0, DivideNone},  // 4
-  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
-  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
-  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
-  {   0,      0,                  0,          0, 0, DivideNone},  // 8
-  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
-  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
-  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
-  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
-  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
-  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
-  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
-};
-
-// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
-bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
-                                      bool is_div,
-                                      RegLocation rl_src,
-                                      RegLocation rl_dest,
-                                      int lit) {
-  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
-    return false;
-  }
-  DividePattern pattern = magic_table[lit].pattern;
-  if (pattern == DivideNone) {
-    return false;
-  }
-  // Tuning: add rem patterns
-  if (!is_div) {
-    return false;
-  }
-
-  RegStorage r_magic = AllocTemp();
-  LoadConstant(r_magic, magic_table[lit].magic32);
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage r_long_mul = AllocTemp();
-  NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
-  switch (pattern) {
-    case Divide3:
-      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
-      break;
-    case Divide5:
-      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
-                  32 + magic_table[lit].shift);
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
-      break;
-    case Divide7:
-      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
-                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
-      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
-      break;
-    default:
-      LOG(FATAL) << "Unexpected pattern: " << pattern;
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
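-
-// Rough sketch of the reciprocal multiply above, using divide-by-3 as an
-// example: 0x55555556 approximates 2^32 / 3, so the high 32 bits of
-// x * 0x55555556 hold x / 3 up to a sign correction:
-//   smull x_t, w_src, w_magic          // 64-bit product
-//   lsr   x_t, x_t, #32                // high half ~ quotient
-//   sub   w_dst, w_t, w_src, asr #31   // +1 for negative dividends
-// (Register names illustrative; the table entry selects the exact variant.)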
-
-bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
-                                        bool is_div,
-                                        RegLocation rl_src,
-                                        RegLocation rl_dest,
-                                        int64_t lit) {
-  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
-    return false;
-  }
-  DividePattern pattern = magic_table[lit].pattern;
-  if (pattern == DivideNone) {
-    return false;
-  }
-  // Tuning: add rem patterns
-  if (!is_div) {
-    return false;
-  }
-
-  RegStorage r_magic = AllocTempWide();
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  RegStorage r_long_mul = AllocTempWide();
-
-  if (magic_table[lit].magic64_base >= 0) {
-    // Check that the entry in the table is correct.
-    if (kIsDebugBuild) {
-      uint64_t reconstructed_imm;
-      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
-      if (magic_table[lit].magic64_eor >= 0) {
-        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
-        reconstructed_imm = base ^ eor;
-      } else {
-        reconstructed_imm = base + 1;
-      }
-      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
-    }
-
-    // Load the magic constant in two instructions.
-    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
-    if (magic_table[lit].magic64_eor >= 0) {
-      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
-              magic_table[lit].magic64_eor);
-    } else {
-      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
-    }
-  } else {
-    LoadConstantWide(r_magic, magic_table[lit].magic64);
-  }
-
-  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
-  switch (pattern) {
-    case Divide3:
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
-      break;
-    case Divide5:
-      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
-      break;
-    case Divide7:
-      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
-      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
-      break;
-    default:
-      LOG(FATAL) << "Unexpected pattern: " << pattern;
-  }
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
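-
-// The orr/eor path above leans on A64 logical immediates: e.g. the
-// divide-by-3 magic 0x5555555555555556 is the encodable pattern
-// 0x5555555555555555 plus one, so it loads as orr + add rather than a
-// longer movz/movk sequence.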
-
-// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
-// and store the result in 'rl_dest'.
-bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
-  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
-}
-
-// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
-// and store the result in 'rl_dest'.
-bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
-                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
-  const bool is_64bit = rl_dest.wide;
-  const int nbits = (is_64bit) ? 64 : 32;
-
-  if (lit < 2) {
-    return false;
-  }
-  if (!IsPowerOfTwo(lit)) {
-    if (is_64bit) {
-      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
-    } else {
-      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
-    }
-  }
-  int k = CTZ(lit);
-  if (k >= nbits - 2) {
-    // Avoid special cases.
-    return false;
-  }
-
-  RegLocation rl_result;
-  RegStorage t_reg;
-  if (is_64bit) {
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    t_reg = AllocTempWide();
-  } else {
-    rl_src = LoadValue(rl_src, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    t_reg = AllocTemp();
-  }
-
-  int shift = EncodeShift(kA64Lsr, nbits - k);
-  if (is_div) {
-    if (lit == 2) {
-      // Division by 2 is by far the most common division by constant.
-      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
-      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
-    } else {
-      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
-      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
-      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
-    }
-  } else {
-    if (lit == 2) {
-      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
-      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
-      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
-    } else {
-      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
-      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
-      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
-      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
-      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
-    }
-  }
-
-  if (is_64bit) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
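-
-// Sketch of the power-of-two path above, for lit = 2^k: Java division
-// truncates toward zero, so negative dividends get a bias of lit - 1 first,
-// built from the sign without a branch:
-//   t = src + (unsigned(src >> (nbits - 1)) >> (nbits - k));  q = t >> k
-// The remainder variant masks the biased value with lit - 1 and then
-// subtracts the bias back out.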
-
-bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src ATTRIBUTE_UNUSED,
-                                RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                int lit ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
-  UNREACHABLE();
-}
-
-RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                       RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                       int lit ATTRIBUTE_UNUSED,
-                                       bool is_div ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
-  UNREACHABLE();
-}
-
-RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  // Put the literal in a temp.
-  RegStorage lit_temp = AllocTemp();
-  LoadConstant(lit_temp, lit);
-  // Use the generic case for div/rem with arg2 in a register.
-  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
-  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
-  FreeTemp(lit_temp);
-
-  return rl_result;
-}
-
-RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                    RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                    RegLocation rl_src2 ATTRIBUTE_UNUSED,
-                                    bool is_div ATTRIBUTE_UNUSED,
-                                    int flags ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
-  UNREACHABLE();
-}
-
-RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
-                                    bool is_div) {
-  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
-
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
-  } else {
-    // temp = r_src1 / r_src2
-    // dest = r_src1 - temp * r_src2
-    RegStorage temp;
-    A64Opcode wide;
-    if (rl_result.reg.Is64Bit()) {
-      temp = AllocTempWide();
-      wide = WIDE(0);
-    } else {
-      temp = AllocTemp();
-      wide = UNWIDE(0);
-    }
-    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
-    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
-            r_src2.GetReg(), r_src1.GetReg());
-    FreeTemp(temp);
-  }
-  return rl_result;
-}
-
-bool Arm64Mir2Lir::GenInlinedAbsInt(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  // Compare the source value with zero. Write the negated value to the result if
-  // negative, otherwise write the original value.
-  OpRegImm(kOpCmp, rl_src.reg, 0);
-  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
-          kArmCondPl);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
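-
-// csneg with "pl" keeps the source when it is >= 0 and writes its negation
-// otherwise, so abs() needs no branch.  As with Math.abs, Integer.MIN_VALUE
-// maps to itself since its negation wraps around.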
-
-bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTargetWide(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  // Compare the source value with zero. Write the negated value to the result if
-  // negative, otherwise write the original value.
-  OpRegImm(kOpCmp, rl_src.reg, 0);
-  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_src.reg.GetReg(),
-          rl_src.reg.GetReg(), kArmCondPl);
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
-  DCHECK_EQ(cu_->instruction_set, kArm64);
-  RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
-  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
-  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
-  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
-          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
-  (is_long) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
-  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-  if (size == k64) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  RegLocation rl_src_value = info->args[2];  // [size] value
-  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
-
-  RegLocation rl_value;
-  if (size == k64) {
-    rl_value = LoadValueWide(rl_src_value, kCoreReg);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    rl_value = LoadValue(rl_src_value, kCoreReg);
-  }
-  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-  return true;
-}
-
-bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
-  DCHECK_EQ(cu_->instruction_set, kArm64);
-  // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
-  RegLocation rl_src_offset = info->args[2];  // long low
-  RegLocation rl_src_expected = info->args[4];  // int, long or Object
-  // If is_long, high half is in info->args[5]
-  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
-  // If is_long, high half is in info->args[7]
-  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
-
-  // Load Object and offset
-  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
-
-  RegLocation rl_new_value;
-  RegLocation rl_expected;
-  if (is_long) {
-    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
-    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
-  } else {
-    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
-    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
-  }
-
-  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
-    // Mark card for object assuming new value is stored.
-    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
-  }
-
-  RegStorage r_ptr = AllocTempRef();
-  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
-
-  // Free now unneeded rl_object and rl_offset to give more temps.
-  ClobberSReg(rl_object.s_reg_low);
-  FreeTemp(rl_object.reg);
-  ClobberSReg(rl_offset.s_reg_low);
-  FreeTemp(rl_offset.reg);
-
-  // do {
-  //   tmp = [r_ptr] - expected;
-  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
-  // result = tmp == 0;  // True iff the swap succeeded.
-
-  RegStorage r_tmp;
-  RegStorage r_tmp_stored;
-  RegStorage rl_new_value_stored = rl_new_value.reg;
-  A64Opcode wide = UNWIDE(0);
-  if (is_long) {
-    r_tmp_stored = r_tmp = AllocTempWide();
-    wide = WIDE(0);
-  } else if (is_object) {
-    // References use 64-bit registers, but are stored as compressed 32-bit values.
-    // This means r_tmp_stored != r_tmp.
-    r_tmp = AllocTempRef();
-    r_tmp_stored = As32BitReg(r_tmp);
-    rl_new_value_stored = As32BitReg(rl_new_value_stored);
-  } else {
-    r_tmp_stored = r_tmp = AllocTemp();
-  }
-
-  RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
-  LIR* loop = NewLIR0(kPseudoTargetLabel);
-  NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
-  OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
-  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-  LIR* early_exit = OpCondBranch(kCondNe, nullptr);
-  NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
-  NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
-  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-  OpCondBranch(kCondNe, loop);
-
-  LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
-  early_exit->target = exit_loop;
-
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
-
-  FreeTemp(r_tmp);  // Now unneeded.
-  FreeTemp(r_ptr);  // Now unneeded.
-
-  StoreValue(rl_dest, rl_result);
-
-  return true;
-}
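-
-// Note on the loop above: ldaxr/stlxr are a load/store-exclusive (LL/SC)
-// pair; stlxr writes 0 to its status register on success and 1 if the
-// exclusive monitor was lost, hence the compare-against-zero retry branch.
-// The acquire/release flavors appear to supply the ordering the intrinsic
-// needs without a separate dmb.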
-
-bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
-  constexpr int kLargeArrayThreshold = 512;
-
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_src_pos = info->args[1];
-  RegLocation rl_dst = info->args[2];
-  RegLocation rl_dst_pos = info->args[3];
-  RegLocation rl_length = info->args[4];
-  // Compile-time check; handle the exception in the non-inline method to reduce related meta-data.
-  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
-      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
-      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
-    return false;
-  }
-
-  ClobberCallerSave();
-  LockCallTemps();  // Prepare for explicit register usage.
-  RegStorage rs_src = rs_x0;
-  RegStorage rs_dst = rs_x1;
-  LoadValueDirectFixed(rl_src, rs_src);
-  LoadValueDirectFixed(rl_dst, rs_dst);
-
-  // Handle null pointer exception in slow-path.
-  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
-  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
-  // Handle potential overlap in slow-path.
-  // TUNING: Support overlapping cases.
-  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
-  // Handle exception or big length in slow-path.
-  RegStorage rs_length = rs_w2;
-  LoadValueDirectFixed(rl_length, rs_length);
-  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
-  // Src bounds check.
-  RegStorage rs_src_pos = rs_w3;
-  RegStorage rs_arr_length = rs_w4;
-  LoadValueDirectFixed(rl_src_pos, rs_src_pos);
-  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
-  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
-  OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
-  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
-  // Dst bounds check.
-  RegStorage rs_dst_pos = rs_w5;
-  LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
-  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
-  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
-  OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
-  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
-
-  // Everything is checked now.
-  // Set rs_src to the address of the first element to be copied.
-  rs_src_pos = As64BitReg(rs_src_pos);
-  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
-  OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
-  OpRegReg(kOpAdd, rs_src, rs_src_pos);
-  // Set rs_dst to the address of the first element to be copied.
-  rs_dst_pos = As64BitReg(rs_dst_pos);
-  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
-  OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
-  OpRegReg(kOpAdd, rs_dst, rs_dst_pos);
-
-  // rs_arr_length won't be used anymore.
-  RegStorage rs_tmp = rs_arr_length;
-  // Use a 64-bit view since rs_length will be used as an index.
-  rs_length = As64BitReg(rs_length);
-  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
-
-  // Copy one element.
-  LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
-  OpRegImm(kOpSub, rs_length, 2);
-  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
-  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
-
-  // Copy two elements.
-  LIR *copy_two = NewLIR0(kPseudoTargetLabel);
-  LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
-  OpRegImm(kOpSub, rs_length, 4);
-  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
-  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
-
-  // Copy four elements.
-  LIR *copy_four = NewLIR0(kPseudoTargetLabel);
-  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
-  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
-  OpRegImm(kOpSub, rs_length, 8);
-  rs_tmp = As64BitReg(rs_tmp);
-  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
-  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
-  LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
-  LIR* loop_finished = OpUnconditionalBranch(nullptr);
-
-  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
-  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
-  LIR* return_point = NewLIR0(kPseudoTargetLabel);
-
-  src_check_branch->target = check_failed;
-  dst_check_branch->target = check_failed;
-  src_dst_same->target = check_failed;
-  len_neg_or_too_big->target = check_failed;
-  src_pos_negative->target = check_failed;
-  src_bad_len->target = check_failed;
-  dst_pos_negative->target = check_failed;
-  dst_bad_len->target = check_failed;
-  jmp_to_copy_two->target = copy_two;
-  jmp_to_copy_four->target = copy_four;
-  jmp_to_ret->target = return_point;
-  jmp_to_loop->target = begin_loop;
-  loop_finished->target = return_point;
-
-  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
-  ClobberCallerSave();  // We must clobber everything because slow path will return here
-
-  return true;
-}
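-
-// The copy above walks rs_length (converted to bytes) downward: tbz on bit
-// 1 handles one leftover char, bit 2 a leftover pair, and the main loop
-// then moves 8 bytes (four chars) per iteration until the count reaches 0.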
-
-void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
-  lir->target = target;
-}
-
-bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
-  return dex_cache_arrays_layout_.Valid();
-}
-
-void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
-                                            bool wide) {
-  LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0);
-  adrp->operands[2] = WrapPointer(dex_file);
-  adrp->operands[3] = offset;
-  adrp->operands[4] = WrapPointer(adrp);
-  dex_cache_access_insns_.push_back(adrp);
-  if (wide) {
-    DCHECK(r_dest.Is64Bit());
-  }
-  LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, wide ? k64 : kReference, kNotVolatile);
-  ldr->operands[4] = adrp->operands[4];
-  ldr->flags.fixup = kFixupLabel;
-  dex_cache_access_insns_.push_back(ldr);
-}
-
-LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
-  UNREACHABLE();
-}
-
-LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
-  UNREACHABLE();
-}
-
-void Arm64Mir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                                  RegLocation rl_src3, bool is_sub) {
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
-  rl_src3 = LoadValue(rl_src3, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR4(is_sub ? kA64Msub4rrrr : kA64Madd4rrrr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
-          rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                                   RegLocation rl_src3, bool is_sub) {
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  rl_src3 = LoadValueWide(rl_src3, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR4(is_sub ? WIDE(kA64Msub4rrrr) : WIDE(kA64Madd4rrrr), rl_result.reg.GetReg(),
-          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
-                                                 RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
-                                                 int first_bit, int second_bit) {
-  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
-                   EncodeShift(kA64Lsl, second_bit - first_bit));
-  if (first_bit != 0) {
-    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
-  }
-}
-
-void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
-}
-
-// Test suspend flag, return target of taken suspend branch
-LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
-  RegStorage r_tmp = AllocTemp();
-  LoadBaseDisp(rs_xSELF, Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value(), r_tmp,
-      kUnsignedHalf, kNotVolatile);
-  LIR* cmp_branch = OpCmpImmBranch(target == nullptr ? kCondNe : kCondEq, r_tmp, 0, target);
-  FreeTemp(r_tmp);
-  return cmp_branch;
-}
-
-// Decrement register and branch on condition
-LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
-  // Combine sub & test using sub setflags encoding here.  We need to make sure a
-  // subtract form that sets carry is used, so generate explicitly.
-  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
-  A64Opcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
-  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-  return OpCondBranch(c_code, target);
-}
-
-bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
-    return false;
-  }
-  // Start off using the last LIR as the barrier. If it is not sufficient, we will generate a new one.
-  LIR* barrier = last_lir_insn_;
-
-  int dmb_flavor;
-  // TODO: revisit Arm barrier kinds
-  switch (barrier_kind) {
-    case kAnyStore: dmb_flavor = kISH; break;
-    case kLoadAny: dmb_flavor = kISH; break;
-        // We conjecture that kISHLD is insufficient.  It is documented
-        // to provide LoadLoad | StoreStore ordering.  But if this were used
-        // to implement volatile loads, we suspect that the lack of store
-        // atomicity on ARM would cause us to allow incorrect results for
-        // the canonical IRIW example.  But we're not sure.
-        // We should be using acquire loads instead.
-    case kStoreStore: dmb_flavor = kISHST; break;
-    case kAnyAny: dmb_flavor = kISH; break;
-    default:
-      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
-      dmb_flavor = kSY;  // quiet gcc.
-      break;
-  }
-
-  bool ret = false;
-
-  // If the same barrier already exists, don't generate another.
-  if (barrier == nullptr
-      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
-    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
-    ret = true;
-  }
-
-  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
-  DCHECK(!barrier->flags.use_def_invalid);
-  barrier->u.m.def_mask = &kEncodeAll;
-  return ret;
-}
-
-void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-
-  rl_src = LoadValue(rl_src, kCoreReg);
-  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
-  StoreValueWide(rl_dest, rl_result);
-}
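-
-// sbfm with immr = 0, imms = 31 is the sxtw alias: it copies bits 31..0 and
-// sign-extends through the upper half, which is exactly int-to-long.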
-
-void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
-                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) {
-  if (rl_src2.is_const) {
-    DCHECK(rl_src2.wide);
-    int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
-    if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
-      return;
-    }
-  }
-
-  RegLocation rl_result;
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-    GenDivZeroCheck(rl_src2.reg);
-  }
-  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
-  RegLocation rl_result;
-
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                  RegLocation rl_src1, RegLocation rl_src2, int flags) {
-  switch (opcode) {
-    case Instruction::NOT_LONG:
-      GenNotLong(rl_dest, rl_src2);
-      return;
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::MUL_LONG:
-    case Instruction::MUL_LONG_2ADDR:
-      GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::DIV_LONG:
-    case Instruction::DIV_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
-      return;
-    case Instruction::REM_LONG:
-    case Instruction::REM_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
-      return;
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::AND_LONG:
-      GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
-      return;
-    case Instruction::NEG_LONG: {
-      GenNegLong(rl_dest, rl_src2);
-      return;
-    }
-    default:
-      LOG(FATAL) << "Invalid long arith op";
-      return;
-  }
-}
-
-/*
- * Generate array load
- */
-void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = RegClassBySize(size);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
-  RegLocation rl_result;
-  bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kRefReg);
-  if (!constant_index) {
-    rl_index = LoadValue(rl_index, kCoreReg);
-  }
-
-  if (rl_dest.wide) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  /* null object? */
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  RegStorage reg_len;
-  if (needs_range_check) {
-    reg_len = AllocTemp();
-    /* Get len */
-    Load32Disp(rl_array.reg, len_offset, reg_len);
-    MarkPossibleNullPointerException(opt_flags);
-  } else {
-    ForceImplicitNullCheck(rl_array.reg, opt_flags);
-  }
-  if (constant_index) {
-    rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    if (needs_range_check) {
-      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
-      FreeTemp(reg_len);
-    }
-    // Fold the constant index into the data offset.
-    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
-    if (rl_result.ref) {
-      LoadRefDisp(rl_array.reg, data_offset, rl_result.reg, kNotVolatile);
-    } else {
-      LoadBaseDisp(rl_array.reg, data_offset, rl_result.reg, size, kNotVolatile);
-    }
-  } else {
-    // Offset base, then use indexed load.
-    RegStorage reg_ptr = AllocTempRef();
-    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-    FreeTemp(rl_array.reg);
-    rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-    if (rl_result.ref) {
-      LoadRefIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale);
-    } else {
-      LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
-    }
-    FreeTemp(reg_ptr);
-  }
-  if (rl_dest.wide) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-/*
- * Generate array store
- */
-void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = RegClassBySize(size);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  bool constant_index = rl_index.is_const;
-
-  int data_offset;
-  if (size == k64 || size == kDouble) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  rl_array = LoadValue(rl_array, kRefReg);
-  if (!constant_index) {
-    rl_index = LoadValue(rl_index, kCoreReg);
-  }
-
-  RegStorage reg_ptr;
-  bool allocated_reg_ptr_temp = false;
-  if (constant_index) {
-    reg_ptr = rl_array.reg;
-  } else if (IsTemp(rl_array.reg) && !card_mark) {
-    Clobber(rl_array.reg);
-    reg_ptr = rl_array.reg;
-  } else {
-    allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTempRef();
-  }
-
-  /* null object? */
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  RegStorage reg_len;
-  if (needs_range_check) {
-    reg_len = AllocTemp();
-    // NOTE: max live temps (4) here.
-    /* Get len */
-    Load32Disp(rl_array.reg, len_offset, reg_len);
-    MarkPossibleNullPointerException(opt_flags);
-  } else {
-    ForceImplicitNullCheck(rl_array.reg, opt_flags);
-  }
-  /* at this point, reg_ptr points to array, 2 live temps */
-  if (rl_src.wide) {
-    rl_src = LoadValueWide(rl_src, reg_class);
-  } else {
-    rl_src = LoadValue(rl_src, reg_class);
-  }
-  if (constant_index) {
-    if (needs_range_check) {
-      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
-      FreeTemp(reg_len);
-    }
-    // Fold the constant index into the data offset.
-    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
-    if (rl_src.ref) {
-      StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
-    } else {
-      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
-    }
-  } else {
-    /* reg_ptr -> array data */
-    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-    if (rl_src.ref) {
-      StoreRefIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale);
-    } else {
-      StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
-    }
-  }
-  if (allocated_reg_ptr_temp) {
-    FreeTemp(reg_ptr);
-  }
-  if (card_mark) {
-    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
-  }
-}
-
-void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
-                                     int flags ATTRIBUTE_UNUSED) {
-  OpKind op = kOpBkpt;
-  // Per spec, we only care about low 6 bits of shift amount.
-  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  if (shift_amount == 0) {
-    StoreValueWide(rl_dest, rl_src);
-    return;
-  }
-
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      op = kOpLsr;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case";
-  }
-  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                     RegLocation rl_src1, RegLocation rl_src2, int flags) {
-  OpKind op = kOpBkpt;
-  switch (opcode) {
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      op = kOpAdd;
-      break;
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      op = kOpSub;
-      break;
-    case Instruction::AND_LONG:
-    case Instruction::AND_LONG_2ADDR:
-      op = kOpAnd;
-      break;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      op = kOpOr;
-      break;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      op = kOpXor;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode";
-  }
-
-  if (op == kOpSub) {
-    if (!rl_src2.is_const) {
-      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-    }
-  } else {
-    // Associativity.
-    if (!rl_src2.is_const) {
-      DCHECK(rl_src1.is_const);
-      std::swap(rl_src1, rl_src2);
-    }
-  }
-  DCHECK(rl_src2.is_const);
-  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
-  StoreValueWide(rl_dest, rl_result);
-}
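-
-// Illustrative dispatch: for add-long with a constant first operand the operands are swapped so
-// the constant lands in rl_src2 and OpRegRegImm64 can be used; for sub-long with a constant
-// first operand no swap is legal, so it falls back to the register-register GenArithOpLong path.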
-
-static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
-  // Find first register.
-  int first_bit_set = CTZ(reg_mask) + 1;
-  *reg = *reg + first_bit_set;
-  reg_mask >>= first_bit_set;
-  return reg_mask;
-}
-
-/**
- * @brief Split a register list into pairs or single registers.
- *
- * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
- * @code
- *   int reg1 = -1, reg2 = -1;
- *   while (reg_mask) {
- *     reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
- *     if (UNLIKELY(reg2 < 0)) {
- *       // Single register in reg1.
- *     } else {
- *       // Pair in reg1, reg2.
- *     }
- *   }
- * @endcode
- */
-static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
-  // Find first register.
-  int first_bit_set = CTZ(reg_mask) + 1;
-  int reg = *reg1 + first_bit_set;
-  reg_mask >>= first_bit_set;
-
-  if (LIKELY(reg_mask)) {
-    // Save the first register, find the second and use the pair opcode.
-    int second_bit_set = CTZ(reg_mask) + 1;
-    *reg2 = reg;
-    reg_mask >>= second_bit_set;
-    *reg1 = reg + second_bit_set;
-    return reg_mask;
-  }
-
-  // Use the single opcode, as we just have one register.
-  *reg1 = reg;
-  *reg2 = -1;
-  return reg_mask;
-}
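-
-// A worked trace (illustrative mask): with reg_mask = 0b1011 (registers 0, 1 and 3) and
-// reg1 = reg2 = -1, the first call yields the pair reg2 = 0, reg1 = 1 and returns the mask
-// shifted so it stays relative to the last extracted register; the second call yields the
-// single register reg1 = 3 (reg2 = -1) and returns 0, ending the loop.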
-
-static dwarf::Reg DwarfCoreReg(int num) {
-  return dwarf::Reg::Arm64Core(num);
-}
-
-static dwarf::Reg DwarfFpReg(int num) {
-  return dwarf::Reg::Arm64Fp(num);
-}
-
-static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
-  int reg1 = -1, reg2 = -1;
-  const int reg_log2_size = 3;
-
-  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
-    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
-    if (UNLIKELY(reg2 < 0)) {
-      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size);
-    } else {
-      m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
-                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size);
-    }
-  }
-}
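-
-// Note on the offset arithmetic above: the incoming offset is in bytes and is converted to
-// 8-byte slot units (offset >> 3) because the str/stp forms used here take a scaled
-// immediate; each iteration advances by one slot pair, hence offset += 2.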
-
-// TODO(Arm64): consider using ld1 and st1?
-static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
-  int reg1 = -1, reg2 = -1;
-  const int reg_log2_size = 3;
-
-  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
-    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
-    if (UNLIKELY(reg2 < 0)) {
-      m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
-                   offset);
-      m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size);
-    } else {
-      m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
-                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
-      m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size);
-      m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size);
-    }
-  }
-}
-
-static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask,
-                           int frame_size) {
-  m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
-  m2l->cfi().AdjustCFAOffset(frame_size);
-
-  int core_count = POPCOUNT(core_reg_mask);
-
-  if (fp_reg_mask != 0) {
-    // Spill FP regs.
-    int fp_count = POPCOUNT(fp_reg_mask);
-    int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
-    SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
-  }
-
-  if (core_reg_mask != 0) {
-    // Spill core regs.
-    int spill_offset = frame_size - (core_count * kArm64PointerSize);
-    SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
-  }
-
-  return frame_size;
-}
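-
-// Illustrative layout (assumed sizes): with frame_size = 96, two core and two fp spills, the
-// fp pair is stored at 96 - 4 * 8 = 64 and the core pair at 96 - 2 * 8 = 80, i.e. both spill
-// areas sit at the top of the frame with the fp block directly below the core block.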
-
-static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
-                               uint32_t fp_reg_mask) {
-  // Otherwise, spill both core and fp regs at the same time.
-  // The very first instruction will be an stp with pre-indexed address, moving the stack pointer
-  // down. From then on, we fill upwards. This will generate overall the same number of
-  // instructions as the specialized code above in most cases (the exception being an odd number
-  // of core spills combined with an even, non-zero number of fp spills), but is more flexible,
-  // as the offsets are guaranteed small.
-  //
-  // Some demonstrative fill cases : (c) = core, (f) = fp
-  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
-  // fc => 23   fc => 23   ff => 11   ff => 22
-  // ff    11    f    11               f    11
-  //
-  int reg1 = -1, reg2 = -1;
-  int core_count = POPCOUNT(core_reg_mask);
-  int fp_count = POPCOUNT(fp_reg_mask);
-
-  int combined = fp_count + core_count;
-  int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.
-
-  int cur_offset = 2;  // Starting slot offset after the first stp; we expect it to fill the
-                       // bottom pair of slots.
-
-  // First figure out whether the bottom is FP or core.
-  if (fp_count > 0) {
-    // Some FP spills.
-    //
-    // Four cases: (d0 is dummy to fill up stp)
-    // 1) Single FP, even number of core -> stp d0, fp_reg
-    // 2) Single FP, odd number of core -> stp fp_reg, d0
-    // 3) More FP, even number combined -> stp fp_reg1, fp_reg2
-    // 4) More FP, odd number combined -> stp d0, fp_reg
-    if (fp_count == 1) {
-      fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
-      DCHECK_EQ(fp_reg_mask, 0U);
-      if (core_count % 2 == 0) {
-        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
-                     RegStorage::FloatSolo64(reg1).GetReg(),
-                     RegStorage::FloatSolo64(reg1).GetReg(),
-                     base.GetReg(), -all_offset);
-        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
-        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
-      } else {
-        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
-                     RegStorage::FloatSolo64(reg1).GetReg(),
-                     RegStorage::FloatSolo64(reg1).GetReg(),
-                     base.GetReg(), -all_offset);
-        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
-        m2l->cfi().RelOffset(DwarfFpReg(reg1), 0);
-        cur_offset = 0;  // That core reg needs to go into the upper half.
-      }
-    } else {
-      if (combined % 2 == 0) {
-        fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
-        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
-                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
-        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
-        m2l->cfi().RelOffset(DwarfFpReg(reg2), 0);
-        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
-      } else {
-        fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
-        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
-                     base.GetReg(), -all_offset);
-        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
-        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
-      }
-    }
-  } else {
-    // No FP spills.
-    //
-    // Two cases:
-    // 1) Even number of core -> stp core1, core2
-    // 2) Odd number of core -> stp xzr, core1
-    if (core_count % 2 == 1) {
-      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
-      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
-                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
-      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
-    } else {
-      core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
-      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
-                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
-      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
-    }
-  }
-  DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(),
-            static_cast<int>(all_offset * kArm64PointerSize));
-
-  if (fp_count != 0) {
-    for (; fp_reg_mask != 0;) {
-      // Have some FP regs to do.
-      fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
-      if (UNLIKELY(reg2 < 0)) {
-        m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
-                     cur_offset);
-        m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize);
-        // Do not increment offset here, as the second half will be filled by a core reg.
-      } else {
-        m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
-                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
-        m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize);
-        m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize);
-        cur_offset += 2;
-      }
-    }
-
-    // Reset counting.
-    reg1 = -1;
-
-    // If there is an odd number of core registers, we need to store the bottom now.
-    if (core_count % 2 == 1) {
-      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
-      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
-                   cur_offset + 1);
-      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
-      cur_offset += 2;  // Half-slot filled now.
-    }
-  }
-
-  // Spill the rest of the core regs. Their count is guaranteed to be even.
-  DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
-  for (; core_reg_mask != 0; cur_offset += 2) {
-    core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
-    m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
-                 RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
-    m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize);
-    m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
-  }
-
-  DCHECK_EQ(cur_offset, all_offset);
-
-  return all_offset * kArm64PointerSize;
-}
-
-int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
-                            int frame_size) {
-  // If the frame size is small enough that all offsets would fit into the immediates, use that
-  // setup, as it decrements sp early (kind of instruction scheduling), and is not worse
-  // instruction-count wise than the complicated code below.
-  //
-  // This case is also optimal when we have an odd number of core spills, and an even (non-zero)
-  // number of fp spills.
-  if ((RoundUp(frame_size, 8) / 8 <= 63)) {
-    return SpillRegsPreSub(this, core_reg_mask, fp_reg_mask, frame_size);
-  } else {
-    return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask);
-  }
-}
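-
-// The threshold above: RoundUp(frame_size, 8) / 8 <= 63 means every spill slot index fits the
-// scaled 7-bit signed immediate of stp (at most 63 slots of 8 bytes, i.e. 504 bytes), so the
-// simple sub-then-store variant is guaranteed to encode.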
-
-static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
-  int reg1 = -1, reg2 = -1;
-  const int reg_log2_size = 3;
-
-  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
-    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
-    if (UNLIKELY(reg2 < 0)) {
-      m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
-      m2l->cfi().Restore(DwarfCoreReg(reg1));
-    } else {
-      DCHECK_LE(offset, 63);
-      m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
-                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
-      m2l->cfi().Restore(DwarfCoreReg(reg2));
-      m2l->cfi().Restore(DwarfCoreReg(reg1));
-    }
-  }
-}
-
-static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
-  int reg1 = -1, reg2 = -1;
-  const int reg_log2_size = 3;
-
-  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
-    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
-    if (UNLIKELY(reg2 < 0)) {
-      m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
-                   offset);
-      m2l->cfi().Restore(DwarfFpReg(reg1));
-    } else {
-      m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
-                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
-      m2l->cfi().Restore(DwarfFpReg(reg2));
-      m2l->cfi().Restore(DwarfFpReg(reg1));
-    }
-  }
-}
-
-void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
-                               int frame_size) {
-  DCHECK_EQ(base, rs_sp);
-  // Restore saves and drop stack frame.
-  // 2 versions:
-  //
-  // 1. (Original): Try to address directly, then drop the whole frame.
-  //                Limitation: ldp's offset is a 7-bit signed (scaled) immediate.
-  //
-  // 2. (New): Drop the non-save-part. Then do similar to original, which is now guaranteed to be
-  //           in range. Then drop the rest.
-  //
-  // TODO: In methods with few spills but huge frame, it would be better to do non-immediate loads
-  //       in variant 1.
-
-  // "Magic" constant, 63 (max signed 7b) * 8.
-  static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;
-
-  const int num_core_spills = POPCOUNT(core_reg_mask);
-  const int num_fp_spills = POPCOUNT(fp_reg_mask);
-
-  int early_drop = 0;
-
-  if (frame_size > kMaxFramesizeForOffset) {
-    // Second variant. Drop the frame part.
-
-    // TODO: Always use the first formula, as num_fp_spills would be zero?
-    if (fp_reg_mask != 0) {
-      early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
-    } else {
-      early_drop = frame_size - kArm64PointerSize * num_core_spills;
-    }
-
-    // Drop needs to be 16B aligned, so that SP keeps aligned.
-    early_drop = RoundDown(early_drop, 16);
-
-    OpRegImm64(kOpAdd, rs_sp, early_drop);
-    cfi_.AdjustCFAOffset(-early_drop);
-  }
-
-  // Unspill.
-  if (fp_reg_mask != 0) {
-    int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
-    UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
-  }
-  if (core_reg_mask != 0) {
-    int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
-    UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
-  }
-
-  // Drop the (rest of) the frame.
-  int adjust = frame_size - early_drop;
-  OpRegImm64(kOpAdd, rs_sp, adjust);
-  cfi_.AdjustCFAOffset(-adjust);
-}
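-
-// Worked example for the early drop (illustrative sizes): frame_size = 1024 with 4 core and
-// 2 fp spills gives early_drop = 1024 - 8 * 6 = 976, already 16B aligned. The fp pair is then
-// reloaded from offset 0 and the core regs from offset 16, both well within ldp range, and
-// the final adjustment drops the remaining 1024 - 976 = 48 bytes.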
-
-bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
-  A64Opcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0);
-  RegLocation rl_src_i = info->args[0];
-  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegLocation rl_i = IsWide(size) ?
-      LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
-  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
-  IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
-  return true;
-}
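-
-// Example of the intrinsic above: 0x00000001 passed through rbit becomes 0x80000000; the wide
-// variant reverses all 64 bits of the argument.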
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
deleted file mode 100644
index 691bfd9..0000000
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ /dev/null
@@ -1,912 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm64.h"
-
-#include <inttypes.h>
-
-#include <string>
-#include <sstream>
-
-#include "backend_arm64.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-
-namespace art {
-
-static constexpr RegStorage core_regs_arr[] =
-    {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
-     rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15,
-     rs_w16, rs_w17, rs_w18, rs_w19, rs_w20, rs_w21, rs_w22, rs_w23,
-     rs_w24, rs_w25, rs_w26, rs_w27, rs_w28, rs_w29, rs_w30, rs_w31,
-     rs_wzr};
-static constexpr RegStorage core64_regs_arr[] =
-    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
-     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
-     rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
-     rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31,
-     rs_xzr};
-static constexpr RegStorage sp_regs_arr[] =
-    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
-     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
-     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
-     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
-static constexpr RegStorage dp_regs_arr[] =
-    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
-     rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
-     rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
-     rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
-static constexpr RegStorage reserved_regs_arr[] = {rs_wSELF, rs_wsp, rs_wLR, rs_wzr};
-static constexpr RegStorage reserved64_regs_arr[] = {rs_xSELF, rs_sp, rs_xLR, rs_xzr};
-
-static constexpr RegStorage core_temps_arr[] =
-    {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
-     rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
-     rs_w17, rs_w18};
-static constexpr RegStorage core64_temps_arr[] =
-    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
-     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
-     rs_x17, rs_x18};
-static constexpr RegStorage sp_temps_arr[] =
-    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
-     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
-     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
-static constexpr RegStorage dp_temps_arr[] =
-    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
-     rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
-     rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
-
-static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr);
-static constexpr ArrayRef<const RegStorage> core64_regs(core64_regs_arr);
-static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr);
-static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr);
-static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr);
-static constexpr ArrayRef<const RegStorage> reserved64_regs(reserved64_regs_arr);
-static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr);
-static constexpr ArrayRef<const RegStorage> core64_temps(core64_temps_arr);
-static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr);
-static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr);
-
-RegLocation Arm64Mir2Lir::LocCReturn() {
-  return a64_loc_c_return;
-}
-
-RegLocation Arm64Mir2Lir::LocCReturnRef() {
-  return a64_loc_c_return_ref;
-}
-
-RegLocation Arm64Mir2Lir::LocCReturnWide() {
-  return a64_loc_c_return_wide;
-}
-
-RegLocation Arm64Mir2Lir::LocCReturnFloat() {
-  return a64_loc_c_return_float;
-}
-
-RegLocation Arm64Mir2Lir::LocCReturnDouble() {
-  return a64_loc_c_return_double;
-}
-
-// Return a target-dependent special register.
-RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
-  RegStorage res_reg = RegStorage::InvalidReg();
-  switch (reg) {
-    case kSelf: res_reg = rs_wSELF; break;
-    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
-    case kLr: res_reg =  rs_wLR; break;
-    case kPc: res_reg = RegStorage::InvalidReg(); break;
-    case kSp: res_reg =  rs_wsp; break;
-    case kArg0: res_reg = rs_w0; break;
-    case kArg1: res_reg = rs_w1; break;
-    case kArg2: res_reg = rs_w2; break;
-    case kArg3: res_reg = rs_w3; break;
-    case kArg4: res_reg = rs_w4; break;
-    case kArg5: res_reg = rs_w5; break;
-    case kArg6: res_reg = rs_w6; break;
-    case kArg7: res_reg = rs_w7; break;
-    case kFArg0: res_reg = rs_f0; break;
-    case kFArg1: res_reg = rs_f1; break;
-    case kFArg2: res_reg = rs_f2; break;
-    case kFArg3: res_reg = rs_f3; break;
-    case kFArg4: res_reg = rs_f4; break;
-    case kFArg5: res_reg = rs_f5; break;
-    case kFArg6: res_reg = rs_f6; break;
-    case kFArg7: res_reg = rs_f7; break;
-    case kRet0: res_reg = rs_w0; break;
-    case kRet1: res_reg = rs_w1; break;
-    case kInvokeTgt: res_reg = rs_wLR; break;
-    case kHiddenArg: res_reg = rs_wIP1; break;
-    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
-    case kCount: res_reg = RegStorage::InvalidReg(); break;
-    default: res_reg = RegStorage::InvalidReg();
-  }
-  return res_reg;
-}
-
-/*
- * Decode the register id. This routine makes assumptions on the encoding made by RegStorage.
- */
-ResourceMask Arm64Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
-  // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor.
-
-  // Check if the shape mask is zero (i.e. invalid).
-  if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) {
-    // The zero register is not a true register. It is just an immediate zero.
-    return kEncodeNone;
-  }
-
-  return ResourceMask::Bit(
-      // FP register starts at bit position 32.
-      (reg.IsFloat() ? kA64FPReg0 : 0) + reg.GetRegNum());
-}
-
-ResourceMask Arm64Mir2Lir::GetPCUseDefEncoding() const {
-  // Note: On arm64, the pc cannot be written except via branch instructions, which are regarded
-  //       as a kind of barrier. All other instructions only read the pc, so there is no
-  //       dependency between any of them and it is fine to just return kEncodeNone here.
-  return kEncodeNone;
-}
-
-// Arm64 specific setup.  TODO: inline?
-void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                            ResourceMask* use_mask, ResourceMask* def_mask) {
-  DCHECK_EQ(cu_->instruction_set, kArm64);
-  DCHECK(!lir->flags.use_def_invalid);
-
-  // Note: REG_USE_PC is ignored, the reason is the same with what we do in GetPCUseDefEncoding().
-  // These flags are somewhat uncommon - bypass if we can.
-  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
-    if (flags & REG_DEF_SP) {
-      def_mask->SetBit(kA64RegSP);
-    }
-
-    if (flags & REG_USE_SP) {
-      use_mask->SetBit(kA64RegSP);
-    }
-
-    if (flags & REG_DEF_LR) {
-      def_mask->SetBit(kA64RegLR);
-    }
-  }
-}
-
-ArmConditionCode Arm64Mir2Lir::ArmConditionEncoding(ConditionCode ccode) {
-  ArmConditionCode res;
-  switch (ccode) {
-    case kCondEq: res = kArmCondEq; break;
-    case kCondNe: res = kArmCondNe; break;
-    case kCondCs: res = kArmCondCs; break;
-    case kCondCc: res = kArmCondCc; break;
-    case kCondUlt: res = kArmCondCc; break;
-    case kCondUge: res = kArmCondCs; break;
-    case kCondMi: res = kArmCondMi; break;
-    case kCondPl: res = kArmCondPl; break;
-    case kCondVs: res = kArmCondVs; break;
-    case kCondVc: res = kArmCondVc; break;
-    case kCondHi: res = kArmCondHi; break;
-    case kCondLs: res = kArmCondLs; break;
-    case kCondGe: res = kArmCondGe; break;
-    case kCondLt: res = kArmCondLt; break;
-    case kCondGt: res = kArmCondGt; break;
-    case kCondLe: res = kArmCondLe; break;
-    case kCondAl: res = kArmCondAl; break;
-    case kCondNv: res = kArmCondNv; break;
-    default:
-      LOG(FATAL) << "Bad condition code " << ccode;
-      res = static_cast<ArmConditionCode>(0);  // Quiet gcc
-  }
-  return res;
-}
-
-static const char *shift_names[4] = {
-  "lsl",
-  "lsr",
-  "asr",
-  "ror"
-};
-
-static const char* extend_names[8] = {
-  "uxtb",
-  "uxth",
-  "uxtw",
-  "uxtx",
-  "sxtb",
-  "sxth",
-  "sxtw",
-  "sxtx",
-};
-
-/* Decode and print a register extension or shift (e.g. ", uxtb #1") */
-static void DecodeRegExtendOrShift(int operand, char *buf, size_t buf_size) {
-  if ((operand & (1 << 6)) == 0) {
-    const char *shift_name = shift_names[(operand >> 7) & 0x3];
-    int amount = operand & 0x3f;
-    snprintf(buf, buf_size, ", %s #%d", shift_name, amount);
-  } else {
-    const char *extend_name = extend_names[(operand >> 3) & 0x7];
-    int amount = operand & 0x7;
-    if (amount == 0) {
-      snprintf(buf, buf_size, ", %s", extend_name);
-    } else {
-      snprintf(buf, buf_size, ", %s #%d", extend_name, amount);
-    }
-  }
-}
-
-static uint64_t bit_mask(unsigned width) {
-  DCHECK_LE(width, 64U);
-  return (width == 64) ? static_cast<uint64_t>(-1) : ((UINT64_C(1) << (width)) - UINT64_C(1));
-}
-
-static uint64_t RotateRight(uint64_t value, unsigned rotate, unsigned width) {
-  DCHECK_LE(width, 64U);
-  rotate &= 63;
-  value = value & bit_mask(width);
-  return ((value & bit_mask(rotate)) << (width - rotate)) | (value >> rotate);
-}
-
-static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width) {
-  unsigned i;
-  unsigned reg_size = (is_wide) ? 64 : 32;
-  uint64_t result = value & bit_mask(width);
-  for (i = width; i < reg_size; i *= 2) {
-    result |= (result << i);
-  }
-  DCHECK_EQ(i, reg_size);
-  return result;
-}
-
-/**
- * @brief Decode an immediate in the form required by logical instructions.
- *
- * @param is_wide Whether @p value encodes a 64-bit (as opposed to 32-bit) immediate.
- * @param value The encoded logical immediate that is to be decoded.
- * @return The decoded logical immediate.
- * @note This is the inverse of Arm64Mir2Lir::EncodeLogicalImmediate().
- */
-uint64_t Arm64Mir2Lir::DecodeLogicalImmediate(bool is_wide, int value) {
-  unsigned n     = (value >> 12) & 0x01;
-  unsigned imm_r = (value >>  6) & 0x3f;
-  unsigned imm_s = (value >>  0) & 0x3f;
-
-  // An integer is constructed from the n, imm_s and imm_r bits according to
-  // the following table:
-  //
-  //  N   imms    immr    size        S             R
-  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
-  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
-  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
-  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
-  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
-  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
-  // (s bits must not be all set)
-  //
-  // A pattern is constructed of size bits, where the least significant S+1
-  // bits are set. The pattern is rotated right by R, and repeated across a
-  // 32 or 64-bit value, depending on destination register width.
-
-  if (n == 1) {
-    DCHECK_NE(imm_s, 0x3fU);
-    uint64_t bits = bit_mask(imm_s + 1);
-    return RotateRight(bits, imm_r, 64);
-  } else {
-    DCHECK_NE((imm_s >> 1), 0x1fU);
-    for (unsigned width = 0x20; width >= 0x2; width >>= 1) {
-      if ((imm_s & width) == 0) {
-        unsigned mask = (unsigned)(width - 1);
-        DCHECK_NE((imm_s & mask), mask);
-        uint64_t bits = bit_mask((imm_s & mask) + 1);
-        return RepeatBitsAcrossReg(is_wide, RotateRight(bits, imm_r & mask, width), width);
-      }
-    }
-  }
-  return 0;
-}
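-
-// A worked decode (illustrative encoding): value = 0x27 gives n = 0, imm_r = 0 and
-// imm_s = 0b100111. The width loop stops at width = 16 (imm_s & 16 == 0), so
-// bits = bit_mask((imm_s & 15) + 1) = 0xff; rotating by 0 and repeating across 64 bits
-// yields 0x00ff00ff00ff00ff.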
-
-/**
- * @brief Decode an 8-bit single-precision floating point number encoded with EncodeImmSingle().
- */
-static float DecodeImmSingle(uint8_t small_float) {
-  int mantissa = (small_float & 0x0f) + 0x10;
-  int sign = ((small_float & 0x80) == 0) ? 1 : -1;
-  float signed_mantissa = static_cast<float>(sign*mantissa);
-  int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
-  return signed_mantissa*static_cast<float>(1 << exponent)*0.0078125f;
-}
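-
-// Worked example: small_float = 0x70 gives mantissa = 0x10, sign = +1 and
-// exponent = ((7 + 4) & 0x7) = 3, so the result is 16 * 8 * 0.0078125f = 1.0f, matching the
-// A64 FMOV immediate encoding of 1.0.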
-
-static const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
-                                 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
-/*
- * Interpret a format string and build a string no longer than size
- * See format key in assemble_arm64.cc.
- */
-std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
-  std::string buf;
-  const char* fmt_end = &fmt[strlen(fmt)];
-  char tbuf[256];
-  const char* name;
-  char nc;
-  while (fmt < fmt_end) {
-    int operand;
-    if (*fmt == '!') {
-      fmt++;
-      DCHECK_LT(fmt, fmt_end);
-      nc = *fmt++;
-      if (nc == '!') {
-        strcpy(tbuf, "!");
-      } else {
-         DCHECK_LT(fmt, fmt_end);
-         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
-         operand = lir->operands[nc-'0'];
-         switch (*fmt++) {
-           case 'e':  {
-               // Omit ", uxtw #0" in strings like "add w0, w1, w3, uxtw #0" and
-               // ", uxtx #0" in strings like "add x0, x1, x3, uxtx #0"
-               int omittable = ((IS_WIDE(lir->opcode)) ? EncodeExtend(kA64Uxtx, 0) :
-                                EncodeExtend(kA64Uxtw, 0));
-               if (LIKELY(operand == omittable)) {
-                 strcpy(tbuf, "");
-               } else {
-                 DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
-               }
-             }
-             break;
-           case 'o':
-             // Omit ", lsl #0"
-             if (LIKELY(operand == EncodeShift(kA64Lsl, 0))) {
-               strcpy(tbuf, "");
-             } else {
-               DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
-             }
-             break;
-           case 'B':
-             switch (operand) {
-               case kSY:
-                 name = "sy";
-                 break;
-               case kST:
-                 name = "st";
-                 break;
-               case kISH:
-                 name = "ish";
-                 break;
-               case kISHST:
-                 name = "ishst";
-                 break;
-               case kNSH:
-                 name = "nsh";
-                 break;
-               case kNSHST:
-                 name = "shst";
-                 break;
-               default:
-                 name = "DecodeError2";
-                 break;
-             }
-             strcpy(tbuf, name);
-             break;
-           case 's':
-             snprintf(tbuf, arraysize(tbuf), "s%d", operand & RegStorage::kRegNumMask);
-             break;
-           case 'S':
-             snprintf(tbuf, arraysize(tbuf), "d%d", operand & RegStorage::kRegNumMask);
-             break;
-           case 'f':
-             snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_WIDE(lir->opcode)) ? 'd' : 's',
-                      operand & RegStorage::kRegNumMask);
-             break;
-           case 'l': {
-               bool is_wide = IS_WIDE(lir->opcode);
-               uint64_t imm = DecodeLogicalImmediate(is_wide, operand);
-               snprintf(tbuf, arraysize(tbuf), "%" PRId64 " (%#" PRIx64 ")", imm, imm);
-             }
-             break;
-           case 'I':
-             snprintf(tbuf, arraysize(tbuf), "%f", DecodeImmSingle(operand));
-             break;
-           case 'M':
-             if (LIKELY(operand == 0))
-               strcpy(tbuf, "");
-             else
-               snprintf(tbuf, arraysize(tbuf), ", lsl #%d", 16*operand);
-             break;
-           case 'd':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand);
-             break;
-           case 'w':
-             if (LIKELY(operand != rwzr))
-               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
-             else
-               strcpy(tbuf, "wzr");
-             break;
-           case 'W':
-             if (LIKELY(operand != rwsp))
-               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
-             else
-               strcpy(tbuf, "wsp");
-             break;
-           case 'x':
-             if (LIKELY(operand != rxzr))
-               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
-             else
-               strcpy(tbuf, "xzr");
-             break;
-           case 'X':
-             if (LIKELY(operand != rsp))
-               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
-             else
-               strcpy(tbuf, "sp");
-             break;
-           case 'D':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*((IS_WIDE(lir->opcode)) ? 8 : 4));
-             break;
-           case 'E':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
-             break;
-           case 'F':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
-             break;
-           case 'G':
-             if (LIKELY(operand == 0))
-               strcpy(tbuf, "");
-             else
-               strcpy(tbuf, (IS_WIDE(lir->opcode)) ? ", lsl #3" : ", lsl #2");
-             break;
-           case 'c':
-             strcpy(tbuf, cc_names[operand]);
-             break;
-           case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + (operand << 2),
-                 lir->target);
-             break;
-           case 'r': {
-               bool is_wide = IS_WIDE(lir->opcode);
-               if (LIKELY(operand != rwzr && operand != rxzr)) {
-                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
-                          operand & RegStorage::kRegNumMask);
-               } else {
-                 strcpy(tbuf, (is_wide) ? "xzr" : "wzr");
-               }
-             }
-             break;
-           case 'R': {
-               bool is_wide = IS_WIDE(lir->opcode);
-               if (LIKELY(operand != rwsp && operand != rsp)) {
-                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
-                          operand & RegStorage::kRegNumMask);
-               } else {
-                 strcpy(tbuf, (is_wide) ? "sp" : "wsp");
-               }
-             }
-             break;
-           case 'p':
-             snprintf(tbuf, arraysize(tbuf), ".+%d (addr %#" PRIxPTR ")", 4*operand,
-                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4*operand);
-             break;
-           case 'T':
-             if (LIKELY(operand == 0))
-               strcpy(tbuf, "");
-             else if (operand == 1)
-               strcpy(tbuf, ", lsl #12");
-             else
-               strcpy(tbuf, ", DecodeError3");
-             break;
-           case 'h':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand);
-             break;
-           default:
-             strcpy(tbuf, "DecodeError1");
-             break;
-        }
-        buf += tbuf;
-      }
-    } else {
-       buf += *fmt++;
-    }
-  }
-  // Dump thread offset.
-  std::string fmt_str = GetTargetInstFmt(lir->opcode);
-  if (std::string::npos != fmt_str.find(", [!1X, #!2") && rxSELF == lir->operands[1] &&
-      std::string::npos != buf.find(", [")) {
-    int offset = lir->operands[2];
-    if (std::string::npos != fmt_str.find("#!2d")) {
-    } else if (std::string::npos != fmt_str.find("#!2D")) {
-      offset *= (IS_WIDE(lir->opcode)) ? 8 : 4;
-    } else if (std::string::npos != fmt_str.find("#!2F")) {
-      offset *= 2;
-    } else {
-      LOG(FATAL) << "Should not reach here";
-    }
-    std::ostringstream tmp_stream;
-    Thread::DumpThreadOffset<8>(tmp_stream, offset);
-    buf += "  ; ";
-    buf += tmp_stream.str();
-  }
-  return buf;
-}
-
-void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) {
-  char buf[256];
-  buf[0] = 0;
-
-  if (mask.Equals(kEncodeAll)) {
-    strcpy(buf, "all");
-  } else {
-    char num[8];
-    int i;
-
-    for (i = 0; i < kA64RegEnd; i++) {
-      if (mask.HasBit(i)) {
-        snprintf(num, arraysize(num), "%d ", i);
-        strcat(buf, num);
-      }
-    }
-
-    if (mask.HasBit(ResourceMask::kCCode)) {
-      strcat(buf, "cc ");
-    }
-    if (mask.HasBit(ResourceMask::kFPStatus)) {
-      strcat(buf, "fpcc ");
-    }
-
-    /* Memory bits */
-    if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) {
-      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
-               DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
-               DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
-    }
-    if (mask.HasBit(ResourceMask::kLiteral)) {
-      strcat(buf, "lit ");
-    }
-
-    if (mask.HasBit(ResourceMask::kHeapRef)) {
-      strcat(buf, "heap ");
-    }
-    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
-      strcat(buf, "noalias ");
-    }
-  }
-  if (buf[0]) {
-    LOG(INFO) << prefix << ": " << buf;
-  }
-}
-
-bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) {
-  return (lir->opcode == kA64B1t);
-}
-
-RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
-  if (UNLIKELY(is_volatile)) {
-    // On arm64, fp register load/store is atomic only for single bytes.
-    if (size != kSignedByte && size != kUnsignedByte) {
-      return (size == kReference) ? kRefReg : kCoreReg;
-    }
-  }
-  return RegClassBySize(size);
-}
-
-Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena),
-      call_method_insns_(arena->Adapter()),
-      dex_cache_access_insns_(arena->Adapter()) {
-  // Sanity check - make sure encoding map lines up.
-  for (int i = 0; i < kA64Last; i++) {
-    DCHECK_EQ(UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode), i)
-        << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name
-        << " is wrong: expecting " << i << ", seeing "
-        << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode);
-  }
-}
-
-Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                            ArenaAllocator* const arena) {
-  return new Arm64Mir2Lir(cu, mir_graph, arena);
-}
-
-void Arm64Mir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, core64_regs, sp_regs, dp_regs,
-                                            reserved_regs, reserved64_regs,
-                                            core_temps, core64_temps, sp_temps, dp_temps));
-
-  // Target-specific adjustments.
-  // Alias single precision float registers to corresponding double registers.
-  for (RegisterInfo* info : reg_pool_->sp_regs_) {
-    int fp_reg_num = info->GetReg().GetRegNum();
-    RegStorage dp_reg = RegStorage::FloatSolo64(fp_reg_num);
-    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-    // Double precision register's master storage should refer to itself.
-    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
-    // Redirect single precision's master storage to master.
-    info->SetMaster(dp_reg_info);
-    // Singles should show a single 32-bit mask bit, at first referring to the low half.
-    DCHECK_EQ(info->StorageMask(), 0x1U);
-  }
-
-  // Alias 32bit W registers to corresponding 64bit X registers.
-  for (RegisterInfo* info : reg_pool_->core_regs_) {
-    int x_reg_num = info->GetReg().GetRegNum();
-    RegStorage x_reg = RegStorage::Solo64(x_reg_num);
-    RegisterInfo* x_reg_info = GetRegInfo(x_reg);
-    // 64bit X register's master storage should refer to itself.
-    DCHECK_EQ(x_reg_info, x_reg_info->Master());
-    // Redirect 32bit W master storage to 64bit X.
-    info->SetMaster(x_reg_info);
-    // 32bit W should show a single 32-bit mask bit, at first referring to the low half.
-    DCHECK_EQ(info->StorageMask(), 0x1U);
-  }
-
-  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
-  // TODO: adjust when we roll to hard float calling convention.
-  reg_pool_->next_core_reg_ = 2;
-  reg_pool_->next_sp_reg_ = 0;
-  reg_pool_->next_dp_reg_ = 0;
-}
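-
-// The aliasing above mirrors the hardware: on A64, s<n> is the low half of d<n> and w<n> the
-// low half of x<n>, so each narrow view shares its wide register's master storage and shows a
-// single 32-bit mask bit for the low half.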
-
-/*
- * TUNING: is this a true leaf?  We can't just use METHOD_IS_LEAF to decide, as some
- * instructions might call out to C/assembly helper functions.  Until
- * machinery is in place, always spill lr.
- */
-
-void Arm64Mir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rs_xLR.GetRegNum());
-  num_core_spills_++;
-}
-
-/* Clobber all regs that might be used by an external C call */
-void Arm64Mir2Lir::ClobberCallerSave() {
-  Clobber(rs_x0);
-  Clobber(rs_x1);
-  Clobber(rs_x2);
-  Clobber(rs_x3);
-  Clobber(rs_x4);
-  Clobber(rs_x5);
-  Clobber(rs_x6);
-  Clobber(rs_x7);
-  Clobber(rs_x8);
-  Clobber(rs_x9);
-  Clobber(rs_x10);
-  Clobber(rs_x11);
-  Clobber(rs_x12);
-  Clobber(rs_x13);
-  Clobber(rs_x14);
-  Clobber(rs_x15);
-  Clobber(rs_x16);
-  Clobber(rs_x17);
-  Clobber(rs_x18);
-  Clobber(rs_x30);
-
-  Clobber(rs_f0);
-  Clobber(rs_f1);
-  Clobber(rs_f2);
-  Clobber(rs_f3);
-  Clobber(rs_f4);
-  Clobber(rs_f5);
-  Clobber(rs_f6);
-  Clobber(rs_f7);
-  Clobber(rs_f16);
-  Clobber(rs_f17);
-  Clobber(rs_f18);
-  Clobber(rs_f19);
-  Clobber(rs_f20);
-  Clobber(rs_f21);
-  Clobber(rs_f22);
-  Clobber(rs_f23);
-  Clobber(rs_f24);
-  Clobber(rs_f25);
-  Clobber(rs_f26);
-  Clobber(rs_f27);
-  Clobber(rs_f28);
-  Clobber(rs_f29);
-  Clobber(rs_f30);
-  Clobber(rs_f31);
-}
-
-RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
-  RegLocation res = LocCReturnWide();
-  res.reg.SetReg(rx2);
-  res.reg.SetHighReg(rx3);
-  Clobber(rs_x2);
-  Clobber(rs_x3);
-  MarkInUse(rs_x2);
-  MarkInUse(rs_x3);
-  MarkWide(res.reg);
-  return res;
-}
-
-RegLocation Arm64Mir2Lir::GetReturnAlt() {
-  RegLocation res = LocCReturn();
-  res.reg.SetReg(rx1);
-  Clobber(rs_x1);
-  MarkInUse(rs_x1);
-  return res;
-}
-
-/* To be used when explicitly managing register use */
-void Arm64Mir2Lir::LockCallTemps() {
-  // TODO: needs cleanup.
-  LockTemp(rs_x0);
-  LockTemp(rs_x1);
-  LockTemp(rs_x2);
-  LockTemp(rs_x3);
-  LockTemp(rs_x4);
-  LockTemp(rs_x5);
-  LockTemp(rs_x6);
-  LockTemp(rs_x7);
-  LockTemp(rs_f0);
-  LockTemp(rs_f1);
-  LockTemp(rs_f2);
-  LockTemp(rs_f3);
-  LockTemp(rs_f4);
-  LockTemp(rs_f5);
-  LockTemp(rs_f6);
-  LockTemp(rs_f7);
-}
-
-/* To be used when explicitly managing register use */
-void Arm64Mir2Lir::FreeCallTemps() {
-  // TODO: needs cleanup.
-  FreeTemp(rs_x0);
-  FreeTemp(rs_x1);
-  FreeTemp(rs_x2);
-  FreeTemp(rs_x3);
-  FreeTemp(rs_x4);
-  FreeTemp(rs_x5);
-  FreeTemp(rs_x6);
-  FreeTemp(rs_x7);
-  FreeTemp(rs_f0);
-  FreeTemp(rs_f1);
-  FreeTemp(rs_f2);
-  FreeTemp(rs_f3);
-  FreeTemp(rs_f4);
-  FreeTemp(rs_f5);
-  FreeTemp(rs_f6);
-  FreeTemp(rs_f7);
-  FreeTemp(TargetReg(kHiddenArg));
-}
-
-RegStorage Arm64Mir2Lir::LoadHelper(QuickEntrypointEnum trampoline) {
-  // TODO(Arm64): use LoadWordDisp instead.
-  //   e.g. LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR);
-  LoadBaseDisp(rs_xSELF, GetThreadOffset<8>(trampoline).Int32Value(), rs_xLR, k64, kNotVolatile);
-  return rs_xLR;
-}
-
-LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
-  RegStorage tmp = rs_x0;
-  LoadWordDisp(rs_xSELF, Thread::ThreadSuspendTriggerOffset<8>().Int32Value(), tmp);
-  LIR* load2 = LoadWordDisp(tmp, 0, tmp);
-  return load2;
-}
-
-uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags;
-}
-
-const char* Arm64Mir2Lir::GetTargetInstName(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name;
-}
-
-const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
-}
-
-RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) {
-  const RegStorage coreArgMappingToPhysicalReg[] =
-      {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7};
-  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const RegStorage fpArgMappingToPhysicalReg[] =
-      {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7};
-  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
-
-  RegStorage result = RegStorage::InvalidReg();
-  if (arg.IsFP()) {
-    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      DCHECK(!arg.IsRef());
-      result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
-      if (result.Valid()) {
-        // TODO: switching between widths remains a bit ugly.  Better way?
-        int res_reg = result.GetReg();
-        result = arg.IsWide() ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg);
-      }
-    }
-  } else {
-    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
-      if (result.Valid()) {
-        // TODO: switching between widths remains a bit ugly.  Better way?
-        int res_reg = result.GetReg();
-        DCHECK(!(arg.IsWide() && arg.IsRef()));
-        result = (arg.IsWide() || arg.IsRef()) ?
-                 RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
-      }
-    }
-  }
-  return result;
-}
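-
-// Illustrative mapping (assumed argument order long, float, reference): the long takes x1 as a
-// Solo64, the float takes f0 as a FloatSolo32, and the reference takes x2 widened to a Solo64,
-// since references are handled as 64-bit values here.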
-
-void Arm64Mir2Lir::InstallLiteralPools() {
-  patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size());
-
-  // PC-relative calls to methods.
-  for (LIR* p : call_method_insns_) {
-    DCHECK_EQ(p->opcode, kA64Bl1t);
-    uint32_t target_method_idx = p->operands[1];
-    const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
-    patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
-                                                      target_dex_file, target_method_idx));
-  }
-
-  // PC-relative references to dex cache arrays.
-  for (LIR* p : dex_cache_access_insns_) {
-    auto non_wide = UNWIDE(p->opcode);  // May be a wide load for ArtMethod*.
-    DCHECK(non_wide == kA64Adrp2xd || non_wide == kA64Ldr3rXD) << p->opcode << " " << non_wide;
-    const LIR* adrp = UnwrapPointer<LIR>(p->operands[4]);
-    DCHECK_EQ(adrp->opcode, kA64Adrp2xd);
-    const DexFile* dex_file = UnwrapPointer<DexFile>(adrp->operands[2]);
-    uint32_t offset = adrp->operands[3];
-    DCHECK(!p->flags.is_nop);
-    DCHECK(!adrp->flags.is_nop);
-    patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, dex_file, adrp->offset, offset));
-  }
-
-  // And do the normal processing.
-  Mir2Lir::InstallLiteralPools();
-}
-
-int Arm64Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* /*info*/, int /*first*/, int count) {
-  /*
-   * TODO: Improve by adding block copy for large number of arguments.  For now, just
-   * copy a Dalvik vreg at a time.
-   */
-  return count;
-}
-
-void Arm64Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
-  DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
-  RegLocation rl_src[3];
-  RegLocation rl_dest = mir_graph_->GetBadLoc();
-  rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
-  ExtendedMIROpcode opcode = static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode);
-  switch (opcode) {
-    case kMirOpMaddInt:
-    case kMirOpMsubInt:
-      rl_dest = mir_graph_->GetDest(mir);
-      rl_src[0] = mir_graph_->GetSrc(mir, 0);
-      rl_src[1] = mir_graph_->GetSrc(mir, 1);
-      rl_src[2] = mir_graph_->GetSrc(mir, 2);
-      GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], opcode == kMirOpMsubInt);
-      break;
-    case kMirOpMaddLong:
-    case kMirOpMsubLong:
-      rl_dest = mir_graph_->GetDestWide(mir);
-      rl_src[0] = mir_graph_->GetSrcWide(mir, 0);
-      rl_src[1] = mir_graph_->GetSrcWide(mir, 2);
-      rl_src[2] = mir_graph_->GetSrcWide(mir, 4);
-      GenMaddMsubLong(rl_dest, rl_src[0], rl_src[1], rl_src[2], opcode == kMirOpMsubLong);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << static_cast<int>(opcode);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
deleted file mode 100644
index 58769ea..0000000
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ /dev/null
@@ -1,1407 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_arm64.h"
-
-#include "arm64_lir.h"
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-
-namespace art {
-
-/* This file contains codegen for the A64 ISA. */
-
-int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
-  /*
-   * Valid values will have the form:
-   *
-   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
-   *
- * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
-   */
-
-  // bits[19..0] are cleared.
-  if ((bits & 0x0007ffff) != 0)
-    return -1;
-
-  // bits[29..25] are all set or all cleared.
-  uint32_t b_pattern = (bits >> 16) & 0x3e00;
-  if (b_pattern != 0 && b_pattern != 0x3e00)
-    return -1;
-
-  // bit[30] and bit[29] are opposite.
-  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
-    return -1;
-
-  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
-  // bit7: a000.0000
-  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
-  // bit6: 0b00.0000
-  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
-  // bit5_to_0: 00cd.efgh
-  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
-  return (bit7 | bit6 | bit5_to_0);
-}
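-
-// Worked example: 1.0f has bit pattern 0x3f800000, which passes all three checks above and
-// encodes to bit6 | bit5_to_0 = 0x40 | 0x30 = 0x70; this is the inverse of the
-// DecodeImmSingle() example in target_arm64.cc.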
-
-int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
-  /*
-   * Valid values will have the form:
-   *
-   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
-   *   0000.0000.0000.0000.0000.0000.0000.0000
-   *
-   * where B = not(b).
-   */
-
-  // bits[47..0] are cleared.
-  if ((bits & UINT64_C(0xffffffffffff)) != 0)
-    return -1;
-
-  // bits[61..54] are all set or all cleared.
-  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
-  if (b_pattern != 0 && b_pattern != 0x3fc0)
-    return -1;
-
-  // bit[62] and bit[61] are opposite.
-  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
-    return -1;
-
-  // bit7: a000.0000
-  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
-  // bit6: 0b00.0000
-  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
-  // bit5_to_0: 00cd.efgh
-  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
-  return (bit7 | bit6 | bit5_to_0);
-}
-
-size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
-  bool opcode_is_wide = IS_WIDE(lir->opcode);
-  A64Opcode opcode = UNWIDE(lir->opcode);
-  DCHECK(!IsPseudoLirOp(opcode));
-  const A64EncodingMap *encoder = &EncodingMap[opcode];
-  uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
-  return (bits >> 30);
-}
-
-size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
-  size_t offset = lir->operands[2];
-  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
-  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
-  if (check_flags & SCALED_OFFSET_X0) {
-    DCHECK(check_flags & IS_TERTIARY_OP);
-    offset = offset * (1 << GetLoadStoreSize(lir));
-  }
-  return offset;
-}
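-
-// Example: for a wide (64-bit) load/store the encoding skeleton's size field is 3, so a
-// stored operand of 5 scales to an effective offset of 5 << 3 = 40 bytes.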
-
-LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
-  DCHECK(r_dest.IsSingle());
-  if (value == 0) {
-    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
-  } else {
-    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
-    if (encoded_imm >= 0) {
-      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
-    }
-  }
-
-  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
-  if (data_target == nullptr) {
-    // Wide, as we need 8B alignment.
-    data_target = AddWideData(&literal_list_, value, 0);
-  }
-
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
-                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
-  AppendLIR(load_pc_rel);
-  return load_pc_rel;
-}
-
-LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
-  DCHECK(r_dest.IsDouble());
-  if (value == 0) {
-    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
-  } else {
-    int32_t encoded_imm = EncodeImmDouble(value);
-    if (encoded_imm >= 0) {
-      return NewLIR2(WIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
-    }
-  }
-
-  // No short form - load from the literal pool.
-  int32_t val_lo = Low32Bits(value);
-  int32_t val_hi = High32Bits(value);
-  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
-  if (data_target == nullptr) {
-    data_target = AddWideData(&literal_list_, val_lo, val_hi);
-  }
-
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2fp),
-                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
-  AppendLIR(load_pc_rel);
-  return load_pc_rel;
-}
-
-static int CountLeadingZeros(bool is_wide, uint64_t value) {
-  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
-}
-
-static int CountTrailingZeros(bool is_wide, uint64_t value) {
-  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
-}
-
-static int CountSetBits(bool is_wide, uint64_t value) {
-  return ((is_wide) ?
-          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
-}
-
-/**
- * @brief Try encoding an immediate in the form required by logical instructions.
- *
- * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
- * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
- *   32-bit if @p is_wide is false.
- * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
- * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
- */
-int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
-  unsigned n, imm_s, imm_r;
-
-  // Logical immediates are encoded using parameters n, imm_s and imm_r using
-  // the following table:
-  //
-  //  N   imms    immr    size        S             R
-  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
-  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
-  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
-  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
-  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
-  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
-  // (s bits must not be all set)
-  //
-  // A pattern is constructed of size bits, where the least significant S+1
-  // bits are set. The pattern is rotated right by R, and repeated across a
-  // 32 or 64-bit value, depending on destination register width.
-  //
-  // To test if an arbitrary immediate can be encoded using this scheme, an
-  // iterative algorithm is used.
-  //
-
-  // 1. If the value has all set or all clear bits, it can't be encoded.
-  if (value == 0 || value == ~UINT64_C(0) ||
-      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
-    return -1;
-  }
-
-  unsigned lead_zero  = CountLeadingZeros(is_wide, value);
-  unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
-  unsigned trail_zero = CountTrailingZeros(is_wide, value);
-  unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
-  unsigned set_bits   = CountSetBits(is_wide, value);
-
-  // The fixed bits in the immediate s field.
-  // If width == 64 (X reg), start at 0xFFFFFF80.
-  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
-  // widths won't be executed.
-  unsigned width = (is_wide) ? 64 : 32;
-  int imm_s_fixed = (is_wide) ? -128 : -64;
-  int imm_s_mask = 0x3f;
-
-  for (;;) {
-    // 2. If the value is two bits wide, it can be encoded.
-    if (width == 2) {
-      n = 0;
-      imm_s = 0x3C;
-      imm_r = (value & 3) - 1;
-      break;
-    }
-
-    n = (width == 64) ? 1 : 0;
-    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
-    if ((lead_zero + set_bits) == width) {
-      imm_r = 0;
-    } else {
-      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
-    }
-
-    // 3. If the sum of leading zeros, trailing zeros and set bits is
-    //    equal to the bit width of the value, it can be encoded.
-    if (lead_zero + trail_zero + set_bits == width) {
-      break;
-    }
-
-    // 4. If the sum of leading ones, trailing ones and unset bits in the
-    //    value is equal to the bit width of the value, it can be encoded.
-    if (lead_one + trail_one + (width - set_bits) == width) {
-      break;
-    }
-
-    // 5. If the most-significant half of the bitwise value is equal to
-    //    the least-significant half, return to step 2 using the
-    //    least-significant half of the value.
-    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
-    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
-      width >>= 1;
-      set_bits >>= 1;
-      imm_s_fixed >>= 1;
-      continue;
-    }
-
-    // 6. Otherwise, the value can't be encoded.
-    return -1;
-  }
-
-  return (n << 12 | imm_r << 6 | imm_s);
-}
-
-// Maximum number of instructions to use for encoding the immediate.
-static const int max_num_ops_per_const_load = 2;
-
-/**
- * @brief Return the number of fast halfwords in the given uint64_t integer.
- * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
- *   number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
- *   a more accurate description.
- * @param value The input 64-bit integer.
- * @return A value @c retval such that (retval & 0x7) is the maximum of n and m, where n is
- *   the number of halfwords with all bits unset (0) and m is the number of halfwords with all
- *   bits set (0xffff). Additionally, (retval & 0x8) is set when m > n.
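- * @note For example, value == 0x12340000ffff0000 splits into halfwords
- *   {0x0000, 0xffff, 0x0000, 0x1234}: two are 0x0000 and one is 0xffff, so the
- *   function returns 2 (with bit 3 clear).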
- */
-static int GetNumFastHalfWords(uint64_t value) {
-  unsigned int num_0000_halfwords = 0;
-  unsigned int num_ffff_halfwords = 0;
-  for (int shift = 0; shift < 64; shift += 16) {
-    uint16_t halfword = static_cast<uint16_t>(value >> shift);
-    if (halfword == 0)
-      num_0000_halfwords++;
-    else if (halfword == UINT16_C(0xffff))
-      num_ffff_halfwords++;
-  }
-  if (num_0000_halfwords >= num_ffff_halfwords) {
-    DCHECK_LE(num_0000_halfwords, 4U);
-    return num_0000_halfwords;
-  } else {
-    DCHECK_LE(num_ffff_halfwords, 4U);
-    return num_ffff_halfwords | 0x8;
-  }
-}
-
-// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
-// constant is considered for promotion. If the constant is "inexpensive" then the promotion
-// algorithm will give it a low priority for promotion, even when it is referenced many times in
-// the code.
-
-bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value ATTRIBUTE_UNUSED) {
-  // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
-  // We therefore return true and give it a low priority for promotion.
-  return true;
-}
-
-bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
-  return EncodeImmSingle(value) >= 0;
-}
-
-bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
-  int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
-  if (num_slow_halfwords <= max_num_ops_per_const_load) {
-    return true;
-  }
-  return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
-}
-
-bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
-  return EncodeImmDouble(value) >= 0;
-}
-
-// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
-// when one of the operands is an immediate (e.g. register version or immediate version of add).
-
-bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
-  switch (opcode) {
-  case Instruction::IF_EQ:
-  case Instruction::IF_NE:
-  case Instruction::IF_LT:
-  case Instruction::IF_GE:
-  case Instruction::IF_GT:
-  case Instruction::IF_LE:
-  case Instruction::ADD_INT:
-  case Instruction::ADD_INT_2ADDR:
-  case Instruction::SUB_INT:
-  case Instruction::SUB_INT_2ADDR:
-    // The code below is consistent with the implementation of OpRegRegImm().
-    {
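-      // E.g. 0xabc (a plain 12-bit immediate) and 0x123000 (12 bits shifted
-      // left by 12) are cheap; 0x123456 fits neither form and is not.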
-      uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
-      if (abs_value < 0x1000) {
-        return true;
-      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
-        return true;
-      }
-      return false;
-    }
-  case Instruction::SHL_INT:
-  case Instruction::SHL_INT_2ADDR:
-  case Instruction::SHR_INT:
-  case Instruction::SHR_INT_2ADDR:
-  case Instruction::USHR_INT:
-  case Instruction::USHR_INT_2ADDR:
-    return true;
-  case Instruction::AND_INT:
-  case Instruction::AND_INT_2ADDR:
-  case Instruction::AND_INT_LIT16:
-  case Instruction::AND_INT_LIT8:
-  case Instruction::OR_INT:
-  case Instruction::OR_INT_2ADDR:
-  case Instruction::OR_INT_LIT16:
-  case Instruction::OR_INT_LIT8:
-  case Instruction::XOR_INT:
-  case Instruction::XOR_INT_2ADDR:
-  case Instruction::XOR_INT_LIT16:
-  case Instruction::XOR_INT_LIT8:
-    if (value == 0 || value == INT32_C(-1)) {
-      return true;
-    }
-    return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
-  default:
-    return false;
-  }
-}
-
-/*
- * Load an immediate using a single instruction when possible; otherwise
- * use a pair of movz and movk instructions.
- *
- * No additional register-clobbering operation is performed. Use this version when
- * 1) r_dest is freshly returned from AllocTemp or
- * 2) The codegen is under fixed register usage
- */
-LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
-  LIR* res;
-
-  if (r_dest.IsFloat()) {
-    return LoadFPConstantValue(r_dest, value);
-  }
-
-  if (r_dest.Is64Bit()) {
-    return LoadConstantWide(r_dest, value);
-  }
-
-  // Loading SP/ZR with an immediate is not supported.
-  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
-  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
-
-  // Compute how many movk, movz instructions are needed to load the value.
-  uint16_t high_bits = High16Bits(value);
-  uint16_t low_bits = Low16Bits(value);
-
-  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
-  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
-
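-  // E.g. value == 0x12340000 has a fast (all-zero) low halfword, so a single
-  // ``movz w_dest, #0x1234, lsl #16'' suffices; 0x1234ffff has a fast (all-one)
-  // low halfword and takes the movn path below (effectively
-  // ``movn w_dest, #0xedcb, lsl #16'').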
-  if (LIKELY(low_fast || high_fast)) {
-    // 1 instruction is enough to load the immediate.
-    if (LIKELY(low_bits == high_bits)) {
-      // Value is either 0 or -1: we can just use wzr.
-      A64Opcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
-      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
-    } else {
-      uint16_t uniform_bits, useful_bits;
-      int shift;
-
-      if (LIKELY(high_fast)) {
-        shift = 0;
-        uniform_bits = high_bits;
-        useful_bits = low_bits;
-      } else {
-        shift = 1;
-        uniform_bits = low_bits;
-        useful_bits = high_bits;
-      }
-
-      if (UNLIKELY(uniform_bits != 0)) {
-        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
-      } else {
-        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
-      }
-    }
-  } else {
-    // movk, movz require 2 instructions. Try detecting logical immediates.
-    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
-    if (log_imm >= 0) {
-      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
-    } else {
-      // Use 2 instructions.
-      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
-      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
-    }
-  }
-
-  return res;
-}
-
-// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
-LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
-  if (r_dest.IsFloat()) {
-    return LoadFPConstantValueWide(r_dest, value);
-  }
-
-  DCHECK(r_dest.Is64Bit());
-
-  // Loading SP/ZR with an immediate is not supported.
-  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
-  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
-
-  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
-    // value is either 0 or -1: we can just use xzr.
-    A64Opcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
-    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
-  }
-
-  // At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many are "fast".
-  uint64_t uvalue = static_cast<uint64_t>(value);
-  int num_fast_halfwords = GetNumFastHalfWords(uvalue);
-  int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
-  bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;
-
-  if (num_slow_halfwords > 1) {
-    // A single movz/movn is not enough. Try the logical immediate route.
-    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
-    if (log_imm >= 0) {
-      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
-    }
-  }
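-  // E.g. 0x00ff00ff00ff00ff has four slow halfwords yet is a valid logical
-  // immediate, so it is materialized with the single ORR above.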
-
-  if (num_slow_halfwords <= max_num_ops_per_const_load) {
-    // We can encode the number using a movz/movn followed by one or more movk.
-    A64Opcode op;
-    uint16_t background;
-    LIR* res = nullptr;
-
-    // Decide whether to use a movz or a movn.
-    if (more_ffff_halfwords) {
-      op = WIDE(kA64Movn3rdM);
-      background = 0xffff;
-    } else {
-      op = WIDE(kA64Movz3rdM);
-      background = 0;
-    }
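-    // E.g. value == 0xffffffff1234ffff has three 0xffff halfwords, so background
-    // is 0xffff; the loops below emit a single ``movn x_dest, #0xedcb, lsl #16''
-    // (0xedcb == 0x1234 ^ 0xffff) and no movk.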
-
-    // Emit the first instruction (movz, movn).
-    int shift;
-    for (shift = 0; shift < 4; shift++) {
-      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
-      if (halfword != background) {
-        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
-        break;
-      }
-    }
-
-    // Emit the movk instructions.
-    for (shift++; shift < 4; shift++) {
-      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
-      if (halfword != background) {
-        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
-      }
-    }
-    return res;
-  }
-
-  // Use the literal pool.
-  int32_t val_lo = Low32Bits(value);
-  int32_t val_hi = High32Bits(value);
-  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
-  if (data_target == nullptr) {
-    data_target = AddWideData(&literal_list_, val_lo, val_hi);
-  }
-
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
-                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
-  AppendLIR(res);
-  return res;
-}
-
-LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
-  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
-  res->target = target;
-  return res;
-}
-
-LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
-  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
-                        0 /* offset to be patched */);
-  branch->target = target;
-  return branch;
-}
-
-LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
-  A64Opcode opcode = kA64Brk1d;
-  switch (op) {
-    case kOpBlx:
-      opcode = kA64Blr1x;
-      break;
-    default:
-      LOG(FATAL) << "Bad opcode " << op;
-  }
-  return NewLIR1(opcode, r_dest_src.GetReg());
-}
-
-LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
-  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
-  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
-  A64Opcode opcode = kA64Brk1d;
-
-  switch (op) {
-    case kOpCmn:
-      opcode = kA64Cmn3rro;
-      break;
-    case kOpCmp:
-      opcode = kA64Cmp3rro;
-      break;
-    case kOpMov:
-      opcode = kA64Mov2rr;
-      break;
-    case kOpMvn:
-      opcode = kA64Mvn2rr;
-      break;
-    case kOpNeg:
-      opcode = kA64Neg3rro;
-      break;
-    case kOpTst:
-      opcode = kA64Tst3rro;
-      break;
-    case kOpRev:
-      DCHECK_EQ(shift, 0);
-      // Binary, but rm is encoded twice.
-      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
-    case kOpRevsh:
-      // Binary, but rm is encoded twice.
-      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
-      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
-      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
-    case kOp2Byte:
-      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
-      // For now we use sbfm directly.
-      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
-    case kOp2Short:
-      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-      // For now we use sbfm rather than its alias, sbfx.
-      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
-    case kOp2Char:
-      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
-      // For now we use ubfm directly.
-      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
-    default:
-      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
-  }
-
-  DCHECK(!IsPseudoLirOp(opcode));
-  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
-    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
-  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
-    A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
-    if (kind == kFmtShift) {
-      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
-    }
-  }
-
-  LOG(FATAL) << "Unexpected encoding operand count";
-  return nullptr;
-}
-
-LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
-                                  A64RegExtEncodings ext, uint8_t amount) {
-  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
-  A64Opcode opcode = kA64Brk1d;
-
-  switch (op) {
-    case kOpCmn:
-      opcode = kA64Cmn3Rre;
-      break;
-    case kOpCmp:
-      opcode = kA64Cmp3Rre;
-      break;
-    case kOpAdd:
-      // Note: intentional fallthrough
-    case kOpSub:
-      return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
-    default:
-      LOG(FATAL) << "Bad Opcode: " << opcode;
-      UNREACHABLE();
-  }
-
-  DCHECK(!IsPseudoLirOp(opcode));
-  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
-    A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
-    if (kind == kFmtExtend) {
-      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
-                     EncodeExtend(ext, amount));
-    }
-  }
-
-  LOG(FATAL) << "Unexpected encoding operand count";
-  return nullptr;
-}
-
-LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  /* RegReg operations with SP in first parameter need extended register instruction form.
-   * Only CMN, CMP, ADD & SUB instructions are implemented.
-   */
-  if (r_dest_src1 == rs_sp) {
-    return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
-  } else {
-    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
-  }
-}
-
-LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest ATTRIBUTE_UNUSED,
-                               RegStorage r_base ATTRIBUTE_UNUSED,
-                               int offset ATTRIBUTE_UNUSED,
-                               MoveType move_type ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-  UNREACHABLE();
-}
-
-LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base ATTRIBUTE_UNUSED,
-                               int offset ATTRIBUTE_UNUSED,
-                               RegStorage r_src ATTRIBUTE_UNUSED,
-                               MoveType move_type ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-  return nullptr;
-}
-
-LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op ATTRIBUTE_UNUSED,
-                                ConditionCode cc ATTRIBUTE_UNUSED,
-                                RegStorage r_dest ATTRIBUTE_UNUSED,
-                                RegStorage r_src ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
-  UNREACHABLE();
-}
-
-LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
-                                    RegStorage r_src2, int shift) {
-  A64Opcode opcode = kA64Brk1d;
-
-  switch (op) {
-    case kOpAdd:
-      opcode = kA64Add4rrro;
-      break;
-    case kOpSub:
-      opcode = kA64Sub4rrro;
-      break;
-    // case kOpRsub:
-    //   opcode = kA64RsubWWW;
-    //   break;
-    case kOpAdc:
-      opcode = kA64Adc3rrr;
-      break;
-    case kOpAnd:
-      opcode = kA64And4rrro;
-      break;
-    case kOpXor:
-      opcode = kA64Eor4rrro;
-      break;
-    case kOpMul:
-      opcode = kA64Mul3rrr;
-      break;
-    case kOpDiv:
-      opcode = kA64Sdiv3rrr;
-      break;
-    case kOpOr:
-      opcode = kA64Orr4rrro;
-      break;
-    case kOpSbc:
-      opcode = kA64Sbc3rrr;
-      break;
-    case kOpLsl:
-      opcode = kA64Lsl3rrr;
-      break;
-    case kOpLsr:
-      opcode = kA64Lsr3rrr;
-      break;
-    case kOpAsr:
-      opcode = kA64Asr3rrr;
-      break;
-    case kOpRor:
-      opcode = kA64Ror3rrr;
-      break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-  }
-
-  // The instructions above belong to two kinds:
-  // - 4-operands instructions, where the last operand is a shift/extend immediate,
-  // - 3-operands instructions with no shift/extend.
-  A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
-  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
-  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
-  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
-    DCHECK(!IsExtendEncoding(shift));
-    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
-  } else {
-    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
-    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
-  }
-}
-
-LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
-                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
-  A64Opcode opcode = kA64Brk1d;
-
-  switch (op) {
-    case kOpAdd:
-      opcode = kA64Add4RRre;
-      break;
-    case kOpSub:
-      opcode = kA64Sub4RRre;
-      break;
-    default:
-      UNIMPLEMENTED(FATAL) << "Unimplemented opcode: " << op;
-      UNREACHABLE();
-  }
-  A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
-
-  if (r_dest.Is64Bit()) {
-    CHECK(r_src1.Is64Bit());
-
-    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
-    // Note: this is not according to aarch64 specifications, but our encoding.
-    if (!r_src2.Is64Bit()) {
-      r_src2 = As64BitReg(r_src2);
-    }
-  } else {
-    CHECK(!r_src1.Is64Bit());
-    CHECK(!r_src2.Is64Bit());
-  }
-
-  // Sanity checks.
-  //    1) Amount is in the range 0..4
-  CHECK_LE(amount, 4);
-
-  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
-                 EncodeExtend(ext, amount));
-}
-
-LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
-  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
-}
-
-LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
-  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
-}
-
-LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
-  LIR* res;
-  bool neg = (value < 0);
-  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
-  A64Opcode opcode = kA64Brk1d;
-  A64Opcode alt_opcode = kA64Brk1d;
-  bool is_logical = false;
-  bool is_wide = r_dest.Is64Bit();
-  A64Opcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
-  int info = 0;
-
-  switch (op) {
-    case kOpLsl: {
-      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
-      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
-      // For now, we just use ubfm directly.
-      int max_value = (is_wide) ? 63 : 31;
-      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
-                     (-value) & max_value, max_value - value);
-    }
-    case kOpLsr:
-      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
-    case kOpAsr:
-      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
-    case kOpRor:
-      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
-      // For now, we just use extr directly.
-      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
-                     value);
-    case kOpAdd:
-      neg = !neg;
-      FALLTHROUGH_INTENDED;
-    case kOpSub:
-      // Add and sub below read/write sp rather than xzr.
-      if (abs_value < 0x1000) {
-        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
-        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
-      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
-        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
-        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
-      } else {
-        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
-        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
-      }
-      break;
-    case kOpAdc:
-      alt_opcode = kA64Adc3rrr;
-      break;
-    case kOpSbc:
-      alt_opcode = kA64Sbc3rrr;
-      break;
-    case kOpOr:
-      is_logical = true;
-      opcode = kA64Orr3Rrl;
-      alt_opcode = kA64Orr4rrro;
-      break;
-    case kOpAnd:
-      is_logical = true;
-      opcode = kA64And3Rrl;
-      alt_opcode = kA64And4rrro;
-      break;
-    case kOpXor:
-      is_logical = true;
-      opcode = kA64Eor3Rrl;
-      alt_opcode = kA64Eor4rrro;
-      break;
-    case kOpMul:
-      // TUNING: power of 2, shift & add
-      alt_opcode = kA64Mul3rrr;
-      break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-  }
-
-  if (is_logical) {
-    int log_imm = EncodeLogicalImmediate(is_wide, value);
-    if (log_imm >= 0) {
-      return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
-    } else {
-      // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
-      // to a (possibly negated) assignment.
-      if (value == 0) {
-        switch (op) {
-          case kOpOr:
-          case kOpXor:
-            // Or/Xor by zero reduces to an assignment.
-            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
-          default:
-            // And by zero reduces to a `mov rdest, wzr/xzr'.
-            DCHECK(op == kOpAnd);
-            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
-        }
-      } else if (value == INT64_C(-1)
-                 || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
-        switch (op) {
-          case kOpAnd:
-            // And by -1 reduces to an assignment.
-            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
-          case kOpXor:
-            // Xor by -1 reduces to an `mvn rdest, rsrc'.
-            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
-          default:
-            // Or by -1 reduces to a `mvn rdest, wzr/xzr'.
-            DCHECK(op == kOpOr);
-            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
-        }
-      }
-    }
-  }
-
-  RegStorage r_scratch;
-  if (is_wide) {
-    r_scratch = AllocTempWide();
-    LoadConstantWide(r_scratch, value);
-  } else {
-    r_scratch = AllocTemp();
-    LoadConstant(r_scratch, value);
-  }
-  if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-    res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
-  else
-    res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
-  FreeTemp(r_scratch);
-  return res;
-}
-
-LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
-  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
-}
-
-LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
-  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
-  A64Opcode opcode = kA64Brk1d;
-  A64Opcode neg_opcode = kA64Brk1d;
-  bool shift;
-  bool neg = (value < 0);
-  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
-
-  if (LIKELY(abs_value < 0x1000)) {
-    // abs_value is a 12-bit immediate.
-    shift = false;
-  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
-    // abs_value is a shifted 12-bit immediate.
-    shift = true;
-    abs_value >>= 12;
-  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
-    // Note: It is better to use two ADD/SUB instructions than to load the value into a temp register.
-    // This works for both normal registers and SP.
-    // For a frame size == 0x2468, it will be encoded as:
-    //   sub sp, #0x2000
-    //   sub sp, #0x468
-    if (neg) {
-      op = (op == kOpAdd) ? kOpSub : kOpAdd;
-    }
-    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
-    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
-  } else {
-    RegStorage r_tmp;
-    LIR* res;
-    if (IS_WIDE(wide)) {
-      r_tmp = AllocTempWide();
-      res = LoadConstantWide(r_tmp, value);
-    } else {
-      r_tmp = AllocTemp();
-      res = LoadConstant(r_tmp, value);
-    }
-    OpRegReg(op, r_dest_src1, r_tmp);
-    FreeTemp(r_tmp);
-    return res;
-  }
-
-  switch (op) {
-    case kOpAdd:
-      neg_opcode = kA64Sub4RRdT;
-      opcode = kA64Add4RRdT;
-      break;
-    case kOpSub:
-      neg_opcode = kA64Add4RRdT;
-      opcode = kA64Sub4RRdT;
-      break;
-    case kOpCmp:
-      neg_opcode = kA64Cmn3RdT;
-      opcode = kA64Cmp3RdT;
-      break;
-    default:
-      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
-      break;
-  }
-
-  if (UNLIKELY(neg))
-    opcode = neg_opcode;
-
-  if (EncodingMap[opcode].flags & IS_QUAD_OP)
-    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
-                   (shift) ? 1 : 0);
-  else
-    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
-}
-
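-// Operand-packing note: EncodeShift() stores the shift type in bits 8..7 and the
-// amount in bits 5..0, leaving bit 6 clear; EncodeExtend() sets bit 6, which is
-// the one bit IsExtendEncoding() tests to tell the two encodings apart.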
-int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
-  DCHECK_EQ(shift_type & 0x3, shift_type);
-  DCHECK_EQ(amount & 0x3f, amount);
-  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
-}
-
-int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
-  DCHECK_EQ(extend_type & 0x7, extend_type);
-  DCHECK_EQ(amount & 0x7, amount);
-  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
-}
-
-bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
-  return ((1 << 6) & encoded_value) != 0;
-}
-
-LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                   int scale, OpSize size) {
-  LIR* load;
-  int expected_scale = 0;
-  A64Opcode opcode = kA64Brk1d;
-  r_base = Check64BitReg(r_base);
-
-  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
-  //   register offset load (rather than doing the sign extension in a separate instruction).
-  if (r_index.Is32Bit()) {
-    // Assemble: ``sxtw xN, wN''.
-    r_index = As64BitReg(r_index);
-    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
-  }
-
-  if (r_dest.IsFloat()) {
-    if (r_dest.IsDouble()) {
-      DCHECK(size == k64 || size == kDouble);
-      expected_scale = 3;
-      opcode = WIDE(kA64Ldr4fXxG);
-    } else {
-      DCHECK(r_dest.IsSingle());
-      DCHECK(size == k32 || size == kSingle);
-      expected_scale = 2;
-      opcode = kA64Ldr4fXxG;
-    }
-
-    DCHECK(scale == 0 || scale == expected_scale);
-    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                   (scale != 0) ? 1 : 0);
-  }
-
-  switch (size) {
-    case kDouble:
-    case kWord:
-    case k64:
-      r_dest = Check64BitReg(r_dest);
-      opcode = WIDE(kA64Ldr4rXxG);
-      expected_scale = 3;
-      break;
-    case kReference:
-      r_dest = As32BitReg(r_dest);
-      FALLTHROUGH_INTENDED;
-    case kSingle:     // Intentional fall-through.
-    case k32:
-      r_dest = Check32BitReg(r_dest);
-      opcode = kA64Ldr4rXxG;
-      expected_scale = 2;
-      break;
-    case kUnsignedHalf:
-      r_dest = Check32BitReg(r_dest);
-      opcode = kA64Ldrh4wXxd;
-      expected_scale = 1;
-      break;
-    case kSignedHalf:
-      r_dest = Check32BitReg(r_dest);
-      opcode = kA64Ldrsh4rXxd;
-      expected_scale = 1;
-      break;
-    case kUnsignedByte:
-      r_dest = Check32BitReg(r_dest);
-      opcode = kA64Ldrb3wXx;
-      break;
-    case kSignedByte:
-      r_dest = Check32BitReg(r_dest);
-      opcode = kA64Ldrsb3rXx;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-
-  if (UNLIKELY(expected_scale == 0)) {
-    // This is a tertiary op (e.g. ldrb, ldrsb); it does not support scale.
-    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
-    DCHECK_EQ(scale, 0);
-    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
-  } else {
-    DCHECK(scale == 0 || scale == expected_scale);
-    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                   (scale != 0) ? 1 : 0);
-  }
-
-  return load;
-}
-
-LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                    int scale, OpSize size) {
-  LIR* store;
-  int expected_scale = 0;
-  A64Opcode opcode = kA64Brk1d;
-  r_base = Check64BitReg(r_base);
-
-  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
-  //   register offset store (rather than doing the sign extension in a separate instruction).
-  if (r_index.Is32Bit()) {
-    // Assemble: ``sxtw xN, wN''.
-    r_index = As64BitReg(r_index);
-    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
-  }
-
-  if (r_src.IsFloat()) {
-    if (r_src.IsDouble()) {
-      DCHECK(size == k64 || size == kDouble);
-      expected_scale = 3;
-      opcode = WIDE(kA64Str4fXxG);
-    } else {
-      DCHECK(r_src.IsSingle());
-      DCHECK(size == k32 || size == kSingle);
-      expected_scale = 2;
-      opcode = kA64Str4fXxG;
-    }
-
-    DCHECK(scale == 0 || scale == expected_scale);
-    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                   (scale != 0) ? 1 : 0);
-  }
-
-  switch (size) {
-    case kDouble:     // Intentional fall-through.
-    case kWord:       // Intentional fall-through.
-    case k64:
-      r_src = Check64BitReg(r_src);
-      opcode = WIDE(kA64Str4rXxG);
-      expected_scale = 3;
-      break;
-    case kReference:
-      r_src = As32BitReg(r_src);
-      FALLTHROUGH_INTENDED;
-    case kSingle:     // Intentional fall-through.
-    case k32:
-      r_src = Check32BitReg(r_src);
-      opcode = kA64Str4rXxG;
-      expected_scale = 2;
-      break;
-    case kUnsignedHalf:
-    case kSignedHalf:
-      r_src = Check32BitReg(r_src);
-      opcode = kA64Strh4wXxd;
-      expected_scale = 1;
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      r_src = Check32BitReg(r_src);
-      opcode = kA64Strb3wXx;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-
-  if (UNLIKELY(expected_scale == 0)) {
-    // This is a tertiary op (e.g. strb); it does not support scale.
-    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
-    DCHECK_EQ(scale, 0);
-    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
-  } else {
-    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                    (scale != 0) ? 1 : 0);
-  }
-
-  return store;
-}
-
-/*
- * Load value from base + displacement.  Optionally perform null check
- * on base (which must have an associated s_reg and MIR).  If not
- * performing null check, incoming MIR can be null.
- */
-LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                    OpSize size) {
-  LIR* load = nullptr;
-  A64Opcode opcode = kA64Brk1d;
-  A64Opcode alt_opcode = kA64Brk1d;
-  int scale = 0;
-
-  switch (size) {
-    case kDouble:     // Intentional fall-through.
-    case kWord:       // Intentional fall-through.
-    case k64:
-      r_dest = Check64BitReg(r_dest);
-      scale = 3;
-      if (r_dest.IsFloat()) {
-        DCHECK(r_dest.IsDouble());
-        opcode = WIDE(kA64Ldr3fXD);
-        alt_opcode = WIDE(kA64Ldur3fXd);
-      } else {
-        opcode = WIDE(kA64Ldr3rXD);
-        alt_opcode = WIDE(kA64Ldur3rXd);
-      }
-      break;
-    case kReference:
-      r_dest = As32BitReg(r_dest);
-      FALLTHROUGH_INTENDED;
-    case kSingle:     // Intentional fall-through.
-    case k32:
-      r_dest = Check32BitReg(r_dest);
-      scale = 2;
-      if (r_dest.IsFloat()) {
-        DCHECK(r_dest.IsSingle());
-        opcode = kA64Ldr3fXD;
-      } else {
-        opcode = kA64Ldr3rXD;
-      }
-      break;
-    case kUnsignedHalf:
-      scale = 1;
-      opcode = kA64Ldrh3wXF;
-      break;
-    case kSignedHalf:
-      scale = 1;
-      opcode = kA64Ldrsh3rXF;
-      break;
-    case kUnsignedByte:
-      opcode = kA64Ldrb3wXd;
-      break;
-    case kSignedByte:
-      opcode = kA64Ldrsb3rXd;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-
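-  // Choose the addressing form. E.g. with scale == 3 (64-bit load), displacement
-  // 0x1008 is aligned and 0x1008 >> 3 == 0x201 < 4096, so the scaled 12-bit form
-  // fits; displacement -8 only fits the unscaled signed 9-bit form; anything else
-  // takes the register-offset sequence below.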
-  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
-  int scaled_disp = displacement >> scale;
-  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
-    // Can use scaled load.
-    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
-  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
-    // Can use unscaled load.
-    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
-  } else {
-    // Use long sequence.
-    // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
-    RegStorage r_scratch = AllocTempWide();
-    LoadConstantWide(r_scratch, displacement);
-    load = LoadBaseIndexed(r_base, r_scratch,
-                           (size == kReference) ? As64BitReg(r_dest) : r_dest,
-                           0, size);
-    FreeTemp(r_scratch);
-  }
-
-  // TODO: in future may need to differentiate Dalvik accesses w/ spills
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, rs_sp);
-    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
-  }
-  return load;
-}
-
-LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                                OpSize size, VolatileKind is_volatile) {
-  // LoadBaseDisp() will emit correct insn for atomic load on arm64
-  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
-
-  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // TODO: This should generate an acquire load instead of the barrier.
-    GenMemBarrier(kLoadAny);
-  }
-
-  return load;
-}
-
-LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
-                                     OpSize size) {
-  LIR* store = nullptr;
-  A64Opcode opcode = kA64Brk1d;
-  A64Opcode alt_opcode = kA64Brk1d;
-  int scale = 0;
-
-  switch (size) {
-    case kDouble:     // Intentional fall-through.
-    case kWord:       // Intentional fall-through.
-    case k64:
-      r_src = Check64BitReg(r_src);
-      scale = 3;
-      if (r_src.IsFloat()) {
-        DCHECK(r_src.IsDouble());
-        opcode = WIDE(kA64Str3fXD);
-        alt_opcode = WIDE(kA64Stur3fXd);
-      } else {
-        opcode = WIDE(kA64Str3rXD);
-        alt_opcode = WIDE(kA64Stur3rXd);
-      }
-      break;
-    case kReference:
-      r_src = As32BitReg(r_src);
-      FALLTHROUGH_INTENDED;
-    case kSingle:     // Intentional fall-through.
-    case k32:
-      r_src = Check32BitReg(r_src);
-      scale = 2;
-      if (r_src.IsFloat()) {
-        DCHECK(r_src.IsSingle());
-        opcode = kA64Str3fXD;
-      } else {
-        opcode = kA64Str3rXD;
-      }
-      break;
-    case kUnsignedHalf:
-    case kSignedHalf:
-      scale = 1;
-      opcode = kA64Strh3wXF;
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      opcode = kA64Strb3wXd;
-      break;
-    default:
-      LOG(FATAL) << "Bad size: " << size;
-  }
-
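-  // Same addressing-form selection as in LoadBaseDispBody(): scaled 12-bit form
-  // when aligned and in range, unscaled signed 9-bit form when possible, and
-  // otherwise a register-offset store through a temp register.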
-  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
-  int scaled_disp = displacement >> scale;
-  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
-    // Can use scaled store.
-    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
-  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
-    // Can use unscaled store.
-    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
-  } else {
-    // Use long sequence.
-    RegStorage r_scratch = AllocTempWide();
-    LoadConstantWide(r_scratch, displacement);
-    store = StoreBaseIndexed(r_base, r_scratch,
-                             (size == kReference) ? As64BitReg(r_src) : r_src,
-                             0, size);
-    FreeTemp(r_scratch);
-  }
-
-  // TODO: In future, may need to differentiate Dalvik & spill accesses.
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, rs_sp);
-    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
-  }
-  return store;
-}
-
-LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                                 OpSize size, VolatileKind is_volatile) {
-  // TODO: This should generate a release store and no barriers.
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // Ensure that prior accesses become visible to other threads first.
-    GenMemBarrier(kAnyStore);
-  }
-
-  // StoreBaseDisp() will emit correct insn for atomic store on arm64
-  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().
-
-  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // Preserve order with respect to any subsequent volatile loads.
-    // We need StoreLoad, but that generally requires the most expensive barrier.
-    GenMemBarrier(kAnyAny);
-  }
-
-  return store;
-}
-
-LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest ATTRIBUTE_UNUSED,
-                               RegStorage r_src ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
-  UNREACHABLE();
-}
-
-LIR* Arm64Mir2Lir::OpMem(OpKind op ATTRIBUTE_UNUSED,
-                         RegStorage r_base ATTRIBUTE_UNUSED,
-                         int disp ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
-  UNREACHABLE();
-}
-
-LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt,
-                                    QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
-  // The address of the trampoline is already loaded into r_tgt.
-  return OpReg(op, r_tgt);
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
deleted file mode 100644
index cde99b3..0000000
--- a/compiler/dex/quick/codegen_util.cc
+++ /dev/null
@@ -1,1450 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_to_lir-inl.h"
-
-// Mac does not provide endian.h, so we'll use byte order agnostic code.
-#ifndef __APPLE__
-#include <endian.h>
-#endif
-
-#include "base/bit_vector-inl.h"
-#include "dex/mir_graph.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "driver/dex_compilation_unit.h"
-#include "dex_file-inl.h"
-#include "gc_map.h"
-#include "gc_map_builder.h"
-#include "mapping_table.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/verification_results.h"
-#include "dex/verified_method.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-#include "verifier/dex_gc_map.h"
-#include "verifier/method_verifier.h"
-#include "vmap_table.h"
-
-namespace art {
-
-namespace {
-
-/* Dump a mapping table */
-template <typename It>
-void DumpMappingTable(const char* table_name, const char* descriptor, const char* name,
-                      const Signature& signature, uint32_t size, It first) {
-  if (size != 0) {
-    std::string line(StringPrintf("\n  %s %s%s_%s_table[%u] = {", table_name,
-                     descriptor, name, signature.ToString().c_str(), size));
-    std::replace(line.begin(), line.end(), ';', '_');
-    LOG(INFO) << line;
-    for (uint32_t i = 0; i != size; ++i) {
-      line = StringPrintf("    {0x%05x, 0x%04x},", first.NativePcOffset(), first.DexPc());
-      ++first;
-      LOG(INFO) << line;
-    }
-    LOG(INFO) <<"  };\n\n";
-  }
-}
-
-}  // anonymous namespace
-
-bool Mir2Lir::IsInexpensiveConstant(RegLocation rl_src) {
-  bool res = false;
-  if (rl_src.is_const) {
-    if (rl_src.wide) {
-      // For wide registers, check whether we're the high partner. In that case we need to switch
-      // to the lower one for the correct value.
-      if (rl_src.high_word) {
-        rl_src.high_word = false;
-        rl_src.s_reg_low--;
-        rl_src.orig_sreg--;
-      }
-      if (rl_src.fp) {
-        res = InexpensiveConstantDouble(mir_graph_->ConstantValueWide(rl_src));
-      } else {
-        res = InexpensiveConstantLong(mir_graph_->ConstantValueWide(rl_src));
-      }
-    } else {
-      if (rl_src.fp) {
-        res = InexpensiveConstantFloat(mir_graph_->ConstantValue(rl_src));
-      } else {
-        res = InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src));
-      }
-    }
-  }
-  return res;
-}
-
-void Mir2Lir::MarkSafepointPC(LIR* inst) {
-  DCHECK(!inst->flags.use_def_invalid);
-  inst->u.m.def_mask = &kEncodeAll;
-  LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
-  DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
-  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
-  safepoints_.emplace_back(safepoint_pc, current_mir_);
-}
-
-void Mir2Lir::MarkSafepointPCAfter(LIR* after) {
-  DCHECK(!after->flags.use_def_invalid);
-  after->u.m.def_mask = &kEncodeAll;
-  // As NewLIR0 uses Append, we need to create the LIR by hand.
-  LIR* safepoint_pc = RawLIR(current_dalvik_offset_, kPseudoSafepointPC);
-  if (after->next == nullptr) {
-    DCHECK_EQ(after, last_lir_insn_);
-    AppendLIR(safepoint_pc);
-  } else {
-    InsertLIRAfter(after, safepoint_pc);
-  }
-  DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
-  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
-  safepoints_.emplace_back(safepoint_pc, current_mir_);
-}
-
-/* Remove a LIR from the list. */
-void Mir2Lir::UnlinkLIR(LIR* lir) {
-  if (UNLIKELY(lir == first_lir_insn_)) {
-    first_lir_insn_ = lir->next;
-    if (lir->next != nullptr) {
-      lir->next->prev = nullptr;
-    } else {
-      DCHECK(lir->next == nullptr);
-      DCHECK(lir == last_lir_insn_);
-      last_lir_insn_ = nullptr;
-    }
-  } else if (lir == last_lir_insn_) {
-    last_lir_insn_ = lir->prev;
-    lir->prev->next = nullptr;
-  } else if ((lir->prev != nullptr) && (lir->next != nullptr)) {
-    lir->prev->next = lir->next;
-    lir->next->prev = lir->prev;
-  }
-}
-
-/* Convert an instruction to a NOP */
-void Mir2Lir::NopLIR(LIR* lir) {
-  lir->flags.is_nop = true;
-  if (!cu_->verbose) {
-    UnlinkLIR(lir);
-  }
-}
-
-void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) {
-  DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE));
-  DCHECK(!lir->flags.use_def_invalid);
-  // TODO: Avoid the extra Arena allocation!
-  const ResourceMask** mask_ptr;
-  ResourceMask mask;
-  if (is_load) {
-    mask_ptr = &lir->u.m.use_mask;
-  } else {
-    mask_ptr = &lir->u.m.def_mask;
-  }
-  mask = **mask_ptr;
-  /* Clear out the memref flags */
-  mask.ClearBits(kEncodeMem);
-  /* ..and then add back the one we need */
-  switch (mem_type) {
-    case ResourceMask::kLiteral:
-      DCHECK(is_load);
-      mask.SetBit(ResourceMask::kLiteral);
-      break;
-    case ResourceMask::kDalvikReg:
-      mask.SetBit(ResourceMask::kDalvikReg);
-      break;
-    case ResourceMask::kHeapRef:
-      mask.SetBit(ResourceMask::kHeapRef);
-      break;
-    case ResourceMask::kMustNotAlias:
-      /* Currently only loads can be marked as kMustNotAlias */
-      DCHECK(!(GetTargetInstFlags(lir->opcode) & IS_STORE));
-      mask.SetBit(ResourceMask::kMustNotAlias);
-      break;
-    default:
-      LOG(FATAL) << "Oat: invalid memref kind - " << mem_type;
-  }
-  *mask_ptr = mask_cache_.GetMask(mask);
-}
-
-/*
- * Mark load/store instructions that access Dalvik registers through the stack.
- */
-void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load,
-                                      bool is64bit) {
-  DCHECK((is_load ? lir->u.m.use_mask : lir->u.m.def_mask)->Intersection(kEncodeMem).Equals(
-      kEncodeDalvikReg));
-
-  /*
-   * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit
-   * access.
-   */
-  lir->flags.alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
-}
-
-/*
- * Debugging macros
- */
-#define DUMP_RESOURCE_MASK(X)
-
-/* Pretty-print a LIR instruction */
-void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) {
-  int offset = lir->offset;
-  int dest = lir->operands[0];
-  const bool dump_nop = (cu_->enable_debug & (1 << kDebugShowNops));
-
-  /* Handle pseudo-ops individually, and all regular insns as a group */
-  switch (lir->opcode) {
-    case kPseudoPrologueBegin:
-      LOG(INFO) << "-------- PrologueBegin";
-      break;
-    case kPseudoPrologueEnd:
-      LOG(INFO) << "-------- PrologueEnd";
-      break;
-    case kPseudoEpilogueBegin:
-      LOG(INFO) << "-------- EpilogueBegin";
-      break;
-    case kPseudoEpilogueEnd:
-      LOG(INFO) << "-------- EpilogueEnd";
-      break;
-    case kPseudoBarrier:
-      LOG(INFO) << "-------- BARRIER";
-      break;
-    case kPseudoEntryBlock:
-      LOG(INFO) << "-------- entry offset: 0x" << std::hex << dest;
-      break;
-    case kPseudoDalvikByteCodeBoundary:
-      if (lir->operands[0] == 0) {
-         // NOTE: only used for debug listings.
-         lir->operands[0] = WrapPointer(ArenaStrdup("No instruction string"));
-      }
-      LOG(INFO) << "-------- dalvik offset: 0x" << std::hex
-                << lir->dalvik_offset << " @ "
-                << UnwrapPointer<char>(lir->operands[0]);
-      break;
-    case kPseudoExitBlock:
-      LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest;
-      break;
-    case kPseudoPseudoAlign4:
-      LOG(INFO) << reinterpret_cast<uintptr_t>(base_addr) + offset << " (0x" << std::hex
-                << offset << "): .align4";
-      break;
-    case kPseudoEHBlockLabel:
-      LOG(INFO) << "Exception_Handling:";
-      break;
-    case kPseudoTargetLabel:
-    case kPseudoNormalBlockLabel:
-      LOG(INFO) << "L" << reinterpret_cast<void*>(lir) << ":";
-      break;
-    case kPseudoThrowTarget:
-      LOG(INFO) << "LT" << reinterpret_cast<void*>(lir) << ":";
-      break;
-    case kPseudoIntrinsicRetry:
-      LOG(INFO) << "IR" << reinterpret_cast<void*>(lir) << ":";
-      break;
-    case kPseudoSuspendTarget:
-      LOG(INFO) << "LS" << reinterpret_cast<void*>(lir) << ":";
-      break;
-    case kPseudoSafepointPC:
-      LOG(INFO) << "LsafepointPC_0x" << std::hex << lir->offset << "_" << lir->dalvik_offset << ":";
-      break;
-    case kPseudoExportedPC:
-      LOG(INFO) << "LexportedPC_0x" << std::hex << lir->offset << "_" << lir->dalvik_offset << ":";
-      break;
-    case kPseudoCaseLabel:
-      LOG(INFO) << "LC" << reinterpret_cast<void*>(lir) << ": Case target 0x"
-                << std::hex << lir->operands[0] << "|" << std::dec
-                << lir->operands[0];
-      break;
-    default:
-      if (lir->flags.is_nop && !dump_nop) {
-        break;
-      } else {
-        std::string op_name(BuildInsnString(GetTargetInstName(lir->opcode),
-                                               lir, base_addr));
-        std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode),
-                                                    lir, base_addr));
-        LOG(INFO) << StringPrintf("%5p|0x%02x: %-9s%s%s",
-                                  base_addr + offset,
-                                  lir->dalvik_offset,
-                                  op_name.c_str(), op_operands.c_str(),
-                                  lir->flags.is_nop ? "(nop)" : "");
-      }
-      break;
-  }
-
-  if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.use_mask, "use"));
-  }
-  if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.def_mask, "def"));
-  }
-}
-
-void Mir2Lir::DumpPromotionMap() {
-  uint32_t num_regs = mir_graph_->GetNumOfCodeAndTempVRs();
-  for (uint32_t i = 0; i < num_regs; i++) {
-    PromotionMap v_reg_map = promotion_map_[i];
-    std::string buf;
-    if (v_reg_map.fp_location == kLocPhysReg) {
-      StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.fp_reg));
-    }
-
-    std::string buf3;
-    if (i < mir_graph_->GetNumOfCodeVRs()) {
-      StringAppendF(&buf3, "%02d", i);
-    } else if (i == mir_graph_->GetNumOfCodeVRs()) {
-      buf3 = "Method*";
-    } else {
-      uint32_t diff = i - mir_graph_->GetNumOfCodeVRs();
-      StringAppendF(&buf3, "ct%d", diff);
-    }
-
-    LOG(INFO) << StringPrintf("V[%s] -> %s%d%s", buf3.c_str(),
-                              v_reg_map.core_location == kLocPhysReg ?
-                              "r" : "SP+", v_reg_map.core_location == kLocPhysReg ?
-                              v_reg_map.core_reg : SRegOffset(i),
-                              buf.c_str());
-  }
-}
-
-void Mir2Lir::UpdateLIROffsets() {
-  // Only used for code listings.
-  size_t offset = 0;
-  for (LIR* lir = first_lir_insn_; lir != nullptr; lir = lir->next) {
-    lir->offset = offset;
-    if (!lir->flags.is_nop && !IsPseudoLirOp(lir->opcode)) {
-      offset += GetInsnSize(lir);
-    } else if (lir->opcode == kPseudoPseudoAlign4) {
-      offset += (offset & 0x2);
-    }
-  }
-}
-
-void Mir2Lir::MarkGCCard(int opt_flags, RegStorage val_reg, RegStorage tgt_addr_reg) {
-  DCHECK(val_reg.Valid());
-  DCHECK_EQ(val_reg.Is64Bit(), cu_->target64);
-  if ((opt_flags & MIR_STORE_NON_NULL_VALUE) != 0) {
-    UnconditionallyMarkGCCard(tgt_addr_reg);
-  } else {
-    LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, nullptr);
-    UnconditionallyMarkGCCard(tgt_addr_reg);
-    LIR* target = NewLIR0(kPseudoTargetLabel);
-    branch_over->target = target;
-  }
-}
-
-/* Dump instructions and constant pool contents */
-void Mir2Lir::CodegenDump() {
-  LOG(INFO) << "Dumping LIR insns for "
-            << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-  LIR* lir_insn;
-  int insns_size = mir_graph_->GetNumDalvikInsns();
-
-  LOG(INFO) << "Regs (excluding ins) : " << mir_graph_->GetNumOfLocalCodeVRs();
-  LOG(INFO) << "Ins          : " << mir_graph_->GetNumOfInVRs();
-  LOG(INFO) << "Outs         : " << mir_graph_->GetNumOfOutVRs();
-  LOG(INFO) << "CoreSpills       : " << num_core_spills_;
-  LOG(INFO) << "FPSpills       : " << num_fp_spills_;
-  LOG(INFO) << "CompilerTemps    : " << mir_graph_->GetNumUsedCompilerTemps();
-  LOG(INFO) << "Frame size       : " << frame_size_;
-  LOG(INFO) << "code size is " << total_size_ <<
-    " bytes, Dalvik size is " << insns_size * 2;
-  LOG(INFO) << "expansion factor: "
-            << static_cast<float>(total_size_) / static_cast<float>(insns_size * 2);
-  DumpPromotionMap();
-  UpdateLIROffsets();
-  for (lir_insn = first_lir_insn_; lir_insn != nullptr; lir_insn = lir_insn->next) {
-    DumpLIRInsn(lir_insn, 0);
-  }
-  for (lir_insn = literal_list_; lir_insn != nullptr; lir_insn = lir_insn->next) {
-    LOG(INFO) << StringPrintf("%x (%04x): .word (%#x)", lir_insn->offset, lir_insn->offset,
-                              lir_insn->operands[0]);
-  }
-
-  const DexFile::MethodId& method_id =
-      cu_->dex_file->GetMethodId(cu_->method_idx);
-  const Signature signature = cu_->dex_file->GetMethodSignature(method_id);
-  const char* name = cu_->dex_file->GetMethodName(method_id);
-  const char* descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id));
-
-  // Dump mapping tables
-  if (!encoded_mapping_table_.empty()) {
-    MappingTable table(&encoded_mapping_table_[0]);
-    DumpMappingTable("PC2Dex_MappingTable", descriptor, name, signature,
-                     table.PcToDexSize(), table.PcToDexBegin());
-    DumpMappingTable("Dex2PC_MappingTable", descriptor, name, signature,
-                     table.DexToPcSize(), table.DexToPcBegin());
-  }
-}
-
-/*
- * Search the existing constants in the literal pool for an exact or close match,
- * within a specified delta (greater than or equal to 0).
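- * E.g. with value == 100 and delta == 3, entries 97..100 match, since the
- * unsigned difference (value - entry) must be <= delta.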
- */
-LIR* Mir2Lir::ScanLiteralPool(LIR* data_target, int value, unsigned int delta) {
-  while (data_target) {
-    if ((static_cast<unsigned>(value - data_target->operands[0])) <= delta)
-      return data_target;
-    data_target = data_target->next;
-  }
-  return nullptr;
-}
-
-/* Search the existing constants in the literal pool for an exact wide match */
-LIR* Mir2Lir::ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi) {
-  bool lo_match = false;
-  LIR* lo_target = nullptr;
-  while (data_target) {
-    if (lo_match && (data_target->operands[0] == val_hi)) {
-      // Record high word in case we need to expand this later.
-      lo_target->operands[1] = val_hi;
-      return lo_target;
-    }
-    lo_match = false;
-    if (data_target->operands[0] == val_lo) {
-      lo_match = true;
-      lo_target = data_target;
-    }
-    data_target = data_target->next;
-  }
-  return nullptr;
-}
-
-/* Search the existing constants in the literal pool for an exact method match */
-LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) {
-  while (data_target) {
-    if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index &&
-        UnwrapPointer<DexFile>(data_target->operands[1]) == method.dex_file) {
-      return data_target;
-    }
-    data_target = data_target->next;
-  }
-  return nullptr;
-}
-
-/* Search the existing constants in the literal pool for an exact class match */
-LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) {
-  while (data_target) {
-    if (static_cast<uint32_t>(data_target->operands[0]) == type_idx &&
-        UnwrapPointer<DexFile>(data_target->operands[1]) == &dex_file) {
-      return data_target;
-    }
-    data_target = data_target->next;
-  }
-  return nullptr;
-}
-
-/*
- * The following are building blocks to insert constants into the pool or
- * instruction streams.
- */
-
-/* Add a 32-bit constant to the constant pool */
-LIR* Mir2Lir::AddWordData(LIR* *constant_list_p, int value) {
-  /* Add the constant to the literal pool */
-  if (constant_list_p) {
-    LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
-    new_value->operands[0] = value;
-    new_value->next = *constant_list_p;
-    *constant_list_p = new_value;
-    estimated_native_code_size_ += sizeof(value);
-    return new_value;
-  }
-  return nullptr;
-}
-
-/* Add a 64-bit constant to the constant pool or mixed with code */
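-/* Note: AddWordData() prepends, so pushing val_hi first leaves val_lo at the head
-   of the list; ScanLiteralPoolWide() relies on this lo-then-hi ordering. */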
-LIR* Mir2Lir::AddWideData(LIR* *constant_list_p, int val_lo, int val_hi) {
-  AddWordData(constant_list_p, val_hi);
-  return AddWordData(constant_list_p, val_lo);
-}
-
-/**
- * @brief Push a compressed reference which needs patching at link/patchoat-time.
- * @details This needs to be kept consistent with the code which actually does the patching in
- *   oat_writer.cc and in the patchoat tool.
- */
-static void PushUnpatchedReference(CodeBuffer* buf) {
-  // Note that we can safely initialize the patches to zero. The code deduplication mechanism takes
-  // the patches into account when determining whether two pieces of code are functionally
-  // equivalent.
-  Push32(buf, UINT32_C(0));
-}
-
-static void AlignBuffer(CodeBuffer* buf, size_t offset) {
-  DCHECK_LE(buf->size(), offset);
-  buf->insert(buf->end(), offset - buf->size(), 0u);
-}
-
-/* Write the literal pool to the output stream */
-void Mir2Lir::InstallLiteralPools() {
-  AlignBuffer(&code_buffer_, data_offset_);
-  LIR* data_lir = literal_list_;
-  while (data_lir != nullptr) {
-    Push32(&code_buffer_, data_lir->operands[0]);
-    data_lir = NEXT_LIR(data_lir);
-  }
-  // TODO: patches_.reserve() as needed.
-  // Push code and method literals, record offsets for the compiler to patch.
-  data_lir = code_literal_list_;
-  while (data_lir != nullptr) {
-    uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
-    patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(),
-                                              target_dex_file, target_method_idx));
-    PushUnpatchedReference(&code_buffer_);
-    data_lir = NEXT_LIR(data_lir);
-  }
-  data_lir = method_literal_list_;
-  while (data_lir != nullptr) {
-    uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
-    patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(),
-                                                target_dex_file, target_method_idx));
-    PushUnpatchedReference(&code_buffer_);
-    data_lir = NEXT_LIR(data_lir);
-  }
-  // Push class literals.
-  data_lir = class_literal_list_;
-  while (data_lir != nullptr) {
-    uint32_t target_type_idx = data_lir->operands[0];
-    const DexFile* class_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
-    patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(),
-                                              class_dex_file, target_type_idx));
-    PushUnpatchedReference(&code_buffer_);
-    data_lir = NEXT_LIR(data_lir);
-  }
-}
-
-/* Write the switch tables to the output stream */
-void Mir2Lir::InstallSwitchTables() {
-  for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) {
-    AlignBuffer(&code_buffer_, tab_rec->offset);
-    /*
-     * For Arm, our reference point is the address of the bx
-     * instruction that does the launch, so we have to subtract
-     * the auto pc-advance.  For other targets the reference point
-     * is a label, so we can use the offset as-is.
-     */
-    int bx_offset = INVALID_OFFSET;
-    switch (cu_->instruction_set) {
-      case kThumb2:
-        DCHECK(tab_rec->anchor->flags.fixup != kFixupNone);
-        bx_offset = tab_rec->anchor->offset + 4;
-        break;
-      case kX86_64:
-        // RIP relative to switch table.
-        bx_offset = tab_rec->offset;
-        break;
-      case kX86:
-      case kArm64:
-      case kMips:
-      case kMips64:
-        bx_offset = tab_rec->anchor->offset;
-        break;
-      default: LOG(FATAL) << "Unexpected instruction set: " << cu_->instruction_set;
-    }
-    if (cu_->verbose) {
-      LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset;
-    }
-    if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) {
-      DCHECK(tab_rec->switch_mir != nullptr);
-      BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb);
-      DCHECK(bb != nullptr);
-      int elems = 0;
-      for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-        int key = successor_block_info->key;
-        int target = successor_block_info->block;
-        LIR* boundary_lir = InsertCaseLabel(target, key);
-        DCHECK(boundary_lir != nullptr);
-        int disp = boundary_lir->offset - bx_offset;
-        Push32(&code_buffer_, key);
-        Push32(&code_buffer_, disp);
-        if (cu_->verbose) {
-          LOG(INFO) << "  Case[" << elems << "] key: 0x"
-                    << std::hex << key << ", disp: 0x"
-                    << std::hex << disp;
-        }
-        elems++;
-      }
-      DCHECK_EQ(elems, tab_rec->table[1]);
-    } else {
-      DCHECK_EQ(static_cast<int>(tab_rec->table[0]),
-                static_cast<int>(Instruction::kPackedSwitchSignature));
-      DCHECK(tab_rec->switch_mir != nullptr);
-      BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb);
-      DCHECK(bb != nullptr);
-      int elems = 0;
-      int low_key = s4FromSwitchData(&tab_rec->table[2]);
-      for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-        int key = successor_block_info->key;
-        DCHECK_EQ(elems + low_key, key);
-        int target = successor_block_info->block;
-        LIR* boundary_lir = InsertCaseLabel(target, key);
-        DCHECK(boundary_lir != nullptr);
-        int disp = boundary_lir->offset - bx_offset;
-        Push32(&code_buffer_, disp);
-        if (cu_->verbose) {
-          LOG(INFO) << "  Case[" << elems << "] disp: 0x"
-                    << std::hex << disp;
-        }
-        elems++;
-      }
-      DCHECK_EQ(elems, tab_rec->table[1]);
-    }
-  }
-}
-
-/* Write the fill array data to the output stream */
-void Mir2Lir::InstallFillArrayData() {
-  for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) {
-    AlignBuffer(&code_buffer_, tab_rec->offset);
-    for (int i = 0; i < (tab_rec->size + 1) / 2; i++) {
-      code_buffer_.push_back(tab_rec->table[i] & 0xFF);
-      code_buffer_.push_back((tab_rec->table[i] >> 8) & 0xFF);
-    }
-  }
-}
-
-static int AssignLiteralOffsetCommon(LIR* lir, CodeOffset offset) {
-  for (; lir != nullptr; lir = lir->next) {
-    lir->offset = offset;
-    offset += 4;
-  }
-  return offset;
-}
-
-static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
-                                            unsigned int element_size) {
-  // Align to natural pointer size.
-  offset = RoundUp(offset, element_size);
-  for (; lir != nullptr; lir = lir->next) {
-    lir->offset = offset;
-    offset += element_size;
-  }
-  return offset;
-}
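-// A minimal worked example (illustrative numbers): with offset == 10
-// and element_size == 8, RoundUp(10, 8) yields 16, so three literals
-// would be assigned offsets 16, 24 and 32, and the function returns 40
-// as the starting offset for whatever data follows.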
-
-// Make sure we have a code address for every declared catch entry
-bool Mir2Lir::VerifyCatchEntries() {
-  MappingTable table(&encoded_mapping_table_[0]);
-  std::vector<uint32_t> dex_pcs;
-  dex_pcs.reserve(table.DexToPcSize());
-  for (auto it = table.DexToPcBegin(), end = table.DexToPcEnd(); it != end; ++it) {
-    dex_pcs.push_back(it.DexPc());
-  }
-  // Sort dex_pcs, so that we can quickly check it against the ordered mir_graph_->catches_.
-  std::sort(dex_pcs.begin(), dex_pcs.end());
-
-  bool success = true;
-  auto it = dex_pcs.begin(), end = dex_pcs.end();
-  for (uint32_t dex_pc : mir_graph_->catches_) {
-    while (it != end && *it < dex_pc) {
-      LOG(INFO) << "Unexpected catch entry @ dex pc 0x" << std::hex << *it;
-      ++it;
-      success = false;
-    }
-    if (it == end || *it > dex_pc) {
-      LOG(INFO) << "Missing native PC for catch entry @ 0x" << std::hex << dex_pc;
-      success = false;
-    } else {
-      ++it;
-    }
-  }
-  if (!success) {
-    LOG(INFO) << "Bad dex2pcMapping table in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    LOG(INFO) << "Entries @ decode: " << mir_graph_->catches_.size() << ", Entries in table: "
-              << table.DexToPcSize();
-  }
-  return success;
-}
-
-
-void Mir2Lir::CreateMappingTables() {
-  bool generate_src_map = cu_->compiler_driver->GetCompilerOptions().GetGenerateDebugInfo();
-
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = 0u;
-  uint32_t pc2dex_offset = 0u;
-  uint32_t pc2dex_dalvik_offset = 0u;
-  uint32_t pc2dex_src_entries = 0u;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  uint32_t dex2pc_dalvik_offset = 0u;
-  for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) {
-    pc2dex_src_entries++;
-    if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) {
-      pc2dex_entries += 1;
-      DCHECK(pc2dex_offset <= tgt_lir->offset);
-      pc2dex_data_size += UnsignedLeb128Size(tgt_lir->offset - pc2dex_offset);
-      pc2dex_data_size += SignedLeb128Size(static_cast<int32_t>(tgt_lir->dalvik_offset) -
-                                           static_cast<int32_t>(pc2dex_dalvik_offset));
-      pc2dex_offset = tgt_lir->offset;
-      pc2dex_dalvik_offset = tgt_lir->dalvik_offset;
-    }
-    if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoExportedPC)) {
-      dex2pc_entries += 1;
-      DCHECK(dex2pc_offset <= tgt_lir->offset);
-      dex2pc_data_size += UnsignedLeb128Size(tgt_lir->offset - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(static_cast<int32_t>(tgt_lir->dalvik_offset) -
-                                           static_cast<int32_t>(dex2pc_dalvik_offset));
-      dex2pc_offset = tgt_lir->offset;
-      dex2pc_dalvik_offset = tgt_lir->dalvik_offset;
-    }
-  }
-
-  if (generate_src_map) {
-    src_mapping_table_.reserve(pc2dex_src_entries);
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  encoded_mapping_table_.resize(data_size);
-  uint8_t* write_pos = &encoded_mapping_table_[0];
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  bool is_in_prologue_or_epilogue = false;
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-  for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) {
-    if (generate_src_map && !tgt_lir->flags.is_nop && tgt_lir->opcode >= 0) {
-      if (!is_in_prologue_or_epilogue) {
-        src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset,
-                static_cast<int32_t>(tgt_lir->dalvik_offset)}));
-      }
-    }
-    if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) {
-      DCHECK(pc2dex_offset <= tgt_lir->offset);
-      write_pos = EncodeUnsignedLeb128(write_pos, tgt_lir->offset - pc2dex_offset);
-      write_pos = EncodeSignedLeb128(write_pos, static_cast<int32_t>(tgt_lir->dalvik_offset) -
-                                     static_cast<int32_t>(pc2dex_dalvik_offset));
-      pc2dex_offset = tgt_lir->offset;
-      pc2dex_dalvik_offset = tgt_lir->dalvik_offset;
-    }
-    if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoExportedPC)) {
-      DCHECK(dex2pc_offset <= tgt_lir->offset);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, tgt_lir->offset - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, static_cast<int32_t>(tgt_lir->dalvik_offset) -
-                                      static_cast<int32_t>(dex2pc_dalvik_offset));
-      dex2pc_offset = tgt_lir->offset;
-      dex2pc_dalvik_offset = tgt_lir->dalvik_offset;
-    }
-    if (tgt_lir->opcode == kPseudoPrologueBegin || tgt_lir->opcode == kPseudoEpilogueBegin) {
-      is_in_prologue_or_epilogue = true;
-    }
-    if (tgt_lir->opcode == kPseudoPrologueEnd || tgt_lir->opcode == kPseudoEpilogueEnd) {
-      is_in_prologue_or_epilogue = false;
-    }
-  }
-  DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]),
-            hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - &encoded_mapping_table_[0]), data_size);
-
-  if (kIsDebugBuild) {
-    CHECK(VerifyCatchEntries());
-
-    // Verify the encoded table holds the expected data.
-    MappingTable table(&encoded_mapping_table_[0]);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) {
-      if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) {
-        CHECK_EQ(tgt_lir->offset, it.NativePcOffset());
-        CHECK_EQ(tgt_lir->dalvik_offset, it.DexPc());
-        ++it;
-      }
-      if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoExportedPC)) {
-        CHECK_EQ(tgt_lir->offset, it2.NativePcOffset());
-        CHECK_EQ(tgt_lir->dalvik_offset, it2.DexPc());
-        ++it2;
-      }
-    }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
-  }
-}
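-// A sketch of the encoding built above (illustrative values): entries
-// store deltas rather than absolute values, so a safepoint stream with
-// (native offset, dex pc) pairs
-//   (0x10, 0x3), (0x14, 0x5), (0x20, 0x2)
-// is emitted as ULEB128/SLEB128 delta pairs
-//   (0x10, +3), (0x4, +2), (0xc, -3),
-// keeping most entries to a single byte each.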
-
-void Mir2Lir::CreateNativeGcMap() {
-  if (UNLIKELY((cu_->disable_opt & (1u << kPromoteRegs)) != 0u)) {
-    // If we're not promoting to physical registers, it's safe to use the verifier's notion of
-    // references. (We disable register promotion when type inference finds a type conflict and
-    // in that case we defer to the verifier to avoid using the compiler's conflicting info.)
-    CreateNativeGcMapWithoutRegisterPromotion();
-    return;
-  }
-
-  ArenaBitVector* references = new (arena_) ArenaBitVector(arena_, mir_graph_->GetNumSSARegs(),
-                                                           false);
-
-  // Calculate max native offset and max reference vreg.
-  MIR* prev_mir = nullptr;
-  int max_ref_vreg = -1;
-  CodeOffset max_native_offset = 0u;
-  for (const auto& entry : safepoints_) {
-    uint32_t native_offset = entry.first->offset;
-    max_native_offset = std::max(max_native_offset, native_offset);
-    MIR* mir = entry.second;
-    UpdateReferenceVRegs(mir, prev_mir, references);
-    max_ref_vreg = std::max(max_ref_vreg, references->GetHighestBitSet());
-    prev_mir = mir;
-  }
-
-#if defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)
-  static constexpr bool kLittleEndian = true;
-#else
-  static constexpr bool kLittleEndian = false;
-#endif
-
-  // Build the GC map.
-  uint32_t reg_width = static_cast<uint32_t>((max_ref_vreg + 8) / 8);
-  GcMapBuilder native_gc_map_builder(&native_gc_map_,
-                                     safepoints_.size(),
-                                     max_native_offset, reg_width);
-  if (kLittleEndian) {
-    for (const auto& entry : safepoints_) {
-      uint32_t native_offset = entry.first->offset;
-      MIR* mir = entry.second;
-      UpdateReferenceVRegs(mir, prev_mir, references);
-      // For little-endian, the bytes comprising the bit vector's raw storage are what we need.
-      native_gc_map_builder.AddEntry(native_offset,
-                                     reinterpret_cast<const uint8_t*>(references->GetRawStorage()));
-      prev_mir = mir;
-    }
-  } else {
-    ArenaVector<uint8_t> references_buffer(arena_->Adapter());
-    references_buffer.resize(reg_width);
-    for (const auto& entry : safepoints_) {
-      uint32_t native_offset = entry.first->offset;
-      MIR* mir = entry.second;
-      UpdateReferenceVRegs(mir, prev_mir, references);
-      // Big-endian or unknown endianness, manually translate the bit vector data.
-      const auto* raw_storage = references->GetRawStorage();
-      for (size_t i = 0; i != reg_width; ++i) {
-        references_buffer[i] = static_cast<uint8_t>(
-            raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
-      }
-      native_gc_map_builder.AddEntry(native_offset, references_buffer.data());
-      prev_mir = mir;
-    }
-  }
-}
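-// A worked example of the manual translation above, assuming 32-bit
-// storage words (illustrative value): with raw_storage[0] == 0x00010203,
-//   references_buffer[i] = raw_storage[i / 4] >> (8 * (i % 4));
-// yields { 0x03, 0x02, 0x01, 0x00 }, i.e. the little-endian byte order
-// the GC map expects regardless of the host's endianness.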
-
-void Mir2Lir::CreateNativeGcMapWithoutRegisterPromotion() {
-  DCHECK(!encoded_mapping_table_.empty());
-  MappingTable mapping_table(&encoded_mapping_table_[0]);
-  uint32_t max_native_offset = 0;
-  for (auto it = mapping_table.PcToDexBegin(), end = mapping_table.PcToDexEnd(); it != end; ++it) {
-    uint32_t native_offset = it.NativePcOffset();
-    if (native_offset > max_native_offset) {
-      max_native_offset = native_offset;
-    }
-  }
-  MethodReference method_ref(cu_->dex_file, cu_->method_idx);
-  const std::vector<uint8_t>& gc_map_raw =
-      mir_graph_->GetCurrentDexCompilationUnit()->GetVerifiedMethod()->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-  DCHECK_EQ(gc_map_raw.size(), dex_gc_map.RawSize());
-  // Compute native offset to references size.
-  GcMapBuilder native_gc_map_builder(&native_gc_map_,
-                                     mapping_table.PcToDexSize(),
-                                     max_native_offset, dex_gc_map.RegWidth());
-
-  for (auto it = mapping_table.PcToDexBegin(), end = mapping_table.PcToDexEnd(); it != end; ++it) {
-    uint32_t native_offset = it.NativePcOffset();
-    uint32_t dex_pc = it.DexPc();
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc <<
-        ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    native_gc_map_builder.AddEntry(native_offset, references);
-  }
-
-  // Maybe not necessary, but this could help prevent errors where we access the verified method
-  // after it has been deleted.
-  mir_graph_->GetCurrentDexCompilationUnit()->ClearVerifiedMethod();
-}
-
-/* Determine the offset of each literal field */
-int Mir2Lir::AssignLiteralOffset(CodeOffset offset) {
-  offset = AssignLiteralOffsetCommon(literal_list_, offset);
-  constexpr unsigned int ptr_size = sizeof(uint32_t);
-  static_assert(ptr_size >= sizeof(mirror::HeapReference<mirror::Object>),
-                "Pointer size cannot hold a heap reference");
-  offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset, ptr_size);
-  offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset, ptr_size);
-  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset, ptr_size);
-  return offset;
-}
-
-int Mir2Lir::AssignSwitchTablesOffset(CodeOffset offset) {
-  for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) {
-    tab_rec->offset = offset;
-    if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) {
-      offset += tab_rec->table[1] * (sizeof(int) * 2);
-    } else {
-      DCHECK_EQ(static_cast<int>(tab_rec->table[0]),
-                static_cast<int>(Instruction::kPackedSwitchSignature));
-      offset += tab_rec->table[1] * sizeof(int);
-    }
-  }
-  return offset;
-}
-
-int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) {
-  for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) {
-    tab_rec->offset = offset;
-    offset += tab_rec->size;
-    // word align
-    offset = RoundUp(offset, 4);
-  }
-  return offset;
-}
-
-/*
- * Insert a kPseudoCaseLabel at the beginning of the Dalvik
- * offset vaddr if pretty-printing, otherwise use the standard block
- * label.  The selected label will be used to fix up the case
- * branch table during the assembly phase.  All resource flags
- * are set to prevent code motion.  KeyVal is just there for debugging.
- */
-LIR* Mir2Lir::InsertCaseLabel(uint32_t bbid, int keyVal) {
-  LIR* boundary_lir = &block_label_list_[bbid];
-  LIR* res = boundary_lir;
-  if (cu_->verbose) {
-    // Only pay the expense if we're pretty-printing.
-    LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
-    BasicBlock* bb = mir_graph_->GetBasicBlock(bbid);
-    DCHECK(bb != nullptr);
-    new_label->dalvik_offset = bb->start_offset;
-    new_label->opcode = kPseudoCaseLabel;
-    new_label->operands[0] = keyVal;
-    new_label->flags.fixup = kFixupLabel;
-    DCHECK(!new_label->flags.use_def_invalid);
-    new_label->u.m.def_mask = &kEncodeAll;
-    InsertLIRAfter(boundary_lir, new_label);
-  }
-  return res;
-}
-
-void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) {
-  /*
-   * Sparse switch data format:
-   *  ushort ident = 0x0200   magic value
-   *  ushort size       number of entries in the table; > 0
-   *  int keys[size]      keys, sorted low-to-high; 32-bit aligned
-   *  int targets[size]     branch targets, relative to switch opcode
-   *
-   * Total size is (2+size*4) 16-bit code units.
-   */
-  uint16_t ident = table[0];
-  int entries = table[1];
-  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
-  const int32_t* targets = &keys[entries];
-  LOG(INFO) <<  "Sparse switch table - ident:0x" << std::hex << ident
-            << ", entries: " << std::dec << entries;
-  for (int i = 0; i < entries; i++) {
-    LOG(INFO) << "  Key[" << keys[i] << "] -> 0x" << std::hex << targets[i];
-  }
-}
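-// A worked sparse-switch layout (illustrative data): two entries
-// mapping keys 5 and 100 to targets 0x20 and 0x38 occupy the 16-bit
-// code units
-//   0x0200, 0x0002,   // ident, size
-//   0x0005, 0x0000,   // keys[0] = 5
-//   0x0064, 0x0000,   // keys[1] = 100
-//   0x0020, 0x0000,   // targets[0]
-//   0x0038, 0x0000,   // targets[1]
-// for a total of 2 + 2*4 = 10 units.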
-
-void Mir2Lir::DumpPackedSwitchTable(const uint16_t* table) {
-  /*
-   * Packed switch data format:
-   *  ushort ident = 0x0100   magic value
-   *  ushort size       number of entries in the table
-   *  int first_key       first (and lowest) switch case value
-   *  int targets[size]     branch targets, relative to switch opcode
-   *
-   * Total size is (4+size*2) 16-bit code units.
-   */
-  uint16_t ident = table[0];
-  const int32_t* targets = reinterpret_cast<const int32_t*>(&table[4]);
-  int entries = table[1];
-  int low_key = s4FromSwitchData(&table[2]);
-  LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident
-            << ", entries: " << std::dec << entries << ", low_key: " << low_key;
-  for (int i = 0; i < entries; i++) {
-    LOG(INFO) << "  Key[" << (i + low_key) << "] -> 0x" << std::hex
-              << targets[i];
-  }
-}
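-// A worked packed-switch layout (illustrative data): three entries with
-// first_key == -1 and targets 0x10, 0x24, 0x30 occupy the 16-bit code
-// units
-//   0x0100, 0x0003,   // ident, size
-//   0xffff, 0xffff,   // first_key = -1
-//   0x0010, 0x0000,   // targets[0]
-//   0x0024, 0x0000,   // targets[1]
-//   0x0030, 0x0000,   // targets[2]
-// for a total of 4 + 3*2 = 10 units.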
-
-/* Set up special LIR to mark a Dalvik byte-code instruction start for pretty printing */
-void Mir2Lir::MarkBoundary(DexOffset offset ATTRIBUTE_UNUSED, const char* inst_str) {
-  // NOTE: only used for debug listings.
-  NewLIR1(kPseudoDalvikByteCodeBoundary, WrapPointer(ArenaStrdup(inst_str)));
-}
-
-// Convert relation of src1/src2 to src2/src1
-ConditionCode Mir2Lir::FlipComparisonOrder(ConditionCode before) {
-  ConditionCode res;
-  switch (before) {
-    case kCondEq: res = kCondEq; break;
-    case kCondNe: res = kCondNe; break;
-    case kCondLt: res = kCondGt; break;
-    case kCondGt: res = kCondLt; break;
-    case kCondLe: res = kCondGe; break;
-    case kCondGe: res = kCondLe; break;
-    default:
-      LOG(FATAL) << "Unexpected ccode " << before;
-      UNREACHABLE();
-  }
-  return res;
-}
-
-ConditionCode Mir2Lir::NegateComparison(ConditionCode before) {
-  ConditionCode res;
-  switch (before) {
-    case kCondEq: res = kCondNe; break;
-    case kCondNe: res = kCondEq; break;
-    case kCondLt: res = kCondGe; break;
-    case kCondGt: res = kCondLe; break;
-    case kCondLe: res = kCondGt; break;
-    case kCondGe: res = kCondLt; break;
-    default:
-      LOG(FATAL) << "Unexpected ccode " << before;
-      UNREACHABLE();
-  }
-  return res;
-}
-
-// TODO: move to mir_to_lir.cc
-Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : literal_list_(nullptr),
-      method_literal_list_(nullptr),
-      class_literal_list_(nullptr),
-      code_literal_list_(nullptr),
-      first_fixup_(nullptr),
-      arena_(arena),
-      cu_(cu),
-      mir_graph_(mir_graph),
-      switch_tables_(arena->Adapter(kArenaAllocSwitchTable)),
-      fill_array_data_(arena->Adapter(kArenaAllocFillArrayData)),
-      tempreg_info_(arena->Adapter()),
-      reginfo_map_(arena->Adapter()),
-      pointer_storage_(arena->Adapter()),
-      data_offset_(0),
-      total_size_(0),
-      block_label_list_(nullptr),
-      promotion_map_(nullptr),
-      current_dalvik_offset_(0),
-      current_mir_(nullptr),
-      estimated_native_code_size_(0),
-      reg_pool_(nullptr),
-      live_sreg_(0),
-      code_buffer_(mir_graph->GetArena()->Adapter()),
-      encoded_mapping_table_(mir_graph->GetArena()->Adapter()),
-      core_vmap_table_(mir_graph->GetArena()->Adapter()),
-      fp_vmap_table_(mir_graph->GetArena()->Adapter()),
-      native_gc_map_(mir_graph->GetArena()->Adapter()),
-      patches_(mir_graph->GetArena()->Adapter()),
-      num_core_spills_(0),
-      num_fp_spills_(0),
-      frame_size_(0),
-      core_spill_mask_(0),
-      fp_spill_mask_(0),
-      first_lir_insn_(nullptr),
-      last_lir_insn_(nullptr),
-      slow_paths_(arena->Adapter(kArenaAllocSlowPaths)),
-      mem_ref_type_(ResourceMask::kHeapRef),
-      mask_cache_(arena),
-      safepoints_(arena->Adapter()),
-      dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)),
-      pc_rel_temp_(nullptr),
-      dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()),
-      cfi_(&last_lir_insn_,
-           cu->compiler_driver->GetCompilerOptions().GetGenerateDebugInfo(),
-           arena),
-      in_to_reg_storage_mapping_(arena) {
-  switch_tables_.reserve(4);
-  fill_array_data_.reserve(4);
-  tempreg_info_.reserve(20);
-  reginfo_map_.reserve(RegStorage::kMaxRegs);
-  pointer_storage_.reserve(128);
-  slow_paths_.reserve(32);
-  // Reserve pointer id 0 for null.
-  size_t null_idx = WrapPointer<void>(nullptr);
-  DCHECK_EQ(null_idx, 0U);
-}
-
-void Mir2Lir::Materialize() {
-  cu_->NewTimingSplit("RegisterAllocation");
-  CompilerInitializeRegAlloc();  // Needs to happen after SSA naming
-
-  /* Allocate registers using a simple local allocation scheme */
-  SimpleRegAlloc();
-
-  /* First try the custom light codegen for special cases. */
-  DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  bool special_worked = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
-      ->GenSpecial(this, cu_->method_idx);
-
-  /* Take normal path for converting MIR to LIR only if the special codegen did not succeed. */
-  if (special_worked == false) {
-    MethodMIR2LIR();
-  }
-
-  /* Method is not empty */
-  if (first_lir_insn_) {
-    /* Convert LIR into machine code. */
-    AssembleLIR();
-
-    if ((cu_->enable_debug & (1 << kDebugCodegenDump)) != 0) {
-      CodegenDump();
-    }
-  }
-}
-
-CompiledMethod* Mir2Lir::GetCompiledMethod() {
-  // Combine vmap tables - core regs, then fp regs - into vmap_table.
-  Leb128EncodingVector<> vmap_encoder;
-  if (frame_size_ > 0) {
-    // Prefix the encoded data with its size.
-    size_t size = core_vmap_table_.size() + 1 /* marker */ + fp_vmap_table_.size();
-    vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-    vmap_encoder.PushBackUnsigned(size);
-    // Core regs may have been inserted out of order - sort first.
-    std::sort(core_vmap_table_.begin(), core_vmap_table_.end());
-    for (size_t i = 0; i < core_vmap_table_.size(); ++i) {
-      // Copy, stripping out the phys register sort key.
-      vmap_encoder.PushBackUnsigned(
-          ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment));
-    }
-    // Push a marker to take the place of lr.
-    vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-    if (cu_->instruction_set == kThumb2) {
-      // fp regs already sorted.
-      for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
-        vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
-      }
-    } else {
-      // For other platforms regs may have been inserted out of order - sort first.
-      std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end());
-      for (size_t i = 0; i < fp_vmap_table_.size(); ++i) {
-        // Copy, stripping out the phys register sort key.
-        vmap_encoder.PushBackUnsigned(
-            ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
-      }
-    }
-  } else {
-    DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
-    DCHECK_EQ(POPCOUNT(fp_spill_mask_), 0);
-    DCHECK_EQ(core_vmap_table_.size(), 0u);
-    DCHECK_EQ(fp_vmap_table_.size(), 0u);
-    vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
-  }
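-  // Note on the masking above (illustrative, assuming VREG_NUM_WIDTH is
-  // 16): ~(~0u << VREG_NUM_WIDTH) is a mask of VREG_NUM_WIDTH low ones,
-  // so an adjusted entry 0x00230007 (physical-register sort key 0x0023
-  // in the high bits, vreg 7 below) is stripped to 0x0007 before being
-  // ULEB128-encoded.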
-
-  // Sort patches by literal offset. Required for .oat_patches encoding.
-  std::sort(patches_.begin(), patches_.end(), [](const LinkerPatch& lhs, const LinkerPatch& rhs) {
-    return lhs.LiteralOffset() < rhs.LiteralOffset();
-  });
-
-  return CompiledMethod::SwapAllocCompiledMethod(
-      cu_->compiler_driver, cu_->instruction_set,
-      ArrayRef<const uint8_t>(code_buffer_),
-      frame_size_, core_spill_mask_, fp_spill_mask_,
-      &src_mapping_table_,
-      ArrayRef<const uint8_t>(encoded_mapping_table_),
-      ArrayRef<const uint8_t>(vmap_encoder.GetData()),
-      ArrayRef<const uint8_t>(native_gc_map_),
-      ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())),
-      ArrayRef<const LinkerPatch>(patches_));
-}
-
-size_t Mir2Lir::GetMaxPossibleCompilerTemps() const {
-  // Choose a reasonably small value in order to contain stack growth.
-  // Backends that are smarter about spill region can return larger values.
-  const size_t max_compiler_temps = 10;
-  return max_compiler_temps;
-}
-
-size_t Mir2Lir::GetNumBytesForCompilerTempSpillRegion() {
-  // By default assume that the Mir2Lir will need one slot for each temporary.
-  // If the backend can better determine temps that have non-overlapping ranges and
-  // temps that do not need to be spilled, it can actually provide a smaller region.
-  mir_graph_->CommitCompilerTemps();
-  return mir_graph_->GetNumBytesForSpecialTemps() + mir_graph_->GetMaximumBytesForNonSpecialTemps();
-}
-
-int Mir2Lir::ComputeFrameSize() {
-  /* Figure out the frame size */
-  uint32_t size = num_core_spills_ * GetBytesPerGprSpillLocation(cu_->instruction_set)
-                  + num_fp_spills_ * GetBytesPerFprSpillLocation(cu_->instruction_set)
-                  + sizeof(uint32_t)  // Filler.
-                  + mir_graph_->GetNumOfLocalCodeVRs()  * sizeof(uint32_t)
-                  + mir_graph_->GetNumOfOutVRs() * sizeof(uint32_t)
-                  + GetNumBytesForCompilerTempSpillRegion();
-  /* Align and set */
-  return RoundUp(size, kStackAlignment);
-}
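-// A worked example (illustrative numbers, assuming 4-byte GPR spill
-// slots and kStackAlignment == 16): 2 core spills, no FP spills,
-// 6 local vregs, 4 outs and no compiler temps give
-//   2*4 + 0 + 4 + 6*4 + 4*4 = 52 bytes,
-// which RoundUp(52, 16) pads to a 64-byte frame.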
-
-/*
- * Append an LIR instruction to the LIR list maintained by a compilation
- * unit
- */
-void Mir2Lir::AppendLIR(LIR* lir) {
-  if (first_lir_insn_ == nullptr) {
-    DCHECK(last_lir_insn_ == nullptr);
-    last_lir_insn_ = first_lir_insn_ = lir;
-    lir->prev = lir->next = nullptr;
-  } else {
-    last_lir_insn_->next = lir;
-    lir->prev = last_lir_insn_;
-    lir->next = nullptr;
-    last_lir_insn_ = lir;
-  }
-}
-
-/*
- * Insert an LIR instruction before the current instruction, which cannot be the
- * first instruction.
- *
- * prev_lir <-> new_lir <-> current_lir
- */
-void Mir2Lir::InsertLIRBefore(LIR* current_lir, LIR* new_lir) {
-  DCHECK(current_lir->prev != nullptr);
-  LIR *prev_lir = current_lir->prev;
-
-  prev_lir->next = new_lir;
-  new_lir->prev = prev_lir;
-  new_lir->next = current_lir;
-  current_lir->prev = new_lir;
-}
-
-/*
- * Insert an LIR instruction after the current instruction, which cannot be the
- * last instruction.
- *
- * current_lir -> new_lir -> old_next
- */
-void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) {
-  new_lir->prev = current_lir;
-  new_lir->next = current_lir->next;
-  current_lir->next = new_lir;
-  new_lir->next->prev = new_lir;
-}
-
-bool Mir2Lir::PartiallyIntersects(RegLocation rl_src, RegLocation rl_dest) {
-  DCHECK(rl_src.wide);
-  DCHECK(rl_dest.wide);
-  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
-}
-
-bool Mir2Lir::Intersects(RegLocation rl_src, RegLocation rl_dest) {
-  DCHECK(rl_src.wide);
-  DCHECK(rl_dest.wide);
-  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) <= 1);
-}
-
-LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                int offset, int check_value, LIR* target, LIR** compare) {
-  // Handle this for architectures that can't compare to memory.
-  LIR* inst = Load32Disp(base_reg, offset, temp_reg);
-  if (compare != nullptr) {
-    *compare = inst;
-  }
-  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
-  return branch;
-}
-
-void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
-  slow_paths_.push_back(slowpath);
-  ResetDefTracking();
-}
-
-void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType type,
-                              SpecialTargetRegister symbolic_reg) {
-  LIR* data_target = ScanLiteralPoolMethod(code_literal_list_, target_method);
-  if (data_target == nullptr) {
-    data_target = AddWordData(&code_literal_list_, target_method.dex_method_index);
-    data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file));
-    // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have
-    // the same method invoked with kVirtual, kSuper and kInterface but the class linker will
-    // resolve these invokes to the same method, so we don't care which one we record here.
-    data_target->operands[2] = type;
-  }
-  // Loads a code pointer. Code from oat file can be mapped anywhere.
-  OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
-  DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
-  DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
-}
-
-void Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                                SpecialTargetRegister symbolic_reg) {
-  LIR* data_target = ScanLiteralPoolMethod(method_literal_list_, target_method);
-  if (data_target == nullptr) {
-    data_target = AddWordData(&method_literal_list_, target_method.dex_method_index);
-    data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file));
-    // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have
-    // the same method invoked with kVirtual, kSuper and kInterface but the class linker will
-    // resolve these invokes to the same method, so we don't care which one we record here.
-    data_target->operands[2] = type;
-  }
-  // Loads an ArtMethod pointer, which is not a reference.
-  OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
-  DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
-  DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
-}
-
-void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
-                            SpecialTargetRegister symbolic_reg) {
-  // Use the literal pool and a PC-relative load from a data word.
-  LIR* data_target = ScanLiteralPoolClass(class_literal_list_, dex_file, type_idx);
-  if (data_target == nullptr) {
-    data_target = AddWordData(&class_literal_list_, type_idx);
-    data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file));
-  }
-  // Loads a Class pointer, which is a reference as it lives in the heap.
-  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-}
-
-bool Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
-  return false;
-}
-
-void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED,
-                                       int offset ATTRIBUTE_UNUSED,
-                                       RegStorage r_dest ATTRIBUTE_UNUSED,
-                                       bool wide ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "No generic implementation.";
-  UNREACHABLE();
-}
-
-RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) {
-  if (loc.location == kLocPhysReg) {
-    DCHECK(!loc.reg.Is32Bit());
-    if (loc.reg.IsPair()) {
-      RegisterInfo* info_lo = GetRegInfo(loc.reg.GetLow());
-      RegisterInfo* info_hi = GetRegInfo(loc.reg.GetHigh());
-      info_lo->SetIsWide(false);
-      info_hi->SetIsWide(false);
-      loc.reg = info_lo->GetReg();
-    } else {
-      RegisterInfo* info = GetRegInfo(loc.reg);
-      RegisterInfo* info_new = info->FindMatchingView(RegisterInfo::k32SoloStorageMask);
-      DCHECK(info_new != nullptr);
-      if (info->IsLive() && (info->SReg() == loc.s_reg_low)) {
-        info->MarkDead();
-        info_new->MarkLive(loc.s_reg_low);
-      }
-      loc.reg = info_new->GetReg();
-    }
-    DCHECK(loc.reg.Valid());
-  }
-  loc.wide = false;
-  return loc;
-}
-
-void Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb ATTRIBUTE_UNUSED,
-                                                  MIR* mir ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unknown MIR opcode not supported on this architecture";
-  UNREACHABLE();
-}
-
-void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) {
-  // Mark the references coming from the first predecessor.
-  DCHECK(bb != nullptr);
-  DCHECK(bb->block_type == kEntryBlock || !bb->predecessors.empty());
-  BasicBlock* first_bb =
-      (bb->block_type == kEntryBlock) ? bb : mir_graph_->GetBasicBlock(bb->predecessors[0]);
-  DCHECK(first_bb != nullptr);
-  DCHECK(first_bb->data_flow_info != nullptr);
-  DCHECK(first_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
-  const int32_t* first_vreg_to_ssa_map = first_bb->data_flow_info->vreg_to_ssa_map_exit;
-  references->ClearAllBits();
-  for (uint32_t vreg = 0,
-       num_vregs = mir_graph_->GetNumOfCodeVRs() + mir_graph_->GetNumUsedCompilerTemps();
-       vreg != num_vregs; ++vreg) {
-    int32_t sreg = first_vreg_to_ssa_map[vreg];
-    if (sreg != INVALID_SREG && mir_graph_->reg_location_[sreg].ref &&
-        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[sreg])) {
-      references->SetBit(vreg);
-    }
-  }
-  // Unmark the references that are merging with a different value.
-  for (size_t i = 1u, num_pred = bb->predecessors.size(); i < num_pred; ++i) {
-    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[i]);
-    DCHECK(pred_bb != nullptr);
-    DCHECK(pred_bb->data_flow_info != nullptr);
-    DCHECK(pred_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
-    const int32_t* pred_vreg_to_ssa_map = pred_bb->data_flow_info->vreg_to_ssa_map_exit;
-    for (uint32_t vreg : references->Indexes()) {
-      if (first_vreg_to_ssa_map[vreg] != pred_vreg_to_ssa_map[vreg]) {
-        // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again,
-        // so clearing the bit has no effect on the iterator.
-        references->ClearBit(vreg);
-      }
-    }
-  }
-}
-
-bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
-  DCHECK(mir == nullptr || mir->bb == prev_mir->bb);
-  DCHECK(prev_mir != nullptr);
-  while (prev_mir != nullptr) {
-    if (prev_mir == mir) {
-      return true;
-    }
-    const size_t num_defs = prev_mir->ssa_rep->num_defs;
-    const int32_t* defs = prev_mir->ssa_rep->defs;
-    if (num_defs == 1u && mir_graph_->reg_location_[defs[0]].ref &&
-        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[defs[0]])) {
-      references->SetBit(mir_graph_->SRegToVReg(defs[0]));
-    } else {
-      for (size_t i = 0u; i != num_defs; ++i) {
-        references->ClearBit(mir_graph_->SRegToVReg(defs[i]));
-      }
-    }
-    prev_mir = prev_mir->next;
-  }
-  return false;
-}
-
-void Mir2Lir::UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references) {
-  if (mir == nullptr) {
-    // Safepoint in entry sequence.
-    InitReferenceVRegs(mir_graph_->GetEntryBlock(), references);
-    return;
-  }
-  if (IsInstructionReturn(mir->dalvikInsn.opcode) ||
-      mir->dalvikInsn.opcode == Instruction::RETURN_VOID_NO_BARRIER) {
-    references->ClearAllBits();
-    if (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT) {
-      references->SetBit(mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]));
-    }
-    return;
-  }
-  if (prev_mir != nullptr && mir->bb == prev_mir->bb &&
-      UpdateReferenceVRegsLocal(mir, prev_mir, references)) {
-    return;
-  }
-  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
-  DCHECK(bb != nullptr);
-  InitReferenceVRegs(bb, references);
-  bool success = UpdateReferenceVRegsLocal(mir, bb->first_mir_insn, references);
-  DCHECK(success) << "MIR @0x" << std::hex << mir->offset << " not in BB#" << std::dec << mir->bb;
-}
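-// In short (descriptive sketch of the protocol above): references are
-// rebuilt from scratch only when the basic block changes or the local
-// walk from prev_mir fails; returns clear all bits (re-marking only a
-// returned object); otherwise a single reference-typed def sets its
-// vreg's bit and any other def clears the bits it overwrites.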
-
-}  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index e1a2838..8d53dbf 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -21,10 +21,8 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex-inl.h"
-#include "dex/compiler_ir.h"
+#include "driver/compiler_driver.h"
 #include "thread-inl.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir.h"
 #include "dex_instruction-inl.h"
 #include "driver/dex_compilation_unit.h"
 #include "verifier/method_verifier-inl.h"
@@ -36,12 +34,23 @@
 static constexpr bool kIntrinsicIsStatic[] = {
     true,   // kIntrinsicDoubleCvt
     true,   // kIntrinsicFloatCvt
+    true,   // kIntrinsicFloat2Int
+    true,   // kIntrinsicDouble2Long
+    true,   // kIntrinsicFloatIsInfinite
+    true,   // kIntrinsicDoubleIsInfinite
+    true,   // kIntrinsicFloatIsNaN
+    true,   // kIntrinsicDoubleIsNaN
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicBitCount
+    true,   // kIntrinsicCompare
+    true,   // kIntrinsicHighestOneBit
+    true,   // kIntrinsicLowestOneBit
     true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicNumberOfTrailingZeros
     true,   // kIntrinsicRotateRight
     true,   // kIntrinsicRotateLeft
+    true,   // kIntrinsicSignum
     true,   // kIntrinsicAbsInt
     true,   // kIntrinsicAbsLong
     true,   // kIntrinsicAbsFloat
@@ -50,6 +59,23 @@
     true,   // kIntrinsicMinMaxLong
     true,   // kIntrinsicMinMaxFloat
     true,   // kIntrinsicMinMaxDouble
+    true,   // kIntrinsicCos
+    true,   // kIntrinsicSin
+    true,   // kIntrinsicAcos
+    true,   // kIntrinsicAsin
+    true,   // kIntrinsicAtan
+    true,   // kIntrinsicAtan2
+    true,   // kIntrinsicCbrt
+    true,   // kIntrinsicCosh
+    true,   // kIntrinsicExp
+    true,   // kIntrinsicExpm1
+    true,   // kIntrinsicHypot
+    true,   // kIntrinsicLog
+    true,   // kIntrinsicLog10
+    true,   // kIntrinsicNextAfter
+    true,   // kIntrinsicSinh
+    true,   // kIntrinsicTan
+    true,   // kIntrinsicTanh
     true,   // kIntrinsicSqrt
     true,   // kIntrinsicCeil
     true,   // kIntrinsicFloor
@@ -72,6 +98,14 @@
     false,  // kIntrinsicCas
     false,  // kIntrinsicUnsafeGet
     false,  // kIntrinsicUnsafePut
+    false,  // kIntrinsicUnsafeGetAndAddInt
+    false,  // kIntrinsicUnsafeGetAndAddLong
+    false,  // kIntrinsicUnsafeGetAndSetInt
+    false,  // kIntrinsicUnsafeGetAndSetLong
+    false,  // kIntrinsicUnsafeGetAndSetObject
+    false,  // kIntrinsicUnsafeLoadFence
+    false,  // kIntrinsicUnsafeStoreFence
+    false,  // kIntrinsicUnsafeFullFence
     true,   // kIntrinsicSystemArrayCopyCharArray
     true,   // kIntrinsicSystemArrayCopy
 };
@@ -79,22 +113,50 @@
               "arraysize of kIntrinsicIsStatic unexpected");
 static_assert(kIntrinsicIsStatic[kIntrinsicDoubleCvt], "DoubleCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicFloat2Int], "Float2Int must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicDouble2Long], "Double2Long must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicFloatIsInfinite], "FloatIsInfinite must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicDoubleIsInfinite], "DoubleIsInfinite must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicFloatIsNaN], "FloatIsNaN must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicDoubleIsNaN], "DoubleIsNaN must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCompare], "Compare must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicHighestOneBit], "HighestOneBit must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLowestOneBit], "LowestOneBit must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
               "NumberOfTrailingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicRotateRight], "RotateRight must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicRotateLeft], "RotateLeft must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSignum], "Signum must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsInt], "AbsInt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAsin], "Asin must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAtan], "Atan must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAtan2], "Atan2 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCbrt], "Cbrt must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCosh], "Cosh must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicExp], "Exp must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicExpm1], "Expm1 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicHypot], "Hypot must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLog], "Log must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLog10], "Log10 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNextAfter], "NextAfter must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSinh], "Sinh must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicTan], "Tan must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicTanh], "Tanh must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSqrt], "Sqrt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCeil], "Ceil must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicFloor], "Floor must be static");
@@ -118,38 +180,21 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndAddInt], "UnsafeGetAndAddInt must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndAddLong], "UnsafeGetAndAddLong must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndSetInt], "UnsafeGetAndSetInt must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndSetLong], "UnsafeGetAndSetLong must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndSetObject], "UnsafeGetAndSetObject must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeLoadFence], "UnsafeLoadFence must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeStoreFence], "UnsafeStoreFence must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeFullFence], "UnsafeFullFence must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopy],
               "SystemArrayCopy must be static");
 
-MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke) {
-  MIR* insn = mir_graph->NewMIR();
-  insn->offset = invoke->offset;
-  insn->optimization_flags = MIR_CALLEE;
-  return insn;
-}
-
-uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) {
-  DCHECK_LT(arg, invoke->dalvikInsn.vA);
-  DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
-  if (IsInvokeInstructionRange(invoke->dalvikInsn.opcode)) {
-    return invoke->dalvikInsn.vC + arg;  // Range invoke.
-  } else {
-    DCHECK_EQ(Instruction::FormatOf(invoke->dalvikInsn.opcode), Instruction::k35c);
-    return invoke->dalvikInsn.arg[arg];  // Non-range invoke.
-  }
-}
-
-bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) {
-  DCHECK_LT(arg + 1, invoke->dalvikInsn.vA);
-  DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
-  return IsInvokeInstructionRange(invoke->dalvikInsn.opcode) ||
-      invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u;
-}
-
 }  // anonymous namespace
 
 const uint32_t DexFileMethodInliner::kIndexUnresolved;
@@ -196,6 +241,23 @@
     "abs",                   // kNameCacheAbs
     "max",                   // kNameCacheMax
     "min",                   // kNameCacheMin
+    "cos",                   // kNameCacheCos
+    "sin",                   // kNameCacheSin
+    "acos",                  // kNameCacheAcos
+    "asin",                  // kNameCacheAsin
+    "atan",                  // kNameCacheAtan
+    "atan2",                 // kNameCacheAtan2
+    "cbrt",                  // kNameCacheCbrt
+    "cosh",                  // kNameCacheCosh
+    "exp",                   // kNameCacheExp
+    "expm1",                 // kNameCacheExpm1
+    "hypot",                 // kNameCacheHypot
+    "log",                   // kNameCacheLog
+    "log10",                 // kNameCacheLog10
+    "nextAfter",             // kNameCacheNextAfter
+    "sinh",                  // kNameCacheSinh
+    "tan",                   // kNameCacheTan
+    "tanh",                  // kNameCacheTanh
     "sqrt",                  // kNameCacheSqrt
     "ceil",                  // kNameCacheCeil
     "floor",                 // kNameCacheFloor
@@ -207,6 +269,10 @@
     "equals",                // kNameCacheEquals
     "getCharsNoCheck",       // kNameCacheGetCharsNoCheck
     "isEmpty",               // kNameCacheIsEmpty
+    "floatToIntBits",        // kNameCacheFloatToIntBits
+    "doubleToLongBits",      // kNameCacheDoubleToLongBits
+    "isInfinite",            // kNameCacheIsInfinite
+    "isNaN",                 // kNameCacheIsNaN
     "indexOf",               // kNameCacheIndexOf
     "length",                // kNameCacheLength
     "<init>",                // kNameCacheInit
@@ -240,11 +306,24 @@
     "putObject",             // kNameCachePutObject
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
+    "getAndAddInt",          // kNameCacheGetAndAddInt,
+    "getAndAddLong",         // kNameCacheGetAndAddLong,
+    "getAndSetInt",          // kNameCacheGetAndSetInt,
+    "getAndSetLong",         // kNameCacheGetAndSetLong,
+    "getAndSetObject",       // kNameCacheGetAndSetObject,
+    "loadFence",             // kNameCacheLoadFence,
+    "storeFence",            // kNameCacheStoreFence,
+    "fullFence",             // kNameCacheFullFence,
     "arraycopy",             // kNameCacheArrayCopy
+    "bitCount",              // kNameCacheBitCount
+    "compare",               // kNameCacheCompare
+    "highestOneBit",         // kNameCacheHighestOneBit
+    "lowestOneBit",          // kNameCacheLowestOneBit
     "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
     "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
     "rotateRight",           // kNameCacheRotateRight
     "rotateLeft",            // kNameCacheRotateLeft
+    "signum",                // kNameCacheSignum
 };
 
 const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
@@ -264,10 +343,14 @@
     { kClassCacheFloat, 2, { kClassCacheFloat, kClassCacheFloat } },
     // kProtoCacheD_J
     { kClassCacheLong, 1, { kClassCacheDouble } },
+    // kProtoCacheD_Z
+    { kClassCacheBoolean, 1, { kClassCacheDouble } },
     // kProtoCacheJ_D
     { kClassCacheDouble, 1, { kClassCacheLong } },
     // kProtoCacheF_I
     { kClassCacheInt, 1, { kClassCacheFloat } },
+    // kProtoCacheF_Z
+    { kClassCacheBoolean, 1, { kClassCacheFloat } },
     // kProtoCacheI_F
     { kClassCacheFloat, 1, { kClassCacheInt } },
     // kProtoCacheII_I
@@ -296,6 +379,8 @@
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheInt } },
     // kProtoCacheJJ_J
     { kClassCacheLong, 2, { kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheJJ_I
+    { kClassCacheInt, 2, { kClassCacheLong, kClassCacheLong } },
     // kProtoCacheJJ_V
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } },
     // kProtoCacheJS_V
@@ -315,10 +400,14 @@
         kClassCacheJavaLangObject, kClassCacheJavaLangObject } },
     // kProtoCacheObjectJ_I
     { kClassCacheInt, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJI_I
+    { kClassCacheInt, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
     // kProtoCacheObjectJI_V
     { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
     // kProtoCacheObjectJ_J
     { kClassCacheLong, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJJ_J
+    { kClassCacheLong, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
     // kProtoCacheObjectJJ_V
     { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
     // kProtoCacheObjectJ_Object
@@ -326,6 +415,9 @@
     // kProtoCacheObjectJObject_V
     { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong,
         kClassCacheJavaLangObject } },
+    // kProtoCacheObjectJObject_Object
+    { kClassCacheJavaLangObject, 3, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject } },
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
         kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
@@ -389,16 +481,34 @@
     INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
     INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, kIntrinsicFlagToFloatingPoint),
 
+    INTRINSIC(JavaLangFloat, FloatToIntBits, F_I, kIntrinsicFloat2Int, 0),
+    INTRINSIC(JavaLangDouble, DoubleToLongBits, D_J, kIntrinsicDouble2Long, 0),
+
+    INTRINSIC(JavaLangFloat, IsInfinite, F_Z, kIntrinsicFloatIsInfinite, 0),
+    INTRINSIC(JavaLangDouble, IsInfinite, D_Z, kIntrinsicDoubleIsInfinite, 0),
+    INTRINSIC(JavaLangFloat, IsNaN, F_Z, kIntrinsicFloatIsNaN, 0),
+    INTRINSIC(JavaLangDouble, IsNaN, D_Z, kIntrinsicDoubleIsNaN, 0),
+
     INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32),
     INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64),
     INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
+    INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+    INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
+    INTRINSIC(JavaLangInteger, Compare, II_I, kIntrinsicCompare, k32),
+    INTRINSIC(JavaLangLong, Compare, JJ_I, kIntrinsicCompare, k64),
+    INTRINSIC(JavaLangInteger, HighestOneBit, I_I, kIntrinsicHighestOneBit, k32),
+    INTRINSIC(JavaLangLong, HighestOneBit, J_J, kIntrinsicHighestOneBit, k64),
+    INTRINSIC(JavaLangInteger, LowestOneBit, I_I, kIntrinsicLowestOneBit, k32),
+    INTRINSIC(JavaLangLong, LowestOneBit, J_J, kIntrinsicLowestOneBit, k64),
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
     INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfTrailingZeros, J_I, kIntrinsicNumberOfTrailingZeros, k64),
+    INTRINSIC(JavaLangInteger, Signum, I_I, kIntrinsicSignum, k32),
+    INTRINSIC(JavaLangLong, Signum, J_I, kIntrinsicSignum, k64),
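The bit-manipulation intrinsics just registered have compact scalar semantics. As a reference for what a backend must compute, here is a sketch of two of them in portable C++, matching the documented java.lang.Integer behavior rather than any particular ART lowering:

    #include <cstdint>

    // Integer.highestOneBit: keep only the most significant set bit (0 stays 0).
    uint32_t HighestOneBit(uint32_t x) {
      x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16;
      return x - (x >> 1);
    }

    // Integer.signum: -1, 0 or 1 according to sign (assumes arithmetic >> on int32_t).
    int32_t Signum(int32_t x) {
      return (x >> 31) | static_cast<int32_t>((0u - static_cast<uint32_t>(x)) >> 31);
    }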
 
     INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
@@ -425,6 +535,23 @@
     INTRINSIC(JavaLangMath,       Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
     INTRINSIC(JavaLangStrictMath, Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
 
+    INTRINSIC(JavaLangMath,       Cos, D_D, kIntrinsicCos, 0),
+    INTRINSIC(JavaLangMath,       Sin, D_D, kIntrinsicSin, 0),
+    INTRINSIC(JavaLangMath,       Acos, D_D, kIntrinsicAcos, 0),
+    INTRINSIC(JavaLangMath,       Asin, D_D, kIntrinsicAsin, 0),
+    INTRINSIC(JavaLangMath,       Atan, D_D, kIntrinsicAtan, 0),
+    INTRINSIC(JavaLangMath,       Atan2, DD_D, kIntrinsicAtan2, 0),
+    INTRINSIC(JavaLangMath,       Cbrt, D_D, kIntrinsicCbrt, 0),
+    INTRINSIC(JavaLangMath,       Cosh, D_D, kIntrinsicCosh, 0),
+    INTRINSIC(JavaLangMath,       Exp, D_D, kIntrinsicExp, 0),
+    INTRINSIC(JavaLangMath,       Expm1, D_D, kIntrinsicExpm1, 0),
+    INTRINSIC(JavaLangMath,       Hypot, DD_D, kIntrinsicHypot, 0),
+    INTRINSIC(JavaLangMath,       Log, D_D, kIntrinsicLog, 0),
+    INTRINSIC(JavaLangMath,       Log10, D_D, kIntrinsicLog10, 0),
+    INTRINSIC(JavaLangMath,       NextAfter, DD_D, kIntrinsicNextAfter, 0),
+    INTRINSIC(JavaLangMath,       Sinh, D_D, kIntrinsicSinh, 0),
+    INTRINSIC(JavaLangMath,       Tan, D_D, kIntrinsicTan, 0),
+    INTRINSIC(JavaLangMath,       Tanh, D_D, kIntrinsicTanh, 0),
     INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
     INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
 
@@ -450,6 +577,13 @@
     INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
     INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
 
+    INTRINSIC(JavaLangStringFactory, NewStringFromBytes, ByteArrayIII_String,
+              kIntrinsicNewStringFromBytes, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangStringFactory, NewStringFromChars, IICharArray_String,
+              kIntrinsicNewStringFromChars, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangStringFactory, NewStringFromString, String_String,
+              kIntrinsicNewStringFromString, kIntrinsicFlagNone),
+
     INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
 
     INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
@@ -472,19 +606,29 @@
     INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsOrdered)
+              (type_flags) | kIntrinsicFlagIsOrdered)
 
     UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
     UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
     UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
 #undef UNSAFE_GET_PUT
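The only change in the macro above is wrapping type_flags in parentheses — standard macro hygiene, since the macro ORs a flag onto its argument and an argument containing a lower-precedence operator would otherwise mis-bind. A self-contained illustration (macro names hypothetical):

    #include <cstdio>

    #define WITH_VOLATILE_BAD(flags)  flags | 0x100
    #define WITH_VOLATILE_OK(flags)   (flags) | 0x100

    int main() {
      // With a conditional expression as the argument, | binds tighter than ?:
      int bad = WITH_VOLATILE_BAD(true ? 0x1 : 0x2);  // 0x1: the | went to the 0x2 arm
      int ok  = WITH_VOLATILE_OK(true ? 0x1 : 0x2);   // 0x101, as intended
      std::printf("%#x %#x\n", bad, ok);
      return 0;
    }

The call sites here pass plain enumerators, so the fix is preventive rather than a live bug.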
 
+    // 1.8
+    INTRINSIC(SunMiscUnsafe, GetAndAddInt, ObjectJI_I, kIntrinsicUnsafeGetAndAddInt, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndAddLong, ObjectJJ_J, kIntrinsicUnsafeGetAndAddLong, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndSetInt, ObjectJI_I, kIntrinsicUnsafeGetAndSetInt, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndSetLong, ObjectJJ_J, kIntrinsicUnsafeGetAndSetLong, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndSetObject, ObjectJObject_Object, kIntrinsicUnsafeGetAndSetObject, 0),
+    INTRINSIC(SunMiscUnsafe, LoadFence, _V, kIntrinsicUnsafeLoadFence, 0),
+    INTRINSIC(SunMiscUnsafe, StoreFence, _V, kIntrinsicUnsafeStoreFence, 0),
+    INTRINSIC(SunMiscUnsafe, FullFence, _V, kIntrinsicUnsafeFullFence, 0),
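The "1.8" block registers the JDK 8 sun.misc.Unsafe additions. For the read-modify-write entries, the semantics an intrinsic must preserve are a compare-and-swap loop returning the pre-update value; a hedged sketch using std::atomic (not ART's actual lowering):

    #include <atomic>

    // Shape of Unsafe.getAndAddInt: retry a CAS of old -> old + delta until it
    // sticks, then return the value observed before the addition.
    int GetAndAddInt(std::atomic<int>& field, int delta) {
      int old = field.load();
      while (!field.compare_exchange_weak(old, old + delta)) {
        // compare_exchange_weak reloads 'old' on failure; just retry.
      }
      return old;
    }

LoadFence, StoreFence and FullFence correspond roughly to acquire, release and sequentially consistent barriers, respectively.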
+
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V, kIntrinsicSystemArrayCopyCharArray,
               0),
     INTRINSIC(JavaLangSystem, ArrayCopy, ObjectIObjectII_V, kIntrinsicSystemArrayCopy,
@@ -564,191 +708,12 @@
   return res;
 }
 
-bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) {
-  InlineMethod intrinsic;
-  {
-    ReaderMutexLock mu(Thread::Current(), lock_);
-    auto it = inline_methods_.find(info->method_ref.dex_method_index);
-    if (it == inline_methods_.end() || (it->second.flags & kInlineIntrinsic) == 0) {
-      return false;
-    }
-    intrinsic = it->second;
-  }
-  if (kIntrinsicIsStatic[intrinsic.opcode] != (info->type == kStatic)) {
-    // Invoke type mismatch.
-    return false;
-  }
-  switch (intrinsic.opcode) {
-    case kIntrinsicDoubleCvt:
-      return backend->GenInlinedDoubleCvt(info);
-    case kIntrinsicFloatCvt:
-      return backend->GenInlinedFloatCvt(info);
-    case kIntrinsicReverseBytes:
-      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.d.data));
-    case kIntrinsicReverseBits:
-      return backend->GenInlinedReverseBits(info, static_cast<OpSize>(intrinsic.d.data));
-    case kIntrinsicAbsInt:
-      return backend->GenInlinedAbsInt(info);
-    case kIntrinsicAbsLong:
-      return backend->GenInlinedAbsLong(info);
-    case kIntrinsicAbsFloat:
-      return backend->GenInlinedAbsFloat(info);
-    case kIntrinsicAbsDouble:
-      return backend->GenInlinedAbsDouble(info);
-    case kIntrinsicMinMaxInt:
-      return backend->GenInlinedMinMax(info, intrinsic.d.data & kIntrinsicFlagMin, false /* is_long */);
-    case kIntrinsicMinMaxLong:
-      return backend->GenInlinedMinMax(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_long */);
-    case kIntrinsicMinMaxFloat:
-      return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, false /* is_double */);
-    case kIntrinsicMinMaxDouble:
-      return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_double */);
-    case kIntrinsicSqrt:
-      return backend->GenInlinedSqrt(info);
-    case kIntrinsicCeil:
-      return backend->GenInlinedCeil(info);
-    case kIntrinsicFloor:
-      return backend->GenInlinedFloor(info);
-    case kIntrinsicRint:
-      return backend->GenInlinedRint(info);
-    case kIntrinsicRoundFloat:
-      return backend->GenInlinedRound(info, false /* is_double */);
-    case kIntrinsicRoundDouble:
-      return backend->GenInlinedRound(info, true /* is_double */);
-    case kIntrinsicReferenceGetReferent:
-      return backend->GenInlinedReferenceGetReferent(info);
-    case kIntrinsicCharAt:
-      return backend->GenInlinedCharAt(info);
-    case kIntrinsicCompareTo:
-      return backend->GenInlinedStringCompareTo(info);
-    case kIntrinsicEquals:
-      // Quick does not implement this intrinsic.
-      return false;
-    case kIntrinsicGetCharsNoCheck:
-      return backend->GenInlinedStringGetCharsNoCheck(info);
-    case kIntrinsicIsEmptyOrLength:
-      return backend->GenInlinedStringIsEmptyOrLength(
-          info, intrinsic.d.data & kIntrinsicFlagIsEmpty);
-    case kIntrinsicIndexOf:
-      return backend->GenInlinedIndexOf(info, intrinsic.d.data & kIntrinsicFlagBase0);
-    case kIntrinsicNewStringFromBytes:
-      return backend->GenInlinedStringFactoryNewStringFromBytes(info);
-    case kIntrinsicNewStringFromChars:
-      return backend->GenInlinedStringFactoryNewStringFromChars(info);
-    case kIntrinsicNewStringFromString:
-      return backend->GenInlinedStringFactoryNewStringFromString(info);
-    case kIntrinsicCurrentThread:
-      return backend->GenInlinedCurrentThread(info);
-    case kIntrinsicPeek:
-      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.d.data));
-    case kIntrinsicPoke:
-      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.d.data));
-    case kIntrinsicCas:
-      return backend->GenInlinedCas(info, intrinsic.d.data & kIntrinsicFlagIsLong,
-                                    intrinsic.d.data & kIntrinsicFlagIsObject);
-    case kIntrinsicUnsafeGet:
-      return backend->GenInlinedUnsafeGet(info, intrinsic.d.data & kIntrinsicFlagIsLong,
-                                          intrinsic.d.data & kIntrinsicFlagIsObject,
-                                          intrinsic.d.data & kIntrinsicFlagIsVolatile);
-    case kIntrinsicUnsafePut:
-      return backend->GenInlinedUnsafePut(info, intrinsic.d.data & kIntrinsicFlagIsLong,
-                                          intrinsic.d.data & kIntrinsicFlagIsObject,
-                                          intrinsic.d.data & kIntrinsicFlagIsVolatile,
-                                          intrinsic.d.data & kIntrinsicFlagIsOrdered);
-    case kIntrinsicSystemArrayCopyCharArray:
-      return backend->GenInlinedArrayCopyCharArray(info);
-    case kIntrinsicNumberOfLeadingZeros:
-    case kIntrinsicNumberOfTrailingZeros:
-    case kIntrinsicRotateRight:
-    case kIntrinsicRotateLeft:
-    case kIntrinsicSystemArrayCopy:
-      return false;  // Not implemented in Quick.
-    default:
-      LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
-      return false;  // avoid warning "control reaches end of non-void function"
-  }
-}
-
 bool DexFileMethodInliner::IsSpecial(uint32_t method_index) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   auto it = inline_methods_.find(method_index);
   return it != inline_methods_.end() && (it->second.flags & kInlineSpecial) != 0;
 }
 
-bool DexFileMethodInliner::GenSpecial(Mir2Lir* backend, uint32_t method_idx) {
-  InlineMethod special;
-  {
-    ReaderMutexLock mu(Thread::Current(), lock_);
-    auto it = inline_methods_.find(method_idx);
-    if (it == inline_methods_.end() || (it->second.flags & kInlineSpecial) == 0) {
-      return false;
-    }
-    special = it->second;
-  }
-  return backend->SpecialMIR2LIR(special);
-}
-
-bool DexFileMethodInliner::GenInline(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                     uint32_t method_idx) {
-  InlineMethod method;
-  {
-    ReaderMutexLock mu(Thread::Current(), lock_);
-    auto it = inline_methods_.find(method_idx);
-    if (it == inline_methods_.end() || (it->second.flags & kInlineSpecial) == 0) {
-      return false;
-    }
-    method = it->second;
-  }
-
-  MIR* move_result = nullptr;
-  bool result = true;
-  switch (method.opcode) {
-    case kInlineOpNop:
-      break;
-    case kInlineOpNonWideConst:
-      move_result = mir_graph->FindMoveResult(bb, invoke);
-      result = GenInlineConst(mir_graph, bb, invoke, move_result, method);
-      break;
-    case kInlineOpReturnArg:
-      move_result = mir_graph->FindMoveResult(bb, invoke);
-      result = GenInlineReturnArg(mir_graph, bb, invoke, move_result, method);
-      break;
-    case kInlineOpIGet:
-      move_result = mir_graph->FindMoveResult(bb, invoke);
-      result = GenInlineIGet(mir_graph, bb, invoke, move_result, method);
-      break;
-    case kInlineOpIPut:
-      move_result = mir_graph->FindMoveResult(bb, invoke);
-      result = GenInlineIPut(mir_graph, bb, invoke, move_result, method);
-      break;
-    case kInlineStringInit:
-      return false;
-    default:
-      LOG(FATAL) << "Unexpected inline op: " << method.opcode;
-      break;
-  }
-  if (result) {
-    // If the invoke has not been eliminated yet, check now whether we should do it.
-    // This is done so that dataflow analysis does not get tripped up by seeing a nop invoke.
-    if (static_cast<int>(invoke->dalvikInsn.opcode) != kMirOpNop) {
-      bool is_static = IsInstructionInvokeStatic(invoke->dalvikInsn.opcode);
-      if (is_static || (invoke->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
-        // No null object register involved here so we can eliminate the invoke.
-        invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-      } else {
-        // Invoke was kept around because a null check needed to be done.
-        invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNullCheck);
-        // For invokes, the object register is in vC. For null check mir, it is in vA.
-        invoke->dalvikInsn.vA = invoke->dalvikInsn.vC;
-      }
-    }
-    if (move_result != nullptr) {
-      move_result->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-    }
-  }
-  return result;
-}
-
 uint32_t DexFileMethodInliner::FindClassIndex(const DexFile* dex_file, IndexCache* cache,
                                               ClassCacheIndex index) {
   uint32_t* class_index = &cache->class_indexes[index];
@@ -756,14 +721,7 @@
     return *class_index;
   }
 
-  const DexFile::StringId* string_id = dex_file->FindStringId(kClassCacheNames[index]);
-  if (string_id == nullptr) {
-    *class_index = kIndexNotFound;
-    return *class_index;
-  }
-  uint32_t string_index = dex_file->GetIndexForStringId(*string_id);
-
-  const DexFile::TypeId* type_id = dex_file->FindTypeId(string_index);
+  const DexFile::TypeId* type_id = dex_file->FindTypeId(kClassCacheNames[index]);
   if (type_id == nullptr) {
     *class_index = kIndexNotFound;
     return *class_index;
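The rewritten lookup above goes straight from the class-name string to a TypeId, where the removed code first resolved a StringId and its index. The net effect, summarized:

    // Before: name -> FindStringId -> GetIndexForStringId -> FindTypeId(index)
    //         (two searches over dex file tables per cache miss)
    // After:  name -> FindTypeId(name)
    //         (one search; a null result still maps to kIndexNotFound)

Presumably FindTypeId(const char*) performs the string resolution internally, so the behavior is unchanged.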
@@ -885,198 +843,14 @@
   }
 }
 
-bool DexFileMethodInliner::GenInlineConst(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                          MIR* move_result, const InlineMethod& method) {
-  if (move_result == nullptr) {
-    // Result is unused.
-    return true;
-  }
-
-  // Check the opcode and, for MOVE_RESULT_OBJECT, also check that the constant is null.
-  DCHECK(move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT ||
-         (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT &&
-             method.d.data == 0u));
-
-  // Insert the CONST instruction.
-  MIR* insn = AllocReplacementMIR(mir_graph, invoke);
-  insn->dalvikInsn.opcode = Instruction::CONST;
-  insn->dalvikInsn.vA = move_result->dalvikInsn.vA;
-  insn->dalvikInsn.vB = method.d.data;
-  insn->meta.method_lowering_info = invoke->meta.method_lowering_info;  // Preserve type info.
-  bb->InsertMIRAfter(move_result, insn);
-  return true;
-}
-
-bool DexFileMethodInliner::GenInlineReturnArg(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                              MIR* move_result, const InlineMethod& method) {
-  if (move_result == nullptr) {
-    // Result is unused.
-    return true;
-  }
-
-  // Select opcode and argument.
-  const InlineReturnArgData& data = method.d.return_data;
-  Instruction::Code opcode = Instruction::MOVE_FROM16;
-  uint32_t arg = GetInvokeReg(invoke, data.arg);
-  if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
-    DCHECK_EQ(data.is_object, 1u);
-    DCHECK_EQ(data.is_wide, 0u);
-    opcode = Instruction::MOVE_OBJECT_FROM16;
-  } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE) {
-    DCHECK_EQ(data.is_wide, 1u);
-    DCHECK_EQ(data.is_object, 0u);
-    opcode = Instruction::MOVE_WIDE_FROM16;
-    if (!WideArgIsInConsecutiveDalvikRegs(invoke, data.arg)) {
-      // The two halves of the source value are not in consecutive Dalvik registers in the INVOKE.
-      return false;
-    }
-  } else {
-    DCHECK(move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT);
-    DCHECK_EQ(data.is_wide, 0u);
-    DCHECK_EQ(data.is_object, 0u);
-  }
-
-  // Insert the move instruction.
-  MIR* insn = AllocReplacementMIR(mir_graph, invoke);
-  insn->dalvikInsn.opcode = opcode;
-  insn->dalvikInsn.vA = move_result->dalvikInsn.vA;
-  insn->dalvikInsn.vB = arg;
-  insn->meta.method_lowering_info = invoke->meta.method_lowering_info;  // Preserve type info.
-  bb->InsertMIRAfter(move_result, insn);
-  return true;
-}
-
-bool DexFileMethodInliner::GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                         MIR* move_result, const InlineMethod& method) {
-  CompilationUnit* cu = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit();
-  if (cu->enable_debug & (1 << kDebugSlowFieldPath)) {
-    return false;
-  }
-
-  const InlineIGetIPutData& data = method.d.ifield_data;
-  Instruction::Code opcode = static_cast<Instruction::Code>(Instruction::IGET + data.op_variant);
-  DCHECK_EQ(InlineMethodAnalyser::IGetVariant(opcode), data.op_variant);
-  uint32_t object_reg = GetInvokeReg(invoke, data.object_arg);
-
-  if (move_result == nullptr) {
-    // Result is unused. If volatile, we still need to emit the IGET but we have no destination.
-    return !data.is_volatile;
-  }
-
-  DCHECK_EQ(data.method_is_static != 0u, IsInstructionInvokeStatic(invoke->dalvikInsn.opcode));
-  bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u);
-  if (!object_is_this) {
-    // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE).
-    // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
-    if (!InlineMethodAnalyser::IsSyntheticAccessor(
-        mir_graph->GetMethodLoweringInfo(invoke).GetTargetMethod())) {
-      return false;
-    }
-  }
-
-  if (object_is_this) {
-    // Mark the invoke as a NOP; the null check is done on the IGET. No aborts after this.
-    invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-  }
-
-  MIR* insn = AllocReplacementMIR(mir_graph, invoke);
-  insn->offset = invoke->offset;
-  insn->dalvikInsn.opcode = opcode;
-  insn->dalvikInsn.vA = move_result->dalvikInsn.vA;
-  insn->dalvikInsn.vB = object_reg;
-  mir_graph->ComputeInlineIFieldLoweringInfo(data.field_idx, invoke, insn);
-
-  DCHECK(mir_graph->GetIFieldLoweringInfo(insn).IsResolved());
-  DCHECK(mir_graph->GetIFieldLoweringInfo(insn).FastGet());
-  DCHECK_EQ(data.field_offset, mir_graph->GetIFieldLoweringInfo(insn).FieldOffset().Uint32Value());
-  DCHECK_EQ(data.is_volatile, mir_graph->GetIFieldLoweringInfo(insn).IsVolatile() ? 1u : 0u);
-
-  bb->InsertMIRAfter(move_result, insn);
-  return true;
-}
-
-bool DexFileMethodInliner::GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                         MIR* move_result, const InlineMethod& method) {
-  CompilationUnit* cu = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit();
-  if (cu->enable_debug & (1 << kDebugSlowFieldPath)) {
-    return false;
-  }
-
-  const InlineIGetIPutData& data = method.d.ifield_data;
-  Instruction::Code opcode = static_cast<Instruction::Code>(Instruction::IPUT + data.op_variant);
-  DCHECK_EQ(InlineMethodAnalyser::IPutVariant(opcode), data.op_variant);
-  uint32_t object_reg = GetInvokeReg(invoke, data.object_arg);
-  uint32_t src_reg = GetInvokeReg(invoke, data.src_arg);
-  uint32_t return_reg =
-      data.return_arg_plus1 != 0u ? GetInvokeReg(invoke, data.return_arg_plus1 - 1u) : 0u;
-
-  if (opcode == Instruction::IPUT_WIDE && !WideArgIsInConsecutiveDalvikRegs(invoke, data.src_arg)) {
-    // The two halves of the source value are not in consecutive Dalvik registers in the INVOKE.
-    return false;
-  }
-
-  DCHECK(move_result == nullptr || data.return_arg_plus1 != 0u);
-  if (move_result != nullptr && move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE &&
-      !WideArgIsInConsecutiveDalvikRegs(invoke, data.return_arg_plus1 - 1u)) {
-    // The two halves of the return value are not in consecutive Dalvik registers in the INVOKE.
-    return false;
-  }
-
-  DCHECK_EQ(data.method_is_static != 0u, IsInstructionInvokeStatic(invoke->dalvikInsn.opcode));
-  bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u);
-  if (!object_is_this) {
-    // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE).
-    // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
-    if (!InlineMethodAnalyser::IsSyntheticAccessor(
-        mir_graph->GetMethodLoweringInfo(invoke).GetTargetMethod())) {
-      return false;
-    }
-  }
-
-  if (object_is_this) {
-    // Mark invoke as NOP, null-check is done on IPUT. No aborts after this.
-    invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-  }
-
-  MIR* insn = AllocReplacementMIR(mir_graph, invoke);
-  insn->dalvikInsn.opcode = opcode;
-  insn->dalvikInsn.vA = src_reg;
-  insn->dalvikInsn.vB = object_reg;
-  mir_graph->ComputeInlineIFieldLoweringInfo(data.field_idx, invoke, insn);
-
-  DCHECK(mir_graph->GetIFieldLoweringInfo(insn).IsResolved());
-  DCHECK(mir_graph->GetIFieldLoweringInfo(insn).FastPut());
-  DCHECK_EQ(data.field_offset, mir_graph->GetIFieldLoweringInfo(insn).FieldOffset().Uint32Value());
-  DCHECK_EQ(data.is_volatile, mir_graph->GetIFieldLoweringInfo(insn).IsVolatile() ? 1u : 0u);
-
-  bb->InsertMIRAfter(invoke, insn);
-
-  if (move_result != nullptr) {
-    MIR* move = AllocReplacementMIR(mir_graph, invoke);
-    move->offset = move_result->offset;
-    if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) {
-      move->dalvikInsn.opcode = Instruction::MOVE_FROM16;
-    } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
-      move->dalvikInsn.opcode = Instruction::MOVE_OBJECT_FROM16;
-    } else {
-      DCHECK_EQ(move_result->dalvikInsn.opcode, Instruction::MOVE_RESULT_WIDE);
-      move->dalvikInsn.opcode = Instruction::MOVE_WIDE_FROM16;
-    }
-    move->dalvikInsn.vA = move_result->dalvikInsn.vA;
-    move->dalvikInsn.vB = return_reg;
-    move->meta.method_lowering_info = invoke->meta.method_lowering_info;  // Preserve type info.
-    bb->InsertMIRAfter(insn, move);
-  }
-  return true;
-}
-
-uint32_t DexFileMethodInliner::GetOffsetForStringInit(uint32_t method_index, size_t pointer_size) {
+uint32_t DexFileMethodInliner::GetOffsetForStringInit(uint32_t method_index,
+                                                      PointerSize pointer_size) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   auto it = inline_methods_.find(method_index);
   if (it != inline_methods_.end() && (it->second.opcode == kInlineStringInit)) {
     uint32_t string_init_base_offset = Thread::QuickEntryPointOffsetWithSize(
               OFFSETOF_MEMBER(QuickEntryPoints, pNewEmptyString), pointer_size);
-    return string_init_base_offset + it->second.d.data * pointer_size;
+    return string_init_base_offset + it->second.d.data * static_cast<size_t>(pointer_size);
   }
   return 0;
 }
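GetOffsetForStringInit now takes the strongly typed PointerSize from base/enums.h instead of a raw size_t, which is why the multiplication above gains a static_cast. A minimal sketch of the pattern, assuming PointerSize is an enum class whose enumerators carry the byte widths (consistent with the cast above):

    #include <cstddef>
    #include <cstdint>

    enum class PointerSize : size_t { k32 = 4, k64 = 8 };  // assumed shape of base/enums.h

    uint32_t EntryPointOffset(uint32_t base, uint32_t slot, PointerSize ps) {
      // enum class values never convert implicitly, so mixing up a width and a
      // plain count becomes a compile error rather than a silent bug:
      return base + static_cast<uint32_t>(slot * static_cast<size_t>(ps));
    }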
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 5ce110c..dbdfa24 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -18,10 +18,11 @@
 #define ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
 
 #include <stdint.h>
+
+#include "base/enums.h"
 #include "base/mutex.h"
 #include "base/macros.h"
 #include "safe_map.h"
-#include "dex/compiler_enums.h"
 #include "dex_file.h"
 #include "quick/inline_method_analyser.h"
 
@@ -31,11 +32,12 @@
 class MethodVerifier;
 }  // namespace verifier
 
-class BasicBlock;
-struct CallInfo;
-class MIR;
-class MIRGraph;
-class Mir2Lir;
+enum OpSize {
+  k32,
+  k64,
+  kSignedHalf,
+  kSignedByte,
+};
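With the Quick backend declarations gone, the header defines its own minimal OpSize covering just the widths the remaining intrinsic table stores in its payload: k32/k64 for the int/long variants, kSignedHalf for Short.reverseBytes, and kSignedByte for Memory.peekByte. Consumers recover it from the packed data field the same way the removed GenIntrinsic did:

    OpSize size = static_cast<OpSize>(intrinsic.d.data);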
 
 /**
  * Handles inlining of methods from a particular DexFile.
@@ -75,30 +77,14 @@
     bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) REQUIRES(!lock_);
 
     /**
-     * Generate code for an intrinsic function invocation.
-     */
-    bool GenIntrinsic(Mir2Lir* backend, CallInfo* info) REQUIRES(!lock_);
-
-    /**
      * Check whether a particular method index corresponds to a special function.
      */
     bool IsSpecial(uint32_t method_index) REQUIRES(!lock_);
 
     /**
-     * Generate code for a special function.
-     */
-    bool GenSpecial(Mir2Lir* backend, uint32_t method_idx) REQUIRES(!lock_);
-
-    /**
-     * Try to inline an invoke.
-     */
-    bool GenInline(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke, uint32_t method_idx)
-        REQUIRES(!lock_);
-
-    /**
      * Gets the thread pointer entrypoint offset for a string init method index and pointer size.
      */
-    uint32_t GetOffsetForStringInit(uint32_t method_index, size_t pointer_size)
+    uint32_t GetOffsetForStringInit(uint32_t method_index, PointerSize pointer_size)
         REQUIRES(!lock_);
 
     /**
@@ -162,6 +148,23 @@
       kNameCacheAbs,
       kNameCacheMax,
       kNameCacheMin,
+      kNameCacheCos,
+      kNameCacheSin,
+      kNameCacheAcos,
+      kNameCacheAsin,
+      kNameCacheAtan,
+      kNameCacheAtan2,
+      kNameCacheCbrt,
+      kNameCacheCosh,
+      kNameCacheExp,
+      kNameCacheExpm1,
+      kNameCacheHypot,
+      kNameCacheLog,
+      kNameCacheLog10,
+      kNameCacheNextAfter,
+      kNameCacheSinh,
+      kNameCacheTan,
+      kNameCacheTanh,
       kNameCacheSqrt,
       kNameCacheCeil,
       kNameCacheFloor,
@@ -173,6 +176,10 @@
       kNameCacheEquals,
       kNameCacheGetCharsNoCheck,
       kNameCacheIsEmpty,
+      kNameCacheFloatToIntBits,
+      kNameCacheDoubleToLongBits,
+      kNameCacheIsInfinite,
+      kNameCacheIsNaN,
       kNameCacheIndexOf,
       kNameCacheLength,
       kNameCacheInit,
@@ -206,11 +213,24 @@
       kNameCachePutObject,
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
+      kNameCacheGetAndAddInt,
+      kNameCacheGetAndAddLong,
+      kNameCacheGetAndSetInt,
+      kNameCacheGetAndSetLong,
+      kNameCacheGetAndSetObject,
+      kNameCacheLoadFence,
+      kNameCacheStoreFence,
+      kNameCacheFullFence,
       kNameCacheArrayCopy,
+      kNameCacheBitCount,
+      kNameCacheCompare,
+      kNameCacheHighestOneBit,
+      kNameCacheLowestOneBit,
       kNameCacheNumberOfLeadingZeros,
       kNameCacheNumberOfTrailingZeros,
       kNameCacheRotateRight,
       kNameCacheRotateLeft,
+      kNameCacheSignum,
       kNameCacheLast
     };
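Every enumerator added here pairs with a new INTRINSIC(...) row in the .cc, and the enum also indexes a parallel array of method-name strings, so the two must be extended in lockstep. A sketch of the assumed companion table (the array name mirrors the kClassCacheNames pattern visible in the .cc and is an assumption):

    // In dex_file_method_inliner.cc, same order as the enum:
    //   "bitCount",        // kNameCacheBitCount
    //   "compare",         // kNameCacheCompare
    //   "highestOneBit",   // kNameCacheHighestOneBit
    //   ...
    // A guard one could add:
    //   static_assert(arraysize(kNameCacheNames) == kNameCacheLast, "name cache mismatch");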
 
@@ -229,8 +249,10 @@
       kProtoCacheF_F,
       kProtoCacheFF_F,
       kProtoCacheD_J,
+      kProtoCacheD_Z,
       kProtoCacheJ_D,
       kProtoCacheF_I,
+      kProtoCacheF_Z,
       kProtoCacheI_F,
       kProtoCacheII_I,
       kProtoCacheI_C,
@@ -245,6 +267,7 @@
       kProtoCacheJB_V,
       kProtoCacheJI_V,
       kProtoCacheJJ_J,
+      kProtoCacheJJ_I,
       kProtoCacheJJ_V,
       kProtoCacheJS_V,
       kProtoCacheObject_Z,
@@ -253,11 +276,14 @@
       kProtoCacheObjectJJJ_Z,
       kProtoCacheObjectJObjectObject_Z,
       kProtoCacheObjectJ_I,
+      kProtoCacheObjectJI_I,
       kProtoCacheObjectJI_V,
       kProtoCacheObjectJ_J,
+      kProtoCacheObjectJJ_J,
       kProtoCacheObjectJJ_V,
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
+      kProtoCacheObjectJObject_Object,
       kProtoCacheCharArrayICharArrayII_V,
       kProtoCacheObjectIObjectII_V,
       kProtoCacheIICharArrayI_V,
@@ -365,15 +391,6 @@
 
     bool AddInlineMethod(int32_t method_idx, const InlineMethod& method) REQUIRES(!lock_);
 
-    static bool GenInlineConst(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                               MIR* move_result, const InlineMethod& method);
-    static bool GenInlineReturnArg(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                   MIR* move_result, const InlineMethod& method);
-    static bool GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                              MIR* move_result, const InlineMethod& method);
-    static bool GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                              MIR* move_result, const InlineMethod& method);
-
     ReaderWriterMutex lock_;
     /*
      * Maps method indexes (for the particular DexFile) to Intrinsic definitions.
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
deleted file mode 100644
index 2b60a51..0000000
--- a/compiler/dex/quick/gen_common.cc
+++ /dev/null
@@ -1,2249 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_to_lir-inl.h"
-
-#include <functional>
-
-#include "arch/arm/instruction_set_features_arm.h"
-#include "base/bit_utils.h"
-#include "base/macros.h"
-#include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/arm/arm_lir.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mirror/array.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/object_reference.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-#include "verifier/method_verifier.h"
-
-namespace art {
-
-// Shortcuts to repeatedly used long types.
-typedef mirror::ObjectArray<mirror::Object> ObjArray;
-typedef mirror::ObjectArray<mirror::Class> ClassArray;
-
-/*
- * This source file contains "gen" codegen routines that should
- * be applicable to most targets.  Only mid-level support utilities
- * and "op" calls may be used here.
- */
-
-ALWAYS_INLINE static inline bool ForceSlowFieldPath(CompilationUnit* cu) {
-  return (cu->enable_debug & (1 << kDebugSlowFieldPath)) != 0;
-}
-
-ALWAYS_INLINE static inline bool ForceSlowStringPath(CompilationUnit* cu) {
-  return (cu->enable_debug & (1 << kDebugSlowStringPath)) != 0;
-}
-
-ALWAYS_INLINE static inline bool ForceSlowTypePath(CompilationUnit* cu) {
-  return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0;
-}
-
-void Mir2Lir::GenIfNullUseHelperImm(RegStorage r_result, QuickEntrypointEnum trampoline, int imm) {
-  class CallHelperImmMethodSlowPath : public LIRSlowPath {
-   public:
-    CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont,
-                                QuickEntrypointEnum trampoline_in, int imm_in,
-                                RegStorage r_result_in)
-        : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in),
-          imm_(imm_in), r_result_(r_result_in) {
-    }
-
-    void Compile() {
-      GenerateTargetLabel();
-      m2l_->CallRuntimeHelperImm(trampoline_, imm_, true);
-      m2l_->OpRegCopy(r_result_,  m2l_->TargetReg(kRet0, kRef));
-      m2l_->OpUnconditionalBranch(cont_);
-    }
-
-   private:
-    QuickEntrypointEnum trampoline_;
-    const int imm_;
-    const RegStorage r_result_;
-  };
-
-  LIR* branch = OpCmpImmBranch(kCondEq, r_result, 0, nullptr);
-  LIR* cont = NewLIR0(kPseudoTargetLabel);
-
-  AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm,
-                                                       r_result));
-}
-
-void Mir2Lir::LoadTypeFromCache(uint32_t type_index, RegStorage class_reg) {
-  if (CanUseOpPcRelDexCacheArrayLoad()) {
-    uint32_t offset = dex_cache_arrays_layout_.TypeOffset(type_index);
-    OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg, false);
-  } else {
-    RegStorage r_method = LoadCurrMethodWithHint(class_reg);
-    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(
-        GetInstructionSetPointerSize(cu_->instruction_set));
-    LoadBaseDisp(r_method, resolved_types_offset.Int32Value(), class_reg,
-                 cu_->target64 ? k64 : k32, kNotVolatile);
-    int32_t offset_of_type = GetCacheOffset(type_index);
-    LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
-  }
-}
-
-RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info,
-                                               int opt_flags) {
-  DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
-  // May do runtime call so everything to home locations.
-  FlushAllRegs();
-  // Using fixed register to sync with possible call to runtime support.
-  RegStorage r_base = TargetReg(kArg0, kRef);
-  LockTemp(r_base);
-  LoadTypeFromCache(field_info.StorageIndex(), r_base);
-  // r_base now points at static storage (Class*) or null if the type is not yet resolved.
-  LIR* unresolved_branch = nullptr;
-  if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
-    // Check if r_base is null.
-    unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr);
-  }
-  LIR* uninit_branch = nullptr;
-  if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
-    // Check if r_base is not yet initialized class.
-    RegStorage r_tmp = TargetReg(kArg2, kNotWide);
-    LockTemp(r_tmp);
-    uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
-                                      mirror::Class::StatusOffset().Int32Value(),
-                                      mirror::Class::kStatusInitialized, nullptr, nullptr);
-    FreeTemp(r_tmp);
-  }
-  if (unresolved_branch != nullptr || uninit_branch != nullptr) {
-    //
-    // Slow path to ensure a class is initialized for sget/sput.
-    //
-    class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
-     public:
-      // There are up to two branches to the static field slow path, the "unresolved" when the type
-      // entry in the dex cache is null, and the "uninit" when the class is not yet initialized.
-      // At least one will be non-null here, otherwise we wouldn't generate the slow path.
-      StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
-                          RegStorage r_base_in)
-          : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
-            second_branch_(unresolved != nullptr ? uninit : nullptr),
-            storage_index_(storage_index), r_base_(r_base_in) {
-      }
-
-      void Compile() {
-        LIR* target = GenerateTargetLabel();
-        if (second_branch_ != nullptr) {
-          second_branch_->target = target;
-        }
-        m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0, kRef));
-
-        m2l_->OpUnconditionalBranch(cont_);
-      }
-
-     private:
-      // Second branch to the slow path, or null if there's only one branch.
-      LIR* const second_branch_;
-
-      const int storage_index_;
-      const RegStorage r_base_;
-    };
-
-    // The slow path is invoked if r_base is null or the class pointed
-    // to by it is not initialized.
-    LIR* cont = NewLIR0(kPseudoTargetLabel);
-    AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
-                                                 field_info.StorageIndex(), r_base));
-  }
-  return r_base;
-}
-
-/*
- * Generate a kPseudoBarrier marker to indicate the boundary of special
- * blocks.
- */
-void Mir2Lir::GenBarrier() {
-  LIR* barrier = NewLIR0(kPseudoBarrier);
-  /* Mark all resources as being clobbered */
-  DCHECK(!barrier->flags.use_def_invalid);
-  barrier->u.m.def_mask = &kEncodeAll;
-}
-
-void Mir2Lir::GenDivZeroException() {
-  LIR* branch = OpUnconditionalBranch(nullptr);
-  AddDivZeroCheckSlowPath(branch);
-}
-
-void Mir2Lir::GenDivZeroCheck(ConditionCode c_code) {
-  LIR* branch = OpCondBranch(c_code, nullptr);
-  AddDivZeroCheckSlowPath(branch);
-}
-
-void Mir2Lir::GenDivZeroCheck(RegStorage reg) {
-  LIR* branch = OpCmpImmBranch(kCondEq, reg, 0, nullptr);
-  AddDivZeroCheckSlowPath(branch);
-}
-
-void Mir2Lir::AddDivZeroCheckSlowPath(LIR* branch) {
-  class DivZeroCheckSlowPath : public Mir2Lir::LIRSlowPath {
-   public:
-    DivZeroCheckSlowPath(Mir2Lir* m2l, LIR* branch_in)
-        : LIRSlowPath(m2l, branch_in) {
-    }
-
-    void Compile() OVERRIDE {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoThrowTarget);
-      m2l_->CallRuntimeHelper(kQuickThrowDivZero, true);
-    }
-  };
-
-  AddSlowPath(new (arena_) DivZeroCheckSlowPath(this, branch));
-}
-
-void Mir2Lir::GenArrayBoundsCheck(RegStorage index, RegStorage length) {
-  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
-   public:
-    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, RegStorage index_in,
-                             RegStorage length_in)
-        : LIRSlowPath(m2l, branch_in),
-          index_(index_in), length_(length_in) {
-    }
-
-    void Compile() OVERRIDE {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoThrowTarget);
-      m2l_->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, index_, length_, true);
-    }
-
-   private:
-    const RegStorage index_;
-    const RegStorage length_;
-  };
-
-  LIR* branch = OpCmpBranch(kCondUge, index, length, nullptr);
-  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, index, length));
-}
-
-void Mir2Lir::GenArrayBoundsCheck(int index, RegStorage length) {
-  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
-   public:
-    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, int index_in, RegStorage length_in)
-        : LIRSlowPath(m2l, branch_in),
-          index_(index_in), length_(length_in) {
-    }
-
-    void Compile() OVERRIDE {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoThrowTarget);
-
-      RegStorage arg1_32 = m2l_->TargetReg(kArg1, kNotWide);
-      RegStorage arg0_32 = m2l_->TargetReg(kArg0, kNotWide);
-
-      m2l_->OpRegCopy(arg1_32, length_);
-      m2l_->LoadConstant(arg0_32, index_);
-      m2l_->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, arg0_32, arg1_32, true);
-    }
-
-   private:
-    const int32_t index_;
-    const RegStorage length_;
-  };
-
-  LIR* branch = OpCmpImmBranch(kCondLs, length, index, nullptr);
-  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, index, length));
-}
-
-LIR* Mir2Lir::GenNullCheck(RegStorage reg) {
-  class NullCheckSlowPath : public Mir2Lir::LIRSlowPath {
-   public:
-    NullCheckSlowPath(Mir2Lir* m2l, LIR* branch)
-        : LIRSlowPath(m2l, branch) {
-    }
-
-    void Compile() OVERRIDE {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoThrowTarget);
-      m2l_->CallRuntimeHelper(kQuickThrowNullPointer, true);
-    }
-  };
-
-  LIR* branch = OpCmpImmBranch(kCondEq, reg, 0, nullptr);
-  AddSlowPath(new (arena_) NullCheckSlowPath(this, branch));
-  return branch;
-}
-
-/* Perform null-check on a register.  */
-LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
-  if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-    return GenExplicitNullCheck(m_reg, opt_flags);
-  }
-  // If null check has not been eliminated, reset redundant store tracking.
-  if ((opt_flags & MIR_IGNORE_NULL_CHECK) == 0) {
-    ResetDefTracking();
-  }
-  return nullptr;
-}
-
-/* Perform an explicit null-check on a register.  */
-LIR* Mir2Lir::GenExplicitNullCheck(RegStorage m_reg, int opt_flags) {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-    return nullptr;
-  }
-  return GenNullCheck(m_reg);
-}
-
-void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return;
-    }
-    // Insert after last instruction.
-    MarkSafepointPC(last_lir_insn_);
-  }
-}
-
-void Mir2Lir::MarkPossibleNullPointerExceptionAfter(int opt_flags, LIR* after) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return;
-    }
-    MarkSafepointPCAfter(after);
-  }
-}
-
-void Mir2Lir::MarkPossibleStackOverflowException() {
-  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
-    MarkSafepointPC(last_lir_insn_);
-  }
-}
-
-void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return;
-    }
-    // Force an implicit null check by performing a memory operation (load) from the given
-    // register with offset 0.  This will cause a signal if the register contains 0 (null).
-    RegStorage tmp = AllocTemp();
-    // TODO: for Mips, would be best to use rZERO as the bogus register target.
-    LIR* load = Load32Disp(reg, 0, tmp);
-    FreeTemp(tmp);
-    MarkSafepointPC(load);
-  }
-}
-
-void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
-                                  RegLocation rl_src2, LIR* taken) {
-  ConditionCode cond;
-  RegisterClass reg_class = (rl_src1.ref || rl_src2.ref) ? kRefReg : kCoreReg;
-  switch (opcode) {
-    case Instruction::IF_EQ:
-      cond = kCondEq;
-      break;
-    case Instruction::IF_NE:
-      cond = kCondNe;
-      break;
-    case Instruction::IF_LT:
-      cond = kCondLt;
-      break;
-    case Instruction::IF_GE:
-      cond = kCondGe;
-      break;
-    case Instruction::IF_GT:
-      cond = kCondGt;
-      break;
-    case Instruction::IF_LE:
-      cond = kCondLe;
-      break;
-    default:
-      cond = static_cast<ConditionCode>(0);
-      LOG(FATAL) << "Unexpected opcode " << opcode;
-  }
-
-  // Normalize such that if either operand is constant, src2 will be constant
-  if (rl_src1.is_const) {
-    RegLocation rl_temp = rl_src1;
-    rl_src1 = rl_src2;
-    rl_src2 = rl_temp;
-    cond = FlipComparisonOrder(cond);
-  }
-
-  rl_src1 = LoadValue(rl_src1, reg_class);
-  // Is this really an immediate comparison?
-  if (rl_src2.is_const) {
-    // If it's already live in a register or not easily materialized, just keep going
-    RegLocation rl_temp = UpdateLoc(rl_src2);
-    int32_t constant_value = mir_graph_->ConstantValue(rl_src2);
-    if ((rl_temp.location == kLocDalvikFrame) &&
-        InexpensiveConstantInt(constant_value, opcode)) {
-      // OK - convert this to a compare immediate and branch
-      OpCmpImmBranch(cond, rl_src1.reg, mir_graph_->ConstantValue(rl_src2), taken);
-      return;
-    }
-
-    // It's also commonly more efficient to have a test against zero with Eq/Ne. This is not worse
-    // for x86, and allows a cbz/cbnz for Arm and Mips. At the same time, it works around a register
-    // mismatch for 64b systems, where a reference is compared against null, as dex bytecode uses
-    // the 32b literal 0 for null.
-    if (constant_value == 0 && (cond == kCondEq || cond == kCondNe)) {
-      // Use the OpCmpImmBranch and ignore the value in the register.
-      OpCmpImmBranch(cond, rl_src1.reg, 0, taken);
-      return;
-    }
-  }
-
-  rl_src2 = LoadValue(rl_src2, reg_class);
-  OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken);
-}
-
-void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken) {
-  ConditionCode cond;
-  RegisterClass reg_class = rl_src.ref ? kRefReg : kCoreReg;
-  rl_src = LoadValue(rl_src, reg_class);
-  switch (opcode) {
-    case Instruction::IF_EQZ:
-      cond = kCondEq;
-      break;
-    case Instruction::IF_NEZ:
-      cond = kCondNe;
-      break;
-    case Instruction::IF_LTZ:
-      cond = kCondLt;
-      break;
-    case Instruction::IF_GEZ:
-      cond = kCondGe;
-      break;
-    case Instruction::IF_GTZ:
-      cond = kCondGt;
-      break;
-    case Instruction::IF_LEZ:
-      cond = kCondLe;
-      break;
-    default:
-      cond = static_cast<ConditionCode>(0);
-      LOG(FATAL) << "Unexpected opcode " << opcode;
-  }
-  OpCmpImmBranch(cond, rl_src.reg, 0, taken);
-}
-
-void Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (rl_src.location == kLocPhysReg) {
-    OpRegCopy(rl_result.reg, rl_src.reg);
-  } else {
-    LoadValueDirect(rl_src, rl_result.reg.GetLow());
-  }
-  OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_result.reg.GetLow(), 31);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = UpdateLocWide(rl_src);
-  rl_src = NarrowRegLoc(rl_src);
-  StoreValue(rl_dest, rl_src);
-}
-
-void Mir2Lir::GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest,
-                              RegLocation rl_src) {
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpKind op = kOpInvalid;
-  switch (opcode) {
-    case Instruction::INT_TO_BYTE:
-      op = kOp2Byte;
-      break;
-    case Instruction::INT_TO_SHORT:
-      op = kOp2Short;
-      break;
-    case Instruction::INT_TO_CHAR:
-      op = kOp2Char;
-      break;
-    default:
-      LOG(ERROR) << "Bad int conversion type";
-  }
-  OpRegReg(op, rl_result.reg, rl_src.reg);
-  StoreValue(rl_dest, rl_result);
-}
-
-/*
- * Let helper function take care of everything.  Will call
- * Array::AllocFromCode(type_idx, method, count);
- * Note: AllocFromCode will handle checks for errNegativeArraySize.
- */
-void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest,
-                          RegLocation rl_src) {
-  FlushAllRegs();  /* Everything to home location */
-  const DexFile* dex_file = cu_->dex_file;
-  CompilerDriver* driver = cu_->compiler_driver;
-  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *dex_file, type_idx)) {
-    bool is_type_initialized;  // Ignored as an array does not have an initializer.
-    bool use_direct_type_ptr;
-    uintptr_t direct_type_ptr;
-    bool is_finalizable;
-    if (kEmbedClassInCode &&
-        driver->CanEmbedTypeInCode(*dex_file, type_idx, &is_type_initialized, &use_direct_type_ptr,
-                                   &direct_type_ptr, &is_finalizable)) {
-      // The fast path.
-      if (!use_direct_type_ptr) {
-        LoadClassType(*dex_file, type_idx, kArg0);
-        CallRuntimeHelperRegRegLocationMethod(kQuickAllocArrayResolved, TargetReg(kArg0, kNotWide),
-                                              rl_src, true);
-      } else {
-        // Use the direct pointer.
-        CallRuntimeHelperImmRegLocationMethod(kQuickAllocArrayResolved, direct_type_ptr, rl_src,
-                                              true);
-      }
-    } else {
-      // The slow path.
-      CallRuntimeHelperImmRegLocationMethod(kQuickAllocArray, type_idx, rl_src, true);
-    }
-  } else {
-    CallRuntimeHelperImmRegLocationMethod(kQuickAllocArrayWithAccessCheck, type_idx, rl_src, true);
-  }
-  StoreValue(rl_dest, GetReturn(kRefReg));
-}
-
-/*
- * Similar to GenNewArray, but with post-allocation initialization.
- * The verifier guarantees we're dealing with an array class.  Current
- * code throws the runtime exception "bad Filled array req" for 'D' and 'J'.
- * Current code also throws an internal unimp if not 'L', '[' or 'I'.
- */
-void Mir2Lir::GenFilledNewArray(CallInfo* info) {
-  size_t elems = info->num_arg_words;
-  int type_idx = info->index;
-  FlushAllRegs();  /* Everything to home location */
-  QuickEntrypointEnum target;
-  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
-                                                       type_idx)) {
-    target = kQuickCheckAndAllocArray;
-  } else {
-    target = kQuickCheckAndAllocArrayWithAccessCheck;
-  }
-  CallRuntimeHelperImmImmMethod(target, type_idx, elems, true);
-  FreeTemp(TargetReg(kArg2, kNotWide));
-  FreeTemp(TargetReg(kArg1, kNotWide));
-  /*
-   * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the
-   * return region.  Because AllocFromCode placed the new array
-   * in kRet0, we'll just lock it into place.  When debugger support is
-   * added, it may be necessary to additionally copy all return
-   * values to a home location in thread-local storage
-   */
-  RegStorage ref_reg = TargetReg(kRet0, kRef);
-  LockTemp(ref_reg);
-
-  // TODO: use the correct component size; currently all supported types
-  // share array alignment with ints (see the comment at the head of this function).
-  size_t component_size = sizeof(int32_t);
-
-  if (elems > 5) {
-    DCHECK(info->is_range);  // Non-range insn can't encode more than 5 elems.
-    /*
-     * Bit of ugliness here.  We're going to generate a mem copy loop
-     * on the register range, but it is possible that some regs
-     * in the range have been promoted.  This is unlikely, but
-     * before generating the copy, we'll just force a flush
-     * of any regs in the source range that have been promoted to
-     * home location.
-     */
-    for (size_t i = 0; i < elems; i++) {
-      RegLocation loc = UpdateLoc(info->args[i]);
-      if (loc.location == kLocPhysReg) {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        if (loc.ref) {
-          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
-        } else {
-          Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
-        }
-      }
-    }
-    /*
-     * TUNING note: generated code here could be much improved, but
-     * this is an uncommon operation and isn't especially performance
-     * critical.
-     */
-    // This is addressing the stack, which may be out of the 4G area.
-    RegStorage r_src = AllocTempRef();
-    RegStorage r_dst = AllocTempRef();
-    RegStorage r_idx = AllocTempRef();  // Not really a reference, but match src/dst.
-    RegStorage r_val;
-    switch (cu_->instruction_set) {
-      case kThumb2:
-      case kArm64:
-        r_val = TargetReg(kLr, kNotWide);
-        break;
-      case kX86:
-      case kX86_64:
-        FreeTemp(ref_reg);
-        r_val = AllocTemp();
-        break;
-      case kMips:
-      case kMips64:
-        r_val = AllocTemp();
-        break;
-      default: LOG(FATAL) << "Unexpected instruction set: " << cu_->instruction_set;
-    }
-    // Set up source pointer
-    RegLocation rl_first = info->args[0];
-    OpRegRegImm(kOpAdd, r_src, TargetPtrReg(kSp), SRegOffset(rl_first.s_reg_low));
-    // Set up the target pointer
-    OpRegRegImm(kOpAdd, r_dst, ref_reg,
-                mirror::Array::DataOffset(component_size).Int32Value());
-    // Set up the loop counter (known to be > 0)
-    LoadConstant(r_idx, static_cast<int>(elems - 1));
-    // Generate the copy loop.  Going backwards for convenience
-    LIR* loop_head_target = NewLIR0(kPseudoTargetLabel);
-    // Copy next element
-    {
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      LoadBaseIndexed(r_src, r_idx, r_val, 2, k32);
-      // NOTE: No dalvik register annotation, local optimizations will be stopped
-      // by the loop boundaries.
-    }
-    StoreBaseIndexed(r_dst, r_idx, r_val, 2, k32);
-    FreeTemp(r_val);
-    OpDecAndBranch(kCondGe, r_idx, loop_head_target);
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-      // Restore the target pointer
-      OpRegRegImm(kOpAdd, ref_reg, r_dst,
-                  -mirror::Array::DataOffset(component_size).Int32Value());
-    }
-    FreeTemp(r_idx);
-    FreeTemp(r_dst);
-    FreeTemp(r_src);
-  } else {
-    DCHECK_LE(elems, 5u);  // Usually but not necessarily non-range.
-    // TUNING: interleave
-    for (size_t i = 0; i < elems; i++) {
-      RegLocation rl_arg;
-      if (info->args[i].ref) {
-        rl_arg = LoadValue(info->args[i], kRefReg);
-        StoreRefDisp(ref_reg,
-                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg,
-                    kNotVolatile);
-      } else {
-        rl_arg = LoadValue(info->args[i], kCoreReg);
-        Store32Disp(ref_reg,
-                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
-      }
-      // If the LoadValue caused a temp to be allocated, free it
-      if (IsTemp(rl_arg.reg)) {
-        FreeTemp(rl_arg.reg);
-      }
-    }
-  }
-  if (elems != 0 && info->args[0].ref) {
-    // If there is at least one potentially non-null value, unconditionally mark the GC card.
-    for (size_t i = 0; i < elems; i++) {
-      if (!mir_graph_->IsConstantNullRef(info->args[i])) {
-        UnconditionallyMarkGCCard(ref_reg);
-        break;
-      }
-    }
-  }
-  if (info->result.location != kLocInvalid) {
-    StoreValue(info->result, GetReturn(kRefReg));
-  }
-}
-
-/*
- * Array data table format:
- *  ushort ident = 0x0300   magic value
- *  ushort width            width of each element in the table
- *  uint   size             number of elements in the table
- *  ubyte  data[size*width] table of data values (may contain a single-byte
- *                          padding at the end)
- *
- * Total size is 4+(width * size + 1)/2 16-bit code units.
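- *
- * Worked example (illustration only): a byte table (width = 1) holding
- * size = 3 elements occupies 4 + (1*3 + 1)/2 = 6 code units; the +1 in
- * the integer division absorbs the single padding byte.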
- */
-void Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  if (kIsDebugBuild) {
-    const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-    const Instruction::ArrayDataPayload* payload =
-        reinterpret_cast<const Instruction::ArrayDataPayload*>(table);
-    CHECK_EQ(payload->ident, static_cast<uint16_t>(Instruction::kArrayDataSignature));
-  }
-  uint32_t table_offset_from_start = mir->offset + static_cast<int32_t>(table_offset);
-  CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true);
-}
-
-void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
-  const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
-  DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
-  cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
-  if (!ForceSlowFieldPath(cu_) && field_info.FastPut()) {
-    DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    RegStorage r_base;
-    if (field_info.IsReferrersClass()) {
-      // Fast path, static storage base is this method's class
-      r_base = AllocTempRef();
-      RegStorage r_method = LoadCurrMethodWithHint(r_base);
-      LoadRefDisp(r_method, ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
-                  kNotVolatile);
-    } else {
-      // Medium path, static storage base in a different class which requires checks that the other
-      // class is initialized.
-      r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags);
-      if (!field_info.IsClassInitialized() &&
-          (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
-        // Ensure load of status and store of value don't re-order.
-        // TODO: Presumably the actual value store is control-dependent on the status load,
-        // and will thus not be reordered in any case, since stores are never speculated.
-        // Does later code "know" that the class is now initialized?  If so, we still
-        // need the barrier to guard later static loads.
-        GenMemBarrier(kLoadAny);
-      }
-    }
-    // rBase now holds static storage base
-    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
-    if (IsWide(size)) {
-      rl_src = LoadValueWide(rl_src, reg_class);
-    } else {
-      rl_src = LoadValue(rl_src, reg_class);
-    }
-    if (IsRef(size)) {
-      StoreRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg,
-                   field_info.IsVolatile() ? kVolatile : kNotVolatile);
-    } else {
-      StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, size,
-                    field_info.IsVolatile() ? kVolatile : kNotVolatile);
-    }
-    if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
-      MarkGCCard(mir->optimization_flags, rl_src.reg, r_base);
-    }
-    FreeTemp(r_base);
-  } else {
-    FlushAllRegs();  // Everything to home locations
-    QuickEntrypointEnum target;
-    switch (size) {
-      case kReference:
-        target = kQuickSetObjStatic;
-        break;
-      case k64:
-      case kDouble:
-        target = kQuickSet64Static;
-        break;
-      case k32:
-      case kSingle:
-        target = kQuickSet32Static;
-        break;
-      case kSignedHalf:
-      case kUnsignedHalf:
-        target = kQuickSet16Static;
-        break;
-      case kSignedByte:
-      case kUnsignedByte:
-        target = kQuickSet8Static;
-        break;
-      case kWord:  // Intentional fallthrough.
-      default:
-        LOG(FATAL) << "Can't determine entrypoint for: " << size;
-        target = kQuickSet32Static;
-    }
-    CallRuntimeHelperImmRegLocation(target, field_info.FieldIndex(), rl_src, true);
-  }
-}
-
-void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Type type) {
-  const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
-  DCHECK_EQ(SGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
-  cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
-
-  if (!ForceSlowFieldPath(cu_) && field_info.FastGet()) {
-    DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    RegStorage r_base;
-    if (field_info.IsReferrersClass()) {
-      // Fast path: the static storage base is this method's class.
-      r_base = AllocTempRef();
-      RegStorage r_method = LoadCurrMethodWithHint(r_base);
-      LoadRefDisp(r_method, ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
-                  kNotVolatile);
-    } else {
-      // Medium path: the static storage base is in a different class, which requires
-      // checking that the other class is initialized.
-      r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags);
-      if (!field_info.IsClassInitialized() &&
-          (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
-        // Ensure load of status and load of value don't re-order.
-        GenMemBarrier(kLoadAny);
-      }
-    }
-    // r_base now holds static storage base
-    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
-    RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    int field_offset = field_info.FieldOffset().Int32Value();
-    if (IsRef(size)) {
-      // TODO: DCHECK?
-      LoadRefDisp(r_base, field_offset, rl_result.reg, field_info.IsVolatile() ? kVolatile :
-          kNotVolatile);
-    } else {
-      LoadBaseDisp(r_base, field_offset, rl_result.reg, size, field_info.IsVolatile() ?
-          kVolatile : kNotVolatile);
-    }
-    FreeTemp(r_base);
-
-    if (IsWide(size)) {
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      StoreValue(rl_dest, rl_result);
-    }
-  } else {
-    DCHECK(SizeMatchesTypeForEntrypoint(size, type));
-    FlushAllRegs();  // Everything to home locations
-    QuickEntrypointEnum target;
-    switch (type) {
-      case Primitive::kPrimNot:
-        target = kQuickGetObjStatic;
-        break;
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        target = kQuickGet64Static;
-        break;
-      case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
-        target = kQuickGet32Static;
-        break;
-      case Primitive::kPrimShort:
-        target = kQuickGetShortStatic;
-        break;
-      case Primitive::kPrimChar:
-        target = kQuickGetCharStatic;
-        break;
-      case Primitive::kPrimByte:
-        target = kQuickGetByteStatic;
-        break;
-      case Primitive::kPrimBoolean:
-        target = kQuickGetBooleanStatic;
-        break;
-      case Primitive::kPrimVoid:  // Intentional fallthrough.
-      default:
-        LOG(FATAL) << "Can't determine entrypoint for: " << type;
-        target = kQuickGet32Static;
-    }
-    CallRuntimeHelperImm(target, field_info.FieldIndex(), true);
-
-    // FIXME: pGetXXStatic always returns an int or int64 regardless of rl_dest.fp.
-    if (IsWide(size)) {
-      RegLocation rl_result = GetReturnWide(kCoreReg);
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      RegLocation rl_result = GetReturn(rl_dest.ref ? kRefReg : kCoreReg);
-      StoreValue(rl_dest, rl_result);
-    }
-  }
-}
-
-// Generate code for all slow paths.
-void Mir2Lir::HandleSlowPaths() {
-  // Re-check slow_paths_.size() on every iteration: a new slow path may be created
-  // during slowpath->Compile(), which would invalidate a cached end iterator.
-  for (size_t i = 0; i < slow_paths_.size(); ++i) {
-    slow_paths_[i]->Compile();
-  }
-  slow_paths_.clear();
-}
-
-void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type,
-                      RegLocation rl_dest, RegLocation rl_obj) {
-  const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
-  if (kIsDebugBuild) {
-    auto mem_access_type = IsInstructionIGetQuickOrIPutQuick(mir->dalvikInsn.opcode) ?
-        IGetQuickOrIPutQuickMemAccessType(mir->dalvikInsn.opcode) :
-        IGetMemAccessType(mir->dalvikInsn.opcode);
-    DCHECK_EQ(mem_access_type, field_info.MemAccessType()) << mir->dalvikInsn.opcode;
-  }
-  cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
-  if (!ForceSlowFieldPath(cu_) && field_info.FastGet()) {
-    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
-    // A load of the class will lead to an iget with offset 0.
-    DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kRefReg);
-    GenNullCheck(rl_obj.reg, opt_flags);
-    RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
-    int field_offset = field_info.FieldOffset().Int32Value();
-    LIR* load_lir;
-    if (IsRef(size)) {
-      load_lir = LoadRefDisp(rl_obj.reg, field_offset, rl_result.reg, field_info.IsVolatile() ?
-          kVolatile : kNotVolatile);
-    } else {
-      load_lir = LoadBaseDisp(rl_obj.reg, field_offset, rl_result.reg, size,
-                              field_info.IsVolatile() ? kVolatile : kNotVolatile);
-    }
-    MarkPossibleNullPointerExceptionAfter(opt_flags, load_lir);
-    if (IsWide(size)) {
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      StoreValue(rl_dest, rl_result);
-    }
-  } else {
-    DCHECK(SizeMatchesTypeForEntrypoint(size, type));
-    QuickEntrypointEnum target;
-    switch (type) {
-      case Primitive::kPrimNot:
-        target = kQuickGetObjInstance;
-        break;
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        target = kQuickGet64Instance;
-        break;
-      case Primitive::kPrimFloat:
-      case Primitive::kPrimInt:
-        target = kQuickGet32Instance;
-        break;
-      case Primitive::kPrimShort:
-        target = kQuickGetShortInstance;
-        break;
-      case Primitive::kPrimChar:
-        target = kQuickGetCharInstance;
-        break;
-      case Primitive::kPrimByte:
-        target = kQuickGetByteInstance;
-        break;
-      case Primitive::kPrimBoolean:
-        target = kQuickGetBooleanInstance;
-        break;
-      case Primitive::kPrimVoid:  // Intentional fallthrough.
-      default:
-        LOG(FATAL) << "Can't determine entrypoint for: " << type;
-        target = kQuickGet32Instance;
-    }
-    // Second argument of pGetXXInstance is always a reference.
-    DCHECK_EQ(static_cast<unsigned int>(rl_obj.wide), 0U);
-    CallRuntimeHelperImmRegLocation(target, field_info.FieldIndex(), rl_obj, true);
-
-    // FIXME: pGetXXInstance always returns an int or int64 regardless of rl_dest.fp.
-    if (IsWide(size)) {
-      RegLocation rl_result = GetReturnWide(kCoreReg);
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      RegLocation rl_result = GetReturn(rl_dest.ref ? kRefReg : kCoreReg);
-      StoreValue(rl_dest, rl_result);
-    }
-  }
-}
-
-void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
-                      RegLocation rl_src, RegLocation rl_obj) {
-  const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
-  if (kIsDebugBuild) {
-    auto mem_access_type = IsInstructionIGetQuickOrIPutQuick(mir->dalvikInsn.opcode) ?
-        IGetQuickOrIPutQuickMemAccessType(mir->dalvikInsn.opcode) :
-        IPutMemAccessType(mir->dalvikInsn.opcode);
-    DCHECK_EQ(mem_access_type, field_info.MemAccessType());
-  }
-  cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
-  if (!ForceSlowFieldPath(cu_) && field_info.FastPut()) {
-    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
-    // Dex code never writes to the class field.
-    DCHECK_GE(static_cast<uint32_t>(field_info.FieldOffset().Int32Value()),
-              sizeof(mirror::HeapReference<mirror::Class>));
-    rl_obj = LoadValue(rl_obj, kRefReg);
-    if (IsWide(size)) {
-      rl_src = LoadValueWide(rl_src, reg_class);
-    } else {
-      rl_src = LoadValue(rl_src, reg_class);
-    }
-    GenNullCheck(rl_obj.reg, opt_flags);
-    int field_offset = field_info.FieldOffset().Int32Value();
-    LIR* null_ck_insn;
-    if (IsRef(size)) {
-      null_ck_insn = StoreRefDisp(rl_obj.reg, field_offset, rl_src.reg, field_info.IsVolatile() ?
-          kVolatile : kNotVolatile);
-    } else {
-      null_ck_insn = StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, size,
-                                   field_info.IsVolatile() ? kVolatile : kNotVolatile);
-    }
-    MarkPossibleNullPointerExceptionAfter(opt_flags, null_ck_insn);
-    if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
-      MarkGCCard(opt_flags, rl_src.reg, rl_obj.reg);
-    }
-  } else {
-    QuickEntrypointEnum target;
-    switch (size) {
-      case kReference:
-        target = kQuickSetObjInstance;
-        break;
-      case k64:
-      case kDouble:
-        target = kQuickSet64Instance;
-        break;
-      case k32:
-      case kSingle:
-        target = kQuickSet32Instance;
-        break;
-      case kSignedHalf:
-      case kUnsignedHalf:
-        target = kQuickSet16Instance;
-        break;
-      case kSignedByte:
-      case kUnsignedByte:
-        target = kQuickSet8Instance;
-        break;
-      case kWord:  // Intentional fallthrough.
-      default:
-        LOG(FATAL) << "Can't determine entrypoint for: " << size;
-        target = kQuickSet32Instance;
-    }
-    CallRuntimeHelperImmRegLocationRegLocation(target, field_info.FieldIndex(), rl_obj, rl_src,
-                                               true);
-  }
-}
-
-void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
-                             RegLocation rl_src) {
-  bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK);
-  bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) &&
-      (opt_flags & MIR_IGNORE_NULL_CHECK));
-  QuickEntrypointEnum target = needs_range_check
-        ? (needs_null_check ? kQuickAputObjectWithNullAndBoundCheck
-                            : kQuickAputObjectWithBoundCheck)
-        : kQuickAputObject;
-  CallRuntimeHelperRegLocationRegLocationRegLocation(target, rl_array, rl_index, rl_src, true);
-}
-
-void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
-  RegLocation rl_result;
-  if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
-                                                        *cu_->dex_file,
-                                                        type_idx)) {
-    // Call out to helper which resolves type and verifies access.
-    // Resolved type returned in kRet0.
-    CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
-    rl_result = GetReturn(kRefReg);
-  } else {
-    rl_result = EvalLoc(rl_dest, kRefReg, true);
-    // We don't need access checks; load the type from the dex cache.
-    LoadTypeFromCache(type_idx, rl_result.reg);
-    if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
-        type_idx) || ForceSlowTypePath(cu_)) {
-      // Slow path: at runtime, test whether the type is null and, if so, initialize it.
-      FlushAllRegs();
-      GenIfNullUseHelperImm(rl_result.reg, kQuickInitializeType, type_idx);
-    }
-  }
-  StoreValue(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) {
-  /* NOTE: Most strings should be available at compile time */
-  int32_t offset_of_string = GetCacheOffset(string_idx);
-  if (!cu_->compiler_driver->CanAssumeStringIsPresentInDexCache(
-      *cu_->dex_file, string_idx) || ForceSlowStringPath(cu_)) {
-    // Slow path: resolve the string if it is not in the dex cache.
-    FlushAllRegs();
-    LockCallTemps();  // Using explicit registers
-
-    // Might call out to helper, which will return resolved string in kRet0
-    RegStorage ret0 = TargetReg(kRet0, kRef);
-    if (CanUseOpPcRelDexCacheArrayLoad()) {
-      size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx);
-      OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0, false);
-    } else {
-      // Method to declaring class.
-      RegStorage arg0 = TargetReg(kArg0, kRef);
-      RegStorage r_method = LoadCurrMethodWithHint(arg0);
-      LoadRefDisp(r_method, ArtMethod::DeclaringClassOffset().Int32Value(), arg0, kNotVolatile);
-      // Declaring class to dex cache strings.
-      LoadBaseDisp(arg0, mirror::Class::DexCacheStringsOffset().Int32Value(), arg0,
-                   cu_->target64 ? k64 : k32, kNotVolatile);
-
-      LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile);
-    }
-    GenIfNullUseHelperImm(ret0, kQuickResolveString, string_idx);
-
-    GenBarrier();
-    StoreValue(rl_dest, GetReturn(kRefReg));
-  } else {
-    RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-    if (CanUseOpPcRelDexCacheArrayLoad()) {
-      size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx);
-      OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg, false);
-    } else {
-      RegLocation rl_method = LoadCurrMethod();
-      RegStorage res_reg = AllocTempRef();
-      LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(), res_reg,
-                  kNotVolatile);
-      LoadBaseDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg,
-                   cu_->target64 ? k64 : k32, kNotVolatile);
-      LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile);
-      FreeTemp(res_reg);
-    }
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-/*
- * Let helper function take care of everything.  Will
- * call Class::NewInstanceFromCode(type_idx, method);
- */
-void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) {
-  FlushAllRegs();  /* Everything to home location */
-  // Alloc will always check for resolution; do we also need to verify access
-  // because the verifier was unable to?
-  const DexFile* dex_file = cu_->dex_file;
-  CompilerDriver* driver = cu_->compiler_driver;
-  if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx, *dex_file, type_idx)) {
-    bool is_type_initialized;
-    bool use_direct_type_ptr;
-    uintptr_t direct_type_ptr;
-    bool is_finalizable;
-    if (kEmbedClassInCode &&
-        driver->CanEmbedTypeInCode(*dex_file, type_idx, &is_type_initialized, &use_direct_type_ptr,
-                                   &direct_type_ptr, &is_finalizable) &&
-                                   !is_finalizable) {
-      // The fast path.
-      if (!use_direct_type_ptr) {
-        LoadClassType(*dex_file, type_idx, kArg0);
-        if (!is_type_initialized) {
-          CallRuntimeHelperRegMethod(kQuickAllocObjectResolved, TargetReg(kArg0, kRef), true);
-        } else {
-          CallRuntimeHelperRegMethod(kQuickAllocObjectInitialized, TargetReg(kArg0, kRef), true);
-        }
-      } else {
-        // Use the direct pointer.
-        if (!is_type_initialized) {
-          CallRuntimeHelperImmMethod(kQuickAllocObjectResolved, direct_type_ptr, true);
-        } else {
-          CallRuntimeHelperImmMethod(kQuickAllocObjectInitialized, direct_type_ptr, true);
-        }
-      }
-    } else {
-      // The slow path.
-      CallRuntimeHelperImmMethod(kQuickAllocObject, type_idx, true);
-    }
-  } else {
-    CallRuntimeHelperImmMethod(kQuickAllocObjectWithAccessCheck, type_idx, true);
-  }
-  StoreValue(rl_dest, GetReturn(kRefReg));
-}
-
-void Mir2Lir::GenThrow(RegLocation rl_src) {
-  FlushAllRegs();
-  CallRuntimeHelperRegLocation(kQuickDeliverException, rl_src, true);
-}
-
-// For final classes there are no sub-classes to check and so we can answer the instance-of
-// question with simple comparisons.
-void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                                 RegLocation rl_src) {
-  // X86 has its own implementation.
-  DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
-
-  RegLocation object = LoadValue(rl_src, kRefReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage result_reg = rl_result.reg;
-  if (IsSameReg(result_reg, object.reg)) {
-    result_reg = AllocTypedTemp(false, kCoreReg);
-    DCHECK(!IsSameReg(result_reg, object.reg));
-  }
-  LoadConstant(result_reg, 0);     // assume false
-  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);
-
-  RegStorage check_class = AllocTypedTemp(false, kRefReg);
-  RegStorage object_class = AllocTypedTemp(false, kRefReg);
-
-  if (use_declaring_class) {
-    RegStorage r_method = LoadCurrMethodWithHint(check_class);
-    LoadRefDisp(r_method, ArtMethod::DeclaringClassOffset().Int32Value(), check_class,
-                kNotVolatile);
-    LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class,
-                kNotVolatile);
-  } else {
-    LoadTypeFromCache(type_idx, check_class);
-    LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class,
-                kNotVolatile);
-  }
-
-  // FIXME: what should we be comparing here? compressed or decompressed references?
-  if (cu_->instruction_set == kThumb2) {
-    OpRegReg(kOpCmp, check_class, object_class);  // Same?
-    LIR* it = OpIT(kCondEq, "");   // if-convert the test
-    LoadConstant(result_reg, 1);     // .eq case - load true
-    OpEndIT(it);
-  } else {
-    GenSelectConst32(check_class, object_class, kCondEq, 1, 0, result_reg, kCoreReg);
-  }
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_branchover->target = target;
-  FreeTemp(object_class);
-  FreeTemp(check_class);
-  if (IsTemp(result_reg)) {
-    OpRegCopy(rl_result.reg, result_reg);
-    FreeTemp(result_reg);
-  }
-  StoreValue(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                         bool type_known_abstract, bool use_declaring_class,
-                                         bool can_assume_type_is_in_dex_cache,
-                                         uint32_t type_idx, RegLocation rl_dest,
-                                         RegLocation rl_src) {
-  FlushAllRegs();
-  // May generate a call - use explicit registers
-  LockCallTemps();
-  RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*
-  RegStorage ref_reg = TargetReg(kArg0, kRef);  // kArg0 will hold the ref.
-  RegStorage ret_reg = GetReturn(kRefReg).reg;
-  if (needs_access_check) {
-    // Check we have access to type_idx and if not throw IllegalAccessError,
-    // returns Class* in kArg0
-    CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
-    OpRegCopy(class_reg, ret_reg);  // Align usage with fast path
-    LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
-  } else if (use_declaring_class) {
-    RegStorage r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
-    LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
-    LoadRefDisp(r_method, ArtMethod::DeclaringClassOffset().Int32Value(),
-                class_reg, kNotVolatile);
-  } else {
-    if (can_assume_type_is_in_dex_cache) {
-      // Conditionally, as in the other case we will also load it.
-      LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
-    }
-
-    // Load dex cache entry into class_reg (kArg2)
-    LoadTypeFromCache(type_idx, class_reg);
-    if (!can_assume_type_is_in_dex_cache) {
-      GenIfNullUseHelperImm(class_reg, kQuickInitializeType, type_idx);
-
-      // Should load value here.
-      LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
-    }
-  }
-  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
-  RegLocation rl_result = GetReturn(kCoreReg);
-  if (!IsSameReg(rl_result.reg, ref_reg)) {
-    // On MIPS and x86_64, rArg0 != rl_result; place false in the result if the branch is taken.
-    LoadConstant(rl_result.reg, 0);
-  }
-  LIR* branch1 = OpCmpImmBranch(kCondEq, ref_reg, 0, nullptr);
-
-  /* load object->klass_ */
-  RegStorage ref_class_reg = TargetReg(kArg1, kRef);  // kArg1 will hold the Class* of ref.
-  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadRefDisp(ref_reg, mirror::Object::ClassOffset().Int32Value(),
-              ref_class_reg, kNotVolatile);
-  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */
-  LIR* branchover = nullptr;
-  if (type_known_final) {
-    // rl_result == ref == class.
-    GenSelectConst32(ref_class_reg, class_reg, kCondEq, 1, 0, rl_result.reg,
-                     kCoreReg);
-  } else {
-    if (cu_->instruction_set == kThumb2) {
-      RegStorage r_tgt = LoadHelper(kQuickInstanceofNonTrivial);
-      LIR* it = nullptr;
-      if (!type_known_abstract) {
-        /* Uses conditional nullification */
-        OpRegReg(kOpCmp, ref_class_reg, class_reg);  // Same?
-        it = OpIT(kCondEq, "EE");   // if-convert the test
-        LoadConstant(rl_result.reg, 1);     // .eq case - load true
-      }
-      OpRegCopy(ref_reg, class_reg);    // .ne case - arg0 <= class
-      OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
-      if (it != nullptr) {
-        OpEndIT(it);
-      }
-      FreeTemp(r_tgt);
-    } else {
-      if (!type_known_abstract) {
-        /* Uses branchovers */
-        LoadConstant(rl_result.reg, 1);     // assume true
-        branchover = OpCmpBranch(kCondEq, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef), nullptr);
-      }
-
-      OpRegCopy(TargetReg(kArg0, kRef), class_reg);    // .ne case - arg0 <= class
-      CallRuntimeHelper(kQuickInstanceofNonTrivial, false);
-    }
-  }
-  // TODO: only clobber when type isn't final?
-  ClobberCallerSave();
-  /* branch targets here */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  StoreValue(rl_dest, rl_result);
-  branch1->target = target;
-  if (branchover != nullptr) {
-    branchover->target = target;
-  }
-}
-
-void Mir2Lir::GenInstanceof(uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src) {
-  bool type_known_final, type_known_abstract, use_declaring_class;
-  bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
-                                                                              *cu_->dex_file,
-                                                                              type_idx,
-                                                                              &type_known_final,
-                                                                              &type_known_abstract,
-                                                                              &use_declaring_class);
-  bool can_assume_type_is_in_dex_cache = !needs_access_check &&
-      cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx);
-
-  if ((use_declaring_class || can_assume_type_is_in_dex_cache) && type_known_final) {
-    GenInstanceofFinal(use_declaring_class, type_idx, rl_dest, rl_src);
-  } else {
-    GenInstanceofCallingHelper(needs_access_check, type_known_final, type_known_abstract,
-                               use_declaring_class, can_assume_type_is_in_dex_cache,
-                               type_idx, rl_dest, rl_src);
-  }
-}
-
-void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx,
-                           RegLocation rl_src) {
-  if ((opt_flags & MIR_IGNORE_CHECK_CAST) != 0) {
-    // Compiler analysis proved that this check-cast would never cause an exception.
-    return;
-  }
-  bool type_known_final, type_known_abstract, use_declaring_class;
-  bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
-                                                                              *cu_->dex_file,
-                                                                              type_idx,
-                                                                              &type_known_final,
-                                                                              &type_known_abstract,
-                                                                              &use_declaring_class);
-  // Note: type_known_final is currently unused; optimizing on it would only improve
-  // the performance of the exception throw path.
-  DexCompilationUnit* cu = mir_graph_->GetCurrentDexCompilationUnit();
-  if (!needs_access_check && cu_->compiler_driver->IsSafeCast(cu, insn_idx)) {
-    // Verifier type analysis proved this check cast would never cause an exception.
-    return;
-  }
-  FlushAllRegs();
-  // May generate a call - use explicit registers
-  LockCallTemps();
-  RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*
-  if (needs_access_check) {
-    // Check we have access to type_idx and if not throw IllegalAccessError,
-    // returns Class* in kRet0
-    // InitializeTypeAndVerifyAccess(idx, method)
-    CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
-    OpRegCopy(class_reg, TargetReg(kRet0, kRef));  // Align usage with fast path
-  } else if (use_declaring_class) {
-    RegStorage method_reg = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
-    LoadRefDisp(method_reg, ArtMethod::DeclaringClassOffset().Int32Value(),
-                class_reg, kNotVolatile);
-  } else {
-    // Load dex cache entry into class_reg (kArg2)
-    LoadTypeFromCache(type_idx, class_reg);
-    if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
-      // Need to test presence of type in dex cache at runtime
-      GenIfNullUseHelperImm(class_reg, kQuickInitializeType, type_idx);
-    }
-  }
-  // At this point, class_reg (kArg2) has class
-  LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
-
-  // Slow path for the case where the classes are not equal.  In this case we need
-  // to call a helper function to do the check.
-  class SlowPath : public LIRSlowPath {
-   public:
-    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load)
-        : LIRSlowPath(m2l, fromfast, cont), load_(load) {
-    }
-
-    void Compile() {
-      GenerateTargetLabel();
-
-      if (load_) {
-        m2l_->LoadRefDisp(m2l_->TargetReg(kArg0, kRef), mirror::Object::ClassOffset().Int32Value(),
-                          m2l_->TargetReg(kArg1, kRef), kNotVolatile);
-      }
-      m2l_->CallRuntimeHelperRegReg(kQuickCheckCast, m2l_->TargetReg(kArg2, kRef),
-                                    m2l_->TargetReg(kArg1, kRef), true);
-      m2l_->OpUnconditionalBranch(cont_);
-    }
-
-   private:
-    const bool load_;
-  };
-
-  if (type_known_abstract) {
-    // Easier case: run the slow path if the target is non-null (the slow path loads from it).
-    LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0, kRef), 0, nullptr);
-    LIR* cont = NewLIR0(kPseudoTargetLabel);
-    AddSlowPath(new (arena_) SlowPath(this, branch, cont, true));
-  } else {
-    // Harder, more common case.  We need to generate a forward branch over the load
-    // if the target is null.  If it's non-null we perform the load and branch to the
-    // slow path if the classes are not equal.
-
-    /* Null is OK - continue */
-    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0, kRef), 0, nullptr);
-    /* load object->klass_ */
-    DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-    LoadRefDisp(TargetReg(kArg0, kRef), mirror::Object::ClassOffset().Int32Value(),
-                TargetReg(kArg1, kRef), kNotVolatile);
-
-    LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1, kRef), class_reg, nullptr);
-    LIR* cont = NewLIR0(kPseudoTargetLabel);
-
-    // Add the slow path that will not perform the load, since it has already been done.
-    AddSlowPath(new (arena_) SlowPath(this, branch2, cont, false));
-
-    // Set the null check to branch to the continuation.
-    branch1->target = cont;
-  }
-}
-
-void Mir2Lir::GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2) {
-  RegLocation rl_result;
-  if (cu_->instruction_set == kThumb2) {
-    /*
-     * NOTE:  This is the one place in the code in which we might have
-     * as many as six live temporary registers.  There are 5 in the normal
-     * set for Arm.  Until we have spill capabilities, temporarily add
-     * lr to the temp set.  It is safe to do this locally, but note that
-     * lr is used explicitly elsewhere in the code generator and cannot
-     * normally be used as a general temp register.
-     */
-    MarkTemp(TargetReg(kLr, kNotWide));   // Add lr to the temp pool
-    FreeTemp(TargetReg(kLr, kNotWide));   // and make it available
-  }
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // The longs may overlap - use intermediate temp if so
-  if ((rl_result.reg.GetLowReg() == rl_src1.reg.GetHighReg()) ||
-      (rl_result.reg.GetLowReg() == rl_src2.reg.GetHighReg())) {
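-    // The result's low reg aliases a source high reg; stage the low half in a temp first.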
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(first_op, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
-    OpRegRegReg(second_op, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
-    OpRegCopy(rl_result.reg.GetLow(), t_reg);
-    FreeTemp(t_reg);
-  } else {
-    OpRegRegReg(first_op, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
-    OpRegRegReg(second_op, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
-  }
-  /*
-   * NOTE: If rl_dest refers to a frame variable in a large frame, the
-   * following StoreValueWide might need to allocate a temp register.
-   * To further work around the lack of a spill capability, explicitly
-   * free any temps from rl_src1 & rl_src2 that aren't still live in rl_result.
-   * Remove when spill is functional.
-   */
-  FreeRegLocTemps(rl_result, rl_src1);
-  FreeRegLocTemps(rl_result, rl_src2);
-  StoreValueWide(rl_dest, rl_result);
-  if (cu_->instruction_set == kThumb2) {
-    Clobber(TargetReg(kLr, kNotWide));
-    UnmarkTemp(TargetReg(kLr, kNotWide));  // Remove lr from the temp pool
-  }
-}
-
-void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                             RegLocation rl_src1, RegLocation rl_shift) {
-  QuickEntrypointEnum target;
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      target = kQuickShlLong;
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      target = kQuickShrLong;
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      target = kQuickUshrLong;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case";
-      target = kQuickShlLong;
-  }
-  FlushAllRegs();   /* Send everything to home location */
-  CallRuntimeHelperRegLocationRegLocation(target, rl_src1, rl_shift, false);
-  RegLocation rl_result = GetReturnWide(kCoreReg);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                            RegLocation rl_src1, RegLocation rl_src2, int flags) {
-  DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
-  OpKind op = kOpBkpt;
-  bool is_div_rem = false;
-  bool check_zero = false;
-  bool unary = false;
-  RegLocation rl_result;
-  bool shift_op = false;
-  switch (opcode) {
-    case Instruction::NEG_INT:
-      op = kOpNeg;
-      unary = true;
-      break;
-    case Instruction::NOT_INT:
-      op = kOpMvn;
-      unary = true;
-      break;
-    case Instruction::ADD_INT:
-    case Instruction::ADD_INT_2ADDR:
-      op = kOpAdd;
-      break;
-    case Instruction::SUB_INT:
-    case Instruction::SUB_INT_2ADDR:
-      op = kOpSub;
-      break;
-    case Instruction::MUL_INT:
-    case Instruction::MUL_INT_2ADDR:
-      op = kOpMul;
-      break;
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-      check_zero = true;
-      op = kOpDiv;
-      is_div_rem = true;
-      break;
-    /* NOTE: returns in kArg1 */
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
-      check_zero = true;
-      op = kOpRem;
-      is_div_rem = true;
-      break;
-    case Instruction::AND_INT:
-    case Instruction::AND_INT_2ADDR:
-      op = kOpAnd;
-      break;
-    case Instruction::OR_INT:
-    case Instruction::OR_INT_2ADDR:
-      op = kOpOr;
-      break;
-    case Instruction::XOR_INT:
-    case Instruction::XOR_INT_2ADDR:
-      op = kOpXor;
-      break;
-    case Instruction::SHL_INT:
-    case Instruction::SHL_INT_2ADDR:
-      shift_op = true;
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_INT:
-    case Instruction::SHR_INT_2ADDR:
-      shift_op = true;
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_INT:
-    case Instruction::USHR_INT_2ADDR:
-      shift_op = true;
-      op = kOpLsr;
-      break;
-    default:
-      LOG(FATAL) << "Invalid word arith op: " << opcode;
-  }
-  if (!is_div_rem) {
-    if (unary) {
-      rl_src1 = LoadValue(rl_src1, kCoreReg);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      OpRegReg(op, rl_result.reg, rl_src1.reg);
-    } else {
-      if ((shift_op) && (cu_->instruction_set != kArm64)) {
-        rl_src2 = LoadValue(rl_src2, kCoreReg);
-        RegStorage t_reg = AllocTemp();
-        OpRegRegImm(kOpAnd, t_reg, rl_src2.reg, 31);
-        rl_src1 = LoadValue(rl_src1, kCoreReg);
-        rl_result = EvalLoc(rl_dest, kCoreReg, true);
-        OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
-        FreeTemp(t_reg);
-      } else {
-        rl_src1 = LoadValue(rl_src1, kCoreReg);
-        rl_src2 = LoadValue(rl_src2, kCoreReg);
-        rl_result = EvalLoc(rl_dest, kCoreReg, true);
-        OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_src2.reg);
-      }
-    }
-    StoreValue(rl_dest, rl_result);
-  } else {
-    bool done = false;      // Set to true if we happen to find a way to use a real instruction.
-    if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64 ||
-        cu_->instruction_set == kArm64) {
-      rl_src1 = LoadValue(rl_src1, kCoreReg);
-      rl_src2 = LoadValue(rl_src2, kCoreReg);
-      if (check_zero && (flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-        GenDivZeroCheck(rl_src2.reg);
-      }
-      rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
-      done = true;
-    } else if (cu_->instruction_set == kThumb2) {
-      if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArmInstructionSetFeatures()->
-              HasDivideInstruction()) {
-        // Use ARM SDIV instruction for division.  For remainder we also need to
-        // calculate using a MUL and subtract.
-        rl_src1 = LoadValue(rl_src1, kCoreReg);
-        rl_src2 = LoadValue(rl_src2, kCoreReg);
-        if (check_zero && (flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-          GenDivZeroCheck(rl_src2.reg);
-        }
-        rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
-        done = true;
-      }
-    }
-
-    // If we haven't already generated the code, use the callout function.
-    if (!done) {
-      FlushAllRegs();   /* Send everything to home location */
-      LoadValueDirectFixed(rl_src2, TargetReg(kArg1, kNotWide));
-      RegStorage r_tgt = CallHelperSetup(kQuickIdivmod);
-      LoadValueDirectFixed(rl_src1, TargetReg(kArg0, kNotWide));
-      if (check_zero && (flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-        GenDivZeroCheck(TargetReg(kArg1, kNotWide));
-      }
-      // NOTE: callout here is not a safepoint.
-      CallHelper(r_tgt, kQuickIdivmod, false /* not a safepoint */);
-      if (op == kOpDiv) {
-        rl_result = GetReturn(kCoreReg);
-      } else {
-        rl_result = GetReturnAlt();
-      }
-    }
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-/*
- * The following are the first-level codegen routines that analyze the format
- * of each bytecode then either dispatch special purpose codegen routines
- * or produce corresponding Thumb instructions directly.
- */
-
-// Returns true if no more than two bits are set in 'x'.
-static bool IsPopCountLE2(unsigned int x) {
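-  // Clear the lowest set bit, then check that at most one bit remains set.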
-  x &= x - 1;
-  return (x & (x - 1)) == 0;
-}
-
-// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
-// and store the result in 'rl_dest'.
-bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED, bool is_div,
-                               RegLocation rl_src, RegLocation rl_dest, int lit) {
-  if ((lit < 2) || (!IsPowerOfTwo(lit))) {
-    return false;
-  }
-  int k = CTZ(lit);
-  if (k >= 30) {
-    // Avoid special cases.
-    return false;
-  }
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    RegStorage t_reg = AllocTemp();
-    if (lit == 2) {
-      // Division by 2 is by far the most common division by constant.
-      OpRegRegImm(kOpLsr, t_reg, rl_src.reg, 32 - k);
-      OpRegRegReg(kOpAdd, t_reg, t_reg, rl_src.reg);
-      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
-    } else {
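-      // Bias a negative dividend by (2^k - 1) so the arithmetic shift rounds toward zero.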
-      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
-      OpRegRegImm(kOpLsr, t_reg, t_reg, 32 - k);
-      OpRegRegReg(kOpAdd, t_reg, t_reg, rl_src.reg);
-      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
-    }
-  } else {
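-    // Remainder: bias the dividend the same way, mask the low k bits, then remove the bias.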
-    RegStorage t_reg1 = AllocTemp();
-    RegStorage t_reg2 = AllocTemp();
-    if (lit == 2) {
-      OpRegRegImm(kOpLsr, t_reg1, rl_src.reg, 32 - k);
-      OpRegRegReg(kOpAdd, t_reg2, t_reg1, rl_src.reg);
-      OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit - 1);
-      OpRegRegReg(kOpSub, rl_result.reg, t_reg2, t_reg1);
-    } else {
-      OpRegRegImm(kOpAsr, t_reg1, rl_src.reg, 31);
-      OpRegRegImm(kOpLsr, t_reg1, t_reg1, 32 - k);
-      OpRegRegReg(kOpAdd, t_reg2, t_reg1, rl_src.reg);
-      OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit - 1);
-      OpRegRegReg(kOpSub, rl_result.reg, t_reg2, t_reg1);
-    }
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-// Returns true if it added instructions to 'cu' to multiply 'rl_src' by 'lit'
-// and store the result in 'rl_dest'.
-bool Mir2Lir::HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
-  if (lit < 0) {
-    return false;
-  }
-  if (lit == 0) {
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    LoadConstant(rl_result.reg, 0);
-    StoreValue(rl_dest, rl_result);
-    return true;
-  }
-  if (lit == 1) {
-    rl_src = LoadValue(rl_src, kCoreReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegCopy(rl_result.reg, rl_src.reg);
-    StoreValue(rl_dest, rl_result);
-    return true;
-  }
-  // There is RegRegRegShift on Arm, so check for more special cases
-  if (cu_->instruction_set == kThumb2) {
-    return EasyMultiply(rl_src, rl_dest, lit);
-  }
-  // Can we simplify this multiplication?
-  bool power_of_two = false;
-  bool pop_count_le2 = false;
-  bool power_of_two_minus_one = false;
-  if (IsPowerOfTwo(lit)) {
-    power_of_two = true;
-  } else if (IsPopCountLE2(lit)) {
-    pop_count_le2 = true;
-  } else if (IsPowerOfTwo(lit + 1)) {
-    power_of_two_minus_one = true;
-  } else {
-    return false;
-  }
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (power_of_two) {
-    // Shift.
-    OpRegRegImm(kOpLsl, rl_result.reg, rl_src.reg, CTZ(lit));
-  } else if (pop_count_le2) {
-    // lit has exactly two bits set: x * lit == (x << first_bit) + (x << second_bit).
-    int first_bit = CTZ(lit);
-    int second_bit = CTZ(lit ^ (1 << first_bit));
-    GenMultiplyByTwoBitMultiplier(rl_src, rl_result, lit, first_bit, second_bit);
-  } else {
-    // Reverse subtract: with k = CTZ(lit + 1), src * (2^k - 1) == (src << k) - src.
-    DCHECK(power_of_two_minus_one);
-    // TUNING: rsb dst, src, src lsl#CTZ(lit + 1)
-    RegStorage t_reg = AllocTemp();
-    OpRegRegImm(kOpLsl, t_reg, rl_src.reg, CTZ(lit + 1));
-    OpRegRegReg(kOpSub, rl_result.reg, t_reg, rl_src.reg);
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-// Returns true if it generates instructions.
-bool Mir2Lir::HandleEasyFloatingPointDiv(RegLocation rl_dest, RegLocation rl_src1,
-                                         RegLocation rl_src2) {
-  if (!rl_src2.is_const ||
-      ((cu_->instruction_set != kThumb2) && (cu_->instruction_set != kArm64))) {
-    return false;
-  }
-
-  if (!rl_src2.wide) {
-    int32_t divisor = mir_graph_->ConstantValue(rl_src2);
-    if (CanDivideByReciprocalMultiplyFloat(divisor)) {
-      // Generate multiply by reciprocal instead of div.
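-      // This is only exact when the reciprocal is exactly representable,
-      // e.g. when the divisor is a power of two.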
-      float recip = 1.0f / bit_cast<float, int32_t>(divisor);
-      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<int32_t, float>(recip));
-      return true;
-    }
-  } else {
-    int64_t divisor = mir_graph_->ConstantValueWide(rl_src2);
-    if (CanDivideByReciprocalMultiplyDouble(divisor)) {
-      // Generate multiply by reciprocal instead of div.
-      double recip = 1.0 / bit_cast<double, int64_t>(divisor);
-      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<int64_t, double>(recip));
-      return true;
-    }
-  }
-  return false;
-}
-
-void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src,
-                               int lit) {
-  RegLocation rl_result;
-  OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
-  bool shift_op = false;
-  bool is_div = false;
-
-  switch (opcode) {
-    case Instruction::RSUB_INT_LIT8:
-    case Instruction::RSUB_INT: {
-      rl_src = LoadValue(rl_src, kCoreReg);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      if (cu_->instruction_set == kThumb2) {
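-        // Thumb2's reverse subtract (RSB) computes lit - src in a single instruction.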
-        OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, lit);
-      } else {
-        OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
-        OpRegImm(kOpAdd, rl_result.reg, lit);
-      }
-      StoreValue(rl_dest, rl_result);
-      return;
-    }
-
-    case Instruction::SUB_INT:
-    case Instruction::SUB_INT_2ADDR:
-      lit = -lit;
-      FALLTHROUGH_INTENDED;
-    case Instruction::ADD_INT:
-    case Instruction::ADD_INT_2ADDR:
-    case Instruction::ADD_INT_LIT8:
-    case Instruction::ADD_INT_LIT16:
-      op = kOpAdd;
-      break;
-    case Instruction::MUL_INT:
-    case Instruction::MUL_INT_2ADDR:
-    case Instruction::MUL_INT_LIT8:
-    case Instruction::MUL_INT_LIT16: {
-      if (HandleEasyMultiply(rl_src, rl_dest, lit)) {
-        return;
-      }
-      op = kOpMul;
-      break;
-    }
-    case Instruction::AND_INT:
-    case Instruction::AND_INT_2ADDR:
-    case Instruction::AND_INT_LIT8:
-    case Instruction::AND_INT_LIT16:
-      op = kOpAnd;
-      break;
-    case Instruction::OR_INT:
-    case Instruction::OR_INT_2ADDR:
-    case Instruction::OR_INT_LIT8:
-    case Instruction::OR_INT_LIT16:
-      op = kOpOr;
-      break;
-    case Instruction::XOR_INT:
-    case Instruction::XOR_INT_2ADDR:
-    case Instruction::XOR_INT_LIT8:
-    case Instruction::XOR_INT_LIT16:
-      op = kOpXor;
-      break;
-    case Instruction::SHL_INT_LIT8:
-    case Instruction::SHL_INT:
-    case Instruction::SHL_INT_2ADDR:
-      lit &= 31;
-      shift_op = true;
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_INT_LIT8:
-    case Instruction::SHR_INT:
-    case Instruction::SHR_INT_2ADDR:
-      lit &= 31;
-      shift_op = true;
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_INT_LIT8:
-    case Instruction::USHR_INT:
-    case Instruction::USHR_INT_2ADDR:
-      lit &= 31;
-      shift_op = true;
-      op = kOpLsr;
-      break;
-
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::DIV_INT_LIT8:
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
-    case Instruction::REM_INT_LIT8:
-    case Instruction::REM_INT_LIT16: {
-      if (lit == 0) {
-        GenDivZeroException();
-        return;
-      }
-      if ((opcode == Instruction::DIV_INT) ||
-          (opcode == Instruction::DIV_INT_2ADDR) ||
-          (opcode == Instruction::DIV_INT_LIT8) ||
-          (opcode == Instruction::DIV_INT_LIT16)) {
-        is_div = true;
-      } else {
-        is_div = false;
-      }
-      if (HandleEasyDivRem(opcode, is_div, rl_src, rl_dest, lit)) {
-        return;
-      }
-
-      bool done = false;
-      if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64 ||
-          cu_->instruction_set == kArm64) {
-        rl_src = LoadValue(rl_src, kCoreReg);
-        rl_result = GenDivRemLit(rl_dest, rl_src.reg, lit, is_div);
-        done = true;
-      } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-        rl_result = GenDivRemLit(rl_dest, rl_src, lit, is_div);
-        done = true;
-      } else if (cu_->instruction_set == kThumb2) {
-        if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArmInstructionSetFeatures()->
-                HasDivideInstruction()) {
-          // Use ARM SDIV instruction for division.  For remainder we also need to
-          // calculate using a MUL and subtract.
-          rl_src = LoadValue(rl_src, kCoreReg);
-          rl_result = GenDivRemLit(rl_dest, rl_src.reg, lit, is_div);
-          done = true;
-        }
-      }
-
-      if (!done) {
-        FlushAllRegs();   /* Everything to home location. */
-        LoadValueDirectFixed(rl_src, TargetReg(kArg0, kNotWide));
-        Clobber(TargetReg(kArg0, kNotWide));
-        CallRuntimeHelperRegImm(kQuickIdivmod, TargetReg(kArg0, kNotWide), lit, false);
-        if (is_div) {
-          rl_result = GetReturn(kCoreReg);
-        } else {
-          rl_result = GetReturnAlt();
-        }
-      }
-      StoreValue(rl_dest, rl_result);
-      return;
-    }
-    default:
-      LOG(FATAL) << "Unexpected opcode " << opcode;
-  }
-  rl_src = LoadValue(rl_src, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // Avoid shifts by literal 0 - no support in Thumb.  Change to copy.
-  if (shift_op && (lit == 0)) {
-    OpRegCopy(rl_result.reg, rl_src.reg);
-  } else {
-    OpRegRegImm(op, rl_result.reg, rl_src.reg, lit);
-  }
-  StoreValue(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                             RegLocation rl_src1, RegLocation rl_src2, int flags) {
-  RegLocation rl_result;
-  OpKind first_op = kOpBkpt;
-  OpKind second_op = kOpBkpt;
-  bool call_out = false;
-  bool check_zero = false;
-  int ret_reg = TargetReg(kRet0, kNotWide).GetReg();
-  QuickEntrypointEnum target;
-
-  switch (opcode) {
-    case Instruction::NOT_LONG:
-      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      // Check for destructive overlap
-      if (rl_result.reg.GetLowReg() == rl_src2.reg.GetHighReg()) {
-        RegStorage t_reg = AllocTemp();
-        OpRegCopy(t_reg, rl_src2.reg.GetHigh());
-        OpRegReg(kOpMvn, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
-        OpRegReg(kOpMvn, rl_result.reg.GetHigh(), t_reg);
-        FreeTemp(t_reg);
-      } else {
-        OpRegReg(kOpMvn, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
-        OpRegReg(kOpMvn, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
-      }
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      first_op = kOpAdd;
-      second_op = kOpAdc;
-      break;
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      first_op = kOpSub;
-      second_op = kOpSbc;
-      break;
-    case Instruction::MUL_LONG:
-    case Instruction::MUL_LONG_2ADDR:
-      call_out = true;
-      ret_reg = TargetReg(kRet0, kNotWide).GetReg();
-      target = kQuickLmul;
-      break;
-    case Instruction::DIV_LONG:
-    case Instruction::DIV_LONG_2ADDR:
-      call_out = true;
-      check_zero = true;
-      ret_reg = TargetReg(kRet0, kNotWide).GetReg();
-      target = kQuickLdiv;
-      break;
-    case Instruction::REM_LONG:
-    case Instruction::REM_LONG_2ADDR:
-      call_out = true;
-      check_zero = true;
-      target = kQuickLmod;
-      /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */
-      ret_reg = (cu_->instruction_set == kThumb2) ? TargetReg(kArg2, kNotWide).GetReg() :
-          TargetReg(kRet0, kNotWide).GetReg();
-      break;
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::AND_LONG:
-      first_op = kOpAnd;
-      second_op = kOpAnd;
-      break;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      first_op = kOpOr;
-      second_op = kOpOr;
-      break;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      first_op = kOpXor;
-      second_op = kOpXor;
-      break;
-    default:
-      LOG(FATAL) << "Invalid long arith op";
-  }
-  if (!call_out) {
-    GenLong3Addr(first_op, second_op, rl_dest, rl_src1, rl_src2);
-  } else {
-    FlushAllRegs();   /* Send everything to home location */
-    if (check_zero) {
-      RegStorage r_tmp1 = TargetReg(kArg0, kWide);
-      RegStorage r_tmp2 = TargetReg(kArg2, kWide);
-      LoadValueDirectWideFixed(rl_src2, r_tmp2);
-      RegStorage r_tgt = CallHelperSetup(target);
-      if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-        GenDivZeroCheckWide(r_tmp2);
-      }
-      LoadValueDirectWideFixed(rl_src1, r_tmp1);
-      // NOTE: callout here is not a safepoint
-      CallHelper(r_tgt, target, false /* not safepoint */);
-    } else {
-      CallRuntimeHelperRegLocationRegLocation(target, rl_src1, rl_src2, false);
-    }
-    // Adjust the return regs to handle the case of rem returning kArg2/kArg3.
-    if (ret_reg == TargetReg(kRet0, kNotWide).GetReg()) {
-      rl_result = GetReturnWide(kCoreReg);
-    } else {
-      rl_result = GetReturnWideAlt();
-    }
-    StoreValueWide(rl_dest, rl_result);
-  }
-}
-
-void Mir2Lir::GenConst(RegLocation rl_dest, int value) {
-  RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
-  LoadConstantNoClobber(rl_result.reg, value);
-  StoreValue(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest,
-                                RegLocation rl_src, RegisterClass return_reg_class) {
-  /*
-   * Don't optimize the register usage since it calls out to support
-   * functions
-   */
-
-  FlushAllRegs();   /* Send everything to home location */
-  CallRuntimeHelperRegLocation(trampoline, rl_src, false);
-  if (rl_dest.wide) {
-    RegLocation rl_result = GetReturnWide(return_reg_class);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    RegLocation rl_result = GetReturn(return_reg_class);
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
- public:
-  SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
-      : LIRSlowPath(m2l, branch, cont) {
-  }
-
-  void Compile() OVERRIDE {
-    m2l_->ResetRegPool();
-    m2l_->ResetDefTracking();
-    GenerateTargetLabel(kPseudoSuspendTarget);
-    m2l_->CallRuntimeHelper(kQuickTestSuspend, true);
-    if (cont_ != nullptr) {
-      m2l_->OpUnconditionalBranch(cont_);
-    }
-  }
-};
-
-/* Check whether we need to test for a pending suspend request. */
-void Mir2Lir::GenSuspendTest(int opt_flags) {
-  if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK) != 0) {
-    return;
-  }
-  if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
-    FlushAllRegs();
-    LIR* branch = OpTestSuspend(nullptr);
-    LIR* cont = NewLIR0(kPseudoTargetLabel);
-    AddSlowPath(new (arena_) SuspendCheckSlowPath(this, branch, cont));
-  } else {
-    FlushAllRegs();     // TODO: needed?
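-    // Implicit check: CheckSuspendUsingLoad reads the thread's suspend trigger; a suspend
-    // request makes that load fault, and the fault handler performs the suspension.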
-    LIR* inst = CheckSuspendUsingLoad();
-    MarkSafepointPC(inst);
-  }
-}
-
-/* Check whether we need to test for a pending suspend request. */
-void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
-  if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK) != 0) {
-    OpUnconditionalBranch(target);
-    return;
-  }
-  if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
-    OpTestSuspend(target);
-    FlushAllRegs();
-    LIR* branch = OpUnconditionalBranch(nullptr);
-    AddSlowPath(new (arena_) SuspendCheckSlowPath(this, branch, target));
-  } else {
-    // For the implicit suspend check, just perform the trigger
-    // load and branch to the target.
-    FlushAllRegs();
-    LIR* inst = CheckSuspendUsingLoad();
-    MarkSafepointPC(inst);
-    OpUnconditionalBranch(target);
-  }
-}
-
-/* Call out to helper assembly routine that will null check obj and then lock it. */
-void Mir2Lir::GenMonitorEnter(int opt_flags ATTRIBUTE_UNUSED, RegLocation rl_src) {
-  // TODO: avoid null check with specialized non-null helper.
-  FlushAllRegs();
-  CallRuntimeHelperRegLocation(kQuickLockObject, rl_src, true);
-}
-
-/* Call out to helper assembly routine that will null check obj and then unlock it. */
-void Mir2Lir::GenMonitorExit(int opt_flags ATTRIBUTE_UNUSED, RegLocation rl_src) {
-  // TODO: avoid null check with specialized non-null helper.
-  FlushAllRegs();
-  CallRuntimeHelperRegLocation(kQuickUnlockObject, rl_src, true);
-}
-
-/* Generic code for generating a wide constant into a VR. */
-void Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
-  RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
-  LoadConstantWide(rl_result.reg, value);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void Mir2Lir::GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
-  DCHECK(bb != nullptr);
-  ArenaVector<SuccessorBlockInfo*>::const_iterator succ_bb_iter = bb->successor_blocks.cbegin();
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
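-  // Packed-switch payload layout: ushort ident (0x0100), ushort size, int first_key,
-  // then 'size' relative branch targets.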
-  const uint16_t entries = table[1];
-  // Chained cmp-and-branch.
-  const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
-  int32_t starting_key = as_int32[0];
-  rl_src = LoadValue(rl_src, kCoreReg);
-  int i = 0;
-  for (; i < entries; ++i, ++succ_bb_iter) {
-    if (!InexpensiveConstantInt(starting_key + i, Instruction::Code::IF_EQ)) {
-      // Switch to using a temp and add.
-      break;
-    }
-    SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
-    DCHECK(successor_block_info != nullptr);
-    int case_block_id = successor_block_info->block;
-    DCHECK_EQ(starting_key + i, successor_block_info->key);
-    OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block_id]);
-  }
-  if (i < entries) {
-    // The rest do not seem to be inexpensive. Try to allocate a temp and use add.
-    RegStorage key_temp = AllocTypedTemp(false, kCoreReg, false);
-    if (key_temp.Valid()) {
-      LoadConstantNoClobber(key_temp, starting_key + i);
-      for (; i < entries - 1; ++i, ++succ_bb_iter) {
-        SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
-        DCHECK(successor_block_info != nullptr);
-        int case_block_id = successor_block_info->block;
-        DCHECK_EQ(starting_key + i, successor_block_info->key);
-        OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block_id]);
-        OpRegImm(kOpAdd, key_temp, 1);  // Increment key.
-      }
-      SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
-      DCHECK(successor_block_info != nullptr);
-      int case_block_id = successor_block_info->block;
-      DCHECK_EQ(starting_key + i, successor_block_info->key);
-      OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block_id]);
-    } else {
-      // No free temp, just finish the old loop.
-      for (; i < entries; ++i, ++succ_bb_iter) {
-        SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
-        DCHECK(successor_block_info != nullptr);
-        int case_block_id = successor_block_info->block;
-        DCHECK_EQ(starting_key + i, successor_block_info->key);
-        OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block_id]);
-      }
-    }
-  }
-}
-
-void Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  if (cu_->verbose) {
-    DumpPackedSwitchTable(table);
-  }
-
-  const uint16_t entries = table[1];
-  if (entries <= kSmallSwitchThreshold) {
-    GenSmallPackedSwitch(mir, table_offset, rl_src);
-  } else {
-    // Use the backend-specific implementation.
-    GenLargePackedSwitch(mir, table_offset, rl_src);
-  }
-}
-
-void Mir2Lir::GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
-  DCHECK(bb != nullptr);
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  const uint16_t entries = table[1];
-  // Chained cmp-and-branch.
-  rl_src = LoadValue(rl_src, kCoreReg);
-  int i = 0;
-  for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-    int case_block_id = successor_block_info->block;
-    int key = successor_block_info->key;
-    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block_id]);
-    i++;
-  }
-  DCHECK_EQ(i, entries);
-}
-
-void Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  if (cu_->verbose) {
-    DumpSparseSwitchTable(table);
-  }
-
-  const uint16_t entries = table[1];
-  if (entries <= kSmallSwitchThreshold) {
-    GenSmallSparseSwitch(mir, table_offset, rl_src);
-  } else {
-    // Use the backend-specific implementation.
-    GenLargeSparseSwitch(mir, table_offset, rl_src);
-  }
-}
-
-bool Mir2Lir::SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type) {
-  switch (size) {
-    case kReference:
-      return type == Primitive::kPrimNot;
-    case k64:
-    case kDouble:
-      return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
-    case k32:
-    case kSingle:
-      return type == Primitive::kPrimInt || type == Primitive::kPrimFloat;
-    case kSignedHalf:
-      return type == Primitive::kPrimShort;
-    case kUnsignedHalf:
-      return type == Primitive::kPrimChar;
-    case kSignedByte:
-      return type == Primitive::kPrimByte;
-    case kUnsignedByte:
-      return type == Primitive::kPrimBoolean;
-    case kWord:  // Intentional fallthrough.
-    default:
-      return false;  // There are no sane types with this op size.
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
deleted file mode 100755
index 422d82f..0000000
--- a/compiler/dex/quick/gen_invoke.cc
+++ /dev/null
@@ -1,1623 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_to_lir-inl.h"
-
-#include "arm/codegen_arm.h"
-#include "dex/compiler_ir.h"
-#include "dex/dex_flags.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "invoke_type.h"
-#include "mirror/array.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/string.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-
-// Shortcuts to repeatedly used long types.
-typedef mirror::ObjectArray<mirror::Object> ObjArray;
-
-/*
- * This source file contains "gen" codegen routines that should
- * be applicable to most targets.  Only mid-level support utilities
- * and "op" calls may be used here.
- */
-
-void Mir2Lir::AddIntrinsicSlowPath(CallInfo* info, LIR* branch, LIR* resume) {
-  class IntrinsicSlowPathPath : public Mir2Lir::LIRSlowPath {
-   public:
-    IntrinsicSlowPathPath(Mir2Lir* m2l, CallInfo* info_in, LIR* branch_in, LIR* resume_in)
-        : LIRSlowPath(m2l, branch_in, resume_in), info_(info_in) {
-      DCHECK_EQ(info_in->offset, current_dex_pc_);
-    }
-
-    void Compile() {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoIntrinsicRetry);
-      // NOTE: GenInvokeNoInline() handles MarkSafepointPC.
-      m2l_->GenInvokeNoInline(info_);
-      if (cont_ != nullptr) {
-        m2l_->OpUnconditionalBranch(cont_);
-      }
-    }
-
-   private:
-    CallInfo* const info_;
-  };
-
-  AddSlowPath(new (arena_) IntrinsicSlowPathPath(this, info, branch, resume));
-}
-
-/*
- * To save scheduling time, helper calls are broken into two parts: generation of
- * the helper target address, and the actual call to the helper.  Because x86
- * has a memory call operation, part 1 is a NOP for x86.  For other targets,
- * load arguments between the two parts.
- */
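-// A minimal usage sketch (illustrative, not from the original source), using the
-// kQuickLockObject entrypoint as an example:
-//   RegStorage r_tgt = CallHelperSetup(kQuickLockObject);   // Part 1: target address (NOP on x86).
-//   LoadValueDirectFixed(rl_src, TargetReg(kArg0, rl_src)); // Load arguments between the parts.
-//   ClobberCallerSave();
-//   CallHelper(r_tgt, kQuickLockObject, /* safepoint_pc */ true);  // Part 2: the call.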
-// template <size_t pointer_size>
-RegStorage Mir2Lir::CallHelperSetup(QuickEntrypointEnum trampoline) {
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-    return RegStorage::InvalidReg();
-  } else {
-    return LoadHelper(trampoline);
-  }
-}
-
-LIR* Mir2Lir::CallHelper(RegStorage r_tgt, QuickEntrypointEnum trampoline, bool safepoint_pc,
-                         bool use_link) {
-  LIR* call_inst = InvokeTrampoline(use_link ? kOpBlx : kOpBx, r_tgt, trampoline);
-
-  if (r_tgt.Valid()) {
-    FreeTemp(r_tgt);
-  }
-
-  if (safepoint_pc) {
-    MarkSafepointPC(call_inst);
-  }
-  return call_inst;
-}
-
-void Mir2Lir::CallRuntimeHelper(QuickEntrypointEnum trampoline, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImm(QuickEntrypointEnum trampoline, int arg0, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperReg(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                   bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  OpRegCopy(TargetReg(kArg0, arg0.GetWideKind()), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegLocation(QuickEntrypointEnum trampoline, RegLocation arg0,
-                                           bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  if (arg0.wide == 0) {
-    LoadValueDirectFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, arg0));
-  } else {
-    LoadValueDirectWideFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, kWide));
-  }
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmImm(QuickEntrypointEnum trampoline, int arg0, int arg1,
-                                      bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmRegLocation(QuickEntrypointEnum trampoline, int arg0,
-                                              RegLocation arg1, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  if (arg1.wide == 0) {
-    LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
-  } else {
-    RegStorage r_tmp = TargetReg(kArg2, kWide);
-    LoadValueDirectWideFixed(arg1, r_tmp);
-  }
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegLocationImm(QuickEntrypointEnum trampoline, RegLocation arg0,
-                                              int arg1, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  DCHECK(!arg0.wide);
-  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
-  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmReg(QuickEntrypointEnum trampoline, int arg0, RegStorage arg1,
-                                      bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  OpRegCopy(TargetReg(kArg1, arg1.GetWideKind()), arg1);
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegImm(QuickEntrypointEnum trampoline, RegStorage arg0, int arg1,
-                                      bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  OpRegCopy(TargetReg(kArg0, arg0.GetWideKind()), arg0);
-  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmMethod(QuickEntrypointEnum trampoline, int arg0,
-                                         bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegMethod(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                         bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.GetWideKind()), arg0));
-  RegStorage r_tmp = TargetReg(kArg0, arg0.GetWideKind());
-  if (r_tmp.NotExactlyEquals(arg0)) {
-    OpRegCopy(r_tmp, arg0);
-  }
-  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegRegLocationMethod(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                                    RegLocation arg1, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  DCHECK(!IsSameReg(TargetReg(kArg2, arg0.GetWideKind()), arg0));
-  RegStorage r_tmp = TargetReg(kArg0, arg0.GetWideKind());
-  if (r_tmp.NotExactlyEquals(arg0)) {
-    OpRegCopy(r_tmp, arg0);
-  }
-  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
-  LoadCurrMethodDirect(TargetReg(kArg2, kRef));
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(QuickEntrypointEnum trampoline,
-                                                      RegLocation arg0, RegLocation arg1,
-                                                      bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  if (cu_->instruction_set == kArm64 || cu_->instruction_set == kMips64 ||
-      cu_->instruction_set == kX86_64) {
-    RegStorage arg0_reg = TargetReg((arg0.fp) ? kFArg0 : kArg0, arg0);
-
-    RegStorage arg1_reg;
-    if (arg1.fp == arg0.fp) {
-      arg1_reg = TargetReg((arg1.fp) ? kFArg1 : kArg1, arg1);
-    } else {
-      arg1_reg = TargetReg((arg1.fp) ? kFArg0 : kArg0, arg1);
-    }
-
-    if (arg0.wide == 0) {
-      LoadValueDirectFixed(arg0, arg0_reg);
-    } else {
-      LoadValueDirectWideFixed(arg0, arg0_reg);
-    }
-
-    if (arg1.wide == 0) {
-      LoadValueDirectFixed(arg1, arg1_reg);
-    } else {
-      LoadValueDirectWideFixed(arg1, arg1_reg);
-    }
-  } else {
-    DCHECK(!cu_->target64);
-    if (arg0.wide == 0) {
-      LoadValueDirectFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, kNotWide));
-      if (arg1.wide == 0) {
-        // For Mips, when the first arg is integral, the remaining args are passed in core regs.
-        if (cu_->instruction_set == kMips) {
-          LoadValueDirectFixed(arg1, TargetReg((arg1.fp && arg0.fp) ? kFArg2 : kArg1, kNotWide));
-        } else {
-          LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg1 : kArg1, kNotWide));
-        }
-      } else {
-        // For Mips, when the first arg is integral, the remaining args are passed in core regs.
-        if (cu_->instruction_set == kMips) {
-          LoadValueDirectWideFixed(arg1, TargetReg((arg1.fp && arg0.fp) ? kFArg2 : kArg2, kWide));
-        } else {
-          LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg1 : kArg1, kWide));
-        }
-      }
-    } else {
-      LoadValueDirectWideFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, kWide));
-      if (arg1.wide == 0) {
-        // For Mips, when the first arg is integral, the remaining args are passed in core regs.
-        if (cu_->instruction_set == kMips) {
-          LoadValueDirectFixed(arg1, TargetReg((arg1.fp && arg0.fp) ? kFArg2 : kArg2, kNotWide));
-        } else {
-          LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kNotWide));
-        }
-      } else {
-        // For Mips, when the first arg is integral, the remaining args are passed in core regs.
-        if (cu_->instruction_set == kMips) {
-          LoadValueDirectWideFixed(arg1, TargetReg((arg1.fp && arg0.fp) ? kFArg2 : kArg2, kWide));
-        } else {
-          LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kWide));
-        }
-      }
-    }
-  }
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) {
-  WideKind arg0_kind = arg0.GetWideKind();
-  WideKind arg1_kind = arg1.GetWideKind();
-  if (IsSameReg(arg1, TargetReg(kArg0, arg1_kind))) {
-    if (IsSameReg(arg0, TargetReg(kArg1, arg0_kind))) {
-      // Swap kArg0 and kArg1 with kArg2 as temp.
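-      // Illustrative note: if arg0 sits in kArg1 and arg1 sits in kArg0, direct
-      // copies would clobber a source, hence the three-way rotation through kArg2.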
-      OpRegCopy(TargetReg(kArg2, arg1_kind), arg1);
-      OpRegCopy(TargetReg(kArg0, arg0_kind), arg0);
-      OpRegCopy(TargetReg(kArg1, arg1_kind), TargetReg(kArg2, arg1_kind));
-    } else {
-      OpRegCopy(TargetReg(kArg1, arg1_kind), arg1);
-      OpRegCopy(TargetReg(kArg0, arg0_kind), arg0);
-    }
-  } else {
-    OpRegCopy(TargetReg(kArg0, arg0_kind), arg0);
-    OpRegCopy(TargetReg(kArg1, arg1_kind), arg1);
-  }
-}
-
-void Mir2Lir::CallRuntimeHelperRegReg(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                      RegStorage arg1, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  CopyToArgumentRegs(arg0, arg1);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegRegImm(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                         RegStorage arg1, int arg2, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  CopyToArgumentRegs(arg0, arg1);
-  LoadConstant(TargetReg(kArg2, kNotWide), arg2);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmRegLocationMethod(QuickEntrypointEnum trampoline, int arg0,
-                                                    RegLocation arg1, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
-  LoadCurrMethodDirect(TargetReg(kArg2, kRef));
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmImmMethod(QuickEntrypointEnum trampoline, int arg0, int arg1,
-                                            bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadCurrMethodDirect(TargetReg(kArg2, kRef));
-  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(QuickEntrypointEnum trampoline, int arg0,
-                                                         RegLocation arg1,
-                                                         RegLocation arg2, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);  // The static_cast works around an
-                                                        // instantiation bug in GCC.
-  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
-  if (arg2.wide == 0) {
-    LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
-  } else {
-    LoadValueDirectWideFixed(arg2, TargetReg(kArg2, kWide));
-  }
-  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(
-    QuickEntrypointEnum trampoline,
-    RegLocation arg0,
-    RegLocation arg1,
-    RegLocation arg2,
-    bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
-  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
-  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
-    QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1, RegLocation arg2,
-    RegLocation arg3, bool safepoint_pc) {
-  RegStorage r_tgt = CallHelperSetup(trampoline);
-  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
-  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
-  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
-  LoadValueDirectFixed(arg3, TargetReg(kArg3, arg3));
-  ClobberCallerSave();
-  CallHelper(r_tgt, trampoline, safepoint_pc);
-}
-
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform initial
- * assignment of promoted arguments.
- *
- * ArgLocs is an array of location records describing the incoming arguments
- * with one location record per word of argument.
- */
-// TODO: Support 64-bit argument registers.
-void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
-  /*
-   * Dummy up a RegLocation for the incoming ArtMethod*.
-   * It will attempt to keep kArg0 live (or copy it to its home location
-   * if promoted).
-   */
-  RegLocation rl_src = rl_method;
-  rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0, kRef);
-  rl_src.home = false;
-  MarkLive(rl_src);
-  if (cu_->target64) {
-    DCHECK(rl_method.wide);
-    StoreValueWide(rl_method, rl_src);
-  } else {
-    StoreValue(rl_method, rl_src);
-  }
-  // If Method* has been promoted, explicitly flush it to the frame.
-  if (rl_method.location == kLocPhysReg) {
-    StoreBaseDisp(TargetPtrReg(kSp), 0, rl_src.reg, kWord, kNotVolatile);
-  }
-
-  if (mir_graph_->GetNumOfInVRs() == 0) {
-    return;
-  }
-
-  int start_vreg = mir_graph_->GetFirstInVR();
-  /*
-   * Copy incoming arguments to their proper home locations.
-   * NOTE: an older version of dx had an issue in which
-   * it would reuse static method argument registers.
-   * This could result in the same Dalvik virtual register
-   * being promoted to both core and fp regs. To account for this,
-   * we only copy to the corresponding promoted physical register
-   * if it matches the type of the SSA name for the incoming
-   * argument.  It is also possible that long and double arguments
-   * end up half-promoted.  In those cases, we must flush the promoted
-   * half to memory as well.
-   */
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  RegLocation* t_loc = nullptr;
-  EnsureInitializedArgMappingToPhysicalReg();
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) {
-    // get reg corresponding to input
-    RegStorage reg = in_to_reg_storage_mapping_.GetReg(i);
-    t_loc = &ArgLocs[i];
-
-    // If the wide input was mapped to a register that is not 64-bit,
-    // treat it as if it arrives from memory.
-    if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) {
-      // The memory already holds the half. Don't do anything.
-      reg = RegStorage::InvalidReg();
-    }
-
-    if (reg.Valid()) {
-      // If arriving in register.
-
-      // We have already updated the arg location with the promotion info,
-      // so we can rely on it.
-      if (t_loc->location == kLocPhysReg) {
-        // Just copy it.
-        if (t_loc->wide) {
-          OpRegCopyWide(t_loc->reg, reg);
-        } else {
-          OpRegCopy(t_loc->reg, reg);
-        }
-      } else {
-        // Needs flush.
-        int offset = SRegOffset(start_vreg + i);
-        if (t_loc->ref) {
-          StoreRefDisp(TargetPtrReg(kSp), offset, reg, kNotVolatile);
-        } else {
-          StoreBaseDisp(TargetPtrReg(kSp), offset, reg, t_loc->wide ? k64 : k32, kNotVolatile);
-        }
-      }
-    } else {
-      // If arriving in frame & promoted.
-      if (t_loc->location == kLocPhysReg) {
-        int offset = SRegOffset(start_vreg + i);
-        if (t_loc->ref) {
-          LoadRefDisp(TargetPtrReg(kSp), offset, t_loc->reg, kNotVolatile);
-        } else {
-          LoadBaseDisp(TargetPtrReg(kSp), offset, t_loc->reg, t_loc->wide ? k64 : k32,
-                       kNotVolatile);
-        }
-      }
-    }
-  }
-}
-
-static void CommonCallCodeLoadThisIntoArg1(const CallInfo* info, Mir2Lir* cg) {
-  RegLocation rl_arg = info->args[0];
-  cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1, kRef));
-}
-
-static void CommonCallCodeLoadClassIntoArg0(const CallInfo* info, Mir2Lir* cg) {
-  cg->GenNullCheck(cg->TargetReg(kArg1, kRef), info->opt_flags);
-  // get this->klass_ [use kArg1, set kArg0]
-  cg->LoadRefDisp(cg->TargetReg(kArg1, kRef), mirror::Object::ClassOffset().Int32Value(),
-                  cg->TargetReg(kArg0, kRef),
-                  kNotVolatile);
-  cg->MarkPossibleNullPointerException(info->opt_flags);
-}
-
-static bool CommonCallCodeLoadCodePointerIntoInvokeTgt(const RegStorage* alt_from,
-                                                       const CompilationUnit* cu, Mir2Lir* cg) {
-  if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-    int32_t offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-        InstructionSetPointerSize(cu->instruction_set)).Int32Value();
-    // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
-    cg->LoadWordDisp(alt_from == nullptr ? cg->TargetReg(kArg0, kRef) : *alt_from, offset,
-                     cg->TargetPtrReg(kInvokeTgt));
-    return true;
-  }
-  return false;
-}
-
-/*
- * Bit of a hack here - in the absence of a real scheduling pass,
- * emit the next instruction in a virtual invoke sequence.
- * We can use kLr as a temp prior to target address loading.
- * Note also that we'll load the first argument ("this") into
- * kArg1 here rather than via the standard GenDalvikArgs.
- */
-static int NextVCallInsn(CompilationUnit* cu, CallInfo* info,
-                         int state, const MethodReference& target_method ATTRIBUTE_UNUSED,
-                         uint32_t method_idx, uintptr_t, uintptr_t,
-                         InvokeType) {
-  Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
-  /*
-   * This is the fast path in which the target virtual method is
-   * fully resolved at compile time.
-   */
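-  // Illustrative summary of the state machine below:
-  //   state 0: kArg1 := this
-  //   state 1: kArg0 := kArg1->klass_             (includes a null check)
-  //   state 2: kArg0 := kArg0->vtable[method_idx]
-  //   state 3: kInvokeTgt := kArg0->entrypoint    (skipped on x86/x86-64)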
-  switch (state) {
-    case 0:
-      CommonCallCodeLoadThisIntoArg1(info, cg);   // kArg1 := this
-      break;
-    case 1:
-      CommonCallCodeLoadClassIntoArg0(info, cg);  // kArg0 := kArg1->class
-                                                  // Includes a null-check.
-      break;
-    case 2: {
-      // Get this->klass_.embedded_vtable[method_idx] [use kArg0, set kArg0]
-      const size_t pointer_size = InstructionSetPointerSize(
-          cu->compiler_driver->GetInstructionSet());
-      int32_t offset = mirror::Class::EmbeddedVTableEntryOffset(
-          method_idx, pointer_size).Uint32Value();
-      // Load target method from embedded vtable to kArg0 [use kArg0, set kArg0]
-      cg->LoadWordDisp(cg->TargetPtrReg(kArg0), offset, cg->TargetPtrReg(kArg0));
-      break;
-    }
-    case 3:
-      if (CommonCallCodeLoadCodePointerIntoInvokeTgt(nullptr, cu, cg)) {
-        break;                                    // kInvokeTgt := kArg0->entrypoint
-      }
-      DCHECK(cu->instruction_set == kX86 || cu->instruction_set == kX86_64);
-      FALLTHROUGH_INTENDED;
-    default:
-      return -1;
-  }
-  return state + 1;
-}
-
-/*
- * Emit the next instruction in an invoke interface sequence. This will do a lookup in the
- * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if
- * more than one interface method maps to the same index. Note also that we'll load the first
- * argument ("this") into kArg1 here rather than via the standard GenDalvikArgs.
- */
-static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state,
-                                 const MethodReference& target_method,
-                                 uint32_t method_idx, uintptr_t, uintptr_t, InvokeType) {
-  Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
-
-  switch (state) {
-    case 0:  // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)]
-      CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-      cg->LoadConstant(cg->TargetReg(kHiddenArg, kNotWide), target_method.dex_method_index);
-      if (cu->instruction_set == kX86) {
-        cg->OpRegCopy(cg->TargetReg(kHiddenFpArg, kNotWide), cg->TargetReg(kHiddenArg, kNotWide));
-      }
-      break;
-    case 1:
-      CommonCallCodeLoadThisIntoArg1(info, cg);   // kArg1 := this
-      break;
-    case 2:
-      CommonCallCodeLoadClassIntoArg0(info, cg);  // kArg0 := kArg1->class
-                                                  // Includes a null-check.
-      break;
-    case 3: {  // Get target method [use kInvokeTgt, set kArg0]
-      const size_t pointer_size = InstructionSetPointerSize(
-          cu->compiler_driver->GetInstructionSet());
-      int32_t offset = mirror::Class::EmbeddedImTableEntryOffset(
-          method_idx % mirror::Class::kImtSize, pointer_size).Uint32Value();
-      // Load target method from embedded imtable to kArg0 [use kArg0, set kArg0]
-      cg->LoadWordDisp(cg->TargetPtrReg(kArg0), offset, cg->TargetPtrReg(kArg0));
-      break;
-    }
-    case 4:
-      if (CommonCallCodeLoadCodePointerIntoInvokeTgt(nullptr, cu, cg)) {
-        break;                                    // kInvokeTgt := kArg0->entrypoint
-      }
-      DCHECK(cu->instruction_set == kX86 || cu->instruction_set == kX86_64);
-      FALLTHROUGH_INTENDED;
-    default:
-      return -1;
-  }
-  return state + 1;
-}
-
-static int NextInvokeInsnSP(CompilationUnit* cu,
-                            CallInfo* info ATTRIBUTE_UNUSED,
-                            QuickEntrypointEnum trampoline,
-                            int state,
-                            const MethodReference& target_method,
-                            uint32_t method_idx ATTRIBUTE_UNUSED) {
-  Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
-
-  /*
-   * This handles the case in which the base method is not fully
-   * resolved at compile time; we bail to a runtime helper.
-   */
-  if (state == 0) {
-    if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-      // Load trampoline target
-      int32_t disp;
-      if (cu->target64) {
-        disp = GetThreadOffset<8>(trampoline).Int32Value();
-      } else {
-        disp = GetThreadOffset<4>(trampoline).Int32Value();
-      }
-      cg->LoadWordDisp(cg->TargetPtrReg(kSelf), disp, cg->TargetPtrReg(kInvokeTgt));
-    }
-    // Load kArg0 with method index
-    CHECK_EQ(cu->dex_file, target_method.dex_file);
-    cg->LoadConstant(cg->TargetReg(kArg0, kNotWide), target_method.dex_method_index);
-    return 1;
-  }
-  return -1;
-}
-
-static int NextStaticCallInsnSP(CompilationUnit* cu, CallInfo* info,
-                                int state,
-                                const MethodReference& target_method,
-                                uint32_t, uintptr_t, uintptr_t, InvokeType) {
-  return NextInvokeInsnSP(cu, info, kQuickInvokeStaticTrampolineWithAccessCheck, state,
-                          target_method, 0);
-}
-
-static int NextDirectCallInsnSP(CompilationUnit* cu, CallInfo* info, int state,
-                                const MethodReference& target_method,
-                                uint32_t, uintptr_t, uintptr_t, InvokeType) {
-  return NextInvokeInsnSP(cu, info, kQuickInvokeDirectTrampolineWithAccessCheck, state,
-                          target_method, 0);
-}
-
-static int NextSuperCallInsnSP(CompilationUnit* cu, CallInfo* info, int state,
-                               const MethodReference& target_method,
-                               uint32_t, uintptr_t, uintptr_t, InvokeType) {
-  return NextInvokeInsnSP(cu, info, kQuickInvokeSuperTrampolineWithAccessCheck, state,
-                          target_method, 0);
-}
-
-static int NextVCallInsnSP(CompilationUnit* cu, CallInfo* info, int state,
-                           const MethodReference& target_method,
-                           uint32_t, uintptr_t, uintptr_t, InvokeType) {
-  return NextInvokeInsnSP(cu, info, kQuickInvokeVirtualTrampolineWithAccessCheck, state,
-                          target_method, 0);
-}
-
-static int NextInterfaceCallInsnWithAccessCheck(CompilationUnit* cu,
-                                                CallInfo* info, int state,
-                                                const MethodReference& target_method,
-                                                uint32_t, uintptr_t, uintptr_t, InvokeType) {
-  return NextInvokeInsnSP(cu, info, kQuickInvokeInterfaceTrampolineWithAccessCheck, state,
-                          target_method, 0);
-}
-
-// Default implementation of implicit null pointer check.
-// Overridden by arch specific as necessary.
-void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-    return;
-  }
-  RegStorage tmp = AllocTemp();
-  Load32Disp(reg, 0, tmp);
-  MarkPossibleNullPointerException(opt_flags);
-  FreeTemp(tmp);
-}
-
-/**
- * @brief Used to flush promoted registers if they are used as arguments
- * in an invocation.
- * @param info the information about the arguments for the invocation.
- * @param start the first argument to start looking from.
- */
-void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) {
-  if (cu_->disable_opt & (1 << kPromoteRegs)) {
-    // This makes sense only if promotion is enabled.
-    return;
-  }
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  // Scan the rest of the args - if in phys_reg flush to memory
-  for (size_t next_arg = start; next_arg < info->num_arg_words;) {
-    RegLocation loc = info->args[next_arg];
-    if (loc.wide) {
-      loc = UpdateLocWide(loc);
-      if (loc.location == kLocPhysReg) {
-        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-      }
-      next_arg += 2;
-    } else {
-      loc = UpdateLoc(loc);
-      if (loc.location == kLocPhysReg) {
-        if (loc.ref) {
-          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
-        } else {
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
-                        kNotVolatile);
-        }
-      }
-      next_arg++;
-    }
-  }
-}
-
-/**
- * @brief Used to optimize the copying of VRs which are arguments of an invocation.
- * Note that promoted registers should be flushed first if a copy is done.
- * An implementation may skip several of the first VRs, but once it starts copying
- * it must copy through to the end, and it must return the number of skipped VRs
- * (which might be all of them).
- * @see GenDalvikArgsFlushPromoted
- * @param info the information about the arguments for the invocation.
- * @param first the first argument to start looking from.
- * @param count the number of remaining arguments we can handle.
- * @return the number of arguments which we did not handle. Unhandled arguments
- * must be attached to the first one.
- */
-int Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
-  // The memcpy call is fairly expensive, so only use it when the count is big.
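-  // When triggered, the net effect is (illustratively):
-  //   memcpy(sp + outs_offset, sp + start_offset, count * 4)
-  // with the two addresses materialized into kArg0/kArg1 below.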
-  if (count > 16) {
-    GenDalvikArgsFlushPromoted(info, first);
-    int start_offset = SRegOffset(info->args[first].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set);
-
-    OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset);
-    OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset);
-    CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef),
-                               count * 4, false);
-    count = 0;
-  }
-  return count;
-}
-
-int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
-                           LIR** pcrLabel, NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
-                           InvokeType type, bool skip_this) {
-  // If no arguments, just return.
-  if (info->num_arg_words == 0u)
-    return call_state;
-
-  const size_t start_index = skip_this ? 1 : 0;
-
-  // Get the architecture-dependent mapping between output VRs and physical registers,
-  // based on the shorty of the method to call.
-  InToRegStorageMapping in_to_reg_storage_mapping(arena_);
-  {
-    const char* target_shorty = mir_graph_->GetShortyFromMethodReference(target_method);
-    ShortyIterator shorty_iterator(target_shorty, type == kStatic);
-    in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
-  }
-
-  size_t stack_map_start = std::max(in_to_reg_storage_mapping.GetEndMappedIn(), start_index);
-  if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) {
-    // It is possible that the last mapped reg is 32-bit while the arg is 64-bit;
-    // it will be handled together with the low part mapped to a register.
-    stack_map_start++;
-  }
-  size_t regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
-
-  // If it is a range case, we can try to copy the remaining VRs (those not mapped to
-  // physical registers) using a more optimal algorithm.
-  if (info->is_range && regs_left_to_pass_via_stack > 1) {
-    regs_left_to_pass_via_stack = GenDalvikArgsBulkCopy(info, stack_map_start,
-                                                        regs_left_to_pass_via_stack);
-  }
-
-  // Now handle any remaining VRs mapped to stack.
-  if (in_to_reg_storage_mapping.HasArgumentsOnStack()) {
-    // Use two temps, but not kArg1: it might hold "this", which we can skip.
-    // Keep single and wide separate - it can give some advantage.
-    RegStorage regRef = TargetReg(kArg3, kRef);
-    RegStorage regSingle = TargetReg(kArg3, kNotWide);
-    RegStorage regWide = TargetReg(kArg2, kWide);
-    for (size_t i = start_index; i < stack_map_start + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.GetReg(i);
-      if (!reg.Valid()) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              if (rl_arg.ref) {
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
-              } else {
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
-              }
-            } else {
-              if (rl_arg.ref) {
-                LoadValueDirectFixed(rl_arg, regRef);
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, regRef, kNotVolatile);
-              } else {
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
-              }
-            }
-          }
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with VRs mapped to physical registers.
-  for (size_t i = start_index; i < stack_map_start; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.GetReg(i);
-    if (reg.Valid()) {
-      if (rl_arg.wide) {
-        // If reg is not 64-bit (it holds half of a 64-bit value), handle it separately.
-        if (!reg.Is64Bit()) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.location == kLocPhysReg) {
-            int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-            // Dump it to memory.
-            StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
-            LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
-          } else {
-            int high_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
-            // First, use target reg for high part.
-            LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32,
-                         kNotVolatile);
-            StoreBaseDisp(TargetPtrReg(kSp), high_offset, reg, k32, kNotVolatile);
-            // Now, use target reg for low part.
-            LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile);
-            int low_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-            // And store it to the expected memory location.
-            StoreBaseDisp(TargetPtrReg(kSp), low_offset, reg, k32, kNotVolatile);
-          }
-        } else {
-          LoadValueDirectWideFixed(rl_arg, reg);
-        }
-      } else {
-        LoadValueDirectFixed(rl_arg, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
-    if (rl_arg.wide) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    }
-  }
-  return call_state;
-}
-
-void Mir2Lir::EnsureInitializedArgMappingToPhysicalReg() {
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic);
-    in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
-  }
-}
-
-RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
-  RegLocation res;
-  if (info->result.location == kLocInvalid) {
-    // If the result is unused, return a sink target based on the type of the invoke target.
-    res = GetReturn(
-        ShortyToRegClass(mir_graph_->GetShortyFromMethodReference(info->method_ref)[0]));
-  } else {
-    res = info->result;
-  }
-  return res;
-}
-
-RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) {
-  RegLocation res;
-  if (info->result.location == kLocInvalid) {
-    // If the result is unused, return a sink target based on the type of the invoke target.
-    res = GetReturnWide(ShortyToRegClass(
-        mir_graph_->GetShortyFromMethodReference(info->method_ref)[0]));
-  } else {
-    res = info->result;
-  }
-  return res;
-}
-
-bool Mir2Lir::GenInlinedReferenceGetReferent(CallInfo* info) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-
-  bool use_direct_type_ptr;
-  uintptr_t direct_type_ptr;
-  ClassReference ref;
-  if (!cu_->compiler_driver->CanEmbedReferenceTypeInCode(&ref,
-        &use_direct_type_ptr, &direct_type_ptr)) {
-    return false;
-  }
-
-  RegStorage reg_class = TargetReg(kArg1, kRef);
-  Clobber(reg_class);
-  LockTemp(reg_class);
-  if (use_direct_type_ptr) {
-    LoadConstant(reg_class, direct_type_ptr);
-  } else {
-    uint16_t type_idx = ref.first->GetClassDef(ref.second).class_idx_;
-    LoadClassType(*ref.first, type_idx, kArg1);
-  }
-
-  uint32_t slow_path_flag_offset = cu_->compiler_driver->GetReferenceSlowFlagOffset();
-  uint32_t disable_flag_offset = cu_->compiler_driver->GetReferenceDisableFlagOffset();
-  CHECK(slow_path_flag_offset && disable_flag_offset &&
-        (slow_path_flag_offset != disable_flag_offset));
-
-  // Intrinsic logic starts here.
-  RegLocation rl_obj = info->args[0];
-  rl_obj = LoadValue(rl_obj, kRefReg);
-
-  RegStorage reg_slow_path = AllocTemp();
-  RegStorage reg_disabled = AllocTemp();
-  LoadBaseDisp(reg_class, slow_path_flag_offset, reg_slow_path, kSignedByte, kNotVolatile);
-  LoadBaseDisp(reg_class, disable_flag_offset, reg_disabled, kSignedByte, kNotVolatile);
-  FreeTemp(reg_class);
-  LIR* or_inst = OpRegRegReg(kOpOr, reg_slow_path, reg_slow_path, reg_disabled);
-  FreeTemp(reg_disabled);
-
-  // If the slow path is needed, jump to the JNI path target.
-  LIR* slow_path_branch;
-  if (or_inst->u.m.def_mask->HasBit(ResourceMask::kCCode)) {
-    // Generate only a conditional branch, as the OR sets the condition flags
-    // (we are interested in the 'Z' flag).
-    slow_path_branch = OpCondBranch(kCondNe, nullptr);
-  } else {
-    // Generate compare and branch.
-    slow_path_branch = OpCmpImmBranch(kCondNe, reg_slow_path, 0, nullptr);
-  }
-  FreeTemp(reg_slow_path);
-
-  // Slow path not taken: simply load the referent of the reference object.
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-  GenNullCheck(rl_obj.reg, info->opt_flags);
-  LoadRefDisp(rl_obj.reg, mirror::Reference::ReferentOffset().Int32Value(), rl_result.reg,
-              kNotVolatile);
-  MarkPossibleNullPointerException(info->opt_flags);
-  StoreValue(rl_dest, rl_result);
-
-  LIR* intrinsic_finish = NewLIR0(kPseudoTargetLabel);
-  AddIntrinsicSlowPath(info, slow_path_branch, intrinsic_finish);
-  ClobberCallerSave();  // We must clobber everything because the slow path will return here.
-  return true;
-}
-
-bool Mir2Lir::GenInlinedCharAt(CallInfo* info) {
-  // Location of char array data
-  int value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  int count_offset = mirror::String::CountOffset().Int32Value();
-
-  RegLocation rl_obj = info->args[0];
-  RegLocation rl_idx = info->args[1];
-  rl_obj = LoadValue(rl_obj, kRefReg);
-  rl_idx = LoadValue(rl_idx, kCoreReg);
-  RegStorage reg_max;
-  GenNullCheck(rl_obj.reg, info->opt_flags);
-  bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
-  LIR* range_check_branch = nullptr;
-  if (range_check) {
-    reg_max = AllocTemp();
-    Load32Disp(rl_obj.reg, count_offset, reg_max);
-    MarkPossibleNullPointerException(info->opt_flags);
-    // Set up a slow path to allow retry in case of bounds violation
-    OpRegReg(kOpCmp, rl_idx.reg, reg_max);
-    FreeTemp(reg_max);
-    range_check_branch = OpCondBranch(kCondUge, nullptr);
-  }
-  RegStorage reg_ptr = AllocTempRef();
-  OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, value_offset);
-  FreeTemp(rl_obj.reg);
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexed(reg_ptr, rl_idx.reg, rl_result.reg, 1, kUnsignedHalf);
-  FreeTemp(reg_ptr);
-  StoreValue(rl_dest, rl_result);
-  if (range_check) {
-    DCHECK(range_check_branch != nullptr);
-    info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've already null checked.
-    AddIntrinsicSlowPath(info, range_check_branch);
-  }
-  return true;
-}
-
-bool Mir2Lir::GenInlinedStringGetCharsNoCheck(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
-  // Location of data in char array buffer
-  int data_offset = mirror::Array::DataOffset(char_component_size).Int32Value();
-  // Location of char array data in string
-  int value_offset = mirror::String::ValueOffset().Int32Value();
-
-  RegLocation rl_obj = info->args[0];
-  RegLocation rl_start = info->args[1];
-  RegLocation rl_end = info->args[2];
-  RegLocation rl_buffer = info->args[3];
-  RegLocation rl_index = info->args[4];
-
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers
-  RegStorage reg_dst_ptr = TargetReg(kArg0, kRef);
-  RegStorage reg_src_ptr = TargetReg(kArg1, kRef);
-  RegStorage reg_length = TargetReg(kArg2, kNotWide);
-  RegStorage reg_tmp = TargetReg(kArg3, kNotWide);
-  RegStorage reg_tmp_ptr = RegStorage(RegStorage::k64BitSolo,
-                                      reg_tmp.GetRawBits() & RegStorage::kRegTypeMask);
-
-  LoadValueDirectFixed(rl_buffer, reg_dst_ptr);
-  OpRegImm(kOpAdd, reg_dst_ptr, data_offset);
-  LoadValueDirectFixed(rl_index, reg_tmp);
-  OpRegRegImm(kOpLsl, reg_tmp, reg_tmp, 1);
-  OpRegReg(kOpAdd, reg_dst_ptr, cu_->instruction_set == kArm64 ? reg_tmp_ptr : reg_tmp);
-
-  LoadValueDirectFixed(rl_start, reg_tmp);
-  LoadValueDirectFixed(rl_end, reg_length);
-  OpRegReg(kOpSub, reg_length, reg_tmp);
-  OpRegRegImm(kOpLsl, reg_length, reg_length, 1);
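-  // At this point (illustrative note): reg_length = (end - start) * sizeof(jchar),
-  // i.e. the number of bytes to copy.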
-  LoadValueDirectFixed(rl_obj, reg_src_ptr);
-
-  OpRegImm(kOpAdd, reg_src_ptr, value_offset);
-  OpRegRegImm(kOpLsl, reg_tmp, reg_tmp, 1);
-  OpRegReg(kOpAdd, reg_src_ptr, cu_->instruction_set == kArm64 ? reg_tmp_ptr : reg_tmp);
-
-  RegStorage r_tgt;
-  if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
-    r_tgt = LoadHelper(kQuickMemcpy);
-  } else {
-    r_tgt = RegStorage::InvalidReg();
-  }
-  // NOTE: not a safepoint
-  CallHelper(r_tgt, kQuickMemcpy, false, true);
-
-  return true;
-}
-
-// Generates an inlined String.is_empty or String.length.
-bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  // dst = src.length();
-  RegLocation rl_obj = info->args[0];
-  rl_obj = LoadValue(rl_obj, kRefReg);
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  GenNullCheck(rl_obj.reg, info->opt_flags);
-  Load32Disp(rl_obj.reg, mirror::String::CountOffset().Int32Value(), rl_result.reg);
-  MarkPossibleNullPointerException(info->opt_flags);
-  if (is_empty) {
-    // dst = (dst == 0);
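-    // Illustrative: the x86/x86-64 and arm64 paths compute (len - 1) >>> 31,
-    // which is 1 only when len == 0 (len is a non-negative 32-bit count).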
-    if (cu_->instruction_set == kThumb2) {
-      RegStorage t_reg = AllocTemp();
-      OpRegReg(kOpNeg, t_reg, rl_result.reg);
-      OpRegRegReg(kOpAdc, rl_result.reg, rl_result.reg, t_reg);
-    } else if (cu_->instruction_set == kArm64) {
-      OpRegImm(kOpSub, rl_result.reg, 1);
-      OpRegRegImm(kOpLsr, rl_result.reg, rl_result.reg, 31);
-    } else {
-      DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
-      OpRegImm(kOpSub, rl_result.reg, 1);
-      OpRegImm(kOpLsr, rl_result.reg, 31);
-    }
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedStringFactoryNewStringFromBytes(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_data = info->args[0];
-  RegLocation rl_high = info->args[1];
-  RegLocation rl_offset = info->args[2];
-  RegLocation rl_count = info->args[3];
-  rl_data = LoadValue(rl_data, kRefReg);
-  LIR* data_null_check_branch = OpCmpImmBranch(kCondEq, rl_data.reg, 0, nullptr);
-  AddIntrinsicSlowPath(info, data_null_check_branch);
-  CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
-      kQuickAllocStringFromBytes, rl_data, rl_high, rl_offset, rl_count, true);
-  RegLocation rl_return = GetReturn(kRefReg);
-  RegLocation rl_dest = InlineTarget(info);
-  StoreValue(rl_dest, rl_return);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedStringFactoryNewStringFromChars(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_offset = info->args[0];
-  RegLocation rl_count = info->args[1];
-  RegLocation rl_data = info->args[2];
-  CallRuntimeHelperRegLocationRegLocationRegLocation(
-      kQuickAllocStringFromChars, rl_offset, rl_count, rl_data, true);
-  RegLocation rl_return = GetReturn(kRefReg);
-  RegLocation rl_dest = InlineTarget(info);
-  StoreValue(rl_dest, rl_return);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedStringFactoryNewStringFromString(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_string = info->args[0];
-  rl_string = LoadValue(rl_string, kRefReg);
-  LIR* string_null_check_branch = OpCmpImmBranch(kCondEq, rl_string.reg, 0, nullptr);
-  AddIntrinsicSlowPath(info, string_null_check_branch);
-  CallRuntimeHelperRegLocation(kQuickAllocStringFromString, rl_string, true);
-  RegLocation rl_return = GetReturn(kRefReg);
-  RegLocation rl_dest = InlineTarget(info);
-  StoreValue(rl_dest, rl_return);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src_i = info->args[0];
-  RegLocation rl_i = IsWide(size) ? LoadValueWide(rl_src_i, kCoreReg)
-                                  : LoadValue(rl_src_i, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (IsWide(size)) {
-    if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) {
-      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
-      StoreValueWide(rl_dest, rl_result);
-      return true;
-    }
-    RegStorage r_i_low = rl_i.reg.GetLow();
-    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
-      // The first REV will clobber rl_result.reg.GetLowReg(); save the value in a
-      // temp for the second REV.
-      r_i_low = AllocTemp();
-      OpRegCopy(r_i_low, rl_i.reg);
-    }
-    OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
-    OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
-    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
-      FreeTemp(r_i_low);
-    }
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    DCHECK(size == k32 || size == kSignedHalf);
-    OpKind op = (size == k32) ? kOpRev : kOpRevsh;
-    OpRegReg(op, rl_result.reg, rl_i.reg);
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
-
-bool Mir2Lir::GenInlinedAbsInt(CallInfo* info) {
-  RegLocation rl_dest = InlineTarget(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage sign_reg = AllocTemp();
-  // abs(x): y = x >> 31 (arithmetic shift); abs = (x + y) ^ y.
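-  // Worked example (illustrative): x = -5 gives y = -1 (all ones),
-  // x + y = -6, and (-6) ^ -1 = 5; for x >= 0, y = 0 and x is unchanged.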
-  OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 31);
-  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
-  OpRegReg(kOpXor, rl_result.reg, sign_reg);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
-  RegLocation rl_dest = InlineTargetWide(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  // If on x86 or if we would clobber a register needed later, just copy the source first.
-  if (cu_->instruction_set != kX86_64 &&
-      (cu_->instruction_set == kX86 ||
-       rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg())) {
-    OpRegCopyWide(rl_result.reg, rl_src.reg);
-    if (rl_result.reg.GetLowReg() != rl_src.reg.GetLowReg() &&
-        rl_result.reg.GetLowReg() != rl_src.reg.GetHighReg() &&
-        rl_result.reg.GetHighReg() != rl_src.reg.GetLowReg() &&
-        rl_result.reg.GetHighReg() != rl_src.reg.GetHighReg()) {
-      // Reuse source registers to avoid running out of temps.
-      FreeTemp(rl_src.reg);
-    }
-    rl_src = rl_result;
-  }
-
-  // abs(x): y = x >> 63 (arithmetic shift); abs = (x + y) ^ y (done in halves on 32-bit).
-  RegStorage sign_reg;
-  if (cu_->instruction_set == kX86_64) {
-    sign_reg = AllocTempWide();
-    OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63);
-    OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
-    OpRegReg(kOpXor, rl_result.reg, sign_reg);
-  } else {
-    sign_reg = AllocTemp();
-    OpRegRegImm(kOpAsr, sign_reg, rl_src.reg.GetHigh(), 31);
-    OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), sign_reg);
-    OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), sign_reg);
-    OpRegReg(kOpXor, rl_result.reg.GetLow(), sign_reg);
-    OpRegReg(kOpXor, rl_result.reg.GetHigh(), sign_reg);
-  }
-  FreeTemp(sign_reg);
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedReverseBits(CallInfo* info ATTRIBUTE_UNUSED, OpSize size ATTRIBUTE_UNUSED) {
-  // Currently implemented only for ARM64.
-  return false;
-}
-
-bool Mir2Lir::GenInlinedMinMaxFP(CallInfo* info ATTRIBUTE_UNUSED,
-                                 bool is_min ATTRIBUTE_UNUSED,
-                                 bool is_double ATTRIBUTE_UNUSED) {
-  // Currently implemented only for ARM64.
-  return false;
-}
-
-bool Mir2Lir::GenInlinedCeil(CallInfo* info ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-bool Mir2Lir::GenInlinedFloor(CallInfo* info ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-bool Mir2Lir::GenInlinedRint(CallInfo* info ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-bool Mir2Lir::GenInlinedRound(CallInfo* info ATTRIBUTE_UNUSED, bool is_double ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  RegLocation rl_dest = InlineTarget(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src = info->args[0];
-  StoreValue(rl_dest, rl_src);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedDoubleCvt(CallInfo* info) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  RegLocation rl_dest = InlineTargetWide(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src = info->args[0];
-  StoreValueWide(rl_dest, rl_src);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-
-/*
- * Fast String.indexOf(I) & (II).  Tests for simple case of char <= 0xFFFF,
- * otherwise bails to standard library code.
- */
-bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
-  RegLocation rl_obj = info->args[0];
-  RegLocation rl_char = info->args[1];
-  if (rl_char.is_const && (mir_graph_->ConstantValue(rl_char) & ~0xFFFF) != 0) {
-    // Code point beyond 0xFFFF. Punt to the real String.indexOf().
-    return false;
-  }
-
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers
-  RegStorage reg_ptr = TargetReg(kArg0, kRef);
-  RegStorage reg_char = TargetReg(kArg1, kNotWide);
-  RegStorage reg_start = TargetReg(kArg2, kNotWide);
-
-  LoadValueDirectFixed(rl_obj, reg_ptr);
-  LoadValueDirectFixed(rl_char, reg_char);
-  if (zero_based) {
-    LoadConstant(reg_start, 0);
-  } else {
-    RegLocation rl_start = info->args[2];     // 3rd arg only present in III flavor of IndexOf.
-    LoadValueDirectFixed(rl_start, reg_start);
-  }
-  RegStorage r_tgt = LoadHelper(kQuickIndexOf);
-  GenExplicitNullCheck(reg_ptr, info->opt_flags);
-  LIR* high_code_point_branch =
-      rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr);
-  // NOTE: not a safepoint
-  OpReg(kOpBlx, r_tgt);
-  if (!rl_char.is_const) {
-    // Add the slow path for code points beyond 0xFFFF.
-    DCHECK(high_code_point_branch != nullptr);
-    LIR* resume_tgt = NewLIR0(kPseudoTargetLabel);
-    info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
-    AddIntrinsicSlowPath(info, high_code_point_branch, resume_tgt);
-    ClobberCallerSave();  // We must clobber everything because the slow path will return here
-  } else {
-    DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0);
-    DCHECK(high_code_point_branch == nullptr);
-  }
-  RegLocation rl_return = GetReturn(kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-  StoreValue(rl_dest, rl_return);
-  return true;
-}
-
-/* Fast String.compareTo(Ljava/lang/String;)I. */
-bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers
-  RegStorage reg_this = TargetReg(kArg0, kRef);
-  RegStorage reg_cmp = TargetReg(kArg1, kRef);
-
-  RegLocation rl_this = info->args[0];
-  RegLocation rl_cmp = info->args[1];
-  LoadValueDirectFixed(rl_this, reg_this);
-  LoadValueDirectFixed(rl_cmp, reg_cmp);
-  RegStorage r_tgt;
-  if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
-    r_tgt = LoadHelper(kQuickStringCompareTo);
-  } else {
-    r_tgt = RegStorage::InvalidReg();
-  }
-  GenExplicitNullCheck(reg_this, info->opt_flags);
-  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
-  // TUNING: check if rl_cmp.s_reg_low is already null checked
-  LIR* cmp_null_check_branch = OpCmpImmBranch(kCondEq, reg_cmp, 0, nullptr);
-  AddIntrinsicSlowPath(info, cmp_null_check_branch);
-  // NOTE: not a safepoint
-  CallHelper(r_tgt, kQuickStringCompareTo, false, true);
-  RegLocation rl_return = GetReturn(kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-  StoreValue(rl_dest, rl_return);
-  return true;
-}
-
-bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
-  RegLocation rl_dest = InlineTarget(info);
-
-  // Early exit if the result is unused.
-  if (rl_dest.orig_sreg < 0) {
-    return true;
-  }
-
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-
-  if (cu_->target64) {
-    LoadRefDisp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg,
-                kNotVolatile);
-  } else {
-    Load32Disp(TargetPtrReg(kSelf), Thread::PeerOffset<4>().Int32Value(), rl_result.reg);
-  }
-
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
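As an analogy (portable C++, not what the backend emits), the inlined Thread.currentThread() is a single load at a fixed offset from the per-thread "self" base register, much like reading a field of a thread_local block; SelfBlock and its peer field are illustrative names:

    // Stand-in for the runtime's per-thread structure.
    struct SelfBlock {
      void* peer;  // analogous to the field at Thread::PeerOffset<n>()
    };
    thread_local SelfBlock tls_self{nullptr};

    void* CurrentThreadPeer() {
      return tls_self.peer;  // one load from [self + PeerOffset]
    }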
-
-bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info,
-                                  bool is_long, bool is_object, bool is_volatile) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj = info->args[1];  // Object
-  RegLocation rl_src_offset = info->args[2];  // long low
-  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
-  RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
-
-  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, is_object ? kRefReg : kCoreReg, true);
-  if (is_long) {
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64
-        || cu_->instruction_set == kArm64) {
-      LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k64);
-    } else {
-      RegStorage rl_temp_offset = AllocTemp();
-      OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
-      LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64, kNotVolatile);
-      FreeTemp(rl_temp_offset);
-    }
-  } else {
-    if (rl_result.ref) {
-      LoadRefIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0);
-    } else {
-      LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32);
-    }
-  }
-
-  if (is_volatile) {
-    GenMemBarrier(kLoadAny);
-  }
-
-  if (is_long) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
-
-bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long,
-                                  bool is_object, bool is_volatile, bool is_ordered) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    // TODO: add Mips and Mips64 implementations.
-    return false;
-  }
-  // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj = info->args[1];  // Object
-  RegLocation rl_src_offset = info->args[2];  // long low
-  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
-  RegLocation rl_src_value = info->args[4];  // value to store
-  if (is_volatile || is_ordered) {
-    GenMemBarrier(kAnyStore);
-  }
-  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-  RegLocation rl_value;
-  if (is_long) {
-    rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64
-        || cu_->instruction_set == kArm64) {
-      StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k64);
-    } else {
-      RegStorage rl_temp_offset = AllocTemp();
-      OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
-      StoreBaseDisp(rl_temp_offset, 0, rl_value.reg, k64, kNotVolatile);
-      FreeTemp(rl_temp_offset);
-    }
-  } else {
-    rl_value = LoadValue(rl_src_value, is_object ? kRefReg : kCoreReg);
-    if (rl_value.ref) {
-      StoreRefIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0);
-    } else {
-      StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
-    }
-  }
-
-  // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
-  FreeTemp(rl_offset.reg);
-
-  if (is_volatile) {
-    // Prevent reordering with a subsequent volatile load.
-    // May also be needed to address store atomicity issues.
-    GenMemBarrier(kAnyAny);
-  }
-  if (is_object) {
-    MarkGCCard(0, rl_value.reg, rl_object.reg);
-  }
-  return true;
-}
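The barrier placement in the two Unsafe intrinsics above maps onto the classic fence pattern for volatile accesses. A hedged portable-C++ analogy (std::atomic fences standing in for kLoadAny, kAnyStore and kAnyAny; this is not the code the backend emits):

    #include <atomic>

    int VolatileGet(const std::atomic<int>& slot) {
      int v = slot.load(std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_acquire);  // kLoadAny
      return v;
    }

    void VolatilePut(std::atomic<int>& slot, int value) {
      std::atomic_thread_fence(std::memory_order_release);  // kAnyStore
      slot.store(value, std::memory_order_relaxed);
      // kAnyAny: keep the store ordered before any subsequent volatile load.
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }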
-
-void Mir2Lir::GenInvoke(CallInfo* info) {
-  DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) {
-    const DexFile* dex_file = info->method_ref.dex_file;
-    auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file);
-    if (inliner->GenIntrinsic(this, info)) {
-      return;
-    }
-  }
-  GenInvokeNoInline(info);
-}
-
-void Mir2Lir::GenInvokeNoInline(CallInfo* info) {
-  int call_state = 0;
-  LIR* null_ck;
-  LIR** p_null_ck = nullptr;
-  NextCallInsn next_call_insn;
-  FlushAllRegs();  /* Everything to home location */
-  // Explicit register usage
-  LockCallTemps();
-
-  const MirMethodLoweringInfo& method_info = mir_graph_->GetMethodLoweringInfo(info->mir);
-  MethodReference target_method = method_info.GetTargetMethod();
-  cu_->compiler_driver->ProcessedInvoke(method_info.GetInvokeType(), method_info.StatsFlags());
-  InvokeType original_type = static_cast<InvokeType>(method_info.GetInvokeType());
-  info->type = method_info.GetSharpType();
-  bool is_string_init = false;
-  if (method_info.IsSpecial()) {
-    DexFileMethodInliner* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(
-        target_method.dex_file);
-    if (inliner->IsStringInitMethodIndex(target_method.dex_method_index)) {
-      is_string_init = true;
-      size_t pointer_size = GetInstructionSetPointerSize(cu_->instruction_set);
-      info->string_init_offset = inliner->GetOffsetForStringInit(target_method.dex_method_index,
-                                                                 pointer_size);
-      info->type = kStatic;
-    }
-  }
-  bool fast_path = method_info.FastPath();
-  bool skip_this;
-
-  if (info->type == kInterface) {
-    next_call_insn = fast_path ? NextInterfaceCallInsn : NextInterfaceCallInsnWithAccessCheck;
-    skip_this = fast_path;
-  } else if (info->type == kDirect) {
-    if (fast_path) {
-      p_null_ck = &null_ck;
-    }
-    next_call_insn = fast_path ? GetNextSDCallInsn() : NextDirectCallInsnSP;
-    skip_this = false;
-  } else if (info->type == kStatic) {
-    next_call_insn = fast_path ? GetNextSDCallInsn() : NextStaticCallInsnSP;
-    skip_this = false;
-  } else if (info->type == kSuper) {
-    DCHECK(!fast_path);  // Fast path is a direct call.
-    next_call_insn = NextSuperCallInsnSP;
-    skip_this = false;
-  } else {
-    DCHECK_EQ(info->type, kVirtual);
-    next_call_insn = fast_path ? NextVCallInsn : NextVCallInsnSP;
-    skip_this = fast_path;
-  }
-  call_state = GenDalvikArgs(info, call_state, p_null_ck,
-                             next_call_insn, target_method, method_info.VTableIndex(),
-                             method_info.DirectCode(), method_info.DirectMethod(),
-                             original_type, skip_this);
-  // Finish up any of the call sequence not interleaved in arg loading
-  while (call_state >= 0) {
-    call_state = next_call_insn(cu_, info, call_state, target_method, method_info.VTableIndex(),
-                                method_info.DirectCode(), method_info.DirectMethod(),
-                                original_type);
-  }
-  LIR* call_insn = GenCallInsn(method_info);
-  MarkSafepointPC(call_insn);
-
-  FreeCallTemps();
-  if (info->result.location != kLocInvalid) {
-    // We have a following MOVE_RESULT - do it now.
-    RegisterClass reg_class = is_string_init ? kRefReg :
-        ShortyToRegClass(mir_graph_->GetShortyFromMethodReference(info->method_ref)[0]);
-    if (info->result.wide) {
-      RegLocation ret_loc = GetReturnWide(reg_class);
-      StoreValueWide(info->result, ret_loc);
-    } else {
-      RegLocation ret_loc = GetReturn(reg_class);
-      StoreValue(info->result, ret_loc);
-    }
-  }
-}
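The argument-passing protocol above follows a small state-machine convention: next_call_insn emits one step of the call sequence per invocation and returns the advanced state, going negative once the sequence is complete, which is what lets GenDalvikArgs interleave its own argument moves between steps. A minimal sketch of that driver shape (illustrative types only):

    #include <functional>

    using NextStep = std::function<int(int state)>;

    // Flush whatever part of the call sequence was not interleaved
    // with argument loading; a negative state signals completion.
    void FinishCallSequence(int call_state, const NextStep& next_step) {
      while (call_state >= 0) {
        call_state = next_step(call_state);
      }
    }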
-
-}  // namespace art
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
deleted file mode 100644
index 3f89001..0000000
--- a/compiler/dex/quick/gen_loadstore.cc
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_to_lir-inl.h"
-
-#include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
-#include "invoke_type.h"
-
-namespace art {
-
-/* This file contains target-independent codegen and support. */
-
-/*
- * Load an immediate value into a fixed or temp register.  Target
- * register is clobbered, and marked in_use.
- */
-LIR* Mir2Lir::LoadConstant(RegStorage r_dest, int value) {
-  if (IsTemp(r_dest)) {
-    Clobber(r_dest);
-    MarkInUse(r_dest);
-  }
-  return LoadConstantNoClobber(r_dest, value);
-}
-
-/*
- * Load a Dalvik register into a physical register.  Take care when
- * using this routine, as it doesn't perform any bookkeeping regarding
- * register liveness.  That is the responsibility of the caller.
- */
-void Mir2Lir::LoadValueDirect(RegLocation rl_src, RegStorage r_dest) {
-  rl_src = rl_src.wide ? UpdateLocWide(rl_src) : UpdateLoc(rl_src);
-  if (rl_src.location == kLocPhysReg) {
-    OpRegCopy(r_dest, rl_src.reg);
-  } else if (IsInexpensiveConstant(rl_src)) {
-    // On 64-bit targets this will sign-extend.  Make sure a constant reference is always null.
-    DCHECK(!rl_src.ref || (mir_graph_->ConstantValue(rl_src) == 0));
-    LoadConstantNoClobber(r_dest, mir_graph_->ConstantValue(rl_src));
-  } else {
-    DCHECK((rl_src.location == kLocDalvikFrame) ||
-           (rl_src.location == kLocCompilerTemp));
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    OpSize op_size;
-    if (rl_src.ref) {
-      op_size = kReference;
-    } else if (rl_src.wide) {
-      op_size = k64;
-    } else {
-      op_size = k32;
-    }
-    LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, op_size, kNotVolatile);
-  }
-}
-
-/*
- * Similar to LoadValueDirect, but clobbers and allocates the target
- * register.  Should be used when loading to a fixed register (for example,
- * loading arguments to an out-of-line call).
- */
-void Mir2Lir::LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest) {
-  Clobber(r_dest);
-  MarkInUse(r_dest);
-  LoadValueDirect(rl_src, r_dest);
-}
-
-/*
- * Load a Dalvik register pair into a physical register or register pair.  Take care when
- * using this routine, as it doesn't perform any bookkeeping regarding
- * register liveness.  That is the responsibility of the caller.
- */
-void Mir2Lir::LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest) {
-  rl_src = UpdateLocWide(rl_src);
-  if (rl_src.location == kLocPhysReg) {
-    OpRegCopyWide(r_dest, rl_src.reg);
-  } else if (IsInexpensiveConstant(rl_src)) {
-    LoadConstantWide(r_dest, mir_graph_->ConstantValueWide(rl_src));
-  } else {
-    DCHECK((rl_src.location == kLocDalvikFrame) ||
-           (rl_src.location == kLocCompilerTemp));
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64, kNotVolatile);
-  }
-}
-
-/*
- * Similar to LoadValueDirect, but clobbers and allocates the target
- * registers.  Should be used when loading to fixed registers (for example,
- * loading arguments to an out-of-line call).
- */
-void Mir2Lir::LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest) {
-  Clobber(r_dest);
-  MarkInUse(r_dest);
-  LoadValueDirectWide(rl_src, r_dest);
-}
-
-RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
-  // If op_kind isn't a reference, rl_src should not be marked as a reference either
-  // unless we've seen type conflicts (i.e. register promotion is disabled).
-  DCHECK(op_kind == kRefReg || (!rl_src.ref || (cu_->disable_opt & (1u << kPromoteRegs)) != 0u));
-  rl_src = UpdateLoc(rl_src);
-  if (rl_src.location == kLocPhysReg) {
-    if (!RegClassMatches(op_kind, rl_src.reg)) {
-      // Wrong register class, realloc, copy and transfer ownership.
-      RegStorage new_reg = AllocTypedTemp(rl_src.fp, op_kind);
-      OpRegCopy(new_reg, rl_src.reg);
-      // Clobber the old regs and free it.
-      Clobber(rl_src.reg);
-      FreeTemp(rl_src.reg);
-      // ...and mark the new one live.
-      rl_src.reg = new_reg;
-      MarkLive(rl_src);
-    }
-    return rl_src;
-  }
-
-  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
-  rl_src.reg = AllocTypedTemp(rl_src.fp, op_kind);
-  LoadValueDirect(rl_src, rl_src.reg);
-  rl_src.location = kLocPhysReg;
-  MarkLive(rl_src);
-  return rl_src;
-}
-
-void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) {
-  /*
-   * Sanity checking - should never try to store to the same
-   * ssa name during the compilation of a single instruction
-   * without an intervening ClobberSReg().
-   */
-  if (kIsDebugBuild) {
-    DCHECK((live_sreg_ == INVALID_SREG) ||
-           (rl_dest.s_reg_low != live_sreg_));
-    live_sreg_ = rl_dest.s_reg_low;
-  }
-  LIR* def_start;
-  LIR* def_end;
-  DCHECK(!rl_dest.wide);
-  DCHECK(!rl_src.wide);
-  rl_src = UpdateLoc(rl_src);
-  rl_dest = UpdateLoc(rl_dest);
-  if (rl_src.location == kLocPhysReg) {
-    if (IsLive(rl_src.reg) ||
-      IsPromoted(rl_src.reg) ||
-      (rl_dest.location == kLocPhysReg)) {
-      // Src is live/promoted or Dest has assigned reg.
-      rl_dest = EvalLoc(rl_dest, rl_dest.ref || rl_src.ref ? kRefReg : kAnyReg, false);
-      OpRegCopy(rl_dest.reg, rl_src.reg);
-    } else {
-      // Just re-assign the registers.  Dest gets Src's regs
-      rl_dest.reg = rl_src.reg;
-      Clobber(rl_src.reg);
-    }
-  } else {
-    // Load Src either into promoted Dest or temps allocated for Dest
-    rl_dest = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kAnyReg, false);
-    LoadValueDirect(rl_src, rl_dest.reg);
-  }
-
-  // Dest is now live and dirty (until/if we flush it to home location)
-  MarkLive(rl_dest);
-  MarkDirty(rl_dest);
-
-
-  ResetDefLoc(rl_dest);
-  if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
-    def_start = last_lir_insn_;
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    if (rl_dest.ref) {
-      StoreRefDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, kNotVolatile);
-    } else {
-      Store32Disp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
-    }
-    MarkClean(rl_dest);
-    def_end = last_lir_insn_;
-    if (!rl_dest.ref) {
-      // Exclude references from store elimination
-      MarkDef(rl_dest, def_start, def_end);
-    }
-  }
-}
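The core decision in StoreValue is steal-versus-copy: the destination may simply take over the source's physical register only when nothing else still depends on that register. A compact restatement (illustrative struct, not the real RegLocation):

    struct StoreState {
      bool src_live;       // IsLive(rl_src.reg)
      bool src_promoted;   // IsPromoted(rl_src.reg)
      bool dest_has_reg;   // rl_dest.location == kLocPhysReg
    };

    // True: emit a register copy; false: re-assign src's register to dest.
    bool MustCopy(const StoreState& s) {
      return s.src_live || s.src_promoted || s.dest_has_reg;
    }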
-
-RegLocation Mir2Lir::LoadValueWide(RegLocation rl_src, RegisterClass op_kind) {
-  DCHECK(rl_src.wide);
-  rl_src = UpdateLocWide(rl_src);
-  if (rl_src.location == kLocPhysReg) {
-    if (!RegClassMatches(op_kind, rl_src.reg)) {
-      // Wrong register class, realloc, copy and transfer ownership.
-      RegStorage new_regs = AllocTypedTempWide(rl_src.fp, op_kind);
-      OpRegCopyWide(new_regs, rl_src.reg);
-      // Clobber the old regs and free it.
-      Clobber(rl_src.reg);
-      FreeTemp(rl_src.reg);
-      // ...and mark the new ones live.
-      rl_src.reg = new_regs;
-      MarkLive(rl_src);
-    }
-    return rl_src;
-  }
-
-  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
-  DCHECK_NE(GetSRegHi(rl_src.s_reg_low), INVALID_SREG);
-  rl_src.reg = AllocTypedTempWide(rl_src.fp, op_kind);
-  LoadValueDirectWide(rl_src, rl_src.reg);
-  rl_src.location = kLocPhysReg;
-  MarkLive(rl_src);
-  return rl_src;
-}
-
-void Mir2Lir::StoreValueWide(RegLocation rl_dest, RegLocation rl_src) {
-  /*
-   * Sanity checking - should never try to store to the same
-   * ssa name during the compilation of a single instruction
-   * without an intervening ClobberSReg().
-   */
-  if (kIsDebugBuild) {
-    DCHECK((live_sreg_ == INVALID_SREG) ||
-           (rl_dest.s_reg_low != live_sreg_));
-    live_sreg_ = rl_dest.s_reg_low;
-  }
-  LIR* def_start;
-  LIR* def_end;
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_src.wide);
-  rl_src = UpdateLocWide(rl_src);
-  rl_dest = UpdateLocWide(rl_dest);
-  if (rl_src.location == kLocPhysReg) {
-    if (IsLive(rl_src.reg) ||
-        IsPromoted(rl_src.reg) ||
-        (rl_dest.location == kLocPhysReg)) {
-      /*
-       * If src reg[s] are tied to the original Dalvik vreg via liveness or promotion, we
-       * can't repurpose them.  Similarly, if the dest reg[s] are tied to Dalvik vregs via
-       * promotion, we can't just re-assign.  In these cases, we have to copy.
-       */
-      rl_dest = EvalLoc(rl_dest, kAnyReg, false);
-      OpRegCopyWide(rl_dest.reg, rl_src.reg);
-    } else {
-      // Just re-assign the registers.  Dest gets Src's regs
-      rl_dest.reg = rl_src.reg;
-      Clobber(rl_src.reg);
-    }
-  } else {
-    // Load Src either into promoted Dest or temps allocated for Dest
-    rl_dest = EvalLoc(rl_dest, kAnyReg, false);
-    LoadValueDirectWide(rl_src, rl_dest.reg);
-  }
-
-  // Dest is now live and dirty (until/if we flush it to home location)
-  MarkLive(rl_dest);
-  MarkWide(rl_dest.reg);
-  MarkDirty(rl_dest);
-
-  ResetDefLocWide(rl_dest);
-  if (IsDirty(rl_dest.reg) && (LiveOut(rl_dest.s_reg_low) ||
-      LiveOut(GetSRegHi(rl_dest.s_reg_low)))) {
-    def_start = last_lir_insn_;
-    DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
-              mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile);
-    MarkClean(rl_dest);
-    def_end = last_lir_insn_;
-    MarkDefWide(rl_dest, def_start, def_end);
-  }
-}
-
-void Mir2Lir::StoreFinalValue(RegLocation rl_dest, RegLocation rl_src) {
-  DCHECK_EQ(rl_src.location, kLocPhysReg);
-
-  if (rl_dest.location == kLocPhysReg) {
-    OpRegCopy(rl_dest.reg, rl_src.reg);
-  } else {
-    // Just re-assign the register.  Dest gets Src's reg.
-    rl_dest.location = kLocPhysReg;
-    rl_dest.reg = rl_src.reg;
-    Clobber(rl_src.reg);
-  }
-
-  // Dest is now live and dirty (until/if we flush it to home location)
-  MarkLive(rl_dest);
-  MarkDirty(rl_dest);
-
-
-  ResetDefLoc(rl_dest);
-  if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
-    LIR *def_start = last_lir_insn_;
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    Store32Disp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
-    MarkClean(rl_dest);
-    LIR *def_end = last_lir_insn_;
-    if (!rl_dest.ref) {
-      // Exclude references from store elimination
-      MarkDef(rl_dest, def_start, def_end);
-    }
-  }
-}
-
-void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_src.wide);
-  DCHECK_EQ(rl_src.location, kLocPhysReg);
-
-  if (rl_dest.location == kLocPhysReg) {
-    OpRegCopyWide(rl_dest.reg, rl_src.reg);
-  } else {
-    // Just re-assign the registers.  Dest gets Src's regs.
-    rl_dest.location = kLocPhysReg;
-    rl_dest.reg = rl_src.reg;
-    Clobber(rl_src.reg);
-  }
-
-  // Dest is now live and dirty (until/if we flush it to home location).
-  MarkLive(rl_dest);
-  MarkWide(rl_dest.reg);
-  MarkDirty(rl_dest);
-
-  ResetDefLocWide(rl_dest);
-  if (IsDirty(rl_dest.reg) && (LiveOut(rl_dest.s_reg_low) ||
-      LiveOut(GetSRegHi(rl_dest.s_reg_low)))) {
-    LIR *def_start = last_lir_insn_;
-    DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
-              mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile);
-    MarkClean(rl_dest);
-    LIR *def_end = last_lir_insn_;
-    MarkDefWide(rl_dest, def_start, def_end);
-  }
-}
-
-/* Utilities to load the current Method* */
-void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) {
-  if (GetCompilationUnit()->target64) {
-    LoadValueDirectWideFixed(mir_graph_->GetMethodLoc(), r_tgt);
-  } else {
-    LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt);
-  }
-}
-
-RegStorage Mir2Lir::LoadCurrMethodWithHint(RegStorage r_hint) {
-  // If the method is promoted to a register, return that register, otherwise load it to r_hint.
-  // (Replacement for LoadCurrMethod() usually used when LockCallTemps() is in effect.)
-  DCHECK(r_hint.Valid());
-  RegLocation rl_method = mir_graph_->GetMethodLoc();
-  if (rl_method.location == kLocPhysReg) {
-    DCHECK(!IsTemp(rl_method.reg));
-    return rl_method.reg;
-  } else {
-    LoadCurrMethodDirect(r_hint);
-    return r_hint;
-  }
-}
-
-RegLocation Mir2Lir::LoadCurrMethod() {
-  return GetCompilationUnit()->target64 ?
-      LoadValueWide(mir_graph_->GetMethodLoc(), kCoreReg) :
-      LoadValue(mir_graph_->GetMethodLoc(), kRefReg);
-}
-
-RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
-  DCHECK(!loc.wide);
-  DCHECK(loc.location == kLocPhysReg);
-  DCHECK(!loc.reg.IsFloat());
-  if (IsTemp(loc.reg)) {
-    Clobber(loc.reg);
-  } else {
-    RegStorage temp_low = AllocTemp();
-    OpRegCopy(temp_low, loc.reg);
-    loc.reg = temp_low;
-  }
-
-  // Ensure that this doesn't represent the original SR any more.
-  loc.s_reg_low = INVALID_SREG;
-  return loc;
-}
-
-RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
-  DCHECK(loc.wide);
-  DCHECK(loc.location == kLocPhysReg);
-  DCHECK(!loc.reg.IsFloat());
-
-  if (!loc.reg.IsPair()) {
-    if (IsTemp(loc.reg)) {
-      Clobber(loc.reg);
-    } else {
-      RegStorage temp = AllocTempWide();
-      OpRegCopy(temp, loc.reg);
-      loc.reg = temp;
-    }
-  } else {
-    if (IsTemp(loc.reg.GetLow())) {
-      Clobber(loc.reg.GetLow());
-    } else {
-      RegStorage temp_low = AllocTemp();
-      OpRegCopy(temp_low, loc.reg.GetLow());
-      loc.reg.SetLowReg(temp_low.GetReg());
-    }
-    if (IsTemp(loc.reg.GetHigh())) {
-      Clobber(loc.reg.GetHigh());
-    } else {
-      RegStorage temp_high = AllocTemp();
-      OpRegCopy(temp_high, loc.reg.GetHigh());
-      loc.reg.SetHighReg(temp_high.GetReg());
-    }
-  }
-
-  // Ensure that this doesn't represent the original SR any more.
-  loc.s_reg_low = INVALID_SREG;
-  return loc;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc
deleted file mode 100644
index 5cfb0ff..0000000
--- a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lazy_debug_frame_opcode_writer.h"
-#include "mir_to_lir.h"
-
-namespace art {
-namespace dwarf {
-
-const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) {
-  if (!this->enabled_) {
-    DCHECK(this->data()->empty());
-    return this->data();
-  }
-  if (!patched_) {
-    patched_ = true;
-    // Move our data buffer to temporary variable.
-    ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator());
-    old_opcodes.swap(this->opcodes_);
-    // Refill our data buffer with patched opcodes.
-    this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4);
-    size_t pos = 0;
-    for (auto advance : advances_) {
-      DCHECK_GE(advance.pos, pos);
-      // Copy old data up to the point when advance was issued.
-      this->opcodes_.insert(this->opcodes_.end(),
-                            old_opcodes.begin() + pos,
-                            old_opcodes.begin() + advance.pos);
-      pos = advance.pos;
-      // This may be null if there is no slow-path code after return.
-      LIR* next_lir = NEXT_LIR(advance.last_lir_insn);
-      // Insert the advance command with its final offset.
-      Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size);
-    }
-    // Copy the final segment.
-    this->opcodes_.insert(this->opcodes_.end(),
-                          old_opcodes.begin() + pos,
-                          old_opcodes.end());
-    Base::AdvancePC(code_size);
-  }
-  return this->data();
-}
-
-}  // namespace dwarf
-}  // namespace art
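The Patch routine above is a general splice-and-patch pattern: opcodes are buffered while placeholder positions are recorded, and once final offsets are known each untouched segment is copied with the resolved advance interleaved. A self-contained sketch of the same idea on plain vectors (names are illustrative):

    #include <cstddef>
    #include <utility>
    #include <vector>

    std::vector<int> PatchStream(const std::vector<int>& old_ops,
                                 const std::vector<std::pair<size_t, int>>& advances) {
      std::vector<int> out;
      out.reserve(old_ops.size() + advances.size());
      size_t pos = 0;
      for (const auto& adv : advances) {
        // Copy the untouched segment up to the recorded position...
        out.insert(out.end(), old_ops.begin() + pos, old_ops.begin() + adv.first);
        // ...then emit the value that was only known at patch time.
        out.push_back(adv.second);
        pos = adv.first;
      }
      out.insert(out.end(), old_ops.begin() + pos, old_ops.end());
      return out;
    }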
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
deleted file mode 100644
index c425fc8..0000000
--- a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
-#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
-
-#include "base/arena_allocator.h"
-#include "base/arena_containers.h"
-#include "dwarf/debug_frame_opcode_writer.h"
-
-namespace art {
-struct LIR;
-namespace dwarf {
-
-// When we are generating the CFI code, we do not know the instruction offsets;
-// this class stores the LIR references and patches the instruction stream later.
-class LazyDebugFrameOpCodeWriter FINAL
-    : public DebugFrameOpCodeWriter<ArenaVector<uint8_t>> {
-  typedef DebugFrameOpCodeWriter<ArenaVector<uint8_t>> Base;
- public:
-  // This method is implicitly called by the opcode writers.
-  virtual void ImplicitlyAdvancePC() OVERRIDE {
-    DCHECK_EQ(patched_, false);
-    DCHECK_EQ(this->current_pc_, 0);
-    advances_.push_back({this->data()->size(), *last_lir_insn_});
-  }
-
-  const ArenaVector<uint8_t>* Patch(size_t code_size);
-
-  LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, ArenaAllocator* allocator)
-      : Base(enable_writes, allocator->Adapter()),
-        last_lir_insn_(last_lir_insn),
-        advances_(allocator->Adapter()),
-        patched_(false) {
-  }
-
- private:
-  typedef struct {
-    size_t pos;
-    LIR* last_lir_insn;
-  } Advance;
-
-  using Base::data;  // Hidden. Use Patch method instead.
-
-  LIR** last_lir_insn_;
-  ArenaVector<Advance> advances_;
-  bool patched_;
-
-  DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter);
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
deleted file mode 100644
index 6cdf567..0000000
--- a/compiler/dex/quick/local_optimizations.cc
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dex/quick/mir_to_lir-inl.h"
-
-#include "base/logging.h"
-
-namespace art {
-
-#define DEBUG_OPT(X)
-
-#define LOAD_STORE_CHECK_REG_DEP(mask, check) (mask.Intersects(*check->u.m.def_mask))
-
-/* Check RAW, WAR, and WAW dependencies on the register operands */
-#define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask)) || \
-                                       (use.Union(def).Intersects(*check->u.m.def_mask))
-
-/* Load Store Elimination filter:
- *  - Wide Load/Store
- *  - Exclusive Load/Store
- *  - Quad operand Load/Store
- *  - List Load/Store
- *  - IT blocks
- *  - Branch
- *  - Dmb
- */
-#define LOAD_STORE_FILTER(flags) ((flags & (IS_QUAD_OP|IS_STORE)) == (IS_QUAD_OP|IS_STORE) || \
-                                 (flags & (IS_QUAD_OP|IS_LOAD)) == (IS_QUAD_OP|IS_LOAD) || \
-                                 (flags & REG_USE012) == REG_USE012 || \
-                                 (flags & REG_DEF01) == REG_DEF01 || \
-                                 (flags & REG_DEF_LIST0) || \
-                                 (flags & REG_DEF_LIST1) || \
-                                 (flags & REG_USE_LIST0) || \
-                                 (flags & REG_USE_LIST1) || \
-                                 (flags & REG_DEF_FPCS_LIST0) || \
-                                 (flags & REG_DEF_FPCS_LIST2) || \
-                                 (flags & REG_USE_FPCS_LIST0) || \
-                                 (flags & REG_USE_FPCS_LIST2) || \
-                                 (flags & IS_VOLATILE) || \
-                                 (flags & IS_BRANCH) || \
-                                 (flags & IS_IT))
-
-/* Scheduler heuristics */
-#define MAX_HOIST_DISTANCE 20
-#define LDLD_DISTANCE 4
-#define LD_LATENCY 2
-
-static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) {
-  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info);
-  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info);
-  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info);
-  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info);
-
-  return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
-}
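Since each alias range spans at most two consecutive Dalvik vregs (lo and, when wide, lo + 1), the three equality tests above cover every possible overlap. The same check in isolation (illustrative names):

    struct VRegRange { int lo; int hi; };  // hi == lo (narrow) or lo + 1 (wide)

    bool Overlaps(VRegRange a, VRegRange b) {
      return a.lo == b.lo || a.lo == b.hi || a.hi == b.lo;
    }
    // e.g. a wide pair {4, 5} overlaps a narrow {5, 5} via a.hi == b.lo.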
-
-/* Convert a more expensive instruction (i.e. a load) into a move */
-void Mir2Lir::ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src) {
-  /* Insert a move to replace the load */
-  LIR* move_lir;
-  move_lir = OpRegCopyNoInsert(dest, src);
-  move_lir->dalvik_offset = orig_lir->dalvik_offset;
-  /*
-   * Insert the converted instruction after the original since the
-   * optimization is scanning in top-down order and the new instruction
-   * will need to be re-checked (e.g. the new dest clobbers the src used in
-   * this_lir).
-   */
-  InsertLIRAfter(orig_lir, move_lir);
-}
-
-void Mir2Lir::DumpDependentInsnPair(LIR* check_lir, LIR* this_lir, const char* type) {
-  LOG(INFO) << type;
-  LOG(INFO) << "Check LIR:";
-  DumpLIRInsn(check_lir, 0);
-  LOG(INFO) << "This LIR:";
-  DumpLIRInsn(this_lir, 0);
-}
-
-inline void Mir2Lir::EliminateLoad(LIR* lir, int reg_id) {
-  DCHECK(RegStorage::SameRegType(lir->operands[0], reg_id));
-  RegStorage dest_reg, src_reg;
-
-  /* Same Register - Nop */
-  if (lir->operands[0] == reg_id) {
-    NopLIR(lir);
-    return;
-  }
-
-  /* Different register - Move + Nop */
-  switch (reg_id & RegStorage::kShapeTypeMask) {
-    case RegStorage::k32BitSolo | RegStorage::kCoreRegister:
-      dest_reg = RegStorage::Solo32(lir->operands[0]);
-      src_reg = RegStorage::Solo32(reg_id);
-      break;
-    case RegStorage::k64BitSolo | RegStorage::kCoreRegister:
-      dest_reg = RegStorage::Solo64(lir->operands[0]);
-      src_reg = RegStorage::Solo64(reg_id);
-      break;
-    case RegStorage::k32BitSolo | RegStorage::kFloatingPoint:
-      dest_reg = RegStorage::FloatSolo32(lir->operands[0]);
-      src_reg = RegStorage::FloatSolo32(reg_id);
-      break;
-    case RegStorage::k64BitSolo | RegStorage::kFloatingPoint:
-      dest_reg = RegStorage::FloatSolo64(lir->operands[0]);
-      src_reg = RegStorage::FloatSolo64(reg_id);
-      break;
-    default:
-      LOG(INFO) << "Load Store: Unsupported register type!";
-      return;
-  }
-  ConvertMemOpIntoMove(lir, dest_reg, src_reg);
-  NopLIR(lir);
-  return;
-}
-
-/*
- * Perform a top-down walk, from the first to the last instruction in the
- * superblock, to eliminate redundant loads and stores.
- *
- * An earlier load can eliminate a later load iff
- *   1) They are must-aliases
- *   2) The native register is not clobbered in between
- *   3) The memory location is not written to in between
- *
- * An earlier store can eliminate a later load iff
- *   1) They are must-aliases
- *   2) The native register is not clobbered in between
- *   3) The memory location is not written to in between
- *
- * An earlier store can eliminate a later store iff
- *   1) They are must-aliases
- *   2) The memory location is not written to in between
- */
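Restating the three rules as predicates may help; this is a sketch whose booleans correspond to the must-alias and clobber checks performed in the implementation below:

    // An earlier load or store makes a later load redundant when the two
    // must-alias, the loaded register survives, and memory is untouched.
    bool KillsLaterLoad(bool must_alias, bool reg_clobbered, bool mem_written) {
      return must_alias && !reg_clobbered && !mem_written;
    }

    // An earlier store makes a later store to the same location redundant
    // when memory is not written in between.
    bool KillsLaterStore(bool must_alias, bool mem_written) {
      return must_alias && !mem_written;
    }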
-void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) {
-  LIR* this_lir, *check_lir;
-  std::vector<int> alias_list;
-
-  if (head_lir == tail_lir) {
-    return;
-  }
-
-  for (this_lir = head_lir; this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
-    if (this_lir->flags.is_nop || IsPseudoLirOp(this_lir->opcode)) {
-      continue;
-    }
-
-    uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);
-    /* Target LIR - skip if instr is:
-     *  - NOP
-     *  - Branch
-     *  - Load and store
-     *  - Wide load
-     *  - Wide store
-     *  - Exclusive load/store
-     */
-    if (LOAD_STORE_FILTER(target_flags) ||
-        ((target_flags & (IS_LOAD | IS_STORE)) == (IS_LOAD | IS_STORE)) ||
-        !(target_flags & (IS_LOAD | IS_STORE))) {
-      continue;
-    }
-    int native_reg_id = this_lir->operands[0];
-    int dest_reg_id = this_lir->operands[1];
-    bool is_this_lir_load = target_flags & IS_LOAD;
-    ResourceMask this_mem_mask = kEncodeMem.Intersection(this_lir->u.m.use_mask->Union(
-                                                        *this_lir->u.m.def_mask));
-
-    /* Memory region */
-    if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg)) &&
-      (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeHeapRef)))) {
-      continue;
-    }
-
-    /* Does not redefine the address */
-    if (this_lir->u.m.def_mask->Intersects(*this_lir->u.m.use_mask)) {
-      continue;
-    }
-
-    ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);
-    ResourceMask stop_use_reg_mask = this_lir->u.m.use_mask->Without(kEncodeMem);
-
-    /* The ARM backend can load/store PC */
-    ResourceMask uses_pc = GetPCUseDefEncoding();
-    if (uses_pc.Intersects(this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask))) {
-      continue;
-    }
-
-    /* Initialize alias list */
-    alias_list.clear();
-    ResourceMask alias_reg_list_mask = kEncodeNone;
-    if (!this_mem_mask.Intersects(kEncodeMem) && !this_mem_mask.Intersects(kEncodeLiteral)) {
-      alias_list.push_back(dest_reg_id);
-      SetupRegMask(&alias_reg_list_mask, dest_reg_id);
-    }
-
-    /* Scan through the BB for possible elimination candidates */
-    for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
-      if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) {
-        continue;
-      }
-
-      if (uses_pc.Intersects(check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask))) {
-        break;
-      }
-
-      ResourceMask check_mem_mask = kEncodeMem.Intersection(check_lir->u.m.use_mask->Union(
-                                                          *check_lir->u.m.def_mask));
-      ResourceMask alias_mem_mask = this_mem_mask.Intersection(check_mem_mask);
-      uint64_t check_flags = GetTargetInstFlags(check_lir->opcode);
-      bool stop_here = false;
-      bool pass_over = false;
-
-      /* Check LIR - skip if instr is:
-       *  - Wide Load
-       *  - Wide Store
-       *  - Branch
-       *  - Dmb
-       *  - Exclusive load/store
-       *  - IT blocks
-       *  - Quad loads
-       */
-      if (LOAD_STORE_FILTER(check_flags)) {
-        stop_here = true;
-        /* Possible alias or result of earlier pass */
-      } else if (check_flags & IS_MOVE) {
-        for (auto &reg : alias_list) {
-          if (RegStorage::RegNum(check_lir->operands[1]) == RegStorage::RegNum(reg)) {
-            pass_over = true;
-            alias_list.push_back(check_lir->operands[0]);
-            SetupRegMask(&alias_reg_list_mask, check_lir->operands[0]);
-          }
-        }
-      /* Memory regions */
-      } else if (!alias_mem_mask.Equals(kEncodeNone)) {
-        DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
-        bool is_check_lir_load = check_flags & IS_LOAD;
-        bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id);
-
-        if (!alias_mem_mask.Intersects(kEncodeMem) && alias_mem_mask.Equals(kEncodeLiteral)) {
-          DCHECK(check_flags & IS_LOAD);
-          /* Same value && same register type */
-          if (reg_compatible && (this_lir->target == check_lir->target)) {
-            DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "LITERAL"));
-            EliminateLoad(check_lir, native_reg_id);
-          }
-        } else if (((alias_mem_mask.Equals(kEncodeDalvikReg)) || (alias_mem_mask.Equals(kEncodeHeapRef))) &&
-                   alias_reg_list_mask.Intersects((check_lir->u.m.use_mask)->Without(kEncodeMem))) {
-          bool same_offset = (GetInstructionOffset(this_lir) == GetInstructionOffset(check_lir));
-          if (same_offset && !is_check_lir_load) {
-            if (check_lir->operands[0] != native_reg_id) {
-              DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "STORE STOP"));
-              stop_here = true;
-              break;
-            }
-          }
-
-          if (reg_compatible && same_offset &&
-              ((is_this_lir_load && is_check_lir_load)  /* LDR - LDR */ ||
-              (!is_this_lir_load && is_check_lir_load)  /* STR - LDR */ ||
-              (!is_this_lir_load && !is_check_lir_load) /* STR - STR */)) {
-            DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "LOAD STORE"));
-            EliminateLoad(check_lir, native_reg_id);
-          }
-        } else {
-          /* Unsupported memory region */
-        }
-      }
-
-      if (pass_over) {
-        continue;
-      }
-
-      if (stop_here == false) {
-        bool stop_alias = LOAD_STORE_CHECK_REG_DEP(alias_reg_list_mask, check_lir);
-        if (stop_alias) {
-          /* Scan through alias list and if alias remove from alias list. */
-          for (auto &reg : alias_list) {
-            stop_alias = false;
-            ResourceMask alias_reg_mask = kEncodeNone;
-            SetupRegMask(&alias_reg_mask, reg);
-            stop_alias = LOAD_STORE_CHECK_REG_DEP(alias_reg_mask, check_lir);
-            if (stop_alias) {
-              ClearRegMask(&alias_reg_list_mask, reg);
-              alias_list.erase(std::remove(alias_list.begin(), alias_list.end(),
-                                           reg), alias_list.end());
-            }
-          }
-        }
-        ResourceMask stop_search_mask = stop_def_reg_mask.Union(stop_use_reg_mask);
-        stop_search_mask = stop_search_mask.Union(alias_reg_list_mask);
-        stop_here = LOAD_STORE_CHECK_REG_DEP(stop_search_mask, check_lir);
-        if (stop_here) {
-          break;
-        }
-      } else {
-        break;
-      }
-    }
-  }
-}
-
-/*
- * Perform a bottom-up walk, from the second instruction in the
- * superblock, to try to hoist loads to earlier slots.
- */
-void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) {
-  LIR* this_lir, *check_lir;
-  /*
-   * Store the list of independent instructions that can be hoisted past.
-   * Will decide the best place to insert later.
-   */
-  LIR* prev_inst_list[MAX_HOIST_DISTANCE];
-
-  /* Empty block */
-  if (head_lir == tail_lir) {
-    return;
-  }
-
-  /* Start from the second instruction */
-  for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
-    if (IsPseudoLirOp(this_lir->opcode)) {
-      continue;
-    }
-
-    uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);
-    /* Skip non-interesting instructions */
-    if (!(target_flags & IS_LOAD) ||
-        (this_lir->flags.is_nop == true) ||
-        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
-        ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
-      continue;
-    }
-
-    ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask;
-
-    /*
-     * Branches for null/range checks are marked with the true resource
-     * bits, and loads to Dalvik registers, constant pools, and non-alias
-     * locations are safe to be hoisted. So only mark the heap references
-     * conservatively here.
-     *
-     * Note: on x86(-64) and Arm64 this will add kEncodeNone.
-     * TODO: Sanity check. LoadStoreElimination uses kBranchBit to fake a PC.
-     */
-    if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) {
-      stop_use_all_mask.SetBits(GetPCUseDefEncoding());
-    }
-
-    /* Similar as above, but just check for pure register dependency */
-    ResourceMask stop_use_reg_mask = stop_use_all_mask.Without(kEncodeMem);
-    ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);
-
-    int next_slot = 0;
-    bool stop_here = false;
-
-    /* Try to hoist the load to a good spot */
-    for (check_lir = PREV_LIR(this_lir); check_lir != head_lir; check_lir = PREV_LIR(check_lir)) {
-      /*
-       * Skip already dead instructions (whose dataflow information is
-       * outdated and misleading).
-       */
-      if (check_lir->flags.is_nop) {
-        continue;
-      }
-
-      ResourceMask check_mem_mask = check_lir->u.m.def_mask->Intersection(kEncodeMem);
-      ResourceMask alias_condition = stop_use_all_mask.Intersection(check_mem_mask);
-      stop_here = false;
-
-      /* Potential WAR alias seen - check the exact relation */
-      if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) {
-        /* We can fully disambiguate Dalvik references */
-        if (alias_condition.Equals(kEncodeDalvikReg)) {
-          /* Must alias or partially overlap */
-          if ((check_lir->flags.alias_info == this_lir->flags.alias_info) ||
-            IsDalvikRegisterClobbered(this_lir, check_lir)) {
-            stop_here = true;
-          }
-        /* Conservatively treat all heap refs as may-alias */
-        } else {
-          DCHECK(alias_condition.Equals(kEncodeHeapRef));
-          stop_here = true;
-        }
-        /* Memory content may be updated. Stop looking now. */
-        if (stop_here) {
-          prev_inst_list[next_slot++] = check_lir;
-          break;
-        }
-      }
-
-      if (stop_here == false) {
-        stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask,
-                     check_lir);
-      }
-
-      /*
-       * Store the dependent or non-pseudo/independent instruction to the
-       * list.
-       */
-      if (stop_here || !IsPseudoLirOp(check_lir->opcode)) {
-        prev_inst_list[next_slot++] = check_lir;
-        if (next_slot == MAX_HOIST_DISTANCE) {
-          break;
-        }
-      }
-
-      /* Found a new place to put the load - move it here */
-      if (stop_here == true) {
-        DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "HOIST STOP"));
-        break;
-      }
-    }
-
-    /*
-     * Reached the top - use head_lir as the dependent marker as all labels
-     * are barriers.
-     */
-    if (stop_here == false && next_slot < MAX_HOIST_DISTANCE) {
-      prev_inst_list[next_slot++] = head_lir;
-    }
-
-    /*
-     * At least one independent instruction is found. Scan in the reversed
-     * direction to find a beneficial slot.
-     */
-    if (next_slot >= 2) {
-      int first_slot = next_slot - 2;
-      int slot;
-      LIR* dep_lir = prev_inst_list[next_slot-1];
-      /* If there is an ld-ld dependency, wait LDLD_DISTANCE cycles */
-      if (!IsPseudoLirOp(dep_lir->opcode) &&
-        (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) {
-        first_slot -= LDLD_DISTANCE;
-      }
-      /*
-       * Make sure we check slot >= 0 since first_slot may be negative
-       * when the loop is first entered.
-       */
-      for (slot = first_slot; slot >= 0; slot--) {
-        LIR* cur_lir = prev_inst_list[slot];
-        LIR* prev_lir = prev_inst_list[slot+1];
-
-        /* Check the highest instruction */
-        if (prev_lir->u.m.def_mask->Equals(kEncodeAll)) {
-          /*
-           * If the first instruction is a load, don't hoist anything
-           * above it since it is unlikely to be beneficial.
-           */
-          if (GetTargetInstFlags(cur_lir->opcode) & IS_LOAD) {
-            continue;
-          }
-          /*
-           * If the remaining number of slots is less than LD_LATENCY,
-           * insert the hoisted load here.
-           */
-          if (slot < LD_LATENCY) {
-            break;
-          }
-        }
-
-        // Don't look across a barrier label
-        if ((prev_lir->opcode == kPseudoTargetLabel) ||
-            (prev_lir->opcode == kPseudoSafepointPC) ||
-            (prev_lir->opcode == kPseudoBarrier)) {
-          break;
-        }
-
-        /*
-         * Try to find two instructions with load/use dependency until
-         * the remaining instructions are less than LD_LATENCY.
-         */
-        bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? false :
-            (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD);
-        if ((prev_is_load && (cur_lir->u.m.use_mask->Intersects(*prev_lir->u.m.def_mask))) ||
-            (slot < LD_LATENCY)) {
-          break;
-        }
-      }
-
-      /* Found a slot to hoist to */
-      if (slot >= 0) {
-        LIR* cur_lir = prev_inst_list[slot];
-        LIR* prev_lir = PREV_LIR(this_lir);
-        UnlinkLIR(this_lir);
-        /*
-         * Insertion is guaranteed to succeed since check_lir
-         * is never the first LIR on the list
-         */
-        InsertLIRBefore(cur_lir, this_lir);
-        this_lir = prev_lir;  // Continue the loop with the next LIR.
-      }
-    }
-  }
-}
-
-void Mir2Lir::ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir) {
-  if (!(cu_->disable_opt & (1 << kLoadStoreElimination))) {
-    ApplyLoadStoreElimination(head_lir, tail_lir);
-  }
-  if (!(cu_->disable_opt & (1 << kLoadHoisting))) {
-    ApplyLoadHoisting(head_lir, tail_lir);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mips/README.mips b/compiler/dex/quick/mips/README.mips
deleted file mode 100644
index ff561fa..0000000
--- a/compiler/dex/quick/mips/README.mips
+++ /dev/null
@@ -1,57 +0,0 @@
-               Notes on the Mips target (3/4/2012)
-               -----------------------------------
-
-Testing
-
-The initial implementation of Mips support in the compiler is untested on
-actual hardware, and as such should be expected to have many bugs.  However,
-the vast majority of code for Mips support is either shared with other
-tested targets, or was taken from the functional Mips JIT compiler.  The
-expectation is that when it is first tried out on actual hardware lots of
-small bugs will be flushed out, but it should not take long to get it
-solidly running.  The following areas are considered most likely to have
-problems that need to be addressed:
-
-    o Endianness.  Focus was on little-endian support, and if a big-endian
-      target is desired, you should pay particular attention to the
-      code generation for switch tables, fill array data, 64-bit
-      data handling and the register usage conventions.
-
-    o The memory model.  Verify that GenMemoryBarrier() generates the
-      appropriate flavor of sync.
-
-Register promotion
-
-The resource masks in the LIR structure are 64-bits wide, which is enough
-room to fully describe def/use info for Arm and x86 instructions.  However,
-the larger number of MIPS core and float registers renders this too small.
-Currently, the workaround for this limitation is to avoid using floating
-point registers 16-31.  These are the callee-save registers, which therefore
-means that no floating point promotion is allowed.  Among the solutions are:
-     o Expand the def/use mask (which, unfortunately, is a significant change)
-     o The Arm target uses 52 of the 64 bits, so we could support float
-       registers 16-27 without much effort.
-     o We could likely assign the 4 non-register bits (kDalvikReg, kLiteral,
-       kHeapRef & kMustNotAlias) to positions occupied by MIPS registers that
-       don't need def/use bits because they are never modified by code
-       subject to scheduling: r_K0, r_K1, r_SP, r_ZERO, r_S1 (rSELF).
-
-Branch delay slots
-
-Little to no attempt was made to fill branch delay slots.  Branch
-instructions in the encoding map are given a length of 8 bytes to include
-an implicit NOP.  It should not be too difficult to provide a slot-filling
-pass following successful assembly, but thought should be given to the
-design.  Branches are currently treated as scheduling barriers.  One
-simple solution would be to copy the instruction at branch targets to the
-slot and adjust the displacement.  However, given that code expansion is
-already a problem it would be preferable to use a more sophisticated
-scheduling solution.
-
-Code expansion
-
-Code expansion for the MIPS target is significantly higher than we see
-for Arm and x86.  It might make sense to replace the inline code generation
-for some of the more verbose Dalvik byte codes with subroutine calls to
-shared helper functions.
-
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
deleted file mode 100644
index f9b9684..0000000
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ /dev/null
@@ -1,956 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_mips.h"
-
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "mips_lir.h"
-
-namespace art {
-
-#define MAX_ASSEMBLER_RETRIES 50
-
-/*
- * opcode: MipsOpCode enum
- * skeleton: pre-designated bit-pattern for this opcode
- * k0: key to applying ds/de
- * ds: dest start bit position
- * de: dest end bit position
- * k1: key to applying s1s/s1e
- * s1s: src1 start bit position
- * s1e: src1 end bit position
- * k2: key to applying s2s/s2e
- * s2s: src2 start bit position
- * s2e: src2 end bit position
- * operands: number of operands (for sanity check purposes)
- * name: mnemonic name
- * fmt: for pretty-printing
- */
-#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
-                     k3, k3s, k3e, flags, name, fmt, size) \
-        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
-                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size}
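Concretely, assembling a row means starting from the skeleton and OR-ing each operand into its [end, start] bit range. Using the kMipsAddu row below (skeleton 0x00000021, dest at bits 15..11, src1 at 25..21, src2 at 20..16) as a worked example; InsertField and EncodeAddu are illustrative helpers, not part of the assembler:

    #include <cstdint>

    uint32_t InsertField(uint32_t word, int hi, int lo, uint32_t value) {
      uint32_t mask = ((1u << (hi - lo + 1)) - 1u) << lo;
      return (word & ~mask) | ((value << lo) & mask);
    }

    uint32_t EncodeAddu(uint32_t rd, uint32_t rs, uint32_t rt) {
      uint32_t word = 0x00000021;            // skeleton
      word = InsertField(word, 15, 11, rd);  // dest
      word = InsertField(word, 25, 21, rs);  // src1
      word = InsertField(word, 20, 16, rt);  // src2
      return word;
    }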
-
-/* Instruction dump string format keys: !pf, where "!" is the start
- * of the key, "p" is which numeric operand to use and "f" is the
- * print format.
- *
- * [p]ositions:
- *     0 -> operands[0] (dest)
- *     1 -> operands[1] (src1)
- *     2 -> operands[2] (src2)
- *     3 -> operands[3] (extra)
- *
- * [f]ormats:
- *     h -> 4-digit hex
- *     d -> decimal
- *     E -> decimal*4
- *     F -> decimal*2
- *     c -> branch condition (beq, bne, etc.)
- *     t -> pc-relative target
- *     T -> pc-region target
- *     u -> 1st half of bl[x] target
- *     v -> 2nd half of bl[x] target
- *     R -> register list
- *     s -> single precision floating point register
- *     S -> double precision floating point register
- *     m -> Thumb2 modified immediate
- *     n -> complemented Thumb2 modified immediate
- *     M -> Thumb2 16-bit zero-extended immediate
- *     b -> 4-digit binary
- *     N -> append a NOP
- *
- *  [!] escape.  To insert "!", use "!!"
- */
-/* NOTE: must be kept in sync with enum MipsOpcode from mips_lir.h */
-/*
- * TUNING: We're currently punting on the branch delay slots.  All branch
- * instructions in this map are given a size of 8, which during assembly
- * is expanded to include a nop.  This scheme should be replaced with
- * an assembler pass to fill those slots when possible.
- */
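-/*
- * Concretely: with the size-8 convention, "b target" occupies the 4-byte
- * branch plus the 4-byte nop appended for the delay slot (see the
- * encoder->size == 8 handling at the end of AssembleInstructions below).
- */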
-const MipsEncodingMap MipsMir2Lir::EncodingMap[kMipsLast] = {
-    // The following are common mips32r2, mips32r6 and mips64r6 instructions.
-    ENCODING_MAP(kMips32BitData, 0x00000000,
-                 kFmtBitBlt, 31, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP,
-                 "data", "0x!0h(!0d)", 4),
-    ENCODING_MAP(kMipsAddiu, 0x24000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "addiu", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsAddu, 0x00000021,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "addu", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsAnd, 0x00000024,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "and", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsAndi, 0x30000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "andi", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsB, 0x10000000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "b", "!0t!0N", 8),
-    ENCODING_MAP(kMipsBal, 0x04110000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "bal", "!0t!0N", 8),
-    ENCODING_MAP(kMipsBeq, 0x10000000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_USE01 |
-                 NEEDS_FIXUP, "beq", "!0r,!1r,!2t!0N", 8),
-    ENCODING_MAP(kMipsBeqz, 0x10000000,  // Same as beq above with t = $zero.
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "beqz", "!0r,!1t!0N", 8),
-    ENCODING_MAP(kMipsBgez, 0x04010000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "bgez", "!0r,!1t!0N", 8),
-    ENCODING_MAP(kMipsBgtz, 0x1C000000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "bgtz", "!0r,!1t!0N", 8),
-    ENCODING_MAP(kMipsBlez, 0x18000000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "blez", "!0r,!1t!0N", 8),
-    ENCODING_MAP(kMipsBltz, 0x04000000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "bltz", "!0r,!1t!0N", 8),
-    ENCODING_MAP(kMipsBnez, 0x14000000,  // Same as bne below with t = $zero.
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "bnez", "!0r,!1t!0N", 8),
-    ENCODING_MAP(kMipsBne, 0x14000000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_USE01 |
-                 NEEDS_FIXUP, "bne", "!0r,!1r,!2t!0N", 8),
-    ENCODING_MAP(kMipsExt, 0x7c000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 10, 6,
-                 kFmtBitBlt, 15, 11, IS_QUAD_OP | REG_DEF0 | REG_USE1,
-                 "ext", "!0r,!1r,!2d,!3D", 4),
-    ENCODING_MAP(kMipsFaddd, 0x46200000,
-                 kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "add.d", "!0S,!1S,!2S", 4),
-    ENCODING_MAP(kMipsFadds, 0x46000000,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "add.s", "!0s,!1s,!2s", 4),
-    ENCODING_MAP(kMipsFsubd, 0x46200001,
-                 kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sub.d", "!0S,!1S,!2S", 4),
-    ENCODING_MAP(kMipsFsubs, 0x46000001,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sub.s", "!0s,!1s,!2s", 4),
-    ENCODING_MAP(kMipsFdivd, 0x46200003,
-                 kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "div.d", "!0S,!1S,!2S", 4),
-    ENCODING_MAP(kMipsFdivs, 0x46000003,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "div.s", "!0s,!1s,!2s", 4),
-    ENCODING_MAP(kMipsFmuld, 0x46200002,
-                 kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul.d", "!0S,!1S,!2S", 4),
-    ENCODING_MAP(kMipsFmuls, 0x46000002,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul.s", "!0s,!1s,!2s", 4),
-    ENCODING_MAP(kMipsFcvtsd, 0x46200020,
-                 kFmtSfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "cvt.s.d", "!0s,!1S", 4),
-    ENCODING_MAP(kMipsFcvtsw, 0x46800020,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "cvt.s.w", "!0s,!1s", 4),
-    ENCODING_MAP(kMipsFcvtds, 0x46000021,
-                 kFmtDfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "cvt.d.s", "!0S,!1s", 4),
-    ENCODING_MAP(kMipsFcvtdw, 0x46800021,
-                 kFmtDfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "cvt.d.w", "!0S,!1s", 4),
-    ENCODING_MAP(kMipsFcvtwd, 0x46200024,
-                 kFmtSfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "cvt.w.d", "!0s,!1S", 4),
-    ENCODING_MAP(kMipsFcvtws, 0x46000024,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "cvt.w.s", "!0s,!1s", 4),
-    ENCODING_MAP(kMipsFmovd, 0x46200006,
-                 kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov.d", "!0S,!1S", 4),
-    ENCODING_MAP(kMipsFmovs, 0x46000006,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov.s", "!0s,!1s", 4),
-    ENCODING_MAP(kMipsFnegd, 0x46200007,
-                 kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "neg.d", "!0S,!1S", 4),
-    ENCODING_MAP(kMipsFnegs, 0x46000007,
-                 kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "neg.s", "!0s,!1s", 4),
-    ENCODING_MAP(kMipsFldc1, 0xd4000000,
-                 kFmtDfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "ldc1", "!0S,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsFlwc1, 0xc4000000,
-                 kFmtSfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lwc1", "!0s,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsFsdc1, 0xf4000000,
-                 kFmtDfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE,
-                 "sdc1", "!0S,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsFswc1, 0xe4000000,
-                 kFmtSfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE,
-                 "swc1", "!0s,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsJal, 0x0c000000,
-                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "jal", "!0T(!0E)!0N", 8),
-    ENCODING_MAP(kMipsJalr, 0x00000009,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF0_USE1,
-                 "jalr", "!0r,!1r!0N", 8),
-    ENCODING_MAP(kMipsJr, 0x00000008,
-                 kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 |
-                 NEEDS_FIXUP, "jr", "!0r!0N", 8),
-    ENCODING_MAP(kMipsLahi, 0x3C000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "lahi/lui", "!0r,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMipsLalo, 0x34000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lalo/ori", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsLui, 0x3C000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "lui", "!0r,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMipsLb, 0x80000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lb", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsLbu, 0x90000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lbu", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsLh, 0x84000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lh", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsLhu, 0x94000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lhu", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsLw, 0x8C000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lw", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsMove, 0x00000025,  // Or using zero reg.
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "move", "!0r,!1r", 4),
-    ENCODING_MAP(kMipsMfc1, 0x44000000,
-                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mfc1", "!0r,!1s", 4),
-    ENCODING_MAP(kMipsMtc1, 0x44800000,
-                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
-                 "mtc1", "!0r,!1s", 4),
-    ENCODING_MAP(kMipsMfhc1, 0x44600000,
-                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mfhc1", "!0r,!1s", 4),
-    ENCODING_MAP(kMipsMthc1, 0x44e00000,
-                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
-                 "mthc1", "!0r,!1s", 4),
-    ENCODING_MAP(kMipsNop, 0x00000000,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "nop", ";", 4),
-    ENCODING_MAP(kMipsNor, 0x00000027,  // Used for "not" too.
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "nor", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsOr, 0x00000025,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "or", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsOri, 0x34000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "ori", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsPref, 0xCC000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE2,
-                 "pref", "!0d,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsSb, 0xA0000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE,
-                 "sb", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsSeb, 0x7c000420,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "seb", "!0r,!1r", 4),
-    ENCODING_MAP(kMipsSeh, 0x7c000620,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "seh", "!0r,!1r", 4),
-    ENCODING_MAP(kMipsSh, 0xA4000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE,
-                 "sh", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsSll, 0x00000000,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "sll", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsSllv, 0x00000004,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sllv", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsSlt, 0x0000002a,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "slt", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsSlti, 0x28000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "slti", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsSltu, 0x0000002b,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sltu", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsSra, 0x00000003,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "sra", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsSrav, 0x00000007,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "srav", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsSrl, 0x00000002,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "srl", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMipsSrlv, 0x00000006,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "srlv", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsSubu, 0x00000023,  // Used for "neg" too.
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "subu", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsSw, 0xAC000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE,
-                 "sw", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMipsSync, 0x0000000f,
-                 kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP,
-                 "sync", ";", 4),
-    ENCODING_MAP(kMipsXor, 0x00000026,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "xor", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsXori, 0x38000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "xori", "!0r,!1r,0x!2h(!2d)", 4),
-
-    // The following are mips32r2 instructions.
-    ENCODING_MAP(kMipsR2Div, 0x0000001a,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF_HI | REG_DEF_LO | REG_USE01,
-                 "div", "!0r,!1r", 4),
-    ENCODING_MAP(kMipsR2Mul, 0x70000002,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsR2Mfhi, 0x00000010,
-                 kFmtBitBlt, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF0 | REG_USE_HI,
-                 "mfhi", "!0r", 4),
-    ENCODING_MAP(kMipsR2Mflo, 0x00000012,
-                 kFmtBitBlt, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF0 | REG_USE_LO,
-                 "mflo", "!0r", 4),
-    ENCODING_MAP(kMipsR2Movz, 0x0000000a,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "movz", "!0r,!1r,!2r", 4),
-
-    // The following are mips32r6 and mips64r6 instructions.
-    ENCODING_MAP(kMipsR6Div, 0x0000009a,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "div", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsR6Mod, 0x000000da,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mod", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMipsR6Mul, 0x00000098,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0r,!1r,!2r", 4),
-
-    // The following are mips64r6 instructions.
-    ENCODING_MAP(kMips64Daddiu, 0x64000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "daddiu", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Daddu, 0x0000002d,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "daddu", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dahi, 0x04060000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0,
-                 "dahi", "!0r,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMips64Dati, 0x041E0000,
-                 kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0,
-                 "dati", "!0r,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMips64Daui, 0x74000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "daui", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Ddiv, 0x0000009e,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "ddiv", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dmod, 0x000000de,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "dmod", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dmul, 0x0000009c,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "dmul", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dmfc1, 0x44200000,
-                 kFmtBitBlt, 20, 16, kFmtDfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "dmfc1", "!0r,!1s", 4),
-    ENCODING_MAP(kMips64Dmtc1, 0x44a00000,
-                 kFmtBitBlt, 20, 16, kFmtDfp, 15, 11, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
-                 "dmtc1", "!0r,!1s", 4),
-    ENCODING_MAP(kMips64Drotr32, 0x0000003e | (1 << 21),
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "drotr32", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsll, 0x00000038,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "dsll", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsll32, 0x0000003c,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "dsll32", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsrl, 0x0000003a,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "dsrl", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsrl32, 0x0000003e,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "dsrl32", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsra, 0x0000003b,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "dsra", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsra32, 0x0000003f,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "dsra32", "!0r,!1r,0x!2h(!2d)", 4),
-    ENCODING_MAP(kMips64Dsllv, 0x00000014,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "dsllv", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dsrlv, 0x00000016,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "dsrlv", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dsrav, 0x00000017,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "dsrav", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Dsubu, 0x0000002f,
-                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "dsubu", "!0r,!1r,!2r", 4),
-    ENCODING_MAP(kMips64Ld, 0xdc000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "ld", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMips64Lwu, 0x9c000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD,
-                 "lwu", "!0r,!1d(!2r)", 4),
-    ENCODING_MAP(kMips64Sd, 0xfc000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE,
-                 "sd", "!0r,!1d(!2r)", 4),
-
-    // The following are pseudoinstructions.
-    ENCODING_MAP(kMipsDelta, 0x27e00000,  // It is implemented as daddiu for mips64.
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, 15, 0,
-                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | REG_USE_LR |
-                 NEEDS_FIXUP, "addiu", "!0r,ra,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMipsDeltaHi, 0x3C000000,
-                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "lui", "!0r,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMipsDeltaLo, 0x34000000,
-                 kFmtBlt5_2, 16, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0_USE0 | NEEDS_FIXUP,
-                 "ori", "!0r,!0r,0x!1h(!1d)", 4),
-    ENCODING_MAP(kMipsCurrPC, 0x04110001,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH | REG_DEF_LR,
-                 "addiu", "ra,pc,8", 4),
-    ENCODING_MAP(kMipsUndefined, 0x64000000,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "undefined", "", 4),
-};
-
-
-/*
- * Convert a short-form branch to long form.  Hopefully, this won't happen
- * very often because the PIC sequence is especially unfortunate.
- *
- * Orig conditional branch
- * -----------------------
- *      beq  rs,rt,target
- *
- * Long conditional branch
- * -----------------------
- *      bne  rs,rt,hop
- *      bal  .+8   ; rRA <- anchor
- *      lui  rAT, ((target-anchor) >> 16)
- * anchor:
- *      ori  rAT, rAT, ((target-anchor) & 0xffff)
- *      addu rAT, rAT, rRA
- *      jalr rZERO, rAT
- * hop:
- *
- * Orig unconditional branch
- * -------------------------
- *      b target
- *
- * Long unconditional branch
- * -----------------------
- *      bal  .+8   ; rRA <- anchor
- *      lui  rAT, ((target-anchor) >> 16)
- * anchor:
- *      ori  rAT, rAT, ((target-anchor) & 0xffff)
- *      addu rAT, rAT, rRA
- *      jalr rZERO, rAT
- *
- *
- * NOTE: An out-of-range bal isn't supported because it should
- * never happen with the current PIC model.
- */
-void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) {
-  // For conditional branches we'll need to reverse the sense of the condition.
-  bool unconditional = false;
-  int opcode = lir->opcode;
-  int dalvik_offset = lir->dalvik_offset;
-  switch (opcode) {
-    case kMipsBal:
-      LOG(FATAL) << "long branch and link unsupported";
-      UNREACHABLE();
-    case kMipsB:
-      unconditional = true;
-      break;
-    case kMipsBeq:  opcode = kMipsBne; break;
-    case kMipsBne:  opcode = kMipsBeq; break;
-    case kMipsBeqz: opcode = kMipsBnez; break;
-    case kMipsBgez: opcode = kMipsBltz; break;
-    case kMipsBgtz: opcode = kMipsBlez; break;
-    case kMipsBlez: opcode = kMipsBgtz; break;
-    case kMipsBltz: opcode = kMipsBgez; break;
-    case kMipsBnez: opcode = kMipsBeqz; break;
-    default:
-      LOG(FATAL) << "Unexpected branch kind " << opcode;
-      UNREACHABLE();
-  }
-  LIR* hop_target = nullptr;
-  if (!unconditional) {
-    hop_target = RawLIR(dalvik_offset, kPseudoTargetLabel);
-    LIR* hop_branch = RawLIR(dalvik_offset, opcode, lir->operands[0],
-                             lir->operands[1], 0, 0, 0, hop_target);
-    InsertLIRBefore(lir, hop_branch);
-  }
-  LIR* curr_pc = RawLIR(dalvik_offset, kMipsCurrPC);
-  InsertLIRBefore(lir, curr_pc);
-  LIR* anchor = RawLIR(dalvik_offset, kPseudoTargetLabel);
-  LIR* delta_hi = RawLIR(dalvik_offset, kMipsDeltaHi, rAT, 0, WrapPointer(anchor), 0, 0,
-                         lir->target);
-  InsertLIRBefore(lir, delta_hi);
-  InsertLIRBefore(lir, anchor);
-  LIR* delta_lo = RawLIR(dalvik_offset, kMipsDeltaLo, rAT, 0, WrapPointer(anchor), 0, 0,
-                         lir->target);
-  InsertLIRBefore(lir, delta_lo);
-  LIR* addu = RawLIR(dalvik_offset, kMipsAddu, rAT, rAT, rRA);
-  InsertLIRBefore(lir, addu);
-  LIR* jalr = RawLIR(dalvik_offset, kMipsJalr, rZERO, rAT);
-  InsertLIRBefore(lir, jalr);
-  if (!unconditional) {
-    InsertLIRBefore(lir, hop_target);
-  }
-  NopLIR(lir);
-}
-
-/*
- * Assemble the LIR into binary instruction format.  Note that we may
- * discover that pc-relative displacements may not fit the selected
- * instruction.  In those cases we will try to substitute a new code
- * sequence or request that the trace be shortened and retried.
- */
-AssemblerStatus MipsMir2Lir::AssembleInstructions(CodeOffset start_addr) {
-  LIR *lir;
-  AssemblerStatus res = kSuccess;  // Assume success.
-
-  for (lir = first_lir_insn_; lir != nullptr; lir = NEXT_LIR(lir)) {
-    if (lir->opcode < 0) {
-      continue;
-    }
-
-    if (lir->flags.is_nop) {
-      continue;
-    }
-
-    if (lir->flags.fixup != kFixupNone) {
-      if (lir->opcode == kMipsDelta) {
-        /*
-         * The "Delta" pseudo-ops load the difference between
-         * two pc-relative locations into the target register
-         * found in operands[0].  The delta is determined by
-         * (label2 - label1), where label1 is a standard
-         * kPseudoTargetLabel and is stored in operands[2].
-         * If operands[3] is null, then label2 is a kPseudoTargetLabel
-         * and is found in lir->target.  If operands[3] is non-null,
-         * then it is a Switch/Data table.
-         */
-        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
-        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
-        int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
-        int delta = offset2 - offset1;
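-        // The delta fits the 16-bit addiu immediate only when it is
-        // non-negative with bit 15 clear, i.e. in [0, 0x7fff].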
-        if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
-          // Fits.
-          lir->operands[1] = delta;
-          if (cu_->target64) {
-            LIR *new_addiu = RawLIR(lir->dalvik_offset, kMips64Daddiu, lir->operands[0], rRAd,
-                                    delta);
-            InsertLIRBefore(lir, new_addiu);
-            NopLIR(lir);
-            res = kRetryAll;
-          }
-        } else {
-          // Doesn't fit - must expand to kMipsDelta[Hi|Lo] pair.
-          LIR *new_delta_hi = RawLIR(lir->dalvik_offset, kMipsDeltaHi, lir->operands[0], 0,
-                                     lir->operands[2], lir->operands[3], 0, lir->target);
-          InsertLIRBefore(lir, new_delta_hi);
-          LIR *new_delta_lo = RawLIR(lir->dalvik_offset, kMipsDeltaLo, lir->operands[0], 0,
-                                     lir->operands[2], lir->operands[3], 0, lir->target);
-          InsertLIRBefore(lir, new_delta_lo);
-          LIR *new_addu;
-          if (cu_->target64) {
-            new_addu = RawLIR(lir->dalvik_offset, kMips64Daddu, lir->operands[0], lir->operands[0],
-                              rRAd);
-          } else {
-            new_addu = RawLIR(lir->dalvik_offset, kMipsAddu, lir->operands[0], lir->operands[0],
-                              rRA);
-          }
-          InsertLIRBefore(lir, new_addu);
-          NopLIR(lir);
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kMipsDeltaLo) {
-        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
-        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
-        int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
-        int delta = offset2 - offset1;
-        lir->operands[1] = delta & 0xffff;
-      } else if (lir->opcode == kMipsDeltaHi) {
-        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
-        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
-        int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
-        int delta = offset2 - offset1;
-        lir->operands[1] = (delta >> 16) & 0xffff;
-      } else if (lir->opcode == kMipsB || lir->opcode == kMipsBal) {
-        LIR *target_lir = lir->target;
-        CodeOffset pc = lir->offset + 4;
-        CodeOffset target = target_lir->offset;
-        int delta = target - pc;
-        if (delta & 0x3) {
-          LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-        }
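-        // Branch instructions encode a signed 16-bit word offset, roughly
-        // +/-128KiB in bytes; these conservative bounds stay inside it.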
-        if (delta > 131068 || delta < -131069) {
-          res = kRetryAll;
-          ConvertShortToLongBranch(lir);
-        } else {
-          lir->operands[0] = delta >> 2;
-        }
-      } else if (lir->opcode >= kMipsBeqz && lir->opcode <= kMipsBnez) {
-        LIR *target_lir = lir->target;
-        CodeOffset pc = lir->offset + 4;
-        CodeOffset target = target_lir->offset;
-        int delta = target - pc;
-        if (delta & 0x3) {
-          LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-        }
-        if (delta > 131068 || delta < -131069) {
-          res = kRetryAll;
-          ConvertShortToLongBranch(lir);
-        } else {
-          lir->operands[1] = delta >> 2;
-        }
-      } else if (lir->opcode == kMipsBeq || lir->opcode == kMipsBne) {
-        LIR *target_lir = lir->target;
-        CodeOffset pc = lir->offset + 4;
-        CodeOffset target = target_lir->offset;
-        int delta = target - pc;
-        if (delta & 0x3) {
-          LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-        }
-        if (delta > 131068 || delta < -131069) {
-          res = kRetryAll;
-          ConvertShortToLongBranch(lir);
-        } else {
-          lir->operands[2] = delta >> 2;
-        }
-      } else if (lir->opcode == kMipsJal) {
-        CodeOffset cur_pc = (start_addr + lir->offset + 4) & ~3;
-        CodeOffset target = lir->operands[0];
-        /* ensure PC-region branch can be used */
-        DCHECK_EQ((cur_pc & 0xF0000000), (target & 0xF0000000));
-        if (target & 0x3) {
-          LOG(FATAL) << "Jump target not multiple of 4: " << target;
-        }
-        lir->operands[0] = target >> 2;
-      } else if (lir->opcode == kMipsLahi) { /* ld address hi (via lui) */
-        LIR *target_lir = lir->target;
-        CodeOffset target = start_addr + target_lir->offset;
-        lir->operands[1] = target >> 16;
-      } else if (lir->opcode == kMipsLalo) { /* ld address lo (via ori) */
-        LIR *target_lir = lir->target;
-        CodeOffset target = start_addr + target_lir->offset;
-        lir->operands[2] = lir->operands[2] + target;
-      }
-    }
-
-    /*
-     * If one of the pc-relative instructions expanded we'll have
-     * to make another pass.  Don't bother to fully assemble the
-     * instruction.
-     */
-    if (res != kSuccess) {
-      continue;
-    }
-    DCHECK(!IsPseudoLirOp(lir->opcode));
-    const MipsEncodingMap *encoder = &EncodingMap[lir->opcode];
-    uint32_t bits = encoder->skeleton;
-    int i;
-    for (i = 0; i < 4; i++) {
-      uint32_t operand;
-      uint32_t value;
-      operand = lir->operands[i];
-      switch (encoder->field_loc[i].kind) {
-        case kFmtUnused:
-          break;
-        case kFmtBitBlt:
-          if (encoder->field_loc[i].start == 0 && encoder->field_loc[i].end == 31) {
-            value = operand;
-          } else {
-            value = (operand << encoder->field_loc[i].start) &
-                ((1 << (encoder->field_loc[i].end + 1)) - 1);
-          }
-          bits |= value;
-          break;
-        case kFmtBlt5_2:
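-          // Write the same 5-bit register number into both field positions.
-          // Used by kMipsDeltaLo, whose ori names the register as both
-          // source and destination ("ori !0r,!0r,...").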
-          value = (operand & 0x1f);
-          bits |= (value << encoder->field_loc[i].start);
-          bits |= (value << encoder->field_loc[i].end);
-          break;
-        case kFmtDfp: {
-          // TODO: do we need to adjust now that we're using 64BitSolo?
-          DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
-          if (!cu_->target64) {
-            DCHECK_EQ((operand & 0x1), 0U);  // May only use even numbered registers for mips32.
-          }
-          value = (RegStorage::RegNum(operand) << encoder->field_loc[i].start) &
-              ((1 << (encoder->field_loc[i].end + 1)) - 1);
-          bits |= value;
-          break;
-        }
-        case kFmtSfp:
-          DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
-          value = (RegStorage::RegNum(operand) << encoder->field_loc[i].start) &
-              ((1 << (encoder->field_loc[i].end + 1)) - 1);
-          bits |= value;
-          break;
-        default:
-          LOG(FATAL) << "Bad encoder format: " << encoder->field_loc[i].kind;
-      }
-    }
-    // We only support little-endian MIPS.
-    code_buffer_.push_back(bits & 0xff);
-    code_buffer_.push_back((bits >> 8) & 0xff);
-    code_buffer_.push_back((bits >> 16) & 0xff);
-    code_buffer_.push_back((bits >> 24) & 0xff);
-    // TUNING: replace with proper delay slot handling.
-    if (encoder->size == 8) {
-      DCHECK(!IsPseudoLirOp(lir->opcode));
-      const MipsEncodingMap *encoder2 = &EncodingMap[kMipsNop];
-      uint32_t bits2 = encoder2->skeleton;
-      code_buffer_.push_back(bits2 & 0xff);
-      code_buffer_.push_back((bits2 >> 8) & 0xff);
-      code_buffer_.push_back((bits2 >> 16) & 0xff);
-      code_buffer_.push_back((bits2 >> 24) & 0xff);
-    }
-  }
-  return res;
-}
-
-size_t MipsMir2Lir::GetInsnSize(LIR* lir) {
-  DCHECK(!IsPseudoLirOp(lir->opcode));
-  return EncodingMap[lir->opcode].size;
-}
-
-// LIR offset assignment.
-// TODO: consolidate w/ Arm assembly mechanism.
-int MipsMir2Lir::AssignInsnOffsets() {
-  LIR* lir;
-  int offset = 0;
-
-  for (lir = first_lir_insn_; lir != nullptr; lir = NEXT_LIR(lir)) {
-    lir->offset = offset;
-    if (LIKELY(lir->opcode >= 0)) {
-      if (!lir->flags.is_nop) {
-        offset += lir->flags.size;
-      }
-    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
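-      // Emit a 2-byte filler when the running offset is not 4-byte aligned;
-      // operands[0] records whether padding was inserted.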
-      if (offset & 0x2) {
-        offset += 2;
-        lir->operands[0] = 1;
-      } else {
-        lir->operands[0] = 0;
-      }
-    }
-    // Pseudo opcodes don't consume space.
-  }
-  return offset;
-}
-
-/*
- * Walk the compilation unit and assign offsets to instructions
- * and literals and compute the total size of the compiled unit.
- * TODO: consolidate w/ Arm assembly mechanism.
- */
-void MipsMir2Lir::AssignOffsets() {
-  int offset = AssignInsnOffsets();
-
-  // Const values have to be word aligned.
-  offset = RoundUp(offset, 4);
-
-  // Set up offsets for literals.
-  data_offset_ = offset;
-
-  offset = AssignLiteralOffset(offset);
-
-  offset = AssignSwitchTablesOffset(offset);
-
-  offset = AssignFillArrayDataOffset(offset);
-
-  total_size_ = offset;
-}
-
-/*
- * Go over each instruction in the list and calculate the offset from the top
- * before sending them off to the assembler. If out-of-range branch distance is
- * seen rearrange the instructions a bit to correct it.
- * TODO: consolidate w/ Arm assembly mechanism.
- */
-void MipsMir2Lir::AssembleLIR() {
-  cu_->NewTimingSplit("Assemble");
-  AssignOffsets();
-  int assembler_retries = 0;
-  /*
-   * Assemble here.  Note that we generate code with optimistic assumptions
-   * and if found not to work, we'll have to redo the sequence and retry.
-   */
-
-  while (true) {
-    AssemblerStatus res = AssembleInstructions(0);
-    if (res == kSuccess) {
-      break;
-    } else {
-      assembler_retries++;
-      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
-        CodegenDump();
-        LOG(FATAL) << "Assembler error - too many retries";
-      }
-      // Redo offsets and try again.
-      AssignOffsets();
-      code_buffer_.clear();
-    }
-  }
-
-  // Install literals.
-  InstallLiteralPools();
-
-  // Install switch tables.
-  InstallSwitchTables();
-
-  // Install fill array data.
-  InstallFillArrayData();
-
-  // Create the mapping table and native offset to reference map.
-  cu_->NewTimingSplit("PcMappingTable");
-  CreateMappingTables();
-
-  cu_->NewTimingSplit("GcMap");
-  CreateNativeGcMap();
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mips/backend_mips.h b/compiler/dex/quick/mips/backend_mips.h
deleted file mode 100644
index f65e984..0000000
--- a/compiler/dex/quick/mips/backend_mips.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_MIPS_BACKEND_MIPS_H_
-#define ART_COMPILER_DEX_QUICK_MIPS_BACKEND_MIPS_H_
-
-namespace art {
-
-struct CompilationUnit;
-class Mir2Lir;
-class MIRGraph;
-class ArenaAllocator;
-
-Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                           ArenaAllocator* const arena);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_MIPS_BACKEND_MIPS_H_
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
deleted file mode 100644
index 4a736f3d..0000000
--- a/compiler/dex/quick/mips/call_mips.cc
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the Mips ISA */
-
-#include "codegen_mips.h"
-
-#include "art_method.h"
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "gc/accounting/card_table.h"
-#include "mips_lir.h"
-#include "mirror/object_array-inl.h"
-
-namespace art {
-
-bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb ATTRIBUTE_UNUSED,
-                                 MIR* mir ATTRIBUTE_UNUSED,
-                                 const InlineMethod& special ATTRIBUTE_UNUSED) {
-  // TODO
-  return false;
-}
-
-/*
- * The lack of pc-relative loads on Mips presents somewhat of a challenge
- * for our PIC switch table strategy.  To materialize the current location
- * we'll do a dummy JAL and reference our tables using rRA as the
- * base register.  Note that rRA will be used both as the base to
- * locate the switch table data and as the reference base for the switch
- * target offsets stored in the table.  We'll use a special pseudo-instruction
- * to represent the jal and trigger the construction of the
- * switch table offsets (which will happen after final assembly and all
- * labels are fixed).
- *
- * The test loop will look something like:
- *
- *   ori   r_end, rZERO, #table_size  ; size in bytes
- *   jal   BaseLabel         ; stores "return address" (BaseLabel) in rRA
- *   nop                     ; opportunistically fill
- * BaseLabel:
- *   addiu r_base, rRA, <table> - <BaseLabel>    ; table relative to BaseLabel
- *   addu  r_end, r_end, r_base                   ; end of table
- *   lw    r_val, [rSP, v_reg_off]                ; Test Value
- * loop:
- *   beq   r_base, r_end, done
- *   lw    r_key, 0(r_base)
- *   addu  r_base, 8
- *   bne   r_val, r_key, loop
- *   lw    r_disp, -4(r_base)
- *   addu  rRA, r_disp
- *   jalr  rZERO, rRA
- * done:
- *
- */
-void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later.
-  SwitchTable* tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  int elements = table[1];
-  switch_tables_.push_back(tab_rec);
-
-  // The table is composed of 8-byte key/disp pairs.
-  int byte_size = elements * 8;
-
-  int size_hi = byte_size >> 16;
-  int size_lo = byte_size & 0xffff;
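-  // Split the table size so it can be materialized below as lui (high
-  // half) plus ori (low half), e.g. 0x12345 -> lui 0x1, ori 0x2345.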
-
-  RegStorage r_end = AllocPtrSizeTemp();
-  if (size_hi) {
-    NewLIR2(kMipsLui, r_end.GetReg(), size_hi);
-  }
-  // Must prevent code motion for the curr pc pair.
-  GenBarrier();  // Scheduling barrier
-  NewLIR0(kMipsCurrPC);  // Really a jal to .+8.
-  // Now, fill the branch delay slot.
-  if (size_hi) {
-    NewLIR3(kMipsOri, r_end.GetReg(), r_end.GetReg(), size_lo);
-  } else {
-    NewLIR3(kMipsOri, r_end.GetReg(), rZERO, size_lo);
-  }
-  GenBarrier();  // Scheduling barrier.
-
-  // Construct BaseLabel and set up table base register.
-  LIR* base_label = NewLIR0(kPseudoTargetLabel);
-  // Remember base label so offsets can be computed later.
-  tab_rec->anchor = base_label;
-  RegStorage r_base = AllocPtrSizeTemp();
-  NewLIR4(kMipsDelta, r_base.GetReg(), 0, WrapPointer(base_label), WrapPointer(tab_rec));
-  OpRegRegReg(kOpAdd, r_end, r_end, r_base);
-
-  // Grab switch test value.
-  rl_src = LoadValue(rl_src, kCoreReg);
-
-  // Test loop.
-  RegStorage r_key = AllocTemp();
-  LIR* loop_label = NewLIR0(kPseudoTargetLabel);
-  LIR* exit_branch = OpCmpBranch(kCondEq, r_base, r_end, nullptr);
-  Load32Disp(r_base, 0, r_key);
-  OpRegImm(kOpAdd, r_base, 8);
-  OpCmpBranch(kCondNe, rl_src.reg, r_key, loop_label);
-  RegStorage r_disp = AllocTemp();
-  Load32Disp(r_base, -4, r_disp);
-  const RegStorage rs_ra = TargetPtrReg(kLr);
-  OpRegRegReg(kOpAdd, rs_ra, rs_ra, r_disp);
-  OpReg(kOpBx, rs_ra);
-  // Loop exit.
-  LIR* exit_label = NewLIR0(kPseudoTargetLabel);
-  exit_branch->target = exit_label;
-}
-
-/*
- * Code pattern will look something like:
- *
- *   lw    r_val
- *   jal   BaseLabel         ; stores "return address" (BaseLabel) in rRA
- *   nop                     ; opportunistically fill
- *   [subiu r_val, bias]      ; Remove bias if low_val != 0
- *   bound check -> done
- *   lw    r_disp, [rRA, r_val]
- *   addu  rRA, r_disp
- *   jalr  rZERO, rRA
- * done:
- */
-void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later.
-  SwitchTable* tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  int size = table[1];
-  switch_tables_.push_back(tab_rec);
-
-  // Get the switch value.
-  rl_src = LoadValue(rl_src, kCoreReg);
-
-  // Prepare the bias.  If too big, handle 1st stage here.
-  int low_key = s4FromSwitchData(&table[2]);
-  bool large_bias = false;
-  RegStorage r_key;
-  if (low_key == 0) {
-    r_key = rl_src.reg;
-  } else if ((low_key & 0xffff) != low_key) {
-    r_key = AllocTemp();
-    LoadConstant(r_key, low_key);
-    large_bias = true;
-  } else {
-    r_key = AllocTemp();
-  }
-
-  // Must prevent code motion for the curr pc pair.
-  GenBarrier();
-  NewLIR0(kMipsCurrPC);  // Really a jal to .+8.
-  // Now, fill the branch delay slot with bias strip.
-  if (low_key == 0) {
-    NewLIR0(kMipsNop);
-  } else {
-    if (large_bias) {
-      OpRegRegReg(kOpSub, r_key, rl_src.reg, r_key);
-    } else {
-      OpRegRegImm(kOpSub, r_key, rl_src.reg, low_key);
-    }
-  }
-  GenBarrier();  // Scheduling barrier.
-
-  // Construct BaseLabel and set up table base register.
-  LIR* base_label = NewLIR0(kPseudoTargetLabel);
-  // Remember base label so offsets can be computed later.
-  tab_rec->anchor = base_label;
-
-  // Bounds check - if < 0 or >= size continue following switch.
-  LIR* branch_over = OpCmpImmBranch(kCondHi, r_key, size - 1, nullptr);
-
-  // Materialize the table base pointer.
-  RegStorage r_base = AllocPtrSizeTemp();
-  NewLIR4(kMipsDelta, r_base.GetReg(), 0, WrapPointer(base_label), WrapPointer(tab_rec));
-
-  // Load the displacement from the switch table.
-  RegStorage r_disp = AllocTemp();
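-  // With scale 2, the 4-byte displacement is read from r_base + (r_key << 2).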
-  LoadBaseIndexed(r_base, r_key, r_disp, 2, k32);
-
-  // Add to rRA and go.
-  const RegStorage rs_ra = TargetPtrReg(kLr);
-  OpRegRegReg(kOpAdd, rs_ra, rs_ra, r_disp);
-  OpReg(kOpBx, rs_ra);
-
-  // Branch_over target here.
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
-}
-
-void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = cu_->target64 ? Thread::ExceptionOffset<8>().Int32Value() :
-      Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-  RegStorage reset_reg = AllocTempRef();
-  LoadRefDisp(TargetPtrReg(kSelf), ex_offset, rl_result.reg, kNotVolatile);
-  LoadConstant(reset_reg, 0);
-  StoreRefDisp(TargetPtrReg(kSelf), ex_offset, reset_reg, kNotVolatile);
-  FreeTemp(reset_reg);
-  StoreValue(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
-  RegStorage reg_card_base = AllocPtrSizeTemp();
-  RegStorage reg_card_no = AllocPtrSizeTemp();
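-  // Note: the card table base register doubles as the value stored; ART
-  // biases the table base so that writing its low byte marks a card dirty.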
-  if (cu_->target64) {
-    // NOTE: native pointer.
-    LoadWordDisp(TargetPtrReg(kSelf), Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
-    OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-    StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base), 0, kUnsignedByte);
-  } else {
-    // NOTE: native pointer.
-    LoadWordDisp(TargetPtrReg(kSelf), Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
-    OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-    StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
-  }
-  FreeTemp(reg_card_base);
-  FreeTemp(reg_card_no);
-}
-
-static dwarf::Reg DwarfCoreReg(int num) {
-  return dwarf::Reg::MipsCore(num);
-}
-
-void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
-  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);
-  int spill_count = num_core_spills_ + num_fp_spills_;
-  /*
-   * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live.
-   * Let the register allocation mechanism know so it doesn't try to use any of them when
-   * expanding the frame or flushing.
-   */
-  const RegStorage arg0 = TargetReg(kArg0);
-  const RegStorage arg1 = TargetReg(kArg1);
-  const RegStorage arg2 = TargetReg(kArg2);
-  const RegStorage arg3 = TargetReg(kArg3);
-  const RegStorage arg4 = TargetReg(kArg4);
-  const RegStorage arg5 = TargetReg(kArg5);
-  const RegStorage arg6 = TargetReg(kArg6);
-  const RegStorage arg7 = TargetReg(kArg7);
-
-  LockTemp(arg0);
-  LockTemp(arg1);
-  LockTemp(arg2);
-  LockTemp(arg3);
-  if (cu_->target64) {
-    LockTemp(arg4);
-    LockTemp(arg5);
-    LockTemp(arg6);
-    LockTemp(arg7);
-  }
-
-  bool skip_overflow_check;
-  InstructionSet target = (cu_->target64) ? kMips64 : kMips;
-  int ptr_size = cu_->target64 ? 8 : 4;
-
-  /*
-   * We can safely skip the stack overflow check if we're
-   * a leaf *and* our frame size < fudge factor.
-   */
-
-  skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, target);
-  RegStorage check_reg = AllocPtrSizeTemp();
-  RegStorage new_sp = AllocPtrSizeTemp();
-  const RegStorage rs_sp = TargetPtrReg(kSp);
-  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(target);
-  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
-  bool generate_explicit_stack_overflow_check = large_frame ||
-    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
-
-  if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
-      // Load stack limit.
-      if (cu_->target64) {
-        LoadWordDisp(TargetPtrReg(kSelf), Thread::StackEndOffset<8>().Int32Value(), check_reg);
-      } else {
-        Load32Disp(TargetPtrReg(kSelf), Thread::StackEndOffset<4>().Int32Value(), check_reg);
-      }
-    } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      Load32Disp(rs_sp, -kStackOverflowReservedUsableBytes, rs_rZERO);
-      MarkPossibleStackOverflowException();
-    }
-  }
-  // Spill core callee saves.
-  SpillCoreRegs();
-  // NOTE: promotion of FP regs currently unsupported, thus no FP spill.
-  DCHECK_EQ(num_fp_spills_, 0);
-  const int frame_sub = frame_size_ - spill_count * ptr_size;
-  if (!skip_overflow_check && generate_explicit_stack_overflow_check) {
-    class StackOverflowSlowPath : public LIRSlowPath {
-     public:
-      StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
-      }
-      void Compile() OVERRIDE {
-        m2l_->ResetRegPool();
-        m2l_->ResetDefTracking();
-        GenerateTargetLabel(kPseudoThrowTarget);
-        // RA is offset 0 since we push in reverse order.
-        m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr));
-        m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_);
-        m2l_->cfi().AdjustCFAOffset(-sp_displace_);
-        m2l_->ClobberCallerSave();
-        RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow);  // Doesn't clobber LR.
-        m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */,
-                         false /* UseLink */);
-        m2l_->cfi().AdjustCFAOffset(sp_displace_);
-      }
-
-     private:
-      const size_t sp_displace_;
-    };
-    OpRegRegImm(kOpSub, new_sp, rs_sp, frame_sub);
-    LIR* branch = OpCmpBranch(kCondUlt, new_sp, check_reg, nullptr);
-    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size));
-    // TODO: avoid copy for small frame sizes.
-    OpRegCopy(rs_sp, new_sp);  // Establish stack.
-    cfi_.AdjustCFAOffset(frame_sub);
-  } else {
-    // Here if skip_overflow_check or doing implicit stack overflow check.
-    // Just make room on the stack for the frame now.
-    OpRegImm(kOpSub, rs_sp, frame_sub);
-    cfi_.AdjustCFAOffset(frame_sub);
-  }
-
-  FlushIns(ArgLocs, rl_method);
-
-  FreeTemp(arg0);
-  FreeTemp(arg1);
-  FreeTemp(arg2);
-  FreeTemp(arg3);
-  if (cu_->target64) {
-    FreeTemp(arg4);
-    FreeTemp(arg5);
-    FreeTemp(arg6);
-    FreeTemp(arg7);
-  }
-}
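
For orientation, the frame-setup policy above reduces to two predicates; the sketch below restates them in plain C++ (a sketch only, mirroring the logic in GenEntrySequence -- the helper names SkipOverflowCheck/UseExplicitCheck are hypothetical, while FrameNeedsStackCheck and GetStackOverflowReservedBytes are the functions used above):

    bool SkipOverflowCheck(bool is_leaf, size_t frame_size, InstructionSet isa) {
      return is_leaf && !FrameNeedsStackCheck(frame_size, isa);
    }
    bool UseExplicitCheck(size_t frame_size, InstructionSet isa, bool implicit_checks) {
      // A frame larger than the reserved region could step straight past the
      // guard page, so the implicit probe cannot be trusted for large frames.
      return frame_size > GetStackOverflowReservedBytes(isa) || !implicit_checks;
    }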
-
-void MipsMir2Lir::GenExitSequence() {
-  cfi_.RememberState();
-  /*
-   * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't
-   * allocated by the register utilities as temps.
-   */
-  LockTemp(TargetPtrReg(kRet0));
-  LockTemp(TargetPtrReg(kRet1));
-
-  UnSpillCoreRegs();
-  OpReg(kOpBx, TargetPtrReg(kLr));
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size_);
-}
-
-void MipsMir2Lir::GenSpecialExitSequence() {
-  OpReg(kOpBx, TargetPtrReg(kLr));
-}
-
-void MipsMir2Lir::GenSpecialEntryForSuspend() {
-  // Keep 16-byte stack alignment - push A0 (i.e. ArtMethod*), two filler words and RA for
-  // mips32, but only A0 and RA for mips64.
-  core_spill_mask_ = (1u << TargetPtrReg(kLr).GetRegNum());
-  num_core_spills_ = 1u;
-  fp_spill_mask_ = 0u;
-  num_fp_spills_ = 0u;
-  frame_size_ = 16u;
-  core_vmap_table_.clear();
-  fp_vmap_table_.clear();
-  const RegStorage rs_sp = TargetPtrReg(kSp);
-  OpRegImm(kOpSub, rs_sp, frame_size_);
-  cfi_.AdjustCFAOffset(frame_size_);
-  StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr));
-  cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4));
-  StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0));
-  // Do not generate CFI for scratch register A0.
-}
-
-void MipsMir2Lir::GenSpecialExitForSuspend() {
-  // Pop the frame. Don't pop ArtMethod*, it's no longer needed.
-  const RegStorage rs_sp = TargetPtrReg(kSp);
-  LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr));
-  cfi_.Restore(DwarfCoreReg(rRA));
-  OpRegImm(kOpAdd, rs_sp, frame_size_);
-  cfi_.AdjustCFAOffset(-frame_size_);
-}
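
For reference, the 16-byte suspend frame built by the entry/exit pair above, with offsets relative to the post-decrement SP (derived from the stores and loads emitted above):

    mips32:  [12] RA   [8] filler   [4] filler   [0] A0 (ArtMethod*)
    mips64:  [ 8] RA                             [0] A0 (ArtMethod*)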
-
-/*
- * Bit of a hack here - in the absence of a real scheduling pass,
- * emit the next instruction in static & direct invoke sequences.
- */
-int MipsMir2Lir::MipsNextSDCallInsn(CompilationUnit* cu, CallInfo* info, int state,
-                                    const MethodReference& target_method, uint32_t,
-                                    uintptr_t direct_code, uintptr_t direct_method,
-                                    InvokeType type) {
-  MipsMir2Lir* cg = static_cast<MipsMir2Lir*>(cu->cg.get());
-  if (info->string_init_offset != 0) {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0: {  // Grab target method* from thread pointer
-      cg->LoadWordDisp(cg->TargetPtrReg(kSelf), info->string_init_offset, arg0_ref);
-      break;
-    }
-    case 1:  // Grab the code from the method*
-      if (direct_code == 0) {
-        int32_t offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-            InstructionSetPointerSize(cu->instruction_set)).Int32Value();
-        cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else if (direct_code != 0 && direct_method != 0) {
-    switch (state) {
-      case 0:  // Get the current Method* [sets kArg0]
-        if (direct_code != static_cast<uintptr_t>(-1)) {
-          if (cu->target64) {
-            cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
-          } else {
-            cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
-          }
-        } else {
-          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-        }
-        if (direct_method != static_cast<uintptr_t>(-1)) {
-          if (cu->target64) {
-            cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
-          } else {
-            cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
-          }
-        } else {
-          cg->LoadMethodAddress(target_method, type, kArg0);
-        }
-        break;
-      default:
-        return -1;
-    }
-  } else {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-      case 0:  // Get the current Method* [sets kArg0]
-        // TUNING: we can save a reg copy if Method* has been promoted.
-        cg->LoadCurrMethodDirect(arg0_ref);
-        break;
-      case 1:  // Get method->dex_cache_resolved_methods_
-        cg->LoadBaseDisp(arg0_ref,
-                         ArtMethod::DexCacheResolvedMethodsOffset(
-                             cu->target64 ? kMips64PointerSize : kMipsPointerSize).Int32Value(),
-                         arg0_ref,
-                         cu->target64 ? k64 : k32,
-                         kNotVolatile);
-        // Set up direct code if known.
-        if (direct_code != 0) {
-          if (direct_code != static_cast<uintptr_t>(-1)) {
-            if (cu->target64) {
-              cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
-            } else {
-              cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
-            }
-          } else {
-            CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-            cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-          }
-        }
-        break;
-      case 2: {
-        // Grab target method*
-        CHECK_EQ(cu->dex_file, target_method.dex_file);
-        const size_t pointer_size = GetInstructionSetPointerSize(cu->instruction_set);
-        cg->LoadWordDisp(arg0_ref,
-                         cg->GetCachePointerOffset(target_method.dex_method_index,
-                                                   pointer_size),
-                         arg0_ref);
-        break;
-      }
-      case 3:  // Grab the code from the method*
-        if (direct_code == 0) {
-          int32_t offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-              InstructionSetPointerSize(cu->instruction_set)).Int32Value();
-          // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
-          cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
-        }
-        break;
-      default:
-        return -1;
-    }
-  }
-  return state + 1;
-}
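
A NextCallInsn callback like the one above is driven one state at a time until it reports -1; a minimal sketch of such a driver loop (illustrative only -- the real call site lives in the shared invoke lowering, interleaved with argument setup):

    int state = 0;
    while (state >= 0) {
      state = MipsNextSDCallInsn(cu, info, state, target_method, 0u,
                                 direct_code, direct_method, type);
    }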
-
-NextCallInsn MipsMir2Lir::GetNextSDCallInsn() {
-  return MipsNextSDCallInsn;
-}
-
-LIR* MipsMir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info ATTRIBUTE_UNUSED) {
-  return OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
deleted file mode 100644
index 378b9a0..0000000
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_MIPS_CODEGEN_MIPS_H_
-#define ART_COMPILER_DEX_QUICK_MIPS_CODEGEN_MIPS_H_
-
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir.h"
-#include "mips_lir.h"
-
-namespace art {
-
-struct CompilationUnit;
-
-class MipsMir2Lir FINAL : public Mir2Lir {
- protected:
-  class InToRegStorageMipsMapper : public InToRegStorageMapper {
-   public:
-    explicit InToRegStorageMipsMapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0), cur_fpu_reg_(0)
-        {}
-    virtual RegStorage GetNextReg(ShortyArg arg);
-    virtual void Reset() OVERRIDE {
-      cur_core_reg_ = 0;
-      cur_fpu_reg_ = 0;
-    }
-   protected:
-    Mir2Lir* m2l_;
-   private:
-    size_t cur_core_reg_;
-    size_t cur_fpu_reg_;
-  };
-
-  class InToRegStorageMips64Mapper : public InToRegStorageMapper {
-   public:
-    explicit InToRegStorageMips64Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_arg_reg_(0) {}
-    virtual RegStorage GetNextReg(ShortyArg arg);
-    virtual void Reset() OVERRIDE {
-      cur_arg_reg_ = 0;
-    }
-   protected:
-    Mir2Lir* m2l_;
-   private:
-    size_t cur_arg_reg_;
-  };
-
-  InToRegStorageMips64Mapper in_to_reg_storage_mips64_mapper_;
-  InToRegStorageMipsMapper in_to_reg_storage_mips_mapper_;
-  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
-    InToRegStorageMapper* res;
-    if (cu_->target64) {
-      res = &in_to_reg_storage_mips64_mapper_;
-    } else {
-      res = &in_to_reg_storage_mips_mapper_;
-    }
-    res->Reset();
-    return res;
-  }
-
- public:
-  MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
-
-  // Required for target - codegen utilities.
-  bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                          RegLocation rl_dest, int lit);
-  bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-  void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1, int32_t constant)
-  OVERRIDE;
-  void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1, int64_t constant)
-  OVERRIDE;
-  LIR* CheckSuspendUsingLoad() OVERRIDE;
-  RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
-  void ForceImplicitNullCheck(RegStorage reg, int opt_flags, bool is_wide);
-  LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                    VolatileKind is_volatile) OVERRIDE;
-  LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                       OpSize size) OVERRIDE;
-  LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
-  LIR* LoadConstantWideNoClobber(RegStorage r_dest, int64_t value);
-  LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-  LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
-                     VolatileKind is_volatile) OVERRIDE;
-  LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                        OpSize size) OVERRIDE;
-  LIR* GenAtomic64Load(RegStorage r_base, int displacement, RegStorage r_dest);
-  LIR* GenAtomic64Store(RegStorage r_base, int displacement, RegStorage r_src);
-
-  /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
-  void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
-
-  // Required for target - register utilities.
-  RegStorage Solo64ToPair64(RegStorage reg);
-  RegStorage Fp64ToSolo32(RegStorage reg);
-  RegStorage TargetReg(SpecialTargetRegister reg);
-  RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE;
-  RegStorage TargetPtrReg(SpecialTargetRegister reg) OVERRIDE {
-    return TargetReg(reg, cu_->target64 ? kWide : kNotWide);
-  }
-  RegLocation GetReturnAlt();
-  RegLocation GetReturnWideAlt();
-  RegLocation LocCReturn();
-  RegLocation LocCReturnRef();
-  RegLocation LocCReturnDouble();
-  RegLocation LocCReturnFloat();
-  RegLocation LocCReturnWide();
-  ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
-  void AdjustSpillMask();
-  void ClobberCallerSave();
-  void FreeCallTemps();
-  void LockCallTemps();
-  void CompilerInitializeRegAlloc();
-
-  // Required for target - miscellaneous.
-  void AssembleLIR();
-  int AssignInsnOffsets();
-  void AssignOffsets();
-  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
-  void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
-  void SetupTargetResourceMasks(LIR* lir, uint64_t flags, ResourceMask* use_mask,
-                                ResourceMask* def_mask) OVERRIDE;
-  const char* GetTargetInstFmt(int opcode);
-  const char* GetTargetInstName(int opcode);
-  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
-  ResourceMask GetPCUseDefEncoding() const OVERRIDE;
-  uint64_t GetTargetInstFlags(int opcode);
-  size_t GetInsnSize(LIR* lir) OVERRIDE;
-  bool IsUnconditionalBranch(LIR* lir);
-
-  // Get the register class for load/store of a field.
-  RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
-
-  // Required for target - Dalvik-level generators.
-  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation lr_shift);
-  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2, int flags);
-  void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                   RegLocation rl_dest, int scale);
-  void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                   RegLocation rl_src, int scale, bool card_mark);
-  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_shift, int flags);
-  void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2);
-  void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                       RegLocation rl_src2);
-  void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                RegLocation rl_src2);
-  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-  bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
-  bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
-  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-  bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
-  bool GenInlinedSqrt(CallInfo* info);
-  bool GenInlinedPeek(CallInfo* info, OpSize size);
-  bool GenInlinedPoke(CallInfo* info, OpSize size);
-  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2, int flags) OVERRIDE;
-  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
-  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-  void GenDivZeroCheckWide(RegStorage reg);
-  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
-  void GenExitSequence();
-  void GenSpecialExitSequence() OVERRIDE;
-  void GenSpecialEntryForSuspend() OVERRIDE;
-  void GenSpecialExitForSuspend() OVERRIDE;
-  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
-  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
-  void GenSelect(BasicBlock* bb, MIR* mir);
-  void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                        int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                        RegisterClass dest_reg_class) OVERRIDE;
-  bool GenMemBarrier(MemBarrierKind barrier_kind);
-  void GenMoveException(RegLocation rl_dest);
-  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                     int first_bit, int second_bit);
-  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
-  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-  void GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
-  void GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
-  bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
-
-  // Required for target - single operation generators.
-  LIR* OpUnconditionalBranch(LIR* target);
-  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
-  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
-  LIR* OpCondBranch(ConditionCode cc, LIR* target);
-  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
-  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
-  LIR* OpIT(ConditionCode cond, const char* guide);
-  void OpEndIT(LIR* it);
-  LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-  void OpPcRelLoad(RegStorage reg, LIR* target);
-  LIR* OpReg(OpKind op, RegStorage r_dest_src);
-  void OpRegCopy(RegStorage r_dest, RegStorage r_src);
-  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
-  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
-  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
-  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
-  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
-  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
-  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
-  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
-  LIR* OpTestSuspend(LIR* target);
-  LIR* OpVldm(RegStorage r_base, int count);
-  LIR* OpVstm(RegStorage r_base, int count);
-  void OpRegCopyWide(RegStorage dest, RegStorage src);
-
-  // TODO: collapse r_dest.
-  LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
-  // TODO: collapse r_src.
-  LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-  void SpillCoreRegs();
-  void UnSpillCoreRegs();
-  static const MipsEncodingMap EncodingMap[kMipsLast];
-  bool InexpensiveConstantInt(int32_t value);
-  bool InexpensiveConstantFloat(int32_t value);
-  bool InexpensiveConstantLong(int64_t value);
-  bool InexpensiveConstantDouble(int64_t value);
-
-  bool WideGPRsAreAliases() const OVERRIDE {
-    return cu_->target64;  // Wide GPRs are formed by pairing on mips32.
-  }
-  bool WideFPRsAreAliases() const OVERRIDE {
-    return cu_->target64;  // Wide FPRs are formed by pairing on mips32.
-  }
-
-  LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
-
-  RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div,
-                        int flags) OVERRIDE;
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
-  NextCallInsn GetNextSDCallInsn() OVERRIDE;
-  LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
-
-  // Unimplemented intrinsics.
-  bool GenInlinedCharAt(CallInfo* info ATTRIBUTE_UNUSED) OVERRIDE {
-    return false;
-  }
-  bool GenInlinedAbsInt(CallInfo* info ATTRIBUTE_UNUSED) OVERRIDE {
-    return false;
-  }
-  bool GenInlinedAbsLong(CallInfo* info ATTRIBUTE_UNUSED) OVERRIDE {
-    return false;
-  }
-  bool GenInlinedIndexOf(CallInfo* info ATTRIBUTE_UNUSED, bool zero_based ATTRIBUTE_UNUSED)
-  OVERRIDE {
-    return false;
-  }
-
-  // True if the ISA is revision R6.
-  const bool isaIsR6_;
-
-  // True if the floating point unit is 32-bit.
-  const bool fpuIs32Bit_;
-
- private:
-  static int MipsNextSDCallInsn(CompilationUnit* cu, CallInfo* info, int state,
-                                const MethodReference& target_method, uint32_t,
-                                uintptr_t direct_code, uintptr_t direct_method,
-                                InvokeType type);
-
-  void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-  void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-
-  void ConvertShortToLongBranch(LIR* lir);
-
-  // Mips64 specific long gen methods:
-  void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-  void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-  void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                     RegLocation rl_src2, bool is_div, int flags);
-  void GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest, RegLocation rl_src,
-                         RegisterClass reg_class);
-  RegStorage AllocPtrSizeTemp(bool required = true);
-
-  /**
-   * @param reg #RegStorage containing a Solo64 input register (e.g. @c a1 or @c d0).
-   * @return A Solo32 with the same register number as the @p reg (e.g. @c a1 or @c f0).
-   * @see As64BitReg
-   */
-  RegStorage As32BitReg(RegStorage reg) {
-    DCHECK(!reg.IsPair());
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Expected 64b register";
-      } else {
-        LOG(WARNING) << "Expected 64b register";
-        return reg;
-      }
-    }
-    RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,
-                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
-    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
-              ->GetReg().GetReg(),
-              ret_val.GetReg());
-    return ret_val;
-  }
-
-  /**
-   * @param reg #RegStorage containing a Solo32 input register (e.g. @c a1 or @c f0).
-   * @return A Solo64 with the same register number as the @p reg (e.g. @c a1 or @c d0).
-   */
-  RegStorage As64BitReg(RegStorage reg) {
-    DCHECK(!reg.IsPair());
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Expected 32b register";
-      } else {
-        LOG(WARNING) << "Expected 32b register";
-        return reg;
-      }
-    }
-    RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,
-                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
-    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
-              ->GetReg().GetReg(),
-              ret_val.GetReg());
-    return ret_val;
-  }
-
-  RegStorage Check64BitReg(RegStorage reg) {
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Checked for 64b register";
-      } else {
-        LOG(WARNING) << "Checked for 64b register";
-        return As64BitReg(reg);
-      }
-    }
-    return reg;
-  }
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_MIPS_CODEGEN_MIPS_H_
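
The As32BitReg/As64BitReg helpers declared above reinterpret the same physical register at a different width rather than copying it; a usage sketch under that assumption:

    // Sketch: rl_index.reg is a Solo32; indexed addressing on mips64 wants a
    // Solo64 view of the same register number (see GenArrayGet further below).
    RegStorage wide_index = As64BitReg(rl_index.reg);
    RegStorage narrow_ref = As32BitReg(rl_result.reg);  // same reg, 32-bit view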
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
deleted file mode 100644
index 52706df..0000000
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_mips.h"
-
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mips_lir.h"
-
-namespace art {
-
-void MipsMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
-                                  RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kMipsNop;
-  RegLocation rl_result;
-
-  /*
-   * Don't attempt to optimize register usage since these opcodes call out to
-   * the handlers.
-   */
-  switch (opcode) {
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::ADD_FLOAT:
-      op = kMipsFadds;
-      break;
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT:
-      op = kMipsFsubs;
-      break;
-    case Instruction::DIV_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT:
-      op = kMipsFdivs;
-      break;
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT:
-      op = kMipsFmuls;
-      break;
-    case Instruction::REM_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT:
-      FlushAllRegs();   // Send everything to home location.
-      CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
-      rl_result = GetReturn(kFPReg);
-      StoreValue(rl_dest, rl_result);
-      return;
-    case Instruction::NEG_FLOAT:
-      GenNegFloat(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  rl_src1 = LoadValue(rl_src1, kFPReg);
-  rl_src2 = LoadValue(rl_src2, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kMipsNop;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::ADD_DOUBLE:
-      op = kMipsFaddd;
-      break;
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE:
-      op = kMipsFsubd;
-      break;
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE:
-      op = kMipsFdivd;
-      break;
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE:
-      op = kMipsFmuld;
-      break;
-    case Instruction::REM_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE:
-      FlushAllRegs();   // Send everything to home location.
-      CallRuntimeHelperRegLocationRegLocation(kQuickFmod, rl_src1, rl_src2, false);
-      rl_result = GetReturnWide(kFPReg);
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    case Instruction::NEG_DOUBLE:
-      GenNegDouble(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unpexpected opcode: " << opcode;
-  }
-  rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  DCHECK(rl_src1.wide);
-  rl_src2 = LoadValueWide(rl_src2, kFPReg);
-  DCHECK(rl_src2.wide);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_result.wide);
-  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                             RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                             int32_t constant ATTRIBUTE_UNUSED) {
-  // TODO: need mips implementation.
-  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in mips";
-}
-
-void MipsMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                              RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                              int64_t constant ATTRIBUTE_UNUSED) {
-  // TODO: need mips implementation.
-  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in mips";
-}
-
-void MipsMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
-                                RegLocation rl_src) {
-  int op = kMipsNop;
-  RegLocation rl_result;
-  switch (opcode) {
-    case Instruction::INT_TO_FLOAT:
-      op = kMipsFcvtsw;
-      break;
-    case Instruction::DOUBLE_TO_FLOAT:
-      op = kMipsFcvtsd;
-      break;
-    case Instruction::FLOAT_TO_DOUBLE:
-      op = kMipsFcvtds;
-      break;
-    case Instruction::INT_TO_DOUBLE:
-      op = kMipsFcvtdw;
-      break;
-    case Instruction::FLOAT_TO_INT:
-      GenConversionCall(kQuickF2iz, rl_dest, rl_src, kCoreReg);
-      return;
-    case Instruction::DOUBLE_TO_INT:
-      GenConversionCall(kQuickD2iz, rl_dest, rl_src, kCoreReg);
-      return;
-    case Instruction::LONG_TO_DOUBLE:
-      GenConversionCall(kQuickL2d, rl_dest, rl_src, kFPReg);
-      return;
-    case Instruction::FLOAT_TO_LONG:
-      GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
-      return;
-    case Instruction::LONG_TO_FLOAT:
-      GenConversionCall(kQuickL2f, rl_dest, rl_src, kFPReg);
-      return;
-    case Instruction::DOUBLE_TO_LONG:
-      GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  if (rl_src.wide) {
-    rl_src = LoadValueWide(rl_src, kFPReg);
-  } else {
-    rl_src = LoadValue(rl_src, kFPReg);
-  }
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  if (rl_dest.wide) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-// Get the reg storage for a wide FP argument: either a solo or a pair. The base is
-// MIPS-counted, i.e., only even values (0, 2) are valid.
-static RegStorage GetWideArgFP(bool fpuIs32Bit, size_t base) {
-  // TODO: make this computable, e.g., rMIPS_FARG0 + base. Right now inlining
-  // should optimize everything.
-  if (fpuIs32Bit) {
-    switch (base) {
-      case 0:
-        return RegStorage(RegStorage::k64BitPair, rFARG0, rFARG1);
-      case 2:
-        return RegStorage(RegStorage::k64BitPair, rFARG2, rFARG3);
-    }
-  } else {
-    switch (base) {
-      case 0:
-        return RegStorage(RegStorage::k64BitSolo, rFARG0);
-      case 2:
-        return RegStorage(RegStorage::k64BitSolo, rFARG2);
-    }
-  }
-  LOG(FATAL) << "Unsupported Mips.GetWideFP: " << fpuIs32Bit << " " << base;
-  UNREACHABLE();
-}
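
Spelled out, the mapping above is (a restatement of the switch, not additional behavior):

    GetWideArgFP(true,  0) -> pair {rFARG0, rFARG1}    // 32-bit FPU
    GetWideArgFP(true,  2) -> pair {rFARG2, rFARG3}
    GetWideArgFP(false, 0) -> solo rFARG0              // 64-bit FPU
    GetWideArgFP(false, 2) -> solo rFARG2

Any other base is a hard error, matching the LOG(FATAL) fall-through.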
-
-void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                           RegLocation rl_src2) {
-  bool wide = true;
-  QuickEntrypointEnum target;
-
-  switch (opcode) {
-    case Instruction::CMPL_FLOAT:
-      target = kQuickCmplFloat;
-      wide = false;
-      break;
-    case Instruction::CMPG_FLOAT:
-      target = kQuickCmpgFloat;
-      wide = false;
-      break;
-    case Instruction::CMPL_DOUBLE:
-      target = kQuickCmplDouble;
-      break;
-    case Instruction::CMPG_DOUBLE:
-      target = kQuickCmpgDouble;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-      target = kQuickCmplFloat;
-  }
-  FlushAllRegs();
-  LockCallTemps();
-  if (wide) {
-    RegStorage r_tmp1;
-    RegStorage r_tmp2;
-    if (cu_->target64) {
-      r_tmp1 = RegStorage(RegStorage::k64BitSolo, rFARG0);
-      r_tmp2 = RegStorage(RegStorage::k64BitSolo, rFARG1);
-    } else {
-      r_tmp1 = GetWideArgFP(fpuIs32Bit_, 0);
-      r_tmp2 = GetWideArgFP(fpuIs32Bit_, 2);
-    }
-    LoadValueDirectWideFixed(rl_src1, r_tmp1);
-    LoadValueDirectWideFixed(rl_src2, r_tmp2);
-  } else {
-    LoadValueDirectFixed(rl_src1, rs_rFARG0);
-    LoadValueDirectFixed(rl_src2, cu_->target64 ? rs_rFARG1 : rs_rFARG2);
-  }
-  RegStorage r_tgt = LoadHelper(target);
-  // NOTE: not a safepoint.
-  OpReg(kOpBlx, r_tgt);
-  RegLocation rl_result = GetReturn(kCoreReg);
-  StoreValue(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb ATTRIBUTE_UNUSED,
-                                      MIR* mir ATTRIBUTE_UNUSED,
-                                      bool gt_bias ATTRIBUTE_UNUSED,
-                                      bool is_double ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Need codegen for fused fp cmp branch";
-}
-
-void MipsMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  if (cu_->target64) {
-    rl_src = LoadValue(rl_src, kFPReg);
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(kMipsFnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else {
-    rl_src = LoadValue(rl_src, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
-  }
-  StoreValue(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  if (cu_->target64) {
-    rl_src = LoadValueWide(rl_src, kFPReg);
-    rl_result = EvalLocWide(rl_dest, kFPReg, true);
-    NewLIR2(kMipsFnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else {
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
-    OpRegCopy(rl_result.reg, rl_src.reg);
-  }
-  StoreValueWide(rl_dest, rl_result);
-}
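
On mips32 both negations above flip the IEEE-754 sign bit in a core register. Adding 0x80000000 is equivalent to XOR-ing it because the carry out of bit 31 is simply discarded; a standalone illustration (not ART code):

    uint32_t NegateFloatBits(uint32_t bits) {
      // Only bit 31 toggles: the addend has zeros in bits 0..30, and the
      // carry out of bit 31 falls off the end of the register.
      return bits + 0x80000000u;
    }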
-
-bool MipsMir2Lir::GenInlinedMinMax(CallInfo* info ATTRIBUTE_UNUSED,
-                                   bool is_min ATTRIBUTE_UNUSED,
-                                   bool is_long ATTRIBUTE_UNUSED) {
-  // TODO: need Mips implementation.
-  return false;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
deleted file mode 100644
index 8ca53ea..0000000
--- a/compiler/dex/quick/mips/int_mips.cc
+++ /dev/null
@@ -1,932 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the Mips ISA */
-
-#include "codegen_mips.h"
-
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mips_lir.h"
-#include "mirror/array-inl.h"
-
-namespace art {
-
-/*
- * Compare two 64-bit values
- *    x = y     return  0
- *    x < y     return -1
- *    x > y     return  1
- *
- * Mips32 implementation
 *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
 *    slt   t1,  y.hi, x.hi;        # (x.hi > y.hi) ? 1:0
 *    subu  res, t1, t0             # res = -1:1:0 for [ < > = ]
 *    bnez  res, finish
 *    sltu  t0, x.lo, y.lo
 *    sltu  t1, y.lo, x.lo
 *    subu  res, t1, t0
- * finish:
- *
- * Mips64 implementation
- *    slt   temp, x, y;             # (x < y) ? 1:0
- *    slt   res, y, x;              # (x > y) ? 1:0
- *    subu  res, res, temp;         # res = -1:1:0 for [ < > = ]
- *
- */
-void MipsMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  if (cu_->target64) {
-    RegStorage temp = AllocTempWide();
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    NewLIR3(kMipsSlt, temp.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-    NewLIR3(kMipsSlt, rl_result.reg.GetReg(), rl_src2.reg.GetReg(), rl_src1.reg.GetReg());
-    NewLIR3(kMipsSubu, rl_result.reg.GetReg(), rl_result.reg.GetReg(), temp.GetReg());
-    FreeTemp(temp);
-    StoreValue(rl_dest, rl_result);
-  } else {
-    RegStorage t0 = AllocTemp();
-    RegStorage t1 = AllocTemp();
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    NewLIR3(kMipsSlt, t0.GetReg(), rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
-    NewLIR3(kMipsSlt, t1.GetReg(), rl_src2.reg.GetHighReg(), rl_src1.reg.GetHighReg());
-    NewLIR3(kMipsSubu, rl_result.reg.GetReg(), t1.GetReg(), t0.GetReg());
-    LIR* branch = OpCmpImmBranch(kCondNe, rl_result.reg, 0, nullptr);
-    NewLIR3(kMipsSltu, t0.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
-    NewLIR3(kMipsSltu, t1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetLowReg());
-    NewLIR3(kMipsSubu, rl_result.reg.GetReg(), t1.GetReg(), t0.GetReg());
-    FreeTemp(t0);
-    FreeTemp(t1);
-    LIR* target = NewLIR0(kPseudoTargetLabel);
-    branch->target = target;
-    StoreValue(rl_dest, rl_result);
-  }
-}
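
Both paths above compute the canonical three-way result; its portable C++ equivalent (illustrative only) is:

    int32_t CmpLong(int64_t x, int64_t y) {
      return (x > y) - (x < y);  // 1, -1 or 0, matching the slt/subu sequences
    }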
-
-LIR* MipsMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
-  LIR* branch;
-  MipsOpCode slt_op;
-  MipsOpCode br_op;
-  bool cmp_zero = false;
-  bool swapped = false;
-  switch (cond) {
-    case kCondEq:
-      br_op = kMipsBeq;
-      cmp_zero = true;
-      break;
-    case kCondNe:
-      br_op = kMipsBne;
-      cmp_zero = true;
-      break;
-    case kCondUlt:
-      slt_op = kMipsSltu;
-      br_op = kMipsBnez;
-      break;
-    case kCondUge:
-      slt_op = kMipsSltu;
-      br_op = kMipsBeqz;
-      break;
-    case kCondGe:
-      slt_op = kMipsSlt;
-      br_op = kMipsBeqz;
-      break;
-    case kCondGt:
-      slt_op = kMipsSlt;
-      br_op = kMipsBnez;
-      swapped = true;
-      break;
-    case kCondLe:
-      slt_op = kMipsSlt;
-      br_op = kMipsBeqz;
-      swapped = true;
-      break;
-    case kCondLt:
-      slt_op = kMipsSlt;
-      br_op = kMipsBnez;
-      break;
-    case kCondHi:  // Gtu
-      slt_op = kMipsSltu;
-      br_op = kMipsBnez;
-      swapped = true;
-      break;
-    default:
-      LOG(FATAL) << "No support for ConditionCode: " << cond;
-      return nullptr;
-  }
-  if (cmp_zero) {
-    branch = NewLIR2(br_op, src1.GetReg(), src2.GetReg());
-  } else {
-    RegStorage t_reg = AllocTemp();
-    if (swapped) {
-      NewLIR3(slt_op, t_reg.GetReg(), src2.GetReg(), src1.GetReg());
-    } else {
-      NewLIR3(slt_op, t_reg.GetReg(), src1.GetReg(), src2.GetReg());
-    }
-    branch = NewLIR1(br_op, t_reg.GetReg());
-    FreeTemp(t_reg);
-  }
-  branch->target = target;
-  return branch;
-}
-
-LIR* MipsMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
-  LIR* branch;
-  if (check_value != 0) {
-    // TUNING: handle s16 & kCondLt/Mi case using slti.
-    RegStorage t_reg = AllocTemp();
-    LoadConstant(t_reg, check_value);
-    branch = OpCmpBranch(cond, reg, t_reg, target);
-    FreeTemp(t_reg);
-    return branch;
-  }
-  MipsOpCode opc;
-  switch (cond) {
-    case kCondEq: opc = kMipsBeqz; break;
-    case kCondGe: opc = kMipsBgez; break;
-    case kCondGt: opc = kMipsBgtz; break;
-    case kCondLe: opc = kMipsBlez; break;
-    // case kCondMi:
-    case kCondLt: opc = kMipsBltz; break;
-    case kCondNe: opc = kMipsBnez; break;
-    default:
-      // TUNING: use slti when applicable.
-      RegStorage t_reg = AllocTemp();
-      LoadConstant(t_reg, check_value);
-      branch = OpCmpBranch(cond, reg, t_reg, target);
-      FreeTemp(t_reg);
-      return branch;
-  }
-  branch = NewLIR1(opc, reg.GetReg());
-  branch->target = target;
-  return branch;
-}
-
-LIR* MipsMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
-  LIR* res;
-  MipsOpCode opcode;
-
-  if (!cu_->target64) {
-    // If src or dest is a pair, we'll be using low reg.
-    if (r_dest.IsPair()) {
-      r_dest = r_dest.GetLow();
-    }
-    if (r_src.IsPair()) {
-      r_src = r_src.GetLow();
-    }
-  } else {
-    DCHECK(!r_dest.IsPair() && !r_src.IsPair());
-  }
-
-  if (r_dest.IsFloat() || r_src.IsFloat())
-    return OpFpRegCopy(r_dest, r_src);
-  if (cu_->target64) {
-    // TODO: Check that r_src and r_dest are both 32 or both 64 bits wide on Mips64.
-    if (r_dest.Is64Bit() || r_src.Is64Bit()) {
-      opcode = kMipsMove;
-    } else {
-      opcode = kMipsSll;
-    }
-  } else {
-    opcode = kMipsMove;
-  }
-  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
-}
-
-void MipsMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
-    AppendLIR(res);
-  }
-}
-
-void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  if (cu_->target64) {
-    OpRegCopy(r_dest, r_src);
-    return;
-  }
-  if (r_dest != r_src) {
-    bool dest_fp = r_dest.IsFloat();
-    bool src_fp = r_src.IsFloat();
-    if (dest_fp) {
-      if (src_fp) {
-        // Here if both src and dest are fp registers. OpRegCopy will choose the right copy
-        // (solo or pair).
-        OpRegCopy(r_dest, r_src);
-      } else {
-        // Here if dest is an fp reg and src is a core reg. Note that the operands are
-        // swapped for the mtc1 and mthc1 instructions.
-        if (fpuIs32Bit_) {
-          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
-          NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
-        } else {
-          r_dest = Fp64ToSolo32(r_dest);
-          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
-          NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
-        }
-      }
-    } else {
-      if (src_fp) {
-        // Here if dest is core reg and src is fp reg.
-        if (fpuIs32Bit_) {
-          NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
-          NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
-        } else {
-          r_src = Fp64ToSolo32(r_src);
-          NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetReg());
-          NewLIR2(kMipsMfhc1, r_dest.GetHighReg(), r_src.GetReg());
-        }
-      } else {
-        // Here if both src and dest are core registers.
-        // Handle overlap
-        if (r_src.GetHighReg() != r_dest.GetLowReg()) {
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-        } else if (r_src.GetLowReg() != r_dest.GetHighReg()) {
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-        } else {
-          RegStorage r_tmp = AllocTemp();
-          OpRegCopy(r_tmp, r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_tmp);
-          FreeTemp(r_tmp);
-        }
-      }
-    }
-  }
-}
-
-void MipsMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                                   int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                   RegisterClass dest_reg_class ATTRIBUTE_UNUSED) {
-  // Implement as a branch-over.
-  // TODO: Conditional move?
-  LoadConstant(rs_dest, true_val);
-  LIR* ne_branchover = OpCmpBranch(code, left_op, right_op, nullptr);
-  LoadConstant(rs_dest, false_val);
-  LIR* target_label = NewLIR0(kPseudoTargetLabel);
-  ne_branchover->target = target_label;
-}
-
-void MipsMir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Need codegen for select";
-}
-
-void MipsMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb ATTRIBUTE_UNUSED,
-                                        MIR* mir ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Need codegen for fused long cmp branch";
-}
-
-RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
-                                   bool is_div) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  if (isaIsR6_) {
-    NewLIR3(is_div ? kMipsR6Div : kMipsR6Mod, rl_result.reg.GetReg(), reg1.GetReg(), reg2.GetReg());
-  } else {
-    NewLIR2(kMipsR2Div, reg1.GetReg(), reg2.GetReg());
-    NewLIR1(is_div ? kMipsR2Mflo : kMipsR2Mfhi, rl_result.reg.GetReg());
-  }
-  return rl_result;
-}
-
-RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
-  RegStorage t_reg = AllocTemp();
-  // lit is guaranteed to be a 16-bit constant.
-  if (IsUint<16>(lit)) {
-    NewLIR3(kMipsOri, t_reg.GetReg(), rZERO, lit);
-  } else {
-    // Addiu will sign extend the entire width (32 or 64) of the register.
-    NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
-  }
-  RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div);
-  FreeTemp(t_reg);
-  return rl_result;
-}
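
The ori/addiu split above matters because ori zero-extends its 16-bit immediate while addiu sign-extends it across the full register width; for example (assuming the 16-bit guarantee noted above):

    lit = 0x8000:  IsUint<16> holds -> ori   t, zero, 0x8000   // t = 0x00008000
    lit = -5:      IsUint<16> fails -> addiu t, zero, -5       // t = 0xFFFF...FFFB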
-
-RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                   RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                   RegLocation rl_src2 ATTRIBUTE_UNUSED,
-                                   bool is_div ATTRIBUTE_UNUSED,
-                                   int flags ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for Mips";
-  UNREACHABLE();
-}
-
-RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                      RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                      int lit ATTRIBUTE_UNUSED,
-                                      bool is_div ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRemLit for Mips";
-  UNREACHABLE();
-}
-
-bool MipsMir2Lir::GenInlinedCas(CallInfo* info ATTRIBUTE_UNUSED,
-                                bool is_long ATTRIBUTE_UNUSED,
-                                bool is_object ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-bool MipsMir2Lir::GenInlinedAbsFloat(CallInfo* info ATTRIBUTE_UNUSED) {
-  // TODO: add Mips implementation.
-  return false;
-}
-
-bool MipsMir2Lir::GenInlinedAbsDouble(CallInfo* info ATTRIBUTE_UNUSED) {
-  // TODO: add Mips implementation.
-  return false;
-}
-
-bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info ATTRIBUTE_UNUSED) {
-  return false;
-}
-
-bool MipsMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  if (size != kSignedByte) {
-    // MIPS supports only aligned access. Defer unaligned access to JNI implementation.
-    return false;
-  }
-  RegLocation rl_src_address = info->args[0];       // Long address.
-  if (!cu_->target64) {
-    rl_src_address = NarrowRegLoc(rl_src_address);  // Ignore high half in info->args[1].
-  }
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_address;
-  if (cu_->target64) {
-    rl_address = LoadValueWide(rl_src_address, kCoreReg);
-  } else {
-    rl_address = LoadValue(rl_src_address, kCoreReg);
-  }
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  DCHECK(size == kSignedByte);
-  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool MipsMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  if (size != kSignedByte) {
-    // MIPS supports only aligned access. Defer unaligned access to JNI implementation.
-    return false;
-  }
-  RegLocation rl_src_address = info->args[0];       // Long address.
-  if (!cu_->target64) {
-    rl_src_address = NarrowRegLoc(rl_src_address);  // Ignore high half in info->args[1].
-  }
-  RegLocation rl_src_value = info->args[2];         // [size] value.
-  RegLocation rl_address;
-  if (cu_->target64) {
-    rl_address = LoadValueWide(rl_src_address, kCoreReg);
-  } else {
-    rl_address = LoadValue(rl_src_address, kCoreReg);
-  }
-  DCHECK(size == kSignedByte);
-  RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
-  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-  return true;
-}
-
-void MipsMir2Lir::OpPcRelLoad(RegStorage reg ATTRIBUTE_UNUSED, LIR* target ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips";
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::OpVldm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpVldm for Mips";
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::OpVstm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpVstm for Mips";
-  UNREACHABLE();
-}
-
-void MipsMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
-                                                RegLocation rl_result,
-                                                int lit ATTRIBUTE_UNUSED,
-                                                int first_bit,
-                                                int second_bit) {
-  RegStorage t_reg = AllocTemp();
-  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
-  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
-  FreeTemp(t_reg);
-  if (first_bit != 0) {
-    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
-  }
-}
-
-void MipsMir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  if (cu_->target64) {
-    GenDivZeroCheck(reg);
-  } else {
-    DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
-    GenDivZeroCheck(t_reg);
-    FreeTemp(t_reg);
-  }
-}
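
The OR of the halves above works because a 64-bit pair is zero iff both words are zero; the scalar equivalent (illustrative only):

    bool IsWideZero(uint32_t lo, uint32_t hi) {
      return (lo | hi) == 0;  // the condition GenDivZeroCheck tests in t_reg
    }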
-
-// Test suspend flag, return target of taken suspend branch.
-LIR* MipsMir2Lir::OpTestSuspend(LIR* target) {
-  OpRegImm(kOpSub, TargetPtrReg(kSuspend), 1);
-  return OpCmpImmBranch((target == nullptr) ? kCondEq : kCondNe, TargetPtrReg(kSuspend), 0, target);
-}
-
-// Decrement register and branch on condition.
-LIR* MipsMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
-  OpRegImm(kOpSub, reg, 1);
-  return OpCmpImmBranch(c_code, reg, 0, target);
-}
-
-bool MipsMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
-                                     bool is_div ATTRIBUTE_UNUSED,
-                                     RegLocation rl_src ATTRIBUTE_UNUSED,
-                                     RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                     int lit ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of smallLiteralDivRem in Mips";
-  UNREACHABLE();
-}
-
-bool MipsMir2Lir::EasyMultiply(RegLocation rl_src ATTRIBUTE_UNUSED,
-                               RegLocation rl_dest ATTRIBUTE_UNUSED,
-                               int lit ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of easyMultiply in Mips";
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::OpIT(ConditionCode cond ATTRIBUTE_UNUSED, const char* guide ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpIT in Mips";
-  UNREACHABLE();
-}
-
-void MipsMir2Lir::OpEndIT(LIR* it ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpEndIT in Mips";
-}
-
-void MipsMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  /*
-   *  [v1 v0] =  [a1 a0] + [a3 a2];
-   *  addu v0,a2,a0
-   *  addu t1,a3,a1
-   *  sltu v1,v0,a2
-   *  addu v1,v1,t1
-   */
-
-  OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src2.reg.GetLow(), rl_src1.reg.GetLow());
-  RegStorage t_reg = AllocTemp();
-  OpRegRegReg(kOpAdd, t_reg, rl_src2.reg.GetHigh(), rl_src1.reg.GetHigh());
-  NewLIR3(kMipsSltu, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
-          rl_src2.reg.GetLowReg());
-  OpRegRegReg(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), t_reg);
-  FreeTemp(t_reg);
-  StoreValueWide(rl_dest, rl_result);
-}
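
The sltu in the sequence above recovers the carry out of the low-word addition; the same computation in portable C++ (a sketch mirroring the LIR, not ART code):

    void AddPair(uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi,
                 uint32_t* v_lo, uint32_t* v_hi) {
      *v_lo = a_lo + b_lo;
      uint32_t carry = (*v_lo < b_lo) ? 1u : 0u;  // sltu v1, v0, a2
      *v_hi = a_hi + b_hi + carry;
    }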
-
-void MipsMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  /*
-   *  [v1 v0] =  [a1 a0] - [a3 a2];
-   *  sltu  t1,a0,a2
-   *  subu  v0,a0,a2
-   *  subu  v1,a1,a3
-   *  subu  v1,v1,t1
-   */
-
-  RegStorage t_reg = AllocTemp();
-  NewLIR3(kMipsSltu, t_reg.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
-  OpRegRegReg(kOpSub, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
-  OpRegRegReg(kOpSub, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
-  OpRegRegReg(kOpSub, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), t_reg);
-  FreeTemp(t_reg);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                 RegLocation rl_src2, int flags) {
-  if (cu_->target64) {
-    switch (opcode) {
-      case Instruction::NOT_LONG:
-        GenNotLong(rl_dest, rl_src2);
-        return;
-      case Instruction::ADD_LONG:
-      case Instruction::ADD_LONG_2ADDR:
-        GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::SUB_LONG:
-      case Instruction::SUB_LONG_2ADDR:
-        GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::MUL_LONG:
-      case Instruction::MUL_LONG_2ADDR:
-        GenMulLong(rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::DIV_LONG:
-      case Instruction::DIV_LONG_2ADDR:
-        GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
-        return;
-      case Instruction::REM_LONG:
-      case Instruction::REM_LONG_2ADDR:
-        GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
-        return;
-      case Instruction::AND_LONG:
-      case Instruction::AND_LONG_2ADDR:
-        GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::OR_LONG:
-      case Instruction::OR_LONG_2ADDR:
-        GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::XOR_LONG:
-      case Instruction::XOR_LONG_2ADDR:
-        GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::NEG_LONG:
-        GenNegLong(rl_dest, rl_src2);
-        return;
-
-      default:
-        LOG(FATAL) << "Invalid long arith op";
-        return;
-    }
-  } else {
-    switch (opcode) {
-      case Instruction::ADD_LONG:
-      case Instruction::ADD_LONG_2ADDR:
-        GenAddLong(rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::SUB_LONG:
-      case Instruction::SUB_LONG_2ADDR:
-        GenSubLong(rl_dest, rl_src1, rl_src2);
-        return;
-      case Instruction::NEG_LONG:
-        GenNegLong(rl_dest, rl_src2);
-        return;
-      default:
-        break;
-    }
-    // Fallback for all other ops.
-    Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-  }
-}
-
-void MipsMir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_src2.reg);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegReg(kOpMvn, rl_result.reg, rl_src.reg);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR3(kMips64Dmul, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenDivRemLong(Instruction::Code opcode ATTRIBUTE_UNUSED,
-                                RegLocation rl_dest,
-                                RegLocation rl_src1,
-                                RegLocation rl_src2,
-                                bool is_div,
-                                int flags) {
-  // TODO: Implement easy div/rem?
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-    GenDivZeroCheckWide(rl_src2.reg);
-  }
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR3(is_div ? kMips64Ddiv : kMips64Dmod, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
-          rl_src2.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result;
-
-  if (cu_->target64) {
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    //  [v1 v0] = -[a1 a0]
-    //  negu  v0,a0
-    //  negu  v1,a1
-    //  sltu  t1,zero,v0
-    //  subu  v1,v1,t1
-    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_src.reg.GetLow());
-    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-    RegStorage t_reg = AllocTemp();
-    NewLIR3(kMipsSltu, t_reg.GetReg(), rZERO, rl_result.reg.GetLowReg());
-    OpRegRegReg(kOpSub, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), t_reg);
-    FreeTemp(t_reg);
-    StoreValueWide(rl_dest, rl_result);
-  }
-}
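
The 32-bit path above deserves a note: after negating both halves, the high word must be decremented by one exactly when the low word of the result is non-zero, and `sltu t1, zero, v0` computes that borrow bit. A minimal standalone C++ sketch of the same sequence (an illustrative helper, not ART code):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the negu/negu/sltu/subu sequence emitted above:
// [hi lo] = -[hi lo] using only 32-bit operations.
int64_t NegateLongIn32BitHalves(int64_t value) {
  uint32_t lo = static_cast<uint32_t>(value);        // a0
  uint32_t hi = static_cast<uint32_t>(value >> 32);  // a1
  uint32_t neg_lo = 0u - lo;                         // negu v0, a0
  uint32_t neg_hi = 0u - hi;                         // negu v1, a1
  uint32_t borrow = (neg_lo != 0u) ? 1u : 0u;        // sltu t1, zero, v0
  neg_hi -= borrow;                                  // subu v1, v1, t1
  return static_cast<int64_t>((static_cast<uint64_t>(neg_hi) << 32) | neg_lo);
}

int main() {
  assert(NegateLongIn32BitHalves(1) == -1);
  assert(NegateLongIn32BitHalves(-5) == 5);
  assert(NegateLongIn32BitHalves(INT64_C(1) << 32) == -(INT64_C(1) << 32));
  return 0;
}
```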
-
-/*
- * Generate array load
- */
-void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                              RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = RegClassBySize(size);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
-  RegLocation rl_result;
-  rl_array = LoadValue(rl_array, kRefReg);
-  rl_index = LoadValue(rl_index, kCoreReg);
-
-  // FIXME: need to add support for rl_index.is_const.
-
-  if (size == k64 || size == kDouble) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  // Null object?
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  RegStorage reg_ptr = (cu_->target64) ? AllocTempRef() : AllocTemp();
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  RegStorage reg_len;
-  if (needs_range_check) {
-    reg_len = AllocTemp();
-    // Get len.
-    Load32Disp(rl_array.reg, len_offset, reg_len);
-    MarkPossibleNullPointerException(opt_flags);
-  } else {
-    ForceImplicitNullCheck(rl_array.reg, opt_flags, false);
-  }
-  // reg_ptr -> array data.
-  OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-  FreeTemp(rl_array.reg);
-  if ((size == k64) || (size == kDouble)) {
-    if (scale) {
-      RegStorage r_new_index = AllocTemp();
-      OpRegRegImm(kOpLsl, r_new_index, rl_index.reg, scale);
-      OpRegReg(kOpAdd, reg_ptr, r_new_index);
-      FreeTemp(r_new_index);
-    } else {
-      OpRegReg(kOpAdd, reg_ptr, rl_index.reg);
-    }
-    FreeTemp(rl_index.reg);
-    rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-    LoadBaseDisp(reg_ptr, 0, rl_result.reg, size, kNotVolatile);
-
-    FreeTemp(reg_ptr);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    rl_result = EvalLoc(rl_dest, reg_class, true);
-
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-
-    if (cu_->target64) {
-      if (rl_result.ref) {
-        LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), As32BitReg(rl_result.reg), scale,
-                        kReference);
-      } else {
-        LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
-      }
-    } else {
-      LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
-    }
-
-    FreeTemp(reg_ptr);
-    StoreValue(rl_dest, rl_result);
-  }
-}
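
Both this load and the store below compute the element address the same way: array base plus the data offset, plus the index shifted left by the scale (log2 of the element size). A short sketch of that arithmetic, with illustrative names rather than ART API:

```cpp
#include <cstdint>

// Element address = array base + data offset + (index << scale),
// where scale is log2 of the element size (2 for 32-bit, 3 for 64-bit).
uintptr_t ArrayElementAddress(uintptr_t array_base, int32_t data_offset,
                              uint32_t index, int scale) {
  uintptr_t reg_ptr = array_base + data_offset;               // kOpAdd above
  return reg_ptr + (static_cast<uintptr_t>(index) << scale);  // indexed access
}
```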
-
-/*
- * Generate array store
- */
-void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = RegClassBySize(size);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
-
-  if (size == k64 || size == kDouble) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  rl_array = LoadValue(rl_array, kRefReg);
-  rl_index = LoadValue(rl_index, kCoreReg);
-
-  // FIXME: need to add support for rl_index.is_const.
-
-  RegStorage reg_ptr;
-  bool allocated_reg_ptr_temp = false;
-  if (IsTemp(rl_array.reg) && !card_mark) {
-    Clobber(rl_array.reg);
-    reg_ptr = rl_array.reg;
-  } else {
-    reg_ptr = AllocTemp();
-    OpRegCopy(reg_ptr, rl_array.reg);
-    allocated_reg_ptr_temp = true;
-  }
-
-  // Null object?
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  RegStorage reg_len;
-  if (needs_range_check) {
-    reg_len = AllocTemp();
-    // NOTE: max live temps (4) here.
-    // Get len.
-    Load32Disp(rl_array.reg, len_offset, reg_len);
-    MarkPossibleNullPointerException(opt_flags);
-  } else {
-    ForceImplicitNullCheck(rl_array.reg, opt_flags, false);
-  }
-  // reg_ptr -> array data.
-  OpRegImm(kOpAdd, reg_ptr, data_offset);
-  // At this point, reg_ptr points to the array data, with 2 live temps.
-  if ((size == k64) || (size == kDouble)) {
-    // TUNING: specific wide routine that can handle fp regs.
-    if (scale) {
-      RegStorage r_new_index = AllocTemp();
-      OpRegRegImm(kOpLsl, r_new_index, rl_index.reg, scale);
-      OpRegReg(kOpAdd, reg_ptr, r_new_index);
-      FreeTemp(r_new_index);
-    } else {
-      OpRegReg(kOpAdd, reg_ptr, rl_index.reg);
-    }
-    rl_src = LoadValueWide(rl_src, reg_class);
-
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-
-    StoreBaseDisp(reg_ptr, 0, rl_src.reg, size, kNotVolatile);
-  } else {
-    rl_src = LoadValue(rl_src, reg_class);
-    if (needs_range_check) {
-      GenArrayBoundsCheck(rl_index.reg, reg_len);
-      FreeTemp(reg_len);
-    }
-    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
-  }
-  if (allocated_reg_ptr_temp) {
-    FreeTemp(reg_ptr);
-  }
-  if (card_mark) {
-    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
-  }
-}
-
-void MipsMir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                 RegLocation rl_shift) {
-  if (!cu_->target64) {
-    Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
-    return;
-  }
-  OpKind op = kOpBkpt;
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      op = kOpLsr;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case: " << opcode;
-  }
-  rl_shift = LoadValue(rl_shift, kCoreReg);
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void MipsMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                    RegLocation rl_dest,
-                                    RegLocation rl_src1,
-                                    RegLocation rl_shift,
-                                    int flags ATTRIBUTE_UNUSED) {
-  if (!cu_->target64) {
-    // Default implementation is just to ignore the constant case.
-    GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
-    return;
-  }
-  OpKind op = kOpBkpt;
-  // Per the spec, we only care about the low 6 bits of the shift amount.
-  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  if (shift_amount == 0) {
-    StoreValueWide(rl_dest, rl_src1);
-    return;
-  }
-
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      op = kOpLsr;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case";
-  }
-  OpRegRegImm(op, rl_result.reg, rl_src1.reg, shift_amount);
-  StoreValueWide(rl_dest, rl_result);
-}
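
The `& 0x3f` mask implements the Dalvik rule that only the low six bits of a 64-bit shift amount are significant, so shifting by 65 behaves like shifting by 1. A quick check of that equivalence:

```cpp
#include <cassert>
#include <cstdint>

// Shift-amount wrapping for long shifts: only the low 6 bits matter.
int64_t ShlLong(int64_t value, int32_t shift) {
  return value << (shift & 0x3f);
}

int main() {
  assert(ShlLong(1, 65) == ShlLong(1, 1));  // 65 wraps to 1
  assert(ShlLong(3, 64) == 3);              // 64 wraps to 0
  return 0;
}
```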
-
-void MipsMir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                    RegLocation rl_src1, RegLocation rl_src2, int flags) {
-  // Default - bail to non-const handler.
-  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-}
-
-void MipsMir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
-  if (!cu_->target64) {
-    Mir2Lir::GenIntToLong(rl_dest, rl_src);
-    return;
-  }
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR3(kMipsSll, rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0);
-  StoreValueWide(rl_dest, rl_result);
-}
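
The `sll` with a zero shift amount is not a no-op on MIPS64: the architecture defines 32-bit operations to sign-extend their result into the full 64-bit register, which makes `sll rd, rs, 0` the canonical int-to-long conversion. Its effect, modeled in C++:

```cpp
#include <cassert>
#include <cstdint>

// What `sll rd, rs, 0` does on MIPS64: take the low 32 bits of the
// source register and sign-extend them to 64 bits.
int64_t SignExtend32To64(uint64_t reg) {
  return static_cast<int64_t>(static_cast<int32_t>(reg & 0xffffffffu));
}

int main() {
  assert(SignExtend32To64(UINT64_C(0x00000000ffffffff)) == -1);
  assert(SignExtend32To64(UINT64_C(0xdeadbeef00000001)) == 1);
  return 0;
}
```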
-
-void MipsMir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest,
-                                    RegLocation rl_src, RegisterClass reg_class) {
-  FlushAllRegs();   // Send everything to home location.
-  CallRuntimeHelperRegLocation(trampoline, rl_src, false);
-  if (rl_dest.wide) {
-    RegLocation rl_result;
-    rl_result = GetReturnWide(reg_class);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    RegLocation rl_result;
-    rl_result = GetReturn(reg_class);
-    StoreValue(rl_dest, rl_result);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
deleted file mode 100644
index 078ac0a..0000000
--- a/compiler/dex/quick/mips/mips_lir.h
+++ /dev/null
@@ -1,720 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_MIPS_MIPS_LIR_H_
-#define ART_COMPILER_DEX_QUICK_MIPS_MIPS_LIR_H_
-
-#include "dex/reg_location.h"
-#include "dex/reg_storage.h"
-
-namespace art {
-
-/*
- * Runtime register conventions.
- *
- *          mips32            | mips64
- * $0:      zero is always the value 0
- * $1:      at is scratch (normally used as temp reg by assembler)
- * $2,$3:   v0, v1 are scratch (normally hold subroutine return values)
- * $4-$7:   a0-a3 are scratch (normally hold subroutine arguments)
- * $8-$11:  t0-t3 are scratch | a4-a7 are scratch (normally hold subroutine arguments)
- * $12-$15: t4-t7 are scratch | t0-t3 are scratch
- * $16:     s0 (rSUSPEND) is reserved [holds suspend-check counter]
- * $17:     s1 (rSELF) is reserved [holds current &Thread]
- * $18-$23: s2-s7 are callee save (promotion target)
- * $24:     t8 is scratch
- * $25:     t9 is scratch (normally used for function calls)
- * $26,$27: k0, k1 are reserved for use by interrupt handlers
- * $28:     gp is reserved for global pointer
- * $29:     sp is reserved
- * $30:     s8 is callee save (promotion target)
- * $31:     ra is scratch (normally holds the return addr)
- *
- * Preserved across C calls: s0-s8
- * Trashed across C calls (mips32): at, v0-v1, a0-a3, t0-t9, gp, ra
- * Trashed across C calls (mips64): at, v0-v1, a0-a7, t0-t3, t8, t9, gp, ra
- *
- * Floating point registers (mips32)
- * NOTE: there are 32 fp registers (16 df pairs), but currently
- *       only 16 fp registers (8 df pairs) are supported.
- * f0-f15
- * df0-df7, where df0={f0,f1}, df1={f2,f3}, ... , df7={f14,f15}
- *
- * f0-f15 (df0-df7) trashed across C calls
- *
- * Floating point registers (mips64)
- * NOTE: there are 32 fp registers.
- * f0-f31
- *
- * For mips32 code use:
- *      a0-a3 to hold operands
- *      v0-v1 to hold results
- *      t0-t9 for temps
- *
- * For mips64 code use:
- *      a0-a7 to hold operands
- *      v0-v1 to hold results
- *      t0-t3, t8-t9 for temps
- *
- * Every jump/branch instruction has a delay slot after it.
- *
- * Stack frame diagram (stack grows down, higher addresses at top):
- *
- * +------------------------+
- * | IN[ins-1]              |  {Note: resides in caller's frame}
- * |       .                |
- * | IN[0]                  |
- * | caller's Method*       |
- * +========================+  {Note: start of callee's frame}
- * | spill region           |  {variable sized - will include lr if non-leaf.}
- * +------------------------+
- * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
- * +------------------------+
- * | V[locals-1]            |
- * | V[locals-2]            |
- * |      .                 |
- * |      .                 |
- * | V[1]                   |
- * | V[0]                   |
- * +------------------------+
- * |  0 to 3 words padding  |
- * +------------------------+
- * | OUT[outs-1]            |
- * | OUT[outs-2]            |
- * |       .                |
- * | OUT[0]                 |
- * | cur_method*            | <<== sp w/ 16-byte alignment
- * +========================+
- */
-
-
-#define LOWORD_OFFSET 0
-#define HIWORD_OFFSET 4
-
-#define rFARG0 rF12
-#define rs_rFARG0 rs_rF12
-#define rFARG1 rF13
-#define rs_rFARG1 rs_rF13
-#define rFARG2 rF14
-#define rs_rFARG2 rs_rF14
-#define rFARG3 rF15
-#define rs_rFARG3 rs_rF15
-
-enum MipsResourceEncodingPos {
-  kMipsGPReg0   = 0,
-  kMipsRegSP    = 29,
-  kMipsRegLR    = 31,
-  kMipsFPReg0   = 32,  // only 16 fp regs supported currently.
-  kMipsFPRegEnd   = 48,
-  kMipsRegHI    = kMipsFPRegEnd,
-  kMipsRegLO,
-  kMipsRegPC,
-  kMipsRegEnd   = 51,
-  // Mips64 related:
-  kMips64FPRegEnd = 64,
-  kMips64RegPC    = kMips64FPRegEnd,
-  kMips64RegEnd   = 65,
-};
-
-#define ENCODE_MIPS_REG_LIST(N)      (static_cast<uint64_t>(N))
-#define ENCODE_MIPS_REG_SP           (1ULL << kMipsRegSP)
-#define ENCODE_MIPS_REG_LR           (1ULL << kMipsRegLR)
-#define ENCODE_MIPS_REG_PC           (1ULL << kMipsRegPC)
-#define ENCODE_MIPS_REG_HI           (1ULL << kMipsRegHI)
-#define ENCODE_MIPS_REG_LO           (1ULL << kMipsRegLO)
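
These macros place each register at a fixed bit position in a 64-bit resource mask, so the def/use sets of an LIR instruction can be built and tested with plain bitwise operations. A small illustration using values from the enum above (SP at bit 29, RA/LR at bit 31):

```cpp
#include <cstdint>

// A call-like instruction that reads SP and clobbers RA combines the
// two single-bit masks with OR; membership tests are ANDs.
constexpr uint64_t kMaskSp = UINT64_C(1) << 29;  // ENCODE_MIPS_REG_SP
constexpr uint64_t kMaskLr = UINT64_C(1) << 31;  // ENCODE_MIPS_REG_LR
constexpr uint64_t kCallMask = kMaskSp | kMaskLr;

static_assert((kCallMask & kMaskSp) != 0, "mask records the SP use");
static_assert((kCallMask & kMaskLr) != 0, "mask records the RA clobber");
```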
-
-// Set FR_BIT to 0.
-// This bit determines how the CPU accesses FP registers.
-#define FR_BIT   0
-
-enum MipsNativeRegisterPool {  // private marker to avoid generate-operator-out.py from processing.
-  rZERO  = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
-  rZEROd = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  0,
-  rAT    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
-  rATd   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  1,
-  rV0    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
-  rV0d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  2,
-  rV1    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
-  rV1d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  3,
-  rA0    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
-  rA0d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  4,
-  rA1    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
-  rA1d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  5,
-  rA2    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
-  rA2d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  6,
-  rA3    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
-  rA3d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  7,
-  rT0_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  8,
-  rA4    = rT0_32,
-  rA4d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  8,
-  rT1_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  9,
-  rA5    = rT1_32,
-  rA5d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister |  9,
-  rT2_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
-  rA6    = rT2_32,
-  rA6d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 10,
-  rT3_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
-  rA7    = rT3_32,
-  rA7d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 11,
-  rT4_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
-  rT0    = rT4_32,
-  rT0d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 12,
-  rT5_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
-  rT1    = rT5_32,
-  rT1d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 13,
-  rT6_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
-  rT2    = rT6_32,
-  rT2d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 14,
-  rT7_32 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
-  rT3    = rT7_32,
-  rT3d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
-  rS0    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
-  rS0d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 16,
-  rS1    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 17,
-  rS1d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 17,
-  rS2    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 18,
-  rS2d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 18,
-  rS3    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 19,
-  rS3d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 19,
-  rS4    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 20,
-  rS4d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 20,
-  rS5    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 21,
-  rS5d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 21,
-  rS6    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 22,
-  rS6d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 22,
-  rS7    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 23,
-  rS7d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 23,
-  rT8    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 24,
-  rT8d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 24,
-  rT9    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 25,
-  rT9d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 25,
-  rK0    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 26,
-  rK0d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 26,
-  rK1    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 27,
-  rK1d   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 27,
-  rGP    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 28,
-  rGPd   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 28,
-  rSP    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 29,
-  rSPd   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 29,
-  rFP    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 30,
-  rFPd   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 30,
-  rRA    = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 31,
-  rRAd   = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 31,
-
-  rF0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  0,
-  rF1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  1,
-  rF2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  2,
-  rF3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  3,
-  rF4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  4,
-  rF5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  5,
-  rF6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  6,
-  rF7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  7,
-  rF8  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  8,
-  rF9  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  9,
-  rF10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
-  rF11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
-  rF12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
-  rF13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
-  rF14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
-  rF15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-
-  rF16 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
-  rF17 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
-  rF18 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
-  rF19 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
-  rF20 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
-  rF21 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
-  rF22 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
-  rF23 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
-  rF24 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
-  rF25 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
-  rF26 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
-  rF27 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
-  rF28 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
-  rF29 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
-  rF30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
-  rF31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
-
-#if 0
-  /*
-   * TODO: The shared resource mask doesn't have enough bit positions to describe all
-   * MIPS registers.  Expand it and enable use of fp registers 16 through 31.
-   */
-  rF16 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
-  rF17 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
-  rF18 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
-  rF19 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
-  rF20 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
-  rF21 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
-  rF22 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
-  rF23 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
-  rF24 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
-  rF25 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
-  rF26 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
-  rF27 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
-  rF28 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
-  rF29 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
-  rF30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
-  rF31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
-#endif
-  // Double precision registers used when the FPU is in 32-bit mode.
-  rD0_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD2_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD3_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD4_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD5_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD6_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD7_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-#if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rD8_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
-  rD9_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
-  rD10_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
-  rD11_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
-  rD12_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
-  rD13_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
-  rD14_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
-  rD15_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
-#endif
-  // Double precision registers used when the FPU is in 64-bit mode.
-  rD0_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD2_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD3_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD4_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD5_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD6_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD7_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-#if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rD8_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
-  rD9_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
-  rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
-  rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
-  rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
-  rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
-  rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
-  rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
-#endif
-
-  rD0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
-  rD2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
-  rD4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
-  rD6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
-  rD8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
-  rD10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  rD12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  rD14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  rD15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-  rD16 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
-  rD17 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17,
-  rD18 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
-  rD19 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19,
-  rD20 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
-  rD21 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21,
-  rD22 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
-  rD23 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23,
-  rD24 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
-  rD25 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25,
-  rD26 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
-  rD27 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27,
-  rD28 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
-  rD29 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29,
-  rD30 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
-  rD31 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31,
-};
-
-constexpr RegStorage rs_rZERO(RegStorage::kValid | rZERO);
-constexpr RegStorage rs_rAT(RegStorage::kValid | rAT);
-constexpr RegStorage rs_rV0(RegStorage::kValid | rV0);
-constexpr RegStorage rs_rV1(RegStorage::kValid | rV1);
-constexpr RegStorage rs_rA0(RegStorage::kValid | rA0);
-constexpr RegStorage rs_rA1(RegStorage::kValid | rA1);
-constexpr RegStorage rs_rA2(RegStorage::kValid | rA2);
-constexpr RegStorage rs_rA3(RegStorage::kValid | rA3);
-constexpr RegStorage rs_rT0_32(RegStorage::kValid | rT0_32);
-constexpr RegStorage rs_rA4 = rs_rT0_32;
-constexpr RegStorage rs_rT1_32(RegStorage::kValid | rT1_32);
-constexpr RegStorage rs_rA5 = rs_rT1_32;
-constexpr RegStorage rs_rT2_32(RegStorage::kValid | rT2_32);
-constexpr RegStorage rs_rA6 = rs_rT2_32;
-constexpr RegStorage rs_rT3_32(RegStorage::kValid | rT3_32);
-constexpr RegStorage rs_rA7 = rs_rT3_32;
-constexpr RegStorage rs_rT4_32(RegStorage::kValid | rT4_32);
-constexpr RegStorage rs_rT0 = rs_rT4_32;
-constexpr RegStorage rs_rT5_32(RegStorage::kValid | rT5_32);
-constexpr RegStorage rs_rT1 = rs_rT5_32;
-constexpr RegStorage rs_rT6_32(RegStorage::kValid | rT6_32);
-constexpr RegStorage rs_rT2 = rs_rT6_32;
-constexpr RegStorage rs_rT7_32(RegStorage::kValid | rT7_32);
-constexpr RegStorage rs_rT3 = rs_rT7_32;
-constexpr RegStorage rs_rS0(RegStorage::kValid | rS0);
-constexpr RegStorage rs_rS1(RegStorage::kValid | rS1);
-constexpr RegStorage rs_rS2(RegStorage::kValid | rS2);
-constexpr RegStorage rs_rS3(RegStorage::kValid | rS3);
-constexpr RegStorage rs_rS4(RegStorage::kValid | rS4);
-constexpr RegStorage rs_rS5(RegStorage::kValid | rS5);
-constexpr RegStorage rs_rS6(RegStorage::kValid | rS6);
-constexpr RegStorage rs_rS7(RegStorage::kValid | rS7);
-constexpr RegStorage rs_rT8(RegStorage::kValid | rT8);
-constexpr RegStorage rs_rT9(RegStorage::kValid | rT9);
-constexpr RegStorage rs_rK0(RegStorage::kValid | rK0);
-constexpr RegStorage rs_rK1(RegStorage::kValid | rK1);
-constexpr RegStorage rs_rGP(RegStorage::kValid | rGP);
-constexpr RegStorage rs_rSP(RegStorage::kValid | rSP);
-constexpr RegStorage rs_rFP(RegStorage::kValid | rFP);
-constexpr RegStorage rs_rRA(RegStorage::kValid | rRA);
-
-constexpr RegStorage rs_rZEROd(RegStorage::kValid | rZEROd);
-constexpr RegStorage rs_rATd(RegStorage::kValid | rATd);
-constexpr RegStorage rs_rV0d(RegStorage::kValid | rV0d);
-constexpr RegStorage rs_rV1d(RegStorage::kValid | rV1d);
-constexpr RegStorage rs_rA0d(RegStorage::kValid | rA0d);
-constexpr RegStorage rs_rA1d(RegStorage::kValid | rA1d);
-constexpr RegStorage rs_rA2d(RegStorage::kValid | rA2d);
-constexpr RegStorage rs_rA3d(RegStorage::kValid | rA3d);
-constexpr RegStorage rs_rA4d(RegStorage::kValid | rA4d);
-constexpr RegStorage rs_rA5d(RegStorage::kValid | rA5d);
-constexpr RegStorage rs_rA6d(RegStorage::kValid | rA6d);
-constexpr RegStorage rs_rA7d(RegStorage::kValid | rA7d);
-constexpr RegStorage rs_rT0d(RegStorage::kValid | rT0d);
-constexpr RegStorage rs_rT1d(RegStorage::kValid | rT1d);
-constexpr RegStorage rs_rT2d(RegStorage::kValid | rT2d);
-constexpr RegStorage rs_rT3d(RegStorage::kValid | rT3d);
-constexpr RegStorage rs_rS0d(RegStorage::kValid | rS0d);
-constexpr RegStorage rs_rS1d(RegStorage::kValid | rS1d);
-constexpr RegStorage rs_rS2d(RegStorage::kValid | rS2d);
-constexpr RegStorage rs_rS3d(RegStorage::kValid | rS3d);
-constexpr RegStorage rs_rS4d(RegStorage::kValid | rS4d);
-constexpr RegStorage rs_rS5d(RegStorage::kValid | rS5d);
-constexpr RegStorage rs_rS6d(RegStorage::kValid | rS6d);
-constexpr RegStorage rs_rS7d(RegStorage::kValid | rS7d);
-constexpr RegStorage rs_rT8d(RegStorage::kValid | rT8d);
-constexpr RegStorage rs_rT9d(RegStorage::kValid | rT9d);
-constexpr RegStorage rs_rK0d(RegStorage::kValid | rK0d);
-constexpr RegStorage rs_rK1d(RegStorage::kValid | rK1d);
-constexpr RegStorage rs_rGPd(RegStorage::kValid | rGPd);
-constexpr RegStorage rs_rSPd(RegStorage::kValid | rSPd);
-constexpr RegStorage rs_rFPd(RegStorage::kValid | rFPd);
-constexpr RegStorage rs_rRAd(RegStorage::kValid | rRAd);
-
-constexpr RegStorage rs_rF0(RegStorage::kValid | rF0);
-constexpr RegStorage rs_rF1(RegStorage::kValid | rF1);
-constexpr RegStorage rs_rF2(RegStorage::kValid | rF2);
-constexpr RegStorage rs_rF3(RegStorage::kValid | rF3);
-constexpr RegStorage rs_rF4(RegStorage::kValid | rF4);
-constexpr RegStorage rs_rF5(RegStorage::kValid | rF5);
-constexpr RegStorage rs_rF6(RegStorage::kValid | rF6);
-constexpr RegStorage rs_rF7(RegStorage::kValid | rF7);
-constexpr RegStorage rs_rF8(RegStorage::kValid | rF8);
-constexpr RegStorage rs_rF9(RegStorage::kValid | rF9);
-constexpr RegStorage rs_rF10(RegStorage::kValid | rF10);
-constexpr RegStorage rs_rF11(RegStorage::kValid | rF11);
-constexpr RegStorage rs_rF12(RegStorage::kValid | rF12);
-constexpr RegStorage rs_rF13(RegStorage::kValid | rF13);
-constexpr RegStorage rs_rF14(RegStorage::kValid | rF14);
-constexpr RegStorage rs_rF15(RegStorage::kValid | rF15);
-
-constexpr RegStorage rs_rF16(RegStorage::kValid | rF16);
-constexpr RegStorage rs_rF17(RegStorage::kValid | rF17);
-constexpr RegStorage rs_rF18(RegStorage::kValid | rF18);
-constexpr RegStorage rs_rF19(RegStorage::kValid | rF19);
-constexpr RegStorage rs_rF20(RegStorage::kValid | rF20);
-constexpr RegStorage rs_rF21(RegStorage::kValid | rF21);
-constexpr RegStorage rs_rF22(RegStorage::kValid | rF22);
-constexpr RegStorage rs_rF23(RegStorage::kValid | rF23);
-constexpr RegStorage rs_rF24(RegStorage::kValid | rF24);
-constexpr RegStorage rs_rF25(RegStorage::kValid | rF25);
-constexpr RegStorage rs_rF26(RegStorage::kValid | rF26);
-constexpr RegStorage rs_rF27(RegStorage::kValid | rF27);
-constexpr RegStorage rs_rF28(RegStorage::kValid | rF28);
-constexpr RegStorage rs_rF29(RegStorage::kValid | rF29);
-constexpr RegStorage rs_rF30(RegStorage::kValid | rF30);
-constexpr RegStorage rs_rF31(RegStorage::kValid | rF31);
-
-constexpr RegStorage rs_rD0_fr0(RegStorage::kValid | rD0_fr0);
-constexpr RegStorage rs_rD1_fr0(RegStorage::kValid | rD1_fr0);
-constexpr RegStorage rs_rD2_fr0(RegStorage::kValid | rD2_fr0);
-constexpr RegStorage rs_rD3_fr0(RegStorage::kValid | rD3_fr0);
-constexpr RegStorage rs_rD4_fr0(RegStorage::kValid | rD4_fr0);
-constexpr RegStorage rs_rD5_fr0(RegStorage::kValid | rD5_fr0);
-constexpr RegStorage rs_rD6_fr0(RegStorage::kValid | rD6_fr0);
-constexpr RegStorage rs_rD7_fr0(RegStorage::kValid | rD7_fr0);
-
-constexpr RegStorage rs_rD0_fr1(RegStorage::kValid | rD0_fr1);
-constexpr RegStorage rs_rD1_fr1(RegStorage::kValid | rD1_fr1);
-constexpr RegStorage rs_rD2_fr1(RegStorage::kValid | rD2_fr1);
-constexpr RegStorage rs_rD3_fr1(RegStorage::kValid | rD3_fr1);
-constexpr RegStorage rs_rD4_fr1(RegStorage::kValid | rD4_fr1);
-constexpr RegStorage rs_rD5_fr1(RegStorage::kValid | rD5_fr1);
-constexpr RegStorage rs_rD6_fr1(RegStorage::kValid | rD6_fr1);
-constexpr RegStorage rs_rD7_fr1(RegStorage::kValid | rD7_fr1);
-
-constexpr RegStorage rs_rD0(RegStorage::kValid | rD0);
-constexpr RegStorage rs_rD1(RegStorage::kValid | rD1);
-constexpr RegStorage rs_rD2(RegStorage::kValid | rD2);
-constexpr RegStorage rs_rD3(RegStorage::kValid | rD3);
-constexpr RegStorage rs_rD4(RegStorage::kValid | rD4);
-constexpr RegStorage rs_rD5(RegStorage::kValid | rD5);
-constexpr RegStorage rs_rD6(RegStorage::kValid | rD6);
-constexpr RegStorage rs_rD7(RegStorage::kValid | rD7);
-constexpr RegStorage rs_rD8(RegStorage::kValid | rD8);
-constexpr RegStorage rs_rD9(RegStorage::kValid | rD9);
-constexpr RegStorage rs_rD10(RegStorage::kValid | rD10);
-constexpr RegStorage rs_rD11(RegStorage::kValid | rD11);
-constexpr RegStorage rs_rD12(RegStorage::kValid | rD12);
-constexpr RegStorage rs_rD13(RegStorage::kValid | rD13);
-constexpr RegStorage rs_rD14(RegStorage::kValid | rD14);
-constexpr RegStorage rs_rD15(RegStorage::kValid | rD15);
-constexpr RegStorage rs_rD16(RegStorage::kValid | rD16);
-constexpr RegStorage rs_rD17(RegStorage::kValid | rD17);
-constexpr RegStorage rs_rD18(RegStorage::kValid | rD18);
-constexpr RegStorage rs_rD19(RegStorage::kValid | rD19);
-constexpr RegStorage rs_rD20(RegStorage::kValid | rD20);
-constexpr RegStorage rs_rD21(RegStorage::kValid | rD21);
-constexpr RegStorage rs_rD22(RegStorage::kValid | rD22);
-constexpr RegStorage rs_rD23(RegStorage::kValid | rD23);
-constexpr RegStorage rs_rD24(RegStorage::kValid | rD24);
-constexpr RegStorage rs_rD25(RegStorage::kValid | rD25);
-constexpr RegStorage rs_rD26(RegStorage::kValid | rD26);
-constexpr RegStorage rs_rD27(RegStorage::kValid | rD27);
-constexpr RegStorage rs_rD28(RegStorage::kValid | rD28);
-constexpr RegStorage rs_rD29(RegStorage::kValid | rD29);
-constexpr RegStorage rs_rD30(RegStorage::kValid | rD30);
-constexpr RegStorage rs_rD31(RegStorage::kValid | rD31);
-
-// RegisterLocation templates for return values (r_V0, or r_V0/r_V1).
-const RegLocation mips_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, rV0), INVALID_SREG, INVALID_SREG};
-const RegLocation mips64_loc_c_return_ref
-    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, rV0d), INVALID_SREG, INVALID_SREG};
-const RegLocation mips_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, rV0, rV1), INVALID_SREG, INVALID_SREG};
-const RegLocation mips64_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, rV0d), INVALID_SREG, INVALID_SREG};
-const RegLocation mips_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, rF0), INVALID_SREG, INVALID_SREG};
-// FIXME: move MIPS to k64Bitsolo for doubles
-const RegLocation mips_loc_c_return_double_fr0
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, rF0, rF1), INVALID_SREG, INVALID_SREG};
-const RegLocation mips_loc_c_return_double_fr1
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, rF0), INVALID_SREG, INVALID_SREG};
-const RegLocation mips64_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, rD0), INVALID_SREG, INVALID_SREG};
-
-enum MipsShiftEncodings {
-  kMipsLsl = 0x0,
-  kMipsLsr = 0x1,
-  kMipsAsr = 0x2,
-  kMipsRor = 0x3
-};
-
-// MIPS sync kinds (Note: support for kinds other than kSYNC0 may not exist).
-#define kSYNC0        0x00
-#define kSYNC_WMB     0x04
-#define kSYNC_MB      0x01
-#define kSYNC_ACQUIRE 0x11
-#define kSYNC_RELEASE 0x12
-#define kSYNC_RMB     0x13
-
-// TODO: Use smaller hammer when appropriate for target CPU.
-#define kST kSYNC0
-#define kSY kSYNC0
-
-/*
- * The following enum defines the list of MIPS instructions supported by the
- * assembler. Their corresponding EncodingMap positions will be defined in
- * assemble_mips.cc.
- */
-enum MipsOpCode {
-  kMipsFirst = 0,
-  // The following are common mips32r2, mips32r6 and mips64r6 instructions.
-  kMips32BitData = kMipsFirst,  // data [31..0].
-  kMipsAddiu,      // addiu t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0].
-  kMipsAddu,       // add d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100001].
-  kMipsAnd,        // and d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100100].
-  kMipsAndi,       // andi t,s,imm16 [001100] s[25..21] t[20..16] imm16[15..0].
-  kMipsB,          // b o   [0001000000000000] o[15..0].
-  kMipsBal,        // bal o [0000010000010001] o[15..0].
-  // NOTE: the code tests the range kMipsBeq through kMipsBne, so adding an instruction in this
-  // range may require updates.
-  kMipsBeq,        // beq s,t,o [000100] s[25..21] t[20..16] o[15..0].
-  kMipsBeqz,       // beqz s,o [000100] s[25..21] [00000] o[15..0].
-  kMipsBgez,       // bgez s,o [000001] s[25..21] [00001] o[15..0].
-  kMipsBgtz,       // bgtz s,o [000111] s[25..21] [00000] o[15..0].
-  kMipsBlez,       // blez s,o [000110] s[25..21] [00000] o[15..0].
-  kMipsBltz,       // bltz s,o [000001] s[25..21] [00000] o[15..0].
-  kMipsBnez,       // bnez s,o [000101] s[25..21] [00000] o[15..0].
-  kMipsBne,        // bne s,t,o [000101] s[25..21] t[20..16] o[15..0].
-  kMipsExt,        // ext t,s,p,z [011111] s[25..21] t[20..16] z[15..11] p[10..6] [000000].
-  kMipsFaddd,      // add.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000000].
-  kMipsFadds,      // add.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000000].
-  kMipsFsubd,      // sub.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000001].
-  kMipsFsubs,      // sub.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000001].
-  kMipsFdivd,      // div.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000011].
-  kMipsFdivs,      // div.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000011].
-  kMipsFmuld,      // mul.d d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000010].
-  kMipsFmuls,      // mul.s d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000010].
-  kMipsFcvtsd,     // cvt.s.d d,s [01000110001] [00000] s[15..11] d[10..6] [100000].
-  kMipsFcvtsw,     // cvt.s.w d,s [01000110100] [00000] s[15..11] d[10..6] [100000].
-  kMipsFcvtds,     // cvt.d.s d,s [01000110000] [00000] s[15..11] d[10..6] [100001].
-  kMipsFcvtdw,     // cvt.d.w d,s [01000110100] [00000] s[15..11] d[10..6] [100001].
-  kMipsFcvtwd,     // cvt.w.d d,s [01000110001] [00000] s[15..11] d[10..6] [100100].
-  kMipsFcvtws,     // cvt.w.s d,s [01000110000] [00000] s[15..11] d[10..6] [100100].
-  kMipsFmovd,      // mov.d d,s [01000110001] [00000] s[15..11] d[10..6] [000110].
-  kMipsFmovs,      // mov.s d,s [01000110000] [00000] s[15..11] d[10..6] [000110].
-  kMipsFnegd,      // neg.d d,s [01000110001] [00000] s[15..11] d[10..6] [000111].
-  kMipsFnegs,      // neg.s d,s [01000110000] [00000] s[15..11] d[10..6] [000111].
-  kMipsFldc1,      // ldc1 t,o(b) [110101] b[25..21] t[20..16] o[15..0].
-  kMipsFlwc1,      // lwc1 t,o(b) [110001] b[25..21] t[20..16] o[15..0].
-  kMipsFsdc1,      // sdc1 t,o(b) [111101] b[25..21] t[20..16] o[15..0].
-  kMipsFswc1,      // swc1 t,o(b) [111001] b[25..21] t[20..16] o[15..0].
-  kMipsJal,        // jal t [000011] t[25..0].
-  kMipsJalr,       // jalr d,s [000000] s[25..21] [00000] d[15..11] hint[10..6] [001001].
-  kMipsJr,         // jr s [000000] s[25..21] [0000000000] hint[10..6] [001000].
-  kMipsLahi,       // lui t,imm16 [00111100000] t[20..16] imm16[15..0] load addr hi.
-  kMipsLalo,       // ori t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0] load addr lo.
-  kMipsLui,        // lui t,imm16 [00111100000] t[20..16] imm16[15..0].
-  kMipsLb,         // lb t,o(b) [100000] b[25..21] t[20..16] o[15..0].
-  kMipsLbu,        // lbu t,o(b) [100100] b[25..21] t[20..16] o[15..0].
-  kMipsLh,         // lh t,o(b) [100001] b[25..21] t[20..16] o[15..0].
-  kMipsLhu,        // lhu t,o(b) [100101] b[25..21] t[20..16] o[15..0].
-  kMipsLw,         // lw t,o(b) [100011] b[25..21] t[20..16] o[15..0].
-  kMipsMove,       // move d,s [000000] s[25..21] [00000] d[15..11] [00000100101].
-  kMipsMfc1,       // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000].
-  kMipsMtc1,       // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000].
-  kMipsMfhc1,      // mfhc1 t,s [01000100011] t[20..16] s[15..11] [00000000000].
-  kMipsMthc1,      // mthc1 t,s [01000100111] t[20..16] s[15..11] [00000000000].
-  kMipsNop,        // nop [00000000000000000000000000000000].
-  kMipsNor,        // nor d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100111].
-  kMipsOr,         // or d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100101].
-  kMipsOri,        // ori t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0].
-  kMipsPref,       // pref h,o(b) [101011] b[25..21] h[20..16] o[15..0].
-  kMipsSb,         // sb t,o(b) [101000] b[25..21] t[20..16] o[15..0].
-  kMipsSeb,        // seb d,t [01111100000] t[20..16] d[15..11] [10000100000].
-  kMipsSeh,        // seh d,t [01111100000] t[20..16] d[15..11] [11000100000].
-  kMipsSh,         // sh t,o(b) [101001] b[25..21] t[20..16] o[15..0].
-  kMipsSll,        // sll d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [000000].
-  kMipsSllv,       // sllv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000100].
-  kMipsSlt,        // slt d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101010].
-  kMipsSlti,       // slti t,s,imm16 [001010] s[25..21] t[20..16] imm16[15..0].
-  kMipsSltu,       // sltu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101011].
-  kMipsSra,        // sra d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [000011].
-  kMipsSrav,       // srav d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000111].
-  kMipsSrl,        // srl d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [000010].
-  kMipsSrlv,       // srlv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000110].
-  kMipsSubu,       // subu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100011].
-  kMipsSw,         // sw t,o(b) [101011] b[25..21] t[20..16] o[15..0].
-  kMipsSync,       // sync kind [000000] [0000000000000000] s[10..6] [001111].
-  kMipsXor,        // xor d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100110].
-  kMipsXori,       // xori t,s,imm16 [001110] s[25..21] t[20..16] imm16[15..0].
-
-  // The following are mips32r2 instructions.
-  kMipsR2Div,      // div s,t [000000] s[25..21] t[20..16] [0000000000011010].
-  kMipsR2Mul,      // mul d,s,t [011100] s[25..21] t[20..16] d[15..11] [00000000010].
-  kMipsR2Mfhi,     // mfhi d [0000000000000000] d[15..11] [00000010000].
-  kMipsR2Mflo,     // mflo d [0000000000000000] d[15..11] [00000010010].
-  kMipsR2Movz,     // movz d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000001010].
-
-  // The following are mips32r6 and mips64r6 instructions.
-  kMipsR6Div,      // div d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011010].
-  kMipsR6Mod,      // mod d,s,t [000000] s[25..21] t[20..16] d[15..11] [00011011010].
-  kMipsR6Mul,      // mul d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011000].
-
-  // The following are mips64r6 instructions.
-  kMips64Daddiu,   // daddiu t,s,imm16 [011001] s[25..21] t[20..16] imm16[15..0].
-  kMips64Daddu,    // daddu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101101].
-  kMips64Dahi,     // dahi s,imm16 [000001] s[25..21] [00110] imm16[15..0].
-  kMips64Dati,     // dati s,imm16 [000001] s[25..21] [11110] imm16[15..0].
-  kMips64Daui,     // daui t,s,imm16 [011101] s[25..21] t[20..16] imm16[15..0].
-  kMips64Ddiv,     // ddiv  d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011110].
-  kMips64Dmod,     // dmod  d,s,t [000000] s[25..21] t[20..16] d[15..11] [00011011110].
-  kMips64Dmul,     // dmul  d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011100].
-  kMips64Dmfc1,    // dmfc1 t,s [01000100001] t[20..16] s[15..11] [00000000000].
-  kMips64Dmtc1,    // dmtc1 t,s [01000100101] t[20..16] s[15..11] [00000000000].
-  kMips64Drotr32,  // drotr32 d,t,a [00000000001] t[20..16] d[15..11] a[10..6] [111110].
-  kMips64Dsll,     // dsll    d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [111000].
-  kMips64Dsll32,   // dsll32  d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [111100].
-  kMips64Dsrl,     // dsrl    d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [111010].
-  kMips64Dsrl32,   // dsrl32  d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [111110].
-  kMips64Dsra,     // dsra    d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [111011].
-  kMips64Dsra32,   // dsra32  d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [111111].
-  kMips64Dsllv,    // dsllv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000010100].
-  kMips64Dsrlv,    // dsrlv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000010110].
-  kMips64Dsrav,    // dsrav d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000010111].
-  kMips64Dsubu,    // dsubu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101111].
-  kMips64Ld,       // ld  t,o(b) [110111] b[25..21] t[20..16] o[15..0].
-  kMips64Lwu,      // lwu t,o(b) [100111] b[25..21] t[20..16] o[15..0].
-  kMips64Sd,       // sd t,o(b) [111111] b[25..21] t[20..16] o[15..0].
-
-  // The following are pseudoinstructions.
-  kMipsDelta,      // Pseudo for ori t, s, <label>-<label>.
-  kMipsDeltaHi,    // Pseudo for lui t, high16(<label>-<label>).
-  kMipsDeltaLo,    // Pseudo for ori t, s, low16(<label>-<label>).
-  kMipsCurrPC,     // jal to .+8 to materialize pc.
-  kMipsUndefined,  // undefined [011001xxxxxxxxxxxxxxxx].
-  kMipsLast
-};
-std::ostream& operator<<(std::ostream& os, const MipsOpCode& rhs);
-
-// Instruction assembly field_loc kind.
-enum MipsEncodingKind {
-  kFmtUnused,
-  kFmtBitBlt,    // Bit string using end/start.
-  kFmtDfp,       // Double FP reg.
-  kFmtSfp,       // Single FP reg.
-  kFmtBlt5_2,    // Same 5-bit field to 2 locations.
-};
-std::ostream& operator<<(std::ostream& os, const MipsEncodingKind& rhs);
-
-// Struct used to define the snippet positions for each MIPS opcode.
-struct MipsEncodingMap {
-  uint32_t skeleton;
-  struct {
-    MipsEncodingKind kind;
-    int end;   // end for kFmtBitBlt, 1-bit slice end for FP regs.
-    int start;  // start for kFmtBitBlt, 4-bit slice end for FP regs.
-  } field_loc[4];
-  MipsOpCode opcode;
-  uint64_t flags;
-  const char *name;
-  const char* fmt;
-  int size;   // Note: size is in bytes.
-};
-
-extern MipsEncodingMap EncodingMap[kMipsLast];
-
-#define IS_UIMM16(v) ((0 <= (v)) && ((v) <= 65535))
-#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32766))
-#define IS_SIMM16_2WORD(v) ((-32764 <= (v)) && ((v) <= 32763))  // 2 offsets must fit.
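
These range checks decide whether a constant fits directly in a MIPS 16-bit immediate field or must first be materialized with a `lui`/`ori` pair. A sketch of how such a predicate gates instruction selection; the bounds copy the macro above verbatim, including its conservative 32766 upper bound:

```cpp
#include <cstdint>

// Mirrors IS_SIMM16 above: immediates outside this range cannot be
// encoded in a single I-type instruction and need lui/ori first.
bool FitsInSigned16BitImmediate(int32_t v) {
  return -32768 <= v && v <= 32766;  // conservative upper bound, per the macro
}
```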
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_MIPS_MIPS_LIR_H_
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
deleted file mode 100644
index 09d37f8..0000000
--- a/compiler/dex/quick/mips/target_mips.cc
+++ /dev/null
@@ -1,976 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_mips.h"
-
-#include <inttypes.h>
-
-#include <string>
-
-#include "arch/mips/instruction_set_features_mips.h"
-#include "backend_mips.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "driver/compiler_driver.h"
-#include "mips_lir.h"
-
-namespace art {
-
-static constexpr RegStorage core_regs_arr_32[] =
-    {rs_rZERO, rs_rAT, rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0_32, rs_rT1_32,
-     rs_rT2_32, rs_rT3_32, rs_rT4_32, rs_rT5_32, rs_rT6_32, rs_rT7_32, rs_rS0, rs_rS1, rs_rS2,
-     rs_rS3, rs_rS4, rs_rS5, rs_rS6, rs_rS7, rs_rT8, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rFP,
-     rs_rRA};
-static constexpr RegStorage sp_regs_arr_32[] =
-    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
-     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
-static constexpr RegStorage dp_fr0_regs_arr_32[] =
-    {rs_rD0_fr0, rs_rD1_fr0, rs_rD2_fr0, rs_rD3_fr0, rs_rD4_fr0, rs_rD5_fr0, rs_rD6_fr0,
-     rs_rD7_fr0};
-static constexpr RegStorage dp_fr1_regs_arr_32[] =
-    {rs_rD0_fr1, rs_rD1_fr1, rs_rD2_fr1, rs_rD3_fr1, rs_rD4_fr1, rs_rD5_fr1, rs_rD6_fr1,
-     rs_rD7_fr1};
-static constexpr RegStorage reserved_regs_arr_32[] =
-    {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA};
-static constexpr RegStorage core_temps_arr_32[] =
-    {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0_32, rs_rT1_32, rs_rT2_32, rs_rT3_32,
-     rs_rT4_32, rs_rT5_32, rs_rT6_32, rs_rT7_32, rs_rT8};
-static constexpr RegStorage sp_fr0_temps_arr_32[] =
-    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
-     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
-static constexpr RegStorage sp_fr1_temps_arr_32[] =
-    {rs_rF0, rs_rF2, rs_rF4, rs_rF6, rs_rF8, rs_rF10, rs_rF12, rs_rF14};
-static constexpr RegStorage dp_fr0_temps_arr_32[] =
-    {rs_rD0_fr0, rs_rD1_fr0, rs_rD2_fr0, rs_rD3_fr0, rs_rD4_fr0, rs_rD5_fr0, rs_rD6_fr0,
-     rs_rD7_fr0};
-static constexpr RegStorage dp_fr1_temps_arr_32[] =
-    {rs_rD0_fr1, rs_rD1_fr1, rs_rD2_fr1, rs_rD3_fr1, rs_rD4_fr1, rs_rD5_fr1, rs_rD6_fr1,
-     rs_rD7_fr1};
-
-static constexpr RegStorage core_regs_arr_64[] =
-    {rs_rZERO, rs_rAT, rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rA4, rs_rA5, rs_rA6,
-     rs_rA7, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rS0, rs_rS1, rs_rS2, rs_rS3, rs_rS4, rs_rS5, rs_rS6,
-     rs_rS7, rs_rT8, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rFP, rs_rRA};
-static constexpr RegStorage core_regs_arr_64d[] =
-    {rs_rZEROd, rs_rATd, rs_rV0d, rs_rV1d, rs_rA0d, rs_rA1d, rs_rA2d, rs_rA3d, rs_rA4d, rs_rA5d,
-     rs_rA6d, rs_rA7d, rs_rT0d, rs_rT1d, rs_rT2d, rs_rT3d, rs_rS0d, rs_rS1d, rs_rS2d, rs_rS3d,
-     rs_rS4d, rs_rS5d, rs_rS6d, rs_rS7d, rs_rT8d, rs_rT9d, rs_rK0d, rs_rK1d, rs_rGPd, rs_rSPd,
-     rs_rFPd, rs_rRAd};
-#if 0
-// TODO: f24-f31 must be saved before calls and restored after.
-static constexpr RegStorage sp_regs_arr_64[] =
-    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
-     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15, rs_rF16, rs_rF17, rs_rF18, rs_rF19, rs_rF20,
-     rs_rF21, rs_rF22, rs_rF23, rs_rF24, rs_rF25, rs_rF26, rs_rF27, rs_rF28, rs_rF29, rs_rF30,
-     rs_rF31};
-static constexpr RegStorage dp_regs_arr_64[] =
-    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7, rs_rD8, rs_rD9, rs_rD10,
-     rs_rD11, rs_rD12, rs_rD13, rs_rD14, rs_rD15, rs_rD16, rs_rD17, rs_rD18, rs_rD19, rs_rD20,
-     rs_rD21, rs_rD22, rs_rD23, rs_rD24, rs_rD25, rs_rD26, rs_rD27, rs_rD28, rs_rD29, rs_rD30,
-     rs_rD31};
-#else
-static constexpr RegStorage sp_regs_arr_64[] =
-    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
-     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15, rs_rF16, rs_rF17, rs_rF18, rs_rF19, rs_rF20,
-     rs_rF21, rs_rF22, rs_rF23};
-static constexpr RegStorage dp_regs_arr_64[] =
-    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7, rs_rD8, rs_rD9, rs_rD10,
-     rs_rD11, rs_rD12, rs_rD13, rs_rD14, rs_rD15, rs_rD16, rs_rD17, rs_rD18, rs_rD19, rs_rD20,
-     rs_rD21, rs_rD22, rs_rD23};
-#endif
-static constexpr RegStorage reserved_regs_arr_64[] =
-    {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA};
-static constexpr RegStorage reserved_regs_arr_64d[] =
-    {rs_rZEROd, rs_rATd, rs_rS0d, rs_rS1d, rs_rT9d, rs_rK0d, rs_rK1d, rs_rGPd, rs_rSPd, rs_rRAd};
-static constexpr RegStorage core_temps_arr_64[] =
-    {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rA4, rs_rA5, rs_rA6, rs_rA7, rs_rT0, rs_rT1,
-     rs_rT2, rs_rT3, rs_rT8};
-static constexpr RegStorage core_temps_arr_64d[] =
-    {rs_rV0d, rs_rV1d, rs_rA0d, rs_rA1d, rs_rA2d, rs_rA3d, rs_rA4d, rs_rA5d, rs_rA6d, rs_rA7d,
-     rs_rT0d, rs_rT1d, rs_rT2d, rs_rT3d, rs_rT8d};
-#if 0
-// TODO: f24-f31 must be saved before calls and restored after.
-static constexpr RegStorage sp_temps_arr_64[] =
-    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
-     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15, rs_rF16, rs_rF17, rs_rF18, rs_rF19, rs_rF20,
-     rs_rF21, rs_rF22, rs_rF23, rs_rF24, rs_rF25, rs_rF26, rs_rF27, rs_rF28, rs_rF29, rs_rF30,
-     rs_rF31};
-static constexpr RegStorage dp_temps_arr_64[] =
-    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7, rs_rD8, rs_rD9, rs_rD10,
-     rs_rD11, rs_rD12, rs_rD13, rs_rD14, rs_rD15, rs_rD16, rs_rD17, rs_rD18, rs_rD19, rs_rD20,
-     rs_rD21, rs_rD22, rs_rD23, rs_rD24, rs_rD25, rs_rD26, rs_rD27, rs_rD28, rs_rD29, rs_rD30,
-     rs_rD31};
-#else
-static constexpr RegStorage sp_temps_arr_64[] =
-    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
-     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15, rs_rF16, rs_rF17, rs_rF18, rs_rF19, rs_rF20,
-     rs_rF21, rs_rF22, rs_rF23};
-static constexpr RegStorage dp_temps_arr_64[] =
-    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7, rs_rD8, rs_rD9, rs_rD10,
-     rs_rD11, rs_rD12, rs_rD13, rs_rD14, rs_rD15, rs_rD16, rs_rD17, rs_rD18, rs_rD19, rs_rD20,
-     rs_rD21, rs_rD22, rs_rD23};
-#endif
-
-static constexpr ArrayRef<const RegStorage> empty_pool;
-static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> dp_fr0_regs_32(dp_fr0_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> dp_fr1_regs_32(dp_fr1_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> sp_fr0_temps_32(sp_fr0_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> sp_fr1_temps_32(sp_fr1_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> dp_fr0_temps_32(dp_fr0_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> dp_fr1_temps_32(dp_fr1_temps_arr_32);
-
-static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> core_regs_64d(core_regs_arr_64d);
-static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> reserved_regs_64d(reserved_regs_arr_64d);
-static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64);
-static constexpr ArrayRef<const RegStorage> core_temps_64d(core_temps_arr_64d);
-static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64);
-static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64);
-
-RegLocation MipsMir2Lir::LocCReturn() {
-  return mips_loc_c_return;
-}
-
-RegLocation MipsMir2Lir::LocCReturnRef() {
-  return cu_->target64 ? mips64_loc_c_return_ref : mips_loc_c_return;
-}
-
-RegLocation MipsMir2Lir::LocCReturnWide() {
-  return cu_->target64 ? mips64_loc_c_return_wide : mips_loc_c_return_wide;
-}
-
-RegLocation MipsMir2Lir::LocCReturnFloat() {
-  return mips_loc_c_return_float;
-}
-
-RegLocation MipsMir2Lir::LocCReturnDouble() {
-  if (cu_->target64) {
-    return mips64_loc_c_return_double;
-  } else if (fpuIs32Bit_) {
-    return mips_loc_c_return_double_fr0;
-  } else {
-    return mips_loc_c_return_double_fr1;
-  }
-}
-
-// Convert k64BitSolo into k64BitPair.
-RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) {
-  DCHECK(reg.IsDouble());
-  DCHECK_EQ(reg.GetRegNum() & 1, 0);
-  int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
-  return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
-}
-
-// Convert 64bit FP (k64BitSolo or k64BitPair) into k32BitSolo.
-// This routine is only used to allow a 64bit FPU to access FP registers 32 bits at a time.
-RegStorage MipsMir2Lir::Fp64ToSolo32(RegStorage reg) {
-  DCHECK(!fpuIs32Bit_);
-  DCHECK(reg.IsDouble());
-  DCHECK(!reg.IsPair());
-  int reg_num = reg.GetRegNum() | RegStorage::kFloatingPoint;
-  return RegStorage(RegStorage::k32BitSolo, reg_num);
-}
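A minimal standalone sketch (plain C++, no ART types) of the register-number arithmetic behind the two conversions above, assuming the usual MIPS32 pairing where double dN is backed by singles fN and fN+1:

#include <cassert>
#include <utility>

// Double-precision register N (N even) is backed by the single pair (N, N+1),
// which is exactly what Solo64ToPair64 encodes into a k64BitPair.
std::pair<int, int> DoubleToSinglePair(int dp_reg_num) {
  assert((dp_reg_num & 1) == 0);  // Doubles must start on an even register.
  return {dp_reg_num, dp_reg_num + 1};  // e.g. d4 -> (f4, f5)
}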
-
-// Return a target-dependent special register.
-RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg, WideKind wide_kind) {
-  if (!cu_->target64 && wide_kind == kWide) {
-    DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 == reg) || (kFArg2 == reg) || (kRet0 == reg));
-    RegStorage ret_reg = RegStorage::MakeRegPair(TargetReg(reg),
-                                     TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
-    if (!fpuIs32Bit_ && ret_reg.IsFloat()) {
-      // Convert 64BitPair to 64BitSolo for 64bit FPUs.
-      RegStorage low = ret_reg.GetLow();
-      ret_reg = RegStorage::FloatSolo64(low.GetRegNum());
-    }
-    return ret_reg;
-  } else if (cu_->target64 && (wide_kind == kWide || wide_kind == kRef)) {
-    return As64BitReg(TargetReg(reg));
-  } else {
-    return TargetReg(reg);
-  }
-}
-
-// Return a target-dependent special register.
-RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) {
-  RegStorage res_reg;
-  switch (reg) {
-    case kSelf: res_reg = rs_rS1; break;
-    case kSuspend: res_reg =  rs_rS0; break;
-    case kLr: res_reg =  rs_rRA; break;
-    case kPc: res_reg = RegStorage::InvalidReg(); break;
-    case kSp: res_reg =  rs_rSP; break;
-    case kArg0: res_reg = rs_rA0; break;
-    case kArg1: res_reg = rs_rA1; break;
-    case kArg2: res_reg = rs_rA2; break;
-    case kArg3: res_reg = rs_rA3; break;
-    case kArg4: res_reg = cu_->target64 ? rs_rA4 : RegStorage::InvalidReg(); break;
-    case kArg5: res_reg = cu_->target64 ? rs_rA5 : RegStorage::InvalidReg(); break;
-    case kArg6: res_reg = cu_->target64 ? rs_rA6 : RegStorage::InvalidReg(); break;
-    case kArg7: res_reg = cu_->target64 ? rs_rA7 : RegStorage::InvalidReg(); break;
-    case kFArg0: res_reg = rs_rF12; break;
-    case kFArg1: res_reg = rs_rF13; break;
-    case kFArg2: res_reg = rs_rF14; break;
-    case kFArg3: res_reg = rs_rF15; break;
-    case kFArg4: res_reg = cu_->target64 ? rs_rF16 : RegStorage::InvalidReg(); break;
-    case kFArg5: res_reg = cu_->target64 ? rs_rF17 : RegStorage::InvalidReg(); break;
-    case kFArg6: res_reg = cu_->target64 ? rs_rF18 : RegStorage::InvalidReg(); break;
-    case kFArg7: res_reg = cu_->target64 ? rs_rF19 : RegStorage::InvalidReg(); break;
-    case kRet0: res_reg = rs_rV0; break;
-    case kRet1: res_reg = rs_rV1; break;
-    case kInvokeTgt: res_reg = rs_rT9; break;
-    case kHiddenArg: res_reg = cu_->target64 ? rs_rT0 : rs_rT0_32; break;
-    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
-    case kCount: res_reg = RegStorage::InvalidReg(); break;
-    default: res_reg = RegStorage::InvalidReg();
-  }
-  return res_reg;
-}
-
-RegStorage MipsMir2Lir::InToRegStorageMipsMapper::GetNextReg(ShortyArg arg) {
-  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
-  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const SpecialTargetRegister fpuArgMappingToPhysicalReg[] = {kFArg0, kFArg2};
-  const size_t fpuArgMappingToPhysicalRegSize = arraysize(fpuArgMappingToPhysicalReg);
-
-  RegStorage result = RegStorage::InvalidReg();
-  if (arg.IsFP()) {
-    if (cur_fpu_reg_ < fpuArgMappingToPhysicalRegSize) {
-      result = m2l_->TargetReg(fpuArgMappingToPhysicalReg[cur_fpu_reg_++],
-                               arg.IsWide() ? kWide : kNotWide);
-    }
-  } else {
-    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      if (arg.IsWide() && cur_core_reg_ == 0) {
-        // Don't use a1-a2 as a register pair, move to a2-a3 instead.
-        cur_core_reg_++;
-      }
-      result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                               arg.IsRef() ? kRef : kNotWide);
-      if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-        result = RegStorage::MakeRegPair(
-            result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
-      }
-    }
-  }
-  return result;
-}
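A hedged restatement of the core-register rule above: a wide argument occupies two of the slots {a1, a2, a3} and must not straddle a1/a2, so a wide argument arriving first is bumped to the a2/a3 pair.

// Standalone sketch of the bump; slots 0..2 correspond to a1..a3.
int FirstSlotForCoreArg(int next_slot, bool is_wide) {
  return (is_wide && next_slot == 0) ? 1 : next_slot;  // Skip a1 for pairs.
}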
-
-RegStorage MipsMir2Lir::InToRegStorageMips64Mapper::GetNextReg(ShortyArg arg) {
-  const SpecialTargetRegister coreArgMappingToPhysicalReg[] =
-      {kArg1, kArg2, kArg3, kArg4, kArg5, kArg6, kArg7};
-  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const SpecialTargetRegister fpArgMappingToPhysicalReg[] =
-      {kFArg1, kFArg2, kFArg3, kFArg4, kFArg5, kFArg6, kFArg7};
-  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
-
-  RegStorage result = RegStorage::InvalidReg();
-  if (arg.IsFP()) {
-    if (cur_arg_reg_ < fpArgMappingToPhysicalRegSize) {
-      DCHECK(!arg.IsRef());
-      result = m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_arg_reg_++],
-                               arg.IsWide() ? kWide : kNotWide);
-    }
-  } else {
-    if (cur_arg_reg_ < coreArgMappingToPhysicalRegSize) {
-      DCHECK(!(arg.IsWide() && arg.IsRef()));
-      result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_arg_reg_++],
-                               arg.IsRef() ? kRef : (arg.IsWide() ? kWide : kNotWide));
-    }
-  }
-  return result;
-}
-
-/*
- * Decode the register id.
- */
-ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
-  if (cu_->target64) {
-    return ResourceMask::Bit((reg.IsFloat() ? kMipsFPReg0 : 0) + reg.GetRegNum());
-  } else {
-    if (reg.IsDouble()) {
-      return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
-    } else if (reg.IsSingle()) {
-      return ResourceMask::Bit(reg.GetRegNum() + kMipsFPReg0);
-    } else {
-      return ResourceMask::Bit(reg.GetRegNum());
-    }
-  }
-}
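As a rough sketch of the 32-bit mask shape computed above: core registers and singles each get one bit, while a double on a 32-bit FPU claims the two adjacent bits of its even/odd single pair (kFpBase stands in for kMipsFPReg0 and is an assumed value):

#include <cstdint>

constexpr int kFpBase = 32;  // Assumed offset of the first FP bit.

uint64_t MaskForSingle(int fp_reg_num) {
  return uint64_t{1} << (kFpBase + fp_reg_num);  // One bit per single.
}

uint64_t MaskForDouble(int fp_reg_num) {
  int even = fp_reg_num & ~1;              // Align to the pair base...
  return uint64_t{3} << (kFpBase + even);  // ...and claim both halves.
}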
-
-ResourceMask MipsMir2Lir::GetPCUseDefEncoding() const {
-  return cu_->target64 ? ResourceMask::Bit(kMips64RegPC) : ResourceMask::Bit(kMipsRegPC);
-}
-
-void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, ResourceMask* use_mask,
-                                           ResourceMask* def_mask) {
-  DCHECK(!lir->flags.use_def_invalid);
-
-  // Mips-specific resource map setup here.
-  if (flags & REG_DEF_SP) {
-    def_mask->SetBit(kMipsRegSP);
-  }
-
-  if (flags & REG_USE_SP) {
-    use_mask->SetBit(kMipsRegSP);
-  }
-
-  if (flags & REG_DEF_LR) {
-    def_mask->SetBit(kMipsRegLR);
-  }
-
-  if (!cu_->target64) {
-    if (flags & REG_DEF_HI) {
-      def_mask->SetBit(kMipsRegHI);
-    }
-
-    if (flags & REG_DEF_LO) {
-      def_mask->SetBit(kMipsRegLO);
-    }
-
-    if (flags & REG_USE_HI) {
-      use_mask->SetBit(kMipsRegHI);
-    }
-
-    if (flags & REG_USE_LO) {
-      use_mask->SetBit(kMipsRegLO);
-    }
-  }
-}
-
-/* For dumping instructions */
-#define MIPS_REG_COUNT 32
-static const char *mips_reg_name[MIPS_REG_COUNT] = {
-  "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3",
-  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
-  "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-  "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"
-};
-
-static const char *mips64_reg_name[MIPS_REG_COUNT] = {
-  "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3",
-  "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3",
-  "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-  "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"
-};
-
-/*
- * Interpret a format string and build a string no longer than size
- * See format key in assemble_mips.cc.
- */
-std::string MipsMir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
-  std::string buf;
-  int i;
-  const char *fmt_end = &fmt[strlen(fmt)];
-  char tbuf[256];
-  char nc;
-  while (fmt < fmt_end) {
-    int operand;
-    if (*fmt == '!') {
-      fmt++;
-      DCHECK_LT(fmt, fmt_end);
-      nc = *fmt++;
-      if (nc == '!') {
-        strcpy(tbuf, "!");
-      } else {
-        DCHECK_LT(fmt, fmt_end);
-        DCHECK_LT(static_cast<unsigned>(nc-'0'), 4u);
-        operand = lir->operands[nc-'0'];
-        switch (*fmt++) {
-          case 'b':
-            strcpy(tbuf, "0000");
-            for (i = 3; i >= 0; i--) {
-              tbuf[i] += operand & 1;
-              operand >>= 1;
-            }
-            break;
-          case 's':
-            snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
-            break;
-          case 'S':
-            DCHECK_EQ(RegStorage::RegNum(operand) & 1, 0);
-            snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
-            break;
-          case 'h':
-            snprintf(tbuf, arraysize(tbuf), "%04x", operand);
-            break;
-          case 'M':
-          case 'd':
-            snprintf(tbuf, arraysize(tbuf), "%d", operand);
-            break;
-          case 'D':
-            snprintf(tbuf, arraysize(tbuf), "%d", operand+1);
-            break;
-          case 'E':
-            snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
-            break;
-          case 'F':
-            snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
-            break;
-          case 't':
-            snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
-                     reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
-                     lir->target);
-            break;
-          case 'T':
-            snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
-            break;
-          case 'u': {
-            int offset_1 = lir->operands[0];
-            int offset_2 = NEXT_LIR(lir)->operands[0];
-            uintptr_t target =
-                (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) +
-                    (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc;
-            snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void*>(target));
-            break;
-          }
-
-          /* Nothing to print for BLX_2 */
-          case 'v':
-            strcpy(tbuf, "see above");
-            break;
-          case 'r':
-            DCHECK(operand >= 0 && operand < MIPS_REG_COUNT);
-            if (cu_->target64) {
-              strcpy(tbuf, mips64_reg_name[operand]);
-            } else {
-              strcpy(tbuf, mips_reg_name[operand]);
-            }
-            break;
-          case 'N':
-            // Placeholder for delay slot handling
-            strcpy(tbuf, ";  nop");
-            break;
-          default:
-            strcpy(tbuf, "DecodeError");
-            break;
-        }
-        buf += tbuf;
-      }
-    } else {
-      buf += *fmt++;
-    }
-  }
-  return buf;
-}
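An illustrative expansion of the escape scheme above (the exact format strings live in assemble_mips.cc; this one is assumed for the example):

// With fmt = "addu !0r,!1r,!2r" and operands {2, 4, 5}, each "!Nr" indexes
// mips_reg_name with operands[N], producing:
//   "addu v0,a0,a1"
// "!!" emits a literal '!', and an unknown escape prints "DecodeError".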
-
-// FIXME: need to redo resource maps for MIPS - fix this at that time.
-void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, const ResourceMask& mask, const char *prefix) {
-  char buf[256];
-  buf[0] = 0;
-
-  if (mask.Equals(kEncodeAll)) {
-    strcpy(buf, "all");
-  } else {
-    char num[8];
-    int i;
-
-    for (i = 0; i < (cu_->target64 ? kMips64RegEnd : kMipsRegEnd); i++) {
-      if (mask.HasBit(i)) {
-        snprintf(num, arraysize(num), "%d ", i);
-        strcat(buf, num);
-      }
-    }
-
-    if (mask.HasBit(ResourceMask::kCCode)) {
-      strcat(buf, "cc ");
-    }
-    if (mask.HasBit(ResourceMask::kFPStatus)) {
-      strcat(buf, "fpcc ");
-    }
-    // Memory bits.
-    if (mips_lir && (mask.HasBit(ResourceMask::kDalvikReg))) {
-      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
-               DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info),
-               DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : "");
-    }
-    if (mask.HasBit(ResourceMask::kLiteral)) {
-      strcat(buf, "lit ");
-    }
-
-    if (mask.HasBit(ResourceMask::kHeapRef)) {
-      strcat(buf, "heap ");
-    }
-    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
-      strcat(buf, "noalias ");
-    }
-  }
-  if (buf[0]) {
-    LOG(INFO) << prefix << ": " <<  buf;
-  }
-}
-
-/*
- * TUNING: is this a true leaf?  We can't just use METHOD_IS_LEAF to decide, as
- * some instructions might call out to C/assembly helper functions.  Until the
- * machinery is in place, always spill lr.
- */
-
-void MipsMir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rs_rRA.GetRegNum());
-  num_core_spills_++;
-}
-
-/* Clobber all regs that might be used by an external C call */
-void MipsMir2Lir::ClobberCallerSave() {
-  if (cu_->target64) {
-    Clobber(rs_rZEROd);
-    Clobber(rs_rATd);
-    Clobber(rs_rV0d);
-    Clobber(rs_rV1d);
-    Clobber(rs_rA0d);
-    Clobber(rs_rA1d);
-    Clobber(rs_rA2d);
-    Clobber(rs_rA3d);
-    Clobber(rs_rA4d);
-    Clobber(rs_rA5d);
-    Clobber(rs_rA6d);
-    Clobber(rs_rA7d);
-    Clobber(rs_rT0d);
-    Clobber(rs_rT1d);
-    Clobber(rs_rT2d);
-    Clobber(rs_rT3d);
-    Clobber(rs_rT8d);
-    Clobber(rs_rT9d);
-    Clobber(rs_rK0d);
-    Clobber(rs_rK1d);
-    Clobber(rs_rGPd);
-    Clobber(rs_rFPd);
-    Clobber(rs_rRAd);
-
-    Clobber(rs_rF0);
-    Clobber(rs_rF1);
-    Clobber(rs_rF2);
-    Clobber(rs_rF3);
-    Clobber(rs_rF4);
-    Clobber(rs_rF5);
-    Clobber(rs_rF6);
-    Clobber(rs_rF7);
-    Clobber(rs_rF8);
-    Clobber(rs_rF9);
-    Clobber(rs_rF10);
-    Clobber(rs_rF11);
-    Clobber(rs_rF12);
-    Clobber(rs_rF13);
-    Clobber(rs_rF14);
-    Clobber(rs_rF15);
-    Clobber(rs_rD0);
-    Clobber(rs_rD1);
-    Clobber(rs_rD2);
-    Clobber(rs_rD3);
-    Clobber(rs_rD4);
-    Clobber(rs_rD5);
-    Clobber(rs_rD6);
-    Clobber(rs_rD7);
-  } else {
-    Clobber(rs_rZERO);
-    Clobber(rs_rAT);
-    Clobber(rs_rV0);
-    Clobber(rs_rV1);
-    Clobber(rs_rA0);
-    Clobber(rs_rA1);
-    Clobber(rs_rA2);
-    Clobber(rs_rA3);
-    Clobber(rs_rT0_32);
-    Clobber(rs_rT1_32);
-    Clobber(rs_rT2_32);
-    Clobber(rs_rT3_32);
-    Clobber(rs_rT4_32);
-    Clobber(rs_rT5_32);
-    Clobber(rs_rT6_32);
-    Clobber(rs_rT7_32);
-    Clobber(rs_rT8);
-    Clobber(rs_rT9);
-    Clobber(rs_rK0);
-    Clobber(rs_rK1);
-    Clobber(rs_rGP);
-    Clobber(rs_rFP);
-    Clobber(rs_rRA);
-    Clobber(rs_rF0);
-    Clobber(rs_rF2);
-    Clobber(rs_rF4);
-    Clobber(rs_rF6);
-    Clobber(rs_rF8);
-    Clobber(rs_rF10);
-    Clobber(rs_rF12);
-    Clobber(rs_rF14);
-    if (fpuIs32Bit_) {
-      Clobber(rs_rF1);
-      Clobber(rs_rF3);
-      Clobber(rs_rF5);
-      Clobber(rs_rF7);
-      Clobber(rs_rF9);
-      Clobber(rs_rF11);
-      Clobber(rs_rF13);
-      Clobber(rs_rF15);
-      Clobber(rs_rD0_fr0);
-      Clobber(rs_rD1_fr0);
-      Clobber(rs_rD2_fr0);
-      Clobber(rs_rD3_fr0);
-      Clobber(rs_rD4_fr0);
-      Clobber(rs_rD5_fr0);
-      Clobber(rs_rD6_fr0);
-      Clobber(rs_rD7_fr0);
-    } else {
-      Clobber(rs_rD0_fr1);
-      Clobber(rs_rD1_fr1);
-      Clobber(rs_rD2_fr1);
-      Clobber(rs_rD3_fr1);
-      Clobber(rs_rD4_fr1);
-      Clobber(rs_rD5_fr1);
-      Clobber(rs_rD6_fr1);
-      Clobber(rs_rD7_fr1);
-    }
-  }
-}
-
-RegLocation MipsMir2Lir::GetReturnWideAlt() {
-  UNIMPLEMENTED(FATAL) << "No GetReturnWideAlt for MIPS";
-  RegLocation res = LocCReturnWide();
-  return res;
-}
-
-RegLocation MipsMir2Lir::GetReturnAlt() {
-  UNIMPLEMENTED(FATAL) << "No GetReturnAlt for MIPS";
-  RegLocation res = LocCReturn();
-  return res;
-}
-
-/* To be used when explicitly managing register use */
-void MipsMir2Lir::LockCallTemps() {
-  LockTemp(TargetReg(kArg0));
-  LockTemp(TargetReg(kArg1));
-  LockTemp(TargetReg(kArg2));
-  LockTemp(TargetReg(kArg3));
-  if (cu_->target64) {
-    LockTemp(TargetReg(kArg4));
-    LockTemp(TargetReg(kArg5));
-    LockTemp(TargetReg(kArg6));
-    LockTemp(TargetReg(kArg7));
-  } else {
-    if (fpuIs32Bit_) {
-      LockTemp(TargetReg(kFArg0));
-      LockTemp(TargetReg(kFArg1));
-      LockTemp(TargetReg(kFArg2));
-      LockTemp(TargetReg(kFArg3));
-      LockTemp(rs_rD6_fr0);
-      LockTemp(rs_rD7_fr0);
-    } else {
-      LockTemp(TargetReg(kFArg0));
-      LockTemp(TargetReg(kFArg2));
-      LockTemp(rs_rD6_fr1);
-      LockTemp(rs_rD7_fr1);
-    }
-  }
-}
-
-/* To be used when explicitly managing register use */
-void MipsMir2Lir::FreeCallTemps() {
-  FreeTemp(TargetReg(kArg0));
-  FreeTemp(TargetReg(kArg1));
-  FreeTemp(TargetReg(kArg2));
-  FreeTemp(TargetReg(kArg3));
-  if (cu_->target64) {
-    FreeTemp(TargetReg(kArg4));
-    FreeTemp(TargetReg(kArg5));
-    FreeTemp(TargetReg(kArg6));
-    FreeTemp(TargetReg(kArg7));
-  } else {
-    if (fpuIs32Bit_) {
-      FreeTemp(TargetReg(kFArg0));
-      FreeTemp(TargetReg(kFArg1));
-      FreeTemp(TargetReg(kFArg2));
-      FreeTemp(TargetReg(kFArg3));
-      FreeTemp(rs_rD6_fr0);
-      FreeTemp(rs_rD7_fr0);
-    } else {
-      FreeTemp(TargetReg(kFArg0));
-      FreeTemp(TargetReg(kFArg2));
-      FreeTemp(rs_rD6_fr1);
-      FreeTemp(rs_rD7_fr1);
-    }
-  }
-  FreeTemp(TargetReg(kHiddenArg));
-}
-
-bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind ATTRIBUTE_UNUSED) {
-  if (cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
-    NewLIR1(kMipsSync, 0 /* Only stype currently supported */);
-    return true;
-  } else {
-    return false;
-  }
-}
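For reference, on SMP targets the barrier above lowers to the MIPS SYNC instruction; stype 0 is the strongest variant, and uniprocessor builds elide the barrier entirely:

//   sync    # stype 0: completion barrier ordering all earlier loads/stores
//           # before all later ones.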
-
-void MipsMir2Lir::CompilerInitializeRegAlloc() {
-  if (cu_->target64) {
-    reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64d, sp_regs_64,
-                                              dp_regs_64, reserved_regs_64, reserved_regs_64d,
-                                              core_temps_64, core_temps_64d, sp_temps_64,
-                                              dp_temps_64));
-
-    // Alias single precision floats to appropriate half of overlapping double.
-    for (RegisterInfo* info : reg_pool_->sp_regs_) {
-      int sp_reg_num = info->GetReg().GetRegNum();
-      int dp_reg_num = sp_reg_num;
-      RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
-      RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-      // Double precision register's master storage should refer to itself.
-      DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
-      // Redirect the single precision register's master storage to the backing double.
-      info->SetMaster(dp_reg_info);
-      // Singles should show a single 32-bit mask bit, at first referring to the low half.
-      DCHECK_EQ(info->StorageMask(), 0x1U);
-    }
-
-    // Alias each 32-bit core register to its corresponding 64-bit register.
-    for (RegisterInfo* info : reg_pool_->core_regs_) {
-      int d_reg_num = info->GetReg().GetRegNum();
-      RegStorage d_reg = RegStorage::Solo64(d_reg_num);
-      RegisterInfo* d_reg_info = GetRegInfo(d_reg);
-      // The 64-bit register's master storage should refer to itself.
-      DCHECK_EQ(d_reg_info, d_reg_info->Master());
-      // Redirect the 32-bit view's master storage to the 64-bit register.
-      info->SetMaster(d_reg_info);
-      // 32bit should show a single 32-bit mask bit, at first referring to the low half.
-      DCHECK_EQ(info->StorageMask(), 0x1U);
-    }
-  } else {
-    reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool,  // core64
-                                              sp_regs_32,
-                                              fpuIs32Bit_ ? dp_fr0_regs_32 : dp_fr1_regs_32,
-                                              reserved_regs_32, empty_pool,  // reserved64
-                                              core_temps_32, empty_pool,  // core64_temps
-                                              fpuIs32Bit_ ? sp_fr0_temps_32 : sp_fr1_temps_32,
-                                              fpuIs32Bit_ ? dp_fr0_temps_32 : dp_fr1_temps_32));
-
-    // Alias single precision floats to appropriate half of overlapping double.
-    for (RegisterInfo* info : reg_pool_->sp_regs_) {
-      int sp_reg_num = info->GetReg().GetRegNum();
-      int dp_reg_num = sp_reg_num & ~1;
-      if (fpuIs32Bit_ || (sp_reg_num == dp_reg_num)) {
-        RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
-        RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-        // Double precision register's master storage should refer to itself.
-        DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
-        // Redirect the single precision register's master storage to the backing double.
-        info->SetMaster(dp_reg_info);
-        // Singles should show a single 32-bit mask bit, at first referring to the low half.
-        DCHECK_EQ(info->StorageMask(), 0x1U);
-        if (sp_reg_num & 1) {
-          // For odd singles, change to use the high word of the backing double.
-          info->SetStorageMask(0x2);
-        }
-      }
-    }
-  }
-
-  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
-  // TODO: adjust when we roll to hard float calling convention.
-  reg_pool_->next_core_reg_ = 2;
-  reg_pool_->next_sp_reg_ = 2;
-  if (cu_->target64) {
-    reg_pool_->next_dp_reg_ = 1;
-  } else {
-    reg_pool_->next_dp_reg_ = 2;
-  }
-}
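A standalone restatement of the 32-bit FPU aliasing set up above: each single fN is one half of double d(N & ~1), with storage mask 0x1 for the low word and 0x2 for the high word.

struct SingleAlias {
  int backing_double;     // Register number of the master (double) register.
  unsigned storage_mask;  // 0x1 = low 32-bit half, 0x2 = high half.
};

SingleAlias AliasForSingle(int sp_reg_num) {
  return {sp_reg_num & ~1, (sp_reg_num & 1) != 0 ? 0x2u : 0x1u};
}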
-
-/*
- * In the Arm code it is typical to use the link register
- * to hold the target address.  However, for Mips we must
- * ensure that all branch instructions can be restarted if
- * there is a trap in the shadow.  Allocate a temp register.
- */
-RegStorage MipsMir2Lir::LoadHelper(QuickEntrypointEnum trampoline) {
-  // NOTE: native pointer.
-  if (cu_->target64) {
-    LoadWordDisp(TargetPtrReg(kSelf), GetThreadOffset<8>(trampoline).Int32Value(),
-                 TargetPtrReg(kInvokeTgt));
-  } else {
-    LoadWordDisp(TargetPtrReg(kSelf), GetThreadOffset<4>(trampoline).Int32Value(),
-                 TargetPtrReg(kInvokeTgt));
-  }
-  return TargetPtrReg(kInvokeTgt);
-}
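The sequence this prepares looks roughly like the following (illustrative; s1 holds kSelf and t9 is kInvokeTgt, which also satisfies the MIPS PIC convention that a callee expects its own address in t9):

//   lw   t9, <trampoline offset>(s1)   # load the entrypoint from Thread
//   jalr t9                            # caller later branches via kInvokeTgt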
-
-LIR* MipsMir2Lir::CheckSuspendUsingLoad() {
-  RegStorage tmp = AllocTemp();
-  // NOTE: native pointer.
-  if (cu_->target64) {
-    LoadWordDisp(TargetPtrReg(kSelf), Thread::ThreadSuspendTriggerOffset<8>().Int32Value(), tmp);
-  } else {
-    LoadWordDisp(TargetPtrReg(kSelf), Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
-  }
-  LIR *inst = LoadWordDisp(tmp, 0, tmp);
-  FreeTemp(tmp);
-  return inst;
-}
-
-LIR* MipsMir2Lir::GenAtomic64Load(RegStorage r_base, int displacement, RegStorage r_dest) {
-  DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadStore().
-  if (!cu_->target64) {
-    DCHECK(r_dest.IsPair());
-  }
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers.
-  RegStorage reg_ptr = TargetReg(kArg0);
-  OpRegRegImm(kOpAdd, reg_ptr, r_base, displacement);
-  RegStorage r_tgt = LoadHelper(kQuickA64Load);
-  ForceImplicitNullCheck(reg_ptr, 0, true);  // is_wide = true
-  LIR *ret = OpReg(kOpBlx, r_tgt);
-  RegStorage reg_ret;
-  if (cu_->target64) {
-    OpRegCopy(r_dest, TargetReg(kRet0));
-  } else {
-    reg_ret = RegStorage::MakeRegPair(TargetReg(kRet0), TargetReg(kRet1));
-    OpRegCopyWide(r_dest, reg_ret);
-  }
-  return ret;
-}
-
-LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStorage r_src) {
-  DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadStore().
-  if (cu_->target64) {
-    DCHECK(!r_src.IsPair());
-  } else {
-    DCHECK(r_src.IsPair());
-  }
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers.
-  RegStorage temp_ptr = AllocTemp();
-  OpRegRegImm(kOpAdd, temp_ptr, r_base, displacement);
-  ForceImplicitNullCheck(temp_ptr, 0, true);  // is_wide = true
-  RegStorage temp_value = AllocTempWide();
-  OpRegCopyWide(temp_value, r_src);
-  if (cu_->target64) {
-    OpRegCopyWide(TargetReg(kArg0, kWide), temp_ptr);
-    OpRegCopyWide(TargetReg(kArg1, kWide), temp_value);
-  } else {
-    RegStorage reg_ptr = TargetReg(kArg0);
-    OpRegCopy(reg_ptr, temp_ptr);
-    RegStorage reg_value = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
-    OpRegCopyWide(reg_value, temp_value);
-  }
-  FreeTemp(temp_ptr);
-  FreeTemp(temp_value);
-  RegStorage r_tgt = LoadHelper(kQuickA64Store);
-  return OpReg(kOpBlx, r_tgt);
-}
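Both atomic64 helpers above share one call shape on 32-bit targets (illustrative outline):

//   addiu a0, base, disp          # kArg0 = address of the 64-bit field
//   move  a2/a3, value pair       # store only, set up via OpRegCopyWide
//   lw    t9, <kQuickA64Load or kQuickA64Store>(s1)
//   jalr  t9                      # a load returns its result in v0/v1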
-
-static dwarf::Reg DwarfCoreReg(int num) {
-  return dwarf::Reg::MipsCore(num);
-}
-
-void MipsMir2Lir::SpillCoreRegs() {
-  if (num_core_spills_ == 0) {
-    return;
-  }
-  uint32_t mask = core_spill_mask_;
-  int ptr_size = cu_->target64 ? 8 : 4;
-  int offset = num_core_spills_ * ptr_size;
-  const RegStorage rs_sp = TargetPtrReg(kSp);
-  OpRegImm(kOpSub, rs_sp, offset);
-  cfi_.AdjustCFAOffset(offset);
-  for (int reg = 0; mask; mask >>= 1, reg++) {
-    if (mask & 0x1) {
-      offset -= ptr_size;
-      StoreWordDisp(rs_sp, offset,
-                    cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg));
-      cfi_.RelOffset(DwarfCoreReg(reg), offset);
-    }
-  }
-}
-
-void MipsMir2Lir::UnSpillCoreRegs() {
-  if (num_core_spills_ == 0) {
-    return;
-  }
-  uint32_t mask = core_spill_mask_;
-  int offset  = frame_size_;
-  int ptr_size = cu_->target64 ? 8 : 4;
-  const RegStorage rs_sp = TargetPtrReg(kSp);
-  for (int reg = 0; mask; mask >>= 1, reg++) {
-    if (mask & 0x1) {
-      offset -= ptr_size;
-      LoadWordDisp(rs_sp, offset,
-                   cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg));
-      cfi_.Restore(DwarfCoreReg(reg));
-    }
-  }
-  OpRegImm(kOpAdd, rs_sp, frame_size_);
-  cfi_.AdjustCFAOffset(-frame_size_);
-}
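Illustrative frame math for the spill loops above, with core_spill_mask_ covering {s0, s1, ra} on 32-bit MIPS (ptr_size = 4, so offset starts at 12):

//   addiu sp, sp, -12
//   sw    s0, 8(sp)   # lower-numbered registers land at higher offsets
//   sw    s1, 4(sp)
//   sw    ra, 0(sp)   # ra, the highest-numbered register, ends at 0(sp)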
-
-bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) {
-  return (lir->opcode == kMipsB);
-}
-
-RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
-  if (UNLIKELY(is_volatile)) {
-    // On Mips, atomic 64-bit load/store requires a core register.
-    // Smaller aligned load/store is atomic for both core and fp registers.
-    if (size == k64 || size == kDouble) {
-      return kCoreReg;
-    }
-  }
-  // TODO: Verify that both core and fp registers are suitable for smaller sizes.
-  return RegClassBySize(size);
-}
-
-MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips64_mapper_(this),
-    in_to_reg_storage_mips_mapper_(this),
-    isaIsR6_(cu_->target64 ? true : cu->compiler_driver->GetInstructionSetFeatures()
-                ->AsMipsInstructionSetFeatures()->IsR6()),
-    fpuIs32Bit_(cu_->target64 ? false : cu->compiler_driver->GetInstructionSetFeatures()
-                   ->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
-  for (int i = 0; i < kMipsLast; i++) {
-    DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i)
-        << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name
-        << " is wrong: expecting " << i << ", seeing "
-        << static_cast<int>(MipsMir2Lir::EncodingMap[i].opcode);
-  }
-}
-
-Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                           ArenaAllocator* const arena) {
-  return new MipsMir2Lir(cu, mir_graph, arena);
-}
-
-uint64_t MipsMir2Lir::GetTargetInstFlags(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return MipsMir2Lir::EncodingMap[opcode].flags;
-}
-
-const char* MipsMir2Lir::GetTargetInstName(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return MipsMir2Lir::EncodingMap[opcode].name;
-}
-
-const char* MipsMir2Lir::GetTargetInstFmt(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return MipsMir2Lir::EncodingMap[opcode].fmt;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
deleted file mode 100644
index 372fe2b..0000000
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ /dev/null
@@ -1,1068 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_mips.h"
-
-#include "arch/mips/instruction_set_features_mips.h"
-#include "arch/mips/entrypoints_direct_mips.h"
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "dex/mir_graph.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "mips_lir.h"
-
-namespace art {
-
-/* This file contains codegen for the Mips ISA */
-LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  int opcode;
-  if (cu_->target64) {
-    DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
-    if (r_dest.Is64Bit()) {
-      if (r_dest.IsDouble()) {
-        if (r_src.IsDouble()) {
-          opcode = kMipsFmovd;
-        } else {
-          // Note the operands are swapped for the dmtc1 instr.
-          RegStorage t_opnd = r_src;
-          r_src = r_dest;
-          r_dest = t_opnd;
-          opcode = kMips64Dmtc1;
-        }
-      } else {
-        DCHECK(r_src.IsDouble());
-        opcode = kMips64Dmfc1;
-      }
-    } else {
-      if (r_dest.IsSingle()) {
-        if (r_src.IsSingle()) {
-          opcode = kMipsFmovs;
-        } else {
-          // Note the operands are swapped for the mtc1 instr.
-          RegStorage t_opnd = r_src;
-          r_src = r_dest;
-          r_dest = t_opnd;
-          opcode = kMipsMtc1;
-        }
-      } else {
-        DCHECK(r_src.IsSingle());
-        opcode = kMipsMfc1;
-      }
-    }
-  } else {
-    // Both must be DOUBLE, or both must not be DOUBLE.
-    DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
-    if (r_dest.IsDouble()) {
-      opcode = kMipsFmovd;
-    } else {
-      if (r_dest.IsSingle()) {
-        if (r_src.IsSingle()) {
-          opcode = kMipsFmovs;
-        } else {
-          // Note the operands are swapped for the mtc1 instr.
-          RegStorage t_opnd = r_src;
-          r_src = r_dest;
-          r_dest = t_opnd;
-          opcode = kMipsMtc1;
-        }
-      } else {
-        DCHECK(r_src.IsSingle());
-        opcode = kMipsMfc1;
-      }
-    }
-  }
-  LIR* res;
-  if (cu_->target64) {
-    res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-  } else {
-    res = RawLIR(current_dalvik_offset_, opcode, r_src.GetReg(), r_dest.GetReg());
-  }
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
-}
-
-bool MipsMir2Lir::InexpensiveConstantInt(int32_t value) {
-  // For encodings, see LoadConstantNoClobber below.
-  return ((value == 0) || IsUint<16>(value) || IsInt<16>(value));
-}
-
-bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value ATTRIBUTE_UNUSED) {
-  return false;  // TUNING
-}
-
-bool MipsMir2Lir::InexpensiveConstantLong(int64_t value ATTRIBUTE_UNUSED) {
-  return false;  // TUNING
-}
-
-bool MipsMir2Lir::InexpensiveConstantDouble(int64_t value ATTRIBUTE_UNUSED) {
-  return false;  // TUNING
-}
-
-/*
- * Load an immediate using a shortcut if possible; otherwise
- * grab from the per-translation literal pool.  If the target is
- * a high register, build the constant into a low register and copy.
- *
- * No additional register clobbering operation is performed. Use this version when
- * 1) r_dest is freshly returned from AllocTemp or
- * 2) The codegen is under fixed register usage
- */
-LIR* MipsMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
-  LIR *res;
-
-  RegStorage r_dest_save = r_dest;
-  int is_fp_reg = r_dest.IsFloat();
-  if (is_fp_reg) {
-    DCHECK(r_dest.IsSingle());
-    r_dest = AllocTemp();
-  }
-
-  // See if the value can be constructed cheaply.
-  if (value == 0) {
-    res = NewLIR2(kMipsMove, r_dest.GetReg(), rZERO);
-  } else if (IsUint<16>(value)) {
-    // Use OR with (unsigned) immediate to encode 16b unsigned int.
-    res = NewLIR3(kMipsOri, r_dest.GetReg(), rZERO, value);
-  } else if (IsInt<16>(value)) {
-    // Use ADD with (signed) immediate to encode 16b signed int.
-    res = NewLIR3(kMipsAddiu, r_dest.GetReg(), rZERO, value);
-  } else {
-    res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16);
-    if (value & 0xffff)
-      NewLIR3(kMipsOri, r_dest.GetReg(), r_dest.GetReg(), value);
-  }
-
-  if (is_fp_reg) {
-    NewLIR2(kMipsMtc1, r_dest.GetReg(), r_dest_save.GetReg());
-    FreeTemp(r_dest);
-  }
-
-  return res;
-}
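Worked examples of the encodings chosen above (standard MIPS32 idioms):

//   value = 0x0000ffff : ori   rd, zero, 0xffff   # IsUint<16>
//   value = -4         : addiu rd, zero, -4       # IsInt<16>
//   value = 0x12345678 : lui   rd, 0x1234         # upper half
//                        ori   rd, rd, 0x5678     # lower half, if nonzero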
-
-LIR* MipsMir2Lir::LoadConstantWideNoClobber(RegStorage r_dest, int64_t value) {
-  LIR* res = nullptr;
-  DCHECK(r_dest.Is64Bit());
-  RegStorage r_dest_save = r_dest;
-  int is_fp_reg = r_dest.IsFloat();
-  if (is_fp_reg) {
-    DCHECK(r_dest.IsDouble());
-    r_dest = AllocTemp();
-  }
-
-  int bit31 = (value & UINT64_C(0x80000000)) != 0;
-
-  // Loads with 1 instruction.
-  if (IsUint<16>(value)) {
-    res = NewLIR3(kMipsOri, r_dest.GetReg(), rZEROd, value);
-  } else if (IsInt<16>(value)) {
-    res = NewLIR3(kMips64Daddiu, r_dest.GetReg(), rZEROd, value);
-  } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) {
-    res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16);
-  } else if (IsInt<32>(value)) {
-    // Loads with 2 instructions.
-    res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16);
-    NewLIR3(kMipsOri, r_dest.GetReg(), r_dest.GetReg(), value);
-  } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) {
-    res = NewLIR3(kMipsOri, r_dest.GetReg(), rZEROd, value);
-    NewLIR2(kMips64Dahi, r_dest.GetReg(), value >> 32);
-  } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) {
-    res = NewLIR3(kMipsOri, r_dest.GetReg(), rZEROd, value);
-    NewLIR2(kMips64Dati, r_dest.GetReg(), value >> 48);
-  } else if ((value & 0xFFFF) == 0 && (value >> 32) >= (-32768 - bit31) &&
-             (value >> 32) <= (32767 - bit31)) {
-    res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16);
-    NewLIR2(kMips64Dahi, r_dest.GetReg(), (value >> 32) + bit31);
-  } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
-    res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16);
-    NewLIR2(kMips64Dati, r_dest.GetReg(), (value >> 48) + bit31);
-  } else {
-    int64_t tmp = value;
-    int shift_cnt = 0;
-    while ((tmp & 1) == 0) {
-      tmp >>= 1;
-      shift_cnt++;
-    }
-
-    if (IsUint<16>(tmp)) {
-      res = NewLIR3(kMipsOri, r_dest.GetReg(), rZEROd, tmp);
-      NewLIR3((shift_cnt < 32) ? kMips64Dsll : kMips64Dsll32, r_dest.GetReg(), r_dest.GetReg(),
-              shift_cnt & 0x1F);
-    } else if (IsInt<16>(tmp)) {
-      res = NewLIR3(kMips64Daddiu, r_dest.GetReg(), rZEROd, tmp);
-      NewLIR3((shift_cnt < 32) ? kMips64Dsll : kMips64Dsll32, r_dest.GetReg(), r_dest.GetReg(),
-              shift_cnt & 0x1F);
-    } else if (IsInt<32>(tmp)) {
-      // Loads with 3 instructions.
-      res = NewLIR2(kMipsLui, r_dest.GetReg(), tmp >> 16);
-      NewLIR3(kMipsOri, r_dest.GetReg(), r_dest.GetReg(), tmp);
-      NewLIR3((shift_cnt < 32) ? kMips64Dsll : kMips64Dsll32, r_dest.GetReg(), r_dest.GetReg(),
-              shift_cnt & 0x1F);
-    } else {
-      tmp = value >> 16;
-      shift_cnt = 16;
-      while ((tmp & 1) == 0) {
-        tmp >>= 1;
-        shift_cnt++;
-      }
-
-      if (IsUint<16>(tmp)) {
-        res = NewLIR3(kMipsOri, r_dest.GetReg(), rZEROd, tmp);
-        NewLIR3((shift_cnt < 32) ? kMips64Dsll : kMips64Dsll32, r_dest.GetReg(), r_dest.GetReg(),
-                shift_cnt & 0x1F);
-        NewLIR3(kMipsOri, r_dest.GetReg(), r_dest.GetReg(), value);
-      } else if (IsInt<16>(tmp)) {
-        res = NewLIR3(kMips64Daddiu, r_dest.GetReg(), rZEROd, tmp);
-        NewLIR3((shift_cnt < 32) ? kMips64Dsll : kMips64Dsll32, r_dest.GetReg(), r_dest.GetReg(),
-                shift_cnt & 0x1F);
-        NewLIR3(kMipsOri, r_dest.GetReg(), r_dest.GetReg(), value);
-      } else {
-        // Loads with 3-4 instructions.
-        uint64_t tmp2 = value;
-        if (((tmp2 >> 16) & 0xFFFF) != 0 || (tmp2 & 0xFFFFFFFF) == 0) {
-          res = NewLIR2(kMipsLui, r_dest.GetReg(), tmp2 >> 16);
-        }
-        if ((tmp2 & 0xFFFF) != 0) {
-          if (res)
-            NewLIR3(kMipsOri, r_dest.GetReg(), r_dest.GetReg(), tmp2);
-          else
-            res = NewLIR3(kMipsOri, r_dest.GetReg(), rZEROd, tmp2);
-        }
-        if (bit31) {
-          tmp2 += UINT64_C(0x100000000);
-        }
-        if (((tmp2 >> 32) & 0xFFFF) != 0) {
-          NewLIR2(kMips64Dahi, r_dest.GetReg(), tmp2 >> 32);
-        }
-        if (tmp2 & UINT64_C(0x800000000000)) {
-          tmp2 += UINT64_C(0x1000000000000);
-        }
-        if ((tmp2 >> 48) != 0) {
-          NewLIR2(kMips64Dati, r_dest.GetReg(), tmp2 >> 48);
-        }
-      }
-    }
-  }
-
-  if (is_fp_reg) {
-    NewLIR2(kMips64Dmtc1, r_dest.GetReg(), r_dest_save.GetReg());
-    FreeTemp(r_dest);
-  }
-  return res;
-}
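Two illustrative 64-bit cases from the ladder above (dahi adds its sign-extended immediate shifted left by 32; dsll32 shifts left by 32 plus its field):

//   0x0000000100000000 : ori    rd, zero, 0x0000   # low 16 bits
//                        dahi   rd, 1              # += 1 << 32
//   0x0000c00000000000 : ori    rd, zero, 3        # 46 trailing zeros
//                        dsll32 rd, rd, 14         # << (32 + 14) = << 46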
-
-LIR* MipsMir2Lir::OpUnconditionalBranch(LIR* target) {
-  LIR* res = NewLIR1(kMipsB, 0 /* offset to be patched during assembly */);
-  res->target = target;
-  return res;
-}
-
-LIR* MipsMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
-  MipsOpCode opcode = kMipsNop;
-  switch (op) {
-    case kOpBlx:
-      opcode = kMipsJalr;
-      break;
-    case kOpBx:
-      return NewLIR2(kMipsJalr, rZERO, r_dest_src.GetReg());
-    default:
-      LOG(FATAL) << "Bad case in OpReg";
-      UNREACHABLE();
-  }
-  return NewLIR2(opcode, cu_->target64 ? rRAd : rRA, r_dest_src.GetReg());
-}
-
-LIR* MipsMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
-  if ((op == kOpAdd) || (op == kOpSub)) {
-    return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
-  } else {
-    LOG(FATAL) << "Bad case in OpRegImm";
-    UNREACHABLE();
-  }
-}
-
-LIR* MipsMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
-  MipsOpCode opcode = kMipsNop;
-  bool is64bit = cu_->target64 && (r_dest.Is64Bit() || r_src1.Is64Bit() || r_src2.Is64Bit());
-  switch (op) {
-    case kOpAdd:
-      opcode = is64bit ? kMips64Daddu : kMipsAddu;
-      break;
-    case kOpSub:
-      opcode = is64bit ? kMips64Dsubu : kMipsSubu;
-      break;
-    case kOpAnd:
-      opcode = kMipsAnd;
-      break;
-    case kOpMul:
-      opcode = isaIsR6_ ? kMipsR6Mul : kMipsR2Mul;
-      break;
-    case kOpOr:
-      opcode = kMipsOr;
-      break;
-    case kOpXor:
-      opcode = kMipsXor;
-      break;
-    case kOpLsl:
-      opcode = is64bit ? kMips64Dsllv : kMipsSllv;
-      break;
-    case kOpLsr:
-      opcode = is64bit ? kMips64Dsrlv : kMipsSrlv;
-      break;
-    case kOpAsr:
-      opcode = is64bit ? kMips64Dsrav : kMipsSrav;
-      break;
-    case kOpAdc:
-    case kOpSbc:
-      LOG(FATAL) << "No carry bit on MIPS";
-      break;
-    default:
-      LOG(FATAL) << "Bad case in OpRegRegReg";
-      break;
-  }
-  return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
-}
-
-LIR* MipsMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
-  LIR *res;
-  MipsOpCode opcode = kMipsNop;
-  bool short_form = true;
-  bool is64bit = cu_->target64 && (r_dest.Is64Bit() || r_src1.Is64Bit());
-
-  switch (op) {
-    case kOpAdd:
-      if (IS_SIMM16(value)) {
-        opcode = is64bit ? kMips64Daddiu : kMipsAddiu;
-      } else {
-        short_form = false;
-        opcode = is64bit ? kMips64Daddu : kMipsAddu;
-      }
-      break;
-    case kOpSub:
-      if (IS_SIMM16((-value))) {
-        value = -value;
-        opcode = is64bit ? kMips64Daddiu : kMipsAddiu;
-      } else {
-        short_form = false;
-        opcode = is64bit ? kMips64Dsubu : kMipsSubu;
-      }
-      break;
-    case kOpLsl:
-      if (is64bit) {
-        DCHECK(value >= 0 && value <= 63);
-        if (value >= 0 && value <= 31) {
-          opcode = kMips64Dsll;
-        } else {
-          opcode = kMips64Dsll32;
-          value = value - 32;
-        }
-      } else {
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSll;
-      }
-      break;
-    case kOpLsr:
-      if (is64bit) {
-        DCHECK(value >= 0 && value <= 63);
-        if (value >= 0 && value <= 31) {
-          opcode = kMips64Dsrl;
-        } else {
-          opcode = kMips64Dsrl32;
-          value = value - 32;
-        }
-      } else {
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSrl;
-      }
-      break;
-    case kOpAsr:
-      if (is64bit) {
-        DCHECK(value >= 0 && value <= 63);
-        if (value >= 0 && value <= 31) {
-          opcode = kMips64Dsra;
-        } else {
-          opcode = kMips64Dsra32;
-          value = value - 32;
-        }
-      } else {
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSra;
-      }
-      break;
-    case kOpAnd:
-      if (IS_UIMM16((value))) {
-        opcode = kMipsAndi;
-      } else {
-        short_form = false;
-        opcode = kMipsAnd;
-      }
-      break;
-    case kOpOr:
-      if (IS_UIMM16((value))) {
-        opcode = kMipsOri;
-      } else {
-        short_form = false;
-        opcode = kMipsOr;
-      }
-      break;
-    case kOpXor:
-      if (IS_UIMM16((value))) {
-        opcode = kMipsXori;
-      } else {
-        short_form = false;
-        opcode = kMipsXor;
-      }
-      break;
-    case kOpMul:
-      short_form = false;
-      opcode = isaIsR6_ ? kMipsR6Mul : kMipsR2Mul;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in OpRegRegImm";
-      break;
-  }
-
-  if (short_form) {
-    res = NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), value);
-  } else {
-    if (r_dest != r_src1) {
-      res = LoadConstant(r_dest, value);
-      NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_dest.GetReg());
-    } else {
-      RegStorage r_scratch;
-      if (is64bit) {
-        r_scratch = AllocTempWide();
-        res = LoadConstantWide(r_scratch, value);
-      } else {
-        r_scratch = AllocTemp();
-        res = LoadConstant(r_scratch, value);
-      }
-      NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
-    }
-  }
-  return res;
-}
-
-LIR* MipsMir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  MipsOpCode opcode = kMipsNop;
-  LIR *res;
-  switch (op) {
-    case kOpMov:
-      opcode = kMipsMove;
-      break;
-    case kOpMvn:
-      return NewLIR3(kMipsNor, r_dest_src1.GetReg(), r_src2.GetReg(), rZERO);
-    case kOpNeg:
-      if (cu_->target64 && r_dest_src1.Is64Bit()) {
-        return NewLIR3(kMips64Dsubu, r_dest_src1.GetReg(), rZEROd, r_src2.GetReg());
-      } else {
-        return NewLIR3(kMipsSubu, r_dest_src1.GetReg(), rZERO, r_src2.GetReg());
-      }
-    case kOpAdd:
-    case kOpAnd:
-    case kOpMul:
-    case kOpOr:
-    case kOpSub:
-    case kOpXor:
-      return OpRegRegReg(op, r_dest_src1, r_dest_src1, r_src2);
-    case kOp2Byte:
-      if (cu_->target64) {
-        res = NewLIR2(kMipsSeb, r_dest_src1.GetReg(), r_src2.GetReg());
-      } else {
-        if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-            ->IsMipsIsaRevGreaterThanEqual2()) {
-          res = NewLIR2(kMipsSeb, r_dest_src1.GetReg(), r_src2.GetReg());
-        } else {
-          res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 24);
-          OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 24);
-        }
-      }
-      return res;
-    case kOp2Short:
-      if (cu_->target64) {
-        res = NewLIR2(kMipsSeh, r_dest_src1.GetReg(), r_src2.GetReg());
-      } else {
-        if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-            ->IsMipsIsaRevGreaterThanEqual2()) {
-          res = NewLIR2(kMipsSeh, r_dest_src1.GetReg(), r_src2.GetReg());
-        } else {
-          res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 16);
-          OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 16);
-        }
-      }
-      return res;
-    case kOp2Char:
-      return NewLIR3(kMipsAndi, r_dest_src1.GetReg(), r_src2.GetReg(), 0xFFFF);
-    default:
-      LOG(FATAL) << "Bad case in OpRegReg";
-      UNREACHABLE();
-  }
-  return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
-}
-
-LIR* MipsMir2Lir::OpMovRegMem(RegStorage r_dest ATTRIBUTE_UNUSED,
-                              RegStorage r_base ATTRIBUTE_UNUSED,
-                              int offset ATTRIBUTE_UNUSED,
-                              MoveType move_type ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::OpMovMemReg(RegStorage r_base ATTRIBUTE_UNUSED,
-                              int offset ATTRIBUTE_UNUSED,
-                              RegStorage r_src ATTRIBUTE_UNUSED,
-                              MoveType move_type ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::OpCondRegReg(OpKind op ATTRIBUTE_UNUSED,
-                               ConditionCode cc ATTRIBUTE_UNUSED,
-                               RegStorage r_dest ATTRIBUTE_UNUSED,
-                               RegStorage r_src ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpCondRegReg for MIPS";
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
-  LIR *res;
-  if (cu_->target64) {
-    res = LoadConstantWideNoClobber(r_dest, value);
-    return res;
-  }
-  if (fpuIs32Bit_ || !r_dest.IsFloat()) {
-    // 32bit FPU (pairs) or loading into GPR.
-    if (!r_dest.IsPair()) {
-      // Form 64-bit pair.
-      r_dest = Solo64ToPair64(r_dest);
-    }
-    res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value));
-    LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value));
-  } else {
-    // Here if we have a 64bit FPU and loading into FPR.
-    RegStorage r_temp = AllocTemp();
-    r_dest = Fp64ToSolo32(r_dest);
-    res = LoadConstantNoClobber(r_dest, Low32Bits(value));
-    LoadConstantNoClobber(r_temp, High32Bits(value));
-    NewLIR2(kMipsMthc1, r_temp.GetReg(), r_dest.GetReg());
-    FreeTemp(r_temp);
-  }
-  return res;
-}
-
-/* Load value from base + scaled index. */
-LIR* MipsMir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                  int scale, OpSize size) {
-  LIR *first = nullptr;
-  LIR *res;
-  MipsOpCode opcode = kMipsNop;
-  bool is64bit = cu_->target64 && r_dest.Is64Bit();
-  RegStorage t_reg = is64bit ? AllocTempWide() : AllocTemp();
-
-  if (r_dest.IsFloat()) {
-    DCHECK(r_dest.IsSingle());
-    DCHECK((size == k32) || (size == kSingle) || (size == kReference));
-    size = kSingle;
-  } else {
-    if (size == kSingle)
-      size = k32;
-  }
-
-  if (cu_->target64) {
-    if (!scale) {
-      if (is64bit) {
-        first = NewLIR3(kMips64Daddu, t_reg.GetReg(), r_base.GetReg(), r_index.GetReg());
-      } else {
-        first = NewLIR3(kMipsAddu, t_reg.GetReg(), r_base.GetReg(), r_index.GetReg());
-      }
-    } else {
-      first = OpRegRegImm(kOpLsl, t_reg, r_index, scale);
-      NewLIR3(kMips64Daddu, t_reg.GetReg(), r_base.GetReg(), t_reg.GetReg());
-    }
-  } else {
-    if (!scale) {
-      first = NewLIR3(kMipsAddu, t_reg.GetReg(), r_base.GetReg(), r_index.GetReg());
-    } else {
-      first = OpRegRegImm(kOpLsl, t_reg, r_index, scale);
-      NewLIR3(kMipsAddu, t_reg.GetReg(), r_base.GetReg(), t_reg.GetReg());
-    }
-  }
-
-  switch (size) {
-    case k64:
-      if (cu_->target64) {
-        opcode = kMips64Ld;
-      } else {
-        LOG(FATAL) << "Bad case in LoadBaseIndexed";
-      }
-      break;
-    case kSingle:
-      opcode = kMipsFlwc1;
-      break;
-    case k32:
-    case kReference:
-      opcode = kMipsLw;
-      break;
-    case kUnsignedHalf:
-      opcode = kMipsLhu;
-      break;
-    case kSignedHalf:
-      opcode = kMipsLh;
-      break;
-    case kUnsignedByte:
-      opcode = kMipsLbu;
-      break;
-    case kSignedByte:
-      opcode = kMipsLb;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in LoadBaseIndexed";
-  }
-
-  res = NewLIR3(opcode, r_dest.GetReg(), 0, t_reg.GetReg());
-  FreeTemp(t_reg);
-  return (first) ? first : res;
-}
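The effective address computed above is base + (index << scale); e.g. for a 32-bit element with scale = 2, the emitted 32-bit sequence is roughly:

//   sll  t, index, 2     # scale the index
//   addu t, base, t      # form the effective address
//   lw   dest, 0(t)      # load with zero displacement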
-
-// Store value at base + scaled index.
-LIR* MipsMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                   int scale, OpSize size) {
-  LIR *first = nullptr;
-  MipsOpCode opcode = kMipsNop;
-  RegStorage t_reg = AllocTemp();
-
-  if (r_src.IsFloat()) {
-    DCHECK(r_src.IsSingle());
-    DCHECK((size == k32) || (size == kSingle) || (size == kReference));
-    size = kSingle;
-  } else {
-    if (size == kSingle)
-      size = k32;
-  }
-
-  MipsOpCode add_opcode = cu_->target64 ? kMips64Daddu : kMipsAddu;
-  if (!scale) {
-    first = NewLIR3(add_opcode, t_reg.GetReg(), r_base.GetReg(), r_index.GetReg());
-  } else {
-    first = OpRegRegImm(kOpLsl, t_reg, r_index, scale);
-    NewLIR3(add_opcode, t_reg.GetReg(), r_base.GetReg(), t_reg.GetReg());
-  }
-
-  switch (size) {
-    case kSingle:
-      opcode = kMipsFswc1;
-      break;
-    case k32:
-    case kReference:
-      opcode = kMipsSw;
-      break;
-    case kUnsignedHalf:
-    case kSignedHalf:
-      opcode = kMipsSh;
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      opcode = kMipsSb;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in StoreBaseIndexed";
-  }
-  NewLIR3(opcode, r_src.GetReg(), 0, t_reg.GetReg());
-  return first;
-}
-
-// FIXME: don't split r_dest into 2 containers.
-LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                   OpSize size) {
-/*
- * Load value from base + displacement.  Optionally perform null check
- * on base (which must have an associated s_reg and MIR).  If not
- * performing null check, incoming MIR can be null. IMPORTANT: this
- * code must not allocate any new temps.  If a new register is needed
- * and base and dest are the same, spill some other register to
- * rlp and then restore.
- */
-  LIR *res;
-  LIR *load = nullptr;
-  LIR *load2 = nullptr;
-  MipsOpCode opcode = kMipsNop;
-  bool short_form = IS_SIMM16(displacement);
-  bool is64bit = false;
-
-  switch (size) {
-    case k64:
-    case kDouble:
-      if (cu_->target64) {
-        r_dest = Check64BitReg(r_dest);
-        if (!r_dest.IsFloat()) {
-          opcode = kMips64Ld;
-        } else {
-          opcode = kMipsFldc1;
-        }
-        DCHECK_ALIGNED(displacement, 4);
-        break;
-      }
-      is64bit = true;
-      if (fpuIs32Bit_ && !r_dest.IsPair()) {
-        // Form 64-bit pair.
-        r_dest = Solo64ToPair64(r_dest);
-      }
-      short_form = IS_SIMM16_2WORD(displacement);
-      FALLTHROUGH_INTENDED;
-    case k32:
-    case kSingle:
-    case kReference:
-      opcode = kMipsLw;
-      if (r_dest.IsFloat()) {
-        opcode = kMipsFlwc1;
-        if (!is64bit) {
-          DCHECK(r_dest.IsSingle());
-        } else {
-          DCHECK(r_dest.IsDouble());
-        }
-      }
-      DCHECK_ALIGNED(displacement, 4);
-      break;
-    case kUnsignedHalf:
-      opcode = kMipsLhu;
-      DCHECK_ALIGNED(displacement, 2);
-      break;
-    case kSignedHalf:
-      opcode = kMipsLh;
-      DCHECK_ALIGNED(displacement, 2);
-      break;
-    case kUnsignedByte:
-      opcode = kMipsLbu;
-      break;
-    case kSignedByte:
-      opcode = kMipsLb;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in LoadBaseIndexedBody";
-  }
-
-  if (cu_->target64) {
-    if (short_form) {
-      load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
-    } else {
-      RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
-      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
-      load = NewLIR3(opcode, r_dest.GetReg(), 0, r_tmp.GetReg());
-      if (r_tmp != r_dest)
-        FreeTemp(r_tmp);
-    }
-
-    if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-      DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
-    }
-    return res;
-  }
-
-  if (short_form) {
-    if (!is64bit) {
-      load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
-    } else {
-      if (fpuIs32Bit_ || !r_dest.IsFloat()) {
-        DCHECK(r_dest.IsPair());
-        load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET,
-                             r_base.GetReg());
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
-      } else {
-        // Here if 64bit fpu and r_dest is a 64bit fp register.
-        RegStorage r_tmp = AllocTemp();
-        // FIXME: why is r_dest a 64BitPair here???
-        r_dest = Fp64ToSolo32(r_dest);
-        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET,
-                             r_base.GetReg());
-        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
-        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
-        FreeTemp(r_tmp);
-      }
-    }
-  } else {
-    if (!is64bit) {
-      RegStorage r_tmp = (r_base == r_dest || r_dest.IsFloat()) ? AllocTemp() : r_dest;
-      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
-      load = NewLIR3(opcode, r_dest.GetReg(), 0, r_tmp.GetReg());
-      if (r_tmp != r_dest)
-        FreeTemp(r_tmp);
-    } else {
-      RegStorage r_tmp = AllocTemp();
-      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
-      if (fpuIs32Bit_ || !r_dest.IsFloat()) {
-        DCHECK(r_dest.IsPair());
-        load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg());
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg());
-      } else {
-        // Here if 64bit fpu and r_dest is a 64bit fp register
-        r_dest = Fp64ToSolo32(r_dest);
-        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), LOWORD_OFFSET, r_tmp.GetReg());
-        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), HIWORD_OFFSET, r_tmp.GetReg());
-        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
-      }
-      FreeTemp(r_tmp);
-    }
-  }
-
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, TargetPtrReg(kSp));
-    AnnotateDalvikRegAccess(load, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2,
-                            true /* is_load */, is64bit /* is64bit */);
-    if (is64bit) {
-      AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
-                              true /* is_load */, is64bit /* is64bit */);
-    }
-  }
-  return res;
-}
-
-void MipsMir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags, bool is_wide) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return;
-    }
-    // Force an implicit null check by performing a memory operation (load) from the given
-    // register with offset 0.  This will cause a signal if the register contains 0 (null).
-    LIR* load = Load32Disp(reg, LOWORD_OFFSET, rs_rZERO);
-    MarkSafepointPC(load);
-    if (is_wide) {
-      load = Load32Disp(reg, HIWORD_OFFSET, rs_rZERO);
-      MarkSafepointPC(load);
-    }
-  }
-}
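-
-// Illustrative sketch (not part of the original file): the Load32Disp above
-// materializes as something like "lw $zero, 0($a0)".  Loading into $zero
-// discards the value, so the instruction exists purely to fault: if the
-// register holds null, the access raises SIGSEGV, which ART's fault handler
-// is assumed to convert into a NullPointerException at the PC recorded by
-// MarkSafepointPC().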
-
-LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                               VolatileKind is_volatile) {
-  if (UNLIKELY(is_volatile == kVolatile && (size == k64 || size == kDouble))
-      && (!cu_->target64 || displacement & 0x7)) {
-    // TODO: use lld/scd instructions for Mips64.
-    // Do atomic 64-bit load.
-    return GenAtomic64Load(r_base, displacement, r_dest);
-  }
-
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = cu_->target64 ? k64 : k32;
-  }
-  LIR* load;
-  load = LoadBaseDispBody(r_base, displacement, r_dest, size);
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    GenMemBarrier(kLoadAny);
-  }
-
-  return load;
-}
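-
-// A minimal sketch of the volatile load path, assuming the MIPS barrier
-// lowers to a SYNC instruction (registers and offsets are hypothetical):
-//   lw   $t0, 8($a1)   # the volatile 32-bit load
-//   sync               # kLoadAny: later accesses may not be hoisted above
-// Together these give the load the acquire semantics a Java volatile read
-// requires.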
-
-// FIXME: don't split r_dest into 2 containers.
-LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
-                                    OpSize size) {
-  LIR *res;
-  LIR *store = nullptr;
-  LIR *store2 = nullptr;
-  MipsOpCode opcode = kMipsNop;
-  bool short_form = IS_SIMM16(displacement);
-  bool is64bit = false;
-
-  switch (size) {
-    case k64:
-    case kDouble:
-      if (cu_->target64) {
-        r_src = Check64BitReg(r_src);
-        if (!r_src.IsFloat()) {
-          opcode = kMips64Sd;
-        } else {
-          opcode = kMipsFsdc1;
-        }
-        DCHECK_ALIGNED(displacement, 4);
-        break;
-      }
-      is64bit = true;
-      if (fpuIs32Bit_ && !r_src.IsPair()) {
-        // Form 64-bit pair.
-        r_src = Solo64ToPair64(r_src);
-      }
-      short_form = IS_SIMM16_2WORD(displacement);
-      FALLTHROUGH_INTENDED;
-    case k32:
-    case kSingle:
-    case kReference:
-      opcode = kMipsSw;
-      if (r_src.IsFloat()) {
-        opcode = kMipsFswc1;
-        if (!is64bit) {
-          DCHECK(r_src.IsSingle());
-        } else {
-          DCHECK(r_src.IsDouble());
-        }
-      }
-      DCHECK_ALIGNED(displacement, 4);
-      break;
-    case kUnsignedHalf:
-    case kSignedHalf:
-      opcode = kMipsSh;
-      DCHECK_ALIGNED(displacement, 2);
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      opcode = kMipsSb;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in StoreBaseDispBody";
-  }
-
-  if (cu_->target64) {
-    if (short_form) {
-      store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
-    } else {
-      RegStorage r_scratch = AllocTemp();
-      res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
-      store = NewLIR3(opcode, r_src.GetReg(), 0, r_scratch.GetReg());
-      FreeTemp(r_scratch);
-    }
-
-    if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-      DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
-    }
-    return res;
-  }
-
-  if (short_form) {
-    if (!is64bit) {
-      store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
-    } else {
-      if (fpuIs32Bit_ || !r_src.IsFloat()) {
-        DCHECK(r_src.IsPair());
-        store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET,
-                              r_base.GetReg());
-        store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
-      } else {
-        // Here if 64bit fpu and r_src is a 64bit fp register.
-        RegStorage r_tmp = AllocTemp();
-        r_src = Fp64ToSolo32(r_src);
-        store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET,
-                              r_base.GetReg());
-        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
-        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
-        FreeTemp(r_tmp);
-      }
-    }
-  } else {
-    RegStorage r_scratch = AllocTemp();
-    res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
-    if (!is64bit) {
-      store = NewLIR3(opcode, r_src.GetReg(), 0, r_scratch.GetReg());
-    } else {
-      if (fpuIs32Bit_ || !r_src.IsFloat()) {
-        DCHECK(r_src.IsPair());
-        store = NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg());
-        store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg());
-      } else {
-        // Here if 64bit fpu and r_src is a 64bit fp register.
-        RegStorage r_tmp = AllocTemp();
-        r_src = Fp64ToSolo32(r_src);
-        store = NewLIR3(kMipsFswc1, r_src.GetReg(), LOWORD_OFFSET, r_scratch.GetReg());
-        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
-        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), HIWORD_OFFSET, r_scratch.GetReg());
-        FreeTemp(r_tmp);
-      }
-    }
-    FreeTemp(r_scratch);
-  }
-
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, TargetPtrReg(kSp));
-    AnnotateDalvikRegAccess(store, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2,
-                            false /* is_load */, is64bit /* is64bit */);
-    if (is64bit) {
-      AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
-                              false /* is_load */, is64bit /* is64bit */);
-    }
-  }
-
-  return res;
-}
-
-LIR* MipsMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
-                                VolatileKind is_volatile) {
-  if (is_volatile == kVolatile) {
-    // Ensure that prior accesses become visible to other threads first.
-    GenMemBarrier(kAnyStore);
-  }
-
-  LIR* store;
-  if (UNLIKELY(is_volatile == kVolatile && (size == k64 || size == kDouble) &&
-      (!cu_->target64 || displacement & 0x7))) {
-    // TODO: use lld/scd instructions for Mips64.
-    // Do atomic 64-bit store.
-    store = GenAtomic64Store(r_base, displacement, r_src);
-  } else {
-    // TODO: base this on target.
-    if (size == kWord) {
-      size = cu_->target64 ? k64 : k32;
-    }
-    store = StoreBaseDispBody(r_base, displacement, r_src, size);
-  }
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // Preserve order with respect to any subsequent volatile loads.
-    // We need StoreLoad, but that generally requires the most expensive barrier.
-    GenMemBarrier(kAnyAny);
-  }
-
-  return store;
-}
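-
-// Sketch of the volatile store fencing, again assuming SYNC (illustrative):
-//   sync               # kAnyStore: prior accesses complete before the store
-//   sw   $t0, 8($a1)   # the volatile store itself
-//   sync               # kAnyAny: conservative stand-in for StoreLoad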
-
-LIR* MipsMir2Lir::OpMem(OpKind op ATTRIBUTE_UNUSED,
-                        RegStorage r_base ATTRIBUTE_UNUSED,
-                        int disp ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpMem for MIPS";
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::OpCondBranch(ConditionCode cc ATTRIBUTE_UNUSED, LIR* target ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpCondBranch for MIPS";
-  UNREACHABLE();
-}
-
-LIR* MipsMir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
-  if (!cu_->target64 && IsDirectEntrypoint(trampoline)) {
-    // Reserve argument space on stack (for $a0-$a3) for
-    // entrypoints that directly reference native implementations.
-    // This is not safe in general, as it violates the frame size
-    // of the Quick method, but it is used here only for calling
-    // native functions, outside of the runtime.
-    OpRegImm(kOpSub, rs_rSP, 16);
-    LIR* retVal = OpReg(op, r_tgt);
-    OpRegImm(kOpAdd, rs_rSP, 16);
-    return retVal;
-  }
-
-  return OpReg(op, r_tgt);
-}
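-
-// Note (assumption, for context): the 16 bytes reserved above match the MIPS
-// o32 calling convention, under which the callee may home its register
-// arguments $a0-$a3 into a 16-byte save area at the caller's $sp.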
-
-RegStorage MipsMir2Lir::AllocPtrSizeTemp(bool required) {
-  return cu_->target64 ? AllocTempWide(required) : AllocTemp(required);
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
deleted file mode 100644
index f96816c..0000000
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_
-#define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_
-
-#include "mir_to_lir.h"
-
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "gc_root.h"
-#include "utils.h"
-
-namespace art {
-
-/* Mark a temp register as dead.  Does not affect allocation state. */
-inline void Mir2Lir::ClobberBody(RegisterInfo* p) {
-  DCHECK(p->IsTemp());
-  if (p->SReg() != INVALID_SREG) {
-    DCHECK(!(p->IsLive() && p->IsDirty())) << "Live & dirty temp in clobber";
-    p->MarkDead();
-    if (p->IsWide()) {
-      p->SetIsWide(false);
-      if (p->GetReg().NotExactlyEquals(p->Partner())) {
-        // Register pair - deal with the other half.
-        p = GetRegInfo(p->Partner());
-        p->SetIsWide(false);
-        p->MarkDead();
-      }
-    }
-  }
-}
-
-inline LIR* Mir2Lir::RawLIR(DexOffset dalvik_offset, int opcode, int op0,
-                            int op1, int op2, int op3, int op4, LIR* target) {
-  LIR* insn = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
-  insn->dalvik_offset = dalvik_offset;
-  insn->opcode = opcode;
-  insn->operands[0] = op0;
-  insn->operands[1] = op1;
-  insn->operands[2] = op2;
-  insn->operands[3] = op3;
-  insn->operands[4] = op4;
-  insn->target = target;
-  SetupResourceMasks(insn);
-  if ((opcode == kPseudoTargetLabel) || (opcode == kPseudoSafepointPC) ||
-      (opcode == kPseudoExportedPC)) {
-      // Always make labels scheduling barriers.
-    DCHECK(!insn->flags.use_def_invalid);
-    insn->u.m.use_mask = insn->u.m.def_mask = &kEncodeAll;
-  }
-  return insn;
-}
-
-/*
- * The following are building blocks to construct low-level IRs with 0 - 5
- * operands.
- */
-inline LIR* Mir2Lir::NewLIR0(int opcode) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode);
-  AppendLIR(insn);
-  return insn;
-}
-
-inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest);
-  AppendLIR(insn);
-  return insn;
-}
-
-inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1);
-  AppendLIR(insn);
-  return insn;
-}
-
-inline LIR* Mir2Lir::NewLIR2NoDest(int opcode, int src, int info) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode, src, info);
-  AppendLIR(insn);
-  return insn;
-}
-
-inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1, src2);
-  AppendLIR(insn);
-  return insn;
-}
-
-inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1, src2, info);
-  AppendLIR(insn);
-  return insn;
-}
-
-inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1,
-                             int info2) {
-  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP))
-      << GetTargetInstName(opcode) << " " << opcode << " "
-      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " "
-      << current_dalvik_offset_;
-  LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1, src2, info1, info2);
-  AppendLIR(insn);
-  return insn;
-}
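-
-// Usage sketch (hypothetical operands): a MIPS "addu rd, rs, rt" would be
-// emitted as a tertiary op, e.g.
-//   NewLIR3(kMipsAddu, rd.GetReg(), rs.GetReg(), rt.GetReg());
-// Each NewLIRn first DCHECKs that the opcode carries the matching arity flag
-// (IS_TERTIARY_OP here), then appends the node at current_dalvik_offset_.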
-
-/*
- * Mark the corresponding bit(s).
- */
-inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) {
-  DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0);
-  DCHECK_LT(static_cast<size_t>(reg), reginfo_map_.size());
-  DCHECK(reginfo_map_[reg] != nullptr) << "No info for 0x" << std::hex << reg;
-  *mask = mask->Union(reginfo_map_[reg]->DefUseMask());
-}
-
-/*
- * Clear the corresponding bit(s).
- */
-inline void Mir2Lir::ClearRegMask(ResourceMask* mask, int reg) {
-  DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0);
-  DCHECK_LT(static_cast<size_t>(reg), reginfo_map_.size());
-  DCHECK(reginfo_map_[reg] != nullptr) << "No info for 0x" << std::hex << reg;
-  *mask = mask->ClearBits(reginfo_map_[reg]->DefUseMask());
-}
-
-/*
- * Set up the proper fields in the resource mask
- */
-inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
-  int opcode = lir->opcode;
-
-  if (IsPseudoLirOp(opcode)) {
-    lir->u.m.use_mask = lir->u.m.def_mask = &kEncodeNone;
-    if (opcode != kPseudoBarrier) {
-      lir->flags.fixup = kFixupLabel;
-    }
-    return;
-  }
-
-  uint64_t flags = GetTargetInstFlags(opcode);
-
-  if (flags & NEEDS_FIXUP) {
-    // Note: target-specific setup may specialize the fixup kind.
-    lir->flags.fixup = kFixupLabel;
-  }
-
-  /* Get the starting size of the instruction's template. */
-  lir->flags.size = GetInsnSize(lir);
-  estimated_native_code_size_ += lir->flags.size;
-
-  /* Set up the mask for resources. */
-  ResourceMask use_mask;
-  ResourceMask def_mask;
-
-  if (flags & (IS_LOAD | IS_STORE)) {
-    /* Set memory reference type (defaults to heap, overridden by ScopedMemRefType). */
-    if (flags & IS_LOAD) {
-      use_mask.SetBit(mem_ref_type_);
-    } else {
-      /* Currently only loads can be marked as kMustNotAlias. */
-      DCHECK(mem_ref_type_ != ResourceMask::kMustNotAlias);
-    }
-    if (flags & IS_STORE) {
-      /* Literals cannot be written to. */
-      DCHECK(mem_ref_type_ != ResourceMask::kLiteral);
-      def_mask.SetBit(mem_ref_type_);
-    }
-  }
-
-  /*
-   * Conservatively assume the branch here will call out to a function that in
-   * turn will trash everything.
-   */
-  if (flags & IS_BRANCH) {
-    lir->u.m.def_mask = lir->u.m.use_mask = &kEncodeAll;
-    return;
-  }
-
-  if (flags & REG_DEF0) {
-    SetupRegMask(&def_mask, lir->operands[0]);
-  }
-
-  if (flags & REG_DEF1) {
-    SetupRegMask(&def_mask, lir->operands[1]);
-  }
-
-  if (flags & REG_DEF2) {
-    SetupRegMask(&def_mask, lir->operands[2]);
-  }
-
-  if (flags & REG_USE0) {
-    SetupRegMask(&use_mask, lir->operands[0]);
-  }
-
-  if (flags & REG_USE1) {
-    SetupRegMask(&use_mask, lir->operands[1]);
-  }
-
-  if (flags & REG_USE2) {
-    SetupRegMask(&use_mask, lir->operands[2]);
-  }
-
-  if (flags & REG_USE3) {
-    SetupRegMask(&use_mask, lir->operands[3]);
-  }
-
-  if (flags & REG_USE4) {
-    SetupRegMask(&use_mask, lir->operands[4]);
-  }
-
-  if (flags & SETS_CCODES) {
-    def_mask.SetBit(ResourceMask::kCCode);
-  }
-
-  if (flags & USES_CCODES) {
-    use_mask.SetBit(ResourceMask::kCCode);
-  }
-
-  // Handle target-specific actions.
-  SetupTargetResourceMasks(lir, flags, &use_mask, &def_mask);
-
-  lir->u.m.use_mask = mask_cache_.GetMask(use_mask);
-  lir->u.m.def_mask = mask_cache_.GetMask(def_mask);
-}
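-
-// Worked example (assumed flags, for illustration only): a 32-bit load whose
-// encoding carries IS_LOAD | REG_DEF0 | REG_USE1 ends up with
-//   use_mask = bits(operands[1]) | bit(mem_ref_type_)
-//   def_mask = bits(operands[0])
-// so the scheduler sees both the address-register dependence and the memory
-// region (heap, Dalvik reg, or literal) the instruction may touch.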
-
-inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) {
-  RegisterInfo* res = reg.IsPair() ? reginfo_map_[reg.GetLowReg()] : reginfo_map_[reg.GetReg()];
-  DCHECK(res != nullptr);
-  return res;
-}
-
-inline void Mir2Lir::CheckRegLocation(RegLocation rl) const {
-  if (kFailOnSizeError || kReportSizeError) {
-    CheckRegLocationImpl(rl, kFailOnSizeError, kReportSizeError);
-  }
-}
-
-inline void Mir2Lir::CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp)
-    const {
-  if (kFailOnSizeError || kReportSizeError) {
-    CheckRegStorageImpl(rs, wide, ref, fp, kFailOnSizeError, kReportSizeError);
-  }
-}
-
-inline size_t Mir2Lir::GetCacheOffset(uint32_t index) {
-  return sizeof(GcRoot<mirror::Object>) * index;
-}
-
-inline size_t Mir2Lir::GetCachePointerOffset(uint32_t index, size_t pointer_size) {
-  return pointer_size * index;
-}
-
-inline Mir2Lir::ShortyIterator::ShortyIterator(const char* shorty, bool is_static)
-    : cur_(shorty + 1), pending_this_(!is_static), initialized_(false) {
-  DCHECK(shorty != nullptr);
-  DCHECK_NE(*shorty, 0);
-}
-
-inline bool Mir2Lir::ShortyIterator::Next() {
-  if (!initialized_) {
-    initialized_ = true;
-  } else if (pending_this_) {
-    pending_this_ = false;
-  } else if (*cur_ != 0) {
-    cur_++;
-  }
-
-  return *cur_ != 0 || pending_this_;
-}
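-
-// Usage sketch: for an instance method with shorty "VIJ" (void return, int
-// and long arguments) the iterator is expected to yield the implicit "this"
-// first, then 'I', then 'J', after which Next() returns false.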
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
deleted file mode 100644
index 8da3863..0000000
--- a/compiler/dex/quick/mir_to_lir.cc
+++ /dev/null
@@ -1,1460 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mir_to_lir-inl.h"
-
-#include "dex/dataflow_iterator-inl.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "driver/compiler_driver.h"
-#include "primitive.h"
-#include "thread-inl.h"
-
-namespace art {
-
-class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
- public:
-  SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
-      : LIRSlowPath(m2l, branch, cont),
-        num_used_args_(0u) {
-  }
-
-  void PreserveArg(int in_position) {
-    // Avoid duplicates.
-    for (size_t i = 0; i != num_used_args_; ++i) {
-      if (used_args_[i] == in_position) {
-        return;
-      }
-    }
-    DCHECK_LT(num_used_args_, kMaxArgsToPreserve);
-    used_args_[num_used_args_] = in_position;
-    ++num_used_args_;
-  }
-
-  void Compile() OVERRIDE {
-    m2l_->ResetRegPool();
-    m2l_->ResetDefTracking();
-    GenerateTargetLabel(kPseudoSuspendTarget);
-
-    m2l_->LockCallTemps();
-
-    // Generate frame.
-    m2l_->GenSpecialEntryForSuspend();
-
-    // Spill all args.
-    for (size_t i = 0, end = m2l_->in_to_reg_storage_mapping_.GetEndMappedIn(); i < end;
-        i += m2l_->in_to_reg_storage_mapping_.GetShorty(i).IsWide() ? 2u : 1u) {
-      m2l_->SpillArg(i);
-    }
-
-    m2l_->FreeCallTemps();
-
-    // Do the actual suspend call to runtime.
-    m2l_->CallRuntimeHelper(kQuickTestSuspend, true);
-
-    m2l_->LockCallTemps();
-
-    // Unspill used regs. (Don't unspill unused args.)
-    for (size_t i = 0; i != num_used_args_; ++i) {
-      m2l_->UnspillArg(used_args_[i]);
-    }
-
-    // Pop the frame.
-    m2l_->GenSpecialExitForSuspend();
-
-    // Branch to the continue label.
-    DCHECK(cont_ != nullptr);
-    m2l_->OpUnconditionalBranch(cont_);
-
-    m2l_->FreeCallTemps();
-  }
-
- private:
-  static constexpr size_t kMaxArgsToPreserve = 2u;
-  size_t num_used_args_;
-  int used_args_[kMaxArgsToPreserve];
-};
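-
-// Control-flow sketch of the special-method suspend check built from this
-// slow path (summarizing Compile() above, not part of the original file):
-//   OpTestSuspend --taken--> suspend target: build frame, spill all ins,
-//                            call kQuickTestSuspend, unspill preserved ins,
-//                            pop frame, branch back to the continue label.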
-
-RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) {
-  RegisterClass res;
-  switch (shorty_type) {
-    case 'L':
-      res = kRefReg;
-      break;
-    case 'F':
-      // Expected fallthrough.
-    case 'D':
-      res = kFPReg;
-      break;
-    default:
-      res = kCoreReg;
-  }
-  return res;
-}
-
-void Mir2Lir::LockArg(size_t in_position) {
-  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
-
-  if (reg_arg.Valid()) {
-    LockTemp(reg_arg);
-  }
-}
-
-RegStorage Mir2Lir::LoadArg(size_t in_position, RegisterClass reg_class, bool wide) {
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-
-  if (cu_->instruction_set == kX86) {
-    /*
-     * When making a call on x86, the stack pointer moves in order to push the
-     * return address.  Thus, we add another 4 bytes to locate the caller's outs
-     * (the callee's ins).
-     */
-    offset += sizeof(uint32_t);
-  }
-
-  if (cu_->instruction_set == kX86_64) {
-    /*
-     * When making a call on x86-64, the stack pointer moves in order to push the
-     * return address.  Thus, we add another 8 bytes to locate the caller's outs
-     * (the callee's ins).
-     */
-    offset += sizeof(uint64_t);
-  }
-
-  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
-
-  // TODO: REVISIT: This adds a spill of the low part when we could just copy it.
-  if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) {
-    // For a wide argument we've got only half of it in a register,
-    // so flush it to memory.
-    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile);
-    reg_arg = RegStorage::InvalidReg();
-  }
-
-  if (!reg_arg.Valid()) {
-    reg_arg = wide ?  AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
-    LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, wide ? k64 : k32, kNotVolatile);
-  } else {
-    // Check if we need to copy the arg to a different reg_class.
-    if (!RegClassMatches(reg_class, reg_arg)) {
-      if (wide) {
-        RegStorage new_reg = AllocTypedTempWide(false, reg_class);
-        OpRegCopyWide(new_reg, reg_arg);
-        reg_arg = new_reg;
-      } else {
-        RegStorage new_reg = AllocTypedTemp(false, reg_class);
-        OpRegCopy(new_reg, reg_arg);
-        reg_arg = new_reg;
-      }
-    }
-  }
-  return reg_arg;
-}
-
-void Mir2Lir::LoadArgDirect(size_t in_position, RegLocation rl_dest) {
-  DCHECK_EQ(rl_dest.location, kLocPhysReg);
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86) {
-    /*
-     * When making a call on x86, the stack pointer moves in order to push the
-     * return address.  Thus, we add another 4 bytes to locate the caller's outs
-     * (the callee's ins).
-     */
-    offset += sizeof(uint32_t);
-  }
-
-  if (cu_->instruction_set == kX86_64) {
-    /*
-     * When making a call on x86-64, the stack pointer moves in order to push the
-     * return address.  Thus, we add another 8 bytes to locate the caller's outs
-     * (the callee's ins).
-     */
-    offset += sizeof(uint64_t);
-  }
-
-  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
-
-  // TODO: REVISIT: This adds a spill of the low part when we could just copy it.
-  if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) {
-    // For a wide argument we've got only half of it in a register,
-    // so flush it to memory.
-    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile);
-    reg_arg = RegStorage::InvalidReg();
-  }
-
-  if (!reg_arg.Valid()) {
-    OpSize op_size = rl_dest.wide ? k64 : (rl_dest.ref ? kReference : k32);
-    LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, op_size, kNotVolatile);
-  } else {
-    if (rl_dest.wide) {
-      OpRegCopyWide(rl_dest.reg, reg_arg);
-    } else {
-      OpRegCopy(rl_dest.reg, reg_arg);
-    }
-  }
-}
-
-void Mir2Lir::SpillArg(size_t in_position) {
-  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
-
-  if (reg_arg.Valid()) {
-    int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-    ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position);
-    OpSize size = arg.IsRef() ? kReference :
-        (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32;
-    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile);
-  }
-}
-
-void Mir2Lir::UnspillArg(size_t in_position) {
-  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
-
-  if (reg_arg.Valid()) {
-    int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-    ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position);
-    OpSize size = arg.IsRef() ? kReference :
-        (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32;
-    LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile);
-  }
-}
-
-Mir2Lir::SpecialSuspendCheckSlowPath* Mir2Lir::GenSpecialSuspendTest() {
-  LockCallTemps();
-  LIR* branch = OpTestSuspend(nullptr);
-  FreeCallTemps();
-  LIR* cont = NewLIR0(kPseudoTargetLabel);
-  SpecialSuspendCheckSlowPath* slow_path =
-      new (arena_) SpecialSuspendCheckSlowPath(this, branch, cont);
-  AddSlowPath(slow_path);
-  return slow_path;
-}
-
-bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
-  // FastInstance() already checked by DexFileMethodInliner.
-  const InlineIGetIPutData& data = special.d.ifield_data;
-  if (data.method_is_static != 0u || data.object_arg != 0u) {
-    // The object is not "this" and has to be null-checked.
-    return false;
-  }
-
-  OpSize size;
-  switch (data.op_variant) {
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET):
-      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32;
-      break;
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE):
-      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64;
-      break;
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT):
-      size = kReference;
-      break;
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT):
-      size = kSignedHalf;
-      break;
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_CHAR):
-      size = kUnsignedHalf;
-      break;
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BYTE):
-      size = kSignedByte;
-      break;
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BOOLEAN):
-      size = kUnsignedByte;
-      break;
-    default:
-      LOG(FATAL) << "Unknown variant: " << data.op_variant;
-      UNREACHABLE();
-  }
-
-  // Point of no return - no aborts after this
-  if (!kLeafOptimization) {
-    auto* slow_path = GenSpecialSuspendTest();
-    slow_path->PreserveArg(data.object_arg);
-  }
-  LockArg(data.object_arg);
-  GenPrintLabel(mir);
-  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
-  RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
-  RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
-  RegLocation rl_dest = IsWide(size) ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class);
-  RegStorage r_result = rl_dest.reg;
-  if (!RegClassMatches(reg_class, r_result)) {
-    r_result = IsWide(size) ? AllocTypedTempWide(rl_dest.fp, reg_class)
-                            : AllocTypedTemp(rl_dest.fp, reg_class);
-  }
-  if (IsRef(size)) {
-    LoadRefDisp(reg_obj, data.field_offset, r_result, data.is_volatile ? kVolatile : kNotVolatile);
-  } else {
-    LoadBaseDisp(reg_obj, data.field_offset, r_result, size, data.is_volatile ? kVolatile :
-        kNotVolatile);
-  }
-  if (r_result.NotExactlyEquals(rl_dest.reg)) {
-    if (IsWide(size)) {
-      OpRegCopyWide(rl_dest.reg, r_result);
-    } else {
-      OpRegCopy(rl_dest.reg, r_result);
-    }
-  }
-  return true;
-}
-
-bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
-  // FastInstance() already checked by DexFileMethodInliner.
-  const InlineIGetIPutData& data = special.d.ifield_data;
-  if (data.method_is_static != 0u || data.object_arg != 0u) {
-    // The object is not "this" and has to be null-checked.
-    return false;
-  }
-  if (data.return_arg_plus1 != 0u) {
-    // The setter returns one of the method's arguments, which we don't support here.
-    return false;
-  }
-
-  OpSize size;
-  switch (data.op_variant) {
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT):
-      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32;
-      break;
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE):
-      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64;
-      break;
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT):
-      size = kReference;
-      break;
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT):
-      size = kSignedHalf;
-      break;
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_CHAR):
-      size = kUnsignedHalf;
-      break;
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BYTE):
-      size = kSignedByte;
-      break;
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BOOLEAN):
-      size = kUnsignedByte;
-      break;
-    default:
-      LOG(FATAL) << "Unknown variant: " << data.op_variant;
-      UNREACHABLE();
-  }
-
-  // Point of no return - no aborts after this
-  if (!kLeafOptimization) {
-    auto* slow_path = GenSpecialSuspendTest();
-    slow_path->PreserveArg(data.object_arg);
-    slow_path->PreserveArg(data.src_arg);
-  }
-  LockArg(data.object_arg);
-  LockArg(data.src_arg);
-  GenPrintLabel(mir);
-  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
-  RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
-  RegStorage reg_src = LoadArg(data.src_arg, reg_class, IsWide(size));
-  if (IsRef(size)) {
-    StoreRefDisp(reg_obj, data.field_offset, reg_src, data.is_volatile ? kVolatile : kNotVolatile);
-  } else {
-    StoreBaseDisp(reg_obj, data.field_offset, reg_src, size, data.is_volatile ? kVolatile :
-        kNotVolatile);
-  }
-  if (IsRef(size)) {
-    MarkGCCard(0, reg_src, reg_obj);
-  }
-  return true;
-}
-
-bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) {
-  const InlineReturnArgData& data = special.d.return_data;
-  bool wide = (data.is_wide != 0u);
-
-  // Point of no return - no aborts after this
-  if (!kLeafOptimization) {
-    auto* slow_path = GenSpecialSuspendTest();
-    slow_path->PreserveArg(data.arg);
-  }
-  LockArg(data.arg);
-  GenPrintLabel(mir);
-  RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]);
-  RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class);
-  LoadArgDirect(data.arg, rl_dest);
-  return true;
-}
-
-/*
- * Special-case code generation for simple non-throwing leaf methods.
- */
-bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special) {
-  DCHECK(special.flags & kInlineSpecial);
-  current_dalvik_offset_ = mir->offset;
-  DCHECK(current_mir_ == nullptr);  // Safepoints attributed to prologue.
-  MIR* return_mir = nullptr;
-  bool successful = false;
-  EnsureInitializedArgMappingToPhysicalReg();
-
-  switch (special.opcode) {
-    case kInlineOpNop:
-      successful = true;
-      DCHECK_EQ(mir->dalvikInsn.opcode, Instruction::RETURN_VOID);
-      if (!kLeafOptimization) {
-        GenSpecialSuspendTest();
-      }
-      return_mir = mir;
-      break;
-    case kInlineOpNonWideConst: {
-      successful = true;
-      if (!kLeafOptimization) {
-        GenSpecialSuspendTest();
-      }
-      RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0]));
-      GenPrintLabel(mir);
-      LoadConstant(rl_dest.reg, static_cast<int>(special.d.data));
-      return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir);
-      break;
-    }
-    case kInlineOpReturnArg:
-      successful = GenSpecialIdentity(mir, special);
-      return_mir = mir;
-      break;
-    case kInlineOpIGet:
-      successful = GenSpecialIGet(mir, special);
-      return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir);
-      break;
-    case kInlineOpIPut:
-      successful = GenSpecialIPut(mir, special);
-      return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir);
-      break;
-    default:
-      break;
-  }
-
-  if (successful) {
-    if (kIsDebugBuild) {
-      // Clear unreachable catch entries.
-      mir_graph_->catches_.clear();
-    }
-
-    // Handle verbosity for return MIR.
-    if (return_mir != nullptr) {
-      current_dalvik_offset_ = return_mir->offset;
-      // The special identity case is not handled here because it already generated
-      // code as part of the return. The label should have been added before any
-      // code was generated.
-      if (special.opcode != kInlineOpReturnArg) {
-        GenPrintLabel(return_mir);
-      }
-    }
-    GenSpecialExitSequence();
-
-    if (!kLeafOptimization) {
-      HandleSlowPaths();
-    } else {
-      core_spill_mask_ = 0;
-      num_core_spills_ = 0;
-      fp_spill_mask_ = 0;
-      num_fp_spills_ = 0;
-      frame_size_ = 0;
-      core_vmap_table_.clear();
-      fp_vmap_table_.clear();
-    }
-  }
-
-  return successful;
-}
-
-/*
- * Target-independent code generation.  Use only high-level
- * load/store utilities here, or target-dependent genXX() handlers
- * when necessary.
- */
-void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list) {
-  RegLocation rl_src[3];
-  RegLocation rl_dest = mir_graph_->GetBadLoc();
-  RegLocation rl_result = mir_graph_->GetBadLoc();
-  const Instruction::Code opcode = mir->dalvikInsn.opcode;
-  const int opt_flags = mir->optimization_flags;
-  const uint32_t vB = mir->dalvikInsn.vB;
-  const uint32_t vC = mir->dalvikInsn.vC;
-  DCHECK(CheckCorePoolSanity()) << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " @ 0x"
-                                << std::hex << current_dalvik_offset_;
-
-  // Prep Src and Dest locations.
-  int next_sreg = 0;
-  int next_loc = 0;
-  uint64_t attrs = MIRGraph::GetDataFlowAttributes(opcode);
-  rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
-  if (attrs & DF_UA) {
-    if (attrs & DF_A_WIDE) {
-      rl_src[next_loc++] = mir_graph_->GetSrcWide(mir, next_sreg);
-      next_sreg += 2;
-    } else {
-      rl_src[next_loc++] = mir_graph_->GetSrc(mir, next_sreg);
-      next_sreg++;
-    }
-  }
-  if (attrs & DF_UB) {
-    if (attrs & DF_B_WIDE) {
-      rl_src[next_loc++] = mir_graph_->GetSrcWide(mir, next_sreg);
-      next_sreg += 2;
-    } else {
-      rl_src[next_loc++] = mir_graph_->GetSrc(mir, next_sreg);
-      next_sreg++;
-    }
-  }
-  if (attrs & DF_UC) {
-    if (attrs & DF_C_WIDE) {
-      rl_src[next_loc++] = mir_graph_->GetSrcWide(mir, next_sreg);
-    } else {
-      rl_src[next_loc++] = mir_graph_->GetSrc(mir, next_sreg);
-    }
-  }
-  if (attrs & DF_DA) {
-    if (attrs & DF_A_WIDE) {
-      rl_dest = mir_graph_->GetDestWide(mir);
-    } else {
-      rl_dest = mir_graph_->GetDest(mir);
-    }
-  }
-  switch (opcode) {
-    case Instruction::NOP:
-      break;
-
-    case Instruction::MOVE_EXCEPTION:
-      GenMoveException(rl_dest);
-      break;
-
-    case Instruction::RETURN_VOID_NO_BARRIER:
-    case Instruction::RETURN_VOID:
-      if (((cu_->access_flags & kAccConstructor) != 0) &&
-          cu_->compiler_driver->RequiresConstructorBarrier(Thread::Current(), cu_->dex_file,
-                                                          cu_->class_def_idx)) {
-        GenMemBarrier(kStoreStore);
-      }
-      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
-        GenSuspendTest(opt_flags);
-      }
-      break;
-
-    case Instruction::RETURN_OBJECT:
-      DCHECK(rl_src[0].ref);
-      FALLTHROUGH_INTENDED;
-    case Instruction::RETURN:
-      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
-        GenSuspendTest(opt_flags);
-      }
-      StoreValue(GetReturn(ShortyToRegClass(cu_->shorty[0])), rl_src[0]);
-      break;
-
-    case Instruction::RETURN_WIDE:
-      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
-        GenSuspendTest(opt_flags);
-      }
-      StoreValueWide(GetReturnWide(ShortyToRegClass(cu_->shorty[0])), rl_src[0]);
-      break;
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE:
-    case Instruction::MOVE_RESULT_OBJECT:
-      // Already processed with invoke or filled-new-array.
-      break;
-
-    case Instruction::MOVE:
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_16:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_OBJECT_FROM16:
-      StoreValue(rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_16:
-    case Instruction::MOVE_WIDE_FROM16:
-      StoreValueWide(rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::CONST:
-    case Instruction::CONST_4:
-    case Instruction::CONST_16:
-      GenConst(rl_dest, vB);
-      break;
-
-    case Instruction::CONST_HIGH16:
-      GenConst(rl_dest, vB << 16);
-      break;
-
-    case Instruction::CONST_WIDE_16:
-    case Instruction::CONST_WIDE_32:
-      GenConstWide(rl_dest, static_cast<int64_t>(static_cast<int32_t>(vB)));
-      break;
-
-    case Instruction::CONST_WIDE:
-      GenConstWide(rl_dest, mir->dalvikInsn.vB_wide);
-      break;
-
-    case Instruction::CONST_WIDE_HIGH16:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantWide(rl_result.reg, static_cast<int64_t>(vB) << 48);
-      StoreValueWide(rl_dest, rl_result);
-      break;
-
-    case Instruction::MONITOR_ENTER:
-      GenMonitorEnter(opt_flags, rl_src[0]);
-      break;
-
-    case Instruction::MONITOR_EXIT:
-      GenMonitorExit(opt_flags, rl_src[0]);
-      break;
-
-    case Instruction::CHECK_CAST: {
-      GenCheckCast(opt_flags, mir->offset, vB, rl_src[0]);
-      break;
-    }
-    case Instruction::INSTANCE_OF:
-      GenInstanceof(vC, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::NEW_INSTANCE:
-      GenNewInstance(vB, rl_dest);
-      break;
-
-    case Instruction::THROW:
-      GenThrow(rl_src[0]);
-      break;
-
-    case Instruction::ARRAY_LENGTH: {
-      int len_offset = mirror::Array::LengthOffset().Int32Value();
-      rl_src[0] = LoadValue(rl_src[0], kRefReg);
-      GenNullCheck(rl_src[0].reg, opt_flags);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      Load32Disp(rl_src[0].reg, len_offset, rl_result.reg);
-      MarkPossibleNullPointerException(opt_flags);
-      StoreValue(rl_dest, rl_result);
-      break;
-    }
-    case Instruction::CONST_STRING:
-    case Instruction::CONST_STRING_JUMBO:
-      GenConstString(vB, rl_dest);
-      break;
-
-    case Instruction::CONST_CLASS:
-      GenConstClass(vB, rl_dest);
-      break;
-
-    case Instruction::FILL_ARRAY_DATA:
-      GenFillArrayData(mir, vB, rl_src[0]);
-      break;
-
-    case Instruction::FILLED_NEW_ARRAY:
-      GenFilledNewArray(mir_graph_->NewMemCallInfo(bb, mir, kStatic,
-                        false /* not range */));
-      break;
-
-    case Instruction::FILLED_NEW_ARRAY_RANGE:
-      GenFilledNewArray(mir_graph_->NewMemCallInfo(bb, mir, kStatic,
-                        true /* range */));
-      break;
-
-    case Instruction::NEW_ARRAY:
-      GenNewArray(vC, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32:
-      if (mir_graph_->IsBackEdge(bb, bb->taken)) {
-        GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
-      } else {
-        OpUnconditionalBranch(&label_list[bb->taken]);
-      }
-      break;
-
-    case Instruction::PACKED_SWITCH:
-      GenPackedSwitch(mir, vB, rl_src[0]);
-      break;
-
-    case Instruction::SPARSE_SWITCH:
-      GenSparseSwitch(mir, vB, rl_src[0]);
-      break;
-
-    case Instruction::CMPL_FLOAT:
-    case Instruction::CMPG_FLOAT:
-    case Instruction::CMPL_DOUBLE:
-    case Instruction::CMPG_DOUBLE:
-      GenCmpFP(opcode, rl_dest, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::CMP_LONG:
-      GenCmpLong(rl_dest, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::IF_EQ:
-    case Instruction::IF_NE:
-    case Instruction::IF_LT:
-    case Instruction::IF_GE:
-    case Instruction::IF_GT:
-    case Instruction::IF_LE: {
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(opt_flags);
-      }
-      LIR* taken = &label_list[bb->taken];
-      GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken);
-      break;
-    }
-    case Instruction::IF_EQZ:
-    case Instruction::IF_NEZ:
-    case Instruction::IF_LTZ:
-    case Instruction::IF_GEZ:
-    case Instruction::IF_GTZ:
-    case Instruction::IF_LEZ: {
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(opt_flags);
-      }
-      LIR* taken = &label_list[bb->taken];
-      GenCompareZeroAndBranch(opcode, rl_src[0], taken);
-      break;
-    }
-
-    case Instruction::AGET_WIDE:
-      GenArrayGet(opt_flags, rl_dest.fp ? kDouble : k64, rl_src[0], rl_src[1], rl_dest, 3);
-      break;
-    case Instruction::AGET_OBJECT:
-      GenArrayGet(opt_flags, kReference, rl_src[0], rl_src[1], rl_dest, 2);
-      break;
-    case Instruction::AGET:
-      GenArrayGet(opt_flags, rl_dest.fp ? kSingle : k32, rl_src[0], rl_src[1], rl_dest, 2);
-      break;
-    case Instruction::AGET_BOOLEAN:
-      GenArrayGet(opt_flags, kUnsignedByte, rl_src[0], rl_src[1], rl_dest, 0);
-      break;
-    case Instruction::AGET_BYTE:
-      GenArrayGet(opt_flags, kSignedByte, rl_src[0], rl_src[1], rl_dest, 0);
-      break;
-    case Instruction::AGET_CHAR:
-      GenArrayGet(opt_flags, kUnsignedHalf, rl_src[0], rl_src[1], rl_dest, 1);
-      break;
-    case Instruction::AGET_SHORT:
-      GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1);
-      break;
-    case Instruction::APUT_WIDE:
-      GenArrayPut(opt_flags, rl_src[0].fp ? kDouble : k64, rl_src[1], rl_src[2], rl_src[0], 3, false);
-      break;
-    case Instruction::APUT:
-      GenArrayPut(opt_flags, rl_src[0].fp ? kSingle : k32, rl_src[1], rl_src[2], rl_src[0], 2, false);
-      break;
-    case Instruction::APUT_OBJECT: {
-      bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]);
-      bool is_safe = is_null;  // Always safe to store null.
-      if (!is_safe) {
-        // Check safety from verifier type information.
-        const DexCompilationUnit* unit = mir_graph_->GetCurrentDexCompilationUnit();
-        is_safe = cu_->compiler_driver->IsSafeCast(unit, mir->offset);
-      }
-      if (is_null || is_safe) {
-        // Store of constant null doesn't require an assignability test and can be generated inline
-        // without fixed register usage or a card mark.
-        GenArrayPut(opt_flags, kReference, rl_src[1], rl_src[2], rl_src[0], 2, !is_null);
-      } else {
-        GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0]);
-      }
-      break;
-    }
-    case Instruction::APUT_SHORT:
-    case Instruction::APUT_CHAR:
-      GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1, false);
-      break;
-    case Instruction::APUT_BYTE:
-    case Instruction::APUT_BOOLEAN:
-      GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], rl_src[0], 0, false);
-      break;
-
-    case Instruction::IGET_OBJECT_QUICK:
-    case Instruction::IGET_OBJECT:
-      GenIGet(mir, opt_flags, kReference, Primitive::kPrimNot, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::IGET_WIDE_QUICK:
-    case Instruction::IGET_WIDE:
-      // kPrimLong and kPrimDouble share the same entrypoints.
-      if (rl_dest.fp) {
-        GenIGet(mir, opt_flags, kDouble, Primitive::kPrimDouble, rl_dest, rl_src[0]);
-      } else {
-        GenIGet(mir, opt_flags, k64, Primitive::kPrimLong, rl_dest, rl_src[0]);
-      }
-      break;
-
-    case Instruction::IGET_QUICK:
-    case Instruction::IGET:
-      if (rl_dest.fp) {
-        GenIGet(mir, opt_flags, kSingle, Primitive::kPrimFloat, rl_dest, rl_src[0]);
-      } else {
-        GenIGet(mir, opt_flags, k32, Primitive::kPrimInt, rl_dest, rl_src[0]);
-      }
-      break;
-
-    case Instruction::IGET_CHAR_QUICK:
-    case Instruction::IGET_CHAR:
-      GenIGet(mir, opt_flags, kUnsignedHalf, Primitive::kPrimChar, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::IGET_SHORT_QUICK:
-    case Instruction::IGET_SHORT:
-      GenIGet(mir, opt_flags, kSignedHalf, Primitive::kPrimShort, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::IGET_BOOLEAN_QUICK:
-    case Instruction::IGET_BOOLEAN:
-      GenIGet(mir, opt_flags, kUnsignedByte, Primitive::kPrimBoolean, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::IGET_BYTE_QUICK:
-    case Instruction::IGET_BYTE:
-      GenIGet(mir, opt_flags, kSignedByte, Primitive::kPrimByte, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::IPUT_WIDE_QUICK:
-    case Instruction::IPUT_WIDE:
-      GenIPut(mir, opt_flags, rl_src[0].fp ? kDouble : k64, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::IPUT_OBJECT_QUICK:
-    case Instruction::IPUT_OBJECT:
-      GenIPut(mir, opt_flags, kReference, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::IPUT_QUICK:
-    case Instruction::IPUT:
-      GenIPut(mir, opt_flags, rl_src[0].fp ? kSingle : k32, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::IPUT_BYTE_QUICK:
-    case Instruction::IPUT_BOOLEAN_QUICK:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_BOOLEAN:
-      GenIPut(mir, opt_flags, kUnsignedByte, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::IPUT_CHAR_QUICK:
-    case Instruction::IPUT_CHAR:
-      GenIPut(mir, opt_flags, kUnsignedHalf, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::IPUT_SHORT_QUICK:
-    case Instruction::IPUT_SHORT:
-      GenIPut(mir, opt_flags, kSignedHalf, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::SGET_OBJECT:
-      GenSget(mir, rl_dest, kReference, Primitive::kPrimNot);
-      break;
-
-    case Instruction::SGET:
-      GenSget(mir, rl_dest, rl_dest.fp ? kSingle : k32, Primitive::kPrimInt);
-      break;
-
-    case Instruction::SGET_CHAR:
-      GenSget(mir, rl_dest, kUnsignedHalf, Primitive::kPrimChar);
-      break;
-
-    case Instruction::SGET_SHORT:
-      GenSget(mir, rl_dest, kSignedHalf, Primitive::kPrimShort);
-      break;
-
-    case Instruction::SGET_BOOLEAN:
-      GenSget(mir, rl_dest, kUnsignedByte, Primitive::kPrimBoolean);
-      break;
-
-    case Instruction::SGET_BYTE:
-      GenSget(mir, rl_dest, kSignedByte, Primitive::kPrimByte);
-      break;
-
-    case Instruction::SGET_WIDE:
-      // kPrimLong and kPrimDouble share the same entrypoints.
-      GenSget(mir, rl_dest, rl_dest.fp ? kDouble : k64, Primitive::kPrimDouble);
-      break;
-
-    case Instruction::SPUT_OBJECT:
-      GenSput(mir, rl_src[0], kReference);
-      break;
-
-    case Instruction::SPUT:
-      GenSput(mir, rl_src[0], rl_src[0].fp ? kSingle : k32);
-      break;
-
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_BOOLEAN:
-      GenSput(mir, rl_src[0], kUnsignedByte);
-      break;
-
-    case Instruction::SPUT_CHAR:
-      GenSput(mir, rl_src[0], kUnsignedHalf);
-      break;
-
-    case Instruction::SPUT_SHORT:
-      GenSput(mir, rl_src[0], kSignedHalf);
-      break;
-
-    case Instruction::SPUT_WIDE:
-      GenSput(mir, rl_src[0], rl_src[0].fp ? kDouble : k64);
-      break;
-
-    case Instruction::INVOKE_STATIC_RANGE:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
-      break;
-    case Instruction::INVOKE_STATIC:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
-      break;
-
-    case Instruction::INVOKE_DIRECT:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
-      break;
-    case Instruction::INVOKE_DIRECT_RANGE:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
-      break;
-
-    case Instruction::INVOKE_VIRTUAL_QUICK:
-    case Instruction::INVOKE_VIRTUAL:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
-      break;
-
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
-      break;
-
-    case Instruction::INVOKE_SUPER:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
-      break;
-    case Instruction::INVOKE_SUPER_RANGE:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
-      break;
-
-    case Instruction::INVOKE_INTERFACE:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
-      break;
-    case Instruction::INVOKE_INTERFACE_RANGE:
-      GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
-      break;
-
-    case Instruction::NEG_INT:
-    case Instruction::NOT_INT:
-      GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[0], opt_flags);
-      break;
-
-    case Instruction::NEG_LONG:
-    case Instruction::NOT_LONG:
-      GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[0], opt_flags);
-      break;
-
-    case Instruction::NEG_FLOAT:
-      GenArithOpFloat(opcode, rl_dest, rl_src[0], rl_src[0]);
-      break;
-
-    case Instruction::NEG_DOUBLE:
-      GenArithOpDouble(opcode, rl_dest, rl_src[0], rl_src[0]);
-      break;
-
-    case Instruction::INT_TO_LONG:
-      GenIntToLong(rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::LONG_TO_INT:
-      GenLongToInt(rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::INT_TO_BYTE:
-    case Instruction::INT_TO_SHORT:
-    case Instruction::INT_TO_CHAR:
-      GenIntNarrowing(opcode, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::INT_TO_FLOAT:
-    case Instruction::INT_TO_DOUBLE:
-    case Instruction::LONG_TO_FLOAT:
-    case Instruction::LONG_TO_DOUBLE:
-    case Instruction::FLOAT_TO_INT:
-    case Instruction::FLOAT_TO_LONG:
-    case Instruction::FLOAT_TO_DOUBLE:
-    case Instruction::DOUBLE_TO_INT:
-    case Instruction::DOUBLE_TO_LONG:
-    case Instruction::DOUBLE_TO_FLOAT:
-      GenConversion(opcode, rl_dest, rl_src[0]);
-      break;
-
-    case Instruction::ADD_INT:
-    case Instruction::ADD_INT_2ADDR:
-    case Instruction::MUL_INT:
-    case Instruction::MUL_INT_2ADDR:
-    case Instruction::AND_INT:
-    case Instruction::AND_INT_2ADDR:
-    case Instruction::OR_INT:
-    case Instruction::OR_INT_2ADDR:
-    case Instruction::XOR_INT:
-    case Instruction::XOR_INT_2ADDR:
-      if (rl_src[0].is_const &&
-          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[0]), opcode)) {
-        GenArithOpIntLit(opcode, rl_dest, rl_src[1],
-                             mir_graph_->ConstantValue(rl_src[0].orig_sreg));
-      } else if (rl_src[1].is_const &&
-                 InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) {
-        GenArithOpIntLit(opcode, rl_dest, rl_src[0],
-                             mir_graph_->ConstantValue(rl_src[1].orig_sreg));
-      } else {
-        GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
-      }
-      break;
-
-    case Instruction::SUB_INT:
-    case Instruction::SUB_INT_2ADDR:
-    case Instruction::DIV_INT:
-    case Instruction::DIV_INT_2ADDR:
-    case Instruction::REM_INT:
-    case Instruction::REM_INT_2ADDR:
-    case Instruction::SHL_INT:
-    case Instruction::SHL_INT_2ADDR:
-    case Instruction::SHR_INT:
-    case Instruction::SHR_INT_2ADDR:
-    case Instruction::USHR_INT:
-    case Instruction::USHR_INT_2ADDR:
-      if (rl_src[1].is_const &&
-          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) {
-        GenArithOpIntLit(opcode, rl_dest, rl_src[0], mir_graph_->ConstantValue(rl_src[1]));
-      } else {
-        GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
-      }
-      break;
-
-    case Instruction::ADD_LONG:
-    case Instruction::SUB_LONG:
-    case Instruction::AND_LONG:
-    case Instruction::OR_LONG:
-    case Instruction::XOR_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-    case Instruction::SUB_LONG_2ADDR:
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::OR_LONG_2ADDR:
-    case Instruction::XOR_LONG_2ADDR:
-      if (rl_src[0].is_const || rl_src[1].is_const) {
-        GenArithImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::MUL_LONG:
-    case Instruction::DIV_LONG:
-    case Instruction::REM_LONG:
-    case Instruction::MUL_LONG_2ADDR:
-    case Instruction::DIV_LONG_2ADDR:
-    case Instruction::REM_LONG_2ADDR:
-      GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
-      break;
-
-    case Instruction::SHL_LONG:
-    case Instruction::SHR_LONG:
-    case Instruction::USHR_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-    case Instruction::SHR_LONG_2ADDR:
-    case Instruction::USHR_LONG_2ADDR:
-      if (rl_src[1].is_const) {
-        GenShiftImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
-      } else {
-        GenShiftOpLong(opcode, rl_dest, rl_src[0], rl_src[1]);
-      }
-      break;
-
-    case Instruction::DIV_FLOAT:
-    case Instruction::DIV_FLOAT_2ADDR:
-      if (HandleEasyFloatingPointDiv(rl_dest, rl_src[0], rl_src[1])) {
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::ADD_FLOAT:
-    case Instruction::SUB_FLOAT:
-    case Instruction::MUL_FLOAT:
-    case Instruction::REM_FLOAT:
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT_2ADDR:
-      GenArithOpFloat(opcode, rl_dest, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::DIV_DOUBLE:
-    case Instruction::DIV_DOUBLE_2ADDR:
-      if (HandleEasyFloatingPointDiv(rl_dest, rl_src[0], rl_src[1])) {
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    case Instruction::ADD_DOUBLE:
-    case Instruction::SUB_DOUBLE:
-    case Instruction::MUL_DOUBLE:
-    case Instruction::REM_DOUBLE:
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE_2ADDR:
-      GenArithOpDouble(opcode, rl_dest, rl_src[0], rl_src[1]);
-      break;
-
-    case Instruction::RSUB_INT:
-    case Instruction::ADD_INT_LIT16:
-    case Instruction::MUL_INT_LIT16:
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::REM_INT_LIT16:
-    case Instruction::AND_INT_LIT16:
-    case Instruction::OR_INT_LIT16:
-    case Instruction::XOR_INT_LIT16:
-    case Instruction::ADD_INT_LIT8:
-    case Instruction::RSUB_INT_LIT8:
-    case Instruction::MUL_INT_LIT8:
-    case Instruction::DIV_INT_LIT8:
-    case Instruction::REM_INT_LIT8:
-    case Instruction::AND_INT_LIT8:
-    case Instruction::OR_INT_LIT8:
-    case Instruction::XOR_INT_LIT8:
-    case Instruction::SHL_INT_LIT8:
-    case Instruction::SHR_INT_LIT8:
-    case Instruction::USHR_INT_LIT8:
-      GenArithOpIntLit(opcode, rl_dest, rl_src[0], vC);
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  DCHECK(CheckCorePoolSanity());
-}  // NOLINT(readability/fn_size)
-
-// Process extended MIR instructions.
-void Mir2Lir::HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
-  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
-    case kMirOpCopy: {
-      RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
-      RegLocation rl_dest = mir_graph_->GetDest(mir);
-      StoreValue(rl_dest, rl_src);
-      break;
-    }
-    case kMirOpFusedCmplFloat:
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(mir->optimization_flags);
-      }
-      GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, false /*double*/);
-      break;
-    case kMirOpFusedCmpgFloat:
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(mir->optimization_flags);
-      }
-      GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, false /*double*/);
-      break;
-    case kMirOpFusedCmplDouble:
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(mir->optimization_flags);
-      }
-      GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, true /*double*/);
-      break;
-    case kMirOpFusedCmpgDouble:
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(mir->optimization_flags);
-      }
-      GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, true /*double*/);
-      break;
-    case kMirOpFusedCmpLong:
-      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
-        GenSuspendTest(mir->optimization_flags);
-      }
-      GenFusedLongCmpBranch(bb, mir);
-      break;
-    case kMirOpSelect:
-      GenSelect(bb, mir);
-      break;
-    case kMirOpNullCheck: {
-      RegLocation rl_obj = mir_graph_->GetSrc(mir, 0);
-      rl_obj = LoadValue(rl_obj, kRefReg);
-      // An explicit check is done because this use is not expected to trip the
-      // implicit null checks (those only fire on an actual invalid access of the
-      // null object).
-      GenExplicitNullCheck(rl_obj.reg, mir->optimization_flags);
-      break;
-    }
-    case kMirOpPhi:
-    case kMirOpNop:
-    case kMirOpRangeCheck:
-    case kMirOpDivZeroCheck:
-    case kMirOpCheck:
-      // Ignore these known opcodes
-      break;
-    default:
-      // Give the backends a chance to handle unknown extended MIR opcodes.
-      GenMachineSpecificExtendedMethodMIR(bb, mir);
-      break;
-  }
-}
-
-void Mir2Lir::GenPrintLabel(MIR* mir) {
-  // Mark the beginning of a Dalvik instruction for line tracking.
-  if (cu_->verbose) {
-    char* inst_str = mir_graph_->GetDalvikDisassembly(mir);
-    MarkBoundary(mir->offset, inst_str);
-  }
-}
-
-// Handle the content in each basic block.
-bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) {
-  if (bb->block_type == kDead) return false;
-  current_dalvik_offset_ = bb->start_offset;
-  MIR* mir;
-  int block_id = bb->id;
-
-  block_label_list_[block_id].operands[0] = bb->start_offset;
-
-  // Insert the block label.
-  block_label_list_[block_id].opcode = kPseudoNormalBlockLabel;
-  block_label_list_[block_id].flags.fixup = kFixupLabel;
-  AppendLIR(&block_label_list_[block_id]);
-
-  LIR* head_lir = nullptr;
-
-  // If this is a catch block, export the start address.
-  if (bb->catch_entry) {
-    head_lir = NewLIR0(kPseudoExportedPC);
-  }
-
-  // Free temp registers and reset redundant store tracking.
-  ClobberAllTemps();
-
-  if (bb->block_type == kEntryBlock) {
-    ResetRegPool();
-    int start_vreg = mir_graph_->GetFirstInVR();
-    AppendLIR(NewLIR0(kPseudoPrologueBegin));
-    DCHECK_EQ(cu_->target64, Is64BitInstructionSet(cu_->instruction_set));
-    if (cu_->target64) {
-      DCHECK(mir_graph_->GetMethodLoc().wide);
-    }
-    GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc());
-    AppendLIR(NewLIR0(kPseudoPrologueEnd));
-    DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
-  } else if (bb->block_type == kExitBlock) {
-    ResetRegPool();
-    DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
-    AppendLIR(NewLIR0(kPseudoEpilogueBegin));
-    GenExitSequence();
-    AppendLIR(NewLIR0(kPseudoEpilogueEnd));
-    DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
-  }
-
-  for (mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    ResetRegPool();
-    if (cu_->disable_opt & (1 << kTrackLiveTemps)) {
-      ClobberAllTemps();
-      // Reset temp allocation to minimize differences when A/B testing.
-      reg_pool_->ResetNextTemp();
-    }
-
-    if (cu_->disable_opt & (1 << kSuppressLoads)) {
-      ResetDefTracking();
-    }
-
-    // Reset temp tracking sanity check.
-    if (kIsDebugBuild) {
-      live_sreg_ = INVALID_SREG;
-    }
-
-    current_dalvik_offset_ = mir->offset;
-    current_mir_ = mir;
-    int opcode = mir->dalvikInsn.opcode;
-
-    GenPrintLabel(mir);
-
-    // Remember the first LIR for this block.
-    if (head_lir == nullptr) {
-      head_lir = &block_label_list_[bb->id];
-      // Set the first label as a scheduling barrier.
-      DCHECK(!head_lir->flags.use_def_invalid);
-      head_lir->u.m.def_mask = &kEncodeAll;
-    }
-
-    if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
-      HandleExtendedMethodMIR(bb, mir);
-      continue;
-    }
-
-    CompileDalvikInstruction(mir, bb, block_label_list_);
-  }
-
-  if (head_lir) {
-    // Eliminate redundant loads/stores and delay stores into later slots.
-    ApplyLocalOptimizations(head_lir, last_lir_insn_);
-  }
-  return false;
-}
-
-bool Mir2Lir::SpecialMIR2LIR(const InlineMethod& special) {
-  cu_->NewTimingSplit("SpecialMIR2LIR");
-  // Find the first DalvikByteCode block.
-  DCHECK_EQ(mir_graph_->GetNumReachableBlocks(), mir_graph_->GetDfsOrder().size());
-  BasicBlock* bb = nullptr;
-  for (BasicBlockId dfs_id : mir_graph_->GetDfsOrder()) {
-    BasicBlock* candidate = mir_graph_->GetBasicBlock(dfs_id);
-    if (candidate->block_type == kDalvikByteCode) {
-      bb = candidate;
-      break;
-    }
-  }
-  if (bb == nullptr) {
-    return false;
-  }
-  DCHECK_EQ(bb->start_offset, 0);
-  DCHECK(bb->first_mir_insn != nullptr);
-
-  // Get the first instruction.
-  MIR* mir = bb->first_mir_insn;
-
-  // Free temp registers and reset redundant store tracking.
-  ResetRegPool();
-  ResetDefTracking();
-  ClobberAllTemps();
-
-  return GenSpecialCase(bb, mir, special);
-}
-
-void Mir2Lir::MethodMIR2LIR() {
-  cu_->NewTimingSplit("MIR2LIR");
-
-  // Hold the labels of each block.
-  block_label_list_ = arena_->AllocArray<LIR>(mir_graph_->GetNumBlocks(), kArenaAllocLIR);
-
-  PreOrderDfsIterator iter(mir_graph_);
-  BasicBlock* curr_bb = iter.Next();
-  BasicBlock* next_bb = iter.Next();
-  while (curr_bb != nullptr) {
-    MethodBlockCodeGen(curr_bb);
-    // If the fall_through block is no longer laid out consecutively, drop in a branch.
-    BasicBlock* curr_bb_fall_through = mir_graph_->GetBasicBlock(curr_bb->fall_through);
-    if ((curr_bb_fall_through != nullptr) && (curr_bb_fall_through != next_bb)) {
-      OpUnconditionalBranch(&block_label_list_[curr_bb->fall_through]);
-    }
-    curr_bb = next_bb;
-    do {
-      next_bb = iter.Next();
-    } while ((next_bb != nullptr) && (next_bb->block_type == kDead));
-  }
-  HandleSlowPaths();
-}
-
-//
-// LIR Slow Path
-//
-
-LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel(int opcode) {
-  m2l_->SetCurrentDexPc(current_dex_pc_);
-  m2l_->current_mir_ = current_mir_;
-  LIR* target = m2l_->NewLIR0(opcode);
-  fromfast_->target = target;
-  return target;
-}
-
-
-void Mir2Lir::CheckRegStorageImpl(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp,
-                                  bool fail, bool report) const {
-  if (rs.Valid()) {
-    if (ref == RefCheck::kCheckRef) {
-      if (cu_->target64 && !rs.Is64Bit()) {
-        if (fail) {
-          CHECK(false) << "Reg storage not 64b for ref.";
-        } else if (report) {
-          LOG(WARNING) << "Reg storage not 64b for ref.";
-        }
-      }
-    }
-    if (wide == WidenessCheck::kCheckWide) {
-      if (!rs.Is64Bit()) {
-        if (fail) {
-          CHECK(false) << "Reg storage not 64b for wide.";
-        } else if (report) {
-          LOG(WARNING) << "Reg storage not 64b for wide.";
-        }
-      }
-    }
-    // A tighter check would be nice, but for now soft-float will not check float at all.
-    if (fp == FPCheck::kCheckFP && cu_->instruction_set != kArm) {
-      if (!rs.IsFloat()) {
-        if (fail) {
-          CHECK(false) << "Reg storage not float for fp.";
-        } else if (report) {
-          LOG(WARNING) << "Reg storage not float for fp.";
-        }
-      }
-    } else if (fp == FPCheck::kCheckNotFP) {
-      if (rs.IsFloat()) {
-        if (fail) {
-          CHECK(false) << "Reg storage float for not-fp.";
-        } else if (report) {
-          LOG(WARNING) << "Reg storage float for not-fp.";
-        }
-      }
-    }
-  }
-}
-
-void Mir2Lir::CheckRegLocationImpl(RegLocation rl, bool fail, bool report) const {
-  // Regrettably can't use the fp part of rl, as that is not really indicative of where a value
-  // will be stored.
-  CheckRegStorageImpl(rl.reg, rl.wide ? WidenessCheck::kCheckWide : WidenessCheck::kCheckNotWide,
-      rl.ref ? RefCheck::kCheckRef : RefCheck::kCheckNotRef, FPCheck::kIgnoreFP, fail, report);
-}
-
-size_t Mir2Lir::GetInstructionOffset(LIR* lir ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Unsupported GetInstructionOffset()";
-  UNREACHABLE();
-}
-
-void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty,
-                                                InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  DCHECK(shorty != nullptr);
-  DCHECK(!IsInitialized());
-  DCHECK_EQ(end_mapped_in_, 0u);
-  DCHECK(!has_arguments_on_stack_);
-  while (shorty->Next()) {
-    ShortyArg arg = shorty->GetArg();
-    RegStorage reg = mapper->GetNextReg(arg);
-    mapping_.emplace_back(arg, reg);
-    if (arg.IsWide()) {
-      mapping_.emplace_back(ShortyArg(kInvalidShorty), RegStorage::InvalidReg());
-    }
-    if (reg.Valid()) {
-      end_mapped_in_ = mapping_.size();
-      // If the VR is wide but wasn't mapped as wide then account for it.
-      if (arg.IsWide() && !reg.Is64Bit()) {
-        --end_mapped_in_;
-      }
-    } else {
-      has_arguments_on_stack_ = true;
-    }
-  }
-  initialized_ = true;
-}
-
-RegStorage Mir2Lir::InToRegStorageMapping::GetReg(size_t in_position) {
-  DCHECK(IsInitialized());
-  DCHECK_LT(in_position, mapping_.size());
-  DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty);
-  return mapping_[in_position].second;
-}
-
-Mir2Lir::ShortyArg Mir2Lir::InToRegStorageMapping::GetShorty(size_t in_position) {
-  DCHECK(IsInitialized());
-  DCHECK_LT(static_cast<size_t>(in_position), mapping_.size());
-  DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty);
-  return mapping_[in_position].first;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
deleted file mode 100644
index a0db1e8..0000000
--- a/compiler/dex/quick/mir_to_lir.h
+++ /dev/null
@@ -1,1933 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
-#define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
-
-#include "base/arena_allocator.h"
-#include "base/arena_containers.h"
-#include "base/arena_object.h"
-#include "compiled_method.h"
-#include "dex/compiler_enums.h"
-#include "dex/dex_flags.h"
-#include "dex/dex_types.h"
-#include "dex/reg_location.h"
-#include "dex/reg_storage.h"
-#include "dex/quick/resource_mask.h"
-#include "entrypoints/quick/quick_entrypoints_enum.h"
-#include "invoke_type.h"
-#include "lazy_debug_frame_opcode_writer.h"
-#include "leb128.h"
-#include "primitive.h"
-#include "safe_map.h"
-#include "utils/array_ref.h"
-#include "utils/dex_cache_arrays_layout.h"
-#include "utils/stack_checks.h"
-
-namespace art {
-
-// Set to 1 to measure cost of suspend check.
-#define NO_SUSPEND 0
-
-#define IS_BINARY_OP         (1ULL << kIsBinaryOp)
-#define IS_BRANCH            (1ULL << kIsBranch)
-#define IS_IT                (1ULL << kIsIT)
-#define IS_MOVE              (1ULL << kIsMoveOp)
-#define IS_LOAD              (1ULL << kMemLoad)
-#define IS_QUAD_OP           (1ULL << kIsQuadOp)
-#define IS_QUIN_OP           (1ULL << kIsQuinOp)
-#define IS_SEXTUPLE_OP       (1ULL << kIsSextupleOp)
-#define IS_STORE             (1ULL << kMemStore)
-#define IS_TERTIARY_OP       (1ULL << kIsTertiaryOp)
-#define IS_UNARY_OP          (1ULL << kIsUnaryOp)
-#define IS_VOLATILE          (1ULL << kMemVolatile)
-#define NEEDS_FIXUP          (1ULL << kPCRelFixup)
-#define NO_OPERAND           (1ULL << kNoOperand)
-#define REG_DEF0             (1ULL << kRegDef0)
-#define REG_DEF1             (1ULL << kRegDef1)
-#define REG_DEF2             (1ULL << kRegDef2)
-#define REG_DEFA             (1ULL << kRegDefA)
-#define REG_DEFD             (1ULL << kRegDefD)
-#define REG_DEF_FPCS_LIST0   (1ULL << kRegDefFPCSList0)
-#define REG_DEF_FPCS_LIST2   (1ULL << kRegDefFPCSList2)
-#define REG_DEF_LIST0        (1ULL << kRegDefList0)
-#define REG_DEF_LIST1        (1ULL << kRegDefList1)
-#define REG_DEF_LR           (1ULL << kRegDefLR)
-#define REG_DEF_SP           (1ULL << kRegDefSP)
-#define REG_USE0             (1ULL << kRegUse0)
-#define REG_USE1             (1ULL << kRegUse1)
-#define REG_USE2             (1ULL << kRegUse2)
-#define REG_USE3             (1ULL << kRegUse3)
-#define REG_USE4             (1ULL << kRegUse4)
-#define REG_USEA             (1ULL << kRegUseA)
-#define REG_USEC             (1ULL << kRegUseC)
-#define REG_USED             (1ULL << kRegUseD)
-#define REG_USEB             (1ULL << kRegUseB)
-#define REG_USE_FPCS_LIST0   (1ULL << kRegUseFPCSList0)
-#define REG_USE_FPCS_LIST2   (1ULL << kRegUseFPCSList2)
-#define REG_USE_LIST0        (1ULL << kRegUseList0)
-#define REG_USE_LIST1        (1ULL << kRegUseList1)
-#define REG_USE_LR           (1ULL << kRegUseLR)
-#define REG_USE_PC           (1ULL << kRegUsePC)
-#define REG_USE_SP           (1ULL << kRegUseSP)
-#define SETS_CCODES          (1ULL << kSetsCCodes)
-#define USES_CCODES          (1ULL << kUsesCCodes)
-#define USE_FP_STACK         (1ULL << kUseFpStack)
-#define REG_USE_LO           (1ULL << kUseLo)
-#define REG_USE_HI           (1ULL << kUseHi)
-#define REG_DEF_LO           (1ULL << kDefLo)
-#define REG_DEF_HI           (1ULL << kDefHi)
-#define SCALED_OFFSET_X0     (1ULL << kMemScaledx0)
-#define SCALED_OFFSET_X2     (1ULL << kMemScaledx2)
-#define SCALED_OFFSET_X4     (1ULL << kMemScaledx4)
-
-// Special load/stores
-#define IS_LOADX             (IS_LOAD | IS_VOLATILE)
-#define IS_LOAD_OFF          (IS_LOAD | SCALED_OFFSET_X0)
-#define IS_LOAD_OFF2         (IS_LOAD | SCALED_OFFSET_X2)
-#define IS_LOAD_OFF4         (IS_LOAD | SCALED_OFFSET_X4)
-
-#define IS_STOREX            (IS_STORE | IS_VOLATILE)
-#define IS_STORE_OFF         (IS_STORE | SCALED_OFFSET_X0)
-#define IS_STORE_OFF2        (IS_STORE | SCALED_OFFSET_X2)
-#define IS_STORE_OFF4        (IS_STORE | SCALED_OFFSET_X4)
-
-// Common combo register usage patterns.
-#define REG_DEF01            (REG_DEF0 | REG_DEF1)
-#define REG_DEF012           (REG_DEF0 | REG_DEF1 | REG_DEF2)
-#define REG_DEF01_USE2       (REG_DEF0 | REG_DEF1 | REG_USE2)
-#define REG_DEF0_USE01       (REG_DEF0 | REG_USE01)
-#define REG_DEF0_USE0        (REG_DEF0 | REG_USE0)
-#define REG_DEF0_USE12       (REG_DEF0 | REG_USE12)
-#define REG_DEF0_USE123      (REG_DEF0 | REG_USE123)
-#define REG_DEF0_USE1        (REG_DEF0 | REG_USE1)
-#define REG_DEF0_USE2        (REG_DEF0 | REG_USE2)
-#define REG_DEFAD_USEAD      (REG_DEFAD_USEA | REG_USED)
-#define REG_DEFAD_USEA       (REG_DEFA_USEA | REG_DEFD)
-#define REG_DEFA_USEA        (REG_DEFA | REG_USEA)
-#define REG_USE012           (REG_USE01 | REG_USE2)
-#define REG_USE014           (REG_USE01 | REG_USE4)
-#define REG_USE01            (REG_USE0 | REG_USE1)
-#define REG_USE02            (REG_USE0 | REG_USE2)
-#define REG_USE12            (REG_USE1 | REG_USE2)
-#define REG_USE23            (REG_USE2 | REG_USE3)
-#define REG_USE123           (REG_USE1 | REG_USE2 | REG_USE3)
-
-/*
- * Assembly is an iterative process, and usually terminates within
- * two or three passes.  This should be high enough to handle bizarre
- * cases, but low enough to detect an infinite loop bug.
- */
-#define MAX_ASSEMBLER_RETRIES 50
-
-class BasicBlock;
-class BitVector;
-struct CallInfo;
-struct CompilationUnit;
-struct CompilerTemp;
-struct InlineMethod;
-class MIR;
-struct LIR;
-struct RegisterInfo;
-class DexFileMethodInliner;
-class MIRGraph;
-class MirMethodLoweringInfo;
-class MirSFieldLoweringInfo;
-
-typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int,
-                            const MethodReference& target_method,
-                            uint32_t method_idx, uintptr_t direct_code,
-                            uintptr_t direct_method, InvokeType type);
-
-typedef ArenaVector<uint8_t> CodeBuffer;
-typedef uint32_t CodeOffset;           // Native code offset in bytes.
-
-struct UseDefMasks {
-  const ResourceMask* use_mask;        // Resource mask for use.
-  const ResourceMask* def_mask;        // Resource mask for def.
-};
-
-struct AssemblyInfo {
-  LIR* pcrel_next;           // Chain of LIR nodes needing pc relative fixups.
-};
-
-struct LIR {
-  CodeOffset offset;               // Offset of this instruction.
-  NarrowDexOffset dalvik_offset;   // Offset of Dalvik opcode in code units (16-bit words).
-  int16_t opcode;
-  LIR* next;
-  LIR* prev;
-  LIR* target;
-  struct {
-    unsigned int alias_info:17;  // For Dalvik register disambiguation.
-    bool is_nop:1;               // LIR is optimized away.
-    unsigned int size:4;         // Note: size of encoded instruction is in bytes.
-    bool use_def_invalid:1;      // If true, masks should not be used.
-    unsigned int generation:1;   // Used to track visitation state during fixup pass.
-    unsigned int fixup:8;        // Fixup kind.
-  } flags;
-  union {
-    UseDefMasks m;               // Use & Def masks used during optimization.
-    AssemblyInfo a;              // Instruction info used during assembly phase.
-  } u;
-  int32_t operands[5];           // [0..4] = [dest, src1, src2, extra, extra2].
-};
-
-// Utility macros to traverse the LIR list.
-#define NEXT_LIR(lir) (lir->next)
-#define PREV_LIR(lir) (lir->prev)
-
-// Defines for alias_info (tracks Dalvik register references).
-#define DECODE_ALIAS_INFO_REG(X)        (X & 0xffff)
-#define DECODE_ALIAS_INFO_WIDE_FLAG     (0x10000)
-#define DECODE_ALIAS_INFO_WIDE(X)       ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0)
-#define ENCODE_ALIAS_INFO(REG, ISWIDE)  (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0))
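-
-// Example (illustrative, not from the original file): encoding a wide Dalvik
-// register number 5 and decoding it back.
-//   int info = ENCODE_ALIAS_INFO(5, true);   // info == 0x10005
-//   DECODE_ALIAS_INFO_REG(info)              // yields 5
-//   DECODE_ALIAS_INFO_WIDE(info)             // yields 1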
-
-#define ENCODE_REG_PAIR(low_reg, high_reg) ((low_reg & 0xff) | ((high_reg & 0xff) << 8))
-#define DECODE_REG_PAIR(both_regs, low_reg, high_reg) \
-  do { \
-    low_reg = both_regs & 0xff; \
-    high_reg = (both_regs >> 8) & 0xff; \
-  } while (false)
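-
-// Example (illustrative): packing two 8-bit register numbers into one operand
-// and unpacking them again.
-//   int both = ENCODE_REG_PAIR(2, 3);   // both == 0x0302
-//   int low, high;
-//   DECODE_REG_PAIR(both, low, high);   // low == 2, high == 3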
-
-// Mask to denote sreg as the start of a 64-bit item.  Must not interfere with low 16 bits.
-#define STARTING_WIDE_SREG 0x10000
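-// Example (illustrative): (s_reg | STARTING_WIDE_SREG) marks s_reg as the low
-// word of a 64-bit item; (x & STARTING_WIDE_SREG) != 0 tests for that mark.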
-
-class Mir2Lir {
-  public:
-    static constexpr bool kFailOnSizeError = true && kIsDebugBuild;
-    static constexpr bool kReportSizeError = true && kIsDebugBuild;
-
-    // TODO: If necessary, this could be made target-dependent.
-    static constexpr uint16_t kSmallSwitchThreshold = 5;
-
-    /*
-     * Auxiliary information describing the location of data embedded in the Dalvik
-     * byte code stream.
-     */
-    struct EmbeddedData {
-      CodeOffset offset;          // Code offset of data block.
-      const uint16_t* table;      // Original dex data.
-      DexOffset vaddr;            // Dalvik offset of parent opcode.
-    };
-
-    struct FillArrayData : EmbeddedData {
-      int32_t size;
-    };
-
-    struct SwitchTable : EmbeddedData {
-      LIR* anchor;                // Reference instruction for relative offsets.
-      MIR* switch_mir;            // The switch mir.
-    };
-
-    /* Static register use counts */
-    struct RefCounts {
-      int count;
-      int s_reg;
-    };
-
-    /*
-     * Data structure tracking the mapping between a Dalvik value (32 or 64 bits)
-     * and native register storage.  The primary purpose is to reuse previously
-     * loaded values, if possible, and otherwise to keep the value in register
-     * storage as long as possible.
-     *
-     * NOTE 1: wide_value refers to the width of the Dalvik value contained in
-     * this register (or pair).  For example, a 64-bit register containing a 32-bit
-     * Dalvik value would have wide_value==false even though the storage container itself
-     * is wide.  Similarly, a 32-bit register containing half of a 64-bit Dalvik value
-     * would have wide_value==true (and additionally would have its partner field set to the
-     * other half, whose wide_value field would also be true).
-     *
-     * NOTE 2: In the case of a register pair, you can determine which of the partners
-     * is the low half by looking at the s_reg names.  The high s_reg will equal low_sreg + 1.
-     *
-     * NOTE 3: In the case of a 64-bit register holding a Dalvik wide value, wide_value
-     * will be true and partner==self.  s_reg refers to the low-order word of the Dalvik
-     * value, and the s_reg of the high word is implied (s_reg + 1).
-     *
-     * NOTE 4: The reg and is_temp fields should always be correct.  If is_temp is false no
-     * other fields have meaning. [perhaps not true, wide should work for promoted regs?]
-     * If is_temp==true and live==false, no other fields have
-     * meaning.  If is_temp==true and live==true, wide_value, partner, dirty, s_reg, def_start
-     * and def_end describe the relationship between the temp register/register pair and
-     * the Dalvik value[s] described by s_reg/s_reg+1.
-     *
-     * The fields used_storage, master_storage and storage_mask are used to track allocation
-     * in light of potential aliasing.  For example, consider Arm's d2, which overlaps s4 & s5.
-     * d2's storage mask would be 0x00000003, the two low-order bits denoting 64 bits of
-     * storage use.  For s4, it would be 0x0000001; for s5 0x00000002.  These values should not
-     * change once initialized.  The "used_storage" field tracks current allocation status.
-     * Although each record contains this field, only the field from the largest member of
-     * an aliased group is used.  In our case, it would be d2's.  The master_storage pointer
-     * of d2, s4 and s5 would all point to d2's used_storage field.  Each bit in a used_storage
-     * represents 32 bits of storage.  d2's used_storage would be initialized to 0xfffffffc.
-     * Then, if we wanted to determine whether s4 could be allocated, we would "and"
-     * s4's storage_mask with s4's *master_storage.  If the result is zero, s4 is free.
-     * To allocate: *master_storage |= storage_mask.  To free: *master_storage &= ~storage_mask.
-     *
-     * For an X86 vector register example, storage_mask would be:
-     *    0x00000001 for 32-bit view of xmm1
-     *    0x00000003 for 64-bit view of xmm1
-     *    0x0000000f for 128-bit view of xmm1
-     *    0x000000ff for 256-bit view of ymm1   // future expansion, if needed
-     *    0x0000ffff for 512-bit view of ymm1   // future expansion, if needed
-     *    0xffffffff for 1024-bit view of ymm1  // future expansion, if needed
-     *
-     * The "liveness" of a register is handled in a similar way.  The liveness_ storage is
-     * held in the widest member of an aliased set.  Note, though, that for a temp register to
-     * be reused as live, it must both be marked live and the associated SReg() must match the
-     * desired s_reg.  This gets a little complicated when dealing with aliased registers.  All
-     * members of an aliased set will share the same liveness flags, but each will individually
-     * maintain s_reg_.  In this way we can know that at least one member of an
-     * aliased set is live, but will only fully match on the appropriate alias view.  For example,
-     * if Arm d1 is live as a double and has s_reg_ set to Dalvik v8 (which also implies v9
-     * because it is wide), its aliases s2 and s3 will show as live, but will have
-     * s_reg_ == INVALID_SREG.  An attempt to later AllocLiveReg() of v9 with a single-precision
-     * view will fail because although s3's liveness bit is set, its s_reg_ will not match v9.
-     * This will cause all members of the aliased set to be clobbered and AllocLiveReg() will
-     * report that v9 is currently not live as a single (which is what we want).
-     *
-     * NOTE: the x86 usage is still somewhat in flux.  There are competing notions of how
-     * to treat xmm registers:
-     *     1. Treat them all as 128-bits wide, but denote how much data used via bytes field.
-     *         o This more closely matches reality, but means you'd need to be able to get
-     *           to the associated RegisterInfo struct to figure out how it's being used.
-     *         o This is how 64-bit core registers will be used - always 64 bits, but the
-     *           "bytes" field will be 4 for 32-bit usage and 8 for 64-bit usage.
-     *     2. View the xmm registers based on contents.
-     *         o A single in an xmm2 register would be k32BitVector, while a double in xmm2 would
-     *           be a k64BitVector.
-     *         o Note that the two uses above would be considered distinct registers (but with
-     *           the aliasing mechanism, we could detect interference).
-     *         o This is how aliased double and single float registers will be handled on
-     *           Arm and MIPS.
-     * Working plan is, for all targets, to follow mechanism 1 for 64-bit core registers, and
-     * mechanism 2 for aliased float registers and x86 vector registers.
-     */
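-    /*
-     * Illustrative sketch (not from the original file) of the storage-mask
-     * arithmetic described above, using Arm's d2, which overlaps s4 and s5:
-     *
-     *   uint32_t used = 0xfffffffc;                 // d2's used_storage after init
-     *   uint32_t s4_mask = kLowSingleStorageMask;   // 0x00000001
-     *   bool s4_free = ((used & s4_mask) == 0);     // true: s4 may be allocated
-     *   used |= s4_mask;                            // allocate s4
-     *   used &= ~s4_mask;                           // free s4 again
-     */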
-    class RegisterInfo : public ArenaObject<kArenaAllocRegAlloc> {
-     public:
-      RegisterInfo(RegStorage r, const ResourceMask& mask = kEncodeAll);
-      ~RegisterInfo() {}
-
-      static const uint32_t k32SoloStorageMask     = 0x00000001;
-      static const uint32_t kLowSingleStorageMask  = 0x00000001;
-      static const uint32_t kHighSingleStorageMask = 0x00000002;
-      static const uint32_t k64SoloStorageMask     = 0x00000003;
-      static const uint32_t k128SoloStorageMask    = 0x0000000f;
-      static const uint32_t k256SoloStorageMask    = 0x000000ff;
-      static const uint32_t k512SoloStorageMask    = 0x0000ffff;
-      static const uint32_t k1024SoloStorageMask   = 0xffffffff;
-
-      bool InUse() { return (storage_mask_ & master_->used_storage_) != 0; }
-      void MarkInUse() { master_->used_storage_ |= storage_mask_; }
-      void MarkFree() { master_->used_storage_ &= ~storage_mask_; }
-      // No part of the containing storage is live in this view.
-      bool IsDead() { return (master_->liveness_ & storage_mask_) == 0; }
-      // Liveness of this view matches.  Note: not equivalent to !IsDead().
-      bool IsLive() { return (master_->liveness_ & storage_mask_) == storage_mask_; }
-      void MarkLive(int s_reg) {
-        // TODO: Anything useful to assert here?
-        s_reg_ = s_reg;
-        master_->liveness_ |= storage_mask_;
-      }
-      void MarkDead() {
-        if (SReg() != INVALID_SREG) {
-          s_reg_ = INVALID_SREG;
-          master_->liveness_ &= ~storage_mask_;
-          ResetDefBody();
-        }
-      }
-      RegStorage GetReg() { return reg_; }
-      void SetReg(RegStorage reg) { reg_ = reg; }
-      bool IsTemp() { return is_temp_; }
-      void SetIsTemp(bool val) { is_temp_ = val; }
-      bool IsWide() { return wide_value_; }
-      void SetIsWide(bool val) {
-        wide_value_ = val;
-        if (!val) {
-          // If not wide, reset partner to self.
-          SetPartner(GetReg());
-        }
-      }
-      bool IsDirty() { return dirty_; }
-      void SetIsDirty(bool val) { dirty_ = val; }
-      RegStorage Partner() { return partner_; }
-      void SetPartner(RegStorage partner) { partner_ = partner; }
-      int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; }
-      const ResourceMask& DefUseMask() { return def_use_mask_; }
-      void SetDefUseMask(const ResourceMask& def_use_mask) { def_use_mask_ = def_use_mask; }
-      RegisterInfo* Master() { return master_; }
-      void SetMaster(RegisterInfo* master) {
-        master_ = master;
-        if (master != this) {
-          master_->aliased_ = true;
-          DCHECK(alias_chain_ == nullptr);
-          alias_chain_ = master_->alias_chain_;
-          master_->alias_chain_ = this;
-        }
-      }
-      bool IsAliased() { return aliased_; }
-      RegisterInfo* GetAliasChain() { return alias_chain_; }
-      uint32_t StorageMask() { return storage_mask_; }
-      void SetStorageMask(uint32_t storage_mask) { storage_mask_ = storage_mask; }
-      LIR* DefStart() { return def_start_; }
-      void SetDefStart(LIR* def_start) { def_start_ = def_start; }
-      LIR* DefEnd() { return def_end_; }
-      void SetDefEnd(LIR* def_end) { def_end_ = def_end; }
-      void ResetDefBody() { def_start_ = def_end_ = nullptr; }
-      // Find member of aliased set matching storage_used; return null if none.
-      RegisterInfo* FindMatchingView(uint32_t storage_used) {
-        RegisterInfo* res = Master();
-        for (; res != nullptr; res = res->GetAliasChain()) {
-          if (res->StorageMask() == storage_used) {
-            break;
-          }
-        }
-        return res;
-      }
-
-     private:
-      RegStorage reg_;
-      bool is_temp_;               // Can allocate as temp?
-      bool wide_value_;            // Holds a Dalvik wide value (either itself, or part of a pair).
-      bool dirty_;                 // If live, is it dirty?
-      bool aliased_;               // Is this the master for other aliased RegisterInfo's?
-      RegStorage partner_;         // If wide_value, other reg of pair or self if 64-bit register.
-      int s_reg_;                  // Name of live value.
-      ResourceMask def_use_mask_;  // Resources for this element.
-      uint32_t used_storage_;      // 1 bit per 4 bytes of storage. Unused by aliases.
-      uint32_t liveness_;          // 1 bit per 4 bytes of storage. Unused by aliases.
-      RegisterInfo* master_;       // Pointer to controlling storage mask.
-      uint32_t storage_mask_;      // Track allocation of sub-units.
-      LIR *def_start_;             // Starting inst in last def sequence.
-      LIR *def_end_;               // Ending inst in last def sequence.
-      RegisterInfo* alias_chain_;  // Chain of aliased registers.
-    };
-
-    class RegisterPool : public DeletableArenaObject<kArenaAllocRegAlloc> {
-     public:
-      RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
-                   const ArrayRef<const RegStorage>& core_regs,
-                   const ArrayRef<const RegStorage>& core64_regs,
-                   const ArrayRef<const RegStorage>& sp_regs,
-                   const ArrayRef<const RegStorage>& dp_regs,
-                   const ArrayRef<const RegStorage>& reserved_regs,
-                   const ArrayRef<const RegStorage>& reserved64_regs,
-                   const ArrayRef<const RegStorage>& core_temps,
-                   const ArrayRef<const RegStorage>& core64_temps,
-                   const ArrayRef<const RegStorage>& sp_temps,
-                   const ArrayRef<const RegStorage>& dp_temps);
-      ~RegisterPool() {}
-      void ResetNextTemp() {
-        next_core_reg_ = 0;
-        next_sp_reg_ = 0;
-        next_dp_reg_ = 0;
-      }
-      ArenaVector<RegisterInfo*> core_regs_;
-      int next_core_reg_;
-      ArenaVector<RegisterInfo*> core64_regs_;
-      int next_core64_reg_;
-      ArenaVector<RegisterInfo*> sp_regs_;    // Single precision float.
-      int next_sp_reg_;
-      ArenaVector<RegisterInfo*> dp_regs_;    // Double precision float.
-      int next_dp_reg_;
-      ArenaVector<RegisterInfo*>* ref_regs_;  // Points to core_regs_ or core64_regs_
-      int* next_ref_reg_;
-
-     private:
-      Mir2Lir* const m2l_;
-    };
-
-    struct PromotionMap {
-      RegLocationType core_location:3;
-      uint8_t core_reg;
-      RegLocationType fp_location:3;
-      uint8_t fp_reg;
-      bool first_in_pair;
-    };
-
-    //
-    // Slow paths.  This object is used to generate a sequence of code that is executed in the
-    // slow path.  For example, resolving a string or class is slow as it will only be executed
-    // once (after that it is resolved and doesn't need to be done again).  We want slow paths
-    // to be placed out-of-line, and not require a (probably mispredicted) conditional forward
-    // branch over them.
-    //
-    // If you want to create a slow path, declare a class derived from LIRSlowPath and provide
-    // the Compile() function that will be called near the end of the code generated by the
-    // method.
-    //
-    // The basic flow for a slow path is:
-    //
-    //     CMP reg, #value
-    //     BEQ fromfast
-    //   cont:
-    //     ...
-    //     fast path code
-    //     ...
-    //     more code
-    //     ...
-    //     RETURN
-    //
-    //   fromfast:
-    //     ...
-    //     slow path code
-    //     ...
-    //     B cont
-    //
-    // So you see we need two labels and two branches.  The first branch (called fromfast) is
-    // the conditional branch to the slow path code.  The second label (called cont) is used
-    // as an unconditional branch target for getting back to the code after the slow path
-    // has completed.
-    //
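-    // Illustrative sketch (hypothetical, not from the original file): a minimal
-    // slow path subclass.  Compile() binds the fromfast target, does the slow
-    // work (here an assumed CallRuntimeHelper use with the kQuickTestSuspend
-    // entrypoint), and branches back to cont:
-    //
-    //   class ExampleSlowPath : public LIRSlowPath {
-    //    public:
-    //     ExampleSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont)
-    //         : LIRSlowPath(m2l, fromfast, cont) {}
-    //     void Compile() OVERRIDE {
-    //       GenerateTargetLabel();                               // bind fromfast's target
-    //       m2l_->CallRuntimeHelper(kQuickTestSuspend, true /* safepoint_pc */);
-    //       m2l_->OpUnconditionalBranch(GetContinuationLabel());  // back to cont
-    //     }
-    //   };
-    //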
-
-    class LIRSlowPath : public ArenaObject<kArenaAllocSlowPaths> {
-     public:
-      LIRSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont = nullptr)
-          : m2l_(m2l), cu_(m2l->cu_),
-            current_dex_pc_(m2l->current_dalvik_offset_), current_mir_(m2l->current_mir_),
-            fromfast_(fromfast), cont_(cont) {
-      }
-      virtual ~LIRSlowPath() {}
-      virtual void Compile() = 0;
-
-      LIR *GetContinuationLabel() {
-        return cont_;
-      }
-
-      LIR *GetFromFast() {
-        return fromfast_;
-      }
-
-     protected:
-      LIR* GenerateTargetLabel(int opcode = kPseudoTargetLabel);
-
-      Mir2Lir* const m2l_;
-      CompilationUnit* const cu_;
-      const DexOffset current_dex_pc_;
-      MIR* current_mir_;
-      LIR* const fromfast_;
-      LIR* const cont_;
-    };
-
-    class SuspendCheckSlowPath;
-    class SpecialSuspendCheckSlowPath;
-
-    // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_.
-    class ScopedMemRefType {
-     public:
-      ScopedMemRefType(Mir2Lir* m2l, ResourceMask::ResourceBit new_mem_ref_type)
-          : m2l_(m2l),
-            old_mem_ref_type_(m2l->mem_ref_type_) {
-        m2l_->mem_ref_type_ = new_mem_ref_type;
-      }
-
-      ~ScopedMemRefType() {
-        m2l_->mem_ref_type_ = old_mem_ref_type_;
-      }
-
-     private:
-      Mir2Lir* const m2l_;
-      ResourceMask::ResourceBit old_mem_ref_type_;
-
-      DISALLOW_COPY_AND_ASSIGN(ScopedMemRefType);
-    };
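-
-    // Typical use (illustrative sketch; ResourceMask::kDalvikReg is assumed to
-    // be one of the ResourceBit values):
-    //   {
-    //     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    //     // loads/stores emitted here carry the new memory reference type
-    //   }  // mem_ref_type_ is restored on scope exit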
-
-    virtual ~Mir2Lir() {}
-
-    /**
-     * @brief Decodes the LIR offset.
-     * @return Returns the scaled offset of LIR.
-     */
-    virtual size_t GetInstructionOffset(LIR* lir);
-
-    int32_t s4FromSwitchData(const void* switch_data) {
-      return *reinterpret_cast<const int32_t*>(switch_data);
-    }
-
-    /*
-     * TODO: this is a trace JIT vestige, and its use should be reconsidered.  At the time
-     * it was introduced, it was intended to be a quick best guess of type without having to
-     * take the time to do type analysis.  Currently, though, we have a much better idea of
-     * the types of Dalvik virtual registers.  Instead of using this for a best guess, why not
-     * just use our knowledge of type to select the most appropriate register class?
-     */
-    RegisterClass RegClassBySize(OpSize size) {
-      if (size == kReference) {
-        return kRefReg;
-      } else {
-        return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
-                size == kSignedByte) ? kCoreReg : kAnyReg;
-      }
-    }
-
-    size_t CodeBufferSizeInBytes() {
-      return code_buffer_.size() * sizeof(code_buffer_[0]);
-    }
-
-    static bool IsPseudoLirOp(int opcode) {
-      return (opcode < 0);
-    }
-
-    /*
-     * LIR operands are 32-bit integers.  Sometimes (especially for managing
-     * instructions which require PC-relative fixups), we need the operands to carry
-     * pointers.  To do this, we assign these pointers an index in pointer_storage_, and
-     * hold that index in the operand array.
-     * TUNING: If use of these utilities becomes more common on 32-bit builds, it
-     * may be worth conditionally-compiling a set of identity functions here.
-     */
-    template <typename T>
-    uint32_t WrapPointer(const T* pointer) {
-      uint32_t res = pointer_storage_.size();
-      pointer_storage_.push_back(pointer);
-      return res;
-    }
-
-    template <typename T>
-    const T* UnwrapPointer(size_t index) {
-      return reinterpret_cast<const T*>(pointer_storage_[index]);
-    }
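-
-    // Illustrative round trip (assuming some `const MethodReference* ref` and a
-    // LIR* `lir` are in scope):
-    //   uint32_t idx = WrapPointer(ref);   // park the pointer, keep an index
-    //   lir->operands[1] = idx;            // the index fits in a 32-bit operand
-    //   const MethodReference* back = UnwrapPointer<MethodReference>(idx);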
-
-    // strdup(), but allocates from the arena.
-    char* ArenaStrdup(const char* str) {
-      size_t len = strlen(str) + 1;
-      char* res = arena_->AllocArray<char>(len, kArenaAllocMisc);
-      if (res != nullptr) {
-        strncpy(res, str, len);
-      }
-      return res;
-    }
-
-    // Shared by all targets - implemented in codegen_util.cc
-    void AppendLIR(LIR* lir);
-    void InsertLIRBefore(LIR* current_lir, LIR* new_lir);
-    void InsertLIRAfter(LIR* current_lir, LIR* new_lir);
-
-    /**
-     * @brief Provides the maximum number of compiler temporaries that the backend can/wants
-     * to place in a frame.
-     * @return Returns the maximum number of compiler temporaries.
-     */
-    size_t GetMaxPossibleCompilerTemps() const;
-
-    /**
-     * @brief Provides the number of bytes needed in frame for spilling of compiler temporaries.
-     * @return Returns the size in bytes for space needed for compiler temporary spill region.
-     */
-    size_t GetNumBytesForCompilerTempSpillRegion();
-
-    DexOffset GetCurrentDexPc() const {
-      return current_dalvik_offset_;
-    }
-
-    RegisterClass ShortyToRegClass(char shorty_type);
-    int ComputeFrameSize();
-    void Materialize();
-    virtual CompiledMethod* GetCompiledMethod();
-    void MarkSafepointPC(LIR* inst);
-    void MarkSafepointPCAfter(LIR* after);
-    void SetupResourceMasks(LIR* lir);
-    void SetMemRefType(LIR* lir, bool is_load, int mem_type);
-    void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
-    void SetupRegMask(ResourceMask* mask, int reg);
-    void ClearRegMask(ResourceMask* mask, int reg);
-    void DumpLIRInsn(LIR* arg, unsigned char* base_addr);
-    void EliminateLoad(LIR* lir, int reg_id);
-    void DumpDependentInsnPair(LIR* check_lir, LIR* this_lir, const char* type);
-    void DumpPromotionMap();
-    void CodegenDump();
-    LIR* RawLIR(DexOffset dalvik_offset, int opcode, int op0 = 0, int op1 = 0,
-                int op2 = 0, int op3 = 0, int op4 = 0, LIR* target = nullptr);
-    LIR* NewLIR0(int opcode);
-    LIR* NewLIR1(int opcode, int dest);
-    LIR* NewLIR2(int opcode, int dest, int src1);
-    LIR* NewLIR2NoDest(int opcode, int src, int info);
-    LIR* NewLIR3(int opcode, int dest, int src1, int src2);
-    LIR* NewLIR4(int opcode, int dest, int src1, int src2, int info);
-    LIR* NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2);
-    LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta);
-    LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi);
-    LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method);
-    LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx);
-    LIR* AddWordData(LIR* *constant_list_p, int value);
-    LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi);
-    void DumpSparseSwitchTable(const uint16_t* table);
-    void DumpPackedSwitchTable(const uint16_t* table);
-    void MarkBoundary(DexOffset offset, const char* inst_str);
-    void NopLIR(LIR* lir);
-    void UnlinkLIR(LIR* lir);
-    bool IsInexpensiveConstant(RegLocation rl_src);
-    ConditionCode FlipComparisonOrder(ConditionCode before);
-    ConditionCode NegateComparison(ConditionCode before);
-    virtual void InstallLiteralPools();
-    void InstallSwitchTables();
-    void InstallFillArrayData();
-    bool VerifyCatchEntries();
-    void CreateMappingTables();
-    void CreateNativeGcMap();
-    void CreateNativeGcMapWithoutRegisterPromotion();
-    int AssignLiteralOffset(CodeOffset offset);
-    int AssignSwitchTablesOffset(CodeOffset offset);
-    int AssignFillArrayDataOffset(CodeOffset offset);
-    LIR* InsertCaseLabel(uint32_t bbid, int keyVal);
-
-    // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
-    virtual RegLocation NarrowRegLoc(RegLocation loc);
-
-    // Shared by all targets - implemented in local_optimizations.cc
-    void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src);
-    void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir);
-    void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir);
-    virtual void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir);
-
-    // Shared by all targets - implemented in ralloc_util.cc
-    int GetSRegHi(int lowSreg);
-    bool LiveOut(int s_reg);
-    void SimpleRegAlloc();
-    void ResetRegPool();
-    void CompilerInitPool(RegisterInfo* info, RegStorage* regs, int num);
-    void DumpRegPool(ArenaVector<RegisterInfo*>* regs);
-    void DumpCoreRegPool();
-    void DumpFpRegPool();
-    void DumpRegPools();
-    /* Mark a temp register as dead.  Does not affect allocation state. */
-    void Clobber(RegStorage reg);
-    void ClobberSReg(int s_reg);
-    void ClobberAliases(RegisterInfo* info, uint32_t clobber_mask);
-    int SRegToPMap(int s_reg);
-    void RecordCorePromotion(RegStorage reg, int s_reg);
-    RegStorage AllocPreservedCoreReg(int s_reg);
-    void RecordFpPromotion(RegStorage reg, int s_reg);
-    RegStorage AllocPreservedFpReg(int s_reg);
-    virtual RegStorage AllocPreservedSingle(int s_reg);
-    virtual RegStorage AllocPreservedDouble(int s_reg);
-    RegStorage AllocTempBody(ArenaVector<RegisterInfo*>& regs, int* next_temp, bool required);
-    virtual RegStorage AllocTemp(bool required = true);
-    virtual RegStorage AllocTempWide(bool required = true);
-    virtual RegStorage AllocTempRef(bool required = true);
-    virtual RegStorage AllocTempSingle(bool required = true);
-    virtual RegStorage AllocTempDouble(bool required = true);
-    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class, bool required = true);
-    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class, bool required = true);
-    void FlushReg(RegStorage reg);
-    void FlushRegWide(RegStorage reg);
-    RegStorage AllocLiveReg(int s_reg, int reg_class, bool wide);
-    RegStorage FindLiveReg(ArenaVector<RegisterInfo*>& regs, int s_reg);
-    virtual void FreeTemp(RegStorage reg);
-    virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
-    virtual bool IsLive(RegStorage reg);
-    virtual bool IsTemp(RegStorage reg);
-    bool IsPromoted(RegStorage reg);
-    bool IsDirty(RegStorage reg);
-    virtual void LockTemp(RegStorage reg);
-    void ResetDef(RegStorage reg);
-    void NullifyRange(RegStorage reg, int s_reg);
-    void MarkDef(RegLocation rl, LIR *start, LIR *finish);
-    void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
-    void ResetDefLoc(RegLocation rl);
-    void ResetDefLocWide(RegLocation rl);
-    void ResetDefTracking();
-    void ClobberAllTemps();
-    void FlushSpecificReg(RegisterInfo* info);
-    void FlushAllRegs();
-    bool RegClassMatches(int reg_class, RegStorage reg);
-    void MarkLive(RegLocation loc);
-    void MarkTemp(RegStorage reg);
-    void UnmarkTemp(RegStorage reg);
-    void MarkWide(RegStorage reg);
-    void MarkNarrow(RegStorage reg);
-    void MarkClean(RegLocation loc);
-    void MarkDirty(RegLocation loc);
-    void MarkInUse(RegStorage reg);
-    bool CheckCorePoolSanity();
-    virtual RegLocation UpdateLoc(RegLocation loc);
-    virtual RegLocation UpdateLocWide(RegLocation loc);
-    RegLocation UpdateRawLoc(RegLocation loc);
-
-    /**
-     * @brief Used to prepare a register location to receive a wide value.
-     * @see EvalLoc
-     * @param loc the location where the value will be stored.
-     * @param reg_class Type of register needed.
-     * @param update Whether the liveness information should be updated.
-     * @return Returns the properly typed temporary in physical register pairs.
-     */
-    virtual RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
-
-    /**
-     * @brief Used to prepare a register location to receive a value.
-     * @param loc the location where the value will be stored.
-     * @param reg_class Type of register needed.
-     * @param update Whether the liveness information should be updated.
-     * @return Returns the properly typed temporary in physical register.
-     */
-    virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
-
-    virtual void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight);
-    virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
-    void DumpCounts(const RefCounts* arr, int size, const char* msg);
-    virtual void DoPromotion();
-    int VRegOffset(int v_reg);
-    int SRegOffset(int s_reg);
-    RegLocation GetReturnWide(RegisterClass reg_class);
-    RegLocation GetReturn(RegisterClass reg_class);
-    RegisterInfo* GetRegInfo(RegStorage reg);
-
-    // Shared by all targets - implemented in gen_common.cc.
-    void AddIntrinsicSlowPath(CallInfo* info, LIR* branch, LIR* resume = nullptr);
-    virtual bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                  RegLocation rl_src, RegLocation rl_dest, int lit);
-    bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
-    bool HandleEasyFloatingPointDiv(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    virtual void HandleSlowPaths();
-    void GenBarrier();
-    void GenDivZeroException();
-    // c_code holds condition code that's generated from testing divisor against 0.
-    void GenDivZeroCheck(ConditionCode c_code);
-    // reg holds divisor.
-    void GenDivZeroCheck(RegStorage reg);
-    void GenArrayBoundsCheck(RegStorage index, RegStorage length);
-    void GenArrayBoundsCheck(int32_t index, RegStorage length);
-    LIR* GenNullCheck(RegStorage reg);
-    void MarkPossibleNullPointerException(int opt_flags);
-    void MarkPossibleNullPointerExceptionAfter(int opt_flags, LIR* after);
-    void MarkPossibleStackOverflowException();
-    void ForceImplicitNullCheck(RegStorage reg, int opt_flags);
-    LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
-    LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
-    virtual void GenImplicitNullCheck(RegStorage reg, int opt_flags);
-    void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2,
-                             LIR* taken);
-    void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken);
-    virtual void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
-    virtual void GenLongToInt(RegLocation rl_dest, RegLocation rl_src);
-    void GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest,
-                         RegLocation rl_src);
-    void GenNewArray(uint32_t type_idx, RegLocation rl_dest,
-                     RegLocation rl_src);
-    void GenFilledNewArray(CallInfo* info);
-    void GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSput(MIR* mir, RegLocation rl_src, OpSize size);
-    // Get entrypoints are specific to types; size alone is not sufficient to safely infer
-    // the entrypoint.
-    void GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Type type);
-    void GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type,
-                 RegLocation rl_dest, RegLocation rl_obj);
-    void GenIPut(MIR* mir, int opt_flags, OpSize size,
-                 RegLocation rl_src, RegLocation rl_obj);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
-                        RegLocation rl_src);
-
-    void GenConstClass(uint32_t type_idx, RegLocation rl_dest);
-    void GenConstString(uint32_t string_idx, RegLocation rl_dest);
-    void GenNewInstance(uint32_t type_idx, RegLocation rl_dest);
-    void GenThrow(RegLocation rl_src);
-    void GenInstanceof(uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
-    void GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src);
-    void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest,
-                      RegLocation rl_src1, RegLocation rl_src2);
-    virtual void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                        RegLocation rl_src1, RegLocation rl_shift);
-    void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
-                          RegLocation rl_src, int lit);
-    virtual void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                RegLocation rl_src1, RegLocation rl_src2, int flags);
-    void GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest, RegLocation rl_src,
-                           RegisterClass return_reg_class);
-    void GenSuspendTest(int opt_flags);
-    void GenSuspendTestAndBranch(int opt_flags, LIR* target);
-
-    // This will be overridden by x86 implementation.
-    virtual void GenConstWide(RegLocation rl_dest, int64_t value);
-    virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                       RegLocation rl_src1, RegLocation rl_src2, int flags);
-
-    // Shared by all targets - implemented in gen_invoke.cc.
-    LIR* CallHelper(RegStorage r_tgt, QuickEntrypointEnum trampoline, bool safepoint_pc,
-                    bool use_link = true);
-    RegStorage CallHelperSetup(QuickEntrypointEnum trampoline);
-
-    void CallRuntimeHelper(QuickEntrypointEnum trampoline, bool safepoint_pc);
-    void CallRuntimeHelperImm(QuickEntrypointEnum trampoline, int arg0, bool safepoint_pc);
-    void CallRuntimeHelperReg(QuickEntrypointEnum trampoline, RegStorage arg0, bool safepoint_pc);
-    void CallRuntimeHelperRegLocation(QuickEntrypointEnum trampoline, RegLocation arg0,
-                                      bool safepoint_pc);
-    void CallRuntimeHelperImmImm(QuickEntrypointEnum trampoline, int arg0, int arg1,
-                                 bool safepoint_pc);
-    void CallRuntimeHelperImmRegLocation(QuickEntrypointEnum trampoline, int arg0, RegLocation arg1,
-                                         bool safepoint_pc);
-    void CallRuntimeHelperRegLocationImm(QuickEntrypointEnum trampoline, RegLocation arg0, int arg1,
-                                         bool safepoint_pc);
-    void CallRuntimeHelperImmReg(QuickEntrypointEnum trampoline, int arg0, RegStorage arg1,
-                                 bool safepoint_pc);
-    void CallRuntimeHelperRegImm(QuickEntrypointEnum trampoline, RegStorage arg0, int arg1,
-                                 bool safepoint_pc);
-    void CallRuntimeHelperImmMethod(QuickEntrypointEnum trampoline, int arg0, bool safepoint_pc);
-    void CallRuntimeHelperRegMethod(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                    bool safepoint_pc);
-    void CallRuntimeHelperRegRegLocationMethod(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                               RegLocation arg1, bool safepoint_pc);
-    void CallRuntimeHelperRegLocationRegLocation(QuickEntrypointEnum trampoline, RegLocation arg0,
-                                                 RegLocation arg1, bool safepoint_pc);
-    void CallRuntimeHelperRegReg(QuickEntrypointEnum trampoline, RegStorage arg0, RegStorage arg1,
-                                 bool safepoint_pc);
-    void CallRuntimeHelperRegRegImm(QuickEntrypointEnum trampoline, RegStorage arg0,
-                                    RegStorage arg1, int arg2, bool safepoint_pc);
-    void CallRuntimeHelperImmRegLocationMethod(QuickEntrypointEnum trampoline, int arg0,
-                                               RegLocation arg1, bool safepoint_pc);
-    void CallRuntimeHelperImmImmMethod(QuickEntrypointEnum trampoline, int arg0, int arg1,
-                                       bool safepoint_pc);
-    void CallRuntimeHelperImmRegLocationRegLocation(QuickEntrypointEnum trampoline, int arg0,
-                                                    RegLocation arg1, RegLocation arg2,
-                                                    bool safepoint_pc);
-    void CallRuntimeHelperRegLocationRegLocationRegLocation(QuickEntrypointEnum trampoline,
-                                                            RegLocation arg0, RegLocation arg1,
-                                                            RegLocation arg2,
-                                                            bool safepoint_pc);
-    void CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
-        QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1,
-        RegLocation arg2, RegLocation arg3, bool safepoint_pc);
-
-    void GenInvoke(CallInfo* info);
-    void GenInvokeNoInline(CallInfo* info);
-    virtual NextCallInsn GetNextSDCallInsn() = 0;
-
-    /**
-     * @brief Generate the actual call insn based on the method info.
-     * @param method_info the lowering info for the method call.
-     * @returns Call instruction.
-     */
-    virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) = 0;
-
-    virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    virtual int GenDalvikArgs(CallInfo* info, int call_state, LIR** pcrLabel,
-                      NextCallInsn next_call_insn,
-                      const MethodReference& target_method,
-                      uint32_t vtable_idx,
-                      uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                      bool skip_this);
-    virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count);
-    virtual void GenDalvikArgsFlushPromoted(CallInfo* info, int start);
-    /**
-     * @brief Used to determine the register location of destination.
-     * @details This is needed during generation of inline intrinsics because it finds the
-     * destination of the return, either the physical register or the target of move-result.
-     * @param info Information about the invoke.
-     * @return Returns the destination location.
-     */
-    RegLocation InlineTarget(CallInfo* info);
-
-    /**
-     * @brief Used to determine the wide register location of destination.
-     * @see InlineTarget
-     * @param info Information about the invoke.
-     * @return Returns the destination location.
-     */
-    RegLocation InlineTargetWide(CallInfo* info);
-
-    bool GenInlinedReferenceGetReferent(CallInfo* info);
-    virtual bool GenInlinedCharAt(CallInfo* info);
-    bool GenInlinedStringGetCharsNoCheck(CallInfo* info);
-    bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
-    bool GenInlinedStringFactoryNewStringFromBytes(CallInfo* info);
-    bool GenInlinedStringFactoryNewStringFromChars(CallInfo* info);
-    bool GenInlinedStringFactoryNewStringFromString(CallInfo* info);
-    virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
-    bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
-    virtual bool GenInlinedAbsInt(CallInfo* info);
-    virtual bool GenInlinedAbsLong(CallInfo* info);
-    virtual bool GenInlinedAbsFloat(CallInfo* info) = 0;
-    virtual bool GenInlinedAbsDouble(CallInfo* info) = 0;
-    bool GenInlinedFloatCvt(CallInfo* info);
-    bool GenInlinedDoubleCvt(CallInfo* info);
-    virtual bool GenInlinedCeil(CallInfo* info);
-    virtual bool GenInlinedFloor(CallInfo* info);
-    virtual bool GenInlinedRint(CallInfo* info);
-    virtual bool GenInlinedRound(CallInfo* info, bool is_double);
-    virtual bool GenInlinedArrayCopyCharArray(CallInfo* info);
-    virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
-    bool GenInlinedStringCompareTo(CallInfo* info);
-    virtual bool GenInlinedCurrentThread(CallInfo* info);
-    bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_object, bool is_volatile);
-    bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
-                             bool is_volatile, bool is_ordered);
-
-    // Shared by all targets - implemented in gen_loadstore.cc.
-    RegLocation LoadCurrMethod();
-    void LoadCurrMethodDirect(RegStorage r_tgt);
-    RegStorage LoadCurrMethodWithHint(RegStorage r_hint);
-    virtual LIR* LoadConstant(RegStorage r_dest, int value);
-    // Natural word size.
-    LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kWord, kNotVolatile);
-    }
-    // Load 32 bits, regardless of target.
-    LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
-      return LoadBaseDisp(r_base, displacement, r_dest, k32, kNotVolatile);
-    }
-    // Load a reference at base + displacement and decompress into register.
-    LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                     VolatileKind is_volatile) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kReference, is_volatile);
-    }
-    // Load a reference at base + index and decompress into register.
-    LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale) {
-      return LoadBaseIndexed(r_base, r_index, r_dest, scale, kReference);
-    }
-    // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
-    // Load Dalvik value with 64-bit memory storage.
-    virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
-    // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    virtual void LoadValueDirect(RegLocation rl_src, RegStorage r_dest);
-    // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    virtual void LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest);
-    // Load Dalvik value with 64-bit memory storage.
-    virtual void LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest);
-    // Load Dalvik value with 64-bit memory storage.
-    virtual void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
-    // Store an item of natural word size.
-    LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
-      return StoreBaseDisp(r_base, displacement, r_src, kWord, kNotVolatile);
-    }
-    // Store an uncompressed reference into a compressed 32-bit container.
-    LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                      VolatileKind is_volatile) {
-      return StoreBaseDisp(r_base, displacement, r_src, kReference, is_volatile);
-    }
-    // Store an uncompressed reference into a compressed 32-bit container by index.
-    LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale) {
-      return StoreBaseIndexed(r_base, r_index, r_src, scale, kReference);
-    }
-    // Store 32 bits, regardless of target.
-    LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
-      return StoreBaseDisp(r_base, displacement, r_src, k32, kNotVolatile);
-    }
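-
-    // Illustrative sketch (not part of the original header): the wrappers above can be
-    // combined to copy a 32-bit field between two objects, e.g.:
-    // @code
-    //   RegStorage tmp = AllocTemp();                // scratch register
-    //   Load32Disp(src_obj_reg, field_offset, tmp);  // hypothetical regs/offset
-    //   Store32Disp(dst_obj_reg, field_offset, tmp);
-    //   FreeTemp(tmp);
-    // @endcode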
-
-    /**
-     * @brief Used to do the final store in the destination as per bytecode semantics.
-     * @param rl_dest The destination dalvik register location.
-     * @param rl_src The source register location. Can be either physical register or dalvik register.
-     */
-    virtual void StoreValue(RegLocation rl_dest, RegLocation rl_src);
-
-    /**
-     * @brief Used to do the final store in a wide destination as per bytecode semantics.
-     * @see StoreValue
-     * @param rl_dest The destination dalvik register location.
-     * @param rl_src The source register location. Can be either physical register or dalvik
-     *  register.
-     */
-    virtual void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
-
-    /**
-     * @brief Used to do the final store to a destination as per bytecode semantics.
-     * @see StoreValue
-     * @param rl_dest The destination dalvik register location.
-     * @param rl_src The source register location. It must be kLocPhysReg
-     *
-     * This is used for x86 two operand computations, where we have computed the correct
-     * register value that now needs to be properly registered.  This is used to avoid an
-     * extra register copy that would result if StoreValue was called.
-     */
-    virtual void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
-
-    /**
-     * @brief Used to do the final store in a wide destination as per bytecode semantics.
-     * @see StoreValueWide
-     * @param rl_dest The destination dalvik register location.
-     * @param rl_src The source register location. It must be kLocPhysReg
-     *
-     * This is used for x86 two operand computations, where we have computed the correct
-     * register values that now need to be properly registered.  This is used to avoid an
-     * extra pair of register copies that would result if StoreValueWide was called.
-     */
-    virtual void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
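-
-    // Illustrative sketch (not from the original header) of the x86 two-operand pattern the
-    // comments above describe, with hypothetical rl_dest/rl_src1/rl_src2 locations:
-    // @code
-    //   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    //   OpRegCopy(rl_result.reg, rl_src1.reg);         // dest = src1
-    //   OpRegReg(kOpAdd, rl_result.reg, rl_src2.reg);  // dest += src2 (two-operand form)
-    //   StoreFinalValue(rl_dest, rl_result);           // no extra register copy
-    // @endcode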
-
-    // Shared by all targets - implemented in mir_to_lir.cc.
-    void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
-    virtual void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
-    bool MethodBlockCodeGen(BasicBlock* bb);
-    bool SpecialMIR2LIR(const InlineMethod& special);
-    virtual void MethodMIR2LIR();
-    // Update LIR for verbose listings.
-    void UpdateLIROffsets();
-
-    /**
-     * @brief Mark a garbage collection card. Skip if the stored value is null.
-     * @param val_reg the register holding the stored value to check against null.
-     * @param tgt_addr_reg the address of the object or array where the value was stored.
-     * @param opt_flags the optimization flags which may indicate that the value is non-null.
-     */
-    void MarkGCCard(int opt_flags, RegStorage val_reg, RegStorage tgt_addr_reg);
-
-    /*
-     * @brief Load the address of the dex method into the register.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @param symbolic_reg Symbolic register that will contain the code address.
-     * @note The symbolic register will be passed to TargetReg to get the physical register.
-     */
-    void LoadCodeAddress(const MethodReference& target_method, InvokeType type,
-                         SpecialTargetRegister symbolic_reg);
-
-    /*
-     * @brief Load the Method* of a dex method into the register.
-     * @param target_method The MethodReference of the method to be invoked.
-     * @param type How the method will be invoked.
-     * @param symbolic_reg Symbolic register that will contain the method address.
-     * @note The symbolic register will be passed to TargetReg to get the physical register.
-     */
-    virtual void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                                   SpecialTargetRegister symbolic_reg);
-
-    /*
-     * @brief Load the Class* of a Dex Class type into the register.
-     * @param dex_file The DexFile that contains the class type.
-     * @param type_idx Index of the class type within the dex file.
-     * @param symbolic_reg Symbolic register that will contain the class address.
-     * @note The symbolic register will be passed to TargetReg to get the physical register.
-     */
-    virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
-                               SpecialTargetRegister symbolic_reg);
-
-    // TODO: Support PC-relative dex cache array loads on all platforms and
-    // replace CanUseOpPcRelDexCacheArrayLoad() with dex_cache_arrays_layout_.Valid().
-    virtual bool CanUseOpPcRelDexCacheArrayLoad() const;
-
-    /*
-     * @brief Load an element of one of the dex cache arrays.
-     * @param dex_file the dex file associated with the target dex cache.
-     * @param offset the offset of the element in the fixed dex cache arrays' layout.
-     * @param r_dest the register into which to load the element.
-     * @param wide Load 64 bits if true, otherwise 32 bits.
-     */
-    virtual void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
-                                          bool wide);
-
-    // Routines that work for the generic case, but may be overridden by the target.
-    /*
-     * @brief Compare memory to immediate, and branch if condition true.
-     * @param cond The condition code that when true will branch to the target.
-     * @param temp_reg A temporary register that can be used if compare to memory is not
-     * supported by the architecture.
-     * @param base_reg The register holding the base address.
-     * @param offset The offset from the base.
-     * @param check_value The immediate to compare to.
-     * @param target branch target (or null)
-     * @param compare output for getting LIR for comparison (or null)
-     * @returns The branch instruction that was generated.
-     */
-    virtual LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target, LIR** compare);
-
-    // Required for target - codegen helpers.
-    virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                    RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
-    virtual bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
-    virtual void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                            int32_t constant) = 0;
-    virtual void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                             int64_t constant) = 0;
-    virtual LIR* CheckSuspendUsingLoad() = 0;
-
-    virtual RegStorage LoadHelper(QuickEntrypointEnum trampoline) = 0;
-
-    virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size, VolatileKind is_volatile) = 0;
-    virtual LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                 int scale, OpSize size) = 0;
-    virtual LIR* LoadConstantNoClobber(RegStorage r_dest, int value) = 0;
-    virtual LIR* LoadConstantWide(RegStorage r_dest, int64_t value) = 0;
-    virtual LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                               OpSize size, VolatileKind is_volatile) = 0;
-    virtual LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                  int scale, OpSize size) = 0;
-
-    /**
-     * @brief Unconditionally mark a garbage collection card.
-     * @param tgt_addr_reg the address of the object or array where the value was stored.
-     */
-    virtual void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) = 0;
-
-    // Required for target - register utilities.
-
-    bool IsSameReg(RegStorage reg1, RegStorage reg2) {
-      RegisterInfo* info1 = GetRegInfo(reg1);
-      RegisterInfo* info2 = GetRegInfo(reg2);
-      return (info1->Master() == info2->Master() &&
-             (info1->StorageMask() & info2->StorageMask()) != 0);
-    }
-
-    static constexpr bool IsWide(OpSize size) {
-      return size == k64 || size == kDouble;
-    }
-
-    static constexpr bool IsRef(OpSize size) {
-      return size == kReference;
-    }
-
-    /**
-     * @brief Portable way of getting special registers from the backend.
-     * @param reg Enumeration describing the purpose of the register.
-     * @return Return the #RegStorage corresponding to the given purpose @p reg.
-     * @note This function is currently allowed to return any suitable view of the registers
-     *   (e.g. this could be 64-bit solo or 32-bit solo for 64-bit backends).
-     */
-    virtual RegStorage TargetReg(SpecialTargetRegister reg) = 0;
-
-    /**
-     * @brief Portable way of getting special registers from the backend.
-     * @param reg Enumeration describing the purpose of the register.
-     * @param wide_kind What kind of view of the special register is required.
-     * @return Return the #RegStorage corresponding to the given purpose @p reg.
-     *
-     * @note For a 32-bit system, wide (kWide) views only make sense for the argument registers
-     *       and the return. In that case, this function should return a pair where the first
-     *       component of the result will be the indicated special register.
-     */
-    virtual RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) {
-      if (wide_kind == kWide) {
-        DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg));
-        static_assert((kArg1 == kArg0 + 1) && (kArg2 == kArg1 + 1) && (kArg3 == kArg2 + 1) &&
-                      (kArg4 == kArg3 + 1) && (kArg5 == kArg4 + 1) && (kArg6 == kArg5 + 1) &&
-                      (kArg7 == kArg6 + 1), "kargs range unexpected");
-        static_assert((kFArg1 == kFArg0 + 1) && (kFArg2 == kFArg1 + 1) && (kFArg3 == kFArg2 + 1) &&
-                      (kFArg4 == kFArg3 + 1) && (kFArg5 == kFArg4 + 1) && (kFArg6 == kFArg5 + 1) &&
-                      (kFArg7 == kFArg6 + 1) && (kFArg8 == kFArg7 + 1) && (kFArg9 == kFArg8 + 1) &&
-                      (kFArg10 == kFArg9 + 1) && (kFArg11 == kFArg10 + 1) &&
-                      (kFArg12 == kFArg11 + 1) && (kFArg13 == kFArg12 + 1) &&
-                      (kFArg14 == kFArg13 + 1) && (kFArg15 == kFArg14 + 1),
-                      "kfargs range unexpected");
-        static_assert(kRet1 == kRet0 + 1, "kret range unexpected");
-        return RegStorage::MakeRegPair(TargetReg(reg),
-                                       TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
-      } else {
-        return TargetReg(reg);
-      }
-    }
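-
-    // Example (illustrative): on a 32-bit target, requesting a wide view of the first
-    // argument register yields the {kArg0, kArg1} pair:
-    // @code
-    //   RegStorage arg_pair = TargetReg(kArg0, kWide);     // pair of kArg0 and kArg1
-    //   RegStorage arg_solo = TargetReg(kArg0, kNotWide);  // 32-bit solo view
-    // @endcode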
-
-    /**
-     * @brief Portable way of getting a special register for storing a pointer.
-     * @see TargetReg()
-     */
-    virtual RegStorage TargetPtrReg(SpecialTargetRegister reg) {
-      return TargetReg(reg);
-    }
-
-    // Get a reg storage corresponding to the wide & ref flags of the reg location.
-    virtual RegStorage TargetReg(SpecialTargetRegister reg, RegLocation loc) {
-      if (loc.ref) {
-        return TargetReg(reg, kRef);
-      } else {
-        return TargetReg(reg, loc.wide ? kWide : kNotWide);
-      }
-    }
-
-    void EnsureInitializedArgMappingToPhysicalReg();
-    virtual RegLocation GetReturnAlt() = 0;
-    virtual RegLocation GetReturnWideAlt() = 0;
-    virtual RegLocation LocCReturn() = 0;
-    virtual RegLocation LocCReturnRef() = 0;
-    virtual RegLocation LocCReturnDouble() = 0;
-    virtual RegLocation LocCReturnFloat() = 0;
-    virtual RegLocation LocCReturnWide() = 0;
-    virtual ResourceMask GetRegMaskCommon(const RegStorage& reg) const = 0;
-    virtual void AdjustSpillMask() = 0;
-    virtual void ClobberCallerSave() = 0;
-    virtual void FreeCallTemps() = 0;
-    virtual void LockCallTemps() = 0;
-    virtual void CompilerInitializeRegAlloc() = 0;
-
-    // Required for target - miscellaneous.
-    virtual void AssembleLIR() = 0;
-    virtual void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) = 0;
-    virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                          ResourceMask* use_mask, ResourceMask* def_mask) = 0;
-    virtual const char* GetTargetInstFmt(int opcode) = 0;
-    virtual const char* GetTargetInstName(int opcode) = 0;
-    virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0;
-
-    // Note: This may return kEncodeNone on architectures that do not expose a PC. The caller must
-    //       take care of this.
-    virtual ResourceMask GetPCUseDefEncoding() const = 0;
-    virtual uint64_t GetTargetInstFlags(int opcode) = 0;
-    virtual size_t GetInsnSize(LIR* lir) = 0;
-    virtual bool IsUnconditionalBranch(LIR* lir) = 0;
-
-    // Get the register class for load/store of a field.
-    virtual RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) = 0;
-
-    // Required for target - Dalvik-level generators.
-    virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_src2, int flags) = 0;
-    virtual void GenArithOpDouble(Instruction::Code opcode,
-                                  RegLocation rl_dest, RegLocation rl_src1,
-                                  RegLocation rl_src2) = 0;
-    virtual void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
-                                 RegLocation rl_src1, RegLocation rl_src2) = 0;
-    virtual void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
-                          RegLocation rl_src1, RegLocation rl_src2) = 0;
-    virtual void GenConversion(Instruction::Code opcode, RegLocation rl_dest,
-                               RegLocation rl_src) = 0;
-    virtual bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) = 0;
-
-    /**
-     * @brief Used to generate code for intrinsic java\.lang\.Math methods min and max.
-     * @details This is also applicable for java\.lang\.StrictMath since it is a simple algorithm
-     * that applies on integers. The generated code will write the smallest or largest value
-     * directly into the destination register as specified by the invoke information.
-     * @param info Information about the invoke.
-     * @param is_min If true generates code that computes minimum. Otherwise computes maximum.
-     * @param is_long If true the value is a long. Otherwise the value is an int.
-     * @return Returns true if successfully generated
-     */
-    virtual bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) = 0;
-    virtual bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
-
-    virtual bool GenInlinedSqrt(CallInfo* info) = 0;
-    virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
-    virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
-    virtual RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
-                                  bool is_div) = 0;
-    virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit,
-                                     bool is_div) = 0;
-    /*
-     * @brief Generate an integer div or rem operation.
-     * @param rl_dest Destination Location.
-     * @param rl_src1 Numerator Location.
-     * @param rl_src2 Divisor Location.
-     * @param is_div 'true' if this is a division, 'false' for a remainder.
-     * @param flags The instruction optimization flags. It can include information
-     * if exception check can be elided.
-     */
-    virtual RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                  RegLocation rl_src2, bool is_div, int flags) = 0;
-    /*
-     * @brief Generate an integer div or rem operation by a literal.
-     * @param rl_dest Destination Location.
-     * @param rl_src1 Numerator Location.
-     * @param lit Divisor.
-     * @param is_div 'true' if this is a division, 'false' for a remainder.
-     */
-    virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
-                                     bool is_div) = 0;
-    virtual void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0;
-
-    /**
-     * @brief Used for generating code that throws ArithmeticException if a wide divisor is zero.
-     * @details This is used for generating DivideByZero checks when the divisor is held in two
-     *  separate 32-bit registers.
-     * @param reg The register (pair) holding the 64-bit divisor.
-     */
-    virtual void GenDivZeroCheckWide(RegStorage reg) = 0;
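-
-    // A typical lowering (illustrative; backends differ): OR the two 32-bit halves into a
-    // temp and branch to the slow path if the result is zero:
-    // @code
-    //   RegStorage t_reg = AllocTemp();
-    //   OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
-    //   GenDivZeroCheck(t_reg);  // throws ArithmeticException on zero
-    //   FreeTemp(t_reg);
-    // @endcode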
-
-    virtual void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) = 0;
-    virtual void GenExitSequence() = 0;
-    virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) = 0;
-    virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
-
-    /*
-     * @brief Handle Machine Specific MIR Extended opcodes.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is not standard extended MIR.
-     * @note Base class implementation will abort for unknown opcodes.
-     */
-    virtual void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
-
-    /**
-     * @brief Lowers the kMirOpSelect MIR into LIR.
-     * @param bb The basic block in which the MIR is from.
-     * @param mir The MIR whose opcode is kMirOpSelect.
-     */
-    virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
-
-    /**
-     * @brief Generates code to select one of the given constants depending on the given
-     * condition code.
-     */
-    virtual void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                  RegisterClass dest_reg_class) = 0;
-
-    /**
-     * @brief Used to generate a memory barrier in an architecture specific way.
-     * @details The last generated LIR is considered for reuse as the barrier: if it can be
-     * updated in a way that also provides the barrier semantics, it is used as such.
-     * Otherwise, a new LIR providing the required semantics is generated.
-     * @param barrier_kind The kind of memory barrier to generate.
-     * @return whether a new instruction was generated.
-     */
-    virtual bool GenMemBarrier(MemBarrierKind barrier_kind) = 0;
-
-    virtual void GenMoveException(RegLocation rl_dest) = 0;
-    virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                               int first_bit, int second_bit) = 0;
-    virtual void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) = 0;
-    virtual void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) = 0;
-
-    // Create code for switch statements. Will decide between short and long versions below.
-    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-
-    // Potentially backend-specific versions of switch instructions for shorter switch statements.
-    // The default implementation will create a chained compare-and-branch.
-    virtual void GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    virtual void GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    // Backend-specific versions of switch instructions for longer switch statements.
-    virtual void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0;
-    virtual void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0;
-
-    virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
-    virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale,
-                             bool card_mark) = 0;
-    virtual void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_shift, int flags) = 0;
-
-    // Required for target - single operation generators.
-    virtual LIR* OpUnconditionalBranch(LIR* target) = 0;
-    virtual LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) = 0;
-    virtual LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
-                                LIR* target) = 0;
-    virtual LIR* OpCondBranch(ConditionCode cc, LIR* target) = 0;
-    virtual LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) = 0;
-    virtual LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
-    virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0;
-    virtual void OpEndIT(LIR* it) = 0;
-    virtual LIR* OpMem(OpKind op, RegStorage r_base, int disp) = 0;
-    virtual void OpPcRelLoad(RegStorage reg, LIR* target) = 0;
-    virtual LIR* OpReg(OpKind op, RegStorage r_dest_src) = 0;
-    virtual void OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
-    virtual LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) = 0;
-    virtual LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) = 0;
-    virtual LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) = 0;
-
-    /**
-     * @brief Used to generate an LIR that does a load from mem to reg.
-     * @param r_dest The destination physical register.
-     * @param r_base The base physical register for memory operand.
-     * @param offset The displacement for memory operand.
-     * @param move_type Specification on the move desired (size, alignment, register kind).
-     * @return Returns the generated move LIR.
-     */
-    virtual LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset,
-                             MoveType move_type) = 0;
-
-    /**
-     * @brief Used to generate an LIR that does a store from reg to mem.
-     * @param r_base The base physical register for memory operand.
-     * @param offset The displacement for memory operand.
-     * @param r_src The source physical register.
-     * @param move_type Specification on the move desired (size, alignment, register kind).
-     * @return Returns the generated move LIR.
-     */
-    virtual LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src,
-                             MoveType move_type) = 0;
-
-    /**
-     * @brief Used for generating a conditional register to register operation.
-     * @param op The opcode kind.
-     * @param cc The condition code that when true will perform the opcode.
-     * @param r_dest The destination physical register.
-     * @param r_src The source physical register.
-     * @return Returns the newly created LIR or null in case of creation failure.
-     */
-    virtual LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) = 0;
-
-    virtual LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) = 0;
-    virtual LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1,
-                             RegStorage r_src2) = 0;
-    virtual LIR* OpTestSuspend(LIR* target) = 0;
-    virtual LIR* OpVldm(RegStorage r_base, int count) = 0;
-    virtual LIR* OpVstm(RegStorage r_base, int count) = 0;
-    virtual void OpRegCopyWide(RegStorage dest, RegStorage src) = 0;
-    virtual bool InexpensiveConstantInt(int32_t value) = 0;
-    virtual bool InexpensiveConstantFloat(int32_t value) = 0;
-    virtual bool InexpensiveConstantLong(int64_t value) = 0;
-    virtual bool InexpensiveConstantDouble(int64_t value) = 0;
-    virtual bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode ATTRIBUTE_UNUSED) {
-      return InexpensiveConstantInt(value);
-    }
-
-    // May be optimized by targets.
-    virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
-    virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
-
-    virtual LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) = 0;
-
-    // Queries for backend support for vectors
-    /*
-     * Return the number of bits in a vector register.
-     * @return 0 if vector registers are not supported, or the
-     * number of bits in the vector register if supported.
-     */
-    virtual int VectorRegisterSize() {
-      return 0;
-    }
-
-    /*
-     * Return the number of reservable vector registers supported.
-     * @param long_or_fp True if floating-point computations or long-typed operations
-     * will be executed while vector registers are reserved.
-     * @return The number of vector registers that are available.
-     * @note The backend should ensure that sufficient vector registers are held back
-     * to generate scalar code without exhausting vector registers, if scalar code
-     * also uses the vector registers.
-     */
-    virtual int NumReservableVectorRegisters(bool long_or_fp ATTRIBUTE_UNUSED) {
-      return 0;
-    }
-
-    /**
-     * @brief Buffer of DWARF's Call Frame Information opcodes.
-     * @details It is used by debuggers and other tools to unwind the call stack.
-     */
-    dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; }
-
-  protected:
-    Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
-
-    CompilationUnit* GetCompilationUnit() {
-      return cu_;
-    }
-    /*
-     * @brief Do these SRs overlap?
-     * @param rl_op1 One RegLocation
-     * @param rl_op2 The other RegLocation
-     * @return 'true' if the VR pairs overlap
-     *
-     * Check to see if a result pair has a misaligned overlap with an operand pair.  This
-     * is not usual for dx to generate, but it is legal (for now).  In a future rev of
-     * dex, we'll want to make this case illegal.
-     */
-    bool PartiallyIntersects(RegLocation rl_op1, RegLocation rl_op2);
-
-    /*
-     * @brief Do these SRs intersect?
-     * @param rl_op1 One RegLocation
-     * @param rl_op2 The other RegLocation
-     * @return 'true' if the VR pairs intersect
-     *
-     * Check to see if a result pair has misaligned overlap or
-     * full overlap with an operand pair.
-     */
-    bool Intersects(RegLocation rl_op1, RegLocation rl_op2);
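-
-    // Example (illustrative): the wide pair {v1, v2} partially intersects {v2, v3}
-    // (misaligned overlap), fully overlaps {v1, v2}, and is disjoint from {v3, v4};
-    // Intersects() is true for the first two cases, PartiallyIntersects() only for the first.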
-
-    /*
-     * @brief Force a location (in a register) into a temporary register
-     * @param loc location of result
-     * @returns updated location
-     */
-    virtual RegLocation ForceTemp(RegLocation loc);
-
-    /*
-     * @brief Force a wide location (in registers) into temporary registers
-     * @param loc location of result
-     * @returns updated location
-     */
-    virtual RegLocation ForceTempWide(RegLocation loc);
-
-    virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
-                                    RegLocation rl_dest, RegLocation rl_src);
-
-    void AddSlowPath(LIRSlowPath* slowpath);
-
-    /*
-     * @brief Generate code for an instanceof check against a class.
-     * @param needs_access_check 'true' if we must check the access.
-     * @param type_known_final 'true' if the type is known to be a final class.
-     * @param type_known_abstract 'true' if the type is known to be an abstract class.
-     * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
-     * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
-     * @param type_idx Type index to use if use_declaring_class is 'false'.
-     * @param rl_dest Result to be set to 0 or 1.
-     * @param rl_src Object to be tested.
-     */
-    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                    bool type_known_abstract, bool use_declaring_class,
-                                    bool can_assume_type_is_in_dex_cache,
-                                    uint32_t type_idx, RegLocation rl_dest,
-                                    RegLocation rl_src);
-
-    /**
-     * @brief Used to insert a marker that can be used to associate MIR with LIR.
-     * @details Only inserts marker if verbosity is enabled.
-     * @param mir The mir that is currently being generated.
-     */
-    void GenPrintLabel(MIR* mir);
-
-    /**
-     * @brief Used to generate return sequence when there is no frame.
-     * @details Assumes that the return registers have already been populated.
-     */
-    virtual void GenSpecialExitSequence() = 0;
-
-    /**
-     * @brief Used to generate stack frame for suspend path of special methods.
-     */
-    virtual void GenSpecialEntryForSuspend() = 0;
-
-    /**
-     * @brief Used to pop the stack frame for suspend path of special methods.
-     */
-    virtual void GenSpecialExitForSuspend() = 0;
-
-    /**
-     * @brief Used to generate code for special methods that are known to be
-     * small enough to work in frameless mode.
-     * @param bb The basic block of the first MIR.
-     * @param mir The first MIR of the special method.
-     * @param special Information about the special method.
-     * @return Returns whether or not this was handled successfully. Returns false
-     * if caller should punt to normal MIR2LIR conversion.
-     */
-    virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
-
-    void ClobberBody(RegisterInfo* p);
-    void SetCurrentDexPc(DexOffset dexpc) {
-      current_dalvik_offset_ = dexpc;
-    }
-
-    /**
-     * @brief Used to lock register if argument at in_position was passed that way.
-     * @details Does nothing if the argument is passed via stack.
-     * @param in_position The argument number whose register to lock.
-     */
-    void LockArg(size_t in_position);
-
-    /**
-     * @brief Used to load VR argument to a physical register.
-     * @details The load is only done if the argument is not already in physical register.
-     * LockArg must have been previously called.
-     * @param in_position The argument number to load.
-     * @param wide Whether the argument is 64-bit or not.
-     * @return Returns the register (or register pair) for the loaded argument.
-     */
-    RegStorage LoadArg(size_t in_position, RegisterClass reg_class, bool wide = false);
-
-    /**
-     * @brief Used to load a VR argument directly to a specified register location.
-     * @param in_position The argument number to place in register.
-     * @param rl_dest The register location where to place argument.
-     */
-    void LoadArgDirect(size_t in_position, RegLocation rl_dest);
-
-    /**
-     * @brief Used to spill register if argument at in_position was passed that way.
-     * @details Does nothing if the argument is passed via stack.
-     * @param in_position The argument number whose register to spill.
-     */
-    void SpillArg(size_t in_position);
-
-    /**
-     * @brief Used to unspill register if argument at in_position was passed that way.
-     * @details Does nothing if the argument is passed via stack.
-     * @param in_position The argument number whose register to unspill.
-     */
-    void UnspillArg(size_t in_position);
-
-    /**
-     * @brief Generate suspend test in a special method.
-     */
-    SpecialSuspendCheckSlowPath* GenSpecialSuspendTest();
-
-    /**
-     * @brief Used to generate LIR for special getter method.
-     * @param mir The mir that represents the iget.
-     * @param special Information about the special getter method.
-     * @return Returns whether LIR was successfully generated.
-     */
-    bool GenSpecialIGet(MIR* mir, const InlineMethod& special);
-
-    /**
-     * @brief Used to generate LIR for special setter method.
-     * @param mir The mir that represents the iput.
-     * @param special Information about the special setter method.
-     * @return Returns whether LIR was successfully generated.
-     */
-    bool GenSpecialIPut(MIR* mir, const InlineMethod& special);
-
-    /**
-     * @brief Used to generate LIR for special return-args method.
-     * @param mir The mir that represents the return of argument.
-     * @param special Information about the special return-args method.
-     * @return Returns whether LIR was successfully generated.
-     */
-    bool GenSpecialIdentity(MIR* mir, const InlineMethod& special);
-
-    /**
-     * @brief Generate code to check if result is null and, if it is, call helper to load it.
-     * @param r_result the result register.
-     * @param trampoline the helper to call in slow path.
-     * @param imm the immediate passed to the helper.
-     */
-    void GenIfNullUseHelperImm(RegStorage r_result, QuickEntrypointEnum trampoline, int imm);
-
-    /**
-     * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT.
-     * @param field_info information about the field to be accessed.
-     * @param opt_flags the optimization flags of the MIR.
-     */
-    RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags);
-
-    void AddDivZeroCheckSlowPath(LIR* branch);
-
-    // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using
-    // kArg2 as temp.
-    virtual void CopyToArgumentRegs(RegStorage arg0, RegStorage arg1);
-
-    /**
-     * @brief Load Constant into RegLocation
-     * @param rl_dest Destination RegLocation
-     * @param value Constant value
-     */
-    virtual void GenConst(RegLocation rl_dest, int value);
-
-    /**
-     * Returns true iff wide GPRs are just different views on the same physical register.
-     */
-    virtual bool WideGPRsAreAliases() const = 0;
-
-    /**
-     * Returns true iff wide FPRs are just different views on the same physical register.
-     */
-    virtual bool WideFPRsAreAliases() const = 0;
-
-
-    enum class WidenessCheck {  // private
-      kIgnoreWide,
-      kCheckWide,
-      kCheckNotWide
-    };
-
-    enum class RefCheck {  // private
-      kIgnoreRef,
-      kCheckRef,
-      kCheckNotRef
-    };
-
-    enum class FPCheck {  // private
-      kIgnoreFP,
-      kCheckFP,
-      kCheckNotFP
-    };
-
-    /**
-     * Check whether a reg storage seems well-formed, that is, if the reg storage is valid,
-     * check that it has the expected form for the given wideness, ref and FP checks.
-     */
-    void CheckRegStorageImpl(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp, bool fail,
-                             bool report) const;
-
-    /**
-     * Check whether a reg location seems well-formed, that is, if a reg storage is encoded,
-     * check that it has the expected size.
-     */
-    void CheckRegLocationImpl(RegLocation rl, bool fail, bool report) const;
-
-    // See CheckRegStorageImpl. Will print or fail depending on kFailOnSizeError and
-    // kReportSizeError.
-    void CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp) const;
-    // See CheckRegLocationImpl.
-    void CheckRegLocation(RegLocation rl) const;
-
-    // Find the references at the beginning of a basic block (for generating GC maps).
-    void InitReferenceVRegs(BasicBlock* bb, BitVector* references);
-
-    // Update references from prev_mir to mir in the same BB. If mir is null or before
-    // prev_mir, report failure (return false) and update references to the end of the BB.
-    bool UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references);
-
-    // Update references from prev_mir to mir.
-    void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references);
-
-    /**
-     * Returns true if the frame spills the given core register.
-     */
-    bool CoreSpillMaskContains(int reg) {
-      return (core_spill_mask_ & (1u << reg)) != 0;
-    }
-
-    size_t GetCacheOffset(uint32_t index);
-    size_t GetCachePointerOffset(uint32_t index, size_t pointer_size);
-
-    void LoadTypeFromCache(uint32_t type_index, RegStorage class_reg);
-
-  public:
-    // TODO: add accessors for these.
-    LIR* literal_list_;                        // Constants.
-    LIR* method_literal_list_;                 // Method literals requiring patching.
-    LIR* class_literal_list_;                  // Class literals requiring patching.
-    LIR* code_literal_list_;                   // Code literals requiring patching.
-    LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.
-
-  protected:
-    ArenaAllocator* const arena_;
-    CompilationUnit* const cu_;
-    MIRGraph* const mir_graph_;
-    ArenaVector<SwitchTable*> switch_tables_;
-    ArenaVector<FillArrayData*> fill_array_data_;
-    ArenaVector<RegisterInfo*> tempreg_info_;
-    ArenaVector<RegisterInfo*> reginfo_map_;
-    ArenaVector<const void*> pointer_storage_;
-    CodeOffset data_offset_;            // starting offset of literal pool.
-    size_t total_size_;                   // header + code size.
-    LIR* block_label_list_;
-    PromotionMap* promotion_map_;
-    /*
-     * TODO: The code generation utilities don't have a built-in
-     * mechanism to propagate the original Dalvik opcode address to the
-     * associated generated instructions.  For the trace compiler, this wasn't
-     * necessary because the interpreter handled all throws and debugging
-     * requests.  For now we'll handle this by placing the Dalvik offset
-     * in the CompilationUnit struct before codegen for each instruction.
-     * The low-level LIR creation utilities will pull it from here.  Rework this.
-     */
-    DexOffset current_dalvik_offset_;
-    MIR* current_mir_;
-    size_t estimated_native_code_size_;     // Just an estimate; used to reserve code_buffer_ size.
-    std::unique_ptr<RegisterPool> reg_pool_;
-    /*
-     * Sanity checking for the register temp tracking.  The same SSA
-     * name should never be associated with more than one temp register
-     * during the compilation of a single instruction.
-     */
-    int live_sreg_;
-    CodeBuffer code_buffer_;
-    // The source mapping table data (pc -> dex). More entries than in encoded_mapping_table_
-    DefaultSrcMap src_mapping_table_;
-    // The encoding mapping table data (dex -> pc offset and pc offset -> dex) with a size prefix.
-    ArenaVector<uint8_t> encoded_mapping_table_;
-    ArenaVector<uint32_t> core_vmap_table_;
-    ArenaVector<uint32_t> fp_vmap_table_;
-    ArenaVector<uint8_t> native_gc_map_;
-    ArenaVector<LinkerPatch> patches_;
-    int num_core_spills_;
-    int num_fp_spills_;
-    int frame_size_;
-    unsigned int core_spill_mask_;
-    unsigned int fp_spill_mask_;
-    LIR* first_lir_insn_;
-    LIR* last_lir_insn_;
-
-    ArenaVector<LIRSlowPath*> slow_paths_;
-
-    // The memory reference type for new LIRs.
-    // NOTE: Passing this as an explicit parameter by all functions that directly or indirectly
-    // invoke RawLIR() would clutter the code and reduce the readability.
-    ResourceMask::ResourceBit mem_ref_type_;
-
-    // Each resource mask now takes 16-bytes, so having both use/def masks directly in a LIR
-    // would consume 32 bytes per LIR. Instead, the LIR now holds only pointers to the masks
-    // (i.e. 8 bytes on 32-bit arch, 16 bytes on 64-bit arch) and we use ResourceMaskCache
-    // to deduplicate the masks.
-    ResourceMaskCache mask_cache_;
-
-    // Record the MIR that generated a given safepoint (null for prologue safepoints).
-    ArenaVector<std::pair<LIR*, MIR*>> safepoints_;
-
-    // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing.
-    const DexCacheArraysLayout dex_cache_arrays_layout_;
-
-    // For architectures that don't have true PC-relative addressing, we can promote
-    // a PC of an instruction (or another PC-relative address such as a pointer to
-    // the dex cache arrays if supported) to a register. This is indicated to the
-    // register promotion by allocating a backend temp.
-    CompilerTemp* pc_rel_temp_;
-
-    // For architectures that don't have true PC-relative addressing (see pc_rel_temp_
-    // above) and also have a limited range of offsets for loads, it's useful to
-    // know the minimum offset into the dex cache arrays, so we calculate that as well
-    // if pc_rel_temp_ isn't null.
-    uint32_t dex_cache_arrays_min_offset_;
-
-    dwarf::LazyDebugFrameOpCodeWriter cfi_;
-
-    // ABI support
-    class ShortyArg {
-      public:
-        explicit ShortyArg(char type) : type_(type) { }
-        bool IsFP() { return type_ == 'F' || type_ == 'D'; }
-        bool IsWide() { return type_ == 'J' || type_ == 'D'; }
-        bool IsRef() { return type_ == 'L'; }
-        char GetType() { return type_; }
-      private:
-        char type_;
-    };
-
-    class ShortyIterator {
-      public:
-        ShortyIterator(const char* shorty, bool is_static);
-        bool Next();
-        ShortyArg GetArg() { return ShortyArg(pending_this_ ? 'L' : *cur_); }
-      private:
-        const char* cur_;
-        bool pending_this_;
-        bool initialized_;
-    };
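-
-    // Example (illustrative): iterating the shorty "ILJ" of a non-static method first yields
-    // the implicit 'this' as an 'L' (ref) argument, then 'L' and 'J' for the explicit
-    // arguments; the leading 'I' is the return type:
-    // @code
-    //   ShortyIterator it("ILJ", false /* is_static */);
-    //   while (it.Next()) {
-    //     ShortyArg arg = it.GetArg();  // 'L', 'L', 'J' in turn
-    //   }
-    // @endcode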
-
-    class InToRegStorageMapper {
-     public:
-      virtual RegStorage GetNextReg(ShortyArg arg) = 0;
-      virtual ~InToRegStorageMapper() {}
-      virtual void Reset() = 0;
-    };
-
-    class InToRegStorageMapping {
-     public:
-      explicit InToRegStorageMapping(ArenaAllocator* arena)
-          : mapping_(arena->Adapter()),
-            end_mapped_in_(0u), has_arguments_on_stack_(false), initialized_(false) {}
-      void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper);
-      /**
-       * @return the past-the-end index of VRs mapped to physical registers.
-       * In other words any VR starting from this index is mapped to memory.
-       */
-      size_t GetEndMappedIn() { return end_mapped_in_; }
-      bool HasArgumentsOnStack() { return has_arguments_on_stack_; }
-      RegStorage GetReg(size_t in_position);
-      ShortyArg GetShorty(size_t in_position);
-      bool IsInitialized() { return initialized_; }
-     private:
-      static constexpr char kInvalidShorty = '-';
-      ArenaVector<std::pair<ShortyArg, RegStorage>> mapping_;
-      size_t end_mapped_in_;
-      bool has_arguments_on_stack_;
-      bool initialized_;
-    };
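-
-    // Sketch (illustrative) of how a backend might use the mapping; cu_->shorty and the
-    // invoke-type check are assumptions about the surrounding code:
-    // @code
-    //   InToRegStorageMapping mapping(arena_);
-    //   ShortyIterator shorty_it(cu_->shorty, cu_->invoke_type == kStatic);
-    //   mapping.Initialize(&shorty_it, GetResetedInToRegStorageMapper());
-    //   if (mapping.IsInitialized() && mapping.GetEndMappedIn() > 0u) {
-    //     RegStorage first_in = mapping.GetReg(0);  // physical reg of the first input VR
-    //   }
-    // @endcode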
-
-    // Cached mapping of method input to reg storage according to ABI.
-    InToRegStorageMapping in_to_reg_storage_mapping_;
-    virtual InToRegStorageMapper* GetResetedInToRegStorageMapper() = 0;
-
-  private:
-    static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type);
-
-    friend class QuickCFITest;
-};  // Class Mir2Lir
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
deleted file mode 100644
index 18c2e55..0000000
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vector>
-#include <memory>
-
-#include "arch/instruction_set.h"
-#include "arch/instruction_set_features.h"
-#include "cfi_test.h"
-#include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
-#include "dex/pass_manager.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/quick/quick_compiler.h"
-#include "dex/quick/mir_to_lir.h"
-#include "dex/verification_results.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "gtest/gtest.h"
-
-#include "dex/quick/quick_cfi_test_expected.inc"
-
-namespace art {
-
-// Run the tests only on host.
-#ifndef __ANDROID__
-
-class QuickCFITest : public CFITest {
- public:
-  // Enable this flag to generate the expected outputs.
-  static constexpr bool kGenerateExpected = false;
-
-  void TestImpl(InstructionSet isa, const char* isa_str,
-                const std::vector<uint8_t>& expected_asm,
-                const std::vector<uint8_t>& expected_cfi) {
-    // Set up a simple compiler context.
-    ArenaPool pool;
-    ArenaAllocator arena(&pool);
-    CompilerOptions compiler_options(
-      CompilerOptions::kDefaultCompilerFilter,
-      CompilerOptions::kDefaultHugeMethodThreshold,
-      CompilerOptions::kDefaultLargeMethodThreshold,
-      CompilerOptions::kDefaultSmallMethodThreshold,
-      CompilerOptions::kDefaultTinyMethodThreshold,
-      CompilerOptions::kDefaultNumDexMethodsThreshold,
-      CompilerOptions::kDefaultInlineDepthLimit,
-      CompilerOptions::kDefaultInlineMaxCodeUnits,
-      false,
-      CompilerOptions::kDefaultTopKProfileThreshold,
-      false,
-      true,  // generate_debug_info.
-      false,
-      false,
-      false,
-      false,
-      nullptr,
-      new PassManagerOptions(),
-      nullptr,
-      false);
-    VerificationResults verification_results(&compiler_options);
-    DexFileToMethodInlinerMap method_inliner_map;
-    std::unique_ptr<const InstructionSetFeatures> isa_features;
-    std::string error;
-    isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
-    CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map,
-                          Compiler::kQuick, isa, isa_features.get(),
-                          false, nullptr, nullptr, nullptr, 0, false, false, "", false, 0, -1, "");
-    ClassLinker* linker = nullptr;
-    CompilationUnit cu(&pool, isa, &driver, linker);
-    DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } };  // NOLINT
-    cu.mir_graph.reset(new MIRGraph(&cu, &arena));
-    cu.mir_graph->current_code_item_ = &code_item;
-
-    // Generate empty method with some spills.
-    std::unique_ptr<Mir2Lir> m2l(QuickCompiler::GetCodeGenerator(&cu, nullptr));
-    m2l->frame_size_ = 64u;
-    m2l->CompilerInitializeRegAlloc();
-    for (const auto& info : m2l->reg_pool_->core_regs_) {
-      if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) {
-        m2l->core_spill_mask_ |= 1 << info->GetReg().GetRegNum();
-        m2l->num_core_spills_++;
-      }
-    }
-    for (const auto& info : m2l->reg_pool_->sp_regs_) {
-      if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) {
-        m2l->fp_spill_mask_ |= 1 << info->GetReg().GetRegNum();
-        m2l->num_fp_spills_++;
-      }
-    }
-    m2l->AdjustSpillMask();
-    m2l->GenEntrySequence(nullptr, m2l->GetCompilationUnit()->target64 ?
-        m2l->LocCReturnWide() : m2l->LocCReturnRef());
-    m2l->GenExitSequence();
-    m2l->HandleSlowPaths();
-    m2l->AssembleLIR();
-    std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
-    auto const& cfi_data = m2l->cfi().Patch(actual_asm.size());
-    std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end());
-    EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size()));
-
-    if (kGenerateExpected) {
-      GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
-    } else {
-      EXPECT_EQ(expected_asm, actual_asm);
-      EXPECT_EQ(expected_cfi, actual_cfi);
-    }
-  }
-};
-
-#define TEST_ISA(isa) \
-  TEST_F(QuickCFITest, isa) { \
-    std::vector<uint8_t> expected_asm(expected_asm_##isa, \
-        expected_asm_##isa + arraysize(expected_asm_##isa)); \
-    std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
-        expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
-    TestImpl(isa, #isa, expected_asm, expected_cfi); \
-  }
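-
-// For reference, TEST_ISA(kThumb2) expands to:
-//   TEST_F(QuickCFITest, kThumb2) {
-//     std::vector<uint8_t> expected_asm(expected_asm_kThumb2,
-//         expected_asm_kThumb2 + arraysize(expected_asm_kThumb2));
-//     std::vector<uint8_t> expected_cfi(expected_cfi_kThumb2,
-//         expected_cfi_kThumb2 + arraysize(expected_cfi_kThumb2));
-//     TestImpl(kThumb2, "kThumb2", expected_asm, expected_cfi);
-//   }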
-
-TEST_ISA(kThumb2)
-TEST_ISA(kArm64)
-TEST_ISA(kX86)
-TEST_ISA(kX86_64)
-TEST_ISA(kMips)
-TEST_ISA(kMips64)
-
-#endif  // __ANDROID__
-
-}  // namespace art
diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc
deleted file mode 100644
index 3032697..0000000
--- a/compiler/dex/quick/quick_cfi_test_expected.inc
+++ /dev/null
@@ -1,215 +0,0 @@
-static constexpr uint8_t expected_asm_kThumb2[] = {
-    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
-    0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00,
-};
-static constexpr uint8_t expected_cfi_kThumb2[] = {
-    0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
-    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
-    0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E,
-    0x40,
-};
-// 0x00000000: push {r5, r6, lr}
-// 0x00000002: .cfi_def_cfa_offset: 12
-// 0x00000002: .cfi_offset: r5 at cfa-12
-// 0x00000002: .cfi_offset: r6 at cfa-8
-// 0x00000002: .cfi_offset: r14 at cfa-4
-// 0x00000002: vpush.f32 {s16-s17}
-// 0x00000006: .cfi_def_cfa_offset: 20
-// 0x00000006: .cfi_offset_extended: r80 at cfa-20
-// 0x00000006: .cfi_offset_extended: r81 at cfa-16
-// 0x00000006: sub sp, sp, #44
-// 0x00000008: .cfi_def_cfa_offset: 64
-// 0x00000008: str r0, [sp, #0]
-// 0x0000000a: .cfi_remember_state
-// 0x0000000a: add sp, sp, #44
-// 0x0000000c: .cfi_def_cfa_offset: 20
-// 0x0000000c: vpop.f32 {s16-s17}
-// 0x00000010: .cfi_def_cfa_offset: 12
-// 0x00000010: .cfi_restore_extended: r80
-// 0x00000010: .cfi_restore_extended: r81
-// 0x00000010: pop {r5, r6, pc}
-// 0x00000012: lsls r0, r0, #0
-// 0x00000014: .cfi_restore_state
-// 0x00000014: .cfi_def_cfa_offset: 64
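-
-// Decoding note (illustrative): the expected_cfi arrays hold raw DWARF CFI opcodes. The
-// leading bytes of expected_cfi_kThumb2 read as:
-//   0x42       DW_CFA_advance_loc(2)      -> pc += 2 (past the push)
-//   0x0E 0x0C  DW_CFA_def_cfa_offset(12)  -> ".cfi_def_cfa_offset: 12" above
-//   0x85 0x03  DW_CFA_offset(r5, 3)       -> r5 at cfa-12 (data alignment factor -4)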
-
-static constexpr uint8_t expected_asm_kArm64[] = {
-    0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9,
-    0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xF9, 0xE8, 0xA7, 0x41, 0x6D,
-    0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
-    0xC0, 0x03, 0x5F, 0xD6,
-};
-static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94,
-    0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
-    0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
-    0x40,
-};
-// 0x00000000: sub sp, sp, #0x40 (64)
-// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp d8, d9, [sp, #24]
-// 0x00000008: .cfi_offset_extended: r72 at cfa-40
-// 0x00000008: .cfi_offset_extended: r73 at cfa-32
-// 0x00000008: stp x20, x21, [sp, #40]
-// 0x0000000c: .cfi_offset: r20 at cfa-24
-// 0x0000000c: .cfi_offset: r21 at cfa-16
-// 0x0000000c: str lr, [sp, #56]
-// 0x00000010: .cfi_offset: r30 at cfa-8
-// 0x00000010: str x0, [sp]
-// 0x00000014: .cfi_remember_state
-// 0x00000014: ldp d8, d9, [sp, #24]
-// 0x00000018: .cfi_restore_extended: r72
-// 0x00000018: .cfi_restore_extended: r73
-// 0x00000018: ldp x20, x21, [sp, #40]
-// 0x0000001c: .cfi_restore: r20
-// 0x0000001c: .cfi_restore: r21
-// 0x0000001c: ldr lr, [sp, #56]
-// 0x00000020: .cfi_restore: r30
-// 0x00000020: add sp, sp, #0x40 (64)
-// 0x00000024: .cfi_def_cfa_offset: 0
-// 0x00000024: ret
-// 0x00000028: .cfi_restore_state
-// 0x00000028: .cfi_def_cfa_offset: 64
-
-static constexpr uint8_t expected_asm_kX86[] = {
-    0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89,
-    0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4,
-    0x3C, 0xC3, 0x00, 0x00,
-};
-static constexpr uint8_t expected_cfi_kX86[] = {
-    0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44,
-    0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40,
-};
-// 0x00000000: sub esp, 60
-// 0x00000003: .cfi_def_cfa_offset: 64
-// 0x00000003: mov [esp + 52], ebp
-// 0x00000007: .cfi_offset: r5 at cfa-12
-// 0x00000007: mov [esp + 56], esi
-// 0x0000000b: .cfi_offset: r6 at cfa-8
-// 0x0000000b: mov [esp], eax
-// 0x0000000e: .cfi_remember_state
-// 0x0000000e: mov ebp, [esp + 52]
-// 0x00000012: .cfi_restore: r5
-// 0x00000012: mov esi, [esp + 56]
-// 0x00000016: .cfi_restore: r6
-// 0x00000016: add esp, 60
-// 0x00000019: .cfi_def_cfa_offset: 4
-// 0x00000019: ret
-// 0x0000001a: addb [eax], al
-// 0x0000001c: .cfi_restore_state
-// 0x0000001c: .cfi_def_cfa_offset: 64
-
-static constexpr uint8_t expected_asm_kX86_64[] = {
-    0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C,
-    0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F,
-    0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x48, 0x89, 0x3C, 0x24, 0x48,
-    0x8B, 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 0x24, 0x30, 0xF2, 0x44, 0x0F,
-    0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48,
-    0x83, 0xC4, 0x38, 0xC3,
-};
-static constexpr uint8_t expected_cfi_kX86_64[] = {
-    0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A,
-    0x47, 0x9E, 0x08, 0x47, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47,
-    0xDE, 0x44, 0x0E, 0x08, 0x41, 0x0B, 0x0E, 0x40,
-};
-// 0x00000000: subq rsp, 56
-// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: movq [rsp + 40], rbx
-// 0x00000009: .cfi_offset: r3 at cfa-24
-// 0x00000009: movq [rsp + 48], rbp
-// 0x0000000e: .cfi_offset: r6 at cfa-16
-// 0x0000000e: movsd [rsp + 24], xmm12
-// 0x00000015: .cfi_offset: r29 at cfa-40
-// 0x00000015: movsd [rsp + 32], xmm13
-// 0x0000001c: .cfi_offset: r30 at cfa-32
-// 0x0000001c: movq rax, rdi
-// 0x0000001f: movq [rsp], rdi
-// 0x00000023: .cfi_remember_state
-// 0x00000023: movq rbx, [rsp + 40]
-// 0x00000028: .cfi_restore: r3
-// 0x00000028: movq rbp, [rsp + 48]
-// 0x0000002d: .cfi_restore: r6
-// 0x0000002d: movsd xmm12, [rsp + 24]
-// 0x00000034: .cfi_restore: r29
-// 0x00000034: movsd xmm13, [rsp + 32]
-// 0x0000003b: .cfi_restore: r30
-// 0x0000003b: addq rsp, 56
-// 0x0000003f: .cfi_def_cfa_offset: 8
-// 0x0000003f: ret
-// 0x00000040: .cfi_restore_state
-// 0x00000040: .cfi_def_cfa_offset: 64
-
-static constexpr uint8_t expected_asm_kMips[] = {
-    0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF,
-    0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00,
-    0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F,
-    0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03,
-    0x00, 0x00, 0x00, 0x00,
-};
-static constexpr uint8_t expected_cfi_kMips[] = {
-    0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03,
-    0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44,
-    0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
-};
-// 0x00000000: addiu r29, r29, -12
-// 0x00000004: .cfi_def_cfa_offset: 12
-// 0x00000004: sw r18, +8(r29)
-// 0x00000008: .cfi_offset: r18 at cfa-4
-// 0x00000008: sw r19, +4(r29)
-// 0x0000000c: .cfi_offset: r19 at cfa-8
-// 0x0000000c: sw r31, +0(r29)
-// 0x00000010: .cfi_offset: r31 at cfa-12
-// 0x00000010: addiu r29, r29, -52
-// 0x00000014: .cfi_def_cfa_offset: 64
-// 0x00000014: or r2, r4, r0
-// 0x00000018: sw r4, +0(r29)
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: lw r18, +60(r29)
-// 0x00000020: .cfi_restore: r18
-// 0x00000020: lw r19, +56(r29)
-// 0x00000024: .cfi_restore: r19
-// 0x00000024: lw r31, +52(r29)
-// 0x00000028: .cfi_restore: r31
-// 0x00000028: addiu r29, r29, 64
-// 0x0000002c: .cfi_def_cfa_offset: 0
-// 0x0000002c: jr r31
-// 0x00000030: nop
-// 0x00000034: .cfi_restore_state
-// 0x00000034: .cfi_def_cfa_offset: 64
-
-static constexpr uint8_t expected_asm_kMips64[] = {
-    0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF,
-    0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00,
-    0x00, 0x00, 0xA4, 0xFF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF,
-    0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03,
-    0x00, 0x00, 0x00, 0x00,
-};
-static constexpr uint8_t expected_cfi_kMips64[] = {
-    0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06,
-    0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44,
-    0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
-};
-// 0x00000000: daddiu r29, r29, -24
-// 0x00000004: .cfi_def_cfa_offset: 24
-// 0x00000004: sd r18, +16(r29)
-// 0x00000008: .cfi_offset: r18 at cfa-8
-// 0x00000008: sd r19, +8(r29)
-// 0x0000000c: .cfi_offset: r19 at cfa-16
-// 0x0000000c: sd r31, +0(r29)
-// 0x00000010: .cfi_offset: r31 at cfa-24
-// 0x00000010: daddiu r29, r29, -40
-// 0x00000014: .cfi_def_cfa_offset: 64
-// 0x00000014: or r2, r4, r0
-// 0x00000018: sd r4, +0(r29)
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: ld r18, +56(r29)
-// 0x00000020: .cfi_restore: r18
-// 0x00000020: ld r19, +48(r29)
-// 0x00000024: .cfi_restore: r19
-// 0x00000024: ld r31, +40(r29)
-// 0x00000028: .cfi_restore: r31
-// 0x00000028: daddiu r29, r29, 64
-// 0x0000002c: .cfi_def_cfa_offset: 0
-// 0x0000002c: jr r31
-// 0x00000030: nop
-// 0x00000034: .cfi_restore_state
-// 0x00000034: .cfi_def_cfa_offset: 64
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
deleted file mode 100644
index 6673ea8..0000000
--- a/compiler/dex/quick/quick_compiler.cc
+++ /dev/null
@@ -1,938 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "quick_compiler.h"
-
-#include <cstdint>
-
-#include "art_method-inl.h"
-#include "base/dumpable.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/timing_logger.h"
-#include "compiler.h"
-#include "dex_file-inl.h"
-#include "dex_file_to_method_inliner_map.h"
-#include "dex/compiler_ir.h"
-#include "dex/dex_flags.h"
-#include "dex/mir_graph.h"
-#include "dex/pass_driver_me_opts.h"
-#include "dex/pass_driver_me_post_opt.h"
-#include "dex/pass_manager.h"
-#include "dex/quick/mir_to_lir.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
-#include "experimental_flags.h"
-#include "jni/quick/jni_compiler.h"
-#include "mir_to_lir.h"
-#include "mirror/object.h"
-#include "runtime.h"
-
-// Specific compiler backends.
-#ifdef ART_ENABLE_CODEGEN_arm
-#include "dex/quick/arm/backend_arm.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-#include "dex/quick/arm64/backend_arm64.h"
-#endif
-
-#if defined(ART_ENABLE_CODEGEN_mips) || defined(ART_ENABLE_CODEGEN_mips64)
-#include "dex/quick/mips/backend_mips.h"
-#endif
-
-#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
-#include "dex/quick/x86/backend_x86.h"
-#endif
-
-namespace art {
-
-static_assert(0U == static_cast<size_t>(kNone),   "kNone not 0");
-static_assert(1U == static_cast<size_t>(kArm),    "kArm not 1");
-static_assert(2U == static_cast<size_t>(kArm64),  "kArm64 not 2");
-static_assert(3U == static_cast<size_t>(kThumb2), "kThumb2 not 3");
-static_assert(4U == static_cast<size_t>(kX86),    "kX86 not 4");
-static_assert(5U == static_cast<size_t>(kX86_64), "kX86_64 not 5");
-static_assert(6U == static_cast<size_t>(kMips),   "kMips not 6");
-static_assert(7U == static_cast<size_t>(kMips64), "kMips64 not 7");
-
-// Additional disabled optimizations (beyond those disabled generally) per instruction set.
-static constexpr uint32_t kDisabledOptimizationsPerISA[] = {
-    // 0 = kNone.
-    ~0U,
-    // 1 = kArm, unused (will use kThumb2).
-    ~0U,
-    // 2 = kArm64.
-    0,
-    // 3 = kThumb2.
-    0,
-    // 4 = kX86.
-    (1 << kLoadStoreElimination) |
-    0,
-    // 5 = kX86_64.
-    (1 << kLoadStoreElimination) |
-    0,
-    // 6 = kMips.
-    (1 << kLoadStoreElimination) |
-    (1 << kLoadHoisting) |
-    (1 << kSuppressLoads) |
-    (1 << kNullCheckElimination) |
-    (1 << kPromoteRegs) |
-    (1 << kTrackLiveTemps) |
-    (1 << kSafeOptimizations) |
-    (1 << kBBOpt) |
-    (1 << kMatch) |
-    (1 << kPromoteCompilerTemps) |
-    0,
-    // 7 = kMips64.
-    (1 << kLoadStoreElimination) |
-    (1 << kLoadHoisting) |
-    (1 << kSuppressLoads) |
-    (1 << kNullCheckElimination) |
-    (1 << kPromoteRegs) |
-    (1 << kTrackLiveTemps) |
-    (1 << kSafeOptimizations) |
-    (1 << kBBOpt) |
-    (1 << kMatch) |
-    (1 << kPromoteCompilerTemps) |
-    0
-};
-static_assert(sizeof(kDisabledOptimizationsPerISA) == 8 * sizeof(uint32_t),
-              "kDisabledOpts unexpected");
-
-// Supported shorty types per instruction set. null means that all are available.
-// Z : boolean
-// B : byte
-// S : short
-// C : char
-// I : int
-// J : long
-// F : float
-// D : double
-// L : reference(object, array)
-// V : void
-static const char* kSupportedTypes[] = {
-    // 0 = kNone.
-    "",
-    // 1 = kArm, unused (will use kThumb2).
-    "",
-    // 2 = kArm64.
-    nullptr,
-    // 3 = kThumb2.
-    nullptr,
-    // 4 = kX86.
-    nullptr,
-    // 5 = kX86_64.
-    nullptr,
-    // 6 = kMips.
-    nullptr,
-    // 7 = kMips64.
-    nullptr
-};
-static_assert(sizeof(kSupportedTypes) == 8 * sizeof(char*), "kSupportedTypes unexpected");
-
-static int kAllOpcodes[] = {
-    Instruction::NOP,
-    Instruction::MOVE,
-    Instruction::MOVE_FROM16,
-    Instruction::MOVE_16,
-    Instruction::MOVE_WIDE,
-    Instruction::MOVE_WIDE_FROM16,
-    Instruction::MOVE_WIDE_16,
-    Instruction::MOVE_OBJECT,
-    Instruction::MOVE_OBJECT_FROM16,
-    Instruction::MOVE_OBJECT_16,
-    Instruction::MOVE_RESULT,
-    Instruction::MOVE_RESULT_WIDE,
-    Instruction::MOVE_RESULT_OBJECT,
-    Instruction::MOVE_EXCEPTION,
-    Instruction::RETURN_VOID,
-    Instruction::RETURN,
-    Instruction::RETURN_WIDE,
-    Instruction::RETURN_OBJECT,
-    Instruction::CONST_4,
-    Instruction::CONST_16,
-    Instruction::CONST,
-    Instruction::CONST_HIGH16,
-    Instruction::CONST_WIDE_16,
-    Instruction::CONST_WIDE_32,
-    Instruction::CONST_WIDE,
-    Instruction::CONST_WIDE_HIGH16,
-    Instruction::CONST_STRING,
-    Instruction::CONST_STRING_JUMBO,
-    Instruction::CONST_CLASS,
-    Instruction::MONITOR_ENTER,
-    Instruction::MONITOR_EXIT,
-    Instruction::CHECK_CAST,
-    Instruction::INSTANCE_OF,
-    Instruction::ARRAY_LENGTH,
-    Instruction::NEW_INSTANCE,
-    Instruction::NEW_ARRAY,
-    Instruction::FILLED_NEW_ARRAY,
-    Instruction::FILLED_NEW_ARRAY_RANGE,
-    Instruction::FILL_ARRAY_DATA,
-    Instruction::THROW,
-    Instruction::GOTO,
-    Instruction::GOTO_16,
-    Instruction::GOTO_32,
-    Instruction::PACKED_SWITCH,
-    Instruction::SPARSE_SWITCH,
-    Instruction::CMPL_FLOAT,
-    Instruction::CMPG_FLOAT,
-    Instruction::CMPL_DOUBLE,
-    Instruction::CMPG_DOUBLE,
-    Instruction::CMP_LONG,
-    Instruction::IF_EQ,
-    Instruction::IF_NE,
-    Instruction::IF_LT,
-    Instruction::IF_GE,
-    Instruction::IF_GT,
-    Instruction::IF_LE,
-    Instruction::IF_EQZ,
-    Instruction::IF_NEZ,
-    Instruction::IF_LTZ,
-    Instruction::IF_GEZ,
-    Instruction::IF_GTZ,
-    Instruction::IF_LEZ,
-    Instruction::UNUSED_3E,
-    Instruction::UNUSED_3F,
-    Instruction::UNUSED_40,
-    Instruction::UNUSED_41,
-    Instruction::UNUSED_42,
-    Instruction::UNUSED_43,
-    Instruction::AGET,
-    Instruction::AGET_WIDE,
-    Instruction::AGET_OBJECT,
-    Instruction::AGET_BOOLEAN,
-    Instruction::AGET_BYTE,
-    Instruction::AGET_CHAR,
-    Instruction::AGET_SHORT,
-    Instruction::APUT,
-    Instruction::APUT_WIDE,
-    Instruction::APUT_OBJECT,
-    Instruction::APUT_BOOLEAN,
-    Instruction::APUT_BYTE,
-    Instruction::APUT_CHAR,
-    Instruction::APUT_SHORT,
-    Instruction::IGET,
-    Instruction::IGET_WIDE,
-    Instruction::IGET_OBJECT,
-    Instruction::IGET_BOOLEAN,
-    Instruction::IGET_BYTE,
-    Instruction::IGET_CHAR,
-    Instruction::IGET_SHORT,
-    Instruction::IPUT,
-    Instruction::IPUT_WIDE,
-    Instruction::IPUT_OBJECT,
-    Instruction::IPUT_BOOLEAN,
-    Instruction::IPUT_BYTE,
-    Instruction::IPUT_CHAR,
-    Instruction::IPUT_SHORT,
-    Instruction::SGET,
-    Instruction::SGET_WIDE,
-    Instruction::SGET_OBJECT,
-    Instruction::SGET_BOOLEAN,
-    Instruction::SGET_BYTE,
-    Instruction::SGET_CHAR,
-    Instruction::SGET_SHORT,
-    Instruction::SPUT,
-    Instruction::SPUT_WIDE,
-    Instruction::SPUT_OBJECT,
-    Instruction::SPUT_BOOLEAN,
-    Instruction::SPUT_BYTE,
-    Instruction::SPUT_CHAR,
-    Instruction::SPUT_SHORT,
-    Instruction::INVOKE_VIRTUAL,
-    Instruction::INVOKE_SUPER,
-    Instruction::INVOKE_DIRECT,
-    Instruction::INVOKE_STATIC,
-    Instruction::INVOKE_INTERFACE,
-    Instruction::RETURN_VOID_NO_BARRIER,
-    Instruction::INVOKE_VIRTUAL_RANGE,
-    Instruction::INVOKE_SUPER_RANGE,
-    Instruction::INVOKE_DIRECT_RANGE,
-    Instruction::INVOKE_STATIC_RANGE,
-    Instruction::INVOKE_INTERFACE_RANGE,
-    Instruction::UNUSED_79,
-    Instruction::UNUSED_7A,
-    Instruction::NEG_INT,
-    Instruction::NOT_INT,
-    Instruction::NEG_LONG,
-    Instruction::NOT_LONG,
-    Instruction::NEG_FLOAT,
-    Instruction::NEG_DOUBLE,
-    Instruction::INT_TO_LONG,
-    Instruction::INT_TO_FLOAT,
-    Instruction::INT_TO_DOUBLE,
-    Instruction::LONG_TO_INT,
-    Instruction::LONG_TO_FLOAT,
-    Instruction::LONG_TO_DOUBLE,
-    Instruction::FLOAT_TO_INT,
-    Instruction::FLOAT_TO_LONG,
-    Instruction::FLOAT_TO_DOUBLE,
-    Instruction::DOUBLE_TO_INT,
-    Instruction::DOUBLE_TO_LONG,
-    Instruction::DOUBLE_TO_FLOAT,
-    Instruction::INT_TO_BYTE,
-    Instruction::INT_TO_CHAR,
-    Instruction::INT_TO_SHORT,
-    Instruction::ADD_INT,
-    Instruction::SUB_INT,
-    Instruction::MUL_INT,
-    Instruction::DIV_INT,
-    Instruction::REM_INT,
-    Instruction::AND_INT,
-    Instruction::OR_INT,
-    Instruction::XOR_INT,
-    Instruction::SHL_INT,
-    Instruction::SHR_INT,
-    Instruction::USHR_INT,
-    Instruction::ADD_LONG,
-    Instruction::SUB_LONG,
-    Instruction::MUL_LONG,
-    Instruction::DIV_LONG,
-    Instruction::REM_LONG,
-    Instruction::AND_LONG,
-    Instruction::OR_LONG,
-    Instruction::XOR_LONG,
-    Instruction::SHL_LONG,
-    Instruction::SHR_LONG,
-    Instruction::USHR_LONG,
-    Instruction::ADD_FLOAT,
-    Instruction::SUB_FLOAT,
-    Instruction::MUL_FLOAT,
-    Instruction::DIV_FLOAT,
-    Instruction::REM_FLOAT,
-    Instruction::ADD_DOUBLE,
-    Instruction::SUB_DOUBLE,
-    Instruction::MUL_DOUBLE,
-    Instruction::DIV_DOUBLE,
-    Instruction::REM_DOUBLE,
-    Instruction::ADD_INT_2ADDR,
-    Instruction::SUB_INT_2ADDR,
-    Instruction::MUL_INT_2ADDR,
-    Instruction::DIV_INT_2ADDR,
-    Instruction::REM_INT_2ADDR,
-    Instruction::AND_INT_2ADDR,
-    Instruction::OR_INT_2ADDR,
-    Instruction::XOR_INT_2ADDR,
-    Instruction::SHL_INT_2ADDR,
-    Instruction::SHR_INT_2ADDR,
-    Instruction::USHR_INT_2ADDR,
-    Instruction::ADD_LONG_2ADDR,
-    Instruction::SUB_LONG_2ADDR,
-    Instruction::MUL_LONG_2ADDR,
-    Instruction::DIV_LONG_2ADDR,
-    Instruction::REM_LONG_2ADDR,
-    Instruction::AND_LONG_2ADDR,
-    Instruction::OR_LONG_2ADDR,
-    Instruction::XOR_LONG_2ADDR,
-    Instruction::SHL_LONG_2ADDR,
-    Instruction::SHR_LONG_2ADDR,
-    Instruction::USHR_LONG_2ADDR,
-    Instruction::ADD_FLOAT_2ADDR,
-    Instruction::SUB_FLOAT_2ADDR,
-    Instruction::MUL_FLOAT_2ADDR,
-    Instruction::DIV_FLOAT_2ADDR,
-    Instruction::REM_FLOAT_2ADDR,
-    Instruction::ADD_DOUBLE_2ADDR,
-    Instruction::SUB_DOUBLE_2ADDR,
-    Instruction::MUL_DOUBLE_2ADDR,
-    Instruction::DIV_DOUBLE_2ADDR,
-    Instruction::REM_DOUBLE_2ADDR,
-    Instruction::ADD_INT_LIT16,
-    Instruction::RSUB_INT,
-    Instruction::MUL_INT_LIT16,
-    Instruction::DIV_INT_LIT16,
-    Instruction::REM_INT_LIT16,
-    Instruction::AND_INT_LIT16,
-    Instruction::OR_INT_LIT16,
-    Instruction::XOR_INT_LIT16,
-    Instruction::ADD_INT_LIT8,
-    Instruction::RSUB_INT_LIT8,
-    Instruction::MUL_INT_LIT8,
-    Instruction::DIV_INT_LIT8,
-    Instruction::REM_INT_LIT8,
-    Instruction::AND_INT_LIT8,
-    Instruction::OR_INT_LIT8,
-    Instruction::XOR_INT_LIT8,
-    Instruction::SHL_INT_LIT8,
-    Instruction::SHR_INT_LIT8,
-    Instruction::USHR_INT_LIT8,
-    Instruction::IGET_QUICK,
-    Instruction::IGET_WIDE_QUICK,
-    Instruction::IGET_OBJECT_QUICK,
-    Instruction::IPUT_QUICK,
-    Instruction::IPUT_WIDE_QUICK,
-    Instruction::IPUT_OBJECT_QUICK,
-    Instruction::INVOKE_VIRTUAL_QUICK,
-    Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
-    Instruction::IPUT_BOOLEAN_QUICK,
-    Instruction::IPUT_BYTE_QUICK,
-    Instruction::IPUT_CHAR_QUICK,
-    Instruction::IPUT_SHORT_QUICK,
-    Instruction::IGET_BOOLEAN_QUICK,
-    Instruction::IGET_BYTE_QUICK,
-    Instruction::IGET_CHAR_QUICK,
-    Instruction::IGET_SHORT_QUICK,
-    Instruction::INVOKE_LAMBDA,
-    Instruction::UNUSED_F4,
-    Instruction::CAPTURE_VARIABLE,
-    Instruction::CREATE_LAMBDA,
-    Instruction::LIBERATE_VARIABLE,
-    Instruction::BOX_LAMBDA,
-    Instruction::UNBOX_LAMBDA,
-    Instruction::UNUSED_FA,
-    Instruction::UNUSED_FB,
-    Instruction::UNUSED_FC,
-    Instruction::UNUSED_FD,
-    Instruction::UNUSED_FE,
-    Instruction::UNUSED_FF,
-    // ----- ExtendedMIROpcode -----
-    kMirOpPhi,
-    kMirOpCopy,
-    kMirOpFusedCmplFloat,
-    kMirOpFusedCmpgFloat,
-    kMirOpFusedCmplDouble,
-    kMirOpFusedCmpgDouble,
-    kMirOpFusedCmpLong,
-    kMirOpNop,
-    kMirOpNullCheck,
-    kMirOpRangeCheck,
-    kMirOpDivZeroCheck,
-    kMirOpCheck,
-    kMirOpSelect,
-};
-
-static int kInvokeOpcodes[] = {
-    Instruction::INVOKE_VIRTUAL,
-    Instruction::INVOKE_SUPER,
-    Instruction::INVOKE_DIRECT,
-    Instruction::INVOKE_STATIC,
-    Instruction::INVOKE_INTERFACE,
-    Instruction::INVOKE_VIRTUAL_RANGE,
-    Instruction::INVOKE_SUPER_RANGE,
-    Instruction::INVOKE_DIRECT_RANGE,
-    Instruction::INVOKE_STATIC_RANGE,
-    Instruction::INVOKE_INTERFACE_RANGE,
-    Instruction::INVOKE_VIRTUAL_QUICK,
-    Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
-};
-
-// TODO: Add support for lambda opcodes to the quick compiler.
-static const int kUnsupportedLambdaOpcodes[] = {
-    Instruction::INVOKE_LAMBDA,
-    Instruction::CREATE_LAMBDA,
-    Instruction::BOX_LAMBDA,
-    Instruction::UNBOX_LAMBDA,
-};
-
-// Unsupported opcodes. Null can be used when everything is supported. The sizes of the lists are
-// recorded below.
-static const int* kUnsupportedOpcodes[] = {
-    // 0 = kNone.
-    kAllOpcodes,
-    // 1 = kArm, unused (will use kThumb2).
-    kAllOpcodes,
-    // 2 = kArm64.
-    kUnsupportedLambdaOpcodes,
-    // 3 = kThumb2.
-    kUnsupportedLambdaOpcodes,
-    // 4 = kX86.
-    kUnsupportedLambdaOpcodes,
-    // 5 = kX86_64.
-    kUnsupportedLambdaOpcodes,
-    // 6 = kMips.
-    kUnsupportedLambdaOpcodes,
-    // 7 = kMips64.
-    kUnsupportedLambdaOpcodes,
-};
-static_assert(sizeof(kUnsupportedOpcodes) == 8 * sizeof(int*), "kUnsupportedOpcodes unexpected");
-
-// Size of the arrays stored above.
-static const size_t kUnsupportedOpcodesSize[] = {
-    // 0 = kNone.
-    arraysize(kAllOpcodes),
-    // 1 = kArm, unused (will use kThumb2).
-    arraysize(kAllOpcodes),
-    // 2 = kArm64.
-    arraysize(kUnsupportedLambdaOpcodes),
-    // 3 = kThumb2.
-    arraysize(kUnsupportedLambdaOpcodes),
-    // 4 = kX86.
-    arraysize(kUnsupportedLambdaOpcodes),
-    // 5 = kX86_64.
-    arraysize(kUnsupportedLambdaOpcodes),
-    // 6 = kMips.
-    arraysize(kUnsupportedLambdaOpcodes),
-    // 7 = kMips64.
-    arraysize(kUnsupportedLambdaOpcodes),
-};
-static_assert(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t),
-              "kUnsupportedOpcodesSize unexpected");
-
-static bool IsUnsupportedExperimentalLambdasOnly(size_t i) {
-  DCHECK_LT(i, arraysize(kUnsupportedOpcodes));
-  return kUnsupportedOpcodes[i] == kUnsupportedLambdaOpcodes;
-}
-
-// The maximum number of Dalvik registers in a method for which we will start compiling. This
-// tries to avoid an abort when we would need to manage more SSA registers than we can.
-static constexpr size_t kMaxAllowedDalvikRegisters = INT16_MAX / 2;
-
-static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
-  const char* supported_types = kSupportedTypes[instruction_set];
-  if (supported_types == nullptr) {
-    // Everything available.
-    return true;
-  }
-
-  uint32_t shorty_size = strlen(shorty);
-  CHECK_GE(shorty_size, 1u);
-
-  for (uint32_t i = 0; i < shorty_size; i++) {
-    if (strchr(supported_types, shorty[i]) == nullptr) {
-      return false;
-    }
-  }
-  return true;
-}
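// A worked example of the shorty check above, assuming the legend documented
// earlier (return type first, then one character per argument). ShortyOk is a
// hypothetical standalone stand-in; the supported set shown is illustrative.
#include <cassert>
#include <cstring>

inline bool ShortyOk(const char* shorty, const char* supported) {
  if (supported == nullptr) {
    return true;  // null means every type is available.
  }
  for (const char* p = shorty; *p != '\0'; ++p) {
    if (std::strchr(supported, *p) == nullptr) {
      return false;
    }
  }
  return true;
}

int main() {
  assert(ShortyOk("DLI", nullptr));     // double f(Object, int): all types allowed.
  assert(!ShortyOk("DLI", "ZBSCIJV"));  // 'D' and 'L' are missing from the set.
  return 0;
}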
-
-// If the ISA has unsupported opcodes, should we skip scanning over them?
-//
-// Most of the time we're compiling non-experimental files, so scanning just slows
-// compilation down, by as much as 6% with 4 threads.
-// In the rare case that we do compile experimental opcodes, the runtime has an option to enable
-// them, which forces scanning for any unsupported opcodes.
-static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
-  if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
-    // All opcodes are supported no matter what. Usually not the case
-    // since experimental opcodes are not implemented in the quick compiler.
-    return true;
-  } else if (LIKELY(!Runtime::Current()->
-                      AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas))) {
-    // Experimental opcodes are disabled.
-    //
-    // If all unsupported opcodes are experimental we don't need to do scanning.
-    return IsUnsupportedExperimentalLambdasOnly(instruction_set);
-  } else {
-    // Experimental opcodes are enabled.
-    //
-    // Do the opcode scanning if the ISA has any unsupported opcodes.
-    return false;
-  }
-}
-
-// Skip methods that we do not currently support.
-bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
-                                     CompilationUnit* cu) const {
-  // This is a limitation in mir_graph. See MirGraph::SetNumSSARegs.
-  if (cu->mir_graph->GetNumOfCodeAndTempVRs() > kMaxAllowedDalvikRegisters) {
-    VLOG(compiler) << "Too many dalvik registers : " << cu->mir_graph->GetNumOfCodeAndTempVRs();
-    return false;
-  }
-
-  // Check whether we do have limitations at all.
-  if (kSupportedTypes[cu->instruction_set] == nullptr &&
-      SkipScanningUnsupportedOpcodes(cu->instruction_set)) {
-    return true;
-  }
-
-  // Check if we can compile the prototype.
-  const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-  if (!CanCompileShorty(shorty, cu->instruction_set)) {
-    VLOG(compiler) << "Unsupported shorty : " << shorty;
-    return false;
-  }
-
-  const int* unsupport_list = kUnsupportedOpcodes[cu->instruction_set];
-  size_t unsupport_list_size = kUnsupportedOpcodesSize[cu->instruction_set];
-
-  for (unsigned int idx = 0; idx < cu->mir_graph->GetNumBlocks(); idx++) {
-    BasicBlock* bb = cu->mir_graph->GetBasicBlock(idx);
-    if (bb == nullptr) continue;
-    if (bb->block_type == kDead) continue;
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      int opcode = mir->dalvikInsn.opcode;
-      // Check if we support the byte code.
-      if (std::find(unsupport_list, unsupport_list + unsupport_list_size, opcode)
-          != unsupport_list + unsupport_list_size) {
-        if (!MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
-          VLOG(compiler) << "Unsupported dalvik byte code : "
-              << mir->dalvikInsn.opcode;
-        } else {
-          VLOG(compiler) << "Unsupported extended MIR opcode : "
-              << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
-        }
-        return false;
-      }
-      // Check if it invokes a prototype that we cannot support.
-      if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode)
-          != kInvokeOpcodes + arraysize(kInvokeOpcodes)) {
-        uint32_t invoke_method_idx = mir->dalvikInsn.vB;
-        const char* invoke_method_shorty = dex_file.GetMethodShorty(
-            dex_file.GetMethodId(invoke_method_idx));
-        if (!CanCompileShorty(invoke_method_shorty, cu->instruction_set)) {
-          VLOG(compiler) << "Unsupported invoke of '"
-              << PrettyMethod(invoke_method_idx, dex_file)
-              << "' with shorty : " << invoke_method_shorty;
-          return false;
-        }
-      }
-    }
-  }
-  return true;
-}
-
-void QuickCompiler::InitCompilationUnit(CompilationUnit& cu) const {
-  // Disable optimizations according to instruction set.
-  cu.disable_opt |= kDisabledOptimizationsPerISA[cu.instruction_set];
-  if (Runtime::Current()->UseJit()) {
-    // Disable these optimizations for JIT until quickened byte codes are done being implemented.
-    // TODO: Find a cleaner way to do this.
-    cu.disable_opt |= 1u << kLocalValueNumbering;
-  }
-}
-
-void QuickCompiler::Init() {
-  CHECK(GetCompilerDriver()->GetCompilerContext() == nullptr);
-}
-
-void QuickCompiler::UnInit() const {
-  CHECK(GetCompilerDriver()->GetCompilerContext() == nullptr);
-}
-
-/* Default optimizer/debug settings for the compiler. */
-static uint32_t kCompilerOptimizerDisableFlags = 0 |  // Disable specific optimizations
-  // (1 << kLoadStoreElimination) |
-  // (1 << kLoadHoisting) |
-  // (1 << kSuppressLoads) |
-  // (1 << kNullCheckElimination) |
-  // (1 << kClassInitCheckElimination) |
-  // (1 << kGlobalValueNumbering) |
-  // (1 << kGvnDeadCodeElimination) |
-  // (1 << kLocalValueNumbering) |
-  // (1 << kPromoteRegs) |
-  // (1 << kTrackLiveTemps) |
-  // (1 << kSafeOptimizations) |
-  // (1 << kBBOpt) |
-  // (1 << kSuspendCheckElimination) |
-  // (1 << kMatch) |
-  // (1 << kPromoteCompilerTemps) |
-  // (1 << kSuppressExceptionEdges) |
-  // (1 << kSuppressMethodInlining) |
-  0;
-
-static uint32_t kCompilerDebugFlags = 0 |     // Enable debug/testing modes
-  // (1 << kDebugDisplayMissingTargets) |
-  // (1 << kDebugVerbose) |
-  // (1 << kDebugDumpCFG) |
-  // (1 << kDebugSlowFieldPath) |
-  // (1 << kDebugSlowInvokePath) |
-  // (1 << kDebugSlowStringPath) |
-  // (1 << kDebugSlowestFieldPath) |
-  // (1 << kDebugSlowestStringPath) |
-  // (1 << kDebugExerciseResolveMethod) |
-  // (1 << kDebugVerifyDataflow) |
-  // (1 << kDebugShowMemoryUsage) |
-  // (1 << kDebugShowNops) |
-  // (1 << kDebugCountOpcodes) |
-  // (1 << kDebugDumpCheckStats) |
-  // (1 << kDebugShowSummaryMemoryUsage) |
-  // (1 << kDebugShowFilterStats) |
-  // (1 << kDebugTimings) |
-  // (1 << kDebugCodegenDump) |
-  0;
-
-CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
-                                       uint32_t access_flags,
-                                       InvokeType invoke_type,
-                                       uint16_t class_def_idx,
-                                       uint32_t method_idx,
-                                       jobject class_loader,
-                                       const DexFile& dex_file,
-                                       Handle<mirror::DexCache> dex_cache) const {
-  if (kPoisonHeapReferences) {
-    VLOG(compiler) << "Skipping method : " << PrettyMethod(method_idx, dex_file)
-                   << "  Reason = Quick does not support heap poisoning.";
-    return nullptr;
-  }
-
-  // TODO: check method fingerprint here to determine appropriate backend type.  Until then, use
-  // the build default.
-  CompilerDriver* driver = GetCompilerDriver();
-
-  VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
-  if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
-    return nullptr;
-  }
-
-  DCHECK(driver->GetCompilerOptions().IsCompilationEnabled());
-  DCHECK(!driver->GetVerifiedMethod(&dex_file, method_idx)->HasRuntimeThrow());
-
-  Runtime* const runtime = Runtime::Current();
-  ClassLinker* const class_linker = runtime->GetClassLinker();
-  InstructionSet instruction_set = driver->GetInstructionSet();
-  if (instruction_set == kArm) {
-    instruction_set = kThumb2;
-  }
-  CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker);
-  cu.dex_file = &dex_file;
-  cu.class_def_idx = class_def_idx;
-  cu.method_idx = method_idx;
-  cu.access_flags = access_flags;
-  cu.invoke_type = invoke_type;
-  cu.shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-
-  CHECK((cu.instruction_set == kThumb2) ||
-        (cu.instruction_set == kArm64) ||
-        (cu.instruction_set == kX86) ||
-        (cu.instruction_set == kX86_64) ||
-        (cu.instruction_set == kMips) ||
-        (cu.instruction_set == kMips64));
-
-  // TODO: set this from command line
-  constexpr bool compiler_flip_match = false;
-  const std::string compiler_method_match = "";
-
-  bool use_match = !compiler_method_match.empty();
-  bool match = use_match && (compiler_flip_match ^
-      (PrettyMethod(method_idx, dex_file).find(compiler_method_match) != std::string::npos));
-  if (!use_match || match) {
-    cu.disable_opt = kCompilerOptimizerDisableFlags;
-    cu.enable_debug = kCompilerDebugFlags;
-    cu.verbose = VLOG_IS_ON(compiler) ||
-        (cu.enable_debug & (1 << kDebugVerbose));
-  }
-
-  if (driver->GetCompilerOptions().HasVerboseMethods()) {
-    cu.verbose = driver->GetCompilerOptions().IsVerboseMethod(PrettyMethod(method_idx, dex_file));
-  }
-
-  if (cu.verbose) {
-    cu.enable_debug |= (1 << kDebugCodegenDump);
-  }
-
-  /*
-   * TODO: rework handling of optimization and debug flags.  Should we split out
-   * MIR and backend flags?  Need command-line setting as well.
-   */
-
-  InitCompilationUnit(cu);
-
-  cu.StartTimingSplit("BuildMIRGraph");
-  cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
-
-  /*
-   * After creation of the MIR graph, also create the code generator.
-   * The reason we do this is that optimizations on the MIR graph may need to get information
-   * that is only available if a CG exists.
-   */
-  cu.cg.reset(GetCodeGenerator(&cu, nullptr));
-
-  /* Gathering opcode stats? */
-  if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
-    cu.mir_graph->EnableOpcodeCounting();
-  }
-
-  /* Build the raw MIR graph */
-  cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
-                             class_loader, dex_file, dex_cache);
-
-  if (!CanCompileMethod(method_idx, dex_file, &cu)) {
-    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : "
-        << PrettyMethod(method_idx, dex_file);
-    cu.EndTiming();
-    return nullptr;
-  }
-
-  cu.NewTimingSplit("MIROpt:CheckFilters");
-  std::string skip_message;
-  if (cu.mir_graph->SkipCompilation(&skip_message)) {
-    VLOG(compiler) << cu.instruction_set << ": Skipping method : "
-        << PrettyMethod(method_idx, dex_file) << "  Reason = " << skip_message;
-    cu.EndTiming();
-    return nullptr;
-  }
-
-  /* Create the pass driver and launch it */
-  PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu);
-  pass_driver.Launch();
-
-  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
-  if (cu.compiler_driver->ProfilePresent()
-      && !cu.mir_graph->MethodIsLeaf()
-      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
-    cu.EndTiming();
-    return nullptr;
-  }
-
-  if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
-    cu.mir_graph->DumpCheckStats();
-  }
-
-  if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
-    cu.mir_graph->ShowOpcodeStats();
-  }
-
-  /* Reassociate sreg names with original Dalvik vreg names. */
-  cu.mir_graph->RemapRegLocations();
-
-  /* Free Arenas from the cu.arena_stack for reuse by the cu.arena in the codegen. */
-  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
-    if (cu.arena_stack.PeakBytesAllocated() > 1 * 1024 * 1024) {
-      MemStats stack_stats(cu.arena_stack.GetPeakStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
-    }
-  }
-  cu.arena_stack.Reset();
-
-  CompiledMethod* result = nullptr;
-
-  if (cu.mir_graph->PuntToInterpreter()) {
-    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: "
-        << PrettyMethod(method_idx, dex_file);
-    cu.EndTiming();
-    return nullptr;
-  }
-
-  cu.cg->Materialize();
-
-  cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
-  result = cu.cg->GetCompiledMethod();
-  cu.NewTimingSplit("Cleanup");
-
-  if (result) {
-    VLOG(compiler) << cu.instruction_set << ": Compiled " << PrettyMethod(method_idx, dex_file);
-  } else {
-    VLOG(compiler) << cu.instruction_set << ": Deferred " << PrettyMethod(method_idx, dex_file);
-  }
-
-  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
-    if (cu.arena.BytesAllocated() > (1 * 1024 * 1024)) {
-      MemStats mem_stats(cu.arena.GetMemStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
-    }
-  }
-
-  if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
-    LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks()
-                    << " " << PrettyMethod(method_idx, dex_file);
-  }
-
-  cu.EndTiming();
-  driver->GetTimingsLogger()->AddLogger(cu.timings);
-  return result;
-}
-
-CompiledMethod* QuickCompiler::JniCompile(uint32_t access_flags,
-                                          uint32_t method_idx,
-                                          const DexFile& dex_file) const {
-  return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
-}
-
-uintptr_t QuickCompiler::GetEntryPointOf(ArtMethod* method) const {
-  return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
-      InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
-}
-
-Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu,
-                                         void* compilation_unit ATTRIBUTE_UNUSED) {
-  Mir2Lir* mir_to_lir = nullptr;
-  switch (cu->instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
-    case kThumb2:
-      mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
-      break;
-#endif  // ART_ENABLE_CODEGEN_arm
-#ifdef ART_ENABLE_CODEGEN_arm64
-    case kArm64:
-      mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
-      break;
-#endif  // ART_ENABLE_CODEGEN_arm64
-#if defined(ART_ENABLE_CODEGEN_mips) || defined(ART_ENABLE_CODEGEN_mips64)
-      // Intentional 2 level ifdef. Want to fail on mips64 if it is not enabled, even if mips is
-      // and vice versa.
-#ifdef ART_ENABLE_CODEGEN_mips
-    case kMips:
-      // Fall-through.
-#endif  // ART_ENABLE_CODEGEN_mips
-#ifdef ART_ENABLE_CODEGEN_mips64
-    case kMips64:
-#endif  // ART_ENABLE_CODEGEN_mips64
-      mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
-      break;
-#endif  // ART_ENABLE_CODEGEN_mips || ART_ENABLE_CODEGEN_mips64
-#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
-      // Intentional 2 level ifdef. Want to fail on x86_64 if it is not enabled, even if x86 is
-      // and vice versa.
-#ifdef ART_ENABLE_CODEGEN_x86
-    case kX86:
-      // Fall-through.
-#endif  // ART_ENABLE_CODEGEN_x86
-#ifdef ART_ENABLE_CODEGEN_x86_64
-    case kX86_64:
-#endif  // ART_ENABLE_CODEGEN_x86_64
-      mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
-      break;
-#endif  // ART_ENABLE_CODEGEN_x86 || ART_ENABLE_CODEGEN_x86_64
-    default:
-      LOG(FATAL) << "Unexpected instruction set: " << cu->instruction_set;
-  }
-
-  /* The number of compiler temporaries depends on the backend, so set it up now if possible. */
-  if (mir_to_lir) {
-    size_t max_temps = mir_to_lir->GetMaxPossibleCompilerTemps();
-    bool set_max = cu->mir_graph->SetMaxAvailableNonSpecialCompilerTemps(max_temps);
-    CHECK(set_max);
-  }
-  return mir_to_lir;
-}
-
-QuickCompiler::QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {
-  const auto& compiler_options = driver->GetCompilerOptions();
-  auto* pass_manager_options = compiler_options.GetPassManagerOptions();
-  pre_opt_pass_manager_.reset(new PassManager(*pass_manager_options));
-  CHECK(pre_opt_pass_manager_.get() != nullptr);
-  PassDriverMEOpts::SetupPasses(pre_opt_pass_manager_.get());
-  pre_opt_pass_manager_->CreateDefaultPassList();
-  if (pass_manager_options->GetPrintPassOptions()) {
-    PassDriverMEOpts::PrintPassOptions(pre_opt_pass_manager_.get());
-  }
-  // TODO: Different options for pre vs post opts?
-  post_opt_pass_manager_.reset(new PassManager(PassManagerOptions()));
-  CHECK(post_opt_pass_manager_.get() != nullptr);
-  PassDriverMEPostOpt::SetupPasses(post_opt_pass_manager_.get());
-  post_opt_pass_manager_->CreateDefaultPassList();
-  if (pass_manager_options->GetPrintPassOptions()) {
-    PassDriverMEPostOpt::PrintPassOptions(post_opt_pass_manager_.get());
-  }
-}
-
-QuickCompiler::~QuickCompiler() {
-}
-
-Compiler* CreateQuickCompiler(CompilerDriver* driver) {
-  return QuickCompiler::Create(driver);
-}
-
-Compiler* QuickCompiler::Create(CompilerDriver* driver) {
-  return new QuickCompiler(driver);
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
deleted file mode 100644
index d512b25..0000000
--- a/compiler/dex/quick/quick_compiler.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
-#define ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
-
-#include "compiler.h"
-
-namespace art {
-
-namespace mirror {
-class DexCache;
-}
-
-class Compiler;
-class CompilerDriver;
-class Mir2Lir;
-class PassManager;
-
-class QuickCompiler : public Compiler {
- public:
-  virtual ~QuickCompiler();
-
-  void Init() OVERRIDE;
-
-  void UnInit() const OVERRIDE;
-
-  bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const
-      OVERRIDE;
-
-  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
-                          uint32_t access_flags,
-                          InvokeType invoke_type,
-                          uint16_t class_def_idx,
-                          uint32_t method_idx,
-                          jobject class_loader,
-                          const DexFile& dex_file,
-                          Handle<mirror::DexCache> dex_cache) const OVERRIDE;
-
-  CompiledMethod* JniCompile(uint32_t access_flags,
-                             uint32_t method_idx,
-                             const DexFile& dex_file) const OVERRIDE;
-
-  uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit);
-
-  void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
-
-  static Compiler* Create(CompilerDriver* driver);
-
-  const PassManager* GetPreOptPassManager() const {
-    return pre_opt_pass_manager_.get();
-  }
-  const PassManager* GetPostOptPassManager() const {
-    return post_opt_pass_manager_.get();
-  }
-
- protected:
-  explicit QuickCompiler(CompilerDriver* driver);
-
- private:
-  std::unique_ptr<PassManager> pre_opt_pass_manager_;
-  std::unique_ptr<PassManager> post_opt_pass_manager_;
-  DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
diff --git a/compiler/dex/quick/quick_compiler_factory.h b/compiler/dex/quick/quick_compiler_factory.h
deleted file mode 100644
index 31ee1cf..0000000
--- a/compiler/dex/quick/quick_compiler_factory.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_QUICK_COMPILER_FACTORY_H_
-#define ART_COMPILER_DEX_QUICK_QUICK_COMPILER_FACTORY_H_
-
-namespace art {
-
-class Compiler;
-class CompilerDriver;
-
-Compiler* CreateQuickCompiler(CompilerDriver* driver);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_QUICK_COMPILER_FACTORY_H_
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
deleted file mode 100644
index d9d0434..0000000
--- a/compiler/dex/quick/ralloc_util.cc
+++ /dev/null
@@ -1,1559 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains register allocation support. */
-
-#include "mir_to_lir-inl.h"
-
-#include "dex/compiler_ir.h"
-#include "dex/dataflow_iterator-inl.h"
-#include "dex/mir_graph.h"
-#include "driver/compiler_driver.h"
-#include "driver/dex_compilation_unit.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-
-namespace art {
-
-/*
- * Free all allocated temps in the temp pools.  Note that this does
- * not affect the "liveness" of a temp register, which will stay
- * live until it is either explicitly killed or reallocated.
- */
-void Mir2Lir::ResetRegPool() {
-  for (RegisterInfo* info : tempreg_info_) {
-    info->MarkFree();
-  }
-  // Reset temp tracking sanity check.
-  if (kIsDebugBuild) {
-    live_sreg_ = INVALID_SREG;
-  }
-}
-
-Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, const ResourceMask& mask)
-  : reg_(r), is_temp_(false), wide_value_(false), dirty_(false), aliased_(false), partner_(r),
-    s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this), def_start_(nullptr),
-    def_end_(nullptr), alias_chain_(nullptr) {
-  switch (r.StorageSize()) {
-    case 0: storage_mask_ = 0xffffffff; break;
-    case 4: storage_mask_ = 0x00000001; break;
-    case 8: storage_mask_ = 0x00000003; break;
-    case 16: storage_mask_ = 0x0000000f; break;
-    case 32: storage_mask_ = 0x000000ff; break;
-    case 64: storage_mask_ = 0x0000ffff; break;
-    case 128: storage_mask_ = 0xffffffff; break;
-  }
-  used_storage_ = r.Valid() ? ~storage_mask_ : storage_mask_;
-  liveness_ = used_storage_;
-}
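// A minimal sketch of the storage-mask pattern in the switch above: each bit
// covers one 4-byte lane of the widest overlapping register, so a k-byte view
// sets its low k/4 bits, and sizes 0 and 128 cover everything. LaneMask is a
// hypothetical helper written for illustration, not part of the original file.
#include <cstddef>
#include <cstdint>

constexpr uint32_t LaneMask(size_t storage_bytes) {
  return (storage_bytes == 0 || storage_bytes >= 128)
             ? 0xffffffffu
             : (1u << (storage_bytes / 4)) - 1u;
}

static_assert(LaneMask(4) == 0x00000001u, "a 32-bit view aliases one lane");
static_assert(LaneMask(8) == 0x00000003u, "a 64-bit view aliases two lanes");
static_assert(LaneMask(64) == 0x0000ffffu, "a 64-byte view aliases sixteen lanes");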
-
-Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
-                                    const ArrayRef<const RegStorage>& core_regs,
-                                    const ArrayRef<const RegStorage>& core64_regs,
-                                    const ArrayRef<const RegStorage>& sp_regs,
-                                    const ArrayRef<const RegStorage>& dp_regs,
-                                    const ArrayRef<const RegStorage>& reserved_regs,
-                                    const ArrayRef<const RegStorage>& reserved64_regs,
-                                    const ArrayRef<const RegStorage>& core_temps,
-                                    const ArrayRef<const RegStorage>& core64_temps,
-                                    const ArrayRef<const RegStorage>& sp_temps,
-                                    const ArrayRef<const RegStorage>& dp_temps) :
-    core_regs_(arena->Adapter()), next_core_reg_(0),
-    core64_regs_(arena->Adapter()), next_core64_reg_(0),
-    sp_regs_(arena->Adapter()), next_sp_reg_(0),
-    dp_regs_(arena->Adapter()), next_dp_reg_(0), m2l_(m2l)  {
-  // Initialize the fast lookup map.
-  m2l_->reginfo_map_.clear();
-  m2l_->reginfo_map_.resize(RegStorage::kMaxRegs, nullptr);
-
-  // Construct the register pool.
-  core_regs_.reserve(core_regs.size());
-  for (const RegStorage& reg : core_regs) {
-    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
-    m2l_->reginfo_map_[reg.GetReg()] = info;
-    core_regs_.push_back(info);
-  }
-  core64_regs_.reserve(core64_regs.size());
-  for (const RegStorage& reg : core64_regs) {
-    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
-    m2l_->reginfo_map_[reg.GetReg()] = info;
-    core64_regs_.push_back(info);
-  }
-  sp_regs_.reserve(sp_regs.size());
-  for (const RegStorage& reg : sp_regs) {
-    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
-    m2l_->reginfo_map_[reg.GetReg()] = info;
-    sp_regs_.push_back(info);
-  }
-  dp_regs_.reserve(dp_regs.size());
-  for (const RegStorage& reg : dp_regs) {
-    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
-    m2l_->reginfo_map_[reg.GetReg()] = info;
-    dp_regs_.push_back(info);
-  }
-
-  // Keep special registers from being allocated.
-  for (RegStorage reg : reserved_regs) {
-    m2l_->MarkInUse(reg);
-  }
-  for (RegStorage reg : reserved64_regs) {
-    m2l_->MarkInUse(reg);
-  }
-
-  // Mark temp regs - all others not in use can be used for promotion.
-  for (RegStorage reg : core_temps) {
-    m2l_->MarkTemp(reg);
-  }
-  for (RegStorage reg : core64_temps) {
-    m2l_->MarkTemp(reg);
-  }
-  for (RegStorage reg : sp_temps) {
-    m2l_->MarkTemp(reg);
-  }
-  for (RegStorage reg : dp_temps) {
-    m2l_->MarkTemp(reg);
-  }
-
-  // Add an entry for InvalidReg with a zeroed mask.
-  RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), kEncodeNone);
-  m2l_->reginfo_map_[RegStorage::InvalidReg().GetReg()] = invalid_reg;
-
-  // Existence of core64 registers implies wide references.
-  if (core64_regs_.size() != 0) {
-    ref_regs_ = &core64_regs_;
-    next_ref_reg_ = &next_core64_reg_;
-  } else {
-    ref_regs_ = &core_regs_;
-    next_ref_reg_ = &next_core_reg_;
-  }
-}
-
-void Mir2Lir::DumpRegPool(ArenaVector<RegisterInfo*>* regs) {
-  LOG(INFO) << "================================================";
-  for (RegisterInfo* info : *regs) {
-    LOG(INFO) << StringPrintf(
-        "R[%d:%d:%c]: T:%d, U:%d, W:%d, p:%d, LV:%d, D:%d, SR:%d, DEF:%d",
-        info->GetReg().GetReg(), info->GetReg().GetRegNum(), info->GetReg().IsFloat() ?  'f' : 'c',
-        info->IsTemp(), info->InUse(), info->IsWide(), info->Partner().GetReg(), info->IsLive(),
-        info->IsDirty(), info->SReg(), info->DefStart() != nullptr);
-  }
-  LOG(INFO) << "================================================";
-}
-
-void Mir2Lir::DumpCoreRegPool() {
-  DumpRegPool(&reg_pool_->core_regs_);
-  DumpRegPool(&reg_pool_->core64_regs_);
-}
-
-void Mir2Lir::DumpFpRegPool() {
-  DumpRegPool(&reg_pool_->sp_regs_);
-  DumpRegPool(&reg_pool_->dp_regs_);
-}
-
-void Mir2Lir::DumpRegPools() {
-  LOG(INFO) << "Core registers";
-  DumpCoreRegPool();
-  LOG(INFO) << "FP registers";
-  DumpFpRegPool();
-}
-
-void Mir2Lir::Clobber(RegStorage reg) {
-  if (UNLIKELY(reg.IsPair())) {
-    DCHECK(!GetRegInfo(reg.GetLow())->IsAliased());
-    Clobber(reg.GetLow());
-    DCHECK(!GetRegInfo(reg.GetHigh())->IsAliased());
-    Clobber(reg.GetHigh());
-  } else {
-    RegisterInfo* info = GetRegInfo(reg);
-    if (info->IsTemp() && !info->IsDead()) {
-      if (info->GetReg().NotExactlyEquals(info->Partner())) {
-        ClobberBody(GetRegInfo(info->Partner()));
-      }
-      ClobberBody(info);
-      if (info->IsAliased()) {
-        ClobberAliases(info, info->StorageMask());
-      } else {
-        RegisterInfo* master = info->Master();
-        if (info != master) {
-          ClobberBody(info->Master());
-          ClobberAliases(info->Master(), info->StorageMask());
-        }
-      }
-    }
-  }
-}
-
-void Mir2Lir::ClobberAliases(RegisterInfo* info, uint32_t clobber_mask) {
-  for (RegisterInfo* alias = info->GetAliasChain(); alias != nullptr;
-       alias = alias->GetAliasChain()) {
-    DCHECK(!alias->IsAliased());  // Only the master should be marked as alised.
-    // Only clobber if we have overlap.
-    if ((alias->StorageMask() & clobber_mask) != 0) {
-      ClobberBody(alias);
-    }
-  }
-}
-
-/*
- * Break the association between a Dalvik vreg and a physical temp register of either register
- * class.
- * TODO: Ideally, the public version of this code should not exist.  Besides its local usage
- * in the register utilities, it is also used by code gen routines to work around a deficiency in
- * local register allocation, which fails to distinguish between the "in" and "out" identities
- * of Dalvik vregs.  This can result in useless register copies when the same Dalvik vreg
- * is used both as the source and destination register of an operation in which the type
- * changes (for example: INT_TO_FLOAT v1, v1).  Revisit when improved register allocation is
- * addressed.
- */
-void Mir2Lir::ClobberSReg(int s_reg) {
-  if (s_reg != INVALID_SREG) {
-    if (kIsDebugBuild && s_reg == live_sreg_) {
-      live_sreg_ = INVALID_SREG;
-    }
-    for (RegisterInfo* info : tempreg_info_) {
-      if (info->SReg() == s_reg) {
-        if (info->GetReg().NotExactlyEquals(info->Partner())) {
-          // Dealing with a pair - clobber the other half.
-          DCHECK(!info->IsAliased());
-          ClobberBody(GetRegInfo(info->Partner()));
-        }
-        ClobberBody(info);
-        if (info->IsAliased()) {
-          ClobberAliases(info, info->StorageMask());
-        }
-      }
-    }
-  }
-}
-
-/*
- * SSA names associated with the initial definitions of Dalvik
- * registers are the same as the Dalvik register number (and
- * thus take the same position in the promotion_map).  However,
- * the special Method* and compiler temp registers use negative
- * v_reg numbers to distinguish them and can have an arbitrary
- * ssa name (above the last original Dalvik register).  This function
- * maps SSA names to positions in the promotion_map array.
- */
-int Mir2Lir::SRegToPMap(int s_reg) {
-  DCHECK_LT(s_reg, mir_graph_->GetNumSSARegs());
-  DCHECK_GE(s_reg, 0);
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  return v_reg;
-}
-
-// TODO: refactor following Alloc/Record routines - much commonality.
-void Mir2Lir::RecordCorePromotion(RegStorage reg, int s_reg) {
-  int p_map_idx = SRegToPMap(s_reg);
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int reg_num = reg.GetRegNum();
-  GetRegInfo(reg)->MarkInUse();
-  core_spill_mask_ |= (1 << reg_num);
-  // Include reg for later sort
-  core_vmap_table_.push_back(reg_num << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1)));
-  num_core_spills_++;
-  promotion_map_[p_map_idx].core_location = kLocPhysReg;
-  promotion_map_[p_map_idx].core_reg = reg_num;
-}
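// A sketch of the vmap entry packing used above: the promoted register number
// sits above the low bits that hold the Dalvik vreg. VREG_NUM_WIDTH's real
// value is defined elsewhere in the tree; 16 is only an assumption here.
#include <cstdint>

constexpr int kAssumedVRegNumWidth = 16;  // assumption, not the real constant.

constexpr uint32_t PackVMapEntry(int reg_num, int v_reg) {
  return (static_cast<uint32_t>(reg_num) << kAssumedVRegNumWidth) |
         (static_cast<uint32_t>(v_reg) & ((1u << kAssumedVRegNumWidth) - 1u));
}

constexpr int VMapEntryVReg(uint32_t entry) {
  return static_cast<int>(entry & ((1u << kAssumedVRegNumWidth) - 1u));
}

static_assert(VMapEntryVReg(PackVMapEntry(5, 3)) == 3, "the vreg round-trips");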
-
-/* Reserve a callee-save register.  Return InvalidReg if none available */
-RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
-  RegStorage res;
-  /*
-   * Note: it really doesn't matter much whether we allocate from the core or core64
-   * pool for 64-bit targets - but for some targets it does matter whether allocations
-   * happen from the single or double pool.  This entire section of code could stand
-   * a good refactoring.
-   */
-  for (RegisterInfo* info : reg_pool_->core_regs_) {
-    if (!info->IsTemp() && !info->InUse()) {
-      res = info->GetReg();
-      RecordCorePromotion(res, s_reg);
-      break;
-    }
-  }
-  return res;
-}
-
-void Mir2Lir::RecordFpPromotion(RegStorage reg, int s_reg) {
-  DCHECK_NE(cu_->instruction_set, kThumb2);
-  int p_map_idx = SRegToPMap(s_reg);
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int reg_num = reg.GetRegNum();
-  GetRegInfo(reg)->MarkInUse();
-  fp_spill_mask_ |= (1 << reg_num);
-  // Include reg for later sort
-  fp_vmap_table_.push_back(reg_num << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1)));
-  num_fp_spills_++;
-  promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-  promotion_map_[p_map_idx].fp_reg = reg.GetReg();
-}
-
-// Reserve a callee-save floating point register.
-RegStorage Mir2Lir::AllocPreservedFpReg(int s_reg) {
-  /*
-   * For targets other than Thumb2, it doesn't matter whether we allocate from
-   * the sp_regs_ or dp_regs_ pool.  Some refactoring is in order here.
-   */
-  DCHECK_NE(cu_->instruction_set, kThumb2);
-  RegStorage res;
-  for (RegisterInfo* info : reg_pool_->sp_regs_) {
-    if (!info->IsTemp() && !info->InUse()) {
-      res = info->GetReg();
-      RecordFpPromotion(res, s_reg);
-      break;
-    }
-  }
-  return res;
-}
-
-// TODO: this is Thumb2 only.  Remove when DoPromotion is refactored.
-RegStorage Mir2Lir::AllocPreservedDouble(int s_reg ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedDouble";
-  UNREACHABLE();
-}
-
-// TODO: this is Thumb2 only.  Remove when DoPromotion is refactored.
-RegStorage Mir2Lir::AllocPreservedSingle(int s_reg ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedSingle";
-  UNREACHABLE();
-}
-
-RegStorage Mir2Lir::AllocTempBody(ArenaVector<RegisterInfo*>& regs, int* next_temp, bool required) {
-  int num_regs = regs.size();
-  int next = *next_temp;
-  for (int i = 0; i< num_regs; i++) {
-    if (next >= num_regs) {
-      next = 0;
-    }
-    RegisterInfo* info = regs[next];
-    // Try to allocate a register that doesn't hold a live value.
-    if (info->IsTemp() && !info->InUse() && info->IsDead()) {
-      // If it's wide, split it up.
-      if (info->IsWide()) {
-        // If the pair was associated with a wide value, unmark the partner as well.
-        if (info->SReg() != INVALID_SREG) {
-          RegisterInfo* partner = GetRegInfo(info->Partner());
-          DCHECK_EQ(info->GetReg().GetRegNum(), partner->Partner().GetRegNum());
-          DCHECK(partner->IsWide());
-          partner->SetIsWide(false);
-        }
-        info->SetIsWide(false);
-      }
-      Clobber(info->GetReg());
-      info->MarkInUse();
-      *next_temp = next + 1;
-      return info->GetReg();
-    }
-    next++;
-  }
-  next = *next_temp;
-  // No free non-live regs.  Anything we can kill?
-  for (int i = 0; i< num_regs; i++) {
-    if (next >= num_regs) {
-      next = 0;
-    }
-    RegisterInfo* info = regs[next];
-    if (info->IsTemp() && !info->InUse()) {
-      // Got one.  Kill it.
-      ClobberSReg(info->SReg());
-      Clobber(info->GetReg());
-      info->MarkInUse();
-      if (info->IsWide()) {
-        RegisterInfo* partner = GetRegInfo(info->Partner());
-        DCHECK_EQ(info->GetReg().GetRegNum(), partner->Partner().GetRegNum());
-        DCHECK(partner->IsWide());
-        info->SetIsWide(false);
-        partner->SetIsWide(false);
-      }
-      *next_temp = next + 1;
-      return info->GetReg();
-    }
-    next++;
-  }
-  if (required) {
-    CodegenDump();
-    DumpRegPools();
-    LOG(FATAL) << "No free temp registers";
-  }
-  return RegStorage::InvalidReg();  // No register available
-}
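// A standalone sketch of the same two-pass, round-robin policy, with Slot and
// PickTemp as hypothetical simplifications: pass 0 takes only dead temps,
// pass 1 evicts the first free live one, and the cursor keeps successive
// allocations rotating through the pool instead of hammering slot 0.
#include <vector>

struct Slot { bool is_temp; bool in_use; bool live; };

int PickTemp(std::vector<Slot>& slots, int* cursor) {
  const int n = static_cast<int>(slots.size());
  for (int pass = 0; pass < 2; ++pass) {
    for (int i = 0; i < n; ++i) {
      const int idx = (*cursor + i) % n;
      Slot& s = slots[idx];
      if (s.is_temp && !s.in_use && (pass == 1 || !s.live)) {
        s.live = false;   // pass 1 clobbers the stale value it evicts.
        s.in_use = true;
        *cursor = idx + 1;
        return idx;
      }
    }
  }
  return -1;  // no free temp; the caller decides whether that is fatal.
}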
-
-RegStorage Mir2Lir::AllocTemp(bool required) {
-  return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, required);
-}
-
-RegStorage Mir2Lir::AllocTempWide(bool required) {
-  RegStorage res;
-  if (reg_pool_->core64_regs_.size() != 0) {
-    res = AllocTempBody(reg_pool_->core64_regs_, &reg_pool_->next_core64_reg_, required);
-  } else {
-    RegStorage low_reg = AllocTemp();
-    RegStorage high_reg = AllocTemp();
-    res = RegStorage::MakeRegPair(low_reg, high_reg);
-  }
-  if (required) {
-    CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kIgnoreRef, FPCheck::kCheckNotFP);
-  }
-  return res;
-}
-
-RegStorage Mir2Lir::AllocTempRef(bool required) {
-  RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, required);
-  if (required) {
-    DCHECK(!res.IsPair());
-    CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP);
-  }
-  return res;
-}
-
-RegStorage Mir2Lir::AllocTempSingle(bool required) {
-  RegStorage res = AllocTempBody(reg_pool_->sp_regs_, &reg_pool_->next_sp_reg_, required);
-  if (required) {
-    DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits();
-    CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
-  }
-  return res;
-}
-
-RegStorage Mir2Lir::AllocTempDouble(bool required) {
-  RegStorage res = AllocTempBody(reg_pool_->dp_regs_, &reg_pool_->next_dp_reg_, required);
-  if (required) {
-    DCHECK(res.IsDouble()) << "Reg: 0x" << std::hex << res.GetRawBits();
-    CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
-  }
-  return res;
-}
-
-RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class, bool required) {
-  DCHECK_NE(reg_class, kRefReg);  // NOTE: the Dalvik width of a reference is always 32 bits.
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble(required);
-  }
-  return AllocTempWide(required);
-}
-
-RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class, bool required) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempSingle(required);
-  } else if (reg_class == kRefReg) {
-    return AllocTempRef(required);
-  }
-  return AllocTemp(required);
-}
-
-RegStorage Mir2Lir::FindLiveReg(ArenaVector<RegisterInfo*>& regs, int s_reg) {
-  RegStorage res;
-  for (RegisterInfo* info : regs) {
-    if ((info->SReg() == s_reg) && info->IsLive()) {
-      res = info->GetReg();
-      break;
-    }
-  }
-  return res;
-}
-
-RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) {
-  RegStorage reg;
-  if (reg_class == kRefReg) {
-    reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg);
-    CheckRegStorage(reg, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP);
-  }
-  if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) {
-    reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
-  }
-  if (!reg.Valid() && (reg_class != kFPReg)) {
-    if (cu_->target64) {
-      reg = FindLiveReg(wide || reg_class == kRefReg ? reg_pool_->core64_regs_ :
-                                                       reg_pool_->core_regs_, s_reg);
-    } else {
-      reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
-    }
-  }
-  if (reg.Valid()) {
-    if (wide && !reg.IsFloat() && !cu_->target64) {
-      // Only allow reg pairs for core regs on 32-bit targets.
-      RegStorage high_reg = FindLiveReg(reg_pool_->core_regs_, s_reg + 1);
-      if (high_reg.Valid()) {
-        reg = RegStorage::MakeRegPair(reg, high_reg);
-        MarkWide(reg);
-      } else {
-        // Only half available.
-        reg = RegStorage::InvalidReg();
-      }
-    }
-    if (reg.Valid() && (wide != GetRegInfo(reg)->IsWide())) {
-      // Width mismatch - don't try to reuse.
-      reg = RegStorage::InvalidReg();
-    }
-  }
-  if (reg.Valid()) {
-    if (reg.IsPair()) {
-      RegisterInfo* info_low = GetRegInfo(reg.GetLow());
-      RegisterInfo* info_high = GetRegInfo(reg.GetHigh());
-      if (info_low->IsTemp()) {
-        info_low->MarkInUse();
-      }
-      if (info_high->IsTemp()) {
-        info_high->MarkInUse();
-      }
-    } else {
-      RegisterInfo* info = GetRegInfo(reg);
-      if (info->IsTemp()) {
-        info->MarkInUse();
-      }
-    }
-  } else {
-    // Either not found, or something didn't match up. Clobber to prevent any stale instances.
-    ClobberSReg(s_reg);
-    if (wide) {
-      ClobberSReg(s_reg + 1);
-    }
-  }
-  CheckRegStorage(reg, WidenessCheck::kIgnoreWide,
-                  reg_class == kRefReg ? RefCheck::kCheckRef : RefCheck::kIgnoreRef,
-                  FPCheck::kIgnoreFP);
-  return reg;
-}
-
-void Mir2Lir::FreeTemp(RegStorage reg) {
-  if (reg.IsPair()) {
-    FreeTemp(reg.GetLow());
-    FreeTemp(reg.GetHigh());
-  } else {
-    RegisterInfo* p = GetRegInfo(reg);
-    if (p->IsTemp()) {
-      p->MarkFree();
-      p->SetIsWide(false);
-      p->SetPartner(reg);
-    }
-  }
-}
-
-void Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  int free_low = rl_free.reg.GetLowReg();
-  int free_high = rl_free.reg.GetHighReg();
-  int keep_low = rl_keep.reg.GetLowReg();
-  int keep_high = rl_keep.reg.GetHighReg();
-  if ((free_low != keep_low) && (free_low != keep_high) &&
-      (free_high != keep_low) && (free_high != keep_high)) {
-    // No overlap, free both
-    FreeTemp(rl_free.reg);
-  }
-}
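-
-// Added example (editorial): rl_free is released only when neither of its
-// halves aliases rl_keep. With keep = {r0, r1} and free = {r1, r2} nothing is
-// freed (r1 overlaps); with keep = {r0, r1} and free = {r2, r3} the whole
-// pair is freed.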
-
-bool Mir2Lir::IsLive(RegStorage reg) {
-  bool res;
-  if (reg.IsPair()) {
-    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
-    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
-    DCHECK_EQ(p_lo->IsLive(), p_hi->IsLive());
-    res = p_lo->IsLive() || p_hi->IsLive();
-  } else {
-    RegisterInfo* p = GetRegInfo(reg);
-    res = p->IsLive();
-  }
-  return res;
-}
-
-bool Mir2Lir::IsTemp(RegStorage reg) {
-  bool res;
-  if (reg.IsPair()) {
-    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
-    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
-    res = p_lo->IsTemp() || p_hi->IsTemp();
-  } else {
-    RegisterInfo* p = GetRegInfo(reg);
-    res = p->IsTemp();
-  }
-  return res;
-}
-
-bool Mir2Lir::IsPromoted(RegStorage reg) {
-  bool res;
-  if (reg.IsPair()) {
-    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
-    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
-    res = !p_lo->IsTemp() || !p_hi->IsTemp();
-  } else {
-    RegisterInfo* p = GetRegInfo(reg);
-    res = !p->IsTemp();
-  }
-  return res;
-}
-
-bool Mir2Lir::IsDirty(RegStorage reg) {
-  bool res;
-  if (reg.IsPair()) {
-    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
-    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
-    res = p_lo->IsDirty() || p_hi->IsDirty();
-  } else {
-    RegisterInfo* p = GetRegInfo(reg);
-    res = p->IsDirty();
-  }
-  return res;
-}
-
-/*
- * Similar to AllocTemp(), but forces the allocation of a specific
- * register.  No check is made to see if the register was previously
- * allocated.  Use with caution.
- */
-void Mir2Lir::LockTemp(RegStorage reg) {
-  DCHECK(IsTemp(reg));
-  if (reg.IsPair()) {
-    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
-    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
-    p_lo->MarkInUse();
-    p_lo->MarkDead();
-    p_hi->MarkInUse();
-    p_hi->MarkDead();
-  } else {
-    RegisterInfo* p = GetRegInfo(reg);
-    p->MarkInUse();
-    p->MarkDead();
-  }
-}
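-
-// Added usage sketch (hypothetical call site, not from this file): LockTemp()
-// is typically used to pin an ABI-mandated register around a fixed sequence,
-// assuming the usual TargetReg() helper:
-//
-//   RegStorage arg0 = TargetReg(kArg0, kNotWide);
-//   LockTemp(arg0);    // Reserve it without going through AllocTemp().
-//   // ... emit code that must keep kArg0 intact ...
-//   FreeTemp(arg0);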
-
-void Mir2Lir::ResetDef(RegStorage reg) {
-  if (reg.IsPair()) {
-    GetRegInfo(reg.GetLow())->ResetDefBody();
-    GetRegInfo(reg.GetHigh())->ResetDefBody();
-  } else {
-    GetRegInfo(reg)->ResetDefBody();
-  }
-}
-
-void Mir2Lir::NullifyRange(RegStorage reg, int s_reg) {
-  RegisterInfo* info = nullptr;
-  RegStorage rs = reg.IsPair() ? reg.GetLow() : reg;
-  if (IsTemp(rs)) {
-    info = GetRegInfo(reg);
-  }
-  if ((info != nullptr) && (info->DefStart() != nullptr) && (info->DefEnd() != nullptr)) {
-    DCHECK_EQ(info->SReg(), s_reg);  // Make sure we're on the same page.
-    for (LIR* p = info->DefStart();; p = p->next) {
-      NopLIR(p);
-      if (p == info->DefEnd()) {
-        break;
-      }
-    }
-  }
-}
-
-/*
- * Mark the beginning and end LIR of a def sequence.  Note that
- * on entry start points to the LIR prior to the beginning of the
- * sequence.
- */
-void Mir2Lir::MarkDef(RegLocation rl, LIR *start, LIR *finish) {
-  DCHECK(!rl.wide);
-  DCHECK(start && start->next);
-  DCHECK(finish);
-  RegisterInfo* p = GetRegInfo(rl.reg);
-  p->SetDefStart(start->next);
-  p->SetDefEnd(finish);
-}
-
-/*
- * Mark the beginning and end LIR of a def sequence.  Note that
- * on entry start points to the LIR prior to the beginning of the
- * sequence.
- */
-void Mir2Lir::MarkDefWide(RegLocation rl, LIR *start, LIR *finish) {
-  DCHECK(rl.wide);
-  DCHECK(start && start->next);
-  DCHECK(finish);
-  RegisterInfo* p;
-  if (rl.reg.IsPair()) {
-    p = GetRegInfo(rl.reg.GetLow());
-    ResetDef(rl.reg.GetHigh());  // Only track low of pair
-  } else {
-    p = GetRegInfo(rl.reg);
-  }
-  p->SetDefStart(start->next);
-  p->SetDefEnd(finish);
-}
-
-void Mir2Lir::ResetDefLoc(RegLocation rl) {
-  DCHECK(!rl.wide);
-  if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-    NullifyRange(rl.reg, rl.s_reg_low);
-  }
-  ResetDef(rl.reg);
-}
-
-void Mir2Lir::ResetDefLocWide(RegLocation rl) {
-  DCHECK(rl.wide);
-  // If pair, only track low reg of pair.
-  RegStorage rs = rl.reg.IsPair() ? rl.reg.GetLow() : rl.reg;
-  if (IsTemp(rs) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-    NullifyRange(rs, rl.s_reg_low);
-  }
-  ResetDef(rs);
-}
-
-void Mir2Lir::ResetDefTracking() {
-  for (RegisterInfo* info : tempreg_info_) {
-    info->ResetDefBody();
-  }
-}
-
-void Mir2Lir::ClobberAllTemps() {
-  for (RegisterInfo* info : tempreg_info_) {
-    ClobberBody(info);
-  }
-}
-
-void Mir2Lir::FlushRegWide(RegStorage reg) {
-  if (reg.IsPair()) {
-    RegisterInfo* info1 = GetRegInfo(reg.GetLow());
-    RegisterInfo* info2 = GetRegInfo(reg.GetHigh());
-    DCHECK(info1 && info2 && info1->IsWide() && info2->IsWide() &&
-           (info1->Partner().ExactlyEquals(info2->GetReg())) &&
-           (info2->Partner().ExactlyEquals(info1->GetReg())));
-    if ((info1->IsLive() && info1->IsDirty()) || (info2->IsLive() && info2->IsDirty())) {
-      if (!(info1->IsTemp() && info2->IsTemp())) {
-        /* Should not happen.  If it does, there's a problem in eval_loc */
-        LOG(FATAL) << "Long half-temp, half-promoted";
-      }
-
-      info1->SetIsDirty(false);
-      info2->SetIsDirty(false);
-      if (mir_graph_->SRegToVReg(info2->SReg()) < mir_graph_->SRegToVReg(info1->SReg())) {
-        info1 = info2;
-      }
-      int v_reg = mir_graph_->SRegToVReg(info1->SReg());
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      StoreBaseDisp(TargetPtrReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile);
-    }
-  } else {
-    RegisterInfo* info = GetRegInfo(reg);
-    if (info->IsLive() && info->IsDirty()) {
-      info->SetIsDirty(false);
-      int v_reg = mir_graph_->SRegToVReg(info->SReg());
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      StoreBaseDisp(TargetPtrReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile);
-    }
-  }
-}
-
-void Mir2Lir::FlushReg(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg);
-  if (info->IsLive() && info->IsDirty()) {
-    info->SetIsDirty(false);
-    int v_reg = mir_graph_->SRegToVReg(info->SReg());
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    StoreBaseDisp(TargetPtrReg(kSp), VRegOffset(v_reg), reg, kWord, kNotVolatile);
-  }
-}
-
-void Mir2Lir::FlushSpecificReg(RegisterInfo* info) {
-  if (info->IsWide()) {
-    FlushRegWide(info->GetReg());
-  } else {
-    FlushReg(info->GetReg());
-  }
-}
-
-void Mir2Lir::FlushAllRegs() {
-  for (RegisterInfo* info : tempreg_info_) {
-    if (info->IsDirty() && info->IsLive()) {
-      FlushSpecificReg(info);
-    }
-    info->MarkDead();
-    info->SetIsWide(false);
-  }
-}
-
-bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) {
-  if (reg_class == kAnyReg) {
-    return true;
-  } else if ((reg_class == kCoreReg) || (reg_class == kRefReg)) {
-    /*
-     * For this purpose, consider Core and Ref to be the same class. We aren't dealing
-     * with width here - that should be checked at a higher level (if needed).
-     */
-    return !reg.IsFloat();
-  } else {
-    return reg.IsFloat();
-  }
-}
-
-void Mir2Lir::MarkLive(RegLocation loc) {
-  RegStorage reg = loc.reg;
-  if (!IsTemp(reg)) {
-    return;
-  }
-  int s_reg = loc.s_reg_low;
-  if (s_reg == INVALID_SREG) {
-    // Can't be live if no associated sreg.
-    if (reg.IsPair()) {
-      GetRegInfo(reg.GetLow())->MarkDead();
-      GetRegInfo(reg.GetHigh())->MarkDead();
-    } else {
-      GetRegInfo(reg)->MarkDead();
-    }
-  } else {
-    if (reg.IsPair()) {
-      RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
-      RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
-      if (info_lo->IsLive() && (info_lo->SReg() == s_reg) && info_hi->IsLive() &&
-          (info_hi->SReg() == s_reg)) {
-        return;  // Already live.
-      }
-      ClobberSReg(s_reg);
-      ClobberSReg(s_reg + 1);
-      info_lo->MarkLive(s_reg);
-      info_hi->MarkLive(s_reg + 1);
-    } else {
-      RegisterInfo* info = GetRegInfo(reg);
-      if (info->IsLive() && (info->SReg() == s_reg)) {
-        return;  // Already live.
-      }
-      ClobberSReg(s_reg);
-      if (loc.wide) {
-        ClobberSReg(s_reg + 1);
-      }
-      info->MarkLive(s_reg);
-    }
-    if (loc.wide) {
-      MarkWide(reg);
-    } else {
-      MarkNarrow(reg);
-    }
-  }
-}
-
-void Mir2Lir::MarkTemp(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg);
-  tempreg_info_.push_back(info);
-  info->SetIsTemp(true);
-}
-
-void Mir2Lir::UnmarkTemp(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg);
-  auto pos = std::find(tempreg_info_.begin(), tempreg_info_.end(), info);
-  DCHECK(pos != tempreg_info_.end());
-  tempreg_info_.erase(pos);
-  info->SetIsTemp(false);
-}
-
-void Mir2Lir::MarkWide(RegStorage reg) {
-  if (reg.IsPair()) {
-    RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
-    RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
-    // Unpair any old partners.
-    if (info_lo->IsWide() && info_lo->Partner().NotExactlyEquals(info_hi->GetReg())) {
-      GetRegInfo(info_lo->Partner())->SetIsWide(false);
-    }
-    if (info_hi->IsWide() && info_hi->Partner().NotExactlyEquals(info_lo->GetReg())) {
-      GetRegInfo(info_hi->Partner())->SetIsWide(false);
-    }
-    info_lo->SetIsWide(true);
-    info_hi->SetIsWide(true);
-    info_lo->SetPartner(reg.GetHigh());
-    info_hi->SetPartner(reg.GetLow());
-  } else {
-    RegisterInfo* info = GetRegInfo(reg);
-    info->SetIsWide(true);
-    info->SetPartner(reg);
-  }
-}
-
-void Mir2Lir::MarkNarrow(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg);
-  info->SetIsWide(false);
-  info->SetPartner(reg);
-}
-
-void Mir2Lir::MarkClean(RegLocation loc) {
-  if (loc.reg.IsPair()) {
-    RegisterInfo* info = GetRegInfo(loc.reg.GetLow());
-    info->SetIsDirty(false);
-    info = GetRegInfo(loc.reg.GetHigh());
-    info->SetIsDirty(false);
-  } else {
-    RegisterInfo* info = GetRegInfo(loc.reg);
-    info->SetIsDirty(false);
-  }
-}
-
-// FIXME: need to verify rules/assumptions about how wide values are treated in 64BitSolos.
-void Mir2Lir::MarkDirty(RegLocation loc) {
-  if (loc.home) {
-    // If already home, can't be dirty
-    return;
-  }
-  if (loc.reg.IsPair()) {
-    RegisterInfo* info = GetRegInfo(loc.reg.GetLow());
-    info->SetIsDirty(true);
-    info = GetRegInfo(loc.reg.GetHigh());
-    info->SetIsDirty(true);
-  } else {
-    RegisterInfo* info = GetRegInfo(loc.reg);
-    info->SetIsDirty(true);
-  }
-}
-
-void Mir2Lir::MarkInUse(RegStorage reg) {
-  if (reg.IsPair()) {
-    GetRegInfo(reg.GetLow())->MarkInUse();
-    GetRegInfo(reg.GetHigh())->MarkInUse();
-  } else {
-    GetRegInfo(reg)->MarkInUse();
-  }
-}
-
-bool Mir2Lir::CheckCorePoolSanity() {
-  for (RegisterInfo* info : tempreg_info_) {
-    int my_sreg = info->SReg();
-    if (info->IsTemp() && info->IsLive() && info->IsWide() && my_sreg != INVALID_SREG) {
-      RegStorage my_reg = info->GetReg();
-      RegStorage partner_reg = info->Partner();
-      RegisterInfo* partner = GetRegInfo(partner_reg);
-      DCHECK(partner != nullptr);
-      DCHECK(partner->IsWide());
-      DCHECK_EQ(my_reg.GetReg(), partner->Partner().GetReg());
-      DCHECK(partner->IsLive());
-      int partner_sreg = partner->SReg();
-      int diff = my_sreg - partner_sreg;
-      DCHECK((diff == 0) || (diff == -1) || (diff == 1));
-    }
-    if (info->Master() != info) {
-      // Aliased.
-      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
-        // If I'm live, master should not be live, but should show liveness in alias set.
-        DCHECK_EQ(info->Master()->SReg(), INVALID_SREG);
-        DCHECK(!info->Master()->IsDead());
-      }
-// TODO: Add checks in !info->IsDead() case to ensure every live bit is owned by exactly 1 reg.
-    }
-    if (info->IsAliased()) {
-      // Has child aliases.
-      DCHECK_EQ(info->Master(), info);
-      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
-        // Master live, no child should be dead - all should show liveness in set.
-        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
-          DCHECK(!p->IsDead());
-          DCHECK_EQ(p->SReg(), INVALID_SREG);
-        }
-      } else if (!info->IsDead()) {
-        // Master not live, one or more aliases must be.
-        bool live_alias = false;
-        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
-          live_alias |= p->IsLive();
-        }
-        DCHECK(live_alias);
-      }
-    }
-    if (info->IsLive() && (info->SReg() == INVALID_SREG)) {
-      // If not fully live, should have INVALID_SREG and defs should be null.
-      DCHECK(info->DefStart() == nullptr);
-      DCHECK(info->DefEnd() == nullptr);
-    }
-  }
-  return true;
-}
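-
-// Added note: CheckCorePoolSanity() always returns true; the real checks live
-// in DCHECKs, so callers wrap it as DCHECK(CheckCorePoolSanity()) (see
-// UpdateLoc below) and the entire walk compiles away in release builds.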
-
-/*
- * Return an updated location record with current in-register status.
- * If the value lives in live temps, reflect that fact.  No code
- * is generated.  If the live value is part of an older pair,
- * clobber both low and high.
- * TUNING: clobbering both is a bit heavy-handed, but the alternative
- * is a bit complex when dealing with FP regs.  Examine code to see
- * if it's worthwhile trying to be more clever here.
- */
-RegLocation Mir2Lir::UpdateLoc(RegLocation loc) {
-  DCHECK(!loc.wide);
-  DCHECK(CheckCorePoolSanity());
-  if (loc.location != kLocPhysReg) {
-    DCHECK((loc.location == kLocDalvikFrame) ||
-         (loc.location == kLocCompilerTemp));
-    RegStorage reg = AllocLiveReg(loc.s_reg_low, loc.ref ? kRefReg : kAnyReg, false);
-    if (reg.Valid()) {
-      bool match = true;
-      RegisterInfo* info = GetRegInfo(reg);
-      match &= !reg.IsPair();
-      match &= !info->IsWide();
-      if (match) {
-        loc.location = kLocPhysReg;
-        loc.reg = reg;
-      } else {
-        Clobber(reg);
-        FreeTemp(reg);
-      }
-    }
-    CheckRegLocation(loc);
-  }
-  return loc;
-}
-
-RegLocation Mir2Lir::UpdateLocWide(RegLocation loc) {
-  DCHECK(loc.wide);
-  DCHECK(CheckCorePoolSanity());
-  if (loc.location != kLocPhysReg) {
-    DCHECK((loc.location == kLocDalvikFrame) ||
-         (loc.location == kLocCompilerTemp));
-    RegStorage reg = AllocLiveReg(loc.s_reg_low, kAnyReg, true);
-    if (reg.Valid()) {
-      bool match = true;
-      if (reg.IsPair()) {
-        // If we've got a register pair, make sure that it was last used as the same pair.
-        RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
-        RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
-        match &= info_lo->IsWide();
-        match &= info_hi->IsWide();
-        match &= (info_lo->Partner().ExactlyEquals(info_hi->GetReg()));
-        match &= (info_hi->Partner().ExactlyEquals(info_lo->GetReg()));
-      } else {
-        RegisterInfo* info = GetRegInfo(reg);
-        match &= info->IsWide();
-        match &= (info->GetReg().ExactlyEquals(info->Partner()));
-      }
-      if (match) {
-        loc.location = kLocPhysReg;
-        loc.reg = reg;
-      } else {
-        Clobber(reg);
-        FreeTemp(reg);
-      }
-    }
-    CheckRegLocation(loc);
-  }
-  return loc;
-}
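-
-// Added note: the extra matching in UpdateLocWide guards against reusing a
-// pair whose halves have since been repartnered - both halves must still be
-// wide and must still name each other as partners. On a mismatch the register
-// is clobbered and the value is left in its Dalvik frame home.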
-
-/* For use in cases we don't know (or care) width */
-RegLocation Mir2Lir::UpdateRawLoc(RegLocation loc) {
-  if (loc.wide) {
-    return UpdateLocWide(loc);
-  } else {
-    return UpdateLoc(loc);
-  }
-}
-
-RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) {
-  DCHECK(loc.wide);
-
-  loc = UpdateLocWide(loc);
-
-  /* If already in registers, we can assume proper form.  Right reg class? */
-  if (loc.location == kLocPhysReg) {
-    if (!RegClassMatches(reg_class, loc.reg)) {
-      // Wrong register class.  Reallocate and transfer ownership.
-      RegStorage new_regs = AllocTypedTempWide(loc.fp, reg_class);
-      // Clobber the old regs.
-      Clobber(loc.reg);
-      // ...and mark the new ones live.
-      loc.reg = new_regs;
-      MarkWide(loc.reg);
-      MarkLive(loc);
-    }
-    CheckRegLocation(loc);
-    return loc;
-  }
-
-  DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-  DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
-  loc.reg = AllocTypedTempWide(loc.fp, reg_class);
-  MarkWide(loc.reg);
-
-  if (update) {
-    loc.location = kLocPhysReg;
-    MarkLive(loc);
-  }
-  CheckRegLocation(loc);
-  return loc;
-}
-
-RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
-  // Narrow reg_class if the loc is a ref.
-  if (loc.ref && reg_class == kAnyReg) {
-    reg_class = kRefReg;
-  }
-
-  if (loc.wide) {
-    return EvalLocWide(loc, reg_class, update);
-  }
-
-  loc = UpdateLoc(loc);
-
-  if (loc.location == kLocPhysReg) {
-    if (!RegClassMatches(reg_class, loc.reg)) {
-      // Wrong register class.  Reallocate and transfer ownership.
-      RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
-      // Clobber the old reg.
-      Clobber(loc.reg);
-      // ...and mark the new one live.
-      loc.reg = new_reg;
-      MarkLive(loc);
-    }
-    CheckRegLocation(loc);
-    return loc;
-  }
-
-  DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-
-  loc.reg = AllocTypedTemp(loc.fp, reg_class);
-  CheckRegLocation(loc);
-
-  if (update) {
-    loc.location = kLocPhysReg;
-    MarkLive(loc);
-  }
-  CheckRegLocation(loc);
-  return loc;
-}
-
-void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
-  // NOTE: This should be in sync with functions that actually generate code for
-  // the opcodes below. However, if we get this wrong, the generated code will
-  // still be correct even if it may be sub-optimal.
-  int opcode = mir->dalvikInsn.opcode;
-  bool uses_method = false;
-  bool uses_pc_rel_load = false;
-  uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max();
-  switch (opcode) {
-    case Instruction::CHECK_CAST:
-    case Instruction::INSTANCE_OF: {
-      if ((opcode == Instruction::CHECK_CAST) &&
-          (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) {
-        break;  // No code generated.
-      }
-      uint32_t type_idx =
-          (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC;
-      bool type_known_final, type_known_abstract, use_declaring_class;
-      bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(
-          cu_->method_idx, *cu_->dex_file, type_idx,
-          &type_known_final, &type_known_abstract, &use_declaring_class);
-      if (opcode == Instruction::CHECK_CAST && !needs_access_check &&
-          cu_->compiler_driver->IsSafeCast(
-              mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) {
-        break;  // No code generated.
-      }
-      if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) {
-        uses_pc_rel_load = true;  // And ignore method use in slow path.
-        dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
-      } else {
-        uses_method = true;
-      }
-      break;
-    }
-
-    case Instruction::CONST_CLASS:
-      if (CanUseOpPcRelDexCacheArrayLoad() &&
-          cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
-                                                           mir->dalvikInsn.vB)) {
-        uses_pc_rel_load = true;  // And ignore method use in slow path.
-        dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB);
-      } else {
-        uses_method = true;
-      }
-      break;
-
-    case Instruction::CONST_STRING:
-    case Instruction::CONST_STRING_JUMBO:
-      if (CanUseOpPcRelDexCacheArrayLoad()) {
-        uses_pc_rel_load = true;  // And ignore method use in slow path.
-        dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB);
-      } else {
-        uses_method = true;
-      }
-      break;
-
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-    case Instruction::INVOKE_VIRTUAL_QUICK:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-      const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir);
-      InvokeType sharp_type = info.GetSharpType();
-      if (info.IsIntrinsic()) {
-        // Nothing to do, if an intrinsic uses ArtMethod* it's in the slow-path - don't count it.
-      } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) {
-        // Nothing to do, the generated code or entrypoint uses method from the stack.
-      } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) {
-        // Nothing to do, the generated code uses method from the stack.
-      } else if (CanUseOpPcRelDexCacheArrayLoad()) {
-        uses_pc_rel_load = true;
-        dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB);
-      } else {
-        uses_method = true;
-      }
-      break;
-    }
-
-    case Instruction::NEW_INSTANCE:
-    case Instruction::NEW_ARRAY:
-    case Instruction::FILLED_NEW_ARRAY:
-    case Instruction::FILLED_NEW_ARRAY_RANGE:
-      uses_method = true;
-      break;
-    case Instruction::FILL_ARRAY_DATA:
-      // Nothing to do, the entrypoint uses method from the stack.
-      break;
-    case Instruction::THROW:
-      // Nothing to do, the entrypoint uses method from the stack.
-      break;
-
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT:
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT: {
-      const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
-      bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode))
-          ? field_info.FastGet()
-          : field_info.FastPut();
-      if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) {
-        if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) {
-          uses_pc_rel_load = true;  // And ignore method use in slow path.
-          dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
-        } else {
-          uses_method = true;
-        }
-      } else {
-        // Nothing to do, the entrypoint uses method from the stack.
-      }
-      break;
-    }
-
-    default:
-      break;
-  }
-  if (uses_method) {
-    core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight;
-  }
-  if (uses_pc_rel_load) {
-    if (pc_rel_temp_ != nullptr) {
-      core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
-      DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max());
-      dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset);
-    } else {
-      // Nothing to do, using PC-relative addressing without promoting base PC to register.
-    }
-  }
-}
-
-/* Use SSA names to count references of base Dalvik v_regs. */
-void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
-  for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
-    RegLocation loc = mir_graph_->reg_location_[i];
-    RefCounts* counts = loc.fp ? fp_counts : core_counts;
-    int p_map_idx = SRegToPMap(loc.s_reg_low);
-    int use_count = mir_graph_->GetUseCount(i);
-    if (loc.fp) {
-      if (loc.wide) {
-        if (WideFPRsAreAliases()) {
-          // Floats and doubles can be counted together.
-          counts[p_map_idx].count += use_count;
-        } else {
-          // Treat doubles as a unit, using upper half of fp_counts array.
-          counts[p_map_idx + num_regs].count += use_count;
-        }
-        i++;
-      } else {
-        counts[p_map_idx].count += use_count;
-      }
-    } else {
-      if (loc.wide && WideGPRsAreAliases()) {
-        i++;
-      }
-      if (!IsInexpensiveConstant(loc)) {
-        counts[p_map_idx].count += use_count;
-      }
-    }
-  }
-
-  // Now analyze the ArtMethod* and pc_rel_temp_ uses.
-  DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0);
-  if (pc_rel_temp_ != nullptr) {
-    DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0);
-  }
-  PreOrderDfsIterator iter(mir_graph_);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    if (bb->block_type == kDead) {
-      continue;
-    }
-    uint32_t weight = mir_graph_->GetUseCountWeight(bb);
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      AnalyzeMIR(core_counts, mir, weight);
-    }
-  }
-}
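-
-// Added note: the `i++` above skips the high half of a wide SSA pair, so a
-// 64-bit value is counted once against its low vreg. When wide FP registers
-// are not aliases of singles, the count instead lands in the upper half of
-// the array (p_map_idx + num_regs), whose entries DoPromotion() tags with
-// STARTING_WIDE_SREG.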
-
-/* qsort callback function, sort descending */
-static int SortCounts(const void *val1, const void *val2) {
-  const Mir2Lir::RefCounts* op1 = reinterpret_cast<const Mir2Lir::RefCounts*>(val1);
-  const Mir2Lir::RefCounts* op2 = reinterpret_cast<const Mir2Lir::RefCounts*>(val2);
-  // Note that we fall back to sorting on reg so we get stable output on differing qsort
-  // implementations (such as on host and target or between local host and build servers).
-  // Note also that if a wide val1 and a non-wide val2 have the same count, then val1 always
-  // ``loses'' (as STARTING_WIDE_SREG is or-ed in val1->s_reg).
-  return (op1->count == op2->count)
-          ? (op1->s_reg - op2->s_reg)
-          : (op1->count < op2->count ? 1 : -1);
-}
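-
-// Added example: with counts {s0: 5, s1: 5, s2: 9}, SortCounts yields the
-// order s2, s0, s1 - descending by count, ascending by s_reg on ties - so the
-// output is identical across qsort() implementations.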
-
-void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) {
-  LOG(INFO) << msg;
-  for (int i = 0; i < size; i++) {
-    if ((arr[i].s_reg & STARTING_WIDE_SREG) != 0) {
-      LOG(INFO) << "s_reg[64_" << (arr[i].s_reg & ~STARTING_WIDE_SREG) << "]: " << arr[i].count;
-    } else {
-      LOG(INFO) << "s_reg[32_" << arr[i].s_reg << "]: " << arr[i].count;
-    }
-  }
-}
-
-/*
- * Note: some portions of this code required even if the kPromoteRegs
- * optimization is disabled.
- */
-void Mir2Lir::DoPromotion() {
-  int num_regs = mir_graph_->GetNumOfCodeAndTempVRs();
-  const int promotion_threshold = 1;
-  // Allocate the promotion map - one entry for each Dalvik vReg or compiler temp
-  promotion_map_ = arena_->AllocArray<PromotionMap>(num_regs, kArenaAllocRegAlloc);
-
-  // Allow target code to add any special registers
-  AdjustSpillMask();
-
-  /*
-   * Simple register promotion. Just do a static count of the uses
-   * of Dalvik registers.  Note that we examine the SSA names, but
-   * count based on original Dalvik register name.  Count refs
-   * separately based on type in order to give allocation
-   * preference to fp doubles - which must be allocated sequential
-   * physical single fp registers starting with an even-numbered
-   * reg.
-   * TUNING: replace with linear scan once we have the ability
-   * to describe register live ranges for GC.
-   */
-  size_t core_reg_count_size = WideGPRsAreAliases() ? num_regs : num_regs * 2;
-  size_t fp_reg_count_size = WideFPRsAreAliases() ? num_regs : num_regs * 2;
-  RefCounts *core_regs = arena_->AllocArray<RefCounts>(core_reg_count_size, kArenaAllocRegAlloc);
-  RefCounts *fp_regs = arena_->AllocArray<RefCounts>(fp_reg_count_size, kArenaAllocRegAlloc);
-  // Set ssa names for original Dalvik registers
-  for (int i = 0; i < num_regs; i++) {
-    core_regs[i].s_reg = fp_regs[i].s_reg = i;
-  }
-
-  // Duplicate in upper half to represent possible wide starting sregs.
-  for (size_t i = num_regs; i < fp_reg_count_size; i++) {
-    fp_regs[i].s_reg = fp_regs[i - num_regs].s_reg | STARTING_WIDE_SREG;
-  }
-  for (size_t i = num_regs; i < core_reg_count_size; i++) {
-    core_regs[i].s_reg = core_regs[i - num_regs].s_reg | STARTING_WIDE_SREG;
-  }
-
-  // Sum use counts of SSA regs by original Dalvik vreg.
-  CountRefs(core_regs, fp_regs, num_regs);
-
-  // Sort the count arrays
-  qsort(core_regs, core_reg_count_size, sizeof(RefCounts), SortCounts);
-  qsort(fp_regs, fp_reg_count_size, sizeof(RefCounts), SortCounts);
-
-  if (cu_->verbose) {
-    DumpCounts(core_regs, core_reg_count_size, "Core regs after sort");
-    DumpCounts(fp_regs, fp_reg_count_size, "Fp regs after sort");
-  }
-
-  if (!(cu_->disable_opt & (1 << kPromoteRegs))) {
-    // Promote fp regs
-    for (size_t i = 0; (i < fp_reg_count_size) && (fp_regs[i].count >= promotion_threshold); i++) {
-      int low_sreg = fp_regs[i].s_reg & ~STARTING_WIDE_SREG;
-      size_t p_map_idx = SRegToPMap(low_sreg);
-      RegStorage reg = RegStorage::InvalidReg();
-      if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
-        // TODO: break out the Thumb2-specific code.
-        if (cu_->instruction_set == kThumb2) {
-          bool wide = fp_regs[i].s_reg & STARTING_WIDE_SREG;
-          if (wide) {
-            if (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg) {
-              // Ignore the result - if we can't alloc a double, we may still be able to alloc singles.
-              AllocPreservedDouble(low_sreg);
-            }
-            // Continue regardless of success - might still be able to grab a single.
-            continue;
-          } else {
-            reg = AllocPreservedSingle(low_sreg);
-          }
-        } else {
-          reg = AllocPreservedFpReg(low_sreg);
-        }
-        if (!reg.Valid()) {
-          break;  // No more left.
-        }
-      }
-    }
-
-    // Promote core regs
-    for (size_t i = 0; (i < core_reg_count_size) &&
-         (core_regs[i].count >= promotion_threshold); i++) {
-      int low_sreg = core_regs[i].s_reg & ~STARTING_WIDE_SREG;
-      size_t p_map_idx = SRegToPMap(low_sreg);
-      if (promotion_map_[p_map_idx].core_location != kLocPhysReg) {
-        RegStorage reg = AllocPreservedCoreReg(low_sreg);
-        if (!reg.Valid()) {
-          break;  // No more left.
-        }
-      }
-    }
-  }
-
-  // Now, update SSA names to new home locations
-  for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
-    RegLocation *curr = &mir_graph_->reg_location_[i];
-    int p_map_idx = SRegToPMap(curr->s_reg_low);
-    int reg_num = curr->fp ? promotion_map_[p_map_idx].fp_reg : promotion_map_[p_map_idx].core_reg;
-    bool wide = curr->wide || (cu_->target64 && curr->ref);
-    RegStorage reg = RegStorage::InvalidReg();
-    if (curr->fp && promotion_map_[p_map_idx].fp_location == kLocPhysReg) {
-      if (wide && cu_->instruction_set == kThumb2) {
-        if (promotion_map_[p_map_idx + 1].fp_location == kLocPhysReg) {
-          int high_reg = promotion_map_[p_map_idx + 1].fp_reg;
-          // TODO: move target-specific restrictions out of here.
-          if (((reg_num & 0x1) == 0) && ((reg_num + 1) == high_reg)) {
-            reg = RegStorage::FloatSolo64(RegStorage::RegNum(reg_num) >> 1);
-          }
-        }
-      } else {
-        reg = wide ? RegStorage::FloatSolo64(reg_num) : RegStorage::FloatSolo32(reg_num);
-      }
-    } else if (!curr->fp && promotion_map_[p_map_idx].core_location == kLocPhysReg) {
-      if (wide && !cu_->target64) {
-        if (promotion_map_[p_map_idx + 1].core_location == kLocPhysReg) {
-          int high_reg = promotion_map_[p_map_idx + 1].core_reg;
-          reg = RegStorage(RegStorage::k64BitPair, reg_num, high_reg);
-        }
-      } else {
-        reg = wide ? RegStorage::Solo64(reg_num) : RegStorage::Solo32(reg_num);
-      }
-    }
-    if (reg.Valid()) {
-      curr->reg = reg;
-      curr->location = kLocPhysReg;
-      curr->home = true;
-    }
-  }
-  if (cu_->verbose) {
-    DumpPromotionMap();
-  }
-}
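-
-// Added summary of the pipeline above (editorial, not in the original file):
-//   1. CountRefs() sums SSA use counts per original Dalvik vreg;
-//   2. qsort() orders both count arrays hottest-first (SortCounts);
-//   3. the hottest vregs greedily claim preserved FP, then core, registers
-//      until AllocPreserved*() has nothing left to hand out;
-//   4. reg_location_ entries whose vregs won a register are rewritten to
-//      kLocPhysReg with home == true.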
-
-/* Returns sp-relative offset in bytes for a VReg */
-int Mir2Lir::VRegOffset(int v_reg) {
-  const DexFile::CodeItem* code_item = mir_graph_->GetCurrentDexCompilationUnit()->GetCodeItem();
-  return StackVisitor::GetVRegOffsetFromQuickCode(code_item, core_spill_mask_,
-                                                  fp_spill_mask_, frame_size_, v_reg,
-                                                  cu_->instruction_set);
-}
-
-/* Returns sp-relative offset in bytes for an SReg */
-int Mir2Lir::SRegOffset(int s_reg) {
-  return VRegOffset(mir_graph_->SRegToVReg(s_reg));
-}
-
-/* Mark register usage state and return long retloc */
-RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) {
-  RegLocation res;
-  switch (reg_class) {
-    case kRefReg: LOG(FATAL); break;
-    case kFPReg: res = LocCReturnDouble(); break;
-    default: res = LocCReturnWide(); break;
-  }
-  Clobber(res.reg);
-  LockTemp(res.reg);
-  MarkWide(res.reg);
-  CheckRegLocation(res);
-  return res;
-}
-
-RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) {
-  RegLocation res;
-  switch (reg_class) {
-    case kRefReg: res = LocCReturnRef(); break;
-    case kFPReg: res = LocCReturnFloat(); break;
-    default: res = LocCReturn(); break;
-  }
-  Clobber(res.reg);
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
-    MarkInUse(res.reg);
-  } else {
-    LockTemp(res.reg);
-  }
-  CheckRegLocation(res);
-  return res;
-}
-
-void Mir2Lir::SimpleRegAlloc() {
-  DoPromotion();
-
-  if (cu_->verbose && !(cu_->disable_opt & (1 << kPromoteRegs))) {
-    LOG(INFO) << "After Promotion";
-    mir_graph_->DumpRegLocTable(mir_graph_->reg_location_, mir_graph_->GetNumSSARegs());
-  }
-
-  /* Set the frame size */
-  frame_size_ = ComputeFrameSize();
-}
-
-/*
- * Get the "real" sreg number associated with an s_reg slot.  In general,
- * s_reg values passed through codegen are the SSA names created by
- * dataflow analysis and refer to slot numbers in the mir_graph_->reg_location_
- * array.  However, renaming is accomplished by simply replacing RegLocation
- * entries in the reg_location_[] array.  Therefore, when location
- * records for operands are first created, we need to ask the locRecord
- * identified by the dataflow pass what its new name is.
- */
-int Mir2Lir::GetSRegHi(int lowSreg) {
-  return (lowSreg == INVALID_SREG) ? INVALID_SREG : lowSreg + 1;
-}
-
-bool Mir2Lir::LiveOut(int s_reg ATTRIBUTE_UNUSED) {
-  // For now.
-  return true;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc
deleted file mode 100644
index 817a69a..0000000
--- a/compiler/dex/quick/resource_mask.cc
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iomanip>
-
-#include "resource_mask.h"
-
-#include "base/bit_utils.h"
-#include "base/arena_allocator.h"
-#include "base/logging.h"
-
-namespace art {
-
-namespace {  // anonymous namespace
-
-constexpr ResourceMask kNoRegMasks[] = {
-    kEncodeNone,
-    kEncodeHeapRef,
-    kEncodeLiteral,
-    kEncodeDalvikReg,
-    ResourceMask::Bit(ResourceMask::kFPStatus),
-    ResourceMask::Bit(ResourceMask::kCCode),
-};
-// The index 127 - bit is the same as CLZ(masks_[1]) for a ResourceMask with only that bit set.
-static_assert(kNoRegMasks[127-ResourceMask::kHeapRef].Equals(
-    kEncodeHeapRef), "kNoRegMasks heap ref index unexpected");
-static_assert(kNoRegMasks[127-ResourceMask::kLiteral].Equals(
-    kEncodeLiteral), "kNoRegMasks literal index unexpected");
-static_assert(kNoRegMasks[127-ResourceMask::kDalvikReg].Equals(
-    kEncodeDalvikReg), "kNoRegMasks dalvik reg index unexpected");
-static_assert(kNoRegMasks[127-ResourceMask::kFPStatus].Equals(
-    ResourceMask::Bit(ResourceMask::kFPStatus)), "kNoRegMasks fp status index unexpected");
-static_assert(kNoRegMasks[127-ResourceMask::kCCode].Equals(
-    ResourceMask::Bit(ResourceMask::kCCode)), "kNoRegMasks ccode index unexpected");
-
-template <size_t special_bit>
-constexpr ResourceMask OneRegOneSpecial(size_t reg) {
-  return ResourceMask::Bit(reg).Union(ResourceMask::Bit(special_bit));
-}
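-
-// Added example: OneRegOneSpecial<ResourceMask::kCCode>(3) builds the mask
-// {bit 3, bit 122}, i.e. "register 3 plus the condition codes".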
-
-// NOTE: Working around gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61484 .
-// This should be a two-dimensional array, kSingleRegMasks[][32], and each line should be
-// enclosed in an extra { }. However, gcc issues a bogus "error: array must be initialized
-// with a brace-enclosed initializer" for that, so we flatten this to a one-dimensional array.
-constexpr ResourceMask kSingleRegMasks[] = {
-#define DEFINE_LIST_32(fn) \
-    fn(0), fn(1), fn(2), fn(3), fn(4), fn(5), fn(6), fn(7),           \
-    fn(8), fn(9), fn(10), fn(11), fn(12), fn(13), fn(14), fn(15),     \
-    fn(16), fn(17), fn(18), fn(19), fn(20), fn(21), fn(22), fn(23),   \
-    fn(24), fn(25), fn(26), fn(27), fn(28), fn(29), fn(30), fn(31)
-    // NOTE: Each line is 512B of constant data, 3KiB in total.
-    DEFINE_LIST_32(ResourceMask::Bit),
-    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kHeapRef>),
-    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kLiteral>),
-    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kDalvikReg>),
-    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kFPStatus>),
-    DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kCCode>),
-#undef DEFINE_LIST_32
-};
-
-constexpr size_t SingleRegMaskIndex(size_t main_index, size_t sub_index) {
-  return main_index * 32u + sub_index;
-}
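-
-// Added worked example: the table is a flattened [6][32] array, so the mask
-// for "register 5 plus a literal reference" is
-//   kSingleRegMasks[SingleRegMaskIndex(127 - ResourceMask::kLiteral, 5)],
-// i.e. row 2 (= 127 - 125), column 5, flat index 2 * 32 + 5 = 69.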
-
-// The index 127 - bit is the same as CLZ(masks_[1]) for a ResourceMask with only that bit set.
-static_assert(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kHeapRef, 0)].Equals(
-    OneRegOneSpecial<ResourceMask::kHeapRef>(0)), "kSingleRegMasks heap ref index unexpected");
-static_assert(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kLiteral, 0)].Equals(
-    OneRegOneSpecial<ResourceMask::kLiteral>(0)), "kSingleRegMasks literal index unexpected");
-static_assert(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kDalvikReg, 0)].Equals(
-    OneRegOneSpecial<ResourceMask::kDalvikReg>(0)), "kSingleRegMasks dalvik reg index unexpected");
-static_assert(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kFPStatus, 0)].Equals(
-    OneRegOneSpecial<ResourceMask::kFPStatus>(0)), "kSingleRegMasks fp status index unexpected");
-static_assert(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kCCode, 0)].Equals(
-    OneRegOneSpecial<ResourceMask::kCCode>(0)), "kSingleRegMasks ccode index unexpected");
-
-// NOTE: arraysize(kNoRegMasks) multiplied by 32 due to the gcc bug workaround, see above.
-static_assert(arraysize(kSingleRegMasks) == arraysize(kNoRegMasks) * 32, "arraysizes unexpected");
-
-constexpr ResourceMask kTwoRegsMasks[] = {
-#define TWO(a, b) ResourceMask::Bit(a).Union(ResourceMask::Bit(b))
-    // NOTE: 16 * 15 / 2 = 120 entries, 16 bytes each, 1920B in total.
-    TWO(0, 1),
-    TWO(0, 2), TWO(1, 2),
-    TWO(0, 3), TWO(1, 3), TWO(2, 3),
-    TWO(0, 4), TWO(1, 4), TWO(2, 4), TWO(3, 4),
-    TWO(0, 5), TWO(1, 5), TWO(2, 5), TWO(3, 5), TWO(4, 5),
-    TWO(0, 6), TWO(1, 6), TWO(2, 6), TWO(3, 6), TWO(4, 6), TWO(5, 6),
-    TWO(0, 7), TWO(1, 7), TWO(2, 7), TWO(3, 7), TWO(4, 7), TWO(5, 7), TWO(6, 7),
-    TWO(0, 8), TWO(1, 8), TWO(2, 8), TWO(3, 8), TWO(4, 8), TWO(5, 8), TWO(6, 8), TWO(7, 8),
-    TWO(0, 9), TWO(1, 9), TWO(2, 9), TWO(3, 9), TWO(4, 9), TWO(5, 9), TWO(6, 9), TWO(7, 9),
-        TWO(8, 9),
-    TWO(0, 10), TWO(1, 10), TWO(2, 10), TWO(3, 10), TWO(4, 10), TWO(5, 10), TWO(6, 10), TWO(7, 10),
-        TWO(8, 10), TWO(9, 10),
-    TWO(0, 11), TWO(1, 11), TWO(2, 11), TWO(3, 11), TWO(4, 11), TWO(5, 11), TWO(6, 11), TWO(7, 11),
-        TWO(8, 11), TWO(9, 11), TWO(10, 11),
-    TWO(0, 12), TWO(1, 12), TWO(2, 12), TWO(3, 12), TWO(4, 12), TWO(5, 12), TWO(6, 12), TWO(7, 12),
-        TWO(8, 12), TWO(9, 12), TWO(10, 12), TWO(11, 12),
-    TWO(0, 13), TWO(1, 13), TWO(2, 13), TWO(3, 13), TWO(4, 13), TWO(5, 13), TWO(6, 13), TWO(7, 13),
-        TWO(8, 13), TWO(9, 13), TWO(10, 13), TWO(11, 13), TWO(12, 13),
-    TWO(0, 14), TWO(1, 14), TWO(2, 14), TWO(3, 14), TWO(4, 14), TWO(5, 14), TWO(6, 14), TWO(7, 14),
-        TWO(8, 14), TWO(9, 14), TWO(10, 14), TWO(11, 14), TWO(12, 14), TWO(13, 14),
-    TWO(0, 15), TWO(1, 15), TWO(2, 15), TWO(3, 15), TWO(4, 15), TWO(5, 15), TWO(6, 15), TWO(7, 15),
-        TWO(8, 15), TWO(9, 15), TWO(10, 15), TWO(11, 15), TWO(12, 15), TWO(13, 15), TWO(14, 15),
-#undef TWO
-};
-static_assert(arraysize(kTwoRegsMasks) == 16 * 15 / 2, "arraysize of kTwoRegsMasks unexpected");
-
-constexpr size_t TwoRegsIndex(size_t higher, size_t lower) {
-  return (higher * (higher - 1)) / 2u + lower;
-}
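-
-// Added worked example: the table stores the strict lower triangle row by
-// row, so TwoRegsIndex(3, 1) = 3 * 2 / 2 + 1 = 4, which is TWO(1, 3) - the
-// fifth initializer above.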
-
-constexpr bool CheckTwoRegsMask(size_t higher, size_t lower) {
-  return ResourceMask::Bit(lower).Union(ResourceMask::Bit(higher)).Equals(
-      kTwoRegsMasks[TwoRegsIndex(higher, lower)]);
-}
-
-constexpr bool CheckTwoRegsMaskLine(size_t line, size_t lower = 0u) {
-  return (lower == line) ||
-      (CheckTwoRegsMask(line, lower) && CheckTwoRegsMaskLine(line, lower + 1u));
-}
-
-constexpr bool CheckTwoRegsMaskTable(size_t lines) {
-  return lines == 0 ||
-      (CheckTwoRegsMaskLine(lines - 1) && CheckTwoRegsMaskTable(lines - 1u));
-}
-
-static_assert(CheckTwoRegsMaskTable(16), "two regs masks table check failed");
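-
-// Added note: the recursive Check* helpers exist because C++11 constexpr
-// functions cannot contain loops; the recursion visits all 16 * 15 / 2 = 120
-// entries at compile time and costs nothing at run time.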
-
-}  // anonymous namespace
-
-const ResourceMask* ResourceMaskCache::GetMask(const ResourceMask& mask) {
-  // Instead of having a deduplication map, we shall just use pre-defined constexpr
-  // masks for the common cases. At most one of these special bits is allowed:
-  constexpr ResourceMask kAllowedSpecialBits = ResourceMask::Bit(ResourceMask::kFPStatus)
-      .Union(ResourceMask::Bit(ResourceMask::kCCode))
-      .Union(kEncodeHeapRef).Union(kEncodeLiteral).Union(kEncodeDalvikReg);
-  const ResourceMask* res = nullptr;
-  // Limit to low 32 regs and the kAllowedSpecialBits.
-  if ((mask.masks_[0] >> 32) == 0u && (mask.masks_[1] & ~kAllowedSpecialBits.masks_[1]) == 0u) {
-    // Check if it's only up to two registers.
-    uint32_t low_regs = static_cast<uint32_t>(mask.masks_[0]);
-    uint32_t low_regs_without_lowest = low_regs & (low_regs - 1u);
-    if (low_regs_without_lowest == 0u && IsPowerOfTwo(mask.masks_[1])) {
-      // 0 or 1 register, 0 or 1 bit from kAllowedSpecialBits. Use a pre-defined mask.
-      size_t index = (mask.masks_[1] != 0u) ? CLZ(mask.masks_[1]) : 0u;
-      DCHECK_LT(index, arraysize(kNoRegMasks));
-      res = (low_regs != 0) ? &kSingleRegMasks[SingleRegMaskIndex(index, CTZ(low_regs))]
-                            : &kNoRegMasks[index];
-    } else if (IsPowerOfTwo(low_regs_without_lowest) && mask.masks_[1] == 0u) {
-      // 2 registers and no other flags. Use predefined mask if higher reg is < 16.
-      if (low_regs_without_lowest < (1u << 16)) {
-        res = &kTwoRegsMasks[TwoRegsIndex(CTZ(low_regs_without_lowest), CTZ(low_regs))];
-      }
-    }
-  } else if (mask.Equals(kEncodeAll)) {
-    res = &kEncodeAll;
-  }
-  if (res != nullptr) {
-    DCHECK(res->Equals(mask))
-        << "(" << std::hex << std::setw(16) << mask.masks_[0]
-        << ", " << std::hex << std::setw(16) << mask.masks_[1]
-        << ") != (" << std::hex << std::setw(16) << res->masks_[0]
-        << ", " << std::hex << std::setw(16) << res->masks_[1] << ")";
-    return res;
-  }
-
-  // TODO: Deduplicate. (At least the most common masks.)
-  void* mem = allocator_->Alloc(sizeof(ResourceMask), kArenaAllocLIRResourceMask);
-  return new (mem) ResourceMask(mask);
-}
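-
-// Added explanation of the bit tricks above (editorial): low_regs &
-// (low_regs - 1u) clears the lowest set bit, so a zero result means "at most
-// one register" and a power-of-two result means "exactly two registers";
-// CTZ() then recovers each register's index, while CLZ(masks_[1]) maps a
-// single special bit to its row in the precomputed tables.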
-
-}  // namespace art
diff --git a/compiler/dex/quick/resource_mask.h b/compiler/dex/quick/resource_mask.h
deleted file mode 100644
index 78e81b2..0000000
--- a/compiler/dex/quick/resource_mask.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_
-#define ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_
-
-#include <stdint.h>
-
-#include "base/logging.h"
-#include "base/value_object.h"
-#include "dex/reg_storage.h"
-
-namespace art {
-
-class ArenaAllocator;
-
-/**
- * @brief Resource mask for LIR insn uses or defs.
- * @details Def/Use mask used for checking dependencies between LIR insns in local
- * optimizations such as load hoisting.
- */
-class ResourceMask {
- private:
-  constexpr ResourceMask(uint64_t mask1, uint64_t mask2)
-      : masks_{ mask1, mask2 } {  // NOLINT
-  }
-
- public:
-  /*
-   * Def/Use encoding in 128-bit use_mask/def_mask.  Low positions used for target-specific
-   * registers (and typically use the register number as the position).  High positions
-   * reserved for common and abstract resources.
-   */
-  enum ResourceBit {
-    kMustNotAlias = 127,
-    kHeapRef = 126,         // Default memory reference type.
-    kLiteral = 125,         // Literal pool memory reference.
-    kDalvikReg = 124,       // Dalvik v_reg memory reference.
-    kFPStatus = 123,
-    kCCode = 122,
-    kLowestCommonResource = kCCode,
-    kHighestCommonResource = kMustNotAlias
-  };
-
-  // Default-constructible.
-  constexpr ResourceMask()
-    : masks_ { 0u, 0u } {
-  }
-
-  // Copy-constructible and copyable.
-  ResourceMask(const ResourceMask& other) = default;
-  ResourceMask& operator=(const ResourceMask& other) = default;
-
-  // Comparable by content.
-  bool operator==(const ResourceMask& other) const {
-    return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1];
-  }
-
-  static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) {
-    return ResourceMask(mask1, mask2);
-  }
-
-  static constexpr ResourceMask Bit(size_t bit) {
-    return ResourceMask(bit >= 64u ? 0u : UINT64_C(1) << bit,
-                        bit >= 64u ? UINT64_C(1) << (bit - 64u) : 0u);
-  }
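-
-  // Added example (editorial): the 128-bit mask is split across two uint64_t
-  // words, so Bit(5) sets bit 5 of masks_[0], while Bit(ResourceMask::kCCode)
-  // (bit 122) sets bit 58 of masks_[1] (122 - 64 = 58).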
-
-  // Two consecutive bits. The start_bit must be even.
-  static constexpr ResourceMask TwoBits(size_t start_bit) {
-    return
-        DCHECK_CONSTEXPR((start_bit & 1u) == 0u, << start_bit << " isn't even", Bit(0))
-        ResourceMask(start_bit >= 64u ? 0u : UINT64_C(3) << start_bit,
-                     start_bit >= 64u ? UINT64_C(3) << (start_bit - 64u) : 0u);
-  }
-
-  static constexpr ResourceMask NoBits() {
-    return ResourceMask(UINT64_C(0), UINT64_C(0));
-  }
-
-  static constexpr ResourceMask AllBits() {
-    return ResourceMask(~UINT64_C(0), ~UINT64_C(0));
-  }
-
-  constexpr ResourceMask Union(const ResourceMask& other) const {
-    return ResourceMask(masks_[0] | other.masks_[0], masks_[1] | other.masks_[1]);
-  }
-
-  constexpr ResourceMask Intersection(const ResourceMask& other) const {
-    return ResourceMask(masks_[0] & other.masks_[0], masks_[1] & other.masks_[1]);
-  }
-
-  constexpr ResourceMask Without(const ResourceMask& other) const {
-    return ResourceMask(masks_[0] & ~other.masks_[0], masks_[1] & ~other.masks_[1]);
-  }
-
-  constexpr bool Equals(const ResourceMask& other) const {
-    return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1];
-  }
-
-  constexpr bool Intersects(const ResourceMask& other) const {
-    return (masks_[0] & other.masks_[0]) != 0u || (masks_[1] & other.masks_[1]) != 0u;
-  }
-
-  void SetBit(size_t bit);
-
-  constexpr bool HasBit(size_t bit) const {
-    return (masks_[bit / 64u] & (UINT64_C(1) << (bit & 63u))) != 0u;
-  }
-
-  ResourceMask& SetBits(const ResourceMask& other) {
-    masks_[0] |= other.masks_[0];
-    masks_[1] |= other.masks_[1];
-    return *this;
-  }
-
-  ResourceMask& ClearBits(const ResourceMask& other) {
-    masks_[0] &= ~other.masks_[0];
-    masks_[1] &= ~other.masks_[1];
-    return *this;
-  }
-
- private:
-  uint64_t masks_[2];
-
-  friend class ResourceMaskCache;
-};
-std::ostream& operator<<(std::ostream& os, const ResourceMask::ResourceBit& rhs);
-
-inline void ResourceMask::SetBit(size_t bit) {
-  DCHECK_LE(bit, kHighestCommonResource);
-  masks_[bit / 64u] |= UINT64_C(1) << (bit & 63u);
-}
-
-constexpr ResourceMask kEncodeNone = ResourceMask::NoBits();
-constexpr ResourceMask kEncodeAll = ResourceMask::AllBits();
-constexpr ResourceMask kEncodeHeapRef = ResourceMask::Bit(ResourceMask::kHeapRef);
-constexpr ResourceMask kEncodeLiteral = ResourceMask::Bit(ResourceMask::kLiteral);
-constexpr ResourceMask kEncodeDalvikReg = ResourceMask::Bit(ResourceMask::kDalvikReg);
-constexpr ResourceMask kEncodeMem = kEncodeLiteral.Union(kEncodeDalvikReg).Union(
-    kEncodeHeapRef).Union(ResourceMask::Bit(ResourceMask::kMustNotAlias));
-
-class ResourceMaskCache {
- public:
-  explicit ResourceMaskCache(ArenaAllocator* allocator)
-      : allocator_(allocator) {
-  }
-
-  const ResourceMask* GetMask(const ResourceMask& mask);
-
- private:
-  ArenaAllocator* allocator_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
deleted file mode 100644
index e5d3841..0000000
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ /dev/null
@@ -1,2072 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_x86.h"
-
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir.h"
-#include "oat.h"
-#include "oat_quick_method_header.h"
-#include "utils.h"
-#include "x86_lir.h"
-
-namespace art {
-
-#define MAX_ASSEMBLER_RETRIES 50
-
-const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = {
-  { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4, false }, "data",  "0x!0d" },
-  { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0, false }, "int 3", "" },
-  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0, false }, "nop",   "" },
-
-#define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \
-                     rm8_r8, rm32_r32, \
-                     r8_rm8, r32_rm32, \
-                     ax8_i8, ax32_i32, \
-                     rm8_i8, rm8_i8_modrm, \
-                     rm32_i32, rm32_i32_modrm, \
-                     rm32_i8, rm32_i8_modrm) \
-{ kX86 ## opname ## 8MR, kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 8AR, kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0,            0,      0, true }, #opname "8TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 8RR, kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 8RM, kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 8RA, kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0,            0,      0, true }, #opname "8RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 8RI, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1, true }, #opname "8RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 8MI, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, false}, #opname "8MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 8AI, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, false}, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0,      1, false}, #opname "8TI", "fs:[!0d],!1d" }, \
-  \
-{ kX86 ## opname ## 16MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 16AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 16TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "16TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 16RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 16RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 16RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66,          0,    r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 16RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "16RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 16RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2, false }, #opname "16RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0,        2, false }, #opname "16TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 16RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0x66,          0,    rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "16TI8", "fs:[!0d],!1d" }, \
-  \
-{ kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "32TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "32RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "32RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "32TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "32TI8", "fs:[!0d],!1d" }, \
-  \
-{ kX86 ## opname ## 64MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64MR", "[!0r+!1d],!2r" }, \
-{ kX86 ## opname ## 64AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
-{ kX86 ## opname ## 64TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0,              0,        0, false }, #opname "64TR", "fs:[!0d],!1r" }, \
-{ kX86 ## opname ## 64RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RR", "!0r,!1r" }, \
-{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,             0, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
-{ kX86 ## opname ## 64RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0, false }, #opname "64RT", "!0r,fs:[!1d]" }, \
-{ kX86 ## opname ## 64RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "64RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4, false }, #opname "64TI", "fs:[!0d],!1d" }, \
-{ kX86 ## opname ## 64RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64RI8", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64MI8", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1, false }, #opname "64TI8", "fs:[!0d],!1d" }
-
-ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0,
-  0x00 /* RegMem8/Reg8 */,     0x01 /* RegMem32/Reg32 */,
-  0x02 /* Reg8/RegMem8 */,     0x03 /* Reg32/RegMem32 */,
-  0x04 /* Rax8/imm8 opcode */, 0x05 /* Rax32/imm32 */,
-  0x80, 0x0 /* RegMem8/imm8 */,
-  0x81, 0x0 /* RegMem32/imm32 */, 0x83, 0x0 /* RegMem32/imm8 */),
-ENCODING_MAP(Or, IS_LOAD | IS_STORE, REG_DEF0, 0,
-  0x08 /* RegMem8/Reg8 */,     0x09 /* RegMem32/Reg32 */,
-  0x0A /* Reg8/RegMem8 */,     0x0B /* Reg32/RegMem32 */,
-  0x0C /* Rax8/imm8 opcode */, 0x0D /* Rax32/imm32 */,
-  0x80, 0x1 /* RegMem8/imm8 */,
-  0x81, 0x1 /* RegMem32/imm32 */, 0x83, 0x1 /* RegMem32/imm8 */),
-ENCODING_MAP(Adc, IS_LOAD | IS_STORE, REG_DEF0, USES_CCODES,
-  0x10 /* RegMem8/Reg8 */,     0x11 /* RegMem32/Reg32 */,
-  0x12 /* Reg8/RegMem8 */,     0x13 /* Reg32/RegMem32 */,
-  0x14 /* Rax8/imm8 opcode */, 0x15 /* Rax32/imm32 */,
-  0x80, 0x2 /* RegMem8/imm8 */,
-  0x81, 0x2 /* RegMem32/imm32 */, 0x83, 0x2 /* RegMem32/imm8 */),
-ENCODING_MAP(Sbb, IS_LOAD | IS_STORE, REG_DEF0, USES_CCODES,
-  0x18 /* RegMem8/Reg8 */,     0x19 /* RegMem32/Reg32 */,
-  0x1A /* Reg8/RegMem8 */,     0x1B /* Reg32/RegMem32 */,
-  0x1C /* Rax8/imm8 opcode */, 0x1D /* Rax32/imm32 */,
-  0x80, 0x3 /* RegMem8/imm8 */,
-  0x81, 0x3 /* RegMem32/imm32 */, 0x83, 0x3 /* RegMem32/imm8 */),
-ENCODING_MAP(And, IS_LOAD | IS_STORE, REG_DEF0, 0,
-  0x20 /* RegMem8/Reg8 */,     0x21 /* RegMem32/Reg32 */,
-  0x22 /* Reg8/RegMem8 */,     0x23 /* Reg32/RegMem32 */,
-  0x24 /* Rax8/imm8 opcode */, 0x25 /* Rax32/imm32 */,
-  0x80, 0x4 /* RegMem8/imm8 */,
-  0x81, 0x4 /* RegMem32/imm32 */, 0x83, 0x4 /* RegMem32/imm8 */),
-ENCODING_MAP(Sub, IS_LOAD | IS_STORE, REG_DEF0, 0,
-  0x28 /* RegMem8/Reg8 */,     0x29 /* RegMem32/Reg32 */,
-  0x2A /* Reg8/RegMem8 */,     0x2B /* Reg32/RegMem32 */,
-  0x2C /* Rax8/imm8 opcode */, 0x2D /* Rax32/imm32 */,
-  0x80, 0x5 /* RegMem8/imm8 */,
-  0x81, 0x5 /* RegMem32/imm32 */, 0x83, 0x5 /* RegMem32/imm8 */),
-ENCODING_MAP(Xor, IS_LOAD | IS_STORE, REG_DEF0, 0,
-  0x30 /* RegMem8/Reg8 */,     0x31 /* RegMem32/Reg32 */,
-  0x32 /* Reg8/RegMem8 */,     0x33 /* Reg32/RegMem32 */,
-  0x34 /* Rax8/imm8 opcode */, 0x35 /* Rax32/imm32 */,
-  0x80, 0x6 /* RegMem8/imm8 */,
-  0x81, 0x6 /* RegMem32/imm32 */, 0x83, 0x6 /* RegMem32/imm8 */),
-ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
-  0x38 /* RegMem8/Reg8 */,     0x39 /* RegMem32/Reg32 */,
-  0x3A /* Reg8/RegMem8 */,     0x3B /* Reg32/RegMem32 */,
-  0x3C /* Rax8/imm8 opcode */, 0x3D /* Rax32/imm32 */,
-  0x80, 0x7 /* RegMem8/imm8 */,
-  0x81, 0x7 /* RegMem32/imm32 */, 0x83, 0x7 /* RegMem32/imm8 */),
-#undef ENCODING_MAP
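
Each ENCODING_MAP invocation above expands to 56 table rows: 11 byte-width forms plus 15 each at 16, 32 and 64 bits. Following the field order the deleted x86_lir.h declares, the brace-initialized skeleton is { prefix1, prefix2, opcode, extra_opcode1, extra_opcode2, modrm_opcode, ax_opcode, immediate_bytes, r8_form }. An illustrative reading (not ART's actual emitter) of the kX86Add32RI row this macro generates, { 0, 0, 0x81, 0, 0, 0x0, 0x05, 4, false }:

    #include <cstdint>

    // Encode "add reg32, imm32" for low registers (eax..edi), showing what
    // the opcode/modrm_opcode/ax_opcode/immediate_bytes fields buy: the
    // ax_opcode (0x05) is the shorter no-modrm form reserved for EAX.
    static int EmitAdd32RI(uint8_t* out, int reg, uint32_t imm) {
      int n = 0;
      if (reg == 0 /* EAX */) {
        out[n++] = 0x05;                     // ax_opcode: short form, no modrm.
      } else {
        out[n++] = 0x81;                     // Generic group opcode...
        out[n++] = 0xC0 | (0x0 << 3) | reg;  // ...modrm: mod=11, /0, rm=reg.
      }
      for (int i = 0; i < 4; ++i) {          // immediate_bytes == 4.
        out[n++] = static_cast<uint8_t>(imm >> (8 * i));
      }
      return n;
    }
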
-
-  { kX86Imul16RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RRI", "!0r,!1r,!2d" },
-  { kX86Imul16RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul16RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-
-  { kX86Imul32RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RRI", "!0r,!1r,!2d" },
-  { kX86Imul32RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul32RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-  { kX86Imul32RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RRI8", "!0r,!1r,!2d" },
-  { kX86Imul32RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul32RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-
-  { kX86Imul64RRI,   kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RRI", "!0r,!1r,!2d" },
-  { kX86Imul64RMI,   kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RMI", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul64RAI,   kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-  { kX86Imul64RRI8,  kRegRegImm,             IS_TERTIARY_OP | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RRI8", "!0r,!1r,!2d" },
-  { kX86Imul64RMI8,  kRegMemImm,   IS_LOAD | IS_QUAD_OP     | REG_DEF0_USE1  | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" },
-  { kX86Imul64RAI8,  kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" },
-
-  { kX86Mov8MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8MR", "[!0r+!1d],!2r" },
-  { kX86Mov8AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8TR", "fs:[!0d],!1r" },
-  { kX86Mov8RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RR", "!0r,!1r" },
-  { kX86Mov8RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RM", "!0r,[!1r+!2d]" },
-  { kX86Mov8RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov8RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RT", "!0r,fs:[!1d]" },
-  { kX86Mov8RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB0, 0, 0, 0, 0, 1, true }, "Mov8RI", "!0r,!1d" },
-  { kX86Mov8MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC6, 0, 0, 0, 0, 1, false}, "Mov8MI", "[!0r+!1d],!2d" },
-  { kX86Mov8AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC6, 0, 0, 0, 0, 1, false}, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1, false}, "Mov8TI", "fs:[!0d],!1d" },
-
-  { kX86Mov16MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0x66,          0,    0x89, 0, 0, 0, 0, 0, false }, "Mov16MR", "[!0r+!1d],!2r" },
-  { kX86Mov16AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0x66,          0,    0x89, 0, 0, 0, 0, 0, false }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0, false }, "Mov16TR", "fs:[!0d],!1r" },
-  { kX86Mov16RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RR", "!0r,!1r" },
-  { kX86Mov16RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RM", "!0r,[!1r+!2d]" },
-  { kX86Mov16RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0x66,          0,    0x8B, 0, 0, 0, 0, 0, false }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov16RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RT", "!0r,fs:[!1d]" },
-  { kX86Mov16RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0x66,          0,    0xB8, 0, 0, 0, 0, 2, false }, "Mov16RI", "!0r,!1d" },
-  { kX86Mov16MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0x66,          0,    0xC7, 0, 0, 0, 0, 2, false }, "Mov16MI", "[!0r+!1d],!2d" },
-  { kX86Mov16AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0x66,          0,    0xC7, 0, 0, 0, 0, 2, false }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2, false }, "Mov16TI", "fs:[!0d],!1d" },
-
-  { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" },
-  { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Movnti32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,   { 0,             0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti32MR", "[!0r+!1d],!2r" },
-  { kX86Movnti32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,  { 0,             0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti32AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" },
-  { kX86Mov32RR, kRegReg,    IS_MOVE  | IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" },
-  { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" },
-  { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RT", "!0r,fs:[!1d]" },
-  { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4, false }, "Mov32RI", "!0r,!1d" },
-  { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32MI", "[!0r+!1d],!2d" },
-  { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32TI", "fs:[!0d],!1d" },
-
-  { kX86Lea32RM, kRegMem,               IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RM", "!0r,[!1r+!2d]" },
-  { kX86Lea32RA, kRegArray,             IS_QUIN_OP | REG_DEF0_USE12,     { 0,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-
-  { kX86Mov64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" },
-  { kX86Mov64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,             0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Movnti64MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,   { REX_W,             0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti64MR", "[!0r+!1d],!2r" },
-  { kX86Movnti64AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,  { REX_W,             0, 0x0F, 0xC3, 0, 0, 0, 0, false }, "Movnti64AR", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" },
-  { kX86Mov64RR, kRegReg,    IS_MOVE  | IS_BINARY_OP   | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" },
-  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" },
-  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RT", "!0r,fs:[!1d]" },
-  { kX86Mov64RI32, kRegImm,             IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64RI32", "!0r,!1d" },
-  { kX86Mov64RI64, kMovRegQuadImm,      IS_TERTIARY_OP | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8, false }, "Mov64RI64", "!0r,!1q" },
-  { kX86Mov64MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64MI", "[!0r+!1d],!2d" },
-  { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64TI", "fs:[!0d],!1d" },
-
-  { kX86Lea64RM, kRegMem,               IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RM", "!0r,[!1r+!2d]" },
-  { kX86Lea64RA, kRegArray,             IS_QUIN_OP | REG_DEF0_USE12,     { REX_W,             0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
-
-  { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RR", "!2c !0r,!1r" },
-  { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RR", "!2c !0r,!1r" },
-
-  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { 0,     0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
-  { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" },
-
-#define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
-{ kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD0, 1, true }, #opname "8RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD0, 1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD0, 1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 8RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8RC", "!0r,cl" }, \
-{ kX86 ## opname ## 8MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 8AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD2, 0, 0, modrm_opcode, 0,    1, true }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \
-  \
-{ kX86 ## opname ## 16RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 16MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 16RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16RC", "!0r,cl" }, \
-{ kX86 ## opname ## 16MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 16AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0,    1, false }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \
-  \
-{ kX86 ## opname ## 32RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 32MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { 0,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 32RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32RC", "!0r,cl" }, \
-{ kX86 ## opname ## 32MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 32AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { 0,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \
-  \
-{ kX86 ## opname ## 64RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64RI", "!0r,!1d" }, \
-{ kX86 ## opname ## 64MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64MI", "[!0r+!1d],!2d" }, \
-{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     |            SETS_CCODES, { REX_W,    0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
-{ kX86 ## opname ## 64RC, kShiftRegCl,                         IS_BINARY_OP   | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64RC", "!0r,cl" }, \
-{ kX86 ## opname ## 64MC, kShiftMemCl,    IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64MC", "[!0r+!1d],cl" }, \
-{ kX86 ## opname ## 64AC, kShiftArrayCl,  IS_LOAD | IS_STORE | IS_QUIN_OP     | REG_USE01     | REG_USEC | SETS_CCODES, { REX_W,    0, 0xD3, 0, 0, modrm_opcode, 0,    0, false }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" }
-
-  SHIFT_ENCODING_MAP(Rol, 0x0),
-  SHIFT_ENCODING_MAP(Ror, 0x1),
-  SHIFT_ENCODING_MAP(Rcl, 0x2),
-  SHIFT_ENCODING_MAP(Rcr, 0x3),
-  SHIFT_ENCODING_MAP(Sal, 0x4),
-  SHIFT_ENCODING_MAP(Shr, 0x5),
-  SHIFT_ENCODING_MAP(Sar, 0x7),
-#undef SHIFT_ENCODING_MAP
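
The shift rows pack three hardware forms into one entry: opcode 0xC1 (0xC0 at byte width) with the group number in modrm_opcode for immediate counts, 0xD3 (0xD2 at byte width) for counts in CL, and the ax_opcode slot reused to hold the one-byte-shorter shift-by-1 opcode. A sketch of the immediate case for SHIFT_ENCODING_MAP(Sar, 0x7), assuming the by-1 short form is chosen whenever the count is 1 (illustrative, not the deleted assembler's code):

    #include <cstdint>

    // Row generated for kX86Sar32RI: { 0, 0, 0xC1, 0, 0, 0x7, 0xD1, 1, false }.
    static int EmitSar32RI(uint8_t* out, int reg, uint8_t amount) {
      uint8_t modrm = 0xC0 | (0x7 << 3) | reg;  // mod=11, /7 (sar), rm=reg.
      if (amount == 1) {
        out[0] = 0xD1;                          // Short form: sar reg, 1.
        out[1] = modrm;
        return 2;
      }
      out[0] = 0xC1;                            // General form: sar reg, imm8.
      out[1] = modrm;
      out[2] = amount;
      return 3;
    }
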
-
-  { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0, false }, "Cmc", "" },
-  { kX86Shld32RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32RRI", "!0r,!1r,!2d" },
-  { kX86Shld32RRC,  kShiftRegRegCl,  IS_TERTIARY_OP | REG_DEF0_USE01  | REG_USEC | SETS_CCODES, { 0,    0, 0x0F, 0xA5, 0, 0, 0, 0, false }, "Shld32RRC", "!0r,!1r,cl" },
-  { kX86Shld32MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shrd32RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32RRI", "!0r,!1r,!2d" },
-  { kX86Shrd32RRC,  kShiftRegRegCl,  IS_TERTIARY_OP | REG_DEF0_USE01  | REG_USEC | SETS_CCODES, { 0,    0, 0x0F, 0xAD, 0, 0, 0, 0, false }, "Shrd32RRC", "!0r,!1r,cl" },
-  { kX86Shrd32MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shld64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64RRI", "!0r,!1r,!2d" },
-  { kX86Shld64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64MRI", "[!0r+!1d],!2r,!3d" },
-  { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
-  { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
-
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
-  { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
-  { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
-
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
-  { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
-  { kX86Test32RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!2d]" },
-
-#define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
-                           reg, reg_kind, reg_flags, \
-                           mem, mem_kind, mem_flags, \
-                           arr, arr_kind, arr_flags, imm, \
-                           b_flags, hw_flags, w_flags, \
-                           b_format, hw_format, w_format) \
-{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #reg, b_format "!0r" }, \
-{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #mem, b_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #reg, hw_format "!0r" }, \
-{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #reg, w_format "!0r" }, \
-{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #mem, w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 64 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #reg, w_format "!0r" }, \
-{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #mem, w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
-
-  UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
-  UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
-
-  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
-  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
-#undef UNARY_ENCODING_MAP
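
All of the unary rows above share the 0xF6/0xF7 group opcodes and are distinguished purely by the 3-bit modrm_opcode the table pre-stores: /2 not, /3 neg, /4 mul, /5 imul, /6 div, /7 idiv. An illustrative register-form encoder:

    #include <cstdint>

    // Emit one member of the 0xF7 group for a 32-bit register operand.
    static int EmitUnary32R(uint8_t* out, uint8_t modrm_opcode, int reg) {
      out[0] = 0xF7;                              // 32-bit group opcode.
      out[1] = 0xC0 | (modrm_opcode << 3) | reg;  // mod=11, /op, rm=reg.
      return 2;
    }
    // e.g. EmitUnary32R(buf, 0x3, 1) encodes "neg ecx" (F7 D9).
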
-
-  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0,     0, 0x99, 0,    0, 0, 0, 0, false }, "Cdq", "" },
-  { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { REX_W, 0, 0x99, 0,    0, 0, 0, 0, false }, "Cqo", "" },
-  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0,     0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap32R", "!0r" },
-  { kX86Bswap64R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { REX_W, 0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap64R", "!0r" },
-  { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0,     0, 0x50, 0,    0, 0, 0, 0, false }, "Push32R",  "!0r" },
-  { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0,     0, 0x58, 0,    0, 0, 0, 0, false }, "Pop32R",   "!0r" },
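
The kRegOpcode rows need no modrm byte at all: the register number is folded into the low three bits of the opcode itself (push is 0x50+reg, pop is 0x58+reg, bswap is 0x0F 0xC8+reg). A sketch, ignoring the REX.B bit that r8-r15 would need:

    #include <cstdint>

    // Encode "push reg32" by adding the register into the opcode byte.
    static int EmitPush32R(uint8_t* out, int reg) {
      out[0] = 0x50 + (reg & 7);  // push eax..edi is 0x50..0x57.
      return 1;
    }
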
-
-#define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-
-// This is a special encoding with r8_form on the second register only
-// for Movzx8 and Movsx8.
-#define EXT_0F_R8_FORM_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, true }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-
-#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-
-#define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
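
The four EXT_0F_* macros above all describe 0x0F-escaped opcodes and differ only in what surrounds the escape: an optional mandatory SSE prefix (0x66/0xF2/0xF3), an optional REX.W for the 64-bit variants, or a second extra opcode byte for the 0x38-escaped SSE4 entries. A sketch of the emission order for the register-register form (illustrative helper with a hypothetical signature):

    #include <cstdint>

    static int EmitExt0fRR(uint8_t* out, uint8_t prefix, bool rex_w,
                           uint8_t opcode, int dst, int src) {
      int n = 0;
      if (prefix != 0) out[n++] = prefix;  // Mandatory SSE prefix, if any.
      if (rex_w) out[n++] = 0x48;          // REX.W for the 64-bit variants.
      out[n++] = 0x0F;                     // Two-byte opcode escape.
      out[n++] = opcode;
      out[n++] = 0xC0 | (dst << 3) | src;  // mod=11, reg=dst, rm=src.
      return n;
    }
    // e.g. EmitExt0fRR(buf, 0xF2, false, 0x10, 0, 1) yields F2 0F 10 C1,
    // i.e. "movsd xmm0, xmm1" per the Movsd row below.
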
-
-  EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0),
-  { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdMR", "[!0r+!1d],!2r" },
-  { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  EXT_0F_ENCODING_MAP(Movss, 0xF3, 0x10, REG_DEF0),
-  { kX86MovssMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssMR", "[!0r+!1d],!2r" },
-  { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  EXT_0F_ENCODING_MAP(Cvtsi2sd,  0xF2, 0x2A, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Cvtsi2ss,  0xF3, 0x2A, REG_DEF0),
-  EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2sd,  0xF2, 0x2A, REG_DEF0),
-  EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2ss,  0xF3, 0x2A, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0),
-  EXT_0F_REX_W_ENCODING_MAP(Cvttsd2sqi, 0xF2, 0x2C, REG_DEF0),
-  EXT_0F_REX_W_ENCODING_MAP(Cvttss2sqi, 0xF3, 0x2C, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Cvtsd2si,  0xF2, 0x2D, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Cvtss2si,  0xF3, 0x2D, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES|REG_USE0),
-  EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES|REG_USE0),
-  EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES|REG_USE0),
-  EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES|REG_USE0),
-  EXT_0F_ENCODING_MAP(Orpd,      0x66, 0x56, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Andpd,     0x66, 0x54, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Andps,     0x00, 0x54, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Xorpd,     0x66, 0x57, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF2, 0x5A, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF3, 0x5A, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Punpcklbw, 0x66, 0x60, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Punpcklwd, 0x66, 0x61, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Punpcklqdq, 0x66, 0x6C, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Sqrtsd,    0xF2, 0x51, REG_DEF0_USE0),
-  EXT_0F_ENCODING2_MAP(Pmulld,   0x66, 0x38, 0x40, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Pmullw,    0x66, 0xD5, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Pmuludq,   0x66, 0xF4, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Mulps,     0x00, 0x59, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Mulpd,     0x66, 0x59, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Paddb,     0x66, 0xFC, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Paddw,     0x66, 0xFD, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Paddd,     0x66, 0xFE, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Paddq,     0x66, 0xD4, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Psadbw,    0x66, 0xF6, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Addps,     0x00, 0x58, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Addpd,     0x66, 0x58, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Psubb,     0x66, 0xF8, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Psubw,     0x66, 0xF9, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Psubd,     0x66, 0xFA, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Psubq,     0x66, 0xFB, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Subps,     0x00, 0x5C, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Subpd,     0x66, 0x5C, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Pand,      0x66, 0xDB, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Por,       0x66, 0xEB, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Pxor,      0x66, 0xEF, REG_DEF0_USE0),
-  EXT_0F_ENCODING2_MAP(Phaddw,   0x66, 0x38, 0x01, REG_DEF0_USE0),
-  EXT_0F_ENCODING2_MAP(Phaddd,   0x66, 0x38, 0x02, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Haddpd,    0x66, 0x7C, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Haddps,    0xF2, 0x7C, REG_DEF0_USE0),
-
-  { kX86PextrbRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextrbRRI", "!0r,!1r,!2d" },
-  { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextrwRRI", "!0r,!1r,!2d" },
-  { kX86PextrdRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrdRRI", "!0r,!1r,!2d" },
-  { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextrbMRI", "[!0r+!1d],!2r,!3d" },
-  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x15, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
-  { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrdMRI", "[!0r+!1d],!2r,!3d" },
-
-  { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
-  { kX86PshufdRRI,  kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshufdRRI", "!0r,!1r,!2d" },
-
-  { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "ShufpsRRI", "!0r,!1r,!2d" },
-  { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "ShufpdRRI", "!0r,!1r,!2d" },
-
-  { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
-  { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
-  { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" },
-  { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1, false }, "PsrldRI", "!0r,!1d" },
-  { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1, false }, "PsrlqRI", "!0r,!1d" },
-  { kX86PsrldqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 3, 0, 1, false }, "PsrldqRI", "!0r,!1d" },
-  { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1, false }, "PsllwRI", "!0r,!1d" },
-  { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" },
-  { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" },
-
-  { kX86Fild32M,  kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M",  "[!0r+!1d]" },
-  { kX86Fild64M,  kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M",  "[!0r+!1d]" },
-  { kX86Fld32M,   kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M",   "[!0r+!1d]" },
-  { kX86Fld64M,   kMem,     IS_LOAD    | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M",   "[!0r+!1d]" },
-  { kX86Fstp32M,  kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r+!1d]" },
-  { kX86Fstp64M,  kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r+!1d]" },
-  { kX86Fst32M,   kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M",  "[!0r+!1d]" },
-  { kX86Fst64M,   kMem,     IS_STORE   | IS_BINARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M",  "[!0r+!1d]" },
-  { kX86Fprem,    kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xD9, 0,    0xF8, 0,    0, 0, 0, 0, false }, "Fprem64",  "" },
-  { kX86Fucompp,  kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xDA, 0,    0xE9, 0,    0, 0, 0, 0, false }, "Fucompp",  "" },
-  { kX86Fstsw16R, kNullary, NO_OPERAND | REG_DEFA | USE_FP_STACK,               { 0x9B, 0xDF, 0xE0, 0,    0, 0, 0, 0, false }, "Fstsw16R", "ax" },
-
-  EXT_0F_ENCODING_MAP(Movdqa,    0x66, 0x6F, REG_DEF0),
-  { kX86MovdqaMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7F, 0, 0, 0, 0, false }, "MovdqaMR", "[!0r+!1d],!2r" },
-  { kX86MovdqaAR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7F, 0, 0, 0, 0, false }, "MovdqaAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
-  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsMR", "[!0r+!1d],!2r" },
-  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  EXT_0F_ENCODING_MAP(Movaps,    0x0, 0x28, REG_DEF0),
-  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsMR", "[!0r+!1d],!2r" },
-  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRM", "!0r,[!1r+!2d]" },
-  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,            { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsMR", "[!0r+!1d],!2r" },
-  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014,           { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRM", "!0r,[!1r+!2d]" },
-  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,            { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsMR", "[!0r+!1d],!2r" },
-  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014,           { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
-  EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0),
-  { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxRR", "!0r,!1r" },
-  { kX86MovqrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxMR", "[!0r+!1d],!2r" },
-  { kX86MovqrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxRR", "!0r,!1r" },
-  { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxMR", "[!0r+!1d],!2r" },
-  { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
-
-  { kX86MovsxdRR, kRegReg,      IS_BINARY_OP | REG_DEF0 | REG_USE1,              { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRR", "!0r,!1r" },
-  { kX86MovsxdRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1,  { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRM", "!0r,[!1r+!2d]" },
-  { kX86MovsxdRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" },
-
-  { kX86Set8R, kRegCond,   IS_BINARY_OP | REG_DEF0   | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, true  }, "Set8R", "!1c !0r" },
-  { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8M", "!2c [!0r+!1d]" },
-  { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP     | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
-
-  // TODO: load/store?
-  // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
-  { kX86Lfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
-  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
-  { kX86Sfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
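
Per the comment above, the fence rows pre-store the modrm opcode so that assembly is a fixed three-byte emission with no operand computation: lfence is 0x0F 0xAE /5, mfence /6, sfence /7. An illustrative encoder:

    #include <cstdint>

    static int EmitFence(uint8_t* out, uint8_t modrm_opcode /* 5, 6, or 7 */) {
      out[0] = 0x0F;
      out[1] = 0xAE;
      out[2] = 0xC0 | (modrm_opcode << 3);  // mod=11, rm=0: E8/F0/F8.
      return 3;
    }
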
-
-  EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Imul64,  REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
-
-  { kX86CmpxchgRR, kRegRegStore,  IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES,   { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "!0r,!1r" },
-  { kX86CmpxchgMR, kMemReg,       IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1d],!2r" },
-  { kX86CmpxchgAR, kArrayReg,     IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
-  { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchg64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0xF0, REX_W, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
-  { kX86LockCmpxchg64M, kMem,     IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1d]" },
-  { kX86LockCmpxchg64A, kArray,   IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES,  { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
-  { kX86XchgMR, kMemReg,          IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02,          { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" },
-
-  EXT_0F_R8_FORM_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
-  EXT_0F_R8_FORM_ENCODING_MAP(Movsx8,  0x00, 0xBE, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movzx8q,  REX_W, 0xB6, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movzx16q, REX_W, 0xB7, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movsx8q,  REX, 0xBE, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Movsx16q, REX_W, 0xBF, REG_DEF0),
-#undef EXT_0F_ENCODING_MAP
-
-  { kX86Jcc8,  kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x70, 0,    0, 0, 0, 0, false }, "Jcc8",  "!1c !0t" },
-  { kX86Jcc32, kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x0F, 0x80, 0, 0, 0, 0, false }, "Jcc32", "!1c !0t" },
-  { kX86Jmp8,  kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xEB, 0,    0, 0, 0, 0, false }, "Jmp8",  "!0t" },
-  { kX86Jmp32, kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP,               { 0,             0, 0xE9, 0,    0, 0, 0, 0, false }, "Jmp32", "!0t" },
-  { kX86JmpR,  kJmp,  IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xFF, 0,    0, 4, 0, 0, false }, "JmpR",  "!0r" },
-  { kX86Jecxz8, kJmp, NO_OPERAND   | IS_BRANCH | NEEDS_FIXUP | REG_USEC,    { 0,             0, 0xE3, 0,    0, 0, 0, 0, false }, "Jecxz", "!0t" },
-  { kX86JmpT,  kJmp,  IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 4, 0, 0, false }, "JmpT",  "fs:[!0d]" },
-  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH | REG_USE0,                  { 0,             0, 0xE8, 0,    0, 0, 0, 0, false }, "CallR", "!0r" },
-  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0, false }, "CallM", "[!0r+!1d]" },
-  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0, false }, "CallA", "[!0r+!1r<<!2d+!3d]" },
-  { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0, false }, "CallT", "fs:[!0d]" },
-  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4, false }, "CallI", "!0d" },
-  { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0, false }, "Ret", "" },
-
-  { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
-  { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr",      "!0r,!1p" },
-  { kX86RepneScasw,    kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" },
-};
-
-std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs) {
-  os << X86Mir2Lir::EncodingMap[rhs].name;
-  return os;
-}
-
-static bool NeedsRex(int32_t raw_reg) {
-  return raw_reg != kRIPReg && RegStorage::RegNum(raw_reg) > 7;
-}
-
-static uint8_t LowRegisterBits(int32_t raw_reg) {
-  uint8_t low_reg = RegStorage::RegNum(raw_reg) & kRegNumMask32;  // 3 bits
-  DCHECK_LT(low_reg, 8);
-  return low_reg;
-}
-
-static bool HasModrm(const X86EncodingMap* entry) {
-  switch (entry->kind) {
-    case kNullary: return false;
-    case kRegOpcode: return false;
-    default: return true;
-  }
-}
-
-static bool HasSib(const X86EncodingMap* entry) {
-  switch (entry->kind) {
-    case kArray: return true;
-    case kArrayReg: return true;
-    case kRegArray: return true;
-    case kArrayImm: return true;
-    case kRegArrayImm: return true;
-    case kShiftArrayImm: return true;
-    case kShiftArrayCl: return true;
-    case kArrayCond: return true;
-    case kCall:
-      switch (entry->opcode) {
-        case kX86CallA: return true;
-        default: return false;
-      }
-    case kPcRel:
-      switch (entry->opcode) {
-        case kX86PcRelLoadRA: return true;
-        default: return false;
-      }
-    default: return false;
-  }
-}
-
-static bool ModrmIsRegReg(const X86EncodingMap* entry) {
-  switch (entry->kind) {
-    // There is no modrm for this kind of instruction, therefore the reg doesn't form part of the
-    // modrm:
-    case kNullary: return true;
-    case kRegOpcode: return true;
-    case kMovRegImm: return true;
-    // Cases with a regular modrm value of 3; when there is only one register, the
-    // modrm reg field holds an opcode extension, so the register sits in the rm
-    // (base) position:
-    case kReg: return true;
-    case kRegReg: return true;
-    case kRegRegStore: return true;
-    case kRegImm: return true;
-    case kRegRegImm: return true;
-    case kRegRegImmStore: return true;
-    case kShiftRegImm: return true;
-    case kShiftRegCl: return true;
-    case kRegCond: return true;
-    case kRegRegCond: return true;
-    case kShiftRegRegCl: return true;
-    case kJmp:
-      switch (entry->opcode) {
-        case kX86JmpR: return true;
-        default: return false;
-      }
-    case kCall:
-      switch (entry->opcode) {
-        case kX86CallR: return true;
-        default: return false;
-      }
-    default: return false;
-  }
-}
-
-static bool IsByteSecondOperand(const X86EncodingMap* entry) {
-  return StartsWith(entry->name, "Movzx8") || StartsWith(entry->name, "Movsx8");
-}
-
-size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
-                               int32_t raw_base, int32_t displacement) {
-  bool has_modrm = HasModrm(entry);
-  bool has_sib = HasSib(entry);
-  bool r8_form = entry->skeleton.r8_form;
-  bool modrm_is_reg_reg = ModrmIsRegReg(entry);
-  if (has_sib) {
-    DCHECK(!modrm_is_reg_reg);
-  }
-  size_t size = 0;
-  if (entry->skeleton.prefix1 > 0) {
-    ++size;
-    if (entry->skeleton.prefix2 > 0) {
-      ++size;
-    }
-  }
-  if (cu_->target64 || kIsDebugBuild) {
-    bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base);
-    if (r8_form) {
-      // Do we need an empty REX prefix to normalize byte registers?
-      registers_need_rex_prefix = registers_need_rex_prefix ||
-          (RegStorage::RegNum(raw_reg) >= 4 && !IsByteSecondOperand(entry));
-      registers_need_rex_prefix = registers_need_rex_prefix ||
-          (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4));
-    }
-    if (registers_need_rex_prefix) {
-      DCHECK(cu_->target64) << "Attempt to use a 64-bit only addressable register "
-          << RegStorage::RegNum(raw_reg) << " with instruction " << entry->name;
-      if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W
-         && entry->skeleton.prefix1 != REX && entry->skeleton.prefix2 != REX) {
-        ++size;  // rex
-      }
-    }
-  }
-  ++size;  // opcode
-  if (entry->skeleton.opcode == 0x0F) {
-    ++size;
-    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
-      ++size;
-    }
-  }
-  if (has_modrm) {
-    ++size;  // modrm
-  }
-  if (!modrm_is_reg_reg) {
-    if (has_sib || (LowRegisterBits(raw_base) == rs_rX86_SP_32.GetRegNum())
-        || (cu_->target64 && entry->skeleton.prefix1 == THREAD_PREFIX)) {
-      // SP requires a SIB byte.
-      // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
-      ++size;
-    }
-    if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) {
-      // BP requires an explicit displacement, even when it's 0.
-      if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA &&
-          entry->opcode != kX86Lea32RM && entry->opcode != kX86Lea64RM) {
-        DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
-      }
-      if (raw_base == kRIPReg) {
-        DCHECK(cu_->target64) <<
-          "Attempt to use a 64-bit RIP adressing with instruction " << entry->name;
-        size += 4;
-      } else {
-        size += IS_SIMM8(displacement) ? 1 : 4;
-      }
-    }
-  }
-  size += entry->skeleton.immediate_bytes;
-  return size;
-}
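-
-// Worked example for ComputeSize (assuming a one-byte opcode, no prefixes and
-// 32-bit registers): a kMemReg access of [esp + 0x40] is 4 bytes, namely
-// opcode + modrm + SIB (an SP base always forces a SIB byte) + disp8.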
-
-size_t X86Mir2Lir::GetInsnSize(LIR* lir) {
-  DCHECK(!IsPseudoLirOp(lir->opcode));
-  const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode];
-  DCHECK_EQ(entry->opcode, lir->opcode) << entry->name;
-
-  switch (entry->kind) {
-    case kData:
-      return 4;  // 4 bytes of data.
-    case kNop:
-      return lir->operands[0];  // Length of nop is sole operand.
-    case kNullary:
-      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0);
-    case kRegOpcode:  // lir operands - 0: reg
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
-    case kReg:  // lir operands - 0: reg
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
-    case kMem:  // lir operands - 0: base, 1: disp
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
-    case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
-    case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
-      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]);
-    case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
-      return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]);
-    case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-      return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
-                         lir->operands[3]);
-    case kThreadReg:  // lir operands - 0: disp, 1: reg
-      // Thread displacement size is always 32bit.
-      return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, 0x12345678);
-    case kRegReg:  // lir operands - 0: reg1, 1: reg2
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
-    case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
-      return ComputeSize(entry, lir->operands[1], NO_REG, lir->operands[0], 0);
-    case kRegMem:  // lir operands - 0: reg, 1: base, 2: disp
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
-    case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-      return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
-                         lir->operands[4]);
-    case kRegThread:  // lir operands - 0: reg, 1: disp
-      // Thread displacement size is always 32bit.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0x12345678);
-    case kRegImm: {  // lir operands - 0: reg, 1: immediate
-      size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0);
-      // AX opcodes don't require the modrm byte.
-      if (entry->skeleton.ax_opcode == 0) {
-        return size;
-      } else {
-        return size - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0);
-      }
-    }
-    case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
-    case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
-    case kThreadImm:  // lir operands - 0: disp, 1: imm
-      // Thread displacement size is always 32bit.
-      return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
-    case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
-      // Note: RegRegImm form passes reg2 as index but encodes it using base.
-      return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, 0);
-    case kRegRegImmStore:  // lir operands - 0: reg2, 1: reg1, 2: imm
-      // Note: RegRegImmStore form passes reg1 as index but encodes it using base.
-      return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, 0);
-    case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
-    case kRegArrayImm:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm
-      return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
-                         lir->operands[4]);
-    case kMovRegImm:  // lir operands - 0: reg, 1: immediate
-    case kMovRegQuadImm:
-      return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 +
-          entry->skeleton.immediate_bytes;
-    case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
-      // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0) -
-          (lir->operands[1] == 1 ? 1 : 0);
-    case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
-      // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]) -
-          (lir->operands[2] == 1 ? 1 : 0);
-    case kShiftArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-      // Shift by immediate one has a shorter opcode.
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]) -
-          (lir->operands[4] == 1 ? 1 : 0);
-    case kShiftRegCl:  // lir operands - 0: reg, 1: cl
-      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1]));
-      // Note: ShiftRegCl form passes reg as reg but encodes it using base.
-      return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0);
-    case kShiftMemCl:  // lir operands - 0: base, 1: disp, 2: cl
-      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2]));
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
-    case kShiftArrayCl:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl
-      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4]));
-      return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
-                         lir->operands[3]);
-    case kShiftRegRegCl:  // lir operands - 0: reg1, 1: reg2, 2: cl
-      DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2]));
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
-    case kRegCond:  // lir operands - 0: reg, 1: cond
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
-    case kMemCond:  // lir operands - 0: base, 1: disp, 2: cond
-      return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
-    case kArrayCond:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
-    case kRegRegCond:  // lir operands - 0: reg1, 1: reg2, 2: cond
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
-    case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: disp, 3:cond
-      DCHECK_EQ(false, entry->skeleton.r8_form);
-      return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]);
-    case kJcc:
-      if (lir->opcode == kX86Jcc8) {
-        return 2;  // opcode + rel8
-      } else {
-        DCHECK(lir->opcode == kX86Jcc32);
-        return 6;  // 2 byte opcode + rel32
-      }
-    case kJmp:
-      if (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jecxz8) {
-        return 2;  // opcode + rel8
-      } else if (lir->opcode == kX86Jmp32) {
-        return 5;  // opcode + rel32
-      } else if (lir->opcode == kX86JmpT) {
-        // Thread displacement size is always 32bit.
-        return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
-      } else {
-        DCHECK(lir->opcode == kX86JmpR);
-        if (NeedsRex(lir->operands[0])) {
-          return 3;  // REX.B + opcode + modrm
-        } else {
-          return 2;  // opcode + modrm
-        }
-      }
-    case kCall:
-      switch (lir->opcode) {
-        case kX86CallI: return 5;  // opcode 0:disp
-        case kX86CallR: return 2;  // opcode modrm
-        case kX86CallM:  // lir operands - 0: base, 1: disp
-          return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]);
-        case kX86CallA:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-          return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]);
-        case kX86CallT:  // lir operands - 0: disp
-          // Thread displacement size is always 32bit.
-          return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678);
-        default:
-          break;
-      }
-      break;
-    case kPcRel:
-      if (entry->opcode == kX86PcRelLoadRA) {
-        // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
-        // Force the displacement size to 32bit, it will hold a computed offset later.
-        return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
-                           0x12345678);
-      } else {
-        DCHECK_EQ(entry->opcode, kX86PcRelAdr);
-        return 5;  // opcode with reg + 4 byte immediate
-      }
-    case kUnimplemented:
-      break;
-  }
-  UNIMPLEMENTED(FATAL) << "Unimplemented size encoding for: " << entry->name;
-  return 0;
-}
-
-static uint8_t ModrmForDisp(int base, int disp) {
-  // BP requires an explicit disp, so do not omit it in the 0 case
-  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
-    return 0;
-  } else if (IS_SIMM8(disp)) {
-    return 1;
-  } else {
-    return 2;
-  }
-}
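-
-// For example, with base EBX a disp of 0 selects mod 0 (no displacement bytes),
-// 0x10 selects mod 1 (disp8) and 0x1234 selects mod 2 (disp32). EBP is the
-// exception because mod 0 with rm = 5 means plain disp32 (or RIP-relative in
-// 64-bit mode), so [ebp] must be encoded as [ebp + 0].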
-
-void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg) {
-  if (kIsDebugBuild) {
-    // Sanity check r8_form is correctly specified.
-    if (entry->skeleton.r8_form) {
-      CHECK(strchr(entry->name, '8') != nullptr) << entry->name;
-    } else {
-      if (entry->skeleton.immediate_bytes != 1) {  // Ignore ...I8 instructions.
-        if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")
-           && !StartsWith(entry->name, "Movzx8q") && !StartsWith(entry->name, "Movsx8q")) {
-          CHECK(strchr(entry->name, '8') == nullptr) << entry->name;
-        }
-      }
-    }
-    if (RegStorage::RegNum(raw_reg) >= 4) {
-      // Byte registers 4-7 here would mean spl, bpl, sil and dil, which need a
-      // REX prefix and are therefore not addressable in 32-bit mode.
-      CHECK(cu_->target64 || !entry->skeleton.r8_form)
-               << "Invalid register " << static_cast<int>(RegStorage::RegNum(raw_reg))
-               << " for instruction " << entry->name << " in "
-               << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    }
-  }
-}
-
-void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
-                            int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) {
-  // REX.WRXB
-  // W - 64-bit operand
-  // R - MODRM.reg
-  // X - SIB.index
-  // B - MODRM.rm/SIB.base
-  bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W);
-  bool r = NeedsRex(raw_reg_r);
-  bool x = NeedsRex(raw_reg_x);
-  bool b = NeedsRex(raw_reg_b);
-  bool r8_form = entry->skeleton.r8_form;
-  bool modrm_is_reg_reg = ModrmIsRegReg(entry);
-
-  uint8_t rex = 0;
-  if (r8_form) {
-    // Do we need an empty REX prefix to normalize byte register addressing?
-    if (RegStorage::RegNum(raw_reg_r) >= 4 && !IsByteSecondOperand(entry)) {
-      rex |= REX;  // REX.0000
-    } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) {
-      rex |= REX;  // REX.0000
-    }
-  }
-  if (w) {
-    rex |= REX_W;  // REX.W000
-  }
-  if (r) {
-    rex |= REX_R;  // REX.0R00
-  }
-  if (x) {
-    rex |= REX_X;  // REX.00X0
-  }
-  if (b) {
-    rex |= REX_B;  // REX.000B
-  }
-  if (entry->skeleton.prefix1 != 0) {
-    if (cu_->target64 && entry->skeleton.prefix1 == THREAD_PREFIX) {
-      // 64-bit thread addressing goes through GS, not FS.
-      code_buffer_.push_back(THREAD_PREFIX_GS);
-    } else {
-      if (entry->skeleton.prefix1 == REX_W || entry->skeleton.prefix1 == REX) {
-        DCHECK(cu_->target64);
-        rex |= entry->skeleton.prefix1;
-        code_buffer_.push_back(rex);
-        rex = 0;
-      } else {
-        code_buffer_.push_back(entry->skeleton.prefix1);
-      }
-    }
-    if (entry->skeleton.prefix2 != 0) {
-      if (entry->skeleton.prefix2 == REX_W || entry->skeleton.prefix1 == REX) {
-        DCHECK(cu_->target64);
-        rex |= entry->skeleton.prefix2;
-        code_buffer_.push_back(rex);
-        rex = 0;
-      } else {
-        code_buffer_.push_back(entry->skeleton.prefix2);
-      }
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
-  if (rex != 0) {
-    DCHECK(cu_->target64);
-    code_buffer_.push_back(rex);
-  }
-}
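-
-// The REX bytes produced here are 0x40 | W<<3 | R<<2 | X<<1 | B. For instance, a
-// 64-bit operation with r15 in the rm/base position gets REX.WB = 0x49, and an
-// 8-bit operand on register 6 needs the bare 0x40 so the encoding means sil
-// rather than dh.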
-
-void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) {
-  code_buffer_.push_back(entry->skeleton.opcode);
-  if (entry->skeleton.opcode == 0x0F) {
-    code_buffer_.push_back(entry->skeleton.extra_opcode1);
-    if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
-      code_buffer_.push_back(entry->skeleton.extra_opcode2);
-    } else {
-      DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  }
-}
-
-void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                                     int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) {
-  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b);
-  EmitOpcode(entry);
-}
-
-void X86Mir2Lir::EmitDisp(uint8_t base, int32_t disp) {
-  // BP requires an explicit disp, so do not omit it in the 0 case
-  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
-    return;
-  } else if (IS_SIMM8(disp)) {
-    code_buffer_.push_back(disp & 0xFF);
-  } else {
-    code_buffer_.push_back(disp & 0xFF);
-    code_buffer_.push_back((disp >> 8) & 0xFF);
-    code_buffer_.push_back((disp >> 16) & 0xFF);
-    code_buffer_.push_back((disp >> 24) & 0xFF);
-  }
-}
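-
-// Displacements are little-endian, so disp 0x12345678 is emitted as the byte
-// sequence 78 56 34 12.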
-
-void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
-  if (cu_->target64) {
-    // Absolute addressing for GS access.
-    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rX86_SP_32.GetRegNum();
-    code_buffer_.push_back(modrm);
-    uint8_t sib = (0/*TIMES_1*/ << 6) | (rs_rX86_SP_32.GetRegNum() << 3) | rs_rBP.GetRegNum();
-    code_buffer_.push_back(sib);
-  } else {
-    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rBP.GetRegNum();
-    code_buffer_.push_back(modrm);
-  }
-}
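-
-// The 64-bit path above builds the [disp32] absolute form: rm = 4 pulls in a SIB
-// byte whose index = 4 means "no index" and whose base = 5 with mod = 0 means
-// disp32 only; combined with the GS segment prefix this reaches thread-local
-// state. The 32-bit path gets the same effect from the shorter mod = 0, rm = 5
-// encoding.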
-
-void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) {
-  DCHECK_LT(reg_or_opcode, 8);
-  if (base == kRIPReg) {
-    // x86_64 RIP handling: always 32 bit displacement.
-    uint8_t modrm = (0x0 << 6) | (reg_or_opcode << 3) | 0x5;
-    code_buffer_.push_back(modrm);
-    code_buffer_.push_back(disp & 0xFF);
-    code_buffer_.push_back((disp >> 8) & 0xFF);
-    code_buffer_.push_back((disp >> 16) & 0xFF);
-    code_buffer_.push_back((disp >> 24) & 0xFF);
-  } else {
-    DCHECK_LT(base, 8);
-    uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
-    code_buffer_.push_back(modrm);
-    if (base == rs_rX86_SP_32.GetRegNum()) {
-      // Special SIB for SP base
-      code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
-    }
-    EmitDisp(base, disp);
-  }
-}
-
-void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
-                                  int scale, int32_t disp) {
-  DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | RegStorage::RegNum(reg_or_opcode) << 3 |
-      rs_rX86_SP_32.GetRegNum();
-  code_buffer_.push_back(modrm);
-  DCHECK_LT(scale, 4);
-  DCHECK_LT(RegStorage::RegNum(index), 8);
-  DCHECK_LT(RegStorage::RegNum(base), 8);
-  uint8_t sib = (scale << 6) | (RegStorage::RegNum(index) << 3) | RegStorage::RegNum(base);
-  code_buffer_.push_back(sib);
-  EmitDisp(base, disp);
-}
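-
-// Worked example: reg = eax with [ebx + esi*4 + 0x10] encodes as modrm 0x44
-// (mod = 1, reg = 0, rm = 4, so a SIB follows), sib 0xB3 (scale = 2,
-// index = esi = 6, base = ebx = 3), then the disp8 byte 0x10.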
-
-void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) {
-  switch (entry->skeleton.immediate_bytes) {
-    case 1:
-      DCHECK(IS_SIMM8(imm));
-      code_buffer_.push_back(imm & 0xFF);
-      break;
-    case 2:
-      DCHECK(IS_SIMM16(imm));
-      code_buffer_.push_back(imm & 0xFF);
-      code_buffer_.push_back((imm >> 8) & 0xFF);
-      break;
-    case 4:
-      DCHECK(IS_SIMM32(imm));
-      code_buffer_.push_back(imm & 0xFF);
-      code_buffer_.push_back((imm >> 8) & 0xFF);
-      code_buffer_.push_back((imm >> 16) & 0xFF);
-      code_buffer_.push_back((imm >> 24) & 0xFF);
-      break;
-    case 8:
-      code_buffer_.push_back(imm & 0xFF);
-      code_buffer_.push_back((imm >> 8) & 0xFF);
-      code_buffer_.push_back((imm >> 16) & 0xFF);
-      code_buffer_.push_back((imm >> 24) & 0xFF);
-      code_buffer_.push_back((imm >> 32) & 0xFF);
-      code_buffer_.push_back((imm >> 40) & 0xFF);
-      code_buffer_.push_back((imm >> 48) & 0xFF);
-      code_buffer_.push_back((imm >> 56) & 0xFF);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes
-                 << ") for instruction: " << entry->name;
-      break;
-  }
-}
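-
-// Immediates are little-endian as well: with immediate_bytes == 2, imm 0x1234 is
-// emitted as 34 12.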
-
-void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg);
-  // There's no 3-byte instruction with +rd
-  DCHECK(entry->skeleton.opcode != 0x0F ||
-         (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
-  DCHECK(!RegStorage::IsFloat(raw_reg));
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  code_buffer_.back() += low_reg;
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) {
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
-  code_buffer_.push_back(modrm);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
-  code_buffer_.push_back(entry->skeleton.opcode);
-  DCHECK_NE(0x0F, entry->skeleton.opcode);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
-                             int scale, int32_t disp) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base);
-  uint8_t low_index = LowRegisterBits(raw_index);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
-                            int32_t raw_reg) {
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t low_base = (raw_base == kRIPReg) ? raw_base : LowRegisterBits(raw_base);
-  EmitModrmDisp(low_reg, low_base, disp);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
-                            int32_t disp) {
-  // Opcode will flip operands.
-  EmitMemReg(entry, raw_base, disp, raw_reg);
-}
-
-void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
-                              int32_t raw_index, int scale, int32_t disp) {
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t low_index = LowRegisterBits(raw_index);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmSibDisp(low_reg, low_base, low_index, scale, disp);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
-                              int scale, int32_t disp, int32_t raw_reg) {
-  // Opcode will flip operands.
-  EmitRegArray(entry, raw_reg, raw_base, raw_index, scale, disp);
-}
-
-void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
-                            int32_t imm) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry,
-                              int32_t raw_base, int32_t raw_index, int scale, int32_t disp,
-                              int32_t imm) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base);
-  uint8_t low_index = LowRegisterBits(raw_index);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  EmitModrmThread(low_reg);
-  code_buffer_.push_back(disp & 0xFF);
-  code_buffer_.push_back((disp >> 8) & 0xFF);
-  code_buffer_.push_back((disp >> 16) & 0xFF);
-  code_buffer_.push_back((disp >> 24) & 0xFF);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
-  if (!IsByteSecondOperand(entry)) {
-    CheckValidByteRegister(entry, raw_reg1);
-  }
-  CheckValidByteRegister(entry, raw_reg2);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
-  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
-  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
-  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
-  code_buffer_.push_back(modrm);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
-                               int32_t imm) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
-  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
-  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
-  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
-  code_buffer_.push_back(modrm);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
-                               int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) {
-  DCHECK(!RegStorage::IsFloat(raw_reg));
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(low_reg, low_base, disp);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry,
-                               int32_t raw_base, int32_t disp, int32_t raw_reg, int32_t imm) {
-  // Opcode will flip operands.
-  EmitRegMemImm(entry, raw_reg, raw_base, disp, imm);
-}
-
-void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
-  if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
-    code_buffer_.push_back(entry->skeleton.ax_opcode);
-  } else {
-    uint8_t low_reg = LowRegisterBits(raw_reg);
-    EmitOpcode(entry);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
-    code_buffer_.push_back(modrm);
-  }
-  EmitImm(entry, imm);
-}
-
-void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
-  EmitModrmThread(entry->skeleton.modrm_opcode);
-  code_buffer_.push_back(disp & 0xFF);
-  code_buffer_.push_back((disp >> 8) & 0xFF);
-  code_buffer_.push_back((disp >> 16) & 0xFF);
-  code_buffer_.push_back((disp >> 24) & 0xFF);
-  EmitImm(entry, imm);
-  DCHECK_EQ(entry->skeleton.ax_opcode, 0);
-}
-
-void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  code_buffer_.push_back(0xB8 + low_reg);
-  switch (entry->skeleton.immediate_bytes) {
-    case 4:
-      code_buffer_.push_back(imm & 0xFF);
-      code_buffer_.push_back((imm >> 8) & 0xFF);
-      code_buffer_.push_back((imm >> 16) & 0xFF);
-      code_buffer_.push_back((imm >> 24) & 0xFF);
-      break;
-    case 8:
-      code_buffer_.push_back(imm & 0xFF);
-      code_buffer_.push_back((imm >> 8) & 0xFF);
-      code_buffer_.push_back((imm >> 16) & 0xFF);
-      code_buffer_.push_back((imm >> 24) & 0xFF);
-      code_buffer_.push_back((imm >> 32) & 0xFF);
-      code_buffer_.push_back((imm >> 40) & 0xFF);
-      code_buffer_.push_back((imm >> 48) & 0xFF);
-      code_buffer_.push_back((imm >> 56) & 0xFF);
-      break;
-    default:
-      LOG(FATAL) << "Unsupported immediate size for EmitMovRegImm: "
-                 << static_cast<uint32_t>(entry->skeleton.immediate_bytes);
-  }
-}
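-
-// This is the B8+rd form: mov eax, 0x12345678 is B8 78 56 34 12, and with a
-// REX.W prefix the same opcode byte takes a full 8-byte immediate (movabs).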
-
-void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
-  if (imm != 1) {
-    code_buffer_.push_back(entry->skeleton.opcode);
-  } else {
-    // Shorter encoding for 1 bit shift
-    code_buffer_.push_back(entry->skeleton.ax_opcode);
-  }
-  DCHECK_NE(0x0F, entry->skeleton.opcode);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
-  code_buffer_.push_back(modrm);
-  if (imm != 1) {
-    DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
-    DCHECK(IS_SIMM8(imm));
-    code_buffer_.push_back(imm & 0xFF);
-  }
-}
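-
-// Example: shl eax, 1 uses the short D1 /4 form with no immediate byte, while
-// shl eax, 5 needs C1 /4 05.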
-
-void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) {
-  CheckValidByteRegister(entry, raw_reg);
-  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
-  code_buffer_.push_back(entry->skeleton.opcode);
-  DCHECK_NE(0x0F, entry->skeleton.opcode);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
-  code_buffer_.push_back(modrm);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base,
-                                int32_t displacement, int32_t raw_cl) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
-  code_buffer_.push_back(entry->skeleton.opcode);
-  DCHECK_NE(0x0F, entry->skeleton.opcode);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, displacement);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitShiftRegRegCl(const X86EncodingMap* entry, int32_t raw_reg1,
-                                   int32_t raw_reg2, int32_t raw_cl) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
-  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
-  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
-  uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
-  code_buffer_.push_back(modrm);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
-                                 int32_t imm) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base);
-  if (imm != 1) {
-    code_buffer_.push_back(entry->skeleton.opcode);
-  } else {
-    // Shorter encoding for 1 bit shift
-    code_buffer_.push_back(entry->skeleton.ax_opcode);
-  }
-  DCHECK_NE(0x0F, entry->skeleton.opcode);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
-  if (imm != 1) {
-    DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
-    DCHECK(IS_SIMM8(imm));
-    code_buffer_.push_back(imm & 0xFF);
-  }
-}
-
-void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) {
-  CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0x0F, entry->skeleton.opcode);
-  code_buffer_.push_back(0x0F);
-  DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
-  DCHECK_GE(cc, 0);
-  DCHECK_LT(cc, 16);
-  code_buffer_.push_back(0x90 | cc);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
-  code_buffer_.push_back(modrm);
-  DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
-}
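-
-// Example: sete al is 0F 94 C0, i.e. 0x90 | cc with cc(equal) = 4, then a modrm
-// with mod = 3, opcode extension 0 and rm = eax.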
-
-void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
-                             int32_t cc) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
-    if (entry->skeleton.prefix2 != 0) {
-      code_buffer_.push_back(entry->skeleton.prefix2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0x0F, entry->skeleton.opcode);
-  code_buffer_.push_back(0x0F);
-  DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
-  DCHECK_GE(cc, 0);
-  DCHECK_LT(cc, 16);
-  code_buffer_.push_back(0x90 | cc);
-  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
-  DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
-}
-
-void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
-                                int32_t cc) {
-  // Generate prefix and opcode without the condition.
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
-
-  // Now add the condition. The last byte of opcode is the one that receives it.
-  DCHECK_GE(cc, 0);
-  DCHECK_LT(cc, 16);
-  code_buffer_.back() += cc;
-
-  // Not expecting to have to encode immediate or do anything special for ModR/M since there are
-  // two registers.
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-
-  // For register to register encoding, the mod is 3.
-  const uint8_t mod = (3 << 6);
-
-  // Encode the ModR/M byte now.
-  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
-  uint8_t low_reg2 = LowRegisterBits(raw_reg2);
-  const uint8_t modrm = mod | (low_reg1 << 3) | low_reg2;
-  code_buffer_.push_back(modrm);
-}
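-
-// Assuming the cmov rows in the map carry the usual 0F 40 base opcode, this
-// yields, e.g., cmovz eax, ecx = 0F 44 C1: 0x40 plus cc = 4, then modrm with
-// mod = 3, reg = eax, rm = ecx.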
-
-void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base,
-                                int32_t disp, int32_t cc) {
-  // Generate prefix and opcode without the condition.
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base);
-
-  // Now add the condition. The last byte of opcode is the one that receives it.
-  DCHECK_GE(cc, 0);
-  DCHECK_LT(cc, 16);
-  code_buffer_.back() += cc;
-
-  // Not expecting to have to encode an immediate or use a ModR/M opcode
-  // extension for this reg-mem form.
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-
-  uint8_t low_reg1 = LowRegisterBits(raw_reg1);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(low_reg1, low_base, disp);
-}
-
-void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int32_t rel) {
-  if (entry->opcode == kX86Jmp8) {
-    DCHECK(IS_SIMM8(rel));
-    code_buffer_.push_back(0xEB);
-    code_buffer_.push_back(rel & 0xFF);
-  } else if (entry->opcode == kX86Jmp32) {
-    code_buffer_.push_back(0xE9);
-    code_buffer_.push_back(rel & 0xFF);
-    code_buffer_.push_back((rel >> 8) & 0xFF);
-    code_buffer_.push_back((rel >> 16) & 0xFF);
-    code_buffer_.push_back((rel >> 24) & 0xFF);
-  } else if (entry->opcode == kX86Jecxz8) {
-    DCHECK(IS_SIMM8(rel));
-    code_buffer_.push_back(0xE3);
-    code_buffer_.push_back(rel & 0xFF);
-  } else {
-    DCHECK(entry->opcode == kX86JmpR);
-    DCHECK_EQ(false, entry->skeleton.r8_form);
-    EmitPrefix(entry, NO_REG, NO_REG, rel);
-    code_buffer_.push_back(entry->skeleton.opcode);
-    uint8_t low_reg = LowRegisterBits(rel);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
-    code_buffer_.push_back(modrm);
-  }
-}
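-
-// Example: an indirect jmp eax is FF E0, opcode FF followed by a modrm with
-// mod = 3, opcode extension /4 and rm = eax.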
-
-void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc) {
-  DCHECK_GE(cc, 0);
-  DCHECK_LT(cc, 16);
-  if (entry->opcode == kX86Jcc8) {
-    DCHECK(IS_SIMM8(rel));
-    code_buffer_.push_back(0x70 | cc);
-    code_buffer_.push_back(rel & 0xFF);
-  } else {
-    DCHECK(entry->opcode == kX86Jcc32);
-    code_buffer_.push_back(0x0F);
-    code_buffer_.push_back(0x80 | cc);
-    code_buffer_.push_back(rel & 0xFF);
-    code_buffer_.push_back((rel >> 8) & 0xFF);
-    code_buffer_.push_back((rel >> 16) & 0xFF);
-    code_buffer_.push_back((rel >> 24) & 0xFF);
-  }
-}
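-
-// Example: je to a nearby target is 74 rel8 (0x70 | 4); once the branch grows it
-// becomes the 6-byte 0F 84 rel32 form (0x80 | 4).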
-
-void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base);
-  uint8_t low_base = LowRegisterBits(raw_base);
-  EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
-  DCHECK_EQ(4, entry->skeleton.immediate_bytes);
-  code_buffer_.push_back(disp & 0xFF);
-  code_buffer_.push_back((disp >> 8) & 0xFF);
-  code_buffer_.push_back((disp >> 16) & 0xFF);
-  code_buffer_.push_back((disp >> 24) & 0xFF);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-}
-
-void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) {
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
-  EmitModrmThread(entry->skeleton.modrm_opcode);
-  code_buffer_.push_back(disp & 0xFF);
-  code_buffer_.push_back((disp >> 8) & 0xFF);
-  code_buffer_.push_back((disp >> 16) & 0xFF);
-  code_buffer_.push_back((disp >> 24) & 0xFF);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-}
-
-void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
-                           int32_t raw_index, int scale, int32_t table_or_disp) {
-  int disp;
-  if (entry->opcode == kX86PcRelLoadRA) {
-    const SwitchTable* tab_rec = UnwrapPointer<SwitchTable>(table_or_disp);
-    disp = tab_rec->offset - tab_rec->anchor->offset;
-  } else {
-    DCHECK(entry->opcode == kX86PcRelAdr);
-    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table);
-    disp = tab_rec->offset;
-  }
-  if (entry->opcode == kX86PcRelLoadRA) {
-    DCHECK_EQ(false, entry->skeleton.r8_form);
-    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table);
-    code_buffer_.push_back(entry->skeleton.opcode);
-    DCHECK_NE(0x0F, entry->skeleton.opcode);
-    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-    uint8_t low_reg = LowRegisterBits(raw_reg);
-    uint8_t modrm = (2 << 6) | (low_reg << 3) | rs_rX86_SP_32.GetRegNum();
-    code_buffer_.push_back(modrm);
-    DCHECK_LT(scale, 4);
-    uint8_t low_base_or_table = LowRegisterBits(raw_base_or_table);
-    uint8_t low_index = LowRegisterBits(raw_index);
-    uint8_t sib = (scale << 6) | (low_index << 3) | low_base_or_table;
-    code_buffer_.push_back(sib);
-    DCHECK_EQ(0, entry->skeleton.immediate_bytes);
-  } else {
-    uint8_t low_reg = LowRegisterBits(raw_reg);
-    code_buffer_.push_back(entry->skeleton.opcode + low_reg);
-  }
-  code_buffer_.push_back(disp & 0xFF);
-  code_buffer_.push_back((disp >> 8) & 0xFF);
-  code_buffer_.push_back((disp >> 16) & 0xFF);
-  code_buffer_.push_back((disp >> 24) & 0xFF);
-  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
-  DCHECK_EQ(0, entry->skeleton.ax_opcode);
-}
-
-void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
-  UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " "
-                         << BuildInsnString(entry->fmt, lir, 0);
-  for (size_t i = 0; i < GetInsnSize(lir); ++i) {
-    code_buffer_.push_back(0xCC);  // Emit the int3 breakpoint instruction.
-  }
-}
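-
-// Padding with 0xCC keeps subsequent LIR offsets consistent with GetInsnSize and
-// traps immediately (int3) if the unassembled instruction is ever executed.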
-
-/*
- * Assemble the LIR into binary instruction format.  Note that we may
- * discover that pc-relative displacements may not fit the selected
- * instruction.  In those cases we will try to substitute a new code
- * sequence or request that the trace be shortened and retried.
- */
-AssemblerStatus X86Mir2Lir::AssembleInstructions(LIR* first_lir_insn,
-                                                 CodeOffset start_addr ATTRIBUTE_UNUSED) {
-  LIR *lir;
-  AssemblerStatus res = kSuccess;  // Assume success
-
-  const bool kVerbosePcFixup = false;
-  for (lir = first_lir_insn; lir != nullptr; lir = NEXT_LIR(lir)) {
-    if (IsPseudoLirOp(lir->opcode)) {
-      continue;
-    }
-
-    if (lir->flags.is_nop) {
-      continue;
-    }
-
-    if (lir->flags.fixup != kFixupNone) {
-      switch (lir->opcode) {
-        case kX86Jcc8: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir != nullptr);
-          int delta = 0;
-          CodeOffset pc;
-          if (IS_SIMM8(lir->operands[0])) {
-            pc = lir->offset + 2 /* opcode + rel8 */;
-          } else {
-            pc = lir->offset + 6 /* 2 byte opcode + rel32 */;
-          }
-          CodeOffset target = target_lir->offset;
-          delta = target - pc;
-          if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
-            if (kVerbosePcFixup) {
-              LOG(INFO) << "Retry for JCC growth at " << lir->offset
-                  << " delta: " << delta << " old delta: " << lir->operands[0];
-            }
-            lir->opcode = kX86Jcc32;
-            lir->flags.size = GetInsnSize(lir);
-            DCHECK(lir->u.m.def_mask->Equals(kEncodeAll));
-            DCHECK(lir->u.m.use_mask->Equals(kEncodeAll));
-            res = kRetryAll;
-          }
-          if (kVerbosePcFixup) {
-            LOG(INFO) << "Source:";
-            DumpLIRInsn(lir, 0);
-            LOG(INFO) << "Target:";
-            DumpLIRInsn(target_lir, 0);
-            LOG(INFO) << "Delta " << delta;
-          }
-          lir->operands[0] = delta;
-          break;
-        }
-        case kX86Jcc32: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir != nullptr);
-          CodeOffset pc = lir->offset + 6 /* 2 byte opcode + rel32 */;
-          CodeOffset target = target_lir->offset;
-          int delta = target - pc;
-          if (kVerbosePcFixup) {
-            LOG(INFO) << "Source:";
-            DumpLIRInsn(lir, 0);
-            LOG(INFO) << "Target:";
-            DumpLIRInsn(target_lir, 0);
-            LOG(INFO) << "Delta " << delta;
-          }
-          lir->operands[0] = delta;
-          break;
-        }
-        case kX86Jecxz8: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir != nullptr);
-          CodeOffset pc;
-          pc = lir->offset + 2;  // opcode + rel8
-          CodeOffset target = target_lir->offset;
-          int delta = target - pc;
-          lir->operands[0] = delta;
-          DCHECK(IS_SIMM8(delta));
-          break;
-        }
-        case kX86Jmp8: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir != nullptr);
-          int delta = 0;
-          CodeOffset pc;
-          if (IS_SIMM8(lir->operands[0])) {
-            pc = lir->offset + 2 /* opcode + rel8 */;
-          } else {
-            pc = lir->offset + 5 /* opcode + rel32 */;
-          }
-          CodeOffset target = target_lir->offset;
-          delta = target - pc;
-          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && delta == 0) {
-            // Useless branch
-            NopLIR(lir);
-            if (kVerbosePcFixup) {
-              LOG(INFO) << "Retry for useless branch at " << lir->offset;
-            }
-            res = kRetryAll;
-          } else if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
-            if (kVerbosePcFixup) {
-              LOG(INFO) << "Retry for JMP growth at " << lir->offset;
-            }
-            lir->opcode = kX86Jmp32;
-            lir->flags.size = GetInsnSize(lir);
-            DCHECK(lir->u.m.def_mask->Equals(kEncodeAll));
-            DCHECK(lir->u.m.use_mask->Equals(kEncodeAll));
-            res = kRetryAll;
-          }
-          lir->operands[0] = delta;
-          break;
-        }
-        case kX86Jmp32: {
-          LIR *target_lir = lir->target;
-          DCHECK(target_lir != nullptr);
-          CodeOffset pc = lir->offset + 5 /* opcode + rel32 */;
-          CodeOffset target = target_lir->offset;
-          int delta = target - pc;
-          lir->operands[0] = delta;
-          break;
-        }
-        default:
-          if (lir->flags.fixup == kFixupLoad) {
-            LIR *target_lir = lir->target;
-            DCHECK(target_lir != nullptr);
-            CodeOffset target = target_lir->offset;
-            // Handle 64 bit RIP addressing.
-            if (lir->operands[1] == kRIPReg) {
-              // Offset is relative to next instruction.
-              lir->operands[2] = target - (lir->offset + lir->flags.size);
-            } else {
-              const LIR* anchor = UnwrapPointer<LIR>(lir->operands[4]);
-              lir->operands[2] = target - anchor->offset;
-              int newSize = GetInsnSize(lir);
-              if (newSize != lir->flags.size) {
-                lir->flags.size = newSize;
-                res = kRetryAll;
-              }
-            }
-          } else if (lir->flags.fixup == kFixupSwitchTable) {
-            DCHECK(cu_->target64);
-            DCHECK_EQ(lir->opcode, kX86Lea64RM)
-                << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
-            DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
-            // Grab the target offset from the saved data.
-            const EmbeddedData* tab_rec = UnwrapPointer<Mir2Lir::EmbeddedData>(lir->operands[4]);
-            CodeOffset target = tab_rec->offset;
-            // Handle 64 bit RIP addressing.
-            // Offset is relative to next instruction.
-            lir->operands[2] = target - (lir->offset + lir->flags.size);
-          }
-          break;
-      }
-    }
-
-    /*
-     * If one of the pc-relative instructions expanded we'll have
-     * to make another pass.  Don't bother to fully assemble the
-     * instruction.
-     */
-    if (res != kSuccess) {
-      continue;
-    }
-    CHECK_EQ(static_cast<size_t>(lir->offset), code_buffer_.size());
-    const X86EncodingMap *entry = &X86Mir2Lir::EncodingMap[lir->opcode];
-    size_t starting_cbuf_size = code_buffer_.size();
-    switch (entry->kind) {
-      case kData:  // 4 bytes of data
-        code_buffer_.push_back(lir->operands[0]);
-        break;
-      case kNullary:  // 1 byte of opcode and possible prefixes.
-        EmitNullary(entry);
-        break;
-      case kRegOpcode:  // lir operands - 0: reg
-        EmitOpRegOpcode(entry, lir->operands[0]);
-        break;
-      case kReg:  // lir operands - 0: reg
-        EmitOpReg(entry, lir->operands[0]);
-        break;
-      case kMem:  // lir operands - 0: base, 1: disp
-        EmitOpMem(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
-        EmitOpArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
-        break;
-      case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
-        EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
-        EmitMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kArrayImm:  // lir operands - 0: base, 1: index, 2: disp, 3:scale, 4:immediate
-        EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                     lir->operands[3], lir->operands[4]);
-        break;
-      case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-        EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                     lir->operands[3], lir->operands[4]);
-        break;
-      case kRegMem:  // lir operands - 0: reg, 1: base, 2: disp
-        EmitRegMem(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kRegArray:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-        EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                     lir->operands[3], lir->operands[4]);
-        break;
-      case kRegThread:  // lir operands - 0: reg, 1: disp
-        EmitRegThread(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kRegReg:  // lir operands - 0: reg1, 1: reg2
-        EmitRegReg(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
-        EmitRegReg(entry, lir->operands[1], lir->operands[0]);
-        break;
-      case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
-        EmitMemRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                      lir->operands[3]);
-        break;
-      case kRegRegImm:  // lir operands - 0: reg1, 1: reg2, 2: imm
-        EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kRegRegImmStore:   // lir operands - 0: reg2, 1: reg1, 2: imm
-        EmitRegRegImm(entry, lir->operands[1], lir->operands[0], lir->operands[2]);
-        break;
-      case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
-        EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                      lir->operands[3]);
-        break;
-      case kRegImm:  // lir operands - 0: reg, 1: immediate
-        EmitRegImm(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kThreadImm:  // lir operands - 0: disp, 1: immediate
-        EmitThreadImm(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kMovRegImm:  // lir operands - 0: reg, 1: immediate
-        EmitMovRegImm(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kMovRegQuadImm: {
-          int64_t value = static_cast<int64_t>(static_cast<int64_t>(lir->operands[1]) << 32 |
-                          static_cast<uint32_t>(lir->operands[2]));
-          EmitMovRegImm(entry, lir->operands[0], value);
-        }
-        break;
-      case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
-        EmitShiftRegImm(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2:immediate
-        EmitShiftMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kShiftRegCl:  // lir operands - 0: reg, 1: cl
-        EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kShiftMemCl:  // lir operands - 0: base, 1:displacement, 2: cl
-        EmitShiftMemCl(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kShiftRegRegCl:  // lir operands - 0: reg1, 1: reg2, 2: cl
-        EmitShiftRegRegCl(entry, lir->operands[1], lir->operands[0], lir->operands[2]);
-        break;
-      case kRegCond:  // lir operands - 0: reg, 1: condition
-        EmitRegCond(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kMemCond:  // lir operands - 0: base, 1: displacement, 2: condition
-        EmitMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: condition
-        EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
-        break;
-      case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: displacement, 3: condition
-        EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                       lir->operands[3]);
-        break;
-      case kJmp:  // lir operands - 0: rel
-        if (entry->opcode == kX86JmpT) {
-          // This works since the instruction format for jmp and call is basically the same and
-          // EmitCallThread loads opcode info.
-          EmitCallThread(entry, lir->operands[0]);
-        } else {
-          EmitJmp(entry, lir->operands[0]);
-        }
-        break;
-      case kJcc:  // lir operands - 0: rel, 1: CC, target assigned
-        EmitJcc(entry, lir->operands[0], lir->operands[1]);
-        break;
-      case kCall:
-        switch (entry->opcode) {
-          case kX86CallI:  // lir operands - 0: disp
-            EmitCallImmediate(entry, lir->operands[0]);
-            break;
-          case kX86CallM:  // lir operands - 0: base, 1: disp
-            EmitCallMem(entry, lir->operands[0], lir->operands[1]);
-            break;
-          case kX86CallT:  // lir operands - 0: disp
-            EmitCallThread(entry, lir->operands[0]);
-            break;
-          default:
-            EmitUnimplemented(entry, lir);
-            break;
-        }
-        break;
-      case kPcRel:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
-        EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2],
-                  lir->operands[3], lir->operands[4]);
-        break;
-      case kNop:  // TODO: these instruction kinds are missing implementations.
-      case kThreadReg:
-      case kRegArrayImm:
-      case kShiftArrayImm:
-      case kShiftArrayCl:
-      case kArrayCond:
-      case kUnimplemented:
-        EmitUnimplemented(entry, lir);
-        break;
-    }
-    DCHECK_EQ(lir->flags.size, GetInsnSize(lir));
-    CHECK_EQ(lir->flags.size, code_buffer_.size() - starting_cbuf_size)
-        << "Instruction size mismatch for entry: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
-  }
-  return res;
-}
-
-// LIR offset assignment.
-// TODO: consolidate w/ Arm assembly mechanism.
-int X86Mir2Lir::AssignInsnOffsets() {
-  LIR* lir;
-  int offset = 0;
-
-  for (lir = first_lir_insn_; lir != nullptr; lir = NEXT_LIR(lir)) {
-    lir->offset = offset;
-    if (LIKELY(!IsPseudoLirOp(lir->opcode))) {
-      if (!lir->flags.is_nop) {
-        offset += lir->flags.size;
-      }
-    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
-      if (offset & 0x2) {
-        offset += 2;
-        lir->operands[0] = 1;
-      } else {
-        lir->operands[0] = 0;
-      }
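-      // Illustrative: at an offset with bit 1 set (e.g. 0x...6) we pad by 2
-      // so the next instruction starts 4-byte aligned; operands[0] == 1 tells
-      // the assembler to emit the 2-byte pad, operands[0] == 0 means no pad.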
-    }
-    /* Pseudo opcodes don't consume space */
-  }
-  return offset;
-}
-
-/*
- * Walk the compilation unit and assign offsets to instructions
- * and literals and compute the total size of the compiled unit.
- * TODO: consolidate w/ Arm assembly mechanism.
- */
-void X86Mir2Lir::AssignOffsets() {
-  int offset = AssignInsnOffsets();
-
-  if (const_vectors_ != nullptr) {
-    // Vector literals must be 16-byte aligned. The header that is placed
-    // in the code section causes misalignment, so we take it into account.
-    // Otherwise, we are sure that the method is 16-byte aligned for x86.
-    DCHECK_EQ(GetInstructionSetAlignment(cu_->instruction_set), 16u);
-    uint32_t bytes_to_fill = (0x10 - ((offset + sizeof(OatQuickMethodHeader)) & 0xF)) & 0xF;
-    offset += bytes_to_fill;
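-    // Worked example (illustrative): if (offset + sizeof(OatQuickMethodHeader))
-    // & 0xF == 0x8, then bytes_to_fill = (0x10 - 0x8) & 0xF == 8, restoring the
-    // 16-byte alignment the vector literals below require.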
-
-    // Now assign each literal the right offset.
-    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
-      p->offset = offset;
-      offset += 16;
-    }
-  }
-
-  /* Const values have to be word aligned */
-  offset = RoundUp(offset, 4);
-
-  /* Set up offsets for literals */
-  data_offset_ = offset;
-
-  offset = AssignLiteralOffset(offset);
-
-  offset = AssignSwitchTablesOffset(offset);
-
-  offset = AssignFillArrayDataOffset(offset);
-
-  total_size_ = offset;
-}
-
-/*
- * Go over each instruction in the list and calculate the offset from the top
- * before sending them off to the assembler. If an out-of-range branch distance
- * is seen, rearrange the instructions a bit to correct it.
- * TODO: consolidate w/ Arm assembly mechanism.
- */
-void X86Mir2Lir::AssembleLIR() {
-  cu_->NewTimingSplit("Assemble");
-
-  // We will remove the method address if we never ended up using it.
-  if (pc_rel_base_reg_.Valid() && !pc_rel_base_reg_used_) {
-    if (kIsDebugBuild) {
-      LOG(WARNING) << "PC-relative addressing base promoted but unused in "
-          << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    }
-    setup_pc_rel_base_reg_->flags.is_nop = true;
-    NEXT_LIR(setup_pc_rel_base_reg_)->flags.is_nop = true;
-  }
-
-  AssignOffsets();
-  int assembler_retries = 0;
-  /*
-   * Assemble here.  Note that we generate code with optimistic assumptions
-   * and, if it is found not to work, we'll have to redo the sequence and retry.
-   */
-
-  while (true) {
-    AssemblerStatus res = AssembleInstructions(first_lir_insn_, 0);
-    if (res == kSuccess) {
-      break;
-    } else {
-      assembler_retries++;
-      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
-        CodegenDump();
-        LOG(FATAL) << "Assembler error - too many retries";
-      }
-      // Redo offsets and try again
-      AssignOffsets();
-      code_buffer_.clear();
-    }
-  }
-
-  // Install literals
-  InstallLiteralPools();
-
-  // Install switch tables
-  InstallSwitchTables();
-
-  // Install fill array data
-  InstallFillArrayData();
-
-  // Create the mapping table and native offset to reference map.
-  cu_->NewTimingSplit("PcMappingTable");
-  CreateMappingTables();
-
-  cu_->NewTimingSplit("GcMap");
-  CreateNativeGcMap();
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/backend_x86.h b/compiler/dex/quick/x86/backend_x86.h
deleted file mode 100644
index f73db94..0000000
--- a/compiler/dex/quick/x86/backend_x86.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_X86_BACKEND_X86_H_
-#define ART_COMPILER_DEX_QUICK_X86_BACKEND_X86_H_
-
-namespace art {
-
-struct CompilationUnit;
-class Mir2Lir;
-class MIRGraph;
-class ArenaAllocator;
-
-Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_X86_BACKEND_X86_H_
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
deleted file mode 100644
index 9cb45a4..0000000
--- a/compiler/dex/quick/x86/call_x86.cc
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the X86 ISA */
-
-#include "codegen_x86.h"
-
-#include "art_method.h"
-#include "base/logging.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "gc/accounting/card_table.h"
-#include "mirror/object_array-inl.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-#include "x86_lir.h"
-
-namespace art {
-
-/*
- * The sparse table in the literal pool is an array of <key,displacement>
- * pairs.
- */
-void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  GenSmallSparseSwitch(mir, table_offset, rl_src);
-}
-
-/*
- * Code pattern will look something like:
- *
- * mov  r_val, ..
- * call 0
- * pop  r_start_of_method
- * sub  r_start_of_method, ..
- * mov  r_key_reg, r_val
- * sub  r_key_reg, low_key
- * cmp  r_key_reg, size-1  ; bound check
- * ja   done
- * mov  r_disp, [r_start_of_method + r_key_reg * 4 + table_offset]
- * add  r_start_of_method, r_disp
- * jmp  r_start_of_method
- * done:
- */
-void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  // Add the table to the list - we'll process it later
-  SwitchTable* tab_rec =
-      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
-  tab_rec->switch_mir = mir;
-  tab_rec->table = table;
-  tab_rec->vaddr = current_dalvik_offset_;
-  int size = table[1];
-  switch_tables_.push_back(tab_rec);
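-  // Payload layout (per the Dex packed-switch-payload format): ushort ident
-  // (0x0100), ushort size, int first_key, then `size` 4-byte relative branch
-  // targets; hence table[1] above is the case count and s4FromSwitchData
-  // below recovers the low key from table[2..3].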
-
-  // Get the switch value
-  rl_src = LoadValue(rl_src, kCoreReg);
-
-  int low_key = s4FromSwitchData(&table[2]);
-  RegStorage keyReg;
-  // Remove the bias, if necessary
-  if (low_key == 0) {
-    keyReg = rl_src.reg;
-  } else {
-    keyReg = AllocTemp();
-    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
-  }
-
-  // Bounds check - if < 0 or >= size continue following switch
-  OpRegImm(kOpCmp, keyReg, size - 1);
-  LIR* branch_over = OpCondBranch(kCondHi, nullptr);
-
-  RegStorage addr_for_jump;
-  if (cu_->target64) {
-    RegStorage table_base = AllocTempWide();
-    // Load the address of the table into table_base.
-    LIR* lea = RawLIR(current_dalvik_offset_, kX86Lea64RM, table_base.GetReg(), kRIPReg,
-                      256, 0, WrapPointer(tab_rec));
-    lea->flags.fixup = kFixupSwitchTable;
-    AppendLIR(lea);
-
-    // Load the offset from the table out of the table.
-    addr_for_jump = AllocTempWide();
-    NewLIR5(kX86MovsxdRA, addr_for_jump.GetReg(), table_base.GetReg(), keyReg.GetReg(), 2, 0);
-
-    // Add the offset from the table to the table base.
-    OpRegReg(kOpAdd, addr_for_jump, table_base);
-    tab_rec->anchor = nullptr;  // Unused for x86-64.
-  } else {
-    // Get the PC to a register and get the anchor.
-    LIR* anchor;
-    RegStorage r_pc = GetPcAndAnchor(&anchor);
-
-    // Load the displacement from the switch table.
-    addr_for_jump = AllocTemp();
-    NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), r_pc.GetReg(), keyReg.GetReg(),
-            2, WrapPointer(tab_rec));
-    // Add displacement and r_pc to get the address.
-    OpRegReg(kOpAdd, addr_for_jump, r_pc);
-    tab_rec->anchor = anchor;
-  }
-
-  // ..and go!
-  NewLIR1(kX86JmpR, addr_for_jump.GetReg());
-
-  /* branch_over target here */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch_over->target = target;
-}
-
-void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = cu_->target64 ?
-      Thread::ExceptionOffset<8>().Int32Value() :
-      Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-  NewLIR2(cu_->target64 ? kX86Mov64RT : kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
-  NewLIR2(cu_->target64 ? kX86Mov64TI : kX86Mov32TI, ex_offset, 0);
-  StoreValue(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
-  DCHECK_EQ(tgt_addr_reg.Is64Bit(), cu_->target64);
-  RegStorage reg_card_base = AllocTempRef();
-  RegStorage reg_card_no = AllocTempRef();
-  int ct_offset = cu_->target64 ?
-      Thread::CardTableOffset<8>().Int32Value() :
-      Thread::CardTableOffset<4>().Int32Value();
-  NewLIR2(cu_->target64 ? kX86Mov64RT : kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
-  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
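-  // Net effect (sketch): card_table[tgt_addr >> kCardShift] = dirty. The
-  // card-table base is biased so that its own low byte doubles as the dirty
-  // value, which is why reg_card_base is also the data operand of the store.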
-  FreeTemp(reg_card_base);
-  FreeTemp(reg_card_no);
-}
-
-static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
-  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
-}
-
-void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
-  /*
-   * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live.  Let the register
-   * allocation mechanism know so it doesn't try to use any of them when
-   * expanding the frame or flushing.  This leaves the utility
-   * code with no spare temps.
-   */
-  const RegStorage arg0 = TargetReg32(kArg0);
-  const RegStorage arg1 = TargetReg32(kArg1);
-  const RegStorage arg2 = TargetReg32(kArg2);
-  LockTemp(arg0);
-  LockTemp(arg1);
-  LockTemp(arg2);
-
-  /*
-   * We can safely skip the stack overflow check if we're
-   * a leaf *and* our frame size < fudge factor.
-   */
-  const InstructionSet isa =  cu_->target64 ? kX86_64 : kX86;
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, isa);
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-
-  // If we are doing an implicit stack overflow check, perform the load immediately
-  // before the stack pointer is decremented and anything is saved.
-  if (!skip_overflow_check &&
-      cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
-    // Implicit stack overflow check.
-    // test eax,[esp + -overflow]
-    int overflow = GetStackOverflowReservedBytes(isa);
-    NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rSP.GetReg(), -overflow);
-    MarkPossibleStackOverflowException();
-  }
-
-  /* Build frame, return address already on stack */
-  cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set));
-  OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
-  cfi_.DefCFAOffset(frame_size_);
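-  // Worked example (illustrative, x86-64): with frame_size_ == 32, the return
-  // address leaves the CFA at SP + 8, the sub above lowers SP by 32 - 8 == 24,
-  // and DefCFAOffset(32) records that the CFA is now SP + 32.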
-
-  /* Spill core callee saves */
-  SpillCoreRegs();
-  SpillFPRegs();
-  if (!skip_overflow_check) {
-    class StackOverflowSlowPath : public LIRSlowPath {
-     public:
-      StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
-      }
-      void Compile() OVERRIDE {
-        m2l_->ResetRegPool();
-        m2l_->ResetDefTracking();
-        GenerateTargetLabel(kPseudoThrowTarget);
-        const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-        m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_);
-        m2l_->cfi().AdjustCFAOffset(-sp_displace_);
-        m2l_->ClobberCallerSave();
-        // Note: x86 has no link register, hence UseLink is false below.
-        m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow,
-                         false /* MarkSafepointPC */, false /* UseLink */);
-        m2l_->cfi().AdjustCFAOffset(sp_displace_);
-      }
-
-     private:
-      const size_t sp_displace_;
-    };
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
-      // TODO: for large frames we should do something like:
-      // spill ebp
-      // lea ebp, [esp + frame_size]
-      // cmp ebp, fs:[stack_end_]
-      // jcc stack_overflow_exception
-      // mov esp, ebp
-      // in case a signal comes in that's not using an alternate signal stack and the large frame
-      // may have moved us outside of the reserved area at the end of the stack.
-      // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-      if (cu_->target64) {
-        OpRegThreadMem(kOpCmp, rs_rX86_SP_64, Thread::StackEndOffset<8>());
-      } else {
-        OpRegThreadMem(kOpCmp, rs_rX86_SP_32, Thread::StackEndOffset<4>());
-      }
-      LIR* branch = OpCondBranch(kCondUlt, nullptr);
-      AddSlowPath(
-        new(arena_)StackOverflowSlowPath(this, branch,
-                                         frame_size_ -
-                                         GetInstructionSetPointerSize(cu_->instruction_set)));
-    }
-  }
-
-  FlushIns(ArgLocs, rl_method);
-
-  // We can promote the PC of an anchor for PC-relative addressing to a register
-  // if it's used at least twice. Without investigating where we should lazily
-  // load the reference, we conveniently load it after flushing inputs.
-  if (pc_rel_base_reg_.Valid()) {
-    DCHECK(!cu_->target64);
-    setup_pc_rel_base_reg_ = OpLoadPc(pc_rel_base_reg_);
-  }
-
-  FreeTemp(arg0);
-  FreeTemp(arg1);
-  FreeTemp(arg2);
-}
-
-void X86Mir2Lir::GenExitSequence() {
-  cfi_.RememberState();
-  /*
-   * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't
-   * allocated by the register utilities as temps.
-   */
-  LockTemp(rs_rX86_RET0);
-  LockTemp(rs_rX86_RET1);
-
-  UnSpillCoreRegs();
-  UnSpillFPRegs();
-  /* Remove frame except for return address */
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set);
-  OpRegImm(kOpAdd, rs_rSP, adjust);
-  cfi_.AdjustCFAOffset(-adjust);
-  // There is only the return PC on the stack now.
-  NewLIR0(kX86Ret);
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size_);
-}
-
-void X86Mir2Lir::GenSpecialExitSequence() {
-  NewLIR0(kX86Ret);
-}
-
-void X86Mir2Lir::GenSpecialEntryForSuspend() {
-  // Keep 16-byte stack alignment, there's already the return address, so
-  //   - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI,
-  //   - for 64-bit push RAX, i.e. ArtMethod*.
-  const int kRegSize = cu_->target64 ? 8 : 4;
-  cfi_.SetCurrentCFAOffset(kRegSize);  // Return address.
-  if (!cu_->target64) {
-    DCHECK(!IsTemp(rs_rSI));
-    DCHECK(!IsTemp(rs_rDI));
-    core_spill_mask_ =
-        (1u << rs_rDI.GetRegNum()) | (1u << rs_rSI.GetRegNum()) | (1u << rs_rRET.GetRegNum());
-    num_core_spills_ = 3u;
-  } else {
-    core_spill_mask_ = (1u << rs_rRET.GetRegNum());
-    num_core_spills_ = 1u;
-  }
-  fp_spill_mask_ = 0u;
-  num_fp_spills_ = 0u;
-  frame_size_ = 16u;
-  core_vmap_table_.clear();
-  fp_vmap_table_.clear();
-  if (!cu_->target64) {
-    NewLIR1(kX86Push32R, rs_rDI.GetReg());
-    cfi_.AdjustCFAOffset(kRegSize);
-    cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0);
-    NewLIR1(kX86Push32R, rs_rSI.GetReg());
-    cfi_.AdjustCFAOffset(kRegSize);
-    cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0);
-  }
-  NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg());  // ArtMethod*
-  cfi_.AdjustCFAOffset(kRegSize);
-  // Do not generate CFI for scratch register.
-}
-
-void X86Mir2Lir::GenSpecialExitForSuspend() {
-  const int kRegSize = cu_->target64 ? 8 : 4;
-  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
-  NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg());  // ArtMethod*
-  cfi_.AdjustCFAOffset(-kRegSize);
-  if (!cu_->target64) {
-    NewLIR1(kX86Pop32R, rs_rSI.GetReg());
-    cfi_.AdjustCFAOffset(-kRegSize);
-    cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
-    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
-    cfi_.AdjustCFAOffset(-kRegSize);
-    cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
-  }
-}
-
-void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-    return;
-  }
-  // Implicit null pointer check.
-  // test eax,[arg1+0]
-  NewLIR3(kX86Test32RM, rs_rAX.GetReg(), reg.GetReg(), 0);
-  MarkPossibleNullPointerException(opt_flags);
-}
-
-/*
- * Bit of a hack here - in the absence of a real scheduling pass,
- * emit the next instruction in static & direct invoke sequences.
- */
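-// Illustrative state sequence for the common (non-direct) path below:
-//   state 0: kArg0 <- current ArtMethod*
-//   state 1: kArg0 <- kArg0->dex_cache_resolved_methods_
-//   state 2: kArg0 <- resolved_methods[target_method.dex_method_index]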
-int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
-                                  int state, const MethodReference& target_method,
-                                  uint32_t,
-                                  uintptr_t direct_code ATTRIBUTE_UNUSED, uintptr_t direct_method,
-                                  InvokeType type) {
-  X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get());
-  if (info->string_init_offset != 0) {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0: {  // Grab target method* from thread pointer
-      cg->NewLIR2(kX86Mov32RT, arg0_ref.GetReg(), info->string_init_offset);
-      break;
-    }
-    default:
-      return -1;
-    }
-  } else if (direct_method != 0) {
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      if (direct_method != static_cast<uintptr_t>(-1)) {
-        auto target_reg = cg->TargetReg(kArg0, kRef);
-        if (target_reg.Is64Bit()) {
-          cg->LoadConstantWide(target_reg, direct_method);
-        } else {
-          cg->LoadConstant(target_reg, direct_method);
-        }
-      } else {
-        cg->LoadMethodAddress(target_method, type, kArg0);
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else if (cg->CanUseOpPcRelDexCacheArrayLoad()) {
-    switch (state) {
-      case 0: {
-        CHECK_EQ(cu->dex_file, target_method.dex_file);
-        size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
-        cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, cg->TargetReg(kArg0, kRef),
-                                     cu->target64);
-        break;
-      }
-      default:
-        return -1;
-    }
-  } else {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      // TUNING: we can save a reg copy if Method* has been promoted.
-      cg->LoadCurrMethodDirect(arg0_ref);
-      break;
-    case 1:  // Get method->dex_cache_resolved_methods_
-      cg->LoadBaseDisp(arg0_ref,
-                       ArtMethod::DexCacheResolvedMethodsOffset(
-                           cu->target64 ? kX86_64PointerSize : kX86PointerSize).Int32Value(),
-                       arg0_ref,
-                       cu->target64 ? k64 : k32,
-                       kNotVolatile);
-      break;
-    case 2: {
-      // Grab target method*
-      CHECK_EQ(cu->dex_file, target_method.dex_file);
-      const size_t pointer_size = GetInstructionSetPointerSize(cu->instruction_set);
-      cg->LoadWordDisp(arg0_ref,
-                       cg->GetCachePointerOffset(target_method.dex_method_index, pointer_size),
-                       arg0_ref);
-      break;
-    }
-    default:
-      return -1;
-    }
-  }
-  return state + 1;
-}
-
-NextCallInsn X86Mir2Lir::GetNextSDCallInsn() {
-  return X86NextSDCallInsn;
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
deleted file mode 100644
index 11d9d4a..0000000
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ /dev/null
@@ -1,985 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_X86_CODEGEN_X86_H_
-#define ART_COMPILER_DEX_QUICK_X86_CODEGEN_X86_H_
-
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir.h"
-#include "x86_lir.h"
-
-#include <map>
-#include <vector>
-
-namespace art {
-
-class X86Mir2Lir FINAL : public Mir2Lir {
- protected:
-  class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
-   public:
-    explicit InToRegStorageX86_64Mapper(Mir2Lir* m2l)
-        : m2l_(m2l), cur_core_reg_(0), cur_fp_reg_(0) {}
-    virtual RegStorage GetNextReg(ShortyArg arg);
-    virtual void Reset() OVERRIDE {
-      cur_core_reg_ = 0;
-      cur_fp_reg_ = 0;
-    }
-   protected:
-    Mir2Lir* m2l_;
-    size_t cur_core_reg_;
-    size_t cur_fp_reg_;
-  };
-
-  class InToRegStorageX86Mapper : public InToRegStorageX86_64Mapper {
-   public:
-    explicit InToRegStorageX86Mapper(Mir2Lir* m2l)
-        : InToRegStorageX86_64Mapper(m2l) { }
-    virtual RegStorage GetNextReg(ShortyArg arg);
-  };
-
-  InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_;
-  InToRegStorageX86Mapper in_to_reg_storage_x86_mapper_;
-  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
-    InToRegStorageMapper* res;
-    if (cu_->target64) {
-      res = &in_to_reg_storage_x86_64_mapper_;
-    } else {
-      res = &in_to_reg_storage_x86_mapper_;
-    }
-    res->Reset();
-    return res;
-  }
-
-  class ExplicitTempRegisterLock {
-   public:
-    ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...);
-    ~ExplicitTempRegisterLock();
-   protected:
-    std::vector<RegStorage> temp_regs_;
-    X86Mir2Lir* const mir_to_lir_;
-  };
-
-  virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
-
- public:
-  X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
-
-  // Required for target - codegen helpers.
-  bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                          RegLocation rl_dest, int lit) OVERRIDE;
-  bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
-  void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                  int32_t constant) OVERRIDE;
-  void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                   int64_t constant) OVERRIDE;
-  LIR* CheckSuspendUsingLoad() OVERRIDE;
-  RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
-  LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                    OpSize size, VolatileKind is_volatile) OVERRIDE;
-  LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                       OpSize size) OVERRIDE;
-  LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
-  LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-  void GenLongToInt(RegLocation rl_dest, RegLocation rl_src);
-  LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                     OpSize size, VolatileKind is_volatile) OVERRIDE;
-  LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                        OpSize size) OVERRIDE;
-
-  /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
-  void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
-
-  bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
-  void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest, bool wide)
-      OVERRIDE;
-
-  void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE;
-
-  // Required for target - register utilities.
-  RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
-  RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE {
-    if (wide_kind == kWide) {
-      if (cu_->target64) {
-        return As64BitReg(TargetReg32(symbolic_reg));
-      } else {
-        if (symbolic_reg >= kFArg0 && symbolic_reg <= kFArg3) {
-          // We want an XMM, not a pair.
-          return As64BitReg(TargetReg32(symbolic_reg));
-        }
-        // x86: construct a pair.
-        DCHECK((kArg0 <= symbolic_reg && symbolic_reg < kArg3) ||
-               (kRet0 == symbolic_reg));
-        return RegStorage::MakeRegPair(TargetReg32(symbolic_reg),
-                                 TargetReg32(static_cast<SpecialTargetRegister>(symbolic_reg + 1)));
-      }
-    } else if (wide_kind == kRef && cu_->target64) {
-      return As64BitReg(TargetReg32(symbolic_reg));
-    } else {
-      return TargetReg32(symbolic_reg);
-    }
-  }
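-  // E.g. (illustrative): on 32-bit x86, TargetReg(kRet0, kWide) returns the
-  // EAX/EDX pair, while on x86-64 the same call returns the 64-bit RAX view.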
-  RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
-    return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide);
-  }
-
-  RegLocation GetReturnAlt() OVERRIDE;
-  RegLocation GetReturnWideAlt() OVERRIDE;
-  RegLocation LocCReturn() OVERRIDE;
-  RegLocation LocCReturnRef() OVERRIDE;
-  RegLocation LocCReturnDouble() OVERRIDE;
-  RegLocation LocCReturnFloat() OVERRIDE;
-  RegLocation LocCReturnWide() OVERRIDE;
-
-  ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
-  void AdjustSpillMask() OVERRIDE;
-  void ClobberCallerSave() OVERRIDE;
-  void FreeCallTemps() OVERRIDE;
-  void LockCallTemps() OVERRIDE;
-
-  void CompilerInitializeRegAlloc() OVERRIDE;
-  int VectorRegisterSize() OVERRIDE;
-  int NumReservableVectorRegisters(bool long_or_fp) OVERRIDE;
-
-  // Required for target - miscellaneous.
-  void AssembleLIR() OVERRIDE;
-  void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
-  void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
-  const char* GetTargetInstFmt(int opcode) OVERRIDE;
-  const char* GetTargetInstName(int opcode) OVERRIDE;
-  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) OVERRIDE;
-  ResourceMask GetPCUseDefEncoding() const OVERRIDE;
-  uint64_t GetTargetInstFlags(int opcode) OVERRIDE;
-  size_t GetInsnSize(LIR* lir) OVERRIDE;
-  bool IsUnconditionalBranch(LIR* lir) OVERRIDE;
-
-  // Get the register class for load/store of a field.
-  RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
-
-  // Required for target - Dalvik-level generators.
-  void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                   RegLocation rl_dest, int scale) OVERRIDE;
-  void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                   RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) OVERRIDE;
-
-  void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2) OVERRIDE;
-  void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                       RegLocation rl_src2) OVERRIDE;
-  void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                RegLocation rl_src2) OVERRIDE;
-  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-
-  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) OVERRIDE;
-  bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) OVERRIDE;
-  bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) OVERRIDE;
-  bool GenInlinedReverseBits(CallInfo* info, OpSize size) OVERRIDE;
-  bool GenInlinedSqrt(CallInfo* info) OVERRIDE;
-  bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
-  bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
-  bool GenInlinedPeek(CallInfo* info, OpSize size) OVERRIDE;
-  bool GenInlinedPoke(CallInfo* info, OpSize size) OVERRIDE;
-  bool GenInlinedCharAt(CallInfo* info) OVERRIDE;
-
-  // Long instructions.
-  void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2, int flags) OVERRIDE;
-  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2, int flags) OVERRIDE;
-  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                         RegLocation rl_src1, RegLocation rl_shift, int flags) OVERRIDE;
-  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE;
-  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                      RegLocation rl_src1, RegLocation rl_shift) OVERRIDE;
-
-  /*
-   * @brief Generate a two-address long operation with a constant value
-   * @param rl_dest location of result
-   * @param rl_src constant source operand
-   * @param op Opcode to be generated
-   * @return success or not
-   */
-  bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
-
-  /*
-   * @brief Generate a three-address long operation with a constant value
-   * @param rl_dest location of result
-   * @param rl_src1 source operand
-   * @param rl_src2 constant source operand
-   * @param op Opcode to be generated
-   * @return success or not
-   */
-  bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                      Instruction::Code op);
-  /**
-   * @brief Generate a long arithmetic operation.
-   * @param rl_dest The destination.
-   * @param rl_src1 First operand.
-   * @param rl_src2 Second operand.
-   * @param op The DEX opcode for the operation.
-   * @param is_commutative The sources can be swapped if needed.
-   */
-  virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                            Instruction::Code op, bool is_commutative);
-
-  /**
-   * @brief Generate a two operand long arithmetic operation.
-   * @param rl_dest The destination.
-   * @param rl_src Second operand.
-   * @param op The DEX opcode for the operation.
-   */
-  void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
-
-  /**
-   * @brief Generate a long operation.
-   * @param rl_dest The destination.  Must be in a register
-   * @param rl_src The other operand.  May be in a register or in memory.
-   * @param op The DEX opcode for the operation.
-   */
-  virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
-
-
-  // TODO: collapse reg_lo, reg_hi
-  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div)
-      OVERRIDE;
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) OVERRIDE;
-  void GenDivZeroCheckWide(RegStorage reg) OVERRIDE;
-  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
-  void GenExitSequence() OVERRIDE;
-  void GenSpecialExitSequence() OVERRIDE;
-  void GenSpecialEntryForSuspend() OVERRIDE;
-  void GenSpecialExitForSuspend() OVERRIDE;
-  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
-  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
-  void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
-  void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                        int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                        RegisterClass dest_reg_class) OVERRIDE;
-  bool GenMemBarrier(MemBarrierKind barrier_kind) OVERRIDE;
-  void GenMoveException(RegLocation rl_dest) OVERRIDE;
-  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                     int first_bit, int second_bit) OVERRIDE;
-  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
-  void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
-
-  /**
-   * @brief Implement instanceof a final class with x86 specific code.
-   * @param use_declaring_class 'true' if we can use the class itself.
-   * @param type_idx Type index to use if use_declaring_class is 'false'.
-   * @param rl_dest Result to be set to 0 or 1.
-   * @param rl_src Object to be tested.
-   */
-  void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                          RegLocation rl_src) OVERRIDE;
-
-  // Single operation generators.
-  LIR* OpUnconditionalBranch(LIR* target) OVERRIDE;
-  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) OVERRIDE;
-  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) OVERRIDE;
-  LIR* OpCondBranch(ConditionCode cc, LIR* target) OVERRIDE;
-  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) OVERRIDE;
-  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
-  void OpEndIT(LIR* it) OVERRIDE;
-  LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
-  LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
-  void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) OVERRIDE;
-  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OVERRIDE;
-  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) OVERRIDE;
-  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) OVERRIDE;
-  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) OVERRIDE;
-  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) OVERRIDE;
-  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) OVERRIDE;
-  LIR* OpTestSuspend(LIR* target) OVERRIDE;
-  LIR* OpVldm(RegStorage r_base, int count) OVERRIDE;
-  LIR* OpVstm(RegStorage r_base, int count) OVERRIDE;
-  void OpRegCopyWide(RegStorage dest, RegStorage src) OVERRIDE;
-  bool GenInlinedCurrentThread(CallInfo* info) OVERRIDE;
-
-  bool InexpensiveConstantInt(int32_t value) OVERRIDE;
-  bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
-  bool InexpensiveConstantLong(int64_t value) OVERRIDE;
-  bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
-
-  /*
-   * @brief Should we try to optimize for two-address instructions?
-   * @return true if we try to avoid generating three operand instructions.
-   */
-  virtual bool GenerateTwoOperandInstructions() const { return true; }
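-  // E.g. (illustrative): `x += y` maps straight to the two-operand x86
-  // `add reg_x, reg_y`, while a three-operand `x = y + z` needs a `mov`
-  // before the `add`, so we prefer collapsing to two-operand shapes.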
-
-  /*
-   * @brief x86 specific codegen for int operations.
-   * @param opcode Operation to perform.
-   * @param rl_dest Destination for the result.
-   * @param rl_lhs Left hand operand.
-   * @param rl_rhs Right hand operand.
-   * @param flags The instruction optimization flags.
-   */
-  void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
-                     RegLocation rl_rhs, int flags) OVERRIDE;
-
-  /*
-   * @brief Load the Method* of a dex method into the register.
-   * @param target_method The MethodReference of the method to be invoked.
-   * @param type How the method will be invoked.
-   * @param symbolic_reg Symbolic register that will contain the code address.
-   * @note symbolic_reg will be passed to TargetReg to get the physical register.
-   */
-  void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                         SpecialTargetRegister symbolic_reg) OVERRIDE;
-
-  /*
-   * @brief Load the Class* of a Dex Class type into the register.
-   * @param dex_file DexFile that contains the class type.
-   * @param type_idx Index of the class type within the dex file.
-   * @param symbolic_reg Symbolic register that will contain the class address.
-   * @note symbolic_reg will be passed to TargetReg to get the physical register.
-   */
-  void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
-                     SpecialTargetRegister symbolic_reg) OVERRIDE;
-
-  NextCallInsn GetNextSDCallInsn() OVERRIDE;
-
-  /*
-   * @brief Generate a relative call to the method that will be patched at link time.
-   * @param target_method The MethodReference of the method to be invoked.
-   * @param type How the method will be invoked.
-   * @returns Call instruction
-   */
-  LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
-
-  /*
-   * @brief Generate the actual call insn based on the method info.
-   * @param method_info the lowering info for the method call.
-   * @returns Call instruction
-   */
-  LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
-
-  void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) OVERRIDE;
-  void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
-  void DoPromotion() OVERRIDE;
-
-  /*
-   * @brief Handle x86 specific literals
-   */
-  void InstallLiteralPools() OVERRIDE;
-
-  LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
-
- protected:
-  RegStorage TargetReg32(SpecialTargetRegister reg) const;
-  // Casting of RegStorage
-  RegStorage As32BitReg(RegStorage reg) {
-    DCHECK(!reg.IsPair());
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Expected 64b register " << reg.GetReg();
-      } else {
-        LOG(WARNING) << "Expected 64b register " << reg.GetReg();
-        return reg;
-      }
-    }
-    RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,
-                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
-    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
-                             ->GetReg().GetReg(),
-              ret_val.GetReg());
-    return ret_val;
-  }
-
-  RegStorage As64BitReg(RegStorage reg) {
-    DCHECK(!reg.IsPair());
-    if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) {
-      if (kFailOnSizeError) {
-        LOG(FATAL) << "Expected 32b register " << reg.GetReg();
-      } else {
-        LOG(WARNING) << "Expected 32b register " << reg.GetReg();
-        return reg;
-      }
-    }
-    RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,
-                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
-    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
-                             ->GetReg().GetReg(),
-              ret_val.GetReg());
-    return ret_val;
-  }
-
-  LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                           RegStorage r_dest, OpSize size);
-  LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                            RegStorage r_src, OpSize size, int opt_flags = 0);
-
-  int AssignInsnOffsets();
-  void AssignOffsets();
-  AssemblerStatus AssembleInstructions(LIR* first_lir_insn, CodeOffset start_addr);
-
-  size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
-                     int32_t raw_base, int32_t displacement);
-  void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
-  void EmitPrefix(const X86EncodingMap* entry,
-                  int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b);
-  void EmitOpcode(const X86EncodingMap* entry);
-  void EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                           int32_t reg_r, int32_t reg_x, int32_t reg_b);
-  void EmitDisp(uint8_t base, int32_t disp);
-  void EmitModrmThread(uint8_t reg_or_opcode);
-  void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp);
-  void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale,
-                        int32_t disp);
-  void EmitImm(const X86EncodingMap* entry, int64_t imm);
-  void EmitNullary(const X86EncodingMap* entry);
-  void EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg);
-  void EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg);
-  void EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp);
-  void EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
-                   int32_t disp);
-  void EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_reg);
-  void EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int32_t disp);
-  void EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
-                    int32_t raw_index, int scale, int32_t disp);
-  void EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
-                    int32_t disp, int32_t raw_reg);
-  void EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
-  void EmitArrayImm(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale,
-                    int32_t raw_disp, int32_t imm);
-  void EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp);
-  void EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2);
-  void EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t imm);
-  void EmitRegMemImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp,
-                     int32_t imm);
-  void EmitMemRegImm(const X86EncodingMap* entry, int32_t base, int32_t disp, int32_t raw_reg1,
-                     int32_t imm);
-  void EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
-  void EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm);
-  void EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm);
-  void EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
-  void EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl);
-  void EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_cl);
-  void EmitShiftRegRegCl(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
-                         int32_t raw_cl);
-  void EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
-  void EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc);
-  void EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t cc);
-  void EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t cc);
-  void EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp,
-                      int32_t cc);
-
-  void EmitJmp(const X86EncodingMap* entry, int32_t rel);
-  void EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc);
-  void EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp);
-  void EmitCallImmediate(const X86EncodingMap* entry, int32_t disp);
-  void EmitCallThread(const X86EncodingMap* entry, int32_t disp);
-  void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
-                 int32_t raw_index, int scale, int32_t table_or_disp);
-  void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
-  void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
-                                int64_t val, ConditionCode ccode);
-  void GenConstWide(RegLocation rl_dest, int64_t value);
-  void GenMultiplyVectorSignedByte(RegStorage rs_dest_src1, RegStorage rs_src2);
-  void GenMultiplyVectorLong(RegStorage rs_dest_src1, RegStorage rs_src2);
-  void GenShiftByteVector(MIR* mir);
-  void AndMaskVectorRegister(RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3,
-                             uint32_t m4);
-  void MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m1, uint32_t m2,
-                          uint32_t m3, uint32_t m4);
-  void AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir);
-  virtual void LoadVectorRegister(RegStorage rs_dest, RegStorage rs_src, OpSize opsize,
-                                  int op_mov);
-
-  static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
-
-  /*
-   * @brief Ensure that a temporary register is byte addressable.
-   * @returns a temporary guaranteed to be byte addressable.
-   */
-  virtual RegStorage AllocateByteRegister();
-
-  /*
-   * @brief Use a wide temporary as a 128-bit register
-   * @returns a 128-bit temporary register.
-   */
-  virtual RegStorage Get128BitRegister(RegStorage reg);
-
-  /*
-   * @brief Check if a register is byte addressable.
-   * @returns true if a register is byte addressable.
-   */
-  bool IsByteRegister(RegStorage reg) const;
-
-  void GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src, int64_t imm, bool is_div);
-
-  bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
-
-  /*
-   * @brief Generate inline code for the fast case of String.indexOf.
-   * @param info Call parameters
-   * @param zero_based 'true' if the index into the string is 0.
-   * @returns 'true' if the call was inlined, 'false' if a regular call needs to be
-   * generated.
-   */
-  bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
-
-  /**
-   * @brief Used to reserve a range of vector registers.
-   * @see kMirOpReserveVectorRegisters
-   * @param mir The extended MIR for reservation.
-   */
-  void ReserveVectorRegisters(MIR* mir);
-
-  /**
-   * @brief Used to return a range of vector registers.
-   * @see kMirOpReturnVectorRegisters
-   * @param mir The extended MIR for returning vector regs.
-   */
-  void ReturnVectorRegisters(MIR* mir);
-
-  /*
-   * @brief Load 128 bit constant into vector register.
-   * @param mir The MIR whose opcode is kMirConstVector
-   * @note vA is the TypeSize for the register.
-   * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
-   */
-  void GenConst128(MIR* mir);
-
-  /*
-   * @brief MIR to move a vectorized register to another.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination
-   * @note vC: source
-   */
-  void GenMoveVector(MIR* mir);
-
-  /*
-   * @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know
-   * the type of the vector.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: source
-   */
-  void GenMultiplyVector(MIR* mir);
-
-  /*
-   * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the
-   * type of the vector.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: source
-   */
-  void GenAddVector(MIR* mir);
-
-  /*
-   * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the
-   * type of the vector.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: source
-   */
-  void GenSubtractVector(MIR* mir);
-
-  /*
-   * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the
-   * type of the vector.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: immediate
-   */
-  void GenShiftLeftVector(MIR* mir);
-
-  /*
-   * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to
-   * know the type of the vector.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: immediate
-   */
-  void GenSignedShiftRightVector(MIR* mir);
-
-  /*
-   * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA
-   * to know the type of the vector.
-   * @param mir The MIR whose opcode is kMirConstVector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: immediate
-   */
-  void GenUnsignedShiftRightVector(MIR* mir);
-
-  /*
-   * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the
-   * type of the vector.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: source
-   */
-  void GenAndVector(MIR* mir);
-
-  /*
-   * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the
-   * type of the vector.
-   * @param mir The MIR whose opcode is kMirOpPackedOr.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: source
-   */
-  void GenOrVector(MIR* mir);
-
-  /*
-   * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the
-   * type of the vector.
-   * @param mir The MIR whose opcode is kMirOpPackedXor.
-   * @note vA: TypeSize
-   * @note vB: destination and source
-   * @note vC: source
-   */
-  void GenXorVector(MIR* mir);
-
-  /*
-   * @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-   * @param mir The MIR whose opcode is kMirOpPackedAddReduce.
-   * @details Instruction does a horizontal addition of the packed elements and then adds it to VR.
-   * @note vA: TypeSize
-   * @note vB: destination and source VR (not vector register)
-   * @note vC: source (vector register)
-   */
-  void GenAddReduceVector(MIR* mir);
-
-  /*
-   * @brief Extract a packed element into a single VR.
-   * @param mir The MIR whose opcode is kMirOpPackedReduce.
-   * @note vA: TypeSize
-   * @note vB: destination VR (not vector register)
-   * @note vC: source (vector register)
-   * @note arg[0]: The index to use for extraction from vector register (which packed element).
-   */
-  void GenReduceVector(MIR* mir);
-
-  /*
-   * @brief Create a vector value, with all TypeSize values equal to vC
-   * @param mir The MIR whose opcode is kMirOpPackedSet.
-   * @note vA: TypeSize.
-   * @note vB: destination vector register.
-   * @note vC: source VR (not vector register).
-   */
-  void GenSetVector(MIR* mir);
-
-  /**
-   * @brief Used to generate code for kMirOpPackedArrayGet.
-   * @param bb The basic block of MIR.
-   * @param mir The mir whose opcode is kMirOpPackedArrayGet.
-   */
-  void GenPackedArrayGet(BasicBlock* bb, MIR* mir);
-
-  /**
-   * @brief Used to generate code for kMirOpPackedArrayPut.
-   * @param bb The basic block of MIR.
-   * @param mir The mir whose opcode is kMirOpPackedArrayPut.
-   */
-  void GenPackedArrayPut(BasicBlock* bb, MIR* mir);
-
-  /*
-   * @brief Generate code for a vector opcode.
-   * @param bb The basic block in which the MIR is from.
-   * @param mir The MIR whose opcode is a non-standard opcode.
-   */
-  void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
-
-  /*
-   * @brief Return the correct x86 opcode for the Dex operation
-   * @param op Dex opcode for the operation
-   * @param loc Register location of the operand
-   * @param is_high_op 'true' if this is an operation on the high word
-   * @param value Immediate value for the operation.  Used for byte variants.
-   * @returns the correct x86 opcode to perform the operation
-   */
-  X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
-
-  /*
-   * @brief Return the correct x86 opcode for the Dex operation
-   * @param op Dex opcode for the operation
-   * @param dest location of the destination.  May be register or memory.
-   * @param rhs Location for the rhs of the operation.  May be in register or memory.
-   * @param is_high_op 'true' if this is an operation on the high word
-   * @returns the correct x86 opcode to perform the operation
-   * @note at most one location may refer to memory
-   */
-  X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
-                      bool is_high_op);
-
-  /*
-   * @brief Is this operation a no-op for this opcode and value
-   * @param op Dex opcode for the operation
-   * @param value Immediate value for the operation.
-   * @returns 'true' if the operation will have no effect
-   */
-  bool IsNoOp(Instruction::Code op, int32_t value);
-
-  /**
-   * @brief Calculate magic number and shift for a given divisor
-   * @param divisor divisor number for calculation
-   * @param magic holds the calculated magic number
-   * @param shift holds the calculated shift
-   * @param is_long 'true' if divisor is jlong, 'false' for jint.
-   */
-  void CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long);
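The magic-number technique behind CalculateMagicAndShift (Hacker's Delight, ch. 10) replaces
division by a constant with a multiply-high and a couple of shifts. A host-side sketch for
32-bit signed operands, with hypothetical helper names (not ART code):

    #include <cstdint>

    // Transcription of the Hacker's Delight 10-4 derivation (sketch).
    void MagicSigned32(int32_t d, int32_t* magic, int* shift) {
      const uint32_t two31 = 0x80000000u;
      uint32_t ad = (d < 0) ? -static_cast<uint32_t>(d) : static_cast<uint32_t>(d);
      uint32_t t = two31 + (static_cast<uint32_t>(d) >> 31);
      uint32_t anc = t - 1 - t % ad;                      // absolute value of nc
      uint32_t q1 = two31 / anc, r1 = two31 - q1 * anc;
      uint32_t q2 = two31 / ad, r2 = two31 - q2 * ad;
      int p = 31;
      uint32_t delta;
      do {
        p++;
        q1 *= 2; r1 *= 2;
        if (r1 >= anc) { q1++; r1 -= anc; }
        q2 *= 2; r2 *= 2;
        if (r2 >= ad) { q2++; r2 -= ad; }
        delta = ad - r2;
      } while (q1 < delta || (q1 == delta && r1 == 0));
      *magic = static_cast<int32_t>(q2 + 1);
      if (d < 0) *magic = -*magic;
      *shift = p - 32;
    }

    // How the generated code uses the pair: n / d with no div instruction.
    int32_t DivByConst(int32_t n, int32_t d, int32_t magic, int shift) {
      int32_t hi = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);
      if (d > 0 && magic < 0) hi += n;                    // correction terms
      if (d < 0 && magic > 0) hi -= n;
      int32_t q = hi >> shift;
      return q + (static_cast<uint32_t>(q) >> 31);        // +1 for negative quotients
    }

For example, d == 7 yields magic == 0x92492493 and shift == 2.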
-
-  /*
-   * @brief Generate an integer div or rem operation.
-   * @param rl_dest Destination Location.
-   * @param rl_src1 Numerator Location.
-   * @param rl_src2 Divisor Location.
-   * @param is_div 'true' if this is a division, 'false' for a remainder.
-   * @param flags The instruction optimization flags. It can include information
-   * if exception check can be elided.
-   */
-  RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        bool is_div, int flags);
-
-  /*
-   * @brief Generate an integer div or rem operation by a literal.
-   * @param rl_dest Destination Location.
-   * @param rl_src Numerator Location.
-   * @param lit Divisor.
-   * @param is_div 'true' if this is a division, 'false' for a remainder.
-   */
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
-
-  /*
-   * Generate code to implement long shift operations.
-   * @param opcode The DEX opcode to specify the shift type.
-   * @param rl_dest The destination.
-   * @param rl_src The value to be shifted.
-   * @param shift_amount How much to shift.
-   * @param flags The instruction optimization flags.
-   * @returns the RegLocation of the result.
-   */
-  RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                RegLocation rl_src, int shift_amount, int flags);
-  /*
-   * Generate an imul of a register by a constant or a better sequence.
-   * @param dest Destination Register.
-   * @param src Source Register.
-   * @param val Constant multiplier.
-   */
-  void GenImulRegImm(RegStorage dest, RegStorage src, int val);
-
-  /*
-   * Generate an imul of a memory location by a constant or a better sequence.
-   * @param dest Destination Register.
-   * @param sreg Symbolic register.
-   * @param displacement Displacement on stack of Symbolic Register.
-   * @param val Constant multiplier.
-   */
-  void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val);
-
-  /*
-   * @brief Compare memory to immediate, and branch if condition true.
-   * @param cond The condition code that when true will branch to the target.
-   * @param temp_reg A temporary register that can be used if a compare-to-memory is not
-   * supported by the architecture.
-   * @param base_reg The register holding the base address.
-   * @param offset The offset from the base.
-   * @param check_value The immediate to compare to.
-   * @param target branch target (or nullptr)
-   * @param compare output for getting LIR for comparison (or nullptr)
-   */
-  LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                         int offset, int check_value, LIR* target, LIR** compare);
-
-  void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double);
-
-  /*
-   * Can this operation use core registers without temporaries?
-   * @param rl_lhs Left hand operand.
-   * @param rl_rhs Right hand operand.
-   * @returns 'true' if the operation can proceed without needing temporary regs.
-   */
-  bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
-
-  /**
-   * @brief Generates inline code for conversion of long to FP by using x87.
-   * @param rl_dest The destination of the FP.
-   * @param rl_src The source of the long.
-   * @param is_double 'true' if dealing with double, 'false' for float.
-   */
-  virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
-
-  void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
-  void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
-
-  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
-  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
-  LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
-  LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
-  LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset);
-  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
-  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
-  void OpTlsCmp(ThreadOffset<4> offset, int val);
-  void OpTlsCmp(ThreadOffset<8> offset, int val);
-
-  void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
-
-  // Try to do a long multiplication where rl_src2 is a constant. This simplified setup might fail,
-  // in which case false will be returned.
-  bool GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags);
-  void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2, int flags);
-  void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                     RegLocation rl_src2, bool is_div, int flags);
-
-  void SpillCoreRegs();
-  void UnSpillCoreRegs();
-  void UnSpillFPRegs();
-  void SpillFPRegs();
-
-  /*
-   * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
-   * without regard to data type.  In practice, this can result in UpdateLoc returning a
-   * location record for a Dalvik float value in a core register, and vice versa.  For targets
-   * which can inexpensively move data between core and float registers, this can often be a win.
-   * However, for x86 this is generally not a win.  These variants of UpdateLoc()
-   * take a register class argument - and will return an in-register location record only if
-   * the value is live in a temp register of the correct class.  Additionally, if the value is in
-   * a temp register of the wrong register class, it will be clobbered.
-   */
-  RegLocation UpdateLocTyped(RegLocation loc);
-  RegLocation UpdateLocWideTyped(RegLocation loc);
-
-  /*
-   * @brief Analyze one MIR float/double instruction
-   * @param opcode MIR instruction opcode.
-   * @param mir Instruction to analyze.
-   * @return true iff the instruction needs to load a literal using PC-relative addressing.
-   */
-  bool AnalyzeFPInstruction(int opcode, MIR* mir);
-
-  /*
-   * @brief Analyze one use of a double operand.
-   * @param rl_use Double RegLocation for the operand.
-   * @return true iff the instruction needs to load a literal using PC-relative addressing.
-   */
-  bool AnalyzeDoubleUse(RegLocation rl_use);
-
-  /*
-   * @brief Analyze one invoke-static MIR instruction
-   * @param mir Instruction to analyze.
-   * @return true iff the instruction needs to load a literal using PC-relative addressing.
-   */
-  bool AnalyzeInvokeStaticIntrinsic(MIR* mir);
-
-  // Information derived from analysis of MIR
-
-  // The base register for PC-relative addressing if promoted (32-bit only).
-  RegStorage pc_rel_base_reg_;
-
-  // Have we actually used the pc_rel_base_reg_?
-  bool pc_rel_base_reg_used_;
-
-  // Pointer to the "call +0" insn that sets up the promoted register for PC-relative addressing.
-  // The anchor "pop" insn is NEXT_LIR(setup_pc_rel_base_reg_). The whole "call +0; pop <reg>"
-  // sequence will be removed in AssembleLIR() if we do not actually use PC-relative addressing.
-  LIR* setup_pc_rel_base_reg_;  // There are 2 chained insns (no reordering allowed).
-
-  // Instructions needing patching with Method* values.
-  ArenaVector<LIR*> method_address_insns_;
-
-  // Instructions needing patching with Class Type* values.
-  ArenaVector<LIR*> class_type_address_insns_;
-
-  // Instructions needing patching with PC relative code addresses.
-  ArenaVector<LIR*> call_method_insns_;
-
-  // Instructions needing patching with PC relative code addresses.
-  ArenaVector<LIR*> dex_cache_access_insns_;
-
-  // The list of const vector literals.
-  LIR* const_vectors_;
-
-  /*
-   * @brief Search for a matching vector literal
-   * @param constants An array of four 32-bit constants.
-   * @returns pointer to matching LIR constant, or nullptr if not found.
-   */
-  LIR* ScanVectorLiteral(int32_t* constants);
-
-  /*
-   * @brief Add a constant vector literal
-   * @param constants An array of four 32-bit constants.
-   */
-  LIR* AddVectorLiteral(int32_t* constants);
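ScanVectorLiteral and AddVectorLiteral implement a small constant pool: a 128-bit literal is
emitted once and reused whenever all four 32-bit words match. A minimal sketch of the lookup,
using a hypothetical node type rather than the real LIR layout:

    #include <cstdint>
    #include <cstring>

    struct VectorLiteral {            // hypothetical stand-in for LIR
      int32_t words[4];
      VectorLiteral* next;
    };

    VectorLiteral* Scan(VectorLiteral* head, const int32_t constants[4]) {
      for (VectorLiteral* p = head; p != nullptr; p = p->next) {
        if (std::memcmp(p->words, constants, sizeof(p->words)) == 0) {
          return p;                   // reuse the existing literal
        }
      }
      return nullptr;                 // caller appends a new node instead
    }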
-
-  bool WideGPRsAreAliases() const OVERRIDE {
-    return cu_->target64;  // On 64b, we have 64b GPRs.
-  }
-
-  bool WideFPRsAreAliases() const OVERRIDE {
-    return true;  // xmm registers have 64b views even on x86.
-  }
-
-  /*
-   * @brief Dump a RegLocation using printf
-   * @param loc Register location to dump
-   */
-  static void DumpRegLocation(RegLocation loc);
-
- private:
-  void SwapBits(RegStorage result_reg, int shift, int32_t value);
-  void SwapBits64(RegStorage result_reg, int shift, int64_t value);
-
-  static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
-                               int state, const MethodReference& target_method,
-                               uint32_t,
-                               uintptr_t direct_code, uintptr_t direct_method,
-                               InvokeType type);
-
-  LIR* OpLoadPc(RegStorage r_dest);
-  RegStorage GetPcAndAnchor(LIR** anchor, RegStorage r_tmp = RegStorage::InvalidReg());
-
-  // When we don't know the proper offset for the value, pick one that will force
-  // a 4-byte offset.  We will fix this up in the assembler or linker later to have
-  // the right value.
-  static constexpr int kDummy32BitOffset = 256;
-
-  static const X86EncodingMap EncodingMap[kX86Last];
-
-  friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs);
-  friend class QuickAssembleX86Test;
-  friend class QuickAssembleX86MacroTest;
-  friend class QuickAssembleX86LowLevelTest;
-
-  DISALLOW_COPY_AND_ASSIGN(X86Mir2Lir);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_X86_CODEGEN_X86_H_
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
deleted file mode 100755
index b11d41c..0000000
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ /dev/null
@@ -1,813 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_x86.h"
-
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "x86_lir.h"
-
-namespace art {
-
-void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
-                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  X86OpCode op = kX86Nop;
-  RegLocation rl_result;
-
-  /*
-   * Don't attempt to optimize register usage since these opcodes call out to
-   * the handlers.
-   */
-  switch (opcode) {
-    case Instruction::ADD_FLOAT_2ADDR:
-    case Instruction::ADD_FLOAT:
-      op = kX86AddssRR;
-      break;
-    case Instruction::SUB_FLOAT_2ADDR:
-    case Instruction::SUB_FLOAT:
-      op = kX86SubssRR;
-      break;
-    case Instruction::DIV_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT:
-      op = kX86DivssRR;
-      break;
-    case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::MUL_FLOAT:
-      op = kX86MulssRR;
-      break;
-    case Instruction::REM_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT:
-      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
-      return;
-    case Instruction::NEG_FLOAT:
-      GenNegFloat(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  rl_src1 = LoadValue(rl_src1, kFPReg);
-  rl_src2 = LoadValue(rl_src2, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  RegStorage r_dest = rl_result.reg;
-  RegStorage r_src1 = rl_src1.reg;
-  RegStorage r_src2 = rl_src2.reg;
-  if (r_dest == r_src2) {
-    r_src2 = AllocTempSingle();
-    OpRegCopy(r_src2, r_dest);
-  }
-  OpRegCopy(r_dest, r_src1);
-  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
-  StoreValue(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
-                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_dest.fp);
-  DCHECK(rl_src1.wide);
-  DCHECK(rl_src1.fp);
-  DCHECK(rl_src2.wide);
-  DCHECK(rl_src2.fp);
-  X86OpCode op = kX86Nop;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::ADD_DOUBLE:
-      op = kX86AddsdRR;
-      break;
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE:
-      op = kX86SubsdRR;
-      break;
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE:
-      op = kX86DivsdRR;
-      break;
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE:
-      op = kX86MulsdRR;
-      break;
-    case Instruction::REM_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE:
-      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
-      return;
-    case Instruction::NEG_DOUBLE:
-      GenNegDouble(rl_dest, rl_src1);
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  rl_src2 = LoadValueWide(rl_src2, kFPReg);
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  if (rl_result.reg == rl_src2.reg) {
-    rl_src2.reg = AllocTempDouble();
-    OpRegCopy(rl_src2.reg, rl_result.reg);
-  }
-  OpRegCopy(rl_result.reg, rl_src1.reg);
-  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                            RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                            int32_t constant ATTRIBUTE_UNUSED) {
-  // TODO: need x86 implementation.
-  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
-}
-
-void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                             RegLocation rl_src1 ATTRIBUTE_UNUSED,
-                                             int64_t constant ATTRIBUTE_UNUSED) {
-  // TODO: need x86 implementation.
-  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
-}
-
-void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
-  // Compute offsets to the source and destination VRs on the stack.
-  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
-  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
-
-  // Update the in-register state of source.
-  rl_src = UpdateLocWide(rl_src);
-
-  // All memory accesses below reference dalvik regs.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-  // If the source is in physical register, then put it in its location on stack.
-  if (rl_src.location == kLocPhysReg) {
-    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);
-
-    if (reg_info != nullptr && reg_info->IsTemp()) {
-      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
-      FlushSpecificReg(reg_info);
-      // ResetDef to prevent NullifyRange from removing stores.
-      ResetDef(rl_src.reg);
-    } else {
-      // It must have been register promoted if it is not a temp but is still in physical
-      // register. Since we need it to be in memory to convert, we place it there now.
-      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-      StoreBaseDisp(rs_rSP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
-    }
-  }
-
-  // Push the source virtual register onto the x87 stack.
-  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP_32.GetReg(),
-                              src_v_reg_offset + LOWORD_OFFSET);
-  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
-
-  // Now pop off x87 stack and store it in the destination VR's stack location.
-  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
-  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
-  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP_32.GetReg(), displacement);
-  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);
-
-  /*
-   * The result is in a physical register if it was in a temp or was register
-   * promoted. For that reason it is enough to check if it is in physical
-   * register. If it is, then we must do all of the bookkeeping necessary to
-   * invalidate temp (if needed) and load in promoted register (if needed).
-   * If the result's location is in memory, then we do not need to do anything
-   * more since the fstp has already placed the correct value in memory.
-   */
-  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
-  if (rl_result.location == kLocPhysReg) {
-    /*
-     * We already know that the result is in a physical register but do not know if it is the
-     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
-     * correct register class.
-     */
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-    if (is_double) {
-      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
-
-      StoreFinalValueWide(rl_dest, rl_result);
-    } else {
-      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
-
-      StoreFinalValue(rl_dest, rl_result);
-    }
-  }
-}
-
-void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
-                               RegLocation rl_src) {
-  RegisterClass rcSrc = kFPReg;
-  X86OpCode op = kX86Nop;
-  RegLocation rl_result;
-  switch (opcode) {
-    case Instruction::INT_TO_FLOAT:
-      rcSrc = kCoreReg;
-      op = kX86Cvtsi2ssRR;
-      break;
-    case Instruction::DOUBLE_TO_FLOAT:
-      rcSrc = kFPReg;
-      op = kX86Cvtsd2ssRR;
-      break;
-    case Instruction::FLOAT_TO_DOUBLE:
-      rcSrc = kFPReg;
-      op = kX86Cvtss2sdRR;
-      break;
-    case Instruction::INT_TO_DOUBLE:
-      rcSrc = kCoreReg;
-      op = kX86Cvtsi2sdRR;
-      break;
-    case Instruction::FLOAT_TO_INT: {
-      rl_src = LoadValue(rl_src, kFPReg);
-      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
-      ClobberSReg(rl_dest.s_reg_low);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      RegStorage temp_reg = AllocTempSingle();
-
-      LoadConstant(rl_result.reg, 0x7fffffff);
-      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
-      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
-      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
-      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
-      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
-      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
-      branch_normal->target = NewLIR0(kPseudoTargetLabel);
-      StoreValue(rl_dest, rl_result);
-      return;
-    }
-    case Instruction::DOUBLE_TO_INT: {
-      rl_src = LoadValueWide(rl_src, kFPReg);
-      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
-      ClobberSReg(rl_dest.s_reg_low);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      RegStorage temp_reg = AllocTempDouble();
-
-      LoadConstant(rl_result.reg, 0x7fffffff);
-      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
-      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
-      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
-      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
-      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
-      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
-      branch_normal->target = NewLIR0(kPseudoTargetLabel);
-      StoreValue(rl_dest, rl_result);
-      return;
-    }
-    case Instruction::LONG_TO_DOUBLE:
-      if (cu_->target64) {
-        rcSrc = kCoreReg;
-        op = kX86Cvtsqi2sdRR;
-        break;
-      }
-      GenLongToFP(rl_dest, rl_src, true /* is_double */);
-      return;
-    case Instruction::LONG_TO_FLOAT:
-      if (cu_->target64) {
-        rcSrc = kCoreReg;
-        op = kX86Cvtsqi2ssRR;
-        break;
-      }
-      GenLongToFP(rl_dest, rl_src, false /* is_double */);
-      return;
-    case Instruction::FLOAT_TO_LONG:
-      if (cu_->target64) {
-        rl_src = LoadValue(rl_src, kFPReg);
-        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
-        ClobberSReg(rl_dest.s_reg_low);
-        rl_result = EvalLoc(rl_dest, kCoreReg, true);
-        RegStorage temp_reg = AllocTempSingle();
-
-        // Load 0x7fffffffffffffff into rl_result.
-        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
-        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
-        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
-        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
-        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
-        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
-        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
-        branch_normal->target = NewLIR0(kPseudoTargetLabel);
-        StoreValueWide(rl_dest, rl_result);
-      } else {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
-        GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
-      }
-      return;
-    case Instruction::DOUBLE_TO_LONG:
-      if (cu_->target64) {
-        rl_src = LoadValueWide(rl_src, kFPReg);
-        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
-        ClobberSReg(rl_dest.s_reg_low);
-        rl_result = EvalLoc(rl_dest, kCoreReg, true);
-        RegStorage temp_reg = AllocTempDouble();
-
-        // Load 0x7fffffffffffffff into rl_result.
-        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
-        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
-        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
-        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
-        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
-        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
-        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
-        branch_normal->target = NewLIR0(kPseudoTargetLabel);
-        StoreValueWide(rl_dest, rl_result);
-      } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
-        GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
-      }
-      return;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << opcode;
-  }
-  // At this point, target will be either float or double.
-  DCHECK(rl_dest.fp);
-  if (rl_src.wide) {
-    rl_src = LoadValueWide(rl_src, rcSrc);
-  } else {
-    rl_src = LoadValue(rl_src, rcSrc);
-  }
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  if (rl_dest.wide) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-}
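The FLOAT_TO_INT and DOUBLE_TO_INT sequences above exist because cvttss2si/cvttsd2si return
the "integer indefinite" value 0x80000000 for NaN and out-of-range inputs, while Java requires
NaN to become 0 and overflow to saturate. Only NaN and positive overflow need fixing up, since
0x80000000 already equals Integer.MIN_VALUE. A host-side model of the emitted logic (sketch,
not ART code):

    #include <cstdint>

    int32_t JavaF2I(float f) {
      if (f != f) {
        return 0;                      // NaN: the parity branch zeroes the result.
      }
      if (f >= 2147483648.0f) {        // (float)0x7fffffff rounds up to 2^31
        return INT32_MAX;              // 'jae' keeps the preloaded max value.
      }
      // cvttss2si: negative overflow already produces INT32_MIN ("indefinite").
      return static_cast<int32_t>(f);
    }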
-
-void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                          bool is_double) {
-  // Compute offsets to the source and destination VRs on the stack.
-  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
-  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
-  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
-
-  // Update the in-register state of sources.
-  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
-  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);
-
-  // All memory accesses below reference dalvik regs.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-  // If the source is in physical register, then put it in its location on stack.
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  if (rl_src1.location == kLocPhysReg) {
-    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);
-
-    if (reg_info != nullptr && reg_info->IsTemp()) {
-      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
-      FlushSpecificReg(reg_info);
-      // ResetDef to prevent NullifyRange from removing stores.
-      ResetDef(rl_src1.reg);
-    } else {
-      // It must have been register promoted if it is not a temp but is still in physical
-      // register. Since we need it to be in memory to convert, we place it there now.
-      StoreBaseDisp(rs_rSP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
-                    kNotVolatile);
-    }
-  }
-
-  if (rl_src2.location == kLocPhysReg) {
-    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
-    if (reg_info != nullptr && reg_info->IsTemp()) {
-      FlushSpecificReg(reg_info);
-      ResetDef(rl_src2.reg);
-    } else {
-      StoreBaseDisp(rs_rSP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
-                    kNotVolatile);
-    }
-  }
-
-  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;
-
-  // Push the source virtual registers onto the x87 stack.
-  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
-                             src2_v_reg_offset + LOWORD_OFFSET);
-  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
-                          true /* is_load */, is_double /* is64bit */);
-
-  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
-                             src1_v_reg_offset + LOWORD_OFFSET);
-  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
-                          true /* is_load */, is_double /* is64bit */);
-
-  FlushReg(rs_rAX);
-  Clobber(rs_rAX);
-  LockTemp(rs_rAX);
-
-  LIR* retry = NewLIR0(kPseudoTargetLabel);
-
-  // Divide ST(0) by ST(1) and place result to ST(0).
-  NewLIR0(kX86Fprem);
-
-  // Move FPU status word to AX.
-  NewLIR0(kX86Fstsw16R);
-
-  // Check if reduction is complete.
-  OpRegImm(kOpAnd, rs_rAX, 0x400);
-
-  // If no then continue to compute remainder.
-  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-  branch->target = retry;
-
-  FreeTemp(rs_rAX);
-
-  // Now store result in the destination VR's stack location.
-  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
-  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
-  LIR *fst = NewLIR2NoDest(opcode, rs_rSP.GetReg(), displacement);
-  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);
-
-  // Pop ST(1) and ST(0).
-  NewLIR0(kX86Fucompp);
-
-  /*
-   * The result is in a physical register if it was in a temp or was register
-   * promoted. For that reason it is enough to check if it is in physical
-   * register. If it is, then we must do all of the bookkeeping necessary to
-   * invalidate temp (if needed) and load in promoted register (if needed).
-   * If the result's location is in memory, then we do not need to do anything
-   * more since the fstp has already placed the correct value in memory.
-   */
-  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
-  if (rl_result.location == kLocPhysReg) {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    if (is_double) {
-      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
-      StoreFinalValueWide(rl_dest, rl_result);
-    } else {
-      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
-      StoreFinalValue(rl_dest, rl_result);
-    }
-  }
-}
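The retry loop is needed because x87 fprem computes only a partial remainder: each iteration
reduces the exponent difference by a bounded amount and sets the C2 status flag (bit 10, hence
the 0x400 mask after the status word is moved to AX) while work remains. Once it completes,
the result is the round-toward-zero remainder that Java's % requires, i.e. the same value as
std::fmod. A host-side model (sketch):

    #include <cmath>

    double JavaRem(double dividend, double divisor) {
      // Sign follows the dividend; NaN and infinity propagate as fprem does.
      return std::fmod(dividend, divisor);
    }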
-
-void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
-                          RegLocation rl_src1, RegLocation rl_src2) {
-  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
-  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
-  if (single) {
-    rl_src1 = LoadValue(rl_src1, kFPReg);
-    rl_src2 = LoadValue(rl_src2, kFPReg);
-  } else {
-    rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    rl_src2 = LoadValueWide(rl_src2, kFPReg);
-  }
-  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
-  ClobberSReg(rl_dest.s_reg_low);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
-  if (single) {
-    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  } else {
-    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  }
-  LIR* branch = nullptr;
-  if (unordered_gt) {
-    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-  }
-  // If the result reg can't be byte accessed, use a jump and move instead of a set.
-  if (!IsByteRegister(rl_result.reg)) {
-    LIR* branch2 = nullptr;
-    if (unordered_gt) {
-      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
-      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
-    } else {
-      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
-      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
-    }
-    branch2->target = NewLIR0(kPseudoTargetLabel);
-  } else {
-    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
-  }
-  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
-  if (unordered_gt) {
-    branch->target = NewLIR0(kPseudoTargetLabel);
-  }
-  StoreValue(rl_dest, rl_result);
-}
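The seta/sbb pair is a branch-free encoding of Java's three-way float comparison: after
ucomiss, seta leaves 1 exactly when src1 > src2, and 'sbb 0' subtracts the carry flag, which
is set when src1 < src2 or the operands are unordered. The parity-flag branch steers NaN so
that cmpg yields 1 and cmpl yields -1. A host-side model (sketch, not ART code):

    int CmpFloat(float a, float b, bool gt_bias) {
      if (a != a || b != b) {          // unordered: at least one NaN
        return gt_bias ? 1 : -1;       // CMPG_* -> 1, CMPL_* -> -1
      }
      if (a > b) return 1;             // the seta path
      if (a < b) return -1;            // the sbb path (carry set)
      return 0;
    }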
-
-void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
-                                     bool is_double) {
-  LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
-  LIR* branch = nullptr;
-  RegLocation rl_src1;
-  RegLocation rl_src2;
-  if (is_double) {
-    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
-    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-    rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  } else {
-    rl_src1 = mir_graph_->GetSrc(mir, 0);
-    rl_src2 = mir_graph_->GetSrc(mir, 1);
-    rl_src1 = LoadValue(rl_src1, kFPReg);
-    rl_src2 = LoadValue(rl_src2, kFPReg);
-    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
-  }
-  ConditionCode ccode = mir->meta.ccode;
-  switch (ccode) {
-    case kCondEq:
-      if (!gt_bias) {
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-        branch->target = not_taken;
-      }
-      break;
-    case kCondNe:
-      if (!gt_bias) {
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-        branch->target = taken;
-      }
-      break;
-    case kCondLt:
-      if (gt_bias) {
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-        branch->target = not_taken;
-      }
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      if (gt_bias) {
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-        branch->target = not_taken;
-      }
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      if (gt_bias) {
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-        branch->target = taken;
-      }
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      if (gt_bias) {
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
-        branch->target = taken;
-      }
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCondBranch(ccode, taken);
-}
-
-void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  rl_src = LoadValue(rl_src, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
-  StoreValue(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation rl_result;
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  if (cu_->target64) {
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    OpRegCopy(rl_result.reg, rl_src.reg);
-    // Flip sign bit.
-    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
-    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
-    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
-  } else {
-    rl_result = ForceTempWide(rl_src);
-    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), 0x80000000);
-  }
-  StoreValueWide(rl_dest, rl_result);
-}
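On 64-bit targets the rol/xor/ror triple flips just the IEEE-754 sign bit, presumably to avoid
materializing the 10-byte 0x8000000000000000 immediate: rotate the sign bit into bit 0, flip
it, rotate it back. Bit-level model (sketch):

    #include <cstdint>

    uint64_t NegateDoubleBits(uint64_t bits) {
      // Equivalent to bits ^ 0x8000000000000000: negates the encoded double,
      // including NaNs and infinities.
      uint64_t rolled = (bits << 1) | (bits >> 63);  // rol 1
      rolled ^= 1;                                   // xor 1
      return (rolled >> 1) | (rolled << 63);         // ror 1
    }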
-
-bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
-  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kFPReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
-  // Get the argument
-  RegLocation rl_src = info->args[0];
-
-  // Get the inlined intrinsic target virtual register
-  RegLocation rl_dest = InlineTarget(info);
-
-  // Get the virtual register number
-  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
-  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
-
-  // if argument is the same as inlined intrinsic target
-  if (v_src_reg == v_dst_reg) {
-    rl_src = UpdateLoc(rl_src);
-
-    // if argument is in the physical register
-    if (rl_src.location == kLocPhysReg) {
-      rl_src = LoadValue(rl_src, kCoreReg);
-      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
-      StoreValue(rl_dest, rl_src);
-      return true;
-    }
-    // the argument is in memory
-    DCHECK((rl_src.location == kLocDalvikFrame) ||
-           (rl_src.location == kLocCompilerTemp));
-
-    // Operate directly into memory.
-    int displacement = SRegOffset(rl_dest.s_reg_low);
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement, 0x7fffffff);
-    AnnotateDalvikRegAccess(lir, displacement >> 2, false /*is_load */, false /* is_64bit */);
-    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit*/);
-    return true;
-  } else {
-    rl_src = LoadValue(rl_src, kCoreReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
-    StoreValue(rl_dest, rl_result);
-    return true;
-  }
-}
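Math.abs on a float reduces to clearing the IEEE-754 sign bit, which is why the intrinsic can
operate on the raw bits in a core register, or even directly on the stack slot with a single
'and [sp+disp], 0x7fffffff'. Bit-level model (sketch, not ART code):

    #include <cstdint>
    #include <cstring>

    float AbsFloat(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      bits &= 0x7fffffffu;             // clear the sign bit; NaN payloads survive
      std::memcpy(&f, &bits, sizeof(f));
      return f;
    }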
-
-bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_dest = InlineTargetWide(info);
-  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  if (cu_->target64) {
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegCopyWide(rl_result.reg, rl_src.reg);
-    OpRegImm(kOpLsl, rl_result.reg, 1);
-    OpRegImm(kOpLsr, rl_result.reg, 1);
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  }
-  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
-  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
-  rl_src = UpdateLocWide(rl_src);
-
-  // if argument is in the physical XMM register
-  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
-    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-    if (rl_result.reg != rl_src.reg) {
-      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
-      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-    } else {
-      RegStorage sign_mask = AllocTempDouble();
-      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
-      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
-      FreeTemp(sign_mask);
-    }
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  } else if (v_src_reg == v_dst_reg) {
-    // if argument is the same as inlined intrinsic target
-    // if argument is in the physical register
-    if (rl_src.location == kLocPhysReg) {
-      rl_src = LoadValueWide(rl_src, kCoreReg);
-      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
-      StoreValueWide(rl_dest, rl_src);
-      return true;
-    }
-    // the argument is in memory
-    DCHECK((rl_src.location == kLocDalvikFrame) ||
-           (rl_src.location == kLocCompilerTemp));
-
-    // Operate directly into memory.
-    int displacement = SRegOffset(rl_dest.s_reg_low);
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement + HIWORD_OFFSET,
-                       0x7fffffff);
-    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */,
-                            true /* is_64bit */);
-    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */,
-                            true /* is_64bit */);
-    return true;
-  } else {
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegCopyWide(rl_result.reg, rl_src.reg);
-    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  }
-}
-
-bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
-  if (is_double) {
-    RegLocation rl_dest = InlineTargetWide(info);
-    if (rl_dest.s_reg_low == INVALID_SREG) {
-      // Result is unused, the code is dead. Inlining successful, no code generated.
-      return true;
-    }
-    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
-    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
-    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);
-
-    // Avoid src2 corruption by OpRegCopyWide.
-    if (rl_result.reg == rl_src2.reg) {
-      std::swap(rl_src2.reg, rl_src1.reg);
-    }
-
-    OpRegCopyWide(rl_result.reg, rl_src1.reg);
-    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-    // If either arg is NaN, return NaN.
-    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
-    // Min/Max branches.
-    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
-    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
-    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
-    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
-    // Handle NaN.
-    branch_nan->target = NewLIR0(kPseudoTargetLabel);
-    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
-
-    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
-    // Handle Min/Max. Copy greater/lesser value from src2.
-    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
-    OpRegCopyWide(rl_result.reg, rl_src2.reg);
-    // Right operand is already in result reg.
-    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
-    // Exit.
-    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
-    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    RegLocation rl_dest = InlineTarget(info);
-    if (rl_dest.s_reg_low == INVALID_SREG) {
-      // Result is unused, the code is dead. Inlining successful, no code generated.
-      return true;
-    }
-    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
-    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-
-    // Avoid src2 corruption by OpRegCopyWide.
-    if (rl_result.reg == rl_src2.reg) {
-      std::swap(rl_src2.reg, rl_src1.reg);
-    }
-
-    OpRegCopy(rl_result.reg, rl_src1.reg);
-    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-    // If either arg is NaN, return NaN.
-    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
-    // Min/Max branches.
-    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
-    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
-    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
-    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
-    // Handle NaN.
-    branch_nan->target = NewLIR0(kPseudoTargetLabel);
-    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
-    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
-    // Handle Min/Max. Copy greater/lesser value from src2.
-    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
-    OpRegCopy(rl_result.reg, rl_src2.reg);
-    // Right operand is already in result reg.
-    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
-    // Exit.
-    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
-    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
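Three x86 quirks shape this intrinsic: ucomisd sets the parity flag for NaN (handled by
returning the canonical quiet NaN), an ordinary compare cannot distinguish 0.0 from -0.0, and
the equal case is therefore resolved by OR-ing (min) or AND-ing (max) the raw bit patterns so
that Java's min(0.0, -0.0) == -0.0 rule holds. A host-side model for min (sketch, not ART
code):

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    double JavaMinDouble(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::numeric_limits<double>::quiet_NaN();  // 0x7ff8000000000000
      }
      if (a < b) return a;
      if (b < a) return b;
      // Equal: min(0.0, -0.0) must be -0.0. OR-ing the bit patterns (orpd)
      // keeps the sign bit if either operand is negative zero.
      uint64_t ba, bb;
      std::memcpy(&ba, &a, sizeof(ba));
      std::memcpy(&bb, &b, sizeof(bb));
      const uint64_t br = ba | bb;
      double r;
      std::memcpy(&r, &br, sizeof(r));
      return r;
    }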
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
deleted file mode 100755
index a8706c3..0000000
--- a/compiler/dex/quick/x86/int_x86.cc
+++ /dev/null
@@ -1,3467 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This file contains codegen for the X86 ISA */
-
-#include "codegen_x86.h"
-
-#include "art_method.h"
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "mirror/array-inl.h"
-#include "x86_lir.h"
-
-namespace art {
-
-/*
- * Compare two 64-bit values
- *    x = y     return  0
- *    x < y     return -1
- *    x > y     return  1
- */
-void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  if (cu_->target64) {
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    RegStorage temp_reg = AllocTemp();
-    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);   // result = (src1 > src2) ? 1 : 0
-    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 >= src2) ? 0 : 1
-    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
-    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-
-    StoreValue(rl_dest, rl_result);
-    FreeTemp(temp_reg);
-    return;
-  }
-
-  // Prepare for explicit register usage
-  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
-  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
-  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
-  LoadValueDirectWideFixed(rl_src1, r_tmp1);
-  LoadValueDirectWideFixed(rl_src2, r_tmp2);
-  // Compute (r1:r0) = (r1:r0) - (r3:r2)
-  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
-  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
-  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
-  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
-  OpReg(kOpNeg, rs_r2);         // r2 = -r2
-  OpRegReg(kOpOr, rs_r0, rs_r1);   // r0 = high | low - sets ZF
-  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
-  NewLIR2(kX86Movzx8RR, rs_r0.GetReg(), rs_r0.GetReg());
-  OpRegReg(kOpOr, rs_r0, rs_r2);   // r0 = r0 | r2
-  RegLocation rl_result = LocCReturn();
-  StoreValue(rl_dest, rl_result);
-}
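Both paths compute the Java cmp-long result without branches. On 64-bit targets the setg/setl
pair is literally (x > y) - (x < y); the 32-bit pair-register path derives "less than" from the
sub/sbb carry and "not equal" from OR-ing the difference's halves, then combines them.
Host-side model of both (sketch):

    #include <cstdint>

    int32_t CmpLong64(int64_t x, int64_t y) {
      return (x > y) - (x < y);                    // 1, 0, or -1
    }

    int32_t CmpLong32(int64_t x, int64_t y) {
      int32_t lt = -static_cast<int32_t>(x < y);   // setl; movzx; neg -> 0 or -1
      int32_t ne = static_cast<int32_t>(x != y);   // or halves; setnz -> 0 or 1
      return lt | ne;                              // -1, 0, or 1
    }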
-
-X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
-  switch (cond) {
-    case kCondEq: return kX86CondEq;
-    case kCondNe: return kX86CondNe;
-    case kCondCs: return kX86CondC;
-    case kCondCc: return kX86CondNc;
-    case kCondUlt: return kX86CondC;
-    case kCondUge: return kX86CondNc;
-    case kCondMi: return kX86CondS;
-    case kCondPl: return kX86CondNs;
-    case kCondVs: return kX86CondO;
-    case kCondVc: return kX86CondNo;
-    case kCondHi: return kX86CondA;
-    case kCondLs: return kX86CondBe;
-    case kCondGe: return kX86CondGe;
-    case kCondLt: return kX86CondL;
-    case kCondGt: return kX86CondG;
-    case kCondLe: return kX86CondLe;
-    case kCondAl:
-    case kCondNv: LOG(FATAL) << "Should not reach here";
-  }
-  return kX86CondO;
-}
-
-LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
-  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
-  X86ConditionCode cc = X86ConditionEncoding(cond);
-  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
-  branch->target = target;
-  return branch;
-}
-
-LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
-                                int check_value, LIR* target) {
-  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
-    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
-    NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg());
-  } else {
-    if (reg.Is64Bit()) {
-      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
-    } else {
-      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
-    }
-  }
-  X86ConditionCode cc = X86ConditionEncoding(cond);
-  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
-  branch->target = target;
-  return branch;
-}
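The zero-compare special case uses 'test reg, reg' instead of 'cmp reg, 0': ANDing a register
with itself sets ZF and SF identically but avoids encoding an immediate. Equivalence (sketch):

    #include <cstdint>

    bool IsZeroViaTest(uint32_t r) { return (r & r) == 0; }  // test r, r; jz
    bool IsZeroViaCmp(uint32_t r) { return (r - 0) == 0; }   // cmp r, 0; jz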
-
-LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
-  // If src or dest is a pair, we'll be using low reg.
-  if (r_dest.IsPair()) {
-    r_dest = r_dest.GetLow();
-  }
-  if (r_src.IsPair()) {
-    r_src = r_src.GetLow();
-  }
-  if (r_dest.IsFloat() || r_src.IsFloat()) {
-    return OpFpRegCopy(r_dest, r_src);
-  }
-  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
-                    r_dest.GetReg(), r_src.GetReg());
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
-}
-
-void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
-    AppendLIR(res);
-  }
-}
-
-void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    bool dest_fp = r_dest.IsFloat();
-    bool src_fp = r_src.IsFloat();
-    if (dest_fp) {
-      if (src_fp) {
-        OpRegCopy(r_dest, r_src);
-      } else {
-        // TODO: Prevent this from happening in the code. The result is often
-        // unused or could have been loaded more easily from memory.
-        if (!r_src.IsPair()) {
-          DCHECK(!r_dest.IsPair());
-          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
-        } else {
-          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
-          RegStorage r_tmp = AllocTempDouble();
-          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
-          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
-          FreeTemp(r_tmp);
-        }
-      }
-    } else {
-      if (src_fp) {
-        if (!r_dest.IsPair()) {
-          DCHECK(!r_src.IsPair());
-          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
-        } else {
-          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
-          RegStorage temp_reg = AllocTempDouble();
-          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
-          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
-          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
-        }
-      } else {
-        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
-        if (!r_src.IsPair()) {
-          // Just copy the register directly.
-          OpRegCopy(r_dest, r_src);
-        } else {
-          // Handle overlap
-          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
-              r_src.GetLowReg() == r_dest.GetHighReg()) {
-            // Deal with cycles.
-            RegStorage temp_reg = AllocTemp();
-            OpRegCopy(temp_reg, r_dest.GetHigh());
-            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
-            OpRegCopy(r_dest.GetLow(), temp_reg);
-            FreeTemp(temp_reg);
-          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          } else {
-            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-          }
-        }
-      }
-    }
-  }
-}
-
-void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                  RegisterClass dest_reg_class) {
-  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
-  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
-
-  // We really need this check for correctness; otherwise we would need to do more checks in
-  // the non-zero/one case.
-  if (true_val == false_val) {
-    LoadConstantNoClobber(rs_dest, true_val);
-    return;
-  }
-
-  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
-
-  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
-  if (zero_one_case && IsByteRegister(rs_dest)) {
-    if (!dest_intersect) {
-      LoadConstantNoClobber(rs_dest, 0);
-    }
-    OpRegReg(kOpCmp, left_op, right_op);
-    // Set the low byte of the result to 0 or 1 from the compare condition code.
-    NewLIR2(kX86Set8R, rs_dest.GetReg(),
-            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
-    if (dest_intersect) {
-      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
-    }
-  } else {
-    // Be careful: rs_dest can be changed only after the cmp because it can be the same as one
-    // of the operands, and we cannot use xor to zero it because that would dirty the flags.
-    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
-    if (temp_reg.Valid()) {
-      if (false_val == 0 && dest_intersect) {
-        code = FlipComparisonOrder(code);
-        std::swap(true_val, false_val);
-      }
-      if (!dest_intersect) {
-        LoadConstantNoClobber(rs_dest, false_val);
-      }
-      LoadConstantNoClobber(temp_reg, true_val);
-      OpRegReg(kOpCmp, left_op, right_op);
-      if (dest_intersect) {
-        LoadConstantNoClobber(rs_dest, false_val);
-        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
-      }
-      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
-      FreeTemp(temp_reg);
-    } else {
-      // Slow path: no temp register available, so use branches instead of cmov.
-      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
-      LoadConstantNoClobber(rs_dest, false_val);
-      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
-      LIR* true_case = NewLIR0(kPseudoTargetLabel);
-      cmp_branch->target = true_case;
-      LoadConstantNoClobber(rs_dest, true_val);
-      LIR* end = NewLIR0(kPseudoTargetLabel);
-      that_is_it->target = end;
-    }
-  }
-}
-
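-// Illustrative sketch (not ART code): the zero/one fast path above emits
-// cmp/setcc(/movzx) to materialize a boolean without a branch. Assuming
-// <cstdint>, the generated sequence computes the equivalent of:
-static int32_t SelectZeroOneSketch(int32_t left, int32_t right) {
-  // cmp left, right; setl al; movzx eax, al  (kCondLt with true_val == 1)
-  return static_cast<int32_t>(left < right);
-}
-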
-void X86Mir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
-  RegLocation rl_result;
-  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
-  RegLocation rl_dest = mir_graph_->GetDest(mir);
-  // Avoid using float regs here.
-  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
-  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
-  ConditionCode ccode = mir->meta.ccode;
-
-  // The kMirOpSelect has two variants, one for constants and one for moves.
-  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
-
-  if (is_constant_case) {
-    int true_val = mir->dalvikInsn.vB;
-    int false_val = mir->dalvikInsn.vC;
-
-    // Simplest degenerate case: both constants are equal.
-    if (true_val == false_val) {
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      LoadConstantNoClobber(rl_result.reg, true_val);
-    } else {
-      // TODO: use GenSelectConst32 and handle additional opcode patterns such as
-      // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
-      rl_src = LoadValue(rl_src, src_reg_class);
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      /*
-       * For ccode == kCondEq:
-       *
-       * 1) When the true case is zero and result_reg is not same as src_reg:
-       *     xor result_reg, result_reg
-       *     cmp $0, src_reg
-       *     mov t1, $false_case
-       *     cmovnz result_reg, t1
-       * 2) When the false case is zero and result_reg is not same as src_reg:
-       *     xor result_reg, result_reg
-       *     cmp $0, src_reg
-       *     mov t1, $true_case
-       *     cmovz result_reg, t1
-       * 3) All other cases (we do compare first to set eflags):
-       *     cmp $0, src_reg
-       *     mov result_reg, $false_case
-       *     mov t1, $true_case
-       *     cmovz result_reg, t1
-       */
-      // FIXME: depending on how you use registers you could get a false != mismatch when dealing
-      // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
-      const bool result_reg_same_as_src =
-          (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
-      const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
-      const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
-      const bool catch_all_case = !(true_zero_case || false_zero_case);
-
-      if (true_zero_case || false_zero_case) {
-        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
-      }
-
-      if (true_zero_case || false_zero_case || catch_all_case) {
-        OpRegImm(kOpCmp, rl_src.reg, 0);
-      }
-
-      if (catch_all_case) {
-        OpRegImm(kOpMov, rl_result.reg, false_val);
-      }
-
-      if (true_zero_case || false_zero_case || catch_all_case) {
-        ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
-        int immediateForTemp = true_zero_case ? false_val : true_val;
-        RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
-        OpRegImm(kOpMov, temp1_reg, immediateForTemp);
-
-        OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
-
-        FreeTemp(temp1_reg);
-      }
-    }
-  } else {
-    rl_src = LoadValue(rl_src, src_reg_class);
-    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
-    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
-    rl_true = LoadValue(rl_true, result_reg_class);
-    rl_false = LoadValue(rl_false, result_reg_class);
-    rl_result = EvalLoc(rl_dest, result_reg_class, true);
-
-    /*
-     * For ccode == kCondEq:
-     *
-     * 1) When true case is already in place:
-     *     cmp $0, src_reg
-     *     cmovnz result_reg, false_reg
-     * 2) When false case is already in place:
-     *     cmp $0, src_reg
-     *     cmovz result_reg, true_reg
-     * 3) When neither case is in place:
-     *     cmp $0, src_reg
-     *     mov result_reg, false_reg
-     *     cmovz result_reg, true_reg
-     */
-
-    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
-    OpRegImm(kOpCmp, rl_src.reg, 0);
-
-    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
-      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
-    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
-      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
-    } else {
-      OpRegCopy(rl_result.reg, rl_false.reg);
-      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
-    }
-  }
-
-  StoreValue(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
-  LIR* taken = &block_label_list_[bb->taken];
-  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
-  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-  ConditionCode ccode = mir->meta.ccode;
-
-  if (rl_src1.is_const) {
-    std::swap(rl_src1, rl_src2);
-    ccode = FlipComparisonOrder(ccode);
-  }
-  if (rl_src2.is_const) {
-    // Do special compare/branch against simple const operand
-    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
-    return;
-  }
-
-  if (cu_->target64) {
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-
-    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-    OpCondBranch(ccode, taken);
-    return;
-  }
-
-  // Prepare for explicit register usage
-  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
-  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
-  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
-  LoadValueDirectWideFixed(rl_src1, r_tmp1);
-  LoadValueDirectWideFixed(rl_src2, r_tmp2);
-
-  // Swap operands and condition code to prevent use of zero flag.
-  if (ccode == kCondLe || ccode == kCondGt) {
-    // Compute (r3:r2) = (r3:r2) - (r1:r0)
-    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
-    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
-  } else {
-    // Compute (r1:r0) = (r1:r0) - (r3:r2)
-    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
-    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
-  }
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
-      break;
-    case kCondLe:
-      ccode = kCondGe;
-      break;
-    case kCondGt:
-      ccode = kCondLt;
-      break;
-    case kCondLt:
-    case kCondGe:
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCondBranch(ccode, taken);
-}
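-
-// Illustrative sketch (not ART code) of the pair compare above: a 64-bit
-// signed compare on 32-bit x86 is a sub of the low words followed by an sbb
-// of the high words; the borrow-adjusted high difference carries the sign.
-// kCondLe/kCondGt swap operands first so that only kCondLt/kCondGe (which do
-// not need the zero flag) remain. Assuming <cstdint>:
-static bool LessThan64ViaPairs(uint32_t a_lo, int32_t a_hi,
-                               uint32_t b_lo, int32_t b_hi) {
-  uint32_t borrow = (a_lo < b_lo) ? 1u : 0u;                // sub lo, lo sets CF
-  return (static_cast<int64_t>(a_hi) - b_hi - borrow) < 0;  // sbb hi, hi; kCondLt
-}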
-
-void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
-                                          int64_t val, ConditionCode ccode) {
-  int32_t val_lo = Low32Bits(val);
-  int32_t val_hi = High32Bits(val);
-  LIR* taken = &block_label_list_[bb->taken];
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
-
-  if (cu_->target64) {
-    if (is_equality_test && val == 0) {
-      // We can simplify the comparison of ==, != against 0.
-      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
-    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
-      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
-    } else {
-      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
-      LoadConstantWide(tmp, val);
-      OpRegReg(kOpCmp, rl_src1.reg, tmp);
-      FreeTemp(tmp);
-    }
-    OpCondBranch(ccode, taken);
-    return;
-  }
-
-  if (is_equality_test && val != 0) {
-    rl_src1 = ForceTempWide(rl_src1);
-  }
-  RegStorage low_reg = rl_src1.reg.GetLow();
-  RegStorage high_reg = rl_src1.reg.GetHigh();
-
-  if (is_equality_test) {
-    // We can simplify the comparison of ==, != against 0.
-    if (val == 0) {
-      if (IsTemp(low_reg)) {
-        OpRegReg(kOpOr, low_reg, high_reg);
-        // We have now changed it; ignore the old values.
-        Clobber(rl_src1.reg);
-      } else {
-        RegStorage t_reg = AllocTemp();
-        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
-        FreeTemp(t_reg);
-      }
-      OpCondBranch(ccode, taken);
-      return;
-    }
-
-    // Need to compute the actual value for ==, !=.
-    OpRegImm(kOpSub, low_reg, val_lo);
-    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
-    OpRegReg(kOpOr, high_reg, low_reg);
-    Clobber(rl_src1.reg);
-  } else if (ccode == kCondLe || ccode == kCondGt) {
-    // Swap operands and condition code to prevent use of zero flag.
-    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
-    LoadConstantWide(tmp, val);
-    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
-    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
-    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
-    FreeTemp(tmp);
-  } else {
-    // We can use a compare for the low word to set CF.
-    OpRegImm(kOpCmp, low_reg, val_lo);
-    if (IsTemp(high_reg)) {
-      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
-      // We have now changed it; ignore the old values.
-      Clobber(rl_src1.reg);
-    } else {
-      // mov temp_reg, high_reg; sbb temp_reg, high_constant
-      RegStorage t_reg = AllocTemp();
-      OpRegCopy(t_reg, high_reg);
-      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
-      FreeTemp(t_reg);
-    }
-  }
-
-  OpCondBranch(ccode, taken);
-}
-
-void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
-  // It does not make sense to calculate magic and shift for zero divisor.
-  DCHECK_NE(divisor, 0);
-
-  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
-   * T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication",
-   * the magic number M and shift S can be calculated in the following way:
-   * Let nc be the most positive value of numerator (n) such that nc = kd - 1,
-   * where divisor (d) >= 2.
-   * Let nc be the most negative value of numerator (n) such that nc = kd + 1,
-   * where divisor (d) <= -2.
-   * Thus nc can be calculated as:
-   * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
-   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
-   *
-   * So the shift p is the smallest p satisfying
-   * 2^p > nc * (d - 2^p % d), where d >= 2
-   * 2^p > nc * (d + 2^p % d), where d <= -2.
-   *
-   * The magic number M is calculated by
-   * M = (2^p + d - 2^p % d) / d, where d >= 2
-   * M = (2^p - d - 2^p % d) / d, where d <= -2.
-   *
-   * Notice that p is always greater than or equal to 32/64, so we just return p-32/p-64 as
-   * the shift number S.
-   */
-
-  int64_t p = (is_long) ? 63 : 31;
-  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
-
-  // Initialize the computations.
-  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
-  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
-                                    static_cast<uint32_t>(divisor) >> 31);
-  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
-  uint64_t quotient1 = exp / abs_nc;
-  uint64_t remainder1 = exp % abs_nc;
-  uint64_t quotient2 = exp / abs_d;
-  uint64_t remainder2 = exp % abs_d;
-
-  /*
-   * Hacker's Delight introduces a method that handles positive and negative
-   * divisors together, so the two cases need not be duplicated.
-   */
-  uint64_t delta;
-  do {
-    p++;
-    quotient1 = 2 * quotient1;
-    remainder1 = 2 * remainder1;
-    if (remainder1 >= abs_nc) {
-      quotient1++;
-      remainder1 = remainder1 - abs_nc;
-    }
-    quotient2 = 2 * quotient2;
-    remainder2 = 2 * remainder2;
-    if (remainder2 >= abs_d) {
-      quotient2++;
-      remainder2 = remainder2 - abs_d;
-    }
-    delta = abs_d - remainder2;
-  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
-
-  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
-
-  if (!is_long) {
-    magic = static_cast<int>(magic);
-  }
-
-  shift = (is_long) ? p - 64 : p - 32;
-}
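-
-// Illustrative usage (not ART code): for divisor 7 in the 32-bit case the
-// routine above produces the classic Hacker's Delight constants,
-// magic == int32_t(0x92492493) and shift == 2:
-//   int64_t magic; int shift;
-//   CalculateMagicAndShift(7, magic, shift, false /* is_long */);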
-
-RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                     RegStorage reg_lo ATTRIBUTE_UNUSED,
-                                     int lit ATTRIBUTE_UNUSED,
-                                     bool is_div ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
-  UNREACHABLE();
-}
-
-RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
-                                     int imm, bool is_div) {
-  // Use a multiply (and fixup) to perform an int div/rem by a constant.
-  RegLocation rl_result;
-
-  if (imm == 1) {
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    if (is_div) {
-      // x / 1 == x.
-      LoadValueDirectFixed(rl_src, rl_result.reg);
-    } else {
-      // x % 1 == 0.
-      LoadConstantNoClobber(rl_result.reg, 0);
-    }
-  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    if (is_div) {
-      LoadValueDirectFixed(rl_src, rl_result.reg);
-
-      // Check if numerator is 0
-      OpRegImm(kOpCmp, rl_result.reg, 0);
-      LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
-
-      // handle 0x80000000 / -1
-      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
-      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
-
-      // for x != MIN_INT, x / -1 == -x.
-      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
-
-      // The result register already contains the right value (0x80000000).
-      minint_branch->target = NewLIR0(kPseudoTargetLabel);
-      branch->target = NewLIR0(kPseudoTargetLabel);
-    } else {
-      // x % -1 == 0.
-      LoadConstantNoClobber(rl_result.reg, 0);
-    }
-  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
-    // Division using shifting.
-    rl_src = LoadValue(rl_src, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    if (IsSameReg(rl_result.reg, rl_src.reg)) {
-      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
-      rl_result.reg.SetReg(rs_temp.GetReg());
-    }
-
-    // Check if numerator is 0
-    OpRegImm(kOpCmp, rl_src.reg, 0);
-    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-    LoadConstantNoClobber(rl_result.reg, 0);
-    LIR* done = NewLIR1(kX86Jmp8, 0);
-    branch->target = NewLIR0(kPseudoTargetLabel);
-
-    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
-    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
-    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
-    int shift_amount = CTZ(imm);
-    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
-    if (imm < 0) {
-      OpReg(kOpNeg, rl_result.reg);
-    }
-    done->target = NewLIR0(kPseudoTargetLabel);
-  } else {
-    CHECK(imm <= -2 || imm >= 2);
-
-    // Use H. S. Warren's Hacker's Delight Chapter 10 and
-    // T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication".
-    int64_t magic;
-    int shift;
-    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
-
-    /*
-     * For imm >= 2,
-     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
-     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
-     * For imm <= -2,
-     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
-     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
-     * We implement this algorithm in the following way:
-     * 1. multiply the magic number M and the numerator n, getting the high 32 bits in EDX
-     * 2. if imm > 0 and magic < 0, add the numerator to EDX
-     *    if imm < 0 and magic > 0, subtract the numerator from EDX
-     * 3. if S != 0, arithmetically shift EDX right by S bits
-     * 4. add 1 to EDX if EDX < 0
-     * 5. Thus, EDX is the quotient
-     */
-
-    FlushReg(rs_r0);
-    Clobber(rs_r0);
-    LockTemp(rs_r0);
-    FlushReg(rs_r2);
-    Clobber(rs_r2);
-    LockTemp(rs_r2);
-
-    // Assume that the result will be in EDX for divide, and EAX for remainder.
-    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0,
-                 INVALID_SREG, INVALID_SREG};
-
-    // We need the value at least twice.  Load into a temp.
-    rl_src = LoadValue(rl_src, kCoreReg);
-    RegStorage numerator_reg = rl_src.reg;
-
-    // Check if numerator is 0.
-    OpRegImm(kOpCmp, numerator_reg, 0);
-    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-    // Return result 0 if numerator was 0.
-    LoadConstantNoClobber(rl_result.reg, 0);
-    LIR* done = NewLIR1(kX86Jmp8, 0);
-    branch->target = NewLIR0(kPseudoTargetLabel);
-
-    // EAX = magic.
-    LoadConstant(rs_r0, magic);
-
-    // EDX:EAX = magic * numerator.
-    NewLIR1(kX86Imul32DaR, numerator_reg.GetReg());
-
-    if (imm > 0 && magic < 0) {
-      // Add numerator to EDX.
-      DCHECK(numerator_reg.Valid());
-      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
-    } else if (imm < 0 && magic > 0) {
-      DCHECK(numerator_reg.Valid());
-      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
-    }
-
-    // Do we need the shift?
-    if (shift != 0) {
-      // Shift EDX by 'shift' bits.
-      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
-    }
-
-    // Add 1 to EDX if EDX < 0.
-
-    // Move EDX to EAX.
-    OpRegCopy(rs_r0, rs_r2);
-
-    // Move sign bit to bit 0, zeroing the rest.
-    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
-
-    // EDX = EDX + EAX.
-    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
-
-    // Quotient is in EDX.
-    if (!is_div) {
-      // We need to compute the remainder.
-      // Remainder is numerator - (quotient * imm).
-      DCHECK(numerator_reg.Valid());
-      OpRegCopy(rs_r0, numerator_reg);
-
-      // EDX = quotient * imm.
-      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
-
-      // EAX -= EDX.
-      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
-
-      // For this case, return the result in EAX.
-    }
-    done->target = NewLIR0(kPseudoTargetLabel);
-  }
-
-  return rl_result;
-}
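-
-// Illustrative sketch (not ART code) of steps 1-5 above, for the 32-bit case;
-// an arithmetic right shift is assumed. Assuming <cstdint>, with names
-// mirroring the registers the generated code uses:
-static int32_t DivByConstViaMagic(int32_t n, int32_t imm, int32_t magic, int shift) {
-  int64_t product = static_cast<int64_t>(magic) * n;
-  int32_t edx = static_cast<int32_t>(product >> 32);  // step 1: high 32 bits
-  if (imm > 0 && magic < 0) edx += n;                 // step 2: fixup add
-  if (imm < 0 && magic > 0) edx -= n;                 //         or fixup sub
-  edx >>= shift;                                      // step 3: SAR by S
-  edx += static_cast<uint32_t>(edx) >> 31;            // step 4: add 1 if negative
-  return edx;                                         // step 5: quotient
-}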
-
-RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                  RegStorage reg_lo ATTRIBUTE_UNUSED,
-                                  RegStorage reg_hi ATTRIBUTE_UNUSED,
-                                  bool is_div ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
-  UNREACHABLE();
-}
-
-RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                  RegLocation rl_src1,
-                                  RegLocation rl_src2,
-                                  bool is_div,
-                                  int flags) {
-  // We have to use fixed registers, so flush all the temps.
-
-  // Prepare for explicit register usage.
-  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
-
-  // Load LHS into EAX.
-  LoadValueDirectFixed(rl_src1, rs_r0);
-
-  // Load RHS into ECX.
-  LoadValueDirectFixed(rl_src2, rs_r1);
-
-  // Copy LHS sign bit into EDX.
-  NewLIR0(kx86Cdq32Da);
-
-  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-    // Handle division by zero case.
-    GenDivZeroCheck(rs_r1);
-  }
-
-  // Check if numerator is 0
-  OpRegImm(kOpCmp, rs_r0, 0);
-  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
-
-  // Have to catch 0x80000000/-1 case, or we will get an exception!
-  OpRegImm(kOpCmp, rs_r1, -1);
-  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-
-  // RHS is -1.
-  OpRegImm(kOpCmp, rs_r0, 0x80000000);
-  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-
-  branch->target = NewLIR0(kPseudoTargetLabel);
-
-  // In 0x80000000/-1 case.
-  if (!is_div) {
-    // For DIV, EAX is already right. For REM, we need EDX to be 0.
-    LoadConstantNoClobber(rs_r2, 0);
-  }
-  LIR* done = NewLIR1(kX86Jmp8, 0);
-
-  // Expected case.
-  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
-  minint_branch->target = minus_one_branch->target;
-  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
-  done->target = NewLIR0(kPseudoTargetLabel);
-
-  // Result is in EAX for div and EDX for rem.
-  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
-  if (!is_div) {
-    rl_result.reg.SetReg(r2);
-  }
-  return rl_result;
-}
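-
-// Illustrative sketch (not ART code) of why the guards above exist: x86 idiv
-// raises #DE for both divide-by-zero and INT_MIN / -1, so those cases are
-// peeled off before the instruction. Assuming <cstdint> and <climits>:
-static int32_t SafeDiv32(int32_t n, int32_t d) {
-  // d == 0 is assumed to have been rejected already (GenDivZeroCheck).
-  if (n == 0) return 0;                     // matches the numerator-zero branch
-  if (d == -1 && n == INT32_MIN) return n;  // idiv would fault; result wraps to n
-  return n / d;                             // now safe to emit idiv
-}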
-
-static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
-  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
-}
-
-bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
-  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
-
-  if (is_long && !cu_->target64) {
-   /*
-    * We want to implement the following algorithm
-    * mov eax, low part of arg1
-    * mov edx, high part of arg1
-    * mov ebx, low part of arg2
-    * mov ecx, high part of arg2
-    * mov edi, eax
-    * sub edi, ebx
-    * mov edi, edx
-    * sbb edi, ecx
-    * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
-    * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
-    *
-    * The algorithm above needs 5 registers: a pair for the first operand
-    * (which later will be used as result), a pair for the second operand
-    * and a temp register (e.g. 'edi') for intermediate calculations.
-    * Ideally we have 6 GP caller-save registers in 32-bit mode:
-    * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi', so there should
-    * always be enough registers to operate on. In practice, the pair
-    * 'edi' and 'esi' holds promoted values and sometimes must be
-    * treated as callee-save. If one of the operands is in the
-    * promoted registers then we have enough registers to operate on;
-    * otherwise we are short of resources and have to save 'edi'
-    * before the calculation and restore it afterwards.
-    */
-
-    RegLocation rl_src1 = info->args[0];
-    RegLocation rl_src2 = info->args[2];
-    RegLocation rl_dest = InlineTargetWide(info);
-
-    if (rl_dest.s_reg_low == INVALID_SREG) {
-      // Result is unused, the code is dead. Inlining successful, no code generated.
-      return true;
-    }
-
-    if (PartiallyIntersects(rl_src1, rl_dest) &&
-        PartiallyIntersects(rl_src2, rl_dest)) {
-      // A special case we don't want to handle:
-      // src1 is mapped to v0/v1, src2 is mapped to v2/v3,
-      // and the result is mapped to v1/v2.
-      return false;
-    }
-
-    /*
-     * If the result register is the same as the second element, then we
-     * need to be careful. The reason is that the first copy will
-     * inadvertently clobber the second element with the first one thus
-     * yielding the wrong result. Thus we do a swap in that case.
-     */
-    if (Intersects(rl_src2, rl_dest)) {
-      std::swap(rl_src1, rl_src2);
-    }
-
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-    // Pick the first integer as min/max.
-    OpRegCopyWide(rl_result.reg, rl_src1.reg);
-
-    /*
-     * If the integers are both in the same register, then there is
-     * nothing else to do because they are equal and we have already
-     * moved one into the result.
-     */
-    if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
-        mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
-      StoreValueWide(rl_dest, rl_result);
-      return true;
-    }
-
-    // Free registers to make some room for the second operand.
-    // But don't try to free part of a source which intersects
-    // part of result or promoted registers.
-
-    if (IsTemp(rl_src1.reg.GetLow()) &&
-       (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
-       (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
-      // Is low part temporary and doesn't intersect any parts of result?
-      FreeTemp(rl_src1.reg.GetLow());
-    }
-
-    if (IsTemp(rl_src1.reg.GetHigh()) &&
-       (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
-       (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
-      // Is high part temporary and doesn't intersect any parts of result?
-      FreeTemp(rl_src1.reg.GetHigh());
-    }
-
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-
-    // Do we have a free register for intermediate calculations?
-    RegStorage tmp = AllocTemp(false);
-    const int kRegSize = cu_->target64 ? 8 : 4;
-    if (tmp == RegStorage::InvalidReg()) {
-       /*
-        * No, will use 'edi'.
-        *
-        * As mentioned above we have 4 temporary and 2 promotable
-        * caller-save registers. Therefore, we assume that a free
-        * register can be allocated only if 'esi' and 'edi' are
-        * already used as operands. If the number of promotable registers
-        * ever increases from 2 to 4, our assumption fails and operand
-        * data is corrupted.
-        * Let's DCHECK it.
-        */
-       DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
-              IsTemp(rl_src2.reg.GetHigh()) &&
-              IsTemp(rl_result.reg.GetLow()) &&
-              IsTemp(rl_result.reg.GetHigh()));
-       tmp = rs_rDI;
-       NewLIR1(kX86Push32R, tmp.GetReg());
-       cfi_.AdjustCFAOffset(kRegSize);
-       // Record cfi only if it is not already spilled.
-       if (!CoreSpillMaskContains(tmp.GetReg())) {
-         cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
-       }
-    }
-
-    // Now we are ready to do calculations.
-    OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
-    OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
-    OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
-    OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());
-
-    // Pop 'edi' here to break up the dependency chain a bit.
-    if (tmp == rs_rDI) {
-      NewLIR1(kX86Pop32R, tmp.GetReg());
-      cfi_.AdjustCFAOffset(-kRegSize);
-      if (!CoreSpillMaskContains(tmp.GetReg())) {
-        cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
-      }
-    } else {
-      FreeTemp(tmp);
-    }
-
-    // Conditionally move the other integer into the destination register.
-    ConditionCode cc = is_min ? kCondGe : kCondLt;
-    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
-    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
-    FreeTemp(rl_src2.reg);
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  }
-
-  // Get the two arguments to the invoke and place them in GP registers.
-  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
-  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
-  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
-
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  /*
-   * If the result register is the same as the second element, then we need to be careful.
-   * The reason is that the first copy will inadvertently clobber the second element with
-   * the first one thus yielding the wrong result. Thus we do a swap in that case.
-   */
-  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
-    std::swap(rl_src1, rl_src2);
-  }
-
-  // Pick the first integer as min/max.
-  OpRegCopy(rl_result.reg, rl_src1.reg);
-
-  // If the integers are both in the same register, then there is nothing else to do
-  // because they are equal and we have already moved one into the result.
-  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
-    // It is possible we didn't pick correctly so do the actual comparison now.
-    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-
-    // Conditionally move the other integer into the destination register.
-    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
-    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
-  }
-
-  if (is_long) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
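-
-// Illustrative sketch (not ART code) of the wide min/max path above: the
-// sub/sbc pair computes only the flags of (first - second), and two cmovs
-// then pull the second operand in if it wins. Assuming <cstdint>:
-static int64_t MinLongViaPairs(int64_t a, int64_t b) {
-  uint32_t a_lo = static_cast<uint32_t>(a), b_lo = static_cast<uint32_t>(b);
-  int32_t a_hi = static_cast<int32_t>(a >> 32), b_hi = static_cast<int32_t>(b >> 32);
-  uint32_t borrow = (a_lo < b_lo) ? 1u : 0u;                        // sub lo, lo
-  bool a_ge_b = (static_cast<int64_t>(a_hi) - b_hi - borrow) >= 0;  // sbb hi, hi
-  return a_ge_b ? b : a;  // cmov (kCondGe for min): replace the first pick with b
-}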
-
-bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src_address = info->args[0];  // long address
-  RegLocation rl_address;
-  if (!cu_->target64) {
-    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
-    rl_address = LoadValue(rl_src_address, kCoreReg);
-  } else {
-    rl_address = LoadValueWide(rl_src_address, kCoreReg);
-  }
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // Unaligned access is allowed on x86.
-  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-  if (size == k64) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
-
-bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  RegLocation rl_address;
-  if (!cu_->target64) {
-    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
-    rl_address = LoadValue(rl_src_address, kCoreReg);
-  } else {
-    rl_address = LoadValueWide(rl_src_address, kCoreReg);
-  }
-  RegLocation rl_src_value = info->args[2];  // [size] value
-  RegLocation rl_value;
-  if (size == k64) {
-    // Unaligned access is allowed on x86.
-    rl_value = LoadValueWide(rl_src_value, kCoreReg);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
-    if (!cu_->target64 && size == kSignedByte) {
-      rl_src_value = UpdateLocTyped(rl_src_value);
-      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
-        RegStorage temp = AllocateByteRegister();
-        OpRegCopy(temp, rl_src_value.reg);
-        rl_value.reg = temp;
-      } else {
-        rl_value = LoadValue(rl_src_value, kCoreReg);
-      }
-    } else {
-      rl_value = LoadValue(rl_src_value, kCoreReg);
-    }
-  }
-  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-  return true;
-}
-
-void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
-  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
-}
-
-void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
-  DCHECK_EQ(kX86, cu_->instruction_set);
-  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
-}
-
-void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
-  DCHECK_EQ(kX86_64, cu_->instruction_set);
-  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
-}
-
-static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
-  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
-}
-
-bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
-  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
-  // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
-  RegLocation rl_src_offset = info->args[2];  // long low
-  if (!cu_->target64) {
-    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
-  }
-  RegLocation rl_src_expected = info->args[4];  // int, long or Object
-  // If is_long, high half is in info->args[5]
-  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
-  // If is_long, high half is in info->args[7]
-  const int kRegSize = cu_->target64 ? 8 : 4;
-
-  if (is_long && cu_->target64) {
-    // RAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in RAX.
-    FlushReg(rs_r0q);
-    Clobber(rs_r0q);
-    LockTemp(rs_r0q);
-
-    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
-    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
-    LoadValueDirectWide(rl_src_expected, rs_r0q);
-    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
-            rl_new_value.reg.GetReg());
-
-    // After a store we need to insert a barrier in case of a potential load. Since the
-    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
-    GenMemBarrier(kAnyAny);
-
-    FreeTemp(rs_r0q);
-  } else if (is_long) {
-    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
-    FlushAllRegs();
-    LockCallTemps();
-    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
-    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
-    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
-    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
-    // FIXME: needs 64-bit update.
-    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
-    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
-    DCHECK(!obj_in_si || !obj_in_di);
-    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
-    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
-    DCHECK(!off_in_si || !off_in_di);
-    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
-    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
-    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
-    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
-    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
-    if (push_di) {
-      NewLIR1(kX86Push32R, rs_rDI.GetReg());
-      MarkTemp(rs_rDI);
-      LockTemp(rs_rDI);
-      cfi_.AdjustCFAOffset(kRegSize);
-      // Record cfi only if it is not already spilled.
-      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
-        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
-      }
-    }
-    if (push_si) {
-      NewLIR1(kX86Push32R, rs_rSI.GetReg());
-      MarkTemp(rs_rSI);
-      LockTemp(rs_rSI);
-      cfi_.AdjustCFAOffset(kRegSize);
-      // Record cfi only if it is not already spilled.
-      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
-        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
-      }
-    }
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
-    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-    if (!obj_in_si && !obj_in_di) {
-      LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
-      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
-      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
-      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
-      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
-    }
-    if (!off_in_si && !off_in_di) {
-      LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
-      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
-      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
-      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
-      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
-    }
-    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
-
-    // After a store we need to insert a barrier to prevent reordering with either
-    // earlier or later memory accesses.  Since
-    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
-    // and it will be associated with the cmpxchg instruction, preventing both.
-    GenMemBarrier(kAnyAny);
-
-    if (push_si) {
-      FreeTemp(rs_rSI);
-      UnmarkTemp(rs_rSI);
-      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
-      cfi_.AdjustCFAOffset(-kRegSize);
-      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
-        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
-      }
-    }
-    if (push_di) {
-      FreeTemp(rs_rDI);
-      UnmarkTemp(rs_rDI);
-      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
-      cfi_.AdjustCFAOffset(-kRegSize);
-      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
-        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
-      }
-    }
-    FreeCallTemps();
-  } else {
-    // EAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in EAX.
-    FlushReg(rs_r0);
-    Clobber(rs_r0);
-    LockTemp(rs_r0);
-
-    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-    RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
-
-    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
-      // Mark card for object assuming new value is stored.
-      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
-      MarkGCCard(0, rl_new_value.reg, rl_object.reg);
-      LockTemp(rs_r0);
-    }
-
-    RegLocation rl_offset;
-    if (cu_->target64) {
-      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
-    } else {
-      rl_offset = LoadValue(rl_src_offset, kCoreReg);
-    }
-    LoadValueDirect(rl_src_expected, rs_r0);
-    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
-            rl_new_value.reg.GetReg());
-
-    // After a store we need to insert a barrier to prevent reordering with either
-    // earlier or later memory accesses.  Since
-    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
-    // and it will be associated with the cmpxchg instruction, preventing both.
-    GenMemBarrier(kAnyAny);
-
-    FreeTemp(rs_r0);
-  }
-
-  // Convert ZF to boolean
-  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage result_reg = rl_result.reg;
-
-  // For 32-bit, SETcc only works with EAX..EDX.
-  if (!IsByteRegister(result_reg)) {
-    result_reg = AllocateByteRegister();
-  }
-  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
-  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
-  if (IsTemp(result_reg)) {
-    FreeTemp(result_reg);
-  }
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
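-
-// Illustrative sketch (not ART code): the intrinsic above has the semantics of
-// a strong compare-and-set with full-barrier ordering, which lock cmpxchg
-// provides. Assuming <atomic> and <cstdint>:
-static bool CompareAndSwapSketch(std::atomic<int32_t>* addr,
-                                 int32_t expected, int32_t new_value) {
-  // lock cmpxchg sets ZF iff *addr matched expected and was replaced;
-  // the setcc/movzx tail above converts that flag into the boolean result.
-  return addr->compare_exchange_strong(expected, new_value,
-                                       std::memory_order_seq_cst);
-}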
-
-void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
-  RegStorage r_temp = AllocTemp();
-  OpRegCopy(r_temp, result_reg);
-  OpRegImm(kOpLsr, result_reg, shift);
-  OpRegImm(kOpAnd, r_temp, value);
-  OpRegImm(kOpAnd, result_reg, value);
-  OpRegImm(kOpLsl, r_temp, shift);
-  OpRegReg(kOpOr, result_reg, r_temp);
-  FreeTemp(r_temp);
-}
-
-void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
-  RegStorage r_temp = AllocTempWide();
-  OpRegCopy(r_temp, result_reg);
-  OpRegImm(kOpLsr, result_reg, shift);
-  RegStorage r_value = AllocTempWide();
-  LoadConstantWide(r_value, value);
-  OpRegReg(kOpAnd, r_temp, r_value);
-  OpRegReg(kOpAnd, result_reg, r_value);
-  OpRegImm(kOpLsl, r_temp, shift);
-  OpRegReg(kOpOr, result_reg, r_temp);
-  FreeTemp(r_temp);
-  FreeTemp(r_value);
-}
-
-bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
-  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
-  if (rl_dest.s_reg_low == INVALID_SREG) {
-    // Result is unused, the code is dead. Inlining successful, no code generated.
-    return true;
-  }
-  RegLocation rl_src_i = info->args[0];
-  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
-                                   : LoadValue(rl_src_i, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == k64) {
-    if (cu_->instruction_set == kX86_64) {
-      /* Use one bswap instruction to reverse the byte order first, then use 3 rounds of
-         bit swapping to reverse the bits of a long number x. Using bswap saves instructions
-         compared to the generic luni implementation, which needs 5 rounds of bit swapping.
-         x = bswap x
-         x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
-         x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
-         x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
-      */
-      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
-      SwapBits64(rl_result.reg, 1, 0x5555555555555555);
-      SwapBits64(rl_result.reg, 2, 0x3333333333333333);
-      SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
-      StoreValueWide(rl_dest, rl_result);
-      return true;
-    }
-    RegStorage r_i_low = rl_i.reg.GetLow();
-    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
-      // The first REV would clobber rl_result.reg.GetLowReg(), so save the value in a temp for
-      // the second REV.
-      r_i_low = AllocTemp();
-      OpRegCopy(r_i_low, rl_i.reg);
-    }
-    OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
-    OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
-    // Free up at least one input register if it was a temp. Otherwise we may be in the bad
-    // situation of not having a temp available for SwapBits. Make sure it's not overlapping
-    // with the output, though.
-    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
-      // There's definitely a free temp after this.
-      FreeTemp(r_i_low);
-    } else {
-      // We opportunistically release both here. That saves duplication of the register state
-      // lookup (to see if it's actually a temp).
-      if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
-        FreeTemp(rl_i.reg.GetLow());
-      }
-      if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
-          rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
-        FreeTemp(rl_i.reg.GetHigh());
-      }
-    }
-
-    SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
-    SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
-    SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
-    SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
-    SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
-    SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
-    SwapBits(rl_result.reg, 1, 0x55555555);
-    SwapBits(rl_result.reg, 2, 0x33333333);
-    SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
-    StoreValue(rl_dest, rl_result);
-  }
-  return true;
-}
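-
-// Illustrative sketch (not ART code) of the bswap-plus-three-rounds scheme
-// above; __builtin_bswap64 stands in for the bswap instruction. Assuming
-// <cstdint>:
-static uint64_t ReverseBits64Sketch(uint64_t x) {
-  x = __builtin_bswap64(x);  // reverse byte order; bits now only wrong within bytes
-  x = ((x & 0x5555555555555555ULL) << 1) | ((x >> 1) & 0x5555555555555555ULL);
-  x = ((x & 0x3333333333333333ULL) << 2) | ((x >> 2) & 0x3333333333333333ULL);
-  x = ((x & 0x0F0F0F0F0F0F0F0FULL) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
-  return x;
-}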
-
-void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
-  if (cu_->target64) {
-    // We can do this directly using RIP addressing.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-    LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
-    res->target = target;
-    res->flags.fixup = kFixupLoad;
-    return;
-  }
-
-  // Get the PC to a register and get the anchor.
-  LIR* anchor;
-  RegStorage r_pc = GetPcAndAnchor(&anchor);
-
-  // Load the proper value from the literal area.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
-  res->operands[4] = WrapPointer(anchor);
-  res->target = target;
-  res->flags.fixup = kFixupLoad;
-}
-
-bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
-  return dex_cache_arrays_layout_.Valid();
-}
-
-LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
-  DCHECK(!cu_->target64);
-  LIR* call = NewLIR1(kX86CallI, 0);
-  call->flags.fixup = kFixupLabel;
-  LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
-  pop->flags.fixup = kFixupLabel;
-  DCHECK(NEXT_LIR(call) == pop);
-  return call;
-}
-
-RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
-  if (pc_rel_base_reg_.Valid()) {
-    DCHECK(setup_pc_rel_base_reg_ != nullptr);
-    *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
-    DCHECK(*anchor != nullptr);
-    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
-    pc_rel_base_reg_used_ = true;
-    return pc_rel_base_reg_;
-  } else {
-    RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
-    LIR* load_pc = OpLoadPc(r_pc);
-    *anchor = NEXT_LIR(load_pc);
-    DCHECK(*anchor != nullptr);
-    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
-    return r_pc;
-  }
-}
-
-void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
-                                          bool wide) {
-  if (cu_->target64) {
-    LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg,
-        kDummy32BitOffset);
-    mov->flags.fixup = kFixupLabel;
-    mov->operands[3] = WrapPointer(dex_file);
-    mov->operands[4] = offset;
-    mov->target = mov;  // Used for pc_insn_offset (not used by x86-64 relative patcher).
-    dex_cache_access_insns_.push_back(mov);
-  } else {
-    CHECK(!wide) << "Unsupported";
-    // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
-    LIR* anchor;
-    RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
-    LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
-    mov->flags.fixup = kFixupLabel;
-    mov->operands[3] = WrapPointer(dex_file);
-    mov->operands[4] = offset;
-    mov->target = anchor;  // Used for pc_insn_offset.
-    dex_cache_access_insns_.push_back(mov);
-  }
-}
-
-LIR* X86Mir2Lir::OpVldm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpVldm for x86";
-  UNREACHABLE();
-}
-
-LIR* X86Mir2Lir::OpVstm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpVstm for x86";
-  UNREACHABLE();
-}
-
-void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
-                                               RegLocation rl_result,
-                                               int lit ATTRIBUTE_UNUSED,
-                                               int first_bit,
-                                               int second_bit) {
-  RegStorage t_reg = AllocTemp();
-  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
-  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
-  FreeTemp(t_reg);
-  if (first_bit != 0) {
-    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
-  }
-}
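-
-// Illustrative sketch (not ART code) of the two-bit-multiplier strategy above:
-// for a literal with set bits at first_bit and second_bit, x * lit equals
-// ((x << (second_bit - first_bit)) + x) << first_bit. Assuming <cstdint>:
-static int32_t MulTwoBitLiteral(int32_t x, int first_bit, int second_bit) {
-  int32_t t = x << (second_bit - first_bit);  // shl into the temp
-  t += x;                                     // add
-  return t << first_bit;                      // final shl if first_bit != 0
-}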
-
-void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  if (cu_->target64) {
-    DCHECK(reg.Is64Bit());
-
-    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
-  } else {
-    DCHECK(reg.IsPair());
-
-    // We are not supposed to clobber the incoming storage, so allocate a temporary.
-    RegStorage t_reg = AllocTemp();
-    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
-    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
-    // The temp is no longer needed so free it at this time.
-    FreeTemp(t_reg);
-  }
-
-  // In case of zero, throw ArithmeticException.
-  GenDivZeroCheck(kCondEq);
-}
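-
-// Illustrative sketch (not ART code): on 32-bit targets the wide zero check
-// above ORs the two halves, since a 64-bit value is zero iff (lo | hi) == 0.
-// Assuming <cstdint>:
-static bool IsZero64ViaPairs(uint32_t lo, uint32_t hi) {
-  return (lo | hi) == 0;  // the OR sets ZF exactly when both halves are zero
-}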
-
-void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
-                                     RegStorage array_base,
-                                     int len_offset) {
-  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
-   public:
-    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
-                             RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
-        : LIRSlowPath(m2l, branch_in),
-          index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
-    }
-
-    void Compile() OVERRIDE {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoThrowTarget);
-
-      RegStorage new_index = index_;
-      // Move index out of kArg1, either directly to kArg0, or to kArg2.
-      // TODO: clean up to compare registers by type rather than by number.
-      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
-        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
-          new_index = m2l_->TargetReg(kArg2, kNotWide);
-        } else {
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
-          new_index = m2l_->TargetReg(kArg0, kNotWide);
-        }
-      }
-      // Load array length to kArg1.
-      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
-      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
-      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
-                                       m2l_->TargetReg(kArg1, kNotWide), true);
-    }
-
-   private:
-    const RegStorage index_;
-    const RegStorage array_base_;
-    const int32_t len_offset_;
-  };
-
-  OpRegMem(kOpCmp, index, array_base, len_offset);
-  MarkPossibleNullPointerException(0);
-  LIR* branch = OpCondBranch(kCondUge, nullptr);
-  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
-                                                    index, array_base, len_offset));
-}
-
-void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
-                                     RegStorage array_base,
-                                     int32_t len_offset) {
-  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
-   public:
-    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
-                             int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
-        : LIRSlowPath(m2l, branch_in),
-          index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
-    }
-
-    void Compile() OVERRIDE {
-      m2l_->ResetRegPool();
-      m2l_->ResetDefTracking();
-      GenerateTargetLabel(kPseudoThrowTarget);
-
-      // Load array length to kArg1.
-      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
-      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
-      x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
-      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
-                                       m2l_->TargetReg(kArg1, kNotWide), true);
-    }
-
-   private:
-    const int32_t index_;
-    const RegStorage array_base_;
-    const int32_t len_offset_;
-  };
-
-  NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
-  MarkPossibleNullPointerException(0);
-  LIR* branch = OpCondBranch(kCondLs, nullptr);
-  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
-                                                    index, array_base, len_offset));
-}
-
-// Test suspend flag, return target of taken suspend branch
-LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
-  if (cu_->target64) {
-    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
-  } else {
-    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
-  }
-  return OpCondBranch((target == nullptr) ? kCondNe : kCondEq, target);
-}
-
-// Decrement register and branch on condition
-LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
-  OpRegImm(kOpSub, reg, 1);
-  return OpCondBranch(c_code, target);
-}
-
-bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
-                                    bool is_div ATTRIBUTE_UNUSED,
-                                    RegLocation rl_src ATTRIBUTE_UNUSED,
-                                    RegLocation rl_dest ATTRIBUTE_UNUSED,
-                                    int lit ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of smallLiteralDivRem in x86";
-  UNREACHABLE();
-}
-
-bool X86Mir2Lir::EasyMultiply(RegLocation rl_src ATTRIBUTE_UNUSED,
-                              RegLocation rl_dest ATTRIBUTE_UNUSED,
-                              int lit ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of easyMultiply in x86";
-  UNREACHABLE();
-}
-
-LIR* X86Mir2Lir::OpIT(ConditionCode cond ATTRIBUTE_UNUSED, const char* guide ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpIT in x86";
-  UNREACHABLE();
-}
-
-void X86Mir2Lir::OpEndIT(LIR* it ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of OpEndIT in x86";
-  UNREACHABLE();
-}
-
-void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
-  switch (val) {
-    case 0:
-      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
-      break;
-    case 1:
-      OpRegCopy(dest, src);
-      break;
-    default:
-      OpRegRegImm(kOpMul, dest, src, val);
-      break;
-  }
-}
-
-void X86Mir2Lir::GenImulMemImm(RegStorage dest,
-                               int sreg ATTRIBUTE_UNUSED,
-                               int displacement,
-                               int val) {
-  // All memory accesses below reference dalvik regs.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-  LIR *m;
-  switch (val) {
-    case 0:
-      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
-      break;
-    case 1: {
-      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-      LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
-      break;
-    }
-    default:
-      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
-                  rs_rX86_SP_32.GetReg(), displacement, val);
-      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
-      break;
-  }
-}
-
-void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                RegLocation rl_src2, int flags) {
-  if (!cu_->target64) {
-    // Some 32-bit x86 ops fall back to the generic Mir2Lir implementation.
-    switch (opcode) {
-      case Instruction::NOT_LONG:
-      case Instruction::DIV_LONG:
-      case Instruction::DIV_LONG_2ADDR:
-      case Instruction::REM_LONG:
-      case Instruction::REM_LONG_2ADDR:
-        Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-        return;
-
-      default:
-        // Everything else we can handle.
-        break;
-    }
-  }
-
-  switch (opcode) {
-    case Instruction::NOT_LONG:
-      GenNotLong(rl_dest, rl_src2);
-      return;
-
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
-      return;
-
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
-      return;
-
-    case Instruction::MUL_LONG:
-    case Instruction::MUL_LONG_2ADDR:
-      GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-      return;
-
-    case Instruction::DIV_LONG:
-    case Instruction::DIV_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
-      return;
-
-    case Instruction::REM_LONG:
-    case Instruction::REM_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
-      return;
-
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::AND_LONG:
-      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
-      return;
-
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
-      return;
-
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
-      return;
-
-    case Instruction::NEG_LONG:
-      GenNegLong(rl_dest, rl_src2);
-      return;
-
-    default:
-      LOG(FATAL) << "Invalid long arith op";
-      return;
-  }
-}
-
-bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) {
-  // All memory accesses below reference dalvik regs.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-  if (val == 0) {
-    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    if (cu_->target64) {
-      OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
-    } else {
-      OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
-      OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
-    }
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  } else if (val == 1) {
-    StoreValueWide(rl_dest, rl_src1);
-    return true;
-  } else if (val == 2) {
-    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
-    return true;
-  } else if (IsPowerOfTwo(val)) {
-    int shift_amount = CTZ(val);
-    if (!PartiallyIntersects(rl_src1, rl_dest)) {
-      rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-      RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
-                                                shift_amount, flags);
-      StoreValueWide(rl_dest, rl_result);
-      return true;
-    }
-  }
-
-  // Okay, on 32b just bite the bullet and do it, still better than the general case.
-  if (!cu_->target64) {
-    int32_t val_lo = Low32Bits(val);
-    int32_t val_hi = High32Bits(val);
-    // Prepare for explicit register usage.
-    ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
-    rl_src1 = UpdateLocWideTyped(rl_src1);
-    bool src1_in_reg = rl_src1.location == kLocPhysReg;
-    int displacement = SRegOffset(rl_src1.s_reg_low);
-
-    // ECX <- 1H * 2L
-    // EAX <- 1L * 2H
-    if (src1_in_reg) {
-      GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
-      GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
-    } else {
-      GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
-      GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
-    }
-
-    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
-    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
-
-    // EAX <- 2L
-    LoadConstantNoClobber(rs_r0, val_lo);
-
-    // EDX:EAX <- 2L * 1L (double precision)
-    if (src1_in_reg) {
-      NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
-    } else {
-      LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
-      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
-                              true /* is_load */, true /* is_64bit */);
-    }
-
-    // EDX <- EDX + ECX (add high words)
-    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
-
-    // Result is EDX:EAX
-    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-                             RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  }
-  return false;
-}
-
-void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2, int flags) {
-  if (rl_src1.is_const) {
-    std::swap(rl_src1, rl_src2);
-  }
-
-  if (rl_src2.is_const) {
-    if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
-      return;
-    }
-  }
-
-  // All memory accesses below reference dalvik regs.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-  if (cu_->target64) {
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
-        rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
-      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-    } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
-               rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
-      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
-    } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
-               rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
-      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-    } else {
-      OpRegCopy(rl_result.reg, rl_src1.reg);
-      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
-    }
-    StoreValueWide(rl_dest, rl_result);
-    return;
-  }
-
-  // Not multiplying by a constant. Do it the hard way.
-  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
-  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
-                   mir_graph_->SRegToVReg(rl_src2.s_reg_low);
-
-  // Prepare for explicit register usage.
-  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
-  rl_src1 = UpdateLocWideTyped(rl_src1);
-  rl_src2 = UpdateLocWideTyped(rl_src2);
-
-  // At this point, the VRs are in their home locations.
-  bool src1_in_reg = rl_src1.location == kLocPhysReg;
-  bool src2_in_reg = rl_src2.location == kLocPhysReg;
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-
-  // ECX <- 1H
-  if (src1_in_reg) {
-    NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
-  } else {
-    LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
-                 kNotVolatile);
-  }
-
-  if (is_square) {
-    // Take advantage of the fact that the values are the same.
-    // ECX <- ECX * 2L  (1H * 2L)
-    if (src2_in_reg) {
-      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
-    } else {
-      int displacement = SRegOffset(rl_src2.s_reg_low);
-      LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
-                       displacement + LOWORD_OFFSET);
-      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
-                              true /* is_load */, true /* is_64bit */);
-    }
-
-    // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
-    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
-  } else {
-    // EAX <- 2H
-    if (src2_in_reg) {
-      NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
-    } else {
-      LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
-                   kNotVolatile);
-    }
-
-    // EAX <- EAX * 1L  (2H * 1L)
-    if (src1_in_reg) {
-      NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
-    } else {
-      int displacement = SRegOffset(rl_src1.s_reg_low);
-      LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(),
-                       displacement + LOWORD_OFFSET);
-      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
-                              true /* is_load */, true /* is_64bit */);
-    }
-
-    // ECX <- ECX * 2L  (1H * 2L)
-    if (src2_in_reg) {
-      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
-    } else {
-      int displacement = SRegOffset(rl_src2.s_reg_low);
-      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
-                       displacement + LOWORD_OFFSET);
-      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
-                              true /* is_load */, true /* is_64bit */);
-    }
-
-    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
-    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
-  }
-
-  // EAX <- 2L
-  if (src2_in_reg) {
-    NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
-  } else {
-    LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
-                 kNotVolatile);
-  }
-
-  // EDX:EAX <- 2L * 1L (double precision)
-  if (src1_in_reg) {
-    NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
-  } else {
-    int displacement = SRegOffset(rl_src1.s_reg_low);
-    LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
-    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
-                            true /* is_load */, true /* is_64bit */);
-  }
-
-  // EDX <- EDX + ECX (add high words)
-  NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
-
-  // Result is EDX:EAX
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-                           RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
-}
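-
-// The 32-bit paths above implement the schoolbook decomposition of a 64-bit
-// multiply out of 32-bit ones. A minimal standalone sketch of the identity
-// (illustrative only, not part of the backend):
-//
-//   // Given x = x_hi:x_lo and y = y_hi:y_lo (32-bit halves), modulo 2^64:
-//   //   x * y == ((x_hi * y_lo + x_lo * y_hi) << 32) + (uint64_t)x_lo * y_lo
-//   uint64_t MulSketch(uint32_t x_lo, uint32_t x_hi, uint32_t y_lo, uint32_t y_hi) {
-//     uint64_t cross = (uint64_t)x_hi * y_lo + (uint64_t)x_lo * y_hi;  // ECX above
-//     uint64_t low = (uint64_t)x_lo * y_lo;                            // EDX:EAX above
-//     return (cross << 32) + low;  // the final "add high words" step
-//   }
-//
-// The x_hi * y_hi term vanishes modulo 2^64, and in the squaring case the two
-// cross terms are equal, which is why GenMulLong saves one multiply there.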
-
-void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
-                                   Instruction::Code op) {
-  DCHECK_EQ(rl_dest.location, kLocPhysReg);
-  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
-  if (rl_src.location == kLocPhysReg) {
-    // Both operands are in registers.
-    // But we must ensure that rl_src is in a register pair.
-    if (cu_->target64) {
-      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
-    } else {
-      rl_src = LoadValueWide(rl_src, kCoreReg);
-      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
-        // The registers overlap, so the source high word would be clobbered before use.
-        RegStorage temp_reg = AllocTemp();
-        OpRegCopy(temp_reg, rl_dest.reg);
-        rl_src.reg.SetHighReg(temp_reg.GetReg());
-      }
-      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
-
-      x86op = GetOpcode(op, rl_dest, rl_src, true);
-      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
-    }
-    return;
-  }
-
-  // RHS is in memory.
-  DCHECK((rl_src.location == kLocDalvikFrame) ||
-         (rl_src.location == kLocCompilerTemp));
-  int r_base = rs_rX86_SP_32.GetReg();
-  int displacement = SRegOffset(rl_src.s_reg_low);
-
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
-                     r_base, displacement + LOWORD_OFFSET);
-  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
-  if (!cu_->target64) {
-    x86op = GetOpcode(op, rl_dest, rl_src, true);
-    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
-    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                            true /* is_load */, true /* is64bit */);
-  }
-}
-
-void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
-  rl_dest = UpdateLocWideTyped(rl_dest);
-  if (rl_dest.location == kLocPhysReg) {
-    // Ensure we are in a register pair
-    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-
-    rl_src = UpdateLocWideTyped(rl_src);
-    GenLongRegOrMemOp(rl_result, rl_src, op);
-    StoreFinalValueWide(rl_dest, rl_result);
-    return;
-  } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) {
-    // Handle the case when src and dest intersect.
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    rl_src = UpdateLocWideTyped(rl_src);
-    GenLongRegOrMemOp(rl_result, rl_src, op);
-    StoreFinalValueWide(rl_dest, rl_result);
-    return;
-  }
-
-  // It wasn't in registers, so it better be in memory.
-  DCHECK((rl_dest.location == kLocDalvikFrame) ||
-         (rl_dest.location == kLocCompilerTemp));
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-
-  // Operate directly into memory.
-  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
-  int r_base = rs_rX86_SP_32.GetReg();
-  int displacement = SRegOffset(rl_dest.s_reg_low);
-
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
-                     cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
-  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
-  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                          false /* is_load */, true /* is64bit */);
-  if (!cu_->target64) {
-    x86op = GetOpcode(op, rl_dest, rl_src, true);
-    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
-    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                            true /* is_load */, true /* is64bit */);
-    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                            false /* is_load */, true /* is64bit */);
-  }
-
-  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
-  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
-
-  // If the left operand is in memory and the right operand is in a register
-  // and both belong to the same dalvik register then we should clobber the
-  // right one because it doesn't hold valid data anymore.
-  if (v_src_reg == v_dst_reg) {
-    Clobber(rl_src.reg);
-  }
-}
-
-void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
-                              RegLocation rl_src2, Instruction::Code op,
-                              bool is_commutative) {
-  // Is this really a 2 operand operation?
-  switch (op) {
-    case Instruction::ADD_LONG_2ADDR:
-    case Instruction::SUB_LONG_2ADDR:
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::OR_LONG_2ADDR:
-    case Instruction::XOR_LONG_2ADDR:
-      if (GenerateTwoOperandInstructions()) {
-        GenLongArith(rl_dest, rl_src2, op);
-        return;
-      }
-      break;
-
-    default:
-      break;
-  }
-
-  if (rl_dest.location == kLocPhysReg) {
-    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
-
-    // We are about to clobber the LHS, so it needs to be a temp.
-    rl_result = ForceTempWide(rl_result);
-
-    // Perform the operation using the RHS.
-    rl_src2 = UpdateLocWideTyped(rl_src2);
-    GenLongRegOrMemOp(rl_result, rl_src2, op);
-
-    // And now record that the result is in the temp.
-    StoreFinalValueWide(rl_dest, rl_result);
-    return;
-  }
-
-  // It wasn't in registers, so it better be in memory.
-  DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp));
-  rl_src1 = UpdateLocWideTyped(rl_src1);
-  rl_src2 = UpdateLocWideTyped(rl_src2);
-
-  // Get one of the source operands into temporary register.
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  if (cu_->target64) {
-    if (IsTemp(rl_src1.reg)) {
-      GenLongRegOrMemOp(rl_src1, rl_src2, op);
-    } else if (is_commutative) {
-      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-      // We need at least one of them to be a temporary.
-      if (!IsTemp(rl_src2.reg)) {
-        rl_src1 = ForceTempWide(rl_src1);
-        GenLongRegOrMemOp(rl_src1, rl_src2, op);
-      } else {
-        GenLongRegOrMemOp(rl_src2, rl_src1, op);
-        StoreFinalValueWide(rl_dest, rl_src2);
-        return;
-      }
-    } else {
-      // Need LHS to be the temp.
-      rl_src1 = ForceTempWide(rl_src1);
-      GenLongRegOrMemOp(rl_src1, rl_src2, op);
-    }
-  } else {
-    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
-      GenLongRegOrMemOp(rl_src1, rl_src2, op);
-    } else if (is_commutative) {
-      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-      // We need at least one of them to be a temporary.
-      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
-        rl_src1 = ForceTempWide(rl_src1);
-        GenLongRegOrMemOp(rl_src1, rl_src2, op);
-      } else {
-        GenLongRegOrMemOp(rl_src2, rl_src1, op);
-        StoreFinalValueWide(rl_dest, rl_src2);
-        return;
-      }
-    } else {
-      // Need LHS to be the temp.
-      rl_src1 = ForceTempWide(rl_src1);
-      GenLongRegOrMemOp(rl_src1, rl_src2, op);
-    }
-  }
-
-  StoreFinalValueWide(rl_dest, rl_src1);
-}
-
-void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
-  if (cu_->target64) {
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_result;
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    OpRegCopy(rl_result.reg, rl_src.reg);
-    OpReg(kOpNot, rl_result.reg);
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    LOG(FATAL) << "Unexpected use GenNotLong()";
-  }
-}
-
-void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
-                                  int64_t imm, bool is_div) {
-  if (imm == 0) {
-    GenDivZeroException();
-  } else if (imm == 1) {
-    if (is_div) {
-      // x / 1 == x.
-      StoreValueWide(rl_dest, rl_src);
-    } else {
-      // x % 1 == 0.
-      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-      LoadConstantWide(rl_result.reg, 0);
-      StoreValueWide(rl_dest, rl_result);
-    }
-  } else if (imm == -1) {  // Handle the 0x8000000000000000 / -1 special case.
-    if (is_div) {
-      rl_src = LoadValueWide(rl_src, kCoreReg);
-      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-      RegStorage rs_temp = AllocTempWide();
-
-      OpRegCopy(rl_result.reg, rl_src.reg);
-      LoadConstantWide(rs_temp, 0x8000000000000000);
-
-      // If x == MIN_LONG, return MIN_LONG.
-      OpRegReg(kOpCmp, rl_src.reg, rs_temp);
-      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
-
-      // For x != MIN_LONG, x / -1 == -x.
-      OpReg(kOpNeg, rl_result.reg);
-
-      minint_branch->target = NewLIR0(kPseudoTargetLabel);
-      FreeTemp(rs_temp);
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      // x % -1 == 0.
-      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-      LoadConstantWide(rl_result.reg, 0);
-      StoreValueWide(rl_dest, rl_result);
-    }
-  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
-    // Division using shifting.
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    if (IsSameReg(rl_result.reg, rl_src.reg)) {
-      RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
-      rl_result.reg.SetReg(rs_temp.GetReg());
-    }
-    LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
-    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
-    NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
-    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
-    int shift_amount = CTZ(imm);
-    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
-    if (imm < 0) {
-      OpReg(kOpNeg, rl_result.reg);
-    }
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    CHECK(imm <= -2 || imm >= 2);
-
-    FlushReg(rs_r0q);
-    Clobber(rs_r0q);
-    LockTemp(rs_r0q);
-    FlushReg(rs_r2q);
-    Clobber(rs_r2q);
-    LockTemp(rs_r2q);
-
-    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-                             is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG};
-
-    // Use H. S. Warren's Hacker's Delight, Chapter 10, and
-    // T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using Multiplication".
-    int64_t magic;
-    int shift;
-    CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
-
-    /*
-     * For imm >= 2,
-     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
-     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
-     * For imm <= -2,
-     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
-     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
-     * We implement this algorithm in the following way:
-     * 1. multiply the magic number M and the numerator n, getting the high 64 bits in RDX
-     * 2. if imm > 0 and magic < 0, add the numerator to RDX
-     *    if imm < 0 and magic > 0, subtract the numerator from RDX
-     * 3. if S != 0, arithmetic-shift RDX right by S bits
-     * 4. add 1 to RDX if RDX < 0
-     * 5. RDX then holds the quotient
-     */
-
-    // RAX = magic.
-    LoadConstantWide(rs_r0q, magic);
-
-    // Multiply by numerator.
-    RegStorage numerator_reg;
-    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
-      // We will need the value later.
-      rl_src = LoadValueWide(rl_src, kCoreReg);
-      numerator_reg = rl_src.reg;
-
-      // RDX:RAX = magic * numerator.
-      NewLIR1(kX86Imul64DaR, numerator_reg.GetReg());
-    } else {
-      // Only need this once.  Multiply directly from the value.
-      rl_src = UpdateLocWideTyped(rl_src);
-      if (rl_src.location != kLocPhysReg) {
-        // Okay, we can do this from memory.
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        int displacement = SRegOffset(rl_src.s_reg_low);
-        // RDX:RAX = magic * numerator.
-        LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement);
-        AnnotateDalvikRegAccess(m, displacement >> 2,
-                                true /* is_load */, true /* is_64bit */);
-      } else {
-        // RDX:RAX = magic * numerator.
-        NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg());
-      }
-    }
-
-    if (imm > 0 && magic < 0) {
-      // Add numerator to RDX.
-      DCHECK(numerator_reg.Valid());
-      OpRegReg(kOpAdd, rs_r2q, numerator_reg);
-    } else if (imm < 0 && magic > 0) {
-      DCHECK(numerator_reg.Valid());
-      OpRegReg(kOpSub, rs_r2q, numerator_reg);
-    }
-
-    // Do we need the shift?
-    if (shift != 0) {
-      // Shift RDX by 'shift' bits.
-      OpRegImm(kOpAsr, rs_r2q, shift);
-    }
-
-    // Move RDX to RAX.
-    OpRegCopyWide(rs_r0q, rs_r2q);
-
-    // Move sign bit to bit 0, zeroing the rest.
-    OpRegImm(kOpLsr, rs_r2q, 63);
-
-    // RDX = RDX + RAX.
-    OpRegReg(kOpAdd, rs_r2q, rs_r0q);
-
-    // Quotient is in RDX.
-    if (!is_div) {
-      // We need to compute the remainder.
-      // Remainder is numerator - (quotient * imm).
-      DCHECK(numerator_reg.Valid());
-      OpRegCopyWide(rs_r0q, numerator_reg);
-
-      // Imul doesn't support 64-bit imms.
-      if (imm > std::numeric_limits<int32_t>::max() ||
-          imm < std::numeric_limits<int32_t>::min()) {
-        RegStorage rs_temp = AllocTempWide();
-        LoadConstantWide(rs_temp, imm);
-
-        // RDX = quotient * imm.
-        NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
-
-        FreeTemp(rs_temp);
-      } else {
-        // RDX = quotient * imm.
-        int short_imm = static_cast<int>(imm);
-        NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
-      }
-
-      // RAX -= RDX.
-      OpRegReg(kOpSub, rs_r0q, rs_r2q);
-
-      // Result in RAX.
-    } else {
-      // Result in RDX.
-    }
-    StoreValueWide(rl_dest, rl_result);
-    FreeTemp(rs_r0q);
-    FreeTemp(rs_r2q);
-  }
-}
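-
-// A minimal standalone sketch of the reciprocal identity used above, assuming
-// a (magic, shift) pair as produced by CalculateMagicAndShift. Illustrative
-// only; the sign correction of step 2 is elided here and applies only when
-// magic and imm have opposite signs:
-//
-//   int64_t DivByConstSketch(int64_t n, int64_t magic, int shift) {
-//     __int128 product = (__int128)magic * n;
-//     int64_t hi = (int64_t)(product >> 64);      // "RDX" in the code above
-//     hi >>= shift;                               // step 3
-//     return hi + (int64_t)((uint64_t)hi >> 63);  // step 4: +1 when hi < 0
-//   }
-//
-// The power-of-two branch instead uses the standard bias trick:
-//   q = (n + (n < 0 ? |imm| - 1 : 0)) >> ctz(|imm|), negated when imm < 0.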
-
-void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                               RegLocation rl_src2, bool is_div, int flags) {
-  if (!cu_->target64) {
-    LOG(FATAL) << "Unexpected use GenDivRemLong()";
-    return;
-  }
-
-  if (rl_src2.is_const) {
-    DCHECK(rl_src2.wide);
-    int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
-    GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
-    return;
-  }
-
-  // We have to use fixed registers, so flush all the temps.
-  // Prepare for explicit register usage.
-  ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
-
-  // Load LHS into RAX.
-  LoadValueDirectWideFixed(rl_src1, rs_r0q);
-
-  // Load RHS into RCX.
-  LoadValueDirectWideFixed(rl_src2, rs_r1q);
-
-  // Copy LHS sign bit into RDX.
-  NewLIR0(kx86Cqo64Da);
-
-  // Handle division by zero case.
-  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
-    GenDivZeroCheckWide(rs_r1q);
-  }
-
-  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
-  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
-  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-
-  // RHS is -1.
-  LoadConstantWide(rs_r6q, 0x8000000000000000);
-  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
-  LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-
-  // In 0x8000000000000000/-1 case.
-  if (!is_div) {
-    // For DIV, RAX is already right. For REM, we need RDX to be 0.
-    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
-  }
-  LIR* done = NewLIR1(kX86Jmp8, 0);
-
-  // Expected case.
-  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
-  minint_branch->target = minus_one_branch->target;
-  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
-  done->target = NewLIR0(kPseudoTargetLabel);
-
-  // Result is in RAX for div and RDX for rem.
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
-  if (!is_div) {
-    rl_result.reg.SetReg(r2q);
-  }
-
-  StoreValueWide(rl_dest, rl_result);
-}
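-
-// Why the guard above exists: x86 IDIV raises a #DE fault not only for a zero
-// divisor but also for INT64_MIN / -1, whose quotient overflows 64 bits. Java
-// semantics instead define that case as INT64_MIN for the quotient and 0 for
-// the remainder, which the branched-around path produces directly.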
-
-void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result;
-  if (cu_->target64) {
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
-  } else {
-    rl_result = ForceTempWide(rl_src);
-    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
-    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
-    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
-  }
-  StoreValueWide(rl_dest, rl_result);
-}
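-
-// Sketch of the 32-bit negate sequence above: for x = hi:lo,
-//   -x  ==  (-(hi + (lo != 0 ? 1 : 0))) : (-lo)
-// NEG on the low word sets the carry flag exactly when lo != 0, so
-// ADC hi, 0 followed by NEG hi folds the borrow into the high word.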
-
-void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
-  DCHECK_EQ(kX86, cu_->instruction_set);
-  X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-  case kOpCmp: opcode = kX86Cmp32RT;  break;
-  case kOpMov: opcode = kX86Mov32RT;  break;
-  default:
-    LOG(FATAL) << "Bad opcode: " << op;
-    break;
-  }
-  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
-}
-
-void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
-  DCHECK_EQ(kX86_64, cu_->instruction_set);
-  X86OpCode opcode = kX86Bkpt;
-  if (cu_->target64 && r_dest.Is64BitSolo()) {
-    switch (op) {
-    case kOpCmp: opcode = kX86Cmp64RT;  break;
-    case kOpMov: opcode = kX86Mov64RT;  break;
-    default:
-      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
-      break;
-    }
-  } else {
-    switch (op) {
-    case kOpCmp: opcode = kX86Cmp32RT;  break;
-    case kOpMov: opcode = kX86Mov32RT;  break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-    }
-  }
-  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
-}
-
-/*
- * Generate array load
- */
-void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  RegLocation rl_result;
-  rl_array = LoadValue(rl_array, kRefReg);
-
-  int data_offset;
-  if (size == k64 || size == kDouble) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  bool constant_index = rl_index.is_const;
-  int32_t constant_index_value = 0;
-  if (!constant_index) {
-    rl_index = LoadValue(rl_index, kCoreReg);
-  } else {
-    constant_index_value = mir_graph_->ConstantValue(rl_index);
-    // The index is constant, so fold it into the data offset.
-    data_offset += constant_index_value << scale;
-    // Treat as a non-array access below.
-    rl_index.reg = RegStorage::InvalidReg();
-  }
-
-  /* null object? */
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-    if (constant_index) {
-      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
-    } else {
-      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
-    }
-  }
-  rl_result = EvalLoc(rl_dest, reg_class, true);
-  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
-  if ((size == k64) || (size == kDouble)) {
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    StoreValue(rl_dest, rl_result);
-  }
-}
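-
-// Addressing sketch for the two array paths above and below: the element
-// address is
-//   array_base + data_offset + (index << scale)
-// and when the index is a compile-time constant the (index << scale) term is
-// folded into data_offset, so no index register is consumed.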
-
-/*
- * Generate array store
- *
- */
-void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
-
-  if (size == k64 || size == kDouble) {
-    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
-  } else {
-    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
-  }
-
-  rl_array = LoadValue(rl_array, kRefReg);
-  bool constant_index = rl_index.is_const;
-  int32_t constant_index_value = 0;
-  if (!constant_index) {
-    rl_index = LoadValue(rl_index, kCoreReg);
-  } else {
-    // The index is constant, so fold it into the data offset.
-    constant_index_value = mir_graph_->ConstantValue(rl_index);
-    data_offset += constant_index_value << scale;
-    // Treat as a non-array access below.
-    rl_index.reg = RegStorage::InvalidReg();
-  }
-
-  /* null object? */
-  GenNullCheck(rl_array.reg, opt_flags);
-
-  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-    if (constant_index) {
-      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
-    } else {
-      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
-    }
-  }
-  if ((size == k64) || (size == kDouble)) {
-    rl_src = LoadValueWide(rl_src, reg_class);
-  } else {
-    rl_src = LoadValue(rl_src, reg_class);
-  }
-  // If the src reg can't be byte accessed, move it to a temp first.
-  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
-    RegStorage temp = AllocTemp();
-    OpRegCopy(temp, rl_src.reg);
-    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags);
-  } else {
-    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags);
-  }
-  if (card_mark) {
-    // Free rl_index if it's a temp. This ensures there are two free regs for the card mark.
-    if (!constant_index) {
-      FreeTemp(rl_index.reg);
-    }
-    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
-  }
-}
-
-RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                          RegLocation rl_dest,
-                                          RegLocation rl_src,
-                                          int shift_amount,
-                                          int flags ATTRIBUTE_UNUSED) {
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  if (cu_->target64) {
-    OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
-    switch (opcode) {
-      case Instruction::SHL_LONG:
-      case Instruction::SHL_LONG_2ADDR:
-        op = kOpLsl;
-        break;
-      case Instruction::SHR_LONG:
-      case Instruction::SHR_LONG_2ADDR:
-        op = kOpAsr;
-        break;
-      case Instruction::USHR_LONG:
-      case Instruction::USHR_LONG_2ADDR:
-        op = kOpLsr;
-        break;
-      default:
-        LOG(FATAL) << "Unexpected case";
-    }
-    OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
-  } else {
-    switch (opcode) {
-      case Instruction::SHL_LONG:
-      case Instruction::SHL_LONG_2ADDR:
-        DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
-        if (shift_amount == 32) {
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-          LoadConstant(rl_result.reg.GetLow(), 0);
-        } else if (shift_amount > 31) {
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-          NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
-          LoadConstant(rl_result.reg.GetLow(), 0);
-        } else {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-          NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
-                  shift_amount);
-          NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
-        }
-        break;
-      case Instruction::SHR_LONG:
-      case Instruction::SHR_LONG_2ADDR:
-        if (shift_amount == 32) {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
-        } else if (shift_amount > 31) {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-          NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
-          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
-        } else {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
-                  shift_amount);
-          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
-        }
-        break;
-      case Instruction::USHR_LONG:
-      case Instruction::USHR_LONG_2ADDR:
-        if (shift_amount == 32) {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-          LoadConstant(rl_result.reg.GetHigh(), 0);
-        } else if (shift_amount > 31) {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-          NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
-          LoadConstant(rl_result.reg.GetHigh(), 0);
-        } else {
-          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
-          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
-                  shift_amount);
-          NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
-        }
-        break;
-      default:
-        LOG(FATAL) << "Unexpected case";
-    }
-  }
-  return rl_result;
-}
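-
-// A minimal sketch of the 32-bit double-shift identities used above
-// (illustrative only), for a shift amount 0 < s < 32 on x = hi:lo:
-//
-//   uint32_t ShlHi(uint32_t lo, uint32_t hi, int s) {  // SHLD hi, lo, s
-//     return (hi << s) | (lo >> (32 - s));
-//   }
-//   uint32_t ShrLo(uint32_t lo, uint32_t hi, int s) {  // SHRD lo, hi, s
-//     return (lo >> s) | (hi << (32 - s));
-//   }
-//
-// For s >= 32 the halves are moved across and a single 32-bit shift by
-// (s - 32) finishes the job, with sign or zero fill as the opcode requires.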
-
-void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src, RegLocation rl_shift, int flags) {
-  // Per the spec, we only care about the low 6 bits of the shift amount.
-  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
-  if (shift_amount == 0) {
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    StoreValueWide(rl_dest, rl_src);
-    return;
-  } else if (shift_amount == 1 &&
-            (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
-    // Need to handle this here to avoid calling StoreValueWide twice.
-    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags);
-    return;
-  }
-  if (PartiallyIntersects(rl_src, rl_dest)) {
-    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
-    return;
-  }
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags);
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                                   int flags) {
-  bool isConstSuccess = false;
-  switch (opcode) {
-    case Instruction::ADD_LONG:
-    case Instruction::AND_LONG:
-    case Instruction::OR_LONG:
-    case Instruction::XOR_LONG:
-      if (rl_src2.is_const) {
-        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
-      } else {
-        DCHECK(rl_src1.is_const);
-        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
-      }
-      break;
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      if (rl_src2.is_const) {
-        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
-      } else {
-        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-        isConstSuccess = true;
-      }
-      break;
-    case Instruction::ADD_LONG_2ADDR:
-    case Instruction::OR_LONG_2ADDR:
-    case Instruction::XOR_LONG_2ADDR:
-    case Instruction::AND_LONG_2ADDR:
-      if (rl_src2.is_const) {
-        if (GenerateTwoOperandInstructions()) {
-          isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
-        } else {
-          isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
-        }
-      } else {
-        DCHECK(rl_src1.is_const);
-        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
-      }
-      break;
-    default:
-      isConstSuccess = false;
-      break;
-  }
-
-  if (!isConstSuccess) {
-    // Default - bail to non-const handler.
-    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
-  }
-}
-
-bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
-  switch (op) {
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::AND_LONG:
-      return value == -1;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      return value == 0;
-    default:
-      return false;
-  }
-}
-
-X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
-                                bool is_high_op) {
-  bool rhs_in_mem = rhs.location != kLocPhysReg;
-  bool dest_in_mem = dest.location != kLocPhysReg;
-  bool is64Bit = cu_->target64;
-  DCHECK(!rhs_in_mem || !dest_in_mem);
-  switch (op) {
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      if (dest_in_mem) {
-        return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
-      } else if (rhs_in_mem) {
-        return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
-      }
-      return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      if (dest_in_mem) {
-        return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
-      } else if (rhs_in_mem) {
-        return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
-      }
-      return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::AND_LONG:
-      if (dest_in_mem) {
-        return is64Bit ? kX86And64MR : kX86And32MR;
-      }
-      if (is64Bit) {
-        return rhs_in_mem ? kX86And64RM : kX86And64RR;
-      }
-      return rhs_in_mem ? kX86And32RM : kX86And32RR;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      if (dest_in_mem) {
-        return is64Bit ? kX86Or64MR : kX86Or32MR;
-      }
-      if (is64Bit) {
-        return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
-      }
-      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      if (dest_in_mem) {
-        return is64Bit ? kX86Xor64MR : kX86Xor32MR;
-      }
-      if (is64Bit) {
-        return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
-      }
-      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << op;
-      return kX86Add32RR;
-  }
-}
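-
-// The is_high_op selection above encodes the usual 64-bit-on-32-bit carry
-// chain: the low words use ADD/SUB and the high words use ADC/SBB so the
-// carry or borrow propagates. As a sketch:
-//   add lo_dest, lo_rhs   // sets CF
-//   adc hi_dest, hi_rhs   // hi_dest += hi_rhs + CF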
-
-X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
-                                int32_t value) {
-  bool in_mem = loc.location != kLocPhysReg;
-  bool is64Bit = cu_->target64;
-  bool byte_imm = IS_SIMM8(value);
-  DCHECK(in_mem || !loc.reg.IsFloat());
-  switch (op) {
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      if (byte_imm) {
-        if (in_mem) {
-          return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
-        }
-        return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
-      }
-      if (in_mem) {
-        return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
-      }
-      return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
-    case Instruction::SUB_LONG:
-    case Instruction::SUB_LONG_2ADDR:
-      if (byte_imm) {
-        if (in_mem) {
-          return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
-        }
-        return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
-      }
-      if (in_mem) {
-        return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
-      }
-      return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
-    case Instruction::AND_LONG_2ADDR:
-    case Instruction::AND_LONG:
-      if (byte_imm) {
-        if (is64Bit) {
-          return in_mem ? kX86And64MI8 : kX86And64RI8;
-        }
-        return in_mem ? kX86And32MI8 : kX86And32RI8;
-      }
-      if (is64Bit) {
-        return in_mem ? kX86And64MI : kX86And64RI;
-      }
-      return in_mem ? kX86And32MI : kX86And32RI;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      if (byte_imm) {
-        if (is64Bit) {
-          return in_mem ? kX86Or64MI8 : kX86Or64RI8;
-        }
-        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
-      }
-      if (is64Bit) {
-        return in_mem ? kX86Or64MI : kX86Or64RI;
-      }
-      return in_mem ? kX86Or32MI : kX86Or32RI;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      if (byte_imm) {
-        if (is64Bit) {
-          return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
-        }
-        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
-      }
-      if (is64Bit) {
-        return in_mem ? kX86Xor64MI : kX86Xor64RI;
-      }
-      return in_mem ? kX86Xor32MI : kX86Xor32RI;
-    default:
-      LOG(FATAL) << "Unexpected opcode: " << op;
-      UNREACHABLE();
-  }
-}
-
-bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
-  DCHECK(rl_src.is_const);
-  int64_t val = mir_graph_->ConstantValueWide(rl_src);
-
-  if (cu_->target64) {
-    // We can use the immediate form only if the value fits in 32 bits.
-    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
-      return false;
-    }
-
-    rl_dest = UpdateLocWideTyped(rl_dest);
-
-    if ((rl_dest.location == kLocDalvikFrame) ||
-        (rl_dest.location == kLocCompilerTemp)) {
-      int r_base = rs_rX86_SP_32.GetReg();
-      int displacement = SRegOffset(rl_dest.s_reg_low);
-
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
-      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
-      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                              true /* is_load */, true /* is64bit */);
-      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                              false /* is_load */, true /* is64bit */);
-      return true;
-    }
-
-    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    DCHECK_EQ(rl_result.location, kLocPhysReg);
-    DCHECK(!rl_result.reg.IsFloat());
-
-    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
-    NewLIR2(x86op, rl_result.reg.GetReg(), val);
-
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  }
-
-  int32_t val_lo = Low32Bits(val);
-  int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWideTyped(rl_dest);
-
-  // Can we just do this into memory?
-  if ((rl_dest.location == kLocDalvikFrame) ||
-      (rl_dest.location == kLocCompilerTemp)) {
-    int r_base = rs_rX86_SP_32.GetReg();
-    int displacement = SRegOffset(rl_dest.s_reg_low);
-
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    if (!IsNoOp(op, val_lo)) {
-      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
-      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
-      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                              true /* is_load */, true /* is64bit */);
-      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
-                              false /* is_load */, true /* is64bit */);
-    }
-    if (!IsNoOp(op, val_hi)) {
-      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
-      LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
-      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                                true /* is_load */, true /* is64bit */);
-      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                                false /* is_load */, true /* is64bit */);
-    }
-    return true;
-  }
-
-  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  DCHECK_EQ(rl_result.location, kLocPhysReg);
-  DCHECK(!rl_result.reg.IsFloat());
-
-  if (!IsNoOp(op, val_lo)) {
-    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
-    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
-  }
-  if (!IsNoOp(op, val_hi)) {
-    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
-    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
-  }
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
-bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
-                                RegLocation rl_src2, Instruction::Code op) {
-  DCHECK(rl_src2.is_const);
-  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-
-  if (cu_->target64) {
-    // We can use the immediate form only if the value fits in 32 bits.
-    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
-      return false;
-    }
-    if (rl_dest.location == kLocPhysReg &&
-        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
-      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
-      OpRegCopy(rl_dest.reg, rl_src1.reg);
-      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
-      StoreFinalValueWide(rl_dest, rl_dest);
-      return true;
-    }
-
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    // We need the values to be in a temporary
-    RegLocation rl_result = ForceTempWide(rl_src1);
-
-    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
-    NewLIR2(x86op, rl_result.reg.GetReg(), val);
-
-    StoreFinalValueWide(rl_dest, rl_result);
-    return true;
-  }
-
-  int32_t val_lo = Low32Bits(val);
-  int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWideTyped(rl_dest);
-  rl_src1 = UpdateLocWideTyped(rl_src1);
-
-  // Can we do this directly into the destination registers?
-  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
-      rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
-      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
-    if (!IsNoOp(op, val_lo)) {
-      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
-      NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
-    }
-    if (!IsNoOp(op, val_hi)) {
-      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
-      NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
-    }
-
-    StoreFinalValueWide(rl_dest, rl_dest);
-    return true;
-  }
-
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  DCHECK_EQ(rl_src1.location, kLocPhysReg);
-
-  // We need the values to be in a temporary
-  RegLocation rl_result = ForceTempWide(rl_src1);
-  if (!IsNoOp(op, val_lo)) {
-    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
-    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
-  }
-  if (!IsNoOp(op, val_hi)) {
-    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
-    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
-  }
-
-  StoreFinalValueWide(rl_dest, rl_result);
-  return true;
-}
-
-// For final classes there are no sub-classes to check and so we can answer the instance-of
-// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
-void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
-                                    RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation object = LoadValue(rl_src, kRefReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage result_reg = rl_result.reg;
-
-  // For 32-bit, SETcc only works with EAX..EDX.
-  RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
-  if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
-    result_reg = AllocateByteRegister();
-  }
-
-  // Assume that there is no match.
-  LoadConstant(result_reg, 0);
-  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);
-
-  // We will use this register to compare to memory below.
-  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
-  // For this reason, force allocation of a 32 bit register to use, so that the
-  // compare to memory will be done using a 32 bit comparison.
-  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
-  RegStorage check_class = AllocTemp();
-
-  if (use_declaring_class) {
-    RegStorage r_method = LoadCurrMethodWithHint(check_class);
-    LoadRefDisp(r_method, ArtMethod::DeclaringClassOffset().Int32Value(),
-                check_class, kNotVolatile);
-  } else {
-    LoadTypeFromCache(type_idx, check_class);
-  }
-
-  // Compare the computed class to the class in the object.
-  DCHECK_EQ(object.location, kLocPhysReg);
-  OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
-
-  // Set the low byte of the result to 0 or 1 from the compare condition code.
-  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
-
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_branchover->target = target;
-  FreeTemp(check_class);
-  if (IsTemp(result_reg)) {
-    OpRegCopy(rl_result.reg, result_reg);
-    FreeTemp(result_reg);
-  }
-  StoreValue(rl_dest, rl_result);
-}
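-
-// Net effect of the sequence above, as a sketch (not literal runtime code):
-//   result = (obj != null) && (obj->klass == resolved_class);
-// which is sufficient because a final class has no subclasses, so an exact
-// class-pointer compare answers instanceof.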
-
-void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                               RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
-  OpKind op = kOpBkpt;
-  bool is_div_rem = false;
-  bool unary = false;
-  bool shift_op = false;
-  bool is_two_addr = false;
-  RegLocation rl_result;
-  switch (opcode) {
-    case Instruction::NEG_INT:
-      op = kOpNeg;
-      unary = true;
-      break;
-    case Instruction::NOT_INT:
-      op = kOpMvn;
-      unary = true;
-      break;
-    case Instruction::ADD_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::ADD_INT:
-      op = kOpAdd;
-      break;
-    case Instruction::SUB_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::SUB_INT:
-      op = kOpSub;
-      break;
-    case Instruction::MUL_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::MUL_INT:
-      op = kOpMul;
-      break;
-    case Instruction::DIV_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::DIV_INT:
-      op = kOpDiv;
-      is_div_rem = true;
-      break;
-    /* NOTE: returns in kArg1 */
-    case Instruction::REM_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::REM_INT:
-      op = kOpRem;
-      is_div_rem = true;
-      break;
-    case Instruction::AND_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::AND_INT:
-      op = kOpAnd;
-      break;
-    case Instruction::OR_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::OR_INT:
-      op = kOpOr;
-      break;
-    case Instruction::XOR_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::XOR_INT:
-      op = kOpXor;
-      break;
-    case Instruction::SHL_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::SHL_INT:
-      shift_op = true;
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::SHR_INT:
-      shift_op = true;
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_INT_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::USHR_INT:
-      shift_op = true;
-      op = kOpLsr;
-      break;
-    default:
-      LOG(FATAL) << "Invalid word arith op: " << opcode;
-  }
-
-  // Can we convert to a two address instruction?
-  if (!is_two_addr &&
-        (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
-         mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
-    is_two_addr = true;
-  }
-
-  if (!GenerateTwoOperandInstructions()) {
-    is_two_addr = false;
-  }
-
-  // Get the div/rem stuff out of the way.
-  if (is_div_rem) {
-    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
-    StoreValue(rl_dest, rl_result);
-    return;
-  }
-
-  // If we generate any memory access below, it will reference a dalvik reg.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-  if (unary) {
-    rl_lhs = LoadValue(rl_lhs, kCoreReg);
-    rl_result = UpdateLocTyped(rl_dest);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegReg(op, rl_result.reg, rl_lhs.reg);
-  } else {
-    if (shift_op) {
-      // X86 doesn't require masking and must use ECX.
-      RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
-      LoadValueDirectFixed(rl_rhs, t_reg);
-      if (is_two_addr) {
-        // Can we do this directly into memory?
-        rl_result = UpdateLocTyped(rl_dest);
-        if (rl_result.location != kLocPhysReg) {
-          // Okay, we can do this into memory
-          OpMemReg(op, rl_result, t_reg.GetReg());
-          FreeTemp(t_reg);
-          return;
-        } else if (!rl_result.reg.IsFloat()) {
-          // Can do this directly into the result register
-          OpRegReg(op, rl_result.reg, t_reg);
-          FreeTemp(t_reg);
-          StoreFinalValue(rl_dest, rl_result);
-          return;
-        }
-      }
-      // Three address form, or we can't do directly.
-      rl_lhs = LoadValue(rl_lhs, kCoreReg);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
-      FreeTemp(t_reg);
-    } else {
-      // Multiply is 3 operand only (sort of).
-      if (is_two_addr && op != kOpMul) {
-        // Can we do this directly into memory?
-        rl_result = UpdateLocTyped(rl_dest);
-        if (rl_result.location == kLocPhysReg) {
-          // Ensure res is in a core reg
-          rl_result = EvalLoc(rl_dest, kCoreReg, true);
-          // Can we do this from memory directly?
-          rl_rhs = UpdateLocTyped(rl_rhs);
-          if (rl_rhs.location != kLocPhysReg) {
-            OpRegMem(op, rl_result.reg, rl_rhs);
-            StoreFinalValue(rl_dest, rl_result);
-            return;
-          } else if (!rl_rhs.reg.IsFloat()) {
-            OpRegReg(op, rl_result.reg, rl_rhs.reg);
-            StoreFinalValue(rl_dest, rl_result);
-            return;
-          }
-        }
-        rl_rhs = LoadValue(rl_rhs, kCoreReg);
-        // rl_rhs and rl_dest may be the same VR; in that case rl_dest is in a
-        // register after the LoadValue above while rl_result has not been
-        // updated yet, so refresh it here.
-        rl_result = UpdateLocTyped(rl_dest);
-        if (rl_result.location != kLocPhysReg) {
-          // Okay, we can do this into memory.
-          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
-          return;
-        } else if (!rl_result.reg.IsFloat()) {
-          // Can do this directly into the result register.
-          OpRegReg(op, rl_result.reg, rl_rhs.reg);
-          StoreFinalValue(rl_dest, rl_result);
-          return;
-        } else {
-          rl_lhs = LoadValue(rl_lhs, kCoreReg);
-          rl_result = EvalLoc(rl_dest, kCoreReg, true);
-          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
-        }
-      } else {
-        // Try to use reg/memory instructions.
-        rl_lhs = UpdateLocTyped(rl_lhs);
-        rl_rhs = UpdateLocTyped(rl_rhs);
-        // We can't optimize with FP registers.
-        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
-          // Something is difficult, so fall back to the standard case.
-          rl_lhs = LoadValue(rl_lhs, kCoreReg);
-          rl_rhs = LoadValue(rl_rhs, kCoreReg);
-          rl_result = EvalLoc(rl_dest, kCoreReg, true);
-          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
-        } else {
-          // We can optimize by moving to result and using memory operands.
-          if (rl_rhs.location != kLocPhysReg) {
-            // Force LHS into result.
-            // We have to be careful about ordering here:
-            // if rl_dest and rl_lhs point to the same VR, load first;
-            // if they are different, find a register for dest first.
-            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
-                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
-              rl_lhs = LoadValue(rl_lhs, kCoreReg);
-              rl_result = EvalLoc(rl_dest, kCoreReg, true);
-              // No-op if these are the same.
-              OpRegCopy(rl_result.reg, rl_lhs.reg);
-            } else {
-              rl_result = EvalLoc(rl_dest, kCoreReg, true);
-              LoadValueDirect(rl_lhs, rl_result.reg);
-            }
-            OpRegMem(op, rl_result.reg, rl_rhs);
-          } else if (rl_lhs.location != kLocPhysReg) {
-            // RHS is in a register; LHS is in memory.
-            if (op != kOpSub) {
-              // Force RHS into result and operate on memory.
-              rl_result = EvalLoc(rl_dest, kCoreReg, true);
-              OpRegCopy(rl_result.reg, rl_rhs.reg);
-              OpRegMem(op, rl_result.reg, rl_lhs);
-            } else {
-              // Subtraction isn't commutative.
-              rl_lhs = LoadValue(rl_lhs, kCoreReg);
-              rl_rhs = LoadValue(rl_rhs, kCoreReg);
-              rl_result = EvalLoc(rl_dest, kCoreReg, true);
-              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
-            }
-          } else {
-            // Both are in registers.
-            rl_lhs = LoadValue(rl_lhs, kCoreReg);
-            rl_rhs = LoadValue(rl_rhs, kCoreReg);
-            rl_result = EvalLoc(rl_dest, kCoreReg, true);
-            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
-          }
-        }
-      }
-    }
-  }
-  StoreValue(rl_dest, rl_result);
-}
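// For illustration only, a minimal sketch (hypothetical helpers, not from the
// deleted file) of why the reg/memory fast path above must special-case
// subtraction: a commutative op may force either operand into the result
// register, but SUB must load the LHS first, since `sub r, m` computes r - m.
static int LowerAddRegMem(int lhs_in_mem, int rhs_in_reg) {
  int result = rhs_in_reg;  // Force RHS into the result register...
  result += lhs_in_mem;     // ...then `add r, m`; operand order is irrelevant for ADD.
  return result;
}
static int LowerSubRegReg(int lhs_in_mem, int rhs_in_reg) {
  int result = lhs_in_mem;  // SUB is not commutative: load the LHS first,
  result -= rhs_in_reg;     // then `sub r, r` yields lhs - rhs.
  return result;
}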
-
-bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
-  // If either operand lives in a physical FP register, we can't use the reg/memory forms.
-  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
-    return false;
-  }
-  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
-    return false;
-  }
-
-  // Everything will be fine :-).
-  return true;
-}
-
-void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
-  if (!cu_->target64) {
-    Mir2Lir::GenIntToLong(rl_dest, rl_src);
-    return;
-  }
-  rl_src = UpdateLocTyped(rl_src);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (rl_src.location == kLocPhysReg) {
-    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  } else {
-    int displacement = SRegOffset(rl_src.s_reg_low);
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(),
-                     displacement + LOWORD_OFFSET);
-    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
-                            true /* is_load */, true /* is_64bit */);
-  }
-  StoreValueWide(rl_dest, rl_result);
-}
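// A minimal sketch, for illustration (hypothetical helper, not from the deleted
// file): the kX86MovsxdRR/kX86MovsxdRM forms emitted above sign-extend a 32-bit
// value into a 64-bit register; the portable equivalent of that widening is:
#include <cstdint>
int64_t SignExtend32To64(int32_t v) {
  return static_cast<int64_t>(v);  // e.g. int32_t(-1) -> int64_t(-1), all upper bits set.
}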
-
-void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = UpdateLocWide(rl_src);
-  rl_src = NarrowRegLoc(rl_src);
-  StoreValue(rl_dest, rl_src);
-
-  if (cu_->target64) {
-    // If src and dest are in the same physical register, StoreValue generates
-    // no operation, but we still need an explicit 32-bit mov R, R to clear
-    // the upper 32 bits.
-    rl_dest = UpdateLoc(rl_dest);
-    if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg
-           && IsSameReg(rl_src.reg, rl_dest.reg)) {
-        LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
-        // Remove the nop flag set by OpRegCopyNoInsert when src == dest.
-        copy_lir->flags.is_nop = false;
-        AppendLIR(copy_lir);
-    }
-  }
-}
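// A minimal sketch, for illustration (hypothetical helper, not from the deleted
// file): on x86-64, writing a 32-bit register implicitly zeroes bits 63..32,
// which is why the "no-op" copy above must be kept. The portable equivalent:
#include <cstdint>
uint64_t TruncateToUint32(uint64_t wide) {
  return static_cast<uint32_t>(wide);  // The upper 32 bits of the result are zero.
}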
-
-void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                        RegLocation rl_src1, RegLocation rl_shift) {
-  if (!cu_->target64) {
-    // Long shift operations in 32-bit code. Use shld or shrd to fill one 32-bit register
-    // from the other half, then shift the other half. If the shift amount is less than 32,
-    // we're done; otherwise move one register to the other and place zero or sign bits in
-    // the vacated one.
-    LIR* branch;
-    FlushAllRegs();
-    LockCallTemps();
-    LoadValueDirectFixed(rl_shift, rs_rCX);
-    RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
-    LoadValueDirectWideFixed(rl_src1, r_tmp);
-    switch (opcode) {
-      case Instruction::SHL_LONG:
-      case Instruction::SHL_LONG_2ADDR:
-        NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
-        NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
-        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
-        OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
-        LoadConstant(r_tmp.GetLow(), 0);
-        branch->target = NewLIR0(kPseudoTargetLabel);
-        break;
-      case Instruction::SHR_LONG:
-      case Instruction::SHR_LONG_2ADDR:
-        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
-        NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
-        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
-        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
-        NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
-        branch->target = NewLIR0(kPseudoTargetLabel);
-        break;
-      case Instruction::USHR_LONG:
-      case Instruction::USHR_LONG_2ADDR:
-        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
-               rs_rCX.GetReg());
-        NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
-        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
-        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
-        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
-        LoadConstant(r_tmp.GetHigh(), 0);
-        branch->target = NewLIR0(kPseudoTargetLabel);
-        break;
-      default:
-        LOG(FATAL) << "Unexpected case: " << opcode;
-        return;
-    }
-    RegLocation rl_result = LocCReturnWide();
-    StoreValueWide(rl_dest, rl_result);
-    return;
-  }
-
-  bool is_two_addr = false;
-  OpKind op = kOpBkpt;
-  RegLocation rl_result;
-
-  switch (opcode) {
-    case Instruction::SHL_LONG_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::SHL_LONG:
-      op = kOpLsl;
-      break;
-    case Instruction::SHR_LONG_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::SHR_LONG:
-      op = kOpAsr;
-      break;
-    case Instruction::USHR_LONG_2ADDR:
-      is_two_addr = true;
-      FALLTHROUGH_INTENDED;
-    case Instruction::USHR_LONG:
-      op = kOpLsr;
-      break;
-    default:
-      op = kOpBkpt;
-  }
-
-  // x86 shifts mask the count in hardware, so no explicit masking is needed,
-  // but the count must live in ECX.
-  RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
-  LoadValueDirectFixed(rl_shift, t_reg);
-  if (is_two_addr) {
-    // Can we do this directly into memory?
-    rl_result = UpdateLocWideTyped(rl_dest);
-    if (rl_result.location != kLocPhysReg) {
-      // Okay, we can do this into memory
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      OpMemReg(op, rl_result, t_reg.GetReg());
-    } else if (!rl_result.reg.IsFloat()) {
-      // Can do this directly into the result register
-      OpRegReg(op, rl_result.reg, t_reg);
-      StoreFinalValueWide(rl_dest, rl_result);
-    }
-  } else {
-    // Three address form, or we can't do directly.
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
-    StoreFinalValueWide(rl_dest, rl_result);
-  }
-
-  FreeTemp(t_reg);
-}
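// A minimal sketch, for illustration (hypothetical helper, not from the deleted
// file), of the 32-bit SHL_LONG sequence above in portable C++: shld fills the
// high half from the low half, sal shifts the low half, and if bit 5 of the
// count is set (count >= 32) the halves are fixed up afterwards.
#include <cstdint>
uint64_t ShlLongVia32BitHalves(uint32_t lo, uint32_t hi, uint32_t count) {
  uint32_t c = count & 63;  // Dalvik masks long shift counts to 6 bits.
  uint32_t k = c & 31;      // Hardware masks 32-bit shift counts to 5 bits.
  uint32_t new_hi = (k != 0) ? ((hi << k) | (lo >> (32 - k))) : hi;  // shld hi, lo, cl
  uint32_t new_lo = lo << k;                                         // sal lo, cl
  if ((c & 32) != 0) {  // test cl, 32: for counts >= 32,
    new_hi = new_lo;    //   move the shifted low half into the high half
    new_lo = 0;         //   and zero the low half.
  }
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}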
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
deleted file mode 100644
index d9571c5..0000000
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dex/quick/quick_compiler.h"
-#include "dex/pass_manager.h"
-#include "dex/verification_results.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "runtime/dex_file.h"
-#include "driver/compiler_options.h"
-#include "driver/compiler_driver.h"
-#include "codegen_x86.h"
-#include "gtest/gtest.h"
-#include "utils/assembler_test_base.h"
-
-namespace art {
-
-class QuickAssembleX86TestBase : public testing::Test {
- protected:
-  X86Mir2Lir* Prepare(InstructionSet target) {
-    isa_ = target;
-    pool_.reset(new ArenaPool());
-    compiler_options_.reset(new CompilerOptions(
-        CompilerOptions::kDefaultCompilerFilter,
-        CompilerOptions::kDefaultHugeMethodThreshold,
-        CompilerOptions::kDefaultLargeMethodThreshold,
-        CompilerOptions::kDefaultSmallMethodThreshold,
-        CompilerOptions::kDefaultTinyMethodThreshold,
-        CompilerOptions::kDefaultNumDexMethodsThreshold,
-        CompilerOptions::kDefaultInlineDepthLimit,
-        CompilerOptions::kDefaultInlineMaxCodeUnits,
-        false,
-        CompilerOptions::kDefaultTopKProfileThreshold,
-        false,
-        CompilerOptions::kDefaultGenerateDebugInfo,
-        false,
-        false,
-        false,
-        false,
-        nullptr,
-        new PassManagerOptions(),
-        nullptr,
-        false));
-    verification_results_.reset(new VerificationResults(compiler_options_.get()));
-    method_inliner_map_.reset(new DexFileToMethodInlinerMap());
-    compiler_driver_.reset(new CompilerDriver(
-        compiler_options_.get(),
-        verification_results_.get(),
-        method_inliner_map_.get(),
-        Compiler::kQuick,
-        isa_,
-        nullptr,
-        false,
-        nullptr,
-        nullptr,
-        nullptr,
-        0,
-        false,
-        false,
-        "",
-        false,
-        0,
-        -1,
-        ""));
-    cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
-    DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
-        cu_->arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
-    memset(code_item, 0, sizeof(DexFile::CodeItem));
-    cu_->mir_graph.reset(new MIRGraph(cu_.get(), &cu_->arena));
-    cu_->mir_graph->current_code_item_ = code_item;
-    cu_->cg.reset(QuickCompiler::GetCodeGenerator(cu_.get(), nullptr));
-
-    test_helper_.reset(new AssemblerTestInfrastructure(
-        isa_ == kX86 ? "x86" : "x86_64",
-        "as",
-        isa_ == kX86 ? " --32" : "",
-        "objdump",
-        " -h",
-        "objdump",
-        isa_ == kX86 ?
-            " -D -bbinary -mi386 --no-show-raw-insn" :
-            " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn",
-        nullptr));
-
-    X86Mir2Lir* m2l = static_cast<X86Mir2Lir*>(cu_->cg.get());
-    m2l->CompilerInitializeRegAlloc();
-    return m2l;
-  }
-
-  void Release() {
-    cu_.reset();
-    compiler_driver_.reset();
-    method_inliner_map_.reset();
-    verification_results_.reset();
-    compiler_options_.reset();
-    pool_.reset();
-
-    test_helper_.reset();
-  }
-
-  void TearDown() OVERRIDE {
-    Release();
-  }
-
-  bool CheckTools(InstructionSet target) {
-    Prepare(target);
-    bool result = test_helper_->CheckTools();
-    Release();
-    return result;
-  }
-
-  std::unique_ptr<CompilationUnit> cu_;
-  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
-
- private:
-  InstructionSet isa_;
-  std::unique_ptr<ArenaPool> pool_;
-  std::unique_ptr<CompilerOptions> compiler_options_;
-  std::unique_ptr<VerificationResults> verification_results_;
-  std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
-  std::unique_ptr<CompilerDriver> compiler_driver_;
-};
-
-class QuickAssembleX86LowLevelTest : public QuickAssembleX86TestBase {
- protected:
-  void Test(InstructionSet target, std::string test_name, std::string gcc_asm,
-            int opcode, int op0 = 0, int op1 = 0, int op2 = 0, int op3 = 0, int op4 = 0) {
-    X86Mir2Lir* m2l = Prepare(target);
-
-    LIR lir;
-    memset(&lir, 0, sizeof(LIR));
-    lir.opcode = opcode;
-    lir.operands[0] = op0;
-    lir.operands[1] = op1;
-    lir.operands[2] = op2;
-    lir.operands[3] = op3;
-    lir.operands[4] = op4;
-    lir.flags.size = m2l->GetInsnSize(&lir);
-
-    AssemblerStatus status = m2l->AssembleInstructions(&lir, 0);
-    // We don't expect a retry.
-    ASSERT_EQ(status, AssemblerStatus::kSuccess);
-
-    // Need a "base" std::vector.
-    std::vector<uint8_t> buffer(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
-    test_helper_->Driver(buffer, gcc_asm, test_name);
-
-    Release();
-  }
-};
-
-TEST_F(QuickAssembleX86LowLevelTest, Addpd) {
-  Test(kX86, "Addpd", "addpd %xmm1, %xmm0\n", kX86AddpdRR,
-       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
-  Test(kX86_64, "Addpd", "addpd %xmm1, %xmm0\n", kX86AddpdRR,
-       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
-}
-
-TEST_F(QuickAssembleX86LowLevelTest, Subpd) {
-  Test(kX86, "Subpd", "subpd %xmm1, %xmm0\n", kX86SubpdRR,
-       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
-  Test(kX86_64, "Subpd", "subpd %xmm1, %xmm0\n", kX86SubpdRR,
-       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
-}
-
-TEST_F(QuickAssembleX86LowLevelTest, Mulpd) {
-  Test(kX86, "Mulpd", "mulpd %xmm1, %xmm0\n", kX86MulpdRR,
-       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
-  Test(kX86_64, "Mulpd", "mulpd %xmm1, %xmm0\n", kX86MulpdRR,
-       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
-}
-
-TEST_F(QuickAssembleX86LowLevelTest, Pextrw) {
-  Test(kX86, "Pextrw", "pextrw $7, %xmm3, 8(%eax)\n", kX86PextrwMRI,
-       RegStorage::Solo32(r0).GetReg(), 8, RegStorage::Solo128(3).GetReg(), 7);
-  Test(kX86_64, "Pextrw", "pextrw $7, %xmm8, 8(%r10)\n", kX86PextrwMRI,
-       RegStorage::Solo64(r10q).GetReg(), 8, RegStorage::Solo128(8).GetReg(), 7);
-}
-
-class QuickAssembleX86MacroTest : public QuickAssembleX86TestBase {
- protected:
-  typedef void (X86Mir2Lir::*AsmFn)(MIR*);
-
-  void TestVectorFn(InstructionSet target,
-                    Instruction::Code opcode,
-                    AsmFn f,
-                    std::string inst_string) {
-    X86Mir2Lir *m2l = Prepare(target);
-
-    // Create a vector MIR.
-    MIR* mir = cu_->mir_graph->NewMIR();
-    mir->dalvikInsn.opcode = opcode;
-    mir->dalvikInsn.vA = 0;  // Destination and source.
-    mir->dalvikInsn.vB = 1;  // Source.
-    int vector_size = 128;
-    int vector_type = kDouble;
-    mir->dalvikInsn.vC = (vector_type << 16) | vector_size;  // Type size.
-    (m2l->*f)(mir);
-    m2l->AssembleLIR();
-
-    std::string gcc_asm = inst_string + " %xmm1, %xmm0\n";
-    // Need a "base" std::vector.
-    std::vector<uint8_t> buffer(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
-    test_helper_->Driver(buffer, gcc_asm, inst_string);
-
-    Release();
-  }
-
-  // Tests are member functions as many of the assembler functions are protected or private,
-  // and it would be inelegant to define ART_FRIEND_TEST for all the tests.
-
-  void TestAddpd() {
-    TestVectorFn(kX86,
-                 static_cast<Instruction::Code>(kMirOpPackedAddition),
-                 &X86Mir2Lir::GenAddVector,
-                 "addpd");
-    TestVectorFn(kX86_64,
-                 static_cast<Instruction::Code>(kMirOpPackedAddition),
-                 &X86Mir2Lir::GenAddVector,
-                 "addpd");
-  }
-
-  void TestSubpd() {
-    TestVectorFn(kX86,
-                 static_cast<Instruction::Code>(kMirOpPackedSubtract),
-                 &X86Mir2Lir::GenSubtractVector,
-                 "subpd");
-    TestVectorFn(kX86_64,
-                 static_cast<Instruction::Code>(kMirOpPackedSubtract),
-                 &X86Mir2Lir::GenSubtractVector,
-                 "subpd");
-  }
-
-  void TestMulpd() {
-    TestVectorFn(kX86,
-                 static_cast<Instruction::Code>(kMirOpPackedMultiply),
-                 &X86Mir2Lir::GenMultiplyVector,
-                 "mulpd");
-    TestVectorFn(kX86_64,
-                 static_cast<Instruction::Code>(kMirOpPackedMultiply),
-                 &X86Mir2Lir::GenMultiplyVector,
-                 "mulpd");
-  }
-};
-
-TEST_F(QuickAssembleX86MacroTest, CheckTools) {
-  ASSERT_TRUE(CheckTools(kX86)) << "x86 tools not found.";
-  ASSERT_TRUE(CheckTools(kX86_64)) << "x86_64 tools not found.";
-}
-
-#define DECLARE_TEST(name)             \
-  TEST_F(QuickAssembleX86MacroTest, name) { \
-    Test ## name();                    \
-  }
-
-DECLARE_TEST(Addpd)
-DECLARE_TEST(Subpd)
-DECLARE_TEST(Mulpd)
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
deleted file mode 100755
index 75f3fef..0000000
--- a/compiler/dex/quick/x86/target_x86.cc
+++ /dev/null
@@ -1,2636 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_x86.h"
-
-#include <cstdarg>
-#include <inttypes.h>
-#include <string>
-
-#include "arch/instruction_set_features.h"
-#include "art_method.h"
-#include "backend_x86.h"
-#include "base/logging.h"
-#include "dex/compiler_ir.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/reg_storage_eq.h"
-#include "driver/compiler_driver.h"
-#include "mirror/array-inl.h"
-#include "mirror/string.h"
-#include "oat.h"
-#include "oat_quick_method_header.h"
-#include "x86_lir.h"
-
-namespace art {
-
-static constexpr RegStorage core_regs_arr_32[] = {
-    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
-};
-static constexpr RegStorage core_regs_arr_64[] = {
-    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
-    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
-};
-static constexpr RegStorage core_regs_arr_64q[] = {
-    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
-    rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
-};
-static constexpr RegStorage sp_regs_arr_32[] = {
-    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-};
-static constexpr RegStorage sp_regs_arr_64[] = {
-    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
-};
-static constexpr RegStorage dp_regs_arr_32[] = {
-    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-};
-static constexpr RegStorage dp_regs_arr_64[] = {
-    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
-};
-static constexpr RegStorage xp_regs_arr_32[] = {
-    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-};
-static constexpr RegStorage xp_regs_arr_64[] = {
-    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
-};
-static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
-static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
-static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
-static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
-static constexpr RegStorage core_temps_arr_64[] = {
-    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
-    rs_r8, rs_r9, rs_r10, rs_r11
-};
-
-// How to make a register available for promotion:
-// 1) Remove the register from the array defining temps
-// 2) Update ClobberCallerSave
-// 3) Update the JNI compiler ABI:
-// 3.1) add the reg in the JniCallingConvention method
-// 3.2) update CoreSpillMask/FpSpillMask
-// 4) Update the entrypoints
-// 4.1) Update constants in asm_support_x86_64.h for the new frame size
-// 4.2) Remove the entry in SmashCallerSaves
-// 4.3) Update jni_entrypoints to spill/unspill the new callee-save reg
-// 4.4) Update quick_entrypoints to spill/unspill the new callee-save reg
-// 5) Update the runtime ABI
-// 5.1) Update quick_method_frame_info with the new required spills
-// 5.2) Update QuickArgumentVisitor with the new offsets to gprs and xmms
-// Note that you cannot use the registers corresponding to incoming args
-// (per the ABI), and the QCG needs one additional XMM temp for the
-// bulk copy in preparation for a call.
-static constexpr RegStorage core_temps_arr_64q[] = {
-    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
-    rs_r8q, rs_r9q, rs_r10q, rs_r11q
-};
-static constexpr RegStorage sp_temps_arr_32[] = {
-    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-};
-static constexpr RegStorage sp_temps_arr_64[] = {
-    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-    rs_fr8, rs_fr9, rs_fr10, rs_fr11
-};
-static constexpr RegStorage dp_temps_arr_32[] = {
-    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-};
-static constexpr RegStorage dp_temps_arr_64[] = {
-    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-    rs_dr8, rs_dr9, rs_dr10, rs_dr11
-};
-
-static constexpr RegStorage xp_temps_arr_32[] = {
-    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-};
-static constexpr RegStorage xp_temps_arr_64[] = {
-    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-    rs_xr8, rs_xr9, rs_xr10, rs_xr11
-};
-
-static constexpr ArrayRef<const RegStorage> empty_pool;
-static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> core_regs_64q(core_regs_arr_64q);
-static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
-static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
-static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
-static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64);
-static constexpr ArrayRef<const RegStorage> core_temps_64q(core_temps_arr_64q);
-static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64);
-static constexpr ArrayRef<const RegStorage> dp_temps_32(dp_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64);
-
-static constexpr ArrayRef<const RegStorage> xp_temps_32(xp_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> xp_temps_64(xp_temps_arr_64);
-
-RegLocation X86Mir2Lir::LocCReturn() {
-  return x86_loc_c_return;
-}
-
-RegLocation X86Mir2Lir::LocCReturnRef() {
-  return cu_->target64 ? x86_64_loc_c_return_ref : x86_loc_c_return_ref;
-}
-
-RegLocation X86Mir2Lir::LocCReturnWide() {
-  return cu_->target64 ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
-}
-
-RegLocation X86Mir2Lir::LocCReturnFloat() {
-  return x86_loc_c_return_float;
-}
-
-RegLocation X86Mir2Lir::LocCReturnDouble() {
-  return x86_loc_c_return_double;
-}
-
-// 32-bit reg storage locations for 32-bit targets.
-static const RegStorage RegStorage32FromSpecialTargetRegister_Target32[] {
-  RegStorage::InvalidReg(),  // kSelf - Thread pointer.
-  RegStorage::InvalidReg(),  // kSuspend - Used to reduce suspend checks for some targets.
-  RegStorage::InvalidReg(),  // kLr - no register as the return address is pushed on entry.
-  RegStorage::InvalidReg(),  // kPc - not exposed on X86; see kX86StartOfMethod.
-  rs_rX86_SP_32,             // kSp
-  rs_rAX,                    // kArg0
-  rs_rCX,                    // kArg1
-  rs_rDX,                    // kArg2
-  rs_rBX,                    // kArg3
-  RegStorage::InvalidReg(),  // kArg4
-  RegStorage::InvalidReg(),  // kArg5
-  RegStorage::InvalidReg(),  // kArg6
-  RegStorage::InvalidReg(),  // kArg7
-  rs_fr0,                    // kFArg0
-  rs_fr1,                    // kFArg1
-  rs_fr2,                    // kFArg2
-  rs_fr3,                    // kFArg3
-  RegStorage::InvalidReg(),  // kFArg4
-  RegStorage::InvalidReg(),  // kFArg5
-  RegStorage::InvalidReg(),  // kFArg6
-  RegStorage::InvalidReg(),  // kFArg7
-  RegStorage::InvalidReg(),  // kFArg8
-  RegStorage::InvalidReg(),  // kFArg9
-  RegStorage::InvalidReg(),  // kFArg10
-  RegStorage::InvalidReg(),  // kFArg11
-  RegStorage::InvalidReg(),  // kFArg12
-  RegStorage::InvalidReg(),  // kFArg13
-  RegStorage::InvalidReg(),  // kFArg14
-  RegStorage::InvalidReg(),  // kFArg15
-  rs_rAX,                    // kRet0
-  rs_rDX,                    // kRet1
-  rs_rAX,                    // kInvokeTgt
-  rs_rAX,                    // kHiddenArg - used to hold the method index before copying to fr0.
-  rs_fr7,                    // kHiddenFpArg
-  rs_rCX,                    // kCount
-};
-
-// 32-bit reg storage locations for 64-bit targets.
-static const RegStorage RegStorage32FromSpecialTargetRegister_Target64[] {
-  RegStorage::InvalidReg(),  // kSelf - Thread pointer.
-  RegStorage::InvalidReg(),  // kSuspend - Used to reduce suspend checks for some targets.
-  RegStorage::InvalidReg(),  // kLr - no register as the return address is pushed on entry.
-  RegStorage(kRIPReg),       // kPc
-  rs_rX86_SP_32,             // kSp
-  rs_rDI,                    // kArg0
-  rs_rSI,                    // kArg1
-  rs_rDX,                    // kArg2
-  rs_rCX,                    // kArg3
-  rs_r8,                     // kArg4
-  rs_r9,                     // kArg5
-  RegStorage::InvalidReg(),  // kArg6
-  RegStorage::InvalidReg(),  // kArg7
-  rs_fr0,                    // kFArg0
-  rs_fr1,                    // kFArg1
-  rs_fr2,                    // kFArg2
-  rs_fr3,                    // kFArg3
-  rs_fr4,                    // kFArg4
-  rs_fr5,                    // kFArg5
-  rs_fr6,                    // kFArg6
-  rs_fr7,                    // kFArg7
-  RegStorage::InvalidReg(),  // kFArg8
-  RegStorage::InvalidReg(),  // kFArg9
-  RegStorage::InvalidReg(),  // kFArg10
-  RegStorage::InvalidReg(),  // kFArg11
-  RegStorage::InvalidReg(),  // kFArg12
-  RegStorage::InvalidReg(),  // kFArg13
-  RegStorage::InvalidReg(),  // kFArg14
-  RegStorage::InvalidReg(),  // kFArg15
-  rs_rAX,                    // kRet0
-  rs_rDX,                    // kRet1
-  rs_rAX,                    // kInvokeTgt
-  rs_rAX,                    // kHiddenArg
-  RegStorage::InvalidReg(),  // kHiddenFpArg
-  rs_rCX,                    // kCount
-};
-static_assert(arraysize(RegStorage32FromSpecialTargetRegister_Target32) ==
-              arraysize(RegStorage32FromSpecialTargetRegister_Target64),
-              "Mismatch in RegStorage array sizes");
-
-// Return a target-dependent special register for 32-bit.
-RegStorage X86Mir2Lir::TargetReg32(SpecialTargetRegister reg) const {
-  DCHECK_EQ(RegStorage32FromSpecialTargetRegister_Target32[kCount], rs_rCX);
-  DCHECK_EQ(RegStorage32FromSpecialTargetRegister_Target64[kCount], rs_rCX);
-  DCHECK_LT(reg, arraysize(RegStorage32FromSpecialTargetRegister_Target32));
-  return cu_->target64 ? RegStorage32FromSpecialTargetRegister_Target64[reg]
-                       : RegStorage32FromSpecialTargetRegister_Target32[reg];
-}
-
-RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Do not use this function!!!";
-  UNREACHABLE();
-}
-
-/*
- * Decode the register id.
- */
-ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
-  /* Double registers on x86 alias a single FP register, so the mask is always a single bit. */
-  return ResourceMask::Bit(
-      /* FP register starts at bit position 16 */
-      ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum());
-}
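// A worked example, for illustration (hypothetical names, not from the deleted
// file), of the bit layout computed above: core regs occupy the low bits and
// FP regs start at bit 16 (kX86FPReg0), per the comment in GetRegMaskCommon.
#include <cstdint>
constexpr int kFpBase = 16;  // Assumed value of kX86FPReg0.
constexpr uint64_t MaskFor(int reg_num, bool is_fp) {
  return uint64_t{1} << ((is_fp ? kFpBase : 0) + reg_num);
}
static_assert(MaskFor(2, false) == 0x4, "rDX (reg 2) -> bit 2");
static_assert(MaskFor(2, true) == 0x40000, "xmm2 -> bit 18");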
-
-ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const {
-  return kEncodeNone;
-}
-
-void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
-                                          ResourceMask* use_mask, ResourceMask* def_mask) {
-  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
-  DCHECK(!lir->flags.use_def_invalid);
-
-  // X86-specific resource map setup here.
-  if (flags & REG_USE_SP) {
-    use_mask->SetBit(kX86RegSP);
-  }
-
-  if (flags & REG_DEF_SP) {
-    def_mask->SetBit(kX86RegSP);
-  }
-
-  if (flags & REG_DEFA) {
-    SetupRegMask(def_mask, rs_rAX.GetReg());
-  }
-
-  if (flags & REG_DEFD) {
-    SetupRegMask(def_mask, rs_rDX.GetReg());
-  }
-  if (flags & REG_USEA) {
-    SetupRegMask(use_mask, rs_rAX.GetReg());
-  }
-
-  if (flags & REG_USEC) {
-    SetupRegMask(use_mask, rs_rCX.GetReg());
-  }
-
-  if (flags & REG_USED) {
-    SetupRegMask(use_mask, rs_rDX.GetReg());
-  }
-
-  if (flags & REG_USEB) {
-    SetupRegMask(use_mask, rs_rBX.GetReg());
-  }
-
-  // Fix up a hard-to-describe instruction: uses rAX, rCX, rDI; sets rDI.
-  if (lir->opcode == kX86RepneScasw) {
-    SetupRegMask(use_mask, rs_rAX.GetReg());
-    SetupRegMask(use_mask, rs_rCX.GetReg());
-    SetupRegMask(use_mask, rs_rDI.GetReg());
-    SetupRegMask(def_mask, rs_rDI.GetReg());
-  }
-
-  if (flags & USE_FP_STACK) {
-    use_mask->SetBit(kX86FPStack);
-    def_mask->SetBit(kX86FPStack);
-  }
-}
-
-/* For dumping instructions */
-static const char* x86RegName[] = {
-  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
-  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-};
-
-static const char* x86CondName[] = {
-  "O",
-  "NO",
-  "B/NAE/C",
-  "NB/AE/NC",
-  "Z/EQ",
-  "NZ/NE",
-  "BE/NA",
-  "NBE/A",
-  "S",
-  "NS",
-  "P/PE",
-  "NP/PO",
-  "L/NGE",
-  "NL/GE",
-  "LE/NG",
-  "NLE/G"
-};
-
-/*
- * Interpret a format string and build the decoded instruction string.
- * See the format key in Assemble.cc.
- */
-std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
-  std::string buf;
-  size_t i = 0;
-  size_t fmt_len = strlen(fmt);
-  while (i < fmt_len) {
-    if (fmt[i] != '!') {
-      buf += fmt[i];
-      i++;
-    } else {
-      i++;
-      DCHECK_LT(i, fmt_len);
-      char operand_number_ch = fmt[i];
-      i++;
-      if (operand_number_ch == '!') {
-        buf += "!";
-      } else {
-        int operand_number = operand_number_ch - '0';
-        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
-        DCHECK_LT(i, fmt_len);
-        int operand = lir->operands[operand_number];
-        switch (fmt[i]) {
-          case 'c':
-            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
-            buf += x86CondName[operand];
-            break;
-          case 'd':
-            buf += StringPrintf("%d", operand);
-            break;
-          case 'q': {
-             int64_t value = static_cast<int64_t>(static_cast<int64_t>(operand) << 32 |
-                             static_cast<uint32_t>(lir->operands[operand_number + 1]));
-             buf += StringPrintf("%" PRId64, value);
-             break;
-          }
-          case 'p': {
-            const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(operand);
-            buf += StringPrintf("0x%08x", tab_rec->offset);
-            break;
-          }
-          case 'r':
-            if (RegStorage::IsFloat(operand)) {
-              int fp_reg = RegStorage::RegNum(operand);
-              buf += StringPrintf("xmm%d", fp_reg);
-            } else {
-              int reg_num = RegStorage::RegNum(operand);
-              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
-              buf += x86RegName[reg_num];
-            }
-            break;
-          case 't':
-            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
-                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
-                                lir->target);
-            break;
-          default:
-            buf += StringPrintf("DecodeError '%c'", fmt[i]);
-            break;
-        }
-        i++;
-      }
-    }
-  }
-  return buf;
-}
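// A worked example, for illustration (hypothetical inputs, not from the deleted
// file), of the '!' escapes decoded above: given fmt = "add !0r, !1d" with
// operand 0 encoding rax and operand 1 equal to 42, case 'r' prints the
// register name and case 'd' the decimal value, producing "add rax, 42";
// "!!" emits a literal '!'.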
-
-void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) {
-  char buf[256];
-  buf[0] = 0;
-
-  if (mask.Equals(kEncodeAll)) {
-    strcpy(buf, "all");
-  } else {
-    char num[8];
-    int i;
-
-    for (i = 0; i < kX86RegEnd; i++) {
-      if (mask.HasBit(i)) {
-        snprintf(num, arraysize(num), "%d ", i);
-        strcat(buf, num);
-      }
-    }
-
-    if (mask.HasBit(ResourceMask::kCCode)) {
-      strcat(buf, "cc ");
-    }
-    /* Memory bits */
-    if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) {
-      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
-               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
-               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
-    }
-    if (mask.HasBit(ResourceMask::kLiteral)) {
-      strcat(buf, "lit ");
-    }
-
-    if (mask.HasBit(ResourceMask::kHeapRef)) {
-      strcat(buf, "heap ");
-    }
-    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
-      strcat(buf, "noalias ");
-    }
-  }
-  if (buf[0]) {
-    LOG(INFO) << prefix << ": " <<  buf;
-  }
-}
-
-void X86Mir2Lir::AdjustSpillMask() {
-  // x86 has no LR, but the return address pushed by the call takes a spill
-  // slot; account for that fake return address register here.
-  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
-  num_core_spills_++;
-}
-
-RegStorage X86Mir2Lir::AllocateByteRegister() {
-  RegStorage reg = AllocTypedTemp(false, kCoreReg);
-  if (!cu_->target64) {
-    DCHECK_LT(reg.GetRegNum(), rs_rX86_SP_32.GetRegNum());
-  }
-  return reg;
-}
-
-RegStorage X86Mir2Lir::Get128BitRegister(RegStorage reg) {
-  return GetRegInfo(reg)->Master()->GetReg();
-}
-
-bool X86Mir2Lir::IsByteRegister(RegStorage reg) const {
-  return cu_->target64 || reg.GetRegNum() < rs_rX86_SP_32.GetRegNum();
-}
-
-/* Clobber all regs that might be used by an external C call */
-void X86Mir2Lir::ClobberCallerSave() {
-  if (cu_->target64) {
-    Clobber(rs_rAX);
-    Clobber(rs_rCX);
-    Clobber(rs_rDX);
-    Clobber(rs_rSI);
-    Clobber(rs_rDI);
-
-    Clobber(rs_r8);
-    Clobber(rs_r9);
-    Clobber(rs_r10);
-    Clobber(rs_r11);
-
-    Clobber(rs_fr8);
-    Clobber(rs_fr9);
-    Clobber(rs_fr10);
-    Clobber(rs_fr11);
-  } else {
-    Clobber(rs_rAX);
-    Clobber(rs_rCX);
-    Clobber(rs_rDX);
-    Clobber(rs_rBX);
-  }
-
-  Clobber(rs_fr0);
-  Clobber(rs_fr1);
-  Clobber(rs_fr2);
-  Clobber(rs_fr3);
-  Clobber(rs_fr4);
-  Clobber(rs_fr5);
-  Clobber(rs_fr6);
-  Clobber(rs_fr7);
-}
-
-RegLocation X86Mir2Lir::GetReturnWideAlt() {
-  RegLocation res = LocCReturnWide();
-  DCHECK_EQ(res.reg.GetLowReg(), rs_rAX.GetReg());
-  DCHECK_EQ(res.reg.GetHighReg(), rs_rDX.GetReg());
-  Clobber(rs_rAX);
-  Clobber(rs_rDX);
-  MarkInUse(rs_rAX);
-  MarkInUse(rs_rDX);
-  MarkWide(res.reg);
-  return res;
-}
-
-RegLocation X86Mir2Lir::GetReturnAlt() {
-  RegLocation res = LocCReturn();
-  res.reg.SetReg(rs_rDX.GetReg());
-  Clobber(rs_rDX);
-  MarkInUse(rs_rDX);
-  return res;
-}
-
-/* To be used when explicitly managing register use */
-void X86Mir2Lir::LockCallTemps() {
-  LockTemp(TargetReg32(kArg0));
-  LockTemp(TargetReg32(kArg1));
-  LockTemp(TargetReg32(kArg2));
-  LockTemp(TargetReg32(kArg3));
-  LockTemp(TargetReg32(kFArg0));
-  LockTemp(TargetReg32(kFArg1));
-  LockTemp(TargetReg32(kFArg2));
-  LockTemp(TargetReg32(kFArg3));
-  if (cu_->target64) {
-    LockTemp(TargetReg32(kArg4));
-    LockTemp(TargetReg32(kArg5));
-    LockTemp(TargetReg32(kFArg4));
-    LockTemp(TargetReg32(kFArg5));
-    LockTemp(TargetReg32(kFArg6));
-    LockTemp(TargetReg32(kFArg7));
-  }
-}
-
-/* To be used when explicitly managing register use */
-void X86Mir2Lir::FreeCallTemps() {
-  FreeTemp(TargetReg32(kArg0));
-  FreeTemp(TargetReg32(kArg1));
-  FreeTemp(TargetReg32(kArg2));
-  FreeTemp(TargetReg32(kArg3));
-  FreeTemp(TargetReg32(kHiddenArg));
-  FreeTemp(TargetReg32(kFArg0));
-  FreeTemp(TargetReg32(kFArg1));
-  FreeTemp(TargetReg32(kFArg2));
-  FreeTemp(TargetReg32(kFArg3));
-  if (cu_->target64) {
-    FreeTemp(TargetReg32(kArg4));
-    FreeTemp(TargetReg32(kArg5));
-    FreeTemp(TargetReg32(kFArg4));
-    FreeTemp(TargetReg32(kFArg5));
-    FreeTemp(TargetReg32(kFArg6));
-    FreeTemp(TargetReg32(kFArg7));
-  }
-}
-
-bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
-    switch (opcode) {
-      case kX86LockCmpxchgMR:
-      case kX86LockCmpxchgAR:
-      case kX86LockCmpxchg64M:
-      case kX86LockCmpxchg64A:
-      case kX86XchgMR:
-      case kX86Mfence:
-        // Atomic memory instructions provide a full barrier.
-        return true;
-      default:
-        break;
-    }
-
-    // Be conservative when we cannot prove that a full barrier is provided.
-    return false;
-}
-
-bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
-    return false;
-  }
-  // Start off by using the last LIR as the barrier; if it is not sufficient, we will update it below.
-  LIR* mem_barrier = last_lir_insn_;
-
-  bool ret = false;
-  /*
-   * According to the JSR-133 Cookbook, on x86 only StoreLoad/AnyAny barriers need a memory fence.
-   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
-   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
-   */
-  if (barrier_kind == kAnyAny) {
-    // If no LIR already exists that can be used as a barrier, generate an mfence.
-    if (mem_barrier == nullptr) {
-      mem_barrier = NewLIR0(kX86Mfence);
-      ret = true;
-    }
-
-    // If the last instruction does not provide a full barrier, insert an mfence.
-    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
-      mem_barrier = NewLIR0(kX86Mfence);
-      ret = true;
-    }
-  } else if (barrier_kind == kNTStoreStore) {
-    mem_barrier = NewLIR0(kX86Sfence);
-    ret = true;
-  }
-
-  // Now ensure that a scheduling barrier is in place.
-  if (mem_barrier == nullptr) {
-    GenBarrier();
-  } else {
-    // Mark as a scheduling barrier.
-    DCHECK(!mem_barrier->flags.use_def_invalid);
-    mem_barrier->u.m.def_mask = &kEncodeAll;
-  }
-  return ret;
-}
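// For illustration, the JSR-133 cookbook decision implemented above, written
// out as a bare decision table (a summary of the code, not from the deleted file):
//   kAnyAny (StoreLoad) -> mfence, unless the preceding LIR is already a full
//                          barrier (locked cmpxchg, xchg, or mfence).
//   kNTStoreStore       -> sfence, to order non-temporal stores.
//   all other kinds     -> no instruction; a scheduling barrier suffices
//                          because the x86 memory model already orders them.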
-
-void X86Mir2Lir::CompilerInitializeRegAlloc() {
-  if (cu_->target64) {
-    reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64,
-                                              dp_regs_64, reserved_regs_64, reserved_regs_64q,
-                                              core_temps_64, core_temps_64q,
-                                              sp_temps_64, dp_temps_64));
-  } else {
-    reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
-                                              dp_regs_32, reserved_regs_32, empty_pool,
-                                              core_temps_32, empty_pool,
-                                              sp_temps_32, dp_temps_32));
-  }
-
-  // Target-specific adjustments.
-
-  // Add in XMM registers.
-  const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32;
-  for (RegStorage reg : *xp_regs) {
-    RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
-    reginfo_map_[reg.GetReg()] = info;
-  }
-  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
-  for (RegStorage reg : *xp_temps) {
-    RegisterInfo* xp_reg_info = GetRegInfo(reg);
-    xp_reg_info->SetIsTemp(true);
-  }
-
-  // Special Handling for x86_64 RIP addressing.
-  if (cu_->target64) {
-    RegisterInfo* info = new (arena_) RegisterInfo(RegStorage(kRIPReg), kEncodeNone);
-    reginfo_map_[kRIPReg] = info;
-  }
-
-  // Alias single-precision xmm registers to double xmms.
-  // TODO: as needed, add larger vector sizes - alias all to the largest.
-  for (RegisterInfo* info : reg_pool_->sp_regs_) {
-    int sp_reg_num = info->GetReg().GetRegNum();
-    RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
-    RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
-    // 128-bit xmm vector register's master storage should refer to itself.
-    DCHECK_EQ(xp_reg_info, xp_reg_info->Master());
-
-    // Redirect 32-bit vector's master storage to 128-bit vector.
-    info->SetMaster(xp_reg_info);
-
-    RegStorage dp_reg = RegStorage::FloatSolo64(sp_reg_num);
-    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-    // Redirect 64-bit vector's master storage to 128-bit vector.
-    dp_reg_info->SetMaster(xp_reg_info);
-    // Singles should show a single 32-bit mask bit, at first referring to the low half.
-    DCHECK_EQ(info->StorageMask(), 0x1U);
-  }
-
-  if (cu_->target64) {
-    // Alias 32-bit core registers to the corresponding 64-bit registers.
-    for (RegisterInfo* info : reg_pool_->core_regs_) {
-      int x_reg_num = info->GetReg().GetRegNum();
-      RegStorage x_reg = RegStorage::Solo64(x_reg_num);
-      RegisterInfo* x_reg_info = GetRegInfo(x_reg);
-      // A 64-bit register's master storage should refer to itself.
-      DCHECK_EQ(x_reg_info, x_reg_info->Master());
-      // Redirect the 32-bit register's master storage to the 64-bit register.
-      info->SetMaster(x_reg_info);
-      // The 32-bit register should show a single 32-bit mask bit, at first referring to the low half.
-      DCHECK_EQ(info->StorageMask(), 0x1U);
-    }
-  }
-
-  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
-  // TODO: adjust for x86/hard float calling convention.
-  reg_pool_->next_core_reg_ = 2;
-  reg_pool_->next_sp_reg_ = 2;
-  reg_pool_->next_dp_reg_ = 1;
-}
-
-int X86Mir2Lir::VectorRegisterSize() {
-  return 128;
-}
-
-int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) {
-  int num_vector_temps = cu_->target64 ? xp_temps_64.size() : xp_temps_32.size();
-
-  // Leave a few temps for use by backend as scratch.
-  return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1;
-}
-
-static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
-  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
-}
-
-static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) {
-  return is_x86_64 ? dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num);
-}
-
-void X86Mir2Lir::SpillCoreRegs() {
-  if (num_core_spills_ == 0) {
-    return;
-  }
-  // Spill mask, not including the fake return address register.
-  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-  int offset =
-      frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
-  OpSize size = cu_->target64 ? k64 : k32;
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
-    if ((mask & 0x1) != 0u) {
-      DCHECK_NE(offset, 0) << "offset 0 should be for method";
-      RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg);
-      StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile);
-      cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset);
-      offset += GetInstructionSetPointerSize(cu_->instruction_set);
-    }
-  }
-}
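// A minimal sketch, for illustration (hypothetical helper, not from the deleted
// file), of the mask walk used by the spill/unspill loops above: registers are
// visited in ascending number order, one spill slot per set bit.
#include <cstdint>
void ForEachSetReg(uint32_t mask, void (*visit)(int reg)) {
  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
    if ((mask & 0x1) != 0u) {
      visit(reg);  // e.g. mask 0b10100000 visits reg 5 (rBP) then reg 7 (rDI).
    }
  }
}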
-
-void X86Mir2Lir::UnSpillCoreRegs() {
-  if (num_core_spills_ == 0) {
-    return;
-  }
-  // Spill mask, not including the fake return address register.
-  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
-  OpSize size = cu_->target64 ? k64 : k32;
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
-    if ((mask & 0x1) != 0u) {
-      RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg);
-      LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile);
-      cfi_.Restore(DwarfCoreReg(cu_->target64, reg));
-      offset += GetInstructionSetPointerSize(cu_->instruction_set);
-    }
-  }
-}
-
-void X86Mir2Lir::SpillFPRegs() {
-  if (num_fp_spills_ == 0) {
-    return;
-  }
-  uint32_t mask = fp_spill_mask_;
-  int offset = frame_size_ -
-      (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
-    if ((mask & 0x1) != 0u) {
-      StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile);
-      cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset);
-      offset += sizeof(double);
-    }
-  }
-}
-
-void X86Mir2Lir::UnSpillFPRegs() {
-  if (num_fp_spills_ == 0) {
-    return;
-  }
-  uint32_t mask = fp_spill_mask_;
-  int offset = frame_size_ -
-      (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
-  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
-    if ((mask & 0x1) != 0u) {
-      LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg),
-                   k64, kNotVolatile);
-      cfi_.Restore(DwarfFpReg(cu_->target64, reg));
-      offset += sizeof(double);
-    }
-  }
-}
-
-
-bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
-  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
-}
-
-RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
-  // Prefer XMM registers. This fixes a problem with iget/iput to an FP field when the
-  // cached temporary for the same VR is a core register.
-  if (size == kSingle || size == kDouble) {
-    return kFPReg;
-  }
-
-  // X86_64 can handle any size.
-  if (cu_->target64) {
-    return RegClassBySize(size);
-  }
-
-  if (UNLIKELY(is_volatile)) {
-    // On x86, atomic 64-bit load/store requires an fp register.
-    // Smaller aligned load/store is atomic for both core and fp registers.
-    if (size == k64 || size == kDouble) {
-      return kFPReg;
-    }
-  }
-  return RegClassBySize(size);
-}
-
-X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena),
-      in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this),
-      pc_rel_base_reg_(RegStorage::InvalidReg()),
-      pc_rel_base_reg_used_(false),
-      setup_pc_rel_base_reg_(nullptr),
-      method_address_insns_(arena->Adapter()),
-      class_type_address_insns_(arena->Adapter()),
-      call_method_insns_(arena->Adapter()),
-      dex_cache_access_insns_(arena->Adapter()),
-      const_vectors_(nullptr) {
-  method_address_insns_.reserve(100);
-  class_type_address_insns_.reserve(100);
-  call_method_insns_.reserve(100);
-  for (int i = 0; i < kX86Last; i++) {
-    DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i)
-        << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
-        << " is wrong: expecting " << i << ", seeing "
-        << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
-  }
-}
-
-Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena) {
-  return new X86Mir2Lir(cu, mir_graph, arena);
-}
-
-// Not used in x86(-64)
-RegStorage X86Mir2Lir::LoadHelper(QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
-  UNREACHABLE();
-}
-
-LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
-  // First load the pointer in fs:[suspend-trigger] into eax
-  // Then use a test instruction to indirect via that address.
-  if (cu_->target64) {
-    NewLIR2(kX86Mov64RT, rs_rAX.GetReg(),
-        Thread::ThreadSuspendTriggerOffset<8>().Int32Value());
-  } else {
-    NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),
-        Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
-  }
-  return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
-}
-
-uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return X86Mir2Lir::EncodingMap[opcode].flags;
-}
-
-const char* X86Mir2Lir::GetTargetInstName(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return X86Mir2Lir::EncodingMap[opcode].name;
-}
-
-const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
-  DCHECK(!IsPseudoLirOp(opcode));
-  return X86Mir2Lir::EncodingMap[opcode].fmt;
-}
-
-void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
-  // Can we do this directly to memory?
-  rl_dest = UpdateLocWide(rl_dest);
-  if ((rl_dest.location == kLocDalvikFrame) ||
-      (rl_dest.location == kLocCompilerTemp)) {
-    int32_t val_lo = Low32Bits(value);
-    int32_t val_hi = High32Bits(value);
-    int r_base = rs_rX86_SP_32.GetReg();
-    int displacement = SRegOffset(rl_dest.s_reg_low);
-
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
-    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
-                              false /* is_load */, true /* is64bit */);
-    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
-    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
-                              false /* is_load */, true /* is64bit */);
-    return;
-  }
-
-  // Just use the standard code to do the generation.
-  Mir2Lir::GenConstWide(rl_dest, value);
-}
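// A minimal sketch, for illustration (hypothetical helpers mirroring the
// Low32Bits/High32Bits calls above, not from the deleted file), of the split
// used when a 64-bit constant is stored as two 32-bit immediates:
#include <cstdint>
int32_t Low32(int64_t v)  { return static_cast<int32_t>(v); }        // LOWORD_OFFSET half
int32_t High32(int64_t v) { return static_cast<int32_t>(v >> 32); }  // HIWORD_OFFSET half
// e.g. v = 0x123456789ABCDEF0 gives Low32 = 0x9ABCDEF0, High32 = 0x12345678.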
-
-// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
-void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
-  LOG(INFO)  << "location: " << loc.location << ','
-             << (loc.wide ? " w" : "  ")
-             << (loc.defined ? " D" : "  ")
-             << (loc.is_const ? " c" : "  ")
-             << (loc.fp ? " F" : "  ")
-             << (loc.core ? " C" : "  ")
-             << (loc.ref ? " r" : "  ")
-             << (loc.high_word ? " h" : "  ")
-             << (loc.home ? " H" : "  ")
-             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
-             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
-             << ", s_reg: " << loc.s_reg_low
-             << ", orig: " << loc.orig_sreg;
-}
-
-void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                                   SpecialTargetRegister symbolic_reg) {
-  /*
-   * For x86, just generate a 32-bit move-immediate instruction that will be filled
-   * in at 'link time'.  For now, put a unique value based on the target to ensure
-   * that code deduplication works.
-   */
-  int target_method_idx = target_method.dex_method_index;
-  const DexFile* target_dex_file = target_method.dex_file;
-  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
-  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
-
-  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
-  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI,
-                     TargetReg(symbolic_reg, kNotWide).GetReg(),
-                     static_cast<int>(target_method_id_ptr), target_method_idx,
-                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
-  AppendLIR(move);
-  method_address_insns_.push_back(move);
-}
-
-void X86Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
-                               SpecialTargetRegister symbolic_reg) {
-  /*
-   * For x86, just generate a 32-bit move-immediate instruction that will be filled
-   * in at 'link time'.  For now, put a unique value based on the target to ensure
-   * that code deduplication works.
-   */
-  const DexFile::TypeId& id = dex_file.GetTypeId(type_idx);
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
-
-  // Generate the move instruction with the unique pointer and save index and type.
-  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI,
-                     TargetReg(symbolic_reg, kNotWide).GetReg(),
-                     static_cast<int>(ptr), type_idx,
-                     WrapPointer(const_cast<DexFile*>(&dex_file)));
-  AppendLIR(move);
-  class_type_address_insns_.push_back(move);
-}
-
-LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
-  /*
-   * For x86, just generate a 32-bit relative call instruction that will be filled
-   * in at 'link time'.
-   */
-  int target_method_idx = target_method.dex_method_index;
-  const DexFile* target_dex_file = target_method.dex_file;
-
-  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
-  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
-  // as a placeholder for the offset.
-  LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
-  AppendLIR(call);
-  call_method_insns_.push_back(call);
-  return call;
-}
-
-static LIR* GenInvokeNoInlineCall(Mir2Lir* mir_to_lir, InvokeType type) {
-  QuickEntrypointEnum trampoline;
-  switch (type) {
-    case kInterface:
-      trampoline = kQuickInvokeInterfaceTrampolineWithAccessCheck;
-      break;
-    case kDirect:
-      trampoline = kQuickInvokeDirectTrampolineWithAccessCheck;
-      break;
-    case kStatic:
-      trampoline = kQuickInvokeStaticTrampolineWithAccessCheck;
-      break;
-    case kSuper:
-      trampoline = kQuickInvokeSuperTrampolineWithAccessCheck;
-      break;
-    case kVirtual:
-      trampoline = kQuickInvokeVirtualTrampolineWithAccessCheck;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected invoke type";
-      trampoline = kQuickInvokeInterfaceTrampolineWithAccessCheck;
-  }
-  return mir_to_lir->InvokeTrampoline(kOpBlx, RegStorage::InvalidReg(), trampoline);
-}
-
-LIR* X86Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
-  LIR* call_insn;
-  if (method_info.FastPath()) {
-    if (method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
-      // We can have the linker fix up a relative call.
-      call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
-    } else {
-      call_insn = OpMem(kOpBlx, TargetReg(kArg0, kRef),
-                        ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            cu_->target64 ? 8 : 4).Int32Value());
-    }
-  } else {
-    call_insn = GenInvokeNoInlineCall(this, method_info.GetSharpType());
-  }
-  return call_insn;
-}
-
-void X86Mir2Lir::InstallLiteralPools() {
-  // These are handled differently for x86.
-  DCHECK(code_literal_list_ == nullptr);
-  DCHECK(method_literal_list_ == nullptr);
-  DCHECK(class_literal_list_ == nullptr);
-
-
-  if (const_vectors_ != nullptr) {
-    // Vector literals must be 16-byte aligned. The header that is placed
-    // in the code section causes misalignment, so we take it into account;
-    // otherwise, we know that on x86 the method is aligned to 16 bytes.
-    DCHECK_EQ(GetInstructionSetAlignment(cu_->instruction_set), 16u);
-    uint32_t bytes_to_fill = (0x10 - ((code_buffer_.size() + sizeof(OatQuickMethodHeader)) & 0xF)) & 0xF;
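-    // For example, if code_buffer_.size() + sizeof(OatQuickMethodHeader) is 0x28, then
-    // bytes_to_fill = (0x10 - 0x8) & 0xF = 8 bytes of padding.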
-    while (bytes_to_fill > 0) {
-      code_buffer_.push_back(0);
-      bytes_to_fill--;
-    }
-
-    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
-      Push32(&code_buffer_, p->operands[0]);
-      Push32(&code_buffer_, p->operands[1]);
-      Push32(&code_buffer_, p->operands[2]);
-      Push32(&code_buffer_, p->operands[3]);
-    }
-  }
-
-  patches_.reserve(method_address_insns_.size() + class_type_address_insns_.size() +
-                   call_method_insns_.size() + dex_cache_access_insns_.size());
-
-  // Handle the fixups for methods.
-  for (LIR* p : method_address_insns_) {
-      DCHECK_EQ(p->opcode, kX86Mov32RI);
-      uint32_t target_method_idx = p->operands[2];
-      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
-
-      // The offset to patch is the last 4 bytes of the instruction.
-      int patch_offset = p->offset + p->flags.size - 4;
-      patches_.push_back(LinkerPatch::MethodPatch(patch_offset,
-                                                  target_dex_file, target_method_idx));
-  }
-
-  // Handle the fixups for class types.
-  for (LIR* p : class_type_address_insns_) {
-      DCHECK_EQ(p->opcode, kX86Mov32RI);
-
-      const DexFile* class_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
-      uint32_t target_type_idx = p->operands[2];
-
-      // The offset to patch is the last 4 bytes of the instruction.
-      int patch_offset = p->offset + p->flags.size - 4;
-      patches_.push_back(LinkerPatch::TypePatch(patch_offset,
-                                                class_dex_file, target_type_idx));
-  }
-
-  // And now the PC-relative calls to methods.
-  for (LIR* p : call_method_insns_) {
-      DCHECK_EQ(p->opcode, kX86CallI);
-      uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
-
-      // The offset to patch is the last 4 bytes of the instruction.
-      int patch_offset = p->offset + p->flags.size - 4;
-      patches_.push_back(LinkerPatch::RelativeCodePatch(patch_offset,
-                                                        target_dex_file, target_method_idx));
-  }
-
-  // PC-relative references to dex cache arrays.
-  for (LIR* p : dex_cache_access_insns_) {
-    DCHECK(p->opcode == kX86Mov32RM || p->opcode == kX86Mov64RM);
-    const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[3]);
-    uint32_t offset = p->operands[4];
-    // The offset to patch is the last 4 bytes of the instruction.
-    int patch_offset = p->offset + p->flags.size - 4;
-    DCHECK(!p->flags.is_nop);
-    patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file,
-                                                       p->target->offset, offset));
-  }
-
-  // And do the normal processing.
-  Mir2Lir::InstallLiteralPools();
-}
-
-bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  RegLocation rl_srcPos = info->args[1];
-  RegLocation rl_dst = info->args[2];
-  RegLocation rl_dstPos = info->args[3];
-  RegLocation rl_length = info->args[4];
-  if (rl_srcPos.is_const && (mir_graph_->ConstantValue(rl_srcPos) < 0)) {
-    return false;
-  }
-  if (rl_dstPos.is_const && (mir_graph_->ConstantValue(rl_dstPos) < 0)) {
-    return false;
-  }
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers.
-  RegStorage tmp_reg = cu_->target64 ? rs_r11 : rs_rBX;
-  LoadValueDirectFixed(rl_src, rs_rAX);
-  LoadValueDirectFixed(rl_dst, rs_rCX);
-  LIR* src_dst_same  = OpCmpBranch(kCondEq, rs_rAX, rs_rCX, nullptr);
-  LIR* src_null_branch = OpCmpImmBranch(kCondEq, rs_rAX, 0, nullptr);
-  LIR* dst_null_branch = OpCmpImmBranch(kCondEq, rs_rCX, 0, nullptr);
-  LoadValueDirectFixed(rl_length, rs_rDX);
-  // If the length of the copy is > 128 characters (256 bytes) or negative, then go to the slow path.
-  LIR* len_too_big  = OpCmpImmBranch(kCondHi, rs_rDX, 128, nullptr);
-  LoadValueDirectFixed(rl_src, rs_rAX);
-  LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX);
-  LIR* src_bad_len  = nullptr;
-  LIR* src_bad_off = nullptr;
-  LIR* srcPos_negative  = nullptr;
-  if (!rl_srcPos.is_const) {
-    LoadValueDirectFixed(rl_srcPos, tmp_reg);
-    srcPos_negative  = OpCmpImmBranch(kCondLt, tmp_reg, 0, nullptr);
-    // src_pos < src_len
-    src_bad_off = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
-    // src_len - src_pos < copy_len
-    OpRegRegReg(kOpSub, tmp_reg, rs_rAX, tmp_reg);
-    src_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
-  } else {
-    int32_t pos_val = mir_graph_->ConstantValue(rl_srcPos.orig_sreg);
-    if (pos_val == 0) {
-      src_bad_len  = OpCmpBranch(kCondLt, rs_rAX, rs_rDX, nullptr);
-    } else {
-      // src_pos < src_len
-      src_bad_off = OpCmpImmBranch(kCondLt, rs_rAX, pos_val, nullptr);
-      // src_len - src_pos < copy_len
-      OpRegRegImm(kOpSub, tmp_reg, rs_rAX, pos_val);
-      src_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
-    }
-  }
-  LIR* dstPos_negative = nullptr;
-  LIR* dst_bad_len = nullptr;
-  LIR* dst_bad_off = nullptr;
-  LoadValueDirectFixed(rl_dst, rs_rAX);
-  LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX);
-  if (!rl_dstPos.is_const) {
-    LoadValueDirectFixed(rl_dstPos, tmp_reg);
-    dstPos_negative = OpCmpImmBranch(kCondLt, tmp_reg, 0, nullptr);
-    // dst_pos < dst_len
-    dst_bad_off = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr);
-    // dst_len - dst_pos < copy_len
-    OpRegRegReg(kOpSub, tmp_reg, rs_rAX, tmp_reg);
-    dst_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
-  } else {
-    int32_t pos_val = mir_graph_->ConstantValue(rl_dstPos.orig_sreg);
-    if (pos_val == 0) {
-      dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, rs_rDX, nullptr);
-    } else {
-      // dst_pos < dst_len
-      dst_bad_off = OpCmpImmBranch(kCondLt, rs_rAX, pos_val, nullptr);
-      // dst_len - dst_pos < copy_len
-      OpRegRegImm(kOpSub, tmp_reg, rs_rAX, pos_val);
-      dst_bad_len = OpCmpBranch(kCondLt, tmp_reg, rs_rDX, nullptr);
-    }
-  }
-  // Everything is checked now.
-  LoadValueDirectFixed(rl_src, rs_rAX);
-  LoadValueDirectFixed(rl_dst, tmp_reg);
-  LoadValueDirectFixed(rl_srcPos, rs_rCX);
-  NewLIR5(kX86Lea32RA, rs_rAX.GetReg(), rs_rAX.GetReg(),
-       rs_rCX.GetReg(), 1, mirror::Array::DataOffset(2).Int32Value());
-  // RAX now holds the address of the first src element to be copied.
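-  // (Address computation: rAX = src + srcPos * 2 + DataOffset(2); the scale operand of 1
-  // shifts the index left by one because each char element is 2 bytes.)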
-
-  LoadValueDirectFixed(rl_dstPos, rs_rCX);
-  NewLIR5(kX86Lea32RA, tmp_reg.GetReg(), tmp_reg.GetReg(),
-       rs_rCX.GetReg(), 1, mirror::Array::DataOffset(2).Int32Value());
-  // tmp_reg (RBX, or R11 on 64-bit) now holds the address of the first dst element to be copied.
-
-  // Check if the number of elements to be copied is odd or even. If odd
-  // then copy the first element (so that the remaining number of elements
-  // is even).
-  LoadValueDirectFixed(rl_length, rs_rCX);
-  OpRegImm(kOpAnd, rs_rCX, 1);
-  LIR* jmp_to_begin_loop  = OpCmpImmBranch(kCondEq, rs_rCX, 0, nullptr);
-  OpRegImm(kOpSub, rs_rDX, 1);
-  LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSignedHalf);
-  StoreBaseIndexedDisp(tmp_reg, rs_rDX, 1, 0, rs_rCX, kSignedHalf);
-
-  // Since the remaining number of elements is even, we will copy by
-  // two elements at a time.
-  LIR* beginLoop = NewLIR0(kPseudoTargetLabel);
-  LIR* jmp_to_ret  = OpCmpImmBranch(kCondEq, rs_rDX, 0, nullptr);
-  OpRegImm(kOpSub, rs_rDX, 2);
-  LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSingle);
-  StoreBaseIndexedDisp(tmp_reg, rs_rDX, 1, 0, rs_rCX, kSingle);
-  OpUnconditionalBranch(beginLoop);
-  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
-  LIR* launchpad_branch  = OpUnconditionalBranch(nullptr);
-  LIR *return_point = NewLIR0(kPseudoTargetLabel);
-  jmp_to_ret->target = return_point;
-  jmp_to_begin_loop->target = beginLoop;
-  src_dst_same->target = check_failed;
-  len_too_big->target = check_failed;
-  src_null_branch->target = check_failed;
-  if (srcPos_negative != nullptr)
-    srcPos_negative->target = check_failed;
-  if (src_bad_off != nullptr)
-    src_bad_off->target = check_failed;
-  if (src_bad_len != nullptr)
-    src_bad_len->target = check_failed;
-  dst_null_branch->target = check_failed;
-  if (dstPos_negative != nullptr)
-    dstPos_negative->target = check_failed;
-  if (dst_bad_off != nullptr)
-    dst_bad_off->target = check_failed;
-  if (dst_bad_len != nullptr)
-    dst_bad_len->target = check_failed;
-  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
-  ClobberCallerSave();  // We must clobber everything because the slow path will return here.
-  return true;
-}
-
-/*
- * Fast String.indexOf(I) & (II).  Inline check for the simple case of char <= 0xffff,
- * otherwise bails to standard library code.
- */
-bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
-  RegLocation rl_obj = info->args[0];
-  RegLocation rl_char = info->args[1];
-  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.
-  // RBX is promotable in 64-bit mode.
-  RegStorage rs_tmp = cu_->target64 ? rs_r11 : rs_rBX;
-  int start_value = -1;
-
-  uint32_t char_value =
-    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
-
-  if (char_value > 0xFFFF) {
-    // We have to punt to the real String.indexOf.
-    return false;
-  }
-
-  // Okay, we are committed to inlining this.
-  // EAX: 16 bit character being searched.
-  // ECX: count: number of words to be searched.
-  // EDI: String being searched.
-  // EDX: temporary during execution.
-  // EBX or R11: temporary during execution (depending on mode).
-  // REPNE SCASW: search instruction (scans words at [EDI] against AX until a match).
-
-  FlushAllRegs();
-
-  RegLocation rl_return = GetReturn(kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-
-  // Is the string non-null?
-  LoadValueDirectFixed(rl_obj, rs_rDX);
-  GenNullCheck(rs_rDX, info->opt_flags);
-  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
-
-  LIR *slowpath_branch = nullptr, *length_compare = nullptr;
-
-  // We need the value in EAX.
-  if (rl_char.is_const) {
-    LoadConstantNoClobber(rs_rAX, char_value);
-  } else {
-    // Does the character fit in 16 bits? Compare it at runtime.
-    LoadValueDirectFixed(rl_char, rs_rAX);
-    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
-  }
-
-  // From here down, we know that we are looking for a char that fits in 16 bits.
-  // Location of reference to data array within the String object.
-  int value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count within the String object.
-  int count_offset = mirror::String::CountOffset().Int32Value();
-
-  // Load the number of words to search into rCX.
-  Load32Disp(rs_rDX, count_offset, rs_rCX);
-
-  // Possible signal here due to null pointer dereference.
-  // Note that the signal handler will expect the top word of
-  // the stack to be the ArtMethod*.  If the PUSH edi instruction
-  // below is ahead of the load above then this will not be true
-  // and the signal handler will not work.
-  MarkPossibleNullPointerException(0);
-
-  if (!cu_->target64) {
-    // EDI is promotable in 32-bit mode.
-    NewLIR1(kX86Push32R, rs_rDI.GetReg());
-    cfi_.AdjustCFAOffset(4);
-    // Record cfi only if it is not already spilled.
-    if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
-      cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
-    }
-  }
-
-  if (zero_based) {
-    // Start index is not present.
-    // We have to handle an empty string.  Use special instruction JECXZ.
-    length_compare = NewLIR0(kX86Jecxz8);
-
-    // Copy the number of words to search in a temporary register.
-    // We will use the register at the end to calculate the result.
-    OpRegReg(kOpMov, rs_tmp, rs_rCX);
-  } else {
-    // Start index is present.
-    rl_start = info->args[2];
-
-    // We have to offset by the start index.
-    if (rl_start.is_const) {
-      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
-      start_value = std::max(start_value, 0);
-
-      // Is the start > count?
-      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);
-      OpRegImm(kOpMov, rs_rDI, start_value);
-
-      // Copy the number of words to search in a temporary register.
-      // We will use the register at the end to calculate the result.
-      OpRegReg(kOpMov, rs_tmp, rs_rCX);
-
-      if (start_value != 0) {
-        // Decrease the number of words to search by the start index.
-        OpRegImm(kOpSub, rs_rCX, start_value);
-      }
-    } else {
-      // Handle "start index < 0" case.
-      if (!cu_->target64 && rl_start.location != kLocPhysReg) {
-        // Load the start index from stack, remembering that we pushed EDI.
-        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Load32Disp(rs_rX86_SP_32, displacement, rs_rDI);
-        // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
-        DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
-        int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - 1;
-        AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
-      } else {
-        LoadValueDirectFixed(rl_start, rs_rDI);
-      }
-      OpRegReg(kOpXor, rs_tmp, rs_tmp);
-      OpRegReg(kOpCmp, rs_rDI, rs_tmp);
-      OpCondRegReg(kOpCmov, kCondLt, rs_rDI, rs_tmp);
-
-      // The length of the string should be greater than the start index.
-      length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rDI, nullptr);
-
-      // Copy the number of words to search in a temporary register.
-      // We will use the register at the end to calculate the result.
-      OpRegReg(kOpMov, rs_tmp, rs_rCX);
-
-      // Decrease the number of words to search by the start index.
-      OpRegReg(kOpSub, rs_rCX, rs_rDI);
-    }
-  }
-
-  // Load the address of the string into EDI.
-  // If a start index is present, EDI already holds it, so we add the string address to it.
-  if (zero_based || (rl_start.is_const && start_value == 0)) {
-    OpRegRegImm(kOpAdd, rs_rDI, rs_rDX, value_offset);
-  } else {
-    OpRegImm(kOpLsl, rs_rDI, 1);
-    OpRegReg(kOpAdd, rs_rDI, rs_rDX);
-    OpRegImm(kOpAdd, rs_rDI, value_offset);
-  }
-
-  // EDI now contains the start of the string to be searched.
-  // We are all prepared to do the search for the character.
-  NewLIR0(kX86RepneScasw);
-
-  // Did we find a match?
-  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
-
-  // Yes, we matched.  Compute the index of the result.
-  OpRegReg(kOpSub, rs_tmp, rs_rCX);
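-  // rs_tmp - rCX is the number of words consumed from the search start; SCASW has already
-  // stepped past the match, so the index is that count minus one (hence the -1 in the LEA).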
-  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_tmp.GetReg(), -1);
-
-  LIR *all_done = NewLIR1(kX86Jmp8, 0);
-
-  // Failed to match; return -1.
-  LIR *not_found = NewLIR0(kPseudoTargetLabel);
-  length_compare->target = not_found;
-  failed_branch->target = not_found;
-  LoadConstantNoClobber(rl_return.reg, -1);
-
-  // And join up at the end.
-  all_done->target = NewLIR0(kPseudoTargetLabel);
-
-  if (!cu_->target64) {
-    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
-    cfi_.AdjustCFAOffset(-4);
-    if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
-      cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()));
-    }
-  }
-
-  // Out of line code returns here.
-  if (slowpath_branch != nullptr) {
-    LIR *return_point = NewLIR0(kPseudoTargetLabel);
-    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
-    ClobberCallerSave();  // We must clobber everything because the slow path will return here.
-  }
-
-  StoreValue(rl_dest, rl_return);
-  return true;
-}
-
-void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
-  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
-    case kMirOpReserveVectorRegisters:
-      ReserveVectorRegisters(mir);
-      break;
-    case kMirOpReturnVectorRegisters:
-      ReturnVectorRegisters(mir);
-      break;
-    case kMirOpConstVector:
-      GenConst128(mir);
-      break;
-    case kMirOpMoveVector:
-      GenMoveVector(mir);
-      break;
-    case kMirOpPackedMultiply:
-      GenMultiplyVector(mir);
-      break;
-    case kMirOpPackedAddition:
-      GenAddVector(mir);
-      break;
-    case kMirOpPackedSubtract:
-      GenSubtractVector(mir);
-      break;
-    case kMirOpPackedShiftLeft:
-      GenShiftLeftVector(mir);
-      break;
-    case kMirOpPackedSignedShiftRight:
-      GenSignedShiftRightVector(mir);
-      break;
-    case kMirOpPackedUnsignedShiftRight:
-      GenUnsignedShiftRightVector(mir);
-      break;
-    case kMirOpPackedAnd:
-      GenAndVector(mir);
-      break;
-    case kMirOpPackedOr:
-      GenOrVector(mir);
-      break;
-    case kMirOpPackedXor:
-      GenXorVector(mir);
-      break;
-    case kMirOpPackedAddReduce:
-      GenAddReduceVector(mir);
-      break;
-    case kMirOpPackedReduce:
-      GenReduceVector(mir);
-      break;
-    case kMirOpPackedSet:
-      GenSetVector(mir);
-      break;
-    case kMirOpMemBarrier:
-      GenMemBarrier(static_cast<MemBarrierKind>(mir->dalvikInsn.vA));
-      break;
-    case kMirOpPackedArrayGet:
-      GenPackedArrayGet(bb, mir);
-      break;
-    case kMirOpPackedArrayPut:
-      GenPackedArrayPut(bb, mir);
-      break;
-    default:
-      break;
-  }
-}
-
-void X86Mir2Lir::ReserveVectorRegisters(MIR* mir) {
-  for (uint32_t i = mir->dalvikInsn.vA; i <= mir->dalvikInsn.vB; i++) {
-    RegStorage xp_reg = RegStorage::Solo128(i);
-    RegisterInfo *xp_reg_info = GetRegInfo(xp_reg);
-    Clobber(xp_reg);
-
-    for (RegisterInfo *info = xp_reg_info->GetAliasChain();
-                       info != nullptr;
-                       info = info->GetAliasChain()) {
-      ArenaVector<RegisterInfo*>* regs =
-          info->GetReg().IsSingle() ? &reg_pool_->sp_regs_ : &reg_pool_->dp_regs_;
-      auto it = std::find(regs->begin(), regs->end(), info);
-      DCHECK(it != regs->end());
-      regs->erase(it);
-    }
-  }
-}
-
-void X86Mir2Lir::ReturnVectorRegisters(MIR* mir) {
-  for (uint32_t i = mir->dalvikInsn.vA; i <= mir->dalvikInsn.vB; i++) {
-    RegStorage xp_reg = RegStorage::Solo128(i);
-    RegisterInfo *xp_reg_info = GetRegInfo(xp_reg);
-
-    for (RegisterInfo *info = xp_reg_info->GetAliasChain();
-                       info != nullptr;
-                       info = info->GetAliasChain()) {
-      if (info->GetReg().IsSingle()) {
-        reg_pool_->sp_regs_.push_back(info);
-      } else {
-        reg_pool_->dp_regs_.push_back(info);
-      }
-    }
-  }
-}
-
-void X86Mir2Lir::GenConst128(MIR* mir) {
-  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest);
-
-  uint32_t *args = mir->dalvikInsn.arg;
-  int reg = rs_dest.GetReg();
-  // Check for all 0 case.
-  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
-    NewLIR2(kX86XorpsRR, reg, reg);
-    return;
-  }
-
-  // Append the mov const vector to reg opcode.
-  AppendOpcodeWithConst(kX86MovdqaRM, reg, mir);
-}
-
-void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
-  // To get the memory ordering right, reverse the order of the constants.
-  int32_t constants[4];
-  constants[3] = mir->dalvikInsn.arg[0];
-  constants[2] = mir->dalvikInsn.arg[1];
-  constants[1] = mir->dalvikInsn.arg[2];
-  constants[0] = mir->dalvikInsn.arg[3];
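-  // For example, dalvikInsn.arg[] = {A, B, C, D} is stored to the literal pool as
-  // {D, C, B, A}, so arg[3] ends up at the lowest address.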
-
-  // Check whether the pool already contains a constant with this value.
-  LIR *data_target = ScanVectorLiteral(constants);
-  if (data_target == nullptr) {
-    data_target = AddVectorLiteral(constants);
-  }
-
-  // Load the proper value from the literal area.
-  // We don't know the proper offset for the value, so pick one that will force
-  // a 4-byte offset.  We will fix this up in the assembler later to have the
-  // right value.
-  LIR* load;
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  if (cu_->target64) {
-    load = NewLIR3(opcode, reg, kRIPReg, kDummy32BitOffset);
-  } else {
-    // Get the PC to a register and get the anchor.
-    LIR* anchor;
-    RegStorage r_pc = GetPcAndAnchor(&anchor);
-
-    load = NewLIR3(opcode, reg, r_pc.GetReg(), kDummy32BitOffset);
-    load->operands[4] = WrapPointer(anchor);
-    if (IsTemp(r_pc)) {
-      FreeTemp(r_pc);
-    }
-  }
-  load->flags.fixup = kFixupLoad;
-  load->target = data_target;
-}
-
-void X86Mir2Lir::GenMoveVector(MIR* mir) {
-  // We only support 128 bit registers.
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest);
-  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
-  NewLIR2(kX86MovdqaRR, rs_dest.GetReg(), rs_src.GetReg());
-}
-
-void X86Mir2Lir::GenMultiplyVectorSignedByte(RegStorage rs_dest_src1, RegStorage rs_src2) {
-  /*
-   * Emulate the behavior of a kSignedByte multiply by separating out the 16 values in the
-   * two XMM registers and multiplying 8 at a time before recombining back into one XMM register.
-   *
-   *   let xmm1, xmm2 be real srcs (keep low bits of 16bit lanes)
-   *       xmm3 is tmp             (operate on high bits of 16bit lanes)
-   *
-   *    xmm3 = xmm1
-   *    xmm1 = xmm1 .* xmm2
-   *    xmm1 = xmm1 & 0x00ff00ff00ff00ff00ff00ff00ff00ff  // xmm1 now has low bits
-   *    xmm3 = xmm3 .>> 8
-   *    xmm2 = xmm2 & 0xff00ff00ff00ff00ff00ff00ff00ff00
-   *    xmm2 = xmm2 .* xmm3                               // xmm2 now has high bits
-   *    xmm1 = xmm1 | xmm2                                // combine results
-   */
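-  // Worked example for one 16-bit lane: src1 = 0x0304, src2 = 0x0506. The 16-bit multiply
-  // gives 0x2618, and masking with 0x00ff keeps the low-byte product 0x18 (= 0x04 * 0x06).
-  // The high-byte path (one operand shifted right by 8, the other masked with 0xff00)
-  // gives 0x0f00, and OR-ing the two yields 0x0f18, i.e. {0x03 * 0x05, 0x04 * 0x06}.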
-
-  // Copy xmm1.
-  RegStorage rs_src1_high_tmp = Get128BitRegister(AllocTempDouble());
-  RegStorage rs_dest_high_tmp = Get128BitRegister(AllocTempDouble());
-  NewLIR2(kX86MovdqaRR, rs_src1_high_tmp.GetReg(), rs_src2.GetReg());
-  NewLIR2(kX86MovdqaRR, rs_dest_high_tmp.GetReg(), rs_dest_src1.GetReg());
-
-  // Multiply low bits.
-  // In the sketch above: xmm1 = xmm1 .* xmm2.
-  NewLIR2(kX86PmullwRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
-
-  // xmm1 now has low bits.
-  AndMaskVectorRegister(rs_dest_src1, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF);
-
-  // Prepare high bits for multiplication.
-  NewLIR2(kX86PsrlwRI, rs_src1_high_tmp.GetReg(), 0x8);
-  AndMaskVectorRegister(rs_dest_high_tmp, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00);
-
-  // Multiply high bits and xmm2 now has high bits.
-  NewLIR2(kX86PmullwRR, rs_src1_high_tmp.GetReg(), rs_dest_high_tmp.GetReg());
-
-  // Combine back into dest XMM register.
-  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src1_high_tmp.GetReg());
-}
-
-void X86Mir2Lir::GenMultiplyVectorLong(RegStorage rs_dest_src1, RegStorage rs_src2) {
-  /*
-   * We need to emulate the packed long multiply.
-   * For kMirOpPackedMultiply xmm1, xmm0:
-   * - xmm1 is src/dest
-   * - xmm0 is src
-   * - Get xmm2 and xmm3 as temp
-   * - Idea is to multiply the lower 32 of each operand with the higher 32 of the other.
-   * - Then add the two results.
-   * - Move it to the upper 32 of the destination
-   * - Then multiply the lower 32-bits of the operands and add the result to the destination.
-   *
-   * (op     dest   src )
-   * movdqa  %xmm2, %xmm1
-   * movdqa  %xmm3, %xmm0
-   * psrlq   %xmm3, $0x20
-   * pmuludq %xmm3, %xmm2
-   * psrlq   %xmm1, $0x20
-   * pmuludq %xmm1, %xmm0
-   * paddq   %xmm1, %xmm3
-   * psllq   %xmm1, $0x20
-   * pmuludq %xmm2, %xmm0
-   * paddq   %xmm1, %xmm2
-   *
-   * When both the operands are the same, then we need to calculate the lower-32 * higher-32
-   * calculation only once. Thus we don't need the xmm3 temp above. That sequence becomes:
-   *
-   * (op     dest   src )
-   * movdqa  %xmm2, %xmm1
-   * psrlq   %xmm1, $0x20
-   * pmuludq %xmm1, %xmm0
-   * paddq   %xmm1, %xmm1
-   * psllq   %xmm1, $0x20
-   * pmuludq %xmm2, %xmm0
-   * paddq   %xmm1, %xmm2
-   *
-   */
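-  // Algebraically, with each 64-bit lane split as x = a * 2^32 + b and y = c * 2^32 + d:
-  // x * y mod 2^64 = ((a * d + b * c) << 32) + b * d, which is what the sequence computes.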
-
-  bool both_operands_same = (rs_dest_src1.GetReg() == rs_src2.GetReg());
-
-  RegStorage rs_tmp_vector_1;
-  RegStorage rs_tmp_vector_2;
-  rs_tmp_vector_1 = Get128BitRegister(AllocTempDouble());
-  NewLIR2(kX86MovdqaRR, rs_tmp_vector_1.GetReg(), rs_dest_src1.GetReg());
-
-  if (!both_operands_same) {
-    rs_tmp_vector_2 = Get128BitRegister(AllocTempDouble());
-    NewLIR2(kX86MovdqaRR, rs_tmp_vector_2.GetReg(), rs_src2.GetReg());
-    NewLIR2(kX86PsrlqRI, rs_tmp_vector_2.GetReg(), 0x20);
-    NewLIR2(kX86PmuludqRR, rs_tmp_vector_2.GetReg(), rs_tmp_vector_1.GetReg());
-  }
-
-  NewLIR2(kX86PsrlqRI, rs_dest_src1.GetReg(), 0x20);
-  NewLIR2(kX86PmuludqRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
-
-  if (!both_operands_same) {
-    NewLIR2(kX86PaddqRR, rs_dest_src1.GetReg(), rs_tmp_vector_2.GetReg());
-  } else {
-    NewLIR2(kX86PaddqRR, rs_dest_src1.GetReg(), rs_dest_src1.GetReg());
-  }
-
-  NewLIR2(kX86PsllqRI, rs_dest_src1.GetReg(), 0x20);
-  NewLIR2(kX86PmuludqRR, rs_tmp_vector_1.GetReg(), rs_src2.GetReg());
-  NewLIR2(kX86PaddqRR, rs_dest_src1.GetReg(), rs_tmp_vector_1.GetReg());
-}
-
-void X86Mir2Lir::GenMultiplyVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
-  int opcode = 0;
-  switch (opsize) {
-    case k32:
-      opcode = kX86PmulldRR;
-      break;
-    case kSignedHalf:
-      opcode = kX86PmullwRR;
-      break;
-    case kSingle:
-      opcode = kX86MulpsRR;
-      break;
-    case kDouble:
-      opcode = kX86MulpdRR;
-      break;
-    case kSignedByte:
-      // HW doesn't support 16x16 byte multiplication so emulate it.
-      GenMultiplyVectorSignedByte(rs_dest_src1, rs_src2);
-      return;
-    case k64:
-      GenMultiplyVectorLong(rs_dest_src1, rs_src2);
-      return;
-    default:
-      LOG(FATAL) << "Unsupported vector multiply " << opsize;
-      break;
-  }
-  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
-}
-
-void X86Mir2Lir::GenAddVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
-  int opcode = 0;
-  switch (opsize) {
-    case k32:
-      opcode = kX86PadddRR;
-      break;
-    case k64:
-      opcode = kX86PaddqRR;
-      break;
-    case kSignedHalf:
-    case kUnsignedHalf:
-      opcode = kX86PaddwRR;
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      opcode = kX86PaddbRR;
-      break;
-    case kSingle:
-      opcode = kX86AddpsRR;
-      break;
-    case kDouble:
-      opcode = kX86AddpdRR;
-      break;
-    default:
-      LOG(FATAL) << "Unsupported vector addition " << opsize;
-      break;
-  }
-  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
-}
-
-void X86Mir2Lir::GenSubtractVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
-  int opcode = 0;
-  switch (opsize) {
-    case k32:
-      opcode = kX86PsubdRR;
-      break;
-    case k64:
-      opcode = kX86PsubqRR;
-      break;
-    case kSignedHalf:
-    case kUnsignedHalf:
-      opcode = kX86PsubwRR;
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      opcode = kX86PsubbRR;
-      break;
-    case kSingle:
-      opcode = kX86SubpsRR;
-      break;
-    case kDouble:
-      opcode = kX86SubpdRR;
-      break;
-    default:
-      LOG(FATAL) << "Unsupported vector subtraction " << opsize;
-      break;
-  }
-  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
-}
-
-void X86Mir2Lir::GenShiftByteVector(MIR* mir) {
-  // The destination does not need to be clobbered because it has already been
-  // clobbered as part of the general packed shift handler (the caller of this method).
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-
-  int opcode = 0;
-  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
-    case kMirOpPackedShiftLeft:
-      opcode = kX86PsllwRI;
-      break;
-    case kMirOpPackedSignedShiftRight:
-    case kMirOpPackedUnsignedShiftRight:
-      // TODO Add support for emulated byte shifts.
-    default:
-      LOG(FATAL) << "Unsupported shift operation on byte vector " << opcode;
-      break;
-  }
-
-  // Clear the xmm register and return if shifting by the byte length or more.
-  int imm = mir->dalvikInsn.vB;
-  if (imm >= 8) {
-    NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_dest_src1.GetReg());
-    return;
-  }
-
-  // Shift lower values.
-  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
-
-  /*
-   * The above shift operates on whole 16-bit words, so bits shifted out of the
-   * low byte of each word leak into the high byte. To emulate a byte-level
-   * shift, we can just throw away the low N bits of every byte (which is where
-   * the leaked bits land), and we are done.
-   */
-  uint8_t byte_mask = 0xFF << imm;
-  uint32_t int_mask = byte_mask;
-  int_mask = int_mask << 8 | byte_mask;
-  int_mask = int_mask << 8 | byte_mask;
-  int_mask = int_mask << 8 | byte_mask;
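-  // For example, imm = 3: byte_mask = 0xf8 and int_mask = 0xf8f8f8f8, so the AND below
-  // clears the low 3 bits of every byte, removing anything that leaked across byte boundaries.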
-
-  // AND the destination with the mask.
-  AndMaskVectorRegister(rs_dest_src1, int_mask, int_mask, int_mask, int_mask);
-}
-
-void X86Mir2Lir::GenShiftLeftVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  int imm = mir->dalvikInsn.vB;
-  int opcode = 0;
-  switch (opsize) {
-    case k32:
-      opcode = kX86PslldRI;
-      break;
-    case k64:
-      opcode = kX86PsllqRI;
-      break;
-    case kSignedHalf:
-    case kUnsignedHalf:
-      opcode = kX86PsllwRI;
-      break;
-    case kSignedByte:
-    case kUnsignedByte:
-      GenShiftByteVector(mir);
-      return;
-    default:
-      LOG(FATAL) << "Unsupported vector shift left " << opsize;
-      break;
-  }
-  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
-}
-
-void X86Mir2Lir::GenSignedShiftRightVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  int imm = mir->dalvikInsn.vB;
-  int opcode = 0;
-  switch (opsize) {
-    case k32:
-      opcode = kX86PsradRI;
-      break;
-    case kSignedHalf:
-    case kUnsignedHalf:
-      opcode = kX86PsrawRI;
-      break;
-    case kSignedByte:
-    case kUnsignedByte:
-      GenShiftByteVector(mir);
-      return;
-    case k64:
-      // TODO Implement emulated shift algorithm.
-    default:
-      LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
-      UNREACHABLE();
-  }
-  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
-}
-
-void X86Mir2Lir::GenUnsignedShiftRightVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  int imm = mir->dalvikInsn.vB;
-  int opcode = 0;
-  switch (opsize) {
-    case k32:
-      opcode = kX86PsrldRI;
-      break;
-    case k64:
-      opcode = kX86PsrlqRI;
-      break;
-    case kSignedHalf:
-    case kUnsignedHalf:
-      opcode = kX86PsrlwRI;
-      break;
-    case kSignedByte:
-    case kUnsignedByte:
-      GenShiftByteVector(mir);
-      return;
-    default:
-      LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
-      break;
-  }
-  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
-}
-
-void X86Mir2Lir::GenAndVector(MIR* mir) {
-  // We only support 128 bit registers.
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
-  NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
-}
-
-void X86Mir2Lir::GenOrVector(MIR* mir) {
-  // We only support 128 bit registers.
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
-  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
-}
-
-void X86Mir2Lir::GenXorVector(MIR* mir) {
-  // We only support 128 bit registers.
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest_src1);
-  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
-  NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
-}
-
-void X86Mir2Lir::AndMaskVectorRegister(RegStorage rs_src1, uint32_t m0, uint32_t m1, uint32_t m2,
-                                       uint32_t m3) {
-  MaskVectorRegister(kX86PandRM, rs_src1, m0, m1, m2, m3);
-}
-
-void X86Mir2Lir::MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m0,
-                                    uint32_t m1, uint32_t m2, uint32_t m3) {
-  // Create temporary MIR as container for 128-bit binary mask.
-  MIR const_mir;
-  MIR* const_mirp = &const_mir;
-  const_mirp->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpConstVector);
-  const_mirp->dalvikInsn.arg[0] = m0;
-  const_mirp->dalvikInsn.arg[1] = m1;
-  const_mirp->dalvikInsn.arg[2] = m2;
-  const_mirp->dalvikInsn.arg[3] = m3;
-
-  // Mask vector with const from literal pool.
-  AppendOpcodeWithConst(opcode, rs_src1.GetReg(), const_mirp);
-}
-
-void X86Mir2Lir::GenAddReduceVector(MIR* mir) {
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage vector_src = RegStorage::Solo128(mir->dalvikInsn.vB);
-  bool is_wide = opsize == k64 || opsize == kDouble;
-
-  // Get the location of the virtual register. Since this bytecode is overloaded
-  // for different types (and sizes), we need different logic for each path.
-  // The bytecode design uses the same VR for both source and destination.
-  RegLocation rl_src, rl_dest, rl_result;
-  if (is_wide) {
-    rl_src = mir_graph_->GetSrcWide(mir, 0);
-    rl_dest = mir_graph_->GetDestWide(mir);
-  } else {
-    rl_src = mir_graph_->GetSrc(mir, 0);
-    rl_dest = mir_graph_->GetDest(mir);
-  }
-
-  // We need a temp for byte and short values.
-  RegStorage temp;
-
-  // There is a different path depending on type and size.
-  if (opsize == kSingle) {
-    // Handle float case.
-    // TODO Add support for fast math (not value safe) and do horizontal add in that case.
-
-    rl_src = LoadValue(rl_src, kFPReg);
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-
-    // Since we are doing an add-reduce, we move the reg holding the VR
-    // into the result so that it is included in the result.
-    OpRegCopy(rl_result.reg, rl_src.reg);
-    NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg());
-
-    // Since FP must keep order of operation for value safety, we shift to low
-    // 32-bits and add to result.
-    for (int i = 0; i < 3; i++) {
-      NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), 0x39);
-      NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg());
-    }
-
-    StoreValue(rl_dest, rl_result);
-  } else if (opsize == kDouble) {
-    // Handle double case.
-    rl_src = LoadValueWide(rl_src, kFPReg);
-    rl_result = EvalLocWide(rl_dest, kFPReg, true);
-    LOG(FATAL) << "Unsupported vector add reduce for double.";
-  } else if (opsize == k64) {
-    /*
-     * Handle long case:
-     * 1) Reduce the vector register to lower half (with addition).
-     * 1-1) Get an xmm temp and fill it with vector register.
-     * 1-2) Shift the xmm temp by 8-bytes.
-     * 1-3) Add the xmm temp to vector register that is being reduced.
-     * 2) Allocate temp GP / GP pair.
-     * 2-1) In 64-bit case, use movq to move result to a 64-bit GP.
-     * 2-2) In 32-bit case, use movd twice to move to 32-bit GP pair.
-     * 3) Finish the add reduction by doing what add-long/2addr does,
-     * but instead of having a VR as one of the sources, we have our temp GP.
-     */
-    RegStorage rs_tmp_vector = Get128BitRegister(AllocTempDouble());
-    NewLIR2(kX86MovdqaRR, rs_tmp_vector.GetReg(), vector_src.GetReg());
-    NewLIR2(kX86PsrldqRI, rs_tmp_vector.GetReg(), 8);
-    NewLIR2(kX86PaddqRR, vector_src.GetReg(), rs_tmp_vector.GetReg());
-    FreeTemp(rs_tmp_vector);
-
-    // We would like to be able to reuse the add-long implementation, so set up a fake
-    // register location to pass it.
-    RegLocation temp_loc = mir_graph_->GetBadLoc();
-    temp_loc.core = 1;
-    temp_loc.wide = 1;
-    temp_loc.location = kLocPhysReg;
-    temp_loc.reg = AllocTempWide();
-
-    if (cu_->target64) {
-      DCHECK(!temp_loc.reg.IsPair());
-      NewLIR2(kX86MovqrxRR, temp_loc.reg.GetReg(), vector_src.GetReg());
-    } else {
-      NewLIR2(kX86MovdrxRR, temp_loc.reg.GetLowReg(), vector_src.GetReg());
-      NewLIR2(kX86PsrlqRI, vector_src.GetReg(), 0x20);
-      NewLIR2(kX86MovdrxRR, temp_loc.reg.GetHighReg(), vector_src.GetReg());
-    }
-
-    GenArithOpLong(Instruction::ADD_LONG_2ADDR, rl_dest, temp_loc, temp_loc, mir->optimization_flags);
-  } else if (opsize == kSignedByte || opsize == kUnsignedByte) {
-    RegStorage rs_tmp = Get128BitRegister(AllocTempDouble());
-    NewLIR2(kX86PxorRR, rs_tmp.GetReg(), rs_tmp.GetReg());
-    NewLIR2(kX86PsadbwRR, vector_src.GetReg(), rs_tmp.GetReg());
-    NewLIR3(kX86PshufdRRI, rs_tmp.GetReg(), vector_src.GetReg(), 0x4e);
-    NewLIR2(kX86PaddbRR, vector_src.GetReg(), rs_tmp.GetReg());
-    // Move to a GPR
-    temp = AllocTemp();
-    NewLIR2(kX86MovdrxRR, temp.GetReg(), vector_src.GetReg());
-  } else {
-    // Handle the int and short cases together.
-
-    // Initialize as if we were handling the int case. Below we update
-    // the opcode if handling byte or short.
-    int vec_bytes = (mir->dalvikInsn.vC & 0xFFFF) / 8;
-    int vec_unit_size;
-    int horizontal_add_opcode;
-    int extract_opcode;
-
-    if (opsize == kSignedHalf || opsize == kUnsignedHalf) {
-      extract_opcode = kX86PextrwRRI;
-      horizontal_add_opcode = kX86PhaddwRR;
-      vec_unit_size = 2;
-    } else if (opsize == k32) {
-      vec_unit_size = 4;
-      horizontal_add_opcode = kX86PhadddRR;
-      extract_opcode = kX86PextrdRRI;
-    } else {
-      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
-      return;
-    }
-
-    int elems = vec_bytes / vec_unit_size;
-
-    while (elems > 1) {
-      NewLIR2(horizontal_add_opcode, vector_src.GetReg(), vector_src.GetReg());
-      elems >>= 1;
-    }
-
-    // Handle this as arithmetic unary case.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-
-    // Extract to a GP register because this is integral typed.
-    temp = AllocTemp();
-    NewLIR3(extract_opcode, temp.GetReg(), vector_src.GetReg(), 0);
-  }
-
-  if (opsize != k64 && opsize != kSingle && opsize != kDouble) {
-    // The logic below looks very similar to the handling of ADD_INT_2ADDR
-    // except the rhs is not a VR but a physical register allocated above.
-    // No load of the source VR is done because rl_result is assumed to share
-    // its physical register / memory location with the source.
-    rl_result = UpdateLocTyped(rl_dest);
-    if (rl_result.location == kLocPhysReg) {
-      // Ensure res is in a core reg.
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      OpRegReg(kOpAdd, rl_result.reg, temp);
-      StoreFinalValue(rl_dest, rl_result);
-    } else {
-      // Do the addition directly to memory.
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      OpMemReg(kOpAdd, rl_result, temp.GetReg());
-    }
-  }
-}
-
-void X86Mir2Lir::GenReduceVector(MIR* mir) {
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegLocation rl_dest = mir_graph_->GetDest(mir);
-  RegStorage vector_src = RegStorage::Solo128(mir->dalvikInsn.vB);
-  RegLocation rl_result;
-  bool is_wide = false;
-
-  // There is a different path depending on type and size.
-  if (opsize == kSingle) {
-    // Handle float case.
-    // TODO Add support for fast math (not value safe) and do horizontal add in that case.
-
-    int extract_index = mir->dalvikInsn.arg[0];
-
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(kX86PxorRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-
-    if (LIKELY(extract_index != 0)) {
-      // We know the index of the element we want to extract. We want to extract it and
-      // keep the values in the vector register correct for future use. So the way we act is:
-      // 1. Generate a shuffle mask that swaps the zeroth and the required elements;
-      // 2. Shuffle the vector register with this mask;
-      // 3. Extract the zeroth element, where the required value now lies;
-      // 4. Shuffle with the same mask again to restore the original values in the vector register.
-      // The mask is generated from the identity mask 0b11100100 by swapping the 0th and the
-      // extracted element indices.
-      int shuffle[4] = {0b00, 0b01, 0b10, 0b11};
-      shuffle[0] = extract_index;
-      shuffle[extract_index] = 0;
-      int mask = 0;
-      for (int i = 0; i < 4; i++) {
-        mask |= (shuffle[i] << (2 * i));
-      }
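-      // For example, extract_index = 2: shuffle[] = {2, 1, 0, 3} and mask = 0b11000110,
-      // i.e. lanes 0 and 2 are swapped while lanes 1 and 3 stay in place.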
-      NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), mask);
-      NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg());
-      NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), mask);
-    } else {
-      // We need to extract the zeroth element, so no shuffle is needed.
-      NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg());
-    }
-
-    StoreFinalValue(rl_dest, rl_result);
-  } else if (opsize == kDouble) {
-    // TODO Handle double case.
-    LOG(FATAL) << "Unsupported add reduce for double.";
-  } else if (opsize == k64) {
-    /*
-     * Handle long case:
-     * 1) Reduce the vector register to lower half (with addition).
-     * 1-1) Get an xmm temp and fill it with vector register.
-     * 1-2) Shift the xmm temp by 8-bytes.
-     * 1-3) Add the xmm temp to vector register that is being reduced.
-     * 2) Evaluate destination to a GP / GP pair.
-     * 2-1) In 64-bit case, use movq to move result to a 64-bit GP.
-     * 2-2) In 32-bit case, use movd twice to move to 32-bit GP pair.
-     * 3) Store the result to the final destination.
-     */
-    NewLIR2(kX86PsrldqRI, vector_src.GetReg(), 8);
-    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-    if (cu_->target64) {
-      DCHECK(!rl_result.reg.IsPair());
-      NewLIR2(kX86MovqrxRR, rl_result.reg.GetReg(), vector_src.GetReg());
-    } else {
-      NewLIR2(kX86MovdrxRR, rl_result.reg.GetLowReg(), vector_src.GetReg());
-      NewLIR2(kX86PsrlqRI, vector_src.GetReg(), 0x20);
-      NewLIR2(kX86MovdrxRR, rl_result.reg.GetHighReg(), vector_src.GetReg());
-    }
-
-    StoreValueWide(rl_dest, rl_result);
-  } else {
-    int extract_index = mir->dalvikInsn.arg[0];
-    int extr_opcode = 0;
-    rl_result = UpdateLocTyped(rl_dest);
-
-    // Handle the rest of integral types now.
-    switch (opsize) {
-      case k32:
-        extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrdRRI : kX86PextrdMRI;
-        break;
-      case kSignedHalf:
-      case kUnsignedHalf:
-        extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrwRRI : kX86PextrwMRI;
-        break;
-      case kSignedByte:
-        extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrbRRI : kX86PextrbMRI;
-        break;
-      default:
-        LOG(FATAL) << "Unsupported vector reduce " << opsize;
-        UNREACHABLE();
-    }
-
-    if (rl_result.location == kLocPhysReg) {
-      NewLIR3(extr_opcode, rl_result.reg.GetReg(), vector_src.GetReg(), extract_index);
-      StoreFinalValue(rl_dest, rl_result);
-    } else {
-      int displacement = SRegOffset(rl_result.s_reg_low);
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      LIR *l = NewLIR4(extr_opcode, rs_rX86_SP_32.GetReg(), displacement, vector_src.GetReg(),
-                       extract_index);
-      AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is_wide /* is_64bit */);
-    }
-  }
-}
-
-void X86Mir2Lir::LoadVectorRegister(RegStorage rs_dest, RegStorage rs_src,
-                                    OpSize opsize, int op_mov) {
-  if (!cu_->target64 && opsize == k64) {
-    // Logic assumes that longs are loaded in GP register pairs.
-    NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rs_src.GetLowReg());
-    RegStorage r_tmp = AllocTempDouble();
-    NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), rs_src.GetHighReg());
-    NewLIR2(kX86PunpckldqRR, rs_dest.GetReg(), r_tmp.GetReg());
-    FreeTemp(r_tmp);
-  } else {
-    NewLIR2(op_mov, rs_dest.GetReg(), rs_src.GetReg());
-  }
-}
-
-void X86Mir2Lir::GenSetVector(MIR* mir) {
-  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
-  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
-  Clobber(rs_dest);
-  int op_shuffle = 0, op_shuffle_high = 0, op_mov = kX86MovdxrRR;
-  RegisterClass reg_type = kCoreReg;
-  bool is_wide = false;
-
-  switch (opsize) {
-    case k32:
-      op_shuffle = kX86PshufdRRI;
-      break;
-    case kSingle:
-      op_shuffle = kX86PshufdRRI;
-      op_mov = kX86MovdqaRR;
-      reg_type = kFPReg;
-      break;
-    case k64:
-      op_shuffle = kX86PunpcklqdqRR;
-      op_mov = kX86MovqxrRR;
-      is_wide = true;
-      break;
-    case kSignedByte:
-    case kUnsignedByte:
-      // We will have the source loaded up in a
-      // double-word before we use this shuffle.
-      op_shuffle = kX86PshufdRRI;
-      break;
-    case kSignedHalf:
-    case kUnsignedHalf:
-      // Handles low quadword.
-      op_shuffle = kX86PshuflwRRI;
-      // Handles upper quadword.
-      op_shuffle_high = kX86PshufdRRI;
-      break;
-    default:
-      LOG(FATAL) << "Unsupported vector set " << opsize;
-      break;
-  }
-
-  // Load the value from the VR into a physical register.
-  RegLocation rl_src;
-  if (!is_wide) {
-    rl_src = mir_graph_->GetSrc(mir, 0);
-    rl_src = LoadValue(rl_src, reg_type);
-  } else {
-    rl_src = mir_graph_->GetSrcWide(mir, 0);
-    rl_src = LoadValueWide(rl_src, reg_type);
-  }
-  RegStorage reg_to_shuffle = rl_src.reg;
-
-  // Load the value into the XMM register.
-  LoadVectorRegister(rs_dest, reg_to_shuffle, opsize, op_mov);
-
-  if (opsize == kSignedByte || opsize == kUnsignedByte) {
-    // In the byte case, first duplicate it into a word,
-    // then duplicate it into a double-word.
-    NewLIR2(kX86PunpcklbwRR, rs_dest.GetReg(), rs_dest.GetReg());
-    NewLIR2(kX86PunpcklwdRR, rs_dest.GetReg(), rs_dest.GetReg());
-  }
-
-  // Now shuffle the value across the destination.
-  if (op_shuffle == kX86PunpcklqdqRR) {
-    NewLIR2(op_shuffle, rs_dest.GetReg(), rs_dest.GetReg());
-  } else {
-    NewLIR3(op_shuffle, rs_dest.GetReg(), rs_dest.GetReg(), 0);
-  }
-
-  // And then repeat as needed.
-  if (op_shuffle_high != 0) {
-    NewLIR3(op_shuffle_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
-  }
-}
-
-void X86Mir2Lir::GenPackedArrayGet(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Extended opcode kMirOpPackedArrayGet not supported.";
-}
-
-void X86Mir2Lir::GenPackedArrayPut(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL) << "Extended opcode kMirOpPackedArrayPut not supported.";
-}
-
-LIR* X86Mir2Lir::ScanVectorLiteral(int32_t* constants) {
-  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
-    if (constants[0] == p->operands[0] && constants[1] == p->operands[1] &&
-        constants[2] == p->operands[2] && constants[3] == p->operands[3]) {
-      return p;
-    }
-  }
-  return nullptr;
-}
-
-LIR* X86Mir2Lir::AddVectorLiteral(int32_t* constants) {
-  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
-  new_value->operands[0] = constants[0];
-  new_value->operands[1] = constants[1];
-  new_value->operands[2] = constants[2];
-  new_value->operands[3] = constants[3];
-  new_value->next = const_vectors_;
-  if (const_vectors_ == nullptr) {
-    estimated_native_code_size_ += 12;  // Maximum needed to align to a 16-byte boundary.
-  }
-  estimated_native_code_size_ += 16;  // Space for one vector.
-  const_vectors_ = new_value;
-  return new_value;
-}
-
-// ------------ ABI support: mapping of args to physical registers -------------
-RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(ShortyArg arg) {
-  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
-  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
-                                                             kFArg4, kFArg5, kFArg6, kFArg7};
-  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
-
-  if (arg.IsFP()) {
-    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
-                             arg.IsWide() ? kWide : kNotWide);
-    }
-  } else {
-    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      return m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                             arg.IsRef() ? kRef : (arg.IsWide() ? kWide : kNotWide));
-    }
-  }
-  return RegStorage::InvalidReg();
-}
-
-RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
-  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
-  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
-  const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3};
-  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
-
-  RegStorage result = RegStorage::InvalidReg();
-  if (arg.IsFP()) {
-    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
-                             arg.IsWide() ? kWide : kNotWide);
-    }
-  } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                             arg.IsRef() ? kRef : kNotWide);
-    if (arg.IsWide()) {
-      // This must be a long, as double is handled above.
-      // Ensure that we don't split a long across the last register and the stack.
-      if (cur_core_reg_ == coreArgMappingToPhysicalRegSize) {
-        // Leave the last core register unused and force the whole long to the stack.
-        cur_core_reg_++;
-        result = RegStorage::InvalidReg();
-      } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-        result = RegStorage::MakeRegPair(
-            result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
-      }
-    }
-  }
-  return result;
-}
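-
-// For example, in the 32-bit mapper a shorty argument list (I, J) maps the int to kArg1 and
-// the long to the kArg2/kArg3 pair; had the long arrived with only kArg3 remaining, it would
-// go entirely to the stack rather than be split across a register and the stack.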
-
-// ---------End of ABI support: mapping of args to physical registers -------------
-
-bool X86Mir2Lir::GenInlinedCharAt(CallInfo* info) {
-  // Location of reference to data array
-  int value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  int count_offset = mirror::String::CountOffset().Int32Value();
-
-  RegLocation rl_obj = info->args[0];
-  RegLocation rl_idx = info->args[1];
-  rl_obj = LoadValue(rl_obj, kRefReg);
-  rl_idx = LoadValue(rl_idx, kCoreReg);
-  RegStorage reg_max;
-  GenNullCheck(rl_obj.reg, info->opt_flags);
-  bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
-  LIR* range_check_branch = nullptr;
-  if (range_check) {
-    // On x86, we can compare to memory directly.
-    // Set up a launch pad to allow retry in case of bounds violation.
-    if (rl_idx.is_const) {
-      LIR* comparison;
-      range_check_branch = OpCmpMemImmBranch(
-          kCondLs, RegStorage::InvalidReg(), rl_obj.reg, count_offset,
-          mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr, &comparison);
-      MarkPossibleNullPointerExceptionAfter(0, comparison);
-    } else {
-      OpRegMem(kOpCmp, rl_idx.reg, rl_obj.reg, count_offset);
-      MarkPossibleNullPointerException(0);
-      range_check_branch = OpCondBranch(kCondUge, nullptr);
-    }
-  }
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexedDisp(rl_obj.reg, rl_idx.reg, 1, value_offset, rl_result.reg, kUnsignedHalf);
-  FreeTemp(rl_idx.reg);
-  FreeTemp(rl_obj.reg);
-  StoreValue(rl_dest, rl_result);
-  if (range_check) {
-    DCHECK(range_check_branch != nullptr);
-    info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've already null checked.
-    AddIntrinsicSlowPath(info, range_check_branch);
-  }
-  return true;
-}
-
-bool X86Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
-  RegLocation rl_dest = InlineTarget(info);
-
-  // Early exit if the result is unused.
-  if (rl_dest.orig_sreg < 0) {
-    return true;
-  }
-
-  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-
-  if (cu_->target64) {
-    OpRegThreadMem(kOpMov, rl_result.reg, Thread::PeerOffset<8>());
-  } else {
-    OpRegThreadMem(kOpMov, rl_result.reg, Thread::PeerOffset<4>());
-  }
-
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-/**
- * Lock temp registers for explicit usage. Registers will be freed in destructor.
- */
-X86Mir2Lir::ExplicitTempRegisterLock::ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir,
-                                                               int n_regs, ...) :
-    temp_regs_(n_regs),
-    mir_to_lir_(mir_to_lir) {
-  va_list regs;
-  va_start(regs, n_regs);
-  for (int i = 0; i < n_regs; i++) {
-    RegStorage reg = *(va_arg(regs, RegStorage*));
-    RegisterInfo* info = mir_to_lir_->GetRegInfo(reg);
-
-    // Make sure we don't have promoted register here.
-    DCHECK(info->IsTemp());
-
-    temp_regs_.push_back(reg);
-    mir_to_lir_->FlushReg(reg);
-
-    if (reg.IsPair()) {
-      RegStorage partner = info->Partner();
-      temp_regs_.push_back(partner);
-      mir_to_lir_->FlushReg(partner);
-    }
-
-    mir_to_lir_->Clobber(reg);
-    mir_to_lir_->LockTemp(reg);
-  }
-
-  va_end(regs);
-}
-
-/*
- * Free all locked registers.
- */
-X86Mir2Lir::ExplicitTempRegisterLock::~ExplicitTempRegisterLock() {
-  // Free all locked temps.
-  for (auto it : temp_regs_) {
-    mir_to_lir_->FreeTemp(it);
-  }
-}
-
-int X86Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
-  if (count < 4) {
-    // It does not make sense to use this utility if we have no chance to use
-    // a 128-bit move.
-    return count;
-  }
-  GenDalvikArgsFlushPromoted(info, first);
-
-  // The rest can be copied together
-  int current_src_offset = SRegOffset(info->args[first].s_reg_low);
-  int current_dest_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set);
-
-  // Only Dalvik regs are accessed in this loop; no next_call_insn() calls.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  while (count > 0) {
-    // This is based on the knowledge that the stack itself is 16-byte aligned.
-    bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
-    bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
-    size_t bytes_to_move;
-
-    /*
-     * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do
-     * a 128-bit move because we won't get another chance to try to align. If there are more
-     * than 4 registers left to move, consider a 128-bit move only if either src or dest is
-     * aligned. We do this because we could potentially do a smaller move first to align.
-     */
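-    // For example, with count == 5 and src at an offset ending in 0xc, one 32-bit move
-    // brings src to 16-byte alignment and leaves count == 4 for a single 128-bit move.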
-    if (count == 4 || (count > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
-      // Moving 128-bits via xmm register.
-      bytes_to_move = sizeof(uint32_t) * 4;
-
-      // Allocate a free xmm temp. Since we are working through the calling sequence,
-      // we expect to have an xmm temporary available. AllocTempDouble will abort if
-      // there are no free registers.
-      RegStorage temp = AllocTempDouble();
-
-      LIR* ld1 = nullptr;
-      LIR* ld2 = nullptr;
-      LIR* st1 = nullptr;
-      LIR* st2 = nullptr;
-
-      /*
-       * The logic is similar for both loads and stores. If we have 16-byte alignment,
-       * do an aligned move. If we have 8-byte alignment, then do the move in two
-       * parts. This approach prevents possible cache line splits. Finally, fall back
-       * to doing an unaligned move. In most cases we likely won't split the cache
-       * line but we cannot prove it and thus take a conservative approach.
-       */
-      bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
-      bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
-
-      if (src_is_16b_aligned) {
-        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP);
-      } else if (src_is_8b_aligned) {
-        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP);
-        ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1),
-                          kMovHi128FP);
-      } else {
-        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP);
-      }
-
-      if (dest_is_16b_aligned) {
-        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP);
-      } else if (dest_is_8b_aligned) {
-        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP);
-        st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1),
-                          temp, kMovHi128FP);
-      } else {
-        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP);
-      }
-
-      // TODO If we could keep track of aliasing information for memory accesses that are wider
-      // than 64-bit, we wouldn't need to set up a barrier.
-      if (ld1 != nullptr) {
-        if (ld2 != nullptr) {
-          // For 64-bit load we can actually set up the aliasing information.
-          AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
-          AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true,
-                                  true);
-        } else {
-          // Set barrier for 128-bit load.
-          ld1->u.m.def_mask = &kEncodeAll;
-        }
-      }
-      if (st1 != nullptr) {
-        if (st2 != nullptr) {
-          // For 64-bit store we can actually set up the aliasing information.
-          AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
-          AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false,
-                                  true);
-        } else {
-          // Set barrier for 128-bit store.
-          st1->u.m.def_mask = &kEncodeAll;
-        }
-      }
-
-      // Free the temporary used for the data movement.
-      FreeTemp(temp);
-    } else {
-      // Moving 32-bits via general purpose register.
-      bytes_to_move = sizeof(uint32_t);
-
-      // Instead of allocating a new temp, simply reuse one of the registers being used
-      // for argument passing.
-      RegStorage temp = TargetReg(kArg3, kNotWide);
-
-      // Now load the argument VR and store to the outs.
-      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-    }
-
-    current_src_offset += bytes_to_move;
-    current_dest_offset += bytes_to_move;
-    count -= (bytes_to_move >> 2);
-  }
-  DCHECK_EQ(count, 0);
-  return count;
-}
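
Editorial sketch: the width-selection rule the copy loop above applies, distilled into a standalone helper (BytesToMove is an illustrative name, not from this file).

#include <cstddef>
#include <cstdint>

// Pick how many bytes the next chunk of a stack-to-stack copy should
// move: one 128-bit XMM move when exactly 4 slots remain (the last
// chance to use it) or when more than 4 remain and either side is
// already 16-byte aligned; otherwise a single 32-bit slot, which may
// bring the offsets into alignment for later iterations.
static size_t BytesToMove(int count, uint32_t src_offset, uint32_t dest_offset) {
  const bool src_is_16b_aligned = (src_offset & 0xF) == 0;
  const bool dest_is_16b_aligned = (dest_offset & 0xF) == 0;
  if (count == 4 || (count > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
    return sizeof(uint32_t) * 4;  // 128-bit move via an xmm register.
  }
  return sizeof(uint32_t);  // 32-bit move via a general purpose register.
}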
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
deleted file mode 100644
index 61354df..0000000
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ /dev/null
@@ -1,1167 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "codegen_x86.h"
-
-#include "base/logging.h"
-#include "dex/mir_graph.h"
-#include "dex/quick/mir_to_lir-inl.h"
-#include "dex/dataflow_iterator-inl.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/reg_storage_eq.h"
-#include "driver/compiler_driver.h"
-#include "x86_lir.h"
-
-namespace art {
-
-/* This file contains codegen for the X86 ISA */
-
-LIR* X86Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  int opcode;
-  /* must be both DOUBLE or both not DOUBLE */
-  DCHECK(r_dest.IsFloat() || r_src.IsFloat());
-  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
-  if (r_dest.IsDouble()) {
-    opcode = kX86MovsdRR;
-  } else {
-    if (r_dest.IsSingle()) {
-      if (r_src.IsSingle()) {
-        opcode = kX86MovssRR;
-      } else {  // Fpr <- Gpr
-        opcode = kX86MovdxrRR;
-      }
-    } else {  // Gpr <- Fpr
-      DCHECK(r_src.IsSingle()) << "Raw: 0x" << std::hex << r_src.GetRawBits();
-      opcode = kX86MovdrxRR;
-    }
-  }
-  DCHECK_NE((EncodingMap[opcode].flags & IS_BINARY_OP), 0ULL);
-  LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-  if (r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
-}
-
-bool X86Mir2Lir::InexpensiveConstantInt(int32_t value ATTRIBUTE_UNUSED) {
-  return true;
-}
-
-bool X86Mir2Lir::InexpensiveConstantFloat(int32_t value) {
-  return value == 0;
-}
-
-bool X86Mir2Lir::InexpensiveConstantLong(int64_t value ATTRIBUTE_UNUSED) {
-  return true;
-}
-
-bool X86Mir2Lir::InexpensiveConstantDouble(int64_t value) {
-  return value == 0;
-}
-
-/*
- * Load an immediate using a shortcut if possible; otherwise
- * grab from the per-translation literal pool.  If target is
- * a high register, build constant into a low register and copy.
- *
- * No additional register-clobbering operation is performed. Use this version when
- * 1) r_dest is freshly returned from AllocTemp or
- * 2) The codegen is under fixed register usage
- */
-LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
-  RegStorage r_dest_save = r_dest;
-  if (r_dest.IsFloat()) {
-    if (value == 0) {
-      return NewLIR2(kX86XorpsRR, r_dest.GetReg(), r_dest.GetReg());
-    }
-    r_dest = AllocTemp();
-  }
-
-  LIR *res;
-  if (value == 0) {
-    res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg());
-  } else {
-    // Note: there is no byte-immediate form of a 32-bit immediate move.
-    // A 64-bit immediate is not supported by the LIR structure.
-    res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value);
-  }
-
-  if (r_dest_save.IsFloat()) {
-    NewLIR2(kX86MovdxrRR, r_dest_save.GetReg(), r_dest.GetReg());
-    FreeTemp(r_dest);
-  }
-
-  return res;
-}
-
-LIR* X86Mir2Lir::OpUnconditionalBranch(LIR* target) {
-  LIR* res = NewLIR1(kX86Jmp8, 0 /* offset to be patched during assembly */);
-  res->target = target;
-  return res;
-}
-
-LIR* X86Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
-  LIR* branch = NewLIR2(kX86Jcc8, 0 /* offset to be patched */,
-                        X86ConditionEncoding(cc));
-  branch->target = target;
-  return branch;
-}
-
-LIR* X86Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
-  X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-    case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break;
-    case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break;
-    case kOpRev: opcode = r_dest_src.Is64Bit() ? kX86Bswap64R : kX86Bswap32R; break;
-    case kOpBlx: opcode = kX86CallR; break;
-    default:
-      LOG(FATAL) << "Bad case in OpReg " << op;
-  }
-  return NewLIR1(opcode, r_dest_src.GetReg());
-}
-
-LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
-  X86OpCode opcode = kX86Bkpt;
-  bool byte_imm = IS_SIMM8(value);
-  DCHECK(!r_dest_src1.IsFloat());
-  if (r_dest_src1.Is64Bit()) {
-    switch (op) {
-      case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break;
-      case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break;
-      case kOpLsl: opcode = kX86Sal64RI; break;
-      case kOpLsr: opcode = kX86Shr64RI; break;
-      case kOpAsr: opcode = kX86Sar64RI; break;
-      case kOpCmp: opcode = byte_imm ? kX86Cmp64RI8 : kX86Cmp64RI; break;
-      default:
-        LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
-    }
-  } else {
-    switch (op) {
-      case kOpLsl: opcode = kX86Sal32RI; break;
-      case kOpLsr: opcode = kX86Shr32RI; break;
-      case kOpAsr: opcode = kX86Sar32RI; break;
-      case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
-      case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
-      case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
-      // case kOpSbb: opcode = kX86Sbb32RI; break;
-      case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
-      case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
-      case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
-      case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-      case kOpMov:
-        /*
-         * Moving the constant zero into a register can be specialized as an xor of the register.
-         * However, the xor sets eflags while the move does not. For that reason, always do
-         * the move here; a caller that can tolerate clobbered flags should call
-         * LoadConstantNoClobber instead.
-         */
-        opcode = kX86Mov32RI;
-        break;
-      case kOpMul:
-        opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
-        return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
-      case kOp2Byte:
-        opcode = kX86Mov32RI;
-        value = static_cast<int8_t>(value);
-        break;
-      case kOp2Short:
-        opcode = kX86Mov32RI;
-        value = static_cast<int16_t>(value);
-        break;
-      case kOp2Char:
-        opcode = kX86Mov32RI;
-        value = static_cast<uint16_t>(value);
-        break;
-      case kOpNeg:
-        opcode = kX86Mov32RI;
-        value = -value;
-        break;
-      default:
-        LOG(FATAL) << "Bad case in OpRegImm " << op;
-    }
-  }
-  return NewLIR2(opcode, r_dest_src1.GetReg(), value);
-}
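
Editorial sketch: the RI8-vs-RI selection above hinges on whether the immediate fits a sign-extended byte; the helper below is an illustrative stand-in for the IS_SIMM8 macro used in the code (the macro itself is defined elsewhere in the backend).

#include <cstdint>

// A value fits the byte-immediate (RI8) encodings iff it lies in
// [-128, 127], since x86 sign-extends the 8-bit immediate.
static bool IsSimm8(int32_t value) {
  return value >= -128 && value <= 127;
}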
-
-LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-    bool is64Bit = r_dest_src1.Is64Bit();
-    X86OpCode opcode = kX86Nop;
-    bool src2_must_be_cx = false;
-    switch (op) {
-        // X86 unary opcodes
-      case kOpMvn:
-        OpRegCopy(r_dest_src1, r_src2);
-        return OpReg(kOpNot, r_dest_src1);
-      case kOpNeg:
-        OpRegCopy(r_dest_src1, r_src2);
-        return OpReg(kOpNeg, r_dest_src1);
-      case kOpRev:
-        OpRegCopy(r_dest_src1, r_src2);
-        return OpReg(kOpRev, r_dest_src1);
-      case kOpRevsh:
-        OpRegCopy(r_dest_src1, r_src2);
-        OpReg(kOpRev, r_dest_src1);
-        return OpRegImm(kOpAsr, r_dest_src1, 16);
-        // X86 binary opcodes
-      case kOpSub: opcode = is64Bit ? kX86Sub64RR : kX86Sub32RR; break;
-      case kOpSbc: opcode = is64Bit ? kX86Sbb64RR : kX86Sbb32RR; break;
-      case kOpLsl: opcode = is64Bit ? kX86Sal64RC : kX86Sal32RC; src2_must_be_cx = true; break;
-      case kOpLsr: opcode = is64Bit ? kX86Shr64RC : kX86Shr32RC; src2_must_be_cx = true; break;
-      case kOpAsr: opcode = is64Bit ? kX86Sar64RC : kX86Sar32RC; src2_must_be_cx = true; break;
-      case kOpMov: opcode = is64Bit ? kX86Mov64RR : kX86Mov32RR; break;
-      case kOpCmp: opcode = is64Bit ? kX86Cmp64RR : kX86Cmp32RR; break;
-      case kOpAdd: opcode = is64Bit ? kX86Add64RR : kX86Add32RR; break;
-      case kOpAdc: opcode = is64Bit ? kX86Adc64RR : kX86Adc32RR; break;
-      case kOpAnd: opcode = is64Bit ? kX86And64RR : kX86And32RR; break;
-      case kOpOr:  opcode = is64Bit ? kX86Or64RR : kX86Or32RR; break;
-      case kOpXor: opcode = is64Bit ? kX86Xor64RR : kX86Xor32RR; break;
-      case kOp2Byte:
-        // TODO: there are several instances of this check.  A utility function perhaps?
-        // TODO: Similar to Arm's reg < 8 check.  Perhaps add attribute checks to RegStorage?
-        // Use shifts instead of a byte operand if the source can't be byte accessed.
-        if (r_src2.GetRegNum() >= rs_rX86_SP_32.GetRegNum()) {
-          NewLIR2(is64Bit ? kX86Mov64RR : kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg());
-          NewLIR2(is64Bit ? kX86Sal64RI : kX86Sal32RI, r_dest_src1.GetReg(), is64Bit ? 56 : 24);
-          return NewLIR2(is64Bit ? kX86Sar64RI : kX86Sar32RI, r_dest_src1.GetReg(),
-                         is64Bit ? 56 : 24);
-        } else {
-          opcode = is64Bit ? kX86Bkpt : kX86Movsx8RR;
-        }
-        break;
-      case kOp2Short: opcode = is64Bit ? kX86Bkpt : kX86Movsx16RR; break;
-      case kOp2Char: opcode = is64Bit ? kX86Bkpt : kX86Movzx16RR; break;
-      case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RR; break;
-      default:
-        LOG(FATAL) << "Bad case in OpRegReg " << op;
-        break;
-    }
-    CHECK(!src2_must_be_cx || r_src2.GetReg() == rs_rCX.GetReg());
-    return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
-}
-
-LIR* X86Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
-  DCHECK(!r_base.IsFloat());
-  X86OpCode opcode = kX86Nop;
-  int dest = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
-  switch (move_type) {
-    case kMov8GP:
-      CHECK(!r_dest.IsFloat());
-      opcode = kX86Mov8RM;
-      break;
-    case kMov16GP:
-      CHECK(!r_dest.IsFloat());
-      opcode = kX86Mov16RM;
-      break;
-    case kMov32GP:
-      CHECK(!r_dest.IsFloat());
-      opcode = kX86Mov32RM;
-      break;
-    case kMov32FP:
-      CHECK(r_dest.IsFloat());
-      opcode = kX86MovssRM;
-      break;
-    case kMov64FP:
-      CHECK(r_dest.IsFloat());
-      opcode = kX86MovsdRM;
-      break;
-    case kMovU128FP:
-      CHECK(r_dest.IsFloat());
-      opcode = kX86MovupsRM;
-      break;
-    case kMovA128FP:
-      CHECK(r_dest.IsFloat());
-      opcode = kX86MovapsRM;
-      break;
-    case kMovLo128FP:
-      CHECK(r_dest.IsFloat());
-      opcode = kX86MovlpsRM;
-      break;
-    case kMovHi128FP:
-      CHECK(r_dest.IsFloat());
-      opcode = kX86MovhpsRM;
-      break;
-    case kMov64GP:
-    case kMovLo64FP:
-    case kMovHi64FP:
-    default:
-      LOG(FATAL) << "Bad case in OpMovRegMem";
-      break;
-  }
-
-  return NewLIR3(opcode, dest, r_base.GetReg(), offset);
-}
-
-LIR* X86Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
-  DCHECK(!r_base.IsFloat());
-  int src = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
-
-  X86OpCode opcode = kX86Nop;
-  switch (move_type) {
-    case kMov8GP:
-      CHECK(!r_src.IsFloat());
-      opcode = kX86Mov8MR;
-      break;
-    case kMov16GP:
-      CHECK(!r_src.IsFloat());
-      opcode = kX86Mov16MR;
-      break;
-    case kMov32GP:
-      CHECK(!r_src.IsFloat());
-      opcode = kX86Mov32MR;
-      break;
-    case kMov32FP:
-      CHECK(r_src.IsFloat());
-      opcode = kX86MovssMR;
-      break;
-    case kMov64FP:
-      CHECK(r_src.IsFloat());
-      opcode = kX86MovsdMR;
-      break;
-    case kMovU128FP:
-      CHECK(r_src.IsFloat());
-      opcode = kX86MovupsMR;
-      break;
-    case kMovA128FP:
-      CHECK(r_src.IsFloat());
-      opcode = kX86MovapsMR;
-      break;
-    case kMovLo128FP:
-      CHECK(r_src.IsFloat());
-      opcode = kX86MovlpsMR;
-      break;
-    case kMovHi128FP:
-      CHECK(r_src.IsFloat());
-      opcode = kX86MovhpsMR;
-      break;
-    case kMov64GP:
-    case kMovLo64FP:
-    case kMovHi64FP:
-    default:
-      LOG(FATAL) << "Bad case in OpMovMemReg";
-      break;
-  }
-
-  return NewLIR3(opcode, r_base.GetReg(), offset, src);
-}
-
-LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
-  // The only conditional reg to reg operation supported is Cmov
-  DCHECK_EQ(op, kOpCmov);
-  DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
-  return NewLIR3(r_dest.Is64Bit() ? kX86Cmov64RRC : kX86Cmov32RRC, r_dest.GetReg(),
-                 r_src.GetReg(), X86ConditionEncoding(cc));
-}
-
-LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
-  bool is64Bit = r_dest.Is64Bit();
-  X86OpCode opcode = kX86Nop;
-  switch (op) {
-      // X86 binary opcodes
-    case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break;
-    case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break;
-    case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break;
-    case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break;
-    case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break;
-    case kOpOr:  opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break;
-    case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break;
-    case kOp2Byte: opcode = kX86Movsx8RM; break;
-    case kOp2Short: opcode = kX86Movsx16RM; break;
-    case kOp2Char: opcode = kX86Movzx16RM; break;
-    case kOpMul:
-    default:
-      LOG(FATAL) << "Bad case in OpRegMem " << op;
-      break;
-  }
-  LIR *l = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), offset);
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK_EQ(r_base, cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32);
-    AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */);
-  }
-  return l;
-}
-
-LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) {
-  DCHECK_NE(rl_dest.location, kLocPhysReg);
-  int displacement = SRegOffset(rl_dest.s_reg_low);
-  bool is64Bit = rl_dest.wide != 0;
-  X86OpCode opcode = kX86Nop;
-  switch (op) {
-    case kOpSub: opcode = is64Bit ? kX86Sub64MR : kX86Sub32MR; break;
-    case kOpMov: opcode = is64Bit ? kX86Mov64MR : kX86Mov32MR; break;
-    case kOpCmp: opcode = is64Bit ? kX86Cmp64MR : kX86Cmp32MR; break;
-    case kOpAdd: opcode = is64Bit ? kX86Add64MR : kX86Add32MR; break;
-    case kOpAnd: opcode = is64Bit ? kX86And64MR : kX86And32MR; break;
-    case kOpOr:  opcode = is64Bit ? kX86Or64MR : kX86Or32MR; break;
-    case kOpXor: opcode = is64Bit ? kX86Xor64MR : kX86Xor32MR; break;
-    case kOpLsl: opcode = is64Bit ? kX86Sal64MC : kX86Sal32MC; break;
-    case kOpLsr: opcode = is64Bit ? kX86Shr64MC : kX86Shr32MC; break;
-    case kOpAsr: opcode = is64Bit ? kX86Sar64MC : kX86Sar32MC; break;
-    default:
-      LOG(FATAL) << "Bad case in OpMemReg " << op;
-      break;
-  }
-  LIR *l = NewLIR3(opcode, rs_rX86_SP_32.GetReg(), displacement, r_value);
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */);
-    AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */);
-  }
-  return l;
-}
-
-LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) {
-  DCHECK_NE(rl_value.location, kLocPhysReg);
-  bool is64Bit = r_dest.Is64Bit();
-  int displacement = SRegOffset(rl_value.s_reg_low);
-  X86OpCode opcode = kX86Nop;
-  switch (op) {
-    case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break;
-    case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break;
-    case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break;
-    case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break;
-    case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break;
-    case kOpOr:  opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break;
-    case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break;
-    case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RM; break;
-    default:
-      LOG(FATAL) << "Bad case in OpRegMem " << op;
-      break;
-  }
-  LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP_32.GetReg(), displacement);
-  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */);
-  }
-  return l;
-}
-
-LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1,
-                             RegStorage r_src2) {
-  bool is64Bit = r_dest.Is64Bit();
-  if (r_dest != r_src1 && r_dest != r_src2) {
-    if (op == kOpAdd) {  // lea special case, except can't encode rbp as base
-      if (r_src1 == r_src2) {
-        OpRegCopy(r_dest, r_src1);
-        return OpRegImm(kOpLsl, r_dest, 1);
-      } else if (r_src1 != rs_rBP) {
-        return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
-                       r_src1.GetReg() /* base */, r_src2.GetReg() /* index */,
-                       0 /* scale */, 0 /* disp */);
-      } else {
-        return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
-                       r_src2.GetReg() /* base */, r_src1.GetReg() /* index */,
-                       0 /* scale */, 0 /* disp */);
-      }
-    } else {
-      OpRegCopy(r_dest, r_src1);
-      return OpRegReg(op, r_dest, r_src2);
-    }
-  } else if (r_dest == r_src1) {
-    return OpRegReg(op, r_dest, r_src2);
-  } else {  // r_dest == r_src2
-    switch (op) {
-      case kOpSub:  // non-commutative
-        OpReg(kOpNeg, r_dest);
-        op = kOpAdd;
-        break;
-      case kOpSbc:
-      case kOpLsl: case kOpLsr: case kOpAsr: case kOpRor: {
-        RegStorage t_reg = AllocTemp();
-        OpRegCopy(t_reg, r_src1);
-        OpRegReg(op, t_reg, r_src2);
-        LIR* res = OpRegCopyNoInsert(r_dest, t_reg);
-        AppendLIR(res);
-        FreeTemp(t_reg);
-        return res;
-      }
-      case kOpAdd:  // commutative
-      case kOpOr:
-      case kOpAdc:
-      case kOpAnd:
-      case kOpXor:
-      case kOpMul:
-        break;
-      default:
-        LOG(FATAL) << "Bad case in OpRegRegReg " << op;
-    }
-    return OpRegReg(op, r_dest, r_src1);
-  }
-}
-
-LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) {
-  if (op == kOpMul && !cu_->target64) {
-    X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
-    return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value);
-  } else if (op == kOpAnd && !cu_->target64) {
-    if (value == 0xFF && r_src.Low4()) {
-      return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg());
-    } else if (value == 0xFFFF) {
-      return NewLIR2(kX86Movzx16RR, r_dest.GetReg(), r_src.GetReg());
-    }
-  }
-  if (r_dest != r_src) {
-    if ((false) && op == kOpLsl && value >= 0 && value <= 3) {  // lea shift special case
-      // TODO: fix bug in LEA encoding when disp == 0
-      return NewLIR5(kX86Lea32RA, r_dest.GetReg(),  r5sib_no_base /* base */,
-                     r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
-    } else if (op == kOpAdd) {  // lea add special case
-      return NewLIR5(r_dest.Is64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
-                     r_src.GetReg() /* base */, rs_rX86_SP_32.GetReg()/*r4sib_no_index*/ /* index */,
-                     0 /* scale */, value /* disp */);
-    }
-    OpRegCopy(r_dest, r_src);
-  }
-  return OpRegImm(op, r_dest, value);
-}
-
-LIR* X86Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
-  DCHECK_EQ(kX86, cu_->instruction_set);
-  X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-    case kOpBlx: opcode = kX86CallT;  break;
-    case kOpBx: opcode = kX86JmpT;  break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-  }
-  return NewLIR1(opcode, thread_offset.Int32Value());
-}
-
-LIR* X86Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) {
-  DCHECK_EQ(kX86_64, cu_->instruction_set);
-  X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-    case kOpBlx: opcode = kX86CallT;  break;
-    case kOpBx: opcode = kX86JmpT;  break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-  }
-  return NewLIR1(opcode, thread_offset.Int32Value());
-}
-
-LIR* X86Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
-  X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-    case kOpBlx: opcode = kX86CallM;  break;
-    default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
-  }
-  return NewLIR2(opcode, r_base.GetReg(), disp);
-}
-
-LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
-    int32_t val_lo = Low32Bits(value);
-    int32_t val_hi = High32Bits(value);
-    int32_t low_reg_val = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
-    LIR *res;
-    bool is_fp = r_dest.IsFloat();
-    // TODO: clean this up once we fully recognize 64-bit storage containers.
-    if (is_fp) {
-      DCHECK(r_dest.IsDouble());
-      if (value == 0) {
-        return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val);
-      } else if (pc_rel_base_reg_.Valid() || cu_->target64) {
-        // We will load the value from the literal area.
-        LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
-        if (data_target == nullptr) {
-          data_target = AddWideData(&literal_list_, val_lo, val_hi);
-        }
-
-        // Load the proper value from the literal area.
-        // We don't know the proper offset for the value, so pick one that
-        // will force a 4-byte offset.  We will fix this up in the assembler
-        // later to have the right value.
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-        if (cu_->target64) {
-          res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */);
-        } else {
-          // Get the PC to a register and get the anchor.
-          LIR* anchor;
-          RegStorage r_pc = GetPcAndAnchor(&anchor);
-
-          res = LoadBaseDisp(r_pc, kDummy32BitOffset, RegStorage::FloatSolo64(low_reg_val),
-                             kDouble, kNotVolatile);
-          res->operands[4] = WrapPointer(anchor);
-          if (IsTemp(r_pc)) {
-            FreeTemp(r_pc);
-          }
-        }
-        res->target = data_target;
-        res->flags.fixup = kFixupLoad;
-      } else {
-        if (r_dest.IsPair()) {
-          if (val_lo == 0) {
-            res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val);
-          } else {
-            res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo);
-          }
-          if (val_hi != 0) {
-            RegStorage r_dest_hi = AllocTempDouble();
-            LoadConstantNoClobber(r_dest_hi, val_hi);
-            NewLIR2(kX86PunpckldqRR, low_reg_val, r_dest_hi.GetReg());
-            FreeTemp(r_dest_hi);
-          }
-        } else {
-          RegStorage r_temp = AllocTypedTempWide(false, kCoreReg);
-          res = LoadConstantWide(r_temp, value);
-          OpRegCopyWide(r_dest, r_temp);
-          FreeTemp(r_temp);
-        }
-      }
-    } else {
-      if (r_dest.IsPair()) {
-        res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
-        LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
-      } else {
-        if (value == 0) {
-          res = NewLIR2(kX86Xor64RR, r_dest.GetReg(), r_dest.GetReg());
-        } else if (value >= INT_MIN && value <= INT_MAX) {
-          res = NewLIR2(kX86Mov64RI32, r_dest.GetReg(), val_lo);
-        } else {
-          res = NewLIR3(kX86Mov64RI64, r_dest.GetReg(), val_hi, val_lo);
-        }
-      }
-    }
-    return res;
-}
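
Editorial sketch of the lo/hi split performed by Low32Bits()/High32Bits() above (SplitWide is an illustrative name): the low word is the truncation of the constant, the high word is the upper half shifted down.

#include <cstdint>

static void SplitWide(int64_t value, int32_t* val_lo, int32_t* val_hi) {
  *val_lo = static_cast<int32_t>(value);        // Low 32 bits.
  *val_hi = static_cast<int32_t>(value >> 32);  // High 32 bits.
}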
-
-LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, OpSize size) {
-  LIR *load = nullptr;
-  LIR *load2 = nullptr;
-  bool is_array = r_index.Valid();
-  bool pair = r_dest.IsPair();
-  bool is64bit = ((size == k64) || (size == kDouble));
-  X86OpCode opcode = kX86Nop;
-  switch (size) {
-    case k64:
-    case kDouble:
-      if (r_dest.IsFloat()) {
-        opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
-      } else if (!pair) {
-        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
-      } else {
-        opcode = is_array ? kX86Mov32RA  : kX86Mov32RM;
-      }
-      // TODO: double store is to unaligned address
-      DCHECK_ALIGNED(displacement, 4);
-      break;
-    case kWord:
-      if (cu_->target64) {
-        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
-        CHECK_EQ(is_array, false);
-        CHECK_EQ(r_dest.IsFloat(), false);
-        break;
-      }
-      FALLTHROUGH_INTENDED;  // else fall-through to k32 case
-    case k32:
-    case kSingle:
-    case kReference:  // TODO: update for reference decompression on 64-bit targets.
-      opcode = is_array ? kX86Mov32RA : kX86Mov32RM;
-      if (r_dest.IsFloat()) {
-        opcode = is_array ? kX86MovssRA : kX86MovssRM;
-        DCHECK(r_dest.IsFloat());
-      }
-      DCHECK_ALIGNED(displacement, 4);
-      break;
-    case kUnsignedHalf:
-      opcode = is_array ? kX86Movzx16RA : kX86Movzx16RM;
-      DCHECK_ALIGNED(displacement, 2);
-      break;
-    case kSignedHalf:
-      opcode = is_array ? kX86Movsx16RA : kX86Movsx16RM;
-      DCHECK_ALIGNED(displacement, 2);
-      break;
-    case kUnsignedByte:
-      opcode = is_array ? kX86Movzx8RA : kX86Movzx8RM;
-      break;
-    case kSignedByte:
-      opcode = is_array ? kX86Movsx8RA : kX86Movsx8RM;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in LoadBaseIndexedDispBody";
-  }
-
-  if (!is_array) {
-    if (!pair) {
-      load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
-    } else {
-      DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
-      if (r_base == r_dest.GetLow()) {
-        load = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
-                        displacement + HIWORD_OFFSET);
-        load2 = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
-      } else {
-        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
-                        displacement + HIWORD_OFFSET);
-      }
-    }
-    if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-      DCHECK_EQ(r_base, cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32);
-      AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                              true /* is_load */, is64bit);
-      if (pair) {
-        AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
-                                true /* is_load */, is64bit);
-      }
-    }
-  } else {
-    if (!pair) {
-      load = NewLIR5(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                     displacement + LOWORD_OFFSET);
-    } else {
-      DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
-      if (r_base == r_dest.GetLow()) {
-        if (r_dest.GetHigh() == r_index) {
-          // We can't use either register for the first load.
-          RegStorage temp = AllocTemp();
-          load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                          displacement + HIWORD_OFFSET);
-          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                         displacement + LOWORD_OFFSET);
-          OpRegCopy(r_dest.GetHigh(), temp);
-          FreeTemp(temp);
-        } else {
-          load = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                          displacement + HIWORD_OFFSET);
-          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                         displacement + LOWORD_OFFSET);
-        }
-      } else {
-        if (r_dest.GetLow() == r_index) {
-          // We can't use either register for the first load.
-          RegStorage temp = AllocTemp();
-          load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                         displacement + LOWORD_OFFSET);
-          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                          displacement + HIWORD_OFFSET);
-          OpRegCopy(r_dest.GetLow(), temp);
-          FreeTemp(temp);
-        } else {
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                         displacement + LOWORD_OFFSET);
-          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
-                          displacement + HIWORD_OFFSET);
-        }
-      }
-    }
-  }
-
-  // Always return the first load generated, as it might fault if the base is null.
-  return load;
-}
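
Editorial sketch: when a 64-bit value is loaded as a register pair and the base register aliases one half of the destination (r_base == r_dest.GetLow() above), the two 32-bit loads must be ordered so the aliasing half is written last. A standalone analogue of that hazard, with a plain index standing in for the register:

#include <cstdint>

// 'slot' doubles as the base index and the destination of the low
// word: the high word must be read first, because writing the low
// word destroys the base.
static void LoadPair(const uint32_t* mem, uint32_t& slot, uint32_t& hi) {
  hi = mem[slot + 1];    // HIWORD first, while 'slot' still holds the base.
  slot = mem[slot + 0];  // LOWORD last, clobbering the base.
}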
-
-/* Load value from base + scaled index. */
-LIR* X86Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                 int scale, OpSize size) {
-  return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size);
-}
-
-LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size, VolatileKind is_volatile) {
-  // LoadBaseDisp() will emit correct insn for atomic load on x86
-  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
-
-  LIR* load = LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest,
-                                  size);
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    GenMemBarrier(kLoadAny);  // Only a scheduling barrier.
-  }
-
-  return load;
-}
-
-LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, OpSize size,
-                                      int opt_flags) {
-  LIR *store = nullptr;
-  LIR *store2 = nullptr;
-  bool is_array = r_index.Valid();
-  bool pair = r_src.IsPair();
-  bool is64bit = (size == k64) || (size == kDouble);
-  bool consider_non_temporal = false;
-
-  X86OpCode opcode = kX86Nop;
-  switch (size) {
-    case k64:
-      consider_non_temporal = true;
-      FALLTHROUGH_INTENDED;
-    case kDouble:
-      if (r_src.IsFloat()) {
-        opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
-      } else if (!pair) {
-        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
-      } else {
-        opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
-      }
-      // TODO: double store is to unaligned address
-      DCHECK_ALIGNED(displacement, 4);
-      break;
-    case kWord:
-      if (cu_->target64) {
-        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
-        CHECK_EQ(is_array, false);
-        CHECK_EQ(r_src.IsFloat(), false);
-        consider_non_temporal = true;
-        break;
-      }
-      FALLTHROUGH_INTENDED;  // else fall-through to k32 case
-    case k32:
-    case kSingle:
-    case kReference:
-      opcode = is_array ? kX86Mov32AR : kX86Mov32MR;
-      if (r_src.IsFloat()) {
-        opcode = is_array ? kX86MovssAR : kX86MovssMR;
-        DCHECK(r_src.IsSingle());
-      }
-      DCHECK_ALIGNED(displacement, 4);
-      consider_non_temporal = true;
-      break;
-    case kUnsignedHalf:
-    case kSignedHalf:
-      opcode = is_array ? kX86Mov16AR : kX86Mov16MR;
-      DCHECK_ALIGNED(displacement, 2);
-      break;
-    case kUnsignedByte:
-    case kSignedByte:
-      opcode = is_array ? kX86Mov8AR : kX86Mov8MR;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in StoreBaseIndexedDispBody";
-  }
-
-  // Handle the non-temporal hint here.
-  if (consider_non_temporal && ((opt_flags & MIR_STORE_NON_TEMPORAL) != 0)) {
-    switch (opcode) {
-      // We currently only handle 32/64-bit moves here.
-      case kX86Mov64AR:
-        opcode = kX86Movnti64AR;
-        break;
-      case kX86Mov64MR:
-        opcode = kX86Movnti64MR;
-        break;
-      case kX86Mov32AR:
-        opcode = kX86Movnti32AR;
-        break;
-      case kX86Mov32MR:
-        opcode = kX86Movnti32MR;
-        break;
-      default:
-        // Do nothing here.
-        break;
-    }
-  }
-
-  if (!is_array) {
-    if (!pair) {
-      store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetReg());
-    } else {
-      DCHECK(!r_src.IsFloat());  // Make sure we're not still using a pair here.
-      store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetLowReg());
-      store2 = NewLIR3(opcode, r_base.GetReg(), displacement + HIWORD_OFFSET, r_src.GetHighReg());
-    }
-    if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-      DCHECK_EQ(r_base, cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32);
-      AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                              false /* is_load */, is64bit);
-      if (pair) {
-        AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
-                                false /* is_load */, is64bit);
-      }
-    }
-  } else {
-    if (!pair) {
-      store = NewLIR5(opcode, r_base.GetReg(), r_index.GetReg(), scale,
-                      displacement + LOWORD_OFFSET, r_src.GetReg());
-    } else {
-      DCHECK(!r_src.IsFloat());  // Make sure we're not still using a pair here.
-      store = NewLIR5(opcode, r_base.GetReg(), r_index.GetReg(), scale,
-                      displacement + LOWORD_OFFSET, r_src.GetLowReg());
-      store2 = NewLIR5(opcode, r_base.GetReg(), r_index.GetReg(), scale,
-                       displacement + HIWORD_OFFSET, r_src.GetHighReg());
-    }
-  }
-  return store;
-}
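
Editorial sketch: the non-temporal remapping above, pulled out as a helper for clarity; it assumes the X86OpCode values and the MIR_STORE_NON_TEMPORAL flag defined elsewhere in this backend.

// Map a plain 32/64-bit store opcode to its movnti (non-temporal)
// counterpart when the MIR carries the non-temporal store hint; all
// other opcodes pass through unchanged.
static X86OpCode ApplyNonTemporalHint(X86OpCode opcode, int opt_flags) {
  if ((opt_flags & MIR_STORE_NON_TEMPORAL) == 0) {
    return opcode;
  }
  switch (opcode) {
    case kX86Mov64AR: return kX86Movnti64AR;
    case kX86Mov64MR: return kX86Movnti64MR;
    case kX86Mov32AR: return kX86Movnti32AR;
    case kX86Mov32MR: return kX86Movnti32MR;
    default:          return opcode;  // Only 32/64-bit moves are handled.
  }
}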
-
-/* Store value to base + scaled index. */
-LIR* X86Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                  int scale, OpSize size) {
-  return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size);
-}
-
-LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
-                               VolatileKind is_volatile) {
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    GenMemBarrier(kAnyStore);  // Only a scheduling barrier.
-  }
-
-  // StoreBaseDisp() will emit the correct insn for an atomic store on x86,
-  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().
-  // x86 only allows registers EAX-EDX to be used as byte registers; if the input src is not
-  // valid, allocate a temp.
-  bool allocated_temp = false;
-  if (size == kUnsignedByte || size == kSignedByte) {
-    if (!cu_->target64 && !r_src.Low4()) {
-      RegStorage r_input = r_src;
-      r_src = AllocateByteRegister();
-      OpRegCopy(r_src, r_input);
-      allocated_temp = true;
-    }
-  }
-
-  LIR* store = StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
-
-  if (UNLIKELY(is_volatile == kVolatile)) {
-    // A volatile load might follow the volatile store, so insert a StoreLoad barrier.
-    // This does require a fence, even on x86.
-    GenMemBarrier(kAnyAny);
-  }
-
-  if (allocated_temp) {
-    FreeTemp(r_src);
-  }
-
-  return store;
-}
-
-LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond,
-                                   // Comparison performed directly with memory.
-                                   RegStorage temp_reg ATTRIBUTE_UNUSED,
-                                   RegStorage base_reg,
-                                   int offset,
-                                   int check_value,
-                                   LIR* target,
-                                   LIR** compare) {
-  LIR* inst = NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(),
-      offset, check_value);
-  if (compare != nullptr) {
-    *compare = inst;
-  }
-  LIR* branch = OpCondBranch(cond, target);
-  return branch;
-}
-
-void X86Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
-  if (cu_->target64) {
-    Mir2Lir::AnalyzeMIR(core_counts, mir, weight);
-    return;
-  }
-
-  int opcode = mir->dalvikInsn.opcode;
-  bool uses_pc_rel_load = false;
-  switch (opcode) {
-    // Instructions referencing doubles.
-    case Instruction::CMPL_DOUBLE:
-    case Instruction::CMPG_DOUBLE:
-    case Instruction::NEG_DOUBLE:
-    case Instruction::ADD_DOUBLE:
-    case Instruction::SUB_DOUBLE:
-    case Instruction::MUL_DOUBLE:
-    case Instruction::DIV_DOUBLE:
-    case Instruction::REM_DOUBLE:
-    case Instruction::ADD_DOUBLE_2ADDR:
-    case Instruction::SUB_DOUBLE_2ADDR:
-    case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE_2ADDR:
-    case kMirOpFusedCmplDouble:
-    case kMirOpFusedCmpgDouble:
-      uses_pc_rel_load = AnalyzeFPInstruction(opcode, mir);
-      break;
-
-    // Packed switch needs the PC-relative pointer if it's large.
-    case Instruction::PACKED_SWITCH:
-      if (mir_graph_->GetTable(mir, mir->dalvikInsn.vB)[1] > kSmallSwitchThreshold) {
-        uses_pc_rel_load = true;
-      }
-      break;
-
-    case kMirOpConstVector:
-      uses_pc_rel_load = true;
-      break;
-    case kMirOpPackedMultiply:
-    case kMirOpPackedShiftLeft:
-    case kMirOpPackedSignedShiftRight:
-    case kMirOpPackedUnsignedShiftRight:
-      {
-        // Byte emulation requires constants from the literal pool.
-        OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-        if (opsize == kSignedByte || opsize == kUnsignedByte) {
-          uses_pc_rel_load = true;
-        }
-      }
-      break;
-
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-      if (mir_graph_->GetMethodLoweringInfo(mir).IsIntrinsic()) {
-        uses_pc_rel_load = AnalyzeInvokeStaticIntrinsic(mir);
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    default:
-      Mir2Lir::AnalyzeMIR(core_counts, mir, weight);
-      break;
-  }
-
-  if (uses_pc_rel_load) {
-    DCHECK(pc_rel_temp_ != nullptr);
-    core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
-  }
-}
-
-bool X86Mir2Lir::AnalyzeFPInstruction(int opcode, MIR* mir) {
-  DCHECK(!cu_->target64);
-  // Look at all the uses, and see if they are double constants.
-  uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode));
-  int next_sreg = 0;
-  if (attrs & DF_UA) {
-    if (attrs & DF_A_WIDE) {
-      if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
-        return true;
-      }
-      next_sreg += 2;
-    } else {
-      next_sreg++;
-    }
-  }
-  if (attrs & DF_UB) {
-    if (attrs & DF_B_WIDE) {
-      if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
-        return true;
-      }
-      next_sreg += 2;
-    } else {
-      next_sreg++;
-    }
-  }
-  if (attrs & DF_UC) {
-    if (attrs & DF_C_WIDE) {
-      if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-inline bool X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
-  // If this is a double literal, we will want it in the literal pool on 32-bit platforms.
-  DCHECK(!cu_->target64);
-  return use.is_const;
-}
-
-bool X86Mir2Lir::AnalyzeInvokeStaticIntrinsic(MIR* mir) {
-  // 64 bit RIP addressing doesn't need this analysis.
-  DCHECK(!cu_->target64);
-
-  // Retrieve the type of the intrinsic.
-  MethodReference method_ref = mir_graph_->GetMethodLoweringInfo(mir).GetTargetMethod();
-  DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  DexFileMethodInliner* method_inliner =
-    cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(method_ref.dex_file);
-  InlineMethod method;
-  bool is_intrinsic = method_inliner->IsIntrinsic(method_ref.dex_method_index, &method);
-  DCHECK(is_intrinsic);
-
-  switch (method.opcode) {
-    case kIntrinsicAbsDouble:
-    case kIntrinsicMinMaxDouble:
-      return true;
-    default:
-      return false;
-  }
-}
-
-RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc) {
-  loc = UpdateLoc(loc);
-  if ((loc.location == kLocPhysReg) && (loc.fp != loc.reg.IsFloat())) {
-    if (GetRegInfo(loc.reg)->IsTemp()) {
-      Clobber(loc.reg);
-      FreeTemp(loc.reg);
-      loc.reg = RegStorage::InvalidReg();
-      loc.location = kLocDalvikFrame;
-    }
-  }
-  DCHECK(CheckCorePoolSanity());
-  return loc;
-}
-
-RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) {
-  loc = UpdateLocWide(loc);
-  if ((loc.location == kLocPhysReg) && (loc.fp != loc.reg.IsFloat())) {
-    if (GetRegInfo(loc.reg)->IsTemp()) {
-      Clobber(loc.reg);
-      FreeTemp(loc.reg);
-      loc.reg = RegStorage::InvalidReg();
-      loc.location = kLocDalvikFrame;
-    }
-  }
-  DCHECK(CheckCorePoolSanity());
-  return loc;
-}
-
-LIR* X86Mir2Lir::InvokeTrampoline(OpKind op,
-                                  // Call to absolute memory location doesn't
-                                  // need a temporary target register.
-                                  RegStorage r_tgt ATTRIBUTE_UNUSED,
-                                  QuickEntrypointEnum trampoline) {
-  if (cu_->target64) {
-    return OpThreadMem(op, GetThreadOffset<8>(trampoline));
-  } else {
-    return OpThreadMem(op, GetThreadOffset<4>(trampoline));
-  }
-}
-
-void X86Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
-  // Start with the default counts.
-  Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
-
-  if (pc_rel_temp_ != nullptr) {
-    // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
-    // avoid the promotion; otherwise boost the weight by a factor of 2, because the full
-    // PC-relative load sequence is 3 instructions long and promoting the PC base saves
-    // 2 instructions per use.
-    int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
-    if (core_counts[p_map_idx].count == 1) {
-      core_counts[p_map_idx].count = 0;
-    } else {
-      core_counts[p_map_idx].count *= 2;
-    }
-  }
-}
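
Editorial sketch of the weighting rule above: a single non-loop use (count == 1) does not pay for promoting the PC base, while every additional use saves 2 of the 3 instructions in the PC-relative load sequence, which the heuristic approximates by doubling the count.

static int AdjustPcRelBaseCount(int count) {
  return (count == 1) ? 0 : count * 2;
}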
-
-void X86Mir2Lir::DoPromotion() {
-  if (!cu_->target64) {
-    pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
-  }
-
-  Mir2Lir::DoPromotion();
-
-  if (pc_rel_temp_ != nullptr) {
-    // Now, if the dex cache array base temp is promoted, remember the register but
-    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
-    pc_rel_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
-    DCHECK(!pc_rel_base_reg_.Valid() || !pc_rel_base_reg_.IsFloat());
-    mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
-    pc_rel_temp_ = nullptr;
-  }
-}
-
-}  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
deleted file mode 100644
index d6a6a60..0000000
--- a/compiler/dex/quick/x86/x86_lir.h
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
-#define ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
-
-#include "dex/reg_location.h"
-#include "dex/reg_storage.h"
-
-namespace art {
-
-/*
- * Runtime register conventions. We consider x86, x86-64 and x32 (32-bit mode x86-64). Each ABI
- * has different conventions, and we capture those here. Changing something that is callee save and
- * making it caller save places a burden on up-calls to save/restore the callee save register,
- * however, there are few registers that are callee save in the ABI. Changing something that is
- * caller save and making it callee save places a burden on down-calls to save/restore the callee
- * save register. For these reasons we aim to match native conventions for caller and callee save.
- * On x86 only the first 4 registers can be used for byte operations; for this reason they are
- * preferred for temporary scratch registers.
- *
- * General Purpose Register:
- *  Native: x86    | x86-64 / x32 | ART x86                                         | ART x86-64
- *  r0/eax: caller | caller       | caller, Method*, scratch, return value          | caller, scratch, return value
- *  r1/ecx: caller | caller, arg4 | caller, arg1, scratch                           | caller, arg3, scratch
- *  r2/edx: caller | caller, arg3 | caller, arg2, scratch, high half of long return | caller, arg2, scratch
- *  r3/ebx: callEE | callEE       | callER, arg3, scratch                           | callee, promotable
- *  r4/esp: stack pointer
- *  r5/ebp: callee | callee       | callee, promotable                              | callee, promotable
- *  r6/esi: callEE | callER, arg2 | callee, promotable                              | caller, arg1, scratch
- *  r7/edi: callEE | callER, arg1 | callee, promotable                              | caller, Method*, scratch
- *  ---  x86-64/x32 registers
- *  Native: x86-64 / x32      | ART
- *  r8:     caller save, arg5 | caller, arg4, scratch
- *  r9:     caller save, arg6 | caller, arg5, scratch
- *  r10:    caller save       | caller, scratch
- *  r11:    caller save       | caller, scratch
- *  r12:    callee save       | callee, available for register promotion (promotable)
- *  r13:    callee save       | callee, available for register promotion (promotable)
- *  r14:    callee save       | callee, available for register promotion (promotable)
- *  r15:    callee save       | callee, available for register promotion (promotable)
- *
- * There is no rSELF; instead, on x86 the fs: segment holds the base address of Thread::Current,
- * whereas on x86-64/x32 gs: holds it.
- *
- * For floating point we don't support CPUs without SSE2 support (i.e. we require CPUs newer than PIII):
- *  Native: x86  | x86-64 / x32 | ART x86                          | ART x86-64
- *  XMM0: caller | caller, arg1 | caller, arg1, float return value | caller, arg1, float return value
- *  XMM1: caller | caller, arg2 | caller, arg2, scratch            | caller, arg2, scratch
- *  XMM2: caller | caller, arg3 | caller, arg3, scratch            | caller, arg3, scratch
- *  XMM3: caller | caller, arg4 | caller, arg4, scratch            | caller, arg4, scratch
- *  XMM4: caller | caller, arg5 | caller, scratch                  | caller, arg5, scratch
- *  XMM5: caller | caller, arg6 | caller, scratch                  | caller, arg6, scratch
- *  XMM6: caller | caller, arg7 | caller, scratch                  | caller, arg7, scratch
- *  XMM7: caller | caller, arg8 | caller, scratch                  | caller, arg8, scratch
- *  ---  x86-64/x32 registers
- *  XMM8 .. 11: caller save available as scratch registers for ART.
- *  XMM12 .. 15: callee save available as promoted registers for ART.
- *  This change (XMM12..15) is for QCG only; for others they are caller save.
- *
- * X87 is a necessary evil outside of ART code for x86:
- *  ST0:  x86 float/double native return value, caller save
- *  ST1 .. ST7: caller save
- *
- *  Stack frame diagram (stack grows down, higher addresses at top):
- *  For a more detailed view of each region see stack.h.
- *
- * +---------------------------+
- * | IN[ins-1]                 |  {Note: resides in caller's frame}
- * |       .                   |
- * | IN[0]                     |
- * | caller's ArtMethod*       |
- * +===========================+  {Note: start of callee's frame}
- * | return address            |  {pushed by call}
- * | spill region              |  {variable sized}
- * +---------------------------+
 - * | ...filler 4-bytes...      |  {Note: used as 2nd word of V[locals-1] if long}
- * +---------------------------+
- * | V[locals-1]               |
- * | V[locals-2]               |
- * |      .                    |
- * |      .                    |
- * | V[1]                      |
- * | V[0]                      |
- * +---------------------------+
- * | 0 to 12-bytes padding     |
- * +---------------------------+
- * | compiler temp region      |
- * +---------------------------+
- * | OUT[outs-1]               |
- * | OUT[outs-2]               |
- * |       .                   |
- * | OUT[0]                    |
- * | ArtMethod*                | <<== sp w/ 16-byte alignment
- * +===========================+
- */
-
-enum X86ResourceEncodingPos {
-  kX86GPReg0   = 0,
-  kX86RegSP    = 4,
-  kX86FPReg0   = 16,  // xmm0 .. xmm7/xmm15.
-  kX86FPRegEnd = 32,
-  kX86FPStack  = 33,
-  kX86RegEnd   = kX86FPStack,
-};
-
-// FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
-enum X86NativeRegisterPool {
-  r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
-  r0q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0,
-  rAX            = r0,
-  r1             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
-  r1q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 1,
-  rCX            = r1,
-  r2             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
-  r2q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 2,
-  rDX            = r2,
-  r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
-  r3q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 3,
-  rBX            = r3,
-  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
-  rX86_SP_32     = r4sp_32,
-  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
-  rX86_SP_64     = r4sp_64,
-  r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
-  r5q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 5,
-  rBP            = r5,
-  r5sib_no_base  = r5,
-  r6             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
-  r6q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 6,
-  rSI            = r6,
-  r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
-  r7q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 7,
-  rDI            = r7,
-  r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
-  r8q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 8,
-  r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
-  r9q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 9,
-  r10            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
-  r10q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 10,
-  r11            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
-  r11q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 11,
-  r12            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
-  r12q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 12,
-  r13            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
-  r13q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 13,
-  r14            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
-  r14q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 14,
-  r15            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
-  r15q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
-  // fake return address register for core spill mask.
-  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
-
-  // xmm registers, single precision view.
-  fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
-  fr1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
-  fr2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
-  fr3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3,
-  fr4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4,
-  fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
-  fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
-  fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
-  fr8  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
-  fr9  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
-  fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
-  fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
-  fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
-  fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
-  fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
-  fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-
-  // xmm registers, double precision aliases.
-  dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
-  dr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
-  dr2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
-  dr3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3,
-  dr4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4,
-  dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
-  dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
-  dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
-  dr8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8,
-  dr9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9,
-  dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-
-  // xmm registers, quad precision aliases.
-  xr0  = RegStorage::k128BitSolo | 0,
-  xr1  = RegStorage::k128BitSolo | 1,
-  xr2  = RegStorage::k128BitSolo | 2,
-  xr3  = RegStorage::k128BitSolo | 3,
-  xr4  = RegStorage::k128BitSolo | 4,
-  xr5  = RegStorage::k128BitSolo | 5,
-  xr6  = RegStorage::k128BitSolo | 6,
-  xr7  = RegStorage::k128BitSolo | 7,
-  xr8  = RegStorage::k128BitSolo | 8,
-  xr9  = RegStorage::k128BitSolo | 9,
-  xr10 = RegStorage::k128BitSolo | 10,
-  xr11 = RegStorage::k128BitSolo | 11,
-  xr12 = RegStorage::k128BitSolo | 12,
-  xr13 = RegStorage::k128BitSolo | 13,
-  xr14 = RegStorage::k128BitSolo | 14,
-  xr15 = RegStorage::k128BitSolo | 15,
-
-  // Special value for RIP 64 bit addressing.
-  kRIPReg = 255,
-
-  // TODO: as needed, add 256, 512 and 1024-bit xmm views.
-};
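// Editor's note: a minimal standalone sketch (not part of the original patch) of
// how the enum values above compose.  The constants are copied from the deleted
// RegStorage class further down; the namespace and checks are illustrative only.
#include <cstdint>
namespace reg_enum_sketch {
constexpr uint16_t k32BitSolo    = 0x0080;
constexpr uint16_t k64BitSolo    = 0x0100;
constexpr uint16_t kCoreRegister = 0x0000;
constexpr uint16_t kRegNumMask   = 0x003f;
constexpr uint16_t kShapeMask    = 0x0380;
// r2 and r2q are two views of the same physical register: identical register
// number, different shape bits -- exactly what the static_assert for
// rs_rX86_SP_32/rs_rX86_SP_64 below relies on.
constexpr uint16_t r2_sketch  = k32BitSolo | kCoreRegister | 2;
constexpr uint16_t r2q_sketch = k64BitSolo | kCoreRegister | 2;
static_assert((r2_sketch & kRegNumMask) == (r2q_sketch & kRegNumMask), "same reg num");
static_assert((r2_sketch & kShapeMask) != (r2q_sketch & kShapeMask), "different shapes");
}  // namespace reg_enum_sketch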
-
-constexpr RegStorage rs_r0(RegStorage::kValid | r0);
-constexpr RegStorage rs_r0q(RegStorage::kValid | r0q);
-constexpr RegStorage rs_rAX = rs_r0;
-constexpr RegStorage rs_r1(RegStorage::kValid | r1);
-constexpr RegStorage rs_r1q(RegStorage::kValid | r1q);
-constexpr RegStorage rs_rCX = rs_r1;
-constexpr RegStorage rs_r2(RegStorage::kValid | r2);
-constexpr RegStorage rs_r2q(RegStorage::kValid | r2q);
-constexpr RegStorage rs_rDX = rs_r2;
-constexpr RegStorage rs_r3(RegStorage::kValid | r3);
-constexpr RegStorage rs_r3q(RegStorage::kValid | r3q);
-constexpr RegStorage rs_rBX = rs_r3;
-constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
-constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
-static_assert(rs_rX86_SP_64.GetRegNum() == rs_rX86_SP_32.GetRegNum(), "Unexpected mismatch");
-constexpr RegStorage rs_r5(RegStorage::kValid | r5);
-constexpr RegStorage rs_r5q(RegStorage::kValid | r5q);
-constexpr RegStorage rs_rBP = rs_r5;
-constexpr RegStorage rs_r6(RegStorage::kValid | r6);
-constexpr RegStorage rs_r6q(RegStorage::kValid | r6q);
-constexpr RegStorage rs_rSI = rs_r6;
-constexpr RegStorage rs_r7(RegStorage::kValid | r7);
-constexpr RegStorage rs_r7q(RegStorage::kValid | r7q);
-constexpr RegStorage rs_rDI = rs_r7;
-constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);
-constexpr RegStorage rs_r8(RegStorage::kValid | r8);
-constexpr RegStorage rs_r8q(RegStorage::kValid | r8q);
-constexpr RegStorage rs_r9(RegStorage::kValid | r9);
-constexpr RegStorage rs_r9q(RegStorage::kValid | r9q);
-constexpr RegStorage rs_r10(RegStorage::kValid | r10);
-constexpr RegStorage rs_r10q(RegStorage::kValid | r10q);
-constexpr RegStorage rs_r11(RegStorage::kValid | r11);
-constexpr RegStorage rs_r11q(RegStorage::kValid | r11q);
-constexpr RegStorage rs_r12(RegStorage::kValid | r12);
-constexpr RegStorage rs_r12q(RegStorage::kValid | r12q);
-constexpr RegStorage rs_r13(RegStorage::kValid | r13);
-constexpr RegStorage rs_r13q(RegStorage::kValid | r13q);
-constexpr RegStorage rs_r14(RegStorage::kValid | r14);
-constexpr RegStorage rs_r14q(RegStorage::kValid | r14q);
-constexpr RegStorage rs_r15(RegStorage::kValid | r15);
-constexpr RegStorage rs_r15q(RegStorage::kValid | r15q);
-
-constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
-constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
-constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
-constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
-constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
-constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
-constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
-constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
-constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
-constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
-constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
-constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
-constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
-constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
-constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
-constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
-
-constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
-constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
-constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
-constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
-constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
-constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
-constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
-constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
-constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
-constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
-constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
-constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
-constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
-constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
-constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
-constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
-
-constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
-constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
-constexpr RegStorage rs_xr2(RegStorage::kValid | xr2);
-constexpr RegStorage rs_xr3(RegStorage::kValid | xr3);
-constexpr RegStorage rs_xr4(RegStorage::kValid | xr4);
-constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
-constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
-constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);
-constexpr RegStorage rs_xr8(RegStorage::kValid | xr8);
-constexpr RegStorage rs_xr9(RegStorage::kValid | xr9);
-constexpr RegStorage rs_xr10(RegStorage::kValid | xr10);
-constexpr RegStorage rs_xr11(RegStorage::kValid | xr11);
-constexpr RegStorage rs_xr12(RegStorage::kValid | xr12);
-constexpr RegStorage rs_xr13(RegStorage::kValid | xr13);
-constexpr RegStorage rs_xr14(RegStorage::kValid | xr14);
-constexpr RegStorage rs_xr15(RegStorage::kValid | xr15);
-
-constexpr RegStorage rs_rX86_RET0 = rs_rAX;
-constexpr RegStorage rs_rX86_RET1 = rs_rDX;
-
-// RegisterLocation templates for return values (r_V0, or r_V0/r_V1).
-const RegLocation x86_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG};
-const RegLocation x86_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
-const RegLocation x86_loc_c_return_ref
-    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG};
-const RegLocation x86_64_loc_c_return_ref
-    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
-const RegLocation x86_64_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
-const RegLocation x86_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG};
-const RegLocation x86_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitSolo, dr0), INVALID_SREG, INVALID_SREG};
-
-/*
- * The following enum defines the list of X86 instructions supported by the
- * assembler.  Their corresponding EncodingMap positions are defined in
- * Assemble.cc.
- */
-enum X86OpCode {
-  kX86First = 0,
-  kX8632BitData = kX86First,  // data [31..0].
-  kX86Bkpt,
-  kX86Nop,
-  // Define groups of binary operations
-  // MR - Memory Register  - opcode [base + disp], reg
-  //             - lir operands - 0: base, 1: disp, 2: reg
-  // AR - Array Register   - opcode [base + index * scale + disp], reg
-  //             - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
-  // TR - Thread Register  - opcode fs:[disp], reg - where fs: is equal to Thread::Current()
-  //             - lir operands - 0: disp, 1: reg
-  // RR - Register Register  - opcode reg1, reg2
-  //             - lir operands - 0: reg1, 1: reg2
-  // RM - Register Memory  - opcode reg, [base + disp]
-  //             - lir operands - 0: reg, 1: base, 2: disp
-  // RA - Register Array   - opcode reg, [base + index * scale + disp]
-  //             - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-  // RT - Register Thread  - opcode reg, fs:[disp] - where fs: is equal to Thread::Current()
-  //             - lir operands - 0: reg, 1: disp
-  // RI - Register Immediate - opcode reg, #immediate
-  //             - lir operands - 0: reg, 1: immediate
-  // MI - Memory Immediate   - opcode [base + disp], #immediate
-  //             - lir operands - 0: base, 1: disp, 2: immediate
-  // AI - Array Immediate  - opcode [base + index * scale + disp], #immediate
-  //             - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-  // TI - Thread Immediate  - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
-  //             - lir operands - 0: disp, 1: imm
-#define BinaryOpCode(opcode) \
-  opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \
-  opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \
-  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \
-  opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \
-  opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
-  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
-  opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
-  opcode ## 32MR, opcode ## 32AR, opcode ## 32TR,  \
-  opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
-  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
-  opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8, \
-  opcode ## 64MR, opcode ## 64AR, opcode ## 64TR,  \
-  opcode ## 64RR, opcode ## 64RM, opcode ## 64RA, opcode ## 64RT, \
-  opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, opcode ## 64TI, \
-  opcode ## 64RI8, opcode ## 64MI8, opcode ## 64AI8, opcode ## 64TI8
-  BinaryOpCode(kX86Add),
-  BinaryOpCode(kX86Or),
-  BinaryOpCode(kX86Adc),
-  BinaryOpCode(kX86Sbb),
-  BinaryOpCode(kX86And),
-  BinaryOpCode(kX86Sub),
-  BinaryOpCode(kX86Xor),
-  BinaryOpCode(kX86Cmp),
-#undef BinaryOpCode
-  kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI,
-  kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI,
-  kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8,
-  kX86Imul64RRI, kX86Imul64RMI, kX86Imul64RAI,
-  kX86Imul64RRI8, kX86Imul64RMI8, kX86Imul64RAI8,
-  kX86Mov8MR, kX86Mov8AR, kX86Mov8TR,
-  kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT,
-  kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI,
-  kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
-  kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
-  kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
-  kX86Mov32MR, kX86Mov32AR, kX86Movnti32MR, kX86Movnti32AR, kX86Mov32TR,
-  kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
-  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
-  kX86Lea32RM,
-  kX86Lea32RA,
-  kX86Mov64MR, kX86Mov64AR, kX86Movnti64MR, kX86Movnti64AR, kX86Mov64TR,
-  kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT,
-  kX86Mov64RI32, kX86Mov64RI64, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI,
-  kX86Lea64RM,
-  kX86Lea64RA,
-  // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
-  //             - lir operands - 0: reg1, 1: reg2, 2: CC
-  kX86Cmov32RRC,
-  kX86Cmov64RRC,
-  // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp]
-  //             - lir operands - 0: reg1, 1: base, 2: disp 3: CC
-  kX86Cmov32RMC,
-  kX86Cmov64RMC,
-
-  // RC - Register CL - opcode reg, CL
-  //          - lir operands - 0: reg, 1: CL
-  // MC - Memory CL   - opcode [base + disp], CL
-  //          - lir operands - 0: base, 1: disp, 2: CL
-  // AC - Array CL  - opcode [base + index * scale + disp], CL
-  //          - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL
-#define BinaryShiftOpCode(opcode) \
-  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \
-  opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \
-  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \
-  opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \
-  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \
-  opcode ## 32RC, opcode ## 32MC, opcode ## 32AC, \
-  opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, \
-  opcode ## 64RC, opcode ## 64MC, opcode ## 64AC
-  BinaryShiftOpCode(kX86Rol),
-  BinaryShiftOpCode(kX86Ror),
-  BinaryShiftOpCode(kX86Rcl),
-  BinaryShiftOpCode(kX86Rcr),
-  BinaryShiftOpCode(kX86Sal),
-  BinaryShiftOpCode(kX86Shr),
-  BinaryShiftOpCode(kX86Sar),
-#undef BinaryShiftOpCode
-  kX86Cmc,
-  kX86Shld32RRI,
-  kX86Shld32RRC,
-  kX86Shld32MRI,
-  kX86Shrd32RRI,
-  kX86Shrd32RRC,
-  kX86Shrd32MRI,
-  kX86Shld64RRI,
-  kX86Shld64MRI,
-  kX86Shrd64RRI,
-  kX86Shrd64MRI,
-#define UnaryOpcode(opcode, reg, mem, array) \
-  opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
-  opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
-  opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array, \
-  opcode ## 64 ## reg, opcode ## 64 ## mem, opcode ## 64 ## array
-  UnaryOpcode(kX86Test, RI, MI, AI),
-  kX86Test32RR,
-  kX86Test64RR,
-  kX86Test32RM,
-  UnaryOpcode(kX86Not, R, M, A),
-  UnaryOpcode(kX86Neg, R, M, A),
-  UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
-  UnaryOpcode(kX86Imul, DaR, DaM, DaA),
-  UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
-  UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
-  kx86Cdq32Da,
-  kx86Cqo64Da,
-  kX86Bswap32R,
-  kX86Bswap64R,
-  kX86Push32R, kX86Pop32R,
-#undef UnaryOpcode
-#define Binary0fOpCode(opcode) \
-  opcode ## RR, opcode ## RM, opcode ## RA
-  Binary0fOpCode(kX86Movsd),
-  kX86MovsdMR,
-  kX86MovsdAR,
-  Binary0fOpCode(kX86Movss),
-  kX86MovssMR,
-  kX86MovssAR,
-  Binary0fOpCode(kX86Cvtsi2sd),  // int to double
-  Binary0fOpCode(kX86Cvtsi2ss),  // int to float
-  Binary0fOpCode(kX86Cvtsqi2sd),  // long to double
-  Binary0fOpCode(kX86Cvtsqi2ss),  // long to float
-  Binary0fOpCode(kX86Cvttsd2si),  // truncating double to int
-  Binary0fOpCode(kX86Cvttss2si),  // truncating float to int
-  Binary0fOpCode(kX86Cvttsd2sqi),  // truncating double to long
-  Binary0fOpCode(kX86Cvttss2sqi),  // truncating float to long
-  Binary0fOpCode(kX86Cvtsd2si),  // rounding double to int
-  Binary0fOpCode(kX86Cvtss2si),  // rounding float to int
-  Binary0fOpCode(kX86Ucomisd),  // unordered double compare
-  Binary0fOpCode(kX86Ucomiss),  // unordered float compare
-  Binary0fOpCode(kX86Comisd),   // double compare
-  Binary0fOpCode(kX86Comiss),   // float compare
-  Binary0fOpCode(kX86Orpd),     // double logical OR
-  Binary0fOpCode(kX86Orps),     // float logical OR
-  Binary0fOpCode(kX86Andpd),    // double logical AND
-  Binary0fOpCode(kX86Andps),    // float logical AND
-  Binary0fOpCode(kX86Xorpd),    // double logical XOR
-  Binary0fOpCode(kX86Xorps),    // float logical XOR
-  Binary0fOpCode(kX86Addsd),    // double ADD
-  Binary0fOpCode(kX86Addss),    // float ADD
-  Binary0fOpCode(kX86Mulsd),    // double multiply
-  Binary0fOpCode(kX86Mulss),    // float multiply
-  Binary0fOpCode(kX86Cvtsd2ss),  // double to float
-  Binary0fOpCode(kX86Cvtss2sd),  // float to double
-  Binary0fOpCode(kX86Subsd),    // double subtract
-  Binary0fOpCode(kX86Subss),    // float subtract
-  Binary0fOpCode(kX86Divsd),    // double divide
-  Binary0fOpCode(kX86Divss),    // float divide
-  Binary0fOpCode(kX86Punpcklbw),  // Interleave low-order bytes
-  Binary0fOpCode(kX86Punpcklwd),  // Interleave low-order single words (16-bits)
-  Binary0fOpCode(kX86Punpckldq),  // Interleave low-order double words (32-bit)
-  Binary0fOpCode(kX86Punpcklqdq),  // Interleave low-order quad word
-  Binary0fOpCode(kX86Sqrtsd),   // square root
-  Binary0fOpCode(kX86Pmulld),   // parallel integer multiply 32 bits x 4
-  Binary0fOpCode(kX86Pmullw),   // parallel integer multiply 16 bits x 8
-  Binary0fOpCode(kX86Pmuludq),   // parallel unsigned 32-bit multiply, each result stored as 64 bits
-  Binary0fOpCode(kX86Mulps),    // parallel FP multiply 32 bits x 4
-  Binary0fOpCode(kX86Mulpd),    // parallel FP multiply 64 bits x 2
-  Binary0fOpCode(kX86Paddb),    // parallel integer addition 8 bits x 16
-  Binary0fOpCode(kX86Paddw),    // parallel integer addition 16 bits x 8
-  Binary0fOpCode(kX86Paddd),    // parallel integer addition 32 bits x 4
-  Binary0fOpCode(kX86Paddq),    // parallel integer addition 64 bits x 2
-  Binary0fOpCode(kX86Psadbw),   // computes sum of absolute differences for unsigned byte integers
-  Binary0fOpCode(kX86Addps),    // parallel FP addition 32 bits x 4
-  Binary0fOpCode(kX86Addpd),    // parallel FP addition 64 bits x 2
-  Binary0fOpCode(kX86Psubb),    // parallel integer subtraction 8 bits x 16
-  Binary0fOpCode(kX86Psubw),    // parallel integer subtraction 16 bits x 8
-  Binary0fOpCode(kX86Psubd),    // parallel integer subtraction 32 bits x 4
-  Binary0fOpCode(kX86Psubq),    // parallel integer subtraction 64 bits x 2
-  Binary0fOpCode(kX86Subps),    // parallel FP subtraction 32 bits x 4
-  Binary0fOpCode(kX86Subpd),    // parallel FP subtraction 64 bits x 2
-  Binary0fOpCode(kX86Pand),     // parallel AND 128 bits x 1
-  Binary0fOpCode(kX86Por),      // parallel OR 128 bits x 1
-  Binary0fOpCode(kX86Pxor),     // parallel XOR 128 bits x 1
-  Binary0fOpCode(kX86Phaddw),   // parallel horizontal addition 16 bits x 8
-  Binary0fOpCode(kX86Phaddd),   // parallel horizontal addition 32 bits x 4
-  Binary0fOpCode(kX86Haddpd),   // parallel FP horizontal addition 64 bits x 2
-  Binary0fOpCode(kX86Haddps),   // parallel FP horizontal addition 32 bits x 4
-  kX86PextrbRRI,                // Extract 8 bits from XMM into GPR
-  kX86PextrwRRI,                // Extract 16 bits from XMM into GPR
-  kX86PextrdRRI,                // Extract 32 bits from XMM into GPR
-  kX86PextrbMRI,                // Extract 8 bits from XMM into memory
-  kX86PextrwMRI,                // Extract 16 bits from XMM into memory
-  kX86PextrdMRI,                // Extract 32 bits from XMM into memory
-  kX86PshuflwRRI,               // Shuffle 16 bits in lower 64 bits of XMM.
-  kX86PshufdRRI,                // Shuffle 32 bits in XMM.
-  kX86ShufpsRRI,                // FP Shuffle 32 bits in XMM.
-  kX86ShufpdRRI,                // FP Shuffle 64 bits in XMM.
-  kX86PsrawRI,                  // signed right shift of floating point registers 16 bits x 8
-  kX86PsradRI,                  // signed right shift of floating point registers 32 bits x 4
-  kX86PsrlwRI,                  // logical right shift of floating point registers 16 bits x 8
-  kX86PsrldRI,                  // logical right shift of floating point registers 32 bits x 4
-  kX86PsrlqRI,                  // logical right shift of floating point registers 64 bits x 2
-  kX86PsrldqRI,                 // logical shift of 128-bit vector register, immediate in bytes
-  kX86PsllwRI,                  // left shift of floating point registers 16 bits x 8
-  kX86PslldRI,                  // left shift of floating point registers 32 bits x 4
-  kX86PsllqRI,                  // left shift of floating point registers 64 bits x 2
-  kX86Fild32M,                  // push 32-bit integer on x87 stack
-  kX86Fild64M,                  // push 64-bit integer on x87 stack
-  kX86Fld32M,                   // push float on x87 stack
-  kX86Fld64M,                   // push double on x87 stack
-  kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
-  kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
-  kX86Fst32M,                   // do 32-bit store
-  kX86Fst64M,                   // do 64-bit store
-  kX86Fprem,                    // remainder from dividing two floating point values
-  kX86Fucompp,                  // compare floating point values and pop x87 fp stack twice
-  kX86Fstsw16R,                 // store FPU status word
-  Binary0fOpCode(kX86Movdqa),   // move 128 bits aligned
-  kX86MovdqaMR, kX86MovdqaAR,   // store 128 bit aligned from xmm1 to m128
-  Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
-  kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
-  Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
-  kX86MovapsMR, kX86MovapsAR,   // store aligned packed single FP values from xmm1 to m128
-  kX86MovlpsRM, kX86MovlpsRA,   // load packed single FP values from m64 to low quadword of xmm
-  kX86MovlpsMR, kX86MovlpsAR,   // store packed single FP values from low quadword of xmm to m64
-  kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
-  kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
-  Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
-  Binary0fOpCode(kX86Movqxr),   // move into xmm from 64 bit gpr
-  kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR,  // move into 64 bit reg from xmm
-  kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
-  kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA,  // move 32 bit to 64 bit with sign extension
-  kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
-  kX86Lfence,                   // memory barrier to serialize all previous
-                                // load-from-memory instructions
-  kX86Mfence,                   // memory barrier to serialize all previous
-                                // load-from-memory and store-to-memory instructions
-  kX86Sfence,                   // memory barrier to serialize all previous
-                                // store-to-memory instructions
-  Binary0fOpCode(kX86Imul16),   // 16bit multiply
-  Binary0fOpCode(kX86Imul32),   // 32bit multiply
-  Binary0fOpCode(kX86Imul64),   // 64bit multiply
-  kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
-  kX86LockCmpxchgMR, kX86LockCmpxchgAR, kX86LockCmpxchg64AR,  // locked compare and exchange
-  kX86LockCmpxchg64M, kX86LockCmpxchg64A,  // locked compare and exchange
-  kX86XchgMR,  // exchange memory with register (automatically locked)
-  Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
-  Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
-  Binary0fOpCode(kX86Movsx8),   // sign-extend 8-bit value
-  Binary0fOpCode(kX86Movsx16),  // sign-extend 16-bit value
-  Binary0fOpCode(kX86Movzx8q),   // zero-extend 8-bit value to quad word
-  Binary0fOpCode(kX86Movzx16q),  // zero-extend 16-bit value to quad word
-  Binary0fOpCode(kX86Movsx8q),   // sign-extend 8-bit value to quad word
-  Binary0fOpCode(kX86Movsx16q),  // sign-extend 16-bit value to quad word
-#undef Binary0fOpCode
-  kX86Jcc8, kX86Jcc32,  // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
-  kX86Jmp8, kX86Jmp32,  // jmp rel8/32; lir operands - 0: rel, target assigned
-  kX86JmpR,             // jmp reg; lir operands - 0: reg
-  kX86Jecxz8,           // jecxz rel8; jump relative if ECX is zero.
-  kX86JmpT,             // jmp fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
-
-  kX86CallR,            // call reg; lir operands - 0: reg
-  kX86CallM,            // call [base + disp]; lir operands - 0: base, 1: disp
-  kX86CallA,            // call [base + index * scale + disp]
-                        // lir operands - 0: base, 1: index, 2: scale, 3: disp
-  kX86CallT,            // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
-  kX86CallI,            // call <relative> - 0: disp; Used for core.oat linking only
-  kX86Ret,              // ret; no lir operands
-  kX86PcRelLoadRA,      // mov reg, [base + index * scale + PC relative displacement]
-                        // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
-  kX86PcRelAdr,         // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
-  kX86RepneScasw,       // repne scasw
-  kX86Last
-};
-std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs);
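// Editor's note: an illustrative sketch of the MR/RM/RI operand conventions
// documented at the top of the enum above.  SketchLIR and the factory function
// are hypothetical stand-ins, not ART types.
#include <cstdint>
namespace opcode_sketch {
struct SketchLIR {
  int opcode;
  int32_t operands[5];
};
// kX86Add32MR is "add [base + disp], reg" -- an MR form, so per the comment
// block above, its lir operands are 0: base, 1: disp, 2: reg.
inline SketchLIR MakeAdd32MR(int base, int32_t disp, int reg) {
  return SketchLIR{/* kX86Add32MR */ 0, {base, disp, reg, 0, 0}};
}
}  // namespace opcode_sketch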
-
-/* Instruction assembly field_loc kind */
-enum X86EncodingKind {
-  kData,                                    // Special case for raw data.
-  kNop,                                     // Special case for variable length nop.
-  kNullary,                                 // Opcode that takes no arguments.
-  kRegOpcode,                               // Shorter form of R instruction kind (opcode+rd)
-  kReg, kMem, kArray,                       // R, M and A instruction kinds.
-  kMemReg, kArrayReg, kThreadReg,           // MR, AR and TR instruction kinds.
-  kRegReg, kRegMem, kRegArray, kRegThread,  // RR, RM, RA and RT instruction kinds.
-  kRegRegStore,                             // RR following the store modrm reg-reg encoding rather than the load.
-  kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
-  kRegRegImm, kRegMemImm, kRegArrayImm,     // RRI, RMI and RAI instruction kinds.
-  kMovRegImm,                               // Shorter form move RI.
-  kMovRegQuadImm,                           // 64 bit move RI
-  kRegRegImmStore,                          // RRI following the store modrm reg-reg encoding rather than the load.
-  kMemRegImm,                               // MRI instruction kinds.
-  kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
-  kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
-  kShiftRegRegCl,
-  // kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
-  kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds followed by a condition.
-  kRegRegCond,                             // RR instruction kind followed by a condition.
-  kRegMemCond,                             // RM instruction kind followed by a condition.
-  kJmp, kJcc, kCall,                       // Branch instruction kinds.
-  kPcRel,                                  // Operation with displacement that is PC relative
-  kUnimplemented                           // Encoding used when an instruction isn't yet implemented.
-};
-
-/* Struct used to define the EncodingMap positions for each X86 opcode */
-struct X86EncodingMap {
-  X86OpCode opcode;      // e.g. kOpAddRI
-  // The broad category the instruction conforms to, such as kRegReg. Identifies which LIR operands
-  // hold meaning for the opcode.
-  X86EncodingKind kind;
-  uint64_t flags;
-  struct {
-  uint8_t prefix1;       // Non-zero => a prefix byte.
-  uint8_t prefix2;       // Non-zero => a second prefix byte.
-  uint8_t opcode;        // 1 byte opcode.
-  uint8_t extra_opcode1;  // Possible extra opcode byte.
-  uint8_t extra_opcode2;  // Possible second extra opcode byte.
-  // 3-bit opcode that gets encoded in the register bits of the modrm byte; its use is
-  // determined by the encoding kind.
-  uint8_t modrm_opcode;
-  uint8_t ax_opcode;  // Non-zero => shorter encoding for AX as a destination.
-  uint8_t immediate_bytes;  // Number of bytes of immediate.
-  // Does the instruction address a byte register? In 32-bit mode the registers ah, bh, ch and dh
-  // are not used. In 64-bit mode the REX prefix is used to normalize and allow any byte register
-  // to be addressed.
-  bool r8_form;
-  } skeleton;
-  const char *name;
-  const char* fmt;
-};
-
-
-// FIXME: mem barrier type - what do we do for x86?
-#define kSY 0
-#define kST 0
-
-// Offsets of high and low halves of a 64bit value.
-#define LOWORD_OFFSET 0
-#define HIWORD_OFFSET 4
-
-// Segment override instruction prefix used for quick TLS access to Thread::Current().
-#define THREAD_PREFIX 0x64
-#define THREAD_PREFIX_GS 0x65
-
-// 64 Bit Operand Size
-#define REX_W 0x48
-// Extension of the ModR/M reg field
-#define REX_R 0x44
-// Extension of the SIB index field
-#define REX_X 0x42
-// Extension of the ModR/M r/m field, SIB base field, or Opcode reg field
-#define REX_B 0x41
-// An empty REX prefix used to normalize the byte operations so that they apply to R4 through R15
-#define REX 0x40
-// Mask extracting the least significant 3 bits of r0..r15
-#define kRegNumMask32 0x07
-// Value indicating that base or reg is not used
-#define NO_REG 0
-
-#define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
-#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
-#define IS_SIMM32(v) ((INT64_C(-2147483648) <= (v)) && ((v) <= INT64_C(2147483647)))
-
-extern X86EncodingMap EncodingMap[kX86Last];
-extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
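// Editor's note: a small sketch of how the REX_* and IS_SIMM* definitions in the
// deleted header above combine when encoding 64-bit instructions.  Only the
// constant values come from the header; the helpers are hypothetical.
#include <cstdint>
namespace rex_sketch {
constexpr uint8_t kRexW = 0x48;  // 64-bit operand size
constexpr uint8_t kRexB = 0x41;  // extends ModR/M r/m (or opcode reg) to r8..r15
// "add r10, imm" needs both extensions: REX.W for the 64-bit operation and
// REX.B to reach register 10; the combined prefix byte is 0x49.
static_assert((kRexW | kRexB) == 0x49, "REX.WB prefix");
// IS_SIMM8 from the header selects the shorter sign-extended imm8 encoding.
constexpr bool IsSimm8(int64_t v) { return -128 <= v && v <= 127; }
static_assert(IsSimm8(127) && !IsSimm8(128), "imm8 boundary");
}  // namespace rex_sketch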
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 03bda78..2532bda 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -22,14 +22,10 @@
 
 namespace art {
 
-bool QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
-  bool result = verification_results_->ProcessVerifiedMethod(verifier);
-  if (result) {
-    MethodReference ref = verifier->GetMethodReference();
-    method_inliner_map_->GetMethodInliner(ref.dex_file)
-        ->AnalyseMethodCode(verifier);
-  }
-  return result;
+void QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
+  verification_results_->ProcessVerifiedMethod(verifier);
+  MethodReference ref = verifier->GetMethodReference();
+  method_inliner_map_->GetMethodInliner(ref.dex_file)->AnalyseMethodCode(verifier);
 }
 
 void QuickCompilerCallbacks::ClassRejected(ClassReference ref) {
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 03bf57b..4f5ea76 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -37,7 +37,7 @@
 
     ~QuickCompilerCallbacks() { }
 
-    bool MethodVerified(verifier::MethodVerifier* verifier)
+    void MethodVerified(verifier::MethodVerifier* verifier)
         SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE;
 
     void ClassRejected(ClassReference ref) OVERRIDE;
diff --git a/compiler/dex/reg_location.h b/compiler/dex/reg_location.h
deleted file mode 100644
index aa8ed46..0000000
--- a/compiler/dex/reg_location.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_REG_LOCATION_H_
-#define ART_COMPILER_DEX_REG_LOCATION_H_
-
-#include "reg_storage.h"
-
-namespace art {
-
-static constexpr int16_t INVALID_SREG = -1;
-
-/*
- * Whereas an SSA name describes a definition of a Dalvik vReg, the RegLocation describes
- * the type of an SSA name (and can also be used by code generators to record where the
- * value is located, i.e. physical register, frame, spill, etc.).  For each SSA name (SReg)
- * there is a RegLocation.
- * A note on SSA names:
- *   o SSA names for Dalvik vRegs v0..vN will be assigned 0..N.  These represent the "vN_0"
- *     names.  Negative SSA names represent special values not present in the Dalvik byte code.
- *     For example, SSA name -1 represents an invalid SSA name, and SSA name -2 represents the
- *     Method pointer.  SSA names < -2 are reserved for future use.
- *   o The vN_0 names for non-argument Dalvik vRegs should in practice never be used (as they would
- *     represent the read of an undefined local variable).  The first definition of the
- *     underlying Dalvik vReg will result in a vN_1 name.
- *
- * FIXME: The orig_sreg field was added as a workaround for llvm bitcode generation.  With
- * the latest restructuring, we should be able to remove it and rely on s_reg_low throughout.
- */
-struct RegLocation {
-  RegLocationType location:3;
-  unsigned wide:1;
-  unsigned defined:1;   // Do we know the type?
-  unsigned is_const:1;  // Constant, value in mir_graph->constant_values[].
-  unsigned fp:1;        // Floating point?
-  unsigned core:1;      // Non-floating point?
-  unsigned ref:1;       // Something GC cares about.
-  unsigned high_word:1;  // High word of pair?
-  unsigned home:1;      // Does this represent the home location?
-  RegStorage reg;       // Encoded physical registers.
-  int16_t s_reg_low;    // SSA name for low Dalvik word.
-  int16_t orig_sreg;    // TODO: remove after Bitcode gen complete
-                        // and consolidate usage w/ s_reg_low.
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_REG_LOCATION_H_
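// Editor's note: a self-contained sketch of the bitfield packing the deleted
// RegLocation used; the struct here is a simplified stand-in.
#include <cstdint>
namespace reg_location_sketch {
struct SketchRegLocation {
  unsigned location : 3;  // e.g. frame, physical register, compiler temp
  unsigned wide : 1;      // 64-bit value occupying a vReg pair?
  unsigned fp : 1;        // floating point?
  unsigned ref : 1;       // something the GC cares about
  uint16_t reg;           // stand-in for the encoded RegStorage bits
  int16_t s_reg_low;      // SSA name of the low word, -1 == INVALID_SREG
};
// One such record exists per SSA name; the flag bits keep it compact.
static_assert(sizeof(SketchRegLocation) <= 8, "stays small on common ABIs");
}  // namespace reg_location_sketch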
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
deleted file mode 100644
index 46ed011..0000000
--- a/compiler/dex/reg_storage.h
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_REG_STORAGE_H_
-#define ART_COMPILER_DEX_REG_STORAGE_H_
-
-#include "base/logging.h"
-#include "base/value_object.h"
-#include "compiler_enums.h"  // For WideKind
-
-namespace art {
-
-/*
- * 16-bit representation of the physical register container holding a Dalvik value.
- * The encoding allows up to 64 physical elements per storage class, and supports eight
- * register container shapes.
- *
- * [V] [HHHHH] [SSS] [F] [LLLLLL]
- *
- * [LLLLLL]
- *  Physical register number for the low or solo register.
- *    0..63
- *
- * [F]
- *  Describes the type of the [LLLLLL] register.
- *    0: Core
- *    1: Floating point
- *
- * [SSS]
- *  Shape of the register container.
- *    000: Invalid
- *    001: 32-bit solo register
- *    010: 64-bit solo register
- *    011: 64-bit pair consisting of two 32-bit solo registers
- *    100: 128-bit solo register
- *    101: 256-bit solo register
- *    110: 512-bit solo register
- *    111: 1024-bit solo register
- *
- * [HHHHH]
- *  Physical register number of the high register (valid only for register pair).
- *    0..31
- *
- * [V]
- *    0 -> Invalid
- *    1 -> Valid
- *
- * Note that in all non-invalid cases, we can determine if the storage is floating point
- * by testing bit 7.  Note also that a register pair is effectively limited to a pair of
- * physical register numbers in the 0..31 range.
- *
- * On some target architectures, the same underlying physical register container can be given
- * different views.  For example, Arm's 32-bit single-precision floating point registers
- * s2 and s3 map to the low and high halves of double-precision d1.  Similarly, X86's xmm3
- * vector register can be viewed as 32-bit, 64-bit, 128-bit, etc.  In these cases the use of
- * one view will affect the other views.  The RegStorage class does not concern itself
- * with potential aliasing.  That will be done using the associated RegisterInfo struct.
- * Distinct RegStorage elements should be created for each view of a physical register
- * container.  The management of the aliased physical elements will be handled via RegisterInfo
- * records.
- */
-
-class RegStorage : public ValueObject {
- public:
-  enum RegStorageKind {
-    kValidMask     = 0x8000,
-    kValid         = 0x8000,
-    kInvalid       = 0x0000,
-    kShapeMask     = 0x0380,
-    k32BitSolo     = 0x0080,
-    k64BitSolo     = 0x0100,
-    k64BitPair     = 0x0180,
-    k128BitSolo    = 0x0200,
-    k256BitSolo    = 0x0280,
-    k512BitSolo    = 0x0300,
-    k1024BitSolo   = 0x0380,
-    k64BitMask     = 0x0300,
-    k64Bits        = 0x0100,
-    kShapeTypeMask = 0x03c0,
-    kFloatingPoint = 0x0040,
-    kCoreRegister  = 0x0000,
-  };
-
-  static const uint16_t kRegValMask  = 0x03ff;     // Num, type and shape.
-  static const uint16_t kRegTypeMask = 0x007f;     // Num and type.
-  static const uint16_t kRegNumMask  = 0x003f;     // Num only.
-  static const uint16_t kHighRegNumMask = 0x001f;  // 0..31 for high reg
-  static const uint16_t kMaxRegs     = kRegValMask + 1;
-  // TODO: deprecate use of kInvalidRegVal and speed up GetReg().  Rely on valid bit instead.
-  static const uint16_t kInvalidRegVal = 0x03ff;
-  static const uint16_t kHighRegShift = 10;
-  static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift);
-
-  // Reg is [F][LLLLLL]; will override any existing shape and use rs_kind.
-  constexpr RegStorage(RegStorageKind rs_kind, int reg)
-      : reg_(
-          DCHECK_CONSTEXPR(rs_kind != k64BitPair, , 0u)
-          DCHECK_CONSTEXPR((rs_kind & ~kShapeMask) == 0, , 0u)
-          kValid | rs_kind | (reg & kRegTypeMask)) {
-  }
-  constexpr RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg)
-      : reg_(
-          DCHECK_CONSTEXPR(rs_kind == k64BitPair, << static_cast<int>(rs_kind), 0u)
-          DCHECK_CONSTEXPR((low_reg & kFloatingPoint) == (high_reg & kFloatingPoint),
-                           << low_reg << ", " << high_reg, 0u)
-          DCHECK_CONSTEXPR((high_reg & kRegNumMask) <= kHighRegNumMask,
-                           << "High reg must be in 0..31: " << high_reg, false)
-          kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) |
-                  (low_reg & kRegTypeMask)) {
-  }
-  constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
-  RegStorage() : reg_(kInvalid) {}
-
-  // We do not provide a general operator overload for equality of reg storage, as this is
-  // dangerous in the case of architectures with multiple views, and the name ExactlyEquals
-  // makes the exact-match semantics explicit. A comparison between views is more likely
-  // what is intended in most cases. Such code can be found in, for example, Mir2Lir::IsSameReg.
-  //
-  // If you know what you are doing, include reg_storage_eq.h, which defines == and != for brevity.
-
-  bool ExactlyEquals(const RegStorage& rhs) const {
-    return (reg_ == rhs.GetRawBits());
-  }
-
-  bool NotExactlyEquals(const RegStorage& rhs) const {
-    return (reg_ != rhs.GetRawBits());
-  }
-
-  constexpr bool Valid() const {
-    return ((reg_ & kValidMask) == kValid);
-  }
-
-  constexpr bool Is32Bit() const {
-    return ((reg_ & kShapeMask) == k32BitSolo);
-  }
-
-  constexpr bool Is64Bit() const {
-    return ((reg_ & k64BitMask) == k64Bits);
-  }
-
-  constexpr WideKind GetWideKind() const {
-    return Is64Bit() ? kWide : kNotWide;
-  }
-
-  constexpr bool Is64BitSolo() const {
-    return ((reg_ & kShapeMask) == k64BitSolo);
-  }
-
-  constexpr bool IsPair() const {
-    return ((reg_ & kShapeMask) == k64BitPair);
-  }
-
-  constexpr bool IsFloat() const {
-    return
-        DCHECK_CONSTEXPR(Valid(), , false)
-        ((reg_ & kFloatingPoint) == kFloatingPoint);
-  }
-
-  constexpr bool IsDouble() const {
-    return
-        DCHECK_CONSTEXPR(Valid(), , false)
-        (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
-  }
-
-  constexpr bool IsSingle() const {
-    return
-        DCHECK_CONSTEXPR(Valid(), , false)
-        (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
-  }
-
-  static constexpr bool IsFloat(uint16_t reg) {
-    return ((reg & kFloatingPoint) == kFloatingPoint);
-  }
-
-  static constexpr bool IsDouble(uint16_t reg) {
-    return (reg & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
-  }
-
-  static constexpr bool IsSingle(uint16_t reg) {
-    return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
-  }
-
-  static constexpr bool Is32Bit(uint16_t reg) {
-    return ((reg & kShapeMask) == k32BitSolo);
-  }
-
-  static constexpr bool Is64Bit(uint16_t reg) {
-    return ((reg & k64BitMask) == k64Bits);
-  }
-
-  static constexpr bool Is64BitSolo(uint16_t reg) {
-    return ((reg & kShapeMask) == k64BitSolo);
-  }
-
-  // Used to retrieve either the low register of a pair, or the only register.
-  int GetReg() const {
-    DCHECK(!IsPair()) << "reg_ = 0x" << std::hex << reg_;
-    return Valid() ? (reg_ & kRegValMask) : kInvalidRegVal;
-  }
-
-  // Sets shape, type and num of solo.
-  void SetReg(int reg) {
-    DCHECK(Valid());
-    DCHECK(!IsPair());
-    reg_ = (reg_ & ~kRegValMask) | reg;
-  }
-
-  // Set the reg number and type only; the target remains a 64-bit pair.
-  void SetLowReg(int reg) {
-    DCHECK(IsPair());
-    reg_ = (reg_ & ~kRegTypeMask) | (reg & kRegTypeMask);
-  }
-
-  // Retrieve the least significant register of a pair and return as 32-bit solo.
-  int GetLowReg() const {
-    DCHECK(IsPair());
-    return ((reg_ & kRegTypeMask) | k32BitSolo);
-  }
-
-  // Create a stand-alone RegStorage from the low reg of a pair.
-  RegStorage GetLow() const {
-    DCHECK(IsPair());
-    return RegStorage(k32BitSolo, reg_ & kRegTypeMask);
-  }
-
-  // Retrieve the most significant register of a pair.
-  int GetHighReg() const {
-    DCHECK(IsPair());
-    return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift) | (reg_ & kFloatingPoint);
-  }
-
-  // Create a stand-alone RegStorage from the high reg of a pair.
-  RegStorage GetHigh() const {
-    DCHECK(IsPair());
-    return RegStorage(kValid | GetHighReg());
-  }
-
-  void SetHighReg(int reg) {
-    DCHECK(IsPair());
-    reg_ = (reg_ & ~kHighRegMask) | ((reg & kHighRegNumMask) << kHighRegShift);
-  }
-
-  // Return the register number of low or solo.
-  constexpr int GetRegNum() const {
-    return reg_ & kRegNumMask;
-  }
-
-  // Is register number in 0..7?
-  constexpr bool Low8() const {
-    return GetRegNum() < 8;
-  }
-
-  // Is register number in 0..3?
-  constexpr bool Low4() const {
-    return GetRegNum() < 4;
-  }
-
-  // Combine 2 32-bit solo regs into a pair.
-  static RegStorage MakeRegPair(RegStorage low, RegStorage high) {
-    DCHECK(!low.IsPair());
-    DCHECK(low.Is32Bit());
-    DCHECK(!high.IsPair());
-    DCHECK(high.Is32Bit());
-    return RegStorage(k64BitPair, low.GetReg(), high.GetReg());
-  }
-
-  static constexpr bool SameRegType(RegStorage reg1, RegStorage reg2) {
-    return ((reg1.reg_ & kShapeTypeMask) == (reg2.reg_ & kShapeTypeMask));
-  }
-
-  static constexpr bool SameRegType(int reg1, int reg2) {
-    return ((reg1 & kShapeTypeMask) == (reg2 & kShapeTypeMask));
-  }
-
-  // Create a 32-bit solo.
-  static RegStorage Solo32(int reg_num) {
-    return RegStorage(k32BitSolo, reg_num & kRegTypeMask);
-  }
-
-  // Create a floating point 32-bit solo.
-  static constexpr RegStorage FloatSolo32(int reg_num) {
-    return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
-  }
-
-  // Create a 128-bit solo.
-  static constexpr RegStorage Solo128(int reg_num) {
-    return RegStorage(k128BitSolo, reg_num & kRegTypeMask);
-  }
-
-  // Create a 64-bit solo.
-  static constexpr RegStorage Solo64(int reg_num) {
-    return RegStorage(k64BitSolo, reg_num & kRegTypeMask);
-  }
-
-  // Create a floating point 64-bit solo.
-  static RegStorage FloatSolo64(int reg_num) {
-    return RegStorage(k64BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
-  }
-
-  static constexpr RegStorage InvalidReg() {
-    return RegStorage(kInvalid);
-  }
-
-  static constexpr uint16_t RegNum(int raw_reg_bits) {
-    return raw_reg_bits & kRegNumMask;
-  }
-
-  constexpr int GetRawBits() const {
-    return reg_;
-  }
-
-  size_t StorageSize() const {
-    switch (reg_ & kShapeMask) {
-      case kInvalid: return 0;
-      case k32BitSolo: return 4;
-      case k64BitSolo: return 8;
-      case k64BitPair: return 8;  // Is this useful?  Might want to disallow taking size of pair.
-      case k128BitSolo: return 16;
-      case k256BitSolo: return 32;
-      case k512BitSolo: return 64;
-      case k1024BitSolo: return 128;
-      default: LOG(FATAL) << "Unexpected shape"; UNREACHABLE();
-    }
-  }
-
- private:
-  uint16_t reg_;
-};
-static inline std::ostream& operator<<(std::ostream& o, const RegStorage& rhs) {
-  return o << rhs.GetRawBits();  // TODO: better output.
-}
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_REG_STORAGE_H_
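// Editor's note: a standalone sketch of the [V][HHHHH][SSS][F][LLLLLL] layout
// documented in the deleted header above.  Mask values are copied from the
// class; EncodePair itself is illustrative.
#include <cstdint>
namespace reg_storage_sketch {
constexpr uint16_t kValidBit    = 0x8000;  // [V]
constexpr uint16_t k64BitPair   = 0x0180;  // [SSS] = 011
constexpr uint16_t kFpBit       = 0x0040;  // [F]
constexpr uint16_t kLowNumMask  = 0x003f;  // [LLLLLL]
constexpr uint16_t kHighNumMask = 0x001f;  // [HHHHH], pairs limited to regs 0..31
constexpr int kHighShift = 10;

constexpr uint16_t EncodePair(uint16_t low, uint16_t high) {
  return kValidBit | k64BitPair | ((high & kHighNumMask) << kHighShift) |
         (low & (kFpBit | kLowNumMask));
}
// Round-trip checks mirroring GetLowReg()/GetHighReg() above.
static_assert(((EncodePair(2, 3) >> kHighShift) & kHighNumMask) == 3, "high reg");
static_assert((EncodePair(2, 3) & kLowNumMask) == 2, "low reg");
}  // namespace reg_storage_sketch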
diff --git a/compiler/dex/reg_storage_eq.h b/compiler/dex/reg_storage_eq.h
deleted file mode 100644
index b688dac..0000000
--- a/compiler/dex/reg_storage_eq.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_REG_STORAGE_EQ_H_
-#define ART_COMPILER_DEX_REG_STORAGE_EQ_H_
-
-#include "reg_storage.h"
-
-namespace art {
-
-// Define == and != operators for RegStorage. These are based on exact equality of the reg storage,
-// that is, 32b and 64b views of the same physical register won't match. This is often not the
-// intended behavior, so be careful when including this header.
-
-inline bool operator==(const RegStorage& lhs, const RegStorage& rhs) {
-  return lhs.ExactlyEquals(rhs);
-}
-
-inline bool operator!=(const RegStorage& lhs, const RegStorage& rhs) {
-  return lhs.NotExactlyEquals(rhs);
-}
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_REG_STORAGE_EQ_H_
-
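// Editor's note: an illustrative sketch of why the deleted header scopes
// operator== to "exact" equality.  Values follow the RegStorage encoding above;
// the names are stand-ins.
#include <cstdint>
namespace reg_storage_eq_sketch {
constexpr uint16_t kValid32Solo = 0x8000 | 0x0080;
constexpr uint16_t kValid64Solo = 0x8000 | 0x0100;
constexpr uint16_t kRegNumMask  = 0x003f;
// Exact equality (what these operators implement) distinguishes the 32-bit and
// 64-bit views of physical register 5 ...
static_assert((kValid32Solo | 5) != (kValid64Solo | 5), "views compare unequal");
// ... while register-number equality -- what callers often actually mean, cf.
// Mir2Lir::IsSameReg -- does not.
static_assert(((kValid32Solo | 5) & kRegNumMask) == ((kValid64Solo | 5) & kRegNumMask),
              "same physical register");
}  // namespace reg_storage_eq_sketch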
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
deleted file mode 100644
index 6ed666b..0000000
--- a/compiler/dex/ssa_transformation.cc
+++ /dev/null
@@ -1,610 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/bit_vector-inl.h"
-#include "base/logging.h"
-#include "base/scoped_arena_containers.h"
-#include "compiler_ir.h"
-#include "dataflow_iterator-inl.h"
-
-#define NOTVISITED (-1)
-
-namespace art {
-
-void MIRGraph::ClearAllVisitedFlags() {
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    bb->visited = false;
-  }
-}
-
-BasicBlock* MIRGraph::NeedsVisit(BasicBlock* bb) {
-  if (bb != nullptr) {
-    if (bb->visited || bb->hidden) {
-      bb = nullptr;
-    }
-  }
-  return bb;
-}
-
-BasicBlock* MIRGraph::NextUnvisitedSuccessor(BasicBlock* bb) {
-  BasicBlock* res = NeedsVisit(GetBasicBlock(bb->fall_through));
-  if (res == nullptr) {
-    res = NeedsVisit(GetBasicBlock(bb->taken));
-    if (res == nullptr) {
-      if (bb->successor_block_list_type != kNotUsed) {
-        for (SuccessorBlockInfo* sbi : bb->successor_blocks) {
-          res = NeedsVisit(GetBasicBlock(sbi->block));
-          if (res != nullptr) {
-            break;
-          }
-        }
-      }
-    }
-  }
-  return res;
-}
-
-void MIRGraph::MarkPreOrder(BasicBlock* block) {
-  block->visited = true;
-  /* Enqueue the pre_order block id */
-  if (block->id != NullBasicBlockId) {
-    dfs_order_.push_back(block->id);
-  }
-}
-
-void MIRGraph::RecordDFSOrders(BasicBlock* block) {
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaVector<BasicBlock*> succ(allocator.Adapter());
-  succ.reserve(GetNumBlocks());
-  MarkPreOrder(block);
-  succ.push_back(block);
-  while (!succ.empty()) {
-    BasicBlock* curr = succ.back();
-    BasicBlock* next_successor = NextUnvisitedSuccessor(curr);
-    if (next_successor != nullptr) {
-      MarkPreOrder(next_successor);
-      succ.push_back(next_successor);
-      continue;
-    }
-    curr->dfs_id = dfs_post_order_.size();
-    if (curr->id != NullBasicBlockId) {
-      dfs_post_order_.push_back(curr->id);
-    }
-    succ.pop_back();
-  }
-}
-
-/* Sort the blocks by the Depth-First-Search */
-void MIRGraph::ComputeDFSOrders() {
-  /* Clear the DFS pre-order and post-order lists. */
-  dfs_order_.clear();
-  dfs_order_.reserve(GetNumBlocks());
-  dfs_post_order_.clear();
-  dfs_post_order_.reserve(GetNumBlocks());
-
-  // Reset visited flags from all nodes
-  ClearAllVisitedFlags();
-
-  // Record dfs orders
-  RecordDFSOrders(GetEntryBlock());
-
-  num_reachable_blocks_ = dfs_order_.size();
-
-  if (num_reachable_blocks_ != GetNumBlocks()) {
-    // Kill all unreachable blocks.
-    AllNodesIterator iter(this);
-    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-      if (!bb->visited) {
-        bb->Kill(this);
-      }
-    }
-  }
-  dfs_orders_up_to_date_ = true;
-}
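// Editor's note: a toy, self-contained illustration of the pre/post orders that
// RecordDFSOrders() above produces -- pre-order ids on first visit, post-order
// ids once a block's successors are exhausted.  The diamond CFG is hypothetical.
#include <cstddef>
#include <vector>
namespace dfs_sketch {
inline void DfsOrdersDemo(std::vector<int>* pre, std::vector<int>* post) {
  // Diamond CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3.
  const std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}};
  std::vector<bool> visited(succ.size(), false);
  std::vector<std::size_t> next(succ.size(), 0);
  std::vector<int> stack = {0};
  visited[0] = true;
  pre->push_back(0);
  while (!stack.empty()) {
    int b = stack.back();
    // Skip successors already visited, mirroring NextUnvisitedSuccessor().
    while (next[b] < succ[b].size() && visited[succ[b][next[b]]]) ++next[b];
    if (next[b] < succ[b].size()) {
      int s = succ[b][next[b]++];
      visited[s] = true;
      pre->push_back(s);   // pre-order: recorded when first pushed
      stack.push_back(s);
    } else {
      post->push_back(b);  // post-order: recorded when exhausted
      stack.pop_back();
    }
  }
  // Result: pre == {0, 1, 3, 2}, post == {3, 1, 2, 0}; the entry block is last
  // in post-order, which ComputeDominators() below relies on.
}
}  // namespace dfs_sketch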
-
-/*
- * Mark block bit on the per-Dalvik register vector to denote that Dalvik
- * register idx is defined in BasicBlock bb.
- */
-bool MIRGraph::FillDefBlockMatrix(BasicBlock* bb) {
-  if (bb->data_flow_info == nullptr) {
-    return false;
-  }
-
-  for (uint32_t idx : bb->data_flow_info->def_v->Indexes()) {
-    /* Block bb defines register idx */
-    temp_.ssa.def_block_matrix[idx]->SetBit(bb->id);
-  }
-  return true;
-}
-
-void MIRGraph::ComputeDefBlockMatrix() {
-  int num_registers = GetNumOfCodeAndTempVRs();
-  /* Allocate num_registers bit vector pointers */
-  DCHECK(temp_scoped_alloc_ != nullptr);
-  DCHECK(temp_.ssa.def_block_matrix == nullptr);
-  temp_.ssa.def_block_matrix =
-      temp_scoped_alloc_->AllocArray<ArenaBitVector*>(num_registers, kArenaAllocDFInfo);
-  int i;
-
-  /* Initialize num_registers bit vectors with num_blocks bits each */
-  for (i = 0; i < num_registers; i++) {
-    temp_.ssa.def_block_matrix[i] = new (temp_scoped_alloc_.get()) ArenaBitVector(
-        arena_, GetNumBlocks(), false, kBitMapBMatrix);
-    temp_.ssa.def_block_matrix[i]->ClearAllBits();
-  }
-
-  AllNodesIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    FindLocalLiveIn(bb);
-  }
-  AllNodesIterator iter2(this);
-  for (BasicBlock* bb = iter2.Next(); bb != nullptr; bb = iter2.Next()) {
-    FillDefBlockMatrix(bb);
-  }
-
-  /*
-   * Also set the incoming parameters as defs in the entry block.
-   * Only need to handle the parameters for the outer method.
-   */
-  int num_regs = GetNumOfCodeVRs();
-  int in_reg = GetFirstInVR();
-  for (; in_reg < num_regs; in_reg++) {
-    temp_.ssa.def_block_matrix[in_reg]->SetBit(GetEntryBlock()->id);
-  }
-}
-
-void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) {
-  // Clear the dominator post-order list.
-  dom_post_order_traversal_.clear();
-  dom_post_order_traversal_.reserve(num_reachable_blocks_);
-
-  ClearAllVisitedFlags();
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaVector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack(
-      allocator.Adapter());
-  bb->visited = true;
-  work_stack.push_back(std::make_pair(bb, bb->i_dominated->Indexes().begin()));
-  while (!work_stack.empty()) {
-    std::pair<BasicBlock*, ArenaBitVector::IndexIterator>* curr = &work_stack.back();
-    BasicBlock* curr_bb = curr->first;
-    ArenaBitVector::IndexIterator* curr_idom_iter = &curr->second;
-    while (!curr_idom_iter->Done() && (NeedsVisit(GetBasicBlock(**curr_idom_iter)) == nullptr)) {
-      ++*curr_idom_iter;
-    }
-    // NOTE: work_stack.push_back()/pop_back() invalidate curr and curr_idom_iter.
-    if (!curr_idom_iter->Done()) {
-      BasicBlock* new_bb = GetBasicBlock(**curr_idom_iter);
-      ++*curr_idom_iter;
-      new_bb->visited = true;
-      work_stack.push_back(std::make_pair(new_bb, new_bb->i_dominated->Indexes().begin()));
-    } else {
-      // no successor/next
-      if (curr_bb->id != NullBasicBlockId) {
-        dom_post_order_traversal_.push_back(curr_bb->id);
-      }
-      work_stack.pop_back();
-    }
-  }
-}
-
-void MIRGraph::CheckForDominanceFrontier(BasicBlock* dom_bb,
-                                         const BasicBlock* succ_bb) {
-  /*
-   * TODO - evaluate whether phi will ever need to be inserted into exit
-   * blocks.
-   */
-  if (succ_bb->i_dom != dom_bb->id &&
-    succ_bb->block_type == kDalvikByteCode &&
-    succ_bb->hidden == false) {
-    dom_bb->dom_frontier->SetBit(succ_bb->id);
-  }
-}
-
-/* Worker function to compute the dominance frontier */
-bool MIRGraph::ComputeDominanceFrontier(BasicBlock* bb) {
-  /* Calculate DF_local */
-  if (bb->taken != NullBasicBlockId) {
-    CheckForDominanceFrontier(bb, GetBasicBlock(bb->taken));
-  }
-  if (bb->fall_through != NullBasicBlockId) {
-    CheckForDominanceFrontier(bb, GetBasicBlock(bb->fall_through));
-  }
-  if (bb->successor_block_list_type != kNotUsed) {
-    for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-      BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block);
-      CheckForDominanceFrontier(bb, succ_bb);
-    }
-  }
-
-  /* Calculate DF_up */
-  for (uint32_t dominated_idx : bb->i_dominated->Indexes()) {
-    BasicBlock* dominated_bb = GetBasicBlock(dominated_idx);
-    for (uint32_t df_up_block_idx : dominated_bb->dom_frontier->Indexes()) {
-      BasicBlock* df_up_block = GetBasicBlock(df_up_block_idx);
-      CheckForDominanceFrontier(bb, df_up_block);
-    }
-  }
-
-  return true;
-}
-
-/* Worker function for initializing domination-related data structures */
-void MIRGraph::InitializeDominationInfo(BasicBlock* bb) {
-  int num_total_blocks = GetBasicBlockListCount();
-
-  if (bb->dominators == nullptr) {
-    bb->dominators = new (arena_) ArenaBitVector(arena_, num_total_blocks,
-                                                 true /* expandable */, kBitMapDominators);
-    bb->i_dominated = new (arena_) ArenaBitVector(arena_, num_total_blocks,
-                                                  true /* expandable */, kBitMapIDominated);
-    bb->dom_frontier = new (arena_) ArenaBitVector(arena_, num_total_blocks,
-                                                   true /* expandable */, kBitMapDomFrontier);
-  } else {
-    bb->dominators->ClearAllBits();
-    bb->i_dominated->ClearAllBits();
-    bb->dom_frontier->ClearAllBits();
-  }
-  /* Set all bits in the dominator vector */
-  bb->dominators->SetInitialBits(num_total_blocks);
-
-  return;
-}
-
-/*
- * Walk through the ordered i_dom list until we reach a common parent.
- * Given the ordering of i_dom_list, this common parent represents the
- * last element of the intersection of block1's and block2's dominators.
- */
-int MIRGraph::FindCommonParent(int block1, int block2) {
-  while (block1 != block2) {
-    while (block1 < block2) {
-      block1 = i_dom_list_[block1];
-      DCHECK_NE(block1, NOTVISITED);
-    }
-    while (block2 < block1) {
-      block2 = i_dom_list_[block2];
-      DCHECK_NE(block2, NOTVISITED);
-    }
-  }
-  return block1;
-}
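-
-/*
- * Editorial sketch (illustrative, not from the original source): dfs ids are
- * post-order numbers, so the entry block has the highest id and i_dom_list_
- * only ever points "upward".  With i_dom_list_ = {2, 2, 3, 3} (entry == 3),
- * FindCommonParent(0, 1) lifts 0 -> 2, then 1 -> 2, and returns 2: the last
- * common entry of the two dominator chains.
- */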
-
-/* Worker function to compute each block's immediate dominator */
-bool MIRGraph::ComputeblockIDom(BasicBlock* bb) {
-  /* Special-case entry block */
-  if ((bb->id == NullBasicBlockId) || (bb == GetEntryBlock())) {
-    return false;
-  }
-
-  /* Iterate through the predecessors */
-  auto it = bb->predecessors.begin(), end = bb->predecessors.end();
-
-  /* Find the first processed predecessor */
-  int idom = -1;
-  for ( ; ; ++it) {
-    CHECK(it != end);
-    BasicBlock* pred_bb = GetBasicBlock(*it);
-    DCHECK(pred_bb != nullptr);
-    if (i_dom_list_[pred_bb->dfs_id] != NOTVISITED) {
-      idom = pred_bb->dfs_id;
-      break;
-    }
-  }
-
-  /* Scan the rest of the predecessors */
-  for ( ; it != end; ++it) {
-    BasicBlock* pred_bb = GetBasicBlock(*it);
-    DCHECK(pred_bb != nullptr);
-    if (i_dom_list_[pred_bb->dfs_id] != NOTVISITED) {
-      idom = FindCommonParent(pred_bb->dfs_id, idom);
-    }
-  }
-
-  DCHECK_NE(idom, NOTVISITED);
-
-  /* Did something change? */
-  if (i_dom_list_[bb->dfs_id] != idom) {
-    i_dom_list_[bb->dfs_id] = idom;
-    return true;
-  }
-  return false;
-}
-
-/* Worker function to compute each block's dominators */
-bool MIRGraph::ComputeBlockDominators(BasicBlock* bb) {
-  if (bb == GetEntryBlock()) {
-    bb->dominators->ClearAllBits();
-  } else {
-    bb->dominators->Copy(GetBasicBlock(bb->i_dom)->dominators);
-  }
-  bb->dominators->SetBit(bb->id);
-  return false;
-}
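-
-// Editorial note (illustrative): this realizes Dom(b) = Dom(idom(b)) U {b}.
-// A single pass suffices because the caller walks blocks in reverse
-// post-order, so idom(b)'s dominator set is complete before b is visited.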
-
-bool MIRGraph::SetDominators(BasicBlock* bb) {
-  if (bb != GetEntryBlock()) {
-    int idom_dfs_idx = i_dom_list_[bb->dfs_id];
-    DCHECK_NE(idom_dfs_idx, NOTVISITED);
-    int i_dom_idx = dfs_post_order_[idom_dfs_idx];
-    BasicBlock* i_dom = GetBasicBlock(i_dom_idx);
-    bb->i_dom = i_dom->id;
-    /* Add bb to the i_dominated set of the immediate dominator block */
-    i_dom->i_dominated->SetBit(bb->id);
-  }
-  return false;
-}
-
-/* Compute dominators, immediate dominator, and dominance frontier */
-void MIRGraph::ComputeDominators() {
-  int num_reachable_blocks = num_reachable_blocks_;
-
-  /* Initialize domination-related data structures */
-  PreOrderDfsIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    InitializeDominationInfo(bb);
-  }
-
-  /* Initialize & Clear i_dom_list */
-  if (max_num_reachable_blocks_ < num_reachable_blocks_) {
-    i_dom_list_ = arena_->AllocArray<int>(num_reachable_blocks, kArenaAllocDFInfo);
-  }
-  for (int i = 0; i < num_reachable_blocks; i++) {
-    i_dom_list_[i] = NOTVISITED;
-  }
-
-  /* For post-order, the last block is the entry block.  Set its i_dom to itself. */
-  DCHECK_EQ(GetEntryBlock()->dfs_id, num_reachable_blocks-1);
-  i_dom_list_[GetEntryBlock()->dfs_id] = GetEntryBlock()->dfs_id;
-
-  /* Compute the immediate dominators */
-  RepeatingReversePostOrderDfsIterator iter2(this);
-  bool change = false;
-  for (BasicBlock* bb = iter2.Next(false); bb != nullptr; bb = iter2.Next(change)) {
-    change = ComputeblockIDom(bb);
-  }
-
-  /* Set the dominator for the root node */
-  GetEntryBlock()->dominators->ClearAllBits();
-  GetEntryBlock()->dominators->SetBit(GetEntryBlock()->id);
-
-  GetEntryBlock()->i_dom = 0;
-
-  PreOrderDfsIterator iter3(this);
-  for (BasicBlock* bb = iter3.Next(); bb != nullptr; bb = iter3.Next()) {
-    SetDominators(bb);
-  }
-
-  ReversePostOrderDfsIterator iter4(this);
-  for (BasicBlock* bb = iter4.Next(); bb != nullptr; bb = iter4.Next()) {
-    ComputeBlockDominators(bb);
-  }
-
-  // Compute the dominance frontier for each block.
-  ComputeDomPostOrderTraversal(GetEntryBlock());
-  PostOrderDOMIterator iter5(this);
-  for (BasicBlock* bb = iter5.Next(); bb != nullptr; bb = iter5.Next()) {
-    ComputeDominanceFrontier(bb);
-  }
-
-  domination_up_to_date_ = true;
-}
-
-/*
- * Perform dest |= src1 & ~src2, i.e. add to dest everything in src1 that
- * is not in src2.
- * This is probably not general enough to be placed in BitVector.[ch].
- */
-void MIRGraph::ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
-                                 const ArenaBitVector* src2) {
-  if (dest->GetStorageSize() != src1->GetStorageSize() ||
-      dest->GetStorageSize() != src2->GetStorageSize() ||
-      dest->IsExpandable() != src1->IsExpandable() ||
-      dest->IsExpandable() != src2->IsExpandable()) {
-    LOG(FATAL) << "Incompatible set properties";
-  }
-
-  for (unsigned int idx = 0; idx < dest->GetStorageSize(); idx++) {
-    dest->GetRawStorage()[idx] |= src1->GetRawStorageWord(idx) & ~(src2->GetRawStorageWord(idx));
-  }
-}
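-
-// Editorial example (illustrative): per storage word this is plain bit
-// arithmetic.  With dest = 0b0001, src1 = 0b0110, src2 = 0b0010:
-//   dest |= 0b0110 & ~0b0010  ==>  dest |= 0b0100  ==>  dest == 0b0101.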
-
-/*
- * Iterate through all successor blocks and propagate up the live-in sets.
- * The calculated result is used for phi-node pruning - where we only need to
- * insert a phi node if the variable is live-in to the block.
- */
-bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) {
-  DCHECK_EQ(temp_.ssa.num_vregs, cu_->mir_graph->GetNumOfCodeAndTempVRs());
-  ArenaBitVector* temp_live_vregs = temp_.ssa.work_live_vregs;
-
-  if (bb->data_flow_info == nullptr) {
-    return false;
-  }
-  temp_live_vregs->Copy(bb->data_flow_info->live_in_v);
-  BasicBlock* bb_taken = GetBasicBlock(bb->taken);
-  BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through);
-  if (bb_taken != nullptr && bb_taken->data_flow_info != nullptr) {
-    ComputeSuccLineIn(temp_live_vregs, bb_taken->data_flow_info->live_in_v,
-                      bb->data_flow_info->def_v);
-  }
-  if (bb_fall_through != nullptr && bb_fall_through->data_flow_info != nullptr) {
-    ComputeSuccLineIn(temp_live_vregs, bb_fall_through->data_flow_info->live_in_v,
-                      bb->data_flow_info->def_v);
-  }
-  if (bb->successor_block_list_type != kNotUsed) {
-    for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
-      BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block);
-      if (succ_bb->data_flow_info) {
-        ComputeSuccLineIn(temp_live_vregs, succ_bb->data_flow_info->live_in_v,
-                          bb->data_flow_info->def_v);
-      }
-    }
-  }
-  if (!temp_live_vregs->Equal(bb->data_flow_info->live_in_v)) {
-    bb->data_flow_info->live_in_v->Copy(temp_live_vregs);
-    return true;
-  }
-  return false;
-}
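-
-// Editorial note (illustrative): this is one step of backward liveness,
-//   live_in(b) U= live_in(s) & ~def(b)   for each successor s,
-// iterated to a fixed point by the repeating iterator in FindPhiNodeBlocks().
-// A phi for a vreg is then required only where the vreg is live-in, which is
-// exactly the pruned-SSA criterion mentioned above.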
-
-/* For each dalvik reg, find blocks that need phi nodes according to the dominance frontiers. */
-void MIRGraph::FindPhiNodeBlocks() {
-  RepeatingPostOrderDfsIterator iter(this);
-  bool change = false;
-  for (BasicBlock* bb = iter.Next(false); bb != nullptr; bb = iter.Next(change)) {
-    change = ComputeBlockLiveIns(bb);
-  }
-
-  ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
-      temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapBMatrix);
-
-  // Reuse the def_block_matrix storage for phi_node_blocks.
-  ArenaBitVector** def_block_matrix = temp_.ssa.def_block_matrix;
-  ArenaBitVector** phi_node_blocks = def_block_matrix;
-  DCHECK(temp_.ssa.phi_node_blocks == nullptr);
-  temp_.ssa.phi_node_blocks = phi_node_blocks;
-  temp_.ssa.def_block_matrix = nullptr;
-
-  /* Iterate through each Dalvik register */
-  for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) {
-    phi_blocks->ClearAllBits();
-    ArenaBitVector* input_blocks = def_block_matrix[dalvik_reg];
-    do {
-      // TUNING: When we repeat this, we could skip indexes from the previous pass.
-      for (uint32_t idx : input_blocks->Indexes()) {
-        BasicBlock* def_bb = GetBasicBlock(idx);
-        if (def_bb->dom_frontier != nullptr) {
-          phi_blocks->Union(def_bb->dom_frontier);
-        }
-      }
-    } while (input_blocks->Union(phi_blocks));
-
-    def_block_matrix[dalvik_reg] = phi_blocks;
-    phi_blocks = input_blocks;  // Reuse the bit vector in next iteration.
-  }
-}
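-
-// Editorial note (illustrative): the do/while above computes the iterated
-// dominance frontier DF+ as a fixed point.  phi_blocks accumulates the
-// frontiers of the current def blocks, and input_blocks->Union(phi_blocks)
-// returns true only while that union still adds blocks, i.e. until no new
-// phi site can appear.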
-
-/*
- * Worker function to insert phi-operands with latest SSA names from
- * predecessor blocks
- */
-bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) {
-  /* Phi nodes are at the beginning of each block */
-  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (mir->dalvikInsn.opcode != static_cast<Instruction::Code>(kMirOpPhi)) {
-      return true;
-    }
-    int ssa_reg = mir->ssa_rep->defs[0];
-    DCHECK_GE(ssa_reg, 0);   // Shouldn't see compiler temps here
-    int v_reg = SRegToVReg(ssa_reg);
-
-    /* Iterate through the predecessors */
-    size_t num_uses = bb->predecessors.size();
-    AllocateSSAUseData(mir, num_uses);
-    int* uses = mir->ssa_rep->uses;
-    BasicBlockId* incoming = arena_->AllocArray<BasicBlockId>(num_uses, kArenaAllocDFInfo);
-    mir->meta.phi_incoming = incoming;
-    int idx = 0;
-    for (BasicBlockId pred_id : bb->predecessors) {
-      BasicBlock* pred_bb = GetBasicBlock(pred_id);
-      DCHECK(pred_bb != nullptr);
-      uses[idx] = pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
-      incoming[idx] = pred_id;
-      idx++;
-    }
-  }
-
-  return true;
-}
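-
-// Editorial sketch (illustrative): after this pass a phi in a block with
-// predecessors p0 and p1 conceptually reads
-//   sN = PHI(vreg_to_ssa_map_exit[v] @ p0, vreg_to_ssa_map_exit[v] @ p1),
-// with meta.phi_incoming recording which predecessor supplied each use.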
-
-void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) {
-  if (block->visited || block->hidden) {
-    return;
-  }
-
-  struct BasicBlockInfo {
-    BasicBlock* bb;
-    int32_t* ssa_map;
-  };
-  BasicBlockInfo temp;
-
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaVector<BasicBlockInfo> bi_stack(allocator.Adapter());
-  ScopedArenaVector<BasicBlock*> succ_stack(allocator.Adapter());
-
-  uint32_t num_vregs = GetNumOfCodeAndTempVRs();
-  size_t map_size = sizeof(int32_t) * num_vregs;
-  temp.bb = block;
-  temp.ssa_map = vreg_to_ssa_map_;
-  bi_stack.push_back(temp);
-
-  while (!bi_stack.empty()) {
-    temp = bi_stack.back();
-    bi_stack.pop_back();
-    BasicBlock* b = temp.bb;
-
-    if (b->visited || b->hidden) {
-      continue;
-    }
-    b->visited = true;
-
-    /* Restore SSA map snapshot, except for the first block */
-    if (b != block) {
-      memcpy(vreg_to_ssa_map_, temp.ssa_map, map_size);
-    }
-
-    /* Process this block */
-    DoSSAConversion(b);
-
-    /* If there is no successor list, taken block, or fall-through block, continue */
-    if (b->successor_block_list_type == kNotUsed &&
-        b->taken == NullBasicBlockId &&
-        b->fall_through == NullBasicBlockId) {
-      continue;
-    }
-
-    /* Save SSA map snapshot */
-    int32_t* saved_ssa_map =
-      allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap);
-    memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size);
-
-    if (b->successor_block_list_type != kNotUsed) {
-      for (SuccessorBlockInfo* successor_block_info : b->successor_blocks) {
-        BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block);
-        succ_stack.push_back(succ_bb);
-      }
-      while (!succ_stack.empty()) {
-        temp.bb = succ_stack.back();
-        succ_stack.pop_back();
-        temp.ssa_map = saved_ssa_map;
-        bi_stack.push_back(temp);
-      }
-    }
-    if (b->taken != NullBasicBlockId) {
-      temp.bb = GetBasicBlock(b->taken);
-      temp.ssa_map = saved_ssa_map;
-      bi_stack.push_back(temp);
-    }
-    if (b->fall_through != NullBasicBlockId) {
-      temp.bb = GetBasicBlock(b->fall_through);
-      temp.ssa_map = saved_ssa_map;
-      bi_stack.push_back(temp);
-    }
-  }
-}
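-
-// Editorial note (illustrative): the explicit bi_stack replaces a recursive
-// pre-order rename.  Each pushed entry pairs a successor with the SSA-map
-// snapshot taken at the end of the block that pushed it, so popping a sibling
-// restores vreg_to_ssa_map_ exactly as unwinding the recursion would.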
-
-}  // namespace art
diff --git a/compiler/dex/type_inference.cc b/compiler/dex/type_inference.cc
deleted file mode 100644
index c93fe20..0000000
--- a/compiler/dex/type_inference.cc
+++ /dev/null
@@ -1,1074 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "type_inference.h"
-
-#include "base/bit_vector-inl.h"
-#include "compiler_ir.h"
-#include "dataflow_iterator-inl.h"
-#include "dex_flags.h"
-#include "dex_file-inl.h"
-#include "driver/dex_compilation_unit.h"
-#include "mir_field_info.h"
-#include "mir_graph.h"
-#include "mir_method_info.h"
-#include "utils.h"
-
-namespace art {
-
-inline TypeInference::Type TypeInference::Type::ArrayType(uint32_t array_depth, Type nested_type) {
-  DCHECK_NE(array_depth, 0u);
-  return Type(kFlagNarrow | kFlagRef | kFlagLowWord | (array_depth << kBitArrayDepthStart) |
-              ((nested_type.raw_bits_ & kMaskWideAndType) << kArrayTypeShift));
-}
-
-inline TypeInference::Type TypeInference::Type::ArrayTypeFromComponent(Type component_type) {
-  if (component_type.ArrayDepth() == 0u) {
-    return ArrayType(1u, component_type);
-  }
-  if (UNLIKELY(component_type.ArrayDepth() == kMaxArrayDepth)) {
-    return component_type;
-  }
-  return Type(component_type.raw_bits_ + (1u << kBitArrayDepthStart));  // array_depth + 1u;
-}
-
-TypeInference::Type TypeInference::Type::ShortyType(char shorty) {
-  switch (shorty) {
-    case 'L':
-      return Type(kFlagLowWord | kFlagNarrow | kFlagRef);
-    case 'D':
-      return Type(kFlagLowWord | kFlagWide | kFlagFp);
-    case 'J':
-      return Type(kFlagLowWord | kFlagWide | kFlagCore);
-    case 'F':
-      return Type(kFlagLowWord | kFlagNarrow | kFlagFp);
-    default:
-      DCHECK(shorty == 'I' || shorty == 'S' || shorty == 'C' || shorty == 'B' || shorty == 'Z');
-      return Type(kFlagLowWord | kFlagNarrow | kFlagCore);
-  }
-}
-
-TypeInference::Type TypeInference::Type::DexType(const DexFile* dex_file, uint32_t type_idx) {
-  const char* desc = dex_file->GetTypeDescriptor(dex_file->GetTypeId(type_idx));
-  if (UNLIKELY(desc[0] == 'V')) {
-    return Unknown();
-  } else if (UNLIKELY(desc[0] == '[')) {
-    size_t array_depth = 0u;
-    while (*desc == '[') {
-      ++array_depth;
-      ++desc;
-    }
-    if (UNLIKELY(array_depth > kMaxArrayDepth)) {
-      LOG(WARNING) << "Array depth exceeds " << kMaxArrayDepth << ": " << array_depth
-          << " in dex file " << dex_file->GetLocation() << " type index " << type_idx;
-      array_depth = kMaxArrayDepth;
-    }
-    Type shorty_result = Type::ShortyType(desc[0]);
-    return ArrayType(array_depth, shorty_result);
-  } else {
-    return ShortyType(desc[0]);
-  }
-}
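-
-// Editorial examples (illustrative): "I" yields a narrow core type, "D" a
-// wide fp type, "Ljava/lang/Object;" a reference, and "[[I" an array type of
-// depth 2 over a narrow core component, with depth capped at kMaxArrayDepth.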
-
-bool TypeInference::Type::MergeArrayConflict(Type src_type) {
-  DCHECK(Ref());
-  DCHECK_NE(ArrayDepth(), src_type.ArrayDepth());
-  DCHECK_GE(std::min(ArrayDepth(), src_type.ArrayDepth()), 1u);
-  bool size_conflict =
-      (ArrayDepth() == 1u && (raw_bits_ & kFlagArrayWide) != 0u) ||
-      (src_type.ArrayDepth() == 1u && (src_type.raw_bits_ & kFlagArrayWide) != 0u);
-  // Mark all three array type bits so that merging any other type bits will not change this type.
-  return Copy(Type((raw_bits_ & kMaskNonArray) |
-                   (1u << kBitArrayDepthStart) | kFlagArrayCore | kFlagArrayRef | kFlagArrayFp |
-                   kFlagArrayNarrow | (size_conflict ? kFlagArrayWide : 0u)));
-}
-
-bool TypeInference::Type::MergeStrong(Type src_type) {
-  bool changed = MergeNonArrayFlags(src_type);
-  if (src_type.ArrayDepth() != 0u) {
-    if (ArrayDepth() == 0u) {
-      DCHECK_EQ(raw_bits_ & ~kMaskNonArray, 0u);
-      DCHECK_NE(src_type.raw_bits_ & kFlagRef, 0u);
-      raw_bits_ |= src_type.raw_bits_ & (~kMaskNonArray | kFlagRef);
-      changed = true;
-    } else if (ArrayDepth() == src_type.ArrayDepth()) {
-      changed |= MergeBits(src_type, kMaskArrayWideAndType);
-    } else if (src_type.ArrayDepth() == 1u &&
-        (((src_type.raw_bits_ ^ UnknownArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u ||
-         ((src_type.raw_bits_ ^ ObjectArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u)) {
-      // Source type is [L or [? but current type is at least [[, preserve it.
-    } else if (ArrayDepth() == 1u &&
-        (((raw_bits_ ^ UnknownArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u ||
-         ((raw_bits_ ^ ObjectArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u)) {
-      // Overwrite [? or [L with the source array type which is at least [[.
-      raw_bits_ = (raw_bits_ & kMaskNonArray) | (src_type.raw_bits_ & ~kMaskNonArray);
-      changed = true;
-    } else {
-      // Mark the array value type with conflict - both ref and fp.
-      changed |= MergeArrayConflict(src_type);
-    }
-  }
-  return changed;
-}
-
-bool TypeInference::Type::MergeWeak(Type src_type) {
-  bool changed = MergeNonArrayFlags(src_type);
-  if (src_type.ArrayDepth() != 0u && src_type.NonNull()) {
-    DCHECK_NE(src_type.ArrayDepth(), 0u);
-    if (ArrayDepth() == 0u) {
-      DCHECK_EQ(raw_bits_ & ~kMaskNonArray, 0u);
-      // Preserve current type.
-    } else if (ArrayDepth() == src_type.ArrayDepth()) {
-      changed |= MergeBits(src_type, kMaskArrayWideAndType);
-    } else if (src_type.ArrayDepth() == 1u &&
-        (((src_type.raw_bits_ ^ UnknownArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u ||
-         ((src_type.raw_bits_ ^ ObjectArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u)) {
-      // Source type is [L or [? but current type is at least [[, preserve it.
-    } else if (ArrayDepth() == 1u &&
-        (((raw_bits_ ^ UnknownArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u ||
-         ((raw_bits_ ^ ObjectArrayType().raw_bits_) & kMaskArrayWideAndType) == 0u)) {
-      // We have [? or [L. If it's [?, upgrade to [L as the source array type is at least [[.
-      changed |= MergeBits(ObjectArrayType(), kMaskArrayWideAndType);
-    } else {
-      // Mark the array value type with conflict - both ref and fp.
-      changed |= MergeArrayConflict(src_type);
-    }
-  }
-  return changed;
-}
-
-TypeInference::CheckCastData::CheckCastData(MIRGraph* mir_graph, ScopedArenaAllocator* alloc)
-    : mir_graph_(mir_graph),
-      alloc_(alloc),
-      num_blocks_(mir_graph->GetNumBlocks()),
-      num_sregs_(mir_graph->GetNumSSARegs()),
-      check_cast_map_(std::less<MIR*>(), alloc->Adapter()),
-      split_sreg_data_(std::less<int32_t>(), alloc->Adapter()) {
-}
-
-void TypeInference::CheckCastData::AddCheckCast(MIR* check_cast, Type type) {
-  DCHECK_EQ(check_cast->dalvikInsn.opcode, Instruction::CHECK_CAST);
-  type.CheckPureRef();
-  int32_t extra_s_reg = static_cast<int32_t>(num_sregs_);
-  num_sregs_ += 1;
-  check_cast_map_.Put(check_cast, CheckCastMapValue{extra_s_reg, type});  // NOLINT
-  int32_t s_reg = check_cast->ssa_rep->uses[0];
-  auto lb = split_sreg_data_.lower_bound(s_reg);
-  if (lb == split_sreg_data_.end() || split_sreg_data_.key_comp()(s_reg, lb->first)) {
-    SplitSRegData split_s_reg_data = {
-        0,
-        alloc_->AllocArray<int32_t>(num_blocks_, kArenaAllocMisc),
-        alloc_->AllocArray<int32_t>(num_blocks_, kArenaAllocMisc),
-        new (alloc_) ArenaBitVector(alloc_, num_blocks_, false)
-    };
-    std::fill_n(split_s_reg_data.starting_mod_s_reg, num_blocks_, INVALID_SREG);
-    std::fill_n(split_s_reg_data.ending_mod_s_reg, num_blocks_, INVALID_SREG);
-    split_s_reg_data.def_phi_blocks_->ClearAllBits();
-    BasicBlock* def_bb = FindDefBlock(check_cast);
-    split_s_reg_data.ending_mod_s_reg[def_bb->id] = s_reg;
-    split_s_reg_data.def_phi_blocks_->SetBit(def_bb->id);
-    lb = split_sreg_data_.PutBefore(lb, s_reg, split_s_reg_data);
-  }
-  lb->second.ending_mod_s_reg[check_cast->bb] = extra_s_reg;
-  lb->second.def_phi_blocks_->SetBit(check_cast->bb);
-}
-
-void TypeInference::CheckCastData::AddPseudoPhis() {
-  // Look for pseudo-phis where a split SSA reg merges with a differently typed version
-  // and initialize all starting_mod_s_reg.
-  DCHECK(!split_sreg_data_.empty());
-  ArenaBitVector* phi_blocks = new (alloc_) ArenaBitVector(alloc_, num_blocks_, false);
-
-  for (auto& entry : split_sreg_data_) {
-    SplitSRegData& data = entry.second;
-
-    // Find pseudo-phi nodes.
-    phi_blocks->ClearAllBits();
-    ArenaBitVector* input_blocks = data.def_phi_blocks_;
-    do {
-      for (uint32_t idx : input_blocks->Indexes()) {
-        BasicBlock* def_bb = mir_graph_->GetBasicBlock(idx);
-        if (def_bb->dom_frontier != nullptr) {
-          phi_blocks->Union(def_bb->dom_frontier);
-        }
-      }
-    } while (input_blocks->Union(phi_blocks));
-
-    // Find live pseudo-phis. Make sure they're merging the same SSA reg.
-    data.def_phi_blocks_->ClearAllBits();
-    int32_t s_reg = entry.first;
-    int v_reg = mir_graph_->SRegToVReg(s_reg);
-    for (uint32_t phi_bb_id : phi_blocks->Indexes()) {
-      BasicBlock* phi_bb = mir_graph_->GetBasicBlock(phi_bb_id);
-      DCHECK(phi_bb != nullptr);
-      DCHECK(phi_bb->data_flow_info != nullptr);
-      DCHECK(phi_bb->data_flow_info->live_in_v != nullptr);
-      if (IsSRegLiveAtStart(phi_bb, v_reg, s_reg)) {
-        int32_t extra_s_reg = static_cast<int32_t>(num_sregs_);
-        num_sregs_ += 1;
-        data.starting_mod_s_reg[phi_bb_id] = extra_s_reg;
-        data.def_phi_blocks_->SetBit(phi_bb_id);
-      }
-    }
-
-    // SSA rename for s_reg.
-    TopologicalSortIterator iter(mir_graph_);
-    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-      if (bb->data_flow_info == nullptr || bb->block_type == kEntryBlock) {
-        continue;
-      }
-      BasicBlockId bb_id = bb->id;
-      if (data.def_phi_blocks_->IsBitSet(bb_id)) {
-        DCHECK_NE(data.starting_mod_s_reg[bb_id], INVALID_SREG);
-      } else {
-        DCHECK_EQ(data.starting_mod_s_reg[bb_id], INVALID_SREG);
-        if (IsSRegLiveAtStart(bb, v_reg, s_reg)) {
-          // The earliest predecessor must have been processed already.
-          BasicBlock* pred_bb = FindTopologicallyEarliestPredecessor(bb);
-          int32_t mod_s_reg = data.ending_mod_s_reg[pred_bb->id];
-          data.starting_mod_s_reg[bb_id] = (mod_s_reg != INVALID_SREG) ? mod_s_reg : s_reg;
-        } else if (data.ending_mod_s_reg[bb_id] != INVALID_SREG) {
-          // Start the original defining block with s_reg.
-          data.starting_mod_s_reg[bb_id] = s_reg;
-        }
-      }
-      if (data.ending_mod_s_reg[bb_id] == INVALID_SREG) {
-        // If the block doesn't define the modified SSA reg, it propagates the starting type.
-        data.ending_mod_s_reg[bb_id] = data.starting_mod_s_reg[bb_id];
-      }
-    }
-  }
-}
-
-void TypeInference::CheckCastData::InitializeCheckCastSRegs(Type* sregs) const {
-  for (const auto& entry : check_cast_map_) {
-    DCHECK_LT(static_cast<size_t>(entry.second.modified_s_reg), num_sregs_);
-    sregs[entry.second.modified_s_reg] = entry.second.type.AsNonNull();
-  }
-}
-
-void TypeInference::CheckCastData::MergeCheckCastConflicts(Type* sregs) const {
-  for (const auto& entry : check_cast_map_) {
-    DCHECK_LT(static_cast<size_t>(entry.second.modified_s_reg), num_sregs_);
-    sregs[entry.first->ssa_rep->uses[0]].MergeNonArrayFlags(
-        sregs[entry.second.modified_s_reg].AsNull());
-  }
-}
-
-void TypeInference::CheckCastData::MarkPseudoPhiBlocks(uint64_t* bb_df_attrs) const {
-  for (auto& entry : split_sreg_data_) {
-    for (uint32_t bb_id : entry.second.def_phi_blocks_->Indexes()) {
-      bb_df_attrs[bb_id] |= DF_NULL_TRANSFER_N;
-    }
-  }
-}
-
-void TypeInference::CheckCastData::Start(BasicBlock* bb) {
-  for (auto& entry : split_sreg_data_) {
-    entry.second.current_mod_s_reg = entry.second.starting_mod_s_reg[bb->id];
-  }
-}
-
-bool TypeInference::CheckCastData::ProcessPseudoPhis(BasicBlock* bb, Type* sregs) {
-  bool changed = false;
-  for (auto& entry : split_sreg_data_) {
-    DCHECK_EQ(entry.second.current_mod_s_reg, entry.second.starting_mod_s_reg[bb->id]);
-    if (entry.second.def_phi_blocks_->IsBitSet(bb->id)) {
-      int32_t* ending_mod_s_reg = entry.second.ending_mod_s_reg;
-      Type merged_type = sregs[entry.second.current_mod_s_reg];
-      for (BasicBlockId pred_id : bb->predecessors) {
-        DCHECK_LT(static_cast<size_t>(ending_mod_s_reg[pred_id]), num_sregs_);
-        merged_type.MergeWeak(sregs[ending_mod_s_reg[pred_id]]);
-      }
-      if (UNLIKELY(!merged_type.IsDefined())) {
-        // This can happen during an initial merge of a loop head if the original def is
-        // actually an untyped null. (All other definitions are typed using the check-cast.)
-      } else if (merged_type.Wide()) {
-        // Ignore the pseudo-phi, just remember that there's a size mismatch.
-        sregs[entry.second.current_mod_s_reg].MarkSizeConflict();
-      } else {
-        DCHECK(merged_type.Narrow() && merged_type.LowWord() && !merged_type.HighWord());
-        // Propagate both down (fully) and up (without the "non-null" flag).
-        changed |= sregs[entry.second.current_mod_s_reg].Copy(merged_type);
-        merged_type = merged_type.AsNull();
-        for (BasicBlockId pred_id : bb->predecessors) {
-          DCHECK_LT(static_cast<size_t>(ending_mod_s_reg[pred_id]), num_sregs_);
-          sregs[ending_mod_s_reg[pred_id]].MergeStrong(merged_type);
-        }
-      }
-    }
-  }
-  return changed;
-}
-
-void TypeInference::CheckCastData::ProcessCheckCast(MIR* mir) {
-  auto mir_it = check_cast_map_.find(mir);
-  DCHECK(mir_it != check_cast_map_.end());
-  auto sreg_it = split_sreg_data_.find(mir->ssa_rep->uses[0]);
-  DCHECK(sreg_it != split_sreg_data_.end());
-  sreg_it->second.current_mod_s_reg = mir_it->second.modified_s_reg;
-}
-
-TypeInference::SplitSRegData* TypeInference::CheckCastData::GetSplitSRegData(int32_t s_reg) {
-  auto it = split_sreg_data_.find(s_reg);
-  return (it == split_sreg_data_.end()) ? nullptr : &it->second;
-}
-
-BasicBlock* TypeInference::CheckCastData::FindDefBlock(MIR* check_cast) {
-  // Find the initial definition of the SSA reg used by the check-cast.
-  DCHECK_EQ(check_cast->dalvikInsn.opcode, Instruction::CHECK_CAST);
-  int32_t s_reg = check_cast->ssa_rep->uses[0];
-  if (mir_graph_->IsInVReg(s_reg)) {
-    return mir_graph_->GetEntryBlock();
-  }
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  BasicBlock* bb = mir_graph_->GetBasicBlock(check_cast->bb);
-  DCHECK(bb != nullptr);
-  while (true) {
-    // Find the earliest predecessor in the topological sort order to ensure we don't
-    // go in a loop.
-    BasicBlock* pred_bb = FindTopologicallyEarliestPredecessor(bb);
-    DCHECK(pred_bb != nullptr);
-    DCHECK(pred_bb->data_flow_info != nullptr);
-    DCHECK(pred_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
-    if (pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] != s_reg) {
-      // The s_reg was not valid at the end of pred_bb, so it must have been defined in bb.
-      return bb;
-    }
-    bb = pred_bb;
-  }
-}
-
-BasicBlock* TypeInference::CheckCastData::FindTopologicallyEarliestPredecessor(BasicBlock* bb) {
-  DCHECK(!bb->predecessors.empty());
-  const auto& indexes = mir_graph_->GetTopologicalSortOrderIndexes();
-  DCHECK_LT(bb->id, indexes.size());
-  size_t best_idx = indexes[bb->id];
-  BasicBlockId best_id = NullBasicBlockId;
-  for (BasicBlockId pred_id : bb->predecessors) {
-    DCHECK_LT(pred_id, indexes.size());
-    if (best_idx > indexes[pred_id]) {
-      best_idx = indexes[pred_id];
-      best_id = pred_id;
-    }
-  }
-  // There must be at least one predecessor earlier than the bb.
-  DCHECK_LT(best_idx, indexes[bb->id]);
-  return mir_graph_->GetBasicBlock(best_id);
-}
-
-bool TypeInference::CheckCastData::IsSRegLiveAtStart(BasicBlock* bb, int v_reg, int32_t s_reg) {
-  DCHECK_EQ(v_reg, mir_graph_->SRegToVReg(s_reg));
-  DCHECK(bb != nullptr);
-  DCHECK(bb->data_flow_info != nullptr);
-  DCHECK(bb->data_flow_info->live_in_v != nullptr);
-  if (!bb->data_flow_info->live_in_v->IsBitSet(v_reg)) {
-    return false;
-  }
-  for (BasicBlockId pred_id : bb->predecessors) {
-    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id);
-    DCHECK(pred_bb != nullptr);
-    DCHECK(pred_bb->data_flow_info != nullptr);
-    DCHECK(pred_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
-    if (pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] != s_reg) {
-      return false;
-    }
-  }
-  return true;
-}
-
-TypeInference::TypeInference(MIRGraph* mir_graph, ScopedArenaAllocator* alloc)
-    : mir_graph_(mir_graph),
-      cu_(mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit()),
-      check_cast_data_(!mir_graph->HasCheckCast() ? nullptr :
-          InitializeCheckCastData(mir_graph, alloc)),
-      num_sregs_(
-          check_cast_data_ != nullptr ? check_cast_data_->NumSRegs() : mir_graph->GetNumSSARegs()),
-      ifields_(mir_graph->GetIFieldLoweringInfoCount() == 0u ? nullptr :
-          PrepareIFieldTypes(cu_->dex_file, mir_graph, alloc)),
-      sfields_(mir_graph->GetSFieldLoweringInfoCount() == 0u ? nullptr :
-          PrepareSFieldTypes(cu_->dex_file, mir_graph, alloc)),
-      signatures_(mir_graph->GetMethodLoweringInfoCount() == 0u ? nullptr :
-          PrepareSignatures(cu_->dex_file, mir_graph, alloc)),
-      current_method_signature_(
-          Signature(cu_->dex_file, cu_->method_idx, (cu_->access_flags & kAccStatic) != 0, alloc)),
-      sregs_(alloc->AllocArray<Type>(num_sregs_, kArenaAllocMisc)),
-      bb_df_attrs_(alloc->AllocArray<uint64_t>(mir_graph->GetNumBlocks(), kArenaAllocDFInfo)) {
-  InitializeSRegs();
-}
-
-bool TypeInference::Apply(BasicBlock* bb) {
-  bool changed = false;
-  uint64_t bb_df_attrs = bb_df_attrs_[bb->id];
-  if (bb_df_attrs != 0u) {
-    if (UNLIKELY(check_cast_data_ != nullptr)) {
-      check_cast_data_->Start(bb);
-      if (bb_df_attrs & DF_NULL_TRANSFER_N) {
-        changed |= check_cast_data_->ProcessPseudoPhis(bb, sregs_);
-      }
-    }
-    MIR* mir = bb->first_mir_insn;
-    MIR* main_mirs_end = ((bb_df_attrs & DF_SAME_TYPE_AB) != 0u) ? bb->last_mir_insn : nullptr;
-    for (; mir != main_mirs_end && static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi;
-        mir = mir->next) {
-      // Special-case handling for Phi comes first because we have 2 Phis instead of a wide one.
-      // At least one input must have been previously processed. Look for the first
-      // occurrence of a high_word or low_word flag to determine the type.
-      size_t num_uses = mir->ssa_rep->num_uses;
-      const int32_t* uses = mir->ssa_rep->uses;
-      const int32_t* defs = mir->ssa_rep->defs;
-      DCHECK_EQ(bb->predecessors.size(), num_uses);
-      Type merged_type = sregs_[defs[0]];
-      for (size_t pred_idx = 0; pred_idx != num_uses; ++pred_idx) {
-        int32_t input_mod_s_reg = PhiInputModifiedSReg(uses[pred_idx], bb, pred_idx);
-        merged_type.MergeWeak(sregs_[input_mod_s_reg]);
-      }
-      if (UNLIKELY(!merged_type.IsDefined())) {
-        // No change
-      } else if (merged_type.HighWord()) {
-        // Ignore the high word phi, just remember if there's a size mismatch.
-        if (UNLIKELY(merged_type.LowWord())) {
-          sregs_[defs[0]].MarkSizeConflict();
-        }
-      } else {
-        // Propagate both down (fully) and up (without the "non-null" flag).
-        changed |= sregs_[defs[0]].Copy(merged_type);
-        merged_type = merged_type.AsNull();
-        for (size_t pred_idx = 0; pred_idx != num_uses; ++pred_idx) {
-          int32_t input_mod_s_reg = PhiInputModifiedSReg(uses[pred_idx], bb, pred_idx);
-          changed |= UpdateSRegFromLowWordType(input_mod_s_reg, merged_type);
-        }
-      }
-    }
-
-    // Propagate types with MOVEs and AGETs, process CHECK_CASTs for modified SSA reg tracking.
-    for (; mir != main_mirs_end; mir = mir->next) {
-      uint64_t attrs = MIRGraph::GetDataFlowAttributes(mir);
-      size_t num_uses = mir->ssa_rep->num_uses;
-      const int32_t* uses = mir->ssa_rep->uses;
-      const int32_t* defs = mir->ssa_rep->defs;
-
-      // Special handling for moves. Propagate type both ways.
-      if ((attrs & DF_IS_MOVE) != 0) {
-        int32_t used_mod_s_reg = ModifiedSReg(uses[0]);
-        int32_t defd_mod_s_reg = defs[0];
-
-        // The "non-null" flag is propagated only downwards from actual definitions and it's
-        // not initially marked for moves, so used sreg must be marked before defined sreg.
-        // The only exception is an inlined move where we know the type from the original invoke.
-        DCHECK(sregs_[used_mod_s_reg].NonNull() || !sregs_[defd_mod_s_reg].NonNull() ||
-               (mir->optimization_flags & MIR_CALLEE) != 0);
-        changed |= UpdateSRegFromLowWordType(used_mod_s_reg, sregs_[defd_mod_s_reg].AsNull());
-
-        // The value is the same, so either both registers are null or no register is.
-        // In any case we can safely propagate the array type down.
-        changed |= UpdateSRegFromLowWordType(defd_mod_s_reg, sregs_[used_mod_s_reg]);
-        if (UNLIKELY((attrs & DF_REF_A) == 0 && sregs_[used_mod_s_reg].Ref())) {
-          // Mark type conflict: move instead of move-object.
-          sregs_[used_mod_s_reg].MarkTypeConflict();
-        }
-        continue;
-      }
-
-      // Handle AGET/APUT.
-      if ((attrs & DF_HAS_RANGE_CHKS) != 0) {
-        int32_t base_mod_s_reg = ModifiedSReg(uses[num_uses - 2u]);
-        int32_t mod_s_reg = (attrs & DF_DA) != 0 ? defs[0] : ModifiedSReg(uses[0]);
-        DCHECK_NE(sregs_[base_mod_s_reg].ArrayDepth(), 0u);
-        if (!sregs_[base_mod_s_reg].NonNull()) {
-          // If the base is null, don't propagate anything. All that we could determine
-          // has already been merged in the previous stage.
-        } else {
-          changed |= UpdateSRegFromLowWordType(mod_s_reg, sregs_[base_mod_s_reg].ComponentType());
-          Type array_type = Type::ArrayTypeFromComponent(sregs_[mod_s_reg]);
-          if ((attrs & DF_DA) != 0) {
-            changed |= sregs_[base_mod_s_reg].MergeStrong(array_type);
-          } else {
-            changed |= sregs_[base_mod_s_reg].MergeWeak(array_type);
-          }
-        }
-        if (UNLIKELY((attrs & DF_REF_A) == 0 && sregs_[mod_s_reg].Ref())) {
-          // Mark type conflict: aget/aput instead of aget/aput-object.
-          sregs_[mod_s_reg].MarkTypeConflict();
-        }
-        continue;
-      }
-
-      // Special-case handling for check-cast to advance modified SSA reg.
-      if (UNLIKELY((attrs & DF_CHK_CAST) != 0)) {
-        DCHECK(check_cast_data_ != nullptr);
-        check_cast_data_->ProcessCheckCast(mir);
-      }
-    }
-
-    // Propagate types for IF_cc if present.
-    if (mir != nullptr) {
-      DCHECK(mir == bb->last_mir_insn);
-      DCHECK(mir->next == nullptr);
-      DCHECK_NE(MIRGraph::GetDataFlowAttributes(mir) & DF_SAME_TYPE_AB, 0u);
-      DCHECK_EQ(mir->ssa_rep->num_uses, 2u);
-      const int32_t* uses = mir->ssa_rep->uses;
-      int32_t mod_s_reg0 = ModifiedSReg(uses[0]);
-      int32_t mod_s_reg1 = ModifiedSReg(uses[1]);
-      changed |= sregs_[mod_s_reg0].MergeWeak(sregs_[mod_s_reg1].AsNull());
-      changed |= sregs_[mod_s_reg1].MergeWeak(sregs_[mod_s_reg0].AsNull());
-    }
-  }
-  return changed;
-}
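-
-// Editorial note (illustrative): Apply() is the transfer function of a
-// fixed-point iteration -- it returns true while any s_reg type still
-// changes.  Moves and if-cc propagate types in both directions (stripping
-// the non-null flag on the way up), so types only ever gain bits and the
-// iteration terminates.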
-
-void TypeInference::Finish() {
-  if (UNLIKELY(check_cast_data_ != nullptr)) {
-    check_cast_data_->MergeCheckCastConflicts(sregs_);
-  }
-
-  size_t num_sregs = mir_graph_->GetNumSSARegs();  // Without the extra SSA regs.
-  for (size_t s_reg = 0; s_reg != num_sregs; ++s_reg) {
-    if (sregs_[s_reg].SizeConflict()) {
-      /*
-       * The dex bytecode definition does not explicitly outlaw using the same
-       * virtual register in both a 32-bit and a 64-bit pair context.  However,
-       * dx does not generate this pattern (at least not recently).  Further,
-       * the next revision of dex will forbid it.  To support the few cases in
-       * the wild, detect this pattern and punt to the interpreter.
-       */
-      LOG(WARNING) << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-                   << " has size conflict block for sreg " << s_reg
-                   << ", punting to interpreter.";
-      mir_graph_->SetPuntToInterpreter(true);
-      return;
-    }
-  }
-
-  size_t conflict_s_reg = 0;
-  bool type_conflict = false;
-  for (size_t s_reg = 0; s_reg != num_sregs; ++s_reg) {
-    Type type = sregs_[s_reg];
-    RegLocation* loc = &mir_graph_->reg_location_[s_reg];
-    loc->wide = type.Wide();
-    loc->defined = type.IsDefined();
-    loc->fp = type.Fp();
-    loc->core = type.Core();
-    loc->ref = type.Ref();
-    loc->high_word = type.HighWord();
-    if (UNLIKELY(type.TypeConflict())) {
-      type_conflict = true;
-      conflict_s_reg = s_reg;
-    }
-  }
-
-  if (type_conflict) {
-    /*
-     * Each Dalvik register definition should be used either as a reference, an
-     * integer, or a floating-point value.  We don't normally expect a definition
-     * to be used in two or three of these roles, though technically that can
-     * happen with constants (0 for all three roles, non-zero for integer and
-     * FP).  Detect this situation and disable optimizations that rely on
-     * correct typing, i.e. register promotion, GVN/LVN and GVN-based DCE.
-     */
-    LOG(WARNING) << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-                 << " has type conflict block for sreg " << conflict_s_reg
-                 << ", disabling register promotion.";
-    cu_->disable_opt |=
-        (1u << kPromoteRegs) |
-        (1u << kGlobalValueNumbering) |
-        (1u << kGvnDeadCodeElimination) |
-        (1u << kLocalValueNumbering);
-  }
-}
-
-TypeInference::Type TypeInference::FieldType(const DexFile* dex_file, uint32_t field_idx) {
-  uint32_t type_idx = dex_file->GetFieldId(field_idx).type_idx_;
-  Type result = Type::DexType(dex_file, type_idx);
-  return result;
-}
-
-TypeInference::Type* TypeInference::PrepareIFieldTypes(const DexFile* dex_file,
-                                                       MIRGraph* mir_graph,
-                                                       ScopedArenaAllocator* alloc) {
-  size_t count = mir_graph->GetIFieldLoweringInfoCount();
-  Type* ifields = alloc->AllocArray<Type>(count, kArenaAllocDFInfo);
-  for (uint32_t i = 0u; i != count; ++i) {
-    // NOTE: Quickened field accesses have invalid FieldIndex() but they are always resolved.
-    const MirFieldInfo& info = mir_graph->GetIFieldLoweringInfo(i);
-    const DexFile* current_dex_file = info.IsResolved() ? info.DeclaringDexFile() : dex_file;
-    uint32_t field_idx = info.IsResolved() ? info.DeclaringFieldIndex() : info.FieldIndex();
-    ifields[i] = FieldType(current_dex_file, field_idx);
-    DCHECK_EQ(info.MemAccessType() == kDexMemAccessWide, ifields[i].Wide());
-    DCHECK_EQ(info.MemAccessType() == kDexMemAccessObject, ifields[i].Ref());
-  }
-  return ifields;
-}
-
-TypeInference::Type* TypeInference::PrepareSFieldTypes(const DexFile* dex_file,
-                                                       MIRGraph* mir_graph,
-                                                       ScopedArenaAllocator* alloc) {
-  size_t count = mir_graph->GetSFieldLoweringInfoCount();
-  Type* sfields = alloc->AllocArray<Type>(count, kArenaAllocDFInfo);
-  for (uint32_t i = 0u; i != count; ++i) {
-    // FieldIndex() is always valid for static fields (no quickened instructions).
-    sfields[i] = FieldType(dex_file, mir_graph->GetSFieldLoweringInfo(i).FieldIndex());
-  }
-  return sfields;
-}
-
-TypeInference::MethodSignature TypeInference::Signature(const DexFile* dex_file,
-                                                        uint32_t method_idx,
-                                                        bool is_static,
-                                                        ScopedArenaAllocator* alloc) {
-  const DexFile::MethodId& method_id = dex_file->GetMethodId(method_idx);
-  const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
-  Type return_type = Type::DexType(dex_file, proto_id.return_type_idx_);
-  const DexFile::TypeList* type_list = dex_file->GetProtoParameters(proto_id);
-  size_t this_size = (is_static ? 0u : 1u);
-  size_t param_size = ((type_list != nullptr) ? type_list->Size() : 0u);
-  size_t size = this_size + param_size;
-  Type* param_types = (size != 0u) ? alloc->AllocArray<Type>(size, kArenaAllocDFInfo) : nullptr;
-  if (!is_static) {
-    param_types[0] = Type::DexType(dex_file, method_id.class_idx_);
-  }
-  for (size_t i = 0; i != param_size; ++i) {
-    uint32_t type_idx = type_list->GetTypeItem(i).type_idx_;
-    param_types[this_size + i] = Type::DexType(dex_file, type_idx);
-  }
-  return MethodSignature{ return_type, size, param_types };  // NOLINT
-}
-
-TypeInference::MethodSignature* TypeInference::PrepareSignatures(const DexFile* dex_file,
-                                                                 MIRGraph* mir_graph,
-                                                                 ScopedArenaAllocator* alloc) {
-  size_t count = mir_graph->GetMethodLoweringInfoCount();
-  MethodSignature* signatures = alloc->AllocArray<MethodSignature>(count, kArenaAllocDFInfo);
-  for (uint32_t i = 0u; i != count; ++i) {
-    // NOTE: Quickened invokes have invalid MethodIndex() but they are always resolved.
-    const MirMethodInfo& info = mir_graph->GetMethodLoweringInfo(i);
-    uint32_t method_idx = info.IsResolved() ? info.DeclaringMethodIndex() : info.MethodIndex();
-    const DexFile* current_dex_file = info.IsResolved() ? info.DeclaringDexFile() : dex_file;
-    signatures[i] = Signature(current_dex_file, method_idx, info.IsStatic(), alloc);
-  }
-  return signatures;
-}
-
-TypeInference::CheckCastData* TypeInference::InitializeCheckCastData(MIRGraph* mir_graph,
-                                                                     ScopedArenaAllocator* alloc) {
-  if (!mir_graph->HasCheckCast()) {
-    return nullptr;
-  }
-
-  CheckCastData* data = nullptr;
-  const DexFile* dex_file = nullptr;
-  PreOrderDfsIterator iter(mir_graph);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      if (mir->dalvikInsn.opcode == Instruction::CHECK_CAST) {
-        if (data == nullptr) {
-          data = new (alloc) CheckCastData(mir_graph, alloc);
-          dex_file = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit()->dex_file;
-        }
-        Type type = Type::DexType(dex_file, mir->dalvikInsn.vB);
-        data->AddCheckCast(mir, type);
-      }
-    }
-  }
-  if (data != nullptr) {
-    data->AddPseudoPhis();
-  }
-  return data;
-}
-
-void TypeInference::InitializeSRegs() {
-  std::fill_n(sregs_, num_sregs_, Type::Unknown());
-
-  /* Treat ArtMethod* specially since they are pointer sized */
-  sregs_[mir_graph_->GetMethodSReg()] = Type::ArtMethodType(cu_->target64);
-
-  // Initialize parameter SSA regs at method entry.
-  int32_t entry_param_s_reg = mir_graph_->GetFirstInVR();
-  for (size_t i = 0, size = current_method_signature_.num_params; i != size; ++i) {
-    Type param_type = current_method_signature_.param_types[i].AsNonNull();
-    sregs_[entry_param_s_reg] = param_type;
-    entry_param_s_reg += param_type.Wide() ? 2 : 1;
-  }
-  DCHECK_EQ(static_cast<uint32_t>(entry_param_s_reg),
-            mir_graph_->GetFirstInVR() + mir_graph_->GetNumOfInVRs());
-
-  // Initialize check-cast types.
-  if (UNLIKELY(check_cast_data_ != nullptr)) {
-    check_cast_data_->InitializeCheckCastSRegs(sregs_);
-  }
-
-  // Initialize well-known SSA register definition types. Merge inferred types
-  // upwards where a single merge is enough (INVOKE arguments and return type,
-  // RETURN type, IPUT/SPUT source type).
-  // NOTE: Using topological sort order to make sure the definition comes before
-  // any upward merging. This allows simple assignment of the defined types
-  // instead of MergeStrong().
-  TopologicalSortIterator iter(mir_graph_);
-  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
-    uint64_t bb_df_attrs = 0u;
-    if (UNLIKELY(check_cast_data_ != nullptr)) {
-      check_cast_data_->Start(bb);
-    }
-    // Ignore pseudo-phis, we're not setting types for SSA regs that depend on them in this pass.
-    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-      uint64_t attrs = MIRGraph::GetDataFlowAttributes(mir);
-      bb_df_attrs |= attrs;
-
-      const uint32_t num_uses = mir->ssa_rep->num_uses;
-      const int32_t* uses = mir->ssa_rep->uses;
-      const int32_t* defs = mir->ssa_rep->defs;
-
-      uint16_t opcode = mir->dalvikInsn.opcode;
-      switch (opcode) {
-        case Instruction::CONST_4:
-        case Instruction::CONST_16:
-        case Instruction::CONST:
-        case Instruction::CONST_HIGH16:
-        case Instruction::CONST_WIDE_16:
-        case Instruction::CONST_WIDE_32:
-        case Instruction::CONST_WIDE:
-        case Instruction::CONST_WIDE_HIGH16:
-        case Instruction::MOVE:
-        case Instruction::MOVE_FROM16:
-        case Instruction::MOVE_16:
-        case Instruction::MOVE_WIDE:
-        case Instruction::MOVE_WIDE_FROM16:
-        case Instruction::MOVE_WIDE_16:
-        case Instruction::MOVE_OBJECT:
-        case Instruction::MOVE_OBJECT_FROM16:
-        case Instruction::MOVE_OBJECT_16:
-          if ((mir->optimization_flags & MIR_CALLEE) != 0) {
-            // Inlined const/move keeps method_lowering_info for type inference.
-            DCHECK_LT(mir->meta.method_lowering_info, mir_graph_->GetMethodLoweringInfoCount());
-            Type return_type = signatures_[mir->meta.method_lowering_info].return_type;
-            DCHECK(return_type.IsDefined());  // Method return type can't be void.
-            sregs_[defs[0]] = return_type.AsNonNull();
-            if (return_type.Wide()) {
-              DCHECK_EQ(defs[0] + 1, defs[1]);
-              sregs_[defs[1]] = return_type.ToHighWord();
-            }
-            break;
-          }
-          FALLTHROUGH_INTENDED;
-        case kMirOpPhi:
-          // These cannot be determined in this simple pass and will be processed later.
-          break;
-
-        case Instruction::MOVE_RESULT:
-        case Instruction::MOVE_RESULT_WIDE:
-        case Instruction::MOVE_RESULT_OBJECT:
-          // Nothing to do, handled with invoke-* or filled-new-array/-range.
-          break;
-        case Instruction::MOVE_EXCEPTION:
-          // NOTE: We can never catch an array.
-          sregs_[defs[0]] = Type::NonArrayRefType().AsNonNull();
-          break;
-        case Instruction::CONST_STRING:
-        case Instruction::CONST_STRING_JUMBO:
-          sregs_[defs[0]] = Type::NonArrayRefType().AsNonNull();
-          break;
-        case Instruction::CONST_CLASS:
-          sregs_[defs[0]] = Type::NonArrayRefType().AsNonNull();
-          break;
-        case Instruction::CHECK_CAST:
-          DCHECK(check_cast_data_ != nullptr);
-          check_cast_data_->ProcessCheckCast(mir);
-          break;
-        case Instruction::ARRAY_LENGTH:
-          sregs_[ModifiedSReg(uses[0])].MergeStrong(Type::UnknownArrayType());
-          break;
-        case Instruction::NEW_INSTANCE:
-          sregs_[defs[0]] = Type::DexType(cu_->dex_file, mir->dalvikInsn.vB).AsNonNull();
-          DCHECK(sregs_[defs[0]].Ref());
-          DCHECK_EQ(sregs_[defs[0]].ArrayDepth(), 0u);
-          break;
-        case Instruction::NEW_ARRAY:
-          sregs_[defs[0]] = Type::DexType(cu_->dex_file, mir->dalvikInsn.vC).AsNonNull();
-          DCHECK(sregs_[defs[0]].Ref());
-          DCHECK_NE(sregs_[defs[0]].ArrayDepth(), 0u);
-          break;
-        case Instruction::FILLED_NEW_ARRAY:
-        case Instruction::FILLED_NEW_ARRAY_RANGE: {
-          Type array_type = Type::DexType(cu_->dex_file, mir->dalvikInsn.vB);
-          array_type.CheckPureRef();  // Previously checked by the method verifier.
-          DCHECK_NE(array_type.ArrayDepth(), 0u);
-          Type component_type = array_type.ComponentType();
-          DCHECK(!component_type.Wide());
-          MIR* move_result_mir = mir_graph_->FindMoveResult(bb, mir);
-          if (move_result_mir != nullptr) {
-            DCHECK_EQ(move_result_mir->dalvikInsn.opcode, Instruction::MOVE_RESULT_OBJECT);
-            sregs_[move_result_mir->ssa_rep->defs[0]] = array_type.AsNonNull();
-          }
-          DCHECK_EQ(num_uses, mir->dalvikInsn.vA);
-          for (size_t next = 0u; next != num_uses; ++next) {
-            int32_t input_mod_s_reg = ModifiedSReg(uses[next]);
-            sregs_[input_mod_s_reg].MergeStrong(component_type);
-          }
-          break;
-        }
-        case Instruction::INVOKE_VIRTUAL:
-        case Instruction::INVOKE_SUPER:
-        case Instruction::INVOKE_DIRECT:
-        case Instruction::INVOKE_STATIC:
-        case Instruction::INVOKE_INTERFACE:
-        case Instruction::INVOKE_VIRTUAL_RANGE:
-        case Instruction::INVOKE_SUPER_RANGE:
-        case Instruction::INVOKE_DIRECT_RANGE:
-        case Instruction::INVOKE_STATIC_RANGE:
-        case Instruction::INVOKE_INTERFACE_RANGE:
-        case Instruction::INVOKE_VIRTUAL_QUICK:
-        case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-          const MethodSignature* signature = &signatures_[mir->meta.method_lowering_info];
-          MIR* move_result_mir = mir_graph_->FindMoveResult(bb, mir);
-          if (move_result_mir != nullptr) {
-            Type return_type = signature->return_type;
-            sregs_[move_result_mir->ssa_rep->defs[0]] = return_type.AsNonNull();
-            if (return_type.Wide()) {
-              DCHECK_EQ(move_result_mir->ssa_rep->defs[0] + 1, move_result_mir->ssa_rep->defs[1]);
-              sregs_[move_result_mir->ssa_rep->defs[1]] = return_type.ToHighWord();
-            }
-          }
-          size_t next = 0u;
-          for (size_t i = 0, size = signature->num_params; i != size; ++i) {
-            Type param_type = signature->param_types[i];
-            int32_t param_s_reg = ModifiedSReg(uses[next]);
-            DCHECK(!param_type.Wide() || uses[next] + 1 == uses[next + 1]);
-            UpdateSRegFromLowWordType(param_s_reg, param_type);
-            next += param_type.Wide() ? 2 : 1;
-          }
-          DCHECK_EQ(next, num_uses);
-          DCHECK_EQ(next, mir->dalvikInsn.vA);
-          break;
-        }
-
-        case Instruction::RETURN_WIDE:
-          DCHECK(current_method_signature_.return_type.Wide());
-          DCHECK_EQ(uses[0] + 1, uses[1]);
-          DCHECK_EQ(ModifiedSReg(uses[0]), uses[0]);
-          FALLTHROUGH_INTENDED;
-        case Instruction::RETURN:
-        case Instruction::RETURN_OBJECT: {
-          int32_t mod_s_reg = ModifiedSReg(uses[0]);
-          UpdateSRegFromLowWordType(mod_s_reg, current_method_signature_.return_type);
-          break;
-        }
-
-        // NOTE: For AGET/APUT we set only the array type. The operand type is set
-        // below based on the data flow attributes.
-        case Instruction::AGET:
-        case Instruction::APUT:
-          sregs_[ModifiedSReg(uses[num_uses - 2u])].MergeStrong(Type::NarrowArrayType());
-          break;
-        case Instruction::AGET_WIDE:
-        case Instruction::APUT_WIDE:
-          sregs_[ModifiedSReg(uses[num_uses - 2u])].MergeStrong(Type::WideArrayType());
-          break;
-        case Instruction::AGET_OBJECT:
-          sregs_[defs[0]] = sregs_[defs[0]].AsNonNull();
-          FALLTHROUGH_INTENDED;
-        case Instruction::APUT_OBJECT:
-          sregs_[ModifiedSReg(uses[num_uses - 2u])].MergeStrong(Type::ObjectArrayType());
-          break;
-        case Instruction::AGET_BOOLEAN:
-        case Instruction::APUT_BOOLEAN:
-        case Instruction::AGET_BYTE:
-        case Instruction::APUT_BYTE:
-        case Instruction::AGET_CHAR:
-        case Instruction::APUT_CHAR:
-        case Instruction::AGET_SHORT:
-        case Instruction::APUT_SHORT:
-          sregs_[ModifiedSReg(uses[num_uses - 2u])].MergeStrong(Type::NarrowCoreArrayType());
-          break;
-
-        case Instruction::IGET_WIDE:
-        case Instruction::IGET_WIDE_QUICK:
-          DCHECK_EQ(defs[0] + 1, defs[1]);
-          DCHECK_LT(mir->meta.ifield_lowering_info, mir_graph_->GetIFieldLoweringInfoCount());
-          sregs_[defs[1]] = ifields_[mir->meta.ifield_lowering_info].ToHighWord();
-          FALLTHROUGH_INTENDED;
-        case Instruction::IGET:
-        case Instruction::IGET_OBJECT:
-        case Instruction::IGET_BOOLEAN:
-        case Instruction::IGET_BYTE:
-        case Instruction::IGET_CHAR:
-        case Instruction::IGET_SHORT:
-        case Instruction::IGET_QUICK:
-        case Instruction::IGET_OBJECT_QUICK:
-        case Instruction::IGET_BOOLEAN_QUICK:
-        case Instruction::IGET_BYTE_QUICK:
-        case Instruction::IGET_CHAR_QUICK:
-        case Instruction::IGET_SHORT_QUICK:
-          DCHECK_LT(mir->meta.ifield_lowering_info, mir_graph_->GetIFieldLoweringInfoCount());
-          sregs_[defs[0]] = ifields_[mir->meta.ifield_lowering_info].AsNonNull();
-          break;
-        case Instruction::IPUT_WIDE:
-        case Instruction::IPUT_WIDE_QUICK:
-          DCHECK_EQ(uses[0] + 1, uses[1]);
-          FALLTHROUGH_INTENDED;
-        case Instruction::IPUT:
-        case Instruction::IPUT_OBJECT:
-        case Instruction::IPUT_BOOLEAN:
-        case Instruction::IPUT_BYTE:
-        case Instruction::IPUT_CHAR:
-        case Instruction::IPUT_SHORT:
-        case Instruction::IPUT_QUICK:
-        case Instruction::IPUT_OBJECT_QUICK:
-        case Instruction::IPUT_BOOLEAN_QUICK:
-        case Instruction::IPUT_BYTE_QUICK:
-        case Instruction::IPUT_CHAR_QUICK:
-        case Instruction::IPUT_SHORT_QUICK:
-          DCHECK_LT(mir->meta.ifield_lowering_info, mir_graph_->GetIFieldLoweringInfoCount());
-          UpdateSRegFromLowWordType(ModifiedSReg(uses[0]),
-                                    ifields_[mir->meta.ifield_lowering_info]);
-          break;
-        case Instruction::SGET_WIDE:
-          DCHECK_EQ(defs[0] + 1, defs[1]);
-          DCHECK_LT(mir->meta.sfield_lowering_info, mir_graph_->GetSFieldLoweringInfoCount());
-          sregs_[defs[1]] = sfields_[mir->meta.sfield_lowering_info].ToHighWord();
-          FALLTHROUGH_INTENDED;
-        case Instruction::SGET:
-        case Instruction::SGET_OBJECT:
-        case Instruction::SGET_BOOLEAN:
-        case Instruction::SGET_BYTE:
-        case Instruction::SGET_CHAR:
-        case Instruction::SGET_SHORT:
-          DCHECK_LT(mir->meta.sfield_lowering_info, mir_graph_->GetSFieldLoweringInfoCount());
-          sregs_[defs[0]] = sfields_[mir->meta.sfield_lowering_info].AsNonNull();
-          break;
-        case Instruction::SPUT_WIDE:
-          DCHECK_EQ(uses[0] + 1, uses[1]);
-          FALLTHROUGH_INTENDED;
-        case Instruction::SPUT:
-        case Instruction::SPUT_OBJECT:
-        case Instruction::SPUT_BOOLEAN:
-        case Instruction::SPUT_BYTE:
-        case Instruction::SPUT_CHAR:
-        case Instruction::SPUT_SHORT:
-          DCHECK_LT(mir->meta.sfield_lowering_info, mir_graph_->GetSFieldLoweringInfoCount());
-          UpdateSRegFromLowWordType(ModifiedSReg(uses[0]),
-                                    sfields_[mir->meta.sfield_lowering_info]);
-          break;
-
-        default:
-          // No invokes or reference definitions here.
-          DCHECK_EQ(attrs & (DF_FORMAT_35C | DF_FORMAT_3RC), 0u);
-          DCHECK_NE(attrs & (DF_DA | DF_REF_A), (DF_DA | DF_REF_A));
-          break;
-      }
-
-      if ((attrs & DF_NULL_TRANSFER_N) != 0) {
-        // Don't process Phis at this stage.
-        continue;
-      }
-
-      // Handle defs
-      if (attrs & DF_DA) {
-        int32_t s_reg = defs[0];
-        sregs_[s_reg].SetLowWord();
-        if (attrs & DF_FP_A) {
-          sregs_[s_reg].SetFp();
-        }
-        if (attrs & DF_CORE_A) {
-          sregs_[s_reg].SetCore();
-        }
-        if (attrs & DF_REF_A) {
-          sregs_[s_reg].SetRef();
-        }
-        if (attrs & DF_A_WIDE) {
-          sregs_[s_reg].SetWide();
-          DCHECK_EQ(s_reg + 1, ModifiedSReg(defs[1]));
-          sregs_[s_reg + 1].MergeHighWord(sregs_[s_reg]);
-        } else {
-          sregs_[s_reg].SetNarrow();
-        }
-      }
-
-      // Handle uses
-      size_t next = 0;
-  #define PROCESS(REG)                                                        \
-      if (attrs & DF_U##REG) {                                                \
-        int32_t mod_s_reg = ModifiedSReg(uses[next]);                         \
-        sregs_[mod_s_reg].SetLowWord();                                       \
-        if (attrs & DF_FP_##REG) {                                            \
-          sregs_[mod_s_reg].SetFp();                                          \
-        }                                                                     \
-        if (attrs & DF_CORE_##REG) {                                          \
-          sregs_[mod_s_reg].SetCore();                                        \
-        }                                                                     \
-        if (attrs & DF_REF_##REG) {                                           \
-          sregs_[mod_s_reg].SetRef();                                         \
-        }                                                                     \
-        if (attrs & DF_##REG##_WIDE) {                                        \
-          sregs_[mod_s_reg].SetWide();                                        \
-          DCHECK_EQ(mod_s_reg + 1, ModifiedSReg(uses[next + 1]));             \
-          sregs_[mod_s_reg + 1].SetWide();                                    \
-          sregs_[mod_s_reg + 1].MergeHighWord(sregs_[mod_s_reg]);             \
-          next += 2;                                                          \
-        } else {                                                              \
-          sregs_[mod_s_reg].SetNarrow();                                      \
-          next++;                                                             \
-        }                                                                     \
-      }
-      PROCESS(A)
-      PROCESS(B)
-      PROCESS(C)
-  #undef PROCESS
-      DCHECK(next == mir->ssa_rep->num_uses || (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC)) != 0);
-    }
-    // Record relevant attributes.
-    bb_df_attrs_[bb->id] = bb_df_attrs &
-        (DF_NULL_TRANSFER_N | DF_CHK_CAST | DF_IS_MOVE | DF_HAS_RANGE_CHKS | DF_SAME_TYPE_AB);
-  }
-
-  if (UNLIKELY(check_cast_data_ != nullptr)) {
-    check_cast_data_->MarkPseudoPhiBlocks(bb_df_attrs_);
-  }
-}
-
-int32_t TypeInference::ModifiedSReg(int32_t s_reg) {
-  if (UNLIKELY(check_cast_data_ != nullptr)) {
-    SplitSRegData* split_data = check_cast_data_->GetSplitSRegData(s_reg);
-    if (UNLIKELY(split_data != nullptr)) {
-      DCHECK_NE(split_data->current_mod_s_reg, INVALID_SREG);
-      return split_data->current_mod_s_reg;
-    }
-  }
-  return s_reg;
-}
-
-int32_t TypeInference::PhiInputModifiedSReg(int32_t s_reg, BasicBlock* bb, size_t pred_idx) {
-  DCHECK_LT(pred_idx, bb->predecessors.size());
-  if (UNLIKELY(check_cast_data_ != nullptr)) {
-    SplitSRegData* split_data = check_cast_data_->GetSplitSRegData(s_reg);
-    if (UNLIKELY(split_data != nullptr)) {
-      return split_data->ending_mod_s_reg[bb->predecessors[pred_idx]];
-    }
-  }
-  return s_reg;
-}
-
-bool TypeInference::UpdateSRegFromLowWordType(int32_t mod_s_reg, Type low_word_type) {
-  DCHECK(low_word_type.LowWord());
-  bool changed = sregs_[mod_s_reg].MergeStrong(low_word_type);
-  if (!sregs_[mod_s_reg].Narrow()) {  // Wide without conflict with narrow.
-    DCHECK(!low_word_type.Narrow());
-    DCHECK_LT(mod_s_reg, mir_graph_->GetNumSSARegs());  // Original SSA reg.
-    changed |= sregs_[mod_s_reg + 1].MergeHighWord(sregs_[mod_s_reg]);
-  }
-  return changed;
-}
-
-}  // namespace art
diff --git a/compiler/dex/type_inference.h b/compiler/dex/type_inference.h
deleted file mode 100644
index adc3b54..0000000
--- a/compiler/dex/type_inference.h
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_TYPE_INFERENCE_H_
-#define ART_COMPILER_DEX_TYPE_INFERENCE_H_
-
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "base/arena_object.h"
-#include "base/scoped_arena_containers.h"
-
-namespace art {
-
-class ArenaBitVector;
-class BasicBlock;
-struct CompilationUnit;
-class DexFile;
-class MirFieldInfo;
-class MirMethodInfo;
-class MIR;
-class MIRGraph;
-
-/**
- * @brief Determine the type of SSA registers.
- *
- * @details
- * Because Dalvik's bytecode is not fully typed, we have to do some work to figure
- * out the sreg type.  For some operations it is clear based on the opcode (e.g.
- * ADD_FLOAT v0, v1, v2), but for others (MOVE), we may never know the "real" type.
- *
- * We perform the type inference operation in two phases:
- *   1. First, we make one pass over all insns in the topological sort order and
- *      extract known type information from all insns for their defs and uses.
- *   2. Then we repeatedly go through the graph to process insns that can propagate
- *      types from inputs to outputs and vice versa. These insns are just the MOVEs,
- *      AGET/APUTs, IF_ccs and Phis (including pseudo-Phis, see below).
- *
- * Since the main purpose is to determine the basic FP/core/reference type, we don't
- * need to record the precise reference type; we only record the array type to determine
- * the result types of agets and the source type of aputs.
- *
- * One complication is the check-cast instruction that effectively defines a new
- * virtual register that has a different type than the original sreg. We need to
- * track these virtual sregs and insert pseudo-phis where they merge.
- *
- * Another problem is with null references. The same zero constant can be used
- * as a differently typed null and moved around with move-object, which would
- * normally be an ill-formed assignment. So we need to keep track of values that
- * can be null and values that cannot.
- *
- * Note that it's possible to have the same sreg show multiple defined types because dx
- * treats constants as untyped bit patterns. We disable register promotion in that case.
- */
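-// A minimal driver sketch (mirroring the test harness later in this change,
-// not a prescribed API): apply the per-block transfer function repeatedly in
-// pre-order until a fixed point is reached, then finalize.
-//
-//   TypeInference ti(mir_graph, alloc);
-//   RepeatingPreOrderDfsIterator iter(mir_graph);
-//   bool changed = false;
-//   for (BasicBlock* bb = iter.Next(changed); bb != nullptr;
-//        bb = iter.Next(changed)) {
-//     changed = ti.Apply(bb);
-//   }
-//   ti.Finish();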
-class TypeInference : public DeletableArenaObject<kArenaAllocMisc> {
- public:
-  TypeInference(MIRGraph* mir_graph, ScopedArenaAllocator* alloc);
-
-  bool Apply(BasicBlock* bb);
-  void Finish();
-
- private:
-  struct Type {
-    static Type Unknown() {
-      return Type(0u);
-    }
-
-    static Type NonArrayRefType() {
-      return Type(kFlagLowWord | kFlagNarrow | kFlagRef);
-    }
-
-    static Type ArtMethodType(bool wide) {
-      return Type(kFlagLowWord | kFlagRef | (wide ? kFlagWide : kFlagNarrow));
-    }
-
-    static Type ObjectArrayType() {
-      return Type(kFlagNarrow | kFlagRef | kFlagLowWord |
-                  (1u << kBitArrayDepthStart) | kFlagArrayNarrow | kFlagArrayRef);
-    }
-
-    static Type WideArrayType() {
-      // Core or FP unknown.
-      return Type(kFlagNarrow | kFlagRef | kFlagLowWord |
-                  (1u << kBitArrayDepthStart) | kFlagArrayWide);
-    }
-
-    static Type NarrowArrayType() {
-      // Core or FP unknown.
-      return Type(kFlagNarrow | kFlagRef | kFlagLowWord |
-                  (1u << kBitArrayDepthStart) | kFlagArrayNarrow);
-    }
-
-    static Type NarrowCoreArrayType() {
-      return Type(kFlagNarrow | kFlagRef | kFlagLowWord |
-                  (1u << kBitArrayDepthStart) | kFlagArrayNarrow | kFlagArrayCore);
-    }
-
-    static Type UnknownArrayType() {
-      return Type(kFlagNarrow | kFlagRef | kFlagLowWord | (1u << kBitArrayDepthStart));
-    }
-
-    static Type ArrayType(uint32_t array_depth, Type nested_type);
-    static Type ArrayTypeFromComponent(Type component_type);
-    static Type ShortyType(char shorty);
-    static Type DexType(const DexFile* dex_file, uint32_t type_idx);
-
-    bool IsDefined() const {
-      return raw_bits_ != 0u;
-    }
-
-    bool SizeConflict() const {
-      // NOTE: Ignore array element conflicts that don't propagate to direct conflicts.
-      return (Wide() && Narrow()) || (HighWord() && LowWord());
-    }
-
-    bool TypeConflict() const {
-      // NOTE: Ignore array element conflicts that don't propagate to direct conflicts.
-      return (raw_bits_ & kMaskType) != 0u && !IsPowerOfTwo(raw_bits_ & kMaskType);  // 2+ bits.
-    }
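-    // Worked example: with both kFlagFp and kFlagCore set, (raw_bits_ &
-    // kMaskType) has two bits set and is not a power of two, so this reports
-    // a conflict; a single type bit, or none at all, does not.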
-
-    void MarkSizeConflict() {
-      SetBits(kFlagLowWord | kFlagHighWord);
-    }
-
-    void MarkTypeConflict() {
-      // Mark all three type bits so that merging any other type bits will not change this type.
-      SetBits(kFlagFp | kFlagCore | kFlagRef);
-    }
-
-    void CheckPureRef() const {
-      DCHECK_EQ(raw_bits_ & (kMaskWideAndType | kMaskWord), kFlagNarrow | kFlagRef | kFlagLowWord);
-    }
-
-    // If reference, don't treat as possible null and require precise type.
-    //
-    // References without this flag are allowed to have a type conflict and their
-    // type will not be propagated down. However, for simplicity we allow propagation
-    // of other flags up as it will affect only other null references; should those
-    // references be marked non-null later, we would have to do it anyway.
-    // NOTE: This is a negative "non-null" flag rather than a positive "is-null"
-    // to simplify merging together with other non-array flags.
-    bool NonNull() const {
-      return IsBitSet(kFlagNonNull);
-    }
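-    // Example (see the .cc part of this change): SGET_OBJECT results are
-    // stored AsNonNull(), while a zero constant used as a null keeps this
-    // flag clear, so moving that null between differently typed references
-    // does not force a type conflict.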
-
-    bool Wide() const {
-      return IsBitSet(kFlagWide);
-    }
-
-    bool Narrow() const {
-      return IsBitSet(kFlagNarrow);
-    }
-
-    bool Fp() const {
-      return IsBitSet(kFlagFp);
-    }
-
-    bool Core() const {
-      return IsBitSet(kFlagCore);
-    }
-
-    bool Ref() const {
-      return IsBitSet(kFlagRef);
-    }
-
-    bool LowWord() const {
-      return IsBitSet(kFlagLowWord);
-    }
-
-    bool HighWord() const {
-      return IsBitSet(kFlagHighWord);
-    }
-
-    uint32_t ArrayDepth() const {
-      return raw_bits_ >> kBitArrayDepthStart;
-    }
-
-    Type NestedType() const {
-      DCHECK_NE(ArrayDepth(), 0u);
-      return Type(kFlagLowWord | ((raw_bits_ & kMaskArrayWideAndType) >> kArrayTypeShift));
-    }
-
-    Type ComponentType() const {
-      DCHECK_NE(ArrayDepth(), 0u);
-      Type temp(raw_bits_ - (1u << kBitArrayDepthStart));  // array_depth - 1u;
-      return (temp.ArrayDepth() != 0u) ? temp.AsNull() : NestedType();
-    }
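-    // Example: for a depth-2 array such as [[I, this decrements the depth
-    // bits and returns the remaining depth-1 array AsNull(); for a depth-1
-    // array such as [I, the depth drops to zero and the narrow core
-    // NestedType() is returned instead.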
-
-    void SetWide() {
-      SetBits(kFlagWide);
-    }
-
-    void SetNarrow() {
-      SetBits(kFlagNarrow);
-    }
-
-    void SetFp() {
-      SetBits(kFlagFp);
-    }
-
-    void SetCore() {
-      SetBits(kFlagCore);
-    }
-
-    void SetRef() {
-      SetBits(kFlagRef);
-    }
-
-    void SetLowWord() {
-      SetBits(kFlagLowWord);
-    }
-
-    void SetHighWord() {
-      SetBits(kFlagHighWord);
-    }
-
-    Type ToHighWord() const {
-      DCHECK_EQ(raw_bits_ & (kMaskWide | kMaskWord), kFlagWide | kFlagLowWord);
-      return Type(raw_bits_ ^ (kFlagLowWord | kFlagHighWord));
-    }
-
-    bool MergeHighWord(Type low_word_type) {
-      // NOTE: low_word_type may be also Narrow() or HighWord().
-      DCHECK(low_word_type.Wide() && low_word_type.LowWord());
-      return MergeBits(Type(low_word_type.raw_bits_ | kFlagHighWord),
-                       kMaskWideAndType | kFlagHighWord);
-    }
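-    // Example: a wide value occupies the sreg pair (s, s + 1); the low word
-    // at s carries kFlagWide | kFlagLowWord, ToHighWord() produces the
-    // matching bits for s + 1, and MergeHighWord() keeps the pair's
-    // wide/type bits in sync when the low word changes.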
-
-    bool Copy(Type type) {
-      if (raw_bits_ != type.raw_bits_) {
-        raw_bits_ = type.raw_bits_;
-        return true;
-      }
-      return false;
-    }
-
-    // Merge non-array flags.
-    bool MergeNonArrayFlags(Type src_type) {
-      return MergeBits(src_type, kMaskNonArray);
-    }
-
-    // Merge array flags for conflict.
-    bool MergeArrayConflict(Type src_type);
-
-    // Merge all flags.
-    bool MergeStrong(Type src_type);
-
-    // Merge all flags.
-    bool MergeWeak(Type src_type);
-
-    // Get the same type but mark that it should not be treated as null.
-    Type AsNonNull() const {
-      return Type(raw_bits_ | kFlagNonNull);
-    }
-
-    // Get the same type but mark that it can be treated as null.
-    Type AsNull() const {
-      return Type(raw_bits_ & ~kFlagNonNull);
-    }
-
-   private:
-    enum FlagBits {
-      kBitNonNull = 0,
-      kBitWide,
-      kBitNarrow,
-      kBitFp,
-      kBitCore,
-      kBitRef,
-      kBitLowWord,
-      kBitHighWord,
-      kBitArrayWide,
-      kBitArrayNarrow,
-      kBitArrayFp,
-      kBitArrayCore,
-      kBitArrayRef,
-      kBitArrayDepthStart,
-    };
-    static constexpr size_t kArrayDepthBits = sizeof(uint32_t) * 8u - kBitArrayDepthStart;
-
-    static constexpr uint32_t kFlagNonNull = 1u << kBitNonNull;
-    static constexpr uint32_t kFlagWide = 1u << kBitWide;
-    static constexpr uint32_t kFlagNarrow = 1u << kBitNarrow;
-    static constexpr uint32_t kFlagFp = 1u << kBitFp;
-    static constexpr uint32_t kFlagCore = 1u << kBitCore;
-    static constexpr uint32_t kFlagRef = 1u << kBitRef;
-    static constexpr uint32_t kFlagLowWord = 1u << kBitLowWord;
-    static constexpr uint32_t kFlagHighWord = 1u << kBitHighWord;
-    static constexpr uint32_t kFlagArrayWide = 1u << kBitArrayWide;
-    static constexpr uint32_t kFlagArrayNarrow = 1u << kBitArrayNarrow;
-    static constexpr uint32_t kFlagArrayFp = 1u << kBitArrayFp;
-    static constexpr uint32_t kFlagArrayCore = 1u << kBitArrayCore;
-    static constexpr uint32_t kFlagArrayRef = 1u << kBitArrayRef;
-
-    static constexpr uint32_t kMaskWide = kFlagWide | kFlagNarrow;
-    static constexpr uint32_t kMaskType = kFlagFp | kFlagCore | kFlagRef;
-    static constexpr uint32_t kMaskWord = kFlagLowWord | kFlagHighWord;
-    static constexpr uint32_t kMaskArrayWide = kFlagArrayWide | kFlagArrayNarrow;
-    static constexpr uint32_t kMaskArrayType = kFlagArrayFp | kFlagArrayCore | kFlagArrayRef;
-    static constexpr uint32_t kMaskWideAndType = kMaskWide | kMaskType;
-    static constexpr uint32_t kMaskArrayWideAndType = kMaskArrayWide | kMaskArrayType;
-
-    static constexpr size_t kArrayTypeShift = kBitArrayWide - kBitWide;
-    static_assert(kArrayTypeShift == kBitArrayNarrow - kBitNarrow, "shift mismatch");
-    static_assert(kArrayTypeShift == kBitArrayFp - kBitFp, "shift mismatch");
-    static_assert(kArrayTypeShift == kBitArrayCore - kBitCore, "shift mismatch");
-    static_assert(kArrayTypeShift == kBitArrayRef - kBitRef, "shift mismatch");
-    static_assert((kMaskWide << kArrayTypeShift) == kMaskArrayWide, "shift mismatch");
-    static_assert((kMaskType << kArrayTypeShift) == kMaskArrayType, "shift mismatch");
-    static_assert((kMaskWideAndType << kArrayTypeShift) == kMaskArrayWideAndType, "shift mismatch");
-
-    static constexpr uint32_t kMaskArrayDepth = static_cast<uint32_t>(-1) << kBitArrayDepthStart;
-    static constexpr uint32_t kMaskNonArray = ~(kMaskArrayWideAndType | kMaskArrayDepth);
-
-    // The maximum representable array depth. If we exceed the maximum (which can happen
-    // only with an absurdly nested array type in a dex file, which would presumably cause
-    // OOM while being resolved), we can report false conflicts.
-    static constexpr uint32_t kMaxArrayDepth = static_cast<uint32_t>(-1) >> kBitArrayDepthStart;
-
-    explicit Type(uint32_t raw_bits) : raw_bits_(raw_bits) { }
-
-    bool IsBitSet(uint32_t flag) const {
-      return (raw_bits_ & flag) != 0u;
-    }
-
-    void SetBits(uint32_t flags) {
-      raw_bits_ |= flags;
-    }
-
-    bool MergeBits(Type src_type, uint32_t mask) {
-      uint32_t new_bits = raw_bits_ | (src_type.raw_bits_ & mask);
-      if (new_bits != raw_bits_) {
-        raw_bits_ = new_bits;
-        return true;
-      }
-      return false;
-    }
-
-    uint32_t raw_bits_;
-  };
-
-  struct MethodSignature {
-    Type return_type;
-    size_t num_params;
-    Type* param_types;
-  };
-
-  struct SplitSRegData {
-    int32_t current_mod_s_reg;
-    int32_t* starting_mod_s_reg;        // Indexed by BasicBlock::id.
-    int32_t* ending_mod_s_reg;          // Indexed by BasicBlock::id.
-
-    // NOTE: Before AddPseudoPhis(), def_phi_blocks_ marks the blocks
-    // with check-casts and the block with the original SSA reg.
-    // After AddPseudoPhis(), it marks blocks with pseudo-phis.
-    ArenaBitVector* def_phi_blocks_;    // Indexed by BasicBlock::id.
-  };
-
-  class CheckCastData : public DeletableArenaObject<kArenaAllocMisc> {
-   public:
-    CheckCastData(MIRGraph* mir_graph, ScopedArenaAllocator* alloc);
-
-    size_t NumSRegs() const {
-      return num_sregs_;
-    }
-
-    void AddCheckCast(MIR* check_cast, Type type);
-    void AddPseudoPhis();
-    void InitializeCheckCastSRegs(Type* sregs) const;
-    void MergeCheckCastConflicts(Type* sregs) const;
-    void MarkPseudoPhiBlocks(uint64_t* bb_df_attrs) const;
-
-    void Start(BasicBlock* bb);
-    bool ProcessPseudoPhis(BasicBlock* bb, Type* sregs);
-    void ProcessCheckCast(MIR* mir);
-
-    SplitSRegData* GetSplitSRegData(int32_t s_reg);
-
-   private:
-    BasicBlock* FindDefBlock(MIR* check_cast);
-    BasicBlock* FindTopologicallyEarliestPredecessor(BasicBlock* bb);
-    bool IsSRegLiveAtStart(BasicBlock* bb, int v_reg, int32_t s_reg);
-
-    MIRGraph* const mir_graph_;
-    ScopedArenaAllocator* const alloc_;
-    const size_t num_blocks_;
-    size_t num_sregs_;
-
-    // Map check-cast mir to special sreg and type.
-    struct CheckCastMapValue {
-      int32_t modified_s_reg;
-      Type type;
-    };
-    ScopedArenaSafeMap<MIR*, CheckCastMapValue> check_cast_map_;
-    ScopedArenaSafeMap<int32_t, SplitSRegData> split_sreg_data_;
-  };
-
-  static Type FieldType(const DexFile* dex_file, uint32_t field_idx);
-  static Type* PrepareIFieldTypes(const DexFile* dex_file, MIRGraph* mir_graph,
-                                  ScopedArenaAllocator* alloc);
-  static Type* PrepareSFieldTypes(const DexFile* dex_file, MIRGraph* mir_graph,
-                                  ScopedArenaAllocator* alloc);
-  static MethodSignature Signature(const DexFile* dex_file, uint32_t method_idx, bool is_static,
-                                   ScopedArenaAllocator* alloc);
-  static MethodSignature* PrepareSignatures(const DexFile* dex_file, MIRGraph* mir_graph,
-                                            ScopedArenaAllocator* alloc);
-  static CheckCastData* InitializeCheckCastData(MIRGraph* mir_graph, ScopedArenaAllocator* alloc);
-
-  void InitializeSRegs();
-
-  int32_t ModifiedSReg(int32_t s_reg);
-  int32_t PhiInputModifiedSReg(int32_t s_reg, BasicBlock* bb, size_t pred_idx);
-
-  bool UpdateSRegFromLowWordType(int32_t mod_s_reg, Type low_word_type);
-
-  MIRGraph* const mir_graph_;
-  CompilationUnit* const cu_;
-
-  // Type inference also propagates types backwards, but this must not happen across
-  // a check-cast. So we effectively split an SSA reg into two at a check-cast and
-  // keep track of the types separately.
-  std::unique_ptr<CheckCastData> check_cast_data_;
-
-  size_t num_sregs_;      // Number of SSA regs or modified SSA regs, see check-cast.
-  const Type* const ifields_;                 // Indexed by MIR::meta::ifield_lowering_info.
-  const Type* const sfields_;                 // Indexed by MIR::meta::sfield_lowering_info.
-  const MethodSignature* const signatures_;   // Indexed by MIR::meta::method_lowering_info.
-  const MethodSignature current_method_signature_;
-  Type* const sregs_;     // Indexed by SSA reg or modified SSA reg, see check-cast.
-  uint64_t* const bb_df_attrs_;               // Indexed by BasicBlock::id.
-
-  friend class TypeInferenceTest;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_TYPE_INFERENCE_H_
diff --git a/compiler/dex/type_inference_test.cc b/compiler/dex/type_inference_test.cc
deleted file mode 100644
index 528a18c..0000000
--- a/compiler/dex/type_inference_test.cc
+++ /dev/null
@@ -1,2045 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/logging.h"
-#include "compiler_ir.h"
-#include "dataflow_iterator-inl.h"
-#include "dex_flags.h"
-#include "dex/mir_field_info.h"
-#include "dex/mir_graph.h"
-#include "driver/dex_compilation_unit.h"
-#include "gtest/gtest.h"
-#include "type_inference.h"
-#include "utils/test_dex_file_builder.h"
-
-namespace art {
-
-class TypeInferenceTest : public testing::Test {
- protected:
-  struct TypeDef {
-    const char* descriptor;
-  };
-
-  struct FieldDef {
-    const char* class_descriptor;
-    const char* type;
-    const char* name;
-  };
-
-  struct MethodDef {
-    const char* class_descriptor;
-    const char* signature;
-    const char* name;
-    InvokeType type;
-  };
-
-  struct BBDef {
-    static constexpr size_t kMaxSuccessors = 4;
-    static constexpr size_t kMaxPredecessors = 4;
-
-    BBType type;
-    size_t num_successors;
-    BasicBlockId successors[kMaxSuccessors];
-    size_t num_predecessors;
-    BasicBlockId predecessors[kMaxPredecessors];
-  };
-
-  struct MIRDef {
-    static constexpr size_t kMaxSsaDefs = 2;
-    static constexpr size_t kMaxSsaUses = 4;
-
-    BasicBlockId bbid;
-    Instruction::Code opcode;
-    int64_t value;
-    uint32_t metadata;
-    size_t num_uses;
-    int32_t uses[kMaxSsaUses];
-    size_t num_defs;
-    int32_t defs[kMaxSsaDefs];
-  };
-
-#define DEF_SUCC0() \
-    0u, { }
-#define DEF_SUCC1(s1) \
-    1u, { s1 }
-#define DEF_SUCC2(s1, s2) \
-    2u, { s1, s2 }
-#define DEF_SUCC3(s1, s2, s3) \
-    3u, { s1, s2, s3 }
-#define DEF_SUCC4(s1, s2, s3, s4) \
-    4u, { s1, s2, s3, s4 }
-#define DEF_PRED0() \
-    0u, { }
-#define DEF_PRED1(p1) \
-    1u, { p1 }
-#define DEF_PRED2(p1, p2) \
-    2u, { p1, p2 }
-#define DEF_PRED3(p1, p2, p3) \
-    3u, { p1, p2, p3 }
-#define DEF_PRED4(p1, p2, p3, p4) \
-    4u, { p1, p2, p3, p4 }
-#define DEF_BB(type, succ, pred) \
-    { type, succ, pred }
-
-#define DEF_CONST(bb, opcode, reg, value) \
-    { bb, opcode, value, 0u, 0, { }, 1, { reg } }
-#define DEF_CONST_WIDE(bb, opcode, reg, value) \
-    { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_CONST_STRING(bb, opcode, reg, index) \
-    { bb, opcode, index, 0u, 0, { }, 1, { reg } }
-#define DEF_IGET(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } }
-#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } }
-#define DEF_IPUT(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } }
-#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \
-    { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } }
-#define DEF_SGET(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 0, { }, 1, { reg } }
-#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } }
-#define DEF_SPUT(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 1, { reg }, 0, { } }
-#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \
-    { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } }
-#define DEF_AGET(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } }
-#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } }
-#define DEF_APUT(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } }
-#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \
-    { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } }
-#define DEF_INVOKE0(bb, opcode, method_idx) \
-    { bb, opcode, 0u, method_idx, 0, { }, 0, { } }
-#define DEF_INVOKE1(bb, opcode, reg, method_idx) \
-    { bb, opcode, 0u, method_idx, 1, { reg }, 0, { } }
-#define DEF_INVOKE2(bb, opcode, reg1, reg2, method_idx) \
-    { bb, opcode, 0u, method_idx, 2, { reg1, reg2 }, 0, { } }
-#define DEF_IFZ(bb, opcode, reg) \
-    { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
-#define DEF_MOVE(bb, opcode, reg, src) \
-    { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } }
-#define DEF_MOVE_WIDE(bb, opcode, reg, src) \
-    { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
-#define DEF_PHI2(bb, reg, src1, src2) \
-    { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
-#define DEF_BINOP(bb, opcode, result, src1, src2) \
-    { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
-#define DEF_UNOP(bb, opcode, result, src) DEF_MOVE(bb, opcode, result, src)
-#define DEF_NULOP(bb, opcode, result) DEF_CONST(bb, opcode, result, 0)
-#define DEF_NULOP_WIDE(bb, opcode, result) DEF_CONST_WIDE(bb, opcode, result, 0)
-#define DEF_CHECK_CAST(bb, opcode, reg, type) \
-    { bb, opcode, 0, type, 1, { reg }, 0, { } }
-#define DEF_NEW_ARRAY(bb, opcode, reg, length, type) \
-    { bb, opcode, 0, type, 1, { length }, 1, { reg } }
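-
-  // Example use of these helpers (a sketch; see the IGet test below):
-  // DEF_IGET(3u, Instruction::IGET, 4u, thiz, 3u) builds a MIR in block 3
-  // with one use (the object sreg) and one def (sreg 4), reading the
-  // instance field with lowering-info index 3.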
-
-  void AddTypes(const TypeDef* defs, size_t count) {
-    for (size_t i = 0; i != count; ++i) {
-      const TypeDef* def = &defs[i];
-      dex_file_builder_.AddType(def->descriptor);
-    }
-  }
-
-  template <size_t count>
-  void PrepareTypes(const TypeDef (&defs)[count]) {
-    type_defs_ = defs;
-    type_count_ = count;
-    AddTypes(defs, count);
-  }
-
-  void AddFields(const FieldDef* defs, size_t count) {
-    for (size_t i = 0; i != count; ++i) {
-      const FieldDef* def = &defs[i];
-      dex_file_builder_.AddField(def->class_descriptor, def->type, def->name);
-    }
-  }
-
-  template <size_t count>
-  void PrepareIFields(const FieldDef (&defs)[count]) {
-    ifield_defs_ = defs;
-    ifield_count_ = count;
-    AddFields(defs, count);
-  }
-
-  template <size_t count>
-  void PrepareSFields(const FieldDef (&defs)[count]) {
-    sfield_defs_ = defs;
-    sfield_count_ = count;
-    AddFields(defs, count);
-  }
-
-  void AddMethods(const MethodDef* defs, size_t count) {
-    for (size_t i = 0; i != count; ++i) {
-      const MethodDef* def = &defs[i];
-      dex_file_builder_.AddMethod(def->class_descriptor, def->signature, def->name);
-    }
-  }
-
-  template <size_t count>
-  void PrepareMethods(const MethodDef (&defs)[count]) {
-    method_defs_ = defs;
-    method_count_ = count;
-    AddMethods(defs, count);
-  }
-
-  DexMemAccessType AccessTypeForDescriptor(const char* descriptor) {
-    switch (descriptor[0]) {
-      case 'I':
-      case 'F':
-        return kDexMemAccessWord;
-      case 'J':
-      case 'D':
-        return kDexMemAccessWide;
-      case '[':
-      case 'L':
-        return kDexMemAccessObject;
-      case 'Z':
-        return kDexMemAccessBoolean;
-      case 'B':
-        return kDexMemAccessByte;
-      case 'C':
-        return kDexMemAccessChar;
-      case 'S':
-        return kDexMemAccessShort;
-      default:
-        LOG(FATAL) << "Bad descriptor: " << descriptor;
-        UNREACHABLE();
-    }
-  }
-
-  size_t CountIns(const std::string& test_method_signature, bool is_static) {
-    const char* sig = test_method_signature.c_str();
-    CHECK_EQ(sig[0], '(');
-    ++sig;
-    size_t result = is_static ? 0u : 1u;
-    while (*sig != ')') {
-      result += (AccessTypeForDescriptor(sig) == kDexMemAccessWide) ? 2u : 1u;
-      while (*sig == '[') {
-        ++sig;
-      }
-      if (*sig == 'L') {
-        do {
-          ++sig;
-          CHECK(*sig != '\0' && *sig != ')');
-        } while (*sig != ';');
-      }
-      ++sig;
-    }
-    return result;
-  }
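-
-  // Worked example: CountIns("(JLjava/lang/Object;I)V", /* is_static */ false)
-  // counts 1 (implicit this) + 2 (J is wide) + 1 (object ref) + 1 (I) = 5.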
-
-  void BuildDexFile(const std::string& test_method_signature, bool is_static) {
-    dex_file_builder_.AddMethod(kClassName, test_method_signature, kMethodName);
-    dex_file_ = dex_file_builder_.Build(kDexLocation);
-    cu_.dex_file = dex_file_.get();
-    cu_.method_idx = dex_file_builder_.GetMethodIdx(kClassName, test_method_signature, kMethodName);
-    cu_.access_flags = is_static ? kAccStatic : 0u;
-    cu_.mir_graph->m_units_.push_back(new (cu_.mir_graph->arena_) DexCompilationUnit(
-        &cu_, cu_.class_loader, cu_.class_linker, *cu_.dex_file, nullptr /* code_item not used */,
-        0u /* class_def_idx not used */, 0u /* method_index not used */,
-        cu_.access_flags, nullptr /* verified_method not used */,
-        NullHandle<mirror::DexCache>()));
-    cu_.mir_graph->current_method_ = 0u;
-    code_item_ = static_cast<DexFile::CodeItem*>(
-        cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
-
-    code_item_->ins_size_ = CountIns(test_method_signature, is_static);
-    code_item_->registers_size_ = kLocalVRs + code_item_->ins_size_;
-    cu_.mir_graph->current_code_item_ = code_item_;
-    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
-
-    cu_.mir_graph->ifield_lowering_infos_.clear();
-    cu_.mir_graph->ifield_lowering_infos_.reserve(ifield_count_);
-    for (size_t i = 0u; i != ifield_count_; ++i) {
-      const FieldDef* def = &ifield_defs_[i];
-      uint32_t field_idx =
-          dex_file_builder_.GetFieldIdx(def->class_descriptor, def->type, def->name);
-      MirIFieldLoweringInfo field_info(field_idx, AccessTypeForDescriptor(def->type), false);
-      field_info.declaring_dex_file_ = cu_.dex_file;
-      field_info.declaring_field_idx_ = field_idx;
-      cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
-    }
-
-    cu_.mir_graph->sfield_lowering_infos_.clear();
-    cu_.mir_graph->sfield_lowering_infos_.reserve(sfield_count_);
-    for (size_t i = 0u; i != sfield_count_; ++i) {
-      const FieldDef* def = &sfield_defs_[i];
-      uint32_t field_idx =
-          dex_file_builder_.GetFieldIdx(def->class_descriptor, def->type, def->name);
-      MirSFieldLoweringInfo field_info(field_idx, AccessTypeForDescriptor(def->type));
-      field_info.declaring_dex_file_ = cu_.dex_file;
-      field_info.declaring_field_idx_ = field_idx;
-      cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
-    }
-
-    cu_.mir_graph->method_lowering_infos_.clear();
-    cu_.mir_graph->method_lowering_infos_.reserve(method_count_);
-    for (size_t i = 0u; i != method_count_; ++i) {
-      const MethodDef* def = &method_defs_[i];
-      uint32_t method_idx =
-          dex_file_builder_.GetMethodIdx(def->class_descriptor, def->signature, def->name);
-      MirMethodLoweringInfo method_info(method_idx, def->type, false);
-      method_info.declaring_dex_file_ = cu_.dex_file;
-      method_info.declaring_method_idx_ = method_idx;
-      cu_.mir_graph->method_lowering_infos_.push_back(method_info);
-    }
-  }
-
-  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
-    cu_.mir_graph->block_id_map_.clear();
-    cu_.mir_graph->block_list_.clear();
-    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
-    ASSERT_EQ(kNullBlock, defs[0].type);
-    ASSERT_EQ(kEntryBlock, defs[1].type);
-    ASSERT_EQ(kExitBlock, defs[2].type);
-    for (size_t i = 0u; i != count; ++i) {
-      const BBDef* def = &defs[i];
-      BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type);
-      if (def->num_successors <= 2) {
-        bb->successor_block_list_type = kNotUsed;
-        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
-        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
-      } else {
-        bb->successor_block_list_type = kPackedSwitch;
-        bb->fall_through = 0u;
-        bb->taken = 0u;
-        bb->successor_blocks.reserve(def->num_successors);
-        for (size_t j = 0u; j != def->num_successors; ++j) {
-          SuccessorBlockInfo* successor_block_info =
-              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessors));
-          successor_block_info->block = j;
-          successor_block_info->key = 0u;  // Not used in this test.
-          bb->successor_blocks.push_back(successor_block_info);
-        }
-      }
-      bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors);
-      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
-        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
-            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
-        bb->data_flow_info->live_in_v = live_in_v_;
-      }
-    }
-    ASSERT_EQ(count, cu_.mir_graph->block_list_.size());
-    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1];
-    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
-    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2];
-    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
-  }
-
-  template <size_t count>
-  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
-    DoPrepareBasicBlocks(defs, count);
-  }
-
-  void PrepareSingleBlock() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(3)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareDiamond() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void PrepareLoop() {
-    static const BBDef bbs[] = {
-        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
-        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
-        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
-        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
-    };
-    PrepareBasicBlocks(bbs);
-  }
-
-  void DoPrepareMIRs(const MIRDef* defs, size_t count) {
-    mir_count_ = count;
-    mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
-    ssa_reps_.resize(count);
-    for (size_t i = 0u; i != count; ++i) {
-      const MIRDef* def = &defs[i];
-      MIR* mir = &mirs_[i];
-      ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
-      BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
-      bb->AppendMIR(mir);
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
-      mir->dalvikInsn.vB_wide = def->value;
-      if (IsInstructionIGetOrIPut(def->opcode)) {
-        ASSERT_LT(def->metadata, cu_.mir_graph->ifield_lowering_infos_.size());
-        mir->meta.ifield_lowering_info = def->metadata;
-        ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->metadata].MemAccessType(),
-                  IGetOrIPutMemAccessType(def->opcode));
-        cu_.mir_graph->merged_df_flags_ |= DF_IFIELD;
-      } else if (IsInstructionSGetOrSPut(def->opcode)) {
-        ASSERT_LT(def->metadata, cu_.mir_graph->sfield_lowering_infos_.size());
-        mir->meta.sfield_lowering_info = def->metadata;
-        ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->metadata].MemAccessType(),
-                  SGetOrSPutMemAccessType(def->opcode));
-        cu_.mir_graph->merged_df_flags_ |= DF_SFIELD;
-      } else if (IsInstructionInvoke(def->opcode)) {
-        ASSERT_LT(def->metadata, cu_.mir_graph->method_lowering_infos_.size());
-        mir->meta.method_lowering_info = def->metadata;
-        mir->dalvikInsn.vA = def->num_uses;
-        cu_.mir_graph->merged_df_flags_ |= DF_FORMAT_35C;
-      } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
-        mir->meta.phi_incoming =
-            allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo);
-        ASSERT_EQ(def->num_uses, bb->predecessors.size());
-        std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming);
-      } else if (def->opcode == Instruction::CHECK_CAST) {
-        ASSERT_LT(def->metadata, type_count_);
-        mir->dalvikInsn.vB = dex_file_builder_.GetTypeIdx(type_defs_[def->metadata].descriptor);
-        cu_.mir_graph->merged_df_flags_ |= DF_CHK_CAST;
-      } else if (def->opcode == Instruction::NEW_ARRAY) {
-        ASSERT_LT(def->metadata, type_count_);
-        mir->dalvikInsn.vC = dex_file_builder_.GetTypeIdx(type_defs_[def->metadata].descriptor);
-      }
-      mir->ssa_rep = &ssa_reps_[i];
-      mir->ssa_rep->num_uses = def->num_uses;
-      mir->ssa_rep->uses = const_cast<int32_t*>(def->uses);  // Not modified by LVN.
-      mir->ssa_rep->num_defs = def->num_defs;
-      mir->ssa_rep->defs = const_cast<int32_t*>(def->defs);  // Not modified by LVN.
-      mir->dalvikInsn.opcode = def->opcode;
-      mir->offset = i;  // LVN uses offset only for debug output
-      mir->optimization_flags = 0u;
-    }
-    code_item_->insns_size_in_code_units_ = 2u * count;
-  }
-
-  template <size_t count>
-  void PrepareMIRs(const MIRDef (&defs)[count]) {
-    DoPrepareMIRs(defs, count);
-  }
-
-  // BasicBlockDataFlow::vreg_to_ssa_map_exit is used only for check-casts.
-  void AllocEndingVRegToSRegMaps() {
-    AllNodesIterator iterator(cu_.mir_graph.get());
-    for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
-      if (bb->data_flow_info != nullptr) {
-        if (bb->data_flow_info->vreg_to_ssa_map_exit == nullptr) {
-          size_t num_vregs = code_item_->registers_size_;
-          bb->data_flow_info->vreg_to_ssa_map_exit = static_cast<int32_t*>(
-              cu_.arena.AllocArray<int32_t>(num_vregs, kArenaAllocDFInfo));
-          std::fill_n(bb->data_flow_info->vreg_to_ssa_map_exit, num_vregs, INVALID_SREG);
-        }
-      }
-    }
-  }
-
-  template <size_t count>
-  void MapVRegToSReg(int vreg, int32_t sreg, const BasicBlockId (&bb_ids)[count]) {
-    AllocEndingVRegToSRegMaps();
-    for (BasicBlockId bb_id : bb_ids) {
-      BasicBlock* bb = cu_.mir_graph->GetBasicBlock(bb_id);
-      CHECK(bb != nullptr);
-      CHECK(bb->data_flow_info != nullptr);
-      CHECK(bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
-      bb->data_flow_info->vreg_to_ssa_map_exit[vreg] = sreg;
-    }
-  }
-
-  void PerformTypeInference() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-    ASSERT_TRUE(type_inference_ == nullptr);
-    type_inference_.reset(new (allocator_.get()) TypeInference(cu_.mir_graph.get(),
-                                                               allocator_.get()));
-    RepeatingPreOrderDfsIterator iter(cu_.mir_graph.get());
-    bool changed = false;
-    for (BasicBlock* bb = iter.Next(changed); bb != nullptr; bb = iter.Next(changed)) {
-      changed = type_inference_->Apply(bb);
-    }
-    type_inference_->Finish();
-  }
-
-  TypeInferenceTest()
-      : pool_(),
-        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
-        mir_count_(0u),
-        mirs_(nullptr),
-        code_item_(nullptr),
-        ssa_reps_(),
-        allocator_(),
-        live_in_v_(new (&cu_.arena) ArenaBitVector(&cu_.arena, kMaxSsaRegs, false, kBitMapMisc)),
-        type_defs_(nullptr),
-        type_count_(0u),
-        ifield_defs_(nullptr),
-        ifield_count_(0u),
-        sfield_defs_(nullptr),
-        sfield_count_(0u),
-        method_defs_(nullptr),
-        method_count_(0u),
-        dex_file_builder_(),
-        dex_file_(nullptr) {
-    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
-    allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    // Bind all possible sregs to live vregs for test purposes.
-    live_in_v_->SetInitialBits(kMaxSsaRegs);
-    cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
-        kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
-    cu_.mir_graph->method_sreg_ = kMaxSsaRegs - 1u;
-    cu_.mir_graph->reg_location_[cu_.mir_graph->GetMethodSReg()].location = kLocCompilerTemp;
-    cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
-    cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs);
-    for (unsigned int i = 0; i < kMaxSsaRegs; i++) {
-      cu_.mir_graph->ssa_base_vregs_.push_back(i);
-      cu_.mir_graph->ssa_subscripts_.push_back(0);
-    }
-  }
-
-  enum ExpectFlags : uint32_t {
-    kExpectWide         = 0x0001u,
-    kExpectNarrow       = 0x0002u,
-    kExpectFp           = 0x0004u,
-    kExpectCore         = 0x0008u,
-    kExpectRef          = 0x0010u,
-    kExpectArrayWide    = 0x0020u,
-    kExpectArrayNarrow  = 0x0040u,
-    kExpectArrayFp      = 0x0080u,
-    kExpectArrayCore    = 0x0100u,
-    kExpectArrayRef     = 0x0200u,
-    kExpectNull         = 0x0400u,
-    kExpectHigh         = 0x0800u,  // Reserved for ExpectSRegType().
-  };
-
-  struct SRegExpectation {
-    uint32_t array_depth;
-    uint32_t flags;
-  };
-
-  void ExpectSRegType(int s_reg, const SRegExpectation& expectation, bool check_loc = true) {
-    uint32_t flags = expectation.flags;
-    uint32_t array_depth = expectation.array_depth;
-    TypeInference::Type type = type_inference_->sregs_[s_reg];
-
-    if (check_loc) {
-      RegLocation loc = cu_.mir_graph->reg_location_[s_reg];
-      EXPECT_EQ((flags & kExpectWide) != 0u, loc.wide) << s_reg;
-      EXPECT_EQ((flags & kExpectFp) != 0u, loc.fp) << s_reg;
-      EXPECT_EQ((flags & kExpectCore) != 0u, loc.core) << s_reg;
-      EXPECT_EQ((flags & kExpectRef) != 0u, loc.ref) << s_reg;
-      EXPECT_EQ((flags & kExpectHigh) != 0u, loc.high_word) << s_reg;
-    }
-
-    EXPECT_EQ((flags & kExpectWide) != 0u, type.Wide()) << s_reg;
-    EXPECT_EQ((flags & kExpectNarrow) != 0u, type.Narrow()) << s_reg;
-    EXPECT_EQ((flags & kExpectFp) != 0u, type.Fp()) << s_reg;
-    EXPECT_EQ((flags & kExpectCore) != 0u, type.Core()) << s_reg;
-    EXPECT_EQ((flags & kExpectRef) != 0u, type.Ref()) << s_reg;
-    EXPECT_EQ((flags & kExpectHigh) == 0u, type.LowWord()) << s_reg;
-    EXPECT_EQ((flags & kExpectHigh) != 0u, type.HighWord()) << s_reg;
-
-    if ((flags & kExpectRef) != 0u) {
-      EXPECT_EQ((flags & kExpectNull) != 0u, !type.NonNull()) << s_reg;
-    } else {
-      // Null should be checked only for references.
-      ASSERT_EQ((flags & kExpectNull), 0u);
-    }
-
-    ASSERT_EQ(array_depth, type.ArrayDepth()) << s_reg;
-    if (array_depth != 0u) {
-      ASSERT_NE((flags & kExpectRef), 0u);
-      TypeInference::Type nested_type = type.NestedType();
-      EXPECT_EQ((flags & kExpectArrayWide) != 0u, nested_type.Wide()) << s_reg;
-      EXPECT_EQ((flags & kExpectArrayNarrow) != 0u, nested_type.Narrow()) << s_reg;
-      EXPECT_EQ((flags & kExpectArrayFp) != 0u, nested_type.Fp()) << s_reg;
-      EXPECT_EQ((flags & kExpectArrayCore) != 0u, nested_type.Core()) << s_reg;
-      EXPECT_EQ((flags & kExpectArrayRef) != 0u, nested_type.Ref()) << s_reg;
-    }
-    if (!type.Narrow() && type.LowWord() &&
-        (expectation.flags & (kExpectWide | kExpectNarrow | kExpectHigh)) == kExpectWide) {
-      SRegExpectation high_expectation = { array_depth, flags | kExpectHigh };
-      ExpectSRegType(s_reg + 1, high_expectation);
-    }
-  }
-
-  void ExpectCore(int s_reg, bool core) {
-    EXPECT_EQ(core, type_inference_->sregs_[s_reg].Core());
-  }
-
-  void ExpectRef(int s_reg, bool ref) {
-    EXPECT_EQ(ref, type_inference_->sregs_[s_reg].Ref());
-  }
-
-  void ExpectArrayDepth(int s_reg, uint32_t array_depth) {
-    EXPECT_EQ(array_depth, type_inference_->sregs_[s_reg].ArrayDepth());
-  }
-
-  static constexpr size_t kMaxSsaRegs = 16384u;
-  static constexpr uint16_t kLocalVRs = 1000u;
-
-  static constexpr const char* kDexLocation = "TypeInferenceDexFile;";
-  static constexpr const char* kClassName = "LTypeInferenceTest;";
-  static constexpr const char* kMethodName = "test";
-
-  ArenaPool pool_;
-  CompilationUnit cu_;
-  size_t mir_count_;
-  MIR* mirs_;
-  DexFile::CodeItem* code_item_;
-  std::vector<SSARepresentation> ssa_reps_;
-  std::unique_ptr<ScopedArenaAllocator> allocator_;
-  std::unique_ptr<TypeInference> type_inference_;
-  ArenaBitVector* live_in_v_;
-
-  const TypeDef* type_defs_;
-  size_t type_count_;
-  const FieldDef* ifield_defs_;
-  size_t ifield_count_;
-  const FieldDef* sfield_defs_;
-  size_t sfield_count_;
-  const MethodDef* method_defs_;
-  size_t method_count_;
-
-  TestDexFileBuilder dex_file_builder_;
-  std::unique_ptr<const DexFile> dex_file_;
-};
-
-TEST_F(TypeInferenceTest, IGet) {
-  static const FieldDef ifields[] = {
-      { kClassName, "B", "byteField" },
-      { kClassName, "C", "charField" },
-      { kClassName, "D", "doubleField" },
-      { kClassName, "F", "floatField" },
-      { kClassName, "I", "intField" },
-      { kClassName, "J", "longField" },
-      { kClassName, "S", "shortField" },
-      { kClassName, "Z", "booleanField" },
-      { kClassName, "Ljava/lang/Object;", "objectField" },
-      { kClassName, "[Ljava/lang/Object;", "objectArrayField" },
-  };
-  constexpr uint32_t thiz = kLocalVRs;
-  static const MIRDef mirs[] = {
-      DEF_IGET(3u, Instruction::IGET_BYTE, 0u, thiz, 0u),
-      DEF_IGET(3u, Instruction::IGET_CHAR, 1u, thiz, 1u),
-      DEF_IGET_WIDE(3u, Instruction::IGET_WIDE, 2u, thiz, 2u),
-      DEF_IGET(3u, Instruction::IGET, 4u, thiz, 3u),
-      DEF_IGET(3u, Instruction::IGET, 5u, thiz, 4u),
-      DEF_IGET_WIDE(3u, Instruction::IGET_WIDE, 6u, thiz, 5u),
-      DEF_IGET(3u, Instruction::IGET_SHORT, 8u, thiz, 6u),
-      DEF_IGET(3u, Instruction::IGET_BOOLEAN, 9u, thiz, 7u),
-      DEF_IGET(3u, Instruction::IGET_OBJECT, 10u, thiz, 8u),
-      DEF_IGET(3u, Instruction::IGET_OBJECT, 11u, thiz, 9u),
-  };
-
-  PrepareIFields(ifields);
-  BuildDexFile("()V", false);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectFp | kExpectWide },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  static_assert(arraysize(expectations) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(expectations); ++i) {
-    EXPECT_EQ(mirs[i].opcode, mirs_[i].dalvikInsn.opcode);
-    ASSERT_LE(1u, mirs_[i].ssa_rep->num_defs);
-    ExpectSRegType(mirs_[i].ssa_rep->defs[0], expectations[i]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, SGet) {
-  static const FieldDef sfields[] = {
-      { kClassName, "B", "staticByteField" },
-      { kClassName, "C", "staticCharField" },
-      { kClassName, "D", "staticDoubleField" },
-      { kClassName, "F", "staticFloatField" },
-      { kClassName, "I", "staticIntField" },
-      { kClassName, "J", "staticLongField" },
-      { kClassName, "S", "staticShortField" },
-      { kClassName, "Z", "staticBooleanField" },
-      { kClassName, "Ljava/lang/Object;", "staticObjectField" },
-      { kClassName, "[Ljava/lang/Object;", "staticObjectArrayField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(3u, Instruction::SGET_BYTE, 0u, 0u),
-      DEF_SGET(3u, Instruction::SGET_CHAR, 1u, 1u),
-      DEF_SGET_WIDE(3u, Instruction::SGET_WIDE, 2u, 2u),
-      DEF_SGET(3u, Instruction::SGET, 4u, 3u),
-      DEF_SGET(3u, Instruction::SGET, 5u, 4u),
-      DEF_SGET_WIDE(3u, Instruction::SGET_WIDE, 6u, 5u),
-      DEF_SGET(3u, Instruction::SGET_SHORT, 8u, 6u),
-      DEF_SGET(3u, Instruction::SGET_BOOLEAN, 9u, 7u),
-      DEF_SGET(3u, Instruction::SGET_OBJECT, 10u, 8u),
-      DEF_SGET(3u, Instruction::SGET_OBJECT, 11u, 9u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectFp | kExpectWide },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  static_assert(arraysize(expectations) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(expectations); ++i) {
-    EXPECT_EQ(mirs[i].opcode, mirs_[i].dalvikInsn.opcode);
-    ASSERT_LE(1u, mirs_[i].ssa_rep->num_defs);
-    ExpectSRegType(mirs_[i].ssa_rep->defs[0], expectations[i]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, IPut) {
-  static const FieldDef ifields[] = {
-      { kClassName, "B", "byteField" },
-      { kClassName, "C", "charField" },
-      { kClassName, "D", "doubleField" },
-      { kClassName, "F", "floatField" },
-      { kClassName, "I", "intField" },
-      { kClassName, "J", "longField" },
-      { kClassName, "S", "shortField" },
-      { kClassName, "Z", "booleanField" },
-      { kClassName, "Ljava/lang/Object;", "objectField" },
-      { kClassName, "[Ljava/lang/Object;", "objectArrayField" },
-  };
-  constexpr uint32_t thiz = kLocalVRs;
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_IPUT(3u, Instruction::IPUT_BYTE, 0u, thiz, 0u),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_IPUT(3u, Instruction::IPUT_CHAR, 1u, thiz, 1u),
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 2u, 0),
-      DEF_IPUT_WIDE(3u, Instruction::IPUT_WIDE, 2u, thiz, 2u),
-      DEF_CONST(3u, Instruction::CONST, 4u, 0),
-      DEF_IPUT(3u, Instruction::IPUT, 4u, thiz, 3u),
-      DEF_CONST(3u, Instruction::CONST, 5u, 0),
-      DEF_IPUT(3u, Instruction::IPUT, 5u, thiz, 4u),
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 6u, 0),
-      DEF_IPUT_WIDE(3u, Instruction::IPUT_WIDE, 6u, thiz, 5u),
-      DEF_CONST(3u, Instruction::CONST, 8u, 0),
-      DEF_IPUT(3u, Instruction::IPUT_SHORT, 8u, thiz, 6u),
-      DEF_CONST(3u, Instruction::CONST, 9u, 0),
-      DEF_IPUT(3u, Instruction::IPUT_BOOLEAN, 9u, thiz, 7u),
-      DEF_CONST(3u, Instruction::CONST, 10u, 0),
-      DEF_IPUT(3u, Instruction::IPUT_OBJECT, 10u, thiz, 8u),
-      DEF_CONST(3u, Instruction::CONST, 11u, 0),
-      DEF_IPUT(3u, Instruction::IPUT_OBJECT, 11u, thiz, 9u),
-  };
-
-  PrepareIFields(ifields);
-  BuildDexFile("()V", false);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      // One expectation for every 2 MIRs.
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectFp | kExpectWide },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow | kExpectNull },
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  static_assert(2 * arraysize(expectations) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(expectations); ++i) {
-    EXPECT_EQ(mirs[2 * i].opcode, mirs_[2 * i].dalvikInsn.opcode);
-    EXPECT_EQ(mirs[2 * i + 1].opcode, mirs_[2 * i + 1].dalvikInsn.opcode);
-    ASSERT_LE(1u, mirs_[2 * i].ssa_rep->num_defs);
-    ExpectSRegType(mirs_[2 * i].ssa_rep->defs[0], expectations[i]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, SPut) {
-  static const FieldDef sfields[] = {
-      { kClassName, "B", "staticByteField" },
-      { kClassName, "C", "staticCharField" },
-      { kClassName, "D", "staticDoubleField" },
-      { kClassName, "F", "staticFloatField" },
-      { kClassName, "I", "staticIntField" },
-      { kClassName, "J", "staticLongField" },
-      { kClassName, "S", "staticShortField" },
-      { kClassName, "Z", "staticBooleanField" },
-      { kClassName, "Ljava/lang/Object;", "staticObjectField" },
-      { kClassName, "[Ljava/lang/Object;", "staticObjectArrayField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_BYTE, 0u, 0u),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_CHAR, 1u, 1u),
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 2u, 0),
-      DEF_SPUT_WIDE(3u, Instruction::SPUT_WIDE, 2u, 2u),
-      DEF_CONST(3u, Instruction::CONST, 4u, 0),
-      DEF_SPUT(3u, Instruction::SPUT, 4u, 3u),
-      DEF_CONST(3u, Instruction::CONST, 5u, 0),
-      DEF_SPUT(3u, Instruction::SPUT, 5u, 4u),
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 6u, 0),
-      DEF_SPUT_WIDE(3u, Instruction::SPUT_WIDE, 6u, 5u),
-      DEF_CONST(3u, Instruction::CONST, 8u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_SHORT, 8u, 6u),
-      DEF_CONST(3u, Instruction::CONST, 9u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_BOOLEAN, 9u, 7u),
-      DEF_CONST(3u, Instruction::CONST, 10u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_OBJECT, 10u, 8u),
-      DEF_CONST(3u, Instruction::CONST, 11u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_OBJECT, 11u, 9u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      // One expectation for every 2 MIRs.
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectFp | kExpectWide },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow | kExpectNull },
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  static_assert(2 * arraysize(expectations) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(expectations); ++i) {
-    EXPECT_EQ(mirs[2 * i].opcode, mirs_[2 * i].dalvikInsn.opcode);
-    EXPECT_EQ(mirs[2 * i + 1].opcode, mirs_[2 * i + 1].dalvikInsn.opcode);
-    ASSERT_LE(1u, mirs_[2 * i].ssa_rep->num_defs);
-    ExpectSRegType(mirs_[2 * i].ssa_rep->defs[0], expectations[i]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, MethodReturnType) {
-  static const MethodDef methods[] = {
-      { kClassName, "()B", "byteFoo", kStatic },
-      { kClassName, "()C", "charFoo", kStatic },
-      { kClassName, "()D", "doubleFoo", kStatic },
-      { kClassName, "()F", "floatFoo", kStatic },
-      { kClassName, "()I", "intFoo", kStatic },
-      { kClassName, "()J", "longFoo", kStatic },
-      { kClassName, "()S", "shortFoo", kStatic },
-      { kClassName, "()Z", "booleanFoo", kStatic },
-      { kClassName, "()Ljava/lang/Object;", "objectFoo", kStatic },
-      { kClassName, "()[Ljava/lang/Object;", "objectArrayFoo", kStatic },
-  };
-  static const MIRDef mirs[] = {
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 0u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT, 0u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 1u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT, 1u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 2u),
-      DEF_NULOP_WIDE(3u, Instruction::MOVE_RESULT_WIDE, 2u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 3u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT, 4u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 4u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT, 5u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 5u),
-      DEF_NULOP_WIDE(3u, Instruction::MOVE_RESULT_WIDE, 6u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 6u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT, 8u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 7u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT, 9u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 8u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT_OBJECT, 10u),
-      DEF_INVOKE0(3u, Instruction::INVOKE_STATIC, 9u),
-      DEF_NULOP(3u, Instruction::MOVE_RESULT_OBJECT, 11u),
-  };
-
-  PrepareMethods(methods);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      // One expectation for every 2 MIRs.
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectFp | kExpectWide },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  static_assert(2 * arraysize(expectations) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(expectations); ++i) {
-    EXPECT_EQ(mirs[2 * i].opcode, mirs_[2 * i].dalvikInsn.opcode);
-    EXPECT_EQ(mirs[2 * i + 1].opcode, mirs_[2 * i + 1].dalvikInsn.opcode);
-    ASSERT_LE(1u, mirs_[2 * i + 1].ssa_rep->num_defs);
-    ExpectSRegType(mirs_[2 * i + 1].ssa_rep->defs[0], expectations[i]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, MethodArgType) {
-  static const MethodDef methods[] = {
-      { kClassName, "(B)V", "fooByte", kStatic },
-      { kClassName, "(C)V", "fooChar", kStatic },
-      { kClassName, "(D)V", "fooDouble", kStatic },
-      { kClassName, "(F)V", "fooFloat", kStatic },
-      { kClassName, "(I)V", "fooInt", kStatic },
-      { kClassName, "(J)V", "fooLong", kStatic },
-      { kClassName, "(S)V", "fooShort", kStatic },
-      { kClassName, "(Z)V", "fooBoolean", kStatic },
-      { kClassName, "(Ljava/lang/Object;)V", "fooObject", kStatic },
-      { kClassName, "([Ljava/lang/Object;)V", "fooObjectArray", kStatic },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 0u, 0u),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 1u, 1u),
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 2u, 0),
-      DEF_INVOKE2(3u, Instruction::INVOKE_STATIC, 2u, 3u, 2u),
-      DEF_CONST(3u, Instruction::CONST, 4u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 4u, 3u),
-      DEF_CONST(3u, Instruction::CONST, 5u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 5u, 4u),
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 6u, 0),
-      DEF_INVOKE2(3u, Instruction::INVOKE_STATIC, 6u, 7u, 5u),
-      DEF_CONST(3u, Instruction::CONST, 8u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 8u, 6u),
-      DEF_CONST(3u, Instruction::CONST, 9u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 9u, 7u),
-      DEF_CONST(3u, Instruction::CONST, 10u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 10u, 8u),
-      DEF_CONST(3u, Instruction::CONST, 11u, 0),
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 11u, 9u),
-  };
-
-  PrepareMethods(methods);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      // One expectation for every 2 MIRs.
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectFp | kExpectWide },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow | kExpectNull },
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  static_assert(2 * arraysize(expectations) == arraysize(mirs), "array size mismatch");
-  for (size_t i = 0; i != arraysize(expectations); ++i) {
-    EXPECT_EQ(mirs[2 * i].opcode, mirs_[2 * i].dalvikInsn.opcode);
-    EXPECT_EQ(mirs[2 * i + 1].opcode, mirs_[2 * i + 1].dalvikInsn.opcode);
-    ASSERT_LE(1u, mirs_[2 * i].ssa_rep->num_defs);
-    ExpectSRegType(mirs_[2 * i].ssa_rep->defs[0], expectations[i]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, APut1) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),  // Object[] array
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // value; can't even determine whether core or fp.
-      DEF_CONST(3u, Instruction::CONST, 2u, 0),  // index
-      DEF_APUT(3u, Instruction::APUT, 1u, 0u, 2u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayNarrow },
-      { 0u, kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, APut2) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),  // Object[] array
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // Object[] value
-      DEF_CONST(3u, Instruction::CONST, 2u, 0),  // index
-      DEF_APUT(3u, Instruction::APUT_OBJECT, 1u, 0u, 2u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow | kExpectNull },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, APut3) {
-  static const MIRDef mirs[] = {
-      // Either array1 or array2 could be Object[][] but there is no way to tell from the bytecode.
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),  // Object[] array1
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // Object[] array2
-      DEF_CONST(3u, Instruction::CONST, 2u, 0),  // index
-      DEF_APUT(3u, Instruction::APUT_OBJECT, 0u, 1u, 2u),
-      DEF_APUT(3u, Instruction::APUT_OBJECT, 1u, 0u, 2u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, APut4) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // index
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),  // Object[] array
-      DEF_CONST(3u, Instruction::CONST, 3u, 0),  // value; can't even determine whether core or fp.
-      DEF_APUT(3u, Instruction::APUT, 3u, 2u, 1u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayNarrow },
-      { 0u, kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, APut5) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // index
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),  // Object[] array
-      DEF_CONST(3u, Instruction::CONST, 3u, 0),  // Object[] value
-      DEF_APUT(3u, Instruction::APUT_OBJECT, 3u, 2u, 1u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow | kExpectNull },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, APut6) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // index
-      // Either array1 or array2 could be Object[][] but there is no way to tell from the bytecode.
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),  // Object[] array1
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 3u, 0u, 1u),  // Object[] array2
-      DEF_APUT(3u, Instruction::APUT_OBJECT, 2u, 3u, 1u),
-      DEF_APUT(3u, Instruction::APUT_OBJECT, 3u, 2u, 1u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, TwoNullObjectArraysInLoop) {
-  static const MIRDef mirs[] = {
-      // void foo() {
-      //   Object[] array1 = ((Object[][])null)[0];
-      //   Object[] array2 = ((Object[][])null)[0];
-      //   for (int i = 0; i != 3; ++i) {
-      //     Object[] a1 = null;  // One of these could be Object[][] but not both.
-      //     Object[] a2 = null;  // But they will be deduced as Object[].
-      //     try { a1[0] = a2; } catch (Throwable ignored) { }
-      //     try { a2[0] = a1; } catch (Throwable ignored) { }
-      //     array1 = a1;
-      //     array2 = a2;
-      //   }
-      // }
-      //
-      // Omitting the try-catch:
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),            // null
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),            // index
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),  // array1
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 3u, 0u, 1u),  // array2
-      DEF_PHI2(4u, 4u, 2u, 8u),  // ? + [L -> [? gives [L (see array-length below)
-      DEF_PHI2(4u, 5u, 3u, 9u),  // ? + [L -> ? gives ?
-      DEF_AGET(4u, Instruction::AGET_OBJECT, 6u, 0u, 1u),  // a1
-      DEF_AGET(4u, Instruction::AGET_OBJECT, 7u, 0u, 1u),  // a2
-      DEF_APUT(4u, Instruction::APUT_OBJECT, 6u, 7u, 1u),
-      DEF_APUT(4u, Instruction::APUT_OBJECT, 7u, 6u, 1u),
-      DEF_MOVE(4u, Instruction::MOVE_OBJECT, 8u, 6u),
-      DEF_MOVE(4u, Instruction::MOVE_OBJECT, 9u, 7u),
-      DEF_UNOP(5u, Instruction::ARRAY_LENGTH, 10u, 4u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareLoop();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, ArrayArrayFloat) {
-  static const MethodDef methods[] = {
-      { kClassName, "(F)V", "fooFloat", kStatic },
-  };
-  static const MIRDef mirs[] = {
-      // void foo() {
-      //   try {
-      //     float[][][] aaaf = null;
-      //     float[][] array = aaaf[0];  // Make sure array is treated as properly typed.
-      //     array[0][0] = 0.0f;      // const + aget-object[1] + aput
-      //     fooFloat(array[0][0]);   // aget-object[2] + aget + invoke
-      //     // invoke: signature => input is F.
-      //     // aget: output is F => base is [F (precise)
-      //     // aget-object[2]: output is [F => base is [[F (precise)
-      //     // aput: unknown input type => base is [?
-      //     // aget-object[1]: base is [[F => result is L or [F, merge with [? => result is [F
-      //     // aput (again): base is [F => result is F
-      //     // const: F determined by the aput reprocessing.
-      //   } catch (Throwable ignored) {
-      //   }
-      // }
-      //
-      // Omitting the try-catch:
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),             // 0
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),             // aaaf
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 1u, 0u),   // array = aaaf[0]
-      DEF_CONST(3u, Instruction::CONST, 3u, 0),             // 0.0f
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 4u, 2u, 0u),   // array[0]
-      DEF_APUT(3u, Instruction::APUT, 3u, 4u, 0u),          // array[0][0] = 0.0f
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 5u, 2u, 0u),   // array[0]
-      DEF_AGET(3u, Instruction::AGET, 6u, 5u, 0u),          // array[0][0]
-      DEF_INVOKE1(3u, Instruction::INVOKE_STATIC, 6u, 0u),  // fooFloat(array[0][0])
-  };
-
-  PrepareMethods(methods);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 2u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectFp | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectFp | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
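The walkthrough in ArrayArrayFloat's comments, where the const and the first aput are reprocessed once later instructions pin the element type to F, describes a fixed-point iteration. A hedged sketch of that control shape, with MIR and InferTypesFor standing in for assumed types rather than the pass's real entry points:

    #include <vector>

    struct MIR;                    // assumed: one IR instruction
    bool InferTypesFor(MIR* mir);  // assumed per-MIR transfer function

    // Re-run the per-instruction transfer function until no SSA register's
    // type mask changes anymore.
    void RunToFixedPoint(const std::vector<MIR*>& mirs) {
      bool changed = true;
      while (changed) {
        changed = false;
        for (MIR* mir : mirs) {
          changed |= InferTypesFor(mir);  // true if it widened any type mask
        }
      }
    }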
-TEST_F(TypeInferenceTest, CheckCast1) {
-  static const TypeDef types[] = {
-      { "[I" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),
-      DEF_CHECK_CAST(4u, Instruction::CHECK_CAST, 2u, 0u),
-      DEF_CHECK_CAST(5u, Instruction::CHECK_CAST, 2u, 0u),
-      // Pseudo-phi from [I and [I into L infers only L but not [.
-      DEF_MOVE(6u, Instruction::MOVE_OBJECT, 3u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  static const BasicBlockId v0_def_blocks[] = { 3u, 4u, 5u, 6u };
-  MapVRegToSReg(2, 2, v0_def_blocks);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, CheckCast2) {
-  static const TypeDef types[] = {
-      { "[I" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),
-      DEF_CHECK_CAST(4u, Instruction::CHECK_CAST, 2u, 0u),
-      DEF_CHECK_CAST(5u, Instruction::CHECK_CAST, 2u, 0u),
-      // Pseudo-phi from [I and [I into [? infers [I.
-      DEF_MOVE(6u, Instruction::MOVE_OBJECT, 3u, 2u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 4u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  static const BasicBlockId v0_def_blocks[] = { 3u, 4u, 5u, 6u };
-  MapVRegToSReg(2, 2, v0_def_blocks);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, CheckCast3) {
-  static const TypeDef types[] = {
-      { "[I" },
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),
-      DEF_CHECK_CAST(4u, Instruction::CHECK_CAST, 2u, 0u),
-      DEF_CHECK_CAST(5u, Instruction::CHECK_CAST, 2u, 1u),
-      // Pseudo-phi from [I and [F into L correctly leaves it as L.
-      DEF_MOVE(6u, Instruction::MOVE_OBJECT, 3u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  static const BasicBlockId v0_def_blocks[] = { 3u, 4u, 5u, 6u };
-  MapVRegToSReg(2, 2, v0_def_blocks);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, CheckCastConflict1) {
-  static const TypeDef types[] = {
-      { "[I" },
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),
-      DEF_CHECK_CAST(4u, Instruction::CHECK_CAST, 2u, 0u),
-      DEF_CHECK_CAST(5u, Instruction::CHECK_CAST, 2u, 1u),
-      // Pseudo-phi from [I and [F into [? infers conflict [I/[F.
-      DEF_MOVE(6u, Instruction::MOVE_OBJECT, 3u, 2u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 4u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  static const BasicBlockId v0_def_blocks[] = { 3u, 4u, 5u, 6u };
-  MapVRegToSReg(2, 2, v0_def_blocks);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg], false);
-  }
-  // The type conflict in the array element wasn't propagated to an SSA reg.
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, CheckCastConflict2) {
-  static const TypeDef types[] = {
-      { "[I" },
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),
-      DEF_CHECK_CAST(4u, Instruction::CHECK_CAST, 2u, 0u),
-      DEF_CHECK_CAST(5u, Instruction::CHECK_CAST, 2u, 1u),
-      // Pseudo-phi from [I and [F into [? infers conflict [I/[F.
-      DEF_MOVE(6u, Instruction::MOVE_OBJECT, 3u, 2u),
-      DEF_AGET(6u, Instruction::AGET, 4u, 2u, 1u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  static const BasicBlockId v0_def_blocks[] = { 3u, 4u, 5u, 6u };
-  MapVRegToSReg(2, 2, v0_def_blocks);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectFp | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg], false);
-  }
-  // Type conflict in an SSA reg, register promotion disabled.
-  EXPECT_NE(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, Phi1) {
-  static const TypeDef types[] = {
-      { "[I" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 100),
-      DEF_NEW_ARRAY(4u, Instruction::NEW_ARRAY, 1u, 0u, 0u),
-      DEF_NEW_ARRAY(5u, Instruction::NEW_ARRAY, 2u, 0u, 0u),
-      // Phi from [I and [I infers only L but not [.
-      DEF_PHI2(6u, 3u, 1u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, Phi2) {
-  static const TypeDef types[] = {
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 100),
-      DEF_NEW_ARRAY(4u, Instruction::NEW_ARRAY, 1u, 0u, 0u),
-      DEF_NEW_ARRAY(5u, Instruction::NEW_ARRAY, 2u, 0u, 0u),
-      // Phi from [F and [F into [? infers [F.
-      DEF_PHI2(6u, 3u, 1u, 2u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 4u, 3u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, Phi3) {
-  static const TypeDef types[] = {
-      { "[I" },
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 100),
-      DEF_NEW_ARRAY(4u, Instruction::NEW_ARRAY, 1u, 0u, 0u),
-      DEF_NEW_ARRAY(5u, Instruction::NEW_ARRAY, 2u, 0u, 1u),
-      // Phi from [I and [F infers L.
-      DEF_PHI2(6u, 3u, 1u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, Phi4) {
-  static const TypeDef types[] = {
-      { "[I" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 100),
-      DEF_NEW_ARRAY(4u, Instruction::NEW_ARRAY, 1u, 0u, 0u),
-      DEF_CONST(5u, Instruction::CONST, 2u, 0),
-      // Pseudo-phi from [I and null infers L.
-      DEF_PHI2(6u, 3u, 1u, 2u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 0u, kExpectRef | kExpectNarrow | kExpectNull },
-      { 0u, kExpectRef | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, PhiConflict1) {
-  static const TypeDef types[] = {
-      { "[I" },
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 100),
-      DEF_NEW_ARRAY(4u, Instruction::NEW_ARRAY, 1u, 0u, 0u),
-      DEF_NEW_ARRAY(5u, Instruction::NEW_ARRAY, 2u, 0u, 1u),
-      // Pseudo-phi from [I and [F into [? infers conflict [I/[F (then propagated upwards).
-      DEF_PHI2(6u, 3u, 1u, 2u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 4u, 3u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg], false);
-  }
-  // The type conflict in the array element wasn't propagated to an SSA reg.
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, PhiConflict2) {
-  static const TypeDef types[] = {
-      { "[I" },
-      { "[F" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 100),
-      DEF_NEW_ARRAY(4u, Instruction::NEW_ARRAY, 1u, 0u, 0u),
-      DEF_NEW_ARRAY(5u, Instruction::NEW_ARRAY, 2u, 0u, 1u),
-      // Pseudo-phi from [I and [F into [? infers conflict [I/[F (then propagated upwards).
-      DEF_PHI2(6u, 3u, 1u, 2u),
-      DEF_AGET(6u, Instruction::AGET, 4u, 3u, 0u),
-  };
-  PrepareTypes(types);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectFp | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg], false);
-  }
-  // Type conflict in an SSA reg, register promotion disabled.
-  EXPECT_NE(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, Wide1) {
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0),  // index
-      DEF_AGET(3u, Instruction::AGET_OBJECT, 2u, 0u, 1u),  // long[]
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 3u, 0),  // long
-      DEF_APUT_WIDE(3u, Instruction::APUT_WIDE, 3u, 2u, 1u),
-      { 3u, Instruction::RETURN_OBJECT, 0, 0u, 1u, { 2u }, 0u, { } },
-  };
-
-  BuildDexFile("()[J", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayWide },
-      { 0u, kExpectCore | kExpectWide },
-      // NOTE: High word checked implicitly for sreg = 3.
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg], false);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, WideSizeConflict1) {
-  static const MIRDef mirs[] = {
-      DEF_CONST_WIDE(3u, Instruction::CONST_WIDE, 0u, 0),
-      DEF_MOVE(3u, Instruction::MOVE, 2u, 0u),
-  };
-
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectNarrow | kExpectWide },
-      { 0u, kExpectNarrow | kExpectWide },
-  };
-  ExpectSRegType(0u, expectations[0], false);
-  ExpectSRegType(2u, expectations[1], false);
-  EXPECT_TRUE(cu_.mir_graph->PuntToInterpreter());
-}
-
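Taken together, the conflict tests assert two escalation levels: a type conflict that reaches an SSA register only disables register promotion, while a 32-bit/64-bit width conflict like the one in WideSizeConflict1 forces the whole method back to the interpreter. A sketch of that policy, with names assumed from the tests above and SetPuntToInterpreter a hypothetical setter for the flag PuntToInterpreter() reads:

    void ReactToConflicts(bool sreg_type_conflict, bool width_conflict,
                          CompilationUnit* cu) {
      if (sreg_type_conflict) {
        cu->disable_opt |= 1u << kPromoteRegs;      // promotion off, keep compiling
      }
      if (width_conflict) {
        cu->mir_graph->SetPuntToInterpreter(true);  // give up, interpret instead
      }
    }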
-TEST_F(TypeInferenceTest, ArrayLongLength) {
-  static const FieldDef sfields[] = {
-      { kClassName, "[J", "arrayLongField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(4u, Instruction::CONST, 0u, 0),
-      DEF_SGET(5u, Instruction::SGET_OBJECT, 1u, 0u),
-      DEF_PHI2(6u, 2u, 0u, 1u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 3u, 2u),
-      DEF_SGET(6u, Instruction::SGET_OBJECT, 4u, 0u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 5u, 4u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayCore | kExpectArrayWide },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayWide },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayWide },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayWide },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, ArrayArrayObjectLength) {
-  static const FieldDef sfields[] = {
-      { kClassName, "[[Ljava/lang/Object;", "arrayObjectArrayField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(4u, Instruction::CONST, 0u, 0),
-      DEF_SGET(5u, Instruction::SGET_OBJECT, 1u, 0u),
-      DEF_PHI2(6u, 2u, 0u, 1u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 3u, 2u),
-      DEF_SGET(6u, Instruction::SGET_OBJECT, 4u, 0u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 5u, 4u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull | kExpectArrayRef | kExpectArrayNarrow },
-      { 2u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 2u, kExpectRef | kExpectNarrow | kExpectArrayRef | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, SGetAdd0SPut) {
-  static const FieldDef sfields[] = {
-      { kClassName, "I", "staticIntField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(3u, Instruction::SGET, 0u, 0u),
-      DEF_UNOP(3u, Instruction::ADD_INT_LIT8, 1u, 0u),  // +0
-      DEF_SPUT(3u, Instruction::SPUT, 1u, 0u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, MoveObjectNull) {
-  static const MethodDef methods[] = {
-      { kClassName, "([I[D)V", "foo", kStatic },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_MOVE(3u, Instruction::MOVE_OBJECT, 1u, 0u),
-      DEF_INVOKE2(3u, Instruction::INVOKE_STATIC, 0u, 1u, 0u),
-  };
-
-  PrepareMethods(methods);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectation = {
-      1u,
-      kExpectRef | kExpectNarrow | kExpectNull |
-      kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow | kExpectArrayWide
-  };
-  ExpectSRegType(0u, expectation);
-  ExpectSRegType(1u, expectation);
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, MoveNull1) {
-  static const MethodDef methods[] = {
-      { kClassName, "([I[D)V", "foo", kStatic },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_MOVE(3u, Instruction::MOVE, 1u, 0u),
-      DEF_INVOKE2(3u, Instruction::INVOKE_STATIC, 0u, 1u, 0u),
-  };
-
-  PrepareMethods(methods);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectation = {
-      1u,
-      kExpectCore | kExpectRef | kExpectFp | kExpectNarrow | kExpectNull |
-      kExpectArrayCore | kExpectArrayFp | kExpectArrayNarrow | kExpectArrayWide
-  };
-  ExpectSRegType(0u, expectation);
-  ExpectSRegType(1u, expectation);
-  // Type conflict using move instead of move-object for null, register promotion disabled.
-  EXPECT_NE(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, MoveNull2) {
-  static const FieldDef sfields[] = {
-      { kClassName, "[F", "staticArrayFloatField" },
-      { kClassName, "[I", "staticArrayIntField" },
-      { kClassName, "[[I", "staticArrayArrayIntField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(4u, Instruction::CONST, 0u, 0),
-      DEF_MOVE(4u, Instruction::MOVE_OBJECT, 1u, 0u),
-      DEF_MOVE(4u, Instruction::MOVE_OBJECT, 2u, 1u),
-      DEF_SGET(5u, Instruction::SGET_OBJECT, 3u, 0u),
-      DEF_SGET(5u, Instruction::SGET_OBJECT, 4u, 1u),
-      DEF_SGET(5u, Instruction::SGET_OBJECT, 5u, 2u),
-      DEF_PHI2(6u, 6u, 0u, 3u),
-      DEF_PHI2(6u, 7u, 1u, 4u),
-      DEF_PHI2(6u, 8u, 2u, 5u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 9u, 6u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 10u, 7u),
-      DEF_UNOP(6u, Instruction::ARRAY_LENGTH, 11u, 8u),
-      { 6u, Instruction::RETURN_OBJECT, 0, 0u, 1u, { 8u }, 0u, { } },
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()[[I", true);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull |
-          kExpectArrayCore | kExpectArrayFp | kExpectArrayRef | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull |
-          kExpectArrayCore | kExpectArrayFp | kExpectArrayRef | kExpectArrayNarrow},
-      { 1u, kExpectRef | kExpectNarrow | kExpectNull |
-          kExpectArrayCore | kExpectArrayFp | kExpectArrayRef | kExpectArrayNarrow},
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 2u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayFp | kExpectArrayNarrow },
-      { 1u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 2u, kExpectRef | kExpectNarrow | kExpectArrayCore | kExpectArrayNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  // Type conflict in array type not propagated to actual register.
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, ReuseNull1) {
-  static const FieldDef sfields[] = {
-      { kClassName, "[I", "staticArrayIntField" },
-      { kClassName, "[[F", "staticArrayArrayFloatField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_OBJECT, 0u, 0u),
-      DEF_SPUT(3u, Instruction::SPUT_OBJECT, 0u, 1u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectation = {
-      1u,
-      kExpectRef | kExpectNarrow | kExpectNull |
-      kExpectArrayCore | kExpectArrayRef | kExpectArrayFp | kExpectArrayNarrow
-  };
-  ExpectSRegType(0u, expectation);
-  // Type conflict in array type not propagated to actual register.
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, ReuseNull2) {
-  static const FieldDef sfields[] = {
-      { kClassName, "[J", "staticArrayLongField" },
-      { kClassName, "[[F", "staticArrayArrayFloatField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_CONST(3u, Instruction::CONST, 0u, 0),
-      DEF_SPUT(3u, Instruction::SPUT_OBJECT, 0u, 0u),
-      DEF_SPUT(3u, Instruction::SPUT_OBJECT, 0u, 1u),
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectation = {
-      1u,
-      kExpectRef | kExpectNarrow | kExpectNull |
-      kExpectArrayCore | kExpectArrayRef | kExpectArrayFp | kExpectArrayNarrow | kExpectArrayWide
-  };
-  ExpectSRegType(0u, expectation);
-  // Type conflict in array type not propagated to actual register.
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, ArgIsNonNull) {
-  constexpr uint32_t thiz = kLocalVRs;
-  static const MIRDef mirs[] = {
-      DEF_MOVE(3u, Instruction::MOVE_OBJECT, 0u, thiz),
-  };
-
-  BuildDexFile("(Ljava/lang/Object;)V", true);
-  PrepareSingleBlock();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectation = {
-      0u,
-      kExpectRef | kExpectNarrow
-  };
-  ExpectSRegType(0u, expectation);
-  // No type conflict here; register promotion stays enabled.
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-TEST_F(TypeInferenceTest, IfCc) {
-  static const FieldDef sfields[] = {
-      { kClassName, "I", "intField" },
-  };
-  static const MIRDef mirs[] = {
-      DEF_SGET(3u, Instruction::SGET, 0u, 0u),
-      DEF_CONST(3u, Instruction::CONST, 1u, 0u),
-      { 3u, Instruction::IF_EQ, 0, 0u, 2, { 0u, 1u }, 0, { } },
-  };
-
-  PrepareSFields(sfields);
-  BuildDexFile("()V", false);
-  PrepareDiamond();
-  PrepareMIRs(mirs);
-  PerformTypeInference();
-
-  ASSERT_EQ(arraysize(mirs), mir_count_);
-  static const SRegExpectation expectations[] = {
-      { 0u, kExpectCore | kExpectNarrow },
-      { 0u, kExpectCore | kExpectNarrow },
-  };
-  for (int32_t sreg = 0; sreg != arraysize(expectations); ++sreg) {
-    ExpectSRegType(sreg, expectations[sreg]);
-  }
-  EXPECT_EQ(cu_.disable_opt & (1u << kPromoteRegs), 0u);
-  EXPECT_FALSE(cu_.mir_graph->PuntToInterpreter());
-}
-
-}  // namespace art
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 65b0ad6..d87762d 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -44,14 +44,14 @@
   }
 }
 
-bool VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
+void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
   DCHECK(method_verifier != nullptr);
   MethodReference ref = method_verifier->GetMethodReference();
   bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
   const VerifiedMethod* verified_method = VerifiedMethod::Create(method_verifier, compile);
   if (verified_method == nullptr) {
-    // Do not report an error to the verifier. We'll just punt this later.
-    return true;
+    // We'll punt this later.
+    return;
   }
 
   WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
@@ -60,20 +60,18 @@
     // TODO: Investigate why we are doing the work again for this method and try to avoid it.
     LOG(WARNING) << "Method processed more than once: "
         << PrettyMethod(ref.dex_method_index, *ref.dex_file);
-    if (!Runtime::Current()->UseJit()) {
+    if (!Runtime::Current()->UseJitCompilation()) {
       DCHECK_EQ(it->second->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
       DCHECK_EQ(it->second->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
     }
-    DCHECK_EQ(it->second->GetDexGcMap().size(), verified_method->GetDexGcMap().size());
     // Delete the new verified method since there was already an existing one registered. It
     // is unsafe to replace the existing one since the JIT may be using it to generate a
     // native GC map.
     delete verified_method;
-    return true;
+    return;
   }
   verified_methods_.Put(ref, verified_method);
   DCHECK(verified_methods_.find(ref) != verified_methods_.end());
-  return true;
 }
 
 const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
@@ -82,15 +80,6 @@
   return (it != verified_methods_.end()) ? it->second : nullptr;
 }
 
-void VerificationResults::RemoveVerifiedMethod(MethodReference ref) {
-  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
-  auto it = verified_methods_.find(ref);
-  if (it != verified_methods_.end()) {
-    delete it->second;
-    verified_methods_.erase(it);
-  }
-}
-
 void VerificationResults::AddRejectedClass(ClassReference ref) {
   {
     WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
@@ -106,11 +95,11 @@
 
 bool VerificationResults::IsCandidateForCompilation(MethodReference&,
                                                     const uint32_t access_flags) {
-  if (!compiler_options_->IsCompilationEnabled()) {
+  if (!compiler_options_->IsBytecodeCompilationEnabled()) {
     return false;
   }
   // Don't compile class initializers unless kEverything.
-  if ((compiler_options_->GetCompilerFilter() != CompilerOptions::kEverything) &&
+  if ((compiler_options_->GetCompilerFilter() != CompilerFilter::kEverything) &&
      ((access_flags & kAccConstructor) != 0) && ((access_flags & kAccStatic) != 0)) {
     return false;
   }
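With ProcessVerifiedMethod changed from bool to void, callers no longer thread a success flag through verification: a VerifiedMethod that fails to be created is simply deferred ("punted") rather than surfaced as a verifier error. A hypothetical call site under that reading:

    void OnMethodVerified(verifier::MethodVerifier* verifier,
                          VerificationResults* results) {
      // Nothing to check anymore: a null VerifiedMethod is handled later
      // instead of being reported back through a boolean.
      results->ProcessVerifiedMethod(verifier);
    }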
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 9934f6b..1af11a8 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -42,13 +42,12 @@
     explicit VerificationResults(const CompilerOptions* compiler_options);
     ~VerificationResults();
 
-    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
+    void ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
         SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(!verified_methods_lock_);
 
     const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
         REQUIRES(!verified_methods_lock_);
-    void RemoveVerifiedMethod(MethodReference ref) REQUIRES(!verified_methods_lock_);
 
     void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_);
     bool IsClassRejected(ClassReference ref) REQUIRES(!rejected_classes_lock_);
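The REQUIRES(!verified_methods_lock_) annotations above mean the caller must not already hold the map lock, because the member function acquires it itself. A minimal sketch of that pattern with Clang thread-safety annotations as used in ART (class and member names assumed):

    class Results {
     public:
      const VerifiedMethod* Get(MethodReference ref) REQUIRES(!lock_) {
        ReaderMutexLock mu(Thread::Current(), lock_);  // acquired here, so the
        auto it = map_.find(ref);                      // caller must not hold it
        return (it != map_.end()) ? it->second : nullptr;
      }

     private:
      ReaderWriterMutex lock_;
      SafeMap<MethodReference, const VerifiedMethod*,
              MethodReferenceComparator> map_ GUARDED_BY(lock_);
    };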
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 8eb37cf..4bcd59a 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "dex_file.h"
@@ -30,44 +31,32 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "utils.h"
-#include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier-inl.h"
 #include "verifier/reg_type-inl.h"
 #include "verifier/register_line-inl.h"
 
 namespace art {
 
-VerifiedMethod::VerifiedMethod(uint32_t encountered_error_types,
-                               bool has_runtime_throw,
-                               const SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map)
+VerifiedMethod::VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw)
     : encountered_error_types_(encountered_error_types),
-      has_runtime_throw_(has_runtime_throw),
-      string_init_pc_reg_map_(string_init_pc_reg_map) {
+      has_runtime_throw_(has_runtime_throw) {
 }
 
 const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
                                              bool compile) {
   std::unique_ptr<VerifiedMethod> verified_method(
       new VerifiedMethod(method_verifier->GetEncounteredFailureTypes(),
-                         method_verifier->HasInstructionThatWillThrow(),
-                         method_verifier->GetStringInitPcRegMap()));
+                         method_verifier->HasInstructionThatWillThrow()));
 
   if (compile) {
-    /* Generate a register map. */
-    if (!verified_method->GenerateGcMap(method_verifier)) {
-      return nullptr;  // Not a real failure, but a failure to encode.
-    }
-    if (kIsDebugBuild) {
-      VerifyGcMap(method_verifier, verified_method->dex_gc_map_);
-    }
-
     // TODO: move this out when DEX-to-DEX supports devirtualization.
     if (method_verifier->HasVirtualOrInterfaceInvokes()) {
       verified_method->GenerateDevirtMap(method_verifier);
     }
 
     // Only need dequicken info for JIT so far.
-    if (Runtime::Current()->UseJit() && !verified_method->GenerateDequickenMap(method_verifier)) {
+    if (Runtime::Current()->UseJitCompilation() &&
+        !verified_method->GenerateDequickenMap(method_verifier)) {
       return nullptr;
     }
   }
@@ -85,7 +74,7 @@
 }
 
 const DexFileReference* VerifiedMethod::GetDequickenIndex(uint32_t dex_pc) const {
-  DCHECK(Runtime::Current()->UseJit());
+  DCHECK(Runtime::Current()->UseJitCompilation());
   auto it = dequicken_map_.find(dex_pc);
   return (it != dequicken_map_.end()) ? &it->second : nullptr;
 }
@@ -94,120 +83,6 @@
   return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
 }
 
-bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) {
-  DCHECK(dex_gc_map_.empty());
-  size_t num_entries, ref_bitmap_bits, pc_bits;
-  ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
-  const size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte;
-  static constexpr size_t kFormatBits = 3;
-  // We have 16 - kFormatBits bits available for ref_bitmap_bytes.
-  if ((ref_bitmap_bytes >> (16u - kFormatBits)) != 0) {
-    LOG(WARNING) << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers: "
-                 << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
-                                 *method_verifier->GetMethodReference().dex_file);
-    return false;
-  }
-  // There are 2 bytes to encode the number of entries.
-  if (num_entries > std::numeric_limits<uint16_t>::max()) {
-    LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: "
-                 << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
-                                 *method_verifier->GetMethodReference().dex_file);
-    return false;
-  }
-  size_t pc_bytes;
-  verifier::RegisterMapFormat format;
-  if (pc_bits <= kBitsPerByte) {
-    format = verifier::kRegMapFormatCompact8;
-    pc_bytes = 1;
-  } else if (pc_bits <= kBitsPerByte * 2) {
-    format = verifier::kRegMapFormatCompact16;
-    pc_bytes = 2;
-  } else {
-    LOG(WARNING) << "Cannot encode GC map for method with "
-                 << (1 << pc_bits) << " instructions (number is rounded up to the nearest power of 2): "
-                 << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
-                                 *method_verifier->GetMethodReference().dex_file);
-    return false;
-  }
-  size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
-  dex_gc_map_.reserve(table_size);
-  // Write table header.
-  dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> (kBitsPerByte - kFormatBits)));
-  dex_gc_map_.push_back(ref_bitmap_bytes & 0xFF);
-  dex_gc_map_.push_back(num_entries & 0xFF);
-  dex_gc_map_.push_back((num_entries >> 8) & 0xFF);
-  // Write table data.
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      dex_gc_map_.push_back(i & 0xFF);
-      if (pc_bytes == 2) {
-        dex_gc_map_.push_back((i >> 8) & 0xFF);
-      }
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      line->WriteReferenceBitMap(method_verifier, &dex_gc_map_, ref_bitmap_bytes);
-    }
-  }
-  DCHECK_EQ(dex_gc_map_.size(), table_size);
-  return true;
-}
-
-void VerifiedMethod::VerifyGcMap(verifier::MethodVerifier* method_verifier,
-                                 const std::vector<uint8_t>& data) {
-  // Check that there is a map entry for every GC point and none for non-GC points, that the
-  // table data is well formed, and that all references are marked (or not) in the bitmap.
-  verifier::DexPcToReferenceMap map(&data[0]);
-  CHECK_EQ(data.size(), map.RawSize()) << map.NumEntries() << " " << map.RegWidth();
-  size_t map_index = 0;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    const uint8_t* reg_bitmap = map.FindBitMap(i, false);
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      DCHECK_LT(map_index, map.NumEntries());
-      DCHECK_EQ(map.GetDexPc(map_index), i);
-      DCHECK_EQ(map.GetBitMap(map_index), reg_bitmap);
-      map_index++;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      for (size_t j = 0; j < code_item->registers_size_; j++) {
-        if (line->GetRegisterType(method_verifier, j).IsNonZeroReferenceTypes()) {
-          DCHECK_LT(j / kBitsPerByte, map.RegWidth());
-          DCHECK_EQ((reg_bitmap[j / kBitsPerByte] >> (j % kBitsPerByte)) & 1, 1);
-        } else if ((j / kBitsPerByte) < map.RegWidth()) {
-          DCHECK_EQ((reg_bitmap[j / kBitsPerByte] >> (j % kBitsPerByte)) & 1, 0);
-        } else {
-          // If a register doesn't contain a reference then the bitmap may be shorter than the line.
-        }
-      }
-    } else {
-      DCHECK(i >= 65536 || reg_bitmap == nullptr);
-    }
-  }
-}
-
-void VerifiedMethod::ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
-                                       size_t* gc_points, size_t* ref_bitmap_bits,
-                                       size_t* log2_max_gc_pc) {
-  size_t local_gc_points = 0;
-  size_t max_insn = 0;
-  size_t max_ref_reg = -1;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      local_gc_points++;
-      max_insn = i;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      max_ref_reg = line->GetMaxNonZeroReferenceReg(method_verifier, max_ref_reg);
-    }
-  }
-  *gc_points = local_gc_points;
-  *ref_bitmap_bits = max_ref_reg + 1;  // If max register is 0 we need 1 bit to encode (ie +1).
-  size_t i = 0;
-  while ((1U << i) <= max_insn) {
-    i++;
-  }
-  *log2_max_gc_pc = i;
-}
-
 bool VerifiedMethod::GenerateDequickenMap(verifier::MethodVerifier* method_verifier) {
   if (method_verifier->HasFailures()) {
     return false;
@@ -295,7 +170,7 @@
       continue;
     }
     auto* cl = Runtime::Current()->GetClassLinker();
-    size_t pointer_size = cl->GetImagePointerSize();
+    PointerSize pointer_size = cl->GetImagePointerSize();
     ArtMethod* abstract_method = method_verifier->GetDexCache()->GetResolvedMethod(
         is_range ? inst->VRegB_3rc() : inst->VRegB_35c(), pointer_size);
     if (abstract_method == nullptr) {
@@ -313,8 +188,9 @@
       concrete_method = reg_type.GetClass()->FindVirtualMethodForVirtual(
           abstract_method, pointer_size);
     }
-    if (concrete_method == nullptr || concrete_method->IsAbstract()) {
-      // In cases where concrete_method is not found, or is abstract, continue to the next invoke.
+    if (concrete_method == nullptr || !concrete_method->IsInvokable()) {
+      // In cases where concrete_method is not found, or is not invokable, continue to the next
+      // invoke.
       continue;
     }
     if (reg_type.IsPreciseReference() || concrete_method->IsFinal() ||
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 74fcb07..495acf0 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -47,10 +47,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   ~VerifiedMethod() = default;
 
-  const std::vector<uint8_t>& GetDexGcMap() const {
-    return dex_gc_map_;
-  }
-
   const DevirtualizationMap& GetDevirtMap() const {
     return devirt_map_;
   }
@@ -83,14 +79,8 @@
     return has_runtime_throw_;
   }
 
-  const SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() const {
-    return string_init_pc_reg_map_;
-  }
-
  private:
-  VerifiedMethod(uint32_t encountered_error_types,
-                 bool has_runtime_throw,
-                 const SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map);
+  VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw);
 
   /*
    * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
@@ -120,7 +110,6 @@
   void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  std::vector<uint8_t> dex_gc_map_;
   DevirtualizationMap devirt_map_;
   // Dequicken map is required for compiling quickened byte codes. The quicken maps from
   // dex PC to dex method index or dex field index based on the instruction.
@@ -129,10 +118,6 @@
 
   const uint32_t encountered_error_types_;
   const bool has_runtime_throw_;
-
-  // Copy of mapping generated by verifier of dex PCs of string init invocations
-  // to the set of other registers that the receiver has been copied into.
-  const SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
deleted file mode 100644
index 948ba7b..0000000
--- a/compiler/dex/vreg_analysis.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/logging.h"
-#include "base/stringprintf.h"
-#include "compiler_ir.h"
-#include "dex/dataflow_iterator-inl.h"
-#include "dex_flags.h"
-#include "driver/dex_compilation_unit.h"
-
-namespace art {
-
-static const char* storage_name[] = {" Frame ", "PhysReg", " CompilerTemp "};
-
-void MIRGraph::DumpRegLocTable(RegLocation* table, int count) {
-  for (int i = 0; i < count; i++) {
-    LOG(INFO) << StringPrintf("Loc[%02d] : %s, %c %c %c %c %c %c 0x%04x S%d",
-                              table[i].orig_sreg, storage_name[table[i].location],
-                              table[i].wide ? 'W' : 'N', table[i].defined ? 'D' : 'U',
-                              table[i].fp ? 'F' : table[i].ref ? 'R' :'C',
-                              table[i].is_const ? 'c' : 'n',
-                              table[i].high_word ? 'H' : 'L', table[i].home ? 'h' : 't',
-                              table[i].reg.GetRawBits(),
-                              table[i].s_reg_low);
-  }
-}
-
-// FIXME - will likely need to revisit all uses of this.
-static const RegLocation fresh_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
-                                      RegStorage(), INVALID_SREG, INVALID_SREG};
-
-void MIRGraph::InitRegLocations() {
-  // Allocate the location map. We also include the maximum possible temps because
-  // the temp allocation initializes reg location as well (in order to deal with
-  // case when it will be called after this pass).
-  int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps();
-  RegLocation* loc = arena_->AllocArray<RegLocation>(max_regs, kArenaAllocRegAlloc);
-  for (int i = 0; i < GetNumSSARegs(); i++) {
-    loc[i] = fresh_loc;
-    loc[i].s_reg_low = i;
-    loc[i].is_const = false;  // Constants will be marked by constant propagation pass later.
-  }
-
-  /* Mark the location of ArtMethod* as temporary */
-  loc[GetMethodSReg()].location = kLocCompilerTemp;
-
-  reg_location_ = loc;
-}
-
-/*
- * Set the s_reg_low field to refer to the pre-SSA name of the
- * base Dalvik virtual register.  Once we add a better register
- * allocator, remove this remapping.
- */
-void MIRGraph::RemapRegLocations() {
-  for (int i = 0; i < GetNumSSARegs(); i++) {
-    int orig_sreg = reg_location_[i].s_reg_low;
-    reg_location_[i].orig_sreg = orig_sreg;
-    reg_location_[i].s_reg_low = SRegToVReg(orig_sreg);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
new file mode 100644
index 0000000..a0a8f81
--- /dev/null
+++ b/compiler/driver/compiled_method_storage.cc
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <ostream>
+
+#include "compiled_method_storage.h"
+
+#include "base/logging.h"
+#include "compiled_method.h"
+#include "thread-inl.h"
+#include "utils.h"
+#include "utils/dedupe_set-inl.h"
+#include "utils/swap_space.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+template <typename T>
+const LengthPrefixedArray<T>* CopyArray(SwapSpace* swap_space, const ArrayRef<const T>& array) {
+  DCHECK(!array.empty());
+  SwapAllocator<uint8_t> allocator(swap_space);
+  void* storage = allocator.allocate(LengthPrefixedArray<T>::ComputeSize(array.size()));
+  LengthPrefixedArray<T>* array_copy = new(storage) LengthPrefixedArray<T>(array.size());
+  std::copy(array.begin(), array.end(), array_copy->begin());
+  return array_copy;
+}
+
+template <typename T>
+void ReleaseArray(SwapSpace* swap_space, const LengthPrefixedArray<T>* array) {
+  SwapAllocator<uint8_t> allocator(swap_space);
+  size_t size = LengthPrefixedArray<T>::ComputeSize(array->size());
+  array->~LengthPrefixedArray<T>();
+  allocator.deallocate(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(array)), size);
+}
+
+}  // anonymous namespace
+
+template <typename T, typename DedupeSetType>
+inline const LengthPrefixedArray<T>* CompiledMethodStorage::AllocateOrDeduplicateArray(
+    const ArrayRef<const T>& data,
+    DedupeSetType* dedupe_set) {
+  if (data.empty()) {
+    return nullptr;
+  } else if (!DedupeEnabled()) {
+    return CopyArray(swap_space_.get(), data);
+  } else {
+    return dedupe_set->Add(Thread::Current(), data);
+  }
+}
+
+template <typename T>
+inline void CompiledMethodStorage::ReleaseArrayIfNotDeduplicated(
+    const LengthPrefixedArray<T>* array) {
+  if (array != nullptr && !DedupeEnabled()) {
+    ReleaseArray(swap_space_.get(), array);
+  }
+}
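+
+// Usage sketch (illustrative, names assumed): each Deduplicate* call is paired
+// with the matching Release* call. With deduplication disabled the returned
+// array is a swap-space copy owned by the caller; with deduplication enabled
+// it is owned by the dedupe set and Release* is a no-op.
+//
+//   CompiledMethodStorage storage(/* swap_fd */ -1);
+//   const LengthPrefixedArray<uint8_t>* code =
+//       storage.DeduplicateCode(ArrayRef<const uint8_t>(raw_code));
+//   ...  // Use `code` while building the CompiledMethod.
+//   storage.ReleaseCode(code);  // Frees the copy only if dedupe was disabled.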
+
+template <typename ContentType>
+class CompiledMethodStorage::DedupeHashFunc {
+ private:
+  static constexpr bool kUseMurmur3Hash = true;
+
+ public:
+  size_t operator()(const ArrayRef<ContentType>& array) const {
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
+    // TODO: More reasonable assertion.
+    // static_assert(IsPowerOfTwo(sizeof(ContentType)),
+    //    "ContentType is not a power of two; the array layout may not be as assumed");
+    uint32_t len = sizeof(ContentType) * array.size();
+    if (kUseMurmur3Hash) {
+      static constexpr uint32_t c1 = 0xcc9e2d51;
+      static constexpr uint32_t c2 = 0x1b873593;
+      static constexpr uint32_t r1 = 15;
+      static constexpr uint32_t r2 = 13;
+      static constexpr uint32_t m = 5;
+      static constexpr uint32_t n = 0xe6546b64;
+
+      uint32_t hash = 0;
+
+      const int nblocks = len / 4;
+      typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
+      const unaligned_uint32_t* blocks = reinterpret_cast<const unaligned_uint32_t*>(data);
+      int i;
+      for (i = 0; i < nblocks; i++) {
+        uint32_t k = blocks[i];
+        k *= c1;
+        k = (k << r1) | (k >> (32 - r1));
+        k *= c2;
+
+        hash ^= k;
+        hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
+      }
+
+      const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
+      uint32_t k1 = 0;
+
+      switch (len & 3) {
+        case 3:
+          k1 ^= tail[2] << 16;
+          FALLTHROUGH_INTENDED;
+        case 2:
+          k1 ^= tail[1] << 8;
+          FALLTHROUGH_INTENDED;
+        case 1:
+          k1 ^= tail[0];
+
+          k1 *= c1;
+          k1 = (k1 << r1) | (k1 >> (32 - r1));
+          k1 *= c2;
+          hash ^= k1;
+      }
+
+      hash ^= len;
+      hash ^= (hash >> 16);
+      hash *= 0x85ebca6b;
+      hash ^= (hash >> 13);
+      hash *= 0xc2b2ae35;
+      hash ^= (hash >> 16);
+
+      return hash;
+    } else {
+      size_t hash = 0x811c9dc5;
+      for (uint32_t i = 0; i < len; ++i) {
+        hash = (hash * 16777619) ^ data[i];
+      }
+      hash += hash << 13;
+      hash ^= hash >> 7;
+      hash += hash << 3;
+      hash ^= hash >> 17;
+      hash += hash << 5;
+      return hash;
+    }
+  }
+};
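+
+// Worked example for the hash above (hypothetical input): a 3-byte array
+// {1, 2, 3} gives len = 3, so the block loop runs zero times and only the tail
+// switch executes, folding tail[2] << 16, tail[1] << 8 and tail[0] into k1
+// before the final avalanche steps; inputs of four or more bytes consume whole
+// 32-bit blocks first.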
+
+template <typename T>
+class CompiledMethodStorage::LengthPrefixedArrayAlloc {
+ public:
+  explicit LengthPrefixedArrayAlloc(SwapSpace* swap_space)
+      : swap_space_(swap_space) {
+  }
+
+  const LengthPrefixedArray<T>* Copy(const ArrayRef<const T>& array) {
+    return CopyArray(swap_space_, array);
+  }
+
+  void Destroy(const LengthPrefixedArray<T>* array) {
+    ReleaseArray(swap_space_, array);
+  }
+
+ private:
+  SwapSpace* const swap_space_;
+};
+
+CompiledMethodStorage::CompiledMethodStorage(int swap_fd)
+    : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
+      dedupe_enabled_(true),
+      dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_src_mapping_table_("dedupe source mapping table",
+                                LengthPrefixedArrayAlloc<SrcMapElem>(swap_space_.get())),
+      dedupe_vmap_table_("dedupe vmap table",
+                         LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_linker_patches_("dedupe linker patches",
+                             LengthPrefixedArrayAlloc<LinkerPatch>(swap_space_.get())) {
+}
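+
+// Construction sketch (illustrative): passing swap_fd == -1 keeps every array
+// in normal heap memory, while a valid file descriptor backs allocations with
+// a SwapSpace over that file (created above with 10 * MB).
+//
+//   CompiledMethodStorage in_memory_storage(/* swap_fd */ -1);
+//   CompiledMethodStorage file_backed_storage(/* swap_fd */ some_open_fd);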
+
+CompiledMethodStorage::~CompiledMethodStorage() {
+  // All done by member destructors.
+}
+
+void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const {
+  if (swap_space_.get() != nullptr) {
+    const size_t swap_size = swap_space_->GetSize();
+    os << " swap=" << PrettySize(swap_size) << " (" << swap_size << "B)";
+  }
+  if (extended) {
+    Thread* self = Thread::Current();
+    os << "\nCode dedupe: " << dedupe_code_.DumpStats(self);
+    os << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats(self);
+    os << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats(self);
+  }
+}
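+
+// Example output shape for the dump above (values hypothetical):
+//   " swap=4MB (4194304B)"
+// followed, when extended is true, by one DumpStats line each for the code,
+// vmap table and CFI info dedupe sets.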
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCode(
+    const ArrayRef<const uint8_t>& code) {
+  return AllocateOrDeduplicateArray(code, &dedupe_code_);
+}
+
+void CompiledMethodStorage::ReleaseCode(const LengthPrefixedArray<uint8_t>* code) {
+  ReleaseArrayIfNotDeduplicated(code);
+}
+
+const LengthPrefixedArray<SrcMapElem>* CompiledMethodStorage::DeduplicateSrcMappingTable(
+    const ArrayRef<const SrcMapElem>& src_map) {
+  return AllocateOrDeduplicateArray(src_map, &dedupe_src_mapping_table_);
+}
+
+void CompiledMethodStorage::ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map) {
+  ReleaseArrayIfNotDeduplicated(src_map);
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
+    const ArrayRef<const uint8_t>& table) {
+  return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_);
+}
+
+void CompiledMethodStorage::ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table) {
+  ReleaseArrayIfNotDeduplicated(table);
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo(
+    const ArrayRef<const uint8_t>& cfi_info) {
+  return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_);
+}
+
+void CompiledMethodStorage::ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info) {
+  ReleaseArrayIfNotDeduplicated(cfi_info);
+}
+
+const LengthPrefixedArray<LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
+    const ArrayRef<const LinkerPatch>& linker_patches) {
+  return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_);
+}
+
+void CompiledMethodStorage::ReleaseLinkerPatches(
+    const LengthPrefixedArray<LinkerPatch>* linker_patches) {
+  ReleaseArrayIfNotDeduplicated(linker_patches);
+}
+
+}  // namespace art
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
new file mode 100644
index 0000000..8674abf
--- /dev/null
+++ b/compiler/driver/compiled_method_storage.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
+#define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
+
+#include <iosfwd>
+#include <memory>
+
+#include "base/length_prefixed_array.h"
+#include "base/macros.h"
+#include "utils/array_ref.h"
+#include "utils/dedupe_set.h"
+#include "utils/swap_space.h"
+
+namespace art {
+
+class LinkerPatch;
+class SrcMapElem;
+
+class CompiledMethodStorage {
+ public:
+  explicit CompiledMethodStorage(int swap_fd);
+  ~CompiledMethodStorage();
+
+  void DumpMemoryUsage(std::ostream& os, bool extended) const;
+
+  void SetDedupeEnabled(bool dedupe_enabled) {
+    dedupe_enabled_ = dedupe_enabled;
+  }
+  bool DedupeEnabled() const {
+    return dedupe_enabled_;
+  }
+
+  SwapAllocator<void> GetSwapSpaceAllocator() {
+    return SwapAllocator<void>(swap_space_.get());
+  }
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
+  void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
+
+  const LengthPrefixedArray<SrcMapElem>* DeduplicateSrcMappingTable(
+      const ArrayRef<const SrcMapElem>& src_map);
+  void ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map);
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
+  void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
+  void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
+
+  const LengthPrefixedArray<LinkerPatch>* DeduplicateLinkerPatches(
+      const ArrayRef<const LinkerPatch>& linker_patches);
+  void ReleaseLinkerPatches(const LengthPrefixedArray<LinkerPatch>* linker_patches);
+
+ private:
+  template <typename T, typename DedupeSetType>
+  const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data,
+                                                           DedupeSetType* dedupe_set);
+
+  template <typename T>
+  void ReleaseArrayIfNotDeduplicated(const LengthPrefixedArray<T>* array);
+
+  // Deduplication data structures.
+  template <typename ContentType>
+  class DedupeHashFunc;
+
+  template <typename T>
+  class LengthPrefixedArrayAlloc;
+
+  template <typename T>
+  using ArrayDedupeSet = DedupeSet<ArrayRef<const T>,
+                                   LengthPrefixedArray<T>,
+                                   LengthPrefixedArrayAlloc<T>,
+                                   size_t,
+                                   DedupeHashFunc<const T>,
+                                   4>;
+
+  // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first
+  // as other fields rely on this.
+  std::unique_ptr<SwapSpace> swap_space_;
+
+  bool dedupe_enabled_;
+
+  ArrayDedupeSet<uint8_t> dedupe_code_;
+  ArrayDedupeSet<SrcMapElem> dedupe_src_mapping_table_;
+  ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
+  ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
+  ArrayDedupeSet<LinkerPatch> dedupe_linker_patches_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
new file mode 100644
index 0000000..6863f42
--- /dev/null
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "compiled_method_storage.h"
+#include "compiled_method.h"
+#include "compiler_driver.h"
+#include "compiler_options.h"
+#include "dex/verification_results.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+
+namespace art {
+
+TEST(CompiledMethodStorage, Deduplicate) {
+  CompilerOptions compiler_options;
+  VerificationResults verification_results(&compiler_options);
+  DexFileToMethodInlinerMap method_inliner_map;
+  CompilerDriver driver(&compiler_options,
+                        &verification_results,
+                        &method_inliner_map,
+                        Compiler::kOptimizing,
+                        /* instruction_set */ kNone,
+                        /* instruction_set_features */ nullptr,
+                        /* boot_image */ false,
+                        /* app_image */ false,
+                        /* image_classes */ nullptr,
+                        /* compiled_classes */ nullptr,
+                        /* compiled_methods */ nullptr,
+                        /* thread_count */ 1u,
+                        /* dump_stats */ false,
+                        /* dump_passes */ false,
+                        /* timer */ nullptr,
+                        /* swap_fd */ -1,
+                        /* profile_compilation_info */ nullptr);
+  CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
+
+  ASSERT_TRUE(storage->DedupeEnabled());  // The default.
+
+  const uint8_t raw_code1[] = { 1u, 2u, 3u };
+  const uint8_t raw_code2[] = { 4u, 3u, 2u, 1u };
+  ArrayRef<const uint8_t> code[] = {
+      ArrayRef<const uint8_t>(raw_code1),
+      ArrayRef<const uint8_t>(raw_code2),
+  };
+  const SrcMapElem raw_src_map1[] = { { 1u, 2u }, { 3u, 4u }, { 5u, 6u } };
+  const SrcMapElem raw_src_map2[] = { { 8u, 7u }, { 6u, 5u }, { 4u, 3u }, { 2u, 1u } };
+  ArrayRef<const SrcMapElem> src_map[] = {
+      ArrayRef<const SrcMapElem>(raw_src_map1),
+      ArrayRef<const SrcMapElem>(raw_src_map2),
+  };
+  const uint8_t raw_vmap_table1[] = { 2, 4, 6 };
+  const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 };
+  ArrayRef<const uint8_t> vmap_table[] = {
+      ArrayRef<const uint8_t>(raw_vmap_table1),
+      ArrayRef<const uint8_t>(raw_vmap_table2),
+  };
+  const uint8_t raw_cfi_info1[] = { 1, 3, 5 };
+  const uint8_t raw_cfi_info2[] = { 8, 6, 4, 2 };
+  ArrayRef<const uint8_t> cfi_info[] = {
+      ArrayRef<const uint8_t>(raw_cfi_info1),
+      ArrayRef<const uint8_t>(raw_cfi_info2),
+  };
+  const LinkerPatch raw_patches1[] = {
+      LinkerPatch::CodePatch(0u, nullptr, 1u),
+      LinkerPatch::MethodPatch(4u, nullptr, 1u),
+  };
+  const LinkerPatch raw_patches2[] = {
+      LinkerPatch::CodePatch(0u, nullptr, 1u),
+      LinkerPatch::MethodPatch(4u, nullptr, 2u),
+  };
+  ArrayRef<const LinkerPatch> patches[] = {
+      ArrayRef<const LinkerPatch>(raw_patches1),
+      ArrayRef<const LinkerPatch>(raw_patches2),
+  };
+
+  std::vector<CompiledMethod*> compiled_methods;
+  compiled_methods.reserve(1u << 5);  // One method per combination of the five inputs below.
+  for (auto&& c : code) {
+    for (auto&& s : src_map) {
+      for (auto&& v : vmap_table) {
+        for (auto&& f : cfi_info) {
+          for (auto&& p : patches) {
+            compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod(
+                &driver, kNone, c, 0u, 0u, 0u, s, v, f, p));
+          }
+        }
+      }
+    }
+  }
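+  // Illustrative reading of the loops above: five nested binary choices yield
+  // 1u << 5 methods, and bit k of a method's index records which candidate
+  // array dimension k used. Two methods therefore share a deduplicated array
+  // exactly when the corresponding bit of i ^ j is clear, which the ASSERT_EQ
+  // checks below verify.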
+  constexpr size_t code_bit = 1u << 4;
+  constexpr size_t src_map_bit = 1u << 3;
+  constexpr size_t vmap_table_bit = 1u << 2;
+  constexpr size_t cfi_info_bit = 1u << 1;
+  constexpr size_t patches_bit = 1u << 0;
+  CHECK_EQ(compiled_methods.size(), 1u << 5);
+  for (size_t i = 0; i != compiled_methods.size(); ++i) {
+    for (size_t j = 0; j != compiled_methods.size(); ++j) {
+      CompiledMethod* lhs = compiled_methods[i];
+      CompiledMethod* rhs = compiled_methods[j];
+      bool same_code = ((i ^ j) & code_bit) == 0u;
+      bool same_src_map = ((i ^ j) & src_map_bit) == 0u;
+      bool same_vmap_table = ((i ^ j) & vmap_table_bit) == 0u;
+      bool same_cfi_info = ((i ^ j) & cfi_info_bit) == 0u;
+      bool same_patches = ((i ^ j) & patches_bit) == 0u;
+      ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data())
+          << i << " " << j;
+      ASSERT_EQ(same_src_map, lhs->GetSrcMappingTable().data() == rhs->GetSrcMappingTable().data())
+          << i << " " << j;
+      ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data())
+          << i << " " << j;
+      ASSERT_EQ(same_cfi_info, lhs->GetCFIInfo().data() == rhs->GetCFIInfo().data())
+          << i << " " << j;
+      ASSERT_EQ(same_patches, lhs->GetPatches().data() == rhs->GetPatches().data())
+          << i << " " << j;
+    }
+  }
+  for (CompiledMethod* method : compiled_methods) {
+    CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&driver, method);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 1a7dbe3..3a260f5 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -21,6 +21,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "dex_compilation_unit.h"
 #include "mirror/class_loader.h"
@@ -186,17 +187,7 @@
       } else {
         // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
-        std::string temp;
-        const DexFile::StringId* string_id =
-            dex_file->FindStringId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
-        if (string_id != nullptr) {
-          const DexFile::TypeId* type_id =
-             dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
-          if (type_id != nullptr) {
-            // medium path, needs check of static storage base being initialized
-            storage_idx = dex_file->GetIndexForTypeId(*type_id);
-          }
-        }
+        storage_idx = resolved_member->GetDeclaringClass()->FindTypeIndexInOtherDexFile(*dex_file);
       }
       if (storage_idx != DexFile::kDexNoIndex) {
         *storage_index = storage_idx;
@@ -268,18 +259,16 @@
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change) {
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
-  ArtMethod* resolved_method = mUnit->GetClassLinker()->ResolveMethod(
-      *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
-  DCHECK_EQ(resolved_method == nullptr, soa.Self()->IsExceptionPending());
+  ArtMethod* resolved_method =
+      check_incompatible_class_change
+          ? mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kForceICCECheck>(
+              *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type)
+          : mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+              *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
   if (UNLIKELY(resolved_method == nullptr)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     // Clean up any exception left by type resolution.
     soa.Self()->ClearException();
-    return nullptr;
-  }
-  if (check_incompatible_class_change &&
-      UNLIKELY(resolved_method->CheckIncompatibleClassChange(invoke_type))) {
-    // Silently return null on incompatible class change.
-    return nullptr;
   }
   return resolved_method;
 }
@@ -311,11 +300,31 @@
     MethodReference* target_method, const MethodReference* devirt_target,
     uintptr_t* direct_code, uintptr_t* direct_method) {
   // Don't try to fast-path if we don't understand the caller's class.
+  // referrer_class is the class that contains this invoke.
   if (UNLIKELY(referrer_class == nullptr)) {
     return 0;
   }
-  mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-  if (UNLIKELY(!referrer_class->CanAccessResolvedMethod(methods_class, resolved_method,
+  StackHandleScope<2> hs(soa.Self());
+  // methods_class is the class referred to by the class_idx field of the MethodId that
+  // method_idx points to.
+  // For example in
+  //   .class LABC;
+  //   .super LDEF;
+  //   .method hi()V
+  //     ...
+  //     invoke-super {p0}, LDEF;->hi()V
+  //     ...
+  //   .end method
+  // the referrer_class is 'ABC' and the methods_class is 'DEF'. Note that the methods_class is
+  // 'DEF' even if 'DEF' inherits the method from its superclass.
+  Handle<mirror::Class> methods_class(hs.NewHandle(mUnit->GetClassLinker()->ResolveType(
+      *target_method->dex_file,
+      target_method->dex_file->GetMethodId(target_method->dex_method_index).class_idx_,
+      dex_cache,
+      class_loader)));
+  DCHECK(methods_class.Get() != nullptr);
+  mirror::Class* methods_declaring_class = resolved_method->GetDeclaringClass();
+  if (UNLIKELY(!referrer_class->CanAccessResolvedMethod(methods_declaring_class, resolved_method,
                                                         dex_cache.Get(),
                                                         target_method->dex_method_index))) {
     return 0;
@@ -324,18 +333,31 @@
   // overridden (ie is final).
   const bool same_dex_file = target_method->dex_file == mUnit->GetDexFile();
   bool can_sharpen_virtual_based_on_type = same_dex_file &&
-      (*invoke_type == kVirtual) && (resolved_method->IsFinal() || methods_class->IsFinal());
+      (*invoke_type == kVirtual) && (resolved_method->IsFinal() ||
+                                     methods_declaring_class->IsFinal());
   // For invoke-super, ensure the vtable index will be correct to dispatch in the vtable of
   // the super class.
-  const size_t pointer_size = InstructionSetPointerSize(GetInstructionSet());
-  bool can_sharpen_super_based_on_type = same_dex_file && (*invoke_type == kSuper) &&
-      (referrer_class != methods_class) && referrer_class->IsSubClass(methods_class) &&
-      resolved_method->GetMethodIndex() < methods_class->GetVTableLength() &&
-      (methods_class->GetVTableEntry(
+  const PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  // TODO: We should be able to sharpen if we are going into the boot image as well.
+  bool can_sharpen_super_based_on_type = same_dex_file &&
+      (*invoke_type == kSuper) &&
+      !methods_class->IsInterface() &&
+      (referrer_class != methods_declaring_class) &&
+      referrer_class->IsSubClass(methods_declaring_class) &&
+      resolved_method->GetMethodIndex() < methods_declaring_class->GetVTableLength() &&
+      (methods_declaring_class->GetVTableEntry(
           resolved_method->GetMethodIndex(), pointer_size) == resolved_method) &&
-      !resolved_method->IsAbstract();
+      resolved_method->IsInvokable();
+  // TODO: We should be able to sharpen if we are going into the boot image as well.
+  bool can_sharpen_interface_super_based_on_type = same_dex_file &&
+      (*invoke_type == kSuper) &&
+      methods_class->IsInterface() &&
+      methods_class->IsAssignableFrom(referrer_class) &&
+      resolved_method->IsInvokable();
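+  // Illustrative summary of the three cases above: sharpen a virtual call whose
+  // target cannot be overridden (final method or final declaring class), an
+  // invoke-super that dispatches through a superclass vtable slot, or an
+  // invoke-super that names a method on an interface the referrer implements.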
 
-  if (can_sharpen_virtual_based_on_type || can_sharpen_super_based_on_type) {
+  if (can_sharpen_virtual_based_on_type ||
+      can_sharpen_super_based_on_type ||
+      can_sharpen_interface_super_based_on_type) {
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
     // dex cache, check that this resolved method is where we expect it.
     CHECK_EQ(target_method->dex_file, mUnit->GetDexFile());
@@ -365,20 +387,18 @@
     ArtMethod* called_method;
     ClassLinker* class_linker = mUnit->GetClassLinker();
     if (LIKELY(devirt_target->dex_file == mUnit->GetDexFile())) {
-      called_method = class_linker->ResolveMethod(
+      called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, dex_cache, class_loader,
           nullptr, kVirtual);
     } else {
-      StackHandleScope<1> hs(soa.Self());
-      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
-          *devirt_target->dex_file,
-          class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
-      called_method = class_linker->ResolveMethod(
+      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(*devirt_target->dex_file,
+                                                                       class_loader.Get())));
+      called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, target_dex_cache,
           class_loader, nullptr, kVirtual);
     }
     CHECK(called_method != nullptr);
-    CHECK(!called_method->IsAbstract());
+    CHECK(called_method->IsInvokable());
     int stats_flags = kFlagMethodResolved;
     GetCodeAndMethodForDirectCall(/*out*/invoke_type,
                                   kDirect,  // Sharp type
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index b956584..758cd93 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -16,9 +16,6 @@
 
 #include "compiler_driver.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include <utils/Trace.h>
-
 #include <unordered_set>
 #include <vector>
 #include <unistd.h>
@@ -29,7 +26,10 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/bit_vector.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "class_linker-inl.h"
@@ -39,16 +39,15 @@
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -68,6 +67,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
+#include "utils/array_ref.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "utils/swap_space.h"
 #include "verifier/method_verifier.h"
@@ -77,12 +77,8 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
-
-// Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
-// given, too all compilations.
-static constexpr bool kRestrictCompilationFiltersToImage = true;
+// Print additional info during profile guided compilation.
+static constexpr bool kDebugProfileGuidedCompilation = false;
 
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
@@ -335,101 +331,85 @@
   DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);
 };
 
-CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
-                               VerificationResults* verification_results,
-                               DexFileToMethodInlinerMap* method_inliner_map,
-                               Compiler::Kind compiler_kind,
-                               InstructionSet instruction_set,
-                               const InstructionSetFeatures* instruction_set_features,
-                               bool image, std::unordered_set<std::string>* image_classes,
-                               std::unordered_set<std::string>* compiled_classes,
-                               std::unordered_set<std::string>* compiled_methods,
-                               size_t thread_count, bool dump_stats, bool dump_passes,
-                               const std::string& dump_cfg_file_name, bool dump_cfg_append,
-                               CumulativeLogger* timer, int swap_fd,
-                               const std::string& profile_file)
-    : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
-      swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())),
-      profile_present_(false), compiler_options_(compiler_options),
+class CompilerDriver::DexFileMethodSet {
+ public:
+  explicit DexFileMethodSet(const DexFile& dex_file)
+    : dex_file_(dex_file),
+      method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) {
+  }
+  DexFileMethodSet(DexFileMethodSet&& other) = default;
+
+  const DexFile& GetDexFile() const { return dex_file_; }
+
+  BitVector& GetMethodIndexes() { return method_indexes_; }
+  const BitVector& GetMethodIndexes() const { return method_indexes_; }
+
+ private:
+  const DexFile& dex_file_;
+  BitVector method_indexes_;
+};
+
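+// Illustrative role of DexFileMethodSet (sketch based on this change's
+// MarkForDexToDexCompilation): the first compilation pass sets one bit per
+// method that should be dex-to-dex compiled, and the second pass walks
+// GetMethodIndexes() to replay exactly those methods.
+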
+CompilerDriver::CompilerDriver(
+    const CompilerOptions* compiler_options,
+    VerificationResults* verification_results,
+    DexFileToMethodInlinerMap* method_inliner_map,
+    Compiler::Kind compiler_kind,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    bool boot_image,
+    bool app_image,
+    std::unordered_set<std::string>* image_classes,
+    std::unordered_set<std::string>* compiled_classes,
+    std::unordered_set<std::string>* compiled_methods,
+    size_t thread_count,
+    bool dump_stats,
+    bool dump_passes,
+    CumulativeLogger* timer,
+    int swap_fd,
+    const ProfileCompilationInfo* profile_compilation_info)
+    : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
       compiler_kind_(compiler_kind),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
-      freezing_constructor_lock_("freezing constructor lock"),
+      requires_constructor_barrier_lock_("constructor barrier lock"),
       compiled_classes_lock_("compiled classes lock"),
       compiled_methods_lock_("compiled method lock"),
       compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
-      image_(image),
+      boot_image_(boot_image),
+      app_image_(app_image),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
       had_hard_verifier_failure_(false),
-      thread_count_(thread_count),
+      parallel_thread_count_(thread_count),
       stats_(new AOTCompilationStats),
-      dedupe_enabled_(true),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
-      dump_cfg_file_name_(dump_cfg_file_name),
-      dump_cfg_append_(dump_cfg_append),
       timings_logger_(timer),
       compiler_context_(nullptr),
-      support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
-      dedupe_code_("dedupe code", *swap_space_allocator_),
-      dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_),
-      dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_),
-      dedupe_vmap_table_("dedupe vmap table", *swap_space_allocator_),
-      dedupe_gc_map_("dedupe gc map", *swap_space_allocator_),
-      dedupe_cfi_info_("dedupe cfi info", *swap_space_allocator_) {
+      support_boot_image_fixup_(instruction_set != kMips64),
+      dex_files_for_oat_file_(nullptr),
+      compiled_method_storage_(swap_fd),
+      profile_compilation_info_(profile_compilation_info),
+      max_arena_alloc_(0),
+      dex_to_dex_references_lock_("dex-to-dex references lock"),
+      dex_to_dex_references_(),
+      current_dex_to_dex_methods_(nullptr) {
   DCHECK(compiler_options_ != nullptr);
-  DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
 
   compiler_->Init();
 
-  CHECK_EQ(image_, image_classes_.get() != nullptr);
-
-  // Read the profile file if one is provided.
-  if (!profile_file.empty()) {
-    profile_present_ = profile_file_.LoadFile(profile_file);
-    if (profile_present_) {
-      LOG(INFO) << "Using profile data form file " << profile_file;
-    } else {
-      LOG(INFO) << "Failed to load profile file " << profile_file;
-    }
+  if (compiler_options->VerifyOnlyProfile()) {
+    CHECK(profile_compilation_info_ != nullptr) << "Requires profile";
   }
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateCode(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_code_.Add(Thread::Current(), code);
-}
-
-SwapSrcMap* CompilerDriver::DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_src_mapping_table_.Add(Thread::Current(), src_map);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateMappingTable(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_mapping_table_.Add(Thread::Current(), code);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateVMapTable(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_vmap_table_.Add(Thread::Current(), code);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateGCMap(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_gc_map_.Add(Thread::Current(), code);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_cfi_info_.Add(Thread::Current(), cfi_info);
+  if (boot_image_) {
+    CHECK(image_classes_.get() != nullptr) << "Expected image classes for boot image";
+  }
 }
 
 CompilerDriver::~CompilerDriver() {
@@ -447,32 +427,37 @@
   compiler_->UnInit();
 }
 
+
 #define CREATE_TRAMPOLINE(type, abi, offset) \
     if (Is64BitInstructionSet(instruction_set_)) { \
       return CreateTrampoline64(instruction_set_, abi, \
-                                type ## _ENTRYPOINT_OFFSET(8, offset)); \
+                                type ## _ENTRYPOINT_OFFSET(PointerSize::k64, offset)); \
     } else { \
       return CreateTrampoline32(instruction_set_, abi, \
-                                type ## _ENTRYPOINT_OFFSET(4, offset)); \
+                                type ## _ENTRYPOINT_OFFSET(PointerSize::k32, offset)); \
     }
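+// Illustrative expansion (hypothetical target): for an arm64 instruction set,
+// CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickToInterpreterBridge) becomes
+//   return CreateTrampoline64(instruction_set_, kQuickAbi,
+//       QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pQuickToInterpreterBridge));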
 
-const std::vector<uint8_t>* CompilerDriver::CreateJniDlsymLookup() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateJniDlsymLookup() const {
   CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickGenericJniTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickGenericJniTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickGenericJniTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickImtConflictTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickImtConflictTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickImtConflictTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickResolutionTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickResolutionTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickResolutionTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickToInterpreterBridge() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickToInterpreterBridge()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickToInterpreterBridge)
 }
 #undef CREATE_TRAMPOLINE
@@ -481,24 +466,27 @@
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  std::unique_ptr<ThreadPool> thread_pool(
-      new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+
+  InitializeThreadPools();
+
   VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false);
   // Precompile:
   // 1) Load image classes
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, thread_pool.get(), timings);
+  PreCompile(class_loader, dex_files, timings);
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
   if (!GetCompilerOptions().VerifyAtRuntime()) {
-    Compile(class_loader, dex_files, thread_pool.get(), timings);
+    Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
     stats_->Dump();
   }
+
+  FreeThreadPools();
 }
 
 static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
@@ -506,7 +494,7 @@
     const DexFile& dex_file, const DexFile::ClassDef& class_def)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto* const runtime = Runtime::Current();
-  if (runtime->UseJit() || driver.GetCompilerOptions().VerifyAtRuntime()) {
+  if (runtime->UseJitCompilation() || driver.GetCompilerOptions().VerifyAtRuntime()) {
     // Verify at runtime shouldn't dex to dex since we didn't resolve or verify.
     return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
   }
@@ -583,43 +571,15 @@
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
   MethodReference method_ref(&dex_file, method_idx);
 
-  if ((access_flags & kAccNative) != 0) {
-    // Are we interpreting only and have support for generic JNI down calls?
-    if (!driver->GetCompilerOptions().IsCompilationEnabled() &&
-        InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
-      // Leaving this empty will trigger the generic JNI version
-    } else {
-      compiled_method = driver->GetCompiler()->JniCompile(access_flags, method_idx, dex_file);
-      CHECK(compiled_method != nullptr);
-    }
-  } else if ((access_flags & kAccAbstract) != 0) {
-    // Abstract methods don't have code.
-  } else if (Runtime::Current()->IsAotCompiler()) {
-    const VerifiedMethod* verified_method =
-        driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
-    bool compile = compilation_enabled &&
-        // Basic checks, e.g., not <clinit>.
-        driver->GetVerificationResults()
-            ->IsCandidateForCompilation(method_ref, access_flags) &&
-        // Did not fail to create VerifiedMethod metadata.
-        verified_method != nullptr &&
-        // Do not have failures that should punt to the interpreter.
-        !verified_method->HasRuntimeThrow() &&
-        (verified_method->GetEncounteredVerificationFailures() &
-            (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
-        // Is eligable for compilation by methods-to-compile filter.
-        driver->IsMethodToCompile(method_ref);
-    if (compile) {
-      // NOTE: if compiler declines to compile this method, it will return null.
-      compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
-                                                       class_def_idx, method_idx, class_loader,
-                                                       dex_file, dex_cache);
-    }
-    if (compiled_method == nullptr &&
-        dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
-      // TODO: add a command-line option to disable DEX-to-DEX compilation ?
-      // Do not optimize if a VerifiedMethod is missing. SafeCast elision, for example, relies on
-      // it.
+  if (driver->GetCurrentDexToDexMethods() != nullptr) {
+    // This is the second pass when we dex-to-dex compile previously marked methods.
+    // TODO: Refactor the compilation to avoid having to distinguish the two passes
+    // here. That should be done on a higher level. http://b/29089975
+    if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) {
+      const VerifiedMethod* verified_method =
+          driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
+      // Do not optimize if a VerifiedMethod is missing. SafeCast elision,
+      // for example, relies on it.
       compiled_method = optimizer::ArtCompileDEX(
           driver,
           code_item,
@@ -633,13 +593,77 @@
               ? dex_to_dex_compilation_level
               : optimizer::DexToDexCompilationLevel::kRequired);
     }
+  } else if ((access_flags & kAccNative) != 0) {
+    // Are we extracting only, and do we have support for generic JNI down calls?
+    if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
+        InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
+      // Leaving this empty will trigger the generic JNI version
+    } else {
+      // Look-up the ArtMethod associated with this code_item (if any)
+      // -- It is later used to lookup any [optimization] annotations for this method.
+      ScopedObjectAccess soa(self);
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::ClassLoader> class_loader_handle(hs.NewHandle(
+          soa.Decode<mirror::ClassLoader*>(class_loader)));
+
+      // TODO: Lookup annotation from DexFile directly without resolving method.
+      ArtMethod* method =
+          Runtime::Current()->GetClassLinker()->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+              dex_file,
+              method_idx,
+              dex_cache,
+              class_loader_handle,
+              /* referrer */ nullptr,
+              invoke_type);
+
+      bool fast_native = false;
+      if (LIKELY(method != nullptr)) {
+        fast_native = method->IsAnnotatedWithFastNative();
+      } else {
+        // Failed method resolutions happen very rarely, e.g. ancestor class cannot be resolved.
+        DCHECK(self->IsExceptionPending());
+        self->ClearException();
+      }
+
+      Compiler::JniOptimizationFlags optimization_flags =
+          fast_native ? Compiler::kFastNative : Compiler::kNone;
+      compiled_method = driver->GetCompiler()->JniCompile(access_flags,
+                                                          method_idx,
+                                                          dex_file,
+                                                          optimization_flags);
+      CHECK(compiled_method != nullptr);
+    }
+  } else if ((access_flags & kAccAbstract) != 0) {
+    // Abstract methods don't have code.
   } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
-                                                     class_def_idx, method_idx, class_loader,
-                                                     dex_file, dex_cache);
+    const VerifiedMethod* verified_method =
+        driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
+    bool compile = compilation_enabled &&
+        // Basic checks, e.g., not <clinit>.
+        driver->GetVerificationResults()
+            ->IsCandidateForCompilation(method_ref, access_flags) &&
+        // Did not fail to create VerifiedMethod metadata.
+        verified_method != nullptr &&
+        // Do not have failures that should punt to the interpreter.
+        !verified_method->HasRuntimeThrow() &&
+        (verified_method->GetEncounteredVerificationFailures() &
+            (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
+        // Is eligible for compilation by the methods-to-compile filter.
+        driver->IsMethodToCompile(method_ref) &&
+        driver->ShouldCompileBasedOnProfile(method_ref);
+
+    if (compile) {
+      // NOTE: if compiler declines to compile this method, it will return null.
+      compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
+                                                       class_def_idx, method_idx, class_loader,
+                                                       dex_file, dex_cache);
+    }
+    if (compiled_method == nullptr &&
+        dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Add a command-line option to disable DEX-to-DEX compilation?
+      driver->MarkForDexToDexCompilation(self, method_ref);
+    }
   }
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
@@ -664,12 +688,6 @@
     driver->AddCompiledMethod(method_ref, compiled_method, non_relative_linker_patch_count);
   }
 
-  // Done compiling, delete the verified method to reduce native memory usage. Do not delete in
-  // optimizing compiler, which may need the verified method again for inlining.
-  if (driver->GetCompilerKind() != Compiler::kOptimizing) {
-    driver->GetVerificationResults()->RemoveVerifiedMethod(method_ref);
-  }
-
   if (self->IsExceptionPending()) {
     ScopedObjectAccess soa(self);
     LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
@@ -704,8 +722,9 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
-  PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
+  InitializeThreadPools();
+
+  PreCompile(jclass_loader, dex_files, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -715,6 +734,7 @@
                                   *dex_file,
                                   dex_file->GetClassDef(class_def_idx));
 
+  DCHECK(current_dex_to_dex_methods_ == nullptr);
   CompileMethod(self,
                 this,
                 code_item,
@@ -728,76 +748,206 @@
                 true,
                 dex_cache);
 
+  ArrayRef<DexFileMethodSet> dex_to_dex_references;
+  {
+    // From this point on, we shall not modify dex_to_dex_references_, so
+    // just grab a reference to it that we use without holding the mutex.
+    MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
+    dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
+  }
+  if (!dex_to_dex_references.empty()) {
+    DCHECK_EQ(dex_to_dex_references.size(), 1u);
+    DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file);
+    current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes();
+    DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx));
+    DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u);
+    CompileMethod(self,
+                  this,
+                  code_item,
+                  access_flags,
+                  invoke_type,
+                  class_def_idx,
+                  method_idx,
+                  jclass_loader,
+                  *dex_file,
+                  dex_to_dex_compilation_level,
+                  true,
+                  dex_cache);
+    current_dex_to_dex_methods_ = nullptr;
+  }
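+  // Illustrative flow: the first CompileMethod call above may only mark the
+  // method via MarkForDexToDexCompilation; when it did, the block above sets
+  // current_dex_to_dex_methods_, replays the same method so that CompileMethod
+  // takes its dex-to-dex branch, and then clears the pointer.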
+
+  FreeThreadPools();
+
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) {
-  const uint32_t method_idx = method->GetDexMethodIndex();
-  const uint32_t access_flags = method->GetAccessFlags();
-  const InvokeType invoke_type = method->GetInvokeType();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      method->GetDeclaringClass()->GetClassLoader()));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-  jobject jclass_loader = class_loader.ToJObject();
-  const DexFile* dex_file = method->GetDexFile();
-  const uint16_t class_def_idx = method->GetClassDefIndex();
-  const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-  optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
-      GetDexToDexCompilationLevel(self, *this, class_loader, *dex_file, class_def);
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-  // Go to native so that we don't block GC during compilation.
-  ScopedThreadSuspension sts(self, kNative);
-  CompileMethod(self,
-                this,
-                code_item,
-                access_flags,
-                invoke_type,
-                class_def_idx,
-                method_idx,
-                jclass_loader,
-                *dex_file,
-                dex_to_dex_compilation_level,
-                true,
-                dex_cache);
-  auto* compiled_method = GetCompiledMethod(MethodReference(dex_file, method_idx));
-  return compiled_method;
-}
+void CompilerDriver::Resolve(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  // Resolution allocates classes and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* resolve_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t resolve_thread_count = force_determinism ? 1U : parallel_thread_count_;
 
-void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    ResolveDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   resolve_thread_pool,
+                   resolve_thread_count,
+                   timings);
   }
 }
 
-void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool* thread_pool, TimingLogger* timings) {
+// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right now
+// this is single-threaded for simplicity.
+// TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
+//       stable order.
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const DexFile& dex_file,
+                                const DexFile::CodeItem* code_item) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  const uint16_t* code_ptr = code_item->insns_;
+  const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+
+  while (code_ptr < code_end) {
+    const Instruction* inst = Instruction::At(code_ptr);
+    switch (inst->Opcode()) {
+      case Instruction::CONST_STRING: {
+        uint32_t string_index = inst->VRegB_21c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+      case Instruction::CONST_STRING_JUMBO: {
+        uint32_t string_index = inst->VRegB_31c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+
+      default:
+        break;
+    }
+
+    code_ptr += inst->SizeInCodeUnits();
+  }
+}
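
The TODO above suggests collecting the string indices in parallel and then resolving them in a stable order. A minimal sketch of that shape in standard C++, assuming a resolve callback that stands in for ClassLinker::ResolveString (all names here are illustrative, not ART code):

#include <algorithm>
#include <cstdint>
#include <functional>
#include <mutex>
#include <vector>

// Hypothetical sketch: gather string indices from many threads, then
// resolve them once in a stable (sorted, deduplicated) order.
class ConstStringCollector {
 public:
  void Add(uint32_t string_index) {
    std::lock_guard<std::mutex> lock(mutex_);
    indices_.push_back(string_index);
  }

  // Called once, after all collecting threads have finished.
  void ResolveAll(const std::function<void(uint32_t)>& resolve) {
    std::sort(indices_.begin(), indices_.end());
    indices_.erase(std::unique(indices_.begin(), indices_.end()), indices_.end());
    for (uint32_t index : indices_) {
      resolve(index);  // Same input set implies the same resolution order.
    }
  }

 private:
  std::mutex mutex_;
  std::vector<uint32_t> indices_;
};
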
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  for (const DexFile* dex_file : dex_files) {
+    TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
+
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // Empty class, probably a marker interface.
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields.
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped; do not resolve const-strings in this class's code.
+        // TODO: Make sure that inlining honors this.
+        continue;
+      }
+
+      // Direct methods.
+      int64_t previous_direct_method_idx = -1;
+      while (it.HasNextDirectMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_direct_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_direct_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      // Virtual methods.
+      int64_t previous_virtual_method_idx = -1;
+      while (it.HasNextVirtualMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_virtual_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_virtual_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+}
+
+inline void CompilerDriver::CheckThreadPools() {
+  DCHECK(parallel_thread_pool_ != nullptr);
+  DCHECK(single_thread_pool_ != nullptr);
+}
+
+void CompilerDriver::PreCompile(jobject class_loader,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  CheckThreadPools();
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
   const bool verification_enabled = compiler_options_->IsVerificationEnabled();
   const bool never_verify = compiler_options_->NeverVerify();
+  const bool verify_only_profile = compiler_options_->VerifyOnlyProfile();
 
   // We need to resolve for never_verify since it needs to run dex-to-dex to add the
   // RETURN_VOID_NO_BARRIER.
-  if (never_verify || verification_enabled) {
-    Resolve(class_loader, dex_files, thread_pool, timings);
+  // Let the verifier resolve as needed for the verify_only_profile case.
+  if ((never_verify || verification_enabled) && !verify_only_profile) {
+    Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }
 
   if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
-    SetVerified(class_loader, dex_files, thread_pool, timings);
+    SetVerified(class_loader, dex_files, timings);
   }
 
   if (!verification_enabled) {
     return;
   }
 
-  Verify(class_loader, dex_files, thread_pool, timings);
+  if (GetCompilerOptions().IsForceDeterminism() && IsBootImage()) {
+    // Resolve strings from const-string. Do this now to have a deterministic image.
+    ResolveConstStrings(this, dex_files, timings);
+    VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
+  }
+
+  Verify(class_loader, dex_files, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -805,7 +955,7 @@
                << "situations. Please check the log.";
   }
 
-  InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  InitializeClasses(class_loader, dex_files, timings);
   VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
 
   UpdateImageClasses(timings);
@@ -813,19 +963,16 @@
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
-  if (!IsImage()) {
-    // NOTE: Currently unreachable, all callers check IsImage().
-    return false;
-  } else {
+  if (image_classes_ != nullptr) {
+    // If we have a set of image classes, use those.
     return image_classes_->find(descriptor) != image_classes_->end();
   }
+  // No set of image classes; assume all classes are included.
+  // NOTE: Currently only reachable from InitImageMethodVisitor for the app image case.
+  return !IsBootImage();
 }
 
 bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
-    return true;
-  }
-
   if (classes_to_compile_ == nullptr) {
     return true;
   }
@@ -833,10 +980,6 @@
 }
 
 bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
-    return true;
-  }
-
   if (methods_to_compile_ == nullptr) {
     return true;
   }
@@ -845,25 +988,54 @@
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
+bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
+  if (profile_compilation_info_ == nullptr) {
+    // Missing profile information means we are not doing a profile-guided compilation.
+    // Return true, and let the other filters decide if the method should be compiled.
+    return true;
+  }
+  bool result = profile_compilation_info_->ContainsMethod(method_ref);
+
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] "
+        << (result ? "Compiled" : "Skipped") << " method:"
+        << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
+  }
+  return result;
+}
+
+bool CompilerDriver::ShouldVerifyClassBasedOnProfile(const DexFile& dex_file,
+                                                     uint16_t class_idx) const {
+  if (!compiler_options_->VerifyOnlyProfile()) {
+    // Not in verify-only-profile mode; verify everything.
+    return true;
+  }
+  DCHECK(profile_compilation_info_ != nullptr);
+  bool result = profile_compilation_info_->ContainsClass(dex_file, class_idx);
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] "
+        << (result ? "Verified" : "Skipped") << " method:"
+        << dex_file.GetClassDescriptor(dex_file.GetClassDef(class_idx));
+  }
+  return result;
+}
+
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
-  ResolveCatchBlockExceptionsClassVisitor(
+  explicit ResolveCatchBlockExceptionsClassVisitor(
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
-  virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  virtual bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : c->GetVirtualMethods(pointer_size)) {
-      ResolveExceptionsForMethod(&m, pointer_size);
-    }
-    for (auto& m : c->GetDirectMethods(pointer_size)) {
+    for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
     }
     return true;
   }
 
  private:
-  void ResolveExceptionsForMethod(ArtMethod* method_handle, size_t pointer_size)
+  void ResolveExceptionsForMethod(ArtMethod* method_handle, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
     if (code_item == nullptr) {
@@ -908,7 +1080,7 @@
   explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes)
       : image_classes_(image_classes) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     std::string temp;
     image_classes_->insert(klass->GetDescriptor(&temp));
     return true;
@@ -921,7 +1093,7 @@
 // Make a list of descriptors for classes to include in the image
 void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
   CHECK(timings != nullptr);
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     return;
   }
 
@@ -960,12 +1132,13 @@
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
       StackHandleScope<2> hs2(self);
-      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(
-          *dex_file,
-          Runtime::Current()->GetLinearAlloc())));
+      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file,
+                                                                                     nullptr)));
       Handle<mirror::Class> klass(hs2.NewHandle(
-          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
-                                    NullHandle<mirror::ClassLoader>())));
+          class_linker->ResolveType(*dex_file,
+                                    exception_type_idx,
+                                    dex_cache,
+                                    ScopedNullHandle<mirror::ClassLoader>())));
       if (klass.Get() == nullptr) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
@@ -994,7 +1167,7 @@
   // Make a copy of the handle so that we don't clobber it doing Assign.
   MutableHandle<mirror::Class> klass(hs.NewHandle(c.Get()));
   std::string temp;
-  const size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   while (!klass->IsObjectClass()) {
     const char* descriptor = klass->GetDescriptor(&temp);
     std::pair<std::unordered_set<std::string>::iterator, bool> result =
@@ -1009,10 +1182,8 @@
                              image_classes);
     }
     for (auto& m : c->GetVirtualMethods(pointer_size)) {
-      if (m.IsMiranda() || (true)) {
-        StackHandleScope<1> hs2(self);
-        MaybeAddToImageClasses(hs2.NewHandle(m.GetDeclaringClass()), image_classes);
-      }
+      StackHandleScope<1> hs2(self);
+      MaybeAddToImageClasses(hs2.NewHandle(m.GetDeclaringClass()), image_classes);
     }
     if (klass->IsArrayClass()) {
       StackHandleScope<1> hs2(self);
@@ -1073,7 +1244,7 @@
    public:
     explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {}
 
-    bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
       std::string temp;
       const char* name = klass->GetDescriptor(&temp);
       if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {
@@ -1150,7 +1321,7 @@
 };
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
-  if (IsImage()) {
+  if (IsBootImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
 
     Runtime* runtime = Runtime::Current();
@@ -1173,11 +1344,11 @@
 bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) {
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {
-    DCHECK(runtime->UseJit());
+    DCHECK(runtime->UseJitCompilation());
     // Having the klass reference here implies that the klass is already loaded.
     return true;
   }
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     // Assume loaded only if klass is in the boot image. App classes cannot be assumed
     // loaded because we don't even know what class loader will be used to load them.
     bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace();
@@ -1188,26 +1359,34 @@
   return IsImageClass(descriptor);
 }
 
-bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
-  if (IsImage() &&
-      IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-          soa.Self(), dex_file, false);
-      mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-      if (resolved_class == nullptr) {
-        // Erroneous class.
-        stats_->TypeNotInDexCache();
-        return false;
-      }
-    }
+void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) {
+  MutexLock lock(self, dex_to_dex_references_lock_);
+  // Since we're compiling one dex file at a time, we need to look for the
+  // current dex file entry only at the end of dex_to_dex_references_.
+  if (dex_to_dex_references_.empty() ||
+      &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) {
+    dex_to_dex_references_.emplace_back(*method_ref.dex_file);
+  }
+  dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.dex_method_index);
+}
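
MarkForDexToDexCompilation above keeps one bit set of method indices per dex file and appends a new entry only when the dex file changes, which works because files are compiled one at a time. A self-contained sketch of that bookkeeping, with std::vector<bool> standing in for ART's BitVector and a string for the dex file identity (illustrative only):

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Hypothetical sketch: per-file bit sets of marked method indices,
// appended in the order the files are first seen.
class DexToDexMarks {
 public:
  void Mark(const std::string& dex_location, uint32_t method_index) {
    if (sets_.empty() || sets_.back().first != dex_location) {
      sets_.emplace_back(dex_location, std::vector<bool>());
    }
    std::vector<bool>& bits = sets_.back().second;
    if (bits.size() <= method_index) {
      bits.resize(method_index + 1, false);
    }
    bits[method_index] = true;
  }

 private:
  std::vector<std::pair<std::string, std::vector<bool>>> sets_;
};
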
+
+bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
+                                                      uint32_t type_idx) {
+  bool result = false;
+  if ((IsBootImage() &&
+       IsImageClass(dex_cache->GetDexFile()->StringDataByIdx(
+           dex_cache->GetDexFile()->GetTypeId(type_idx).descriptor_idx_))) ||
+      Runtime::Current()->UseJitCompilation()) {
+    mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+    result = (resolved_class != nullptr);
+  }
+
+  if (result) {
     stats_->TypeInDexCache();
-    return true;
   } else {
     stats_->TypeNotInDexCache();
-    return false;
   }
+  return result;
 }
 
 bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
@@ -1215,15 +1394,21 @@
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsImage()) {
-    // We resolve all const-string strings when building for the image.
+  if (IsBootImage() || Runtime::Current()->UseJitCompilation()) {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
         soa.Self(), dex_file, false)));
-    class_linker->ResolveString(dex_file, string_idx, dex_cache);
-    result = true;
+    if (IsBootImage()) {
+      // We resolve all const-string strings when building the boot image.
+      class_linker->ResolveString(dex_file, string_idx, dex_cache);
+      result = true;
+    } else {
+      // Just check whether the dex cache already has the string.
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      result = (dex_cache->GetResolvedString(string_idx) != nullptr);
+    }
   }
   if (result) {
     stats_->StringInDexCache();
@@ -1233,75 +1418,61 @@
   return result;
 }
 
-bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                                uint32_t type_idx,
-                                                bool* type_known_final, bool* type_known_abstract,
-                                                bool* equals_referrers_class) {
-  if (type_known_final != nullptr) {
-    *type_known_final = false;
-  }
-  if (type_known_abstract != nullptr) {
-    *type_known_abstract = false;
-  }
-  if (equals_referrers_class != nullptr) {
-    *equals_referrers_class = false;
-  }
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
+bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
+                                                Handle<mirror::DexCache> dex_cache,
+                                                uint32_t type_idx) {
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
     return false;  // Unknown class needs access checks.
   }
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  if (equals_referrers_class != nullptr) {
-    *equals_referrers_class = (method_id.class_idx_ == type_idx);
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform the access check; this yields true if access is ok, or false if we will have to
+    // check it at runtime (for example, for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
-  }
-  // Perform access check, will return true if access is ok or false if we're going to have to
-  // check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class);
-  if (result) {
+  if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
-    if (type_known_final != nullptr) {
-      *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
-    }
-    if (type_known_abstract != nullptr) {
-      *type_known_abstract = resolved_class->IsAbstract() && !resolved_class->IsArrayClass();
-    }
   } else {
     stats_->TypeNeedsAccessCheck();
   }
-  return result;
+  return is_accessible;
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
-                                                            const DexFile& dex_file,
-                                                            uint32_t type_idx) {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
+                                                            Handle<mirror::DexCache> dex_cache,
+                                                            uint32_t type_idx,
+                                                            bool* finalizable) {
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
+    // Be conservative.
+    *finalizable = true;
     return false;  // Unknown class needs access checks.
   }
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  *finalizable = resolved_class->IsFinalizable();
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform the access and instantiable checks; these yield true if access is ok, or false if
+    // we will have to check this at runtime (for example, for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access and instantiable checks, will return true if access is ok or false if we're
-  // going to have to check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class) && resolved_class->IsInstantiable();
+  bool result = is_accessible && resolved_class->IsInstantiable();
   if (result) {
     stats_->TypeDoesntNeedAccessCheck();
   } else {
@@ -1332,7 +1503,7 @@
   if (compiling_boot) {
     // boot -> boot class pointers.
     // True if the class is in the image at boot-image compilation time.
-    const bool is_image_class = IsImage() && IsImageClass(
+    const bool is_image_class = IsBootImage() && IsImageClass(
         dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
     // True if pc relative load works.
     if (is_image_class && support_boot_image_fixup) {
@@ -1343,7 +1514,7 @@
     } else {
       return false;
     }
-  } else if (runtime->UseJit() && !heap->IsMovableObject(resolved_class)) {
+  } else if (runtime->UseJitCompilation() && !heap->IsMovableObject(resolved_class)) {
     *is_type_initialized = resolved_class->IsInitialized();
     // If the class may move around, then don't embed it as a direct pointer.
     *use_direct_type_ptr = true;
@@ -1407,8 +1578,7 @@
 }
 
 DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
-  // Currently only image dex caches have fixed array layout.
-  return IsImage() && GetSupportBootImageFixup()
+  return ContainsElement(GetDexFilesForOatFile(), dex_file)
       ? DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file)
       : DexCacheArraysLayout();
 }
@@ -1478,53 +1648,6 @@
   }
 }
 
-bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                            bool is_put, MemberOffset* field_offset,
-                                            uint32_t* storage_index, bool* is_referrers_class,
-                                            bool* is_volatile, bool* is_initialized,
-                                            Primitive::Type* type) {
-  ScopedObjectAccess soa(Thread::Current());
-  // Try to resolve the field and compiling method's class.
-  ArtField* resolved_field;
-  mirror::Class* referrer_class;
-  Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache());
-  {
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::ClassLoader> class_loader_handle(
-        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
-    resolved_field =
-        ResolveField(soa, dex_cache, class_loader_handle, mUnit, field_idx, true);
-    referrer_class = resolved_field != nullptr
-        ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader_handle, mUnit) : nullptr;
-  }
-  bool result = false;
-  if (resolved_field != nullptr && referrer_class != nullptr) {
-    *is_volatile = IsFieldVolatile(resolved_field);
-    std::pair<bool, bool> fast_path = IsFastStaticField(
-        dex_cache.Get(), referrer_class, resolved_field, field_idx, storage_index);
-    result = is_put ? fast_path.second : fast_path.first;
-  }
-  if (result) {
-    *field_offset = GetFieldOffset(resolved_field);
-    *is_referrers_class = IsStaticFieldInReferrerClass(referrer_class, resolved_field);
-    // *is_referrers_class == true implies no worrying about class initialization.
-    *is_initialized = (*is_referrers_class) ||
-        (IsStaticFieldsClassInitialized(referrer_class, resolved_field) &&
-         CanAssumeTypeIsPresentInDexCache(*mUnit->GetDexFile(), *storage_index));
-    *type = resolved_field->GetTypeAsPrimitiveType();
-  } else {
-    // Conservative defaults.
-    *is_volatile = true;
-    *field_offset = MemberOffset(static_cast<size_t>(-1));
-    *storage_index = -1;
-    *is_referrers_class = false;
-    *is_initialized = false;
-    *type = Primitive::kPrimVoid;
-  }
-  ProcessedStaticField(result, *is_referrers_class);
-  return result;
-}
-
 void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType sharp_type,
                                                    bool no_guarantee_of_dex_cache_entry,
                                                    const mirror::Class* referrer_class,
@@ -1568,7 +1691,7 @@
       }
     }
   }
-  if (runtime->UseJit()) {
+  if (runtime->UseJitCompilation()) {
     // If we are the JIT, then don't allow a direct call to the interpreter bridge since this will
     // never be updated even after we compile the method.
     if (cl->IsQuickToInterpreterBridge(
@@ -1581,7 +1704,7 @@
   }
   if (!use_dex_cache && force_relocations) {
     bool is_in_image;
-    if (IsImage()) {
+    if (IsBootImage()) {
       is_in_image = IsImageClass(method->GetDeclaringClassDescriptor());
     } else {
       is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 &&
@@ -1600,7 +1723,7 @@
   bool must_use_direct_pointers = false;
   mirror::DexCache* dex_cache = declaring_class->GetDexCache();
   if (target_method->dex_file == dex_cache->GetDexFile() &&
-    !(runtime->UseJit() && dex_cache->GetResolvedMethod(
+    !(runtime->UseJitCompilation() && dex_cache->GetResolvedMethod(
         method->GetDexMethodIndex(), pointer_size) == nullptr)) {
     target_method->dex_method_index = method->GetDexMethodIndex();
   } else {
@@ -1628,14 +1751,16 @@
       *type = sharp_type;
     }
   } else {
-    auto* image_space = heap->GetImageSpace();
     bool method_in_image = false;
-    if (image_space != nullptr) {
+    const std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+    for (gc::space::ImageSpace* image_space : image_spaces) {
       const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
-      method_in_image = method_section.Contains(
-          reinterpret_cast<uint8_t*>(method) - image_space->Begin());
+      if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
+        method_in_image = true;
+        break;
+      }
     }
-    if (method_in_image || compiling_boot || runtime->UseJit()) {
+    if (method_in_image || compiling_boot || runtime->UseJitCompilation()) {
       // We know we must be able to get to the method in the image, so use that pointer.
       // In the case where we are the JIT, we can always use direct pointers since we know where
       // the method and its code are / will be. We don't sharpen to interpreter bridge since we
@@ -1806,6 +1931,9 @@
 
     // Wait for all the worker threads to finish.
     thread_pool_->Wait(self, true, false);
+
+    // And stop the workers from accepting jobs.
+    thread_pool_->StopWorkers(self);
   }
 
   size_t NextIndex() {
@@ -1896,6 +2024,28 @@
   self->ClearException();
 }
 
+bool CompilerDriver::RequiresConstructorBarrier(const DexFile& dex_file,
+                                                uint16_t class_def_idx) const {
+  const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx);
+  const uint8_t* class_data = dex_file.GetClassData(class_def);
+  if (class_data == nullptr) {
+    // Empty class such as a marker interface.
+    return false;
+  }
+  ClassDataItemIterator it(dex_file, class_data);
+  while (it.HasNextStaticField()) {
+    it.Next();
+  }
+  // We require a constructor barrier if there are final instance fields.
+  while (it.HasNextInstanceField()) {
+    if (it.MemberIsFinal()) {
+      return true;
+    }
+    it.Next();
+  }
+  return false;
+}
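
For background, the barrier this predicate gates is a store-store barrier at the end of a constructor: writes to final instance fields must become visible no later than the publication of the new object's reference. A rough C++ analogue of that ordering using a release store (illustrative only; Widget and g_published are made-up names):

#include <atomic>

struct Widget {
  int final_field = 0;  // Plays the role of a Java final instance field.
};

std::atomic<Widget*> g_published{nullptr};

void ConstructAndPublish() {
  Widget* w = new Widget();
  w->final_field = 42;  // "Constructor" write to the final field.
  // Release ordering: the field write above cannot be reordered past the
  // publication below, which is what the constructor barrier guarantees.
  g_published.store(w, std::memory_order_release);
}
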
+
 class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor {
  public:
   explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager)
@@ -1980,7 +2130,7 @@
       }
       if (resolve_fields_and_methods) {
         while (it.HasNextDirectMethod()) {
-          ArtMethod* method = class_linker->ResolveMethod(
+          ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
               dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
@@ -1989,7 +2139,7 @@
           it.Next();
         }
         while (it.HasNextVirtualMethod()) {
-          ArtMethod* method = class_linker->ResolveMethod(
+          ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
               dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
@@ -2000,9 +2150,10 @@
         DCHECK(!it.HasNext());
       }
     }
-    if (requires_constructor_barrier) {
-      manager_->GetCompiler()->AddRequiresConstructorBarrier(self, &dex_file, class_def_index);
-    }
+    manager_->GetCompiler()->SetRequiresConstructorBarrier(self,
+                                                           &dex_file,
+                                                           class_def_index,
+                                                           requires_constructor_barrier);
   }
 
  private:
@@ -2023,7 +2174,7 @@
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager_->GetClassLoader())));
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
         dex_file,
-        class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
+        class_loader.Get())));
     mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
 
     if (klass == nullptr) {
@@ -2042,9 +2193,12 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::ResolveDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -2052,43 +2206,64 @@
 
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
-  if (IsImage()) {
+  if (IsBootImage()) {
     // For images we resolve all types, such as array classes, whereas for applications only
     // those with class defs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
     ResolveTypeVisitor visitor(&context);
-    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count_);
+    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count);
   }
 
   TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings);
   ResolveClassFieldsAndMethodsVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
-void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                 ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::SetVerified(jobject class_loader,
+                                 const std::vector<const DexFile*>& dex_files,
+                                 TimingLogger* timings) {
+  // This can be run in parallel.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    SetVerifiedDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    SetVerifiedDexFile(class_loader,
+                       *dex_file,
+                       dex_files,
+                       parallel_thread_pool_.get(),
+                       parallel_thread_count_,
+                       timings);
   }
 }
 
-void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Verify(jobject class_loader,
+                            const std::vector<const DexFile*>& dex_files,
+                            TimingLogger* timings) {
+  // Note: verification should not be pulling in classes anymore when compiling the boot image,
+  //       as all should have been resolved before. As such, doing this in parallel should still
+  //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    VerifyDexFile(class_loader,
+                  *dex_file,
+                  dex_files,
+                  parallel_thread_pool_.get(),
+                  parallel_thread_count_,
+                  timings);
   }
 }
 
 class VerifyClassVisitor : public CompilationVisitor {
  public:
-  explicit VerifyClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
+  VerifyClassVisitor(const ParallelCompilationManager* manager, LogSeverity log_level)
+     : manager_(manager), log_level_(log_level) {}
 
   virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
     ATRACE_CALL();
     ScopedObjectAccess soa(Thread::Current());
     const DexFile& dex_file = *manager_->GetDexFile();
+    if (!manager_->GetCompiler()->ShouldVerifyClassBasedOnProfile(dex_file, class_def_index)) {
+      // Skip verification since the class is not in the profile.
+      return;
+    }
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
     const char* descriptor = dex_file.GetClassDescriptor(class_def);
     ClassLinker* class_linker = manager_->GetClassLinker();
@@ -2110,8 +2285,15 @@
       Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
           soa.Self(), dex_file, false)));
       std::string error_msg;
-      if (verifier::MethodVerifier::VerifyClass(soa.Self(), &dex_file, dex_cache, class_loader,
-                                                &class_def, true, &error_msg) ==
+      if (verifier::MethodVerifier::VerifyClass(soa.Self(),
+                                                &dex_file,
+                                                dex_cache,
+                                                class_loader,
+                                                &class_def,
+                                                Runtime::Current()->GetCompilerCallbacks(),
+                                                true /* allow soft failures */,
+                                                log_level_,
+                                                &error_msg) ==
                                                     verifier::MethodVerifier::kHardFailure) {
         LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                    << " because: " << error_msg;
@@ -2119,7 +2301,7 @@
       }
     } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) {
       CHECK(klass->IsResolved()) << PrettyClass(klass.Get());
-      class_linker->VerifyClass(soa.Self(), klass);
+      class_linker->VerifyClass(soa.Self(), klass, log_level_);
 
       if (klass->IsErroneous()) {
         // ClassLinker::VerifyClass throws, which isn't useful in the compiler.
@@ -2134,25 +2316,32 @@
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
       // we rely on things working OK without verification when the decryption dialog is brought up.
       // So abort in a debug build if we find this violated.
-      DCHECK(!manager_->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class "
-          << PrettyClass(klass.Get()) << " failed to fully verify.";
+      DCHECK(!manager_->GetCompiler()->IsBootImage() || klass->IsVerified())
+          << "Boot classpath class " << PrettyClass(klass.Get()) << " failed to fully verify.";
     }
     soa.Self()->AssertNoPendingException();
   }
 
  private:
   const ParallelCompilationManager* const manager_;
+  const LogSeverity log_level_;
 };
 
-void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::VerifyDexFile(jobject class_loader,
+                                   const DexFile& dex_file,
                                    const std::vector<const DexFile*>& dex_files,
-                                   ThreadPool* thread_pool, TimingLogger* timings) {
+                                   ThreadPool* thread_pool,
+                                   size_t thread_count,
+                                   TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
-  VerifyClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
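+  // If hard verifier failures abort compilation, log at the highest severity so the failure
+  // details are visible; otherwise only warn.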
+  LogSeverity log_level = GetCompilerOptions().AbortOnHardVerifierFailure()
+                              ? LogSeverity::INTERNAL_FATAL
+                              : LogSeverity::WARNING;
+  VerifyClassVisitor visitor(&context, log_level);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class SetVerifiedClassVisitor : public CompilationVisitor {
@@ -2183,9 +2372,9 @@
           mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, soa.Self());
           // Mark methods as skipping access checks. If we don't do this, the interpreter will
           // run with access checks.
-          klass->SetPreverifiedFlagOnAllMethods(
+          klass->SetSkipAccessChecksFlagOnAllMethods(
               GetInstructionSetPointerSize(manager_->GetCompiler()->GetInstructionSet()));
-          klass->SetPreverified();
+          klass->SetVerificationAttempted();
         }
         // Record the final class status if necessary.
         ClassReference ref(manager_->GetDexFile(), class_def_index);
@@ -2202,15 +2391,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::SetVerifiedDexFile(jobject class_loader,
+                                        const DexFile& dex_file,
                                         const std::vector<const DexFile*>& dex_files,
-                                        ThreadPool* thread_pool, TimingLogger* timings) {
+                                        ThreadPool* thread_pool,
+                                        size_t thread_count,
+                                        TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   SetVerifiedClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class InitializeClassVisitor : public CompilationVisitor {
@@ -2255,7 +2447,7 @@
           if (!klass->IsInitialized()) {
            // We need to initialize static fields; we only do this for image classes that aren't
             // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-            bool can_init_static_fields = manager_->GetCompiler()->IsImage() &&
+            bool can_init_static_fields = manager_->GetCompiler()->IsBootImage() &&
                 manager_->GetCompiler()->IsImageClass(descriptor) &&
                 !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
             if (can_init_static_fields) {
@@ -2311,45 +2503,135 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+void CompilerDriver::InitializeClasses(jobject jni_class_loader,
+                                       const DexFile& dex_file,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
+
+  // Initialization allocates objects and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* init_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t init_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
-                                     thread_pool);
-  size_t thread_count;
-  if (IsImage()) {
+                                     init_thread_pool);
+  if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
-    thread_count = 1U;
-  } else {
-    thread_count = thread_count_;
+    init_thread_count = 1U;
   }
   InitializeClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count);
 }
 
+class InitializeArrayClassesAndCreateConflictTablesVisitor : public ClassVisitor {
+ public:
+  virtual bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      return true;
+    }
+    if (klass->IsArrayClass()) {
+      StackHandleScope<1> hs(Thread::Current());
+      Runtime::Current()->GetClassLinker()->EnsureInitialized(hs.Self(),
+                                                              hs.NewHandle(klass),
+                                                              true,
+                                                              true);
+    }
+    // Create the conflict tables.
+    FillIMTAndConflictTables(klass);
+    return true;
+  }
+
+ private:
+  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!klass->ShouldHaveImt()) {
+      return;
+    }
+    if (visited_classes_.find(klass) != visited_classes_.end()) {
+      return;
+    }
+    if (klass->HasSuperClass()) {
+      FillIMTAndConflictTables(klass->GetSuperClass());
+    }
+    if (!klass->IsTemp()) {
+      Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
+    }
+    visited_classes_.insert(klass);
+  }
+
+  std::set<mirror::Class*> visited_classes_;
+};
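
FillIMTAndConflictTables above processes superclasses before each class and uses a visited set so that shared ancestors are handled only once. The same traversal shape in a self-contained form (the Node type is hypothetical, not ART code):

#include <set>

struct Node {
  Node* parent = nullptr;  // Stands in for GetSuperClass().
};

// Illustrative sketch: visit each node's ancestors before the node itself,
// touching every node at most once even across overlapping parent chains.
void ProcessWithAncestors(Node* node, std::set<Node*>* visited) {
  if (node == nullptr || visited->count(node) != 0) {
    return;
  }
  if (node->parent != nullptr) {
    ProcessWithAncestors(node->parent, visited);
  }
  // ... per-node work goes here (conflict-table creation in the real code).
  visited->insert(node);
}
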
+
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, timings);
   }
-  if (IsImage()) {
+  if (boot_image_ || app_image_) {
+    // Make sure that we call EnsureInitialized on all the array classes so that
+    // SetVerificationAttempted is called and the access flags are set. If we do not do this, they
+    // get changed at runtime, resulting in more dirty image pages.
+    // Also create conflict tables.
+    // Only useful if we are compiling an image (image_classes_ is not null).
+    ScopedObjectAccess soa(Thread::Current());
+    InitializeArrayClassesAndCreateConflictTablesVisitor visitor;
+    Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(&visitor);
+  }
+  if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
     Runtime::Current()->GetHeap()->CollectGarbage(true);
   }
 }
 
-void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
-  for (size_t i = 0; i != dex_files.size(); ++i) {
-    const DexFile* dex_file = dex_files[i];
-    CHECK(dex_file != nullptr);
-    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+void CompilerDriver::Compile(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] " <<
+        ((profile_compilation_info_ == nullptr)
+            ? "null"
+            : profile_compilation_info_->DumpInfo(&dex_files));
   }
+
+  DCHECK(current_dex_to_dex_methods_ == nullptr);
+  for (const DexFile* dex_file : dex_files) {
+    CHECK(dex_file != nullptr);
+    CompileDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
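+    // Track the peak arena allocation across dex files for the memory usage report, then
+    // release the arenas before compiling the next file.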
+    const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool();
+    const size_t arena_alloc = arena_pool->GetBytesAllocated();
+    max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_);
+    Runtime::Current()->ReclaimArenaPoolMemory();
+  }
+
+  ArrayRef<DexFileMethodSet> dex_to_dex_references;
+  {
+    // From this point on, we shall not modify dex_to_dex_references_, so
+    // just grab a reference to it that we use without holding the mutex.
+    MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
+    dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
+  }
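+  // Second pass: compile (dex-to-dex) only the methods marked during the first pass.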
+  for (const auto& method_set : dex_to_dex_references) {
+    current_dex_to_dex_methods_ = &method_set.GetMethodIndexes();
+    CompileDexFile(class_loader,
+                   method_set.GetDexFile(),
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
+  }
+  current_dex_to_dex_methods_ = nullptr;
+
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
 
@@ -2455,14 +2737,17 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::CompileDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, dex_files, thread_pool);
   CompileClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
@@ -2519,6 +2804,7 @@
       case mirror::Class::kStatusRetryVerificationAtRuntime:
       case mirror::Class::kStatusVerified:
       case mirror::Class::kStatusInitialized:
+      case mirror::Class::kStatusResolved:
         break;  // Expected states.
       default:
         LOG(FATAL) << "Unexpected class status for class "
@@ -2570,88 +2856,45 @@
   return non_relative_linker_patch_count_;
 }
 
-void CompilerDriver::AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                                   uint16_t class_def_index) {
-  WriterMutexLock mu(self, freezing_constructor_lock_);
-  freezing_constructor_classes_.insert(ClassReference(dex_file, class_def_index));
+void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
+                                                   const DexFile* dex_file,
+                                                   uint16_t class_def_index,
+                                                   bool requires) {
+  WriterMutexLock mu(self, requires_constructor_barrier_lock_);
+  requires_constructor_barrier_.emplace(ClassReference(dex_file, class_def_index), requires);
 }
 
-bool CompilerDriver::RequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                                uint16_t class_def_index) const {
-  ReaderMutexLock mu(self, freezing_constructor_lock_);
-  return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
-}
-
-bool CompilerDriver::WriteElf(const std::string& android_root,
-                              bool is_host,
-                              const std::vector<const art::DexFile*>& dex_files,
-                              OatWriter* oat_writer,
-                              art::File* file)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
-    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  } else {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  }
-}
-
-bool CompilerDriver::SkipCompilation(const std::string& method_name) {
-  if (!profile_present_) {
-    return false;
-  }
-  // First find the method in the profile file.
-  ProfileFile::ProfileData data;
-  if (!profile_file_.GetProfileData(&data, method_name)) {
-    // Not in profile, no information can be determined.
-    if (kIsDebugBuild) {
-      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
-    }
-    return true;
-  }
-
-  // Methods that comprise top_k_threshold % of the total samples will be compiled.
-  // Compare against the start of the topK percentage bucket just in case the threshold
-  // falls inside a bucket.
-  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
-                 <= compiler_options_->GetTopKProfileThreshold();
-  if (kIsDebugBuild) {
-    if (compile) {
-      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-    } else {
-      VLOG(compiler) << "not compiling method " << method_name
-          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
-          << "% samples)";
+bool CompilerDriver::RequiresConstructorBarrier(Thread* self,
+                                                const DexFile* dex_file,
+                                                uint16_t class_def_index) {
+  ClassReference class_ref(dex_file, class_def_index);
+  {
+    ReaderMutexLock mu(self, requires_constructor_barrier_lock_);
+    auto it = requires_constructor_barrier_.find(class_ref);
+    if (it != requires_constructor_barrier_.end()) {
+      return it->second;
     }
   }
-  return !compile;
+  WriterMutexLock mu(self, requires_constructor_barrier_lock_);
+  const bool requires = RequiresConstructorBarrier(*dex_file, class_def_index);
+  requires_constructor_barrier_.emplace(class_ref, requires);
+  return requires;
 }
 
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
-  Runtime* const runtime = Runtime::Current();
-  const ArenaPool* arena_pool = runtime->GetArenaPool();
-  gc::Heap* const heap = runtime->GetHeap();
-  oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
-  oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+  const gc::Heap* const heap = Runtime::Current()->GetHeap();
+  const size_t java_alloc = heap->GetBytesAllocated();
+  oss << "arena alloc=" << PrettySize(max_arena_alloc_) << " (" << max_arena_alloc_ << "B)";
+  oss << " java alloc=" << PrettySize(java_alloc) << " (" << java_alloc << "B)";
 #if defined(__BIONIC__) || defined(__GLIBC__)
-  struct mallinfo info = mallinfo();
+  const struct mallinfo info = mallinfo();
   const size_t allocated_space = static_cast<size_t>(info.uordblks);
   const size_t free_space = static_cast<size_t>(info.fordblks);
-  oss << " native alloc=" << PrettySize(allocated_space) << " free="
-      << PrettySize(free_space);
+  oss << " native alloc=" << PrettySize(allocated_space) << " (" << allocated_space << "B)"
+      << " free=" << PrettySize(free_space) << " (" << free_space << "B)";
 #endif
-  if (swap_space_.get() != nullptr) {
-    oss << " swap=" << PrettySize(swap_space_->GetSize());
-  }
-  if (extended) {
-    oss << "\nCode dedupe: " << dedupe_code_.DumpStats();
-    oss << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats();
-    oss << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats();
-    oss << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats();
-    oss << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats();
-  }
+  compiled_method_storage_.DumpMemoryUsage(oss, extended);
   return oss.str();
 }
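GetMemoryUsageString() now prints both a human-readable size and the raw byte count, and sources the native-heap figures from mallinfo() on bionic and glibc. A simplified sketch of that reporting (note that mallinfo's counters are plain ints, so they can wrap on very large heaps):

    #include <malloc.h>   // mallinfo(); available on bionic and glibc only
    #include <sstream>
    #include <string>

    std::string NativeHeapSummary() {
      const struct mallinfo info = mallinfo();
      const size_t allocated = static_cast<size_t>(info.uordblks);   // in-use bytes
      const size_t free_bytes = static_cast<size_t>(info.fordblks);  // free bytes
      std::ostringstream oss;
      oss << "native alloc=" << allocated << "B free=" << free_bytes << "B";
      return oss.str();
    }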
 
@@ -2662,9 +2905,33 @@
 
 bool CompilerDriver::IsStringInit(uint32_t method_index, const DexFile* dex_file, int32_t* offset) {
   DexFileMethodInliner* inliner = GetMethodInlinerMap()->GetMethodInliner(dex_file);
-  size_t pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  const PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
   *offset = inliner->GetOffsetForStringInit(method_index, pointer_size);
   return inliner->IsStringInitMethodIndex(method_index);
 }
 
+bool CompilerDriver::MayInlineInternal(const DexFile* inlined_from,
+                                       const DexFile* inlined_into) const {
+  // We're not allowed to inline across dex files when the inlined-from dex file is on the
+  // no-inline-from list.
+  if (inlined_from != inlined_into &&
+      compiler_options_->GetNoInlineFromDexFile() != nullptr &&
+      ContainsElement(*compiler_options_->GetNoInlineFromDexFile(), inlined_from)) {
+    return false;
+  }
+
+  return true;
+}
+
+void CompilerDriver::InitializeThreadPools() {
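+  // The invoking thread also runs compilation work while waiting on the pool, hence one
+  // fewer worker thread is created.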
+  size_t parallel_count = parallel_thread_count_ > 0 ? parallel_thread_count_ - 1 : 0;
+  parallel_thread_pool_.reset(
+      new ThreadPool("Compiler driver thread pool", parallel_count));
+  single_thread_pool_.reset(new ThreadPool("Single-threaded Compiler driver thread pool", 0));
+}
+
+void CompilerDriver::FreeThreadPools() {
+  parallel_thread_pool_.reset();
+  single_thread_pool_.reset();
+}
+
 }  // namespace art
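The rewritten RequiresConstructorBarrier() above is a double-checked cache: a shared-lock lookup first, and only on a miss an exclusive lock to compute and memoize the answer. A minimal sketch of the same pattern, assuming standard-library primitives rather than ART's Mutex/ReaderWriterMutex wrappers:

    #include <map>
    #include <mutex>
    #include <shared_mutex>

    class BarrierCache {
     public:
      bool Get(int key) {
        {
          std::shared_lock<std::shared_mutex> lock(mu_);  // reader side: cheap lookup
          auto it = cache_.find(key);
          if (it != cache_.end()) {
            return it->second;
          }
        }
        std::unique_lock<std::shared_mutex> lock(mu_);  // writer side: compute and store
        const bool value = Compute(key);
        cache_.emplace(key, value);  // emplace is a no-op if another thread raced us here
        return value;
      }

     private:
      static bool Compute(int key) { return (key % 2) == 0; }  // stand-in for the real analysis
      std::map<int, bool> cache_;
      std::shared_mutex mu_;
    };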
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 0dc8261..2dd4651 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -30,18 +30,17 @@
 #include "class_reference.h"
 #include "compiler.h"
 #include "dex_file.h"
+#include "driver/compiled_method_storage.h"
+#include "jit/offline_profiling_info.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 #include "utils/array_ref.h"
-#include "utils/dedupe_set.h"
 #include "utils/dex_cache_arrays_layout.h"
-#include "utils/swap_space.h"
 
 namespace art {
 
@@ -53,6 +52,7 @@
 class MethodVerifier;
 }  // namespace verifier
 
+class BitVector;
 class CompiledClass;
 class CompiledMethod;
 class CompilerOptions;
@@ -60,7 +60,6 @@
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
-class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -80,8 +79,6 @@
   kQuickAbi
 };
 
-static constexpr bool kUseMurmur3Hash = true;
-
 class CompilerDriver {
  public:
   // Create a compiler targeting the requested "instruction_set".
@@ -95,29 +92,44 @@
                  Compiler::Kind compiler_kind,
                  InstructionSet instruction_set,
                  const InstructionSetFeatures* instruction_set_features,
-                 bool image, std::unordered_set<std::string>* image_classes,
+                 bool boot_image,
+                 bool app_image,
+                 std::unordered_set<std::string>* image_classes,
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
-                 size_t thread_count, bool dump_stats, bool dump_passes,
-                 const std::string& dump_cfg_file_name, bool dump_cfg_append,
-                 CumulativeLogger* timer, int swap_fd,
-                 const std::string& profile_file);
+                 size_t thread_count,
+                 bool dump_stats,
+                 bool dump_passes,
+                 CumulativeLogger* timer,
+                 int swap_fd,
+                 const ProfileCompilationInfo* profile_compilation_info);
 
   ~CompilerDriver();
 
-  void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
+  // Set the dex files that will be stored in the oat file after being compiled.
+  void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
+    dex_files_for_oat_file_ = &dex_files;
+  }
 
-  CompiledMethod* CompileArtMethod(Thread* self, ArtMethod*)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!compiled_methods_lock_) WARN_UNUSED;
+  // Get the dex files that will be stored in the oat file after being compiled.
+  ArrayRef<const DexFile* const> GetDexFilesForOatFile() const {
+    return (dex_files_for_oat_file_ != nullptr)
+        ? ArrayRef<const DexFile* const>(*dex_files_for_oat_file_)
+        : ArrayRef<const DexFile* const>();
+  }
+
+  void CompileAll(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  TimingLogger* timings)
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_);
+      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   VerificationResults* GetVerificationResults() const {
+    DCHECK(Runtime::Current()->IsAotCompiler());
     return verification_results_;
   }
 
@@ -141,13 +153,9 @@
     return compiler_.get();
   }
 
-  bool ProfilePresent() const {
-    return profile_present_;
-  }
-
   // Are we compiling and creating an image file?
-  bool IsImage() const {
-    return image_;
+  bool IsBootImage() const {
+    return boot_image_;
   }
 
   const std::unordered_set<std::string>* GetImageClasses() const {
@@ -155,16 +163,11 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateJniDlsymLookup() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  std::unique_ptr<const std::vector<uint8_t>> CreateJniDlsymLookup() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickGenericJniTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickImtConflictTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const;
 
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
@@ -182,31 +185,38 @@
   // Remove and delete a compiled method.
   void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_);
 
-  void AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                     uint16_t class_def_index)
-      REQUIRES(!freezing_constructor_lock_);
-  bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                  uint16_t class_def_index) const
-      REQUIRES(!freezing_constructor_lock_);
+  void SetRequiresConstructorBarrier(Thread* self,
+                                     const DexFile* dex_file,
+                                     uint16_t class_def_index,
+                                     bool requires)
+      REQUIRES(!requires_constructor_barrier_lock_);
+  bool RequiresConstructorBarrier(Thread* self,
+                                  const DexFile* dex_file,
+                                  uint16_t class_def_index)
+      REQUIRES(!requires_constructor_barrier_lock_);
 
   // Callbacks from compiler to see what runtime checks must be generated.
 
-  bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx);
+  bool CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
+                                        uint32_t type_idx)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx)
       REQUIRES(!Locks::mutator_lock_);
 
   // Are runtime access checks necessary in the compiled code?
-  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                  uint32_t type_idx, bool* type_known_final = nullptr,
-                                  bool* type_known_abstract = nullptr,
-                                  bool* equals_referrers_class = nullptr)
-      REQUIRES(!Locks::mutator_lock_);
+  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  uint32_t type_idx)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
-  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                              uint32_t type_idx)
-      REQUIRES(!Locks::mutator_lock_);
+  // out_is_finalizable is set to whether the type is finalizable.
+  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
+                                              Handle<mirror::DexCache> dex_cache,
+                                              uint32_t type_idx,
+                                              bool* out_is_finalizable)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                           bool* is_type_initialized, bool* use_direct_type_ptr,
@@ -361,14 +371,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
 
-  // Can we fastpath static field access? Computes field's offset, volatility and whether the
-  // field is within the referrer (which can avoid checking class initialization).
-  bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
-                              MemberOffset* field_offset, uint32_t* storage_index,
-                              bool* is_referrers_class, bool* is_volatile, bool* is_initialized,
-                              Primitive::Type* type)
-      REQUIRES(!Locks::mutator_lock_);
-
   // Can we fastpath an interface, super class, or virtual method call? Computes the method's vtable
   // index.
   bool ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc,
@@ -388,16 +390,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  SwapAllocator<void>& GetSwapSpaceAllocator() {
-    return *swap_space_allocator_.get();
-  }
-
-  bool WriteElf(const std::string& android_root,
-                bool is_host,
-                const std::vector<const DexFile*>& dex_files,
-                OatWriter* oat_writer,
-                File* file);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
@@ -407,7 +399,7 @@
   }
 
   size_t GetThreadCount() const {
-    return thread_count_;
+    return parallel_thread_count_;
   }
 
   bool GetDumpStats() const {
@@ -418,23 +410,15 @@
     return dump_passes_;
   }
 
-  const std::string& GetDumpCfgFileName() const {
-    return dump_cfg_file_name_;
-  }
-
-  bool GetDumpCfgAppend() const {
-    return dump_cfg_append_;
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
 
   void SetDedupeEnabled(bool dedupe_enabled) {
-    dedupe_enabled_ = dedupe_enabled;
+    compiled_method_storage_.SetDedupeEnabled(dedupe_enabled);
   }
   bool DedupeEnabled() const {
-    return dedupe_enabled_;
+    return compiled_method_storage_.DedupeEnabled();
   }
 
   // Checks if class specified by type_idx is one of the image_classes_
@@ -446,6 +430,14 @@
   // Checks whether the provided method should be compiled, i.e., is in methods_to_compile_.
   bool IsMethodToCompile(const MethodReference& method_ref) const;
 
+  // Checks whether profile-guided compilation is enabled and whether the method should be
+  // compiled according to the profile file.
+  bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const;
+
+  // Checks whether profile-guided verification is enabled and whether the class should be
+  // verified according to the profile file.
+  bool ShouldVerifyClassBasedOnProfile(const DexFile& dex_file, uint16_t class_idx) const;
+
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       REQUIRES(!compiled_classes_lock_);
 
@@ -455,16 +447,6 @@
                                        uint16_t class_def_idx,
                                        const DexFile& dex_file) const;
 
-  SwapVector<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
-  SwapSrcMap* DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map);
-  SwapVector<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& code);
-  SwapVector<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& code);
-  SwapVector<uint8_t>* DeduplicateGCMap(const ArrayRef<const uint8_t>& code);
-  SwapVector<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
-
-  // Should the compiler run on this method given profile information?
-  bool SkipCompilation(const std::string& method_name);
-
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
@@ -479,6 +461,28 @@
     return compiler_kind_;
   }
 
+  CompiledMethodStorage* GetCompiledMethodStorage() {
+    return &compiled_method_storage_;
+  }
+
+  // Can we assume that the klass is loaded?
+  bool CanAssumeClassIsLoaded(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool MayInline(const DexFile* inlined_from, const DexFile* inlined_into) const {
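+    // The no-inline-from check is only performed on host builds; on target builds inlining
+    // is always allowed here.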
+    if (!kIsTargetBuild) {
+      return MayInlineInternal(inlined_from, inlined_into);
+    }
+    return true;
+  }
+
+  void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref)
+      REQUIRES(!dex_to_dex_references_lock_);
+
+  const BitVector* GetCurrentDexToDexMethods() const {
+    return current_dex_to_dex_methods_;
+  }
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -513,10 +517,6 @@
   bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Can we assume that the klass is loaded?
-  bool CanAssumeClassIsLoaded(mirror::Class* klass)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
@@ -549,8 +549,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool* thread_pool, TimingLogger* timings)
+  void PreCompile(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
@@ -558,54 +559,72 @@
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
-  void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings)
+  void Resolve(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
-  void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+  void ResolveDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool* thread_pool, TimingLogger* timings);
-  void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+  void Verify(jobject class_loader,
+              const std::vector<const DexFile*>& dex_files,
+              TimingLogger* timings);
+  void VerifyDexFile(jobject class_loader,
+                     const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
-                     ThreadPool* thread_pool, TimingLogger* timings)
+                     ThreadPool* thread_pool,
+                     size_t thread_count,
+                     TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                   ThreadPool* thread_pool, TimingLogger* timings);
-  void SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+  void SetVerified(jobject class_loader,
+                   const std::vector<const DexFile*>& dex_files,
+                   TimingLogger* timings);
+  void SetVerifiedDexFile(jobject class_loader,
+                          const DexFile& dex_file,
                           const std::vector<const DexFile*>& dex_files,
-                          ThreadPool* thread_pool, TimingLogger* timings)
+                          ThreadPool* thread_pool,
+                          size_t thread_count,
+                          TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
-  void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+  void InitializeClasses(jobject class_loader,
                          const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
+                         TimingLogger* timings)
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
+  void InitializeClasses(jobject class_loader,
+                         const DexFile& dex_file,
+                         const std::vector<const DexFile*>& dex_files,
+                         TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings);
-  void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+  void Compile(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_);
+  void CompileDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first
-  // as other fields rely on this.
-  std::unique_ptr<SwapSpace> swap_space_;
-  std::unique_ptr<SwapAllocator<void> > swap_space_allocator_;
+  bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
 
-  ProfileFile profile_file_;
-  bool profile_present_;
+  void InitializeThreadPools();
+  void FreeThreadPools();
+  void CheckThreadPools();
+
+  bool RequiresConstructorBarrier(const DexFile& dex_file, uint16_t class_def_idx) const;
 
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
@@ -617,9 +636,11 @@
   const InstructionSet instruction_set_;
   const InstructionSetFeatures* const instruction_set_features_;
 
-  // All class references that require
-  mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::set<ClassReference> freezing_constructor_classes_ GUARDED_BY(freezing_constructor_lock_);
+  // All class references that require constructor barriers. If the class reference is not in
+  // the map, then the result has not yet been computed.
+  mutable ReaderWriterMutex requires_constructor_barrier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::map<ClassReference, bool> requires_constructor_barrier_
+      GUARDED_BY(requires_constructor_barrier_lock_);
 
   typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
   // All class references that this compiler has compiled.
@@ -639,11 +660,11 @@
   // in the .oat_patches ELF section if requested in the compiler options.
   size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
 
-  const bool image_;
+  const bool boot_image_;
+  const bool app_image_;
 
-  // If image_ is true, specifies the classes that will be included in
-  // the image. Note if image_classes_ is null, all classes are
-  // included in the image.
+  // If boot_image_ is true, specifies the classes that will be included in the image.
+  // Note that if image_classes_ is null, all classes are included in the image.
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
 
   // Specifies the classes that will be compiled. Note that if classes_to_compile_ is null,
@@ -658,16 +679,18 @@
 
   bool had_hard_verifier_failure_;
 
-  size_t thread_count_;
+  // A thread pool that can (potentially) run tasks in parallel.
+  std::unique_ptr<ThreadPool> parallel_thread_pool_;
+  size_t parallel_thread_count_;
+
+  // A thread pool that guarantees running single-threaded on the main thread.
+  std::unique_ptr<ThreadPool> single_thread_pool_;
 
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
 
-  bool dedupe_enabled_;
   bool dump_stats_;
   const bool dump_passes_;
-  const std::string dump_cfg_file_name_;
-  const bool dump_cfg_append_;
 
   CumulativeLogger* const timings_logger_;
 
@@ -678,93 +701,24 @@
 
   bool support_boot_image_fixup_;
 
-  // DeDuplication data structures, these own the corresponding byte arrays.
-  template <typename ContentType>
-  class DedupeHashFunc {
-   public:
-    size_t operator()(const ArrayRef<ContentType>& array) const {
-      const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
-      static_assert(IsPowerOfTwo(sizeof(ContentType)),
-          "ContentType is not power of two, don't know whether array layout is as assumed");
-      uint32_t len = sizeof(ContentType) * array.size();
-      if (kUseMurmur3Hash) {
-        static constexpr uint32_t c1 = 0xcc9e2d51;
-        static constexpr uint32_t c2 = 0x1b873593;
-        static constexpr uint32_t r1 = 15;
-        static constexpr uint32_t r2 = 13;
-        static constexpr uint32_t m = 5;
-        static constexpr uint32_t n = 0xe6546b64;
+  // List of dex files that will be stored in the oat file.
+  const std::vector<const DexFile*>* dex_files_for_oat_file_;
 
-        uint32_t hash = 0;
+  CompiledMethodStorage compiled_method_storage_;
 
-        const int nblocks = len / 4;
-        typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
-        const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data);
-        int i;
-        for (i = 0; i < nblocks; i++) {
-          uint32_t k = blocks[i];
-          k *= c1;
-          k = (k << r1) | (k >> (32 - r1));
-          k *= c2;
+  // Info for profile guided compilation.
+  const ProfileCompilationInfo* const profile_compilation_info_;
 
-          hash ^= k;
-          hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
-        }
+  size_t max_arena_alloc_;
 
-        const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
-        uint32_t k1 = 0;
-
-        switch (len & 3) {
-          case 3:
-            k1 ^= tail[2] << 16;
-            FALLTHROUGH_INTENDED;
-          case 2:
-            k1 ^= tail[1] << 8;
-            FALLTHROUGH_INTENDED;
-          case 1:
-            k1 ^= tail[0];
-
-            k1 *= c1;
-            k1 = (k1 << r1) | (k1 >> (32 - r1));
-            k1 *= c2;
-            hash ^= k1;
-        }
-
-        hash ^= len;
-        hash ^= (hash >> 16);
-        hash *= 0x85ebca6b;
-        hash ^= (hash >> 13);
-        hash *= 0xc2b2ae35;
-        hash ^= (hash >> 16);
-
-        return hash;
-      } else {
-        size_t hash = 0x811c9dc5;
-        for (uint32_t i = 0; i < len; ++i) {
-          hash = (hash * 16777619) ^ data[i];
-        }
-        hash += hash << 13;
-        hash ^= hash >> 7;
-        hash += hash << 3;
-        hash ^= hash >> 17;
-        hash += hash << 5;
-        return hash;
-      }
-    }
-  };
-
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_code_;
-  DedupeSet<ArrayRef<SrcMapElem>,
-            SwapSrcMap, size_t, DedupeHashFunc<SrcMapElem>, 4> dedupe_src_mapping_table_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_mapping_table_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_vmap_table_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_gc_map_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_cfi_info_;
+  // Data for delaying dex-to-dex compilation.
+  Mutex dex_to_dex_references_lock_;
+  // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation.
+  class DexFileMethodSet;
+  std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_);
+  // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method
+  // indexes for dex-to-dex compilation in the current dex file.
+  const BitVector* current_dex_to_dex_methods_;
 
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
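The dex_to_dex_references_ fields above describe a two-phase scheme: during compilation, threads record method indexes under dex_to_dex_references_lock_, and a later single-threaded pass replays them one dex file at a time through current_dex_to_dex_methods_. A rough stand-in for that collect-then-replay pattern (types assumed for illustration; ART keeps a per-dex-file BitVector, not a set):

    #include <cstdint>
    #include <map>
    #include <mutex>
    #include <set>

    class DexToDexCollector {
     public:
      // Phase 1: any compiler thread may mark a method (guarded by a mutex).
      void Mark(const void* dex_file, uint32_t method_index) {
        std::lock_guard<std::mutex> lock(mu_);
        marked_[dex_file].insert(method_index);
      }

      // Phase 2: a single-threaded replay reads the per-file set; no locking is needed
      // once collection has finished.
      const std::set<uint32_t>* MethodsFor(const void* dex_file) const {
        auto it = marked_.find(dex_file);
        return it == marked_.end() ? nullptr : &it->second;
      }

     private:
      std::mutex mu_;
      std::map<const void*, std::set<uint32_t>> marked_;
    };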
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 1107599..b9a5a78 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -86,10 +87,7 @@
       mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader);
       CHECK(c != nullptr);
       const auto pointer_size = class_linker->GetImagePointerSize();
-      for (auto& m : c->GetDirectMethods(pointer_size)) {
-        MakeExecutable(&m);
-      }
-      for (auto& m : c->GetVirtualMethods(pointer_size)) {
+      for (auto& m : c->GetMethods(pointer_size)) {
         MakeExecutable(&m);
       }
     }
@@ -146,13 +144,10 @@
 }
 
 TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Class", "isFinalizable",
-                         "()Z");
-    CompileDirectMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Object", "<init>", "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != nullptr);
@@ -192,7 +187,7 @@
 };
 
 TEST_F(CompilerDriverMethodsTest, Selection) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
   {
@@ -234,6 +229,92 @@
   EXPECT_TRUE(expected->empty());
 }
 
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+  ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation());
+      profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 2);
+    }
+    return &profile_info_;
+  }
+
+  std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+    if (clazz == "Main") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Main.getA()",
+          "java.lang.String Main.getB()"});
+    } else if (clazz == "Second") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Second.getX()",
+          "java.lang.String Second.getY()"});
+    } else {
+      return std::unordered_set<std::string>();
+    }
+  }
+
+  void CheckCompiledMethods(jobject class_loader,
+                            const std::string& clazz,
+                            const std::unordered_set<std::string>& expected_methods) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    size_t number_of_compiled_methods = 0;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      std::string name = PrettyMethod(&m, true);
+      const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+      ASSERT_NE(code, nullptr);
+      if (expected_methods.find(name) != expected_methods.end()) {
+        number_of_compiled_methods++;
+        EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+      } else {
+        EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+      }
+    }
+    EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+  }
+
+ private:
+  ProfileCompilationInfo profile_info_;
+};
+
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Need to enable dex-file writability. Methods rejected for full compilation will run
+  // through the dex-to-dex compiler.
+  for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+    ASSERT_TRUE(dex_file->EnableWrite());
+  }
+
+  CompileAll(class_loader);
+
+  std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+  std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+  CheckCompiledMethods(class_loader, "LMain;", m);
+  CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch)
 
 }  // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 3f5a1ea..30ba8c9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -16,31 +16,37 @@
 
 #include "compiler_options.h"
 
-#include "dex/pass_manager.h"
+#include <fstream>
 
 namespace art {
 
 CompilerOptions::CompilerOptions()
-    : compiler_filter_(kDefaultCompilerFilter),
+    : compiler_filter_(CompilerFilter::kDefaultCompilerFilter),
       huge_method_threshold_(kDefaultHugeMethodThreshold),
       large_method_threshold_(kDefaultLargeMethodThreshold),
       small_method_threshold_(kDefaultSmallMethodThreshold),
       tiny_method_threshold_(kDefaultTinyMethodThreshold),
       num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
-      inline_depth_limit_(kDefaultInlineDepthLimit),
-      inline_max_code_units_(kDefaultInlineMaxCodeUnits),
+      inline_depth_limit_(kUnsetInlineDepthLimit),
+      inline_max_code_units_(kUnsetInlineMaxCodeUnits),
+      no_inline_from_(nullptr),
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
       generate_debug_info_(kDefaultGenerateDebugInfo),
+      generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
       implicit_suspend_checks_(false),
       compile_pic_(false),
       verbose_methods_(nullptr),
-      pass_manager_options_(new PassManagerOptions),
       abort_on_hard_verifier_failure_(false),
-      init_failure_output_(nullptr) {
+      init_failure_output_(nullptr),
+      dump_cfg_file_name_(""),
+      dump_cfg_append_(false),
+      force_determinism_(false),
+      register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault),
+      passes_to_run_(nullptr) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -48,7 +54,7 @@
   // because we don't want to include the PassManagerOptions definition from the header file.
 }
 
-CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
+CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter,
                                  size_t huge_method_threshold,
                                  size_t large_method_threshold,
                                  size_t small_method_threshold,
@@ -56,6 +62,7 @@
                                  size_t num_dex_methods_threshold,
                                  size_t inline_depth_limit,
                                  size_t inline_max_code_units,
+                                 const std::vector<const DexFile*>* no_inline_from,
                                  bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
@@ -65,9 +72,13 @@
                                  bool implicit_suspend_checks,
                                  bool compile_pic,
                                  const std::vector<std::string>* verbose_methods,
-                                 PassManagerOptions* pass_manager_options,
                                  std::ostream* init_failure_output,
-                                 bool abort_on_hard_verifier_failure
+                                 bool abort_on_hard_verifier_failure,
+                                 const std::string& dump_cfg_file_name,
+                                 bool dump_cfg_append,
+                                 bool force_determinism,
+                                 RegisterAllocator::Strategy regalloc_strategy,
+                                 const std::vector<std::string>* passes_to_run
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -77,18 +88,134 @@
     num_dex_methods_threshold_(num_dex_methods_threshold),
     inline_depth_limit_(inline_depth_limit),
     inline_max_code_units_(inline_max_code_units),
+    no_inline_from_(no_inline_from),
     include_patch_information_(include_patch_information),
     top_k_profile_threshold_(top_k_profile_threshold),
     debuggable_(debuggable),
     generate_debug_info_(generate_debug_info),
+    generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
     implicit_null_checks_(implicit_null_checks),
     implicit_so_checks_(implicit_so_checks),
     implicit_suspend_checks_(implicit_suspend_checks),
     compile_pic_(compile_pic),
     verbose_methods_(verbose_methods),
-    pass_manager_options_(pass_manager_options),
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
-    init_failure_output_(init_failure_output) {
+    init_failure_output_(init_failure_output),
+    dump_cfg_file_name_(dump_cfg_file_name),
+    dump_cfg_append_(dump_cfg_append),
+    force_determinism_(force_determinism),
+    register_allocation_strategy_(regalloc_strategy),
+    passes_to_run_(passes_to_run) {
+}
+
+void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--huge-method-max", &huge_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseLargeMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--large-method-max", &large_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseSmallMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--small-method-max", &small_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseTinyMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--tiny-method-max", &tiny_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage);
+}
+
+void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage);
+}
+
+void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
+}
+
+void CompilerOptions::ParseDumpInitFailures(const StringPiece& option,
+                                            UsageFn Usage ATTRIBUTE_UNUSED) {
+  DCHECK(option.starts_with("--dump-init-failures="));
+  std::string file_name = option.substr(strlen("--dump-init-failures=")).data();
+  init_failure_output_.reset(new std::ofstream(file_name));
+  if (init_failure_output_.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate ofstream";
+  } else if (init_failure_output_->fail()) {
+    LOG(ERROR) << "Failed to open " << file_name << " for writing the initialization "
+               << "failures.";
+    init_failure_output_.reset();
+  }
+}
+
+void CompilerOptions::ParseRegisterAllocationStrategy(const StringPiece& option,
+                                                      UsageFn Usage) {
+  DCHECK(option.starts_with("--register-allocation-strategy="));
+  StringPiece choice = option.substr(strlen("--register-allocation-strategy=")).data();
+  if (choice == "linear-scan") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
+  } else if (choice == "graph-color") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor;
+  } else {
+    Usage("Unrecognized register allocation strategy. Try linear-scan, or graph-color.");
+  }
+}
+
+bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usage) {
+  if (option.starts_with("--compiler-filter=")) {
+    const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
+    if (!CompilerFilter::ParseCompilerFilter(compiler_filter_string, &compiler_filter_)) {
+      Usage("Unknown --compiler-filter value %s", compiler_filter_string);
+    }
+  } else if (option == "--compile-pic") {
+    compile_pic_ = true;
+  } else if (option.starts_with("--huge-method-max=")) {
+    ParseHugeMethodMax(option, Usage);
+  } else if (option.starts_with("--large-method-max=")) {
+    ParseLargeMethodMax(option, Usage);
+  } else if (option.starts_with("--small-method-max=")) {
+    ParseSmallMethodMax(option, Usage);
+  } else if (option.starts_with("--tiny-method-max=")) {
+    ParseTinyMethodMax(option, Usage);
+  } else if (option.starts_with("--num-dex-methods=")) {
+    ParseNumDexMethods(option, Usage);
+  } else if (option.starts_with("--inline-depth-limit=")) {
+    ParseInlineDepthLimit(option, Usage);
+  } else if (option.starts_with("--inline-max-code-units=")) {
+    ParseInlineMaxCodeUnits(option, Usage);
+  } else if (option == "--generate-debug-info" || option == "-g") {
+    generate_debug_info_ = true;
+  } else if (option == "--no-generate-debug-info") {
+    generate_debug_info_ = false;
+  } else if (option == "--generate-mini-debug-info") {
+    generate_mini_debug_info_ = true;
+  } else if (option == "--no-generate-mini-debug-info") {
+    generate_mini_debug_info_ = false;
+  } else if (option == "--debuggable") {
+    debuggable_ = true;
+  } else if (option.starts_with("--top-k-profile-threshold=")) {
+    ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
+  } else if (option == "--include-patch-information") {
+    include_patch_information_ = true;
+  } else if (option == "--no-include-patch-information") {
+    include_patch_information_ = false;
+  } else if (option == "--abort-on-hard-verifier-error") {
+    abort_on_hard_verifier_failure_ = true;
+  } else if (option.starts_with("--dump-init-failures=")) {
+    ParseDumpInitFailures(option, Usage);
+  } else if (option.starts_with("--dump-cfg=")) {
+    dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
+  } else if (option.starts_with("--dump-cfg-append")) {
+    dump_cfg_append_ = true;
+  } else if (option.starts_with("--register-allocation-strategy=")) {
+    ParseRegisterAllocationStrategy(option, Usage);
+  } else {
+    // Option not recognized.
+    return false;
+  }
+  return true;
 }
 
 }  // namespace art
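ParseCompilerOption() returns false for options it does not recognize, so a caller can chain it in front of its own option handling. A rough usage sketch (HandleOneOption and this Usage stub are hypothetical; ART's real UsageFn prints a formatted message before exiting):

    #include <cstdlib>

    // Hypothetical error reporter matching the UsageFn contract assumed above.
    static void Usage(const char* fmt ATTRIBUTE_UNUSED, ...) {
      exit(1);
    }

    void HandleOneOption(CompilerOptions* compiler_options, const StringPiece& option) {
      if (!compiler_options->ParseCompilerOption(option, Usage)) {
        // Not a compiler option; fall through to driver-level option parsing.
      }
    }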
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 18f215d..abc58d7 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -22,37 +22,29 @@
 #include <vector>
 
 #include "base/macros.h"
+#include "compiler_filter.h"
 #include "globals.h"
+#include "optimizing/register_allocator.h"
+#include "utils.h"
 
 namespace art {
 
-class PassManagerOptions;
-
 class CompilerOptions FINAL {
  public:
-  enum CompilerFilter {
-    kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
-    kInterpretOnly,       // Verify, and compile only JNI stubs.
-    kVerifyAtRuntime,     // Only compile JNI stubs and verify at runtime.
-    kSpace,               // Maximize space savings.
-    kBalanced,            // Try to get the best performance return on compilation investment.
-    kSpeed,               // Maximize runtime performance.
-    kEverything,          // Force compilation of everything capable of being compiled.
-    kTime,                // Compile methods, but minimize compilation time.
-  };
-
   // Guide heuristics to determine whether to compile a method when profile data is not available.
-  static const CompilerFilter kDefaultCompilerFilter = kSpeed;
   static const size_t kDefaultHugeMethodThreshold = 10000;
   static const size_t kDefaultLargeMethodThreshold = 600;
   static const size_t kDefaultSmallMethodThreshold = 60;
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
-  static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
+  static const bool kDefaultGenerateDebugInfo = false;
+  static const bool kDefaultGenerateMiniDebugInfo = false;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
-  static const size_t kDefaultInlineMaxCodeUnits = 20;
+  static const size_t kDefaultInlineMaxCodeUnits = 32;
+  static constexpr size_t kUnsetInlineDepthLimit = -1;
+  static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
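+  // Note: -1 converted to size_t is the maximum representable value, i.e. an "unset" sentinel.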
 
   // Default inlining settings when the space filter is used.
   static constexpr size_t kSpaceFilterInlineDepthLimit = 3;
@@ -61,7 +53,7 @@
   CompilerOptions();
   ~CompilerOptions();
 
-  CompilerOptions(CompilerFilter compiler_filter,
+  CompilerOptions(CompilerFilter::Filter compiler_filter,
                   size_t huge_method_threshold,
                   size_t large_method_threshold,
                   size_t small_method_threshold,
@@ -69,6 +61,7 @@
                   size_t num_dex_methods_threshold,
                   size_t inline_depth_limit,
                   size_t inline_max_code_units,
+                  const std::vector<const DexFile*>* no_inline_from,
                   bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
@@ -78,35 +71,44 @@
                   bool implicit_suspend_checks,
                   bool compile_pic,
                   const std::vector<std::string>* verbose_methods,
-                  PassManagerOptions* pass_manager_options,
                   std::ostream* init_failure_output,
-                  bool abort_on_hard_verifier_failure);
+                  bool abort_on_hard_verifier_failure,
+                  const std::string& dump_cfg_file_name,
+                  bool dump_cfg_append,
+                  bool force_determinism,
+                  RegisterAllocator::Strategy regalloc_strategy,
+                  const std::vector<std::string>* passes_to_run);
 
-  CompilerFilter GetCompilerFilter() const {
+  CompilerFilter::Filter GetCompilerFilter() const {
     return compiler_filter_;
   }
 
-  void SetCompilerFilter(CompilerFilter compiler_filter) {
+  void SetCompilerFilter(CompilerFilter::Filter compiler_filter) {
     compiler_filter_ = compiler_filter;
   }
 
   bool VerifyAtRuntime() const {
-    return compiler_filter_ == CompilerOptions::kVerifyAtRuntime;
+    return compiler_filter_ == CompilerFilter::kVerifyAtRuntime;
   }
 
-  bool IsCompilationEnabled() const {
-    return compiler_filter_ != CompilerOptions::kVerifyNone &&
-        compiler_filter_ != CompilerOptions::kInterpretOnly &&
-        compiler_filter_ != CompilerOptions::kVerifyAtRuntime;
+  bool IsBytecodeCompilationEnabled() const {
+    return CompilerFilter::IsBytecodeCompilationEnabled(compiler_filter_);
+  }
+
+  bool IsJniCompilationEnabled() const {
+    return CompilerFilter::IsJniCompilationEnabled(compiler_filter_);
   }
 
   bool IsVerificationEnabled() const {
-    return compiler_filter_ != CompilerOptions::kVerifyNone &&
-        compiler_filter_ != CompilerOptions::kVerifyAtRuntime;
+    return CompilerFilter::IsVerificationEnabled(compiler_filter_);
   }
 
   bool NeverVerify() const {
-    return compiler_filter_ == CompilerOptions::kVerifyNone;
+    return compiler_filter_ == CompilerFilter::kVerifyNone;
+  }
+
+  bool VerifyOnlyProfile() const {
+    return compiler_filter_ == CompilerFilter::kVerifyProfile;
   }
 
   size_t GetHugeMethodThreshold() const {
@@ -148,10 +150,16 @@
   size_t GetInlineDepthLimit() const {
     return inline_depth_limit_;
   }
+  void SetInlineDepthLimit(size_t limit) {
+    inline_depth_limit_ = limit;
+  }
 
   size_t GetInlineMaxCodeUnits() const {
     return inline_max_code_units_;
   }
+  void SetInlineMaxCodeUnits(size_t units) {
+    inline_max_code_units_ = units;
+  }
 
   double GetTopKProfileThreshold() const {
     return top_k_profile_threshold_;
@@ -161,10 +169,24 @@
     return debuggable_;
   }
 
+  bool GetNativeDebuggable() const {
+    return GetDebuggable() && GetGenerateDebugInfo();
+  }
+
+  // Whether the compiler collects any form of debugging information.
+  // The individual debug-info flags below control how that information is written to disk.
+  bool GenerateAnyDebugInfo() const {
+    return GetGenerateDebugInfo() || GetGenerateMiniDebugInfo();
+  }
+
   bool GetGenerateDebugInfo() const {
     return generate_debug_info_;
   }
 
+  bool GetGenerateMiniDebugInfo() const {
+    return generate_mini_debug_info_;
+  }
+
   bool GetImplicitNullChecks() const {
     return implicit_null_checks_;
   }
@@ -200,51 +222,107 @@
   }
 
   std::ostream* GetInitFailureOutput() const {
-    return init_failure_output_;
-  }
-
-  const PassManagerOptions* GetPassManagerOptions() const {
-    return pass_manager_options_.get();
+    return init_failure_output_.get();
   }
 
   bool AbortOnHardVerifierFailure() const {
     return abort_on_hard_verifier_failure_;
   }
 
+  const std::vector<const DexFile*>* GetNoInlineFromDexFile() const {
+    return no_inline_from_;
+  }
+
+  bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
+
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;
+  }
+
+  bool GetDumpCfgAppend() const {
+    return dump_cfg_append_;
+  }
+
+  bool IsForceDeterminism() const {
+    return force_determinism_;
+  }
+
+  RegisterAllocator::Strategy GetRegisterAllocationStrategy() const {
+    return register_allocation_strategy_;
+  }
+
+  const std::vector<std::string>* GetPassesToRun() const {
+    return passes_to_run_;
+  }
+
  private:
-  CompilerFilter compiler_filter_;
-  const size_t huge_method_threshold_;
-  const size_t large_method_threshold_;
-  const size_t small_method_threshold_;
-  const size_t tiny_method_threshold_;
-  const size_t num_dex_methods_threshold_;
-  const size_t inline_depth_limit_;
-  const size_t inline_max_code_units_;
-  const bool include_patch_information_;
+  void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
+  void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
+  void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage);
+  void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage);
+  void ParseNumDexMethods(const StringPiece& option, UsageFn Usage);
+  void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseLargeMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseHugeMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseRegisterAllocationStrategy(const StringPiece& option, UsageFn Usage);
+
+  CompilerFilter::Filter compiler_filter_;
+  size_t huge_method_threshold_;
+  size_t large_method_threshold_;
+  size_t small_method_threshold_;
+  size_t tiny_method_threshold_;
+  size_t num_dex_methods_threshold_;
+  size_t inline_depth_limit_;
+  size_t inline_max_code_units_;
+
+  // Dex files from which we should not inline code.
+  // This is usually a very short list (e.g. a single dex file), so we
+  // prefer vector<> over a lookup-oriented container, such as set<>.
+  const std::vector<const DexFile*>* no_inline_from_;
+
+  bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
-  const double top_k_profile_threshold_;
-  const bool debuggable_;
-  const bool generate_debug_info_;
-  const bool implicit_null_checks_;
-  const bool implicit_so_checks_;
-  const bool implicit_suspend_checks_;
-  const bool compile_pic_;
+  double top_k_profile_threshold_;
+  bool debuggable_;
+  bool generate_debug_info_;
+  bool generate_mini_debug_info_;
+  bool implicit_null_checks_;
+  bool implicit_so_checks_;
+  bool implicit_suspend_checks_;
+  bool compile_pic_;
 
   // Vector of methods to have verbose output enabled for.
-  const std::vector<std::string>* const verbose_methods_;
-
-  std::unique_ptr<PassManagerOptions> pass_manager_options_;
+  const std::vector<std::string>* verbose_methods_;
 
   // Abort compilation with an error if we find a class that fails verification with a hard
   // failure.
-  const bool abort_on_hard_verifier_failure_;
+  bool abort_on_hard_verifier_failure_;
 
   // Log class initialization failures to this stream if not null.
-  std::ostream* const init_failure_output_;
+  std::unique_ptr<std::ostream> init_failure_output_;
+
+  std::string dump_cfg_file_name_;
+  bool dump_cfg_append_;
+
+  // Whether the compiler should trade performance for determinism to guarantee exactly reproducible
+  // outcomes.
+  bool force_determinism_;
+
+  RegisterAllocator::Strategy register_allocation_strategy_;
+
+  // If not null, specifies optimization passes which will be run instead of defaults.
+  // Note that passes_to_run_ is not checked for correctness and providing an incorrect
+  // list of passes can lead to unexpected compiler behavior. This is caused by dependencies
+  // between passes: failing to satisfy them can, for example, lead to compiler crashes.
+  // Passing pass names which are not recognized by the compiler will result in
+  // compiler-dependent behavior.
+  const std::vector<std::string>* passes_to_run_;
+
+  friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
 };
-std::ostream& operator<<(std::ostream& os, const CompilerOptions::CompilerFilter& rhs);
 
 }  // namespace art
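The no_inline_from_ comment above deliberately picks a plain vector with linear search over a lookup-oriented container, since the list usually holds a single dex file. The ContainsElement helper used by MayInlineInternal earlier in this change is essentially a std::find wrapper, roughly:

    #include <algorithm>

    // Simplified version of ART's ContainsElement helper (base/stl_util.h).
    template <typename Container, typename T>
    bool ContainsElement(const Container& container, const T& value) {
      return std::find(container.begin(), container.end(), value) != container.end();
    }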
 
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index cfaa01b..b0ee448 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -17,14 +17,12 @@
 #include "dex_compilation_unit.h"
 
 #include "base/stringprintf.h"
-#include "dex/compiler_ir.h"
 #include "mirror/dex_cache.h"
 #include "utils.h"
 
 namespace art {
 
-DexCompilationUnit::DexCompilationUnit(CompilationUnit* cu,
-                                       jobject class_loader,
+DexCompilationUnit::DexCompilationUnit(jobject class_loader,
                                        ClassLinker* class_linker,
                                        const DexFile& dex_file,
                                        const DexFile::CodeItem* code_item,
@@ -33,8 +31,7 @@
                                        uint32_t access_flags,
                                        const VerifiedMethod* verified_method,
                                        Handle<mirror::DexCache> dex_cache)
-    : cu_(cu),
-      class_loader_(class_loader),
+    : class_loader_(class_loader),
       class_linker_(class_linker),
       dex_file_(&dex_file),
       code_item_(code_item),
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 16872f4..854927d 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -30,15 +30,11 @@
 class DexCache;
 }  // namespace mirror
 class ClassLinker;
-struct CompilationUnit;
 class VerifiedMethod;
 
 class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> {
  public:
-  explicit DexCompilationUnit(CompilationUnit* cu);
-
-  DexCompilationUnit(CompilationUnit* cu,
-                     jobject class_loader,
+  DexCompilationUnit(jobject class_loader,
                      ClassLinker* class_linker,
                      const DexFile& dex_file,
                      const DexFile::CodeItem* code_item,
@@ -48,10 +44,6 @@
                      const VerifiedMethod* verified_method,
                      Handle<mirror::DexCache> dex_cache);
 
-  CompilationUnit* GetCompilationUnit() const {
-    return cu_;
-  }
-
   jobject GetClassLoader() const {
     return class_loader_;
   }
@@ -121,8 +113,6 @@
   }
 
  private:
-  CompilationUnit* const cu_;
-
   const jobject class_loader_;
 
   ClassLinker* const class_linker_;
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
deleted file mode 100644
index 60241f7..0000000
--- a/compiler/dwarf/debug_frame_opcode_writer.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
-#define ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
-
-#include "base/bit_utils.h"
-#include "dwarf/dwarf_constants.h"
-#include "dwarf/register.h"
-#include "dwarf/writer.h"
-
-namespace art {
-namespace dwarf {
-
-// Writer for .debug_frame opcodes (DWARF-3).
-// See the DWARF specification for the precise meaning of the opcodes.
-// The writer is very lightweight; however, it will do the following for you:
-//  * Choose the most compact encoding of a given opcode.
-//  * Keep track of current state and convert absolute values to deltas.
-//  * Divide by header-defined factors as appropriate.
-template<typename Vector = std::vector<uint8_t> >
-class DebugFrameOpCodeWriter : private Writer<Vector> {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
- public:
-  // To save space, DWARF divides most offsets by header-defined factors.
-  // They are used in integer divisions, so we make them constants.
-  // We usually subtract from the stack base pointer, so making the factor
-  // negative makes the encoded values positive and thus easier to encode.
-  static constexpr int kDataAlignmentFactor = -4;
-  static constexpr int kCodeAlignmentFactor = 1;
-
-  // Explicitly advance the program counter to the given location.
-  void ALWAYS_INLINE AdvancePC(int absolute_pc) {
-    DCHECK_GE(absolute_pc, current_pc_);
-    if (UNLIKELY(enabled_)) {
-      int delta = FactorCodeOffset(absolute_pc - current_pc_);
-      if (delta != 0) {
-        if (delta <= 0x3F) {
-          this->PushUint8(DW_CFA_advance_loc | delta);
-        } else if (delta <= UINT8_MAX) {
-          this->PushUint8(DW_CFA_advance_loc1);
-          this->PushUint8(delta);
-        } else if (delta <= UINT16_MAX) {
-          this->PushUint8(DW_CFA_advance_loc2);
-          this->PushUint16(delta);
-        } else {
-          this->PushUint8(DW_CFA_advance_loc4);
-          this->PushUint32(delta);
-        }
-      }
-      current_pc_ = absolute_pc;
-    }
-  }
-
-  // Override this method to automatically advance the PC before each opcode.
-  virtual void ImplicitlyAdvancePC() { }
-
-  // Common alias in assemblers - spill relative to current stack pointer.
-  void ALWAYS_INLINE RelOffset(Reg reg, int offset) {
-    Offset(reg, offset - current_cfa_offset_);
-  }
-
-  // Common alias in assemblers - increase stack frame size.
-  void ALWAYS_INLINE AdjustCFAOffset(int delta) {
-    DefCFAOffset(current_cfa_offset_ + delta);
-  }
-
-  // Custom alias - spill many registers based on bitmask.
-  void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset,
-                                      uint32_t reg_mask, int reg_size) {
-    DCHECK(reg_size == 4 || reg_size == 8);
-    if (UNLIKELY(enabled_)) {
-      for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
-        // Skip zero bits and go to the set bit.
-        int num_zeros = CTZ(reg_mask);
-        i += num_zeros;
-        reg_mask >>= num_zeros;
-        RelOffset(Reg(reg_base.num() + i), offset);
-        offset += reg_size;
-      }
-    }
-  }
-
-  // Custom alias - unspill many registers based on bitmask.
-  void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) {
-    if (UNLIKELY(enabled_)) {
-      for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
-        // Skip zero bits and go to the set bit.
-        int num_zeros = CTZ(reg_mask);
-        i += num_zeros;
-        reg_mask >>= num_zeros;
-        Restore(Reg(reg_base.num() + i));
-      }
-    }
-  }
-
-  void ALWAYS_INLINE Nop() {
-    if (UNLIKELY(enabled_)) {
-      this->PushUint8(DW_CFA_nop);
-    }
-  }
-
-  void ALWAYS_INLINE Offset(Reg reg, int offset) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      int factored_offset = FactorDataOffset(offset);  // May change sign.
-      if (factored_offset >= 0) {
-        if (0 <= reg.num() && reg.num() <= 0x3F) {
-          this->PushUint8(DW_CFA_offset | reg.num());
-          this->PushUleb128(factored_offset);
-        } else {
-          this->PushUint8(DW_CFA_offset_extended);
-          this->PushUleb128(reg.num());
-          this->PushUleb128(factored_offset);
-        }
-      } else {
-        uses_dwarf3_features_ = true;
-        this->PushUint8(DW_CFA_offset_extended_sf);
-        this->PushUleb128(reg.num());
-        this->PushSleb128(factored_offset);
-      }
-    }
-  }
-
-  void ALWAYS_INLINE Restore(Reg reg) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      if (0 <= reg.num() && reg.num() <= 0x3F) {
-        this->PushUint8(DW_CFA_restore | reg.num());
-      } else {
-        this->PushUint8(DW_CFA_restore_extended);
-        this->PushUleb128(reg.num());
-      }
-    }
-  }
-
-  void ALWAYS_INLINE Undefined(Reg reg) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      this->PushUint8(DW_CFA_undefined);
-      this->PushUleb128(reg.num());
-    }
-  }
-
-  void ALWAYS_INLINE SameValue(Reg reg) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      this->PushUint8(DW_CFA_same_value);
-      this->PushUleb128(reg.num());
-    }
-  }
-
-  // The previous value of "reg" is stored in register "new_reg".
-  void ALWAYS_INLINE Register(Reg reg, Reg new_reg) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      this->PushUint8(DW_CFA_register);
-      this->PushUleb128(reg.num());
-      this->PushUleb128(new_reg.num());
-    }
-  }
-
-  void ALWAYS_INLINE RememberState() {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      this->PushUint8(DW_CFA_remember_state);
-    }
-  }
-
-  void ALWAYS_INLINE RestoreState() {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      this->PushUint8(DW_CFA_restore_state);
-    }
-  }
-
-  void ALWAYS_INLINE DefCFA(Reg reg, int offset) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      if (offset >= 0) {
-        this->PushUint8(DW_CFA_def_cfa);
-        this->PushUleb128(reg.num());
-        this->PushUleb128(offset);  // Non-factored.
-      } else {
-        uses_dwarf3_features_ = true;
-        this->PushUint8(DW_CFA_def_cfa_sf);
-        this->PushUleb128(reg.num());
-        this->PushSleb128(FactorDataOffset(offset));
-      }
-    }
-    current_cfa_offset_ = offset;
-  }
-
-  void ALWAYS_INLINE DefCFARegister(Reg reg) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      this->PushUint8(DW_CFA_def_cfa_register);
-      this->PushUleb128(reg.num());
-    }
-  }
-
-  void ALWAYS_INLINE DefCFAOffset(int offset) {
-    if (UNLIKELY(enabled_)) {
-      if (current_cfa_offset_ != offset) {
-        ImplicitlyAdvancePC();
-        if (offset >= 0) {
-          this->PushUint8(DW_CFA_def_cfa_offset);
-          this->PushUleb128(offset);  // Non-factored.
-        } else {
-          uses_dwarf3_features_ = true;
-          this->PushUint8(DW_CFA_def_cfa_offset_sf);
-          this->PushSleb128(FactorDataOffset(offset));
-        }
-      }
-    }
-    // Unconditional so that the user can still get and check the value.
-    current_cfa_offset_ = offset;
-  }
-
-  void ALWAYS_INLINE ValOffset(Reg reg, int offset) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      uses_dwarf3_features_ = true;
-      int factored_offset = FactorDataOffset(offset);  // May change sign.
-      if (factored_offset >= 0) {
-        this->PushUint8(DW_CFA_val_offset);
-        this->PushUleb128(reg.num());
-        this->PushUleb128(factored_offset);
-      } else {
-        this->PushUint8(DW_CFA_val_offset_sf);
-        this->PushUleb128(reg.num());
-        this->PushSleb128(factored_offset);
-      }
-    }
-  }
-
-  void ALWAYS_INLINE DefCFAExpression(void * expr, int expr_size) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      uses_dwarf3_features_ = true;
-      this->PushUint8(DW_CFA_def_cfa_expression);
-      this->PushUleb128(expr_size);
-      this->PushData(expr, expr_size);
-    }
-  }
-
-  void ALWAYS_INLINE Expression(Reg reg, void * expr, int expr_size) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      uses_dwarf3_features_ = true;
-      this->PushUint8(DW_CFA_expression);
-      this->PushUleb128(reg.num());
-      this->PushUleb128(expr_size);
-      this->PushData(expr, expr_size);
-    }
-  }
-
-  void ALWAYS_INLINE ValExpression(Reg reg, void * expr, int expr_size) {
-    if (UNLIKELY(enabled_)) {
-      ImplicitlyAdvancePC();
-      uses_dwarf3_features_ = true;
-      this->PushUint8(DW_CFA_val_expression);
-      this->PushUleb128(reg.num());
-      this->PushUleb128(expr_size);
-      this->PushData(expr, expr_size);
-    }
-  }
-
-  bool IsEnabled() const { return enabled_; }
-
-  void SetEnabled(bool value) { enabled_ = value; }
-
-  int GetCurrentPC() const { return current_pc_; }
-
-  int GetCurrentCFAOffset() const { return current_cfa_offset_; }
-
-  void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; }
-
-  using Writer<Vector>::data;
-
-  DebugFrameOpCodeWriter(bool enabled = true,
-                         const typename Vector::allocator_type& alloc =
-                             typename Vector::allocator_type())
-      : Writer<Vector>(&opcodes_),
-        enabled_(enabled),
-        opcodes_(alloc),
-        current_cfa_offset_(0),
-        current_pc_(0),
-        uses_dwarf3_features_(false) {
-    if (enabled) {
-      // Best guess based on a couple of observed outputs.
-      opcodes_.reserve(16);
-    }
-  }
-
-  virtual ~DebugFrameOpCodeWriter() { }
-
- protected:
-  int FactorDataOffset(int offset) const {
-    DCHECK_EQ(offset % kDataAlignmentFactor, 0);
-    return offset / kDataAlignmentFactor;
-  }
-
-  int FactorCodeOffset(int offset) const {
-    DCHECK_EQ(offset % kCodeAlignmentFactor, 0);
-    return offset / kCodeAlignmentFactor;
-  }
-
-  bool enabled_;  // If disabled all writes are no-ops.
-  Vector opcodes_;
-  int current_cfa_offset_;
-  int current_pc_;
-  bool uses_dwarf3_features_;
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(DebugFrameOpCodeWriter);
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
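
The deleted DebugFrameOpCodeWriter's AdvancePC picks the smallest DW_CFA_advance_loc
variant that still fits the (factored) PC delta: six bits packed into the opcode byte
itself, then one-, two-, and four-byte operand forms. A standalone sketch of just that
size-tiered selection, assuming the same little-endian byte order as the writer's
PushUint16/PushUint32 helpers (EmitAdvance is an illustrative name, not ART API):

    #include <cstdint>
    #include <vector>

    // Encode a PC advance using the most compact DWARF form available.
    // Opcode values per the spec: DW_CFA_advance_loc = 0x40 (high two bits),
    // DW_CFA_advance_loc1/2/4 = 0x02/0x03/0x04.
    void EmitAdvance(std::vector<uint8_t>* out, uint32_t delta) {
      if (delta == 0) {
        return;  // Nothing to encode.
      } else if (delta <= 0x3F) {
        out->push_back(static_cast<uint8_t>(0x40 | delta));  // Delta in the low 6 bits.
      } else if (delta <= UINT8_MAX) {
        out->push_back(0x02);  // DW_CFA_advance_loc1 + u8 operand.
        out->push_back(static_cast<uint8_t>(delta));
      } else if (delta <= UINT16_MAX) {
        out->push_back(0x03);  // DW_CFA_advance_loc2 + little-endian u16 operand.
        out->push_back(static_cast<uint8_t>(delta));
        out->push_back(static_cast<uint8_t>(delta >> 8));
      } else {
        out->push_back(0x04);  // DW_CFA_advance_loc4 + little-endian u32 operand.
        for (int shift = 0; shift < 32; shift += 8) {
          out->push_back(static_cast<uint8_t>(delta >> shift));
        }
      }
    }

So an advance of 4 costs a single byte (0x44), while only deltas past 64 KiB pay for
the full five-byte form.
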
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
deleted file mode 100644
index d9b367b..0000000
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
-#define ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
-
-#include <cstdint>
-#include <unordered_map>
-
-#include "dwarf/dwarf_constants.h"
-#include "dwarf/writer.h"
-#include "leb128.h"
-
-namespace art {
-namespace dwarf {
-
-// 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
-// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-template <typename Vector>
-struct FNVHash {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  size_t operator()(const Vector& v) const {
-    uint32_t hash = 2166136261u;
-    for (size_t i = 0; i < v.size(); i++) {
-      hash = (hash ^ v[i]) * 16777619u;
-    }
-    return hash;
-  }
-};
-
-/*
- * Writer for debug information entries (DIE).
- * It also handles generation of abbreviations.
- *
- * Usage:
- *   StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
- *     WriteStrp(DW_AT_producer, "Compiler name", debug_str);
- *     StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
- *       WriteStrp(DW_AT_name, "Foo", debug_str);
- *     EndTag();
- *   EndTag();
- */
-template <typename Vector = std::vector<uint8_t>>
-class DebugInfoEntryWriter FINAL : private Writer<Vector> {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
- public:
-  // Start debugging information entry.
-  void StartTag(Tag tag, Children children) {
-    DCHECK(has_children) << "This tag cannot have nested tags";
-    if (inside_entry_) {
-      // Write abbrev code for the previous entry.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
-      inside_entry_ = false;
-    }
-    StartAbbrev(tag, children);
-    // Abbrev code placeholder of sufficient size.
-    abbrev_code_offset_ = this->data()->size();
-    this->PushUleb128(NextAbbrevCode());
-    depth_++;
-    inside_entry_ = true;
-    has_children = (children == DW_CHILDREN_yes);
-  }
-
-  // End debugging information entry.
-  void EndTag() {
-    DCHECK_GT(depth_, 0);
-    if (inside_entry_) {
-      // Write abbrev code for this tag.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
-      inside_entry_ = false;
-    }
-    if (has_children) {
-      this->PushUint8(0);  // End of children.
-    }
-    depth_--;
-    has_children = true;  // Parent tag obviously has children.
-  }
-
-  void WriteAddr(Attribute attrib, uint64_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_addr);
-    patch_locations_.push_back(this->data()->size());
-    if (is64bit_) {
-      this->PushUint64(value);
-    } else {
-      this->PushUint32(value);
-    }
-  }
-
-  void WriteBlock(Attribute attrib, const void* ptr, int size) {
-    AddAbbrevAttribute(attrib, DW_FORM_block);
-    this->PushUleb128(size);
-    this->PushData(ptr, size);
-  }
-
-  void WriteData1(Attribute attrib, uint8_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data1);
-    this->PushUint8(value);
-  }
-
-  void WriteData2(Attribute attrib, uint16_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data2);
-    this->PushUint16(value);
-  }
-
-  void WriteData4(Attribute attrib, uint32_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data4);
-    this->PushUint32(value);
-  }
-
-  void WriteData8(Attribute attrib, uint64_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data8);
-    this->PushUint64(value);
-  }
-
-  void WriteSdata(Attribute attrib, int value) {
-    AddAbbrevAttribute(attrib, DW_FORM_sdata);
-    this->PushSleb128(value);
-  }
-
-  void WriteUdata(Attribute attrib, int value) {
-    AddAbbrevAttribute(attrib, DW_FORM_udata);
-    this->PushUleb128(value);
-  }
-
-  void WriteUdata(Attribute attrib, uint32_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_udata);
-    this->PushUleb128(value);
-  }
-
-  void WriteFlag(Attribute attrib, bool value) {
-    AddAbbrevAttribute(attrib, DW_FORM_flag);
-    this->PushUint8(value ? 1 : 0);
-  }
-
-  void WriteRef4(Attribute attrib, int cu_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_ref4);
-    this->PushUint32(cu_offset);
-  }
-
-  void WriteRef(Attribute attrib, int cu_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
-    this->PushUleb128(cu_offset);
-  }
-
-  void WriteString(Attribute attrib, const char* value) {
-    AddAbbrevAttribute(attrib, DW_FORM_string);
-    this->PushString(value);
-  }
-
-  void WriteStrp(Attribute attrib, int address) {
-    AddAbbrevAttribute(attrib, DW_FORM_strp);
-    this->PushUint32(address);
-  }
-
-  void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
-    AddAbbrevAttribute(attrib, DW_FORM_strp);
-    int address = debug_str->size();
-    debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
-    this->PushUint32(address);
-  }
-
-  bool Is64bit() const { return is64bit_; }
-
-  const std::vector<uintptr_t>& GetPatchLocations() const {
-    return patch_locations_;
-  }
-
-  using Writer<Vector>::data;
-
-  DebugInfoEntryWriter(bool is64bitArch,
-                       Vector* debug_abbrev,
-                       const typename Vector::allocator_type& alloc =
-                           typename Vector::allocator_type())
-      : Writer<Vector>(&entries_),
-        debug_abbrev_(debug_abbrev),
-        current_abbrev_(alloc),
-        abbrev_codes_(alloc),
-        entries_(alloc),
-        is64bit_(is64bitArch) {
-    debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
-  }
-
-  ~DebugInfoEntryWriter() {
-    DCHECK_EQ(depth_, 0);
-  }
-
- private:
-  // Start abbreviation declaration.
-  void StartAbbrev(Tag tag, Children children) {
-    DCHECK(!inside_entry_);
-    current_abbrev_.clear();
-    EncodeUnsignedLeb128(&current_abbrev_, tag);
-    current_abbrev_.push_back(children);
-  }
-
-  // Add attribute specification.
-  void AddAbbrevAttribute(Attribute name, Form type) {
-    DCHECK(inside_entry_) << "Call StartTag before adding attributes.";
-    EncodeUnsignedLeb128(&current_abbrev_, name);
-    EncodeUnsignedLeb128(&current_abbrev_, type);
-  }
-
-  int NextAbbrevCode() {
-    return 1 + abbrev_codes_.size();
-  }
-
-  // End abbreviation declaration and return its code.
-  int EndAbbrev() {
-    DCHECK(inside_entry_);
-    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
-                                                  NextAbbrevCode()));
-    int abbrev_code = it.first->second;
-    if (UNLIKELY(it.second)) {  // Inserted new entry.
-      const Vector& abbrev = it.first->first;
-      debug_abbrev_.Pop();  // Remove abbrev table terminator.
-      debug_abbrev_.PushUleb128(abbrev_code);
-      debug_abbrev_.PushData(abbrev.data(), abbrev.size());
-      debug_abbrev_.PushUint8(0);  // Attribute list end.
-      debug_abbrev_.PushUint8(0);  // Attribute list end.
-      debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
-    }
-    return abbrev_code;
-  }
-
- private:
-  // Fields for writing and deduplication of abbrevs.
-  Writer<Vector> debug_abbrev_;
-  Vector current_abbrev_;
-  std::unordered_map<Vector, int,
-                     FNVHash<Vector> > abbrev_codes_;
-
-  // Fields for writing of debugging information entries.
-  Vector entries_;
-  bool is64bit_;
-  int depth_ = 0;
-  size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
-  bool inside_entry_ = false;  // Entry ends at first child (if any).
-  bool has_children = true;
-  std::vector<uintptr_t> patch_locations_;
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
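
The FNVHash functor above exists so that structurally identical abbreviation byte
strings hash to the same unordered_map bucket, letting EndAbbrev reuse an existing
abbrev code instead of emitting a duplicate declaration. The same 32-bit FNV-1a
recurrence as a self-contained function (Fnv1a32 is an illustrative name):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // 32-bit FNV-1a: xor each byte into the hash, then multiply by the FNV prime.
    uint32_t Fnv1a32(const std::vector<uint8_t>& bytes) {
      uint32_t hash = 2166136261u;       // FNV offset basis.
      for (uint8_t b : bytes) {
        hash = (hash ^ b) * 16777619u;   // FNV prime.
      }
      return hash;
    }

    int main() {
      // A two-byte abbreviation body: DW_TAG_compile_unit (0x11), DW_CHILDREN_yes (0x01).
      std::vector<uint8_t> abbrev = {0x11, 0x01};
      std::printf("0x%08x\n", static_cast<unsigned>(Fnv1a32(abbrev)));
      return 0;
    }
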
diff --git a/compiler/dwarf/debug_line_opcode_writer.h b/compiler/dwarf/debug_line_opcode_writer.h
deleted file mode 100644
index 201f0b4..0000000
--- a/compiler/dwarf/debug_line_opcode_writer.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
-#define ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
-
-#include <cstdint>
-
-#include "dwarf/dwarf_constants.h"
-#include "dwarf/writer.h"
-
-namespace art {
-namespace dwarf {
-
-// Writer for the .debug_line opcodes (DWARF-3).
-// The writer is very lightweight; however, it will do the following for you:
-//  * Choose the most compact encoding of a given opcode.
-//  * Keep track of current state and convert absolute values to deltas.
-//  * Divide by header-defined factors as appropriate.
-template<typename Vector = std::vector<uint8_t>>
-class DebugLineOpCodeWriter FINAL : private Writer<Vector> {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
- public:
-  static constexpr int kOpcodeBase = 13;
-  static constexpr bool kDefaultIsStmt = true;
-  static constexpr int kLineBase = -5;
-  static constexpr int kLineRange = 14;
-
-  void AddRow() {
-    this->PushUint8(DW_LNS_copy);
-  }
-
-  void AdvancePC(uint64_t absolute_address) {
-    DCHECK_NE(current_address_, 0u);  // Use SetAddress for the first advance.
-    DCHECK_GE(absolute_address, current_address_);
-    if (absolute_address != current_address_) {
-      uint64_t delta = FactorCodeOffset(absolute_address - current_address_);
-      if (delta <= INT32_MAX) {
-        this->PushUint8(DW_LNS_advance_pc);
-        this->PushUleb128(static_cast<int>(delta));
-        current_address_ = absolute_address;
-      } else {
-        SetAddress(absolute_address);
-      }
-    }
-  }
-
-  void AdvanceLine(int absolute_line) {
-    int delta = absolute_line - current_line_;
-    if (delta != 0) {
-      this->PushUint8(DW_LNS_advance_line);
-      this->PushSleb128(delta);
-      current_line_ = absolute_line;
-    }
-  }
-
-  void SetFile(int file) {
-    if (current_file_ != file) {
-      this->PushUint8(DW_LNS_set_file);
-      this->PushUleb128(file);
-      current_file_ = file;
-    }
-  }
-
-  void SetColumn(int column) {
-    this->PushUint8(DW_LNS_set_column);
-    this->PushUleb128(column);
-  }
-
-  void NegateStmt() {
-    this->PushUint8(DW_LNS_negate_stmt);
-  }
-
-  void SetBasicBlock() {
-    this->PushUint8(DW_LNS_set_basic_block);
-  }
-
-  void SetPrologueEnd() {
-    uses_dwarf3_features_ = true;
-    this->PushUint8(DW_LNS_set_prologue_end);
-  }
-
-  void SetEpilogueBegin() {
-    uses_dwarf3_features_ = true;
-    this->PushUint8(DW_LNS_set_epilogue_begin);
-  }
-
-  void SetISA(int isa) {
-    uses_dwarf3_features_ = true;
-    this->PushUint8(DW_LNS_set_isa);
-    this->PushUleb128(isa);
-  }
-
-  void EndSequence() {
-    this->PushUint8(0);
-    this->PushUleb128(1);
-    this->PushUint8(DW_LNE_end_sequence);
-    current_address_ = 0;
-    current_file_ = 1;
-    current_line_ = 1;
-  }
-
-  // Unconditionally set the address using the long encoding.
-  // This gives the linker opportunity to relocate the address.
-  void SetAddress(uint64_t absolute_address) {
-    DCHECK_GE(absolute_address, current_address_);
-    FactorCodeOffset(absolute_address);  // Check if it is factorable.
-    this->PushUint8(0);
-    if (use_64bit_address_) {
-      this->PushUleb128(1 + 8);
-      this->PushUint8(DW_LNE_set_address);
-      patch_locations_.push_back(this->data()->size());
-      this->PushUint64(absolute_address);
-    } else {
-      this->PushUleb128(1 + 4);
-      this->PushUint8(DW_LNE_set_address);
-      patch_locations_.push_back(this->data()->size());
-      this->PushUint32(absolute_address);
-    }
-    current_address_ = absolute_address;
-  }
-
-  void DefineFile(const char* filename,
-                  int directory_index,
-                  int modification_time,
-                  int file_size) {
-    int size = 1 +
-               strlen(filename) + 1 +
-               UnsignedLeb128Size(directory_index) +
-               UnsignedLeb128Size(modification_time) +
-               UnsignedLeb128Size(file_size);
-    this->PushUint8(0);
-    this->PushUleb128(size);
-    size_t start = data()->size();
-    this->PushUint8(DW_LNE_define_file);
-    this->PushString(filename);
-    this->PushUleb128(directory_index);
-    this->PushUleb128(modification_time);
-    this->PushUleb128(file_size);
-    DCHECK_EQ(start + size, data()->size());
-  }
-
-  // Compact address and line opcode.
-  void AddRow(uint64_t absolute_address, int absolute_line) {
-    DCHECK_GE(absolute_address, current_address_);
-
-    // If the address is definitely too far, use the long encoding.
-    uint64_t delta_address = FactorCodeOffset(absolute_address - current_address_);
-    if (delta_address > UINT8_MAX) {
-      AdvancePC(absolute_address);
-      delta_address = 0;
-    }
-
-    // If the line is definitely too far, use the long encoding.
-    int delta_line = absolute_line - current_line_;
-    if (!(kLineBase <= delta_line && delta_line < kLineBase + kLineRange)) {
-      AdvanceLine(absolute_line);
-      delta_line = 0;
-    }
-
-    // Both address and line should be reasonable now.  Use the short encoding.
-    int opcode = kOpcodeBase + (delta_line - kLineBase) +
-                 (static_cast<int>(delta_address) * kLineRange);
-    if (opcode > UINT8_MAX) {
-      // If the address is still too far, try to increment it by const amount.
-      int const_advance = (0xFF - kOpcodeBase) / kLineRange;
-      opcode -= (kLineRange * const_advance);
-      if (opcode <= UINT8_MAX) {
-        this->PushUint8(DW_LNS_const_add_pc);
-      } else {
-        // Give up and use long encoding for address.
-        AdvancePC(absolute_address);
-        // Still use the opcode to do line advance and copy.
-        opcode = kOpcodeBase + (delta_line - kLineBase);
-      }
-    }
-    DCHECK(kOpcodeBase <= opcode && opcode <= 0xFF);
-    this->PushUint8(opcode);  // Special opcode.
-    current_line_ = absolute_line;
-    current_address_ = absolute_address;
-  }
-
-  int GetCodeFactorBits() const {
-    return code_factor_bits_;
-  }
-
-  uint64_t CurrentAddress() const {
-    return current_address_;
-  }
-
-  int CurrentFile() const {
-    return current_file_;
-  }
-
-  int CurrentLine() const {
-    return current_line_;
-  }
-
-  const std::vector<uintptr_t>& GetPatchLocations() const {
-    return patch_locations_;
-  }
-
-  using Writer<Vector>::data;
-
-  DebugLineOpCodeWriter(bool use64bitAddress,
-                        int codeFactorBits,
-                        const typename Vector::allocator_type& alloc =
-                            typename Vector::allocator_type())
-      : Writer<Vector>(&opcodes_),
-        opcodes_(alloc),
-        uses_dwarf3_features_(false),
-        use_64bit_address_(use64bitAddress),
-        code_factor_bits_(codeFactorBits),
-        current_address_(0),
-        current_file_(1),
-        current_line_(1) {
-  }
-
- private:
-  uint64_t FactorCodeOffset(uint64_t offset) const {
-    DCHECK_GE(code_factor_bits_, 0);
-    DCHECK_EQ((offset >> code_factor_bits_) << code_factor_bits_, offset);
-    return offset >> code_factor_bits_;
-  }
-
-  Vector opcodes_;
-  bool uses_dwarf3_features_;
-  bool use_64bit_address_;
-  int code_factor_bits_;
-  uint64_t current_address_;
-  int current_file_;
-  int current_line_;
-  std::vector<uintptr_t> patch_locations_;
-
-  DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter);
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
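
The compact AddRow(absolute_address, absolute_line) above folds both deltas into a
single special opcode: opcode = kOpcodeBase + (delta_line - kLineBase) +
delta_address * kLineRange, falling back to the long forms whenever the result would
not fit in a byte. A worked instance with the header constants (kOpcodeBase = 13,
kLineBase = -5, kLineRange = 14), under the assumption that the address delta has
already been divided by the code factor:

    #include <cassert>

    int main() {
      const int kOpcodeBase = 13, kLineBase = -5, kLineRange = 14;
      const int delta_address = 2;  // Factored PC advance.
      const int delta_line = 1;     // Line advance to the next source line.
      int opcode = kOpcodeBase + (delta_line - kLineBase) + delta_address * kLineRange;
      assert(opcode == 47);  // 13 + 6 + 28: one byte advances PC and line and emits a row.
      assert(kOpcodeBase <= opcode && opcode <= 0xFF);  // Fits; no fallback needed.
      return 0;
    }

This packing is why the DebugLineSpecialOpcodes test in dwarf_test.cc below can expect
well under three bytes per emitted row.
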
diff --git a/compiler/dwarf/dwarf_constants.h b/compiler/dwarf/dwarf_constants.h
deleted file mode 100644
index 3b570e5..0000000
--- a/compiler/dwarf/dwarf_constants.h
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DWARF_CONSTANTS_H_
-#define ART_COMPILER_DWARF_DWARF_CONSTANTS_H_
-
-namespace art {
-namespace dwarf {
-
-// Based on the Dwarf 4 specification at dwarfstd.com and issues marked
-// for inclusion in Dwarf 5 on same. Values not specified in the Dwarf 4
-// standard might change or be removed in the future and may be different
-// than the values used currently by other implementations for the same trait;
-// use at your own risk.
-
-enum Tag {
-  DW_TAG_array_type = 0x01,
-  DW_TAG_class_type = 0x02,
-  DW_TAG_entry_point = 0x03,
-  DW_TAG_enumeration_type = 0x04,
-  DW_TAG_formal_parameter = 0x05,
-  DW_TAG_imported_declaration = 0x08,
-  DW_TAG_label = 0x0a,
-  DW_TAG_lexical_block = 0x0b,
-  DW_TAG_member = 0x0d,
-  DW_TAG_pointer_type = 0x0f,
-  DW_TAG_reference_type = 0x10,
-  DW_TAG_compile_unit = 0x11,
-  DW_TAG_string_type = 0x12,
-  DW_TAG_structure_type = 0x13,
-  DW_TAG_subroutine_type = 0x15,
-  DW_TAG_typedef = 0x16,
-  DW_TAG_union_type = 0x17,
-  DW_TAG_unspecified_parameters = 0x18,
-  DW_TAG_variant = 0x19,
-  DW_TAG_common_block = 0x1a,
-  DW_TAG_common_inclusion = 0x1b,
-  DW_TAG_inheritance = 0x1c,
-  DW_TAG_inlined_subroutine = 0x1d,
-  DW_TAG_module = 0x1e,
-  DW_TAG_ptr_to_member_type = 0x1f,
-  DW_TAG_set_type = 0x20,
-  DW_TAG_subrange_type = 0x21,
-  DW_TAG_with_stmt = 0x22,
-  DW_TAG_access_declaration = 0x23,
-  DW_TAG_base_type = 0x24,
-  DW_TAG_catch_block = 0x25,
-  DW_TAG_const_type = 0x26,
-  DW_TAG_constant = 0x27,
-  DW_TAG_enumerator = 0x28,
-  DW_TAG_file_type = 0x29,
-  DW_TAG_friend = 0x2a,
-  DW_TAG_namelist = 0x2b,
-  DW_TAG_namelist_item = 0x2c,
-  DW_TAG_packed_type = 0x2d,
-  DW_TAG_subprogram = 0x2e,
-  DW_TAG_template_type_parameter = 0x2f,
-  DW_TAG_template_value_parameter = 0x30,
-  DW_TAG_thrown_type = 0x31,
-  DW_TAG_try_block = 0x32,
-  DW_TAG_variant_part = 0x33,
-  DW_TAG_variable = 0x34,
-  DW_TAG_volatile_type = 0x35,
-  DW_TAG_dwarf_procedure = 0x36,
-  DW_TAG_restrict_type = 0x37,
-  DW_TAG_interface_type = 0x38,
-  DW_TAG_namespace = 0x39,
-  DW_TAG_imported_module = 0x3a,
-  DW_TAG_unspecified_type = 0x3b,
-  DW_TAG_partial_unit = 0x3c,
-  DW_TAG_imported_unit = 0x3d,
-  DW_TAG_condition = 0x3f,
-  DW_TAG_shared_type = 0x40,
-  DW_TAG_type_unit = 0x41,
-  DW_TAG_rvalue_reference_type = 0x42,
-  DW_TAG_template_alias = 0x43,
-#ifdef INCLUDE_DWARF5_VALUES
-  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
-  // may be different than other implementations. Use with caution.
-  // TODO Update these values when Dwarf 5 is released.
-  DW_TAG_coarray_type = 0x44,
-  DW_TAG_call_site = 0x45,
-  DW_TAG_call_site_parameter = 0x46,
-  DW_TAG_generic_subrange = 0x47,
-  DW_TAG_atomic_type = 0x48,
-  DW_TAG_dynamic_type = 0x49,
-  DW_TAG_aligned_type = 0x50,
-#endif
-  DW_TAG_lo_user = 0x4080,
-  DW_TAG_hi_user = 0xffff
-};
-
-enum Children : uint8_t {
-  DW_CHILDREN_no = 0x00,
-  DW_CHILDREN_yes = 0x01
-};
-
-enum Attribute {
-  DW_AT_sibling = 0x01,
-  DW_AT_location = 0x02,
-  DW_AT_name = 0x03,
-  DW_AT_ordering = 0x09,
-  DW_AT_byte_size = 0x0b,
-  DW_AT_bit_offset = 0x0c,
-  DW_AT_bit_size = 0x0d,
-  DW_AT_stmt_list = 0x10,
-  DW_AT_low_pc = 0x11,
-  DW_AT_high_pc = 0x12,
-  DW_AT_language = 0x13,
-  DW_AT_discr = 0x15,
-  DW_AT_discr_value = 0x16,
-  DW_AT_visibility = 0x17,
-  DW_AT_import = 0x18,
-  DW_AT_string_length = 0x19,
-  DW_AT_common_reference = 0x1a,
-  DW_AT_comp_dir = 0x1b,
-  DW_AT_const_value = 0x1c,
-  DW_AT_containing_type = 0x1d,
-  DW_AT_default_value = 0x1e,
-  DW_AT_inline = 0x20,
-  DW_AT_is_optional = 0x21,
-  DW_AT_lower_bound = 0x22,
-  DW_AT_producer = 0x25,
-  DW_AT_prototyped = 0x27,
-  DW_AT_return_addr = 0x2a,
-  DW_AT_start_scope = 0x2c,
-  DW_AT_bit_stride = 0x2e,
-  DW_AT_upper_bound = 0x2f,
-  DW_AT_abstract_origin = 0x31,
-  DW_AT_accessibility = 0x32,
-  DW_AT_address_class = 0x33,
-  DW_AT_artificial = 0x34,
-  DW_AT_base_types = 0x35,
-  DW_AT_calling_convention = 0x36,
-  DW_AT_count = 0x37,
-  DW_AT_data_member_location = 0x38,
-  DW_AT_decl_column = 0x39,
-  DW_AT_decl_file = 0x3a,
-  DW_AT_decl_line = 0x3b,
-  DW_AT_declaration = 0x3c,
-  DW_AT_discr_list = 0x3d,
-  DW_AT_encoding = 0x3e,
-  DW_AT_external = 0x3f,
-  DW_AT_frame_base = 0x40,
-  DW_AT_friend = 0x41,
-  DW_AT_identifier_case = 0x42,
-  DW_AT_macro_info = 0x43,
-  DW_AT_namelist_item = 0x44,
-  DW_AT_priority = 0x45,
-  DW_AT_segment = 0x46,
-  DW_AT_specification = 0x47,
-  DW_AT_static_link = 0x48,
-  DW_AT_type = 0x49,
-  DW_AT_use_location = 0x4a,
-  DW_AT_variable_parameter = 0x4b,
-  DW_AT_virtuality = 0x4c,
-  DW_AT_vtable_elem_location = 0x4d,
-  DW_AT_allocated = 0x4e,
-  DW_AT_associated = 0x4f,
-  DW_AT_data_location = 0x50,
-  DW_AT_byte_stride = 0x51,
-  DW_AT_entry_pc = 0x52,
-  DW_AT_use_UTF8 = 0x53,
-  DW_AT_extension = 0x54,
-  DW_AT_ranges = 0x55,
-  DW_AT_trampoline = 0x56,
-  DW_AT_call_column = 0x57,
-  DW_AT_call_file = 0x58,
-  DW_AT_call_line = 0x59,
-  DW_AT_description = 0x5a,
-  DW_AT_binary_scale = 0x5b,
-  DW_AT_decimal_scale = 0x5c,
-  DW_AT_small = 0x5d,
-  DW_AT_decimal_sign = 0x5e,
-  DW_AT_digit_count = 0x5f,
-  DW_AT_picture_string = 0x60,
-  DW_AT_mutable = 0x61,
-  DW_AT_threads_scaled = 0x62,
-  DW_AT_explicit = 0x63,
-  DW_AT_object_pointer = 0x64,
-  DW_AT_endianity = 0x65,
-  DW_AT_elemental = 0x66,
-  DW_AT_pure = 0x67,
-  DW_AT_recursive = 0x68,
-  DW_AT_signature = 0x69,
-  DW_AT_main_subprogram = 0x6a,
-  DW_AT_data_bit_offset = 0x6b,
-  DW_AT_const_expr = 0x6c,
-  DW_AT_enum_class = 0x6d,
-#ifdef INCLUDE_DWARF5_VALUES
-  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
-  // may be different than other implementations. Use with caution.
-  // TODO Update these values when Dwarf 5 is released.
-  DW_AT_linkage_name = 0x6e,
-  DW_AT_call_site_value = 0x6f,
-  DW_AT_call_site_data_value = 0x70,
-  DW_AT_call_site_target = 0x71,
-  DW_AT_call_site_target_clobbered = 0x72,
-  DW_AT_tail_call = 0x73,
-  DW_AT_all_tail_call_sites = 0x74,
-  DW_AT_all_call_sites = 0x75,
-  DW_AT_all_source_call_sites = 0x76,
-  DW_AT_call_site_parameter = 0x77,
-  DW_AT_rank = 0x7c,
-  DW_AT_string_bitsize = 0x7d,
-  DW_AT_string_byte_size = 0x7e,
-  DW_AT_reference = 0x7f,
-  DW_AT_rvalue_reference = 0x80,
-  DW_AT_noreturn = 0x81,
-  DW_AT_alignment = 0x82,
-#endif
-  DW_AT_lo_user = 0x2000,
-  DW_AT_hi_user = 0xffff
-};
-
-enum Form : uint8_t {
-  DW_FORM_addr = 0x01,
-  DW_FORM_block2 = 0x03,
-  DW_FORM_block4 = 0x04,
-  DW_FORM_data2 = 0x05,
-  DW_FORM_data4 = 0x06,
-  DW_FORM_data8 = 0x07,
-  DW_FORM_string = 0x08,
-  DW_FORM_block = 0x09,
-  DW_FORM_block1 = 0x0a,
-  DW_FORM_data1 = 0x0b,
-  DW_FORM_flag = 0x0c,
-  DW_FORM_sdata = 0x0d,
-  DW_FORM_strp = 0x0e,
-  DW_FORM_udata = 0x0f,
-  DW_FORM_ref_addr = 0x10,
-  DW_FORM_ref1 = 0x11,
-  DW_FORM_ref2 = 0x12,
-  DW_FORM_ref4 = 0x13,
-  DW_FORM_ref8 = 0x14,
-  DW_FORM_ref_udata = 0x15,
-  DW_FORM_indirect = 0x16,
-  DW_FORM_sec_offset = 0x17,
-  DW_FORM_exprloc = 0x18,
-  DW_FORM_flag_present = 0x19,
-  DW_FORM_ref_sig8 = 0x20
-};
-
-enum Operation : uint16_t {
-  DW_OP_addr = 0x03,
-  DW_OP_deref = 0x06,
-  DW_OP_const1u = 0x08,
-  DW_OP_const1s = 0x09,
-  DW_OP_const2u = 0x0a,
-  DW_OP_const2s = 0x0b,
-  DW_OP_const4u = 0x0c,
-  DW_OP_const4s = 0x0d,
-  DW_OP_const8u = 0x0e,
-  DW_OP_const8s = 0x0f,
-  DW_OP_constu = 0x10,
-  DW_OP_consts = 0x11,
-  DW_OP_dup = 0x12,
-  DW_OP_drop = 0x13,
-  DW_OP_over = 0x14,
-  DW_OP_pick = 0x15,
-  DW_OP_swap = 0x16,
-  DW_OP_rot = 0x17,
-  DW_OP_xderef = 0x18,
-  DW_OP_abs = 0x19,
-  DW_OP_and = 0x1a,
-  DW_OP_div = 0x1b,
-  DW_OP_minus = 0x1c,
-  DW_OP_mod = 0x1d,
-  DW_OP_mul = 0x1e,
-  DW_OP_neg = 0x1f,
-  DW_OP_not = 0x20,
-  DW_OP_or = 0x21,
-  DW_OP_plus = 0x22,
-  DW_OP_plus_uconst = 0x23,
-  DW_OP_shl = 0x24,
-  DW_OP_shr = 0x25,
-  DW_OP_shra = 0x26,
-  DW_OP_xor = 0x27,
-  DW_OP_skip = 0x2f,
-  DW_OP_bra = 0x28,
-  DW_OP_eq = 0x29,
-  DW_OP_ge = 0x2a,
-  DW_OP_gt = 0x2b,
-  DW_OP_le = 0x2c,
-  DW_OP_lt = 0x2d,
-  DW_OP_ne = 0x2e,
-  DW_OP_lit0 = 0x30,
-  DW_OP_lit1 = 0x31,
-  DW_OP_lit2 = 0x32,
-  DW_OP_lit3 = 0x33,
-  DW_OP_lit4 = 0x34,
-  DW_OP_lit5 = 0x35,
-  DW_OP_lit6 = 0x36,
-  DW_OP_lit7 = 0x37,
-  DW_OP_lit8 = 0x38,
-  DW_OP_lit9 = 0x39,
-  DW_OP_lit10 = 0x3a,
-  DW_OP_lit11 = 0x3b,
-  DW_OP_lit12 = 0x3c,
-  DW_OP_lit13 = 0x3d,
-  DW_OP_lit14 = 0x3e,
-  DW_OP_lit15 = 0x3f,
-  DW_OP_lit16 = 0x40,
-  DW_OP_lit17 = 0x41,
-  DW_OP_lit18 = 0x42,
-  DW_OP_lit19 = 0x43,
-  DW_OP_lit20 = 0x44,
-  DW_OP_lit21 = 0x45,
-  DW_OP_lit22 = 0x46,
-  DW_OP_lit23 = 0x47,
-  DW_OP_lit24 = 0x48,
-  DW_OP_lit25 = 0x49,
-  DW_OP_lit26 = 0x4a,
-  DW_OP_lit27 = 0x4b,
-  DW_OP_lit28 = 0x4c,
-  DW_OP_lit29 = 0x4d,
-  DW_OP_lit30 = 0x4e,
-  DW_OP_lit31 = 0x4f,
-  DW_OP_reg0 = 0x50,
-  DW_OP_reg1 = 0x51,
-  DW_OP_reg2 = 0x52,
-  DW_OP_reg3 = 0x53,
-  DW_OP_reg4 = 0x54,
-  DW_OP_reg5 = 0x55,
-  DW_OP_reg6 = 0x56,
-  DW_OP_reg7 = 0x57,
-  DW_OP_reg8 = 0x58,
-  DW_OP_reg9 = 0x59,
-  DW_OP_reg10 = 0x5a,
-  DW_OP_reg11 = 0x5b,
-  DW_OP_reg12 = 0x5c,
-  DW_OP_reg13 = 0x5d,
-  DW_OP_reg14 = 0x5e,
-  DW_OP_reg15 = 0x5f,
-  DW_OP_reg16 = 0x60,
-  DW_OP_reg17 = 0x61,
-  DW_OP_reg18 = 0x62,
-  DW_OP_reg19 = 0x63,
-  DW_OP_reg20 = 0x64,
-  DW_OP_reg21 = 0x65,
-  DW_OP_reg22 = 0x66,
-  DW_OP_reg23 = 0x67,
-  DW_OP_reg24 = 0x68,
-  DW_OP_reg25 = 0x69,
-  DW_OP_reg26 = 0x6a,
-  DW_OP_reg27 = 0x6b,
-  DW_OP_reg28 = 0x6c,
-  DW_OP_reg29 = 0x6d,
-  DW_OP_reg30 = 0x6e,
-  DW_OP_reg31 = 0x6f,
-  DW_OP_breg0 = 0x70,
-  DW_OP_breg1 = 0x71,
-  DW_OP_breg2 = 0x72,
-  DW_OP_breg3 = 0x73,
-  DW_OP_breg4 = 0x74,
-  DW_OP_breg5 = 0x75,
-  DW_OP_breg6 = 0x76,
-  DW_OP_breg7 = 0x77,
-  DW_OP_breg8 = 0x78,
-  DW_OP_breg9 = 0x79,
-  DW_OP_breg10 = 0x7a,
-  DW_OP_breg11 = 0x7b,
-  DW_OP_breg12 = 0x7c,
-  DW_OP_breg13 = 0x7d,
-  DW_OP_breg14 = 0x7e,
-  DW_OP_breg15 = 0x7f,
-  DW_OP_breg16 = 0x80,
-  DW_OP_breg17 = 0x81,
-  DW_OP_breg18 = 0x82,
-  DW_OP_breg19 = 0x83,
-  DW_OP_breg20 = 0x84,
-  DW_OP_breg21 = 0x85,
-  DW_OP_breg22 = 0x86,
-  DW_OP_breg23 = 0x87,
-  DW_OP_breg24 = 0x88,
-  DW_OP_breg25 = 0x89,
-  DW_OP_breg26 = 0x8a,
-  DW_OP_breg27 = 0x8b,
-  DW_OP_breg28 = 0x8c,
-  DW_OP_breg29 = 0x8d,
-  DW_OP_breg30 = 0x8e,
-  DW_OP_breg31 = 0x8f,
-  DW_OP_regx = 0x90,
-  DW_OP_fbreg = 0x91,
-  DW_OP_bregx = 0x92,
-  DW_OP_piece = 0x93,
-  DW_OP_deref_size = 0x94,
-  DW_OP_xderef_size = 0x95,
-  DW_OP_nop = 0x96,
-  DW_OP_push_object_address = 0x97,
-  DW_OP_call2 = 0x98,
-  DW_OP_call4 = 0x99,
-  DW_OP_call_ref = 0x9a,
-  DW_OP_form_tls_address = 0x9b,
-  DW_OP_call_frame_cfa = 0x9c,
-  DW_OP_bit_piece = 0x9d,
-  DW_OP_implicit_value = 0x9e,
-  DW_OP_stack_value = 0x9f,
-#ifdef INCLUDE_DWARF5_VALUES
-  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
-  // may be different than other implementations. Use with caution.
-  // TODO Update these values when Dwarf 5 is released.
-  DW_OP_entry_value = 0xa0,
-  DW_OP_const_type = 0xa1,
-  DW_OP_regval_type = 0xa2,
-  DW_OP_deref_type = 0xa3,
-  DW_OP_xderef_type = 0xa4,
-  DW_OP_convert = 0xa5,
-  DW_OP_reinterpret = 0xa6,
-#endif
-  DW_OP_lo_user = 0xe0,
-  DW_OP_hi_user = 0xff
-};
-
-enum BaseTypeEncoding : uint8_t {
-  DW_ATE_address = 0x01,
-  DW_ATE_boolean = 0x02,
-  DW_ATE_complex_float = 0x03,
-  DW_ATE_float = 0x04,
-  DW_ATE_signed = 0x05,
-  DW_ATE_signed_char = 0x06,
-  DW_ATE_unsigned = 0x07,
-  DW_ATE_unsigned_char = 0x08,
-  DW_ATE_imaginary_float = 0x09,
-  DW_ATE_packed_decimal = 0x0a,
-  DW_ATE_numeric_string = 0x0b,
-  DW_ATE_edited = 0x0c,
-  DW_ATE_signed_fixed = 0x0d,
-  DW_ATE_unsigned_fixed = 0x0e,
-  DW_ATE_decimal_float = 0x0f,
-  DW_ATE_UTF = 0x10,
-  DW_ATE_lo_user = 0x80,
-  DW_ATE_hi_user = 0xff
-};
-
-enum DecimalSign : uint8_t {
-  DW_DS_unsigned = 0x01,
-  DW_DS_leading_overpunch = 0x02,
-  DW_DS_trailing_overpunch = 0x03,
-  DW_DS_leading_separate = 0x04,
-  DW_DS_trailing_separate = 0x05
-};
-
-enum Endianity : uint8_t {
-  DW_END_default = 0x00,
-  DW_END_big = 0x01,
-  DW_END_little = 0x02,
-  DW_END_lo_user = 0x40,
-  DW_END_hi_user = 0xff
-};
-
-enum Accessibility : uint8_t {
-  DW_ACCESS_public = 0x01,
-  DW_ACCESS_protected = 0x02,
-  DW_ACCESS_private = 0x03
-};
-
-enum Visibility : uint8_t {
-  DW_VIS_local = 0x01,
-  DW_VIS_exported = 0x02,
-  DW_VIS_qualified = 0x03
-};
-
-enum Virtuality : uint8_t {
-  DW_VIRTUALITY_none = 0x00,
-  DW_VIRTUALITY_virtual = 0x01,
-  DW_VIRTUALITY_pure_virtual = 0x02
-};
-
-enum Language {
-  DW_LANG_C89 = 0x01,
-  DW_LANG_C = 0x02,
-  DW_LANG_Ada83 = 0x03,
-  DW_LANG_C_plus_plus = 0x04,
-  DW_LANG_Cobol74 = 0x05,
-  DW_LANG_Cobol85 = 0x06,
-  DW_LANG_Fortran77 = 0x07,
-  DW_LANG_Fortran90 = 0x08,
-  DW_LANG_Pascal83 = 0x09,
-  DW_LANG_Modula2 = 0x0a,
-  DW_LANG_Java = 0x0b,
-  DW_LANG_C99 = 0x0c,
-  DW_LANG_Ada95 = 0x0d,
-  DW_LANG_Fortran95 = 0x0e,
-  DW_LANG_PLI = 0x0f,
-  DW_LANG_ObjC = 0x10,
-  DW_LANG_ObjC_plus_plus = 0x11,
-  DW_LANG_UPC = 0x12,
-  DW_LANG_D = 0x13,
-  DW_LANG_Python = 0x14,
-#ifdef INCLUDE_DWARF5_VALUES
-  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
-  // may be different than other implementations. Use with caution.
-  // TODO Update these values when Dwarf 5 is released.
-  DW_LANG_OpenCL = 0x15,
-  DW_LANG_Go = 0x16,
-  DW_LANG_Modula3 = 0x17,
-  DW_LANG_Haskell = 0x18,
-  DW_LANG_C_plus_plus_03 = 0x19,
-  DW_LANG_C_plus_plus_11 = 0x1a,
-  DW_LANG_OCaml = 0x1b,
-  DW_LANG_Rust = 0x1c,
-  DW_LANG_C11 = 0x1d,
-  DW_LANG_Swift = 0x1e,
-  DW_LANG_Julia = 0x1f,
-#endif
-  DW_LANG_lo_user = 0x8000,
-  DW_LANG_hi_user = 0xffff
-};
-
-enum Identifier : uint8_t {
-  DW_ID_case_sensitive = 0x00,
-  DW_ID_up_case = 0x01,
-  DW_ID_down_case = 0x02,
-  DW_ID_case_insensitive = 0x03
-};
-
-enum CallingConvention : uint8_t {
-  DW_CC_normal = 0x01,
-  DW_CC_program = 0x02,
-  DW_CC_nocall = 0x03,
-  DW_CC_lo_user = 0x40,
-  DW_CC_hi_user = 0xff
-};
-
-enum Inline : uint8_t {
-  DW_INL_not_inlined = 0x00,
-  DW_INL_inlined = 0x01,
-  DW_INL_declared_not_inlined = 0x02,
-  DW_INL_declared_inlined = 0x03
-};
-
-enum ArrayOrdering : uint8_t {
-  DW_ORD_row_major = 0x00,
-  DW_ORD_col_major = 0x01
-};
-
-enum DiscriminantList : uint8_t {
-  DW_DSC_label = 0x00,
-  DW_DSC_range = 0x01
-};
-
-enum LineNumberOpcode : uint8_t {
-  DW_LNS_copy = 0x01,
-  DW_LNS_advance_pc = 0x02,
-  DW_LNS_advance_line = 0x03,
-  DW_LNS_set_file = 0x04,
-  DW_LNS_set_column = 0x05,
-  DW_LNS_negate_stmt = 0x06,
-  DW_LNS_set_basic_block = 0x07,
-  DW_LNS_const_add_pc = 0x08,
-  DW_LNS_fixed_advance_pc = 0x09,
-  DW_LNS_set_prologue_end = 0x0a,
-  DW_LNS_set_epilogue_begin = 0x0b,
-  DW_LNS_set_isa = 0x0c
-};
-
-enum LineNumberExtendedOpcode : uint8_t {
-  DW_LNE_end_sequence = 0x01,
-  DW_LNE_set_address = 0x02,
-  DW_LNE_define_file = 0x03,
-  DW_LNE_set_discriminator = 0x04,
-  DW_LNE_lo_user = 0x80,
-  DW_LNE_hi_user = 0xff
-};
-
-#ifdef INCLUDE_DWARF5_VALUES
-enum LineNumberFormat : uint8_t {
-  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
-  // may be different than other implementations. Use with caution.
-  // TODO Update these values when Dwarf 5 is released.
-  //
-  DW_LNF_path = 0x1,
-  DW_LNF_include_index = 0x2,
-  DW_LNF_timestamp = 0x3,
-  DW_LNF_size = 0x4,
-  DW_LNF_MD5 = 0x5,
-  DW_LNF_lo_user = 0x2000,
-  DW_LNF_hi_user = 0x3fff
-};
-#endif
-
-enum MacroInfo : uint8_t {
-  DW_MACINFO_define = 0x01,
-  DW_MACINFO_undef = 0x02,
-  DW_MACINFO_start_file = 0x03,
-  DW_MACINFO_end_file = 0x04,
-  DW_MACINFO_vendor_ext = 0xff
-};
-
-#ifdef INCLUDE_DWARF5_VALUES
-enum Macro : uint8_t {
-  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
-  // may be different than other implementations. Use with caution.
-  // TODO Update these values when Dwarf 5 is released.
-  DW_MACRO_define = 0x01,
-  DW_MACRO_undef = 0x02,
-  DW_MACRO_start_file = 0x03,
-  DW_MACRO_end_file = 0x04,
-  DW_MACRO_define_indirect = 0x05,
-  DW_MACRO_undef_indirect = 0x06,
-  DW_MACRO_transparent_include = 0x07,
-  DW_MACRO_define_indirectx = 0x0b,
-  DW_MACRO_undef_indirectx = 0x0c,
-  DW_MACRO_lo_user = 0xe0,
-  DW_MACRO_hi_user = 0xff
-};
-#endif
-
-const uint32_t CIE_ID_32 = 0xffffffff;
-const uint64_t CIE_ID_64 = 0xffffffffffffffff;
-
-enum CallFrameInstruction : uint8_t {
-  DW_CFA_advance_loc = 0x40,
-  DW_CFA_offset = 0x80,
-  DW_CFA_restore = 0xc0,
-  DW_CFA_nop = 0x00,
-  DW_CFA_set_loc = 0x01,
-  DW_CFA_advance_loc1 = 0x02,
-  DW_CFA_advance_loc2 = 0x03,
-  DW_CFA_advance_loc4 = 0x04,
-  DW_CFA_offset_extended = 0x05,
-  DW_CFA_restore_extended = 0x06,
-  DW_CFA_undefined = 0x07,
-  DW_CFA_same_value = 0x08,
-  DW_CFA_register = 0x09,
-  DW_CFA_remember_state = 0x0a,
-  DW_CFA_restore_state = 0x0b,
-  DW_CFA_def_cfa = 0x0c,
-  DW_CFA_def_cfa_register = 0x0d,
-  DW_CFA_def_cfa_offset = 0x0e,
-  DW_CFA_def_cfa_expression = 0x0f,
-  DW_CFA_expression = 0x10,
-  DW_CFA_offset_extended_sf = 0x11,
-  DW_CFA_def_cfa_sf = 0x12,
-  DW_CFA_def_cfa_offset_sf = 0x13,
-  DW_CFA_val_offset = 0x14,
-  DW_CFA_val_offset_sf = 0x15,
-  DW_CFA_val_expression = 0x16,
-  DW_CFA_lo_user = 0x1c,
-  DW_CFA_hi_user = 0x3f
-};
-
-enum ExceptionHeaderValueFormat : uint8_t  {
-  DW_EH_PE_native = 0x00,
-  DW_EH_PE_uleb128 = 0x01,
-  DW_EH_PE_udata2 = 0x02,
-  DW_EH_PE_udata4 = 0x03,
-  DW_EH_PE_udata8 = 0x04,
-  DW_EH_PE_sleb128 = 0x09,
-  DW_EH_PE_sdata2 = 0x0A,
-  DW_EH_PE_sdata4 = 0x0B,
-  DW_EH_PE_sdata8 = 0x0C,
-  DW_EH_PE_omit = 0xFF,
-};
-
-enum ExceptionHeaderValueApplication : uint8_t {
-  DW_EH_PE_absptr = 0x00,
-  DW_EH_PE_pcrel = 0x10,
-  DW_EH_PE_textrel = 0x20,
-  DW_EH_PE_datarel = 0x30,
-  DW_EH_PE_funcrel = 0x40,
-  DW_EH_PE_aligned = 0x50,
-};
-
-enum CFIFormat : uint8_t {
-  // This is the original format as defined by the specification.
-  // It is used for the .debug_frame section.
-  DW_DEBUG_FRAME_FORMAT,
-  // Slightly modified format used for the .eh_frame section.
-  DW_EH_FRAME_FORMAT
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DWARF_CONSTANTS_H_
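
In the CallFrameInstruction enum above, DW_CFA_advance_loc (0x40), DW_CFA_offset (0x80)
and DW_CFA_restore (0xc0) are the three primary opcodes: the high two bits of the byte
select the instruction and the low six bits carry the operand inline, which is what let
DebugFrameOpCodeWriter OR a small delta or register number directly into the opcode
byte. A decoding sketch (DescribeCfaByte is an illustrative name):

    #include <cstdint>
    #include <cstdio>

    // Split a call-frame instruction byte into its primary opcode and operand.
    void DescribeCfaByte(uint8_t b) {
      uint8_t primary = b & 0xC0;  // High two bits select the primary opcode.
      uint8_t operand = b & 0x3F;  // Low six bits carry the delta or register.
      switch (primary) {
        case 0x40:
          std::printf("DW_CFA_advance_loc delta=%u\n", static_cast<unsigned>(operand));
          break;
        case 0x80:
          std::printf("DW_CFA_offset reg=%u\n", static_cast<unsigned>(operand));
          break;
        case 0xC0:
          std::printf("DW_CFA_restore reg=%u\n", static_cast<unsigned>(operand));
          break;
        default:
          // High bits are zero: the low six bits are the opcode itself.
          std::printf("extended opcode 0x%02x\n", static_cast<unsigned>(b));
          break;
      }
    }

    int main() {
      DescribeCfaByte(0x44);  // DW_CFA_advance_loc with delta 4.
      DescribeCfaByte(0x86);  // DW_CFA_offset for register 6.
      return 0;
    }
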
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
deleted file mode 100644
index a07d27c..0000000
--- a/compiler/dwarf/dwarf_test.cc
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dwarf_test.h"
-
-#include "dwarf/dwarf_constants.h"
-#include "dwarf/debug_frame_opcode_writer.h"
-#include "dwarf/debug_info_entry_writer.h"
-#include "dwarf/debug_line_opcode_writer.h"
-#include "dwarf/headers.h"
-#include "gtest/gtest.h"
-
-namespace art {
-namespace dwarf {
-
-// Run the tests only on host since we need objdump.
-#ifndef __ANDROID__
-
-constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT;
-
-TEST_F(DwarfTest, DebugFrame) {
-  const bool is64bit = false;
-
-  // Pick offset value which would catch Uleb vs Sleb errors.
-  const int offset = 40000;
-  ASSERT_EQ(UnsignedLeb128Size(offset / 4), 2u);
-  ASSERT_EQ(SignedLeb128Size(offset / 4), 3u);
-  DW_CHECK("Data alignment factor: -4");
-  const Reg reg(6);
-
-  // Test the opcodes in the order mentioned in the spec.
-  // There are usually several encoding variations of each opcode.
-  DebugFrameOpCodeWriter<> opcodes;
-  DW_CHECK("FDE");
-  int pc = 0;
-  for (int i : {0, 1, 0x3F, 0x40, 0xFF, 0x100, 0xFFFF, 0x10000}) {
-    pc += i;
-    opcodes.AdvancePC(pc);
-  }
-  DW_CHECK_NEXT("DW_CFA_advance_loc: 1 to 01000001");
-  DW_CHECK_NEXT("DW_CFA_advance_loc: 63 to 01000040");
-  DW_CHECK_NEXT("DW_CFA_advance_loc1: 64 to 01000080");
-  DW_CHECK_NEXT("DW_CFA_advance_loc1: 255 to 0100017f");
-  DW_CHECK_NEXT("DW_CFA_advance_loc2: 256 to 0100027f");
-  DW_CHECK_NEXT("DW_CFA_advance_loc2: 65535 to 0101027e");
-  DW_CHECK_NEXT("DW_CFA_advance_loc4: 65536 to 0102027e");
-  opcodes.DefCFA(reg, offset);
-  DW_CHECK_NEXT("DW_CFA_def_cfa: r6 (esi) ofs 40000");
-  opcodes.DefCFA(reg, -offset);
-  DW_CHECK_NEXT("DW_CFA_def_cfa_sf: r6 (esi) ofs -40000");
-  opcodes.DefCFARegister(reg);
-  DW_CHECK_NEXT("DW_CFA_def_cfa_register: r6 (esi)");
-  opcodes.DefCFAOffset(offset);
-  DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 40000");
-  opcodes.DefCFAOffset(-offset);
-  DW_CHECK_NEXT("DW_CFA_def_cfa_offset_sf: -40000");
-  uint8_t expr[] = { 0 };
-  opcodes.DefCFAExpression(expr, arraysize(expr));
-  DW_CHECK_NEXT("DW_CFA_def_cfa_expression");
-  opcodes.Undefined(reg);
-  DW_CHECK_NEXT("DW_CFA_undefined: r6 (esi)");
-  opcodes.SameValue(reg);
-  DW_CHECK_NEXT("DW_CFA_same_value: r6 (esi)");
-  opcodes.Offset(Reg(0x3F), -offset);
-  // Bad register likely means that it does not exist on x86,
-  // but we want to test high register numbers anyway.
-  DW_CHECK_NEXT("DW_CFA_offset: bad register: r63 at cfa-40000");
-  opcodes.Offset(Reg(0x40), -offset);
-  DW_CHECK_NEXT("DW_CFA_offset_extended: bad register: r64 at cfa-40000");
-  opcodes.Offset(Reg(0x40), offset);
-  DW_CHECK_NEXT("DW_CFA_offset_extended_sf: bad register: r64 at cfa+40000");
-  opcodes.ValOffset(reg, -offset);
-  DW_CHECK_NEXT("DW_CFA_val_offset: r6 (esi) at cfa-40000");
-  opcodes.ValOffset(reg, offset);
-  DW_CHECK_NEXT("DW_CFA_val_offset_sf: r6 (esi) at cfa+40000");
-  opcodes.Register(reg, Reg(1));
-  DW_CHECK_NEXT("DW_CFA_register: r6 (esi) in r1 (ecx)");
-  opcodes.Expression(reg, expr, arraysize(expr));
-  DW_CHECK_NEXT("DW_CFA_expression: r6 (esi)");
-  opcodes.ValExpression(reg, expr, arraysize(expr));
-  DW_CHECK_NEXT("DW_CFA_val_expression: r6 (esi)");
-  opcodes.Restore(Reg(0x3F));
-  DW_CHECK_NEXT("DW_CFA_restore: bad register: r63");
-  opcodes.Restore(Reg(0x40));
-  DW_CHECK_NEXT("DW_CFA_restore_extended: bad register: r64");
-  opcodes.Restore(reg);
-  DW_CHECK_NEXT("DW_CFA_restore: r6 (esi)");
-  opcodes.RememberState();
-  DW_CHECK_NEXT("DW_CFA_remember_state");
-  opcodes.RestoreState();
-  DW_CHECK_NEXT("DW_CFA_restore_state");
-  opcodes.Nop();
-  DW_CHECK_NEXT("DW_CFA_nop");
-
-  // Also test helpers.
-  opcodes.DefCFA(Reg(4), 100);  // ESP
-  DW_CHECK_NEXT("DW_CFA_def_cfa: r4 (esp) ofs 100");
-  opcodes.AdjustCFAOffset(8);
-  DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 108");
-  opcodes.RelOffset(Reg(0), 0);  // push R0
-  DW_CHECK_NEXT("DW_CFA_offset: r0 (eax) at cfa-108");
-  opcodes.RelOffset(Reg(1), 4);  // push R1
-  DW_CHECK_NEXT("DW_CFA_offset: r1 (ecx) at cfa-104");
-  opcodes.RelOffsetForMany(Reg(2), 8, 1 | (1 << 3), 4);  // push R2 and R5
-  DW_CHECK_NEXT("DW_CFA_offset: r2 (edx) at cfa-100");
-  DW_CHECK_NEXT("DW_CFA_offset: r5 (ebp) at cfa-96");
-  opcodes.RestoreMany(Reg(2), 1 | (1 << 3));  // pop R2 and R5
-  DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)");
-  DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
-
-  DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(is64bit ? 16 : 8),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
-  std::vector<uintptr_t> debug_frame_patches;
-  std::vector<uintptr_t> expected_patches { 28 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
-
-  EXPECT_EQ(expected_patches, debug_frame_patches);
-  CheckObjdumpOutput(is64bit, "-W");
-}
-
-TEST_F(DwarfTest, DebugFrame64) {
-  constexpr bool is64bit = true;
-  DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
-  DebugFrameOpCodeWriter<> opcodes;
-  std::vector<uintptr_t> debug_frame_patches;
-  std::vector<uintptr_t> expected_patches { 32 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     opcodes.data(), kCFIFormat, &debug_frame_data_, &debug_frame_patches);
-  DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
-
-  EXPECT_EQ(expected_patches, debug_frame_patches);
-  CheckObjdumpOutput(is64bit, "-W");
-}
-
-// Test x86_64 register mapping. It is the only non-trivial architecture.
-// ARM, X86, and Mips have: dwarf_reg = art_reg + constant.
-TEST_F(DwarfTest, x86_64_RegisterMapping) {
-  constexpr bool is64bit = true;
-  DebugFrameOpCodeWriter<> opcodes;
-  for (int i = 0; i < 16; i++) {
-    opcodes.RelOffset(Reg::X86_64Core(i), 0);
-  }
-  DW_CHECK("FDE");
-  DW_CHECK_NEXT("DW_CFA_offset: r0 (rax)");
-  DW_CHECK_NEXT("DW_CFA_offset: r2 (rcx)");
-  DW_CHECK_NEXT("DW_CFA_offset: r1 (rdx)");
-  DW_CHECK_NEXT("DW_CFA_offset: r3 (rbx)");
-  DW_CHECK_NEXT("DW_CFA_offset: r7 (rsp)");
-  DW_CHECK_NEXT("DW_CFA_offset: r6 (rbp)");
-  DW_CHECK_NEXT("DW_CFA_offset: r4 (rsi)");
-  DW_CHECK_NEXT("DW_CFA_offset: r5 (rdi)");
-  DW_CHECK_NEXT("DW_CFA_offset: r8 (r8)");
-  DW_CHECK_NEXT("DW_CFA_offset: r9 (r9)");
-  DW_CHECK_NEXT("DW_CFA_offset: r10 (r10)");
-  DW_CHECK_NEXT("DW_CFA_offset: r11 (r11)");
-  DW_CHECK_NEXT("DW_CFA_offset: r12 (r12)");
-  DW_CHECK_NEXT("DW_CFA_offset: r13 (r13)");
-  DW_CHECK_NEXT("DW_CFA_offset: r14 (r14)");
-  DW_CHECK_NEXT("DW_CFA_offset: r15 (r15)");
-  DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
-  std::vector<uintptr_t> debug_frame_patches;
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     opcodes.data(), kCFIFormat, &debug_frame_data_, &debug_frame_patches);
-
-  CheckObjdumpOutput(is64bit, "-W");
-}
-
-TEST_F(DwarfTest, DebugLine) {
-  const bool is64bit = false;
-  const int code_factor_bits = 1;
-  DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits);
-
-  std::vector<std::string> include_directories;
-  include_directories.push_back("/path/to/source");
-  DW_CHECK("/path/to/source");
-
-  std::vector<FileEntry> files {
-    { "file0.c", 0, 1000, 2000 },
-    { "file1.c", 1, 1000, 2000 },
-    { "file2.c", 1, 1000, 2000 },
-  };
-  DW_CHECK("1\t0\t1000\t2000\tfile0.c");
-  DW_CHECK_NEXT("2\t1\t1000\t2000\tfile1.c");
-  DW_CHECK_NEXT("3\t1\t1000\t2000\tfile2.c");
-
-  DW_CHECK("Line Number Statements");
-  opcodes.SetAddress(0x01000000);
-  DW_CHECK_NEXT("Extended opcode 2: set Address to 0x1000000");
-  opcodes.AddRow();
-  DW_CHECK_NEXT("Copy");
-  opcodes.AdvancePC(0x01000100);
-  DW_CHECK_NEXT("Advance PC by 256 to 0x1000100");
-  opcodes.SetFile(2);
-  DW_CHECK_NEXT("Set File Name to entry 2 in the File Name Table");
-  opcodes.AdvanceLine(3);
-  DW_CHECK_NEXT("Advance Line by 2 to 3");
-  opcodes.SetColumn(4);
-  DW_CHECK_NEXT("Set column to 4");
-  opcodes.NegateStmt();
-  DW_CHECK_NEXT("Set is_stmt to 0");
-  opcodes.SetBasicBlock();
-  DW_CHECK_NEXT("Set basic block");
-  opcodes.SetPrologueEnd();
-  DW_CHECK_NEXT("Set prologue_end to true");
-  opcodes.SetEpilogueBegin();
-  DW_CHECK_NEXT("Set epilogue_begin to true");
-  opcodes.SetISA(5);
-  DW_CHECK_NEXT("Set ISA to 5");
-  opcodes.EndSequence();
-  DW_CHECK_NEXT("Extended opcode 1: End of Sequence");
-  opcodes.DefineFile("file.c", 0, 1000, 2000);
-  DW_CHECK_NEXT("Extended opcode 3: define new File Table entry");
-  DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName");
-  DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c");
-
-  std::vector<uintptr_t> debug_line_patches;
-  std::vector<uintptr_t> expected_patches { 87 };  // NOLINT
-  WriteDebugLineTable(include_directories, files, opcodes,
-                      &debug_line_data_, &debug_line_patches);
-
-  EXPECT_EQ(expected_patches, debug_line_patches);
-  CheckObjdumpOutput(is64bit, "-W");
-}
-
-// DWARF has special one-byte codes which advance the PC and line at the same time.
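-// A special opcode encodes both deltas in a single byte.  With the header
-// constants used by the writer (opcode_base, line_base, line_range), the
-// encoding is roughly:
-//   special_opcode = (line_delta - line_base) + (line_range * addr_delta) + opcode_base
-// It is only usable while the result fits in 0..255; larger deltas fall back
-// to the standard advance opcodes ("Advance PC by constant" below).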
-TEST_F(DwarfTest, DebugLineSpecialOpcodes) {
-  const bool is64bit = false;
-  const int code_factor_bits = 1;
-  uint32_t pc = 0x01000000;
-  int line = 1;
-  DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits);
-  opcodes.SetAddress(pc);
-  size_t num_rows = 0;
-  DW_CHECK("Line Number Statements:");
-  DW_CHECK("Special opcode");
-  DW_CHECK("Advance PC by constant");
-  DW_CHECK("Decoded dump of debug contents of section .debug_line:");
-  DW_CHECK("Line number    Starting address");
-  for (int addr_delta = 0; addr_delta < 80; addr_delta += 2) {
-    for (int line_delta = 16; line_delta >= -16; --line_delta) {
-      pc += addr_delta;
-      line += line_delta;
-      opcodes.AddRow(pc, line);
-      num_rows++;
-      ASSERT_EQ(opcodes.CurrentAddress(), pc);
-      ASSERT_EQ(opcodes.CurrentLine(), line);
-      char expected[1024];
-      sprintf(expected, "%i           0x%x", line, pc);
-      DW_CHECK_NEXT(expected);
-    }
-  }
-  EXPECT_LT(opcodes.data()->size(), num_rows * 3);
-
-  std::vector<std::string> directories;
-  std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } };  // NOLINT
-  std::vector<uintptr_t> debug_line_patches;
-  WriteDebugLineTable(directories, files, opcodes,
-                      &debug_line_data_, &debug_line_patches);
-
-  CheckObjdumpOutput(is64bit, "-W -WL");
-}
-
-TEST_F(DwarfTest, DebugInfo) {
-  constexpr bool is64bit = false;
-  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
-  DW_CHECK("Contents of the .debug_info section:");
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
-  DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
-  info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
-  DW_CHECK_NEXT("DW_AT_producer    : (indirect string, offset: 0x0): Compiler name");
-  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000);
-  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1000000");
-  info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
-  DW_CHECK_NEXT("DW_AT_high_pc     : 0x2000000");
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
-  DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
-  info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
-  DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0xe): Foo");
-  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000);
-  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1010000");
-  info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
-  DW_CHECK_NEXT("DW_AT_high_pc     : 0x1020000");
-  info.EndTag();  // DW_TAG_subprogram
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
-  DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
-  info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
-  DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0x12): Bar");
-  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000);
-  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1020000");
-  info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000);
-  DW_CHECK_NEXT("DW_AT_high_pc     : 0x1030000");
-  info.EndTag();  // DW_TAG_subprogram
-  info.EndTag();  // DW_TAG_compile_unit
-  // Test that the previous list was properly terminated and that empty children are handled.
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
-  info.EndTag();  // DW_TAG_compile_unit
-
-  // The abbrev table is just a side product, but check it as well.
-  DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)");
-  DW_CHECK("Contents of the .debug_abbrev section:");
-  DW_CHECK("1      DW_TAG_compile_unit    [has children]");
-  DW_CHECK_NEXT("DW_AT_producer     DW_FORM_strp");
-  DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
-  DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
-  DW_CHECK("2      DW_TAG_subprogram    [no children]");
-  DW_CHECK_NEXT("DW_AT_name         DW_FORM_strp");
-  DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
-  DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
-  DW_CHECK("3      DW_TAG_compile_unit    [has children]");
-
-  std::vector<uintptr_t> debug_info_patches;
-  std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 };  // NOLINT
-  dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info,
-                          &debug_info_data_, &debug_info_patches);
-
-  EXPECT_EQ(expected_patches, debug_info_patches);
-  CheckObjdumpOutput(is64bit, "-W");
-}
-
-#endif  // __ANDROID__
-
-}  // namespace dwarf
-}  // namespace art
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
deleted file mode 100644
index f819c49..0000000
--- a/compiler/dwarf/dwarf_test.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DWARF_TEST_H_
-#define ART_COMPILER_DWARF_DWARF_TEST_H_
-
-#include <cstring>
-#include <dirent.h>
-#include <memory>
-#include <set>
-#include <stdio.h>
-#include <string>
-#include <sys/types.h>
-
-#include "base/unix_file/fd_file.h"
-#include "common_runtime_test.h"
-#include "elf_builder.h"
-#include "gtest/gtest.h"
-#include "os.h"
-
-namespace art {
-namespace dwarf {
-
-#define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__)
-#define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__)
-
-class DwarfTest : public CommonRuntimeTest {
- public:
-  static constexpr bool kPrintObjdumpOutput = false;  // debugging.
-
-  struct ExpectedLine {
-    std::string substring;
-    bool next;
-    const char* at_file;
-    int at_line;
-  };
-
-  // Check that the objdump output contains the given substring.
-  // If next is true, it must be on the next line.  Otherwise lines are skipped until a match is found.
-  void Check(const char* substr, bool next, const char* at_file, int at_line) {
-    expected_lines_.push_back(ExpectedLine {substr, next, at_file, at_line});
-  }
-
-  // Pretty-print the generated DWARF data using objdump.
-  template<typename ElfTypes>
-  std::vector<std::string> Objdump(const char* args) {
-    // Write a simple ELF file with just the DWARF sections.
-    InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
-    class NoCode : public CodeOutput {
-      bool Write(OutputStream*) OVERRIDE { return true; }  // NOLINT
-    } no_code;
-    ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0);
-    typedef typename ElfBuilder<ElfTypes>::RawSection RawSection;
-    RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_frame(".debug_frame", SHT_PROGBITS, 0, nullptr, 0, 8, 0);
-    if (!debug_info_data_.empty()) {
-      debug_info.SetBuffer(debug_info_data_);
-      builder.RegisterSection(&debug_info);
-    }
-    if (!debug_abbrev_data_.empty()) {
-      debug_abbrev.SetBuffer(debug_abbrev_data_);
-      builder.RegisterSection(&debug_abbrev);
-    }
-    if (!debug_str_data_.empty()) {
-      debug_str.SetBuffer(debug_str_data_);
-      builder.RegisterSection(&debug_str);
-    }
-    if (!debug_line_data_.empty()) {
-      debug_line.SetBuffer(debug_line_data_);
-      builder.RegisterSection(&debug_line);
-    }
-    if (!debug_frame_data_.empty()) {
-      debug_frame.SetBuffer(debug_frame_data_);
-      builder.RegisterSection(&debug_frame);
-    }
-    ScratchFile file;
-    builder.Write(file.GetFile());
-
-    // Read the ELF file back using objdump.
-    std::vector<std::string> lines;
-    std::string cmd = GetAndroidHostToolsDir();
-    cmd = cmd + "objdump " + args + " " + file.GetFilename() + " 2>&1";
-    FILE* output = popen(cmd.data(), "r");
-    char buffer[1024];
-    const char* line;
-    while ((line = fgets(buffer, sizeof(buffer), output)) != nullptr) {
-      if (kPrintObjdumpOutput) {
-        printf("%s", line);
-      }
-      if (line[0] != '\0' && line[0] != '\n') {
-        EXPECT_TRUE(strstr(line, "objdump: Error:") == nullptr) << line;
-        EXPECT_TRUE(strstr(line, "objdump: Warning:") == nullptr) << line;
-        std::string str(line);
-        if (str.back() == '\n') {
-          str.pop_back();
-        }
-        lines.push_back(str);
-      }
-    }
-    pclose(output);
-    return lines;
-  }
-
-  std::vector<std::string> Objdump(bool is64bit, const char* args) {
-    if (is64bit) {
-      return Objdump<ElfTypes64>(args);
-    } else {
-      return Objdump<ElfTypes32>(args);
-    }
-  }
-
-  // Compare objdump output to the recorded checks.
-  void CheckObjdumpOutput(bool is64bit, const char* args) {
-    std::vector<std::string> actual_lines = Objdump(is64bit, args);
-    auto actual_line = actual_lines.begin();
-    for (const ExpectedLine& expected_line : expected_lines_) {
-      const std::string& substring = expected_line.substring;
-      if (actual_line == actual_lines.end()) {
-        ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
-            "Expected '" << substring << "'.\n" <<
-            "Seen end of output.";
-      } else if (expected_line.next) {
-        if (actual_line->find(substring) == std::string::npos) {
-          ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
-            "Expected '" << substring << "'.\n" <<
-            "Seen '" << actual_line->data() << "'.";
-        } else {
-          // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data());
-        }
-        actual_line++;
-      } else {
-        bool found = false;
-        for (auto it = actual_line; it < actual_lines.end(); it++) {
-          if (it->find(substring) != std::string::npos) {
-            actual_line = it;
-            found = true;
-            break;
-          }
-        }
-        if (!found) {
-          ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
-            "Expected '" << substring << "'.\n" <<
-            "Not found anywhere in the rest of the output.";
-        } else {
-          // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data());
-          actual_line++;
-        }
-      }
-    }
-  }
-
-  // Buffers which are going to be assembled into the ELF file and passed to objdump.
-  std::vector<uint8_t> debug_frame_data_;
-  std::vector<uint8_t> debug_info_data_;
-  std::vector<uint8_t> debug_abbrev_data_;
-  std::vector<uint8_t> debug_str_data_;
-  std::vector<uint8_t> debug_line_data_;
-
-  // The expected output of objdump.
-  std::vector<ExpectedLine> expected_lines_;
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DWARF_TEST_H_
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
deleted file mode 100644
index b7eff19..0000000
--- a/compiler/dwarf/headers.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_HEADERS_H_
-#define ART_COMPILER_DWARF_HEADERS_H_
-
-#include <cstdint>
-
-#include "dwarf/debug_frame_opcode_writer.h"
-#include "dwarf/debug_info_entry_writer.h"
-#include "dwarf/debug_line_opcode_writer.h"
-#include "dwarf/dwarf_constants.h"
-#include "dwarf/register.h"
-#include "dwarf/writer.h"
-
-namespace art {
-namespace dwarf {
-
-// Note that all headers start with a 32-bit length.
-// DWARF also supports 64-bit lengths, but we never use that.
-// It is intended to support very large debug sections (>4GB),
-// and compilers are expected *not* to use it by default.
-// In particular, it is not related to machine architecture.
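-// (For reference: the 64-bit DWARF format instead starts the length field
-// with the escape value 0xffffffff followed by a 64-bit length.  Since we
-// always emit 32-bit lengths, none of the writers below handle that case.)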
-
-// Write common information entry (CIE) to .debug_frame or .eh_frame section.
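-// The "zR" augmentation string written below means the CIE carries
-// augmentation data ('z') whose single byte holds the FDE pointer
-// encoding ('R'); this matches the augmentation data size of 1 pushed later.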
-template<typename Vector>
-void WriteDebugFrameCIE(bool is64bit,
-                        ExceptionHeaderValueApplication address_type,
-                        Reg return_address_register,
-                        const DebugFrameOpCodeWriter<Vector>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame) {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  Writer<> writer(debug_frame);
-  size_t cie_header_start_ = writer.data()->size();
-  writer.PushUint32(0);  // Length placeholder.
-  writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
-  writer.PushUint8(1);   // Version.
-  writer.PushString("zR");
-  writer.PushUleb128(DebugFrameOpCodeWriter<Vector>::kCodeAlignmentFactor);
-  writer.PushSleb128(DebugFrameOpCodeWriter<Vector>::kDataAlignmentFactor);
-  writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
-  writer.PushUleb128(1);  // z: Augmentation data size.
-  if (is64bit) {
-    if (address_type == DW_EH_PE_pcrel) {
-      writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8);   // R: Pointer encoding.
-    } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
-      writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8);  // R: Pointer encoding.
-    }
-  } else {
-    if (address_type == DW_EH_PE_pcrel) {
-      writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);   // R: Pointer encoding.
-    } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
-      writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
-    }
-  }
-  writer.PushData(opcodes.data());
-  writer.Pad(is64bit ? 8 : 4);
-  writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4);
-}
-
-// Write frame description entry (FDE) to .debug_frame or .eh_frame section.
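-// Note that the CIE pointer is encoded differently in the two formats: in
-// .eh_frame it is the relative distance back to the CIE, while in
-// .debug_frame it is an absolute offset within the section, hence the
-// branch on the format below.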
-template<typename Vector>
-void WriteDebugFrameFDE(bool is64bit, size_t cie_offset,
-                        uint64_t initial_address, uint64_t address_range,
-                        const Vector* opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame,
-                        std::vector<uintptr_t>* debug_frame_patches) {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  Writer<> writer(debug_frame);
-  size_t fde_header_start = writer.data()->size();
-  writer.PushUint32(0);  // Length placeholder.
-  if (format == DW_EH_FRAME_FORMAT) {
-    uint32_t cie_pointer = writer.data()->size() - cie_offset;
-    writer.PushUint32(cie_pointer);
-  } else {
-    uint32_t cie_pointer = cie_offset;
-    writer.PushUint32(cie_pointer);
-  }
-  // Relocate initial_address, but not address_range (it is a size).
-  debug_frame_patches->push_back(writer.data()->size());
-  if (is64bit) {
-    writer.PushUint64(initial_address);
-    writer.PushUint64(address_range);
-  } else {
-    writer.PushUint32(initial_address);
-    writer.PushUint32(address_range);
-  }
-  writer.PushUleb128(0);  // Augmentation data size.
-  writer.PushData(opcodes);
-  writer.Pad(is64bit ? 8 : 4);
-  writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4);
-}
-
-// Write compilation unit (CU) to .debug_info section.
-template<typename Vector>
-void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
-                      const DebugInfoEntryWriter<Vector>& entries,
-                      std::vector<uint8_t>* debug_info,
-                      std::vector<uintptr_t>* debug_info_patches) {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  Writer<> writer(debug_info);
-  size_t start = writer.data()->size();
-  writer.PushUint32(0);  // Length placeholder.
-  writer.PushUint16(3);  // Version.
-  writer.PushUint32(debug_abbrev_offset);
-  writer.PushUint8(entries.Is64bit() ? 8 : 4);
-  size_t entries_offset = writer.data()->size();
-  writer.PushData(entries.data());
-  writer.UpdateUint32(start, writer.data()->size() - start - 4);
-  // Copy patch locations and make them relative to .debug_info section.
-  for (uintptr_t patch_location : entries.GetPatchLocations()) {
-    debug_info_patches->push_back(entries_offset + patch_location);
-  }
-}
-
-struct FileEntry {
-  std::string file_name;
-  int directory_index;
-  int modification_time;
-  int file_size;
-};
-
-// Write line table to .debug_line section.
-template<typename Vector>
-void WriteDebugLineTable(const std::vector<std::string>& include_directories,
-                         const std::vector<FileEntry>& files,
-                         const DebugLineOpCodeWriter<Vector>& opcodes,
-                         std::vector<uint8_t>* debug_line,
-                         std::vector<uintptr_t>* debug_line_patches) {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  Writer<> writer(debug_line);
-  size_t header_start = writer.data()->size();
-  writer.PushUint32(0);  // Section-length placeholder.
-  // Claim DWARF-2 version even though we use some DWARF-3 features.
-  // DWARF-2 consumers will ignore the unknown opcodes.
-  // This is what clang currently does.
-  writer.PushUint16(2);  // .debug_line version.
-  size_t header_length_pos = writer.data()->size();
-  writer.PushUint32(0);  // Header-length placeholder.
-  writer.PushUint8(1 << opcodes.GetCodeFactorBits());
-  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kDefaultIsStmt ? 1 : 0);
-  writer.PushInt8(DebugLineOpCodeWriter<Vector>::kLineBase);
-  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kLineRange);
-  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kOpcodeBase);
-  static const int opcode_lengths[DebugLineOpCodeWriter<Vector>::kOpcodeBase] = {
-      0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
-  for (int i = 1; i < DebugLineOpCodeWriter<Vector>::kOpcodeBase; i++) {
-    writer.PushUint8(opcode_lengths[i]);
-  }
-  for (const std::string& directory : include_directories) {
-    writer.PushData(directory.data(), directory.size() + 1);
-  }
-  writer.PushUint8(0);  // Terminate include_directories list.
-  for (const FileEntry& file : files) {
-    writer.PushData(file.file_name.data(), file.file_name.size() + 1);
-    writer.PushUleb128(file.directory_index);
-    writer.PushUleb128(file.modification_time);
-    writer.PushUleb128(file.file_size);
-  }
-  writer.PushUint8(0);  // Terminate file list.
-  writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4);
-  size_t opcodes_offset = writer.data()->size();
-  writer.PushData(opcodes.data());
-  writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
-  // Copy patch locations and make them relative to .debug_line section.
-  for (uintptr_t patch_location : opcodes.GetPatchLocations()) {
-    debug_line_patches->push_back(opcodes_offset + patch_location);
-  }
-}
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_HEADERS_H_
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
deleted file mode 100644
index 7045237..0000000
--- a/compiler/dwarf/register.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_REGISTER_H_
-#define ART_COMPILER_DWARF_REGISTER_H_
-
-namespace art {
-namespace dwarf {
-
-// Represents a DWARF register.
-class Reg {
- public:
-  explicit Reg(int reg_num) : num_(reg_num) { }
-  int num() const { return num_; }
-
-  // TODO: Arm S0–S31 register mapping is obsolescent.
-  //   We should use VFP-v3/Neon D0-D31 mapping instead.
-  //   However, D0 is aliased to pair of S0 and S1, so using that
-  //   mapping we cannot easily say S0 is spilled and S1 is not.
-  //   There are ways around this in DWARF but they are complex.
-  //   It would be much simpler to always spill whole D registers.
-  //   Arm64 mapping is correct since we already do this there.
-  //   libunwind might struggle with the new mapping as well.
-
-  static Reg ArmCore(int num) { return Reg(num); }
-  static Reg ArmFp(int num) { return Reg(64 + num); }  // S0–S31.
-  static Reg Arm64Core(int num) { return Reg(num); }
-  static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
-  static Reg MipsCore(int num) { return Reg(num); }
-  static Reg Mips64Core(int num) { return Reg(num); }
-  static Reg X86Core(int num) { return Reg(num); }
-  static Reg X86Fp(int num) { return Reg(21 + num); }
-  static Reg X86_64Core(int num) {
-    static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5};
-    return Reg(num < 8 ? map[num] : num);
-  }
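-  // The map above reconciles ART's numbering (the hardware encoding order:
-  // rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi) with the DWARF numbering
-  // (rax, rdx, rcx, rbx, rsi, rdi, rbp, rsp) for the first eight registers.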
-  static Reg X86_64Fp(int num) { return Reg(17 + num); }
-
- private:
-  int num_;
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_REGISTER_H_
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
deleted file mode 100644
index 42c32c4..0000000
--- a/compiler/dwarf/writer.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_WRITER_H_
-#define ART_COMPILER_DWARF_WRITER_H_
-
-#include <vector>
-#include "base/bit_utils.h"
-#include "base/logging.h"
-#include "leb128.h"
-
-namespace art {
-namespace dwarf {
-
-// The base class for all DWARF writers.
-template <typename Vector = std::vector<uint8_t>>
-class Writer {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
- public:
-  void PushUint8(int value) {
-    DCHECK_GE(value, 0);
-    DCHECK_LE(value, UINT8_MAX);
-    data_->push_back(value & 0xff);
-  }
-
-  void PushUint16(int value) {
-    DCHECK_GE(value, 0);
-    DCHECK_LE(value, UINT16_MAX);
-    data_->push_back((value >> 0) & 0xff);
-    data_->push_back((value >> 8) & 0xff);
-  }
-
-  void PushUint32(uint32_t value) {
-    data_->push_back((value >> 0) & 0xff);
-    data_->push_back((value >> 8) & 0xff);
-    data_->push_back((value >> 16) & 0xff);
-    data_->push_back((value >> 24) & 0xff);
-  }
-
-  void PushUint32(int value) {
-    DCHECK_GE(value, 0);
-    PushUint32(static_cast<uint32_t>(value));
-  }
-
-  void PushUint32(uint64_t value) {
-    DCHECK_LE(value, UINT32_MAX);
-    PushUint32(static_cast<uint32_t>(value));
-  }
-
-  void PushUint64(uint64_t value) {
-    data_->push_back((value >> 0) & 0xff);
-    data_->push_back((value >> 8) & 0xff);
-    data_->push_back((value >> 16) & 0xff);
-    data_->push_back((value >> 24) & 0xff);
-    data_->push_back((value >> 32) & 0xff);
-    data_->push_back((value >> 40) & 0xff);
-    data_->push_back((value >> 48) & 0xff);
-    data_->push_back((value >> 56) & 0xff);
-  }
-
-  void PushInt8(int value) {
-    DCHECK_GE(value, INT8_MIN);
-    DCHECK_LE(value, INT8_MAX);
-    PushUint8(static_cast<uint8_t>(value));
-  }
-
-  void PushInt16(int value) {
-    DCHECK_GE(value, INT16_MIN);
-    DCHECK_LE(value, INT16_MAX);
-    PushUint16(static_cast<uint16_t>(value));
-  }
-
-  void PushInt32(int value) {
-    PushUint32(static_cast<uint32_t>(value));
-  }
-
-  void PushInt64(int64_t value) {
-    PushUint64(static_cast<uint64_t>(value));
-  }
-
-  // Variable-length encoders.
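-  // LEB128 packs 7 payload bits per byte and uses the top bit as a
-  // continuation flag; for example, the unsigned value 300 encodes to the
-  // two bytes 0xac 0x02.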
-
-  void PushUleb128(uint32_t value) {
-    EncodeUnsignedLeb128(data_, value);
-  }
-
-  void PushUleb128(int value) {
-    DCHECK_GE(value, 0);
-    EncodeUnsignedLeb128(data_, value);
-  }
-
-  void PushSleb128(int value) {
-    EncodeSignedLeb128(data_, value);
-  }
-
-  // Miscellaneous functions.
-
-  void PushString(const char* value) {
-    data_->insert(data_->end(), value, value + strlen(value) + 1);
-  }
-
-  void PushData(const void* ptr, size_t size) {
-    const char* p = reinterpret_cast<const char*>(ptr);
-    data_->insert(data_->end(), p, p + size);
-  }
-
-  template<typename Vector2>
-  void PushData(const Vector2* buffer) {
-    static_assert(std::is_same<typename Vector2::value_type, uint8_t>::value, "Invalid value type");
-    data_->insert(data_->end(), buffer->begin(), buffer->end());
-  }
-
-  void UpdateUint32(size_t offset, uint32_t value) {
-    DCHECK_LT(offset + 3, data_->size());
-    (*data_)[offset + 0] = (value >> 0) & 0xFF;
-    (*data_)[offset + 1] = (value >> 8) & 0xFF;
-    (*data_)[offset + 2] = (value >> 16) & 0xFF;
-    (*data_)[offset + 3] = (value >> 24) & 0xFF;
-  }
-
-  void UpdateUint64(size_t offset, uint64_t value) {
-    DCHECK_LT(offset + 7, data_->size());
-    (*data_)[offset + 0] = (value >> 0) & 0xFF;
-    (*data_)[offset + 1] = (value >> 8) & 0xFF;
-    (*data_)[offset + 2] = (value >> 16) & 0xFF;
-    (*data_)[offset + 3] = (value >> 24) & 0xFF;
-    (*data_)[offset + 4] = (value >> 32) & 0xFF;
-    (*data_)[offset + 5] = (value >> 40) & 0xFF;
-    (*data_)[offset + 6] = (value >> 48) & 0xFF;
-    (*data_)[offset + 7] = (value >> 56) & 0xFF;
-  }
-
-  void UpdateUleb128(size_t offset, uint32_t value) {
-    DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size());
-    UpdateUnsignedLeb128(data_->data() + offset, value);
-  }
-
-  void Pop() {
-    return data_->pop_back();
-  }
-
-  void Pad(int alignment) {
-    DCHECK_NE(alignment, 0);
-    data_->resize(RoundUp(data_->size(), alignment), 0);
-  }
-
-  const Vector* data() const {
-    return data_;
-  }
-
-  explicit Writer(Vector* buffer) : data_(buffer) { }
-
- private:
-  Vector* const data_;
-
-  DISALLOW_COPY_AND_ASSIGN(Writer);
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_WRITER_H_
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bbd962f..7f2e193 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -20,28 +20,61 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "arch/mips/instruction_set_features_mips.h"
 #include "base/bit_utils.h"
+#include "base/casts.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "elf_utils.h"
-#include "file_output_stream.h"
+#include "leb128.h"
+#include "linker/error_delaying_output_stream.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CodeOutput {
- public:
-  virtual bool Write(OutputStream* out) = 0;
-  virtual ~CodeOutput() {}
-};
-
 // Writes ELF file.
-// The main complication is that the sections often want to reference
-// each other.  We solve this by writing the ELF file in two stages:
-//  * Sections are asked about their size, and overall layout is calculated.
-//  * Sections do the actual writes which may use offsets of other sections.
+//
+// The basic layout of the ELF file:
+//   Elf_Ehdr                    - The ELF header.
+//   Elf_Phdr[]                  - Program headers for the linker.
+//   .rodata                     - DEX files and oat metadata.
+//   .text                       - Compiled code.
+//   .bss                        - Zero-initialized writeable section.
+//   .MIPS.abiflags              - MIPS specific section.
+//   .dynstr                     - Names for .dynsym.
+//   .dynsym                     - A few oat-specific dynamic symbols.
+//   .hash                       - Hash-table for .dynsym.
+//   .dynamic                    - Tags which let the linker locate .dynsym.
+//   .strtab                     - Names for .symtab.
+//   .symtab                     - Debug symbols.
+//   .eh_frame                   - Unwind information (CFI).
+//   .eh_frame_hdr               - Index of .eh_frame.
+//   .debug_frame                - Unwind information (CFI).
+//   .debug_frame.oat_patches    - Addresses for relocation.
+//   .debug_info                 - Debug information.
+//   .debug_info.oat_patches     - Addresses for relocation.
+//   .debug_abbrev               - Decoding information for .debug_info.
+//   .debug_str                  - Strings for .debug_info.
+//   .debug_line                 - Line number tables.
+//   .debug_line.oat_patches     - Addresses for relocation.
+//   .text.oat_patches           - Addresses for relocation.
+//   .shstrtab                   - Names of ELF sections.
+//   Elf_Shdr[]                  - Section headers.
+//
+// Some sections are optional (the debug sections in particular).
+//
+// We try to write the section data directly into the file without much
+// in-memory buffering.  This means we generally write sections based on the
+// dependency order (e.g. .dynamic points to .dynsym which points to .text).
+//
+// In the cases where we need to buffer, we write the larger section first
+// and buffer the smaller one (e.g. .strtab is bigger than .symtab).
+//
+// The debug sections are written last for easier stripping.
+//
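+// A typical writing sequence for a single section is (sketch):
+//   section->Start();                 // assign index, file offset and address
+//   section->WriteFully(data, size);  // stream the payload through the builder
+//   section->End();                   // derive sh_size from the file position
+//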
 template <typename ElfTypes>
 class ElfBuilder FINAL {
  public:
+  static constexpr size_t kMaxProgramHeaders = 16;
   using Elf_Addr = typename ElfTypes::Addr;
   using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
@@ -53,779 +86,670 @@
   using Elf_Dyn = typename ElfTypes::Dyn;
 
   // Base class of all sections.
-  class Section {
+  class Section : public OutputStream {
    public:
-    Section(const std::string& name, Elf_Word type, Elf_Word flags,
-            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
-        : header_(), section_index_(0), name_(name), link_(link) {
+    Section(ElfBuilder<ElfTypes>* owner,
+            const std::string& name,
+            Elf_Word type,
+            Elf_Word flags,
+            const Section* link,
+            Elf_Word info,
+            Elf_Word align,
+            Elf_Word entsize)
+        : OutputStream(name),
+          owner_(owner),
+          header_(),
+          section_index_(0),
+          name_(name),
+          link_(link),
+          started_(false),
+          finished_(false),
+          phdr_flags_(PF_R),
+          phdr_type_(0) {
+      DCHECK_GE(align, 1u);
       header_.sh_type = type;
       header_.sh_flags = flags;
       header_.sh_info = info;
       header_.sh_addralign = align;
       header_.sh_entsize = entsize;
     }
-    virtual ~Section() {}
 
-    // Returns the size of the content of this section.  It is used to
-    // calculate file offsets of all sections before doing any writes.
-    virtual Elf_Word GetSize() const = 0;
-
-    // Write the content of this section to the given file.
-    // This must write exactly the number of bytes returned by GetSize().
-    // Offsets of all sections are known when this method is called.
-    virtual bool Write(File* elf_file) = 0;
-
-    Elf_Word GetLink() const {
-      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
+    // Start writing of this section.
+    void Start() {
+      CHECK(!started_);
+      CHECK(!finished_);
+      started_ = true;
+      auto& sections = owner_->sections_;
+      // Check that the previous section is complete.
+      CHECK(sections.empty() || sections.back()->finished_);
+      // The first ELF section index is 1. Index 0 is reserved for NULL.
+      section_index_ = sections.size() + 1;
+      // Page-align if we switch between allocated and non-allocated sections,
+      // or if we change the type of allocation (e.g. executable vs non-executable).
+      if (!sections.empty()) {
+        if (header_.sh_flags != sections.back()->header_.sh_flags) {
+          header_.sh_addralign = kPageSize;
+        }
+      }
+      // Align file position.
+      if (header_.sh_type != SHT_NOBITS) {
+        header_.sh_offset = owner_->AlignFileOffset(header_.sh_addralign);
+      } else {
+        header_.sh_offset = 0;
+      }
+      // Align virtual memory address.
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        header_.sh_addr = owner_->AlignVirtualAddress(header_.sh_addralign);
+      } else {
+        header_.sh_addr = 0;
+      }
+      // Push this section on the list of written sections.
+      sections.push_back(this);
     }
 
-    const Elf_Shdr* GetHeader() const {
-      return &header_;
+    // Finish writing of this section.
+    void End() {
+      CHECK(started_);
+      CHECK(!finished_);
+      finished_ = true;
+      if (header_.sh_type == SHT_NOBITS) {
+        CHECK_GT(header_.sh_size, 0u);
+      } else {
+        // Use the current file position to determine section size.
+        off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
+        CHECK_GE(file_offset, (off_t)header_.sh_offset);
+        header_.sh_size = file_offset - header_.sh_offset;
+      }
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        owner_->virtual_address_ += header_.sh_size;
+      }
     }
 
-    Elf_Shdr* GetHeader() {
-      return &header_;
+    // Get the location of this section in virtual memory.
+    Elf_Addr GetAddress() const {
+      CHECK(started_);
+      return header_.sh_addr;
+    }
+
+    // Returns the size of the content of this section.
+    Elf_Word GetSize() const {
+      if (finished_) {
+        return header_.sh_size;
+      } else {
+        CHECK(started_);
+        CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
+        return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset;
+      }
+    }
+
+    // Write this section as "NOBITS" section. (used for the .bss section)
+    // This means that the ELF file does not contain the initial data for this section
+    // and it will be zero-initialized when the ELF file is loaded in the running program.
+    void WriteNoBitsSection(Elf_Word size) {
+      DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u);
+      header_.sh_type = SHT_NOBITS;
+      Start();
+      header_.sh_size = size;
+      End();
+    }
+
+    // This function always reports success, to keep the calling code simple.
+    // Use the builder's Good() to check the actual status.
+    bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+      CHECK(started_);
+      CHECK(!finished_);
+      return owner_->stream_.WriteFully(buffer, byte_count);
+    }
+
+    // This function always reports success, to keep the calling code simple.
+    // Use the builder's Good() to check the actual status.
+    off_t Seek(off_t offset, Whence whence) OVERRIDE {
+      // Forward the seek as-is and trust the caller to use it reasonably.
+      return owner_->stream_.Seek(offset, whence);
+    }
+
+    // This function flushes the output and returns whether it succeeded.
+    // If there was a previous failure, this does nothing and returns false, i.e. failed.
+    bool Flush() OVERRIDE {
+      return owner_->stream_.Flush();
     }
 
     Elf_Word GetSectionIndex() const {
+      DCHECK(started_);
       DCHECK_NE(section_index_, 0u);
       return section_index_;
     }
 
-    void SetSectionIndex(Elf_Word section_index) {
-      section_index_ = section_index;
-    }
-
-    const std::string& GetName() const {
-      return name_;
-    }
-
    private:
+    ElfBuilder<ElfTypes>* owner_;
     Elf_Shdr header_;
     Elf_Word section_index_;
     const std::string name_;
     const Section* const link_;
+    bool started_;
+    bool finished_;
+    Elf_Word phdr_flags_;
+    Elf_Word phdr_type_;
+
+    friend class ElfBuilder;
 
     DISALLOW_COPY_AND_ASSIGN(Section);
   };
 
-  // Writer of .dynamic section.
-  class DynamicSection FINAL : public Section {
+  class CachedSection : public Section {
    public:
-    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
-      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
-      dynamics_.push_back({tag, value, section});
-    }
+    CachedSection(ElfBuilder<ElfTypes>* owner,
+                  const std::string& name,
+                  Elf_Word type,
+                  Elf_Word flags,
+                  const Section* link,
+                  Elf_Word info,
+                  Elf_Word align,
+                  Elf_Word entsize)
+        : Section(owner, name, type, flags, link, info, align, entsize), cache_() { }
 
-    DynamicSection(const std::string& name, Section* link)
-        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
-                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Dyn> buffer;
-      buffer.reserve(dynamics_.size() + 1u);
-      for (const ElfDynamicState& it : dynamics_) {
-        if (it.section_ != nullptr) {
-          // We are adding an address relative to a section.
-          buffer.push_back(
-              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
-        } else {
-          buffer.push_back({it.tag_, {it.value_}});
-        }
-      }
-      buffer.push_back({DT_NULL, {0}});
-      return WriteArray(elf_file, buffer.data(), buffer.size());
-    }
-
-   private:
-    struct ElfDynamicState {
-      Elf_Sword tag_;
-      Elf_Word value_;
-      const Section* section_;
-    };
-    std::vector<ElfDynamicState> dynamics_;
-  };
-
-  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
-                           Elf_Addr buffer_address,
-                           Elf_Addr base_address,
-                           std::vector<uint8_t>* buffer);
-
-  // Section with content based on simple memory buffer.
-  // The buffer can be optionally patched before writing.
-  class RawSection FINAL : public Section {
-   public:
-    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
-               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
-        : Section(name, type, flags, link, info, align, entsize),
-          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
-    }
-
-    RawSection(const std::string& name, Elf_Word type)
-        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      if (!patch_locations_.empty()) {
-        DCHECK(!patched_);  // Do not patch twice.
-        DCHECK(patch_ != nullptr);
-        DCHECK(patch_base_section_ != nullptr);
-        patch_(patch_locations_,
-               this->GetHeader()->sh_addr,
-               patch_base_section_->GetHeader()->sh_addr,
-               &buffer_);
-        patched_ = true;
-      }
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
-    }
-
-    bool IsEmpty() const {
-      return buffer_.size() == 0;
-    }
-
-    std::vector<uint8_t>* GetBuffer() {
-      return &buffer_;
-    }
-
-    void SetBuffer(const std::vector<uint8_t>& buffer) {
-      buffer_ = buffer;
-    }
-
-    std::vector<uintptr_t>* GetPatchLocations() {
-      return &patch_locations_;
-    }
-
-   private:
-    std::vector<uint8_t> buffer_;
-    std::vector<uintptr_t> patch_locations_;
-    bool patched_;
-    // User-provided function to do the actual patching.
-    PatchFn patch_;
-    // The section that we patch against (usually .text).
-    const Section* patch_base_section_;
-  };
-
-  // Writer of .rodata section or .text section.
-  // The write is done lazily using the provided CodeOutput.
-  class OatSection FINAL : public Section {
-   public:
-    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align,
-               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
-        : Section(name, type, flags, link, info, align, entsize),
-          size_(size), code_output_(code_output) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      // The BufferedOutputStream class contains the buffer as a field and is
-      // therefore too big to allocate on the stack.
-      std::unique_ptr<BufferedOutputStream> output_stream(
-          new BufferedOutputStream(new FileOutputStream(elf_file)));
-      return code_output_->Write(output_stream.get());
-    }
-
-   private:
-    Elf_Word size_;
-    CodeOutput* code_output_;
-  };
-
-  // Writer of .bss section.
-  class NoBitsSection FINAL : public Section {
-   public:
-    NoBitsSection(const std::string& name, Elf_Word size)
-        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-          size_(size) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
-      LOG(ERROR) << "This section should not be written to the ELF file";
-      return false;
-    }
-
-   private:
-    Elf_Word size_;
-  };
-
-  // Writer of .dynstr .strtab and .shstrtab sections.
-  class StrtabSection FINAL : public Section {
-   public:
-    StrtabSection(const std::string& name, Elf_Word flags)
-        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) {
-      buffer_.reserve(4 * KB);
-      // The first entry of strtab must be empty string.
-      buffer_ += '\0';
-    }
-
-    Elf_Word AddName(const std::string& name) {
-      Elf_Word offset = buffer_.size();
-      buffer_ += name;
-      buffer_ += '\0';
+    Elf_Word Add(const void* data, size_t length) {
+      Elf_Word offset = cache_.size();
+      const uint8_t* d = reinterpret_cast<const uint8_t*>(data);
+      cache_.insert(cache_.end(), d, d + length);
       return offset;
     }
 
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
+    Elf_Word GetCacheSize() {
+      return cache_.size();
     }
 
-    bool Write(File* elf_file) OVERRIDE {
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
+    void Write() {
+      this->WriteFully(cache_.data(), cache_.size());
+      cache_.clear();
+      cache_.shrink_to_fit();
+    }
+
+    void WriteCachedSection() {
+      this->Start();
+      Write();
+      this->End();
     }
 
    private:
-    std::string buffer_;
+    std::vector<uint8_t> cache_;
   };
 
-  class HashSection;
+  // Writer of .dynstr section.
+  class CachedStringSection FINAL : public CachedSection {
+   public:
+    CachedStringSection(ElfBuilder<ElfTypes>* owner,
+                        const std::string& name,
+                        Elf_Word flags,
+                        Elf_Word align)
+        : CachedSection(owner,
+                        name,
+                        SHT_STRTAB,
+                        flags,
+                        /* link */ nullptr,
+                        /* info */ 0,
+                        align,
+                        /* entsize */ 0) { }
+
+    Elf_Word Add(const std::string& name) {
+      if (CachedSection::GetCacheSize() == 0u) {
+        DCHECK(name.empty());
+      }
+      return CachedSection::Add(name.c_str(), name.length() + 1);
+    }
+  };
+
+  // Writer of .strtab and .shstrtab sections.
+  class StringSection FINAL : public Section {
+   public:
+    StringSection(ElfBuilder<ElfTypes>* owner,
+                  const std::string& name,
+                  Elf_Word flags,
+                  Elf_Word align)
+        : Section(owner,
+                  name,
+                  SHT_STRTAB,
+                  flags,
+                  /* link */ nullptr,
+                  /* info */ 0,
+                  align,
+                  /* entsize */ 0),
+          current_offset_(0) {
+    }
+
+    Elf_Word Write(const std::string& name) {
+      if (current_offset_ == 0) {
+        DCHECK(name.empty());
+      }
+      Elf_Word offset = current_offset_;
+      this->WriteFully(name.c_str(), name.length() + 1);
+      current_offset_ += name.length() + 1;
+      return offset;
+    }
+
+   private:
+    Elf_Word current_offset_;
+  };
 
   // Writer of .dynsym and .symtab sections.
-  class SymtabSection FINAL : public Section {
+  class SymbolSection FINAL : public CachedSection {
    public:
-    // Add a symbol with the given name to this symtab. The symbol refers to
-    // 'addr' (relative to the given section if 'is_relative' is set) and has
-    // the given attributes.
-    void AddSymbol(const std::string& name, const Section* section,
-                   Elf_Addr addr, bool is_relative, Elf_Word size,
-                   uint8_t binding, uint8_t type, uint8_t other = 0) {
-      CHECK(section != nullptr);
-      Elf_Word name_idx = strtab_->AddName(name);
-      symbols_.push_back({ name, section, addr, size, is_relative,
-                           MakeStInfo(binding, type), other, name_idx });
+    SymbolSection(ElfBuilder<ElfTypes>* owner,
+                  const std::string& name,
+                  Elf_Word type,
+                  Elf_Word flags,
+                  Section* strtab)
+        : CachedSection(owner,
+                        name,
+                        type,
+                        flags,
+                        strtab,
+                        /* info */ 0,
+                        sizeof(Elf_Off),
+                        sizeof(Elf_Sym)) {
+      // The symbol table always has to start with NULL symbol.
+      Elf_Sym null_symbol = Elf_Sym();
+      CachedSection::Add(&null_symbol, sizeof(null_symbol));
     }
 
-    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
-                  StrtabSection* strtab)
-        : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)),
-          strtab_(strtab) {
-    }
-
-    bool IsEmpty() const {
-      return symbols_.empty();
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Sym> buffer;
-      buffer.reserve(1u + symbols_.size());
-      buffer.push_back(Elf_Sym());  // NULL.
-      for (const ElfSymbolState& it : symbols_) {
-        Elf_Sym sym = Elf_Sym();
-        sym.st_name = it.name_idx_;
-        if (it.is_relative_) {
-          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
-        } else {
-          sym.st_value = it.addr_;
-        }
-        sym.st_size = it.size_;
-        sym.st_other = it.other_;
-        sym.st_shndx = it.section_->GetSectionIndex();
-        sym.st_info = it.info_;
-        buffer.push_back(sym);
-      }
-      return WriteArray(elf_file, buffer.data(), buffer.size());
-    }
-
-   private:
-    struct ElfSymbolState {
-      const std::string name_;
-      const Section* section_;
-      Elf_Addr addr_;
-      Elf_Word size_;
-      bool is_relative_;
-      uint8_t info_;
-      uint8_t other_;
-      Elf_Word name_idx_;  // index in the strtab.
-    };
-
-    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
-      return ((binding) << 4) + ((type) & 0xf);
-    }
-
-    // The symbols in the same order they will be in the symbol table.
-    std::vector<ElfSymbolState> symbols_;
-    StrtabSection* strtab_;
-
-    friend class HashSection;
-  };
-
-  // TODO: Consider removing.
-  // We use it only for the dynsym section which has only 5 symbols.
-  // We do not use it for symtab, and we probably do not have to
-  // since we use those symbols only to print backtraces.
-  class HashSection FINAL : public Section {
-   public:
-    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
-        : Section(name, SHT_HASH, flags, symtab,
-                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
-          symtab_(symtab) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      Elf_Word nbuckets = GetNumBuckets();
-      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
-      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
-    }
-
-    bool Write(File* const elf_file) OVERRIDE {
-      // Here is how the ELF hash table works.
-      // There are 3 arrays to worry about.
-      // * The symbol table where the symbol information is.
-      // * The bucket array which is an array of indexes into the symtab and chain.
-      // * The chain array which is also an array of indexes into the symtab and chain.
-      //
-      // Let's say the state is something like this.
-      // +--------+       +--------+      +-----------+
-      // | symtab |       | bucket |      |   chain   |
-      // |  null  |       | 1      |      | STN_UNDEF |
-      // | <sym1> |       | 4      |      | 2         |
-      // | <sym2> |       |        |      | 5         |
-      // | <sym3> |       |        |      | STN_UNDEF |
-      // | <sym4> |       |        |      | 3         |
-      // | <sym5> |       |        |      | STN_UNDEF |
-      // +--------+       +--------+      +-----------+
-      //
-      // The lookup process (in Python pseudocode) is:
-      //
-      // def GetSym(name):
-      //     # NB STN_UNDEF == 0
-      //     indx = bucket[elfhash(name) % num_buckets]
-      //     while indx != STN_UNDEF:
-      //         if GetSymbolName(symtab[indx]) == name:
-      //             return symtab[indx]
-      //         indx = chain[indx]
-      //     return SYMBOL_NOT_FOUND
-      //
-      // Between the bucket and chain arrays, every symtab index must be present
-      // exactly once (except for STN_UNDEF, which must be present 1 + num_buckets times).
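-      //
-      // For instance, looking up a name that hashes to bucket 1 above visits
-      // symtab[4] first and, on a name mismatch, follows chain[4] to
-      // symtab[3]; chain[3] is STN_UNDEF, so the search would end there.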
-      const auto& symbols = symtab_->symbols_;
-      // Select number of buckets.
-      // This is essentially arbitrary.
-      Elf_Word nbuckets = GetNumBuckets();
-      // 1 is for the implicit NULL symbol.
-      Elf_Word chain_size = (symbols.size() + 1);
-      std::vector<Elf_Word> hash;
-      hash.push_back(nbuckets);
-      hash.push_back(chain_size);
-      uint32_t bucket_offset = hash.size();
-      uint32_t chain_offset = bucket_offset + nbuckets;
-      hash.resize(hash.size() + nbuckets + chain_size, 0);
-
-      Elf_Word* buckets = hash.data() + bucket_offset;
-      Elf_Word* chain   = hash.data() + chain_offset;
-
-      // Set up the actual hash table.
-      for (Elf_Word i = 0; i < symbols.size(); i++) {
-        // Add 1 since we need to have the null symbol that is not in the symbols
-        // list.
-        Elf_Word index = i + 1;
-        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
-        if (buckets[hash_val] == 0) {
-          buckets[hash_val] = index;
-        } else {
-          hash_val = buckets[hash_val];
-          CHECK_LT(hash_val, chain_size);
-          while (chain[hash_val] != 0) {
-            hash_val = chain[hash_val];
-            CHECK_LT(hash_val, chain_size);
-          }
-          chain[hash_val] = index;
-          // Check for loops. Works because if this is non-empty then there must be
-          // another cell which already contains the same symbol index as this one,
-          // which means some symbol has more than one name, which isn't allowed.
-          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
-        }
-      }
-      return WriteArray(elf_file, hash.data(), hash.size());
-    }
-
-   private:
-    Elf_Word GetNumBuckets() const {
-      const auto& symbols = symtab_->symbols_;
-      if (symbols.size() < 8) {
-        return 2;
-      } else if (symbols.size() < 32) {
-        return 4;
-      } else if (symbols.size() < 256) {
-        return 16;
+    // Buffer a symbol for this section; it will be written later.
+    // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+    // (We use this in the JIT to reference code stored outside the debug ELF file.)
+    void Add(Elf_Word name,
+             const Section* section,
+             Elf_Addr addr,
+             Elf_Word size,
+             uint8_t binding,
+             uint8_t type) {
+      Elf_Word section_index;
+      if (section != nullptr) {
+        DCHECK_LE(section->GetAddress(), addr);
+        DCHECK_LE(addr, section->GetAddress() + section->GetSize());
+        section_index = section->GetSectionIndex();
       } else {
-        // Have about 32 ids per bucket.
-        return RoundUp(symbols.size()/32, 2);
+        section_index = static_cast<Elf_Word>(SHN_ABS);
       }
+      Add(name, section_index, addr, size, binding, type);
     }
 
-    // from bionic
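-    // Each character is shifted into the low nibbles and the bits that
-    // overflow the top nibble are xor-ed back in; e.g. elfhash("a") == 0x61.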
-    static inline unsigned elfhash(const char *_name) {
-      const unsigned char *name = (const unsigned char *) _name;
-      unsigned h = 0, g;
-
-      while (*name) {
-        h = (h << 4) + *name++;
-        g = h & 0xf0000000;
-        h ^= g;
-        h ^= g >> 24;
-      }
-      return h;
+    void Add(Elf_Word name,
+             Elf_Word section_index,
+             Elf_Addr addr,
+             Elf_Word size,
+             uint8_t binding,
+             uint8_t type) {
+      Elf_Sym sym = Elf_Sym();
+      sym.st_name = name;
+      sym.st_value = addr;
+      sym.st_size = size;
+      sym.st_other = 0;
+      sym.st_shndx = section_index;
+      sym.st_info = (binding << 4) + (type & 0xf);
+      CachedSection::Add(&sym, sizeof(sym));
     }
-
-    SymtabSection* symtab_;
-
-    DISALLOW_COPY_AND_ASSIGN(HashSection);
   };
 
-  ElfBuilder(InstructionSet isa,
-             Elf_Word rodata_size, CodeOutput* rodata_writer,
-             Elf_Word text_size, CodeOutput* text_writer,
-             Elf_Word bss_size)
-    : isa_(isa),
-      dynstr_(".dynstr", SHF_ALLOC),
-      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(".hash", SHF_ALLOC, &dynsym_),
-      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
-              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
-      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
-            nullptr, 0, kPageSize, 0, text_size, text_writer),
-      bss_(".bss", bss_size),
-      dynamic_(".dynamic", &dynstr_),
-      strtab_(".strtab", 0),
-      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
-      shstrtab_(".shstrtab", 0) {
+  class AbiflagsSection FINAL : public Section {
+   public:
+    // Section with MIPS abiflags info.
+    static constexpr uint8_t MIPS_AFL_REG_NONE =         0;  // no registers
+    static constexpr uint8_t MIPS_AFL_REG_32 =           1;  // 32-bit registers
+    static constexpr uint8_t MIPS_AFL_REG_64 =           2;  // 64-bit registers
+    static constexpr uint32_t MIPS_AFL_FLAGS1_ODDSPREG = 1;  // Uses odd single-prec fp regs
+    static constexpr uint8_t MIPS_ABI_FP_DOUBLE =        1;  // -mdouble-float
+    static constexpr uint8_t MIPS_ABI_FP_XX =            5;  // -mfpxx
+    static constexpr uint8_t MIPS_ABI_FP_64A =           7;  // -mips32r* -mfp64 -mno-odd-spreg
+
+    AbiflagsSection(ElfBuilder<ElfTypes>* owner,
+                    const std::string& name,
+                    Elf_Word type,
+                    Elf_Word flags,
+                    const Section* link,
+                    Elf_Word info,
+                    Elf_Word align,
+                    Elf_Word entsize,
+                    InstructionSet isa,
+                    const InstructionSetFeatures* features)
+        : Section(owner, name, type, flags, link, info, align, entsize) {
+      if (isa == kMips || isa == kMips64) {
+        bool fpu32 = false;    // assume mips64 values
+        uint8_t isa_rev = 6;   // assume mips64 values
+        if (isa == kMips) {
+          // adjust for mips32 values
+          fpu32 = features->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint();
+          isa_rev = features->AsMipsInstructionSetFeatures()->IsR6()
+              ? 6
+              : features->AsMipsInstructionSetFeatures()->IsMipsIsaRevGreaterThanEqual2()
+                  ? (fpu32 ? 2 : 5)
+                  : 1;
+        }
+        abiflags_.version = 0;  // version of flags structure
+        abiflags_.isa_level = (isa == kMips) ? 32 : 64;
+        abiflags_.isa_rev = isa_rev;
+        abiflags_.gpr_size = (isa == kMips) ? MIPS_AFL_REG_32 : MIPS_AFL_REG_64;
+        abiflags_.cpr1_size = fpu32 ? MIPS_AFL_REG_32 : MIPS_AFL_REG_64;
+        abiflags_.cpr2_size = MIPS_AFL_REG_NONE;
+        // Set the fp_abi to MIPS_ABI_FP_64A for mips32 with 64-bit FPUs (i.e. mips32 R5 and R6).
+        // Otherwise set it to MIPS_ABI_FP_DOUBLE.
+        abiflags_.fp_abi = (isa == kMips && !fpu32) ? MIPS_ABI_FP_64A : MIPS_ABI_FP_DOUBLE;
+        abiflags_.isa_ext = 0;
+        abiflags_.ases = 0;
+        // To keep the code simple, we do not use odd FP registers for single floats in either
+        // mips32 or mips64 ART. Therefore we do not set the MIPS_AFL_FLAGS1_ODDSPREG bit.
+        abiflags_.flags1 = 0;
+        abiflags_.flags2 = 0;
+      }
+    }
+
+    Elf_Word GetSize() const {
+      return sizeof(abiflags_);
+    }
+
+    void Write() {
+      this->WriteFully(&abiflags_, sizeof(abiflags_));
+    }
+
+   private:
+    struct {
+      uint16_t version;  // version of this structure
+      uint8_t  isa_level, isa_rev, gpr_size, cpr1_size, cpr2_size;
+      uint8_t  fp_abi;
+      uint32_t isa_ext, ases, flags1, flags2;
+    } abiflags_;
+  };
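
// A size sanity check (a sketch; it assumes the struct above mirrors the
// 24-byte MIPS abiflags record, and that packing leaves no padding, which
// holds since the six uint8_t fields follow a uint16_t):
struct MipsAbiflagsV0 {  // Hypothetical mirror of abiflags_, for illustration.
  uint16_t version;
  uint8_t isa_level, isa_rev, gpr_size, cpr1_size, cpr2_size, fp_abi;
  uint32_t isa_ext, ases, flags1, flags2;
};
static_assert(sizeof(MipsAbiflagsV0) == 24, "MIPS.abiflags records are 24 bytes");
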
+
+  ElfBuilder(InstructionSet isa, const InstructionSetFeatures* features, OutputStream* output)
+      : isa_(isa),
+        features_(features),
+        stream_(output),
+        rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+        bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+        dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+        hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+        dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+        eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+        strtab_(this, ".strtab", 0, 1),
+        symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+        debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+        debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        shstrtab_(this, ".shstrtab", 0, 1),
+        abiflags_(this, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
+                  isa, features),
+        started_(false),
+        write_program_headers_(false),
+        loaded_size_(0u),
+        virtual_address_(0) {
+    text_.phdr_flags_ = PF_R | PF_X;
+    bss_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_type_ = PT_DYNAMIC;
+    eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME;
+    abiflags_.phdr_type_ = PT_MIPS_ABIFLAGS;
   }
   ~ElfBuilder() {}
 
-  OatSection* GetText() { return &text_; }
-  SymtabSection* GetSymtab() { return &symtab_; }
+  InstructionSet GetIsa() { return isa_; }
+  Section* GetRoData() { return &rodata_; }
+  Section* GetText() { return &text_; }
+  Section* GetBss() { return &bss_; }
+  StringSection* GetStrTab() { return &strtab_; }
+  SymbolSection* GetSymTab() { return &symtab_; }
+  Section* GetEhFrame() { return &eh_frame_; }
+  Section* GetEhFrameHdr() { return &eh_frame_hdr_; }
+  Section* GetDebugFrame() { return &debug_frame_; }
+  Section* GetDebugInfo() { return &debug_info_; }
+  Section* GetDebugLine() { return &debug_line_; }
 
-  bool Write(File* elf_file) {
-    // Since the .text section of an oat file contains relative references to .rodata
-    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
-    // a non-traditional layout where the .bss section is mapped independently of the
-    // .dynamic section and needs its own program header with LOAD RW.
-    //
-    // The basic layout of the elf file. Order may be different in final output.
-    // +-------------------------+
-    // | Elf_Ehdr                |
-    // +-------------------------+
-    // | Elf_Phdr PHDR           |
-    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
-    // | Elf_Phdr LOAD R X       | .text
-    // | Elf_Phdr LOAD RW        | .bss (Optional)
-    // | Elf_Phdr LOAD RW        | .dynamic
-    // | Elf_Phdr DYNAMIC        | .dynamic
-    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
-    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
-    // +-------------------------+
-    // | .dynsym                 |
-    // | Elf_Sym  STN_UNDEF      |
-    // | Elf_Sym  oatdata        |
-    // | Elf_Sym  oatexec        |
-    // | Elf_Sym  oatlastword    |
-    // | Elf_Sym  oatbss         | (Optional)
-    // | Elf_Sym  oatbsslastword | (Optional)
-    // +-------------------------+
-    // | .dynstr                 |
-    // | names for .dynsym       |
-    // +-------------------------+
-    // | .hash                   |
-    // | hashtable for dynsym    |
-    // +-------------------------+
-    // | .rodata                 |
-    // | oatdata..oatexec-4      |
-    // +-------------------------+
-    // | .text                   |
-    // | oatexec..oatlastword    |
-    // +-------------------------+
-    // | .dynamic                |
-    // | Elf_Dyn DT_HASH         |
-    // | Elf_Dyn DT_STRTAB       |
-    // | Elf_Dyn DT_SYMTAB       |
-    // | Elf_Dyn DT_SYMENT       |
-    // | Elf_Dyn DT_STRSZ        |
-    // | Elf_Dyn DT_SONAME       |
-    // | Elf_Dyn DT_NULL         |
-    // +-------------------------+  (Optional)
-    // | .symtab                 |  (Optional)
-    // | program symbols         |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .strtab                 |  (Optional)
-    // | names for .symtab       |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .eh_frame               |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .eh_frame_hdr           |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_info             |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_abbrev           |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_str              |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_line             |  (Optional)
-    // +-------------------------+
-    // | .shstrtab               |
-    // | names of sections       |
-    // +-------------------------+
-    // | Elf_Shdr null           |
-    // | Elf_Shdr .dynsym        |
-    // | Elf_Shdr .dynstr        |
-    // | Elf_Shdr .hash          |
-    // | Elf_Shdr .rodata        |
-    // | Elf_Shdr .text          |
-    // | Elf_Shdr .bss           |  (Optional)
-    // | Elf_Shdr .dynamic       |
-    // | Elf_Shdr .symtab        |  (Optional)
-    // | Elf_Shdr .strtab        |  (Optional)
-    // | Elf_Shdr .eh_frame      |  (Optional)
-    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
-    // | Elf_Shdr .debug_info    |  (Optional)
-    // | Elf_Shdr .debug_abbrev  |  (Optional)
-    // | Elf_Shdr .debug_str     |  (Optional)
-    // | Elf_Shdr .debug_line    |  (Optional)
-    // | Elf_Shdr .oat_patches   |  (Optional)
-    // | Elf_Shdr .shstrtab      |
-    // +-------------------------+
-    constexpr bool debug_logging_ = false;
+  // Encode patch locations as a ULEB128 list of deltas between consecutive addresses.
+  // (Exposed publicly for tests.)
+  static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer) {
+    buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
+    uintptr_t address = 0;  // relative to start of section.
+    for (uintptr_t location : locations) {
+      DCHECK_GE(location, address) << "Patch locations are not in sorted order";
+      EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
+      address = location;
+    }
+  }
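
// A sketch of the matching decoder, assuming standard ULEB128 framing (ART
// already provides DecodeUnsignedLeb128 in leb128.h; it is inlined here only
// to keep the example self-contained - needs <cstdint> and <vector>):
static std::vector<uintptr_t> DecodeOatPatches(const uint8_t* p, const uint8_t* end) {
  std::vector<uintptr_t> locations;
  uintptr_t address = 0;  // Relative to start of section.
  while (p < end) {
    // ULEB128: 7 payload bits per byte; a set high bit means "more bytes follow".
    uint32_t delta = 0;
    int shift = 0;
    uint8_t byte;
    do {
      byte = *p++;
      delta |= static_cast<uint32_t>(byte & 0x7f) << shift;
      shift += 7;
    } while ((byte & 0x80) != 0);
    address += delta;  // Each decoded delta advances the running address.
    locations.push_back(address);
  }
  return locations;
}
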
 
-    // Create a list of all section which we want to write.
-    // This is the order in which they will be written.
-    std::vector<Section*> sections;
-    sections.push_back(&dynsym_);
-    sections.push_back(&dynstr_);
-    sections.push_back(&hash_);
-    sections.push_back(&rodata_);
-    sections.push_back(&text_);
-    if (bss_.GetSize() != 0u) {
-      sections.push_back(&bss_);
-    }
-    sections.push_back(&dynamic_);
-    if (!symtab_.IsEmpty()) {
-      sections.push_back(&symtab_);
-      sections.push_back(&strtab_);
-    }
-    for (Section* section : other_sections_) {
-      sections.push_back(section);
-    }
-    sections.push_back(&shstrtab_);
-    for (size_t i = 0; i < sections.size(); i++) {
-      // The first section index is 1.  Index 0 is reserved for NULL.
-      // Section index is used for relative symbols and for section links.
-      sections[i]->SetSectionIndex(i + 1);
-      // Add section name to .shstrtab.
-      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
-      sections[i]->GetHeader()->sh_name = name_offset;
-    }
+  void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) {
+    std::vector<uint8_t> buffer;
+    EncodeOatPatches(patch_locations, &buffer);
+    std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer.data(), buffer.size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
 
-    // The running program does not have access to section headers
-    // and the loader is not supposed to use them either.
-    // The dynamic sections therefore replicates some of the layout
-    // information like the address and size of .rodata and .text.
-    // It also contains other metadata like the SONAME.
-    // The .dynamic section is found using the PT_DYNAMIC program header.
-    BuildDynsymSection();
-    BuildDynamicSection(elf_file->GetPath());
+  void WriteSection(const char* name, const std::vector<uint8_t>* buffer) {
+    std::unique_ptr<Section> s(new Section(this, name, SHT_PROGBITS, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer->data(), buffer->size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
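
// Hypothetical call of WriteSection() above (the section name and bytes are
// illustrative only):
//
//   std::vector<uint8_t> abbrev = {0x01, 0x11, 0x01, 0x00};
//   builder->WriteSection(".debug_abbrev", &abbrev);
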
 
-    // We do not know the number of headers until the final stages of write.
-    // It is easiest to just reserve a fixed amount of space for them.
-    constexpr size_t kMaxProgramHeaders = 8;
-    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
-
-    // Layout of all sections - determine the final file offsets and addresses.
-    // This must be done after we have built all sections and know their size.
-    Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Elf_Addr load_address = file_offset;
-    std::vector<Elf_Shdr> section_headers;
-    section_headers.reserve(1u + sections.size());
-    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
-    for (auto* section : sections) {
-      Elf_Shdr* header = section->GetHeader();
-      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
-      header->sh_size = section->GetSize();
-      header->sh_link = section->GetLink();
-      // Allocate memory for the section in the file.
-      if (header->sh_type != SHT_NOBITS) {
-        header->sh_offset = RoundUp(file_offset, alignment);
-        file_offset = header->sh_offset + header->sh_size;
-      }
-      // Allocate memory for the section during program execution.
-      if ((header->sh_flags & SHF_ALLOC) != 0) {
-        header->sh_addr = RoundUp(load_address, alignment);
-        load_address = header->sh_addr + header->sh_size;
-      }
-      if (debug_logging_) {
-        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
-                  << " offset=0x" << header->sh_offset
-                  << " addr=0x" << header->sh_addr
-                  << " size=0x" << header->sh_size;
-      }
-      // Collect section headers into continuous array for convenience.
-      section_headers.push_back(*header);
+  // Reserve space for ELF header and program headers.
+  // We do not know the number of headers until later, so
+  // it is easiest to just reserve a fixed amount of space.
+  // Program headers are required for loading by the linker.
+  // It is possible to omit them for ELF files used for debugging.
+  void Start(bool write_program_headers = true) {
+    int size = sizeof(Elf_Ehdr);
+    if (write_program_headers) {
+      size += sizeof(Elf_Phdr) * kMaxProgramHeaders;
     }
-    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off));
+    stream_.Seek(size, kSeekSet);
+    started_ = true;
+    virtual_address_ += size;
+    write_program_headers_ = write_program_headers;
+  }
 
-    // Create program headers now that we know the layout of the whole file.
-    // Each segment contains one or more sections which are mapped together.
-    // Not all sections are mapped during the execution of the program.
-    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
-    // interesting parts of memory and their addresses overlap with PT_LOAD.
-    std::vector<Elf_Phdr> program_headers;
-    program_headers.push_back(Elf_Phdr());  // Placeholder for PT_PHDR.
-    // Create the main LOAD R segment which spans all sections up to .rodata.
-    const Elf_Shdr* rodata = rodata_.GetHeader();
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
-      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
-    if (bss_.GetHeader()->sh_size != 0u) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
-    }
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
-    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
-    const Section* eh_frame = FindSection(".eh_frame");
-    if (eh_frame != nullptr) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
-      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
-      if (eh_frame_hdr != nullptr) {
-        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
-        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
-        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
-                 eh_frame_hdr->GetHeader()->sh_offset);
-        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
-        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
+  void End() {
+    DCHECK(started_);
+
+    // Note: loaded_size_ == 0 for tests that don't write .rodata, .text, .bss,
+    // .dynstr, .dynsym, .hash and .dynamic. These tests should not read loaded_size_.
+    // TODO: Either refactor the .eh_frame creation so that it counts towards loaded_size_,
+    // or remove all support for .eh_frame. (The currently unused .eh_frame counts towards
+    // the virtual_address_ but we don't consider it for loaded_size_.)
+    CHECK(loaded_size_ == 0 || loaded_size_ == RoundUp(virtual_address_, kPageSize))
+        << loaded_size_ << " " << virtual_address_;
+
+    // Write section names and finish the section headers.
+    shstrtab_.Start();
+    shstrtab_.Write("");
+    for (auto* section : sections_) {
+      section->header_.sh_name = shstrtab_.Write(section->name_);
+      if (section->link_ != nullptr) {
+        section->header_.sh_link = section->link_->GetSectionIndex();
       }
     }
-    DCHECK_EQ(program_headers[0].p_type, 0u);  // Check placeholder.
-    program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R,
-      kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off));
-    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
+    shstrtab_.End();
 
-    // Create the main ELF header.
-    Elf_Ehdr elf_header = MakeElfHeader(isa_);
-    elf_header.e_phoff = kProgramHeadersOffset;
+    // Write section headers at the end of the ELF file.
+    std::vector<Elf_Shdr> shdrs;
+    shdrs.reserve(1u + sections_.size());
+    shdrs.push_back(Elf_Shdr());  // NULL at index 0.
+    for (auto* section : sections_) {
+      shdrs.push_back(section->header_);
+    }
+    Elf_Off section_headers_offset = AlignFileOffset(sizeof(Elf_Off));
+    stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Flush everything else before writing the program headers. This should prevent
+    // the OS from reordering writes, so that we don't end up with valid headers
+    // and partially written data if we suddenly lose power, for example.
+    stream_.Flush();
+
+    // The main ELF header.
+    Elf_Ehdr elf_header = MakeElfHeader(isa_, features_);
     elf_header.e_shoff = section_headers_offset;
-    elf_header.e_phnum = program_headers.size();
-    elf_header.e_shnum = section_headers.size();
+    elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
 
-    // Write all headers and section content to the file.
-    // Depending on the implementations of Section::Write, this
-    // might be just memory copies or some more elaborate operations.
-    if (!WriteArray(elf_file, &elf_header, 1)) {
-      LOG(INFO) << "Failed to write the ELF header";
-      return false;
+    // Program headers (i.e. mmap instructions).
+    std::vector<Elf_Phdr> phdrs;
+    if (write_program_headers_) {
+      phdrs = MakeProgramHeaders();
+      CHECK_LE(phdrs.size(), kMaxProgramHeaders);
+      elf_header.e_phoff = sizeof(Elf_Ehdr);
+      elf_header.e_phnum = phdrs.size();
     }
-    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
-      LOG(INFO) << "Failed to write the program headers";
-      return false;
-    }
-    for (Section* section : sections) {
-      const Elf_Shdr* header = section->GetHeader();
-      if (header->sh_type != SHT_NOBITS) {
-        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
-          LOG(INFO) << "Failed to write section " << section->GetName();
-          return false;
-        }
-        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
-        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
-          << "The number of bytes written does not match GetSize()";
-      }
-    }
-    if (!SeekTo(elf_file, section_headers_offset) ||
-        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
-      LOG(INFO) << "Failed to write the section headers";
-      return false;
-    }
-    return true;
+
+    stream_.Seek(0, kSeekSet);
+    stream_.WriteFully(&elf_header, sizeof(elf_header));
+    stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Flush();
   }
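
// For orientation, the file produced by Start()..End() is laid out roughly as
// follows (a sketch; section order is simply the order sections were written):
//
//   offset 0                 Elf_Ehdr  (seeked back to and written last)
//   sizeof(Elf_Ehdr)         Elf_Phdr[kMaxProgramHeaders]  (if enabled)
//   ...                      section contents, in write order
//   section_headers_offset   Elf_Shdr[] table, with the NULL entry first
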
 
-  // Adds the given section to the builder.  It does not take ownership.
-  void RegisterSection(Section* section) {
-    other_sections_.push_back(section);
+  // The running program does not have access to section headers
+  // and the loader is not supposed to use them either.
+  // The dynamic section therefore replicates some of the layout
+  // information like the address and size of .rodata and .text.
+  // It also contains other metadata like the SONAME.
+  // The .dynamic section is found using the PT_DYNAMIC program header.
+  void PrepareDynamicSection(const std::string& elf_file_path,
+                             Elf_Word rodata_size,
+                             Elf_Word text_size,
+                             Elf_Word bss_size) {
+    std::string soname(elf_file_path);
+    size_t directory_separator_pos = soname.rfind('/');
+    if (directory_separator_pos != std::string::npos) {
+      soname = soname.substr(directory_separator_pos + 1);
+    }
+
+    // Calculate addresses of .text, .bss and .dynstr.
+    DCHECK_EQ(rodata_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
+    DCHECK_EQ(text_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
+    DCHECK_EQ(bss_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
+    DCHECK_EQ(dynstr_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
+    Elf_Word rodata_address = rodata_.GetAddress();
+    Elf_Word text_address = RoundUp(rodata_address + rodata_size, kPageSize);
+    Elf_Word bss_address = RoundUp(text_address + text_size, kPageSize);
+    Elf_Word abiflags_address = RoundUp(bss_address + bss_size, kPageSize);
+    Elf_Word abiflags_size = 0;
+    if (isa_ == kMips || isa_ == kMips64) {
+      abiflags_size = abiflags_.GetSize();
+    }
+    Elf_Word dynstr_address = RoundUp(abiflags_address + abiflags_size, kPageSize);
+
+    // Cache .dynstr, .dynsym and .hash data.
+    dynstr_.Add("");  // dynstr should start with empty string.
+    Elf_Word rodata_index = rodata_.GetSectionIndex();
+    Elf_Word oatdata = dynstr_.Add("oatdata");
+    dynsym_.Add(oatdata, rodata_index, rodata_address, rodata_size, STB_GLOBAL, STT_OBJECT);
+    if (text_size != 0u) {
+      Elf_Word text_index = rodata_index + 1u;
+      Elf_Word oatexec = dynstr_.Add("oatexec");
+      dynsym_.Add(oatexec, text_index, text_address, text_size, STB_GLOBAL, STT_OBJECT);
+      Elf_Word oatlastword = dynstr_.Add("oatlastword");
+      Elf_Word oatlastword_address = text_address + text_size - 4;
+      dynsym_.Add(oatlastword, text_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
+    } else if (rodata_size != 0) {
+      // rodata_ can be size 0 for dwarf_test.
+      Elf_Word oatlastword = dynstr_.Add("oatlastword");
+      Elf_Word oatlastword_address = rodata_address + rodata_size - 4;
+      dynsym_.Add(oatlastword, rodata_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    if (bss_size != 0u) {
+      Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 1u : 0u);
+      Elf_Word oatbss = dynstr_.Add("oatbss");
+      dynsym_.Add(oatbss, bss_index, bss_address, bss_size, STB_GLOBAL, STT_OBJECT);
+      Elf_Word oatbsslastword = dynstr_.Add("oatbsslastword");
+      Elf_Word bsslastword_address = bss_address + bss_size - 4;
+      dynsym_.Add(oatbsslastword, bss_index, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    Elf_Word soname_offset = dynstr_.Add(soname);
+
+    // We do not really need a hash table since there are so few entries.
+    // However, the hash table is the only way the linker can actually
+    // determine the number of symbols in .dynsym, so it is required.
+    int count = dynsym_.GetCacheSize() / sizeof(Elf_Sym);  // Includes NULL.
+    std::vector<Elf_Word> hash;
+    hash.push_back(1);  // Number of buckets.
+    hash.push_back(count);  // Number of chains.
+    // Buckets.  Having just one bucket makes lookup a linear search.
+    hash.push_back(1);  // Point to first non-NULL symbol.
+    // Chains.  This creates a linked list of symbols.
+    hash.push_back(0);  // Dummy entry for the NULL symbol.
+    for (int i = 1; i < count - 1; i++) {
+      hash.push_back(i + 1);  // Each symbol points to the next one.
+    }
+    hash.push_back(0);  // Last symbol terminates the chain.
+    hash_.Add(hash.data(), hash.size() * sizeof(hash[0]));
+
+    // Calculate addresses of .dynsym, .hash and .dynamic.
+    DCHECK_EQ(dynstr_.header_.sh_flags, dynsym_.header_.sh_flags);
+    DCHECK_EQ(dynsym_.header_.sh_flags, hash_.header_.sh_flags);
+    Elf_Word dynsym_address =
+        RoundUp(dynstr_address + dynstr_.GetCacheSize(), dynsym_.header_.sh_addralign);
+    Elf_Word hash_address =
+        RoundUp(dynsym_address + dynsym_.GetCacheSize(), hash_.header_.sh_addralign);
+    DCHECK_EQ(dynamic_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
+    Elf_Word dynamic_address = RoundUp(hash_address + hash_.GetCacheSize(), kPageSize);
+
+    Elf_Dyn dyns[] = {
+      { DT_HASH, { hash_address } },
+      { DT_STRTAB, { dynstr_address } },
+      { DT_SYMTAB, { dynsym_address } },
+      { DT_SYMENT, { sizeof(Elf_Sym) } },
+      { DT_STRSZ, { dynstr_.GetCacheSize() } },
+      { DT_SONAME, { soname_offset } },
+      { DT_NULL, { 0 } },
+    };
+    dynamic_.Add(&dyns, sizeof(dyns));
+
+    loaded_size_ = RoundUp(dynamic_address + dynamic_.GetCacheSize(), kPageSize);
   }
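
// A worked example of the hash words built above (illustrative; assumes only
// the NULL, oatdata, oatexec and oatlastword symbols, i.e. count == 4):
//
//   hash_ = { 1,    // nbuckets
//             4,    // nchains == count
//             1,    // bucket[0]: chain starts at symbol 1 (oatdata)
//             0,    // chain[0]: dummy entry for the NULL symbol
//             2,    // chain[1]: symbol 1 -> symbol 2
//             3,    // chain[2]: symbol 2 -> symbol 3
//             0 }   // chain[3]: symbol 3 terminates the chain
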
 
-  const Section* FindSection(const char* name) {
-    for (const auto* section : other_sections_) {
-      if (section->GetName() == name) {
-        return section;
-      }
-    }
-    return nullptr;
+  void WriteDynamicSection() {
+    dynstr_.WriteCachedSection();
+    dynsym_.WriteCachedSection();
+    hash_.WriteCachedSection();
+    dynamic_.WriteCachedSection();
+
+    CHECK_EQ(loaded_size_, RoundUp(dynamic_.GetAddress() + dynamic_.GetSize(), kPageSize));
+  }
+
+  Elf_Word GetLoadedSize() {
+    CHECK_NE(loaded_size_, 0u);
+    return loaded_size_;
+  }
+
+  void WriteMIPSabiflagsSection() {
+    abiflags_.Start();
+    abiflags_.Write();
+    abiflags_.End();
+  }
+
+  // Returns true if all writes and seeks on the output stream succeeded.
+  bool Good() {
+    return stream_.Good();
+  }
+
+  // Returns the builder's internal stream.
+  OutputStream* GetStream() {
+    return &stream_;
+  }
+
+  off_t AlignFileOffset(size_t alignment) {
+    return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet);
+  }
+
+  Elf_Addr AlignVirtualAddress(size_t alignment) {
+    return virtual_address_ = RoundUp(virtual_address_, alignment);
   }
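
// Illustrative numbers for the two helpers above: with the stream at file
// offset 0x1234, AlignFileOffset(kPageSize) seeks to and returns 0x2000
// (assuming 4 KiB pages); AlignVirtualAddress(kPageSize) rounds
// virtual_address_ up the same way without touching the stream.
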
 
  private:
-  static bool SeekTo(File* elf_file, Elf_Word offset) {
-    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
-      << "Seeking backwards";
-    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
-      return false;
-    }
-    return true;
-  }
-
-  template<typename T>
-  static bool WriteArray(File* elf_file, const T* data, size_t count) {
-    if (count != 0) {
-      DCHECK(data != nullptr);
-      if (!elf_file->WriteFully(data, count * sizeof(T))) {
-        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // Helper - create segment header based on memory range.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type    = type;
-    phdr.p_flags   = flags;
-    phdr.p_offset  = offset;
-    phdr.p_vaddr   = offset;
-    phdr.p_paddr   = offset;
-    phdr.p_filesz  = size;
-    phdr.p_memsz   = size;
-    phdr.p_align   = align;
-    return phdr;
-  }
-
-  // Helper - create segment header based on section header.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    const Section& section) {
-    const Elf_Shdr* shdr = section.GetHeader();
-    // Only run-time allocated sections should be in segment headers.
-    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type   = type;
-    phdr.p_flags  = flags;
-    phdr.p_offset = shdr->sh_offset;
-    phdr.p_vaddr  = shdr->sh_addr;
-    phdr.p_paddr  = shdr->sh_addr;
-    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
-    phdr.p_memsz  = shdr->sh_size;
-    phdr.p_align  = shdr->sh_addralign;
-    return phdr;
-  }
-
-  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
+  static Elf_Ehdr MakeElfHeader(InstructionSet isa, const InstructionSetFeatures* features) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
       case kArm:
@@ -853,22 +777,28 @@
       case kMips: {
         elf_header.e_machine = EM_MIPS;
         elf_header.e_flags = (EF_MIPS_NOREORDER |
-                               EF_MIPS_PIC       |
-                               EF_MIPS_CPIC      |
-                               EF_MIPS_ABI_O32   |
-                               EF_MIPS_ARCH_32R2);
+                              EF_MIPS_PIC       |
+                              EF_MIPS_CPIC      |
+                              EF_MIPS_ABI_O32   |
+                              (features->AsMipsInstructionSetFeatures()->IsR6()
+                                   ? EF_MIPS_ARCH_32R6
+                                   : EF_MIPS_ARCH_32R2));
         break;
       }
       case kMips64: {
         elf_header.e_machine = EM_MIPS;
         elf_header.e_flags = (EF_MIPS_NOREORDER |
-                               EF_MIPS_PIC       |
-                               EF_MIPS_CPIC      |
-                               EF_MIPS_ARCH_64R6);
+                              EF_MIPS_PIC       |
+                              EF_MIPS_CPIC      |
+                              EF_MIPS_ARCH_64R6);
         break;
       }
       case kNone: {
         LOG(FATAL) << "No instruction set";
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Unknown instruction set " << isa;
       }
     }
 
@@ -892,50 +822,118 @@
     return elf_header;
   }
 
-  void BuildDynamicSection(const std::string& elf_file_path) {
-    std::string soname(elf_file_path);
-    size_t directory_separator_pos = soname.rfind('/');
-    if (directory_separator_pos != std::string::npos) {
-      soname = soname.substr(directory_separator_pos + 1);
+  // Create program headers based on written sections.
+  std::vector<Elf_Phdr> MakeProgramHeaders() {
+    CHECK(!sections_.empty());
+    std::vector<Elf_Phdr> phdrs;
+    {
+      // The program headers must start with PT_PHDR, which is used in
+      // the loaded process to determine the number of program headers.
+      Elf_Phdr phdr = Elf_Phdr();
+      phdr.p_type    = PT_PHDR;
+      phdr.p_flags   = PF_R;
+      phdr.p_offset  = phdr.p_vaddr = phdr.p_paddr = sizeof(Elf_Ehdr);
+      phdr.p_filesz  = phdr.p_memsz = 0;  // We need to fill this later.
+      phdr.p_align   = sizeof(Elf_Off);
+      phdrs.push_back(phdr);
+      // Tell the linker to mmap the start of file to memory.
+      Elf_Phdr load = Elf_Phdr();
+      load.p_type    = PT_LOAD;
+      load.p_flags   = PF_R;
+      load.p_offset  = load.p_vaddr = load.p_paddr = 0;
+      load.p_filesz  = load.p_memsz = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
+      load.p_align   = kPageSize;
+      phdrs.push_back(load);
     }
-    // NB: We must add the name before adding DT_STRSZ.
-    Elf_Word soname_offset = dynstr_.AddName(soname);
-
-    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
-    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
-    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
-    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
-    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
-    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
-  }
-
-  void BuildDynsymSection() {
-    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
-                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatexec", &text_, 0, true,
-                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
-                      true, 4, STB_GLOBAL, STT_OBJECT);
-    if (bss_.GetSize() != 0u) {
-      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
-                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
-      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
-                        true, 4, STB_GLOBAL, STT_OBJECT);
+    // Create program headers for sections.
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // PT_LOAD tells the linker to mmap part of the file.
+        // The linker can only mmap page-aligned sections.
+        // A single PT_LOAD may contain several ELF sections.
+        Elf_Phdr& prev = phdrs.back();
+        Elf_Phdr load = Elf_Phdr();
+        load.p_type   = PT_LOAD;
+        load.p_flags  = section->phdr_flags_;
+        load.p_offset = shdr.sh_offset;
+        load.p_vaddr  = load.p_paddr = shdr.sh_addr;
+        load.p_filesz = (shdr.sh_type != SHT_NOBITS ? shdr.sh_size : 0u);
+        load.p_memsz  = shdr.sh_size;
+        load.p_align  = shdr.sh_addralign;
+        if (prev.p_type == load.p_type &&
+            prev.p_flags == load.p_flags &&
+            prev.p_filesz == prev.p_memsz &&  // Do not merge .bss
+            load.p_filesz == load.p_memsz) {  // Do not merge .bss
+          // Merge this PT_LOAD with the previous one.
+          Elf_Word size = shdr.sh_offset + shdr.sh_size - prev.p_offset;
+          prev.p_filesz = size;
+          prev.p_memsz  = size;
+        } else {
+          // If we are adding a new PT_LOAD, it must be page-aligned.
+          CHECK_EQ(shdr.sh_addralign, (Elf_Word)kPageSize);
+          phdrs.push_back(load);
+        }
+      }
     }
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // Other PT_* types allow the program to locate interesting
+        // parts of memory at runtime. They must overlap with PT_LOAD.
+        if (section->phdr_type_ != 0) {
+          Elf_Phdr phdr = Elf_Phdr();
+          phdr.p_type   = section->phdr_type_;
+          phdr.p_flags  = section->phdr_flags_;
+          phdr.p_offset = shdr.sh_offset;
+          phdr.p_vaddr  = phdr.p_paddr = shdr.sh_addr;
+          phdr.p_filesz = phdr.p_memsz = shdr.sh_size;
+          phdr.p_align  = shdr.sh_addralign;
+          phdrs.push_back(phdr);
+        }
+      }
+    }
+    // Set the size of the initial PT_PHDR.
+    CHECK_EQ(phdrs[0].p_type, (Elf_Word)PT_PHDR);
+    phdrs[0].p_filesz = phdrs[0].p_memsz = phdrs.size() * sizeof(Elf_Phdr);
+
+    return phdrs;
   }
 
   InstructionSet isa_;
-  StrtabSection dynstr_;
-  SymtabSection dynsym_;
-  HashSection hash_;
-  OatSection rodata_;
-  OatSection text_;
-  NoBitsSection bss_;
-  DynamicSection dynamic_;
-  StrtabSection strtab_;
-  SymtabSection symtab_;
-  std::vector<Section*> other_sections_;
-  StrtabSection shstrtab_;
+  const InstructionSetFeatures* features_;
+
+  ErrorDelayingOutputStream stream_;
+
+  Section rodata_;
+  Section text_;
+  Section bss_;
+  CachedStringSection dynstr_;
+  SymbolSection dynsym_;
+  CachedSection hash_;
+  CachedSection dynamic_;
+  Section eh_frame_;
+  Section eh_frame_hdr_;
+  StringSection strtab_;
+  SymbolSection symtab_;
+  Section debug_frame_;
+  Section debug_info_;
+  Section debug_line_;
+  StringSection shstrtab_;
+  AbiflagsSection abiflags_;
+  std::vector<std::unique_ptr<Section>> other_sections_;
+
+  // List of used sections in the order in which they were written.
+  std::vector<Section*> sections_;
+
+  bool started_;
+  bool write_program_headers_;
+
+  // The size of the memory taken by the ELF file when loaded.
+  size_t loaded_size_;
+
+  // Used for allocation of virtual address space.
+  Elf_Addr virtual_address_;
 
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
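
// A hedged end-to-end sketch of the streaming API above; the call order is
// inferred from the methods in this class, and the real oat writer may
// sequence it differently:
//
//   ElfBuilder<ElfTypes32> builder(isa, features, output_stream);
//   builder.Start();
//   ElfBuilder<ElfTypes32>::Section* rodata = builder.GetRoData();
//   rodata->Start();
//   rodata->WriteFully(rodata_bytes.data(), rodata_bytes.size());
//   rodata->End();
//   ElfBuilder<ElfTypes32>::Section* text = builder.GetText();
//   text->Start();
//   text->WriteFully(text_bytes.data(), text_bytes.size());
//   text->End();
//   builder.PrepareDynamicSection(path, rodata_bytes.size(), text_bytes.size(),
//                                 /* bss_size */ 0u);
//   builder.WriteDynamicSection();
//   builder.End();
//   CHECK(builder.Good());
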
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index 4219d97..ca0869a 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -42,7 +42,11 @@
                                      size_t* oat_loaded_size,
                                      size_t* oat_data_offset) {
   std::string error_msg;
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file,
+                                                  false,
+                                                  false,
+                                                  /*low_4gb*/false,
+                                                  &error_msg));
   CHECK(elf_file.get() != nullptr) << error_msg;
 
   bool success = elf_file->GetLoadedSize(oat_loaded_size, &error_msg);
@@ -54,7 +58,7 @@
 
 bool ElfWriter::Fixup(File* file, uintptr_t oat_data_begin) {
   std::string error_msg;
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, /*low_4gb*/false, &error_msg));
   CHECK(elf_file.get() != nullptr) << error_msg;
 
   // Lookup "oatdata" symbol address.
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03f8ceb..c9ea0083 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -25,13 +25,16 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "os.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CompilerDriver;
-class DexFile;
 class ElfFile;
-class OatWriter;
+class OutputStream;
+
+namespace debug {
+struct MethodDebugInfo;
+}  // namespace debug
 
 class ElfWriter {
  public:
@@ -46,21 +49,30 @@
 
   static bool Fixup(File* file, uintptr_t oat_data_begin);
 
- protected:
-  ElfWriter(const CompilerDriver& driver, File* elf_file)
-    : compiler_driver_(&driver), elf_file_(elf_file) {
-  }
-
   virtual ~ElfWriter() {}
 
-  virtual bool Write(OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void Start() = 0;
+  virtual void SetLoadedSectionSizes(size_t rodata_size, size_t text_size, size_t bss_size) = 0;
+  virtual void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0;
+  virtual OutputStream* StartRoData() = 0;
+  virtual void EndRoData(OutputStream* rodata) = 0;
+  virtual OutputStream* StartText() = 0;
+  virtual void EndText(OutputStream* text) = 0;
+  virtual void WriteDynamicSection() = 0;
+  virtual void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0;
+  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
+  virtual bool End() = 0;
 
-  const CompilerDriver* const compiler_driver_;
-  File* const elf_file_;
+  // Get the ELF writer's stream. This stream can be used for writing data directly
+  // to a section after the section has been finished. When that's done, the user
+  // should Seek() back to the position where the stream was before this operation.
+  virtual OutputStream* GetStream() = 0;
+
+  // Get the size that the loaded ELF file will occupy in memory.
+  virtual size_t GetLoadedSize() = 0;
+
+ protected:
+  ElfWriter() = default;
 };
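
// Inferred call order for a concrete ElfWriter (a sketch, not normative):
//
//   writer->Start();
//   OutputStream* rodata = writer->StartRoData();
//   ...                                  // emit .rodata through |rodata|
//   writer->EndRoData(rodata);
//   OutputStream* text = writer->StartText();
//   ...                                  // emit .text through |text|
//   writer->EndText(text);
//   writer->SetLoadedSectionSizes(rodata_size, text_size, bss_size);
//   writer->PrepareDebugInfo(method_infos);
//   writer->WriteDynamicSection();
//   writer->WriteDebugInfo(method_infos);
//   writer->WritePatchLocations(patch_locations);
//   bool ok = writer->End();
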
 
 }  // namespace art
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
deleted file mode 100644
index c10ffeb..0000000
--- a/compiler/elf_writer_debug.cc
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "elf_writer_debug.h"
-
-#include <unordered_set>
-
-#include "base/casts.h"
-#include "compiled_method.h"
-#include "driver/compiler_driver.h"
-#include "dex_file-inl.h"
-#include "dwarf/headers.h"
-#include "dwarf/register.h"
-#include "oat_writer.h"
-#include "utils.h"
-
-namespace art {
-namespace dwarf {
-
-static void WriteDebugFrameCIE(InstructionSet isa,
-                               ExceptionHeaderValueApplication addr_type,
-                               CFIFormat format,
-                               std::vector<uint8_t>* eh_frame) {
-  // Scratch registers should be marked as undefined.  This tells the
-  // debugger that its value in the previous frame is not recoverable.
-  bool is64bit = Is64BitInstructionSet(isa);
-  switch (isa) {
-    case kArm:
-    case kThumb2: {
-      DebugFrameOpCodeWriter<> opcodes;
-      opcodes.DefCFA(Reg::ArmCore(13), 0);  // R13(SP).
-      // core registers.
-      for (int reg = 0; reg < 13; reg++) {
-        if (reg < 4 || reg == 12) {
-          opcodes.Undefined(Reg::ArmCore(reg));
-        } else {
-          opcodes.SameValue(Reg::ArmCore(reg));
-        }
-      }
-      // fp registers.
-      for (int reg = 0; reg < 32; reg++) {
-        if (reg < 16) {
-          opcodes.Undefined(Reg::ArmFp(reg));
-        } else {
-          opcodes.SameValue(Reg::ArmFp(reg));
-        }
-      }
-      auto return_reg = Reg::ArmCore(14);  // R14(LR).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
-      return;
-    }
-    case kArm64: {
-      DebugFrameOpCodeWriter<> opcodes;
-      opcodes.DefCFA(Reg::Arm64Core(31), 0);  // R31(SP).
-      // core registers.
-      for (int reg = 0; reg < 30; reg++) {
-        if (reg < 8 || reg == 16 || reg == 17) {
-          opcodes.Undefined(Reg::Arm64Core(reg));
-        } else {
-          opcodes.SameValue(Reg::Arm64Core(reg));
-        }
-      }
-      // fp registers.
-      for (int reg = 0; reg < 32; reg++) {
-        if (reg < 8 || reg >= 16) {
-          opcodes.Undefined(Reg::Arm64Fp(reg));
-        } else {
-          opcodes.SameValue(Reg::Arm64Fp(reg));
-        }
-      }
-      auto return_reg = Reg::Arm64Core(30);  // R30(LR).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
-      return;
-    }
-    case kMips:
-    case kMips64: {
-      DebugFrameOpCodeWriter<> opcodes;
-      opcodes.DefCFA(Reg::MipsCore(29), 0);  // R29(SP).
-      // core registers.
-      for (int reg = 1; reg < 26; reg++) {
-        if (reg < 16 || reg == 24 || reg == 25) {  // AT, V*, A*, T*.
-          opcodes.Undefined(Reg::MipsCore(reg));
-        } else {
-          opcodes.SameValue(Reg::MipsCore(reg));
-        }
-      }
-      auto return_reg = Reg::MipsCore(31);  // R31(RA).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
-      return;
-    }
-    case kX86: {
-      // FIXME: Add fp registers once libunwind adds support for them. Bug: 20491296
-      constexpr bool generate_opcodes_for_x86_fp = false;
-      DebugFrameOpCodeWriter<> opcodes;
-      opcodes.DefCFA(Reg::X86Core(4), 4);   // R4(ESP).
-      opcodes.Offset(Reg::X86Core(8), -4);  // R8(EIP).
-      // core registers.
-      for (int reg = 0; reg < 8; reg++) {
-        if (reg <= 3) {
-          opcodes.Undefined(Reg::X86Core(reg));
-        } else if (reg == 4) {
-          // Stack pointer.
-        } else {
-          opcodes.SameValue(Reg::X86Core(reg));
-        }
-      }
-      // fp registers.
-      if (generate_opcodes_for_x86_fp) {
-        for (int reg = 0; reg < 8; reg++) {
-          opcodes.Undefined(Reg::X86Fp(reg));
-        }
-      }
-      auto return_reg = Reg::X86Core(8);  // R8(EIP).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
-      return;
-    }
-    case kX86_64: {
-      DebugFrameOpCodeWriter<> opcodes;
-      opcodes.DefCFA(Reg::X86_64Core(4), 8);  // R4(RSP).
-      opcodes.Offset(Reg::X86_64Core(16), -8);  // R16(RIP).
-      // core registers.
-      for (int reg = 0; reg < 16; reg++) {
-        if (reg == 4) {
-          // Stack pointer.
-        } else if (reg < 12 && reg != 3 && reg != 5) {  // except EBX and EBP.
-          opcodes.Undefined(Reg::X86_64Core(reg));
-        } else {
-          opcodes.SameValue(Reg::X86_64Core(reg));
-        }
-      }
-      // fp registers.
-      for (int reg = 0; reg < 16; reg++) {
-        if (reg < 12) {
-          opcodes.Undefined(Reg::X86_64Fp(reg));
-        } else {
-          opcodes.SameValue(Reg::X86_64Fp(reg));
-        }
-      }
-      auto return_reg = Reg::X86_64Core(16);  // R16(RIP).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
-      return;
-    }
-    case kNone:
-      break;
-  }
-  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
-  UNREACHABLE();
-}
-
-void WriteCFISection(const CompilerDriver* compiler,
-                     const OatWriter* oat_writer,
-                     ExceptionHeaderValueApplication address_type,
-                     CFIFormat format,
-                     std::vector<uint8_t>* debug_frame,
-                     std::vector<uintptr_t>* debug_frame_patches,
-                     std::vector<uint8_t>* eh_frame_hdr,
-                     std::vector<uintptr_t>* eh_frame_hdr_patches) {
-  const auto& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
-
-  // Write .eh_frame/.debug_frame section.
-  std::map<uint32_t, size_t> address_to_fde_offset_map;
-  size_t cie_offset = debug_frame->size();
-  WriteDebugFrameCIE(isa, address_type, format, debug_frame);
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    if (!mi.deduped_) {  // Only one FDE per unique address.
-      const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo();
-      if (opcodes != nullptr) {
-        address_to_fde_offset_map.emplace(mi.low_pc_, debug_frame->size());
-        WriteDebugFrameFDE(Is64BitInstructionSet(isa), cie_offset,
-                           mi.low_pc_, mi.high_pc_ - mi.low_pc_,
-                           opcodes, format, debug_frame, debug_frame_patches);
-      }
-    }
-  }
-
-  if (format == DW_EH_FRAME_FORMAT) {
-    // Write .eh_frame_hdr section.
-    Writer<> header(eh_frame_hdr);
-    header.PushUint8(1);  // Version.
-    // Encoding of .eh_frame pointer - libunwind does not honor datarel here,
-    // so we have to use pcrel which means relative to the pointer's location.
-    header.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);
-    // Encoding of binary search table size.
-    header.PushUint8(DW_EH_PE_udata4);
-    // Encoding of binary search table addresses - libunwind supports only this
-    // specific combination, which means relative to the start of .eh_frame_hdr.
-    header.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4);
-    // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section
-    const int32_t relative_eh_frame_begin = -static_cast<int32_t>(debug_frame->size());
-    header.PushInt32(relative_eh_frame_begin - 4U);
-    // Binary search table size (number of entries).
-    header.PushUint32(dchecked_integral_cast<uint32_t>(address_to_fde_offset_map.size()));
-    // Binary search table.
-    for (const auto& address_to_fde_offset : address_to_fde_offset_map) {
-      u_int32_t code_address = address_to_fde_offset.first;
-      int32_t fde_address = dchecked_integral_cast<int32_t>(address_to_fde_offset.second);
-      eh_frame_hdr_patches->push_back(header.data()->size());
-      header.PushUint32(code_address);
-      // We know the exact layout (eh_frame is immediately before eh_frame_hdr)
-      // and the data is relative to the start of the eh_frame_hdr,
-      // so patching isn't necessary (in contrast to the code address above).
-      header.PushInt32(relative_eh_frame_begin + fde_address);
-    }
-  }
-}
-
-/*
- * @brief Generate the DWARF sections.
- * @param oat_writer The Oat file Writer.
- * @param eh_frame Call Frame Information.
- * @param debug_info Compilation unit information.
- * @param debug_info_patches Address locations to be patched.
- * @param debug_abbrev Abbreviations used to generate dbg_info.
- * @param debug_str Debug strings.
- * @param debug_line Line number table.
- * @param debug_line_patches Address locations to be patched.
- */
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches) {
-  const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
-  const bool is64bit = Is64BitInstructionSet(isa);
-
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    if (mi.deduped_) {
-      deduped_addresses.insert(mi.low_pc_);
-    }
-  }
-
-  // Group the methods into compilation units based on source file.
-  std::vector<std::vector<const OatWriter::DebugInfo*>> compilation_units;
-  const char* last_source_file = nullptr;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    // Attribute given instruction range only to single method.
-    // Otherwise the debugger might get really confused.
-    if (!mi.deduped_) {
-      auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
-      const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
-      if (compilation_units.empty() || source_file != last_source_file) {
-        compilation_units.push_back(std::vector<const OatWriter::DebugInfo*>());
-      }
-      compilation_units.back().push_back(&mi);
-      last_source_file = source_file;
-    }
-  }
-
-  // Write .debug_info section.
-  for (const auto& compilation_unit : compilation_units) {
-    uint32_t cunit_low_pc = 0xFFFFFFFFU;
-    uint32_t cunit_high_pc = 0;
-    for (auto method_info : compilation_unit) {
-      cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_);
-      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
-    }
-
-    size_t debug_abbrev_offset = debug_abbrev->size();
-    DebugInfoEntryWriter<> info(is64bit, debug_abbrev);
-    info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
-    info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str);
-    info.WriteData1(DW_AT_language, DW_LANG_Java);
-    info.WriteAddr(DW_AT_low_pc, cunit_low_pc);
-    info.WriteAddr(DW_AT_high_pc, cunit_high_pc);
-    info.WriteData4(DW_AT_stmt_list, debug_line->size());
-    for (auto method_info : compilation_unit) {
-      std::string method_name = PrettyMethod(method_info->dex_method_index_,
-                                             *method_info->dex_file_, true);
-      if (deduped_addresses.find(method_info->low_pc_) != deduped_addresses.end()) {
-        method_name += " [DEDUPED]";
-      }
-      info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
-      info.WriteStrp(DW_AT_name, method_name.data(), debug_str);
-      info.WriteAddr(DW_AT_low_pc, method_info->low_pc_);
-      info.WriteAddr(DW_AT_high_pc, method_info->high_pc_);
-      info.EndTag();  // DW_TAG_subprogram
-    }
-    info.EndTag();  // DW_TAG_compile_unit
-    WriteDebugInfoCU(debug_abbrev_offset, info, debug_info, debug_info_patches);
-
-    // Write .debug_line section.
-    std::vector<FileEntry> files;
-    std::unordered_map<std::string, size_t> files_map;
-    std::vector<std::string> directories;
-    std::unordered_map<std::string, size_t> directories_map;
-    int code_factor_bits_ = 0;
-    int dwarf_isa = -1;
-    switch (isa) {
-      case kArm:  // arm actually means thumb2.
-      case kThumb2:
-        code_factor_bits_ = 1;  // 16-bit instuctions
-        dwarf_isa = 1;  // DW_ISA_ARM_thumb.
-        break;
-      case kArm64:
-      case kMips:
-      case kMips64:
-        code_factor_bits_ = 2;  // 32-bit instructions
-        break;
-      case kNone:
-      case kX86:
-      case kX86_64:
-        break;
-    }
-    DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
-    opcodes.SetAddress(cunit_low_pc);
-    if (dwarf_isa != -1) {
-      opcodes.SetISA(dwarf_isa);
-    }
-    for (const OatWriter::DebugInfo* mi : compilation_unit) {
-      struct DebugInfoCallbacks {
-        static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
-          auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
-          context->dex2line_.push_back({address, static_cast<int32_t>(line)});
-          return false;
-        }
-        DefaultSrcMap dex2line_;
-      } debug_info_callbacks;
-
-      const DexFile* dex = mi->dex_file_;
-      if (mi->code_item_ != nullptr) {
-        dex->DecodeDebugInfo(mi->code_item_,
-                             (mi->access_flags_ & kAccStatic) != 0,
-                             mi->dex_method_index_,
-                             DebugInfoCallbacks::NewPosition,
-                             nullptr,
-                             &debug_info_callbacks);
-      }
-
-      // Get and deduplicate directory and filename.
-      int file_index = 0;  // 0 - primary source file of the compilation.
-      auto& dex_class_def = dex->GetClassDef(mi->class_def_index_);
-      const char* source_file = dex->GetSourceFile(dex_class_def);
-      if (source_file != nullptr) {
-        std::string file_name(source_file);
-        size_t file_name_slash = file_name.find_last_of('/');
-        std::string class_name(dex->GetClassDescriptor(dex_class_def));
-        size_t class_name_slash = class_name.find_last_of('/');
-        std::string full_path(file_name);
-
-        // Guess directory from package name.
-        int directory_index = 0;  // 0 - current directory of the compilation.
-        if (file_name_slash == std::string::npos &&  // Just filename.
-            class_name.front() == 'L' &&  // Type descriptor for a class.
-            class_name_slash != std::string::npos) {  // Has package name.
-          std::string package_name = class_name.substr(1, class_name_slash - 1);
-          auto it = directories_map.find(package_name);
-          if (it == directories_map.end()) {
-            directory_index = 1 + directories.size();
-            directories_map.emplace(package_name, directory_index);
-            directories.push_back(package_name);
-          } else {
-            directory_index = it->second;
-          }
-          full_path = package_name + "/" + file_name;
-        }
-
-        // Add file entry.
-        auto it2 = files_map.find(full_path);
-        if (it2 == files_map.end()) {
-          file_index = 1 + files.size();
-          files_map.emplace(full_path, file_index);
-          files.push_back(FileEntry {
-            file_name,
-            directory_index,
-            0,  // Modification time - NA.
-            0,  // File size - NA.
-          });
-        } else {
-          file_index = it2->second;
-        }
-      }
-      opcodes.SetFile(file_index);
-
-      // Generate mapping opcodes from PC to Java lines.
-      const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_;
-      if (file_index != 0 && !dex2line_map.empty()) {
-        bool first = true;
-        for (SrcMapElem pc2dex : mi->compiled_method_->GetSrcMappingTable()) {
-          uint32_t pc = pc2dex.from_;
-          int dex_pc = pc2dex.to_;
-          auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc));
-          if (dex2line.first) {
-            int line = dex2line.second;
-            if (first) {
-              first = false;
-              if (pc > 0) {
-                // Assume that any preceding code is the prologue.
-                int first_line = dex2line_map.front().to_;
-                // Prologue is not a sensible place for a breakpoint.
-                opcodes.NegateStmt();
-                opcodes.AddRow(mi->low_pc_, first_line);
-                opcodes.NegateStmt();
-                opcodes.SetPrologueEnd();
-              }
-              opcodes.AddRow(mi->low_pc_ + pc, line);
-            } else if (line != opcodes.CurrentLine()) {
-              opcodes.AddRow(mi->low_pc_ + pc, line);
-            }
-          }
-        }
-      } else {
-        // line 0 - instruction cannot be attributed to any source line.
-        opcodes.AddRow(mi->low_pc_, 0);
-      }
-    }
-    opcodes.AdvancePC(cunit_high_pc);
-    opcodes.EndSequence();
-    WriteDebugLineTable(directories, files, opcodes, debug_line, debug_line_patches);
-  }
-}
-
-}  // namespace dwarf
-}  // namespace art
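For reference, the directory-guessing heuristic deleted above is small enough to sketch in isolation: when the debug info records only a bare file name, the package portion of the class descriptor (e.g. "Ljava/lang/String;") supplies a plausible source directory. A minimal standalone sketch; the function name is hypothetical:

```cpp
#include <iostream>
#include <string>

// Guess "<package>/<file>" from a type descriptor and a bare source file
// name, mirroring the heuristic in the deleted WriteDebugSections code;
// returns the file name unchanged when no guess can be made.
std::string GuessSourcePath(const std::string& descriptor, const std::string& file_name) {
  const auto class_name_slash = descriptor.find_last_of('/');
  if (file_name.find('/') == std::string::npos &&          // Just a file name.
      !descriptor.empty() && descriptor.front() == 'L' &&  // Class descriptor.
      class_name_slash != std::string::npos) {             // Has a package name.
    std::string package_name = descriptor.substr(1, class_name_slash - 1);
    return package_name + "/" + file_name;
  }
  return file_name;
}

int main() {
  std::cout << GuessSourcePath("Ljava/lang/String;", "String.java") << "\n";
  // Prints: java/lang/String.java
}
```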
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
deleted file mode 100644
index 69f7e0d..0000000
--- a/compiler/elf_writer_debug.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
-#define ART_COMPILER_ELF_WRITER_DEBUG_H_
-
-#include <vector>
-
-#include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
-
-namespace art {
-namespace dwarf {
-
-void WriteCFISection(const CompilerDriver* compiler,
-                     const OatWriter* oat_writer,
-                     ExceptionHeaderValueApplication address_type,
-                     CFIFormat format,
-                     std::vector<uint8_t>* debug_frame,
-                     std::vector<uintptr_t>* debug_frame_patches,
-                     std::vector<uint8_t>* eh_frame_hdr,
-                     std::vector<uintptr_t>* eh_frame_hdr_patches);
-
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches);
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_ELF_WRITER_DEBUG_H_
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index dce1e86..bed864b 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,19 +21,20 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/unix_file/fd_file.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
+#include "debug/elf_debug_writer.h"
+#include "debug/method_debug_info.h"
 #include "driver/compiler_options.h"
+#include "elf.h"
 #include "elf_builder.h"
-#include "elf_file.h"
 #include "elf_utils.h"
-#include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
-#include "oat.h"
-#include "oat_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
+#include "thread-inl.h"
+#include "thread_pool.h"
 #include "utils.h"
 
 namespace art {
@@ -47,251 +48,237 @@
 // Let's use .debug_frame because it is easier to strip or compress.
 constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT;
 
-// The ARM specification defines three special mapping symbols
-// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
-// These symbols can be used by tools, for example, to pretty
-// print instructions correctly.  Objdump will use them if they
-// exist, but it will still work well without them.
-// However, these extra symbols take space, so let's just generate
-// one symbol which marks the whole .text section as code.
-constexpr bool kGenerateSingleArmMappingSymbol = true;
-
-template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Create(File* elf_file,
-                                      OatWriter* oat_writer,
-                                      const std::vector<const DexFile*>& dex_files,
-                                      const std::string& android_root,
-                                      bool is_host,
-                                      const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
-}
-
-template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
-
-// Encode patch locations as LEB128 list of deltas between consecutive addresses.
-template <typename ElfTypes>
-void ElfWriterQuick<ElfTypes>::EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                                                std::vector<uint8_t>* buffer) {
-  buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
-  uintptr_t address = 0;  // relative to start of section.
-  for (uintptr_t location : locations) {
-    DCHECK_GE(location, address) << "Patch locations are not in sorted order";
-    EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
-    address = location;
-  }
-}
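The delta-plus-ULEB128 scheme this function implements is compact because sorted patch locations tend to be close together, so most gaps fit in a single byte. A self-contained sketch, with EncodeUnsignedLeb128 re-implemented inline for illustration:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Append one unsigned LEB128 value: 7 payload bits per byte, high bit set
// on every byte except the last.
static void EncodeUleb128(std::vector<uint8_t>* out, uint32_t value) {
  while (value >= 0x80) {
    out->push_back(static_cast<uint8_t>(value) | 0x80);
    value >>= 7;
  }
  out->push_back(static_cast<uint8_t>(value));
}

int main() {
  // Sorted patch locations, relative to the start of the section.
  const std::vector<uint32_t> locations = {0x10, 0x18, 0x40, 0x240};
  std::vector<uint8_t> buffer;
  uint32_t address = 0;
  for (uint32_t location : locations) {
    EncodeUleb128(&buffer, location - address);  // Encode the delta.
    address = location;
  }
  std::printf("%zu locations -> %zu bytes\n", locations.size(), buffer.size());
}
```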
-
-class RodataWriter FINAL : public CodeOutput {
+class DebugInfoTask : public Task {
  public:
-  explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+  DebugInfoTask(InstructionSet isa,
+                const InstructionSetFeatures* features,
+                size_t rodata_section_size,
+                size_t text_section_size,
+                const ArrayRef<const debug::MethodDebugInfo>& method_infos)
+      : isa_(isa),
+        instruction_set_features_(features),
+        rodata_section_size_(rodata_section_size),
+        text_section_size_(text_section_size),
+        method_infos_(method_infos) {
+  }
 
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->WriteRodata(out);
+  void Run(Thread*) {
+    result_ = debug::MakeMiniDebugInfo(isa_,
+                                       instruction_set_features_,
+                                       rodata_section_size_,
+                                       text_section_size_,
+                                       method_infos_);
+  }
+
+  std::vector<uint8_t>* GetResult() {
+    return &result_;
   }
 
  private:
-  OatWriter* oat_writer_;
+  InstructionSet isa_;
+  const InstructionSetFeatures* instruction_set_features_;
+  size_t rodata_section_size_;
+  size_t text_section_size_;
+  const ArrayRef<const debug::MethodDebugInfo>& method_infos_;
+  std::vector<uint8_t> result_;
 };
 
-class TextWriter FINAL : public CodeOutput {
+template <typename ElfTypes>
+class ElfWriterQuick FINAL : public ElfWriter {
  public:
-  explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+  ElfWriterQuick(InstructionSet instruction_set,
+                 const InstructionSetFeatures* features,
+                 const CompilerOptions* compiler_options,
+                 File* elf_file);
+  ~ElfWriterQuick();
 
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->WriteCode(out);
-  }
+  void Start() OVERRIDE;
+  void SetLoadedSectionSizes(size_t rodata_size, size_t text_size, size_t bss_size) OVERRIDE;
+  void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE;
+  OutputStream* StartRoData() OVERRIDE;
+  void EndRoData(OutputStream* rodata) OVERRIDE;
+  OutputStream* StartText() OVERRIDE;
+  void EndText(OutputStream* text) OVERRIDE;
+  void WriteDynamicSection() OVERRIDE;
+  void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE;
+  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
+  bool End() OVERRIDE;
+
+  OutputStream* GetStream() OVERRIDE;
+
+  size_t GetLoadedSize() OVERRIDE;
+
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer);
 
  private:
-  OatWriter* oat_writer_;
+  const InstructionSetFeatures* instruction_set_features_;
+  const CompilerOptions* const compiler_options_;
+  File* const elf_file_;
+  size_t rodata_size_;
+  size_t text_size_;
+  size_t bss_size_;
+  std::unique_ptr<BufferedOutputStream> output_stream_;
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+  std::unique_ptr<DebugInfoTask> debug_info_task_;
+  std::unique_ptr<ThreadPool> debug_info_thread_pool_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
 };
 
-enum PatchResult {
-  kAbsoluteAddress,  // Absolute memory location.
-  kPointerRelativeAddress,  // Offset relative to the location of the pointer.
-  kSectionRelativeAddress,  // Offset relative to start of containing section.
-};
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const InstructionSetFeatures* features,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file) {
+  if (Is64BitInstructionSet(instruction_set)) {
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set,
+                                                  features,
+                                                  compiler_options,
+                                                  elf_file);
+  } else {
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set,
+                                                  features,
+                                                  compiler_options,
+                                                  elf_file);
+  }
+}
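CreateElfWriterQuick is now the only entry point callers need: it selects the ElfTypes32 or ElfTypes64 instantiation at runtime while callers see only the ElfWriter base class. The dispatch pattern in isolation, with hypothetical types and assuming C++14 for std::make_unique:

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>

struct Writer {
  virtual ~Writer() = default;
  virtual size_t AddressSize() const = 0;
};

// One template, two instantiations; the base class hides which one was picked.
template <typename Addr>
struct WriterImpl : Writer {
  size_t AddressSize() const override { return sizeof(Addr); }
};

std::unique_ptr<Writer> CreateWriter(bool is_64bit) {
  if (is_64bit) {
    return std::make_unique<WriterImpl<uint64_t>>();
  }
  return std::make_unique<WriterImpl<uint32_t>>();
}
```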
 
-// Patch memory addresses within a buffer.
-// It assumes that the unpatched addresses are offsets relative to base_address.
-// (which generally means method's low_pc relative to the start of .text)
-template <typename Elf_Addr, typename Address, PatchResult kPatchResult>
-static void Patch(const std::vector<uintptr_t>& patch_locations,
-                  Elf_Addr buffer_address, Elf_Addr base_address,
-                  std::vector<uint8_t>* buffer) {
-  for (uintptr_t location : patch_locations) {
-    typedef __attribute__((__aligned__(1))) Address UnalignedAddress;
-    auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer->data() + location);
-    switch (kPatchResult) {
-      case kAbsoluteAddress:
-        *to_patch = (base_address + *to_patch);
-        break;
-      case kPointerRelativeAddress:
-        *to_patch = (base_address + *to_patch) - (buffer_address + location);
-        break;
-      case kSectionRelativeAddress:
-        *to_patch = (base_address + *to_patch) - buffer_address;
-        break;
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const InstructionSetFeatures* features,
+                                         const CompilerOptions* compiler_options,
+                                         File* elf_file)
+    : ElfWriter(),
+      instruction_set_features_(features),
+      compiler_options_(compiler_options),
+      elf_file_(elf_file),
+      rodata_size_(0u),
+      text_size_(0u),
+      bss_size_(0u),
+      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, features, output_stream_.get())) {}
+
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::Start() {
+  builder_->Start();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::SetLoadedSectionSizes(size_t rodata_size,
+                                                     size_t text_size,
+                                                     size_t bss_size) {
+  DCHECK_EQ(rodata_size_, 0u);
+  rodata_size_ = rodata_size;
+  DCHECK_EQ(text_size_, 0u);
+  text_size_ = text_size;
+  DCHECK_EQ(bss_size_, 0u);
+  bss_size_ = bss_size;
+  builder_->PrepareDynamicSection(elf_file_->GetPath(), rodata_size_, text_size_, bss_size_);
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() {
+  auto* rodata = builder_->GetRoData();
+  rodata->Start();
+  return rodata;
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndRoData(OutputStream* rodata) {
+  CHECK_EQ(builder_->GetRoData(), rodata);
+  builder_->GetRoData()->End();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartText() {
+  auto* text = builder_->GetText();
+  text->Start();
+  return text;
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) {
+  CHECK_EQ(builder_->GetText(), text);
+  builder_->GetText()->End();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
+  if (bss_size_ != 0u) {
+    builder_->GetBss()->WriteNoBitsSection(bss_size_);
+  }
+  if (builder_->GetIsa() == kMips || builder_->GetIsa() == kMips64) {
+    builder_->WriteMIPSabiflagsSection();
+  }
+  builder_->WriteDynamicSection();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::PrepareDebugInfo(
+    const ArrayRef<const debug::MethodDebugInfo>& method_infos) {
+  if (!method_infos.empty() && compiler_options_->GetGenerateMiniDebugInfo()) {
+    // Prepare the mini-debug-info in the background while we do other I/O.
+    Thread* self = Thread::Current();
+    debug_info_task_ = std::unique_ptr<DebugInfoTask>(
+        new DebugInfoTask(builder_->GetIsa(),
+                          instruction_set_features_,
+                          rodata_size_,
+                          text_size_,
+                          method_infos));
+    debug_info_thread_pool_ = std::unique_ptr<ThreadPool>(
+        new ThreadPool("Mini-debug-info writer", 1));
+    debug_info_thread_pool_->AddTask(self, debug_info_task_.get());
+    debug_info_thread_pool_->StartWorkers(self);
+  }
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
+    const ArrayRef<const debug::MethodDebugInfo>& method_infos) {
+  if (!method_infos.empty()) {
+    if (compiler_options_->GetGenerateDebugInfo()) {
+      // Generate all the debug information we can.
+      debug::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat, true /* write_oat_patches */);
+    }
+    if (compiler_options_->GetGenerateMiniDebugInfo()) {
+      // Wait for the mini-debug-info generation to finish and write it to disk.
+      Thread* self = Thread::Current();
+      DCHECK(debug_info_thread_pool_ != nullptr);
+      debug_info_thread_pool_->Wait(self, true, false);
+      builder_->WriteSection(".gnu_debugdata", debug_info_task_->GetResult());
     }
   }
 }
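PrepareDebugInfo and WriteDebugInfo above split the work so that the CPU-heavy mini-debug-info generation overlaps the remaining ELF I/O, and the writer blocks only once the payload is actually needed. The same overlap, sketched with std::async instead of ART's ThreadPool (illustrative only; compile with -pthread):

```cpp
#include <cstdint>
#include <future>
#include <vector>

int main() {
  // Kick off the expensive generation early (the PrepareDebugInfo step).
  std::future<std::vector<uint8_t>> pending = std::async(std::launch::async, [] {
    // Stand-in for debug::MakeMiniDebugInfo(): build the .gnu_debugdata payload.
    return std::vector<uint8_t>(1024, 0);
  });

  // ... write other sections here while the task runs ...

  // Block only when the payload is needed (the WriteDebugInfo step).
  std::vector<uint8_t> gnu_debugdata = pending.get();
  return gnu_debugdata.empty() ? 1 : 0;
}
```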
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Write(
-    OatWriter* oat_writer,
-    const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
-    const std::string& android_root_unused ATTRIBUTE_UNUSED,
-    bool is_host_unused ATTRIBUTE_UNUSED) {
-  using Elf_Addr = typename ElfTypes::Addr;
-  const InstructionSet isa = compiler_driver_->GetInstructionSet();
-
-  // Setup the builder with the main OAT sections (.rodata .text .bss).
-  const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
-  const size_t text_size = oat_writer->GetSize() - rodata_size;
-  const size_t bss_size = oat_writer->GetBssSize();
-  RodataWriter rodata_writer(oat_writer);
-  TextWriter text_writer(oat_writer);
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(
-      isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size));
-
-  // Add debug sections.
-  // They are allocated here (in the same scope as the builder),
-  // but they are registered with the builder only if they are used.
-  using RawSection = typename ElfBuilder<ElfTypes>::RawSection;
-  const auto* text = builder->GetText();
-  const bool is64bit = Is64BitInstructionSet(isa);
-  const int pointer_size = GetInstructionSetPointerSize(isa);
-  std::unique_ptr<RawSection> eh_frame(new RawSection(
-      ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
-      is64bit ? Patch<Elf_Addr, uint64_t, kPointerRelativeAddress> :
-                Patch<Elf_Addr, uint32_t, kPointerRelativeAddress>,
-      text));
-  std::unique_ptr<RawSection> eh_frame_hdr(new RawSection(
-      ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0,
-      Patch<Elf_Addr, uint32_t, kSectionRelativeAddress>, text));
-  std::unique_ptr<RawSection> debug_frame(new RawSection(
-      ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, pointer_size, 0,
-      is64bit ? Patch<Elf_Addr, uint64_t, kAbsoluteAddress> :
-                Patch<Elf_Addr, uint32_t, kAbsoluteAddress>,
-      text));
-  std::unique_ptr<RawSection> debug_frame_oat_patches(new RawSection(
-      ".debug_frame.oat_patches", SHT_OAT_PATCH));
-  std::unique_ptr<RawSection> debug_info(new RawSection(
-      ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
-      Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text));
-  std::unique_ptr<RawSection> debug_info_oat_patches(new RawSection(
-      ".debug_info.oat_patches", SHT_OAT_PATCH));
-  std::unique_ptr<RawSection> debug_abbrev(new RawSection(
-      ".debug_abbrev", SHT_PROGBITS));
-  std::unique_ptr<RawSection> debug_str(new RawSection(
-      ".debug_str", SHT_PROGBITS));
-  std::unique_ptr<RawSection> debug_line(new RawSection(
-      ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
-      Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text));
-  std::unique_ptr<RawSection> debug_line_oat_patches(new RawSection(
-      ".debug_line.oat_patches", SHT_OAT_PATCH));
-  if (!oat_writer->GetMethodDebugInfo().empty()) {
-    if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-      // Generate CFI (stack unwinding information).
-      if (kCFIFormat == dwarf::DW_EH_FRAME_FORMAT) {
-        dwarf::WriteCFISection(
-            compiler_driver_, oat_writer,
-            dwarf::DW_EH_PE_pcrel, kCFIFormat,
-            eh_frame->GetBuffer(), eh_frame->GetPatchLocations(),
-            eh_frame_hdr->GetBuffer(), eh_frame_hdr->GetPatchLocations());
-        builder->RegisterSection(eh_frame.get());
-        builder->RegisterSection(eh_frame_hdr.get());
-      } else {
-        DCHECK(kCFIFormat == dwarf::DW_DEBUG_FRAME_FORMAT);
-        dwarf::WriteCFISection(
-            compiler_driver_, oat_writer,
-            dwarf::DW_EH_PE_absptr, kCFIFormat,
-            debug_frame->GetBuffer(), debug_frame->GetPatchLocations(),
-            nullptr, nullptr);
-        builder->RegisterSection(debug_frame.get());
-        EncodeOatPatches(*debug_frame->GetPatchLocations(),
-                         debug_frame_oat_patches->GetBuffer());
-        builder->RegisterSection(debug_frame_oat_patches.get());
-      }
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate DWARF .debug_* sections.
-      dwarf::WriteDebugSections(
-          compiler_driver_, oat_writer,
-          debug_info->GetBuffer(), debug_info->GetPatchLocations(),
-          debug_abbrev->GetBuffer(),
-          debug_str->GetBuffer(),
-          debug_line->GetBuffer(), debug_line->GetPatchLocations());
-      builder->RegisterSection(debug_info.get());
-      EncodeOatPatches(*debug_info->GetPatchLocations(),
-                       debug_info_oat_patches->GetBuffer());
-      builder->RegisterSection(debug_info_oat_patches.get());
-      builder->RegisterSection(debug_abbrev.get());
-      builder->RegisterSection(debug_str.get());
-      builder->RegisterSection(debug_line.get());
-      EncodeOatPatches(*debug_line->GetPatchLocations(),
-                       debug_line_oat_patches->GetBuffer());
-      builder->RegisterSection(debug_line_oat_patches.get());
-    }
-  }
-
+void ElfWriterQuick<ElfTypes>::WritePatchLocations(
+    const ArrayRef<const uintptr_t>& patch_locations) {
   // Add relocation section for .text.
-  std::unique_ptr<RawSection> text_oat_patches(new RawSection(
-      ".text.oat_patches", SHT_OAT_PATCH));
-  if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
+  if (compiler_options_->GetIncludePatchInformation()) {
     // Note that ElfWriter::Fixup will be called regardless and therefore
     // we need to include oat_patches for debug sections unconditionally.
-    EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(),
-                     text_oat_patches->GetBuffer());
-    builder->RegisterSection(text_oat_patches.get());
+    builder_->WritePatches(".text.oat_patches", patch_locations);
   }
-
-  return builder->Write(elf_file_);
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
-  bool generated_mapping_symbol = false;
+bool ElfWriterQuick<ElfTypes>::End() {
+  builder_->End();
 
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of a method is not marked deduped_, but the rest are.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
-    }
-  }
+  return builder_->Good();
+}
 
-  auto* symtab = builder->GetSymtab();
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      continue;  // Add symbol only for the first instance.
-    }
-    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
-    if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
-    }
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
+  return builder_->GetStream();
+}
 
-    uint32_t low_pc = it->low_pc_;
-    // Add in the code delta, e.g., the Thumb bit (bit 0) for Thumb2 code.
-    low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(name, builder->GetText(), low_pc,
-                      true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
-
-    // Conforming to AAELF, add a $t mapping symbol to indicate the start of a sequence of
-    // Thumb2 instructions, so that disassembler tools can disassemble them correctly.
-    // Note that even if we generate just a single mapping symbol, ARM's Streamline
-    // requires it to match a function symbol.  Address 0 alone does not work.
-    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true,
-                          0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
-      }
-    }
-  }
+template <typename ElfTypes>
+size_t ElfWriterQuick<ElfTypes>::GetLoadedSize() {
+  return builder_->GetLoadedSize();
 }
 
 // Explicit instantiations
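For reference, a worked example of the three patch modes removed from this file (kAbsoluteAddress, kPointerRelativeAddress, kSectionRelativeAddress). The unpatched value is an offset relative to base_address, e.g. a method's low_pc relative to the start of .text; the values below are illustrative:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t base_address = 0x3000;    // Where .text is loaded.
  const uint64_t buffer_address = 0x2000;  // Where the patched section is loaded.
  const uint64_t location = 0x10;          // Offset of the pointer within that section.
  const uint64_t unpatched = 0x40;         // Target offset relative to base_address.

  const uint64_t absolute = base_address + unpatched;                        // 0x3040
  const uint64_t pointer_relative = absolute - (buffer_address + location);  // 0x1030
  const uint64_t section_relative = absolute - buffer_address;               // 0x1040

  std::printf("absolute=0x%llx pointer_relative=0x%llx section_relative=0x%llx\n",
              static_cast<unsigned long long>(absolute),
              static_cast<unsigned long long>(pointer_relative),
              static_cast<unsigned long long>(section_relative));
}
```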
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 83781ab..3d5dd39 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,46 +17,21 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
-#include "elf_utils.h"
+#include <memory>
+
+#include "arch/instruction_set.h"
 #include "elf_writer.h"
-#include "oat_writer.h"
+#include "os.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfWriterQuick FINAL : public ElfWriter {
- public:
-  // Write an ELF file. Returns true on success, false on failure.
-  static bool Create(File* file,
-                     OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host,
-                     const CompilerDriver& driver)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+class CompilerOptions;
+class InstructionSetFeatures;
 
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                               std::vector<uint8_t>* buffer);
-
- protected:
-  bool Write(OatWriter* oat_writer,
-             const std::vector<const DexFile*>& dex_files,
-             const std::string& android_root,
-             bool is_host)
-      OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  ElfWriterQuick(const CompilerDriver& driver, File* elf_file)
-    : ElfWriter(driver, elf_file) {}
-  ~ElfWriterQuick() {}
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
-};
-
-// Explicitly instantiated in elf_writer_quick.cc
-typedef ElfWriterQuick<ElfTypes32> ElfWriterQuick32;
-typedef ElfWriterQuick<ElfTypes64> ElfWriterQuick64;
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const InstructionSetFeatures* features,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file);
 
 }  // namespace art
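The slimmed-down header also illustrates the dependency-hygiene pattern applied here: forward-declare the types the interface merely names and expose a factory instead of the template class, so including the header stays cheap. The same shape with hypothetical names:

```cpp
// widget.h -- interface header: no heavy includes, only forward declarations.
#include <memory>

class Config;  // Forward declaration; the definition is needed only in widget.cc.

class Widget {
 public:
  virtual ~Widget() = default;
  virtual void Run() = 0;
};

// The factory hides the concrete (possibly templated) implementation.
std::unique_ptr<Widget> CreateWidget(const Config* config);
```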
 
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index ccf34b8..449f514 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -21,6 +21,7 @@
 #include "common_compiler_test.h"
 #include "elf_file.h"
 #include "elf_file_impl.h"
+#include "elf_builder.h"
 #include "elf_writer_quick.h"
 #include "oat.h"
 #include "utils.h"
@@ -63,7 +64,11 @@
   ASSERT_TRUE(file.get() != nullptr);
   {
     std::string error_msg;
-    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(),
+                                              false,
+                                              false,
+                                              /*low_4gb*/false,
+                                              &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", false);
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", false);
@@ -71,7 +76,11 @@
   }
   {
     std::string error_msg;
-    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(),
+                                              false,
+                                              false,
+                                              /*low_4gb*/false,
+                                              &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", true);
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", true);
@@ -79,9 +88,13 @@
   }
   {
     std::string error_msg;
-    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, true, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(),
+                                              false,
+                                              true,
+                                              /*low_4gb*/false,
+                                              &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
-    CHECK(ef->Load(false, &error_msg)) << error_msg;
+    CHECK(ef->Load(false, /*low_4gb*/false, &error_msg)) << error_msg;
     EXPECT_EQ(dl_oatdata, ef->FindDynamicSymbolAddress("oatdata"));
     EXPECT_EQ(dl_oatexec, ef->FindDynamicSymbolAddress("oatexec"));
     EXPECT_EQ(dl_oatlastword, ef->FindDynamicSymbolAddress("oatlastword"));
@@ -100,7 +113,8 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfWriterQuick32::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(patch_locations),
+                                             &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
new file mode 100644
index 0000000..86f91c5
--- /dev/null
+++ b/compiler/exception_test.cc
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+
+#include "base/arena_allocator.h"
+#include "base/enums.h"
+#include "class_linker.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "dex_file-inl.h"
+#include "gtest/gtest.h"
+#include "leb128.h"
+#include "mirror/class-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/stack_trace_element.h"
+#include "oat_quick_method_header.h"
+#include "optimizing/stack_map_stream.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "handle_scope-inl.h"
+#include "thread.h"
+
+namespace art {
+
+class ExceptionTest : public CommonRuntimeTest {
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTest::SetUp();
+
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle"))));
+    my_klass_ = class_linker_->FindClass(soa.Self(), "LExceptionHandle;", class_loader);
+    ASSERT_TRUE(my_klass_ != nullptr);
+    Handle<mirror::Class> klass(hs.NewHandle(my_klass_));
+    class_linker_->EnsureInitialized(soa.Self(), klass, true, true);
+    my_klass_ = klass.Get();
+
+    dex_ = my_klass_->GetDexCache()->GetDexFile();
+
+    uint32_t code_size = 12;
+    for (size_t i = 0 ; i < code_size; i++) {
+      fake_code_.push_back(0x70 | i);
+    }
+
+    ArenaPool pool;
+    ArenaAllocator allocator(&pool);
+    StackMapStream stack_maps(&allocator);
+    stack_maps.BeginStackMapEntry(/* dex_pc */ 3u,
+                                  /* native_pc_offset */ 3u,
+                                  /* register_mask */ 0u,
+                                  /* sp_mask */ nullptr,
+                                  /* num_dex_registers */ 0u,
+                                  /* inlining_depth */ 0u);
+    stack_maps.EndStackMapEntry();
+    size_t stack_maps_size = stack_maps.PrepareForFillIn();
+    size_t stack_maps_offset = stack_maps_size + sizeof(OatQuickMethodHeader);
+
+    fake_header_code_and_maps_.resize(stack_maps_offset + fake_code_.size());
+    MemoryRegion stack_maps_region(&fake_header_code_and_maps_[0], stack_maps_size);
+    stack_maps.FillIn(stack_maps_region);
+    OatQuickMethodHeader method_header(stack_maps_offset, 4 * sizeof(void*), 0u, 0u, code_size);
+    memcpy(&fake_header_code_and_maps_[stack_maps_size], &method_header, sizeof(method_header));
+    std::copy(fake_code_.begin(),
+              fake_code_.end(),
+              fake_header_code_and_maps_.begin() + stack_maps_offset);
+
+    // Align the code.
+    const size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+    fake_header_code_and_maps_.reserve(fake_header_code_and_maps_.size() + alignment);
+    const void* unaligned_code_ptr =
+        fake_header_code_and_maps_.data() + (fake_header_code_and_maps_.size() - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = RoundUp(offset, alignment) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(fake_header_code_and_maps_.capacity(), fake_header_code_and_maps_.size() + padding);
+    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(), padding, 0);
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr,
+             static_cast<const void*>(fake_header_code_and_maps_.data() +
+                                          (fake_header_code_and_maps_.size() - code_size)));
+
+    if (kRuntimeISA == kArm) {
+      // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer().
+      CHECK_ALIGNED(stack_maps_offset, 2);
+    }
+
+    method_f_ = my_klass_->FindVirtualMethod("f", "()I", kRuntimePointerSize);
+    ASSERT_TRUE(method_f_ != nullptr);
+    method_f_->SetEntryPointFromQuickCompiledCode(code_ptr);
+
+    method_g_ = my_klass_->FindVirtualMethod("g", "(I)V", kRuntimePointerSize);
+    ASSERT_TRUE(method_g_ != nullptr);
+    method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
+  }
+
+  const DexFile* dex_;
+
+  std::vector<uint8_t> fake_code_;
+  std::vector<uint8_t> fake_header_code_and_maps_;
+
+  ArtMethod* method_f_;
+  ArtMethod* method_g_;
+
+ private:
+  mirror::Class* my_klass_;
+};
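The alignment dance in SetUp() reduces to the RoundUp computation it relies on: pad the front of the buffer so the code itself lands on an instruction-set-aligned address. A minimal sketch, assuming a power-of-two alignment:

```cpp
#include <cassert>
#include <cstddef>

// RoundUp as used above: the smallest multiple of alignment >= value.
static size_t RoundUpTo(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  const size_t offset = 0x1003;  // Unaligned address of the code.
  const size_t alignment = 8;    // E.g. GetInstructionSetAlignment(kRuntimeISA).
  const size_t padding = RoundUpTo(offset, alignment) - offset;  // 5 bytes.
  assert((offset + padding) % alignment == 0);
  return 0;
}
```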
+
+TEST_F(ExceptionTest, FindCatchHandler) {
+  ScopedObjectAccess soa(Thread::Current());
+  const DexFile::CodeItem* code_item = dex_->GetCodeItem(method_f_->GetCodeItemOffset());
+
+  ASSERT_TRUE(code_item != nullptr);
+
+  ASSERT_EQ(2u, code_item->tries_size_);
+  ASSERT_NE(0u, code_item->insns_size_in_code_units_);
+
+  const DexFile::TryItem *t0, *t1;
+  t0 = dex_->GetTryItems(*code_item, 0);
+  t1 = dex_->GetTryItems(*code_item, 1);
+  EXPECT_LE(t0->start_addr_, t1->start_addr_);
+  {
+    CatchHandlerIterator iter(*code_item, 4 /* Dex PC in the first try block */);
+    EXPECT_STREQ("Ljava/io/IOException;", dex_->StringByTypeIdx(iter.GetHandlerTypeIndex()));
+    ASSERT_TRUE(iter.HasNext());
+    iter.Next();
+    EXPECT_STREQ("Ljava/lang/Exception;", dex_->StringByTypeIdx(iter.GetHandlerTypeIndex()));
+    ASSERT_TRUE(iter.HasNext());
+    iter.Next();
+    EXPECT_FALSE(iter.HasNext());
+  }
+  {
+    CatchHandlerIterator iter(*code_item, 8 /* Dex PC in the second try block */);
+    EXPECT_STREQ("Ljava/io/IOException;", dex_->StringByTypeIdx(iter.GetHandlerTypeIndex()));
+    ASSERT_TRUE(iter.HasNext());
+    iter.Next();
+    EXPECT_FALSE(iter.HasNext());
+  }
+  {
+    CatchHandlerIterator iter(*code_item, 11 /* Dex PC not in any try block */);
+    EXPECT_FALSE(iter.HasNext());
+  }
+}
+
+TEST_F(ExceptionTest, StackTraceElement) {
+  Thread* thread = Thread::Current();
+  thread->TransitionFromSuspendedToRunnable();
+  bool started = runtime_->Start();
+  CHECK(started);
+  JNIEnv* env = thread->GetJniEnv();
+  ScopedObjectAccess soa(env);
+
+  std::vector<uintptr_t> fake_stack;
+  Runtime* r = Runtime::Current();
+  r->SetInstructionSet(kRuntimeISA);
+  ArtMethod* save_method = r->CreateCalleeSaveMethod();
+  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAllCalleeSaves);
+  QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
+
+  ASSERT_EQ(kStackAlignment, 16U);
+  // ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
+
+  // Create three fake stack frames with mapping data created in SetUp. We map offset 3 in the
+  // code to dex pc 3.
+  const uint32_t dex_pc = 3;
+
+  // Create the stack frame for the callee save method, expected by the runtime.
+  fake_stack.push_back(reinterpret_cast<uintptr_t>(save_method));
+  for (size_t i = 0; i < frame_info.FrameSizeInBytes() - 2 * sizeof(uintptr_t);
+       i += sizeof(uintptr_t)) {
+    fake_stack.push_back(0);
+  }
+
+  fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
+      method_g_, dex_pc, /* is_catch_handler */ false));  // return pc
+
+  // Create/push a fake 16-byte stack frame for method g.
+  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
+  fake_stack.push_back(0);
+  fake_stack.push_back(0);
+  fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
+      method_g_, dex_pc, /* is_catch_handler */ false));  // return pc
+
+  // Create/push a fake 16-byte stack frame for method f.
+  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
+  fake_stack.push_back(0);
+  fake_stack.push_back(0);
+  fake_stack.push_back(0xEBAD6070);  // return pc
+
+  // Push Method* of null to terminate the trace
+  fake_stack.push_back(0);
+
+  // Push null values which will become null incoming arguments.
+  fake_stack.push_back(0);
+  fake_stack.push_back(0);
+  fake_stack.push_back(0);
+
+  // Set up the thread to appear as if we called out of method_g_ at dex pc 3.
+  thread->SetTopOfStack(reinterpret_cast<ArtMethod**>(&fake_stack[0]));
+
+  jobject internal = thread->CreateInternalStackTrace<false>(soa);
+  ASSERT_TRUE(internal != nullptr);
+  jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(soa, internal);
+  ASSERT_TRUE(ste_array != nullptr);
+  auto* trace_array = soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(ste_array);
+
+  ASSERT_TRUE(trace_array != nullptr);
+  ASSERT_TRUE(trace_array->Get(0) != nullptr);
+  EXPECT_STREQ("ExceptionHandle",
+               trace_array->Get(0)->GetDeclaringClass()->ToModifiedUtf8().c_str());
+  EXPECT_STREQ("ExceptionHandle.java",
+               trace_array->Get(0)->GetFileName()->ToModifiedUtf8().c_str());
+  EXPECT_STREQ("g", trace_array->Get(0)->GetMethodName()->ToModifiedUtf8().c_str());
+  EXPECT_EQ(37, trace_array->Get(0)->GetLineNumber());
+
+  ASSERT_TRUE(trace_array->Get(1) != nullptr);
+  EXPECT_STREQ("ExceptionHandle",
+               trace_array->Get(1)->GetDeclaringClass()->ToModifiedUtf8().c_str());
+  EXPECT_STREQ("ExceptionHandle.java",
+               trace_array->Get(1)->GetFileName()->ToModifiedUtf8().c_str());
+  EXPECT_STREQ("f", trace_array->Get(1)->GetMethodName()->ToModifiedUtf8().c_str());
+  EXPECT_EQ(22, trace_array->Get(1)->GetLineNumber());
+
+  thread->SetTopOfStack(nullptr);  // Disarm the assertion that no code is running when we detach.
+}
+
+}  // namespace art
diff --git a/compiler/file_output_stream.cc b/compiler/file_output_stream.cc
deleted file mode 100644
index 3ee16f5..0000000
--- a/compiler/file_output_stream.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "file_output_stream.h"
-
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "base/unix_file/fd_file.h"
-
-namespace art {
-
-FileOutputStream::FileOutputStream(File* file) : OutputStream(file->GetPath()), file_(file) {}
-
-bool FileOutputStream::WriteFully(const void* buffer, size_t byte_count) {
-  return file_->WriteFully(buffer, byte_count);
-}
-
-off_t FileOutputStream::Seek(off_t offset, Whence whence) {
-  return lseek(file_->Fd(), offset, static_cast<int>(whence));
-}
-
-}  // namespace art
diff --git a/compiler/file_output_stream.h b/compiler/file_output_stream.h
deleted file mode 100644
index 9dfbd7f..0000000
--- a/compiler/file_output_stream.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_FILE_OUTPUT_STREAM_H_
-#define ART_COMPILER_FILE_OUTPUT_STREAM_H_
-
-#include "output_stream.h"
-
-#include "os.h"
-
-namespace art {
-
-class FileOutputStream FINAL : public OutputStream {
- public:
-  explicit FileOutputStream(File* file);
-
-  virtual ~FileOutputStream() {}
-
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
-
-  virtual off_t Seek(off_t offset, Whence whence);
-
- private:
-  File* const file_;
-
-  DISALLOW_COPY_AND_ASSIGN(FileOutputStream);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_FILE_OUTPUT_STREAM_H_
diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h
deleted file mode 100644
index 2ef7f1a..0000000
--- a/compiler/gc_map_builder.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_GC_MAP_BUILDER_H_
-#define ART_COMPILER_GC_MAP_BUILDER_H_
-
-#include <vector>
-
-#include "base/bit_utils.h"
-#include "gc_map.h"
-
-namespace art {
-
-class GcMapBuilder {
- public:
-  template <typename Vector>
-  GcMapBuilder(Vector* table, size_t entries, uint32_t max_native_offset,
-               size_t references_width)
-      : entries_(entries), references_width_(entries != 0u ? references_width : 0u),
-        native_offset_width_(entries != 0 && max_native_offset != 0
-                             ? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u
-                             : 0u),
-        in_use_(entries) {
-    static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-    // Resize table and set up header.
-    table->resize((EntryWidth() * entries) + sizeof(uint32_t));
-    table_ = table->data();
-    CHECK_LT(native_offset_width_, 1U << 3);
-    (*table)[0] = native_offset_width_ & 7;
-    CHECK_LT(references_width_, 1U << 13);
-    (*table)[0] |= (references_width_ << 3) & 0xFF;
-    (*table)[1] = (references_width_ >> 5) & 0xFF;
-    CHECK_LT(entries, 1U << 16);
-    (*table)[2] = entries & 0xFF;
-    (*table)[3] = (entries >> 8) & 0xFF;
-  }
-
-  void AddEntry(uint32_t native_offset, const uint8_t* references) {
-    size_t table_index = TableIndex(native_offset);
-    while (in_use_[table_index]) {
-      table_index = (table_index + 1) % entries_;
-    }
-    in_use_[table_index] = true;
-    SetCodeOffset(table_index, native_offset);
-    DCHECK_EQ(native_offset, GetCodeOffset(table_index));
-    SetReferences(table_index, references);
-  }
-
- private:
-  size_t TableIndex(uint32_t native_offset) {
-    return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_;
-  }
-
-  uint32_t GetCodeOffset(size_t table_index) {
-    uint32_t native_offset = 0;
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    for (size_t i = 0; i < native_offset_width_; i++) {
-      native_offset |= table_[table_offset + i] << (i * 8);
-    }
-    return native_offset;
-  }
-
-  void SetCodeOffset(size_t table_index, uint32_t native_offset) {
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    for (size_t i = 0; i < native_offset_width_; i++) {
-      table_[table_offset + i] = (native_offset >> (i * 8)) & 0xFF;
-    }
-  }
-
-  void SetReferences(size_t table_index, const uint8_t* references) {
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    memcpy(&table_[table_offset + native_offset_width_], references, references_width_);
-  }
-
-  size_t EntryWidth() const {
-    return native_offset_width_ + references_width_;
-  }
-
-  // Number of entries in the table.
-  const size_t entries_;
-  // Number of bytes used to encode the reference bitmap.
-  const size_t references_width_;
-  // Number of bytes used to encode a native offset.
-  const size_t native_offset_width_;
-  // Entries that are in use.
-  std::vector<bool> in_use_;
-  // The table we're building.
-  uint8_t* table_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_GC_MAP_BUILDER_H_
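For reference, the 4-byte header packed by the deleted GcMapBuilder constructor can be decoded as follows (example values only):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Layout from the deleted constructor: byte 0 bits 0-2 hold the native
  // offset width, the next 13 bits hold the reference bitmap width, and
  // bytes 2-3 hold the 16-bit entry count (little-endian).
  const uint8_t header[4] = {0x1A, 0x01, 0x10, 0x00};
  const unsigned native_offset_width = header[0] & 7;
  const unsigned references_width = (header[0] >> 3) | (header[1] << 5);
  const unsigned entries = header[2] | (header[3] << 8);
  std::printf("native_offset_width=%u references_width=%u entries=%u\n",
              native_offset_width, references_width, entries);
}
```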
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e31a7a..e1ee0d2 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,16 +23,19 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "debug/method_debug_info.h"
+#include "driver/compiler_options.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
+#include "linker/multi_oat_relative_patcher.h"
 #include "lock_word.h"
 #include "mirror/object-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -42,10 +45,21 @@
     ReserveImageSpace();
     CommonCompilerTest::SetUp();
   }
+  void TestWriteRead(ImageHeader::StorageMode storage_mode);
 };
 
-TEST_F(ImageTest, WriteRead) {
-  TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING();
+void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
+
+  // Set inline filter values.
+  compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit);
+  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
+
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  // Enable write for dex2dex.
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
+    dex_file->EnableWrite();
+  }
   // Create a generic location tmp file, to be the base of the .art and .oat temporary files.
   ScratchFile location;
   ScratchFile image_location(location, ".art");
@@ -64,30 +78,93 @@
   ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
 
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
-  std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_, requested_image_base,
-                                                      /*compile_pic*/false));
+  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
+  std::vector<const char*> oat_filename_vector(1, oat_filename.c_str());
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
+    dex_file_to_oat_index_map.emplace(dex_file, 0);
+  }
+  std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
+                                                      requested_image_base,
+                                                      /*compile_pic*/false,
+                                                      /*compile_app_image*/false,
+                                                      storage_mode,
+                                                      oat_filename_vector,
+                                                      dex_file_to_oat_index_map));
   // TODO: compile_pic should be a test argument.
   {
     {
       jobject class_loader = nullptr;
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       TimingLogger timings("ImageTest::WriteRead", false, false);
       TimingLogger::ScopedTiming t("CompileAll", &timings);
-      for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
-        dex_file->EnableWrite();
-      }
+      compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
       compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(),
-                           writer.get(), &timings, &key_value_store);
-      bool success = writer->PrepareImageAddressSpace() &&
-          compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                     !kIsTargetBuild,
-                                     class_linker->GetBootClassPath(),
-                                     &oat_writer,
-                                     oat_file.GetFile());
+      const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
+      std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+          compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
+          &compiler_driver_->GetCompilerOptions(),
+          oat_file.GetFile());
+      elf_writer->Start();
+      OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
+      OutputStream* rodata = elf_writer->StartRoData();
+      for (const DexFile* dex_file : dex_files) {
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writer.AddRawDexFileSource(raw_dex_file,
+                                       dex_file->GetLocation().c_str(),
+                                       dex_file->GetLocationChecksum());
+      }
+      std::unique_ptr<MemMap> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+          rodata,
+          oat_file.GetFile(),
+          compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
+          &key_value_store,
+          /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
+          &opened_dex_files_map,
+          &opened_dex_files);
+      ASSERT_TRUE(dex_files_ok);
+
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
+      linker::MultiOatRelativePatcher patcher(compiler_driver_->GetInstructionSet(),
+                                              instruction_set_features_.get());
+      oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files, &patcher);
+      size_t rodata_size = oat_writer.GetOatHeader().GetExecutableOffset();
+      size_t text_size = oat_writer.GetSize() - rodata_size;
+      elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer.GetBssSize());
+
+      writer->UpdateOatFileLayout(/* oat_index */ 0u,
+                                  elf_writer->GetLoadedSize(),
+                                  oat_writer.GetOatDataOffset(),
+                                  oat_writer.GetSize());
+
+      bool rodata_ok = oat_writer.WriteRodata(rodata);
+      ASSERT_TRUE(rodata_ok);
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      bool text_ok = oat_writer.WriteCode(text);
+      ASSERT_TRUE(text_ok);
+      elf_writer->EndText(text);
+
+      bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+      ASSERT_TRUE(header_ok);
+
+      writer->UpdateOatFileHeader(/* oat_index */ 0u, oat_writer.GetOatHeader());
+
+      elf_writer->WriteDynamicSection();
+      elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
+      elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
+
+      bool success = elf_writer->End();
       ASSERT_TRUE(success);
     }
   }
@@ -96,10 +173,14 @@
   ASSERT_TRUE(dup_oat.get() != nullptr);
 
   {
-    bool success_image =
-        writer->Write(image_file.GetFilename(), dup_oat->GetPath(), dup_oat->GetPath());
+    std::vector<const char*> dup_oat_filename(1, dup_oat->GetPath().c_str());
+    std::vector<const char*> dup_image_filename(1, image_file.GetFilename().c_str());
+    bool success_image = writer->Write(kInvalidFd,
+                                       dup_image_filename,
+                                       dup_oat_filename);
     ASSERT_TRUE(success_image);
-    bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer->GetOatDataBegin());
+    bool success_fixup = ElfWriter::Fixup(dup_oat.get(),
+                                          writer->GetOatDataBegin(0));
     ASSERT_TRUE(success_fixup);
 
     ASSERT_EQ(dup_oat->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
@@ -107,6 +188,7 @@
   }
 
   uint64_t image_file_size;
+  size_t image_size;
   {
     std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
     ASSERT_TRUE(file.get() != nullptr);
@@ -118,13 +200,14 @@
     ASSERT_NE(0U, bitmap_section.Size());
 
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_TRUE(!heap->GetContinuousSpaces().empty());
+    ASSERT_TRUE(heap->HaveContinuousSpaces());
     gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
     ASSERT_FALSE(space->IsImageSpace());
     ASSERT_TRUE(space != nullptr);
     ASSERT_TRUE(space->IsMallocSpace());
 
     image_file_size = file->GetLength();
+    image_size = image_header.GetImageSize();
   }
 
   ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
@@ -144,7 +227,7 @@
   java_lang_dex_file_ = nullptr;
 
   MemMap::Init();
-  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str()));
+  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileNames()[0].c_str()));
 
   RuntimeOptions options;
   std::string image("-Ximage:");
@@ -166,12 +249,19 @@
   class_linker_ = runtime_->GetClassLinker();
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_TRUE(heap->HasImageSpace());
+  ASSERT_TRUE(heap->HasBootImageSpace());
   ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
-  gc::space::ImageSpace* image_space = heap->GetImageSpace();
+  // We loaded the runtime with an explicit image, so it must exist.
+  gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[0];
   ASSERT_TRUE(image_space != nullptr);
-  ASSERT_LE(image_space->Size(), image_file_size);
+  if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+    // Uncompressed: the in-memory image should be no larger than the file.
+    ASSERT_LE(image_size, image_file_size);
+  } else {
+    // Compressed: the file should be no larger than the in-memory image.
+    ASSERT_LE(image_file_size, image_size);
+  }
 
   image_space->VerifyImageAllocations();
   uint8_t* image_begin = image_space->Begin();
@@ -199,6 +289,18 @@
   CHECK_EQ(0, rmdir_result);
 }
 
+TEST_F(ImageTest, WriteReadUncompressed) {
+  TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
+
+TEST_F(ImageTest, WriteReadLZ4) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
+
+TEST_F(ImageTest, WriteReadLZ4HC) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
+}
+
 TEST_F(ImageTest, ImageHeaderIsValid) {
     uint32_t image_begin = ART_BASE_ADDRESS;
     uint32_t image_size_ = 16 * KB;
@@ -218,9 +321,17 @@
                              oat_data_begin,
                              oat_data_end,
                              oat_file_end,
+                             /*boot_image_begin*/0U,
+                             /*boot_image_size*/0U,
+                             /*boot_oat_begin*/0U,
+                             /*boot_oat_size*/0U,
                              sizeof(void*),
-                             /*compile_pic*/false);
+                             /*compile_pic*/false,
+                             /*is_pic*/false,
+                             ImageHeader::kDefaultStorageMode,
+                             /*data_size*/0u);
     ASSERT_TRUE(image_header.IsValid());
+    ASSERT_FALSE(image_header.IsAppImage());
 
     char* magic = const_cast<char*>(image_header.GetMagic());
     strcpy(magic, "");  // bad magic
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 4310be6..bb45999 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -17,9 +17,12 @@
 #include "image_writer.h"
 
 #include <sys/stat.h>
+#include <lz4.h>
+#include <lz4hc.h>
 
 #include <memory>
 #include <numeric>
+#include <unordered_set>
 #include <vector>
 
 #include "art_field-inl.h"
@@ -36,6 +39,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "gc/collector/concurrent_copying.h"
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -48,6 +52,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
@@ -72,6 +77,39 @@
 // Separate objects into multiple bins to optimize dirty memory use.
 static constexpr bool kBinObjects = true;
 
+// Return true if an object is already in an image space.
+bool ImageWriter::IsInBootImage(const void* obj) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  if (!compile_app_image_) {
+    DCHECK(heap->GetBootImageSpaces().empty());
+    return false;
+  }
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const uint8_t* image_begin = boot_image_space->Begin();
+    // Real image end including ArtMethods and ArtField sections.
+    const uint8_t* image_end = image_begin + boot_image_space->GetImageHeader().GetImageSize();
+    if (image_begin <= obj && obj < image_end) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  if (!compile_app_image_) {
+    DCHECK(heap->GetBootImageSpaces().empty());
+    return false;
+  }
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const ImageHeader& image_header = boot_image_space->GetImageHeader();
+    if (image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* klass = obj->GetClass();
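Both predicates above reduce to a half-open interval test, begin <= ptr < end, over each boot image space. A minimal standalone sketch of the same pattern; Span and InAnySpan are hypothetical stand-ins for the heap's boot image spaces:

#include <cstdint>
#include <vector>

struct Span {
  const uint8_t* begin;  // Inclusive.
  const uint8_t* end;    // Exclusive, i.e. the range is [begin, end).
};

// Mirrors the image_begin <= obj && obj < image_end comparison above.
bool InAnySpan(const std::vector<Span>& spans, const void* ptr) {
  const uint8_t* p = static_cast<const uint8_t*>(ptr);
  for (const Span& s : spans) {
    if (s.begin <= p && p < s.end) {
      return true;
    }
  }
  return false;
}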
@@ -85,12 +123,15 @@
 
 bool ImageWriter::PrepareImageAddressSpace() {
   target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
-    ComputeLazyFieldsForImageClasses();  // Add useful information
+    if (!compile_app_image_) {
+      // Avoid for app image since this may increase RAM and image size.
+      ComputeLazyFieldsForImageClasses();  // Add useful information
+    }
   }
-  gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);  // Remove garbage.
 
   // Dex caches must not have their dex fields set in the image. These are memory buffers of mapped
@@ -122,97 +163,175 @@
   return true;
 }
 
-bool ImageWriter::Write(const std::string& image_filename,
-                        const std::string& oat_filename,
-                        const std::string& oat_location) {
-  CHECK(!image_filename.empty());
-
-  std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
-  if (oat_file.get() == nullptr) {
-    PLOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
-    return false;
+bool ImageWriter::Write(int image_fd,
+                        const std::vector<const char*>& image_filenames,
+                        const std::vector<const char*>& oat_filenames) {
+  // If image_fd is not kInvalidFd then image_filenames may contain an empty string,
+  // in which case the image is written directly to that file descriptor.
+  CHECK(!image_filenames.empty());
+  if (image_fd != kInvalidFd) {
+    CHECK_EQ(image_filenames.size(), 1u);
   }
-  std::string error_msg;
-  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, nullptr, &error_msg);
-  if (oat_file_ == nullptr) {
-    PLOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location
-        << ": " << error_msg;
-    oat_file->Erase();
-    return false;
-  }
-  Runtime::Current()->GetOatFileManager().RegisterOatFile(
-      std::unique_ptr<const OatFile>(oat_file_));
-
-  interpreter_to_interpreter_bridge_offset_ =
-      oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset();
-  interpreter_to_compiled_code_bridge_offset_ =
-      oat_file_->GetOatHeader().GetInterpreterToCompiledCodeBridgeOffset();
-
-  jni_dlsym_lookup_offset_ = oat_file_->GetOatHeader().GetJniDlsymLookupOffset();
-
-  quick_generic_jni_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickGenericJniTrampolineOffset();
-  quick_imt_conflict_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickImtConflictTrampolineOffset();
-  quick_resolution_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickResolutionTrampolineOffset();
-  quick_to_interpreter_bridge_offset_ =
-      oat_file_->GetOatHeader().GetQuickToInterpreterBridgeOffset();
-
-  size_t oat_loaded_size = 0;
-  size_t oat_data_offset = 0;
-  ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset);
+  CHECK(!oat_filenames.empty());
+  CHECK_EQ(image_filenames.size(), oat_filenames.size());
 
   {
     ScopedObjectAccess soa(Thread::Current());
-    CreateHeader(oat_loaded_size, oat_data_offset);
-    CopyAndFixupNativeData();
+    for (size_t i = 0; i < oat_filenames.size(); ++i) {
+      CreateHeader(i);
+      CopyAndFixupNativeData(i);
+    }
+  }
+
+  {
     // TODO: heap validation can't handle these fix up passes.
+    ScopedObjectAccess soa(Thread::Current());
     Runtime::Current()->GetHeap()->DisableObjectValidation();
     CopyAndFixupObjects();
   }
 
-  SetOatChecksumFromElfFile(oat_file.get());
+  for (size_t i = 0; i < image_filenames.size(); ++i) {
+    const char* image_filename = image_filenames[i];
+    ImageInfo& image_info = GetImageInfo(i);
+    std::unique_ptr<File> image_file;
+    if (image_fd != kInvalidFd) {
+      if (strlen(image_filename) == 0u) {
+        image_file.reset(new File(image_fd, unix_file::kCheckSafeUsage));
+        // Empty the file in case it already exists.
+        if (image_file != nullptr) {
+          TEMP_FAILURE_RETRY(image_file->SetLength(0));
+          TEMP_FAILURE_RETRY(image_file->Flush());
+        }
+      } else {
+        LOG(ERROR) << "image fd " << image_fd << " name " << image_filename;
+      }
+    } else {
+      image_file.reset(OS::CreateEmptyFile(image_filename));
+    }
 
-  if (oat_file->FlushCloseOrErase() != 0) {
-    LOG(ERROR) << "Failed to flush and close oat file " << oat_filename << " for " << oat_location;
-    return false;
-  }
+    if (image_file == nullptr) {
+      LOG(ERROR) << "Failed to open image file " << image_filename;
+      return false;
+    }
 
-  std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
-  ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  if (image_file.get() == nullptr) {
-    LOG(ERROR) << "Failed to open image file " << image_filename;
-    return false;
-  }
-  if (fchmod(image_file->Fd(), 0644) != 0) {
-    PLOG(ERROR) << "Failed to make image file world readable: " << image_filename;
-    image_file->Erase();
-    return EXIT_FAILURE;
-  }
+    if (!compile_app_image_ && fchmod(image_file->Fd(), 0644) != 0) {
+      PLOG(ERROR) << "Failed to make image file world readable: " << image_filename;
+      image_file->Erase();
+      return false;
+    }
 
-  // Write out the image + fields + methods.
-  const auto write_count = image_header->GetImageSize();
-  if (!image_file->WriteFully(image_->Begin(), write_count)) {
-    PLOG(ERROR) << "Failed to write image file " << image_filename;
-    image_file->Erase();
-    return false;
-  }
+    std::unique_ptr<char[]> compressed_data;
+    // Image data size excludes the bitmap and the header.
+    ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin());
+    const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader);
+    char* image_data = reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader);
+    size_t data_size;
+    const char* image_data_to_write = nullptr;
+    const uint64_t compress_start_time = NanoTime();
 
-  // Write out the image bitmap at the page aligned start of the image end.
-  const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
-  CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
-  if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()),
-                         bitmap_section.Size(), bitmap_section.Offset())) {
-    PLOG(ERROR) << "Failed to write image file " << image_filename;
-    image_file->Erase();
-    return false;
-  }
+    CHECK_EQ(image_header->storage_mode_, image_storage_mode_);
+    switch (image_storage_mode_) {
+      case ImageHeader::kStorageModeLZ4HC:  // Fall-through.
+      case ImageHeader::kStorageModeLZ4: {
+        const size_t compressed_max_size = LZ4_compressBound(image_data_size);
+        compressed_data.reset(new char[compressed_max_size]);
+        data_size = LZ4_compress(
+            reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader),
+            &compressed_data[0],
+            image_data_size);
 
-  CHECK_EQ(bitmap_section.End(), static_cast<size_t>(image_file->GetLength()));
-  if (image_file->FlushCloseOrErase() != 0) {
-    PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
-    return false;
+        break;
+      }
+      /*
+       * Disabled due to image_test64 flakiness. Both use the same decompression. b/27560444
+      case ImageHeader::kStorageModeLZ4HC: {
+        // Bound is same as non HC.
+        const size_t compressed_max_size = LZ4_compressBound(image_data_size);
+        compressed_data.reset(new char[compressed_max_size]);
+        data_size = LZ4_compressHC(
+            reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader),
+            &compressed_data[0],
+            image_data_size);
+        break;
+      }
+      */
+      case ImageHeader::kStorageModeUncompressed: {
+        data_size = image_data_size;
+        image_data_to_write = image_data;
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Unsupported storage mode " << image_storage_mode_;
+        UNREACHABLE();
+      }
+    }
+
+    if (compressed_data != nullptr) {
+      image_data_to_write = &compressed_data[0];
+      VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size << " in "
+                     << PrettyDuration(NanoTime() - compress_start_time);
+      if (kIsDebugBuild) {
+        std::unique_ptr<uint8_t[]> temp(new uint8_t[image_data_size]);
+        const size_t decompressed_size = LZ4_decompress_safe(
+            reinterpret_cast<char*>(&compressed_data[0]),
+            reinterpret_cast<char*>(&temp[0]),
+            data_size,
+            image_data_size);
+        CHECK_EQ(decompressed_size, image_data_size);
+        CHECK_EQ(memcmp(image_data, &temp[0], image_data_size), 0) << image_storage_mode_;
+      }
+    }
+
+    // Write out the image + fields + methods.
+    const bool is_compressed = compressed_data != nullptr;
+    if (!image_file->PwriteFully(image_data_to_write, data_size, sizeof(ImageHeader))) {
+      PLOG(ERROR) << "Failed to write image file data " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+
+    // Write out the image bitmap at the page-aligned end of the image data, also uncompressed
+    // for convenience.
+    const ImageSection& bitmap_section = image_header->GetImageSection(
+        ImageHeader::kSectionImageBitmap);
+    // Align up since data size may be unaligned if the image is compressed.
+    size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
+    if (!is_compressed) {
+      CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset());
+    }
+    if (!image_file->PwriteFully(reinterpret_cast<char*>(image_info.image_bitmap_->Begin()),
+                                 bitmap_section.Size(),
+                                 bitmap_position_in_file)) {
+      PLOG(ERROR) << "Failed to write image file " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+
+    int err = image_file->Flush();
+    if (err < 0) {
+      PLOG(ERROR) << "Failed to flush image file " << image_filename << " with result " << err;
+      image_file->Erase();
+      return false;
+    }
+
+    // Write header last in case the compiler gets killed in the middle of image writing.
+    // We do not want to have a corrupted image with a valid header.
+    // The header is uncompressed since it contains whether the image is compressed or not.
+    image_header->data_size_ = data_size;
+    if (!image_file->PwriteFully(reinterpret_cast<char*>(image_info.image_->Begin()),
+                                 sizeof(ImageHeader),
+                                 0)) {
+      PLOG(ERROR) << "Failed to write image file header " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+
+    CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(),
+             static_cast<size_t>(image_file->GetLength()));
+    if (image_file->FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
+      return false;
+    }
   }
   return true;
 }
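For reference, the compress-then-verify round trip in the hunk above can be reproduced against the public lz4 API alone. A self-contained sketch, assuming the lz4 headers are available; LZ4_compress_default is the non-deprecated spelling of the LZ4_compress call in the diff, and the 4 KiB payload is purely illustrative:

#include <lz4.h>

#include <cassert>
#include <cstring>
#include <string>
#include <vector>

int main() {
  const std::string src(4096, 'a');  // Stand-in for the image data payload.
  // Worst-case compressed size for this input, as used for compressed_data above.
  const int max_dst = LZ4_compressBound(static_cast<int>(src.size()));
  std::vector<char> dst(max_dst);
  const int compressed =
      LZ4_compress_default(src.data(), dst.data(), static_cast<int>(src.size()), max_dst);
  assert(compressed > 0);
  // Decompress and compare, as the kIsDebugBuild block does.
  std::vector<char> round_trip(src.size());
  const int decompressed = LZ4_decompress_safe(
      dst.data(), round_trip.data(), compressed, static_cast<int>(round_trip.size()));
  assert(decompressed == static_cast<int>(src.size()));
  assert(std::memcmp(src.data(), round_trip.data(), src.size()) == 0);
  return 0;
}

Note also the write ordering the hunk establishes: image data and bitmap go out first via PwriteFully(), and the header is written last, so a compiler killed mid-write leaves a file without a valid header rather than a plausible-looking but corrupt image.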
@@ -237,12 +356,14 @@
   DCHECK(object != nullptr);
   DCHECK_NE(image_objects_offset_begin_, 0u);
 
-  size_t bin_slot_offset = bin_slot_offsets_[bin_slot.GetBin()];
+  size_t oat_index = GetOatIndex(object);
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  size_t bin_slot_offset = image_info.bin_slot_offsets_[bin_slot.GetBin()];
   size_t new_offset = bin_slot_offset + bin_slot.GetIndex();
   DCHECK_ALIGNED(new_offset, kObjectAlignment);
 
   SetImageOffset(object, new_offset);
-  DCHECK_LT(new_offset, image_end_);
+  DCHECK_LT(new_offset, image_info.image_end_);
 }
 
 bool ImageWriter::IsImageOffsetAssigned(mirror::Object* object) const {
@@ -256,7 +377,9 @@
   DCHECK(IsImageOffsetAssigned(object));
   LockWord lock_word = object->GetLockWord(false);
   size_t offset = lock_word.ForwardingAddress();
-  DCHECK_LT(offset, image_end_);
+  size_t oat_index = GetOatIndex(object);
+  const ImageInfo& image_info = GetImageInfo(oat_index);
+  DCHECK_LT(offset, image_info.image_end_);
   return offset;
 }
 
@@ -294,40 +417,59 @@
 }
 
 void ImageWriter::PrepareDexCacheArraySlots() {
+  // Prepare dex cache array starts based on the ordering specified in the CompilerDriver.
+  // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
+  // when AssignImageBinSlot() assigns their indexes out of order.
+  for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) {
+    auto it = dex_file_oat_index_map_.find(dex_file);
+    DCHECK(it != dex_file_oat_index_map_.end()) << dex_file->GetLocation();
+    ImageInfo& image_info = GetImageInfo(it->second);
+    image_info.dex_cache_array_starts_.Put(dex_file, image_info.bin_slot_sizes_[kBinDexCacheArray]);
+    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
+    image_info.bin_slot_sizes_[kBinDexCacheArray] += layout.Size();
+  }
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *class_linker->DexLock());
-  uint32_t size = 0u;
-  for (jobject weak_root : class_linker->GetDexCaches()) {
+  for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
     mirror::DexCache* dex_cache =
-        down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache == nullptr) {
+        down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+    if (dex_cache == nullptr || IsInBootImage(dex_cache)) {
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
-    dex_cache_array_starts_.Put(dex_file, size);
+    CHECK(dex_file_oat_index_map_.find(dex_file) != dex_file_oat_index_map_.end())
+        << "Dex cache should have been pruned " << dex_file->GetLocation()
+        << "; possibly in class path";
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
+    size_t oat_index = GetOatIndexForDexCache(dex_cache);
+    ImageInfo& image_info = GetImageInfo(oat_index);
+    uint32_t start = image_info.dex_cache_array_starts_.Get(dex_file);
     DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), size + layout.TypesOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(),
+                               start + layout.TypesOffset(),
+                               dex_cache);
     DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(),
+                               start + layout.MethodsOffset(),
+                               dex_cache);
     DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), size + layout.FieldsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(),
+                               start + layout.FieldsOffset(),
+                               dex_cache);
     DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetStrings(), size + layout.StringsOffset());
-    size += layout.Size();
+    AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset(), dex_cache);
   }
-  // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
-  // when AssignImageBinSlot() assigns their indexes out or order.
-  bin_slot_sizes_[kBinDexCacheArray] = size;
 }
 
-void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) {
+void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset, DexCache* dex_cache) {
   if (array != nullptr) {
-    native_object_relocations_.emplace(
-        array,
-        NativeObjectRelocation { offset, kNativeObjectRelocationTypeDexCacheArray });
+    DCHECK(!IsInBootImage(array));
+    size_t oat_index = GetOatIndexForDexCache(dex_cache);
+    native_object_relocations_.emplace(array,
+        NativeObjectRelocation { oat_index, offset, kNativeObjectRelocationTypeDexCacheArray });
   }
 }
 
@@ -335,11 +477,11 @@
   DCHECK(arr != nullptr);
   if (kIsDebugBuild) {
     for (size_t i = 0, len = arr->GetLength(); i < len; i++) {
-      auto* method = arr->GetElementPtrSize<ArtMethod*>(i, target_ptr_size_);
+      ArtMethod* method = arr->GetElementPtrSize<ArtMethod*>(i, target_ptr_size_);
       if (method != nullptr && !method->IsRuntimeMethod()) {
-        auto* klass = method->GetDeclaringClass();
-        CHECK(klass == nullptr || IsImageClass(klass)) << PrettyClass(klass)
-            << " should be an image class";
+        mirror::Class* klass = method->GetDeclaringClass();
+        CHECK(klass == nullptr || KeepClass(klass))
+            << PrettyClass(klass) << " should be a kept class";
       }
     }
   }
@@ -439,21 +581,33 @@
       }
     } else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
       bin = kBinString;  // Strings are almost always immutable (except for object header).
-    }  // else bin = kBinRegular
+    } else if (object->GetClass<kVerifyNone>() ==
+        Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangObject)) {
+      // Instance of java.lang.Object, probably a lock object. This means it will be dirty when we
+      // synchronize on it.
+      bin = kBinMiscDirty;
+    } else if (object->IsDexCache()) {
+      // Dex file field becomes dirty when the image is loaded.
+      bin = kBinMiscDirty;
+    }
+    // else bin = kBinRegular
   }
 
+  size_t oat_index = GetOatIndex(object);
+  ImageInfo& image_info = GetImageInfo(oat_index);
+
   size_t offset_delta = RoundUp(object_size, kObjectAlignment);  // 64-bit alignment
-  current_offset = bin_slot_sizes_[bin];  // How many bytes the current bin is at (aligned).
-  // Move the current bin size up to accomodate the object we just assigned a bin slot.
-  bin_slot_sizes_[bin] += offset_delta;
+  current_offset = image_info.bin_slot_sizes_[bin];  // How many bytes the current bin is at (aligned).
+  // Move the current bin size up to accommodate the object we just assigned a bin slot.
+  image_info.bin_slot_sizes_[bin] += offset_delta;
 
   BinSlot new_bin_slot(bin, current_offset);
   SetImageBinSlot(object, new_bin_slot);
 
-  ++bin_slot_count_[bin];
+  ++image_info.bin_slot_count_[bin];
 
   // Grow the image closer to the end by the object we just assigned.
-  image_end_ += offset_delta;
+  image_info.image_end_ += offset_delta;
 }
 
 bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const {
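AssignImageBinSlot() above lays objects out by appending to a per-bin byte counter, so objects expected to be dirtied together (lock objects, dex caches) end up sharing pages. A reduced sketch of that bookkeeping; the bin names are hypothetical:

#include <cstddef>

enum Bin { kBinDirty, kBinClean, kBinCount };

struct BinLayout {
  size_t bin_sizes[kBinCount] = {};  // Bytes assigned so far, per bin.

  // Returns the object's offset within its bin and grows the bin,
  // mirroring image_info.bin_slot_sizes_[bin] += offset_delta above.
  size_t Assign(Bin bin, size_t aligned_object_size) {
    const size_t offset = bin_sizes[bin];
    bin_sizes[bin] += aligned_object_size;
    return offset;
  }
};

The in-bin offset only becomes a final image offset once every bin's total size, and therefore every bin's start, is known; resolving that is the job of UnbinObjectsIntoOffset() later in this file.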
@@ -476,8 +630,10 @@
     LockWord lock_word = object->GetLockWord(false);
     size_t offset = lock_word.ForwardingAddress();
     BinSlot bin_slot(offset);
-    DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()])
-      << "bin slot offset should not exceed the size of that bin";
+    size_t oat_index = GetOatIndex(object);
+    const ImageInfo& image_info = GetImageInfo(oat_index);
+    DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()])
+        << "bin slot offset should not exceed the size of that bin";
   }
   return true;
 }
@@ -491,36 +647,47 @@
   DCHECK_LE(offset, std::numeric_limits<uint32_t>::max());
 
   BinSlot bin_slot(static_cast<uint32_t>(offset));
-  DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()]);
+  size_t oat_index = GetOatIndex(object);
+  const ImageInfo& image_info = GetImageInfo(oat_index);
+  DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]);
 
   return bin_slot;
 }
 
 bool ImageWriter::AllocMemory() {
-  const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_,
-                                kPageSize);
-  std::string error_msg;
-  image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE,
-                                    false, false, &error_msg));
-  if (UNLIKELY(image_.get() == nullptr)) {
-    LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
-    return false;
-  }
+  for (ImageInfo& image_info : image_infos_) {
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    const size_t length = RoundUp(
+        image_info.CreateImageSections(unused_sections), kPageSize);
 
-  // Create the image bitmap, only needs to cover mirror object section which is up to image_end_.
-  CHECK_LE(image_end_, length);
-  image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create(
-      "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize)));
-  if (image_bitmap_.get() == nullptr) {
-    LOG(ERROR) << "Failed to allocate memory for image bitmap";
-    return false;
+    std::string error_msg;
+    image_info.image_.reset(MemMap::MapAnonymous("image writer image",
+                                                 nullptr,
+                                                 length,
+                                                 PROT_READ | PROT_WRITE,
+                                                 false,
+                                                 false,
+                                                 &error_msg));
+    if (UNLIKELY(image_info.image_.get() == nullptr)) {
+      LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
+      return false;
+    }
+
+    // Create the image bitmap, only needs to cover mirror object section which is up to image_end_.
+    CHECK_LE(image_info.image_end_, length);
+    image_info.image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create(
+        "image bitmap", image_info.image_->Begin(), RoundUp(image_info.image_end_, kPageSize)));
+    if (image_info.image_bitmap_.get() == nullptr) {
+      LOG(ERROR) << "Failed to allocate memory for image bitmap";
+      return false;
+    }
   }
   return true;
 }
 
 class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
  public:
-  bool Visit(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(Thread::Current());
     mirror::Class::ComputeName(hs.NewHandle(c));
     return true;
@@ -533,45 +700,168 @@
   class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
-bool ImageWriter::IsImageClass(Class* klass) {
+static bool IsBootClassLoaderClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return klass->GetClassLoader() == nullptr;
+}
+
+bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
+}
+
+bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
+  bool early_exit = false;
+  std::unordered_set<mirror::Class*> visited;
+  return PruneAppImageClassInternal(klass, &early_exit, &visited);
+}
+
+bool ImageWriter::PruneAppImageClassInternal(
+    mirror::Class* klass,
+    bool* early_exit,
+    std::unordered_set<mirror::Class*>* visited) {
+  DCHECK(early_exit != nullptr);
+  DCHECK(visited != nullptr);
+  DCHECK(compile_app_image_);
+  if (klass == nullptr || IsInBootImage(klass)) {
+    return false;
+  }
+  auto found = prune_class_memo_.find(klass);
+  if (found != prune_class_memo_.end()) {
+    // Already computed, return the found value.
+    return found->second;
+  }
+  // Circular dependency: return false, but do not store the result in the memoization table.
+  if (visited->find(klass) != visited->end()) {
+    *early_exit = true;
+    return false;
+  }
+  visited->emplace(klass);
+  bool result = IsBootClassLoaderClass(klass);
+  std::string temp;
+  // Prune if not an image class; this handles any broken sets of image classes, such as having a
+  // class in the set but not its superclass.
+  result = result || !compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
+  bool my_early_exit = false;  // Only for ourselves, ignore caller.
+  // Remove classes that failed to verify since we don't want to have java.lang.VerifyError in the
+  // app image.
+  if (klass->GetStatus() == mirror::Class::kStatusError) {
+    result = true;
+  } else {
+    CHECK(klass->GetVerifyError() == nullptr) << PrettyClass(klass);
+  }
+  if (!result) {
+    // Check interfaces since these won't be visited through VisitReferences.
+    mirror::IfTable* if_table = klass->GetIfTable();
+    for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+      result = result || PruneAppImageClassInternal(if_table->GetInterface(i),
+                                                    &my_early_exit,
+                                                    visited);
+    }
+  }
+  if (klass->IsObjectArrayClass()) {
+    result = result || PruneAppImageClassInternal(klass->GetComponentType(),
+                                                  &my_early_exit,
+                                                  visited);
+  }
+  // Check static fields and their classes.
+  size_t num_static_fields = klass->NumReferenceStaticFields();
+  if (num_static_fields != 0 && klass->IsResolved()) {
+    // GC can presumably happen while we are cross-compiling; doing the pointer-size logic here
+    // should not cause performance problems.
+    MemberOffset field_offset = klass->GetFirstReferenceStaticFieldOffset(
+        Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+    for (size_t i = 0u; i < num_static_fields; ++i) {
+      mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
+      if (ref != nullptr) {
+        if (ref->IsClass()) {
+          result = result || PruneAppImageClassInternal(ref->AsClass(),
+                                                        &my_early_exit,
+                                                        visited);
+        } else {
+          result = result || PruneAppImageClassInternal(ref->GetClass(),
+                                                        &my_early_exit,
+                                                        visited);
+        }
+      }
+      field_offset = MemberOffset(field_offset.Uint32Value() +
+                                  sizeof(mirror::HeapReference<mirror::Object>));
+    }
+  }
+  result = result || PruneAppImageClassInternal(klass->GetSuperClass(),
+                                                &my_early_exit,
+                                                visited);
+  // Erase the element we stored earlier since we are exiting the function.
+  auto it = visited->find(klass);
+  DCHECK(it != visited->end());
+  visited->erase(it);
+  // Only store the result if it is true or none of the calls early-exited due to circular
+  // dependencies. If visited is empty then we are the root caller; in that case the cycle was in
+  // a child call and we can remember the result.
+  if (result || !my_early_exit || visited->empty()) {
+    prune_class_memo_[klass] = result;
+  }
+  *early_exit |= my_early_exit;
+  return result;
+}
+
+bool ImageWriter::KeepClass(Class* klass) {
   if (klass == nullptr) {
     return false;
   }
+  if (compile_app_image_ && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+    // Already in boot image, return true.
+    return true;
+  }
   std::string temp;
-  return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
+  if (!compiler_driver_.IsImageClass(klass->GetDescriptor(&temp))) {
+    return false;
+  }
+  if (compile_app_image_) {
+    // For app images, we need to prune boot loader classes that are not in the boot image since
+    // these may have already been loaded when the app image is loaded.
+    // Keep classes in the boot image space since we don't want to re-resolve these.
+    return !PruneAppImageClass(klass);
+  }
+  return true;
 }
 
 class NonImageClassesVisitor : public ClassVisitor {
  public:
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
-  bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!image_writer_->IsImageClass(klass)) {
-      std::string temp;
-      non_image_classes_.insert(klass->GetDescriptor(&temp));
+  bool operator()(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!image_writer_->KeepClass(klass)) {
+      classes_to_prune_.insert(klass);
     }
     return true;
   }
 
-  std::set<std::string> non_image_classes_;
+  std::unordered_set<mirror::Class*> classes_to_prune_;
   ImageWriter* const image_writer_;
 };
 
 void ImageWriter::PruneNonImageClasses() {
-  if (compiler_driver_.GetImageClasses() == nullptr) {
-    return;
-  }
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
 
+  // Clear class table strong roots so that dex caches can get pruned. We require pruning the class
+  // path dex caches.
+  class_linker->ClearClassTableStrongRoots();
+
   // Make a list of classes we would like to prune.
   NonImageClassesVisitor visitor(this);
   class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
-  for (const std::string& it : visitor.non_image_classes_) {
-    bool result = class_linker->RemoveClass(it.c_str(), nullptr);
+  VLOG(compiler) << "Pruning " << visitor.classes_to_prune_.size() << " classes";
+  for (mirror::Class* klass : visitor.classes_to_prune_) {
+    std::string temp;
+    const char* name = klass->GetDescriptor(&temp);
+    VLOG(compiler) << "Pruning class " << name;
+    if (!compile_app_image_) {
+      DCHECK(IsBootClassLoaderClass(klass));
+    }
+    bool result = class_linker->RemoveClass(name, klass->GetClassLoader());
     DCHECK(result);
   }
 
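PruneAppImageClassInternal() above is a memoized depth-first reachability check with one subtlety: a result computed along a path that hit a cycle is provisional and must not be cached. A generic sketch of that rule; Node and its deps edge list are hypothetical stand-ins for a class and its superclass/interface/field-type references:

#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Node {
  bool bad = false;         // Stands in for "is a boot class loader class", etc.
  std::vector<Node*> deps;  // Superclass, interfaces, field classes, ...
};

std::unordered_map<Node*, bool> memo;  // Corresponds to prune_class_memo_.

bool Prune(Node* n, bool* early_exit, std::unordered_set<Node*>* visited) {
  if (n == nullptr) {
    return false;
  }
  auto found = memo.find(n);
  if (found != memo.end()) {
    return found->second;  // Already computed.
  }
  if (visited->count(n) != 0) {  // Cycle: answer false, but do not cache it.
    *early_exit = true;
    return false;
  }
  visited->insert(n);
  bool result = n->bad;
  bool my_early_exit = false;  // Only for ourselves, ignore caller.
  for (Node* dep : n->deps) {
    result = result || Prune(dep, &my_early_exit, visited);
  }
  visited->erase(n);
  // Cache unless a cycle made this result provisional; a true result or a
  // root-level call (empty visited set) is always safe to remember.
  if (result || !my_early_exit || visited->empty()) {
    memo[n] = result;
  }
  *early_exit |= my_early_exit;
  return result;
}

Without the early-exit bookkeeping, a member of a cycle could cache false even though part of its dependency set was never explored.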
@@ -581,14 +871,14 @@
   ScopedAssertNoThreadSuspension sa(self, __FUNCTION__);
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
   ReaderMutexLock mu2(self, *class_linker->DexLock());
-  for (jobject weak_root : class_linker->GetDexCaches()) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache == nullptr) {
+  for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+    if (self->IsJWeakCleared(data.weak_root)) {
       continue;
     }
+    mirror::DexCache* dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
-      if (klass != nullptr && !IsImageClass(klass)) {
+      if (klass != nullptr && !KeepClass(klass)) {
         dex_cache->SetResolvedType(i, nullptr);
       }
     }
@@ -596,26 +886,26 @@
     for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* method =
           mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
-      if (method != nullptr) {
-        auto* declaring_class = method->GetDeclaringClass();
-        // Miranda methods may be held live by a class which was not an image class but have a
-        // declaring class which is an image class. Set it to the resolution method to be safe and
-        // prevent dangling pointers.
-        if (method->IsMiranda() || !IsImageClass(declaring_class)) {
-          mirror::DexCache::SetElementPtrSize(resolved_methods,
-                                              i,
-                                              resolution_method,
-                                              target_ptr_size_);
-        } else {
-          // Check that the class is still in the classes table.
-          DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
-              << PrettyClass(declaring_class) << " not in class linker table";
-        }
+      DCHECK(method != nullptr) << "Expected resolution method instead of null method";
+      mirror::Class* declaring_class = method->GetDeclaringClass();
+      // Copied methods may be held live by a class which was not an image class but have a
+      // declaring class which is an image class. Set it to the resolution method to be safe and
+      // prevent dangling pointers.
+      if (method->IsCopied() || !KeepClass(declaring_class)) {
+        mirror::DexCache::SetElementPtrSize(resolved_methods,
+                                            i,
+                                            resolution_method,
+                                            target_ptr_size_);
+      } else {
+        // Check that the class is still in the classes table.
+        DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
+            << PrettyClass(declaring_class) << " not in class linker table";
       }
     }
+    ArtField** resolved_fields = dex_cache->GetResolvedFields();
     for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-      ArtField* field = dex_cache->GetResolvedField(i, target_ptr_size_);
-      if (field != nullptr && !IsImageClass(field->GetDeclaringClass())) {
+      ArtField* field = mirror::DexCache::GetElementPtrSize(resolved_fields, i, target_ptr_size_);
+      if (field != nullptr && !KeepClass(field->GetDeclaringClass())) {
         dex_cache->SetResolvedField(i, nullptr, target_ptr_size_);
       }
     }
@@ -626,6 +916,9 @@
 
   // Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
   class_linker->DropFindArrayClassCache();
+
+  // Clear to save RAM.
+  prune_class_memo_.clear();
 }
 
 void ImageWriter::CheckNonImageClassesRemoved() {
@@ -637,13 +930,13 @@
 
 void ImageWriter::CheckNonImageClassesRemovedCallback(Object* obj, void* arg) {
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-  if (obj->IsClass()) {
+  if (obj->IsClass() && !image_writer->IsInBootImage(obj)) {
     Class* klass = obj->AsClass();
-    if (!image_writer->IsImageClass(klass)) {
+    if (!image_writer->KeepClass(klass)) {
       image_writer->DumpImageClasses();
       std::string temp;
-      CHECK(image_writer->IsImageClass(klass)) << klass->GetDescriptor(&temp)
-                                               << " " << PrettyDescriptor(klass);
+      CHECK(image_writer->KeepClass(klass)) << klass->GetDescriptor(&temp)
+                                            << " " << PrettyDescriptor(klass);
     }
   }
 }
@@ -656,19 +949,50 @@
   }
 }
 
+mirror::String* ImageWriter::FindInternedString(mirror::String* string) {
+  Thread* const self = Thread::Current();
+  for (const ImageInfo& image_info : image_infos_) {
+    mirror::String* const found = image_info.intern_table_->LookupStrong(self, string);
+    DCHECK(image_info.intern_table_->LookupWeak(self, string) == nullptr)
+        << string->ToModifiedUtf8();
+    if (found != nullptr) {
+      return found;
+    }
+  }
+  if (compile_app_image_) {
+    Runtime* const runtime = Runtime::Current();
+    mirror::String* found = runtime->GetInternTable()->LookupStrong(self, string);
+    // If we found it in the runtime intern table it could either be in the boot image or interned
+    // during app image compilation. If it was in the boot image return that, otherwise return null
+    // since it belongs to another image space.
+    if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found)) {
+      return found;
+    }
+    DCHECK(runtime->GetInternTable()->LookupWeak(self, string) == nullptr)
+        << string->ToModifiedUtf8();
+  }
+  return nullptr;
+}
+
 void ImageWriter::CalculateObjectBinSlots(Object* obj) {
   DCHECK(obj != nullptr);
   // if it is a string, we want to intern it if its not interned.
   if (obj->GetClass()->IsStringClass()) {
+    size_t oat_index = GetOatIndex(obj);
+    ImageInfo& image_info = GetImageInfo(oat_index);
+
     // we must be an interned string that was forward referenced and already assigned
     if (IsImageBinSlotAssigned(obj)) {
-      DCHECK_EQ(obj, obj->AsString()->Intern());
+      DCHECK_EQ(obj, FindInternedString(obj->AsString()));
       return;
     }
-    // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
-    // we are guaranteed to not have GC during image writing.
-    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString(
-        obj->AsString());
+    // Check whether the string is already interned in another image info so that the intern
+    // tables of two different images don't contain the same string.
+    mirror::String* interned = FindInternedString(obj->AsString());
+    if (interned == nullptr) {
+      // Not in another image space, insert to our table.
+      interned = image_info.intern_table_->InternStrongImageString(obj->AsString());
+    }
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
         // interned obj is after us, allocate its location early
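FindInternedString() above probes each image's own strong intern table first and consults the runtime table only for strings that live in the boot image. A first-match sketch of the per-image lookup; Table here is a hypothetical stand-in for the strong intern tables:

#include <string>
#include <unordered_set>
#include <vector>

using Table = std::unordered_set<std::string>;

// Returns the canonical copy from the first image that interned the string,
// or nullptr so the caller can intern into its own image's table.
const std::string* FindInterned(const std::vector<Table>& image_tables,
                                const std::string& s) {
  for (const Table& table : image_tables) {
    auto it = table.find(s);
    if (it != table.end()) {
      return &*it;
    }
  }
  return nullptr;
}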
@@ -684,7 +1008,7 @@
   AssignImageBinSlot(obj);
 }
 
-ObjectArray<Object>* ImageWriter::CreateImageRoots() const {
+ObjectArray<Object>* ImageWriter::CreateImageRoots(size_t oat_index) const {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
@@ -692,30 +1016,68 @@
   Handle<Class> object_array_class(hs.NewHandle(
       class_linker->FindSystemClass(self, "[Ljava/lang/Object;")));
 
+  std::unordered_set<const DexFile*> image_dex_files;
+  for (auto& pair : dex_file_oat_index_map_) {
+    const DexFile* image_dex_file = pair.first;
+    size_t image_oat_index = pair.second;
+    if (oat_index == image_oat_index) {
+      image_dex_files.insert(image_dex_file);
+    }
+  }
+
   // build an Object[] of all the DexCaches used in the source_space_.
   // Since we can't hold the dex lock when allocating the dex_caches
   // ObjectArray, we lock the dex lock twice, first to get the number
   // of dex caches first and then lock it again to copy the dex
   // caches. We check that the number of dex caches does not change.
-  size_t dex_cache_count;
+  size_t dex_cache_count = 0;
   {
     ReaderMutexLock mu(self, *class_linker->DexLock());
-    dex_cache_count = class_linker->GetDexCacheCount();
+    // Count number of dex caches not in the boot image.
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache == nullptr) {
+        continue;
+      }
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      if (!IsInBootImage(dex_cache)) {
+        dex_cache_count += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
+      }
+    }
   }
   Handle<ObjectArray<Object>> dex_caches(
-      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(),
-                                              dex_cache_count)));
+      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), dex_cache_count)));
   CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array.";
   {
     ReaderMutexLock mu(self, *class_linker->DexLock());
-    CHECK_EQ(dex_cache_count, class_linker->GetDexCacheCount())
-        << "The number of dex caches changed.";
-    size_t i = 0;
-    for (jobject weak_root : class_linker->GetDexCaches()) {
+    size_t non_image_dex_caches = 0;
+    // Re-count the number of non-image dex caches.
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-      dex_caches->Set<false>(i, dex_cache);
-      ++i;
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache == nullptr) {
+        continue;
+      }
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      if (!IsInBootImage(dex_cache)) {
+        non_image_dex_caches += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
+      }
+    }
+    CHECK_EQ(dex_cache_count, non_image_dex_caches)
+        << "The number of non-image dex caches changed.";
+    size_t i = 0;
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache == nullptr) {
+        continue;
+      }
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      if (!IsInBootImage(dex_cache) && image_dex_files.find(dex_file) != image_dex_files.end()) {
+        dex_caches->Set<false>(i, dex_cache);
+        ++i;
+      }
     }
   }
 
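The dex-cache enumeration above follows a count, allocate, re-count-and-verify discipline because the ObjectArray allocation cannot happen while the dex lock is held. A condensed sketch of that pattern, with a plain std::mutex and assert standing in for ART's DexLock and CHECK macros:

#include <cassert>
#include <mutex>
#include <vector>

std::mutex dex_lock;
std::vector<int> dex_caches;  // Hypothetical stand-in for the dex cache list.

std::vector<int*> SnapshotDexCaches() {
  size_t count;
  {
    std::lock_guard<std::mutex> lock(dex_lock);  // First pass: count under the lock.
    count = dex_caches.size();
  }
  std::vector<int*> snapshot;
  snapshot.reserve(count);  // The allocation happens without the lock held.
  {
    std::lock_guard<std::mutex> lock(dex_lock);  // Second pass: verify, then copy.
    assert(count == dex_caches.size() && "the number of dex caches changed");
    for (int& dc : dex_caches) {
      snapshot.push_back(&dc);
    }
  }
  return snapshot;
}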
@@ -755,6 +1117,10 @@
 
 // For an unvisited object, visit it then all its children found via fields.
 void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
+  if (IsInBootImage(obj)) {
+    // Object is in the boot image; no need to fix it up.
+    return;
+  }
   // Use our own visitor routine (instead of GC visitor) to get better locality between
   // an object and its fields
   if (!IsImageBinSlotAssigned(obj)) {
@@ -779,9 +1145,24 @@
       }
       // Visit and assign offsets for fields and field arrays.
       auto* as_klass = h_obj->AsClass();
+      mirror::DexCache* dex_cache = as_klass->GetDexCache();
+      DCHECK_NE(as_klass->GetStatus(), mirror::Class::kStatusError);
+      if (compile_app_image_) {
+        // Extra sanity, no boot loader classes should be left!
+        CHECK(!IsBootClassLoaderClass(as_klass)) << PrettyClass(as_klass);
+      }
       LengthPrefixedArray<ArtField>* fields[] = {
           as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(),
       };
+      size_t oat_index = GetOatIndexForDexCache(dex_cache);
+      ImageInfo& image_info = GetImageInfo(oat_index);
+      {
+        // Note: This table is only accessed from the image writer, so the lock is technically
+        // unnecessary.
+        WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+        // Insert in the class table for this image.
+        image_info.class_table_->Insert(as_klass);
+      }
       for (LengthPrefixedArray<ArtField>* cur_fields : fields) {
         // Total array length including header.
         if (cur_fields != nullptr) {
@@ -790,61 +1171,81 @@
           auto it = native_object_relocations_.find(cur_fields);
           CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields
                                                   << " already forwarded";
-          size_t& offset = bin_slot_sizes_[kBinArtField];
+          size_t& offset = image_info.bin_slot_sizes_[kBinArtField];
+          DCHECK(!IsInBootImage(cur_fields));
           native_object_relocations_.emplace(
-              cur_fields, NativeObjectRelocation {
-                  offset, kNativeObjectRelocationTypeArtFieldArray });
+              cur_fields,
+              NativeObjectRelocation {
+                  oat_index, offset, kNativeObjectRelocationTypeArtFieldArray
+              });
           offset += header_size;
           // Forward individual fields so that we can quickly find where they belong.
-          for (size_t i = 0, count = cur_fields->Length(); i < count; ++i) {
+          for (size_t i = 0, count = cur_fields->size(); i < count; ++i) {
             // Need to forward arrays separate of fields.
             ArtField* field = &cur_fields->At(i);
             auto it2 = native_object_relocations_.find(field);
             CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i
                 << " already assigned " << PrettyField(field) << " static=" << field->IsStatic();
+            DCHECK(!IsInBootImage(field));
             native_object_relocations_.emplace(
-                field, NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField });
+                field,
+                NativeObjectRelocation { oat_index, offset, kNativeObjectRelocationTypeArtField });
             offset += sizeof(ArtField);
           }
         }
       }
       // Visit and assign offsets for methods.
-      LengthPrefixedArray<ArtMethod>* method_arrays[] = {
-          as_klass->GetDirectMethodsPtr(), as_klass->GetVirtualMethodsPtr(),
-      };
-      for (LengthPrefixedArray<ArtMethod>* array : method_arrays) {
-        if (array == nullptr) {
-          continue;
-        }
+      size_t num_methods = as_klass->NumMethods();
+      if (num_methods != 0) {
         bool any_dirty = false;
-        size_t count = 0;
-        const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
-        const size_t method_size = ArtMethod::Size(target_ptr_size_);
-        auto iteration_range =
-            MakeIterationRangeFromLengthPrefixedArray(array, method_size, method_alignment);
-        for (auto& m : iteration_range) {
-          any_dirty = any_dirty || WillMethodBeDirty(&m);
-          ++count;
+        for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
+          if (WillMethodBeDirty(&m)) {
+            any_dirty = true;
+            break;
+          }
         }
-        NativeObjectRelocationType type = any_dirty ? kNativeObjectRelocationTypeArtMethodDirty :
-            kNativeObjectRelocationTypeArtMethodClean;
+        NativeObjectRelocationType type = any_dirty
+            ? kNativeObjectRelocationTypeArtMethodDirty
+            : kNativeObjectRelocationTypeArtMethodClean;
         Bin bin_type = BinTypeForNativeRelocationType(type);
         // Forward the entire array at once, but header first.
+        const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
+        const size_t method_size = ArtMethod::Size(target_ptr_size_);
         const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0,
                                                                                method_size,
                                                                                method_alignment);
+        LengthPrefixedArray<ArtMethod>* array = as_klass->GetMethodsPtr();
         auto it = native_object_relocations_.find(array);
-        CHECK(it == native_object_relocations_.end()) << "Method array " << array
-            << " already forwarded";
-        size_t& offset = bin_slot_sizes_[bin_type];
-        native_object_relocations_.emplace(array, NativeObjectRelocation { offset,
-            any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty :
-                kNativeObjectRelocationTypeArtMethodArrayClean });
+        CHECK(it == native_object_relocations_.end())
+            << "Method array " << array << " already forwarded";
+        size_t& offset = image_info.bin_slot_sizes_[bin_type];
+        DCHECK(!IsInBootImage(array));
+        native_object_relocations_.emplace(array,
+            NativeObjectRelocation {
+                oat_index,
+                offset,
+                any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
+                          : kNativeObjectRelocationTypeArtMethodArrayClean });
         offset += header_size;
-        for (auto& m : iteration_range) {
-          AssignMethodOffset(&m, type);
+        for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
+          AssignMethodOffset(&m, type, oat_index);
         }
-        (any_dirty ? dirty_methods_ : clean_methods_) += count;
+        (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
+      }
+      // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
+      // live.
+      if (as_klass->ShouldHaveImt()) {
+        ImTable* imt = as_klass->GetImt(target_ptr_size_);
+        for (size_t i = 0; i < ImTable::kSize; ++i) {
+          ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
+          DCHECK(imt_method != nullptr);
+          if (imt_method->IsRuntimeMethod() &&
+              !IsInBootImage(imt_method) &&
+              !NativeRelocationAssigned(imt_method)) {
+            AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+          }
+        }
+        TryAssignImTableOffset(imt, oat_index);
+      }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
@@ -856,16 +1261,69 @@
           WalkFieldsInOrder(value);
         }
       }
+    } else if (h_obj->IsClassLoader()) {
+      // Register the class loader if it has a class table.
+      // The fake boot class loader should not get registered and we should end up with only one
+      // class loader.
+      mirror::ClassLoader* class_loader = h_obj->AsClassLoader();
+      if (class_loader->GetClassTable() != nullptr) {
+        class_loaders_.insert(class_loader);
+      }
     }
   }
 }
 
-void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) {
-  auto it = native_object_relocations_.find(method);
-  CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned "
+bool ImageWriter::NativeRelocationAssigned(void* ptr) const {
+  return native_object_relocations_.find(ptr) != native_object_relocations_.end();
+}
+
+void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
+  // No offset, or already assigned.
+  if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
+    return;
+  }
+  // Reserve space for the ImTable in this image's kBinImTable bin.
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  const size_t size = ImTable::SizeInBytes(target_ptr_size_);
+  native_object_relocations_.emplace(
+      imt,
+      NativeObjectRelocation {
+          oat_index,
+          image_info.bin_slot_sizes_[kBinImTable],
+          kNativeObjectRelocationTypeIMTable});
+  image_info.bin_slot_sizes_[kBinImTable] += size;
+}
+
+void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
+  // No offset, or already assigned.
+  if (table == nullptr || NativeRelocationAssigned(table)) {
+    return;
+  }
+  CHECK(!IsInBootImage(table));
+  // If the method is a conflict method we also want to assign the conflict table offset.
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  const size_t size = table->ComputeSize(target_ptr_size_);
+  native_object_relocations_.emplace(
+      table,
+      NativeObjectRelocation {
+          oat_index,
+          image_info.bin_slot_sizes_[kBinIMTConflictTable],
+          kNativeObjectRelocationTypeIMTConflictTable});
+  image_info.bin_slot_sizes_[kBinIMTConflictTable] += size;
+}
+
+void ImageWriter::AssignMethodOffset(ArtMethod* method,
+                                     NativeObjectRelocationType type,
+                                     size_t oat_index) {
+  DCHECK(!IsInBootImage(method));
+  CHECK(!NativeRelocationAssigned(method)) << "Method " << method << " already assigned "
       << PrettyMethod(method);
-  size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(type)];
-  native_object_relocations_.emplace(method, NativeObjectRelocation { offset, type });
+  if (method->IsRuntimeMethod()) {
+    TryAssignConflictTableOffset(method->GetImtConflictTable(target_ptr_size_), oat_index);
+  }
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  size_t& offset = image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(type)];
+  native_object_relocations_.emplace(method, NativeObjectRelocation { oat_index, offset, type });
   offset += ArtMethod::Size(target_ptr_size_);
 }
 
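The array forwarding above always reserves the length-prefixed header first and then hands out element offsets contiguously behind it, bumping the bin size as it goes. A toy version of that layout step; the names are hypothetical, and ART's ComputeSize additionally folds in element alignment:

#include <cstddef>
#include <unordered_map>

// Toy offset table, standing in for native_object_relocations_.
std::unordered_map<const void*, size_t> relocations;

// Forwards a length-prefixed array: header first, then each element.
// Returns the new running size of the bin.
size_t ForwardArray(const void* array, const void* const* elements, size_t count,
                    size_t header_size, size_t element_size, size_t bin_offset) {
  relocations[array] = bin_offset;  // The array (its header) gets the first slot.
  bin_offset += header_size;
  for (size_t i = 0; i < count; ++i) {  // Elements follow contiguously.
    relocations[elements[i]] = bin_offset;
    bin_offset += element_size;
  }
  return bin_offset;
}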
@@ -878,10 +1336,13 @@
 void ImageWriter::UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg) {
   ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
   DCHECK(writer != nullptr);
-  writer->UnbinObjectsIntoOffset(obj);
+  if (!writer->IsInBootImage(obj)) {
+    writer->UnbinObjectsIntoOffset(obj);
+  }
 }
 
 void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) {
+  DCHECK(!IsInBootImage(obj));
   CHECK(obj != nullptr);
 
   // We know the bin slot, and the total bin sizes for all objects by now,
@@ -895,156 +1356,245 @@
 
 void ImageWriter::CalculateNewObjectOffsets() {
   Thread* const self = Thread::Current();
-  StackHandleScope<1> hs(self);
-  Handle<ObjectArray<Object>> image_roots(hs.NewHandle(CreateImageRoots()));
+  StackHandleScopeCollection handles(self);
+  std::vector<Handle<ObjectArray<Object>>> image_roots;
+  for (size_t i = 0, size = oat_filenames_.size(); i != size; ++i) {
+    image_roots.push_back(handles.NewHandle(CreateImageRoots(i)));
+  }
 
   auto* runtime = Runtime::Current();
   auto* heap = runtime->GetHeap();
-  DCHECK_EQ(0U, image_end_);
 
   // Leave space for the header, but do not write it yet, we need to
   // know where image_roots is going to end up
-  image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
+  image_objects_offset_begin_ = RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  image_objects_offset_begin_ = image_end_;
-  // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
-  heap->VisitObjects(WalkFieldsCallback, this);
+  const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
   // Write the image runtime methods.
   image_methods_[ImageHeader::kResolutionMethod] = runtime->GetResolutionMethod();
   image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod();
   image_methods_[ImageHeader::kImtUnimplementedMethod] = runtime->GetImtUnimplementedMethod();
-  image_methods_[ImageHeader::kCalleeSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kSaveAll);
-  image_methods_[ImageHeader::kRefsOnlySaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
-
-  // Add room for fake length prefixed array.
-  const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean;
-  auto it = native_object_relocations_.find(&image_method_array_);
-  CHECK(it == native_object_relocations_.end());
-  size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
-  native_object_relocations_.emplace(&image_method_array_,
-                                     NativeObjectRelocation { offset, image_method_type });
-  size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
-  const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize(
-      0, ArtMethod::Size(target_ptr_size_), method_alignment);
-  CHECK_ALIGNED_PARAM(array_size, method_alignment);
-  offset += array_size;
+  image_methods_[ImageHeader::kSaveAllCalleeSavesMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves);
+  image_methods_[ImageHeader::kSaveRefsOnlyMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly);
+  image_methods_[ImageHeader::kSaveRefsAndArgsMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
+  image_methods_[ImageHeader::kSaveEverythingMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveEverything);
+  // Visit image methods first to have the main runtime methods in the first image.
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
     CHECK(m->IsRuntimeMethod());
-    AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean);
+    DCHECK_EQ(compile_app_image_, IsInBootImage(m))
+        << "Runtime methods should be in the boot image when compiling an app image";
+    if (!IsInBootImage(m)) {
+      AssignMethodOffset(m, kNativeObjectRelocationTypeRuntimeMethod, GetDefaultOatIndex());
+    }
   }
+
+  // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
+  heap->VisitObjects(WalkFieldsCallback, this);
+
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
 
-  // Calculate bin slot offsets.
-  size_t bin_offset = image_objects_offset_begin_;
-  for (size_t i = 0; i != kBinSize; ++i) {
-    bin_slot_offsets_[i] = bin_offset;
-    bin_offset += bin_slot_sizes_[i];
-    if (i == kBinArtField) {
-      static_assert(kBinArtField + 1 == kBinArtMethodClean, "Methods follow fields.");
-      static_assert(alignof(ArtField) == 4u, "ArtField alignment is 4.");
-      DCHECK_ALIGNED(bin_offset, 4u);
-      DCHECK(method_alignment == 4u || method_alignment == 8u);
-      bin_offset = RoundUp(bin_offset, method_alignment);
-    }
+  // Calculate the sizes of the intern tables and class tables.
+  for (ImageInfo& image_info : image_infos_) {
+    // Calculate how big the intern table will be after being serialized.
+    InternTable* const intern_table = image_info.intern_table_.get();
+    CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
+    image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+    // Calculate the size of the class table.
+    ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
   }
-  // NOTE: There may be additional padding between the bin slots and the intern table.
 
-  DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
+  // Calculate bin slot offsets.
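+  // Each bin starts where the previous one ends, rounded up to the alignment its contents
+  // require (methods, dex cache arrays and IMT data are stricter than mirror objects).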
+  for (ImageInfo& image_info : image_infos_) {
+    size_t bin_offset = image_objects_offset_begin_;
+    for (size_t i = 0; i != kBinSize; ++i) {
+      switch (i) {
+        case kBinArtMethodClean:
+        case kBinArtMethodDirty: {
+          bin_offset = RoundUp(bin_offset, method_alignment);
+          break;
+        }
+        case kBinDexCacheArray:
+          bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment());
+          break;
+        case kBinImTable:
+        case kBinIMTConflictTable: {
+          bin_offset = RoundUp(bin_offset, static_cast<size_t>(target_ptr_size_));
+          break;
+        }
+        default: {
+          // Normal alignment.
+        }
+      }
+      image_info.bin_slot_offsets_[i] = bin_offset;
+      bin_offset += image_info.bin_slot_sizes_[i];
+    }
+    // NOTE: There may be additional padding between the bin slots and the intern table.
+    DCHECK_EQ(image_info.image_end_,
+              GetBinSizeSum(image_info, kBinMirrorCount) + image_objects_offset_begin_);
+  }
+
+  // Calculate image offsets.
+  size_t image_offset = 0;
+  for (ImageInfo& image_info : image_infos_) {
+    image_info.image_begin_ = global_image_begin_ + image_offset;
+    image_info.image_offset_ = image_offset;
+    ImageSection unused_sections[ImageHeader::kSectionCount];
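+    // CreateImageSections() computes the section layout and returns the end of the last
+    // section; rounded up to a page, that is the size of this image.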
+    image_info.image_size_ = RoundUp(image_info.CreateImageSections(unused_sections), kPageSize);
+    // There should be no gaps until the next image.
+    image_offset += image_info.image_size_;
+  }
 
   // Transform each object's bin slot into an offset which will be used to do the final copy.
   heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
 
-  DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
+  // The single-image check of image_end_ no longer applies; the equivalent per-image check is
+  // performed above when calculating bin slot offsets for each ImageInfo.
 
-  image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots.Get()));
+  for (size_t i = 0, size = image_infos_.size(); i != size; ++i) {
+    ImageInfo& image_info = GetImageInfo(i);
+    image_info.image_roots_address_ =
+        PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get()));
+  }
 
   // Update the native relocations by adding their bin sums.
   for (auto& pair : native_object_relocations_) {
     NativeObjectRelocation& relocation = pair.second;
     Bin bin_type = BinTypeForNativeRelocationType(relocation.type);
-    relocation.offset += bin_slot_offsets_[bin_type];
+    ImageInfo& image_info = GetImageInfo(relocation.oat_index);
+    relocation.offset += image_info.bin_slot_offsets_[bin_type];
   }
 
-  // Calculate how big the intern table will be after being serialized.
-  auto* const intern_table = Runtime::Current()->GetInternTable();
-  CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
-  intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
-
-  // Note that image_end_ is left at end of used mirror object section.
+  // Note that image_info.image_end_ is left at end of used mirror object section.
 }
 
-void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
-  CHECK_NE(0U, oat_loaded_size);
-  const uint8_t* oat_file_begin = GetOatFileBegin();
-  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
-  oat_data_begin_ = oat_file_begin + oat_data_offset;
-  const uint8_t* oat_data_end = oat_data_begin_ + oat_file_->Size();
+size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const {
+  DCHECK(out_sections != nullptr);
+
+  // Do not round up any sections here that are represented by the bins since it will break
+  // offsets.
+
+  // Objects section
+  ImageSection* objects_section = &out_sections[ImageHeader::kSectionObjects];
+  *objects_section = ImageSection(0u, image_end_);
+
+  // Add field section.
+  ImageSection* field_section = &out_sections[ImageHeader::kSectionArtFields];
+  *field_section = ImageSection(bin_slot_offsets_[kBinArtField], bin_slot_sizes_[kBinArtField]);
+  CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
+
+  // Add method section.
+  ImageSection* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
+  *methods_section = ImageSection(
+      bin_slot_offsets_[kBinArtMethodClean],
+      bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
+
+  // IMT section.
+  ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables];
+  *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]);
+
+  // Conflict tables section.
+  ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
+  *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
+                                              bin_slot_sizes_[kBinIMTConflictTable]);
+
+  // Runtime methods section.
+  ImageSection* runtime_methods_section = &out_sections[ImageHeader::kSectionRuntimeMethods];
+  *runtime_methods_section = ImageSection(bin_slot_offsets_[kBinRuntimeMethod],
+                                          bin_slot_sizes_[kBinRuntimeMethod]);
+
+  // Add dex cache arrays section.
+  ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
+  *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
+                                           bin_slot_sizes_[kBinDexCacheArray]);
+
+  // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
+  size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t));
+  // Calculate the size of the interned strings.
+  ImageSection* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
+  *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
+  cur_pos = interned_strings_section->End();
+  // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
+  cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
+  // Calculate the size of the class table section.
+  ImageSection* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
+  *class_table_section = ImageSection(cur_pos, class_table_bytes_);
+  cur_pos = class_table_section->End();
+  // Image end goes right before the start of the image bitmap.
+  return cur_pos;
+}
+
+void ImageWriter::CreateHeader(size_t oat_index) {
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  const uint8_t* oat_file_begin = image_info.oat_file_begin_;
+  const uint8_t* oat_file_end = oat_file_begin + image_info.oat_loaded_size_;
+  const uint8_t* oat_data_end = image_info.oat_data_begin_ + image_info.oat_size_;
 
   // Create the image sections.
   ImageSection sections[ImageHeader::kSectionCount];
-  // Objects section
-  auto* objects_section = &sections[ImageHeader::kSectionObjects];
-  *objects_section = ImageSection(0u, image_end_);
-  size_t cur_pos = objects_section->End();
-  // Add field section.
-  auto* field_section = &sections[ImageHeader::kSectionArtFields];
-  *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]);
-  CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
-  cur_pos = field_section->End();
-  // Round up to the alignment the required by the method section.
-  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_));
-  // Add method section.
-  auto* methods_section = &sections[ImageHeader::kSectionArtMethods];
-  *methods_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtMethodClean] +
-                                  bin_slot_sizes_[kBinArtMethodDirty]);
-  CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
-  cur_pos = methods_section->End();
-  // Add dex cache arrays section.
-  auto* dex_cache_arrays_section = &sections[ImageHeader::kSectionDexCacheArrays];
-  *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]);
-  CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
-  cur_pos = dex_cache_arrays_section->End();
-  // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
-  cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
-  // Calculate the size of the interned strings.
-  auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
-  *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
-  cur_pos = interned_strings_section->End();
+  const size_t image_end = image_info.CreateImageSections(sections);
+
   // Finally bitmap section.
-  const size_t bitmap_bytes = image_bitmap_->Size();
+  const size_t bitmap_bytes = image_info.image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
-  *bitmap_section = ImageSection(RoundUp(cur_pos, kPageSize), RoundUp(bitmap_bytes, kPageSize));
-  cur_pos = bitmap_section->End();
-  if (kIsDebugBuild) {
+  *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize));
+  if (VLOG_IS_ON(compiler)) {
+    LOG(INFO) << "Creating header for " << oat_filenames_[oat_index];
     size_t idx = 0;
     for (const ImageSection& section : sections) {
       LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section;
       ++idx;
     }
     LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_;
+    LOG(INFO) << "Image roots address=" << std::hex << image_info.image_roots_address_ << std::dec;
+    LOG(INFO) << "Image begin=" << std::hex << reinterpret_cast<uintptr_t>(global_image_begin_)
+              << " Image offset=" << image_info.image_offset_ << std::dec;
+    LOG(INFO) << "Oat file begin=" << std::hex << reinterpret_cast<uintptr_t>(oat_file_begin)
+              << " Oat data begin=" << reinterpret_cast<uintptr_t>(image_info.oat_data_begin_)
+              << " Oat data end=" << reinterpret_cast<uintptr_t>(oat_data_end)
+              << " Oat file end=" << reinterpret_cast<uintptr_t>(oat_file_end);
   }
-  const size_t image_end = static_cast<uint32_t>(interned_strings_section->End());
-  CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
-      "Oat file should be right after the image.";
-  // Create the header.
-  new (image_->Begin()) ImageHeader(
-      PointerToLowMemUInt32(image_begin_), image_end,
-      sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(),
-      PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_),
-      PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_,
-      compile_pic_);
+  // Store boot image info for app image so that we can relocate.
+  uint32_t boot_image_begin = 0;
+  uint32_t boot_image_end = 0;
+  uint32_t boot_oat_begin = 0;
+  uint32_t boot_oat_end = 0;
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
+
+  // Create the header, leave 0 for data size since we will fill this in as we are writing the
+  // image.
+  new (image_info.image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_info.image_begin_),
+                                               image_end,
+                                               sections,
+                                               image_info.image_roots_address_,
+                                               image_info.oat_checksum_,
+                                               PointerToLowMemUInt32(oat_file_begin),
+                                               PointerToLowMemUInt32(image_info.oat_data_begin_),
+                                               PointerToLowMemUInt32(oat_data_end),
+                                               PointerToLowMemUInt32(oat_file_end),
+                                               boot_image_begin,
+                                               boot_image_end - boot_image_begin,
+                                               boot_oat_begin,
+                                               boot_oat_end - boot_oat_begin,
+                                               static_cast<uint32_t>(target_ptr_size_),
+                                               compile_pic_,
+                                               /*is_pic*/compile_app_image_,
+                                               image_storage_mode_,
+                                               /*data_size*/0u);
 }
 
 ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) {
   auto it = native_object_relocations_.find(method);
   CHECK(it != native_object_relocations_.end()) << PrettyMethod(method) << " @ " << method;
-  CHECK_GE(it->second.offset, image_end_) << "ArtMethods should be after Objects";
-  return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
+  size_t oat_index = GetOatIndex(method->GetDexCache());
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  CHECK_GE(it->second.offset, image_info.image_end_) << "ArtMethods should be after Objects";
+  return reinterpret_cast<ArtMethod*>(image_info.image_begin_ + it->second.offset);
 }
 
 class FixupRootVisitor : public RootVisitor {
@@ -1055,7 +1605,7 @@
   void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      *roots[i] = ImageAddress(*roots[i]);
+      *roots[i] = image_writer_->GetImageAddress(*roots[i]);
     }
   }
 
@@ -1063,27 +1613,45 @@
                   const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr()));
+      roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
     }
   }
 
  private:
   ImageWriter* const image_writer_;
-
-  mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    const size_t offset = image_writer_->GetImageOffset(obj);
-    auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset);
-    VLOG(compiler) << "Update root from " << obj << " to " << dest;
-    return dest;
-  }
 };
 
-void ImageWriter::CopyAndFixupNativeData() {
+void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
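+  // Remap every IMT slot to the image address of its ArtMethod; methods already in the boot
+  // image are left untouched by NativeLocationInImage().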
+  for (size_t i = 0; i < ImTable::kSize; ++i) {
+    ArtMethod* method = orig->Get(i, target_ptr_size_);
+    copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
+  }
+}
+
+void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) {
+  const size_t count = orig->NumEntries(target_ptr_size_);
+  for (size_t i = 0; i < count; ++i) {
+    ArtMethod* interface_method = orig->GetInterfaceMethod(i, target_ptr_size_);
+    ArtMethod* implementation_method = orig->GetImplementationMethod(i, target_ptr_size_);
+    copy->SetInterfaceMethod(i, target_ptr_size_, NativeLocationInImage(interface_method));
+    copy->SetImplementationMethod(i,
+                                  target_ptr_size_,
+                                  NativeLocationInImage(implementation_method));
+  }
+}
+
+void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
+  const ImageInfo& image_info = GetImageInfo(oat_index);
   // Copy ArtFields and methods to their locations and update the array for convenience.
   for (auto& pair : native_object_relocations_) {
     NativeObjectRelocation& relocation = pair.second;
-    auto* dest = image_->Begin() + relocation.offset;
-    DCHECK_GE(dest, image_->Begin() + image_end_);
+    // Only work with fields and methods that are in the current oat file.
+    if (relocation.oat_index != oat_index) {
+      continue;
+    }
+    auto* dest = image_info.image_->Begin() + relocation.offset;
+    DCHECK_GE(dest, image_info.image_->Begin() + image_info.image_end_);
+    DCHECK(!IsInBootImage(pair.first));
     switch (relocation.type) {
       case kNativeObjectRelocationTypeArtField: {
         memcpy(dest, pair.first, sizeof(ArtField));
@@ -1091,10 +1659,12 @@
             GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass()));
         break;
       }
+      case kNativeObjectRelocationTypeRuntimeMethod:
       case kNativeObjectRelocationTypeArtMethodClean:
       case kNativeObjectRelocationTypeArtMethodDirty: {
         CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first),
-                           reinterpret_cast<ArtMethod*>(dest));
+                           reinterpret_cast<ArtMethod*>(dest),
+                           image_info);
         break;
       }
       // For arrays, copy just the header since the elements will get copied by their corresponding
@@ -1105,49 +1675,85 @@
       }
       case kNativeObjectRelocationTypeArtMethodArrayClean:
       case kNativeObjectRelocationTypeArtMethodArrayDirty: {
-        memcpy(dest, pair.first, LengthPrefixedArray<ArtMethod>::ComputeSize(
-            0,
-            ArtMethod::Size(target_ptr_size_),
-            ArtMethod::Alignment(target_ptr_size_)));
+        size_t size = ArtMethod::Size(target_ptr_size_);
+        size_t alignment = ArtMethod::Alignment(target_ptr_size_);
+        memcpy(dest, pair.first, LengthPrefixedArray<ArtMethod>::ComputeSize(0, size, alignment));
+        // Clear padding to avoid non-deterministic data in the image (and placate valgrind).
+        reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(dest)->ClearPadding(size, alignment);
         break;
+      }
       case kNativeObjectRelocationTypeDexCacheArray:
         // Nothing to copy here, everything is done in FixupDexCache().
         break;
+      case kNativeObjectRelocationTypeIMTable: {
+        ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first);
+        ImTable* dest_imt = reinterpret_cast<ImTable*>(dest);
+        CopyAndFixupImTable(orig_imt, dest_imt);
+        break;
+      }
+      case kNativeObjectRelocationTypeIMTConflictTable: {
+        auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
+        CopyAndFixupImtConflictTable(
+            orig_table,
+            new (dest) ImtConflictTable(orig_table->NumEntries(target_ptr_size_), target_ptr_size_));
+        break;
       }
     }
   }
   // Fixup the image method roots.
-  auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  const ImageSection& methods_section = image_header->GetMethodsSection();
+  auto* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin());
   for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) {
-    auto* m = image_methods_[i];
-    CHECK(m != nullptr);
-    auto it = native_object_relocations_.find(m);
-    CHECK(it != native_object_relocations_.end()) << "No fowarding for " << PrettyMethod(m);
-    NativeObjectRelocation& relocation = it->second;
-    CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
-        << methods_section;
-    CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
-    auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
-    image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest);
+    ArtMethod* method = image_methods_[i];
+    CHECK(method != nullptr);
+    if (!IsInBootImage(method)) {
+      method = NativeLocationInImage(method);
+    }
+    image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
+  FixupRootVisitor root_visitor(this);
+
   // Write the intern table into the image.
-  const ImageSection& intern_table_section = image_header->GetImageSection(
-      ImageHeader::kSectionInternedStrings);
-  InternTable* const intern_table = Runtime::Current()->GetInternTable();
-  uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr);
-  // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_table;
-  // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
-  // the VisitRoots() will update the memory directly rather than the copies.
-  // This also relies on visit roots not doing any verification which could fail after we update
-  // the roots to be the image addresses.
-  temp_table.ReadFromMemory(memory_ptr);
-  CHECK_EQ(temp_table.Size(), intern_table->Size());
-  FixupRootVisitor visitor(this);
-  temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
+  if (image_info.intern_table_bytes_ > 0) {
+    const ImageSection& intern_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionInternedStrings);
+    InternTable* const intern_table = image_info.intern_table_.get();
+    uint8_t* const intern_table_memory_ptr =
+        image_info.image_->Begin() + intern_table_section.Offset();
+    const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+    CHECK_EQ(intern_table_bytes, image_info.intern_table_bytes_);
+    // Fixup the pointers in the newly written intern table to contain image addresses.
+    InternTable temp_intern_table;
+    // Note that we require that AddTableFromMemory does not make an internal copy of the
+    // elements so that VisitRoots() will update the memory directly rather than copies.
+    // This also relies on VisitRoots() not doing any verification which could fail after we
+    // update the roots to be the image addresses.
+    temp_intern_table.AddTableFromMemory(intern_table_memory_ptr);
+    CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+    temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+  }
+  // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
+  // class loaders. Writing multiple class tables into the image is currently unsupported.
+  if (image_info.class_table_bytes_ > 0u) {
+    const ImageSection& class_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionClassTable);
+    uint8_t* const class_table_memory_ptr =
+        image_info.image_->Begin() + class_table_section.Offset();
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+
+    ClassTable* table = image_info.class_table_.get();
+    CHECK(table != nullptr);
+    const size_t class_table_bytes = table->WriteToMemory(class_table_memory_ptr);
+    CHECK_EQ(class_table_bytes, image_info.class_table_bytes_);
+    // Fixup the pointers in the newly written class table to contain image addresses. See
+    // above comment for intern tables.
+    ClassTable temp_class_table;
+    temp_class_table.ReadFromMemory(class_table_memory_ptr);
+    CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+             table->NumZygoteClasses());
+    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                    RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(buffered_visitor);
+  }
 }
 
 void ImageWriter::CopyAndFixupObjects() {
@@ -1177,8 +1783,8 @@
   dst->SetClass(GetImageAddress(arr->GetClass()));
   auto* dest_array = down_cast<mirror::PointerArray*>(dst);
   for (size_t i = 0, count = num_elements; i < count; ++i) {
-    auto* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
-    if (elem != nullptr) {
+    void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
+    if (elem != nullptr && !IsInBootImage(elem)) {
       auto it = native_object_relocations_.find(elem);
       if (UNLIKELY(it == native_object_relocations_.end())) {
         if (it->second.IsArtMethodRelocation()) {
@@ -1195,7 +1801,8 @@
         }
         UNREACHABLE();
       } else {
-        elem = image_begin_ + it->second.offset;
+        ImageInfo& image_info = GetImageInfo(it->second.oat_index);
+        elem = image_info.image_begin_ + it->second.offset;
       }
     }
     dest_array->SetElementPtrSize<false, true>(i, elem, target_ptr_size_);
@@ -1203,15 +1810,20 @@
 }
 
 void ImageWriter::CopyAndFixupObject(Object* obj) {
+  if (IsInBootImage(obj)) {
+    return;
+  }
   size_t offset = GetImageOffset(obj);
-  auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset);
-  DCHECK_LT(offset, image_end_);
+  size_t oat_index = GetOatIndex(obj);
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  auto* dst = reinterpret_cast<Object*>(image_info.image_->Begin() + offset);
+  DCHECK_LT(offset, image_info.image_end_);
   const auto* src = reinterpret_cast<const uint8_t*>(obj);
 
-  image_bitmap_->Set(dst);  // Mark the obj as live.
+  image_info.image_bitmap_->Set(dst);  // Mark the obj as live.
 
   const size_t n = obj->SizeOf();
-  DCHECK_LE(offset + n, image_->Size());
+  DCHECK_LE(offset + n, image_info.image_->Size());
   memcpy(dst, src, n);
 
   // Write in a hash code of objects which have inflated monitors or a hash code in their monitor
@@ -1219,6 +1831,11 @@
   const auto it = saved_hashcode_map_.find(obj);
   dst->SetLockWord(it != saved_hashcode_map_.end() ?
       LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false);
+  if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+    // Treat all of the objects in the image as marked to avoid unnecessary dirty pages. This is
+    // safe since we mark all of the objects that may reference non-immune objects as gray.
+    CHECK(dst->AtomicSetMarkBit(0, 1));
+  }
   FixupObject(obj, dst);
 }
 
@@ -1241,14 +1858,16 @@
     // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
     // image.
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        offset, image_writer_->GetImageAddress(ref));
+        offset,
+        image_writer_->GetImageAddress(ref));
   }
 
   // java.lang.ref.Reference visitor.
   void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        mirror::Reference::ReferentOffset(), image_writer_->GetImageAddress(ref->GetReferent()));
+        mirror::Reference::ReferentOffset(),
+        image_writer_->GetImageAddress(ref->GetReferent()));
   }
 
  protected:
@@ -1276,46 +1895,71 @@
 
 uintptr_t ImageWriter::NativeOffsetInImage(void* obj) {
   DCHECK(obj != nullptr);
+  DCHECK(!IsInBootImage(obj));
   auto it = native_object_relocations_.find(obj);
-  CHECK(it != native_object_relocations_.end()) << obj;
+  CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+      << Runtime::Current()->GetHeap()->DumpSpaces();
   const NativeObjectRelocation& relocation = it->second;
   return relocation.offset;
 }
 
 template <typename T>
-T* ImageWriter::NativeLocationInImage(T* obj) {
-  if (obj == nullptr) {
-    return nullptr;
-  }
-  return reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj));
+std::string PrettyPrint(T* ptr) SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::ostringstream oss;
+  oss << ptr;
+  return oss.str();
 }
 
-void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
-  // Update the field arrays.
-  copy->SetSFieldsPtrUnchecked(NativeLocationInImage(orig->GetSFieldsPtr()));
-  copy->SetIFieldsPtrUnchecked(NativeLocationInImage(orig->GetIFieldsPtr()));
-  // Update direct and virtual method arrays.
-  copy->SetDirectMethodsPtrUnchecked(NativeLocationInImage(orig->GetDirectMethodsPtr()));
-  copy->SetVirtualMethodsPtr(NativeLocationInImage(orig->GetVirtualMethodsPtr()));
-  // Update dex cache strings.
-  copy->SetDexCacheStrings(NativeLocationInImage(orig->GetDexCacheStrings()));
-  // Fix up embedded tables.
-  if (orig->ShouldHaveEmbeddedImtAndVTable()) {
-    for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) {
-      auto it = native_object_relocations_.find(orig->GetEmbeddedVTableEntry(i, target_ptr_size_));
-      CHECK(it != native_object_relocations_.end()) << PrettyClass(orig);
-      copy->SetEmbeddedVTableEntryUnchecked(
-          i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_);
-    }
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      auto it = native_object_relocations_.find(orig->GetEmbeddedImTableEntry(i, target_ptr_size_));
-      CHECK(it != native_object_relocations_.end()) << PrettyClass(orig);
-      copy->SetEmbeddedImTableEntry(
-          i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_);
-    }
+template <>
+std::string PrettyPrint(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return PrettyMethod(method);
+}
+
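+// Return the address the given native object will have once the image is mapped at its
+// target location; objects already in the boot image keep their current address.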
+template <typename T>
+T* ImageWriter::NativeLocationInImage(T* obj) {
+  if (obj == nullptr || IsInBootImage(obj)) {
+    return obj;
+  } else {
+    auto it = native_object_relocations_.find(obj);
+    CHECK(it != native_object_relocations_.end()) << obj << " " << PrettyPrint(obj)
+        << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces();
+    const NativeObjectRelocation& relocation = it->second;
+    ImageInfo& image_info = GetImageInfo(relocation.oat_index);
+    return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
   }
+}
+
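+// Unlike NativeLocationInImage(), return the address inside the writer's local image buffer
+// where the copy of the object is written, so the copy can be fixed up before mapping.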
+template <typename T>
+T* ImageWriter::NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) {
+  if (obj == nullptr || IsInBootImage(obj)) {
+    return obj;
+  } else {
+    size_t oat_index = GetOatIndexForDexCache(dex_cache);
+    ImageInfo& image_info = GetImageInfo(oat_index);
+    return reinterpret_cast<T*>(image_info.image_->Begin() + NativeOffsetInImage(obj));
+  }
+}
+
+class NativeLocationVisitor {
+ public:
+  explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+
+  template <typename T>
+  T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return image_writer_->NativeLocationInImage(ptr);
+  }
+
+ private:
+  ImageWriter* const image_writer_;
+};
+
+void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
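+  // FixupNativePointers() walks the class's native pointer fields (field arrays, method
+  // arrays, embedded tables) and remaps each through the visitor, replacing the manual
+  // per-field fixups previously done here.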
+  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
+
+  // Remove the clinitThreadId. This is required for image determinism.
+  copy->SetClinitThreadId(static_cast<pid_t>(0));
 }
 
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
@@ -1352,20 +1996,20 @@
       CHECK(it != native_object_relocations_.end())
           << "Missing relocation for AbstractMethod.artMethod " << PrettyMethod(src_method);
       dest->SetArtMethod(
-          reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset));
+          reinterpret_cast<ArtMethod*>(global_image_begin_ + it->second.offset));
     } else if (!klass->IsArrayClass()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) {
         FixupDexCache(down_cast<mirror::DexCache*>(orig), down_cast<mirror::DexCache*>(copy));
-      } else if (klass->IsSubClass(down_cast<mirror::Class*>(
-          class_linker->GetClassRoot(ClassLinker::kJavaLangClassLoader)))) {
+      } else if (klass->IsClassLoaderClass()) {
+        mirror::ClassLoader* copy_loader = down_cast<mirror::ClassLoader*>(copy);
         // If src is a ClassLoader, set the class table to null so that it gets recreated by the
         // ClassLoader.
-        down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
+        copy_loader->SetClassTable(nullptr);
         // Also set allocator to null to be safe. The allocator is created when we create the class
         // table. We also never expect to unload things in the image since they are held live as
         // roots.
-        down_cast<mirror::ClassLoader*>(copy)->SetAllocator(nullptr);
+        copy_loader->SetAllocator(nullptr);
       }
     }
     FixupVisitor visitor(this, copy);
@@ -1373,128 +2017,165 @@
   }
 }
 
+class ImageAddressVisitor {
+ public:
+  explicit ImageAddressVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+
+  template <typename T>
+  T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return image_writer_->GetImageAddress(ptr);
+  }
+
+ private:
+  ImageWriter* const image_writer_;
+};
+
 void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
                                 mirror::DexCache* copy_dex_cache) {
   // Though the DexCache array fields are usually treated as native pointers, we set the full
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-  GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
+  mirror::StringDexCacheType* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
-    uintptr_t copy_strings_offset = NativeOffsetInImage(orig_strings);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::StringsOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_strings_offset)));
-    GcRoot<mirror::String>* copy_strings =
-        reinterpret_cast<GcRoot<mirror::String>*>(image_->Begin() + copy_strings_offset);
-    for (size_t i = 0, num = orig_dex_cache->NumStrings(); i != num; ++i) {
-      copy_strings[i] = GcRoot<mirror::String>(GetImageAddress(orig_strings[i].Read()));
-    }
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
+                                               NativeLocationInImage(orig_strings),
+                                               PointerSize::k64);
+    orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
+                                 ImageAddressVisitor(this));
   }
   GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
-    uintptr_t copy_types_offset = NativeOffsetInImage(orig_types);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::ResolvedTypesOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_types_offset)));
-    GcRoot<mirror::Class>* copy_types =
-        reinterpret_cast<GcRoot<mirror::Class>*>(image_->Begin() + copy_types_offset);
-    for (size_t i = 0, num = orig_dex_cache->NumResolvedTypes(); i != num; ++i) {
-      copy_types[i] = GcRoot<mirror::Class>(GetImageAddress(orig_types[i].Read()));
-    }
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
+                                               NativeLocationInImage(orig_types),
+                                               PointerSize::k64);
+    orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
+                                       ImageAddressVisitor(this));
   }
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
-    uintptr_t copy_methods_offset = NativeOffsetInImage(orig_methods);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::ResolvedMethodsOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_methods_offset)));
-    ArtMethod** copy_methods =
-        reinterpret_cast<ArtMethod**>(image_->Begin() + copy_methods_offset);
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
+                                               NativeLocationInImage(orig_methods),
+                                               PointerSize::k64);
+    ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
+      // NativeLocationInImage also handles runtime methods since these have relocation info.
       ArtMethod* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
   ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
-    uintptr_t copy_fields_offset = NativeOffsetInImage(orig_fields);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::ResolvedFieldsOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_fields_offset)));
-    ArtField** copy_fields = reinterpret_cast<ArtField**>(image_->Begin() + copy_fields_offset);
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
+                                               NativeLocationInImage(orig_fields),
+                                               PointerSize::k64);
+    ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
       ArtField* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
+
+  // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
+  // compiler pointers in here will make the output non-deterministic.
+  copy_dex_cache->SetDexFile(nullptr);
 }
 
-const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) {
-  DCHECK(!method->IsResolutionMethod() && !method->IsImtConflictMethod() &&
-         !method->IsImtUnimplementedMethod() && !method->IsAbstract()) << PrettyMethod(method);
+const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
+  DCHECK_LT(type, kOatAddressCount);
+  // If we are compiling an app image, we need to use the stubs of the boot image.
+  if (compile_app_image_) {
+    // Use the current image pointers.
+    const std::vector<gc::space::ImageSpace*>& image_spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    DCHECK(!image_spaces.empty());
+    const OatFile* oat_file = image_spaces[0]->GetOatFile();
+    CHECK(oat_file != nullptr);
+    const OatHeader& header = oat_file->GetOatHeader();
+    switch (type) {
+      // TODO: We could maybe clean this up if we stored them in an array in the oat header.
+      case kOatAddressQuickGenericJNITrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickGenericJniTrampoline());
+      case kOatAddressInterpreterToInterpreterBridge:
+        return static_cast<const uint8_t*>(header.GetInterpreterToInterpreterBridge());
+      case kOatAddressInterpreterToCompiledCodeBridge:
+        return static_cast<const uint8_t*>(header.GetInterpreterToCompiledCodeBridge());
+      case kOatAddressJNIDlsymLookup:
+        return static_cast<const uint8_t*>(header.GetJniDlsymLookup());
+      case kOatAddressQuickIMTConflictTrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickImtConflictTrampoline());
+      case kOatAddressQuickResolutionTrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickResolutionTrampoline());
+      case kOatAddressQuickToInterpreterBridge:
+        return static_cast<const uint8_t*>(header.GetQuickToInterpreterBridge());
+      default:
+        UNREACHABLE();
+    }
+  }
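+  // Boot image compilation: return the trampoline address recorded from the primary oat
+  // file's header in UpdateOatFileHeader().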
+  const ImageInfo& primary_image_info = GetImageInfo(0);
+  return GetOatAddressForOffset(primary_image_info.oat_address_offsets_[type], primary_image_info);
+}
+
+const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method,
+                                         const ImageInfo& image_info,
+                                         bool* quick_is_interpreted) {
+  DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method);
+  DCHECK_NE(method, Runtime::Current()->GetImtConflictMethod()) << PrettyMethod(method);
+  DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method);
+  DCHECK(method->IsInvokable()) << PrettyMethod(method);
+  DCHECK(!IsInBootImage(method)) << PrettyMethod(method);
 
   // Use original code if it exists. Otherwise, set the code pointer to the resolution
   // trampoline.
 
   // Quick entrypoint:
-  uint32_t quick_oat_code_offset = PointerToLowMemUInt32(
-      method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_));
-  const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset);
+  const void* quick_oat_entry_point =
+      method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_);
+  const uint8_t* quick_code;
+
+  if (UNLIKELY(IsInBootImage(method->GetDeclaringClass()))) {
+    DCHECK(method->IsCopied());
+    // The code is not in the oat file corresponding to this image (e.g. copied default
+    // methods), so use the entry point already stored in the method.
+    quick_code = reinterpret_cast<const uint8_t*>(quick_oat_entry_point);
+  } else {
+    uint32_t quick_oat_code_offset = PointerToLowMemUInt32(quick_oat_entry_point);
+    quick_code = GetOatAddressForOffset(quick_oat_code_offset, image_info);
+  }
+
   *quick_is_interpreted = false;
   if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() ||
       method->GetDeclaringClass()->IsInitialized())) {
     // We have code for a non-static or initialized method, just use the code.
-    DCHECK_GE(quick_code, oat_data_begin_);
   } else if (quick_code == nullptr && method->IsNative() &&
       (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) {
     // Non-static or initialized native method missing compiled code, use generic JNI version.
-    quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
-    DCHECK_GE(quick_code, oat_data_begin_);
+    quick_code = GetOatAddress(kOatAddressQuickGenericJNITrampoline);
   } else if (quick_code == nullptr && !method->IsNative()) {
     // We don't have code at all for a non-native method, use the interpreter.
-    quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+    quick_code = GetOatAddress(kOatAddressQuickToInterpreterBridge);
     *quick_is_interpreted = true;
-    DCHECK_GE(quick_code, oat_data_begin_);
   } else {
     CHECK(!method->GetDeclaringClass()->IsInitialized());
     // We have code for a static method, but need to go through the resolution stub for class
     // initialization.
-    quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
-    DCHECK_GE(quick_code, oat_data_begin_);
+    quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline);
+  }
+  // Sanity check, currently disabled:
+  // if (!IsInBootOatFile(quick_code)) {
+  //   DCHECK_GE(quick_code, oat_data_begin_);
+  // }
   return quick_code;
 }
 
-const uint8_t* ImageWriter::GetQuickEntryPoint(ArtMethod* method) {
-  // Calculate the quick entry point following the same logic as FixupMethod() below.
-  // The resolution method has a special trampoline to call.
-  Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(method == runtime->GetResolutionMethod())) {
-    return GetOatAddress(quick_resolution_trampoline_offset_);
-  } else if (UNLIKELY(method == runtime->GetImtConflictMethod() ||
-                      method == runtime->GetImtUnimplementedMethod())) {
-    return GetOatAddress(quick_imt_conflict_trampoline_offset_);
-  } else {
-    // We assume all methods have code. If they don't currently then we set them to the use the
-    // resolution trampoline. Abstract methods never have code and so we need to make sure their
-    // use results in an AbstractMethodError. We use the interpreter to achieve this.
-    if (UNLIKELY(method->IsAbstract())) {
-      return GetOatAddress(quick_to_interpreter_bridge_offset_);
-    } else {
-      bool quick_is_interpreted;
-      return GetQuickCode(method, &quick_is_interpreted);
-    }
-  }
-}
-
-void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) {
+void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
+                                     ArtMethod* copy,
+                                     const ImageInfo& image_info) {
   memcpy(copy, orig, ArtMethod::Size(target_ptr_size_));
 
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
-
   ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
   copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
   GcRoot<mirror::Class>* orig_resolved_types = orig->GetDexCacheResolvedTypes(target_ptr_size_);
@@ -1505,34 +2186,38 @@
 
   // The resolution method has a special trampoline to call.
   Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
-    copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(quick_resolution_trampoline_offset_), target_ptr_size_);
-  } else if (UNLIKELY(orig == runtime->GetImtConflictMethod() ||
-                      orig == runtime->GetImtUnimplementedMethod())) {
-    copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(quick_imt_conflict_trampoline_offset_), target_ptr_size_);
-  } else if (UNLIKELY(orig->IsRuntimeMethod())) {
-    bool found_one = false;
-    for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
-      auto idx = static_cast<Runtime::CalleeSaveType>(i);
-      if (runtime->HasCalleeSaveMethod(idx) && runtime->GetCalleeSaveMethod(idx) == orig) {
-        found_one = true;
-        break;
+  if (orig->IsRuntimeMethod()) {
+    ImtConflictTable* orig_table = orig->GetImtConflictTable(target_ptr_size_);
+    if (orig_table != nullptr) {
+      // Special IMT conflict method, normal IMT conflict method or unimplemented IMT method.
+      copy->SetEntryPointFromQuickCompiledCodePtrSize(
+          GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_);
+      copy->SetImtConflictTable(NativeLocationInImage(orig_table), target_ptr_size_);
+    } else if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
+      copy->SetEntryPointFromQuickCompiledCodePtrSize(
+          GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
+    } else {
+      bool found_one = false;
+      for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
+        auto idx = static_cast<Runtime::CalleeSaveType>(i);
+        if (runtime->HasCalleeSaveMethod(idx) && runtime->GetCalleeSaveMethod(idx) == orig) {
+          found_one = true;
+          break;
+        }
       }
+      CHECK(found_one) << "Expected to find callee save method but got " << PrettyMethod(orig);
+      CHECK(copy->IsRuntimeMethod());
     }
-    CHECK(found_one) << "Expected to find callee save method but got " << PrettyMethod(orig);
-    CHECK(copy->IsRuntimeMethod());
   } else {
     // We assume all methods have code. If they don't currently then we set them to the use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
-    if (UNLIKELY(orig->IsAbstract())) {
+    if (UNLIKELY(!orig->IsInvokable())) {
       copy->SetEntryPointFromQuickCompiledCodePtrSize(
-          GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_);
+          GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_);
     } else {
       bool quick_is_interpreted;
-      const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted);
+      const uint8_t* quick_code = GetQuickCode(orig, image_info, &quick_is_interpreted);
       copy->SetEntryPointFromQuickCompiledCodePtrSize(quick_code, target_ptr_size_);
 
       // JNI entrypoint:
@@ -1540,40 +2225,17 @@
         // The native method's pointer is set to a stub to lookup via dlsym.
         // Note this is not the code_ pointer, that is handled above.
         copy->SetEntryPointFromJniPtrSize(
-            GetOatAddress(jni_dlsym_lookup_offset_), target_ptr_size_);
+            GetOatAddress(kOatAddressJNIDlsymLookup), target_ptr_size_);
       }
     }
   }
 }
 
-static OatHeader* GetOatHeaderFromElf(ElfFile* elf) {
-  uint64_t data_sec_offset;
-  bool has_data_sec = elf->GetSectionOffsetAndSize(".rodata", &data_sec_offset, nullptr);
-  if (!has_data_sec) {
-    return nullptr;
-  }
-  return reinterpret_cast<OatHeader*>(elf->Begin() + data_sec_offset);
-}
-
-void ImageWriter::SetOatChecksumFromElfFile(File* elf_file) {
-  std::string error_msg;
-  std::unique_ptr<ElfFile> elf(ElfFile::Open(elf_file, PROT_READ|PROT_WRITE,
-                                             MAP_SHARED, &error_msg));
-  if (elf.get() == nullptr) {
-    LOG(FATAL) << "Unable open oat file: " << error_msg;
-    return;
-  }
-  OatHeader* oat_header = GetOatHeaderFromElf(elf.get());
-  CHECK(oat_header != nullptr);
-  CHECK(oat_header->IsValid());
-
-  ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  image_header->SetOatChecksum(oat_header->GetChecksum());
-}
-
-size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const {
+size_t ImageWriter::GetBinSizeSum(ImageWriter::ImageInfo& image_info,
+                                  ImageWriter::Bin up_to) const {
   DCHECK_LE(up_to, kBinSize);
-  return std::accumulate(&bin_slot_sizes_[0], &bin_slot_sizes_[up_to], /*init*/0);
+  return std::accumulate(&image_info.bin_slot_sizes_[0],
+                         &image_info.bin_slot_sizes_[up_to],
+                         /*init*/0);
 }
 
 ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) {
@@ -1599,15 +2261,6 @@
   return lockword_ & ~kBinMask;
 }
 
-uint8_t* ImageWriter::GetOatFileBegin() const {
-  DCHECK_GT(intern_table_bytes_, 0u);
-  size_t native_sections_size =
-      bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] +
-      bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinDexCacheArray] +
-      intern_table_bytes_;
-  return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize);
-}
-
 ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocationType type) {
   switch (type) {
     case kNativeObjectRelocationTypeArtField:
@@ -1621,8 +2274,126 @@
       return kBinArtMethodDirty;
     case kNativeObjectRelocationTypeDexCacheArray:
       return kBinDexCacheArray;
+    case kNativeObjectRelocationTypeRuntimeMethod:
+      return kBinRuntimeMethod;
+    case kNativeObjectRelocationTypeIMTable:
+      return kBinImTable;
+    case kNativeObjectRelocationTypeIMTConflictTable:
+      return kBinIMTConflictTable;
   }
   UNREACHABLE();
 }
 
+size_t ImageWriter::GetOatIndex(mirror::Object* obj) const {
+  if (compile_app_image_) {
+    return GetDefaultOatIndex();
+  } else {
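+    // Bucket the object with the dex cache that defines it: a dex cache maps to itself, a
+    // class to its own dex cache, and any other object to its class's dex cache.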
+    mirror::DexCache* dex_cache =
+        obj->IsDexCache() ? obj->AsDexCache()
+                          : obj->IsClass() ? obj->AsClass()->GetDexCache()
+                                           : obj->GetClass()->GetDexCache();
+    return GetOatIndexForDexCache(dex_cache);
+  }
+}
+
+size_t ImageWriter::GetOatIndexForDexFile(const DexFile* dex_file) const {
+  if (compile_app_image_) {
+    return GetDefaultOatIndex();
+  } else {
+    auto it = dex_file_oat_index_map_.find(dex_file);
+    DCHECK(it != dex_file_oat_index_map_.end()) << dex_file->GetLocation();
+    return it->second;
+  }
+}
+
+size_t ImageWriter::GetOatIndexForDexCache(mirror::DexCache* dex_cache) const {
+  if (dex_cache == nullptr) {
+    return GetDefaultOatIndex();
+  } else {
+    return GetOatIndexForDexFile(dex_cache->GetDexFile());
+  }
+}
+
+void ImageWriter::UpdateOatFileLayout(size_t oat_index,
+                                      size_t oat_loaded_size,
+                                      size_t oat_data_offset,
+                                      size_t oat_data_size) {
+  const uint8_t* images_end = image_infos_.back().image_begin_ + image_infos_.back().image_size_;
+  for (const ImageInfo& info : image_infos_) {
+    DCHECK_LE(info.image_begin_ + info.image_size_, images_end);
+  }
+  DCHECK(images_end != nullptr);  // Image space must be ready.
+
+  ImageInfo& cur_image_info = GetImageInfo(oat_index);
+  cur_image_info.oat_file_begin_ = images_end + cur_image_info.oat_offset_;
+  cur_image_info.oat_loaded_size_ = oat_loaded_size;
+  cur_image_info.oat_data_begin_ = cur_image_info.oat_file_begin_ + oat_data_offset;
+  cur_image_info.oat_size_ = oat_data_size;
+
+  if (compile_app_image_) {
+    CHECK_EQ(oat_filenames_.size(), 1u) << "App image should have no next image.";
+    return;
+  }
+
+  // Update the oat_offset of the next image info.
+  if (oat_index + 1u != oat_filenames_.size()) {
+    // There is a following one.
+    ImageInfo& next_image_info = GetImageInfo(oat_index + 1u);
+    next_image_info.oat_offset_ = cur_image_info.oat_offset_ + oat_loaded_size;
+  }
+}
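+
+// Worked example (sizes illustrative only): if oat file 0 reports
+// oat_loaded_size = 0x4000, image info 1 receives oat_offset_ = 0x4000, so a
+// subsequent UpdateOatFileLayout(1, ...) places its oat_file_begin_ at
+// images_end + 0x4000.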
+
+void ImageWriter::UpdateOatFileHeader(size_t oat_index, const OatHeader& oat_header) {
+  ImageInfo& cur_image_info = GetImageInfo(oat_index);
+  cur_image_info.oat_checksum_ = oat_header.GetChecksum();
+
+  if (oat_index == GetDefaultOatIndex()) {
+    // Primary oat file, read the trampolines.
+    cur_image_info.oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] =
+        oat_header.GetInterpreterToInterpreterBridgeOffset();
+    cur_image_info.oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] =
+        oat_header.GetInterpreterToCompiledCodeBridgeOffset();
+    cur_image_info.oat_address_offsets_[kOatAddressJNIDlsymLookup] =
+        oat_header.GetJniDlsymLookupOffset();
+    cur_image_info.oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] =
+        oat_header.GetQuickGenericJniTrampolineOffset();
+    cur_image_info.oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] =
+        oat_header.GetQuickImtConflictTrampolineOffset();
+    cur_image_info.oat_address_offsets_[kOatAddressQuickResolutionTrampoline] =
+        oat_header.GetQuickResolutionTrampolineOffset();
+    cur_image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
+        oat_header.GetQuickToInterpreterBridgeOffset();
+  }
+}
+
+ImageWriter::ImageWriter(
+    const CompilerDriver& compiler_driver,
+    uintptr_t image_begin,
+    bool compile_pic,
+    bool compile_app_image,
+    ImageHeader::StorageMode image_storage_mode,
+    const std::vector<const char*>& oat_filenames,
+    const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map)
+    : compiler_driver_(compiler_driver),
+      global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+      image_objects_offset_begin_(0),
+      compile_pic_(compile_pic),
+      compile_app_image_(compile_app_image),
+      target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
+      image_infos_(oat_filenames.size()),
+      dirty_methods_(0u),
+      clean_methods_(0u),
+      image_storage_mode_(image_storage_mode),
+      oat_filenames_(oat_filenames),
+      dex_file_oat_index_map_(dex_file_oat_index_map) {
+  CHECK_NE(image_begin, 0U);
+  std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+  CHECK_EQ(compile_app_image, !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty())
+      << "Compiling a boot image should occur iff there are no boot image spaces loaded";
+}
+
+ImageWriter::ImageInfo::ImageInfo()
+    : intern_table_(new InternTable),
+      class_table_(new ClassTable) {}
+
 }  // namespace art
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index e235bc4..7d13656 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -27,10 +27,13 @@
 #include <ostream>
 
 #include "base/bit_utils.h"
+#include "base/dchecked_vector.h"
+#include "base/enums.h"
+#include "base/length_prefixed_array.h"
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
 #include "gc/space/space.h"
-#include "length_prefixed_array.h"
+#include "image.h"
 #include "lock_word.h"
 #include "mem_map.h"
 #include "oat_file.h"
@@ -40,40 +43,48 @@
 #include "utils.h"
 
 namespace art {
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
+
+class ClassTable;
+
+static constexpr int kInvalidFd = -1;
 
 // Write a Space built during compilation for use during execution.
 class ImageWriter FINAL {
  public:
-  ImageWriter(const CompilerDriver& compiler_driver, uintptr_t image_begin,
-              bool compile_pic)
-      : compiler_driver_(compiler_driver), image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
-        image_end_(0), image_objects_offset_begin_(0), image_roots_address_(0), oat_file_(nullptr),
-        oat_data_begin_(nullptr), interpreter_to_interpreter_bridge_offset_(0),
-        interpreter_to_compiled_code_bridge_offset_(0), jni_dlsym_lookup_offset_(0),
-        quick_generic_jni_trampoline_offset_(0),
-        quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0),
-        quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic),
-        target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
-        bin_slot_sizes_(), bin_slot_offsets_(), bin_slot_count_(),
-        intern_table_bytes_(0u), image_method_array_(ImageHeader::kImageMethodsCount),
-        dirty_methods_(0u), clean_methods_(0u) {
-    CHECK_NE(image_begin, 0U);
-    std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr);
-  }
-
-  ~ImageWriter() {
-  }
+  ImageWriter(const CompilerDriver& compiler_driver,
+              uintptr_t image_begin,
+              bool compile_pic,
+              bool compile_app_image,
+              ImageHeader::StorageMode image_storage_mode,
+              const std::vector<const char*>& oat_filenames,
+              const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map);
 
   bool PrepareImageAddressSpace();
 
   bool IsImageAddressSpaceReady() const {
-    return image_roots_address_ != 0u;
+    DCHECK(!image_infos_.empty());
+    for (const ImageInfo& image_info : image_infos_) {
+      if (image_info.image_roots_address_ == 0u) {
+        return false;
+      }
+    }
+    return true;
   }
 
   template <typename T>
   T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return object == nullptr ? nullptr :
-        reinterpret_cast<T*>(image_begin_ + GetImageOffset(object));
+    if (object == nullptr || IsInBootImage(object)) {
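+      // null stays null, and boot image objects are already at their final
+      // runtime address, so return them unchanged.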
+      return object;
+    } else {
+      size_t oat_index = GetOatIndex(object);
+      const ImageInfo& image_info = GetImageInfo(oat_index);
+      return reinterpret_cast<T*>(image_info.image_begin_ + GetImageOffset(object));
+    }
   }
 
   ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -81,22 +92,57 @@
   template <typename PtrType>
   PtrType GetDexCacheArrayElementImageAddress(const DexFile* dex_file, uint32_t offset)
       const SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto it = dex_cache_array_starts_.find(dex_file);
-    DCHECK(it != dex_cache_array_starts_.end());
+    auto oat_it = dex_file_oat_index_map_.find(dex_file);
+    DCHECK(oat_it != dex_file_oat_index_map_.end());
+    const ImageInfo& image_info = GetImageInfo(oat_it->second);
+    auto it = image_info.dex_cache_array_starts_.find(dex_file);
+    DCHECK(it != image_info.dex_cache_array_starts_.end());
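+    // Element address = per-image base + offset of the dex-cache-array bin
+    //                 + this dex file's start within that bin + element offset.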
     return reinterpret_cast<PtrType>(
-        image_begin_ + bin_slot_offsets_[kBinDexCacheArray] + it->second + offset);
+        image_info.image_begin_ + image_info.bin_slot_offsets_[kBinDexCacheArray] +
+            it->second + offset);
   }
 
-  uint8_t* GetOatFileBegin() const;
+  size_t GetOatFileOffset(size_t oat_index) const {
+    return GetImageInfo(oat_index).oat_offset_;
+  }
 
-  bool Write(const std::string& image_filename, const std::string& oat_filename,
-             const std::string& oat_location)
+  const uint8_t* GetOatFileBegin(size_t oat_index) const {
+    return GetImageInfo(oat_index).oat_file_begin_;
+  }
+
+  // If image_fd is not kInvalidFd, then we use that for the image file. Otherwise we open
+  // the files named in image_filenames.
+  // If oat_fd is not kInvalidFd, then we use that for the oat file. Otherwise we open
+  // the files named in oat_filenames.
+  bool Write(int image_fd,
+             const std::vector<const char*>& image_filenames,
+             const std::vector<const char*>& oat_filenames)
       REQUIRES(!Locks::mutator_lock_);
 
-  uintptr_t GetOatDataBegin() {
-    return reinterpret_cast<uintptr_t>(oat_data_begin_);
+  uintptr_t GetOatDataBegin(size_t oat_index) {
+    return reinterpret_cast<uintptr_t>(GetImageInfo(oat_index).oat_data_begin_);
   }
 
+  // Get the index of the oat file containing the dex file.
+  //
+  // This "oat_index" is used to retrieve information about the the memory layout
+  // of the oat file and its associated image file, needed for link-time patching
+  // of references to the image or across oat files.
+  size_t GetOatIndexForDexFile(const DexFile* dex_file) const;
+
+  // Get the index of the oat file containing the dex file served by the dex cache.
+  size_t GetOatIndexForDexCache(mirror::DexCache* dex_cache) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Update the oat layout for the given oat file.
+  // This will make the oat_offset for the next oat file valid.
+  void UpdateOatFileLayout(size_t oat_index,
+                           size_t oat_loaded_size,
+                           size_t oat_data_offset,
+                           size_t oat_data_size);
+  // Update information about the oat header, i.e. checksum and trampoline offsets.
+  void UpdateOatFileHeader(size_t oat_index, const OatHeader& oat_header);
+
  private:
   bool AllocMemory();
 
@@ -104,16 +150,17 @@
   void RecordImageAllocations() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Classify different kinds of bins that objects end up getting packed into during image writing.
+  // Ordered from dirtiest to cleanest (until ArtMethods).
   enum Bin {
-    // Likely-clean:
-    kBinString,                        // [String] Almost always immutable (except for obj header).
+    kBinMiscDirty,                // Dex caches, object locks, etc.
+    kBinClassVerified,            // Class verified, but initializers haven't been run
     // Unknown mix of clean/dirty:
     kBinRegular,
-    // Likely-dirty:
+    kBinClassInitialized,         // Class initializers have been run
     // All classes get their own bins since their fields are often dirty
     kBinClassInitializedFinalStatics,  // Class initializers have been run, no non-final statics
-    kBinClassInitialized,         // Class initializers have been run
-    kBinClassVerified,            // Class verified, but initializers haven't been run
+    // Likely-clean:
+    kBinString,                        // [String] Almost always immutable (except for obj header).
     // Add more bins here if we add more segregation code.
     // Non-mirror fields must be below.
     // ArtFields should be always clean.
@@ -123,6 +170,12 @@
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
     kBinArtMethodDirty,
+    // IMT (clean)
+    kBinImTable,
+    // Conflict tables (clean).
+    kBinIMTConflictTable,
+    // Runtime methods (always clean; not stored in a length-prefixed array).
+    kBinRuntimeMethod,
     // Dex cache arrays have a special slot for PC-relative addressing. Since they are
     // huge, and their dirtiness is therefore not important for the clean/dirty separation,
     // we arbitrarily keep them at the end of the native data.
@@ -140,16 +193,31 @@
     kNativeObjectRelocationTypeArtMethodArrayClean,
     kNativeObjectRelocationTypeArtMethodDirty,
     kNativeObjectRelocationTypeArtMethodArrayDirty,
+    kNativeObjectRelocationTypeRuntimeMethod,
+    kNativeObjectRelocationTypeIMTable,
+    kNativeObjectRelocationTypeIMTConflictTable,
     kNativeObjectRelocationTypeDexCacheArray,
   };
   friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type);
 
+  enum OatAddress {
+    kOatAddressInterpreterToInterpreterBridge,
+    kOatAddressInterpreterToCompiledCodeBridge,
+    kOatAddressJNIDlsymLookup,
+    kOatAddressQuickGenericJNITrampoline,
+    kOatAddressQuickIMTConflictTrampoline,
+    kOatAddressQuickResolutionTrampoline,
+    kOatAddressQuickToInterpreterBridge,
+    // Number of elements in the enum.
+    kOatAddressCount,
+  };
+  friend std::ostream& operator<<(std::ostream& stream, const OatAddress& oat_address);
+
   static constexpr size_t kBinBits = MinimumBitsToStore<uint32_t>(kBinMirrorCount - 1);
   // uint32 = typeof(lockword_)
   // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
   // failures due to invalid read barrier bits during object field reads.
-  static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits -
-      LockWord::kReadBarrierStateSize;
+  static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - LockWord::kGCStateSize;
   // 111000.....0
   static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
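   // Worked example (numbers illustrative, not the actual constants): with
   // kBinBits == 3 and LockWord::kGCStateSize == 2, kBinShift == 32 - 3 - 2 == 27,
   // so kBinMask == 0b111 << 27 == 0x38000000.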
 
@@ -175,6 +243,72 @@
     const uint32_t lockword_;
   };
 
+  struct ImageInfo {
+    ImageInfo();
+    ImageInfo(ImageInfo&&) = default;
+
+    // Write the image sections into out_sections and return the size of the image,
+    // excluding the bitmap.
+    size_t CreateImageSections(ImageSection* out_sections) const;
+
+    std::unique_ptr<MemMap> image_;  // Memory mapped for generating the image.
+
+    // Target begin of this image. Note: it is not valid to write here; this is the address
+    // of the target image, not necessarily where image_ is mapped. The address is only valid
+    // after layout (before then it is null).
+    uint8_t* image_begin_ = nullptr;
+
+    // Offset to the free space in image_, initially size of image header.
+    size_t image_end_ = RoundUp(sizeof(ImageHeader), kObjectAlignment);
+    uint32_t image_roots_address_ = 0;  // The image roots address in the image.
+    size_t image_offset_ = 0;  // Offset of this image from the start of the first image.
+
+    // Image size is the *address space* covered by this image. As the live bitmap is aligned
+    // to the page size, the live bitmap will cover more address space than necessary. But live
+    // bitmaps may not overlap, so an image has a "shadow," which is accounted for in the size.
+    // The next image may only start at image_begin_ + image_size_ (which is guaranteed to be
+    // page-aligned).
+    size_t image_size_ = 0;
+
+    // Oat data.
+    // Offset of the oat file for this image from the start of the oat files. This is
+    // valid once the previous oat file has been written.
+    size_t oat_offset_ = 0;
+    // Layout of the loaded ELF file containing the oat file, valid after UpdateOatFileLayout().
+    const uint8_t* oat_file_begin_ = nullptr;
+    size_t oat_loaded_size_ = 0;
+    const uint8_t* oat_data_begin_ = nullptr;
+    size_t oat_size_ = 0;  // Size of the corresponding oat data.
+    // The oat header checksum, valid after UpdateOatFileHeader().
+    uint32_t oat_checksum_ = 0u;
+
+    // Image bitmap which lets us know where the objects inside of the image reside.
+    std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
+
+    // The start offsets of the dex cache arrays.
+    SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
+
+    // Offset from oat_data_begin_ to the stubs.
+    uint32_t oat_address_offsets_[kOatAddressCount] = {};
+
+    // Bin slot tracking for dirty object packing.
+    size_t bin_slot_sizes_[kBinSize] = {};  // Number of bytes in a bin.
+    size_t bin_slot_offsets_[kBinSize] = {};  // Number of bytes in previous bins.
+    size_t bin_slot_count_[kBinSize] = {};  // Number of objects in a bin.
+
+    // Cached size of the intern table for when we allocate memory.
+    size_t intern_table_bytes_ = 0;
+
+    // Number of image class table bytes.
+    size_t class_table_bytes_ = 0;
+
+    // Intern table associated with this image for serialization.
+    std::unique_ptr<InternTable> intern_table_;
+
+    // Class table associated with this image for serialization.
+    std::unique_ptr<ClassTable> class_table_;
+  };
+
   // We use the lock word to store the offset of the object in the image.
   void AssignImageOffset(mirror::Object* object, BinSlot bin_slot)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -194,7 +328,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   BinSlot GetImageBinSlot(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void AddDexCacheArrayRelocation(void* array, size_t offset) SHARED_REQUIRES(Locks::mutator_lock_);
+  void AddDexCacheArrayRelocation(void* array, size_t offset, mirror::DexCache* dex_cache)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void AddMethodPointerArray(mirror::PointerArray* arr) SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void* GetImageAddressCallback(void* writer, mirror::Object* obj)
@@ -205,20 +340,25 @@
   mirror::Object* GetLocalAddress(mirror::Object* object) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
     size_t offset = GetImageOffset(object);
-    uint8_t* dst = image_->Begin() + offset;
+    size_t oat_index = GetOatIndex(object);
+    const ImageInfo& image_info = GetImageInfo(oat_index);
+    uint8_t* dst = image_info.image_->Begin() + offset;
     return reinterpret_cast<mirror::Object*>(dst);
   }
 
-  const uint8_t* GetOatAddress(uint32_t offset) const {
+  // Returns the address in the boot image if we are compiling the app image.
+  const uint8_t* GetOatAddress(OatAddress type) const;
+
+  const uint8_t* GetOatAddressForOffset(uint32_t offset, const ImageInfo& image_info) const {
     // With Quick, code is within the OatFile, as they are all in one
-    // .o ELF object.
-    DCHECK_LE(offset, oat_file_->Size());
-    DCHECK(oat_data_begin_ != nullptr);
-    return offset == 0u ? nullptr : oat_data_begin_ + offset;
+    // .o ELF object. But interpret the offset as signed.
+    DCHECK_LE(static_cast<int32_t>(offset), static_cast<int32_t>(image_info.oat_size_));
+    DCHECK(image_info.oat_data_begin_ != nullptr);
+    return offset == 0u ? nullptr : image_info.oat_data_begin_ + static_cast<int32_t>(offset);
   }
 
   // Returns true if the class was in the original requested image classes list.
-  bool IsImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool KeepClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Debug aid that lists the requested image classes.
   void DumpImageClasses();
@@ -238,9 +378,9 @@
   // Lays out where the image objects will be at runtime.
   void CalculateNewObjectOffsets()
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateHeader(size_t oat_loaded_size, size_t oat_data_offset)
+  void CreateHeader(size_t oat_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* CreateImageRoots() const
+  mirror::ObjectArray<mirror::Object>* CreateImageRoots(size_t oat_index) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CalculateObjectBinSlots(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -257,12 +397,15 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
-  void CopyAndFixupNativeData() SHARED_REQUIRES(Locks::mutator_lock_);
+  void CopyAndFixupNativeData(size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupObjects() SHARED_REQUIRES(Locks::mutator_lock_);
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
-  void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy)
+  void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void CopyAndFixupImTable(ImTable* orig, ImTable* copy) SHARED_REQUIRES(Locks::mutator_lock_);
+  void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -270,121 +413,167 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupDexCache(mirror::DexCache* orig_dex_cache, mirror::DexCache* copy_dex_cache)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr, mirror::Class* klass,
-                         Bin array_type) SHARED_REQUIRES(Locks::mutator_lock_);
+  void FixupPointerArray(mirror::Object* dst,
+                         mirror::PointerArray* arr,
+                         mirror::Class* klass,
+                         Bin array_type)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get quick code for non-resolution/imt_conflict/abstract method.
-  const uint8_t* GetQuickCode(ArtMethod* method, bool* quick_is_interpreted)
+  const uint8_t* GetQuickCode(ArtMethod* method,
+                              const ImageInfo& image_info,
+                              bool* quick_is_interpreted)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  const uint8_t* GetQuickEntryPoint(ArtMethod* method)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Patches references in OatFile to expect runtime addresses.
-  void SetOatChecksumFromElfFile(File* elf_file);
-
   // Calculate the sum of the bin slot sizes in [0, up_to). Defaults to all bins.
-  size_t GetBinSizeSum(Bin up_to = kBinSize) const;
+  size_t GetBinSizeSum(ImageInfo& image_info, Bin up_to = kBinSize) const;
 
   // Return true if a method is likely to be dirtied at runtime.
   bool WillMethodBeDirty(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Assign the offset for an ArtMethod.
-  void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type)
+  void AssignMethodOffset(ArtMethod* method,
+                          NativeObjectRelocationType type,
+                          size_t oat_index)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void TryAssignImTableOffset(ImTable* imt, size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
+  // relocation.
+  void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if klass is loaded by the boot class loader but not in the boot image.
+  bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if klass depends on a boot class loader non-image class. We want to prune these
+  // classes since we do not want any boot class loader classes in the image. This means that
+  // we also cannot have any classes which refer to these boot class loader non-image classes.
+  // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
+  // driver.
+  bool PruneAppImageClass(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // early_exit is true if we had a cyclic dependency anywhere down the chain.
+  bool PruneAppImageClassInternal(mirror::Class* klass,
+                                  bool* early_exit,
+                                  std::unordered_set<mirror::Class*>* visited)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
 
-  uintptr_t NativeOffsetInImage(void* obj);
+  uintptr_t NativeOffsetInImage(void* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Location where the object will be when the image is loaded at runtime.
   template <typename T>
-  T* NativeLocationInImage(T* obj);
+  T* NativeLocationInImage(T* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Location where the temporary copy of the object currently resides.
+  template <typename T>
+  T* NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if obj is inside the boot image space. This may only return true if we are
+  // compiling an app image.
+  bool IsInBootImage(const void* obj) const;
+
+  // Return true if ptr is within the boot oat file.
+  bool IsInBootOatFile(const void* ptr) const;
+
+  // Get the index of the oat file associated with the object.
+  size_t GetOatIndex(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The oat index for shared data in multi-image and all data in single-image compilation.
+  size_t GetDefaultOatIndex() const {
+    return 0u;
+  }
+
+  ImageInfo& GetImageInfo(size_t oat_index) {
+    return image_infos_[oat_index];
+  }
+
+  const ImageInfo& GetImageInfo(size_t oat_index) const {
+    return image_infos_[oat_index];
+  }
+
+  // Find a string that is already strongly interned in another image or in the boot image.
+  // Used to remove duplicates in the multi-image and app image cases.
+  mirror::String* FindInternedString(mirror::String* string) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if there already exists a native allocation for an object.
+  bool NativeRelocationAssigned(void* ptr) const;
 
   const CompilerDriver& compiler_driver_;
 
-  // Beginning target image address for the output image.
-  uint8_t* image_begin_;
-
-  // Offset to the free space in image_.
-  size_t image_end_;
+  // Beginning target image address for the first image.
+  uint8_t* global_image_begin_;
 
   // Offset from image_begin_ to where the first object is in image_.
   size_t image_objects_offset_begin_;
 
-  // The image roots address in the image.
-  uint32_t image_roots_address_;
-
-  // oat file with code for this image
-  OatFile* oat_file_;
-
-  // Memory mapped for generating the image.
-  std::unique_ptr<MemMap> image_;
-
   // Pointer arrays that need to be updated. Since these are only some int and long arrays, we need
   // to keep track. These include vtable arrays, iftable arrays, and dex caches.
   std::unordered_map<mirror::PointerArray*, Bin> pointer_arrays_;
 
-  // The start offsets of the dex cache arrays.
-  SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
-
   // Saved hash codes. We use these to restore lockwords which were temporarily used to have
   // forwarding addresses as well as copying over hash codes.
   std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_;
 
-  // Beginning target oat address for the pointers from the output image to its oat file.
-  const uint8_t* oat_data_begin_;
-
-  // Image bitmap which lets us know where the objects inside of the image reside.
-  std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
-
-  // Offset from oat_data_begin_ to the stubs.
-  uint32_t interpreter_to_interpreter_bridge_offset_;
-  uint32_t interpreter_to_compiled_code_bridge_offset_;
-  uint32_t jni_dlsym_lookup_offset_;
-  uint32_t quick_generic_jni_trampoline_offset_;
-  uint32_t quick_imt_conflict_trampoline_offset_;
-  uint32_t quick_resolution_trampoline_offset_;
-  uint32_t quick_to_interpreter_bridge_offset_;
+  // Boolean flags.
   const bool compile_pic_;
+  const bool compile_app_image_;
 
   // Size of pointers on the target architecture.
-  size_t target_ptr_size_;
+  PointerSize target_ptr_size_;
 
-  // Bin slot tracking for dirty object packing
-  size_t bin_slot_sizes_[kBinSize];  // Number of bytes in a bin
-  size_t bin_slot_offsets_[kBinSize];  // Number of bytes in previous bins.
-  size_t bin_slot_count_[kBinSize];  // Number of objects in a bin
-
-  // Cached size of the intern table for when we allocate memory.
-  size_t intern_table_bytes_;
+  // Image data indexed by the oat file index.
+  dchecked_vector<ImageInfo> image_infos_;
 
   // ArtField, ArtMethod relocating map. These are allocated as an array of structs but we want to
   // have one entry per art field for convenience. ArtFields are placed right after the end of the
   // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields.
   struct NativeObjectRelocation {
+    size_t oat_index;
     uintptr_t offset;
     NativeObjectRelocationType type;
 
     bool IsArtMethodRelocation() const {
       return type == kNativeObjectRelocationTypeArtMethodClean ||
-          type == kNativeObjectRelocationTypeArtMethodDirty;
+          type == kNativeObjectRelocationTypeArtMethodDirty ||
+          type == kNativeObjectRelocationTypeRuntimeMethod;
     }
   };
   std::unordered_map<void*, NativeObjectRelocation> native_object_relocations_;
 
   // Runtime ArtMethods which aren't reachable from any Class but need to be copied into the image.
   ArtMethod* image_methods_[ImageHeader::kImageMethodsCount];
-  // Fake length prefixed array for image methods. This array does not contain the actual
-  // ArtMethods. We only use it for the header and relocation addresses.
-  LengthPrefixedArray<ArtMethod> image_method_array_;
 
   // Counters for measurements, used for logging only.
   uint64_t dirty_methods_;
   uint64_t clean_methods_;
 
+  // Prune class memoization table to speed up ContainsBootClassLoaderNonImageClass.
+  std::unordered_map<mirror::Class*, bool> prune_class_memo_;
+
+  // Class loaders with a class table to write out. There should only be one class loader because
+  // dex2oat loads the dex files to be compiled into a single class loader. For the boot image,
+  // null is a valid entry.
+  std::unordered_set<mirror::ClassLoader*> class_loaders_;
+
+  // Which mode the image is stored as; see image.h.
+  const ImageHeader::StorageMode image_storage_mode_;
+
+  // The file names of oat files.
+  const std::vector<const char*>& oat_filenames_;
+
+  // Map of dex files to the indexes of oat files that they were compiled into.
+  const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map_;
+
+  friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
   friend class FixupVisitor;
+  friend class NativeLocationVisitor;
   friend class NonImageClassesVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3d1b42f..6f6a8f5 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -16,24 +16,24 @@
 
 #include "jit_compiler.h"
 
-#include "art_method-inl.h"
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "art_method-inl.h"
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
-#include "compiler_callbacks.h"
-#include "dex/pass_manager.h"
-#include "dex/quick_compiler_callbacks.h"
+#include "base/unix_file/fd_file.h"
+#include "debug/elf_debug_writer.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "jit/debugger_interface.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "oat_file-inl.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
+#include "optimizing/register_allocator.h"
 #include "thread_list.h"
-#include "verifier/method_verifier-inl.h"
 
 namespace art {
 namespace jit {
@@ -42,11 +42,11 @@
   return new JitCompiler();
 }
 
-extern "C" void* jit_load(CompilerCallbacks** callbacks) {
+extern "C" void* jit_load(bool* generate_debug_info) {
   VLOG(jit) << "loading jit compiler";
   auto* const jit_compiler = JitCompiler::Create();
   CHECK(jit_compiler != nullptr);
-  *callbacks = jit_compiler->GetCompilerCallbacks();
+  *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
   VLOG(jit) << "Done loading jit compiler";
   return jit_compiler;
 }
@@ -56,18 +56,40 @@
   delete reinterpret_cast<JitCompiler*>(handle);
 }
 
-extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self)
+extern "C" bool jit_compile_method(
+    void* handle, ArtMethod* method, Thread* self, bool osr)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
   DCHECK(jit_compiler != nullptr);
-  return jit_compiler->CompileMethod(self, method);
+  return jit_compiler->CompileMethod(self, method, osr);
 }
 
-JitCompiler::JitCompiler() : total_time_(0) {
-  auto* pass_manager_options = new PassManagerOptions;
-  pass_manager_options->SetDisablePassList("GVN,DCE,GVNCleanup");
+extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
+  DCHECK(jit_compiler != nullptr);
+  if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) {
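+    // Build an in-memory ELF file containing debug info for the newly loaded
+    // classes and register it with the JIT debugger interface.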
+    const ArrayRef<mirror::Class*> types_array(types, count);
+    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
+        kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array);
+    CreateJITCodeEntry(std::move(elf_file));
+  }
+}
+
+// Callers of this method assume it has NO_RETURN.
+NO_RETURN static void Usage(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  LOG(FATAL) << error;
+  va_end(ap);
+  exit(EXIT_FAILURE);
+}
+
+JitCompiler::JitCompiler() {
   compiler_options_.reset(new CompilerOptions(
-      CompilerOptions::kDefaultCompilerFilter,
+      CompilerFilter::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
       CompilerOptions::kDefaultLargeMethodThreshold,
       CompilerOptions::kDefaultSmallMethodThreshold,
@@ -75,6 +97,7 @@
       CompilerOptions::kDefaultNumDexMethodsThreshold,
       CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
+      /* no_inline_from */ nullptr,
       /* include_patch_information */ false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       Runtime::Current()->IsDebuggable(),
@@ -84,9 +107,16 @@
       /* implicit_suspend_checks */ false,
       /* pic */ true,  // TODO: Support non-PIC in optimizing.
       /* verbose_methods */ nullptr,
-      pass_manager_options,
       /* init_failure_output */ nullptr,
-      /* abort_on_hard_verifier_failure */ false));
+      /* abort_on_hard_verifier_failure */ false,
+      /* dump_cfg_file_name */ "",
+      /* dump_cfg_append */ false,
+      /* force_determinism */ false,
+      RegisterAllocator::kRegisterAllocatorDefault,
+      /* passes_to_run */ nullptr));
+  for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
+    compiler_options_->ParseCompilerOption(argument, Usage);
+  }
   const InstructionSet instruction_set = kRuntimeISA;
   for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) {
     VLOG(compiler) << "JIT compiler option " << option;
@@ -120,56 +150,65 @@
     instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
   }
   cumulative_logger_.reset(new CumulativeLogger("jit times"));
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
   method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              CompilerCallbacks::CallbackMode::kCompileApp));
   compiler_driver_.reset(new CompilerDriver(
       compiler_options_.get(),
-      verification_results_.get(),
+      /* verification_results */ nullptr,
       method_inliner_map_.get(),
       Compiler::kOptimizing,
       instruction_set,
       instruction_set_features_.get(),
-      /* image */ false,
+      /* boot_image */ false,
+      /* app_image */ false,
       /* image_classes */ nullptr,
       /* compiled_classes */ nullptr,
       /* compiled_methods */ nullptr,
       /* thread_count */ 1,
       /* dump_stats */ false,
       /* dump_passes */ false,
-      /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false,
       cumulative_logger_.get(),
       /* swap_fd */ -1,
-      /* profile_file */ ""));
+      /* profile_compilation_info */ nullptr));
   // Disable dedupe so we can remove compiled methods.
   compiler_driver_->SetDedupeEnabled(false);
   compiler_driver_->SetSupportBootImageFixup(false);
+
+  size_t thread_count = compiler_driver_->GetThreadCount();
+  if (compiler_options_->GetGenerateDebugInfo()) {
+#ifdef ART_TARGET_ANDROID
+    const char* prefix = "/data/misc/trace";
+#else
+    const char* prefix = "/tmp";
+#endif
+    DCHECK_EQ(thread_count, 1u)
+        << "Generating debug info only works with one compiler thread";
+    std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
+    perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
+    if (perf_file_ == nullptr) {
+      LOG(ERROR) << "Could not create perf file at " << perf_filename <<
+                    " Are you on a user build? Perf only works on userdebug/eng builds";
+    }
+  }
+
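+  // The DCHECK below guards ProfilingInfo's inline counter (bounded here by
+  // uint16_t) against overflow, e.g. (illustrative numbers) one compiler
+  // thread with an inline depth limit of 5 gives 1 * 5, far below 65535.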
+  size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
+  DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max())
+      << "ProfilingInfo's inline counter can potentially overflow";
 }
 
 JitCompiler::~JitCompiler() {
+  if (perf_file_ != nullptr) {
+    UNUSED(perf_file_->Flush());
+    UNUSED(perf_file_->Close());
+  }
 }
 
-bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
+bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
+  DCHECK(!method->IsProxyMethod());
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
-  const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Check if the method is already compiled.
-  if (runtime->GetJit()->GetCodeCache()->ContainsMethod(method)) {
-    VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;
-  }
-
-  // Don't compile the method if we are supposed to be deoptimized.
-  if (runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
-    return false;
-  }
-
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
@@ -178,127 +217,36 @@
   }
 
   // Do the compilation.
-  CompiledMethod* compiled_method = nullptr;
+  bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
-    compiled_method = compiler_driver_->CompileArtMethod(self, method);
+    JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method, osr);
+    if (success && (perf_file_ != nullptr)) {
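+      // Each perf map line is: "<hex code address> <code size> <pretty method name>".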
+      const void* ptr = method->GetEntryPointFromQuickCompiledCode();
+      std::ostringstream stream;
+      stream << std::hex
+             << reinterpret_cast<uintptr_t>(ptr)
+             << " "
+             << code_cache->GetMemorySizeOfCodePointer(ptr)
+             << " "
+             << PrettyMethod(method)
+             << std::endl;
+      std::string str = stream.str();
+      bool res = perf_file_->WriteFully(str.c_str(), str.size());
+      CHECK(res);
+    }
   }
 
   // Trim maps to reduce memory usage.
-  // TODO: measure how much this increases compile time.
+  // TODO: move this to an idle phase.
   {
     TimingLogger::ScopedTiming t2("TrimMaps", &logger);
-    runtime->GetArenaPool()->TrimMaps();
+    runtime->GetJitArenaPool()->TrimMaps();
   }
 
-  // Check if we failed compiling.
-  if (compiled_method == nullptr) {
-    return false;
-  }
-
-  total_time_ += NanoTime() - start_time;
-  bool result = false;
-  const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(method);
-
-  if (code != nullptr) {
-    // Already have some compiled code, just use this instead of linking.
-    // TODO: Fix recompilation.
-    method->SetEntryPointFromQuickCompiledCode(code);
-    result = true;
-  } else {
-    TimingLogger::ScopedTiming t2("LinkCode", &logger);
-    OatFile::OatMethod oat_method(nullptr, 0);
-    if (AddToCodeCache(method, compiled_method, &oat_method)) {
-      oat_method.LinkMethod(method);
-      CHECK(runtime->GetJit()->GetCodeCache()->ContainsMethod(method)) << PrettyMethod(method);
-      result = true;
-    }
-  }
-
-  // Remove the compiled method to save memory.
-  compiler_driver_->RemoveCompiledMethod(
-      MethodReference(h_class->GetDexCache()->GetDexFile(), method->GetDexMethodIndex()));
   runtime->GetJit()->AddTimingLogger(logger);
-  return result;
-}
-
-CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const {
-  return callbacks_.get();
-}
-
-bool JitCompiler::AddToCodeCache(ArtMethod* method,
-                                 const CompiledMethod* compiled_method,
-                                 OatFile::OatMethod* out_method) {
-  Runtime* runtime = Runtime::Current();
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-  const auto* quick_code = compiled_method->GetQuickCode();
-  if (quick_code == nullptr) {
-    return false;
-  }
-  const auto code_size = quick_code->size();
-  Thread* const self = Thread::Current();
-  auto* const mapping_table = compiled_method->GetMappingTable();
-  auto* const vmap_table = compiled_method->GetVmapTable();
-  auto* const gc_map = compiled_method->GetGcMap();
-  uint8_t* mapping_table_ptr = nullptr;
-  uint8_t* vmap_table_ptr = nullptr;
-  uint8_t* gc_map_ptr = nullptr;
-
-  if (mapping_table != nullptr) {
-    // Write out pre-header stuff.
-    mapping_table_ptr = code_cache->AddDataArray(
-        self, mapping_table->data(), mapping_table->data() + mapping_table->size());
-    if (mapping_table_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  if (vmap_table != nullptr) {
-    vmap_table_ptr = code_cache->AddDataArray(
-        self, vmap_table->data(), vmap_table->data() + vmap_table->size());
-    if (vmap_table_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  if (gc_map != nullptr) {
-    gc_map_ptr = code_cache->AddDataArray(
-        self, gc_map->data(), gc_map->data() + gc_map->size());
-    if (gc_map_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  uint8_t* const code = code_cache->CommitCode(self,
-                                               mapping_table_ptr,
-                                               vmap_table_ptr,
-                                               gc_map_ptr,
-                                               compiled_method->GetFrameSizeInBytes(),
-                                               compiled_method->GetCoreSpillMask(),
-                                               compiled_method->GetFpSpillMask(),
-                                               compiled_method->GetQuickCode()->data(),
-                                               compiled_method->GetQuickCode()->size());
-
-  if (code == nullptr) {
-    return false;
-  }
-
-  const size_t thumb_offset = compiled_method->CodeDelta();
-  const uint32_t code_offset = sizeof(OatQuickMethodHeader) + thumb_offset;
-  *out_method = OatFile::OatMethod(code, code_offset);
-  DCHECK_EQ(out_method->GetGcMap(), gc_map_ptr);
-  DCHECK_EQ(out_method->GetMappingTable(), mapping_table_ptr);
-  DCHECK_EQ(out_method->GetVmapTable(), vmap_table_ptr);
-  DCHECK_EQ(out_method->GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
-  DCHECK_EQ(out_method->GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
-  DCHECK_EQ(out_method->GetFpSpillMask(), compiled_method->GetFpSpillMask());
-  VLOG(jit)
-      << "JIT added "
-      << PrettyMethod(method) << "@" << method
-      << " ccache_size=" << PrettySize(code_cache->CodeCacheSize()) << ": "
-      << reinterpret_cast<void*>(code + code_offset)
-      << "," << reinterpret_cast<void*>(code + code_offset + code_size);
-  return true;
+  return success;
 }
 
 }  // namespace jit
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 757f3f3..533dccf 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -18,13 +18,10 @@
 #define ART_COMPILER_JIT_JIT_COMPILER_H_
 
 #include "base/mutex.h"
-#include "compiler_callbacks.h"
 #include "compiled_method.h"
-#include "dex/verification_results.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
-#include "oat_file.h"
 
 namespace art {
 
@@ -37,30 +34,32 @@
  public:
   static JitCompiler* Create();
   virtual ~JitCompiler();
-  bool CompileMethod(Thread* self, ArtMethod* method)
+
+  // Compilation entrypoint. Returns whether the compilation succeeded.
+  bool CompileMethod(Thread* self, ArtMethod* method, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  CompilerCallbacks* GetCompilerCallbacks() const;
-  size_t GetTotalCompileTime() const {
-    return total_time_;
+
+  CompilerOptions* GetCompilerOptions() const {
+    return compiler_options_.get();
+  }
+  CompilerDriver* GetCompilerDriver() const {
+    return compiler_driver_.get();
   }
 
  private:
-  uint64_t total_time_;
   std::unique_ptr<CompilerOptions> compiler_options_;
   std::unique_ptr<CumulativeLogger> cumulative_logger_;
-  std::unique_ptr<VerificationResults> verification_results_;
   std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
-  std::unique_ptr<CompilerCallbacks> callbacks_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+  std::unique_ptr<File> perf_file_;
 
   JitCompiler();
 
   // This is in the compiler since the runtime doesn't have access to the compiled method
   // structures.
-  bool AddToCodeCache(ArtMethod* method,
-                      const CompiledMethod* compiled_method,
-                      OatFile::OatMethod* out_method) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool AddToCodeCache(ArtMethod* method, const CompiledMethod* compiled_method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JitCompiler);
 };
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 0bfe8a2..4b056f5 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -18,39 +18,62 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "jni/quick/calling_convention.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 #include "jni/jni_cfi_test_expected.inc"
 
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class JNICFITest : public CFITest {
  public:
   // Enable this flag to generate the expected outputs.
   static constexpr bool kGenerateExpected = false;
 
-  void TestImpl(InstructionSet isa, const char* isa_str,
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
                 const std::vector<uint8_t>& expected_asm,
                 const std::vector<uint8_t>& expected_cfi) {
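+    // Dispatch to the instantiation matching the ISA's pointer width.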
+    if (Is64BitInstructionSet(isa)) {
+      TestImplSized<PointerSize::k64>(isa, isa_str, expected_asm, expected_cfi);
+    } else {
+      TestImplSized<PointerSize::k32>(isa, isa_str, expected_asm, expected_cfi);
+    }
+  }
+
+ private:
+  template <PointerSize kPointerSize>
+  void TestImplSized(InstructionSet isa,
+                     const char* isa_str,
+                     const std::vector<uint8_t>& expected_asm,
+                     const std::vector<uint8_t>& expected_cfi) {
     // Description of simple method.
     const bool is_static = true;
     const bool is_synchronized = false;
     const char* shorty = "IIFII";
+
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
     std::unique_ptr<JniCallingConvention> jni_conv(
-        JniCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+        JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
-        ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+        ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
-    const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+    ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
-    std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa));
+    std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm(
+        JNIMacroAssembler<kPointerSize>::Create(&arena, isa));
+    jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
     jni_asm->IncreaseFrameSize(32);
@@ -81,13 +104,25 @@
     TestImpl(isa, #isa, expected_asm, expected_cfi); \
   }
 
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
 TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
 TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
 TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
 TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
 TEST_ISA(kMips64)
+#endif
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 16b4386..da72c75 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -1,8 +1,7 @@
 static constexpr uint8_t expected_asm_kThumb2[] = {
     0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
-    0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20,
-    0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC,
-    0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+    0x21, 0x91, 0x8D, 0xED, 0x22, 0x0A, 0x23, 0x92, 0x24, 0x93, 0x88, 0xB0,
+    0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
 };
 static constexpr uint8_t expected_cfi_kThumb2[] = {
     0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
@@ -11,7 +10,7 @@
     0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05,
     0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05,
     0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01,
-    0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
+    0x4E, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
     0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
     0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
     0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
@@ -47,38 +46,38 @@
 // 0x00000008: sub sp, sp, #36
 // 0x0000000a: .cfi_def_cfa_offset: 128
 // 0x0000000a: str r0, [sp, #0]
-// 0x0000000c: str.w r1, [sp, #132]
-// 0x00000010: vstr.f32 s0, [sp, #136]
-// 0x00000014: str.w r2, [sp, #140]
-// 0x00000018: str.w r3, [sp, #144]
-// 0x0000001c: sub sp, sp, #32
-// 0x0000001e: .cfi_def_cfa_offset: 160
-// 0x0000001e: add sp, sp, #32
-// 0x00000020: .cfi_def_cfa_offset: 128
-// 0x00000020: .cfi_remember_state
-// 0x00000020: add sp, sp, #36
-// 0x00000022: .cfi_def_cfa_offset: 92
-// 0x00000022: vpop.f32 {s16-s31}
-// 0x00000026: .cfi_def_cfa_offset: 28
-// 0x00000026: .cfi_restore_extended: r80
-// 0x00000026: .cfi_restore_extended: r81
-// 0x00000026: .cfi_restore_extended: r82
-// 0x00000026: .cfi_restore_extended: r83
-// 0x00000026: .cfi_restore_extended: r84
-// 0x00000026: .cfi_restore_extended: r85
-// 0x00000026: .cfi_restore_extended: r86
-// 0x00000026: .cfi_restore_extended: r87
-// 0x00000026: .cfi_restore_extended: r88
-// 0x00000026: .cfi_restore_extended: r89
-// 0x00000026: .cfi_restore_extended: r90
-// 0x00000026: .cfi_restore_extended: r91
-// 0x00000026: .cfi_restore_extended: r92
-// 0x00000026: .cfi_restore_extended: r93
-// 0x00000026: .cfi_restore_extended: r94
-// 0x00000026: .cfi_restore_extended: r95
-// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc}
-// 0x0000002a: .cfi_restore_state
-// 0x0000002a: .cfi_def_cfa_offset: 128
+// 0x0000000c: str r1, [sp, #132]
+// 0x0000000e: vstr.f32 s0, [sp, #136]
+// 0x00000012: str r2, [sp, #140]
+// 0x00000014: str r3, [sp, #144]
+// 0x00000016: sub sp, sp, #32
+// 0x00000018: .cfi_def_cfa_offset: 160
+// 0x00000018: add sp, sp, #32
+// 0x0000001a: .cfi_def_cfa_offset: 128
+// 0x0000001a: .cfi_remember_state
+// 0x0000001a: add sp, sp, #36
+// 0x0000001c: .cfi_def_cfa_offset: 92
+// 0x0000001c: vpop.f32 {s16-s31}
+// 0x00000020: .cfi_def_cfa_offset: 28
+// 0x00000020: .cfi_restore_extended: r80
+// 0x00000020: .cfi_restore_extended: r81
+// 0x00000020: .cfi_restore_extended: r82
+// 0x00000020: .cfi_restore_extended: r83
+// 0x00000020: .cfi_restore_extended: r84
+// 0x00000020: .cfi_restore_extended: r85
+// 0x00000020: .cfi_restore_extended: r86
+// 0x00000020: .cfi_restore_extended: r87
+// 0x00000020: .cfi_restore_extended: r88
+// 0x00000020: .cfi_restore_extended: r89
+// 0x00000020: .cfi_restore_extended: r90
+// 0x00000020: .cfi_restore_extended: r91
+// 0x00000020: .cfi_restore_extended: r92
+// 0x00000020: .cfi_restore_extended: r93
+// 0x00000020: .cfi_restore_extended: r94
+// 0x00000020: .cfi_restore_extended: r95
+// 0x00000020: pop {r5, r6, r7, r8, r10, r11, pc}
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 128
 
 static constexpr uint8_t expected_asm_kArm64[] = {
     0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f3bda2f..b83985a 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/stack_trace_element.h"
+#include "nativeloader/native_loader.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
@@ -53,6 +54,11 @@
     check_generic_jni_ = false;
   }
 
+  void TearDown() OVERRIDE {
+    android::ResetNativeLoader();
+    CommonCompilerTest::TearDown();
+  }
+
   void SetCheckGenericJni(bool generic) {
     check_generic_jni_ = generic;
   }
@@ -92,11 +98,13 @@
       CompileForTest(class_loader_, direct, method_name, method_sig);
       // Start runtime.
       Thread::Current()->TransitionFromSuspendedToRunnable();
+      android::InitializeNativeLoader();
       bool started = runtime_->Start();
       CHECK(started);
     }
     // JNI operations after runtime start.
     env_ = Thread::Current()->GetJniEnv();
+    library_search_path_ = env_->NewStringUTF("");
     jklass_ = env_->FindClass("MyClassNatives");
     ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig;
 
@@ -167,7 +175,11 @@
   void StackArgsMixedImpl();
   void StackArgsSignExtendedMips64Impl();
 
+  void NormalNativeImpl();
+  void FastNativeImpl();
+
   JNIEnv* env_;
+  jstring library_search_path_;
   jmethodID jmethod_;
   bool check_generic_jni_;
 };
@@ -219,7 +231,8 @@
   // calling through stub will link with &Java_MyClassNatives_bar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, library_search_path_, &reason))
       << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
@@ -233,7 +246,8 @@
   // calling through stub will link with &Java_MyClassNatives_sbar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, library_search_path_, &reason))
       << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
@@ -1761,4 +1775,44 @@
 
 JNI_TEST(StackArgsSignExtendedMips64)
 
+void Java_MyClassNatives_normalNative(JNIEnv*, jclass) {
+  // Intentionally left empty.
+}
+
+// Methods not annotated with anything are not considered "fast native";
+// check that the annotation lookup does not find one.
+void JniCompilerTest::NormalNativeImpl() {
+  SetUpForTest(/* direct */ true,
+               "normalNative",
+               "()V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_normalNative));
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ASSERT_TRUE(method != nullptr);
+
+  EXPECT_FALSE(method->IsAnnotatedWithFastNative());
+}
+JNI_TEST(NormalNative)
+
+// Methods annotated with @FastNative are considered "fast native";
+// check that the annotation lookup succeeds.
+void Java_MyClassNatives_fastNative(JNIEnv*, jclass) {
+  // Intentionally left empty.
+}
+
+void JniCompilerTest::FastNativeImpl() {
+  SetUpForTest(/* direct */ true,
+               "fastNative",
+               "()V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_fastNative));
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ASSERT_TRUE(method != nullptr);
+
+  EXPECT_TRUE(method->IsAnnotatedWithFastNative());
+}
+JNI_TEST(FastNative)
+
 }  // namespace art
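The new TearDown() override pairs the android::InitializeNativeLoader() call made just before runtime start with a matching android::ResetNativeLoader(), so process-global native-loader state cannot leak from one test into the next. A minimal sketch of the same setup/teardown pairing, using hypothetical no-op stand-ins rather than ART's actual nativeloader API:

    #include <gtest/gtest.h>

    // Hypothetical stand-ins for android::InitializeNativeLoader() and
    // android::ResetNativeLoader(); the real functions manage process-global
    // native-library search state.
    static bool g_loader_initialized = false;
    static void InitializeNativeLoader() { g_loader_initialized = true; }
    static void ResetNativeLoader() { g_loader_initialized = false; }

    class NativeLoaderTest : public ::testing::Test {
     protected:
      void SetUp() override {
        InitializeNativeLoader();  // mirrors the init before runtime_->Start()
      }
      void TearDown() override {
        // Runs even if the test body fails, keeping later tests isolated.
        ResetNativeLoader();
      }
    };

    TEST_F(NativeLoaderTest, LoaderIsInitialized) {
      EXPECT_TRUE(g_loader_initialized);
    }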
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 9d2732a..0d16260 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm {
 
+static_assert(kArmPointerSize == PointerSize::k32, "Unexpected ARM pointer size");
+
 // Used by hard float.
 static const Register kHFCoreArgumentRegisters[] = {
   R0, R1, R2, R3
@@ -31,10 +33,6 @@
   S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
 };
 
-static const SRegister kHFSCalleeSaveRegisters[] = {
-  S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
-};
-
 static const DRegister kHFDArgumentRegisters[] = {
   D0, D1, D2, D3, D4, D5, D6, D7
 };
@@ -42,6 +40,57 @@
 static_assert(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
     "ks d argument registers mismatch");
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    ArmManagedRegister::FromCoreRegister(R5),
+    ArmManagedRegister::FromCoreRegister(R6),
+    ArmManagedRegister::FromCoreRegister(R7),
+    ArmManagedRegister::FromCoreRegister(R8),
+    ArmManagedRegister::FromCoreRegister(R10),
+    ArmManagedRegister::FromCoreRegister(R11),
+    // Hard float registers.
+    ArmManagedRegister::FromSRegister(S16),
+    ArmManagedRegister::FromSRegister(S17),
+    ArmManagedRegister::FromSRegister(S18),
+    ArmManagedRegister::FromSRegister(S19),
+    ArmManagedRegister::FromSRegister(S20),
+    ArmManagedRegister::FromSRegister(S21),
+    ArmManagedRegister::FromSRegister(S22),
+    ArmManagedRegister::FromSRegister(S23),
+    ArmManagedRegister::FromSRegister(S24),
+    ArmManagedRegister::FromSRegister(S25),
+    ArmManagedRegister::FromSRegister(S26),
+    ArmManagedRegister::FromSRegister(S27),
+    ArmManagedRegister::FromSRegister(S28),
+    ArmManagedRegister::FromSRegister(S29),
+    ArmManagedRegister::FromSRegister(S30),
+    ArmManagedRegister::FromSRegister(S31)
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // LR is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << LR;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsCoreRegister()) {
+      result |= (1 << r.AsArm().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsSRegister()) {
+      result |= (1 << r.AsArm().AsSRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -208,7 +257,7 @@
 
 ArmJniCallingConvention::ArmJniCallingConvention(bool is_static, bool is_synchronized,
                                                  const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
+    : JniCallingConvention(is_static, is_synchronized, shorty, kArmPointerSize) {
   // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
   // or jclass for static methods and the JNIEnv. We start at the aligned register r2.
   size_t padding = 0;
@@ -223,32 +272,15 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R5));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R6));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R7));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R8));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R10));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R11));
-
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    callee_save_regs_.push_back(ArmManagedRegister::FromSRegister(kHFSCalleeSaveRegisters[i]));
-  }
 }
 
 uint32_t ArmJniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << R5 | 1 << R6 | 1 << R7 | 1 << R8 | 1 << R10 | 1 << R11 | 1 << LR;
-  return result;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t ArmJniCallingConvention::FpSpillMask() const {
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    result |= (1 << kHFSCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const {
@@ -257,9 +289,10 @@
 
 size_t ArmJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
-  size_t frame_data_size = kArmPointerSize + (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  size_t frame_data_size = static_cast<size_t>(kArmPointerSize)
+      + (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kArmPointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -269,6 +302,10 @@
                  kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> ArmJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void ArmJniCallingConvention::Next() {
@@ -309,7 +346,8 @@
 
 FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() {
   CHECK_GE(itr_slots_, 4u);
-  size_t offset = displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kFramePointerSize);
+  size_t offset =
+      displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
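The CalculateCoreCalleeSpillMask()/CalculateFpCalleeSpillMask() helpers above fold the register array into bitmasks during compilation, so the mask can never drift out of sync with the list it is derived from. A self-contained sketch of the pattern, with a plain enum standing in for ArmManagedRegister and an expected mask value computed by hand:

    #include <cstdint>

    enum Reg : uint32_t { R5 = 5, R6 = 6, R7 = 7, R8 = 8, R10 = 10, R11 = 11, LR = 14 };

    static constexpr Reg kCalleeSaves[] = { R5, R6, R7, R8, R10, R11 };

    // C++14 relaxed constexpr: the loop runs at compile time.
    static constexpr uint32_t CalculateSpillMask() {
      uint32_t result = 1u << LR;  // LR is saved but kept out of the list
      for (Reg r : kCalleeSaves) {
        result |= 1u << r;
      }
      return result;
    }

    static constexpr uint32_t kSpillMask = CalculateSpillMask();
    // A wrong register list now fails the build instead of miscompiling frames.
    static_assert(kSpillMask == 0x4DE0u, "unexpected callee-save mask");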
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 35b5093..7c717cc 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -17,17 +17,21 @@
 #ifndef ART_COMPILER_JNI_QUICK_ARM_CALLING_CONVENTION_ARM_H_
 #define ART_COMPILER_JNI_QUICK_ARM_CALLING_CONVENTION_ARM_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace arm {
 
-constexpr size_t kFramePointerSize = 4;
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k32);
 
 class ArmManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   ArmManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k32) {}
   ~ArmManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -58,9 +62,7 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -78,9 +80,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
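Returning ArrayRef<const ManagedRegister> instead of a per-instance std::vector means every calling-convention object shares one immutable static table and pays for no heap allocation. A sketch of the idea, with a minimal non-owning view type standing in for ART's ArrayRef (utils/array_ref.h):

    #include <cstddef>

    // Minimal stand-in for ArrayRef<const T>: pointer + length, no ownership.
    template <typename T>
    class Span {
     public:
      template <size_t N>
      constexpr Span(const T (&array)[N]) : data_(array), size_(N) {}
      constexpr const T* begin() const { return data_; }
      constexpr const T* end() const { return data_ + size_; }
      constexpr size_t size() const { return size_; }
     private:
      const T* data_;
      size_t size_;
    };

    static constexpr int kCalleeSaves[] = { 5, 6, 7, 8, 10, 11 };

    // Shared by all instances: no per-object vector, no heap traffic.
    Span<int> CalleeSaveRegisters() {
      return Span<int>(kCalleeSaves);
    }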
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 9aef10e..afa707d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm64 {
 
+static_assert(kArm64PointerSize == PointerSize::k64, "Unexpected ARM64 pointer size");
+
 static const XRegister kXArgumentRegisters[] = {
   X0, X1, X2, X3, X4, X5, X6, X7
 };
@@ -38,10 +40,65 @@
   S0, S1, S2, S3, S4, S5, S6, S7
 };
 
-static const DRegister kDCalleeSaveRegisters[] = {
-  D8, D9, D10, D11, D12, D13, D14, D15
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    // Note: The native JNI function may call into VM runtime functions that can suspend
+    // the thread or trigger GC, and the JNI method frame then becomes the top quick
+    // frame. So, to satisfy the GC, we save LR and the callee-save registers, similar
+    // to the CalleeSaveMethod(RefOnly) frame.
+    // The JNI function is the native function that the Java code wants to call.
+    // The JNI method is the method compiled by the JNI compiler.
+    // Call chain: managed code (Java) --> JNI method --> JNI function.
+    // The thread register (X19) is saved on the stack.
+    Arm64ManagedRegister::FromXRegister(X19),
+    Arm64ManagedRegister::FromXRegister(X20),
+    Arm64ManagedRegister::FromXRegister(X21),
+    Arm64ManagedRegister::FromXRegister(X22),
+    Arm64ManagedRegister::FromXRegister(X23),
+    Arm64ManagedRegister::FromXRegister(X24),
+    Arm64ManagedRegister::FromXRegister(X25),
+    Arm64ManagedRegister::FromXRegister(X26),
+    Arm64ManagedRegister::FromXRegister(X27),
+    Arm64ManagedRegister::FromXRegister(X28),
+    Arm64ManagedRegister::FromXRegister(X29),
+    Arm64ManagedRegister::FromXRegister(LR),
+    // Hard float registers.
+    // Consider the chain java_method_1 --> JNI method --> JNI function --> java_method_2:
+    // we may break in java_method_2 and still need the values of the DEX registers in
+    // java_method_1, so all managed-code callee-saves need to be saved.
+    Arm64ManagedRegister::FromDRegister(D8),
+    Arm64ManagedRegister::FromDRegister(D9),
+    Arm64ManagedRegister::FromDRegister(D10),
+    Arm64ManagedRegister::FromDRegister(D11),
+    Arm64ManagedRegister::FromDRegister(D12),
+    Arm64ManagedRegister::FromDRegister(D13),
+    Arm64ManagedRegister::FromDRegister(D14),
+    Arm64ManagedRegister::FromDRegister(D15),
 };
 
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  uint32_t result = 0u;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsXRegister()) {
+      result |= (1 << r.AsArm64().AsXRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsDRegister()) {
+      result |= (1 << r.AsArm64().AsDRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Arm64ManagedRegister::FromXRegister(X20);  // saved on entry restored on exit
@@ -156,48 +213,15 @@
 // JNI calling convention
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
                                                      const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  uint32_t core_spill_mask = CoreSpillMask();
-  DCHECK_EQ(XZR, kNumberOfXRegisters - 1);  // Exclude XZR from the loop (avoid 1 << 32).
-  for (int x_reg = 0; x_reg < kNumberOfXRegisters - 1; ++x_reg) {
-    if (((1 << x_reg) & core_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromXRegister(static_cast<XRegister>(x_reg)));
-    }
-  }
-
-  uint32_t fp_spill_mask = FpSpillMask();
-  for (int d_reg = 0; d_reg < kNumberOfDRegisters; ++d_reg) {
-    if (((1 << d_reg) & fp_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromDRegister(static_cast<DRegister>(d_reg)));
-    }
-  }
+    : JniCallingConvention(is_static, is_synchronized, shorty, kArm64PointerSize) {
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor.
-  // Note: The native jni function may call to some VM runtime functions which may suspend
-  // or trigger GC. And the jni method frame will become top quick frame in those cases.
-  // So we need to satisfy GC to save LR and callee-save registers which is similar to
-  // CalleeSaveMethod(RefOnly) frame.
-  // Jni function is the native function which the java code wants to call.
-  // Jni method is the method that compiled by jni compiler.
-  // Call chain: managed code(java) --> jni method --> jni function.
-  // Thread register(X19) is saved on stack.
-  return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
-         1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t Arm64JniCallingConvention::FpSpillMask() const {
-  // Considering the case, java_method_1 --> jni method --> jni function --> java_method_2, we may
-  // break on java_method_2 and we still need to find out the values of DEX registers in
-  // java_method_1. So all callee-saves(in managed code) need to be saved.
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kDCalleeSaveRegisters); ++i) {
-    result |= (1 << kDCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
@@ -209,7 +233,7 @@
   size_t frame_data_size = kFramePointerSize +
       CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t);
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kArm64PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -218,6 +242,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
     return (itr_float_and_doubles_ < 8);
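The deleted arm64 constructor carried a subtle guard: DCHECK_EQ(XZR, kNumberOfXRegisters - 1), with the comment "avoid 1 << 32", because shifting a uint32_t by 32 or more is undefined behavior. The new constexpr mask builders sidestep the problem by iterating the register list directly, but the guard itself is worth a sketch (hypothetical register indices):

    #include <cstddef>
    #include <cstdint>

    // Building a 32-bit mask from register indices: any index >= 32 (such as
    // XZR in the removed loop) must be excluded, since 1u << 32 is UB.
    constexpr uint32_t MaskFromIndices(const int* indices, size_t count) {
      uint32_t mask = 0u;
      for (size_t i = 0; i < count; ++i) {
        if (indices[i] < 32) {  // guard against an undefined shift
          mask |= 1u << indices[i];
        }
      }
      return mask;
    }

    constexpr int kIndices[] = { 19, 20, 30 };
    static_assert(MaskFromIndices(kIndices, 3) == ((1u << 19) | (1u << 20) | (1u << 30)),
                  "mask mismatch");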
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 37c92b2..90b12e5 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -17,17 +17,21 @@
 #ifndef ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
 #define ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace arm64 {
 
-constexpr size_t kFramePointerSize = 8;
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
 
 class Arm64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   Arm64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k64) {}
   ~Arm64ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -57,9 +61,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -77,9 +79,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index cef8c5d..c7ed9c9 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -46,37 +46,51 @@
 
 // Managed runtime calling convention
 
-ManagedRuntimeCallingConvention* ManagedRuntimeCallingConvention::Create(
-    bool is_static, bool is_synchronized, const char* shorty, InstructionSet instruction_set) {
+std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention::Create(
+    ArenaAllocator* arena,
+    bool is_static,
+    bool is_synchronized,
+    const char* shorty,
+    InstructionSet instruction_set) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return new arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) arm64::Arm64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) mips::MipsManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) mips64::Mips64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) x86_64::X86_64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+      UNREACHABLE();
   }
 }
 
@@ -132,38 +146,46 @@
 
 // JNI calling convention
 
-JniCallingConvention* JniCallingConvention::Create(bool is_static, bool is_synchronized,
-                                                   const char* shorty,
-                                                   InstructionSet instruction_set) {
+std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocator* arena,
+                                                                   bool is_static,
+                                                                   bool is_synchronized,
+                                                                   const char* shorty,
+                                                                   InstructionSet instruction_set) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return new arm::ArmJniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) arm::ArmJniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsJniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) mips::MipsJniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) x86::X86JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+      UNREACHABLE();
   }
 }
 
@@ -277,7 +299,7 @@
 
 size_t JniCallingConvention::CurrentParamSize() {
   if (itr_args_ <= kObjectOrClass) {
-    return frame_pointer_size_;  // JNIEnv or jobject/jclass
+    return static_cast<size_t>(frame_pointer_size_);  // JNIEnv or jobject/jclass
   } else {
     int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
     return ParamSize(arg_pos);
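These factories now placement-new each convention into an ArenaAllocator and return std::unique_ptr, so storage comes from the arena while the smart pointer still runs the destructor; CallingConvention's new DeletableArenaObject base (see the header change below) makes the eventual delete a no-op on arena memory. A reduced sketch of the combination, with a toy bump allocator in place of ART's ArenaAllocator:

    #include <cstddef>
    #include <memory>

    // Toy bump allocator standing in for ART's ArenaAllocator; it never frees
    // individual objects and releases everything when it goes out of scope.
    class Arena {
     public:
      void* Alloc(size_t bytes) {
        void* p = &buffer_[used_];
        used_ += (bytes + 15) & ~size_t{15};  // keep 16-byte alignment
        return p;
      }
     private:
      alignas(16) char buffer_[4096];
      size_t used_ = 0;
    };

    struct Convention {
      explicit Convention(bool is_static) : is_static_(is_static) {}
      // Storage comes from the arena, as in "new (arena) T(...)" above...
      static void* operator new(size_t bytes, Arena* arena) { return arena->Alloc(bytes); }
      // ...and delete only runs the destructor; the arena owns the memory.
      // This mirrors what DeletableArenaObject provides in ART.
      static void operator delete(void*) {}
      bool is_static_;
    };

    std::unique_ptr<Convention> Create(Arena* arena, bool is_static) {
      return std::unique_ptr<Convention>(new (arena) Convention(is_static));
    }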
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 243d124..995fa51 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -17,16 +17,18 @@
 #ifndef ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
-#include <vector>
+#include "base/arena_object.h"
+#include "base/enums.h"
 #include "handle_scope.h"
 #include "primitive.h"
 #include "thread.h"
+#include "utils/array_ref.h"
 #include "utils/managed_register.h"
 
 namespace art {
 
 // Top-level abstraction for different calling conventions.
-class CallingConvention {
+class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConvention> {
  public:
   bool IsReturnAReference() const { return shorty_[0] == 'L'; }
 
@@ -69,8 +71,10 @@
   virtual ~CallingConvention() {}
 
  protected:
-  CallingConvention(bool is_static, bool is_synchronized, const char* shorty,
-                    size_t frame_pointer_size)
+  CallingConvention(bool is_static,
+                    bool is_synchronized,
+                    const char* shorty,
+                    PointerSize frame_pointer_size)
       : itr_slots_(0), itr_refs_(0), itr_args_(0), itr_longs_and_doubles_(0),
         itr_float_and_doubles_(0), displacement_(0),
         frame_pointer_size_(frame_pointer_size),
@@ -197,7 +201,7 @@
   // Space for frames below this on the stack.
   FrameOffset displacement_;
   // The size of a pointer.
-  const size_t frame_pointer_size_;
+  const PointerSize frame_pointer_size_;
   // The size of a reference entry within the handle scope.
   const size_t handle_scope_pointer_size_;
 
@@ -221,9 +225,11 @@
 // | { Method* }             | <-- SP
 class ManagedRuntimeCallingConvention : public CallingConvention {
  public:
-  static ManagedRuntimeCallingConvention* Create(bool is_static, bool is_synchronized,
-                                                 const char* shorty,
-                                                 InstructionSet instruction_set);
+  static std::unique_ptr<ManagedRuntimeCallingConvention> Create(ArenaAllocator* arena,
+                                                                 bool is_static,
+                                                                 bool is_synchronized,
+                                                                 const char* shorty,
+                                                                 InstructionSet instruction_set);
 
   // Register that holds the incoming method argument
   virtual ManagedRegister MethodRegister() = 0;
@@ -249,8 +255,10 @@
   virtual const ManagedRegisterEntrySpills& EntrySpills() = 0;
 
  protected:
-  ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty,
-                                  size_t frame_pointer_size)
+  ManagedRuntimeCallingConvention(bool is_static,
+                                  bool is_synchronized,
+                                  const char* shorty,
+                                  PointerSize frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 };
 
@@ -270,8 +278,11 @@
 // callee saves for frames above this one.
 class JniCallingConvention : public CallingConvention {
  public:
-  static JniCallingConvention* Create(bool is_static, bool is_synchronized, const char* shorty,
-                                      InstructionSet instruction_set);
+  static std::unique_ptr<JniCallingConvention> Create(ArenaAllocator* arena,
+                                                      bool is_static,
+                                                      bool is_synchronized,
+                                                      const char* shorty,
+                                                      InstructionSet instruction_set);
 
   // Size of frame excluding space for outgoing args (it's assumed Method* is
   // always at the bottom of a frame, but this doesn't work for outgoing
@@ -292,7 +303,7 @@
   virtual bool RequiresSmallResultTypeExtension() const = 0;
 
   // Callee save registers to spill prior to native code (which may clobber)
-  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
+  virtual ArrayRef<const ManagedRegister> CalleeSaveRegisters() const = 0;
 
   // Spill mask values
   virtual uint32_t CoreSpillMask() const = 0;
@@ -320,7 +331,7 @@
 
   // Position of handle scope and interior fields
   FrameOffset HandleScopeOffset() const {
-    return FrameOffset(this->displacement_.Int32Value() + frame_pointer_size_);
+    return FrameOffset(this->displacement_.Int32Value() + static_cast<size_t>(frame_pointer_size_));
     // above Method reference
   }
 
@@ -348,8 +359,10 @@
     kObjectOrClass = 1
   };
 
-  JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty,
-                       size_t frame_pointer_size)
+  JniCallingConvention(bool is_static,
+                       bool is_synchronized,
+                       const char* shorty,
+                       PointerSize frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 
   // Number of stack slots for outgoing arguments, above which the handle scope is
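With frame_pointer_size_ now a PointerSize instead of a raw size_t, every place that mixes it into byte arithmetic needs an explicit static_cast, as in HandleScopeOffset() above; accidental size math no longer type-checks silently. A sketch of the idiom (enumerator values chosen to match what base/enums.h defines):

    #include <cstddef>

    // Strongly-typed pointer width, as in ART's base/enums.h.
    enum class PointerSize : size_t {
      k32 = 4,
      k64 = 8,
    };

    // The enum class has no implicit conversion, so mixing a pointer size
    // into offset arithmetic forces a visible cast at each use site.
    size_t FrameDataSize(PointerSize frame_pointer_size, size_t num_callee_saves) {
      // Method* slot, plus return-address and local-reference-state slots,
      // plus one slot per callee save.
      return (1 + 2 + num_callee_saves) * static_cast<size_t>(frame_pointer_size);
    }

    static_assert(static_cast<size_t>(PointerSize::k64) == 8, "k64 must be 8 bytes");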
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 34f0802..d092c3f 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -17,13 +17,17 @@
 #include "jni_compiler.h"
 
 #include <algorithm>
+#include <ios>
 #include <memory>
 #include <vector>
 #include <fstream>
 
 #include "art_method.h"
+#include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "memory_region.h"
 #include "calling_convention.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -32,35 +36,51 @@
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_env_ext.h"
+#include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/mips/managed_register_mips.h"
 #include "utils/mips64/managed_register_mips64.h"
 #include "utils/x86/managed_register_x86.h"
+#include "utils.h"
 #include "thread.h"
 
 #define __ jni_asm->
 
 namespace art {
 
-static void CopyParameter(Assembler* jni_asm,
+using JniOptimizationFlags = Compiler::JniOptimizationFlags;
+
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size);
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg);
 
+template <PointerSize kPointerSize>
+static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler(
+    ArenaAllocator* arena, InstructionSet isa, const InstructionSetFeatures* features) {
+  return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features);
+}
+
 // Generate the JNI bridge for the given method, general contract:
 // - Arguments are in the managed runtime format, either on stack or in
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
-                                            uint32_t access_flags, uint32_t method_idx,
-                                            const DexFile& dex_file) {
+template <PointerSize kPointerSize>
+static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
+                                                   uint32_t access_flags,
+                                                   uint32_t method_idx,
+                                                   const DexFile& dex_file,
+                                                   JniOptimizationFlags optimization_flags) {
   const bool is_native = (access_flags & kAccNative) != 0;
   CHECK(is_native);
   const bool is_static = (access_flags & kAccStatic) != 0;
@@ -68,14 +88,31 @@
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
-  const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
+
+  // True if the method was annotated with @FastNative.
+  const bool is_fast_native =
+      (static_cast<uint32_t>(optimization_flags) & Compiler::kFastNative) != 0;
+
+  VLOG(jni) << "JniCompile: Method :: "
+            << art::PrettyMethod(method_idx, dex_file, /* with signature */ true)
+            << " :: access_flags = " << std::hex << access_flags << std::dec;
+
+  if (UNLIKELY(is_fast_native)) {
+    VLOG(jni) << "JniCompile: Fast native method detected :: "
+              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true);
+  }
+
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+
   // Calling conventions used to iterate over parameters to method
   std::unique_ptr<JniCallingConvention> main_jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+      JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, instruction_set));
   bool reference_return = main_jni_conv->IsReturnAReference();
 
   std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
-      ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+      ManagedRuntimeCallingConvention::Create(
+          &arena, is_static, is_synchronized, shorty, instruction_set));
 
   // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
   //     method and the current thread.
@@ -90,12 +127,13 @@
     jni_end_shorty = "V";
   }
 
-  std::unique_ptr<JniCallingConvention> end_jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
+  std::unique_ptr<JniCallingConvention> end_jni_conv(JniCallingConvention::Create(
+      &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set, instruction_set_features));
-  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetGenerateDebugInfo());
+  std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
+      GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
+  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
@@ -105,7 +143,7 @@
 
   // 1. Build the frame saving all callee saves
   const size_t frame_size(main_jni_conv->FrameSize());
-  const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
+  ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
@@ -116,21 +154,12 @@
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
 
-  if (is_64_bit_target) {
-    __ CopyRawPtrFromThread64(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<8>(),
+  __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
+                          Thread::TopHandleScopeOffset<kPointerSize>(),
+                          mr_conv->InterproceduralScratchRegister());
+  __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
+                              main_jni_conv->HandleScopeOffset(),
                               mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread64(Thread::TopHandleScopeOffset<8>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  } else {
-    __ CopyRawPtrFromThread32(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<4>(),
-                              mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread32(Thread::TopHandleScopeOffset<4>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  }
 
   // 3. Place incoming reference arguments into handle scope
   main_jni_conv->Next();  // Skip JNIEnv*
@@ -180,11 +209,7 @@
   }
 
   // 4. Write out the end of the quick frames.
-  if (is_64_bit_target) {
-    __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<8>());
-  } else {
-    __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<4>());
-  }
+  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
 
   // 5. Move frame down to allow space for out going args.
   const size_t main_out_arg_size = main_jni_conv->OutArgSize();
@@ -194,8 +219,8 @@
   // Call the read barrier for the declaring class loaded from the method for a static call.
   // Note that we always have outgoing param space available for at least two params.
   if (kUseReadBarrier && is_static) {
-    ThreadOffset<4> read_barrier32 = QUICK_ENTRYPOINT_OFFSET(4, pReadBarrierJni);
-    ThreadOffset<8> read_barrier64 = QUICK_ENTRYPOINT_OFFSET(8, pReadBarrierJni);
+    ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
+                                                                      pReadBarrierJni);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     main_jni_conv->Next();  // Skip JNIEnv.
     FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
@@ -215,21 +240,13 @@
     // Pass the current thread as the second argument and call.
     if (main_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-      if (is_64_bit_target) {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
-                main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
-                main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ Call(main_jni_conv->CurrentParamRegister(),
+              Offset(read_barrier),
+              main_jni_conv->InterproceduralScratchRegister());
     } else {
       __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                           main_jni_conv->InterproceduralScratchRegister());
-      if (is_64_bit_target) {
-        __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ CallFromThread(read_barrier, main_jni_conv->InterproceduralScratchRegister());
     }
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
   }
@@ -238,10 +255,13 @@
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
   //    arguments.
-  ThreadOffset<4> jni_start32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStartSynchronized)
-                                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStart);
-  ThreadOffset<8> jni_start64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStartSynchronized)
-                                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStart);
+  ThreadOffset<kPointerSize> jni_start =
+      is_synchronized
+          ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+          : (is_fast_native
+                 ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
+                 : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
+
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
   FrameOffset locked_object_handle_scope_offset(0);
   if (is_synchronized) {
@@ -262,21 +282,13 @@
   }
   if (main_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start64),
-              main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start32),
-              main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(main_jni_conv->CurrentParamRegister(),
+            Offset(jni_start),
+            main_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                         main_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(jni_start64, main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(jni_start32, main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
   }
   if (is_synchronized) {  // Check for exceptions from monitor enter.
     __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
@@ -338,20 +350,12 @@
   if (main_jni_conv->IsCurrentParamInRegister()) {
     ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
     DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    if (is_64_bit_target) {
-      __ LoadRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>());
-    } else {
-      __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>());
-    }
+    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
   } else {
     FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    if (is_64_bit_target) {
-      __ CopyRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CopyRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CopyRawPtrFromThread(jni_env,
+                            Thread::JniEnvOffset<kPointerSize>(),
+                            main_jni_conv->InterproceduralScratchRegister());
   }
 
   // 9. Plant call to native code associated with method.
@@ -380,10 +384,13 @@
         main_jni_conv->GetReturnType() == Primitive::kPrimDouble &&
         return_save_location.Uint32Value() % 8 != 0) {
       // Ensure doubles are 8-byte aligned for MIPS
-      return_save_location = FrameOffset(return_save_location.Uint32Value() + kMipsPointerSize);
+      return_save_location = FrameOffset(return_save_location.Uint32Value()
+                                             + static_cast<size_t>(kMipsPointerSize));
     }
     CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
-    __ Store(return_save_location, main_jni_conv->ReturnRegister(), main_jni_conv->SizeOfReturnValue());
+    __ Store(return_save_location,
+             main_jni_conv->ReturnRegister(),
+             main_jni_conv->SizeOfReturnValue());
   }
 
   // Increase frame size for out args if needed by the end_jni_conv.
@@ -399,21 +406,21 @@
   }
   //     thread.
   end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-  ThreadOffset<4> jni_end32(-1);
-  ThreadOffset<8> jni_end64(-1);
+
+  ThreadOffset<kPointerSize> jni_end(-1);
   if (reference_return) {
     // Pass result.
-    jni_end32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReferenceSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReference);
-    jni_end64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndWithReferenceSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndWithReference);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
     SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
     end_jni_conv->Next();
   } else {
-    jni_end32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEnd);
-    jni_end64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEnd);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                  : (is_fast_native
+                         ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
+                         : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
   }
   // Pass saved local reference state.
   if (end_jni_conv->IsCurrentParamOnStack()) {
@@ -440,21 +447,13 @@
   }
   if (end_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end64),
-              end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end32),
-              end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(end_jni_conv->CurrentParamRegister(),
+            Offset(jni_end),
+            end_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
                         end_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(ThreadOffset<8>(jni_end64), end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(ThreadOffset<4>(jni_end32), end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
   }
 
   // 13. Reload return value
@@ -487,16 +486,15 @@
                                                  frame_size,
                                                  main_jni_conv->CoreSpillMask(),
                                                  main_jni_conv->FpSpillMask(),
-                                                 nullptr,  // src_mapping_table.
-                                                 ArrayRef<const uint8_t>(),  // mapping_table.
+                                                 ArrayRef<const SrcMapElem>(),
                                                  ArrayRef<const uint8_t>(),  // vmap_table.
-                                                 ArrayRef<const uint8_t>(),  // native_gc_map.
                                                  ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
                                                  ArrayRef<const LinkerPatch>());
 }
 
 // Copy a single parameter from the managed to the JNI calling convention.
-static void CopyParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size) {
@@ -585,7 +583,8 @@
   }
 }
 
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg) {
   if (jni_conv->IsCurrentParamOnStack()) {
@@ -598,9 +597,18 @@
   }
 }
 
-CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler, uint32_t access_flags,
-                                         uint32_t method_idx, const DexFile& dex_file) {
-  return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
+CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler,
+                                         uint32_t access_flags,
+                                         uint32_t method_idx,
+                                         const DexFile& dex_file,
+                                         Compiler::JniOptimizationFlags optimization_flags) {
+  if (Is64BitInstructionSet(compiler->GetInstructionSet())) {
+    return ArtJniCompileMethodInternal<PointerSize::k64>(
+        compiler, access_flags, method_idx, dex_file, optimization_flags);
+  } else {
+    return ArtJniCompileMethodInternal<PointerSize::k32>(
+        compiler, access_flags, method_idx, dex_file, optimization_flags);
+  }
 }
 
 }  // namespace art
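ArtQuickJniCompileMethod() now makes the 32-bit/64-bit decision exactly once: a single runtime branch picks the ArtJniCompileMethodInternal instantiation, and inside the template kPointerSize is a compile-time constant, which is what let all of the is_64_bit_target branches above collapse. A minimal sketch of this runtime-to-compile-time dispatch:

    #include <cstddef>
    #include <iostream>

    enum class PointerSize : size_t { k32 = 4, k64 = 8 };

    // Inside the template, the pointer size is a constant: no flag to re-check
    // on every emitted load, store, or call.
    template <PointerSize kPointerSize>
    void CompileInternal() {
      constexpr size_t frame_pointer_size = static_cast<size_t>(kPointerSize);
      std::cout << "frame pointer size: " << frame_pointer_size << "\n";
    }

    // The only runtime branch: select the instantiation at the entry point.
    void Compile(bool is_64_bit_target) {
      if (is_64_bit_target) {
        CompileInternal<PointerSize::k64>();
      } else {
        CompileInternal<PointerSize::k32>();
      }
    }

    int main() {
      Compile(/* is_64_bit_target */ true);   // prints 8
      Compile(/* is_64_bit_target */ false);  // prints 4
      return 0;
    }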
diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h
index 46277f1..26c32a3 100644
--- a/compiler/jni/quick/jni_compiler.h
+++ b/compiler/jni/quick/jni_compiler.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_JNI_QUICK_JNI_COMPILER_H_
 #define ART_COMPILER_JNI_QUICK_JNI_COMPILER_H_
 
+#include "compiler.h"
 #include "dex_file.h"
 
 namespace art {
@@ -24,8 +25,11 @@
 class CompilerDriver;
 class CompiledMethod;
 
-CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler, uint32_t access_flags,
-                                         uint32_t method_idx, const DexFile& dex_file);
+CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler,
+                                         uint32_t access_flags,
+                                         uint32_t method_idx,
+                                         const DexFile& dex_file,
+                                         Compiler::JniOptimizationFlags optimization_flags);
 
 }  // namespace art
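The new optimization_flags parameter is consumed as a bitmask in jni_compiler.cc: (static_cast<uint32_t>(optimization_flags) & Compiler::kFastNative) != 0. A sketch of that flag test; only kFastNative is confirmed by this change, and the kNone zero value is an assumption:

    #include <cstdint>

    // Hypothetical mirror of Compiler::JniOptimizationFlags; kFastNative is
    // the flag this change tests for, kNone is an assumed zero value.
    enum JniOptimizationFlags : uint32_t {
      kNone = 0u,
      kFastNative = 1u << 0,
    };

    bool IsFastNative(JniOptimizationFlags flags) {
      // Mask-and-test, as in ArtJniCompileMethodInternal().
      return (static_cast<uint32_t>(flags) & kFastNative) != 0;
    }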
 
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 2d31a98..f5ab5f7 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -27,6 +27,32 @@
 static const FRegister kFArgumentRegisters[] = { F12, F14 };
 static const DRegister kDArgumentRegisters[] = { D6, D7 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    MipsManagedRegister::FromCoreRegister(S2),
+    MipsManagedRegister::FromCoreRegister(S3),
+    MipsManagedRegister::FromCoreRegister(S4),
+    MipsManagedRegister::FromCoreRegister(S5),
+    MipsManagedRegister::FromCoreRegister(S6),
+    MipsManagedRegister::FromCoreRegister(S7),
+    MipsManagedRegister::FromCoreRegister(FP),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips().IsCoreRegister()) {
+      result |= (1 << r.AsMips().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister MipsManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return MipsManagedRegister::FromCoreRegister(T9);
@@ -146,7 +172,7 @@
 
 MipsJniCallingConvention::MipsJniCallingConvention(bool is_static, bool is_synchronized,
                                                    const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
+    : JniCallingConvention(is_static, is_synchronized, shorty, kMipsPointerSize) {
   // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
   // or jclass for static methods and the JNIEnv. We start at the aligned register A2.
   size_t padding = 0;
@@ -161,21 +187,14 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S2));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S3));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S4));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S5));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S6));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S7));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(FP));
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << FP | 1 << RA;
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t MipsJniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const {
@@ -184,10 +203,10 @@
 
 size_t MipsJniCallingConvention::FrameSize() {
   // ArtMethod*, RA and callee save area size, local reference segment state
-  size_t frame_data_size = kMipsPointerSize +
+  size_t frame_data_size = static_cast<size_t>(kMipsPointerSize) +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kMipsPointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -196,6 +215,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> MipsJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void MipsJniCallingConvention::Next() {
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index dc45432..e95a738 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -17,17 +17,23 @@
 #ifndef ART_COMPILER_JNI_QUICK_MIPS_CALLING_CONVENTION_MIPS_H_
 #define ART_COMPILER_JNI_QUICK_MIPS_CALLING_CONVENTION_MIPS_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace mips {
 
 constexpr size_t kFramePointerSize = 4;
+static_assert(kFramePointerSize == static_cast<size_t>(PointerSize::k32),
+              "Invalid frame pointer size");
 
 class MipsManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   MipsManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k32) {}
   ~MipsManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -58,14 +64,10 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -80,9 +82,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 807d740..8341e8e 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -31,6 +31,33 @@
   F12, F13, F14, F15, F16, F17, F18, F19
 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    Mips64ManagedRegister::FromGpuRegister(S2),
+    Mips64ManagedRegister::FromGpuRegister(S3),
+    Mips64ManagedRegister::FromGpuRegister(S4),
+    Mips64ManagedRegister::FromGpuRegister(S5),
+    Mips64ManagedRegister::FromGpuRegister(S6),
+    Mips64ManagedRegister::FromGpuRegister(S7),
+    Mips64ManagedRegister::FromGpuRegister(GP),
+    Mips64ManagedRegister::FromGpuRegister(S8),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips64().IsGpuRegister()) {
+      result |= (1 << r.AsMips64().AsGpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister Mips64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Mips64ManagedRegister::FromGpuRegister(T9);
@@ -125,23 +152,15 @@
 
 Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8));
+    : JniCallingConvention(is_static, is_synchronized, shorty, kMips64PointerSize) {
 }
 
 uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA;
-  DCHECK_EQ(static_cast<size_t>(POPCOUNT(result)), callee_save_regs_.size() + 1);
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t Mips64JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Mips64JniCallingConvention::ReturnScratchRegister() const {
@@ -153,7 +172,7 @@
   size_t frame_data_size = kFramePointerSize +
       (CalleeSaveRegisters().size() + 1) * kFramePointerSize + sizeof(uint32_t);
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kMips64PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -162,6 +181,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Mips64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Mips64JniCallingConvention::IsCurrentParamInRegister() {
   return itr_args_ < 8;
 }
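
FrameSize() above is straightforward arithmetic. A rough worked example under assumed values — the handle scope size is a hypothetical stand-in, since HandleScope::SizeOf() depends on the header layout and reference count:

#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1u) & ~(n - 1u); }

int main() {
  constexpr size_t kFramePointerSize = 8u;  // mips64
  constexpr size_t kStackAlignment = 16u;   // assumed power-of-two alignment
  constexpr size_t kNumCalleeSaves = 8u;    // S2-S7, GP, S8; RA is the "+ 1"
  // ArtMethod*, RA plus callee saves, and the 32-bit reference segment state.
  size_t frame_data_size = kFramePointerSize +
      (kNumCalleeSaves + 1u) * kFramePointerSize + sizeof(uint32_t);  // 8 + 72 + 4 = 84
  size_t handle_scope_size = 24u;           // hypothetical: header plus one reference
  size_t return_value_size = 8u;
  // 84 + 24 + 8 = 116, rounded up to the 16-byte alignment -> 128.
  std::printf("frame size: %zu\n",
              RoundUp(frame_data_size + handle_scope_size + return_value_size, kStackAlignment));
  return 0;
}
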
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index 3d6aab7..a5fd111 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -17,17 +17,23 @@
 #ifndef ART_COMPILER_JNI_QUICK_MIPS64_CALLING_CONVENTION_MIPS64_H_
 #define ART_COMPILER_JNI_QUICK_MIPS64_CALLING_CONVENTION_MIPS64_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace mips64 {
 
 constexpr size_t kFramePointerSize = 8;
+static_assert(kFramePointerSize == static_cast<size_t>(PointerSize::k64),
+              "Invalid frame pointer size");
 
 class Mips64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   Mips64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k64) {}
   ~Mips64ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -57,14 +63,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -79,9 +81,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Mips64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 322caca..1d06f26 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -23,6 +23,30 @@
 namespace art {
 namespace x86 {
 
+static_assert(kX86PointerSize == PointerSize::k32, "Unexpected x86 pointer size");
+
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86ManagedRegister::FromCpuRegister(EBP),
+    X86ManagedRegister::FromCpuRegister(ESI),
+    X86ManagedRegister::FromCpuRegister(EDI),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86().IsCpuRegister()) {
+      result |= (1 << r.AsX86().AsCpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 
 ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -168,22 +192,23 @@
 
 X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
                                                  const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EBP));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(ESI));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EDI));
+    : JniCallingConvention(is_static, is_synchronized, shorty, kX86PointerSize) {
 }
 
 uint32_t X86JniCallingConvention::CoreSpillMask() const {
-  return 1 << EBP | 1 << ESI | 1 << EDI | 1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t X86JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 size_t X86JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = kX86PointerSize +
+  size_t frame_data_size = static_cast<size_t>(kX86PointerSize) +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kX86PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -192,6 +217,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
   return false;  // Everything is passed by stack.
 }
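
With the standard x86 register encoding (EBP = 5, ESI = 6, EDI = 7, eight GPRs in total), the constants above work out to a single concrete value; this standalone check mirrors CalculateCoreCalleeSpillMask():

#include <cstdint>

constexpr int EBP = 5, ESI = 6, EDI = 7;  // standard x86 encodings
constexpr int kNumberOfCpuRegisters = 8;
// The bit one past the last real register marks the spilled return PC.
constexpr uint32_t kExpectedMask =
    (1u << kNumberOfCpuRegisters) | (1u << EBP) | (1u << ESI) | (1u << EDI);
static_assert(kExpectedMask == 0x1e0u, "PC marker plus EBP/ESI/EDI");

int main() { return 0; }
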
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cdf0956..ff92fc9 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -17,17 +17,21 @@
 #ifndef ART_COMPILER_JNI_QUICK_X86_CALLING_CONVENTION_X86_H_
 #define ART_COMPILER_JNI_QUICK_X86_CALLING_CONVENTION_X86_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace x86 {
 
-constexpr size_t kFramePointerSize = 4;
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k32);
 
 class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize),
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k32),
         gpr_arg_count_(0) {}
   ~X86ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
@@ -59,14 +63,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -81,9 +81,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index b6b11ca..cbf10bd 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -24,6 +24,49 @@
 namespace art {
 namespace x86_64 {
 
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
+
+static_assert(kX86_64PointerSize == PointerSize::k64, "Unexpected x86_64 pointer size");
+
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86_64ManagedRegister::FromCpuRegister(RBX),
+    X86_64ManagedRegister::FromCpuRegister(RBP),
+    X86_64ManagedRegister::FromCpuRegister(R12),
+    X86_64ManagedRegister::FromCpuRegister(R13),
+    X86_64ManagedRegister::FromCpuRegister(R14),
+    X86_64ManagedRegister::FromCpuRegister(R15),
+    // Hard float registers.
+    X86_64ManagedRegister::FromXmmRegister(XMM12),
+    X86_64ManagedRegister::FromXmmRegister(XMM13),
+    X86_64ManagedRegister::FromXmmRegister(XMM14),
+    X86_64ManagedRegister::FromXmmRegister(XMM15),
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsCpuRegister()) {
+      result |= (1 << r.AsX86_64().AsCpuRegister().AsRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsXmmRegister()) {
+      result |= (1 << r.AsX86_64().AsXmmRegister().AsFloatRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -97,7 +140,7 @@
 
 FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
   return FrameOffset(displacement_.Int32Value() +  // displacement
-                     kX86_64PointerSize +  // Method ref
+                     static_cast<size_t>(kX86_64PointerSize) +  // Method ref
                      itr_slots_ * sizeof(uint32_t));  // offset into in args
 }
 
@@ -124,34 +167,23 @@
 
 X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBX));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBP));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
+    : JniCallingConvention(is_static, is_synchronized, shorty, kX86_64PointerSize) {
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
-  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 |
-      1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t X86_64JniCallingConvention::FpSpillMask() const {
-  return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;
+  return kFpCalleeSpillMask;
 }
 
 size_t X86_64JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = kX86_64PointerSize +
+  size_t frame_data_size = static_cast<size_t>(kX86_64PointerSize) +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kX86_64PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -160,6 +192,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86_64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86_64JniCallingConvention::IsCurrentParamInRegister() {
   return !IsCurrentParamOnStack();
 }
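
x86-64 is the one architecture in this change with hard-float callee saves, so both masks are nontrivial. Assuming the standard x86-64 encodings (RBX = 3, RBP = 5, R12-R15 = 12-15, sixteen GPRs) and the usual XMM numbering, the expected values check out as:

#include <cstdint>

constexpr int RBX = 3, RBP = 5, R12 = 12, R13 = 13, R14 = 14, R15 = 15;
constexpr int kNumberOfCpuRegisters = 16;  // The PC marker bit goes one past this.
constexpr uint32_t kCore = (1u << kNumberOfCpuRegisters) |
    (1u << RBX) | (1u << RBP) | (1u << R12) | (1u << R13) | (1u << R14) | (1u << R15);
constexpr uint32_t kFp = (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);  // XMM12-XMM15
static_assert(kCore == 0x1f028u, "RBX, RBP, R12-R15 plus the PC marker bit");
static_assert(kFp == 0xf000u, "XMM12-XMM15");

int main() { return 0; }
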
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 6e47c9f..b98f505 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -17,17 +17,19 @@
 #ifndef ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
 #define ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace x86_64 {
 
-constexpr size_t kFramePointerSize = 8;
-
 class X86_64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   X86_64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k64) {}
   ~X86_64ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -55,9 +57,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -75,9 +75,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention);
 };
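
These header changes are what finally resolve the old TODO about per-instance callee_save_regs_ vectors: the registers now live in one static constexpr array per architecture, and every instance hands out a non-owning view. A simplified stand-in for the art::ArrayRef idea (the real class has a larger API surface):

#include <cstddef>
#include <cstdio>

// Non-owning pointer+size view over a static array, in the spirit of art::ArrayRef.
template <typename T>
class SpanLike {
 public:
  template <size_t N>
  explicit constexpr SpanLike(T (&array)[N]) : data_(array), size_(N) {}
  constexpr size_t size() const { return size_; }
  constexpr const T* begin() const { return data_; }
  constexpr const T* end() const { return data_ + size_; }
 private:
  T* data_;
  size_t size_;
};

struct ManagedReg { int id; };
static constexpr ManagedReg kCalleeSaves[] = { {5}, {6}, {7} };  // illustrative ids

// Every caller shares the same static data; nothing is copied per instance.
SpanLike<const ManagedReg> CalleeSaveRegisters() {
  return SpanLike<const ManagedReg>(kCalleeSaves);
}

int main() {
  for (const ManagedReg& r : CalleeSaveRegisters()) std::printf("reg %d\n", r.id);
  return 0;
}
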
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index cb9ea38..2471f79 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -17,9 +17,9 @@
 #include "linker/arm/relative_patcher_arm_base.h"
 
 #include "compiled_method.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
 
 namespace art {
 namespace linker {
@@ -31,16 +31,18 @@
 }
 
 uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
-  // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
-  // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
-  // of code. To avoid any alignment discrepancies for the final chunk, we always align the
-  // offset after reserving of writing any chunk.
   uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
-  bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u),
+  bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
+                                                MethodReference(nullptr, 0u),
                                                 aligned_offset);
   if (needs_thunk) {
+    // All remaining patches will be handled by this thunk.
+    DCHECK(!unprocessed_patches_.empty());
+    DCHECK_LE(aligned_offset - unprocessed_patches_.front().second, max_positive_displacement_);
+    unprocessed_patches_.clear();
+
     thunk_locations_.push_back(aligned_offset);
-    offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
+    offset = aligned_offset + thunk_code_.size();
   }
   return offset;
 }
@@ -59,13 +61,7 @@
     if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) {
       return 0u;
     }
-    uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
-    // Align after writing chunk, see the ReserveSpace() above.
-    offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
-    aligned_code_delta = offset - thunk_end_offset;
-    if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
-      return 0u;
-    }
+    offset = aligned_offset + thunk_code_.size();
   }
   return offset;
 }
@@ -85,9 +81,8 @@
                                                       const CompiledMethod* compiled_method,
                                                       MethodReference method_ref,
                                                       uint32_t max_extra_space) {
-  DCHECK(compiled_method->GetQuickCode() != nullptr);
-  uint32_t quick_code_size = compiled_method->GetQuickCode()->size();
-  uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+  uint32_t quick_code_size = compiled_method->GetQuickCode().size();
+  uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
   uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
   // Adjust for extra space required by the subclass.
   next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space);
@@ -95,18 +90,19 @@
   // We need the MethodReference for that.
   if (!unprocessed_patches_.empty() &&
       next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
-    bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref,
+    bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset,
+                                                  method_ref,
                                                   next_aligned_offset);
     if (needs_thunk) {
       // A single thunk will cover all pending patches.
       unprocessed_patches_.clear();
-      uint32_t thunk_location = compiled_method->AlignCode(offset);
+      uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_);
       thunk_locations_.push_back(thunk_location);
-      offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
+      offset = thunk_location + thunk_code_.size();
     }
   }
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-    if (patch.Type() == kLinkerPatchCallRelative) {
+    if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
       unprocessed_patches_.emplace_back(patch.TargetMethod(),
                                         quick_code_offset + patch.LiteralOffset());
     }
@@ -157,7 +153,10 @@
         // If still unresolved, check if we have a thunk within range.
         if (thunk_locations_.empty() ||
             patch_offset - thunk_locations_.back() > max_negative_displacement_) {
-          return next_aligned_offset - patch_offset > max_positive_displacement_;
+          // No thunk in range; we need a thunk if the next aligned offset
+          // is out of range, or if we're at the end of all code.
+          return (next_aligned_offset - patch_offset > max_positive_displacement_) ||
+              (quick_code_offset == next_aligned_offset);  // End of code.
         }
       } else {
         uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
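
The reservation logic above hinges on branch reach: a relative call can only displace so far, so once the oldest unprocessed patch would fall out of forward range of the next safe location, a thunk must be emitted now. A minimal sketch of that range test, with an illustrative limit (a Thumb2 BL reaches roughly +/-16 MiB):

#include <cstdint>

constexpr uint32_t kMaxPositiveDisplacement = 16u * 1024u * 1024u - 2u;  // illustrative

// Mirrors the check in ReserveSpaceInternal(): the oldest pending call patch
// decides whether the pending patches still reach past next_aligned_offset.
bool NeedsThunkNow(uint32_t oldest_pending_patch_offset, uint32_t next_aligned_offset) {
  return next_aligned_offset - oldest_pending_patch_offset > kMaxPositiveDisplacement;
}

int main() {
  // A patch at offset 0 still reaches a target exactly at the limit...
  bool near = NeedsThunkNow(0u, kMaxPositiveDisplacement);      // false
  // ...but not one past it, so a thunk would be reserved here.
  bool far = NeedsThunkNow(0u, kMaxPositiveDisplacement + 4u);  // true
  return (!near && far) ? 0 : 1;
}
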
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index f80dd96..25fd35e 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -27,18 +27,23 @@
 
 class ArmBaseRelativePatcher : public RelativePatcher {
  public:
-  uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method,
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
                         MethodReference method_ref) OVERRIDE;
   uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
   uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
 
  protected:
   ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider,
-                         InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
-                         uint32_t max_positive_displacement, uint32_t max_negative_displacement);
+                         InstructionSet instruction_set,
+                         std::vector<uint8_t> thunk_code,
+                         uint32_t max_positive_displacement,
+                         uint32_t max_negative_displacement);
 
-  uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method,
-                                MethodReference method_ref, uint32_t max_extra_space);
+  uint32_t ReserveSpaceInternal(uint32_t offset,
+                                const CompiledMethod* compiled_method,
+                                MethodReference method_ref,
+                                uint32_t max_extra_space);
   uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset);
 
  private:
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 5f4f760..fa49fc4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -28,8 +28,10 @@
                              kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
 }
 
-void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                                      uint32_t patch_offset, uint32_t target_offset) {
+void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code,
+                                      uint32_t literal_offset,
+                                      uint32_t patch_offset,
+                                      uint32_t target_offset) {
   DCHECK_LE(literal_offset + 4u, code->size());
   DCHECK_EQ(literal_offset & 1u, 0u);
   DCHECK_EQ(patch_offset & 1u, 0u);
@@ -54,10 +56,10 @@
   SetInsn32(code, literal_offset, value);
 }
 
-void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                   const LinkerPatch& patch,
-                                                   uint32_t patch_offset,
-                                                   uint32_t target_offset) {
+void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                     const LinkerPatch& patch,
+                                                     uint32_t patch_offset,
+                                                     uint32_t target_offset) {
   uint32_t literal_offset = patch.LiteralOffset();
   uint32_t pc_literal_offset = patch.PcInsnOffset();
   uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */;
@@ -77,7 +79,9 @@
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() {
   // The thunk just uses the entry point in the ArtMethod. This works even for calls
   // to the generic JNI and interpreter trampolines.
-  arm::Thumb2Assembler assembler;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm::Thumb2Assembler assembler(&arena);
   assembler.LoadFromOffset(
       arm::kLoadWord, arm::PC, arm::R0,
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
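
The assembler now requires an arena allocator, so CompileThunkCode() creates a pool and allocator scoped to the call; all assembler scratch memory dies with the pool when the function returns. A toy version of that lifetime pattern — the Pool/Allocator types here are stand-ins, not ART's arena API:

#include <cstddef>
#include <vector>

struct Pool {  // Owns every block handed out by allocators built on it.
  std::vector<char*> blocks;
  ~Pool() { for (char* b : blocks) delete[] b; }
};

struct Allocator {
  explicit Allocator(Pool* pool) : pool_(pool) {}
  void* Alloc(size_t n) { char* b = new char[n]; pool_->blocks.push_back(b); return b; }
  Pool* pool_;
};

std::vector<unsigned char> CompileThunk() {
  Pool pool;               // Scoped to this one compilation.
  Allocator arena(&pool);
  (void)arena.Alloc(64u);  // Stand-in for assembler buffer growth.
  return {0x00u, 0xbfu};   // Stand-in thunk bytes (a Thumb2 NOP).
}

int main() { return CompileThunk().size() == 2u ? 0 : 1; }
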
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 006d6fb..d85739c 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -26,10 +26,14 @@
  public:
   explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
 
-  void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                 uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
-  void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
-                              uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 
  private:
   static std::vector<uint8_t> CompileThunkCode();
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 5515313..eace3d4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -30,6 +30,9 @@
   static const ArrayRef<const uint8_t> kCallCode;
   static const uint8_t kNopRawCode[];
   static const ArrayRef<const uint8_t> kNopCode;
+  static const uint8_t kUnpatchedPcRelativeRawCode[];
+  static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode;
+  static const uint32_t kPcInsnOffset;
 
   // Branches within range [-256, 256) can be created from these by adding the low 8 bits.
   static constexpr uint32_t kBlPlus0 = 0xf000f800;
@@ -45,18 +48,18 @@
                              const ArrayRef<const LinkerPatch>& method3_patches,
                              uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArmAlignment, 0u);
-    const uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader);
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
 
     // We want to put the method3 at a very precise offset.
     const uint32_t method3_offset = method1_offset + distance_without_thunks;
-    CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment);
+    CHECK_ALIGNED(method3_offset, kArmAlignment);
 
     // Calculate size of method2 so that we put method3 at the correct place.
+    const uint32_t method1_end = method1_offset + method1_code.size();
     const uint32_t method2_offset =
-        CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) +
-        sizeof(OatQuickMethodHeader);
+        method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader);
     const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
     std::vector<uint8_t> method2_raw_code(method2_size);
     ArrayRef<const uint8_t> method2_code(method2_raw_code);
@@ -75,8 +78,11 @@
     if (result3.second == method3_offset + 1 /* thumb mode */) {
       return false;  // No thunk.
     } else {
-      uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2);
-      CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */);
+      uint32_t thunk_end =
+          CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) +
+          ThunkSize();
+      uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+      CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */);
       return true;   // Thunk present.
     }
   }
@@ -123,47 +129,9 @@
     return result;
   }
 
-  void TestDexCachereference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    dex_cache_arrays_begin_ = dex_cache_arrays_begin;
-    static const uint8_t raw_code[] = {
-        0x40, 0xf2, 0x00, 0x00,   // MOVW r0, #0 (placeholder)
-        0xc0, 0xf2, 0x00, 0x00,   // MOVT r0, #0 (placeholder)
-        0x78, 0x44,               // ADD r0, pc
-    };
-    constexpr uint32_t pc_insn_offset = 8u;
-    const ArrayRef<const uint8_t> code(raw_code);
-    LinkerPatch patches[] = {
-        LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset),
-        LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset),
-    };
-    AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
-    Link();
-
-    uint32_t method1_offset = GetMethodOffset(1u);
-    uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */;
-    uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset;
-    // Distribute the bits of the diff between the MOVW and MOVT:
-    uint32_t diffw = diff & 0xffffu;
-    uint32_t difft = diff >> 16;
-    uint32_t movw = 0xf2400000u |           // MOVW r0, #0 (placeholder),
-        ((diffw & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
-        ((diffw & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
-        ((diffw & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
-        ((diffw & 0x00ffu));                // keep imm8 at bits 0-7.
-    uint32_t movt = 0xf2c00000u |           // MOVT r0, #0 (placeholder),
-        ((difft & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
-        ((difft & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
-        ((difft & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
-        ((difft & 0x00ffu));                // keep imm8 at bits 0-7.
-    const uint8_t expected_code[] = {
-        static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24),
-        static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8),
-        static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24),
-        static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8),
-        0x78, 0x44,
-    };
-    EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
-  }
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
 };
 
 const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -178,6 +146,67 @@
 
 const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode);
 
+const uint8_t Thumb2RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = {
+    0x40, 0xf2, 0x00, 0x00,   // MOVW r0, #0 (placeholder)
+    0xc0, 0xf2, 0x00, 0x00,   // MOVT r0, #0 (placeholder)
+    0x78, 0x44,               // ADD r0, pc
+};
+const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCode(
+    kUnpatchedPcRelativeRawCode);
+const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u;
+
+void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                      uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
+      LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
+      LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                     uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  uint32_t method1_offset = GetMethodOffset(1u);
+  uint32_t pc_base_offset = method1_offset + kPcInsnOffset + 4u /* PC adjustment */;
+  uint32_t diff = target_offset - pc_base_offset;
+  // Distribute the bits of the diff between the MOVW and MOVT:
+  uint32_t diffw = diff & 0xffffu;
+  uint32_t difft = diff >> 16;
+  uint32_t movw = 0xf2400000u |           // MOVW r0, #0 (placeholder),
+      ((diffw & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
+      ((diffw & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
+      ((diffw & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
+      ((diffw & 0x00ffu));                // keep imm8 at bits 0-7.
+  uint32_t movt = 0xf2c00000u |           // MOVT r0, #0 (placeholder),
+      ((difft & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
+      ((difft & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
+      ((difft & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
+      ((difft & 0x00ffu));                // keep imm8 at bits 0-7.
+  const uint8_t expected_code[] = {
+      static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24),
+      static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8),
+      static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24),
+      static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8),
+      0x78, 0x44,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
 TEST_F(Thumb2RelativePatcherTest, CallSelf) {
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
@@ -233,6 +262,36 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) {
+  constexpr uint32_t missing_method_index = 1024u;
+  auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0);
+  constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
+  ArrayRef<const uint8_t> method3_code(method3_raw_code);
+  ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+  LinkerPatch method3_patches[] = {
+      LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
+  };
+
+  constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
+                                            method3_code,
+                                            ArrayRef<const LinkerPatch>(method3_patches),
+                                            just_over_max_negative_disp - bl_offset_in_method3);
+  ASSERT_FALSE(thunk_in_gap);  // There should be a thunk, but after the last method rather than in the gap.
+  ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+  // Check linked code.
+  uint32_t method3_offset = GetMethodOffset(3u);
+  uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2);
+  uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */);
+  ASSERT_EQ(diff & 1u, 0u);
+  ASSERT_LT(diff >> 1, 1u << 8);  // Simple encoding, (diff >> 1) fits into 8 bits.
+  auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+  EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
 TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
   auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0);
   constexpr uint32_t bl_offset_in_method1 = 3u * 2u;  // After NOPs.
@@ -296,9 +355,12 @@
 
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t method3_offset = GetMethodOffset(3u);
+  ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset));
   uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader);
-  ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset));
-  uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2);
+  uint32_t thunk_offset =
+      RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2));
+  DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+            method3_header_offset);
   ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset));
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */);
   ASSERT_EQ(diff & 1u, 0u);
@@ -336,23 +398,43 @@
   EXPECT_TRUE(CheckThunk(thunk_offset));
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) {
-  TestDexCachereference(0x00ff0000u, 0x00fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference1) {
+  TestDexCacheReference(0x00ff0000u, 0x00fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) {
-  TestDexCachereference(0x02ff0000u, 0x05fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference2) {
+  TestDexCacheReference(0x02ff0000u, 0x05fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) {
-  TestDexCachereference(0x08ff0000u, 0x08fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference3) {
+  TestDexCacheReference(0x08ff0000u, 0x08fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceimm4) {
-  TestDexCachereference(0xd0ff0000u, 0x60fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference4) {
+  TestDexCacheReference(0xd0ff0000u, 0x60fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference1) {
+  TestStringReference(0x00ff00fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference2) {
+  TestStringReference(0x02ff05fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference3) {
+  TestStringReference(0x08ff08fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference4) {
+  TestStringReference(0xd0ff60fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
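
The bit shuffling in CheckPcRelativePatch() follows the Thumb2 encoding, where a 16-bit immediate is split into imm4:i:imm3:imm8 fields. A standalone worked example of the MOVW half (MOVT uses the same field layout on base 0xf2c00000u):

#include <cstdint>
#include <cstdio>

uint32_t EncodeMovw(uint32_t imm16) {
  return 0xf2400000u |                    // MOVW r0, #0 (placeholder)
      ((imm16 & 0xf000u) << (16 - 12)) |  // imm4: bits 12-15 -> bits 16-19
      ((imm16 & 0x0800u) << (26 - 11)) |  // i:    bit 11     -> bit 26
      ((imm16 & 0x0700u) << (12 - 8)) |   // imm3: bits 8-10  -> bits 12-14
      (imm16 & 0x00ffu);                  // imm8: bits 0-7 stay put
}

int main() {
  uint32_t diff = 0x12345678u;            // Illustrative patch displacement.
  std::printf("MOVW #0x%04x -> 0x%08x\n", diff & 0xffffu, EncodeMovw(diff & 0xffffu));
  return 0;
}
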
 
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 6f234a8..4c8788e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -20,14 +20,25 @@
 #include "art_method.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
-#include "utils/arm64/assembler_arm64.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
+#include "utils/arm64/assembler_arm64.h"
 
 namespace art {
 namespace linker {
 
+namespace {
+
+inline bool IsAdrpPatch(const LinkerPatch& patch) {
+  LinkerPatch::Type type = patch.GetType();
+  return
+      (type == LinkerPatch::Type::kStringRelative || type == LinkerPatch::Type::kDexCacheArray) &&
+      patch.LiteralOffset() == patch.PcInsnOffset();
+}
+
+}  // anonymous namespace
+
 Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
                                            const Arm64InstructionSetFeatures* features)
     : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(),
@@ -61,8 +72,7 @@
   size_t num_adrp = 0u;
   DCHECK(compiled_method != nullptr);
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-    if (patch.Type() == kLinkerPatchDexCacheArray &&
-        patch.LiteralOffset() == patch.PcInsnOffset()) {  // ADRP patch
+    if (IsAdrpPatch(patch)) {
       ++num_adrp;
     }
   }
@@ -73,13 +83,12 @@
 
   // Now that we have the actual offset where the code will be placed, locate the ADRP insns
   // that actually require the thunk.
-  uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
-  ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode());
+  uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
+  ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
   uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
   DCHECK(compiled_method != nullptr);
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-    if (patch.Type() == kLinkerPatchDexCacheArray &&
-        patch.LiteralOffset() == patch.PcInsnOffset()) {  // ADRP patch
+    if (IsAdrpPatch(patch)) {
       uint32_t patch_offset = quick_code_offset + patch.LiteralOffset();
       if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) {
         adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset);
@@ -131,8 +140,10 @@
   return ArmBaseRelativePatcher::WriteThunks(out, offset);
 }
 
-void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                                     uint32_t patch_offset, uint32_t target_offset) {
+void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code,
+                                     uint32_t literal_offset,
+                                     uint32_t patch_offset,
+                                     uint32_t target_offset) {
   DCHECK_LE(literal_offset + 4u, code->size());
   DCHECK_EQ(literal_offset & 3u, 0u);
   DCHECK_EQ(patch_offset & 3u, 0u);
@@ -149,10 +160,10 @@
   SetInsn(code, literal_offset, insn);
 }
 
-void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                  const LinkerPatch& patch,
-                                                  uint32_t patch_offset,
-                                                  uint32_t target_offset) {
+void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                    const LinkerPatch& patch,
+                                                    uint32_t patch_offset,
+                                                    uint32_t target_offset) {
   DCHECK_EQ(patch_offset & 3u, 0u);
   DCHECK_EQ(target_offset & 3u, 0u);
   uint32_t literal_offset = patch.LiteralOffset();
@@ -197,8 +208,24 @@
     // Write the new ADRP (or B to the erratum 843419 thunk).
     SetInsn(code, literal_offset, insn);
   } else {
-    // LDR 32-bit or 64-bit with imm12 == 0 (unset).
-    DCHECK_EQ(insn & 0xbffffc00, 0xb9400000) << insn;
+    if ((insn & 0xfffffc00) == 0x91000000) {
+      // ADD immediate, 64-bit with imm12 == 0 (unset).
+      if (!kEmitCompilerReadBarrier) {
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
+      } else {
+        // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the
+        // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative ||
+               patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType();
+      }
+      shift = 0u;  // No shift for ADD.
+    } else {
+      // LDR 32-bit or 64-bit with imm12 == 0 (unset).
+      DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType();
+      DCHECK_EQ(insn & 0xbffffc00, 0xb9400000) << std::hex << insn;
+    }
     if (kIsDebugBuild) {
       uint32_t adrp = GetInsn(code, pc_insn_offset);
       if ((adrp & 0x9f000000u) != 0x90000000u) {
@@ -229,7 +256,9 @@
 std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
   // The thunk just uses the entry point in the ArtMethod. This works even for calls
   // to the generic JNI and interpreter trampolines.
-  arm64::Arm64Assembler assembler;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm64::Arm64Assembler assembler(&arena);
   Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
       kArm64PointerSize).Int32Value());
   assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
@@ -261,7 +290,7 @@
   DCHECK_EQ(patch_offset & 0x3u, 0u);
   if ((patch_offset & 0xff8) == 0xff8) {  // ...ff8 or ...ffc
     uint32_t adrp = GetInsn(code, literal_offset);
-    DCHECK_EQ(adrp & 0xff000000, 0x90000000);
+    DCHECK_EQ(adrp & 0x9f000000, 0x90000000);
     uint32_t next_offset = patch_offset + 4u;
     uint32_t next_insn = GetInsn(code, literal_offset + 4u);
 
@@ -275,6 +304,15 @@
       return false;
     }
 
+    // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP
+    // for an ADD immediate, check for that as well. We generalize a bit to include
+    // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores
+    // the result to a different register.
+    if ((next_insn & 0x1f000000) == 0x11000000 &&
+        ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
+      return false;
+    }
+
     // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing.
     if ((next_insn & 0xff000000) == 0x18000000) {
       return false;
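
The erratum 843419 workaround only concerns ADRPs that land in the last two instruction slots before a 4 KiB boundary; everything else is rejected up front by the (patch_offset & 0xff8) == 0xff8 test seen earlier in this file. A tiny sketch of that position filter:

#include <cstdint>

// True only for offsets ending in ...ff8 or ...ffc, i.e. the final two
// 4-byte slots before each 4 KiB boundary.
bool AdrpMayTriggerErratum843419(uint32_t patch_offset) {
  return (patch_offset & 0xff8u) == 0xff8u;
}

int main() {
  return (AdrpMayTriggerErratum843419(0x1ff8u) &&
          AdrpMayTriggerErratum843419(0x2ffcu) &&
          !AdrpMayTriggerErratum843419(0x1ff0u)) ? 0 : 1;
}
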
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 2d07e75..48ad105 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -28,14 +28,19 @@
   Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
                        const Arm64InstructionSetFeatures* features);
 
-  uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method,
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
                         MethodReference method_ref) OVERRIDE;
   uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
   uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
-  void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                 uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
-  void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
-                              uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 
  private:
   static std::vector<uint8_t> CompileThunkCode();
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 857d584..573de73 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -40,6 +40,15 @@
   static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
   static constexpr uint32_t kBlMinusMax = 0x96000000u;
 
+  // LDR immediate, 32-bit.
+  static constexpr uint32_t kLdrWInsn = 0xb9400000u;
+
+  // ADD/ADDS/SUB/SUBS immediate, 64-bit.
+  static constexpr uint32_t kAddXInsn = 0x91000000u;
+  static constexpr uint32_t kAddsXInsn = 0xb1000000u;
+  static constexpr uint32_t kSubXInsn = 0xd1000000u;
+  static constexpr uint32_t kSubsXInsn = 0xf1000000u;
+
   // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp).
   static constexpr uint32_t kLdurInsn = 0xf840405fu;
 
@@ -58,36 +67,39 @@
                                  const ArrayRef<const LinkerPatch>& last_method_patches,
                                  uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u);
-    const uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
-    const uint32_t gap_start =
-        CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64);
+    const uint32_t gap_start = method1_offset + method1_code.size();
 
     // We want to put the method3 at a very precise offset.
     const uint32_t last_method_offset = method1_offset + distance_without_thunks;
+    CHECK_ALIGNED(last_method_offset, kArm64Alignment);
     const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader);
-    CHECK_ALIGNED(gap_end, kArm64Alignment);
 
-    // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB).
+    // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB).
     // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB
-    // offsets by this test.)
+    // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate
+    // methods the same end alignment, so the thunk insertion adds a predictable size as
+    // long as it's after the first chunk.)
     uint32_t method_idx = 2u;
     constexpr uint32_t kSmallChunkSize = 2 * MB;
     std::vector<uint8_t> gap_code;
-    size_t gap_size = gap_end - gap_start;
-    for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) {
-      uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader);
+    uint32_t gap_size = gap_end - gap_start;
+    uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u;
+    uint32_t chunk_start = gap_start;
+    uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize;
+    for (uint32_t i = 0; i <= num_small_chunks; ++i) {  // num_small_chunks+1 iterations.
+      uint32_t chunk_code_size =
+          chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader);
       gap_code.resize(chunk_code_size, 0u);
       AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
                         ArrayRef<const LinkerPatch>());
       method_idx += 1u;
+      chunk_start += chunk_size;
+      chunk_size = kSmallChunkSize;  // For all but the first chunk.
+      DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start));
     }
-    uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader);
-    gap_code.resize(chunk_code_size, 0u);
-    AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
-                      ArrayRef<const LinkerPatch>());
-    method_idx += 1u;
 
     // Add the last method and link
     AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches);
@@ -100,8 +112,9 @@
    // There may be a thunk before the last method.
     if (last_result.second != last_method_offset) {
       // Thunk present. Check that there's only one.
-      uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64);
-      CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size);
+      uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize();
+      uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+      CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader));
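+      // I.e. the last method was pushed back by exactly one thunk plus alignment.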
     }
     return method_idx;
   }
@@ -109,7 +122,7 @@
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
     CHECK(result.first);
-    CHECK_EQ(result.second & 3u, 0u);
+    CHECK_ALIGNED(result.second, 4u);
     return result.second;
   }
 
@@ -147,20 +160,29 @@
     return result;
   }
 
-  std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops,
-                                         uint32_t method_offset, uint32_t target_offset) {
+  std::vector<uint8_t> GenNopsAndAdrpAndUse(size_t num_nops,
+                                            uint32_t method_offset,
+                                            uint32_t target_offset,
+                                            uint32_t use_insn) {
     std::vector<uint8_t> result;
     result.reserve(num_nops * 4u + 8u);
     for (size_t i = 0; i != num_nops; ++i) {
       result.insert(result.end(), kNopCode.begin(), kNopCode.end());
     }
-    DCHECK_EQ(method_offset & 3u, 0u);
-    DCHECK_EQ(target_offset & 3u, 0u);
+    CHECK_ALIGNED(method_offset, 4u);
+    CHECK_ALIGNED(target_offset, 4u);
     uint32_t adrp_offset = method_offset + num_nops * 4u;
     uint32_t disp = target_offset - (adrp_offset & ~0xfffu);
-    DCHECK_EQ(disp & 3u, 0u);
-    uint32_t ldr = 0xb9400001 |               // LDR w1, [x0, #(imm12 * 2)]
-        ((disp & 0xfffu) << (10 - 2));        // imm12 = ((disp & 0xfffu) >> 2) is at bit 10.
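+    // Note: disp is relative to the ADRP's 4KiB page (adrp_offset & ~0xfff),
+    // matching how ADRP materializes page-relative addresses.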
+    if (use_insn == kLdrWInsn) {
+      DCHECK_ALIGNED(disp, 1u << 2);
+      use_insn |= 1 |                         // LDR w1, [x0, #(imm12 << 2)]
+          ((disp & 0xfffu) << (10 - 2));      // imm12 = ((disp & 0xfffu) >> 2) is at bit 10.
+    } else if (use_insn == kAddXInsn) {
+      use_insn |= 1 |                         // ADD x1, x0, #imm
+          (disp & 0xfffu) << 10;              // imm12 = (disp & 0xfffu) is at bit 10.
+    } else {
+      LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
+    }
     uint32_t adrp = 0x90000000 |              // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
         ((disp & 0x3000u) << (29 - 12)) |     // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
         ((disp & 0xffffc000) >> (14 - 5)) |   // immhi = (disp >> 14) is at bit 5,
@@ -170,13 +192,19 @@
     result.push_back(static_cast<uint8_t>(adrp >> 8));
     result.push_back(static_cast<uint8_t>(adrp >> 16));
     result.push_back(static_cast<uint8_t>(adrp >> 24));
-    result.push_back(static_cast<uint8_t>(ldr));
-    result.push_back(static_cast<uint8_t>(ldr >> 8));
-    result.push_back(static_cast<uint8_t>(ldr >> 16));
-    result.push_back(static_cast<uint8_t>(ldr >> 24));
+    result.push_back(static_cast<uint8_t>(use_insn));
+    result.push_back(static_cast<uint8_t>(use_insn >> 8));
+    result.push_back(static_cast<uint8_t>(use_insn >> 16));
+    result.push_back(static_cast<uint8_t>(use_insn >> 24));
     return result;
   }
 
+  std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops,
+                                         uint32_t method_offset,
+                                         uint32_t target_offset) {
+    return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn);
+  }
+
   void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
     dex_cache_arrays_begin_ = dex_cache_arrays_begin;
     auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u);  // Unpatched.
@@ -184,7 +212,8 @@
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
     };
-    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
                       ArrayRef<const LinkerPatch>(patches));
     Link();
 
@@ -194,6 +223,30 @@
     EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
   }
 
+  std::vector<uint8_t> GenNopsAndAdrpAdd(size_t num_nops,
+                                         uint32_t method_offset,
+                                         uint32_t target_offset) {
+    return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kAddXInsn);
+  }
+
+  void TestNopsAdrpAdd(size_t num_nops, uint32_t string_offset) {
+    constexpr uint32_t kStringIndex = 1u;
+    string_index_to_offset_map_.Put(kStringIndex, string_offset);
+    auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u);  // Unpatched.
+    LinkerPatch patches[] = {
+        LinkerPatch::RelativeStringPatch(num_nops * 4u     , nullptr, num_nops * 4u, kStringIndex),
+        LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex),
+    };
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
+                      ArrayRef<const LinkerPatch>(patches));
+    Link();
+
+    uint32_t method1_offset = GetMethodOffset(1u);
+    auto expected_code = GenNopsAndAdrpAdd(num_nops, method1_offset, string_offset);
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+  }
+
   void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
     CHECK_LE(pos, code->size());
     const uint8_t insn_code[] = {
@@ -204,8 +257,10 @@
     code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
   }
 
-  void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2,
-                               uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void PrepareNopsAdrpInsn2Ldr(size_t num_nops,
+                               uint32_t insn2,
+                               uint32_t dex_cache_arrays_begin,
+                               uint32_t element_offset) {
     dex_cache_arrays_begin_ = dex_cache_arrays_begin;
     auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u);  // Unpatched.
     InsertInsn(&code, num_nops * 4u + 4u, insn2);
@@ -213,40 +268,54 @@
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
     };
-    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
                       ArrayRef<const LinkerPatch>(patches));
     Link();
   }
 
-  void TestNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2,
-                            uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+  void PrepareNopsAdrpInsn2Add(size_t num_nops, uint32_t insn2, uint32_t string_offset) {
+    constexpr uint32_t kStringIndex = 1u;
+    string_index_to_offset_map_.Put(kStringIndex, string_offset);
+    auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u);  // Unpatched.
+    InsertInsn(&code, num_nops * 4u + 4u, insn2);
+    LinkerPatch patches[] = {
+        LinkerPatch::RelativeStringPatch(num_nops * 4u     , nullptr, num_nops * 4u, kStringIndex),
+        LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex),
+    };
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
+                      ArrayRef<const LinkerPatch>(patches));
+    Link();
+  }
 
+  void TestNopsAdrpInsn2AndUse(size_t num_nops,
+                               uint32_t insn2,
+                               uint32_t target_offset,
+                               uint32_t use_insn) {
     uint32_t method1_offset = GetMethodOffset(1u);
-    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
-    auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+    auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn);
     InsertInsn(&expected_code, num_nops * 4u + 4u, insn2);
     EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
   }
 
-  void TestNopsAdrpInsn2LdrHasThunk(size_t num_nops, uint32_t insn2,
-                                    uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
-
+  void TestNopsAdrpInsn2AndUseHasThunk(size_t num_nops,
+                                       uint32_t insn2,
+                                       uint32_t target_offset,
+                                       uint32_t use_insn) {
     uint32_t method1_offset = GetMethodOffset(1u);
     CHECK(!compiled_method_refs_.empty());
     CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u);
     CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size());
-    uint32_t method1_size = compiled_methods_[0]->GetQuickCode()->size();
+    uint32_t method1_size = compiled_methods_[0]->GetQuickCode().size();
     uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64);
     uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u);
-    ASSERT_EQ(b_diff & 3u, 0u);
+    CHECK_ALIGNED(b_diff, 4u);
     ASSERT_LT(b_diff, 128 * MB);
     uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu);
     uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu);
 
-    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
-    auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+    auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn);
     InsertInsn(&expected_code, num_nops * 4u + 4u, insn2);
     // Replace adrp with bl.
     expected_code.erase(expected_code.begin() + num_nops * 4u,
@@ -270,29 +339,39 @@
     }
   }
 
-  void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk,
-                        uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpInsn2Ldr(uint32_t insn2,
+                        uint32_t adrp_offset,
+                        bool has_thunk,
+                        uint32_t dex_cache_arrays_begin,
+                        uint32_t element_offset) {
     uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
-    ASSERT_EQ(adrp_offset & 3u, 0u);
+    CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
+    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
     if (has_thunk) {
-      TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+      TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn);
     } else {
-      TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+      TestNopsAdrpInsn2AndUse(num_nops, insn2, target_offset, kLdrWInsn);
     }
     ASSERT_EQ(method1_offset, GetMethodOffset(1u));  // If this fails, num_nops is wrong.
   }
 
-  void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk,
-                       uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpLdurLdr(uint32_t adrp_offset,
+                       bool has_thunk,
+                       uint32_t dex_cache_arrays_begin,
+                       uint32_t element_offset) {
     TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
   }
 
-  void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp,
-                           uint32_t adrp_offset, bool has_thunk,
-                           uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn,
+                           int32_t pcrel_disp,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t dex_cache_arrays_begin,
+                           uint32_t element_offset) {
     ASSERT_LT(pcrel_disp, 0x100000);
     ASSERT_GE(pcrel_disp, -0x100000);
     ASSERT_EQ(pcrel_disp & 0x3, 0);
@@ -300,13 +379,60 @@
     TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
   }
 
-  void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units,
-                           uint32_t adrp_offset, bool has_thunk,
-                           uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn,
+                           uint32_t sprel_disp_in_load_units,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t dex_cache_arrays_begin,
+                           uint32_t element_offset) {
     ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
     uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
     TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
   }
+
+  void TestAdrpInsn2Add(uint32_t insn2,
+                        uint32_t adrp_offset,
+                        bool has_thunk,
+                        uint32_t string_offset) {
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
+    ASSERT_LT(method1_offset, adrp_offset);
+    CHECK_ALIGNED(adrp_offset, 4u);
+    uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
+    PrepareNopsAdrpInsn2Add(num_nops, insn2, string_offset);
+    if (has_thunk) {
+      TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, string_offset, kAddXInsn);
+    } else {
+      TestNopsAdrpInsn2AndUse(num_nops, insn2, string_offset, kAddXInsn);
+    }
+    ASSERT_EQ(method1_offset, GetMethodOffset(1u));  // If this fails, num_nops is wrong.
+  }
+
+  void TestAdrpLdurAdd(uint32_t adrp_offset, bool has_thunk, uint32_t string_offset) {
+    TestAdrpInsn2Add(kLdurInsn, adrp_offset, has_thunk, string_offset);
+  }
+
+  void TestAdrpLdrPcRelAdd(uint32_t pcrel_ldr_insn,
+                           int32_t pcrel_disp,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t string_offset) {
+    ASSERT_LT(pcrel_disp, 0x100000);
+    ASSERT_GE(pcrel_disp, -0x100000);
+    ASSERT_EQ(pcrel_disp & 0x3, 0);
+    uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5);
+    TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset);
+  }
+
+  void TestAdrpLdrSpRelAdd(uint32_t sprel_ldr_insn,
+                           uint32_t sprel_disp_in_load_units,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t string_offset) {
+    ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
+    uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
+    TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset);
+  }
 };
 
 const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -358,14 +484,14 @@
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t method2_offset = GetMethodOffset(2u);
   uint32_t diff_after = method2_offset - method1_offset;
-  ASSERT_EQ(diff_after & 3u, 0u);
+  CHECK_ALIGNED(diff_after, 4u);
   ASSERT_LT(diff_after >> 2, 1u << 8);  // Simple encoding, (diff_after >> 2) fits into 8 bits.
   static const uint8_t method1_expected_code[] = {
       static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
   uint32_t diff_before = method1_offset - method2_offset;
-  ASSERT_EQ(diff_before & 3u, 0u);
+  CHECK_ALIGNED(diff_before, 4u);
   ASSERT_GE(diff_before, -1u << 27);
   auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
@@ -386,6 +512,39 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) {
+  constexpr uint32_t missing_method_index = 1024u;
+  auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0);
+  constexpr uint32_t bl_offset_in_last_method = 1u * 4u;  // After NOPs.
+  ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
+  ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
+  LinkerPatch last_method_patches[] = {
+      LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index),
+  };
+
+  constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
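+  // BL reaches at most 128 MiB backwards; 4 bytes beyond that forces the patcher
+  // to emit a thunk after the last method instead of branching to the trampoline.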
+  uint32_t last_method_idx = Create2MethodsWithGap(
+      kNopCode, ArrayRef<const LinkerPatch>(), last_method_code,
+      ArrayRef<const LinkerPatch>(last_method_patches),
+      just_over_max_negative_disp - bl_offset_in_last_method);
+  uint32_t method1_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+  ASSERT_EQ(method1_offset,
+            last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp);
+  ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+  // Check linked code.
+  uint32_t thunk_offset =
+      CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
+  uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
+  CHECK_ALIGNED(diff, 4u);
+  ASSERT_LT(diff, 128 * MB);
+  auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
+                                ArrayRef<const uint8_t>(expected_code)));
+  EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
 TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) {
   auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0);
   constexpr uint32_t bl_offset_in_method1 = 1u * 4u;  // After NOPs.
@@ -459,12 +618,14 @@
 
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+  ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset));
   uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader);
-  ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset));
-  uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64);
-  ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset));
+  uint32_t thunk_offset =
+      RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64));
+  DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+            last_method_header_offset);
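+    // The thunk is placed as close to the last method's header as the
+    // instruction set alignment allows.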
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
-  ASSERT_EQ(diff & 3u, 0u);
+  CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
   auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
@@ -494,7 +655,7 @@
   uint32_t thunk_offset =
       CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
   uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
-  ASSERT_EQ(diff & 3u, 0u);
+  CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
   auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
@@ -518,74 +679,158 @@
   TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4) {
-  TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference1) {
+  TestNopsAdrpAdd(0u, 0x12345678u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff8) {
-  TestAdrpLdurLdr(0xff8u, true, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference2) {
+  TestNopsAdrpAdd(0u, -0x12345678u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xffc) {
-  TestAdrpLdurLdr(0xffcu, true, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference3) {
+  TestNopsAdrpAdd(0u, 0x12345000u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0x1000) {
-  TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff4) {
-  TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff8) {
-  TestAdrpLdurLdr(0xff8u, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xffc) {
-  TestAdrpLdurLdr(0xffcu, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) {
-  TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference4) {
+  TestNopsAdrpAdd(0u, 0x12345ffcu);
 }
 
 #define TEST_FOR_OFFSETS(test, disp1, disp2) \
   test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \
   test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2)
 
+#define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
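+    /* A thunk is expected when the ADRP lands at 0xff8 or 0xffc in a 4KiB page, */ \
+    /* the pattern targeted by the Cortex-A53 erratum 843419 workaround. */ \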
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu); \
+    TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238)
+
+#define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+    TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \
+  }
+
+TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238)
+
 // LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
-#define LDRW_PCREL_TEST(adrp_offset, disp) \
+#define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \
     TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238)
+TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238)
 
 // LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
-#define LDRX_PCREL_TEST(adrp_offset, disp) \
+#define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \
-    bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \
+    bool unaligned = !IsAligned<8u>(adrp_offset + 4u + static_cast<uint32_t>(disp)); \
     bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \
     TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238)
+TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238)
 
 // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
-#define LDRW_SPREL_TEST(adrp_offset, disp) \
+#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \
     TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4)
+TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4)
 
-#define LDRX_SPREL_TEST(adrp_offset, disp) \
+#define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \
     TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8)
+TEST_FOR_OFFSETS(LDRX_SPREL_LDR_TEST, 0, 8)
+
+#define DEFAULT_LDUR_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## Ldur ## disp) { \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu); \
+    TestAdrpLdurAdd(adrp_offset, has_thunk, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_LDUR_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DENVER64_LDUR_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDenver64, StringReference ## adrp_offset ## Ldur ## disp) { \
+    TestAdrpLdurAdd(adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DENVER64_LDUR_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_SUBX3X2_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubX3X2 ## disp) { \
+    /* SUB unrelated to "ADRP x0, addr". */ \
+    uint32_t sub = kSubXInsn | (100 << 10) | (2u << 5) | 3u;  /* SUB x3, x2, #100 */ \
+    TestAdrpInsn2Add(sub, adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_SUBX3X2_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_SUBSX3X0_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubsX3X0 ## disp) { \
+    /* SUBS that uses the result of "ADRP x0, addr". */ \
+    uint32_t subs = kSubsXInsn | (100 << 10) | (0u << 5) | 3u;  /* SUBS x3, x0, #100 */ \
+    TestAdrpInsn2Add(subs, adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_SUBSX3X0_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_ADDX0X0_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddX0X0 ## disp) { \
+    /* ADD that uses the result register of "ADRP x0, addr" as both source and destination. */ \
+    uint32_t add = kAddXInsn | (100 << 10) | (0u << 5) | 0u;  /* ADD x0, x0, #100 */ \
+    TestAdrpInsn2Add(add, adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_ADDX0X0_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_ADDSX0X2_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddsX0X2 ## disp) { \
+    /* ADDS that does not use the result of "ADRP x0, addr" but overwrites that register. */ \
+    uint32_t adds = kAddsXInsn | (100 << 10) | (2u << 5) | 0u;  /* ADDS x0, x2, #100 */ \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu); \
+    TestAdrpInsn2Add(adds, adrp_offset, has_thunk, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_ADDSX0X2_ADD_TEST, 0x12345678, 0xffffc840)
+
+// LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
+#define LDRW_PCREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WPcRel ## disp) { \
+    TestAdrpLdrPcRelAdd(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRW_PCREL_ADD_TEST, 0x1234, 0x1238)
+
+// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
+#define LDRX_PCREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XPcRel ## disp) { \
+    bool unaligned = !IsAligned<8u>(adrp_offset + 4u + static_cast<uint32_t>(disp)); \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \
+    TestAdrpLdrPcRelAdd(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRX_PCREL_ADD_TEST, 0x1234, 0x1238)
+
+// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
+#define LDRW_SPREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WSpRel ## disp) { \
+    TestAdrpLdrSpRelAdd(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4)
+
+#define LDRX_SPREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XSpRel ## disp) { \
+    TestAdrpLdrSpRelAdd(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
 
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/linker/buffered_output_stream.cc b/compiler/linker/buffered_output_stream.cc
new file mode 100644
index 0000000..4c66c76
--- /dev/null
+++ b/compiler/linker/buffered_output_stream.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "buffered_output_stream.h"
+
+#include <string.h>
+
+namespace art {
+
+BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out)
+    : OutputStream(out->GetLocation()),  // Before out is moved to out_.
+      out_(std::move(out)),
+      used_(0) {}
+
+BufferedOutputStream::~BufferedOutputStream() {
+  FlushBuffer();
+}
+
+bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
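+  // Large writes bypass the buffer (after flushing any pending data); smaller
+  // writes are coalesced and flushed only when they would overflow the buffer.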
+  if (byte_count > kBufferSize) {
+    if (!FlushBuffer()) {
+      return false;
+    }
+    return out_->WriteFully(buffer, byte_count);
+  }
+  if (used_ + byte_count > kBufferSize) {
+    if (!FlushBuffer()) {
+      return false;
+    }
+  }
+  const uint8_t* src = reinterpret_cast<const uint8_t*>(buffer);
+  memcpy(&buffer_[used_], src, byte_count);
+  used_ += byte_count;
+  return true;
+}
+
+bool BufferedOutputStream::Flush() {
+  return FlushBuffer() && out_->Flush();
+}
+
+bool BufferedOutputStream::FlushBuffer() {
+  bool success = true;
+  if (used_ > 0) {
+    success = out_->WriteFully(&buffer_[0], used_);
+    used_ = 0;
+  }
+  return success;
+}
+
+off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
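+  // Flush first so buffered bytes land at their original offset, not the new one.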
+  if (!FlushBuffer()) {
+    return -1;
+  }
+  return out_->Seek(offset, whence);
+}
+
+}  // namespace art
diff --git a/compiler/linker/buffered_output_stream.h b/compiler/linker/buffered_output_stream.h
new file mode 100644
index 0000000..a2eefbb
--- /dev/null
+++ b/compiler/linker/buffered_output_stream.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+
+#include <memory>
+
+#include "output_stream.h"
+
+#include "globals.h"
+
+namespace art {
+
+class BufferedOutputStream FINAL : public OutputStream {
+ public:
+  explicit BufferedOutputStream(std::unique_ptr<OutputStream> out);
+
+  ~BufferedOutputStream() OVERRIDE;
+
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
+
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
+
+ private:
+  static const size_t kBufferSize = 8 * KB;
+
+  bool FlushBuffer();
+
+  std::unique_ptr<OutputStream> const out_;
+  uint8_t buffer_[kBufferSize];
+  size_t used_;
+
+  DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h
new file mode 100644
index 0000000..99410e4
--- /dev/null
+++ b/compiler/linker/error_delaying_output_stream.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+// OutputStream wrapper that delays reporting an error until Flush().
+class ErrorDelayingOutputStream FINAL : public OutputStream {
+ public:
+  explicit ErrorDelayingOutputStream(OutputStream* output)
+      : OutputStream(output->GetLocation()),
+        output_(output),
+        output_good_(true),
+        output_offset_(0) { }
+
+  // This function always reports success, to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to " << GetLocation() << " at offset " << output_offset_;
+        output_good_ = false;
+      }
+    }
+    output_offset_ += byte_count;
+    return true;
+  }
+
+  // This function always reports success, to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    // We keep a shadow copy of the offset so that we return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == static_cast<off_t>(-1)) {
+        PLOG(ERROR) << "Failed to seek in " << GetLocation() << ". Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
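+      // Note: in debug builds a failed seek trips the check below immediately;
+      // in release builds the error is only surfaced later via Good() or Flush().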
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
+  }
+
+  // Flush the output and return whether all operations have succeeded.
+  // Do nothing if we already have a pending error.
+  bool Flush() OVERRIDE {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
+  }
+
+  // Check (without flushing) whether all operations have succeeded so far.
+  bool Good() const {
+    return output_good_;
+  }
+
+ private:
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
diff --git a/compiler/linker/file_output_stream.cc b/compiler/linker/file_output_stream.cc
new file mode 100644
index 0000000..bbfbdfd
--- /dev/null
+++ b/compiler/linker/file_output_stream.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_output_stream.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/unix_file/fd_file.h"
+
+namespace art {
+
+FileOutputStream::FileOutputStream(File* file) : OutputStream(file->GetPath()), file_(file) {}
+
+bool FileOutputStream::WriteFully(const void* buffer, size_t byte_count) {
+  return file_->WriteFully(buffer, byte_count);
+}
+
+off_t FileOutputStream::Seek(off_t offset, Whence whence) {
+  return lseek(file_->Fd(), offset, static_cast<int>(whence));
+}
+
+bool FileOutputStream::Flush() {
+  return file_->Flush() == 0;
+}
+
+}  // namespace art
diff --git a/compiler/linker/file_output_stream.h b/compiler/linker/file_output_stream.h
new file mode 100644
index 0000000..f2d8453
--- /dev/null
+++ b/compiler/linker/file_output_stream.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include "os.h"
+
+namespace art {
+
+class FileOutputStream FINAL : public OutputStream {
+ public:
+  explicit FileOutputStream(File* file);
+
+  ~FileOutputStream() OVERRIDE {}
+
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
+
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
+
+ private:
+  File* const file_;
+
+  DISALLOW_COPY_AND_ASSIGN(FileOutputStream);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
new file mode 100644
index 0000000..c09950c
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/mips/relative_patcher_mips.h"
+
+#include "compiled_method.h"
+
+namespace art {
+namespace linker {
+
+uint32_t MipsRelativePatcher::ReserveSpace(
+    uint32_t offset,
+    const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+    MethodReference method_ref ATTRIBUTE_UNUSED) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) {
+  return offset;  // No thunks added; no limit on relative call distance.
+}
+
+void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                    uint32_t literal_offset ATTRIBUTE_UNUSED,
+                                    uint32_t patch_offset ATTRIBUTE_UNUSED,
+                                    uint32_t target_offset ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS";
+}
+
+void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                   const LinkerPatch& patch,
+                                                   uint32_t patch_offset,
+                                                   uint32_t target_offset) {
+  uint32_t anchor_literal_offset = patch.PcInsnOffset();
+  uint32_t literal_offset = patch.LiteralOffset();
+  bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray);
+
+  // Basic sanity checks.
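+  // The byte patterns below are the little-endian halves of the placeholder
+  // immediates 0x1234 (high) and 0x5678 (low) that get patched further down.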
+  if (is_r6) {
+    DCHECK_GE(code->size(), 8u);
+    DCHECK_LE(literal_offset, code->size() - 8u);
+    DCHECK_EQ(literal_offset, anchor_literal_offset);
+    // AUIPC reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E);
+    DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC);
+    // ADDIU reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x24);
+  } else {
+    DCHECK_GE(code->size(), 16u);
+    DCHECK_LE(literal_offset, code->size() - 12u);
+    DCHECK_GE(literal_offset, 4u);
+    // The NAL instruction may not immediately precede the anchor, as the PC+0
+    // value may come from HMipsComputeBaseMethodAddress instead.
+    if (dex_cache_array) {
+      DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
+      // NAL
+      DCHECK_EQ((*code)[literal_offset - 4], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 3], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 2], 0x10);
+      DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    }
+    // LUI reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
+    DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
+    // ORI reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34);
+    // ADDU reg, reg, reg2
+    DCHECK_EQ((*code)[literal_offset + 8], 0x21);
+    DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00);
+    if (dex_cache_array) {
+      // reg2 is either RA or from HMipsComputeBaseMethodAddress.
+      DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    }
+    DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00);
+  }
+
+  // Apply patch.
+  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
+  uint32_t diff = target_offset - anchor_offset;
+  if (dex_cache_array) {
+    diff += kDexCacheArrayLwOffset;
+  }
+  if (is_r6) {
+    diff += (diff & 0x8000) << 1;  // Account for sign extension in ADDIU.
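+    // E.g. diff = 0x00018000: ADDIU sign-extends the low half 0x8000 to -0x8000,
+    // so the high half must be bumped by one (to 0x0002) for AUIPC+ADDIU to add up.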
+  }
+
+  // LUI reg, offset_high / AUIPC reg, offset_high
+  (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16);
+  (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24);
+  // ORI reg, reg, offset_low / ADDIU reg, reg, offset_low
+  (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0);
+  (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8);
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h
new file mode 100644
index 0000000..4ff2f2f
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+
+#include "linker/relative_patcher.h"
+#include "arch/mips/instruction_set_features_mips.h"
+
+namespace art {
+namespace linker {
+
+class MipsRelativePatcher FINAL : public RelativePatcher {
+ public:
+  explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features)
+      : is_r6(features->IsR6()) {}
+
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
+                        MethodReference method_ref) OVERRIDE;
+  uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
+
+ private:
+  // We'll maximize the range of a single load instruction for dex cache array accesses
+  // by aligning offset -32768 with the offset of the first used element.
+  static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
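+  // With the base biased by +0x8000, a single LW with its signed 16-bit
+  // displacement then reaches element offsets in [0, 0xFFFF].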
+  bool is_r6;
+
+  DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher);
+};
+
+}  // namespace linker
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
new file mode 100644
index 0000000..a16aaca
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class Mips32r6RelativePatcherTest : public RelativePatcherTest {
+ public:
+  Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {}
+
+ protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
+};
+
+const uint8_t Mips32r6RelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
+};
+const uint32_t Mips32r6RelativePatcherTest::LiteralOffset = 0;  // At auipc (where patching starts).
+const uint32_t Mips32r6RelativePatcherTest::AnchorOffset = 0;  // At auipc (where PC+0 points).
+const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                       uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
+  diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
+
+  const uint8_t expected_code[] = {
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                        uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
new file mode 100644
index 0000000..335ce2e
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class MipsRelativePatcherTest : public RelativePatcherTest {
+ public:
+  MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
+
+ protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
+};
+
+const uint8_t MipsRelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x00, 0x00, 0x10, 0x04,  // nal
+    0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
+    0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
+};
+const uint32_t MipsRelativePatcherTest::LiteralOffset = 4;  // At lui (where patching starts).
+const uint32_t MipsRelativePatcherTest::AnchorOffset = 8;  // At ori (where PC+0 points).
+const ArrayRef<const uint8_t> MipsRelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                   uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
+
+  const uint8_t expected_code[] = {
+      0x00, 0x00, 0x10, 0x04,
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36,
+      0x21, 0x90, 0x5F, 0x02,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                    uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(MipsRelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(MipsRelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/multi_oat_relative_patcher.cc b/compiler/linker/multi_oat_relative_patcher.cc
new file mode 100644
index 0000000..e9e242b
--- /dev/null
+++ b/compiler/linker/multi_oat_relative_patcher.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "multi_oat_relative_patcher.h"
+
+#include "globals.h"
+#include "base/bit_utils.h"
+#include "base/logging.h"
+
+namespace art {
+namespace linker {
+
+MultiOatRelativePatcher::MultiOatRelativePatcher(InstructionSet instruction_set,
+                                                 const InstructionSetFeatures* features)
+    : method_offset_map_(),
+      relative_patcher_(
+          linker::RelativePatcher::Create(instruction_set, features, &method_offset_map_)),
+      adjustment_(0u),
+      instruction_set_(instruction_set),
+      start_size_code_alignment_(0u),
+      start_size_relative_call_thunks_(0u),
+      start_size_misc_thunks_(0u) {
+}
+
+void MultiOatRelativePatcher::StartOatFile(uint32_t adjustment) {
+  DCHECK_ALIGNED(adjustment, kPageSize);
+  adjustment_ = adjustment;
+
+  start_size_code_alignment_ = relative_patcher_->CodeAlignmentSize();
+  start_size_relative_call_thunks_ = relative_patcher_->RelativeCallThunksSize();
+  start_size_misc_thunks_ = relative_patcher_->MiscThunksSize();
+}
+
+uint32_t MultiOatRelativePatcher::CodeAlignmentSize() const {
+  DCHECK_GE(relative_patcher_->CodeAlignmentSize(), start_size_code_alignment_);
+  return relative_patcher_->CodeAlignmentSize() - start_size_code_alignment_;
+}
+
+uint32_t MultiOatRelativePatcher::RelativeCallThunksSize() const {
+  DCHECK_GE(relative_patcher_->RelativeCallThunksSize(), start_size_relative_call_thunks_);
+  return relative_patcher_->RelativeCallThunksSize() - start_size_relative_call_thunks_;
+}
+
+uint32_t MultiOatRelativePatcher::MiscThunksSize() const {
+  DCHECK_GE(relative_patcher_->MiscThunksSize(), start_size_misc_thunks_);
+  return relative_patcher_->MiscThunksSize() - start_size_misc_thunks_;
+}
+
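+// Callers must check the success flag before using the offset. An
+// illustrative sketch ('ref' is a hypothetical MethodReference that was
+// previously registered via SetOffset()):
+//   auto result = map_provider.FindMethodOffset(ref);
+//   uint32_t offset = result.first ? result.second : 0u;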
+std::pair<bool, uint32_t> MultiOatRelativePatcher::MethodOffsetMap::FindMethodOffset(
+    MethodReference ref) {
+  auto it = map.find(ref);
+  if (it == map.end()) {
+    return std::pair<bool, uint32_t>(false, 0u);
+  } else {
+    return std::pair<bool, uint32_t>(true, it->second);
+  }
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h
new file mode 100644
index 0000000..dbda03f
--- /dev/null
+++ b/compiler/linker/multi_oat_relative_patcher.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_MULTI_OAT_RELATIVE_PATCHER_H_
+#define ART_COMPILER_LINKER_MULTI_OAT_RELATIVE_PATCHER_H_
+
+#include "arch/instruction_set.h"
+#include "method_reference.h"
+#include "relative_patcher.h"
+#include "safe_map.h"
+
+namespace art {
+
+class CompiledMethod;
+class LinkerPatch;
+class InstructionSetFeatures;
+
+namespace linker {
+
+// MultiOatRelativePatcher is a helper class for handling patching across
+// any number of oat files. It provides storage for method code offsets
+// and wraps RelativePatcher calls, adjusting relative offsets according
+// to the value set by SetAdjustment().
+class MultiOatRelativePatcher FINAL {
+ public:
+  using const_iterator =
+      SafeMap<MethodReference, uint32_t, MethodReferenceComparator>::const_iterator;
+
+  MultiOatRelativePatcher(InstructionSet instruction_set, const InstructionSetFeatures* features);
+
+  // Mark the start of a new oat file (for statistics retrieval) and set the
+  // adjustment for a new oat file to apply to all relative offsets that are
+  // passed to the MultiOatRelativePatcher.
+  //
+  // The adjustment should be the global offset of the base from which relative
+  // offsets are calculated, such as the start of .rodata for the current oat file.
+  // It must never point directly to a method's code, so that no relative offset
+  // takes the value 0, which indicates a missing offset in GetOffset() and an
+  // error in WriteThunks(). Additionally, it must be page-aligned, so that it
+  // does not skew alignment calculations, e.g. for arm64 ADRP.
+  void StartOatFile(uint32_t adjustment);
+
+  // Get relative offset. Returns 0 when the offset has not been set yet.
+  uint32_t GetOffset(MethodReference method_ref) {
+    auto it = method_offset_map_.map.find(method_ref);
+    return (it != method_offset_map_.map.end()) ? it->second - adjustment_ : 0u;
+  }
+
+  // Set the offset.
+  void SetOffset(MethodReference method_ref, uint32_t offset) {
+    method_offset_map_.map.Put(method_ref, offset + adjustment_);
+  }
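+
+  // A minimal usage sketch (illustrative; 'ref' is a hypothetical
+  // MethodReference):
+  //   patcher.StartOatFile(/* adjustment */ 0x1000u);
+  //   patcher.SetOffset(ref, 0x100u);         // Stored internally as 0x1100u.
+  //   uint32_t off = patcher.GetOffset(ref);  // Returns 0x100u.
+  //   patcher.StartOatFile(0x3000u);
+  //   off = patcher.GetOffset(ref);           // 0x100u + 0x1000u - 0x3000u (mod 2^32).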
+
+  // Wrapper around RelativePatcher::ReserveSpace(), doing offset adjustment.
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
+                        MethodReference method_ref) {
+    offset += adjustment_;
+    offset = relative_patcher_->ReserveSpace(offset, compiled_method, method_ref);
+    offset -= adjustment_;
+    return offset;
+  }
+
+  // Wrapper around RelativePatcher::ReserveSpaceEnd(), doing offset adjustment.
+  uint32_t ReserveSpaceEnd(uint32_t offset) {
+    offset += adjustment_;
+    offset = relative_patcher_->ReserveSpaceEnd(offset);
+    offset -= adjustment_;
+    return offset;
+  }
+
+  // Wrapper around RelativePatcher::WriteThunks(), doing offset adjustment.
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) {
+    offset += adjustment_;
+    offset = relative_patcher_->WriteThunks(out, offset);
+    if (offset != 0u) {  // 0u indicates write error.
+      offset -= adjustment_;
+    }
+    return offset;
+  }
+
+  // Wrapper around RelativePatcher::PatchCall(), doing offset adjustment.
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) {
+    patch_offset += adjustment_;
+    target_offset += adjustment_;
+    relative_patcher_->PatchCall(code, literal_offset, patch_offset, target_offset);
+  }
+
+  // Wrapper around RelativePatcher::PatchDexCacheReference(), doing offset adjustment.
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) {
+    patch_offset += adjustment_;
+    target_offset += adjustment_;
+    relative_patcher_->PatchPcRelativeReference(code, patch, patch_offset, target_offset);
+  }
+
+  // Wrappers around RelativePatcher for statistics retrieval.
+  uint32_t CodeAlignmentSize() const;
+  uint32_t RelativeCallThunksSize() const;
+  uint32_t MiscThunksSize() const;
+
+ private:
+  // Map method reference to assigned offset.
+  // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
+  class MethodOffsetMap : public linker::RelativePatcherTargetProvider {
+   public:
+    std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE;
+    SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
+  };
+
+  MethodOffsetMap method_offset_map_;
+  std::unique_ptr<RelativePatcher> relative_patcher_;
+  uint32_t adjustment_;
+  InstructionSet instruction_set_;
+
+  uint32_t start_size_code_alignment_;
+  uint32_t start_size_relative_call_thunks_;
+  uint32_t start_size_misc_thunks_;
+
+  friend class MultiOatRelativePatcherTest;
+
+  DISALLOW_COPY_AND_ASSIGN(MultiOatRelativePatcher);
+};
+
+}  // namespace linker
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_MULTI_OAT_RELATIVE_PATCHER_H_
diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc
new file mode 100644
index 0000000..92a96a0
--- /dev/null
+++ b/compiler/linker/multi_oat_relative_patcher_test.cc
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiled_method.h"
+#include "gtest/gtest.h"
+#include "multi_oat_relative_patcher.h"
+#include "vector_output_stream.h"
+
+namespace art {
+namespace linker {
+
+static const MethodReference kNullMethodRef = MethodReference(nullptr, 0u);
+
+static bool EqualRef(MethodReference lhs, MethodReference rhs) {
+  return lhs.dex_file == rhs.dex_file && lhs.dex_method_index == rhs.dex_method_index;
+}
+
+class MultiOatRelativePatcherTest : public testing::Test {
+ protected:
+  class MockPatcher : public RelativePatcher {
+   public:
+    MockPatcher() { }
+
+    uint32_t ReserveSpace(uint32_t offset,
+                          const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+                          MethodReference method_ref) OVERRIDE {
+      last_reserve_offset_ = offset;
+      last_reserve_method_ = method_ref;
+      offset += next_reserve_adjustment_;
+      next_reserve_adjustment_ = 0u;
+      return offset;
+    }
+
+    uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE {
+      last_reserve_offset_ = offset;
+      last_reserve_method_ = kNullMethodRef;
+      offset += next_reserve_adjustment_;
+      next_reserve_adjustment_ = 0u;
+      return offset;
+    }
+
+    uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE {
+      last_write_offset_ = offset;
+      if (next_write_alignment_ != 0u) {
+        offset += next_write_alignment_;
+        bool success = WriteCodeAlignment(out, next_write_alignment_);
+        CHECK(success);
+        next_write_alignment_ = 0u;
+      }
+      if (next_write_call_thunk_ != 0u) {
+        offset += next_write_call_thunk_;
+        std::vector<uint8_t> thunk(next_write_call_thunk_, 'c');
+        bool success = WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk));
+        CHECK(success);
+        next_write_call_thunk_ = 0u;
+      }
+      if (next_write_misc_thunk_ != 0u) {
+        offset += next_write_misc_thunk_;
+        std::vector<uint8_t> thunk(next_write_misc_thunk_, 'm');
+        bool success = WriteMiscThunk(out, ArrayRef<const uint8_t>(thunk));
+        CHECK(success);
+        next_write_misc_thunk_ = 0u;
+      }
+      return offset;
+    }
+
+    void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                   uint32_t literal_offset,
+                   uint32_t patch_offset,
+                   uint32_t target_offset) OVERRIDE {
+      last_literal_offset_ = literal_offset;
+      last_patch_offset_ = patch_offset;
+      last_target_offset_ = target_offset;
+    }
+
+    void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                  const LinkerPatch& patch,
+                                  uint32_t patch_offset,
+                                  uint32_t target_offset) OVERRIDE {
+      last_literal_offset_ = patch.LiteralOffset();
+      last_patch_offset_ = patch_offset;
+      last_target_offset_ = target_offset;
+    }
+
+    // ReserveSpace()/ReserveSpaceEnd() state: last inputs, next adjustment.
+    uint32_t last_reserve_offset_ = 0u;
+    MethodReference last_reserve_method_ = kNullMethodRef;
+    uint32_t next_reserve_adjustment_ = 0u;
+
+    // WriteThunks() state: last input offset, next amounts to write.
+    uint32_t last_write_offset_ = 0u;
+    uint32_t next_write_alignment_ = 0u;
+    uint32_t next_write_call_thunk_ = 0u;
+    uint32_t next_write_misc_thunk_ = 0u;
+
+    // Last inputs to PatchCall() and PatchPcRelativeReference().
+    uint32_t last_literal_offset_ = 0u;
+    uint32_t last_patch_offset_ = 0u;
+    uint32_t last_target_offset_ = 0u;
+  };
+
+  MultiOatRelativePatcherTest()
+      : instruction_set_features_(InstructionSetFeatures::FromCppDefines()),
+        patcher_(kRuntimeISA, instruction_set_features_.get()) {
+    std::unique_ptr<MockPatcher> mock(new MockPatcher());
+    mock_ = mock.get();
+    patcher_.relative_patcher_ = std::move(mock);
+  }
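+
+  // The mock is injected through the MultiOatRelativePatcherTest friend
+  // declaration in MultiOatRelativePatcher, so the tests below observe the
+  // exact adjusted offsets forwarded by the wrapper instead of any
+  // architecture-specific patching.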
+
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+  MultiOatRelativePatcher patcher_;
+  MockPatcher* mock_;
+};
+
+TEST_F(MultiOatRelativePatcherTest, Offsets) {
+  const DexFile* dex_file = reinterpret_cast<const DexFile*>(1);
+  MethodReference ref1(dex_file, 1u);
+  MethodReference ref2(dex_file, 2u);
+  EXPECT_EQ(0u, patcher_.GetOffset(ref1));
+  EXPECT_EQ(0u, patcher_.GetOffset(ref2));
+
+  uint32_t adjustment1 = 0x1000;
+  patcher_.StartOatFile(adjustment1);
+  EXPECT_EQ(0u, patcher_.GetOffset(ref1));
+  EXPECT_EQ(0u, patcher_.GetOffset(ref2));
+
+  uint32_t off1 = 0x1234;
+  patcher_.SetOffset(ref1, off1);
+  EXPECT_EQ(off1, patcher_.GetOffset(ref1));
+  EXPECT_EQ(0u, patcher_.GetOffset(ref2));
+
+  uint32_t adjustment2 = 0x30000;
+  patcher_.StartOatFile(adjustment2);
+  EXPECT_EQ(off1 + adjustment1 - adjustment2, patcher_.GetOffset(ref1));
+  EXPECT_EQ(0u, patcher_.GetOffset(ref2));
+
+  uint32_t off2 = 0x4321;
+  patcher_.SetOffset(ref2, off2);
+  EXPECT_EQ(off1 + adjustment1 - adjustment2, patcher_.GetOffset(ref1));
+  EXPECT_EQ(off2, patcher_.GetOffset(ref2));
+
+  uint32_t adjustment3 = 0x78000;
+  patcher_.StartOatFile(adjustment3);
+  EXPECT_EQ(off1 + adjustment1 - adjustment3, patcher_.GetOffset(ref1));
+  EXPECT_EQ(off2 + adjustment2 - adjustment3, patcher_.GetOffset(ref2));
+}
+
+TEST_F(MultiOatRelativePatcherTest, OffsetsInReserve) {
+  const DexFile* dex_file = reinterpret_cast<const DexFile*>(1);
+  MethodReference ref1(dex_file, 1u);
+  MethodReference ref2(dex_file, 2u);
+  MethodReference ref3(dex_file, 3u);
+  const CompiledMethod* method = reinterpret_cast<const CompiledMethod*>(-1);
+
+  uint32_t adjustment1 = 0x1000;
+  patcher_.StartOatFile(adjustment1);
+
+  uint32_t method1_offset = 0x100;
+  uint32_t method1_offset_check = patcher_.ReserveSpace(method1_offset, method, ref1);
+  ASSERT_EQ(adjustment1 + method1_offset, mock_->last_reserve_offset_);
+  ASSERT_TRUE(EqualRef(ref1, mock_->last_reserve_method_));
+  ASSERT_EQ(method1_offset, method1_offset_check);
+
+  uint32_t method2_offset = 0x1230;
+  uint32_t method2_reserve_adjustment = 0x10;
+  mock_->next_reserve_adjustment_ = method2_reserve_adjustment;
+  uint32_t method2_offset_adjusted = patcher_.ReserveSpace(method2_offset, method, ref2);
+  ASSERT_EQ(adjustment1 + method2_offset, mock_->last_reserve_offset_);
+  ASSERT_TRUE(EqualRef(ref2, mock_->last_reserve_method_));
+  ASSERT_EQ(method2_offset + method2_reserve_adjustment, method2_offset_adjusted);
+
+  uint32_t end1_offset = 0x4320;
+  uint32_t end1_offset_check = patcher_.ReserveSpaceEnd(end1_offset);
+  ASSERT_EQ(adjustment1 + end1_offset, mock_->last_reserve_offset_);
+  ASSERT_TRUE(EqualRef(kNullMethodRef, mock_->last_reserve_method_));
+  ASSERT_EQ(end1_offset, end1_offset_check);
+
+  uint32_t adjustment2 = 0xd000;
+  patcher_.StartOatFile(adjustment2);
+
+  uint32_t method3_offset = 0xf00;
+  uint32_t method3_offset_check = patcher_.ReserveSpace(method3_offset, method, ref3);
+  ASSERT_EQ(adjustment2 + method3_offset, mock_->last_reserve_offset_);
+  ASSERT_TRUE(EqualRef(ref3, mock_->last_reserve_method_));
+  ASSERT_EQ(method3_offset, method3_offset_check);
+
+  uint32_t end2_offset = 0x2400;
+  uint32_t end2_reserve_adjustment = 0x20;
+  mock_->next_reserve_adjustment_ = end2_reserve_adjustment;
+  uint32_t end2_offset_adjusted = patcher_.ReserveSpaceEnd(end2_offset);
+  ASSERT_EQ(adjustment2 + end2_offset, mock_->last_reserve_offset_);
+  ASSERT_TRUE(EqualRef(kNullMethodRef, mock_->last_reserve_method_));
+  ASSERT_EQ(end2_offset + end2_reserve_adjustment, end2_offset_adjusted);
+}
+
+TEST_F(MultiOatRelativePatcherTest, Write) {
+  std::vector<uint8_t> output;
+  VectorOutputStream vos("output", &output);
+
+  uint32_t adjustment1 = 0x1000;
+  patcher_.StartOatFile(adjustment1);
+
+  uint32_t method1_offset = 0x100;
+  uint32_t method1_offset_check = patcher_.WriteThunks(&vos, method1_offset);
+  ASSERT_EQ(adjustment1 + method1_offset, mock_->last_write_offset_);
+  ASSERT_EQ(method1_offset, method1_offset_check);
+  vos.WriteFully("1", 1);  // Mark method1.
+
+  uint32_t method2_offset = 0x1230;
+  uint32_t method2_alignment_size = 1;
+  uint32_t method2_call_thunk_size = 2;
+  mock_->next_write_alignment_ = method2_alignment_size;
+  mock_->next_write_call_thunk_ = method2_call_thunk_size;
+  uint32_t method2_offset_adjusted = patcher_.WriteThunks(&vos, method2_offset);
+  ASSERT_EQ(adjustment1 + method2_offset, mock_->last_write_offset_);
+  ASSERT_EQ(method2_offset + method2_alignment_size + method2_call_thunk_size,
+            method2_offset_adjusted);
+  vos.WriteFully("2", 1);  // Mark method2.
+
+  EXPECT_EQ(method2_alignment_size, patcher_.CodeAlignmentSize());
+  EXPECT_EQ(method2_call_thunk_size, patcher_.RelativeCallThunksSize());
+
+  uint32_t adjustment2 = 0xd000;
+  patcher_.StartOatFile(adjustment2);
+
+  uint32_t method3_offset = 0xf00;
+  uint32_t method3_alignment_size = 2;
+  uint32_t method3_misc_thunk_size = 1;
+  mock_->next_write_alignment_ = method3_alignment_size;
+  mock_->next_write_misc_thunk_ = method3_misc_thunk_size;
+  uint32_t method3_offset_adjusted = patcher_.WriteThunks(&vos, method3_offset);
+  ASSERT_EQ(adjustment2 + method3_offset, mock_->last_write_offset_);
+  ASSERT_EQ(method3_offset + method3_alignment_size + method3_misc_thunk_size,
+            method3_offset_adjusted);
+  vos.WriteFully("3", 1);  // Mark method3.
+
+  EXPECT_EQ(method3_alignment_size, patcher_.CodeAlignmentSize());
+  EXPECT_EQ(method3_misc_thunk_size, patcher_.MiscThunksSize());
+
+  uint8_t expected_output[] = {
+      '1',               // Method1 marker; no thunks written.
+      0, 'c', 'c', '2',  // One alignment byte, two-byte call thunk, method2 marker.
+      0, 0, 'm', '3',    // Two alignment bytes, one-byte misc thunk, method3 marker.
+  };
+  ASSERT_EQ(arraysize(expected_output), output.size());
+  for (size_t i = 0; i != arraysize(expected_output); ++i) {
+    ASSERT_EQ(expected_output[i], output[i]) << i;
+  }
+}
+
+TEST_F(MultiOatRelativePatcherTest, Patch) {
+  std::vector<uint8_t> code(16);
+
+  uint32_t adjustment1 = 0x1000;
+  patcher_.StartOatFile(adjustment1);
+
+  uint32_t method1_literal_offset = 4u;
+  uint32_t method1_patch_offset = 0x1234u;
+  uint32_t method1_target_offset = 0x8888u;
+  patcher_.PatchCall(&code, method1_literal_offset, method1_patch_offset, method1_target_offset);
+  DCHECK_EQ(method1_literal_offset, mock_->last_literal_offset_);
+  DCHECK_EQ(method1_patch_offset + adjustment1, mock_->last_patch_offset_);
+  DCHECK_EQ(method1_target_offset + adjustment1, mock_->last_target_offset_);
+
+  uint32_t method2_literal_offset = 12u;
+  uint32_t method2_patch_offset = 0x7654u;
+  uint32_t method2_target_offset = 0xccccu;
+  LinkerPatch method2_patch =
+      LinkerPatch::DexCacheArrayPatch(method2_literal_offset, nullptr, 0u, 1234u);
+  patcher_.PatchPcRelativeReference(
+      &code, method2_patch, method2_patch_offset, method2_target_offset);
+  DCHECK_EQ(method2_literal_offset, mock_->last_literal_offset_);
+  DCHECK_EQ(method2_patch_offset + adjustment1, mock_->last_patch_offset_);
+  DCHECK_EQ(method2_target_offset + adjustment1, mock_->last_target_offset_);
+
+  uint32_t adjustment2 = 0xd000;
+  patcher_.StartOatFile(adjustment2);
+
+  uint32_t method3_literal_offset = 8u;
+  uint32_t method3_patch_offset = 0x108u;
+  uint32_t method3_target_offset = 0x200u;
+  patcher_.PatchCall(&code, method3_literal_offset, method3_patch_offset, method3_target_offset);
+  DCHECK_EQ(method3_literal_offset, mock_->last_literal_offset_);
+  DCHECK_EQ(method3_patch_offset + adjustment2, mock_->last_patch_offset_);
+  DCHECK_EQ(method3_target_offset + adjustment2, mock_->last_target_offset_);
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/output_stream.cc b/compiler/linker/output_stream.cc
similarity index 100%
rename from compiler/output_stream.cc
rename to compiler/linker/output_stream.cc
diff --git a/compiler/linker/output_stream.h b/compiler/linker/output_stream.h
new file mode 100644
index 0000000..96a5f48
--- /dev/null
+++ b/compiler/linker/output_stream.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_OUTPUT_STREAM_H_
+
+#include <ostream>
+#include <string>
+
+#include "base/macros.h"
+
+namespace art {
+
+enum Whence {
+  kSeekSet = SEEK_SET,
+  kSeekCurrent = SEEK_CUR,
+  kSeekEnd = SEEK_END,
+};
+std::ostream& operator<<(std::ostream& os, const Whence& rhs);
+
+class OutputStream {
+ public:
+  explicit OutputStream(const std::string& location) : location_(location) {}
+
+  virtual ~OutputStream() {}
+
+  const std::string& GetLocation() const {
+    return location_;
+  }
+
+  virtual bool WriteFully(const void* buffer, size_t byte_count) = 0;
+
+  virtual off_t Seek(off_t offset, Whence whence) = 0;
+
+  /*
+   * Flushes the stream. Returns whether the operation was successful.
+   *
+   * An OutputStream may delay reporting errors from WriteFully() or
+   * Seek(). In that case, Flush() shall report any pending error.
+   */
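+  // For example, a buffering implementation may detect a write error only
+  // when the buffer is drained by Flush(); see BufferedOutputStream.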
+  virtual bool Flush() = 0;
+
+ private:
+  const std::string location_;
+
+  DISALLOW_COPY_AND_ASSIGN(OutputStream);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_OUTPUT_STREAM_H_
diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc
new file mode 100644
index 0000000..84c76f2
--- /dev/null
+++ b/compiler/linker/output_stream_test.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_output_stream.h"
+#include "vector_output_stream.h"
+
+#include "base/unix_file/fd_file.h"
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "buffered_output_stream.h"
+#include "common_runtime_test.h"
+
+namespace art {
+
+class OutputStreamTest : public CommonRuntimeTest {
+ protected:
+  void CheckOffset(off_t expected) {
+    off_t actual = output_stream_->Seek(0, kSeekCurrent);
+    EXPECT_EQ(expected, actual);
+  }
+
+  void SetOutputStream(OutputStream& output_stream) {
+    output_stream_ = &output_stream;
+  }
+
+  void GenerateTestOutput() {
+    EXPECT_EQ(3, output_stream_->Seek(3, kSeekCurrent));
+    CheckOffset(3);
+    EXPECT_EQ(2, output_stream_->Seek(2, kSeekSet));
+    CheckOffset(2);
+    uint8_t buf[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    EXPECT_TRUE(output_stream_->WriteFully(buf, 2));
+    CheckOffset(4);
+    EXPECT_EQ(6, output_stream_->Seek(2, kSeekEnd));
+    CheckOffset(6);
+    EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
+    CheckOffset(10);
+    EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
+    EXPECT_TRUE(output_stream_->Flush());
+  }
+
+  void CheckTestOutput(const std::vector<uint8_t>& actual) {
+    uint8_t expected[] = {
+        0, 0, 1, 2,        // Zero-filled gap, then the two bytes written at offset 2.
+        0, 0, 1, 2, 3, 4,  // Gap from Seek(2, kSeekEnd), then four bytes at offset 6.
+        1, 2, 3, 4, 5, 6,  // Six bytes appended at offset 10.
+    };
+    EXPECT_EQ(sizeof(expected), actual.size());
+    EXPECT_EQ(0, memcmp(expected, &actual[0], actual.size()));
+  }
+
+  OutputStream* output_stream_;
+};
+
+TEST_F(OutputStreamTest, File) {
+  ScratchFile tmp;
+  FileOutputStream output_stream(tmp.GetFile());
+  SetOutputStream(output_stream);
+  GenerateTestOutput();
+  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
+  EXPECT_TRUE(in.get() != nullptr);
+  std::vector<uint8_t> actual(in->GetLength());
+  bool read_success = in->ReadFully(&actual[0], actual.size());
+  EXPECT_TRUE(read_success);
+  CheckTestOutput(actual);
+}
+
+TEST_F(OutputStreamTest, Buffered) {
+  ScratchFile tmp;
+  {
+    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
+    SetOutputStream(buffered_output_stream);
+    GenerateTestOutput();
+  }
+  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
+  EXPECT_TRUE(in.get() != nullptr);
+  std::vector<uint8_t> actual(in->GetLength());
+  bool read_success = in->ReadFully(&actual[0], actual.size());
+  EXPECT_TRUE(read_success);
+  CheckTestOutput(actual);
+}
+
+TEST_F(OutputStreamTest, Vector) {
+  std::vector<uint8_t> output;
+  VectorOutputStream output_stream("test vector output", &output);
+  SetOutputStream(output_stream);
+  GenerateTestOutput();
+  CheckTestOutput(output);
+}
+
+TEST_F(OutputStreamTest, BufferedFlush) {
+  struct CheckingOutputStream : OutputStream {
+    CheckingOutputStream()
+        : OutputStream("dummy"),
+          flush_called(false) { }
+    ~CheckingOutputStream() OVERRIDE {}
+
+    bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
+                    size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    bool Flush() OVERRIDE {
+      flush_called = true;
+      return true;
+    }
+
+    bool flush_called;
+  };
+
+  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  CheckingOutputStream* checking_output_stream = cos.get();
+  BufferedOutputStream buffered(std::move(cos));
+  ASSERT_FALSE(checking_output_stream->flush_called);
+  bool flush_result = buffered.Flush();
+  ASSERT_TRUE(flush_result);
+  ASSERT_TRUE(checking_output_stream->flush_called);
+}
+
+}  // namespace art
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index 82702dc..7765594 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -22,6 +22,9 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "linker/arm64/relative_patcher_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "linker/mips/relative_patcher_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "linker/x86/relative_patcher_x86.h"
 #endif
@@ -34,7 +37,8 @@
 namespace linker {
 
 std::unique_ptr<RelativePatcher> RelativePatcher::Create(
-    InstructionSet instruction_set, const InstructionSetFeatures* features,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* features,
     RelativePatcherTargetProvider* provider) {
   class RelativePatcherNone FINAL : public RelativePatcher {
    public:
@@ -61,10 +65,10 @@
       LOG(FATAL) << "Unexpected relative call patch.";
     }
 
-    virtual void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                        const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                        uint32_t patch_offset ATTRIBUTE_UNUSED,
-                                        uint32_t target_offset ATTRIBUTE_UNUSED) {
+    void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                  const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                  uint32_t patch_offset ATTRIBUTE_UNUSED,
+                                  uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE {
       LOG(FATAL) << "Unexpected relative dex cache array patch.";
     }
 
@@ -94,6 +98,11 @@
       return std::unique_ptr<RelativePatcher>(
           new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures()));
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return std::unique_ptr<RelativePatcher>(
+          new MipsRelativePatcher(features->AsMipsInstructionSetFeatures()));
+#endif
     default:
       return std::unique_ptr<RelativePatcher>(new RelativePatcherNone);
   }
diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h
index 8a9f3f8..a22b9f2 100644
--- a/compiler/linker/relative_patcher.h
+++ b/compiler/linker/relative_patcher.h
@@ -83,23 +83,31 @@
   }
 
   // Reserve space for thunks if needed before a method, return adjusted offset.
-  virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method,
+  virtual uint32_t ReserveSpace(uint32_t offset,
+                                const CompiledMethod* compiled_method,
                                 MethodReference method_ref) = 0;
 
   // Reserve space for thunks if needed after the last method, return adjusted offset.
+  // The caller may use this method to preemptively force thunk space reservation and
+  // then resume reservation for more methods. This is useful when there is a gap in
+  // the .text segment, for example when going to the next oat file for multi-image.
   virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0;
 
-  // Write relative call thunks if needed, return adjusted offset.
+  // Write relative call thunks if needed, return adjusted offset. Returns 0 on write failure.
   virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0;
 
   // Patch method code. The input displacement is relative to the patched location,
   // the patcher may need to adjust it if the correct base is different.
-  virtual void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                         uint32_t patch_offset, uint32_t target_offset) = 0;
+  virtual void PatchCall(std::vector<uint8_t>* code,
+                         uint32_t literal_offset,
+                         uint32_t patch_offset,
+                         uint32_t target_offset) = 0;
 
   // Patch a reference to a dex cache location.
-  virtual void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
-                                      uint32_t patch_offset, uint32_t target_offset) = 0;
+  virtual void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                        const LinkerPatch& patch,
+                                        uint32_t patch_offset,
+                                        uint32_t target_offset) = 0;
 
  protected:
   RelativePatcher()
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index e357662..d21f33e 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -44,10 +44,23 @@
       : compiler_options_(),
         verification_results_(&compiler_options_),
         inliner_map_(),
-        driver_(&compiler_options_, &verification_results_, &inliner_map_,
-                Compiler::kQuick, instruction_set, nullptr,
-                false, nullptr, nullptr, nullptr, 1u,
-                false, false, "", false, nullptr, -1, ""),
+        driver_(&compiler_options_,
+                &verification_results_,
+                &inliner_map_,
+                Compiler::kQuick,
+                instruction_set,
+                /* instruction_set_features */ nullptr,
+                /* boot_image */ false,
+                /* app_image */ false,
+                /* image_classes */ nullptr,
+                /* compiled_classes */ nullptr,
+                /* compiled_methods */ nullptr,
+                /* thread_count */ 1u,
+                /* dump_stats */ false,
+                /* dump_passes */ false,
+                /* timer */ nullptr,
+                /* swap_fd */ -1,
+                /* profile_compilation_info */ nullptr),
         error_msg_(),
         instruction_set_(instruction_set),
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
@@ -73,12 +86,26 @@
                          const ArrayRef<const LinkerPatch>& patches) {
     compiled_method_refs_.push_back(method_ref);
     compiled_methods_.emplace_back(new CompiledMethod(
-        &driver_, instruction_set_, code,
-        0u, 0u, 0u, nullptr, ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
-        ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
+        &driver_,
+        instruction_set_,
+        code,
+        /* frame_size_in_bytes */ 0u,
+        /* core_spill_mask */ 0u,
+        /* fp_spill_mask */ 0u,
+        /* src_mapping_table */ ArrayRef<const SrcMapElem>(),
+        /* vmap_table */ ArrayRef<const uint8_t>(),
+        /* cfi_info */ ArrayRef<const uint8_t>(),
         patches));
   }
 
+  uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) {
+    // We want to align the code rather than the preheader.
+    uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader);
+    uint32_t aligned_code_offset =
+        CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_);
+    return aligned_code_offset - unaligned_code_offset;
+  }
+
   void Link() {
     // Reserve space.
     static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset.");
@@ -87,13 +114,12 @@
     for (auto& compiled_method : compiled_methods_) {
       offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]);
 
-      uint32_t aligned_offset = compiled_method->AlignCode(offset);
-      uint32_t aligned_code_delta = aligned_offset - offset;
-      offset += aligned_code_delta;
+      uint32_t alignment_size = CodeAlignmentSize(offset);
+      offset += alignment_size;
 
       offset += sizeof(OatQuickMethodHeader);
       uint32_t quick_code_offset = offset + compiled_method->CodeDelta();
-      const auto& code = *compiled_method->GetQuickCode();
+      const auto code = compiled_method->GetQuickCode();
       offset += code.size();
 
       method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset);
@@ -117,31 +143,39 @@
     for (auto& compiled_method : compiled_methods_) {
       offset = patcher_->WriteThunks(&out_, offset);
 
-      uint32_t aligned_offset = compiled_method->AlignCode(offset);
-      uint32_t aligned_code_delta = aligned_offset - offset;
-      CHECK_LE(aligned_code_delta, sizeof(kPadding));
-      out_.WriteFully(kPadding, aligned_code_delta);
-      offset += aligned_code_delta;
+      uint32_t alignment_size = CodeAlignmentSize(offset);
+      CHECK_LE(alignment_size, sizeof(kPadding));
+      out_.WriteFully(kPadding, alignment_size);
+      offset += alignment_size;
 
       out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader));
       offset += sizeof(OatQuickMethodHeader);
-      ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode());
+      ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
       if (!compiled_method->GetPatches().empty()) {
         patched_code_.assign(code.begin(), code.end());
         code = ArrayRef<const uint8_t>(patched_code_);
         for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-          if (patch.Type() == kLinkerPatchCallRelative) {
+          if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
             auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod());
             uint32_t target_offset =
                 result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta();
             patcher_->PatchCall(&patched_code_, patch.LiteralOffset(),
                                 offset + patch.LiteralOffset(), target_offset);
-          } else if (patch.Type() == kLinkerPatchDexCacheArray) {
+          } else if (patch.GetType() == LinkerPatch::Type::kDexCacheArray) {
             uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset();
-            patcher_->PatchDexCacheReference(&patched_code_, patch,
-                                             offset + patch.LiteralOffset(), target_offset);
+            patcher_->PatchPcRelativeReference(&patched_code_,
+                                               patch,
+                                               offset + patch.LiteralOffset(),
+                                               target_offset);
+          } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) {
+            uint32_t target_offset = string_index_to_offset_map_.Get(patch.TargetStringIndex());
+            patcher_->PatchPcRelativeReference(&patched_code_,
+                                               patch,
+                                               offset + patch.LiteralOffset(),
+                                               target_offset);
           } else {
-            LOG(FATAL) << "Bad patch type.";
+            LOG(FATAL) << "Bad patch type: " << patch.GetType();
+            UNREACHABLE();
           }
         }
       }
@@ -164,7 +198,7 @@
       ++idx;
     }
     CHECK_NE(idx, compiled_method_refs_.size());
-    CHECK_EQ(compiled_methods_[idx]->GetQuickCode()->size(), expected_code.size());
+    CHECK_EQ(compiled_methods_[idx]->GetQuickCode().size(), expected_code.size());
 
     auto result = method_offset_map_.FindMethodOffset(method_ref);
     CHECK(result.first);  // Must have been linked.
@@ -243,6 +277,7 @@
   MethodOffsetMap method_offset_map_;
   std::unique_ptr<RelativePatcher> patcher_;
   uint32_t dex_cache_arrays_begin_;
+  SafeMap<uint32_t, uint32_t> string_index_to_offset_map_;
   std::vector<MethodReference> compiled_method_refs_;
   std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
   std::vector<uint8_t> patched_code_;
diff --git a/compiler/linker/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc
new file mode 100644
index 0000000..f758005
--- /dev/null
+++ b/compiler/linker/vector_output_stream.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "vector_output_stream.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector)
+    : OutputStream(location), offset_(vector->size()), vector_(vector) {}
+
+off_t VectorOutputStream::Seek(off_t offset, Whence whence) {
+  CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence;
+  off_t new_offset = 0;
+  switch (whence) {
+    case kSeekSet: {
+      new_offset = offset;
+      break;
+    }
+    case kSeekCurrent: {
+      new_offset = offset_ + offset;
+      break;
+    }
+    case kSeekEnd: {
+      new_offset = vector_->size() + offset;
+      break;
+    }
+  }
+  EnsureCapacity(new_offset);
+  offset_ = new_offset;
+  return offset_;
+}
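+
+// An illustrative sketch of the seek semantics (not part of this change):
+// seeking past the current end zero-extends the backing vector through
+// EnsureCapacity().
+//   std::vector<uint8_t> v;              // Initially empty.
+//   VectorOutputStream s("sketch", &v);
+//   s.Seek(4, kSeekSet);                 // v.size() == 4, all zeros.
+//   s.WriteFully("ab", 2);               // v == {0, 0, 0, 0, 'a', 'b'}.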
+
+}  // namespace art
diff --git a/compiler/linker/vector_output_stream.h b/compiler/linker/vector_output_stream.h
new file mode 100644
index 0000000..3210143
--- /dev/null
+++ b/compiler/linker/vector_output_stream.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include <string>
+#include <string.h>
+#include <vector>
+
+namespace art {
+
+class VectorOutputStream FINAL : public OutputStream {
+ public:
+  VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
+
+  ~VectorOutputStream() OVERRIDE {}
+
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    if (static_cast<size_t>(offset_) == vector_->size()) {
+      const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
+      vector_->insert(vector_->end(), &start[0], &start[byte_count]);
+      offset_ += byte_count;
+    } else {
+      off_t new_offset = offset_ + byte_count;
+      EnsureCapacity(new_offset);
+      memcpy(&(*vector_)[offset_], buffer, byte_count);
+      offset_ = new_offset;
+    }
+    return true;
+  }
+
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE {
+    return true;
+  }
+
+ private:
+  void EnsureCapacity(off_t new_offset) {
+    if (new_offset > static_cast<off_t>(vector_->size())) {
+      vector_->resize(new_offset);
+    }
+  }
+
+  off_t offset_;
+  std::vector<uint8_t>* const vector_;
+
+  DISALLOW_COPY_AND_ASSIGN(VectorOutputStream);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
index 24b1481..768d31a 100644
--- a/compiler/linker/x86/relative_patcher_x86.cc
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -21,10 +21,10 @@
 namespace art {
 namespace linker {
 
-void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                const LinkerPatch& patch,
-                                                uint32_t patch_offset,
-                                                uint32_t target_offset) {
+void X86RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                  const LinkerPatch& patch,
+                                                  uint32_t patch_offset,
+                                                  uint32_t target_offset) {
   uint32_t anchor_literal_offset = patch.PcInsnOffset();
   uint32_t literal_offset = patch.LiteralOffset();
 
diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h
index 0c881f0..fbf9ad4 100644
--- a/compiler/linker/x86/relative_patcher_x86.h
+++ b/compiler/linker/x86/relative_patcher_x86.h
@@ -26,8 +26,10 @@
  public:
   X86RelativePatcher() { }
 
-  void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
-                              uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 };
 
 }  // namespace linker
diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc
index bc285a7..bf3a648 100644
--- a/compiler/linker/x86/relative_patcher_x86_base.cc
+++ b/compiler/linker/x86/relative_patcher_x86_base.cc
@@ -34,8 +34,10 @@
   return offset;  // No thunks added; no limit on relative call distance.
 }
 
-void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                                       uint32_t patch_offset, uint32_t target_offset) {
+void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code,
+                                       uint32_t literal_offset,
+                                       uint32_t patch_offset,
+                                       uint32_t target_offset) {
   DCHECK_LE(literal_offset + 4u, code->size());
   // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
   uint32_t displacement = target_offset - patch_offset;
diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h
index 9200709..ca83a72 100644
--- a/compiler/linker/x86/relative_patcher_x86_base.h
+++ b/compiler/linker/x86/relative_patcher_x86_base.h
@@ -29,8 +29,10 @@
                         MethodReference method_ref) OVERRIDE;
   uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
   uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
-  void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
-                 uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
 
  protected:
   X86BaseRelativePatcher() { }
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
index 7acc330..2a44b79 100644
--- a/compiler/linker/x86/relative_patcher_x86_test.cc
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -70,15 +70,19 @@
   uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method1_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8),
-      static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24)
+      static_cast<uint8_t>(diff_after),
+      static_cast<uint8_t>(diff_after >> 8),
+      static_cast<uint8_t>(diff_after >> 16),
+      static_cast<uint8_t>(diff_after >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
   uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method2_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8),
-      static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24)
+      static_cast<uint8_t>(diff_before),
+      static_cast<uint8_t>(diff_before >> 8),
+      static_cast<uint8_t>(diff_before >> 16),
+      static_cast<uint8_t>(diff_before >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
 }
@@ -95,8 +99,10 @@
   uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size());
   static const uint8_t expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
@@ -125,8 +131,42 @@
       0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
       0x5b,                                 // pop ebx
       0x8b, 0x83,                           // mov eax, [ebx + diff]
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86RelativePatcherTest, StringReference) {
+  constexpr uint32_t kStringIndex = 1u;
+  constexpr uint32_t kStringOffset = 0x12345678;
+  string_index_to_offset_map_.Put(kStringIndex, kStringOffset);
+  static const uint8_t raw_code[] = {
+      0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
+      0x5b,                                 // pop ebx
+      0x8d, 0x83, 0x00, 0x01, 0x00, 0x00,   // lea eax, [ebx + 256 (kDummy32BitValue)]
+  };
+  constexpr uint32_t anchor_offset = 5u;  // After call +0.
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = kStringOffset - (result.second + anchor_offset);
+  static const uint8_t expected_code[] = {
+      0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
+      0x5b,                                 // pop ebx
+      0x8d, 0x83,                           // lea eax, [ebx + diff]
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc
index 598f3ac..2ff6930 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc
@@ -21,9 +21,10 @@
 namespace art {
 namespace linker {
 
-void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                   const LinkerPatch& patch,
-                                                   uint32_t patch_offset, uint32_t target_offset) {
+void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                     const LinkerPatch& patch,
+                                                     uint32_t patch_offset,
+                                                     uint32_t target_offset) {
   DCHECK_LE(patch.LiteralOffset() + 4u, code->size());
   // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
   uint32_t displacement = target_offset - patch_offset;
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h
index af687b4..11bb6d5 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.h
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.h
@@ -26,8 +26,10 @@
  public:
   X86_64RelativePatcher() { }
 
-  void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
-                              uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 };
 
 }  // namespace linker
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
index 36e0f01..2b46453 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
@@ -29,6 +29,8 @@
   static const ArrayRef<const uint8_t> kCallCode;
   static const uint8_t kDexCacheLoadRawCode[];
   static const ArrayRef<const uint8_t> kDexCacheLoadCode;
+  static const uint8_t kStringReferenceRawCode[];
+  static const ArrayRef<const uint8_t> kStringReferenceCode;
 
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
@@ -51,6 +53,14 @@
 const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode(
     kDexCacheLoadRawCode);
 
+const uint8_t X86_64RelativePatcherTest::kStringReferenceRawCode[] = {
+    0x8d, 0x05,  // lea eax, [rip + <offset>]
+    0x00, 0x01, 0x00, 0x00
+};
+
+const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kStringReferenceCode(
+    kStringReferenceRawCode);
+
 TEST_F(X86_64RelativePatcherTest, CallSelf) {
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
@@ -80,15 +90,19 @@
   uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method1_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8),
-      static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24)
+      static_cast<uint8_t>(diff_after),
+      static_cast<uint8_t>(diff_after >> 8),
+      static_cast<uint8_t>(diff_after >> 16),
+      static_cast<uint8_t>(diff_after >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
   uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method2_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8),
-      static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24)
+      static_cast<uint8_t>(diff_before),
+      static_cast<uint8_t>(diff_before >> 8),
+      static_cast<uint8_t>(diff_before >> 16),
+      static_cast<uint8_t>(diff_before >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
 }
@@ -105,8 +119,10 @@
   uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size());
   static const uint8_t expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
@@ -126,8 +142,34 @@
       dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size());
   static const uint8_t expected_code[] = {
       0x8b, 0x05,
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86_64RelativePatcherTest, StringReference) {
+  constexpr uint32_t kStringIndex = 1u;
+  constexpr uint32_t kStringOffset = 0x12345678;
+  string_index_to_offset_map_.Put(kStringIndex, kStringOffset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(
+          kStringReferenceCode.size() - 4u, nullptr, 0u, kStringIndex),
+  };
+  AddCompiledMethod(MethodRef(1u), kStringReferenceCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = kStringOffset - (result.second + kStringReferenceCode.size());
+  static const uint8_t expected_code[] = {
+      0x8d, 0x05,
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 2d9d91a..bf53bb2 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -16,27 +16,43 @@
 
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiled_method.h"
 #include "compiler.h"
-#include "dex/pass_manager.h"
+#include "debug/method_debug_info.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick_compiler_callbacks.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "linker/multi_oat_relative_patcher.h"
+#include "linker/vector_output_stream.h"
 #include "mirror/class-inl.h"
-#include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
-#include "vector_output_stream.h"
+#include "utils/test_dex_file_builder.h"
 
 namespace art {
 
+// Error handler passed to CompilerOptions::ParseCompilerOption(); logs the
+// formatted message fatally and never returns.
+NO_RETURN static void Usage(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  va_end(ap);
+  LOG(FATAL) << error;
+  UNREACHABLE();
+}
+
 class OatTest : public CommonCompilerTest {
  protected:
   static const bool kCompile = false;  // DISABLED_ due to the time to compile libcore
@@ -63,14 +79,274 @@
       EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
       uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(quick_oat_code), 2);
       quick_oat_code = reinterpret_cast<const void*>(oat_code_aligned);
-      const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      EXPECT_TRUE(quick_code != nullptr);
-      size_t code_size = quick_code->size() * sizeof(quick_code[0]);
+      ArrayRef<const uint8_t> quick_code = compiled_method->GetQuickCode();
+      EXPECT_FALSE(quick_code.empty());
+      size_t code_size = quick_code.size() * sizeof(quick_code[0]);
       EXPECT_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size))
           << PrettyMethod(method) << " " << code_size;
       CHECK_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size));
     }
   }
+
+  void SetupCompiler(Compiler::Kind compiler_kind,
+                     InstructionSet insn_set,
+                     const std::vector<std::string>& compiler_options,
+                     /*out*/std::string* error_msg) {
+    ASSERT_TRUE(error_msg != nullptr);
+    insn_features_.reset(InstructionSetFeatures::FromVariant(insn_set, "default", error_msg));
+    ASSERT_TRUE(insn_features_ != nullptr) << error_msg;
+    compiler_options_.reset(new CompilerOptions);
+    for (const std::string& option : compiler_options) {
+      compiler_options_->ParseCompilerOption(option, Usage);
+    }
+    verification_results_.reset(new VerificationResults(compiler_options_.get()));
+    method_inliner_map_.reset(new DexFileToMethodInlinerMap);
+    callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
+                                                method_inliner_map_.get(),
+                                                CompilerCallbacks::CallbackMode::kCompileApp));
+    Runtime::Current()->SetCompilerCallbacks(callbacks_.get());
+    timer_.reset(new CumulativeLogger("Compilation times"));
+    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                              verification_results_.get(),
+                                              method_inliner_map_.get(),
+                                              compiler_kind,
+                                              insn_set,
+                                              insn_features_.get(),
+                                              /* boot_image */ false,
+                                              /* app_image */ false,
+                                              /* image_classes */ nullptr,
+                                              /* compiled_classes */ nullptr,
+                                              /* compiled_methods */ nullptr,
+                                              /* thread_count */ 2,
+                                              /* dump_stats */ true,
+                                              /* dump_passes */ true,
+                                              timer_.get(),
+                                              /* swap_fd */ -1,
+                                              /* profile_compilation_info */ nullptr));
+  }
+
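+  // Write an oat/elf file whose dex files come from in-memory dex file data.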
+  bool WriteElf(File* file,
+                const std::vector<const DexFile*>& dex_files,
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+                                          dex_file->GetLocation().c_str(),
+                                          dex_file->GetLocationChecksum())) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
+  }
+
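+  // As above, but let the OatWriter open each dex or zip file by filename.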
+  bool WriteElf(File* file,
+                const std::vector<const char*>& dex_filenames,
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const char* dex_filename : dex_filenames) {
+      if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
+  }
+
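+  // As above, but take all classes*.dex entries from an already-open zip file.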
+  bool WriteElf(File* file,
+                File&& zip_fd,
+                const char* location,
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
+      return false;
+    }
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
+  }
+
+  bool DoWriteElf(File* file,
+                  OatWriter& oat_writer,
+                  SafeMap<std::string, std::string>& key_value_store,
+                  bool verify) {
+    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+        compiler_driver_->GetInstructionSet(),
+        compiler_driver_->GetInstructionSetFeatures(),
+        &compiler_driver_->GetCompilerOptions(),
+        file);
+    elf_writer->Start();
+    OutputStream* rodata = elf_writer->StartRoData();
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    if (!oat_writer.WriteAndOpenDexFiles(rodata,
+                                         file,
+                                         compiler_driver_->GetInstructionSet(),
+                                         compiler_driver_->GetInstructionSetFeatures(),
+                                         &key_value_store,
+                                         verify,
+                                         &opened_dex_files_map,
+                                         &opened_dex_files)) {
+      return false;
+    }
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    std::vector<const DexFile*> dex_files;
+    for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+      dex_files.push_back(dex_file.get());
+      ScopedObjectAccess soa(Thread::Current());
+      class_linker->RegisterDexFile(*dex_file, nullptr);
+    }
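+    // Lay out the oat file: the relative patcher resolves PC-relative
+    // references across methods, and the section sizes for the ELF writer
+    // follow from the finished layout.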
+    linker::MultiOatRelativePatcher patcher(compiler_driver_->GetInstructionSet(),
+                                            instruction_set_features_.get());
+    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files, &patcher);
+    size_t rodata_size = oat_writer.GetOatHeader().GetExecutableOffset();
+    size_t text_size = oat_writer.GetSize() - rodata_size;
+    elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer.GetBssSize());
+
+    if (!oat_writer.WriteRodata(rodata)) {
+      return false;
+    }
+    elf_writer->EndRoData(rodata);
+
+    OutputStream* text = elf_writer->StartText();
+    if (!oat_writer.WriteCode(text)) {
+      return false;
+    }
+    elf_writer->EndText(text);
+
+    // 42U and 4096U are dummy image checksum/begin values; the WriteRead test
+    // checks them against the written OatHeader.
+    if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+      return false;
+    }
+
+    elf_writer->WriteDynamicSection();
+    elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
+    elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
+
+    return elf_writer->End();
+  }
+
+  void TestDexFileInput(bool verify, bool low_4gb);
+  void TestZipFileInput(bool verify);
+
+  std::unique_ptr<const InstructionSetFeatures> insn_features_;
+  std::unique_ptr<QuickCompilerCallbacks> callbacks_;
+};
+
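+// Test helper that writes a minimal, uncompressed ("store only") zip archive:
+// a local file header plus payload per entry, then the central directory and
+// the end-of-central-directory record on Finish().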
+class ZipBuilder {
+ public:
+  explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+  bool AddFile(const char* location, const void* data, size_t size) {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    ZipFileHeader file_header;
+    file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+    file_header.compressed_size = size;
+    file_header.uncompressed_size = size;
+    file_header.filename_length = strlen(location);
+
+    if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+        !zip_file_->WriteFully(location, file_header.filename_length) ||
+        !zip_file_->WriteFully(data, size)) {
+      return false;
+    }
+
+    CentralDirectoryFileHeader cdfh;
+    cdfh.crc32 = file_header.crc32;
+    cdfh.compressed_size = size;
+    cdfh.uncompressed_size = size;
+    cdfh.filename_length = file_header.filename_length;
+    cdfh.relative_offset_of_local_file_header = offset;
+    file_data_.push_back(FileData { cdfh, location });
+    return true;
+  }
+
+  bool Finish() {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    size_t central_directory_size = 0u;
+    for (const FileData& file_data : file_data_) {
+      if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+          !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+        return false;
+      }
+      central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+    }
+    EndOfCentralDirectoryRecord eocd_record;
+    eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+    eocd_record.total_number_of_central_directory_records = file_data_.size();
+    eocd_record.size_of_central_directory = central_directory_size;
+    eocd_record.offset_of_start_of_central_directory = offset;
+    return
+        zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+        zip_file_->Flush() == 0;
+  }
+
+ private:
+  struct PACKED(1) ZipFileHeader {
+    uint32_t signature = 0x04034b50;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+  };
+
+  struct PACKED(1) CentralDirectoryFileHeader {
+    uint32_t signature = 0x02014b50;
+    uint16_t version_made_by = 10;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+    uint16_t file_comment_length = 0u;          // No file comment.
+    uint16_t disk_number_where_file_starts = 0u;
+    uint16_t internal_file_attributes = 0u;
+    uint32_t external_file_attributes = 0u;
+    uint32_t relative_offset_of_local_file_header;
+  };
+
+  struct PACKED(1) EndOfCentralDirectoryRecord {
+    uint32_t signature = 0x06054b50;
+    uint16_t number_of_this_disk = 0u;
+    uint16_t disk_where_central_directory_starts = 0u;
+    uint16_t number_of_central_directory_records_on_this_disk;
+    uint16_t total_number_of_central_directory_records;
+    uint32_t size_of_central_directory;
+    uint32_t offset_of_start_of_central_directory;
+    uint16_t comment_length = 0u;               // No file comment.
+  };
+
+  struct FileData {
+    CentralDirectoryFileHeader cdfh;
+    const char* location;
+  };
+
+  File* zip_file_;
+  std::vector<FileData> file_data_;
 };
 
 TEST_F(OatTest, WriteRead) {
@@ -80,54 +356,37 @@
   // TODO: make selectable.
   Compiler::Kind compiler_kind = Compiler::kQuick;
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
-
   std::string error_msg;
-  std::unique_ptr<const InstructionSetFeatures> insn_features(
-      InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
-  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-  compiler_options_.reset(new CompilerOptions);
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
-  method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  timer_.reset(new CumulativeLogger("Compilation times"));
-  compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                            verification_results_.get(),
-                                            method_inliner_map_.get(),
-                                            compiler_kind, insn_set,
-                                            insn_features.get(), false, nullptr, nullptr, nullptr,
-                                            2, true, true, "", false, timer_.get(), -1, ""));
+  SetupCompiler(compiler_kind, insn_set, std::vector<std::string>(), /*out*/ &error_msg);
+
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
+    compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
   }
 
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "lue.art");
-  OatWriter oat_writer(class_linker->GetBootClassPath(),
-                       42U,
-                       4096U,
-                       0,
-                       compiler_driver_.get(),
-                       nullptr,
-                       &timings,
-                       &key_value_store);
-  bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                            !kIsTargetBuild,
-                                            class_linker->GetBootClassPath(),
-                                            &oat_writer,
-                                            tmp.GetFile());
+  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store, false);
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
   }
-  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), nullptr,
-                                                  nullptr, false, nullptr, &error_msg));
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
+                                                  tmp.GetFilename(),
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  /*low_4gb*/true,
+                                                  nullptr,
+                                                  &error_msg));
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
   ASSERT_TRUE(oat_header.IsValid());
-  ASSERT_EQ(1U, oat_header.GetDexFileCount());  // core
+  ASSERT_EQ(class_linker->GetBootClassPath().size(), oat_header.GetDexFileCount());  // core
   ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum());
   ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin());
   ASSERT_EQ("lue.art", std::string(oat_header.GetStoreValueByKey(OatHeader::kImageLocationKey)));
@@ -152,8 +411,9 @@
     }
 
     const char* descriptor = dex_file.GetClassDescriptor(class_def);
-    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor,
-                                                   NullHandle<mirror::ClassLoader>());
+    mirror::Class* klass = class_linker->FindClass(soa.Self(),
+                                                   descriptor,
+                                                   ScopedNullHandle<mirror::ClassLoader>());
 
     const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(i);
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class.GetStatus()) << descriptor;
@@ -166,12 +426,14 @@
       ++method_index;
     }
     size_t visited_virtuals = 0;
-    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
-      if (!m.IsMiranda()) {
-        CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
-        ++method_index;
-        ++visited_virtuals;
+    // TODO: We should also check copied methods in this test.
+    for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
+      if (!klass->IsInterface()) {
+        EXPECT_FALSE(m.IsCopied());
       }
+      CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
+      ++method_index;
+      ++visited_virtuals;
     }
     EXPECT_EQ(visited_virtuals, num_virtual_methods);
   }
@@ -182,33 +444,349 @@
   // it is time to update OatHeader::kOatVersion
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
-  EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(113 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
+  EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+            sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
-    InstructionSet insn_set = kX86;
-    std::string error_msg;
-    std::unique_ptr<const InstructionSetFeatures> insn_features(
-        InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
-    ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-    std::vector<const DexFile*> dex_files;
-    uint32_t image_file_location_oat_checksum = 0;
-    uint32_t image_file_location_oat_begin = 0;
-    std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
-                                                            insn_features.get(),
-                                                            &dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_begin,
-                                                            nullptr));
-    ASSERT_NE(oat_header.get(), nullptr);
-    ASSERT_TRUE(oat_header->IsValid());
+  InstructionSet insn_set = kX86;
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> insn_features(
+    InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
+  std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
+                                                          insn_features.get(),
+                                                          0u,
+                                                          nullptr));
+  ASSERT_NE(oat_header.get(), nullptr);
+  ASSERT_TRUE(oat_header->IsValid());
 
-    char* magic = const_cast<char*>(oat_header->GetMagic());
-    strcpy(magic, "");  // bad magic
-    ASSERT_FALSE(oat_header->IsValid());
-    strcpy(magic, "oat\n000");  // bad version
-    ASSERT_FALSE(oat_header->IsValid());
+  char* magic = const_cast<char*>(oat_header->GetMagic());
+  strcpy(magic, "");  // bad magic
+  ASSERT_FALSE(oat_header->IsValid());
+  strcpy(magic, "oat\n000");  // bad version
+  ASSERT_FALSE(oat_header->IsValid());
+}
+
+TEST_F(OatTest, EmptyTextSection) {
+  TimingLogger timings("OatTest::EmptyTextSection", false, false);
+
+  // TODO: make selectable.
+  Compiler::Kind compiler_kind = Compiler::kQuick;
+  InstructionSet insn_set = kRuntimeISA;
+  if (insn_set == kArm) insn_set = kThumb2;
+  std::string error_msg;
+  std::vector<std::string> compiler_options;
+  compiler_options.push_back("--compiler-filter=verify-at-runtime");
+  SetupCompiler(compiler_kind, insn_set, compiler_options, /*out*/ &error_msg);
+
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    class_loader = LoadDex("Main");
+  }
+  ASSERT_TRUE(class_loader != nullptr);
+  std::vector<const DexFile*> dex_files = GetDexFiles(class_loader);
+  ASSERT_TRUE(!dex_files.empty());
+
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  for (const DexFile* dex_file : dex_files) {
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker->RegisterDexFile(*dex_file, soa.Decode<mirror::ClassLoader*>(class_loader));
+  }
+  compiler_driver_->SetDexFilesForOatFile(dex_files);
+  compiler_driver_->CompileAll(class_loader, dex_files, &timings);
+
+  ScratchFile tmp;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store, false);
+  ASSERT_TRUE(success);
+
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
+                                                  tmp.GetFilename(),
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  /*low_4gb*/false,
+                                                  nullptr,
+                                                  &error_msg));
+  ASSERT_TRUE(oat_file != nullptr);
+  EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
+}
+
+static void MaybeModifyDexFileToFail(bool verify, std::unique_ptr<const DexFile>& data) {
+  // If in verify mode (= fail the verifier mode), make sure we fail early. We'll fail already
+  // because of the missing map, but that may lead to out of bounds reads.
+  if (verify) {
+    const_cast<DexFile::Header*>(&data->GetHeader())->checksum_++;
+  }
+}
+
+void OatTest::TestDexFileInput(bool verify, bool low_4gb) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  std::vector<const char*> input_filenames;
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "int", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file1_data);
+
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file2_data);
+
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+  ScratchFile oat_file;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store, verify);
+
+  // In verify mode, we expect failure.
+  if (verify) {
+    ASSERT_FALSE(success);
+    return;
+  }
+
+  ASSERT_TRUE(success);
+
+  std::string error_msg;
+  std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                         oat_file.GetFilename(),
+                                                         nullptr,
+                                                         nullptr,
+                                                         false,
+                                                         low_4gb,
+                                                         nullptr,
+                                                         &error_msg));
+  ASSERT_TRUE(opened_oat_file != nullptr);
+  if (low_4gb) {
+    // With low_4gb the file must be mapped below 4 GiB, so the address
+    // round-trips through uint32_t.
+    uintptr_t begin = reinterpret_cast<uintptr_t>(opened_oat_file->Begin());
+    EXPECT_EQ(begin, static_cast<uint32_t>(begin));
+  }
+  ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+  std::unique_ptr<const DexFile> opened_dex_file1 =
+      opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> opened_dex_file2 =
+      opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+  ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                      &opened_dex_file1->GetHeader(),
+                      dex_file1_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+  ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                      &opened_dex_file2->GetHeader(),
+                      dex_file2_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, DexFileInputCheckOutput) {
+  TestDexFileInput(false, /*low_4gb*/false);
+}
+
+TEST_F(OatTest, DexFileInputCheckOutputLow4GB) {
+  TestDexFileInput(false, /*low_4gb*/true);
+}
+
+TEST_F(OatTest, DexFileInputCheckVerifier) {
+  TestDexFileInput(true, /*low_4gb*/false);
+}
+
+void OatTest::TestZipFileInput(bool verify) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "long", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file1_data);
+
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes.dex",
+                                &dex_file1_data->GetHeader(),
+                                dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file2_data);
+
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes2.dex",
+                                &dex_file2_data->GetHeader(),
+                                dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  {
+    // Test using the AddDexFileSource() interface with the zip file.
+    std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store, verify);
+
+    if (verify) {
+      ASSERT_FALSE(success);
+    } else {
+      ASSERT_TRUE(success);
+
+      std::string error_msg;
+      std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                             oat_file.GetFilename(),
+                                                             nullptr,
+                                                             nullptr,
+                                                             false,
+                                                             /*low_4gb*/false,
+                                                             nullptr,
+                                                             &error_msg));
+      ASSERT_TRUE(opened_oat_file != nullptr);
+      ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+      std::unique_ptr<const DexFile> opened_dex_file1 =
+          opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+      std::unique_ptr<const DexFile> opened_dex_file2 =
+          opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+      ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                          &opened_dex_file1->GetHeader(),
+                          dex_file1_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+                opened_dex_file1->GetLocation());
+
+      ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                          &opened_dex_file2->GetHeader(),
+                          dex_file2_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+                opened_dex_file2->GetLocation());
+    }
+  }
+
+  {
+    // Test using the AddZipDexFileSource() interface with the zip file handle.
+    File zip_fd(dup(zip_file.GetFd()), /* check_usage */ false);
+    ASSERT_NE(-1, zip_fd.Fd());
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(),
+                       std::move(zip_fd),
+                       zip_file.GetFilename().c_str(),
+                       key_value_store,
+                       verify);
+    if (verify) {
+      ASSERT_FALSE(success);
+    } else {
+      ASSERT_TRUE(success);
+
+      std::string error_msg;
+      std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                             oat_file.GetFilename(),
+                                                             nullptr,
+                                                             nullptr,
+                                                             false,
+                                                             /*low_4gb*/false,
+                                                             nullptr,
+                                                             &error_msg));
+      ASSERT_TRUE(opened_oat_file != nullptr);
+      ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+      std::unique_ptr<const DexFile> opened_dex_file1 =
+          opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+      std::unique_ptr<const DexFile> opened_dex_file2 =
+          opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+      ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                          &opened_dex_file1->GetHeader(),
+                          dex_file1_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+                opened_dex_file1->GetLocation());
+
+      ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                          &opened_dex_file2->GetHeader(),
+                          dex_file2_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+                opened_dex_file2->GetLocation());
+    }
+  }
+}
+
+TEST_F(OatTest, ZipFileInputCheckOutput) {
+  TestZipFileInput(false);
+}
+
+TEST_F(OatTest, ZipFileInputCheckVerifier) {
+  TestZipFileInput(true);
+}
+
+TEST_F(OatTest, UpdateChecksum) {
+  InstructionSet insn_set = kX86;
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> insn_features(
+    InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
+  std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
+                                                          insn_features.get(),
+                                                          0u,
+                                                          nullptr));
+  // The starting adler32 value is 1.
+  EXPECT_EQ(1U, oat_header->GetChecksum());
+
+  oat_header->UpdateChecksum(OatHeader::kOatMagic, sizeof(OatHeader::kOatMagic));
+  EXPECT_EQ(64291151U, oat_header->GetChecksum());
+
+  // Make sure that null data does not reset the checksum.
+  oat_header->UpdateChecksum(nullptr, 0);
+  EXPECT_EQ(64291151U, oat_header->GetChecksum());
+
+  oat_header->UpdateChecksum(OatHeader::kOatMagic, sizeof(OatHeader::kOatMagic));
+  EXPECT_EQ(216138397U, oat_header->GetChecksum());
 }
 
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 640698b..8a80982 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,40 +16,255 @@
 
 #include "oat_writer.h"
 
+#include <unistd.h>
 #include <zlib.h>
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/enums.h"
+#include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "compiled_class.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
+#include "debug/method_debug_info.h"
 #include "dex/verification_results.h"
+#include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
+#include "handle_scope-inl.h"
 #include "image_writer.h"
-#include "linker/relative_patcher.h"
+#include "linker/multi_oat_relative_patcher.h"
+#include "linker/output_stream.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "handle_scope-inl.h"
+#include "type_lookup_table.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
+#include "zip_archive.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
+
+const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
+  return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
+}
+
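+// OutputStream wrapper that folds every written byte into the OatHeader
+// checksum before forwarding the write to the underlying stream.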
+class ChecksumUpdatingOutputStream : public OutputStream {
+ public:
+  ChecksumUpdatingOutputStream(OutputStream* out, OatHeader* oat_header)
+      : OutputStream(out->GetLocation()), out_(out), oat_header_(oat_header) { }
+
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    oat_header_->UpdateChecksum(buffer, byte_count);
+    return out_->WriteFully(buffer, byte_count);
+  }
+
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    return out_->Seek(offset, whence);
+  }
+
+  bool Flush() OVERRIDE {
+    return out_->Flush();
+  }
+
+ private:
+  OutputStream* const out_;
+  OatHeader* const oat_header_;
+};
+
+inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod& compiled_method) {
+  // We want to align the code rather than the preheader.
+  uint32_t unaligned_code_offset = header_offset + sizeof(OatQuickMethodHeader);
+  uint32_t aligned_code_offset = compiled_method.AlignCode(unaligned_code_offset);
+  return aligned_code_offset - unaligned_code_offset;
+}
+
+}  // anonymous namespace
+
+// Defines the location of the raw dex file to write.
+class OatWriter::DexFileSource {
+ public:
+  explicit DexFileSource(ZipEntry* zip_entry)
+      : type_(kZipEntry), source_(zip_entry) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(File* raw_file)
+      : type_(kRawFile), source_(raw_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(const uint8_t* dex_file)
+      : type_(kRawData), source_(dex_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  bool IsZipEntry() const { return type_ == kZipEntry; }
+  bool IsRawFile() const { return type_ == kRawFile; }
+  bool IsRawData() const { return type_ == kRawData; }
+
+  ZipEntry* GetZipEntry() const {
+    DCHECK(IsZipEntry());
+    DCHECK(source_ != nullptr);
+    return static_cast<ZipEntry*>(const_cast<void*>(source_));
+  }
+
+  File* GetRawFile() const {
+    DCHECK(IsRawFile());
+    DCHECK(source_ != nullptr);
+    return static_cast<File*>(const_cast<void*>(source_));
+  }
+
+  const uint8_t* GetRawData() const {
+    DCHECK(IsRawData());
+    DCHECK(source_ != nullptr);
+    return static_cast<const uint8_t*>(source_);
+  }
+
+  void Clear() {
+    type_ = kNone;
+    source_ = nullptr;
+  }
+
+ private:
+  enum Type {
+    kNone,
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
+  Type type_;
+  const void* source_;
+};
+
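+// Layout data for one class: its status, OatClassType, optional method bitmap,
+// and the OatMethodOffsets written for each of its compiled methods.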
+class OatWriter::OatClass {
+ public:
+  OatClass(size_t offset,
+           const dchecked_vector<CompiledMethod*>& compiled_methods,
+           uint32_t num_non_null_compiled_methods,
+           mirror::Class::Status status);
+  OatClass(OatClass&& src) = default;
+  size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
+  size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
+  size_t SizeOf() const;
+  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+
+  CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const {
+    return compiled_methods_[class_def_method_index];
+  }
+
+  // Offset of start of OatClass from beginning of OatHeader. It is
+  // used to validate file position when writing.
+  size_t offset_;
+
+  // CompiledMethods for each class_def_method_index, or null if no method is available.
+  dchecked_vector<CompiledMethod*> compiled_methods_;
+
+  // Offset from OatClass::offset_ to the OatMethodOffsets for the
+  // class_def_method_index. If 0, it means the corresponding
+  // CompiledMethod entry in OatClass::compiled_methods_ should be
+  // null and that the OatClass::type_ should be kOatClassBitmap.
+  dchecked_vector<uint32_t> oat_method_offsets_offsets_from_oat_class_;
+
+  // Data to write.
+
+  static_assert(mirror::Class::Status::kStatusMax < (1 << 16), "class status won't fit in 16bits");
+  int16_t status_;
+
+  static_assert(OatClassType::kOatClassMax < (1 << 16), "oat_class type won't fit in 16bits");
+  uint16_t type_;
+
+  uint32_t method_bitmap_size_;
+
+  // bit vector indexed by ClassDef method index. When
+  // OatClassType::type_ is kOatClassBitmap, a set bit indicates the
+  // method has an OatMethodOffsets in method_offsets_, otherwise
+  // the entry was omitted to save space. If OatClassType::type_ is
+  // not kOatClassBitmap, the bitmap will be null.
+  std::unique_ptr<BitVector> method_bitmap_;
+
+  // OatMethodOffsets and OatMethodHeaders for each CompiledMethod
+  // present in the OatClass. Note that some may be missing if
+  // OatClass::compiled_methods_ contains null values (and
+  // oat_method_offsets_offsets_from_oat_class_ should contain 0
+  // values in this case).
+  dchecked_vector<OatMethodOffsets> method_offsets_;
+  dchecked_vector<OatQuickMethodHeader> method_headers_;
+
+ private:
+  size_t GetMethodOffsetsRawSize() const {
+    return method_offsets_.size() * sizeof(method_offsets_[0]);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(OatClass);
+};
+
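+// Layout data for one dex file: where its contents come from, where they are
+// placed in the oat file, and the offsets of its class offsets table and
+// type lookup table.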
+class OatWriter::OatDexFile {
+ public:
+  OatDexFile(const char* dex_file_location,
+             DexFileSource source,
+             CreateTypeLookupTable create_type_lookup_table);
+  OatDexFile(OatDexFile&& src) = default;
+
+  const char* GetLocation() const {
+    return dex_file_location_data_;
+  }
+
+  void ReserveTypeLookupTable(OatWriter* oat_writer);
+  void ReserveClassOffsets(OatWriter* oat_writer);
+
+  size_t SizeOf() const;
+  bool Write(OatWriter* oat_writer, OutputStream* out) const;
+  bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
+
+  // The source of the dex file.
+  DexFileSource source_;
+
+  // Whether to create the type lookup table.
+  CreateTypeLookupTable create_type_lookup_table_;
+
+  // Dex file size. Initialized when writing the dex file.
+  size_t dex_file_size_;
+
+  // Offset of start of OatDexFile from beginning of OatHeader. It is
+  // used to validate file position when writing.
+  size_t offset_;
+
+  // Data to write.
+  uint32_t dex_file_location_size_;
+  const char* dex_file_location_data_;
+  uint32_t dex_file_location_checksum_;
+  uint32_t dex_file_offset_;
+  uint32_t class_offsets_offset_;
+  uint32_t lookup_table_offset_;
+
+  // Data to write to a separate section.
+  dchecked_vector<uint32_t> class_offsets_;
+
+ private:
+  size_t GetClassOffsetsRawSize() const {
+    return class_offsets_.size() * sizeof(class_offsets_[0]);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(OatDexFile);
+};
+
 #define DCHECK_OFFSET() \
   DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -58,24 +273,20 @@
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uintptr_t image_file_location_oat_begin,
-                     int32_t image_patch_delta,
-                     const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     TimingLogger* timings,
-                     SafeMap<std::string, std::string>* key_value_store)
-  : compiler_driver_(compiler),
-    image_writer_(image_writer),
-    dex_files_(&dex_files),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+  : write_state_(WriteState::kAddingDexFileSources),
+    timings_(timings),
+    raw_dex_files_(),
+    zip_archives_(),
+    zipped_dex_files_(),
+    zipped_dex_file_locations_(),
+    compiler_driver_(nullptr),
+    image_writer_(nullptr),
+    compiling_boot_image_(compiling_boot_image),
+    dex_files_(nullptr),
     size_(0u),
     bss_size_(0u),
     oat_data_offset_(0u),
-    image_file_location_oat_checksum_(image_file_location_oat_checksum),
-    image_file_location_oat_begin_(image_file_location_oat_begin),
-    image_patch_delta_(image_patch_delta),
-    key_value_store_(key_value_store),
     oat_header_(nullptr),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
@@ -95,63 +306,217 @@
     size_code_alignment_(0),
     size_relative_call_thunks_(0),
     size_misc_thunks_(0),
-    size_mapping_table_(0),
     size_vmap_table_(0),
-    size_gc_map_(0),
     size_oat_dex_file_location_size_(0),
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
-    size_oat_dex_file_methods_offsets_(0),
+    size_oat_dex_file_class_offsets_offset_(0),
+    size_oat_dex_file_lookup_table_offset_(0),
+    size_oat_lookup_table_alignment_(0),
+    size_oat_lookup_table_(0),
+    size_oat_class_offsets_alignment_(0),
+    size_oat_class_offsets_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
-    method_offset_map_() {
-  CHECK(key_value_store != nullptr);
+    relative_patcher_(nullptr),
+    absolute_patch_locations_() {
+}
 
+bool OatWriter::AddDexFileSource(const char* filename,
+                                 const char* location,
+                                 CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  uint32_t magic;
+  std::string error_msg;
+  File fd = OpenAndReadMagic(filename, &magic, &error_msg);
+  if (fd.Fd() == -1) {
+    PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
+    return false;
+  } else if (IsDexMagic(magic)) {
+    // The file is open for reading, not writing, so it's OK to let the File destructor
+    // close it without checking for explicit Close(), so pass checkUsage = false.
+    raw_dex_files_.emplace_back(new File(fd.Release(), location, /* checkUsage */ false));
+    oat_dex_files_.emplace_back(location,
+                                DexFileSource(raw_dex_files_.back().get()),
+                                create_type_lookup_table);
+  } else if (IsZipMagic(magic)) {
+    if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+      return false;
+    }
+  } else {
+    LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(File&& zip_fd,
+                                        const char* location,
+                                        CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  std::string error_msg;
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.Release(), location, &error_msg));
+  ZipArchive* zip_archive = zip_archives_.back().get();
+  if (zip_archive == nullptr) {
+    LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+        << error_msg;
+    return false;
+  }
+  for (size_t i = 0; ; ++i) {
+    std::string entry_name = DexFile::GetMultiDexClassesDexName(i);
+    std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+    if (entry == nullptr) {
+      break;
+    }
+    zipped_dex_files_.push_back(std::move(entry));
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+    const char* full_location = zipped_dex_file_locations_.back().c_str();
+    oat_dex_files_.emplace_back(full_location,
+                                DexFileSource(zipped_dex_files_.back().get()),
+                                create_type_lookup_table);
+  }
+  if (zipped_dex_file_locations_.empty()) {
+    LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source from raw memory.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+                                    const char* location,
+                                    uint32_t location_checksum,
+                                    CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  if (data.size() < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+               << data.size() << " File: " << location;
+    return false;
+  }
+  if (!ValidateDexFileHeader(data.data(), location)) {
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(data.data());
+  if (data.size() < header->file_size_) {
+    LOG(ERROR) << "Truncated dex file data. Data size: " << data.size()
+               << " file size from header: " << header->file_size_ << " File: " << location;
+    return false;
+  }
+
+  oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table);
+  oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+  return true;
+}
+
+dchecked_vector<const char*> OatWriter::GetSourceLocations() const {
+  dchecked_vector<const char*> locations;
+  locations.reserve(oat_dex_files_.size());
+  for (const OatDexFile& oat_dex_file : oat_dex_files_) {
+    locations.push_back(oat_dex_file.GetLocation());
+  }
+  return locations;
+}
+
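+// First stage of writing an oat file: write the dex files, type lookup tables
+// and class offsets into the .rodata section, then reopen the embedded dex
+// files so the caller can compile against them.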
+bool OatWriter::WriteAndOpenDexFiles(
+    OutputStream* rodata,
+    File* file,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    SafeMap<std::string, std::string>* key_value_store,
+    bool verify,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  CHECK(write_state_ == WriteState::kAddingDexFileSources);
+
+  size_t offset = InitOatHeader(instruction_set,
+                                instruction_set_features,
+                                dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+                                key_value_store);
+  offset = InitOatDexFiles(offset);
+  size_ = offset;
+
+  std::unique_ptr<MemMap> dex_files_map;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  if (!WriteDexFiles(rodata, file)) {
+    return false;
+  }
+  // Reserve space for type lookup tables and update type_lookup_table_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveTypeLookupTable(this);
+  }
+  size_t size_after_type_lookup_tables = size_;
+  // Reserve space for class offsets and update class_offsets_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveClassOffsets(this);
+  }
+  ChecksumUpdatingOutputStream checksum_updating_rodata(rodata, oat_header_.get());
+  if (!WriteOatDexFiles(&checksum_updating_rodata) ||
+      !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
+      !OpenDexFiles(file, verify, &dex_files_map, &dex_files) ||
+      !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
+    return false;
+  }
+
+  // Do a bulk checksum update for Dex[] and TypeLookupTable[]. Doing it piece by
+  // piece would be difficult because we're not using the OutputStream directly.
+  if (!oat_dex_files_.empty()) {
+    size_t size = size_after_type_lookup_tables - oat_dex_files_[0].dex_file_offset_;
+    oat_header_->UpdateChecksum(dex_files_map->Begin(), size);
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  write_state_ = WriteState::kPrepareLayout;
+  return true;
+}
+
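+// Second stage: once compilation results exist, lay out OatClasses, maps and
+// code, and (for app oat files) reserve .bss space for dex cache arrays.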
+void OatWriter::PrepareLayout(const CompilerDriver* compiler,
+                              ImageWriter* image_writer,
+                              const std::vector<const DexFile*>& dex_files,
+                              linker::MultiOatRelativePatcher* relative_patcher) {
+  CHECK(write_state_ == WriteState::kPrepareLayout);
+
+  compiler_driver_ = compiler;
+  image_writer_ = image_writer;
+  dex_files_ = &dex_files;
+  relative_patcher_ = relative_patcher;
+  SetMultiOatRelativePatcherAdjustment();
+
+  if (compiling_boot_image_) {
+    CHECK(image_writer_ != nullptr);
+  }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
-  const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
-  relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
-                                                      &method_offset_map_);
+  CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
 
-  size_t offset;
+  uint32_t offset = size_;
   {
-    TimingLogger::ScopedTiming split("InitOatHeader", timings);
-    offset = InitOatHeader();
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
-    offset = InitOatDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitDexFiles", timings);
-    offset = InitDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatClasses", timings);
+    TimingLogger::ScopedTiming split("InitOatClasses", timings_);
     offset = InitOatClasses(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatMaps", timings);
+    TimingLogger::ScopedTiming split("InitOatMaps", timings_);
     offset = InitOatMaps(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCode", timings);
+    TimingLogger::ScopedTiming split("InitOatCode", timings_);
     offset = InitOatCode(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_);
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
 
-  if (!HasImage()) {
+  if (!HasBootImage()) {
     // Allocate space for app dex cache arrays in the .bss section.
     size_t bss_start = RoundUp(size_, kPageSize);
-    size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
+    PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
     bss_size_ = 0u;
-    for (const DexFile* dex_file : dex_files) {
+    for (const DexFile* dex_file : *dex_files_) {
       dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
       DexCacheArraysLayout layout(pointer_size, dex_file);
       bss_size_ += layout.Size();
@@ -159,84 +524,17 @@
   }
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
-  CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr);
-  CHECK_EQ(compiler->IsImage(),
-           key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
-  CHECK_ALIGNED(image_patch_delta_, kPageSize);
+  if (compiling_boot_image_) {
+    CHECK_EQ(image_writer_ != nullptr,
+             oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
+  }
+
+  write_state_ = WriteState::kWriteRoData;
 }
 
 OatWriter::~OatWriter() {
-  delete oat_header_;
-  STLDeleteElements(&oat_dex_files_);
-  STLDeleteElements(&oat_classes_);
 }
 
-struct OatWriter::GcMapDataAccess {
-  static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return compiled_method->GetGcMap();
-  }
-
-  static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    uint32_t offset = oat_class->method_headers_[method_offsets_index].gc_map_offset_;
-    return offset == 0u ? 0u :
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
-      ALWAYS_INLINE {
-    oat_class->method_headers_[method_offsets_index].gc_map_offset_ =
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static const char* Name() {
-    return "GC map";
-  }
-};
-
-struct OatWriter::MappingTableDataAccess {
-  static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return compiled_method->GetMappingTable();
-  }
-
-  static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    uint32_t offset = oat_class->method_headers_[method_offsets_index].mapping_table_offset_;
-    return offset == 0u ? 0u :
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
-      ALWAYS_INLINE {
-    oat_class->method_headers_[method_offsets_index].mapping_table_offset_ =
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static const char* Name() {
-    return "mapping table";
-  }
-};
-
-struct OatWriter::VmapTableDataAccess {
-  static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return compiled_method->GetVmapTable();
-  }
-
-  static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    uint32_t offset = oat_class->method_headers_[method_offsets_index].vmap_table_offset_;
-    return offset == 0u ? 0u :
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
-      ALWAYS_INLINE {
-    oat_class->method_headers_[method_offsets_index].vmap_table_offset_ =
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static const char* Name() {
-    return "vmap table";
-  }
-};
-
 class OatWriter::DexMethodVisitor {
  public:
   DexMethodVisitor(OatWriter* writer, size_t offset)
@@ -312,6 +610,11 @@
     : DexMethodVisitor(writer, offset),
       compiled_methods_(),
       num_non_null_compiled_methods_(0u) {
+    size_t num_classes = 0u;
+    for (const OatDexFile& oat_dex_file : writer_->oat_dex_files_) {
+      num_classes += oat_dex_file.class_offsets_.size();
+    }
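+    // num_classes is exact, so a single reserve avoids repeated reallocation
+    // while OatClass entries are emplaced in EndClass().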
+    writer_->oat_classes_.reserve(num_classes);
     compiled_methods_.reserve(256u);
   }
 
@@ -322,7 +625,8 @@
     return true;
   }
 
-  bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED, const ClassDataItemIterator& it) {
+  bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED,
+                   const ClassDataItemIterator& it) {
     // Fill in the compiled_methods_ array for methods that have a
     // CompiledMethod. We track the number of non-null entries in
     // num_non_null_compiled_methods_ since we only want to allocate
@@ -349,16 +653,16 @@
       status = mirror::Class::kStatusNotReady;
     }
 
-    OatClass* oat_class = new OatClass(offset_, compiled_methods_,
-                                       num_non_null_compiled_methods_, status);
-    writer_->oat_classes_.push_back(oat_class);
-    oat_class->UpdateChecksum(writer_->oat_header_);
-    offset_ += oat_class->SizeOf();
+    writer_->oat_classes_.emplace_back(offset_,
+                                       compiled_methods_,
+                                       num_non_null_compiled_methods_,
+                                       status);
+    offset_ += writer_->oat_classes_.back().SizeOf();
     return DexMethodVisitor::EndClass();
   }
 
  private:
-  std::vector<CompiledMethod*> compiled_methods_;
+  dchecked_vector<CompiledMethod*> compiled_methods_;
   size_t num_non_null_compiled_methods_;
 };
 
@@ -381,76 +685,72 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {
       // Derived from CompiledMethod.
       uint32_t quick_code_offset = 0;
 
-      const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      ArrayRef<const uint8_t> quick_code = compiled_method->GetQuickCode();
+      uint32_t code_size = quick_code.size() * sizeof(uint8_t);
       uint32_t thumb_offset = compiled_method->CodeDelta();
 
       // Deduplicate code arrays if we are not producing debuggable code.
-      bool deduped = false;
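+      // Assume deduplication; both branches below clear the flag whenever a
+      // new code offset is actually created.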
+      bool deduped = true;
+      MethodReference method_ref(dex_file_, it.GetMemberIndex());
       if (debuggable_) {
-        quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset);
-      } else {
-        auto lb = dedupe_map_.lower_bound(compiled_method);
-        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) {
-          quick_code_offset = lb->second;
-          deduped = true;
+        quick_code_offset = writer_->relative_patcher_->GetOffset(method_ref);
+        if (quick_code_offset != 0u) {
+          // Duplicate method: reuse the existing offset so that both ArtMethods
+          // end up pointing at the same oat code at runtime.
         } else {
           quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset);
-          dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset);
+          deduped = false;
         }
+      } else {
+        quick_code_offset = dedupe_map_.GetOrCreate(
+            compiled_method,
+            [this, &deduped, compiled_method, &it, thumb_offset]() {
+              deduped = false;
+              return NewQuickCodeOffset(compiled_method, it, thumb_offset);
+            });
       }
 
       if (code_size != 0) {
-        MethodReference method_ref(dex_file_, it.GetMemberIndex());
-        auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref);
-        if (method_lb != writer_->method_offset_map_.map.end() &&
-            !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) {
+        if (writer_->relative_patcher_->GetOffset(method_ref) != 0u) {
           // TODO: Should this be a hard failure?
           LOG(WARNING) << "Multiple definitions of "
               << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
-              << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : "");
+              << " offsets " << writer_->relative_patcher_->GetOffset(method_ref)
+              << " " << quick_code_offset;
         } else {
-          writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset);
+          writer_->relative_patcher_->SetOffset(method_ref, quick_code_offset);
         }
       }
 
       // Update quick method header.
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
-      uint32_t mapping_table_offset = method_header->mapping_table_offset_;
       uint32_t vmap_table_offset = method_header->vmap_table_offset_;
       // If we don't have quick code, then we must have a vmap, as that is how the dex2dex
       // compiler records its transformations.
-      DCHECK(quick_code != nullptr || vmap_table_offset != 0);
-      uint32_t gc_map_offset = method_header->gc_map_offset_;
+      DCHECK(!quick_code.empty() || vmap_table_offset != 0);
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
-      if (mapping_table_offset != 0u && code_offset != 0u) {
-        mapping_table_offset += code_offset;
-        DCHECK_LT(mapping_table_offset, code_offset) << "Overflow in oat offsets";
-      }
       if (vmap_table_offset != 0u && code_offset != 0u) {
         vmap_table_offset += code_offset;
         DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets";
       }
-      if (gc_map_offset != 0u && code_offset != 0u) {
-        gc_map_offset += code_offset;
-        DCHECK_LT(gc_map_offset, code_offset) << "Overflow in oat offsets";
-      }
       uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
       uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
       uint32_t fp_spill_mask = compiled_method->GetFpSpillMask();
-      *method_header = OatQuickMethodHeader(mapping_table_offset, vmap_table_offset,
-                                            gc_map_offset, frame_size_in_bytes, core_spill_mask,
-                                            fp_spill_mask, code_size);
+      *method_header = OatQuickMethodHeader(vmap_table_offset,
+                                            frame_size_in_bytes,
+                                            core_spill_mask,
+                                            fp_spill_mask,
+                                            code_size);
 
       if (!deduped) {
         // Update offsets. (Checksum is updated when writing.)
@@ -467,44 +767,29 @@
         }
       }
 
-      if (writer_->compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
+      const CompilerOptions& compiler_options = writer_->compiler_driver_->GetCompilerOptions();
+      // Exclude quickened dex methods (code_size == 0) since they have no native code.
+      if (compiler_options.GenerateAnyDebugInfo() && code_size != 0) {
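+        // IsOptimized() implies the vmap table holds a CodeInfo; for dex2dex
+        // it holds quickening info instead, which is not usable as code info.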
+        bool has_code_info = method_header->IsOptimized();
-        // Record debug information for this function if we are doing that.
+        // Record debug information for this function when debug info generation is enabled.
-        const uint32_t quick_code_start = quick_code_offset -
-            writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(DebugInfo {
-            dex_file_,
-            class_def_index_,
-            it.GetMemberIndex(),
-            it.GetMethodAccessFlags(),
-            it.GetMethodCodeItem(),
-            deduped,
-            quick_code_start,
-            quick_code_start + code_size,
-            compiled_method});
-      }
-
-      if (kIsDebugBuild) {
-        // We expect GC maps except when the class hasn't been verified or the method is native.
-        const CompilerDriver* compiler_driver = writer_->compiler_driver_;
-        ClassReference class_ref(dex_file_, class_def_index_);
-        CompiledClass* compiled_class = compiler_driver->GetCompiledClass(class_ref);
-        mirror::Class::Status status;
-        if (compiled_class != nullptr) {
-          status = compiled_class->GetStatus();
-        } else if (compiler_driver->GetVerificationResults()->IsClassRejected(class_ref)) {
-          status = mirror::Class::kStatusError;
-        } else {
-          status = mirror::Class::kStatusNotReady;
-        }
-        const SwapVector<uint8_t>* gc_map = compiled_method->GetGcMap();
-        if (gc_map != nullptr) {
-          size_t gc_map_size = gc_map->size() * sizeof(gc_map[0]);
-          bool is_native = it.MemberIsNative();
-          CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
-              << gc_map << " " << gc_map_size << " " << (is_native ? "true" : "false") << " "
-              << (status < mirror::Class::kStatusVerified) << " " << status << " "
-              << PrettyMethod(it.GetMemberIndex(), *dex_file_);
-        }
+        debug::MethodDebugInfo info = debug::MethodDebugInfo();
+        info.trampoline_name = nullptr;
+        info.dex_file = dex_file_;
+        info.class_def_index = class_def_index_;
+        info.dex_method_index = it.GetMemberIndex();
+        info.access_flags = it.GetMethodAccessFlags();
+        info.code_item = it.GetMethodCodeItem();
+        info.isa = compiled_method->GetInstructionSet();
+        info.deduped = deduped;
+        info.is_native_debuggable = compiler_options.GetNativeDebuggable();
+        info.is_optimized = method_header->IsOptimized();
+        info.is_code_address_text_relative = true;
+        info.code_address = code_offset - writer_->oat_header_->GetExecutableOffset();
+        info.code_size = code_size;
+        info.frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
+        info.code_info = has_code_info ? compiled_method->GetVmapTable().data() : nullptr;
+        info.cfi = compiled_method->GetCFIInfo();
+        writer_->method_info_.push_back(info);
       }
 
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
@@ -519,30 +804,16 @@
  private:
   struct CodeOffsetsKeyComparator {
     bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const {
-      if (lhs->GetQuickCode() != rhs->GetQuickCode()) {
-        return lhs->GetQuickCode() < rhs->GetQuickCode();
+      // Code is deduplicated by CompilerDriver, so comparing data pointers suffices.
+      if (lhs->GetQuickCode().data() != rhs->GetQuickCode().data()) {
+        return lhs->GetQuickCode().data() < rhs->GetQuickCode().data();
       }
       // If the code is the same, all other fields are likely to be the same as well.
-      if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) {
-        return lhs->GetMappingTable() < rhs->GetMappingTable();
+      if (UNLIKELY(lhs->GetVmapTable().data() != rhs->GetVmapTable().data())) {
+        return lhs->GetVmapTable().data() < rhs->GetVmapTable().data();
       }
-      if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) {
-        return lhs->GetVmapTable() < rhs->GetVmapTable();
-      }
-      if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) {
-        return lhs->GetGcMap() < rhs->GetGcMap();
-      }
-      const auto& lhs_patches = lhs->GetPatches();
-      const auto& rhs_patches = rhs->GetPatches();
-      if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) {
-        return lhs_patches.size() < rhs_patches.size();
-      }
-      auto rit = rhs_patches.begin();
-      for (const LinkerPatch& lpatch : lhs_patches) {
-        if (UNLIKELY(!(lpatch == *rit))) {
-          return lpatch < *rit;
-        }
-        ++rit;
+      if (UNLIKELY(lhs->GetPatches().data() != rhs->GetPatches().data())) {
+        return lhs->GetPatches().data() < rhs->GetPatches().data();
       }
       return false;
     }
@@ -553,8 +824,8 @@
                               uint32_t thumb_offset) {
     offset_ = writer_->relative_patcher_->ReserveSpace(
         offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
-    offset_ = compiled_method->AlignCode(offset_);
-    DCHECK_ALIGNED_PARAM(offset_,
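+    // Padding is computed so that the code following the OatQuickMethodHeader,
+    // not the header itself, is aligned for the instruction set.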
+    offset_ += CodeAlignmentSize(offset_, *compiled_method);
+    DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
                          GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
     return offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
   }
@@ -567,7 +838,6 @@
   const bool debuggable_;
 };
 
-template <typename DataAccess>
 class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
  public:
   InitMapMethodVisitor(OatWriter* writer, size_t offset)
@@ -576,25 +846,26 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
-      DCHECK_EQ(DataAccess::GetOffset(oat_class, method_offsets_index_), 0u);
+      DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
 
-      const SwapVector<uint8_t>* map = DataAccess::GetData(compiled_method);
-      uint32_t map_size = map == nullptr ? 0 : map->size() * sizeof((*map)[0]);
+      ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
+      uint32_t map_size = map.size() * sizeof(map[0]);
       if (map_size != 0u) {
-        auto lb = dedupe_map_.lower_bound(map);
-        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(map, lb->first)) {
-          DataAccess::SetOffset(oat_class, method_offsets_index_, lb->second);
-        } else {
-          DataAccess::SetOffset(oat_class, method_offsets_index_, offset_);
-          dedupe_map_.PutBefore(lb, map, offset_);
-          offset_ += map_size;
-          writer_->oat_header_->UpdateChecksum(&(*map)[0], map_size);
-        }
+        size_t offset = dedupe_map_.GetOrCreate(
+            map.data(),
+            [this, map_size]() {
+              uint32_t new_offset = offset_;
+              offset_ += map_size;
+              return new_offset;
+            });
+        // The code offset is not initialized yet, so store the map offset as (0u - offset).
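+        // InitCodeMethodVisitor will add the code offset, leaving the header
+        // holding (code start - map offset); WriteMapMethodVisitor inverts this.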
+        DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
+        oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
       }
       ++method_offsets_index_;
     }
@@ -605,7 +876,7 @@
  private:
   // Deduplication is already done on a pointer basis by the compiler driver,
   // so we can simply compare the pointers to find out if things are duplicated.
-  SafeMap<const SwapVector<uint8_t>*, uint32_t> dedupe_map_;
+  SafeMap<const uint8_t*, uint32_t> dedupe_map_;
 };
 
 class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
@@ -617,7 +888,15 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    const DexFile::TypeId& type_id =
+        dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
+    const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
+    // Skip methods that are not in the image.
+    if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) {
+      return true;
+    }
+
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     OatMethodOffsets offsets(0u);
@@ -628,26 +907,40 @@
     }
 
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    InvokeType invoke_type = it.GetMethodInvokeType(dex_file_->GetClassDef(class_def_index_));
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
         Thread::Current(), *dex_file_)));
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr,
-        invoke_type);
-    if (method == nullptr) {
-      LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
-                          << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
-      soa.Self()->AssertPendingException();
-      mirror::Throwable* exc = soa.Self()->GetException();
-      std::string dump = exc->Dump();
-      LOG(FATAL) << dump;
-      UNREACHABLE();
+    ArtMethod* method;
+    if (writer_->HasBootImage()) {
+      const InvokeType invoke_type = it.GetMethodInvokeType(
+          dex_file_->GetClassDef(class_def_index_));
+      method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+          *dex_file_,
+          it.GetMemberIndex(),
+          dex_cache,
+          ScopedNullHandle<mirror::ClassLoader>(),
+          nullptr,
+          invoke_type);
+      if (method == nullptr) {
+        LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
+            << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
+        soa.Self()->AssertPendingException();
+        mirror::Throwable* exc = soa.Self()->GetException();
+        std::string dump = exc->Dump();
+        LOG(FATAL) << dump;
+        UNREACHABLE();
+      }
+    } else {
+      // Should already have been resolved by the compiler; just peek into the dex cache.
+      // It may not be resolved if the class failed to verify. In that case, don't set the
+      // entrypoint; this is not fatal since the dex cache will contain a resolution method.
+      method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize());
     }
-
-    if (compiled_method != nullptr && compiled_method->GetQuickCode()->size() != 0) {
+    if (method != nullptr &&
+        compiled_method != nullptr &&
+        compiled_method->GetQuickCode().size() != 0) {
       method->SetEntryPointFromQuickCompiledCodePtrSize(
           reinterpret_cast<void*>(offsets.code_offset_), pointer_size_);
     }
@@ -656,7 +949,7 @@
   }
 
  protected:
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
 };
 
 class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
@@ -671,7 +964,7 @@
       class_linker_(Runtime::Current()->GetClassLinker()),
       dex_cache_(nullptr) {
     patched_code_.reserve(16 * KB);
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       // If we're creating the image, the address space must be ready so that we can apply patches.
       CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
     }
@@ -685,6 +978,7 @@
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
     if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
       dex_cache_ = class_linker_->FindDexCache(Thread::Current(), *dex_file);
+      DCHECK(dex_cache_ != nullptr);
     }
     return true;
   }
@@ -704,7 +998,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-    // No thread suspension since dex_cache_ that may get invalidated if that occurs.
+    // No thread suspension is allowed here, since a suspension may invalidate dex_cache_.
@@ -713,10 +1007,8 @@
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
-      const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      // Need a wrapper if we create a copy for patching.
-      ArrayRef<const uint8_t> wrapped(*quick_code);
-      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      ArrayRef<const uint8_t> quick_code = compiled_method->GetQuickCode();
+      uint32_t code_size = quick_code.size() * sizeof(uint8_t);
 
       // Deduplicate code arrays.
       const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_];
@@ -726,24 +1018,22 @@
           ReportWriteFailure("relative call thunk", it);
           return false;
         }
-        uint32_t aligned_offset = compiled_method->AlignCode(offset_);
-        uint32_t aligned_code_delta = aligned_offset - offset_;
-        if (aligned_code_delta != 0) {
-          if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+        uint32_t alignment_size = CodeAlignmentSize(offset_, *compiled_method);
+        if (alignment_size != 0) {
+          if (!writer_->WriteCodeAlignment(out, alignment_size)) {
             ReportWriteFailure("code alignment padding", it);
             return false;
           }
-          offset_ += aligned_code_delta;
+          offset_ += alignment_size;
           DCHECK_OFFSET_();
         }
-        DCHECK_ALIGNED_PARAM(offset_,
+        DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
                              GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
         DCHECK_EQ(method_offsets.code_offset_,
                   offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         const OatQuickMethodHeader& method_header =
             oat_class->method_headers_[method_offsets_index_];
-        writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
         if (!out->WriteFully(&method_header, sizeof(method_header))) {
           ReportWriteFailure("method header", it);
           return false;
@@ -753,36 +1043,73 @@
         DCHECK_OFFSET_();
 
         if (!compiled_method->GetPatches().empty()) {
-          patched_code_.assign(quick_code->begin(), quick_code->end());
-          wrapped = ArrayRef<const uint8_t>(patched_code_);
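+          // Patch a private copy; the original code may be shared with other
+          // methods through deduplication.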
+          patched_code_.assign(quick_code.begin(), quick_code.end());
+          quick_code = ArrayRef<const uint8_t>(patched_code_);
           for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-            if (patch.Type() == kLinkerPatchCallRelative) {
-              // NOTE: Relative calls across oat files are not supported.
-              uint32_t target_offset = GetTargetOffset(patch);
-              uint32_t literal_offset = patch.LiteralOffset();
-              writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset,
-                                                     offset_ + literal_offset, target_offset);
-            } else if (patch.Type() == kLinkerPatchDexCacheArray) {
-              uint32_t target_offset = GetDexCacheOffset(patch);
-              uint32_t literal_offset = patch.LiteralOffset();
-              writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch,
-                                                                 offset_ + literal_offset,
-                                                                 target_offset);
-            } else if (patch.Type() == kLinkerPatchCall) {
-              uint32_t target_offset = GetTargetOffset(patch);
-              PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset);
-            } else if (patch.Type() == kLinkerPatchMethod) {
-              ArtMethod* method = GetTargetMethod(patch);
-              PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method);
-            } else if (patch.Type() == kLinkerPatchType) {
-              mirror::Class* type = GetTargetType(patch);
-              PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type);
+            uint32_t literal_offset = patch.LiteralOffset();
+            switch (patch.GetType()) {
+              case LinkerPatch::Type::kCallRelative: {
+                // NOTE: Relative calls across oat files are not supported.
+                uint32_t target_offset = GetTargetOffset(patch);
+                writer_->relative_patcher_->PatchCall(&patched_code_,
+                                                      literal_offset,
+                                                      offset_ + literal_offset,
+                                                      target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kDexCacheArray: {
+                uint32_t target_offset = GetDexCacheOffset(patch);
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kStringRelative: {
+                uint32_t target_offset = GetTargetObjectOffset(GetTargetString(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kTypeRelative: {
+                uint32_t target_offset = GetTargetObjectOffset(GetTargetType(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kCall: {
+                uint32_t target_offset = GetTargetOffset(patch);
+                PatchCodeAddress(&patched_code_, literal_offset, target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kMethod: {
+                ArtMethod* method = GetTargetMethod(patch);
+                PatchMethodAddress(&patched_code_, literal_offset, method);
+                break;
+              }
+              case LinkerPatch::Type::kString: {
+                mirror::String* string = GetTargetString(patch);
+                PatchObjectAddress(&patched_code_, literal_offset, string);
+                break;
+              }
+              case LinkerPatch::Type::kType: {
+                mirror::Class* type = GetTargetType(patch);
+                PatchObjectAddress(&patched_code_, literal_offset, type);
+                break;
+              }
+              default: {
+                DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kRecordPosition);
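+                // A position-only patch; there is nothing to write into the code.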
+                break;
+              }
             }
           }
         }
 
-        writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size);
-        if (!out->WriteFully(wrapped.data(), code_size)) {
+        if (!out->WriteFully(quick_code.data(), code_size)) {
           ReportWriteFailure("method code", it);
           return false;
         }
@@ -823,16 +1150,17 @@
   }
 
   uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod());
-    uint32_t target_offset =
-        (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u;
-    // If there's no compiled code, point to the correct trampoline.
+    uint32_t target_offset = writer_->relative_patcher_->GetOffset(patch.TargetMethod());
+    // If there's no new compiled code, either we're compiling an app and the target method
+    // is in the boot image, or we need to point to the correct trampoline.
     if (UNLIKELY(target_offset == 0)) {
       ArtMethod* target = GetTargetMethod(patch);
       DCHECK(target != nullptr);
-      size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet());
+      PointerSize size =
+          GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet());
       const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size);
       if (oat_code_offset != 0) {
+        DCHECK(!writer_->HasBootImage());
         DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset));
         DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(oat_code_offset));
         DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(oat_code_offset));
@@ -846,20 +1174,40 @@
     return target_offset;
   }
 
-  mirror::Class* GetTargetType(const LinkerPatch& patch)
+  mirror::DexCache* GetDexCache(const DexFile* target_dex_file)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile())
-        ? dex_cache_ : class_linker_->FindDexCache(Thread::Current(), *patch.TargetTypeDexFile());
+    return (target_dex_file == dex_file_)
+        ? dex_cache_
+        : class_linker_->FindDexCache(Thread::Current(), *target_dex_file);
+  }
+
+  mirror::Class* GetTargetType(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::DexCache* dex_cache = GetDexCache(patch.TargetTypeDexFile());
     mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
     CHECK(type != nullptr);
     return type;
   }
 
+  mirror::String* GetTargetString(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile())));
+    mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
+                                                  patch.TargetStringIndex(),
+                                                  dex_cache);
+    DCHECK(string != nullptr);
+    DCHECK(writer_->HasBootImage() ||
+           Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
+    return string;
+  }
+
   uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
-      auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>(
-              patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
-      const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_;
+    if (writer_->HasBootImage()) {
+      uintptr_t element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<uintptr_t>(
+          patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
+      size_t oat_index = writer_->image_writer_->GetOatIndexForDexCache(dex_cache_);
+      uintptr_t oat_data = writer_->image_writer_->GetOatDataBegin(oat_index);
       return element - oat_data;
     } else {
       size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile());
@@ -867,9 +1215,18 @@
     }
   }
 
+  uint32_t GetTargetObjectOffset(mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(writer_->HasBootImage());
+    object = writer_->image_writer_->GetImageAddress(object);
+    size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_);
+    uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index);
+    // TODO: Clean up offset types. The target offset must be treated as signed.
+    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(object) - oat_data_begin);
+  }
+
   void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       object = writer_->image_writer_->GetImageAddress(object);
     } else {
       // NOTE: We're using linker patches for app->boot references when the image can
@@ -889,15 +1246,21 @@
 
   void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       method = writer_->image_writer_->GetImageMethodAddress(method);
     } else if (kIsDebugBuild) {
       // NOTE: We're using linker patches for app->boot references when the image can
       // be relocated and therefore we need to emit .oat_patches. We're not using this
       // for app->app references, so check that the method is an image method.
-      gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-      size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
-      CHECK(image_space->GetImageHeader().GetMethodsSection().Contains(method_offset));
+      std::vector<gc::space::ImageSpace*> image_spaces =
+          Runtime::Current()->GetHeap()->GetBootImageSpaces();
+      bool contains_method = false;
+      for (gc::space::ImageSpace* image_space : image_spaces) {
+        size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+        contains_method |=
+            image_space->GetImageHeader().GetMethodsSection().Contains(method_offset);
+      }
+      CHECK(contains_method);
     }
     // Note: We only patch targeting ArtMethods in image which is in the low 4gb.
     uint32_t address = PointerToLowMemUInt32(method);
@@ -912,9 +1275,14 @@
   void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t address = target_offset;
-    if (writer_->HasImage()) {
-      address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
-                                      writer_->oat_data_offset_ + target_offset);
+    if (writer_->HasBootImage()) {
+      size_t oat_index = writer_->image_writer_->GetOatIndexForDexCache(dex_cache_);
+      // TODO: Clean up offset types.
+      // The target_offset must be treated as signed for cross-oat patching.
+      const void* target = reinterpret_cast<const void*>(
+          writer_->image_writer_->GetOatDataBegin(oat_index) +
+          static_cast<int32_t>(target_offset));
+      address = PointerToLowMemUInt32(target);
     }
     DCHECK_LE(offset + 4, code->size());
     uint8_t* data = &(*code)[offset];
@@ -925,10 +1293,11 @@
   }
 };
 
-template <typename DataAccess>
 class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
  public:
-  WriteMapMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset,
+  WriteMapMethodVisitor(OatWriter* writer,
+                        OutputStream* out,
+                        const size_t file_offset,
                         size_t relative_offset)
     : OatDexMethodVisitor(writer, relative_offset),
       out_(out),
@@ -936,29 +1305,38 @@
   }
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-    if (compiled_method != nullptr) {  // ie. not an abstract method
+    if (compiled_method != nullptr) {  // i.e. not an abstract method
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
-      uint32_t map_offset = DataAccess::GetOffset(oat_class, method_offsets_index_);
+      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].vmap_table_offset_;
+      uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
       ++method_offsets_index_;
 
-      // Write deduplicated map.
-      const SwapVector<uint8_t>* map = DataAccess::GetData(compiled_method);
-      size_t map_size = map == nullptr ? 0 : map->size() * sizeof((*map)[0]);
-      DCHECK((map_size == 0u && map_offset == 0u) ||
-            (map_size != 0u && map_offset != 0u && map_offset <= offset_))
-          << map_size << " " << map_offset << " " << offset_ << " "
-          << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " for " << DataAccess::Name();
-      if (map_size != 0u && map_offset == offset_) {
-        if (UNLIKELY(!out->WriteFully(&(*map)[0], map_size))) {
-          ReportWriteFailure(it);
-          return false;
+      DCHECK((compiled_method->GetVmapTable().size() == 0u && map_offset == 0u) ||
+             (compiled_method->GetVmapTable().size() != 0u && map_offset != 0u))
+          << compiled_method->GetVmapTable().size() << " " << map_offset << " "
+          << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+
+      if (map_offset != 0u) {
+        // Transform map_offset to actual oat data offset.
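+        // The header holds (code start - map offset); subtracting it from the
+        // delta-adjusted code start recovers the map's offset in the oat data.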
+        map_offset = (code_offset - compiled_method->CodeDelta()) - map_offset;
+        DCHECK_NE(map_offset, 0u);
+        DCHECK_LE(map_offset, offset_) << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+
+        ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
+        size_t map_size = map.size() * sizeof(map[0]);
+        if (map_offset == offset_) {
+          // Write deduplicated map (code info for Optimizing or transformation info for dex2dex).
+          if (UNLIKELY(!out->WriteFully(map.data(), map_size))) {
+            ReportWriteFailure(it);
+            return false;
+          }
+          offset_ += map_size;
         }
-        offset_ += map_size;
       }
       DCHECK_OFFSET_();
     }
@@ -971,7 +1349,7 @@
   size_t const file_offset_;
 
   void ReportWriteFailure(const ClassDataItemIterator& it) {
-    PLOG(ERROR) << "Failed to write " << DataAccess::Name() << " for "
+    PLOG(ERROR) << "Failed to write map for "
         << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " to " << out_->GetLocation();
   }
 };
@@ -1018,42 +1396,26 @@
   return true;
 }
 
-size_t OatWriter::InitOatHeader() {
-  oat_header_ = OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                  compiler_driver_->GetInstructionSetFeatures(),
-                                  dex_files_,
-                                  image_file_location_oat_checksum_,
-                                  image_file_location_oat_begin_,
-                                  key_value_store_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+                                const InstructionSetFeatures* instruction_set_features,
+                                uint32_t num_dex_files,
+                                SafeMap<std::string, std::string>* key_value_store) {
+  TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+  oat_header_.reset(OatHeader::Create(instruction_set,
+                                      instruction_set_features,
+                                      num_dex_files,
+                                      key_value_store));
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
   return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
-  // create the OatDexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    const DexFile* dex_file = (*dex_files_)[i];
-    CHECK(dex_file != nullptr);
-    OatDexFile* oat_dex_file = new OatDexFile(offset, *dex_file);
-    oat_dex_files_.push_back(oat_dex_file);
-    offset += oat_dex_file->SizeOf();
-  }
-  return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
-  // calculate the offsets within OatDexFiles to the DexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    // dex files are required to be 4 byte aligned
-    size_t original_offset = offset;
-    offset = RoundUp(offset, 4);
-    size_dex_file_alignment_ += offset - original_offset;
-
-    // set offset in OatDexFile to DexFile
-    oat_dex_files_[i]->dex_file_offset_ = offset;
-
-    const DexFile* dex_file = (*dex_files_)[i];
-    offset += dex_file->GetHeader().file_size_;
+  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+  // Initialize the offsets of the OatDexFile structures.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.offset_ = offset;
+    offset += oat_dex_file.SizeOf();
   }
   return offset;
 }
@@ -1067,13 +1429,12 @@
 
   // Update oat_dex_files_.
   auto oat_class_it = oat_classes_.begin();
-  for (OatDexFile* oat_dex_file : oat_dex_files_) {
-    for (uint32_t& method_offset : oat_dex_file->methods_offsets_) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    for (uint32_t& class_offset : oat_dex_file.class_offsets_) {
       DCHECK(oat_class_it != oat_classes_.end());
-      method_offset = (*oat_class_it)->offset_;
+      class_offset = oat_class_it->offset_;
       ++oat_class_it;
     }
-    oat_dex_file->UpdateChecksum(oat_header_);
   }
   CHECK(oat_class_it == oat_classes_.end());
 
@@ -1081,19 +1442,10 @@
 }
 
 size_t OatWriter::InitOatMaps(size_t offset) {
-  #define VISIT(VisitorType)                          \
-    do {                                              \
-      VisitorType visitor(this, offset);              \
-      bool success = VisitDexMethods(&visitor);       \
-      DCHECK(success);                                \
-      offset = visitor.GetOffset();                   \
-    } while (false)
-
-  VISIT(InitMapMethodVisitor<GcMapDataAccess>);
-  VISIT(InitMapMethodVisitor<MappingTableDataAccess>);
-  VISIT(InitMapMethodVisitor<VmapTableDataAccess>);
-
-  #undef VISIT
+  InitMapMethodVisitor visitor(this, offset);
+  bool success = VisitDexMethods(&visitor);
+  DCHECK(success);
+  offset = visitor.GetOffset();
 
   return offset;
 }
@@ -1106,16 +1458,15 @@
   offset = RoundUp(offset, kPageSize);
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
-  if (compiler_driver_->IsImage()) {
-    CHECK_EQ(image_patch_delta_, 0);
+  if (compiler_driver_->IsBootImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field, fn_name) \
       offset = CompiledCode::AlignCode(offset, instruction_set); \
       adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
       oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
-      field.reset(compiler_driver_->Create ## fn_name()); \
-      offset += field->size();
+      (field) = compiler_driver_->Create ## fn_name(); \
+      offset += (field)->size();
 
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline);
@@ -1132,7 +1483,6 @@
     oat_header_->SetQuickImtConflictTrampolineOffset(0);
     oat_header_->SetQuickResolutionTrampolineOffset(0);
     oat_header_->SetQuickToInterpreterBridgeOffset(0);
-    oat_header_->SetImagePatchDelta(image_patch_delta_);
   }
   return offset;
 }
@@ -1147,7 +1497,7 @@
     } while (false)
 
   VISIT(InitCodeMethodVisitor);
-  if (compiler_driver_->IsImage()) {
+  if (HasImage()) {
     VISIT(InitImageMethodVisitor);
   }
 
@@ -1157,33 +1507,28 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
-  if (raw_file_offset == (off_t) -1) {
-    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+  CHECK(write_state_ == WriteState::kWriteRoData);
+
+  // Wrap out to update checksum with each write.
+  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
+  out = &checksum_updating_out;
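+  // Every write from here on also feeds the oat header checksum, replacing the
+  // explicit UpdateChecksum() calls that previously accompanied each write.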
+
+  if (!WriteClassOffsets(out)) {
+    LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
   }
-  const size_t file_offset = static_cast<size_t>(raw_file_offset);
-  oat_data_offset_ = file_offset;
 
-  // Reserve space for header. It will be written last - after updating the checksum.
-  size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == (off_t) -1) {
-    PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
-    return false;
-  }
-  size_oat_header_ += sizeof(OatHeader);
-  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
-
-  if (!WriteTables(out, file_offset)) {
-    LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
+  if (!WriteClasses(out)) {
+    LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
     return false;
   }
 
   off_t tables_end_offset = out->Seek(0, kSeekCurrent);
-  if (tables_end_offset == (off_t) -1) {
+  if (tables_end_offset == static_cast<off_t>(-1)) {
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
+  size_t file_offset = oat_data_offset_;
   size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
@@ -1203,11 +1548,19 @@
   }
   DCHECK_OFFSET();
 
+  write_state_ = WriteState::kWriteText;
   return true;
 }
 
 bool OatWriter::WriteCode(OutputStream* out) {
-  size_t header_size = oat_header_->GetHeaderSize();
+  CHECK(write_state_ == WriteState::kWriteText);
+
+  // Wrap out to update checksum with each write.
+  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
+  out = &checksum_updating_out;
+
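+  // Re-apply this oat file's adjustment to the shared patcher; in a multi-oat
+  // compile another oat file may have been processed since PrepareLayout().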
+  SetMultiOatRelativePatcherAdjustment();
+
   const size_t file_offset = oat_data_offset_;
   size_t relative_offset = oat_header_->GetExecutableOffset();
   DCHECK_OFFSET();
@@ -1225,7 +1578,7 @@
   }
 
   const off_t oat_end_file_offset = out->Seek(0, kSeekCurrent);
-  if (oat_end_file_offset == (off_t) -1) {
+  if (oat_end_file_offset == static_cast<off_t>(-1)) {
     LOG(ERROR) << "Failed to get oat end file offset in " << out->GetLocation();
     return false;
   }
@@ -1233,8 +1586,8 @@
   if (kIsDebugBuild) {
     uint32_t size_total = 0;
     #define DO_STAT(x) \
-      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << x << "B)"; \
-      size_total += x;
+      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << (x) << "B)"; \
+      size_total += (x);
 
     DO_STAT(size_dex_file_alignment_);
     DO_STAT(size_executable_offset_alignment_);
@@ -1254,14 +1607,17 @@
     DO_STAT(size_code_alignment_);
     DO_STAT(size_relative_call_thunks_);
     DO_STAT(size_misc_thunks_);
-    DO_STAT(size_mapping_table_);
     DO_STAT(size_vmap_table_);
-    DO_STAT(size_gc_map_);
     DO_STAT(size_oat_dex_file_location_size_);
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
-    DO_STAT(size_oat_dex_file_methods_offsets_);
+    DO_STAT(size_oat_dex_file_class_offsets_offset_);
+    DO_STAT(size_oat_dex_file_lookup_table_offset_);
+    DO_STAT(size_oat_lookup_table_alignment_);
+    DO_STAT(size_oat_lookup_table_);
+    DO_STAT(size_oat_class_offsets_alignment_);
+    DO_STAT(size_oat_class_offsets_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
@@ -1276,51 +1632,88 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
-  // Write the header now that the checksum is final.
-  if (out->Seek(file_offset, kSeekSet) == (off_t) -1) {
+  write_state_ = WriteState::kWriteHeader;
+  return true;
+}
+
+bool OatWriter::WriteHeader(OutputStream* out,
+                            uint32_t image_file_location_oat_checksum,
+                            uintptr_t image_file_location_oat_begin,
+                            int32_t image_patch_delta) {
+  CHECK(write_state_ == WriteState::kWriteHeader);
+
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin);
+  if (compiler_driver_->IsBootImage()) {
+    CHECK_EQ(image_patch_delta, 0);
+    CHECK_EQ(oat_header_->GetImagePatchDelta(), 0);
+  } else {
+    CHECK_ALIGNED(image_patch_delta, kPageSize);
+    oat_header_->SetImagePatchDelta(image_patch_delta);
+  }
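+  // Fold the now-final header fields into the checksum; the .rodata and .text
+  // contents were already folded in as they were written.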
+  oat_header_->UpdateChecksumWithHeaderData();
+
+  const size_t file_offset = oat_data_offset_;
+
+  off_t current_offset = out->Seek(0, kSeekCurrent);
+  if (current_offset == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation();
+    return false;
+  }
+  if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
-  if (!out->WriteFully(oat_header_, header_size)) {
+
+  // Flush all other data before writing the header.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation();
+    return false;
+  }
+  // Write the header.
+  size_t header_size = oat_header_->GetHeaderSize();
+  if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == (off_t) -1) {
-    PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+  // Flush the header data.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
 
+  if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent));
+
+  write_state_ = WriteState::kDone;
   return true;
 }
 
-bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i]->Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
-      return false;
+bool OatWriter::WriteClassOffsets(OutputStream* out) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.class_offsets_offset_ != 0u) {
+      uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
+      off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+      if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+        PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
+                    << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
+        return false;
+      }
+      if (!oat_dex_file.WriteClassOffsets(this, out)) {
+        return false;
+      }
     }
   }
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i]->dex_file_offset_;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    const DexFile* dex_file = (*dex_files_)[i];
-    if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) {
-      PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation()
-                  << " to " << out->GetLocation();
-      return false;
-    }
-    size_dex_file_ += dex_file->GetHeader().file_size_;
-  }
-  for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i]->Write(this, out, file_offset)) {
+  return true;
+}
+
+bool OatWriter::WriteClasses(OutputStream* out) {
+  for (OatClass& oat_class : oat_classes_) {
+    if (!oat_class.Write(this, out, oat_data_offset_)) {
       PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
       return false;
     }
@@ -1329,34 +1722,19 @@
 }
 
 size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  #define VISIT(VisitorType)                                              \
-    do {                                                                  \
-      VisitorType visitor(this, out, file_offset, relative_offset);       \
-      if (UNLIKELY(!VisitDexMethods(&visitor))) {                         \
-        return 0;                                                         \
-      }                                                                   \
-      relative_offset = visitor.GetOffset();                              \
-    } while (false)
-
-  size_t gc_maps_offset = relative_offset;
-  VISIT(WriteMapMethodVisitor<GcMapDataAccess>);
-  size_gc_map_ = relative_offset - gc_maps_offset;
-
-  size_t mapping_tables_offset = relative_offset;
-  VISIT(WriteMapMethodVisitor<MappingTableDataAccess>);
-  size_mapping_table_ = relative_offset - mapping_tables_offset;
-
   size_t vmap_tables_offset = relative_offset;
-  VISIT(WriteMapMethodVisitor<VmapTableDataAccess>);
+  WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
+  if (UNLIKELY(!VisitDexMethods(&visitor))) {
+    return 0;
+  }
+  relative_offset = visitor.GetOffset();
   size_vmap_table_ = relative_offset - vmap_tables_offset;
 
-  #undef VISIT
-
   return relative_offset;
 }
 
 size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field) \
@@ -1365,12 +1743,12 @@
         uint32_t alignment_padding = aligned_offset - relative_offset; \
         out->Seek(alignment_padding, kSeekCurrent); \
         size_trampoline_alignment_ += alignment_padding; \
-        if (!out->WriteFully(&(*field)[0], field->size())) { \
+        if (!out->WriteFully((field)->data(), (field)->size())) { \
           PLOG(ERROR) << "Failed to write " # field " to " << out->GetLocation(); \
           return false; \
         } \
-        size_ ## field += field->size(); \
-        relative_offset += alignment_padding + field->size(); \
+        size_ ## field += (field)->size(); \
+        relative_offset += alignment_padding + (field)->size(); \
         DCHECK_OFFSET(); \
       } while (false)
 
@@ -1407,6 +1785,470 @@
   return relative_offset;
 }
 
+bool OatWriter::RecordOatDataOffset(OutputStream* out) {
+  // Get the elf file offset of the oat file.
+  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
+  if (raw_file_offset == static_cast<off_t>(-1)) {
+    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+    return false;
+  }
+  oat_data_offset_ = static_cast<size_t>(raw_file_offset);
+  return true;
+}
+
+bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
+  // Read the dex file header and perform minimal verification.
+  uint8_t raw_header[sizeof(DexFile::Header)];
+  if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
+    PLOG(ERROR) << "Failed to read dex file header. Actual: "
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
+    return false;
+  }
+
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->dex_file_location_checksum_ = header->checksum_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+  if (!DexFile::IsMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (!DexFile::IsVersionValid(raw_header)) {
+    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  if (header->file_size_ < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+               << " File: " << location;
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) {
+  TimingLogger::ScopedTiming split("WriteDexFiles", timings_);
+
+  // Get the elf file offset of the oat file.
+  if (!RecordOatDataOffset(rodata)) {
+    return false;
+  }
+
+  // Write dex files.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (!WriteDexFile(rodata, file, &oat_dex_file)) {
+      return false;
+    }
+  }
+
+  // Close sources.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.source_.Clear();  // Get rid of the reference; it's about to be invalidated.
+  }
+  zipped_dex_files_.clear();
+  zip_archives_.clear();
+  raw_dex_files_.clear();
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) {
+  if (!SeekToDexFile(rodata, file, oat_dex_file)) {
+    return false;
+  }
+  if (oat_dex_file->source_.IsZipEntry()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) {
+      return false;
+    }
+  } else if (oat_dex_file->source_.IsRawFile()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetRawFile())) {
+      return false;
+    }
+  } else {
+    DCHECK(oat_dex_file->source_.IsRawData());
+    if (!WriteDexFile(rodata, oat_dex_file, oat_dex_file->source_.GetRawData())) {
+      return false;
+    }
+  }
+
+  // Update current size and account for the written data.
+  DCHECK_EQ(size_, oat_dex_file->dex_file_offset_);
+  size_ += oat_dex_file->dex_file_size_;
+  size_dex_file_ += oat_dex_file->dex_file_size_;
+  return true;
+}
+
+bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) {
+  // Dex files are required to be 4 byte aligned.
+  size_t original_offset = size_;
+  size_t offset = RoundUp(original_offset, 4);
+  size_dex_file_alignment_ += offset - original_offset;
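+  // (Illustrative arithmetic: an original offset of 0x2A rounds up to 0x2C,
+  // accounting for 2 bytes of alignment padding.)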
+
+  // Seek to the start of the dex file and flush any pending operations in the stream.
+  // Verify that, after flushing the stream, the file is at the same offset as the stream.
+  uint32_t start_offset = oat_data_offset_ + offset;
+  off_t actual_offset = out->Seek(start_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  size_ = offset;
+  oat_dex_file->dex_file_offset_ = offset;
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             ZipEntry* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  // Extract the dex file and get the extracted size.
+  std::string error_msg;
+  if (!dex_file->ExtractToFile(*file, &error_msg)) {
+    LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR);
+  if (extracted_end == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (extracted_end < static_cast<off_t>(start_offset)) {
+    LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end
+               << " Start: " << start_offset
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset);
+  if (extracted_size < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: "
+               << extracted_size << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Read the dex file header and extract required data to OatDexFile.
+  off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(file, oat_dex_file)) {
+    return false;
+  }
+  if (extracted_size < oat_dex_file->dex_file_size_) {
+    LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
+               << " file size from header: " << oat_dex_file->dex_file_size_
+               << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Override the checksum from header with the CRC from ZIP entry.
+  oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
+
+  // Seek both file and stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // If we extracted more than the size specified in the header, truncate the file.
+  if (extracted_size > oat_dex_file->dex_file_size_) {
+    if (file->SetLength(end_offset) != 0) {
+      PLOG(ERROR) << "Failed to truncate excessive dex file length."
+                  << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             File* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET);
+  if (input_offset != static_cast<off_t>(0)) {
+    PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset
+                << " Expected: 0"
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
+    return false;
+  }
+
+  // Copy the input dex file using sendfile().
+  if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
+    PLOG(ERROR) << "Failed to copy dex file to oat file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // Check file position and seek the stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             OatDexFile* oat_dex_file,
+                             const uint8_t* dex_file) {
+  // Note: The raw data has already been checked to contain the header
+  // and all the data that the header specifies as the file size.
+  DCHECK(dex_file != nullptr);
+  DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation()));
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(dex_file);
+
+  if (!rodata->WriteFully(dex_file, header->file_size_)) {
+    PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation()
+                << " to " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex file."
+                << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Update dex file size and resize class offsets in the OatDexFile.
+  // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
+  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+  // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader.  If there are
+  // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and
+  // this Seek() ensures that we reserve the space for OatHeader in .rodata.
+  DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
+  uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize();
+  off_t actual_offset = rodata->Seek(expected_offset, kSeekSet);
+  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset
+                << " Expected: " << expected_offset << " File: " << rodata->GetLocation();
+    return false;
+  }
+
+  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+
+    DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_,
+              static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
+
+    // Write OatDexFile.
+    if (!oat_dex_file->Write(this, rodata)) {
+      PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) {
+  TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_);
+
+  int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset);
+  if (file->SetLength(new_length) != 0) {
+    PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length
+        << "File: " << file->GetPath();
+    return false;
+  }
+  off_t actual_offset = rodata->Seek(new_length, kSeekSet);
+  if (actual_offset != static_cast<off_t>(new_length)) {
+    PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables."
+                << " Actual: " << actual_offset << " Expected: " << new_length
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables."
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::OpenDexFiles(
+    File* file,
+    bool verify,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  TimingLogger::ScopedTiming split("OpenDexFiles", timings_);
+
+  if (oat_dex_files_.empty()) {
+    // Nothing to do.
+    return true;
+  }
+
+  size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+  size_t length = size_ - map_offset;
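+  // The map covers the whole range from the first dex file's offset up to the
+  // current end of the written oat data, i.e. all dex files at once.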
+  std::string error_msg;
+  std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length,
+                                                        PROT_READ | PROT_WRITE,
+                                                        MAP_SHARED,
+                                                        file->Fd(),
+                                                        oat_data_offset_ + map_offset,
+                                                        /* low_4gb */ false,
+                                                        file->GetPath().c_str(),
+                                                        &error_msg));
+  if (dex_files_map == nullptr) {
+    LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath()
+               << " error: " << error_msg;
+    return false;
+  }
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    // Make sure no one messed with input files while we were copying data.
+    // At the very least we need consistent file size and number of class definitions.
+    const uint8_t* raw_dex_file =
+        dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
+    if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
+      // Note: ValidateDexFileHeader() already logged an error message.
+      LOG(ERROR) << "Failed to verify written dex file header!"
+          << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
+          << " ~ " << static_cast<const void*>(raw_dex_file);
+      return false;
+    }
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+    if (header->file_size_ != oat_dex_file.dex_file_size_) {
+      LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+          << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+    if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
+      LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
+          << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+
+    // Now, open the dex file.
+    dex_files.emplace_back(DexFile::Open(raw_dex_file,
+                                         oat_dex_file.dex_file_size_,
+                                         oat_dex_file.GetLocation(),
+                                         oat_dex_file.dex_file_location_checksum_,
+                                         /* oat_dex_file */ nullptr,
+                                         verify,
+                                         verify,
+                                         &error_msg));
+    if (dex_files.back() == nullptr) {
+      LOG(ERROR) << "Failed to open dex file from oat file. File: " << oat_dex_file.GetLocation()
+                 << " Error: " << error_msg;
+      return false;
+    }
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  return true;
+}
+
+bool OatWriter::WriteTypeLookupTables(
+    MemMap* opened_dex_files_map,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_);
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    if (oat_dex_file->lookup_table_offset_ != 0u) {
+      DCHECK(oat_dex_file->create_type_lookup_table_ == CreateTypeLookupTable::kCreate);
+      DCHECK_NE(oat_dex_file->class_offsets_.size(), 0u);
+      size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+      size_t lookup_table_offset = oat_dex_file->lookup_table_offset_;
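+      // The mmapped region starts at the first dex file's offset, so translate
+      // the oat-data-relative lookup table offset into a map-relative one.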
+      uint8_t* lookup_table = opened_dex_files_map->Begin() + (lookup_table_offset - map_offset);
+      opened_dex_files[i]->CreateTypeLookupTable(lookup_table);
+    }
+  }
+
+  DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty());
+  if (opened_dex_files_map != nullptr && !opened_dex_files_map->Sync()) {
+    PLOG(ERROR) << "Failed to Sync() type lookup tables. Map: " << opened_dex_files_map->GetName();
+    return false;
+  }
+
+  return true;
+}
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1419,23 +2261,32 @@
   return true;
 }
 
-std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) {
-  auto it = map.find(ref);
-  if (it == map.end()) {
-    return std::pair<bool, uint32_t>(false, 0u);
-  } else {
-    return std::pair<bool, uint32_t>(true, it->second);
+void OatWriter::SetMultiOatRelativePatcherAdjustment() {
+  DCHECK(dex_files_ != nullptr);
+  DCHECK(relative_patcher_ != nullptr);
+  DCHECK_NE(oat_data_offset_, 0u);
+  if (image_writer_ != nullptr && !dex_files_->empty()) {
+    // The oat data begin address may not be initialized yet, but the oat file offset is ready.
+    size_t oat_index = image_writer_->GetOatIndexForDexFile(dex_files_->front());
+    size_t elf_file_offset = image_writer_->GetOatFileOffset(oat_index);
+    relative_patcher_->StartOatFile(elf_file_offset + oat_data_offset_);
   }
 }
 
-OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
-  offset_ = offset;
-  const std::string& location(dex_file.GetLocation());
-  dex_file_location_size_ = location.size();
-  dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
-  dex_file_location_checksum_ = dex_file.GetLocationChecksum();
-  dex_file_offset_ = 0;
-  methods_offsets_.resize(dex_file.NumClassDefs());
+OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
+                                  DexFileSource source,
+                                  CreateTypeLookupTable create_type_lookup_table)
+    : source_(source),
+      create_type_lookup_table_(create_type_lookup_table),
+      dex_file_size_(0),
+      offset_(0),
+      dex_file_location_size_(strlen(dex_file_location)),
+      dex_file_location_data_(dex_file_location),
+      dex_file_location_checksum_(0u),
+      dex_file_offset_(0u),
+      class_offsets_offset_(0u),
+      lookup_table_offset_(0u),
+      class_offsets_() {
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1443,54 +2294,93 @@
           + dex_file_location_size_
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
-          + (sizeof(methods_offsets_[0]) * methods_offsets_.size());
+          + sizeof(class_offsets_offset_)
+          + sizeof(lookup_table_offset_);
 }
 
-void OatWriter::OatDexFile::UpdateChecksum(OatHeader* oat_header) const {
-  oat_header->UpdateChecksum(&dex_file_location_size_, sizeof(dex_file_location_size_));
-  oat_header->UpdateChecksum(dex_file_location_data_, dex_file_location_size_);
-  oat_header->UpdateChecksum(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_));
-  oat_header->UpdateChecksum(&dex_file_offset_, sizeof(dex_file_offset_));
-  oat_header->UpdateChecksum(&methods_offsets_[0],
-                            sizeof(methods_offsets_[0]) * methods_offsets_.size());
+void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) {
+  DCHECK_EQ(lookup_table_offset_, 0u);
+  if (create_type_lookup_table_ == CreateTypeLookupTable::kCreate && !class_offsets_.empty()) {
+    size_t table_size = TypeLookupTable::RawDataLength(class_offsets_.size());
+    if (table_size != 0u) {
+      // Type tables are required to be 4 byte aligned.
+      size_t original_offset = oat_writer->size_;
+      size_t offset = RoundUp(original_offset, 4);
+      oat_writer->size_oat_lookup_table_alignment_ += offset - original_offset;
+      lookup_table_offset_ = offset;
+      oat_writer->size_ = offset + table_size;
+      oat_writer->size_oat_lookup_table_ += table_size;
+    }
+  }
 }
 
-bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
-                                  OutputStream* out,
-                                  const size_t file_offset) const {
+void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
+  DCHECK_EQ(class_offsets_offset_, 0u);
+  if (!class_offsets_.empty()) {
+    // Class offsets are required to be 4 byte aligned.
+    size_t original_offset = oat_writer->size_;
+    size_t offset = RoundUp(original_offset, 4);
+    oat_writer->size_oat_class_offsets_alignment_ += offset - original_offset;
+    class_offsets_offset_ = offset;
+    oat_writer->size_ = offset + GetClassOffsetsRawSize();
+  }
+}
+
+bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
+  const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
+
   if (!out->WriteFully(&dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
+
   if (!out->WriteFully(dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
+
   if (!out->WriteFully(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_))) {
     PLOG(ERROR) << "Failed to write dex file location checksum to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
+
   if (!out->WriteFully(&dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
-  if (!out->WriteFully(&methods_offsets_[0],
-                      sizeof(methods_offsets_[0]) * methods_offsets_.size())) {
-    PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
+
+  if (!out->WriteFully(&class_offsets_offset_, sizeof(class_offsets_offset_))) {
+    PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_methods_offsets_ +=
-      sizeof(methods_offsets_[0]) * methods_offsets_.size();
+  oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
+
+  if (!out->WriteFully(&lookup_table_offset_, sizeof(lookup_table_offset_))) {
+    PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+
+  return true;
+}
+
+bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
+  if (!out->WriteFully(class_offsets_.data(), GetClassOffsetsRawSize())) {
+    PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
+                << " to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
 
 OatWriter::OatClass::OatClass(size_t offset,
-                              const std::vector<CompiledMethod*>& compiled_methods,
+                              const dchecked_vector<CompiledMethod*>& compiled_methods,
                               uint32_t num_non_null_compiled_methods,
                               mirror::Class::Status status)
     : compiled_methods_(compiled_methods) {
@@ -1520,7 +2410,7 @@
 
   uint32_t oat_method_offsets_offset_from_oat_class = sizeof(type_) + sizeof(status_);
   if (type_ == kOatClassSomeCompiled) {
-    method_bitmap_ = new BitVector(num_methods, false, Allocator::GetMallocAllocator());
+    method_bitmap_.reset(new BitVector(num_methods, false, Allocator::GetMallocAllocator()));
     method_bitmap_size_ = method_bitmap_->GetSizeOf();
     oat_method_offsets_offset_from_oat_class += sizeof(method_bitmap_size_);
     oat_method_offsets_offset_from_oat_class += method_bitmap_size_;
@@ -1543,10 +2433,6 @@
   }
 }
 
-OatWriter::OatClass::~OatClass() {
-  delete method_bitmap_;
-}
-
 size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatHeader(
     size_t class_def_method_index_) const {
   uint32_t method_offset = GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_);
@@ -1569,18 +2455,6 @@
           + (sizeof(method_offsets_[0]) * method_offsets_.size());
 }
 
-void OatWriter::OatClass::UpdateChecksum(OatHeader* oat_header) const {
-  oat_header->UpdateChecksum(&status_, sizeof(status_));
-  oat_header->UpdateChecksum(&type_, sizeof(type_));
-  if (method_bitmap_size_ != 0) {
-    CHECK_EQ(kOatClassSomeCompiled, type_);
-    oat_header->UpdateChecksum(&method_bitmap_size_, sizeof(method_bitmap_size_));
-    oat_header->UpdateChecksum(method_bitmap_->GetRawStorage(), method_bitmap_size_);
-  }
-  oat_header->UpdateChecksum(&method_offsets_[0],
-                             sizeof(method_offsets_[0]) * method_offsets_.size());
-}
-
 bool OatWriter::OatClass::Write(OatWriter* oat_writer,
                                 OutputStream* out,
                                 const size_t file_offset) const {
@@ -1590,11 +2464,13 @@
     return false;
   }
   oat_writer->size_oat_class_status_ += sizeof(status_);
+
   if (!out->WriteFully(&type_, sizeof(type_))) {
     PLOG(ERROR) << "Failed to write oat class type to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_type_ += sizeof(type_);
+
   if (method_bitmap_size_ != 0) {
     CHECK_EQ(kOatClassSomeCompiled, type_);
     if (!out->WriteFully(&method_bitmap_size_, sizeof(method_bitmap_size_))) {
@@ -1602,18 +2478,19 @@
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += sizeof(method_bitmap_size_);
+
     if (!out->WriteFully(method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
       PLOG(ERROR) << "Failed to write method bitmap to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += method_bitmap_size_;
   }
-  if (!out->WriteFully(&method_offsets_[0],
-                      sizeof(method_offsets_[0]) * method_offsets_.size())) {
+
+  if (!out->WriteFully(method_offsets_.data(), GetMethodOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write method offsets to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_class_method_offsets_ += sizeof(method_offsets_[0]) * method_offsets_.size();
+  oat_writer->size_oat_class_method_offsets_ += GetMethodOffsetsRawSize();
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index d6cb65b..decb7db 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -21,12 +21,15 @@
 #include <cstddef>
 #include <memory>
 
+#include "base/dchecked_vector.h"
 #include "linker/relative_patcher.h"  // For linker::RelativePatcherTargetProvider.
 #include "mem_map.h"
 #include "method_reference.h"
-#include "oat.h"
 #include "mirror/class.h"
+#include "oat.h"
+#include "os.h"
 #include "safe_map.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -36,6 +39,16 @@
 class ImageWriter;
 class OutputStream;
 class TimingLogger;
+class TypeLookupTable;
+class ZipEntry;
+
+namespace debug {
+struct MethodDebugInfo;
+}  // namespace debug
+
+namespace linker {
+class MultiOatRelativePatcher;
+}  // namespace linker
 
 // OatHeader         variable length with count of D OatDexFiles
 //
@@ -49,6 +62,16 @@
 // ...
 // Dex[D]
 //
+// TypeLookupTable[0] one descriptor to class def index hash table for each OatDexFile.
+// TypeLookupTable[1]
+// ...
+// TypeLookupTable[D]
+//
+// ClassOffsets[0]   one table of OatClass offsets for each class def for each OatDexFile.
+// ClassOffsets[1]
+// ...
+// ClassOffsets[D]
+//
 // OatClass[0]       one variable sized OatClass for each of C DexFile::ClassDefs
 // OatClass[1]       contains OatClass entries with class status, offsets to code, etc.
 // ...
@@ -81,14 +104,69 @@
 //
 class OatWriter {
  public:
-  OatWriter(const std::vector<const DexFile*>& dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uintptr_t image_file_location_oat_begin,
-            int32_t image_patch_delta,
-            const CompilerDriver* compiler,
-            ImageWriter* image_writer,
-            TimingLogger* timings,
-            SafeMap<std::string, std::string>* key_value_store);
+  enum class CreateTypeLookupTable {
+    kCreate,
+    kDontCreate,
+    kDefault = kCreate
+  };
+
+  OatWriter(bool compiling_boot_image, TimingLogger* timings);
+
+  // To produce a valid oat file, the user must first add sources with any combination of
+  //   - AddDexFileSource(),
+  //   - AddZippedDexFilesSource(),
+  //   - AddRawDexFileSource().
+  // Then the user must call in order
+  //   - WriteAndOpenDexFiles()
+  //   - PrepareLayout(),
+  //   - WriteRodata(),
+  //   - WriteCode(),
+  //   - WriteHeader().
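+  //
+  // A minimal sketch of that sequence (error handling elided; the streams,
+  // files and compiler objects are assumed to be provided by the caller):
+  //
+  //   OatWriter writer(/* compiling_boot_image */ false, timings);
+  //   writer.AddDexFileSource("classes.dex", "classes.dex");
+  //   writer.WriteAndOpenDexFiles(rodata, file, isa, isa_features, &key_value_store,
+  //                               /* verify */ true, &dex_files_map, &dex_files);
+  //   writer.PrepareLayout(compiler_driver, image_writer, dex_file_ptrs, patcher);
+  //   writer.WriteRodata(rodata);
+  //   writer.WriteCode(text);
+  //   writer.WriteHeader(rodata, image_checksum, image_begin, image_patch_delta);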
+
+  // Add dex file source(s) from a file, either a plain dex file or
+  // a zip file with one or more dex files.
+  bool AddDexFileSource(
+      const char* filename,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source(s) from a zip file specified by a file handle.
+  bool AddZippedDexFilesSource(
+      File&& zip_fd,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source from raw memory.
+  bool AddRawDexFileSource(
+      const ArrayRef<const uint8_t>& data,
+      const char* location,
+      uint32_t location_checksum,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  dchecked_vector<const char*> GetSourceLocations() const;
+
+  // Write raw dex files to the .rodata section and open them from the oat file. The verify
+  // setting dictates whether the dex file verifier should check the dex files. This is generally
+  // the case, and should only be false for tests.
+  bool WriteAndOpenDexFiles(OutputStream* rodata,
+                            File* file,
+                            InstructionSet instruction_set,
+                            const InstructionSetFeatures* instruction_set_features,
+                            SafeMap<std::string, std::string>* key_value_store,
+                            bool verify,
+                            /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                            /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  // Prepare layout of remaining data.
+  void PrepareLayout(const CompilerDriver* compiler,
+                     ImageWriter* image_writer,
+                     const std::vector<const DexFile*>& dex_files,
+                     linker::MultiOatRelativePatcher* relative_patcher);
+  // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
+  bool WriteRodata(OutputStream* out);
+  // Write the code to the .text section.
+  bool WriteCode(OutputStream* out);
+  // Write the oat header. This finalizes the oat file.
+  bool WriteHeader(OutputStream* out,
+                   uint32_t image_file_location_oat_checksum,
+                   uintptr_t image_file_location_oat_begin,
+                   int32_t image_patch_delta);
 
   // Returns whether the oat file has an associated image.
   bool HasImage() const {
@@ -97,6 +175,10 @@
     return image_writer_ != nullptr;
   }
 
+  bool HasBootImage() const {
+    return compiling_boot_image_;
+  }
+
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
   }
@@ -109,29 +191,22 @@
     return bss_size_;
   }
 
-  const std::vector<uintptr_t>& GetAbsolutePatchLocations() const {
-    return absolute_patch_locations_;
+  size_t GetOatDataOffset() const {
+    return oat_data_offset_;
   }
 
-  bool WriteRodata(OutputStream* out);
-  bool WriteCode(OutputStream* out);
+  ArrayRef<const uintptr_t> GetAbsolutePatchLocations() const {
+    return ArrayRef<const uintptr_t>(absolute_patch_locations_);
+  }
 
   ~OatWriter();
 
-  struct DebugInfo {
-    const DexFile* dex_file_;
-    size_t class_def_index_;
-    uint32_t dex_method_index_;
-    uint32_t access_flags_;
-    const DexFile::CodeItem *code_item_;
-    bool deduped_;
-    uint32_t low_pc_;
-    uint32_t high_pc_;
-    CompiledMethod* compiled_method_;
-  };
+  void AddMethodDebugInfos(const std::vector<debug::MethodDebugInfo>& infos) {
+    method_info_.insert(method_info_.end(), infos.begin(), infos.end());
+  }
 
-  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
-    return method_info_;
+  ArrayRef<const debug::MethodDebugInfo> GetMethodDebugInfo() const {
+    return ArrayRef<const debug::MethodDebugInfo>(method_info_);
   }
 
   const CompilerDriver* GetCompilerDriver() {
@@ -139,12 +214,9 @@
   }
 
  private:
-  // The DataAccess classes are helper classes that provide access to members related to
-  // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
-  // we can share a lot of code for processing the maps with template classes below.
-  struct GcMapDataAccess;
-  struct MappingTableDataAccess;
-  struct VmapTableDataAccess;
+  class DexFileSource;
+  class OatClass;
+  class OatDexFile;
 
   // The function VisitDexMethods() below iterates through all the methods in all
   // the compiled dex files in order of their definitions. The method visitor
@@ -155,120 +227,76 @@
   class OatDexMethodVisitor;
   class InitOatClassesMethodVisitor;
   class InitCodeMethodVisitor;
-  template <typename DataAccess>
   class InitMapMethodVisitor;
   class InitImageMethodVisitor;
   class WriteCodeMethodVisitor;
-  template <typename DataAccess>
   class WriteMapMethodVisitor;
 
   // Visit all the methods in all the compiled dex files in their definition order
   // with a given DexMethodVisitor.
   bool VisitDexMethods(DexMethodVisitor* visitor);
 
-  size_t InitOatHeader();
+  size_t InitOatHeader(InstructionSet instruction_set,
+                       const InstructionSetFeatures* instruction_set_features,
+                       uint32_t num_dex_files,
+                       SafeMap<std::string, std::string>* key_value_store);
   size_t InitOatDexFiles(size_t offset);
-  size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
-  size_t InitOatCode(size_t offset)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  size_t InitOatCodeDexFiles(size_t offset)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  size_t InitOatCode(size_t offset);
+  size_t InitOatCodeDexFiles(size_t offset);
 
-  bool WriteTables(OutputStream* out, const size_t file_offset);
+  bool WriteClassOffsets(OutputStream* out);
+  bool WriteClasses(OutputStream* out);
   size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
+  bool RecordOatDataOffset(OutputStream* out);
+  bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file);
+  bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
+  bool WriteDexFiles(OutputStream* rodata, File* file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file);
+  bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file);
+  bool WriteOatDexFiles(OutputStream* rodata);
+  bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset);
+  bool OpenDexFiles(File* file,
+                    bool verify,
+                    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
+                             const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
+  void SetMultiOatRelativePatcherAdjustment();
 
-  class OatDexFile {
-   public:
-    OatDexFile(size_t offset, const DexFile& dex_file);
-    size_t SizeOf() const;
-    void UpdateChecksum(OatHeader* oat_header) const;
-    bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
-
-    // Offset of start of OatDexFile from beginning of OatHeader. It is
-    // used to validate file position when writing.
-    size_t offset_;
-
-    // data to write
-    uint32_t dex_file_location_size_;
-    const uint8_t* dex_file_location_data_;
-    uint32_t dex_file_location_checksum_;
-    uint32_t dex_file_offset_;
-    std::vector<uint32_t> methods_offsets_;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(OatDexFile);
+  enum class WriteState {
+    kAddingDexFileSources,
+    kPrepareLayout,
+    kWriteRoData,
+    kWriteText,
+    kWriteHeader,
+    kDone
   };
 
-  class OatClass {
-   public:
-    OatClass(size_t offset,
-             const std::vector<CompiledMethod*>& compiled_methods,
-             uint32_t num_non_null_compiled_methods,
-             mirror::Class::Status status);
-    ~OatClass();
-    size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
-    size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
-    size_t SizeOf() const;
-    void UpdateChecksum(OatHeader* oat_header) const;
-    bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+  WriteState write_state_;
+  TimingLogger* timings_;
 
-    CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const {
-      DCHECK_LT(class_def_method_index, compiled_methods_.size());
-      return compiled_methods_[class_def_method_index];
-    }
+  std::vector<std::unique_ptr<File>> raw_dex_files_;
+  std::vector<std::unique_ptr<ZipArchive>> zip_archives_;
+  std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_;
 
-    // Offset of start of OatClass from beginning of OatHeader. It is
-    // used to validate file position when writing.
-    size_t offset_;
+  // Using std::list<> which doesn't move elements around on push/emplace_back().
+  // We need this because we keep plain pointers to the strings' c_str().
+  std::list<std::string> zipped_dex_file_locations_;
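+  // (Unlike std::vector, std::list never relocates its elements, so a saved
+  // c_str() pointer stays valid for as long as the string stays in the list.)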
 
-    // CompiledMethods for each class_def_method_index, or null if no method is available.
-    std::vector<CompiledMethod*> compiled_methods_;
+  dchecked_vector<debug::MethodDebugInfo> method_info_;
 
-    // Offset from OatClass::offset_ to the OatMethodOffsets for the
-    // class_def_method_index. If 0, it means the corresponding
-    // CompiledMethod entry in OatClass::compiled_methods_ should be
-    // null and that the OatClass::type_ should be kOatClassBitmap.
-    std::vector<uint32_t> oat_method_offsets_offsets_from_oat_class_;
-
-    // data to write
-
-    static_assert(mirror::Class::Status::kStatusMax < (2 ^ 16), "class status won't fit in 16bits");
-    int16_t status_;
-
-    static_assert(OatClassType::kOatClassMax < (2 ^ 16), "oat_class type won't fit in 16bits");
-    uint16_t type_;
-
-    uint32_t method_bitmap_size_;
-
-    // bit vector indexed by ClassDef method index. When
-    // OatClassType::type_ is kOatClassBitmap, a set bit indicates the
-    // method has an OatMethodOffsets in methods_offsets_, otherwise
-    // the entry was ommited to save space. If OatClassType::type_ is
-    // not is kOatClassBitmap, the bitmap will be null.
-    BitVector* method_bitmap_;
-
-    // OatMethodOffsets and OatMethodHeaders for each CompiledMethod
-    // present in the OatClass. Note that some may be missing if
-    // OatClass::compiled_methods_ contains null values (and
-    // oat_method_offsets_offsets_from_oat_class_ should contain 0
-    // values in this case).
-    std::vector<OatMethodOffsets> method_offsets_;
-    std::vector<OatQuickMethodHeader> method_headers_;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(OatClass);
-  };
-
-  std::vector<DebugInfo> method_info_;
-
-  const CompilerDriver* const compiler_driver_;
-  ImageWriter* const image_writer_;
+  const CompilerDriver* compiler_driver_;
+  ImageWriter* image_writer_;
+  const bool compiling_boot_image_;
 
   // note OatFile does not take ownership of the DexFiles
   const std::vector<const DexFile*>* dex_files_;
@@ -286,16 +314,10 @@
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
-  // dependencies on the image.
-  uint32_t image_file_location_oat_checksum_;
-  uintptr_t image_file_location_oat_begin_;
-  int32_t image_patch_delta_;
-
   // data to write
-  SafeMap<std::string, std::string>* key_value_store_;
-  OatHeader* oat_header_;
-  std::vector<OatDexFile*> oat_dex_files_;
-  std::vector<OatClass*> oat_classes_;
+  std::unique_ptr<OatHeader> oat_header_;
+  dchecked_vector<OatDexFile> oat_dex_files_;
+  dchecked_vector<OatClass> oat_classes_;
   std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
   std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
   std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
@@ -321,32 +343,27 @@
   uint32_t size_code_alignment_;
   uint32_t size_relative_call_thunks_;
   uint32_t size_misc_thunks_;
-  uint32_t size_mapping_table_;
   uint32_t size_vmap_table_;
-  uint32_t size_gc_map_;
   uint32_t size_oat_dex_file_location_size_;
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
-  uint32_t size_oat_dex_file_methods_offsets_;
+  uint32_t size_oat_dex_file_class_offsets_offset_;
+  uint32_t size_oat_dex_file_lookup_table_offset_;
+  uint32_t size_oat_lookup_table_alignment_;
+  uint32_t size_oat_lookup_table_;
+  uint32_t size_oat_class_offsets_alignment_;
+  uint32_t size_oat_class_offsets_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
   uint32_t size_oat_class_method_offsets_;
 
-  std::unique_ptr<linker::RelativePatcher> relative_patcher_;
+  // The helper for processing relative patches is external so that we can patch across oat files.
+  linker::MultiOatRelativePatcher* relative_patcher_;
 
   // The locations of absolute patches relative to the start of the executable section.
-  std::vector<uintptr_t> absolute_patch_locations_;
-
-  // Map method reference to assigned offset.
-  // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
-  class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider {
-   public:
-    std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE;
-    SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
-  };
-  MethodOffsetMap method_offset_map_;
+  dchecked_vector<uintptr_t> absolute_patch_locations_;
 
   DISALLOW_COPY_AND_ASSIGN(OatWriter);
 };
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
new file mode 100644
index 0000000..5e70a82
--- /dev/null
+++ b/compiler/optimizing/block_builder.cc
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "block_builder.h"
+
+#include "bytecode_utils.h"
+
+namespace art {
+
+HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) {
+  return MaybeCreateBlockAt(dex_pc, dex_pc);
+}
+
+HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t semantic_dex_pc,
+                                                    uint32_t store_dex_pc) {
+  HBasicBlock* block = branch_targets_[store_dex_pc];
+  if (block == nullptr) {
+    block = new (arena_) HBasicBlock(graph_, semantic_dex_pc);
+    branch_targets_[store_dex_pc] = block;
+  }
+  DCHECK_EQ(block->GetDexPc(), semantic_dex_pc);
+  return block;
+}
+
+bool HBasicBlockBuilder::CreateBranchTargets() {
+  // Create the first block for the dex instructions, single successor of the entry block.
+  MaybeCreateBlockAt(0u);
+
+  if (code_item_.tries_size_ != 0) {
+    // Create branch targets at the start/end of the TryItem range. These are
+    // places where the program might fall through into/out of a block and
+    // where TryBoundary instructions will be inserted later. Other edges which
+    // enter/exit the try blocks are a result of branches/switches.
+    for (size_t idx = 0; idx < code_item_.tries_size_; ++idx) {
+      const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item_, idx);
+      uint32_t dex_pc_start = try_item->start_addr_;
+      uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
+      MaybeCreateBlockAt(dex_pc_start);
+      if (dex_pc_end < code_item_.insns_size_in_code_units_) {
+        // TODO: Do not create block if the last instruction cannot fall through.
+        MaybeCreateBlockAt(dex_pc_end);
+      } else if (dex_pc_end == code_item_.insns_size_in_code_units_) {
+        // The TryItem spans until the very end of the CodeItem and therefore
+        // cannot have any code afterwards.
+      } else {
+        // The TryItem spans beyond the end of the CodeItem. This is invalid code.
+        return false;
+      }
+    }
+
+    // Create branch targets for exception handlers.
+    const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+    uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
+    for (uint32_t idx = 0; idx < handlers_size; ++idx) {
+      CatchHandlerIterator iterator(handlers_ptr);
+      for (; iterator.HasNext(); iterator.Next()) {
+        MaybeCreateBlockAt(iterator.GetHandlerAddress());
+      }
+      handlers_ptr = iterator.EndDataPointer();
+    }
+  }
+
+  // Iterate over all instructions and find branching instructions. Create blocks for
+  // the locations these instructions branch to.
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+    const Instruction& instruction = it.CurrentInstruction();
+
+    if (instruction.IsBranch()) {
+      number_of_branches_++;
+      MaybeCreateBlockAt(dex_pc + instruction.GetTargetOffset());
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
+        MaybeCreateBlockAt(dex_pc + s_it.CurrentTargetOffset());
+
+        // Create N-1 blocks where we will insert comparisons of the input value
+        // against the Switch's case keys.
+        if (table.ShouldBuildDecisionTree() && !s_it.IsLast()) {
+          // Store the block under the dex_pc of the current key within the
+          // switch data instruction (for uniqueness), but give it the dex_pc
+          // of the SWITCH instruction to which it semantically belongs.
+          MaybeCreateBlockAt(dex_pc, s_it.GetDexPcForCurrentIndex());
+        }
+      }
+    } else if (instruction.Opcode() == Instruction::MOVE_EXCEPTION) {
+      // End the basic block after MOVE_EXCEPTION. This simplifies the later
+      // stage of TryBoundary-block insertion.
+    } else {
+      continue;
+    }
+
+    if (instruction.CanFlowThrough()) {
+      if (it.IsLast()) {
+        // In the normal case we should never hit this, but someone could artificially
+        // forge a dex file to fall through out of the method code. In that case we
+        // bail out of compilation.
+        return false;
+      } else {
+        MaybeCreateBlockAt(dex_pc + it.CurrentInstruction().SizeInCodeUnits());
+      }
+    }
+  }
+
+  return true;
+}
+
+void HBasicBlockBuilder::ConnectBasicBlocks() {
+  HBasicBlock* block = graph_->GetEntryBlock();
+  graph_->AddBlock(block);
+
+  bool is_throwing_block = false;
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+
+    // Check if this dex_pc address starts a new basic block.
+    HBasicBlock* next_block = GetBlockAt(dex_pc);
+    if (next_block != nullptr) {
+      if (block != nullptr) {
+        // Last instruction did not end its basic block but a new one starts here.
+        // It must have been a block falling through into the next one.
+        block->AddSuccessor(next_block);
+      }
+      block = next_block;
+      is_throwing_block = false;
+      graph_->AddBlock(block);
+    }
+
+    if (block == nullptr) {
+      // Ignore dead code.
+      continue;
+    }
+
+    const Instruction& instruction = it.CurrentInstruction();
+
+    if (!is_throwing_block && IsThrowingDexInstruction(instruction)) {
+      DCHECK(!ContainsElement(throwing_blocks_, block));
+      is_throwing_block = true;
+      throwing_blocks_.push_back(block);
+    }
+
+    if (instruction.IsBranch()) {
+      uint32_t target_dex_pc = dex_pc + instruction.GetTargetOffset();
+      block->AddSuccessor(GetBlockAt(target_dex_pc));
+    } else if (instruction.IsReturn() || (instruction.Opcode() == Instruction::THROW)) {
+      block->AddSuccessor(graph_->GetExitBlock());
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
+        uint32_t target_dex_pc = dex_pc + s_it.CurrentTargetOffset();
+        block->AddSuccessor(GetBlockAt(target_dex_pc));
+
+        if (table.ShouldBuildDecisionTree() && !s_it.IsLast()) {
+          uint32_t next_case_dex_pc = s_it.GetDexPcForCurrentIndex();
+          HBasicBlock* next_case_block = GetBlockAt(next_case_dex_pc);
+          block->AddSuccessor(next_case_block);
+          block = next_case_block;
+          graph_->AddBlock(block);
+        }
+      }
+    } else {
+      // Remaining code only applies to instructions which end their basic block.
+      continue;
+    }
+
+    if (instruction.CanFlowThrough()) {
+      uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+      block->AddSuccessor(GetBlockAt(next_dex_pc));
+    }
+
+    // The basic block ends here. Do not add any more instructions.
+    block = nullptr;
+  }
+
+  graph_->AddBlock(graph_->GetExitBlock());
+}
+
+// Returns the TryItem stored for `block` or nullptr if there is no info for it.
+static const DexFile::TryItem* GetTryItem(
+    HBasicBlock* block,
+    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
+  auto iterator = try_block_info.find(block->GetBlockId());
+  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
+}
+
+// Iterates over the exception handlers of `try_item`, finds the corresponding
+// catch blocks and makes them successors of `try_boundary`. The order of
+// successors matches the order in which runtime exception delivery searches
+// for a handler.
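+// For example, with `catch (IOException e) {...} catch (Exception e) {...}`,
+// the IOException handler is added before the Exception handler, matching the
+// order of the dex encoded_catch_handler list.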
+static void LinkToCatchBlocks(HTryBoundary* try_boundary,
+                              const DexFile::CodeItem& code_item,
+                              const DexFile::TryItem* try_item,
+                              const ArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) {
+  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
+    try_boundary->AddExceptionHandler(catch_blocks.Get(it.GetHandlerAddress()));
+  }
+}
+
+bool HBasicBlockBuilder::MightHaveLiveNormalPredecessors(HBasicBlock* catch_block) {
+  if (kIsDebugBuild) {
+    DCHECK_NE(catch_block->GetDexPc(), kNoDexPc) << "Should not be called on synthetic blocks";
+    DCHECK(!graph_->GetEntryBlock()->GetSuccessors().empty())
+        << "Basic blocks must have been created and connected";
+    for (HBasicBlock* predecessor : catch_block->GetPredecessors()) {
+      DCHECK(!predecessor->IsSingleTryBoundary())
+          << "TryBoundary blocks must not have not been created yet";
+    }
+  }
+
+  const Instruction& first = GetDexInstructionAt(code_item_, catch_block->GetDexPc());
+  if (first.Opcode() == Instruction::MOVE_EXCEPTION) {
+    // Verifier guarantees that if a catch block begins with MOVE_EXCEPTION then
+    // it has no live normal predecessors.
+    return false;
+  } else if (catch_block->GetPredecessors().empty()) {
+    // Normal control-flow edges have already been created. Since the block's list
+    // of predecessors is empty, it cannot have any live or dead normal predecessors.
+    return false;
+  }
+
+  // The catch block has normal predecessors but we do not know which are live
+  // and which will be removed during the initial DCE. Return `true` to signal
+  // that it may have live normal predecessors.
+  return true;
+}
+
+void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
+  if (code_item_.tries_size_ == 0) {
+    return;
+  }
+
+  // Keep a map of all try blocks and their respective TryItems. We do not use
+  // the block's pointer but rather its id to ensure deterministic iteration.
+  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+
+  // Obtain TryItem information for blocks with throwing instructions, and split
+  // blocks which are both try & catch to simplify the graph.
+  for (HBasicBlock* block : graph_->GetBlocks()) {
+    if (block->GetDexPc() == kNoDexPc) {
+      continue;
+    }
+
+    // Do not bother creating exceptional edges for try blocks which have no
+    // throwing instructions. In that case we simply assume that the block is
+    // not covered by a TryItem. This prevents us from creating a throw-catch
+    // loop for synchronized blocks.
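+    // (For instance, a synchronized method's catch-all handler typically
+    // covers its own code; treating every covered block as a try block
+    // would make the handler an exceptional predecessor of itself.)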
+    if (ContainsElement(throwing_blocks_, block)) {
+      // Try to find a TryItem covering the block.
+      const int32_t try_item_idx = DexFile::FindTryItem(code_item_, block->GetDexPc());
+      if (try_item_idx != -1) {
+        // Block throwing and in a TryItem. Store the try block information.
+        try_block_info.Put(block->GetBlockId(), DexFile::GetTryItems(code_item_, try_item_idx));
+      }
+    }
+  }
+
+  // Map from a handler dex_pc to the corresponding catch block.
+  ArenaSafeMap<uint32_t, HBasicBlock*> catch_blocks(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+
+  // Iterate over catch blocks, create artificial landing pads if necessary to
+  // simplify the CFG, and set metadata.
+  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
+  for (uint32_t idx = 0; idx < handlers_size; ++idx) {
+    CatchHandlerIterator iterator(handlers_ptr);
+    for (; iterator.HasNext(); iterator.Next()) {
+      uint32_t address = iterator.GetHandlerAddress();
+      if (catch_blocks.find(address) != catch_blocks.end()) {
+        // Catch block already processed.
+        continue;
+      }
+
+      // Check if we should create an artificial landing pad for the catch block.
+      // We create one if the catch block is also a try block because we do not
+      // have a strategy for inserting TryBoundaries on exceptional edges.
+      // We also create one if the block might have normal predecessors so as to
+      // simplify register allocation.
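+      // For instance, if the handler address is also a goto target, the
+      // exceptional edges are linked to the new Goto block below while the
+      // normal predecessors keep targeting the original block.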
+      HBasicBlock* catch_block = GetBlockAt(address);
+      bool is_try_block = (try_block_info.find(catch_block->GetBlockId()) != try_block_info.end());
+      if (is_try_block || MightHaveLiveNormalPredecessors(catch_block)) {
+        HBasicBlock* new_catch_block = new (arena_) HBasicBlock(graph_, address);
+        new_catch_block->AddInstruction(new (arena_) HGoto(address));
+        new_catch_block->AddSuccessor(catch_block);
+        graph_->AddBlock(new_catch_block);
+        catch_block = new_catch_block;
+      }
+
+      catch_blocks.Put(address, catch_block);
+      catch_block->SetTryCatchInformation(
+        new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
+    }
+    handlers_ptr = iterator.EndDataPointer();
+  }
+
+  // Do a pass over the try blocks and insert entering TryBoundaries where at
+  // least one predecessor is not covered by the same TryItem as the try block.
+  // We do not split each edge separately, but rather create one boundary block
+  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
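+  // For example, when the try block is a loop header, a single shared
+  // boundary block keeps the header intact, whereas splitting every incoming
+  // edge separately could break the loop structure.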
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
+      if (GetTryItem(predecessor, try_block_info) != entry.second) {
+        // Found a predecessor not covered by the same TryItem. Insert entering
+        // boundary block.
+        HTryBoundary* try_entry =
+            new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
+        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
+        LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+        break;
+      }
+    }
+  }
+
+  // Do a second pass over the try blocks and insert exit TryBoundaries where
+  // the successor is not in the same TryItem.
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+    // NOTE: Do not use iterators because SplitEdge would invalidate them.
+    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
+      HBasicBlock* successor = try_block->GetSuccessors()[i];
+
+      // If the successor is a try block, all of its predecessors must be
+      // covered by the same TryItem. Otherwise the previous pass would have
+      // created a non-throwing boundary block.
+      if (GetTryItem(successor, try_block_info) != nullptr) {
+        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+        continue;
+      }
+
+      // Insert TryBoundary and link to catch blocks.
+      HTryBoundary* try_exit =
+          new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
+      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
+      LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+    }
+  }
+}
+
+bool HBasicBlockBuilder::Build() {
+  DCHECK(graph_->GetBlocks().empty());
+
+  graph_->SetEntryBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+  graph_->SetExitBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+
+  // TODO(dbrazdil): Do CreateBranchTargets and ConnectBasicBlocks in one pass.
+  if (!CreateBranchTargets()) {
+    return false;
+  }
+
+  ConnectBasicBlocks();
+  InsertTryBoundaryBlocks();
+
+  return true;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
new file mode 100644
index 0000000..1be0b4c
--- /dev/null
+++ b/compiler/optimizing/block_builder.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "dex_file.h"
+#include "nodes.h"
+
+namespace art {
+
+class HBasicBlockBuilder : public ValueObject {
+ public:
+  HBasicBlockBuilder(HGraph* graph,
+                     const DexFile* const dex_file,
+                     const DexFile::CodeItem& code_item)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        branch_targets_(code_item.insns_size_in_code_units_,
+                        nullptr,
+                        arena_->Adapter(kArenaAllocGraphBuilder)),
+        throwing_blocks_(kDefaultNumberOfThrowingBlocks, arena_->Adapter(kArenaAllocGraphBuilder)),
+        number_of_branches_(0u) {}
+
+  // Creates basic blocks in `graph_` at branch target dex_pc positions of the
+  // `code_item_`. Blocks are connected but left unpopulated with instructions.
+  // TryBoundary blocks are inserted at positions where control-flow enters/
+  // exits a try block.
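+  //
+  // Usage sketch:
+  //   HBasicBlockBuilder builder(graph, &dex_file, code_item);
+  //   if (builder.Build()) {
+  //     HBasicBlock* block = builder.GetBlockAt(dex_pc);
+  //   }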
+  bool Build();
+
+  size_t GetNumberOfBranches() const { return number_of_branches_; }
+  HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
+
+ private:
+  // Creates a basic block starting at the given `dex_pc` if one has not been created yet.
+  HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
+
+  // Creates a basic block for bytecode instructions at `semantic_dex_pc` and
+  // stores it under the `store_dex_pc` key. This is used when multiple blocks
+  // share the same semantic dex_pc, e.g. when building switch decision trees.
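+  // For example, the extra comparison blocks of a switch are semantically at
+  // the switch's dex_pc but are stored under dex_pcs inside the switch
+  // payload, giving each its own `branch_targets_` slot.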
+  HBasicBlock* MaybeCreateBlockAt(uint32_t semantic_dex_pc, uint32_t store_dex_pc);
+
+  bool CreateBranchTargets();
+  void ConnectBasicBlocks();
+  void InsertTryBoundaryBlocks();
+
+  // Helper method which decides whether `catch_block` may have live normal
+  // predecessors and thus whether a synthetic catch block needs to be created
+  // to avoid mixing normal and exceptional predecessors.
+  // Should only be called during InsertTryBoundaryBlocks on blocks at catch
+  // handler dex_pcs.
+  bool MightHaveLiveNormalPredecessors(HBasicBlock* catch_block);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  ArenaVector<HBasicBlock*> branch_targets_;
+  ArenaVector<HBasicBlock*> throwing_blocks_;
+  size_t number_of_branches_;
+
+  static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
+
+  DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
deleted file mode 100644
index f985745..0000000
--- a/compiler/optimizing/boolean_simplifier.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "boolean_simplifier.h"
-
-namespace art {
-
-void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) {
-  DCHECK(block->EndsWithIf());
-
-  // Check if the condition is a Boolean negation.
-  HIf* if_instruction = block->GetLastInstruction()->AsIf();
-  HInstruction* boolean_not = if_instruction->InputAt(0);
-  if (!boolean_not->IsBooleanNot()) {
-    return;
-  }
-
-  // Make BooleanNot's input the condition of the If and swap branches.
-  if_instruction->ReplaceInput(boolean_not->InputAt(0), 0);
-  block->SwapSuccessors();
-
-  // Remove the BooleanNot if it is now unused.
-  if (!boolean_not->HasUses()) {
-    boolean_not->GetBlock()->RemoveInstruction(boolean_not);
-  }
-}
-
-// Returns true if 'block1' and 'block2' are empty, merge into the same single
-// successor and the successor can only be reached from them.
-static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
-  if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
-  HBasicBlock* succ1 = block1->GetSuccessors()[0];
-  HBasicBlock* succ2 = block2->GetSuccessors()[0];
-  return succ1 == succ2 && succ1->GetPredecessors().size() == 2u;
-}
-
-// Returns true if the outcome of the branching matches the boolean value of
-// the branching condition.
-static bool PreservesCondition(HInstruction* input_true, HInstruction* input_false) {
-  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsOne()
-      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsZero();
-}
-
-// Returns true if the outcome of the branching is exactly opposite of the
-// boolean value of the branching condition.
-static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false) {
-  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsZero()
-      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
-}
-
-// Returns an instruction with the opposite boolean value from 'cond'.
-static HInstruction* GetOppositeCondition(HInstruction* cond) {
-  HGraph* graph = cond->GetBlock()->GetGraph();
-  ArenaAllocator* allocator = graph->GetArena();
-
-  if (cond->IsCondition()) {
-    HInstruction* lhs = cond->InputAt(0);
-    HInstruction* rhs = cond->InputAt(1);
-    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
-      case kCondEQ: return new (allocator) HEqual(lhs, rhs);
-      case kCondNE: return new (allocator) HNotEqual(lhs, rhs);
-      case kCondLT: return new (allocator) HLessThan(lhs, rhs);
-      case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs);
-      case kCondGT: return new (allocator) HGreaterThan(lhs, rhs);
-      case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs);
-      case kCondB:  return new (allocator) HBelow(lhs, rhs);
-      case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs);
-      case kCondA:  return new (allocator) HAbove(lhs, rhs);
-      case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs);
-    }
-  } else if (cond->IsIntConstant()) {
-    HIntConstant* int_const = cond->AsIntConstant();
-    if (int_const->IsZero()) {
-      return graph->GetIntConstant(1);
-    } else {
-      DCHECK(int_const->IsOne());
-      return graph->GetIntConstant(0);
-    }
-  }
-  // General case when 'cond' is another instruction of type boolean,
-  // as verified by SSAChecker.
-  return new (allocator) HBooleanNot(cond);
-}
-
-void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
-  DCHECK(block->EndsWithIf());
-
-  // Find elements of the pattern.
-  HIf* if_instruction = block->GetLastInstruction()->AsIf();
-  HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
-  HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
-  if (!BlocksDoMergeTogether(true_block, false_block)) {
-    return;
-  }
-  HBasicBlock* merge_block = true_block->GetSuccessors()[0];
-  if (!merge_block->HasSinglePhi()) {
-    return;
-  }
-  HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
-  HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
-  HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
-
-  // Check if the selection negates/preserves the value of the condition and
-  // if so, generate a suitable replacement instruction.
-  HInstruction* if_condition = if_instruction->InputAt(0);
-
-  // Don't change FP compares.  The definition of compares involving NaNs forces
-  // the compares to be done as written by the user.
-  if (if_condition->IsCondition() &&
-      Primitive::IsFloatingPointType(if_condition->InputAt(0)->GetType())) {
-    return;
-  }
-
-  HInstruction* replacement;
-  if (NegatesCondition(true_value, false_value)) {
-    replacement = GetOppositeCondition(if_condition);
-    if (replacement->GetBlock() == nullptr) {
-      block->InsertInstructionBefore(replacement, if_instruction);
-    }
-  } else if (PreservesCondition(true_value, false_value)) {
-    replacement = if_condition;
-  } else {
-    return;
-  }
-
-  // Replace the selection outcome with the new instruction.
-  phi->ReplaceWith(replacement);
-  merge_block->RemovePhi(phi);
-
-  // Delete the true branch and merge the resulting chain of blocks
-  // 'block->false_block->merge_block' into one.
-  true_block->DisconnectAndDelete();
-  block->MergeWith(false_block);
-  block->MergeWith(merge_block);
-
-  // No need to update any dominance information, as we are simplifying
-  // a simple diamond shape, where the join block is merged with the
-  // entry block. Any following blocks would have had the join block
-  // as a dominator, and `MergeWith` handles changing that to the
-  // entry block.
-}
-
-void HBooleanSimplifier::Run() {
-  // Iterate in post order in the unlikely case that removing one occurrence of
-  // the selection pattern empties a branch block of another occurrence.
-  // Otherwise the order does not matter.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    if (!block->EndsWithIf()) continue;
-
-    // If condition is negated, remove the negation and swap the branches.
-    TryRemovingNegatedCondition(block);
-
-    // If this is a boolean-selection diamond pattern, replace its result with
-    // the condition value (or its negation) and simplify the graph.
-    TryRemovingBooleanSelection(block);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
deleted file mode 100644
index e12a12c..0000000
--- a/compiler/optimizing/boolean_simplifier.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This optimization recognizes two common patterns:
-//  (a) Boolean selection: Casting a boolean to an integer or negating it is
-//      carried out with an If statement selecting from zero/one integer
-//      constants. Because Boolean values are represented as zero/one, the
-//      pattern can be replaced with the condition instruction itself or its
-//      negation, depending on the layout.
-//  (b) Negated condition: Instruction simplifier may replace an If's condition
-//      with a boolean value. If this value is the result of a Boolean negation,
-//      the true/false branches can be swapped and negation removed.
-
-// Example: Negating a boolean value
-//     B1:
-//       z1   ParameterValue
-//       i2   IntConstant 0
-//       i3   IntConstant 1
-//       v4   Goto B2
-//     B2:
-//       z5   NotEquals [ z1 i2 ]
-//       v6   If [ z5 ] then B3 else B4
-//     B3:
-//       v7   Goto B5
-//     B4:
-//       v8   Goto B5
-//     B5:
-//       i9   Phi [ i3 i2 ]
-//       v10  Return [ i9 ]
-// turns into
-//     B1:
-//       z1   ParameterValue
-//       i2   IntConstant 0
-//       v4   Goto B2
-//     B2:
-//       z11  Equals [ z1 i2 ]
-//       v10  Return [ z11 ]
-//     B3, B4, B5: removed
-
-// Note: in order to recognize empty blocks, this optimization must be run
-// after the instruction simplifier has removed redundant suspend checks.
-
-#ifndef ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
-#define ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
-
-#include "optimization.h"
-
-namespace art {
-
-class HBooleanSimplifier : public HOptimization {
- public:
-  explicit HBooleanSimplifier(HGraph* graph)
-    : HOptimization(graph, kBooleanSimplifierPassName) {}
-
-  void Run() OVERRIDE;
-
-  static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
-
- private:
-  void TryRemovingNegatedCondition(HBasicBlock* block);
-  void TryRemovingBooleanSelection(HBasicBlock* block);
-
-  DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index bcc3240..8aefd9e 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/arena_containers.h"
 #include "induction_var_range.h"
+#include "side_effects_analysis.h"
 #include "nodes.h"
 
 namespace art {
@@ -62,28 +63,50 @@
     return true;
   }
 
+  // Returns true if `instruction` can be expressed as "left_instruction + right_constant".
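+  // Chains of adds/subs of constants are folded as long as the folded
+  // constant does not overflow; e.g. ((x + 2) - 3) + 4 yields
+  // left_instruction = x and right_constant = 3.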
   static bool IsAddOrSubAConstant(HInstruction* instruction,
-                                  HInstruction** left_instruction,
-                                  int* right_constant) {
-    if (instruction->IsAdd() || instruction->IsSub()) {
+                                  /* out */ HInstruction** left_instruction,
+                                  /* out */ int32_t* right_constant) {
+    HInstruction* left_so_far = nullptr;
+    int32_t right_so_far = 0;
+    while (instruction->IsAdd() || instruction->IsSub()) {
       HBinaryOperation* bin_op = instruction->AsBinaryOperation();
       HInstruction* left = bin_op->GetLeft();
       HInstruction* right = bin_op->GetRight();
       if (right->IsIntConstant()) {
-        *left_instruction = left;
-        int32_t c = right->AsIntConstant()->GetValue();
-        *right_constant = instruction->IsAdd() ? c : -c;
-        return true;
+        int32_t v = right->AsIntConstant()->GetValue();
+        int32_t c = instruction->IsAdd() ? v : -v;
+        if (!WouldAddOverflowOrUnderflow(right_so_far, c)) {
+          instruction = left;
+          left_so_far = left;
+          right_so_far += c;
+          continue;
+        }
       }
+      break;
     }
-    *left_instruction = nullptr;
-    *right_constant = 0;
-    return false;
+    // Return result: either false and "null+0" or true and "instr+constant".
+    *left_instruction = left_so_far;
+    *right_constant = right_so_far;
+    return left_so_far != nullptr;
+  }
+
+  // Expresses any instruction as a value bound.
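+  // For example, the constant 7 becomes (null, 7), `x + 3` becomes (x, 3),
+  // and any other instruction `i` becomes (i, 0).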
+  static ValueBound AsValueBound(HInstruction* instruction) {
+    if (instruction->IsIntConstant()) {
+      return ValueBound(nullptr, instruction->AsIntConstant()->GetValue());
+    }
+    HInstruction *left;
+    int32_t right;
+    if (IsAddOrSubAConstant(instruction, &left, &right)) {
+      return ValueBound(left, right);
+    }
+    return ValueBound(instruction, 0);
   }
 
   // Try to detect useful value bound format from an instruction, e.g.
   // a constant or array length related value.
-  static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) {
+  static ValueBound DetectValueBoundFromValue(HInstruction* instruction, /* out */ bool* found) {
     DCHECK(instruction != nullptr);
     if (instruction->IsIntConstant()) {
       *found = true;
@@ -175,6 +198,24 @@
     return false;
   }
 
+  // Returns if it's certain this->bound > `bound`.
+  bool GreaterThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ > bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
+  // Returns if it's certain this->bound < `bound`.
+  bool LessThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ < bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
   // Try to narrow lower bound. Returns the greatest of the two if possible.
   // Pick one if they are not comparable.
   static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
@@ -208,7 +249,7 @@
   // Add a constant to a ValueBound.
   // `overflow` or `underflow` will return whether the resulting bound may
   // overflow or underflow an int.
-  ValueBound Add(int32_t c, bool* overflow, bool* underflow) const {
+  ValueBound Add(int32_t c, /* out */ bool* overflow, /* out */ bool* underflow) const {
     *overflow = *underflow = false;
     if (c == 0) {
       return *this;
@@ -252,157 +293,6 @@
   int32_t constant_;
 };
 
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
-  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
-      : induction_variable_(induction_variable),
-        found_array_length_(nullptr),
-        offset_low_(std::numeric_limits<int32_t>::max()),
-        offset_high_(std::numeric_limits<int32_t>::min()) {
-    Run();
-  }
-
-  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
-  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
-  int32_t GetOffsetLow() const { return offset_low_; }
-  int32_t GetOffsetHigh() const { return offset_high_; }
-
-  // Returns if `block` that is in loop_info may exit the loop, unless it's
-  // the loop header for loop_info.
-  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
-    DCHECK(loop_info->Contains(*block));
-    if (block == loop_info->GetHeader()) {
-      // Loop header of loop_info. Exiting loop is normal.
-      return false;
-    }
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      if (!loop_info->Contains(*successor)) {
-        // One of the successors exits the loop.
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
-    for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
-      if (!block->Dominates(back_edge)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void Run() {
-    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
-    HBasicBlock* block = it_loop.Current();
-    DCHECK(block == induction_variable_->GetBlock());
-    // Skip loop header. Since narrowed value range of a MonotonicValueRange only
-    // applies to the loop body (after the test at the end of the loop header).
-    it_loop.Advance();
-    for (; !it_loop.Done(); it_loop.Advance()) {
-      block = it_loop.Current();
-      DCHECK(block->IsInLoop());
-      if (!DominatesAllBackEdges(block, loop_info)) {
-        // In order not to trigger deoptimization unnecessarily, make sure
-        // that all array accesses collected are really executed in the loop.
-        // For array accesses in a branch inside the loop, don't collect the
-        // access. The bounds check in that branch might not be eliminated.
-        continue;
-      }
-      if (EarlyExit(block, loop_info)) {
-        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
-        // that the loop will loop through the full monotonic value range from
-        // initial_ to end_. So adding deoptimization might be too aggressive and can
-        // trigger deoptimization unnecessarily even if the loop won't actually throw
-        // AIOOBE.
-        found_array_length_ = nullptr;
-        return;
-      }
-      for (HInstruction* instruction = block->GetFirstInstruction();
-           instruction != nullptr;
-           instruction = instruction->GetNext()) {
-        if (!instruction->IsBoundsCheck()) {
-          continue;
-        }
-
-        HInstruction* length_value = instruction->InputAt(1);
-        if (length_value->IsIntConstant()) {
-          // TODO: may optimize for constant case.
-          continue;
-        }
-
-        if (length_value->IsPhi()) {
-          // When adding deoptimizations in outer loops, we might create
-          // a phi for the array length, and update all uses of the
-          // length in the loop to that phi. Therefore, inner loops having
-          // bounds checks on the same array will use that phi.
-          // TODO: handle these cases.
-          continue;
-        }
-
-        DCHECK(length_value->IsArrayLength());
-        HArrayLength* array_length = length_value->AsArrayLength();
-
-        HInstruction* array = array_length->InputAt(0);
-        if (array->IsNullCheck()) {
-          array = array->AsNullCheck()->InputAt(0);
-        }
-        if (loop_info->Contains(*array->GetBlock())) {
-          // Array is defined inside the loop. Skip.
-          continue;
-        }
-
-        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
-          // There is already access for another array recorded for the loop.
-          // TODO: handle multiple arrays.
-          continue;
-        }
-
-        HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
-        HInstruction* left = index;
-        int32_t right = 0;
-        if (left == induction_variable_ ||
-            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
-             left == induction_variable_)) {
-          // For patterns like array[i] or array[i + 2].
-          if (right < offset_low_) {
-            offset_low_ = right;
-          }
-          if (right > offset_high_) {
-            offset_high_ = right;
-          }
-        } else {
-          // Access not in induction_variable/(induction_variable_ + constant)
-          // format. Skip.
-          continue;
-        }
-        // Record this array.
-        found_array_length_ = array_length;
-      }
-    }
-  }
-
- private:
-  // The instruction that corresponds to a MonotonicValueRange.
-  HInstruction* induction_variable_;
-
-  // The array length of the array that's accessed inside the loop body.
-  HArrayLength* found_array_length_;
-
-  // The lowest and highest constant offsets relative to induction variable
-  // instruction_ in all array accesses.
-  // If array access are: array[i-1], array[i], array[i+1],
-  // offset_low_ is -1 and offset_high is 1.
-  int32_t offset_low_;
-  int32_t offset_high_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +390,13 @@
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
         initial_(initial),
-        end_(nullptr),
-        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
-  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
   ValueBound GetBound() const { return bound_; }
-  void SetEnd(HInstruction* end) { end_ = end; }
-  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
   HBasicBlock* GetLoopHeader() const {
     DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
     return induction_variable_->GetBlock();
@@ -519,23 +404,6 @@
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
-  HBasicBlock* GetLoopHeaderSuccesorInLoop() {
-    HBasicBlock* header = GetLoopHeader();
-    HInstruction* instruction = header->GetLastInstruction();
-    DCHECK(instruction->IsIf());
-    HIf* h_if = instruction->AsIf();
-    HLoopInformation* loop_info = header->GetLoopInformation();
-    bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
-    bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
-    // Just in case it's some strange loop structure.
-    if (true_successor_in_loop && false_successor_in_loop) {
-      return nullptr;
-    }
-    DCHECK(true_successor_in_loop || false_successor_in_loop);
-    return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
-  }
-
   // If it's certain that this value range fits in other_range.
   bool FitsIn(ValueRange* other_range) const OVERRIDE {
     if (other_range == nullptr) {
@@ -627,467 +495,9 @@
     }
   }
 
-  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
-  // For example, this loop:
-  //
-  //   for (int i = start; i < end; i++) {
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // will be transformed to:
-  //
-  //   int array_length_in_loop_body_if_needed;
-  //   if (start >= end) {
-  //     array_length_in_loop_body_if_needed = 0;
-  //   } else {
-  //     if (start < 1) deoptimize();
-  //     if (array == null) deoptimize();
-  //     array_length = array.length;
-  //     if (end > array_length - 1) deoptimize;
-  //     array_length_in_loop_body_if_needed = array_length;
-  //   }
-  //   for (int i = start; i < end; i++) {
-  //     // No more null check and bounds check.
-  //     // array.length value is replaced with array_length_in_loop_body_if_needed
-  //     // in the loop body.
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // We basically first go through the loop body and find those array accesses whose
-  // index is at a constant offset from the induction variable ('i' in the above example),
-  // and update offset_low and offset_high along the way. We then add the following
-  // deoptimizations in the loop pre-header (suppose end is not inclusive).
-  //   if (start < -offset_low) deoptimize();
-  //   if (end >= array.length - offset_high) deoptimize();
-  // It might be necessary to first hoist array.length (and the null check on it) out of
-  // the loop with another deoptimization.
-  //
-  // In order not to trigger deoptimization unnecessarily, we want to make a strong
-  // guarantee that no deoptimization is triggered if the loop body itself doesn't
-  // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
-  // body must throw AIOOBE).
-  // This is achieved by the following:
-  // 1) We only process loops that iterate through the full monotonic range from
-  //    initial_ to end_. We do the following checks to make sure that's the case:
-  //    a) The loop doesn't have early exit (via break, return, etc.)
-  //    b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
-  // 2) We only collect array accesses of blocks in the loop body that dominate
-  //    all loop back edges, these array accesses are guaranteed to happen
-  //    at each loop iteration.
-  // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
-  // when the induction variable is at initial_ and end_ must be in a legal range.
-  // Since the added deoptimizations are basically checking the induction variable
-  // at initial_ and end_ values, no deoptimization will be triggered either.
-  //
-  // A special case is the loop body isn't entered at all. In that case, we may still
-  // add deoptimization due to the analysis described above. In order not to trigger
-  // deoptimization, we do a test between initial_ and end_ first and skip over
-  // the added deoptimization.
-  ValueRange* NarrowWithDeoptimization() {
-    if (increment_ != 1 && increment_ != -1) {
-      // In order not to trigger deoptimization unnecessarily, we want to
-      // make sure the loop iterates through the full range from initial_ to
-      // end_ so that boundaries are covered by the loop. An increment of 2,
-      // for example, may skip end_.
-      return this;
-    }
-
-    if (end_ == nullptr) {
-      // No full info to add deoptimization.
-      return this;
-    }
-
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't add a check in loop pre-header if the value isn't available there.
-      return this;
-    }
-
-    ArrayAccessInsideLoopFinder finder(induction_variable_);
-
-    if (!finder.HasFoundArrayLength()) {
-      // No array access was found inside the loop that can benefit
-      // from deoptimization.
-      return this;
-    }
-
-    if (!AddDeoptimization(finder)) {
-      return this;
-    }
-
-    // After added deoptimizations, induction variable fits in
-    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
-    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
-    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
-    // We've narrowed the range after added deoptimizations.
-    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
-  }
-
-  // Returns true if adding a (constant >= value) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    // See if we can prove the relationship first.
-    if (value->IsIntConstant()) {
-      if (value->AsIntConstant()->GetValue() >= constant) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Try to filter out cases that the loop entry test will never be true.
-  bool LoopEntryTestUseful() {
-    if (initial_->IsIntConstant() && end_->IsIntConstant()) {
-      int32_t initial_val = initial_->AsIntConstant()->GetValue();
-      int32_t end_val = end_->AsIntConstant()->GetValue();
-      if (increment_ == 1) {
-        if (inclusive_) {
-          return initial_val > end_val;
-        } else {
-          return initial_val >= end_val;
-        }
-      } else {
-        DCHECK_EQ(increment_, -1);
-        if (inclusive_) {
-          return initial_val < end_val;
-        } else {
-          return initial_val <= end_val;
-        }
-      }
-    }
-    return true;
-  }
-
-  // Returns the block for adding deoptimization.
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    // Deoptimization is only added when both initial_ and end_ are defined
-    // before the loop.
-    DCHECK(initial_->GetBlock()->Dominates(pre_header));
-    DCHECK(end_->GetBlock()->Dominates(pre_header));
-
-    // If it can be proven the loop body is definitely entered (unless exception
-    // is thrown in the loop header for which triggering deoptimization is fine),
-    // there is no need for tranforming the loop. In that case, deoptimization
-    // will just be added in the loop pre-header.
-    if (!LoopEntryTestUseful()) {
-      return pre_header;
-    }
-
-    HGraph* graph = header->GetGraph();
-    graph->TransformLoopHeaderForBCE(header);
-    HBasicBlock* new_pre_header = header->GetDominator();
-    DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
-    HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
-
-    dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
-    deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
-    new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
-    return deopt_block;
-  }
-
-  // Adds a test between initial_ and end_ to see if the loop body is entered.
-  // If the loop body isn't entered at all, it jumps to the loop pre-header (after
-  // transformation) to avoid any deoptimization.
-  void AddLoopBodyEntryTest() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    HBasicBlock* if_block = pre_header->GetDominator();
-    HGraph* graph = header->GetGraph();
-
-    HCondition* cond;
-    if (increment_ == 1) {
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
-      }
-    } else {
-      DCHECK_EQ(increment_, -1);
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HLessThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
-      }
-    }
-    HIf* h_if = new (graph->GetArena()) HIf(cond);
-    if_block->AddInstruction(cond);
-    if_block->AddInstruction(h_if);
-  }
-
-  // Adds a check that (value >= constant), and HDeoptimize otherwise.
-  void AddDeoptimizationConstant(HInstruction* value,
-                                 int32_t constant,
-                                 HBasicBlock* deopt_block,
-                                 bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-    if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
-    }
-
-    HIntConstant* const_instr = graph->GetIntConstant(constant);
-    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
-    HDeoptimize* deoptimize = new (graph->GetArena())
-        HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
-    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-        suspend_check->GetEnvironment(), header);
-  }
-
-  // Returns true if adding a (value <= array_length + offset) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationArrayLength(HInstruction* value,
-                                       HArrayLength* array_length,
-                                       int32_t offset,
-                                       bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    if (array_length->GetBlock() == header) {
-      // array_length_in_loop_body_if_needed only has correct value when the loop
-      // body is entered. We bail out in this case. Usually array_length defined
-      // in the loop header is already hoisted by licm.
-      return false;
-    } else {
-      // array_length is defined either before the loop header already, or in
-      // the loop body since it's used in the loop body. If it's defined in the loop body,
-      // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
-      // all the uses of array_length must be dominated by its definition in the loop
-      // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
-      // array_length once the loop body is entered so all the uses of the phi will
-      // use the correct value.
-    }
-
-    if (offset > 0) {
-      // There might be overflow issue.
-      // TODO: handle this, possibly with some distance relationship between
-      // offset_low and offset_high, or using another deoptimization to make
-      // sure (array_length + offset) doesn't overflow.
-      return false;
-    }
-
-    // See if we can prove the relationship first.
-    if (value == array_length) {
-      if (offset >= 0) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
-  void AddDeoptimizationArrayLength(HInstruction* value,
-                                    HArrayLength* array_length,
-                                    int32_t offset,
-                                    HBasicBlock* deopt_block,
-                                    bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
-    // We may need to hoist null-check and array_length out of loop first.
-    if (!array_length->GetBlock()->Dominates(deopt_block)) {
-      // array_length must be defined in the loop body.
-      DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
-      DCHECK(array_length->GetBlock() != header);
-
-      HInstruction* array = array_length->InputAt(0);
-      HNullCheck* null_check = array->AsNullCheck();
-      if (null_check != nullptr) {
-        array = null_check->InputAt(0);
-      }
-      // We've already made sure the array is defined before the loop when collecting
-      // array accesses for the loop.
-      DCHECK(array->GetBlock()->Dominates(deopt_block));
-      if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
-        // Hoist null check out of loop with a deoptimization.
-        HNullConstant* null_constant = graph->GetNullConstant();
-        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
-        // TODO: for one dex_pc, share the same deoptimization slow path.
-        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
-            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
-        deopt_block->InsertInstructionBefore(
-            null_check_cond, deopt_block->GetLastInstruction());
-        deopt_block->InsertInstructionBefore(
-            null_check_deoptimize, deopt_block->GetLastInstruction());
-        // Eliminate null check in the loop.
-        null_check->ReplaceWith(array);
-        null_check->GetBlock()->RemoveInstruction(null_check);
-        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-            suspend_check->GetEnvironment(), header);
-      }
-
-      HArrayLength* new_array_length
-          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
-      deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
-      if (loop_entry_test_block_added) {
-        // Replace array_length defined inside the loop body with a phi
-        // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
-        // no vreg number for it.
-        HPhi* phi = new (graph->GetArena()) HPhi(
-            graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
-        // Set to 0 if the loop body isn't entered.
-        phi->SetRawInputAt(0, graph->GetIntConstant(0));
-        // Set to array.length if the loop body is entered.
-        phi->SetRawInputAt(1, new_array_length);
-        pre_header->AddPhi(phi);
-        array_length->ReplaceWith(phi);
-        // Make sure phi is only used after the loop body is entered.
-        if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> it(phi->GetUses());
-               !it.Done();
-               it.Advance()) {
-            HInstruction* user = it.Current()->GetUser();
-            DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
-          }
-        }
-      } else {
-        array_length->ReplaceWith(new_array_length);
-      }
-
-      array_length->GetBlock()->RemoveInstruction(array_length);
-      // Use new_array_length for deopt.
-      array_length = new_array_length;
-    }
-
-    HInstruction* added = array_length;
-    if (offset != 0) {
-      HIntConstant* offset_instr = graph->GetIntConstant(offset);
-      added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
-      deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
-    }
-    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
-    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
-    deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
-  }
-
-  // Adds deoptimizations in loop pre-header with the collected array access
-  // data so that value ranges can be established in loop body.
-  // Returns true if deoptimizations are successfully added, or if it's proven
-  // it's not necessary.
-  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
-    int32_t offset_low = finder.GetOffsetLow();
-    int32_t offset_high = finder.GetOffsetHigh();
-    HArrayLength* array_length = finder.GetFoundArrayLength();
-
-    HBasicBlock* pre_header =
-        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't move initial_ or end_ into pre_header for comparisons.
-      return false;
-    }
-
-    HBasicBlock* deopt_block;
-    bool loop_entry_test_block_added = false;
-    bool is_constant_proven, is_length_proven;
-
-    HInstruction* const_comparing_instruction;
-    int32_t const_compared_to;
-    HInstruction* array_length_comparing_instruction;
-    int32_t array_length_offset;
-    if (increment_ == 1) {
-      // Increasing from initial_ to end_.
-      const_comparing_instruction = initial_;
-      const_compared_to = -offset_low;
-      array_length_comparing_instruction = end_;
-      array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
-    } else {
-      const_comparing_instruction = end_;
-      const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
-      array_length_comparing_instruction = initial_;
-      array_length_offset = -offset_high - 1;
-    }
-
-    if (CanAddDeoptimizationConstant(const_comparing_instruction,
-                                     const_compared_to,
-                                     &is_constant_proven) &&
-        CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                        array_length,
-                                        array_length_offset,
-                                        &is_length_proven)) {
-      if (!is_constant_proven || !is_length_proven) {
-        deopt_block = TransformLoopForDeoptimizationIfNeeded();
-        loop_entry_test_block_added = (deopt_block != pre_header);
-        if (loop_entry_test_block_added) {
-          // Loop body may be entered.
-          AddLoopBodyEntryTest();
-        }
-      }
-      if (!is_constant_proven) {
-        AddDeoptimizationConstant(const_comparing_instruction,
-                                  const_compared_to,
-                                  deopt_block,
-                                  loop_entry_test_block_added);
-      }
-      if (!is_length_proven) {
-        AddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                     array_length,
-                                     array_length_offset,
-                                     deopt_block,
-                                     loop_entry_test_block_added);
-      }
-      return true;
-    }
-    return false;
-  }
-
  private:
   HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
   HInstruction* const initial_;     // Initial value.
-  HInstruction* end_;               // End value.
-  bool inclusive_;                  // Whether end value is inclusive.
   const int32_t increment_;         // Increment for each loop iteration.
   const ValueBound bound_;          // Additional value bound info for initial_.
 
@@ -1100,10 +510,10 @@
   // the deoptimization technique.
   static constexpr size_t kThresholdForAddingDeoptimize = 2;
 
-  // Very large constant index is considered as an anomaly. This is a threshold
-  // beyond which we don't bother to apply the deoptimization technique since
-  // it's likely some AIOOBE will be thrown.
-  static constexpr int32_t kMaxConstantForAddingDeoptimize =
+  // Very large lengths are considered an anomaly. This is a threshold beyond which we don't
+  // bother to apply the deoptimization technique since it's likely, or sometimes certain,
+  // that an AIOOBE will be thrown.
+  static constexpr uint32_t kMaxLengthForAddingDeoptimize =
       std::numeric_limits<int32_t>::max() - 1024 * 1024;
 
   // Added blocks for loop body entry test.
@@ -1111,33 +521,52 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BCEVisitor(HGraph* graph,
+             const SideEffectsAnalysis& side_effects,
+             HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
         maps_(graph->GetBlocks().size(),
               ArenaSafeMap<int, ValueRange*>(
                   std::less<int>(),
                   graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
               graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        first_constant_index_bounds_check_map_(
+        first_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        need_to_revisit_block_(false),
+        early_exit_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        taken_test_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        has_dom_based_dynamic_bce_(false),
         initial_block_size_(graph->GetBlocks().size()),
+        side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
-    first_constant_index_bounds_check_map_.clear();
+    first_index_bounds_check_map_.clear();
     HGraphVisitor::VisitBasicBlock(block);
-    if (need_to_revisit_block_) {
+    // We should never deoptimize from an OSR method; otherwise we might wrongly optimize
+    // code dominated by the deoptimization.
+    if (!GetGraph()->IsCompilingOsr()) {
       AddComparesWithDeoptimization(block);
-      need_to_revisit_block_ = false;
-      first_constant_index_bounds_check_map_.clear();
-      GetValueRangeMap(block)->clear();
-      HGraphVisitor::VisitBasicBlock(block);
     }
   }
 
+  void Finish() {
+    // Preserve the SSA structure, which may have been broken by adding one or more
+    // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+    InsertPhiNodes();
+
+    // Clear the loop data structures.
+    early_exit_loop_.clear();
+    taken_test_loop_.clear();
+    finite_loop_.clear();
+  }
+
  private:
   // Return the map of proven value ranges at the beginning of a basic block.
   ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1145,8 +574,7 @@
       // Added blocks don't keep value ranges.
       return nullptr;
     }
-    uint32_t block_id = basic_block->GetBlockId();
-    return &maps_[block_id];
+    return &maps_[basic_block->GetBlockId()];
   }
 
   // Traverse up the dominator tree to look for value range info.
@@ -1166,21 +594,9 @@
     return nullptr;
   }
 
-  // Return the range resulting from induction variable analysis of "instruction" when the value
-  // is used from "context", for example, an index used from a bounds-check inside a loop body.
-  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction);
-    InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
-      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
-      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
-    }
-    // Didn't find anything useful.
-    return nullptr;
+  // Helper method to assign a new range to an instruction in a given basic block.
+  void AssignRange(HBasicBlock* basic_block, HInstruction* instruction, ValueRange* range) {
+    GetValueRangeMap(basic_block)->Overwrite(instruction->GetId(), range);
   }
 
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
@@ -1190,7 +606,7 @@
     ValueRange* existing_range = LookupValueRange(instruction, basic_block);
     if (existing_range == nullptr) {
       if (range != nullptr) {
-        GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range);
+        AssignRange(successor, instruction, range);
       }
       return;
     }
@@ -1202,8 +618,7 @@
         return;
       }
     }
-    ValueRange* narrowed_range = existing_range->Narrow(range);
-    GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range);
+    AssignRange(successor, instruction, existing_range->Narrow(range));
   }
 
   // Special case that we may simultaneously narrow two MonotonicValueRange's to
@@ -1328,17 +743,6 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() < 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondLT);
-        }
-      }
-
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1362,17 +766,6 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() > 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondGT);
-        }
-      }
-
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -1395,60 +788,76 @@
             ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
+    } else if (cond == kCondNE || cond == kCondEQ) {
+      if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) {
+        // Special case:
+        //   length == [c, d] yields [c, d] along true
+        //   length != [c, d] yields [c, d] along false
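+        // For illustration (hypothetical code): after "if (a.length == 3)", a.length is
+        // known to lie in [3, 3] along the true branch, so constant-index accesses
+        // a[0]..a[2] in that branch no longer need bounds checks.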
+        if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) {
+          ValueRange* new_range = new (GetGraph()->GetArena())
+              ValueRange(GetGraph()->GetArena(), lower, upper);
+          ApplyRangeFromComparison(
+              left, block, cond == kCondEQ ? true_successor : false_successor, new_range);
+        }
+        // In addition:
+        //   length == 0 yields [1, max] along false
+        //   length != 0 yields [1, max] along true
+        if (lower.GetConstant() == 0 && upper.GetConstant() == 0) {
+          ValueRange* new_range = new (GetGraph()->GetArena())
+              ValueRange(GetGraph()->GetArena(), ValueBound(nullptr, 1), ValueBound::Max());
+          ApplyRangeFromComparison(
+              left, block, cond == kCondEQ ? false_successor : true_successor, new_range);
+        }
+      }
     }
   }
 
-  void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
     HBasicBlock* block = bounds_check->GetBlock();
     HInstruction* index = bounds_check->InputAt(0);
     HInstruction* array_length = bounds_check->InputAt(1);
     DCHECK(array_length->IsIntConstant() ||
            array_length->IsArrayLength() ||
            array_length->IsPhi());
-
-    if (array_length->IsPhi()) {
-      // Input 1 of the phi contains the real array.length once the loop body is
-      // entered. That value will be used for bound analysis. The graph is still
-      // strictly in SSA form.
-      array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
-    }
-
+    bool try_dynamic_bce = true;
+    // Analyze index range.
     if (!index->IsIntConstant()) {
+      // Non-constant index.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by local analysis.
+      // Try index range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+        ReplaceInstruction(bounds_check, index);
         return;
       }
-      // Try range obtained by induction variable analysis.
-      index_range = LookupInductionRange(bounds_check, index);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+      // Try index range obtained by induction variable analysis.
+      // Disables dynamic bce if OOB is certain.
+      if (InductionRangeFitsIn(&array_range, bounds_check, &try_dynamic_bce)) {
+        ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
+      // Constant index.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
         // Will always throw exception.
         return;
-      }
-      if (array_length->IsIntConstant()) {
+      } else if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
         }
         return;
       }
-
+      // Analyze array length range.
       DCHECK(array_length->IsArrayLength());
       ValueRange* existing_range = LookupValueRange(array_length, block);
       if (existing_range != nullptr) {
         ValueBound lower = existing_range->GetLower();
         DCHECK(lower.IsConstant());
         if (constant < lower.GetConstant()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
           return;
         } else {
           // Existing range isn't strong enough to eliminate the bounds check.
@@ -1456,57 +865,66 @@
           // bounds check.
         }
       }
-
-      if (first_constant_index_bounds_check_map_.find(array_length->GetId()) ==
-          first_constant_index_bounds_check_map_.end()) {
-        // Remember the first bounds check against array_length of a constant index.
-        // That bounds check instruction has an associated HEnvironment where we
-        // may add an HDeoptimize to eliminate bounds checks of constant indices
-        // against array_length.
-        first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check);
-      } else {
-        // We've seen it at least twice. It's beneficial to introduce a compare with
-        // deoptimization fallback to eliminate the bounds checks.
-        need_to_revisit_block_ = true;
-      }
-
       // Once we have an array access like 'array[5] = 1', we record array.length >= 6.
       // We currently don't do it for non-constant index since a valid array[i] can't prove
       // a valid array[i-1] yet due to the lower bound side.
       if (constant == std::numeric_limits<int32_t>::max()) {
         // Max() as an index will definitely throw AIOOBE.
         return;
+      } else {
+        ValueBound lower = ValueBound(nullptr, constant + 1);
+        ValueBound upper = ValueBound::Max();
+        ValueRange* range = new (GetGraph()->GetArena())
+            ValueRange(GetGraph()->GetArena(), lower, upper);
+        AssignRange(block, array_length, range);
       }
-      ValueBound lower = ValueBound(nullptr, constant + 1);
-      ValueBound upper = ValueBound::Max();
-      ValueRange* range = new (GetGraph()->GetArena())
-          ValueRange(GetGraph()->GetArena(), lower, upper);
-      GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
-  }
 
-  void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
-    bounds_check->ReplaceWith(index);
-    bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+    // If static analysis fails, and OOB is not certain, try dynamic elimination.
+    if (try_dynamic_bce) {
+      // Try loop-based dynamic elimination.
+      HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation();
+      bool needs_finite_test = false;
+      bool needs_taken_test = false;
+      if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
+          induction_range_.CanGenerateCode(
+              bounds_check, index, &needs_finite_test, &needs_taken_test) &&
+          CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+          // Do this test last, since it may generate code.
+          CanHandleLength(loop, array_length, needs_taken_test)) {
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        TransformLoopForDynamicBCE(loop, bounds_check);
+        return;
+      }
+      // Otherwise, prepare dominator-based dynamic elimination.
+      if (first_index_bounds_check_map_.find(array_length->GetId()) ==
+          first_index_bounds_check_map_.end()) {
+        // Remember the first bounds check against each array_length. That bounds check
+        // instruction has an associated HEnvironment where we may add an HDeoptimize
+        // to eliminate subsequent bounds checks against the same array_length.
+        first_index_bounds_check_map_.Put(array_length->GetId(), bounds_check);
+      }
+    }
   }
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
     DCHECK(phi->IsLoopHeaderPhi());
+    HConstInputsRef inputs = phi->GetInputs();
     // Start with input 1. Input 0 is from the incoming block.
-    HInstruction* input1 = phi->InputAt(1);
+    const HInstruction* input1 = inputs[1];
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
         *phi->GetBlock()->GetPredecessors()[1]));
-    for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+    for (size_t i = 2; i < inputs.size(); ++i) {
       DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
           *phi->GetBlock()->GetPredecessors()[i]));
-      if (input1 != phi->InputAt(i)) {
+      if (input1 != inputs[i]) {
         return false;
       }
     }
     return true;
   }
 
-  void VisitPhi(HPhi* phi) {
+  void VisitPhi(HPhi* phi) OVERRIDE {
     if (phi->IsLoopHeaderPhi()
         && (phi->GetType() == Primitive::kPrimInt)
         && HasSameInputAtBackEdges(phi)) {
@@ -1547,57 +965,20 @@
                 increment,
                 bound);
           }
-          GetValueRangeMap(phi->GetBlock())->Overwrite(phi->GetId(), range);
+          AssignRange(phi->GetBlock(), phi, range);
         }
       }
     }
   }
 
-  void VisitIf(HIf* instruction) {
+  void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
-      IfCondition cmp = cond->GetCondition();
-      if (cmp == kCondGT || cmp == kCondGE ||
-          cmp == kCondLT || cmp == kCondLE) {
-        HInstruction* left = cond->GetLeft();
-        HInstruction* right = cond->GetRight();
-        HandleIf(instruction, left, right, cmp);
-
-        HBasicBlock* block = instruction->GetBlock();
-        ValueRange* left_range = LookupValueRange(left, block);
-        if (left_range == nullptr) {
-          return;
-        }
-
-        if (left_range->IsMonotonicValueRange() &&
-            block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
-          // The comparison is for an induction variable in the loop header.
-          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
-          HBasicBlock* loop_body_successor =
-            left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
-          if (loop_body_successor == nullptr) {
-            // In case it's some strange loop structure.
-            return;
-          }
-          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
-          if ((new_left_range == left_range) ||
-              // Range narrowed with deoptimization is usually more useful than
-              // a constant range.
-              new_left_range->IsConstantValueRange()) {
-            // We are not successful in narrowing the monotonic value range to
-            // a regular value range. Try using deoptimization.
-            new_left_range = left_range->AsMonotonicValueRange()->
-                NarrowWithDeoptimization();
-            if (new_left_range != left_range) {
-              GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
-            }
-          }
-        }
-      }
+      HandleIf(instruction, cond->GetLeft(), cond->GetRight(), cond->GetCondition());
     }
   }
 
-  void VisitAdd(HAdd* add) {
+  void VisitAdd(HAdd* add) OVERRIDE {
     HInstruction* right = add->GetRight();
     if (right->IsIntConstant()) {
       ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1606,12 +987,12 @@
       }
       ValueRange* range = left_range->Add(right->AsIntConstant()->GetValue());
       if (range != nullptr) {
-        GetValueRangeMap(add->GetBlock())->Overwrite(add->GetId(), range);
+        AssignRange(add->GetBlock(), add, range);
       }
     }
   }
 
-  void VisitSub(HSub* sub) {
+  void VisitSub(HSub* sub) OVERRIDE {
     HInstruction* left = sub->GetLeft();
     HInstruction* right = sub->GetRight();
     if (right->IsIntConstant()) {
@@ -1621,7 +1002,7 @@
       }
       ValueRange* range = left_range->Add(-right->AsIntConstant()->GetValue());
       if (range != nullptr) {
-        GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+        AssignRange(sub->GetBlock(), sub, range);
         return;
       }
     }
@@ -1661,7 +1042,7 @@
                     GetGraph()->GetArena(),
                     ValueBound(nullptr, right_const - upper.GetConstant()),
                     ValueBound(array_length, right_const - lower.GetConstant()));
-                GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+                AssignRange(sub->GetBlock(), sub, range);
               }
             }
           }
@@ -1709,23 +1090,23 @@
           GetGraph()->GetArena(),
           ValueBound(nullptr, std::numeric_limits<int32_t>::min()),
           ValueBound(left, 0));
-      GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
+      AssignRange(instruction->GetBlock(), instruction, range);
     }
   }
 
-  void VisitDiv(HDiv* div) {
+  void VisitDiv(HDiv* div) OVERRIDE {
     FindAndHandlePartialArrayLength(div);
   }
 
-  void VisitShr(HShr* shr) {
+  void VisitShr(HShr* shr) OVERRIDE {
     FindAndHandlePartialArrayLength(shr);
   }
 
-  void VisitUShr(HUShr* ushr) {
+  void VisitUShr(HUShr* ushr) OVERRIDE {
     FindAndHandlePartialArrayLength(ushr);
   }
 
-  void VisitAnd(HAnd* instruction) {
+  void VisitAnd(HAnd* instruction) OVERRIDE {
     if (instruction->GetRight()->IsIntConstant()) {
       int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
       if (constant > 0) {
@@ -1735,12 +1116,12 @@
             GetGraph()->GetArena(),
             ValueBound(nullptr, 0),
             ValueBound(nullptr, constant));
-        GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
+        AssignRange(instruction->GetBlock(), instruction, range);
       }
     }
   }
 
-  void VisitNewArray(HNewArray* new_array) {
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->InputAt(0);
     if (!len->IsIntConstant()) {
       HInstruction *left;
@@ -1759,106 +1140,692 @@
         if (existing_range != nullptr) {
           range = existing_range->Narrow(range);
         }
-        GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range);
+        AssignRange(new_array->GetBlock(), left, range);
       }
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) {
-    // Right now it's only HLessThanOrEqual.
-    DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
-    HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
-    HInstruction* instruction = less_than_or_equal->InputAt(0);
-    if (instruction->IsArrayLength()) {
-      HInstruction* constant = less_than_or_equal->InputAt(1);
-      DCHECK(constant->IsIntConstant());
-      DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize);
-      ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1);
-      ValueRange* range = new (GetGraph()->GetArena())
-          ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max());
-      GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range);
+  /**
+    * After null/bounds checks are eliminated, some invariant array references
+    * may be exposed underneath that can be hoisted out of the loop to the
+    * preheader or, in combination with dynamic bce, to the deoptimization block.
+    *
+    * for (int i = 0; i < n; i++) {
+    *                                <-------+
+    *   for (int j = 0; j < n; j++)          |
+    *     a[i][j] = 0;               --a[i]--+
+    * }
+    *
+    * Note: this optimization is no longer applied after dominator-based dynamic deoptimization
+    * has occurred (see AddCompareWithDeoptimization()), since in those cases it would be
+    * unsafe to hoist array references across their deoptimization instruction inside a loop.
+    */
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    if (!has_dom_based_dynamic_bce_ && array_get->IsInLoop()) {
+      HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
+      if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) &&
+          loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
+        SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+        if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+          // We can hoist ArrayGet only if its execution is guaranteed on every iteration.
+          // In other words, only if the block of the ArrayGet dominates all back edges.
+          if (loop->DominatesAllBackEdges(array_get->GetBlock())) {
+            HoistToPreHeaderOrDeoptBlock(loop, array_get);
+          }
+        }
+      }
     }
   }
 
-  void AddCompareWithDeoptimization(HInstruction* array_length,
-                                    HIntConstant* const_instr,
-                                    HBasicBlock* block) {
-    DCHECK(array_length->IsArrayLength());
-    ValueRange* range = LookupValueRange(array_length, block);
-    ValueBound lower_bound = range->GetLower();
-    DCHECK(lower_bound.IsConstant());
-    DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize);
-    // Note that the lower bound of the array length may have been refined
-    // through other instructions (such as `HNewArray(length - 4)`).
-    DCHECK_LE(const_instr->GetValue() + 1, lower_bound.GetConstant());
+  /** Performs dominator-based dynamic elimination on a suitable set of bounds checks. */
+  void AddCompareWithDeoptimization(HBasicBlock* block,
+                                    HInstruction* array_length,
+                                    HInstruction* base,
+                                    int32_t min_c, int32_t max_c) {
+    HBoundsCheck* bounds_check =
+        first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck();
+    // Construct deoptimization on single or double bounds on range [base+min_c,base+max_c],
+    // for example just the bound 3 for a[0]..a[3], or both base-1 and base+3 for
+    // a[base-1]..a[base+3], since we assume any value in between may occur too.
+    // In code, using unsigned comparisons:
+    // (1) constants only
+    //       if (max_c >= a.length) deoptimize;
+    // (2) general case
+    //       if (base+min_c >  base+max_c) deoptimize;
+    //       if (base+max_c >= a.length  ) deoptimize;
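+    // For illustration (hypothetical values): for a[base-1]..a[base+3], min_c = -1 and
+    // max_c = 3, so the tests are "base-1 >u base+3" and "base+3 >=u a.length". The
+    // unsigned comparisons also catch negative indices: e.g. base = 0 makes base-1
+    // wrap around to 0xFFFFFFFF, which triggers the first deoptimization test.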
+    static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
+                  "Incorrect max length may be subject to arithmetic wrap-around");
+    HInstruction* upper = GetGraph()->GetIntConstant(max_c);
+    if (base == nullptr) {
+      DCHECK_GE(min_c, 0);
+    } else {
+      HInstruction* lower = new (GetGraph()->GetArena())
+          HAdd(Primitive::kPrimInt, base, GetGraph()->GetIntConstant(min_c));
+      upper = new (GetGraph()->GetArena()) HAdd(Primitive::kPrimInt, base, upper);
+      block->InsertInstructionBefore(lower, bounds_check);
+      block->InsertInstructionBefore(upper, bounds_check);
+      InsertDeoptInBlock(bounds_check, new (GetGraph()->GetArena()) HAbove(lower, upper));
+    }
+    InsertDeoptInBlock(
+        bounds_check, new (GetGraph()->GetArena()) HAboveOrEqual(upper, array_length));
+    // Flag that this kind of deoptimization has occurred.
+    has_dom_based_dynamic_bce_ = true;
+  }
 
-    // If array_length is less than lower_const, deoptimize.
-    HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get(
-        array_length->GetId())->AsBoundsCheck();
-    HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr);
-    HDeoptimize* deoptimize = new (GetGraph()->GetArena())
-        HDeoptimize(cond, bounds_check->GetDexPc());
-    block->InsertInstructionBefore(cond, bounds_check);
+  /** Attempts dominator-based dynamic elimination on remaining candidates. */
+  void AddComparesWithDeoptimization(HBasicBlock* block) {
+    for (const auto& entry : first_index_bounds_check_map_) {
+      HBoundsCheck* bounds_check = entry.second;
+      HInstruction* index = bounds_check->InputAt(0);
+      HInstruction* array_length = bounds_check->InputAt(1);
+      if (!array_length->IsArrayLength()) {
+        continue;  // disregard phis and constants
+      }
+      // Collect all remaining bounds checks that are related as "a[base + constant]" for a
+      // base instruction (possibly absent) and various constants. Note that no attempt is
+      // made to partition the set into matching subsets (viz. a[0], a[1], a[base+1], and
+      // a[base+2] are all considered one set).
+      // TODO: would such a partitioning be worthwhile?
+      ValueBound value = ValueBound::AsValueBound(index);
+      HInstruction* base = value.GetInstruction();
+      int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+      int32_t max_c = value.GetConstant();
+      ArenaVector<HBoundsCheck*> candidates(
+          GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+      ArenaVector<HBoundsCheck*> standby(
+          GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+      for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+        // Another bounds check in same or dominated block?
+        HInstruction* user = use.GetUser();
+        HBasicBlock* other_block = user->GetBlock();
+        if (user->IsBoundsCheck() && block->Dominates(other_block)) {
+          HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+          HInstruction* other_index = other_bounds_check->InputAt(0);
+          HInstruction* other_array_length = other_bounds_check->InputAt(1);
+          ValueBound other_value = ValueBound::AsValueBound(other_index);
+          if (array_length == other_array_length && base == other_value.GetInstruction()) {
+            // Reject certain OOB if BoundsCheck(l, l) occurs on the considered subset.
+            if (array_length == other_index) {
+              candidates.clear();
+              standby.clear();
+              break;
+            }
+            // Since a subsequent dominated block could be under a conditional, only accept
+            // the other bounds check if it is in the same block or both blocks dominate the exit.
+            // TODO: we could improve this by testing proper post-dominance, or even if this
+            //       constant is seen along *all* conditional paths that follow.
+            HBasicBlock* exit = GetGraph()->GetExitBlock();
+            if (block == user->GetBlock() ||
+                (block->Dominates(exit) && other_block->Dominates(exit))) {
+              int32_t other_c = other_value.GetConstant();
+              min_c = std::min(min_c, other_c);
+              max_c = std::max(max_c, other_c);
+              candidates.push_back(other_bounds_check);
+            } else {
+              // Add this candidate later only if it falls into the range.
+              standby.push_back(other_bounds_check);
+            }
+          }
+        }
+      }
+      // Add standby candidates that fall in selected range.
+      for (HBoundsCheck* other_bounds_check : standby) {
+        HInstruction* other_index = other_bounds_check->InputAt(0);
+        int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+        if (min_c <= other_c && other_c <= max_c) {
+          candidates.push_back(other_bounds_check);
+        }
+      }
+      // Perform dominator-based deoptimization if it seems profitable, where we eliminate
+      // bounds checks and replace these with deopt checks that guard against any possible
+      // OOB. Note that we reject cases where the distance of the min_c..max_c range gets
+      // close to the maximum possible array length, since those cases are likely to always
+      // deopt (such situations do not necessarily go OOB, though, since the array could be
+      // really large, or the programmer could rely on arithmetic wrap-around from max to min).
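+      // For illustration (hypothetical values): checks on a[i-2], a[i], and a[i+5] give
+      // min_c = -2 and max_c = 5, hence distance = 7, which easily passes; a pair such
+      // as a[0] and a[INT32_MAX - 8] yields a distance near the maximum array length
+      // and is rejected as an almost-certain deoptimization.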
+      size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1);  // extra test?
+      uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+      if (candidates.size() >= threshold &&
+          (base != nullptr || min_c >= 0) &&  // reject certain OOB
+           distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
+        AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
+        for (HBoundsCheck* other_bounds_check : candidates) {
+          // Only replace if still in the graph. This avoids visiting the same
+          // bounds check twice if it occurred multiple times in the use list.
+          if (other_bounds_check->IsInBlock()) {
+            ReplaceInstruction(other_bounds_check, other_bounds_check->InputAt(0));
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Returns true if static range analysis based on induction variables can determine that the
+   * bounds check on the given array range is always satisfied by the computed index range. The
+   * output parameter try_dynamic_bce is set to false if OOB is certain.
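+   *
+   * For illustration (hypothetical loop): in "for (int i = 0; i < a.length; i++) a[i] = 0;"
+   * the index range [0, a.length-1] fits in the array range and the check is removed
+   * statically; for an access a[i+1] the upper bound reaches a.length, so static
+   * elimination fails (dynamic bce may still apply).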
+   */
+  bool InductionRangeFitsIn(ValueRange* array_range,
+                            HBoundsCheck* context,
+                            bool* try_dynamic_bce) {
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    HInstruction* index = context->InputAt(0);
+    HInstruction* hint = ValueBound::HuntForDeclaration(context->InputAt(1));
+    if (induction_range_.GetInductionRange(context, index, hint, &v1, &v2, &needs_finite_test)) {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic bce. Otherwise,
+        // use the analysis for static bce only if the loop is finite.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+        } else if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Performs loop-based dynamic elimination on a bounds check. In order to minimize the
+   * number of eventually generated tests, related bounds checks with tests that can be
+   * combined with tests for the given bounds check are collected first.
+   */
+  void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) {
+    HInstruction* index = bounds_check->InputAt(0);
+    HInstruction* array_length = bounds_check->InputAt(1);
+    DCHECK(loop->IsDefinedOutOfTheLoop(array_length));  // pre-checked
+    DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock()));
+    // Collect all bounds checks in the same loop that are related as "a[base + constant]"
+    // for a base instruction (possibly absent) and various constants.
+    ValueBound value = ValueBound::AsValueBound(index);
+    HInstruction* base = value.GetInstruction();
+    int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+    int32_t max_c = value.GetConstant();
+    ArenaVector<HBoundsCheck*> candidates(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    ArenaVector<HBoundsCheck*> standby(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) {
+        HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+        HInstruction* other_index = other_bounds_check->InputAt(0);
+        HInstruction* other_array_length = other_bounds_check->InputAt(1);
+        ValueBound other_value = ValueBound::AsValueBound(other_index);
+        int32_t other_c = other_value.GetConstant();
+        if (array_length == other_array_length && base == other_value.GetInstruction()) {
+          // Does the current basic block dominate all back edges? If not,
+          // add this candidate later only if it falls into the range.
+          if (!loop->DominatesAllBackEdges(user->GetBlock())) {
+            standby.push_back(other_bounds_check);
+            continue;
+          }
+          min_c = std::min(min_c, other_c);
+          max_c = std::max(max_c, other_c);
+          candidates.push_back(other_bounds_check);
+        }
+      }
+    }
+    // Add standby candidates that fall in selected range.
+    for (HBoundsCheck* other_bounds_check : standby) {
+      HInstruction* other_index = other_bounds_check->InputAt(0);
+      int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+      if (min_c <= other_c && other_c <= max_c) {
+        candidates.push_back(other_bounds_check);
+      }
+    }
+    // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds
+    // checks and replace these with deopt checks that guard against any possible OOB.
+    DCHECK_LT(0u, candidates.size());
+    uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+    if ((base != nullptr || min_c >= 0) &&  // reject certain OOB
+        distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
+      HBasicBlock* block = GetPreHeader(loop, bounds_check);
+      HInstruction* min_lower = nullptr;
+      HInstruction* min_upper = nullptr;
+      HInstruction* max_lower = nullptr;
+      HInstruction* max_upper = nullptr;
+      // Iterate over all bounds checks.
+      for (HBoundsCheck* other_bounds_check : candidates) {
+        // Only handle if still in the graph. This avoids visiting the same
+        // bounds check twice if it occurred multiple times in the use list.
+        if (other_bounds_check->IsInBlock()) {
+          HInstruction* other_index = other_bounds_check->InputAt(0);
+          int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+          // Generate code for either the maximum or minimum. Range analysis was already queried
+          // whether code generation for the original (and, thus, the related) bounds check was
+          // possible. It handles either loop invariants (lower is not set) or unit strides.
+          if (other_c == max_c) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
+          } else if (other_c == min_c && base != nullptr) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
+          }
+          ReplaceInstruction(other_bounds_check, other_index);
+        }
+      }
+      // In code, using unsigned comparisons:
+      // (1) constants only
+      //       if (max_upper >= a.length ) deoptimize;
+      // (2) two symbolic invariants
+      //       if (min_upper >  max_upper) deoptimize;   unless min_c == max_c
+      //       if (max_upper >= a.length ) deoptimize;
+      // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
+      //       if (min_lower >  max_lower) deoptimize;   unless min_c == max_c
+      //       if (max_lower >  max_upper) deoptimize;
+      //       if (max_upper >= a.length ) deoptimize;
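+      // For illustration (hypothetical values): for checks a[i] and a[i+2] in a loop
+      // with unit stride i = 0 .. n-1, min_c = 0 and max_c = 2, so max_upper evaluates
+      // to n+1 and the final test becomes: if (n+1 >=u a.length) deoptimize;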
+      if (base == nullptr) {
+        // Constants only.
+        DCHECK_GE(min_c, 0);
+        DCHECK(min_lower == nullptr && min_upper == nullptr &&
+               max_lower == nullptr && max_upper != nullptr);
+      } else if (max_lower == nullptr) {
+        // Two symbolic invariants.
+        if (min_c != max_c) {
+          DCHECK(min_lower == nullptr && min_upper != nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+        }
+      } else {
+        // General case, unit strides.
+        if (min_c != max_c) {
+          DCHECK(min_lower != nullptr && min_upper != nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+        }
+        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper));
+      }
+      InsertDeoptInLoop(
+          loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length));
+    } else {
+      // TODO: if rejected, avoid doing this again for subsequent instructions in this set?
+    }
+  }
+
+  /**
+   * Returns true if heuristics indicate that dynamic bce may be profitable.
+   */
+  bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+    if (loop != nullptr) {
+      // The loop preheader of an irreducible loop does not dominate all the blocks in
+      // the loop. We would need to find the common dominator of all blocks in the loop.
+      if (loop->IsIrreducible()) {
+        return false;
+      }
+      // We should never deoptimize from an OSR method; otherwise we might wrongly optimize
+      // code dominated by the deoptimization.
+      if (GetGraph()->IsCompilingOsr()) {
+        return false;
+      }
+      // A try boundary preheader is hard to handle.
+      // TODO: remove this restriction.
+      if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+        return false;
+      }
+      // Does the loop have early-exits? If so, the full range may not be covered by the loop
+      // at runtime and testing the range may trigger deoptimization unnecessarily.
+      if (IsEarlyExitLoop(loop)) {
+        return false;
+      }
+      // Does the current basic block dominate all back edges? If not,
+      // don't apply dynamic bce to something that may not be executed.
+      return loop->DominatesAllBackEdges(block);
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the loop has early exits, which implies it may not cover
+   * the full range computed by range analysis based on induction variables.
+   */
+  bool IsEarlyExitLoop(HLoopInformation* loop) {
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    // If the loop has been analyzed earlier for early-exit, don't repeat the analysis.
+    auto it = early_exit_loop_.find(loop_id);
+    if (it != early_exit_loop_.end()) {
+      return it->second;
+    }
+    // First-time early-exit analysis for this loop. Since the analysis requires scanning
+    // the full loop body, the result is stored for subsequent queries.
+    HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+    for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+      for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+        if (!loop->Contains(*successor)) {
+          early_exit_loop_.Put(loop_id, true);
+          return true;
+        }
+      }
+    }
+    early_exit_loop_.Put(loop_id, false);
+    return false;
+  }
+
+  /**
+   * Returns true if the array length is already loop invariant, or can be made so
+   * by handling the null check under the hood of the array length operation.
+   */
+  bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+    if (loop->IsDefinedOutOfTheLoop(length)) {
+      return true;
+    } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+      if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+        HoistToPreHeaderOrDeoptBlock(loop, length);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the null check is already loop invariant, or can be made so
+   * by generating a deoptimization test.
+   */
+  bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+    if (loop->IsDefinedOutOfTheLoop(check)) {
+      return true;
+    } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+      HInstruction* array = check->InputAt(0);
+      if (loop->IsDefinedOutOfTheLoop(array)) {
+        // Generate: if (array == null) deoptimize;
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HBasicBlock* block = GetPreHeader(loop, check);
+        HInstruction* cond =
+            new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+        InsertDeoptInLoop(loop, block, cond);
+        ReplaceInstruction(check, array);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the compiler can apply dynamic bce to loops that may be infinite
+   * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+   * the range analysis evaluation code by "overshooting" the computed range.
+   * Since deoptimization would be a bad choice, and there is no other version
+   * of the loop to use, dynamic bce in such cases is only allowed if other tests
+   * ensure the loop is finite.
+   */
+  bool CanHandleInfiniteLoop(
+      HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+    if (needs_infinite_test) {
+      // If we have already forced the loop to be finite, allow dynamic bce directly.
+      const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+      if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+        return true;
+      }
+      // Otherwise, allow dynamic bce if the index (which is necessarily an induction at
+      // this point) is the direct loop index (viz. a[i]), since then the runtime tests
+      // ensure the upper bound cannot cause an infinite loop.
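+      // For illustration (hypothetical code): in "for (int i = 0; i <= u; i++) a[i] = 0;"
+      // the index i is an operand of the condition (i <= u), so the generated range test
+      // "u >=u a.length" necessarily deoptimizes when u == MAX_INT, ruling out the
+      // infinite case; an access a[i+1] would be rejected here instead.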
+      HInstruction* control = loop->GetHeader()->GetLastInstruction();
+      if (control->IsIf()) {
+        HInstruction* if_expr = control->AsIf()->InputAt(0);
+        if (if_expr->IsCondition()) {
+          HCondition* condition = if_expr->AsCondition();
+          if (index == condition->InputAt(0) ||
+              index == condition->InputAt(1)) {
+            finite_loop_.insert(loop_id);
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Returns the appropriate preheader for the loop, depending on whether the
+   * instruction appears in the loop header or in the loop body proper.
+   */
+  HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    HBasicBlock* header = loop->GetHeader();
+    const uint32_t loop_id = header->GetBlockId();
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      HBasicBlock* block = it->second;
+      // If always taken, keep it that way by returning the original preheader,
+      // which can be found by following the predecessor of the true-block twice.
+      if (instruction->GetBlock() == header) {
+        return block->GetSinglePredecessor()->GetSinglePredecessor();
+      }
+      return block;
+    }
+    return loop->GetPreHeader();
+  }
+
+  /** Inserts a deoptimization test in a loop preheader. */
+  void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+    HInstruction* suspend = loop->GetSuspendCheck();
+    block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
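+    // Reuse the suspend check's environment (with loop phis adjusted) so that the
+    // deoptimization point observes valid values from before the loop is entered.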
+    if (suspend->HasEnvironment()) {
+      deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+          suspend->GetEnvironment(), loop->GetHeader());
+    }
+  }
+
+  /** Inserts a deoptimization test right before a bounds check. */
+  void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
+    HBasicBlock* block = bounds_check->GetBlock();
+    block->InsertInstructionBefore(condition, bounds_check);
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
   }
 
-  void AddComparesWithDeoptimization(HBasicBlock* block) {
-    for (ArenaSafeMap<int, HBoundsCheck*>::iterator it =
-             first_constant_index_bounds_check_map_.begin();
-         it != first_constant_index_bounds_check_map_.end();
-         ++it) {
-      HBoundsCheck* bounds_check = it->second;
-      HInstruction* array_length = bounds_check->InputAt(1);
-      if (!array_length->IsArrayLength()) {
-        // Prior deoptimizations may have changed the array length to a phi.
-        // TODO(mingyao): propagate the range to the phi?
-        DCHECK(array_length->IsPhi()) << array_length->DebugName();
-        continue;
-      }
-      HIntConstant* lower_bound_const_instr = nullptr;
-      int32_t lower_bound_const = std::numeric_limits<int32_t>::min();
-      size_t counter = 0;
-      // Count the constant indexing for which bounds checks haven't
-      // been removed yet.
-      for (HUseIterator<HInstruction*> it2(array_length->GetUses());
-           !it2.Done();
-           it2.Advance()) {
-        HInstruction* user = it2.Current()->GetUser();
-        if (user->GetBlock() == block &&
-            user->IsBoundsCheck() &&
-            user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) {
-          DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1));
-          HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant();
-          if (const_instr->GetValue() > lower_bound_const) {
-            lower_bound_const = const_instr->GetValue();
-            lower_bound_const_instr = const_instr;
+  /** Hoists an instruction out of the loop to the preheader or deoptimization block. */
+  void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    HBasicBlock* block = GetPreHeader(loop, instruction);
+    DCHECK(!instruction->HasEnvironment());
+    instruction->MoveBefore(block->GetLastInstruction());
+  }
+
+  /**
+   * Adds a new taken-test structure to a loop if needed and not already done.
+   * The taken-test protects the range analysis evaluation code against any
+   * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+   *
+   *          old_preheader
+   *               |
+   *            if_block          <- taken-test protects deoptimization block
+   *            /      \
+   *     true_block  false_block  <- deoptimizations/invariants are placed in true_block
+   *            \       /
+   *          new_preheader       <- may require phi nodes to preserve SSA structure
+   *                |
+   *             header
+   *
+   * For example, this loop:
+   *
+   *   for (int i = lower; i < upper; i++) {
+   *     array[i] = 0;
+   *   }
+   *
+   * will be transformed to:
+   *
+   *   if (lower < upper) {
+   *     if (array == null) deoptimize;
+   *     array_length = array.length;
+   *     if (lower > upper)         deoptimize;  // unsigned
+   *     if (upper >= array_length) deoptimize;  // unsigned
+   *   } else {
+   *     array_length = 0;
+   *   }
+   *   for (int i = lower; i < upper; i++) {
+   *     // Loop without null check and bounds check, and any array.length replaced with array_length.
+   *     array[i] = 0;
+   *   }
+   */
+  void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+    // Not needed (can use preheader) or already done (can reuse)?
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) {
+      return;
+    }
+
+    // Generate top test structure.
+    HBasicBlock* header = loop->GetHeader();
+    GetGraph()->TransformLoopHeaderForBCE(header);
+    HBasicBlock* new_preheader = loop->GetPreHeader();
+    HBasicBlock* if_block = new_preheader->GetDominator();
+    HBasicBlock* true_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* false_block = if_block->GetSuccessors()[1];  // False successor.
+
+    // Goto instructions.
+    true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+    // Insert the taken-test to see if the loop body is entered. If the
+    // loop isn't entered at all, control jumps around the deoptimization block.
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());  // placeholder
+    HInstruction* condition = nullptr;
+    induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+                                       GetGraph(),
+                                       if_block,
+                                       &condition);
+    DCHECK(condition != nullptr);
+    if_block->RemoveInstruction(if_block->GetLastInstruction());
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+    taken_test_loop_.Put(loop_id, true_block);
+  }
+
+  /**
+   * Inserts phi nodes that preserve SSA structure in generated top test structures.
+   * All uses of instructions in the deoptimization block that reach the loop need
+   * a phi node in the new loop preheader to fix the dominance relation.
+   *
+   * Example:
+   *           if_block
+   *            /      \
+   *         x_0 = ..  false_block
+   *            \       /
+   *           x_1 = phi(x_0, null)   <- synthetic phi
+   *               |
+   *          new_preheader
+   */
+  void InsertPhiNodes() {
+    // Scan all new deoptimization blocks.
+    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+      HBasicBlock* true_block = it1->second;
+      HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+      // Scan all instructions in a new deoptimization block.
+      for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        Primitive::Type type = instruction->GetType();
+        HPhi* phi = nullptr;
+        // Scan all uses of an instruction and replace each later use with a phi node.
+        const HUseList<HInstruction*>& uses = instruction->GetUses();
+        for (auto it2 = uses.begin(), end2 = uses.end(); it2 != end2; /* ++it2 below */) {
+          HInstruction* user = it2->GetUser();
+          size_t index = it2->GetIndex();
+          // Increment `it2` now because `*it2` may disappear thanks to user->ReplaceInput().
+          ++it2;
+          if (user->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->ReplaceInput(phi, index);  // Removes the use node from the list.
           }
-          counter++;
         }
-      }
-      if (counter >= kThresholdForAddingDeoptimize &&
-          lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) {
-        AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block);
+        // Scan all environment uses of an instruction and replace each later use with a phi node.
+        const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+        for (auto it2 = env_uses.begin(), end2 = env_uses.end(); it2 != end2; /* ++it2 below */) {
+          HEnvironment* user = it2->GetUser();
+          size_t index = it2->GetIndex();
+          // Increment `it2` now because `*it2` may disappear thanks to user->RemoveAsUserOfInput().
+          ++it2;
+          if (user->GetHolder()->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->RemoveAsUserOfInput(index);
+            user->SetRawEnvAt(index, phi);
+            phi->AddEnvUseAt(user, index);
+          }
+        }
       }
     }
   }
 
+  /**
+   * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+   * These are synthetic phi nodes without a virtual register.
+   */
+  HPhi* NewPhi(HBasicBlock* new_preheader,
+               HInstruction* instruction,
+               Primitive::Type type) {
+    HGraph* graph = GetGraph();
+    HInstruction* zero;
+    switch (type) {
+      case Primitive::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      default: zero = graph->GetConstant(type, 0); break;
+    }
+    HPhi* phi = new (graph->GetArena())
+        HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+    phi->SetRawInputAt(0, instruction);
+    phi->SetRawInputAt(1, zero);
+    if (type == Primitive::kPrimNot) {
+      phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+    }
+    new_preheader->AddPhi(phi);
+    return phi;
+  }
+
+  /** Helper method to replace an instruction with another instruction. */
+  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    instruction->ReplaceWith(replacement);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+
+  // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
-  // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
-  // a block that checks a constant index against that HArrayLength.
-  ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
+  // Map an HArrayLength instruction's id to the first HBoundsCheck instruction
+  // in a block that checks an index against that HArrayLength.
+  ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
 
-  // For the block, there is at least one HArrayLength instruction for which there
-  // is more than one bounds check instruction with constant indexing. And it's
-  // beneficial to add a compare instruction that has deoptimization fallback and
-  // eliminate those bounds checks.
-  bool need_to_revisit_block_;
+  // Early-exit loop bookkeeping.
+  ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+  // Taken-test loop bookkeeping.
+  ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+  // Finite loop bookkeeping.
+  ArenaSet<uint32_t> finite_loop_;
+
+  // Flag that denotes whether dominator-based dynamic elimination has occurred.
+  bool has_dom_based_dynamic_bce_;
 
   // Initial number of blocks.
   uint32_t initial_block_size_;
 
+  // Side effects.
+  const SideEffectsAnalysis& side_effects_;
+
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
@@ -1870,30 +1837,28 @@
     return;
   }
 
-  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
   // that value dominated by that instruction fit in that range. The range of
   // that value can be narrowed further down in the dominator tree.
-  //
-  // TODO: only visit blocks that dominate some array accesses.
-  HBasicBlock* last_visited_block = nullptr;
+  BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
-    if (current == last_visited_block) {
-      // We may insert blocks into the reverse post order list when processing
-      // a loop header. Don't process it again.
-      DCHECK(current->IsLoopHeader());
-      continue;
-    }
     if (visitor.IsAddedBlock(current)) {
       // Skip added blocks. Their effects are already taken care of.
       continue;
     }
     visitor.VisitBasicBlock(current);
-    last_visited_block = current;
+    // Skip forward to the current block in case new basic blocks were inserted
+    // (which always appear earlier in reverse post order) to avoid visiting the
+    // same basic block twice.
+    for ( ; !it.Done() && it.Current() != current; it.Advance()) {
+    }
   }
+
+  // Perform cleanup.
+  visitor.Finish();
 }
 
 }  // namespace art
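
The rewritten driver loop merits a brief illustration. When the visitor inserts new blocks, they appear earlier in reverse post order and shift the iterator backwards onto already-visited blocks; the inner `for ( ; ...)` fast-forwards to the block just processed. A standalone sketch of the same idiom, assuming a plain std::vector stands in for the reverse-post-order storage:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> order = {10, 20, 30};
      for (size_t i = 0; i < order.size(); ++i) {
        int current = order[i];
        if (current == 20) {
          // Simulate a transformation that inserts a block earlier in the
          // order, shifting the cursor onto an already-visited element.
          order.insert(order.begin(), 5);
        }
        std::printf("visit %d\n", current);
        // Skip forward to the element just visited, mirroring the BCE loop.
        while (i < order.size() && order[i] != current) {
          ++i;
        }
      }
      return 0;
    }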
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca..6dc5320 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,19 +21,24 @@
 
 namespace art {
 
+class SideEffectsAnalysis;
 class HInductionVarAnalysis;
 
 class BoundsCheckElimination : public HOptimization {
  public:
-  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
-      : HOptimization(graph, kBoundsCheckEliminiationPassName),
+  BoundsCheckElimination(HGraph* graph,
+                         const SideEffectsAnalysis& side_effects,
+                         HInductionVarAnalysis* induction_analysis)
+      : HOptimization(graph, kBoundsCheckEliminationPassName),
+        side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
 
-  static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
+  static constexpr const char* kBoundsCheckEliminationPassName = "BCE";
 
  private:
+  const SideEffectsAnalysis& side_effects_;
   HInductionVarAnalysis* induction_analysis_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2..b7c24ff 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -42,7 +42,6 @@
 
   void RunBCE() {
     graph_->BuildDominatorTree();
-    graph_->AnalyzeNaturalLoops();
 
     InstructionSimplifier(graph_).Run();
 
@@ -54,7 +53,7 @@
     HInductionVarAnalysis induction(graph_);
     induction.Run();
 
-    BoundsCheckElimination(graph_, &induction).Run();
+    BoundsCheckElimination(graph_, side_effects, &induction).Run();
   }
 
   ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 8ca352f..86742e6 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -17,253 +17,52 @@
 #include "builder.h"
 
 #include "art_field-inl.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/logging.h"
-#include "class_linker.h"
 #include "dex/verified_method.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "primitive.h"
-#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
-/**
- * Helper class to add HTemporary instructions. This class is used when
- * converting a DEX instruction to multiple HInstructions, and where those
- * instructions do not die at the following instruction, but instead span
- * multiple instructions.
- */
-class Temporaries : public ValueObject {
- public:
-  explicit Temporaries(HGraph* graph) : graph_(graph), index_(0) {}
-
-  void Add(HInstruction* instruction) {
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_, instruction->GetDexPc());
-    instruction->GetBlock()->AddInstruction(temp);
-
-    DCHECK(temp->GetPrevious() == instruction);
-
-    size_t offset;
-    if (instruction->GetType() == Primitive::kPrimLong
-        || instruction->GetType() == Primitive::kPrimDouble) {
-      offset = 2;
-    } else {
-      offset = 1;
-    }
-    index_ += offset;
-
-    graph_->UpdateTemporariesVRegSlots(index_);
-  }
-
- private:
-  HGraph* const graph_;
-
-  // Current index in the temporary stack, updated by `Add`.
-  size_t index_;
-};
-
-class SwitchTable : public ValueObject {
- public:
-  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
-      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
-    int32_t table_offset = instruction.VRegB_31t();
-    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
-    if (sparse) {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-    } else {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-    }
-    num_entries_ = table[1];
-    values_ = reinterpret_cast<const int32_t*>(&table[2]);
-  }
-
-  uint16_t GetNumEntries() const {
-    return num_entries_;
-  }
-
-  void CheckIndex(size_t index) const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
-    }
-  }
-
-  int32_t GetEntryAt(size_t index) const {
-    CheckIndex(index);
-    return values_[index];
-  }
-
-  uint32_t GetDexPcForIndex(size_t index) const {
-    CheckIndex(index);
-    return dex_pc_ +
-        (reinterpret_cast<const int16_t*>(values_ + index) -
-         reinterpret_cast<const int16_t*>(&instruction_));
-  }
-
-  // Index of the first value in the table.
-  size_t GetFirstValueIndex() const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      return num_entries_;
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      return 1;
-    }
-  }
-
- private:
-  const Instruction& instruction_;
-  const uint32_t dex_pc_;
-
-  // Whether this is a sparse-switch table (or a packed-switch one).
-  const bool sparse_;
-
-  // This can't be const as it needs to be computed from the given instruction,
-  // and complicated expressions in the initializer list seemed very ugly.
-  uint16_t num_entries_;
-
-  const int32_t* values_;
-
-  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
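
For reference, the two payload layouts the removed SwitchTable distinguishes can be shown with a self-contained sketch; the tables below are invented examples, not data from a real dex file:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Packed payload after (ident, size): first_key, target[0..size).
    // Sparse payload after (ident, size): key[0..size), target[0..size).
    int32_t GetTargetAt(const int32_t* values, uint16_t num_entries,
                        bool sparse, size_t index) {
      size_t first_value_index = sparse ? num_entries : 1u;
      return values[first_value_index + index];
    }

    int main() {
      const int32_t packed[] = {10 /* first key */, 100, 104, 108};
      const int32_t sparse[] = {1, 5, 9 /* keys */, 100, 104, 108 /* targets */};
      std::printf("%d %d\n",
                  GetTargetAt(packed, 3, /*sparse=*/ false, 1),   // 104
                  GetTargetAt(sparse, 3, /*sparse=*/ true, 2));   // 108
      return 0;
    }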
-
-void HGraphBuilder::InitializeLocals(uint16_t count) {
-  graph_->SetNumberOfVRegs(count);
-  locals_.resize(count);
-  for (int i = 0; i < count; i++) {
-    HLocal* local = new (arena_) HLocal(i);
-    entry_block_->AddInstruction(local);
-    locals_[i] = local;
-  }
-}
-
-void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) {
-  // dex_compilation_unit_ is null only when unit testing.
-  if (dex_compilation_unit_ == nullptr) {
-    return;
-  }
-
-  graph_->SetNumberOfInVRegs(number_of_parameters);
-  const char* shorty = dex_compilation_unit_->GetShorty();
-  int locals_index = locals_.size() - number_of_parameters;
-  int parameter_index = 0;
-
-  const DexFile::MethodId& referrer_method_id =
-      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
-  if (!dex_compilation_unit_->IsStatic()) {
-    // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
-                                                              referrer_method_id.class_idx_,
-                                                              parameter_index++,
-                                                              Primitive::kPrimNot,
-                                                              true);
-    entry_block_->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    number_of_parameters--;
-  }
-
-  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
-  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
-  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
-    HParameterValue* parameter = new (arena_) HParameterValue(
-        *dex_file_,
-        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
-        parameter_index++,
-        Primitive::GetType(shorty[shorty_pos]),
-        false);
-    ++shorty_pos;
-    entry_block_->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    // Store the parameter value in the local that the dex code will use
-    // to reference that parameter.
-    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    bool is_wide = (parameter->GetType() == Primitive::kPrimLong)
-        || (parameter->GetType() == Primitive::kPrimDouble);
-    if (is_wide) {
-      i++;
-      locals_index++;
-      parameter_index++;
-    }
-  }
-}
-
-template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
-  int32_t target_offset = instruction.GetTargetOffset();
-  HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset);
-  HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(branch_target != nullptr);
-  DCHECK(fallthrough_target != nullptr);
-  PotentiallyAddSuspendCheck(branch_target, dex_pc);
-  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(first, second, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_->AddSuccessor(branch_target);
-  current_block_->AddSuccessor(fallthrough_target);
-  current_block_ = nullptr;
-}
-
-template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
-  int32_t target_offset = instruction.GetTargetOffset();
-  HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset);
-  HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(branch_target != nullptr);
-  DCHECK(fallthrough_target != nullptr);
-  PotentiallyAddSuspendCheck(branch_target, dex_pc);
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_->AddSuccessor(branch_target);
-  current_block_->AddSuccessor(fallthrough_target);
-  current_block_ = nullptr;
-}
-
 void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
   if (compilation_stats_ != nullptr) {
     compilation_stats_->RecordStat(compilation_stat);
   }
 }
 
-bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item,
-                                    size_t number_of_branches) {
-  const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
-  CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
-  if (compiler_filter == CompilerOptions::kEverything) {
+bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
+  if (compiler_driver_ == nullptr) {
+    // Note that the compiler driver is null when unit testing.
     return false;
   }
 
-  if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) {
+  const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
+  CompilerFilter::Filter compiler_filter = compiler_options.GetCompilerFilter();
+  if (compiler_filter == CompilerFilter::kEverything) {
+    return false;
+  }
+
+  if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) {
     VLOG(compiler) << "Skip compilation of huge method "
                    << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << ": " << code_item.insns_size_in_code_units_ << " code units";
+                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
   // If it's large and contains no branches, it's likely to be machine-generated initialization.
-  if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_)
+  if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_)
       && (number_of_branches == 0)) {
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << ": " << code_item.insns_size_in_code_units_ << " code units";
+                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
@@ -271,2649 +70,39 @@
   return false;
 }
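
A hedged restatement of the heuristic above; the numeric cutoffs are placeholders, since the real limits come from CompilerOptions::IsHugeMethod and IsLargeMethod, not constants in this file:

    #include <cstddef>

    bool ShouldSkip(size_t code_units, size_t number_of_branches,
                    bool compile_everything) {
      if (compile_everything) {
        return false;  // The "everything" filter never skips.
      }
      const size_t kHugeMethodCutoff = 10000;  // placeholder value
      const size_t kLargeMethodCutoff = 600;   // placeholder value
      if (code_units > kHugeMethodCutoff) {
        return true;  // Huge method: skip compilation.
      }
      // Large and branch-free: likely machine-generated initialization code.
      return code_units > kLargeMethodCutoff && number_of_branches == 0;
    }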
 
-void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) {
-  if (code_item.tries_size_ == 0) {
-    return;
-  }
-
-  // Create branch targets at the start/end of the TryItem range. These are
-  // places where the program might fall through into/out of a block and
-  // where TryBoundary instructions will be inserted later. Other edges which
-  // enter/exit the try blocks are a result of branches/switches.
-  for (size_t idx = 0; idx < code_item.tries_size_; ++idx) {
-    const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item, idx);
-    uint32_t dex_pc_start = try_item->start_addr_;
-    uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
-    FindOrCreateBlockStartingAt(dex_pc_start);
-    if (dex_pc_end < code_item.insns_size_in_code_units_) {
-      // TODO: Do not create block if the last instruction cannot fall through.
-      FindOrCreateBlockStartingAt(dex_pc_end);
-    } else {
-      // The TryItem spans until the very end of the CodeItem (or beyond if
-      // invalid) and therefore cannot have any code afterwards.
-    }
-  }
-
-  // Create branch targets for exception handlers.
-  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item, 0);
-  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
-  for (uint32_t idx = 0; idx < handlers_size; ++idx) {
-    CatchHandlerIterator iterator(handlers_ptr);
-    for (; iterator.HasNext(); iterator.Next()) {
-      uint32_t address = iterator.GetHandlerAddress();
-      HBasicBlock* block = FindOrCreateBlockStartingAt(address);
-      block->SetTryCatchInformation(
-        new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
-    }
-    handlers_ptr = iterator.EndDataPointer();
-  }
-}
-
-// Returns the TryItem stored for `block` or nullptr if there is no info for it.
-static const DexFile::TryItem* GetTryItem(
-    HBasicBlock* block,
-    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
-  auto iterator = try_block_info.find(block->GetBlockId());
-  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
-}
-
-void HGraphBuilder::LinkToCatchBlocks(HTryBoundary* try_boundary,
-                                      const DexFile::CodeItem& code_item,
-                                      const DexFile::TryItem* try_item) {
-  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
-    try_boundary->AddExceptionHandler(FindBlockStartingAt(it.GetHandlerAddress()));
-  }
-}
-
-void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) {
-  if (code_item.tries_size_ == 0) {
-    return;
-  }
-
-  // Keep a map of all try blocks and their respective TryItems. We do not use
-  // the block's pointer but rather its id to ensure deterministic iteration.
-  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
-      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
-
-  // Obtain TryItem information for blocks with throwing instructions, and split
-  // blocks which are both try & catch to simplify the graph.
-  // NOTE: We are appending new blocks inside the loop, so we need to use an index
-  // because iterators can be invalidated. We remember the initial size to avoid
-  // iterating over the new blocks which cannot throw.
-  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
-    HBasicBlock* block = graph_->GetBlocks()[i];
-
-    // Do not bother creating exceptional edges for try blocks which have no
-    // throwing instructions. In that case we simply assume that the block is
-    // not covered by a TryItem. This prevents us from creating a throw-catch
-    // loop for synchronized blocks.
-    if (block->HasThrowingInstructions()) {
-      // Try to find a TryItem covering the block.
-      DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dex_pc to find its TryItem.";
-      const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc());
-      if (try_item_idx != -1) {
-        // Block throwing and in a TryItem. Store the try block information.
-        HBasicBlock* throwing_block = block;
-        if (block->IsCatchBlock()) {
-          // Simplify blocks which are both try and catch, otherwise we would
-          // need a strategy for splitting exceptional edges. We split the block
-          // after the move-exception (if present) and mark the first part not
-          // throwing. The normal-flow edge between them will be split later.
-          HInstruction* first_insn = block->GetFirstInstruction();
-          if (first_insn->IsLoadException()) {
-            // Catch block starts with a LoadException. Split the block after
-            // the StoreLocal and ClearException which must come after the load.
-            DCHECK(first_insn->GetNext()->IsStoreLocal());
-            DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-            throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
-          } else {
-            // Catch block does not load the exception. Split at the beginning
-            // to create an empty catch block.
-            throwing_block = block->SplitBefore(first_insn);
-          }
-        }
-
-        try_block_info.Put(throwing_block->GetBlockId(),
-                           DexFile::GetTryItems(code_item, try_item_idx));
-      }
-    }
-  }
-
-  // Do a pass over the try blocks and insert entering TryBoundaries where at
-  // least one predecessor is not covered by the same TryItem as the try block.
-  // We do not split each edge separately, but rather create one boundary block
-  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
-    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
-      if (GetTryItem(predecessor, try_block_info) != entry.second) {
-        // Found a predecessor not covered by the same TryItem. Insert entering
-        // boundary block.
-        HTryBoundary* try_entry =
-            new (arena_) HTryBoundary(HTryBoundary::kEntry, try_block->GetDexPc());
-        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
-        LinkToCatchBlocks(try_entry, code_item, entry.second);
-        break;
-      }
-    }
-  }
-
-  // Do a second pass over the try blocks and insert exit TryBoundaries where
-  // the successor is not in the same TryItem.
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
-    // NOTE: Do not use iterators because SplitEdge would invalidate them.
-    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
-      HBasicBlock* successor = try_block->GetSuccessors()[i];
-
-      // If the successor is a try block, all of its predecessors must be
-      // covered by the same TryItem. Otherwise the previous pass would have
-      // created a non-throwing boundary block.
-      if (GetTryItem(successor, try_block_info) != nullptr) {
-        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
-        continue;
-      }
-
-      // Preserve the invariant that Return(Void) always jumps to Exit by moving
-      // it outside the try block if necessary.
-      HInstruction* last_instruction = try_block->GetLastInstruction();
-      if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
-        DCHECK_EQ(successor, exit_block_);
-        successor = try_block->SplitBefore(last_instruction);
-      }
-
-      // Insert TryBoundary and link to catch blocks.
-      HTryBoundary* try_exit =
-          new (arena_) HTryBoundary(HTryBoundary::kExit, successor->GetDexPc());
-      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
-      LinkToCatchBlocks(try_exit, code_item, entry.second);
-    }
-  }
-}
-
-bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
+GraphAnalysisResult HGraphBuilder::BuildGraph() {
   DCHECK(graph_->GetBlocks().empty());
 
-  const uint16_t* code_ptr = code_item.insns_;
-  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
-  code_start_ = code_ptr;
+  graph_->SetNumberOfVRegs(code_item_.registers_size_);
+  graph_->SetNumberOfInVRegs(code_item_.ins_size_);
+  graph_->SetMaximumNumberOfOutVRegs(code_item_.outs_size_);
+  graph_->SetHasTryCatch(code_item_.tries_size_ != 0);
 
-  // Setup the graph with the entry block and exit block.
-  entry_block_ = new (arena_) HBasicBlock(graph_, 0);
-  graph_->AddBlock(entry_block_);
-  exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc);
-  graph_->SetEntryBlock(entry_block_);
-  graph_->SetExitBlock(exit_block_);
-
-  graph_->SetHasTryCatch(code_item.tries_size_ != 0);
-
-  InitializeLocals(code_item.registers_size_);
-  graph_->SetMaximumNumberOfOutVRegs(code_item.outs_size_);
-
-  // Compute the number of dex instructions, blocks, and branches. We will
-  // check these values against limits given to the compiler.
-  size_t number_of_branches = 0;
-
-  // To avoid splitting blocks, we compute ahead of time the instructions that
-  // start a new block, and create these blocks.
-  if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
-    return false;
+  // 1) Create basic blocks and link them together. Basic blocks are left
+  //    unpopulated with the exception of synthetic blocks, e.g. HTryBoundaries.
+  if (!block_builder_.Build()) {
+    return kAnalysisInvalidBytecode;
   }
 
-  // Note that the compiler driver is null when unit testing.
-  if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
-    return false;
+  // 2) Decide whether to skip this method based on its code size and number
+  //    of branches.
+  if (SkipCompilation(block_builder_.GetNumberOfBranches())) {
+    return kAnalysisSkipped;
   }
 
-  CreateBlocksForTryCatch(code_item);
-
-  InitializeParameters(code_item.ins_size_);
-
-  size_t dex_pc = 0;
-  while (code_ptr < code_end) {
-    // Update the current block if dex_pc starts a new block.
-    MaybeUpdateCurrentBlock(dex_pc);
-    const Instruction& instruction = *Instruction::At(code_ptr);
-    if (!AnalyzeDexInstruction(instruction, dex_pc)) {
-      return false;
-    }
-    dex_pc += instruction.SizeInCodeUnits();
-    code_ptr += instruction.SizeInCodeUnits();
+  // 3) Build the dominator tree and fill in loop and try/catch metadata.
+  GraphAnalysisResult result = graph_->BuildDominatorTree();
+  if (result != kAnalysisSuccess) {
+    return result;
   }
 
-  // Add Exit to the exit block.
-  exit_block_->AddInstruction(new (arena_) HExit());
-  // Add the suspend check to the entry block.
-  entry_block_->AddInstruction(new (arena_) HSuspendCheck(0));
-  entry_block_->AddInstruction(new (arena_) HGoto());
-  // Add the exit block at the end.
-  graph_->AddBlock(exit_block_);
-
-  // Iterate over blocks covered by TryItems and insert TryBoundaries at entry
-  // and exit points. This requires all control-flow instructions and
-  // non-exceptional edges to have been created.
-  InsertTryBoundaryBlocks(code_item);
-
-  return true;
-}
-
-void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) {
-  HBasicBlock* block = FindBlockStartingAt(dex_pc);
-  if (block == nullptr) {
-    return;
+  // 4) Populate basic blocks with instructions.
+  if (!instruction_builder_.Build()) {
+    return kAnalysisInvalidBytecode;
   }
 
-  if (current_block_ != nullptr) {
-    // Branching instructions clear current_block, so we know
-    // the last instruction of the current block is not a branching
-    // instruction. We add an unconditional goto to the found block.
-    current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-    current_block_->AddSuccessor(block);
-  }
-  graph_->AddBlock(block);
-  current_block_ = block;
-}
-
-bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
-                                         const uint16_t* code_end,
-                                         size_t* number_of_branches) {
-  branch_targets_.resize(code_end - code_ptr, nullptr);
-
-  // Create the first block for the dex instructions, single successor of the entry block.
-  HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0);
-  branch_targets_[0] = block;
-  entry_block_->AddSuccessor(block);
-
-  // Iterate over all instructions and find branching instructions. Create blocks for
-  // the locations these instructions branch to.
-  uint32_t dex_pc = 0;
-  while (code_ptr < code_end) {
-    const Instruction& instruction = *Instruction::At(code_ptr);
-    if (instruction.IsBranch()) {
-      (*number_of_branches)++;
-      int32_t target = instruction.GetTargetOffset() + dex_pc;
-      // Create a block for the target instruction.
-      FindOrCreateBlockStartingAt(target);
-
-      dex_pc += instruction.SizeInCodeUnits();
-      code_ptr += instruction.SizeInCodeUnits();
-
-      if (instruction.CanFlowThrough()) {
-        if (code_ptr >= code_end) {
-          // In the normal case we should never hit this, but someone can artificially forge a
-          // dex file to fall through out of the method code. In that case we bail out of compilation.
-          return false;
-        } else {
-          FindOrCreateBlockStartingAt(dex_pc);
-        }
-      }
-    } else if (instruction.IsSwitch()) {
-      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
-
-      uint16_t num_entries = table.GetNumEntries();
-
-      // In a packed-switch, the entry at index 0 is the starting key. In a sparse-switch, the
-      // entry at index 0 is the first key, and values are after *all* keys.
-      size_t offset = table.GetFirstValueIndex();
-
-      // Use a larger loop counter type to avoid overflow issues.
-      for (size_t i = 0; i < num_entries; ++i) {
-        // The target of the case.
-        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
-        FindOrCreateBlockStartingAt(target);
-
-        // Create a block for the switch-case logic. The block gets the dex_pc
-        // of the SWITCH instruction because it is part of its semantics.
-        block = new (arena_) HBasicBlock(graph_, dex_pc);
-        branch_targets_[table.GetDexPcForIndex(i)] = block;
-      }
-
-      // Fall-through. Add a block if there is more code afterwards.
-      dex_pc += instruction.SizeInCodeUnits();
-      code_ptr += instruction.SizeInCodeUnits();
-      if (code_ptr >= code_end) {
-        // In the normal case we should never hit this, but someone can artificially forge a
-        // dex file to fall through out of the method code. In that case we bail out of compilation.
-        // (A switch can fall through, so we don't need to check CanFlowThrough().)
-        return false;
-      } else {
-        FindOrCreateBlockStartingAt(dex_pc);
-      }
-    } else {
-      code_ptr += instruction.SizeInCodeUnits();
-      dex_pc += instruction.SizeInCodeUnits();
-    }
-  }
-  return true;
-}
-
-HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const {
-  DCHECK_GE(dex_pc, 0);
-  return branch_targets_[dex_pc];
-}
-
-HBasicBlock* HGraphBuilder::FindOrCreateBlockStartingAt(int32_t dex_pc) {
-  HBasicBlock* block = FindBlockStartingAt(dex_pc);
-  if (block == nullptr) {
-    block = new (arena_) HBasicBlock(graph_, dex_pc);
-    branch_targets_[dex_pc] = block;
-  }
-  return block;
-}
-
-template<typename T>
-void HGraphBuilder::Unop_12x(const Instruction& instruction,
-                             Primitive::Type type,
-                             uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Conversion_12x(const Instruction& instruction,
-                                   Primitive::Type input_type,
-                                   Primitive::Type result_type,
-                                   uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), input_type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
-                                    Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
-                                  Primitive::Type type,
-                                  ComparisonBias bias,
-                                  uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) {
-  Thread* self = Thread::Current();
-  return cu->IsConstructor()
-      && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
-}
-
-void HGraphBuilder::BuildReturn(const Instruction& instruction,
-                                Primitive::Type type,
-                                uint32_t dex_pc) {
-  if (type == Primitive::kPrimVoid) {
-    if (graph_->ShouldGenerateConstructorBarrier()) {
-      // The compilation unit is null during testing.
-      if (dex_compilation_unit_ != nullptr) {
-        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_))
-          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
-      }
-      current_block_->AddInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
-    }
-    current_block_->AddInstruction(new (arena_) HReturnVoid(dex_pc));
-  } else {
-    HInstruction* value = LoadLocal(instruction.VRegA(), type, dex_pc);
-    current_block_->AddInstruction(new (arena_) HReturn(value, dex_pc));
-  }
-  current_block_->AddSuccessor(exit_block_);
-  current_block_ = nullptr;
-}
-
-static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
-  switch (opcode) {
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-      return kStatic;
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_DIRECT_RANGE:
-      return kDirect;
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
-      return kVirtual;
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-      return kInterface;
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_SUPER:
-      return kSuper;
-    default:
-      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
-      UNREACHABLE();
-  }
-}
-
-bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
-                                uint32_t dex_pc,
-                                uint32_t method_idx,
-                                uint32_t number_of_vreg_arguments,
-                                bool is_range,
-                                uint32_t* args,
-                                uint32_t register_index) {
-  InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  InvokeType optimized_invoke_type = original_invoke_type;
-  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
-  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
-
-  // Remove the return type from the 'proto'.
-  size_t number_of_arguments = strlen(descriptor) - 1;
-  if (original_invoke_type != kStatic) {  // instance call
-    // One extra argument for 'this'.
-    number_of_arguments++;
-  }
-
-  MethodReference target_method(dex_file_, method_idx);
-  int32_t table_index = 0;
-  uintptr_t direct_code = 0;
-  uintptr_t direct_method = 0;
-
-  // Special handling for string init.
-  int32_t string_init_offset = 0;
-  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
-                                                       dex_file_,
-                                                       &string_init_offset);
-  // Replace calls to String.<init> with StringFactory.
-  if (is_string_init) {
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = ComputeDispatchInfo(is_string_init,
-                                                                            string_init_offset,
-                                                                            target_method,
-                                                                            direct_method,
-                                                                            direct_code);
-    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
-        arena_,
-        number_of_arguments - 1,
-        Primitive::kPrimNot /* return_type */,
-        dex_pc,
-        method_idx,
-        target_method,
-        dispatch_info,
-        original_invoke_type,
-        kStatic /* optimized_invoke_type */,
-        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
-    return HandleStringInit(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor);
-  }
-
-  // Handle unresolved methods.
-  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
-                                           dex_pc,
-                                           true /* update_stats */,
-                                           true /* enable_devirtualization */,
-                                           &optimized_invoke_type,
-                                           &target_method,
-                                           &table_index,
-                                           &direct_code,
-                                           &direct_method)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
-    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
-                                                     number_of_arguments,
-                                                     return_type,
-                                                     dex_pc,
-                                                     method_idx,
-                                                     original_invoke_type);
-    return HandleInvoke(invoke,
-                        number_of_vreg_arguments,
-                        args,
-                        register_index,
-                        is_range,
-                        descriptor,
-                        nullptr /* clinit_check */);
-  }
-
-  // Handle resolved methods (non string init).
-
-  DCHECK(optimized_invoke_type != kSuper);
-
-  // Potential class initialization check, in the case of a static method call.
-  HClinitCheck* clinit_check = nullptr;
-  HInvoke* invoke = nullptr;
-
-  if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
-    // By default, consider that the called method implicitly requires
-    // an initialization check of its declaring method.
-    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
-        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    if (optimized_invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
-    }
-
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = ComputeDispatchInfo(is_string_init,
-                                                                            string_init_offset,
-                                                                            target_method,
-                                                                            direct_method,
-                                                                            direct_code);
-    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
-                                                number_of_arguments,
-                                                return_type,
-                                                dex_pc,
-                                                method_idx,
-                                                target_method,
-                                                dispatch_info,
-                                                original_invoke_type,
-                                                optimized_invoke_type,
-                                                clinit_check_requirement);
-  } else if (optimized_invoke_type == kVirtual) {
-    invoke = new (arena_) HInvokeVirtual(arena_,
-                                         number_of_arguments,
-                                         return_type,
-                                         dex_pc,
-                                         method_idx,
-                                         table_index);
-  } else {
-    DCHECK_EQ(optimized_invoke_type, kInterface);
-    invoke = new (arena_) HInvokeInterface(arena_,
-                                           number_of_arguments,
-                                           return_type,
-                                           dex_pc,
-                                           method_idx,
-                                           table_index);
-  }
-
-  return HandleInvoke(invoke,
-                      number_of_vreg_arguments,
-                      args,
-                      register_index,
-                      is_range,
-                      descriptor,
-                      clinit_check);
-}
-
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
-      soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
-
-  DCHECK(resolved_method != nullptr);
-
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // The index at which the method's class is stored in the DexCache's type array.
-  uint32_t storage_index = DexFile::kDexNoIndex;
-  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
-    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
-    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
-                                                               GetCompilingClass(),
-                                                               resolved_method,
-                                                               method_idx,
-                                                               &storage_index);
-  }
-
-  HClinitCheck* clinit_check = nullptr;
-
-  if (!outer_class->IsInterface()
-      && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) {
-    // If the outer class is the declaring class or a subclass
-    // of the declaring class, no class initialization is needed
-    // before the static method call.
-    // Note that in case of inlining, we do not need to add clinit checks
-    // to calls that satisfy this subclass check with any inlined methods. This
-    // will be detected by the optimization passes.
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-  } else if (storage_index != DexFile::kDexNoIndex) {
-    // If the method's class type index is available, check
-    // whether we should add an explicit class initialization
-    // check for its declaring class before the static method call.
-
-    // TODO: find out why this check is needed.
-    bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-        *outer_compilation_unit_->GetDexFile(), storage_index);
-    bool is_initialized =
-        resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
-    if (is_initialized) {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-    } else {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-      HLoadClass* load_class = new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          storage_index,
-          *dex_compilation_unit_->GetDexFile(),
-          is_outer_class,
-          dex_pc,
-          /*needs_access_check*/ false);
-      current_block_->AddInstruction(load_class);
-      clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-      current_block_->AddInstruction(clinit_check);
-    }
-  }
-  return clinit_check;
-}
-
-HInvokeStaticOrDirect::DispatchInfo HGraphBuilder::ComputeDispatchInfo(
-    bool is_string_init,
-    int32_t string_init_offset,
-    MethodReference target_method,
-    uintptr_t direct_method,
-    uintptr_t direct_code) {
-  HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
-  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
-  uint64_t method_load_data = 0u;
-  uint64_t direct_code_ptr = 0u;
-
-  if (is_string_init) {
-    // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
-    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kStringInit;
-    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-    method_load_data = string_init_offset;
-  } else if (target_method.dex_file == outer_compilation_unit_->GetDexFile() &&
-      target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex()) {
-    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
-    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
-  } else {
-    if (direct_method != 0u) {  // Should we use a direct pointer to the method?
-      if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
-        method_load_data = direct_method;
-      } else {  // The direct pointer will be known at link time.
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup;
-      }
-    } else {  // Use dex cache.
-      DCHECK(target_method.dex_file == dex_compilation_unit_->GetDexFile());
-      DexCacheArraysLayout layout =
-          compiler_driver_->GetDexCacheArraysLayout(target_method.dex_file);
-      if (layout.Valid()) {  // Can we use PC-relative access to the dex cache arrays?
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
-        method_load_data = layout.MethodOffset(target_method.dex_method_index);
-      } else {  // We must go through the ArtMethod's pointer to resolved methods.
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
-      }
-    }
-    if (direct_code != 0u) {  // Should we use a direct pointer to the code?
-      if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
-        direct_code_ptr = direct_code;
-      } else if (compiler_driver_->IsImage() ||
-          target_method.dex_file == dex_compilation_unit_->GetDexFile()) {
-        // Use PC-relative calls for invokes within a multi-dex oat file.
-        // TODO: Recognize when the target dex file is within the current oat file for
-        // app compilation. At the moment we recognize only the boot image as multi-dex.
-        // NOTE: This will require changing the ARM backend which currently falls
-        // through from kCallPCRelative to kDirectCodeFixup for different dex files.
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
-      } else {  // The direct pointer will be known at link time.
-        // NOTE: This is used for app->boot calls when compiling an app against
-        // a relocatable but not yet relocated image.
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup;
-      }
-    } else {  // We must use the code pointer from the ArtMethod.
-      code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-    }
-  }
-
-  if (graph_->IsDebuggable()) {
-    // For debuggable apps always use the code pointer from ArtMethod
-    // so that we don't circumvent instrumentation stubs if installed.
-    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-  }
-
-  return HInvokeStaticOrDirect::DispatchInfo {
-    method_load_kind, code_ptr_location, method_load_data, direct_code_ptr };
-}
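
The removed ComputeDispatchInfo is essentially a decision tree over where the callee's ArtMethod comes from. A condensed, hedged restatement of the method-load half (enumerator names shortened; not ART API):

    #include <cstdint>

    enum MethodLoadKind {
      kStringInit, kRecursive, kDirectAddress,
      kDirectAddressWithFixup, kDexCachePcRelative, kDexCacheViaMethod
    };

    MethodLoadKind ChooseMethodLoadKind(bool is_string_init, bool is_recursive,
                                        uintptr_t direct_method,
                                        bool dex_cache_layout_valid) {
      if (is_string_init) return kStringInit;
      if (is_recursive) return kRecursive;
      if (direct_method != 0u) {
        // Pointer known now vs. known only at link time.
        return direct_method != static_cast<uintptr_t>(-1)
            ? kDirectAddress
            : kDirectAddressWithFixup;
      }
      return dex_cache_layout_valid ? kDexCachePcRelative : kDexCacheViaMethod;
    }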
-
-bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke,
-                                         uint32_t number_of_vreg_arguments,
-                                         uint32_t* args,
-                                         uint32_t register_index,
-                                         bool is_range,
-                                         const char* descriptor,
-                                         size_t start_index,
-                                         size_t* argument_index) {
-  uint32_t descriptor_index = 1;  // Skip the return type.
-  uint32_t dex_pc = invoke->GetDexPc();
-
-  for (size_t i = start_index;
-       // Make sure we don't go over the expected arguments or over the number of
-       // dex registers given. If the instruction was seen as dead by the verifier,
-       // it hasn't been properly checked.
-       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
-       i++, (*argument_index)++) {
-    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
-    if (!is_range
-        && is_wide
-        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
-      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
-      // reject any class where this is violated. However, the verifier only does these checks
-      // on non-trivially-dead instructions, so we just bail out of the compilation.
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-      return false;
-    }
-    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    invoke->SetArgumentAt(*argument_index, arg);
-    if (is_wide) {
-      i++;
-    }
-  }
-
-  if (*argument_index != invoke->GetNumberOfArguments()) {
-    VLOG(compiler) << "Did not compile "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-    return false;
-  }
-
-  if (invoke->IsInvokeStaticOrDirect()) {
-    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
-    (*argument_index)++;
-  }
-
-  return true;
-}
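
The wide-argument bailout above hinges on longs and doubles occupying a sequential vreg pair. A minimal sketch of that pair check (register numbers invented):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // A wide (long/double) argument must occupy registers (v, v + 1).
    bool IsValidWidePair(const uint32_t* args, size_t count, size_t i) {
      return (i + 1 < count) && (args[i] + 1 == args[i + 1]);
    }

    int main() {
      const uint32_t ok[] = {2, 3};   // long in (v2, v3): sequential pair
      const uint32_t bad[] = {2, 5};  // (v2, v5): rejected, non-sequential
      std::printf("%d %d\n",
                  IsValidWidePair(ok, 2, 0), IsValidWidePair(bad, 2, 0));
      return 0;
    }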
-
-bool HGraphBuilder::HandleInvoke(HInvoke* invoke,
-                                 uint32_t number_of_vreg_arguments,
-                                 uint32_t* args,
-                                 uint32_t register_index,
-                                 bool is_range,
-                                 const char* descriptor,
-                                 HClinitCheck* clinit_check) {
-  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 0;
-  size_t argument_index = 0;
-  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    Temporaries temps(graph_);
-    HInstruction* arg = LoadLocal(
-        is_range ? register_index : args[0], Primitive::kPrimNot, invoke->GetDexPc());
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
-    current_block_->AddInstruction(null_check);
-    temps.Add(null_check);
-    invoke->SetArgumentAt(0, null_check);
-    start_index = 1;
-    argument_index = 1;
-  }
-
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  if (clinit_check != nullptr) {
-    // Add the class initialization check as last input of `invoke`.
-    DCHECK(invoke->IsInvokeStaticOrDirect());
-    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
-        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
-    invoke->SetArgumentAt(argument_index, clinit_check);
-    argument_index++;
-  }
-
-  current_block_->AddInstruction(invoke);
-  latest_result_ = invoke;
-
-  return true;
-}
-
-bool HGraphBuilder::HandleStringInit(HInvoke* invoke,
-                                     uint32_t number_of_vreg_arguments,
-                                     uint32_t* args,
-                                     uint32_t register_index,
-                                     bool is_range,
-                                     const char* descriptor) {
-  DCHECK(invoke->IsInvokeStaticOrDirect());
-  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 1;
-  size_t argument_index = 0;
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  // Add move-result for StringFactory method.
-  uint32_t orig_this_reg = is_range ? register_index : args[0];
-  HInstruction* fake_string = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
-  invoke->SetArgumentAt(argument_index, fake_string);
-  current_block_->AddInstruction(invoke);
-  PotentiallySimplifyFakeString(orig_this_reg, invoke->GetDexPc(), invoke);
-
-  latest_result_ = invoke;
-
-  return true;
-}
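-
-// Illustrative sketch (hypothetical registers, not part of the original code):
-// a dex sequence like
-//
-//   new-instance v0, Ljava/lang/String;
-//   invoke-direct {v0, v1}, Ljava/lang/String;.<init>:([B)V
-//
-// is modeled by materializing an HFakeString for the new-instance (see the
-// NEW_INSTANCE case below) and compiling the <init> as a StringFactory call
-// whose last input is that fake string; afterwards the call's result, not the
-// fake string, is the value of v0.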
-
-void HGraphBuilder::PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                                  uint32_t dex_pc,
-                                                  HInvoke* actual_string) {
-  if (!graph_->IsDebuggable()) {
-    // Record that we cannot compile with baseline. The dex registers aliasing
-    // with `original_dex_register` will be handled when we optimize
-    // (see HInstructionSimplifier::VisitFakeString).
-    can_use_baseline_for_string_init_ = false;
-    return;
-  }
-  const VerifiedMethod* verified_method =
-      compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex());
-  if (verified_method != nullptr) {
-    UpdateLocal(original_dex_register, actual_string, dex_pc);
-    const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
-        verified_method->GetStringInitPcRegMap();
-    auto map_it = string_init_map.find(dex_pc);
-    if (map_it != string_init_map.end()) {
-      for (uint32_t reg : map_it->second) {
-        HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot, dex_pc);
-        UpdateLocal(reg, load_local, dex_pc);
-      }
-    }
-  } else {
-    can_use_baseline_for_string_init_ = false;
-  }
-}
-
-static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
-  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
-  return Primitive::GetType(type[0]);
-}
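-
-// Worked examples (hypothetical descriptors, not part of the original code):
-// only the first descriptor character matters above.
-//   "I"                  -> Primitive::kPrimInt
-//   "J"                  -> Primitive::kPrimLong
-//   "Ljava/lang/Object;" -> Primitive::kPrimNot
-//   "[I"                 -> Primitive::kPrimNot  (arrays are references)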
-
-bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
-                                             uint32_t dex_pc,
-                                             bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_22c();
-  uint32_t obj_reg = instruction.VRegB_22c();
-  uint16_t field_index;
-  if (instruction.IsQuickened()) {
-    if (!CanDecodeQuickenedInfo()) {
-      return false;
-    }
-    field_index = LookupQuickenedInfo(dex_pc);
-  } else {
-    field_index = instruction.VRegC_22c();
-  }
-
-  ScopedObjectAccess soa(Thread::Current());
-  ArtField* resolved_field =
-      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
-
-  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot, dex_pc);
-  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  Primitive::Type field_type = (resolved_field == nullptr)
-      ? GetFieldAccessType(*dex_file_, field_index)
-      : resolved_field->GetTypeAsPrimitiveType();
-  if (is_put) {
-    Temporaries temps(graph_);
-    // We need one temporary for the null check.
-    temps.Add(null_check);
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    HInstruction* field_set = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
-                                                           value,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_set = new (arena_) HInstanceFieldSet(null_check,
-                                                 value,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_set);
-  } else {
-    HInstruction* field_get = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_get = new (arena_) HInstanceFieldGet(null_check,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_get);
-    UpdateLocal(source_or_dest_reg, field_get, dex_pc);
-  }
-
-  return true;
-}
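-
-// Illustrative HIR shape (hypothetical registers, not part of the original
-// code): for `iget v0, v1, LFoo;.x:I` with a resolved field, the code above
-// emits roughly
-//
-//   null_check = HNullCheck(LoadLocal(v1, kPrimNot))
-//   field_get  = HInstanceFieldGet(null_check, kPrimInt, offset, ...)
-//   UpdateLocal(v0, field_get)
-//
-// When resolution fails, HUnresolvedInstanceFieldGet/Set is emitted instead.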
-
-static mirror::Class* GetClassFrom(CompilerDriver* driver,
-                                   const DexCompilationUnit& compilation_unit) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *compilation_unit.GetDexFile();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-
-  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
-}
-
-mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
-}
-
-mirror::Class* HGraphBuilder::GetCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
-}
-
-bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // GetOutermostCompilingClass returns null when the class is unresolved
-  // (e.g. if it derives from an unresolved class), which is surprising given
-  // that we are in the middle of compiling it.
-  // When this happens we cannot establish a direct relation between the
-  // current class and the outer class, so we conservatively return false.
-  // (Note that this is only used for optimizing invokes and field accesses.)
-  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
-}
-
-void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                                     uint32_t dex_pc,
-                                                     bool is_put,
-                                                     Primitive::Type field_type) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
-  } else {
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-
-bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
-                                           uint32_t dex_pc,
-                                           bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtField* resolved_field = compiler_driver_->ResolveField(
-      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
-
-  if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
-    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-    return true;
-  }
-
-  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // The index at which the field's class is stored in the DexCache's type array.
-  uint32_t storage_index;
-  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // The compiler driver cannot currently handle multiple dex caches. Just bail out.
-    return false;
-  } else {
-    // TODO: This is rather expensive. Perf it and cache the results if needed.
-    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
-        outer_dex_cache.Get(),
-        GetCompilingClass(),
-        resolved_field,
-        field_index,
-        &storage_index);
-    bool can_easily_access = is_put ? pair.second : pair.first;
-    if (!can_easily_access) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
-      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-      return true;
-    }
-  }
-
-  // TODO: find out why this check is needed.
-  bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-      *outer_compilation_unit_->GetDexFile(), storage_index);
-  bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
-  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
-                                                 storage_index,
-                                                 *dex_compilation_unit_->GetDexFile(),
-                                                 is_outer_class,
-                                                 dex_pc,
-                                                 /*needs_access_check*/ false);
-  current_block_->AddInstruction(constant);
-
-  HInstruction* cls = constant;
-  if (!is_initialized && !is_outer_class) {
-    cls = new (arena_) HClinitCheck(constant, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-  if (is_put) {
-    // We need to keep the class alive before loading the value.
-    Temporaries temps(graph_);
-    temps.Add(cls);
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    DCHECK_EQ(value->GetType(), field_type);
-    current_block_->AddInstruction(new (arena_) HStaticFieldSet(cls,
-                                                                value,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HStaticFieldGet(cls,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  return true;
-}
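-
-// Illustrative HIR shape (hypothetical registers, not part of the original
-// code): for `sget v0, LFoo;.x:I` against a resolved, fast-access field, the
-// code above emits roughly
-//
-//   cls = HLoadClass(storage_index, ...)
-//   cls = HClinitCheck(cls)   // only if !is_initialized && !is_outer_class
-//   get = HStaticFieldGet(cls, kPrimInt, offset, ...)
-//   UpdateLocal(v0, get)
-//
-// Unresolved or slow-to-access fields fall back to HUnresolvedStaticFieldGet/Set.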
-
-void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg,
-                                       uint16_t first_vreg,
-                                       int64_t second_vreg_or_constant,
-                                       uint32_t dex_pc,
-                                       Primitive::Type type,
-                                       bool second_is_constant,
-                                       bool isDiv) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  HInstruction* first = LoadLocal(first_vreg, type, dex_pc);
-  HInstruction* second = nullptr;
-  if (second_is_constant) {
-    if (type == Primitive::kPrimInt) {
-      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
-    } else {
-      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
-    }
-  } else {
-    second = LoadLocal(second_vreg_or_constant, type, dex_pc);
-  }
-
-  if (!second_is_constant
-      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
-      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_pc);
-    Temporaries temps(graph_);
-    current_block_->AddInstruction(second);
-    temps.Add(current_block_->GetLastInstruction());
-  }
-
-  if (isDiv) {
-    current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HRem(type, first, second, dex_pc));
-  }
-  UpdateLocal(out_vreg, current_block_->GetLastInstruction(), dex_pc);
-}
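-
-// Illustrative HIR shape (hypothetical registers, not part of the original
-// code): `div-int v0, v1, v2` becomes
-//
-//   second = HDivZeroCheck(LoadLocal(v2))   // throws on a zero divisor
-//   result = HDiv(kPrimInt, LoadLocal(v1), second)
-//   UpdateLocal(v0, result)
-//
-// For a literal divisor, the zero check is only emitted when the constant is
-// actually zero, per the condition above.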
-
-void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
-                                     uint32_t dex_pc,
-                                     bool is_put,
-                                     Primitive::Type anticipated_type) {
-  uint8_t source_or_dest_reg = instruction.VRegA_23x();
-  uint8_t array_reg = instruction.VRegB_23x();
-  uint8_t index_reg = instruction.VRegC_23x();
-
-  // We need one temporary for the null check, one for the index, and one for the length.
-  Temporaries temps(graph_);
-
-  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot, dex_pc);
-  object = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(object);
-  temps.Add(object);
-
-  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
-  current_block_->AddInstruction(length);
-  temps.Add(length);
-  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt, dex_pc);
-  index = new (arena_) HBoundsCheck(index, length, dex_pc);
-  current_block_->AddInstruction(index);
-  temps.Add(index);
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type, dex_pc);
-    // TODO: Insert a type check node if the type is Object.
-    current_block_->AddInstruction(new (arena_) HArraySet(
-        object, index, value, anticipated_type, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  graph_->SetHasBoundsChecks(true);
-}
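-
-// Illustrative HIR shape (hypothetical registers, not part of the original
-// code): `aget v0, v1, v2` becomes
-//
-//   array  = HNullCheck(LoadLocal(v1, kPrimNot))
-//   length = HArrayLength(array)
-//   index  = HBoundsCheck(LoadLocal(v2, kPrimInt), length)
-//   value  = HArrayGet(array, index, anticipated_type)
-//   UpdateLocal(v0, value)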
-
-void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
-                                        uint32_t type_index,
-                                        uint32_t number_of_vreg_arguments,
-                                        bool is_range,
-                                        uint32_t* args,
-                                        uint32_t register_index) {
-  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
-  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
-      ? kQuickAllocArrayWithAccessCheck
-      : kQuickAllocArray;
-  HInstruction* object = new (arena_) HNewArray(length,
-                                                graph_->GetCurrentMethod(),
-                                                dex_pc,
-                                                type_index,
-                                                *dex_compilation_unit_->GetDexFile(),
-                                                entrypoint);
-  current_block_->AddInstruction(object);
-
-  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
-  DCHECK_EQ(descriptor[0], '[') << descriptor;
-  char primitive = descriptor[1];
-  DCHECK(primitive == 'I'
-      || primitive == 'L'
-      || primitive == '[') << descriptor;
-  bool is_reference_array = (primitive == 'L') || (primitive == '[');
-  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
-
-  Temporaries temps(graph_);
-  temps.Add(object);
-  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
-    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HArraySet(object, index, value, type, dex_pc));
-  }
-  latest_result_ = object;
-}
-
-template <typename T>
-void HGraphBuilder::BuildFillArrayData(HInstruction* object,
-                                       const T* data,
-                                       uint32_t element_count,
-                                       Primitive::Type anticipated_type,
-                                       uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, anticipated_type, dex_pc));
-  }
-}
-
-void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
-  Temporaries temps(graph_);
-  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot, dex_pc);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
-  current_block_->AddInstruction(null_check);
-  temps.Add(null_check);
-
-  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
-  current_block_->AddInstruction(length);
-
-  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
-  const Instruction::ArrayDataPayload* payload =
-      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_start_ + payload_offset);
-  const uint8_t* data = payload->data;
-  uint32_t element_count = payload->element_count;
-
-  // The semantics of this DEX instruction require the bounds check to be
-  // performed before any of the stores are done.
-  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
-  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
-
-  switch (payload->element_width) {
-    case 1:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int8_t*>(data),
-                         element_count,
-                         Primitive::kPrimByte,
-                         dex_pc);
-      break;
-    case 2:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int16_t*>(data),
-                         element_count,
-                         Primitive::kPrimShort,
-                         dex_pc);
-      break;
-    case 4:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int32_t*>(data),
-                         element_count,
-                         Primitive::kPrimInt,
-                         dex_pc);
-      break;
-    case 8:
-      BuildFillWideArrayData(null_check,
-                             reinterpret_cast<const int64_t*>(data),
-                             element_count,
-                             dex_pc);
-      break;
-    default:
-      LOG(FATAL) << "Unknown element width: " << payload->element_width;
-  }
-  graph_->SetHasBoundsChecks(true);
-}
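-
-// Layout sketch (illustrative, hypothetical name; not the authoritative
-// definition): the fields below mirror what BuildFillArrayData reads through
-// Instruction::ArrayDataPayload.
-struct ArrayDataPayloadSketch {
-  uint16_t ident;          // payload signature identifying array-data
-  uint16_t element_width;  // 1, 2, 4, or 8; switched on above
-  uint32_t element_count;  // number of elements stored in the payload
-  // element_count * element_width data bytes follow this header.
-};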
-
-void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
-                                           const int64_t* data,
-                                           uint32_t element_count,
-                                           uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, Primitive::kPrimLong, dex_pc));
-  }
-}
-
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (cls.Get() == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
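-
-// Worked examples (hypothetical classes, not part of the original code):
-//   unresolved class                -> kUnresolvedCheck
-//   java.lang.Comparable            -> kInterfaceCheck
-//   java.lang.Object[]              -> kArrayObjectCheck
-//   java.lang.String (final)        -> kExactCheck
-//   java.util.AbstractList          -> kAbstractClassCheck
-//   any other instantiable class    -> kClassHierarchyCheck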
-
-void HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
-                                   uint8_t destination,
-                                   uint8_t reference,
-                                   uint16_t type_index,
-                                   uint32_t dex_pc) {
-  bool type_known_final, type_known_abstract, use_declaring_class;
-  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(),
-      *dex_compilation_unit_->GetDexFile(),
-      type_index,
-      &type_known_final,
-      &type_known_abstract,
-      &use_declaring_class);
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-
-  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
-  HLoadClass* cls = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      *dex_compilation_unit_->GetDexFile(),
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      !can_access);
-  current_block_->AddInstruction(cls);
-
-  // The class needs a temporary before being used by the type check.
-  Temporaries temps(graph_);
-  temps.Add(cls);
-
-  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
-  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    current_block_->AddInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
-    UpdateLocal(destination, current_block_->GetLastInstruction(), dex_pc);
-  } else {
-    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
-    current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
-  }
-}
-
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const {
-  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index);
-}
-
-void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
-                                         const Instruction& instruction,
-                                         HInstruction* value,
-                                         uint32_t dex_pc) {
-  // Add the successor blocks to the current block.
-  uint16_t num_entries = table.GetNumEntries();
-  for (size_t i = 1; i <= num_entries; i++) {
-    int32_t target_offset = table.GetEntryAt(i);
-    HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
-    DCHECK(case_target != nullptr);
-
-    // Add the target block as a successor.
-    current_block_->AddSuccessor(case_target);
-  }
-
-  // Add the default target block as the last successor.
-  HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(default_target != nullptr);
-  current_block_->AddSuccessor(default_target);
-
-  // Now add the Switch instruction.
-  int32_t starting_key = table.GetEntryAt(0);
-  current_block_->AddInstruction(
-      new (arena_) HPackedSwitch(starting_key, num_entries, value, dex_pc));
-  // This block ends with control flow.
-  current_block_ = nullptr;
-}
-
-void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  // Verifier guarantees that the payload for PackedSwitch contains:
-  //   (a) number of entries (may be zero)
-  //   (b) first and lowest switch case value (entry 0, always present)
-  //   (c) list of target pcs (entries 1 <= i <= N)
-  SwitchTable table(instruction, dex_pc, false);
-
-  // Value to test against.
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-
-  // Starting key value.
-  int32_t starting_key = table.GetEntryAt(0);
-
-  // Retrieve number of entries.
-  uint16_t num_entries = table.GetNumEntries();
-  if (num_entries == 0) {
-    return;
-  }
-
-  // Use a switch jump table only when there are enough entries; otherwise fall
-  // back to chained cmp-and-branch.
-  if (num_entries > kSmallSwitchThreshold) {
-    BuildSwitchJumpTable(table, instruction, value, dex_pc);
-  } else {
-    // Chained cmp-and-branch, starting from starting_key.
-    for (size_t i = 1; i <= num_entries; i++) {
-      BuildSwitchCaseHelper(instruction,
-                            i,
-                            i == num_entries,
-                            table,
-                            value,
-                            starting_key + i - 1,
-                            table.GetEntryAt(i),
-                            dex_pc);
-    }
-  }
-}
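-
-// Worked example (hypothetical values, not part of the original code): a
-// packed-switch with starting key 10 and three entries tests the value against
-// 10, 11 and 12.  Few entries (<= kSmallSwitchThreshold) are lowered to the
-// chained HEqual/HIf pattern in BuildSwitchCaseHelper; larger tables get a
-// single HPackedSwitch via BuildSwitchJumpTable.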
-
-void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  // Verifier guarantees that the payload for SparseSwitch contains:
-  //   (a) number of entries (may be zero)
-  //   (b) sorted key values (entries 0 <= i < N)
-  //   (c) target pcs corresponding to the switch values (entries N <= i < 2*N)
-  SwitchTable table(instruction, dex_pc, true);
-
-  // Value to test against.
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-
-  uint16_t num_entries = table.GetNumEntries();
-
-  for (size_t i = 0; i < num_entries; i++) {
-    BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value,
-                          table.GetEntryAt(i), table.GetEntryAt(i + num_entries), dex_pc);
-  }
-}
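-
-// Worked example (hypothetical layout, not part of the original code): with
-// num_entries == 3, the sparse table stores keys at indices 0..2 and the
-// matching target offsets at indices 3..5, which is why the loop above pairs
-// GetEntryAt(i) with GetEntryAt(i + num_entries).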
-
-void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction,
-                                          size_t index,
-                                          bool is_last_case,
-                                          const SwitchTable& table,
-                                          HInstruction* value,
-                                          int32_t case_value_int,
-                                          int32_t target_offset,
-                                          uint32_t dex_pc) {
-  HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
-  DCHECK(case_target != nullptr);
-  PotentiallyAddSuspendCheck(case_target, dex_pc);
-
-  // The current case's value.
-  HInstruction* this_case_value = graph_->GetIntConstant(case_value_int, dex_pc);
-
-  // Compare value and this_case_value.
-  HEqual* comparison = new (arena_) HEqual(value, this_case_value, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-
-  // Case hit: use the target offset to determine where to go.
-  current_block_->AddSuccessor(case_target);
-
-  // Case miss: go to the next case (or default fall-through).
-  // When there is a next case, we use the block stored with the table offset representing this
-  // case (that is where we registered them in ComputeBranchTargets).
-  // When there is no next case, we use the following instruction.
-  // TODO: Find a good way to peel the last iteration to avoid conditional, but still have re-use.
-  if (!is_last_case) {
-    HBasicBlock* next_case_target = FindBlockStartingAt(table.GetDexPcForIndex(index));
-    DCHECK(next_case_target != nullptr);
-    current_block_->AddSuccessor(next_case_target);
-
-    // Need to manually add the block, as there is no dex-pc transition for the cases.
-    graph_->AddBlock(next_case_target);
-
-    current_block_ = next_case_target;
-  } else {
-    HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-    DCHECK(default_target != nullptr);
-    current_block_->AddSuccessor(default_target);
-    current_block_ = nullptr;
-  }
-}
-
-void HGraphBuilder::PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc) {
-  int32_t target_offset = target->GetDexPc() - dex_pc;
-  if (target_offset <= 0) {
-    // DX generates back edges to the first encountered return. We can save
-    // time in later passes by not adding redundant suspend checks.
-    HInstruction* last_in_target = target->GetLastInstruction();
-    if (last_in_target != nullptr &&
-        (last_in_target->IsReturn() || last_in_target->IsReturnVoid())) {
-      return;
-    }
-
-    // Add a suspend check to backward branches which may potentially loop. We
-    // can remove them after we recognize loops in the graph.
-    current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_pc));
-  }
-}
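-
-// Worked example (hypothetical offsets, not part of the original code): a goto
-// at dex_pc 20 targeting dex_pc 8 has target_offset == -12, so an HSuspendCheck
-// is emitted unless the target block already ends in a return; forward branches
-// (positive offsets) never get one here.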
-
-bool HGraphBuilder::CanDecodeQuickenedInfo() const {
-  return interpreter_metadata_ != nullptr;
-}
-
-uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
-  DCHECK(interpreter_metadata_ != nullptr);
-  uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
-  DCHECK_EQ(dex_pc, dex_pc_in_map);
-  return DecodeUnsignedLeb128(&interpreter_metadata_);
-}
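-
-// Minimal sketch (hypothetical name, not part of the original code) of the
-// unsigned LEB128 decoding that DecodeUnsignedLeb128 performs on the metadata
-// stream above: each (dex_pc, value) pair is stored as two variable-length
-// integers, and decoding advances the cursor past the bytes it consumed.
-static uint32_t DecodeUleb128Sketch(const uint8_t** cursor) {
-  uint32_t result = 0;
-  int shift = 0;
-  uint8_t byte;
-  do {
-    byte = *(*cursor)++;                                     // consume one byte
-    result |= static_cast<uint32_t>(byte & 0x7f) << shift;   // low 7 bits
-    shift += 7;
-  } while ((byte & 0x80) != 0);                              // high bit set: more
-  return result;
-}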
-
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
-  if (current_block_ == nullptr) {
-    return true;  // Dead code
-  }
-
-  switch (instruction.Opcode()) {
-    case Instruction::CONST_4: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_16: {
-      int32_t register_index = instruction.VRegA();
-      // Get 16 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_21s();
-      value <<= 48;
-      value >>= 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_32: {
-      int32_t register_index = instruction.VRegA();
-      // Get 32 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_31i();
-      value <<= 32;
-      value >>= 32;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
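-
-    // Illustrative arithmetic (not part of the original code): the shift pair
-    // in the two cases above sign-extends the immediate.  For CONST_WIDE_16,
-    // value = 0xFFFF becomes ((0xFFFF << 48) >> 48) == -1, while 0x7FFF stays
-    // 0x7FFF: the arithmetic right shift replicates the sign bit, assuming a
-    // two's complement int64_t.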
-
-    case Instruction::CONST_WIDE: {
-      int32_t register_index = instruction.VRegA();
-      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    // Note that SSA building will refine the types.
-    case Instruction::MOVE:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    // Note that SSA building will refine the types.
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_FROM16:
-    case Instruction::MOVE_WIDE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_OBJECT_FROM16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_VOID_NO_BARRIER:
-    case Instruction::RETURN_VOID: {
-      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
-      break;
-    }
-
-#define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
-    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
-
-    IF_XX(HEqual, EQ);
-    IF_XX(HNotEqual, NE);
-    IF_XX(HLessThan, LT);
-    IF_XX(HLessThanOrEqual, LE);
-    IF_XX(HGreaterThan, GT);
-    IF_XX(HGreaterThanOrEqual, GE);
-
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32: {
-      int32_t offset = instruction.GetTargetOffset();
-      HBasicBlock* target = FindBlockStartingAt(offset + dex_pc);
-      DCHECK(target != nullptr);
-      PotentiallyAddSuspendCheck(target, dex_pc);
-      current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-      current_block_->AddSuccessor(target);
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::RETURN: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_OBJECT: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_WIDE: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_35c();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, false, args, -1)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-    case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_3rc();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t register_index = instruction.VRegC();
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, true, nullptr, register_index)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::NEG_INT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_LONG: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_FLOAT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_DOUBLE: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_INT: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_LONG: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_BYTE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_SHORT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_CHAR: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::REM_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT16: {
-      Binop_22s<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT16: {
-      Binop_22s<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT16: {
-      Binop_22s<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT16: {
-      Binop_22s<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT: {
-      Binop_22s<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT16: {
-      Binop_22s<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT8: {
-      Binop_22b<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT8: {
-      Binop_22b<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT8: {
-      Binop_22b<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT8: {
-      Binop_22b<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT_LIT8: {
-      Binop_22b<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT8: {
-      Binop_22b<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::DIV_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, true);
-      break;
-    }
-
-    case Instruction::REM_INT_LIT16:
-    case Instruction::REM_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, false);
-      break;
-    }
-
-    case Instruction::SHL_INT_LIT8: {
-      Binop_22b<HShl>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_LIT8: {
-      Binop_22b<HShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_LIT8: {
-      Binop_22b<HUShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_INSTANCE: {
-      uint16_t type_index = instruction.VRegB_21c();
-      if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) {
-        int32_t register_index = instruction.VRegA();
-        HFakeString* fake_string = new (arena_) HFakeString(dex_pc);
-        current_block_->AddInstruction(fake_string);
-        UpdateLocal(register_index, fake_string, dex_pc);
-      } else {
-        QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
-            ? kQuickAllocObjectWithAccessCheck
-            : kQuickAllocObject;
-
-        current_block_->AddInstruction(new (arena_) HNewInstance(
-            graph_->GetCurrentMethod(),
-            dex_pc,
-            type_index,
-            *dex_compilation_unit_->GetDexFile(),
-            entrypoint));
-        UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-      }
-      break;
-    }
-
-    case Instruction::NEW_ARRAY: {
-      uint16_t type_index = instruction.VRegC_22c();
-      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
-          ? kQuickAllocArrayWithAccessCheck
-          : kQuickAllocArray;
-      current_block_->AddInstruction(new (arena_) HNewArray(length,
-                                                            graph_->GetCurrentMethod(),
-                                                            dex_pc,
-                                                            type_index,
-                                                            *dex_compilation_unit_->GetDexFile(),
-                                                            entrypoint));
-      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t type_index = instruction.VRegB_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY_RANGE: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t type_index = instruction.VRegB_3rc();
-      uint32_t register_index = instruction.VRegC_3rc();
-      BuildFilledNewArray(
-          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
-      break;
-    }
-
-    case Instruction::FILL_ARRAY_DATA: {
-      BuildFillArrayData(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE:
-    case Instruction::MOVE_RESULT_OBJECT: {
-      if (latest_result_ == nullptr) {
-        // Only dead code can lead to this situation: since the MoveResult is
-        // unreachable, the verifier does not reject the method.
-      } else {
-        // An Invoke/FilledNewArray and its MoveResult could have landed in
-        // different blocks if there was a try/catch block boundary between
-        // them. For Invoke, we insert a StoreLocal after the instruction. For
-        // FilledNewArray, the local needs to be updated after the array was
-        // filled, otherwise we might overwrite an input vreg.
-        HStoreLocal* update_local =
-            new (arena_) HStoreLocal(GetLocalAt(instruction.VRegA()), latest_result_, dex_pc);
-        HBasicBlock* block = latest_result_->GetBlock();
-        if (block == current_block_) {
-          // MoveResult and the previous instruction are in the same block.
-          current_block_->AddInstruction(update_local);
-        } else {
-          // The two instructions are in different blocks. Insert the MoveResult
-          // before the final control-flow instruction of the previous block.
-          DCHECK(block->EndsWithControlFlowInstruction());
-          DCHECK(current_block_->GetInstructions().IsEmpty());
-          block->InsertInstructionBefore(update_local, block->GetLastInstruction());
-        }
-        latest_result_ = nullptr;
-      }
-      break;
-    }
-
-    case Instruction::CMP_LONG: {
-      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::NOP:
-      break;
-
-    case Instruction::IGET:
-    case Instruction::IGET_QUICK:
-    case Instruction::IGET_WIDE:
-    case Instruction::IGET_WIDE_QUICK:
-    case Instruction::IGET_OBJECT:
-    case Instruction::IGET_OBJECT_QUICK:
-    case Instruction::IGET_BOOLEAN:
-    case Instruction::IGET_BOOLEAN_QUICK:
-    case Instruction::IGET_BYTE:
-    case Instruction::IGET_BYTE_QUICK:
-    case Instruction::IGET_CHAR:
-    case Instruction::IGET_CHAR_QUICK:
-    case Instruction::IGET_SHORT:
-    case Instruction::IGET_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::IPUT:
-    case Instruction::IPUT_QUICK:
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_WIDE_QUICK:
-    case Instruction::IPUT_OBJECT:
-    case Instruction::IPUT_OBJECT_QUICK:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BOOLEAN_QUICK:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_BYTE_QUICK:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_CHAR_QUICK:
-    case Instruction::IPUT_SHORT:
-    case Instruction::IPUT_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-#define ARRAY_XX(kind, anticipated_type)                                \
-    case Instruction::AGET##kind: {                                     \
-      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);   \
-      break;                                                            \
-    }                                                                   \
-    case Instruction::APUT##kind: {                                     \
-      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);    \
-      break;                                                            \
-    }
-
-    ARRAY_XX(, Primitive::kPrimInt);
-    ARRAY_XX(_WIDE, Primitive::kPrimLong);
-    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
-    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
-    ARRAY_XX(_BYTE, Primitive::kPrimByte);
-    ARRAY_XX(_CHAR, Primitive::kPrimChar);
-    ARRAY_XX(_SHORT, Primitive::kPrimShort);
-
-    case Instruction::ARRAY_LENGTH: {
-      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot, dex_pc);
-      // No need for a temporary for the null check, as it is the only input of the
-      // following instruction.
-      object = new (arena_) HNullCheck(object, dex_pc);
-      current_block_->AddInstruction(object);
-      current_block_->AddInstruction(new (arena_) HArrayLength(object, dex_pc));
-      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING: {
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_21c(), dex_pc));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING_JUMBO: {
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_31c(), dex_pc));
-      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_CLASS: {
-      uint16_t type_index = instruction.VRegB_21c();
-      bool type_known_final;
-      bool type_known_abstract;
-      bool dont_use_is_referrers_class;
-      // `CanAccessTypeWithoutChecks` will tell whether the method being
-      // built is trying to access its own class, so that the generated
-      // code can optimize for this case. However, the optimization does not
-      // work for inlining, so we use `IsOutermostCompilingClass` instead.
-      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
-          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
-      current_block_->AddInstruction(new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          type_index,
-          *dex_compilation_unit_->GetDexFile(),
-          IsOutermostCompilingClass(type_index),
-          dex_pc,
-          !can_access));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_EXCEPTION: {
-      current_block_->AddInstruction(new (arena_) HLoadException(dex_pc));
-      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction(), dex_pc);
-      current_block_->AddInstruction(new (arena_) HClearException(dex_pc));
-      break;
-    }
-
-    case Instruction::THROW: {
-      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc);
-      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_pc));
-      // A throw instruction must branch to the exit block.
-      current_block_->AddSuccessor(exit_block_);
-      // We finished building this block. Set the current block to null to avoid
-      // adding dead instructions to it.
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::INSTANCE_OF: {
-      uint8_t destination = instruction.VRegA_22c();
-      uint8_t reference = instruction.VRegB_22c();
-      uint16_t type_index = instruction.VRegC_22c();
-      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::CHECK_CAST: {
-      uint8_t reference = instruction.VRegA_21c();
-      uint16_t type_index = instruction.VRegB_21c();
-      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::MONITOR_ENTER: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::kEnter,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::MONITOR_EXIT: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::kExit,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::PACKED_SWITCH: {
-      BuildPackedSwitch(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::SPARSE_SWITCH: {
-      BuildSparseSwitch(instruction, dex_pc);
-      break;
-    }
-
-    default:
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of unhandled instruction "
-                     << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
-      return false;
-  }
-  return true;
-}  // NOLINT(readability/fn_size)
-
-HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
-  return locals_[register_index];
-}
-
-void HGraphBuilder::UpdateLocal(uint32_t register_index,
-                                HInstruction* instruction,
-                                uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc));
-}
-
-HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index,
-                                       Primitive::Type type,
-                                       uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HLoadLocal(local, type, dex_pc));
-  return current_block_->GetLastInstruction();
+  // 5) Type the graph and eliminate dead/redundant phis.
+  return ssa_builder_.BuildSsa();
 }
 
 }  // namespace art
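The builder.cc changes above remove the monolithic instruction-lowering switch together with its LoadLocal/UpdateLocal helpers: each dex instruction read its inputs from HLocal slots, appended the new HInstruction to the current block, and stored the result back. A minimal sketch of that pattern for a 12x binary op, assuming an int add (HAdd and the VRegA/VRegB accessors appear in the deleted code; the fixed kPrimInt type and the function name are illustrative):

    // Sketch only: how the removed helpers cooperated to lower one instruction.
    void HGraphBuilder::Binop_12x_sketch(const Instruction& instruction, uint32_t dex_pc) {
      // Read both operands from their dex register slots.
      HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
      HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
      // Append the arithmetic node to the block under construction.
      current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second, dex_pc));
      // Store the result back into the destination dex register.
      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
    }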
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 6910d51..580ef72 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -19,314 +19,90 @@
 
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
+#include "block_builder.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "instruction_builder.h"
 #include "optimizing_compiler_stats.h"
 #include "primitive.h"
 #include "nodes.h"
+#include "ssa_builder.h"
 
 namespace art {
 
-class Instruction;
-class SwitchTable;
-
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(HGraph* graph,
                 DexCompilationUnit* dex_compilation_unit,
                 const DexCompilationUnit* const outer_compilation_unit,
                 const DexFile* dex_file,
+                const DexFile::CodeItem& code_item,
                 CompilerDriver* driver,
                 OptimizingCompilerStats* compiler_stats,
                 const uint8_t* interpreter_metadata,
-                Handle<mirror::DexCache> dex_cache)
-      : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        entry_block_(nullptr),
-        exit_block_(nullptr),
-        current_block_(nullptr),
-        graph_(graph),
+                Handle<mirror::DexCache> dex_cache,
+                StackHandleScopeCollection* handles)
+      : graph_(graph),
         dex_file_(dex_file),
+        code_item_(code_item),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
-        outer_compilation_unit_(outer_compilation_unit),
-        return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
-        code_start_(nullptr),
-        latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(compiler_stats),
-        interpreter_metadata_(interpreter_metadata),
-        dex_cache_(dex_cache) {}
+        block_builder_(graph, dex_file, code_item),
+        ssa_builder_(graph, dex_compilation_unit->GetDexCache(), handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             dex_file,
+                             code_item_,
+                             Primitive::GetType(dex_compilation_unit_->GetShorty()[0]),
+                             dex_compilation_unit,
+                             outer_compilation_unit,
+                             driver,
+                             interpreter_metadata,
+                             compiler_stats,
+                             dex_cache) {}
 
   // Only for unit testing.
-  HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt)
-      : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        entry_block_(nullptr),
-        exit_block_(nullptr),
-        current_block_(nullptr),
-        graph_(graph),
+  HGraphBuilder(HGraph* graph,
+                const DexFile::CodeItem& code_item,
+                StackHandleScopeCollection* handles,
+                Primitive::Type return_type = Primitive::kPrimInt)
+      : graph_(graph),
         dex_file_(nullptr),
+        code_item_(code_item),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
-        outer_compilation_unit_(nullptr),
-        return_type_(return_type),
-        code_start_(nullptr),
-        latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
+        null_dex_cache_(),
         compilation_stats_(nullptr),
-        interpreter_metadata_(nullptr),
-        dex_cache_(NullHandle<mirror::DexCache>()) {}
+        block_builder_(graph, nullptr, code_item),
+        ssa_builder_(graph, null_dex_cache_, handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             /* dex_file */ nullptr,
+                             code_item_,
+                             return_type,
+                             /* dex_compilation_unit */ nullptr,
+                             /* outer_compilation_unit */ nullptr,
+                             /* compiler_driver */ nullptr,
+                             /* interpreter_metadata */ nullptr,
+                             /* compiler_stats */ nullptr,
+                             null_dex_cache_) {}
 
-  bool BuildGraph(const DexFile::CodeItem& code);
-
-  bool CanUseBaselineForStringInit() const {
-    return can_use_baseline_for_string_init_;
-  }
+  GraphAnalysisResult BuildGraph();
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 5;
-
  private:
-  // Analyzes the dex instruction and adds HInstruction to the graph
-  // to execute that instruction. Returns whether the instruction can
-  // be handled.
-  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc);
-
-  // Finds all instructions that start a new block, and populates branch_targets_ with
-  // the newly created blocks.
-  // As a side effect, also computes the number of dex instructions, blocks, and
-  // branches.
-  // Returns true if all the branches fall inside the method code, false otherwise.
-  // (In normal cases this should always return true, but someone can artificially
-  // create a code unit in which branches fall through out of it.)
-  bool ComputeBranchTargets(const uint16_t* start,
-                            const uint16_t* end,
-                            size_t* number_of_branches);
-  void MaybeUpdateCurrentBlock(size_t dex_pc);
-  HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const;
-  HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc);
-
-  // Adds new blocks to `branch_targets_` starting at the limits of TryItems and
-  // their exception handlers.
-  void CreateBlocksForTryCatch(const DexFile::CodeItem& code_item);
-
-  // Splits edges which cross the boundaries of TryItems, inserts TryBoundary
-  // instructions and links them to the corresponding catch blocks.
-  void InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item);
-
-  // Iterates over the exception handlers of `try_item`, finds the corresponding
-  // catch blocks and makes them successors of `try_boundary`. The order of
-  // successors matches the order in which runtime exception delivery searches
-  // for a handler.
-  void LinkToCatchBlocks(HTryBoundary* try_boundary,
-                         const DexFile::CodeItem& code_item,
-                         const DexFile::TryItem* try_item);
-
-  bool CanDecodeQuickenedInfo() const;
-  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
-
-  void InitializeLocals(uint16_t count);
-  HLocal* GetLocalAt(uint32_t register_index) const;
-  void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const;
-  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
-  void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc);
-  void InitializeParameters(uint16_t number_of_parameters);
-  bool NeedsAccessCheck(uint32_t type_index) const;
-
-  template<typename T>
-  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  void Binop_23x_cmp(const Instruction& instruction,
-                     Primitive::Type type,
-                     ComparisonBias bias,
-                     uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
-  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
-
-  void Conversion_12x(const Instruction& instruction,
-                      Primitive::Type input_type,
-                      Primitive::Type result_type,
-                      uint32_t dex_pc);
-
-  void BuildCheckedDivRem(uint16_t out_reg,
-                          uint16_t first_reg,
-                          int64_t second_reg_or_constant,
-                          uint32_t dex_pc,
-                          Primitive::Type type,
-                          bool second_is_lit,
-                          bool is_div);
-
-  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                        uint32_t dex_pc,
-                                        bool is_put,
-                                        Primitive::Type field_type);
-  // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildArrayAccess(const Instruction& instruction,
-                        uint32_t dex_pc,
-                        bool is_get,
-                        Primitive::Type anticipated_type);
-
-  // Builds an invocation node and returns whether the instruction is supported.
-  bool BuildInvoke(const Instruction& instruction,
-                   uint32_t dex_pc,
-                   uint32_t method_idx,
-                   uint32_t number_of_vreg_arguments,
-                   bool is_range,
-                   uint32_t* args,
-                   uint32_t register_index);
-
-  // Builds a new array node and the instructions that fill it.
-  void BuildFilledNewArray(uint32_t dex_pc,
-                           uint32_t type_index,
-                           uint32_t number_of_vreg_arguments,
-                           bool is_range,
-                           uint32_t* args,
-                           uint32_t register_index);
-
-  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. Currently only used for non-reference and non-floating-point
-  // arrays.
-  template <typename T>
-  void BuildFillArrayData(HInstruction* object,
-                          const T* data,
-                          uint32_t element_count,
-                          Primitive::Type anticipated_type,
-                          uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. The data must be for long and double arrays.
-  void BuildFillWideArrayData(HInstruction* object,
-                              const int64_t* data,
-                              uint32_t element_count,
-                              uint32_t dex_pc);
-
-  // Builds a `HInstanceOf` or a `HCheckCast` instruction.
-  void BuildTypeCheck(const Instruction& instruction,
-                      uint8_t destination,
-                      uint8_t reference,
-                      uint16_t type_index,
-                      uint32_t dex_pc);
-
-  // Builds an instruction sequence for a packed switch statement.
-  void BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc);
-
-  // Build a switch instruction from a packed switch statement.
-  void BuildSwitchJumpTable(const SwitchTable& table,
-                            const Instruction& instruction,
-                            HInstruction* value,
-                            uint32_t dex_pc);
-
-  // Builds an instruction sequence for a sparse switch statement.
-  void BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc);
-
-  void BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
-                             bool is_last_case, const SwitchTable& table,
-                             HInstruction* value, int32_t case_value_int,
-                             int32_t target_offset, uint32_t dex_pc);
-
-  bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches);
-
   void MaybeRecordStat(MethodCompilationStat compilation_stat);
+  bool SkipCompilation(size_t number_of_branches);
 
-  // Returns the outer-most compiling method's class.
-  mirror::Class* GetOutermostCompilingClass() const;
-
-  // Returns the class whose method is being compiled.
-  mirror::Class* GetCompilingClass() const;
-
-  // Returns whether `type_index` points to the outer-most compiling method's class.
-  bool IsOutermostCompilingClass(uint16_t type_index) const;
-
-  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                     uint32_t dex_pc,
-                                     HInvoke* invoke);
-
-  HInvokeStaticOrDirect::DispatchInfo ComputeDispatchInfo(bool is_string_init,
-                                                          int32_t string_init_offset,
-                                                          MethodReference target_method,
-                                                          uintptr_t direct_method,
-                                                          uintptr_t direct_code);
-
-  bool SetupInvokeArguments(HInvoke* invoke,
-                            uint32_t number_of_vreg_arguments,
-                            uint32_t* args,
-                            uint32_t register_index,
-                            bool is_range,
-                            const char* descriptor,
-                            size_t start_index,
-                            size_t* argument_index);
-
-  bool HandleInvoke(HInvoke* invoke,
-                    uint32_t number_of_vreg_arguments,
-                    uint32_t* args,
-                    uint32_t register_index,
-                    bool is_range,
-                    const char* descriptor,
-                    HClinitCheck* clinit_check);
-
-  bool HandleStringInit(HInvoke* invoke,
-                        uint32_t number_of_vreg_arguments,
-                        uint32_t* args,
-                        uint32_t register_index,
-                        bool is_range,
-                        const char* descriptor);
-
-  HClinitCheck* ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
-
-  ArenaAllocator* const arena_;
-
-  // A list with one entry per code unit of the method's dex code, holding
-  // block information. If an entry contains a block, then the dex instruction
-  // starting at that entry is the first instruction of a new block.
-  ArenaVector<HBasicBlock*> branch_targets_;
-
-  ArenaVector<HLocal*> locals_;
-
-  HBasicBlock* entry_block_;
-  HBasicBlock* exit_block_;
-  HBasicBlock* current_block_;
   HGraph* const graph_;
-
-  // The dex file where the method being compiled is.
   const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
 
   // The compilation unit of the current method being compiled. Note that
   // it can be an inlined method.
@@ -334,33 +110,13 @@
 
   CompilerDriver* const compiler_driver_;
 
-  // The compilation unit of the outermost method being compiled. That is the
-  // method being compiled (and not inlined), and potentially inlining other
-  // methods.
-  const DexCompilationUnit* const outer_compilation_unit_;
-
-  // The return type of the method being compiled.
-  const Primitive::Type return_type_;
-
-  // The pointer in the dex file where the instructions of the code item
-  // being currently compiled start.
-  const uint16_t* code_start_;
-
-  // The last invoke or fill-new-array being built. Only to be
-  // used by move-result instructions.
-  HInstruction* latest_result_;
-
-  // We need to know whether we have built a graph that has calls to StringFactory
-  // and hasn't gone through the verifier. If the following flag is `false`, then
-  // we cannot compile with baseline.
-  bool can_use_baseline_for_string_init_;
+  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
 
   OptimizingCompilerStats* compilation_stats_;
 
-  const uint8_t* interpreter_metadata_;
-
-  // Dex cache for dex_file_.
-  Handle<mirror::DexCache> dex_cache_;
+  HBasicBlockBuilder block_builder_;
+  SsaBuilder ssa_builder_;
+  HInstructionBuilder instruction_builder_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
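After this refactoring, HGraphBuilder is a thin coordinator over the three member builders declared above. A hedged sketch of how BuildGraph() might chain them; the Build() method names and the failure value are assumptions, with only the final ssa_builder_.BuildSsa() step confirmed by the builder.cc hunk above:

    // Sketch only: staged graph construction delegating to the member builders.
    GraphAnalysisResult HGraphBuilder::BuildGraph() {
      // 1) Carve the code item into basic blocks (branch targets, try/catch).
      if (!block_builder_.Build()) {        // assumed entry point
        return kAnalysisInvalidBytecode;    // assumed failure value
      }
      // 2) Populate the blocks with HInstructions decoded from the bytecode.
      if (!instruction_builder_.Build()) {  // assumed entry point
        return kAnalysisInvalidBytecode;
      }
      // 3) Type the graph and eliminate dead/redundant phis.
      return ssa_builder_.BuildSsa();
    }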
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
new file mode 100644
index 0000000..6dfffce
--- /dev/null
+++ b/compiler/optimizing/bytecode_utils.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+
+#include "base/arena_object.h"
+#include "dex_file.h"
+#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
+
+namespace art {
+
+class CodeItemIterator : public ValueObject {
+ public:
+  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u)
+      : code_ptr_(code_item.insns_ + start_dex_pc),
+        code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
+        dex_pc_(start_dex_pc) {}
+
+  bool Done() const { return code_ptr_ >= code_end_; }
+  bool IsLast() const { return code_ptr_ + CurrentInstruction().SizeInCodeUnits() >= code_end_; }
+
+  const Instruction& CurrentInstruction() const { return *Instruction::At(code_ptr_); }
+  uint32_t CurrentDexPc() const { return dex_pc_; }
+
+  void Advance() {
+    DCHECK(!Done());
+    size_t instruction_size = CurrentInstruction().SizeInCodeUnits();
+    code_ptr_ += instruction_size;
+    dex_pc_ += instruction_size;
+  }
+
+ private:
+  const uint16_t* code_ptr_;
+  const uint16_t* const code_end_;
+  uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeItemIterator);
+};
+
+class DexSwitchTable : public ValueObject {
+ public:
+  DexSwitchTable(const Instruction& instruction, uint32_t dex_pc)
+      : instruction_(instruction),
+        dex_pc_(dex_pc),
+        sparse_(instruction.Opcode() == Instruction::SPARSE_SWITCH) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+    DCHECK_EQ(table[0], sparse_ ? static_cast<uint16_t>(Instruction::kSparseSwitchSignature)
+                                : static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  void CheckIndex(size_t index) const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+    }
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    CheckIndex(index);
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    CheckIndex(index);
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+  // Index of the first value in the table.
+  size_t GetFirstValueIndex() const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      return num_entries_;
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      return 1;
+    }
+  }
+
+  bool IsSparse() const { return sparse_; }
+
+  bool ShouldBuildDecisionTree() {
+    return IsSparse() || GetNumEntries() <= kSmallSwitchThreshold;
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This cannot be const, as it is computed from the given instruction, and a
+  // complicated expression in the initializer list would hurt readability.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  // The maximum number of entries in a packed switch for which we build a
+  // compare/jump series instead of a jump table.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
+
+  DISALLOW_COPY_AND_ASSIGN(DexSwitchTable);
+};
+
+class DexSwitchTableIterator {
+ public:
+  explicit DexSwitchTableIterator(const DexSwitchTable& table)
+      : table_(table),
+        num_entries_(static_cast<size_t>(table_.GetNumEntries())),
+        first_target_offset_(table_.GetFirstValueIndex()),
+        index_(0u) {}
+
+  bool Done() const { return index_ >= num_entries_; }
+  bool IsLast() const { return index_ == num_entries_ - 1; }
+
+  void Advance() {
+    DCHECK(!Done());
+    index_++;
+  }
+
+  int32_t CurrentKey() const {
+    return table_.IsSparse() ? table_.GetEntryAt(index_) : table_.GetEntryAt(0) + index_;
+  }
+
+  int32_t CurrentTargetOffset() const {
+    return table_.GetEntryAt(index_ + first_target_offset_);
+  }
+
+  uint32_t GetDexPcForCurrentIndex() const { return table_.GetDexPcForIndex(index_); }
+
+ private:
+  const DexSwitchTable& table_;
+  const size_t num_entries_;
+  const size_t first_target_offset_;
+
+  size_t index_;
+};
+
+inline const Instruction& GetDexInstructionAt(const DexFile::CodeItem& code_item, uint32_t dex_pc) {
+  return CodeItemIterator(code_item, dex_pc).CurrentInstruction();
+}
+
+inline bool IsThrowingDexInstruction(const Instruction& instruction) {
+  // Special-case MONITOR_EXIT, which is a throwing instruction for which the
+  // verifier guarantees that it will never throw. This is necessary to avoid
+  // rejecting 'synchronized' blocks/methods.
+  return instruction.IsThrow() && instruction.Opcode() != Instruction::MONITOR_EXIT;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
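Since bytecode_utils.h is new and self-contained, a short usage sketch may help; `code_item` below is a hypothetical DexFile::CodeItem reference and the loop body is illustrative, not code from this change:

    // Sketch only: walking a method's bytecode and decoding switch payloads.
    void WalkCodeItem(const DexFile::CodeItem& code_item) {
      for (CodeItemIterator it(code_item); !it.Done(); it.Advance()) {
        const Instruction& insn = it.CurrentInstruction();
        if (insn.Opcode() == Instruction::PACKED_SWITCH ||
            insn.Opcode() == Instruction::SPARSE_SWITCH) {
          DexSwitchTable table(insn, it.CurrentDexPc());
          for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
            // Each entry yields a (key, target) pair; the branch target lives at
            // dex pc `it.CurrentDexPc() + s_it.CurrentTargetOffset()`.
            static_cast<void>(s_it.CurrentKey());
            static_cast<void>(s_it.CurrentTargetOffset());
          }
        }
      }
    }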
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 1c62dfa..c532e72 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,22 +40,20 @@
 #include "code_generator_mips64.h"
 #endif
 
+#include "bytecode_utils.h"
 #include "compiled_method.h"
 #include "dex/verified_method.h"
-#include "driver/dex_compilation_unit.h"
-#include "gc_map_builder.h"
+#include "driver/compiler_driver.h"
 #include "graph_visualizer.h"
 #include "intrinsics.h"
 #include "leb128.h"
-#include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
+#include "mirror/string.h"
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
-#include "verifier/dex_gc_map.h"
-#include "vmap_table.h"
 
 namespace art {
 
@@ -113,10 +111,10 @@
         << " " << locations->Out();
   }
 
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    DCHECK(CheckType(instruction->InputAt(i)->GetType(), locations->InAt(i)))
-      << instruction->InputAt(i)->GetType()
-      << " " << locations->InAt(i);
+  HConstInputsRef inputs = instruction->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    DCHECK(CheckType(inputs[i]->GetType(), locations->InAt(i)))
+      << inputs[i]->GetType() << " " << locations->InAt(i);
   }
 
   HEnvironment* environment = instruction->GetEnvironment();
@@ -139,24 +137,20 @@
 
 size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
   auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
-  return pointer_size * index;
+  return static_cast<size_t>(pointer_size) * index;
 }
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
-  Initialize();
-  if (!is_leaf) {
-    MarkNotLeaf();
-  }
-  const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
-  InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
-                             + GetGraph()->GetTemporariesVRegSlots()
-                             + 1 /* filler */,
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           GetGraph()->GetMaximumNumberOfOutVRegs()
-                             + (is_64_bit ? 2 : 1) /* current method */,
-                           GetGraph()->GetBlocks());
-  CompileInternal(allocator, /* is_baseline */ true);
+uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) {
+  return array_length->IsStringLength()
+      ? mirror::String::CountOffset().Uint32Value()
+      : mirror::Array::LengthOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetArrayDataOffset(HArrayGet* array_get) {
+  DCHECK(array_get->GetType() == Primitive::kPrimChar || !array_get->IsStringCharAt());
+  return array_get->IsStringCharAt()
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(Primitive::ComponentSize(array_get->GetType())).Uint32Value();
 }
 
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
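The two helpers added in the hunk above let HArrayLength and HArrayGet double as String.length() and String.charAt() accessors by selecting between the Array and String field offsets. A hedged sketch of how a backend visitor might consume them; EmitLoad32, OutputRegister and InputRegister are hypothetical stand-ins for architecture-specific codegen calls:

    // Sketch only: a backend visitor consuming the new offset helper.
    void VisitArrayLength(HArrayLength* instruction) {
      // Reads either mirror::Array::length_ or mirror::String::count_.
      uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
      EmitLoad32(OutputRegister(instruction), InputRegister(instruction), offset);
    }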
@@ -207,19 +201,28 @@
 
 void CodeGenerator::GenerateSlowPaths() {
   size_t code_start = 0;
-  for (SlowPathCode* slow_path : slow_paths_) {
+  for (const std::unique_ptr<SlowPathCode>& slow_path_unique_ptr : slow_paths_) {
+    SlowPathCode* slow_path = slow_path_unique_ptr.get();
+    current_slow_path_ = slow_path;
     if (disasm_info_ != nullptr) {
       code_start = GetAssembler()->CodeSize();
     }
+    // Record the dex pc at the start of the slow path (required for Java line number mapping).
+    MaybeRecordNativeDebugInfo(slow_path->GetInstruction(), slow_path->GetDexPc(), slow_path);
     slow_path->EmitNativeCode(this);
     if (disasm_info_ != nullptr) {
       disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize());
     }
   }
+  current_slow_path_ = nullptr;
 }
 
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
-  is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+  // The register allocator already called `InitializeCodeGeneration`,
+  // where the frame size has been computed.
+  DCHECK(block_order_ != nullptr);
+  Initialize();
+
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
 
@@ -237,17 +240,21 @@
     // errors where we reference that label.
     if (block->IsSingleJump()) continue;
     Bind(block);
+    // This ensures that we have correct native line mapping for all native instructions.
+    // It is necessary to make stepping over a statement work. Otherwise, any initial
+    // instructions (e.g. moves) would be assumed to be the start of the next statement.
+    MaybeRecordNativeDebugInfo(nullptr /* instruction */, block->GetDexPc());
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
-      DisassemblyScope disassembly_scope(current, *this);
-      if (is_baseline) {
-        InitLocationsBaseline(current);
+      if (current->HasEnvironment()) {
+        // Create a stackmap for HNativeDebugInfo or any instruction which calls native code.
+        // Note that we need a correct mapping for the native PC of the call instruction,
+        // so the runtime's stackmap is not sufficient, since it is at the PC after the call.
+        MaybeRecordNativeDebugInfo(current, block->GetDexPc());
       }
+      DisassemblyScope disassembly_scope(current, *this);
       DCHECK(CheckTypeConsistency(current));
-      uintptr_t native_pc_begin = GetAssembler()->CodeSize();
       current->Accept(instruction_visitor);
-      uintptr_t native_pc_end = GetAssembler()->CodeSize();
-      RecordNativeDebugInfo(current->GetDexPc(), native_pc_begin, native_pc_end);
     }
   }
 
@@ -255,7 +262,7 @@
 
   // Emit catch stack maps at the end of the stack map stream as expected by the
   // runtime exception handler.
-  if (!is_baseline && graph_->HasTryCatch()) {
+  if (graph_->HasTryCatch()) {
     RecordCatchBlockInfo();
   }
 
@@ -263,14 +270,6 @@
   Finalize(allocator);
 }
 
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
-  // The register allocator already called `InitializeCodeGeneration`,
-  // where the frame size has been computed.
-  DCHECK(block_order_ != nullptr);
-  Initialize();
-  CompileInternal(allocator, /* is_baseline */ false);
-}
-
 void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -283,92 +282,40 @@
   // No linker patches by default.
 }
 
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    if (!array[i]) {
-      array[i] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
-  for (size_t i = 0; i < length - 1; i += 2) {
-    if (!array[i] && !array[i + 1]) {
-      array[i] = true;
-      array[i + 1] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
-                                             size_t maximum_number_of_live_fp_registers,
+                                             size_t maximum_number_of_live_fpu_registers,
                                              size_t number_of_out_slots,
                                              const ArenaVector<HBasicBlock*>& block_order) {
   block_order_ = &block_order;
   DCHECK(!block_order.empty());
   DCHECK(block_order[0] == GetGraph()->GetEntryBlock());
   ComputeSpillMask();
-  first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+  first_register_slot_in_slow_path_ = RoundUp(
+      (number_of_out_slots + number_of_spill_slots) * kVRegSize, GetPreferredSlotsAlignment());
 
   if (number_of_spill_slots == 0
       && !HasAllocatedCalleeSaveRegisters()
       && IsLeafMethod()
       && !RequiresCurrentMethod()) {
     DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
-    DCHECK_EQ(maximum_number_of_live_fp_registers, 0u);
+    DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
     SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
   } else {
     SetFrameSize(RoundUp(
-        number_of_spill_slots * kVRegSize
-        + number_of_out_slots * kVRegSize
+        first_register_slot_in_slow_path_
         + maximum_number_of_live_core_registers * GetWordSize()
-        + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+        + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
         + FrameEntrySpillSize(),
         kStackAlignment));
   }
 }
 
-Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
-  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
-  // The type of the previous instruction tells us if we need a single or double stack slot.
-  Primitive::Type type = temp->GetType();
-  int32_t temp_size = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble) ? 2 : 1;
-  // Use the temporary region (right below the dex registers).
-  int32_t slot = GetFrameSize() - FrameEntrySpillSize()
-                                - kVRegSize  // filler
-                                - (number_of_locals * kVRegSize)
-                                - ((temp_size + temp->GetIndex()) * kVRegSize);
-  return temp_size == 2 ? Location::DoubleStackSlot(slot) : Location::StackSlot(slot);
-}
-
-int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
-  if (reg_number >= number_of_locals) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    // TODO: Share this logic with StackVisitor::GetVRegOffsetFromQuickCode.
-    return GetFrameSize() + InstructionSetPointerSize(GetInstructionSet())  // ART method
-                          + (reg_number - number_of_locals) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - FrameEntrySpillSize()
-                          - kVRegSize  // filler.
-                          - (number_of_locals * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
-
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
-  LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall);
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnMainOnly);
 
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
@@ -379,13 +326,17 @@
 
   if (invoke->IsInvokeStaticOrDirect()) {
     HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
-    if (call->IsStringInit()) {
-      locations->AddTemp(visitor->GetMethodLocation());
-    } else if (call->IsRecursive()) {
-      locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
-    } else {
-      locations->AddTemp(visitor->GetMethodLocation());
-      locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+    switch (call->GetMethodLoadKind()) {
+      case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+        locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
+        break;
+      case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+        locations->AddTemp(visitor->GetMethodLocation());
+        locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
+        break;
+      default:
+        locations->AddTemp(visitor->GetMethodLocation());
+        break;
     }
   } else {
     locations->AddTemp(visitor->GetMethodLocation());
@@ -428,7 +379,7 @@
 
   ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
   LocationSummary* locations =
-      new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+      new (allocator) LocationSummary(field_access, LocationSummary::kCallOnMainOnly);
 
   locations->AddTemp(calling_convention.GetFieldIndexLocation());
 
@@ -541,15 +492,19 @@
   }
 }
 
+// TODO: Remove argument `code_generator_supports_read_barrier` when
+// all code generators have read barrier support.
 void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
                                                    Location runtime_type_index_location,
-                                                   Location runtime_return_location) {
+                                                   Location runtime_return_location,
+                                                   bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
-      ? LocationSummary::kCall
-      : (cls->CanCallRuntime()
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall);
+      ? LocationSummary::kCallOnMainOnly
+      : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
+          cls->CanCallRuntime())
+            ? LocationSummary::kCallOnSlowPath
+            : LocationSummary::kNoCall);
   LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind);
   if (cls->NeedsAccessCheck()) {
     locations->SetInAt(0, Location::NoLocation());
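The call-kind decision in the hunk above is dense; restated as a hedged standalone predicate (the function name is illustrative, the logic mirrors the diff):

    // Sketch only: the LocationSummary call kind chosen for HLoadClass.
    LocationSummary::CallKind GetLoadClassCallKind(HLoadClass* cls, bool supports_read_barrier) {
      if (cls->NeedsAccessCheck()) {
        return LocationSummary::kCallOnMainOnly;  // runtime call for the access check
      }
      if ((supports_read_barrier && kEmitCompilerReadBarrier) || cls->CanCallRuntime()) {
        return LocationSummary::kCallOnSlowPath;  // may call the runtime on a slow path
      }
      return LocationSummary::kNoCall;
    }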
@@ -585,130 +540,21 @@
   }
 }
 
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
-  LocationSummary* locations = instruction->GetLocations();
-  if (locations == nullptr) return;
-
-  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    blocked_core_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    blocked_fpu_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
-    blocked_register_pairs_[i] = false;
-  }
-
-  // Mark all fixed input, temp and output registers as used.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    BlockIfInRegister(locations->InAt(i));
-  }
-
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    BlockIfInRegister(loc);
-  }
-  Location result_location = locations->Out();
-  if (locations->OutputCanOverlapWithInputs()) {
-    BlockIfInRegister(result_location, /* is_out */ true);
-  }
-
-  SetupBlockedRegisters(/* is_baseline */ true);
-
-  // Allocate all unallocated input locations.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    Location loc = locations->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (loc.IsUnallocated()) {
-      if ((loc.GetPolicy() == Location::kRequiresRegister)
-          || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
-        loc = AllocateFreeRegister(input->GetType());
-      } else {
-        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
-        HLoadLocal* load = input->AsLoadLocal();
-        if (load != nullptr) {
-          loc = GetStackLocation(load);
-        } else {
-          loc = AllocateFreeRegister(input->GetType());
-        }
-      }
-      locations->SetInAt(i, loc);
-    }
-  }
-
-  // Allocate all unallocated temp locations.
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    if (loc.IsUnallocated()) {
-      switch (loc.GetPolicy()) {
-        case Location::kRequiresRegister:
-          // Allocate a core register (large enough to fit a 32-bit integer).
-          loc = AllocateFreeRegister(Primitive::kPrimInt);
-          break;
-
-        case Location::kRequiresFpuRegister:
-          // Allocate a floating-point register (large enough to fit a 64-bit double).
-          loc = AllocateFreeRegister(Primitive::kPrimDouble);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << loc.GetPolicy();
-      }
-      locations->SetTempAt(i, loc);
-    }
-  }
-  if (result_location.IsUnallocated()) {
-    switch (result_location.GetPolicy()) {
-      case Location::kAny:
-      case Location::kRequiresRegister:
-      case Location::kRequiresFpuRegister:
-        result_location = AllocateFreeRegister(instruction->GetType());
-        break;
-      case Location::kSameAsFirstInput:
-        result_location = locations->InAt(0);
-        break;
-    }
-    locations->UpdateOut(result_location);
-  }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
-  AllocateLocations(instruction);
-  if (instruction->GetLocations() == nullptr) {
-    if (instruction->IsTemporary()) {
-      HInstruction* previous = instruction->GetPrevious();
-      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-      Move(previous, temp_location, instruction);
-    }
-    return;
-  }
-  AllocateRegistersLocally(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    Location location = instruction->GetLocations()->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (location.IsValid()) {
-      // Move the input to the desired location.
-      if (input->GetNext()->IsTemporary()) {
-        // If the input was stored in a temporary, use that temporary to
-        // perform the move.
-        Move(input->GetNext(), location, instruction);
-      } else {
-        Move(input, location, instruction);
-      }
-    }
-  }
-}
-
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
   LocationSummary* locations = instruction->GetLocations();
   if (!instruction->IsSuspendCheckEntry()) {
-    if (locations != nullptr && locations->CanCall()) {
-      MarkNotLeaf();
+    if (locations != nullptr) {
+      if (locations->CanCall()) {
+        MarkNotLeaf();
+      } else if (locations->Intrinsified() &&
+                 instruction->IsInvokeStaticOrDirect() &&
+                 !instruction->AsInvokeStaticOrDirect()->HasCurrentMethodInput()) {
+        // A static method call that has been fully intrinsified, and cannot call on the slow
+        // path or refer to the current method directly, no longer needs the current method.
+        return;
+      }
     }
     if (instruction->NeedsCurrentMethod()) {
       SetRequiresCurrentMethod();
@@ -722,59 +568,66 @@
   }
 }
 
-CodeGenerator* CodeGenerator::Create(HGraph* graph,
-                                     InstructionSet instruction_set,
-                                     const InstructionSetFeatures& isa_features,
-                                     const CompilerOptions& compiler_options,
-                                     OptimizingCompilerStats* stats) {
+std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
+                                                     InstructionSet instruction_set,
+                                                     const InstructionSetFeatures& isa_features,
+                                                     const CompilerOptions& compiler_options,
+                                                     OptimizingCompilerStats* stats) {
+  ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2: {
-      return new arm::CodeGeneratorARM(graph,
-                                      *isa_features.AsArmInstructionSetFeatures(),
-                                      compiler_options,
-                                      stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) arm::CodeGeneratorARM(graph,
+                                            *isa_features.AsArmInstructionSetFeatures(),
+                                            compiler_options,
+                                            stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
-      return new arm64::CodeGeneratorARM64(graph,
-                                          *isa_features.AsArm64InstructionSetFeatures(),
-                                          compiler_options,
-                                          stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) arm64::CodeGeneratorARM64(graph,
+                                                *isa_features.AsArm64InstructionSetFeatures(),
+                                                compiler_options,
+                                                stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips: {
-      return new mips::CodeGeneratorMIPS(graph,
-                                         *isa_features.AsMipsInstructionSetFeatures(),
-                                         compiler_options,
-                                         stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) mips::CodeGeneratorMIPS(graph,
+                                              *isa_features.AsMipsInstructionSetFeatures(),
+                                              compiler_options,
+                                              stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64: {
-      return new mips64::CodeGeneratorMIPS64(graph,
-                                            *isa_features.AsMips64InstructionSetFeatures(),
-                                            compiler_options,
-                                            stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) mips64::CodeGeneratorMIPS64(graph,
+                                                  *isa_features.AsMips64InstructionSetFeatures(),
+                                                  compiler_options,
+                                                  stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
-      return new x86::CodeGeneratorX86(graph,
-                                      *isa_features.AsX86InstructionSetFeatures(),
-                                      compiler_options,
-                                      stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) x86::CodeGeneratorX86(graph,
+                                            *isa_features.AsX86InstructionSetFeatures(),
+                                            compiler_options,
+                                            stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64: {
-      return new x86_64::CodeGeneratorX86_64(graph,
-                                            *isa_features.AsX86_64InstructionSetFeatures(),
-                                            compiler_options,
-                                            stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) x86_64::CodeGeneratorX86_64(graph,
+                                                  *isa_features.AsX86_64InstructionSetFeatures(),
+                                                  compiler_options,
+                                                  stats));
     }
 #endif
     default:
@@ -782,144 +635,79 @@
   }
 }
 
-void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
-  const std::vector<uint8_t>& gc_map_raw =
-      dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
+size_t CodeGenerator::ComputeStackMapsSize() {
+  return stack_map_stream_.PrepareForFillIn();
+}
 
-  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
-  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
-  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i != num_stack_maps; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t native_offset = stack_map_entry.native_pc_offset;
-    uint32_t dex_pc = stack_map_entry.dex_pc;
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
-    builder.AddEntry(native_offset, references);
+static void CheckCovers(uint32_t dex_pc,
+                        const HGraph& graph,
+                        const CodeInfo& code_info,
+                        const ArenaVector<HSuspendCheck*>& loop_headers,
+                        ArenaVector<size_t>* covered) {
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  for (size_t i = 0; i < loop_headers.size(); ++i) {
+    if (loop_headers[i]->GetDexPc() == dex_pc) {
+      if (graph.IsCompilingOsr()) {
+        DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+      }
+      ++(*covered)[i];
+    }
   }
 }
 
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
-  uint32_t pc2dex_offset = 0u;
-  int32_t pc2dex_dalvik_offset = 0;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  int32_t dex2pc_dalvik_offset = 0;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+                                            const CodeInfo& code_info,
+                                            const DexFile::CodeItem& code_item) {
+  if (graph.HasTryCatch()) {
+    // One can write loops through try/catch, which we do not support for OSR anyway.
+    return;
   }
-
-  // Walk over the blocks and find which ones correspond to catch block entries.
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      ++dex2pc_entries;
-      dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  data->resize(data_size);
-
-  uint8_t* data_ptr = &(*data)[0];
-  uint8_t* write_pos = data_ptr;
-
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
-    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
-  if (kIsDebugBuild) {
-    // Verify the encoded table holds the expected data.
-    MappingTable table(data_ptr);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (size_t i = 0; i < pc2dex_entries; i++) {
-      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
-      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
-      ++it;
-    }
-    for (HBasicBlock* block : graph_->GetBlocks()) {
-      if (block->IsCatchBlock()) {
-        CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
-        CHECK_EQ(block->GetDexPc(), it2.DexPc());
-        ++it2;
+  ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+  for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+    if (it.Current()->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+      if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+        loop_headers.push_back(suspend_check);
       }
     }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
+  }
+  ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+  const uint16_t* code_ptr = code_item.insns_;
+  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+  size_t dex_pc = 0;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    if (instruction.IsBranch()) {
+      uint32_t target = dex_pc + instruction.GetTargetOffset();
+      CheckCovers(target, graph, code_info, loop_headers, &covered);
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      uint16_t num_entries = table.GetNumEntries();
+      size_t offset = table.GetFirstValueIndex();
+
+      // Use a larger loop counter type to avoid overflow issues.
+      for (size_t i = 0; i < num_entries; ++i) {
+        // The target of the case.
+        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+        CheckCovers(target, graph, code_info, loop_headers, &covered);
+      }
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+
+  for (size_t i = 0; i < covered.size(); ++i) {
+    DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
   }
 }
 
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
-  // We currently don't use callee-saved registers.
-  size_t size = 0 + 1 /* marker */ + 0;
-  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-  vmap_encoder.PushBackUnsigned(size);
-  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
-void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) {
-  uint32_t size = stack_map_stream_.PrepareForFillIn();
-  data->resize(size);
-  MemoryRegion region(data->data(), size);
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
   stack_map_stream_.FillIn(region);
-}
-
-void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc,
-                                          uintptr_t native_pc_begin,
-                                          uintptr_t native_pc_end) {
-  if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) {
-    src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
-                                    static_cast<int32_t>(dex_pc)}));
+  if (kIsDebugBuild) {
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
   }
 }
 
@@ -927,7 +715,7 @@
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path) {
   if (instruction != nullptr) {
-    // The code generated for some type conversions and comparisons
+    // The code generated for some type conversions
     // may call the runtime, thus normally requiring a subsequent
     // call to this method. However, the method verifier does not
     // produce PC information for certain instructions, which are
@@ -938,7 +726,7 @@
     // CodeGenerator::RecordPcInfo without triggering an error in
     // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x")
     // thereafter.
-    if (instruction->IsTypeConversion() || instruction->IsCompare()) {
+    if (instruction->IsTypeConversion()) {
       return;
     }
     if (instruction->IsRem()) {
@@ -968,7 +756,8 @@
   uint32_t native_pc = GetAssembler()->CodeSize();
 
   if (instruction == nullptr) {
-    // For stack overflow checks.
+    // For stack overflow checks and native-debug-info entries without dex register
+    // mapping (i.e. start of basic block or start of slow path).
     stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0);
     stack_map_stream_.EndStackMapEntry();
     return;
@@ -976,16 +765,24 @@
   LocationSummary* locations = instruction->GetLocations();
 
   uint32_t register_mask = locations->GetRegisterMask();
-  if (locations->OnlyCallsOnSlowPath()) {
-    // In case of slow path, we currently set the location of caller-save registers
-    // to register (instead of their stack location when pushed before the slow-path
-    // call). Therefore register_mask contains both callee-save and caller-save
-    // registers that hold objects. We must remove the caller-save from the mask, since
-    // they will be overwritten by the callee.
-    register_mask &= core_callee_save_mask_;
+  if (instruction->IsSuspendCheck()) {
+    // The suspend check has a special ABI that saves the caller-save registers
+    // in the callee, so we want the emitted stack maps to contain those registers.
+    // TODO: The register allocator still reserves space for the caller-save registers.
+    // We should add slow-path-specific caller-save information into LocationSummary
+    // and refactor the code here as well as in the register allocator to use it.
+  } else {
+    if (locations->OnlyCallsOnSlowPath()) {
+      // In case of slow path, we currently set the location of caller-save registers
+      // to register (instead of their stack location when pushed before the slow-path
+      // call). Therefore register_mask contains both callee-save and caller-save
+      // registers that hold objects. We must remove the caller-save from the mask, since
+      // they will be overwritten by the callee.
+      register_mask &= core_callee_save_mask_;
+    }
+    // The register mask must be a subset of callee-save registers.
+    DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   }
-  // The register mask must be a subset of callee-save registers.
-  DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
@@ -995,6 +792,64 @@
 
   EmitEnvironment(instruction->GetEnvironment(), slow_path);
   stack_map_stream_.EndStackMapEntry();
+
+  HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+  if (instruction->IsSuspendCheck() &&
+      (info != nullptr) &&
+      graph_->IsCompilingOsr() &&
+      (inlining_depth == 0)) {
+    DCHECK_EQ(info->GetSuspendCheck(), instruction);
+    // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+    // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
+    DCHECK(info->IsIrreducible());
+    stack_map_stream_.BeginStackMapEntry(
+        dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+    EmitEnvironment(instruction->GetEnvironment(), slow_path);
+    stack_map_stream_.EndStackMapEntry();
+    if (kIsDebugBuild) {
+      HEnvironment* environment = instruction->GetEnvironment();
+      for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+        HInstruction* in_environment = environment->GetInstructionAt(i);
+        if (in_environment != nullptr) {
+          DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+          Location location = environment->GetLocationAt(i);
+          DCHECK(location.IsStackSlot() ||
+                 location.IsDoubleStackSlot() ||
+                 location.IsConstant() ||
+                 location.IsInvalid());
+          if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+            DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+          }
+        }
+      }
+    }
+  } else if (kIsDebugBuild) {
+    // Ensure stack maps are unique by checking that the native pc of the stack map
+    // just emitted differs from the native pc of the previously emitted one.
+    size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+    if (number_of_stack_maps > 1) {
+      DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+                stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+    }
+  }
+}
+
+bool CodeGenerator::HasStackMapAtCurrentPc() {
+  uint32_t pc = GetAssembler()->CodeSize();
+  size_t count = stack_map_stream_.GetNumberOfStackMaps();
+  return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
+}
+
+void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
+                                               uint32_t dex_pc,
+                                               SlowPathCode* slow_path) {
+  if (GetCompilerOptions().GetNativeDebuggable() && dex_pc != kNoDexPc) {
+    if (HasStackMapAtCurrentPc()) {
+      // Ensure that we do not collide with the stack map of the previous instruction.
+      GenerateNop();
+    }
+    RecordPcInfo(instruction, dex_pc, slow_path);
+  }
 }
 
 void CodeGenerator::RecordCatchBlockInfo() {
@@ -1012,7 +867,8 @@
     uint32_t register_mask = 0;   // Not used.
 
     // The stack mask is not used, so we leave it empty.
-    ArenaBitVector* stack_mask = new (arena) ArenaBitVector(arena, 0, /* expandable */ true);
+    ArenaBitVector* stack_mask =
+        ArenaBitVector::Create(arena, 0, /* expandable */ true, kArenaAllocCodeGenerator);
 
     stack_map_stream_.BeginStackMapEntry(dex_pc,
                                          native_pc,
@@ -1274,6 +1130,16 @@
   }
 }
 
+void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
+  if (IsImplicitNullCheckAllowed(instruction)) {
+    MaybeRecordStat(kImplicitNullCheckGenerated);
+    GenerateImplicitNullCheck(instruction);
+  } else {
+    MaybeRecordStat(kExplicitNullCheckGenerated);
+    GenerateExplicitNullCheck(instruction);
+  }
+}
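+
+// Illustrative note (not from the original source): the implicit variant relies
+// on the hardware fault taken by a load through a null object pointer, which the
+// runtime's fault handler turns into a NullPointerException; the explicit
+// variant emits a null compare and a branch to a slow path such as
+// NullCheckSlowPathARM below.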
+
 void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const {
   LocationSummary* locations = suspend_check->GetLocations();
   HBasicBlock* block = suspend_check->GetBlock();
@@ -1310,21 +1176,32 @@
   // coherent with the runtime call generated, and that the GC side effect is
   // set when required.
   if (slow_path == nullptr) {
-    DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName();
+    DCHECK(instruction->GetLocations()->WillCall())
+        << "instruction->DebugName()=" << instruction->DebugName();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))
-        << instruction->DebugName() << instruction->GetSideEffects().ToString();
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
   } else {
-    DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
-        << instruction->DebugName() << slow_path->GetDescription();
+    DCHECK(instruction->GetLocations()->CallsOnSlowPath() || slow_path->IsFatal())
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
-           // Control flow would not come back into the code if a fatal slow
-           // path is taken, so we do not care if it triggers GC.
-           slow_path->IsFatal() ||
-           // HDeoptimize is a special case: we know we are not coming back from
-           // it into the code.
-           instruction->IsDeoptimize())
-        << instruction->DebugName() << instruction->GetSideEffects().ToString()
-        << slow_path->GetDescription();
+           // When (non-Baker) read barriers are enabled, some instructions
+           // use a slow path to emit a read barrier, which does not trigger
+           // GC.
+           (kEmitCompilerReadBarrier &&
+            !kUseBakerReadBarrier &&
+            (instruction->IsInstanceFieldGet() ||
+             instruction->IsStaticFieldGet() ||
+             instruction->IsArrayGet() ||
+             instruction->IsLoadClass() ||
+             instruction->IsLoadString() ||
+             instruction->IsInstanceOf() ||
+             instruction->IsCheckCast() ||
+             (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
+        << " slow_path->GetDescription()=" << slow_path->GetDescription();
   }
 
   // Check the coherency of leaf information.
@@ -1335,12 +1212,35 @@
       << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
 }
 
+void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                                SlowPathCode* slow_path) {
+  DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath())
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+  // Only the Baker read barrier marking slow path used by certain
+  // instructions is expected to invoke the runtime without recording
+  // PC-related information.
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK(instruction->IsInstanceFieldGet() ||
+         instruction->IsStaticFieldGet() ||
+         instruction->IsArrayGet() ||
+         instruction->IsLoadClass() ||
+         instruction->IsLoadString() ||
+         instruction->IsInstanceOf() ||
+         instruction->IsCheckCast() ||
+         (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+         (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+}
+
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* register_set = locations->GetLiveRegisters();
+  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
   for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
     if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (register_set->ContainsCoreRegister(i)) {
+      if (live_registers->ContainsCoreRegister(i)) {
         // If the register holds an object, update the stack mask.
         if (locations->RegisterContainsObject(i)) {
           locations->SetStackBit(stack_offset / kVRegSize);
@@ -1355,7 +1255,7 @@
 
   for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (register_set->ContainsFloatingPointRegister(i)) {
+      if (live_registers->ContainsFloatingPointRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
         DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         saved_fpu_stack_offsets_[i] = stack_offset;
@@ -1366,12 +1266,14 @@
 }
 
 void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* register_set = locations->GetLiveRegisters();
+  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
   for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
     if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (register_set->ContainsCoreRegister(i)) {
+      if (live_registers->ContainsCoreRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
       }
     }
@@ -1379,8 +1281,9 @@
 
   for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (register_set->ContainsFloatingPointRegister(i)) {
+      if (live_registers->ContainsFloatingPointRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
       }
     }
@@ -1440,4 +1343,18 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
+uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b04dfc0..fd396c4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,8 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/bit_field.h"
+#include "base/enums.h"
+#include "compiled_method.h"
 #include "driver/compiler_options.h"
 #include "globals.h"
 #include "graph_visualizer.h"
@@ -51,13 +53,9 @@
 
 class Assembler;
 class CodeGenerator;
-class DexCompilationUnit;
+class CompilerDriver;
 class LinkerPatch;
 class ParallelMoveResolver;
-class SrcMapElem;
-template <class Alloc>
-class SrcMap;
-using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
 
 class CodeAllocator {
  public:
@@ -70,9 +68,9 @@
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
 };
 
-class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
+class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
  public:
-  SlowPathCode() {
+  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
     for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
       saved_core_stack_offsets_[i] = kRegisterNotSaved;
       saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
@@ -83,7 +81,11 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
 
+  // Save live core and floating-point caller-save registers and
+  // update the stack mask in `locations` for registers holding object
+  // references.
   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+  // Restore live core and floating-point caller-save registers.
   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
 
   bool IsCoreRegisterSaved(int reg) const {
@@ -109,9 +111,19 @@
   Label* GetEntryLabel() { return &entry_label_; }
   Label* GetExitLabel() { return &exit_label_; }
 
+  HInstruction* GetInstruction() const {
+    return instruction_;
+  }
+
+  uint32_t GetDexPc() const {
+    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
+  }
+
  protected:
   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
   static constexpr uint32_t kRegisterNotSaved = -1;
+  // The instruction for which this slow path is generated.
+  HInstruction* instruction_;
   uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
 
@@ -159,19 +171,18 @@
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
 };
 
-class CodeGenerator {
+class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
  public:
-  // Compiles the graph to executable instructions. Returns whether the compilation
-  // succeeded.
-  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
-  void CompileOptimized(CodeAllocator* allocator);
-  static CodeGenerator* Create(HGraph* graph,
-                               InstructionSet instruction_set,
-                               const InstructionSetFeatures& isa_features,
-                               const CompilerOptions& compiler_options,
-                               OptimizingCompilerStats* stats = nullptr);
+  // Compiles the graph to executable instructions.
+  void Compile(CodeAllocator* allocator);
+  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
+                                               InstructionSet instruction_set,
+                                               const InstructionSetFeatures& isa_features,
+                                               const CompilerOptions& compiler_options,
+                                               OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGenerator() {}
 
+  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
   HGraph* GetGraph() const { return graph_; }
 
   HBasicBlock* GetNextBlockToEmit() const;
@@ -181,7 +192,7 @@
   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
     // Note that this follows the current calling convention.
     return GetFrameSize()
-        + InstructionSetPointerSize(GetInstructionSet())  // Art method
+        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
         + parameter->GetIndex() * kVRegSize;
   }
 
@@ -191,7 +202,6 @@
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(HBasicBlock* block) = 0;
-  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
   virtual void MoveConstant(Location destination, int32_t value) = 0;
   virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
   virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
@@ -200,14 +210,14 @@
   virtual const Assembler& GetAssembler() const = 0;
   virtual size_t GetWordSize() const = 0;
   virtual size_t GetFloatingPointSpillSlotSize() const = 0;
-  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
+  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
   void InitializeCodeGeneration(size_t number_of_spill_slots,
                                 size_t maximum_number_of_live_core_registers,
-                                size_t maximum_number_of_live_fp_registers,
+                                size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
-  int32_t GetStackSlot(HLocal* local) const;
-  Location GetTemporaryLocation(HTemporary* temp) const;
+  // Backends can override this as necessary. For most, no special alignment is required.
+  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -216,7 +226,7 @@
 
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
-  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+  virtual void SetupBlockedRegisters() const = 0;
 
   virtual void ComputeSpillMask() {
     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -252,6 +262,15 @@
   // Returns whether we should split long moves in parallel moves.
   virtual bool ShouldSplitLongMoves() const { return false; }
 
+  size_t GetNumberOfCoreCalleeSaveRegisters() const {
+    return POPCOUNT(core_callee_save_mask_);
+  }
+
+  size_t GetNumberOfCoreCallerSaveRegisters() const {
+    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
+    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
+  }
+
   bool IsCoreCalleeSaveRegister(int reg) const {
     return (core_callee_save_mask_ & (1 << reg)) != 0;
   }
@@ -262,11 +281,18 @@
 
   // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
-  // Record additional native to dex mappings for native debugging/profiling tools.
-  void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end);
+  // Check whether we have already recorded a mapping at this PC.
+  bool HasStackMapAtCurrentPc();
+  // Record extra stack maps if we support native debugging.
+  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
+                                  uint32_t dex_pc,
+                                  SlowPathCode* slow_path = nullptr);
 
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
+  void GenerateNullCheck(HNullCheck* null_check);
+  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
+  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
 
   // Records a stack map which the runtime might use to set catch phi values
   // during exception delivery.
@@ -279,21 +305,13 @@
   // save live registers, which may be needed by the runtime to set catch phis.
   bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
 
+  // TODO: Avoid creating the `std::unique_ptr` here.
   void AddSlowPath(SlowPathCode* slow_path) {
-    slow_paths_.push_back(slow_path);
+    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
   }
 
-  void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
-
-  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
-  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
-  void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
-  void BuildStackMaps(ArenaVector<uint8_t>* vector);
-
-  bool IsBaseline() const {
-    return is_baseline_;
-  }
+  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
+  size_t ComputeStackMapsSize();
 
   bool IsLeafMethod() const {
     return is_leaf_;
@@ -322,6 +340,9 @@
   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
 
+  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
+  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
+
   // Helper that returns the pointer offset of an index in an object array.
   // Note: this method assumes we always have the same pointer size, regardless
   // of the architecture.
@@ -329,6 +350,27 @@
   // Pointer variant for ArtMethod and ArtField arrays.
   size_t GetCachePointerOffset(uint32_t index);
 
+  // Helper that returns the offset of the array's length field.
+  // Note: Besides normal arrays, we also use HArrayLength for
+  // accessing the String's `count` field in String intrinsics.
+  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
+
+  // Helper that returns the offset of the array's data.
+  // Note: Besides normal arrays, we also use HArrayGet for
+  // accessing the String's `value` field in String intrinsics.
+  static uint32_t GetArrayDataOffset(HArrayGet* array_get);
+
+  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
+  template <PointerSize pointer_size>
+  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
+    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
+    DCHECK_LT(reg, 30u);
+    // The ReadBarrierMarkRegX entry points are ordered by increasing
+    // register number in Thread::tlsPtr_.quick_entrypoints.
+    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+        + static_cast<size_t>(pointer_size) * reg;
+  }
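+  // Worked example (illustrative, not from the original source): with
+  // pointer_size == PointerSize::k32 the entry points are 4 bytes apart, so
+  // reg == 5 yields offset(pReadBarrierMarkReg00) + 4 * 5, i.e. the offset of
+  // pReadBarrierMarkReg05 within Thread::tlsPtr_.quick_entrypoints.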
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
@@ -342,8 +384,14 @@
     return type == Primitive::kPrimNot && !value->IsNullConstant();
   }
 
+  // Performs checks pertaining to an InvokeRuntime call.
   void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
 
+  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
+  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                          SlowPathCode* slow_path);
+
   void AddAllocatedRegister(Location location) {
     allocated_registers_.Add(location);
   }
@@ -419,7 +467,8 @@
   // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
   static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                              Location runtime_type_index_location,
-                                             Location runtime_return_location);
+                                             Location runtime_return_location,
+                                             bool code_generator_supports_read_barrier = false);
 
   static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
 
@@ -431,6 +480,22 @@
                              uint32_t dex_pc,
                              SlowPathCode* slow_path) = 0;
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise, return a fall-back kind that should be used instead.
+  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) = 0;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it;
+  // otherwise, return a fall-back kind that should be used instead.
+  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) = 0;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise, return a fall-back info that should be used instead.
+  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) = 0;
+
   // Generate a call to a static or direct method.
   virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
   // Generate a call to a virtual method.
@@ -439,6 +504,11 @@
   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
 
+  virtual void GenerateNop() = 0;
+
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
@@ -451,6 +521,32 @@
     LabelType label;
   };
 
+  // String patch info used for recording locations of required linker patches and
+  // target strings. The actual string address can be absolute or PC-relative.
+  template <typename LabelType>
+  struct StringPatchInfo {
+    StringPatchInfo(const DexFile& df, uint32_t index)
+        : dex_file(df), string_index(index), label() { }
+
+    const DexFile& dex_file;
+    uint32_t string_index;
+    LabelType label;
+  };
+
+  // Type patch info used for recording locations of required linker patches and
+  // target types. The actual type address can be absolute or PC-relative.
+  // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these
+  // classes contain the dex file, some index and the label.
+  template <typename LabelType>
+  struct TypePatchInfo {
+    TypePatchInfo(const DexFile& df, uint32_t index)
+        : dex_file(df), type_index(index), label() { }
+
+    const DexFile& dex_file;
+    uint32_t type_index;
+    LabelType label;
+  };
+
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
@@ -476,30 +572,18 @@
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
-        is_baseline_(false),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
         compiler_options_(compiler_options),
-        src_map_(nullptr),
         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        current_slow_path_(nullptr),
         current_block_index_(0),
         is_leaf_(true),
         requires_current_method_(false) {
     slow_paths_.reserve(8);
   }
 
-  // Register allocation logic.
-  void AllocateRegistersLocally(HInstruction* instruction) const;
-
-  // Backend specific implementation for allocating a register.
-  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
-  static size_t FindFreeEntry(bool* array, size_t length);
-  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
-  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
-
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
@@ -550,6 +634,10 @@
     return raw_pointer_to_labels_array + block->GetBlockId();
   }
 
+  SlowPathCode* GetCurrentSlowPath() {
+    return current_slow_path_;
+  }
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
@@ -576,16 +664,11 @@
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Whether we are using baseline.
-  bool is_baseline_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
-  void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
-  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
 
@@ -594,9 +677,10 @@
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
-  // Native to dex_pc map used for native debugging/profiling tools.
-  DefaultSrcMap* src_map_;
-  ArenaVector<SlowPathCode*> slow_paths_;
+  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
+
+  // The current slow-path that we're generating code for.
+  SlowPathCode* current_slow_path_;
 
   // The current block index in `block_order_` of the block
   // we are generating code for.
@@ -620,7 +704,7 @@
                     size_t number_of_registers,
                     const F* fpu_registers,
                     size_t number_of_fpu_registers,
-                    size_t pointer_size)
+                    PointerSize pointer_size)
       : registers_(registers),
         number_of_registers_(number_of_registers),
         fpu_registers_(fpu_registers),
@@ -643,7 +727,7 @@
   size_t GetStackOffsetOf(size_t index) const {
     // We still reserve the space for parameters passed by registers.
     // Add space for the method pointer.
-    return pointer_size_ + index * kVRegSize;
+    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
   }
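+  // Worked example (illustrative, not from the original source): on a 32-bit
+  // target (pointer_size_ == PointerSize::k32, kVRegSize == 4), the parameter
+  // at index 2 sits at 4 + 2 * 4 == 12 bytes, just past the ArtMethod* slot.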
 
  private:
@@ -651,11 +735,127 @@
   const size_t number_of_registers_;
   const F* fpu_registers_;
   const size_t number_of_fpu_registers_;
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
 
   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };
 
+/**
+ * A templated class SlowPathGenerator with a templated method NewSlowPath()
+ * that can be used by any code generator to share equivalent slow-paths with
+ * the objective of reducing generated code size.
+ *
+ * InstructionType:  instruction that requires SlowPathCodeType
+ * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
+ */
+template <typename InstructionType>
+class SlowPathGenerator {
+  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
+                "InstructionType is not a subclass of art::HInstruction");
+
+ public:
+  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
+      : graph_(graph),
+        codegen_(codegen),
+        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
+
+  // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
+  // Templating the method (rather than the whole class) on the slow-path type enables
+  // keeping this code in a generic, non-architecture-specific place.
+  //
+  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
+  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
+  //       or template the class as a whole on SlowPathType.
+  template <typename SlowPathCodeType>
+  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
+    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
+                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
+    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
+                  "SlowPathCodeType is not constructible from InstructionType*");
+    // Iterate over potential candidates for sharing. Currently, only same-typed
+    // slow-paths with exactly the same dex-pc are viable candidates.
+    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
+    const uint32_t dex_pc = instruction->GetDexPc();
+    auto iter = slow_path_map_.find(dex_pc);
+    if (iter != slow_path_map_.end()) {
+      auto candidates = iter->second;
+      for (const auto& it : candidates) {
+        InstructionType* other_instruction = it.first;
+        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
+        // Determine if the instructions allow for slow-path sharing.
+        if (HaveSameLiveRegisters(instruction, other_instruction) &&
+            HaveSameStackMap(instruction, other_instruction)) {
+          // Can share: reuse existing one.
+          return other_slow_path;
+        }
+      }
+    } else {
+      // First time this dex-pc is seen.
+      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
+    }
+    // Cannot share: create and add new slow-path for this particular dex-pc.
+    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
+    iter->second.emplace_back(std::make_pair(instruction, slow_path));
+    codegen_->AddSlowPath(slow_path);
+    return slow_path;
+  }
+
+ private:
+  // Tests if both instructions have the same set of live physical registers. This ensures
+  // the slow-path has exactly the same preamble on saving these registers to stack.
+  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
+    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
+    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
+    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
+    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
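+    // For example (illustrative, not from the original source): if R0 and R5
+    // are live on ARM and R5 is a callee-save already covered by
+    // GetCoreSpillMask(), only R0 takes part in the comparison, since
+    // callee-saves are spilled by the frame entry rather than by the slow path.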
+    return (((live1->GetCoreRegisters() & core_spill) ==
+             (live2->GetCoreRegisters() & core_spill)) &&
+            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
+             (live2->GetFloatingPointRegisters() & fpu_spill)));
+  }
+
+  // Tests if both instructions have the same stack map. This ensures the interpreter
+  // will find exactly the same dex-registers at the same entries.
+  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
+    DCHECK(i1->HasEnvironment());
+    DCHECK(i2->HasEnvironment());
+    // We conservatively test whether the two environments record exactly the same
+    // instruction and location in each dex register. This guarantees they will have the same stack map.
+    HEnvironment* e1 = i1->GetEnvironment();
+    HEnvironment* e2 = i2->GetEnvironment();
+    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
+      return false;
+    }
+    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
+      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
+          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  HGraph* const graph_;
+  CodeGenerator* const codegen_;
+
+  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
+  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
+};
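+
+// A minimal usage sketch (hypothetical call site; the actual ones live in the
+// per-architecture instruction visitors):
+//
+//   void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
+//     SlowPathCode* slow_path =
+//         deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
+//     __ b(slow_path->GetEntryLabel(), NE);
+//   }
+//
+// NewSlowPath() already registers the path with the code generator via
+// AddSlowPath(), so the caller only emits the branch to GetEntryLabel().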
+
+class InstructionCodeGenerator : public HGraphVisitor {
+ public:
+  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        deopt_slow_paths_(graph, codegen) {}
+
+ protected:
+  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
+  // TODO: under the current regime, only deopt sharing makes sense; extend later.
+  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 92a5878..404f044 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -34,6 +34,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm {
 
 static bool ExpectedPairLayout(Location location) {
@@ -44,9 +47,7 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
 static constexpr Register kCoreCalleeSaves[] =
     { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
@@ -56,12 +57,15 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
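+// Illustrative note (not from the original source): QUICK_ENTRY_POINT(pThrowNullPointer)
+// expands to the Thread-relative byte offset of that entrypoint; InvokeRuntime()
+// in the slow paths below loads the entrypoint through this offset and calls it.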
 
 class NullCheckSlowPathARM : public SlowPathCode {
  public:
-  explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {}
+  explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
@@ -72,6 +76,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -79,13 +84,12 @@
   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; }
 
  private:
-  HNullCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
 };
 
 class DivZeroCheckSlowPathARM : public SlowPathCode {
  public:
-  explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : instruction_(instruction) {}
+  explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
@@ -96,6 +100,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -103,22 +108,20 @@
   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM"; }
 
  private:
-  HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM);
 };
 
 class SuspendCheckSlowPathARM : public SlowPathCode {
  public:
   SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
-      : instruction_(instruction), successor_(successor) {}
+      : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     if (successor_ == nullptr) {
       __ b(GetReturnLabel());
     } else {
@@ -138,7 +141,6 @@
   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM"; }
 
  private:
-  HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
   HBasicBlock* const successor_;
 
@@ -151,7 +153,7 @@
 class BoundsCheckSlowPathARM : public SlowPathCode {
  public:
   explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction)
-      : instruction_(instruction) {}
+      : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
@@ -172,8 +174,12 @@
         locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    arm_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    arm_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -181,8 +187,6 @@
   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM"; }
 
  private:
-  HBoundsCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM);
 };
 
@@ -192,7 +196,7 @@
                        HInstruction* at,
                        uint32_t dex_pc,
                        bool do_clinit)
-      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
@@ -209,6 +213,11 @@
         ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
         : QUICK_ENTRY_POINT(pInitializeType);
     arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -241,7 +250,7 @@
 
 class LoadStringSlowPathARM : public SlowPathCode {
  public:
-  explicit LoadStringSlowPathARM(HLoadString* instruction) : instruction_(instruction) {}
+  explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -252,9 +261,11 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index);
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
 
     RestoreLiveRegisters(codegen, locations);
@@ -264,15 +275,13 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; }
 
  private:
-  HLoadString* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM);
 };
 
 class TypeCheckSlowPathARM : public SlowPathCode {
  public:
   TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal)
-      : instruction_(instruction), is_fatal_(is_fatal) {}
+      : SlowPathCode(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -284,15 +293,6 @@
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = locations->InAt(0).AsRegister<Register>();
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -313,6 +313,8 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
@@ -320,6 +322,7 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -333,7 +336,6 @@
   bool IsFatal() const OVERRIDE { return is_fatal_; }
 
  private:
-  HInstruction* const instruction_;
   const bool is_fatal_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
@@ -341,29 +343,29 @@
 
 class DeoptimizationSlowPathARM : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathARM(HInstruction* instruction)
-    : instruction_(instruction) {}
+  explicit DeoptimizationSlowPathARM(HDeoptimize* instruction)
+    : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
 
  private:
-  HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
 };
 
 class ArraySetSlowPathARM : public SlowPathCode {
  public:
-  explicit ArraySetSlowPathARM(HInstruction* instruction) : instruction_(instruction) {}
+  explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -394,6 +396,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ b(GetExitLabel());
   }
@@ -401,13 +404,304 @@
   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM"; }
 
  private:
-  HInstruction* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg = obj_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    DCHECK_NE(reg, SP);
+    DCHECK_NE(reg, LR);
+    DCHECK_NE(reg, PC);
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary; it cannot be the entry point's input/output.
+    DCHECK_NE(reg, IP);
+    DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in R0):
+    //
+    //   R0 <- obj
+    //   R0 <- ReadBarrierMark(R0)
+    //   obj <- R0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg);
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ b(GetExitLabel());
+  }
+
+ private:
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
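
The "compact" slow path above works because there is one mark entrypoint per core register, laid out as a table of code pointers in the Thread object. A minimal sketch of the offset computation it relies on (the constant value and helper name here are illustrative, not ART's actual ones):

    // Sketch: assumes pReadBarrierMarkReg00..RegN form a contiguous table of
    // code pointers in the Thread object, starting at a known offset.
    constexpr int32_t kFirstMarkRegEntrypointOffset = 0x1A0;  // illustrative value
    inline int32_t MarkEntrypointOffsetSketch(int reg) {
      // pReadBarrierMarkRegN lives N pointer-sized slots after Reg00.
      return kFirstMarkRegEntrypointOffset + reg * static_cast<int32_t>(sizeof(void*));
    }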
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction,
+                                         Location out,
+                                         Location ref,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location index)
+      : SlowPathCode(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // we introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::arm::Thumb2Assembler::Lsl and
+          // art::arm::Thumb2Assembler::AddConstant below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead, but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with more than 2^26 - 1
+        // elements (that is, more than 2^28 - 4 bytes of payload).
+        __ Lsl(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddConstant(index_reg, index_reg, offset_);
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
+    }
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<size_t>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM);
+};
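
The overflow argument in EmitNativeCode above can be checked with a worked bound; a sketch, assuming the stated cap of 2^26 - 1 elements for object arrays:

    // (2^26 - 1) references * 4 bytes each = 2^28 - 4 bytes, so the scaled
    // index, even with the data offset added, stays far below 2^31.
    constexpr uint64_t kMaxObjArrayElements = (1ull << 26) - 1;
    static_assert(kMaxObjArrayElements * 4u == (1ull << 28) - 4, "scaled size bound");
    static_assert(kMaxObjArrayElements * 4u < (1ull << 31), "no int32 overflow");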
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
+      : SlowPathCode(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
+};
+
 #undef __
-#define __ down_cast<ArmAssembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())->  // NOLINT
 
 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
@@ -446,6 +740,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column in Table A8-1 of the ARMv7 reference manual.
+  // No dex instruction or HIR instruction needs the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE /* unordered */;
+    case kCondLT: return gt_bias ? CC : LT /* unordered */;
+    case kCondLE: return gt_bias ? LS : LE /* unordered */;
+    case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+    case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
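
A worked case for the gt_bias mapping above: after vcmp/vmstat, an unordered result (a NaN operand) leaves the APSR flags as N=0, Z=0, C=1, V=1. Plain LT (N != V) would therefore be taken on NaN, while CC (C == 0) is not, which is what a gt-biased `<` requires:

    // fcmpl-style (gt_bias) `a < b` with a NaN operand:
    //   flags after vmstat: N=0 Z=0 C=1 V=1 (unordered)
    //   LT (N != V) -> taken      (wrong: NaN must make `<` false)
    //   CC (C == 0) -> not taken  (correct)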
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -492,13 +804,24 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_patches_(MethodReferenceComparator(),
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -513,17 +836,6 @@
     uint32_t new_position = __ GetAdjustedPosition(old_position);
     stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
   }
-  // Adjust native pc offsets of block labels.
-  for (HBasicBlock* block : *block_order_) {
-    // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid
-    // FirstNonEmptyBlock() which could lead to adjusting a label more than once.
-    DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size());
-    Label* block_label = &block_labels_[block->GetBlockId()];
-    DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump());
-    if (block_label->IsBound()) {
-      __ AdjustLabelPosition(block_label);
-    }
-  }
   // Adjust pc offsets for the disassembly information.
   if (disasm_info_ != nullptr) {
     GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
@@ -538,66 +850,11 @@
       it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
     }
   }
-  // Adjust pc offsets for relative call patches.
-  for (MethodPatchInfo<Label>& info : relative_call_patches_) {
-    __ AdjustLabelPosition(&info.label);
-  }
 
   CodeGenerator::Finalize(allocator);
 }
 
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      ArmManagedRegister pair =
-          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        ArmManagedRegister current =
-            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimDouble: {
-      int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
-      DCHECK_EQ(reg % 2, 0);
-      return Location::FpuRegisterPairLocation(reg, reg + 1);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
@@ -612,15 +869,7 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
@@ -644,17 +893,16 @@
 }
 
 InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  // Save one extra register for baseline. Note that on thumb2, there is no easy
-  // instruction to restore just the PC, so this actually helps both baseline
-  // and non-baseline to save and restore at least two registers at entry and exit.
-  core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
   DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // There is no easy instruction to restore just the PC on Thumb2, so we
+  // spill and restore one extra, arbitrary register.
+  core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   // We use vpush and vpop for saving and restoring floating point registers, which take
   // a SRegister and the number of registers to save/restore after that SRegister. We
@@ -720,7 +968,7 @@
   if (fpu_spill_mask_ != 0) {
     SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
     __ vpops(start_register, POPCOUNT(fpu_spill_mask_));
-    __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+    __ cfi().AdjustCFAOffset(-static_cast<int>(kArmPointerSize) * POPCOUNT(fpu_spill_mask_));
     __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
   }
   // Pop LR into PC to return.
@@ -732,31 +980,8 @@
 }
 
 void CodeGeneratorARM::Bind(HBasicBlock* block) {
-  __ Bind(GetLabelOf(block));
-}
-
-Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+  Label* label = GetLabelOf(block);
+  __ BindTrackedLabel(label);
 }
 
 Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
@@ -832,7 +1057,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const {
@@ -859,7 +1084,7 @@
     }
 
     case Primitive::kPrimVoid:
-      return Location();
+      return Location::NoLocation();
   }
 
   UNREACHABLE();
@@ -969,101 +1194,15 @@
   }
 }
 
-void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  if (instruction->IsCurrentMethod()) {
-    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (locations != nullptr && locations->Out().IsConstant()) {
-    HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
-      int32_t value = GetInt32ValueOf(const_to_move);
-      if (location.IsRegister()) {
-        __ LoadImmediate(location.AsRegister<Register>(), value);
-      } else {
-        DCHECK(location.IsStackSlot());
-        __ LoadImmediate(IP, value);
-        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-      }
-    } else {
-      DCHECK(const_to_move->IsLongConstant()) << const_to_move->DebugName();
-      int64_t value = const_to_move->AsLongConstant()->GetValue();
-      if (location.IsRegisterPair()) {
-        __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
-        __ LoadImmediate(location.AsRegisterPairHigh<Register>(), High32Bits(value));
-      } else {
-        DCHECK(location.IsDoubleStackSlot());
-        __ LoadImmediate(IP, Low32Bits(value));
-        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-        __ LoadImmediate(IP, High32Bits(value));
-        __ StoreToOffset(kStoreWord, IP, SP, location.GetHighStackIndex(kArmWordSize));
-      }
-    }
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move32(location, Location::StackSlot(stack_slot));
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, Location::DoubleStackSlot(stack_slot));
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    if (temp_location.IsStackSlot()) {
-      Move32(location, temp_location);
-    } else {
-      DCHECK(temp_location.IsDoubleStackSlot());
-      Move64(location, temp_location);
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
-        Move32(location, locations->Out());
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, locations->Out());
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  }
-}
-
 void CodeGeneratorARM::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ LoadImmediate(location.AsRegister<Register>(), value);
 }
 
 void CodeGeneratorARM::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  if (Primitive::Is64BitType(dst_type)) {
-    Move64(dst, src);
-  } else {
-    Move32(dst, src);
-  }
+  HParallelMove move(GetGraph()->GetArena());
+  move.AddMove(src, dst, dst_type, nullptr);
+  GetMoveResolver()->EmitNativeCode(&move);
 }
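
Routing even a single move through HParallelMove lets the resolver handle overlapping sources and destinations uniformly; naive sequential copies cannot express a cycle. A sketch of the case the resolver breaks with a scratch register (register names illustrative):

    // Parallel move {R0 -> R1, R1 -> R0}: "mov r1, r0; mov r0, r1" would lose
    // the old value of R1, so the resolver emits:
    //   mov ip, r1
    //   mov r1, r0
    //   mov r0, ip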
 
 void CodeGeneratorARM::AddLocationAsTemp(Location location, LocationSummary* locations) {
@@ -1081,7 +1220,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kArmWordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1097,6 +1236,14 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+  __ blx(LR);
+}
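
This helper is the no-stack-map variant of a runtime call: it loads the entrypoint through the thread register and calls it without RecordPcInfo, so it is only suitable for entrypoints that can never trigger GC or a stack walk. Conceptually it emits:

    // ldr lr, [tr, #entry_point_offset]   ; TR holds the ART Thread*
    // blx lr                              ; no stack map recorded for this pc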
+
 void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
 
@@ -1144,28 +1291,49 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) {
-  ShifterOperand operand;
-  if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) {
-    __ cmp(left, operand);
+void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
+  Primitive::Type type = instruction->InputAt(0)->GetType();
+  Location lhs_loc = instruction->GetLocations()->InAt(0);
+  Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equals, Float.compare,
+    // Float.compareTo, Double.equals, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+    if (type == Primitive::kPrimFloat) {
+      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
   } else {
-    Register temp = IP;
-    __ LoadImmediate(temp, right);
-    __ cmp(left, ShifterOperand(temp));
+    if (type == Primitive::kPrimFloat) {
+      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
   }
 }
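
The constant path above exists because +0.0 is the only immediate VCMP can encode; any other constant must first be materialized in a register. The two emitted shapes, sketched for the float case:

    // rhs is the constant 0.0:  vcmp.f32 s0, #0.0   (vcmpsz)
    // rhs in a register:        vcmp.f32 s0, s1     (vcmps)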
 
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
-                                                  Label* false_label) {
+                                                  Label* false_label ATTRIBUTE_UNUSED) {
   __ vmstat();  // transfer FP status register to ARM APSR.
-  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
-  if (cond->IsFPConditionTrueIfNaN()) {
-    __ b(true_label, VS);  // VS for unordered.
-  } else if (cond->IsFPConditionFalseIfNaN()) {
-    __ b(false_label, VS);  // VS for unordered.
-  }
-  __ b(true_label, ARMCondition(cond->GetCondition()));
+  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1220,7 +1388,7 @@
     int32_t val_low = Low32Bits(value);
     int32_t val_high = High32Bits(value);
 
-    GenerateCompareWithImmediate(left_high, val_high);
+    __ CmpConstant(left_high, val_high);
     if (if_cond == kCondNE) {
       __ b(true_label, ARMCondition(true_high_cond));
     } else if (if_cond == kCondEQ) {
@@ -1230,7 +1398,7 @@
       __ b(false_label, ARMCondition(false_high_cond));
     }
     // Must be equal high, so compare the lows.
-    GenerateCompareWithImmediate(left_low, val_low);
+    __ CmpConstant(left_low, val_low);
   } else {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
@@ -1252,25 +1420,14 @@
   __ b(true_label, final_condition);
 }
 
-void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr,
-                                                               HCondition* condition,
-                                                               Label* true_target,
-                                                               Label* false_target,
-                                                               Label* always_true_target) {
-  LocationSummary* locations = condition->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-
-  // We don't want true_target as a nullptr.
-  if (true_target == nullptr) {
-    true_target = always_true_target;
-  }
-  bool falls_through = (false_target == nullptr);
-
-  // FP compares don't like null false_targets.
-  if (false_target == nullptr) {
-    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  }
+void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
+                                                               Label* true_target_in,
+                                                               Label* false_target_in) {
+  // The generated branching requires both targets to be explicit. If either
+  // target is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+  Label fallthrough_target;
+  Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
@@ -1278,118 +1435,173 @@
       GenerateLongComparesAndJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimFloat:
-      __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      GenerateFPJumps(condition, true_target, false_target);
-      break;
     case Primitive::kPrimDouble:
-      __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      GenerateVcmp(condition);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
   }
 
-  if (!falls_through) {
+  if (false_target != &fallthrough_target) {
     __ b(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
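
The `fallthrough_target` trick gives the long/FP compare code two concrete labels even when a caller passes nullptr to mean "fall through"; the local label costs nothing unless some branch actually targets it. A condensed sketch of the pattern (`caller_target` is an illustrative name):

    Label fallthrough;
    Label* target = caller_target != nullptr ? caller_target : &fallthrough;
    // ... emit branches to `target` ...
    if (fallthrough.IsLinked()) {
      __ Bind(&fallthrough);  // bound right at the fallthrough position
    }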
 
 void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction,
+                                                        size_t condition_input_index,
                                                         Label* true_target,
-                                                        Label* false_target,
-                                                        Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  if (cond->IsIntConstant()) {
-    // Constant condition, statically compared against 1.
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ b(always_true_target);
+                                                        Label* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against "true" (integer value 1).
+    if (cond->AsIntConstant()->IsTrue()) {
+      if (true_target != nullptr) {
+        __ b(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      if (false_target != nullptr) {
+        __ b(false_target);
+      }
+    }
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    // Condition has been materialized, compare the output to 0.
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+    DCHECK(cond_val.IsRegister());
+    if (true_target == nullptr) {
+      __ CompareAndBranchIfZero(cond_val.AsRegister<Register>(), false_target);
+    } else {
+      __ CompareAndBranchIfNonZero(cond_val.AsRegister<Register>(), true_target);
     }
   } else {
-    if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
-      // Condition has been materialized, compare the output to 0
-      DCHECK(instruction->GetLocations()->InAt(0).IsRegister());
-      __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
-                                   true_target);
-    } else {
-      // Condition has not been materialized, use its inputs as the
-      // comparison and its condition as the branch condition.
-      Primitive::Type type =
-          cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-      // Is this a long or FP comparison that has been folded into the HCondition?
-      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-        // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
-                                     true_target, false_target, always_true_target);
-        return;
-      }
+    // Condition has not been materialized. Use its inputs as the comparison and
+    // its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
 
-      LocationSummary* locations = cond->GetLocations();
-      DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0);
-      Register left = locations->InAt(0).AsRegister<Register>();
-      Location right = locations->InAt(1);
-      if (right.IsRegister()) {
-        __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
-      } else {
-        DCHECK(right.IsConstant());
-        GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-      }
-      __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
+    // If this is a long or FP comparison that has been folded into
+    // the HCondition, generate the comparison directly.
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      return;
+    }
+
+    LocationSummary* locations = cond->GetLocations();
+    DCHECK(locations->InAt(0).IsRegister());
+    Register left = locations->InAt(0).AsRegister<Register>();
+    Location right = locations->InAt(1);
+    if (right.IsRegister()) {
+      __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+    } else {
+      DCHECK(right.IsConstant());
+      __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+    }
+    if (true_target == nullptr) {
+      __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+    } else {
+      __ b(true_target, ARMCondition(condition->GetCondition()));
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ b(false_target);
   }
 }
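
For a materialized condition held in a register Rc, the three patterns enumerated in the comment above correspond to these emitted shapes (conceptual; CompareAndBranchIfZero may expand to a cmp plus a conditional branch):

    // (1) only false_target:  cbz  Rc, false_target   ; true falls through
    // (2) only true_target:   cbnz Rc, true_target    ; false falls through
    // (3) both targets:       cbnz Rc, true_target
    //                         b    false_target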
 
 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathARM(deoptimize);
-  codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
-void LocationsBuilderARM::VisitCondition(HCondition* cond) {
+void LocationsBuilderARM::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
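
The select lowering above is a branch over a move: with the output tied to the first input, only the "condition true" case needs code. A pseudocode sketch, assuming HSelect's input 0 is the false value and input 1 the true value:

    // out = false_value;             // free: out is SameAsFirstInput
    // if (cond) out = true_value;    // the move that `false_target` skips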
+
+void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
+void CodeGeneratorARM::GenerateNop() {
+  __ nop();
+}
+
+void LocationsBuilderARM::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1397,7 +1609,7 @@
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
       }
       break;
@@ -1405,8 +1617,8 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
-      if (cond->NeedsMaterialization()) {
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
       break;
@@ -1414,14 +1626,14 @@
     default:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
   }
 }
 
-void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -1438,8 +1650,8 @@
         __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
       } else {
         DCHECK(right.IsConstant());
-        GenerateCompareWithImmediate(left.AsRegister<Register>(),
-                                     CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+        __ CmpConstant(left.AsRegister<Register>(),
+                       CodeGenerator::GetInt32ValueOf(right.GetConstant()));
       }
       __ it(ARMCondition(cond->GetCondition()), kItElse);
       __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
@@ -1452,12 +1664,8 @@
       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimFloat:
-      __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      GenerateFPJumps(cond, &true_label, &false_label);
-      break;
     case Primitive::kPrimDouble:
-      __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      GenerateVcmp(cond);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -1477,126 +1685,83 @@
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
-}
-
-void LocationsBuilderARM::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
@@ -1654,7 +1819,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
@@ -1687,18 +1852,26 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache accesses, the invoke has an extra input:
+  // the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -1711,9 +1884,9 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -1759,30 +1932,42 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   LocationSummary* locations = invoke->GetLocations();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  // Set the hidden argument.
-  __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
-                   invoke->GetDexMethodIndex());
+  // Set the hidden argument. It is safe to do this here, as R12
+  // won't be modified thereafter, before the `blx` (call) instruction.
+  DCHECK_EQ(R12, hidden_reg);
+  __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (future
+  // collectors may not).
   __ MaybeUnpoisonHeapReference(temp);
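+  // Load the ImTable pointer from the class (see mirror::Class::ImtPtrOffset).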
+  __ LoadFromOffset(kLoadWord, temp, temp,
+        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kArmWordSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
+  uint32_t entry_point =
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
@@ -1875,7 +2060,7 @@
       (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
         && result_type == Primitive::kPrimLong)
        || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -1886,6 +2071,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -1904,6 +2091,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -1988,6 +2177,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2087,6 +2278,10 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
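+          // SBFX extracts bits [7:0] of the low word and sign-extends them to 32 bits.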
+          __ sbfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 8);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2104,6 +2299,10 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          __ sbfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 16);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2139,8 +2338,7 @@
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-int' instruction.
           SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          __ vmovs(temp, in.AsFpuRegister<SRegister>());
-          __ vcvtis(temp, temp);
+          __ vcvtis(temp, in.AsFpuRegister<SRegister>());
           __ vmovrs(out.AsRegister<Register>(), temp);
           break;
         }
@@ -2148,9 +2346,7 @@
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-int' instruction.
           SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          DRegister temp_d = FromLowSToD(temp_s);
-          __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
-          __ vcvtid(temp_s, temp_d);
+          __ vcvtid(temp_s, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
           __ vmovrs(out.AsRegister<Register>(), temp_s);
           break;
         }
@@ -2185,6 +2381,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2193,6 +2390,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -2203,6 +2401,10 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
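+          // UBFX extracts bits [15:0] of the low word and zero-extends them to 32 bits.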
+          __ ubfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 16);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2238,6 +2440,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2322,7 +2525,7 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2359,13 +2562,18 @@
       break;
 
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ adds(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ adc(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ adds(out.AsRegisterPairLow<Register>(),
+                first.AsRegisterPairLow<Register>(),
+                ShifterOperand(second.AsRegisterPairLow<Register>()));
+        __ adc(out.AsRegisterPairHigh<Register>(),
+               first.AsRegisterPairHigh<Register>(),
+               ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      }
       break;
     }
 
@@ -2399,7 +2607,7 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2435,13 +2643,18 @@
     }
 
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ subs(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ sbc(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
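+        // A constant subtraction is lowered as an addition of the negated value.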
+        GenerateAddLongConst(out, first, -value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ subs(out.AsRegisterPairLow<Register>(),
+                first.AsRegisterPairLow<Register>(),
+                ShifterOperand(second.AsRegisterPairLow<Register>()));
+        __ sbc(out.AsRegisterPairHigh<Register>(),
+               first.AsRegisterPairHigh<Register>(),
+               ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      }
       break;
     }
 
@@ -2588,8 +2801,7 @@
   Register dividend = locations->InAt(0).AsRegister<Register>();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
-  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (ctz_imm == 1) {
@@ -2665,7 +2877,7 @@
     // Do not generate anything. DivZeroCheck would prevent any code from being executed.
   } else if (imm == 1 || imm == -1) {
     DivRemOneOrMinusOne(instruction);
-  } else if (IsPowerOfTwo(std::abs(imm))) {
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
     DivRemByPowerOfTwo(instruction);
   } else {
     DCHECK(imm <= -2 || imm >= 2);
@@ -2677,13 +2889,13 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   if (div->GetResultType() == Primitive::kPrimLong) {
     // pLdiv runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
   } else if (div->GetResultType() == Primitive::kPrimInt &&
              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // pIdivmod runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2692,14 +2904,14 @@
     case Primitive::kPrimInt: {
       if (div->InputAt(1)->IsConstant()) {
         locations->SetInAt(0, Location::RequiresRegister());
-        locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
+        locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
-        if (abs_imm <= 1) {
+        int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
           locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(abs_imm)) {
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
             locations->AddTemp(Location::RequiresRegister());
           }
         }
@@ -2760,6 +2972,7 @@
         DCHECK_EQ(R0, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
@@ -2774,6 +2987,7 @@
       DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
 
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
@@ -2800,7 +3014,7 @@
   Primitive::Type type = rem->GetResultType();
 
   // Most remainders are implemented in the runtime.
-  LocationSummary::CallKind call_kind = LocationSummary::kCall;
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
   if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
     call_kind = LocationSummary::kNoCall;
@@ -2816,14 +3030,14 @@
     case Primitive::kPrimInt: {
       if (rem->InputAt(1)->IsConstant()) {
         locations->SetInAt(0, Location::RequiresRegister());
-        locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
+        locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
-        if (abs_imm <= 1) {
+        int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
           locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(abs_imm)) {
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
             locations->AddTemp(Location::RequiresRegister());
           }
         }
@@ -2902,22 +3116,26 @@
         DCHECK_EQ(R1, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
 
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       break;
     }
 
     case Primitive::kPrimFloat: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
 
     case Primitive::kPrimDouble: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
 
@@ -2945,6 +3163,7 @@
   Location value = locations->InAt(0);
 
   switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
@@ -2978,6 +3197,140 @@
   }
 }
 
+void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a positive equivalent in that range.
+    // (e.g. a rotation left *or* right by -2 bits is the same as one by 30
+    // bits in the same direction.)
+    uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
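+    // (A ROR with a 0 immediate would encode RRX instead, hence the explicit
+    // mov below for zero rotations.)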
+    if (rot) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    __ Ror(out, in, rhs.AsRegister<Register>());
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates, either by swapping input regs (effectively rotating by the first
+// 32 bits of a larger rotation) or by flipping direction (thus treating larger
+// right/left rotations as sub-word sized rotations in the other direction),
+// as appropriate.
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to positive equivalents on the interval [0,63].
+    rot &= kMaxLongShiftDistance;
+    // For rotates over a word in size, 'pre-rotate' by 32 bits to reduce the
+    // rotate logic below to a simple pair of binary orr.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= kArmBitsPerWord) {
+      rot -= kArmBitsPerWord;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot != 0u) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_right = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_left = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label shift_by_32_plus_shift_right;
+
+    __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+    __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep);
+    __ b(&shift_by_32_plus_shift_right, CC);
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
+
+void LocationsBuilderARM::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (ror->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Primitive::Type type = ror->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HandleIntegerRotate(locations);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      HandleLongRotate(locations);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
 
@@ -2987,17 +3340,29 @@
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
-      // Make the output overlap, as it will be used to hold the masked
-      // second input.
-      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        // Make the output overlap, as it will be used to hold the masked
+        // second input.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->AddTemp(Location::RequiresRegister());
-      locations->SetOut(Location::RequiresRegister());
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        // For simplicity, use kOutputOverlap even though we only require that the
+        // low registers don't clash with the high registers, which the register
+        // allocator currently guarantees.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
       break;
     }
     default:
@@ -3018,10 +3383,10 @@
     case Primitive::kPrimInt: {
       Register out_reg = out.AsRegister<Register>();
       Register first_reg = first.AsRegister<Register>();
-      // Arm doesn't mask the shift count so we need to do it ourselves.
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
-        __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
+        // ARM doesn't mask the shift count, so we need to do it ourselves.
+        __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftDistance));
         if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, out_reg);
         } else if (op->IsShr()) {
@@ -3031,8 +3396,8 @@
         }
       } else {
         int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
-        uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
-        if (shift_value == 0) {  // arm does not support shifting with 0 immediate.
+        uint32_t shift_value = cst & kMaxIntShiftDistance;
+        if (shift_value == 0) {  // ARM does not support shifting with a 0 immediate.
           __ Mov(out_reg, first_reg);
         } else if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, shift_value);
@@ -3048,57 +3413,115 @@
       Register o_h = out.AsRegisterPairHigh<Register>();
       Register o_l = out.AsRegisterPairLow<Register>();
 
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-
       Register high = first.AsRegisterPairHigh<Register>();
       Register low = first.AsRegisterPairLow<Register>();
 
-      Register second_reg = second.AsRegister<Register>();
+      if (second.IsRegister()) {
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
 
-      if (op->IsShl()) {
-        __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // Shift the high part
-        __ Lsl(o_h, high, o_l);
-        // Shift the low part and `or` what overflew on the high part
-        __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
-        __ Lsr(temp, low, temp);
-        __ orr(o_h, o_h, ShifterOperand(temp));
-        // If the shift is > 32 bits, override the high part
-        __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Lsl(o_h, low, temp, PL);
-        // Shift the low part
-        __ Lsl(o_l, low, o_l);
-      } else if (op->IsShr()) {
-        __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // Shift the low part
-        __ Lsr(o_l, low, o_h);
-        // Shift the high part and `or` what underflew on the low part
-        __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ Lsl(temp, high, temp);
-        __ orr(o_l, o_l, ShifterOperand(temp));
-        // If the shift is > 32 bits, override the low part
-        __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Asr(o_l, high, temp, PL);
-        // Shift the high part
-        __ Asr(o_h, high, o_h);
+        Register second_reg = second.AsRegister<Register>();
+
+        if (op->IsShl()) {
+          __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftDistance));
+          // Shift the high part
+          __ Lsl(o_h, high, o_l);
+          // Shift the low part and `or` what overflowed on the high part
+          __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
+          __ Lsr(temp, low, temp);
+          __ orr(o_h, o_h, ShifterOperand(temp));
+          // If the shift is > 32 bits, override the high part
+          __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Lsl(o_h, low, temp, PL);
+          // Shift the low part
+          __ Lsl(o_l, low, o_l);
+        } else if (op->IsShr()) {
+          __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftDistance));
+          // Shift the low part
+          __ Lsr(o_l, low, o_h);
+          // Shift the high part and `or` what underflowed on the low part
+          __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ Lsl(temp, high, temp);
+          __ orr(o_l, o_l, ShifterOperand(temp));
+          // If the shift is > 32 bits, override the low part
+          __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Asr(o_l, high, temp, PL);
+          // Shift the high part
+          __ Asr(o_h, high, o_h);
+        } else {
+          __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftDistance));
+          // Same as Shr, except we use `Lsr`s and not `Asr`s.
+          __ Lsr(o_l, low, o_h);
+          __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ Lsl(temp, high, temp);
+          __ orr(o_l, o_l, ShifterOperand(temp));
+          __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Lsr(o_l, high, temp, PL);
+          __ Lsr(o_h, high, o_h);
+        }
       } else {
-        __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // same as Shr except we use `Lsr`s and not `Asr`s
-        __ Lsr(o_l, low, o_h);
-        __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ Lsl(temp, high, temp);
-        __ orr(o_l, o_l, ShifterOperand(temp));
-        __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Lsr(o_l, high, temp, PL);
-        __ Lsr(o_h, high, o_h);
+        // The register allocator doesn't create partial overlaps.
+        DCHECK_NE(o_l, high);
+        DCHECK_NE(o_h, low);
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = cst & kMaxLongShiftDistance;
+        if (shift_value > 32) {
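+          // For shifts of more than 32 bits the result comes entirely from the
+          // other half; e.g. a SHL by 40 yields o_h = low << 8 and o_l = 0.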
+          if (op->IsShl()) {
+            __ Lsl(o_h, low, shift_value - 32);
+            __ LoadImmediate(o_l, 0);
+          } else if (op->IsShr()) {
+            __ Asr(o_l, high, shift_value - 32);
+            __ Asr(o_h, high, 31);
+          } else {
+            __ Lsr(o_l, high, shift_value - 32);
+            __ LoadImmediate(o_h, 0);
+          }
+        } else if (shift_value == 32) {
+          if (op->IsShl()) {
+            __ mov(o_h, ShifterOperand(low));
+            __ LoadImmediate(o_l, 0);
+          } else if (op->IsShr()) {
+            __ mov(o_l, ShifterOperand(high));
+            __ Asr(o_h, high, 31);
+          } else {
+            __ mov(o_l, ShifterOperand(high));
+            __ LoadImmediate(o_h, 0);
+          }
+        } else if (shift_value == 1) {
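+          // A 1-bit shift of the pair can use the carry flag: the flag-setting
+          // shift leaves the bit crossing the word boundary in the carry, and
+          // adc/Rrx shift it into the other half.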
+          if (op->IsShl()) {
+            __ Lsls(o_l, low, 1);
+            __ adc(o_h, high, ShifterOperand(high));
+          } else if (op->IsShr()) {
+            __ Asrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          } else {
+            __ Lsrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          }
+        } else {
+          DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
+          if (op->IsShl()) {
+            __ Lsl(o_h, high, shift_value);
+            __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
+            __ Lsl(o_l, low, shift_value);
+          } else if (op->IsShr()) {
+            __ Lsr(o_l, low, shift_value);
+            __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+            __ Asr(o_h, high, shift_value);
+          } else {
+            __ Lsr(o_l, low, shift_value);
+            __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+            __ Lsr(o_h, high, shift_value);
+          }
+        }
       }
       break;
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -3128,27 +3551,40 @@
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(Location::RegisterLocation(R0));
 }
 
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
+    __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadWord, LR, temp, code_offset.Int32Value());
+    __ blx(LR);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(R0));
@@ -3165,6 +3601,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -3240,6 +3677,11 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -3250,7 +3692,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -3267,7 +3709,19 @@
 
   Label less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond;
   switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      __ LoadImmediate(out, 0);
+      __ cmp(left.AsRegister<Register>(),
+             ShifterOperand(right.AsRegister<Register>()));  // Signed compare.
+      less_cond = LT;
+      break;
+    }
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
              ShifterOperand(right.AsRegisterPairHigh<Register>()));  // Signed compare.
@@ -3277,26 +3731,24 @@
       __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
+      less_cond = LO;
       break;
     }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ LoadImmediate(out, 0);
-      if (type == Primitive::kPrimFloat) {
-        __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      } else {
-        __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-                 FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
-      }
+      GenerateVcmp(compare);
       __ vmstat();  // transfer FP status register to ARM APSR.
-      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
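+      // ARMFPCondition folds the gt/lt bias for unordered (NaN) results into
+      // the returned condition code, replacing the explicit VS branch.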
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
   }
+
   __ b(&done, EQ);
-  __ b(&less, LO);  // LO is for both: unsigned compare for longs and 'less than' for floats.
+  __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
@@ -3311,7 +3763,7 @@
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3321,9 +3773,9 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
-  // TODO (ported from quick): revisit Arm barrier kinds
-  DmbOptions flavor = DmbOptions::ISH;  // quiet c++ warnings
+void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
+  // TODO (ported from quick): revisit ARM barrier kinds.
+  DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
   switch (kind) {
     case MemBarrierKind::kAnyStore:
     case MemBarrierKind::kLoadAny:
@@ -3346,6 +3798,9 @@
                                                          Register out_lo,
                                                          Register out_hi) {
   if (offset != 0) {
+    // Ensure `out_lo` is different from `addr`, so that loading
+    // `offset` into `out_lo` does not clobber `addr`.
+    DCHECK_NE(out_lo, addr);
     __ LoadImmediate(out_lo, offset);
     __ add(IP, addr, ShifterOperand(out_lo));
     addr = IP;
@@ -3401,11 +3856,11 @@
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   } else if (generate_volatile) {
-    // Arm encoding have some additional constraints for ldrexd/strexd:
+    // ARM encodings have some additional constraints for ldrexd/strexd:
     // - registers need to be consecutive
     // - the first register should be even but not R14.
-    // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
-    // enable Arm encoding.
+    // We don't test for ARM yet, and the assertion makes sure that we
+    // revisit this if we ever enable ARM encoding.
     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
 
     locations->AddTemp(Location::RequiresRegister());
@@ -3435,7 +3890,7 @@
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   switch (field_type) {
@@ -3527,20 +3982,32 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
 void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
 
   bool volatile_for_double = field_info.IsVolatile()
       && (field_info.GetFieldType() == Primitive::kPrimDouble)
       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
-  bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong);
+  // The output overlaps in the case of a volatile long: we don't want the
+  // code generated by GenerateWideAtomicLoad to overwrite the
+  // object's location.  Likewise, in the case of an object field get
+  // with read barriers enabled, we do not want the load to overwrite
+  // the object's location, as we need it to emit the read barrier.
+  bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+      object_field_get_with_read_barrier;
 
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
@@ -3549,14 +4016,29 @@
                       (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   }
   if (volatile_for_double) {
-    // Arm encoding have some additional constraints for ldrexd/strexd:
+    // ARM encodings have some additional constraints for ldrexd/strexd:
     // - registers need to be consecutive
     // - the first register should be even but not R14.
-    // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
-    // enable Arm encoding.
+    // We don't test for ARM yet, and the assertion makes sure that we
+    // revisit this if we ever enable ARM encoding.
     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+Location LocationsBuilderARM::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+  DCHECK(input->GetType() == Primitive::kPrimDouble || input->GetType() == Primitive::kPrimFloat)
+      << input->GetType();
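+  // VCMP can compare an FP register directly against the immediate #0.0, so an
+  // arithmetic zero constant does not need to be materialized in a register.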
+  if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+      (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+    return Location::ConstantLocation(input->AsConstant());
+  } else {
+    return Location::RequiresFpuRegister();
   }
 }
 
@@ -3574,31 +4056,51 @@
                                                        Opcode opcode) {
   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
   if (Primitive::Is64BitType(input_cst->GetType())) {
-    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
-        CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+    Opcode high_opcode = opcode;
+    SetCc low_set_cc = kCcDontCare;
+    switch (opcode) {
+      case SUB:
+        // Flip the operation to an ADD.
+        value = -value;
+        opcode = ADD;
+        FALLTHROUGH_INTENDED;
+      case ADD:
+        if (Low32Bits(value) == 0u) {
+          return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+        }
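+        // The low word addition must set the flags (ADDS) so that the high
+        // word can consume the carry via ADC.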
+        high_opcode = ADC;
+        low_set_cc = kCcSet;
+        break;
+      default:
+        break;
+    }
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+        CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
   } else {
     return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
   }
 }
 
-bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
+                                                       Opcode opcode,
+                                                       SetCc set_cc) {
   ShifterOperand so;
   ArmAssembler* assembler = codegen_->GetAssembler();
-  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) {
     return true;
   }
   Opcode neg_opcode = kNoOperand;
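+  // If the constant cannot be encoded directly, try the complementary operation
+  // with the complemented or negated value; e.g. an AND with 0xFFFFFF00 is not
+  // encodable as a modified immediate, but the equivalent BIC with 0xFF is.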
   switch (opcode) {
-    case AND:
-      neg_opcode = BIC;
-      break;
-    case ORR:
-      neg_opcode = ORN;
-      break;
+    case AND: neg_opcode = BIC; value = ~value; break;
+    case ORR: neg_opcode = ORN; value = ~value; break;
+    case ADD: neg_opcode = SUB; value = -value; break;
+    case ADC: neg_opcode = SBC; value = ~value; break;
+    case SUB: neg_opcode = ADD; value = -value; break;
+    case SBC: neg_opcode = ADC; value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
 }
 
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -3606,7 +4108,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  Register base = locations->InAt(0).AsRegister<Register>();
+  Location base_loc = locations->InAt(0);
+  Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
@@ -3614,33 +4117,52 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (field_type) {
-    case Primitive::kPrimBoolean: {
+    case Primitive::kPrimBoolean:
       __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
-    case Primitive::kPrimByte: {
+    case Primitive::kPrimByte:
       __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
-    case Primitive::kPrimShort: {
+    case Primitive::kPrimShort:
       __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
-    case Primitive::kPrimChar: {
+    case Primitive::kPrimChar:
       __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
       if (is_volatile && !atomic_ldrd_strd) {
         GenerateWideAtomicLoad(base, offset,
                                out.AsRegisterPairLow<Register>(),
@@ -3649,12 +4171,10 @@
         __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset);
       }
       break;
-    }
 
-    case Primitive::kPrimFloat: {
+    case Primitive::kPrimFloat:
       __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset);
       break;
-    }
 
     case Primitive::kPrimDouble: {
       DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
@@ -3676,17 +4196,20 @@
       UNREACHABLE();
   }
 
-  // Doubles are handled in the switch.
-  if (field_type != Primitive::kPrimDouble) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) {
+    // Potential implicit null checks, in the case of reference or
+    // double fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -3801,19 +4324,19 @@
   }
 }
 
-void InstructionCodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
-  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
     return;
   }
   Location obj = instruction->GetLocations()->InAt(0);
 
   __ LoadFromOffset(kLoadWord, IP, obj.AsRegister<Register>(), 0);
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void InstructionCodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) {
+void CodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) {
   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction);
-  codegen_->AddSlowPath(slow_path);
+  AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
@@ -3822,143 +4345,280 @@
 }
 
 void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
-    GenerateImplicitNullCheck(instruction);
-  } else {
-    GenerateExplicitNullCheck(instruction);
+  codegen_->GenerateNullCheck(instruction);
+}
+
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kLoadWord;
+    case Primitive::kPrimBoolean:
+      return kLoadUnsignedByte;
+    case Primitive::kPrimByte:
+      return kLoadSignedByte;
+    case Primitive::kPrimChar:
+      return kLoadUnsignedHalfword;
+    case Primitive::kPrimShort:
+      return kLoadSignedHalfword;
+    case Primitive::kPrimInt:
+      return kLoadWord;
+    case Primitive::kPrimLong:
+      return kLoadWordPair;
+    case Primitive::kPrimFloat:
+      return kLoadSWord;
+    case Primitive::kPrimDouble:
+      return kLoadDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kStoreWord;
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      return kStoreByte;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return kStoreHalfword;
+    case Primitive::kPrimInt:
+      return kStoreWord;
+    case Primitive::kPrimLong:
+      return kStoreWordPair;
+    case Primitive::kPrimFloat:
+      return kStoreSWord;
+    case Primitive::kPrimDouble:
+      return kStoreDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARM::LoadFromShiftedRegOffset(Primitive::Type type,
+                                                Location out_loc,
+                                                Register base,
+                                                Register reg_offset,
+                                                Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  Address mem_address(base, reg_offset, Shift::LSL, shift_count);
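+  // The address computed is base + (reg_offset << shift_count), i.e. the index
+  // scaled by the element size of `type`.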
+
+  switch (type) {
+    case Primitive::kPrimByte:
+      __ ldrsb(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimBoolean:
+      __ ldrb(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimShort:
+      __ ldrsh(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimChar:
+      __ ldrh(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ ldr(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARM::StoreToShiftedRegOffset(Primitive::Type type,
+                                               Location loc,
+                                               Register base,
+                                               Register reg_offset,
+                                               Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+      __ strb(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      __ strh(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ str(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
   }
 }
 
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   Primitive::Type type = instruction->GetType();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (type) {
-    case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+        LoadOperandType load_type = GetLoadOperandType(type);
+        __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
-        __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset);
+        Register temp = IP;
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = obj;
+        } else {
+          __ add(temp, obj, ShifterOperand(data_offset));
+        }
+        codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
       }
       break;
     }
 
-    case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
-        __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFromOffset(kLoadWord, out, obj, offset);
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ LoadFromOffset(kLoadWord, out, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          Register temp = IP;
+
+          if (has_intermediate_address) {
+            // We do not need to compute the intermediate address from the array: the
+            // input instruction has done it already. See the comment in
+            // `TryExtractArrayAccessAddress()`.
+            if (kIsDebugBuild) {
+              HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+              DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+            }
+            temp = obj;
+          } else {
+            __ add(temp, obj, ShifterOperand(data_offset));
+          }
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset);
+        __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset);
       } else {
         __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
-        __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset);
+        __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset);
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK(out.IsFpuRegister());
+      SRegister out = out_loc.AsFpuRegister<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+        __ LoadSFromOffset(out, obj, offset);
       } else {
         __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+        __ LoadSFromOffset(out, IP, data_offset);
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK(out.IsFpuRegisterPair());
+      SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+        __ LoadDFromOffset(FromLowSToD(out), obj, offset);
       } else {
         __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
-        __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+        __ LoadDFromOffset(FromLowSToD(out), IP, data_offset);
       }
       break;
     }
@@ -3967,11 +4627,12 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -3980,11 +4641,16 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(value_type)) {
@@ -3992,7 +4658,6 @@
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
   }
-
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
@@ -4002,60 +4667,79 @@
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register array = locations->InAt(0).AsRegister<Register>();
+  Location array_loc = locations->InAt(0);
+  Register array = array_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t data_offset =
+      mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
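+  // E.g. for an int[] this is typically 12 on 32-bit targets (an 8-byte object
+  // header followed by the 4-byte length field); it is always computed from the
+  // runtime's Array layout here rather than hard-coded.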
+  Location value_loc = locations->InAt(2);
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, array, offset);
-      } else {
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
-        __ StoreToOffset(kStoreByte, value, IP, data_offset);
-      }
-      break;
-    }
-
+    case Primitive::kPrimByte:
     case Primitive::kPrimShort:
-    case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, array, offset);
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset =
+            data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+        StoreOperandType store_type = GetStoreOperandType(value_type);
+        __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset);
       } else {
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
+        Register temp = IP;
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = array;
+        } else {
+          __ add(temp, array, ShifterOperand(data_offset));
+        }
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          value_loc,
+                                          temp,
+                                          index.AsRegister<Register>());
       }
       break;
     }
 
     case Primitive::kPrimNot: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      Register source = value;
+      Register value = value_loc.AsRegister<Register>();
+      // The TryExtractArrayAccessAddress optimization is never applied to a
+      // non-primitive ArraySet; see the comment in instruction_simplifier_shared.cc.
+      DCHECK(!has_intermediate_address);
 
       if (instruction->InputAt(2)->IsNullConstant()) {
         // Just setting null.
         if (index.IsConstant()) {
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, source, array, offset);
+          __ StoreToOffset(kStoreWord, value, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-          __ StoreToOffset(kStoreWord, source, IP, data_offset);
+          __ add(IP, array, ShifterOperand(data_offset));
+          codegen_->StoreToShiftedRegOffset(value_type,
+                                            value_loc,
+                                            IP,
+                                            index.AsRegister<Register>());
         }
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4068,7 +4752,7 @@
       Label done;
       SlowPathCode* slow_path = nullptr;
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4080,34 +4764,78 @@
             __ StoreToOffset(kStoreWord, value, array, offset);
           } else {
             DCHECK(index.IsRegister()) << index;
-            __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-            __ StoreToOffset(kStoreWord, value, IP, data_offset);
+            __ add(IP, array, ShifterOperand(data_offset));
+            codegen_->StoreToShiftedRegOffset(value_type,
+                                              value_loc,
+                                              IP,
+                                              index.AsRegister<Register>());
           }
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           __ b(&done);
           __ Bind(&non_zero);
         }
 
-        __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp1);
-        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
-        // No need to poison/unpoison, we're comparing two poisoined references.
-        __ cmp(temp1, ShifterOperand(temp2));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          Label do_put;
-          __ b(&do_put, EQ);
-          __ MaybeUnpoisonHeapReference(temp1);
-          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
-          // No need to poison/unpoison, we're comparing against null.
-          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp1);
+          //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
+          //
+          //   __ cmp(temp1, ShifterOperand(temp2));
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ b(slow_path->GetEntryLabel());
         } else {
-          __ b(slow_path->GetEntryLabel(), NE);
+          // /* HeapReference<Class> */ temp1 = array->klass_
+          __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          // If heap poisoning is enabled, there is no need to unpoison `temp1`
+          // or `temp2`, as we are comparing two poisoned references.
+          __ cmp(temp1, ShifterOperand(temp2));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            Label do_put;
+            __ b(&do_put, EQ);
+            // If heap poisoning is enabled, the `temp1` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp1);
+
+            // /* HeapReference<Class> */ temp1 = temp1->super_class_
+            __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp1`, as we are comparing against null below.
+            __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ b(slow_path->GetEntryLabel(), NE);
+          }
         }
       }
 
+      Register source = value;
       if (kPoisonHeapReferences) {
         // Note that in the case where `value` is a null reference,
         // we do not enter this block, as a null reference does not
@@ -4124,11 +4852,15 @@
         __ StoreToOffset(kStoreWord, source, array, offset);
       } else {
         DCHECK(index.IsRegister()) << index;
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, source, IP, data_offset);
+
+        __ add(IP, array, ShifterOperand(data_offset));
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          Location::RegisterLocation(source),
+                                          IP,
+                                          index.AsRegister<Register>());
       }
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4145,25 +4877,7 @@
       break;
     }
 
-    case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreToOffset(kStoreWord, value, array, offset);
-      } else {
-        DCHECK(index.IsRegister()) << index;
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, value, IP, data_offset);
-      }
-
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
-      break;
-    }
-
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Location value = locations->InAt(2);
       if (index.IsConstant()) {
         size_t offset =
@@ -4177,7 +4891,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegister());
       if (index.IsConstant()) {
@@ -4191,7 +4904,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegisterPair());
       if (index.IsConstant()) {
@@ -4210,8 +4922,8 @@
       UNREACHABLE();
   }
 
-  // Ints and objects are handled in the switch.
-  if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+  // Objects are handled in the switch.
+  if (value_type != Primitive::kPrimNot) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
@@ -4225,13 +4937,44 @@
 
 void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
+void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+
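+  // HIntermediateAddress materializes `base + offset` (typically the array
+  // reference plus its data offset) once, so that several array accesses can
+  // share the computed address; see the comment in
+  // `TryExtractArrayAccessAddress()` in instruction_simplifier_shared.cc.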
+  if (second.IsRegister()) {
+    __ add(out.AsRegister<Register>(),
+           first.AsRegister<Register>(),
+           ShifterOperand(second.AsRegister<Register>()));
+  } else {
+    __ AddConstant(out.AsRegister<Register>(),
+                   first.AsRegister<Register>(),
+                   second.GetConstant()->AsIntConstant()->GetValue());
+  }
+}
+
 void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
@@ -4266,7 +5009,7 @@
   if (can_be_null) {
     __ CompareAndBranchIfZero(value, &is_null);
   }
-  __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmWordSize>().Int32Value());
+  __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
   __ strb(card, Address(card, temp));
   if (can_be_null) {
@@ -4274,14 +5017,6 @@
   }
 }
 
-void LocationsBuilderARM::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
@@ -4325,7 +5060,7 @@
   }
 
   __ LoadFromOffset(
-      kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
+      kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
   if (successor == nullptr) {
     __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -4347,6 +5082,8 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ Mov(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (destination.IsFpuRegister()) {
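+      // Core register to single-precision FP register (VMOV Sn, Rt).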
+      __ vmovsr(destination.AsFpuRegister<SRegister>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
       __ StoreToOffset(kStoreWord, source.AsRegister<Register>(),
@@ -4364,7 +5101,9 @@
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegister()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegister()) {
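+      // Single-precision FP register to core register (VMOV Rt, Sn).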
+      __ vmovrs(destination.AsRegister<Register>(), source.AsFpuRegister<SRegister>());
+    } else if (destination.IsFpuRegister()) {
       __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
     } else {
       DCHECK(destination.IsStackSlot());
@@ -4388,6 +5127,10 @@
     if (destination.IsRegisterPair()) {
       __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
       __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+    } else if (destination.IsFpuRegisterPair()) {
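+      // Core register pair to double-precision FP register (VMOV Dm, Rt, Rt2).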
+      __ vmovdrr(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+                 source.AsRegisterPairLow<Register>(),
+                 source.AsRegisterPairHigh<Register>());
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       DCHECK(ExpectedPairLayout(source));
@@ -4395,7 +5138,11 @@
           kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegisterPair()) {
-    if (destination.IsFpuRegisterPair()) {
+    if (destination.IsRegisterPair()) {
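+      // Double-precision FP register to core register pair (VMOV Rt, Rt2, Dm).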
+      __ vmovrrd(destination.AsRegisterPairLow<Register>(),
+                 destination.AsRegisterPairHigh<Register>(),
+                 FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+    } else if (destination.IsFpuRegisterPair()) {
       __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
                FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
     } else {
@@ -4552,12 +5299,71 @@
   __ Pop(static_cast<Register>(reg));
 }
 
+HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(R0));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(R0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -4568,29 +5374,99 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(
-        kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    __ LoadFromOffset(kLoadWord,
-                      out,
-                      current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    // TODO: We will need a read barrier here.
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
 
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                                    cls->GetTypeIndex()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
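+      // The movw/movt immediates are placeholders patched at link time with the
+      // PC-relative offset of the type; adding PC then yields the class address.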
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(out, out, ShifterOperand(PC));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      // A 16-bit LDR immediate has a 5-bit offset scaled by the access size, which
+      // gives a 128B range. To reduce the number of literals when we load multiple
+      // types, split the dex cache address into a 128B-aligned base loaded from a
+      // literal and the remaining offset embedded in the load.
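+      // For example, with the 7-bit offset computed below, address 0x12345678
+      // splits into base_address 0x12345600 and embedded offset 0x78.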
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
+      uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
+      uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
+      __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
+      int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        current_method,
+                        ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
-    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    if (generate_null_check) {
+      __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    }
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
     } else {
@@ -4628,31 +5504,104 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
+}
+
 void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
-  locations->SetInAt(0, Location::RequiresRegister());
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
+  LocationSummary* locations = load->GetLocations();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                                      load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(out, out, ShifterOperand(PC));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
+    }
+    default:
+      break;
+  }
+
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
   codegen_->AddSlowPath(slow_path);
-
-  LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(
-      kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  // TODO: We will need a read barrier here.
-  __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+  __ b(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kArmWordSize>().Int32Value();
+  return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
 }
 
 void LocationsBuilderARM::VisitLoadException(HLoadException* load) {
@@ -4677,7 +5626,7 @@
 
 void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4685,45 +5634,59 @@
 void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
   codegen_->InvokeRuntime(
       QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
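+// With read barriers enabled, the reference loads emitted for these type checks
+// may need an extra register: always when using Baker's technique, and for the
+// check kinds that follow the super class or component type references when
+// using the slow-path technique.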
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4737,17 +5700,10 @@
     __ CompareAndBranchIfZero(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ LoadFromOffset(kLoadWord, target, obj, class_offset);
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       __ cmp(out, ShifterOperand(cls));
       // Classes must be equal for the instanceof to succeed.
@@ -4756,13 +5712,14 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
-      __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -4773,14 +5730,15 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop, success;
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
-      __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -4791,14 +5749,15 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       Label exact_check;
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ LoadFromOffset(kLoadWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -4809,11 +5768,12 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ cmp(out, ShifterOperand(cls));
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ b(slow_path->GetEntryLabel(), NE);
       __ LoadImmediate(out, 1);
@@ -4822,13 +5782,32 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we only ever call on the slow path, but for the
+      // unresolved and interface check cases we always branch into
+      // that slow path.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ b(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ b(&done);
       }
@@ -4854,57 +5833,61 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register temp = locations->WillCall()
-      ? Register(kNoRegister)
-      : locations->GetTemp(0).AsRegister<Register>();
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                        is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   Label done;
   // Avoid null check if we know obj is not null.
@@ -4912,81 +5895,139 @@
     __ CompareAndBranchIfZero(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ LoadFromOffset(kLoadWord, obj, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ cmp(temp, ShifterOperand(cls));
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ b(slow_path->GetEntryLabel(), NE);
+      __ b(type_check_slow_path->GetEntryLabel(), NE);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop;
+      Label loop, compare_classes;
       __ Bind(&loop);
-      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ CompareAndBranchIfNonZero(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before doing so, restore the object's class into `temp`,
+      // as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop;
       __ Bind(&loop);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
-      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back to the beginning of the loop.
       __ CompareAndBranchIfNonZero(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ b(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before doing so, restore the object's class into `temp`,
+      // as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before doing so, restore the object's class into `temp`,
+      // as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+      __ CompareAndBranchIfZero(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers to the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), and
+      // those registers might be clobbered by the potential read
+      // barrier emitted at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
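
All of the fast paths above reduce to short walks over class metadata. As a reading aid, here is a standalone C++ sketch of the same checks over simplified stand-in structs (an assumption: the real mirror:: layouts differ, and read barriers and heap poisoning are ignored here):

#include <cstdint>

struct Class {
  Class* super_class;
  Class* component_type;    // non-null only for array classes
  uint16_t primitive_type;  // 0 == kPrimNot, i.e. a reference type
};
struct Object { Class* klass; };

// kExactCheck / kArrayCheck: a single pointer compare.
bool ExactCheck(Object* obj, Class* cls) { return obj->klass == cls; }

// kAbstractClassCheck / kClassHierarchyCheck: walk the super chain; the
// abstract-class variant starts at the super class, since an abstract
// class can never be the exact type.
bool HierarchyCheck(Object* obj, Class* cls) {
  for (Class* k = obj->klass; k != nullptr; k = k->super_class) {
    if (k == cls) return true;
  }
  return false;  // the generated code branches to the slow path instead
}

// kArrayObjectCheck: exact match, or a non-primitive array class.
bool ArrayObjectCheck(Object* obj, Class* cls) {
  Class* k = obj->klass;
  if (k == cls) return true;
  Class* component = k->component_type;
  return component != nullptr && component->primitive_type == 0;
}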
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4997,6 +6038,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
@@ -5026,6 +6072,71 @@
   HandleBitwiseOperation(instruction);
 }
 
+
+void LocationsBuilderARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+         || instruction->GetResultType() == Primitive::kPrimLong);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    ShifterOperand second_reg(second.AsRegister<Register>());
+    Register out_reg = out.AsRegister<Register>();
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ bic(out_reg, first_reg, second_reg);
+        break;
+      case HInstruction::kOr:
+        __ orn(out_reg, first_reg, second_reg);
+        break;
+      // There is no EON on ARM.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+    return;
+
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    Register first_low = first.AsRegisterPairLow<Register>();
+    Register first_high = first.AsRegisterPairHigh<Register>();
+    ShifterOperand second_low(second.AsRegisterPairLow<Register>());
+    ShifterOperand second_high(second.AsRegisterPairHigh<Register>());
+    Register out_low = out.AsRegisterPairLow<Register>();
+    Register out_high = out.AsRegisterPairHigh<Register>();
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ bic(out_low, first_low, second_low);
+        __ bic(out_high, first_high, second_high);
+        break;
+      case HInstruction::kOr:
+        __ orn(out_low, first_low, second_low);
+        __ orn(out_high, first_high, second_high);
+        break;
+      // There is no EON on ARM.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+  }
+}
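
BIC and ORN compute "AND NOT" and "OR NOT"; in portable C++ the emitted operations amount to the following (the long variant simply applies the same operator to each 32-bit half, since no register-pair form exists):

#include <cstdint>

uint32_t Bic(uint32_t a, uint32_t b) { return a & ~b; }  // BIC: a AND NOT b
uint32_t Orn(uint32_t a, uint32_t b) { return a | ~b; }  // ORN: a OR NOT b

// There is no EON (XOR NOT), which is why kXor lands in the fatal default.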
+
 void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
   // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
   if (value == 0xffffffffu) {
@@ -5079,6 +6190,34 @@
   __ eor(out, first, ShifterOperand(value));
 }
 
+void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out,
+                                                       Location first,
+                                                       uint64_t value) {
+  Register out_low = out.AsRegisterPairLow<Register>();
+  Register out_high = out.AsRegisterPairHigh<Register>();
+  Register first_low = first.AsRegisterPairLow<Register>();
+  Register first_high = first.AsRegisterPairHigh<Register>();
+  uint32_t value_low = Low32Bits(value);
+  uint32_t value_high = High32Bits(value);
+  if (value_low == 0u) {
+    if (out_low != first_low) {
+      __ mov(out_low, ShifterOperand(first_low));
+    }
+    __ AddConstant(out_high, first_high, value_high);
+    return;
+  }
+  __ AddConstantSetFlags(out_low, first_low, value_low);
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) {
+    __ adc(out_high, first_high, so);
+  } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) {
+    __ sbc(out_high, first_high, so);
+  } else {
+    LOG(FATAL) << "Unexpected constant " << value_high;
+    UNREACHABLE();
+  }
+}
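
The chained add works because ADDS sets the carry flag that ADC consumes; the SBC fallback relies on the identity x - ~imm - (1 - C) == x + imm + C. A portable sketch of the arithmetic (not of instruction selection):

#include <cstdint>

uint64_t AddLongConst(uint32_t first_lo, uint32_t first_hi, uint64_t value) {
  uint32_t value_lo = static_cast<uint32_t>(value);        // Low32Bits(value)
  uint32_t value_hi = static_cast<uint32_t>(value >> 32);  // High32Bits(value)
  uint32_t out_lo = first_lo + value_lo;                   // ADDS (sets C)
  uint32_t carry = (out_lo < first_lo) ? 1u : 0u;          // the C flag
  uint32_t out_hi = first_hi + value_hi + carry;           // ADC
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}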
+
 void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location first = locations->InAt(0);
@@ -5155,26 +6294,381 @@
   }
 }
 
+void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location maybe_temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Mov(maybe_temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location maybe_temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          Register obj,
+                                                          uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
+      codegen_->AddSlowPath(slow_path);
+
+      // IP = Thread::Current()->GetIsGcMarking()
+      __ LoadFromOffset(
+          kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value());
+      __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ AddConstant(root_reg, obj, offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
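
The Baker fast path for roots follows the commented pseudo-code exactly. As a self-contained sketch with stand-in helpers (hypothetical names, not the runtime's API):

bool gc_is_marking = false;  // stands in for Thread::Current()->GetIsGcMarking()
template <typename T> T* ReadBarrierMark(T* ref) { return ref; }  // stub for the mark entry point

template <typename T>
T* LoadGcRoot(T** root_slot) {
  T* root = *root_slot;            // plain load of the root
  if (gc_is_marking) {             // the flag loaded into IP above
    root = ReadBarrierMark(root);  // slow path: mark (and possibly move) the root
  }
  return root;
}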
+
+void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 uint32_t offset,
+                                                                 Location index,
+                                                                 ScaleFactor scale_factor,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  // Introduce a dependency on the lock_word including the rb_state,
+  // which shall prevent load-load reordering without using
+  // a memory barrier (which would be more expensive).
+  // `obj` is unchanged by this operation, but its value now depends
+  // on `temp_reg`.
+  __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index": ArrayGet and
+    // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+      __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+    } else {
+      // Handle the special case of the
+      // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
+      // a register pair as index ("long offset"), of which only the low
+      // part contains data.
+      Register index_reg = index.IsRegisterPair()
+          ? index.AsRegisterPairLow<Register>()
+          : index.AsRegister<Register>();
+      __ add(IP, obj, ShifterOperand(index_reg, LSL, scale_factor));
+      __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with LSRS
+  // which can be a 16-bit instruction unlike the TST immediate.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+  __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
+  __ Bind(slow_path->GetExitLabel());
+}
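
Two details above deserve unpacking. First, ADD obj, obj, temp LSR #32 adds zero (all bits are shifted out) but creates an address dependency from the monitor load to the reference load, which enforces the required load-load ordering without a fence. Second, the gray test shifts the read barrier state's low bit into the carry flag; a C++ model of that test (the shift amount is an illustrative assumption only, the real one comes from LockWord):

#include <cstdint>

constexpr uint32_t kReadBarrierStateShift = 28;  // illustrative assumption

bool IsGray(uint32_t lock_word) {
  // LSRS #(shift + 1) leaves the state's lowest bit in the carry flag.
  // With white == 0, gray == 1 and black == 2, that bit is set iff gray.
  return (lock_word >> kReadBarrierStateShift) & 1u;
}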
+
+void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
+
+void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+  // with irreducible loops.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+
+  if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
+    const DexFile& outer_dex_file = GetGraph()->GetDexFile();
+    if (&outer_dex_file != target_method.dex_file) {
+      // Calls across dex files are more likely to exceed the available BL range,
+      // so use absolute patch with fixup if available and kCallArtMethod otherwise.
+      HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+          (desired_dispatch_info.method_load_kind ==
+           HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup)
+          ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup
+          : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+      return HInvokeStaticOrDirect::DispatchInfo {
+        dispatch_info.method_load_kind,
+        code_ptr_location,
+        dispatch_info.method_load_data,
+        0u
+      };
+    }
+  }
+  return dispatch_info;
+}
+
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
-  bool direct_code_loaded = false;
   switch (invoke->GetCodePtrLocation()) {
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      if (IsSameDexFile(*invoke->GetTargetMethod().dex_file, GetGraph()->GetDexFile())) {
-        break;
-      }
-      // Calls across dex files are more likely to exceed the available BL range,
-      // so use absolute patch by falling through to kDirectCodeFixup.
-      FALLTHROUGH_INTENDED;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
       // LR = code address from literal pool with link-time patch.
       __ LoadLiteral(LR, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
-      direct_code_loaded = true;
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR = invoke->GetDirectCodePtr();
       __ LoadImmediate(LR, invoke->GetDirectCodePtr());
-      direct_code_loaded = true;
       break;
     default:
       break;
@@ -5187,7 +6681,7 @@
       __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -5196,11 +6690,17 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
@@ -5211,12 +6711,14 @@
         method_reg = reg;
         __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
       }
-      // temp = current_method->dex_cache_resolved_methods_;
-      __ LoadFromOffset(
-          kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset(
-              kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+      __ LoadFromOffset(kLoadWord,
+                        reg,
+                        method_reg,
+                        ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
       break;
     }
@@ -5227,20 +6729,14 @@
       __ bl(GetFrameEntryLabel());
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      if (!direct_code_loaded) {
-        relative_call_patches_.emplace_back(invoke->GetTargetMethod());
-        __ Bind(&relative_call_patches_.back().label);
-        Label label;
-        __ bl(&label);  // Arbitrarily branch to the instruction after BL, override at link time.
-        __ Bind(&label);
-        break;
-      }
-      // If we loaded the direct code above, fall through.
-      FALLTHROUGH_INTENDED;
+      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+      __ BindTrackedLabel(&relative_call_patches_.back().label);
+      // Arbitrarily branch to the BL itself, override at link time.
+      __ bl(&relative_call_patches_.back().label);
+      break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR prepared above for better instruction scheduling.
-      DCHECK(direct_code_loaded);
       // LR()
       __ blx(LR);
       break;
@@ -5248,7 +6744,7 @@
       // LR = callee_method->entry_point_from_quick_compiled_code_
       __ LoadFromOffset(
           kLoadWord, LR, callee_method.AsRegister<Register>(),
-          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value());
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
       // LR()
       __ blx(LR);
       break;
@@ -5261,17 +6757,28 @@
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // the concurrent copying collector may not keep it so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kArmWordSize).Int32Value();
+      kArmPointerSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -5279,9 +6786,63 @@
   __ blx(LR);
 }
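
The virtual call is three dependent loads before the branch; over hypothetical plain structs the sequence is:

#include <cstdint>

struct Method { void (*entry_point)(); };
struct Class { Method* vtable[16]; };
struct Object { Class* klass; };

void InvokeVirtual(Object* receiver, uint32_t vtable_index) {
  Class* klass = receiver->klass;                // temp = receiver->klass_
  Method* method = klass->vtable[vtable_index];  // temp = temp->GetMethodAt(offset)
  method->entry_point();                         // LR = entry point; blx LR
}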
 
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+}
+
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
+  return &patches->back();
+}
+
+Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                             uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                           uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
+Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) {
+  return DeduplicateUint32Literal(address, &uint32_literals_);
+}
+
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() +
+      boot_image_string_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5307,19 +6868,104 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t base_element_offset = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    uint32_t string_index = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(movw_offset,
+                                                               &dex_file,
+                                                               add_pc_offset,
+                                                               string_index));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(movt_offset,
+                                                               &dex_file,
+                                                               add_pc_offset,
+                                                               string_index));
+  }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    uint32_t type_index = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset,
+                                                             &dex_file,
+                                                             add_pc_offset,
+                                                             type_index));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset,
+                                                             &dex_file,
+                                                             add_pc_offset,
+                                                             type_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
+
+Literal* CodeGeneratorARM::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ NewLiteral<uint32_t>(value); });
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
                                                     MethodToLiteralMap* map) {
-  // Look up the literal for target_method.
-  auto lb = map->lower_bound(target_method);
-  if (lb != map->end() && !map->key_comp()(target_method, lb->first)) {
-    return lb->second;
-  }
-  // We don't have a literal for this method yet, insert a new one.
-  Literal* literal = __ NewLiteral<uint32_t>(0u);
-  map->PutBefore(lb, target_method, literal);
-  return literal;
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
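
The removed lower_bound/PutBefore sequence is the standard get-or-create idiom; the GetOrCreate helper the code now relies on has essentially this shape (a sketch over a std::map-like interface; the real helper lives on the arena-backed map types):

#include <map>

template <typename Map, typename Key, typename Create>
typename Map::mapped_type GetOrCreate(Map& map, const Key& key, Create create) {
  auto lb = map.lower_bound(key);
  if (lb != map.end() && !map.key_comp()(key, lb->first)) {
    return lb->second;             // an entry already exists for this key
  }
  auto value = create();           // e.g. allocate a placeholder literal
  map.emplace_hint(lb, key, value);
  return value;
}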
 
 Literal* CodeGeneratorARM::DeduplicateMethodAddressLiteral(MethodReference target_method) {
@@ -5330,6 +6976,33 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
+void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations = instr->GetLocations();
+  Register res = locations->Out().AsRegister<Register>();
+  Register accumulator =
+      locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex).AsRegister<Register>();
+  Register mul_left =
+      locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex).AsRegister<Register>();
+  Register mul_right =
+      locations->InAt(HMultiplyAccumulate::kInputMulRightIndex).AsRegister<Register>();
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ mla(res, mul_left, mul_right, accumulator);
+  } else {
+    __ mls(res, mul_left, mul_right, accumulator);
+  }
+}
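
MLA and MLS fold the multiply and the accumulate into one instruction; arithmetically they are:

#include <cstdint>

uint32_t Mla(uint32_t acc, uint32_t left, uint32_t right) { return acc + left * right; }
uint32_t Mls(uint32_t acc, uint32_t left, uint32_t right) { return acc - left * right; }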
+
 void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
@@ -5340,48 +7013,110 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderARM::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorARM::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
+      codegen_->GetAssembler()->IsThumb()) {
+    locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
+    if (switch_instr->GetStartValue() != 0) {
+      locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
+    }
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    GenerateCompareWithImmediate(value_reg, lower_bound + i);
-    __ b(codegen_->GetLabelOf(successors[i]), EQ);
-  }
+  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
+    // Create a series of compare/jumps.
+    Register temp_reg = IP;
+    // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
+    // store the immediate, because IP is also its destination register. For the other
+    // AddConstantSetFlags() and CmpConstant() calls, the immediate values are constant,
+    // and they can be encoded in the instruction without making use of the IP register.
+    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ b(codegen_->GetLabelOf(default_block));
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    // Jump to successors[0] if value == lower_bound.
+    __ b(codegen_->GetLabelOf(successors[0]), EQ);
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ CmpConstant(temp_reg, 1);
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ b(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    // Create a table lookup.
+    Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+
+    // Materialize a pointer to the switch table.
+    std::vector<Label*> labels(num_entries);
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    for (uint32_t i = 0; i < num_entries; i++) {
+      labels[i] = codegen_->GetLabelOf(successors[i]);
+    }
+    JumpTable* table = __ CreateJumpTable(std::move(labels), temp_reg);
+
+    // Remove the bias.
+    Register key_reg;
+    if (lower_bound != 0) {
+      key_reg = locations->GetTemp(1).AsRegister<Register>();
+      __ AddConstant(key_reg, value_reg, -lower_bound);
+    } else {
+      key_reg = value_reg;
+    }
+
+    // Check whether the value is in the table, jump to default block if not.
+    __ CmpConstant(key_reg, num_entries - 1);
+    __ b(codegen_->GetLabelOf(default_block), Condition::HI);
+
+    // Load the displacement from the table.
+    __ ldr(temp_reg, Address(temp_reg, key_reg, Shift::LSL, 2));
+
+    // Dispatch is a direct add to the PC (for Thumb2).
+    __ EmitJumpTableDispatch(table, temp_reg);
   }
 }
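
The cascade retires two consecutive case values per SUBS: after subtracting 2, the LO branch fires when the subtraction borrowed (the key was 1) and the EQ branch when it reached zero (the key was 2). Above the threshold on Thumb2, a jump table replaces the cascade and the loaded displacement is added straight to the PC. A plain-C++ model of the cascade's control flow, returning the successor index or -1 for the default (a sketch, not the emitted code):

#include <cstdint>

int Dispatch(int32_t value, int32_t lower_bound, uint32_t num_entries) {
  uint32_t key = static_cast<uint32_t>(value - lower_bound);
  if (key == 0) return 0;                     // EQ after subtracting the bias
  uint32_t last_index = 0;
  while (num_entries - last_index > 2) {
    uint32_t previous = key;
    key -= 2;                                 // SUBS #2
    if (previous < 2) return last_index + 1;  // LO: borrow, so the key was 1
    if (key == 0) return last_index + 2;      // EQ: the key was 2
    last_index += 2;
  }
  if (num_entries - last_index == 2 && key == 1) {
    return last_index + 1;                    // the single remaining case value
  }
  return -1;                                  // default block
}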
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::PcRelativePatchInfo* labels =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, /* placeholder */ 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, /* placeholder */ 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
@@ -5412,6 +7147,36 @@
   }
 }
 
+void LocationsBuilderARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArmPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kArmPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
+  }
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 6900933..5d9b2dc 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -17,12 +17,14 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
 
+#include "base/enums.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
+#include "string_reference.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm/assembler_thumb2.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace arm {
@@ -30,7 +32,7 @@
 class CodeGeneratorARM;
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kArmWordSize = kArmPointerSize;
+static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
 
 static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
@@ -159,6 +161,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -170,13 +173,17 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleCondition(HCondition* condition);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  Location ArithmeticZeroOrFpuRegister(HInstruction* input);
   Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
   bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
-  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
 
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -184,7 +191,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
 
-class InstructionCodeGeneratorARM : public HGraphVisitor {
+class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
 
@@ -193,6 +200,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -212,31 +220,70 @@
   void GenerateAndConst(Register out, Register first, uint32_t value);
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
+  void GenerateAddLongConst(Location out, Location first, uint64_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
                                Register value_lo, Register value_hi,
                                Register temp1, Register temp2,
                                HInstruction* instruction);
   void GenerateWideAtomicLoad(Register addr, uint32_t offset,
                               Register out_lo, Register out_hi);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
-  void GenerateImplicitNullCheck(HNullCheck* instruction);
-  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
-  void GenerateCompareWithImmediate(Register left, int32_t right);
-  void GenerateCompareTestAndBranch(HIf* if_instr,
-                                    HCondition* condition,
+                             Label* false_target);
+  void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
-                                    Label* false_target,
-                                    Label* always_true_target);
+                                    Label* false_target);
+  void GenerateVcmp(HInstruction* instruction);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -262,7 +309,6 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
   void Bind(HBasicBlock* block) OVERRIDE;
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
@@ -297,15 +343,11 @@
     return assembler_;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
-
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -326,6 +368,24 @@
   // Helper method to move a 64bits value between two locations.
   void Move64(Location destination, Location source);
 
+  void LoadOrStoreToOffset(Primitive::Type type,
+                           Location loc,
+                           Register base,
+                           int32_t offset,
+                           bool is_load,
+                           Condition cond = AL);
+
+  void LoadFromShiftedRegOffset(Primitive::Type type,
+                                Location out_loc,
+                                Register base,
+                                Register reg_offset,
+                                Condition cond = AL);
+  void StoreToShiftedRegOffset(Primitive::Type type,
+                               Location out_loc,
+                               Register base,
+                               Register reg_offset,
+                               Condition cond = AL);
+
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
                      HInstruction* instruction,
@@ -337,9 +397,17 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   // Emit a write barrier.
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -362,19 +430,158 @@
 
   Label* GetFrameEntryLabel() { return &frame_entry_label_; }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
+  // and boot image strings/types. The only difference is the interpretation of the
+  // offset_or_index. The PC-relative address is loaded with three instructions,
+  // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset
+  // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
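+  // For illustration (hypothetical offset value), the emitted sequence is:
+  //   movw_label:   MOVW base_reg, #(offset & 0xffff)
+  //   movt_label:   MOVT base_reg, #(offset >> 16)
+  //   add_pc_label: ADD  base_reg, base_reg, PC
+  // where `offset` is a placeholder patched later, relative to the ADD's PC+4.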
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
+
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string/type index.
+    uint32_t offset_or_index;
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
+  Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
+
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
- private:
-  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
+  void GenerateNop();
+
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
+ private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              Literal*,
+                                              StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
+
+  Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
   Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
 
   // Labels for each block that will be compiled.
   Label* block_labels_;  // Indexed by block id.
@@ -385,12 +592,26 @@
   Thumb2Assembler assembler_;
   const ArmInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
   MethodToLiteralMap call_patches_;
   // Relative call patch info.
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+  // PC-relative patch info for each HArmDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1773c06..122c174 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -33,8 +33,7 @@
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
 
-
-using namespace vixl;   // NOLINT(build/namespaces)
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 #ifdef __
 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
@@ -42,6 +41,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm64 {
 
 using helpers::CPURegisterFrom;
@@ -68,6 +70,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
+// jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
+// generates less code/data with a small num_entries.
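+// For example, at num_entries == 7 the compare/jump sequence costs about
+// 1.5 * 7 + 3 = 13.5 instructions, while a table costs 7 instructions plus
+// 7 32-bit literals (about 14 words), so the two forms break even near this
+// threshold and smaller switches favor the compare/jump form.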
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -86,6 +92,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express all the necessary branches, see the
+  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne /* unordered */;
+    case kCondLT: return gt_bias ? cc : lt /* unordered */;
+    case kCondLE: return gt_bias ? ls : le /* unordered */;
+    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 Location ARM64ReturnLocation(Primitive::Type return_type) {
   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -107,8 +131,9 @@
   return ARM64ReturnLocation(return_type);
 }
 
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
 
 // Calculate memory accessing operand for save/restore live registers.
 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
@@ -121,20 +146,20 @@
                                          codegen->GetNumberOfFloatingPointRegisters()));
 
   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
-      register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
+      register_set->GetCoreRegisters() & (~callee_saved_core_registers.GetList()));
   CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
-      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.GetList()));
 
   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
 
   Register base = masm->StackPointer();
-  int64_t core_spill_size = core_list.TotalSizeInBytes();
-  int64_t fp_spill_size = fp_list.TotalSizeInBytes();
+  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
+  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
   int64_t reg_size = kXRegSizeInBytes;
   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
   uint32_t ls_access_size = WhichPowerOf2(reg_size);
-  if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
+  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
     // If the offset does not fit in the instruction's immediate field, use an alternate register
     // to compute the base address (floating point registers spill base address).
@@ -194,7 +219,7 @@
 
 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : instruction_(instruction) {}
+  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -211,8 +236,11 @@
     codegen->EmitParallelMoves(
         locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
         locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
-    arm64_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    arm64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -221,14 +249,12 @@
   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
 
  private:
-  HBoundsCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
 };
 
 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : instruction_(instruction) {}
+  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -247,7 +273,6 @@
   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
 
  private:
-  HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
 };
 
@@ -257,7 +282,7 @@
                          HInstruction* at,
                          uint32_t dex_pc,
                          bool do_clinit)
-      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeARM64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
@@ -312,7 +337,7 @@
 
 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit LoadStringSlowPathARM64(HLoadString* instruction) : instruction_(instruction) {}
+  explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -323,7 +348,8 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex());
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index);
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -337,14 +363,12 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
 
  private:
-  HLoadString* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
 };
 
 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit NullCheckSlowPathARM64(HNullCheck* instr) : instruction_(instr) {}
+  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -363,24 +387,20 @@
   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
 
  private:
-  HNullCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
 };
 
 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
-      : instruction_(instruction), successor_(successor) {}
+      : SlowPathCodeARM64(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -388,7 +408,7 @@
     }
   }
 
-  vixl::Label* GetReturnLabel() {
+  vixl::aarch64::Label* GetReturnLabel() {
     DCHECK(successor_ == nullptr);
     return &return_label_;
   }
@@ -400,12 +420,11 @@
   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
 
  private:
-  HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
   HBasicBlock* const successor_;
 
   // If `successor_` is null, the label to branch to after the suspend check.
-  vixl::Label return_label_;
+  vixl::aarch64::Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
 };
@@ -413,7 +432,7 @@
 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
-      : instruction_(instruction), is_fatal_(is_fatal) {}
+      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -427,15 +446,6 @@
 
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = InputRegisterAt(instruction_, 0);
-      Register temp = WRegisterFrom(locations->GetTemp(0));
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -450,11 +460,11 @@
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
           QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
+                           const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
-                           const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -471,7 +481,6 @@
   bool IsFatal() const { return is_fatal_; }
 
  private:
-  HInstruction* const instruction_;
   const bool is_fatal_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
@@ -479,29 +488,29 @@
 
 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
-      : instruction_(instruction) {}
+  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
+      : SlowPathCodeARM64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
 
  private:
-  HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
 };
 
 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {}
+  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -540,11 +549,335 @@
   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
 
  private:
-  HInstruction* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
 };
 
+void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
+  uint32_t num_entries = switch_instr_->GetNumEntries();
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
+
+  // We are about to use the assembler to place literals directly. Make sure we have enough
+  // space in the underlying code buffer and that the jump table is generated with the right size.
+  CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t),
+                             CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize);
+
+  __ Bind(&table_start_);
+  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
+    DCHECK(target_label->IsBound());
+    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
+    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
+    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
+    Literal<int32_t> literal(jump_offset);
+    __ place(&literal);
+  }
+}
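+
+// A sketch of the resulting layout (hypothetical case labels): the table is a
+// contiguous run of int32_t entries,
+//   table_start: .word case_0 - table_start
+//                .word case_1 - table_start
+//                ...
+// so a dispatch needs only the table base plus one scaled index load.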
+
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
+      : SlowPathCodeARM64(instruction), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    DCHECK_NE(obj_.reg(), LR);
+    DCHECK_NE(obj_.reg(), WSP);
+    DCHECK_NE(obj_.reg(), WZR);
+    // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary; it cannot be the entry point's input/output.
+    DCHECK_NE(obj_.reg(), IP0);
+    DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in W0):
+    //
+    //   W0 <- obj
+    //   W0 <- ReadBarrierMark(W0)
+    //   obj <- W0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg());
+    // This runtime call does not require a stack map.
+    arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index)
+      : SlowPathCodeARM64(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ Ldr(out, HeapOperand(out, class_offset));
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like the other "inputs" of this slow path),
+    // we introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
+        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to vixl::MacroAssembler::Lsl and
+          // vixl::MacroAssembler::Mov below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg.W(), index_reg);
+          index_reg = free_reg;
+          index = LocationFrom(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Add(index_reg, index_reg, Operand(offset_));
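+        // At this point `index_reg` holds the full memory offset of the
+        // element: data offset + (original index << component size shift).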
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegister());
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          LocationFrom(calling_convention.GetRegisterAt(0)),
+                          type,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          LocationFrom(calling_convention.GetRegisterAt(1)),
+                          type,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            LocationFrom(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
+    }
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(XRegisterFrom(ref_).GetCode());
+    size_t obj = static_cast<size_t>(XRegisterFrom(obj_).GetCode());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return Register(VIXLRegCodeFromART(i), kXRegSize);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
+      : SlowPathCodeARM64(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    // The argument of ReadBarrierForRootSlow is not a managed
+    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
+    // thus we need a 64-bit move here, and we cannot use
+    //
+    //   arm64_codegen->MoveLocation(
+    //       LocationFrom(calling_convention.GetRegisterAt(0)),
+    //       root_,
+    //       type);
+    //
+    // which would emit a 32-bit move, as `type` is a (32-bit wide)
+    // reference type (`Primitive::kPrimNot`).
+    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -582,15 +915,19 @@
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
-                    callee_saved_core_registers.list(),
-                    callee_saved_fp_registers.list(),
+                    callee_saved_core_registers.GetList(),
+                    callee_saved_fp_registers.GetList(),
                     compiler_options,
                     stats),
-      block_labels_(nullptr),
+      block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_patches_(MethodReferenceComparator(),
@@ -598,15 +935,29 @@
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
 
-#undef __
 #define __ GetVIXLAssembler()->
 
+void CodeGeneratorARM64::EmitJumpTables() {
+  for (auto&& jump_table : jump_tables_) {
+    jump_table->EmitTable(this);
+  }
+}
+
 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+  EmitJumpTables();
   // Ensure we emit the literal pool.
   __ FinalizeCode();
 
@@ -715,78 +1066,23 @@
   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
 }
 
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
-  return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
-                          core_spill_mask_);
+  return CPURegList(CPURegister::kRegister, kXRegSize,
+                    core_spill_mask_);
 }
 
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
                                          GetNumberOfFloatingPointRegisters()));
-  return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
-                          fpu_spill_mask_);
+  return CPURegList(CPURegister::kFPRegister, kDRegSize,
+                    fpu_spill_mask_);
 }
 
 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
   __ Bind(GetLabelOf(block));
 }
 
-void CodeGeneratorARM64::Move(HInstruction* instruction,
-                              Location location,
-                              HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  Primitive::Type type = instruction->GetType();
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (instruction->IsFakeString()) {
-    // The fake string is an alias for null.
-    DCHECK(IsBaseline());
-    instruction = locations->Out().GetConstant();
-    DCHECK(instruction->IsNullConstant()) << instruction->DebugName();
-  }
-
-  if (instruction->IsCurrentMethod()) {
-    MoveLocation(location,
-                 Location::DoubleStackSlot(kCurrentMethodStackOffset),
-                 Primitive::kPrimVoid);
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (instruction->IsIntConstant()
-             || instruction->IsLongConstant()
-             || instruction->IsNullConstant()) {
-    int64_t value = GetInt64ValueOf(instruction->AsConstant());
-    if (location.IsRegister()) {
-      Register dst = RegisterFrom(location, type);
-      DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) ||
-             (instruction->IsLongConstant() && dst.Is64Bits()));
-      __ Mov(dst, value);
-    } else {
-      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant())
-          ? temps.AcquireW()
-          : temps.AcquireX();
-      __ Mov(temp, value);
-      __ Str(temp, StackOperandFrom(location));
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    MoveLocation(location, temp_location, type);
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    if (Primitive::Is64BitType(type)) {
-      MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
-    } else {
-      MoveLocation(location, Location::StackSlot(stack_slot), type);
-    }
-
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    MoveLocation(location, locations->Out(), type);
-  }
-}
-
 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
@@ -800,40 +1096,15 @@
   }
 }
 
-Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
-  vixl::Label done;
+  vixl::aarch64::Label done;
   if (value_can_be_null) {
     __ Cbz(value, &done);
   }
-  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
+  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
   __ Strb(card, MemOperand(card, temp.X()));
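+  // The card table base is biased so that its low byte is the dirty card value;
+  // this single store therefore both addresses and marks the card.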
   if (value_can_be_null) {
@@ -841,7 +1112,7 @@
   }
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
   // Blocked core registers:
   //      lr        : Runtime reserved.
   //      tr        : Runtime reserved.
@@ -854,48 +1125,25 @@
   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   reserved_core_registers.Combine(runtime_reserved_core_registers);
   while (!reserved_core_registers.IsEmpty()) {
-    blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
+    blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
   }
 
   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
   while (!reserved_fp_registers.IsEmpty()) {
-    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
+    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
   }
 
-  if (is_baseline) {
-    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
-    while (!reserved_core_baseline_registers.IsEmpty()) {
-      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
-    }
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
-    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
-    while (!reserved_fp_baseline_registers.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+    while (!reserved_fp_registers_debuggable.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
     }
   }
 }
 
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   __ Str(reg, MemOperand(sp, stack_index));
@@ -1035,17 +1283,21 @@
       UseScratchRegisterScope temps(GetVIXLAssembler());
       HConstant* src_cst = source.GetConstant();
       CPURegister temp;
-      if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
-        temp = temps.AcquireW();
-      } else if (src_cst->IsLongConstant()) {
-        temp = temps.AcquireX();
-      } else if (src_cst->IsFloatConstant()) {
-        temp = temps.AcquireS();
+      if (src_cst->IsZeroBitPattern()) {
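+        // The value is all zero bits: store directly from the zero register,
+        // no scratch register or Mov is needed.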
+        temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr;
       } else {
-        DCHECK(src_cst->IsDoubleConstant());
-        temp = temps.AcquireD();
+        if (src_cst->IsIntConstant()) {
+          temp = temps.AcquireW();
+        } else if (src_cst->IsLongConstant()) {
+          temp = temps.AcquireX();
+        } else if (src_cst->IsFloatConstant()) {
+          temp = temps.AcquireS();
+        } else {
+          DCHECK(src_cst->IsDoubleConstant());
+          temp = temps.AcquireD();
+        }
+        MoveConstant(temp, src_cst);
       }
-      MoveConstant(temp, src_cst);
       __ Str(temp, StackOperandFrom(destination));
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
@@ -1090,7 +1342,8 @@
 
 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                      CPURegister dst,
-                                     const MemOperand& src) {
+                                     const MemOperand& src,
+                                     bool needs_null_check) {
   MacroAssembler* masm = GetVIXLAssembler();
   BlockPoolsScope block_pools(masm);
   UseScratchRegisterScope temps(masm);
@@ -1101,25 +1354,33 @@
   DCHECK(!src.IsPostIndex());
 
   // TODO(vixl): Let the MacroAssembler handle MemOperand.
-  __ Add(temp_base, src.base(), OperandFromMemOperand(src));
+  __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
   MemOperand base = MemOperand(temp_base);
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimByte:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimChar:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimShort:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimInt:
@@ -1127,7 +1388,9 @@
     case Primitive::kPrimLong:
       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
       __ Ldar(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
@@ -1136,7 +1399,9 @@
 
       Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
       __ Ldar(temp, base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Fmov(FPRegister(dst), temp);
       break;
     }
@@ -1181,7 +1446,7 @@
 
   // TODO(vixl): Let the MacroAssembler handle this.
   Operand op = OperandFromMemOperand(dst);
-  __ Add(temp_base, dst.base(), op);
+  __ Add(temp_base, dst.GetBaseRegister(), op);
   MemOperand base = MemOperand(temp_base);
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1217,7 +1482,7 @@
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kArm64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1234,30 +1499,31 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                             HInstruction* instruction,
+                                                             SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  BlockPoolsScope block_pools(GetVIXLAssembler());
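+  // Load the entry point from the current thread and call it; unlike
+  // InvokeRuntime, no PC info is recorded for this call.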
+  __ Ldr(lr, MemOperand(tr, entry_point_offset));
+  __ Blr(lr);
+}
+
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
-                                                                     vixl::Register class_reg) {
+                                                                     Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (use_acquire_release) {
-    // TODO(vixl): Let the MacroAssembler handle MemOperand.
-    __ Add(temp, class_reg, status_offset);
-    __ Ldar(temp, HeapOperand(temp));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-  } else {
-    __ Ldr(temp, HeapOperand(class_reg, status_offset));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-    __ Dmb(InnerShareable, BarrierReads);
-  }
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
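+  // The load-acquire below provides the ordering that an explicit Dmb barrier
+  // would otherwise have to supply.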
+  __ Add(temp, class_reg, status_offset);
+  __ Ldar(temp, HeapOperand(temp));
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   BarrierType type = BarrierAll;
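+  // Default to a full barrier; the switch below narrows it based on `kind`.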
 
   switch (kind) {
@@ -1299,7 +1565,7 @@
   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   Register temp = temps.AcquireW();
 
-  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue()));
+  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
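+  // A non-zero flag value indicates a pending checkpoint or suspend request.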
   if (successor == nullptr) {
     __ Cbnz(temp, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -1312,7 +1578,7 @@
 
 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                              CodeGeneratorARM64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1368,42 +1634,74 @@
 }
 
 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object field get when read barriers
+    // are enabled: we do not want the load to overwrite the object's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                    const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  LocationSummary* locations = instruction->GetLocations();
+  Location base_loc = locations->InAt(0);
+  Location out = locations->Out();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
-
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  if (field_info.IsVolatile()) {
-    if (use_acquire_release) {
+  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object FieldGet with Baker's read barrier case.
+    MacroAssembler* masm = GetVIXLAssembler();
+    UseScratchRegisterScope temps(masm);
+    // /* HeapReference<Object> */ out = *(base + offset)
+    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
+    Register temp = temps.AcquireW();
+    // Note that potential implicit null checks are handled in this
+    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
+    codegen_->GenerateFieldLoadWithBakerReadBarrier(
+        instruction,
+        out,
+        base,
+        offset,
+        temp,
+        /* needs_null_check */ true,
+        field_info.IsVolatile());
+  } else {
+    // General case.
+    if (field_info.IsVolatile()) {
+      // Note that a potential implicit null check is handled in this
+      // CodeGeneratorARM64::LoadAcquire call.
       // NB: LoadAcquire will record the pc info if needed.
-      codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+      codegen_->LoadAcquire(
+          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
-  } else {
-    codegen_->Load(field_type, OutputCPURegister(instruction), field);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+    if (field_type == Primitive::kPrimNot) {
+      // If read barriers are enabled, emit read barriers other than
+      // Baker's using a slow path (and also unpoison the loaded
+      // reference, if heap poisoning is enabled).
+      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+    }
   }
 }
 
@@ -1429,7 +1727,6 @@
   CPURegister source = value;
   Offset offset = field_info.GetFieldOffset();
   Primitive::Type field_type = field_info.GetFieldType();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   {
     // We use a block to end the scratch scope before the write barrier, thus
@@ -1445,15 +1742,8 @@
     }
 
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-      } else {
-        GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
-        codegen_->Store(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
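+      // The store-release provides the required ordering; no explicit memory
+      // barriers are needed.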
+      codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -1482,6 +1772,17 @@
         __ Orr(dst, lhs, rhs);
       } else if (instr->IsSub()) {
         __ Sub(dst, lhs, rhs);
+      } else if (instr->IsRor()) {
+        if (rhs.IsImmediate()) {
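+          // The rotate amount is taken modulo the register width.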
+          uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
+          __ Ror(dst, lhs, shift);
+        } else {
+          // Ensure the shift distance is in a register of the same size as the
+          // result. If we are rotating a long and the shift distance arrives in
+          // a w register, no sxtw is needed to use it as an x register, since
+          // shift distances are always masked with reg_bits - 1.
+          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
+        }
       } else {
         DCHECK(instr->IsXor());
         __ Eor(dst, lhs, rhs);
@@ -1536,9 +1837,8 @@
       Register lhs = InputRegisterAt(instr, 0);
       Operand rhs = InputOperandAt(instr, 1);
       if (rhs.IsImmediate()) {
-        uint32_t shift_value = (type == Primitive::kPrimInt)
-          ? static_cast<uint32_t>(rhs.immediate() & kMaxIntShiftValue)
-          : static_cast<uint32_t>(rhs.immediate() & kMaxLongShiftValue);
+        uint32_t shift_value = rhs.GetImmediate() &
+            (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
         if (instr->IsShl()) {
           __ Lsl(dst, lhs, shift_value);
         } else if (instr->IsShr()) {
@@ -1547,7 +1847,7 @@
           __ Lsr(dst, lhs, shift_value);
         }
       } else {
-        Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W();
+        Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
 
         if (instr->IsShl()) {
           __ Lsl(dst, lhs, rhs_reg);
@@ -1580,7 +1880,113 @@
   HandleBinaryOp(instruction);
 }
 
-void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
+  DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  locations->SetInAt(0, Location::RequiresRegister());
+  // There is no immediate variant of negated bitwise instructions in AArch64.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
+  Register dst = OutputRegister(instr);
+  Register lhs = InputRegisterAt(instr, 0);
+  Register rhs = InputRegisterAt(instr, 1);
+
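+  // AArch64 provides fused instructions for each bitwise operation with a
+  // negated right operand: Bic (and-not), Orn (or-not) and Eon (xor-not).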
+  switch (instr->GetOpKind()) {
+    case HInstruction::kAnd:
+      __ Bic(dst, lhs, rhs);
+      break;
+    case HInstruction::kOr:
+      __ Orn(dst, lhs, rhs);
+      break;
+    case HInstruction::kXor:
+      __ Eon(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+  }
+}
+
+void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+         instruction->GetType() == Primitive::kPrimLong);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  if (instruction->GetInstrKind() == HInstruction::kNeg) {
+    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  Primitive::Type type = instruction->GetType();
+  HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+  Register out = OutputRegister(instruction);
+  Register left;
+  if (kind != HInstruction::kNeg) {
+    left = InputRegisterAt(instruction, 0);
+  }
+  // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the
+  // shifter operand operation, the IR generating `right_reg` (input to the type
+  // conversion) can have a different type from the current instruction's type,
+  // so we manually indicate the type.
+  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
+  int64_t shift_amount = instruction->GetShiftAmount() &
+      (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
+
+  Operand right_operand(0);
+
+  HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
+  } else {
+    right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+  }
+
+  // Logical binary operations do not support extension operations in their
+  // operands. Note that VIXL would still cope if one were passed, by
+  // generating the extension as a separate instruction.
+  // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
+  DCHECK(!right_operand.IsExtendedRegister() ||
+         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
+          kind != HInstruction::kNeg));
+  switch (kind) {
+    case HInstruction::kAdd:
+      __ Add(out, left, right_operand);
+      break;
+    case HInstruction::kAnd:
+      __ And(out, left, right_operand);
+      break;
+    case HInstruction::kNeg:
+      DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
+      __ Neg(out, right_operand);
+      break;
+    case HInstruction::kOr:
+      __ Orr(out, left, right_operand);
+      break;
+    case HInstruction::kSub:
+      __ Sub(out, left, right_operand);
+      break;
+    case HInstruction::kXor:
+      __ Eor(out, left, right_operand);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation kind: " << kind;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1588,63 +1994,154 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
-    HArm64IntermediateAddress* instruction) {
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
+    HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
 }
 
-void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+  if (instr->GetOpKind() == HInstruction::kSub &&
+      accumulator->IsConstant() &&
+      accumulator->AsConstant()->IsArithmeticZero()) {
+    // Don't allocate a register for the accumulator; an Mneg instruction will be used instead.
+  } else {
+    locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                       Location::RequiresRegister());
+  }
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  Register res = OutputRegister(instr);
+  Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
+
+  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
+  // This fixup should be carried out for all multiply-accumulate instructions:
+  // madd, msub, smaddl, smsubl, umaddl and umsubl.
+  if (instr->GetType() == Primitive::kPrimLong &&
+      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
+    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
+    vixl::aarch64::Instruction* prev =
+        masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
+    if (prev->IsLoadOrStore()) {
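+      // The erratum may trigger when a 64-bit multiply-accumulate directly
+      // follows a load or store.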
+      // Make sure we emit only exactly one nop.
+      vixl::aarch64::CodeBufferCheckScope scope(masm,
+                                                kInstructionSize,
+                                                vixl::aarch64::CodeBufferCheckScope::kCheck,
+                                                vixl::aarch64::CodeBufferCheckScope::kExactSize);
+      __ nop();
+    }
+  }
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
+    __ Madd(res, mul_left, mul_right, accumulator);
+  } else {
+    DCHECK(instr->GetOpKind() == HInstruction::kSub);
+    HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+    if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
+      __ Mneg(res, mul_left, mul_right);
+    } else {
+      Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
+      __ Msub(res, mul_left, mul_right, accumulator);
+    }
+  }
+}
+
+void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   Primitive::Type type = instruction->GetType();
   Register obj = InputRegisterAt(instruction, 0);
-  Location index = instruction->GetLocations()->InAt(1);
-  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
-  MemOperand source = HeapOperand(obj);
-  CPURegister dest = OutputCPURegister(instruction);
+  LocationSummary* locations = instruction->GetLocations();
+  Location index = locations->InAt(1);
+  Location out = locations->Out();
+  uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
-  if (index.IsConstant()) {
-    offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-    source = HeapOperand(obj, offset);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object ArrayGet with Baker's read barrier case.
+    Register temp = temps.AcquireW();
+    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
+    // Note that a potential implicit null check is handled in the
+    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    codegen_->GenerateArrayLoadWithBakerReadBarrier(
+        instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
   } else {
-    Register temp = temps.AcquireSameSizeAs(obj);
-    if (instruction->GetArray()->IsArm64IntermediateAddress()) {
-      // We do not need to compute the intermediate address from the array: the
-      // input instruction has done it already. See the comment in
-      // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
-        DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
-      }
-      temp = obj;
+    // General case.
+    MemOperand source = HeapOperand(obj);
+    if (index.IsConstant()) {
+      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+      source = HeapOperand(obj, offset);
     } else {
-      __ Add(temp, obj, offset);
+      Register temp = temps.AcquireSameSizeAs(obj);
+      if (instruction->GetArray()->IsIntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HIntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
+        // We do not need to compute the intermediate address from the array: the
+        // input instruction has done it already. See the comment in
+        // `TryExtractArrayAccessAddress()`.
+        if (kIsDebugBuild) {
+          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
+          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
+        }
+        temp = obj;
+      } else {
+        __ Add(temp, obj, offset);
+      }
+      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
     }
-    source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
-  }
 
-  codegen_->Load(type, dest, source);
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+    codegen_->Load(type, OutputCPURegister(instruction), source);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
 
-  if (instruction->GetType() == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(dest.W());
+    if (type == Primitive::kPrimNot) {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      Location obj_loc = locations->InAt(0);
+      if (index.IsConstant()) {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
+      } else {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
+      }
+    }
   }
 }
 
@@ -1655,19 +2152,26 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   BlockPoolsScope block_pools(GetVIXLAssembler());
-  __ Ldr(OutputRegister(instruction),
-         HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+  __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+  if (Primitive::IsFloatingPointType(value_type)) {
     locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
@@ -1677,7 +2181,7 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -1691,19 +2195,22 @@
   BlockPoolsScope block_pools(masm);
 
   if (!needs_write_barrier) {
-    DCHECK(!may_need_runtime_call);
+    DCHECK(!may_need_runtime_call_for_type_check);
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
       destination = HeapOperand(array, offset);
     } else {
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
-      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      if (instruction->GetArray()->IsIntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HIntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
-        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        // `TryExtractArrayAccessAddress()`.
         if (kIsDebugBuild) {
-          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
           DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
         }
         temp = array;
@@ -1719,8 +2226,8 @@
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
     DCHECK(needs_write_barrier);
-    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
-    vixl::Label done;
+    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
+    vixl::aarch64::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
     {
       // We use a block to end the scratch scope before the write barrier, thus
@@ -1741,11 +2248,11 @@
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
-          vixl::Label non_zero;
+          vixl::aarch64::Label non_zero;
           __ Cbnz(Register(value), &non_zero);
           if (!index.IsConstant()) {
             __ Add(temp, array, offset);
@@ -1756,26 +2263,66 @@
           __ Bind(&non_zero);
         }
 
-        Register temp2 = temps.AcquireSameSizeAs(array);
-        __ Ldr(temp, HeapOperand(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GetAssembler()->MaybeUnpoisonHeapReference(temp);
-        __ Ldr(temp, HeapOperand(temp, component_offset));
-        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ Cmp(temp, temp2);
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl::Label do_put;
-          __ B(eq, &do_put);
-          GetAssembler()->MaybeUnpoisonHeapReference(temp);
-          __ Ldr(temp, HeapOperand(temp, super_offset));
-          // No need to unpoison, we're comparing against null.
-          __ Cbnz(temp, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ Ldr(temp, HeapOperand(temp, component_offset));
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
+          //
+          //   __ Cmp(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ B(slow_path->GetEntryLabel());
         } else {
-          __ B(ne, slow_path->GetEntryLabel());
+          Register temp2 = temps.AcquireSameSizeAs(array);
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ Ldr(temp, HeapOperand(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ Ldr(temp, HeapOperand(temp, component_offset));
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor `temp2`, as we are comparing two poisoned references.
+          __ Cmp(temp, temp2);
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            vixl::aarch64::Label do_put;
+            __ B(eq, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ Ldr(temp, HeapOperand(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ Cbnz(temp, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ B(ne, slow_path->GetEntryLabel());
+          }
+          temps.Release(temp2);
         }
-        temps.Release(temp2);
       }
 
       if (kPoisonHeapReferences) {
@@ -1791,7 +2338,7 @@
       }
       __ Str(source, destination);
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
     }
@@ -1846,9 +2393,35 @@
   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
 }
 
-static bool IsFloatingPointZeroConstant(HInstruction* instruction) {
-  return (instruction->IsFloatConstant() && (instruction->AsFloatConstant()->GetValue() == 0.0f))
-      || (instruction->IsDoubleConstant() && (instruction->AsDoubleConstant()->GetValue() == 0.0));
+static bool IsFloatingPointZeroConstant(HInstruction* inst) {
+  return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
+      || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
+}
+
+void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
+  FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
+  Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // an FCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equals, Float.compare,
+    // Float.compareTo, Double.equals, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
+    __ Fcmp(lhs_reg, 0.0);
+  } else {
+    __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
+  }
 }
 
 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
@@ -1856,6 +2429,11 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   Primitive::Type in_type = compare->InputAt(0)->GetType();
   switch (in_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
@@ -1884,33 +2462,26 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       Register result = OutputRegister(compare);
       Register left = InputRegisterAt(compare, 0);
       Operand right = InputOperandAt(compare, 1);
-
       __ Cmp(left, right);
-      __ Cset(result, ne);
-      __ Cneg(result, result, lt);
+      __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
+      __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
       break;
     }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       Register result = OutputRegister(compare);
-      FPRegister left = InputFPRegisterAt(compare, 0);
-      if (compare->GetLocations()->InAt(1).IsConstant()) {
-        DCHECK(IsFloatingPointZeroConstant(compare->GetLocations()->InAt(1).GetConstant()));
-        // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
-        __ Fcmp(left, 0.0);
-      } else {
-        __ Fcmp(left, InputFPRegisterAt(compare, 1));
-      }
-      if (compare->IsGtBias()) {
-        __ Cset(result, ne);
-      } else {
-        __ Csetm(result, ne);
-      }
-      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+      GenerateFcmp(compare);
+      __ Cset(result, ne);
+      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
       break;
     }
     default:
@@ -1918,7 +2489,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
@@ -1933,44 +2504,29 @@
     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   }
 
-  if (instruction->NeedsMaterialization()) {
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
 
   LocationSummary* locations = instruction->GetLocations();
   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   IfCondition if_cond = instruction->GetCondition();
-  Condition arm64_cond = ARM64Condition(if_cond);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
-    FPRegister lhs = InputFPRegisterAt(instruction, 0);
-    if (locations->InAt(1).IsConstant()) {
-      DCHECK(IsFloatingPointZeroConstant(locations->InAt(1).GetConstant()));
-      // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
-      __ Fcmp(lhs, 0.0);
-    } else {
-      __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
-    }
-    __ Cset(res, arm64_cond);
-    if (instruction->IsFPConditionTrueIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 1 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 1
-      __ Csel(res, res, Operand(1), vc);  // VC for "not unordered".
-    } else if (instruction->IsFPConditionFalseIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 0 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 0
-      __ Csel(res, res, Operand(0), vc);  // VC for "not unordered".
-    }
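+    // ARM64FPCondition chooses a condition code that folds in the NaN
+    // (unordered) case according to the gt bias.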
+    GenerateFcmp(instruction);
+    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   } else {
     // Integer cases.
     Register lhs = InputRegisterAt(instruction, 0);
     Operand rhs = InputOperandAt(instruction, 1);
     __ Cmp(lhs, rhs);
-    __ Cset(res, arm64_cond);
+    __ Cset(res, ARM64Condition(if_cond));
   }
 }
 
@@ -1986,8 +2542,8 @@
   M(Above)                                                                               \
   M(AboveOrEqual)
 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
-void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
-void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
+void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
+void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
 #undef DEFINE_CONDITION_VISITORS
 #undef FOR_EACH_CONDITION_INSTRUCTION
@@ -2025,8 +2581,7 @@
   Register out = OutputRegister(instruction);
   Register dividend = InputRegisterAt(instruction, 0);
   int64_t imm = Int64FromConstant(second.GetConstant());
-  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
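+  // Unlike std::abs, AbsOrMin is well defined when `imm` is the most negative value.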
+  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -2118,7 +2673,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -2198,8 +2753,8 @@
 
   Primitive::Type type = instruction->GetType();
 
-  if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) {
-      LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+  if (!Primitive::IsIntegralType(type)) {
+    LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
     return;
   }
 
@@ -2283,72 +2838,94 @@
 }
 
 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
-                                                          vixl::Label* true_target,
-                                                          vixl::Label* false_target,
-                                                          vixl::Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  HCondition* condition = cond->AsCondition();
+                                                          size_t condition_input_index,
+                                                          vixl::aarch64::Label* true_target,
+                                                          vixl::aarch64::Label* false_target) {
+  // FP branching requires both targets to be explicit. If either of the targets
+  // is nullptr (fallthrough) use and bind `fallthrough_target` instead.
+  vixl::aarch64::Label fallthrough_target;
+  HInstruction* cond = instruction->InputAt(condition_input_index);
 
-  if (cond->IsIntConstant()) {
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ B(always_true_target);
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against "true" (integer value 1).
+    if (cond->AsIntConstant()->IsTrue()) {
+      if (true_target != nullptr) {
+        __ B(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      if (false_target != nullptr) {
+        __ B(false_target);
+      }
     }
-  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
     // The condition instruction has been materialized, compare the output to 0.
-    Location cond_val = instruction->GetLocations()->InAt(0);
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-    __ Cbnz(InputRegisterAt(instruction, 0), true_target);
+    if (true_target == nullptr) {
+      __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
+    } else {
+      __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
+    }
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
-    Primitive::Type type =
-        cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
+    HCondition* condition = cond->AsCondition();
 
+    Primitive::Type type = condition->InputAt(0)->GetType();
     if (Primitive::IsFloatingPointType(type)) {
-      // FP compares don't like null false_targets.
-      if (false_target == nullptr) {
-        false_target = codegen_->GetLabelOf(instruction->AsIf()->IfFalseSuccessor());
-      }
-      FPRegister lhs = InputFPRegisterAt(condition, 0);
-      if (condition->GetLocations()->InAt(1).IsConstant()) {
-        DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant()));
-        // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
-        __ Fcmp(lhs, 0.0);
+      GenerateFcmp(condition);
+      if (true_target == nullptr) {
+        IfCondition opposite_condition = condition->GetOppositeCondition();
+        __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
       } else {
-        __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
+        __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
       }
-      if (condition->IsFPConditionTrueIfNaN()) {
-        __ B(vs, true_target);  // VS for unordered.
-      } else if (condition->IsFPConditionFalseIfNaN()) {
-        __ B(vs, false_target);  // VS for unordered.
-      }
-      __ B(ARM64Condition(condition->GetCondition()), true_target);
     } else {
       // Integer cases.
       Register lhs = InputRegisterAt(condition, 0);
       Operand rhs = InputOperandAt(condition, 1);
-      Condition arm64_cond = ARM64Condition(condition->GetCondition());
-      if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+
+      Condition arm64_cond;
+      vixl::aarch64::Label* non_fallthrough_target;
+      if (true_target == nullptr) {
+        arm64_cond = ARM64Condition(condition->GetOppositeCondition());
+        non_fallthrough_target = false_target;
+      } else {
+        arm64_cond = ARM64Condition(condition->GetCondition());
+        non_fallthrough_target = true_target;
+      }
+
+      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
+          rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
         switch (arm64_cond) {
           case eq:
-            __ Cbz(lhs, true_target);
+            __ Cbz(lhs, non_fallthrough_target);
             break;
           case ne:
-            __ Cbnz(lhs, true_target);
+            __ Cbnz(lhs, non_fallthrough_target);
             break;
           case lt:
             // Test the sign bit and branch accordingly.
-            __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+            __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
             break;
           case ge:
             // Test the sign bit and branch accordingly.
-            __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+            __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
             break;
           default:
             // Without the `static_cast` the compiler throws an error for
@@ -2357,53 +2934,150 @@
         }
       } else {
         __ Cmp(lhs, rhs);
-        __ B(arm64_cond, true_target);
+        __ B(arm64_cond, non_fallthrough_target);
       }
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ B(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
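
// The three patterns documented above can be sketched without the VIXL
// macro-assembler. This is a minimal standalone illustration; `EmitCondBranch`
// and `EmitJump` are hypothetical stand-ins for the emitted instructions.
#include <cstdio>

enum class Cond { kEq, kNe };

static Cond Opposite(Cond c) { return c == Cond::kEq ? Cond::kNe : Cond::kEq; }

static void EmitCondBranch(Cond c, const char* target) {
  std::printf("b.%s %s\n", c == Cond::kEq ? "eq" : "ne", target);
}

static void EmitJump(const char* target) { std::printf("b %s\n", target); }

static void TestAndBranchSketch(Cond cond, const char* true_target, const char* false_target) {
  if (true_target == nullptr) {
    // (1) The true successor falls through: branch on the opposite condition.
    EmitCondBranch(Opposite(cond), false_target);
  } else {
    // (2) and (3): branch to the true successor on the condition itself.
    EmitCondBranch(cond, true_target);
    if (false_target != nullptr) {
      // (3) Neither successor falls through: add the unconditional jump.
      EmitJump(false_target);
    }
  }
}

int main() {
  TestAndBranchSketch(Cond::kEq, nullptr, "L_false");   // prints "b.ne L_false"
  TestAndBranchSketch(Cond::kEq, "L_true", nullptr);    // prints "b.eq L_true"
  TestAndBranchSketch(Cond::kEq, "L_true", "L_false");  // prints both branches
}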
 
 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
-  vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  vixl::Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
+    true_target = nullptr;
   }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
+  vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
     false_target = nullptr;
   }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathARM64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
-  vixl::Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  SlowPathCodeARM64* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
+}
+
+static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
+  return condition->IsCondition() &&
+         Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
+}
+
+static inline Condition GetConditionForSelect(HCondition* condition) {
+  IfCondition cond = condition->AsCondition()->GetCondition();
+  return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
+                                                     : ARM64Condition(cond);
+}
+
+void LocationsBuilderARM64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresFpuRegister());
+  } else {
+    HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
+    HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+    bool is_true_value_constant = cst_true_value != nullptr;
+    bool is_false_value_constant = cst_false_value != nullptr;
+    // Ask VIXL whether we should synthesize constants in registers.
+    // We give an arbitrary register to VIXL when dealing with non-constant inputs.
+    Operand true_op = is_true_value_constant ?
+        Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
+    Operand false_op = is_false_value_constant ?
+        Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
+    bool true_value_in_register = false;
+    bool false_value_in_register = false;
+    MacroAssembler::GetCselSynthesisInformation(
+        x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
+    true_value_in_register |= !is_true_value_constant;
+    false_value_in_register |= !is_false_value_constant;
+
+    locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
+                                                 : Location::ConstantLocation(cst_true_value));
+    locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
+                                                  : Location::ConstantLocation(cst_false_value));
+    locations->SetOut(Location::RequiresRegister());
+  }
+
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
+  HInstruction* cond = select->GetCondition();
+  Condition csel_cond;
+
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    if (cond->IsCondition() && cond->GetNext() == select) {
+      // Use the condition flags set by the previous instruction.
+      csel_cond = GetConditionForSelect(cond->AsCondition());
+    } else {
+      __ Cmp(InputRegisterAt(select, 2), 0);
+      csel_cond = ne;
+    }
+  } else if (IsConditionOnFloatingPointValues(cond)) {
+    GenerateFcmp(cond);
+    csel_cond = GetConditionForSelect(cond->AsCondition());
+  } else {
+    __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+    csel_cond = GetConditionForSelect(cond->AsCondition());
+  }
+
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    __ Fcsel(OutputFPRegister(select),
+             InputFPRegisterAt(select, 1),
+             InputFPRegisterAt(select, 0),
+             csel_cond);
+  } else {
+    __ Csel(OutputRegister(select),
+            InputOperandAt(select, 1),
+            InputOperandAt(select, 0),
+            csel_cond);
+  }
+}
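
// CSEL writes its first source when the condition holds and its second source
// otherwise, which is why the true value is wired to input 1 and the false
// value to input 0 above. A plain C++ restatement of that selection (the
// names are illustrative, not ART's):
#include <cassert>
#include <cstdint>

static int64_t CselSketch(bool condition_holds, int64_t true_value, int64_t false_value) {
  return condition_holds ? true_value : false_value;
}

int main() {
  // A materialized boolean is compared against 0 and selected on `ne`,
  // so any nonzero input picks the true value.
  assert(CselSketch(/* bool_input */ 1 != 0, 10, 20) == 10);
  assert(CselSketch(/* bool_input */ 0 != 0, 10, 20) == 20);
}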
+
+void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
+void CodeGeneratorARM64::GenerateNop() {
+  __ Nop();
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -2422,49 +3096,62 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM64 uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
+  Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  vixl::Label done, zero;
+  vixl::aarch64::Label done, zero;
   SlowPathCodeARM64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
@@ -2473,17 +3160,10 @@
     __ Cbz(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ Ldr(target, HeapOperand(obj.W(), class_offset));
-  GetAssembler()->MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       __ Cmp(out, cls);
       __ Cset(out, eq);
@@ -2492,13 +3172,14 @@
       }
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop, success;
+      vixl::aarch64::Label loop, success;
       __ Bind(&loop);
-      __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -2509,14 +3190,15 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop, success;
+      vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
-      __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -2527,14 +3209,15 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      vixl::Label exact_check;
+      vixl::aarch64::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ Ldr(out, HeapOperand(out, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -2545,11 +3228,12 @@
       __ B(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ B(ne, slow_path->GetEntryLabel());
       __ Mov(out, 1);
@@ -2558,13 +3242,32 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require to assign fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ B(&done);
       }
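
// What the kClassHierarchyCheck path computes, restated as a plain C++ loop
// over a toy class model (the kAbstractClassCheck variant starts the walk at
// the super class, since comparing the object's own class is known to fail).
// Illustrative types only:
#include <cassert>

struct KlassSketch { const KlassSketch* super_class; };

static bool IsSubclassOf(const KlassSketch* klass, const KlassSketch* target) {
  // Mirrors the emitted loop: compare, then load super_class_ and retry.
  // Reaching a null super class means the walk failed; the result is false.
  for (; klass != nullptr; klass = klass->super_class) {
    if (klass == target) return true;
  }
  return false;
}

int main() {
  KlassSketch object{nullptr}, base{&object}, derived{&base};
  assert(IsSubclassOf(&derived, &object));
  assert(!IsSubclassOf(&base, &derived));
}
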
@@ -2590,135 +3293,196 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM64 uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
-  Register temp;
-  if (!locations->WillCall()) {
-    temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
-  }
-
+  Location temp_loc = locations->GetTemp(0);
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
+  Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCodeARM64* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCodeARM64* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                          is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
-  vixl::Label done;
+  vixl::aarch64::Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ Cbz(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ Ldr(obj, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ Ldr(temp, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ B(ne, slow_path->GetEntryLabel());
+      __ B(ne, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop;
+      vixl::aarch64::Label loop, compare_classes;
       __ Bind(&loop);
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ Cbnz(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But first restore the object's class to `temp`, as it has
+      // been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop;
+      vixl::aarch64::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
       __ B(eq, &done);
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ Cbnz(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ B(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But first restore the object's class to `temp`, as it has
+      // been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      vixl::aarch64::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ Ldr(temp, HeapOperand(temp, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      __ Cbz(temp, slow_path->GetEntryLabel());
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ Cbnz(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But first restore the object's class to `temp`, as it has
+      // been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Cbz(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers for the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
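
// The kArrayObjectCheck case above, restated over a toy class model: accept
// an exact match, otherwise require a non-null component type (the class is
// an array) whose primitive type tag is kPrimNot (0). Field names here are
// illustrative:
#include <cassert>
#include <cstdint>

struct ClassSketch {
  const ClassSketch* component_type;  // null when this is not an array class
  uint16_t primitive_type;            // 0 (kPrimNot) for reference types
};

static bool PassesArrayObjectCheck(const ClassSketch* klass, const ClassSketch* target) {
  if (klass == target) return true;
  const ClassSketch* component = klass->component_type;
  return component != nullptr && component->primitive_type == 0;
}

int main() {
  ClassSketch ref_component{nullptr, 0};
  ClassSketch ref_array{&ref_component, 0};    // a reference array, e.g. String[]
  ClassSketch prim_component{nullptr, 10};     // some primitive type tag
  ClassSketch prim_array{&prim_component, 0};  // a primitive array, e.g. int[]
  ClassSketch target{nullptr, 0};
  assert(PassesArrayObjectCheck(&ref_array, &target));
  assert(!PassesArrayObjectCheck(&prim_array, &target));
}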
 
 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
@@ -2761,12 +3525,11 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
-  Location receiver = invoke->GetLocations()->InAt(0);
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = XRegisterFrom(locations->GetTemp(0));
+  Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
 
   // The register ip1 is required to be used for the hidden argument in
   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
@@ -2776,15 +3539,27 @@
   scratch_scope.Exclude(ip1);
   __ Mov(ip1, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ Ldr(temp.W(), StackOperandFrom(receiver));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
+  __ Ldr(temp,
+      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -2805,9 +3580,9 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -2826,6 +3601,13 @@
   return false;
 }
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  // On ARM64 we support all dispatch types.
+  return desired_dispatch_info;
+}
+
 void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   bool direct_code_loaded = false;
@@ -2849,41 +3631,42 @@
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
-      __ Ldr(XRegisterFrom(temp).X(), MemOperand(tr, invoke->GetStringInitOffset()));
+      __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       // Load method address from literal pool.
-      __ Ldr(XRegisterFrom(temp).X(), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+      __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
       // Load method address from literal pool with a link-time patch.
-      __ Ldr(XRegisterFrom(temp).X(),
+      __ Ldr(XRegisterFrom(temp),
              DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       // Add ADRP with its PC-relative DexCache access patch.
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
-      vixl::Label* pc_insn_label = &pc_rel_dex_cache_patches_.back().label;
+      const DexFile& dex_file = *invoke->GetTargetMethod().dex_file;
+      uint32_t element_offset = invoke->GetDexCacheArrayOffset();
+      vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
-        __ adrp(XRegisterFrom(temp).X(), 0);
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0);
       }
-      __ Bind(pc_insn_label);  // Bind after ADRP.
-      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
       // Add LDR with its PC-relative DexCache access patch.
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
-      __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0));
-      __ Bind(&pc_rel_dex_cache_patches_.back().label);  // Bind after LDR.
-      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      vixl::aarch64::Label* ldr_label =
+          NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(ldr_label);
+        __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0));
+      }
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = XRegisterFrom(temp);
       Register method_reg;
       if (current_method.IsRegister()) {
@@ -2895,12 +3678,13 @@
         __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
       }
 
-      // temp = current_method->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ Ldr(reg.X(),
              MemOperand(method_reg.X(),
-                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
+                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
       // temp = temp[index_in_cache];
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
       break;
     }
@@ -2912,9 +3696,10 @@
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
       relative_call_patches_.emplace_back(invoke->GetTargetMethod());
-      vixl::Label* label = &relative_call_patches_.back().label;
-      __ Bl(label);  // Arbitrarily branch to the instruction after BL, override at link time.
-      __ Bind(label);  // Bind after BL.
+      vixl::aarch64::Label* label = &relative_call_patches_.back().label;
+      SingleEmissionCheckScope guard(GetVIXLAssembler());
+      __ Bind(label);
+      __ bl(0);  // Branch and link to itself. This will be overridden at link time.
       break;
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
@@ -2927,8 +3712,8 @@
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // LR = callee_method->entry_point_from_quick_compiled_code_;
       __ Ldr(lr, MemOperand(
-          XRegisterFrom(callee_method).X(),
-          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
+          XRegisterFrom(callee_method),
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
       // lr()
       __ Blr(lr);
       break;
@@ -2938,19 +3723,30 @@
 }
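
// How the ADRP/LDR pair in the kDexCachePcRelative path above resolves at
// link time: ADRP materializes the target's 4KiB page relative to the PC's
// page, and the LDR immediate supplies the low 12 bits. A self-contained
// arithmetic sketch of that split:
#include <cassert>
#include <cstdint>

static uint64_t AdrpResultSketch(uint64_t pc, uint64_t target) {
  // ADRP adds a signed, link-time-patched page delta to the PC's page.
  int64_t page_delta = static_cast<int64_t>((target >> 12) - (pc >> 12));
  return ((pc >> 12) + page_delta) << 12;  // == target & ~UINT64_C(0xfff)
}

int main() {
  uint64_t pc = UINT64_C(0x0000700012345678);
  uint64_t target = UINT64_C(0x00007000abcde120);
  uint64_t page = AdrpResultSketch(pc, target);
  uint64_t low12 = target & 0xfff;  // patched into the LDR (or ADD) immediate
  assert(page + low12 == target);
}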
 
 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   Register temp = XRegisterFrom(temp_in);
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
   Offset class_offset = mirror::Object::ClassOffset();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
 
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   DCHECK(receiver.IsRegister());
-  __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -2960,82 +3756,174 @@
   __ Blr(lr);
 }
 
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
+    const DexFile& dex_file,
+    uint32_t string_index,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
+    const DexFile& dex_file,
+    uint32_t type_index,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file,
+    uint32_t element_offset,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
+    const DexFile& dex_file,
+    uint32_t offset_or_index,
+    vixl::aarch64::Label* adrp_label,
+    ArenaDeque<PcRelativePatchInfo>* patches) {
+  // Add a patch entry and return the label.
+  patches->emplace_back(dex_file, offset_or_index);
+  PcRelativePatchInfo* info = &patches->back();
+  vixl::aarch64::Label* label = &info->label;
+  // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
+  info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
+  return label;
+}
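
// The bookkeeping pattern above, sketched with simplified types: every patch
// record owns a label bound at its own instruction, and non-ADRP records also
// point at the ADRP's label so the linker can pair the two instructions.
#include <cassert>
#include <cstdint>
#include <deque>

struct LabelSketch { int location = -1; };

struct PatchInfoSketch {
  uint32_t offset_or_index;
  LabelSketch label;           // bound at this record's own instruction
  LabelSketch* pc_insn_label;  // the paired ADRP's label, or our own label
};

static LabelSketch* NewPcRelativePatchSketch(std::deque<PatchInfoSketch>* patches,
                                             uint32_t offset_or_index,
                                             LabelSketch* adrp_label) {
  patches->push_back(PatchInfoSketch{offset_or_index, LabelSketch{}, nullptr});
  PatchInfoSketch* info = &patches->back();
  info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : &info->label;
  return &info->label;
}

int main() {
  // A deque keeps existing elements stable on push_back, so label pointers
  // handed out earlier stay valid as more patches are appended.
  std::deque<PatchInfoSketch> patches;
  LabelSketch* adrp = NewPcRelativePatchSketch(&patches, 8u, nullptr);
  NewPcRelativePatchSketch(&patches, 8u, adrp);  // the paired LDR record
  assert(patches.front().pc_insn_label == &patches.front().label);
  assert(patches.back().pc_insn_label == adrp);
}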
+
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
+    const DexFile& dex_file, uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+}
+
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
+    const DexFile& dex_file, uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+}
+
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
+    uint64_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(
+    uint64_t address) {
+  return DeduplicateUint64Literal(address);
+}
+
 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
       method_patches_.size() +
       call_patches_.size() +
       relative_call_patches_.size() +
-      pc_rel_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size() +
+      boot_image_string_patches_.size() +
+      pc_relative_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      pc_relative_type_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
-    vixl::Literal<uint64_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::MethodPatch(literal->offset(),
+    vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal->GetOffset(),
                                                        target_method.dex_file,
                                                        target_method.dex_method_index));
   }
   for (const auto& entry : call_patches_) {
     const MethodReference& target_method = entry.first;
-    vixl::Literal<uint64_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::CodePatch(literal->offset(),
+    vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::CodePatch(literal->GetOffset(),
                                                      target_method.dex_file,
                                                      target_method.dex_method_index));
   }
-  for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location() - 4u,
+  for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(),
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u,
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
                                                               &info.target_dex_file,
-                                                              info.pc_insn_label->location() - 4u,
-                                                              info.element_offset));
+                                                              info.pc_insn_label->GetLocation(),
+                                                              info.offset_or_index));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(),
+                                                               &info.target_dex_file,
+                                                               info.pc_insn_label->GetLocation(),
+                                                               info.offset_or_index));
+  }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(),
+                                                             &info.target_dex_file,
+                                                             info.pc_insn_label->GetLocation(),
+                                                             info.offset_or_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
   }
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
-  // Look up the literal for value.
-  auto lb = uint64_literals_.lower_bound(value);
-  if (lb != uint64_literals_.end() && !uint64_literals_.key_comp()(value, lb->first)) {
-    return lb->second;
-  }
-  // We don't have a literal for this value, insert a new one.
-  vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(value);
-  uint64_literals_.PutBefore(lb, value, literal);
-  return literal;
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(
+    uint32_t value,
+    Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
+  return uint64_literals_.GetOrCreate(
+      value,
+      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
+}
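
// The GetOrCreate pattern that replaced the explicit lower_bound/PutBefore
// code above: look the key up and, on a miss, build the value once with the
// supplied factory. A minimal sketch over std::map (ART's maps are
// arena-backed, but the shape is the same):
#include <cassert>
#include <cstdint>
#include <map>

template <typename Map, typename Factory>
typename Map::mapped_type GetOrCreateSketch(Map* map,
                                            const typename Map::key_type& key,
                                            Factory factory) {
  auto lb = map->lower_bound(key);
  if (lb != map->end() && !map->key_comp()(key, lb->first)) {
    return lb->second;  // Hit: reuse the previously created value.
  }
  return map->emplace_hint(lb, key, factory())->second;  // Miss: create once.
}

int main() {
  std::map<uint64_t, int> literals;
  int factory_calls = 0;
  auto factory = [&factory_calls]() { return ++factory_calls; };
  int first = GetOrCreateSketch(&literals, UINT64_C(42), factory);
  int second = GetOrCreateSketch(&literals, UINT64_C(42), factory);
  assert(first == second && factory_calls == 1);  // deduplicated
}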
+
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
     MethodReference target_method,
     MethodToLiteralMap* map) {
-  // Look up the literal for target_method.
-  auto lb = map->lower_bound(target_method);
-  if (lb != map->end() && !map->key_comp()(target_method, lb->first)) {
-    return lb->second;
-  }
-  // We don't have a literal for this method yet, insert a new one.
-  vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(0u);
-  map->PutBefore(lb, target_method, literal);
-  return literal;
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
     MethodReference target_method) {
   return DeduplicateMethodLiteral(target_method, &method_patches_);
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
     MethodReference target_method) {
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3058,12 +3946,63 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      LocationFrom(calling_convention.GetRegisterAt(0)),
-      LocationFrom(vixl::x0));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        LocationFrom(vixl::aarch64::x0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3073,26 +4012,115 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
+  Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
-  Register current_method = InputRegisterAt(cls, 0);
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
-    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
 
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = InputRegisterAt(cls, 0);
+      GenerateGcRootFieldLoad(
+          cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                            cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      // Add ADRP with its PC-relative type patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      uint32_t type_index = cls->GetTypeIndex();
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add ADD with its PC-relative type patch.
+      vixl::aarch64::Label* add_label =
+          codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(add_label);
+        __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
+      }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress()));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      // The LDR immediate encoding has a 12-bit offset scaled by the access size, which
+      // for 32-bit loads gives a 16KiB range. To reduce the number of literals when we
+      // load multiple types, split the dex cache address into a 16KiB-aligned base
+      // loaded from a literal and the remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
+      uint64_t base_address = cls->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
+      uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits);
+      __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out.X(), offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
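+    // A minimal worked example of the base/offset split above, with a hypothetical
+    // address and a plain 16KiB mask (the real code derives its mask via MaxInt<>):
+    //
+    //   uint64_t address      = 0x12345678;                    // dex cache GC root
+    //   uint64_t base_address = address & ~UINT64_C(0x3FFF);   // 0x12344000, one literal
+    //   uint32_t offset       = address & 0x3FFF;              // 0x1678, folded into LDR
+    //
+    // Types whose GC roots fall in the same 16KiB window then share one literal.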
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      // Add ADRP with its PC-relative DexCache access patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      uint32_t element_offset = cls->GetDexCacheElementOffset();
+      vixl::aarch64::Label* adrp_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add LDR with its PC-relative DexCache access patch.
+      vixl::aarch64::Label* ldr_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
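+    // A hedged sketch of how the patched ADRP/LDR pair resolves (register names
+    // illustrative): ADRP forms the 4KiB page of the target within a +/-4GiB
+    // range, and the scaled 12-bit LDR offset supplies the rest:
+    //
+    //   adrp xN, target_page        // xN = (PC & ~0xFFF) + (page_delta << 12)
+    //   ldr  wN, [xN, #lo12]        // GcRoot<> at target_page + low 12 bits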
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      MemberOffset resolved_types_offset =
+          ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = InputRegisterAt(cls, 0);
+      __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
-    __ Cbz(out, slow_path->GetEntryLabel());
+    if (generate_null_check) {
+      __ Cbz(out, slow_path->GetEntryLabel());
+    }
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
     } else {
@@ -3102,7 +4130,7 @@
 }
 
 static MemOperand GetExceptionTlsAddress() {
-  return MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value());
+  return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
 }
 
 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
@@ -3123,43 +4151,98 @@
   __ Str(wzr, GetExceptionTlsAddress());
 }
 
-void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
+HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
-  locations->SetInAt(0, Location::RequiresRegister());
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
+  Register out = OutputRegister(load);
+
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                              load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      // Add ADRP with its PC-relative String patch.
+      const DexFile& dex_file = load->GetDexFile();
+      uint32_t string_index = load->GetStringIndex();
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add ADD with its PC-relative String patch.
+      vixl::aarch64::Label* add_label =
+          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(add_label);
+        __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
+      }
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(load->GetAddress() != 0u && IsUint<32>(load->GetAddress()));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
+      return;  // No dex cache slow path.
+    }
+    default:
+      break;
+  }
+
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
   codegen_->AddSlowPath(slow_path);
-
-  Register out = OutputRegister(load);
-  Register current_method = InputRegisterAt(load, 0);
-  __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
-  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
-  __ Cbz(out, slow_path->GetEntryLabel());
+  __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void LocationsBuilderARM64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -3171,7 +4254,7 @@
 
 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -3182,7 +4265,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -3265,14 +4352,12 @@
 
 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   locations->SetOut(LocationFrom(x0));
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
-  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
-                       void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -3292,26 +4377,35 @@
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(LocationFrom(kArtMethodRegister));
+  } else {
+    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
-  DCHECK(type_index.Is(w0));
-  __ Mov(type_index, instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Location temp = instruction->GetLocations()->GetTemp(0);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
+    __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
+    __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
+    __ Blr(lr);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
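+// The string-allocation path above performs two dependent loads before the
+// call; roughly, in a hedged C-style model (names illustrative, `tls` being
+// the thread register x19):
+//
+//   ArtMethod* factory = *(ArtMethod**)(tls + pNewEmptyString_offset);   // 1st Ldr
+//   void* code = *(void**)((char*)factory + quick_code_offset);          // 2nd Ldr
+//   lr = code; call;                                                     // Blr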
 
 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -3339,7 +4433,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
-  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
+  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
 }
 
 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
@@ -3353,20 +4447,20 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
-  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
     return;
   }
 
   BlockPoolsScope block_pools(GetVIXLAssembler());
   Location obj = instruction->GetLocations()->InAt(0);
   __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void InstructionCodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
+void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
-  codegen_->AddSlowPath(slow_path);
+  AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
@@ -3375,11 +4469,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
-    GenerateImplicitNullCheck(instruction);
-  } else {
-    GenerateExplicitNullCheck(instruction);
-  }
+  codegen_->GenerateNullCheck(instruction);
 }
 
 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
@@ -3427,7 +4517,7 @@
 
 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3440,7 +4530,8 @@
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -3481,11 +4572,17 @@
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
 
     default:
       LOG(FATAL) << "Unexpected rem type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -3494,7 +4591,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
@@ -3515,6 +4612,14 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
+void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3531,33 +4636,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
@@ -3668,17 +4746,9 @@
   GenerateSuspendCheck(instruction, nullptr);
 }
 
-void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -3725,9 +4795,7 @@
     int min_size = std::min(result_size, input_size);
     Register output = OutputRegister(conversion);
     Register source = InputRegisterAt(conversion, 0);
-    if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
-      __ Ubfx(output, source, 0, result_size * kBitsPerByte);
-    } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
       // 'int' values are used directly as W registers, discarding the top
       // bits, so we don't need to sign-extend and can just perform a move.
       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
@@ -3736,9 +4804,11 @@
       // 32bit input value as a 64bit value assuming that the top 32 bits are
       // zero.
       __ Mov(output.W(), source.W());
-    } else if ((result_type == Primitive::kPrimChar) ||
-               ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
-      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      __ Ubfx(output,
+              output.IsX() ? source.X() : source.W(),
+              0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
     } else {
       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
     }
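+    // In short: char is the only unsigned 16-bit type, so conversions to char
+    // (and widenings from char) zero-extend, while the rest sign-extend. A
+    // hedged C++ model of the two bitfield extracts:
+    //
+    //   uint32_t to_char(int32_t v)  { return v & 0xFFFF; }                 // Ubfx
+    //   int32_t  to_short(int32_t v) { return static_cast<int16_t>(v); }    // Sbfx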
@@ -3782,18 +4852,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderARM64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorARM64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
@@ -3803,29 +4861,487 @@
 
 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   Register value_reg = InputRegisterAt(switch_instr, 0);
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Cbz(value_reg, succ);
+  // Roughly assume at most 16 assembly instructions generated per HIR on average.
+  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
+  // ADR has a limited range (+/-1MB), so we set a threshold on the number of HIRs in the graph to
+  // make sure we don't emit a jump table if its target may be out of range.
+  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
+  // ranges and emit the tables only as required.
+  static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
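+  // With the assumed 4-byte kInstructionSize this works out to 1MiB / 64B = 16384
+  // HIRs; larger graphs fall back to the compare/jump chain below, since a jump
+  // table emitted at the end of the code could then be beyond ADR's reach.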
+
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
+      // The current instruction id is an upper bound on the number of HIRs in the graph.
+      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
+    // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+    Register temp = temps.AcquireW();
+    __ Subs(temp, value_reg, Operand(lower_bound));
+
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Subs(temp, temp, Operand(2));
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // Handle the last case_value not covered by the pairwise loop above.
+      __ Cmp(temp, Operand(1));
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ B(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
+
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+
+    // The instructions below should use at most one blocked register. Since there
+    // are two blocked registers, we are free to block one.
+    Register temp_w = temps.AcquireW();
+    Register index;
+    // Remove the bias.
+    if (lower_bound != 0) {
+      index = temp_w;
+      __ Sub(index, value_reg, Operand(lower_bound));
     } else {
-      __ Cmp(value_reg, vixl::Operand(case_value));
-      __ B(eq, succ);
+      index = value_reg;
+    }
+
+    // Jump to the default block if the index is out of range.
+    __ Cmp(index, Operand(num_entries));
+    __ B(hs, codegen_->GetLabelOf(default_block));
+
+    // In the current VIXL implementation, encoding the immediate value for Adr does
+    // not require any blocked registers, so we are free to use both VIXL blocked
+    // registers to reduce register pressure.
+    Register table_base = temps.AcquireX();
+    // Load jump offset from the table.
+    __ Adr(table_base, jump_table->GetTableStartLabel());
+    Register jump_offset = temp_w;
+    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
+
+    // Jump to the target block by branching to table_base (PC-relative) + offset.
+    Register target_address = table_base;
+    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
+    __ Br(target_address);
+  }
+}
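+// A hedged C++ model of the jump-table dispatch emitted above (identifiers are
+// illustrative, not part of the codegen API):
+//
+//   intptr_t Dispatch(int32_t value, int32_t lower_bound, uint32_t num_entries,
+//                     const int32_t* table, intptr_t table_base,
+//                     intptr_t default_target) {
+//     uint32_t index = static_cast<uint32_t>(value - lower_bound);  // Sub: remove bias
+//     if (index >= num_entries) return default_target;              // Cmp + B.hs
+//     int32_t offset = table[index];            // Ldr [table_base, index, UXTW #2]
+//     return table_base + offset;               // Add (offset, SXTW) + Br
+//   }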
+
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                     Location out,
+                                                                     uint32_t offset,
+                                                                     Location maybe_temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  if (kEmitCompilerReadBarrier) {
+    Register temp_reg = RegisterFrom(maybe_temp, type);
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Mov(temp_reg, out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ Ldr(out_reg, HeapOperand(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ Ldr(out_reg, HeapOperand(out_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
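+// Note that the one-register variant clobbers its own source: `out` holds the
+// base object before the load and the field value after it. A hedged model of
+// the non-Baker slow-path case, showing why the base is copied first:
+//
+//   temp = out;               // preserve the base object for the barrier
+//   out  = *(out + offset);   // original reference load
+//   out  = ReadBarrier(out, /* obj */ temp, offset);   // slow-path fixup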
+
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                      Location out,
+                                                                      Location obj,
+                                                                      uint32_t offset,
+                                                                      Location maybe_temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  Register obj_reg = RegisterFrom(obj, type);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      Register temp_reg = RegisterFrom(maybe_temp, type);
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                            Location root,
+                                                            Register obj,
+                                                            uint32_t offset,
+                                                            vixl::aarch64::Label* fixup_label) {
+  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      if (fixup_label == nullptr) {
+        __ Ldr(root_reg, MemOperand(obj, offset));
+      } else {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(fixup_label);
+        __ ldr(root_reg, MemOperand(obj, offset));
+      }
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
+      codegen_->AddSlowPath(slow_path);
+
+      MacroAssembler* masm = GetVIXLAssembler();
+      UseScratchRegisterScope temps(masm);
+      Register temp = temps.AcquireW();
+      // temp = Thread::Current()->GetIsGcMarking()
+      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      if (fixup_label == nullptr) {
+        __ Add(root_reg.X(), obj.X(), offset);
+      } else {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(fixup_label);
+        __ add(root_reg.X(), obj.X(), offset);
+      }
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    if (fixup_label == nullptr) {
+      __ Ldr(root_reg, MemOperand(obj, offset));
+    } else {
+      SingleEmissionCheckScope guard(GetVIXLAssembler());
+      __ Bind(fixup_label);
+      __ ldr(root_reg, MemOperand(obj, offset));
+    }
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               Register obj,
+                                                               uint32_t offset,
+                                                               Register temp,
+                                                               bool needs_null_check,
+                                                               bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  size_t no_scale_factor = 0U;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               Register obj,
+                                                               uint32_t data_offset,
+                                                               Location index,
+                                                               Register temp,
+                                                               bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Array cells are never volatile variables; therefore, array loads
+  // never use Load-Acquire instructions on ARM64.
+  const bool use_load_acquire = false;
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                   Location ref,
+                                                                   Register obj,
+                                                                   uint32_t offset,
+                                                                   Location index,
+                                                                   size_t scale_factor,
+                                                                   Register temp,
+                                                                   bool needs_null_check,
+                                                                   bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  // If we are emitting an array load, we should not be using a
+  // Load Acquire instruction.  In other words:
+  // `instruction->IsArrayGet()` => `!use_load_acquire`.
+  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
+
+  MacroAssembler* masm = GetVIXLAssembler();
+  UseScratchRegisterScope temps(masm);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Primitive::Type type = Primitive::kPrimNot;
+  Register ref_reg = RegisterFrom(ref, type);
+  DCHECK(obj.IsW());
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ Ldr(temp, HeapOperand(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  // Introduce a dependency on the lock_word, including its rb_state, to
+  // prevent load-load reordering without resorting to a memory barrier
+  // (which would be more expensive).
+  // `obj` is unchanged by this operation, but its value now depends
+  // on `temp`.
+  __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
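+  // Why this adds zero: `temp` was written by a 32-bit Ldr, which clears the
+  // upper 32 bits of the X register, so `temp.X() >> 32` is always 0. The Add
+  // thus leaves `obj` unchanged while making the reference load below
+  // address-dependent on the monitor load.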
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index".
+    if (use_load_acquire) {
+      // UnsafeGetObjectVolatile intrinsic case.
+      // Register `index` is not an index in an object array, but an
+      // offset to an object reference field within object `obj`.
+      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
+      DCHECK(instruction->GetLocations()->Intrinsified());
+      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
+          << instruction->AsInvoke()->GetIntrinsic();
+      DCHECK_EQ(offset, 0U);
+      DCHECK_EQ(scale_factor, 0U);
+      DCHECK(!needs_null_check);
+      // /* HeapReference<Object> */ ref = *(obj + index)
+      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+    } else {
+      // ArrayGet and UnsafeGetObject intrinsics cases.
+      // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+      if (index.IsConstant()) {
+        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+        Load(type, ref_reg, HeapOperand(obj, computed_offset));
+      } else {
+        Register temp2 = temps.AcquireW();
+        __ Add(temp2, obj, offset);
+        Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
+        temps.Release(temp2);
+      }
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    MemOperand field = HeapOperand(obj, offset);
+    if (use_load_acquire) {
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+    } else {
+      Load(type, ref_reg, field);
     }
   }
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ B(codegen_->GetLabelOf(default_block));
+  // Object* ref = ref_addr->AsMirrorPtr()
+  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
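+// The Tbnz above is the whole fast-path cost of the gray check: with white == 0,
+// gray == 1 and black == 2, bit kReadBarrierStateShift of the lock word is set
+// only for gray objects. A hedged scalar model:
+//
+//   bool IsGray(uint32_t lock_word) {
+//     return ((lock_word >> LockWord::kReadBarrierStateShift) & 1u) != 0;
+//   }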
+
+void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                 Location out,
+                                                 Location ref,
+                                                 Location obj,
+                                                 uint32_t offset,
+                                                 Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location ref,
+                                                      Location obj,
+                                                      uint32_t offset,
+                                                      Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
   }
 }
 
+void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                        Location out,
+                                                        Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArm64PointerSize).SizeValue();
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kArm64PointerSize));
+    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
+        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->Out()), method_offset));
+  }
+}
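+// The two branches above differ by one indirection (field names here are
+// hypothetical): a vtable entry is embedded in the class object, while IMT
+// entries sit behind a pointer:
+//
+//   method = klass->embedded_vtable_[index];    // kVTable: one load, fixed offset
+//   method = klass->imt_ptr_->entries_[index];  // IMT: two dependent loads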
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 799f1bd..921ce10 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -17,16 +17,22 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 
+#include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "code_generator.h"
 #include "common_arm64.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
-#include "arch/arm64/quick_method_frame_info_arm64.h"
+#include "utils/type_reference.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
@@ -34,63 +40,110 @@
 class CodeGeneratorARM64;
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kArm64WordSize = kArm64PointerSize;
+static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
 
-static const vixl::Register kParameterCoreRegisters[] = {
-  vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7
+static const vixl::aarch64::Register kParameterCoreRegisters[] = {
+  vixl::aarch64::x1,
+  vixl::aarch64::x2,
+  vixl::aarch64::x3,
+  vixl::aarch64::x4,
+  vixl::aarch64::x5,
+  vixl::aarch64::x6,
+  vixl::aarch64::x7
 };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-static const vixl::FPRegister kParameterFPRegisters[] = {
-  vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7
+static const vixl::aarch64::FPRegister kParameterFPRegisters[] = {
+  vixl::aarch64::d0,
+  vixl::aarch64::d1,
+  vixl::aarch64::d2,
+  vixl::aarch64::d3,
+  vixl::aarch64::d4,
+  vixl::aarch64::d5,
+  vixl::aarch64::d6,
+  vixl::aarch64::d7
 };
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
-const vixl::Register tr = vixl::x19;                        // Thread Register
-static const vixl::Register kArtMethodRegister = vixl::x0;  // Method register on invoke.
+// Thread Register
+const vixl::aarch64::Register tr = vixl::aarch64::x19;
+// Method register on invoke.
+static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
+const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
+                                                             vixl::aarch64::ip1);
+const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
 
-const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
-const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
+const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr);
 
 // Callee-saved registers AAPCS64 (without x19 - Thread Register)
-const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
-                                                   vixl::kXRegSize,
-                                                   vixl::x20.code(),
-                                                   vixl::x30.code());
-const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
-                                                 vixl::kDRegSize,
-                                                 vixl::d8.code(),
-                                                 vixl::d15.code());
+const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister,
+                                                            vixl::aarch64::kXRegSize,
+                                                            vixl::aarch64::x20.GetCode(),
+                                                            vixl::aarch64::x30.GetCode());
+const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
+                                                          vixl::aarch64::kDRegSize,
+                                                          vixl::aarch64::d8.GetCode(),
+                                                          vixl::aarch64::d15.GetCode());
 Location ARM64ReturnLocation(Primitive::Type return_type);
 
 class SlowPathCodeARM64 : public SlowPathCode {
  public:
-  SlowPathCodeARM64() : entry_label_(), exit_label_() {}
+  explicit SlowPathCodeARM64(HInstruction* instruction)
+      : SlowPathCode(instruction), entry_label_(), exit_label_() {}
 
-  vixl::Label* GetEntryLabel() { return &entry_label_; }
-  vixl::Label* GetExitLabel() { return &exit_label_; }
+  vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
+  vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }
 
   void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
   void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
 
  private:
-  vixl::Label entry_label_;
-  vixl::Label exit_label_;
+  vixl::aarch64::Label entry_label_;
+  vixl::aarch64::Label exit_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
 };
 
-static const vixl::Register kRuntimeParameterCoreRegisters[] =
-    { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
+class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
+ public:
+  explicit JumpTableARM64(HPackedSwitch* switch_instr)
+    : switch_instr_(switch_instr), table_start_() {}
+
+  vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }
+
+  void EmitTable(CodeGeneratorARM64* codegen);
+
+ private:
+  HPackedSwitch* const switch_instr_;
+  vixl::aarch64::Label table_start_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
+};
+
+static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
+    { vixl::aarch64::x0,
+      vixl::aarch64::x1,
+      vixl::aarch64::x2,
+      vixl::aarch64::x3,
+      vixl::aarch64::x4,
+      vixl::aarch64::x5,
+      vixl::aarch64::x6,
+      vixl::aarch64::x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static const vixl::FPRegister kRuntimeParameterFpuRegisters[] =
-    { vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7 };
+static const vixl::aarch64::FPRegister kRuntimeParameterFpuRegisters[] =
+    { vixl::aarch64::d0,
+      vixl::aarch64::d1,
+      vixl::aarch64::d2,
+      vixl::aarch64::d3,
+      vixl::aarch64::d4,
+      vixl::aarch64::d5,
+      vixl::aarch64::d6,
+      vixl::aarch64::d7 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
-class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
+                                                                vixl::aarch64::FPRegister> {
  public:
   static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 
@@ -107,7 +160,8 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
 };
 
-class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
+                                                            vixl::aarch64::FPRegister> {
  public:
   InvokeDexCallingConvention()
       : CallingConvention(kParameterCoreRegisters,
@@ -147,30 +201,30 @@
   FieldAccessCallingConventionARM64() {}
 
   Location GetObjectLocation() const OVERRIDE {
-    return helpers::LocationFrom(vixl::x1);
+    return helpers::LocationFrom(vixl::aarch64::x1);
   }
   Location GetFieldIndexLocation() const OVERRIDE {
-    return helpers::LocationFrom(vixl::x0);
+    return helpers::LocationFrom(vixl::aarch64::x0);
   }
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return helpers::LocationFrom(vixl::x0);
+    return helpers::LocationFrom(vixl::aarch64::x0);
   }
   Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
     return Primitive::Is64BitType(type)
-        ? helpers::LocationFrom(vixl::x2)
+        ? helpers::LocationFrom(vixl::aarch64::x2)
         : (is_instance
-            ? helpers::LocationFrom(vixl::x2)
-            : helpers::LocationFrom(vixl::x1));
+            ? helpers::LocationFrom(vixl::aarch64::x2)
+            : helpers::LocationFrom(vixl::aarch64::x1));
   }
   Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return helpers::LocationFrom(vixl::d0);
+    return helpers::LocationFrom(vixl::aarch64::d0);
   }
 
  private:
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
 };
 
-class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
 
@@ -179,6 +233,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -188,24 +243,67 @@
   }
 
   Arm64Assembler* GetAssembler() const { return assembler_; }
-  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
  private:
-  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
+                                        vixl::aarch64::Register class_reg);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleCondition(HCondition* instruction);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::aarch64::Register obj,
+                               uint32_t offset,
+                               vixl::aarch64::Label* fixup_label = nullptr);
+
+  // Generate a floating-point comparison.
+  void GenerateFcmp(HInstruction* instruction);
+
   void HandleShift(HBinaryOperation* instr);
-  void GenerateImplicitNullCheck(HNullCheck* instruction);
-  void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
-                             vixl::Label* true_target,
-                             vixl::Label* false_target,
-                             vixl::Label* always_true_target);
+                             size_t condition_input_index,
+                             vixl::aarch64::Label* true_target,
+                             vixl::aarch64::Label* false_target);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -228,6 +326,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -241,6 +340,7 @@
   void HandleFieldSet(HInstruction* instruction);
   void HandleFieldGet(HInstruction* instruction);
   void HandleInvoke(HInvoke* instr);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
@@ -263,12 +363,12 @@
 
  private:
   Arm64Assembler* GetAssembler() const;
-  vixl::MacroAssembler* GetVIXLAssembler() const {
-    return GetAssembler()->vixl_masm_;
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
+    return GetAssembler()->GetVIXLAssembler();
   }
 
   CodeGeneratorARM64* const codegen_;
-  vixl::UseScratchRegisterScope vixl_temps_;
+  vixl::aarch64::UseScratchRegisterScope vixl_temps_;
 
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
 };
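
The resolver above holds a vixl::aarch64::UseScratchRegisterScope for the whole
move-resolution pass. As a sketch of that VIXL facility in isolation (header
path assumed from VIXL's post-namespace-split layout; the function itself is
illustrative, not ART code):

#include "aarch64/macro-assembler-aarch64.h"

using vixl::aarch64::MacroAssembler;
using vixl::aarch64::Register;
using vixl::aarch64::UseScratchRegisterScope;

// Swap two GPRs through a temporary borrowed from the assembler's scratch
// pool (x16/x17 by default); the register is returned when `temps` dies.
void EmitSwap(MacroAssembler* masm, const Register& a, const Register& b) {
  UseScratchRegisterScope temps(masm);
  Register tmp = temps.AcquireX();
  masm->Mov(tmp, a);
  masm->Mov(a, b);
  masm->Mov(b, tmp);
}

int main() {
  MacroAssembler masm;
  EmitSwap(&masm, vixl::aarch64::x0, vixl::aarch64::x1);
  masm.FinalizeCode();
  return 0;
}
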
@@ -284,17 +384,16 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
-  vixl::CPURegList GetFramePreservedCoreRegisters() const;
-  vixl::CPURegList GetFramePreservedFPRegisters() const;
+  vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
+  vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  vixl::Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<vixl::Label>(block_labels_, block);
+  vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
+    block = FirstNonEmptyBlock(block);
+    return &(block_labels_[block->GetBlockId()]);
   }
 
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-
   size_t GetWordSize() const OVERRIDE {
     return kArm64WordSize;
   }
@@ -304,29 +403,28 @@
     return kArm64WordSize;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
-    vixl::Label* block_entry_label = GetLabelOf(block);
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+    vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
-    return block_entry_label->location();
+    return block_entry_label->GetLocation();
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
   Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
   const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
-  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
   // Emit a write barrier.
-  void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
+  void MarkGCCard(vixl::aarch64::Register object,
+                  vixl::aarch64::Register value,
+                  bool value_can_be_null);
+
+  void GenerateMemoryBarrier(MemBarrierKind kind);
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
-
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
@@ -339,8 +437,8 @@
   // (xzr, wzr), or make for poor allocatable registers (sp alignment
   // requirements, etc.). This also facilitates our task as all other registers
   // can easily be mapped to or from their type and index or code.
-  static const int kNumberOfAllocatableRegisters = vixl::kNumberOfRegisters - 1;
-  static const int kNumberOfAllocatableFPRegisters = vixl::kNumberOfFPRegisters;
+  static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
+  static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters;
   static constexpr int kNumberOfAllocatableRegisterPairs = 0;
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -355,21 +453,39 @@
   }
 
   void Initialize() OVERRIDE {
-    block_labels_ = CommonInitializeLabels<vixl::Label>();
+    block_labels_.resize(GetGraph()->GetBlocks().size());
+  }
+
+  // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
+  // These instructions can only encode offsets that are multiples of the register size accessed.
+  uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; }
+
+  JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
+    jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr));
+    return jump_tables_.back().get();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   // Code generation helpers.
-  void MoveConstant(vixl::CPURegister destination, HConstant* constant);
+  void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
-  void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
-  void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
+  void Load(Primitive::Type type,
+            vixl::aarch64::CPURegister dst,
+            const vixl::aarch64::MemOperand& src);
+  void Store(Primitive::Type type,
+             vixl::aarch64::CPURegister src,
+             const vixl::aarch64::MemOperand& dst);
+  void LoadAcquire(HInstruction* instruction,
+                   vixl::aarch64::CPURegister dst,
+                   const vixl::aarch64::MemOperand& src,
+                   bool needs_null_check);
+  void StoreRelease(Primitive::Type type,
+                    vixl::aarch64::CPURegister src,
+                    const vixl::aarch64::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -382,12 +498,34 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
     return false;
   }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
@@ -396,35 +534,170 @@
     UNIMPLEMENTED(FATAL);
   }
 
+  // Add a new PC-relative string patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = nullptr`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                 uint32_t string_index,
+                                                 vixl::aarch64::Label* adrp_label = nullptr);
+
+  // Add a new PC-relative type patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = nullptr`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
+                                               uint32_t type_index,
+                                               vixl::aarch64::Label* adrp_label = nullptr);
+
+  // Add a new PC-relative dex cache array patch for an instruction and return
+  // the label to be bound before the instruction. The instruction will be
+  // either the ADRP (pass `adrp_label = nullptr`) or the LDR (pass `adrp_label`
+  // pointing to the associated ADRP patch label).
+  vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch(
+      const DexFile& dex_file,
+      uint32_t element_offset,
+      vixl::aarch64::Label* adrp_label = nullptr);
+
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                                      uint32_t string_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                                    uint32_t type_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::aarch64::Register obj,
+                                             uint32_t offset,
+                                             vixl::aarch64::Register temp,
+                                             bool needs_null_check,
+                                             bool use_load_acquire);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::aarch64::Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             vixl::aarch64::Register temp,
+                                             bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::aarch64::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 size_t scale_factor,
+                                                 vixl::aarch64::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
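
The read-barrier comments above spell out the runtime contract; the one moving
part is the array-offset adjustment described for GenerateReadBarrierSlow. A
tiny model of that adjustment (assuming ART's 4-byte compressed heap
references; names are illustrative, not ART code):

#include <cstdint>
#include <cstdio>

// For fields the byte offset is used as-is; for arrays the element index is
// folded in before the value reaches artReadBarrierSlow.
uint32_t EffectiveOffset(uint32_t data_offset, bool has_index, uint32_t index) {
  return has_index ? data_offset + index * 4u : data_offset;
}

int main() {
  std::printf("field: %u\n", EffectiveOffset(8, false, 0));   // 8
  std::printf("array: %u\n", EffectiveOffset(12, true, 3));   // 12 + 3*4 = 24
  return 0;
}
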
+  void GenerateNop();
+
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
  private:
-  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
+  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
-                                          vixl::Literal<uint64_t>*,
+                                          vixl::aarch64::Literal<uint64_t>*,
                                           MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              vixl::aarch64::Literal<uint32_t>*,
+                                              StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            vixl::aarch64::Literal<uint32_t>*,
+                                            TypeReferenceValueComparator>;
 
-  vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
-  vixl::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
-                                                    MethodToLiteralMap* map);
-  vixl::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
-  vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
+                                                             Uint32ToLiteralMap* map);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
+                                                             MethodToLiteralMap* map);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
-  struct PcRelativeDexCacheAccessInfo {
-    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
-        : target_dex_file(dex_file), element_offset(element_off), label(), pc_insn_label() { }
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
+  // and boot image strings/types. The only difference is the interpretation
+  // of the `offset_or_index` field.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { }
 
     const DexFile& target_dex_file;
-    uint32_t element_offset;
-    // NOTE: Labels are bound to the end of the patched instruction because
-    // we don't know if there will be a veneer or how big it will be.
-    vixl::Label label;
-    vixl::Label* pc_insn_label;
+    // Either the dex cache array element offset or the string/type index.
+    uint32_t offset_or_index;
+    vixl::aarch64::Label label;
+    vixl::aarch64::Label* pc_insn_label;
   };
 
+  vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file,
+                                           uint32_t offset_or_index,
+                                           vixl::aarch64::Label* adrp_label,
+                                           ArenaDeque<PcRelativePatchInfo>* patches);
+
+  void EmitJumpTables();
+
   // Labels for each block that will be compiled.
-  vixl::Label* block_labels_;  // Indexed by block id.
-  vixl::Label frame_entry_label_;
+  // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
+  ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
+  vixl::aarch64::Label frame_entry_label_;
+  ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
 
   LocationsBuilderARM64 location_builder_;
   InstructionCodeGeneratorARM64 instruction_visitor_;
@@ -432,16 +705,29 @@
   Arm64Assembler assembler_;
   const Arm64InstructionSetFeatures& isa_features_;
 
-  // Deduplication map for 64-bit literals, used for non-patchable method address and method code.
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Deduplication map for 64-bit literals, used for non-patchable method
+  // address, method code, or string dex cache address.
   Uint64ToLiteralMap uint64_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
   MethodToLiteralMap call_patches_;
   // Relative call patch info.
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
-  ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
+  ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
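
For context on the NewPcRelative*Patch helpers in this class: ADRP materializes
the 4 KiB page of the target relative to the current PC page, and the paired
ADD (or LDR) supplies the low 12 bits; the linker later writes both immediates.
A minimal check of that address arithmetic in plain C++ (no ART types):

#include <cassert>
#include <cstdint>

// target = (pc & ~0xFFF) + (page_delta << 12) + lo12, where page_delta goes
// into the ADRP immediate and lo12 into the ADD immediate (or LDR offset).
uint64_t AdrpPlusAddTarget(uint64_t pc, uint64_t target) {
  int64_t page_delta =
      static_cast<int64_t>(target >> 12) - static_cast<int64_t>(pc >> 12);
  uint64_t lo12 = target & 0xFFFu;
  return (pc & ~UINT64_C(0xFFF)) + (static_cast<uint64_t>(page_delta) << 12) + lo12;
}

int main() {
  assert(AdrpPlusAddTarget(0x12345678, 0xabcdef10) == 0xabcdef10);
  return 0;
}
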
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8ba4556..a7fbc84 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -19,10 +19,12 @@
 #include "arch/mips/entrypoints_direct_mips.h"
 #include "arch/mips/instruction_set_features_mips.h"
 #include "art_method.h"
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
+#include "intrinsics_mips.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
@@ -37,12 +39,9 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;
 
-// We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr Register TMP = T8;
-static constexpr FRegister FTMP = F8;
-
-// ART Thread Register.
-static constexpr Register TR = S1;
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
 
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
@@ -146,12 +145,13 @@
   return MipsReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : instruction_(instruction) {}
+  explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -170,11 +170,15 @@
                                locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
-    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    mips_codegen->InvokeRuntime(entry_point_offset,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickThrowArrayBounds));
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -183,14 +187,12 @@
   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; }
 
  private:
-  HBoundsCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS);
 };
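
The same mechanical refactoring repeats for every slow path in this file: the
per-subclass `instruction_` member moves into the slow-path base class and each
constructor simply forwards it. The shape in miniature (heavily stripped down;
the real ART classes carry far more):

#include <cstdint>

struct HInstruction {
  uint32_t dex_pc = 0;
};

class SlowPathCodeMIPS {
 public:
  explicit SlowPathCodeMIPS(HInstruction* instruction) : instruction_(instruction) {}
  virtual ~SlowPathCodeMIPS() = default;

 protected:
  HInstruction* const instruction_;  // previously re-declared in every subclass
};

class NullCheckSlowPathMIPS : public SlowPathCodeMIPS {
 public:
  explicit NullCheckSlowPathMIPS(HInstruction* instr) : SlowPathCodeMIPS(instr) {}
};

int main() {
  HInstruction insn;
  NullCheckSlowPathMIPS path(&insn);
  (void)path;
  return 0;
}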
 
 class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : instruction_(instruction) {}
+  explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
@@ -212,7 +214,6 @@
   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; }
 
  private:
-  HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS);
 };
 
@@ -222,7 +223,7 @@
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit)
-      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeMIPS(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
@@ -281,7 +282,7 @@
 
 class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit LoadStringSlowPathMIPS(HLoadString* instruction) : instruction_(instruction) {}
+  explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -292,7 +293,8 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
                                 instruction_,
                                 instruction_->GetDexPc(),
@@ -311,14 +313,12 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; }
 
  private:
-  HLoadString* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS);
 };
 
 class NullCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit NullCheckSlowPathMIPS(HNullCheck* instr) : instruction_(instr) {}
+  explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
@@ -340,27 +340,23 @@
   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS"; }
 
  private:
-  HNullCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS);
 };
 
 class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
   SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor)
-      : instruction_(instruction), successor_(successor) {}
+      : SlowPathCodeMIPS(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickTestSuspend));
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -376,7 +372,6 @@
   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; }
 
  private:
-  HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
   HBasicBlock* const successor_;
 
@@ -388,7 +383,7 @@
 
 class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : instruction_(instruction) {}
+  explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -417,13 +412,11 @@
                                   dex_pc,
                                   this,
                                   IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -441,34 +434,29 @@
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; }
 
  private:
-  HInstruction* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS);
 };
 
 class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
-    : instruction_(instruction) {}
+  explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
+    : SlowPathCodeMIPS(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                 instruction_,
-                                dex_pc,
+                                instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
 
  private:
-  HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
 };
 
@@ -490,15 +478,32 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(&isa_features),
-      isa_features_(isa_features) {
+      assembler_(graph->GetArena(), &isa_features),
+      isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      clobbered_ra_(false) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
 
 #undef __
-#define __ down_cast<MipsAssembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
 
 void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
   // Ensure that we fix up branches.
@@ -611,15 +616,40 @@
     // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR
     // unpredictable and the following mfch1 will fail.
     __ Mfc1(TMP, f1);
-    __ Mfhc1(AT, f1);
+    __ MoveFromFpuHigh(AT, f1);
     __ Mtc1(r2_l, f1);
-    __ Mthc1(r2_h, f1);
+    __ MoveToFpuHigh(r2_h, f1);
     __ Move(r2_l, TMP);
     __ Move(r2_h, AT);
   } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false);
   } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) {
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
+  } else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
+             (loc1.IsStackSlot() && loc2.IsRegister())) {
+    Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>()
+                                     : loc2.AsRegister<Register>();
+    intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex()
+                                         : loc2.GetStackIndex();
+    __ Move(TMP, reg);
+    __ LoadFromOffset(kLoadWord, reg, SP, offset);
+    __ StoreToOffset(kStoreWord, TMP, SP, offset);
+  } else if ((loc1.IsRegisterPair() && loc2.IsDoubleStackSlot()) ||
+             (loc1.IsDoubleStackSlot() && loc2.IsRegisterPair())) {
+    Register reg_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>()
+                                           : loc2.AsRegisterPairLow<Register>();
+    Register reg_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>()
+                                           : loc2.AsRegisterPairHigh<Register>();
+    intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex()
+                                                 : loc2.GetStackIndex();
+    intptr_t offset_h = loc1.IsDoubleStackSlot() ? loc1.GetHighStackIndex(kMipsWordSize)
+                                                 : loc2.GetHighStackIndex(kMipsWordSize);
+    __ Move(TMP, reg_l);
+    __ LoadFromOffset(kLoadWord, reg_l, SP, offset_l);
+    __ StoreToOffset(kStoreWord, TMP, SP, offset_l);
+    __ Move(TMP, reg_h);
+    __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h);
+    __ StoreToOffset(kStoreWord, TMP, SP, offset_h);
   } else {
     LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported";
   }
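
The new register<->stack-slot case is the classic three-move swap through a
scratch register. The same dance expressed with plain values (TMP standing in
for the MIPS scratch register):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t reg = 7;    // value currently held in the register
  uint32_t slot = 42;  // value currently held in the stack slot
  uint32_t tmp = reg;  // __ Move(TMP, reg)
  reg = slot;          // __ LoadFromOffset(kLoadWord, reg, SP, offset)
  slot = tmp;          // __ StoreToOffset(kStoreWord, TMP, SP, offset)
  std::printf("reg=%u slot=%u\n", reg, slot);  // reg=42 slot=7
  return 0;
}
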
@@ -657,6 +687,28 @@
   }
 }
 
+void CodeGeneratorMIPS::ComputeSpillMask() {
+  core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+  fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // If there are FPU callee-saved registers and an odd number of GPR callee-saved
+  // registers, include the ZERO register to force alignment of FPU callee-saved registers
+  // within the stack frame.
+  if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
+  // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register
+  // (this can happen in leaf methods), artificially spill the ZERO register in order to
+  // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only
+  // spilled register.
+  // TODO: Can this be improved? It causes creation of a stack frame (while RA might be
+  // saved in an unused temporary register) and saving of RA and the current method pointer
+  // in the frame.
+  if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
+}
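
A standalone check of the alignment rule above: the 8-byte FPU save slots stay
aligned only when an even number of 4-byte GPR slots sits above them, so an odd
GPR count is padded with the ZERO register. Sketch (register numbering per MIPS
O32; C++20 std::popcount standing in for ART's POPCOUNT):

#include <bit>
#include <cstdint>
#include <cstdio>

constexpr uint32_t kZeroBit = 1u << 0;   // $zero, register 0
constexpr uint32_t kRaBit   = 1u << 31;  // $ra, register 31

uint32_t AlignCoreSpillMask(uint32_t core_mask, uint32_t fpu_mask) {
  // Pad with $zero (never actually stored) when FPU saves exist and the GPR
  // save count is odd, keeping the doubleword slots 8-byte aligned.
  if (fpu_mask != 0 && (std::popcount(core_mask) % 2) != 0) {
    core_mask |= kZeroBit;
  }
  return core_mask;
}

int main() {
  std::printf("%08x\n", AlignCoreSpillMask(kRaBit, 1u << 20));  // 80000001
  return 0;
}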
+
 static dwarf::Reg DWARFReg(Register reg) {
   return dwarf::Reg::MipsCore(static_cast<int>(reg));
 }
@@ -686,105 +738,61 @@
   }
 
   // Spill callee-saved registers.
-  // Note that their cumulative size is small and they can be indexed using
-  // 16-bit offsets.
 
-  // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-  uint32_t ofs = FrameEntrySpillSize();
-  bool unaligned_float = ofs & 0x7;
-  bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
+  uint32_t ofs = GetFrameSize();
   __ IncreaseFrameSize(ofs);
 
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMipsWordSize;
-      __ Sw(reg, SP, ofs);
+  for (uint32_t mask = core_spill_mask_; mask != 0; ) {
+    Register reg = static_cast<Register>(MostSignificantBit(mask));
+    mask ^= 1u << reg;
+    ofs -= kMipsWordSize;
+    // The ZERO register is only included for alignment.
+    if (reg != ZERO) {
+      __ StoreToOffset(kStoreWord, reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
-  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
-    FRegister reg = kFpuCalleeSaves[i];
-    if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMipsDoublewordSize;
-      // TODO: Change the frame to avoid unaligned accesses for fpu registers.
-      if (unaligned_float) {
-        if (fpu_32bit) {
-          __ Swc1(reg, SP, ofs);
-          __ Swc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
-        } else {
-          __ Mfhc1(TMP, reg);
-          __ Swc1(reg, SP, ofs);
-          __ Sw(TMP, SP, ofs + 4);
-        }
-      } else {
-        __ Sdc1(reg, SP, ofs);
-      }
-      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
-    }
+  for (uint32_t mask = fpu_spill_mask_; mask != 0; ) {
+    FRegister reg = static_cast<FRegister>(MostSignificantBit(mask));
+    mask ^= 1u << reg;
+    ofs -= kMipsDoublewordSize;
+    __ StoreDToOffset(reg, SP, ofs);
+    // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
   }
 
-  // Allocate the rest of the frame and store the current method pointer
-  // at its end.
-
-  __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
-  static_assert(IsInt<16>(kCurrentMethodStackOffset),
-                "kCurrentMethodStackOffset must fit into int16_t");
-  __ Sw(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  // Store the current method pointer.
+  __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
 }
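
The spill loop above walks the mask from the most significant bit downward,
handing out descending offsets from the frame size. The traversal in isolation
(__builtin_clz as a stand-in for ART's MostSignificantBit; values illustrative):

#include <cstdint>
#include <cstdio>

int MostSignificantBit(uint32_t mask) {
  return 31 - __builtin_clz(mask);  // mask must be non-zero
}

int main() {
  uint32_t mask = (1u << 31) | (1u << 30) | (1u << 16);  // say RA, FP, S0
  uint32_t ofs = 128;                                    // pretend frame size
  while (mask != 0) {
    int reg = MostSignificantBit(mask);
    mask ^= 1u << reg;
    ofs -= 4;  // kMipsWordSize
    std::printf("store r%d at SP+%u\n", reg, ofs);
  }
  return 0;
}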
 
 void CodeGeneratorMIPS::GenerateFrameExit() {
   __ cfi().RememberState();
 
   if (!HasEmptyFrame()) {
-    // Deallocate the rest of the frame.
-
-    __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
     // Restore callee-saved registers.
-    // Note that their cumulative size is small and they can be indexed using
-    // 16-bit offsets.
 
-    // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-    uint32_t ofs = 0;
-    bool unaligned_float = FrameEntrySpillSize() & 0x7;
-    bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      FRegister reg = kFpuCalleeSaves[i];
-      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-        if (unaligned_float) {
-          if (fpu_32bit) {
-            __ Lwc1(reg, SP, ofs);
-            __ Lwc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
-          } else {
-            __ Lwc1(reg, SP, ofs);
-            __ Lw(TMP, SP, ofs + 4);
-            __ Mthc1(TMP, reg);
-          }
-        } else {
-          __ Ldc1(reg, SP, ofs);
-        }
-        ofs += kMipsDoublewordSize;
-        // TODO: __ cfi().Restore(DWARFReg(reg));
-      }
-    }
-
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      Register reg = kCoreCalleeSaves[i];
-      if (allocated_registers_.ContainsCoreRegister(reg)) {
-        __ Lw(reg, SP, ofs);
-        ofs += kMipsWordSize;
+    // For better instruction scheduling, restore RA before other registers.
+    uint32_t ofs = GetFrameSize();
+    for (uint32_t mask = core_spill_mask_; mask != 0; ) {
+      Register reg = static_cast<Register>(MostSignificantBit(mask));
+      mask ^= 1u << reg;
+      ofs -= kMipsWordSize;
+      // The ZERO register is only included for alignment.
+      if (reg != ZERO) {
+        __ LoadFromOffset(kLoadWord, reg, SP, ofs);
         __ cfi().Restore(DWARFReg(reg));
       }
     }
 
-    DCHECK_EQ(ofs, FrameEntrySpillSize());
-    __ DecreaseFrameSize(ofs);
+    for (uint32_t mask = fpu_spill_mask_; mask != 0; ) {
+      FRegister reg = static_cast<FRegister>(MostSignificantBit(mask));
+      mask ^= 1u << reg;
+      ofs -= kMipsDoublewordSize;
+      __ LoadDFromOffset(reg, SP, ofs);
+      // TODO: __ cfi().Restore(DWARFReg(reg));
+    }
+
+    __ DecreaseFrameSize(GetFrameSize());
   }
 
   __ Jr(RA);
@@ -865,7 +873,7 @@
       Register dst_low =  destination.AsRegisterPairLow<Register>();
       FRegister src = source.AsFpuRegister<FRegister>();
       __ Mfc1(dst_low, src);
-      __ Mfhc1(dst_high, src);
+      __ MoveFromFpuHigh(dst_high, src);
     } else {
       DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
       int32_t off = source.GetStackIndex();
@@ -878,7 +886,7 @@
       Register src_high = source.AsRegisterPairHigh<Register>();
       Register src_low = source.AsRegisterPairLow<Register>();
       __ Mtc1(src_low, dst);
-      __ Mthc1(src_high, dst);
+      __ MoveToFpuHigh(src_high, dst);
     } else if (source.IsFpuRegister()) {
       __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
     } else {
@@ -957,77 +965,176 @@
   __ LoadConst32(dst, value);
 }
 
-void CodeGeneratorMIPS::Move(HInstruction* instruction,
-                             Location location,
-                             HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  Primitive::Type type = instruction->GetType();
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (instruction->IsCurrentMethod()) {
-    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (instruction->IsIntConstant()
-             || instruction->IsLongConstant()
-             || instruction->IsNullConstant()) {
-    MoveConstant(location, instruction->AsConstant());
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    if (temp_location.IsStackSlot()) {
-      Move32(location, temp_location);
-    } else {
-      DCHECK(temp_location.IsDoubleStackSlot());
-      Move64(location, temp_location);
-    }
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    if (Primitive::Is64BitType(type)) {
-      Move64(location, Location::DoubleStackSlot(stack_slot));
-    } else {
-      Move32(location, Location::StackSlot(stack_slot));
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    if (Primitive::Is64BitType(type)) {
-      Move64(location, locations->Out());
-    } else {
-      Move32(location, locations->Out());
-    }
-  }
-}
-
 void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* locations) {
   if (location.IsRegister()) {
     locations->AddTemp(location);
+  } else if (location.IsRegisterPair()) {
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
   } else {
     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
   }
 }
 
-Location CodeGeneratorMIPS::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
+void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      pc_relative_dex_cache_patches_.size() +
+      pc_relative_string_patches_.size() +
+      pc_relative_type_patches_.size() +
+      boot_image_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      boot_image_address_patches_.size();
+  linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
   }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::CodePatch(literal_offset,
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t base_element_offset = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    DCHECK(info.pc_rel_label.IsBound());
+    uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label);
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset,
+                                                              &dex_file,
+                                                              pc_rel_offset,
+                                                              base_element_offset));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t string_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset,
+                                                               &dex_file,
+                                                               pc_rel_offset,
+                                                               string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t type_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset,
+                                                             &dex_file,
+                                                             pc_rel_offset,
+                                                             type_index));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
 
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
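
The EmitLinkerPatches body above sums the sizes of all eight patch containers
and reserves the output vector once, so the push_back loops never reallocate.
The shape in miniature:

#include <cstdint>
#include <vector>

struct LinkerPatch { uint32_t literal_offset; };

int main() {
  std::vector<uint32_t> method_offsets = {0x10, 0x20};
  std::vector<uint32_t> call_offsets = {0x30};
  std::vector<LinkerPatch> patches;
  patches.reserve(method_offsets.size() + call_offsets.size());  // one allocation
  for (uint32_t off : method_offsets) patches.push_back({off});
  for (uint32_t off : call_offsets) patches.push_back({off});
  return patches.size() == 3 ? 0 : 1;
}
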
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
+  return &patches->back();
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
+                                                     MethodToLiteralMap* map) {
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                              uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                            uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
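
Every Deduplicate* helper above is the same memoized-factory pattern: look up
the key, create the literal once on a miss, and return the shared pointer from
then on. A stand-in for ArenaSafeMap::GetOrCreate built on std::map (the real
code arena-allocates its literals):

#include <cstdint>
#include <map>

struct Literal { uint32_t value; };

template <typename K, typename Factory>
Literal* GetOrCreate(std::map<K, Literal*>& map, const K& key, Factory&& make) {
  auto it = map.find(key);
  if (it == map.end()) {
    it = map.emplace(key, make()).first;  // create exactly once per key
  }
  return it->second;
}

int main() {
  std::map<uint32_t, Literal*> uint32_literals;
  Literal* a = GetOrCreate(uint32_literals, 42u, [] { return new Literal{42u}; });
  Literal* b = GetOrCreate(uint32_literals, 42u, [] { return new Literal{42u}; });
  bool deduplicated = (a == b);  // second request reuses the first literal
  delete a;
  return deduplicated ? 0 : 1;
}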
 
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
@@ -1038,14 +1145,14 @@
   __ LoadFromOffset(kLoadWord,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMipsWordSize>().Int32Value());
+                    Thread::CardTableOffset<kMipsPointerSize>().Int32Value());
   __ Srl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Addu(temp, card, temp);
   __ Sb(card, temp, 0);
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[A1_A2] = true;
 
@@ -1075,11 +1182,10 @@
     blocked_fpu_registers_[i] = true;
   }
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
       blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
     }
@@ -1099,52 +1205,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      MipsManagedRegister pair =
-          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        MipsManagedRegister current =
-            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  UNREACHABLE();
-}
-
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
   return kMipsWordSize;
@@ -1166,18 +1226,18 @@
 }
 
 void CodeGeneratorMIPS::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << MipsManagedRegister::FromCoreRegister(Register(reg));
+  stream << Register(reg);
 }
 
 void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << MipsManagedRegister::FromFRegister(FRegister(reg));
+  stream << FRegister(reg);
 }
 
 void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                       HInstruction* instruction,
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMipsWordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path,
@@ -1191,17 +1251,16 @@
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path,
                                       bool is_direct_entrypoint) {
+  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+  __ Jalr(T9);
   if (is_direct_entrypoint) {
     // Reserve argument space on stack (for $a0-$a3) for
     // entrypoints that directly reference native implementations.
     // Called function may use this space to store $a0-$a3 regs.
-    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
-  }
-  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
-  __ Jalr(T9);
-  __ Nop();
-  if (is_direct_entrypoint) {
+    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);  // Single instruction in delay slot.
     __ DecreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
+  } else {
+    __ Nop();  // In delay slot.
   }
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
@@ -1229,7 +1288,7 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMipsWordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMipsPointerSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnez(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -1242,7 +1301,7 @@
 
 InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
                                                            CodeGeneratorMIPS* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1275,15 +1334,9 @@
     }
 
     case Primitive::kPrimLong: {
-      // TODO: can 2nd param be const?
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      if (instruction->IsAdd() || instruction->IsSub()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-      } else {
-        DCHECK(instruction->IsAnd() || instruction->IsOr() || instruction->IsXor());
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
 
@@ -1350,34 +1403,142 @@
     }
 
     case Primitive::kPrimLong: {
-      // TODO: can 2nd param be const?
       Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
       Register dst_low = locations->Out().AsRegisterPairLow<Register>();
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
       Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
-      Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
-      Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>();
-
-      if (instruction->IsAnd()) {
-        __ And(dst_low, lhs_low, rhs_low);
-        __ And(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsOr()) {
-        __ Or(dst_low, lhs_low, rhs_low);
-        __ Or(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsXor()) {
-        __ Xor(dst_low, lhs_low, rhs_low);
-        __ Xor(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsAdd()) {
-        __ Addu(dst_low, lhs_low, rhs_low);
-        __ Sltu(TMP, dst_low, lhs_low);
-        __ Addu(dst_high, lhs_high, rhs_high);
-        __ Addu(dst_high, dst_high, TMP);
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      if (!use_imm) {
+        Register rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+        Register rhs_low = rhs_location.AsRegisterPairLow<Register>();
+        if (instruction->IsAnd()) {
+          __ And(dst_low, lhs_low, rhs_low);
+          __ And(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsOr()) {
+          __ Or(dst_low, lhs_low, rhs_low);
+          __ Or(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsXor()) {
+          __ Xor(dst_low, lhs_low, rhs_low);
+          __ Xor(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsAdd()) {
+          if (lhs_low == rhs_low) {
+            // Special case where lhs == rhs: the sum may overwrite both inputs,
+            // so compute the carry (the sign bit of lhs_low) before the addition.
+            __ Slt(TMP, lhs_low, ZERO);
+            __ Addu(dst_low, lhs_low, rhs_low);
+          } else {
+            __ Addu(dst_low, lhs_low, rhs_low);
+            // If the sum overwrites rhs, lhs remains unchanged, otherwise rhs remains unchanged.
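+            // The carry of a 32-bit unsigned add is 1 exactly when the result is
+            // less (unsigned) than an unchanged operand, e.g. 0xFFFFFFFF + 2 == 1
+            // and 1 < 2, so comparing against the surviving operand recovers it.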
+            __ Sltu(TMP, dst_low, (dst_low == rhs_low) ? lhs_low : rhs_low);
+          }
+          __ Addu(dst_high, lhs_high, rhs_high);
+          __ Addu(dst_high, dst_high, TMP);
+        } else {
+          DCHECK(instruction->IsSub());
+          __ Sltu(TMP, lhs_low, rhs_low);
+          __ Subu(dst_low, lhs_low, rhs_low);
+          __ Subu(dst_high, lhs_high, rhs_high);
+          __ Subu(dst_high, dst_high, TMP);
+        }
       } else {
-        DCHECK(instruction->IsSub());
-        __ Subu(dst_low, lhs_low, rhs_low);
-        __ Sltu(TMP, lhs_low, dst_low);
-        __ Subu(dst_high, lhs_high, rhs_high);
-        __ Subu(dst_high, dst_high, TMP);
+        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+        if (instruction->IsOr()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Ori(dst_low, lhs_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Or(dst_low, lhs_low, TMP);
+          }
+          if (IsUint<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Ori(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Or(dst_high, lhs_high, TMP);
+          }
+        } else if (instruction->IsXor()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Xori(dst_low, lhs_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Xor(dst_low, lhs_low, TMP);
+          }
+          if (IsUint<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Xori(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Xor(dst_high, lhs_high, TMP);
+          }
+        } else if (instruction->IsAnd()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            __ Andi(dst_low, lhs_low, low);
+          } else if (low != 0xFFFFFFFF) {
+            __ LoadConst32(TMP, low);
+            __ And(dst_low, lhs_low, TMP);
+          } else if (dst_low != lhs_low) {
+            __ Move(dst_low, lhs_low);
+          }
+          if (IsUint<16>(high)) {
+            __ Andi(dst_high, lhs_high, high);
+          } else if (high != 0xFFFFFFFF) {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ And(dst_high, lhs_high, TMP);
+          } else if (dst_high != lhs_high) {
+            __ Move(dst_high, lhs_high);
+          }
+        } else {
+          if (instruction->IsSub()) {
+            value = -value;
+          } else {
+            DCHECK(instruction->IsAdd());
+          }
+          int32_t low = Low32Bits(value);
+          int32_t high = High32Bits(value);
+          if (IsInt<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Addiu(dst_low, lhs_low, low);
+            }
+            if (low != 0) {
+              __ Sltiu(AT, dst_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Addu(dst_low, lhs_low, TMP);
+            __ Sltu(AT, dst_low, TMP);
+          }
+          if (IsInt<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Addiu(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Addu(dst_high, lhs_high, TMP);
+          }
+          if (low != 0) {
+            __ Addu(dst_high, dst_high, AT);
+          }
+        }
       }
       break;
     }
@@ -1410,18 +1571,21 @@
 }
 
 void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   Primitive::Type type = instr->GetResultType();
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
-    }
     default:
       LOG(FATAL) << "Unexpected shift type " << type;
   }
@@ -1430,7 +1594,7 @@
 static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte;
 
 void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
   LocationSummary* locations = instr->GetLocations();
   Primitive::Type type = instr->GetType();
 
@@ -1438,28 +1602,57 @@
   bool use_imm = rhs_location.IsConstant();
   Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
   int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
-  uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
-  uint32_t shift_value = rhs_imm & shift_mask;
+  const uint32_t shift_mask =
+      (type == Primitive::kPrimInt) ? kMaxIntShiftDistance : kMaxLongShiftDistance;
+  const uint32_t shift_value = rhs_imm & shift_mask;
+  // Are the INS (Insert Bit Field) and ROTR instructions supported?
+  bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 
   switch (type) {
     case Primitive::kPrimInt: {
       Register dst = locations->Out().AsRegister<Register>();
       Register lhs = locations->InAt(0).AsRegister<Register>();
       if (use_imm) {
-        if (instr->IsShl()) {
+        if (shift_value == 0) {
+          if (dst != lhs) {
+            __ Move(dst, lhs);
+          }
+        } else if (instr->IsShl()) {
           __ Sll(dst, lhs, shift_value);
         } else if (instr->IsShr()) {
           __ Sra(dst, lhs, shift_value);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srl(dst, lhs, shift_value);
+        } else {
+          if (has_ins_rotr) {
+            __ Rotr(dst, lhs, shift_value);
+          } else {
+            __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask);
+            __ Srl(dst, lhs, shift_value);
+            __ Or(dst, dst, TMP);
+          }
         }
       } else {
         if (instr->IsShl()) {
           __ Sllv(dst, lhs, rhs_reg);
         } else if (instr->IsShr()) {
           __ Srav(dst, lhs, rhs_reg);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srlv(dst, lhs, rhs_reg);
+        } else {
+          if (has_ins_rotr) {
+            __ Rotrv(dst, lhs, rhs_reg);
+          } else {
+            __ Subu(TMP, ZERO, rhs_reg);
+            // 32-bit shift instructions use the 5 least significant bits of the shift count, so
+            // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case
+            // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out
+            // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`,
+            // In other words, the OR'd values are equal.
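+            // For example, with rhs_reg == 8: TMP == -8 and -8 & 31 == 24, so the
+            // result is (lhs >> 8) | (lhs << 24), i.e. a right rotation by 8.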
+            __ Sllv(TMP, lhs, TMP);
+            __ Srlv(dst, lhs, rhs_reg);
+            __ Or(dst, dst, TMP);
+          }
         }
       }
       break;
@@ -1474,33 +1667,81 @@
           if (shift_value == 0) {
             codegen_->Move64(locations->Out(), locations->InAt(0));
           } else if (shift_value < kMipsBitsPerWord) {
-            if (instr->IsShl()) {
-              __ Sll(dst_low, lhs_low, shift_value);
-              __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
-              __ Sll(dst_high, lhs_high, shift_value);
-              __ Or(dst_high, dst_high, TMP);
-            } else if (instr->IsShr()) {
-              __ Sra(dst_high, lhs_high, shift_value);
-              __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-              __ Srl(dst_low, lhs_low, shift_value);
-              __ Or(dst_low, dst_low, TMP);
+            if (has_ins_rotr) {
+              if (instr->IsShl()) {
+                __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+                __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
+                __ Sll(dst_low, lhs_low, shift_value);
+              } else if (instr->IsShr()) {
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                __ Sra(dst_high, lhs_high, shift_value);
+              } else if (instr->IsUShr()) {
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                __ Srl(dst_high, lhs_high, shift_value);
+              } else {
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                __ Srl(dst_high, lhs_high, shift_value);
+                __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value);
+              }
             } else {
-              __ Srl(dst_high, lhs_high, shift_value);
-              __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-              __ Srl(dst_low, lhs_low, shift_value);
-              __ Or(dst_low, dst_low, TMP);
+              if (instr->IsShl()) {
+                __ Sll(dst_low, lhs_low, shift_value);
+                __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
+                __ Sll(dst_high, lhs_high, shift_value);
+                __ Or(dst_high, dst_high, TMP);
+              } else if (instr->IsShr()) {
+                __ Sra(dst_high, lhs_high, shift_value);
+                __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Or(dst_low, dst_low, TMP);
+              } else if (instr->IsUShr()) {
+                __ Srl(dst_high, lhs_high, shift_value);
+                __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Or(dst_low, dst_low, TMP);
+              } else {
+                __ Srl(TMP, lhs_low, shift_value);
+                __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value);
+                __ Or(dst_low, dst_low, TMP);
+                __ Srl(TMP, lhs_high, shift_value);
+                __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+                __ Or(dst_high, dst_high, TMP);
+              }
             }
           } else {
-            shift_value -= kMipsBitsPerWord;
+            const uint32_t shift_value_high = shift_value - kMipsBitsPerWord;
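+            // Here 32 <= shift_value < 64, so only one source word contributes;
+            // e.g. a left shift by 40 yields dst_high = lhs_low << 8, dst_low = 0.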
             if (instr->IsShl()) {
-              __ Sll(dst_high, lhs_low, shift_value);
+              __ Sll(dst_high, lhs_low, shift_value_high);
               __ Move(dst_low, ZERO);
             } else if (instr->IsShr()) {
-              __ Sra(dst_low, lhs_high, shift_value);
+              __ Sra(dst_low, lhs_high, shift_value_high);
               __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
-            } else {
-              __ Srl(dst_low, lhs_high, shift_value);
+            } else if (instr->IsUShr()) {
+              __ Srl(dst_low, lhs_high, shift_value_high);
               __ Move(dst_high, ZERO);
+            } else {
+              if (shift_value == kMipsBitsPerWord) {
+                // 64-bit rotation by 32 is just a swap.
+                __ Move(dst_low, lhs_high);
+                __ Move(dst_high, lhs_low);
+              } else {
+                if (has_ins_rotr) {
+                  __ Srl(dst_low, lhs_high, shift_value_high);
+                  __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high);
+                  __ Srl(dst_high, lhs_low, shift_value_high);
+                  __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high);
+                } else {
+                  __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high);
+                  __ Srl(dst_low, lhs_high, shift_value_high);
+                  __ Or(dst_low, dst_low, TMP);
+                  __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high);
+                  __ Srl(dst_high, lhs_low, shift_value_high);
+                  __ Or(dst_high, dst_high, TMP);
+                }
+              }
             }
           }
       } else {
@@ -1527,7 +1768,7 @@
           __ Beqz(TMP, &done);
           __ Move(dst_low, dst_high);
           __ Sra(dst_high, dst_high, 31);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srlv(dst_high, lhs_high, rhs_reg);
           __ Nor(AT, ZERO, rhs_reg);
           __ Sll(TMP, lhs_high, 1);
@@ -1538,6 +1779,21 @@
           __ Beqz(TMP, &done);
           __ Move(dst_low, dst_high);
           __ Move(dst_high, ZERO);
+        } else {
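+          // Ror with a variable distance: rotate the pair right by rhs_reg & 31
+          // using shift pairs, then swap the two words if bit 5 of rhs_reg is
+          // set (an extra rotation by 32).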
+          __ Nor(AT, ZERO, rhs_reg);
+          __ Srlv(TMP, lhs_low, rhs_reg);
+          __ Sll(dst_low, lhs_high, 1);
+          __ Sllv(dst_low, dst_low, AT);
+          __ Or(dst_low, dst_low, TMP);
+          __ Srlv(TMP, lhs_high, rhs_reg);
+          __ Sll(dst_high, lhs_low, 1);
+          __ Sllv(dst_high, dst_high, AT);
+          __ Or(dst_high, dst_high, TMP);
+          __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+          __ Beqz(TMP, &done);
+          __ Move(TMP, dst_high);
+          __ Move(dst_high, dst_low);
+          __ Move(dst_low, TMP);
         }
         __ Bind(&done);
       }
@@ -1581,11 +1837,11 @@
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1599,7 +1855,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1613,7 +1868,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1628,7 +1882,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1645,7 +1898,6 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1660,7 +1912,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Register out = locations->Out().AsRegisterPairLow<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1675,7 +1926,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       FRegister out = locations->Out().AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1690,7 +1940,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       FRegister out = locations->Out().AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1719,7 +1968,7 @@
 
 void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -1727,12 +1976,11 @@
 }
 
 void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
-  Primitive::Type value_type = instruction->GetComponentType();
-  bool is_object = value_type == Primitive::kPrimNot;
+  bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (is_object) {
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
+  if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -1957,13 +2205,15 @@
 void LocationsBuilderMIPS::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
 
   switch (in_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -1972,13 +2222,11 @@
       break;
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1987,15 +2235,28 @@
 
 void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Register res = locations->Out().AsRegister<Register>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   //  0 if: left == right
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Register rhs = locations->InAt(1).AsRegister<Register>();
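+      // Computes res = (lhs > rhs) - (lhs < rhs), i.e. -1, 0 or +1, branch-free.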
+      __ Slt(TMP, lhs, rhs);
+      __ Slt(res, rhs, lhs);
+      __ Subu(res, res, TMP);
+      break;
+    }
     case Primitive::kPrimLong: {
       MipsLabel done;
-      Register res = locations->Out().AsRegister<Register>();
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
       Register lhs_low  = locations->InAt(0).AsRegisterPairLow<Register>();
       Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
@@ -2012,45 +2273,84 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      bool gt_bias = instruction->IsGtBias();
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      MipsLabel done;
+      if (isR6) {
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ LoadConst32(res, 0);
+        __ Bc1nez(FTMP, &done);
+        if (gt_bias) {
+          __ CmpLtS(FTMP, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, 1);
+        } else {
+          __ CmpLtS(FTMP, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, -1);
+        }
+      } else {
+        if (gt_bias) {
+          __ ColtS(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1t(0, &done);
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(res, 1);
+          __ Movt(res, ZERO, 0);
+        } else {
+          __ ColtS(0, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1t(0, &done);
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Movt(res, ZERO, 0);
+        }
+      }
+      __ Bind(&done);
+      break;
+    }
     case Primitive::kPrimDouble: {
-      int32_t entry_point_offset;
-      bool direct;
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmpgFloat);
-          direct = IsDirectEntrypoint(kQuickCmpgFloat);
+      bool gt_bias = instruction->IsGtBias();
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      MipsLabel done;
+      if (isR6) {
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ LoadConst32(res, 0);
+        __ Bc1nez(FTMP, &done);
+        if (gt_bias) {
+          __ CmpLtD(FTMP, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, 1);
         } else {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmplFloat);
-          direct = IsDirectEntrypoint(kQuickCmplFloat);
+          __ CmpLtD(FTMP, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, -1);
         }
       } else {
-        if (instruction->IsGtBias()) {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmpgDouble);
-          direct = IsDirectEntrypoint(kQuickCmpgDouble);
+        if (gt_bias) {
+          __ ColtD(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1t(0, &done);
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(res, 1);
+          __ Movt(res, ZERO, 0);
         } else {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmplDouble);
-          direct = IsDirectEntrypoint(kQuickCmplDouble);
+          __ ColtD(0, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1t(0, &done);
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Movt(res, ZERO, 0);
         }
       }
-      codegen_->InvokeRuntime(entry_point_offset,
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
-        }
-      } else {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
-        }
-      }
+      __ Bind(&done);
       break;
     }
 
@@ -2059,170 +2359,239 @@
   }
 }
 
-void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (instruction->NeedsMaterialization()) {
+  switch (instruction->InputAt(0)->GetType()) {
+    default:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      break;
+  }
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
-void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
-  // TODO: generalize to long
-  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
 
+  Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
   Register dst = locations->Out().AsRegister<Register>();
+  MipsLabel true_label;
 
-  Register lhs = locations->InAt(0).AsRegister<Register>();
-  Location rhs_location = locations->InAt(1);
+  switch (type) {
+    default:
+      // Integer case.
+      GenerateIntCompare(instruction->GetCondition(), locations);
+      return;
 
-  Register rhs_reg = ZERO;
-  int64_t rhs_imm = 0;
-  bool use_imm = rhs_location.IsConstant();
-  if (use_imm) {
-    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-  } else {
-    rhs_reg = rhs_location.AsRegister<Register>();
+    case Primitive::kPrimLong:
+      // TODO: don't use branches.
+      GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO: don't use branches.
+      GenerateFpCompareAndBranch(instruction->GetCondition(),
+                                 instruction->IsGtBias(),
+                                 type,
+                                 locations,
+                                 &true_label);
+      break;
   }
 
-  IfCondition if_cond = instruction->GetCondition();
+  // Convert the branches into the result.
+  MipsLabel done;
 
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
-      } else {
-        __ Sltu(dst, ZERO, dst);
-      }
-      break;
+  // False case: result = 0.
+  __ LoadConst32(dst, 0);
+  __ B(&done);
 
-    case kCondLT:
-    case kCondGE:
-      if (use_imm && IsInt<16>(rhs_imm)) {
-        __ Slti(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondGE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the slt instruction but no sge.
-        __ Xori(dst, dst, 1);
-      }
-      break;
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ LoadConst32(dst, 1);
+  __ Bind(&done);
+}
 
-    case kCondLE:
-    case kCondGT:
-      if (use_imm && IsInt<16>(rhs_imm + 1)) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Slti(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondGT) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the slti instruction but no sgti.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, rhs_reg, lhs);
-        if (if_cond == kCondLE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the slt instruction but no sle.
-          __ Xori(dst, dst, 1);
-        }
-      }
-      break;
+void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
 
-    case kCondB:
-    case kCondAE:
-      // Use sltiu instruction if rhs_imm is in range [0, 32767] or in
-      // [max_unsigned - 32767 = 0xffff8000, max_unsigned = 0xffffffff].
-      if (use_imm &&
-          (IsUint<15>(rhs_imm) ||
-              IsUint<15>(rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
-        if (IsUint<15>(rhs_imm)) {
-          __ Sltiu(dst, lhs, rhs_imm);
-        } else {
-          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
-          // and then used as unsigned integer (range [0xffff8000, 0xffffffff]).
-          __ Sltiu(dst, lhs, rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondAE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the sltu instruction but no sgeu.
-        __ Xori(dst, dst, 1);
-      }
-      break;
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
 
-    case kCondBE:
-    case kCondA:
-      // Use sltiu instruction if rhs_imm is in range [0, 32766] or in
-      // [max_unsigned - 32767 - 1 = 0xffff7fff, max_unsigned - 1 = 0xfffffffe].
-      // lhs <= rhs is simulated via lhs < rhs + 1.
-      if (use_imm && (rhs_imm != -1) &&
-          (IsUint<15>(rhs_imm + 1) ||
-              IsUint<15>(rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
-        if (IsUint<15>(rhs_imm + 1)) {
-          // Simulate lhs <= rhs via lhs < rhs + 1.
-          __ Sltiu(dst, lhs, rhs_imm + 1);
-        } else {
-          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
-          // and then used as unsigned integer (range [0xffff8000, 0xffffffff] where rhs_imm
-          // is in range [0xffff7fff, 0xfffffffe] since lhs <= rhs is simulated via lhs < rhs + 1).
-          __ Sltiu(dst, lhs, rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
-        }
-        if (if_cond == kCondA) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the sltiu instruction but no sgtiu.
-          __ Xori(dst, dst, 1);
-        }
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ Move(out, ZERO);
+  } else {
+    if (imm == -1) {
+      __ Subu(out, ZERO, dividend);
+    } else if (out != dividend) {
+      __ Move(out, dividend);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+  int ctz_imm = CTZ(abs_imm);
+
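+  // Both Div and Rem round toward zero, so for a negative dividend a bias of
+  // abs_imm - 1 is added before the shift/mask; e.g. -7 / 4 is computed as
+  // (-7 + 3) >> 2 == -1, whereas -7 >> 2 alone would yield -2.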
+  if (instruction->IsDiv()) {
+    if (ctz_imm == 1) {
+      // Fast path for division by +/-2, which is very common.
+      __ Srl(TMP, dividend, 31);
+    } else {
+      __ Sra(TMP, dividend, 31);
+      __ Srl(TMP, TMP, 32 - ctz_imm);
+    }
+    __ Addu(out, dividend, TMP);
+    __ Sra(out, out, ctz_imm);
+    if (imm < 0) {
+      __ Subu(out, ZERO, out);
+    }
+  } else {
+    if (ctz_imm == 1) {
+      // Fast path for modulo +/-2, which is very common.
+      __ Sra(TMP, dividend, 31);
+      __ Subu(out, dividend, TMP);
+      __ Andi(out, out, 1);
+      __ Addu(out, out, TMP);
+    } else {
+      __ Sra(TMP, dividend, 31);
+      __ Srl(TMP, TMP, 32 - ctz_imm);
+      __ Addu(out, dividend, TMP);
+      if (IsUint<16>(abs_imm - 1)) {
+        __ Andi(out, out, abs_imm - 1);
       } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, rhs_reg, lhs);
-        if (if_cond == kCondBE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the sltu instruction but no sleu.
-          __ Xori(dst, dst, 1);
-        }
+        __ Sll(out, out, 32 - ctz_imm);
+        __ Srl(out, out, 32 - ctz_imm);
       }
-      break;
+      __ Subu(out, out, TMP);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
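+  // For example (constants as in Hacker's Delight), imm == 7 gives
+  // magic == 0x92492493 and shift == 2; since that magic is negative while
+  // imm is positive, the dividend is added back to the high product below.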
+
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  __ LoadConst32(TMP, magic);
+  if (isR6) {
+    __ MuhR6(TMP, dividend, TMP);
+  } else {
+    __ MultR2(dividend, TMP);
+    __ Mfhi(TMP);
+  }
+  if (imm > 0 && magic < 0) {
+    __ Addu(TMP, TMP, dividend);
+  } else if (imm < 0 && magic > 0) {
+    __ Subu(TMP, TMP, dividend);
+  }
+
+  if (shift != 0) {
+    __ Sra(TMP, TMP, shift);
+  }
+
+  if (instruction->IsDiv()) {
+    __ Sra(out, TMP, 31);
+    __ Subu(out, TMP, out);
+  } else {
+    __ Sra(AT, TMP, 31);
+    __ Subu(AT, TMP, AT);
+    __ LoadConst32(TMP, imm);
+    if (isR6) {
+      __ MulR6(TMP, AT, TMP);
+    } else {
+      __ MulR2(TMP, AT, TMP);
+    }
+    __ Subu(out, dividend, TMP);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Register out = locations->Out().AsRegister<Register>();
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    Register dividend = locations->InAt(0).AsRegister<Register>();
+    Register divisor = second.AsRegister<Register>();
+    bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+    if (instruction->IsDiv()) {
+      if (isR6) {
+        __ DivR6(out, dividend, divisor);
+      } else {
+        __ DivR2(out, dividend, divisor);
+      }
+    } else {
+      if (isR6) {
+        __ ModR6(out, dividend, divisor);
+      } else {
+        __ ModR2(out, dividend, divisor);
+      }
+    }
   }
 }
 
 void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
   Primitive::Type type = div->GetResultType();
   LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2230,7 +2599,7 @@
   switch (type) {
     case Primitive::kPrimInt:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -2259,20 +2628,11 @@
 void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) {
   Primitive::Type type = instruction->GetType();
   LocationSummary* locations = instruction->GetLocations();
-  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   switch (type) {
-    case Primitive::kPrimInt: {
-      Register dst = locations->Out().AsRegister<Register>();
-      Register lhs = locations->InAt(0).AsRegister<Register>();
-      Register rhs = locations->InAt(1).AsRegister<Register>();
-      if (isR6) {
-        __ DivR6(dst, lhs, rhs);
-      } else {
-        __ DivR2(dst, lhs, rhs);
-      }
+    case Primitive::kPrimInt:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv),
                               instruction,
@@ -2317,6 +2677,7 @@
   Primitive::Type type = instruction->GetType();
 
   switch (type) {
+    case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
@@ -2419,158 +2780,773 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
-                                                         MipsLabel* true_target,
-                                                         MipsLabel* false_target,
-                                                         MipsLabel* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  HCondition* condition = cond->AsCondition();
-
-  if (cond->IsIntConstant()) {
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ B(always_true_target);
-      }
-      return;
-    } else {
-      DCHECK_EQ(cond_value, 0);
-    }
-  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
-    // The condition instruction has been materialized, compare the output to 0.
-    Location cond_val = instruction->GetLocations()->InAt(0);
-    DCHECK(cond_val.IsRegister());
-    __ Bnez(cond_val.AsRegister<Register>(), true_target);
+void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond,
+                                                      LocationSummary* locations) {
+  Register dst = locations->Out().AsRegister<Register>();
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
   } else {
-    // The condition instruction has not been materialized, use its inputs as
-    // the comparison and its condition as the branch condition.
-    Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>();
-    Location rhs_location = condition->GetLocations()->InAt(1);
-    Register rhs_reg = ZERO;
-    int32_t rhs_imm = 0;
-    bool use_imm = rhs_location.IsConstant();
-    if (use_imm) {
-      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-    } else {
-      rhs_reg = rhs_location.AsRegister<Register>();
-    }
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
 
-    IfCondition if_cond = condition->GetCondition();
-    if (use_imm && rhs_imm == 0) {
-      switch (if_cond) {
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm + 1);
+        if (cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm + 1);
+        if (cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
+                                                               LocationSummary* locations,
+                                                               MipsLabel* label) {
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int32_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  if (use_imm && rhs_imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // Unsigned <= 0 is equivalent to == 0.
+        __ Beqz(lhs, label);
+        break;
+      case kCondNE:
+      case kCondA:  // Unsigned > 0 is equivalent to != 0.
+        __ Bnez(lhs, label);
+        break;
+      case kCondLT:
+        __ Bltz(lhs, label);
+        break;
+      case kCondGE:
+        __ Bgez(lhs, label);
+        break;
+      case kCondLE:
+        __ Blez(lhs, label);
+        break;
+      case kCondGT:
+        __ Bgtz(lhs, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ B(label);
+        break;
+    }
+  } else {
+    if (use_imm) {
+      // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
+      rhs_reg = TMP;
+      __ LoadConst32(rhs_reg, rhs_imm);
+    }
+    switch (cond) {
+      case kCondEQ:
+        __ Beq(lhs, rhs_reg, label);
+        break;
+      case kCondNE:
+        __ Bne(lhs, rhs_reg, label);
+        break;
+      case kCondLT:
+        __ Blt(lhs, rhs_reg, label);
+        break;
+      case kCondGE:
+        __ Bge(lhs, rhs_reg, label);
+        break;
+      case kCondLE:
+        __ Bge(rhs_reg, lhs, label);
+        break;
+      case kCondGT:
+        __ Blt(rhs_reg, lhs, label);
+        break;
+      case kCondB:
+        __ Bltu(lhs, rhs_reg, label);
+        break;
+      case kCondAE:
+        __ Bgeu(lhs, rhs_reg, label);
+        break;
+      case kCondBE:
+        __ Bgeu(rhs_reg, lhs, label);
+        break;
+      case kCondA:
+        __ Bltu(rhs_reg, lhs, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond,
+                                                                LocationSummary* locations,
+                                                                MipsLabel* label) {
+  Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_high = ZERO;
+  Register rhs_low = ZERO;
+  int64_t imm = 0;
+  uint32_t imm_high = 0;
+  uint32_t imm_low = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    imm = rhs_location.GetConstant()->AsLongConstant()->GetValue();
+    imm_high = High32Bits(imm);
+    imm_low = Low32Bits(imm);
+  } else {
+    rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+    rhs_low = rhs_location.AsRegisterPairLow<Register>();
+  }
+
+  if (use_imm && imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // Unsigned <= 0 is equivalent to == 0.
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+      case kCondA:  // Unsigned > 0 is equivalent to != 0.
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ Bltz(lhs_high, label);
+        break;
+      case kCondGE:
+        __ Bgez(lhs_high, label);
+        break;
+      case kCondLE:
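+        // lhs <= 0 iff lhs is negative (AT == 0xFFFFFFFF is >= any TMP unsigned)
+        // or lhs is zero (AT == TMP == 0).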
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Bgeu(AT, TMP, label);
+        break;
+      case kCondGT:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Bltu(AT, TMP, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ B(label);
+        break;
+    }
+  } else if (use_imm) {
+    // TODO: more efficient comparison with constants without loading them into TMP/AT.
+    switch (cond) {
+      case kCondEQ:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(lhs_high, TMP, label);
+        __ Slt(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondGE:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(TMP, lhs_high, label);
+        __ Slt(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondLE:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(lhs_high, TMP, label);
+        __ Slt(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondGT:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(TMP, lhs_high, label);
+        __ Slt(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondB:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(lhs_high, TMP, label);
+        __ Sltu(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondAE:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(TMP, lhs_high, label);
+        __ Sltu(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondBE:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(lhs_high, TMP, label);
+        __ Sltu(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondA:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(TMP, lhs_high, label);
+        __ Sltu(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+    }
+  } else {
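+    // Same patterns as in the immediate case above, with the constant halves
+    // replaced by rhs_high/rhs_low.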
+    switch (cond) {
+      case kCondEQ:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ Blt(lhs_high, rhs_high, label);
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondGE:
+        __ Blt(rhs_high, lhs_high, label);
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondLE:
+        __ Blt(lhs_high, rhs_high, label);
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondGT:
+        __ Blt(rhs_high, lhs_high, label);
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondB:
+        __ Bltu(lhs_high, rhs_high, label);
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondAE:
+        __ Bltu(rhs_high, lhs_high, label);
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondBE:
+        __ Bltu(lhs_high, rhs_high, label);
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondA:
+        __ Bltu(rhs_high, lhs_high, label);
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
+                                                              bool gt_bias,
+                                                              Primitive::Type type,
+                                                              LocationSummary* locations,
+                                                              MipsLabel* label) {
+  FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
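+  // For conditions derived from a compare with gt_bias, unordered (NaN) operands
+  // are treated as "greater than".  The branch must be taken on NaN exactly when
+  // the condition would hold for a compare result of +1, which is why LT/LE use
+  // ordered compares under gt_bias (and unordered ones otherwise) while GT/GE do
+  // the opposite.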
+  if (type == Primitive::kPrimFloat) {
+    if (isR6) {
+      switch (cond) {
         case kCondEQ:
-          __ Beqz(lhs, true_target);
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Bc1nez(FTMP, label);
           break;
         case kCondNE:
-          __ Bnez(lhs, true_target);
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Bc1eqz(FTMP, label);
           break;
         case kCondLT:
-          __ Bltz(lhs, true_target);
-          break;
-        case kCondGE:
-          __ Bgez(lhs, true_target);
+          if (gt_bias) {
+            __ CmpLtS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltS(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
           break;
         case kCondLE:
-          __ Blez(lhs, true_target);
+          if (gt_bias) {
+            __ CmpLeS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleS(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
           break;
         case kCondGT:
-          __ Bgtz(lhs, true_target);
+          if (gt_bias) {
+            __ CmpUltS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtS(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
           break;
-        case kCondB:
-          break;  // always false
-        case kCondBE:
-          __ Beqz(lhs, true_target);  // <= 0 if zero
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeS(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
           break;
-        case kCondA:
-          __ Bnez(lhs, true_target);  // > 0 if non-zero
-          break;
-        case kCondAE:
-          __ B(true_target);  // always true
-          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
       }
     } else {
-      if (use_imm) {
-        // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
-        rhs_reg = TMP;
-        __ LoadConst32(rhs_reg, rhs_imm);
-      }
-      switch (if_cond) {
+      switch (cond) {
         case kCondEQ:
-          __ Beq(lhs, rhs_reg, true_target);
+          __ CeqS(0, lhs, rhs);
+          __ Bc1t(0, label);
           break;
         case kCondNE:
-          __ Bne(lhs, rhs_reg, true_target);
+          __ CeqS(0, lhs, rhs);
+          __ Bc1f(0, label);
           break;
         case kCondLT:
-          __ Blt(lhs, rhs_reg, true_target);
-          break;
-        case kCondGE:
-          __ Bge(lhs, rhs_reg, true_target);
+          if (gt_bias) {
+            __ ColtS(0, lhs, rhs);
+          } else {
+            __ CultS(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
           break;
         case kCondLE:
-          __ Bge(rhs_reg, lhs, true_target);
+          if (gt_bias) {
+            __ ColeS(0, lhs, rhs);
+          } else {
+            __ CuleS(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
           break;
         case kCondGT:
-          __ Blt(rhs_reg, lhs, true_target);
+          if (gt_bias) {
+            __ CultS(0, rhs, lhs);
+          } else {
+            __ ColtS(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
           break;
-        case kCondB:
-          __ Bltu(lhs, rhs_reg, true_target);
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleS(0, rhs, lhs);
+          } else {
+            __ ColeS(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
           break;
-        case kCondAE:
-          __ Bgeu(lhs, rhs_reg, true_target);
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Bc1nez(FTMP, label);
           break;
-        case kCondBE:
-          __ Bgeu(rhs_reg, lhs, true_target);
+        case kCondNE:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Bc1eqz(FTMP, label);
           break;
-        case kCondA:
-          __ Bltu(rhs_reg, lhs, true_target);
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltD(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
           break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleD(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtD(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeD(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqD(0, lhs, rhs);
+          __ Bc1t(0, label);
+          break;
+        case kCondNE:
+          __ CeqD(0, lhs, rhs);
+          __ Bc1f(0, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtD(0, lhs, rhs);
+          } else {
+            __ CultD(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeD(0, lhs, rhs);
+          } else {
+            __ CuleD(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultD(0, rhs, lhs);
+          } else {
+            __ ColtD(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleD(0, rhs, lhs);
+          } else {
+            __ ColeD(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
       }
     }
   }
-  if (false_target != nullptr) {
+}
+
+void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
+                                                         size_t condition_input_index,
+                                                         MipsLabel* true_target,
+                                                         MipsLabel* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against "true" (integer value 1).
+    if (cond->AsIntConstant()->IsTrue()) {
+      if (true_target != nullptr) {
+        __ B(true_target);
+      }
+    } else {
+      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      if (false_target != nullptr) {
+        __ B(false_target);
+      }
+    }
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    // The condition instruction has been materialized; compare its output to 0.
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+    DCHECK(cond_val.IsRegister());
+    if (true_target == nullptr) {
+      __ Beqz(cond_val.AsRegister<Register>(), false_target);
+    } else {
+      __ Bnez(cond_val.AsRegister<Register>(), true_target);
+    }
+  } else {
+    // The condition instruction has not been materialized; use its inputs as
+    // the comparison and its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    LocationSummary* locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    MipsLabel* branch_target = true_target;
+
+    if (true_target == nullptr) {
+      if_cond = condition->GetOppositeCondition();
+      branch_target = false_target;
+    }
+
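+    // All types other than long and the FP types (ints, references, etc.) take
+    // the 32-bit integer path via the default label.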
+    switch (type) {
+      default:
+        GenerateIntCompareAndBranch(if_cond, locations, branch_target);
+        break;
+      case Primitive::kPrimLong:
+        GenerateLongCompareAndBranch(if_cond, locations, branch_target);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+        break;
+    }
+  }
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ B(false_target);
   }
 }
 
 void LocationsBuilderMIPS::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
-  MipsLabel* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  MipsLabel* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  MipsLabel* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  MipsLabel* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathMIPS(deoptimize);
-  codegen_->AddSlowPath(slow_path);
-  MipsLabel* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  SlowPathCodeMIPS* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
+}
+
+void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  MipsLabel false_target;
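+  // The output register already holds the false value (input 0, SameAsFirstInput).
+  // Branch over the move below when the condition is false; otherwise fall through
+  // and overwrite it with the true value (input 1).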
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
+void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
+void CodeGeneratorMIPS::GenerateNop() {
+  __ Nop();
 }
 
 void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
@@ -2578,7 +3554,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -2588,7 +3564,8 @@
     if (field_type == Primitive::kPrimLong) {
       locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong));
     } else {
-      locations->SetOut(Location::RequiresFpuRegister());
+      // Use Location::Any() so that we can cope with running out of available fp registers.
+      locations->SetOut(Location::Any());
       // Need some temp core regs since FP results are returned in core registers
       Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong);
       locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>()));
@@ -2611,6 +3588,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2641,8 +3619,7 @@
 
   if (is_volatile && load_type == kLoadDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do implicit Null check
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2653,11 +3630,23 @@
                             IsDirectEntrypoint(kQuickA64Load));
     CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
     if (type == Primitive::kPrimDouble) {
-      // Need to move to FP regs since FP results are returned in core registers.
-      __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->Out().AsFpuRegister<FRegister>());
-      __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->Out().AsFpuRegister<FRegister>());
+      // FP results are returned in core registers. Need to move them.
+      Location out = locations->Out();
+      if (out.IsFpuRegister()) {
+        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+        __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         out.AsFpuRegister<FRegister>());
+      } else {
+        DCHECK(out.IsDoubleStackSlot());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(1).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(2).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex() + 4);
+      }
     }
   } else {
     if (!Primitive::IsFloatingPointType(type)) {
@@ -2665,21 +3654,34 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->Out().IsRegisterPair());
         dst = locations->Out().AsRegisterPairLow<Register>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
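+        // Load the half that does not clobber `obj` first, so that the base
+        // register is still valid for the second load when obj == dst.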
+        if (obj == dst) {
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+        } else {
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+        }
       } else {
         DCHECK(locations->Out().IsRegister());
         dst = locations->Out().AsRegister<Register>();
+        __ LoadFromOffset(load_type, dst, obj, offset);
       }
-      __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->Out().IsFpuRegister());
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadSFromOffset(dst, obj, offset);
       } else {
-        __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadDFromOffset(dst, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // For longs the implicit null check was already recorded above.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   if (is_volatile) {
@@ -2692,7 +3694,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -2703,7 +3705,8 @@
       locations->SetInAt(1, Location::RegisterPairLocation(
           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
     } else {
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      // Use Location::Any() so that we can cope with running out of available fp registers.
+      locations->SetInAt(1, Location::Any());
       // Pass FP parameters in core registers.
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
@@ -2725,6 +3728,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2755,17 +3759,34 @@
 
   if (is_volatile && store_type == kStoreDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do implicit Null check.
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
     if (type == Primitive::kPrimDouble) {
       // Pass FP parameters in core registers.
-      __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->InAt(1).AsFpuRegister<FRegister>());
-      __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->InAt(1).AsFpuRegister<FRegister>());
+      Location in = locations->InAt(1);
+      if (in.IsFpuRegister()) {
+        __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>());
+        __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                           in.AsFpuRegister<FRegister>());
+      } else if (in.IsDoubleStackSlot()) {
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(1).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex());
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(2).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex() + 4);
+      } else {
+        DCHECK(in.IsConstant());
+        DCHECK(in.GetConstant()->IsDoubleConstant());
+        int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue());
+        __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(),
+                       locations->GetTemp(1).AsRegister<Register>(),
+                       value);
+      }
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -2779,21 +3800,28 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->InAt(1).IsRegisterPair());
         src = locations->InAt(1).AsRegisterPairLow<Register>();
+        Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+        __ StoreToOffset(kStoreWord, src, obj, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
       } else {
         DCHECK(locations->InAt(1).IsRegister());
         src = locations->InAt(1).AsRegister<Register>();
+        __ StoreToOffset(store_type, src, obj, offset);
       }
-      __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->InAt(1).IsFpuRegister());
       FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreSToOffset(src, obj, offset);
       } else {
-        __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreDToOffset(src, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // For longs the implicit null check was already recorded above.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   // TODO: memory barriers?
@@ -2824,6 +3852,23 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
 }
 
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location root,
+    Register obj,
+    uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind =
       instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
@@ -2900,11 +3945,9 @@
 void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
@@ -2918,6 +3961,10 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadWord, temp, temp,
+      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -2930,32 +3977,236 @@
 }
 
 void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO: intrinsic function.
+  IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
 void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO: intrinsic function.
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = !isR6 &&
+      ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+       (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup));
+
+  if (invoke->HasPcRelativeDexCache()) {
+    // kDexCachePcRelative is mutually exclusive with
+    // kDirectAddressWithFixup/kCallDirectWithFixup.
+    CHECK(!has_extra_input);
+    has_extra_input = true;
+  }
+
+  IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && has_extra_input) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
+    return;
+  }
+
   HandleInvoke(invoke);
+
+  // Add the extra input register if either the dex cache array base register
+  // or the PC-relative base register for accessing literals is needed.
+  if (has_extra_input) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
-static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen ATTRIBUTE_UNUSED) {
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
   if (invoke->GetLocations()->Intrinsified()) {
-    // TODO: intrinsic function.
+    IntrinsicCodeGeneratorMIPS intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
     return true;
   }
   return false;
 }
 
+HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_string_load_kind;
+}
+
+HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_class_load_kind;
+}
+
+Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                  Register temp) {
+  CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = true;
+  bool fallback_call = true;
+  switch (dispatch_info.method_load_kind) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      fallback_load = has_irreducible_loops;
+      break;
+    default:
+      fallback_load = false;
+      break;
+  }
+  switch (dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      fallback_call = has_irreducible_loops;
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement this type.
+      break;
+    default:
+      fallback_call = false;
+      break;
+  }
+  if (fallback_load) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+    dispatch_info.method_load_data = 0;
+  }
+  if (fallback_call) {
+    dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    dispatch_info.direct_code_ptr = 0;
+  }
+  return dispatch_info;
+}
+
 void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
-  switch (invoke->GetMethodLoadKind()) {
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = isa_features_.IsR6();
+  // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = invoke->HasPcRelativeDexCache() ||
+      (!isR6 &&
+       ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+        (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)));
+  Register base_reg = has_extra_input
+      ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
+      : ZERO;
+
+  // For better instruction scheduling we load the direct code pointer before the method pointer.
+  switch (code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // T9 = invoke->GetDirectCodePtr();
+      __ LoadConst32(T9, invoke->GetDirectCodePtr());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 = code address from literal pool with link-time patch.
+      __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
+      break;
+    default:
+      break;
+  }
+
+  switch (method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ LoadFromOffset(kLoadWord,
@@ -2964,21 +4215,26 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-      // TODO: Implement this type. (Needs literal support.) At the moment, the
-      // CompilerDriver will not direct the backend to use this type for MIPS.
-      LOG(FATAL) << "Unsupported!";
-      UNREACHABLE();
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+      __ LoadLiteral(temp.AsRegister<Register>(),
+                     base_reg,
+                     DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
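+      // The dex cache arrays base register is biased by kDexCacheArrayLwOffset,
+      // presumably so that the signed 16-bit offset of lw can reach a full 64KB
+      // window around the anchor; compensate for that bias here.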
+      int32_t offset =
+          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
       Register method_reg;
       if (current_method.IsRegister()) {
@@ -2996,8 +4252,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadWord,
                         reg,
                         reg,
@@ -3006,30 +4263,29 @@
     }
   }
 
-  switch (invoke->GetCodePtrLocation()) {
+  switch (code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Bal(&frame_entry_label_);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // LR = invoke->GetDirectCodePtr();
-      __ LoadConst32(T9, invoke->GetDirectCodePtr());
-      // LR()
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 prepared above for better instruction scheduling.
+      // T9()
       __ Jalr(T9);
       __ Nop();
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement kCallPCRelative. For the moment, we fall back to kMethodCode.
-      FALLTHROUGH_INTENDED;
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
-      // TODO: Implement kDirectCodeFixup. For the moment, we fall back to kMethodCode.
-      FALLTHROUGH_INTENDED;
+      // TODO: Implement this type.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // T9 = callee_method->entry_point_from_quick_compiled_code_;
       __ LoadFromOffset(kLoadWord,
                         T9,
                         callee_method.AsRegister<Register>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMipsWordSize).Int32Value());
+                            kMipsPointerSize).Int32Value());
       // T9()
       __ Jalr(T9);
       __ Nop();
@@ -3039,9 +4295,9 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3055,25 +4311,19 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO: Try to generate intrinsics code.
+void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  Register temp = temp_location.AsRegister<Register>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
 
   // temp = object->GetClass();
-  if (receiver.IsStackSlot()) {
-    __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
-    __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
-  } else {
-    DCHECK(receiver.IsRegister());
-    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
-  }
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  DCHECK(receiver.IsRegister());
+  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3081,39 +4331,184 @@
   // T9();
   __ Jalr(T9);
   __ Nop();
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? LocationSummary::kCallOnSlowPath
-                                                              : LocationSummary::kNoCall;
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(V0),
+        /* code_generator_supports_read_barrier */ false);  // TODO: revisit this bool.
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
+  if (cls->NeedsAccessCheck()) {
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
+                            cls,
+                            cls->GetDexPc(),
+                            nullptr,
+                            IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+    return;
+  }
+
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
+
+  bool generate_null_check = false;
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(cls,
+                              out_loc,
+                              base_or_current_method_reg,
+                              ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                               cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
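+      // The 0x1234/0x5678 immediates below are placeholders; the hi/lo halves of
+      // the real PC-relative offset are patched in later via the recorded labels.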
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
+      } else {
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here; we'll use the assembler's label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
+      }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      int16_t offset = Low16Bits(address);
+      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
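+      // For example, for address 0x1234FFF0: offset = (int16_t)0xFFF0 = -16, so
+      // base_address = 0x12350000; lui loads the 0x1235 half and the root load
+      // below uses offset -16, which sign-extends back to the original address.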
+      __ Lui(out, High16Bits(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        base_or_current_method_reg,
+                        ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
     DCHECK(cls->CanCallRuntime());
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
-    __ Beqz(out, slow_path->GetEntryLabel());
+    if (generate_null_check) {
+      __ Beqz(out, slow_path->GetEntryLabel());
+    }
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
     } else {
@@ -3123,7 +4518,7 @@
 }
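
The kDexCacheAddress case above splits an absolute address into a LUI half and a sign-extended 16-bit load offset. A minimal host-side sketch of that arithmetic; Low16Bits/High16Bits are reimplemented here with assumed plain bit-extraction semantics matching their use above:

    #include <cassert>
    #include <cstdint>

    // Assumed semantics of the ART helpers of the same names: plain extraction.
    static uint16_t Low16Bits(uint32_t value) { return static_cast<uint16_t>(value); }
    static uint16_t High16Bits(uint32_t value) { return static_cast<uint16_t>(value >> 16); }

    int main() {
      uint32_t address = 0x12348000u;  // Arbitrary 4-byte aligned address with bit 15 set.
      // LW sign-extends its 16-bit immediate, so keep the offset signed...
      int16_t offset = Low16Bits(address);
      // ...and fold the sign extension into the base: for a negative offset this
      // bumps the upper half by one, which is exactly what LUI must compensate for.
      uint32_t base_address = address - offset;
      assert((base_address & 0xffffu) == 0u);
      // LUI out, High16Bits(base_address); LW out, offset(out) reconstructs address.
      uint32_t lui = static_cast<uint32_t>(High16Bits(base_address)) << 16;
      assert(lui + static_cast<int32_t>(offset) == address);
      return 0;
    }
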
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMipsWordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS::VisitLoadException(HLoadException* load) {
@@ -3145,43 +4540,100 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
-  locations->SetInAt(0, Location::RequiresRegister());
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  LocationSummary* locations = load->GetLocations();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
+
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                                 load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
+      } else {
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here; we'll use the assembler's label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
+      }
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
+    }
+    default:
+      break;
+  }
+
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
   SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
   codegen_->AddSlowPath(slow_path);
-
-  LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  __ Beqz(out, slow_path->GetEntryLabel());
+  __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -3193,7 +4645,7 @@
 
 void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3372,7 +4824,7 @@
 
 void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
@@ -3387,7 +4839,7 @@
   // Move a uint16_t value to a register.
   __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(
-      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+      GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
       instruction->GetDexPc(),
       nullptr,
@@ -3398,26 +4850,36 @@
 
 void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  Register current_method_register = calling_convention.GetRegisterAt(1);
-  __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
-  // Move an uint16_t value to a register.
-  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
-  codegen_->InvokeRuntime(
-      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
-      instruction,
-      instruction->GetDexPc(),
-      nullptr,
-      IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
+    __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
+    __ Jalr(T9);
+    __ Nop();
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(
+        GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
+        instruction,
+        instruction->GetDexPc(),
+        nullptr,
+        IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
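
For the IsStringAlloc() path above, the emitted sequence is a double indirection: load the StringFactory ArtMethod* from a Thread entrypoint slot off TR, load its code pointer, then JALR through T9. A toy C++ model of just that shape; all struct names and layouts here are illustrative stand-ins, not ART's real definitions:

    #include <cstdio>

    // Hypothetical stand-ins for the Thread entrypoint slot and the ArtMethod
    // code pointer; only the two-load-then-call shape is the point.
    struct Method {
      const char* name;
      void (*entry_point_from_quick_compiled_code)();  // code_offset slot
    };

    struct ThreadSelf {
      Method* pNewEmptyString;  // QUICK_ENTRY_POINT(pNewEmptyString) slot
    };

    static void NewEmptyStringCode() { std::puts("allocating empty string"); }

    int main() {
      Method string_factory{"StringFactory.newEmptyString", NewEmptyStringCode};
      ThreadSelf tr{&string_factory};  // TR register = thread self pointer
      // Mirrors the emitted code: LW temp, off(TR); LW T9, code_offset(temp); JALR T9.
      Method* temp = tr.pNewEmptyString;
      void (*t9)() = temp->entry_point_from_quick_compiled_code;
      t9();
      return 0;
    }
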
 
 void LocationsBuilderMIPS::VisitNot(HNot* instruction) {
@@ -3477,19 +4939,19 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) {
-  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
     return;
   }
   Location obj = instruction->GetLocations()->InAt(0);
 
   __ Lw(ZERO, obj.AsRegister<Register>(), 0);
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS::GenerateExplicitNullCheck(HNullCheck* instruction) {
+void CodeGeneratorMIPS::GenerateExplicitNullCheck(HNullCheck* instruction) {
   SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathMIPS(instruction);
-  codegen_->AddSlowPath(slow_path);
+  AddSlowPath(slow_path);
 
   Location obj = instruction->GetLocations()->InAt(0);
 
@@ -3497,11 +4959,7 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
-    GenerateImplicitNullCheck(instruction);
-  } else {
-    GenerateExplicitNullCheck(instruction);
-  }
+  codegen_->GenerateNullCheck(instruction);
 }
 
 void LocationsBuilderMIPS::VisitOr(HOr* instruction) {
@@ -3549,7 +5007,7 @@
 
 void LocationsBuilderMIPS::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3562,13 +5020,13 @@
 void LocationsBuilderMIPS::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
     case Primitive::kPrimInt:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -3598,21 +5056,11 @@
 
 void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
   Primitive::Type type = instruction->GetType();
-  LocationSummary* locations = instruction->GetLocations();
-  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   switch (type) {
-    case Primitive::kPrimInt: {
-      Register dst = locations->Out().AsRegister<Register>();
-      Register lhs = locations->InAt(0).AsRegister<Register>();
-      Register rhs = locations->InAt(1).AsRegister<Register>();
-      if (isR6) {
-        __ ModR6(dst, lhs, rhs);
-      } else {
-        __ ModR2(dst, lhs, rhs);
-      }
+    case Primitive::kPrimInt:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
                               instruction,
@@ -3627,7 +5075,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmodf));
-      CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
     case Primitive::kPrimDouble: {
@@ -3635,7 +5083,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmod));
-      CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
     default:
@@ -3669,6 +5117,14 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS::VisitRor(HRor* ror) {
+  HandleShift(ror);
+}
+
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) {
+  HandleShift(ror);
+}
+
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3685,33 +5141,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
@@ -3826,17 +5255,9 @@
   GenerateSuspendCheck(instruction, nullptr);
 }
 
-void LocationsBuilderMIPS::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3854,6 +5275,7 @@
   Primitive::Type input_type = conversion->GetInputType();
   Primitive::Type result_type = conversion->GetResultType();
   DCHECK_NE(input_type, result_type);
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
@@ -3861,9 +5283,10 @@
   }
 
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
-    call_kind = LocationSummary::kCall;
+  if (!isR6 &&
+      ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+       (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -3900,6 +5323,7 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   DCHECK_NE(input_type, result_type);
 
@@ -3908,7 +5332,9 @@
     Register dst_low = locations->Out().AsRegisterPairLow<Register>();
     Register src = locations->InAt(0).AsRegister<Register>();
 
-    __ Move(dst_low, src);
+    if (dst_low != src) {
+      __ Move(dst_low, src);
+    }
     __ Sra(dst_high, src, 31);
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
     Register dst = locations->Out().AsRegister<Register>();
@@ -3937,7 +5363,9 @@
         }
         break;
       case Primitive::kPrimInt:
-        __ Move(dst, src);
+        if (dst != src) {
+          __ Move(dst, src);
+        }
         break;
 
       default:
@@ -3945,7 +5373,37 @@
                    << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
+    if (input_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+        Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+        FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+        __ Mtc1(src_low, FTMP);
+        __ Mthc1(src_high, FTMP);
+        if (result_type == Primitive::kPrimFloat) {
+          __ Cvtsl(dst, FTMP);
+        } else {
+          __ Cvtdl(dst, FTMP);
+        }
+      } else {
+        int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+                                                                      : QUICK_ENTRY_POINT(pL2d);
+        bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+                                                             : IsDirectEntrypoint(kQuickL2d);
+        codegen_->InvokeRuntime(entry_offset,
+                                conversion,
+                                conversion->GetDexPc(),
+                                nullptr,
+                                direct);
+        if (result_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+        } else {
+          CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+        }
+      }
+    } else {
       Register src = locations->InAt(0).AsRegister<Register>();
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       __ Mtc1(src, FTMP);
@@ -3954,54 +5412,164 @@
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
-                                                           : IsDirectEntrypoint(kQuickL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (result_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
-      } else {
-        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
-      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    bool direct;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
-                                                      : IsDirectEntrypoint(kQuickD2iz);
-    } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
-                                                      : IsDirectEntrypoint(kQuickD2l);
-    }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr,
-                            direct);
-    if (result_type != Primitive::kPrimLong) {
-      if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+    if (result_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+        MipsLabel truncate;
+        MipsLabel done;
+
+        // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+        // value when the input is either a NaN or outside the range of the output type
+        // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+        // the same result.
+        //
+        // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+        // value of the output type if the input is outside the range after the truncation, and
+        // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+        // results. This matches the desired float/double-to-int/long conversion exactly.
+        //
+        // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+        //
+        // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+        // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+        // even though it must be NAN2008=1 on R6.
+        //
+        // The code takes care of the different behaviors by first comparing the input to the
+        // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+        // If the input is greater than or equal to the minimum, it proceeds to the truncate
+        // instruction, which will handle such an input the same way irrespective of NAN2008.
+        // Otherwise the input is compared to itself to determine whether it is a NaN or not
+        // in order to return either zero or the minimum value.
+        //
+        // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+        // truncate instruction for MIPS64R6.
+        if (input_type == Primitive::kPrimFloat) {
+          uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, min_val);
+          __ Mtc1(TMP, FTMP);
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, High32Bits(min_val));
+          __ Mtc1(ZERO, FTMP);
+          __ Mthc1(TMP, FTMP);
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ Move(dst_low, ZERO);
+        __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst_high, dst_high, TMP);
+
+        __ B(&done);
+
+        __ Bind(&truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ TruncLS(FTMP, src);
+        } else {
+          __ TruncLD(FTMP, src);
+        }
+        __ Mfc1(dst_low, FTMP);
+        __ Mfhc1(dst_high, FTMP);
+
+        __ Bind(&done);
       } else {
-        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+        int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+                                                                     : QUICK_ENTRY_POINT(pD2l);
+        bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+                                                            : IsDirectEntrypoint(kQuickD2l);
+        codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+        if (input_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        } else {
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        }
       }
     } else {
+      FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+      Register dst = locations->Out().AsRegister<Register>();
+      MipsLabel truncate;
+      MipsLabel done;
+
+      // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+      // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+      // even though it must be NAN2008=1 on R6.
+      //
+      // For details see the large comment above for the truncation of float/double to long on R6.
+      //
+      // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+      // truncate instruction for MIPS64R6.
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, min_val);
+        __ Mtc1(TMP, FTMP);
       } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, High32Bits(min_val));
+        __ Mtc1(ZERO, FTMP);
+        __ MoveToFpuHigh(TMP, FTMP);
       }
+
+      if (isR6) {
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst, dst, TMP);
+      } else {
+        if (input_type == Primitive::kPrimFloat) {
+          __ ColeS(0, FTMP, src);
+        } else {
+          __ ColeD(0, FTMP, src);
+        }
+        __ Bc1t(0, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CeqS(0, src, src);
+        } else {
+          __ CeqD(0, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Movf(dst, ZERO, 0);
+      }
+
+      __ B(&done);
+
+      __ Bind(&truncate);
+
+      if (input_type == Primitive::kPrimFloat) {
+        __ TruncWS(FTMP, src);
+      } else {
+        __ TruncWD(FTMP, src);
+      }
+      __ Mfc1(dst, FTMP);
+
+      __ Bind(&done);
     }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
@@ -4045,95 +5613,83 @@
 }
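
As a host-side reference for what the branches around the trunc instruction buy: the float-to-int semantics being implemented appear to be Java's saturating conversion (NaN to 0, out-of-range saturates), which a bare C++ cast would not give, since it is undefined for those inputs. A sketch, assuming that reading of the code above:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Reference semantics the emitted sequence implements in registers:
    // NaN -> 0, below INT_MIN -> INT_MIN, above INT_MAX -> INT_MAX,
    // otherwise truncate toward zero.
    static int32_t JavaFloatToInt(float in) {
      if (std::isnan(in)) return 0;
      if (in <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
        return std::numeric_limits<int32_t>::min();
      }
      if (in >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
        return std::numeric_limits<int32_t>::max();
      }
      return static_cast<int32_t>(in);  // Safe: in range, truncates toward zero.
    }

    int main() {
      assert(JavaFloatToInt(std::nanf("")) == 0);
      assert(JavaFloatToInt(-1e30f) == std::numeric_limits<int32_t>::min());
      assert(JavaFloatToInt(1e30f) == std::numeric_limits<int32_t>::max());
      assert(JavaFloatToInt(-3.9f) == -3);
      return 0;
    }
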
 
 void LocationsBuilderMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
-}
-
-void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorMIPS::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
@@ -4150,24 +5706,89 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
+  Register temp_reg = TMP;
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
+  // Jump to default if index is negative.
+  // Note: We don't check the case where the index is positive while value < lower_bound,
+  // because in that case index >= num_entries must be true, so the default branch below
+  // still catches it; this saves one branch instruction.
+  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; ++i) {
-    int32_t case_value = lower_bound + i;
-    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqz(value_reg, successor_label);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beq(value_reg, TMP, successor_label);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // Handle the one remaining case value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
-  // Insert the default branch for every other value.
+  // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
 }
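
The compare/jump chain above avoids materializing each case value: after biasing by lower_bound it only ever decrements the scratch register and tests against zero. A small C++ model of the chain, cross-checked against the obvious definition; the assumption here is that successors[i] handles case lower_bound + i, with -1 standing for the default block:

    #include <cassert>
    #include <cstdint>

    // Software model of the emitted compare/branch chain. Each loop step needs
    // only a BLTZ (odd case) and a BEQZ (even case) against the running value.
    static int PackedSwitchTarget(int32_t value, int32_t lower_bound, int32_t num_entries) {
      int32_t temp = value - lower_bound;          // Addiu32 temp, value, -lower_bound
      if (temp < 0) return -1;                     // Bltz -> default
      if (temp == 0) return 0;                     // Beqz -> successors[0]
      int32_t last_index = 0;
      for (; num_entries - last_index > 2; last_index += 2) {
        temp -= 2;                                 // Addiu temp, temp, -2
        if (temp < 0) return last_index + 1;       // Bltz -> successors[last_index + 1]
        if (temp == 0) return last_index + 2;      // Beqz -> successors[last_index + 2]
      }
      if (num_entries - last_index == 2) {
        temp -= 1;                                 // Handle the one remaining case value.
        if (temp == 0) return last_index + 1;
      }
      return -1;                                   // Fall through to default.
    }

    int main() {
      // Cross-check against the direct definition over a small input range.
      const int32_t lower_bound = 5, num_entries = 7;
      for (int32_t v = -3; v < 20; ++v) {
        int expected =
            (v >= lower_bound && v < lower_bound + num_entries) ? v - lower_bound : -1;
        assert(PackedSwitchTarget(v, lower_bound, num_entries) == expected);
      }
      return 0;
    }
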
 
+void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations = insn->GetLocations();
+  Register reg = locations->Out().AsRegister<Register>();
+
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+
+  // Generate a dummy PC-relative call to obtain PC.
+  __ Nal();
+  // Grab the return address off RA.
+  __ Move(reg, RA);
+  // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()?
+
+  // Remember this offset (the obtained PC value) for later use with constant area.
+  __ BindPcRelBaseLabel();
+}
+
+void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  Register reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorMIPS::PcRelativePatchInfo* info =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+
+  if (codegen_->GetInstructionSetFeatures().IsR6()) {
+    __ Bind(&info->high_label);
+    __ Bind(&info->pc_rel_label);
+    // Add a 32-bit offset to PC.
+    __ Auipc(reg, /* placeholder */ 0x1234);
+    __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  } else {
+    // Generate a dummy PC-relative call to obtain PC.
+    __ Nal();
+    __ Bind(&info->high_label);
+    __ Lui(reg, /* placeholder */ 0x1234);
+    __ Bind(&info->pc_rel_label);
+    __ Ori(reg, reg, /* placeholder */ 0x5678);
+    // Add a 32-bit offset to PC.
+    __ Addu(reg, reg, RA);
+    // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
+  }
+}
+
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   // The trampoline uses the same calling convention as dex calling conventions,
   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
@@ -4179,6 +5800,36 @@
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
 }
 
+void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kMipsPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kMipsPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
+  }
+}
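
VisitClassTableGet above emits one load for a vtable slot but two for an IMT slot, because the IMT lives behind a pointer in the class. A toy model of the two shapes; layout, field names and sizes are illustrative only:

    #include <cassert>
    #include <cstdint>

    using MethodPtr = uintptr_t;

    struct Klass {
      MethodPtr* imt;       // mirror::Class::ImtPtrOffset analogue
      MethodPtr vtable[4];  // embedded vtable, EmbeddedVTableEntryOffset analogue
    };

    static MethodPtr VTableGet(const Klass* k, size_t index) {
      return k->vtable[index];   // one LoadFromOffset at a fixed offset
    }

    static MethodPtr ImtGet(const Klass* k, size_t index) {
      MethodPtr* table = k->imt; // LoadFromOffset of the IMT pointer
      return table[index];       // second LoadFromOffset at the element
    }

    int main() {
      MethodPtr imt_storage[2] = {0x100, 0x200};
      Klass k{imt_storage, {0x10, 0x20, 0x30, 0x40}};
      assert(VTableGet(&k, 2) == 0x30);
      assert(ImtGet(&k, 1) == 0x200);
      return 0;
    }
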
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index a571e76..63a0345 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,11 +18,12 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/mips/assembler_mips.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace mips {
@@ -152,7 +153,8 @@
 
 class SlowPathCodeMIPS : public SlowPathCode {
  public:
-  SlowPathCodeMIPS() : entry_label_(), exit_label_() {}
+  explicit SlowPathCodeMIPS(HInstruction* instruction)
+      : SlowPathCode(instruction), entry_label_(), exit_label_() {}
 
   MipsLabel* GetEntryLabel() { return &entry_label_; }
   MipsLabel* GetExitLabel() { return &exit_label_; }
@@ -185,6 +187,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -196,7 +199,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
 };
 
-class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
 
@@ -220,15 +223,39 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
-  void GenerateImplicitNullCheck(HNullCheck* instruction);
-  void GenerateExplicitNullCheck(HNullCheck* instruction);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
+  void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
+  void GenerateIntCompareAndBranch(IfCondition cond,
+                                   LocationSummary* locations,
+                                   MipsLabel* label);
+  void GenerateLongCompareAndBranch(IfCondition cond,
+                                    LocationSummary* locations,
+                                    MipsLabel* label);
+  void GenerateFpCompareAndBranch(IfCondition cond,
+                                  bool gt_bias,
+                                  Primitive::Type type,
+                                  LocationSummary* locations,
+                                  MipsLabel* label);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              MipsLabel* true_target,
-                             MipsLabel* false_target,
-                             MipsLabel* always_true_target);
+                             MipsLabel* false_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   MipsAssembler* const assembler_;
@@ -245,12 +272,12 @@
                     OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorMIPS() {}
 
+  void ComputeSpillMask() OVERRIDE;
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void Move32(Location destination, Location source);
   void Move64(Location destination, Location source);
   void MoveConstant(Location location, HConstant* c);
@@ -259,7 +286,7 @@
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMipsDoublewordSize; }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));
   }
 
@@ -268,21 +295,22 @@
   MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; }
   const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
+  // Emit linker patches.
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   void MarkGCCard(Register object, Register value);
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
-
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+  void ClobberRA() {
+    clobbered_ra_ = true;
+  }
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -332,18 +360,80 @@
     return type == Primitive::kPrimLong;
   }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
-                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
-  }
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
   }
 
+  void GenerateNop();
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays,
+  // boot image strings and boot image types. The only difference between these uses
+  // is the interpretation of the offset_or_index.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
+
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string/type index.
+    uint32_t offset_or_index;
+    // Label for the instruction loading the most significant half of the offset that's added to PC
+    // to form the base address (the least significant half is loaded with the instruction that
+    // follows).
+    MipsLabel high_label;
+    // Label for the instruction corresponding to PC+0.
+    MipsLabel pc_rel_label;
+  };
+
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              Literal*,
+                                              StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
+
+  Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
+  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
+  Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
+
   // Labels for each block that will be compiled.
   MipsLabel* block_labels_;
   MipsLabel frame_entry_label_;
@@ -353,6 +443,28 @@
   MipsAssembler assembler_;
   const MipsInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Method patch info, map MethodReference to a literal for method address and method code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
+  // PC-relative patch info for each HMipsDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
+
+  // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods.
+  // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations.
+  bool clobbered_ra_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
 };
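
The *_patches_ and *_literals_ maps added above all follow the same deduplication pattern: one Literal per distinct key, created on first request and shared afterwards. A sketch of that pattern, with std::map standing in for ArenaSafeMap; the method name mirrors the shape of DeduplicateUint32Literal and its exact behavior is assumed:

    #include <cassert>
    #include <cstdint>
    #include <map>

    struct Literal {
      uint32_t value;
    };

    class LiteralPool {
     public:
      // One Literal per distinct value; later requests reuse the first one.
      Literal* DeduplicateUint32Literal(uint32_t value) {
        auto it = map_.find(value);
        if (it == map_.end()) {
          it = map_.emplace(value, new Literal{value}).first;
        }
        return it->second;
      }

     private:
      std::map<uint32_t, Literal*> map_;  // Leaks in this sketch; ART arena-allocates.
    };

    int main() {
      LiteralPool pool;
      Literal* a = pool.DeduplicateUint32Literal(0xdeadbeef);
      Literal* b = pool.DeduplicateUint32Literal(0xdeadbeef);
      assert(a == b);  // The same literal object is reused.
      return 0;
    }
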
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5f78285..4a5755c 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -16,19 +16,19 @@
 
 #include "code_generator_mips64.h"
 
+#include "art_method.h"
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_mips64.h"
-#include "art_method.h"
-#include "code_generator_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
 #include "thread.h"
-#include "utils/mips64/assembler_mips64.h"
 #include "utils/assembler.h"
+#include "utils/mips64/assembler_mips64.h"
 #include "utils/stack_checks.h"
 
 namespace art {
@@ -37,9 +37,6 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr GpuRegister kMethodRegisterArgument = A0;
 
-// We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr FpuRegister FTMP = F8;
-
 Location Mips64ReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -105,12 +102,13 @@
   return Mips64ReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : instruction_(instruction) {}
+  explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -129,10 +127,14 @@
                                locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    mips64_codegen->InvokeRuntime(entry_point_offset,
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -141,14 +143,12 @@
   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; }
 
  private:
-  HBoundsCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64);
 };
 
 class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : instruction_(instruction) {}
+  explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -169,7 +169,6 @@
   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; }
 
  private:
-  HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64);
 };
 
@@ -179,7 +178,7 @@
                           HInstruction* at,
                           uint32_t dex_pc,
                           bool do_clinit)
-      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCodeMIPS64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
@@ -210,7 +209,7 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; }
@@ -234,7 +233,7 @@
 
 class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : instruction_(instruction) {}
+  explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : SlowPathCodeMIPS64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -245,7 +244,8 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -257,20 +257,18 @@
                                  type);
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; }
 
  private:
-  HLoadString* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64);
 };
 
 class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : instruction_(instr) {}
+  explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -291,34 +289,30 @@
   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; }
 
  private:
-  HNullCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64);
 };
 
 class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
   SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor)
-      : instruction_(instruction), successor_(successor) {}
+      : SlowPathCodeMIPS64(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
-      __ B(GetReturnLabel());
+      __ Bc(GetReturnLabel());
     } else {
-      __ B(mips64_codegen->GetLabelOf(successor_));
+      __ Bc(mips64_codegen->GetLabelOf(successor_));
     }
   }
 
-  Label* GetReturnLabel() {
+  Mips64Label* GetReturnLabel() {
     DCHECK(successor_ == nullptr);
     return &return_label_;
   }
@@ -326,19 +320,18 @@
   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; }
 
  private:
-  HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
   HBasicBlock* const successor_;
 
   // If `successor_` is null, the label to branch to after the suspend check.
-  Label return_label_;
+  Mips64Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64);
 };
 
 class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : instruction_(instruction) {}
+  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -366,13 +359,11 @@
                                     instruction_,
                                     dex_pc,
                                     this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -380,36 +371,34 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
 
  private:
-  HInstruction* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
 };
 
 class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
-    : instruction_(instruction) {}
+  explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
+    : SlowPathCodeMIPS64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
 
  private:
-  HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
 };
 
@@ -420,7 +409,7 @@
     : CodeGenerator(graph,
                     kNumberOfGpuRegisters,
                     kNumberOfFpuRegisters,
-                    0,  // kNumberOfRegisterPairs
+                    /* number_of_register_pairs */ 0,
                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                         arraysize(kCoreCalleeSaves)),
                     ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
@@ -431,16 +420,44 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
 
 #undef __
-#define __ down_cast<Mips64Assembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+// NOLINT on the __ macro to suppress a spurious warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
+  // Ensure that we fix up branches.
+  __ FinalizeCode();
+
+  // Adjust native pc offsets in stack maps.
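+  // (FinalizeCode above may replace short branches with longer sequences, so
+  //  positions can only grow; the DCHECK below relies on this.)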
+  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+    uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset;
+    uint32_t new_position = __ GetAdjustedPosition(old_position);
+    DCHECK_GE(new_position, old_position);
+    stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
+  }
+
+  // Adjust pc offsets for the disassembly information.
+  if (disasm_info_ != nullptr) {
+    GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+    frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+    frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+    for (auto& it : *disasm_info_->GetInstructionIntervals()) {
+      it.second.start = __ GetAdjustedPosition(it.second.start);
+      it.second.end = __ GetAdjustedPosition(it.second.end);
+    }
+    for (auto& it : *disasm_info_->GetSlowPathIntervals()) {
+      it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start);
+      it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
+    }
+  }
+
   CodeGenerator::Finalize(allocator);
 }
 
@@ -461,12 +478,12 @@
 void ParallelMoveResolverMIPS64::RestoreScratch(int reg) {
   // Pop reg
   __ Ld(GpuRegister(reg), SP, 0);
-  __ DecreaseFrameSize(kMips64WordSize);
+  __ DecreaseFrameSize(kMips64DoublewordSize);
 }
 
 void ParallelMoveResolverMIPS64::SpillScratch(int reg) {
   // Push reg
-  __ IncreaseFrameSize(kMips64WordSize);
+  __ IncreaseFrameSize(kMips64DoublewordSize);
   __ Sd(GpuRegister(reg), SP, 0);
 }
 
@@ -478,7 +495,7 @@
   // automatically unspilled when the scratch scope object is destroyed).
   ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
   // If V0 spills onto the stack, SP-relative offsets need to be adjusted.
-  int stack_offset = ensure_scratch.IsSpilled() ? kMips64WordSize : 0;
+  int stack_offset = ensure_scratch.IsSpilled() ? kMips64DoublewordSize : 0;
   __ LoadFromOffset(load_type,
                     GpuRegister(ensure_scratch.GetRegister()),
                     SP,
@@ -498,7 +515,9 @@
   return dwarf::Reg::Mips64Core(static_cast<int>(reg));
 }
 
-// TODO: mapping of floating-point registers to DWARF
+static dwarf::Reg DWARFReg(FpuRegister reg) {
+  return dwarf::Reg::Mips64Fp(static_cast<int>(reg));
+}
 
 void CodeGeneratorMIPS64::GenerateFrameEntry() {
   __ Bind(&frame_entry_label_);
@@ -537,7 +556,7 @@
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
     GpuRegister reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sd(reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
@@ -546,9 +565,9 @@
   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
     FpuRegister reg = kFpuCalleeSaves[i];
     if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sdc1(reg, SP, ofs);
-      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
+      __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
@@ -584,8 +603,8 @@
       FpuRegister reg = kFpuCalleeSaves[i];
       if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
         __ Ldc1(reg, SP, ofs);
-        ofs += kMips64WordSize;
-        // TODO: __ cfi().Restore(DWARFReg(reg));
+        ofs += kMips64DoublewordSize;
+        __ cfi().Restore(DWARFReg(reg));
       }
     }
 
@@ -593,7 +612,7 @@
       GpuRegister reg = kCoreCalleeSaves[i];
       if (allocated_registers_.ContainsCoreRegister(reg)) {
         __ Ld(reg, SP, ofs);
-        ofs += kMips64WordSize;
+        ofs += kMips64DoublewordSize;
         __ cfi().Restore(DWARFReg(reg));
       }
     }
@@ -603,6 +622,7 @@
   }
 
   __ Jr(RA);
+  __ Nop();
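+  // The Nop fills the branch delay slot of Jr.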
 
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
@@ -666,9 +686,19 @@
         gpr = destination.AsRegister<GpuRegister>();
       }
       if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
-        __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+        int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant());
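+        // A floating-point zero has an all-zero bit pattern, so it can be
+        // sourced from the ZERO register without materializing a constant.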
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+        int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst64(gpr, value);
+        }
       }
       if (dst_type == Primitive::kPrimFloat) {
         __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
@@ -734,12 +764,22 @@
       // Move to stack from constant
       HConstant* src_cst = source.GetConstant();
       StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+      GpuRegister gpr = ZERO;
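+      // Zero constants are stored straight from the ZERO register, skipping
+      // the load into TMP.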
       if (destination.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+        int32_t value = GetInt32ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+        DCHECK(destination.IsDoubleStackSlot());
+        int64_t value = GetInt64ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
       }
-      __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+      __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex());
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
       DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
@@ -755,9 +795,7 @@
   }
 }
 
-void CodeGeneratorMIPS64::SwapLocations(Location loc1,
-                                        Location loc2,
-                                        Primitive::Type type ATTRIBUTE_UNUSED) {
+void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) {
   DCHECK(!loc1.IsConstant());
   DCHECK(!loc2.IsConstant());
 
@@ -781,12 +819,16 @@
     // Swap 2 FPRs
     FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
     FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
-    // TODO: Can MOV.S/MOV.D be used here to save one instruction?
-    // Need to distinguish float from double, right?
-    __ Dmfc1(TMP, r2);
-    __ Dmfc1(AT, r1);
-    __ Dmtc1(TMP, r1);
-    __ Dmtc1(AT, r2);
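+    // Swapping through FTMP with MOV.S/MOV.D needs one instruction fewer
+    // than the old Dmfc1/Dmtc1 round-trip through GPRs.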
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, r1);
+      __ MovS(r1, r2);
+      __ MovS(r2, FTMP);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, r1);
+      __ MovD(r1, r2);
+      __ MovD(r2, FTMP);
+    }
   } else if (is_slot1 != is_slot2) {
     // Swap GPR/FPR and stack slot
     Location reg_loc = is_slot1 ? loc2 : loc1;
@@ -800,7 +842,6 @@
                           reg_loc.AsFpuRegister<FpuRegister>(),
                           SP,
                           mem_loc.GetStackIndex());
-      // TODO: review this MTC1/DMTC1 move
       if (mem_loc.IsStackSlot()) {
         __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
       } else {
@@ -820,55 +861,6 @@
   }
 }
 
-void CodeGeneratorMIPS64::Move(HInstruction* instruction,
-                               Location location,
-                               HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  Primitive::Type type = instruction->GetType();
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (instruction->IsCurrentMethod()) {
-    MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset), type);
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (instruction->IsIntConstant()
-             || instruction->IsLongConstant()
-             || instruction->IsNullConstant()) {
-    if (location.IsRegister()) {
-      // Move to GPR from constant
-      GpuRegister dst = location.AsRegister<GpuRegister>();
-      if (instruction->IsNullConstant() || instruction->IsIntConstant()) {
-        __ LoadConst32(dst, GetInt32ValueOf(instruction->AsConstant()));
-      } else {
-        __ LoadConst64(dst, instruction->AsLongConstant()->GetValue());
-      }
-    } else {
-      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
-      // Move to stack from constant
-      if (location.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
-        __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
-      } else {
-        __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
-        __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
-      }
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    MoveLocation(location, temp_location, type);
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    if (Primitive::Is64BitType(type)) {
-      MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
-    } else {
-      MoveLocation(location, Location::StackSlot(stack_slot), type);
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    MoveLocation(location, locations->Out(), type);
-  }
-}
-
 void CodeGeneratorMIPS64::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ LoadConst32(location.AsRegister<GpuRegister>(), value);
@@ -882,47 +874,28 @@
   }
 }
 
-Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
-void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
-  Label done;
+void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
+                                     GpuRegister value,
+                                     bool value_can_be_null) {
+  Mips64Label done;
   GpuRegister card = AT;
   GpuRegister temp = TMP;
-  __ Beqzc(value, &done);
+  if (value_can_be_null) {
+    __ Beqzc(value, &done);
+  }
   __ LoadFromOffset(kLoadDoubleword,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMips64WordSize>().Int32Value());
+                    Thread::CardTableOffset<kMips64PointerSize>().Int32Value());
   __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Daddu(temp, card, temp);
   __ Sb(card, temp, 0);
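   // (`card` holds the biased card table base, whose low byte equals the
   // dirty-card value, so the Sb above both addresses the card and marks it.)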
-  __ Bind(&done);
+  if (value_can_be_null) {
+    __ Bind(&done);
+  }
 }
 
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -931,10 +904,11 @@
   blocked_core_registers_[SP] = true;
   blocked_core_registers_[RA] = true;
 
-  // AT and TMP(T8) are used as temporary/scratch registers
-  // (similar to how AT is used by MIPS assemblers).
+  // AT, TMP(T8) and TMP2(T3) are used as temporary/scratch
+  // registers (similar to how AT is used by MIPS assemblers).
   blocked_core_registers_[AT] = true;
   blocked_core_registers_[TMP] = true;
+  blocked_core_registers_[TMP2] = true;
   blocked_fpu_registers_[FTMP] = true;
 
   // Reserve suspend and thread registers.
@@ -946,49 +920,34 @@
 
   // TODO: review; anything else?
 
-  // TODO: make these two for's conditional on is_baseline once
-  // all the issues with register saving/restoring are sorted out.
-  for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-    blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-  }
-
-  for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-    blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-  }
-}
-
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
-    return Location::RegisterLocation(reg);
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
   }
 }
 
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1003,7 +962,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMips64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1017,6 +976,7 @@
   // TODO: anything related to T9/GP/GOT/PIC/.so's?
   __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
   __ Jalr(T9);
+  __ Nop();
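+  // The Nop fills the branch delay slot of Jalr.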
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -1042,20 +1002,20 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMips64WordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMips64PointerSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnezc(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
   } else {
     __ Beqzc(TMP, codegen_->GetLabelOf(successor));
-    __ B(slow_path->GetEntryLabel());
+    __ Bc(slow_path->GetEntryLabel());
     // slow_path will return to GetLabelOf(successor).
   }
 }
 
 InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph,
                                                                CodeGeneratorMIPS64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1189,7 +1149,7 @@
 }
 
 void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   Primitive::Type type = instr->GetResultType();
@@ -1198,7 +1158,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     default:
@@ -1207,7 +1167,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
   LocationSummary* locations = instr->GetLocations();
   Primitive::Type type = instr->GetType();
 
@@ -1228,17 +1188,22 @@
       }
 
       if (use_imm) {
-        uint32_t shift_value = (type == Primitive::kPrimInt)
-          ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue)
-          : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue);
+        uint32_t shift_value = rhs_imm &
+            (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
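+        // Only the low 5 (int) or 6 (long) bits of the shift distance
+        // matter, matching Java shift semantics.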
 
-        if (type == Primitive::kPrimInt) {
+        if (shift_value == 0) {
+          if (dst != lhs) {
+            __ Move(dst, lhs);
+          }
+        } else if (type == Primitive::kPrimInt) {
           if (instr->IsShl()) {
             __ Sll(dst, lhs, shift_value);
           } else if (instr->IsShr()) {
             __ Sra(dst, lhs, shift_value);
-          } else {
+          } else if (instr->IsUShr()) {
             __ Srl(dst, lhs, shift_value);
+          } else {
+            __ Rotr(dst, lhs, shift_value);
           }
         } else {
           if (shift_value < 32) {
@@ -1246,8 +1211,10 @@
               __ Dsll(dst, lhs, shift_value);
             } else if (instr->IsShr()) {
               __ Dsra(dst, lhs, shift_value);
-            } else {
+            } else if (instr->IsUShr()) {
               __ Dsrl(dst, lhs, shift_value);
+            } else {
+              __ Drotr(dst, lhs, shift_value);
             }
           } else {
             shift_value -= 32;
@@ -1255,8 +1222,10 @@
               __ Dsll32(dst, lhs, shift_value);
             } else if (instr->IsShr()) {
               __ Dsra32(dst, lhs, shift_value);
-            } else {
+            } else if (instr->IsUShr()) {
               __ Dsrl32(dst, lhs, shift_value);
+            } else {
+              __ Drotr32(dst, lhs, shift_value);
             }
           }
         }
@@ -1266,16 +1235,20 @@
             __ Sllv(dst, lhs, rhs_reg);
           } else if (instr->IsShr()) {
             __ Srav(dst, lhs, rhs_reg);
-          } else {
+          } else if (instr->IsUShr()) {
             __ Srlv(dst, lhs, rhs_reg);
+          } else {
+            __ Rotrv(dst, lhs, rhs_reg);
           }
         } else {
           if (instr->IsShl()) {
             __ Dsllv(dst, lhs, rhs_reg);
           } else if (instr->IsShr()) {
             __ Dsrav(dst, lhs, rhs_reg);
-          } else {
+          } else if (instr->IsUShr()) {
             __ Dsrlv(dst, lhs, rhs_reg);
+          } else {
+            __ Drotrv(dst, lhs, rhs_reg);
           }
         }
       }
@@ -1318,11 +1291,11 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1336,7 +1309,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1350,7 +1322,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1365,7 +1336,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1382,7 +1352,6 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
       if (index.IsConstant()) {
@@ -1398,7 +1367,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1413,7 +1381,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1428,7 +1395,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1457,7 +1423,7 @@
 
 void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -1468,7 +1434,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1544,7 +1510,7 @@
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         if (needs_write_barrier) {
           DCHECK_EQ(value_type, Primitive::kPrimNot);
-          codegen_->MarkGCCard(obj, value);
+          codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
@@ -1552,6 +1518,7 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
       break;
     }
@@ -1638,12 +1605,7 @@
   // length is limited by the maximum positive signed 32-bit integer.
   // Unsigned comparison of length and index checks for index < 0
   // and for length <= index simultaneously.
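   // E.g. an index of -1 is 0xFFFFFFFF unsigned, which is above any valid
   // length, so the unsigned branch below takes the slow path.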
-  // Mips R6 requires lhs != rhs for compact branches.
-  if (index == length) {
-    __ B(slow_path->GetEntryLabel());
-  } else {
-    __ Bgeuc(index, length, slow_path->GetEntryLabel());
-  }
+  __ Bgeuc(index, length, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
@@ -1698,27 +1660,26 @@
 void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
 
   switch (in_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1727,34 +1688,87 @@
 
 void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  GpuRegister res = locations->Out().AsRegister<GpuRegister>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
 
   //  0 if: left == right
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      // TODO: more efficient (direct) comparison with a constant
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      GpuRegister rhs = ZERO;
+      if (use_imm) {
+        if (in_type == Primitive::kPrimLong) {
+          int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+          if (value != 0) {
+            rhs = AT;
+            __ LoadConst64(rhs, value);
+          }
+        } else {
+          int32_t value = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()->AsConstant());
+          if (value != 0) {
+            rhs = AT;
+            __ LoadConst32(rhs, value);
+          }
+        }
+      } else {
+        rhs = rhs_location.AsRegister<GpuRegister>();
+      }
       __ Slt(TMP, lhs, rhs);
-      __ Slt(dst, rhs, lhs);
-      __ Subu(dst, dst, TMP);
+      __ Slt(res, rhs, lhs);
+      __ Subu(res, res, TMP);
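+      // TMP = (lhs < rhs), res = (lhs > rhs); res - TMP is -1, 0 or 1.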
       break;
     }
 
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int32_t entry_point_offset;
-      if (in_type == Primitive::kPrimFloat) {
-        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat)
-                                                     : QUICK_ENTRY_POINT(pCmplFloat);
+    case Primitive::kPrimFloat: {
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      Mips64Label done;
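+      // Both CmpEqS and CmpLtS leave FTMP clear on an unordered (NaN)
+      // comparison, so execution falls through to the bias value:
+      // 1 for gt bias, -1 otherwise.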
+      __ CmpEqS(FTMP, lhs, rhs);
+      __ LoadConst32(res, 0);
+      __ Bc1nez(FTMP, &done);
+      if (instruction->IsGtBias()) {
+        __ CmpLtS(FTMP, lhs, rhs);
+        __ LoadConst32(res, -1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, 1);
       } else {
-        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgDouble)
-                                                     : QUICK_ENTRY_POINT(pCmplDouble);
+        __ CmpLtS(FTMP, rhs, lhs);
+        __ LoadConst32(res, 1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, -1);
       }
-      codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
+      __ Bind(&done);
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      Mips64Label done;
+      __ CmpEqD(FTMP, lhs, rhs);
+      __ LoadConst32(res, 0);
+      __ Bc1nez(FTMP, &done);
+      if (instruction->IsGtBias()) {
+        __ CmpLtD(FTMP, lhs, rhs);
+        __ LoadConst32(res, -1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, 1);
+      } else {
+        __ CmpLtD(FTMP, rhs, lhs);
+        __ LoadConst32(res, 1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, -1);
+      }
+      __ Bind(&done);
       break;
     }
 
@@ -1763,142 +1777,311 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (instruction->NeedsMaterialization()) {
+  switch (instruction->InputAt(0)->GetType()) {
+    default:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      break;
+  }
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
-void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
 
-  // TODO: generalize to long
-  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
-
+  Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
-
   GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-  Location rhs_location = locations->InAt(1);
+  Mips64Label true_label;
 
-  GpuRegister rhs_reg = ZERO;
-  int64_t rhs_imm = 0;
-  bool use_imm = rhs_location.IsConstant();
-  if (use_imm) {
-    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-  } else {
-    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  switch (type) {
+    default:
+      // Integer case.
+      GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations);
+      return;
+    case Primitive::kPrimLong:
+      GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations);
+      return;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO: don't use branches.
+      GenerateFpCompareAndBranch(instruction->GetCondition(),
+                                 instruction->IsGtBias(),
+                                 type,
+                                 locations,
+                                 &true_label);
+      break;
   }
 
-  IfCondition if_cond = instruction->GetCondition();
+  // Convert the branches into the result.
+  Mips64Label done;
 
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
-      } else {
-        __ Sltu(dst, ZERO, dst);
-      }
-      break;
+  // False case: result = 0.
+  __ LoadConst32(dst, 0);
+  __ Bc(&done);
 
-    case kCondLT:
-    case kCondGE:
-      if (use_imm && IsInt<16>(rhs_imm)) {
-        __ Slti(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondGE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the slt instruction but no sge.
-        __ Xori(dst, dst, 1);
-      }
-      break;
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ LoadConst32(dst, 1);
+  __ Bind(&done);
+}
 
-    case kCondLE:
-    case kCondGT:
-      if (use_imm && IsInt<16>(rhs_imm + 1)) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Slti(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondGT) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the slti instruction but no sgti.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, rhs_reg, lhs);
-        if (if_cond == kCondLE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the slt instruction but no sle.
-          __ Xori(dst, dst, 1);
-        }
-      }
-      break;
+void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
 
-    case kCondB:
-    case kCondAE:
-      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) {
-        __ Sltiu(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondAE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the sltu instruction but no sgeu.
-        __ Xori(dst, dst, 1);
-      }
-      break;
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
 
-    case kCondBE:
-    case kCondA:
-      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Sltiu(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondA) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the sltiu instruction but no sgtiu.
-          __ Xori(dst, dst, 1);
-        }
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(imm == 1 || imm == -1);
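+  // x % +/-1 is 0, x / 1 is x, and x / -1 is -x, so no real division is
+  // needed.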
+
+  if (instruction->IsRem()) {
+    __ Move(out, ZERO);
+  } else {
+    if (imm == -1) {
+      if (type == Primitive::kPrimInt) {
+        __ Subu(out, ZERO, dividend);
       } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, rhs_reg, lhs);
-        if (if_cond == kCondBE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the sltu instruction but no sleu.
-          __ Xori(dst, dst, 1);
+        DCHECK_EQ(type, Primitive::kPrimLong);
+        __ Dsubu(out, ZERO, dividend);
+      }
+    } else if (out != dividend) {
+      __ Move(out, dividend);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
+  int ctz_imm = CTZ(abs_imm);
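+  // Signed div/rem by 2^k rounds toward zero, so negative dividends get a
+  // bias of (2^k - 1), derived from the sign bit, before the arithmetic
+  // shift. E.g. for int32 x / 4: TMP = (x >> 31) >>> 30, out = (x + TMP) >> 2.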
+
+  if (instruction->IsDiv()) {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Srl(TMP, dividend, 31);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+      }
+      __ Addu(out, dividend, TMP);
+      __ Sra(out, out, ctz_imm);
+      if (imm < 0) {
+        __ Subu(out, ZERO, out);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Dsrl32(TMP, dividend, 31);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
         }
       }
-      break;
+      __ Daddu(out, dividend, TMP);
+      if (ctz_imm < 32) {
+        __ Dsra(out, out, ctz_imm);
+      } else {
+        __ Dsra32(out, out, ctz_imm - 32);
+      }
+      if (imm < 0) {
+        __ Dsubu(out, ZERO, out);
+      }
+    }
+  } else {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Sra(TMP, dividend, 31);
+        __ Subu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Addu(out, out, TMP);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+        __ Addu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          __ Sll(out, out, 32 - ctz_imm);
+          __ Srl(out, out, 32 - ctz_imm);
+        }
+        __ Subu(out, out, TMP);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Dsra32(TMP, dividend, 31);
+        __ Dsubu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Daddu(out, out, TMP);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+        __ Daddu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          if (ctz_imm > 32) {
+            __ Dsll(out, out, 64 - ctz_imm);
+            __ Dsrl(out, out, 64 - ctz_imm);
+          } else {
+            __ Dsll32(out, out, 32 - ctz_imm);
+            __ Dsrl32(out, out, 32 - ctz_imm);
+          }
+        }
+        __ Dsubu(out, out, TMP);
+      }
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm,
+                                  (type == Primitive::kPrimLong),
+                                  &magic,
+                                  &shift);
+
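+  // The constant division is strength-reduced to a high-half multiply by a
+  // precomputed "magic" reciprocal plus shift and sign corrections (in the
+  // style of Hacker's Delight); e.g. for int32 x / 7 this yields magic
+  // 0x92492493 and shift 2.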
+  if (type == Primitive::kPrimInt) {
+    __ LoadConst32(TMP, magic);
+    __ MuhR6(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Addu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Subu(TMP, TMP, dividend);
+    }
+
+    if (shift != 0) {
+      __ Sra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Sra(out, TMP, 31);
+      __ Subu(out, TMP, out);
+    } else {
+      __ Sra(AT, TMP, 31);
+      __ Subu(AT, TMP, AT);
+      __ LoadConst32(TMP, imm);
+      __ MulR6(TMP, AT, TMP);
+      __ Subu(out, dividend, TMP);
+    }
+  } else {
+    __ LoadConst64(TMP, magic);
+    __ Dmuh(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Daddu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Dsubu(TMP, TMP, dividend);
+    }
+
+    if (shift >= 32) {
+      __ Dsra32(TMP, TMP, shift - 32);
+    } else if (shift > 0) {
+      __ Dsra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Dsra32(out, TMP, 31);
+      __ Dsubu(out, TMP, out);
+    } else {
+      __ Dsra32(AT, TMP, 31);
+      __ Dsubu(AT, TMP, AT);
+      __ LoadConst64(TMP, imm);
+      __ Dmul(TMP, AT, TMP);
+      __ Dsubu(out, dividend, TMP);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int64_t imm = Int64FromConstant(second.GetConstant());
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+    GpuRegister divisor = second.AsRegister<GpuRegister>();
+    if (instruction->IsDiv()) {
+      if (type == Primitive::kPrimInt)
+        __ DivR6(out, dividend, divisor);
+      else
+        __ Ddiv(out, dividend, divisor);
+    } else {
+      if (type == Primitive::kPrimInt)
+        __ ModR6(out, dividend, divisor);
+      else
+        __ Dmod(out, dividend, divisor);
+    }
   }
 }
 
@@ -1909,7 +2092,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1931,16 +2114,9 @@
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ DivR6(dst, lhs, rhs);
-      else
-        __ Ddiv(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -1976,15 +2152,15 @@
 
   Primitive::Type type = instruction->GetType();
 
-  if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) {
-      LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+  if (!Primitive::IsIntegralType(type)) {
+    LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
     return;
   }
 
   if (value.IsConstant()) {
     int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
     if (divisor == 0) {
-      __ B(slow_path->GetEntryLabel());
+      __ Bc(slow_path->GetEntryLabel());
     } else {
       // A division by a non-zero constant is valid. We don't need to perform
       // any check, so simply fall through.
@@ -2036,7 +2212,7 @@
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
-    __ B(codegen_->GetLabelOf(successor));
+    __ Bc(codegen_->GetLabelOf(successor));
   }
 }
 
@@ -2059,180 +2235,475 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
-                                                           Label* true_target,
-                                                           Label* false_target,
-                                                           Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  HCondition* condition = cond->AsCondition();
-
-  if (cond->IsIntConstant()) {
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ B(always_true_target);
-      }
-      return;
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond,
+                                                            bool is64bit,
+                                                            LocationSummary* locations) {
+  GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    if (is64bit) {
+      rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
     } else {
-      DCHECK_EQ(cond_value, 0);
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
     }
-  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+  int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1);
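+  // Computed with an unsigned one so that rhs_imm == INT64_MAX wraps instead
+  // of triggering signed-overflow UB; the BE/A case below also guards
+  // against the wrap to zero when rhs_imm == -1.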
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm_plus_one)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm_plus_one);
+        if (cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+        __ Sltiu(dst, lhs, rhs_imm_plus_one);
+        if (cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond,
+                                                                     bool is64bit,
+                                                                     LocationSummary* locations,
+                                                                     Mips64Label* label) {
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    if (is64bit) {
+      rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    }
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+
+  if (use_imm && rhs_imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Beqzc(lhs, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Bnezc(lhs, label);
+        break;
+      case kCondLT:
+        __ Bltzc(lhs, label);
+        break;
+      case kCondGE:
+        __ Bgezc(lhs, label);
+        break;
+      case kCondLE:
+        __ Blezc(lhs, label);
+        break;
+      case kCondGT:
+        __ Bgtzc(lhs, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ Bc(label);
+        break;
+    }
+  } else {
+    if (use_imm) {
+      rhs_reg = TMP;
+      __ LoadConst64(rhs_reg, rhs_imm);
+    }
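+    // Note: R6 has no ble/bgt (or bleu/bgtu) compact branches, so LE/GT and
+    // BE/A are emitted below by swapping the operands of bgec/bltc and
+    // bgeuc/bltuc.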
+    switch (cond) {
+      case kCondEQ:
+        __ Beqc(lhs, rhs_reg, label);
+        break;
+      case kCondNE:
+        __ Bnec(lhs, rhs_reg, label);
+        break;
+      case kCondLT:
+        __ Bltc(lhs, rhs_reg, label);
+        break;
+      case kCondGE:
+        __ Bgec(lhs, rhs_reg, label);
+        break;
+      case kCondLE:
+        __ Bgec(rhs_reg, lhs, label);
+        break;
+      case kCondGT:
+        __ Bltc(rhs_reg, lhs, label);
+        break;
+      case kCondB:
+        __ Bltuc(lhs, rhs_reg, label);
+        break;
+      case kCondAE:
+        __ Bgeuc(lhs, rhs_reg, label);
+        break;
+      case kCondBE:
+        __ Bgeuc(rhs_reg, lhs, label);
+        break;
+      case kCondA:
+        __ Bltuc(rhs_reg, lhs, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond,
+                                                                bool gt_bias,
+                                                                Primitive::Type type,
+                                                                LocationSummary* locations,
+                                                                Mips64Label* label) {
+  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
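+  // `gt_bias` selects the NaN behavior: when set, a NaN operand must make the
+  // condition act as if lhs > rhs, so the ordered compares (false on NaN) are
+  // used for LT/LE and the unordered ones (true on NaN) for GT/GE; when clear,
+  // the bias is reversed.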
+  if (type == Primitive::kPrimFloat) {
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondNE:
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ Bc1eqz(FTMP, label);
+        break;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtS(FTMP, lhs, rhs);
+        } else {
+          __ CmpUltS(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeS(FTMP, lhs, rhs);
+        } else {
+          __ CmpUleS(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltS(FTMP, rhs, lhs);
+        } else {
+          __ CmpLtS(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleS(FTMP, rhs, lhs);
+        } else {
+          __ CmpLeS(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondNE:
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ Bc1eqz(FTMP, label);
+        break;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtD(FTMP, lhs, rhs);
+        } else {
+          __ CmpUltD(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeD(FTMP, lhs, rhs);
+        } else {
+          __ CmpUleD(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltD(FTMP, rhs, lhs);
+        } else {
+          __ CmpLtD(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleD(FTMP, rhs, lhs);
+        } else {
+          __ CmpLeD(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
+                                                           size_t condition_input_index,
+                                                           Mips64Label* true_target,
+                                                           Mips64Label* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against "true" (integer value 1).
+    if (cond->AsIntConstant()->IsTrue()) {
+      if (true_target != nullptr) {
+        __ Bc(true_target);
+      }
+    } else {
+      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      if (false_target != nullptr) {
+        __ Bc(false_target);
+      }
+    }
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
     // The condition instruction has been materialized, compare the output to 0.
-    Location cond_val = instruction->GetLocations()->InAt(0);
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-    __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+    if (true_target == nullptr) {
+      __ Beqzc(cond_val.AsRegister<GpuRegister>(), false_target);
+    } else {
+      __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+    }
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
-    GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
-    Location rhs_location = condition->GetLocations()->InAt(1);
-    GpuRegister rhs_reg = ZERO;
-    int32_t rhs_imm = 0;
-    bool use_imm = rhs_location.IsConstant();
-    if (use_imm) {
-      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-    } else {
-      rhs_reg = rhs_location.AsRegister<GpuRegister>();
+    HCondition* condition = cond->AsCondition();
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    LocationSummary* locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    Mips64Label* branch_target = true_target;
+
+    if (true_target == nullptr) {
+      if_cond = condition->GetOppositeCondition();
+      branch_target = false_target;
     }
 
-    IfCondition if_cond = condition->GetCondition();
-    if (use_imm && rhs_imm == 0) {
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beqzc(lhs, true_target);
-          break;
-        case kCondNE:
-          __ Bnezc(lhs, true_target);
-          break;
-        case kCondLT:
-          __ Bltzc(lhs, true_target);
-          break;
-        case kCondGE:
-          __ Bgezc(lhs, true_target);
-          break;
-        case kCondLE:
-          __ Blezc(lhs, true_target);
-          break;
-        case kCondGT:
-          __ Bgtzc(lhs, true_target);
-          break;
-        case kCondB:
-          break;  // always false
-        case kCondBE:
-          __ Beqzc(lhs, true_target);  // <= 0 if zero
-          break;
-        case kCondA:
-          __ Bnezc(lhs, true_target);  // > 0 if non-zero
-          break;
-        case kCondAE:
-          __ B(true_target);  // always true
-          break;
-      }
-    } else {
-      if (use_imm) {
-        rhs_reg = TMP;
-        __ LoadConst32(rhs_reg, rhs_imm);
-      }
-      // It looks like we can get here with lhs == rhs. Should that be possible at all?
-      // Mips R6 requires lhs != rhs for compact branches.
-      if (lhs == rhs_reg) {
-        DCHECK(!use_imm);
-        switch (if_cond) {
-          case kCondEQ:
-          case kCondGE:
-          case kCondLE:
-          case kCondBE:
-          case kCondAE:
-            // if lhs == rhs for a positive condition, then it is a branch
-            __ B(true_target);
-            break;
-          case kCondNE:
-          case kCondLT:
-          case kCondGT:
-          case kCondB:
-          case kCondA:
-            // if lhs == rhs for a negative condition, then it is a NOP
-            break;
-        }
-      } else {
-        switch (if_cond) {
-          case kCondEQ:
-            __ Beqc(lhs, rhs_reg, true_target);
-            break;
-          case kCondNE:
-            __ Bnec(lhs, rhs_reg, true_target);
-            break;
-          case kCondLT:
-            __ Bltc(lhs, rhs_reg, true_target);
-            break;
-          case kCondGE:
-            __ Bgec(lhs, rhs_reg, true_target);
-            break;
-          case kCondLE:
-            __ Bgec(rhs_reg, lhs, true_target);
-            break;
-          case kCondGT:
-            __ Bltc(rhs_reg, lhs, true_target);
-            break;
-          case kCondB:
-            __ Bltuc(lhs, rhs_reg, true_target);
-            break;
-          case kCondAE:
-            __ Bgeuc(lhs, rhs_reg, true_target);
-            break;
-          case kCondBE:
-            __ Bgeuc(rhs_reg, lhs, true_target);
-            break;
-          case kCondA:
-            __ Bltuc(rhs_reg, lhs, true_target);
-            break;
-        }
-      }
+    switch (type) {
+      default:
+        GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target);
+        break;
+      case Primitive::kPrimLong:
+        GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+        break;
     }
   }
-  if (false_target != nullptr) {
-    __ B(false_target);
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
+    __ Bc(false_target);
   }
 }
 
 void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathMIPS64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  SlowPathCodeMIPS64* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
+}
+
+void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Mips64Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
+void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
+void CodeGeneratorMIPS64::GenerateNop() {
+  __ Nop();
 }
 
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
@@ -2308,7 +2779,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
-                                                    const FieldInfo& field_info) {
+                                                    const FieldInfo& field_info,
+                                                    bool value_can_be_null) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
@@ -2350,7 +2822,7 @@
   if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
     DCHECK(locations->InAt(1).IsRegister());
     GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
-    codegen_->MarkGCCard(obj, src);
+    codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
 }
 
@@ -2367,7 +2839,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
-  HandleFieldSet(instruction, instruction->GetFieldInfo());
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -2387,7 +2859,7 @@
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-  Label done;
+  Mips64Label done;
 
   // Return 0 if `obj` is null.
   // TODO: Avoid this check if we know `obj` is not null.
@@ -2458,11 +2930,9 @@
 void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
@@ -2476,12 +2946,17 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadDoubleword, temp, temp,
+      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+  __ Nop();
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -2496,9 +2971,9 @@
 }
 
 void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -2506,17 +2981,6 @@
   }
 
   HandleInvoke(invoke);
-
-  // While SetupBlockedRegisters() blocks registers S2-S8 due to their
-  // clobbering somewhere else, reduce further register pressure by avoiding
-  // allocation of a register for the current method pointer like on x86 baseline.
-  // TODO: remove this once all the issues with register saving/restoring are
-  // sorted out.
-  LocationSummary* locations = invoke->GetLocations();
-  Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
-  if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-    locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
@@ -2528,6 +2992,50 @@
   return false;
 }
 
+HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
+  // TODO: Implement other kinds.
+  return HLoadString::LoadKind::kDexCacheViaMethod;
+}
+
+HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
+  // TODO: Implement other kinds.
+  return HLoadClass::LoadKind::kDexCacheViaMethod;
+}
+
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  switch (desired_dispatch_info.method_load_kind) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0u
+      };
+    default:
+      break;
+  }
+  switch (desired_dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement these types. For the moment, we fall back to kCallArtMethod.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return desired_dispatch_info;
+  }
+}
+
 void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
 
@@ -2541,21 +3049,19 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-      // TODO: Implement this type. (Needs literal support.) At the moment, the
-      // CompilerDriver will not direct the backend to use this type for MIPS.
-      LOG(FATAL) << "Unsupported!";
-      UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+      // TODO: Implement these types.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       GpuRegister reg = temp.AsRegister<GpuRegister>();
       GpuRegister method_reg;
       if (current_method.IsRegister()) {
@@ -2573,8 +3079,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadDoubleword,
                         reg,
                         reg,
@@ -2585,38 +3092,40 @@
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Jialc(&frame_entry_label_, T9);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR = invoke->GetDirectCodePtr();
       __ LoadConst64(T9, invoke->GetDirectCodePtr());
       // LR()
       __ Jalr(T9);
+      __ Nop();
       break;
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement kCallPCRelative. For the moment, we fall back to kMethodCode.
-      FALLTHROUGH_INTENDED;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
-      // TODO: Implement kDirectCodeFixup. For the moment, we fall back to kMethodCode.
-      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement these types.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // T9 = callee_method->entry_point_from_quick_compiled_code_;
       __ LoadFromOffset(kLoadDoubleword,
                         T9,
                         callee_method.AsRegister<GpuRegister>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMips64WordSize).Int32Value());
+                            kMips64PointerSize).Int32Value());
       // T9()
       __ Jalr(T9);
+      __ Nop();
       break;
   }
   DCHECK(!IsLeafMethod());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -2630,29 +3139,38 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
-    return;
-  }
+void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  GpuRegister receiver = calling_convention.GetRegisterAt(0);
 
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
-  GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
 
   // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
+  MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+  __ Nop();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -2662,7 +3180,7 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(A0));
+      calling_convention.GetReturnLocation(cls->GetType()));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
@@ -2673,6 +3191,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -2684,28 +3203,33 @@
     __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadDoubleword, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
-    __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    __ LoadFromOffset(
+        kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     // TODO: We will need a read barrier here.
-    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqzc(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqzc(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMips64WordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
@@ -2727,45 +3251,23 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
   SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
   codegen_->AddSlowPath(slow_path);
-
-  LocationSummary* locations = load->GetLocations();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-  GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
-  __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
-                    ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  // TODO: We will need a read barrier here.
-  __ Beqzc(out, slow_path->GetEntryLabel());
+  __ Bc(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -2777,7 +3279,7 @@
 
 void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -2789,7 +3291,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitMul(HMul* mul) {
@@ -2900,7 +3406,7 @@
 
 void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
@@ -2921,22 +3427,35 @@
 
 void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  // Move an uint16_t value to a register.
-  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+    MemberOffset code_offset =
+        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
+    __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
+    __ Jalr(T9);
+    __ Nop();
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitNot(HNot* instruction) {
@@ -2987,19 +3506,19 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) {
-  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
     return;
   }
   Location obj = instruction->GetLocations()->InAt(0);
 
   __ Lw(ZERO, obj.AsRegister<GpuRegister>(), 0);
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS64::GenerateExplicitNullCheck(HNullCheck* instruction) {
+void CodeGeneratorMIPS64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathMIPS64(instruction);
-  codegen_->AddSlowPath(slow_path);
+  AddSlowPath(slow_path);
 
   Location obj = instruction->GetLocations()->InAt(0);
 
@@ -3007,11 +3526,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
-    GenerateImplicitNullCheck(instruction);
-  } else {
-    GenerateExplicitNullCheck(instruction);
-  }
+  codegen_->GenerateNullCheck(instruction);
 }
 
 void LocationsBuilderMIPS64::VisitOr(HOr* instruction) {
@@ -3059,7 +3574,7 @@
 
 void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3072,14 +3587,15 @@
 void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -3099,26 +3615,23 @@
 
 void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
   Primitive::Type type = instruction->GetType();
-  LocationSummary* locations = instruction->GetLocations();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ ModR6(dst, lhs, rhs);
-      else
-        __ Dmod(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
     default:
@@ -3152,6 +3665,14 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS64::VisitRor(HRor* ror) {
+  HandleShift(ror);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) {
+  HandleShift(ror);
+}
+
 void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3168,33 +3689,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
@@ -3216,7 +3710,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
-  HandleFieldSet(instruction, instruction->GetFieldInfo());
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
 void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldGet(
@@ -3305,17 +3799,9 @@
   GenerateSuspendCheck(instruction, nullptr);
 }
 
-void LocationsBuilderMIPS64::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3338,36 +3824,18 @@
     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
   }
 
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
-    call_kind = LocationSummary::kCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion);
+
+  if (Primitive::IsFloatingPointType(input_type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
-
-  if (call_kind == LocationSummary::kNoCall) {
-    if (Primitive::IsFloatingPointType(input_type)) {
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(0, Location::RequiresRegister());
-    }
-
-    if (Primitive::IsFloatingPointType(result_type)) {
-      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-    } else {
-      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-    }
+  if (Primitive::IsFloatingPointType(result_type)) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    InvokeRuntimeCallingConvention calling_convention;
-
-    if (Primitive::IsFloatingPointType(input_type)) {
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-    } else {
-      locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    }
-
-    locations->SetOut(calling_convention.GetReturnLocation(result_type));
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
@@ -3387,18 +3855,26 @@
         __ Andi(dst, src, 0xFFFF);
         break;
       case Primitive::kPrimByte:
-        // long is never converted into types narrower than int directly,
-        // so SEB and SEH can be used without ever causing unpredictable results
-        // on 64-bit inputs
-        DCHECK(input_type != Primitive::kPrimLong);
-        __ Seb(dst, src);
+        if (input_type == Primitive::kPrimLong) {
+          // Type conversion from long to types narrower than int is a result of code
+          // transformations. To avoid unpredictable results for SEB and SEH, we first
+          // need to sign-extend the low 32-bit value into bits 32 through 63.
+          __ Sll(dst, src, 0);
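+          // (sll with shift amount 0 is the canonical MIPS64 idiom for
+          // sign-extending the low 32 bits into bits 32 through 63.)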
+          __ Seb(dst, dst);
+        } else {
+          __ Seb(dst, src);
+        }
         break;
       case Primitive::kPrimShort:
-        // long is never converted into types narrower than int directly,
-        // so SEB and SEH can be used without ever causing unpredictable results
-        // on 64-bit inputs
-        DCHECK(input_type != Primitive::kPrimLong);
-        __ Seh(dst, src);
+        if (input_type == Primitive::kPrimLong) {
+          // Type conversion from long to types narrower than int is a result of code
+          // transformations. To avoid unpredictable results for SEB and SEH, we first
+          // need to sign-extend the low 32-bit value into bits 32 through 63.
+          __ Sll(dst, src, 0);
+          __ Seh(dst, dst);
+        } else {
+          __ Seh(dst, src);
+        }
         break;
       case Primitive::kPrimInt:
       case Primitive::kPrimLong:
@@ -3412,37 +3888,107 @@
                    << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
-      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
-      GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+    if (input_type == Primitive::kPrimLong) {
+      __ Dmtc1(src, FTMP);
+      if (result_type == Primitive::kPrimFloat) {
+        __ Cvtsl(dst, FTMP);
+      } else {
+        __ Cvtdl(dst, FTMP);
+      }
+    } else {
       __ Mtc1(src, FTMP);
       if (result_type == Primitive::kPrimFloat) {
         __ Cvtsw(dst, FTMP);
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr);
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+    Mips64Label truncate;
+    Mips64Label done;
+
+    // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+    // value when the input is either a NaN or is outside of the range of the output type
+    // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+    // the same result.
+    //
+    // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+    // value of the output type if the input is outside of the range after the truncation or
+    // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+    // results. This matches the desired float/double-to-int/long conversion exactly.
+    //
+    // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+    //
+    // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+    // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+    // even though it must be NAN2008=1 on R6.
+    //
+    // The code takes care of the different behaviors by first comparing the input to the
+    // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+    // If the input is greater than or equal to the minimum, it proceeds to the truncate
+    // instruction, which will handle such an input the same way irrespective of NAN2008.
+    // Otherwise the input is compared to itself to determine whether it is a NaN or not
+    // in order to return either zero or the minimum value.
+    //
+    // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+    // truncate instruction for MIPS64R6.
+    if (input_type == Primitive::kPrimFloat) {
+      uint32_t min_val = (result_type == Primitive::kPrimLong)
+          ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+          : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+      __ LoadConst32(TMP, min_val);
+      __ Mtc1(TMP, FTMP);
+      __ CmpLeS(FTMP, FTMP, src);
     } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
+      uint64_t min_val = (result_type == Primitive::kPrimLong)
+          ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+          : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+      __ LoadConst64(TMP, min_val);
+      __ Dmtc1(TMP, FTMP);
+      __ CmpLeD(FTMP, FTMP, src);
     }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr);
+
+    __ Bc1nez(FTMP, &truncate);
+
+    if (input_type == Primitive::kPrimFloat) {
+      __ CmpEqS(FTMP, src, src);
+    } else {
+      __ CmpEqD(FTMP, src, src);
+    }
+    if (result_type == Primitive::kPrimLong) {
+      __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+    } else {
+      __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+    }
+    __ Mfc1(TMP, FTMP);
+    __ And(dst, dst, TMP);
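+    // CMP.EQ writes all ones to FTMP when src == src (i.e. not a NaN) and all
+    // zeros otherwise, and mfc1 sign-extends that mask; dst is therefore `min`
+    // for an out-of-range input and 0 for a NaN, matching the Java rules for
+    // float/double-to-int/long conversion.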
+
+    __ Bc(&done);
+
+    __ Bind(&truncate);
+
+    if (result_type == Primitive::kPrimLong) {
+      if (input_type == Primitive::kPrimFloat) {
+        __ TruncLS(FTMP, src);
+      } else {
+        __ TruncLD(FTMP, src);
+      }
+      __ Dmfc1(dst, FTMP);
+    } else {
+      if (input_type == Primitive::kPrimFloat) {
+        __ TruncWS(FTMP, src);
+      } else {
+        __ TruncWD(FTMP, src);
+      }
+      __ Mfc1(dst, FTMP);
+    }
+
+    __ Bind(&done);
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -3485,95 +4031,83 @@
 }
 
 void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
-}
-
-void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorMIPS64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
+  HandleCondition(comp);
 }
 
 // Simple implementation of packed switch - generate cascaded compare/jumps.
@@ -3590,24 +4124,49 @@
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
+  // Create a set of compare/jumps.
+  GpuRegister temp_reg = TMP;
+  if (IsInt<16>(-lower_bound)) {
+    __ Addiu(temp_reg, value_reg, -lower_bound);
+  } else {
+    __ LoadConst32(AT, -lower_bound);
+    __ Addu(temp_reg, value_reg, AT);
+  }
+  // Jump to default if index is negative.
+  // Note: we don't check the case where the index is positive while value < lower_bound, because
+  // in that case index >= num_entries must hold, so this saves one branch instruction.
+  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqzc(value_reg, succ);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beqc(value_reg, TMP, succ);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // Handle the last remaining case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
   // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ B(codegen_->GetLabelOf(default_block));
+    __ Bc(codegen_->GetLabelOf(default_block));
   }
 }
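
To make the cascade concrete: the value is biased by -lower_bound once, negatives go to the default block, and each loop step subtracts 2 so temp can keep being tested against zero. A hedged C++ model of the successor selection (PackedSwitchTarget is an illustrative name; -1 stands for the default block, and it assumes num_entries >= 1):

#include <cstdint>

int32_t PackedSwitchTarget(int32_t value, int32_t lower_bound, int32_t num_entries) {
  int32_t temp = value - lower_bound;       // Addiu/Addu
  if (temp < 0) return -1;                  // Bltzc -> default
  if (temp == 0) return 0;                  // Beqzc -> successors[0]
  int32_t last_index = 0;
  for (; num_entries - last_index > 2; last_index += 2) {
    temp -= 2;                              // Addiu(temp, temp, -2)
    if (temp < 0) return last_index + 1;    // Bltzc
    if (temp == 0) return last_index + 2;   // Beqzc
  }
  if (num_entries - last_index == 2) {
    temp -= 1;                              // the last remaining case_value
    if (temp == 0) return last_index + 1;   // Beqzc
  }
  return -1;                                // default
}
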
 
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+}
+
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+}
+
 }  // namespace mips64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index df3fc0d..197f86b 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -27,10 +26,6 @@
 namespace art {
 namespace mips64 {
 
-// Use a local definition to prevent copying mistakes.
-static constexpr size_t kMips64WordSize = kMips64PointerSize;
-
-
 // InvokeDexCallingConvention registers
 
 static constexpr GpuRegister kParameterCoreRegisters[] =
@@ -119,9 +114,12 @@
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
     return Location::RegisterLocation(V0);
   }
-  Location GetSetValueLocation(
-      Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE {
-    return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1);
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterLocation(A2)
+        : (is_instance
+            ? Location::RegisterLocation(A2)
+            : Location::RegisterLocation(A1));
   }
   Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
     return Location::FpuRegisterLocation(F0);
@@ -153,14 +151,15 @@
 
 class SlowPathCodeMIPS64 : public SlowPathCode {
  public:
-  SlowPathCodeMIPS64() : entry_label_(), exit_label_() {}
+  explicit SlowPathCodeMIPS64(HInstruction* instruction)
+      : SlowPathCode(instruction), entry_label_(), exit_label_() {}
 
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
+  Mips64Label* GetEntryLabel() { return &entry_label_; }
+  Mips64Label* GetExitLabel() { return &exit_label_; }
 
  private:
-  Label entry_label_;
-  Label exit_label_;
+  Mips64Label entry_label_;
+  Mips64Label exit_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64);
 };
@@ -186,6 +185,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -197,7 +197,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
 };
 
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
 
@@ -221,15 +221,30 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
-  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldSet(HInstruction* instruction,
+                      const FieldInfo& field_info,
+                      bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
-  void GenerateImplicitNullCheck(HNullCheck* instruction);
-  void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
-                             Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
+                             size_t condition_input_index,
+                             Mips64Label* true_target,
+                             Mips64Label* false_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations);
+  void GenerateIntLongCompareAndBranch(IfCondition cond,
+                                       bool is64bit,
+                                       LocationSummary* locations,
+                                       Mips64Label* label);
+  void GenerateFpCompareAndBranch(IfCondition cond,
+                                  bool gt_bias,
+                                  Primitive::Type type,
+                                  LocationSummary* locations,
+                                  Mips64Label* label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   Mips64Assembler* const assembler_;
@@ -251,14 +266,12 @@
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  size_t GetWordSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
-
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
-    return GetLabelOf(block)->Position();
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+    return assembler_.GetLabelLocation(GetLabelOf(block));
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
@@ -266,16 +279,11 @@
   Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
   const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
 
-  void MarkGCCard(GpuRegister object, GpuRegister value);
+  void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
-
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
@@ -291,12 +299,12 @@
     return isa_features_;
   }
 
-  Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_, block);
+  Mips64Label* GetLabelOf(HBasicBlock* block) const {
+    return CommonGetLabelOf<Mips64Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_ = CommonInitializeLabels<Label>();
+    block_labels_ = CommonInitializeLabels<Mips64Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -326,21 +334,38 @@
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
-                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL);
-  }
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL);
+    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64";
   }
 
+  void GenerateNop();
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
  private:
   // Labels for each block that will be compiled.
-  Label* block_labels_;  // Indexed by block id.
-  Label frame_entry_label_;
+  Mips64Label* block_labels_;  // Indexed by block id.
+  Mips64Label frame_entry_label_;
   LocationsBuilderMIPS64 location_builder_;
   InstructionCodeGeneratorMIPS64 instruction_visitor_;
   ParallelMoveResolverMIPS64 move_resolver_;
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 921c1d8..96fe2a1 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_utils.h"
+#include "nodes.h"
 
 #include "base/logging.h"
 
@@ -94,4 +95,8 @@
   *shift = is_long ? p - 64 : p - 32;
 }
 
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) {
+  return !cond_input->IsCondition() || !cond_input->IsEmittedAtUseSite();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 59b495c..7efed8c 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -21,10 +21,17 @@
 
 namespace art {
 
+class HInstruction;
+
 // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out
 // arguments `magic` and `shift`
 void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
 
+// Returns true if `cond_input` is expected to have a location. Assumes that
+// `cond_input` is a conditional input of the currently emitted instruction and
+// that it has been previously visited by the InstructionCodeGenerator.
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
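
As a usage sketch, a backend's locations builder can consult this predicate to decide whether a branch input needs a slot; the enclosing VisitIf and the Location::Any() choice below are assumptions modeled on the existing code generators, not a quote of them:

void LocationsBuilderX86::VisitIf(HIf* if_instr) {  // assumed context
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
  // Materialized conditions (and plain booleans) occupy a location; conditions
  // emitted at the use site do not, so no input slot is reserved for them.
  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
    locations->SetInAt(0, Location::Any());
  }
}
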
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 963eec2..7aca16f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -19,7 +19,6 @@
 #include "art_method.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
-#include "constant_area_fixups_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
@@ -35,23 +34,26 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace x86 {
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = EAX;
-
 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
 
 static constexpr int kC2ConditionMask = 0x400;
 
 static constexpr int kFakeReturnRegister = Register(8);
 
-#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
 
 class NullCheckSlowPathX86 : public SlowPathCode {
  public:
-  explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {}
+  explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
@@ -64,6 +66,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -71,13 +74,12 @@
   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; }
 
  private:
-  HNullCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
 };
 
 class DivZeroCheckSlowPathX86 : public SlowPathCode {
  public:
-  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : instruction_(instruction) {}
+  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
@@ -90,6 +92,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -97,13 +100,13 @@
   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; }
 
  private:
-  HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
 };
 
 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
  public:
-  DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {}
+  DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
+      : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -125,7 +128,7 @@
 
 class BoundsCheckSlowPathX86 : public SlowPathCode {
  public:
-  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : instruction_(instruction) {}
+  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -137,18 +140,40 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
     InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<Register>(), array_len);
+    }
     x86_codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    x86_codegen->InvokeRuntime(entry_point_offset,
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -156,25 +181,22 @@
   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; }
 
  private:
-  HBoundsCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
 };
 
 class SuspendCheckSlowPathX86 : public SlowPathCode {
  public:
   SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
-      : instruction_(instruction), successor_(successor) {}
+      : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -194,7 +216,6 @@
   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; }
 
  private:
-  HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
   Label return_label_;
 
@@ -203,7 +224,7 @@
 
 class LoadStringSlowPathX86 : public SlowPathCode {
  public:
-  explicit LoadStringSlowPathX86(HLoadString* instruction) : instruction_(instruction) {}
+  explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -214,11 +235,13 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex()));
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index));
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
     RestoreLiveRegisters(codegen, locations);
 
@@ -228,8 +251,6 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }
 
  private:
-  HLoadString* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
 };
 
@@ -239,7 +260,7 @@
                        HInstruction* at,
                        uint32_t dex_pc,
                        bool do_clinit)
-      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
@@ -254,6 +275,11 @@
     x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                           : QUICK_ENTRY_POINT(pInitializeType),
                                at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -288,7 +314,7 @@
 class TypeCheckSlowPathX86 : public SlowPathCode {
  public:
   TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
-      : instruction_(instruction), is_fatal_(is_fatal) {}
+      : SlowPathCode(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -300,15 +326,6 @@
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = locations->InAt(0).AsRegister<Register>();
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ movl(temp, Address(obj, class_offset));
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -329,12 +346,15 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -351,7 +371,6 @@
   bool IsFatal() const OVERRIDE { return is_fatal_; }
 
  private:
-  HInstruction* const instruction_;
   const bool is_fatal_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
@@ -359,11 +378,10 @@
 
 class DeoptimizationSlowPathX86 : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathX86(HInstruction* instruction)
-    : instruction_(instruction) {}
+  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
+    : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    DCHECK(instruction_->IsDeoptimize());
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
@@ -371,18 +389,18 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
 
  private:
-  HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
 };
 
 class ArraySetSlowPathX86 : public SlowPathCode {
  public:
-  explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
+  explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -413,6 +431,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -420,13 +439,299 @@
   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }
 
  private:
-  HInstruction* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg = obj_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    DCHECK_NE(reg, ESP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in EAX):
+    //
+    //   EAX <- obj
+    //   EAX <- ReadBarrierMark(EAX)
+    //   obj <- EAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
+};
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
+                                         Location out,
+                                         Location ref,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location index)
+      : SlowPathCode(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ movl(out, Address(out, offset));
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like the other "inputs" of this slow path), we
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::x86::X86Assembler::shll and
+          // art::x86::X86Assembler::AddImmediate below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ movl(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ shll(index_reg, Immediate(TIMES_4));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddImmediate(index_reg, Immediate(offset_));
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
+    }
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on x86
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
+};
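
For the HArrayGet case, the slow path folds the scaled index and the data offset into the single offset argument passed to pReadBarrierSlow. A self-checking sketch of that arithmetic (HeapRefOffset is an illustrative helper, not ART code):

#include <cstdint>

// sizeof(mirror::HeapReference<mirror::Object>) == 4, hence the shift by 2.
constexpr uint32_t HeapRefOffset(uint32_t index, uint32_t data_offset) {
  return (index << 2) + data_offset;
}
static_assert(HeapRefOffset(3, 12) == 24, "index 3 with a 12-byte data offset");
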
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
+      : SlowPathCode(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
+};
+
 #undef __
-#define __ down_cast<X86Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
 
 inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
@@ -497,7 +802,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kX86WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -512,10 +817,17 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ fs()->call(Address::Absolute(entry_point_offset));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
-                   const X86InstructionSetFeatures& isa_features,
-                   const CompilerOptions& compiler_options,
-                   OptimizingCompilerStats* stats)
+                                   const X86InstructionSetFeatures& isa_features,
+                                   const CompilerOptions& compiler_options,
+                                   OptimizingCompilerStats* stats)
     : CodeGenerator(graph,
                     kNumberOfCpuRegisters,
                     kNumberOfXmmRegisters,
@@ -530,73 +842,28 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      constant_area_start_(-1),
+      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_address_offset_(-1) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      X86ManagedRegister pair =
-          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register reg = static_cast<Register>(
-          FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        X86ManagedRegister current =
-            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      return Location::FpuRegisterLocation(
-          FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
 
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  if (is_baseline) {
-    blocked_core_registers_[EBP] = true;
-    blocked_core_registers_[ESI] = true;
-    blocked_core_registers_[EDI] = true;
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -612,7 +879,7 @@
 }
 
 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -676,30 +943,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -782,7 +1025,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorX86::Move32(Location destination, Location source) {
@@ -900,102 +1143,20 @@
   }
 }
 
-void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  if (instruction->IsCurrentMethod()) {
-    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (locations != nullptr && locations->Out().IsConstant()) {
-    HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
-      Immediate imm(GetInt32ValueOf(const_to_move));
-      if (location.IsRegister()) {
-        __ movl(location.AsRegister<Register>(), imm);
-      } else if (location.IsStackSlot()) {
-        __ movl(Address(ESP, location.GetStackIndex()), imm);
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), const_to_move);
-      }
-    } else if (const_to_move->IsLongConstant()) {
-      int64_t value = const_to_move->AsLongConstant()->GetValue();
-      if (location.IsRegisterPair()) {
-        __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
-        __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
-      } else if (location.IsDoubleStackSlot()) {
-        __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
-        __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)),
-                Immediate(High32Bits(value)));
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), instruction);
-      }
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    if (temp_location.IsStackSlot()) {
-      Move32(location, temp_location);
-    } else {
-      DCHECK(temp_location.IsDoubleStackSlot());
-      Move64(location, temp_location);
-    }
-  } else if (instruction->IsLoadLocal()) {
-    int slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move32(location, Location::StackSlot(slot));
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, Location::DoubleStackSlot(slot));
-        break;
-
-      default:
-        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move32(location, locations->Out());
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, locations->Out());
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  }
-}
-
 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ movl(location.AsRegister<Register>(), Immediate(value));
 }
 
 void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  if (Primitive::Is64BitType(dst_type)) {
-    Move64(dst, src);
+  HParallelMove move(GetGraph()->GetArena());
+  if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) {
+    move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr);
+    move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr);
   } else {
-    Move32(dst, src);
+    move.AddMove(src, dst, dst_type, nullptr);
   }
+  GetMoveResolver()->EmitNativeCode(&move);
 }
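
Decomposing the 64-bit move into two 32-bit component moves and resolving them together matters when the halves alias: the resolver orders the component moves (or breaks cycles through a temporary) so no source is clobbered before it is read. An illustrative sketch of the hazard, not ART code:

#include <cstdint>

// Parallel set {lo: a -> b, hi: b -> c}: emitting "b = a" first would destroy
// the value that "c = b" still needs; the resolver emits the dependent move first.
void ResolvedPairMove(uint32_t& a, uint32_t& b, uint32_t& c) {
  c = b;  // dependent move first
  b = a;
}
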
 
 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
@@ -1055,9 +1216,10 @@
 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
-                                                  Label* true_label,
-                                                  Label* false_label) {
+                                                  LabelType* true_label,
+                                                  LabelType* false_label) {
   if (cond->IsFPConditionTrueIfNaN()) {
     __ j(kUnordered, true_label);
   } else if (cond->IsFPConditionFalseIfNaN()) {
@@ -1066,9 +1228,10 @@
   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
-                                                               Label* true_label,
-                                                               Label* false_label) {
+                                                               LabelType* true_label,
+                                                               LabelType* false_label) {
   LocationSummary* locations = cond->GetLocations();
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
@@ -1118,11 +1281,7 @@
     int32_t val_high = High32Bits(value);
     int32_t val_low = Low32Bits(value);
 
-    if (val_high == 0) {
-      __ testl(left_high, left_high);
-    } else {
-      __ cmpl(left_high, Immediate(val_high));
-    }
+    codegen_->Compare32BitValue(left_high, val_high);
     if (if_cond == kCondNE) {
       __ j(X86Condition(true_high_cond), true_label);
     } else if (if_cond == kCondEQ) {
@@ -1132,12 +1291,8 @@
       __ j(X86Condition(false_high_cond), false_label);
     }
     // Must be equal high, so compare the lows.
-    if (val_low == 0) {
-      __ testl(left_low, left_low);
-    } else {
-      __ cmpl(left_low, Immediate(val_low));
-    }
-  } else {
+    codegen_->Compare32BitValue(left_low, val_low);
+  } else if (right.IsRegisterPair()) {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
 
@@ -1152,225 +1307,381 @@
     }
     // Must be equal high, so compare the lows.
     __ cmpl(left_low, right_low);
+  } else {
+    DCHECK(right.IsDoubleStackSlot());
+    __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+    if (if_cond == kCondNE) {
+      __ j(X86Condition(true_high_cond), true_label);
+    } else if (if_cond == kCondEQ) {
+      __ j(X86Condition(false_high_cond), false_label);
+    } else {
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
+    }
+    // Must be equal high, so compare the lows.
+    __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
   }
   // The last comparison might be unsigned.
   __ j(final_condition, true_label);
 }
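
The cascade above encodes the standard two's-complement rule: the high words decide when they differ (signed compare), and only on equal highs do the low words decide (unsigned compare), which is why the final jump may use an unsigned condition. A minimal model, illustrative only:

#include <cstdint>

// Signed 64-bit "less than" built from 32-bit halves.
bool LessThan64(int32_t lhs_hi, uint32_t lhs_lo, int32_t rhs_hi, uint32_t rhs_lo) {
  if (lhs_hi != rhs_hi) {
    return lhs_hi < rhs_hi;  // signed compare on the high words
  }
  return lhs_lo < rhs_lo;    // unsigned compare on the low words
}
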
 
-void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr,
-                                                               HCondition* condition,
-                                                               Label* true_target,
-                                                               Label* false_target,
-                                                               Label* always_true_target) {
+void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
+                                                    Location rhs,
+                                                    HInstruction* insn,
+                                                    bool is_double) {
+  HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
+  if (is_double) {
+    if (rhs.IsFpuRegister()) {
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+    } else if (const_area != nullptr) {
+      DCHECK(const_area->IsEmittedAtUseSite());
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(
+                   const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                   const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+    } else {
+      DCHECK(rhs.IsDoubleStackSlot());
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
+    }
+  } else {
+    if (rhs.IsFpuRegister()) {
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+    } else if (const_area != nullptr) {
+      DCHECK(const_area->IsEmittedAtUseSite());
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(
+                   const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                   const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+    } else {
+      DCHECK(rhs.IsStackSlot());
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
+    }
+  }
+}
+
+template<class LabelType>
+void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
+                                                               LabelType* true_target_in,
+                                                               LabelType* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+  LabelType fallthrough_target;
+  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
   LocationSummary* locations = condition->GetLocations();
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  // We don't want true_target as a nullptr.
-  if (true_target == nullptr) {
-    true_target = always_true_target;
-  }
-  bool falls_through = (false_target == nullptr);
-
-  // FP compares don't like null false_targets.
-  if (false_target == nullptr) {
-    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  }
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong:
       GenerateLongComparesAndJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimFloat:
-      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, condition, false);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimDouble:
-      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, condition, true);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
   }
 
-  if (!falls_through) {
+  if (false_target != &fallthrough_target) {
     __ jmp(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
 
+static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+  // Moves may affect the eflags register (moving zero uses xorl), so the
+  // eflags set by `cond` can only be relied on if `cond` is the instruction
+  // immediately preceding `branch`. We can't use the eflags on long/FP
+  // conditions if they are materialized due to the complex branching.
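+  // For example, a move materializing zero in between would emit
+  // `xorl reg, reg` and clobber the flags.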
+  return cond->IsCondition() &&
+         cond->GetNext() == branch &&
+         cond->InputAt(0)->GetType() != Primitive::kPrimLong &&
+         !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
+}
+
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
-                                                        Label* true_target,
-                                                        Label* false_target,
-                                                        Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  if (cond->IsIntConstant()) {
-    // Constant condition, statically compared against 1.
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ jmp(always_true_target);
+                                                        size_t condition_input_index,
+                                                        LabelType* true_target,
+                                                        LabelType* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against "true" (integer value 1).
+    if (cond->AsIntConstant()->IsTrue()) {
+      if (true_target != nullptr) {
+        __ jmp(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      if (false_target != nullptr) {
+        __ jmp(false_target);
+      }
     }
-  } else {
-    bool is_materialized =
-        !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
-    // Moves do not affect the eflags register, so if the condition is
-    // evaluated just before the if, we don't need to evaluate it
-    // again.  We can't use the eflags on long/FP conditions if they are
-    // materialized due to the complex branching.
-    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-    bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
-        && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type));
-    if (is_materialized) {
-      if (!eflags_set) {
-        // Materialized condition, compare against 0.
-        Location lhs = instruction->GetLocations()->InAt(0);
-        if (lhs.IsRegister()) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
-        }
-        __ j(kNotEqual, true_target);
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    if (AreEflagsSetFrom(cond, instruction)) {
+      if (true_target == nullptr) {
+        __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
       } else {
         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
-      // Condition has not been materialized, use its inputs as the
-      // comparison and its condition as the branch condition.
-
-      // Is this a long or FP comparison that has been folded into the HCondition?
-      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-        // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(),
-                                     cond->AsCondition(),
-                                     true_target,
-                                     false_target,
-                                     always_true_target);
-        return;
-      }
-
-      Location lhs = cond->GetLocations()->InAt(0);
-      Location rhs = cond->GetLocations()->InAt(1);
-      // LHS is guaranteed to be in a register (see
-      // LocationsBuilderX86::VisitCondition).
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-        }
+      // Materialized condition, compare against 0.
+      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
+      if (lhs.IsRegister()) {
+        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
       } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+        __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
       }
-      __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
+      if (true_target == nullptr) {
+        __ j(kEqual, false_target);
+      } else {
+        __ j(kNotEqual, true_target);
+      }
+    }
+  } else {
+    // Condition has not been materialized, use its inputs as the comparison and
+    // its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+
+    // If this is a long or FP comparison that has been folded into
+    // the HCondition, generate the comparison directly.
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      return;
+    }
+
+    Location lhs = condition->GetLocations()->InAt(0);
+    Location rhs = condition->GetLocations()->InAt(1);
+    // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
+    } else if (rhs.IsConstant()) {
+      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+      codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
+    } else {
+      __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+    }
+    if (true_target == nullptr) {
+      __ j(X86Condition(condition->GetOppositeCondition()), false_target);
+    } else {
+      __ j(X86Condition(condition->GetCondition()), true_target);
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // has already been emitted (as in case 2) and we still need a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ jmp(false_target);
   }
 }
 
 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathX86(deoptimize);
-  codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
+  GenerateTestAndBranch<Label>(deoptimize,
+                               /* condition_input_index */ 0,
+                               slow_path->GetEntryLabel(),
+                               /* false_target */ nullptr);
 }
 
-void LocationsBuilderX86::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // An FP condition doesn't generate the single condition code (CC) we need.
+  // In 32-bit mode, a long condition doesn't generate a single CC either.
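+  // (On x86-32, a long compare is lowered to separate high/low comparisons
+  // and branches; see GenerateLongComparesAndJumps.)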
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition()) {
+    Primitive::Type compare_type = condition->InputAt(0)->GetType();
+    if (compare_type == Primitive::kPrimLong ||
+        Primitive::IsFloatingPointType(compare_type)) {
+      return false;
+    }
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
 }
 
-void InstructionCodeGeneratorX86::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
+void LocationsBuilderX86::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::Any());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      if (select->InputAt(1)->IsConstant()) {
+        // Cmov can't take an immediate source operand, so load the constant
+        // into a register.
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+    } else {
+      locations->SetInAt(1, Location::Any());
+    }
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
 }
 
-void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unknown local type " << store->InputAt(1)->GetType();
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+  Register lhs_reg = lhs.AsRegister<Register>();
+  if (rhs.IsConstant()) {
+    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+    Compare32BitValue(lhs_reg, value);
+  } else if (rhs.IsStackSlot()) {
+    assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+  } else {
+    assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
   }
 }
 
-void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // The condition was materialized by the previous instruction, so
+          // the condition codes are still valid.
+          cond = X86Condition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          Register cond_reg = locations->InAt(2).AsRegister<Register>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        // We can't handle FP or long here.
+        DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+        DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
+        LocationSummary* cond_locations = condition->GetLocations();
+        codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+        cond = X86Condition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      Register cond_reg = locations->InAt(2).AsRegister<Register>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    Location false_loc = locations->InAt(0);
+    Location true_loc = locations->InAt(1);
+    if (select->GetType() == Primitive::kPrimLong) {
+      // 64 bit conditional move.
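+      // x86-32 has no 64-bit cmov, so move the pair as two 32-bit cmovs
+      // guarded by the same condition.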
+      Register false_high = false_loc.AsRegisterPairHigh<Register>();
+      Register false_low = false_loc.AsRegisterPairLow<Register>();
+      if (true_loc.IsRegisterPair()) {
+        __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
+        __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
+      } else {
+        __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
+        __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
+      }
+    } else {
+      // 32 bit conditional move.
+      Register false_reg = false_loc.AsRegister<Register>();
+      if (true_loc.IsRegister()) {
+        __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
+      } else {
+        __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
+      }
+    }
+  } else {
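+    // No cmov: the output already holds the false value, so branch over the
+    // move of the true value when the condition does not hold.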
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(
+        select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
-void LocationsBuilderX86::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
+void CodeGeneratorX86::GenerateNop() {
+  __ nop();
+}
+
+void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
   switch (cond->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      locations->SetInAt(1, Location::Any());
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
       break;
@@ -1378,8 +1689,14 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
-      if (cond->NeedsMaterialization()) {
+      if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
+      } else if (cond->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+      if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
       break;
@@ -1387,7 +1704,7 @@
     default:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
         // We need a byte register.
         locations->SetOut(Location::RegisterLocation(ECX));
       }
@@ -1395,8 +1712,8 @@
   }
 }
 
-void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -1404,7 +1721,7 @@
   Location lhs = locations->InAt(0);
   Location rhs = locations->InAt(1);
   Register reg = locations->Out().AsRegister<Register>();
-  Label true_label, false_label;
+  NearLabel true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default: {
@@ -1412,19 +1729,7 @@
 
       // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
-
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-        }
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
-      }
+      codegen_->GenerateIntCompare(lhs, rhs);
       __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
@@ -1432,11 +1737,11 @@
       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimFloat:
-      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(lhs, rhs, cond, false);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimDouble:
-      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(lhs, rhs, cond, true);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -1456,83 +1761,83 @@
 }
 
 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
@@ -1590,7 +1895,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
@@ -1671,26 +1976,23 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
 
-  if (codegen_->IsBaseline()) {
-    // Baseline does not have enough registers if the current method also
-    // needs a register. We therefore do not require a register for it, and let
-    // the code generation of the invoke handle it.
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
-    }
+  // For PC-relative dex cache accesses, the invoke has an extra input:
+  // the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 }
 
@@ -1704,9 +2006,9 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -1719,6 +2021,11 @@
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -1738,6 +2045,9 @@
 }
 
 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // This call to HandleInvoke allocates a temporary (core) register
+  // which is also used to transfer the hidden argument from a core
+  // register to the FP register XMM7.
   HandleInvoke(invoke);
   // Add the hidden argument.
   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
@@ -1745,31 +2055,45 @@
 
 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   LocationSummary* locations = invoke->GetLocations();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  // Set the hidden argument.
+  // Set the hidden argument. This is safe to do here, as XMM7
+  // won't be modified before the `call` instruction.
+  DCHECK_EQ(XMM7, hidden_reg);
   __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
-  __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp);
+  __ movd(hidden_reg, temp);
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ movl(temp, Address(temp, class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (future
+  // collectors may not preserve this invariant).
   __ MaybeUnpoisonHeapReference(temp);
+  // temp = temp->GetAddressOfIMT()
+  __ movl(temp,
+      Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86WordSize).Int32Value()));
+  __ call(Address(temp,
+                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1856,6 +2180,32 @@
   }
 }
 
+void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  DCHECK(Primitive::IsFloatingPointType(neg->GetType()));
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresFpuRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
+  LocationSummary* locations = neg->GetLocations();
+  Location out = locations->Out();
+  DCHECK(locations->InAt(0).Equals(out));
+
+  Register constant_area = locations->InAt(1).AsRegister<Register>();
+  XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
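+  // Negation flips only the sign bit: XOR with 0x80000000 for float or
+  // 0x8000000000000000 for double, loaded from the constant area.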
+  if (neg->GetType() == Primitive::kPrimFloat) {
+    __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), constant_area));
+    __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
+  } else {
+    __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), constant_area));
+    __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
+  }
+}
+
 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
@@ -1866,7 +2216,7 @@
   LocationSummary::CallKind call_kind =
       ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
        && result_type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -1877,6 +2227,18 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong: {
+          // Type conversion from long to byte is a result of code transformations.
+          HInstruction* input = conversion->InputAt(0);
+          Location input_location = input->IsConstant()
+              ? Location::ConstantLocation(input->AsConstant())
+              : Location::RegisterPairLocation(EAX, EDX);
+          locations->SetInAt(0, input_location);
+          // Make the output overlap to please the register allocator. This greatly
+          // simplifies the validation of the linear scan implementation.
+          locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+          break;
+        }
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -1897,6 +2259,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -1974,6 +2338,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2068,6 +2434,16 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2091,6 +2467,18 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2207,6 +2595,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
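+          // Check that the kQuickF2l entrypoint returns an int64_t and takes a float.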
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2215,6 +2604,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -2225,6 +2615,18 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2381,7 +2783,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
+      } else if (add->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2445,7 +2853,7 @@
         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ addss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -2462,7 +2870,7 @@
         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                    const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -2493,7 +2901,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
+      } else if (sub->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2543,7 +2957,7 @@
         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ subss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -2560,7 +2974,7 @@
         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ subsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -2603,7 +3017,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
+      } else if (mul->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2724,7 +3144,7 @@
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ mulss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -2742,7 +3162,7 @@
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -2885,11 +3305,12 @@
   Register out_register = locations->Out().AsRegister<Register>();
   Register input_register = locations->InAt(0).AsRegister<Register>();
   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
 
-  DCHECK(IsPowerOfTwo(std::abs(imm)));
   Register num = locations->GetTemp(0).AsRegister<Register>();
 
-  __ leal(num, Address(input_register, std::abs(imm) - 1));
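+  // Signed division by 2^k truncates toward zero, so bias a negative
+  // numerator by (2^k - 1); the cmov below keeps the unbiased value for
+  // non-negative inputs.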
+  __ leal(num, Address(input_register, abs_imm - 1));
   __ testl(input_register, input_register);
   __ cmovl(kGreaterEqual, num, input_register);
   int shift = CTZ(imm);
@@ -2933,17 +3354,6 @@
   int shift;
   CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
 
-  NearLabel ndiv;
-  NearLabel end;
-  // If numerator is 0, the result is 0, no computation needed.
-  __ testl(eax, eax);
-  __ j(kNotEqual, &ndiv);
-
-  __ xorl(out, out);
-  __ jmp(&end);
-
-  __ Bind(&ndiv);
-
   // Save the numerator.
   __ movl(num, eax);
 
@@ -2978,7 +3388,6 @@
   } else {
     __ movl(eax, edx);
   }
-  __ Bind(&end);
 }
 
 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
@@ -2995,23 +3404,22 @@
       DCHECK_EQ(EAX, first.AsRegister<Register>());
       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
 
-      if (instruction->InputAt(1)->IsIntConstant()) {
+      if (second.IsConstant()) {
         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
 
         if (imm == 0) {
           // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
         } else if (imm == 1 || imm == -1) {
           DivRemOneOrMinusOne(instruction);
-        } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+        } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
           DivByPowerOfTwo(instruction->AsDiv());
         } else {
           DCHECK(imm <= -2 || imm >= 2);
           GenerateDivRemWithAnyConstant(instruction);
         }
       } else {
-        SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
-              is_div);
+        SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(
+            instruction, out.AsRegister<Register>(), is_div);
         codegen_->AddSlowPath(slow_path);
 
         Register second_reg = second.AsRegister<Register>();
@@ -3045,11 +3453,13 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       } else {
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       }
       break;
     }
@@ -3061,7 +3471,7 @@
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
 
@@ -3093,7 +3503,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
+      } else if (div->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -3120,7 +3536,7 @@
         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ divss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
@@ -3137,7 +3553,7 @@
         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
-        DCHECK(!const_area->NeedsMaterialization());
+        DCHECK(const_area->IsEmittedAtUseSite());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                    const_area->GetConstant()->AsDoubleConstant()->GetValue(),
@@ -3158,7 +3574,7 @@
   Primitive::Type type = rem->GetResultType();
 
   LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
@@ -3223,6 +3639,7 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
@@ -3253,6 +3670,7 @@
   Location value = locations->InAt(0);
 
   switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
@@ -3334,7 +3752,7 @@
           __ shrl(first_reg, second_reg);
         }
       } else {
-        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
         if (shift == 0) {
           return;
         }
@@ -3362,7 +3780,7 @@
         }
       } else {
         // Shift by a constant.
-        int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
         // Nothing to do if the shift is 0, as the input is already the output.
         if (shift != 0) {
           if (op->IsShl()) {
@@ -3487,6 +3905,91 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderX86::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimLong:
+      // Add the temporary needed.
+      locations->AddTemp(Location::RequiresRegister());
+      FALLTHROUGH_INTENDED;
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  if (ror->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    if (second.IsRegister()) {
+      Register second_reg = second.AsRegister<Register>();
+      __ rorl(first_reg, second_reg);
+    } else {
+      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
+      __ rorl(first_reg, imm);
+    }
+    return;
+  }
+
+  DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
+  Register first_reg_lo = first.AsRegisterPairLow<Register>();
+  Register first_reg_hi = first.AsRegisterPairHigh<Register>();
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  if (second.IsRegister()) {
+    Register second_reg = second.AsRegister<Register>();
+    DCHECK_EQ(second_reg, ECX);
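+    // Rotate right by (ECX & 31) with paired double-precision shifts, then
+    // swap the halves if bit 5 of the count is set (rotation amount >= 32).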
+    __ movl(temp_reg, first_reg_hi);
+    __ shrd(first_reg_hi, first_reg_lo, second_reg);
+    __ shrd(first_reg_lo, temp_reg, second_reg);
+    __ movl(temp_reg, first_reg_hi);
+    __ testl(second_reg, Immediate(32));
+    __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
+    __ cmovl(kNotEqual, first_reg_lo, temp_reg);
+  } else {
+    int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
+    if (shift_amt == 0) {
+      // Nothing to do: rotating by 0 leaves the value unchanged.
+      return;
+    }
+    if (shift_amt == 32) {
+      // Just swap.
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+      return;
+    }
+
+    Immediate imm(shift_amt);
+    // Save the contents of the low value.
+    __ movl(temp_reg, first_reg_lo);
+
+    // Shift right into low, feeding bits from high.
+    __ shrd(first_reg_lo, first_reg_hi, imm);
+
+    // Shift right into high, feeding bits from the original low.
+    __ shrd(first_reg_hi, temp_reg, imm);
+
+    // Swap if needed.
+    if (shift_amt > 32) {
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+    }
+  }
+}
+
 void LocationsBuilderX86::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3513,28 +4016,40 @@
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  DCHECK(!codegen_->IsLeafMethod());
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
+    __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
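+    // The entrypoint is read from thread-local storage, addressed through
+    // the FS segment register on x86.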
+    __ call(Address(temp, code_offset.Int32Value()));
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+    DCHECK(!codegen_->IsLeafMethod());
+  }
 }
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -3545,13 +4060,13 @@
 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
-
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -3580,6 +4095,32 @@
 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
 }
 
+void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kX86PointerSize).SizeValue();
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(), method_offset));
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kX86PointerSize));
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(),
+                    mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
+    // out = out->GetImtEntryAt(method_offset);
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->Out().AsRegister<Register>(), method_offset));
+  }
+}
+
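The two branches in VisitClassTableGet differ only in indirection depth: a vtable entry is a single load off the class object, while an IMT entry goes through the imt pointer first. As plain pointer arithmetic over a purely hypothetical layout (not mirror::Class's real one):

    #include <cstddef>

    struct Method;
    struct Class {
      Method* embedded_vtable[64];  // vtable entries embedded in the class (size illustrative)
      Method** imt;                 // pointer to the interface method table
    };

    static Method* VTableGet(Class* klass, size_t index) {
      return klass->embedded_vtable[index];  // one load: [klass + vtable_entry_offset]
    }

    static Method* ImtGet(Class* klass, size_t index) {
      return klass->imt[index];  // two loads: the imt pointer, then the slot
    }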
 void LocationsBuilderX86::VisitNot(HNot* not_) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
@@ -3626,6 +4167,11 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -3635,7 +4181,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
+      } else if (compare->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -3651,7 +4203,17 @@
   Location right = locations->InAt(1);
 
   NearLabel less, greater, done;
+  Condition less_cond = kLess;
+
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      codegen_->GenerateIntCompare(left, right);
+      break;
+    }
     case Primitive::kPrimLong: {
       Register left_low = left.AsRegisterPairLow<Register>();
       Register left_high = left.AsRegisterPairHigh<Register>();
@@ -3673,11 +4235,7 @@
         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
         DCHECK(right_is_const) << right;
-        if (val_high == 0) {
-          __ testl(left_high, left_high);
-        } else {
-          __ cmpl(left_high, Immediate(val_high));
-        }
+        codegen_->Compare32BitValue(left_high, val_high);
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
@@ -3687,30 +4245,30 @@
         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
         DCHECK(right_is_const) << right;
-        if (val_low == 0) {
-          __ testl(left_low, left_low);
-        } else {
-          __ cmpl(left_low, Immediate(val_low));
-        }
+        codegen_->Compare32BitValue(left_low, val_low);
       }
+      less_cond = kBelow;  // for CF (unsigned).
       break;
     }
     case Primitive::kPrimFloat: {
-      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, compare, false);
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // for CF (floats).
       break;
     }
     case Primitive::kPrimDouble: {
-      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, compare, true);
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // for CF (floats).
       break;
     }
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   }
+
   __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(kBelow, &less);  // kBelow is for CF (unsigned & floats).
+  __ j(less_cond, &less);
 
   __ Bind(&greater);
   __ movl(out, Immediate(1));
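The lowering above materializes the three-way result as -1/0/1 and switches `less_cond` to kBelow wherever the preceding compare leaves its "less" answer in CF: the low word of a long compare and the ucomiss/ucomisd flag patterns are both effectively unsigned. A reference sketch of the float semantics being lowered, NaN bias included:

    #include <cstdint>

    // Three-way float compare with an explicit NaN bias (doubles behave the
    // same): an unordered result resolves to +1 with gt bias, -1 otherwise.
    static int32_t CompareFloat(float a, float b, bool gt_bias) {
      if (a != a || b != b) {  // unordered: at least one operand is NaN
        return gt_bias ? 1 : -1;
      }
      if (a == b) return 0;
      return (a < b) ? -1 : 1;
    }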
@@ -3725,7 +4283,7 @@
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3735,7 +4293,7 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
@@ -3743,7 +4301,7 @@
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
@@ -3752,13 +4310,72 @@
       // nop
       break;
     }
-    default:
-      LOG(FATAL) << "Unexpected memory barrier " << kind;
+    case MemBarrierKind::kNTStoreStore:
+      // Non-Temporal Store/Store needs an explicit fence.
+      MemoryFence(/* non-temporal */ true);
+      break;
   }
 }
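Under the x86-TSO model only StoreLoad reordering is observable, which is why kAnyAny is the sole kind paying for a real fence, and why non-temporal stores, which bypass TSO's ordering, get their own explicit case. A C++-level approximation of the mapping (std::atomic_thread_fence stands in for the mfence/sfence the codegen actually emits):

    #include <atomic>

    enum class Barrier { kAnyAny, kAnyStore, kLoadAny, kStoreStore, kNTStoreStore };

    static void EmitBarrier(Barrier kind) {
      switch (kind) {
        case Barrier::kAnyAny:        // StoreLoad: needs mfence (or a locked RMW)
        case Barrier::kNTStoreStore:  // non-temporal stores: needs sfence
          std::atomic_thread_fence(std::memory_order_seq_cst);
          break;
        case Barrier::kAnyStore:
        case Barrier::kLoadAny:
        case Barrier::kStoreStore:
          break;  // no-ops under the x86 memory model
      }
    }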
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
 
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  // TODO: Create as many X86ComputeBaseMethodAddress instructions
+  // as needed for methods with irreducible loops.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+  switch (dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
+      // (Though the direct CALL ptr16:32 is available for consideration).
+      return HInvokeStaticOrDirect::DispatchInfo {
+        dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return dispatch_info;
+  }
+}
+
+Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ movl(temp, Address(ESP, location.GetStackIndex()));
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  if (slow_path != nullptr) {
+    if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+      int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+      __ movl(temp, Address(ESP, stack_offset));
+      return temp;
+    }
+  }
+  return location.AsRegister<Register>();
+}
+
+Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -3766,40 +4383,52 @@
       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-      __ movl(temp.AsRegister<Register>(), Immediate(0));  // Placeholder.
+      __ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0));
       method_patches_.emplace_back(invoke->GetTargetMethod());
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+      // Bind a new fixup label at the end of the "movl" insn.
+      uint32_t offset = invoke->GetDexCacheArrayOffset();
+      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
         method_reg = current_method.AsRegister<Register>();
       } else {
-        DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+        DCHECK(invoke->GetLocations()->Intrinsified());
         DCHECK(!current_method.IsValid());
         method_reg = reg;
         __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
       }
-      // temp = temp->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ movl(reg, Address(method_reg,
                            ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -3814,14 +4443,14 @@
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
-      // (Though the direct CALL ptr16:32 is available for consideration).
-      FALLTHROUGH_INTENDED;
+      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // (callee_method + offset_of_quick_compiled_code)()
       __ call(Address(callee_method.AsRegister<Register>(),
                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kX86WordSize).Int32Value()));
+                          kX86PointerSize).Int32Value()));
       break;
   }
 
@@ -3832,40 +4461,121 @@
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ movl(temp, Address(receiver, class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though the
+  // concurrent copying collector may not preserve this in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(
-      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
+}
+
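GenerateVirtualCall is three dependent loads plus an indirect call. Written as plain C++ over an illustrative layout (not ART's real mirror types), the shape is:

    #include <cstddef>

    struct ArtMethodLike { void (*quick_entry_point)(); };
    struct ClassLike     { ArtMethodLike* embedded_vtable[64]; };  // size illustrative
    struct ObjectLike    { ClassLike* klass; };

    static void InvokeVirtual(ObjectLike* receiver, size_t vtable_index) {
      ClassLike* klass = receiver->klass;  // this load doubles as the implicit null check
      ArtMethodLike* method = klass->embedded_vtable[vtable_index];
      method->quick_entry_point();         // call [method + entry_point_offset]
    }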
+void CodeGeneratorX86::RecordSimplePatch() {
+  if (GetCompilerOptions().GetIncludePatchInformation()) {
+    simple_patches_.emplace_back();
+    __ Bind(&simple_patches_.back());
+  }
+}
+
+void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) {
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  __ Bind(&string_patches_.back().label);
+}
+
+void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) {
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  __ Bind(&type_patches_.back().label);
+}
+
+Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                         uint32_t element_offset) {
+  // Add the patch entry and bind its label at the end of the instruction.
+  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
+  return &pc_relative_dex_cache_patches_.back().label;
 }
 
 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  linker_patches->reserve(method_patches_.size() + relative_call_patches_.size());
+  size_t size =
+      method_patches_.size() +
+      relative_call_patches_.size() +
+      pc_relative_dex_cache_patches_.size() +
+      simple_patches_.size() +
+      string_patches_.size() +
+      type_patches_.size();
+  linker_patches->reserve(size);
+  // The label points to the end of the "movl" insn but the literal offset for method
+  // patch needs to point to the embedded constant which occupies the last 4 bytes.
+  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
   for (const MethodPatchInfo<Label>& info : method_patches_) {
-    // The label points to the end of the "movl" insn but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
                                                        info.target_method.dex_file,
                                                        info.target_method.dex_method_index));
   }
   for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
-    // The label points to the end of the "call" insn but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
+                                                              &info.target_dex_file,
+                                                              GetMethodAddressOffset(),
+                                                              info.element_offset));
+  }
+  for (const Label& label : simple_patches_) {
+    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+  if (GetCompilerOptions().GetCompilePic()) {
+    for (const StringPatchInfo<Label>& info : string_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
+                                                                 &info.dex_file,
+                                                                 GetMethodAddressOffset(),
+                                                                 info.string_index));
+    }
+    for (const TypePatchInfo<Label>& info : type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
+                                                               &info.dex_file,
+                                                               GetMethodAddressOffset(),
+                                                               info.type_index));
+    }
+  } else {
+    for (const StringPatchInfo<Label>& info : string_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                         &info.dex_file,
+                                                         info.string_index));
+    }
+    for (const TypePatchInfo<Label>& info : type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                       &info.dex_file,
+                                                       info.type_index));
+    }
+  }
 }
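The constant behind kLabelPositionToLiteralOffsetAdjustment falls straight out of the x86 encoding: movl r32, imm32 is one opcode byte (0xB8+reg) followed by the 4-byte immediate, and the labels above are bound after the instruction, so the embedded constant always occupies the last 4 bytes:

    #include <cstdint>

    // B8 78 56 34 12      movl $0x12345678, %eax
    //    ^^^^^^^^^^^      the imm32 is the final 4 bytes of the instruction
    static uint32_t LiteralOffset(uint32_t label_position_at_insn_end) {
      return label_position_at_insn_end - 4u;
    }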
 
 void CodeGeneratorX86::MarkGCCard(Register temp,
@@ -3878,7 +4588,7 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
+  __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
   __ movl(temp, object);
   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
   __ movb(Address(temp, card, TIMES_1, 0),
@@ -3890,25 +4600,41 @@
 
 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   kEmitCompilerReadBarrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
 
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    // The output overlaps in case of long: we don't want the low move to overwrite
-    // the object's location.
-    locations->SetOut(Location::RequiresRegister(),
-        (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
-                                                         : Location::kNoOutputOverlap);
+    // The output overlaps in case of long: we don't want the low move
+    // to overwrite the object's location.  Likewise, in the case of
+    // an object field get with read barriers enabled, we do not want
+    // the move to overwrite the object's location, as we need it to emit
+    // the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ?
+            Location::kOutputOverlap :
+            Location::kNoOutputOverlap);
   }
 
   if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
     // Long values can be loaded atomically into an XMM using movsd.
-    // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM
-    // and then copy the XMM into the output 32bits at a time).
+    // So we use an XMM register as a temp to achieve atomicity (first
+    // load the temp into the XMM and then copy the XMM into the
+    // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
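The XMM temp exists because IA-32 has no plain 64-bit integer load, but an aligned 8-byte SSE2 movsd is atomic: the value is pulled in whole, then split 32 bits at a time. A sketch with intrinsics, assuming `addr` is 8-byte aligned as Java long fields are:

    #include <cstdint>
    #include <emmintrin.h>

    static void AtomicLoad64(const void* addr, uint32_t* out_lo, uint32_t* out_hi) {
      // One 8-byte load into an XMM register (movsd): atomic when aligned.
      __m128d xmm = _mm_load_sd(static_cast<const double*>(addr));
      uint64_t bits;
      _mm_store_sd(reinterpret_cast<double*>(&bits), xmm);
      *out_lo = static_cast<uint32_t>(bits);        // copy out the low half
      *out_hi = static_cast<uint32_t>(bits >> 32);  // then the high half
    }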
 
@@ -3917,7 +4643,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  Register base = locations->InAt(0).AsRegister<Register>();
+  Location base_loc = locations->InAt(0);
+  Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   Primitive::Type field_type = field_info.GetFieldType();
@@ -3945,9 +4672,32 @@
     }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ movl(out.AsRegister<Register>(), Address(base, offset));
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<Register>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
     case Primitive::kPrimLong: {
@@ -3982,17 +4732,20 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4013,16 +4766,16 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(1, Location::RegisterLocation(EAX));
   } else if (Primitive::IsFloatingPointType(field_type)) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
-  } else {
+    if (is_volatile && field_type == Primitive::kPrimDouble) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
+  } else if (is_volatile && field_type == Primitive::kPrimLong) {
+    // In order to satisfy the semantics of volatile, this must be a single instruction store.
     locations->SetInAt(1, Location::RequiresRegister());
-  }
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-    // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
-    // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));
-  } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
+
     // A 64-bit value can be atomically written to an address with movsd and an XMM register.
     // We need two XMM registers because there's no easier way to (bit) copy a register pair
     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
@@ -4030,6 +4783,15 @@
     // isolated cases when we need this it isn't worth adding the extra complexity.
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+
+    if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
+      // Ensure the card is in a byte register.
+      locations->AddTemp(Location::RegisterLocation(ECX));
+    }
   }
 }
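The store side needs two XMM temps because no single instruction moves a GPR pair into an XMM: each half enters via movd, punpckldq interleaves them, and one movsd then performs the 8-byte store atomically. Sketch, again assuming an aligned address:

    #include <cstdint>
    #include <emmintrin.h>

    static void AtomicStore64(void* addr, uint32_t lo, uint32_t hi) {
      __m128i xmm_lo = _mm_cvtsi32_si128(static_cast<int32_t>(lo));  // movd
      __m128i xmm_hi = _mm_cvtsi32_si128(static_cast<int32_t>(hi));  // movd
      __m128i pair = _mm_unpacklo_epi32(xmm_lo, xmm_hi);             // punpckldq
      // One 8-byte store (movsd): atomic when addr is 8-byte aligned.
      _mm_store_sd(static_cast<double*>(addr), _mm_castsi128_pd(pair));
    }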
 
@@ -4048,9 +4810,11 @@
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
@@ -4060,7 +4824,12 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      __ movw(Address(base, offset), value.AsRegister<Register>());
+      if (value.IsConstant()) {
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movw(Address(base, offset), Immediate(v));
+      } else {
+        __ movw(Address(base, offset), value.AsRegister<Register>());
+      }
       break;
     }
 
@@ -4075,7 +4844,11 @@
         __ movl(temp, value.AsRegister<Register>());
         __ PoisonHeapReference(temp);
         __ movl(Address(base, offset), temp);
+      } else if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
       } else {
+        DCHECK(value.IsRegister()) << value;
         __ movl(Address(base, offset), value.AsRegister<Register>());
       }
       break;
@@ -4090,21 +4863,40 @@
         __ punpckldq(temp1, temp2);
         __ movsd(Address(base, offset), temp1);
         codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
       } else {
         __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
       }
+      maybe_record_implicit_null_check_done = true;
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -4113,8 +4905,7 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (!maybe_record_implicit_null_check_done) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
@@ -4125,7 +4916,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4243,20 +5034,20 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
-  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
     return;
   }
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
 
   __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
+void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction);
-  codegen_->AddSlowPath(slow_path);
+  AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
@@ -4275,39 +5066,52 @@
 }
 
 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
-    GenerateImplicitNullCheck(instruction);
-  } else {
-    GenerateExplicitNullCheck(instruction);
-  }
+  codegen_->GenerateNullCheck(instruction);
 }
 
 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    // The output overlaps in case of long: we don't want the low move to overwrite
-    // the array's location.
-    locations->SetOut(Location::RequiresRegister(),
-        (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
-                                                         : Location::kNoOutputOverlap);
+    // The output overlaps in case of long: we don't want the low move
+    // to overwrite the array's location.  Likewise, in the case of an
+    // object array get with read barriers enabled, we do not want the
+    // move to overwrite the array's location, as we need it to emit
+    // the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ?
+            Location::kOutputOverlap :
+            Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4318,8 +5122,7 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4330,8 +5133,7 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4342,8 +5144,7 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4353,10 +5154,8 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+    case Primitive::kPrimInt: {
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4366,28 +5165,61 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
+      DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+        __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
+        __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
       } else {
-        __ movl(out.AsRegisterPairLow<Register>(),
+        __ movl(out_loc.AsRegisterPairLow<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(),
+        __ movl(out_loc.AsRegisterPairHigh<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4398,8 +5230,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4414,31 +5245,28 @@
       UNREACHABLE();
   }
 
-  if (type != Primitive::kPrimLong) {
+  if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long arrays, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
-
-  if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
-  }
 }
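Every case above leans on the same scaled-index addressing mode: Address(obj, index, TIMES_n, data_offset) resolves to obj + (index << n) + data_offset, and the constant-index variants fold the whole displacement at compile time. As arithmetic:

    #include <cstdint>

    // scale is 0/1/2/3 (TIMES_1/2/4/8) for 1/2/4/8-byte elements.
    static uintptr_t ElementAddress(uintptr_t array, uint32_t data_offset,
                                    uint32_t index, unsigned scale) {
      return array + data_offset + (static_cast<uintptr_t>(index) << scale);
    }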
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
-  // This location builder might end up asking to up to four registers, which is
-  // not currently possible for baseline. The situation in which we need four
-  // registers cannot be met by baseline though, because it has not run any
-  // optimization.
-
   Primitive::Type value_type = instruction->GetComponentType();
+
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
 
   bool is_byte_type = (value_type == Primitive::kPrimBoolean)
       || (value_type == Primitive::kPrimByte);
@@ -4451,7 +5279,7 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
   } else if (Primitive::IsFloatingPointType(value_type)) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
@@ -4465,14 +5293,15 @@
 
 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register array = locations->InAt(0).AsRegister<Register>();
+  Location array_loc = locations->InAt(0);
+  Register array = array_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -4512,6 +5341,7 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+
       if (!value.IsRegister()) {
         // Just setting null.
         DCHECK(instruction->InputAt(2)->IsNullConstant());
@@ -4519,7 +5349,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4528,7 +5358,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       Register temp = locations->GetTemp(0).AsRegister<Register>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4540,22 +5370,62 @@
           __ Bind(&not_null);
         }
 
-        __ movl(temp, Address(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp);
-        __ movl(temp, Address(temp, component_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ cmpl(temp, Address(register_value, class_offset));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          __ j(kEqual, &do_put);
-          __ MaybeUnpoisonHeapReference(temp);
-          __ movl(temp, Address(temp, super_offset));
-          // No need to unpoison, we're comparing against null..
-          __ testl(temp, temp);
-          __ j(kNotEqual, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ movl(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ movl(temp, Address(temp, component_offset));
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
+          //   __ movl(temp2, Address(register_value, class_offset));
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+          //
+          //   __ cmpl(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ jmp(slow_path->GetEntryLabel());
         } else {
-          __ j(kNotEqual, slow_path->GetEntryLabel());
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ movl(temp, Address(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ movl(temp, Address(temp, component_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor the object reference in `register_value->klass`, as
+          // we are comparing two poisoned references.
+          __ cmpl(temp, Address(register_value, class_offset));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            __ j(kEqual, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ movl(temp, Address(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ testl(temp, temp);
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+          }
         }
       }
 
@@ -4566,7 +5436,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4581,6 +5451,7 @@
 
       break;
     }
+
     case Primitive::kPrimInt: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Address address = index.IsConstant()
@@ -4637,8 +5508,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4647,8 +5524,19 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        Address address_hi = index.IsConstant() ?
+            Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                           offset + kX86WordSize) :
+            Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize);
+        int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        __ movl(address, Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(address_hi, Immediate(High32Bits(v)));
+      }
       break;
     }
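The constant path above reinterprets the double's bits and emits two 32-bit stores, recording the implicit null check after the first so a fault still maps to the right instruction. The Low32Bits/High32Bits split over a bit_cast, in portable C++:

    #include <cstdint>
    #include <cstring>

    static void SplitDoubleBits(double value, int32_t* lo, int32_t* hi) {
      int64_t bits;
      static_assert(sizeof(bits) == sizeof(value), "double must be 64-bit");
      std::memcpy(&bits, &value, sizeof(bits));  // bit_cast<int64_t, double>
      *lo = static_cast<int32_t>(bits);          // Low32Bits(v)
      *hi = static_cast<int32_t>(bits >> 32);    // High32Bits(v)
    }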
 
@@ -4661,12 +5549,18 @@
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ movl(out, Address(obj, offset));
@@ -4679,7 +5573,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -4713,26 +5610,34 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    Register length = length_loc.AsRegister<Register>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<Register>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<Register>());
+      Register length = length_loc.AsRegister<Register>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<Register>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
   }
 }
 
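The `IsEmittedAtUseSite` path above never materializes the length: the bounds check compares the index against the length field in memory and falls into the slow path on kBelowEqual. A behavioral model of the check (a sketch, assuming only that the length is a 32-bit field and that the comparison is unsigned):

    #include <cstdint>

    // Hypothetical stand-in for the array header; only the length matters here.
    struct ArrayStub { int32_t length; };

    // In bounds iff index < length as unsigned values, which is what
    // cmpl + j(kBelowEqual, slow_path) implements: negative indices wrap
    // high and fail the check.
    bool InBounds(const ArrayStub* array, int32_t index) {
      return static_cast<uint32_t>(index) < static_cast<uint32_t>(array->length);
    }
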
-void LocationsBuilderX86::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
@@ -4775,8 +5680,8 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
+  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -4818,13 +5723,31 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (destination.IsFpuRegister()) {
+      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
     }
+  } else if (source.IsRegisterPair()) {
+    size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
+    // Create stack space for 2 elements.
+    __ subl(ESP, Immediate(2 * elem_size));
+    __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
+    __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
+    __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+    // And remove the temporary stack space we allocated.
+    __ addl(ESP, Immediate(2 * elem_size));
   } else if (source.IsFpuRegister()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegister()) {
+      __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
+    } else if (destination.IsFpuRegister()) {
       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+    } else if (destination.IsRegisterPair()) {
+      XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
+      __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
+      __ psrlq(src_reg, Immediate(32));
+      __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
     } else if (destination.IsStackSlot()) {
       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     } else {
@@ -4841,7 +5764,11 @@
       MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
     }
   } else if (source.IsDoubleStackSlot()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegisterPair()) {
+      __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
+      __ movl(destination.AsRegisterPairHigh<Register>(),
+              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+    } else if (destination.IsFpuRegister()) {
       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
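The new register-pair moves above shuttle 64 bits between a core register pair and an XMM register: pair to XMM bounces through two temporary stack slots, while XMM to pair uses movd for the low half and psrlq to shift the high half into reach (note that this clobbers the source register). The same dataflow with SSE2 intrinsics, as an illustration only:

    #include <emmintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      __m128i xmm = _mm_set_epi64x(0, 0x1122334455667788LL);
      uint32_t lo = static_cast<uint32_t>(_mm_cvtsi128_si32(xmm));  // movd: low half
      xmm = _mm_srli_epi64(xmm, 32);                                // psrlq $32 (destructive)
      uint32_t hi = static_cast<uint32_t>(_mm_cvtsi128_si32(xmm));  // movd: high half
      std::printf("lo=0x%08x hi=0x%08x\n", lo, hi);  // lo=0x55667788 hi=0x11223344

      // The pair-to-XMM direction in the patch goes through the stack instead:
      uint32_t slots[2] = {lo, hi};
      __m128i back = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(slots));
      (void)back;  // back now holds the original 64 bits in its low lane
      return 0;
    }
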
@@ -5029,12 +5956,72 @@
   __ popl(static_cast<Register>(reg));
 }
 
+HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      FALLTHROUGH_INTENDED;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(EAX));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(EAX),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
@@ -5045,27 +6032,88 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    __ movl(out, Address(
-        current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-    __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
 
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ movl(out, Immediate(/* placeholder */ 0));
+      codegen_->RecordTypePatch(cls);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      Register method_address = locations->InAt(0).AsRegister<Register>();
+      __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
+      codegen_->RecordTypePatch(cls);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ movl(out, Immediate(address));
+      codegen_->RecordSimplePatch();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      // /* GcRoot<mirror::Class> */ out = *address
+      GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      uint32_t offset = cls->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
+      // /* GcRoot<mirror::Class> */ out = *(base + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      __ movl(out, Address(current_method,
+                           ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
+
+    if (generate_null_check) {
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+    }
+
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
     } else {
@@ -5101,31 +6149,101 @@
   // No need for memory fence, thanks to the X86 memory model.
 }
 
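Schematically, the rewritten VisitLoadClass funnels both failure modes into a single slow path; a sketch with stand-in types, not ART code:

    struct ClassStub { bool initialized; };

    // Placeholder for LoadClassSlowPathX86: resolve and/or initialize,
    // then return the class.
    static ClassStub* LoadClassSlowPathStub(ClassStub* cls) { return cls; }

    // `loaded` is guaranteed non-null when generate_null_check is false
    // (the class is known to be in the dex cache).
    ClassStub* LoadClassShape(ClassStub* loaded,
                              bool generate_null_check,
                              bool must_do_clinit) {
      if (generate_null_check && loaded == nullptr) {  // testl out,out ; je slow_path
        return LoadClassSlowPathStub(loaded);
      }
      if (must_do_clinit && !loaded->initialized) {    // GenerateClassInitializationCheck
        return LoadClassSlowPathStub(loaded);
      }
      return loaded;
    }
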
+HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      FALLTHROUGH_INTENDED;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
+}
+
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
-  locations->SetInAt(0, Location::RequiresRegister());
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+      load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
+  LocationSummary* locations = load->GetLocations();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ movl(out, Immediate(/* placeholder */ 0));
+      codegen_->RecordStringPatch(load);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      Register method_address = locations->InAt(0).AsRegister<Register>();
+      __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
+      codegen_->RecordStringPatch(load);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ movl(out, Immediate(address));
+      codegen_->RecordSimplePatch();
+      return;  // No dex cache slow path.
+    }
+    default:
+      break;
+  }
+
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
   codegen_->AddSlowPath(slow_path);
-
-  LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-  __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
+  __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value());
+  return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
 }
 
 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
@@ -5148,7 +6266,7 @@
 
 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5158,44 +6276,58 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
 
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86 uses this register too.
-    locations->SetOut(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(EAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this "out" register too.
+  locations->SetOut(Location::RequiresRegister());
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5210,17 +6342,10 @@
     __ j(kEqual, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ movl(target, Address(obj, class_offset));
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5235,13 +6360,14 @@
       __ jmp(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
-      __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5258,6 +6384,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
@@ -5269,8 +6396,8 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
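Stripped of the read barrier plumbing, the hierarchy walk above is a plain superclass chase; conceptually (stand-in type, illustration only):

    struct ClassStub { const ClassStub* super_class; };

    // kClassHierarchyCheck: follow super_class_ until the target class is
    // found (instanceof is true) or the chain ends at null (false).
    bool IsSubclassOf(const ClassStub* klass, const ClassStub* target) {
      for (; klass != nullptr; klass = klass->super_class) {
        if (klass == target) {
          return true;
        }
      }
      return false;
    }
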
@@ -5282,6 +6409,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       NearLabel exact_check;
@@ -5292,9 +6420,9 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ movl(out, Address(out, component_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5305,6 +6433,7 @@
       __ jmp(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5313,8 +6442,8 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+                                                                    /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ movl(out, Immediate(1));
@@ -5323,13 +6452,32 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on the slow path, but for the
+      // unresolved and interface check cases we always go into it.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers to the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+                                                                    /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ jmp(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ jmp(&done);
       }
@@ -5354,75 +6502,72 @@
 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kInterfaceCheck:
-    case TypeCheckKind::kUnresolvedCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86 uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
-  Register temp = locations->WillCall()
-      ? kNoRegister
-      : locations->GetTemp(0).AsRegister<Register>();
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+                                                        is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done, abstract_entry;
+  NearLabel done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ testl(obj, obj);
     __ j(kEqual, &done);
   }
 
-  if (locations->WillCall()) {
-    __ movl(obj, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ movl(temp, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
@@ -5433,19 +6578,34 @@
       }
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, success;
+      NearLabel loop, compare_classes;
       __ Bind(&loop);
-      __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
       __ testl(temp, temp);
-      // Jump to the slow path to throw the exception.
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before going into the slow path, move the object's class
+      // back into `temp`, as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -5455,6 +6615,7 @@
       __ j(kNotEqual, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
@@ -5466,16 +6627,29 @@
         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
-      __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back to the beginning of the loop.
       __ testl(temp, temp);
       __ j(kNotEqual, &loop);
-      // Jump to the slow path to throw the exception.
-      __ jmp(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before going into the slow path, move the object's class
+      // back into `temp`, as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -5483,34 +6657,68 @@
         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ movl(temp, Address(temp, component_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
       __ testl(temp, temp);
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before going into the slow path, move the object's class
+      // back into `temp`, as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kEqual, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers to the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us restore this
+      // case of the `switch` code to what it was previously (a direct
+      // call to the runtime, without a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5521,6 +6729,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5656,6 +6869,295 @@
   }
 }
 
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location maybe_temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(maybe_temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location maybe_temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          const Address& address,
+                                                          Label* fixup_label) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = *address;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *address
+      __ movl(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = address
+      __ leal(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *address
+    __ movl(root_reg, address);
+    if (fixup_label != nullptr) {
+      __ Bind(fixup_label);
+    }
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
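In plain C++ terms, the Baker fast path emitted above for GC roots reduces to the following (stand-in types; the real check reads the thread-local is_gc_marking flag through the fs: segment, and the mark happens on the ReadBarrierMarkSlowPathX86 slow path):

    struct ObjectStub {};

    // Placeholder for the runtime mark entrypoint.
    static ObjectStub* MarkRoot(ObjectStub* root) { return root; }

    ObjectStub* LoadGcRoot(ObjectStub** address, bool is_gc_marking) {
      ObjectStub* root = *address;  // movl root_reg, address
      if (is_gc_marking) {          // fs:cmpl is_gc_marking, $0 ; jne slow_path
        root = MarkRoot(root);
      }
      return root;
    }
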
+void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
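Both Address forms above encode the same element address, obj + data_offset + index * sizeof(HeapReference<Object>): a constant index is folded into the displacement, while a register index uses scale TIMES_4 (a shift amount of 2). As bare arithmetic:

    #include <cstdint>

    // index is non-negative here, having already passed the bounds check.
    uintptr_t HeapRefElementAddress(uintptr_t obj, uint32_t data_offset, int32_t index) {
      return obj + data_offset + (static_cast<uintptr_t>(index) << 2);  // << TIMES_4
    }
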
+void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 const Address& src,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+  //   which is a no-op thanks to the x86 memory model);
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with SHR.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+  __ j(kCarrySet, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
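The shrl/j(kCarrySet) pair above works because the last bit shifted out of a register lands in the carry flag: shifting right by kReadBarrierStateShift + 1 drops exactly the low rb_state bit, and gray has value 1. The same predicate in portable C++ (the shift value below is an assumption for illustration; C++ cannot read CF directly, so the bit is tested arithmetically):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t kRbStateShift = 28;  // stand-in for LockWord::kReadBarrierStateShift
      uint32_t gray_lock_word  = 1u << kRbStateShift;  // rb_state == gray_ptr_ (1)
      uint32_t white_lock_word = 0u;                   // rb_state == white_ptr_ (0)
      // "shrl word, kRbStateShift + 1 ; jc slow_path" tests exactly this bit:
      assert(((gray_lock_word  >> kRbStateShift) & 1u) == 1u);  // would take the slow path
      assert(((white_lock_word >> kRbStateShift) & 1u) == 0u);  // fast path continues
      return 0;
    }
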
+void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
+
+void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
@@ -5666,18 +7168,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderX86::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorX86::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
@@ -5685,31 +7175,67 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+                                                              int32_t lower_bound,
+                                                              uint32_t num_entries,
+                                                              HBasicBlock* switch_block,
+                                                              HBasicBlock* default_block) {
+  // Figure out the correct compare values and jump conditions.
+  // Handle the first compare/branch as a special case because it might
+  // jump to the default case.
+  DCHECK_GT(num_entries, 2u);
+  Condition first_condition;
+  uint32_t index;
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  if (lower_bound != 0) {
+    first_condition = kLess;
+    __ cmpl(value_reg, Immediate(lower_bound));
+    __ j(first_condition, codegen_->GetLabelOf(default_block));
+    __ j(kEqual, codegen_->GetLabelOf(successors[0]));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+    index = 1;
+  } else {
+    // Handle all the compare/jumps below.
+    first_condition = kBelow;
+    index = 0;
+  }
+
+  // Handle the rest of the compare/jumps.
+  for (; index + 1 < num_entries; index += 2) {
+    int32_t compare_to_value = lower_bound + index + 1;
+    __ cmpl(value_reg, Immediate(compare_to_value));
+    // Jump to successors[index] if value < case_value[index + 1].
+    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+    // Jump to successors[index + 1] if value == case_value[index + 1].
+    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+  }
+
+  if (index != num_entries) {
+    // There are an odd number of entries. Handle the last one.
+    DCHECK_EQ(index + 1, num_entries);
+    __ cmpl(value_reg, Immediate(lower_bound + index));
+    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
   }
 }
 
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+  GenPackedSwitchWithCompares(value_reg,
+                              lower_bound,
+                              num_entries,
+                              switch_instr->GetBlock(),
+                              switch_instr->GetDefaultBlock());
+}
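For intuition, the cascade above pairs two table entries per `cmpl`: the `kLess`/`kBelow` jump catches the value just below the compared entry and the `kEqual` jump catches the entry itself, which is where the roughly 1.5 instructions per entry come from. A minimal C++ simulation of the dispatch order, as an illustration only (it assumes `lower_bound = 10` and `num_entries = 4`; the return values stand in for `successors[i]`, with -1 for the default block):

```cpp
#include <cassert>

// Mirrors the emitted sequence: bounds check plus first case, then one cmpl
// per pair of cases, then the odd tail entry, then the fall-through default.
int SimulatePackedSwitch(int value) {
  if (value < 10) return -1;   // cmpl $10; j kLess -> default
  if (value == 10) return 0;   // j kEqual -> successors[0]
  if (value < 12) return 1;    // cmpl $12; j kLess -> successors[1] (value == 11)
  if (value == 12) return 2;   // j kEqual -> successors[2]
  if (value == 13) return 3;   // odd tail: cmpl $13; j kEqual -> successors[3]
  return -1;                   // jmp default
}

int main() {
  assert(SimulatePackedSwitch(9) == -1);   // below the range
  assert(SimulatePackedSwitch(11) == 1);   // caught by the kLess leg
  assert(SimulatePackedSwitch(13) == 3);   // the odd tail entry
  assert(SimulatePackedSwitch(14) == -1);  // above the range
  return 0;
}
```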
+
 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -5724,11 +7250,20 @@
 
 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_instr->GetBlock(),
+                                default_block);
+    return;
+  }
+
   // Optimizing has a jump area.
   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
   Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -5740,7 +7275,7 @@
   }
 
   // Is the value in range?
-  DCHECK_GE(num_entries, 1);
+  DCHECK_GE(num_entries, 1u);
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
@@ -5788,7 +7323,7 @@
   locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
 
   // If we don't need to be materialized, we only need the inputs to be set.
-  if (!insn->NeedsMaterialization()) {
+  if (insn->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -5808,7 +7343,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
-  if (!insn->NeedsMaterialization()) {
+  if (insn->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -5948,6 +7483,22 @@
   return Address(reg, kDummy32BitOffset, fixup);
 }
 
+void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
+  if (value == 0) {
+    __ xorl(dest, dest);
+  } else {
+    __ movl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
+  if (value == 0) {
+    __ testl(dest, dest);
+  } else {
+    __ cmpl(dest, Immediate(value));
+  }
+}
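These two helpers encode a standard x86 peephole: zero is materialized with `xorl` and tested with `testl`, which avoids a 32-bit immediate in the encoding (and, for `xorl`, breaks any dependency on the register's old value). A rough size model, illustrative only; the byte counts assume the common non-EAX 32-bit forms:

```cpp
#include <cassert>
#include <cstdint>

// Approximate encoded lengths of the two shapes each helper can emit.
static int LoadEncodingBytes(int32_t value) {
  return value == 0 ? 2   // xorl dest, dest
                    : 5;  // movl $imm32, dest
}
static int CompareEncodingBytes(int32_t value) {
  return value == 0 ? 2   // testl dest, dest (same flags as cmpl $0, dest)
                    : 6;  // cmpl $imm32, dest
}

int main() {
  assert(LoadEncodingBytes(0) < LoadEncodingBytes(42));
  assert(CompareEncodingBytes(0) < CompareEncodingBytes(42));
  return 0;
}
```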
+
 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                            Register reg,
                                            Register value) {
@@ -5965,7 +7516,7 @@
 // TODO: target as memory.
 void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
   if (!target.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index fdfc5ab..894f2e8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,8 +17,9 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 
+#include "arch/x86/instruction_set_features_x86.h"
+#include "base/enums.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -28,7 +29,7 @@
 namespace x86 {
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kX86WordSize = kX86PointerSize;
+static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);
 
 class CodeGeneratorX86;
 
@@ -166,6 +167,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -176,7 +178,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
 
-class InstructionCodeGeneratorX86 : public HGraphVisitor {
+class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
 
@@ -195,6 +197,11 @@
 
   X86Assembler* GetAssembler() const { return assembler_; }
 
+  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+  // table version generates 7 instructions and num_entries literals. The compare/jump
+  // sequence generates less code/data for a small num_entries.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
@@ -207,6 +214,7 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem* rem);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
@@ -214,30 +222,79 @@
   void GenerateShlLong(const Location& loc, int shift);
   void GenerateShrLong(const Location& loc, int shift);
   void GenerateUShrLong(const Location& loc, int shift);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *address
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               const Address& address,
+                               Label* fixup_label = nullptr);
+
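In plain-C terms, the two heap-reference load shapes declared above differ only in whether the base register survives the load; a sketch under that reading, not ART code:

```cpp
#include <cstdint>

// out <- *(out + offset): the base register is consumed by the load.
static uint32_t LoadOneRegister(uintptr_t out, uint32_t offset) {
  return *reinterpret_cast<uint32_t*>(out + offset);  // compressed 32-bit reference
}

// out <- *(obj + offset): `obj` remains live for the caller.
static uint32_t LoadTwoRegisters(uintptr_t obj, uint32_t offset) {
  return *reinterpret_cast<uint32_t*>(obj + offset);
}
```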
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_fp, bool is_wide);
 
-  void GenerateImplicitNullCheck(HNullCheck* instruction);
-  void GenerateExplicitNullCheck(HNullCheck* instruction);
+  template<class LabelType>
   void GenerateTestAndBranch(HInstruction* instruction,
-                             Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
-  void GenerateCompareTestAndBranch(HIf* if_inst,
-                                    HCondition* condition,
-                                    Label* true_target,
-                                    Label* false_target,
-                                    Label* always_true_target);
-  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
-  void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
+                             size_t condition_input_index,
+                             LabelType* true_target,
+                             LabelType* false_target);
+  template<class LabelType>
+  void GenerateCompareTestAndBranch(HCondition* condition,
+                                    LabelType* true_target,
+                                    LabelType* false_target);
+  template<class LabelType>
+  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
+  template<class LabelType>
+  void GenerateLongComparesAndJumps(HCondition* cond,
+                                    LabelType* true_label,
+                                    LabelType* false_label);
+
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
+
+  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -258,7 +315,6 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
   void Bind(HBasicBlock* block) OVERRIDE;
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
@@ -279,6 +335,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
   }
@@ -304,15 +366,11 @@
     return assembler_;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
-
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -333,11 +391,33 @@
   // Helper method to move a 64bits value between two locations.
   void Move64(Location destination, Location source);
 
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   // Generate a call to a static or direct method.
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
+  void RecordSimplePatch();
+  void RecordStringPatch(HLoadString* load_string);
+  void RecordTypePatch(HLoadClass* load_class);
+  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // Emit linker patches.
@@ -350,6 +430,10 @@
                   Register value,
                   bool value_can_be_null);
 
+  void GenerateIntCompare(Location lhs, Location rhs);
+
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -387,11 +471,123 @@
   Address LiteralInt32Address(int32_t v, Register reg);
   Address LiteralInt64Address(int64_t v, Register reg);
 
+  // Load a 32-bit value into a register in the most efficient manner.
+  void Load32BitValue(Register dest, int32_t value);
+
+  // Compare a register with a 32-bit value in the most efficient manner.
+  void Compare32BitValue(Register dest, int32_t value);
+
   Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack.
+  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
+  void MemoryFence(bool non_temporal = false) {
+    if (!non_temporal) {
+      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
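The locked `addl` is the usual cheap store-load fence on x86: any locked read-modify-write drains the store buffer, and targeting `(%esp)` touches a cache line that is almost always already exclusive in L1. `mfence` stays necessary for non-temporal stores, which locked operations do not order. A sketch of the equivalent barrier at the C++ level (illustrative only; the atomics below are stand-ins, not ART types):

```cpp
#include <atomic>

// On x86, compilers typically lower the seq_cst fence below to a locked RMW
// or mfence, the same trade-off MemoryFence() makes explicitly.
void PublishThenObserve(std::atomic<int>& data, std::atomic<int>& flag, int& seen) {
  data.store(1, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // the MemoryFence() point
  seen = flag.load(std::memory_order_relaxed);
}
```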
+
+  void GenerateNop();
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
+  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
+  // The correct value will be inserted when processing Assembler fixups.
+  static constexpr int32_t kDummy32BitOffset = 256;
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  struct PcRelativeDexCacheAccessInfo {
+    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
+        : target_dex_file(dex_file), element_offset(element_off), label() { }
+
+    const DexFile& target_dex_file;
+    uint32_t element_offset;
+    // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset.
+    Label label;
+  };
+
   // Labels for each block that will be compiled.
   Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
@@ -404,6 +600,14 @@
   // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> method_patches_;
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+  // PC-relative DexCache access info.
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+  // Patch locations for patchoat where the linker doesn't do any other work.
+  ArenaDeque<Label> simple_patches_;
+  // String patch locations.
+  ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // Type patch locations.
+  ArenaDeque<TypePatchInfo<Label>> type_patches_;
 
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
@@ -418,10 +622,6 @@
   // instruction gives the address of the start of this method.
   int32_t method_address_offset_;
 
-  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
-  // The correct value will be inserted when processing Assembler fixups.
-  static constexpr int32_t kDummy32BitOffset = 256;
-
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
 };
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ed2e4ca..0c55ae4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -34,34 +34,43 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace x86_64 {
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. The compare/jump
+// sequence generates less code/data for a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
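To make the comment's arithmetic concrete: at the threshold of 5 entries the cascade costs roughly 8 instructions against the table's 7 instructions plus 5 literals, and the table's fixed setup only pays off for larger switches. A toy cost model, illustrative only (the per-entry constants come from the comment above, not from measurement, and literals are counted alongside code even though they are data):

```cpp
#include <cassert>
#include <cstdint>

// ~1.5 instructions per entry for the cascade; a fixed 7 instructions plus
// one 32-bit literal per entry for the jump table.
constexpr uint32_t CompareJumpCost(uint32_t n) { return (3 * n + 1) / 2; }
constexpr uint32_t JumpTableCost(uint32_t n) { return 7 + n; }

int main() {
  assert(CompareJumpCost(5) < JumpTableCost(5));    // 8 vs 12: compares win at the threshold
  assert(CompareJumpCost(20) > JumpTableCost(20));  // 30 vs 27: the table wins for large switches
  return 0;
}
```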
 
 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
 
 static constexpr int kC2ConditionMask = 0x400;
 
-#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
+  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -69,25 +78,25 @@
   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
 
  private:
-  HNullCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
 };
 
 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {}
+  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -95,14 +104,13 @@
   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
 
  private:
-  HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
 };
 
 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
  public:
-  DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div)
-      : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
+  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
+      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -136,21 +144,20 @@
 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
  public:
   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
-      : instruction_(instruction), successor_(successor) {}
+      : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
-      __ jmp(x64_codegen->GetLabelOf(successor_));
+      __ jmp(x86_64_codegen->GetLabelOf(successor_));
     }
   }
 
@@ -166,7 +173,6 @@
   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
 
  private:
-  HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
   Label return_label_;
 
@@ -176,28 +182,52 @@
 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
  public:
   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
-    : instruction_(instruction) {}
+    : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
+    InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+    }
+
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
-    InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
-                               instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    x86_64_codegen->InvokeRuntime(entry_point_offset,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -205,8 +235,6 @@
   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
 
  private:
-  HBoundsCheck* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
 };
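One subtlety above: the re-loaded array length is staged in calling-convention register 1, but that register may already hold the index (input 0), in which case the code falls over to register 2. A compact sketch of that conflict check (the register numbers are assumptions standing in for `InvokeRuntimeCallingConvention`):

```cpp
#include <cassert>

// Pick a staging register for the re-loaded length that cannot alias the index.
static int PickLengthRegister(int index_reg, const int arg_regs[3]) {
  int length_reg = arg_regs[1];  // preferred: argument register 1
  if (length_reg == index_reg) {
    length_reg = arg_regs[2];    // "We know we aren't using parameter 2."
  }
  return length_reg;
}

int main() {
  const int arg_regs[3] = {7, 6, 2};  // e.g. RDI, RSI, RDX (assumed numbering)
  assert(PickLengthRegister(/* index in arg1 */ 6, arg_regs) == 2);
  assert(PickLengthRegister(/* index elsewhere */ 7, arg_regs) == 6);
  return 0;
}
```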
 
@@ -216,28 +244,36 @@
                           HInstruction* at,
                           uint32_t dex_pc,
                           bool do_clinit)
-      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = at_->GetLocations();
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
 
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
-    x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
-                                          : QUICK_ENTRY_POINT(pInitializeType),
-                                          at_, dex_pc_, this);
+    x86_64_codegen->InvokeRuntime(do_clinit_ ?
+                                      QUICK_ENTRY_POINT(pInitializeStaticStorage) :
+                                      QUICK_ENTRY_POINT(pInitializeType),
+                                  at_,
+                                  dex_pc_,
+                                  this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     Location out = locations->Out();
     // Move the class to the desired location.
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      x64_codegen->Move(out, Location::RegisterLocation(RAX));
+      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -265,24 +301,25 @@
 
 class LoadStringSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : instruction_(instruction) {}
+  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
-            Immediate(instruction_->GetStringIndex()));
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -290,15 +327,13 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
 
  private:
-  HLoadString* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
 };
 
 class TypeCheckSlowPathX86_64 : public SlowPathCode {
  public:
   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
-      : instruction_(instruction), is_fatal_(is_fatal) {}
+      : SlowPathCode(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -308,18 +343,9 @@
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
-      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ movl(temp, Address(obj, class_offset));
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -336,21 +362,24 @@
         Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
-      x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                                 instruction_,
-                                 dex_pc,
-                                 this);
+      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+                                    instruction_,
+                                    dex_pc,
+                                    this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                                 instruction_,
-                                 dex_pc,
-                                 this);
+      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
+                                    instruction_,
+                                    dex_pc,
+                                    this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
       if (instruction_->IsInstanceOf()) {
-        x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
       }
 
       RestoreLiveRegisters(codegen, locations);
@@ -363,7 +392,6 @@
   bool IsFatal() const OVERRIDE { return is_fatal_; }
 
  private:
-  HInstruction* const instruction_;
   const bool is_fatal_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
@@ -371,31 +399,29 @@
 
 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
-      : instruction_(instruction) {}
+  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
+      : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
-                               deoptimize,
-                               deoptimize->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
 
  private:
-  HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
 };
 
 class ArraySetSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {}
+  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -421,11 +447,12 @@
         nullptr);
     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
 
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -433,13 +460,297 @@
   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
 
  private:
-  HInstruction* const instruction_;
-
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg = obj_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    DCHECK_NE(reg, RSP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in RDI and RAX respectively):
+    //
+    //   RDI <- obj
+    //   RAX <- ReadBarrierMark(RDI)
+    //   obj <- RAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
+};
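The dedicated entrypoints above exist one per core register, so the offset can be computed from the register number alone; that is what makes the two argument/result moves unnecessary. A toy model of the lookup (the layout is an assumption for illustration; cf. `GetReadBarrierMarkEntryPointsOffset`):

```cpp
#include <cstdint>

// One pReadBarrierMarkRegNN slot per core register: the register number
// indexes a contiguous table of pointer-sized thread-local entrypoints.
static uint32_t MarkEntryPointOffset(uint32_t first_slot_offset,
                                     uint32_t reg,
                                     uint32_t pointer_size) {
  return first_slot_offset + reg * pointer_size;
}
```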
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
+                                            Location out,
+                                            Location ref,
+                                            Location obj,
+                                            uint32_t offset,
+                                            Location index)
+      : SlowPathCode(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial
+    // object has been overwritten by (or after) the heap object
+    // reference load to be instrumented, e.g.:
+    //
+    //   __ movl(out, Address(out, offset));
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute real offset and store it in index_.
+        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::x86_64::X86_64Assembler::shll and
+          // art::x86_64::X86_64Assembler::AddImmediate below), but it
+          // has not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
+          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the
+        // scale factor (2) cannot overflow in practice, as the
+        // runtime is unable to allocate object arrays with a size
+        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegister());
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
+    }
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierForHeapReferenceSlowPathX86_64";
+  }
+
+ private:
+  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
+    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<CpuRegister>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on x86-64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
+};
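The index massaging above reduces to handing artReadBarrierSlow one flat byte offset: for array gets it is `data_offset + (index << 2)`, since compressed heap references are 4 bytes (`TIMES_4` encodes a shift by 2), while the Unsafe intrinsics already pass a byte offset with `offset_ == 0`. A quick arithmetic check (the 12-byte data offset is an assumed example value, not an ART constant):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t data_offset = 12;  // assumed array data offset for this example
  const uint32_t index = 7;
  const uint32_t kTimes4Shift = 2;  // shift for 4-byte mirror::HeapReference
  // The slow path rewrites the index register in place: shll by 2, then add.
  const uint32_t flat_offset = (index << kTimes4Shift) + data_offset;
  assert(flat_offset == 40);
  return 0;
}
```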
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
+      : SlowPathCode(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
+};
+
 #undef __
-#define __ down_cast<X86_64Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
 
 inline Condition X86_64IntegerCondition(IfCondition cond) {
   switch (cond) {
@@ -473,19 +784,36 @@
   UNREACHABLE();
 }
 
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                     Location temp) {
-  // All registers are assumed to be correctly set up.
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  switch (desired_dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return desired_dispatch_info;
+  }
+}
 
+Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                     Location temp) {
+  // All registers are assumed to be correctly set up.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
-      __ gs()->movl(temp.AsRegister<CpuRegister>(),
-                    Address::Absolute(invoke->GetStringInitOffset(), true));
+      __ gs()->movq(temp.AsRegister<CpuRegister>(),
+                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
@@ -495,16 +823,16 @@
       method_patches_.emplace_back(invoke->GetTargetMethod());
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       __ movq(temp.AsRegister<CpuRegister>(),
-              Address::Absolute(kDummy32BitOffset, false /* no_rip */));
-      // Bind the label at the end of the "movl" insn.
-      __ Bind(&pc_rel_dex_cache_patches_.back().label);
+              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+      // Bind a new fixup label at the end of the "movq" insn.
+      uint32_t offset = invoke->GetDexCacheArrayOffset();
+      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
       break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       CpuRegister reg = temp.AsRegister<CpuRegister>();
       if (current_method.IsRegister()) {
@@ -515,16 +843,24 @@
         method_reg = reg.AsRegister();
         __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
       }
-      // temp = temp->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ movq(reg,
               Address(CpuRegister(method_reg),
                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -539,13 +875,14 @@
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
-      FALLTHROUGH_INTENDED;
+      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // (callee_method + offset_of_quick_compiled_code)()
       __ call(Address(callee_method.AsRegister<CpuRegister>(),
                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kX86_64WordSize).SizeValue()));
+                          kX86_64PointerSize).SizeValue()));
       break;
   }
 
@@ -556,51 +893,109 @@
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
+
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
-  // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ movl(temp, Address(CpuRegister(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (a future
+  // collector may not).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
+      kX86_64PointerSize).SizeValue()));
+}
+
+void CodeGeneratorX86_64::RecordSimplePatch() {
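+  // Bind a label at the current position; EmitLinkerPatches() later records
+  // it via LinkerPatch::RecordPosition().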
+  if (GetCompilerOptions().GetIncludePatchInformation()) {
+    simple_patches_.emplace_back();
+    __ Bind(&simple_patches_.back());
+  }
+}
+
+void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  __ Bind(&string_patches_.back().label);
+}
+
+void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  __ Bind(&type_patches_.back().label);
+}
+
+Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                            uint32_t element_offset) {
+  // Add a patch entry and return the label.
+  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
+  return &pc_relative_dex_cache_patches_.back().label;
 }
 
 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      method_patches_.size() + relative_call_patches_.size() + pc_rel_dex_cache_patches_.size();
+      method_patches_.size() +
+      relative_call_patches_.size() +
+      pc_relative_dex_cache_patches_.size() +
+      simple_patches_.size() +
+      string_patches_.size() +
+      type_patches_.size();
   linker_patches->reserve(size);
+  // The label points to the end of the patched instruction, but the literal
+  // offset needs to point to the embedded constant, which occupies its last
+  // 4 bytes.
+  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
   for (const MethodPatchInfo<Label>& info : method_patches_) {
-    // The label points to the end of the "movl" instruction but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
                                                        info.target_method.dex_file,
                                                        info.target_method.dex_method_index));
   }
   for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
-    // The label points to the end of the "call" instruction but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
-    // The label points to the end of the "mov" instruction but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
                                                               &info.target_dex_file,
                                                               info.label.Position(),
                                                               info.element_offset));
   }
+  for (const Label& label : simple_patches_) {
+    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+  for (const StringPatchInfo<Label>& info : string_patches_) {
+    // These are always PC-relative, see GetSupportedLoadStringKind().
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
+                                                               &info.dex_file,
+                                                               info.label.Position(),
+                                                               info.string_index));
+  }
+  for (const TypePatchInfo<Label>& info : type_patches_) {
+    // These are always PC-relative, see GetSupportedLoadClassKind().
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
+                                                             &info.dex_file,
+                                                             info.label.Position(),
+                                                             info.type_index));
+  }
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -635,7 +1030,7 @@
                                         HInstruction* instruction,
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -646,17 +1041,24 @@
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
   ValidateInvokeRuntime(instruction, slow_path);
-  __ gs()->call(Address::Absolute(entry_point_offset, true));
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                              HInstruction* instruction,
+                                                              SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+}
+
 static constexpr int kNumberOfCpuRegisterPairs = 0;
 // Use a fake return address register to mimic Quick.
 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
-                const X86_64InstructionSetFeatures& isa_features,
-                const CompilerOptions& compiler_options,
-                OptimizingCompilerStats* stats)
+                                         const X86_64InstructionSetFeatures& isa_features,
+                                         const CompilerOptions& compiler_options,
+                                         OptimizingCompilerStats* stats)
       : CodeGenerator(graph,
                       kNumberOfCpuRegisters,
                       kNumberOfFloatRegisters,
@@ -672,62 +1074,31 @@
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
+        assembler_(graph->GetArena()),
         isa_features_(isa_features),
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                                CodeGeneratorX86_64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
-Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters);
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location();
-}
-
-void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   // Stack register is always reserved.
   blocked_core_registers_[RSP] = true;
 
   // Block the register used as TMP.
   blocked_core_registers_[TMP] = true;
-
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -817,59 +1188,48 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   if (source.Equals(destination)) {
     return;
   }
   if (destination.IsRegister()) {
+    CpuRegister dest = destination.AsRegister<CpuRegister>();
     if (source.IsRegister()) {
-      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+      __ movq(dest, source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>());
+      __ movd(dest, source.AsFpuRegister<XmmRegister>());
     } else if (source.IsStackSlot()) {
-      __ movl(destination.AsRegister<CpuRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else if (source.IsConstant()) {
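+      // Materialize the constant directly into the CPU register.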
+      HConstant* constant = source.GetConstant();
+      if (constant->IsLongConstant()) {
+        Load64BitValue(dest, constant->AsLongConstant()->GetValue());
+      } else {
+        Load32BitValue(dest, GetInt32ValueOf(constant));
+      }
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(destination.AsRegister<CpuRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsFpuRegister()) {
+    XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
     if (source.IsRegister()) {
-      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>());
+      __ movd(dest, source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+      __ movaps(dest, source.AsFpuRegister<XmmRegister>());
+    } else if (source.IsConstant()) {
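+      // Load the constant's raw bit pattern into the XMM register.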
+      HConstant* constant = source.GetConstant();
+      int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+      if (constant->IsFloatConstant()) {
+        Load32BitValue(dest, static_cast<int32_t>(value));
+      } else {
+        Load64BitValue(dest, value);
+      }
     } else if (source.IsStackSlot()) {
-      __ movss(destination.AsFpuRegister<XmmRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movsd(destination.AsFpuRegister<XmmRegister>(),
-               Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsStackSlot()) {
     if (source.IsRegister()) {
@@ -913,82 +1273,6 @@
   }
 }
 
-void CodeGeneratorX86_64::Move(HInstruction* instruction,
-                               Location location,
-                               HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  if (instruction->IsCurrentMethod()) {
-    Move(location, Location::DoubleStackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (locations != nullptr && locations->Out().IsConstant()) {
-    HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
-      Immediate imm(GetInt32ValueOf(const_to_move));
-      if (location.IsRegister()) {
-        __ movl(location.AsRegister<CpuRegister>(), imm);
-      } else if (location.IsStackSlot()) {
-        __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), const_to_move);
-      }
-    } else if (const_to_move->IsLongConstant()) {
-      int64_t value = const_to_move->AsLongConstant()->GetValue();
-      if (location.IsRegister()) {
-        Load64BitValue(location.AsRegister<CpuRegister>(), value);
-      } else if (location.IsDoubleStackSlot()) {
-        Store64BitValueToStack(location, value);
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), const_to_move);
-      }
-    }
-  } else if (instruction->IsLoadLocal()) {
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move(location,
-             Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected local type " << instruction->GetType();
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    Move(location, temp_location);
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimLong:
-      case Primitive::kPrimFloat:
-      case Primitive::kPrimDouble:
-        Move(location, locations->Out());
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  }
-}
-
 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
@@ -1053,9 +1337,10 @@
 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
-                                                     Label* true_label,
-                                                     Label* false_label) {
+                                                     LabelType* true_label,
+                                                     LabelType* false_label) {
   if (cond->IsFPConditionTrueIfNaN()) {
     __ j(kUnordered, true_label);
   } else if (cond->IsFPConditionFalseIfNaN()) {
@@ -1064,48 +1349,44 @@
   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
 }
 
-void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr,
-                                                                  HCondition* condition,
-                                                                  Label* true_target,
-                                                                  Label* false_target,
-                                                                  Label* always_true_target) {
+void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
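+  // Emit only the compare/test that sets the condition codes; the caller is
+  // responsible for emitting the consuming jump(s) or CMOV.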
   LocationSummary* locations = condition->GetLocations();
+
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
-
-  // We don't want true_target as a nullptr.
-  if (true_target == nullptr) {
-    true_target = always_true_target;
-  }
-  bool falls_through = (false_target == nullptr);
-
-  // FP compares don't like null false_targets.
-  if (false_target == nullptr) {
-    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  }
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
+        if (value == 0) {
+          __ testl(left_reg, left_reg);
+        } else {
+          __ cmpl(left_reg, Immediate(value));
+        }
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(left_reg, left_reg);
-          } else {
-            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in a 32-bit integer.
-          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(left_reg, value);
       } else if (right.IsDoubleStackSlot()) {
         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
       }
-      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1120,7 +1401,6 @@
         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
                    Address(CpuRegister(RSP), right.GetStackIndex()));
       }
-      GenerateFPJumps(condition, true_target, false_target);
       break;
     }
     case Primitive::kPrimDouble: {
@@ -1135,6 +1415,38 @@
         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
                    Address(CpuRegister(RSP), right.GetStackIndex()));
       }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected condition type " << type;
+  }
+}
+
+template<class LabelType>
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
+                                                                  LabelType* true_target_in,
+                                                                  LabelType* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (i.e. a fallthrough), use and bind `fallthrough_target` instead.
+  LabelType fallthrough_target;
+  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
+  // Generate the comparison to set the CC.
+  GenerateCompareTest(condition);
+
+  // Now generate the correct jump(s).
+  Primitive::Type type = condition->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimLong: {
+      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    }
+    case Primitive::kPrimDouble: {
       GenerateFPJumps(condition, true_target, false_target);
       break;
     }
@@ -1142,173 +1454,259 @@
       LOG(FATAL) << "Unexpected condition type " << type;
   }
 
-  if (!falls_through) {
+  if (false_target != &fallthrough_target) {
     __ jmp(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
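+    // The fallthrough label is bound only if a jump above actually used it.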
+    __ Bind(&fallthrough_target);
+  }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
-                                                           Label* true_target,
-                                                           Label* false_target,
-                                                           Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  if (cond->IsIntConstant()) {
-    // Constant condition, statically compared against 1.
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ jmp(always_true_target);
-      }
-      return;
-    } else {
-      DCHECK_EQ(cond_value, 0);
-    }
-  } else {
-    bool is_materialized =
-        !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
-    // Moves do not affect the eflags register, so if the condition is
-    // evaluated just before the if, we don't need to evaluate it
-    // again.  We can't use the eflags on FP conditions if they are
-    // materialized due to the complex branching.
-    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-    bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
-        && !Primitive::IsFloatingPointType(type);
+static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+  // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
+  // are usable only if the condition is materialized strictly before `branch`.
+  // We can't use the eflags on FP conditions, as they are materialized with
+  // complex branching.
+  return cond->IsCondition() &&
+         cond->GetNext() == branch &&
+         !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
+}
 
-    if (is_materialized) {
-      if (!eflags_set) {
-        // Materialized condition, compare against 0.
-        Location lhs = instruction->GetLocations()->InAt(0);
-        if (lhs.IsRegister()) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
-                  Immediate(0));
-        }
-        __ j(kNotEqual, true_target);
+template<class LabelType>
+void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
+                                                           size_t condition_input_index,
+                                                           LabelType* true_target,
+                                                           LabelType* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against "true" (integer value 1).
+    if (cond->AsIntConstant()->IsTrue()) {
+      if (true_target != nullptr) {
+        __ jmp(true_target);
+      }
+    } else {
+      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      if (false_target != nullptr) {
+        __ jmp(false_target);
+      }
+    }
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
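+    // The condition's value (0 or 1) is already available in a register or
+    // stack slot; reuse the eflags if they were set just before this branch,
+    // otherwise test the value against 0.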
+    if (AreEflagsSetFrom(cond, instruction)) {
+      if (true_target == nullptr) {
+        __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
       } else {
         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
-      // Condition has not been materialized, use its inputs as the
-      // comparison and its condition as the branch condition.
-
-      // Is this a long or FP comparison that has been folded into the HCondition?
-      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-        // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
-                                     true_target, false_target, always_true_target);
-        return;
-      }
-
-      Location lhs = cond->GetLocations()->InAt(0);
-      Location rhs = cond->GetLocations()->InAt(1);
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-        }
+      // Materialized condition, compare against 0.
+      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
+      if (lhs.IsRegister()) {
+        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
       } else {
-        __ cmpl(lhs.AsRegister<CpuRegister>(),
-                Address(CpuRegister(RSP), rhs.GetStackIndex()));
+        __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
       }
-      __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
+      if (true_target == nullptr) {
+        __ j(kEqual, false_target);
+      } else {
+        __ j(kNotEqual, true_target);
+      }
+    }
+  } else {
+    // Condition has not been materialized, use its inputs as the
+    // comparison and its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+
+    // If this is a long or FP comparison that has been folded into
+    // the HCondition, generate the comparison directly.
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      return;
+    }
+
+    Location lhs = condition->GetLocations()->InAt(0);
+    Location rhs = condition->GetLocations()->InAt(1);
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+    } else if (rhs.IsConstant()) {
+      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+      codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
+    } else {
+      __ cmpl(lhs.AsRegister<CpuRegister>(),
+              Address(CpuRegister(RSP), rhs.GetStackIndex()));
+    }
+    if (true_target == nullptr) {
+      __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
+    } else {
+      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted above, and we still need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ jmp(false_target);
   }
 }
 
 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathX86_64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
+  GenerateTestAndBranch<Label>(deoptimize,
+                               /* condition_input_index */ 0,
+                               slow_path->GetEntryLabel(),
+                               /* false_target */ nullptr);
 }
 
-void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // An FP condition doesn't generate the single CC that we need.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition() &&
+      Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
+    return false;
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
 }
 
-void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
+void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::Any());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      if (select->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+    } else {
+      locations->SetInAt(1, Location::Any());
+    }
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
 }
 
-void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
+void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+    CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
+    Location value_true_loc = locations->InAt(1);
+    DCHECK(locations->InAt(0).Equals(locations->Out()));
 
-void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
 
-void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction.  Condition codes are right.
+          cond = X86_64IntegerCondition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        GenerateCompareTest(condition);
+        cond = X86_64IntegerCondition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+      __ testl(cond_reg, cond_reg);
+    }
 
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
+    // If the condition is true, overwrite the output, which already contains false.
+    // Generate the correctly sized CMOV.
+    bool is_64_bit = Primitive::Is64BitType(select->GetType());
+    if (value_true_loc.IsRegister()) {
+      __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
+    } else {
+      __ cmov(cond,
+              value_false,
+              Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
+    }
+  } else {
+    NearLabel false_target;
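+    // No CMOV: jump over the move of the true value when the condition is false.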
+    GenerateTestAndBranch<NearLabel>(select,
+                                     /* condition_input_index */ 2,
+                                     /* true_target */ nullptr,
+                                     &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
 }
 
-void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
+void CodeGeneratorX86_64::GenerateNop() {
+  __ nop();
+}
+
+void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1327,13 +1725,13 @@
       locations->SetInAt(1, Location::Any());
       break;
   }
-  if (cond->NeedsMaterialization()) {
+  if (!cond->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister());
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
 
@@ -1341,7 +1739,7 @@
   Location lhs = locations->InAt(0);
   Location rhs = locations->InAt(1);
   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
-  Label true_label, false_label;
+  NearLabel true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default:
@@ -1354,11 +1752,7 @@
         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-        }
+        codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
       } else {
         __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
@@ -1372,16 +1766,7 @@
         __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
         int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-          } else {
-            __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in an int.
-          __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
       } else {
         __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
@@ -1430,89 +1815,94 @@
 }
 
 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -1539,21 +1929,30 @@
 
   NearLabel less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond = kLess;
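+  // FP compares switch this to kBelow, as ucomis{s,d} sets CF rather than
+  // the signed flags.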
+
   switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+        codegen_->Compare32BitValue(left_reg, value);
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(left_reg, left_reg);
-          } else {
-            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in an int.
-          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(left_reg, value);
       } else if (right.IsDoubleStackSlot()) {
         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
@@ -1572,6 +1971,7 @@
         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
       }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // ucomis{s,d} sets CF
       break;
     }
     case Primitive::kPrimDouble: {
@@ -1585,14 +1985,16 @@
         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
       }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // ucomis{s,d} sets CF
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
   }
+
   __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(type == Primitive::kPrimLong ? kLess : kBelow, &less);  //  ucomis{s,d} sets CF (kBelow)
+  __ j(less_cond, &less);
 
   __ Bind(&greater);
   __ movl(out, Immediate(1));
@@ -1660,7 +2062,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -1800,7 +2202,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
@@ -1815,9 +2217,9 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1837,9 +2239,9 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -1871,7 +2273,6 @@
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -1884,31 +2285,45 @@
 
 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   LocationSummary* locations = invoke->GetLocations();
+  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   Location receiver = locations->InAt(0);
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
-  // Set the hidden argument.
-  CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+  // Set the hidden argument. It is safe to do so here, as RAX
+  // is not modified between this point and the `call` instruction.
+  DCHECK_EQ(RAX, hidden_reg.AsRegister());
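+  // The hidden argument is the interface method's dex method index.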
   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ movl(temp, Address(temp, class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (a future
+  // collector may not).
   __ MaybeUnpoisonHeapReference(temp);
+  // temp = temp->GetAddressOfIMT()
+  __ movq(temp,
+      Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+  // Compute the offset of the IMT entry for this interface method.
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
+  __ call(Address(
+      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1993,6 +2408,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2011,6 +2428,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2088,6 +2507,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2182,6 +2603,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2190,13 +2613,12 @@
           // Processing a Dex `int-to-byte' instruction.
           if (in.IsRegister()) {
             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movsxb(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<int8_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
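
The new long-to-byte/short/char cases rely on ordinary integer narrowing: only the
low bits of the 64-bit constant survive, and movsxb/movsxw/movzxw produce the same
result for register inputs. A self-contained check of the
static_cast<int8_t>(Int64FromConstant(...)) behavior used above (the wrap-around is
guaranteed for two's complement, which C++20 mandates):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t wide = 0x1234567890ABCDEFLL;
      // Keeps only the low byte (0xEF) and sign-extends it, matching what
      // movsxb would produce for the same value in a register.
      int8_t narrow = static_cast<int8_t>(wide);
      assert(narrow == -17);
      return 0;
    }
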
 
@@ -2208,6 +2630,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2216,13 +2640,12 @@
           // Processing a Dex `int-to-short' instruction.
           if (in.IsRegister()) {
             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movsxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<int16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -2365,6 +2788,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2373,13 +2798,12 @@
           // Processing a Dex `int-to-char' instruction.
           if (in.IsRegister()) {
             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movzxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -2403,11 +2827,7 @@
           } else if (in.IsConstant()) {
             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2421,11 +2841,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2439,11 +2855,7 @@
           } else if (in.IsConstant()) {
             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (bit_cast<int64_t, double>(v) == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -2470,11 +2882,7 @@
           } else if (in.IsConstant()) {
             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2488,11 +2896,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2506,11 +2910,7 @@
           } else if (in.IsConstant()) {
             float v = in.GetConstant()->AsFloatConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (bit_cast<int32_t, float>(v) == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()));
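
The deleted `v == 0 ? xorps/xorpd : movss/movsd` blocks are folded into
Load32BitValue/Load64BitValue, which presumably keep the same zero special-case. A
sketch of the 32-bit variant with printf stubs standing in for the real emitters
(the stub names are placeholders):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static void Xorps(int xmm) { std::printf("xorps xmm%d, xmm%d\n", xmm, xmm); }
    static void MovssLiteral(int xmm, float v) {
      std::printf("movss xmm%d, [rip+lit]\n", xmm);
      (void)v;
    }

    // xorps is shorter than a constant-area load and has no memory
    // dependency. Comparing bit patterns rather than `value == 0.0f`
    // correctly sends -0.0f (bits 0x80000000) down the literal path.
    static void Load32BitValueSketch(int xmm, float value) {
      int32_t bits;
      std::memcpy(&bits, &value, sizeof bits);
      if (bits == 0) {
        Xorps(xmm);
      } else {
        MovssLiteral(xmm, value);
      }
    }

    int main() {
      Load32BitValueSketch(0, 0.0f);   // xorps
      Load32BitValueSketch(1, -0.0f);  // literal load
      return 0;
    }
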
@@ -2543,7 +2943,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2623,7 +3023,8 @@
         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ addss(first.AsFpuRegister<XmmRegister>(),
@@ -2637,7 +3038,8 @@
         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
@@ -2663,7 +3065,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2712,7 +3114,8 @@
         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ subss(first.AsFpuRegister<XmmRegister>(),
@@ -2726,7 +3129,8 @@
         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ subsd(first.AsFpuRegister<XmmRegister>(),
@@ -2833,7 +3237,8 @@
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ mulss(first.AsFpuRegister<XmmRegister>(),
@@ -2848,7 +3253,8 @@
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
@@ -2988,13 +3394,13 @@
   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
 
   int64_t imm = Int64FromConstant(second.GetConstant());
-
-  DCHECK(IsPowerOfTwo(std::abs(imm)));
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint64_t abs_imm = AbsOrMin(imm);
 
   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
 
   if (instruction->GetResultType() == Primitive::kPrimInt) {
-    __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+    __ leal(tmp, Address(numerator, abs_imm - 1));
     __ testl(numerator, numerator);
     __ cmov(kGreaterEqual, tmp, numerator);
     int shift = CTZ(imm);
@@ -3009,7 +3415,7 @@
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
 
-    codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
+    codegen_->Load64BitValue(rdx, abs_imm - 1);
     __ addq(rdx, numerator);
     __ testq(numerator, numerator);
     __ cmov(kGreaterEqual, rdx, numerator);
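
AbsOrMin exists because std::abs(INT64_MIN) is undefined; it maps the minimum value
to itself, whose magnitude is still a power of two. A runnable rendition of what the
int path above computes (the leal/testl/cmov bias, the arithmetic shift, and the
negation for negative divisors):

    #include <cassert>
    #include <cstdint>

    int32_t DivByPowerOfTwoSketch(int32_t n, int32_t imm) {
      uint32_t abs_imm =
          (imm == INT32_MIN) ? 0x80000000u : static_cast<uint32_t>(imm < 0 ? -imm : imm);
      int shift = __builtin_ctz(abs_imm);  // CTZ(imm); GCC/Clang builtin
      // leal tmp, [n + abs_imm - 1]; testl n, n; cmovge tmp, n
      int32_t tmp = (n >= 0) ? n : static_cast<int32_t>(n + (abs_imm - 1));
      int32_t q = tmp >> shift;            // sarl: shifts the biased value
      return (imm < 0) ? -q : q;           // negl for negative divisors
    }

    int main() {
      assert(DivByPowerOfTwoSketch(-7, 4) == -1);  // toward zero, not -2
      assert(DivByPowerOfTwoSketch(-8, 4) == -2);
      assert(DivByPowerOfTwoSketch(7, -4) == -1);
      assert(DivByPowerOfTwoSketch(INT32_MIN, INT32_MIN) == 1);
      return 0;
    }
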
@@ -3056,16 +3462,6 @@
 
     __ movl(numerator, eax);
 
-    NearLabel no_div;
-    NearLabel end;
-    __ testl(eax, eax);
-    __ j(kNotEqual, &no_div);
-
-    __ xorl(out, out);
-    __ jmp(&end);
-
-    __ Bind(&no_div);
-
     __ movl(eax, Immediate(magic));
     __ imull(numerator);
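
The deleted no_div/end branch special-cased a zero numerator, but the magic-number
sequence already produces 0 for 0, so the branch was dead weight. For reference, a
runnable model of the multiply-high technique the surrounding imull/sarl code
implements, using the standard divide-by-3 constants from Hacker's Delight:

    #include <cassert>
    #include <cstdint>

    // Signed 32-bit division by 3 via multiply-high, as the generated
    // imull sequence does. M = 0x55555556 is ceil(2^32 / 3).
    int32_t DivBy3(int32_t n) {
      int64_t prod = 0x55555556LL * n;
      int32_t q = static_cast<int32_t>(prod >> 32);  // edx after imull
      q += static_cast<uint32_t>(n) >> 31;           // +1 correction for n < 0
      return q;
    }

    int main() {
      assert(DivBy3(0) == 0);   // no explicit zero check needed
      assert(DivBy3(7) == 2);
      assert(DivBy3(-7) == -2);
      assert(DivBy3(INT32_MAX) == INT32_MAX / 3);
      assert(DivBy3(INT32_MIN) == INT32_MIN / 3);
      return 0;
    }
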
 
@@ -3091,7 +3487,6 @@
     } else {
       __ movl(eax, edx);
     }
-    __ Bind(&end);
   } else {
     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
 
@@ -3167,7 +3562,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
       DivByPowerOfTwo(instruction->AsDiv());
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -3176,7 +3571,7 @@
   } else {
     SlowPathCode* slow_path =
         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
-            out.AsRegister(), type, is_div);
+            instruction, out.AsRegister(), type, is_div);
     codegen_->AddSlowPath(slow_path);
 
     CpuRegister second_reg = second.AsRegister<CpuRegister>();
@@ -3254,7 +3649,8 @@
         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ divss(first.AsFpuRegister<XmmRegister>(),
@@ -3268,7 +3664,8 @@
         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
@@ -3355,6 +3752,7 @@
   Location value = locations->InAt(0);
 
   switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
@@ -3432,7 +3830,7 @@
           __ shrl(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
         if (op->IsShl()) {
           __ shll(first_reg, imm);
         } else if (op->IsShr()) {
@@ -3454,7 +3852,7 @@
           __ shrq(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
         if (op->IsShl()) {
           __ shlq(first_reg, imm);
         } else if (op->IsShr()) {
@@ -3467,6 +3865,56 @@
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorl(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
+        __ rorl(first_reg, imm);
+      }
+      break;
+    case Primitive::kPrimLong:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorq(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
+        __ rorq(first_reg, imm);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
   }
 }
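
The new Ror visitors mask constant rotate distances with kMaxIntShiftDistance (31)
and kMaxLongShiftDistance (63), matching what rorl/rorq do with a count in CL. A
host-side equivalent of the 32-bit case:

    #include <cassert>
    #include <cstdint>

    // Rotate right by a masked distance, as rorl does; masking also makes
    // the distance-0 case well defined (no undefined shift by 32).
    uint32_t Ror32(uint32_t value, uint32_t distance) {
      distance &= 31;  // kMaxIntShiftDistance
      return distance == 0 ? value
                           : (value >> distance) | (value << (32 - distance));
    }

    int main() {
      assert(Ror32(0x80000001u, 1) == 0xC0000000u);
      assert(Ror32(0x12345678u, 32) == 0x12345678u);  // count masked to 0
      return 0;
    }
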
 
@@ -3496,31 +3944,40 @@
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
-                           instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-
-  DCHECK(!codegen_->IsLeafMethod());
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
+    __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
+    __ call(Address(temp, code_offset.SizeValue()));
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+    DCHECK(!codegen_->IsLeafMethod());
+  }
 }
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -3532,13 +3989,13 @@
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
                            instruction->GetTypeIndex());
-
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -3571,6 +4028,31 @@
   // Nothing to do, the method is already at its location.
 }
 
+void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kX86_64PointerSize));
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(),
+            mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
+  }
+}
+
 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
@@ -3615,7 +4097,7 @@
 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3625,15 +4107,15 @@
   LOG(FATAL) << "Unimplemented";
 }
 
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
-   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
@@ -3642,21 +4124,38 @@
       // nop
       break;
     }
-    default:
-      LOG(FATAL) << "Unexpected memory barier " << kind;
+    case MemBarrierKind::kNTStoreStore:
+      // Non-Temporal Store/Store needs an explicit fence.
+      MemoryFence(/* non-temporal */ true);
+      break;
   }
 }
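
Only kAnyAny (StoreLoad) produces a real fence because x86-64's TSO ordering already
forbids the other reorderings; the new kNTStoreStore case exists because
non-temporal stores are weakly ordered and fall outside TSO, hence
MemoryFence(/* non-temporal */ true). The same asymmetry is visible from portable
C++:

    #include <atomic>

    // On x86-64 only the seq_cst fence emits an instruction (mfence or a
    // locked RMW); acquire/release fences are compiler-only barriers,
    // mirroring the kAnyAny vs. other-kinds split in the switch above.
    void Fences() {
      std::atomic_thread_fence(std::memory_order_acquire);  // no code emitted
      std::atomic_thread_fence(std::memory_order_release);  // no code emitted
      std::atomic_thread_fence(std::memory_order_seq_cst);  // mfence
    }

    int main() {
      Fences();
      return 0;
    }
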
 
 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object field get when read barriers
+    // are enabled: we do not want the move to overwrite the object's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -3665,7 +4164,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+  Location base_loc = locations->InAt(0);
+  CpuRegister base = base_loc.AsRegister<CpuRegister>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   Primitive::Type field_type = field_info.GetFieldType();
@@ -3692,12 +4192,36 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
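
The kPrimNot path above chooses between a Baker-style fast-path barrier and a
generic slow-path barrier. A conceptual model only of the Baker idea: the reference
holder carries a "gray" state, and when it is gray the freshly loaded reference must
be shipped to a mark routine before use. Names, layout, and the gray test are
illustrative, not ART's actual lock-word encoding:

    #include <atomic>

    struct Obj {
      std::atomic<bool> gray{false};  // stand-in for the lock-word RB state
      Obj* field{nullptr};
    };

    Obj* Mark(Obj* ref) { /* slow path: mark/forward ref */ return ref; }

    Obj* LoadWithBakerBarrier(Obj* holder) {
      Obj* ref = holder->field;  // fast path: plain load
      if (holder->gray.load(std::memory_order_acquire)) {
        ref = Mark(ref);         // slow path, out of line
      }
      return ref;
    }

    int main() {
      Obj holder, target;
      holder.field = &target;
      holder.gray.store(true);
      LoadWithBakerBarrier(&holder);  // takes the slow path
      return 0;
    }
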
@@ -3718,14 +4242,20 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+  if (field_type == Primitive::kPrimNot) {
+    // Potential implicit null checks, in the case of reference
+    // fields, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
-  if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
+  if (is_volatile) {
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -3736,14 +4266,25 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   Primitive::Type field_type = field_info.GetFieldType();
+  bool is_volatile = field_info.IsVolatile();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    if (is_volatile) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
   } else {
-    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
+    if (is_volatile) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
+    } else {
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    }
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
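
The *OrInt32Constant restriction for volatile stores is about atomicity: a 64-bit
constant that does not fit a sign-extended imm32 would have to be written as two
32-bit moves, and a concurrent reader could observe the halves mixed. The guarantee
the single-instruction rule preserves, in portable terms:

    #include <atomic>
    #include <cstdint>

    std::atomic<int64_t> field;  // stands in for the volatile Java field

    // On x86-64 an aligned relaxed store compiles to a single movq, so no
    // reader can pair the new low 32 bits with the old high 32 bits.
    // Splitting the store in two -- what an out-of-range immediate would
    // force -- is exactly the tearing the location constraint rules out.
    void VolatileStore(int64_t v) {
      field.store(v, std::memory_order_relaxed);
    }

    int main() {
      VolatileStore(42);
      return 0;
    }
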
@@ -3768,14 +4309,16 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       if (value.IsConstant()) {
-        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());  // low byte only
         __ movb(Address(base, offset), Immediate(v));
       } else {
         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3786,7 +4329,7 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       if (value.IsConstant()) {
-        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
         __ movw(Address(base, offset), Immediate(v));
       } else {
         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3819,9 +4362,11 @@
     case Primitive::kPrimLong: {
       if (value.IsConstant()) {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(v));
-        int32_t v_32 = v;
-        __ movq(Address(base, offset), Immediate(v_32));
+        codegen_->MoveInt64ToAddress(Address(base, offset),
+                                     Address(base, offset + sizeof(int32_t)),
+                                     v,
+                                     instruction);
+        maybe_record_implicit_null_check_done = true;
       } else {
         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
       }
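
MoveInt64ToAddress is introduced elsewhere in this patch; judging from its call
sites, it emits one movq when the value fits a sign-extended imm32 and otherwise two
movl's (low half at the first address, high half at the second), recording the
implicit null check itself after the first store -- presumably why the caller sets
maybe_record_implicit_null_check_done. A hedged sketch of that dispatch with printf
stubs for the emitters:

    #include <cstdint>
    #include <cstdio>

    static void EmitMovqImm(const char* addr, int32_t imm) {
      std::printf("movq %s, %d\n", addr, imm);
    }
    static void EmitMovlImm(const char* addr, int32_t imm) {
      std::printf("movl %s, %d\n", addr, imm);
    }

    // x86-64 has no 64-bit store-immediate: movq m64, imm32 sign-extends.
    // Wider constants need two 32-bit stores; addr_high is addr_low + 4.
    static void MoveInt64ToAddressSketch(const char* addr_low,
                                         const char* addr_high,
                                         int64_t v) {
      if (v == static_cast<int32_t>(v)) {
        EmitMovqImm(addr_low, static_cast<int32_t>(v));
      } else {
        EmitMovlImm(addr_low, static_cast<int32_t>(v));         // low 32 bits
        EmitMovlImm(addr_high, static_cast<int32_t>(v >> 32));  // high 32 bits
      }
    }

    int main() {
      MoveInt64ToAddressSketch("[base+off]", "[base+off+4]", 42);
      MoveInt64ToAddressSketch("[base+off]", "[base+off+4]", 0x123456789LL);
      return 0;
    }
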
@@ -3829,12 +4374,28 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v =
+            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v =
+            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        codegen_->MoveInt64ToAddress(Address(base, offset),
+                                     Address(base, offset + sizeof(int32_t)),
+                                     v,
+                                     instruction);
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -3843,7 +4404,9 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  if (!maybe_record_implicit_null_check_done) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
 
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
@@ -3852,7 +4415,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -3970,20 +4533,20 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
-  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
     return;
   }
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
 
   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
+void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
-  codegen_->AddSlowPath(slow_path);
+  AddSlowPath(slow_path);
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
@@ -4002,35 +4565,48 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
-    GenerateImplicitNullCheck(instruction);
-  } else {
-    GenerateExplicitNullCheck(instruction);
-  }
+  codegen_->GenerateNullCheck(instruction);
 }
 
 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object array get when read barriers
+    // are enabled: we do not want the move to overwrite the array's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4041,8 +4617,7 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4053,8 +4628,7 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4065,8 +4639,7 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4076,12 +4649,8 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+    case Primitive::kPrimInt: {
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4091,9 +4660,44 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        CpuRegister out = out_loc.AsRegister<CpuRegister>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4104,8 +4708,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4116,8 +4719,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4131,11 +4733,12 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-    __ MaybeUnpoisonHeapReference(out);
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -4144,38 +4747,42 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  locations->SetInAt(2, Location::RequiresRegister());
-  if (value_type == Primitive::kPrimLong) {
-    locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
-  } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
 
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+
+    // This first temporary register is possibly used for heap
+    // reference poisoning and/or read barrier emission too.
+    locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>();
+  Location array_loc = locations->InAt(0);
+  CpuRegister array = array_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4219,6 +4826,7 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+
       if (!value.IsRegister()) {
         // Just setting null.
         DCHECK(instruction->InputAt(2)->IsNullConstant());
@@ -4226,7 +4834,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4235,7 +4843,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4247,22 +4855,62 @@
           __ Bind(&not_null);
         }
 
-        __ movl(temp, Address(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp);
-        __ movl(temp, Address(temp, component_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ cmpl(temp, Address(register_value, class_offset));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          __ j(kEqual, &do_put);
-          __ MaybeUnpoisonHeapReference(temp);
-          __ movl(temp, Address(temp, super_offset));
-          // No need to unpoison the result, we're comparing against null.
-          __ testl(temp, temp);
-          __ j(kNotEqual, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ movl(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ movl(temp, Address(temp, component_offset));
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
+          //   __ movl(temp2, Address(register_value, class_offset));
+          //   codegen_->GenerateReadBarrierSlow(
+          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+          //
+          //   __ cmpl(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ jmp(slow_path->GetEntryLabel());
         } else {
-          __ j(kNotEqual, slow_path->GetEntryLabel());
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ movl(temp, Address(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ movl(temp, Address(temp, component_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor the object reference in `register_value->klass_`, as
+          // we are comparing two poisoned references.
+          __ cmpl(temp, Address(register_value, class_offset));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            __ j(kEqual, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ movl(temp, Address(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ testl(temp, temp);
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+          }
         }
       }
 
@@ -4273,7 +4921,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4288,6 +4936,7 @@
 
       break;
     }
+
     case Primitive::kPrimInt: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Address address = index.IsConstant()
@@ -4311,13 +4960,15 @@
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
       if (value.IsRegister()) {
         __ movq(address, value.AsRegister<CpuRegister>());
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       } else {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(v));
-        int32_t v_32 = v;
-        __ movq(address, Immediate(v_32));
+        Address address_high = index.IsConstant()
+            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                offset + sizeof(int32_t))
+            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
       }
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4326,8 +4977,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v =
+            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
@@ -4337,9 +4994,18 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else {
+        int64_t v =
+            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        Address address_high = index.IsConstant()
+            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                offset + sizeof(int32_t))
+            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
+      }
       break;
     }
 
@@ -4353,12 +5019,18 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   __ movl(out, Address(obj, offset));
@@ -4371,7 +5043,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(length));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -4381,8 +5056,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -4405,12 +5079,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    CpuRegister length = length_loc.AsRegister<CpuRegister>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      CpuRegister length = length_loc.AsRegister<CpuRegister>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -4427,8 +5117,8 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ gs()->movq(card, Address::Absolute(
-      Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
+                                        /* no_rip */ true));
   __ movq(temp, object);
   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   __ movb(Address(temp, card, TIMES_1, 0), card);
@@ -4437,14 +5127,6 @@
   }
 }
 
-void LocationsBuilderX86_64::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unimplemented";
 }
@@ -4487,8 +5169,9 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ gs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
+  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
+                                  /* no_rip */ true),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -4566,18 +5249,12 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<int32_t, float>(fp_value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
-        if (value == 0) {
-          // easy FP 0.0.
-          __ xorps(dest, dest);
-        } else {
-          __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
-        }
+        codegen_->Load32BitValue(dest, fp_value);
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
-        Immediate imm(value);
+        Immediate imm(bit_cast<int32_t, float>(fp_value));
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
@@ -4586,11 +5263,7 @@
       int64_t value = bit_cast<int64_t, double>(fp_value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
-        if (value == 0) {
-          __ xorpd(dest, dest);
-        } else {
-          __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
-        }
+        codegen_->Load64BitValue(dest, fp_value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         codegen_->Store64BitValueToStack(destination, value);
@@ -4629,6 +5302,12 @@
           CpuRegister(ensure_scratch.GetRegister()));
 }
 
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+  __ movq(CpuRegister(TMP), reg1);
+  __ movq(reg1, reg2);
+  __ movq(reg2, CpuRegister(TMP));
+}
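+// Editorial note (hedged): this helper replaces the xchgq previously emitted
+// by EmitSwap (changed below); a plausible motivation is that three plain
+// moves through the reserved TMP scratch register schedule better than xchg
+// on common micro-architectures. The effect is simply:
+//   tmp = reg1; reg1 = reg2; reg2 = tmp;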
+
 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   __ movq(Address(CpuRegister(RSP), mem), reg);
@@ -4666,7 +5345,7 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+    Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
@@ -4712,15 +5391,67 @@
           Immediate(mirror::Class::kStatusInitialized));
   __ j(kLess, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
-  // No need for memory fence, thanks to the X86_64 memory model.
+  // No need for memory fence, thanks to the x86-64 memory model.
+}
+
+HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      // We prefer the always-available RIP-relative address for the x86-64 boot image.
+      return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
 }
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(RAX));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(RAX),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -4731,27 +5462,88 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    __ movq(out, Address(
-        current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-    __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
 
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+      codegen_->RecordTypePatch(cls);
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ movl(out, Immediate(address));  // Zero-extended.
+      codegen_->RecordSimplePatch();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      // /* GcRoot<mirror::Class> */ out = *address
+      if (IsUint<32>(cls->GetAddress())) {
+        Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true);
+        GenerateGcRootFieldLoad(cls, out_loc, address);
+      } else {
+        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
+        __ movq(out, Immediate(cls->GetAddress()));
+        GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0));
+      }
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      uint32_t offset = cls->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ false);
+      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+      __ movq(out,
+              Address(current_method,
+                      ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
+      UNREACHABLE();
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
+    if (generate_null_check) {
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+    }
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
     } else {
@@ -4778,31 +5570,86 @@
                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
 }
 
+HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      // We prefer the always-available RIP-relative address for the x86-64 boot image.
+      return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
+}
+
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
-  locations->SetInAt(0, Location::RequiresRegister());
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
+  LocationSummary* locations = load->GetLocations();
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
+
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+      codegen_->RecordStringPatch(load);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ movl(out, Immediate(address));  // Zero-extended.
+      codegen_->RecordSimplePatch();
+      return;  // No dex cache slow path.
+    }
+    default:
+      break;
+  }
+
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
   codegen_->AddSlowPath(slow_path);
-
-  LocationSummary* locations = load->GetLocations();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-  __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-  __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
+  __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
+  return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
+                           /* no_rip */ true);
 }
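+// Editorial note (hedged): no_rip == true presumably forces a plain absolute
+// displacement rather than a RIP-relative encoding, since the operand is an
+// offset from the GS segment base (thread-local storage) and not a
+// PC-relative address.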
 
 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -4825,7 +5672,7 @@
 
 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4835,44 +5682,58 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
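+// Editorial note (hedged): the temporary is only needed under read barriers.
+// With Baker's barriers it presumably holds the lock word while the reference
+// is loaded; with slow path based barriers, the loop-style checks (abstract
+// class, class hierarchy, array object) must keep the previous reference
+// alive for GenerateReadBarrierSlow, as GenerateReferenceLoadOneRegister
+// below illustrates.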
 
 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86_64 uses this register too.
-    locations->SetOut(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(RAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
+  locations->SetOut(Location::RequiresRegister());
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4887,17 +5748,10 @@
     __ j(kEqual, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ movl(target, Address(obj, class_offset));
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -4917,13 +5771,14 @@
       }
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
-      __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -4940,6 +5795,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
@@ -4951,8 +5807,8 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -4964,6 +5820,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       NearLabel exact_check;
@@ -4974,9 +5831,9 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ movl(out, Address(out, component_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -4987,6 +5844,7 @@
       __ jmp(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -4995,8 +5853,8 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+                                                                       /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ movl(out, Immediate(1));
@@ -5005,13 +5863,32 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that calls happen only on the slow path, and that we
+      // always enter that slow path for the unresolved and interface
+      // check cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+                                                                       /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ jmp(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ jmp(&done);
       }
@@ -5036,77 +5913,75 @@
 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86_64 uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
-  CpuRegister temp = locations->WillCall()
-      ? CpuRegister(kNoRegister)
-      : locations->GetTemp(0).AsRegister<CpuRegister>();
-
+  Location temp_loc = locations->GetTemp(0);
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+                                                           is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done;
-  // Avoid null check if we know obj is not null.
-  if (instruction->MustDoNullCheck()) {
-    __ testl(obj, obj);
-    __ j(kEqual, &done);
-  }
-
-  if (locations->WillCall()) {
-    __ movl(obj, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ movl(temp, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(temp);
-  }
-
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5115,19 +5990,46 @@
       }
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop;
+      NearLabel loop, compare_classes;
       __ Bind(&loop);
-      __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
       __ testl(temp, temp);
-      // Jump to the slow path to throw the exception.
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But first, move the object's class back into `temp`, as it
+      // has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5135,9 +6037,22 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kNotEqual, &loop);
+      __ Bind(&done);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -5148,16 +6063,45 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
-      __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back to the beginning of the loop.
       __ testl(temp, temp);
       __ j(kNotEqual, &loop);
-      // Jump to the slow path to throw the exception.
-      __ jmp(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But first, move the object's class back into `temp`, as it
+      // has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
+      // We cannot use a NearLabel here, as its range might be too
+      // short in some cases when read barriers are enabled.  This has
+      // been observed for instance when the code emitted for this
+      // case uses high x86-64 registers (R8-R15).
+      Label done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // Do an exact check.
+      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5165,34 +6109,80 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
-      __ movl(temp, Address(temp, component_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
       __ testl(temp, temp);
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But first, move the object's class back into `temp`, as it
+      // has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kEqual, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require to
+      // assign fixed registers for the inputs of this HInstanceOf
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
   }
-  __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5203,6 +6193,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5320,6 +6315,296 @@
   }
 }
 
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                      Location out,
+                                                                      uint32_t offset,
+                                                                      Location maybe_temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                       Location out,
+                                                                       Location obj,
+                                                                       uint32_t offset,
+                                                                       Location maybe_temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                             Location root,
+                                                             const Address& address,
+                                                             Label* fixup_label) {
+  CpuRegister root_reg = root.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = *address;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *address
+      __ movl(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
+                                      /* no_rip */ true),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = address
+      __ leaq(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *address
+    __ movl(root_reg, address);
+    if (fixup_label != nullptr) {
+      __ Bind(fixup_label);
+    }
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
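+// Editorial note (hedged): fixup_label is bound immediately *after* the
+// load/lea above, so the recorded position presumably marks the end of the
+// instruction; patching can then rewrite the preceding 32-bit displacement
+// (the kDummy32BitOffset placeholder) once the real offset is known.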
+
+void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    CpuRegister obj,
+                                                                    const Address& src,
+                                                                    Location temp,
+                                                                    bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+  //   here, which is a no-op thanks to the x86-64 memory model);
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86-64 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with SHR.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+  __ j(kCarrySet, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
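+// Worked example of the carry trick above (editorial): with
+// LockWord::kReadBarrierStateShift == k, a gray object has bit k of its lock
+// word set. `shrl temp_reg, Immediate(k + 1)` shifts that bit out last,
+// leaving it in CF, so j(kCarrySet, ...) branches exactly when
+// rb_state == gray_ptr_ (1), given white_ptr_ == 0.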
+
+void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
+  }
+}
+
+void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
@@ -5330,18 +6615,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderX86_64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorX86_64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
@@ -5353,11 +6626,58 @@
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Should we generate smaller inline compare/jumps?
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    // Figure out the correct compare values and jump conditions.
+    // Handle the first compare/branch as a special case because it might
+    // jump to the default case.
+    DCHECK_GT(num_entries, 2u);
+    Condition first_condition;
+    uint32_t index;
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    if (lower_bound != 0) {
+      first_condition = kLess;
+      __ cmpl(value_reg_in, Immediate(lower_bound));
+      __ j(first_condition, codegen_->GetLabelOf(default_block));
+      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+      index = 1;
+    } else {
+      // Handle all the compare/jumps below.
+      first_condition = kBelow;
+      index = 0;
+    }
+
+    // Handle the rest of the compare/jumps.
+    for (; index + 1 < num_entries; index += 2) {
+      int32_t compare_to_value = lower_bound + index + 1;
+      __ cmpl(value_reg_in, Immediate(compare_to_value));
+      // Jump to successors[index] if value < case_value[index + 1].
+      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+      // Jump to successors[index + 1] if value == case_value[index + 1].
+      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+    }
+
+    if (index != num_entries) {
+      // There is an odd number of entries. Handle the last one.
+      DCHECK_EQ(index + 1, num_entries);
+      __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
+      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ jmp(codegen_->GetLabelOf(default_block));
+    }
+    return;
+  }
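+  // As a sketch, with lower_bound == 0 and num_entries == 4, the inline chain
+  // above comes out as (AT&T syntax, register name illustrative):
+  //
+  //     cmpl $1, %value
+  //     jb   successors[0]   // value == 0
+  //     je   successors[1]   // value == 1
+  //     cmpl $3, %value
+  //     jb   successors[2]   // value == 2
+  //     je   successors[3]   // value == 3
+  //     jmp  default_block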
 
   // Remove the bias, if needed.
   Register value_reg_out = value_reg_in.AsRegister();
@@ -5368,7 +6688,6 @@
   CpuRegister value_reg(value_reg_out);
 
   // Is the value in range?
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
@@ -5386,17 +6705,71 @@
   __ jmp(temp_reg);
 }
 
-void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
   if (value == 0) {
     __ xorl(dest, dest);
-  } else if (value > 0 && IsInt<32>(value)) {
-    // We can use a 32 bit move, as it will zero-extend and is one byte shorter.
+  } else {
+    __ movl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+  if (value == 0) {
+    // Clears upper bits too.
+    __ xorl(dest, dest);
+  } else if (IsUint<32>(value)) {
+    // We can use a 32 bit move, as it will zero-extend and is shorter.
     __ movl(dest, Immediate(static_cast<int32_t>(value)));
   } else {
     __ movq(dest, Immediate(value));
   }
 }
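+
+// For instance (a sketch of the cases selected above; operands illustrative):
+//
+//   Load64BitValue(rax, 0)       ->  xorl %eax, %eax    (also clears the top half)
+//   Load64BitValue(rax, 0x1234)  ->  movl $0x1234, %eax (zero-extends to 64 bits)
+//   Load64BitValue(rax, -1)      ->  movq $-1, %rax     (full 64-bit form)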
 
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
+  if (value == 0) {
+    __ xorps(dest, dest);
+  } else {
+    __ movss(dest, LiteralInt32Address(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
+  if (value == 0) {
+    __ xorpd(dest, dest);
+  } else {
+    __ movsd(dest, LiteralInt64Address(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
+  Load32BitValue(dest, bit_cast<int32_t, float>(value));
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
+  Load64BitValue(dest, bit_cast<int64_t, double>(value));
+}
+
+void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
+  if (value == 0) {
+    __ testl(dest, dest);
+  } else {
+    __ cmpl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
+  if (IsInt<32>(value)) {
+    if (value == 0) {
+      __ testq(dest, dest);
+    } else {
+      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
+    }
+  } else {
+    // Value won't fit in a 32-bit immediate.
+    __ cmpq(dest, LiteralInt64Address(value));
+  }
+}
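+
+// For instance (a sketch of the cases selected above; operands illustrative):
+//
+//   Compare64BitValue(rax, 0)          ->  testq %rax, %rax
+//   Compare64BitValue(rax, 42)         ->  cmpq  $42, %rax
+//   Compare64BitValue(rax, 1LL << 40)  ->  cmpq  literal_pool(%rip), %rax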
+
 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
   DCHECK(dest.IsDoubleStackSlot());
   if (IsInt<32>(value)) {
@@ -5518,7 +6891,7 @@
 // TODO: trg as memory.
 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
@@ -5545,6 +6918,24 @@
   return Address::RIP(table_fixup);
 }
 
+void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
+                                             const Address& addr_high,
+                                             int64_t v,
+                                             HInstruction* instruction) {
+  if (IsInt<32>(v)) {
+    int32_t v_32 = v;
+    __ movq(addr_low, Immediate(v_32));
+    MaybeRecordImplicitNullCheck(instruction);
+  } else {
+    // Didn't fit in a 32-bit immediate.  Do it in pieces.
+    int32_t low_v = Low32Bits(v);
+    int32_t high_v = High32Bits(v);
+    __ movl(addr_low, Immediate(low_v));
+    MaybeRecordImplicitNullCheck(instruction);
+    __ movl(addr_high, Immediate(high_v));
+  }
+}
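+
+// For instance, storing 0x1122334455667788 does not fit the single-store case,
+// so it splits into two 32-bit stores, low half first (a sketch; the first
+// store is the one that carries the implicit null check):
+//
+//   movl $0x55667788, addr_low
+//   movl $0x11223344, addr_high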
+
 #undef __
 
 }  // namespace x86_64
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dc86a48..4e0e34c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,8 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 
+#include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -28,7 +28,7 @@
 namespace x86_64 {
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
+static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);
 
 // Some x86_64 instructions require a register to be available as temp.
 static constexpr Register TMP = R11;
@@ -47,6 +47,12 @@
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
+// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
+// If the ART ABI changes, this list must be updated.  It is used to ensure that
+// these are not clobbered by any direct call to native code (such as math intrinsics).
+static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
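+// A sketch of the intended use (illustrative; an intrinsic that calls directly
+// into libc would save and restore these registers around the native call):
+//
+//   for (FloatRegister reg : non_volatile_xmm_regs) {
+//     // Spill `reg` before the call, reload it afterwards.
+//   }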
+
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeRuntimeCallingConvention()
@@ -135,6 +141,7 @@
   void Exchange32(CpuRegister reg, int mem);
   void Exchange32(XmmRegister reg, int mem);
   void Exchange32(int mem1, int mem2);
+  void Exchange64(CpuRegister reg1, CpuRegister reg2);
   void Exchange64(CpuRegister reg, int mem);
   void Exchange64(XmmRegister reg, int mem);
   void Exchange64(int mem1, int mem2);
@@ -165,6 +172,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction);
@@ -175,7 +183,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
 
-class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
 
@@ -206,26 +214,67 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
-  void GenerateImplicitNullCheck(HNullCheck* instruction);
-  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *address
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               const Address& address,
+                               Label* fixup_label = nullptr);
+
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
+  void GenerateCompareTest(HCondition* condition);
+  template<class LabelType>
   void GenerateTestAndBranch(HInstruction* instruction,
-                             Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
-  void GenerateCompareTestAndBranch(HIf* if_inst,
-                                    HCondition* condition,
-                                    Label* true_target,
-                                    Label* false_target,
-                                    Label* always_true_target);
-  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
+                             size_t condition_input_index,
+                             LabelType* true_target,
+                             LabelType* false_target);
+  template<class LabelType>
+  void GenerateCompareTestAndBranch(HCondition* condition,
+                                    LabelType* true_target,
+                                    LabelType* false_target);
+  template<class LabelType>
+  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
+
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   X86_64Assembler* const assembler_;
@@ -248,7 +297,6 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
   void Bind(HBasicBlock* block) OVERRIDE;
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
@@ -269,6 +317,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
   }
@@ -297,14 +351,11 @@
     return &move_resolver_;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return GetLabelOf(block)->Position();
   }
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -320,6 +371,8 @@
                   CpuRegister value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
 
@@ -335,9 +388,31 @@
     return false;
   }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
+  void RecordSimplePatch();
+  void RecordStringPatch(HLoadString* load_string);
+  void RecordTypePatch(HLoadClass* load_class);
+  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
@@ -346,6 +421,80 @@
     return isa_features_;
   }
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             CpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             CpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e., when it is different from
+  // Location::NoLocation()), the offset value passed to
+  // artReadBarrierSlow is adjusted to take `index` into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
   int ConstantAreaStart() const {
     return constant_area_start_;
   }
@@ -355,13 +504,49 @@
   Address LiteralInt32Address(int32_t v);
   Address LiteralInt64Address(int64_t v);
 
-  // Load a 64 bit value into a register in the most efficient manner.
+  // Load a 32/64-bit value into a register in the most efficient manner.
+  void Load32BitValue(CpuRegister dest, int32_t value);
   void Load64BitValue(CpuRegister dest, int64_t value);
+  void Load32BitValue(XmmRegister dest, int32_t value);
+  void Load64BitValue(XmmRegister dest, int64_t value);
+  void Load32BitValue(XmmRegister dest, float value);
+  void Load64BitValue(XmmRegister dest, double value);
+
+  // Compare a register with a 32/64-bit value in the most efficient manner.
+  void Compare32BitValue(CpuRegister dest, int32_t value);
+  void Compare64BitValue(CpuRegister dest, int64_t value);
+
   Address LiteralCaseTable(HPackedSwitch* switch_instr);
 
   // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
   void Store64BitValueToStack(Location dest, int64_t value);
 
+  // Assign a 64 bit constant to an address.
+  void MoveInt64ToAddress(const Address& addr_low,
+                          const Address& addr_high,
+                          int64_t v,
+                          HInstruction* instruction);
+
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack.
+  // The 'force_mfence' parameter should be set to ensure ordering of non-temporal stores.
+  void MemoryFence(bool force_mfence = false) {
+    if (!force_mfence) {
+      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
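+  // A sketch of what the two branches above emit (AT&T syntax):
+  //
+  //   lock addl $0, 0(%rsp)   // Default: full fence via a locked no-op RMW
+  //                           // on the top of the stack.
+  //   mfence                  // force_mfence: also orders non-temporal
+  //                           // (streaming) stores, which the locked add
+  //                           // does not guarantee.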
+
+  void GenerateNop();
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+
+  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
+  // We will fix this up in the linker later to have the right value.
+  static constexpr int32_t kDummy32BitOffset = 256;
+
  private:
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
@@ -389,11 +574,13 @@
   ArenaDeque<MethodPatchInfo<Label>> method_patches_;
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
-
-  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
-  // We will fix this up in the linker later to have the right value.
-  static constexpr int32_t kDummy32BitOffset = 256;
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+  // Patch locations for patchoat where the linker doesn't do any other work.
+  ArenaDeque<Label> simple_patches_;
+  // String patch locations.
+  ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // Type patch locations.
+  ArenaDeque<TypePatchInfo<Label>> type_patches_;
 
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 57de41f..fe6c0a3 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -29,20 +29,16 @@
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "base/macros.h"
 #include "builder.h"
-#include "code_generator_arm.h"
-#include "code_generator_arm64.h"
-#include "code_generator_mips.h"
-#include "code_generator_mips64.h"
-#include "code_generator_x86.h"
-#include "code_generator_x86_64.h"
+#include "code_simulator_container.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "driver/compiler_options.h"
+#include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils.h"
 #include "utils/arm/managed_register_arm.h"
@@ -50,10 +46,35 @@
 #include "utils/mips64/managed_register_mips64.h"
 #include "utils/x86/managed_register_x86.h"
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
 #include "gtest/gtest.h"
 
 namespace art {
 
+#ifdef ART_ENABLE_CODEGEN_arm
 // Provide our own codegen, that ensures the C calling conventions
 // are preserved. Currently, ART and C do not match as R4 is caller-save
 // in ART, and callee-save in C. Alternatively, we could use or write
@@ -69,8 +90,8 @@
     AddAllocatedRegister(Location::RegisterLocation(arm::R7));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    arm::CodeGeneratorARM::SetupBlockedRegisters();
     blocked_core_registers_[arm::R4] = true;
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
@@ -78,7 +99,9 @@
     blocked_register_pairs_[arm::R6_R7] = false;
   }
 };
+#endif
 
+#ifdef ART_ENABLE_CODEGEN_x86
 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
  public:
   TestCodeGeneratorX86(HGraph* graph,
@@ -89,8 +112,8 @@
     AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    x86::CodeGeneratorX86::SetupBlockedRegisters();
     // ebx is a callee-save register in C, but caller-save for ART.
     blocked_core_registers_[x86::EBX] = true;
     blocked_register_pairs_[x86::EAX_EBX] = true;
@@ -103,6 +126,7 @@
     blocked_register_pairs_[x86::ECX_EDI] = false;
   }
 };
+#endif
 
 class InternalCodeAllocator : public CodeAllocator {
  public:
@@ -124,173 +148,220 @@
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
 
+static bool CanExecuteOnHardware(InstructionSet target_isa) {
+  return (target_isa == kRuntimeISA)
+      // Handle the special case of ARM, with two instruction sets (ARM32 and Thumb-2).
+      || (kRuntimeISA == kArm && target_isa == kThumb2);
+}
+
+static bool CanExecute(InstructionSet target_isa) {
+  CodeSimulatorContainer simulator(target_isa);
+  return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
+}
+
+template <typename Expected>
+static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
+
+template <>
+bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnBool();
+}
+
+template <>
+int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnInt32();
+}
+
+template <>
+int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnInt64();
+}
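+
+// Hypothetical usage of the specializations above (a sketch; `compiled_stub`
+// stands in for a function pointer into code made executable by the test):
+//
+//   CodeSimulatorContainer sim(kArm64);
+//   if (sim.CanSimulate()) {
+//     int32_t result = SimulatorExecute<int32_t>(sim.Get(), compiled_stub);
+//   }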
+
+template <typename Expected>
+static void VerifyGeneratedCode(InstructionSet target_isa,
+                                Expected (*f)(),
+                                bool has_result,
+                                Expected expected) {
+  ASSERT_TRUE(CanExecute(target_isa)) << "Target ISA is not executable.";
+
+  // Verify on simulator.
+  CodeSimulatorContainer simulator(target_isa);
+  if (simulator.CanSimulate()) {
+    Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
+    if (has_result) {
+      ASSERT_EQ(expected, result);
+    }
+  }
+
+  // Verify on hardware.
+  if (CanExecuteOnHardware(target_isa)) {
+    Expected result = f();
+    if (has_result) {
+      ASSERT_EQ(expected, result);
+    }
+  }
+}
+
 template <typename Expected>
 static void Run(const InternalCodeAllocator& allocator,
                 const CodeGenerator& codegen,
                 bool has_result,
                 Expected expected) {
+  InstructionSet target_isa = codegen.GetInstructionSet();
+
   typedef Expected (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
-  if (codegen.GetInstructionSet() == kThumb2) {
+  if (target_isa == kThumb2) {
     // For thumb we need the bottom bit set.
     f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
   }
-  Expected result = f();
-  if (has_result) {
-    ASSERT_EQ(expected, result);
-  }
+  VerifyGeneratedCode(target_isa, f, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
-  InternalCodeAllocator allocator;
-
-  CompilerOptions compiler_options;
-  std::unique_ptr<const X86InstructionSetFeatures> features_x86(
-      X86InstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-  // We avoid doing a stack overflow check that requires the runtime being setup,
-  // by making sure the compiler knows the methods we are running are leaf methods.
-  codegenX86.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kX86) {
-    Run(allocator, codegenX86, has_result, expected);
+static void RunCode(CodeGenerator* codegen,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
+  GraphChecker graph_checker(graph);
+  graph_checker.Run();
+  if (!graph_checker.IsValid()) {
+    for (const auto& error : graph_checker.GetErrors()) {
+      std::cout << error << std::endl;
+    }
   }
+  ASSERT_TRUE(graph_checker.IsValid());
 
-  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
-      ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-  codegenARM.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
-    Run(allocator, codegenARM, has_result, expected);
-  }
-
-  std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
-      X86_64InstructionSetFeatures::FromCppDefines());
-  x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-  codegenX86_64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kX86_64) {
-    Run(allocator, codegenX86_64, has_result, expected);
-  }
-
-  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
-      Arm64InstructionSetFeatures::FromCppDefines());
-  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-  codegenARM64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kArm64) {
-    Run(allocator, codegenARM64, has_result, expected);
-  }
-
-  std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
-      MipsInstructionSetFeatures::FromCppDefines());
-  mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-  codegenMIPS.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips) {
-    Run(allocator, codegenMIPS, has_result, expected);
-  }
-
-  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
-      Mips64InstructionSetFeatures::FromCppDefines());
-  mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-  codegenMIPS64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips64) {
-    Run(allocator, codegenMIPS64, has_result, expected);
-  }
-}
-
-template <typename Expected>
-static void RunCodeOptimized(CodeGenerator* codegen,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
-  // Tests may have already computed it.
-  if (graph->GetReversePostOrder().empty()) {
-    graph->BuildDominatorTree();
-  }
   SsaLivenessAnalysis liveness(graph, codegen);
-  liveness.Analyze();
 
-  RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
-  register_allocator.AllocateRegisters();
+  PrepareForRegisterAllocation(graph).Run();
+  liveness.Analyze();
+  RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
   hook_before_codegen(graph);
 
   InternalCodeAllocator allocator;
-  codegen->CompileOptimized(&allocator);
+  codegen->Compile(&allocator);
   Run(allocator, *codegen, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeOptimized(HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
+static void RunCode(InstructionSet target_isa,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
   CompilerOptions compiler_options;
-  if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
-    TestCodeGeneratorARM codegenARM(graph,
-                                    *ArmInstructionSetFeatures::FromCppDefines(),
-                                    compiler_options);
-    RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kArm64) {
-    arm64::CodeGeneratorARM64 codegenARM64(graph,
-                                           *Arm64InstructionSetFeatures::FromCppDefines(),
-                                           compiler_options);
-    RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kX86) {
+#ifdef ART_ENABLE_CODEGEN_arm
+  if (target_isa == kArm || target_isa == kThumb2) {
+    std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+        ArmInstructionSetFeatures::FromCppDefines());
+    TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
+    RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+  if (target_isa == kArm64) {
+    std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+        Arm64InstructionSetFeatures::FromCppDefines());
+    arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
+    RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+  if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
-    x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-    RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kX86_64) {
+    TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
+    RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+  if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-    RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kMips) {
+    RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+  if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kMips64) {
+    RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+  if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+  }
+#endif
+}
+
+static ::std::vector<InstructionSet> GetTargetISAs() {
+  ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on simulator.
+  const ::std::vector<InstructionSet> executable_isa_candidates = {
+    kArm,
+    kArm64,
+    kThumb2,
+    kX86,
+    kX86_64,
+    kMips,
+    kMips64
+  };
+
+  for (auto target_isa : executable_isa_candidates) {
+    if (CanExecute(target_isa)) {
+      v.push_back(target_isa);
+    }
+  }
+
+  return v;
+}
+
+static void TestCode(const uint16_t* data,
+                     bool has_result = false,
+                     int32_t expected = 0) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateCFG(&arena, data);
+    // Remove suspend checks; they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
   }
 }
 
-static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(graph, has_result, expected);
+static void TestCodeLong(const uint16_t* data,
+                         bool has_result,
+                         int64_t expected) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
+    // Remove suspend checks; they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph, Primitive::kPrimLong);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(graph, has_result, expected);
-}
+class CodegenTest : public CommonCompilerTest {};
 
-TEST(CodegenTest, ReturnVoid) {
+TEST_F(CodegenTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG1) {
+TEST_F(CodegenTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
@@ -298,7 +369,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG2) {
+TEST_F(CodegenTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0x100,
@@ -307,7 +378,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG3) {
+TEST_F(CodegenTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
     Instruction::RETURN_VOID,
@@ -330,7 +401,7 @@
   TestCode(data3);
 }
 
-TEST(CodegenTest, CFG4) {
+TEST_F(CodegenTest, CFG4) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0x100,
@@ -339,7 +410,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG5) {
+TEST_F(CodegenTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -349,7 +420,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, IntConstant) {
+TEST_F(CodegenTest, IntConstant) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN_VOID);
@@ -357,7 +428,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, Return1) {
+TEST_F(CodegenTest, Return1) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN | 0);
@@ -365,7 +436,7 @@
   TestCode(data, true, 0);
 }
 
-TEST(CodegenTest, Return2) {
+TEST_F(CodegenTest, Return2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 0 | 1 << 8,
@@ -374,7 +445,7 @@
   TestCode(data, true, 0);
 }
 
-TEST(CodegenTest, Return3) {
+TEST_F(CodegenTest, Return3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -383,7 +454,7 @@
   TestCode(data, true, 1);
 }
 
-TEST(CodegenTest, ReturnIf1) {
+TEST_F(CodegenTest, ReturnIf1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -394,7 +465,7 @@
   TestCode(data, true, 1);
 }
 
-TEST(CodegenTest, ReturnIf2) {
+TEST_F(CodegenTest, ReturnIf2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -407,7 +478,7 @@
 
 // Exercise bit-wise (one's complement) not-int instruction.
 #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST(CodegenTest, TEST_NAME) {                          \
+TEST_F(CodegenTest, TEST_NAME) {                        \
   const int32_t input = INPUT;                          \
   const uint16_t input_lo = Low16Bits(input);           \
   const uint16_t input_hi = High16Bits(input);          \
@@ -432,7 +503,7 @@
 
 // Exercise bit-wise (one's complement) not-long instruction.
 #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT)                 \
-TEST(CodegenTest, TEST_NAME) {                                           \
+TEST_F(CodegenTest, TEST_NAME) {                                         \
   const int64_t input = INPUT;                                           \
   const uint16_t word0 = Low16Bits(Low32Bits(input));   /* LSW. */       \
   const uint16_t word1 = High16Bits(Low32Bits(input));                   \
@@ -482,7 +553,7 @@
 
 #undef NOT_LONG_TEST
 
-TEST(CodegenTest, IntToLongOfLongToInt) {
+TEST_F(CodegenTest, IntToLongOfLongToInt) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -499,7 +570,7 @@
   TestCodeLong(data, true, 1);
 }
 
-TEST(CodegenTest, ReturnAdd1) {
+TEST_F(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -509,7 +580,7 @@
   TestCode(data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd2) {
+TEST_F(CodegenTest, ReturnAdd2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -519,7 +590,7 @@
   TestCode(data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd3) {
+TEST_F(CodegenTest, ReturnAdd3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT8, 3 << 8 | 0,
@@ -528,7 +599,7 @@
   TestCode(data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd4) {
+TEST_F(CodegenTest, ReturnAdd4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT16, 3,
@@ -537,56 +608,7 @@
   TestCode(data, true, 7);
 }
 
-TEST(CodegenTest, NonMaterializedCondition) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  entry->AddInstruction(new (&allocator) HGoto());
-
-  HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(first_block);
-  entry->AddSuccessor(first_block);
-  HIntConstant* constant0 = graph->GetIntConstant(0);
-  HIntConstant* constant1 = graph->GetIntConstant(1);
-  HEqual* equal = new (&allocator) HEqual(constant0, constant0);
-  first_block->AddInstruction(equal);
-  first_block->AddInstruction(new (&allocator) HIf(equal));
-
-  HBasicBlock* then = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-
-  graph->AddBlock(then);
-  graph->AddBlock(else_);
-  graph->AddBlock(exit);
-  first_block->AddSuccessor(then);
-  first_block->AddSuccessor(else_);
-  then->AddSuccessor(exit);
-  else_->AddSuccessor(exit);
-
-  exit->AddInstruction(new (&allocator) HExit());
-  then->AddInstruction(new (&allocator) HReturn(constant0));
-  else_->AddInstruction(new (&allocator) HReturn(constant1));
-
-  ASSERT_TRUE(equal->NeedsMaterialization());
-  graph->BuildDominatorTree();
-  PrepareForRegisterAllocation(graph).Run();
-  ASSERT_FALSE(equal->NeedsMaterialization());
-
-  auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-    HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-    block->InsertInstructionBefore(move, block->GetLastInstruction());
-  };
-
-  RunCodeOptimized(graph, hook_before_codegen, true, 0);
-}
-
-TEST(CodegenTest, ReturnMulInt) {
+TEST_F(CodegenTest, ReturnMulInt) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -596,7 +618,7 @@
   TestCode(data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulInt2addr) {
+TEST_F(CodegenTest, ReturnMulInt2addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -606,31 +628,27 @@
   TestCode(data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulLong) {
+TEST_F(CodegenTest, ReturnMulLong) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG, 2 << 8 | 0,
     Instruction::RETURN_WIDE);
 
   TestCodeLong(data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulLong2addr) {
+TEST_F(CodegenTest, ReturnMulLong2addr) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0 << 8,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG_2ADDR | 2 << 12,
     Instruction::RETURN_WIDE);
 
   TestCodeLong(data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulIntLit8) {
+TEST_F(CodegenTest, ReturnMulIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
@@ -639,7 +657,7 @@
   TestCode(data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulIntLit16) {
+TEST_F(CodegenTest, ReturnMulIntLit16) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
@@ -648,40 +666,48 @@
   TestCode(data, true, 12);
 }
 
-TEST(CodegenTest, MaterializedCondition1) {
-  // Check that condition are materialized correctly. A materialized condition
-  // should yield `1` if it evaluated to true, and `0` otherwise.
-  // We force the materialization of comparisons for different combinations of
-  // inputs and check the results.
-
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
-
-  for (size_t i = 0; i < arraysize(lhs); i++) {
+TEST_F(CodegenTest, NonMaterializedCondition) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
+
     HGraph* graph = CreateGraph(&allocator);
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
-    HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(code_block);
-    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
+    HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(entry);
+    graph->SetEntryBlock(entry);
+    entry->AddInstruction(new (&allocator) HGoto());
 
-    entry_block->AddSuccessor(code_block);
-    code_block->AddSuccessor(exit_block);
+    HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(first_block);
+    entry->AddSuccessor(first_block);
+    HIntConstant* constant0 = graph->GetIntConstant(0);
+    HIntConstant* constant1 = graph->GetIntConstant(1);
+    HEqual* equal = new (&allocator) HEqual(constant0, constant0);
+    first_block->AddInstruction(equal);
+    first_block->AddInstruction(new (&allocator) HIf(equal));
+
+    HBasicBlock* then_block = new (&allocator) HBasicBlock(graph);
+    HBasicBlock* else_block = new (&allocator) HBasicBlock(graph);
+    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
     graph->SetExitBlock(exit_block);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    code_block->AddInstruction(&cmp_lt);
-    HReturn ret(&cmp_lt);
-    code_block->AddInstruction(&ret);
+    graph->AddBlock(then_block);
+    graph->AddBlock(else_block);
+    graph->AddBlock(exit_block);
+    first_block->AddSuccessor(then_block);
+    first_block->AddSuccessor(else_block);
+    then_block->AddSuccessor(exit_block);
+    else_block->AddSuccessor(exit_block);
+
+    exit_block->AddInstruction(new (&allocator) HExit());
+    then_block->AddInstruction(new (&allocator) HReturn(constant0));
+    else_block->AddInstruction(new (&allocator) HReturn(constant1));
+
+    ASSERT_FALSE(equal->IsEmittedAtUseSite());
+    graph->BuildDominatorTree();
+    PrepareForRegisterAllocation(graph).Run();
+    ASSERT_TRUE(equal->IsEmittedAtUseSite());
 
     auto hook_before_codegen = [](HGraph* graph_in) {
       HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
@@ -689,77 +715,127 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCode(target_isa, graph, hook_before_codegen, true, 0);
   }
 }
 
-TEST(CodegenTest, MaterializedCondition2) {
-  // Check that HIf correctly interprets a materialized condition.
-  // We force the materialization of comparisons for different combinations of
-  // inputs. An HIf takes the materialized combination as input and returns a
-  // value that we verify.
+TEST_F(CodegenTest, MaterializedCondition1) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that conditions are materialized correctly. A materialized condition
+    // should yield `1` if it evaluated to true, and `0` otherwise.
+    // We force the materialization of comparisons for different combinations of
+    // inputs and check the results.
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
 
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
 
-  for (size_t i = 0; i < arraysize(lhs); i++) {
-    ArenaPool pool;
-    ArenaAllocator allocator(&pool);
-    HGraph* graph = CreateGraph(&allocator);
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(code_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
 
-    HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_block);
-    HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_true_block);
-    HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_false_block);
-    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
+      entry_block->AddSuccessor(code_block);
+      code_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
 
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddSuccessor(if_block);
-    if_block->AddSuccessor(if_true_block);
-    if_block->AddSuccessor(if_false_block);
-    if_true_block->AddSuccessor(exit_block);
-    if_false_block->AddSuccessor(exit_block);
-    graph->SetExitBlock(exit_block);
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      code_block->AddInstruction(&cmp_lt);
+      HReturn ret(&cmp_lt);
+      code_block->AddInstruction(&ret);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    if_block->AddInstruction(&cmp_lt);
-    // We insert a temporary to separate the HIf from the HLessThan and force
-    // the materialization of the condition.
-    HTemporary force_materialization(0);
-    if_block->AddInstruction(&force_materialization);
-    HIf if_lt(&cmp_lt);
-    if_block->AddInstruction(&if_lt);
-
-    HIntConstant* cst_lt = graph->GetIntConstant(1);
-    HReturn ret_lt(cst_lt);
-    if_true_block->AddInstruction(&ret_lt);
-    HIntConstant* cst_ge = graph->GetIntConstant(0);
-    HReturn ret_ge(cst_ge);
-    if_false_block->AddInstruction(&ret_ge);
-
-    auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-      HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-      block->InsertInstructionBefore(move, block->GetLastInstruction());
-    };
-
-    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+      graph->BuildDominatorTree();
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
   }
 }
 
-TEST(CodegenTest, ReturnDivIntLit8) {
+TEST_F(CodegenTest, MaterializedCondition2) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that HIf correctly interprets a materialized condition.
+    // We force the materialization of comparisons for different combinations of
+    // inputs. An HIf takes the materialized condition as input and returns a
+    // value that we verify.
+
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
+
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
+
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+
+      HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_block);
+      HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_true_block);
+      HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_false_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
+
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddSuccessor(if_block);
+      if_block->AddSuccessor(if_true_block);
+      if_block->AddSuccessor(if_false_block);
+      if_true_block->AddSuccessor(exit_block);
+      if_false_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
+
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      if_block->AddInstruction(&cmp_lt);
+      // We insert a dummy instruction to separate the HIf from the HLessThan
+      // and force the materialization of the condition.
+      HMemoryBarrier force_materialization(MemBarrierKind::kAnyAny, 0);
+      if_block->AddInstruction(&force_materialization);
+      HIf if_lt(&cmp_lt);
+      if_block->AddInstruction(&if_lt);
+
+      HIntConstant* cst_lt = graph->GetIntConstant(1);
+      HReturn ret_lt(cst_lt);
+      if_true_block->AddInstruction(&ret_lt);
+      HIntConstant* cst_ge = graph->GetIntConstant(0);
+      HReturn ret_ge(cst_ge);
+      if_false_block->AddInstruction(&ret_ge);
+
+      graph->BuildDominatorTree();
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
+  }
+}
+
+TEST_F(CodegenTest, ReturnDivIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::DIV_INT_LIT8, 3 << 8 | 0,
@@ -768,7 +844,7 @@
   TestCode(data, true, 1);
 }
 
-TEST(CodegenTest, ReturnDivInt2Addr) {
+TEST_F(CodegenTest, ReturnDivInt2Addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::CONST_4 | 2 << 12 | 1 << 8,
@@ -779,7 +855,11 @@
 }
 
 // Helper method.
-static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitive::Type type) {
+static void TestComparison(IfCondition condition,
+                           int64_t i,
+                           int64_t j,
+                           Primitive::Type type,
+                           const InstructionSet target_isa) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateGraph(&allocator);
@@ -860,46 +940,53 @@
   block->AddInstruction(comparison);
   block->AddInstruction(new (&allocator) HReturn(comparison));
 
-  auto hook_before_codegen = [](HGraph*) {
-  };
-  RunCodeOptimized(graph, hook_before_codegen, true, expected_result);
+  graph->BuildDominatorTree();
+  RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
 }
 
-TEST(CodegenTest, ComparisonsInt) {
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimInt);
-      TestComparison(kCondNE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondLT, i, j, Primitive::kPrimInt);
-      TestComparison(kCondLE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondGT, i, j, Primitive::kPrimInt);
-      TestComparison(kCondGE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondB,  i, j, Primitive::kPrimInt);
-      TestComparison(kCondBE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondA,  i, j, Primitive::kPrimInt);
-      TestComparison(kCondAE, i, j, Primitive::kPrimInt);
+TEST_F(CodegenTest, ComparisonsInt) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+      }
     }
   }
 }
 
-TEST(CodegenTest, ComparisonsLong) {
+TEST_F(CodegenTest, ComparisonsLong) {
   // TODO: make MIPS work for long
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
     return;
   }
 
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimLong);
-      TestComparison(kCondNE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondLT, i, j, Primitive::kPrimLong);
-      TestComparison(kCondLE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondGT, i, j, Primitive::kPrimLong);
-      TestComparison(kCondGE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondB,  i, j, Primitive::kPrimLong);
-      TestComparison(kCondBE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondA,  i, j, Primitive::kPrimLong);
-      TestComparison(kCondAE, i, j, Primitive::kPrimLong);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    if (target_isa == kMips || target_isa == kMips64) {
+      continue;
+    }
+
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+      }
     }
   }
 }
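
A note on the comparison tests above: this hunk does not show how TestComparison computes `expected_result`, so the following is only a host-side sketch, assuming the standard meaning of these condition codes (EQ/NE/LT/LE/GT/GE are signed; B/BE/A/AE are the unsigned below/above variants), of how such expected values can be derived:

    // Illustrative sketch only -- not the ART helper itself.
    #include <cstdint>

    enum IfCondition { kCondEQ, kCondNE, kCondLT, kCondLE, kCondGT, kCondGE,
                       kCondB, kCondBE, kCondA, kCondAE };

    static bool ExpectedResult(IfCondition cond, int64_t i, int64_t j) {
      uint64_t x = static_cast<uint64_t>(i);  // Unsigned views for B/BE/A/AE.
      uint64_t y = static_cast<uint64_t>(j);
      switch (cond) {
        case kCondEQ: return i == j;
        case kCondNE: return i != j;
        case kCondLT: return i < j;   // Signed comparisons.
        case kCondLE: return i <= j;
        case kCondGT: return i > j;
        case kCondGE: return i >= j;
        case kCondB:  return x < y;   // Unsigned comparisons.
        case kCondBE: return x <= y;
        case kCondA:  return x > y;
        case kCondAE: return x >= y;
      }
      return false;
    }
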
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index e1a8c9c..cc949c5 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,11 +17,18 @@
 #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 
+#include "code_generator.h"
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
@@ -33,87 +40,88 @@
 
 static inline int VIXLRegCodeFromART(int code) {
   if (code == SP) {
-    return vixl::kSPRegInternalCode;
+    return vixl::aarch64::kSPRegInternalCode;
   }
   if (code == XZR) {
-    return vixl::kZeroRegCode;
+    return vixl::aarch64::kZeroRegCode;
   }
   return code;
 }
 
 static inline int ARTRegCodeFromVIXL(int code) {
-  if (code == vixl::kSPRegInternalCode) {
+  if (code == vixl::aarch64::kSPRegInternalCode) {
     return SP;
   }
-  if (code == vixl::kZeroRegCode) {
+  if (code == vixl::aarch64::kZeroRegCode) {
     return XZR;
   }
   return code;
 }
 
-static inline vixl::Register XRegisterFrom(Location location) {
-  DCHECK(location.IsRegister());
-  return vixl::Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
+static inline vixl::aarch64::Register XRegisterFrom(Location location) {
+  DCHECK(location.IsRegister()) << location;
+  return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::Register WRegisterFrom(Location location) {
-  DCHECK(location.IsRegister());
-  return vixl::Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
+static inline vixl::aarch64::Register WRegisterFrom(Location location) {
+  DCHECK(location.IsRegister()) << location;
+  return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::Register RegisterFrom(Location location, Primitive::Type type) {
-  DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type));
+static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
+  DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
 }
 
-static inline vixl::Register OutputRegister(HInstruction* instr) {
+static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
   return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::Register InputRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
   return RegisterFrom(instr->GetLocations()->InAt(input_index),
                       instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::FPRegister DRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister());
-  return vixl::FPRegister::DRegFromCode(location.reg());
+static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegister()) << location;
+  return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
 }
 
-static inline vixl::FPRegister SRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister());
-  return vixl::FPRegister::SRegFromCode(location.reg());
+static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegister()) << location;
+  return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
 }
 
-static inline vixl::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
-  DCHECK(Primitive::IsFloatingPointType(type));
+static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
+  DCHECK(Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
 }
 
-static inline vixl::FPRegister OutputFPRegister(HInstruction* instr) {
+static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
   return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
   return FPRegisterFrom(instr->GetLocations()->InAt(input_index),
                         instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
-  return Primitive::IsFloatingPointType(type) ? vixl::CPURegister(FPRegisterFrom(location, type))
-                                              : vixl::CPURegister(RegisterFrom(location, type));
+static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+  return Primitive::IsFloatingPointType(type)
+      ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type))
+      : vixl::aarch64::CPURegister(RegisterFrom(location, type));
 }
 
-static inline vixl::CPURegister OutputCPURegister(HInstruction* instr) {
+static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
   return Primitive::IsFloatingPointType(instr->GetType())
-      ? static_cast<vixl::CPURegister>(OutputFPRegister(instr))
-      : static_cast<vixl::CPURegister>(OutputRegister(instr));
+      ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr))
+      : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr));
 }
 
-static inline vixl::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
   return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType())
-      ? static_cast<vixl::CPURegister>(InputFPRegisterAt(instr, index))
-      : static_cast<vixl::CPURegister>(InputRegisterAt(instr, index));
+      ? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index))
+      : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index));
 }
 
 static inline int64_t Int64ConstantFrom(Location location) {
@@ -123,68 +131,75 @@
   } else if (instr->IsNullConstant()) {
     return 0;
   } else {
-    DCHECK(instr->IsLongConstant());
+    DCHECK(instr->IsLongConstant()) << instr->DebugName();
     return instr->AsLongConstant()->GetValue();
   }
 }
 
-static inline vixl::Operand OperandFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
   if (location.IsRegister()) {
-    return vixl::Operand(RegisterFrom(location, type));
+    return vixl::aarch64::Operand(RegisterFrom(location, type));
   } else {
-    return vixl::Operand(Int64ConstantFrom(location));
+    return vixl::aarch64::Operand(Int64ConstantFrom(location));
   }
 }
 
-static inline vixl::Operand InputOperandAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
   return OperandFrom(instr->GetLocations()->InAt(input_index),
                      instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::MemOperand StackOperandFrom(Location location) {
-  return vixl::MemOperand(vixl::sp, location.GetStackIndex());
+static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
+  return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, size_t offset = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    size_t offset = 0) {
   // A heap reference must be 32-bit, so it fits in a W register.
   DCHECK(base.IsW());
-  return vixl::MemOperand(base.X(), offset);
+  return vixl::aarch64::MemOperand(base.X(), offset);
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base,
-                                           const vixl::Register& regoffset,
-                                           vixl::Shift shift = vixl::LSL,
-                                           unsigned shift_amount = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    const vixl::aarch64::Register& regoffset,
+                                                    vixl::aarch64::Shift shift = vixl::aarch64::LSL,
+                                                    unsigned shift_amount = 0) {
   // A heap reference must be 32-bit, so it fits in a W register.
   DCHECK(base.IsW());
-  return vixl::MemOperand(base.X(), regoffset, shift, shift_amount);
+  return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount);
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    Offset offset) {
   return HeapOperand(base, offset.SizeValue());
 }
 
-static inline vixl::MemOperand HeapOperandFrom(Location location, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
   return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
 }
 
-static inline Location LocationFrom(const vixl::Register& reg) {
-  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.code()));
+static inline Location LocationFrom(const vixl::aarch64::Register& reg) {
+  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode()));
 }
 
-static inline Location LocationFrom(const vixl::FPRegister& fpreg) {
-  return Location::FpuRegisterLocation(fpreg.code());
+static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
+  return Location::FpuRegisterLocation(fpreg.GetCode());
 }
 
-static inline vixl::Operand OperandFromMemOperand(const vixl::MemOperand& mem_op) {
+static inline vixl::aarch64::Operand OperandFromMemOperand(
+    const vixl::aarch64::MemOperand& mem_op) {
   if (mem_op.IsImmediateOffset()) {
-    return vixl::Operand(mem_op.offset());
+    return vixl::aarch64::Operand(mem_op.GetOffset());
   } else {
     DCHECK(mem_op.IsRegisterOffset());
-    if (mem_op.extend() != vixl::NO_EXTEND) {
-      return vixl::Operand(mem_op.regoffset(), mem_op.extend(), mem_op.shift_amount());
-    } else if (mem_op.shift() != vixl::NO_SHIFT) {
-      return vixl::Operand(mem_op.regoffset(), mem_op.shift(), mem_op.shift_amount());
+    if (mem_op.GetExtend() != vixl::aarch64::NO_EXTEND) {
+      return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+                                    mem_op.GetExtend(),
+                                    mem_op.GetShiftAmount());
+    } else if (mem_op.GetShift() != vixl::aarch64::NO_SHIFT) {
+      return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+                                    mem_op.GetShift(),
+                                    mem_op.GetShiftAmount());
     } else {
       LOG(FATAL) << "Should not reach here";
       UNREACHABLE();
@@ -193,11 +208,17 @@
 }
 
 static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
-  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant());
+  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
+      << constant->DebugName();
 
   // For single uses we let VIXL handle the constant generation since it will
   // use registers that are not managed by the register allocator (wip0, wip1).
-  if (constant->GetUses().HasOnlyOneUse()) {
+  if (constant->GetUses().HasExactlyOneElement()) {
+    return true;
+  }
+
+  // Our code generator ensures shift distances are within an encodable range.
+  if (instr->IsRor()) {
     return true;
   }
 
@@ -205,21 +226,23 @@
 
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
-    return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+    return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
   } else if (instr->IsNeg()) {
     // Uses mov -immediate.
-    return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+    return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize);
   } else {
     DCHECK(instr->IsAdd() ||
-           instr->IsArm64IntermediateAddress() ||
+           instr->IsIntermediateAddress() ||
            instr->IsBoundsCheck() ||
            instr->IsCompare() ||
            instr->IsCondition() ||
-           instr->IsSub());
+           instr->IsSub())
+        << instr->DebugName();
     // Uses aliases of ADD/SUB instructions.
     // If `value` does not fit but `-value` does, VIXL will automatically use
     // the 'opposite' instruction.
-    return vixl::Assembler::IsImmAddSub(value) || vixl::Assembler::IsImmAddSub(-value);
+    return vixl::aarch64::Assembler::IsImmAddSub(value)
+        || vixl::aarch64::Assembler::IsImmAddSub(-value);
   }
 }
 
@@ -255,6 +278,67 @@
   return true;
 }
 
+static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
+    case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
+    case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::aarch64::NO_SHIFT;
+  }
+}
+
+static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
+    case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
+    case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
+    case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
+    case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
+    case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::aarch64::NO_EXTEND;
+  }
+}
+
+static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+  if (instruction->IsTypeConversion()) {
+    HTypeConversion* conversion = instruction->AsTypeConversion();
+    Primitive::Type result_type = conversion->GetResultType();
+    Primitive::Type input_type = conversion->GetInputType();
+    // We don't expect to see the same type as input and result.
+    return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+        (result_type != input_type);
+  } else {
+    return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+  }
+}
+
+static inline bool HasShifterOperand(HInstruction* instr) {
+  // `neg` instructions are an alias of `sub` using the zero register as the
+  // first register input.
+  bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
+      instr->IsOr() || instr->IsSub() || instr->IsXor();
+  return res;
+}
+
+static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+  DCHECK(HasShifterOperand(instruction));
+  // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
+  // does *not* support extension. This is because the `extended register` form
+  // of the `sub` instruction interprets the left register with code 31 as the
+  // stack pointer and not the zero register. (So does the `immediate` form.) In
+  // the other form, `shifted register`, the register with code 31 is interpreted
+  // as the zero register.
+  return instruction->IsAdd() || instruction->IsSub();
+}
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
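
The IsImmAddSub checks used in CanEncodeConstantAsImmediate above rest on the AArch64 ADD/SUB immediate format: a 12-bit unsigned value, optionally shifted left by 12 bits. A minimal standalone sketch of that encodability test, assuming that format (this is not VIXL's implementation), including the `-value` fallback described in the comment:

    #include <cstdint>

    // True if `v` fits an ADD/SUB immediate: imm12, optionally LSL #12.
    static bool IsImmAddSubSketch(uint64_t v) {
      return (v & ~UINT64_C(0xfff)) == 0 ||
             (v & ~(UINT64_C(0xfff) << 12)) == 0;
    }

    // If `value` does not encode but `-value` does, the macro-assembler can
    // switch between the aliased ADD and SUB forms (INT64_MIN ignored here).
    static bool CanUseAsAddSubImmediate(int64_t value) {
      return IsImmAddSubSketch(static_cast<uint64_t>(value)) ||
             IsImmAddSubSketch(static_cast<uint64_t>(-value));
    }
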
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
new file mode 100644
index 0000000..b459d24
--- /dev/null
+++ b/compiler/optimizing/common_dominator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Helper class for finding common dominators of two or more blocks in a graph.
+// The domination information of a graph must not be modified while there is
+// a CommonDominator object as its internal state could become invalid.
+class CommonDominator {
+ public:
+  // Convenience function to find the common dominator of 2 blocks.
+  static HBasicBlock* ForPair(HBasicBlock* block1, HBasicBlock* block2) {
+    CommonDominator finder(block1);
+    finder.Update(block2);
+    return finder.Get();
+  }
+
+  // Create a finder starting with a given block.
+  explicit CommonDominator(HBasicBlock* block)
+      : dominator_(block), chain_length_(ChainLength(block)) {
+    DCHECK(block != nullptr);
+  }
+
+  // Update the common dominator with another block.
+  void Update(HBasicBlock* block) {
+    DCHECK(block != nullptr);
+    HBasicBlock* block2 = dominator_;
+    DCHECK(block2 != nullptr);
+    if (block == block2) {
+      return;
+    }
+    size_t chain_length = ChainLength(block);
+    size_t chain_length2 = chain_length_;
+    // Equalize the chain lengths.
+    for ( ; chain_length > chain_length2; --chain_length) {
+      block = block->GetDominator();
+      DCHECK(block != nullptr);
+    }
+    for ( ; chain_length2 > chain_length; --chain_length2) {
+      block2 = block2->GetDominator();
+      DCHECK(block2 != nullptr);
+    }
+    // Now run up the chain until we hit the common dominator.
+    while (block != block2) {
+      --chain_length;
+      block = block->GetDominator();
+      DCHECK(block != nullptr);
+      block2 = block2->GetDominator();
+      DCHECK(block2 != nullptr);
+    }
+    dominator_ = block;
+    chain_length_ = chain_length;
+  }
+
+  HBasicBlock* Get() const {
+    return dominator_;
+  }
+
+ private:
+  static size_t ChainLength(HBasicBlock* block) {
+    size_t result = 0;
+    while (block != nullptr) {
+      ++result;
+      block = block->GetDominator();
+    }
+    return result;
+  }
+
+  HBasicBlock* dominator_;
+  size_t chain_length_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
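
CommonDominator::Update() above is the textbook lowest-common-ancestor walk over parent pointers: measure both chains, equalize their lengths, then climb in lockstep until the chains meet. The same technique on a plain parent-pointer tree, as a self-contained sketch (hypothetical Node type, not ART code):

    #include <cstddef>

    struct Node { Node* parent = nullptr; };  // parent is null at the root.

    static size_t Depth(const Node* n) {
      size_t d = 0;
      for (; n != nullptr; n = n->parent) ++d;
      return d;
    }

    static Node* CommonAncestor(Node* a, Node* b) {
      size_t da = Depth(a);
      size_t db = Depth(b);
      for (; da > db; --da) a = a->parent;  // Equalize the chain lengths.
      for (; db > da; --db) b = b->parent;
      while (a != b) {                      // Climb both chains in lockstep.
        a = a->parent;
        b = b->parent;
      }
      return a;  // Null only if the nodes are in different trees.
    }
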
diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/constant_area_fixups_x86.cc
deleted file mode 100644
index c347000..0000000
--- a/compiler/optimizing/constant_area_fixups_x86.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "constant_area_fixups_x86.h"
-
-namespace art {
-namespace x86 {
-
-/**
- * Finds instructions that need the constant area base as an input.
- */
-class ConstantHandlerVisitor : public HGraphVisitor {
- public:
-  explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
-
- private:
-  void VisitAdd(HAdd* add) OVERRIDE {
-    BinaryFP(add);
-  }
-
-  void VisitSub(HSub* sub) OVERRIDE {
-    BinaryFP(sub);
-  }
-
-  void VisitMul(HMul* mul) OVERRIDE {
-    BinaryFP(mul);
-  }
-
-  void VisitDiv(HDiv* div) OVERRIDE {
-    BinaryFP(div);
-  }
-
-  void VisitReturn(HReturn* ret) OVERRIDE {
-    HConstant* value = ret->InputAt(0)->AsConstant();
-    if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
-      ReplaceInput(ret, value, 0, true);
-    }
-  }
-
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void BinaryFP(HBinaryOperation* bin) {
-    HConstant* rhs = bin->InputAt(1)->AsConstant();
-    if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
-      ReplaceInput(bin, rhs, 1, false);
-    }
-  }
-
-  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
-    // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
-    // address the constant area.
-    InitializeConstantAreaPointer(switch_insn);
-    HGraph* graph = GetGraph();
-    HBasicBlock* block = switch_insn->GetBlock();
-    HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
-        switch_insn->GetStartValue(),
-        switch_insn->GetNumEntries(),
-        switch_insn->InputAt(0),
-        base_,
-        switch_insn->GetDexPc());
-    block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
-  }
-
-  void InitializeConstantAreaPointer(HInstruction* user) {
-    // Ensure we only initialize the pointer once.
-    if (base_ != nullptr) {
-      return;
-    }
-
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
-    DCHECK(base_ != nullptr);
-  }
-
-  void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializeConstantAreaPointer(insn);
-    HGraph* graph = GetGraph();
-    HBasicBlock* block = insn->GetBlock();
-    HX86LoadFromConstantTable* load_constant =
-        new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
-    block->InsertInstructionBefore(load_constant, insn);
-    insn->ReplaceInput(load_constant, input_index);
-  }
-
-  void HandleInvoke(HInvoke* invoke) {
-    // Ensure that we can load FP arguments from the constant area.
-    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
-      HConstant* input = invoke->InputAt(i)->AsConstant();
-      if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
-        ReplaceInput(invoke, input, i, true);
-      }
-    }
-  }
-
-  // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
-  // input to the HX86LoadFromConstantTable instructions.
-  HX86ComputeBaseMethodAddress* base_;
-};
-
-void ConstantAreaFixups::Run() {
-  ConstantHandlerVisitor visitor(graph_);
-  visitor.VisitInsertionOrder();
-}
-
-}  // namespace x86
-}  // namespace art
diff --git a/compiler/optimizing/constant_area_fixups_x86.h b/compiler/optimizing/constant_area_fixups_x86.h
deleted file mode 100644
index 4138039..0000000
--- a/compiler/optimizing/constant_area_fixups_x86.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
-#define ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
-
-#include "nodes.h"
-#include "optimization.h"
-
-namespace art {
-namespace x86 {
-
-class ConstantAreaFixups : public HOptimization {
- public:
-  ConstantAreaFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "constant_area_fixups_x86", stats) {}
-
-  void Run() OVERRIDE;
-};
-
-}  // namespace x86
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index e0aa4ff..0614945 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -18,8 +18,28 @@
 
 namespace art {
 
-// This visitor tries to simplify operations that yield a constant. For example
-// `input * 0` is replaced by a null constant.
+// This visitor tries to simplify instructions that can be evaluated
+// as constants.
+class HConstantFoldingVisitor : public HGraphDelegateVisitor {
+ public:
+  explicit HConstantFoldingVisitor(HGraph* graph)
+      : HGraphDelegateVisitor(graph) {}
+
+ private:
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+  void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE;
+  void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE;
+
+  void VisitTypeConversion(HTypeConversion* inst) OVERRIDE;
+  void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
+};
+
+// This visitor tries to simplify operations with an absorbing input,
+// yielding a constant. For example `input * 0` is replaced by a
+// null constant.
 class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
  public:
   explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
@@ -27,6 +47,11 @@
  private:
   void VisitShift(HBinaryOperation* shift);
 
+  void VisitAbove(HAbove* instruction) OVERRIDE;
+  void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE;
+  void VisitBelow(HBelow* instruction) OVERRIDE;
+  void VisitBelowOrEqual(HBelowOrEqual* instruction) OVERRIDE;
+
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitCompare(HCompare* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
@@ -39,63 +64,73 @@
   void VisitXor(HXor* instruction) OVERRIDE;
 };
 
+
 void HConstantFolding::Run() {
-  InstructionWithAbsorbingInputSimplifier simplifier(graph_);
+  HConstantFoldingVisitor visitor(graph_);
   // Process basic blocks in reverse post-order in the dominator tree,
   // so that an instruction turned into a constant, used as input of
   // another instruction, may possibly be used to turn that second
   // instruction into a constant as well.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    // Traverse this block's instructions in (forward) order and
-    // replace the ones that can be statically evaluated by a
-    // compile-time counterpart.
-    for (HInstructionIterator inst_it(block->GetInstructions());
-         !inst_it.Done(); inst_it.Advance()) {
-      HInstruction* inst = inst_it.Current();
-      if (inst->IsBinaryOperation()) {
-        // Constant folding: replace `op(a, b)' with a constant at
-        // compile time if `a' and `b' are both constants.
-        HConstant* constant = inst->AsBinaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        } else {
-          inst->Accept(&simplifier);
-        }
-      } else if (inst->IsUnaryOperation()) {
-        // Constant folding: replace `op(a)' with a constant at compile
-        // time if `a' is a constant.
-        HConstant* constant = inst->AsUnaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsTypeConversion()) {
-        // Constant folding: replace `TypeConversion(a)' with a constant at
-        // compile time if `a' is a constant.
-        HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsDivZeroCheck()) {
-        // We can safely remove the check if the input is a non-null constant.
-        HDivZeroCheck* check = inst->AsDivZeroCheck();
-        HInstruction* check_input = check->InputAt(0);
-        if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
-          check->ReplaceWith(check_input);
-          check->GetBlock()->RemoveInstruction(check);
-        }
-      }
-    }
+  visitor.VisitReversePostOrder();
+}
+
+
+void HConstantFoldingVisitor::VisitBasicBlock(HBasicBlock* block) {
+  // Traverse this block's instructions (phis don't need to be
+  // processed) in (forward) order and replace the ones that can be
+  // statically evaluated by a compile-time counterpart.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    it.Current()->Accept(this);
   }
 }
 
+void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) {
+  // Constant folding: replace `op(a)' with a constant at compile
+  // time if `a' is a constant.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
+  // Constant folding: replace `op(a, b)' with a constant at
+  // compile time if `a' and `b' are both constants.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  } else {
+    InstructionWithAbsorbingInputSimplifier simplifier(GetGraph());
+    inst->Accept(&simplifier);
+  }
+}
+
+void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
+  // Constant folding: replace `TypeConversion(a)' with a constant at
+  // compile time if `a' is a constant.
+  HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
+  // We can safely remove the check if the input is a non-null constant.
+  HInstruction* check_input = inst->InputAt(0);
+  if (check_input->IsConstant() && !check_input->AsConstant()->IsArithmeticZero()) {
+    inst->ReplaceWith(check_input);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+
 void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HInstruction* left = instruction->GetLeft();
-  if (left->IsConstant() && left->AsConstant()->IsZero()) {
+  if (left->IsConstant() && left->AsConstant()->IsArithmeticZero()) {
     // Replace code looking like
     //    SHL dst, 0, shift_amount
     // with
@@ -105,9 +140,57 @@
   }
 }
 
+void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
+  if (instruction->GetLeft()->IsConstant() &&
+      instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
+    // Replace code looking like
+    //    ABOVE dst, 0, src  // unsigned 0 > src is always false
+    // with
+    //    CONSTANT false
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* instruction) {
+  if (instruction->GetRight()->IsConstant() &&
+      instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
+    // Replace code looking like
+    //    ABOVE_OR_EQUAL dst, src, 0  // unsigned src >= 0 is always true
+    // with
+    //    CONSTANT true
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) {
+  if (instruction->GetRight()->IsConstant() &&
+      instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
+    // Replace code looking like
+    //    BELOW dst, src, 0  // unsigned src < 0 is always false
+    // with
+    //    CONSTANT false
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* instruction) {
+  if (instruction->GetLeft()->IsConstant() &&
+      instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
+    // Replace code looking like
+    //    BELOW_OR_EQUAL dst, 0, src  // unsigned 0 <= src is always true
+    // with
+    //    CONSTANT true
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
 void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
-  if ((input_cst != nullptr) && input_cst->IsZero()) {
+  if ((input_cst != nullptr) && input_cst->IsZeroBitPattern()) {
     // Replace code looking like
     //    AND dst, src, 0
     // with
@@ -125,7 +208,7 @@
         ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) ||
          (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) {
       // Replace code looking like
-      //    CMP{G,L} dst, src, NaN
+      //    CMP{G,L}-{FLOAT,DOUBLE} dst, src, NaN
       // with
       //    CONSTANT +1 (gt bias)
       // or
@@ -141,7 +224,7 @@
   HConstant* input_cst = instruction->GetConstantRight();
   Primitive::Type type = instruction->GetType();
   if (Primitive::IsIntOrLongType(type) &&
-      (input_cst != nullptr) && input_cst->IsZero()) {
+      (input_cst != nullptr) && input_cst->IsArithmeticZero()) {
     // Replace code looking like
     //    MUL dst, src, 0
     // with
@@ -181,7 +264,7 @@
   HBasicBlock* block = instruction->GetBlock();
 
   if (instruction->GetLeft()->IsConstant() &&
-      instruction->GetLeft()->AsConstant()->IsZero()) {
+      instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
     // Replace code looking like
     //    REM dst, 0, src
     // with
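
Each of the four unsigned-comparison visitors added above encodes an identity of unsigned arithmetic. A quick host-side check of those identities (plain C++, not compiler IR):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t zero = 0;  // Variable, to avoid tautological-compare warnings.
      for (uint32_t x : {0u, 1u, 0xffffffffu}) {
        assert(!(zero > x));  // ABOVE dst, 0, src          -> CONSTANT false
        assert(x >= zero);    // ABOVE_OR_EQUAL dst, src, 0 -> CONSTANT true
        assert(!(x < zero));  // BELOW dst, src, 0          -> CONSTANT false
        assert(zero <= x);    // BELOW_OR_EQUAL dst, 0, src -> CONSTANT true
      }
      return 0;
    }
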
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 2698b2d..e10b1d6 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -26,13 +26,20 @@
  * Optimization pass performing a simple constant-expression
  * evaluation on the SSA form.
  *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
+ *
  * This class is named art::HConstantFolding to avoid name
  * clashes with the art::ConstantPropagation class defined in
  * compiler/dex/post_opt_passes.h.
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
+  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
       : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
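
To make the convention in the header comment above concrete, here is a toy rewriter on an assumed Expr type (not ART IR): the rule that produces a constant belongs in constant folding, while the rule that returns an existing non-constant value belongs in the instruction simplifier. As in VisitMul in constant_folding.cc above, the `x * 0` rule is only sound for integral types, since for floating point `x * 0` can be NaN or -0.0:

    struct Expr {
      enum Kind { kConst, kVar, kMul } kind;
      int value = 0;        // Meaningful when kind == kConst.
      Expr* lhs = nullptr;  // Operands when kind == kMul.
      Expr* rhs = nullptr;
    };

    // Constant-folding-style rule: the replacement is itself a constant.
    Expr* TryFoldMulByZero(Expr* e) {
      if (e->kind == Expr::kMul &&
          e->rhs->kind == Expr::kConst && e->rhs->value == 0) {
        return e->rhs;  // x * 0 -> 0
      }
      return nullptr;   // No folding opportunity.
    }

    // Simplifier-style rule: the replacement is an existing non-constant.
    Expr* TrySimplifyMulByOne(Expr* e) {
      if (e->kind == Expr::kMul &&
          e->rhs->kind == Expr::kConst && e->rhs->value == 1) {
        return e->lhs;  // x * 1 -> x
      }
      return nullptr;
    }
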
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 2feb75c..d1a2a26 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -29,50 +29,69 @@
 
 namespace art {
 
-static void TestCode(const uint16_t* data,
-                     const std::string& expected_before,
-                     const std::string& expected_after_cf,
-                     const std::string& expected_after_dce,
-                     std::function<void(HGraph*)> check_after_cf,
-                     Primitive::Type return_type = Primitive::kPrimInt) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateCFG(&allocator, data, return_type);
-  ASSERT_NE(graph, nullptr);
+/**
+ * Fixture class for the constant folding and dce tests.
+ */
+class ConstantFoldingTest : public CommonCompilerTest {
+ public:
+  ConstantFoldingTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
+  }
 
-  graph->TryBuildingSsa();
+  void TestCode(const uint16_t* data,
+                const std::string& expected_before,
+                const std::string& expected_after_cf,
+                const std::string& expected_after_dce,
+                std::function<void(HGraph*)> check_after_cf,
+                Primitive::Type return_type = Primitive::kPrimInt) {
+    graph_ = CreateCFG(&allocator_, data, return_type);
+    TestCodeOnReadyGraph(expected_before,
+                         expected_after_cf,
+                         expected_after_dce,
+                         check_after_cf);
+  }
 
-  StringPrettyPrinter printer_before(graph);
-  printer_before.VisitInsertionOrder();
-  std::string actual_before = printer_before.str();
-  ASSERT_EQ(expected_before, actual_before);
+  void TestCodeOnReadyGraph(const std::string& expected_before,
+                            const std::string& expected_after_cf,
+                            const std::string& expected_after_dce,
+                            std::function<void(HGraph*)> check_after_cf) {
+    ASSERT_NE(graph_, nullptr);
 
-  std::unique_ptr<const X86InstructionSetFeatures> features_x86(
-      X86InstructionSetFeatures::FromCppDefines());
-  x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
-  HConstantFolding(graph).Run();
-  SSAChecker ssa_checker_cf(graph);
-  ssa_checker_cf.Run();
-  ASSERT_TRUE(ssa_checker_cf.IsValid());
+    StringPrettyPrinter printer_before(graph_);
+    printer_before.VisitInsertionOrder();
+    std::string actual_before = printer_before.str();
+    EXPECT_EQ(expected_before, actual_before);
 
-  StringPrettyPrinter printer_after_cf(graph);
-  printer_after_cf.VisitInsertionOrder();
-  std::string actual_after_cf = printer_after_cf.str();
-  ASSERT_EQ(expected_after_cf, actual_after_cf);
+    std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+        X86InstructionSetFeatures::FromCppDefines());
+    x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
+    HConstantFolding(graph_).Run();
+    GraphChecker graph_checker_cf(graph_);
+    graph_checker_cf.Run();
+    ASSERT_TRUE(graph_checker_cf.IsValid());
 
-  check_after_cf(graph);
+    StringPrettyPrinter printer_after_cf(graph_);
+    printer_after_cf.VisitInsertionOrder();
+    std::string actual_after_cf = printer_after_cf.str();
+    EXPECT_EQ(expected_after_cf, actual_after_cf);
 
-  HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker_dce(graph);
-  ssa_checker_dce.Run();
-  ASSERT_TRUE(ssa_checker_dce.IsValid());
+    check_after_cf(graph_);
 
-  StringPrettyPrinter printer_after_dce(graph);
-  printer_after_dce.VisitInsertionOrder();
-  std::string actual_after_dce = printer_after_dce.str();
-  ASSERT_EQ(expected_after_dce, actual_after_dce);
-}
+    HDeadCodeElimination(graph_).Run();
+    GraphChecker graph_checker_dce(graph_);
+    graph_checker_dce.Run();
+    ASSERT_TRUE(graph_checker_dce.IsValid());
 
+    StringPrettyPrinter printer_after_dce(graph_);
+    printer_after_dce.VisitInsertionOrder();
+    std::string actual_after_dce = printer_after_dce.str();
+    EXPECT_EQ(expected_after_dce, actual_after_dce);
+  }
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+};
 
 /**
  * Tiny three-register program exercising int constant folding on negation.
@@ -84,7 +103,7 @@
  *     v1 <- -v0                1.      neg-int v1, v0
  *     return v1                2.      return v1
  */
-TEST(ConstantFolding, IntConstantFoldingNegation) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingNegation) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::NEG_INT | 1 << 8 | 0 << 12,
@@ -92,22 +111,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  2: IntConstant [5]\n"
-      "  10: SuspendCheck\n"
-      "  11: Goto 1\n"
+      "  2: IntConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  5: Neg(2) [8]\n"
-      "  8: Return(5)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  9: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  2: IntConstant [5]\n", "  2: IntConstant\n" },
-    { "  10: SuspendCheck\n",   "  10: SuspendCheck\n"
-                                "  12: IntConstant [8]\n" },
-    { "  5: Neg(2) [8]\n",      removed },
-    { "  8: Return(5)\n",       "  8: Return(12)\n" }
+    { "  2: IntConstant [3]\n", "  2: IntConstant\n"
+                                "  6: IntConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",      removed },
+    { "  4: Return(3)\n",       "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -141,7 +159,7 @@
  *     (v2, v3) <- -(v0, v1)    1.      neg-long v2, v0
  *     return (v2, v3)          2.      return-wide v2
  */
-TEST(ConstantFolding, LongConstantFoldingNegation) {
+TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -154,22 +172,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  4: LongConstant [7]\n"
-      "  12: SuspendCheck\n"
-      "  13: Goto 1\n"
+      "  2: LongConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  7: Neg(4) [10]\n"
-      "  10: Return(7)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  11: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  4: LongConstant [7]\n", "  4: LongConstant\n" },
-    { "  12: SuspendCheck\n",    "  12: SuspendCheck\n"
-                                 "  14: LongConstant [10]\n" },
-    { "  7: Neg(4) [10]\n",      removed },
-    { "  10: Return(7)\n",       "  10: Return(14)\n" }
+    { "  2: LongConstant [3]\n", "  2: LongConstant\n"
+                                 "  6: LongConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",       removed },
+    { "  4: Return(3)\n",        "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -182,7 +199,7 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  4: LongConstant\n", removed },
+    { "  2: LongConstant\n", removed },
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -205,7 +222,7 @@
  *     v2 <- v0 + v1            2.      add-int v2, v0, v1
  *     return v2                4.      return v2
  */
-TEST(ConstantFolding, IntConstantFoldingOnAddition1) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -213,25 +230,24 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  14: SuspendCheck\n"
-    "  15: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Add(3, 5) [12]\n"
-    "  12: Return(9)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  13: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
-    { "  14: SuspendCheck\n",   "  14: SuspendCheck\n"
-                                "  16: IntConstant [12]\n" },
-    { "  9: Add(3, 5) [12]\n",  removed },
-    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+    { "  2: IntConstant [4]\n", "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n", "  3: IntConstant\n"
+                                "  7: IntConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",   removed },
+    { "  5: Return(4)\n",       "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -244,8 +260,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  3: IntConstant\n", removed },
-    { "  5: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -271,7 +287,7 @@
  *     v2 <- v0 + v1            6.      add-int v2, v0, v1
  *     return v2                8.      return v2
  */
-TEST(ConstantFolding, IntConstantFoldingOnAddition2) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -283,35 +299,34 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  11: IntConstant [17]\n"
-    "  13: IntConstant [17]\n"
-    "  26: SuspendCheck\n"
-    "  27: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Add(3, 5) [21]\n"
-    "  17: Add(11, 13) [21]\n"
-    "  21: Add(9, 17) [24]\n"
-    "  24: Return(21)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  25: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  5: IntConstant [7]\n"
+      "  6: IntConstant [7]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [8]\n"
+      "  7: Add(5, 6) [8]\n"
+      "  8: Add(4, 7) [9]\n"
+      "  9: Return(8)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  10: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n",   "  5: IntConstant\n" },
-    { "  11: IntConstant [17]\n", "  11: IntConstant\n" },
-    { "  13: IntConstant [17]\n", "  13: IntConstant\n" },
-    { "  26: SuspendCheck\n",     "  26: SuspendCheck\n"
-                                  "  28: IntConstant\n"
-                                  "  29: IntConstant\n"
-                                  "  30: IntConstant [24]\n" },
-    { "  9: Add(3, 5) [21]\n",    removed },
-    { "  17: Add(11, 13) [21]\n", removed },
-    { "  21: Add(9, 17) [24]\n",  removed  },
-    { "  24: Return(21)\n",       "  24: Return(30)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n" },
+    { "  5: IntConstant [7]\n",  "  5: IntConstant\n" },
+    { "  6: IntConstant [7]\n",  "  6: IntConstant\n"
+                                 "  11: IntConstant\n"
+                                 "  12: IntConstant\n"
+                                 "  13: IntConstant [9]\n" },
+    { "  4: Add(2, 3) [8]\n",    removed },
+    { "  7: Add(5, 6) [8]\n",    removed },
+    { "  8: Add(4, 7) [9]\n",    removed  },
+    { "  9: Return(8)\n",        "  9: Return(13)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -330,12 +345,12 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
+    { "  2: IntConstant\n",  removed },
     { "  3: IntConstant\n",  removed },
     { "  5: IntConstant\n",  removed },
+    { "  6: IntConstant\n",  removed },
     { "  11: IntConstant\n", removed },
-    { "  13: IntConstant\n", removed },
-    { "  28: IntConstant\n", removed },
-    { "  29: IntConstant\n", removed }
+    { "  12: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -357,7 +372,7 @@
  *     v2 <- v0 - v1            2.      sub-int v2, v0, v1
  *     return v2                4.      return v2
  */
-TEST(ConstantFolding, IntConstantFoldingOnSubtraction) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 3 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -365,25 +380,24 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  14: SuspendCheck\n"
-    "  15: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Sub(3, 5) [12]\n"
-    "  12: Return(9)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  13: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
-    { "  14: SuspendCheck\n",   "  14: SuspendCheck\n"
-                                "  16: IntConstant [12]\n" },
-    { "  9: Sub(3, 5) [12]\n",  removed },
-    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n"
+                                 "  7: IntConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",    removed },
+    { "  5: Return(4)\n",        "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -396,8 +410,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  3: IntConstant\n", removed },
-    { "  5: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -421,7 +435,7 @@
  *       (v0, v1) + (v1, v2)    4.      add-long v4, v0, v2
  *     return (v4, v5)          6.      return-wide v4
  */
-TEST(ConstantFolding, LongConstantFoldingOnAddition) {
+TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) {
   const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
     Instruction::CONST_WIDE_16 | 0 << 8, 1,
     Instruction::CONST_WIDE_16 | 2 << 8, 2,
@@ -429,25 +443,24 @@
     Instruction::RETURN_WIDE | 4 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  6: LongConstant [12]\n"
-    "  8: LongConstant [12]\n"
-    "  17: SuspendCheck\n"
-    "  18: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  12: Add(6, 8) [15]\n"
-    "  15: Return(12)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  16: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
-    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
-    { "  17: SuspendCheck\n",     "  17: SuspendCheck\n"
-                                  "  19: LongConstant [15]\n" },
-    { "  12: Add(6, 8) [15]\n",   removed },
-    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -460,8 +473,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
-    { "  8: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -486,7 +499,7 @@
  *       (v0, v1) - (v1, v2)    4.      sub-long v4, v0, v2
  *     return (v4, v5)          6.      return-wide v4
  */
-TEST(ConstantFolding, LongConstantFoldingOnSubtraction) {
+TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) {
   const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
     Instruction::CONST_WIDE_16 | 0 << 8, 3,
     Instruction::CONST_WIDE_16 | 2 << 8, 2,
@@ -494,25 +507,24 @@
     Instruction::RETURN_WIDE | 4 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  6: LongConstant [12]\n"
-    "  8: LongConstant [12]\n"
-    "  17: SuspendCheck\n"
-    "  18: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  12: Sub(6, 8) [15]\n"
-    "  15: Return(12)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  16: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
-    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
-    { "  17: SuspendCheck\n",     "  17: SuspendCheck\n"
-                                  "  19: LongConstant [15]\n" },
-    { "  12: Sub(6, 8) [15]\n",   removed },
-    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -525,8 +537,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
-    { "  8: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -560,7 +572,7 @@
  * L3: v2 <- v1 + 8             11.     add-int/lit16 v2, v1, #+8
  *     return v2                13.     return v2
  */
-TEST(ConstantFolding, IntConstantFoldingAndJumps) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -574,47 +586,45 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"            // v0 <- 1
-    "  5: IntConstant [9]\n"            // v1 <- 2
-    "  13: IntConstant [14]\n"          // const 5
-    "  18: IntConstant [19]\n"          // const 4
-    "  24: IntConstant [25]\n"          // const 8
-    "  30: SuspendCheck\n"
-    "  31: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"             // v2 <- v0 + v1 = 1 + 2 = 3
-    "  11: Goto 3\n"                    // goto L2
-    "BasicBlock 2, pred: 3, succ: 4\n"  // L1:
-    "  14: Add(19, 13) [25]\n"          // v1 <- v0 + 3 = 7 + 5 = 12
-    "  16: Goto 4\n"                    // goto L3
-    "BasicBlock 3, pred: 1, succ: 2\n"  // L2:
-    "  19: Add(9, 18) [14]\n"           // v0 <- v2 + 2 = 3 + 4 = 7
-    "  21: SuspendCheck\n"
-    "  22: Goto 2\n"                    // goto L1
-    "BasicBlock 4, pred: 2, succ: 5\n"  // L3:
-    "  25: Add(14, 24) [28]\n"          // v2 <- v1 + 4 = 12 + 8 = 20
-    "  28: Return(25)\n"                // return v2
-    "BasicBlock 5, pred: 4\n"
-    "  29: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"             // v0 <- 1
+      "  3: IntConstant [4]\n"             // v1 <- 2
+      "  6: IntConstant [7]\n"             // const 5
+      "  9: IntConstant [10]\n"            // const 4
+      "  12: IntConstant [13]\n"           // const 8
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  4: Add(2, 3) [7]\n"               // v2 <- v0 + v1 = 1 + 2 = 3
+      "  5: Goto 3\n"                      // goto L2
+      "BasicBlock 2, pred: 3, succ: 4\n"   // L1:
+      "  10: Add(7, 9) [13]\n"             // v1 <- v0 + 3 = 7 + 5 = 12
+      "  11: Goto 4\n"                     // goto L3
+      "BasicBlock 3, pred: 1, succ: 2\n"   // L2:
+      "  7: Add(4, 6) [10]\n"              // v0 <- v2 + 2 = 3 + 4 = 7
+      "  8: Goto 2\n"                      // goto L1
+      "BasicBlock 4, pred: 2, succ: 5\n"   // L3:
+      "  13: Add(10, 12) [14]\n"           // v2 <- v1 + 4 = 12 + 8 = 20
+      "  14: Return(13)\n"                 // return v2
+      "BasicBlock 5, pred: 4\n"
+      "  15: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n",   "  5: IntConstant []\n" },
-    { "  13: IntConstant [14]\n", "  13: IntConstant\n" },
-    { "  18: IntConstant [19]\n", "  18: IntConstant\n" },
-    { "  24: IntConstant [25]\n", "  24: IntConstant\n" },
-    { "  30: SuspendCheck\n",     "  30: SuspendCheck\n"
-                                  "  32: IntConstant []\n"
-                                  "  33: IntConstant []\n"
-                                  "  34: IntConstant\n"
-                                  "  35: IntConstant [28]\n" },
-    { "  9: Add(3, 5) [19]\n",    removed },
-    { "  14: Add(19, 13) [25]\n", removed },
-    { "  19: Add(9, 18) [14]\n",  removed },
-    { "  25: Add(14, 24) [28]\n", removed },
-    { "  28: Return(25)\n",       "  28: Return(35)\n"}
+    { "  2: IntConstant [4]\n",   "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",   "  3: IntConstant\n" },
+    { "  6: IntConstant [7]\n",   "  6: IntConstant\n" },
+    { "  9: IntConstant [10]\n",  "  9: IntConstant\n" },
+    { "  12: IntConstant [13]\n", "  12: IntConstant\n"
+                                  "  16: IntConstant\n"
+                                  "  17: IntConstant\n"
+                                  "  18: IntConstant\n"
+                                  "  19: IntConstant [14]\n" },
+    { "  4: Add(2, 3) [7]\n",     removed },
+    { "  10: Add(7, 9) [13]\n",   removed },
+    { "  7: Add(4, 6) [10]\n",    removed },
+    { "  13: Add(10, 12) [14]\n", removed },
+    { "  14: Return(13)\n",       "  14: Return(19)\n"}
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -636,18 +646,14 @@
 
   // Expected difference after dead code elimination.
   std::string expected_after_dce =
-    "BasicBlock 0, succ: 1\n"
-    "  5: IntConstant []\n"
-    "  30: SuspendCheck\n"
-    "  32: IntConstant []\n"
-    "  33: IntConstant []\n"
-    "  35: IntConstant [28]\n"
-    "  31: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5\n"
-    "  21: SuspendCheck\n"
-    "  28: Return(35)\n"
-    "BasicBlock 5, pred: 1\n"
-    "  29: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  19: IntConstant [14]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5\n"
+      "  14: Return(19)\n"
+      "BasicBlock 5, pred: 1\n"
+      "  15: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -656,7 +662,6 @@
            check_after_cf);
 }
 
-
 /**
  * Three-register program with a constant (static) condition.
  *
@@ -670,7 +675,7 @@
  * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
  *     return-void              7.      return
  */
-TEST(ConstantFolding, ConstantCondition) {
+TEST_F(ConstantFoldingTest, ConstantCondition) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
@@ -680,31 +685,31 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [15, 22, 8]\n"
-    "  5: IntConstant [22, 8]\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  8: GreaterThanOrEqual(3, 5) [9]\n"
-    "  9: If(8)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  22: Phi(5, 3) [15]\n"
-    "  15: Add(22, 3)\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  18: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  21: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  7: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  11: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [15, 22, 8]\n",      "  3: IntConstant [9, 15, 22]\n" },
-    { "  5: IntConstant [22, 8]\n",          "  5: IntConstant [22]\n" },
-    { "  8: GreaterThanOrEqual(3, 5) [9]\n", removed },
-    { "  9: If(8)\n",                        "  9: If(3)\n" }
+    { "  3: IntConstant [9, 8, 5]\n",        "  3: IntConstant [6, 9, 8]\n" },
+    { "  4: IntConstant [8, 5]\n",           "  4: IntConstant [8]\n" },
+    { "  5: GreaterThanOrEqual(3, 4) [6]\n", removed },
+    { "  6: If(5)\n",                        "  6: If(3)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -717,13 +722,13 @@
 
   // Expected graph after dead code elimination.
   std::string expected_after_dce =
-    "BasicBlock 0, succ: 1\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 4\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 1\n"
-    "  18: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 4\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 1\n"
+      "  11: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -732,4 +737,123 @@
            check_after_cf);
 }
 
+/**
+ * Unsigned comparisons with zero. Since these instructions are not present
+ * in the bytecode, we need to set up the graph explicitly.
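+ *
+ * With unsigned semantics, four of the eight comparisons are constant:
+ * 0 > x and x < 0 are always false, while x >= 0 and 0 <= x are always
+ * true. The other four depend on x, so the expected graphs below check
+ * that exactly the constant ones fold.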
+ */
+TEST_F(ConstantFoldingTest, UnsignedComparisonsWithZero) {
+  graph_ = CreateGraph(&allocator_);
+  HBasicBlock* entry_block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry_block);
+  graph_->SetEntryBlock(entry_block);
+  HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block);
+  HBasicBlock* exit_block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit_block);
+  graph_->SetExitBlock(exit_block);
+  entry_block->AddSuccessor(block);
+  block->AddSuccessor(exit_block);
+
+  // Make various unsigned comparisons with zero against a parameter.
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt, true);
+  entry_block->AddInstruction(parameter);
+  entry_block->AddInstruction(new (&allocator_) HGoto());
+
+  HInstruction* zero = graph_->GetIntConstant(0);
+
+  HInstruction* last;
+  block->AddInstruction(last = new (&allocator_) HAbove(zero, parameter));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HAbove(parameter, zero));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HAboveOrEqual(zero, parameter));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HAboveOrEqual(parameter, zero));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HBelow(zero, parameter));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HBelow(parameter, zero));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HBelowOrEqual(zero, parameter));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(last = new (&allocator_) HBelowOrEqual(parameter, zero));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
+  block->AddInstruction(new (&allocator_) HReturn(zero));
+
+  exit_block->AddInstruction(new (&allocator_) HExit());
+
+  graph_->BuildDominatorTree();
+
+  const std::string expected_before =
+      "BasicBlock 0, succ: 1\n"
+      "  0: ParameterValue [18, 18, 17, 16, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, 9, "
+                            "8, 8, 7, 6, 6, 5, 4, 4, 3]\n"
+      "  2: IntConstant [19, 17, 15, 13, 11, 9, 7, 5, 3]\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  3: Above(2, 0) [4]\n"
+      "  4: Select(0, 0, 3)\n"
+      "  5: Above(0, 2) [6]\n"
+      "  6: Select(0, 0, 5)\n"
+      "  7: AboveOrEqual(2, 0) [8]\n"
+      "  8: Select(0, 0, 7)\n"
+      "  9: AboveOrEqual(0, 2) [10]\n"
+      "  10: Select(0, 0, 9)\n"
+      "  11: Below(2, 0) [12]\n"
+      "  12: Select(0, 0, 11)\n"
+      "  13: Below(0, 2) [14]\n"
+      "  14: Select(0, 0, 13)\n"
+      "  15: BelowOrEqual(2, 0) [16]\n"
+      "  16: Select(0, 0, 15)\n"
+      "  17: BelowOrEqual(0, 2) [18]\n"
+      "  18: Select(0, 0, 17)\n"
+      "  19: Return(2)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  20: Exit\n";
+
+  const std::string expected_after_cf =
+      "BasicBlock 0, succ: 1\n"
+      "  0: ParameterValue [18, 18, 17, 16, 16, 14, 14, 12, 12, 11, 10, 10, "
+                            "8, 8, 7, 6, 6, 5, 4, 4]\n"
+      "  2: IntConstant [14, 4, 19, 17, 11, 7, 5]\n"
+      "  21: IntConstant [16, 10]\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Select(0, 0, 2)\n"
+      "  5: Above(0, 2) [6]\n"
+      "  6: Select(0, 0, 5)\n"
+      "  7: AboveOrEqual(2, 0) [8]\n"
+      "  8: Select(0, 0, 7)\n"
+      "  10: Select(0, 0, 21)\n"
+      "  11: Below(2, 0) [12]\n"
+      "  12: Select(0, 0, 11)\n"
+      "  14: Select(0, 0, 2)\n"
+      "  16: Select(0, 0, 21)\n"
+      "  17: BelowOrEqual(0, 2) [18]\n"
+      "  18: Select(0, 0, 17)\n"
+      "  19: Return(2)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  20: Exit\n";
+
+  const std::string expected_after_dce =
+      "BasicBlock 0, succ: 1\n"
+      "  0: ParameterValue\n"
+      "  2: IntConstant [19]\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  19: Return(2)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  20: Exit\n";
+
+  auto check_after_cf = [](HGraph* graph) {
+    CHECK(graph != nullptr);
+  };
+
+  TestCodeOnReadyGraph(expected_before,
+                       expected_after_cf,
+                       expected_after_dce,
+                       check_after_cf);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 9754043..e1bde7c 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) {
-  ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter());
+  ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter(kArenaAllocDCE));
   constexpr size_t kDefaultWorklistSize = 8;
   worklist.reserve(kDefaultWorklistSize);
   visited->SetBit(graph->GetEntryBlock()->GetBlockId());
@@ -41,11 +41,11 @@
       HIf* if_instruction = last_instruction->AsIf();
       HInstruction* condition = if_instruction->InputAt(0);
       if (condition->IsIntConstant()) {
-        if (condition->AsIntConstant()->IsOne()) {
+        if (condition->AsIntConstant()->IsTrue()) {
           live_successors = live_successors.SubArray(0u, 1u);
           DCHECK_EQ(live_successors[0], if_instruction->IfTrueSuccessor());
         } else {
-          DCHECK(condition->AsIntConstant()->IsZero());
+          DCHECK(condition->AsIntConstant()->IsFalse()) << condition->AsIntConstant()->GetValue();
           live_successors = live_successors.SubArray(1u, 1u);
           DCHECK_EQ(live_successors[0], if_instruction->IfFalseSuccessor());
         }
@@ -81,12 +81,6 @@
   }
 }
 
-static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
-  for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
-    set->SetBit(it.Current()->GetHeader()->GetBlockId());
-  }
-}
-
 void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
   if (stats_ != nullptr) {
     stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
@@ -94,14 +88,214 @@
   }
 }
 
-void HDeadCodeElimination::RemoveDeadBlocks() {
+void HDeadCodeElimination::MaybeRecordSimplifyIf() {
+  if (stats_ != nullptr) {
+    stats_->RecordStat(MethodCompilationStat::kSimplifyIf);
+  }
+}
+
+static bool HasInput(HCondition* instruction, HInstruction* input) {
+  return (instruction->InputAt(0) == input) ||
+         (instruction->InputAt(1) == input);
+}
+
+static bool HasEquality(IfCondition condition) {
+  switch (condition) {
+    case kCondEQ:
+    case kCondLE:
+    case kCondGE:
+    case kCondBE:
+    case kCondAE:
+      return true;
+    case kCondNE:
+    case kCondLT:
+    case kCondGT:
+    case kCondB:
+    case kCondA:
+      return false;
+  }
+}
+
+static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstruction* right) {
+  if (left == right && !Primitive::IsFloatingPointType(left->GetType())) {
+    return condition->GetBlock()->GetGraph()->GetIntConstant(
+        HasEquality(condition->GetCondition()) ? 1 : 0);
+  }
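+  // For identical operands the result depends only on whether the condition
+  // includes equality, e.g. x <= x is true while x < x is false. Floating
+  // point is deliberately excluded above: x may be NaN, and no ordered
+  // comparison with NaN holds (even NaN == NaN is false).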
+
+  if (!left->IsConstant() || !right->IsConstant()) {
+    return nullptr;
+  }
+
+  if (left->IsIntConstant()) {
+    return condition->Evaluate(left->AsIntConstant(), right->AsIntConstant());
+  } else if (left->IsNullConstant()) {
+    return condition->Evaluate(left->AsNullConstant(), right->AsNullConstant());
+  } else if (left->IsLongConstant()) {
+    return condition->Evaluate(left->AsLongConstant(), right->AsLongConstant());
+  } else if (left->IsFloatConstant()) {
+    return condition->Evaluate(left->AsFloatConstant(), right->AsFloatConstant());
+  } else {
+    DCHECK(left->IsDoubleConstant());
+    return condition->Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant());
+  }
+}
+
+// Simplify the pattern:
+//
+//        B1    B2    ...
+//       goto  goto  goto
+//         \    |    /
+//          \   |   /
+//             B3
+//     i1 = phi(input, input)
+//     (i2 = condition on i1)
+//        if i1 (or i2)
+//          /     \
+//         /       \
+//        B4       B5
+//
+// Into:
+//
+//       B1      B2    ...
+//        |      |      |
+//       B4      B5    B?
+//
+// This simplification cannot be applied for loop headers, as they
+// contain a suspend check.
+//
+// Note that we rely on the dead code elimination to get rid of B3.
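+//
+// As a concrete (hypothetical) instance: if B1 feeds the constant 1 into
+// the phi, the branch outcome is known for paths coming through B1, so the
+// edge B1 -> B3 is rewired straight to the true successor B4 and B1's phi
+// input is dropped; predecessors whose phi input cannot be evaluated to a
+// constant keep going through B3.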
+bool HDeadCodeElimination::SimplifyIfs() {
+  bool simplified_one_or_more_ifs = false;
+  bool rerun_dominance_and_loop_analysis = false;
+
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    HInstruction* last = block->GetLastInstruction();
+    HInstruction* first = block->GetFirstInstruction();
+    if (last->IsIf() &&
+        block->HasSinglePhi() &&
+        block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
+      bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
+      bool has_only_phi_condition_and_if =
+          !has_only_phi_and_if &&
+          first->IsCondition() &&
+          HasInput(first->AsCondition(), block->GetFirstPhi()) &&
+          (first->GetNext() == last) &&
+          (last->InputAt(0) == first) &&
+          first->HasOnlyOneNonEnvironmentUse();
+
+      if (has_only_phi_and_if || has_only_phi_condition_and_if) {
+        DCHECK(!block->IsLoopHeader());
+        HPhi* phi = block->GetFirstPhi()->AsPhi();
+        bool phi_input_is_left = (first->InputAt(0) == phi);
+
+        // Walk over all inputs of the phi and update the control flow of
+        // predecessors feeding constants to the phi.
+        // Note that phi->InputCount() may change inside the loop.
+        for (size_t i = 0; i < phi->InputCount();) {
+          HInstruction* input = phi->InputAt(i);
+          HInstruction* value_to_check = nullptr;
+          if (has_only_phi_and_if) {
+            if (input->IsIntConstant()) {
+              value_to_check = input;
+            }
+          } else {
+            DCHECK(has_only_phi_condition_and_if);
+            if (phi_input_is_left) {
+              value_to_check = Evaluate(first->AsCondition(), input, first->InputAt(1));
+            } else {
+              value_to_check = Evaluate(first->AsCondition(), first->InputAt(0), input);
+            }
+          }
+          if (value_to_check == nullptr) {
+            // Could not evaluate to a constant, continue iterating over the inputs.
+            ++i;
+          } else {
+            HBasicBlock* predecessor_to_update = block->GetPredecessors()[i];
+            HBasicBlock* successor_to_update = nullptr;
+            if (value_to_check->AsIntConstant()->IsTrue()) {
+              successor_to_update = last->AsIf()->IfTrueSuccessor();
+            } else {
+              DCHECK(value_to_check->AsIntConstant()->IsFalse())
+                  << value_to_check->AsIntConstant()->GetValue();
+              successor_to_update = last->AsIf()->IfFalseSuccessor();
+            }
+            predecessor_to_update->ReplaceSuccessor(block, successor_to_update);
+            phi->RemoveInputAt(i);
+            simplified_one_or_more_ifs = true;
+            if (block->IsInLoop()) {
+              rerun_dominance_and_loop_analysis = true;
+            }
+            // For simplicity, don't create a dead block here; let the dead
+            // code elimination pass deal with it.
+            if (phi->InputCount() == 1) {
+              break;
+            }
+          }
+        }
+        if (block->GetPredecessors().size() == 1) {
+          phi->ReplaceWith(phi->InputAt(0));
+          block->RemovePhi(phi);
+          if (has_only_phi_condition_and_if) {
+            // Evaluate here (and not wait for a constant folding pass) to open
+            // more opportunities for DCE.
+            HInstruction* result = first->AsCondition()->TryStaticEvaluation();
+            if (result != nullptr) {
+              first->ReplaceWith(result);
+              block->RemoveInstruction(first);
+            }
+          }
+        }
+        if (simplified_one_or_more_ifs) {
+          MaybeRecordSimplifyIf();
+        }
+      }
+    }
+  }
+  // We need to re-analyze the graph in order to run DCE afterwards.
+  if (simplified_one_or_more_ifs) {
+    if (rerun_dominance_and_loop_analysis) {
+      graph_->ClearLoopInformation();
+      graph_->ClearDominanceInformation();
+      graph_->BuildDominatorTree();
+    } else {
+      graph_->ClearDominanceInformation();
+      // We have introduced critical edges, remove them.
+      graph_->SimplifyCFG();
+      graph_->ComputeDominanceInformation();
+      graph_->ComputeTryBlockInformation();
+    }
+  }
+
+  return simplified_one_or_more_ifs;
+}
+
+void HDeadCodeElimination::ConnectSuccessiveBlocks() {
+  // Order does not matter.
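+  // Example: for a chain B1 -> B2 -> B3 in which every block ends in a Goto
+  // and each successor has a single predecessor, B1 first absorbs B2; because
+  // the iterator only advances when no merge happens, B1 then absorbs B3 too.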
+  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+    HBasicBlock* block  = it.Current();
+    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
+      it.Advance();
+      continue;
+    }
+    HBasicBlock* successor = block->GetSingleSuccessor();
+    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    block->MergeWith(successor);
+    // Reiterate on this block in case it can be merged with its new successor.
+  }
+}
+
+bool HDeadCodeElimination::RemoveDeadBlocks() {
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
-  ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
-  ArenaBitVector affected_loops(allocator, graph_->GetBlocks().size(), false);
+  ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false, kArenaAllocDCE);
 
   MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;
+  bool rerun_dominance_and_loop_analysis = false;
 
   // Remove all dead blocks. Iterate in post order because removal needs the
   // block's chain of dominators and nested loops need to be updated from the
@@ -109,42 +303,30 @@
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
     int id = block->GetBlockId();
-    if (live_blocks.IsBitSet(id)) {
-      if (affected_loops.IsBitSet(id)) {
-        DCHECK(block->IsLoopHeader());
-        block->GetLoopInformation()->Update();
-      }
-    } else {
+    if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
-      MarkLoopHeadersContaining(*block, &affected_loops);
       block->DisconnectAndDelete();
       removed_one_or_more_blocks = true;
+      if (block->IsInLoop()) {
+        rerun_dominance_and_loop_analysis = true;
+      }
     }
   }
 
   // If we removed at least one block, we need to recompute the full
-  // dominator tree.
+  // dominator tree and try block membership.
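+  // Loop information is rebuilt only when a removed block was inside a loop,
+  // as that can invalidate back edges and loop headers; otherwise recomputing
+  // dominance and try-block membership is sufficient.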
   if (removed_one_or_more_blocks) {
-    graph_->ClearDominanceInformation();
-    graph_->ComputeDominanceInformation();
-  }
-
-  // Connect successive blocks created by dead branches. Order does not matter.
-  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
-    HBasicBlock* block  = it.Current();
-    if (block->IsEntryBlock() || block->GetSuccessors().size() != 1u) {
-      it.Advance();
-      continue;
+    if (rerun_dominance_and_loop_analysis) {
+      graph_->ClearLoopInformation();
+      graph_->ClearDominanceInformation();
+      graph_->BuildDominatorTree();
+    } else {
+      graph_->ClearDominanceInformation();
+      graph_->ComputeDominanceInformation();
+      graph_->ComputeTryBlockInformation();
     }
-    HBasicBlock* successor = block->GetSuccessors()[0];
-    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
-      it.Advance();
-      continue;
-    }
-    block->MergeWith(successor);
-
-    // Reiterate on this block in case it can be merged with its new successor.
   }
+  return removed_one_or_more_blocks;
 }
 
 void HDeadCodeElimination::RemoveDeadInstructions() {
@@ -164,6 +346,7 @@
       if (!inst->HasSideEffects()
           && !inst->CanThrow()
           && !inst->IsSuspendCheck()
+          && !inst->IsNativeDebugInfo()
           // If we added an explicit barrier then we should keep it.
           && !inst->IsMemoryBarrier()
           && !inst->IsParameterValue()
@@ -176,9 +359,19 @@
 }
 
 void HDeadCodeElimination::Run() {
-  if (!graph_->HasTryCatch()) {
-    // TODO: Update dead block elimination and enable for try/catch.
-    RemoveDeadBlocks();
+  // Do not eliminate dead blocks if the graph has irreducible loops. We could
+  // support it, but that would require changes in our loop representation to handle
+  // multiple entry points. We decided it was not worth the complexity.
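+  // (An irreducible loop is one that can be entered at more than one block,
+  // e.g. via a jump into the middle of the loop body, so it lacks the single
+  // header our loop information is anchored on.)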
+  if (!graph_->HasIrreducibleLoops()) {
+    // Simplify graph to generate more dead block patterns.
+    ConnectSuccessiveBlocks();
+    bool did_any_simplification = false;
+    did_any_simplification |= SimplifyIfs();
+    did_any_simplification |= RemoveDeadBlocks();
+    if (did_any_simplification) {
+      // Connect successive blocks created by dead branches.
+      ConnectSuccessiveBlocks();
+    }
   }
   SsaRedundantPhiElimination(graph_).Run();
   RemoveDeadInstructions();
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 8d6008b..58e700d 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,18 +31,19 @@
  public:
   HDeadCodeElimination(HGraph* graph,
                        OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kInitialDeadCodeEliminationPassName)
+                       const char* name = kDeadCodeEliminationPassName)
       : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
-
-  static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
-  static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
+  static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination";
 
  private:
   void MaybeRecordDeadBlock(HBasicBlock* block);
-  void RemoveDeadBlocks();
+  void MaybeRecordSimplifyIf();
+  bool RemoveDeadBlocks();
   void RemoveDeadInstructions();
+  bool SimplifyIfs();
+  void ConnectSuccessiveBlocks();
 
   DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
 };
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 2c6a1ef..fe52aac 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -26,6 +26,8 @@
 
 namespace art {
 
+class DeadCodeEliminationTest : public CommonCompilerTest {};
+
 static void TestCode(const uint16_t* data,
                      const std::string& expected_before,
                      const std::string& expected_after) {
@@ -34,8 +36,6 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  graph->TryBuildingSsa();
-
   StringPrettyPrinter printer_before(graph);
   printer_before.VisitInsertionOrder();
   std::string actual_before = printer_before.str();
@@ -45,9 +45,9 @@
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
   HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker(graph);
-  ssa_checker.Run();
-  ASSERT_TRUE(ssa_checker.IsValid());
+  GraphChecker graph_checker(graph);
+  graph_checker.Run();
+  ASSERT_TRUE(graph_checker.IsValid());
 
   StringPrettyPrinter printer_after(graph);
   printer_after.VisitInsertionOrder();
@@ -55,7 +55,6 @@
   ASSERT_EQ(actual_after, expected_after);
 }
 
-
 /**
  * Small three-register program.
  *
@@ -69,7 +68,7 @@
  * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
  *     return-void              7.      return
  */
-TEST(DeadCodeElimination, AdditionAndConditionalJump) {
+TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
@@ -79,30 +78,30 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [15, 22, 8]\n"
-    "  5: IntConstant [22, 8]\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  8: GreaterThanOrEqual(3, 5) [9]\n"
-    "  9: If(8)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  22: Phi(5, 3) [15]\n"
-    "  15: Add(22, 3)\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  18: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  21: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  7: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  11: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   // Expected difference after dead code elimination.
   diff_t expected_diff = {
-    { "  3: IntConstant [15, 22, 8]\n", "  3: IntConstant [22, 8]\n" },
-    { "  22: Phi(5, 3) [15]\n",         "  22: Phi(5, 3)\n" },
-    { "  15: Add(22, 3)\n",             removed }
+    { "  3: IntConstant [9, 8, 5]\n",  "  3: IntConstant [8, 5]\n" },
+    { "  8: Phi(4, 3) [9]\n",          "  8: Phi(4, 3)\n" },
+    { "  9: Add(8, 3)\n",              removed }
   };
   std::string expected_after = Patch(expected_before, expected_diff);
 
@@ -131,7 +130,7 @@
  * L3: v2 <- v1 + 4             11.     add-int/lit16 v2, v1, #+4
  *     return                   13.     return-void
  */
-TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) {
+TEST_F(DeadCodeEliminationTest, AdditionsAndUnconditionalJumps) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -145,49 +144,37 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  13: IntConstant [14]\n"
-    "  18: IntConstant [19]\n"
-    "  24: IntConstant [25]\n"
-    "  29: SuspendCheck\n"
-    "  30: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"
-    "  11: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  14: Add(19, 13) [25]\n"
-    "  16: Goto 4\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  19: Add(9, 18) [14]\n"
-    "  21: SuspendCheck\n"
-    "  22: Goto 2\n"
-    "BasicBlock 4, pred: 2, succ: 5\n"
-    "  25: Add(14, 24)\n"
-    "  27: ReturnVoid\n"
-    "BasicBlock 5, pred: 4\n"
-    "  28: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  6: IntConstant [7]\n"
+      "  9: IntConstant [10]\n"
+      "  12: IntConstant [13]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  4: Add(2, 3) [7]\n"
+      "  5: Goto 3\n"
+      "BasicBlock 2, pred: 3, succ: 4\n"
+      "  10: Add(7, 9) [13]\n"
+      "  11: Goto 4\n"
+      "BasicBlock 3, pred: 1, succ: 2\n"
+      "  7: Add(4, 6) [10]\n"
+      "  8: Goto 2\n"
+      "BasicBlock 4, pred: 2, succ: 5\n"
+      "  13: Add(10, 12)\n"
+      "  14: ReturnVoid\n"
+      "BasicBlock 5, pred: 4\n"
+      "  15: Exit\n";
 
-  // The SuspendCheck instruction following this Add instruction
-  // inserts the latter in an environment, thus making it "used" and
-  // therefore non removable.  It ensures that some other Add and
-  // IntConstant instructions cannot be removed, as they are direct
-  // or indirect inputs of the initial Add instruction.
   std::string expected_after =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  18: IntConstant [19]\n"
-    "  29: SuspendCheck\n"
-    "  30: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5\n"
-    "  9: Add(3, 5) [19]\n"
-    "  19: Add(9, 18) []\n"
-    "  21: SuspendCheck\n"
-    "  27: ReturnVoid\n"
-    "BasicBlock 5, pred: 1\n"
-    "  28: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5\n"
+      "  14: ReturnVoid\n"
+      "BasicBlock 5, pred: 1\n"
+      "  15: Exit\n";
 
   TestCode(data, expected_before, expected_after);
 }
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..14c318e
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache types array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_class->GetDexFile();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+      // Add the special argument base to the load.
+      load_class->AddSpecialInput(base);
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache strings array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_string->GetDexFile();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
+      // Add the special argument base to the load.
+      load_string->AddSpecialInput(base);
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
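+    // Not found: lb now doubles as an insertion hint, letting PutBefore()
+    // below place the new entry in sorted position without a second lookup.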
+
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBasesIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000..9142e29
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats) {}
+
+  static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm";
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
new file mode 100644
index 0000000..19bab08
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips.h"
+#include "dex_cache_array_fixups_mips.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HMipsDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+    // Computing the dex cache base for PC-relative accesses will clobber RA
+    // with the NAL instruction on R2. Record this before generating the
+    // method entry.
+    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+      codegen_->ClobberRA();
+    }
+  }
+
+ private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache types array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_class->GetDexFile();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+      // Add the special argument base to the load.
+      load_class->AddSpecialInput(base);
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache strings array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_string->GetDexFile();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
+      // Add the special argument base to the load.
+      load_string->AddSpecialInput(base);
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
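+  // Unlike the ARM pass above, this uses ArenaSafeMap::GetOrCreate(), which
+  // is expected to perform the same find-or-insert-with-hint sequence
+  // internally and to invoke the factory lambda only on a miss.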
+  HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    return dex_cache_array_bases_.GetOrCreate(
+        &dex_file,
+        [this, &dex_file]() {
+          HMipsDexCacheArraysBase* base =
+              new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file);
+          HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+          // Insert the base at the start of the entry block, move it to a better
+          // position later in MoveBasesIfNeeded().
+          entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+          return base;
+        });
+  }
+
+  CodeGeneratorMIPS* codegen_;
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
new file mode 100644
index 0000000..861a199
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace mips {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips";
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
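For orientation, a hedged sketch of how such a pass is driven; the wrapper
function below is hypothetical, since the optimizing compiler actually invokes
the pass from its architecture-specific optimization list:

// Hypothetical driver, for illustration only.
void RunMipsDexCacheArrayFixups(HGraph* graph,
                                CodeGenerator* codegen,
                                OptimizingCompilerStats* stats) {
  mips::DexCacheArrayFixups fixups(graph, codegen, stats);
  fixups.Run();  // Bails out early on graphs with irreducible loops.
}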
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 91e4a99..50c677a 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -24,15 +24,12 @@
 
 namespace art {
 
+class OptimizerTest : public CommonCompilerTest {};
+
 static void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  graph->BuildDominatorTree();
+  HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_EQ(graph->GetBlocks().size(), blocks_length);
   for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == kInvalidBlockId) {
@@ -50,7 +47,7 @@
   }
 }
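The tests below now obtain their graphs from the shared CreateCFG helper.
Judging from the boilerplate deleted above (and from the AnalyzeNaturalLoops
call removed in find_loops_test.cc further down), the helper packages roughly
the following steps; this is a reconstruction under that assumption, not the
helper's actual definition:

// Reconstructed sketch of the shared test helper.
HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data) {
  HGraph* graph = CreateGraph(allocator);
  HGraphBuilder builder(graph);
  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
  if (!builder.BuildGraph(*item)) {
    return nullptr;
  }
  graph->BuildDominatorTree();  // Assumed to also trigger loop analysis.
  return graph;
}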
 
-TEST(OptimizerTest, ReturnVoid) {
+TEST_F(OptimizerTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::RETURN_VOID);  // Block number 1
 
@@ -63,7 +60,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG1) {
+TEST_F(OptimizerTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,  // Block number 1
     Instruction::RETURN_VOID);  // Block number 2
@@ -78,7 +75,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG2) {
+TEST_F(OptimizerTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,  // Block number 1
     Instruction::GOTO | 0x100,  // Block number 2
@@ -95,7 +92,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG3) {
+TEST_F(OptimizerTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,    // Block number 1
     Instruction::RETURN_VOID,     // Block number 2
@@ -126,15 +123,16 @@
   TestCode(data3, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG4) {
+TEST_F(OptimizerTest, CFG4) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
     Instruction::GOTO | 0xFF00);
 
   const uint32_t dominators[] = {
       kInvalidBlockId,
-      0,
-      kInvalidBlockId
+      3,
+      kInvalidBlockId,
+      0
   };
 
   TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
@@ -145,7 +143,7 @@
   TestCode(data2, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG5) {
+TEST_F(OptimizerTest, CFG5) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,     // Block number 1
     Instruction::GOTO | 0x100,    // Dead block
@@ -162,7 +160,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG6) {
+TEST_F(OptimizerTest, CFG6) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -181,7 +179,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG7) {
+TEST_F(OptimizerTest, CFG7) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,        // Block number 1
@@ -201,7 +199,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG8) {
+TEST_F(OptimizerTest, CFG8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,        // Block number 1
@@ -222,7 +220,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG9) {
+TEST_F(OptimizerTest, CFG9) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,        // Block number 1
@@ -243,7 +241,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG10) {
+TEST_F(OptimizerTest, CFG10) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 6,  // Block number 1
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 9b0eb70..04789d9 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -27,17 +27,9 @@
 
 namespace art {
 
-static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  return graph;
-}
+class FindLoopsTest : public CommonCompilerTest {};
 
-TEST(FindLoopsTest, CFG1) {
+TEST_F(FindLoopsTest, CFG1) {
   // Constant is not used.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -45,26 +37,26 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG2) {
+TEST_F(FindLoopsTest, CFG2) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG3) {
+TEST_F(FindLoopsTest, CFG3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -74,13 +66,13 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG4) {
+TEST_F(FindLoopsTest, CFG4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 4,
@@ -91,13 +83,13 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG5) {
+TEST_F(FindLoopsTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -106,7 +98,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
@@ -138,7 +130,7 @@
   }
 }
 
-TEST(FindLoopsTest, Loop1) {
+TEST_F(FindLoopsTest, Loop1) {
   // Simple loop with one preheader and one back edge.
   // var a = 0;
   // while (a == a) {
@@ -152,7 +144,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header
@@ -163,7 +155,7 @@
   TestBlock(graph, 5, false, kInvalidBlockId);  // exit block
 }
 
-TEST(FindLoopsTest, Loop2) {
+TEST_F(FindLoopsTest, Loop2) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   // var a = 0;
@@ -180,7 +172,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // goto block
@@ -192,7 +184,7 @@
   TestBlock(graph, 6, false, kInvalidBlockId);  // exit block
 }
 
-TEST(FindLoopsTest, Loop3) {
+TEST_F(FindLoopsTest, Loop3) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -205,7 +197,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // goto block
@@ -219,7 +211,7 @@
   TestBlock(graph, 8, false, kInvalidBlockId);  // synthesized pre header
 }
 
-TEST(FindLoopsTest, Loop4) {
+TEST_F(FindLoopsTest, Loop4) {
   // Test loop with originally two back edges.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -231,7 +223,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header
@@ -245,7 +237,7 @@
 }
 
 
-TEST(FindLoopsTest, Loop5) {
+TEST_F(FindLoopsTest, Loop5) {
   // Test loop with two exit edges.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -257,7 +249,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header
@@ -271,7 +263,7 @@
   TestBlock(graph, 8, false, kInvalidBlockId);  // synthesized block at the loop exit
 }
 
-TEST(FindLoopsTest, InnerLoop) {
+TEST_F(FindLoopsTest, InnerLoop) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 6,
@@ -282,7 +274,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header of outer loop
@@ -302,7 +294,7 @@
                     *graph->GetBlocks()[3]->GetLoopInformation()));
 }
 
-TEST(FindLoopsTest, TwoLoops) {
+TEST_F(FindLoopsTest, TwoLoops) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -313,7 +305,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header of first loop
@@ -332,7 +324,7 @@
                     *graph->GetBlocks()[4]->GetLoopInformation()));
 }
 
-TEST(FindLoopsTest, NonNaturalLoop) {
+TEST_F(FindLoopsTest, NonNaturalLoop) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -343,14 +335,14 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_TRUE(graph->GetBlocks()[3]->IsLoopHeader());
   HLoopInformation* info = graph->GetBlocks()[3]->GetLoopInformation();
   ASSERT_EQ(1u, info->NumberOfBackEdges());
   ASSERT_FALSE(info->GetHeader()->Dominates(info->GetBackEdges()[0]));
 }
 
-TEST(FindLoopsTest, DoWhileLoop) {
+TEST_F(FindLoopsTest, DoWhileLoop) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::GOTO | 0x0100,
@@ -359,7 +351,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header of first loop
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3de96b5..c8cba20 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -17,33 +17,46 @@
 #include "graph_checker.h"
 
 #include <algorithm>
-#include <map>
 #include <string>
 #include <sstream>
 
 #include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
+static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) {
+  return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid();
+}
+
+static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) {
+  if (!block->IsSingleTryBoundary()) {
+    return false;
+  }
+
+  HTryBoundary* boundary = block->GetLastInstruction()->AsTryBoundary();
+  return block->GetPredecessors().size() == 1u &&
+         boundary->GetNormalFlowSuccessor()->IsExitBlock() &&
+         !boundary->IsEntry();
+}
+
 void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
   current_block_ = block;
 
   // Check consistency with respect to predecessors of `block`.
-  ArenaSafeMap<HBasicBlock*, size_t> predecessors_count(
-      std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
-  for (HBasicBlock* p : block->GetPredecessors()) {
-    auto it = predecessors_count.find(p);
-    if (it != predecessors_count.end()) {
-      ++it->second;
-    } else {
-      predecessors_count.Put(p, 1u);
+  // Note: Counting duplicates with a sorted vector uses up to 6x less memory
+  // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+  ArenaVector<HBasicBlock*>& sorted_predecessors = blocks_storage_;
+  sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end());
+  std::sort(sorted_predecessors.begin(), sorted_predecessors.end());
+  for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) {
+    HBasicBlock* p = *it++;
+    size_t p_count_in_block_predecessors = 1u;
+    for (; it != end && *it == p; ++it) {
+      ++p_count_in_block_predecessors;
     }
-  }
-  for (auto& pc : predecessors_count) {
-    HBasicBlock* p = pc.first;
-    size_t p_count_in_block_predecessors = pc.second;
     size_t block_count_in_p_successors =
         std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
     if (p_count_in_block_predecessors != block_count_in_p_successors) {
@@ -56,19 +69,17 @@
   }
 
   // Check consistency with respect to successors of `block`.
-  ArenaSafeMap<HBasicBlock*, size_t> successors_count(
-      std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
-  for (HBasicBlock* s : block->GetSuccessors()) {
-    auto it = successors_count.find(s);
-    if (it != successors_count.end()) {
-      ++it->second;
-    } else {
-      successors_count.Put(s, 1u);
+  // Note: Counting duplicates with a sorted vector uses up to 6x less memory
+  // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+  ArenaVector<HBasicBlock*>& sorted_successors = blocks_storage_;
+  sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end());
+  std::sort(sorted_successors.begin(), sorted_successors.end());
+  for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) {
+    HBasicBlock* s = *it++;
+    size_t s_count_in_block_successors = 1u;
+    for (; it != end && *it == s; ++it) {
+      ++s_count_in_block_successors;
     }
-  }
-  for (auto& sc : successors_count) {
-    HBasicBlock* s = sc.first;
-    size_t s_count_in_block_successors = sc.second;
     size_t block_count_in_s_predecessors =
         std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
     if (s_count_in_block_successors != block_count_in_s_predecessors) {
@@ -89,28 +100,17 @@
                           block->GetBlockId()));
   }
 
-  // Ensure that only Return(Void) and Throw jump to Exit. An exiting
-  // TryBoundary may be between a Throw and the Exit if the Throw is in a try.
+  // Ensure that only Return(Void) and Throw jump to Exit. An exiting TryBoundary
+  // may sit between such an instruction and Exit if the Throw/Return(Void) is
+  // inside a try block.
   if (block->IsExitBlock()) {
     for (HBasicBlock* predecessor : block->GetPredecessors()) {
-      if (predecessor->IsSingleTryBoundary()
-          && !predecessor->GetLastInstruction()->AsTryBoundary()->IsEntry()) {
-        HBasicBlock* real_predecessor = predecessor->GetSinglePredecessor();
-        HInstruction* last_instruction = real_predecessor->GetLastInstruction();
-        if (!last_instruction->IsThrow()) {
-          AddError(StringPrintf("Unexpected TryBoundary between %s:%d and Exit.",
-                                last_instruction->DebugName(),
-                                last_instruction->GetId()));
-        }
-      } else {
-        HInstruction* last_instruction = predecessor->GetLastInstruction();
-        if (!last_instruction->IsReturn()
-            && !last_instruction->IsReturnVoid()
-            && !last_instruction->IsThrow()) {
-          AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
-                                last_instruction->DebugName(),
-                                last_instruction->GetId()));
-        }
+      HInstruction* last_instruction = IsExitTryBoundaryIntoExitBlock(predecessor) ?
+        predecessor->GetSinglePredecessor()->GetLastInstruction() :
+        predecessor->GetLastInstruction();
+      if (!IsAllowedToJumpToExitBlock(last_instruction)) {
+        AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
+                              last_instruction->DebugName(),
+                              last_instruction->GetId()));
       }
     }
   }
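The duplicate-counting rewrite at the top of VisitBasicBlock replaces a map
with a sort-then-scan over a vector. The idiom stands on its own; a
self-contained sketch with plain std::vector (illustrative names, not ART
code):

#include <algorithm>
#include <cstdio>
#include <vector>

// After std::sort, equal values are adjacent, so each run of equal elements
// can be counted with a single forward scan and no auxiliary map.
void CountDuplicates(std::vector<int> items) {
  std::sort(items.begin(), items.end());
  for (auto it = items.begin(), end = items.end(); it != end; ) {
    int value = *it++;
    size_t count = 1u;
    for (; it != end && *it == value; ++it) {
      ++count;
    }
    std::printf("%d occurs %zu time(s)\n", value, count);
  }
}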
@@ -148,224 +148,17 @@
     }
     current->Accept(this);
   }
-}
-
-void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
-  if (!GetGraph()->HasBoundsChecks()) {
-    AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, "
-                          "but HasBoundsChecks() returns false",
-                          check->DebugName(),
-                          check->GetId()));
-  }
-
-  // Perform the instruction base checks too.
-  VisitInstruction(check);
-}
-
-void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
-  // Ensure that all exception handlers are catch blocks and that handlers
-  // are not listed multiple times.
-  // Note that a normal-flow successor may be a catch block before CFG
-  // simplification. We only test normal-flow successors in SsaChecker.
-  for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) {
-    HBasicBlock* handler = it.Current();
-    if (!handler->IsCatchBlock()) {
-      AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which "
-                            "is not a catch block.",
-                            current_block_->GetBlockId(),
-                            try_boundary->DebugName(),
-                            try_boundary->GetId(),
-                            handler->GetBlockId()));
-    }
-    if (current_block_->HasSuccessor(handler, it.CurrentSuccessorIndex() + 1)) {
-      AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
-                            handler->GetBlockId(),
-                            try_boundary->DebugName(),
-                            try_boundary->GetId()));
-    }
-  }
-
-  VisitInstruction(try_boundary);
-}
-
-void GraphChecker::VisitInstruction(HInstruction* instruction) {
-  if (seen_ids_.IsBitSet(instruction->GetId())) {
-    AddError(StringPrintf("Instruction id %d is duplicate in graph.",
-                          instruction->GetId()));
-  } else {
-    seen_ids_.SetBit(instruction->GetId());
-  }
-
-  // Ensure `instruction` is associated with `current_block_`.
-  if (instruction->GetBlock() == nullptr) {
-    AddError(StringPrintf("%s %d in block %d not associated with any block.",
-                          instruction->IsPhi() ? "Phi" : "Instruction",
-                          instruction->GetId(),
-                          current_block_->GetBlockId()));
-  } else if (instruction->GetBlock() != current_block_) {
-    AddError(StringPrintf("%s %d in block %d associated with block %d.",
-                          instruction->IsPhi() ? "Phi" : "Instruction",
-                          instruction->GetId(),
-                          current_block_->GetBlockId(),
-                          instruction->GetBlock()->GetBlockId()));
-  }
-
-  // Ensure the inputs of `instruction` are defined in a block of the graph.
-  for (HInputIterator input_it(instruction); !input_it.Done();
-       input_it.Advance()) {
-    HInstruction* input = input_it.Current();
-    const HInstructionList& list = input->IsPhi()
-        ? input->GetBlock()->GetPhis()
-        : input->GetBlock()->GetInstructions();
-    if (!list.Contains(input)) {
-      AddError(StringPrintf("Input %d of instruction %d is not defined "
-                            "in a basic block of the control-flow graph.",
-                            input->GetId(),
-                            instruction->GetId()));
-    }
-  }
-
-  // Ensure the uses of `instruction` are defined in a block of the graph,
-  // and the entry in the use list is consistent.
-  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-       !use_it.Done(); use_it.Advance()) {
-    HInstruction* use = use_it.Current()->GetUser();
-    const HInstructionList& list = use->IsPhi()
-        ? use->GetBlock()->GetPhis()
-        : use->GetBlock()->GetInstructions();
-    if (!list.Contains(use)) {
-      AddError(StringPrintf("User %s:%d of instruction %d is not defined "
-                            "in a basic block of the control-flow graph.",
-                            use->DebugName(),
-                            use->GetId(),
-                            instruction->GetId()));
-    }
-    size_t use_index = use_it.Current()->GetIndex();
-    if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
-      AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
-                            "UseListNode index.",
-                            use->DebugName(),
-                            use->GetId(),
-                            instruction->GetId()));
-    }
-  }
-
-  // Ensure the environment uses entries are consistent.
-  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
-       !use_it.Done(); use_it.Advance()) {
-    HEnvironment* use = use_it.Current()->GetUser();
-    size_t use_index = use_it.Current()->GetIndex();
-    if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) {
-      AddError(StringPrintf("Environment user of %s:%d has a wrong "
-                            "UseListNode index.",
-                            instruction->DebugName(),
-                            instruction->GetId()));
-    }
-  }
-
-  // Ensure 'instruction' has pointers to its inputs' use entries.
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
-    HInstruction* input = input_record.GetInstruction();
-    HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
-    size_t use_index = use_node->GetIndex();
-    if ((use_node == nullptr)
-        || !input->GetUses().Contains(use_node)
-        || (use_index >= e)
-        || (use_index != i)) {
-      AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
-                            "at input %u (%s:%d).",
-                            instruction->DebugName(),
-                            instruction->GetId(),
-                            static_cast<unsigned>(i),
-                            input->DebugName(),
-                            input->GetId()));
-    }
-  }
-}
-
-void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  VisitInstruction(invoke);
-
-  if (invoke->IsStaticWithExplicitClinitCheck()) {
-    size_t last_input_index = invoke->InputCount() - 1;
-    HInstruction* last_input = invoke->InputAt(last_input_index);
-    if (last_input == nullptr) {
-      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
-                            "has a null pointer as last input.",
-                            invoke->DebugName(),
-                            invoke->GetId()));
-    }
-    if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
-      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
-                            "has a last instruction (%s:%d) which is neither a clinit check "
-                            "nor a load class instruction.",
-                            invoke->DebugName(),
-                            invoke->GetId(),
-                            last_input->DebugName(),
-                            last_input->GetId()));
-    }
-  }
-}
-
-void GraphChecker::VisitReturn(HReturn* ret) {
-  VisitInstruction(ret);
-  if (!ret->GetBlock()->GetSingleSuccessor()->IsExitBlock()) {
-    AddError(StringPrintf("%s:%d does not jump to the exit block.",
-                          ret->DebugName(),
-                          ret->GetId()));
-  }
-}
-
-void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
-  VisitInstruction(ret);
-  if (!ret->GetBlock()->GetSingleSuccessor()->IsExitBlock()) {
-    AddError(StringPrintf("%s:%d does not jump to the exit block.",
-                          ret->DebugName(),
-                          ret->GetId()));
-  }
-}
-
-void GraphChecker::VisitCheckCast(HCheckCast* check) {
-  VisitInstruction(check);
-  HInstruction* input = check->InputAt(1);
-  if (!input->IsLoadClass()) {
-    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
-                          check->DebugName(),
-                          check->GetId(),
-                          input->DebugName(),
-                          input->GetId()));
-  }
-}
-
-void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
-  VisitInstruction(instruction);
-  HInstruction* input = instruction->InputAt(1);
-  if (!input->IsLoadClass()) {
-    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
-                          instruction->DebugName(),
-                          instruction->GetId(),
-                          input->DebugName(),
-                          input->GetId()));
-  }
-}
-
-void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
-  super_type::VisitBasicBlock(block);
 
   // Ensure that catch blocks are not normal successors, and normal blocks are
   // never exceptional successors.
-  const size_t num_normal_successors = block->NumberOfNormalSuccessors();
-  for (size_t j = 0; j < num_normal_successors; ++j) {
-    HBasicBlock* successor = block->GetSuccessors()[j];
+  for (HBasicBlock* successor : block->GetNormalSuccessors()) {
     if (successor->IsCatchBlock()) {
       AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
                             successor->GetBlockId(),
                             block->GetBlockId()));
     }
   }
-  for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) {
-    HBasicBlock* successor = block->GetSuccessors()[j];
+  for (HBasicBlock* successor : block->GetExceptionalSuccessors()) {
     if (!successor->IsCatchBlock()) {
       AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
                             successor->GetBlockId(),
@@ -373,17 +166,29 @@
     }
   }
 
+  // Ensure dominated blocks have `block` as the dominator.
+  for (HBasicBlock* dominated : block->GetDominatedBlocks()) {
+    if (dominated->GetDominator() != block) {
+      AddError(StringPrintf("Block %d should be the dominator of %d.",
+                            block->GetBlockId(),
+                            dominated->GetBlockId()));
+    }
+  }
+
   // Ensure there is no critical edge (i.e., an edge connecting a
   // block with multiple successors to a block with multiple
   // predecessors). Exceptional edges are synthesized and hence
   // not accounted for.
-  if (block->NumberOfNormalSuccessors() > 1) {
-    for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
-      HBasicBlock* successor = block->GetSuccessors()[j];
-      if (successor->GetPredecessors().size() > 1) {
-        AddError(StringPrintf("Critical edge between blocks %d and %d.",
-                              block->GetBlockId(),
-                              successor->GetBlockId()));
+  if (block->GetSuccessors().size() > 1) {
+    if (IsExitTryBoundaryIntoExitBlock(block)) {
+      // Allowed critical edge (Throw/Return/ReturnVoid)->TryBoundary->Exit.
+    } else {
+      for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+        if (successor->GetPredecessors().size() > 1) {
+          AddError(StringPrintf("Critical edge between blocks %d and %d.",
+                                block->GetBlockId(),
+                                successor->GetBlockId()));
+        }
       }
     }
   }
@@ -437,20 +242,339 @@
   }
 
   if (block->IsLoopHeader()) {
-    CheckLoop(block);
+    HandleLoop(block);
   }
 }
 
-void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
+void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
+  if (!GetGraph()->HasBoundsChecks()) {
+    AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, "
+                          "but HasBoundsChecks() returns false",
+                          check->DebugName(),
+                          check->GetId()));
+  }
+
+  // Perform the instruction base checks too.
+  VisitInstruction(check);
+}
+
+void GraphChecker::VisitDeoptimize(HDeoptimize* deopt) {
+  if (GetGraph()->IsCompilingOsr()) {
+    AddError(StringPrintf("A graph compiled OSR cannot have a HDeoptimize instruction"));
+  }
+
+  // Perform the instruction base checks too.
+  VisitInstruction(deopt);
+}
+
+void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
+  ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers();
+
+  // Ensure that all exception handlers are catch blocks.
+  // Note that a normal-flow successor may be a catch block before CFG
+  // simplification. We only test normal-flow successors in GraphChecker.
+  for (HBasicBlock* handler : handlers) {
+    if (!handler->IsCatchBlock()) {
+      AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which "
+                            "is not a catch block.",
+                            current_block_->GetBlockId(),
+                            try_boundary->DebugName(),
+                            try_boundary->GetId(),
+                            handler->GetBlockId()));
+    }
+  }
+
+  // Ensure that handlers are not listed multiple times.
+  for (size_t i = 0, e = handlers.size(); i < e; ++i) {
+    if (ContainsElement(handlers, handlers[i], i + 1)) {
+        AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
+                            handlers[i]->GetBlockId(),
+                            try_boundary->DebugName(),
+                            try_boundary->GetId()));
+    }
+  }
+
+  VisitInstruction(try_boundary);
+}
+
+void GraphChecker::VisitLoadException(HLoadException* load) {
+  // Ensure that LoadException is the first instruction in a catch block.
+  if (!load->GetBlock()->IsCatchBlock()) {
+    AddError(StringPrintf("%s:%d is in a non-catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  } else if (load->GetBlock()->GetFirstInstruction() != load) {
+    AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  }
+}
+
+void GraphChecker::VisitInstruction(HInstruction* instruction) {
+  if (seen_ids_.IsBitSet(instruction->GetId())) {
+    AddError(StringPrintf("Instruction id %d is duplicate in graph.",
+                          instruction->GetId()));
+  } else {
+    seen_ids_.SetBit(instruction->GetId());
+  }
+
+  // Ensure `instruction` is associated with `current_block_`.
+  if (instruction->GetBlock() == nullptr) {
+    AddError(StringPrintf("%s %d in block %d not associated with any block.",
+                          instruction->IsPhi() ? "Phi" : "Instruction",
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  } else if (instruction->GetBlock() != current_block_) {
+    AddError(StringPrintf("%s %d in block %d associated with block %d.",
+                          instruction->IsPhi() ? "Phi" : "Instruction",
+                          instruction->GetId(),
+                          current_block_->GetBlockId(),
+                          instruction->GetBlock()->GetBlockId()));
+  }
+
+  // Ensure the inputs of `instruction` are defined in a block of the graph.
+  for (HInstruction* input : instruction->GetInputs()) {
+    const HInstructionList& list = input->IsPhi()
+        ? input->GetBlock()->GetPhis()
+        : input->GetBlock()->GetInstructions();
+    if (!list.Contains(input)) {
+      AddError(StringPrintf("Input %d of instruction %d is not defined "
+                            "in a basic block of the control-flow graph.",
+                            input->GetId(),
+                            instruction->GetId()));
+    }
+  }
+
+  // Ensure the uses of `instruction` are defined in a block of the graph,
+  // and the entry in the use list is consistent.
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    const HInstructionList& list = user->IsPhi()
+        ? user->GetBlock()->GetPhis()
+        : user->GetBlock()->GetInstructions();
+    if (!list.Contains(user)) {
+      AddError(StringPrintf("User %s:%d of instruction %d is not defined "
+                            "in a basic block of the control-flow graph.",
+                            user->DebugName(),
+                            user->GetId(),
+                            instruction->GetId()));
+    }
+    size_t use_index = use.GetIndex();
+    HConstInputsRef user_inputs = user->GetInputs();
+    if ((use_index >= user_inputs.size()) || (user_inputs[use_index] != instruction)) {
+      AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
+                            "UseListNode index.",
+                            user->DebugName(),
+                            user->GetId(),
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
+  }
+
+  // Ensure the environment uses entries are consistent.
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    HEnvironment* user = use.GetUser();
+    size_t use_index = use.GetIndex();
+    if ((use_index >= user->Size()) || (user->GetInstructionAt(use_index) != instruction)) {
+      AddError(StringPrintf("Environment user of %s:%d has a wrong "
+                            "UseListNode index.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
+  }
+
+  // Ensure 'instruction' has pointers to its inputs' use entries.
+  auto&& input_records = instruction->GetInputRecords();
+  for (size_t i = 0; i < input_records.size(); ++i) {
+    const HUserRecord<HInstruction*>& input_record = input_records[i];
+    HInstruction* input = input_record.GetInstruction();
+    if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
+        (input_record.GetUseNode() == input->GetUses().end()) ||
+        !input->GetUses().ContainsNode(*input_record.GetUseNode()) ||
+        (input_record.GetUseNode()->GetIndex() != i)) {
+      AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry "
+                            "at input %u (%s:%d).",
+                            instruction->DebugName(),
+                            instruction->GetId(),
+                            static_cast<unsigned>(i),
+                            input->DebugName(),
+                            input->GetId()));
+    }
+  }
+
+  // Ensure an instruction dominates all its uses.
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (!user->IsPhi() && !instruction->StrictlyDominates(user)) {
+      AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
+                            "use %s:%d in block %d.",
+                            instruction->DebugName(),
+                            instruction->GetId(),
+                            current_block_->GetBlockId(),
+                            user->DebugName(),
+                            user->GetId(),
+                            user->GetBlock()->GetBlockId()));
+    }
+  }
+
+  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+                          "but does not have one.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  }
+
+  // Ensure an instruction having an environment is dominated by the
+  // instructions contained in the environment.
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      HInstruction* env_instruction = environment->GetInstructionAt(i);
+      if (env_instruction != nullptr
+          && !env_instruction->StrictlyDominates(instruction)) {
+        AddError(StringPrintf("Instruction %d in environment of instruction %d "
+                              "from block %d does not dominate instruction %d.",
+                              env_instruction->GetId(),
+                              instruction->GetId(),
+                              current_block_->GetBlockId(),
+                              instruction->GetId()));
+      }
+    }
+  }
+
+  // Ensure that reference type instructions have reference type info.
+  if (instruction->GetType() == Primitive::kPrimNot) {
+    ScopedObjectAccess soa(Thread::Current());
+    if (!instruction->GetReferenceTypeInfo().IsValid()) {
+      AddError(StringPrintf("Reference type instruction %s:%d does not have "
+                            "valid reference type information.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
+  }
+
+  if (instruction->CanThrowIntoCatchBlock()) {
+    // Find the top-level environment. This corresponds to the environment of
+    // the catch block since we do not inline methods with try/catch.
+    HEnvironment* environment = instruction->GetEnvironment();
+    while (environment->GetParent() != nullptr) {
+      environment = environment->GetParent();
+    }
+
+    // Find all catch blocks and test that `instruction` has an environment
+    // value for each one.
+    const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
+    for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+      for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* catch_phi = phi_it.Current()->AsPhi();
+        if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
+          AddError(StringPrintf("Instruction %s:%d throws into catch block %d "
+                                "with catch phi %d for vreg %d but its "
+                                "corresponding environment slot is empty.",
+                                instruction->DebugName(),
+                                instruction->GetId(),
+                                catch_block->GetBlockId(),
+                                catch_phi->GetId(),
+                                catch_phi->GetRegNumber()));
+        }
+      }
+    }
+  }
+}
+
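The dominance requirement VisitInstruction enforces via StrictlyDominates can
be restated compactly: a definition must lie on every path from the entry
block to each of its non-phi uses. A minimal query over an immediate-dominator
tree, with Block as a hypothetical stand-in for HBasicBlock (the real
HInstruction::StrictlyDominates additionally orders instructions within one
block):

// Illustrative strict-dominance query: walk b's immediate-dominator chain
// upwards; a strictly dominates b iff it shows up strictly above b.
struct Block {
  const Block* dominator = nullptr;  // immediate dominator; null at entry
};

bool StrictlyDominates(const Block* a, const Block* b) {
  for (const Block* d = b->dominator; d != nullptr; d = d->dominator) {
    if (d == a) {
      return true;
    }
  }
  return false;
}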
+void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  VisitInstruction(invoke);
+
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    const HInstruction* last_input = invoke->GetInputs().back();
+    if (last_input == nullptr) {
+      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
+                            "has a null pointer as last input.",
+                            invoke->DebugName(),
+                            invoke->GetId()));
+    }
+    if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
+      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
+                            "has a last instruction (%s:%d) which is neither a clinit check "
+                            "nor a load class instruction.",
+                            invoke->DebugName(),
+                            invoke->GetId(),
+                            last_input->DebugName(),
+                            last_input->GetId()));
+    }
+  }
+}
+
+void GraphChecker::VisitReturn(HReturn* ret) {
+  VisitInstruction(ret);
+  HBasicBlock* successor = ret->GetBlock()->GetSingleSuccessor();
+  if (!successor->IsExitBlock() && !IsExitTryBoundaryIntoExitBlock(successor)) {
+    AddError(StringPrintf("%s:%d does not jump to the exit block.",
+                          ret->DebugName(),
+                          ret->GetId()));
+  }
+}
+
+void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
+  VisitInstruction(ret);
+  HBasicBlock* successor = ret->GetBlock()->GetSingleSuccessor();
+  if (!successor->IsExitBlock() && !IsExitTryBoundaryIntoExitBlock(successor)) {
+    AddError(StringPrintf("%s:%d does not jump to the exit block.",
+                          ret->DebugName(),
+                          ret->GetId()));
+  }
+}
+
+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+  VisitInstruction(check);
+  HInstruction* input = check->InputAt(1);
+  if (!input->IsLoadClass()) {
+    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+                          check->DebugName(),
+                          check->GetId(),
+                          input->DebugName(),
+                          input->GetId()));
+  }
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+  VisitInstruction(instruction);
+  HInstruction* input = instruction->InputAt(1);
+  if (!input->IsLoadClass()) {
+    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          input->DebugName(),
+                          input->GetId()));
+  }
+}
+
+void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
-  // Ensure the pre-header block is first in the list of
-  // predecessors of a loop header.
-  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
+  if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
     AddError(StringPrintf(
-        "Loop pre-header is not the first predecessor of the loop header %d.",
-        id));
+        "Loop pre-header %d of loop defined by header %d has %zu successors.",
+        loop_information->GetPreHeader()->GetBlockId(),
+        id,
+        loop_information->GetPreHeader()->GetSuccessors().size()));
+  }
+
+  if (loop_information->GetSuspendCheck() == nullptr) {
+    AddError(StringPrintf(
+        "Loop with header %d does not have a suspend check.",
+        loop_header->GetBlockId()));
+  }
+
+  if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+    AddError(StringPrintf(
+        "Loop header %d does not have the loop suspend check as the first instruction.",
+        loop_header->GetBlockId()));
   }
 
   // Ensure the loop header has only one incoming branch and the remaining
@@ -472,8 +596,9 @@
       HBasicBlock* predecessor = loop_header->GetPredecessors()[i];
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
-            "Loop header %d has multiple incoming (non back edge) blocks.",
-            id));
+            "Loop header %d has multiple incoming (non back edge) blocks: %d.",
+            id,
+            predecessor->GetBlockId()));
       }
     }
   }
@@ -493,24 +618,17 @@
             "Loop defined by header %d has an invalid back edge %d.",
             id,
             back_edge_id));
+      } else if (back_edge->GetLoopInformation() != loop_information) {
+        AddError(StringPrintf(
+            "Back edge %d of loop defined by header %d belongs to nested loop "
+            "with header %d.",
+            back_edge_id,
+            id,
+            back_edge->GetLoopInformation()->GetHeader()->GetBlockId()));
       }
     }
   }
 
-  // Ensure all blocks in the loop are live and dominated by the loop header.
-  for (uint32_t i : loop_blocks.Indexes()) {
-    HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
-    if (loop_block == nullptr) {
-      AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
-                            id,
-                            i));
-    } else if (!loop_header->Dominates(loop_block)) {
-      AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
-                            i,
-                            id));
-    }
-  }
-
   // If this is a nested loop, ensure the outer loops contain a superset of the blocks.
   for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) {
     HLoopInformation* outer_info = it.Current();
@@ -521,74 +639,56 @@
                             outer_info->GetHeader()->GetBlockId()));
     }
   }
-}
 
-void SSAChecker::VisitInstruction(HInstruction* instruction) {
-  super_type::VisitInstruction(instruction);
-
-  // Ensure an instruction dominates all its uses.
-  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-       !use_it.Done(); use_it.Advance()) {
-    HInstruction* use = use_it.Current()->GetUser();
-    if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
-      AddError(StringPrintf("Instruction %d in block %d does not dominate "
-                            "use %d in block %d.",
-                            instruction->GetId(), current_block_->GetBlockId(),
-                            use->GetId(), use->GetBlock()->GetBlockId()));
-    }
+  // Ensure the pre-header block is first in the list of predecessors of a loop
+  // header and that the header block is its only successor.
+  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
+    AddError(StringPrintf(
+        "Loop pre-header is not the first predecessor of the loop header %d.",
+        id));
   }
 
-  // Ensure an instruction having an environment is dominated by the
-  // instructions contained in the environment.
-  for (HEnvironment* environment = instruction->GetEnvironment();
-       environment != nullptr;
-       environment = environment->GetParent()) {
-    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-      HInstruction* env_instruction = environment->GetInstructionAt(i);
-      if (env_instruction != nullptr
-          && !env_instruction->StrictlyDominates(instruction)) {
-        AddError(StringPrintf("Instruction %d in environment of instruction %d "
-                              "from block %d does not dominate instruction %d.",
-                              env_instruction->GetId(),
-                              instruction->GetId(),
-                              current_block_->GetBlockId(),
-                              instruction->GetId()));
-      }
+  // Ensure all blocks in the loop are live and, in the case of natural loops,
+  // dominated by the loop header.
+  for (uint32_t i : loop_blocks.Indexes()) {
+    HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
+    if (loop_block == nullptr) {
+      AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
+                            id,
+                            i));
+    } else if (!loop_information->IsIrreducible() && !loop_header->Dominates(loop_block)) {
+      AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
+                            i,
+                            id));
     }
   }
 }
 
-static Primitive::Type PrimitiveKind(Primitive::Type type) {
-  switch (type) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimInt:
-      return Primitive::kPrimInt;
-    default:
-      return type;
-  }
-}
-
-static bool IsSameSizeConstant(HInstruction* insn1, HInstruction* insn2) {
+static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* insn2) {
   return insn1->IsConstant()
       && insn2->IsConstant()
       && Primitive::Is64BitType(insn1->GetType()) == Primitive::Is64BitType(insn2->GetType());
 }
 
-static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) {
+static bool IsConstantEquivalent(const HInstruction* insn1,
+                                 const HInstruction* insn2,
+                                 BitVector* visited) {
   if (insn1->IsPhi() &&
-      insn1->AsPhi()->IsVRegEquivalentOf(insn2) &&
-      insn1->InputCount() == insn2->InputCount()) {
+      insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
+    HConstInputsRef insn1_inputs = insn1->GetInputs();
+    HConstInputsRef insn2_inputs = insn2->GetInputs();
+    if (insn1_inputs.size() != insn2_inputs.size()) {
+      return false;
+    }
+
     // Testing only one of the two inputs for recursion is sufficient.
     if (visited->IsBitSet(insn1->GetId())) {
       return true;
     }
     visited->SetBit(insn1->GetId());
 
-    for (size_t i = 0, e = insn1->InputCount(); i < e; ++i) {
-      if (!IsConstantEquivalent(insn1->InputAt(i), insn2->InputAt(i), visited)) {
+    for (size_t i = 0; i < insn1_inputs.size(); ++i) {
+      if (!IsConstantEquivalent(insn1_inputs[i], insn2_inputs[i], visited)) {
         return false;
       }
     }
@@ -600,23 +700,24 @@
   }
 }
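IsConstantEquivalent recurses over phi inputs that can form cycles (a loop phi
reaches itself through its own inputs), which is why it threads a `visited`
bit vector and treats a revisited node as equivalent, deferring the verdict to
the remaining inputs. The same guard in a self-contained form (generic node
type with a constant payload at the leaves, not ART code):

#include <unordered_set>
#include <vector>

struct Node {
  int value = 0;                      // payload compared at the leaves
  std::vector<const Node*> inputs;
};

// Cycle-guarded structural comparison: mark `a` before descending; hitting a
// marked node means we closed a cycle, so report success for that path and
// let the other input positions decide.
bool StructurallyEquivalent(const Node* a, const Node* b,
                            std::unordered_set<const Node*>* visited) {
  if (a->inputs.empty() || b->inputs.empty()) {
    return a->inputs.empty() && b->inputs.empty() && a->value == b->value;
  }
  if (a->inputs.size() != b->inputs.size()) {
    return false;
  }
  if (!visited->insert(a).second) {
    return true;  // already being compared higher up the recursion
  }
  for (size_t i = 0; i < a->inputs.size(); ++i) {
    if (!StructurallyEquivalent(a->inputs[i], b->inputs[i], visited)) {
      return false;
    }
  }
  return true;
}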
 
-void SSAChecker::VisitPhi(HPhi* phi) {
+void GraphChecker::VisitPhi(HPhi* phi) {
   VisitInstruction(phi);
 
   // Ensure the first input of a phi is not itself.
-  if (phi->InputAt(0) == phi) {
+  ArrayRef<HUserRecord<HInstruction*>> input_records = phi->GetInputRecords();
+  if (input_records[0].GetInstruction() == phi) {
     AddError(StringPrintf("Loop phi %d in block %d is its own first input.",
                           phi->GetId(),
                           phi->GetBlock()->GetBlockId()));
   }
 
   // Ensure that the inputs have the same primitive kind as the phi.
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    HInstruction* input = phi->InputAt(i);
-    if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) {
+  for (size_t i = 0; i < input_records.size(); ++i) {
+    HInstruction* input = input_records[i].GetInstruction();
+    if (Primitive::PrimitiveKind(input->GetType()) != Primitive::PrimitiveKind(phi->GetType())) {
         AddError(StringPrintf(
             "Input %d at index %zu of phi %d from block %d does not have the "
-            "same type as the phi: %s versus %s",
+            "same kind as the phi: %s versus %s",
             input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
             Primitive::PrettyDescriptor(input->GetType()),
             Primitive::PrettyDescriptor(phi->GetType())));
@@ -635,8 +736,7 @@
     // because we do not remove the corresponding inputs when we prove that an
     // instruction cannot throw. Instead, we at least test that all phis have the
     // same, non-zero number of inputs (b/24054676).
-    size_t input_count_this = phi->InputCount();
-    if (input_count_this == 0u) {
+    if (input_records.empty()) {
       AddError(StringPrintf("Phi %d in catch block %d has zero inputs.",
                             phi->GetId(),
                             phi->GetBlock()->GetBlockId()));
@@ -644,12 +744,12 @@
       HInstruction* next_phi = phi->GetNext();
       if (next_phi != nullptr) {
         size_t input_count_next = next_phi->InputCount();
-        if (input_count_this != input_count_next) {
+        if (input_records.size() != input_count_next) {
           AddError(StringPrintf("Phi %d in catch block %d has %zu inputs, "
                                 "but phi %d has %zu inputs.",
                                 phi->GetId(),
                                 phi->GetBlock()->GetBlockId(),
-                                input_count_this,
+                                input_records.size(),
                                 next_phi->GetId(),
                                 input_count_next));
         }
@@ -659,17 +759,17 @@
     // Ensure the number of inputs of a non-catch phi is the same as the number
     // of its predecessors.
     const ArenaVector<HBasicBlock*>& predecessors = phi->GetBlock()->GetPredecessors();
-    if (phi->InputCount() != predecessors.size()) {
+    if (input_records.size() != predecessors.size()) {
       AddError(StringPrintf(
           "Phi %d in block %d has %zu inputs, "
           "but block %d has %zu predecessors.",
-          phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
+          phi->GetId(), phi->GetBlock()->GetBlockId(), input_records.size(),
           phi->GetBlock()->GetBlockId(), predecessors.size()));
     } else {
       // Ensure phi input at index I either comes from the Ith
       // predecessor or from a block that dominates this predecessor.
-      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-        HInstruction* input = phi->InputAt(i);
+      for (size_t i = 0; i < input_records.size(); ++i) {
+        HInstruction* input = input_records[i].GetInstruction();
         HBasicBlock* predecessor = predecessors[i];
         if (!(input->GetBlock() == predecessor
               || input->GetBlock()->Dominates(predecessor))) {
@@ -697,33 +797,50 @@
     }
   }
 
-  // Test phi equivalents. There should not be two of the same type and they
-  // should only be created for constants which were untyped in DEX.
-  for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-    HPhi* other_phi = phi_it.Current()->AsPhi();
-    if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
-      if (phi->GetType() == other_phi->GetType()) {
-        std::stringstream type_str;
-        type_str << phi->GetType();
-        AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
-                              phi->GetId(),
-                              phi->GetRegNumber(),
-                              type_str.str().c_str()));
-      } else {
-        ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
-        if (!IsConstantEquivalent(phi, other_phi, &visited)) {
-          AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
-                                "are not equivalents of constants.",
+  // Test phi equivalents. There should not be two phis of the same type for the same
+  // virtual register, and equivalents should only be created for constants which were
+  // untyped in DEX (e.g. a zero constant used both as an int and as a float). Note that
+  // this test can be skipped for a synthetic phi (indicated by the lack of a virtual
+  // register).
+  if (phi->GetRegNumber() != kNoRegNumber) {
+    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+         !phi_it.Done();
+         phi_it.Advance()) {
+      HPhi* other_phi = phi_it.Current()->AsPhi();
+      if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
+        if (phi->GetType() == other_phi->GetType()) {
+          std::stringstream type_str;
+          type_str << phi->GetType();
+          AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
                                 phi->GetId(),
-                                other_phi->GetId(),
-                                phi->GetRegNumber()));
+                                phi->GetRegNumber(),
+                                type_str.str().c_str()));
+        } else if (phi->GetType() == Primitive::kPrimNot) {
+          std::stringstream type_str;
+          type_str << other_phi->GetType();
+          AddError(StringPrintf(
+              "Equivalent non-reference phi (%d) found for VReg %d with type: %s.",
+              phi->GetId(),
+              phi->GetRegNumber(),
+              type_str.str().c_str()));
+        } else {
+          // If we get here, allocate all the necessary storage at once, because
+          // the BitVector reallocation strategy has very bad worst-case behavior:
+          // setting the highest possible bit first grows the vector to its final
+          // size in a single step, and ClearAllBits() then resets it in place.
+          ArenaBitVector& visited = visited_storage_;
+          visited.SetBit(GetGraph()->GetCurrentInstructionId());
+          visited.ClearAllBits();
+          if (!IsConstantEquivalent(phi, other_phi, &visited)) {
+            AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
+                                  "are not equivalents of constants.",
+                                  phi->GetId(),
+                                  other_phi->GetId(),
+                                  phi->GetRegNumber()));
+          }
         }
       }
     }
   }
 }
 
-void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
+void GraphChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
   HInstruction* input = instruction->InputAt(input_index);
   if (input->IsIntConstant()) {
     int32_t value = input->AsIntConstant()->GetValue();
@@ -735,13 +852,11 @@
           static_cast<int>(input_index),
           value));
     }
-  } else if (input->GetType() == Primitive::kPrimInt
-             && (input->IsPhi() || input->IsAnd() || input->IsOr() || input->IsXor())) {
-    // TODO: We need a data-flow analysis to determine if the Phi or
-    //       binary operation is actually Boolean. Allow for now.
-  } else if (input->GetType() != Primitive::kPrimBoolean) {
+  } else if (Primitive::PrimitiveKind(input->GetType()) != Primitive::kPrimInt) {
+    // TODO: We need a data-flow analysis to determine if an input like Phi,
+    //       Select or a binary operation is actually Boolean. For now, any input
+    //       of int kind is accepted, and only non-int kinds are reported below.
     AddError(StringPrintf(
-        "%s instruction %d has a non-Boolean input %d whose type is: %s.",
+        "%s instruction %d has a non-integer input %d whose type is: %s.",
         instruction->DebugName(),
         instruction->GetId(),
         static_cast<int>(input_index),
@@ -749,7 +864,7 @@
   }
 }
 
-void SSAChecker::VisitPackedSwitch(HPackedSwitch* instruction) {
+void GraphChecker::VisitPackedSwitch(HPackedSwitch* instruction) {
   VisitInstruction(instruction);
   // Check that the number of block successors matches the switch count plus
   // one for the default block.
@@ -765,17 +880,22 @@
   }
 }
 
-void SSAChecker::VisitIf(HIf* instruction) {
+void GraphChecker::VisitIf(HIf* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 0);
 }
 
-void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) {
+void GraphChecker::VisitSelect(HSelect* instruction) {
+  VisitInstruction(instruction);
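+  // The third input of HSelect is the condition; the first two are the values
+  // to select between.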
+  HandleBooleanInput(instruction, 2);
+}
+
+void GraphChecker::VisitBooleanNot(HBooleanNot* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 0);
 }
 
-void SSAChecker::VisitCondition(HCondition* op) {
+void GraphChecker::VisitCondition(HCondition* op) {
   VisitInstruction(op);
   if (op->GetType() != Primitive::kPrimBoolean) {
     AddError(StringPrintf(
@@ -785,9 +905,9 @@
   }
   HInstruction* lhs = op->InputAt(0);
   HInstruction* rhs = op->InputAt(1);
-  if (PrimitiveKind(lhs->GetType()) != PrimitiveKind(rhs->GetType())) {
+  if (Primitive::PrimitiveKind(lhs->GetType()) != Primitive::PrimitiveKind(rhs->GetType())) {
     AddError(StringPrintf(
-        "Condition %s %d has inputs of different types: %s, and %s.",
+        "Condition %s %d has inputs of different kinds: %s, and %s.",
         op->DebugName(), op->GetId(),
         Primitive::PrettyDescriptor(lhs->GetType()),
         Primitive::PrettyDescriptor(rhs->GetType())));
@@ -805,49 +925,79 @@
   }
 }
 
-void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
+void GraphChecker::VisitNeg(HNeg* instruction) {
+  VisitInstruction(instruction);
+  Primitive::Type input_type = instruction->InputAt(0)->GetType();
+  Primitive::Type result_type = instruction->GetType();
+  if (result_type != Primitive::PrimitiveKind(input_type)) {
+    AddError(StringPrintf("Binary operation %s %d has a result type different "
+                          "from its input kind: %s vs %s.",
+                          instruction->DebugName(), instruction->GetId(),
+                          Primitive::PrettyDescriptor(result_type),
+                          Primitive::PrettyDescriptor(input_type)));
+  }
+}
+
+void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) {
   VisitInstruction(op);
-  if (op->IsUShr() || op->IsShr() || op->IsShl()) {
-    if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
-      AddError(StringPrintf(
-          "Shift operation %s %d has a non-int kind second input: "
-          "%s of type %s.",
-          op->DebugName(), op->GetId(),
-          op->InputAt(1)->DebugName(),
-          Primitive::PrettyDescriptor(op->InputAt(1)->GetType())));
+  Primitive::Type lhs_type = op->InputAt(0)->GetType();
+  Primitive::Type rhs_type = op->InputAt(1)->GetType();
+  Primitive::Type result_type = op->GetType();
+
+  // Type consistency between inputs.
+  if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
+    if (Primitive::PrimitiveKind(rhs_type) != Primitive::kPrimInt) {
+      AddError(StringPrintf("Shift/rotate operation %s %d has a non-int kind second input: "
+                            "%s of type %s.",
+                            op->DebugName(), op->GetId(),
+                            op->InputAt(1)->DebugName(),
+                            Primitive::PrettyDescriptor(rhs_type)));
     }
   } else {
-    if (PrimitiveKind(op->InputAt(0)->GetType()) != PrimitiveKind(op->InputAt(1)->GetType())) {
-      AddError(StringPrintf(
-          "Binary operation %s %d has inputs of different types: "
-          "%s, and %s.",
-          op->DebugName(), op->GetId(),
-          Primitive::PrettyDescriptor(op->InputAt(0)->GetType()),
-          Primitive::PrettyDescriptor(op->InputAt(1)->GetType())));
+    if (Primitive::PrimitiveKind(lhs_type) != Primitive::PrimitiveKind(rhs_type)) {
+      AddError(StringPrintf("Binary operation %s %d has inputs of different kinds: %s, and %s.",
+                            op->DebugName(), op->GetId(),
+                            Primitive::PrettyDescriptor(lhs_type),
+                            Primitive::PrettyDescriptor(rhs_type)));
     }
   }
 
+  // Type consistency between result and input(s).
   if (op->IsCompare()) {
-    if (op->GetType() != Primitive::kPrimInt) {
-      AddError(StringPrintf(
-          "Compare operation %d has a non-int result type: %s.",
-          op->GetId(),
-          Primitive::PrettyDescriptor(op->GetType())));
+    if (result_type != Primitive::kPrimInt) {
+      AddError(StringPrintf("Compare operation %d has a non-int result type: %s.",
+                            op->GetId(),
+                            Primitive::PrettyDescriptor(result_type)));
+    }
+  } else if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
+    // Only check the first input (value), as the second one (distance)
+    // must invariably be of kind `int`.
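+    // For example, a `long << int` shift must produce a `long` result (the kind
+    // of the shifted value), never an `int` one.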
+    if (result_type != Primitive::PrimitiveKind(lhs_type)) {
+      AddError(StringPrintf("Shift/rotate operation %s %d has a result type different "
+                            "from its left-hand side (value) input kind: %s vs %s.",
+                            op->DebugName(), op->GetId(),
+                            Primitive::PrettyDescriptor(result_type),
+                            Primitive::PrettyDescriptor(lhs_type)));
     }
   } else {
-    // Use the first input, so that we can also make this check for shift operations.
-    if (PrimitiveKind(op->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) {
-      AddError(StringPrintf(
-          "Binary operation %s %d has a result type different "
-          "from its input type: %s vs %s.",
-          op->DebugName(), op->GetId(),
-          Primitive::PrettyDescriptor(op->GetType()),
-          Primitive::PrettyDescriptor(op->InputAt(0)->GetType())));
+    if (Primitive::PrimitiveKind(result_type) != Primitive::PrimitiveKind(lhs_type)) {
+      AddError(StringPrintf("Binary operation %s %d has a result kind different "
+                            "from its left-hand side input kind: %s vs %s.",
+                            op->DebugName(), op->GetId(),
+                            Primitive::PrettyDescriptor(result_type),
+                            Primitive::PrettyDescriptor(lhs_type)));
+    }
+    if (Primitive::PrimitiveKind(result_type) != Primitive::PrimitiveKind(rhs_type)) {
+      AddError(StringPrintf("Binary operation %s %d has a result kind different "
+                            "from its right-hand side input kind: %s vs %s.",
+                            op->DebugName(), op->GetId(),
+                            Primitive::PrettyDescriptor(result_type),
+                            Primitive::PrettyDescriptor(rhs_type)));
     }
   }
 }
 
-void SSAChecker::VisitConstant(HConstant* instruction) {
+void GraphChecker::VisitConstant(HConstant* instruction) {
   HBasicBlock* block = instruction->GetBlock();
   if (!block->IsEntryBlock()) {
     AddError(StringPrintf(
@@ -858,4 +1008,31 @@
   }
 }
 
+void GraphChecker::VisitBoundType(HBoundType* instruction) {
+  VisitInstruction(instruction);
+
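+  // Note: reading the upper-bound ReferenceTypeInfo requires the mutator lock,
+  // hence the ScopedObjectAccess below.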
+  ScopedObjectAccess soa(Thread::Current());
+  if (!instruction->GetUpperBound().IsValid()) {
+    AddError(StringPrintf(
+        "%s %d does not have a valid upper bound RTI.",
+        instruction->DebugName(),
+        instruction->GetId()));
+  }
+}
+
+void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) {
+  VisitInstruction(instruction);
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+  // Invariant: We should never generate a conversion to a Boolean value.
+  if (result_type == Primitive::kPrimBoolean) {
+    AddError(StringPrintf(
+        "%s %d converts to a %s (from a %s).",
+        instruction->DebugName(),
+        instruction->GetId(),
+        Primitive::PrettyDescriptor(result_type),
+        Primitive::PrettyDescriptor(input_type)));
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index abf3659..3060c80 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -30,33 +30,48 @@
     : HGraphDelegateVisitor(graph),
       errors_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)),
       dump_prefix_(dump_prefix),
-      seen_ids_(graph->GetArena(), graph->GetCurrentInstructionId(), false) {}
+      seen_ids_(graph->GetArena(),
+                graph->GetCurrentInstructionId(),
+                false,
+                kArenaAllocGraphChecker),
+      blocks_storage_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)),
+      visited_storage_(graph->GetArena(), 0u, true, kArenaAllocGraphChecker) {}
 
-  // Check the whole graph (in insertion order).
-  virtual void Run() { VisitInsertionOrder(); }
+  // Check the whole graph (in reverse post-order).
+  void Run() {
+    // VisitReversePostOrder is used instead of VisitInsertionOrder,
+    // as the latter might visit dead blocks removed by the dominator
+    // computation.
+    VisitReversePostOrder();
+  }
 
-  // Check `block`.
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
 
-  // Check `instruction`.
   void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  void VisitPhi(HPhi* phi) OVERRIDE;
 
-  // Perform control-flow graph checks on instruction.
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
-
-  // Check that the HasBoundsChecks() flag is set for bounds checks.
+  void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
+  void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
+  void VisitBoundType(HBoundType* instruction) OVERRIDE;
   void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
-
-  // Check successors of blocks ending in TryBoundary.
-  void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
-
-  // Check that HCheckCast and HInstanceOf have HLoadClass as second input.
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
+  void VisitCondition(HCondition* op) OVERRIDE;
+  void VisitConstant(HConstant* instruction) OVERRIDE;
+  void VisitDeoptimize(HDeoptimize* instruction) OVERRIDE;
+  void VisitIf(HIf* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
-
-  // Check that the Return and ReturnVoid jump to the exit block.
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitLoadException(HLoadException* load) OVERRIDE;
+  void VisitNeg(HNeg* instruction) OVERRIDE;
+  void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
   void VisitReturn(HReturn* ret) OVERRIDE;
   void VisitReturnVoid(HReturnVoid* ret) OVERRIDE;
+  void VisitSelect(HSelect* instruction) OVERRIDE;
+  void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+
+  void HandleLoop(HBasicBlock* loop_header);
+  void HandleBooleanInput(HInstruction* instruction, size_t input_index);
 
   // Was the last visit of the graph valid?
   bool IsValid() const {
@@ -91,47 +106,13 @@
   const char* const dump_prefix_;
   ArenaBitVector seen_ids_;
 
+  // To reduce the total arena memory allocation, we reuse the same storage.
+  ArenaVector<HBasicBlock*> blocks_storage_;
+  ArenaBitVector visited_storage_;
+
   DISALLOW_COPY_AND_ASSIGN(GraphChecker);
 };
 
-
-// An SSA graph visitor performing various checks.
-class SSAChecker : public GraphChecker {
- public:
-  typedef GraphChecker super_type;
-
-  explicit SSAChecker(HGraph* graph)
-    : GraphChecker(graph, "art::SSAChecker: ") {}
-
-  // Check the whole graph (in reverse post-order).
-  void Run() OVERRIDE {
-    // VisitReversePostOrder is used instead of VisitInsertionOrder,
-    // as the latter might visit dead blocks removed by the dominator
-    // computation.
-    VisitReversePostOrder();
-  }
-
-  // Perform SSA form checks on `block`.
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
-  // Loop-related checks from block `loop_header`.
-  void CheckLoop(HBasicBlock* loop_header);
-
-  // Perform SSA form checks on instructions.
-  void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  void VisitPhi(HPhi* phi) OVERRIDE;
-  void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
-  void VisitCondition(HCondition* op) OVERRIDE;
-  void VisitIf(HIf* instruction) OVERRIDE;
-  void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
-  void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
-  void VisitConstant(HConstant* instruction) OVERRIDE;
-
-  void HandleBooleanInput(HInstruction* instruction, size_t input_index);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(SSAChecker);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index fee56c7..2b82319 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -17,8 +17,6 @@
 #include "graph_checker.h"
 #include "optimizing_unit_test.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
 /**
@@ -40,10 +38,10 @@
   graph->AddBlock(exit_block);
   graph->SetExitBlock(exit_block);
   entry_block->AddSuccessor(exit_block);
+  graph->BuildDominatorTree();
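+  // The merged checker walks the graph in reverse post-order, which is only
+  // computed once BuildDominatorTree() has run.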
   return graph;
 }
 
-
 static void TestCode(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -55,29 +53,16 @@
   ASSERT_TRUE(graph_checker.IsValid());
 }
 
-static void TestCodeSSA(const uint16_t* data) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateCFG(&allocator, data);
-  ASSERT_NE(graph, nullptr);
+class GraphCheckerTest : public CommonCompilerTest {};
 
-  graph->BuildDominatorTree();
-  graph->TransformToSsa();
-
-  SSAChecker ssa_checker(graph);
-  ssa_checker.Run();
-  ASSERT_TRUE(ssa_checker.IsValid());
-}
-
-
-TEST(GraphChecker, ReturnVoid) {
+TEST_F(GraphCheckerTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::RETURN_VOID);
 
   TestCode(data);
 }
 
-TEST(GraphChecker, CFG1) {
+TEST_F(GraphCheckerTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::GOTO | 0x100,
       Instruction::RETURN_VOID);
@@ -85,7 +70,7 @@
   TestCode(data);
 }
 
-TEST(GraphChecker, CFG2) {
+TEST_F(GraphCheckerTest, CFG2) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -95,7 +80,7 @@
   TestCode(data);
 }
 
-TEST(GraphChecker, CFG3) {
+TEST_F(GraphCheckerTest, CFG3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -107,7 +92,7 @@
 
 // Test case with an invalid graph containing inconsistent
 // predecessor/successor arcs in CFG.
-TEST(GraphChecker, InconsistentPredecessorsAndSuccessors) {
+TEST_F(GraphCheckerTest, InconsistentPredecessorsAndSuccessors) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
@@ -125,7 +110,7 @@
 
 // Test case with an invalid graph containing a non-branch last
 // instruction in a block.
-TEST(GraphChecker, BlockEndingWithNonBranchInstruction) {
+TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
@@ -145,7 +130,7 @@
   ASSERT_FALSE(graph_checker.IsValid());
 }
 
-TEST(SSAChecker, SSAPhi) {
+TEST_F(GraphCheckerTest, SSAPhi) {
   // This code creates one Phi function during the conversion to SSA form.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -153,7 +138,7 @@
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::RETURN | 0 << 8);
 
-  TestCodeSSA(data);
+  TestCode(data);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index d4b9b71..d530564 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -164,7 +164,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
@@ -199,7 +199,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4111671..b3d5341 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -21,14 +21,17 @@
 #include <cctype>
 #include <sstream>
 
+#include "bounds_check_elimination.h"
+#include "builder.h"
 #include "code_generator.h"
 #include "dead_code_elimination.h"
 #include "disassembler.h"
+#include "inliner.h"
 #include "licm.h"
 #include "nodes.h"
 #include "optimization.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 
@@ -95,7 +98,9 @@
                                               DisassemblerOptions* options);
 class HGraphVisualizerDisassembler {
  public:
-  HGraphVisualizerDisassembler(InstructionSet instruction_set, const uint8_t* base_address)
+  HGraphVisualizerDisassembler(InstructionSet instruction_set,
+                               const uint8_t* base_address,
+                               const uint8_t* end_address)
       : instruction_set_(instruction_set), disassembler_(nullptr) {
     libart_disassembler_handle_ =
         dlopen(kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so", RTLD_NOW);
@@ -116,7 +121,11 @@
             instruction_set,
             new DisassemblerOptions(/* absolute_addresses */ false,
                                     base_address,
-                                    /* can_read_literals */ true)));
+                                    end_address,
+                                    /* can_read_literals */ true,
+                                    Is64BitInstructionSet(instruction_set)
+                                        ? &Thread::DumpThreadOffset<PointerSize::k64>
+                                        : &Thread::DumpThreadOffset<PointerSize::k32>)));
   }
 
   ~HGraphVisualizerDisassembler() {
@@ -171,45 +180,53 @@
         disassembler_(disasm_info_ != nullptr
                       ? new HGraphVisualizerDisassembler(
                             codegen_.GetInstructionSet(),
-                            codegen_.GetAssembler().CodeBufferBaseAddress())
+                            codegen_.GetAssembler().CodeBufferBaseAddress(),
+                            codegen_.GetAssembler().CodeBufferBaseAddress()
+                                + codegen_.GetAssembler().CodeSize())
                       : nullptr),
         indent_(0) {}
 
+  void Flush() {
+    // We use "\n" instead of std::endl to avoid implicit flushing which
+    // generates too many syscalls during debug-GC tests (b/27826765).
+    output_ << std::flush;
+  }
+
   void StartTag(const char* name) {
     AddIndent();
-    output_ << "begin_" << name << std::endl;
+    output_ << "begin_" << name << "\n";
     indent_++;
   }
 
   void EndTag(const char* name) {
     indent_--;
     AddIndent();
-    output_ << "end_" << name << std::endl;
+    output_ << "end_" << name << "\n";
   }
 
   void PrintProperty(const char* name, const char* property) {
     AddIndent();
-    output_ << name << " \"" << property << "\"" << std::endl;
+    output_ << name << " \"" << property << "\"\n";
   }
 
   void PrintProperty(const char* name, const char* property, int id) {
     AddIndent();
-    output_ << name << " \"" << property << id << "\"" << std::endl;
+    output_ << name << " \"" << property << id << "\"\n";
   }
 
   void PrintEmptyProperty(const char* name) {
     AddIndent();
-    output_ << name << std::endl;
+    output_ << name << "\n";
   }
 
   void PrintTime(const char* name) {
     AddIndent();
-    output_ << name << " " << time(nullptr) << std::endl;
+    output_ << name << " " << time(nullptr) << "\n";
   }
 
   void PrintInt(const char* name, int value) {
     AddIndent();
-    output_ << name << " " << value << std::endl;
+    output_ << name << " " << value << "\n";
   }
 
   void AddIndent() {
@@ -246,24 +263,22 @@
     if (block->IsEntryBlock() && (disasm_info_ != nullptr)) {
       output_ << " \"" << kDisassemblyBlockFrameEntry << "\" ";
     }
-    output_<< std::endl;
+    output_ << "\n";
   }
 
   void PrintSuccessors(HBasicBlock* block) {
     AddIndent();
     output_ << "successors";
-    for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
-      HBasicBlock* successor = block->GetSuccessors()[i];
+    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
       output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
-    output_<< std::endl;
+    output_ << "\n";
   }
 
   void PrintExceptionHandlers(HBasicBlock* block) {
     AddIndent();
     output_ << "xhandlers";
-    for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) {
-      HBasicBlock* handler = block->GetSuccessors()[i];
+    for (HBasicBlock* handler : block->GetExceptionalSuccessors()) {
       output_ << " \"B" << handler->GetBlockId() << "\" ";
     }
     if (block->IsExitBlock() &&
@@ -271,7 +286,7 @@
         !disasm_info_->GetSlowPathIntervals().empty()) {
       output_ << " \"" << kDisassemblyBlockSlowPaths << "\" ";
     }
-    output_<< std::endl;
+    output_ << "\n";
   }
 
   void DumpLocation(std::ostream& stream, const Location& location) {
@@ -286,6 +301,12 @@
         stream << constant->AsIntConstant()->GetValue();
       } else if (constant->IsLongConstant()) {
         stream << constant->AsLongConstant()->GetValue();
+      } else if (constant->IsFloatConstant()) {
+        stream << constant->AsFloatConstant()->GetValue();
+      } else if (constant->IsDoubleConstant()) {
+        stream << constant->AsDoubleConstant()->GetValue();
+      } else if (constant->IsNullConstant()) {
+        stream << "null";
       }
     } else if (location.IsInvalid()) {
       stream << "invalid";
@@ -360,25 +381,62 @@
   }
 
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    StartAttributeStream("load_kind") << load_class->GetLoadKind();
+    const char* descriptor = load_class->GetDexFile().GetTypeDescriptor(
+        load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex()));
+    StartAttributeStream("class_name") << PrettyDescriptor(descriptor);
     StartAttributeStream("gen_clinit_check") << std::boolalpha
         << load_class->MustGenerateClinitCheck() << std::noboolalpha;
     StartAttributeStream("needs_access_check") << std::boolalpha
         << load_class->NeedsAccessCheck() << std::noboolalpha;
   }
 
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    StartAttributeStream("load_kind") << load_string->GetLoadKind();
+  }
+
   void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+    StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
         << check_cast->MustDoNullCheck() << std::noboolalpha;
   }
 
   void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
+    StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
 
+  void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
+    StartAttributeStream("is_string_length") << std::boolalpha
+        << array_length->IsStringLength() << std::noboolalpha;
+    if (array_length->IsEmittedAtUseSite()) {
+      StartAttributeStream("emitted_at_use") << "true";
+    }
+  }
+
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
+    StartAttributeStream("is_string_char_at") << std::boolalpha
+        << bounds_check->IsStringCharAt() << std::noboolalpha;
+  }
+
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    StartAttributeStream("is_string_char_at") << std::boolalpha
+        << array_get->IsStringCharAt() << std::noboolalpha;
+  }
+
   void VisitArraySet(HArraySet* array_set) OVERRIDE {
     StartAttributeStream("value_can_be_null") << std::boolalpha
         << array_set->GetValueCanBeNull() << std::noboolalpha;
+    StartAttributeStream("needs_type_check") << std::boolalpha
+        << array_set->NeedsTypeCheck() << std::noboolalpha;
+  }
+
+  void VisitCompare(HCompare* compare) OVERRIDE {
+    ComparisonBias bias = compare->GetBias();
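+    // The bias encodes how float/double comparisons order NaN: gt-biased
+    // compares (cmpg) yield 1 on NaN, lt-biased compares (cmpl) yield -1.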
+    StartAttributeStream("bias") << (bias == ComparisonBias::kGtBias
+                                     ? "gt"
+                                     : (bias == ComparisonBias::kLtBias ? "lt" : "none"));
   }
 
   void VisitInvoke(HInvoke* invoke) OVERRIDE {
@@ -394,10 +452,30 @@
 
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     VisitInvoke(invoke);
-    StartAttributeStream("recursive") << std::boolalpha
-                                      << invoke->IsRecursive()
-                                      << std::noboolalpha;
+    StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+    if (invoke->IsStatic()) {
+      StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
+    }
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    VisitInvoke(invoke);
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+  }
+
+  void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE {
+    StartAttributeStream("field_name") << PrettyField(iget->GetFieldInfo().GetFieldIndex(),
+                                                      iget->GetFieldInfo().GetDexFile(),
+                                                      /* with type */ false);
+    StartAttributeStream("field_type") << iget->GetFieldType();
+  }
+
+  void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE {
+    StartAttributeStream("field_name") << PrettyField(iset->GetFieldInfo().GetFieldIndex(),
+                                                      iset->GetFieldInfo().GetDexFile(),
+                                                      /* with type */ false);
+    StartAttributeStream("field_type") << iset->GetFieldType();
   }
 
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
@@ -420,23 +498,38 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+  void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+
+  void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+  void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
+    if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+      StartAttributeStream("shift") << instruction->GetShiftAmount();
+    }
+  }
+#endif
+
   bool IsPass(const char* name) {
     return strcmp(pass_name_, name) == 0;
   }
 
-  bool IsReferenceTypePropagationPass() {
-    return strstr(pass_name_, ReferenceTypePropagation::kReferenceTypePropagationPassName)
-        != nullptr;
-  }
-
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
-    if (instruction->InputCount() > 0) {
-      StringList inputs;
-      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
-        inputs.NewEntryStream() << GetTypeId(it.Current()->GetType()) << it.Current()->GetId();
+    HConstInputsRef inputs = instruction->GetInputs();
+    if (!inputs.empty()) {
+      StringList input_list;
+      for (const HInstruction* input : inputs) {
+        input_list.NewEntryStream() << GetTypeId(input->GetType()) << input->GetId();
       }
-      StartAttributeStream() << inputs;
+      StartAttributeStream() << input_list;
     }
     instruction->Accept(this);
     if (instruction->HasEnvironment()) {
@@ -472,27 +565,39 @@
         StartAttributeStream("is_low") << interval->IsLowInterval();
         StartAttributeStream("is_high") << interval->IsHighInterval();
       }
-    } else if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
+    }
+
+    if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
       StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       LocationSummary* locations = instruction->GetLocations();
       if (locations != nullptr) {
-        StringList inputs;
-        for (size_t i = 0; i < instruction->InputCount(); ++i) {
-          DumpLocation(inputs.NewEntryStream(), locations->InAt(i));
+        StringList input_list;
+        for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+          DumpLocation(input_list.NewEntryStream(), locations->InAt(i));
         }
         std::ostream& attr = StartAttributeStream("locations");
-        attr << inputs << "->";
+        attr << input_list << "->";
         DumpLocation(attr, locations->Out());
       }
-    } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
-               || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) {
-      HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-      if (info == nullptr) {
-        StartAttributeStream("loop") << "none";
+    }
+
+    HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+    if (loop_info == nullptr) {
+      StartAttributeStream("loop") << "none";
+    } else {
+      StartAttributeStream("loop") << "B" << loop_info->GetHeader()->GetBlockId();
+      HLoopInformation* outer = loop_info->GetPreHeader()->GetLoopInformation();
+      if (outer != nullptr) {
+        StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
       } else {
-        StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
+        StartAttributeStream("outer_loop") << "none";
       }
-    } else if (IsReferenceTypePropagationPass()
+      StartAttributeStream("irreducible")
+          << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
+    }
+
+    if ((IsPass(HGraphBuilder::kBuilderPassName)
+        || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
         ? instruction->AsLoadClass()->GetLoadedClassRTI()
@@ -506,8 +611,14 @@
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
       } else {
-        DCHECK(!is_after_pass_)
-            << "Expected a valid rti after reference type propagation";
+        // The NullConstant may be added to the graph during other passes that happen between
+        // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
+        // doesn't run or doesn't inline anything, the NullConstant remains untyped.
+        // So we should check NullConstants for validity only after reference type propagation.
+        DCHECK(graph_in_bad_state_ ||
+               (!is_after_pass_ && IsPass(HGraphBuilder::kBuilderPassName)))
+            << instruction->DebugName() << instruction->GetId() << " has invalid rti "
+            << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
       }
     }
     if (disasm_info_ != nullptr) {
@@ -517,7 +628,7 @@
       auto it = disasm_info_->GetInstructionIntervals().find(instruction);
       if (it != disasm_info_->GetInstructionIntervals().end()
           && it->second.start != it->second.end) {
-        output_ << std::endl;
+        output_ << "\n";
         disassembler_->Disassemble(output_, it->second.start, it->second.end);
       }
     }
@@ -527,17 +638,12 @@
     for (HInstructionIterator it(list); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
       int bci = 0;
-      size_t num_uses = 0;
-      for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-           !use_it.Done();
-           use_it.Advance()) {
-        ++num_uses;
-      }
+      size_t num_uses = instruction->GetUses().SizeSlow();
       AddIndent();
       output_ << bci << " " << num_uses << " "
               << GetTypeId(instruction->GetType()) << instruction->GetId() << " ";
       PrintInstruction(instruction);
-      output_ << " " << kEndInstructionMarker << std::endl;
+      output_ << " " << kEndInstructionMarker << "\n";
     }
   }
 
@@ -581,10 +687,10 @@
     output_ << "    0 0 disasm " << kDisassemblyBlockFrameEntry << " ";
     GeneratedCodeInterval frame_entry = disasm_info_->GetFrameEntryInterval();
     if (frame_entry.start != frame_entry.end) {
-      output_ << std::endl;
+      output_ << "\n";
       disassembler_->Disassemble(output_, frame_entry.start, frame_entry.end);
     }
-    output_ << kEndInstructionMarker << std::endl;
+    output_ << kEndInstructionMarker << "\n";
     DumpEndOfDisassemblyBlock();
   }
 
@@ -600,9 +706,9 @@
         GetGraph()->HasExitBlock() ? GetGraph()->GetExitBlock()->GetBlockId() : -1,
         -1);
     for (SlowPathCodeInfo info : disasm_info_->GetSlowPathIntervals()) {
-      output_ << "    0 0 disasm " << info.slow_path->GetDescription() << std::endl;
+      output_ << "    0 0 disasm " << info.slow_path->GetDescription() << "\n";
       disassembler_->Disassemble(output_, info.code_interval.start, info.code_interval.end);
-      output_ << kEndInstructionMarker << std::endl;
+      output_ << kEndInstructionMarker << "\n";
     }
     DumpEndOfDisassemblyBlock();
   }
@@ -623,6 +729,7 @@
       DumpDisassemblyBlockForSlowPaths();
     }
     EndTag("cfg");
+    Flush();
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -659,10 +766,10 @@
       HInstruction* instruction = it.Current();
       output_ << instruction->GetId() << " " << GetTypeId(instruction->GetType())
               << instruction->GetId() << "[ ";
-      for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
-        output_ << inputs.Current()->GetId() << " ";
+      for (const HInstruction* input : instruction->GetInputs()) {
+        output_ << input->GetId() << " ";
       }
-      output_ << "]" << std::endl;
+      output_ << "]\n";
     }
     EndTag("locals");
     EndTag("states");
@@ -704,6 +811,7 @@
   printer.PrintProperty("method", method_name);
   printer.PrintTime("date");
   printer.EndTag("compilation");
+  printer.Flush();
 }
 
 void HGraphVisualizer::DumpGraph(const char* pass_name,
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c36de84..1e86b75 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -40,8 +40,8 @@
       : allocator_(allocator),
         num_buckets_(kMinimumNumberOfBuckets),
         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
-        buckets_owned_(allocator, num_buckets_, false),
-        num_entries_(0) {
+        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
+        num_entries_(0u) {
     // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
     DCHECK(IsPowerOfTwo(num_buckets_));
     buckets_owned_.SetInitialBits(num_buckets_);
@@ -49,29 +49,35 @@
 
   // Copy constructor. Depending on the load factor, it will either make a deep
   // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
-  ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy)
+  ValueSet(ArenaAllocator* allocator, const ValueSet& other)
       : allocator_(allocator),
-        num_buckets_(to_copy.IdealBucketCount()),
+        num_buckets_(other.IdealBucketCount()),
         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
-        buckets_owned_(allocator, num_buckets_, false),
-        num_entries_(to_copy.num_entries_) {
+        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
+        num_entries_(0u) {
     // ArenaAllocator returns zeroed memory, so entries of buckets_ and
     // buckets_owned_ are initialized to null and false, respectively.
     DCHECK(IsPowerOfTwo(num_buckets_));
-    if (num_buckets_ == to_copy.num_buckets_) {
-      // Hash table remains the same size. We copy the bucket pointers and leave
-      // all buckets_owned_ bits false.
-      memcpy(buckets_, to_copy.buckets_, num_buckets_ * sizeof(Node*));
+    PopulateFromInternal(other, /* is_dirty */ false);
+  }
+
+  // Erases all values in this set and populates it with values from `other`.
+  void PopulateFrom(const ValueSet& other) {
+    if (this == &other) {
+      return;
+    }
+    PopulateFromInternal(other, /* is_dirty */ true);
+  }
+
+  // Returns true if `this` has enough buckets so that if `other` is copied into
+  // it, the load factor will not cross the upper threshold.
+  // If `exact_match` is set, true is returned only if `this` has the ideal
+  // number of buckets; otherwise, a larger number of buckets is also accepted.
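+  // Callers first look for an exact match and only fall back to a larger set
+  // if none exists (see FindVisitedBlockWithRecyclableSet below).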
+  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
+    if (exact_match) {
+      return other.IdealBucketCount() == num_buckets_;
     } else {
-      // Hash table size changes. We copy and rehash all entries, and set all
-      // buckets_owned_ bits to true.
-      for (size_t i = 0; i < to_copy.num_buckets_; ++i) {
-        for (Node* node = to_copy.buckets_[i]; node != nullptr; node = node->GetNext()) {
-          size_t new_index = BucketIndex(node->GetHashCode());
-          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
-        }
-      }
-      buckets_owned_.SetInitialBits(num_buckets_);
+      return other.IdealBucketCount() <= num_buckets_;
     }
   }
 
@@ -125,6 +131,14 @@
     });
   }
 
+  void Clear() {
+    num_entries_ = 0;
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      buckets_[i] = nullptr;
+    }
+    buckets_owned_.SetInitialBits(num_buckets_);
+  }
+
   // Updates this set by intersecting with instructions in a predecessor's set.
   void IntersectWith(ValueSet* predecessor) {
     if (IsEmpty()) {
@@ -144,6 +158,46 @@
   size_t GetNumberOfEntries() const { return num_entries_; }
 
  private:
+  // Copies all entries from `other` to `this`.
+  // If `is_dirty` is set to true, existing data will be wiped first. It is
+  // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
+  void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
+    DCHECK_NE(this, &other);
+    DCHECK_GE(num_buckets_, other.IdealBucketCount());
+
+    if (num_buckets_ == other.num_buckets_) {
+      // Hash table remains the same size. We copy the bucket pointers and leave
+      // all buckets_owned_ bits false.
+      if (is_dirty) {
+        buckets_owned_.ClearAllBits();
+      } else {
+        DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
+      }
+      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
+    } else {
+      // Hash table size changes. We copy and rehash all entries, and set all
+      // buckets_owned_ bits to true.
+      if (is_dirty) {
+        memset(buckets_, 0, num_buckets_ * sizeof(Node*));
+      } else {
+        if (kIsDebugBuild) {
+          for (size_t i = 0; i < num_buckets_; ++i) {
+            DCHECK(buckets_[i] == nullptr) << i;
+          }
+        }
+      }
+      for (size_t i = 0; i < other.num_buckets_; ++i) {
+        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
+          size_t new_index = BucketIndex(node->GetHashCode());
+          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
+        }
+      }
+      buckets_owned_.SetInitialBits(num_buckets_);
+    }
+
+    num_entries_ = other.num_entries_;
+  }
+
   class Node : public ArenaObject<kArenaAllocGvn> {
    public:
     Node(HInstruction* instruction, size_t hash_code, Node* next)
@@ -191,14 +245,6 @@
     return clone_iterator;
   }
 
-  void Clear() {
-    num_entries_ = 0;
-    for (size_t i = 0; i < num_buckets_; ++i) {
-      buckets_[i] = nullptr;
-    }
-    buckets_owned_.SetInitialBits(num_buckets_);
-  }
-
   // Iterates over buckets with impure instructions (even indices) and deletes
   // the ones on which 'cond' returns true.
   template<typename Functor>
@@ -261,11 +307,14 @@
     }
   }
 
-  // Generates a hash code for an instruction. Pure instructions are put into
-  // odd buckets to speed up deletion.
+  // Generates a hash code for an instruction.
   size_t HashCode(HInstruction* instruction) const {
     size_t hash_code = instruction->ComputeHashCode();
-    if (instruction->GetSideEffects().HasDependencies()) {
+    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
+    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
+    // need to delete them when entering the loop.
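+    // With this split, killing entries on a side effect only needs to scan the
+    // even (impure) buckets and can leave the odd (pure) ones untouched.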
+    if (instruction->GetSideEffects().HasDependencies() ||
+        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
       return (hash_code << 1) | 0;
     } else {
       return (hash_code << 1) | 1;
@@ -307,7 +356,9 @@
       : graph_(graph),
         allocator_(allocator),
         side_effects_(side_effects),
-        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)) {}
+        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)),
+        visited_blocks_(
+            allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {}
 
   void Run();
 
@@ -320,11 +371,37 @@
   ArenaAllocator* const allocator_;
   const SideEffectsAnalysis& side_effects_;
 
+  ValueSet* FindSetFor(HBasicBlock* block) const {
+    ValueSet* result = sets_[block->GetBlockId()];
+    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
+    return result;
+  }
+
+  void AbandonSetFor(HBasicBlock* block) {
+    DCHECK(sets_[block->GetBlockId()] != nullptr)
+        << "Block B" << block->GetBlockId() << " expected to have a set";
+    sets_[block->GetBlockId()] = nullptr;
+  }
+
+  // Returns true if a not-yet-visited successor or dominated block of `block`
+  // may still reference its ValueSet, false once all such blocks have been visited.
+  bool WillBeReferencedAgain(HBasicBlock* block) const;
+
+  // Iterates over visited blocks and finds one which has a ValueSet such that:
+  // (a) it will not be referenced in the future, and
+  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
+  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
+                                                 const ValueSet& reference_set) const;
+
   // ValueSet for blocks. Initially null, but for an individual block they
   // are allocated and populated by the dominator, and updated by all blocks
   // in the path from the dominator to the block.
   ArenaVector<ValueSet*> sets_;
 
+  // BitVector which serves as a fast-access map from block id to
+  // visited/unvisited boolean.
+  ArenaBitVector visited_blocks_;
+
   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
 };
 
@@ -341,6 +418,7 @@
 
 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   ValueSet* set = nullptr;
+
   const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
   if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
     // The entry block should only accumulate constant instructions, and
@@ -349,22 +427,49 @@
     set = new (allocator_) ValueSet(allocator_);
   } else {
     HBasicBlock* dominator = block->GetDominator();
-    ValueSet* dominator_set = sets_[dominator->GetBlockId()];
+    ValueSet* dominator_set = FindSetFor(dominator);
+
     if (dominator->GetSuccessors().size() == 1) {
-      DCHECK_EQ(dominator->GetSuccessors()[0], block);
+      // `block` is a direct successor of its dominator. No need to clone the
+      // dominator's set, `block` can take over its ownership including its buckets.
+      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
+      AbandonSetFor(dominator);
       set = dominator_set;
     } else {
-      // We have to copy if the dominator has other successors, or `block` is not a successor
-      // of the dominator.
-      set = new (allocator_) ValueSet(allocator_, *dominator_set);
+      // Try to find a basic block which will never be referenced again and whose
+      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
+      // into the recycled set, so we pass `dominator_set` as a reference for size.
+      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
+      if (recyclable == nullptr) {
+        // No block with a suitable ValueSet found. Allocate a new one and
+        // copy `dominator_set` into it.
+        set = new (allocator_) ValueSet(allocator_, *dominator_set);
+      } else {
+        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
+        set = FindSetFor(recyclable);
+        AbandonSetFor(recyclable);
+        set->PopulateFrom(*dominator_set);
+      }
     }
+
     if (!set->IsEmpty()) {
       if (block->IsLoopHeader()) {
-        DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
-        set->Kill(side_effects_.GetLoopEffects(block));
+        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
+          // To satisfy our linear scan algorithm, no instruction should flow into an
+          // irreducible loop header. We clear the set at the entry of irreducible loops,
+          // and of any loop containing an irreducible loop, as in both cases GVN could
+          // extend the liveness of an instruction across the irreducible loop.
+          // Note that, if we're not compiling OSR, we could still do GVN and introduce
+          // phis at irreducible loop headers. We decided it was not worth the complexity.
+          set->Clear();
+        } else {
+          DCHECK(!block->GetLoopInformation()->IsIrreducible());
+          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
+          set->Kill(side_effects_.GetLoopEffects(block));
+        }
       } else if (predecessors.size() > 1) {
         for (HBasicBlock* predecessor : predecessors) {
-          set->IntersectWith(sets_[predecessor->GetBlockId()]);
+          set->IntersectWith(FindSetFor(predecessor));
           if (set->IsEmpty()) {
             break;
           }
@@ -377,9 +482,10 @@
 
   HInstruction* current = block->GetFirstInstruction();
   while (current != nullptr) {
-    set->Kill(current->GetSideEffects());
     // Save the next instruction in case `current` is removed from the graph.
     HInstruction* next = current->GetNext();
+    // Do not kill the set with the side effects of the instruction just yet: if
+    // the instruction is GVN'ed (replaced by an equivalent and removed), its side
+    // effects never take place, so no kill is needed.
     if (current->CanBeMoved()) {
       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
         // For commutative ops, (x op y) will be treated the same as (y op x)
@@ -395,11 +501,68 @@
         current->ReplaceWith(existing);
         current->GetBlock()->RemoveInstruction(current);
       } else {
+        set->Kill(current->GetSideEffects());
         set->Add(current);
       }
+    } else {
+      set->Kill(current->GetSideEffects());
     }
     current = next;
   }
+
+  visited_blocks_.SetBit(block->GetBlockId());
+}
+
+bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
+  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
+
+  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
+    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
+      return true;
+    }
+  }
+
+  for (const HBasicBlock* successor : block->GetSuccessors()) {
+    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
+    HBasicBlock* block, const ValueSet& reference_set) const {
+  HBasicBlock* secondary_match = nullptr;
+
+  for (size_t block_id : visited_blocks_.Indexes()) {
+    ValueSet* current_set = sets_[block_id];
+    if (current_set == nullptr) {
+      // Set was already recycled.
+      continue;
+    }
+
+    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];
+
+    // We test if `current_set` has enough buckets to store a copy of
+    // `reference_set` with a reasonable load factor. If we find a set whose
+    // number of buckets matches perfectly, we return right away. If we find one
+    // that is larger, we return it if no perfectly-matching set is found.
+    // Note that we defer testing WillBeReferencedAgain until all other criteria
+    // have been satisfied because it might be expensive.
+    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
+      if (!WillBeReferencedAgain(current_block)) {
+        return current_block;
+      }
+    } else if (secondary_match == nullptr &&
+               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
+      if (!WillBeReferencedAgain(current_block)) {
+        secondary_match = current_block;
+      }
+    }
+  }
+
+  return secondary_match;
 }
 
 void GVNOptimization::Run() {
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index de60cf2..6abf00e 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -21,14 +21,14 @@
 #include "optimizing_unit_test.h"
 #include "side_effects_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
-TEST(GVNTest, LocalFieldElimination) {
+class GVNTest : public CommonCompilerTest {};
+
+TEST_F(GVNTest, LocalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -100,7 +100,7 @@
   ASSERT_EQ(different_offset->GetBlock(), block);
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 
-  graph->TryBuildingSsa();
+  graph->BuildDominatorTree();
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -110,10 +110,10 @@
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 }
 
-TEST(GVNTest, GlobalFieldElimination) {
+TEST_F(GVNTest, GlobalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -182,7 +182,7 @@
                                                           0));
   join->AddInstruction(new (&allocator) HExit());
 
-  graph->TryBuildingSsa();
+  graph->BuildDominatorTree();
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -193,10 +193,10 @@
   ASSERT_TRUE(join->GetFirstInstruction()->IsExit());
 }
 
-TEST(GVNTest, LoopFieldElimination) {
+TEST_F(GVNTest, LoopFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -288,7 +288,7 @@
   ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
   ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
 
-  graph->TryBuildingSsa();
+  graph->BuildDominatorTree();
   {
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
@@ -316,10 +316,10 @@
 }
 
 // Test that inner loops affect the side effects of the outer loop.
-TEST(GVNTest, LoopSideEffects) {
+TEST_F(GVNTest, LoopSideEffects) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC();
 
@@ -357,14 +357,16 @@
                                                              Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
+  outer_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   outer_loop_body->AddInstruction(new (&allocator) HGoto());
+  inner_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   inner_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   inner_loop_body->AddInstruction(new (&allocator) HGoto());
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
   outer_loop_exit->AddInstruction(new (&allocator) HExit());
 
-  graph->TryBuildingSsa();
+  graph->BuildDominatorTree();
 
   ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
       *outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 8968a44..129c2a9 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -20,19 +20,6 @@
 namespace art {
 
 /**
- * Returns true if instruction is invariant within the given loop.
- */
-static bool IsLoopInvariant(HLoopInformation* loop, HInstruction* instruction) {
-  HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation();
-  if (other_loop != loop) {
-    // If instruction does not occur in same loop, it is invariant
-    // if it appears in an outer loop (including no loop at all).
-    return other_loop == nullptr || loop->IsIn(*other_loop);
-  }
-  return false;
-}
-
-/**
  * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
  * along dependences, viz. any of (a, b, c, d), (d, a, b, c), (c, d, a, b), (b, c, d, a) assuming
  * a chain of dependences (mutual independent items may occur in arbitrary order). For proper
@@ -66,6 +53,32 @@
   }
 }
 
+/**
+ * Returns true if the from/to types denote a narrowing integral conversion (precision loss).
+ */
+static bool IsNarrowingIntegralConversion(Primitive::Type from, Primitive::Type to) {
+  switch (from) {
+    case Primitive::kPrimLong:
+      return to == Primitive::kPrimByte || to == Primitive::kPrimShort
+          || to == Primitive::kPrimChar || to == Primitive::kPrimInt;
+    case Primitive::kPrimInt:
+      return to == Primitive::kPrimByte || to == Primitive::kPrimShort
+          || to == Primitive::kPrimChar;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return to == Primitive::kPrimByte;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Returns the narrower of the two data types.
+ */
+static Primitive::Type Narrowest(Primitive::Type type1, Primitive::Type type2) {
+  return Primitive::ComponentSize(type1) <= Primitive::ComponentSize(type2) ? type1 : type2;
+}
+
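
For reference, the narrowing table above restated with spot checks (a compilable sketch; Ty stands in for Primitive::Type and is not part of the change):

    #include <cassert>

    enum class Ty { kByte, kChar, kShort, kInt, kLong };

    // Same table as IsNarrowingIntegralConversion above.
    bool IsNarrowingIntegral(Ty from, Ty to) {
      switch (from) {
        case Ty::kLong:  return to == Ty::kByte || to == Ty::kShort ||
                                to == Ty::kChar || to == Ty::kInt;
        case Ty::kInt:   return to == Ty::kByte || to == Ty::kShort || to == Ty::kChar;
        case Ty::kChar:
        case Ty::kShort: return to == Ty::kByte;
        default:         return false;
      }
    }

    int main() {
      assert(IsNarrowingIntegral(Ty::kLong, Ty::kInt));     // 64 -> 32 bits loses precision
      assert(IsNarrowingIntegral(Ty::kInt, Ty::kShort));    // 32 -> 16 bits loses precision
      assert(!IsNarrowingIntegral(Ty::kShort, Ty::kInt));   // widening, no loss
      assert(!IsNarrowingIntegral(Ty::kChar, Ty::kShort));  // same width, not treated as narrowing
      return 0;
    }
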
 //
 // Class methods.
 //
@@ -84,12 +97,14 @@
 }
 
 void HInductionVarAnalysis::Run() {
-  // Detects sequence variables (generalized induction variables) during an inner-loop-first
-  // traversal of all loops using Gerlek's algorithm. The order is only relevant if outer
-  // loops would use induction information of inner loops (not currently done).
-  for (HPostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
+  // Detects sequence variables (generalized induction variables) during an outer-to-inner
+  // traversal of all loops using Gerlek's algorithm. The order is important because it enables
+  // range analysis on the outer loop while the inner loops are visited.
+  for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
     HBasicBlock* graph_block = it_graph.Current();
-    if (graph_block->IsLoopHeader()) {
+    // Don't analyze irreducible loops.
+    // TODO(ajcbik): could/should we remove this restriction?
+    if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
       VisitLoop(graph_block->GetLoopInformation());
     }
   }
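
To see why the outer-to-inner order matters, consider a nested loop (a compilable illustration, not ART code):

    // With outer loops visited first, the outer induction and trip count are
    // already known when the inner loop is analyzed, so inner bounds can be
    // ranged against the outer ones.
    void Example(int n, int* a) {
      for (int i = 0; i < n; ++i) {    // analyzed first: i in [0, n-1]
        for (int j = i; j < n; ++j) {  // analyzed second: j in [i, n-1], within [0, n-1]
          a[j] = 0;                    // range usable, e.g., for bounds-check elimination
        }
      }
    }
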
@@ -126,7 +141,7 @@
   DCHECK(stack_.empty());
   map_.clear();
 
-  // Determine the loop's trip count.
+  // Determine the loop's trip-count.
   VisitControl(loop);
 }
 
@@ -137,8 +152,8 @@
 
   // Visit all descendants.
   uint32_t low = d1;
-  for (size_t i = 0, count = instruction->InputCount(); i < count; ++i) {
-    low = std::min(low, VisitDescendant(loop, instruction->InputAt(i)));
+  for (HInstruction* input : instruction->GetInputs()) {
+    low = std::min(low, VisitDescendant(loop, input));
   }
 
   // Lower or found SCC?
@@ -159,6 +174,9 @@
       }
     }
 
+    // Type of induction.
+    type_ = scc_[0]->GetType();
+
     // Classify the SCC.
     if (scc_.size() == 1 && !scc_[0]->IsLoopHeaderPhi()) {
       ClassifyTrivial(loop, scc_[0]);
@@ -208,14 +226,13 @@
                        instruction->InputAt(0)->GetType());
   } else if (instruction->IsNeg()) {
     info = TransferNeg(LookupInfo(loop, instruction->InputAt(0)));
+  } else if (instruction->IsTypeConversion()) {
+    info = TransferCnv(LookupInfo(loop, instruction->InputAt(0)),
+                       instruction->AsTypeConversion()->GetInputType(),
+                       instruction->AsTypeConversion()->GetResultType());
+
   } else if (instruction->IsBoundsCheck()) {
     info = LookupInfo(loop, instruction->InputAt(0));  // Pass-through.
-  } else if (instruction->IsTypeConversion()) {
-    HTypeConversion* conversion = instruction->AsTypeConversion();
-    // TODO: accept different conversion scenarios.
-    if (conversion->GetResultType() == conversion->GetInputType()) {
-      info = LookupInfo(loop, conversion->GetInput());
-    }
   }
 
   // Successfully classified?
@@ -250,7 +267,7 @@
   if (size == 1) {
     InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
     if (update != nullptr) {
-      AssignInfo(loop, phi, CreateInduction(kWrapAround, initial, update));
+      AssignInfo(loop, phi, CreateInduction(kWrapAround, initial, update, type_));
     }
     return;
   }
@@ -268,6 +285,8 @@
     } else if (instruction->IsSub()) {
       update = SolveAddSub(
           loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kSub, true);
+    } else if (instruction->IsTypeConversion()) {
+      update = SolveCnv(instruction->AsTypeConversion());
     }
     if (update == nullptr) {
       return;
@@ -282,7 +301,7 @@
       case kInvariant:
         // Classify first phi and then the rest of the cycle "on-demand".
         // Statements are scanned in order.
-        AssignInfo(loop, phi, CreateInduction(kLinear, induction, initial));
+        AssignInfo(loop, phi, CreateInduction(kLinear, induction, initial, type_));
         for (size_t i = 1; i < size; i++) {
           ClassifyTrivial(loop, scc_[i]);
         }
@@ -312,20 +331,21 @@
   //   (b, c, d, e, a)
   // in preparation for assigning this to the previous variable in the sequence.
   if (induction->induction_class == kInvariant) {
-    return CreateInduction(kPeriodic, induction, last);
+    return CreateInduction(kPeriodic, induction, last, type_);
   }
-  return CreateInduction(kPeriodic, induction->op_a, RotatePeriodicInduction(induction->op_b, last));
+  return CreateInduction(
+      kPeriodic, induction->op_a, RotatePeriodicInduction(induction->op_b, last), type_);
 }
 
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(HLoopInformation* loop,
                                                                          HInstruction* phi,
                                                                          size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  const size_t count = phi->InputCount();
-  DCHECK_LT(input_index, count);
-  InductionInfo* a = LookupInfo(loop, phi->InputAt(input_index));
-  for (size_t i = input_index + 1; i < count; i++) {
-    InductionInfo* b = LookupInfo(loop, phi->InputAt(i));
+  HInputsRef inputs = phi->GetInputs();
+  DCHECK_LT(input_index, inputs.size());
+  InductionInfo* a = LookupInfo(loop, inputs[input_index]);
+  for (size_t i = input_index + 1; i < inputs.size(); i++) {
+    InductionInfo* b = LookupInfo(loop, inputs[i]);
     if (!InductionEqual(a, b)) {
       return nullptr;
     }
@@ -343,8 +363,10 @@
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(op, a, b);
     } else if (a->induction_class == kLinear && b->induction_class == kLinear) {
-      return CreateInduction(
-          kLinear, TransferAddSub(a->op_a, b->op_a, op), TransferAddSub(a->op_b, b->op_b, op));
+      return CreateInduction(kLinear,
+                             TransferAddSub(a->op_a, b->op_a, op),
+                             TransferAddSub(a->op_b, b->op_b, op),
+                             type_);
     } else if (a->induction_class == kInvariant) {
       InductionInfo* new_a = b->op_a;
       InductionInfo* new_b = TransferAddSub(a, b->op_b, op);
@@ -354,7 +376,7 @@
       } else if (op == kSub) {  // Negation required.
         new_a = TransferNeg(new_a);
       }
-      return CreateInduction(b->induction_class, new_a, new_b);
+      return CreateInduction(b->induction_class, new_a, new_b, type_);
     } else if (b->induction_class == kInvariant) {
       InductionInfo* new_a = a->op_a;
       InductionInfo* new_b = TransferAddSub(a->op_b, b, op);
@@ -362,7 +384,7 @@
         DCHECK(a->induction_class == kWrapAround || a->induction_class == kPeriodic);
         new_a = TransferAddSub(new_a, b, op);
       }
-      return CreateInduction(a->induction_class, new_a, new_b);
+      return CreateInduction(a->induction_class, new_a, new_b, type_);
     }
   }
   return nullptr;
@@ -377,9 +399,15 @@
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(kMul, a, b);
     } else if (a->induction_class == kInvariant) {
-      return CreateInduction(b->induction_class, TransferMul(a, b->op_a), TransferMul(a, b->op_b));
+      return CreateInduction(b->induction_class,
+                             TransferMul(a, b->op_a),
+                             TransferMul(a, b->op_b),
+                             type_);
     } else if (b->induction_class == kInvariant) {
-      return CreateInduction(a->induction_class, TransferMul(a->op_a, b), TransferMul(a->op_b, b));
+      return CreateInduction(a->induction_class,
+                             TransferMul(a->op_a, b),
+                             TransferMul(a->op_b, b),
+                             type_);
     }
   }
   return nullptr;
@@ -390,7 +418,7 @@
                                                                          Primitive::Type type) {
   // Transfer over a shift left: treat shift by restricted constant as equivalent multiplication.
   int64_t value = -1;
-  if (a != nullptr && IsIntAndGet(b, &value)) {
+  if (a != nullptr && IsExact(b, &value)) {
     // Obtain the constant needed for the multiplication. This yields an existing instruction
     // if the constants is already there. Otherwise, this has a side effect on the HIR.
     // The restriction on the shift factor avoids generating a negative constant
@@ -411,7 +439,24 @@
     if (a->induction_class == kInvariant) {
       return CreateInvariantOp(kNeg, nullptr, a);
     }
-    return CreateInduction(a->induction_class, TransferNeg(a->op_a), TransferNeg(a->op_b));
+    return CreateInduction(a->induction_class, TransferNeg(a->op_a), TransferNeg(a->op_b), type_);
+  }
+  return nullptr;
+}
+
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferCnv(InductionInfo* a,
+                                                                         Primitive::Type from,
+                                                                         Primitive::Type to) {
+  if (a != nullptr) {
+    // Allow narrowing conversion in certain cases.
+    if (IsNarrowingIntegralConversion(from, to)) {
+      if (a->induction_class == kLinear) {
+        if (a->type == to || (a->type == from && IsNarrowingIntegralConversion(from, to))) {
+          return CreateInduction(kLinear, a->op_a, a->op_b, to);
+        }
+      }
+      // TODO: other cases useful too?
+    }
   }
   return nullptr;
 }
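
The source shape TransferCnv accepts, as a compilable illustration (C++ standing in for the compiled code): a narrowing cast applied to a linear induction outside its own update cycle keeps the (a * i + b) form, re-typed to the narrower type.

    void Example(int n, short* out) {
      for (int i = 0; i < n; ++i) {
        // Linear induction ((1) * i + (0)), re-typed from int to short by the cast.
        short s = static_cast<short>(i);
        out[i] = s;
      }
    }
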
@@ -419,12 +464,12 @@
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi,
                                                                       size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  const size_t count = phi->InputCount();
-  DCHECK_LT(input_index, count);
-  auto ita = cycle_.find(phi->InputAt(input_index));
+  HInputsRef inputs = phi->GetInputs();
+  DCHECK_LT(input_index, inputs.size());
+  auto ita = cycle_.find(inputs[input_index]);
   if (ita != cycle_.end()) {
-    for (size_t i = input_index + 1; i < count; i++) {
-      auto itb = cycle_.find(phi->InputAt(i));
+    for (size_t i = input_index + 1; i < inputs.size(); i++) {
+      auto itb = cycle_.find(inputs[i]);
       if (itb == cycle_.end() ||
           !HInductionVarAnalysis::InductionEqual(ita->second, itb->second)) {
         return nullptr;
@@ -453,11 +498,11 @@
     if (a != nullptr && a->induction_class == kInvariant) {
       if (phi->InputAt(1) == entry_phi) {
         InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
-        return CreateInduction(kPeriodic, a, initial);
+        return CreateInduction(kPeriodic, a, initial, type_);
       }
       InductionInfo* b = SolvePhi(phi, /* input_index */ 1);
       if (b != nullptr && b->induction_class == kPeriodic) {
-        return CreateInduction(kPeriodic, a, b);
+        return CreateInduction(kPeriodic, a, b, type_);
       }
     }
   }
@@ -500,7 +545,7 @@
       InductionInfo* a = LookupInfo(loop, x);
       if (a != nullptr && a->induction_class == kInvariant) {
         InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
-        return CreateInduction(kPeriodic, CreateInvariantOp(kSub, a, initial), initial);
+        return CreateInduction(kPeriodic, CreateInvariantOp(kSub, a, initial), initial, type_);
       }
     }
   }
@@ -508,6 +553,21 @@
   return nullptr;
 }
 
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveCnv(HTypeConversion* conversion) {
+  Primitive::Type from = conversion->GetInputType();
+  Primitive::Type to = conversion->GetResultType();
+  // A narrowing conversion is allowed within the cycle of a linear induction, provided that the
+  // narrowest encountered type is recorded with the induction to account for the precision loss.
+  if (IsNarrowingIntegralConversion(from, to)) {
+    auto it = cycle_.find(conversion->GetInput());
+    if (it != cycle_.end() && it->second->induction_class == kInvariant) {
+      type_ = Narrowest(type_, to);
+      return it->second;
+    }
+  }
+  return nullptr;
+}
+
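
By contrast, SolveCnv above handles a narrowing cast inside the update cycle itself; an illustrative C++ shape (the addition promotes to int, the cast narrows back, and the recorded induction type becomes short):

    void Example(int* a) {
      for (short k = 0; k < 100; k = static_cast<short>(k + 1)) {
        a[k] = 0;  // induction k is recorded with type short to model the precision loss
      }
    }
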
 void HInductionVarAnalysis::VisitControl(HLoopInformation* loop) {
   HInstruction* control = loop->GetHeader()->GetLastInstruction();
   if (control->IsIf()) {
@@ -523,12 +583,10 @@
       InductionInfo* a = LookupInfo(loop, condition->InputAt(0));
       InductionInfo* b = LookupInfo(loop, condition->InputAt(1));
       Primitive::Type type = condition->InputAt(0)->GetType();
-      // Determine if the loop control uses integral arithmetic and an if-exit (X outside) or an
-      // if-iterate (X inside), always expressed as if-iterate when passing into VisitCondition().
-      if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
-        // Loop control is not 32/64-bit integral.
-      } else if (a == nullptr || b == nullptr) {
-        // Loop control is not a sequence.
+      // Determine if the loop control uses a known sequence on an if-exit (X outside) or on
+      // an if-iterate (X inside), expressed as if-iterate when passed into VisitCondition().
+      if (a == nullptr || b == nullptr) {
+        return;  // Loop control is not a sequence.
       } else if (if_true->GetLoopInformation() != loop && if_false->GetLoopInformation() == loop) {
         VisitCondition(loop, a, b, type, condition->GetOppositeCondition());
       } else if (if_true->GetLoopInformation() == loop && if_false->GetLoopInformation() != loop) {
@@ -557,22 +615,33 @@
     // Analyze condition with induction at left-hand-side (e.g. i < U).
     InductionInfo* lower_expr = a->op_b;
     InductionInfo* upper_expr = b;
-    InductionInfo* stride = a->op_a;
+    InductionInfo* stride_expr = a->op_a;
+    // Constant stride?
     int64_t stride_value = 0;
-    if (!IsIntAndGet(stride, &stride_value)) {
+    if (!IsExact(stride_expr, &stride_value)) {
       return;
     }
-    // Rewrite condition i != U into i < U or i > U if end condition is reached exactly.
-    if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLT)) ||
-                           (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGT)))) {
+    // Rewrite condition i != U into the strict end condition i < U or i > U if this end
+    // condition is reached exactly (tested by verifying that the loop has a unit stride and
+    // that the non-strict condition would always be taken); a worked example follows this
+    // function.
+    if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLE)) ||
+                           (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGE)))) {
       cmp = stride_value > 0 ? kCondLT : kCondGT;
     }
+    // Only accept an integral condition. A mismatch between the type of the condition and
+    // that of the induction is only allowed if the (necessarily narrower) induction range
+    // fits the narrower control.
+    if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+      return;  // not integral
+    } else if (type != a->type &&
+               !FitsNarrowerControl(lower_expr, upper_expr, stride_value, a->type, cmp)) {
+      return;  // mismatched type
+    }
     // Normalize a linear loop control with a nonzero stride:
     //   stride > 0, either i < U or i <= U
     //   stride < 0, either i > U or i >= U
     if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) ||
         (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) {
-      VisitTripCount(loop, lower_expr, upper_expr, stride, stride_value, type, cmp);
+      VisitTripCount(loop, lower_expr, upper_expr, stride_expr, stride_value, type, cmp);
     }
   }
 }
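
A worked example of the i != U rewrite (illustrative): with a unit stride and a lower bound provably at most the upper bound, the inequality exit is reached exactly and may be treated as a strict comparison.

    void Example(int* a) {
      // Stride +1 and 0 <= 10 always holds, so i != 10 behaves as i < 10.
      for (int i = 0; i != 10; ++i) {
        a[i] = 0;  // normalized form: i < 10, trip count 10
      }
    }
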
@@ -580,7 +649,7 @@
 void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop,
                                            InductionInfo* lower_expr,
                                            InductionInfo* upper_expr,
-                                           InductionInfo* stride,
+                                           InductionInfo* stride_expr,
                                            int64_t stride_value,
                                            Primitive::Type type,
                                            IfCondition cmp) {
@@ -601,15 +670,16 @@
   //     an unsigned entity, for example, as in the following loop that uses the full range:
   //     for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX
   // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in:
-  //     for (int i = 12; i < U; i++) // TC = 0 when U >= 12
+  //     for (int i = 12; i < U; i++) // TC = 0 when U <= 12
   //     If this cannot be determined at compile-time, the TC is only valid within the
-  //     loop-body proper, not the loop-header unless enforced with an explicit condition.
+  //     loop-body proper, not the loop-header unless enforced with an explicit taken-test.
   // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in:
   //     for (int i = 0; i <= U; i++) // TC = Inf when U = INT_MAX
   //     If this cannot be determined at compile-time, the TC is only valid when enforced
-  //     with an explicit condition.
+  //     with an explicit finite-test.
   // (4) For loops which early-exit, the TC forms an upper bound, as in:
   //     for (int i = 0; i < 10 && ....; i++) // TC <= 10
+  InductionInfo* trip_count = upper_expr;
   const bool is_taken = IsTaken(lower_expr, upper_expr, cmp);
   const bool is_finite = IsFinite(upper_expr, stride_value, type, cmp);
   const bool cancels = (cmp == kCondLT || cmp == kCondGT) && std::abs(stride_value) == 1;
@@ -617,26 +687,37 @@
     // Convert exclusive integral inequality into inclusive integral inequality,
     // viz. condition i < U is i <= U - 1 and condition i > U is i >= U + 1.
     if (cmp == kCondLT) {
-      upper_expr = CreateInvariantOp(kSub, upper_expr, CreateConstant(1, type));
+      trip_count = CreateInvariantOp(kSub, trip_count, CreateConstant(1, type));
     } else if (cmp == kCondGT) {
-      upper_expr = CreateInvariantOp(kAdd, upper_expr, CreateConstant(1, type));
+      trip_count = CreateInvariantOp(kAdd, trip_count, CreateConstant(1, type));
     }
     // Compensate for stride.
-    upper_expr = CreateInvariantOp(kAdd, upper_expr, stride);
+    trip_count = CreateInvariantOp(kAdd, trip_count, stride_expr);
   }
-  InductionInfo* trip_count
-      = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, upper_expr, lower_expr), stride);
+  trip_count = CreateInvariantOp(
+      kDiv, CreateInvariantOp(kSub, trip_count, lower_expr), stride_expr);
   // Assign the trip-count expression to the loop control. Clients that use the information
   // should be aware that the expression is only valid under the conditions listed above.
-  InductionOp tcKind = kTripCountInBodyUnsafe;
+  InductionOp tcKind = kTripCountInBodyUnsafe;  // needs both tests
   if (is_taken && is_finite) {
-    tcKind = kTripCountInLoop;
+    tcKind = kTripCountInLoop;  // needs neither test
   } else if (is_finite) {
-    tcKind = kTripCountInBody;
+    tcKind = kTripCountInBody;  // needs taken-test
   } else if (is_taken) {
-    tcKind = kTripCountInLoopUnsafe;
+    tcKind = kTripCountInLoopUnsafe;  // needs finite-test
   }
-  AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), CreateTripCount(tcKind, trip_count));
+  InductionOp op = kNop;
+  switch (cmp) {
+    case kCondLT: op = kLT; break;
+    case kCondLE: op = kLE; break;
+    case kCondGT: op = kGT; break;
+    case kCondGE: op = kGE; break;
+    default:      LOG(FATAL) << "CONDITION UNREACHABLE";
+  }
+  InductionInfo* taken_test = CreateInvariantOp(op, lower_expr, upper_expr);
+  AssignInfo(loop,
+             loop->GetHeader()->GetLastInstruction(),
+             CreateTripCount(tcKind, trip_count, taken_test, type));
 }
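
The normalization above reduces to a single closed form; a standalone model of it (assuming a positive stride and a condition already normalized to i < U or i <= U, matching the steps in VisitTripCount):

    #include <cstdint>

    int64_t TripCount(int64_t lower, int64_t upper, int64_t stride, bool inclusive) {
      if (!inclusive) upper -= 1;       // i < U  ==>  i <= U - 1
      upper += stride;                  // compensate for stride
      return (upper - lower) / stride;  // TC = (U' - L) / stride
    }
    // Spot checks: TripCount(0, 100, 1, false) == 100 (i = 0..99);
    //              TripCount(0, 100, 3, false) == 34  (i = 0, 3, ..., 99).
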
 
 bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr,
@@ -644,14 +725,25 @@
                                     IfCondition cmp) {
   int64_t lower_value;
   int64_t upper_value;
-  if (IsIntAndGet(lower_expr, &lower_value) && IsIntAndGet(upper_expr, &upper_value)) {
-    switch (cmp) {
-      case kCondLT: return lower_value <  upper_value;
-      case kCondLE: return lower_value <= upper_value;
-      case kCondGT: return lower_value >  upper_value;
-      case kCondGE: return lower_value >= upper_value;
-      default:      LOG(FATAL) << "CONDITION UNREACHABLE";
-    }
+  switch (cmp) {
+    case kCondLT:
+      return IsAtMost(lower_expr, &lower_value)
+          && IsAtLeast(upper_expr, &upper_value)
+          && lower_value < upper_value;
+    case kCondLE:
+      return IsAtMost(lower_expr, &lower_value)
+          && IsAtLeast(upper_expr, &upper_value)
+          && lower_value <= upper_value;
+    case kCondGT:
+      return IsAtLeast(lower_expr, &lower_value)
+          && IsAtMost(upper_expr, &upper_value)
+          && lower_value > upper_value;
+    case kCondGE:
+      return IsAtLeast(lower_expr, &lower_value)
+          && IsAtMost(upper_expr, &upper_value)
+          && lower_value >= upper_value;
+    default:
+      LOG(FATAL) << "CONDITION UNREACHABLE";
   }
   return false;  // not certain, may be untaken
 }
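
The certainty rule above, distilled: a comparison is provably taken when even the worst-case bounds still satisfy it. A sketch, with the two bounds standing in for the IsAtMost/IsAtLeast queries:

    #include <cstdint>

    // For i < U: the largest possible lower value stays below the smallest
    // possible upper value, e.g. lower in [_, 12] and upper in [20, _].
    bool ProvablyTakenLT(int64_t lower_at_most, int64_t upper_at_least) {
      return lower_at_most < upper_at_least;  // 12 < 20 -> certainly taken
    }
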
@@ -660,31 +752,52 @@
                                      int64_t stride_value,
                                      Primitive::Type type,
                                      IfCondition cmp) {
-  const int64_t min = type == Primitive::kPrimInt
-      ? std::numeric_limits<int32_t>::min()
-      : std::numeric_limits<int64_t>::min();
-  const int64_t max = type == Primitive::kPrimInt
-        ? std::numeric_limits<int32_t>::max()
-        : std::numeric_limits<int64_t>::max();
+  const int64_t min = Primitive::MinValueOfIntegralType(type);
+  const int64_t max = Primitive::MaxValueOfIntegralType(type);
   // Some rules under which it is certain at compile-time that the loop is finite.
   int64_t value;
   switch (cmp) {
     case kCondLT:
       return stride_value == 1 ||
-          (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value + 1));
+          (IsAtMost(upper_expr, &value) && value <= (max - stride_value + 1));
     case kCondLE:
-      return (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value));
+      return (IsAtMost(upper_expr, &value) && value <= (max - stride_value));
     case kCondGT:
       return stride_value == -1 ||
-          (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1));
+          (IsAtLeast(upper_expr, &value) && value >= (min - stride_value - 1));
     case kCondGE:
-      return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value));
+      return (IsAtLeast(upper_expr, &value) && value >= (min - stride_value));
     default:
       LOG(FATAL) << "CONDITION UNREACHABLE";
   }
   return false;  // not certain, may be infinite
 }
 
+bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr,
+                                                InductionInfo* upper_expr,
+                                                int64_t stride_value,
+                                                Primitive::Type type,
+                                                IfCondition cmp) {
+  int64_t min = Primitive::MinValueOfIntegralType(type);
+  int64_t max = Primitive::MaxValueOfIntegralType(type);
+  // An inclusive test needs room for one extra value.
+  if (stride_value != 1 && stride_value != -1) {
+    return false;  // non-unit stride
+  } else if (cmp == kCondLE) {
+    max--;
+  } else if (cmp == kCondGE) {
+    min++;
+  }
+  // Do both bounds fit the range?
+  // Note: The `value` is initialized to please valgrind - the compiler can reorder
+  // the return value check with the `value` check, b/27651442.
+  int64_t value = 0;
+  return IsAtLeast(lower_expr, &value) && value >= min &&
+         IsAtMost(lower_expr, &value)  && value <= max &&
+         IsAtLeast(upper_expr, &value) && value >= min &&
+         IsAtMost(upper_expr, &value)  && value <= max;
+}
+
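
An example of the loop shape FitsNarrowerControl admits (illustrative): a byte-typed induction compared under int arithmetic, where the unit stride and bounds inside [-128, 127] guarantee the narrower value cannot wrap before the control exits.

    void Example(signed char* a) {
      for (signed char b = 0; b < 100; ++b) {  // the compare promotes b to int
        a[b] = 0;                              // bounds 0 and 100 both fit in [-128, 127]
      }
    }
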
 void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop,
                                        HInstruction* instruction,
                                        InductionInfo* info) {
@@ -707,7 +820,7 @@
       return loop_it->second;
     }
   }
-  if (IsLoopInvariant(loop, instruction)) {
+  if (loop->IsDefinedOutOfTheLoop(instruction)) {
     InductionInfo* info = CreateInvariantFetch(instruction);
     AssignInfo(loop, instruction, info);
     return info;
@@ -733,7 +846,7 @@
   // More exhaustive simplifications are done by later phases once induction nodes are
   // translated back into HIR code (e.g. by loop optimizations or BCE).
   int64_t value = -1;
-  if (IsIntAndGet(a, &value)) {
+  if (IsExact(a, &value)) {
     if (value == 0) {
       // Simplify 0 + b = b, 0 * b = 0.
       if (op == kAdd) {
@@ -746,12 +859,11 @@
       if (value == 1) {
         return b;
       } else if (value == -1) {
-        op = kNeg;
-        a = nullptr;
+        return CreateSimplifiedInvariant(kNeg, nullptr, b);
       }
     }
   }
-  if (IsIntAndGet(b, &value)) {
+  if (IsExact(b, &value)) {
     if (value == 0) {
       // Simplify a + 0 = a, a - 0 = a, a * 0 = 0, -0 = 0.
       if (op == kAdd || op == kSub) {
@@ -764,24 +876,37 @@
       if (value == 1) {
         return a;
       } else if (value == -1) {
-        op = kNeg;
-        b = a;
-        a = nullptr;
+        return CreateSimplifiedInvariant(kNeg, nullptr, a);
       }
     }
   } else if (b->operation == kNeg) {
     // Simplify a + (-b) = a - b, a - (-b) = a + b, -(-b) = b.
     if (op == kAdd) {
-      op = kSub;
-      b = b->op_b;
+      return CreateSimplifiedInvariant(kSub, a, b->op_b);
     } else if (op == kSub) {
-      op = kAdd;
-      b = b->op_b;
+      return CreateSimplifiedInvariant(kAdd, a, b->op_b);
     } else if (op == kNeg) {
       return b->op_b;
     }
+  } else if (b->operation == kSub) {
+    // Simplify - (a - b) = b - a.
+    if (op == kNeg) {
+      return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a);
+    }
   }
-  return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr);
+  return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
+}
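
The rewrites above rely on plain integer identities; a few numeric spot checks of them (pure arithmetic, not ART code):

    #include <cassert>

    int main() {
      int a = 7, b = -3;
      assert(0 + b == b);         // 0 + b = b
      assert(-1 * b == -b);       // (-1) * b = -b
      assert(a + (-b) == a - b);  // a + (-b) = a - b
      assert(a - (-b) == a + b);  // a - (-b) = a + b
      assert(-(a - b) == b - a);  // -(a - b) = b - a
      return 0;
    }
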
+
+bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) {
+  return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value);
+}
+
+bool HInductionVarAnalysis::IsAtMost(InductionInfo* info, int64_t* value) {
+  return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtMost, value);
+}
+
+bool HInductionVarAnalysis::IsAtLeast(InductionInfo* info, int64_t* value) {
+  return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtLeast, value);
 }
 
 bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1,
@@ -792,6 +917,7 @@
         info1->induction_class == info2->induction_class &&
         info1->operation       == info2->operation       &&
         info1->fetch           == info2->fetch           &&
+        info1->type            == info2->type            &&
         InductionEqual(info1->op_a, info2->op_a)         &&
         InductionEqual(info1->op_b, info2->op_b);
   }
@@ -799,68 +925,54 @@
   return info1 == info2;
 }
 
-bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) {
-  if (info != nullptr && info->induction_class == kInvariant) {
-    // A direct constant fetch.
-    if (info->operation == kFetch) {
-      DCHECK(info->fetch);
-      if (info->fetch->IsIntConstant()) {
-        *value = info->fetch->AsIntConstant()->GetValue();
-        return true;
-      } else if (info->fetch->IsLongConstant()) {
-        *value = info->fetch->AsLongConstant()->GetValue();
-        return true;
-      }
-    }
-    // Use range analysis to resolve compound values.
-    int32_t range_value;
-    if (InductionVarRange::GetConstant(info, &range_value)) {
-      *value = range_value;
-      return true;
-    }
-  }
-  return false;
-}
-
 std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) {
   if (info != nullptr) {
     if (info->induction_class == kInvariant) {
-      int64_t value = -1;
       std::string inv = "(";
       inv += InductionToString(info->op_a);
       switch (info->operation) {
-        case kNop:   inv += " @ "; break;
-        case kAdd:   inv += " + "; break;
+        case kNop:   inv += " @ ";  break;
+        case kAdd:   inv += " + ";  break;
         case kSub:
-        case kNeg:   inv += " - "; break;
-        case kMul:   inv += " * "; break;
-        case kDiv:   inv += " / "; break;
+        case kNeg:   inv += " - ";  break;
+        case kMul:   inv += " * ";  break;
+        case kDiv:   inv += " / ";  break;
+        case kLT:    inv += " < ";  break;
+        case kLE:    inv += " <= "; break;
+        case kGT:    inv += " > ";  break;
+        case kGE:    inv += " >= "; break;
         case kFetch:
           DCHECK(info->fetch);
-          if (IsIntAndGet(info, &value)) {
-            inv += std::to_string(value);
+          if (info->fetch->IsIntConstant()) {
+            inv += std::to_string(info->fetch->AsIntConstant()->GetValue());
+          } else if (info->fetch->IsLongConstant()) {
+            inv += std::to_string(info->fetch->AsLongConstant()->GetValue());
           } else {
             inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName();
           }
           break;
-        case kTripCountInLoop:       inv += "TC-loop:"; break;
-        case kTripCountInBody:       inv += "TC-body:"; break;
-        case kTripCountInLoopUnsafe: inv += "TC-loop-unsafe:"; break;
-        case kTripCountInBodyUnsafe: inv += "TC-body-unsafe:"; break;
+        case kTripCountInLoop:       inv += " (TC-loop) ";        break;
+        case kTripCountInBody:       inv += " (TC-body) ";        break;
+        case kTripCountInLoopUnsafe: inv += " (TC-loop-unsafe) "; break;
+        case kTripCountInBodyUnsafe: inv += " (TC-body-unsafe) "; break;
       }
       inv += InductionToString(info->op_b);
-      return inv + ")";
+      inv += ")";
+      return inv;
     } else {
       DCHECK(info->operation == kNop);
       if (info->induction_class == kLinear) {
         return "(" + InductionToString(info->op_a) + " * i + " +
-                     InductionToString(info->op_b) + ")";
+                     InductionToString(info->op_b) + "):" +
+                     Primitive::PrettyDescriptor(info->type);
       } else if (info->induction_class == kWrapAround) {
         return "wrap(" + InductionToString(info->op_a) + ", " +
-                         InductionToString(info->op_b) + ")";
+                         InductionToString(info->op_b) + "):" +
+                         Primitive::PrettyDescriptor(info->type);
       } else if (info->induction_class == kPeriodic) {
         return "periodic(" + InductionToString(info->op_a) + ", " +
-                             InductionToString(info->op_b) + ")";
+                             InductionToString(info->op_b) + "):" +
+                             Primitive::PrettyDescriptor(info->type);
       }
     }
   }
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 7ab80cd..cd4c830 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -39,9 +39,9 @@
 
   void Run() OVERRIDE;
 
- private:
   static constexpr const char* kInductionPassName = "induction_var_analysis";
 
+ private:
   struct NodeInfo {
     explicit NodeInfo(uint32_t d) : depth(d), done(false) {}
     uint32_t depth;
@@ -65,11 +65,16 @@
     kMul,
     kDiv,
     kFetch,
-    // Trip counts (valid in full loop or only body proper; unsafe implies loop may be infinite).
-    kTripCountInLoop,
-    kTripCountInBody,
-    kTripCountInLoopUnsafe,
-    kTripCountInBodyUnsafe
+    // Trip-counts.
+    kTripCountInLoop,        // valid in full loop; loop is finite
+    kTripCountInBody,        // valid in body only; loop is finite
+    kTripCountInLoopUnsafe,  // valid in full loop; loop may be infinite
+    kTripCountInBodyUnsafe,  // valid in body only; loop may be infinite
+    // Comparisons for trip-count tests.
+    kLT,
+    kLE,
+    kGT,
+    kGE
   };
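
One way a client could consume the four trip-count kinds (a sketch of the contract documented above, not an ART API): each kind names the runtime guard tests that must still be generated before the trip count may be trusted.

    enum Guards { kNone, kTakenTest, kFiniteTest, kBothTests };

    // tc_kind is one of the four trip-count kinds above.
    Guards RequiredGuards(int tc_kind) {
      switch (tc_kind) {
        case 0:  return kNone;        // kTripCountInLoop: taken and finite proven
        case 1:  return kTakenTest;   // kTripCountInBody: finite proven only
        case 2:  return kFiniteTest;  // kTripCountInLoopUnsafe: taken proven only
        default: return kBothTests;   // kTripCountInBodyUnsafe: nothing proven
      }
    }
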
 
   /**
@@ -85,24 +90,27 @@
    *   (4) periodic
    *         nop: a, then defined by b (repeated when exhausted)
    *   (5) trip-count:
-   *         tc: defined by b
+   *         tc: defined by a, taken-test in b
    */
   struct InductionInfo : public ArenaObject<kArenaAllocInductionVarAnalysis> {
     InductionInfo(InductionClass ic,
                   InductionOp op,
                   InductionInfo* a,
                   InductionInfo* b,
-                  HInstruction* f)
+                  HInstruction* f,
+                  Primitive::Type t)
         : induction_class(ic),
           operation(op),
           op_a(a),
           op_b(b),
-          fetch(f) {}
+          fetch(f),
+          type(t) {}
     InductionClass induction_class;
     InductionOp operation;
     InductionInfo* op_a;
     InductionInfo* op_b;
     HInstruction* fetch;
+    Primitive::Type type;  // precision of induction
   };
 
   bool IsVisitedNode(HInstruction* instruction) const {
@@ -116,16 +124,24 @@
 
   InductionInfo* CreateInvariantFetch(HInstruction* f) {
     DCHECK(f != nullptr);
-    return new (graph_->GetArena()) InductionInfo(kInvariant, kFetch, nullptr, nullptr, f);
+    return new (graph_->GetArena())
+        InductionInfo(kInvariant, kFetch, nullptr, nullptr, f, f->GetType());
   }
 
-  InductionInfo* CreateTripCount(InductionOp op, InductionInfo* b) {
-    return new (graph_->GetArena()) InductionInfo(kInvariant, op, nullptr, b, nullptr);
-  }
-
-  InductionInfo* CreateInduction(InductionClass ic, InductionInfo* a, InductionInfo* b) {
+  InductionInfo* CreateTripCount(InductionOp op,
+                                 InductionInfo* a,
+                                 InductionInfo* b,
+                                 Primitive::Type type) {
     DCHECK(a != nullptr && b != nullptr);
-    return new (graph_->GetArena()) InductionInfo(ic, kNop, a, b, nullptr);
+    return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, type);
+  }
+
+  InductionInfo* CreateInduction(InductionClass ic,
+                                 InductionInfo* a,
+                                 InductionInfo* b,
+                                 Primitive::Type type) {
+    DCHECK(a != nullptr && b != nullptr);
+    return new (graph_->GetArena()) InductionInfo(ic, kNop, a, b, nullptr, type);
   }
 
   // Methods for analysis.
@@ -142,6 +158,7 @@
   InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b);
   InductionInfo* TransferShl(InductionInfo* a, InductionInfo* b, Primitive::Type type);
   InductionInfo* TransferNeg(InductionInfo* a);
+  InductionInfo* TransferCnv(InductionInfo* a, Primitive::Type from, Primitive::Type to);
 
   // Solvers.
   InductionInfo* SolvePhi(HInstruction* phi, size_t input_index);
@@ -155,6 +172,7 @@
                              HInstruction* y,
                              InductionOp op,
                              bool is_first_call);
+  InductionInfo* SolveCnv(HTypeConversion* conversion);
 
   // Trip count information.
   void VisitControl(HLoopInformation* loop);
@@ -175,6 +193,11 @@
                 int64_t stride_value,
                 Primitive::Type type,
                 IfCondition cmp);
+  bool FitsNarrowerControl(InductionInfo* lower_expr,
+                           InductionInfo* upper_expr,
+                           int64_t stride_value,
+                           Primitive::Type type,
+                           IfCondition cmp);
 
   // Assign and lookup.
   void AssignInfo(HLoopInformation* loop, HInstruction* instruction, InductionInfo* info);
@@ -182,9 +205,13 @@
   InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
   InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
 
+  // Constants.
+  bool IsExact(InductionInfo* info, /*out*/ int64_t* value);
+  bool IsAtMost(InductionInfo* info, /*out*/ int64_t* value);
+  bool IsAtLeast(InductionInfo* info, /*out*/ int64_t* value);
+
   // Helpers.
   static bool InductionEqual(InductionInfo* info1, InductionInfo* info2);
-  static bool IsIntAndGet(InductionInfo* info, int64_t* value);
   static std::string InductionToString(InductionInfo* info);
 
   // TODO: fine tune the following data structures, only keep relevant data.
@@ -195,6 +222,7 @@
   ArenaVector<HInstruction*> scc_;
   ArenaSafeMap<HInstruction*, NodeInfo> map_;
   ArenaSafeMap<HInstruction*, InductionInfo*> cycle_;
+  Primitive::Type type_;
 
   /**
    * Maintains the results of the analysis as a mapping from loops to a mapping from instructions
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index f16da2a..580d24b 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -18,7 +18,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -28,7 +27,7 @@
 /**
  * Fixture class for the InductionVarAnalysis tests.
  */
-class InductionVarAnalysisTest : public testing::Test {
+class InductionVarAnalysisTest : public CommonCompilerTest {
  public:
   InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -69,10 +68,13 @@
     entry_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(entry_);
     BuildForLoop(0, n);
+    return_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(return_);
     exit_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(exit_);
     entry_->AddSuccessor(loop_preheader_[0]);
-    loop_header_[0]->AddSuccessor(exit_);
+    loop_header_[0]->AddSuccessor(return_);
+    return_->AddSuccessor(exit_);
     graph_->SetEntryBlock(entry_);
     graph_->SetExitBlock(exit_);
 
@@ -83,37 +85,29 @@
     constant0_ = graph_->GetIntConstant(0);
     constant1_ = graph_->GetIntConstant(1);
     constant100_ = graph_->GetIntConstant(100);
-    induc_ = new (&allocator_) HLocal(n);
-    entry_->AddInstruction(induc_);
-    entry_->AddInstruction(new (&allocator_) HStoreLocal(induc_, constant0_));
-    tmp_ = new (&allocator_) HLocal(n + 1);
-    entry_->AddInstruction(tmp_);
-    entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_));
-    dum_ = new (&allocator_) HLocal(n + 2);
-    entry_->AddInstruction(dum_);
+    float_constant0_ = graph_->GetFloatConstant(0.0f);
+    return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
 
     // Provide loop instructions.
     for (int d = 0; d < n; d++) {
-      basic_[d] = new (&allocator_) HLocal(d);
-      entry_->AddInstruction(basic_[d]);
-      loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_));
-      HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
-      loop_header_[d]->AddInstruction(load);
-      HInstruction* compare = new (&allocator_) HLessThan(load, constant100_);
+      basic_[d] = new (&allocator_) HPhi(&allocator_, d, 0, Primitive::kPrimInt);
+      loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto());
+      loop_header_[d]->AddPhi(basic_[d]);
+      HInstruction* compare = new (&allocator_) HLessThan(basic_[d], constant100_);
       loop_header_[d]->AddInstruction(compare);
       loop_header_[d]->AddInstruction(new (&allocator_) HIf(compare));
-      load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
-      loop_body_[d]->AddInstruction(load);
-      increment_[d] = new (&allocator_) HAdd(Primitive::kPrimInt, load, constant1_);
+      increment_[d] = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[d], constant1_);
       loop_body_[d]->AddInstruction(increment_[d]);
-      loop_body_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], increment_[d]));
       loop_body_[d]->AddInstruction(new (&allocator_) HGoto());
+
+      basic_[d]->AddInput(constant0_);
+      basic_[d]->AddInput(increment_[d]);
     }
   }
 
   // Builds an if-statement at depth d and returns the phi that merges its two branches.
-  void BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) {
+  HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) {
     HBasicBlock* cond = new (&allocator_) HBasicBlock(graph_);
     HBasicBlock* ifTrue = new (&allocator_) HBasicBlock(graph_);
     HBasicBlock* ifFalse = new (&allocator_) HBasicBlock(graph_);
@@ -129,6 +123,10 @@
     cond->AddInstruction(new (&allocator_) HIf(parameter_));
     *ifT = ifTrue;
     *ifF = ifFalse;
+
+    HPhi* select_phi = new (&allocator_) HPhi(&allocator_, -1, 0, Primitive::kPrimInt);
+    loop_body_[d]->AddPhi(select_phi);
+    return select_phi;
   }
 
   // Inserts instruction right before increment at depth d.
@@ -137,23 +135,20 @@
     return instruction;
   }
 
-  // Inserts local load at depth d.
-  HInstruction* InsertLocalLoad(HLocal* local, int d) {
-    return InsertInstruction(new (&allocator_) HLoadLocal(local, Primitive::kPrimInt), d);
+  // Inserts a phi into the loop header at depth d and returns it.
+  HPhi* InsertLoopPhi(int vreg, int d) {
+    HPhi* phi = new (&allocator_) HPhi(&allocator_, vreg, 0, Primitive::kPrimInt);
+    loop_header_[d]->AddPhi(phi);
+    return phi;
   }
 
-  // Inserts local store at depth d.
-  HInstruction* InsertLocalStore(HLocal* local, HInstruction* rhs, int d) {
-    return InsertInstruction(new (&allocator_) HStoreLocal(local, rhs), d);
-  }
-
-  // Inserts an array store with given local as subscript at depth d to
+  // Inserts an array store with the given `subscript` at depth d to
   // enable tests to inspect the computed induction at that point easily.
-  HInstruction* InsertArrayStore(HLocal* subscript, int d) {
-    HInstruction* load = InsertInstruction(
-        new (&allocator_) HLoadLocal(subscript, Primitive::kPrimInt), d);
+  HInstruction* InsertArrayStore(HInstruction* subscript, int d) {
+    // ArraySet is given a float value in order to avoid SsaBuilder typing
+    // it from the array's non-existent reference type info.
     return InsertInstruction(new (&allocator_) HArraySet(
-        parameter_, load, constant0_, Primitive::kPrimInt, 0), d);
+        parameter_, subscript, float_constant0_, Primitive::kPrimFloat, 0), d);
   }
 
   // Returns induction information of instruction in loop at depth d.
@@ -162,9 +157,16 @@
         iva_->LookupInfo(loop_body_[d]->GetLoopInformation(), instruction));
   }
 
+  // Returns true if instructions have identical induction.
+  bool HaveSameInduction(HInstruction* instruction1, HInstruction* instruction2) {
+    return HInductionVarAnalysis::InductionEqual(
+      iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction1),
+      iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction2));
+  }
+
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    graph_->BuildDominatorTree();
     iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
     iva_->Run();
   }
@@ -177,21 +179,20 @@
 
   // Fixed basic blocks and instructions.
   HBasicBlock* entry_;
+  HBasicBlock* return_;
   HBasicBlock* exit_;
   HInstruction* parameter_;  // "this"
   HInstruction* constant0_;
   HInstruction* constant1_;
   HInstruction* constant100_;
-  HLocal* induc_;  // "vreg_n", the "k"
-  HLocal* tmp_;    // "vreg_n+1"
-  HLocal* dum_;    // "vreg_n+2"
+  HInstruction* float_constant0_;
 
   // Loop specifics.
   HBasicBlock* loop_preheader_[10];
   HBasicBlock* loop_header_[10];
   HBasicBlock* loop_body_[10];
   HInstruction* increment_[10];
-  HLocal* basic_[10];  // "vreg_d", the "i_d"
+  HPhi* basic_[10];  // "vreg_d", the "i_d"
 };
 
 //
@@ -207,7 +208,8 @@
   //   ..
   // }
   BuildLoopNest(10);
-  ASSERT_TRUE(graph_->TryBuildingSsa());
+  graph_->BuildDominatorTree();
+
   ASSERT_EQ(entry_->GetLoopInformation(), nullptr);
   for (int d = 0; d < 1; d++) {
     ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(),
@@ -230,11 +232,14 @@
   HInstruction* store = InsertArrayStore(basic_[0], 0);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + (0))", GetInductionInfo(store->InputAt(1), 0).c_str());
-  EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[0], 0).c_str());
+  EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Offset matters!
+  EXPECT_FALSE(HaveSameInduction(store->InputAt(1), increment_[0]));
 
   // Trip-count.
-  EXPECT_STREQ("(TC-loop:(100))",
+  EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
                GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
 }
 
@@ -249,27 +254,22 @@
   // }
   BuildLoopNest(1);
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, add, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   HInstruction *mul = InsertInstruction(
-      new (&allocator_) HMul(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, mul, 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0);
-  InsertLocalStore(induc_, shl, 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0);
   HInstruction *neg = InsertInstruction(
-      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, neg, 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, basic_[0]), 0);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + (100))", GetInductionInfo(add, 0).c_str());
-  EXPECT_STREQ("(( - (1)) * i + (100))", GetInductionInfo(sub, 0).c_str());
-  EXPECT_STREQ("((100) * i + (0))", GetInductionInfo(mul, 0).c_str());
-  EXPECT_STREQ("((2) * i + (0))", GetInductionInfo(shl, 0).c_str());
-  EXPECT_STREQ("(( - (1)) * i + (0))", GetInductionInfo(neg, 0).c_str());
+  EXPECT_STREQ("((1) * i + (100)):PrimInt", GetInductionInfo(add, 0).c_str());
+  EXPECT_STREQ("(( - (1)) * i + (100)):PrimInt", GetInductionInfo(sub, 0).c_str());
+  EXPECT_STREQ("((100) * i + (0)):PrimInt", GetInductionInfo(mul, 0).c_str());
+  EXPECT_STREQ("((2) * i + (0)):PrimInt", GetInductionInfo(shl, 0).c_str());
+  EXPECT_STREQ("(( - (1)) * i + (0)):PrimInt", GetInductionInfo(neg, 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindChainInduction) {
@@ -282,19 +282,21 @@
   //   a[k] = 0;
   // }
   BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(induc_, add, 0);
-  HInstruction* store1 = InsertArrayStore(induc_, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
+  HInstruction* store1 = InsertArrayStore(add, 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(induc_, sub, 0);
-  HInstruction* store2 = InsertArrayStore(induc_, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, add, constant1_), 0);
+  HInstruction* store2 = InsertArrayStore(sub, 0);
+  k->AddInput(sub);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("(((100) - (1)) * i + (100))",
+  EXPECT_STREQ("(((100) - (1)) * i + (100)):PrimInt",
                GetInductionInfo(store1->InputAt(1), 0).c_str());
-  EXPECT_STREQ("(((100) - (1)) * i + ((100) - (1)))",
+  EXPECT_STREQ("(((100) - (1)) * i + ((100) - (1))):PrimInt",
                GetInductionInfo(store2->InputAt(1), 0).c_str());
 }
 
@@ -307,26 +309,31 @@
   //   a[k] = 0;
   // }
   BuildLoopNest(1);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(constant0_);
+
   HBasicBlock* ifTrue;
   HBasicBlock* ifFalse;
-  BuildIf(0, &ifTrue, &ifFalse);
+  HPhi* k_body = BuildIf(0, &ifTrue, &ifFalse);
+
   // True-branch.
-  HInstruction* load1 = new (&allocator_) HLoadLocal(induc_, Primitive::kPrimInt);
-  ifTrue->AddInstruction(load1);
-  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, load1, constant1_);
+  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_);
   ifTrue->AddInstruction(inc1);
-  ifTrue->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc1));
+  k_body->AddInput(inc1);
   // False-branch.
-  HInstruction* load2 = new (&allocator_) HLoadLocal(induc_, Primitive::kPrimInt);
-  ifFalse->AddInstruction(load2);
-  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, load2, constant1_);
+  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_);
   ifFalse->AddInstruction(inc2);
-  ifFalse->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc2));
+  k_body->AddInput(inc2);
   // Merge over a phi.
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HInstruction* store = InsertArrayStore(k_body, 0);
+  k_header->AddInput(k_body);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
+
+  // Both increments get same induction.
+  EXPECT_TRUE(HaveSameInduction(store->InputAt(1), inc1));
+  EXPECT_TRUE(HaveSameInduction(store->InputAt(1), inc2));
 }
 
 TEST_F(InductionVarAnalysisTest, FindTwoWayDerivedInduction) {
@@ -339,24 +346,21 @@
   BuildLoopNest(1);
   HBasicBlock* ifTrue;
   HBasicBlock* ifFalse;
-  BuildIf(0, &ifTrue, &ifFalse);
+  HPhi* k = BuildIf(0, &ifTrue, &ifFalse);
+
   // True-branch.
-  HInstruction* load1 = new (&allocator_) HLoadLocal(basic_[0], Primitive::kPrimInt);
-  ifTrue->AddInstruction(load1);
-  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, load1, constant1_);
+  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], constant1_);
   ifTrue->AddInstruction(inc1);
-  ifTrue->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc1));
+  k->AddInput(inc1);
   // False-branch.
-  HInstruction* load2 = new (&allocator_) HLoadLocal(basic_[0], Primitive::kPrimInt);
-  ifFalse->AddInstruction(load2);
-  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, load2, constant1_);
+  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], constant1_);
   ifFalse->AddInstruction(inc2);
-  ifFalse->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc2));
+  k->AddInput(inc2);
   // Merge over a phi.
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HInstruction* store = InsertArrayStore(k, 0);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindFirstOrderWrapAroundInduction) {
@@ -367,13 +371,16 @@
   //   k = 100 - i;
   // }
   BuildLoopNest(1);
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
+  k->AddInput(sub);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)))",
+  EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)):PrimInt):PrimInt",
                GetInductionInfo(store->InputAt(1), 0).c_str());
 }
 
@@ -387,14 +394,19 @@
   //   t = 100 - i;
   // }
   BuildLoopNest(1);
-  HInstruction* store = InsertArrayStore(induc_, 0);
-  InsertLocalStore(induc_, InsertLocalLoad(tmp_, 0), 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+  HPhi* t = InsertLoopPhi(1, 0);
+  t->AddInput(constant100_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
+  k->AddInput(t);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(tmp_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
+  t->AddInput(sub);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("wrap((0), wrap((100), (( - (1)) * i + (100))))",
+  EXPECT_STREQ("wrap((0), wrap((100), (( - (1)) * i + (100)):PrimInt):PrimInt):PrimInt",
                GetInductionInfo(store->InputAt(1), 0).c_str());
 }
 
@@ -410,33 +422,33 @@
   //   k = i << 1;
   // }
   BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, add, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction *mul = InsertInstruction(
-      new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, mul, 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(tmp_, shl, 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, k, constant1_), 0);
   HInstruction *neg = InsertInstruction(
-      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
-  InsertLocalStore(tmp_, neg, 0);
-  InsertLocalStore(
-      induc_,
-      InsertInstruction(
-          new (&allocator_)
-          HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0), 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, k), 0);
+  k->AddInput(
+      InsertInstruction(new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0));
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("wrap((100), ((2) * i + (100)))", GetInductionInfo(add, 0).c_str());
-  EXPECT_STREQ("wrap(((0) - (100)), ((2) * i + ((0) - (100))))", GetInductionInfo(sub, 0).c_str());
-  EXPECT_STREQ("wrap((0), (((2) * (100)) * i + (0)))", GetInductionInfo(mul, 0).c_str());
-  EXPECT_STREQ("wrap((0), (((2) * (2)) * i + (0)))", GetInductionInfo(shl, 0).c_str());
-  EXPECT_STREQ("wrap((0), (( - (2)) * i + (0)))", GetInductionInfo(neg, 0).c_str());
+  EXPECT_STREQ("wrap((100), ((2) * i + (100)):PrimInt):PrimInt",
+               GetInductionInfo(add, 0).c_str());
+  EXPECT_STREQ("wrap(((0) - (100)), ((2) * i + ((0) - (100))):PrimInt):PrimInt",
+               GetInductionInfo(sub, 0).c_str());
+  EXPECT_STREQ("wrap((0), (((2) * (100)) * i + (0)):PrimInt):PrimInt",
+               GetInductionInfo(mul, 0).c_str());
+  EXPECT_STREQ("wrap((0), (((2) * (2)) * i + (0)):PrimInt):PrimInt",
+               GetInductionInfo(shl, 0).c_str());
+  EXPECT_STREQ("wrap((0), (( - (2)) * i + (0)):PrimInt):PrimInt",
+               GetInductionInfo(neg, 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindPeriodicInduction) {
@@ -452,15 +464,19 @@
   //   k = d;
   // }
   BuildLoopNest(1);
-  HInstruction* store1 = InsertArrayStore(induc_, 0);
-  HInstruction* store2 = InsertArrayStore(tmp_, 0);
-  InsertLocalStore(dum_, InsertLocalLoad(tmp_, 0), 0);
-  InsertLocalStore(tmp_, InsertLocalLoad(induc_, 0), 0);
-  InsertLocalStore(induc_, InsertLocalLoad(dum_, 0), 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+  HPhi* t = InsertLoopPhi(1, 0);
+  t->AddInput(constant100_);
+
+  HInstruction* store1 = InsertArrayStore(k, 0);
+  HInstruction* store2 = InsertArrayStore(t, 0);
+  k->AddInput(t);
+  t->AddInput(k);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("periodic((0), (100))", GetInductionInfo(store1->InputAt(1), 0).c_str());
-  EXPECT_STREQ("periodic((100), (0))", GetInductionInfo(store2->InputAt(1), 0).c_str());
+  EXPECT_STREQ("periodic((0), (100)):PrimInt", GetInductionInfo(store1->InputAt(1), 0).c_str());
+  EXPECT_STREQ("periodic((100), (0)):PrimInt", GetInductionInfo(store2->InputAt(1), 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) {
@@ -471,14 +487,17 @@
   //   k = 1 - k;
   // }
   BuildLoopNest(1);
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0);
-  InsertLocalStore(induc_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k), 0);
+  k->AddInput(sub);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("periodic((0), (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
-  EXPECT_STREQ("periodic((1), (0))", GetInductionInfo(sub, 0).c_str());
+  EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
+  EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(sub, 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) {
@@ -493,33 +512,31 @@
   //   t = - k;
   // }
   BuildLoopNest(1);
-  InsertLocalStore(
-      induc_,
-      InsertInstruction(new (&allocator_)
-                        HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0), 0);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(constant0_);
+
+  HInstruction* k_body = InsertInstruction(
+      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k_header), 0);
+  k_header->AddInput(k_body);
+
   // Derived expressions.
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, add, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, k_body, constant100_), 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, k_body, constant100_), 0);
   HInstruction *mul = InsertInstruction(
-      new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, mul, 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, k_body, constant100_), 0);
   HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(tmp_, shl, 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, k_body, constant1_), 0);
   HInstruction *neg = InsertInstruction(
-      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
-  InsertLocalStore(tmp_, neg, 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, k_body), 0);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("periodic(((1) + (100)), (100))", GetInductionInfo(add, 0).c_str());
-  EXPECT_STREQ("periodic(((1) - (100)), ((0) - (100)))", GetInductionInfo(sub, 0).c_str());
-  EXPECT_STREQ("periodic((100), (0))", GetInductionInfo(mul, 0).c_str());
-  EXPECT_STREQ("periodic((2), (0))", GetInductionInfo(shl, 0).c_str());
-  EXPECT_STREQ("periodic(( - (1)), (0))", GetInductionInfo(neg, 0).c_str());
+  EXPECT_STREQ("periodic(((1) + (100)), (100)):PrimInt", GetInductionInfo(add, 0).c_str());
+  EXPECT_STREQ("periodic(((1) - (100)), ((0) - (100))):PrimInt", GetInductionInfo(sub, 0).c_str());
+  EXPECT_STREQ("periodic((100), (0)):PrimInt", GetInductionInfo(mul, 0).c_str());
+  EXPECT_STREQ("periodic((2), (0)):PrimInt", GetInductionInfo(shl, 0).c_str());
+  EXPECT_STREQ("periodic(( - (1)), (0)):PrimInt", GetInductionInfo(neg, 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) {
@@ -534,15 +551,25 @@
   //   ..
   // }
   BuildLoopNest(10);
+
+  HPhi* k[10];
+  for (int d = 0; d < 10; d++) {
+    k[d] = InsertLoopPhi(0, d);
+  }
+
   HInstruction *inc = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 9)), 9);
-  InsertLocalStore(induc_, inc, 9);
-  HInstruction* store = InsertArrayStore(induc_, 9);
+      new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, k[9]), 9);
+  HInstruction* store = InsertArrayStore(inc, 9);
+
+  for (int d = 0; d < 10; d++) {
+    k[d]->AddInput((d != 0) ? k[d - 1] : constant0_);
+    k[d]->AddInput((d != 9) ? k[d + 1] : inc);
+  }
   PerformInductionVarAnalysis();
 
   // Avoid exact phi number, since that depends on the SSA building phase.
   std::regex r("\\(\\(1\\) \\* i \\+ "
-               "\\(\\(1\\) \\+ \\(\\d+:Phi\\)\\)\\)");
+               "\\(\\(1\\) \\+ \\(\\d+:Phi\\)\\)\\):PrimInt");
 
   for (int d = 0; d < 10; d++) {
     if (d == 9) {
@@ -550,11 +577,149 @@
     } else {
       EXPECT_STREQ("", GetInductionInfo(store->InputAt(1), d).c_str());
     }
-    EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[d], d).c_str());
+    EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[d], d).c_str());
     // Trip-count.
-    EXPECT_STREQ("(TC-loop:(100))",
+    EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
                  GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str());
   }
 }
 
+TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) {
+  // Setup:
+  // for (int i = 0; i < 100; i++) {
+  //   k = (byte) i;
+  //   a[k] = 0;
+  //   a[i] = 0;
+  // }
+  BuildLoopNest(1);
+  HInstruction *conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+  HInstruction* store1 = InsertArrayStore(conv, 0);
+  HInstruction* store2 = InsertArrayStore(basic_[0], 0);
+  PerformInductionVarAnalysis();
+
+  // Regular int induction (i) is "transferred" over conversion into byte induction (k).
+  EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (0)):PrimInt",  GetInductionInfo(store2->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimInt",  GetInductionInfo(increment_[0], 0).c_str());
+
+  // Type matters!
+  EXPECT_FALSE(HaveSameInduction(store1->InputAt(1), store2->InputAt(1)));
+
+  // Trip-count.
+  EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
+               GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
+  // Setup:
+  // for (byte i = -128; i < 127; i++) {  // just fits!
+  // }
+  BuildLoopNest(1);
+  basic_[0]->ReplaceInput(graph_->GetIntConstant(-128), 0);
+  HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
+  ifs->ReplaceInput(graph_->GetIntConstant(127), 1);
+  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], -1);
+  loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
+  basic_[0]->ReplaceInput(conv, 1);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str());
+  // Trip-count.
+  EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))",
+               GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, ByteLoopControl2) {
+  // Setup:
+  // for (byte i = -128; i < 128; i++) {  // infinite loop!
+  // }
+  BuildLoopNest(1);
+  basic_[0]->ReplaceInput(graph_->GetIntConstant(-128), 0);
+  HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
+  ifs->ReplaceInput(graph_->GetIntConstant(128), 1);
+  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], -1);
+  loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
+  basic_[0]->ReplaceInput(conv, 1);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str());
+  // Trip-count undefined.
+  EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, ShortLoopControl1) {
+  // Setup:
+  // for (short i = -32768; i < 32767; i++) {  // just fits!
+  // }
+  BuildLoopNest(1);
+  basic_[0]->ReplaceInput(graph_->GetIntConstant(-32768), 0);
+  HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
+  ifs->ReplaceInput(graph_->GetIntConstant(32767), 1);
+  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], -1);
+  loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
+  basic_[0]->ReplaceInput(conv, 1);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort",
+               GetInductionInfo(increment_[0], 0).c_str());
+  // Trip-count.
+  EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))",
+               GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, ShortLoopControl2) {
+  // Setup:
+  // for (short i = -32768; i < 32768; i++) {  // infinite loop!
+  // }
+  BuildLoopNest(1);
+  basic_[0]->ReplaceInput(graph_->GetIntConstant(-32768), 0);
+  HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
+  ifs->ReplaceInput(graph_->GetIntConstant(32768), 1);
+  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], -1);
+  loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
+  basic_[0]->ReplaceInput(conv, 1);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort",
+               GetInductionInfo(increment_[0], 0).c_str());
+  // Trip-count undefined.
+  EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, CharLoopControl1) {
+  // Setup:
+  // for (char i = 0; i < 65535; i++) {  // just fits!
+  // }
+  BuildLoopNest(1);
+  HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
+  ifs->ReplaceInput(graph_->GetIntConstant(65535), 1);
+  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], -1);
+  loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
+  basic_[0]->ReplaceInput(conv, 1);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str());
+  // Trip-count.
+  EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))",
+               GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, CharLoopControl2) {
+  // Setup:
+  // for (char i = 0; i < 65536; i++) {  // infinite loop!
+  // }
+  BuildLoopNest(1);
+  HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
+  ifs->ReplaceInput(graph_->GetIntConstant(65536), 1);
+  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], -1);
+  loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
+  basic_[0]->ReplaceInput(conv, 1);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str());
+  // Trip-count undefined.
+  EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
 }  // namespace art
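
For context on the HaveSameInduction() check used by the new typed tests above: it is a small
test-fixture helper that compares the induction info of two instructions, and the new type
component participates in that comparison. A minimal sketch, assuming the fixture keeps the
analysis in a member named iva_ (the member name is an assumption here, not part of this change):

    // Sketch only: two instructions have the same induction iff their info in the
    // shared loop is structurally equal; since type is part of the info, an int
    // induction and a byte induction of the same polynomial compare unequal.
    bool HaveSameInduction(HInstruction* instruction1, HInstruction* instruction2) {
      HLoopInformation* loop = instruction1->GetBlock()->GetLoopInformation();
      return HInductionVarAnalysis::InductionEqual(
          iva_->LookupInfo(loop, instruction1),
          iva_->LookupInfo(loop, instruction2));
    }
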
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index f4842f9..5e587e0 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -45,28 +45,26 @@
   return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
 }
 
-/** Returns true for 32/64-bit integral constant. */
-static bool IsIntAndGet(HInstruction* instruction, int32_t* value) {
+/** Returns true for a 32/64-bit constant instruction. */
+static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
   if (instruction->IsIntConstant()) {
     *value = instruction->AsIntConstant()->GetValue();
     return true;
   } else if (instruction->IsLongConstant()) {
-    const int64_t c = instruction->AsLongConstant()->GetValue();
-    if (CanLongValueFitIntoInt(c)) {
-      *value = static_cast<int32_t>(c);
-      return true;
-    }
+    *value = instruction->AsLongConstant()->GetValue();
+    return true;
   }
   return false;
 }
 
 /**
- * An upper bound a * (length / a) + b, where a > 0, can be conservatively rewritten as length + b
+ * An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as length + b
  * because length >= 0 is true. This makes it more likely the bound is useful to clients.
  */
 static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
-  int32_t value;
-  if (v.a_constant > 1 &&
+  int64_t value;
+  if (v.is_known &&
+      v.a_constant >= 1 &&
       v.instruction->IsDiv() &&
       v.instruction->InputAt(0)->IsArrayLength() &&
       IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
@@ -75,10 +73,36 @@
   return v;
 }
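
Worked example of the rewrite above: if v encodes 4 * (length / 4) + 2, i.e. a_constant == 4,
instruction == Div(ArrayLength, 4) and b_constant == 2, then length >= 0 implies
4 * (length / 4) <= length, so the maximum can be conservatively replaced by
Value(length, 1, 2), that is, length + 2.
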
 
-static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) {
-  DCHECK(preheader != nullptr);
+/** Helper method to test for a constant value. */
+static bool IsConstantValue(InductionVarRange::Value v) {
+  return v.is_known && v.a_constant == 0;
+}
+
+/** Corrects a value for its type to account for arithmetic wrap-around in lower precision. */
+static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte: {
+      // Constants within range only.
+      // TODO: maybe some room for improvement, like allowing widening conversions
+      const int32_t min = Primitive::MinValueOfIntegralType(type);
+      const int32_t max = Primitive::MaxValueOfIntegralType(type);
+      return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max)
+          ? v
+          : InductionVarRange::Value();
+    }
+    default:
+      return v;
+  }
+}
+
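As an example of the correction above: a byte-typed result known to be the constant 100 is
returned unchanged, the constant 200 is dropped as unknown (it would wrap to -56 in a byte),
and any non-constant value of a narrow type is dropped as well.
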
+/** Helper method to insert an instruction. */
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+  DCHECK(block != nullptr);
+  DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
   DCHECK(instruction != nullptr);
-  preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction());
+  block->InsertInstructionBefore(instruction, block->GetLastInstruction());
   return instruction;
 }
 
@@ -87,82 +111,303 @@
 //
 
 InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis)
-    : induction_analysis_(induction_analysis) {
+    : induction_analysis_(induction_analysis),
+      chase_hint_(nullptr) {
   DCHECK(induction_analysis != nullptr);
 }
 
-InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context,
-                                                            HInstruction* instruction) {
-  return GetInduction(context, instruction, /* is_min */ true);
-}
-
-InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context,
-                                                            HInstruction* instruction) {
-  return SimplifyMax(GetInduction(context, instruction, /* is_min */ false));
+bool InductionVarRange::GetInductionRange(HInstruction* context,
+                                          HInstruction* instruction,
+                                          HInstruction* chase_hint,
+                                          /*out*/Value* min_val,
+                                          /*out*/Value* max_val,
+                                          /*out*/bool* needs_finite_test) {
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip)) {
+    return false;
+  }
+  // Type int or lower (this is not too restrictive since intended clients, like
+  // bounds check elimination, will have truncated higher precision induction
+  // at their use point already).
+  switch (info->type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte:
+      break;
+    default:
+      return false;
+  }
+  // Find range.
+  chase_hint_ = chase_hint;
+  bool in_body = context->GetBlock() != loop->GetHeader();
+  *min_val = GetVal(info, trip, in_body, /* is_min */ true);
+  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+  *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+  return true;
 }
 
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
-                                        /*out*/bool* top_test) {
-  return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test);
+                                        /*out*/bool* needs_finite_test,
+                                        /*out*/bool* needs_taken_test) {
+  return GenerateCode(context,
+                      instruction,
+                      nullptr, nullptr, nullptr, nullptr, nullptr,  // nothing generated yet
+                      needs_finite_test,
+                      needs_taken_test);
 }
 
-bool InductionVarRange::GenerateCode(HInstruction* context,
-                                     HInstruction* instruction,
-                                     HGraph* graph,
-                                     HBasicBlock* block,
-                                     /*out*/HInstruction** lower,
-                                     /*out*/HInstruction** upper) {
-  return GenerateCode(context, instruction, graph, block, lower, upper, nullptr);
+void InductionVarRange::GenerateRangeCode(HInstruction* context,
+                                          HInstruction* instruction,
+                                          HGraph* graph,
+                                          HBasicBlock* block,
+                                          /*out*/HInstruction** lower,
+                                          /*out*/HInstruction** upper) {
+  bool b1, b2;  // unused
+  if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) {
+    LOG(FATAL) << "Failed precondition: GenerateCode()";
+  }
+}
+
+void InductionVarRange::GenerateTakenTest(HInstruction* context,
+                                          HGraph* graph,
+                                          HBasicBlock* block,
+                                          /*out*/HInstruction** taken_test) {
+  bool b1, b2;  // unused
+  if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) {
+    LOG(FATAL) << "Failed precondition: GenerateCode()";
+  }
 }
 
 //
 // Private class methods.
 //
 
-InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context,
-                                                         HInstruction* instruction,
-                                                         bool is_min) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop != nullptr) {
-    HBasicBlock* header = loop->GetHeader();
-    bool in_body = context->GetBlock() != header;
-    return GetVal(induction_analysis_->LookupInfo(loop, instruction),
-                  induction_analysis_->LookupInfo(loop, header->GetLastInstruction()),
-                  in_body,
-                  is_min);
+bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
+                                   ConstantRequest request,
+                                   /*out*/ int64_t* value) const {
+  if (info != nullptr) {
+    // A direct 32-bit or 64-bit constant fetch. This immediately satisfies
+    // any of the three requests (kExact, kAtMost, and kAtLeast).
+    if (info->induction_class == HInductionVarAnalysis::kInvariant &&
+        info->operation == HInductionVarAnalysis::kFetch) {
+      if (IsIntAndGet(info->fetch, value)) {
+        return true;
+      }
+    }
+    // Try range analysis on the invariant, but only on a proper range to avoid wrap-around anomalies.
+    Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
+    Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
+    if (IsConstantValue(min_val) &&
+        IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) {
+      if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) {
+        *value = max_val.b_constant;
+        return true;
+      } else if (request == kAtLeast) {
+        *value = min_val.b_constant;
+        return true;
+      }
+    }
   }
-  return Value();
+  return false;
+}
+
+bool InductionVarRange::HasInductionInfo(
+    HInstruction* context,
+    HInstruction* instruction,
+    /*out*/ HLoopInformation** loop,
+    /*out*/ HInductionVarAnalysis::InductionInfo** info,
+    /*out*/ HInductionVarAnalysis::InductionInfo** trip) const {
+  HLoopInformation* l = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (l != nullptr) {
+    HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction);
+    if (i != nullptr) {
+      *loop = l;
+      *info = i;
+      *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction());
+      return true;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
+  if (trip != nullptr) {
+    // Both bounds that define a trip-count are well-behaved if they either are not defined
+    // in any loop, or are contained in a proper interval. This allows finding the min/max
+    // of an expression by chasing outward.
+    InductionVarRange range(induction_analysis_);
+    HInductionVarAnalysis::InductionInfo* lower = trip->op_b->op_a;
+    HInductionVarAnalysis::InductionInfo* upper = trip->op_b->op_b;
+    int64_t not_used = 0;
+    return (!HasFetchInLoop(lower) || range.IsConstant(lower, kAtLeast, &not_used)) &&
+           (!HasFetchInLoop(upper) || range.IsConstant(upper, kAtLeast, &not_used));
+  }
+  return true;
+}
+
+bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kInvariant &&
+        info->operation == HInductionVarAnalysis::kFetch) {
+      return info->fetch->GetBlock()->GetLoopInformation() != nullptr;
+    }
+    return HasFetchInLoop(info->op_a) || HasFetchInLoop(info->op_b);
+  }
+  return false;
+}
+
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kLinear) {
+      return true;
+    } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) {
+      return NeedsTripCount(info->op_b);
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
+  if (trip != nullptr) {
+    if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+      return trip->operation == HInductionVarAnalysis::kTripCountInBody ||
+             trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
+  if (trip != nullptr) {
+    if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+      return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe ||
+             trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe;
+    }
+  }
+  return false;
+}
+
+InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::InductionInfo* info,
+                                                      HInductionVarAnalysis::InductionInfo* trip,
+                                                      bool in_body,
+                                                      bool is_min) const {
+  // Detect the common situation where an offset inside the trip-count cancels out during range
+  // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET, or finding
+  // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER) to avoid losing information
+  // with intermediate results that only incorporate single instructions.
+  if (trip != nullptr) {
+    HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a;
+    if (trip_expr->type == info->type && trip_expr->operation == HInductionVarAnalysis::kSub) {
+      int64_t stride_value = 0;
+      if (IsConstant(info->op_a, kExact, &stride_value)) {
+        if (!is_min && stride_value == 1) {
+          // Test original trip's negative operand (trip_expr->op_b) against offset of induction.
+          if (HInductionVarAnalysis::InductionEqual(trip_expr->op_b, info->op_b)) {
+            // Analyze cancelled trip with just the positive operand (trip_expr->op_a).
+            HInductionVarAnalysis::InductionInfo cancelled_trip(
+                trip->induction_class,
+                trip->operation,
+                trip_expr->op_a,
+                trip->op_b,
+                nullptr,
+                trip->type);
+            return GetVal(&cancelled_trip, trip, in_body, is_min);
+          }
+        } else if (is_min && stride_value == -1) {
+          // Test original trip's positive operand (trip_expr->op_a) against offset of induction.
+          if (HInductionVarAnalysis::InductionEqual(trip_expr->op_a, info->op_b)) {
+            // Analyze cancelled trip with just the negative operand (trip_expr->op_b).
+            HInductionVarAnalysis::InductionInfo neg(
+                HInductionVarAnalysis::kInvariant,
+                HInductionVarAnalysis::kNeg,
+                nullptr,
+                trip_expr->op_b,
+                nullptr,
+                trip->type);
+            HInductionVarAnalysis::InductionInfo cancelled_trip(
+                trip->induction_class, trip->operation, &neg, trip->op_b, nullptr, trip->type);
+            return SubValue(Value(0), GetVal(&cancelled_trip, trip, in_body, !is_min));
+          }
+        }
+      }
+    }
+  }
+  // General rule of linear induction a * i + b, for normalized 0 <= i < TC.
+  return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
+                  GetVal(info->op_b, trip, in_body, is_min));
 }
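
Concretely, for a loop like for (int i = 0; i < n - x; i++) { a[x + i] = 0; } the subscript
has linear info (1) * i + (x) and trip-count TC = (n - x). Evaluating the maximum
1 * (TC - 1) + x piece by piece loses the relation between TC and x; the cancellation above
matches x against the negative operand of the trip expression and yields n - 1 directly.
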
 
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
-                                                     bool is_min) {
-  // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
-  // more likely range analysis will compare the same instructions as terminal nodes.
-  int32_t value;
-  if (IsIntAndGet(instruction, &value)) {
-    return Value(value);
-  } else if (instruction->IsAdd()) {
-    if (IsIntAndGet(instruction->InputAt(0), &value)) {
-      return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, in_body, is_min));
-    } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
-      return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value));
-    }
-  } else if (is_min) {
-    // Special case for finding minimum: minimum of trip-count in loop-body is 1.
-    if (trip != nullptr && in_body && instruction == trip->op_b->fetch) {
+                                                     bool is_min) const {
+  // Stop chasing the instruction at a constant or at the client-supplied hint.
+  int64_t value;
+  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+    return Value(static_cast<int32_t>(value));
+  } else if (instruction == chase_hint_) {
+    return Value(instruction, 1, 0);
+  }
+  // Special cases when encountering a single instruction that denotes the trip count in
+  // the loop-body: the min is 1 and, when chasing constants, the max of a safe trip-count
+  // is max int.
+  if (in_body && trip != nullptr && instruction == trip->op_a->fetch) {
+    if (is_min) {
       return Value(1);
+    } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) {
+      return Value(std::numeric_limits<int32_t>::max());
     }
   }
+  // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely
+  // that range analysis will compare the same instructions as terminal nodes.
+  if (instruction->IsAdd()) {
+    if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
+      return AddValue(Value(static_cast<int32_t>(value)),
+                      GetFetch(instruction->InputAt(1), trip, in_body, is_min));
+    } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
+      return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
+                      Value(static_cast<int32_t>(value)));
+    }
+  } else if (instruction->IsArrayLength()) {
+    // Return extreme values when chasing constants. Otherwise, chase deeper.
+    if (chase_hint_ == nullptr) {
+      return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max());
+    } else if (instruction->InputAt(0)->IsNewArray()) {
+      return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
+    }
+  } else if (instruction->IsTypeConversion()) {
+    // Since the analysis is 32-bit (or narrower), we allow a widening along the path.
+    if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
+        instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
+      return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
+    }
+  }
+  // Chase an invariant fetch that is defined by an outer loop if the trip-count used
+  // so far is well-behaved in both bounds and the next trip-count is safe.
+  // Example:
+  //   for (int i = 0; i <= 100; i++)  // safe
+  //     for (int j = 0; j <= i; j++)  // well-behaved
+  //       j is in range [0, i  ] (if i is chase hint)
+  //         or in range [0, 100] (otherwise)
+  HLoopInformation* next_loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* next_info = nullptr;
+  HInductionVarAnalysis::InductionInfo* next_trip = nullptr;
+  bool next_in_body = true;  // inner loop is always in body of outer loop
+  if (HasInductionInfo(instruction, instruction, &next_loop, &next_info, &next_trip) &&
+      IsWellBehavedTripCount(trip) &&
+      !IsUnsafeTripCount(next_trip)) {
+    return GetVal(next_info, next_trip, next_in_body, is_min);
+  }
   return Value(instruction, 1, 0);
 }
 
 InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   if (info != nullptr) {
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
@@ -184,15 +429,17 @@
           case HInductionVarAnalysis::kFetch:
             return GetFetch(info->fetch, trip, in_body, is_min);
           case HInductionVarAnalysis::kTripCountInLoop:
+          case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
-              return GetVal(info->op_b, trip, in_body, is_min);
+              return GetVal(info->op_a, trip, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
+          case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               return Value(0);
             } else if (in_body) {
-              return SubValue(GetVal(info->op_b, trip, in_body, is_min), Value(1));
+              return SubValue(GetVal(info->op_a, trip, in_body, is_min), Value(1));
             }
             break;
           default:
@@ -200,12 +447,9 @@
         }
         break;
       case HInductionVarAnalysis::kLinear:
-        // Linear induction a * i + b, for normalized 0 <= i < TC.
-        return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
-                        GetVal(info->op_b, trip, in_body, is_min));
+        return CorrectForType(GetLinear(info, trip, in_body, is_min), info->type);
       case HInductionVarAnalysis::kWrapAround:
       case HInductionVarAnalysis::kPeriodic:
-        // Merge values in the wrap-around/periodic.
         return MergeVal(GetVal(info->op_a, trip, in_body, is_min),
                         GetVal(info->op_b, trip, in_body, is_min), is_min);
     }
@@ -217,28 +461,33 @@
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
+  // Constant times range.
+  int64_t value = 0;
+  if (IsConstant(info1, kExact, &value)) {
+    return MulRangeAndConstant(value, info2, trip, in_body, is_min);
+  } else if (IsConstant(info2, kExact, &value)) {
+    return MulRangeAndConstant(value, info1, trip, in_body, is_min);
+  }
+  // Interval ranges.
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
-  if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
-    // Positive range vs. positive or negative range.
-    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return is_min ? MulValue(v1_min, v2_min)
-                    : MulValue(v1_max, v2_max);
-    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return is_min ? MulValue(v1_max, v2_min)
-                    : MulValue(v1_min, v2_max);
+  // Positive range vs. positive or negative range.
+  if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
+    if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
+      return is_min ? MulValue(v1_min, v2_min) : MulValue(v1_max, v2_max);
+    } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) {
+      return is_min ? MulValue(v1_max, v2_min) : MulValue(v1_min, v2_max);
     }
-  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
-    // Negative range vs. positive or negative range.
-    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return is_min ? MulValue(v1_min, v2_max)
-                    : MulValue(v1_max, v2_min);
-    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return is_min ? MulValue(v1_max, v2_max)
-                    : MulValue(v1_min, v2_min);
+  }
+  // Negative range vs. positive or negative range.
+  if (IsConstantValue(v1_max) && v1_max.b_constant <= 0) {
+    if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
+      return is_min ? MulValue(v1_min, v2_max) : MulValue(v1_max, v2_min);
+    } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) {
+      return is_min ? MulValue(v1_max, v2_max) : MulValue(v1_min, v2_min);
     }
   }
   return Value();
@@ -248,46 +497,63 @@
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
+  // Range divided by constant.
+  int64_t value = 0;
+  if (IsConstant(info2, kExact, &value)) {
+    return DivRangeAndConstant(value, info1, trip, in_body, is_min);
+  }
+  // Interval ranges.
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
-  if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
-    // Positive range vs. positive or negative range.
-    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return is_min ? DivValue(v1_min, v2_max)
-                    : DivValue(v1_max, v2_min);
-    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return is_min ? DivValue(v1_max, v2_max)
-                    : DivValue(v1_min, v2_min);
+  // Positive range vs. positive or negative range.
+  if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
+    if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
+      return is_min ? DivValue(v1_min, v2_max) : DivValue(v1_max, v2_min);
+    } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) {
+      return is_min ? DivValue(v1_max, v2_max) : DivValue(v1_min, v2_min);
     }
-  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
-    // Negative range vs. positive or negative range.
-    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return is_min ? DivValue(v1_min, v2_min)
-                    : DivValue(v1_max, v2_max);
-    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return is_min ? DivValue(v1_max, v2_min)
-                    : DivValue(v1_min, v2_max);
+  }
+  // Negative range vs. positive or negative range.
+  if (IsConstantValue(v1_max) && v1_max.b_constant <= 0) {
+    if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
+      return is_min ? DivValue(v1_min, v2_min) : DivValue(v1_max, v2_max);
+    } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) {
+      return is_min ? DivValue(v1_max, v2_min) : DivValue(v1_min, v2_max);
     }
   }
   return Value();
 }
 
-bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) {
-  Value v_min = GetVal(info, nullptr, false, /* is_min */ true);
-  Value v_max = GetVal(info, nullptr, false, /* is_min */ false);
-  if (v_min.is_known && v_max.is_known) {
-    if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) {
-      *value = v_min.b_constant;
-      return true;
-    }
+InductionVarRange::Value InductionVarRange::MulRangeAndConstant(
+    int64_t value,
+    HInductionVarAnalysis::InductionInfo* info,
+    HInductionVarAnalysis::InductionInfo* trip,
+    bool in_body,
+    bool is_min) const {
+  if (CanLongValueFitIntoInt(value)) {
+    Value c(static_cast<int32_t>(value));
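+    // A negative constant flips which extreme of the range is needed (min <-> max).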
+    return MulValue(GetVal(info, trip, in_body, is_min == value >= 0), c);
   }
-  return false;
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::DivRangeAndConstant(
+    int64_t value,
+    HInductionVarAnalysis::InductionInfo* info,
+    HInductionVarAnalysis::InductionInfo* trip,
+    bool in_body,
+    bool is_min) const {
+  if (CanLongValueFitIntoInt(value)) {
+    Value c(static_cast<int32_t>(value));
+    return DivValue(GetVal(info, trip, in_body, is_min == value >= 0), c);
+  }
+  return Value();
+}
+
+InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant + v2.b_constant;
     if (v1.a_constant == 0) {
@@ -301,7 +567,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant - v2.b_constant;
     if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) {
@@ -315,7 +581,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known) {
     if (v1.a_constant == 0) {
       if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
@@ -330,7 +596,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && v1.a_constant == 0 && v2.a_constant == 0) {
     if (IsSafeDiv(v1.b_constant, v2.b_constant)) {
       return Value(v1.b_constant / v2.b_constant);
@@ -339,7 +605,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) {
+InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) const {
   if (v1.is_known && v2.is_known) {
     if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) {
       return Value(v1.instruction, v1.a_constant,
@@ -356,27 +622,40 @@
                                      HBasicBlock* block,
                                      /*out*/HInstruction** lower,
                                      /*out*/HInstruction** upper,
-                                     /*out*/bool* top_test) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop != nullptr) {
-    HBasicBlock* header = loop->GetHeader();
-    bool in_body = context->GetBlock() != header;
-    HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
-    HInductionVarAnalysis::InductionInfo* trip =
-        induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    if (info != nullptr && trip != nullptr) {
-      if (top_test != nullptr) {
-        *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop;
-      }
-      return
-        // Success on lower if invariant (not set), or code can be generated.
-        ((info->induction_class == HInductionVarAnalysis::kInvariant) ||
-            GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
-        // And success on upper.
-        GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
+                                     /*out*/HInstruction** taken_test,
+                                     /*out*/bool* needs_finite_test,
+                                     /*out*/bool* needs_taken_test) const {
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip) || trip == nullptr) {
+    return false;  // codegen needs all information, including the trip-count
+  }
+  // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+  // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
+  // the computed range). A taken test is needed for any unknown trip-count, even if the
+  // evaluation code does not use the trip-count explicitly (since there could be an implicit
+  // relation between e.g. an invariant subscript and a not-taken condition).
+  bool in_body = context->GetBlock() != loop->GetHeader();
+  *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+  *needs_taken_test = IsBodyTripCount(trip);
+  // Code generation for the taken test: generate the code when requested, or otherwise
+  // analyze whether code generation is feasible when a taken test is needed.
+  if (taken_test != nullptr) {
+    return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false);
+  } else if (*needs_taken_test) {
+    if (!GenerateCode(
+        trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) {
+      return false;
     }
   }
-  return false;
+  // Code generation for lower and upper.
+  return
+      // Success on lower if invariant (not set), or code can be generated.
+      ((info->induction_class == HInductionVarAnalysis::kInvariant) ||
+          GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
+      // And success on upper.
+      GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
 }
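
As an illustration of the taken-test path, a client that saw needs_taken_test set could
request the test explicitly (a hedged sketch; range, context, graph and block stand for the
client's own values, and the deoptimization guard is just one possible use):

    HInstruction* taken_test = nullptr;
    range.GenerateTakenTest(context, graph, block, &taken_test);
    // For a loop header condition i < b, taken_test now holds the generated
    // comparison, inserted before the last instruction of block, and can guard
    // e.g. a deoptimization added by bounds check elimination.
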
 
 bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
@@ -385,21 +664,44 @@
                                      HBasicBlock* block,
                                      /*out*/HInstruction** result,
                                      bool in_body,
-                                     bool is_min) {
+                                     bool is_min) const {
   if (info != nullptr) {
+    // Verify type safety.
     Primitive::Type type = Primitive::kPrimInt;
+    if (info->type != type) {
+      return false;
+    }
+    // Handle current operation.
     HInstruction* opa = nullptr;
     HInstruction* opb = nullptr;
-    int32_t value = 0;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
         // Invariants.
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
+          case HInductionVarAnalysis::kLT:
+          case HInductionVarAnalysis::kLE:
+          case HInductionVarAnalysis::kGT:
+          case HInductionVarAnalysis::kGE:
             if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
                 GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
               if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+                HInstruction* operation = nullptr;
+                switch (info->operation) {
+                  case HInductionVarAnalysis::kAdd:
+                    operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+                  case HInductionVarAnalysis::kLT:
+                    operation = new (graph->GetArena()) HLessThan(opa, opb); break;
+                  case HInductionVarAnalysis::kLE:
+                    operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break;
+                  case HInductionVarAnalysis::kGT:
+                    operation = new (graph->GetArena()) HGreaterThan(opa, opb); break;
+                  case HInductionVarAnalysis::kGE:
+                    operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break;
+                  default:
+                    LOG(FATAL) << "unknown operation";
+                }
+                *result = Insert(block, operation);
               }
               return true;
             }
@@ -427,18 +729,20 @@
             }
             return true;
           case HInductionVarAnalysis::kTripCountInLoop:
+          case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
-              return GenerateCode(info->op_b, trip, graph, block, result, in_body, is_min);
+              return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
+          case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               if (graph != nullptr) {
                 *result = graph->GetIntConstant(0);
               }
               return true;
             } else if (in_body) {
-              if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (GenerateCode(info->op_a, trip, graph, block, &opb, in_body, is_min)) {
                 if (graph != nullptr) {
                   *result = Insert(block,
                                    new (graph->GetArena())
@@ -452,23 +756,44 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear:
+      case HInductionVarAnalysis::kLinear: {
         // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
         // to avoid arithmetic wrap-around situations that are hard to guard against.
-        if (GetConstant(info->op_a, &value)) {
-          if (value == 1 || value == -1) {
-            const bool is_min_a = value == 1 ? is_min : !is_min;
+        int64_t stride_value = 0;
+        if (IsConstant(info->op_a, kExact, &stride_value)) {
+          if (stride_value == 1 || stride_value == -1) {
+            const bool is_min_a = stride_value == 1 ? is_min : !is_min;
             if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
                 GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
               if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
+                }
+                *result = Insert(block, oper);
               }
               return true;
             }
           }
         }
         break;
-      default:  // TODO(ajcbik): add more cases
+      }
+      case HInductionVarAnalysis::kWrapAround:
+      case HInductionVarAnalysis::kPeriodic: {
+        // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+        // values are easy to test at runtime without complications of arithmetic wrap-around.
+        Value extreme = GetVal(info, trip, in_body, is_min);
+        if (IsConstantValue(extreme)) {
+          if (graph != nullptr) {
+            *result = graph->GetIntConstant(extreme.b_constant);
+          }
+          return true;
+        }
+        break;
+      }
+      default:
         break;
     }
   }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7fa5a26..00aaa16 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -57,29 +57,35 @@
   explicit InductionVarRange(HInductionVarAnalysis* induction);
 
   /**
-   * Given a context denoted by the first instruction, returns a,
-   * possibly conservative, lower bound on the instruction's value.
+   * Given a context denoted by the first instruction, returns a possibly conservative lower
+   * and upper bound on the instruction's value in the output parameters min_val and max_val,
+   * respectively. The needs_finite_test flag denotes whether an additional finite-test is needed
+   * to protect the range evaluation inside its loop. The parameter chase_hint defines an
+   * instruction at which chasing may stop. Returns false on failure.
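+   *
+   * A minimal usage sketch (range, context and index stand for a client's own values;
+   * this is illustrative, not part of the API contract):
+   *
+   *   InductionVarRange::Value min_val, max_val;
+   *   bool needs_finite_test = false;
+   *   if (range.GetInductionRange(context, index, nullptr,  // no chase hint
+   *                               &min_val, &max_val, &needs_finite_test)) {
+   *     // Each Value encodes a_constant * instruction + b_constant; a_constant == 0
+   *     // means a plain constant bound. If needs_finite_test is set, the loop may
+   *     // be infinite and any use of the bounds must be guarded at runtime.
+   *   }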
    */
-  Value GetMinInduction(HInstruction* context, HInstruction* instruction);
+  bool GetInductionRange(HInstruction* context,
+                         HInstruction* instruction,
+                         HInstruction* chase_hint,
+                         /*out*/ Value* min_val,
+                         /*out*/ Value* max_val,
+                         /*out*/ bool* needs_finite_test);
 
   /**
-   * Given a context denoted by the first instruction, returns a,
-   * possibly conservative, upper bound on the instruction's value.
+   * Returns true if range analysis is able to generate code for the lower and upper
+   * bound expressions on the instruction in the given context. The needs_finite_test
+   * and needs_taken_test flags denote whether an additional finite-test and/or taken-test
+   * are needed to protect the range evaluation inside its loop.
    */
-  Value GetMaxInduction(HInstruction* context, HInstruction* instruction);
-
-  /**
-   * Returns true if range analysis is able to generate code for the lower and upper bound
-   * expressions on the instruction in the given context. Output parameter top_test denotes
-   * whether a top test is needed to protect the trip-count expression evaluation.
-   */
-  bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test);
+  bool CanGenerateCode(HInstruction* context,
+                       HInstruction* instruction,
+                       /*out*/ bool* needs_finite_test,
+                       /*out*/ bool* needs_taken_test);
 
   /**
    * Generates the actual code in the HIR for the lower and upper bound expressions on the
    * instruction in the given context. Code for the lower and upper bound expressions is
-   * generated in given block and graph and are returned in lower and upper, respectively.
-   * For a loop invariant, lower is not set.
+   * generated in the given block and graph and returned in the output parameters lower and
+   * upper, respectively. For a loop invariant, lower is not set.
    *
    * For example, given expression x+i with range [0, 5] for i, calling this method
    * will generate the following sequence:
@@ -87,71 +93,126 @@
    * block:
    *   lower: add x, 0
    *   upper: add x, 5
+   *
+   * Precondition: CanGenerateCode() returns true.
    */
-  bool GenerateCode(HInstruction* context,
-                    HInstruction* instruction,
-                    HGraph* graph,
-                    HBasicBlock* block,
-                    /*out*/HInstruction** lower,
-                    /*out*/HInstruction** upper);
-
- private:
-  //
-  // Private helper methods.
-  //
-
-  Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min);
-
-  static Value GetFetch(HInstruction* instruction,
-                        HInductionVarAnalysis::InductionInfo* trip,
-                        bool in_body,
-                        bool is_min);
-  static Value GetVal(HInductionVarAnalysis::InductionInfo* info,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-  static Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
-                      HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-  static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
-                      HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-
-  static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value);
-
-  static Value AddValue(Value v1, Value v2);
-  static Value SubValue(Value v1, Value v2);
-  static Value MulValue(Value v1, Value v2);
-  static Value DivValue(Value v1, Value v2);
-  static Value MergeVal(Value v1, Value v2, bool is_min);
+  void GenerateRangeCode(HInstruction* context,
+                         HInstruction* instruction,
+                         HGraph* graph,
+                         HBasicBlock* block,
+                         /*out*/ HInstruction** lower,
+                         /*out*/ HInstruction** upper);
 
   /**
-   * Generates code for lower/upper expression in the HIR. Returns true on success.
-   * With graph == nullptr, the method can be used to determine if code generation
+   * Generates an explicit taken-test for the loop in the given context. Code is generated
+   * in the given block and graph. The taken-test is returned in the output parameter
+   * taken_test.
+   *
+   * Precondition: CanGenerateCode() returns true and needs_taken_test is set.
+   */
+  void GenerateTakenTest(HInstruction* context,
+                         HGraph* graph,
+                         HBasicBlock* block,
+                         /*out*/ HInstruction** taken_test);
+
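Taken together, the intended call sequence is roughly the following (a minimal sketch, assuming a client pass holding an InductionVarRange named `range`; `preheader` is an illustrative name):

    bool needs_finite_test = false;
    bool needs_taken_test = false;
    if (range.CanGenerateCode(context, instruction, &needs_finite_test, &needs_taken_test)) {
      HInstruction* lower = nullptr;
      HInstruction* upper = nullptr;
      range.GenerateRangeCode(context, instruction, graph, preheader, &lower, &upper);
      if (needs_taken_test) {
        HInstruction* taken_test = nullptr;
        range.GenerateTakenTest(context, graph, preheader, &taken_test);
        // Guard uses of the generated bounds with taken_test.
      }
    }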
+ private:
+  /*
+   * Enum specifying the kind of IsConstant() request.
+   */
+  enum ConstantRequest {
+    kExact,
+    kAtMost,
+    kAtLeast
+  };
+
+  /**
+   * Returns true if an exact or upper/lower bound on the given induction
+   * information is known as a 64-bit constant, which is returned in value.
+   */
+  bool IsConstant(HInductionVarAnalysis::InductionInfo* info,
+                  ConstantRequest request,
+                  /*out*/ int64_t* value) const;
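
To make the three request kinds concrete, consider induction info describing the non-trivial range [11, 22] (a sketch mirroring the IsConstant test added further down in this change):

    int64_t value = 0;
    IsConstant(info, kExact, &value);    // false: no single constant equals [11, 22]
    IsConstant(info, kAtMost, &value);   // true, value == 22 (upper bound)
    IsConstant(info, kAtLeast, &value);  // true, value == 11 (lower bound)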
+
+  /** Returns whether induction information can be obtained. */
+  bool HasInductionInfo(HInstruction* context,
+                        HInstruction* instruction,
+                        /*out*/ HLoopInformation** loop,
+                        /*out*/ HInductionVarAnalysis::InductionInfo** info,
+                        /*out*/ HInductionVarAnalysis::InductionInfo** trip) const;
+
+  bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const;
+  bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
+  bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+  bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+  bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+
+  Value GetLinear(HInductionVarAnalysis::InductionInfo* info,
+                  HInductionVarAnalysis::InductionInfo* trip,
+                  bool in_body,
+                  bool is_min) const;
+  Value GetFetch(HInstruction* instruction,
+                 HInductionVarAnalysis::InductionInfo* trip,
+                 bool in_body,
+                 bool is_min) const;
+  Value GetVal(HInductionVarAnalysis::InductionInfo* info,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+  Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+  Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+
+  Value MulRangeAndConstant(int64_t value,
+                            HInductionVarAnalysis::InductionInfo* info,
+                            HInductionVarAnalysis::InductionInfo* trip,
+                            bool in_body,
+                            bool is_min) const;
+  Value DivRangeAndConstant(int64_t value,
+                            HInductionVarAnalysis::InductionInfo* info,
+                            HInductionVarAnalysis::InductionInfo* trip,
+                            bool in_body,
+                            bool is_min) const;
+
+  Value AddValue(Value v1, Value v2) const;
+  Value SubValue(Value v1, Value v2) const;
+  Value MulValue(Value v1, Value v2) const;
+  Value DivValue(Value v1, Value v2) const;
+  Value MergeVal(Value v1, Value v2, bool is_min) const;
+
+  /**
+   * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
+   * With null output values, the method can be used to determine whether code generation
    * would be successful without generating actual code yet.
    */
   bool GenerateCode(HInstruction* context,
                     HInstruction* instruction,
                     HGraph* graph,
                     HBasicBlock* block,
-                    /*out*/HInstruction** lower,
-                    /*out*/HInstruction** upper,
-                    bool* top_test);
+                    /*out*/ HInstruction** lower,
+                    /*out*/ HInstruction** upper,
+                    /*out*/ HInstruction** taken_test,
+                    /*out*/ bool* needs_finite_test,
+                    /*out*/ bool* needs_taken_test) const;
 
-  static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
-                           HInductionVarAnalysis::InductionInfo* trip,
-                           HGraph* graph,
-                           HBasicBlock* block,
-                           /*out*/HInstruction** result,
-                           bool in_body,
-                           bool is_min);
+  bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
+                    HInductionVarAnalysis::InductionInfo* trip,
+                    HGraph* graph,
+                    HBasicBlock* block,
+                    /*out*/ HInstruction** result,
+                    bool in_body,
+                    bool is_min) const;
 
   /** Results of prior induction variable analysis. */
-  HInductionVarAnalysis *induction_analysis_;
+  HInductionVarAnalysis* induction_analysis_;
+
+  /** Instruction at which chasing may stop. */
+  HInstruction* chase_hint_;
 
   friend class HInductionVarAnalysis;
   friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 8fbc59f..4ea170f 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -16,7 +16,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "induction_var_range.h"
 #include "nodes.h"
@@ -29,11 +28,14 @@
 /**
  * Fixture class for the InductionVarRange tests.
  */
-class InductionVarRangeTest : public testing::Test {
+class InductionVarRangeTest : public CommonCompilerTest {
  public:
-  InductionVarRangeTest()  : pool_(), allocator_(&pool_) {
-    graph_ = CreateGraph(&allocator_);
-    iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
+  InductionVarRangeTest()
+      : pool_(),
+        allocator_(&pool_),
+        graph_(CreateGraph(&allocator_)),
+        iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
+        range_(iva_) {
     BuildGraph();
   }
 
@@ -46,6 +48,10 @@
     EXPECT_EQ(v1.is_known, v2.is_known);
   }
 
+  //
+  // Construction methods.
+  //
+
   /** Constructs bare minimum graph. */
   void BuildGraph() {
     graph_->SetNumberOfVRegs(1);
@@ -55,10 +61,17 @@
     graph_->AddBlock(exit_block_);
     graph_->SetEntryBlock(entry_block_);
     graph_->SetExitBlock(exit_block_);
+    // Two parameters.
+    x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(x_);
+    y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(y_);
+    // Set an arbitrary range analysis hint while testing private methods.
+    SetHint(x_);
   }
 
   /** Constructs loop with given upper bound. */
-  void BuildLoop(HInstruction* upper) {
+  void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) {
     // Control flow.
     loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(loop_preheader_);
@@ -66,37 +79,45 @@
     graph_->AddBlock(loop_header);
     HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(loop_body);
+    HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(return_block);
     entry_block_->AddSuccessor(loop_preheader_);
     loop_preheader_->AddSuccessor(loop_header);
     loop_header->AddSuccessor(loop_body);
-    loop_header->AddSuccessor(exit_block_);
+    loop_header->AddSuccessor(return_block);
     loop_body->AddSuccessor(loop_header);
+    return_block->AddSuccessor(exit_block_);
     // Instructions.
-    HLocal* induc = new (&allocator_) HLocal(0);
-    entry_block_->AddInstruction(induc);
-    loop_preheader_->AddInstruction(
-        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0)));  // i = 0
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
-    HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
-          loop_header->AddInstruction(load);
-    condition_ = new (&allocator_) HLessThan(load, upper);
+    HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+    loop_header->AddPhi(phi);
+    phi->AddInput(graph_->GetIntConstant(lower));  // i = l
+    if (stride > 0) {
+      condition_ = new (&allocator_) HLessThan(phi, upper);  // i < u
+    } else {
+      condition_ = new (&allocator_) HGreaterThan(phi, upper);  // i > u
+    }
     loop_header->AddInstruction(condition_);
-    loop_header->AddInstruction(new (&allocator_) HIf(condition_));  // i < u
-    load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
-    loop_body->AddInstruction(load);
-    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1));
-    loop_body->AddInstruction(increment_);
-    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i++
+    loop_header->AddInstruction(new (&allocator_) HIf(condition_));
+    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, phi, graph_->GetIntConstant(stride));
+    loop_body->AddInstruction(increment_);  // i += s
+    phi->AddInput(increment_);
     loop_body->AddInstruction(new (&allocator_) HGoto());
-    exit_block_->AddInstruction(new (&allocator_) HReturnVoid());
+    return_block->AddInstruction(new (&allocator_) HReturnVoid());
+    exit_block_->AddInstruction(new (&allocator_) HExit());
   }
 
-  /** Performs induction variable analysis. */
+  /** Constructs SSA and performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    graph_->BuildDominatorTree();
     iva_->Run();
   }
 
+  /** Sets the chase hint used by range analysis. */
+  void SetHint(HInstruction* hint) {
+    range_.chase_hint_ = hint;
+  }
+
   /** Constructs an invariant. */
   HInductionVarAnalysis::InductionInfo* CreateInvariant(char opc,
                                                         HInductionVarAnalysis::InductionInfo* a,
@@ -108,6 +129,7 @@
       case 'n': op = HInductionVarAnalysis::kNeg; break;
       case '*': op = HInductionVarAnalysis::kMul; break;
       case '/': op = HInductionVarAnalysis::kDiv; break;
+      case '<': op = HInductionVarAnalysis::kLT;  break;
       default:  op = HInductionVarAnalysis::kNop; break;
     }
     return iva_->CreateInvariantOp(op, a, b);
@@ -123,64 +145,104 @@
     return CreateFetch(graph_->GetIntConstant(c));
   }
 
-  /** Constructs a trip-count. */
-  HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) {
-    return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc));
+  /** Constructs a constant trip-count. */
+  HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) {
+    HInductionVarAnalysis::InductionOp op = HInductionVarAnalysis::kTripCountInBodyUnsafe;
+    if (in_loop && safe) {
+      op = HInductionVarAnalysis::kTripCountInLoop;
+    } else if (in_loop) {
+      op = HInductionVarAnalysis::kTripCountInLoopUnsafe;
+    } else if (safe) {
+      op = HInductionVarAnalysis::kTripCountInBody;
+    }
+    // Return TC with taken-test 0 < TC.
+    return iva_->CreateTripCount(op,
+                                 CreateConst(tc),
+                                 CreateInvariant('<', CreateConst(0), CreateConst(tc)),
+                                 Primitive::kPrimInt);
   }
 
   /** Constructs a linear a * i + b induction. */
   HInductionVarAnalysis::InductionInfo* CreateLinear(int32_t a, int32_t b) {
-    return iva_->CreateInduction(HInductionVarAnalysis::kLinear, CreateConst(a), CreateConst(b));
+    return iva_->CreateInduction(
+        HInductionVarAnalysis::kLinear, CreateConst(a), CreateConst(b), Primitive::kPrimInt);
   }
 
   /** Constructs a range [lo, hi] using a periodic induction. */
   HInductionVarAnalysis::InductionInfo* CreateRange(int32_t lo, int32_t hi) {
     return iva_->CreateInduction(
-        HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi));
+        HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi), Primitive::kPrimInt);
+  }
+
+  /** Constructs a wrap-around induction consisting of a constant, followed by the given info. */
+  HInductionVarAnalysis::InductionInfo* CreateWrapAround(
+      int32_t initial,
+      HInductionVarAnalysis::InductionInfo* info) {
+    return iva_->CreateInduction(
+        HInductionVarAnalysis::kWrapAround, CreateConst(initial), info, Primitive::kPrimInt);
   }
 
   /** Constructs a wrap-around induction consisting of a constant, followed by a range. */
   HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) {
-    return iva_->CreateInduction(
-        HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi));
+    return CreateWrapAround(initial, CreateRange(lo, hi));
   }
 
   //
   // Relay methods.
   //
 
+  bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+    return range_.NeedsTripCount(info);
+  }
+
+  bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.IsBodyTripCount(trip);
+  }
+
+  bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.IsUnsafeTripCount(trip);
+  }
+
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
-               HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
+               HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true);
   }
 
   Value GetMax(HInductionVarAnalysis::InductionInfo* info,
-               HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false);
+               HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false);
   }
 
   Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
                HInductionVarAnalysis::InductionInfo* info2,
                bool is_min) {
-    return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
+    return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
   Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
                HInductionVarAnalysis::InductionInfo* info2,
                bool is_min) {
-    return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
+    return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
-  bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) {
-    return InductionVarRange::GetConstant(info, value);
+  bool IsExact(HInductionVarAnalysis::InductionInfo* info, int64_t* value) {
+    return range_.IsConstant(info, InductionVarRange::kExact, value);
   }
 
-  Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); }
-  Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); }
-  Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); }
-  Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); }
-  Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); }
-  Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); }
+  bool IsAtMost(HInductionVarAnalysis::InductionInfo* info, int64_t* value) {
+    return range_.IsConstant(info, InductionVarRange::kAtMost, value);
+  }
+
+  bool IsAtLeast(HInductionVarAnalysis::InductionInfo* info, int64_t* value) {
+    return range_.IsConstant(info, InductionVarRange::kAtLeast, value);
+  }
+
+  Value AddValue(Value v1, Value v2) { return range_.AddValue(v1, v2); }
+  Value SubValue(Value v1, Value v2) { return range_.SubValue(v1, v2); }
+  Value MulValue(Value v1, Value v2) { return range_.MulValue(v1, v2); }
+  Value DivValue(Value v1, Value v2) { return range_.DivValue(v1, v2); }
+  Value MinValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, true); }
+  Value MaxValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, false); }
 
   // General building fields.
   ArenaPool pool_;
@@ -190,18 +252,67 @@
   HBasicBlock* exit_block_;
   HBasicBlock* loop_preheader_;
   HInductionVarAnalysis* iva_;
+  InductionVarRange range_;
 
   // Instructions.
   HInstruction* condition_;
   HInstruction* increment_;
-  HReturnVoid x_;
-  HReturnVoid y_;
+  HInstruction* x_;
+  HInstruction* y_;
 };
 
 //
-// Tests on static methods.
+// Tests on private methods.
 //
 
+TEST_F(InductionVarRangeTest, IsConstant) {
+  int64_t value;
+  // Constant.
+  EXPECT_TRUE(IsExact(CreateConst(12345), &value));
+  EXPECT_EQ(12345, value);
+  EXPECT_TRUE(IsAtMost(CreateConst(12345), &value));
+  EXPECT_EQ(12345, value);
+  EXPECT_TRUE(IsAtLeast(CreateConst(12345), &value));
+  EXPECT_EQ(12345, value);
+  // Constant trivial range.
+  EXPECT_TRUE(IsExact(CreateRange(111, 111), &value));
+  EXPECT_EQ(111, value);
+  EXPECT_TRUE(IsAtMost(CreateRange(111, 111), &value));
+  EXPECT_EQ(111, value);
+  EXPECT_TRUE(IsAtLeast(CreateRange(111, 111), &value));
+  EXPECT_EQ(111, value);
+  // Constant non-trivial range.
+  EXPECT_FALSE(IsExact(CreateRange(11, 22), &value));
+  EXPECT_TRUE(IsAtMost(CreateRange(11, 22), &value));
+  EXPECT_EQ(22, value);
+  EXPECT_TRUE(IsAtLeast(CreateRange(11, 22), &value));
+  EXPECT_EQ(11, value);
+  // Symbolic.
+  EXPECT_FALSE(IsExact(CreateFetch(x_), &value));
+  EXPECT_FALSE(IsAtMost(CreateFetch(x_), &value));
+  EXPECT_FALSE(IsAtLeast(CreateFetch(x_), &value));
+}
+
+TEST_F(InductionVarRangeTest, TripCountProperties) {
+  EXPECT_FALSE(NeedsTripCount(nullptr));
+  EXPECT_FALSE(NeedsTripCount(CreateConst(1)));
+  EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1)));
+  EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3)));
+  EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1))));
+
+  EXPECT_FALSE(IsBodyTripCount(nullptr));
+  EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true)));
+  EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false)));
+  EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true)));
+  EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false)));
+
+  EXPECT_FALSE(IsUnsafeTripCount(nullptr));
+  EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true)));
+  EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false)));
+  EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true)));
+  EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false)));
+}
+
 TEST_F(InductionVarRangeTest, GetMinMaxNull) {
   ExpectEqual(Value(), GetMin(nullptr, nullptr));
   ExpectEqual(Value(), GetMax(nullptr, nullptr));
@@ -212,14 +323,14 @@
               GetMin(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(22),
               GetMax(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr));
-  ExpectEqual(Value(&x_, 1, -20),
-              GetMin(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, -10),
-              GetMax(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, 10),
-              GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, 1, 20),
-              GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, 1, -20),
+              GetMin(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, -10),
+              GetMax(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, 10),
+              GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, 1, 20),
+              GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr));
   ExpectEqual(Value(5),
               GetMin(CreateInvariant('+', CreateRange(-5, -1), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(19),
@@ -231,14 +342,14 @@
               GetMin(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(-8),
               GetMax(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr));
-  ExpectEqual(Value(&x_, 1, 10),
-              GetMin(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, 20),
-              GetMax(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, -1, 10),
-              GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, -1, 20),
-              GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, 1, 10),
+              GetMin(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, 20),
+              GetMax(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, -1, 10),
+              GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 20),
+              GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr));
   ExpectEqual(Value(-25),
               GetMin(CreateInvariant('-', CreateRange(-5, -1), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(-11),
@@ -250,8 +361,8 @@
   ExpectEqual(Value(-10), GetMax(CreateInvariant('n', nullptr, CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(10), GetMin(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr));
   ExpectEqual(Value(20), GetMax(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxMul) {
@@ -274,15 +385,15 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxFetch) {
-  ExpectEqual(Value(&x_, 1, 0), GetMin(CreateFetch(&x_), nullptr));
-  ExpectEqual(Value(&x_, 1, 0), GetMax(CreateFetch(&x_), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(x_), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(x_), nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxLinear) {
-  ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100)));
+  ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) {
@@ -300,47 +411,72 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMulMin) {
+  ExpectEqual(Value(-14), GetMul(CreateConst(2), CreateRange(-7, 8), true));
+  ExpectEqual(Value(-16), GetMul(CreateConst(-2), CreateRange(-7, 8), true));
+  ExpectEqual(Value(-14), GetMul(CreateRange(-7, 8), CreateConst(2), true));
+  ExpectEqual(Value(-16), GetMul(CreateRange(-7, 8), CreateConst(-2), true));
   ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true));
   ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true));
+  ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), true));
   ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), true));
   ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), true));
 }
 
 TEST_F(InductionVarRangeTest, GetMulMax) {
+  ExpectEqual(Value(16), GetMul(CreateConst(2), CreateRange(-7, 8), false));
+  ExpectEqual(Value(14), GetMul(CreateConst(-2), CreateRange(-7, 8), false));
+  ExpectEqual(Value(16), GetMul(CreateRange(-7, 8), CreateConst(2), false));
+  ExpectEqual(Value(14), GetMul(CreateRange(-7, 8), CreateConst(-2), false));
   ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false));
   ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false));
+  ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), false));
   ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), false));
   ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), false));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMin) {
+  ExpectEqual(Value(-5), GetDiv(CreateRange(-10, 20), CreateConst(2), true));
+  ExpectEqual(Value(-10), GetDiv(CreateRange(-10, 20), CreateConst(-2), true));
   ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true));
   ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), true));
   ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), true));
   ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, -40), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), true));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMax) {
+  ExpectEqual(Value(10), GetDiv(CreateRange(-10, 20), CreateConst(2), false));
+  ExpectEqual(Value(5), GetDiv(CreateRange(-10, 20), CreateConst(-2), false));
   ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false));
   ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), false));
   ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), false));
   ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false));
-}
-
-TEST_F(InductionVarRangeTest, GetConstant) {
-  int32_t value;
-  ASSERT_TRUE(GetConstant(CreateConst(12345), &value));
-  EXPECT_EQ(12345, value);
-  EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, -40), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), false));
 }
 
 TEST_F(InductionVarRangeTest, AddValue) {
   ExpectEqual(Value(110), AddValue(Value(10), Value(100)));
-  ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1)));
-  ExpectEqual(Value(&x_, 3, -5), AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(-5), AddValue(Value(x_, 1, -4), Value(x_, -1, -1)));
+  ExpectEqual(Value(x_, 3, -5), AddValue(Value(x_, 2, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), AddValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 1, 23), AddValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(y_, 1, 5), AddValue(Value(55), Value(y_, 1, -50)));
   const int32_t max_value = std::numeric_limits<int32_t>::max();
   ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5)));
   ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6)));  // unsafe
@@ -348,11 +484,11 @@
 
 TEST_F(InductionVarRangeTest, SubValue) {
   ExpectEqual(Value(-90), SubValue(Value(10), Value(100)));
-  ExpectEqual(Value(-3), SubValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 2, -3), SubValue(Value(&x_, 3, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50)));
+  ExpectEqual(Value(-3), SubValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 2, -3), SubValue(Value(x_, 3, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), SubValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 1, 17), SubValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(y_, -4, 105), SubValue(Value(55), Value(y_, 4, -50)));
   const int32_t min_value = std::numeric_limits<int32_t>::min();
   ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5)));
   ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6)));  // unsafe
@@ -360,99 +496,191 @@
 
 TEST_F(InductionVarRangeTest, MulValue) {
   ExpectEqual(Value(1000), MulValue(Value(10), Value(100)));
-  ExpectEqual(Value(), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 9, 60), MulValue(Value(&x_, 3, 20), Value(3)));
-  ExpectEqual(Value(&y_, 55, -110), MulValue(Value(55), Value(&y_, 1, -2)));
+  ExpectEqual(Value(), MulValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), MulValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 9, 60), MulValue(Value(x_, 3, 20), Value(3)));
+  ExpectEqual(Value(y_, 55, -110), MulValue(Value(55), Value(y_, 1, -2)));
   ExpectEqual(Value(), MulValue(Value(90000), Value(-90000)));  // unsafe
 }
 
+TEST_F(InductionVarRangeTest, MulValueSpecial) {
+  const int32_t min_value = std::numeric_limits<int32_t>::min();
+  const int32_t max_value = std::numeric_limits<int32_t>::max();
+
+  // Unsafe.
+  ExpectEqual(Value(), MulValue(Value(min_value), Value(min_value)));
+  ExpectEqual(Value(), MulValue(Value(min_value), Value(-1)));
+  ExpectEqual(Value(), MulValue(Value(min_value), Value(max_value)));
+  ExpectEqual(Value(), MulValue(Value(max_value), Value(max_value)));
+
+  // Safe.
+  ExpectEqual(Value(min_value), MulValue(Value(min_value), Value(1)));
+  ExpectEqual(Value(max_value), MulValue(Value(max_value), Value(1)));
+  ExpectEqual(Value(-max_value), MulValue(Value(max_value), Value(-1)));
+  ExpectEqual(Value(-1), MulValue(Value(1), Value(-1)));
+  ExpectEqual(Value(1), MulValue(Value(-1), Value(-1)));
+}
+
 TEST_F(InductionVarRangeTest, DivValue) {
   ExpectEqual(Value(25), DivValue(Value(100), Value(4)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 12, 24), Value(3)));
-  ExpectEqual(Value(), DivValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(), DivValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), DivValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), DivValue(Value(x_, 12, 24), Value(3)));
+  ExpectEqual(Value(), DivValue(Value(55), Value(y_, 1, -50)));
   ExpectEqual(Value(), DivValue(Value(1), Value(0)));  // unsafe
 }
 
+TEST_F(InductionVarRangeTest, DivValueSpecial) {
+  const int32_t min_value = std::numeric_limits<int32_t>::min();
+  const int32_t max_value = std::numeric_limits<int32_t>::max();
+
+  // Unsafe.
+  ExpectEqual(Value(), DivValue(Value(min_value), Value(-1)));
+
+  // Safe.
+  ExpectEqual(Value(1), DivValue(Value(min_value), Value(min_value)));
+  ExpectEqual(Value(1), DivValue(Value(max_value), Value(max_value)));
+  ExpectEqual(Value(min_value), DivValue(Value(min_value), Value(1)));
+  ExpectEqual(Value(max_value), DivValue(Value(max_value), Value(1)));
+  ExpectEqual(Value(-max_value), DivValue(Value(max_value), Value(-1)));
+  ExpectEqual(Value(-1), DivValue(Value(1), Value(-1)));
+  ExpectEqual(Value(1), DivValue(Value(-1), Value(-1)));
+}
+
 TEST_F(InductionVarRangeTest, MinValue) {
   ExpectEqual(Value(10), MinValue(Value(10), Value(100)));
-  ExpectEqual(Value(&x_, 1, -4), MinValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 4, -4), MinValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), MinValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(), MinValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(x_, 1, -4), MinValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 4, -4), MinValue(Value(x_, 4, -4), Value(x_, 4, -1)));
+  ExpectEqual(Value(), MinValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), MinValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MinValue(Value(55), Value(y_, 1, -50)));
 }
 
 TEST_F(InductionVarRangeTest, MaxValue) {
   ExpectEqual(Value(100), MaxValue(Value(10), Value(100)));
-  ExpectEqual(Value(&x_, 1, -1), MaxValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 4, -1), MaxValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(x_, 1, -1), MaxValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 4, -1), MaxValue(Value(x_, 4, -4), Value(x_, 4, -1)));
+  ExpectEqual(Value(), MaxValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), MaxValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50)));
+}
+
+TEST_F(InductionVarRangeTest, ArrayLengthAndHints) {
+  HInstruction* new_array = new (&allocator_)
+      HNewArray(x_,
+                graph_->GetCurrentMethod(),
+                0,
+                Primitive::kPrimInt,
+                graph_->GetDexFile(),
+                kQuickAllocArray);
+  entry_block_->AddInstruction(new_array);
+  HInstruction* array_length = new (&allocator_) HArrayLength(new_array, 0);
+  entry_block_->AddInstruction(array_length);
+  // With null hint: yields extreme constants.
+  const int32_t max_value = std::numeric_limits<int32_t>::max();
+  SetHint(nullptr);
+  ExpectEqual(Value(0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(max_value), GetMax(CreateFetch(array_length), nullptr));
+  // With explicit hint: yields the length instruction.
+  SetHint(array_length);
+  ExpectEqual(Value(array_length, 1, 0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(array_length, 1, 0), GetMax(CreateFetch(array_length), nullptr));
+  // With a different non-null hint: chases beyond the length instruction.
+  SetHint(x_);
+  ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr));
 }
 
 //
-// Tests on instance methods.
+// Tests on public methods.
 //
 
-TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) {
-  BuildLoop(graph_->GetIntConstant(1000));
+TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
+  BuildLoop(0, graph_->GetIntConstant(1000), 1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
+
+  Value v1, v2;
+  bool needs_finite_test = true;
 
   // In context of header: known.
-  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
-  ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(1000), v2);
 
   // In context of loop-body: known.
-  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
-  ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_));
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(999), v2);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(1000), v2);
 }
 
-TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(parameter);
+TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
+  BuildLoop(1000, graph_->GetIntConstant(0), -1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
-  // In context of header: full range unknown.
-  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
-  ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+  Value v1, v2;
+  bool needs_finite_test = true;
+
+  // In context of header: known.
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(1000), v2);
 
   // In context of loop-body: known.
-  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
-  ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_));
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(1000), v2);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(999), v2);
 }
 
-TEST_F(InductionVarRangeTest, CodeGeneration) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(parameter);
+TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
+  BuildLoop(0, x_, 1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
+
+  Value v1, v2;
+  bool needs_finite_test = true;
+  bool needs_taken_test = true;
+
+  // In context of header: upper unknown.
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(), v2);
+
+  // In context of loop-body: known.
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(x_, 1, -1), v2);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(x_, 1, 0), v2);
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
-  bool top_test = false;
+  HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test));
-  ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test));
-  EXPECT_TRUE(top_test);
+  EXPECT_FALSE(range_.CanGenerateCode(
+      condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  ASSERT_TRUE(range_.CanGenerateCode(
+      increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  EXPECT_FALSE(needs_finite_test);
+  EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  EXPECT_TRUE(range.GenerateCode(
-      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper));
+  range_.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
@@ -462,7 +690,7 @@
   ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
   EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
 
-  // Verify upper is (V-1)+0
+  // Verify upper is (V-1)+0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsAdd());
   ASSERT_TRUE(upper->InputAt(0)->IsSub());
@@ -471,6 +699,86 @@
   EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
   ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify taken-test is 0<V.
+  range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  ASSERT_TRUE(taken != nullptr);
+  ASSERT_TRUE(taken->IsLessThan());
+  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
+}
+
+TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
+  BuildLoop(1000, x_, -1);
+  PerformInductionVarAnalysis();
+
+  Value v1, v2;
+  bool needs_finite_test = true;
+  bool needs_taken_test = true;
+
+  // In context of header: lower unknown.
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(), v1);
+  ExpectEqual(Value(1000), v2);
+
+  // In context of loop-body: known.
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(x_, 1, 1), v1);
+  ExpectEqual(Value(1000), v2);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(x_, 1, 0), v1);
+  ExpectEqual(Value(999), v2);
+
+  HInstruction* lower = nullptr;
+  HInstruction* upper = nullptr;
+  HInstruction* taken = nullptr;
+
+  // Can generate code in context of loop-body only.
+  EXPECT_FALSE(range_.CanGenerateCode(
+      condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  ASSERT_TRUE(range_.CanGenerateCode(
+      increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  EXPECT_FALSE(needs_finite_test);
+  EXPECT_TRUE(needs_taken_test);
+
+  // Generates code.
+  range_.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+
+  // Verify lower is 1000-((1000-V)-1).
+  ASSERT_TRUE(lower != nullptr);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  lower = lower->InputAt(1);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+  lower = lower->InputAt(0);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
+
+  // Verify upper is 1000-0.
+  ASSERT_TRUE(upper != nullptr);
+  ASSERT_TRUE(upper->IsSub());
+  ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify taken-test is 1000>V.
+  range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  ASSERT_TRUE(taken != nullptr);
+  ASSERT_TRUE(taken->IsGreaterThan());
+  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index e2aca30..451aa38 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -17,30 +17,43 @@
 #include "inliner.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "builder.h"
 #include "class_linker.h"
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
+#include "dex/verified_method.h"
+#include "dex/verification_results.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "optimizing_compiler.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
+#include "quick/inline_method_analyser.h"
+#include "sharpening.h"
+#include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
-#include "dex/verified_method.h"
-#include "dex/verification_results.h"
 
 namespace art {
 
-static constexpr size_t kMaximumNumberOfHInstructions = 12;
+static constexpr size_t kMaximumNumberOfHInstructions = 32;
+
+// Limit the number of dex registers that we accumulate while inlining
+// to avoid creating a large number of nested environments.
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+
+// Avoid inlining within a huge method due to memory pressure.
+static constexpr size_t kMaximumCodeUnitSize = 4096;
 
 void HInliner::Run() {
   const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
@@ -48,6 +61,9 @@
       || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
     return;
   }
+  if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
+    return;
+  }
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
@@ -129,10 +145,14 @@
   } else if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(info.GetTypeHandle().Get())) {
     // The method that we're trying to call is not in the receiver's class or super classes.
     return nullptr;
+  } else if (info.GetTypeHandle()->IsErroneous()) {
+    // If the type is erroneous, do not go further, as we are going to query the vtable or
+    // IMT table, which we can only safely do on non-erroneous classes.
+    return nullptr;
   }
 
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = cl->GetImagePointerSize();
+  PointerSize pointer_size = cl->GetImagePointerSize();
   if (invoke->IsInvokeInterface()) {
     resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface(
         resolved_method, pointer_size);
@@ -147,7 +167,7 @@
     // the target method. Since we check above the exact type of the receiver,
     // the only reason this can happen is an IncompatibleClassChangeError.
     return nullptr;
-  } else if (resolved_method->IsAbstract()) {
+  } else if (!resolved_method->IsInvokable()) {
     // The information we had on the receiver was not enough to find
     // the target method. Since we check above the exact type of the receiver,
     // the only reason this can happen is an IncompatibleClassChangeError.
@@ -168,15 +188,76 @@
 
 static uint32_t FindMethodIndexIn(ArtMethod* method,
                                   const DexFile& dex_file,
-                                  uint32_t referrer_index)
+                                  uint32_t name_and_signature_index)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
     return method->GetDexMethodIndex();
   } else {
-    return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index);
+    return method->FindDexMethodIndexInOtherDexFile(dex_file, name_and_signature_index);
   }
 }
 
+static uint32_t FindClassIndexIn(mirror::Class* cls,
+                                 const DexFile& dex_file,
+                                 Handle<mirror::DexCache> dex_cache)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t index = DexFile::kDexNoIndex;
+  if (cls->GetDexCache() == nullptr) {
+    DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
+  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
+    // TODO: deal with proxy classes.
+  } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
+    index = cls->GetDexTypeIndex();
+    // Update the dex cache to ensure the class is in it, since the generated code
+    // will assume it is. We make it safe by updating the dex cache, as other
+    // dex files might also load the class, and there is no guarantee the dex
+    // cache of the dex file of the class will be updated.
+    if (dex_cache->GetResolvedType(index) == nullptr) {
+      dex_cache->SetResolvedType(index, cls);
+    }
+  } else {
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
+    // We cannot guarantee the entry in the dex cache will resolve to the same class,
+    // as there may be different class loaders. So only return the index if it's
+    // the right class in the dex cache already.
+    if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
+      index = DexFile::kDexNoIndex;
+    }
+  }
+
+  return index;
+}
+
+class ScopedProfilingInfoInlineUse {
+ public:
+  explicit ScopedProfilingInfoInlineUse(ArtMethod* method, Thread* self)
+      : method_(method),
+        self_(self),
+        // Fetch the profiling info ahead of using it. If it's null when fetching,
+        // we should not call JitCodeCache::DoneCompilerUse.
+        profiling_info_(
+            Runtime::Current()->GetJit()->GetCodeCache()->NotifyCompilerUse(method, self)) {
+  }
+
+  ~ScopedProfilingInfoInlineUse() {
+    if (profiling_info_ != nullptr) {
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      DCHECK_EQ(profiling_info_, method_->GetProfilingInfo(pointer_size));
+      Runtime::Current()->GetJit()->GetCodeCache()->DoneCompilerUse(method_, self_);
+    }
+  }
+
+  ProfilingInfo* GetProfilingInfo() const { return profiling_info_; }
+
+ private:
+  ArtMethod* const method_;
+  Thread* const self_;
+  ProfilingInfo* const profiling_info_;
+};
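
A hedged usage sketch of this RAII guard (mirroring the call site below; `caller` and `soa` stand for the surrounding inliner state):

    ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
    ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
    if (profiling_info != nullptr) {
      // Inline caches can be inspected safely here; DoneCompilerUse is
      // invoked automatically when spiis goes out of scope.
    }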
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -190,16 +271,27 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
+  ArtMethod* actual_method = nullptr;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+      VLOG(compiler) << "Not inlining a String.<init> method";
+      return false;
+    }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
-    mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
+    mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
         : class_linker->FindDexCache(soa.Self(), *ref.dex_file);
     resolved_method = dex_cache->GetResolvedMethod(
         ref.dex_method_index, class_linker->GetImagePointerSize());
+    // actual_method == resolved_method for direct or static calls.
+    actual_method = resolved_method;
   } else {
     resolved_method = caller_compilation_unit_.GetDexCache().Get()->GetResolvedMethod(
         method_index, class_linker->GetImagePointerSize());
+    if (resolved_method != nullptr) {
+      // Check if we can statically find the method.
+      actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+    }
   }
 
   if (resolved_method == nullptr) {
@@ -209,54 +301,573 @@
     return false;
   }
 
-  if (!invoke_instruction->IsInvokeStaticOrDirect()) {
-    resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    if (resolved_method == nullptr) {
-      VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(method_index, caller_dex_file)
-                     << " could not be statically determined";
-      return false;
+  if (actual_method != nullptr) {
+    bool result = TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
+    if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
+      MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
     }
-    // We have found a method, but we need to find where that method is for the caller's
-    // dex file.
-    method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index);
-    if (method_index == DexFile::kDexNoIndex) {
-      VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(resolved_method)
-                     << " cannot be inlined because unaccessible to caller";
+    return result;
+  }
+
+  DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
+
+  // Check if we can use an inline cache.
+  ArtMethod* caller = graph_->GetArtMethod();
+  if (Runtime::Current()->UseJitCompilation()) {
+    // Under JIT, we should always know the caller.
+    DCHECK(caller != nullptr);
+    ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
+    ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
+    if (profiling_info != nullptr) {
+      const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+      if (ic.IsUninitialized()) {
+        VLOG(compiler) << "Interface or virtual call to "
+                       << PrettyMethod(method_index, caller_dex_file)
+                       << " is not hit and not inlined";
+        return false;
+      } else if (ic.IsMonomorphic()) {
+        MaybeRecordStat(kMonomorphicCall);
+        if (outermost_graph_->IsCompilingOsr()) {
+          // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
+          // interpreter and it may have seen different receiver types.
+          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+        } else {
+          return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+        }
+      } else if (ic.IsPolymorphic()) {
+        MaybeRecordStat(kPolymorphicCall);
+        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+      } else {
+        DCHECK(ic.IsMegamorphic());
+        VLOG(compiler) << "Interface or virtual call to "
+                       << PrettyMethod(method_index, caller_dex_file)
+                       << " is megamorphic and not inlined";
+        MaybeRecordStat(kMegamorphicCall);
+        return false;
+      }
+    }
+  }
+
+  VLOG(compiler) << "Interface or virtual call to "
+                 << PrettyMethod(method_index, caller_dex_file)
+                 << " could not be statically determined";
+  return false;
+}
+
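+// Build a read of the receiver's class word through the hidden
+// `shadow$_klass_` field of java.lang.Object; the returned HInstanceFieldGet
+// feeds the class and method guards emitted by the inliner below.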
+HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
+                                                   HInstruction* receiver,
+                                                   uint32_t dex_pc) const {
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      dex_pc);
+  // The class of a field is effectively final, and does not have any memory dependencies.
+  result->SetSideEffects(SideEffects::None());
+  return result;
+}
+
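+// Inline the single target recorded in the inline cache, guarded by a class
+// check that deoptimizes on a mismatch. Schematically:
+//   if (receiver.getClass() != ic.GetMonomorphicType()) deoptimize;
+//   ... inlined code ...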
+bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
+      << invoke_instruction->DebugName();
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t class_index = FindClassIndexIn(
+      ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
+  if (class_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because its class is not"
+                   << " accessible to the caller";
+    return false;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+        resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+        resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  bool is_referrer =
+      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  AddTypeGuard(receiver,
+               cursor,
+               bb_cursor,
+               class_index,
+               is_referrer,
+               invoke_instruction,
+               /* with_deoptimization */ true);
+
+  // Run type propagation to get the guard typed, and eventually propagate the
+  // type of the receiver.
+  ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetDexCache(),
+                                     handles_,
+                                     /* is_first_run */ false);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedMonomorphicCall);
+  return true;
+}
+
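+// Emit a comparison of the receiver's class against the class at `class_index`
+// right before `invoke_instruction`, optionally followed by an HDeoptimize on
+// mismatch. Returns the HNotEqual so callers can build control flow around it.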
+HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
+                                     HInstruction* cursor,
+                                     HBasicBlock* bb_cursor,
+                                     uint32_t class_index,
+                                     bool is_referrer,
+                                     HInstruction* invoke_instruction,
+                                     bool with_deoptimization) {
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  // Note that we will just compare the classes, so we don't need Java semantics access checks.
+  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
+  if (with_deoptimization) {
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
+  return compare;
+}
+
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
+      << invoke_instruction->DebugName();
+
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+    return true;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  bool all_targets_inlined = true;
+  bool one_target_inlined = false;
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+
+    HInstruction* receiver = invoke_instruction->InputAt(0);
+    HInstruction* cursor = invoke_instruction->GetPrevious();
+    HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+    uint32_t class_index = FindClassIndexIn(
+        ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+    HInstruction* return_replacement = nullptr;
+    if (class_index == DexFile::kDexNoIndex ||
+        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+      all_targets_inlined = false;
+    } else {
+      one_target_inlined = true;
+      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+
+      // If we have inlined all targets before, and this receiver type is the
+      // last one seen, we deoptimize instead of keeping the original invoke instruction.
+      bool deoptimize = all_targets_inlined &&
+          (i != InlineCache::kIndividualCacheSize - 1) &&
+          (ic.GetTypeAt(i + 1) == nullptr);
+
+      if (outermost_graph_->IsCompilingOsr()) {
+        // We do not support HDeoptimize in OSR methods.
+        deoptimize = false;
+      }
+      HInstruction* compare = AddTypeGuard(
+          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+      if (deoptimize) {
+        if (return_replacement != nullptr) {
+          invoke_instruction->ReplaceWith(return_replacement);
+        }
+        invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+        // Because the inline cache data can be populated concurrently, we force the end of the
+        // iteration. Otherwise, we could see a new receiver type.
+        break;
+      } else {
+        CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+      }
+    }
+  }
+
+  if (!one_target_inlined) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because none"
+                   << " of its targets could be inlined";
+    return false;
+  }
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  // Run type propagation to get the guards typed.
+  ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetDexCache(),
+                                     handles_,
+                                     /* is_first_run */ false);
+  rtp_fixup.Run();
+  return true;
+}
+
+void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                        HInstruction* return_replacement,
+                                                        HInstruction* invoke_instruction) {
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HBasicBlock* cursor_block = compare->GetBlock();
+  HBasicBlock* original_invoke_block = invoke_instruction->GetBlock();
+  ArenaAllocator* allocator = graph_->GetArena();
+
+  // Split the block after the compare: `cursor_block` will now be the start of the diamond,
+  // and the returned block is the start of the then branch (which may contain multiple blocks).
+  HBasicBlock* then = cursor_block->SplitAfterForInlining(compare);
+
+  // Split the block containing the invoke before and after the invoke. The returned block
+  // of the split before will contain the invoke and will be the otherwise branch of
+  // the diamond. The returned block of the split after will be the merge block
+  // of the diamond.
+  HBasicBlock* end_then = invoke_instruction->GetBlock();
+  HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction);
+  HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction);
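+
+  // The resulting control flow is, schematically:
+  //
+  //          cursor_block  (ends with HIf(compare))
+  //           /         \
+  //        then       otherwise  (contains the invoke)
+  //           \         /
+  //             merge  (phi of the two return values, if any)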
+
+  // If the methods we are inlining return a value, we create a phi in the merge block
+  // that will have the `invoke_instruction` and the `return_replacement` as inputs.
+  if (return_replacement != nullptr) {
+    HPhi* phi = new (allocator) HPhi(
+        allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc);
+    merge->AddPhi(phi);
+    invoke_instruction->ReplaceWith(phi);
+    phi->AddInput(return_replacement);
+    phi->AddInput(invoke_instruction);
+  }
+
+  // Add the control flow instructions.
+  otherwise->AddInstruction(new (allocator) HGoto(dex_pc));
+  end_then->AddInstruction(new (allocator) HGoto(dex_pc));
+  cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc));
+
+  // Add the newly created blocks to the graph.
+  graph_->AddBlock(then);
+  graph_->AddBlock(otherwise);
+  graph_->AddBlock(merge);
+
+  // Set up successor (and implicitly predecessor) relations.
+  cursor_block->AddSuccessor(otherwise);
+  cursor_block->AddSuccessor(then);
+  end_then->AddSuccessor(merge);
+  otherwise->AddSuccessor(merge);
+
+  // Set up dominance information.
+  then->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(then);
+  otherwise->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(otherwise);
+  merge->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(merge);
+
+  // Update the reverse post order.
+  size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block);
+  MakeRoomFor(&graph_->reverse_post_order_, 1, index);
+  graph_->reverse_post_order_[++index] = then;
+  index = IndexOfElement(graph_->reverse_post_order_, end_then);
+  MakeRoomFor(&graph_->reverse_post_order_, 2, index);
+  graph_->reverse_post_order_[++index] = otherwise;
+  graph_->reverse_post_order_[++index] = merge;
+
+  graph_->UpdateLoopAndTryInformationOfNewBlock(
+      then, original_invoke_block, /* replace_if_back_edge */ false);
+  graph_->UpdateLoopAndTryInformationOfNewBlock(
+      otherwise, original_invoke_block, /* replace_if_back_edge */ false);
+
+  // In case the original invoke location was a back edge, we need to update
+  // the loop to now have the merge block as a back edge.
+  graph_->UpdateLoopAndTryInformationOfNewBlock(
+      merge, original_invoke_block, /* replace_if_back_edge */ true);
+}
+
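+// If all receiver types recorded in the inline cache resolve to the same
+// ArtMethod, inline that single target and guard it with a method check:
+// load the vtable/IMT entry from the receiver's class (HClassTableGet) and
+// deoptimize, or branch to the original invoke, if it is not the expected
+// method pointer.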
+bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                                    ArtMethod* resolved_method,
+                                                    const InlineCache& ic) {
+  // This optimization only works under JIT for now.
+  DCHECK(Runtime::Current()->UseJitCompilation());
+  if (graph_->GetInstructionSet() == kMips64) {
+    // TODO: Support HClassTableGet for mips64.
+    return false;
+  }
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+
+  DCHECK(resolved_method != nullptr);
+  ArtMethod* actual_method = nullptr;
+  size_t method_index = invoke_instruction->IsInvokeVirtual()
+      ? invoke_instruction->AsInvokeVirtual()->GetVTableIndex()
+      : invoke_instruction->AsInvokeInterface()->GetImtIndex();
+
+  // Check whether we are actually calling the same method among
+  // the different types seen.
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* new_method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
+          method_index, pointer_size);
+      if (new_method->IsRuntimeMethod()) {
+        // Bail out as soon as we see a conflict trampoline in one of the
+        // targets' interface tables.
+        return false;
+      }
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      new_method = ic.GetTypeAt(i)->GetEmbeddedVTableEntry(method_index, pointer_size);
+    }
+    DCHECK(new_method != nullptr);
+    if (actual_method == nullptr) {
+      actual_method = new_method;
+    } else if (actual_method != new_method) {
+      // Different methods, bail out.
+      VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                     << " from inline cache is not inlined because it resolves"
+                     << " to different methods";
       return false;
     }
   }
 
-  bool same_dex_file =
-      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+  HInstruction* return_replacement = nullptr;
+  if (!TryBuildAndInline(invoke_instruction, actual_method, &return_replacement)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  Primitive::Type type = Is64BitInstructionSet(graph_->GetInstructionSet())
+      ? Primitive::kPrimLong
+      : Primitive::kPrimInt;
+  HClassTableGet* class_table_get = new (graph_->GetArena()) HClassTableGet(
+      receiver_class,
+      type,
+      invoke_instruction->IsInvokeVirtual() ? HClassTableGet::TableKind::kVTable
+                                            : HClassTableGet::TableKind::kIMTable,
+      method_index,
+      invoke_instruction->GetDexPc());
+
+  HConstant* constant;
+  if (type == Primitive::kPrimLong) {
+    constant = graph_->GetLongConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  } else {
+    constant = graph_->GetIntConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  }
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant);
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(class_table_get, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, class_table_get);
+
+  if (outermost_graph_->IsCompilingOsr()) {
+    CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+  } else {
+    // TODO: Extend reference type propagation to understand the guard.
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+    if (return_replacement != nullptr) {
+      invoke_instruction->ReplaceWith(return_replacement);
+    }
+    invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  }
+
+  // Run type propagation to get the guard typed.
+  ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetDexCache(),
+                                     handles_,
+                                     /* is_first_run */ false);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  return true;
+}
+
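+// Try to inline `method` and replace `invoke_instruction` on success. If the
+// body cannot be inlined and the call is an invoke-interface, fall back to
+// rewriting it as an invoke-virtual (see the rationale in the comment below).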
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  HInstruction* return_replacement = nullptr;
+  if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+    if (invoke_instruction->IsInvokeInterface()) {
+      // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
+      // better than an invoke-interface because:
+      // 1) In the best case, the interface call has one more indirection (to fetch the IMT).
+      // 2) We will not go to the conflict trampoline with an invoke-virtual.
+      // TODO: Consider sharpening once it is not dependent on the compiler driver.
+      const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+      uint32_t method_index = FindMethodIndexIn(
+          method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+      if (method_index == DexFile::kDexNoIndex) {
+        return false;
+      }
+      HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual(
+          graph_->GetArena(),
+          invoke_instruction->GetNumberOfArguments(),
+          invoke_instruction->GetType(),
+          invoke_instruction->GetDexPc(),
+          method_index,
+          method->GetMethodIndex());
+      HInputsRef inputs = invoke_instruction->GetInputs();
+      for (size_t index = 0; index != inputs.size(); ++index) {
+        new_invoke->SetArgumentAt(index, inputs[index]);
+      }
+      invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
+      new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+      if (invoke_instruction->GetType() == Primitive::kPrimNot) {
+        new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+      }
+      return_replacement = new_invoke;
+    } else {
+      // TODO: Consider sharpening an invoke virtual once it is not dependent on the
+      // compiler driver.
+      return false;
+    }
+  }
+  if (return_replacement != nullptr) {
+    invoke_instruction->ReplaceWith(return_replacement);
+  }
+  invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  FixUpReturnReferenceType(method, return_replacement);
+  if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
+    // Actual return value has a more specific type than the method's declared
+    // return type. Run RTP again on the outer graph to propagate it.
+    ReferenceTypePropagation(graph_,
+                             outer_compilation_unit_.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
+  return true;
+}
+
+bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
+                                 ArtMethod* method,
+                                 HInstruction** return_replacement) {
+  if (method->IsProxyMethod()) {
+    VLOG(compiler) << "Method " << PrettyMethod(method)
+                   << " is not inlined because of unimplemented inline support for proxy methods.";
+    return false;
+  }
+
+  // Check whether we're allowed to inline. The outermost compilation unit is the relevant
+  // dex file here (though the transitivity of an inline chain would allow checking the caller).
+  if (!compiler_driver_->MayInline(method->GetDexFile(),
+                                   outer_compilation_unit_.GetDexFile())) {
+    if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
+      VLOG(compiler) << "Successfully replaced pattern of invoke " << PrettyMethod(method);
+      MaybeRecordStat(kReplacedInvokeWithSimplePattern);
+      return true;
+    }
+    VLOG(compiler) << "Won't inline " << PrettyMethod(method) << " in "
+                   << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+                   << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+                   << method->GetDexFile()->GetLocation();
+    return false;
+  }
+
+  bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
+
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
-                   << " is too big to inline";
+    VLOG(compiler) << "Method " << PrettyMethod(method)
+                   << " is too big to inline: "
+                   << code_item->insns_size_in_code_units_
+                   << " > "
+                   << inline_max_code_units;
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of try block";
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
-    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
-    if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
-      VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+  if (!method->IsCompilable()) {
+    VLOG(compiler) << "Method " << PrettyMethod(method)
+                   << " has soft failures unhandled by the compiler, so it cannot be inlined";
+    return false;
+  }
+
+  if (!method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
+    if (Runtime::Current()->UseJitCompilation() ||
+        !compiler_driver_->IsMethodVerifiedWithoutFailures(
+            method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
+      VLOG(compiler) << "Method " << PrettyMethod(method)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
     }
@@ -266,41 +877,230 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is static and requires a clinit"
                    << " check that cannot be emitted due to Dex cache limitations";
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
+  if (!TryBuildAndInlineHelper(invoke_instruction, method, same_dex_file, return_replacement)) {
     return false;
   }
 
-  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, caller_dex_file);
+  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method);
   MaybeRecordStat(kInlinedInvoke);
   return true;
 }
 
-bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
-                                 HInvoke* invoke_instruction,
-                                 bool same_dex_file) {
+static HInstruction* GetInvokeInputForArgVRegIndex(HInvoke* invoke_instruction,
+                                                   size_t arg_vreg_index)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
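+  // Map a Dex argument vreg index to the corresponding HInvoke input, accounting
+  // for wide (long/double) arguments that occupy two vregs. As a sketch, assuming
+  // a virtual method with signature (JI)V: `this` is arg vreg 0, the long covers
+  // vregs 1-2, and the int at arg vreg 3 maps to input index 2.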
+  size_t input_index = 0;
+  for (size_t i = 0; i < arg_vreg_index; ++i, ++input_index) {
+    DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments());
+    if (Primitive::Is64BitType(invoke_instruction->InputAt(input_index)->GetType())) {
+      ++i;
+      DCHECK_NE(i, arg_vreg_index);
+    }
+  }
+  DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments());
+  return invoke_instruction->InputAt(input_index);
+}
+
+// Try to recognize known simple patterns and replace invoke call with appropriate instructions.
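+// For example, an empty void method matches kInlineOpNop and is simply removed,
+// while a getter like `int getX() { return x; }` matches kInlineOpIGet and is
+// replaced by a single HInstanceFieldGet.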
+bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
+                                      ArtMethod* resolved_method,
+                                      HInstruction** return_replacement) {
+  InlineMethod inline_method;
+  if (!InlineMethodAnalyser::AnalyseMethodCode(resolved_method, &inline_method)) {
+    return false;
+  }
+
+  switch (inline_method.opcode) {
+    case kInlineOpNop:
+      DCHECK_EQ(invoke_instruction->GetType(), Primitive::kPrimVoid);
+      *return_replacement = nullptr;
+      break;
+    case kInlineOpReturnArg:
+      *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction,
+                                                          inline_method.d.return_data.arg);
+      break;
+    case kInlineOpNonWideConst:
+      if (resolved_method->GetShorty()[0] == 'L') {
+        DCHECK_EQ(inline_method.d.data, 0u);
+        *return_replacement = graph_->GetNullConstant();
+      } else {
+        *return_replacement = graph_->GetIntConstant(static_cast<int32_t>(inline_method.d.data));
+      }
+      break;
+    case kInlineOpIGet: {
+      const InlineIGetIPutData& data = inline_method.d.ifield_data;
+      if (data.method_is_static || data.object_arg != 0u) {
+        // TODO: Needs null check.
+        return false;
+      }
+      Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
+      HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
+      HInstanceFieldGet* iget = CreateInstanceFieldGet(dex_cache, data.field_idx, obj);
+      DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
+      DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
+      invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
+      *return_replacement = iget;
+      break;
+    }
+    case kInlineOpIPut: {
+      const InlineIGetIPutData& data = inline_method.d.ifield_data;
+      if (data.method_is_static || data.object_arg != 0u) {
+        // TODO: Needs null check.
+        return false;
+      }
+      Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
+      HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
+      HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg);
+      HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, data.field_idx, obj, value);
+      DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset);
+      DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile);
+      invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
+      if (data.return_arg_plus1 != 0u) {
+        size_t return_arg = data.return_arg_plus1 - 1u;
+        *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction, return_arg);
+      }
+      break;
+    }
+    case kInlineOpConstructor: {
+      const InlineConstructorData& data = inline_method.d.constructor_data;
+      // Get the indexes to arrays for easier processing.
+      uint16_t iput_field_indexes[] = {
+          data.iput0_field_index, data.iput1_field_index, data.iput2_field_index
+      };
+      uint16_t iput_args[] = { data.iput0_arg, data.iput1_arg, data.iput2_arg };
+      static_assert(arraysize(iput_args) == arraysize(iput_field_indexes), "Size mismatch");
+      // Count valid field indexes.
+      size_t number_of_iputs = 0u;
+      while (number_of_iputs != arraysize(iput_field_indexes) &&
+          iput_field_indexes[number_of_iputs] != DexFile::kDexNoIndex16) {
+        // Check that there are no duplicate valid field indexes.
+        DCHECK_EQ(0, std::count(iput_field_indexes + number_of_iputs + 1,
+                                iput_field_indexes + arraysize(iput_field_indexes),
+                                iput_field_indexes[number_of_iputs]));
+        ++number_of_iputs;
+      }
+      // Check that there are no valid field indexes in the rest of the array.
+      DCHECK_EQ(0, std::count_if(iput_field_indexes + number_of_iputs,
+                                 iput_field_indexes + arraysize(iput_field_indexes),
+                                 [](uint16_t index) { return index != DexFile::kDexNoIndex16; }));
+
+      // Create HInstanceFieldSet for each IPUT that stores non-zero data.
+      Handle<mirror::DexCache> dex_cache;
+      HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u);
+      bool needs_constructor_barrier = false;
+      for (size_t i = 0; i != number_of_iputs; ++i) {
+        HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
+        if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
+          if (dex_cache.GetReference() == nullptr) {
+            dex_cache = handles_->NewHandle(resolved_method->GetDexCache());
+          }
+          uint16_t field_index = iput_field_indexes[i];
+          HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, field_index, obj, value);
+          invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
+
+          // Check whether the field is final. If it is, we need to add a barrier.
+          PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+          ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+          DCHECK(resolved_field != nullptr);
+          if (resolved_field->IsFinal()) {
+            needs_constructor_barrier = true;
+          }
+        }
+      }
+      if (needs_constructor_barrier) {
+        HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
+        invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
+      }
+      *return_replacement = nullptr;
+      break;
+    }
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+  return true;
+}
+
+HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
+                                                    uint32_t field_index,
+                                                    HInstruction* obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+  ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+  DCHECK(resolved_field != nullptr);
+  HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
+      obj,
+      resolved_field->GetTypeAsPrimitiveType(),
+      resolved_field->GetOffset(),
+      resolved_field->IsVolatile(),
+      field_index,
+      resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *dex_cache->GetDexFile(),
+      dex_cache,
+      // Read barrier generates a runtime call in the slow path and we need a valid
+      // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
+      /* dex_pc */ 0);
+  if (iget->GetType() == Primitive::kPrimNot) {
+    // Use the same dex_cache that we used for field lookup as the hint_dex_cache.
+    ReferenceTypePropagation rtp(graph_, dex_cache, handles_, /* is_first_run */ false);
+    rtp.Visit(iget);
+  }
+  return iget;
+}
+
+HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
+                                                    uint32_t field_index,
+                                                    HInstruction* obj,
+                                                    HInstruction* value)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+  ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+  DCHECK(resolved_field != nullptr);
+  HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
+      obj,
+      value,
+      resolved_field->GetTypeAsPrimitiveType(),
+      resolved_field->GetOffset(),
+      resolved_field->IsVolatile(),
+      field_index,
+      resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *dex_cache->GetDexFile(),
+      dex_cache,
+      // Read barrier generates a runtime call in the slow path and we need a valid
+      // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
+      /* dex_pc */ 0);
+  return iput;
+}
+
+bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
+                                       ArtMethod* resolved_method,
+                                       bool same_dex_file,
+                                       HInstruction** return_replacement) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
   uint32_t method_index = resolved_method->GetDexMethodIndex();
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(handles_->NewHandle(
+      resolved_method->GetDeclaringClass()->GetClassLoader()));
+
   DexCompilationUnit dex_compilation_unit(
-    nullptr,
-    caller_compilation_unit_.GetClassLoader(),
-    class_linker,
-    callee_dex_file,
-    code_item,
-    resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
-    method_index,
-    resolved_method->GetAccessFlags(),
-    compiler_driver_->GetVerifiedMethod(&callee_dex_file, method_index),
-    dex_cache);
+      class_loader.ToJObject(),
+      class_linker,
+      callee_dex_file,
+      code_item,
+      resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
+      method_index,
+      resolved_method->GetAccessFlags(),
+      /* verified_method */ nullptr,
+      dex_cache);
 
   bool requires_ctor_barrier = false;
 
@@ -327,6 +1127,8 @@
     // at runtime, we change this call as if it was a virtual call.
     invoke_type = kVirtual;
   }
+
+  const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId();
   HGraph* callee_graph = new (graph_->GetArena()) HGraph(
       graph_->GetArena(),
       callee_dex_file,
@@ -335,19 +1137,28 @@
       compiler_driver_->GetInstructionSet(),
       invoke_type,
       graph_->IsDebuggable(),
-      graph_->GetCurrentInstructionId());
+      /* osr */ false,
+      caller_instruction_counter);
+  callee_graph->SetArtMethod(resolved_method);
 
-  OptimizingCompilerStats inline_stats;
+  // When they are needed, allocate `inline_stats` on the heap instead
+  // of on the stack, as Clang might produce a stack frame too large
+  // for this function, one that would exceed the limit imposed by the
+  // `-Wframe-larger-than` option.
+  std::unique_ptr<OptimizingCompilerStats> inline_stats =
+      (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
   HGraphBuilder builder(callee_graph,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
                         resolved_method->GetDexFile(),
+                        *code_item,
                         compiler_driver_,
-                        &inline_stats,
+                        inline_stats.get(),
                         resolved_method->GetQuickenedInfo(),
-                        dex_cache);
+                        dex_cache,
+                        handles_);
 
-  if (!builder.BuildGraph(*code_item)) {
+  if (builder.BuildGraph() != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be built, so cannot be inlined";
     return false;
@@ -360,12 +1171,6 @@
     return false;
   }
 
-  if (!callee_graph->TryBuildingSsa()) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                   << " could not be transformed to SSA";
-    return false;
-  }
-
   size_t parameter_index = 0;
   for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions());
        !instructions.Done();
@@ -392,38 +1197,19 @@
     }
   }
 
-  // Run simple optimizations on the graph.
-  HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
-  ReferenceTypePropagation type_propagation(callee_graph, handles_);
-  InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
-
-  HOptimization* optimizations[] = {
-    &intrinsics,
-    &type_propagation,
-    &simplify,
-    &dce,
-    &fold,
-  };
-
-  for (size_t i = 0; i < arraysize(optimizations); ++i) {
-    HOptimization* optimization = optimizations[i];
-    optimization->Run();
+  // We have replaced formal arguments with actual arguments. If actual types
+  // are more specific than the declared ones, run RTP again on the inner graph.
+  if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
+    ReferenceTypePropagation(callee_graph,
+                             dex_compilation_unit.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
   }
 
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
-  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
-    HInliner inliner(callee_graph,
-                     outer_compilation_unit_,
-                     dex_compilation_unit,
-                     compiler_driver_,
-                     handles_,
-                     stats_,
-                     depth_ + 1);
-    inliner.Run();
-    number_of_instructions_budget += inliner.number_of_inlined_instructions_;
-  }
+  size_t number_of_inlined_instructions =
+      RunOptimizations(callee_graph, code_item, dex_compilation_unit);
+  number_of_instructions_budget += number_of_inlined_instructions;
 
   // TODO: We should abort only if all predecessors throw. However,
   // HGraph::InlineInto currently does not handle an exit block with
@@ -451,29 +1237,35 @@
   HReversePostOrderIterator it(*callee_graph);
   it.Advance();  // Past the entry block, it does not contain instructions that prevent inlining.
   size_t number_of_instructions = 0;
+
+  bool can_inline_environment =
+      total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
+
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (block->IsLoopHeader()) {
+
+    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+      // Don't inline methods with irreducible loops, as they could prevent
+      // some optimizations from running.
       VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                     << " could not be inlined because it contains a loop";
+                     << " could not be inlined because it contains an irreducible loop";
       return false;
     }
 
     for (HInstructionIterator instr_it(block->GetInstructions());
          !instr_it.Done();
          instr_it.Advance()) {
-      if (number_of_instructions++ ==  number_of_instructions_budget) {
+      if (number_of_instructions++ == number_of_instructions_budget) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                       << " could not be inlined because it is too big.";
+                       << " is not inlined because its caller has reached"
+                       << " its instruction budget limit.";
         return false;
       }
       HInstruction* current = instr_it.Current();
-
-      if (current->IsInvokeInterface()) {
-        // Disable inlining of interface calls. The cost in case of entering the
-        // resolution conflict is currently too high.
+      if (!can_inline_environment && current->NeedsEnvironment()) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                       << " could not be inlined because it has an interface call.";
+                       << " is not inlined because its caller has reached"
+                       << " its environment budget limit.";
         return false;
       }
 
@@ -484,49 +1276,194 @@
         return false;
       }
 
-      if (!same_dex_file && current->NeedsDexCache()) {
+      if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " it is in a different dex file and requires access to the dex cache";
         return false;
       }
+
+      if (current->IsNewInstance() &&
+          (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an entrypoint"
+                       << " with access checks";
+        // Allocation entrypoint does not handle inlined frames.
+        return false;
+      }
+
+      if (current->IsNewArray() &&
+          (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an entrypoint"
+                       << " with access checks";
+        // Allocation entrypoint does not handle inlined frames.
+        return false;
+      }
+
+      if (current->IsUnresolvedStaticFieldGet() ||
+          current->IsUnresolvedInstanceFieldGet() ||
+          current->IsUnresolvedStaticFieldSet() ||
+          current->IsUnresolvedInstanceFieldSet()) {
+        // Entrypoint for unresolved fields does not handle inlined frames.
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an unresolved"
+                       << " entrypoint";
+        return false;
+      }
     }
   }
   number_of_inlined_instructions_ += number_of_instructions;
 
-  HInstruction* return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
-  if (return_replacement != nullptr) {
-    DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
+  DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
+      << "No instructions can be added to the outer graph while inner graph is being built";
+
+  const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId();
+  graph_->SetCurrentInstructionId(callee_instruction_counter);
+  *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+
+  DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
+      << "No instructions can be added to the inner graph during inlining into the outer graph";
+
+  return true;
+}
+
+size_t HInliner::RunOptimizations(HGraph* callee_graph,
+                                  const DexFile::CodeItem* code_item,
+                                  const DexCompilationUnit& dex_compilation_unit) {
+  // Note: if the outermost_graph_ is being compiled OSR, we should not run any
+  // optimization that could lead to an HDeoptimize. The following optimizations do not.
+  HDeadCodeElimination dce(callee_graph, stats_);
+  HConstantFolding fold(callee_graph);
+  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
+  InstructionSimplifier simplify(callee_graph, stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
+
+  HOptimization* optimizations[] = {
+    &intrinsics,
+    &sharpening,
+    &simplify,
+    &fold,
+    &dce,
+  };
+
+  for (size_t i = 0; i < arraysize(optimizations); ++i) {
+    HOptimization* optimization = optimizations[i];
+    optimization->Run();
   }
 
-  // When merging the graph we might create a new NullConstant in the caller graph which does
-  // not have the chance to be typed. We assign the correct type here so that we can keep the
-  // assertion that every reference has a valid type. This also simplifies checks along the way.
-  HNullConstant* null_constant = graph_->GetNullConstant();
-  if (!null_constant->GetReferenceTypeInfo().IsValid()) {
-    ReferenceTypeInfo::TypeHandle obj_handle =
-            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
-    null_constant->SetReferenceTypeInfo(
-            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+  size_t number_of_inlined_instructions = 0u;
+  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
+    HInliner inliner(callee_graph,
+                     outermost_graph_,
+                     codegen_,
+                     outer_compilation_unit_,
+                     dex_compilation_unit,
+                     compiler_driver_,
+                     handles_,
+                     stats_,
+                     total_number_of_dex_registers_ + code_item->registers_size_,
+                     depth_ + 1);
+    inliner.Run();
+    number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
   }
 
-  if ((return_replacement != nullptr)
-      && (return_replacement->GetType() == Primitive::kPrimNot)) {
-    if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
-      // Make sure that we have a valid type for the return. We may get an invalid one when
-      // we inline invokes with multiple branches and create a Phi for the result.
-      // TODO: we could be more precise by merging the phi inputs but that requires
-      // some functionality from the reference type propagation.
-      DCHECK(return_replacement->IsPhi());
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-      ReferenceTypeInfo::TypeHandle return_handle =
-        handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
-      return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-         return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
+  return number_of_inlined_instructions;
+}
+
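+// Returns whether `actual_obj` carries strictly more type information than the
+// declaration: either it is known non-null where null was allowed, or its
+// reference type info is exact (or a strict subtype) where the declared one
+// is not.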
+static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
+                                      bool declared_can_be_null,
+                                      HInstruction* actual_obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (declared_can_be_null && !actual_obj->CanBeNull()) {
+    return true;
+  }
+
+  ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
+  return (actual_rti.IsExact() && !declared_rti.IsExact()) ||
+         declared_rti.IsStrictSupertypeOf(actual_rti);
+}
+
+ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) {
+  return ReferenceTypePropagation::IsAdmissible(klass)
+      ? ReferenceTypeInfo::Create(handles_->NewHandle(klass))
+      : graph_->GetInexactObjectRti();
+}
+
+bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) {
+  // If this is an instance call, test whether the type of the `this` argument
+  // is more specific than the class which declares the method.
+  if (!resolved_method->IsStatic()) {
+    if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()),
+                                  /* declared_can_be_null */ false,
+                                  invoke_instruction->InputAt(0u))) {
+      return true;
     }
   }
 
-  return true;
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+
+  // Iterate over the list of parameter types and test whether any of the
+  // actual inputs has a more specific reference type than the type declared in
+  // the signature.
+  const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList();
+  for (size_t param_idx = 0,
+              input_idx = resolved_method->IsStatic() ? 0 : 1,
+              e = (param_list == nullptr ? 0 : param_list->Size());
+       param_idx < e;
+       ++param_idx, ++input_idx) {
+    HInstruction* input = invoke_instruction->InputAt(input_idx);
+    if (input->GetType() == Primitive::kPrimNot) {
+      mirror::Class* param_cls = resolved_method->GetDexCacheResolvedType(
+          param_list->GetTypeItem(param_idx).type_idx_,
+          pointer_size);
+      if (IsReferenceTypeRefinement(GetClassRTI(param_cls),
+                                    /* declared_can_be_null */ true,
+                                    input)) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
+                                      HInstruction* return_replacement) {
+  // Check the integrity of reference types and run another type propagation if needed.
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
+      // Test if the return type is a refinement of the declared return type.
+      if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(),
+                                    /* declared_can_be_null */ true,
+                                    return_replacement)) {
+        return true;
+      }
+    } else if (return_replacement->IsInstanceOf()) {
+      // Inlining InstanceOf into an If may put a tighter bound on reference types.
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
+                                        HInstruction* return_replacement) {
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
+      if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
+        // Make sure that we have a valid type for the return. We may get an invalid one when
+        // we inline invokes with multiple branches and create a Phi for the result.
+        // TODO: we could be more precise by merging the phi inputs but that requires
+        // some functionality from the reference type propagation.
+        DCHECK(return_replacement->IsPhi());
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size);
+        return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
+      }
+    }
+  }
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index bce5915..02d3a5f 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -22,25 +22,33 @@
 
 namespace art {
 
+class CodeGenerator;
 class CompilerDriver;
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
+class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           HGraph* outermost_graph,
+           CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
            CompilerDriver* compiler_driver,
            StackHandleScopeCollection* handles,
            OptimizingCompilerStats* stats,
-           size_t depth = 0)
+           size_t total_number_of_dex_registers,
+           size_t depth)
       : HOptimization(outer_graph, kInlinerPassName, stats),
+        outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
+        codegen_(codegen),
         compiler_driver_(compiler_driver),
+        total_number_of_dex_registers_(total_number_of_dex_registers),
         depth_(depth),
         number_of_inlined_instructions_(0),
         handles_(handles) {}
@@ -51,13 +59,142 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction);
-  bool TryBuildAndInline(ArtMethod* resolved_method,
-                         HInvoke* invoke_instruction,
-                         bool same_dex_file);
 
+  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` indicates
+  // whether reference type propagation can run after the inlining. If the inlining is
+  // successful, this method will replace and remove the `invoke_instruction`.
+  bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool TryBuildAndInline(HInvoke* invoke_instruction,
+                         ArtMethod* resolved_method,
+                         HInstruction** return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
+                               ArtMethod* resolved_method,
+                               bool same_dex_file,
+                               HInstruction** return_replacement);
+
+  // Run simple optimizations on `callee_graph`.
+  // Returns the number of inlined instructions.
+  size_t RunOptimizations(HGraph* callee_graph,
+                          const DexFile::CodeItem* code_item,
+                          const DexCompilationUnit& dex_compilation_unit);
+
+  // Try to recognize known simple patterns and replace the invoke with appropriate instructions.
+  bool TryPatternSubstitution(HInvoke* invoke_instruction,
+                              ArtMethod* resolved_method,
+                              HInstruction** return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Create a new HInstanceFieldGet.
+  HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
+                                            uint32_t field_index,
+                                            HInstruction* obj);
+  // Create a new HInstanceFieldSet.
+  HInstanceFieldSet* CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
+                                            uint32_t field_index,
+                                            HInstruction* obj,
+                                            HInstruction* value);
+
+  // Try to inline the target of a monomorphic call. If successful, the code
+  // in the graph will look like:
+  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
+  // ... // inlined code
+  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline targets of a polymorphic call.
+  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                            ArtMethod* resolved_method,
+                                            const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
+                                           HInstruction* receiver,
+                                           uint32_t dex_pc) const
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
+  // admissible (see ReferenceTypePropagation::IsAdmissible for details).
+  // Otherwise returns inexact Object RTI.
+  ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Add a type guard on the given `receiver`. This will add to the graph:
+  // i0 = HFieldGet(receiver, klass)
+  // i1 = HLoadClass(class_index, is_referrer)
+  // i2 = HNotEqual(i0, i1)
+  //
+  // And if `with_deoptimization` is true:
+  // HDeoptimize(i2)
+  //
+  // The method returns the `HNotEqual` that will be used for polymorphic inlining.
+  HInstruction* AddTypeGuard(HInstruction* receiver,
+                             HInstruction* cursor,
+                             HBasicBlock* bb_cursor,
+                             uint32_t class_index,
+                             bool is_referrer,
+                             HInstruction* invoke_instruction,
+                             bool with_deoptimization)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  /*
+   * Ad-hoc implementation of a diamond pattern in the graph for
+   * polymorphic inlining:
+   * 1) `compare` becomes the input of the new `HIf`.
+   * 2) Everything up until `invoke_instruction` is in the then branch (could
+   *    contain multiple blocks).
+   * 3) `invoke_instruction` is moved to the otherwise block.
+   * 4) If `return_replacement` is not null, the merge block will have
+   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
+   *
+   * Before:
+   *             Block1
+   *             compare
+   *              ...
+   *         invoke_instruction
+   *
+   * After:
+   *            Block1
+   *            compare
+   *              if
+   *          /        \
+   *         /          \
+   *   Then block    Otherwise block
+   *      ...       invoke_instruction
+   *       \              /
+   *        \            /
+   *          Merge block
+   *  phi(return_replacement, invoke_instruction)
+   */
+  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                HInstruction* return_replacement,
+                                                HInstruction* invoke_instruction);
+
+  HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
+  CodeGenerator* const codegen_;
   CompilerDriver* const compiler_driver_;
+  const size_t total_number_of_dex_registers_;
   const size_t depth_;
   size_t number_of_inlined_instructions_;
   StackHandleScopeCollection* const handles_;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
new file mode 100644
index 0000000..e5dab56
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.cc
@@ -0,0 +1,2700 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_builder.h"
+
+#include "art_method-inl.h"
+#include "bytecode_utils.h"
+#include "class_linker.h"
+#include "dex_instruction-inl.h"
+#include "driver/compiler_options.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
+  if (compilation_stats_ != nullptr) {
+    compilation_stats_->RecordStat(compilation_stat);
+  }
+}
+
+HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
+  return block_builder_->GetBlockAt(dex_pc);
+}
+
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
+  const size_t vregs = graph_->GetNumberOfVRegs();
+  if (locals->size() != vregs) {
+    locals->resize(vregs, nullptr);
+
+    if (block->IsCatchBlock()) {
+      // We record incoming inputs of catch phis at throwing instructions and
+      // must therefore eagerly create the phis. Phis for undefined vregs will
+      // be deleted when the first throwing instruction with the vreg undefined
+      // is encountered. Unused phis will be removed by dead phi analysis.
+      for (size_t i = 0; i < vregs; ++i) {
+        // No point in creating the catch phi if it is already undefined at
+        // the first throwing instruction.
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena_) HPhi(
+              arena_,
+              i,
+              0,
+              current_local_value->GetType());
+          block->AddPhi(phi);
+          (*locals)[i] = phi;
+        }
+      }
+    }
+  }
+  return locals;
+}
+
+HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
+  return (*locals)[local];
+}
+
+void HInstructionBuilder::InitializeBlockLocals() {
+  current_locals_ = GetLocalsFor(current_block_);
+
+  if (current_block_->IsCatchBlock()) {
+    // Catch phis were already created and inputs collected from throwing sites.
+    if (kIsDebugBuild) {
+      // Make sure there was at least one throwing instruction which initialized
+      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
+      // visited already (from HTryBoundary scoping and reverse post order).
+      bool catch_block_visited = false;
+      for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (current == current_block_) {
+          catch_block_visited = true;
+        } else if (current->IsTryBlock()) {
+          const HTryBoundary& try_entry = current->GetTryCatchInformation()->GetTryEntry();
+          if (try_entry.HasExceptionHandler(*current_block_)) {
+            DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
+          }
+        }
+      }
+      DCHECK_EQ(current_locals_->size(), graph_->GetNumberOfVRegs())
+          << "No instructions throwing into a live catch block.";
+    }
+  } else if (current_block_->IsLoopHeader()) {
+    // If the block is a loop header, we know we only have visited the pre header
+    // because we are visiting in reverse post order. We create phis for all initialized
+    // locals from the pre header. Their inputs will be populated at the end of
+    // the analysis.
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      HInstruction* incoming =
+          ValueOfLocalAt(current_block_->GetLoopInformation()->GetPreHeader(), local);
+      if (incoming != nullptr) {
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            0,
+            incoming->GetType());
+        current_block_->AddPhi(phi);
+        (*current_locals_)[local] = phi;
+      }
+    }
+
+    // Save the loop header so that the last phase of the analysis knows which
+    // blocks need to be updated.
+    loop_headers_.push_back(current_block_);
+  } else if (current_block_->GetPredecessors().size() > 0) {
+    // All predecessors have already been visited because we are visiting in reverse post order.
+    // We merge the values of all locals, creating phis if those values differ.
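+    // For example (illustrative): if a local holds the constant 42 on one
+    // incoming edge and 7 on the other, the merged value is a new HPhi(42, 7).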
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      bool one_predecessor_has_no_value = false;
+      bool is_different = false;
+      HInstruction* value = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+
+      for (HBasicBlock* predecessor : current_block_->GetPredecessors()) {
+        HInstruction* current = ValueOfLocalAt(predecessor, local);
+        if (current == nullptr) {
+          one_predecessor_has_no_value = true;
+          break;
+        } else if (current != value) {
+          is_different = true;
+        }
+      }
+
+      if (one_predecessor_has_no_value) {
+        // If one predecessor has no value for this local, we trust the verifier has
+        // successfully checked that there is a store dominating any read after this block.
+        continue;
+      }
+
+      if (is_different) {
+        HInstruction* first_input = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            current_block_->GetPredecessors().size(),
+            first_input->GetType());
+        for (size_t i = 0; i < current_block_->GetPredecessors().size(); i++) {
+          HInstruction* pred_value = ValueOfLocalAt(current_block_->GetPredecessors()[i], local);
+          phi->SetRawInputAt(i, pred_value);
+        }
+        current_block_->AddPhi(phi);
+        value = phi;
+      }
+      (*current_locals_)[local] = value;
+    }
+  }
+}
+
+void HInstructionBuilder::PropagateLocalsToCatchBlocks() {
+  const HTryBoundary& try_entry = current_block_->GetTryCatchInformation()->GetTryEntry();
+  for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
+    ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
+    DCHECK_EQ(handler_locals->size(), current_locals_->size());
+    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+      HInstruction* handler_value = (*handler_locals)[vreg];
+      if (handler_value == nullptr) {
+        // Vreg was undefined at a previously encountered throwing instruction
+        // and the catch phi was deleted. Do not record the local value.
+        continue;
+      }
+      DCHECK(handler_value->IsPhi());
+
+      HInstruction* local_value = (*current_locals_)[vreg];
+      if (local_value == nullptr) {
+        // This is the first instruction throwing into `catch_block` where
+        // `vreg` is undefined. Delete the catch phi.
+        catch_block->RemovePhi(handler_value->AsPhi());
+        (*handler_locals)[vreg] = nullptr;
+      } else {
+        // Vreg has been defined at all instructions throwing into `catch_block`
+        // encountered so far. Record the local value in the catch phi.
+        handler_value->AsPhi()->AddInput(local_value);
+      }
+    }
+  }
+}
+
+void HInstructionBuilder::AppendInstruction(HInstruction* instruction) {
+  current_block_->AddInstruction(instruction);
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InsertInstructionAtTop(HInstruction* instruction) {
+  if (current_block_->GetInstructions().IsEmpty()) {
+    current_block_->AddInstruction(instruction);
+  } else {
+    current_block_->InsertInstructionBefore(instruction, current_block_->GetFirstInstruction());
+  }
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) {
+  if (instruction->NeedsEnvironment()) {
+    HEnvironment* environment = new (arena_) HEnvironment(
+        arena_,
+        current_locals_->size(),
+        graph_->GetDexFile(),
+        graph_->GetMethodIdx(),
+        instruction->GetDexPc(),
+        graph_->GetInvokeType(),
+        instruction);
+    environment->CopyFrom(*current_locals_);
+    instruction->SetRawEnvironment(environment);
+  }
+}
+
+HInstruction* HInstructionBuilder::LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc) {
+  HInstruction* ref = LoadLocal(register_index, Primitive::kPrimNot);
+  if (!ref->CanBeNull()) {
+    return ref;
+  }
+
+  HNullCheck* null_check = new (arena_) HNullCheck(ref, dex_pc);
+  AppendInstruction(null_check);
+  return null_check;
+}
+
+void HInstructionBuilder::SetLoopHeaderPhiInputs() {
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocalAt(predecessor, vreg);
+        if (value == nullptr) {
+          // Vreg is undefined at this predecessor. Mark the phi dead and leave it
+          // with fewer inputs than predecessors. SsaChecker will fail if not removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
+static bool IsBlockPopulated(HBasicBlock* block) {
+  if (block->IsLoopHeader()) {
+    // Suspend checks were inserted into loop headers during the building of the dominator tree.
+    DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
+    return block->GetFirstInstruction() != block->GetLastInstruction();
+  } else {
+    return !block->GetInstructions().IsEmpty();
+  }
+}
+
+bool HInstructionBuilder::Build() {
+  locals_for_.resize(graph_->GetBlocks().size(),
+                     ArenaVector<HInstruction*>(arena_->Adapter(kArenaAllocGraphBuilder)));
+
+  // Find locations where we want to generate extra stackmaps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at the start of a Java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations = nullptr;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item_.insns_size_in_code_units_;
+    native_debug_info_locations = new (arena_) ArenaBitVector(arena_, num_instructions, false);
+    FindNativeDebugInfoLocations(native_debug_info_locations);
+  }
+
+  for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
+    current_block_ = block_it.Current();
+    uint32_t block_dex_pc = current_block_->GetDexPc();
+
+    InitializeBlockLocals();
+
+    if (current_block_->IsEntryBlock()) {
+      InitializeParameters();
+      AppendInstruction(new (arena_) HSuspendCheck(0u));
+      AppendInstruction(new (arena_) HGoto(0u));
+      continue;
+    } else if (current_block_->IsExitBlock()) {
+      AppendInstruction(new (arena_) HExit());
+      continue;
+    } else if (current_block_->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(current_block_->GetDexPc());
+      current_block_->GetLoopInformation()->SetSuspendCheck(suspend_check);
+      // This is slightly odd because the loop header might not be empty (TryBoundary).
+      // But we're still creating the environment with locals from the top of the block.
+      InsertInstructionAtTop(suspend_check);
+    }
+
+    if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
+      // Synthetic block that does not need to be populated.
+      DCHECK(IsBlockPopulated(current_block_));
+      continue;
+    }
+
+    DCHECK(!IsBlockPopulated(current_block_));
+
+    for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) {
+      if (current_block_ == nullptr) {
+        // The previous instruction ended this block.
+        break;
+      }
+
+      uint32_t dex_pc = it.CurrentDexPc();
+      if (dex_pc != block_dex_pc && FindBlockStartingAt(dex_pc) != nullptr) {
+        // This dex_pc starts a new basic block.
+        break;
+      }
+
+      if (current_block_->IsTryBlock() && IsThrowingDexInstruction(it.CurrentInstruction())) {
+        PropagateLocalsToCatchBlocks();
+      }
+
+      if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+        AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+
+      if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc)) {
+        return false;
+      }
+    }
+
+    if (current_block_ != nullptr) {
+      // Branching instructions clear `current_block_`, so we know the last
+      // instruction of the current block is not a branching instruction.
+      // We add an unconditional Goto to the next block.
+      DCHECK_EQ(current_block_->GetSuccessors().size(), 1u);
+      AppendInstruction(new (arena_) HGoto());
+    }
+  }
+
+  SetLoopHeaderPhiInputs();
+
+  return true;
+}
+
+void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of a new Java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
+      return false;
+    }
+  };
+  dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations);
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item_.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item_.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION: {
+        // Stop in the native debugger after the exception has been moved.
+        // The compiler also expects the move at the start of the basic block, so
+        // we do not want to interfere by inserting native-debug-info before it.
+        locations->ClearBit(inst->GetDexPc(code_item_.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item_.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
+HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, Primitive::Type type) const {
+  HInstruction* value = (*current_locals_)[reg_number];
+  DCHECK(value != nullptr);
+
+  // If the operation requests a specific type, we make sure its input is of that type.
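+  // For example (illustrative): a vreg written by an int instruction but read as
+  // a float is rewritten to its float equivalent provided by the SSA builder.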
+  if (type != value->GetType()) {
+    if (Primitive::IsFloatingPointType(type)) {
+      value = ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
+    } else if (type == Primitive::kPrimNot) {
+      value = ssa_builder_->GetReferenceTypeEquivalent(value);
+    }
+    DCHECK(value != nullptr);
+  }
+
+  return value;
+}
+
+void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_value) {
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
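+  //
+  // For example (illustrative): if v0/v1 hold a long, storing an int into v1
+  // invalidates v0 (case 2), and storing a new long into v0 also clears the
+  // stale value in v1 (case 1).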
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+}
+
+void HInstructionBuilder::InitializeParameters() {
+  DCHECK(current_block_->IsEntryBlock());
+
+  // dex_compilation_unit_ is null only when unit testing.
+  if (dex_compilation_unit_ == nullptr) {
+    return;
+  }
+
+  const char* shorty = dex_compilation_unit_->GetShorty();
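+  // An illustrative example: shorty "VLJ" describes a method returning void
+  // (shorty[0]) and taking a reference and a long; parameters start at shorty[1].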
+  uint16_t number_of_parameters = graph_->GetNumberOfInVRegs();
+  uint16_t locals_index = graph_->GetNumberOfLocalVRegs();
+  uint16_t parameter_index = 0;
+
+  const DexFile::MethodId& referrer_method_id =
+      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
+  if (!dex_compilation_unit_->IsStatic()) {
+    // Add the implicit 'this' argument, not expressed in the signature.
+    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
+                                                              referrer_method_id.class_idx_,
+                                                              parameter_index++,
+                                                              Primitive::kPrimNot,
+                                                              true);
+    AppendInstruction(parameter);
+    UpdateLocal(locals_index++, parameter);
+    number_of_parameters--;
+  }
+
+  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
+    HParameterValue* parameter = new (arena_) HParameterValue(
+        *dex_file_,
+        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
+        parameter_index++,
+        Primitive::GetType(shorty[shorty_pos]),
+        false);
+    ++shorty_pos;
+    AppendInstruction(parameter);
+    // Store the parameter value in the local that the dex code will use
+    // to reference that parameter.
+    UpdateLocal(locals_index++, parameter);
+    if (Primitive::Is64BitType(parameter->GetType())) {
+      i++;
+      locals_index++;
+      parameter_index++;
+    }
+  }
+}
+
+template<typename T>
+void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(first, second, dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HInstructionBuilder::Unop_12x(const Instruction& instruction,
+                                   Primitive::Type type,
+                                   uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Conversion_12x(const Instruction& instruction,
+                                         Primitive::Type input_type,
+                                         Primitive::Type result_type,
+                                         uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
+  AppendInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Binop_23x_cmp(const Instruction& instruction,
+                                        Primitive::Type type,
+                                        ComparisonBias bias,
+                                        uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
+  Thread* self = Thread::Current();
+  return cu->IsConstructor()
+      && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
+}
+
+// Returns true if `block` has only one successor, which starts at the next
+// dex_pc after `instruction` at `dex_pc`.
+static bool IsFallthroughInstruction(const Instruction& instruction,
+                                     uint32_t dex_pc,
+                                     HBasicBlock* block) {
+  uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+  return block->GetSingleSuccessor()->GetDexPc() == next_dex_pc;
+}
+
+void HInstructionBuilder::BuildSwitch(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  DexSwitchTable table(instruction, dex_pc);
+
+  if (table.GetNumEntries() == 0) {
+    // Empty Switch. Code falls through to the next block.
+    DCHECK(IsFallthroughInstruction(instruction, dex_pc, current_block_));
+    AppendInstruction(new (arena_) HGoto(dex_pc));
+  } else if (table.ShouldBuildDecisionTree()) {
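+    // A decision tree lowers the switch into a chain of HEqual + HIf, e.g.
+    // (illustrative) {1 -> L1, 10 -> L2} becomes:
+    //   if (value == 1) goto L1;
+    //   if (value == 10) goto L2;
+    //   goto fallthrough;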
+    for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
+      HInstruction* case_value = graph_->GetIntConstant(it.CurrentKey(), dex_pc);
+      HEqual* comparison = new (arena_) HEqual(value, case_value, dex_pc);
+      AppendInstruction(comparison);
+      AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+
+      if (!it.IsLast()) {
+        current_block_ = FindBlockStartingAt(it.GetDexPcForCurrentIndex());
+      }
+    }
+  } else {
+    AppendInstruction(
+        new (arena_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc));
+  }
+
+  current_block_ = nullptr;
+}
+
+void HInstructionBuilder::BuildReturn(const Instruction& instruction,
+                                      Primitive::Type type,
+                                      uint32_t dex_pc) {
+  if (type == Primitive::kPrimVoid) {
+    if (graph_->ShouldGenerateConstructorBarrier()) {
+      // The compilation unit is null during testing.
+      if (dex_compilation_unit_ != nullptr) {
+        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_))
+          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
+      }
+      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+    }
+    AppendInstruction(new (arena_) HReturnVoid(dex_pc));
+  } else {
+    HInstruction* value = LoadLocal(instruction.VRegA(), type);
+    AppendInstruction(new (arena_) HReturn(value, dex_pc));
+  }
+  current_block_ = nullptr;
+}
+
+static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
+  switch (opcode) {
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_STATIC_RANGE:
+      return kStatic;
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+      return kDirect;
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      return kVirtual;
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      return kInterface;
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_SUPER:
+      return kSuper;
+    default:
+      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
+      UNREACHABLE();
+  }
+}
+
+ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+  // We fetch the referenced class eagerly (that is, the class pointed to by the MethodId
+  // at method_idx), as `CanAccessResolvedMethod` expects it to be in the dex cache.
+  Handle<mirror::Class> methods_class(hs.NewHandle(class_linker->ResolveReferencedClassOfMethod(
+      method_idx, dex_compilation_unit_->GetDexCache(), class_loader)));
+
+  if (UNLIKELY(methods_class.Get() == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case invoke-super, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable to know whether to turn this into
+  // an invoke-unresolved, which handles cross-dex invokes and abstract super methods; both
+  // require runtime handling.
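+  // For example (illustrative): a `super.toString()` call in class C resolves
+  // through C's superclass vtable and may yield a different ArtMethod than the
+  // one named by the instruction's method index.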
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // We could not determine the method's class; we need to wait until runtime.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    if (!methods_class->IsAssignableFrom(compiling_class.Get())) {
+      // We cannot statically determine the target method. The runtime will throw a
+      // NoSuchMethodError on this one.
+      return nullptr;
+    }
+    ArtMethod* actual_method;
+    if (methods_class->IsInterface()) {
+      actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
+          resolved_method, class_linker->GetImagePointerSize());
+    } else {
+      uint16_t vtable_index = resolved_method->GetMethodIndex();
+      actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+          vtable_index, class_linker->GetImagePointerSize());
+    }
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // The back-end code generator relies on this check in order to ensure that it will not
+      // attempt to read the dex_cache with a dex_method_index that is not from the correct
+      // dex_file. If we didn't do this check then the dex_method_index would not be updated in the
+      // builder, which means that the code generator (and possibly the compiler driver during
+      // sharpening and inlining) might invoke an incorrect method.
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      //       could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
+bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
+                                      uint32_t dex_pc,
+                                      uint32_t method_idx,
+                                      uint32_t number_of_vreg_arguments,
+                                      bool is_range,
+                                      uint32_t* args,
+                                      uint32_t register_index) {
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
+  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
+  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
+
+  // Remove the return type from the 'proto'.
+  size_t number_of_arguments = strlen(descriptor) - 1;
+  if (invoke_type != kStatic) {  // instance call
+    // One extra argument for 'this'.
+    number_of_arguments++;
+  }
+
+  MethodReference target_method(dex_file_, method_idx);
+
+  // Special handling for string init.
+  int32_t string_init_offset = 0;
+  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
+                                                       dex_file_,
+                                                       &string_init_offset);
+  // Replace calls to String.<init> with StringFactory.
+  if (is_string_init) {
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        dchecked_integral_cast<uint64_t>(string_init_offset),
+        0U
+    };
+    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
+        arena_,
+        number_of_arguments - 1,
+        Primitive::kPrimNot /* return_type */,
+        dex_pc,
+        method_idx,
+        target_method,
+        dispatch_info,
+        invoke_type,
+        kStatic /* optimized_invoke_type */,
+        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+    return HandleStringInit(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor);
+  }
+
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
+                                                     number_of_arguments,
+                                                     return_type,
+                                                     dex_pc,
+                                                     method_idx,
+                                                     invoke_type);
+    return HandleInvoke(invoke,
+                        number_of_vreg_arguments,
+                        args,
+                        register_index,
+                        is_range,
+                        descriptor,
+                        nullptr, /* clinit_check */
+                        true /* is_unresolved */);
+  }
+
+  // Potential class initialization check, in the case of a static method call.
+  HClinitCheck* clinit_check = nullptr;
+  HInvoke* invoke = nullptr;
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
+    // By default, consider that the called method implicitly requires
+    // an initialization check of its declaring class.
+    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
+        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
+    }
+
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0U
+    };
+    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
+                                                number_of_arguments,
+                                                return_type,
+                                                dex_pc,
+                                                method_idx,
+                                                target_method,
+                                                dispatch_info,
+                                                invoke_type,
+                                                invoke_type,
+                                                clinit_check_requirement);
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeVirtual(arena_,
+                                         number_of_arguments,
+                                         return_type,
+                                         dex_pc,
+                                         method_idx,
+                                         resolved_method->GetMethodIndex());
+  } else {
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeInterface(arena_,
+                                           number_of_arguments,
+                                           return_type,
+                                           dex_pc,
+                                           method_idx,
+                                           resolved_method->GetImtIndex());
+  }
+
+  return HandleInvoke(invoke,
+                      number_of_vreg_arguments,
+                      args,
+                      register_index,
+                      is_range,
+                      descriptor,
+                      clinit_check,
+                      false /* is_unresolved */);
+}
+
+bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+
+  bool finalizable;
+  bool needs_access_check = NeedsAccessCheck(type_index, dex_cache, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then the class has not been resolved and it may still
+  // be finalizable.
+  QuickEntrypointEnum entrypoint = (finalizable || needs_access_check)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      outer_dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      needs_access_check,
+      /* is_in_dex_cache */ false);
+
+  AppendInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  AppendInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      needs_access_check,
+      finalizable,
+      entrypoint));
+  return true;
+}
+
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for the AOT compilation.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+    return true;
+  }
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
+}
+
+HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* resolved_method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
+
+  // The index at which the method's class is stored in the DexCache's type array.
+  uint32_t storage_index = DexFile::kDexNoIndex;
+  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
+    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
+    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
+                                                               GetCompilingClass(),
+                                                               resolved_method,
+                                                               method_idx,
+                                                               &storage_index);
+  }
+
+  HClinitCheck* clinit_check = nullptr;
+
+  if (IsInitialized(resolved_method_class)) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+  } else if (storage_index != DexFile::kDexNoIndex) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        /* is_in_dex_cache */ false);
+    AppendInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(clinit_check);
+  }
+  return clinit_check;
+}
+
+bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
+                                               uint32_t number_of_vreg_arguments,
+                                               uint32_t* args,
+                                               uint32_t register_index,
+                                               bool is_range,
+                                               const char* descriptor,
+                                               size_t start_index,
+                                               size_t* argument_index) {
+  uint32_t descriptor_index = 1;  // Skip the return type.
+
+  for (size_t i = start_index;
+       // Make sure we don't go over the expected arguments or over the number of
+       // dex registers given. If the instruction was seen as dead by the verifier,
+       // it hasn't been properly checked.
+       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
+       i++, (*argument_index)++) {
+    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
+    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
+    if (!is_range
+        && is_wide
+        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
+      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
+      // reject any class where this is violated. However, the verifier only performs these checks
+      // on instructions that are not trivially dead, so we just bail out of the compilation.
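+      // For example (illustrative): an invoke passing a long in {v0, v2} rather
+      // than in {v0, v1} triggers this bailout.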
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of non-sequential dex register pair in wide argument";
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      return false;
+    }
+    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
+    invoke->SetArgumentAt(*argument_index, arg);
+    if (is_wide) {
+      i++;
+    }
+  }
+
+  if (*argument_index != invoke->GetNumberOfArguments()) {
+    VLOG(compiler) << "Did not compile "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << " because of wrong number of arguments in invoke instruction";
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    return false;
+  }
+
+  if (invoke->IsInvokeStaticOrDirect() &&
+      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
+    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
+    (*argument_index)++;
+  }
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
+                                       uint32_t number_of_vreg_arguments,
+                                       uint32_t* args,
+                                       uint32_t register_index,
+                                       bool is_range,
+                                       const char* descriptor,
+                                       HClinitCheck* clinit_check,
+                                       bool is_unresolved) {
+  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 0;
+  size_t argument_index = 0;
+  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
+    uint32_t obj_reg = is_range ? register_index : args[0];
+    HInstruction* arg = is_unresolved
+        ? LoadLocal(obj_reg, Primitive::kPrimNot)
+        : LoadNullCheckedLocal(obj_reg, invoke->GetDexPc());
+    invoke->SetArgumentAt(0, arg);
+    start_index = 1;
+    argument_index = 1;
+  }
+
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  if (clinit_check != nullptr) {
+    // Add the class initialization check as the last input of `invoke`.
+    DCHECK(invoke->IsInvokeStaticOrDirect());
+    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
+        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
+    invoke->SetArgumentAt(argument_index, clinit_check);
+    argument_index++;
+  }
+
+  AppendInstruction(invoke);
+  latest_result_ = invoke;
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
+                                           uint32_t number_of_vreg_arguments,
+                                           uint32_t* args,
+                                           uint32_t register_index,
+                                           bool is_range,
+                                           const char* descriptor) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 1;
+  size_t argument_index = 0;
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  AppendInstruction(invoke);
+
+  // This is a StringFactory call, not an actual String constructor. Its result
+  // replaces the empty String pre-allocated by NewInstance.
+  uint32_t orig_this_reg = is_range ? register_index : args[0];
+  HInstruction* arg_this = LoadLocal(orig_this_reg, Primitive::kPrimNot);
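+  // A typical dex pattern handled here (sketch):
+  //   new-instance v0, Ljava/lang/String;
+  //   invoke-direct {v0, ...}, Ljava/lang/String;-><init>(...)
+  // where `arg_this` is the value in v0 and `invoke` produces the real String.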
+
+  // Replacing the NewInstance might render it redundant. Keep a list of these
+  // to be visited once it is clear whether it has remaining uses.
+  if (arg_this->IsNewInstance()) {
+    ssa_builder_->AddUninitializedString(arg_this->AsNewInstance());
+  } else {
+    DCHECK(arg_this->IsPhi());
+    // NewInstance is not the direct input of the StringFactory call. It might
+    // be redundant but optimizing this case is not worth the effort.
+  }
+
+  // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
+  for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+    if ((*current_locals_)[vreg] == arg_this) {
+      (*current_locals_)[vreg] = invoke;
+    }
+  }
+
+  return true;
+}
+
+static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
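+  // The leading character of the descriptor is enough to identify the type
+  // ('I' = int, 'J' = long, 'L' and '[' = reference, and so on).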
+  return Primitive::GetType(type[0]);
+}
+
+bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
+                                                   uint32_t dex_pc,
+                                                   bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_22c();
+  uint32_t obj_reg = instruction.VRegB_22c();
+  uint16_t field_index;
+  if (instruction.IsQuickened()) {
+    if (!CanDecodeQuickenedInfo()) {
+      return false;
+    }
+    field_index = LookupQuickenedInfo(dex_pc);
+  } else {
+    field_index = instruction.VRegC_22c();
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtField* resolved_field =
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
+
+  // Generate an explicit null check on the reference, unless the field access
+  // is unresolved. In that case, we rely on the runtime to perform various
+  // checks first, followed by a null check.
+  HInstruction* object = (resolved_field == nullptr)
+      ? LoadLocal(obj_reg, Primitive::kPrimNot)
+      : LoadNullCheckedLocal(obj_reg, dex_pc);
+
+  Primitive::Type field_type = (resolved_field == nullptr)
+      ? GetFieldAccessType(*dex_file_, field_index)
+      : resolved_field->GetTypeAsPrimitiveType();
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    HInstruction* field_set = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
+                                                           value,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_set = new (arena_) HInstanceFieldSet(object,
+                                                 value,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_set);
+  } else {
+    HInstruction* field_get = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_get = new (arena_) HInstanceFieldGet(object,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_get);
+    UpdateLocal(source_or_dest_reg, field_get);
+  }
+
+  return true;
+}
+
+static mirror::Class* GetClassFrom(CompilerDriver* driver,
+                                   const DexCompilationUnit& compilation_unit) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
+  Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
+
+  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
+}
+
+mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
+}
+
+mirror::Class* HInstructionBuilder::GetCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
+}
+
+bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
+      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // GetOutermostCompilingClass returns null when the class is unresolved
+  // (e.g. if it derives from an unresolved class), which should not happen
+  // given that we are in the process of compiling it.
+  // When this happens we cannot establish a direct relation between the
+  // current class and the outer class, so we return false.
+  // (Note that this is only used for optimizing invokes and field accesses.)
+  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
+}
+
+void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                                           uint32_t dex_pc,
+                                                           bool is_put,
+                                                           Primitive::Type field_type) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    AppendInstruction(
+        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
+                                                 uint32_t dex_pc,
+                                                 bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  ArtField* resolved_field = compiler_driver_->ResolveField(
+      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
+
+  if (resolved_field == nullptr) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
+    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+    return true;
+  }
+
+  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // The index at which the field's class is stored in the DexCache's type array.
+  uint32_t storage_index;
+  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // The compiler driver cannot currently handle multiple dex caches being
+    // involved. Just bail out.
+    return false;
+  } else {
+    // TODO: This is rather expensive. Profile it and cache the results if needed.
+    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
+        outer_dex_cache.Get(),
+        GetCompilingClass(),
+        resolved_field,
+        field_index,
+        &storage_index);
+    bool can_easily_access = is_put ? pair.second : pair.first;
+    if (!can_easily_access) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+      return true;
+    }
+  }
+
+  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
+                                                 storage_index,
+                                                 outer_dex_file,
+                                                 is_outer_class,
+                                                 dex_pc,
+                                                 /* needs_access_check */ false,
+                                                 /* is_in_dex_cache */ false);
+  AppendInstruction(constant);
+
+  HInstruction* cls = constant;
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
+  if (!IsInitialized(klass)) {
+    cls = new (arena_) HClinitCheck(constant, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
+  if (is_put) {
+    // We need to keep the class alive before loading the value.
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    DCHECK_EQ(HPhi::ToPhiType(value->GetType()), HPhi::ToPhiType(field_type));
+    AppendInstruction(new (arena_) HStaticFieldSet(cls,
+                                                   value,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HStaticFieldGet(cls,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  return true;
+}
+
+void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
+                                             uint16_t first_vreg,
+                                             int64_t second_vreg_or_constant,
+                                             uint32_t dex_pc,
+                                             Primitive::Type type,
+                                             bool second_is_constant,
+                                             bool is_div) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
+    } else {
+      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
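+  // A divide-by-zero check is needed unless the divisor is a constant known
+  // to be non-zero; a constant zero still gets the check so that the
+  // ArithmeticException is thrown at the right dex_pc.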
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
+    AppendInstruction(second);
+  }
+
+  if (is_div) {
+    AppendInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HRem(type, first, second, dex_pc));
+  }
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
+                                           uint32_t dex_pc,
+                                           bool is_put,
+                                           Primitive::Type anticipated_type) {
+  uint8_t source_or_dest_reg = instruction.VRegA_23x();
+  uint8_t array_reg = instruction.VRegB_23x();
+  uint8_t index_reg = instruction.VRegC_23x();
+
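+  // An array access lowers to a null-checked array reference, its length and
+  // a bounds check on the index, followed by the actual get or set.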
+  HInstruction* object = LoadNullCheckedLocal(array_reg, dex_pc);
+  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
+  AppendInstruction(length);
+  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
+  index = new (arena_) HBoundsCheck(index, length, dex_pc);
+  AppendInstruction(index);
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type);
+    // TODO: Insert a type check node if the type is Object.
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  } else {
+    HArrayGet* aget = new (arena_) HArrayGet(object, index, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArrayGet(aget);
+    AppendInstruction(aget);
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+                                              uint32_t type_index,
+                                              uint32_t number_of_vreg_arguments,
+                                              bool is_range,
+                                              uint32_t* args,
+                                              uint32_t register_index) {
+  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
+  bool finalizable;
+  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+      ? kQuickAllocArrayWithAccessCheck
+      : kQuickAllocArray;
+  HInstruction* object = new (arena_) HNewArray(length,
+                                                graph_->GetCurrentMethod(),
+                                                dex_pc,
+                                                type_index,
+                                                *dex_compilation_unit_->GetDexFile(),
+                                                entrypoint);
+  AppendInstruction(object);
+
+  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
+  DCHECK_EQ(descriptor[0], '[') << descriptor;
+  char primitive = descriptor[1];
+  DCHECK(primitive == 'I'
+      || primitive == 'L'
+      || primitive == '[') << descriptor;
+  bool is_reference_array = (primitive == 'L') || (primitive == '[');
+  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
+
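+  // Only int and reference element arrays reach this point (see the DCHECK
+  // above), so a single element type per array suffices for the stores.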
+  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
+    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+  latest_result_ = object;
+}
+
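+// Expands a fill-array-data payload into one HArraySet per element; T is the
+// signed integral type matching the payload's element width.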
+template <typename T>
+void HInstructionBuilder::BuildFillArrayData(HInstruction* object,
+                                             const T* data,
+                                             uint32_t element_count,
+                                             Primitive::Type anticipated_type,
+                                             uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
+void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc);
+  HInstruction* length = new (arena_) HArrayLength(array, dex_pc);
+  AppendInstruction(length);
+
+  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
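+  // Payload layout per the Dex spec: ushort ident (0x0300), ushort
+  // element_width, uint element_count, then the raw element data.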
+  const Instruction::ArrayDataPayload* payload =
+      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_.insns_ + payload_offset);
+  const uint8_t* data = payload->data;
+  uint32_t element_count = payload->element_count;
+
+  // This DEX instruction performs the bounds check before doing any of the
+  // stores, so a single check against the last index is emitted up front.
+  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
+  AppendInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
+
+  switch (payload->element_width) {
+    case 1:
+      BuildFillArrayData(array,
+                         reinterpret_cast<const int8_t*>(data),
+                         element_count,
+                         Primitive::kPrimByte,
+                         dex_pc);
+      break;
+    case 2:
+      BuildFillArrayData(array,
+                         reinterpret_cast<const int16_t*>(data),
+                         element_count,
+                         Primitive::kPrimShort,
+                         dex_pc);
+      break;
+    case 4:
+      BuildFillArrayData(array,
+                         reinterpret_cast<const int32_t*>(data),
+                         element_count,
+                         Primitive::kPrimInt,
+                         dex_pc);
+      break;
+    case 8:
+      BuildFillWideArrayData(array,
+                             reinterpret_cast<const int64_t*>(data),
+                             element_count,
+                             dex_pc);
+      break;
+    default:
+      LOG(FATAL) << "Unknown element width: " << payload->element_width;
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
+                                                 const int64_t* data,
+                                                 uint32_t element_count,
+                                                 uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, Primitive::kPrimLong, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
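+// Picks the cheapest viable lowering for a type check, roughly from an exact
+// class comparison for final classes down to a runtime call for unresolved
+// classes and interfaces.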
+static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls.Get() == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (cls->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (cls->IsArrayClass()) {
+    if (cls->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (cls->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (cls->IsFinal()) {
+    return TypeCheckKind::kExactCheck;
+  } else if (cls->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
+void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
+                                         uint8_t destination,
+                                         uint8_t reference,
+                                         uint16_t type_index,
+                                         uint32_t dex_pc) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+
+  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(),
+      dex_cache,
+      type_index);
+
+  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
+  HLoadClass* cls = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      !can_access,
+      /* is_in_dex_cache */ false);
+  AppendInstruction(cls);
+
+  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
+  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
+    AppendInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
+    UpdateLocal(destination, current_block_->GetLastInstruction());
+  } else {
+    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
+    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
+    // which may throw. If it succeeds, BoundType sets the new type of `object`
+    // for all subsequent uses.
+    AppendInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (arena_) HBoundType(object, dex_pc));
+    UpdateLocal(reference, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index,
+                                           Handle<mirror::DexCache> dex_cache,
+                                           bool* finalizable) const {
+  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index, finalizable);
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  return NeedsAccessCheck(type_index, dex_cache, finalizable);
+}
+
+bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
+  return interpreter_metadata_ != nullptr;
+}
+
+uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
+  DCHECK(interpreter_metadata_ != nullptr);
+
+  // First check if the info has already been decoded from `interpreter_metadata_`.
+  auto it = skipped_interpreter_metadata_.find(dex_pc);
+  if (it != skipped_interpreter_metadata_.end()) {
+    // Remove the entry from the map and return the parsed info.
+    uint16_t value_in_map = it->second;
+    skipped_interpreter_metadata_.erase(it);
+    return value_in_map;
+  }
+
+  // Otherwise start parsing `interpreter_metadata_` until the slot for `dex_pc`
+  // is found. Store skipped values in the `skipped_interpreter_metadata_` map.
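+  // The metadata is emitted in increasing dex_pc order, which is why the
+  // DCHECK below holds and parsing can stop once `dex_pc` is reached.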
+  while (true) {
+    uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+    uint16_t value_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+    DCHECK_LE(dex_pc_in_map, dex_pc);
+
+    if (dex_pc_in_map == dex_pc) {
+      return value_in_map;
+    } else {
+      skipped_interpreter_metadata_.Put(dex_pc_in_map, value_in_map);
+    }
+  }
+}
+
+bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
+  switch (instruction.Opcode()) {
+    case Instruction::CONST_4: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_16: {
+      int32_t register_index = instruction.VRegA();
+      // Get 16 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_21s();
+      value <<= 48;
+      value >>= 48;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_32: {
+      int32_t register_index = instruction.VRegA();
+      // Get 32 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_31i();
+      value <<= 32;
+      value >>= 32;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    // Note that SSA building will refine the types.
+    case Instruction::MOVE:
+    case Instruction::MOVE_FROM16:
+    case Instruction::MOVE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    // Note that SSA building will refine the types.
+    case Instruction::MOVE_WIDE:
+    case Instruction::MOVE_WIDE_FROM16:
+    case Instruction::MOVE_WIDE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::MOVE_OBJECT:
+    case Instruction::MOVE_OBJECT_16:
+    case Instruction::MOVE_OBJECT_FROM16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::RETURN_VOID_NO_BARRIER:
+    case Instruction::RETURN_VOID: {
+      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
+      break;
+    }
+
+#define IF_XX(comparison, cond) \
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
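+    // Each IF_XX line expands to the register-register (e.g. if-eq vA, vB)
+    // and register-vs-zero (e.g. if-eqz vA) forms of the comparison.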
+
+    IF_XX(HEqual, EQ);
+    IF_XX(HNotEqual, NE);
+    IF_XX(HLessThan, LT);
+    IF_XX(HLessThanOrEqual, LE);
+    IF_XX(HGreaterThan, GT);
+    IF_XX(HGreaterThanOrEqual, GE);
+
+    case Instruction::GOTO:
+    case Instruction::GOTO_16:
+    case Instruction::GOTO_32: {
+      AppendInstruction(new (arena_) HGoto(dex_pc));
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::RETURN: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_OBJECT: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_WIDE: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_SUPER:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_35c();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, false, args, -1)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_STATIC_RANGE:
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_3rc();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t register_index = instruction.VRegC();
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, true, nullptr, register_index)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::NEG_INT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_LONG: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_FLOAT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_DOUBLE: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_INT: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_LONG: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_BYTE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_SHORT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_CHAR: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::REM_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT16: {
+      Binop_22s<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT16: {
+      Binop_22s<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT16: {
+      Binop_22s<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT16: {
+      Binop_22s<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT: {
+      Binop_22s<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT16: {
+      Binop_22s<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT8: {
+      Binop_22b<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT8: {
+      Binop_22b<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT8: {
+      Binop_22b<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT8: {
+      Binop_22b<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT_LIT8: {
+      Binop_22b<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT8: {
+      Binop_22b<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_LIT16:
+    case Instruction::DIV_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, true);
+      break;
+    }
+
+    case Instruction::REM_INT_LIT16:
+    case Instruction::REM_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, false);
+      break;
+    }
+
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::NEW_INSTANCE: {
+      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+        return false;
+      }
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::NEW_ARRAY: {
+      uint16_t type_index = instruction.VRegC_22c();
+      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
+      bool finalizable;
+      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+          ? kQuickAllocArrayWithAccessCheck
+          : kQuickAllocArray;
+      AppendInstruction(new (arena_) HNewArray(length,
+                                               graph_->GetCurrentMethod(),
+                                               dex_pc,
+                                               type_index,
+                                               *dex_compilation_unit_->GetDexFile(),
+                                               entrypoint));
+      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t type_index = instruction.VRegB_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY_RANGE: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t type_index = instruction.VRegB_3rc();
+      uint32_t register_index = instruction.VRegC_3rc();
+      BuildFilledNewArray(
+          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+      break;
+    }
+
+    case Instruction::FILL_ARRAY_DATA: {
+      BuildFillArrayData(instruction, dex_pc);
+      break;
+    }
+
+    case Instruction::MOVE_RESULT:
+    case Instruction::MOVE_RESULT_WIDE:
+    case Instruction::MOVE_RESULT_OBJECT: {
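+      // The preceding invoke or filled-new-array recorded its value in
+      // `latest_result_`; move-result copies it into the target vreg.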
+      DCHECK(latest_result_ != nullptr);
+      UpdateLocal(instruction.VRegA(), latest_result_);
+      latest_result_ = nullptr;
+      break;
+    }
+
+    case Instruction::CMP_LONG: {
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::NOP:
+      break;
+
+    case Instruction::IGET:
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT:
+    case Instruction::IGET_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::IPUT:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SGET:
+    case Instruction::SGET_WIDE:
+    case Instruction::SGET_OBJECT:
+    case Instruction::SGET_BOOLEAN:
+    case Instruction::SGET_BYTE:
+    case Instruction::SGET_CHAR:
+    case Instruction::SGET_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SPUT:
+    case Instruction::SPUT_WIDE:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+#define ARRAY_XX(kind, anticipated_type)                                      \
+    case Instruction::AGET##kind: {                                           \
+      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);         \
+      break;                                                                  \
+    }                                                                         \
+    case Instruction::APUT##kind: {                                           \
+      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);          \
+      break;                                                                  \
+    }
+
+    ARRAY_XX(, Primitive::kPrimInt);
+    ARRAY_XX(_WIDE, Primitive::kPrimLong);
+    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
+    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
+    ARRAY_XX(_BYTE, Primitive::kPrimByte);
+    ARRAY_XX(_CHAR, Primitive::kPrimChar);
+    ARRAY_XX(_SHORT, Primitive::kPrimShort);
+
+    case Instruction::ARRAY_LENGTH: {
+      HInstruction* object = LoadNullCheckedLocal(instruction.VRegB_12x(), dex_pc);
+      AppendInstruction(new (arena_) HArrayLength(object, dex_pc));
+      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_CLASS: {
+      uint16_t type_index = instruction.VRegB_21c();
+      // `CanAccessTypeWithoutChecks` will tell whether the method being
+      // built is trying to access its own class, so that the generated
+      // code can optimize for this case. However, the optimization does not
+      // work for inlining, so we use `IsOutermostCompilingClass` instead.
+      ScopedObjectAccess soa(Thread::Current());
+      Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+          dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index);
+      AppendInstruction(new (arena_) HLoadClass(
+          graph_->GetCurrentMethod(),
+          type_index,
+          *dex_file_,
+          IsOutermostCompilingClass(type_index),
+          dex_pc,
+          !can_access,
+          /* is_in_dex_cache */ false));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::MOVE_EXCEPTION: {
+      AppendInstruction(new (arena_) HLoadException(dex_pc));
+      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction());
+      AppendInstruction(new (arena_) HClearException(dex_pc));
+      break;
+    }
+
+    case Instruction::THROW: {
+      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot);
+      AppendInstruction(new (arena_) HThrow(exception, dex_pc));
+      // We finished building this block. Set the current block to null to avoid
+      // adding dead instructions to it.
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::INSTANCE_OF: {
+      uint8_t destination = instruction.VRegA_22c();
+      uint8_t reference = instruction.VRegB_22c();
+      uint16_t type_index = instruction.VRegC_22c();
+      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::CHECK_CAST: {
+      uint8_t reference = instruction.VRegA_21c();
+      uint16_t type_index = instruction.VRegB_21c();
+      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::MONITOR_ENTER: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kEnter,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::MONITOR_EXIT: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kExit,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::SPARSE_SWITCH:
+    case Instruction::PACKED_SWITCH: {
+      BuildSwitch(instruction, dex_pc);
+      break;
+    }
+
+    default:
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of unhandled instruction "
+                     << instruction.Name();
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      return false;
+  }
+  return true;
+}  // NOLINT(readability/fn_size)
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
new file mode 100644
index 0000000..517cf76
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "block_builder.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
+#include "driver/dex_compilation_unit.h"
+#include "mirror/dex_cache.h"
+#include "nodes.h"
+#include "optimizing_compiler_stats.h"
+#include "ssa_builder.h"
+
+namespace art {
+
+class Instruction;
+
+class HInstructionBuilder : public ValueObject {
+ public:
+  HInstructionBuilder(HGraph* graph,
+                      HBasicBlockBuilder* block_builder,
+                      SsaBuilder* ssa_builder,
+                      const DexFile* dex_file,
+                      const DexFile::CodeItem& code_item,
+                      Primitive::Type return_type,
+                      DexCompilationUnit* dex_compilation_unit,
+                      const DexCompilationUnit* const outer_compilation_unit,
+                      CompilerDriver* driver,
+                      const uint8_t* interpreter_metadata,
+                      OptimizingCompilerStats* compiler_stats,
+                      Handle<mirror::DexCache> dex_cache)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        return_type_(return_type),
+        block_builder_(block_builder),
+        ssa_builder_(ssa_builder),
+        locals_for_(arena_->Adapter(kArenaAllocGraphBuilder)),
+        current_block_(nullptr),
+        current_locals_(nullptr),
+        latest_result_(nullptr),
+        compiler_driver_(driver),
+        dex_compilation_unit_(dex_compilation_unit),
+        outer_compilation_unit_(outer_compilation_unit),
+        interpreter_metadata_(interpreter_metadata),
+        skipped_interpreter_metadata_(std::less<uint32_t>(),
+                                      arena_->Adapter(kArenaAllocGraphBuilder)),
+        compilation_stats_(compiler_stats),
+        dex_cache_(dex_cache),
+        loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    loop_headers_.reserve(kDefaultNumberOfLoops);
+  }
+
+  bool Build();
+
+ private:
+  void MaybeRecordStat(MethodCompilationStat compilation_stat);
+
+  void InitializeBlockLocals();
+  void PropagateLocalsToCatchBlocks();
+  void SetLoopHeaderPhiInputs();
+
+  bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc);
+  void FindNativeDebugInfoLocations(ArenaBitVector* locations);
+
+  bool CanDecodeQuickenedInfo() const;
+  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+
+  HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
+
+  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+  HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
+  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
+  HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
+  void UpdateLocal(uint32_t register_index, HInstruction* instruction);
+
+  void AppendInstruction(HInstruction* instruction);
+  void InsertInstructionAtTop(HInstruction* instruction);
+  void InitializeInstruction(HInstruction* instruction);
+
+  void InitializeParameters();
+
+  // Returns whether the current method needs an access check for the type.
+  // The output parameter `finalizable` is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index,
+                        Handle<mirror::DexCache> dex_cache,
+                        /*out*/bool* finalizable) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
+
+  template<typename T>
+  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  void Binop_23x_cmp(const Instruction& instruction,
+                     Primitive::Type type,
+                     ComparisonBias bias,
+                     uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
+
+  void Conversion_12x(const Instruction& instruction,
+                      Primitive::Type input_type,
+                      Primitive::Type result_type,
+                      uint32_t dex_pc);
+
+  void BuildCheckedDivRem(uint16_t out_reg,
+                          uint16_t first_reg,
+                          int64_t second_reg_or_constant,
+                          uint32_t dex_pc,
+                          Primitive::Type type,
+                          bool second_is_lit,
+                          bool is_div);
+
+  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  // Builds an instance field access node and returns whether the instruction is supported.
+  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                        uint32_t dex_pc,
+                                        bool is_put,
+                                        Primitive::Type field_type);
+  // Builds a static field access node and returns whether the instruction is supported.
+  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildArrayAccess(const Instruction& instruction,
+                        uint32_t dex_pc,
+                        bool is_get,
+                        Primitive::Type anticipated_type);
+
+  // Builds an invocation node and returns whether the instruction is supported.
+  bool BuildInvoke(const Instruction& instruction,
+                   uint32_t dex_pc,
+                   uint32_t method_idx,
+                   uint32_t number_of_vreg_arguments,
+                   bool is_range,
+                   uint32_t* args,
+                   uint32_t register_index);
+
+  // Builds a new array node and the instructions that fill it.
+  void BuildFilledNewArray(uint32_t dex_pc,
+                           uint32_t type_index,
+                           uint32_t number_of_vreg_arguments,
+                           bool is_range,
+                           uint32_t* args,
+                           uint32_t register_index);
+
+  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. Currently only used for non-reference and non-floating point
+  // arrays.
+  template <typename T>
+  void BuildFillArrayData(HInstruction* object,
+                          const T* data,
+                          uint32_t element_count,
+                          Primitive::Type anticipated_type,
+                          uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. The data must be for long and double arrays.
+  void BuildFillWideArrayData(HInstruction* object,
+                              const int64_t* data,
+                              uint32_t element_count,
+                              uint32_t dex_pc);
+
+  // Builds an `HInstanceOf` or an `HCheckCast` instruction.
+  void BuildTypeCheck(const Instruction& instruction,
+                      uint8_t destination,
+                      uint8_t reference,
+                      uint16_t type_index,
+                      uint32_t dex_pc);
+
+  // Builds an instruction sequence for a switch statement.
+  void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
+
+  // Returns the outer-most compiling method's class.
+  mirror::Class* GetOutermostCompilingClass() const;
+
+  // Returns the class whose method is being compiled.
+  mirror::Class* GetCompilingClass() const;
+
+  // Returns whether `type_index` points to the outer-most compiling method's class.
+  bool IsOutermostCompilingClass(uint16_t type_index) const;
+
+  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
+                                     uint32_t dex_pc,
+                                     HInvoke* invoke);
+
+  bool SetupInvokeArguments(HInvoke* invoke,
+                            uint32_t number_of_vreg_arguments,
+                            uint32_t* args,
+                            uint32_t register_index,
+                            bool is_range,
+                            const char* descriptor,
+                            size_t start_index,
+                            size_t* argument_index);
+
+  bool HandleInvoke(HInvoke* invoke,
+                    uint32_t number_of_vreg_arguments,
+                    uint32_t* args,
+                    uint32_t register_index,
+                    bool is_range,
+                    const char* descriptor,
+                    HClinitCheck* clinit_check,
+                    bool is_unresolved);
+
+  bool HandleStringInit(HInvoke* invoke,
+                        uint32_t number_of_vreg_arguments,
+                        uint32_t* args,
+                        uint32_t register_index,
+                        bool is_range,
+                        const char* descriptor);
+  void HandleStringInitResult(HInvokeStaticOrDirect* invoke);
+
+  HClinitCheck* ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Builds an `HNewInstance` instruction.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Returns whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Tries to resolve a method using the class linker. Returns null if the
+  // method could not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  // The dex file containing the method being compiled, and its bytecode data.
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  // The return type of the method being compiled.
+  const Primitive::Type return_type_;
+
+  HBasicBlockBuilder* block_builder_;
+  SsaBuilder* ssa_builder_;
+
+  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
+  HBasicBlock* current_block_;
+  ArenaVector<HInstruction*>* current_locals_;
+  HInstruction* latest_result_;
+
+  CompilerDriver* const compiler_driver_;
+
+  // The compilation unit of the current method being compiled. Note that
+  // it can be an inlined method.
+  DexCompilationUnit* const dex_compilation_unit_;
+
+  // The compilation unit of the outermost method being compiled. That is the
+  // method being compiled (and not inlined), and potentially inlining other
+  // methods.
+  const DexCompilationUnit* const outer_compilation_unit_;
+
+  // Original values kept after instruction quickening. This is a data buffer
+  // of Leb128-encoded (dex_pc, value) pairs sorted by dex_pc.
+  const uint8_t* interpreter_metadata_;
+
+  // InstructionBuilder does not parse instructions in dex_pc order. Quickening
+  // info for out-of-order dex_pcs is stored in a map until the positions
+  // are eventually visited.
+  ArenaSafeMap<uint32_t, uint16_t> skipped_interpreter_metadata_;
+
+  OptimizingCompilerStats* compilation_stats_;
+  Handle<mirror::DexCache> dex_cache_;
+
+  ArenaVector<HBasicBlock*> loop_headers_;
+
+  static constexpr int kDefaultNumberOfLoops = 2;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b97dc1a..4ca0600 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -34,15 +34,31 @@
   void RecordSimplification() {
     simplification_occurred_ = true;
     simplifications_at_current_position_++;
-    if (stats_) {
-      stats_->RecordStat(kInstructionSimplifications);
+    MaybeRecordStat(kInstructionSimplifications);
+  }
+
+  void MaybeRecordStat(MethodCompilationStat stat) {
+    if (stats_ != nullptr) {
+      stats_->RecordStat(stat);
     }
   }
 
+  bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotate(HBinaryOperation* instruction);
+  bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+
   bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
+  // `op` should be either HOr or HAnd.
+  // De Morgan's laws:
+  // ~a & ~b = ~(a | b)  and  ~a | ~b = ~(a & b)
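+  // e.g. (~x & ~y) can be rewritten as ~(x | y), trading two Nots and an And
+  // for an Or and a single Not.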
+  bool TryDeMorganNegationFactoring(HBinaryOperation* op);
+  bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
+  bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+
   void VisitShift(HBinaryOperation* shift);
 
-  void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
   void VisitEqual(HEqual* equal) OVERRIDE;
   void VisitNotEqual(HNotEqual* equal) OVERRIDE;
   void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE;
@@ -60,6 +76,10 @@
   void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE;
   void VisitLessThan(HLessThan* condition) OVERRIDE;
   void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE;
+  void VisitBelow(HBelow* condition) OVERRIDE;
+  void VisitBelowOrEqual(HBelowOrEqual* condition) OVERRIDE;
+  void VisitAbove(HAbove* condition) OVERRIDE;
+  void VisitAboveOrEqual(HAboveOrEqual* condition) OVERRIDE;
   void VisitDiv(HDiv* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitNeg(HNeg* instruction) OVERRIDE;
@@ -70,15 +90,23 @@
   void VisitSub(HSub* instruction) OVERRIDE;
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
+  void VisitSelect(HSelect* select) OVERRIDE;
+  void VisitIf(HIf* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
-  void VisitFakeString(HFakeString* fake_string) OVERRIDE;
   void VisitInvoke(HInvoke* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifyRotate(HInvoke* invoke, bool is_left, Primitive::Type type);
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
+  void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type);
+  void SimplifyIsNaN(HInvoke* invoke);
+  void SimplifyFP2Int(HInvoke* invoke);
+  void SimplifyStringCharAt(HInvoke* invoke);
+  void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
+  void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -156,33 +184,240 @@
   return true;
 }
 
+bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr()) << op->DebugName();
+  Primitive::Type type = op->GetType();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // We can apply De Morgan's laws if both inputs are Not's and are only used
+  // by `op`.
+  if (((left->IsNot() && right->IsNot()) ||
+       (left->IsBooleanNot() && right->IsBooleanNot())) &&
+      left->HasOnlyOneNonEnvironmentUse() &&
+      right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NOT nota, a
+    //    NOT notb, b
+    //    AND dst, nota, notb (respectively OR)
+    // with
+    //    OR or, a, b         (respectively AND)
+    //    NOT dst, or
+    HInstruction* src_left = left->InputAt(0);
+    HInstruction* src_right = right->InputAt(0);
+    uint32_t dex_pc = op->GetDexPc();
+
+    // Remove the negations on the inputs.
+    left->ReplaceWith(src_left);
+    right->ReplaceWith(src_right);
+    left->GetBlock()->RemoveInstruction(left);
+    right->GetBlock()->RemoveInstruction(right);
+
+    // Replace the `HAnd` or `HOr`.
+    HBinaryOperation* hbin;
+    if (op->IsAnd()) {
+      hbin = new (GetGraph()->GetArena()) HOr(type, src_left, src_right, dex_pc);
+    } else {
+      hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc);
+    }
+    HInstruction* hnot;
+    if (left->IsBooleanNot()) {
+      hnot = new (GetGraph()->GetArena()) HBooleanNot(hbin, dex_pc);
+    } else {
+      hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+    }
+
+    op->GetBlock()->InsertInstructionBefore(hbin, op);
+    op->GetBlock()->ReplaceAndRemoveInstructionWith(op, hnot);
+
+    RecordSimplification();
+    return true;
+  }
+
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
-  HConstant* input_cst = instruction->GetConstantRight();
-  HInstruction* input_other = instruction->GetLeastConstantLeft();
+  HInstruction* shift_amount = instruction->GetRight();
+  HInstruction* value = instruction->GetLeft();
 
-  if (input_cst != nullptr) {
-    if (input_cst->IsZero()) {
+  int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong)
+      ? kMaxLongShiftDistance
+      : kMaxIntShiftDistance;
+
+  if (shift_amount->IsConstant()) {
+    int64_t cst = Int64FromConstant(shift_amount->AsConstant());
+    if ((cst & implicit_mask) == 0) {
       // Replace code looking like
-      //    SHL dst, src, 0
+      //    SHL dst, value, 0
       // with
-      //    src
-      instruction->ReplaceWith(input_other);
+      //    value
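+      // (Any distance that masks to 0, e.g. 32 for int or 64 for long, is a no-op.)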
+      instruction->ReplaceWith(value);
       instruction->GetBlock()->RemoveInstruction(instruction);
-    } else if (instruction->IsShl() && input_cst->IsOne()) {
-      // Replace Shl looking like
-      //    SHL dst, src, 1
-      // with
-      //    ADD dst, src, src
-      HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
-                                                   input_other,
-                                                   input_other);
-      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Shift operations implicitly mask the shift amount according to the type width. Get rid of
+  // unnecessary explicit masking operations on the shift amount.
+  // Replace code looking like
+  //    AND masked_shift, shift, <superset of implicit mask>
+  //    SHL dst, value, masked_shift
+  // with
+  //    SHL dst, value, shift
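+  // e.g. for an int value, (x << (shift & 31)) computes the same result as
+  // (x << shift), because the hardware shift already masks the distance.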
+  if (shift_amount->IsAnd()) {
+    HAnd* and_insn = shift_amount->AsAnd();
+    HConstant* mask = and_insn->GetConstantRight();
+    if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) {
+      instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1);
       RecordSimplification();
     }
   }
 }
 
+static bool IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) {
+  return (sub->GetRight() == other &&
+          sub->GetLeft()->IsConstant() &&
+          (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0);
+}
+
+bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
+                                                        HUShr* ushr,
+                                                        HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()) << op->DebugName();
+  HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(), ushr->GetLeft(), ushr->GetRight());
+  op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror);
+  if (!ushr->HasUses()) {
+    ushr->GetBlock()->RemoveInstruction(ushr);
+  }
+  if (!ushr->GetRight()->HasUses()) {
+    ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight());
+  }
+  if (!shl->HasUses()) {
+    shl->GetBlock()->RemoveInstruction(shl);
+  }
+  if (!shl->GetRight()->HasUses()) {
+    shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
+  }
+  RecordSimplification();
+  return true;
+}
+
+// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
+bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+  // If we have an UShr and a Shl (in either order).
+  if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) {
+    HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr();
+    HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl();
+    DCHECK(Primitive::IsIntOrLongType(ushr->GetType()));
+    if (ushr->GetType() == shl->GetType() &&
+        ushr->GetLeft() == shl->GetLeft()) {
+      if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) {
+        // Shift distances are both constant, try replacing with Ror if they
+        // add up to the register size.
+        return TryReplaceWithRotateConstantPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) {
+        // Shift distances are potentially of the form x and (reg_size - x).
+        return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) {
+        // Shift distances are potentially of the form d and -d.
+        return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl);
+      }
+    }
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> #rdist OP x << #ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #ldist
+//    OP   dst, dst, tmp
+// or like (x >>> #rdist OP x << #-ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #-ldist
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   #rdist
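+// e.g. for a 32-bit x, (x >>> 24) | (x << 8) has 24 + 8 == 32 and becomes Ror(x, 24).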
+bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op,
+                                                                       HUShr* ushr,
+                                                                       HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant());
+  size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant());
+  if (((ldist + rdist) & (reg_bits - 1)) == 0) {
+    ReplaceRotateWithRor(op, ushr, shl);
+    return true;
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> -d OP x << d):
+//    Neg  neg, d
+//    UShr dst, x,   neg
+//    Shl  tmp, x,   d
+//    OP   dst, dst, tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,   neg
+// *** OR ***
+// Replace code looking like (x >>> d OP x << -d):
+//    UShr dst, x,   d
+//    Neg  neg, d
+//    Shl  tmp, x,   neg
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   d
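+// This works because, with the implicit masking, -d is the same shift
+// distance as #bits - d.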
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg());
+  bool neg_is_left = shl->GetRight()->IsNeg();
+  HNeg* neg = neg_is_left ? shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg();
+  // Check that the distance being negated is the distance used by the shift
+  // in the other direction.
+  if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> d OP x << (#bits - d)):
+//    UShr dst, x,     d
+//    Sub  ld,  #bits, d
+//    Shl  tmp, x,     ld
+//    OP   dst, dst,   tmp
+// with
+//    Ror  dst, x,     d
+// *** OR ***
+// Replace code looking like (x >>> (#bits - d) OP x << d):
+//    Sub  rd,  #bits, d
+//    UShr dst, x,     rd
+//    Shl  tmp, x,     d
+//    OP   dst, dst,   tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,     neg
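+// e.g. for a 32-bit x, (x >>> d) | (x << (32 - d)) becomes Ror(x, d).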
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  HInstruction* shl_shift = shl->GetRight();
+  HInstruction* ushr_shift = ushr->GetRight();
+  if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) ||
+      (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
   HInstruction* obj = null_check->InputAt(0);
   if (!obj->CanBeNull()) {
@@ -199,9 +434,9 @@
     return true;
   }
 
-  for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* use = it.Current()->GetUser();
-    if (use->IsNullCheck() && use->StrictlyDominates(at)) {
+  for (const HUseListNode<HInstruction*>& use : input->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (user->IsNullCheck() && user->StrictlyDominates(at)) {
       return true;
     }
   }
@@ -258,19 +493,17 @@
 
   if (object->IsNullConstant()) {
     check_cast->GetBlock()->RemoveInstruction(check_cast);
-    if (stats_ != nullptr) {
-      stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast);
-    }
+    MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
     return;
   }
 
-  bool outcome;
+  // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
+  // the return value check with the `outcome` check, b/27651442.
+  bool outcome = false;
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
-      if (stats_ != nullptr) {
-        stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast);
-      }
+      MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
       if (!load_class->HasUses()) {
         // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+        // However, here we know that it cannot because the checkcast was successful, hence
@@ -300,14 +533,18 @@
 
   HGraph* graph = GetGraph();
   if (object->IsNullConstant()) {
+    MaybeRecordStat(kRemovedInstanceOf);
     instruction->ReplaceWith(graph->GetIntConstant(0));
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
     return;
   }
 
-  bool outcome;
+  // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
+  // the return value check with the `outcome` check, b/27651442.
+  bool outcome = false;
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+    MaybeRecordStat(kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
       HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object);
@@ -342,20 +579,34 @@
   }
 }
 
-void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) {
-  HBasicBlock* block = check->GetBlock();
-  // Currently always keep the suspend check at entry.
-  if (block->IsEntryBlock()) return;
-
-  // Currently always keep suspend checks at loop entry.
-  if (block->IsLoopHeader() && block->GetFirstInstruction() == check) {
-    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == check);
-    return;
+static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* arena, HInstruction* cond) {
+  HInstruction* lhs = cond->InputAt(0);
+  HInstruction* rhs = cond->InputAt(1);
+  switch (cond->GetKind()) {
+    case HInstruction::kEqual:
+      return new (arena) HEqual(rhs, lhs);
+    case HInstruction::kNotEqual:
+      return new (arena) HNotEqual(rhs, lhs);
+    case HInstruction::kLessThan:
+      return new (arena) HGreaterThan(rhs, lhs);
+    case HInstruction::kLessThanOrEqual:
+      return new (arena) HGreaterThanOrEqual(rhs, lhs);
+    case HInstruction::kGreaterThan:
+      return new (arena) HLessThan(rhs, lhs);
+    case HInstruction::kGreaterThanOrEqual:
+      return new (arena) HLessThanOrEqual(rhs, lhs);
+    case HInstruction::kBelow:
+      return new (arena) HAbove(rhs, lhs);
+    case HInstruction::kBelowOrEqual:
+      return new (arena) HAboveOrEqual(rhs, lhs);
+    case HInstruction::kAbove:
+      return new (arena) HBelow(rhs, lhs);
+    case HInstruction::kAboveOrEqual:
+      return new (arena) HBelowOrEqual(rhs, lhs);
+    default:
+      LOG(FATAL) << "Unknown ConditionType " << cond->GetKind();
   }
-
-  // Remove the suspend check that was added at build time for the baseline
-  // compiler.
-  block->RemoveInstruction(check);
+  return nullptr;
 }
 
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
@@ -366,15 +617,14 @@
       HBasicBlock* block = equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
-      if (input_const->AsIntConstant()->IsOne()) {
+      if (input_const->AsIntConstant()->IsTrue()) {
         // Replace (bool_value == true) with bool_value
         equal->ReplaceWith(input_value);
         block->RemoveInstruction(equal);
         RecordSimplification();
-      } else if (input_const->AsIntConstant()->IsZero()) {
-        // Replace (bool_value == false) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+      } else if (input_const->AsIntConstant()->IsFalse()) {
+        equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
+        block->RemoveInstruction(equal);
         RecordSimplification();
       } else {
         // Replace (bool_value == integer_not_zero_nor_one_constant) with false
@@ -398,12 +648,11 @@
       HBasicBlock* block = not_equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
-      if (input_const->AsIntConstant()->IsOne()) {
-        // Replace (bool_value != true) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+      if (input_const->AsIntConstant()->IsTrue()) {
+        not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
+        block->RemoveInstruction(not_equal);
         RecordSimplification();
-      } else if (input_const->AsIntConstant()->IsZero()) {
+      } else if (input_const->AsIntConstant()->IsFalse()) {
         // Replace (bool_value != false) with bool_value
         not_equal->ReplaceWith(input_value);
         block->RemoveInstruction(not_equal);
@@ -423,14 +672,86 @@
 }
 
 void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) {
-  HInstruction* parent = bool_not->InputAt(0);
-  if (parent->IsBooleanNot()) {
-    HInstruction* value = parent->InputAt(0);
-    // Replace (!(!bool_value)) with bool_value
-    bool_not->ReplaceWith(value);
+  HInstruction* input = bool_not->InputAt(0);
+  HInstruction* replace_with = nullptr;
+
+  if (input->IsIntConstant()) {
+    // Replace !(true/false) with false/true.
+    if (input->AsIntConstant()->IsTrue()) {
+      replace_with = GetGraph()->GetIntConstant(0);
+    } else {
+      DCHECK(input->AsIntConstant()->IsFalse()) << input->AsIntConstant()->GetValue();
+      replace_with = GetGraph()->GetIntConstant(1);
+    }
+  } else if (input->IsBooleanNot()) {
+    // Replace (!(!bool_value)) with bool_value.
+    replace_with = input->InputAt(0);
+  } else if (input->IsCondition() &&
+             // Don't change FP compares. The definition of compares involving
+             // NaNs forces the compares to be done as written by the user.
+             !Primitive::IsFloatingPointType(input->InputAt(0)->GetType())) {
+    // Replace condition with its opposite.
+    replace_with = GetGraph()->InsertOppositeCondition(input->AsCondition(), bool_not);
+  }
+
+  if (replace_with != nullptr) {
+    bool_not->ReplaceWith(replace_with);
     bool_not->GetBlock()->RemoveInstruction(bool_not);
-    // It is possible that `parent` is dead at this point but we leave
-    // its removal to DCE for simplicity.
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
+  HInstruction* replace_with = nullptr;
+  HInstruction* condition = select->GetCondition();
+  HInstruction* true_value = select->GetTrueValue();
+  HInstruction* false_value = select->GetFalseValue();
+
+  if (condition->IsBooleanNot()) {
+    // Change ((!cond) ? x : y) to (cond ? y : x).
+    condition = condition->InputAt(0);
+    std::swap(true_value, false_value);
+    select->ReplaceInput(false_value, 0);
+    select->ReplaceInput(true_value, 1);
+    select->ReplaceInput(condition, 2);
+    RecordSimplification();
+  }
+
+  if (true_value == false_value) {
+    // Replace (cond ? x : x) with (x).
+    replace_with = true_value;
+  } else if (condition->IsIntConstant()) {
+    if (condition->AsIntConstant()->IsTrue()) {
+      // Replace (true ? x : y) with (x).
+      replace_with = true_value;
+    } else {
+      // Replace (false ? x : y) with (y).
+      DCHECK(condition->AsIntConstant()->IsFalse()) << condition->AsIntConstant()->GetValue();
+      replace_with = false_value;
+    }
+  } else if (true_value->IsIntConstant() && false_value->IsIntConstant()) {
+    if (true_value->AsIntConstant()->IsTrue() && false_value->AsIntConstant()->IsFalse()) {
+      // Replace (cond ? true : false) with (cond).
+      replace_with = condition;
+    } else if (true_value->AsIntConstant()->IsFalse() && false_value->AsIntConstant()->IsTrue()) {
+      // Replace (cond ? false : true) with (!cond).
+      replace_with = GetGraph()->InsertOppositeCondition(condition, select);
+    }
+  }
+
+  if (replace_with != nullptr) {
+    select->ReplaceWith(replace_with);
+    select->GetBlock()->RemoveInstruction(select);
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierVisitor::VisitIf(HIf* instruction) {
+  HInstruction* condition = instruction->InputAt(0);
+  if (condition->IsBooleanNot()) {
+    // Swap successors if input is negated.
+    instruction->ReplaceInput(condition->InputAt(0), 0);
+    instruction->GetBlock()->SwapSuccessors();
     RecordSimplification();
   }
 }
@@ -489,18 +810,118 @@
   }
 }
 
+static bool IsTypeConversionImplicit(Primitive::Type input_type, Primitive::Type result_type) {
+  // Invariant: We should never generate a conversion to a Boolean value.
+  DCHECK_NE(Primitive::kPrimBoolean, result_type);
+
+  // Besides conversion to the same type, widening integral conversions are implicit,
+  // excluding conversions to long and the byte->char conversion where we need to
+  // clear the high 16 bits of the 32-bit sign-extended representation of byte.
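+  // e.g. short->int is implicit: the 16-bit value is already held
+  // sign-extended in a 32-bit register.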
+  return result_type == input_type ||
+      (result_type == Primitive::kPrimInt && (input_type == Primitive::kPrimBoolean ||
+                                              input_type == Primitive::kPrimByte ||
+                                              input_type == Primitive::kPrimShort ||
+                                              input_type == Primitive::kPrimChar)) ||
+      (result_type == Primitive::kPrimChar && input_type == Primitive::kPrimBoolean) ||
+      (result_type == Primitive::kPrimShort && (input_type == Primitive::kPrimBoolean ||
+                                                input_type == Primitive::kPrimByte)) ||
+      (result_type == Primitive::kPrimByte && input_type == Primitive::kPrimBoolean);
+}
+
+static bool IsTypeConversionLossless(Primitive::Type input_type, Primitive::Type result_type) {
+  // The conversion to a larger type is loss-less with the exception of two cases,
+  //   - conversion to char, the only unsigned type, where we may lose some bits, and
+  //   - conversion from float to long, the only FP to integral conversion with smaller FP type.
+  // For integral to FP conversions this holds because the FP mantissa is large enough.
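+  // e.g. int->double is exact (the 53-bit mantissa covers every int), while
+  // (long) 0.5f truncates to 0.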
+  DCHECK_NE(input_type, result_type);
+  return Primitive::ComponentSize(result_type) > Primitive::ComponentSize(input_type) &&
+      result_type != Primitive::kPrimChar &&
+      !(result_type == Primitive::kPrimLong && input_type == Primitive::kPrimFloat);
+}
+
 void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
-  if (instruction->GetResultType() == instruction->GetInputType()) {
-    // Remove the instruction if it's converting to the same type.
-    instruction->ReplaceWith(instruction->GetInput());
+  HInstruction* input = instruction->GetInput();
+  Primitive::Type input_type = input->GetType();
+  Primitive::Type result_type = instruction->GetResultType();
+  if (IsTypeConversionImplicit(input_type, result_type)) {
+    // Remove the implicit conversion; this includes conversion to the same type.
+    instruction->ReplaceWith(input);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
+  }
+
+  if (input->IsTypeConversion()) {
+    HTypeConversion* input_conversion = input->AsTypeConversion();
+    HInstruction* original_input = input_conversion->GetInput();
+    Primitive::Type original_type = original_input->GetType();
+
+    // When the first conversion is lossless, a direct conversion from the original type
+    // to the final type yields the same result, even for a lossy second conversion, for
+    // example float->double->int or int->double->float.
+    bool is_first_conversion_lossless = IsTypeConversionLossless(original_type, input_type);
+
+    // For integral conversions, see if the first conversion loses only bits that the second
+    // doesn't need, i.e. the final type is no wider than the intermediate. If so, direct
+    // conversion yields the same result, for example long->int->short or int->char->short.
+    bool integral_conversions_with_non_widening_second =
+        Primitive::IsIntegralType(input_type) &&
+        Primitive::IsIntegralType(original_type) &&
+        Primitive::IsIntegralType(result_type) &&
+        Primitive::ComponentSize(result_type) <= Primitive::ComponentSize(input_type);
+
+    if (is_first_conversion_lossless || integral_conversions_with_non_widening_second) {
+      // If the merged conversion is implicit, do the simplification unconditionally.
+      if (IsTypeConversionImplicit(original_type, result_type)) {
+        instruction->ReplaceWith(original_input);
+        instruction->GetBlock()->RemoveInstruction(instruction);
+        if (!input_conversion->HasUses()) {
+          // Don't wait for DCE.
+          input_conversion->GetBlock()->RemoveInstruction(input_conversion);
+        }
+        RecordSimplification();
+        return;
+      }
+      // Otherwise simplify only if the first conversion has no other use.
+      if (input_conversion->HasOnlyOneNonEnvironmentUse()) {
+        input_conversion->ReplaceWith(original_input);
+        input_conversion->GetBlock()->RemoveInstruction(input_conversion);
+        RecordSimplification();
+        return;
+      }
+    }
+  } else if (input->IsAnd() && Primitive::IsIntegralType(result_type)) {
+    DCHECK(Primitive::IsIntegralType(input_type));
+    HAnd* input_and = input->AsAnd();
+    HConstant* constant = input_and->GetConstantRight();
+    if (constant != nullptr) {
+      int64_t value = Int64FromConstant(constant);
+      DCHECK_NE(value, -1);  // "& -1" would have been optimized away in VisitAnd().
+      size_t trailing_ones = CTZ(~static_cast<uint64_t>(value));
+      if (trailing_ones >= kBitsPerByte * Primitive::ComponentSize(result_type)) {
+        // The `HAnd` is useless, for example in `(byte) (x & 0xff)`, get rid of it.
+        HInstruction* original_input = input_and->GetLeastConstantLeft();
+        if (IsTypeConversionImplicit(original_input->GetType(), result_type)) {
+          instruction->ReplaceWith(original_input);
+          instruction->GetBlock()->RemoveInstruction(instruction);
+          RecordSimplification();
+          return;
+        } else if (input->HasOnlyOneNonEnvironmentUse()) {
+          input_and->ReplaceWith(original_input);
+          input_and->GetBlock()->RemoveInstruction(input_and);
+          RecordSimplification();
+          return;
+        }
+      }
+    }
   }
 }
 
 void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
-  if ((input_cst != nullptr) && input_cst->IsZero()) {
+  bool integral_type = Primitive::IsIntegralType(instruction->GetType());
+  if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) {
     // Replace code looking like
     //    ADD dst, src, 0
     // with
@@ -508,9 +929,10 @@
     // Note that we cannot optimize `x + 0.0` to `x` for floating-point. When
     // `x` is `-0.0`, the former expression yields `0.0`, while the later
     // yields `-0.0`.
-    if (Primitive::IsIntegralType(instruction->GetType())) {
+    if (integral_type) {
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
       return;
     }
   }
@@ -542,6 +964,41 @@
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
     RecordSimplification();
     neg->GetBlock()->RemoveInstruction(neg);
+    return;
+  }
+
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
+
+  if ((left->IsSub() || right->IsSub()) &&
+      TrySubtractionChainSimplification(instruction)) {
+    return;
+  }
+
+  if (integral_type) {
+    // Replace code patterns looking like
+    //    SUB dst1, x, y        SUB dst1, x, y
+    //    ADD dst2, dst1, y     ADD dst2, y, dst1
+    // with
+    //    SUB dst1, x, y
+    // The ADD is not needed in this case; we can use one of the SUB's
+    // inputs instead.
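+    // e.g. (x - y) + y folds to x.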
+    if (left->IsSub() && left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (right->IsSub() && right->InputAt(1) == left) {
+      instruction->ReplaceWith(right->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
   }
 }
 
@@ -600,7 +1057,17 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
   }
+
+  if (TryDeMorganNegationFactoring(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
@@ -619,20 +1086,47 @@
   VisitCondition(condition);
 }
 
-// TODO: unsigned comparisons too?
+void InstructionSimplifierVisitor::VisitBelow(HBelow* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitBelowOrEqual(HBelowOrEqual* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitAbove(HAbove* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitAboveOrEqual(HAboveOrEqual* condition) {
+  VisitCondition(condition);
+}
 
 void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
-  // Try to fold an HCompare into this HCondition.
+  // Reverse condition if left is constant. Our code generators prefer constant
+  // on the right hand side.
+  if (condition->GetLeft()->IsConstant() && !condition->GetRight()->IsConstant()) {
+    HBasicBlock* block = condition->GetBlock();
+    HCondition* replacement = GetOppositeConditionSwapOps(block->GetGraph()->GetArena(), condition);
+    // If it is a fp we must set the opposite bias.
+    if (replacement != nullptr) {
+      if (condition->IsLtBias()) {
+        replacement->SetBias(ComparisonBias::kGtBias);
+      } else if (condition->IsGtBias()) {
+        replacement->SetBias(ComparisonBias::kLtBias);
+      }
+      block->ReplaceAndRemoveInstructionWith(condition, replacement);
+      RecordSimplification();
 
-  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS and MIPS64.
-  InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  if (instruction_set == kMips || instruction_set == kMips64) {
-    return;
+      condition = replacement;
+    }
   }
 
   HInstruction* left = condition->GetLeft();
   HInstruction* right = condition->GetRight();
+
+  // Try to fold an HCompare into this HCondition.
+
   // We can only replace an HCondition which compares a Compare to 0.
   // Both 'dx' and 'jack' generate a compare to 0 when compiling a
   // condition with a long, float or double comparison as input.
@@ -642,12 +1136,12 @@
   }
 
   // Is the Compare only used for this purpose?
-  if (!left->GetUses().HasOnlyOneUse()) {
+  if (!left->GetUses().HasExactlyOneElement()) {
     // Someone else also wants the result of the compare.
     return;
   }
 
-  if (!left->GetEnvUses().IsEmpty()) {
+  if (!left->GetEnvUses().empty()) {
     // There is a reference to the compare result in an environment. Do we really need it?
     if (GetGraph()->IsDebuggable()) {
       return;
@@ -687,6 +1181,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -747,6 +1242,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -787,17 +1283,54 @@
       //    0
       instruction->ReplaceWith(input_cst);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor)) {
       // Replace code looking like
       //    MUL dst, src, pow_of_2
       // with
       //    SHL dst, src, log2(pow_of_2)
       HIntConstant* shift = GetGraph()->GetIntConstant(WhichPowerOf2(factor));
-      HShl* shl = new(allocator) HShl(type, input_other, shift);
+      HShl* shl = new (allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
       RecordSimplification();
+      return;
+    } else if (IsPowerOfTwo(factor - 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n + 1)
+      // into
+      //    SHL tmp, src, n
+      //    ADD dst, src, tmp
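+      // e.g. x * 9 becomes (x << 3) + x.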
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1)));
+      HAdd* add = new (allocator) HAdd(type, input_other, shl);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, add);
+      RecordSimplification();
+      return;
+    } else if (IsPowerOfTwo(factor + 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n - 1)
+      // into
+      //    SHL tmp, src, n
+      //    SUB dst, tmp, src
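+      // e.g. x * 7 becomes (x << 3) - x.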
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1)));
+      HSub* sub = new (allocator) HSub(type, shl, input_other);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, sub);
+      RecordSimplification();
+      return;
     }
   }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
@@ -870,13 +1403,14 @@
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
 
-  if ((input_cst != nullptr) && input_cst->IsZero()) {
+  if ((input_cst != nullptr) && input_cst->IsZeroBitPattern()) {
     // Replace code looking like
     //    OR dst, src, 0
     // with
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -890,7 +1424,19 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
   }
+
+  if (TryDeMorganNegationFactoring(instruction)) return;
+
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -910,7 +1456,7 @@
     return;
   }
 
-  if ((input_cst != nullptr) && input_cst->IsZero()) {
+  if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) {
     // Replace code looking like
     //    SUB dst, src, 0
     // with
@@ -920,6 +1466,7 @@
     // yields `-0.0`.
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -980,6 +1527,34 @@
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
     left->GetBlock()->RemoveInstruction(left);
+    return;
+  }
+
+  if (TrySubtractionChainSimplification(instruction)) {
+    return;
+  }
+
+  if (left->IsAdd()) {
+    // Replace code patterns looking like
+    //    ADD dst1, x, y        ADD dst1, x, y
+    //    SUB dst2, dst1, y     SUB dst2, dst1, x
+    // with
+    //    ADD dst1, x, y
+    // The SUB is not needed in this case; we can use one of the ADD's
+    // inputs instead.
+    // This is applicable to integral types only.
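+    // e.g. (x + y) - y folds to x, and (x + y) - x folds to y.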
+    DCHECK(Primitive::IsIntegralType(type));
+    if (left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (left->InputAt(0) == right) {
+      instruction->ReplaceWith(left->InputAt(1));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
   }
 }
 
@@ -991,13 +1566,14 @@
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
 
-  if ((input_cst != nullptr) && input_cst->IsZero()) {
+  if ((input_cst != nullptr) && input_cst->IsZeroBitPattern()) {
     // Replace code looking like
     //    XOR dst, src, 0
     // with
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1011,48 +1587,34 @@
     RecordSimplification();
     return;
   }
-}
 
-void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
-  HInstruction* actual_string = nullptr;
-
-  // Find the string we need to replace this instruction with. The actual string is
-  // the return value of a StringFactory call.
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* use = it.Current()->GetUser();
-    if (use->IsInvokeStaticOrDirect()
-        && use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction)) {
-      use->AsInvokeStaticOrDirect()->RemoveFakeStringArgumentAsLastInput();
-      actual_string = use;
-      break;
-    }
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if (((left->IsNot() && right->IsNot()) ||
+       (left->IsBooleanNot() && right->IsBooleanNot())) &&
+      left->HasOnlyOneNonEnvironmentUse() &&
+      right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NOT nota, a
+    //    NOT notb, b
+    //    XOR dst, nota, notb
+    // with
+    //    XOR dst, a, b
+    instruction->ReplaceInput(left->InputAt(0), 0);
+    instruction->ReplaceInput(right->InputAt(0), 1);
+    left->GetBlock()->RemoveInstruction(left);
+    right->GetBlock()->RemoveInstruction(right);
+    RecordSimplification();
+    return;
   }
 
-  // Check that there is no other instruction that thinks it is the factory for that string.
-  if (kIsDebugBuild) {
-    CHECK(actual_string != nullptr);
-    for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-      HInstruction* use = it.Current()->GetUser();
-      if (use->IsInvokeStaticOrDirect()) {
-        CHECK(!use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction));
-      }
-    }
+  if (TryReplaceWithRotate(instruction)) {
+    return;
   }
 
-  // We need to remove any environment uses of the fake string that are not dominated by
-  // `actual_string` to null.
-  for (HUseIterator<HEnvironment*> it(instruction->GetEnvUses()); !it.Done(); it.Advance()) {
-    HEnvironment* environment = it.Current()->GetUser();
-    if (!actual_string->StrictlyDominates(environment->GetHolder())) {
-      environment->RemoveAsUserOfInput(it.Current()->GetIndex());
-      environment->SetRawEnvAt(it.Current()->GetIndex(), nullptr);
-    }
-  }
-
-  // Only uses dominated by `actual_string` must remain. We can safely replace and remove
-  // `instruction`.
-  instruction->ReplaceWith(actual_string);
-  instruction->GetBlock()->RemoveInstruction(instruction);
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
@@ -1079,6 +1641,34 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke,
+                                                  bool is_left,
+                                                  Primitive::Type type) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
+  HInstruction* value = invoke->InputAt(0);
+  HInstruction* distance = invoke->InputAt(1);
+  // Replace the invoke with an HRor.
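+  // rotateLeft(x, d) equals rotateRight(x, -d), so a left rotation is lowered
+  // by negating the distance.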
+  if (is_left) {
+    // Unconditionally set the type of the negated distance to `int`,
+    // as shift and rotate operations expect a 32-bit (or narrower)
+    // value for their distance input.
+    distance = new (GetGraph()->GetArena()) HNeg(Primitive::kPrimInt, distance);
+    invoke->GetBlock()->InsertInstructionBefore(distance, invoke);
+  }
+  HRor* ror = new (GetGraph()->GetArena()) HRor(type, value, distance);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
+  // Remove ClinitCheck and LoadClass, if possible.
+  HInstruction* clinit = invoke->GetInputs().back();
+  if (clinit->IsClinitCheck() && !clinit->HasUses()) {
+    clinit->GetBlock()->RemoveInstruction(clinit);
+    HInstruction* ldclass = clinit->InputAt(0);
+    if (ldclass->IsLoadClass() && !ldclass->HasUses()) {
+      ldclass->GetBlock()->RemoveInstruction(ldclass);
+    }
+  }
+}
+
 static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
   if (potential_length->IsArrayLength()) {
     return potential_length->InputAt(0) == potential_array;
@@ -1144,18 +1734,177 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke,
+                                                   bool is_signum,
+                                                   Primitive::Type type) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  HInstruction* left = invoke->InputAt(0);
+  HInstruction* right;
+  if (!is_signum) {
+    right = invoke->InputAt(1);
+  } else if (type == Primitive::kPrimLong) {
+    right = GetGraph()->GetLongConstant(0);
+  } else {
+    right = GetGraph()->GetIntConstant(0);
+  }
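+  // Thus {Integer,Long}.compare(a, b) becomes HCompare(a, b), and
+  // {Integer,Long}.signum(a) becomes HCompare(a, 0).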
+  HCompare* compare = new (GetGraph()->GetArena())
+      HCompare(type, left, right, ComparisonBias::kNoBias, dex_pc);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare);
+}
+
+void InstructionSimplifierVisitor::SimplifyIsNaN(HInvoke* invoke) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  // IsNaN(x) is the same as x != x.
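+  // (Per IEEE 754, NaN is the only value that does not compare equal to itself.)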
+  HInstruction* x = invoke->InputAt(0);
+  HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc);
+  condition->SetBias(ComparisonBias::kLtBias);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, condition);
+}
+
+void InstructionSimplifierVisitor::SimplifyFP2Int(HInvoke* invoke) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  HInstruction* x = invoke->InputAt(0);
+  Primitive::Type type = x->GetType();
+  // Set proper bit pattern for NaN and replace intrinsic with raw version.
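+  // Float.floatToIntBits() must return the canonical NaN bit pattern for any
+  // NaN input, whereas floatToRawIntBits() preserves the raw bits; the select
+  // emitted below restores that canonicalization.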
+  HInstruction* nan;
+  if (type == Primitive::kPrimDouble) {
+    nan = GetGraph()->GetLongConstant(0x7ff8000000000000L);
+    invoke->SetIntrinsic(Intrinsics::kDoubleDoubleToRawLongBits,
+                         kNeedsEnvironmentOrCache,
+                         kNoSideEffects,
+                         kNoThrow);
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimFloat);
+    nan = GetGraph()->GetIntConstant(0x7fc00000);
+    invoke->SetIntrinsic(Intrinsics::kFloatFloatToRawIntBits,
+                         kNeedsEnvironmentOrCache,
+                         kNoSideEffects,
+                         kNoThrow);
+  }
+  // Test IsNaN(x), which is the same as x != x.
+  HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc);
+  condition->SetBias(ComparisonBias::kLtBias);
+  invoke->GetBlock()->InsertInstructionBefore(condition, invoke->GetNext());
+  // Select between the two.
+  HInstruction* select = new (GetGraph()->GetArena()) HSelect(condition, nan, invoke, dex_pc);
+  invoke->GetBlock()->InsertInstructionBefore(select, condition->GetNext());
+  invoke->ReplaceWithExceptInReplacementAtIndex(select, 0);  // false at index 0
+}
+
+void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  HInstruction* index = invoke->InputAt(1);
+  uint32_t dex_pc = invoke->GetDexPc();
+  ArenaAllocator* arena = GetGraph()->GetArena();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength, HBoundsCheck and HArrayGet.
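+  // In effect, str.charAt(index) becomes:
+  //    ArrayGet(str, BoundsCheck(index, ArrayLength(str)))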
+  HArrayLength* length = new (arena) HArrayLength(str, dex_pc, /* is_string_length */ true);
+  invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+  HBoundsCheck* bounds_check =
+      new (arena) HBoundsCheck(index, length, dex_pc, invoke->GetDexMethodIndex());
+  invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke);
+  HArrayGet* array_get =
+      new (arena) HArrayGet(str, index, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get);
+  bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment());
+  GetGraph()->SetHasBoundsChecks(true);
+}
+
+void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  uint32_t dex_pc = invoke->GetDexPc();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength.
+  HArrayLength* length =
+      new (GetGraph()->GetArena()) HArrayLength(str, dex_pc, /* is_string_length */ true);
+  HInstruction* replacement;
+  if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) {
+    // For String.isEmpty(), create the `HEqual` representing the `length == 0`.
+    invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+    HIntConstant* zero = GetGraph()->GetIntConstant(0);
+    HEqual* equal = new (GetGraph()->GetArena()) HEqual(length, zero, dex_pc);
+    replacement = equal;
+  } else {
+    DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringLength);
+    replacement = length;
+  }
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
+}
+
+void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke,
+                                                      MemBarrierKind barrier_kind) {
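+  // The caller below maps Unsafe.loadFence() to kLoadAny (acquire semantics),
+  // Unsafe.storeFence() to kAnyStore (release semantics) and
+  // Unsafe.fullFence() to kAnyAny (a full barrier).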
+  uint32_t dex_pc = invoke->GetDexPc();
+  HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier);
+}
+
 void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
-  if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
-    SimplifyStringEquals(instruction);
-  } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
-    SimplifySystemArrayCopy(instruction);
+  switch (instruction->GetIntrinsic()) {
+    case Intrinsics::kStringEquals:
+      SimplifyStringEquals(instruction);
+      break;
+    case Intrinsics::kSystemArrayCopy:
+      SimplifySystemArrayCopy(instruction);
+      break;
+    case Intrinsics::kIntegerRotateRight:
+      SimplifyRotate(instruction, /* is_left */ false, Primitive::kPrimInt);
+      break;
+    case Intrinsics::kLongRotateRight:
+      SimplifyRotate(instruction, /* is_left */ false, Primitive::kPrimLong);
+      break;
+    case Intrinsics::kIntegerRotateLeft:
+      SimplifyRotate(instruction, /* is_left */ true, Primitive::kPrimInt);
+      break;
+    case Intrinsics::kLongRotateLeft:
+      SimplifyRotate(instruction, /* is_left */ true, Primitive::kPrimLong);
+      break;
+    case Intrinsics::kIntegerCompare:
+      SimplifyCompare(instruction, /* is_signum */ false, Primitive::kPrimInt);
+      break;
+    case Intrinsics::kLongCompare:
+      SimplifyCompare(instruction, /* is_signum */ false, Primitive::kPrimLong);
+      break;
+    case Intrinsics::kIntegerSignum:
+      SimplifyCompare(instruction, /* is_signum */ true, Primitive::kPrimInt);
+      break;
+    case Intrinsics::kLongSignum:
+      SimplifyCompare(instruction, /* is_signum */ true, Primitive::kPrimLong);
+      break;
+    case Intrinsics::kFloatIsNaN:
+    case Intrinsics::kDoubleIsNaN:
+      SimplifyIsNaN(instruction);
+      break;
+    case Intrinsics::kFloatFloatToIntBits:
+    case Intrinsics::kDoubleDoubleToLongBits:
+      SimplifyFP2Int(instruction);
+      break;
+    case Intrinsics::kStringCharAt:
+      SimplifyStringCharAt(instruction);
+      break;
+    case Intrinsics::kStringIsEmpty:
+    case Intrinsics::kStringLength:
+      SimplifyStringIsEmptyOrLength(instruction);
+      break;
+    case Intrinsics::kUnsafeLoadFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
+      break;
+    case Intrinsics::kUnsafeStoreFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kAnyStore);
+      break;
+    case Intrinsics::kUnsafeFullFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kAnyAny);
+      break;
+    default:
+      break;
   }
 }
 
 void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) {
   HInstruction* cond = deoptimize->InputAt(0);
   if (cond->IsConstant()) {
-    if (cond->AsIntConstant()->IsZero()) {
+    if (cond->AsIntConstant()->IsFalse()) {
       // Never deopt: instruction can be removed.
       deoptimize->GetBlock()->RemoveInstruction(deoptimize);
     } else {
@@ -1164,4 +1913,150 @@
   }
 }
 
+// Replace code looking like
+//    OP y, x, const1
+//    OP z, y, const2
+// with
+//    OP z, x, const3
+// where OP is both an associative and a commutative operation.
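+// For example, `(x + 3) + 5` folds to `x + 8`, and `(x & 0xf0) & 0x3c`
+// to `x & 0x30`.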
+bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation(
+    HBinaryOperation* instruction) {
+  DCHECK(instruction->IsCommutative());
+
+  if (!Primitive::IsIntegralType(instruction->GetType())) {
+    return false;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Variable names as described above.
+  HConstant* const2;
+  HBinaryOperation* y;
+
+  if (instruction->InstructionTypeEquals(left) && right->IsConstant()) {
+    const2 = right->AsConstant();
+    y = left->AsBinaryOperation();
+  } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) {
+    const2 = left->AsConstant();
+    y = right->AsBinaryOperation();
+  } else {
+    // The node does not match the pattern.
+    return false;
+  }
+
+  // If `y` has more than one use, we do not perform the optimization
+  // because it might increase code size (e.g. if the new constant is
+  // no longer encodable as an immediate operand in the target ISA).
+  if (!y->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // GetConstantRight() can return both left and right constants
+  // for commutative operations.
+  HConstant* const1 = y->GetConstantRight();
+  if (const1 == nullptr) {
+    return false;
+  }
+
+  instruction->ReplaceInput(const1, 0);
+  instruction->ReplaceInput(const2, 1);
+  HConstant* const3 = instruction->TryStaticEvaluation();
+  DCHECK(const3 != nullptr);
+  instruction->ReplaceInput(y->GetLeastConstantLeft(), 0);
+  instruction->ReplaceInput(const3, 1);
+  RecordSimplification();
+  return true;
+}
+
+static HBinaryOperation* AsAddOrSub(HInstruction* binop) {
+  return (binop->IsAdd() || binop->IsSub()) ? binop->AsBinaryOperation() : nullptr;
+}
+
+// Helper function that performs addition statically, considering the result type.
+static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) {
+  // Use the Compute() method for consistency with TryStaticEvaluation().
+  if (type == Primitive::kPrimInt) {
+    return HAdd::Compute<int32_t>(x, y);
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
+    return HAdd::Compute<int64_t>(x, y);
+  }
+}
+
+// Helper function that extracts the value of any HConstant subclass as an
+// int64_t, negating it when `is_negated` is set.
+static int64_t GetValue(HConstant* constant, bool is_negated) {
+  int64_t ret = Int64FromConstant(constant);
+  return is_negated ? -ret : ret;
+}
+
+// Replace code looking like
+//    OP1 y, x, const1
+//    OP2 z, y, const2
+// with
+//    OP3 z, x, const3
+// where OPx is either ADD or SUB, and at least one of OP{1,2} is SUB.
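+// For example, `(x - 3) - 2` is rewritten as `x + (-5)`, and `2 - (x - 3)`
+// as `5 - x`.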
+bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
+    HBinaryOperation* instruction) {
+  DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName();
+
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntegralType(type)) {
+    return false;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Variable names as described above.
+  HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant();
+  if (const2 == nullptr) {
+    return false;
+  }
+
+  HBinaryOperation* y = (AsAddOrSub(left) != nullptr)
+      ? left->AsBinaryOperation()
+      : AsAddOrSub(right);
+  // If y has more than one use, we do not perform the optimization because
+  // it might increase code size (e.g. if the new constant is no longer
+  // encodable as an immediate operand in the target ISA).
+  if ((y == nullptr) || !y->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  left = y->GetLeft();
+  HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant();
+  if (const1 == nullptr) {
+    return false;
+  }
+
+  HInstruction* x = (const1 == left) ? y->GetRight() : left;
+  // If both inputs are constants, let the constant folding pass deal with it.
+  if (x->IsConstant()) {
+    return false;
+  }
+
+  bool is_const2_negated = (const2 == right) && instruction->IsSub();
+  int64_t const2_val = GetValue(const2, is_const2_negated);
+  bool is_y_negated = (y == right) && instruction->IsSub();
+  right = y->GetRight();
+  bool is_const1_negated = is_y_negated ^ ((const1 == right) && y->IsSub());
+  int64_t const1_val = GetValue(const1, is_const1_negated);
+  bool is_x_negated = is_y_negated ^ ((x == right) && y->IsSub());
+  int64_t const3_val = ComputeAddition(type, const1_val, const2_val);
+  HBasicBlock* block = instruction->GetBlock();
+  HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val);
+  ArenaAllocator* arena = instruction->GetArena();
+  HInstruction* z;
+
+  if (is_x_negated) {
+    z = new (arena) HSub(type, const3, x, instruction->GetDexPc());
+  } else {
+    z = new (arena) HAdd(type, x, const3, instruction->GetDexPc());
+  }
+
+  block->ReplaceAndRemoveInstructionWith(instruction, z);
+  RecordSimplification();
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index cc4b6f6..7905104 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -25,6 +25,13 @@
 
 /**
  * Implements optimizations specific to each instruction.
+ *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a development
+ * choice, not a technical requirement.)
  */
 class InstructionSimplifier : public HOptimization {
  public:
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
new file mode 100644
index 0000000..495f3fd
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator.h"
+#include "instruction_simplifier_arm.h"
+#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace arm {
+
+void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
+  if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
+  size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  Primitive::Type type = instruction->GetType();
+
+  if (type == Primitive::kPrimLong
+      || type == Primitive::kPrimFloat
+      || type == Primitive::kPrimDouble) {
+    // T32 does not support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+    return;
+  }
+
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
+  size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+  size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+  Primitive::Type type = instruction->GetComponentType();
+
+  if (type == Primitive::kPrimLong
+      || type == Primitive::kPrimFloat
+      || type == Primitive::kPrimDouble) {
+    // T32 does not support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+    return;
+  }
+
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
new file mode 100644
index 0000000..782110c
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class InstructionSimplifierArmVisitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph), stats_(stats) {}
+
+ private:
+  void RecordSimplification() {
+    if (stats_ != nullptr) {
+      stats_->RecordStat(kInstructionSimplificationsArch);
+    }
+  }
+
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
+
+  OptimizingCompilerStats* stats_;
+};
+
+
+class InstructionSimplifierArm : public HOptimization {
+ public:
+  InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+
+  static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
+
+  void Run() OVERRIDE {
+    InstructionSimplifierArmVisitor visitor(graph_, stats_);
+    visitor.VisitReversePostOrder();
+  }
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index eb79f46..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -16,64 +16,197 @@
 
 #include "instruction_simplifier_arm64.h"
 
+#include "common_arm64.h"
+#include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace arm64 {
 
-void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
-                                                                     HInstruction* array,
-                                                                     HInstruction* index,
-                                                                     int access_size) {
-  if (index->IsConstant() ||
-      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
-    // When the index is a constant all the addressing can be fitted in the
-    // memory access instruction, so do not split the access.
-    return;
-  }
-  if (access->IsArraySet() &&
-      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
-    // The access may require a runtime call or the original array pointer.
-    return;
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+using helpers::ShifterOperandSupportsExtension;
+
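+// Try to merge `bitfield_op` (a shift or an integral type conversion) into
+// the shifter operand of `use`.  For example, on ARM64 an Add whose right
+// input is `Shl(x, 2)` can be emitted as a single `ADD dst, a, x, LSL #2`.
+// When `do_merge` is false, only check feasibility without transforming.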
+bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
+                                                                   HInstruction* bitfield_op,
+                                                                   bool do_merge) {
+  DCHECK(HasShifterOperand(use));
+  DCHECK(use->IsBinaryOperation() || use->IsNeg());
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+  DCHECK(!bitfield_op->HasEnvironmentUses());
+
+  Primitive::Type type = use->GetType();
+  if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+    return false;
   }
 
-  // Proceed to extract the base address computation.
-  ArenaAllocator* arena = GetGraph()->GetArena();
+  HInstruction* left;
+  HInstruction* right;
+  if (use->IsBinaryOperation()) {
+    left = use->InputAt(0);
+    right = use->InputAt(1);
+  } else {
+    DCHECK(use->IsNeg());
+    right = use->AsNeg()->InputAt(0);
+    left = GetGraph()->GetConstant(right->GetType(), 0);
+  }
+  DCHECK(left == bitfield_op || right == bitfield_op);
 
-  HIntConstant* offset =
-      GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
-  HArm64IntermediateAddress* address =
-      new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
-  access->GetBlock()->InsertInstructionBefore(address, access);
-  access->ReplaceInput(address, 0);
-  // Both instructions must depend on GC to prevent any instruction that can
-  // trigger GC to be inserted between the two.
-  access->AddSideEffects(SideEffects::DependsOnGC());
-  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
-  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
-  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
-  // is an HArm64IntermediateAddress and generate appropriate code.
-  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
-  // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
-  // not bring any advantages yet.
-  // Also see the comments in
-  // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
-  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
-  RecordSimplification();
+  if (left == right) {
+    // TODO: Handle special transformations in this situation?
+    // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+    // Or should this be handled by a separate transformation pass?
+    return false;
+  }
+
+  bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative();
+  HInstruction* other_input;
+  if (bitfield_op == right) {
+    other_input = left;
+  } else {
+    if (is_commutative) {
+      other_input = right;
+    } else {
+      return false;
+    }
+  }
+
+  HArm64DataProcWithShifterOp::OpKind op_kind;
+  int shift_amount = 0;
+  HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
+      !ShifterOperandSupportsExtension(use)) {
+    return false;
+  }
+
+  if (do_merge) {
+    HArm64DataProcWithShifterOp* alu_with_op =
+        new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
+                                                                 other_input,
+                                                                 bitfield_op->InputAt(0),
+                                                                 op_kind,
+                                                                 shift_amount,
+                                                                 use->GetDexPc());
+    use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+    if (bitfield_op->GetUses().empty()) {
+      bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+    }
+    RecordSimplification();
+  }
+
+  return true;
+}
+
+// Merge a bitfield move instruction into its uses if it can be merged in all of them.
+bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+
+  if (bitfield_op->HasEnvironmentUses()) {
+    return false;
+  }
+
+  const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+  // Check whether the instruction can be merged into all its users' shifter operands.
+  for (const HUseListNode<HInstruction*>& use : uses) {
+    HInstruction* user = use.GetUser();
+    if (!HasShifterOperand(user)) {
+      return false;
+    }
+    if (!CanMergeIntoShifterOperand(user, bitfield_op)) {
+      return false;
+    }
+  }
+
+  // Merge the instruction into its uses.
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    // Increment `it` now because `*it` will disappear thanks to MergeIntoShifterOperand().
+    ++it;
+    bool merged = MergeIntoShifterOperand(user, bitfield_op);
+    DCHECK(merged);
+  }
+
+  return true;
+}
+
+void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
-  TryExtractArrayAccessAddress(instruction,
-                               instruction->GetArray(),
-                               instruction->GetIndex(),
-                               Primitive::ComponentSize(instruction->GetType()));
+  size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
-  TryExtractArrayAccessAddress(instruction,
-                               instruction->GetArray(),
-                               instruction->GetIndex(),
-                               Primitive::ComponentSize(instruction->GetComponentType()));
+  size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+  size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
+  if (TryCombineMultiplyAccumulate(instruction, kArm64)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitOr(HOr* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) {
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+
+  if (input_type == result_type) {
+    // We let the arch-independent code handle this.
+    return;
+  }
+
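+  // An integral conversion can be merged as an extension in the shifter
+  // operand; e.g. a byte-to-int conversion feeding an Add can typically use
+  // the SXTB extended-register form.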
+  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 }  // namespace arm64
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4b697db..f71684e 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,13 +35,45 @@
     }
   }
 
-  void TryExtractArrayAccessAddress(HInstruction* access,
-                                    HInstruction* array,
-                                    HInstruction* index,
-                                    int access_size);
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use,
+                                  HInstruction* bitfield_op,
+                                  bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, true);
+  }
 
+  /**
+   * This simplifier uses a special-purpose basic block visitor.
+   * (1) No need to visit Phi nodes.
+   * (2) Since statements can be removed in a "forward" fashion,
+   *     the visitor should test if each statement is still there.
+   */
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+    // TODO: fragile iteration, provide more robust iterators?
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
+  // HInstruction visitors, sorted alphabetically.
+  void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
+  void VisitXor(HXor* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
@@ -50,8 +82,9 @@
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, "instruction_simplifier_arm64", stats) {}
-
+    : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+  static constexpr const char* kInstructionSimplifierArm64PassName
+      = "instruction_simplifier_arm64";
   void Run() OVERRIDE {
     InstructionSimplifierArm64Visitor visitor(graph_, stats_);
     visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
new file mode 100644
index 0000000..8f7778f
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_shared.h"
+
+namespace art {
+
+namespace {
+
+bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+                                         HBinaryOperation* input_binop,
+                                         HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // Try to interpret patterns like
+  //    a * (b <+/-> 1)
+  // as
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+  HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
+}  // namespace
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
+  Primitive::Type type = mul->GetType();
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      if (type != Primitive::kPrimInt) {
+        return false;
+      }
+      break;
+    case kArm64:
+      if (!Primitive::IsIntOrLongType(type)) {
+        return false;
+      }
+      break;
+    default:
+      return false;
+  }
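+  // On ARM this typically maps to MLA/MLS; on ARM64 to MADD/MSUB (with MNEG
+  // covering the negated-product case below).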
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+  if (mul->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* use = mul->GetUses().front().GetUser();
+    if (use->IsAdd() || use->IsSub()) {
+      // Replace code looking like
+      //    MUL tmp, x, y
+      //    SUB dst, acc, tmp
+      // with
+      //    MULSUB dst, acc, x, y
+      // Note that we do not want to (unconditionally) perform the merge when
+      // the multiplication has multiple uses, even if it could be merged into
+      // all of them.
+      // Multiple uses could happen on the same control-flow path, and we would
+      // then increase the amount of work. In the future we could try to evaluate
+      // whether all uses are on different control-flow paths (using dominance and
+      // reverse-dominance information) and only perform the merge when they are.
+      HInstruction* accumulator = nullptr;
+      HBinaryOperation* binop = use->AsBinaryOperation();
+      HInstruction* binop_left = binop->GetLeft();
+      HInstruction* binop_right = binop->GetRight();
+      // Be careful after GVN. This should not happen since the `HMul` has only
+      // one use.
+      DCHECK_NE(binop_left, binop_right);
+      if (binop_right == mul) {
+        accumulator = binop_left;
+      } else if (use->IsAdd()) {
+        DCHECK_EQ(binop_left, mul);
+        accumulator = binop_right;
+      }
+
+      if (accumulator != nullptr) {
+        HMultiplyAccumulate* mulacc =
+            new (arena) HMultiplyAccumulate(type,
+                                            binop->GetKind(),
+                                            accumulator,
+                                            mul->GetLeft(),
+                                            mul->GetRight());
+
+        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+        DCHECK(!mul->HasUses());
+        mul->GetBlock()->RemoveInstruction(mul);
+        return true;
+      }
+    } else if (use->IsNeg() && isa != kArm) {
+      HMultiplyAccumulate* mulacc =
+          new (arena) HMultiplyAccumulate(type,
+                                          HInstruction::kSub,
+                                          mul->GetBlock()->GetGraph()->GetConstant(type, 0),
+                                          mul->GetLeft(),
+                                          mul->GetRight());
+
+      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
+      DCHECK(!mul->HasUses());
+      mul->GetBlock()->RemoveInstruction(mul);
+      return true;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+  // We prefer not applying the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs if appropriate. However the
+  // results are still correct even if that did not happen.
+  if (mul->GetLeft() == mul->GetRight()) {
+    return false;
+  }
+
+  HInstruction* left = mul->GetLeft();
+  HInstruction* right = mul->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
+    return true;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
+    return true;
+  }
+  return false;
+}
+
+
+bool TryMergeNegatedInput(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // Only consider the case where there is exactly one Not; with two Nots,
+  // De Morgan's laws should be applied instead.
+  if (left->IsNot() ^ right->IsNot()) {
+    HInstruction* hnot = (left->IsNot() ? left : right);
+    HInstruction* hother = (left->IsNot() ? right : left);
+
+    // Only do the simplification if the Not has only one use and can thus be
+    // safely removed. Even though ARM64 negated bitwise operations do not have
+    // an immediate variant (only register), we still do the simplification when
+    // `hother` is a constant, because it removes an instruction if the constant
+    // cannot be encoded as an immediate:
+    //   mov r0, #large_constant
+    //   neg r2, r1
+    //   and r0, r0, r2
+    // becomes:
+    //   mov r0, #large_constant
+    //   bic r0, r0, r1
+    if (hnot->HasOnlyOneNonEnvironmentUse()) {
+      // Replace code looking like
+      //    NOT tmp, mask
+      //    AND dst, src, tmp   (respectively ORR, EOR)
+      // with
+      //    BIC dst, src, mask  (respectively ORN, EON)
+      HInstruction* src = hnot->AsNot()->GetInput();
+
+      HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetArena())
+          HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());
+
+      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
+      hnot->GetBlock()->RemoveInstruction(hnot);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+                                  HInstruction* array,
+                                  HInstruction* index,
+                                  size_t data_offset) {
+  if (kEmitCompilerReadBarrier) {
+    // The read barrier instrumentation does not support the
+    // HIntermediateAddress instruction yet.
+    //
+    // TODO: Handle this case properly in the ARM64 and ARM code generator and
+    // re-enable this optimization; otherwise, remove this TODO.
+    // b/26601270
+    return false;
+  }
+  if (index->IsConstant() ||
+      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+    // When the index is a constant all the addressing can be fitted in the
+    // memory access instruction, so do not split the access.
+    return false;
+  }
+  if (access->IsArraySet() &&
+      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+    // The access may require a runtime call or the original array pointer.
+    return false;
+  }
+
+  // Proceed to extract the base address computation.
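+  // For example, an ArrayGet(array, index) is rewritten as
+  //    IntermediateAddress addr, array, data_offset
+  //    ArrayGet addr, index
+  // letting the backend fold the offset into the memory access.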
+  HGraph* graph = access->GetBlock()->GetGraph();
+  ArenaAllocator* arena = graph->GetArena();
+
+  HIntConstant* offset = graph->GetIntConstant(data_offset);
+  HIntermediateAddress* address =
+      new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+  access->GetBlock()->InsertInstructionBefore(address, access);
+  access->ReplaceInput(address, 0);
+  // Both instructions must depend on GC to prevent any instruction that can
+  // trigger GC to be inserted between the two.
+  access->AddSideEffects(SideEffects::DependsOnGC());
+  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+  // is an HIntermediateAddress and generate appropriate code.
+  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer
+  // these changes because these new instructions would not bring any
+  // advantages yet.
+  // Also see the comments in
+  // `InstructionCodeGeneratorARM::VisitArrayGet()`
+  // `InstructionCodeGeneratorARM::VisitArraySet()`
+  // `InstructionCodeGeneratorARM64::VisitArrayGet()`
+  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
+  return true;
+}
+
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
new file mode 100644
index 0000000..56804f5
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+
+#include "nodes.h"
+
+namespace art {
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
+// For bitwise operations (And/Or/Xor) with a negated input, try to use
+// a negated bitwise instruction.
+bool TryMergeNegatedInput(HBinaryOperation* op);
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+                                  HInstruction* array,
+                                  HInstruction* index,
+                                  size_t data_offset);
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index dbe7524..418d59c 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -36,8 +36,8 @@
   switch (i) {
     case Intrinsics::kNone:
       return kInterface;  // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \
-    case Intrinsics::k ## Name:               \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
       return IsStatic;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -52,8 +52,8 @@
   switch (i) {
     case Intrinsics::kNone:
       return kNeedsEnvironmentOrCache;  // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \
-    case Intrinsics::k ## Name:               \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
       return NeedsEnvironmentOrCache;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -63,6 +63,38 @@
   return kNeedsEnvironmentOrCache;
 }
 
+// Function that returns whether an intrinsic has side effects.
+static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) {
+  switch (i) {
+    case Intrinsics::kNone:
+      return kAllSideEffects;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
+      return SideEffects;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return kAllSideEffects;
+}
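+// (Each INTRINSICS_LIST entry supplies its own SideEffects flag via the
+// X-macro above; intrinsics_list.h is the source of truth for those flags.)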
+
+// Function that returns whether an intrinsic can throw exceptions.
+static inline IntrinsicExceptions GetExceptions(Intrinsics i) {
+  switch (i) {
+    case Intrinsics::kNone:
+      return kCanThrow;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
+      return Exceptions;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return kCanThrow;
+}
+
 static Primitive::Type GetType(uint64_t data, bool is_op_size) {
   if (is_op_size) {
     switch (static_cast<OpSize>(data)) {
@@ -89,10 +121,7 @@
   }
 }
 
-static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
-  if (instruction_set == kMips) {
-    return Intrinsics::kNone;
-  }
+static Intrinsics GetIntrinsic(InlineMethod method) {
   switch (method.opcode) {
     // Floating-point conversions.
     case kIntrinsicDoubleCvt:
@@ -101,6 +130,20 @@
     case kIntrinsicFloatCvt:
       return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
           Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
+    case kIntrinsicFloat2Int:
+      return Intrinsics::kFloatFloatToIntBits;
+    case kIntrinsicDouble2Long:
+      return Intrinsics::kDoubleDoubleToLongBits;
+
+    // Floating-point tests.
+    case kIntrinsicFloatIsInfinite:
+      return Intrinsics::kFloatIsInfinite;
+    case kIntrinsicDoubleIsInfinite:
+      return Intrinsics::kDoubleIsInfinite;
+    case kIntrinsicFloatIsNaN:
+      return Intrinsics::kFloatIsNaN;
+    case kIntrinsicDoubleIsNaN:
+      return Intrinsics::kDoubleIsNaN;
 
     // Bit manipulations.
     case kIntrinsicReverseBits:
@@ -147,6 +190,46 @@
       }
 
     // Misc data processing.
+    case kIntrinsicBitCount:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerBitCount;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongBitCount;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicCompare:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerCompare;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongCompare;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicHighestOneBit:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerHighestOneBit;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongHighestOneBit;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicLowestOneBit:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerLowestOneBit;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongLowestOneBit;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
     case kIntrinsicNumberOfLeadingZeros:
       switch (GetType(method.d.data, true)) {
         case Primitive::kPrimInt:
@@ -167,6 +250,16 @@
           LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
           UNREACHABLE();
       }
+    case kIntrinsicSignum:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerSignum;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongSignum;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
 
     // Abs.
     case kIntrinsicAbsDouble:
@@ -192,6 +285,42 @@
       return ((method.d.data & kIntrinsicFlagMin) == 0) ?
           Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
 
+    // More math builtins.
+    case kIntrinsicCos:
+      return Intrinsics::kMathCos;
+    case kIntrinsicSin:
+      return Intrinsics::kMathSin;
+    case kIntrinsicAcos:
+      return Intrinsics::kMathAcos;
+    case kIntrinsicAsin:
+      return Intrinsics::kMathAsin;
+    case kIntrinsicAtan:
+      return Intrinsics::kMathAtan;
+    case kIntrinsicAtan2:
+      return Intrinsics::kMathAtan2;
+    case kIntrinsicCbrt:
+      return Intrinsics::kMathCbrt;
+    case kIntrinsicCosh:
+      return Intrinsics::kMathCosh;
+    case kIntrinsicExp:
+      return Intrinsics::kMathExp;
+    case kIntrinsicExpm1:
+      return Intrinsics::kMathExpm1;
+    case kIntrinsicHypot:
+      return Intrinsics::kMathHypot;
+    case kIntrinsicLog:
+      return Intrinsics::kMathLog;
+    case kIntrinsicLog10:
+      return Intrinsics::kMathLog10;
+    case kIntrinsicNextAfter:
+      return Intrinsics::kMathNextAfter;
+    case kIntrinsicSinh:
+      return Intrinsics::kMathSinh;
+    case kIntrinsicTan:
+      return Intrinsics::kMathTan;
+    case kIntrinsicTanh:
+      return Intrinsics::kMathTanh;
+
     // Misc math.
     case kIntrinsicSqrt:
       return Intrinsics::kMathSqrt;
@@ -215,7 +344,7 @@
 
     // Thread.currentThread.
     case kIntrinsicCurrentThread:
-      return  Intrinsics::kThreadCurrentThread;
+      return Intrinsics::kThreadCurrentThread;
 
     // Memory.peek.
     case kIntrinsicPeek:
@@ -259,10 +388,8 @@
     case kIntrinsicGetCharsNoCheck:
       return Intrinsics::kStringGetCharsNoCheck;
     case kIntrinsicIsEmptyOrLength:
-      // The inliner can handle these two cases - and this is the preferred approach
-      // since after inlining the call is no longer visible (as opposed to waiting
-      // until codegen to handle intrinsic).
-      return Intrinsics::kNone;
+      return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+          Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
     case kIntrinsicIndexOf:
       return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
           Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
@@ -343,6 +470,24 @@
       break;
     }
 
+    // 1.8.
+    case kIntrinsicUnsafeGetAndAddInt:
+      return Intrinsics::kUnsafeGetAndAddInt;
+    case kIntrinsicUnsafeGetAndAddLong:
+      return Intrinsics::kUnsafeGetAndAddLong;
+    case kIntrinsicUnsafeGetAndSetInt:
+      return Intrinsics::kUnsafeGetAndSetInt;
+    case kIntrinsicUnsafeGetAndSetLong:
+      return Intrinsics::kUnsafeGetAndSetLong;
+    case kIntrinsicUnsafeGetAndSetObject:
+      return Intrinsics::kUnsafeGetAndSetObject;
+    case kIntrinsicUnsafeLoadFence:
+      return Intrinsics::kUnsafeLoadFence;
+    case kIntrinsicUnsafeStoreFence:
+      return Intrinsics::kUnsafeStoreFence;
+    case kIntrinsicUnsafeFullFence:
+      return Intrinsics::kUnsafeFullFence;
+
     // Virtual cases.
 
     case kIntrinsicReferenceGetReferent:
@@ -356,6 +501,7 @@
     case kInlineOpNonWideConst:
     case kInlineOpIGet:
     case kInlineOpIPut:
+    case kInlineOpConstructor:
       return Intrinsics::kNone;
 
     // String init cases, not intrinsics.
@@ -387,7 +533,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
-      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
       invoke->IsInvokeVirtual() ? kVirtual : kSuper;
   switch (intrinsic_type) {
     case kStatic:
@@ -431,7 +577,7 @@
         DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file);
         DCHECK(inliner != nullptr);
         if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
-          Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet());
+          Intrinsics intrinsic = GetIntrinsic(method);
 
           if (intrinsic != Intrinsics::kNone) {
             if (!CheckInvokeType(intrinsic, invoke, dex_file)) {
@@ -440,7 +586,11 @@
                   << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile())
                   << invoke->DebugName();
             } else {
-              invoke->SetIntrinsic(intrinsic, NeedsEnvironmentOrCache(intrinsic));
+              invoke->SetIntrinsic(intrinsic,
+                                   NeedsEnvironmentOrCache(intrinsic),
+                                   GetSideEffects(intrinsic),
+                                   GetExceptions(intrinsic));
+              MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
             }
           }
         }
@@ -454,7 +604,7 @@
     case Intrinsics::kNone:
       os << "None";
       break;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
     case Intrinsics::k ## Name: \
       os << # Name; \
       break;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index e459516..1a8eb58 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,11 +27,15 @@
 class CompilerDriver;
 class DexFile;
 
+// Positive floating-point infinities.
+static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
+static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
+
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
-  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver)
-      : HOptimization(graph, kIntrinsicsRecognizerPassName),
+  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kIntrinsicsRecognizerPassName, stats),
         driver_(driver) {}
 
   void Run() OVERRIDE;
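The two constants added at the top of this header are the IEEE-754 bit patterns of +infinity. A quick standalone check, assuming a C++20 toolchain for std::bit_cast:

    #include <bit>
    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main() {
      // Verify that the constants are the IEEE-754 bit patterns of
      // +infinity (std::bit_cast requires C++20).
      assert(std::bit_cast<uint32_t>(std::numeric_limits<float>::infinity())
             == 0x7f800000U);
      assert(std::bit_cast<uint64_t>(std::numeric_limits<double>::infinity())
             == UINT64_C(0x7ff0000000000000));
    }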
@@ -54,9 +58,9 @@
     switch (invoke->GetIntrinsic()) {
       case Intrinsics::kNone:
         return;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \
-      case Intrinsics::k ## Name:             \
-        Visit ## Name(invoke);                \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \
+      case Intrinsics::k ## Name: \
+        Visit ## Name(invoke);    \
         return;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -69,7 +73,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment)                    \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \
   virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
   }
 #include "intrinsics_list.h"
@@ -82,9 +86,9 @@
                             InvokeDexCallingConventionVisitor* calling_convention_visitor) {
     if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
       HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
-      // When we do not run baseline, explicit clinit checks triggered by static
-      // invokes must have been pruned by art::PrepareForRegisterAllocation.
-      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+      // Explicit clinit checks triggered by static invokes must have been
+      // pruned by art::PrepareForRegisterAllocation.
+      DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
     }
 
     if (invoke->GetNumberOfArguments() == 0) {
@@ -121,11 +125,12 @@
 void Set##name() { SetBit(k##name); }                  \
 bool Get##name() const { return IsBitSet(k##name); }   \
 private:                                               \
-static constexpr int k##name = bit
+static constexpr size_t k##name = bit
 
 class IntrinsicOptimizations : public ValueObject {
  public:
-  explicit IntrinsicOptimizations(HInvoke* invoke) : value_(invoke->GetIntrinsicOptimizations()) {}
+  explicit IntrinsicOptimizations(HInvoke* invoke)
+      : value_(invoke->GetIntrinsicOptimizations()) {}
   explicit IntrinsicOptimizations(const HInvoke& invoke)
       : value_(invoke.GetIntrinsicOptimizations()) {}
 
@@ -135,15 +140,17 @@
 
  protected:
   bool IsBitSet(uint32_t bit) const {
+    DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte);
     return (*value_ & (1 << bit)) != 0u;
   }
 
   void SetBit(uint32_t bit) {
-    *(const_cast<uint32_t*>(value_)) |= (1 << bit);
+    DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte);
+    *(const_cast<uint32_t* const>(value_)) |= (1 << bit);
   }
 
  private:
-  const uint32_t *value_;
+  const uint32_t* const value_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicOptimizations);
 };
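The switch from int to size_t for the bit indices and the new DCHECK_LT guards both serve the same 32-bit flag-word scheme. A self-contained sketch of that scheme, with assert standing in for DCHECK_LT:

    #include <cassert>
    #include <climits>
    #include <cstdint>

    // One named bit per optimization in a 32-bit word; the accessors
    // bounds-check the bit index, mirroring the new DCHECK_LT guards.
    class Flags {
     public:
      bool IsBitSet(uint32_t bit) const {
        assert(bit < sizeof(uint32_t) * CHAR_BIT);
        return (value_ & (1u << bit)) != 0u;
      }
      void SetBit(uint32_t bit) {
        assert(bit < sizeof(uint32_t) * CHAR_BIT);
        value_ |= (1u << bit);
      }
     private:
      uint32_t value_ = 0u;
    };

    int main() {
      Flags flags;
      flags.SetBit(3);
      assert(flags.IsBitSet(3) && !flags.IsBitSet(4));
    }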
@@ -155,7 +162,7 @@
 void Set##name() { SetBit(k##name); }                                 \
 bool Get##name() const { return IsBitSet(k##name); }                  \
 private:                                                              \
-static constexpr int k##name = bit + kNumberOfGenericOptimizations
+static constexpr size_t k##name = (bit) + kNumberOfGenericOptimizations
 
 class StringEqualsOptimizations : public IntrinsicOptimizations {
  public:
@@ -190,6 +197,52 @@
 
 #undef INTRISIC_OPTIMIZATION
 
+//
+// Macros for use in the intrinsics code generators.
+//
+
+// Defines an unimplemented intrinsic: that is, a method call that is recognized as an
+// intrinsic (e.g. to exploit its lack of side effects or exceptions) but is not otherwise
+// handled by this architecture-specific intrinsics code generator. It is eventually
+// compiled as a regular method call.
+#define UNIMPLEMENTED_INTRINSIC(Arch, Name)                                               \
+void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}                                                                                         \
+void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+}
+
+// Defines a list of unreached intrinsics: that is, method calls that are recognized as
+// intrinsics but are always converted into HIR instructions before they reach any
+// architecture-specific intrinsics code generator.
+#define UNREACHABLE_INTRINSIC(Arch, Name)                                \
+void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke) { \
+  LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()      \
+             << " should have been converted to HIR";                    \
+}                                                                        \
+void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) {    \
+  LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()      \
+             << " should have been converted to HIR";                    \
+}
+#define UNREACHABLE_INTRINSICS(Arch)                \
+UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits)    \
+UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \
+UNREACHABLE_INTRINSIC(Arch, FloatIsNaN)             \
+UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN)            \
+UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft)      \
+UNREACHABLE_INTRINSIC(Arch, LongRotateLeft)         \
+UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight)     \
+UNREACHABLE_INTRINSIC(Arch, LongRotateRight)        \
+UNREACHABLE_INTRINSIC(Arch, IntegerCompare)         \
+UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
+UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
+UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
+UNREACHABLE_INTRINSIC(Arch, StringCharAt)           \
+UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)          \
+UNREACHABLE_INTRINSIC(Arch, StringLength)           \
+UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
+UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)       \
+UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
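A backend then declares each intrinsic it does not handle with a single macro line. The following compilable model mirrors the two macros with toy types; the intrinsic names are arbitrary examples, not a statement about any particular backend:

    #include <cstdio>
    #include <cstdlib>

    // Compilable model of the two macros, with a toy invoke type standing
    // in for HInvoke and free functions for the per-backend visitors.
    struct Invoke { const char* name; };

    #define UNIMPLEMENTED_INTRINSIC_MODEL(Arch, Name) \
      void Visit##Arch##Name(Invoke*) { /* left to the normal call path */ }

    #define UNREACHABLE_INTRINSIC_MODEL(Arch, Name)                        \
      void Visit##Arch##Name(Invoke* invoke) {                             \
        std::fprintf(stderr, "Unreachable: intrinsic %s should have been " \
                             "converted to HIR\n", invoke->name);          \
        std::abort();                                                      \
      }

    UNIMPLEMENTED_INTRINSIC_MODEL(ARM, MathRoundFloat)    // emitted as a real call
    UNREACHABLE_INTRINSIC_MODEL(ARM, IntegerRotateLeft)   // simplified to HIR earlier

    int main() {
      Invoke invoke{"IntegerRotateLeft"};
      VisitARMMathRoundFloat(&invoke);      // harmless no-op
      // VisitARMIntegerRotateLeft(&invoke) would abort by design.
    }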
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 0a5acc3..0bbc0e5 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,10 +41,99 @@
 
 using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
 
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+      : SlowPathCode(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    Register dest = locations->InAt(2).AsRegister<Register>();
+    Location dest_pos = locations->InAt(3);
+    Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+    Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+    Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+    Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+    __ Bind(GetEntryLabel());
+    // Compute the base destination address in `dst_curr_addr`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+    } else {
+      __ add(dst_curr_addr,
+             dest,
+             ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+      __ AddConstant(dst_curr_addr, offset);
+    }
+
+    Label loop;
+    __ Bind(&loop);
+    __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+    __ MaybeUnpoisonHeapReference(tmp);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp = ReadBarrier::Mark(tmp);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+    // explanations.)
+    DCHECK_NE(tmp, SP);
+    DCHECK_NE(tmp, LR);
+    DCHECK_NE(tmp, PC);
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK_NE(src_curr_addr, IP);
+    DCHECK_NE(dst_curr_addr, IP);
+    DCHECK_NE(src_stop_addr, IP);
+    DCHECK_NE(tmp, IP);
+    DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ MaybePoisonHeapReference(tmp);
+    __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+    __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+    __ b(&loop, NE);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ assembler->
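In outline, the slow path above re-marks every reference as it is copied: post-indexed load, runtime mark call, post-indexed store, until the source cursor reaches the stop address. A pseudo-C++ rendering of that loop shape (plain pointers instead of ARM registers; the Mark stand-in is hypothetical):

    #include <cstdint>

    // Hypothetical stand-in for the ReadBarrierMarkRegX runtime entry point.
    uint32_t Mark(uint32_t ref) { return ref; }

    // Shape of the loop emitted by ReadBarrierSystemArrayCopySlowPathARM.
    // References are modeled as raw 32-bit words, matching 32-bit heap
    // references on ARM.
    void CopyWithReadBarrier(const uint32_t* src_curr, const uint32_t* src_stop,
                             uint32_t* dst_curr) {
      do {
        uint32_t tmp = *src_curr++;  // ldr tmp, [src_curr_addr], #4
        tmp = Mark(tmp);             // InvokeRuntimeWithoutRecordingPcInfo(...)
        *dst_curr++ = tmp;           // str tmp, [dst_curr_addr], #4
      } while (src_curr != src_stop);
    }

    int main() {
      const uint32_t src[3] = {1, 2, 3};
      uint32_t dst[3] = {};
      CopyWithReadBarrier(src, src + 3, dst);
      return dst[2] == 3 ? 0 : 1;
    }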
@@ -97,10 +186,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -111,10 +200,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -224,178 +313,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
-static void GenIntegerRotate(LocationSummary* locations,
-                             ArmAssembler* assembler,
-                             bool is_left) {
-  Register in = locations->InAt(0).AsRegister<Register>();
-  Location rhs = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
-
-  if (rhs.IsConstant()) {
-    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
-    // so map all rotations to a +ve. equivalent in that range.
-    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
-    if (rot) {
-      // Rotate, mapping left rotations to right equivalents if necessary.
-      // (e.g. left by 2 bits == right by 30.)
-      __ Ror(out, in, is_left ? (0x20 - rot) : rot);
-    } else if (out != in) {
-      __ Mov(out, in);
-    }
-  } else {
-    if (is_left) {
-      __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
-      __ Ror(out, in, out);
-    } else {
-      __ Ror(out, in, rhs.AsRegister<Register>());
-    }
-  }
-}
-
-// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
-// rotates by swapping input regs (effectively rotating by the first 32-bits of
-// a larger rotation) or flipping direction (thus treating larger right/left
-// rotations as sub-word sized rotations in the other direction) as appropriate.
-static void GenLongRotate(LocationSummary* locations,
-                          ArmAssembler* assembler,
-                          bool is_left) {
-  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
-  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
-  Location rhs = locations->InAt(1);
-  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
-  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
-
-  if (rhs.IsConstant()) {
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
-    // Map all left rotations to right equivalents.
-    if (is_left) {
-      rot = 0x40 - rot;
-    }
-    // Map all rotations to +ve. equivalents on the interval [0,63].
-    rot &= 0x3F;
-    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
-    // logic below to a simple pair of binary orr.
-    // (e.g. 34 bits == in_reg swap + 2 bits right.)
-    if (rot >= 0x20) {
-      rot -= 0x20;
-      std::swap(in_reg_hi, in_reg_lo);
-    }
-    // Rotate, or mov to out for zero or word size rotations.
-    if (rot) {
-      __ Lsr(out_reg_hi, in_reg_hi, rot);
-      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
-      __ Lsr(out_reg_lo, in_reg_lo, rot);
-      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
-    } else {
-      __ Mov(out_reg_lo, in_reg_lo);
-      __ Mov(out_reg_hi, in_reg_hi);
-    }
-  } else {
-    Register shift_left = locations->GetTemp(0).AsRegister<Register>();
-    Register shift_right = locations->GetTemp(1).AsRegister<Register>();
-    Label end;
-    Label right;
-
-    __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
-    __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
-    __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
-
-    if (is_left) {
-      __ b(&right, CS);
-    } else {
-      __ b(&right, CC);
-      std::swap(shift_left, shift_right);
-    }
-
-    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
-    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
-    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ Lsr(shift_left, in_reg_hi, shift_right);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
-    __ b(&end);
-
-    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
-    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
-    __ Bind(&right);
-    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ Lsl(shift_right, in_reg_hi, shift_left);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
-
-    __ Bind(&end);
-  }
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
-}
-
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
@@ -413,7 +330,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
@@ -421,7 +338,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -470,7 +387,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 
@@ -479,7 +396,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -510,7 +427,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -518,7 +435,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
@@ -658,83 +575,130 @@
                          bool is_volatile,
                          CodeGeneratorARM* codegen) {
   LocationSummary* locations = invoke->GetLocations();
-  DCHECK((type == Primitive::kPrimInt) ||
-         (type == Primitive::kPrimLong) ||
-         (type == Primitive::kPrimNot));
   ArmAssembler* assembler = codegen->GetAssembler();
-  Register base = locations->InAt(1).AsRegister<Register>();           // Object pointer.
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();             // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
 
-  if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
-    __ add(IP, base, ShifterOperand(offset));
-    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-      Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
-      __ ldrexd(trg_lo, trg_hi, IP);
-    } else {
-      __ ldrd(trg_lo, Address(IP));
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register trg = trg_loc.AsRegister<Register>();
+      __ ldr(trg, Address(base, offset));
+      if (is_volatile) {
+        __ dmb(ISH);
+      }
+      break;
     }
-  } else {
-    Register trg = locations->Out().AsRegister<Register>();
-    __ ldr(trg, Address(base, offset));
-  }
 
-  if (is_volatile) {
-    __ dmb(ISH);
-  }
+    case Primitive::kPrimNot: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
+          if (is_volatile) {
+            __ dmb(ISH);
+          }
+        } else {
+          __ ldr(trg, Address(base, offset));
+          if (is_volatile) {
+            __ dmb(ISH);
+          }
+          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ ldr(trg, Address(base, offset));
+        if (is_volatile) {
+          __ dmb(ISH);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
 
-  if (type == Primitive::kPrimNot) {
-    Register trg = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(trg);
+    case Primitive::kPrimLong: {
+      Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+      __ add(IP, base, ShifterOperand(offset));
+      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+        Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+        __ ldrexd(trg_lo, trg_hi, IP);
+      } else {
+        __ ldrd(trg_lo, Address(IP));
+      }
+      if (is_volatile) {
+        __ dmb(ISH);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
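Each volatile path above issues the load followed by dmb(ISH), which is the pre-ARMv8 recipe for an acquire load. The portable equivalent, for comparison (a sketch of the ordering contract, not of ART's codegen):

    #include <atomic>
    #include <cstdint>

    // "ldr; dmb ish" corresponds to a load with acquire semantics: later
    // memory accesses cannot be reordered before the load.
    int32_t UnsafeGetVolatile(const std::atomic<int32_t>* addr) {
      return addr->load(std::memory_order_acquire);
    }

    int main() {
      std::atomic<int32_t> field{42};
      return UnsafeGetVolatile(&field) == 42 ? 0 : 1;
    }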
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
@@ -764,31 +728,34 @@
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
 }
 
 static void GenUnsafePut(LocationSummary* locations,
@@ -850,35 +817,72 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
-                                                HInvoke* invoke) {
+                                                HInvoke* invoke,
+                                                Primitive::Type type) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
@@ -888,11 +892,15 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  // If heap poisoning is enabled, we don't want the unpoisoning
+  // operations to potentially clobber the output.
+  Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+      ? Location::kOutputOverlap
+      : Location::kNoOutputOverlap;
+  locations->SetOut(Location::RequiresRegister(), overlaps);
 
   locations->AddTemp(Location::RequiresRegister());  // Pointer.
   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
-  locations->AddTemp(Location::RequiresRegister());  // Temp 2.
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
@@ -918,13 +926,21 @@
   }
 
   // Prevent reordering with prior memory operations.
+  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+  // latter allows a preceding load to be delayed past the STXR
+  // instruction below.
   __ dmb(ISH);
 
   __ add(tmp_ptr, base, ShifterOperand(offset));
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
     codegen->GetAssembler()->PoisonHeapReference(expected_lo);
-    codegen->GetAssembler()->PoisonHeapReference(value_lo);
+    if (value_lo == expected_lo) {
+      // Do not poison `value_lo`, as it is the same register as
+      // `expected_lo`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value_lo);
+    }
   }
 
   // do {
@@ -935,6 +951,14 @@
   Label loop_head;
   __ Bind(&loop_head);
 
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
   __ ldrex(tmp_lo, tmp_ptr);
 
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
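The ldrex/subs/strex sequence here is ARM's load-linked/store-conditional compare-and-swap, bracketed by full barriers. A portable sketch of the same contract (modeling GenCas's semantics, not its code):

    #include <atomic>
    #include <cstdint>

    // Returns true iff *addr was equal to `expected` and was replaced by
    // `value`. seq_cst ordering models the dmb-before/dmb-after pattern.
    bool UnsafeCasInt(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
      return addr->compare_exchange_strong(expected, value,
                                           std::memory_order_seq_cst);
    }

    int main() {
      std::atomic<int32_t> field{1};
      return UnsafeCasInt(&field, 1, 2) && field.load() == 2 ? 0 : 1;
    }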
@@ -952,109 +976,172 @@
   __ mov(out, ShifterOperand(0), CC);
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
     codegen->GetAssembler()->UnpoisonHeapReference(expected_lo);
+    if (value_lo == expected_lo) {
+      // Do not unpoison `value_lo`, as it is the same register as
+      // `expected_lo`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
+    }
   }
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
-  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
-  // TODO(rpl): Fix it and turn it back on.
-  if (kPoisonHeapReferences) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
     return;
   }
 
-  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
   GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-
-  locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
-  ArmAssembler* assembler = GetAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array
-  const MemberOffset value_offset = mirror::String::ValueOffset();
-  // Location of count
-  const MemberOffset count_offset = mirror::String::CountOffset();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();  // String object pointer.
-  Register idx = locations->InAt(1).AsRegister<Register>();  // Index of character.
-  Register out = locations->Out().AsRegister<Register>();    // Result character.
-
-  Register temp = locations->GetTemp(0).AsRegister<Register>();
-  Register array_temp = locations->GetTemp(1).AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  __ ldr(temp, Address(obj, count_offset.Int32Value()));          // temp = str.length.
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ cmp(idx, ShifterOperand(temp));
-  __ b(slow_path->GetEntryLabel(), CS);
-
-  __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value()));  // array_temp := str.value.
-
-  // Load the value.
-  __ ldrh(out, Address(array_temp, idx, LSL, 1));                 // out := array_temp[idx].
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp0 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp1 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+
+  Label loop;
+  Label find_char_diff;
+  Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = locations->InAt(1).AsRegister<Register>();
-  __ cmp(argument, ShifterOperand(0));
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Take the slow path and throw if the input can be null and actually is null.
+  SlowPathCode* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
+  }
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pStringCompareTo).Int32Value());
-  __ blx(LR);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ subs(out, str, ShifterOperand(arg));
+  __ b(&end, EQ);
+  // Load lengths of this and argument strings.
+  __ ldr(temp2, Address(str, count_offset));
+  __ ldr(temp1, Address(arg, count_offset));
+  // out = length diff.
+  __ subs(out, temp2, ShifterOperand(temp1));
+  // temp0 = min(len(str), len(arg)).
+  __ it(Condition::LT, kItElse);
+  __ mov(temp0, ShifterOperand(temp2), Condition::LT);
+  __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+  // Shorter string is empty?
+  __ CompareAndBranchIfZero(temp0, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ mov(temp1, ShifterOperand(value_offset));
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ sub(temp0, temp0, ShifterOperand(2));
+
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ subs(temp0, temp0, ShifterOperand(2));
+
+  __ b(&loop, GT);
+  __ b(&end);
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ eor(temp1, temp2, ShifterOperand(IP));
+  __ rbit(temp1, temp1);
+  __ clz(temp1, temp1);
+
+  // temp0 = number of 16-bit characters remaining to compare.
+  // (It can be < 1 if a difference is found after the first SUB in the comparison loop,
+  // past the end of the shorter string's data.)
+
+  // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
+  // interval [0,1] (0 for low half-word different, 1 for high half-word different).
+
+  // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
+  // return length diff (out).
+  __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
+  __ b(&end, LE);
+  // Extract the characters and calculate the difference.
+  __ bic(temp1, temp1, ShifterOperand(0xf));
+  __ Lsr(temp2, temp2, temp1);
+  __ Lsr(IP, IP, temp1);
+  __ movt(temp2, 0);
+  __ movt(IP, 0);
+  __ sub(out, IP, ShifterOperand(temp2));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
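The find_char_diff block relies on a classic ARM trick: XOR the mismatching words, then rbit + clz yields the bit index of the lowest set bit, i.e. a count-trailing-zeros. A sketch using the GCC/Clang builtin in place of the two instructions:

    #include <cassert>
    #include <cstdint>

    // Precondition: a != b (ctz of zero is undefined), which holds here
    // because this path is only taken when the compared words differ.
    int LowestDiffBit(uint32_t a, uint32_t b) {
      return __builtin_ctz(a ^ b);  // == rbit + clz on ARM
    }

    int main() {
      // Low half-words equal, high half-word differs: (bit >> 4) == 1,
      // matching the "interval [0,1]" comment in the code above.
      assert(LowestDiffBit(0x00410041u, 0x00420041u) >> 4 == 1);
    }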
 
 void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
@@ -1098,17 +1185,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ CompareAndBranchIfZero(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ CompareAndBranchIfZero(arg, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ ldr(temp, Address(str, class_offset));
-  __ ldr(temp1, Address(arg, class_offset));
-  __ cmp(temp, ShifterOperand(temp1));
-  __ b(&return_false, NE);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ ldr(temp, Address(str, class_offset));
+    __ ldr(temp1, Address(arg, class_offset));
+    __ cmp(temp, ShifterOperand(temp1));
+    __ b(&return_false, NE);
+  }
 
   // Load lengths of this and argument strings.
   __ ldr(temp, Address(str, count_offset));
@@ -1125,7 +1217,7 @@
 
   // Assertions that must hold in order to compare strings 2 characters at a time.
   DCHECK_ALIGNED(value_offset, 4);
-  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
   __ LoadImmediate(temp1, value_offset);
 
@@ -1158,16 +1250,16 @@
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
-  Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
 
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
         std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1177,23 +1269,26 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = locations->InAt(1).AsRegister<Register>();
-    __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max());
-    __ cmp(char_reg, ShifterOperand(tmp_reg));
+    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
+    __ cmp(char_reg,
+           ShifterOperand(static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1));
     slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
     codegen->AddSlowPath(slow_path);
-    __ b(slow_path->GetEntryLabel(), HI);
+    __ b(slow_path->GetEntryLabel(), HS);
   }
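The switch from `> 0xffff` to `>= 0x10000` works because A32 modified immediates are an 8-bit value rotated right by an even amount: 0x10000 encodes directly, while 0xffff does not. A standalone checker for that encoding rule:

    #include <cassert>
    #include <cstdint>

    // A32 modified immediates: an 8-bit value rotated right by an even
    // amount. 0x10000 (1 rotated right by 16) qualifies; 0xffff, with 16
    // contiguous set bits, cannot fit in 8 bits under any rotation.
    bool IsModifiedImmediate(uint32_t value) {
      for (uint32_t rot = 0; rot < 32; rot += 2) {
        uint32_t unrotated = (value << rot) | (value >> ((32 - rot) % 32));
        if (unrotated <= 0xffu) return true;
      }
      return false;
    }

    int main() {
      assert(IsModifiedImmediate(0x10000u));
      assert(!IsModifiedImmediate(0xffffu));
    }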
 
   if (start_at_zero) {
+    Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
     DCHECK_EQ(tmp_reg, R2);
     // Start-index = 0.
     __ LoadImmediate(tmp_reg, 0);
   }
 
   __ LoadFromOffset(kLoadWord, LR, TR,
-                    QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pIndexOf).Int32Value());
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   __ blx(LR);
 
   if (slow_path != nullptr) {
@@ -1203,7 +1298,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1212,17 +1307,18 @@
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(R0));
 
-  // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+  // Need to send start-index=0.
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1231,18 +1327,16 @@
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   locations->SetOut(Location::RegisterLocation(R0));
-
-  // Need a temp for slow-path codepoint compare.
-  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1262,16 +1356,19 @@
   codegen_->AddSlowPath(slow_path);
   __ b(slow_path->GetEntryLabel(), EQ);
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ LoadFromOffset(kLoadWord,
+                    LR,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromBytes).Int32Value());
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1283,15 +1380,24 @@
 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+  __ LoadFromOffset(kLoadWord,
+                    LR,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromChars).Int32Value());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1309,13 +1415,20 @@
   __ b(slow_path->GetEntryLabel(), EQ);
 
   __ LoadFromOffset(kLoadWord,
-      LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+      LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromString).Int32Value());
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   LocationSummary* locations = invoke->GetLocations();
   if (locations == nullptr) {
@@ -1335,6 +1448,13 @@
   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
     locations->SetInAt(4, Location::RequiresRegister());
   }
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP cannot be used in
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
+    // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+    // temporary register from the register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 static void CheckPosition(ArmAssembler* assembler,
@@ -1342,7 +1462,6 @@
                           Register input,
                           Location length,
                           SlowPathCode* slow_path,
-                          Register input_len,
                           Register temp,
                           bool length_is_input_length = false) {
   // Where is the length in the Array?
@@ -1363,8 +1482,8 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ LoadFromOffset(kLoadWord, input_len, input, length_offset);
-      __ subs(temp, input_len, ShifterOperand(pos_const));
+      __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+      __ subs(temp, temp, ShifterOperand(pos_const));
       __ b(slow_path->GetEntryLabel(), LT);
 
       // Check that (length(input) - pos) >= length.
@@ -1401,6 +1520,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1408,46 +1531,55 @@
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   Register src = locations->InAt(0).AsRegister<Register>();
   Location src_pos = locations->InAt(1);
   Register dest = locations->InAt(2).AsRegister<Register>();
   Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
-  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
-  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
-  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+  Location temp1_loc = locations->GetTemp(0);
+  Register temp1 = temp1_loc.AsRegister<Register>();
+  Location temp2_loc = locations->GetTemp(1);
+  Register temp2 = temp2_loc.AsRegister<Register>();
+  Location temp3_loc = locations->GetTemp(2);
+  Register temp3 = temp3_loc.AsRegister<Register>();
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
 
-  Label ok;
+  Label conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
-  if (!optimizations.GetDestinationIsSource()) {
-    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
-      __ cmp(src, ShifterOperand(dest));
-    }
-  }
-
   // If source and destination are the same, we go to slow path if we need to do
   // forward copying.
   if (src_pos.IsConstant()) {
     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
     if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ cmp(src, ShifterOperand(dest));
+        __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
+      }
+
       // Checked when building locations.
       DCHECK(!optimizations.GetDestinationIsSource()
              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
     } else {
       if (!optimizations.GetDestinationIsSource()) {
-        __ b(&ok, NE);
+        __ cmp(src, ShifterOperand(dest));
+        __ b(&conditions_on_positions_validated, NE);
       }
       __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
-      __ b(slow_path->GetEntryLabel(), GT);
+      __ b(intrinsic_slow_path->GetEntryLabel(), GT);
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
-      __ b(&ok, NE);
+      __ cmp(src, ShifterOperand(dest));
+      __ b(&conditions_on_positions_validated, NE);
     }
     if (dest_pos.IsConstant()) {
       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1455,19 +1587,19 @@
     } else {
       __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
     }
-    __ b(slow_path->GetEntryLabel(), LT);
+    __ b(intrinsic_slow_path->GetEntryLabel(), LT);
   }
 
-  __ Bind(&ok);
+  __ Bind(&conditions_on_positions_validated);
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
-    __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
   }
 
   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
     // Bail out if the destination is null.
-    __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
   }
 
   // If the length is negative, bail out.
@@ -1476,7 +1608,7 @@
       !optimizations.GetCountIsSourceLength() &&
       !optimizations.GetCountIsDestinationLength()) {
     __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
-    __ b(slow_path->GetEntryLabel(), LT);
+    __ b(intrinsic_slow_path->GetEntryLabel(), LT);
   }
 
   // Validity checks: source.
@@ -1484,9 +1616,8 @@
                 src_pos,
                 src,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -1494,9 +1625,8 @@
                 dest_pos,
                 dest,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsDestinationLength());
 
   if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1504,149 +1634,839 @@
     // type of the destination array. We do two checks: the classes are the same,
     // or the destination is Object[]. If none of these checks succeed, we go to the
     // slow path.
-    __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
-    __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
-    bool did_unpoison = false;
-    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
-        !optimizations.GetSourceIsNonPrimitiveArray()) {
-      // One or two of the references need to be unpoisoned. Unpoisoned them
-      // both to make the identity check valid.
-      __ MaybeUnpoisonHeapReference(temp1);
-      __ MaybeUnpoisonHeapReference(temp2);
-      did_unpoison = true;
-    }
 
-    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
-      // Bail out if the destination is not a non primitive array.
-      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
-      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(temp3);
-      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
-    }
-
-    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
-      // Bail out if the source is not a non primitive array.
-      // Bail out if the destination is not a non primitive array.
-      __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
-      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(temp3);
-      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
-    }
-
-    __ cmp(temp1, ShifterOperand(temp2));
-
-    if (optimizations.GetDestinationIsTypedObjectArray()) {
-      Label do_copy;
-      __ b(&do_copy, EQ);
-      if (!did_unpoison) {
-        __ MaybeUnpoisonHeapReference(temp1);
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
       }
-      __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-      __ MaybeUnpoisonHeapReference(temp1);
-      __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
-      // No need to unpoison the result, we're comparing against null.
-      __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-      __ Bind(&do_copy);
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ cmp(temp1, ShifterOperand(temp2));
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        Label do_copy;
+        __ b(&do_copy, EQ);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+      }
     } else {
-      __ b(slow_path->GetEntryLabel(), NE);
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      __ cmp(temp1, ShifterOperand(temp2));
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        Label do_copy;
+        __ b(&do_copy, EQ);
+        if (!did_unpoison) {
+          __ MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        __ MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+        // No need to unpoison the result, we're comparing against null.
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+      }
     }
   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
     // Bail out if the source is not a non primitive array.
-    __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
-    __ MaybeUnpoisonHeapReference(temp1);
-    __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
-    __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
-    __ MaybeUnpoisonHeapReference(temp3);
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp3` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+    }
+    // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
     __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-    __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }
 
-  // Compute base source address, base destination address, and end source address.
-
-  uint32_t element_size = sizeof(int32_t);
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
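+  // For heap references, element_size is 4 bytes and element_size_shift is 2.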
+
+  // Compute the base source address in `temp1`.
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
     __ AddConstant(temp1, src, element_size * constant + offset);
   } else {
-    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
     __ AddConstant(temp1, offset);
   }
 
-  if (dest_pos.IsConstant()) {
-    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ AddConstant(temp2, dest, element_size * constant + offset);
-  } else {
-    __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
-    __ AddConstant(temp2, offset);
-  }
-
+  // Compute the end source address in `temp3`.
   if (length.IsConstant()) {
     int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
     __ AddConstant(temp3, temp1, element_size * constant);
   } else {
-    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
   }
 
-  // Iterate over the arrays and do a raw copy of the objects. We don't need to
-  // poison/unpoison, nor do any read barrier as the next uses of the destination
-  // array will do it.
-  Label loop, done;
-  __ cmp(temp1, ShifterOperand(temp3));
-  __ b(&done, EQ);
-  __ Bind(&loop);
-  __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
-  __ str(IP, Address(temp2, element_size, Address::PostIndex));
-  __ cmp(temp1, ShifterOperand(temp3));
-  __ b(&loop, NE);
-  __ Bind(&done);
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // The base destination address is computed later, as `temp2` is
+    // used for intermediate computations.
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
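+    //
+    // The gray check is performed below on the rb_state bit of the lock
+    // word; the slow-path copy is implemented by
+    // ReadBarrierSystemArrayCopySlowPathARM.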
+
+    Label loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&done, EQ);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `src` is unchanged by this operation, but its value now depends
+    // on `temp2`.
+    __ add(src, src, ShifterOperand(temp2, LSR, 32));
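+    // (On ARM, a shifter operand of LSR #32 always yields 0, which is why the
+    // ADD above does not modify `src`.)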
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCode* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction, unlike the TST immediate.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+    // Carry flag is the last bit shifted out by LSRS.
+    __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+    // Fast-path copy.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ AddConstant(temp2, dest, element_size * constant + offset);
+    } else {
+      __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+      __ AddConstant(temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+    __ str(IP, Address(temp2, element_size, Address::PostIndex));
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&loop, NE);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ AddConstant(temp2, dest, element_size * constant + offset);
+    } else {
+      __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+      __ AddConstant(temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    Label loop, done;
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&done, EQ);
+    __ Bind(&loop);
+    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+    __ str(IP, Address(temp2, element_size, Address::PostIndex));
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&loop, NE);
+    __ Bind(&done);
+  }
 
   // We only need one card marking on the destination array.
   codegen_->MarkGCCard(temp1,
                        temp2,
                        dest,
                        Register(kNoRegister),
-                       false);
+                       /* value_can_be_null */ false);
 
-  __ Bind(slow_path->GetExitLabel());
+  __ Bind(intrinsic_slow_path->GetExitLabel());
 }
 
-// Unimplemented intrinsics.
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;
+  }
 
-#define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
-void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}                                                                                      \
-void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI.
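+  // Under that ABI a double argument is passed in a core register pair;
+  // GenFPToFPCall moves the value there with vmovrrd before the call and reads
+  // the result back with vmovdrr afterwards.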
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
-UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
-UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
-UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
-UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat)
-UNIMPLEMENTED_INTRINSIC(MathMinLongLong)
-UNIMPLEMENTED_INTRINSIC(MathMaxLongLong)
-UNIMPLEMENTED_INTRINSIC(MathCeil)          // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(MathFloor)         // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(MathRint)
-UNIMPLEMENTED_INTRINSIC(MathRoundDouble)   // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)    // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)     // High register pressure.
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;
+  }
 
-#undef UNIMPLEMENTED_INTRINSIC
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI.
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+                          ArmAssembler* assembler,
+                          CodeGeneratorARM* codegen,
+                          QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+  const InvokeRuntimeCallingConvention calling_convention;
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+  DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
+  DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
+
+  __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
+  // Native code uses the soft float ABI.
+  __ vmovrrd(calling_convention.GetRegisterAt(0),
+             calling_convention.GetRegisterAt(1),
+             FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+  __ blx(LR);
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
+             calling_convention.GetRegisterAt(0),
+             calling_convention.GetRegisterAt(1));
+}
+
+static void GenFPFPToFPCall(HInvoke* invoke,
+                            ArmAssembler* assembler,
+                            CodeGeneratorARM* codegen,
+                            QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+  const InvokeRuntimeCallingConvention calling_convention;
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+  DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
+  DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
+  DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2)));
+  DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3)));
+
+  __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
+  // Native code uses the soft float ABI.
+  __ vmovrrd(calling_convention.GetRegisterAt(0),
+             calling_convention.GetRegisterAt(1),
+             FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+  __ vmovrrd(calling_convention.GetRegisterAt(2),
+             calling_convention.GetRegisterAt(3),
+             FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()));
+  __ blx(LR);
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
+             calling_convention.GetRegisterAt(0),
+             calling_convention.GetRegisterAt(1));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAtan2(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathHypot(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerReverse(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register in  = locations->InAt(0).AsRegister<Register>();
+
+  __ rbit(out, in);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongReverse(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register in_reg_lo  = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi  = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  __ rbit(out_reg_lo, in_reg_hi);
+  __ rbit(out_reg_hi, in_reg_lo);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerReverseBytes(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register in  = locations->InAt(0).AsRegister<Register>();
+
+  __ rev(out, in);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongReverseBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongReverseBytes(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register in_reg_lo  = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi  = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  __ rev(out_reg_lo, in_reg_hi);
+  __ rev(out_reg_hi, in_reg_lo);
+}
+
+void IntrinsicLocationsBuilderARM::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register in  = locations->InAt(0).AsRegister<Register>();
+
+  __ revsh(out, in);
+}
+
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
+  Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
+  SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+  DRegister tmp_d = FromLowSToD(tmp_s);
+  Register  out_r = locations->Out().AsRegister<Register>();
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
+  // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
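+  // For example, the 32-bit input 0x0F0F0F0F (popcount 16) becomes
+  // |4|4|4|4|4|4|4|4| after vcnt, |--8|--8|--8|--8| after the first vpaddl,
+  // and |-----16|-----16| after the second; vmovrs then reads back 16.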
+  __ vmovdrr(tmp_d, src_1, src_0);                         // Temp DReg |--src_1|--src_0|
+  __ vcntd(tmp_d, tmp_d);                                  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true);     // Temp DReg |--c|--c|--c|--c|
+  __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true);    // Temp DReg |------c|------c|
+  if (is_long) {
+    __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true);  // Temp DReg |--------------c|
+  }
+  __ vmovrs(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // Temporary registers to store string lengths and for intermediate calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Location of data in char array buffer.
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Location of char array data in string.
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+
+  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
+  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
+  Register srcObj = locations->InAt(0).AsRegister<Register>();
+  Register srcBegin = locations->InAt(1).AsRegister<Register>();
+  Register srcEnd = locations->InAt(2).AsRegister<Register>();
+  Register dstObj = locations->InAt(3).AsRegister<Register>();
+  Register dstBegin = locations->InAt(4).AsRegister<Register>();
+
+  Register num_chr = locations->GetTemp(0).AsRegister<Register>();
+  Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
+  Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
+
+  // src range to copy.
+  __ add(src_ptr, srcObj, ShifterOperand(value_offset));
+  __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
+
+  // dst to be copied.
+  __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
+  __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
+
+  __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
+
+  // Do the copy.
+  Label loop, remainder, done;
+
+  // Early out for valid zero-length retrievals.
+  __ b(&done, EQ);
+
+  // Subtract into IP to avoid having to repair num_chr on the < 4 character path.
+  __ subs(IP, num_chr, ShifterOperand(4));
+  __ b(&remainder, LT);
+
+  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
+  __ mov(num_chr, ShifterOperand(IP));
+
+  // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
+  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
+  // to rectify these everywhere this intrinsic applies.)
+  __ Bind(&loop);
+  __ ldr(IP, Address(src_ptr, char_size * 2));
+  __ subs(num_chr, num_chr, ShifterOperand(4));
+  __ str(IP, Address(dst_ptr, char_size * 2));
+  __ ldr(IP, Address(src_ptr, char_size * 4, Address::PostIndex));
+  __ str(IP, Address(dst_ptr, char_size * 4, Address::PostIndex));
+  __ b(&loop, GE);
+
+  __ adds(num_chr, num_chr, ShifterOperand(4));
+  __ b(&done, EQ);
+
+  // Main loop for < 4 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  __ ldrh(IP, Address(src_ptr, char_size, Address::PostIndex));
+  __ subs(num_chr, num_chr, ShifterOperand(1));
+  __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
+  __ b(&remainder, GT);
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
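+  // E.g. for +/-Inf (0x7f800000 / 0xff800000) the left shift gives 0xff000000,
+  // the XOR with `infinity` yields 0, CLZ returns 32 and LSR #5 produces 1;
+  // any other input leaves a non-zero value and the result is 0.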
+
+  __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
+  // We don't care about the sign bit, so shift left.
+  __ Lsl(out, out, 1);
+  __ eor(out, out, ShifterOperand(infinity));
+  // If the result is 0, then it has 32 leading zeros; otherwise it has fewer than 32.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // The highest 32 bits of double precision positive infinity separated into
+  // two constants encodable as immediate operands.
+  constexpr uint32_t infinity_high  = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
+  __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+  __ eor(out, out, ShifterOperand(infinity_high));
+  __ eor(out, out, ShifterOperand(infinity_high2));
+  // We don't care about the sign bit, so shift left.
+  __ orr(out, IP, ShifterOperand(out, LSL, 1));
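+  // After the ORR, `out` is zero iff the low word is zero and the high word,
+  // apart from the sign bit, equals 0x7ff00000, i.e. iff the input is +/-Inf.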
+  // If the result is 0, then it has 32 leading zeros; otherwise it has fewer than 32.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARM, MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARM, MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(ARM, MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(ARM, MathCeil)          // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARM, MathFloor)         // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARM, MathRint)
+UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
+UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARM, LongLowestOneBit)
+
+// Java 1.8 intrinsics.
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(ARM)
 
 #undef __
 
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 127e9a4..e01b6ff 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -40,7 +40,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -67,7 +67,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 059abf0..91374b3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -26,12 +26,15 @@
 #include "mirror/string.h"
 #include "thread.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/arm64/constants_arm64.h"
 
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
-using namespace vixl;   // NOLINT(build/namespaces)
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 
@@ -46,6 +49,8 @@
 using helpers::SRegisterFrom;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
+using helpers::InputRegisterAt;
+using helpers::OutputRegister;
 
 namespace {
 
@@ -55,15 +60,15 @@
 
 }  // namespace
 
-vixl::MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
-  return codegen_->GetAssembler()->vixl_masm_;
+MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
+  return codegen_->GetVIXLAssembler();
 }
 
 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
   return codegen_->GetGraph()->GetArena();
 }
 
-#define __ codegen->GetAssembler()->vixl_masm_->
+#define __ codegen->GetVIXLAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
                                    Primitive::Type type,
@@ -99,7 +104,8 @@
 //       restored!
 class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit IntrinsicSlowPathARM64(HInvoke* invoke) : invoke_(invoke) { }
+  explicit IntrinsicSlowPathARM64(HInvoke* invoke)
+      : SlowPathCodeARM64(invoke), invoke_(invoke) { }
 
   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
@@ -138,12 +144,82 @@
   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
 };
 
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+      : SlowPathCodeARM64(instruction), tmp_(tmp) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+    Register tmp_reg = WRegisterFrom(tmp_);
+
+    __ Bind(GetEntryLabel());
+    vixl::aarch64::Label slow_copy_loop;
+    __ Bind(&slow_copy_loop);
+    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp_reg = ReadBarrier::Mark(tmp_reg);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+    // explanations.)
+    DCHECK_NE(tmp_.reg(), LR);
+    DCHECK_NE(tmp_.reg(), WSP);
+    DCHECK_NE(tmp_.reg(), WZR);
+    // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+    DCHECK_NE(tmp_.reg(), IP0);
+    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+    // This runtime call does not require a stack map.
+    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(&slow_copy_loop, ne);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+  Location tmp_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
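
For reference, the loop this slow path emits is a plain element-by-element copy with a mark step in the middle; a minimal C++ model of its data flow, ignoring heap-reference compression and poisoning (a sketch with made-up names; `Mark` stands in for the ReadBarrierMarkRegX entry point):

    using Ref = void*;
    // Stand-in for the ReadBarrierMarkRegX entry point (identity here; the
    // real entry point returns the to-space reference).
    static Ref Mark(Ref ref) { return ref; }

    static void SlowCopyLoopModel(Ref* src, Ref* src_end, Ref* dst) {
      do {                        // Bind(&slow_copy_loop)
        Ref ref = *src++;         // Ldr tmp_reg, [src_curr_addr], #element_size
        ref = Mark(ref);          // Runtime call; no stack map is recorded.
        *dst++ = ref;             // Str tmp_reg, [dst_curr_addr], #element_size
      } while (src != src_end);   // Cmp src_curr_addr, src_stop_addr; B.ne
    }
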
 #undef __
 
 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ masm->
@@ -164,14 +240,14 @@
   locations->SetOut(Location::RequiresFpuRegister());
 }
 
-static void MoveFPToInt(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location input = locations->InAt(0);
   Location output = locations->Out();
   __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
           is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
 }
 
-static void MoveIntToFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location input = locations->InAt(0);
   Location output = locations->Out();
   __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
@@ -186,10 +262,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetVIXLAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetVIXLAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -200,10 +276,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetVIXLAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetVIXLAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -216,7 +292,7 @@
 
 static void GenReverseBytes(LocationSummary* locations,
                             Primitive::Type type,
-                            vixl::MacroAssembler* masm) {
+                            MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -259,9 +335,18 @@
   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler());
 }
 
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
 static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                     Primitive::Type type,
-                                    vixl::MacroAssembler* masm) {
+                                    MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -288,7 +373,7 @@
 
 static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                      Primitive::Type type,
-                                     vixl::MacroAssembler* masm) {
+                                     MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -314,106 +399,9 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenRotateRight(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           rhs.reg());
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
-static void GenRotateLeft(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t regsize = RegisterFrom(in, type).SizeInBits();
-    uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1);
-    __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Neg(RegisterFrom(out, type),
-           Operand(RegisterFrom(locations->InAt(1), type)));
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           RegisterFrom(out, type));
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
-                       vixl::MacroAssembler* masm) {
+                       MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -438,6 +426,39 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
+static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  UseScratchRegisterScope temps(masm);
+
+  Register src = InputRegisterAt(instr, 0);
+  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
+  FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS();
+
+  __ Fmov(fpr, src);
+  __ Cnt(fpr.V8B(), fpr.V8B());
+  __ Addv(fpr.B(), fpr.V8B());
+  __ Fmov(dst, fpr);
+}
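
The Fmov/Cnt/Addv sequence computes a population count by taking per-byte popcounts in a NEON register and then summing the lanes; a scalar C++ sketch of the same computation (illustrative only; for the int variant the upper four byte lanes are zero):

    #include <cstdint>

    static int BitCountModel(uint64_t value) {
      int total = 0;
      for (int i = 0; i < 8; ++i) {          // One iteration per byte lane.
        uint8_t byte = static_cast<uint8_t>(value >> (i * 8));
        int lane = 0;
        while (byte != 0) {                  // Cnt: popcount within one byte.
          lane += byte & 1;
          byte = static_cast<uint8_t>(byte >> 1);
        }
        total += lane;                       // Addv: horizontal add of all lanes.
      }
      return total;                          // Fmov moves the sum back to dst.
    }
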
+
+void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
+}
+
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -446,7 +467,7 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -461,7 +482,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetVIXLAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -469,7 +490,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetVIXLAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -482,7 +503,7 @@
 
 static void GenAbsInteger(LocationSummary* locations,
                           bool is64bit,
-                          vixl::MacroAssembler* masm) {
+                          MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location output = locations->Out();
 
@@ -498,7 +519,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetVIXLAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
@@ -506,13 +527,13 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetVIXLAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
                         bool is_min,
                         bool is_double,
-                        vixl::MacroAssembler* masm) {
+                        MacroAssembler* masm) {
   Location op1 = locations->InAt(0);
   Location op2 = locations->InAt(1);
   Location out = locations->Out();
@@ -541,7 +562,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -549,7 +570,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -557,7 +578,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -565,13 +586,14 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetVIXLAssembler());
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
                       bool is_min,
                       bool is_long,
-                      vixl::MacroAssembler* masm) {
+                      MacroAssembler* masm) {
   Location op1 = locations->InAt(0);
   Location op2 = locations->InAt(1);
   Location out = locations->Out();
@@ -584,21 +606,12 @@
   __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
 }
 
-static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
 void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
   CreateIntIntToIntLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
@@ -606,7 +619,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -614,7 +627,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -622,7 +635,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
@@ -631,7 +644,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -641,7 +654,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -651,7 +664,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -661,52 +674,70 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
-static void CreateFPToIntPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
   locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
-static void GenMathRound(LocationSummary* locations,
-                         bool is_double,
-                         vixl::MacroAssembler* masm) {
-  FPRegister in_reg = is_double ?
-      DRegisterFrom(locations->InAt(0)) : SRegisterFrom(locations->InAt(0));
-  Register out_reg = is_double ?
-      XRegisterFrom(locations->Out()) : WRegisterFrom(locations->Out());
-  UseScratchRegisterScope temps(masm);
-  FPRegister temp1_reg = temps.AcquireSameSizeAs(in_reg);
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
+  // Java 8 API definition for Math.round():
+  // Return the closest long or int to the argument, with ties rounding to positive infinity.
+  //
+  // There is no single instruction in ARMv8 that can support the above definition.
+  // We choose to use FCVTAS here because it has the closest semantics.
+  // FCVTAS performs rounding to nearest integer, ties away from zero.
+  // For most inputs (positive values, zero or NaN), this instruction is enough.
+  // We only need a little extra handling after FCVTAS if the input is a negative half value.
+  //
+  // The reason we didn't choose the FCVTPS instruction here is that although it
+  // performs rounding toward positive infinity, it doesn't perform rounding to nearest.
+  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
+  // If we used this instruction, more handling code would be needed for most inputs.
+  LocationSummary* l = invoke->GetLocations();
+  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
+  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
+  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
+  vixl::aarch64::Label done;
 
-  // 0.5 can be encoded as an immediate, so use fmov.
-  if (is_double) {
-    __ Fmov(temp1_reg, static_cast<double>(0.5));
-  } else {
-    __ Fmov(temp1_reg, static_cast<float>(0.5));
-  }
-  __ Fadd(temp1_reg, in_reg, temp1_reg);
-  __ Fcvtms(out_reg, temp1_reg);
+  // Round to nearest integer, ties away from zero.
+  __ Fcvtas(out_reg, in_reg);
+
+  // For positive values, zero or NaN inputs, rounding is done.
+  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
+
+  // Handle the input < 0 cases.
+  // If the input is negative but not a tie, the previous result (round to nearest) is valid.
+  // If the input is a negative tie, out_reg += 1.
+  __ Frinta(tmp_fp, in_reg);
+  __ Fsub(tmp_fp, in_reg, tmp_fp);
+  __ Fcmp(tmp_fp, 0.5);
+  __ Cinc(out_reg, out_reg, eq);
+
+  __ Bind(&done);
 }
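
A C++ model of the FCVTAS-plus-fixup sequence above makes the negative-tie case concrete (a sketch under the assumption that std::llround matches FCVTAS for in-range values; FCVTAS additionally saturates and maps NaN to 0):

    #include <cmath>
    #include <cstdint>

    static int64_t MathRoundModel(double in) {
      int64_t out = std::llround(in);       // Fcvtas: nearest, ties away from zero.
      if (out < 0) {                        // Tbz skips the fixup when out >= 0.
        double diff = in - std::round(in);  // Frinta + Fsub.
        if (diff == 0.5) {                  // Fcmp: exactly a negative tie?
          ++out;                            // Cinc: e.g. -2.5 -> -3, fixed up to -2.
        }
      }
      return out;
    }
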
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  CreateFPToIntPlusTempLocations(arena_, invoke);
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), true, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  CreateFPToIntPlusTempLocations(arena_, invoke);
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), false, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
@@ -714,7 +745,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -724,7 +755,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -734,7 +765,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -744,7 +775,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
            AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -762,7 +793,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -772,7 +803,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -782,7 +813,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -792,7 +823,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -806,7 +837,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
   codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()),
-                 MemOperand(tr, Thread::PeerOffset<8>().Int32Value()));
+                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
 }
 
 static void GenUnsafeGet(HInvoke* invoke,
@@ -817,38 +848,57 @@
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
-  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
-  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
-  Register trg = RegisterFrom(locations->Out(), type);
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
+  Location base_loc = locations->InAt(1);
+  Register base = WRegisterFrom(base_loc);      // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = XRegisterFrom(offset_loc);  // Long offset.
+  Location trg_loc = locations->Out();
+  Register trg = RegisterFrom(trg_loc, type);
 
-  MemOperand mem_op(base.X(), offset);
-  if (is_volatile) {
-    if (use_acquire_release) {
-      codegen->LoadAcquire(invoke, trg, mem_op);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
+    UseScratchRegisterScope temps(masm);
+    Register temp = temps.AcquireW();
+    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                       trg_loc,
+                                                       base,
+                                                       /* offset */ 0U,
+                                                       /* index */ offset_loc,
+                                                       /* scale_factor */ 0U,
+                                                       temp,
+                                                       /* needs_null_check */ false,
+                                                       is_volatile);
+  } else {
+    // Other cases.
+    MemOperand mem_op(base.X(), offset);
+    if (is_volatile) {
+      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
     } else {
       codegen->Load(type, trg, mem_op);
-      __ Dmb(InnerShareable, BarrierReads);
     }
-  } else {
-    codegen->Load(type, trg, mem_op);
-  }
 
-  if (type == Primitive::kPrimNot) {
-    DCHECK(trg.IsW());
-    codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg);
+    if (type == Primitive::kPrimNot) {
+      DCHECK(trg.IsW());
+      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    }
   }
 }
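
Setting the Baker read-barrier path aside, the volatile case now maps onto a single acquire load; a C++ sketch of the int variant (illustrative only; the names are made up, and the cast assumes a suitably aligned address):

    #include <atomic>
    #include <cstdint>

    // Model of the is_volatile int case above: LoadAcquire emits Ldar.
    static int32_t UnsafeGetIntVolatileModel(const char* base, int64_t offset) {
      auto* addr = reinterpret_cast<const std::atomic<int32_t>*>(base + offset);
      return addr->load(std::memory_order_acquire);
    }
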
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -871,22 +921,22 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
@@ -932,14 +982,12 @@
                          bool is_volatile,
                          bool is_ordered,
                          CodeGeneratorARM64* codegen) {
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
 
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
   Register source = value;
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-
   MemOperand mem_op(base.X(), offset);
 
   {
@@ -956,15 +1004,7 @@
     }
 
     if (is_volatile || is_ordered) {
-      if (use_acquire_release) {
-        codegen->StoreRelease(type, source, mem_op);
-      } else {
-        __ Dmb(InnerShareable, BarrierAll);
-        codegen->Store(type, source, mem_op);
-        if (is_volatile) {
-          __ Dmb(InnerShareable, BarrierReads);
-        }
-      }
+      codegen->StoreRelease(type, source, mem_op);
     } else {
       codegen->Store(type, source, mem_op);
     }
@@ -977,34 +1017,72 @@
 }
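
With the PreferAcquireRelease() distinction gone, both ordered and volatile puts collapse onto StoreRelease (Stlr); a C++ sketch of the resulting memory-ordering behavior (an assumed mapping with made-up names, not the emitted code):

    #include <atomic>
    #include <cstdint>

    static void UnsafePutIntModel(char* base, int64_t offset, int32_t value,
                                  bool is_volatile, bool is_ordered) {
      auto* addr = reinterpret_cast<std::atomic<int32_t>*>(base + offset);
      if (is_volatile || is_ordered) {
        addr->store(value, std::memory_order_release);  // StoreRelease (Stlr).
      } else {
        addr->store(value, std::memory_order_relaxed);  // Plain Store (Str).
      }
    }
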
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+                                       HInvoke* invoke,
+                                       Primitive::Type type) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
@@ -1014,12 +1092,16 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  // If heap poisoning is enabled, we don't want the unpoisoning
+  // operations to potentially clobber the output.
+  Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+      ? Location::kOutputOverlap
+      : Location::kNoOutputOverlap;
+  locations->SetOut(Location::RequiresRegister(), overlaps);
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
 
@@ -1045,7 +1127,12 @@
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
     codegen->GetAssembler()->PoisonHeapReference(expected);
-    codegen->GetAssembler()->PoisonHeapReference(value);
+    if (value.Is(expected)) {
+      // Do not poison `value`, as it is the same register as
+      // `expected`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value);
+    }
   }
 
   // do {
@@ -1053,49 +1140,54 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  vixl::Label loop_head, exit_loop;
-  if (use_acquire_release) {
-    __ Bind(&loop_head);
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-  } else {
-    __ Dmb(InnerShareable, BarrierWrites);
-    __ Bind(&loop_head);
-    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-    __ Dmb(InnerShareable, BarrierAll);
-  }
+  vixl::aarch64::Label loop_head, exit_loop;
+  __ Bind(&loop_head);
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+  __ Cmp(tmp_value, expected);
+  __ B(&exit_loop, ne);
+  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+  __ Cbnz(tmp_32, &loop_head);
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    codegen->GetAssembler()->UnpoisonHeapReference(value);
     codegen->GetAssembler()->UnpoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not unpoison `value`, as it is the same register as
+      // `expected`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value);
+    }
   }
 }
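
The Ldaxr/Stlxr loop above is the standard LL/SC compare-and-swap; its observable behavior corresponds roughly to a strong C++ CAS (a sketch for the int case; the exact ordering mapping of the exclusive pair is an assumption here):

    #include <atomic>
    #include <cstdint>

    // Cset out, eq materializes the boolean result after exit_loop.
    static bool CasModel(std::atomic<int32_t>* addr, int32_t expected,
                         int32_t desired) {
      // The Cbnz retry on a failed Stlxr makes this "strong": failure is only
      // reported when the loaded value genuinely differed from `expected`.
      return addr->compare_exchange_strong(expected, desired,
                                           std::memory_order_acq_rel,
                                           std::memory_order_acquire);
    }
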
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
-  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
-  // TODO(rpl): Fix it and turn it back on.
-  if (kPoisonHeapReferences) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
     return;
   }
 
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1105,86 +1197,134 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  // In case we need to go in the slow path, we can't have the output be the same
-  // as the input: the current liveness analysis considers the input to be live
-  // at the point of the call.
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array
-  const MemberOffset value_offset = mirror::String::ValueOffset();
-  // Location of count
-  const MemberOffset count_offset = mirror::String::CountOffset();
-
-  Register obj = WRegisterFrom(locations->InAt(0));  // String object pointer.
-  Register idx = WRegisterFrom(locations->InAt(1));  // Index of character.
-  Register out = WRegisterFrom(locations->Out());    // Result character.
-
-  UseScratchRegisterScope temps(masm);
-  Register temp = temps.AcquireW();
-  Register array_temp = temps.AcquireW();            // We can trade this for worse scheduling.
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  __ Ldr(temp, HeapOperand(obj, count_offset));          // temp = str.length.
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ Cmp(idx, temp);
-  __ B(hs, slow_path->GetEntryLabel());
-
-  __ Add(array_temp, obj, Operand(value_offset.Int32Value()));  // array_temp := str.value.
-
-  // Load the value.
-  __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1));  // out := array_temp[idx].
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = InputRegisterAt(invoke, 0);
+  Register arg = InputRegisterAt(invoke, 1);
+  DCHECK(str.IsW());
+  DCHECK(arg.IsW());
+  Register out = OutputRegister(invoke);
+
+  Register temp0 = WRegisterFrom(locations->GetTemp(0));
+  Register temp1 = WRegisterFrom(locations->GetTemp(1));
+  Register temp2 = WRegisterFrom(locations->GetTemp(2));
+
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label find_char_diff;
+  vixl::aarch64::Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = WRegisterFrom(locations->InAt(1));
-  __ Cmp(argument, 0);
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Take the slow path and throw if the input can be, and is, null.
+  SlowPathCodeARM64* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(arg, slow_path->GetEntryLabel());
+  }
 
-  __ Ldr(
-      lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pStringCompareTo).Int32Value()));
-  __ Blr(lr);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(&end, eq);
+  // Load lengths of this and argument strings.
+  __ Ldr(temp0, HeapOperand(str, count_offset));
+  __ Ldr(temp1, HeapOperand(arg, count_offset));
+  // Return zero if both strings are empty.
+  __ Orr(out, temp0, temp1);
+  __ Cbz(out, &end);
+  // out = length diff.
+  __ Subs(out, temp0, temp1);
+  // temp2 = min(len(str), len(arg)).
+  __ Csel(temp2, temp1, temp0, ge);
+  // Shorter string is empty?
+  __ Cbz(temp2, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  UseScratchRegisterScope scratch_scope(masm);
+  Register temp4 = scratch_scope.AcquireX();
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Promote temp0 to an X reg, ready for LDR.
+  temp0 = temp0.X();
+
+  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
+  __ Bind(&loop);
+  __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+  __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
+  __ Cmp(temp4, temp0);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 4);
+  __ Subs(temp2, temp2, 4);
+  __ B(gt, &loop);
+  __ B(&end);
+
+  // Promote temp1 to an X reg, ready for EOR.
+  temp1 = temp1.X();
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp0, temp4);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
+  // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+  // the difference occurs outside the remaining string data, so just return length diff (out).
+  __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
+  __ B(le, &end);
+  // Extract the characters and calculate the difference.
+  __ Bic(temp1, temp1, 0xf);
+  __ Lsr(temp0, temp0, temp1);
+  __ Lsr(temp4, temp4, temp1);
+  __ And(temp4, temp4, 0xffff);
+  __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
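
The find_char_diff bit manipulation is easier to follow in scalar form; a C++ sketch of the same computation on one differing 8-byte chunk (lhs loaded from `str`, rhs from `arg`), assuming GCC/Clang builtins. The emitted code additionally bails out to the length difference when the differing lane lies past the shorter string's end:

    #include <cstdint>

    // Precondition: lhs != rhs (the loop only branches here on a mismatch).
    static int32_t CharDiffModel(uint64_t lhs, uint64_t rhs) {
      uint64_t diff_bits = lhs ^ rhs;             // Eor: bits that differ.
      int bit = __builtin_ctzll(diff_bits);       // Rbit + Clz: lowest set bit.
      int shift = bit & ~0xf;                     // Bic ..., 0xf: 16-bit lane start.
      uint16_t a = static_cast<uint16_t>(lhs >> shift);  // Lsr + And 0xffff.
      uint16_t b = static_cast<uint16_t>(rhs >> shift);  // Lsr + UXTH.
      return static_cast<int32_t>(a) - static_cast<int32_t>(b);  // Sub.
    }
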
 
 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
@@ -1201,7 +1341,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register str = WRegisterFrom(locations->InAt(0));
@@ -1213,10 +1353,10 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
 
-  vixl::Label loop;
-  vixl::Label end;
-  vixl::Label return_true;
-  vixl::Label return_false;
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label end;
+  vixl::aarch64::Label return_true;
+  vixl::aarch64::Label return_false;
 
   // Get offsets of count, value, and class fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1226,21 +1366,26 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ Cbz(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Cbz(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Cmp(str, arg);
   __ B(&return_true, eq);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Ldr(temp, MemOperand(str.X(), class_offset));
-  __ Ldr(temp1, MemOperand(arg.X(), class_offset));
-  __ Cmp(temp, temp1);
-  __ B(&return_false, ne);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str.X(), class_offset));
+    __ Ldr(temp1, MemOperand(arg.X(), class_offset));
+    __ Cmp(temp, temp1);
+    __ B(&return_false, ne);
+  }
 
   // Load lengths of this and argument strings.
   __ Ldr(temp, MemOperand(str.X(), count_offset));
@@ -1284,21 +1429,21 @@
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
-                                       vixl::MacroAssembler* masm,
+                                       MacroAssembler* masm,
                                        CodeGeneratorARM64* codegen,
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
-  Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
 
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeARM64* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
       slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
@@ -1307,21 +1452,22 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = WRegisterFrom(locations->InAt(1));
-    __ Mov(tmp_reg, 0xFFFF);
-    __ Cmp(char_reg, Operand(tmp_reg));
+    __ Tst(char_reg, 0xFFFF0000);
     slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
     codegen->AddSlowPath(slow_path);
-    __ B(hi, slow_path->GetEntryLabel());
+    __ B(ne, slow_path->GetEntryLabel());
   }
 
   if (start_at_zero) {
     // Start-index = 0.
+    Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
     __ Mov(tmp_reg, 0);
   }
 
-  __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value()));
+  __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pIndexOf).Int32Value()));
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   __ Blr(lr);
 
   if (slow_path != nullptr) {
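
The Tst above replaces the earlier Mov/Cmp pair: a non-constant code point needs the slow path exactly when it does not fit in 16 bits. A one-line C++ sketch of the predicate being tested:

    #include <cstdint>

    static bool NeedsSlowPathModel(uint32_t code_point) {
      return (code_point & 0xFFFF0000u) != 0;  // Tst char_reg, #0xFFFF0000; B.ne.
    }
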
@@ -1331,7 +1477,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1340,17 +1486,18 @@
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
 
-  // Need a temp for slow-path codepoint compare, and need to send start_index=0.
+  // Need to send start_index=0.
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true);
+  GenerateVisitStringIndexOf(
+      invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1359,18 +1506,16 @@
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
-
-  // Need a temp for slow-path codepoint compare.
-  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false);
+  GenerateVisitStringIndexOf(
+      invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1381,7 +1526,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register byte_array = WRegisterFrom(locations->InAt(0));
@@ -1391,15 +1536,17 @@
   __ B(eq, slow_path->GetEntryLabel());
 
   __ Ldr(lr,
-      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+      MemOperand(tr,
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromBytes).Int32Value()));
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1409,28 +1556,33 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
 
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
   __ Ldr(lr,
-      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+      MemOperand(tr,
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromChars).Int32Value()));
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
-  // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register string_to_copy = WRegisterFrom(locations->InAt(0));
@@ -1440,26 +1592,1070 @@
   __ B(eq, slow_path->GetEntryLabel());
 
   __ Ldr(lr,
-      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+      MemOperand(tr,
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromString).Int32Value()));
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
-// Unimplemented intrinsics.
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
-#define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
-void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}                                                                                      \
-void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
 }
 
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
-UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
-#undef UNIMPLEMENTED_INTRINSIC
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
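+// The FP{,FP}ToFPCall locations above pin the inputs and the result to the
+// runtime calling convention's FP registers, so the helper below only needs to
+// load the entrypoint address off the thread register (tr) and branch to it;
+// no register moves are required around the call.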
+static void GenFPToFPCall(HInvoke* invoke,
+                          MacroAssembler* masm,
+                          CodeGeneratorARM64* codegen,
+                          QuickEntrypointEnum entry) {
+  __ Ldr(lr, MemOperand(tr,
+                        GetThreadOffset<kArm64PointerSize>(entry).Int32Value()));
+  __ Blr(lr);
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickNextAfter);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Location of data in char array buffer.
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Location of char array data in string.
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+
+  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
+  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
+  Register srcObj = XRegisterFrom(locations->InAt(0));
+  Register srcBegin = XRegisterFrom(locations->InAt(1));
+  Register srcEnd = XRegisterFrom(locations->InAt(2));
+  Register dstObj = XRegisterFrom(locations->InAt(3));
+  Register dstBegin = XRegisterFrom(locations->InAt(4));
+
+  Register src_ptr = XRegisterFrom(locations->GetTemp(0));
+  Register num_chr = XRegisterFrom(locations->GetTemp(1));
+  Register tmp1 = XRegisterFrom(locations->GetTemp(2));
+
+  UseScratchRegisterScope temps(masm);
+  Register dst_ptr = temps.AcquireX();
+  Register tmp2 = temps.AcquireX();
+
+  // src address to copy from.
+  __ Add(src_ptr, srcObj, Operand(value_offset));
+  __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
+
+  // dst address start to copy to.
+  __ Add(dst_ptr, dstObj, Operand(data_offset));
+  __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
+
+  __ Sub(num_chr, srcEnd, srcBegin);
+
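+  // In outline, the code below performs:
+  //   if (num_chr == 0) return;
+  //   while (num_chr >= 8) copy eight chars with one Ldp/Stp pair (16 bytes);
+  //   while (num_chr > 0)  copy one char with Ldrh/Strh;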
+  // Do the copy.
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label done;
+  vixl::aarch64::Label remainder;
+
+  // Early out for valid zero-length retrievals.
+  __ Cbz(num_chr, &done);
+
+  // Compute num_chr - 8 into tmp1 first, so that num_chr does not need to be
+  // repaired on the < 8 character path.
+  __ Subs(tmp1, num_chr, 8);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier Subs; we are going to fetch at least 8 characters.
+  __ Mov(num_chr, tmp1);
+
+  // The main loop, used for longer copies, loads and stores eight 16-bit characters per iteration.
+  // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
+  __ Bind(&loop);
+  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
+  __ Subs(num_chr, num_chr, 8);
+  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 8);
+  __ B(eq, &done);
+
+  // Main loop for < 8 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
+  __ B(gt, &remainder);
+
+  __ Bind(&done);
+}
+
+// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
+// implementation there for longer copy lengths.
+static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
+
+static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
+                                               uint32_t at,
+                                               HInstruction* input) {
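+  // A constant that cannot be encoded as an Add/Sub immediate must be
+  // materialized in a register; any other input may stay a register or a
+  // constant.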
+  HIntConstant* const_input = input->AsIntConstant();
+  if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
+    locations->SetInAt(at, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(at, Location::RegisterOrConstant(input));
+  }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0 and not so long that we would (currently) prefer libcore's
+  // native implementation.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0 || len > kSystemArrayCopyCharThreshold) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
+  locations->SetInAt(2, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
+  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
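+// Emit the validity checks for one (array, pos) pair of a System.arraycopy
+// call: branch to `slow_path` unless 0 <= pos, pos <= length(input) and
+// length(input) - pos >= length. When `length_is_input_length` holds, the copy
+// can only succeed for pos == 0, which is all that is checked.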
+static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
+                                         const Location& pos,
+                                         const Register& input,
+                                         const Location& length,
+                                         SlowPathCodeARM64* slow_path,
+                                         const Register& temp,
+                                         bool length_is_input_length = false) {
+  const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ Ldr(temp, MemOperand(input, length_offset));
+        __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
+        __ B(slow_path->GetEntryLabel(), lt);
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ Ldr(temp, MemOperand(input, length_offset));
+      __ Subs(temp, temp, pos_const);
+      __ B(slow_path->GetEntryLabel(), lt);
+
+      // Check that (length(input) - pos) >= length.
+      __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
+      __ B(slow_path->GetEntryLabel(), lt);
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    Register pos_reg = WRegisterFrom(pos);
+    __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input) && (length(input) - pos) >= length.
+    __ Ldr(temp, MemOperand(input, length_offset));
+    __ Subs(temp, temp, pos_reg);
+    // If length(input) >= pos, compare (length(input) - pos) against length with
+    // Ccmp; otherwise force the N flag so that the branch below (lt, i.e. N != V)
+    // is taken.
+    __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge);
+    __ B(slow_path->GetEntryLabel(), lt);
+  }
+}
+
+// Compute base source address, base destination address, and end source address
+// for System.arraycopy* intrinsics.
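+// In other words:
+//   src_base = src + data_offset + src_pos * element_size
+//   dst_base = dst + data_offset + dst_pos * element_size
+//   src_end  = src_base + copy_length * element_size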
+static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
+                                        Primitive::Type type,
+                                        const Register& src,
+                                        const Location& src_pos,
+                                        const Register& dst,
+                                        const Location& dst_pos,
+                                        const Location& copy_length,
+                                        const Register& src_base,
+                                        const Register& dst_base,
+                                        const Register& src_end) {
+  DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
+      << "Unexpected element type: " << type;
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
+
+  uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(src_base, src, element_size * constant + data_offset);
+  } else {
+    __ Add(src_base, src, data_offset);
+    __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
+  }
+
+  if (dst_pos.IsConstant()) {
+    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(dst_base, dst, element_size * constant + data_offset);
+  } else {
+    __ Add(dst_base, dst, data_offset);
+    __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
+  }
+
+  if (copy_length.IsConstant()) {
+    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(src_end, src_base, element_size * constant);
+  } else {
+    __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
+  }
+}
+
+void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  Register src = XRegisterFrom(locations->InAt(0));
+  Location src_pos = locations->InAt(1);
+  Register dst = XRegisterFrom(locations->InAt(2));
+  Location dst_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // If source and destination are the same, take the slow path. Overlapping copy
+  // regions would have to be copied in reverse, and we cannot always tell
+  // statically whether that is the case.
+  __ Cmp(src, dst);
+  __ B(slow_path->GetEntryLabel(), eq);
+
+  // Bail out if the source is null.
+  __ Cbz(src, slow_path->GetEntryLabel());
+
+  // Bail out if the destination is null.
+  __ Cbz(dst, slow_path->GetEntryLabel());
+
+  if (!length.IsConstant()) {
+    // If the length is negative, bail out.
+    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+    // If the length is > 32, (currently) prefer libcore's native implementation.
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
+    __ B(slow_path->GetEntryLabel(), gt);
+  } else {
+    // We have already checked in the LocationsBuilder for the constant case.
+    DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
+    DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
+  }
+
+  Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
+  Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
+  Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
+
+  CheckSystemArrayCopyPosition(masm,
+                               src_pos,
+                               src,
+                               length,
+                               slow_path,
+                               src_curr_addr,
+                               false);
+
+  CheckSystemArrayCopyPosition(masm,
+                               dst_pos,
+                               dst,
+                               length,
+                               slow_path,
+                               src_curr_addr,
+                               false);
+
+  src_curr_addr = src_curr_addr.X();
+  dst_curr_addr = dst_curr_addr.X();
+  src_stop_addr = src_stop_addr.X();
+
+  GenSystemArrayCopyAddresses(masm,
+                              Primitive::kPrimChar,
+                              src,
+                              src_pos,
+                              dst,
+                              dst_pos,
+                              length,
+                              src_curr_addr,
+                              dst_curr_addr,
+                              src_stop_addr);
+
+  // Iterate over the arrays and do a raw copy of the chars.
+  const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  UseScratchRegisterScope temps(masm);
+  Register tmp = temps.AcquireW();
+  vixl::aarch64::Label loop, done;
+  __ Bind(&loop);
+  __ Cmp(src_curr_addr, src_stop_addr);
+  __ B(&done, eq);
+  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
+  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
+  __ B(&loop);
+  __ Bind(&done);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// For copy lengths at or above this threshold we prefer libcore's native implementation.
+static constexpr int32_t kSystemArrayCopyThreshold = 128;
+
+// CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
+// We only want two temporaries here, to reduce register pressure on arm64, so we
+// build the LocationSummary ourselves instead of using that helper.
+void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0 || len >= kSystemArrayCopyThreshold) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // When source and destination are the same, we only support copying to an
+      // earlier position (src_pos >= dest_pos), i.e. a backward copy.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
+  locations->SetInAt(2, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
+  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP0, obtained from the VIXL scratch register
+    // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+    // (because that register is clobbered by ReadBarrierMarkRegX
+    // entry points). Get an extra temporary register from the
+    // register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  Register src = XRegisterFrom(locations->InAt(0));
+  Location src_pos = locations->InAt(1);
+  Register dest = XRegisterFrom(locations->InAt(2));
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Register temp1 = WRegisterFrom(locations->GetTemp(0));
+  Location temp1_loc = LocationFrom(temp1);
+  Register temp2 = WRegisterFrom(locations->GetTemp(1));
+  Location temp2_loc = LocationFrom(temp2);
+
+  SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
+
+  vixl::aarch64::Label conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ Cmp(src, dest);
+        __ B(intrinsic_slow_path->GetEntryLabel(), eq);
+      }
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ Cmp(src, dest);
+        __ B(&conditions_on_positions_validated, ne);
+      }
+      __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
+      __ B(intrinsic_slow_path->GetEntryLabel(), gt);
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ Cmp(src, dest);
+      __ B(&conditions_on_positions_validated, ne);
+    }
+    __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
+           OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
+    __ B(intrinsic_slow_path->GetEntryLabel(), lt);
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    // If the length is negative, bail out.
+    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
+    // If the length is >= 128, (currently) prefer the native implementation.
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
+    __ B(intrinsic_slow_path->GetEntryLabel(), ge);
+  }
+  // Validity checks: source.
+  CheckSystemArrayCopyPosition(masm,
+                               src_pos,
+                               src,
+                               length,
+                               intrinsic_slow_path,
+                               temp1,
+                               optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckSystemArrayCopyPosition(masm,
+                               dest_pos,
+                               dest,
+                               length,
+                               intrinsic_slow_path,
+                               temp1,
+                               optimizations.GetCountIsDestinationLength());
+  {
+    // We use a block to end the scratch scope before the write barrier, thus
+    // freeing the temporary registers so they can be used in `MarkGCCard`.
+    UseScratchRegisterScope temps(masm);
+    // Note: Because it is acquired from VIXL's scratch register pool,
+    // `temp3` might be IP0, and thus cannot be used as `ref` argument
+    // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+    // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
+    Register temp3 = temps.AcquireW();
+
+    if (!optimizations.GetDoesNotNeedTypeCheck()) {
+      // Check whether all elements of the source array are assignable to the component
+      // type of the destination array. We do two checks: the classes are the same,
+      // or the destination is Object[]. If none of these checks succeed, we go to the
+      // slow path.
+
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+          // /* HeapReference<Class> */ temp1 = src->klass_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp1_loc,
+                                                          src.W(),
+                                                          class_offset,
+                                                          temp2,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          // Bail out if the source is not a non-primitive array.
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp1_loc,
+                                                          temp1,
+                                                          component_offset,
+                                                          temp2,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+          // If heap poisoning is enabled, `temp1` has been unpoisoned
+          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+          // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
+          __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        // /* HeapReference<Class> */ temp1 = dest->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp1_loc,
+                                                        dest.W(),
+                                                        class_offset,
+                                                        temp2,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+
+        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+          // Bail out if the destination is not a non-primitive array.
+          //
+          // Register `temp1` is not trashed by the read barrier emitted
+          // by GenerateFieldLoadWithBakerReadBarrier below, as that
+          // method produces a call to a ReadBarrierMarkRegX entry point,
+          // which saves all potentially live registers, including
+          // temporaries such as `temp1`.
+          // /* HeapReference<Class> */ temp2 = temp1->component_type_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp2_loc,
+                                                          temp1,
+                                                          component_offset,
+                                                          temp3,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+          // If heap poisoning is enabled, `temp2` has been unpoisoned
+          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+          // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+          __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        // For the same reason given earlier, `temp1` is not trashed by the
+        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+        // /* HeapReference<Class> */ temp2 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp2_loc,
+                                                        src.W(),
+                                                        class_offset,
+                                                        temp3,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+        // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+        __ Cmp(temp1, temp2);
+
+        if (optimizations.GetDestinationIsTypedObjectArray()) {
+          vixl::aarch64::Label do_copy;
+          __ B(&do_copy, eq);
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp1_loc,
+                                                          temp1,
+                                                          component_offset,
+                                                          temp2,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          // We do not need to emit a read barrier for the following
+          // heap reference load, as `temp1` is only used in a
+          // comparison with null below, and this reference is not
+          // kept afterwards.
+          __ Ldr(temp1, HeapOperand(temp1, super_offset));
+          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+          __ Bind(&do_copy);
+        } else {
+          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+        }
+      } else {
+        // Non-read-barrier code.
+
+        // /* HeapReference<Class> */ temp1 = dest->klass_
+        __ Ldr(temp1, MemOperand(dest, class_offset));
+        // /* HeapReference<Class> */ temp2 = src->klass_
+        __ Ldr(temp2, MemOperand(src, class_offset));
+        bool did_unpoison = false;
+        if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+            !optimizations.GetSourceIsNonPrimitiveArray()) {
+          // One or two of the references need to be unpoisoned. Unpoison them
+          // both to make the identity check valid.
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+          did_unpoison = true;
+        }
+
+        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+          // Bail out if the destination is not a non-primitive array.
+          // /* HeapReference<Class> */ temp3 = temp1->component_type_
+          __ Ldr(temp3, HeapOperand(temp1, component_offset));
+          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+          // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+          // Bail out if the source is not a non-primitive array.
+          // /* HeapReference<Class> */ temp3 = temp2->component_type_
+          __ Ldr(temp3, HeapOperand(temp2, component_offset));
+          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+          // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        __ Cmp(temp1, temp2);
+
+        if (optimizations.GetDestinationIsTypedObjectArray()) {
+          vixl::aarch64::Label do_copy;
+          __ B(&do_copy, eq);
+          if (!did_unpoison) {
+            codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+          }
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          __ Ldr(temp1, HeapOperand(temp1, component_offset));
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ Ldr(temp1, HeapOperand(temp1, super_offset));
+          // No need to unpoison the result, we're comparing against null.
+          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+          __ Bind(&do_copy);
+        } else {
+          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+        }
+      }
+    } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+      // Bail out if the source is not a non-primitive array.
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp1_loc,
+                                                        src.W(),
+                                                        class_offset,
+                                                        temp2,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp2_loc,
+                                                        temp1,
+                                                        component_offset,
+                                                        temp3,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        __ Ldr(temp2, HeapOperand(temp1, component_offset));
+        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+      }
+      // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+      __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+    }
+
+    Register src_curr_addr = temp1.X();
+    Register dst_curr_addr = temp2.X();
+    Register src_stop_addr;
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Temporary register IP0, obtained from the VIXL scratch
+      // register pool as `temp3`, cannot be used in
+      // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+      // register is clobbered by ReadBarrierMarkRegX entry points).
+      // So another temporary register, allocated by the register
+      // allocator, is used instead.
+      DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+      src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+    } else {
+      src_stop_addr = temp3.X();
+    }
+
+    GenSystemArrayCopyAddresses(masm,
+                                Primitive::kPrimNot,
+                                src,
+                                src_pos,
+                                dest,
+                                dest_pos,
+                                length,
+                                src_curr_addr,
+                                dst_curr_addr,
+                                src_stop_addr);
+
+    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   if (src_ptr != end_ptr) {
+      //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+      //     if (is_gray) {
+      //       // Slow-path copy.
+      //       do {
+      //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //       } while (src_ptr != end_ptr)
+      //     } else {
+      //       // Fast-path copy.
+      //       do {
+      //         *dest_ptr++ = *src_ptr++;
+      //       } while (src_ptr != end_ptr)
+      //     }
+      //   }
+
+      vixl::aarch64::Label loop, done;
+
+      // Don't enter copy loop if `length == 0`.
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&done, eq);
+
+      Register tmp = temps.AcquireW();
+      // Make sure `tmp` is not IP0, as it is clobbered by
+      // ReadBarrierMarkRegX entry points in
+      // ReadBarrierSystemArrayCopySlowPathARM64.
+      DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+      // /* int32_t */ monitor = src->monitor_
+      __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
+
+      // Introduce a dependency on the lock_word including rb_state,
+      // to prevent load-load reordering, and without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `tmp`.
+      __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
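+      // (The Ldr above zero-extends the monitor word into tmp's X register, so
+      // Operand(tmp.X(), LSR, 32) is always zero; the Add changes nothing but
+      // establishes the dependency.)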
+
+      // Slow path used to copy array when `src` is gray.
+      SlowPathCodeARM64* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+      codegen_->AddSlowPath(read_barrier_slow_path);
+
+      // Given the numeric representation, it's enough to check the low bit of the rb_state.
+      static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+      static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+      __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+      // Fast-path copy.
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      __ Bind(&loop);
+      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+      __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&loop, ne);
+
+      __ Bind(read_barrier_slow_path->GetExitLabel());
+      __ Bind(&done);
+    } else {
+      // Non-read-barrier code.
+
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl::aarch64::Label loop, done;
+      __ Bind(&loop);
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&done, eq);
+      {
+        Register tmp = temps.AcquireW();
+        __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+        __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      }
+      __ B(&loop);
+      __ Bind(&done);
+    }
+  }
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+static void GenIsInfinite(LocationSummary* locations,
+                          bool is64bit,
+                          MacroAssembler* masm) {
+  Operand infinity;
+  Register out;
+
+  if (is64bit) {
+    infinity = kPositiveInfinityDouble;
+    out = XRegisterFrom(locations->Out());
+  } else {
+    infinity = kPositiveInfinityFloat;
+    out = WRegisterFrom(locations->Out());
+  }
+
+  const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
+
+  MoveFPToInt(locations, is64bit, masm);
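+  // XOR against the +infinity bit pattern: the result is zero for +infinity and
+  // has only the sign bit set for -infinity.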
+  __ Eor(out, out, infinity);
+  // We don't care about the sign bit, so shift left.
+  __ Cmp(zero, Operand(out, LSL, 1));
+  __ Cset(out, eq);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+}
+
+UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
+
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(ARM64)
 
 #undef __
 
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 4250ecf..5251536 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -20,10 +20,11 @@
 #include "intrinsics.h"
 
 namespace vixl {
+namespace aarch64 {
 
 class MacroAssembler;
 
-}  // namespace vixl
+}}  // namespace vixl::aarch64
 
 namespace art {
 
@@ -41,7 +42,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -65,7 +66,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -73,7 +74,7 @@
 #undef OPTIMIZING_INTRINSICS
 
  private:
-  vixl::MacroAssembler* GetVIXLAssembler();
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler();
 
   ArenaAllocator* GetAllocator();
 
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 8f1d5e1..db60238 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -19,83 +19,126 @@
 
 // All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected
 // to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual), then whether it requires an
-// environment.
+// environment, whether it may have side effects, and whether it may throw exceptions.
 
 #define INTRINSICS_LIST(V) \
-  V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache) \
-  V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache) \
-  V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongReverse, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache) \
-  V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathRint, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \
-  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \
-  V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(StringCharAt, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringEquals, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache) \
-  V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache) \
-  V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePut, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache)
+  V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatFloatToIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathSin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAcos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathExpm1, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathHypot, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathLog, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathLog10, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathSinh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathTan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathTanh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathSqrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCeil, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathFloor, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathRint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(StringCharAt, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringEquals, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
+  V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
+  V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePut, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndAddInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndAddLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndSetInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndSetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndSetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeLoadFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeStoreFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeFullFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow)
 
 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
 #undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
new file mode 100644
index 0000000..6e5eb66
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -0,0 +1,2546 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_mips.h"
+
+#include "arch/mips/instruction_set_features_mips.h"
+#include "art_method.h"
+#include "code_generator_mips.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/mips/assembler_mips.h"
+#include "utils/mips/constants_mips.h"
+
+namespace art {
+
+namespace mips {
+
+IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen)
+  : arena_(codegen->GetGraph()->GetArena()) {
+}
+
+MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() {
+  return reinterpret_cast<MipsAssembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const {
+  return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+}
+
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() const {
+  return codegen_->GetInstructionSetFeatures().IsR6();
+}
+
+inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
+  return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
+}
+
+#define __ codegen->GetAssembler()->
+
+static void MoveFromReturnRegister(Location trg,
+                                   Primitive::Type type,
+                                   CodeGeneratorMIPS* codegen) {
+  if (!trg.IsValid()) {
+    DCHECK_EQ(type, Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
+    Register trg_reg = trg.AsRegister<Register>();
+    if (trg_reg != V0) {
+      __ Move(V0, trg_reg);
+    }
+  } else {
+    FRegister trg_reg = trg.AsFpuRegister<FRegister>();
+    if (trg_reg != F0) {
+      if (type == Primitive::kPrimFloat) {
+        __ MovS(F0, trg_reg);
+      } else {
+        __ MovD(F0, trg_reg);
+      }
+    }
+  }
+}
+
+static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
+  InvokeDexCallingConventionVisitorMIPS calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
+}
+
+// Slow-path for fallback (calling the managed code to handle the
+// intrinsic) in an intrinsified call. This will copy the arguments
+// into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations
+//       given by the invoke's location summary. If an intrinsic
+//       modifies those locations before a slowpath call, they must be
+//       restored!
+class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in);
+
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
+                                          Location::RegisterLocation(A0));
+    } else {
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
+    }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathMIPS);
+};
+
+#undef __
+
+bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    __ Mfc1(out_lo, in);
+    __ MoveFromFpuHigh(out_hi, in);
+  } else {
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Mfc1(out, in);
+  }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    __ Mtc1(in_lo, out);
+    __ MoveToFpuHigh(in_hi, out);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    __ Mtc1(in, out);
+  }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena,
+                                    HInvoke* invoke,
+                                    Location::OutputOverlap overlaps = Location::kNoOutputOverlap) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), overlaps);
+}
+
+static void GenReverse(LocationSummary* locations,
+                       Primitive::Type type,
+                       bool isR2OrNewer,
+                       bool isR6,
+                       bool reverseBits,
+                       MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimShort ||
+         type == Primitive::kPrimInt ||
+         type == Primitive::kPrimLong);
+  DCHECK(type != Primitive::kPrimShort || !reverseBits);
+
+  if (type == Primitive::kPrimShort) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Wsbh(out, in);
+      __ Seh(out, out);
+    } else {
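+      // Pre-R2: swap the two bytes by hand and sign-extend the result.
+      // TMP receives byte 0 sign-extended into bits 15..8 (and above);
+      // "out" receives byte 1 zero-extended into bits 7..0.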
+      __ Sll(TMP, in, 24);
+      __ Sra(TMP, TMP, 16);
+      __ Sll(out, in, 16);
+      __ Srl(out, out, 24);
+      __ Or(out, out, TMP);
+    }
+  } else if (type == Primitive::kPrimInt) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(out, in, 16);
+      __ Wsbh(out, out);
+    } else {
+      // MIPS32r1
+      // __ Rotr(out, in, 16);
+      __ Sll(TMP, in, 16);
+      __ Srl(out, in, 16);
+      __ Or(out, out, TMP);
+      // __ Wsbh(out, out);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(TMP, out, AT);
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out, out, 8);
+      __ And(out, out, AT);
+      __ Or(out, out, TMP);
+    }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out, out);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out, out, 4);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out, out, 2);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out, out, 1);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+      }
+    }
+  } else if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(AT, in_hi, 16);
+      __ Rotr(TMP, in_lo, 16);
+      __ Wsbh(out_lo, AT);
+      __ Wsbh(out_hi, TMP);
+    } else {
+      // When calling CreateIntToIntLocations() we promised that the use
+      // of out_lo/out_hi wouldn't overlap with the use of in_lo/in_hi.
+      // Be very careful not to write to out_lo/out_hi until we're
+      // completely done reading from in_lo/in_hi.
+      // __ Rotr(TMP, in_lo, 16);
+      __ Sll(TMP, in_lo, 16);
+      __ Srl(AT, in_lo, 16);
+      __ Or(TMP, TMP, AT);             // Hold in TMP until it's safe
+                                       // to write to out_hi.
+      // __ Rotr(out_lo, in_hi, 16);
+      __ Sll(AT, in_hi, 16);
+      __ Srl(out_lo, in_hi, 16);        // Here we are finally done reading
+                                        // from in_lo/in_hi so it's okay to
+                                        // write to out_lo/out_hi.
+      __ Or(out_lo, out_lo, AT);
+      // __ Wsbh(out_hi, out_hi);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(out_hi, TMP, AT);
+      __ Sll(out_hi, out_hi, 8);
+      __ Srl(TMP, TMP, 8);
+      __ And(TMP, TMP, AT);
+      __ Or(out_hi, out_hi, TMP);
+      // __ Wsbh(out_lo, out_lo);
+      __ And(TMP, out_lo, AT);  // AT already holds the correct mask value
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out_lo, out_lo, 8);
+      __ And(out_lo, out_lo, AT);
+      __ Or(out_lo, out_lo, TMP);
+    }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out_hi, out_hi);
+        __ Bitswap(out_lo, out_lo);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_hi, out_hi, 4);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_lo, out_lo, 4);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_hi, out_hi, 2);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_lo, out_lo, 2);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_hi, out_hi, 1);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_lo, out_lo, 1);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+      }
+    }
+  }
+}
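+
+// For reference, on a 32-bit word the sequences above mirror this classic
+// mask-based reversal (a sketch, not emitted code). The first two steps
+// give reverseBytes(); the last three are only emitted when reverseBits
+// is true:
+//
+//   v = (v >> 16) | (v << 16);                              // swap halfwords
+//   v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);  // swap bytes
+//   v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);  // swap nibbles
+//   v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);  // swap bit pairs
+//   v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);  // swap bits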
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             IsR2OrNewer(),
+             IsR6(),
+             /* reverseBits */ false,
+             GetAssembler());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             IsR2OrNewer(),
+             IsR6(),
+             /* reverseBits */ false,
+             GetAssembler());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimShort,
+             IsR2OrNewer(),
+             IsR6(),
+             /* reverseBits */ false,
+             GetAssembler());
+}
+
+static void GenNumberOfLeadingZeroes(LocationSummary* locations,
+                                     bool is64bit,
+                                     bool isR6,
+                                     MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    if (isR6) {
+      __ ClzR6(AT, in_hi);
+      __ ClzR6(TMP, in_lo);
+      __ Seleqz(TMP, TMP, in_hi);
+    } else {
+      __ ClzR2(AT, in_hi);
+      __ ClzR2(TMP, in_lo);
+      __ Movn(TMP, ZERO, in_hi);
+    }
+    __ Addu(out, AT, TMP);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    if (isR6) {
+      __ ClzR6(out, in);
+    } else {
+      __ ClzR2(out, in);
+    }
+  }
+}
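+
+// A sketch of the 64-bit combining step above:
+//   clz64(x) == clz32(hi) + ((hi == 0) ? clz32(lo) : 0)
+// which works because clz32(0) == 32, so a zero high word contributes the
+// full 32 to the count before the low word is considered.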
+
+// int java.lang.Integer.numberOfLeadingZeros(int i)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler());
+}
+
+// int java.lang.Long.numberOfLeadingZeros(long i)
+void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler());
+}
+
+static void GenNumberOfTrailingZeroes(LocationSummary* locations,
+                                      bool is64bit,
+                                      bool isR6,
+                                      MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+  Register in_lo;
+  Register in;
+
+  if (is64bit) {
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+
+    // If in_lo is zero then count the number of trailing zeroes in in_hi;
+    // otherwise count the number of trailing zeroes in in_lo.
+    // out = in_lo ? in_lo : in_hi;
+    if (isR6) {
+      __ Seleqz(out, in_hi, in_lo);
+      __ Selnez(TMP, in_lo, in_lo);
+      __ Or(out, out, TMP);
+    } else {
+      __ Movz(out, in_hi, in_lo);
+      __ Movn(out, in_lo, in_lo);
+    }
+
+    in = out;
+  } else {
+    in = locations->InAt(0).AsRegister<Register>();
+    // Give in_lo a dummy value to keep the compiler from complaining.
+    // Since we only get here in the 32-bit case, this value will never
+    // be used.
+    in_lo = in;
+  }
+
+  if (isR6) {
+    // We don't have an instruction to count the number of trailing zeroes.
+    // Start by flipping the bits end-for-end so we can count the number of
+    // leading zeroes instead.
+    __ Rotr(out, in, 16);
+    __ Wsbh(out, out);
+    __ Bitswap(out, out);
+    __ ClzR6(out, out);
+  } else {
+    // Convert trailing zeroes to trailing ones, and bits to their left
+    // to zeroes.
+    __ Addiu(TMP, in, -1);
+    __ Xor(out, TMP, in);
+    __ And(out, out, TMP);
+    // Count number of leading zeroes.
+    __ ClzR2(out, out);
+    // Subtract number of leading zeroes from 32 to get number of trailing ones.
+    // Remember that the trailing ones were formerly trailing zeroes.
+    __ LoadConst32(TMP, 32);
+    __ Subu(out, TMP, out);
+  }
+
+  if (is64bit) {
+    // If in_lo is zero, then we counted the number of trailing zeroes in
+    // in_hi, so we must add the number of trailing zeroes in in_lo (32)
+    // to get the correct final count.
+    __ LoadConst32(TMP, 32);
+    if (isR6) {
+      __ Seleqz(TMP, TMP, in_lo);
+    } else {
+      __ Movn(TMP, ZERO, in_lo);
+    }
+    __ Addu(out, out, TMP);
+  }
+}
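+
+// The pre-R6 sequence above is the classic branch-free trick (a sketch,
+// assuming 32-bit unsigned arithmetic and clz32(0) == 32):
+//
+//   uint32_t t = v - 1;
+//   uint32_t ntz = 32 - clz32((v ^ t) & t);  // trailing zeroes became ones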
+
+// int java.lang.Integer.numberOfTrailingZeros(int i)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler());
+}
+
+// int java.lang.Long.numberOfTrailingZeros(long i)
+void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler());
+}
+
+// int java.lang.Integer.reverse(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             IsR2OrNewer(),
+             IsR6(),
+             /* reverseBits */ true,
+             GetAssembler());
+}
+
+// long java.lang.Long.reverse(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             IsR2OrNewer(),
+             IsR6(),
+             /* reverseBits */ true,
+             GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenBitCount(LocationSummary* locations,
+                        Primitive::Type type,
+                        bool isR6,
+                        MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  //
+  // A generalization of the best bit counting method to integers of
+  // bit-widths up to 128 (parameterized by type T) is this:
+  //
+  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
+  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
+  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
+  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+  //
+  // For comparison, for 32-bit quantities, this algorithm can be executed
+  // using 20 MIPS instructions (the calls to LoadConst32() generate two
+  // machine instructions each for the values being used in this algorithm).
+  // An (unrolled) loop-based algorithm would require 25 instructions.
+  //
+  // For 64-bit quantities, this algorithm gets executed twice (once
+  // for in_lo, and again for in_hi), but saves a few instructions
+  // because the mask values only have to be loaded once.  Using this
+  // algorithm the count for a 64-bit operand can be performed in 33
+  // instructions compared to a loop-based algorithm which required 47
+  // instructions.
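+  //
+  // Instantiated for T = uint32_t, the steps mirrored by the emitted code
+  // are (a sketch; the constants match the LoadConst32() values below):
+  //
+  //   v = v - ((v >> 1) & 0x55555555);
+  //   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+  //   v = (v + (v >> 4)) & 0x0F0F0F0F;
+  //   c = (v * 0x01010101) >> 24;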
+
+  if (type == Primitive::kPrimInt) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    __ Srl(TMP, in, 1);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, TMP, AT);
+    __ Subu(TMP, in, TMP);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(out, TMP, AT);
+    __ Srl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Addu(TMP, out, TMP);
+    __ Srl(out, TMP, 4);
+    __ Addu(out, out, TMP);
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(out, out, AT);
+    __ LoadConst32(TMP, 0x01010101);
+    if (isR6) {
+      __ MulR6(out, out, TMP);
+    } else {
+      __ MulR2(out, out, TMP);
+    }
+    __ Srl(out, out, 24);
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
+    Register out_hi = locations->GetTemp(1).AsRegister<Register>();
+    Register tmp_lo = TMP;
+    Register out_lo = out;
+
+    __ Srl(tmp_lo, in_lo, 1);
+    __ Srl(tmp_hi, in_hi, 1);
+
+    __ LoadConst32(AT, 0x55555555);
+
+    __ And(tmp_lo, tmp_lo, AT);
+    __ Subu(tmp_lo, in_lo, tmp_lo);
+
+    __ And(tmp_hi, tmp_hi, AT);
+    __ Subu(tmp_hi, in_hi, tmp_hi);
+
+    __ LoadConst32(AT, 0x33333333);
+
+    __ And(out_lo, tmp_lo, AT);
+    __ Srl(tmp_lo, tmp_lo, 2);
+    __ And(tmp_lo, tmp_lo, AT);
+    __ Addu(tmp_lo, out_lo, tmp_lo);
+    __ Srl(out_lo, tmp_lo, 4);
+    __ Addu(out_lo, out_lo, tmp_lo);
+
+    __ And(out_hi, tmp_hi, AT);
+    __ Srl(tmp_hi, tmp_hi, 2);
+    __ And(tmp_hi, tmp_hi, AT);
+    __ Addu(tmp_hi, out_hi, tmp_hi);
+    __ Srl(out_hi, tmp_hi, 4);
+    __ Addu(out_hi, out_hi, tmp_hi);
+
+    __ LoadConst32(AT, 0x0F0F0F0F);
+
+    __ And(out_lo, out_lo, AT);
+    __ And(out_hi, out_hi, AT);
+
+    __ LoadConst32(AT, 0x01010101);
+
+    if (isR6) {
+      __ MulR6(out_lo, out_lo, AT);
+
+      __ MulR6(out_hi, out_hi, AT);
+    } else {
+      __ MulR2(out_lo, out_lo, AT);
+
+      __ MulR2(out_hi, out_hi, AT);
+    }
+
+    __ Srl(out_lo, out_lo, 24);
+    __ Srl(out_hi, out_hi, 24);
+
+    __ Addu(out, out_hi, out_lo);
+  }
+}
+
+// int java.lang.Integer.bitCount(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler());
+}
+
+// int java.lang.Long.bitCount(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    __ AbsD(out, in);
+  } else {
+    __ AbsS(out, in);
+  }
+}
+
+// double java.lang.Math.abs(double)
+void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+// float java.lang.Math.abs(float)
+void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    // The comments in this section show the analogous operations which would
+    // be performed if we had 64-bit registers "in", and "out".
+    // __ Dsra32(AT, in, 31);
+    __ Sra(AT, in_hi, 31);
+    // __ Xor(out, in, AT);
+    __ Xor(TMP, in_lo, AT);
+    __ Xor(out_hi, in_hi, AT);
+    // __ Dsubu(out, out, AT);
+    __ Subu(out_lo, TMP, AT);
+    __ Sltu(TMP, out_lo, TMP);
+    __ Addu(out_hi, out_hi, TMP);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Sra(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Subu(out, out, AT);
+  }
+}
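+
+// Both paths above implement the branch-free identity (a sketch; ">>" is
+// an arithmetic shift):
+//
+//   int32_t s = x >> 31;        // 0 if x >= 0, -1 if x < 0
+//   int32_t abs = (x ^ s) - s;
+//
+// The 64-bit variant applies it to the register pair, using Sltu/Addu to
+// propagate the carry out of the low-word subtraction into the high word.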
+
+// int java.lang.Math.abs(int)
+void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+// long java.lang.Math.abs(long)
+void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        Primitive::Type type,
+                        bool is_R6,
+                        MipsAssembler* assembler) {
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+  FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
+
+  if (is_R6) {
+    MipsLabel noNaNs;
+    MipsLabel done;
+    FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+    // When Java computes min/max it prefers a NaN to a number; the
+    // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+    // the inputs is a NaN and the other is a valid number, the MIPS
+    // instruction will return the number, but Java wants the NaN value
+    // returned. This is why there is extra logic preceding the use of
+    // the MIPS min.fmt/max.fmt instructions. If either a or b holds a
+    // NaN, return the NaN; otherwise return the min/max.
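+    // In effect (a sketch):
+    //   result = (a != a || b != b) ? ((a != a) ? a : b)
+    //                               : (is_min ? min(a, b) : max(a, b));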
+    if (type == Primitive::kPrimDouble) {
+      __ CmpUnD(FTMP, a, b);
+      __ Bc1eqz(FTMP, &noNaNs);
+
+      // One of the inputs is a NaN.
+      __ CmpEqD(ftmp, a, a);
+      // If a == a then b is the NaN, otherwise a is the NaN.
+      __ SelD(ftmp, a, b);
+
+      if (ftmp != out) {
+        __ MovD(out, ftmp);
+      }
+
+      __ B(&done);
+
+      __ Bind(&noNaNs);
+
+      if (is_min) {
+        __ MinD(out, a, b);
+      } else {
+        __ MaxD(out, a, b);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimFloat);
+      __ CmpUnS(FTMP, a, b);
+      __ Bc1eqz(FTMP, &noNaNs);
+
+      // One of the inputs is a NaN.
+      __ CmpEqS(ftmp, a, a);
+      // If a == a then b is the NaN, otherwise a is the NaN.
+      __ SelS(ftmp, a, b);
+
+      if (ftmp != out) {
+        __ MovS(out, ftmp);
+      }
+
+      __ B(&done);
+
+      __ Bind(&noNaNs);
+
+      if (is_min) {
+        __ MinS(out, a, b);
+      } else {
+        __ MaxS(out, a, b);
+      }
+    }
+
+    __ Bind(&done);
+  } else {
+    MipsLabel ordered;
+    MipsLabel compare;
+    MipsLabel select;
+    MipsLabel done;
+
+    if (type == Primitive::kPrimDouble) {
+      __ CunD(a, b);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimFloat);
+      __ CunS(a, b);
+    }
+    __ Bc1f(&ordered);
+
+    // a or b (or both) is a NaN. Return one, which is a NaN.
+    if (type == Primitive::kPrimDouble) {
+      __ CeqD(b, b);
+    } else {
+      __ CeqS(b, b);
+    }
+    __ B(&select);
+
+    __ Bind(&ordered);
+
+    // Neither is a NaN.
+    // a == b? (-0.0 compares equal with +0.0)
+    // If equal, handle zeroes, else compare further.
+    if (type == Primitive::kPrimDouble) {
+      __ CeqD(a, b);
+    } else {
+      __ CeqS(a, b);
+    }
+    __ Bc1f(&compare);
+
+    // a == b either bit for bit or one is -0.0 and the other is +0.0.
+    if (type == Primitive::kPrimDouble) {
+      __ MoveFromFpuHigh(TMP, a);
+      __ MoveFromFpuHigh(AT, b);
+    } else {
+      __ Mfc1(TMP, a);
+      __ Mfc1(AT, b);
+    }
+
+    if (is_min) {
+      // -0.0 prevails over +0.0.
+      __ Or(TMP, TMP, AT);
+    } else {
+      // +0.0 prevails over -0.0.
+      __ And(TMP, TMP, AT);
+    }
+
+    if (type == Primitive::kPrimDouble) {
+      __ Mfc1(AT, a);
+      __ Mtc1(AT, out);
+      __ MoveToFpuHigh(TMP, out);
+    } else {
+      __ Mtc1(TMP, out);
+    }
+    __ B(&done);
+
+    __ Bind(&compare);
+
+    if (type == Primitive::kPrimDouble) {
+      if (is_min) {
+        // return (a <= b) ? a : b;
+        __ ColeD(a, b);
+      } else {
+        // return (a >= b) ? a : b;
+        __ ColeD(b, a);  // b <= a
+      }
+    } else {
+      if (is_min) {
+        // return (a <= b) ? a : b;
+        __ ColeS(a, b);
+      } else {
+        // return (a >= b) ? a : b;
+        __ ColeS(b, a);  // b <= a
+      }
+    }
+
+    __ Bind(&select);
+
+    if (type == Primitive::kPrimDouble) {
+      __ MovtD(out, a);
+      __ MovfD(out, b);
+    } else {
+      __ MovtS(out, a);
+      __ MovfS(out, b);
+    }
+
+    __ Bind(&done);
+  }
+}
+
+static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+}
+
+// double java.lang.Math.min(double, double)
+void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ true,
+              Primitive::kPrimDouble,
+              IsR6(),
+              GetAssembler());
+}
+
+// float java.lang.Math.min(float, float)
+void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ true,
+              Primitive::kPrimFloat,
+              IsR6(),
+              GetAssembler());
+}
+
+// double java.lang.Math.max(double, double)
+void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ false,
+              Primitive::kPrimDouble,
+              IsR6(),
+              GetAssembler());
+}
+
+// float java.lang.Math.max(float, float)
+void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ false,
+              Primitive::kPrimFloat,
+              IsR6(),
+              GetAssembler());
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenMinMax(LocationSummary* locations,
+                      bool is_min,
+                      Primitive::Type type,
+                      bool is_R6,
+                      MipsAssembler* assembler) {
+  if (is_R6) {
+    // Some architectures, such as ARM and MIPS (prior to r6), have a
+    // conditional move instruction which only changes the target
+    // (output) register if the condition is true (MIPS prior to r6 had
+    // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
+    // always change the target (output) register.  If the condition is
+    // true the output register gets the contents of the "rs" register;
+    // otherwise, the output register is set to zero. One consequence
+    // of this is that to implement something like "rd = c==0 ? rs : rt"
+    // MIPSR6 needs to use a pair of SELEQZ/SELNEZ instructions.
+    // After executing this pair of instructions one of the output
+    // registers from the pair will necessarily contain zero. Then the
+    // code ORs the output registers from the SELEQZ/SELNEZ instructions
+    // to get the final result.
+    //
+    // The initial test to see if the output register is same as the
+    // first input register is needed to make sure that value in the
+    // first input register isn't clobbered before we've finished
+    // computing the output value. The logic in the corresponding else
+    // clause performs the same task but makes sure the second input
+    // register isn't clobbered in the event that it's the same register
+    // as the output register; the else clause also handles the case
+    // where the output register is distinct from both the first, and the
+    // second input registers.
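+    //
+    // For example, the 32-bit integer min case below boils down to this
+    // pattern (a sketch, registers named as in the code):
+    //   slt    AT,  b, a     // AT  = (b < a) ? 1 : 0
+    //   seleqz TMP, a, AT    // TMP = (AT == 0) ? a : 0
+    //   selnez AT,  b, AT    // AT  = (AT != 0) ? b : 0
+    //   or     out, TMP, AT  // out = min(a, b)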
+    if (type == Primitive::kPrimLong) {
+      Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+      Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+      Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+      Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+      MipsLabel compare_done;
+
+      if (a_lo == b_lo) {
+        if (out_lo != a_lo) {
+          __ Move(out_lo, a_lo);
+          __ Move(out_hi, a_hi);
+        }
+      } else {
+        __ Slt(TMP, b_hi, a_hi);
+        __ Bne(b_hi, a_hi, &compare_done);
+
+        __ Sltu(TMP, b_lo, a_lo);
+
+        __ Bind(&compare_done);
+
+        if (is_min) {
+          __ Seleqz(AT, a_lo, TMP);
+          __ Selnez(out_lo, b_lo, TMP);  // Safe even if out_lo == a_lo/b_lo
+                                         // because at this point we're
+                                         // done using a_lo/b_lo.
+        } else {
+          __ Selnez(AT, a_lo, TMP);
+          __ Seleqz(out_lo, b_lo, TMP);  // ditto
+        }
+        __ Or(out_lo, out_lo, AT);
+        if (is_min) {
+          __ Seleqz(AT, a_hi, TMP);
+          __ Selnez(out_hi, b_hi, TMP);  // ditto but for out_hi & a_hi/b_hi
+        } else {
+          __ Selnez(AT, a_hi, TMP);
+          __ Seleqz(out_hi, b_hi, TMP);  // ditto but for out_hi & a_hi/b_hi
+        }
+        __ Or(out_hi, out_hi, AT);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimInt);
+      Register a = locations->InAt(0).AsRegister<Register>();
+      Register b = locations->InAt(1).AsRegister<Register>();
+      Register out = locations->Out().AsRegister<Register>();
+
+      if (a == b) {
+        if (out != a) {
+          __ Move(out, a);
+        }
+      } else {
+        __ Slt(AT, b, a);
+        if (is_min) {
+          __ Seleqz(TMP, a, AT);
+          __ Selnez(AT, b, AT);
+        } else {
+          __ Selnez(TMP, a, AT);
+          __ Seleqz(AT, b, AT);
+        }
+        __ Or(out, TMP, AT);
+      }
+    }
+  } else {
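+    // Pre-R6: use the MOVZ/MOVN conditional moves described above. They
+    // only write "out" when the condition register is zero/non-zero, so
+    // the guards below also avoid clobbering a still-needed input when it
+    // shares a register with the output.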
+    if (type == Primitive::kPrimLong) {
+      Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+      Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+      Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+      Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+      MipsLabel compare_done;
+
+      if (a_lo == b_lo) {
+        if (out_lo != a_lo) {
+          __ Move(out_lo, a_lo);
+          __ Move(out_hi, a_hi);
+        }
+      } else {
+        __ Slt(TMP, a_hi, b_hi);
+        __ Bne(a_hi, b_hi, &compare_done);
+
+        __ Sltu(TMP, a_lo, b_lo);
+
+        __ Bind(&compare_done);
+
+        if (is_min) {
+          if (out_lo != a_lo) {
+            __ Movn(out_hi, a_hi, TMP);
+            __ Movn(out_lo, a_lo, TMP);
+          }
+          if (out_lo != b_lo) {
+            __ Movz(out_hi, b_hi, TMP);
+            __ Movz(out_lo, b_lo, TMP);
+          }
+        } else {
+          if (out_lo != a_lo) {
+            __ Movz(out_hi, a_hi, TMP);
+            __ Movz(out_lo, a_lo, TMP);
+          }
+          if (out_lo != b_lo) {
+            __ Movn(out_hi, b_hi, TMP);
+            __ Movn(out_lo, b_lo, TMP);
+          }
+        }
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimInt);
+      Register a = locations->InAt(0).AsRegister<Register>();
+      Register b = locations->InAt(1).AsRegister<Register>();
+      Register out = locations->Out().AsRegister<Register>();
+
+      if (a == b) {
+        if (out != a) {
+          __ Move(out, a);
+        }
+      } else {
+        __ Slt(AT, a, b);
+        if (is_min) {
+          if (out != a) {
+            __ Movn(out, a, AT);
+          }
+          if (out != b) {
+            __ Movz(out, b, AT);
+          }
+        } else {
+          if (out != a) {
+            __ Movz(out, a, AT);
+          }
+          if (out != b) {
+            __ Movn(out, b, AT);
+          }
+        }
+      }
+    }
+  }
+}
+
+// int java.lang.Math.min(int, int)
+void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(),
+            /* is_min */ true,
+            Primitive::kPrimInt,
+            IsR6(),
+            GetAssembler());
+}
+
+// long java.lang.Math.min(long, long)
+void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(),
+            /* is_min */ true,
+            Primitive::kPrimLong,
+            IsR6(),
+            GetAssembler());
+}
+
+// int java.lang.Math.max(int, int)
+void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(),
+            /* is_min */ false,
+            Primitive::kPrimInt,
+            IsR6(),
+            GetAssembler());
+}
+
+// long java.lang.Math.max(long, long)
+void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(),
+            /* is_min */ false,
+            Primitive::kPrimLong,
+            IsR6(),
+            GetAssembler());
+}
+
+// double java.lang.Math.sqrt(double)
+void IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathSqrt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  __ SqrtD(out, in);
+}
+
+// byte libcore.io.Memory.peekByte(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekByte(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  __ Lb(out, adr, 0);
+}
+
+// short libcore.io.Memory.peekShort(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Lh(out, adr, 0);
+  } else if (IsR2OrNewer()) {
+    // Unlike for words, there are no lhl/lhr instructions to load
+    // unaligned halfwords, so the code loads the two bytes individually,
+    // in case the address isn't halfword-aligned, and assembles them
+    // into a signed halfword.
+    __ Lb(AT, adr, 1);   // This byte must be sign-extended.
+    __ Lb(out, adr, 0);  // This byte can be either sign-extended or
+                         // zero-extended, because the following
+                         // instruction overwrites the sign bits.
+    __ Ins(out, AT, 8, 24);
+  } else {
+    __ Lbu(AT, adr, 0);  // This byte must be zero-extended.  If it's not,
+                         // the "or" instruction below will destroy the
+                         // upper 24 bits of the final result.
+    __ Lb(out, adr, 1);  // This byte must be sign-extended.
+    __ Sll(out, out, 8);
+    __ Or(out, out, AT);
+  }
+}
+
+// int libcore.io.Memory.peekInt(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Lw(out, adr, 0);
+  } else {
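+    // Pre-R6 has no lw that tolerates unaligned addresses, so the
+    // lwr/lwl pair assembles the word in two parts.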
+    __ Lwr(out, adr, 0);
+    __ Lwl(out, adr, 3);
+  }
+}
+
+// long libcore.io.Memory.peekLong(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out_lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
+  Register out_hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
+
+  if (IsR6()) {
+    __ Lw(out_lo, adr, 0);
+    __ Lw(out_hi, adr, 4);
+  } else {
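+    // Load each half of the long with the same lwr/lwl sequence used
+    // for possibly unaligned words.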
+    __ Lwr(out_lo, adr, 0);
+    __ Lwl(out_lo, adr, 3);
+    __ Lwr(out_hi, adr, 4);
+    __ Lwl(out_hi, adr, 7);
+  }
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+// void libcore.io.Memory.pokeByte(long address, byte value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeByte(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  __ Sb(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeShort(long address, short value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Sh(val, adr, 0);
+  } else {
+    // Unlike for words, there are no shl/shr instructions to store
+    // unaligned halfwords, so the code stores the two bytes individually,
+    // in case the address isn't halfword-aligned.
+    __ Sb(val, adr, 0);
+    __ Srl(AT, val, 8);
+    __ Sb(AT, adr, 1);
+  }
+}
+
+// void libcore.io.Memory.pokeInt(long address, int value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Sw(val, adr, 0);
+  } else {
+    __ Swr(val, adr, 0);
+    __ Swl(val, adr, 3);
+  }
+}
+
+// void libcore.io.Memory.pokeLong(long address, long value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val_lo = invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>();
+  Register val_hi = invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>();
+
+  if (IsR6()) {
+    __ Sw(val_lo, adr, 0);
+    __ Sw(val_hi, adr, 4);
+  } else {
+    __ Swr(val_lo, adr, 0);
+    __ Swl(val_lo, adr, 3);
+    __ Swr(val_hi, adr, 4);
+    __ Swl(val_hi, adr, 7);
+  }
+}
+
+// Thread java.lang.Thread.currentThread()
+void IntrinsicLocationsBuilderMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  __ LoadFromOffset(kLoadWord,
+                    out,
+                    TR,
+                    Thread::PeerOffset<kMipsPointerSize>().Int32Value());
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call =
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile;
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         bool is_R6,
+                         CodeGeneratorMIPS* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK((type == Primitive::kPrimInt) ||
+         (type == Primitive::kPrimLong) ||
+         (type == Primitive::kPrimNot)) << type;
+  MipsAssembler* assembler = codegen->GetAssembler();
+  // Object pointer.
+  Register base = locations->InAt(1).AsRegister<Register>();
+  // The "offset" argument is passed as a "long". Since this code is for
+  // a 32-bit processor, we can only use 32-bit addresses, so we only
+  // need the low 32 bits of the offset.
+  Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>();
+
+  __ Addu(TMP, base, offset_lo);
+  if (is_volatile) {
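+    // Emit a full memory barrier (sync 0) to order the volatile load
+    // against surrounding memory accesses.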
+    __ Sync(0);
+  }
+  if (type == Primitive::kPrimLong) {
+    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    if (is_R6) {
+      __ Lw(trg_lo, TMP, 0);
+      __ Lw(trg_hi, TMP, 4);
+    } else {
+      __ Lwr(trg_lo, TMP, 0);
+      __ Lwl(trg_lo, TMP, 3);
+      __ Lwr(trg_hi, TMP, 4);
+      __ Lwl(trg_hi, TMP, 7);
+    }
+  } else {
+    Register trg = locations->Out().AsRegister<Register>();
+
+    if (is_R6) {
+      __ Lw(trg, TMP, 0);
+    } else {
+      __ Lwr(trg, TMP, 0);
+      __ Lwl(trg, TMP, 3);
+    }
+  }
+}
+
+// int sun.misc.Unsafe.getInt(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, IsR6(), codegen_);
+}
+
+// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, IsR6(), codegen_);
+}
+
+// long sun.misc.Unsafe.getLong(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_);
+}
+
+// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_);
+}
+
+// Object sun.misc.Unsafe.getObject(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, IsR6(), codegen_);
+}
+
+// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, IsR6(), codegen_);
+}
+
+static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+}
+
+static void GenUnsafePut(LocationSummary* locations,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         bool is_ordered,
+                         bool is_R6,
+                         CodeGeneratorMIPS* codegen) {
+  DCHECK((type == Primitive::kPrimInt) ||
+         (type == Primitive::kPrimLong) ||
+         (type == Primitive::kPrimNot)) << type;
+  MipsAssembler* assembler = codegen->GetAssembler();
+  // Object pointer.
+  Register base = locations->InAt(1).AsRegister<Register>();
+  // The "offset" argument is passed as a "long", i.e., it's 64-bits in
+  // size. Since this code is for a 32-bit processor, we can only use
+  // 32-bit addresses, so we only need the low 32-bits of offset.
+  Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+
+  __ Addu(TMP, base, offset_lo);
+  if (is_volatile || is_ordered) {
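+    // A full barrier before the store gives both volatile and ordered
+    // puts release semantics; volatile puts get a second barrier after
+    // the store below.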
+    __ Sync(0);
+  }
+  if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
+    Register value = locations->InAt(3).AsRegister<Register>();
+
+    if (is_R6) {
+      __ Sw(value, TMP, 0);
+    } else {
+      __ Swr(value, TMP, 0);
+      __ Swl(value, TMP, 3);
+    }
+  } else {
+    Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
+    Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
+
+    if (is_R6) {
+      __ Sw(value_lo, TMP, 0);
+      __ Sw(value_hi, TMP, 4);
+    } else {
+      __ Swr(value_lo, TMP, 0);
+      __ Swl(value_lo, TMP, 3);
+      __ Swr(value_hi, TMP, 4);
+      __ Swl(value_hi, TMP, 7);
+    }
+  }
+
+  if (is_volatile) {
+    __ Sync(0);
+  }
+
+  if (type == Primitive::kPrimNot) {
+    codegen->MarkGCCard(base, locations->InAt(3).AsRegister<Register>());
+  }
+}
+
+// void sun.misc.Unsafe.putInt(Object o, long offset, int x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePut(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putObject(Object o, long offset, Object x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObject(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putLong(Object o, long offset, long x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLong(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               IsR6(),
+               codegen_);
+}
+
+// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               IsR6(),
+               codegen_);
+}
+
+static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) {
+  MipsAssembler* assembler = codegen->GetAssembler();
+  bool isR6 = codegen->GetInstructionSetFeatures().IsR6();
+  Register base = locations->InAt(1).AsRegister<Register>();
+  Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+  Register expected = locations->InAt(3).AsRegister<Register>();
+  Register value = locations->InAt(4).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  DCHECK_NE(base, out);
+  DCHECK_NE(offset_lo, out);
+  DCHECK_NE(expected, out);
+
+  if (type == Primitive::kPrimNot) {
+    // Mark card for object assuming new value is stored.
+    codegen->MarkGCCard(base, value);
+  }
+
+  // do {
+  //   tmp_value = [tmp_ptr] - expected;
+  // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
+  // result = tmp_value == 0;
+
+  MipsLabel loop_head, exit_loop;
+  __ Addu(TMP, base, offset_lo);
+  __ Sync(0);
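+  // Full barriers before and after the ll/sc loop give the CAS both
+  // acquire and release semantics.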
+  __ Bind(&loop_head);
+  if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
+    if (isR6) {
+      __ LlR6(out, TMP);
+    } else {
+      __ LlR2(out, TMP);
+    }
+  } else {
+    LOG(FATAL) << "Unsupported op size " << type;
+    UNREACHABLE();
+  }
+  __ Subu(out, out, expected);          // If we didn't get the 'expected'
+  __ Sltiu(out, out, 1);                // value, set 'out' to false, and
+  __ Beqz(out, &exit_loop);             // return.
+  __ Move(out, value);  // Use 'out' for the 'store conditional' instruction.
+                        // If we used 'value' directly, we would lose it
+                        // when the store fails.  Whether the store succeeds
+                        // or fails, it leaves the correct boolean value in
+                        // the 'out' register.
+  // This test isn't really necessary. We only support Primitive::kPrimInt
+  // and Primitive::kPrimNot, and we already verified that we're working
+  // on one of those two types. It's left here in case the code needs to
+  // support other types in the future.
+  if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
+    if (isR6) {
+      __ ScR6(out, TMP);
+    } else {
+      __ ScR2(out, TMP);
+    }
+  }
+  __ Beqz(out, &loop_head);     // If we couldn't do the read-modify-write
+                                // cycle atomically, then retry.
+  __ Bind(&exit_loop);
+  __ Sync(0);
+}
+
+// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+}
+
+// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
+void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+}
+
+// int java.lang.String.compareTo(String anotherString)
+void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainOnly,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  Register argument = locations->InAt(1).AsRegister<Register>();
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqz(argument, slow_path->GetEntryLabel());
+
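+  // Load the pStringCompareTo entrypoint from the thread register and
+  // call it through T9; the Nop fills the jalr branch delay slot.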
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+  MipsLabel loop;
+  MipsLabel end;
+  MipsLabel return_true;
+  MipsLabel return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this", and the register
+  // containing the pointer to "anObject" are the same register then
+  // "this", and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+  if (str == arg) {
+    __ LoadConst32(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqz(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beq(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bne(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bne(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqz(temp1, &return_true);
+
+  // Don't overwrite the input registers.
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 2 characters at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 2 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded.
+  __ Bind(&loop);
+  __ Lw(out, TMP, value_offset);
+  __ Lw(temp2, temp3, value_offset);
+  __ Bne(out, temp2, &return_false);
+  __ Addiu(TMP, TMP, 4);
+  __ Addiu(temp3, temp3, 4);
+  __ Addiu(temp1, temp1, -2);
+  __ Bgtz(temp1, &loop);
+
+  // Return true and exit the function.
+  // If the loop does not branch to return_false, the strings are equal.
+  __ Bind(&return_true);
+  __ LoadConst32(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst32(out, 0);
+  __ Bind(&end);
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+                                  bool start_at_zero,
+                                  MipsAssembler* assembler,
+                                  CodeGeneratorMIPS* codegen,
+                                  ArenaAllocator* allocator) {
+  LocationSummary* locations = invoke->GetLocations();
+  Register tmp_reg = start_at_zero ? locations->GetTemp(0).AsRegister<Register>() : TMP;
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF: emit a slow-path check when the value
+  // isn't known statically, dispatch directly to the slow path for a large
+  // constant, or omit the slow path entirely for a small constant or a char.
+  SlowPathCodeMIPS* slow_path = nullptr;
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) {
+      // Always needs the slow-path. We could directly dispatch to it,
+      // but this case should be rare, so for simplicity just put the
+      // full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathMIPS(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
+    Register char_reg = locations->InAt(1).AsRegister<Register>();
+    // The "bltu" conditional branch tests to see if the character value
+    // fits in a valid 16-bit (MIPS halfword) value. If it doesn't then
+    // the character being searched for, if it exists in the string, is
+    // encoded using UTF-16 and stored in the string as two (16-bit)
+    // halfwords. Currently the assembly code used to implement this
+    // intrinsic doesn't support searching for a character stored as
+    // two halfwords so we fallback to using the generic implementation
+    // of indexOf().
+    __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max());
+    slow_path = new (allocator) IntrinsicSlowPathMIPS(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ Bltu(tmp_reg, char_reg, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    DCHECK_EQ(tmp_reg, A2);
+    // Start-index = 0.
+    __ Clear(tmp_reg);
+  }
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+// int java.lang.String.indexOf(int ch)
+void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime
+  // calling convention. So it's best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+
+  // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke,
+                        /* start_at_zero */ true,
+                        GetAssembler(),
+                        codegen_,
+                        GetAllocator());
+}
+
+// int java.lang.String.indexOf(int ch, int fromIndex)
+void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime
+  // calling convention. So it's best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+
+  // Need a temp for slow-path codepoint compare.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke,
+                        /* start_at_zero */ false,
+                        GetAssembler(),
+                        codegen_,
+                        GetAllocator());
+}
+
+// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
+void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register byte_array = locations->InAt(0).AsRegister<Register>();
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqz(byte_array, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainOnly,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+// java.lang.StringFactory.newStringFromString(String toCopy)
+void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqz(string_to_copy, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenIsInfinite(LocationSummary* locations,
+                          const Primitive::Type type,
+                          const bool isR6,
+                          MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
+
+  if (isR6) {
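+    // class.fmt classifies its operand, setting one result bit per value
+    // class; keep only the two infinity bits and turn "any bit set" into
+    // a boolean with Sltu.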
+    if (type == Primitive::kPrimDouble) {
+      __ ClassD(FTMP, in);
+    } else {
+      __ ClassS(FTMP, in);
+    }
+    __ Mfc1(out, FTMP);
+    __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+    __ Sltu(out, ZERO, out);
+  } else {
+    // If one or more of the exponent bits is zero, then the number can't be infinite.
+    if (type == Primitive::kPrimDouble) {
+      __ MoveFromFpuHigh(TMP, in);
+      __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble));
+    } else {
+      __ Mfc1(TMP, in);
+      __ LoadConst32(AT, kPositiveInfinityFloat);
+    }
+    __ Xor(TMP, TMP, AT);
+
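+    // After the Xor with the +infinity bit pattern, shifting left by one
+    // discards the sign bit, so both +infinity and -infinity leave TMP
+    // equal to zero (for doubles, the low word is OR'd in below).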
+    __ Sll(TMP, TMP, 1);
+
+    if (type == Primitive::kPrimDouble) {
+      __ Mfc1(AT, in);
+      __ Or(TMP, TMP, AT);
+    }
+    // If any of the significand bits are one, then the number is not infinite.
+    __ Sltiu(out, TMP, 1);
+  }
+}
+
+// boolean java.lang.Float.isInfinite(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), Primitive::kPrimFloat, IsR6(), GetAssembler());
+}
+
+// boolean java.lang.Double.isInfinite(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), Primitive::kPrimDouble, IsR6(), GetAssembler());
+}
+
+static void GenHighestOneBit(LocationSummary* locations,
+                             const Primitive::Type type,
+                             bool isR6,
+                             MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
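+    // For each half, clz counts the leading zeros and 0x80000000 >> clz
+    // isolates the highest set bit.  The And clears the result when the
+    // half itself is zero (clz = 32 shifts by 32 & 31 = 0, and And-ing
+    // with 0 yields 0).  If the high half produced a bit, the low half's
+    // result is then discarded.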
+    if (isR6) {
+      __ ClzR6(TMP, in_hi);
+    } else {
+      __ ClzR2(TMP, in_hi);
+    }
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(out_hi, AT, TMP);
+    __ And(out_hi, out_hi, in_hi);
+    if (isR6) {
+      __ ClzR6(TMP, in_lo);
+    } else {
+      __ ClzR2(TMP, in_lo);
+    }
+    __ Srlv(out_lo, AT, TMP);
+    __ And(out_lo, out_lo, in_lo);
+    if (isR6) {
+      __ Seleqz(out_lo, out_lo, out_hi);
+    } else {
+      __ Movn(out_lo, ZERO, out_hi);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR6) {
+      __ ClzR6(TMP, in);
+    } else {
+      __ ClzR2(TMP, in);
+    }
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(AT, AT, TMP);  // Srlv uses only the lower 5 bits of the shift
+                           // amount, i.e. it shifts in the range [0, 31].
+    __ And(out, AT, in);   // So this And is needed to produce 0 for a zero
+                           // input, where clz = 32 would mean a shift by 32
+                           // (which Srlv performs as a shift by 0).
+  }
+}
+
+// int java.lang.Integer.highestOneBit(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler());
+}
+
+// long java.lang.Long.highestOneBit(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
+}
+
+static void GenLowestOneBit(LocationSummary* locations,
+                            const Primitive::Type type,
+                            bool isR6,
+                            MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
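+  // The identity x & -x keeps only the lowest set bit.  For longs, apply
+  // it to each half and clear the high half's result when the low half
+  // already produced a bit.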
+  if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    __ Subu(TMP, ZERO, in_lo);
+    __ And(out_lo, TMP, in_lo);
+    __ Subu(TMP, ZERO, in_hi);
+    __ And(out_hi, TMP, in_hi);
+    if (isR6) {
+      __ Seleqz(out_hi, out_hi, out_lo);
+    } else {
+      __ Movn(out_hi, ZERO, out_lo);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Subu(TMP, ZERO, in);
+    __ And(out, TMP, in);
+  }
+}
+
+// int java.lang.Integer.lowestOneBit(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler());
+}
+
+// long java.lang.Long.lowestOneBit(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
+}
+
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister half = locations->GetTemp(0).AsFpuRegister<FRegister>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  MipsLabel done;
+  MipsLabel finite;
+  MipsLabel add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor.w.s(in);
+  //
+  // /*
+  //  * This "if" statement is only needed for the pre-R6 version of floor.w.s
+  //  * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+  //  * too large to fit in a 32-bit integer.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-bit signed
+  //  * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
+  //  * and will no longer be processed by this "if" statement.
+  //  */
+  // if (out == Integer.MAX_VALUE) {
+  //   TMP = (in < 0.0f) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * Integer.MAX_VALUE to Integer.MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (IsR6()) {
+    __ CmpUnS(FTMP, in, in);
+  } else {
+    __ CunS(in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  if (IsR6()) {
+    __ Bc1nez(FTMP, &done);
+  } else {
+    __ Bc1t(&done);
+  }
+
+  // out = floor(in);
+  __ FloorWS(FTMP, in);
+  __ Mfc1(out, FTMP);
+
+  if (!IsR6()) {
+    __ LoadConst32(TMP, -1);
+  }
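+  // Pre-R6 FP compares set a condition flag instead of writing a GPR:
+  // preload TMP with -1 here and let the Movf at the "add" label clear
+  // it to 0 when the comparison turns out to be false.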
+
+  // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
+  __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  __ Bne(AT, out, &finite);
+
+  __ Mtc1(ZERO, FTMP);
+  if (IsR6()) {
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(TMP, FTMP);
+  } else {
+    __ ColtS(in, FTMP);
+  }
+
+  __ B(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5f <= (in - out)) ? -1 : 0;
+  __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+  __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+  __ SubS(FTMP, in, FTMP);
+  __ Mtc1(AT, half);
+  if (IsR6()) {
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(TMP, FTMP);
+  } else {
+    __ ColeS(half, FTMP);
+  }
+
+  __ Bind(&add);
+
+  if (!IsR6()) {
+    __ Movf(TMP, ZERO);
+  }
+
+  // Return out -= TMP.
+  __ Subu(out, out, TMP);
+
+  __ Bind(&done);
+}
+
+// Unimplemented intrinsics.
+
+UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
+
+UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy)
+
+UNIMPLEMENTED_INTRINSIC(MIPS, MathCos)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathSin)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathAcos)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathAsin)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathAtan)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathCosh)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathExp)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathHypot)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathLog)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathLog10)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathSinh)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathTan)
+UNIMPLEMENTED_INTRINSIC(MIPS, MathTanh)
+
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(MIPS)
+
+#undef __
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
new file mode 100644
index 0000000..575a7d0
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace mips {
+
+class CodeGeneratorMIPS;
+class MipsAssembler;
+
+class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen);
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS);
+};
+
+class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorMIPS(CodeGeneratorMIPS* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  bool IsR2OrNewer() const;
+  bool IsR6() const;
+  bool Is32BitFPU() const;
+
+ private:
+  MipsAssembler* GetAssembler();
+
+  ArenaAllocator* GetAllocator();
+
+  CodeGeneratorMIPS* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS);
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 56c4177..1e18540 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -87,7 +87,8 @@
 //       restored!
 class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : invoke_(invoke) { }
+  explicit IntrinsicSlowPathMIPS64(HInvoke* invoke)
+     : SlowPathCodeMIPS64(invoke), invoke_(invoke) { }
 
   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
     CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in);
@@ -101,11 +102,10 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(A0));
-      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
-      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
-      UNREACHABLE();
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
     }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
@@ -116,7 +116,7 @@
     }
 
     RestoreLiveRegisters(codegen, invoke_->GetLocations());
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; }
@@ -163,7 +163,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // int java.lang.Float.floatToRawIntBits(float)
@@ -172,7 +172,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -200,7 +200,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Float.intBitsToFloat(int)
@@ -209,7 +209,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -272,7 +272,9 @@
   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
 }
 
-static void GenNumberOfLeadingZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+static void GenNumberOfLeadingZeroes(LocationSummary* locations,
+                                     bool is64bit,
+                                     Mips64Assembler* assembler) {
   GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
@@ -289,7 +291,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfLeadingZeros(long i)
@@ -298,10 +300,12 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeroes(invoke->GetLocations(), true, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-static void GenNumberOfTrailingZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+static void GenNumberOfTrailingZeroes(LocationSummary* locations,
+                                      bool is64bit,
+                                      Mips64Assembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -324,7 +328,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfTrailingZeros(long i)
@@ -333,131 +337,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(), true, GetAssembler());
-}
-
-static void GenRotateRight(HInvoke* invoke,
-                           Primitive::Type type,
-                           Mips64Assembler* assembler) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  LocationSummary* locations = invoke->GetLocations();
-  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    uint32_t shift = static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue());
-    if (type == Primitive::kPrimInt) {
-      shift &= 0x1f;
-      __ Rotr(out, in, shift);
-    } else {
-      shift &= 0x3f;
-      if (shift < 32) {
-        __ Drotr(out, in, shift);
-      } else {
-        shift &= 0x1f;
-        __ Drotr32(out, in, shift);
-      }
-    }
-  } else {
-    GpuRegister shamt = locations->InAt(1).AsRegister<GpuRegister>();
-    if (type == Primitive::kPrimInt) {
-      __ Rotrv(out, in, shamt);
-    } else {
-      __ Drotrv(out, in, shamt);
-    }
-  }
-}
-
-// int java.lang.Integer.rotateRight(int i, int distance)
-void IntrinsicLocationsBuilderMIPS64::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke, Primitive::kPrimInt, GetAssembler());
-}
-
-// int java.lang.Long.rotateRight(long i, int distance)
-void IntrinsicLocationsBuilderMIPS64::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitLongRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke, Primitive::kPrimLong, GetAssembler());
-}
-
-static void GenRotateLeft(HInvoke* invoke,
-                           Primitive::Type type,
-                           Mips64Assembler* assembler) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  LocationSummary* locations = invoke->GetLocations();
-  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    int32_t shift = -static_cast<int32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue());
-    if (type == Primitive::kPrimInt) {
-      shift &= 0x1f;
-      __ Rotr(out, in, shift);
-    } else {
-      shift &= 0x3f;
-      if (shift < 32) {
-        __ Drotr(out, in, shift);
-      } else {
-        shift &= 0x1f;
-        __ Drotr32(out, in, shift);
-      }
-    }
-  } else {
-    GpuRegister shamt = locations->InAt(1).AsRegister<GpuRegister>();
-    if (type == Primitive::kPrimInt) {
-      __ Subu(TMP, ZERO, shamt);
-      __ Rotrv(out, in, TMP);
-    } else {
-      __ Dsubu(TMP, ZERO, shamt);
-      __ Drotrv(out, in, TMP);
-    }
-  }
-}
-
-// int java.lang.Integer.rotateLeft(int i, int distance)
-void IntrinsicLocationsBuilderMIPS64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke, Primitive::kPrimInt, GetAssembler());
-}
-
-// int java.lang.Long.rotateLeft(long i, int distance)
-void IntrinsicLocationsBuilderMIPS64::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitLongRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke, Primitive::kPrimLong, GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenReverse(LocationSummary* locations,
@@ -505,6 +385,92 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
+static void GenBitCount(LocationSummary* locations,
+                        const Primitive::Type type,
+                        Mips64Assembler* assembler) {
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  //
+  // A generalization of the best bit counting method to integers of
+  // bit-widths up to 128 (parameterized by type T) is this:
+  //
+  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
+  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
+  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
+  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+  //
+  // For comparison, for 32-bit quantities, this algorithm can be executed
+  // using 20 MIPS instructions (the calls to LoadConst32() generate two
+  // machine instructions each for the values being used in this algorithm).
+  // An (unrolled) loop-based algorithm requires 25 instructions.
+  //
+  // For a 64-bit operand this can be performed in 24 instructions compared
+  // to an (unrolled) loop-based algorithm which requires 38 instructions.
+  //
+  // There are algorithms which are faster in the cases where very few
+  // bits are set but the algorithm here attempts to minimize the total
+  // number of instructions executed even when a large number of bits
+  // are set.
+
+  if (type == Primitive::kPrimInt) {
+    __ Srl(TMP, in, 1);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, TMP, AT);
+    __ Subu(TMP, in, TMP);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(out, TMP, AT);
+    __ Srl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Addu(TMP, out, TMP);
+    __ Srl(out, TMP, 4);
+    __ Addu(out, out, TMP);
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(out, out, AT);
+    __ LoadConst32(TMP, 0x01010101);
+    __ MulR6(out, out, TMP);
+    __ Srl(out, out, 24);
+  } else if (type == Primitive::kPrimLong) {
+    __ Dsrl(TMP, in, 1);
+    __ LoadConst64(AT, 0x5555555555555555L);
+    __ And(TMP, TMP, AT);
+    __ Dsubu(TMP, in, TMP);
+    __ LoadConst64(AT, 0x3333333333333333L);
+    __ And(out, TMP, AT);
+    __ Dsrl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Daddu(TMP, out, TMP);
+    __ Dsrl(out, TMP, 4);
+    __ Daddu(out, out, TMP);
+    __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+    __ And(out, out, AT);
+    __ LoadConst64(TMP, 0x0101010101010101L);
+    __ Dmul(out, out, TMP);
+    __ Dsrl32(out, out, 24);
+  }
+}
+
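For reference, the bit-twiddling sequence emitted by GenBitCount is easier to verify in plain C++. A minimal sketch of the 32-bit variant follows; the helper name is hypothetical and the function is not part of this patch:

#include <cstdint>

// Each step mirrors one LoadConst32/And/Srl/Addu group emitted above.
uint32_t PopCount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555);                 // fold pairs of bits
  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);  // fold nibbles
  v = (v + (v >> 4)) & 0x0F0F0F0F;                 // fold bytes
  return (v * 0x01010101) >> 24;                   // byte sum lands in the top byte
}
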
+// int java.lang.Integer.bitCount(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// int java.lang.Long.bitCount(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
@@ -522,7 +488,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Math.abs(float)
@@ -531,7 +497,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -563,7 +529,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // long java.lang.Math.abs(long)
@@ -572,30 +538,76 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
                         bool is_min,
-                        bool is_double,
+                        Primitive::Type type,
                         Mips64Assembler* assembler) {
-  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
-  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+  FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
 
-  if (is_double) {
+  Mips64Label noNaNs;
+  Mips64Label done;
+  FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+  // When Java computes min/max it prefers a NaN to a number; MIPSR6
+  // behaves the other way around, i.e., if one of the inputs is a NaN
+  // and the other is a valid number, the MIPS instruction returns the
+  // number, whereas Java wants the NaN returned. This is why there is
+  // extra logic preceding the use of the MIPS min.fmt/max.fmt
+  // instructions: if either a or b holds a NaN, return the NaN;
+  // otherwise return the min/max.
+  if (type == Primitive::kPrimDouble) {
+    __ CmpUnD(FTMP, a, b);
+    __ Bc1eqz(FTMP, &noNaNs);
+
+    // One of the inputs is a NaN.
+    __ CmpEqD(ftmp, a, a);
+    // If a == a, then b is the NaN; otherwise a is the NaN.
+    __ SelD(ftmp, a, b);
+
+    if (ftmp != out) {
+      __ MovD(out, ftmp);
+    }
+
+    __ Bc(&done);
+
+    __ Bind(&noNaNs);
+
     if (is_min) {
-      __ MinD(out, lhs, rhs);
+      __ MinD(out, a, b);
     } else {
-      __ MaxD(out, lhs, rhs);
+      __ MaxD(out, a, b);
     }
   } else {
+    DCHECK_EQ(type, Primitive::kPrimFloat);
+    __ CmpUnS(FTMP, a, b);
+    __ Bc1eqz(FTMP, &noNaNs);
+
+    // One of the inputs is a NaN.
+    __ CmpEqS(ftmp, a, a);
+    // If a == a, then b is the NaN; otherwise a is the NaN.
+    __ SelS(ftmp, a, b);
+
+    if (ftmp != out) {
+      __ MovS(out, ftmp);
+    }
+
+    __ Bc(&done);
+
+    __ Bind(&noNaNs);
+
     if (is_min) {
-      __ MinS(out, lhs, rhs);
+      __ MinS(out, a, b);
     } else {
-      __ MaxS(out, lhs, rhs);
+      __ MaxS(out, a, b);
     }
   }
+
+  __ Bind(&done);
 }
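A minimal C++ model of the Java semantics the added NaN check enforces; the helper name is hypothetical and not part of this patch:

#include <cmath>

// Java's Math.min/max must return NaN when either input is NaN, while a
// bare MIPSR6 min.d/max.d would return the numeric operand instead.
// Signed-zero ordering is left to fmin here.
double JavaMinD(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {  // CmpUnD / Bc1eqz test
    return std::isnan(a) ? a : b;        // CmpEqD / SelD: pick the NaN
  }
  return std::fmin(a, b);                // MinD on the noNaNs path
}
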
 
 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -613,7 +625,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimDouble, GetAssembler());
 }
 
 // float java.lang.Math.min(float, float)
@@ -622,7 +634,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimFloat, GetAssembler());
 }
 
 // double java.lang.Math.max(double, double)
@@ -631,7 +643,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimDouble, GetAssembler());
 }
 
 // float java.lang.Math.max(float, float)
@@ -640,7 +652,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimFloat, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -650,49 +662,55 @@
   GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-  // Some architectures, such as ARM and MIPS (prior to r6), have a
-  // conditional move instruction which only changes the target
-  // (output) register if the condition is true (MIPS prior to r6 had
-  // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
-  // change the target (output) register.  If the condition is true the
-  // output register gets the contents of the "rs" register; otherwise,
-  // the output register is set to zero. One consequence of this is
-  // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
-  // needs to use a pair of SELEQZ/SELNEZ instructions.  After
-  // executing this pair of instructions one of the output registers
-  // from the pair will necessarily contain zero. Then the code ORs the
-  // output registers from the SELEQZ/SELNEZ instructions to get the
-  // final result.
-  //
-  // The initial test to see if the output register is same as the
-  // first input register is needed to make sure that value in the
-  // first input register isn't clobbered before we've finished
-  // computing the output value. The logic in the corresponding else
-  // clause performs the same task but makes sure the second input
-  // register isn't clobbered in the event that it's the same register
-  // as the output register; the else clause also handles the case
-  // where the output register is distinct from both the first, and the
-  // second input registers.
-  if (out == lhs) {
-    __ Slt(AT, rhs, lhs);
-    if (is_min) {
-      __ Seleqz(out, lhs, AT);
-      __ Selnez(AT, rhs, AT);
-    } else {
-      __ Selnez(out, lhs, AT);
-      __ Seleqz(AT, rhs, AT);
+  if (lhs == rhs) {
+    if (out != lhs) {
+      __ Move(out, lhs);
     }
   } else {
-    __ Slt(AT, lhs, rhs);
-    if (is_min) {
-      __ Seleqz(out, rhs, AT);
-      __ Selnez(AT, lhs, AT);
+    // Some architectures, such as ARM and MIPS (prior to r6), have a
+    // conditional move instruction which only changes the target
+    // (output) register if the condition is true (MIPS prior to r6 had
+    // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
+    // change the target (output) register.  If the condition is true the
+    // output register gets the contents of the "rs" register; otherwise,
+    // the output register is set to zero. One consequence of this is
+    // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
+    // needs to use a pair of SELEQZ/SELNEZ instructions.  After
+    // executing this pair of instructions one of the output registers
+    // from the pair will necessarily contain zero. Then the code ORs the
+    // output registers from the SELEQZ/SELNEZ instructions to get the
+    // final result.
+    //
+    // The initial test to see if the output register is the same as
+    // the first input register is needed to make sure that the value
+    // in the first input register isn't clobbered before we've
+    // finished computing the output value. The logic in the
+    // corresponding else clause performs the same task but makes sure
+    // the second input register isn't clobbered in the event that it's
+    // the same register as the output register; the else clause also
+    // handles the case where the output register is distinct from both
+    // the first and the second input registers.
+    if (out == lhs) {
+      __ Slt(AT, rhs, lhs);
+      if (is_min) {
+        __ Seleqz(out, lhs, AT);
+        __ Selnez(AT, rhs, AT);
+      } else {
+        __ Selnez(out, lhs, AT);
+        __ Seleqz(AT, rhs, AT);
+      }
     } else {
-      __ Selnez(out, rhs, AT);
-      __ Seleqz(AT, lhs, AT);
+      __ Slt(AT, lhs, rhs);
+      if (is_min) {
+        __ Seleqz(out, rhs, AT);
+        __ Selnez(AT, lhs, AT);
+      } else {
+        __ Selnez(out, rhs, AT);
+        __ Seleqz(AT, lhs, AT);
+      }
     }
+    __ Or(out, out, AT);
   }
-  __ Or(out, out, AT);
 }
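The SELEQZ/SELNEZ composition described above can be modeled in a few lines of C++; a sketch of the out != lhs case (hypothetical helper, not part of this patch):

#include <cstdint>

// Each select yields either its input or zero, so OR-ing the pair
// reassembles "cond ? lhs : rhs" without a conditional move.
int64_t SelectMin(int64_t lhs, int64_t rhs) {
  int64_t at = (lhs < rhs) ? 1 : 0;  // Slt    AT, lhs, rhs
  int64_t out = at ? 0 : rhs;        // Seleqz out, rhs, AT
  int64_t tmp = at ? lhs : 0;        // Selnez AT, lhs, AT
  return out | tmp;                  // Or     out, out, AT
}
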
 
 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -710,7 +728,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 // long java.lang.Math.min(long, long)
@@ -719,7 +737,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 // int java.lang.Math.max(int, int)
@@ -728,7 +746,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 // long java.lang.Math.max(long, long)
@@ -737,7 +755,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 // double java.lang.Math.sqrt(double)
@@ -754,17 +772,19 @@
   __ SqrtD(out, in);
 }
 
-static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToFP(ArenaAllocator* arena,
+                         HInvoke* invoke,
+                         Location::OutputOverlap overlaps = Location::kOutputOverlap) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
-  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresFpuRegister(), overlaps);
 }
 
 // double java.lang.Math.rint(double)
 void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) {
-  CreateFPToFP(arena_, invoke);
+  CreateFPToFP(arena_, invoke, Location::kNoOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) {
@@ -788,15 +808,22 @@
                                              kQuietNaN |
                                              kSignalingNaN;
 
-void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
+enum FloatRoundingMode {
+  kFloor,
+  kCeil,
+};
+
+static void GenRoundingMode(LocationSummary* locations,
+                            FloatRoundingMode mode,
+                            Mips64Assembler* assembler) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
 
-  Label done;
+  DCHECK_NE(in, out);
 
-  // double floor(double in) {
+  Mips64Label done;
+
+  // double floor/ceil(double in) {
   //     if in.isNaN || in.isInfinite || in.isZero {
   //         return in;
   //     }
@@ -806,19 +833,23 @@
   __ MovD(out, in);
   __ Bnezc(AT, &done);
 
-  //     Long outLong = floor(in);
+  //     Long outLong = floor/ceil(in);
   //     if outLong == Long.MAX_VALUE {
-  //         // floor() has almost certainly returned a value which
-  //         // can't be successfully represented as a signed 64-bit
-  //         // number.  Java expects that the input value will be
-  //         // returned in these cases.
-  //         // There is also a small probability that floor(in)
-  //         // correctly truncates the input value to Long.MAX_VALUE.  In
-  //         // that case, this exception handling code still does the
-  //         // correct thing.
+  //         // floor()/ceil() has almost certainly returned a value
+  //         // which can't be successfully represented as a signed
+  //         // 64-bit number.  Java expects that the input value will
+  //         // be returned in these cases.
+  //         // There is also a small probability that floor(in)/ceil(in)
+  //         // correctly truncates/rounds up the input value to
+  //         // Long.MAX_VALUE.  In that case, this exception handling
+  //         // code still does the correct thing.
   //         return in;
   //     }
-  __ FloorLD(out, in);
+  if (mode == kFloor) {
+    __ FloorLD(out, in);
+  } else if (mode == kCeil) {
+    __ CeilLD(out, in);
+  }
   __ Dmfc1(AT, out);
   __ MovD(out, in);
   __ LoadConst64(TMP, kPrimLongMax);
@@ -832,53 +863,162 @@
   // }
 }
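The floor/ceil pseudo-code above, restated as plain C++ for reference. The range guards stand in for the saturating behavior of floor.l.d/ceil.l.d, since a C++ cast out of range would be undefined; hypothetical helper, not part of this patch:

#include <cmath>
#include <cstdint>

double FloorOrCeil(double in, bool is_floor) {
  // ClassD/Andi(kFPLeaveUnchanged) fast path: NaNs, infinities and
  // zeros are returned unchanged.
  if (std::isnan(in) || std::isinf(in) || in == 0.0) {
    return in;
  }
  double r = is_floor ? std::floor(in) : std::ceil(in);
  // Results outside the signed 64-bit range make floor.l.d/ceil.l.d
  // deliver Long.MAX_VALUE, and Java then expects the input back;
  // 2^63 is the first double beyond that range.
  if (r >= 9223372036854775808.0 || r < -9223372036854775808.0) {
    return in;
  }
  return static_cast<double>(static_cast<int64_t>(r));
}
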
 
+void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
+  GenRoundingMode(invoke->GetLocations(), kFloor, GetAssembler());
+}
+
 // double java.lang.Math.ceil(double)
 void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) {
   CreateFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
+  GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler());
+}
+
+static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Primitive::Type type) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
-  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+  FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-  Label done;
+  DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
 
-  // double ceil(double in) {
-  //     if in.isNaN || in.isInfinite || in.isZero {
-  //         return in;
-  //     }
-  __ ClassD(out, in);
-  __ Dmfc1(AT, out);
-  __ Andi(AT, AT, kFPLeaveUnchanged);   // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
-  __ MovD(out, in);
-  __ Bnezc(AT, &done);
+  Mips64Label done;
+  Mips64Label finite;
+  Mips64Label add;
 
-  //     Long outLong = ceil(in);
-  //     if outLong == Long.MAX_VALUE {
-  //         // ceil() has almost certainly returned a value which
-  //         // can't be successfully represented as a signed 64-bit
-  //         // number.  Java expects that the input value will be
-  //         // returned in these cases.
-  //         // There is also a small probability that ceil(in)
-  //         // correctly rounds up the input value to Long.MAX_VALUE.  In
-  //         // that case, this exception handling code still does the
-  //         // correct thing.
-  //         return in;
-  //     }
-  __ CeilLD(out, in);
-  __ Dmfc1(AT, out);
-  __ MovD(out, in);
-  __ LoadConst64(TMP, kPrimLongMax);
-  __ Beqc(AT, TMP, &done);
-
-  //     double out = outLong;
-  //     return out;
-  __ Dmtc1(AT, out);
-  __ Cvtdl(out, out);
-  __ Bind(&done);
+  // if (in.isNaN) {
+  //   return 0;
   // }
+  //
+  // out = floor(in);
+  //
+  // /*
+  //  * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-/64-bit
+  //  * signed integer will be processed by floor.X.Y to output
+  //  * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
+  //  * processed by this "if" statement.
+  //  *
+  //  * However, this bug in the 64-bit MIPS emulator causes the
+  //  * behavior of floor.X.Y to be the same as pre-R6 implementations
+  //  * of MIPS64.  When that bug is fixed this logic should be amended.
+  //  */
+  // if (out == MAX_VALUE) {
+  //   TMP = (in < 0.0) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * MAX_VALUE to MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (type == Primitive::kPrimDouble) {
+    __ CmpUnD(FTMP, in, in);
+  } else {
+    __ CmpUnS(FTMP, in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  __ Bc1nez(FTMP, &done);
+
+  // out = floor(in);
+  if (type == Primitive::kPrimDouble) {
+    __ FloorLD(FTMP, in);
+    __ Dmfc1(out, FTMP);
+  } else {
+    __ FloorWS(FTMP, in);
+    __ Mfc1(out, FTMP);
+  }
+
+  // TMP = (out == MAX_VALUE) ? 1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+  } else {
+    __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  }
+  __ Bnec(AT, out, &finite);
+
+  if (type == Primitive::kPrimDouble) {
+    __ Dmtc1(ZERO, FTMP);
+    __ CmpLtD(FTMP, in, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Mtc1(ZERO, FTMP);
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bc(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5 <= (in - out)) ? -1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ Cvtdl(FTMP, FTMP);  // Convert output of floor.l.d back to "double".
+    __ LoadConst64(AT, bit_cast<int64_t, double>(0.5));
+    __ SubD(FTMP, in, FTMP);
+    __ Dmtc1(AT, half);
+    __ CmpLeD(FTMP, half, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+    __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+    __ SubS(FTMP, in, FTMP);
+    __ Mtc1(AT, half);
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bind(&add);
+
+  // Return out -= TMP.
+  if (type == Primitive::kPrimDouble) {
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Subu(out, out, AT);
+  }
+
+  __ Bind(&done);
+}
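A plain-C++ restatement of the pseudo-code above for the float case, assuming |in| is small enough that floor.w.s does not saturate (hypothetical helper, not part of this patch):

#include <cmath>
#include <cstdint>

int32_t RoundFloat(float in) {
  if (std::isnan(in)) {
    return 0;                                   // CmpUnS / Bc1nez path
  }
  float floored = std::floor(in);
  int32_t out = static_cast<int32_t>(floored);  // FloorWS + Mfc1
  // The "TMP" adjustment: add one when the dropped fraction is >= 0.5.
  int32_t inc = ((in - floored) >= 0.5f) ? 1 : 0;
  return out + inc;
}
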
+
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimFloat);
+}
+
+// long java.lang.Math.round(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimDouble);
 }
 
 // byte libcore.io.Memory.peekByte(long address)
@@ -1065,7 +1205,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
@@ -1074,7 +1214,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
@@ -1083,7 +1223,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 
 // long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
@@ -1092,7 +1232,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
@@ -1101,7 +1241,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
@@ -1110,7 +1250,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1161,7 +1301,8 @@
   }
 
   if (type == Primitive::kPrimNot) {
-    codegen->MarkGCCard(base, value);
+    bool value_can_be_null = true;  // TODO: Is it worth finding out this information?
+    codegen->MarkGCCard(base, value, value_can_be_null);
   }
 }
 
@@ -1171,7 +1312,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x)
@@ -1180,7 +1325,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x)
@@ -1189,7 +1338,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putObject(Object o, long offset, Object x)
@@ -1198,7 +1351,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x)
@@ -1207,7 +1364,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x)
@@ -1216,7 +1377,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putLong(Object o, long offset, long x)
@@ -1225,7 +1390,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x)
@@ -1234,7 +1403,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
@@ -1243,59 +1416,110 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
-// char java.lang.String.charAt(int index)
-void IntrinsicLocationsBuilderMIPS64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
 }
 
-void IntrinsicCodeGeneratorMIPS64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
-
-  // Location of reference to data array
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
-  GpuRegister idx = locations->InAt(1).AsRegister<GpuRegister>();
+static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+  Mips64Assembler* assembler = codegen->GetAssembler();
+  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
+  GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-  // TODO: Maybe we can support range check elimination. Overall,
-  //       though, I think it's not worth the cost.
-  // TODO: For simplicity, the index parameter is requested in a
-  //       register, so different from Quick we will not optimize the
-  //       code for constants (which would save a register).
+  DCHECK_NE(base, out);
+  DCHECK_NE(offset, out);
+  DCHECK_NE(expected, out);
 
-  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
-  codegen_->AddSlowPath(slow_path);
+  if (type == Primitive::kPrimNot) {
+    // Mark card for object assuming new value is stored.
+    bool value_can_be_null = true;  // TODO: Is it worth finding out this information?
+    codegen->MarkGCCard(base, value, value_can_be_null);
+  }
 
-  // Load the string size
-  __ Lw(TMP, obj, count_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  // Revert to slow path if idx is too large, or negative
-  __ Bgeuc(idx, TMP, slow_path->GetEntryLabel());
+  // do {
+  //   tmp_value = [tmp_ptr] - expected;
+  // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
+  // result = tmp_value == 0;
 
-  // out = obj[2*idx].
-  __ Sll(TMP, idx, 1);                  // idx * 2
-  __ Daddu(TMP, TMP, obj);              // Address of char at location idx
-  __ Lhu(out, TMP, value_offset);       // Load char at location idx
+  Mips64Label loop_head, exit_loop;
+  __ Daddu(TMP, base, offset);
+  __ Sync(0);
+  __ Bind(&loop_head);
+  if (type == Primitive::kPrimLong) {
+    __ Lld(out, TMP);
+  } else {
+    // Note: We will need a read barrier here, when read barrier
+    // support is added to the MIPS64 back end.
+    __ Ll(out, TMP);
+  }
+  __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
+  __ Sltiu(out, out, 1);                // value, set 'out' to false, and
+  __ Beqzc(out, &exit_loop);            // return.
+  __ Move(out, value);  // Use 'out' for the 'store conditional' instruction.
+                        // If we use 'value' directly, we would lose 'value'
+                        // in the case that the store fails.  Whether the
+                        // store succeeds or fails, it will write the
+                        // correct boolean value into the 'out' register.
+  if (type == Primitive::kPrimLong) {
+    __ Scd(out, TMP);
+  } else {
+    __ Sc(out, TMP);
+  }
+  __ Beqzc(out, &loop_head);    // If we couldn't do the read-modify-write
+                                // cycle atomically then retry.
+  __ Bind(&exit_loop);
+  __ Sync(0);
+}
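Semantically, the Ll/Sc loop above implements a strong compare-and-swap: the retry on store-conditional failure is what upgrades the weak LL/SC primitive. A sketch with std::atomic (hypothetical helper, not part of this patch):

#include <atomic>
#include <cstdint>

bool CasInt(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
  // compare_exchange_strong retries over the weak primitive internally,
  // mirroring the Beqzc back to loop_head; the Sync(0) barriers before
  // and after correspond to sequentially consistent ordering.
  return addr->compare_exchange_strong(expected, value,
                                       std::memory_order_seq_cst);
}
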
 
-  __ Bind(slow_path->GetExitLabel());
+// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+}
+
+// boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+}
+
+// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1317,15 +1541,116 @@
   __ Beqzc(argument, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize,
-                                            pStringCompareTo).Int32Value());
-  __ Jalr(TMP);
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pStringCompareTo).Int32Value());
+  __ Jalr(T9);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
 }
 
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>();
+  GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+  Mips64Label loop;
+  Mips64Label end;
+  Mips64Label return_true;
+  Mips64Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object, and we can
+  // short-circuit the logic to a true result.
+  if (str == arg) {
+    __ LoadConst64(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqzc(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beqc(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bnec(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bnec(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqzc(temp1, &return_true);
+
+  // Don't overwrite input registers.
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 4 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded to be 8-byte aligned.
+  __ Bind(&loop);
+  __ Ld(out, TMP, value_offset);
+  __ Ld(temp2, temp3, value_offset);
+  __ Bnec(out, temp2, &return_false);
+  __ Daddiu(TMP, TMP, 8);
+  __ Daddiu(temp3, temp3, 8);
+  __ Addiu(temp1, temp1, -4);
+  __ Bgtzc(temp1, &loop);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst64(out, 1);
+  __ Bc(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst64(out, 0);
+  __ Bind(&end);
+}
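The 8-bytes-at-a-time loop relies on the zero padding asserted above; a C++ model over raw UTF-16 data, assuming the backing arrays are padded to a multiple of 8 bytes (hypothetical helper, not part of this patch):

#include <cstdint>
#include <cstring>

bool EqualsBy8(const uint16_t* a, const uint16_t* b, int32_t char_count) {
  while (char_count > 0) {             // Bgtzc temp1, &loop
    uint64_t wa;
    uint64_t wb;
    std::memcpy(&wa, a, sizeof(wa));   // Ld out, TMP, value_offset
    std::memcpy(&wb, b, sizeof(wb));   // Ld temp2, temp3, value_offset
    if (wa != wb) {
      return false;                    // Bnec out, temp2, &return_false
    }
    a += 4;                            // Daddiu TMP, TMP, 8
    b += 4;                            // Daddiu temp3, temp3, 8
    char_count -= 4;                   // Addiu temp1, temp1, -4
  }
  return true;
}
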
+
 static void GenerateStringIndexOf(HInvoke* invoke,
                                   Mips64Assembler* assembler,
                                   CodeGeneratorMIPS64* codegen,
@@ -1337,21 +1662,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check for code points > 0xFFFF. Either a slow-path check when we
-  // don't know statically, or directly dispatch if we have a constant.
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeMIPS64* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
       // full slow-path down and branch unconditionally.
       slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
       codegen->AddSlowPath(slow_path);
-      __ B(slow_path->GetEntryLabel());
+      __ Bc(slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>();
     __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max());
     slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
@@ -1363,16 +1689,14 @@
     DCHECK_EQ(tmp_reg, A2);
     // Start-index = 0.
     __ Clear(tmp_reg);
-  } else {
-    __ Slt(TMP, A2, ZERO);      // if fromIndex < 0
-    __ Seleqz(A2, A2, TMP);     //     fromIndex = 0
   }
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pIndexOf).Int32Value());
-  __ Jalr(TMP);
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pIndexOf).Int32Value());
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
+  __ Jalr(T9);
   __ Nop();
 
   if (slow_path != nullptr) {
@@ -1383,7 +1707,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1398,13 +1722,13 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1417,13 +1741,14 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
-// java.lang.String.String(byte[] bytes)
+// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1444,19 +1769,21 @@
   __ Beqzc(byte_array, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromBytes).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
-  __ Jalr(TMP);
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
+                                            pAllocStringFromBytes).Int32Value());
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
+  __ Jalr(T9);
   __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
-// java.lang.String.String(char[] value)
+// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1469,24 +1796,30 @@
 void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
   Mips64Assembler* assembler = GetAssembler();
 
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromChars).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
-  __ Jalr(TMP);
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
+                                            pAllocStringFromChars).Int32Value());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
+  __ Jalr(T9);
   __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-// java.lang.String.String(String original)
+// java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
   locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
 }
@@ -1501,37 +1834,160 @@
   __ Beqzc(string_to_copy, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromString).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
-  __ Jalr(TMP);
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
+                                            pAllocStringFromString).Int32Value());
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
+  __ Jalr(T9);
   __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
-// Unimplemented intrinsics.
+static void GenIsInfinite(LocationSummary* locations,
+                          bool is64bit,
+                          Mips64Assembler* assembler) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-#define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
-void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}                                                                                      \
-void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+  if (is64bit) {
+    __ ClassD(FTMP, in);
+  } else {
+    __ ClassS(FTMP, in);
+  }
+  __ Mfc1(out, FTMP);
+  __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+  __ Sltu(out, ZERO, out);
 }
 
-UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+// boolean java.lang.Float.isInfinite(float)
+void IntrinsicLocationsBuilderMIPS64::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
 
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
+void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
 
-UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
+// boolean java.lang.Double.isInfinite(double)
+void IntrinsicLocationsBuilderMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
 
-#undef UNIMPLEMENTED_INTRINSIC
+void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
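Editor's note: the class.d/class.s sequence above sets per-class result bits and masks the two infinity bits. The portable equivalent is a bit-level check that the exponent is all ones and the mantissa is zero; a minimal C++ sketch of the double case (assuming IEEE-754 representation, helper name illustrative):

    #include <cstdint>
    #include <cstring>

    // True iff `in` is +inf or -inf: clear the sign bit and compare
    // against the infinity pattern (all-ones exponent, zero mantissa).
    bool IsInfiniteDouble(double in) {
      uint64_t bits;
      std::memcpy(&bits, &in, sizeof(bits));
      return (bits & UINT64_C(0x7FFFFFFFFFFFFFFF)) ==
             UINT64_C(0x7FF0000000000000);
    }
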
+static void GenHighestOneBit(LocationSummary* locations,
+                             Primitive::Type type,
+                             Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ Dclz(TMP, in);
+    __ LoadConst64(AT, INT64_C(0x8000000000000000));
+    __ Dsrlv(out, AT, TMP);
+  } else {
+    __ Clz(TMP, in);
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(out, AT, TMP);
+  }
+  // For either value of "type", when "in" is zero, "out" should also
+  // be zero. Without this extra "and" operation, when "in" is zero,
+  // "out" would be either Integer.MIN_VALUE, or Long.MIN_VALUE because
+  // the MIPS logical shift operations "dsrlv", and "srlv" don't use
+  // the shift amount (TMP) directly; they use either (TMP % 64) or
+  // (TMP % 32), respectively.
+  __ And(out, out, in);
+}
+
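Editor's note: a minimal C++ restatement of the emitted sequence for the 64-bit case (a sketch assuming GCC/Clang builtins; the `& 63` models dsrlv's modulo-64 treatment of the shift amount described in the comment above):

    #include <cstdint>

    uint64_t HighestOneBit(uint64_t in) {
      // dclz returns 64 for a zero input; __builtin_clzll(0) is
      // undefined, so guard it explicitly here.
      int clz = (in == 0) ? 64 : __builtin_clzll(in);
      uint64_t out = UINT64_C(0x8000000000000000) >> (clz & 63);
      return out & in;  // forces out to 0 when in == 0
    }
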
+// int java.lang.Integer.highestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.highestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenLowestOneBit(LocationSummary* locations,
+                            Primitive::Type type,
+                            Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ Dsubu(TMP, ZERO, in);
+  } else {
+    __ Subu(TMP, ZERO, in);
+  }
+  __ And(out, TMP, in);
+}
+
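Editor's note: the two-instruction body is the classic two's-complement isolation trick; restated in C++ (a sketch, using unsigned arithmetic to avoid signed negation):

    #include <cstdint>

    // -in (computed as ~in + 1) shares exactly the lowest set bit with
    // in, so the AND isolates it; a zero input yields zero.
    uint64_t LowestOneBit(uint64_t in) {
      return in & (~in + 1);
    }
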
+// int java.lang.Integer.lowestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.lowestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
+
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathCos)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathSin)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathAcos)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathAsin)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathAtan)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathCosh)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathExp)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathHypot)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathLog)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathLog10)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathSinh)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
+UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
+
+// Java 1.8.
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
 
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 1481d24..4137fbd 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -60,7 +60,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index e70afd2..c1f9ae6 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -39,7 +39,7 @@
 template <typename TDexCallingConvention>
 class IntrinsicSlowPath : public SlowPathCode {
  public:
-  explicit IntrinsicSlowPath(HInvoke* invoke) : invoke_(invoke) { }
+  explicit IntrinsicSlowPath(HInvoke* invoke) : SlowPathCode(invoke), invoke_(invoke) { }
 
   Location MoveArguments(CodeGenerator* codegen) {
     TDexCallingConvention calling_convention_visitor;
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 040bf6a..49d6c19 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -37,10 +37,12 @@
 
 static constexpr int kDoubleNaNHigh = 0x7FF80000;
 static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int kFloatNaN = 0x7FC00000;
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
 
 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
-  : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+  : arena_(codegen->GetGraph()->GetArena()),
+    codegen_(codegen) {
 }
 
 
@@ -55,7 +57,10 @@
 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -65,6 +70,105 @@
 
 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
 
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+      : SlowPathCode(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    Register src = locations->InAt(0).AsRegister<Register>();
+    Location src_pos = locations->InAt(1);
+    Register dest = locations->InAt(2).AsRegister<Register>();
+    Location dest_pos = locations->InAt(3);
+    Location length = locations->InAt(4);
+    Location temp1_loc = locations->GetTemp(0);
+    Register temp1 = temp1_loc.AsRegister<Register>();
+    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+    Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+    __ Bind(GetEntryLabel());
+    // In this code path, registers `temp1`, `temp2`, and `temp3` are
+    // not used for the base source address, the base destination
+    // address, and the end source address, respectively, as they are
+    // in other SystemArrayCopy intrinsic code paths.  Instead they
+    // hold, respectively:
+    // - the loop index (`i`);
+    // - the source index (`src_index`) and the loaded (source)
+    //   reference (`value`); and
+    // - the destination index (`dest_index`).
+
+    // i = 0
+    __ xorl(temp1, temp1);
+    NearLabel loop;
+    __ Bind(&loop);
+    // value = src_array[i + src_pos]
+    if (src_pos.IsConstant()) {
+      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+      int32_t adjusted_offset = offset + constant * element_size;
+      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+    } else {
+      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+    }
+    __ MaybeUnpoisonHeapReference(temp2);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // value = ReadBarrier::Mark(value)
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+    // explanations.)
+    DCHECK_NE(temp2, ESP);
+    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ MaybePoisonHeapReference(temp2);
+    // dest_array[i + dest_pos] = value
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      int32_t adjusted_offset = offset + constant * element_size;
+      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+    } else {
+      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+    }
+    // ++i
+    __ addl(temp1, Immediate(1));
+    // if (i != length) goto loop
+    x86_codegen->GenerateIntCompare(temp1_loc, length);
+    __ j(kNotEqual, &loop);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
+
+#undef __
+
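Editor's note: for orientation, the slow path above is the moral equivalent of the loop below (a sketch only: `Mark` stands in for the per-element read-barrier mark entrypoint, references are modeled as 32-bit words, and heap-reference poisoning is elided):

    #include <cstdint>

    void ReadBarrierArrayCopy(const uint32_t* src, int32_t src_pos,
                              uint32_t* dest, int32_t dest_pos,
                              int32_t length, uint32_t (*Mark)(uint32_t)) {
      for (int32_t i = 0; i != length; ++i) {  // temp1 is `i`
        uint32_t value = src[src_pos + i];     // value = src_array[i + src_pos]
        value = Mark(value);                   // value = ReadBarrier::Mark(value)
        dest[dest_pos + i] = value;            // dest_array[i + dest_pos] = value
      }
    }
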
 #define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -122,31 +226,31 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  CreateFPToIntLocations(arena_, invoke, true);
+  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  CreateIntToFPLocations(arena_, invoke, true);
+  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
 }
 
 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  CreateFPToIntLocations(arena_, invoke, false);
+  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  CreateIntToFPLocations(arena_, invoke, false);
+  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
 }
 
 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -240,15 +344,38 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
-  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
-  // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
   locations->SetOut(Location::SameAsFirstInput());
+  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(static_or_direct != nullptr);
+  if (static_or_direct->HasSpecialInput() &&
+      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+    // We need addressability for the constant area.
+    locations->SetInAt(1, Location::RequiresRegister());
+    // We need a temporary to hold the constant.
+    locations->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+                      bool is64bit,
+                      X86Assembler* assembler,
+                      CodeGeneratorX86* codegen) {
   Location output = locations->Out();
 
-  if (output.IsFpuRegister()) {
+  DCHECK(output.IsFpuRegister());
+  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+    DCHECK(locations->InAt(1).IsRegister());
+    // We also have a constant area pointer.
+    Register constant_area = locations->InAt(1).AsRegister<Register>();
+    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+    if (is64bit) {
+      __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
+      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
+    } else {
+      __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
+      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
+    }
+  } else {
     // Create the right constant on an aligned stack.
     if (is64bit) {
       __ subl(ESP, Immediate(8));
@@ -261,19 +388,6 @@
       __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
     }
     __ addl(ESP, Immediate(16));
-  } else {
-    // TODO: update when assember support is available.
-    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
-//  Once assembler support is available, in-memory operations look like this:
-//    if (is64bit) {
-//      DCHECK(output.IsDoubleStackSlot());
-//      __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
-//              Immediate(0x7FFFFFFF));
-//    } else {
-//      DCHECK(output.IsStackSlot());
-//      // Can use and with a literal directly.
-//      __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
-//    }
   }
 }
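
Editor's note: both branches materialize the same mask; the underlying bit trick, restated as standalone C++ (assuming IEEE-754 doubles; the float case uses 0x7FFFFFFF analogously):

    #include <cstdint>
    #include <cstring>

    // Absolute value by clearing the sign bit, i.e. the andpd/andps above.
    double AbsDouble(double in) {
      uint64_t bits;
      std::memcpy(&bits, &in, sizeof(bits));
      bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);
      std::memcpy(&in, &bits, sizeof(in));
      return in;
    }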
 
@@ -282,7 +396,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
@@ -290,7 +404,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
 }
 
 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
@@ -372,8 +486,11 @@
   GenAbsLong(invoke->GetLocations(), GetAssembler());
 }
 
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
-                        X86Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        bool is_double,
+                        X86Assembler* assembler,
+                        CodeGeneratorX86* codegen) {
   Location op1_loc = locations->InAt(0);
   Location op2_loc = locations->InAt(1);
   Location out_loc = locations->Out();
@@ -434,15 +551,26 @@
 
   // NaN handling.
   __ Bind(&nan);
-  if (is_double) {
-    __ pushl(Immediate(kDoubleNaNHigh));
-    __ pushl(Immediate(kDoubleNaNLow));
-    __ movsd(out, Address(ESP, 0));
-    __ addl(ESP, Immediate(8));
+  // Do we have a constant area pointer?
+  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
+    DCHECK(locations->InAt(2).IsRegister());
+    Register constant_area = locations->InAt(2).AsRegister<Register>();
+    if (is_double) {
+      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
+    } else {
+      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
+    }
   } else {
-    __ pushl(Immediate(kFloatNaN));
-    __ movss(out, Address(ESP, 0));
-    __ addl(ESP, Immediate(4));
+    if (is_double) {
+      __ pushl(Immediate(kDoubleNaNHigh));
+      __ pushl(Immediate(kDoubleNaNLow));
+      __ movsd(out, Address(ESP, 0));
+      __ addl(ESP, Immediate(8));
+    } else {
+      __ pushl(Immediate(kFloatNaN));
+      __ movss(out, Address(ESP, 0));
+      __ addl(ESP, Immediate(4));
+    }
   }
   __ jmp(&done);
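
Editor's note: either branch produces the same canonical quiet NaN; `kDoubleNaN` above is exactly the pattern built below (a sketch, assuming IEEE-754):

    #include <cstdint>
    #include <cstring>

    // The canonical double NaN (0x7FF8000000000000) returned by min/max
    // for unordered inputs, whether loaded from the constant area or
    // assembled on the stack from kDoubleNaNHigh/kDoubleNaNLow.
    double CanonicalDoubleNaN() {
      uint64_t bits = UINT64_C(0x7FF8000000000000);
      double out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }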
 
@@ -467,6 +595,12 @@
   // The following is sub-optimal, but all we can do for now. It would be fine to also accept
   // the second input to be the output (we can simply swap inputs).
   locations->SetOut(Location::SameAsFirstInput());
+  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(static_or_direct != nullptr);
+  if (static_or_direct->HasSpecialInput() &&
+      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -474,7 +608,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ true,
+              /* is_double */ true,
+              GetAssembler(),
+              codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -482,7 +620,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ true,
+              /* is_double */ false,
+              GetAssembler(),
+              codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -490,7 +632,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ false,
+              /* is_double */ true,
+              GetAssembler(),
+              codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -498,7 +644,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ false,
+              /* is_double */ false,
+              GetAssembler(),
+              codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -581,7 +731,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
@@ -589,7 +739,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -597,7 +747,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -605,7 +755,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -655,7 +805,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -701,15 +851,20 @@
   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
 }
 
-// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
-// as it needs 64 bit instructions.
 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
   // Do we have instruction support?
   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+    DCHECK(static_or_direct != nullptr);
     LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                               LocationSummary::kNoCall,
                                                               kIntrinsified);
     locations->SetInAt(0, Location::RequiresFpuRegister());
+    if (static_or_direct->HasSpecialInput() &&
+        invoke->InputAt(
+            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+      locations->SetInAt(1, Location::RequiresRegister());
+    }
     locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
@@ -718,7 +873,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                            LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(EAX));
@@ -728,90 +883,244 @@
 
 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  if (locations->WillCall()) {
+  if (locations->WillCall()) {  // TODO: can we reach this?
     InvokeOutOfLineIntrinsic(codegen_, invoke);
     return;
   }
 
-  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   Register out = locations->Out().AsRegister<Register>();
-  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  NearLabel skip_incr, done;
   X86Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
-  __ movd(inPlusPointFive, out);
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5f)
+  //    result = result + 1.0f;
+  __ movss(t2, in);
+  __ roundss(t1, in, Immediate(1));
+  __ subss(t2, t1);
+  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+    // Direct constant area available.
+    Register constant_area = locations->InAt(1).AsRegister<Register>();
+    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+    __ j(kBelow, &skip_incr);
+    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+    __ Bind(&skip_incr);
+  } else {
+    // No constant area: go through stack.
+    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+    __ comiss(t2, Address(ESP, 4));
+    __ j(kBelow, &skip_incr);
+    __ addss(t1, Address(ESP, 0));
+    __ Bind(&skip_incr);
+    __ addl(ESP, Immediate(8));
+  }
 
-  // Add in the input.
-  __ addss(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
   __ movl(out, Immediate(kPrimIntMax));
-  // maxInt = int-to-float(out)
-  __ cvtsi2ss(maxInt, out);
-
-  // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, maxInt);
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = float-to-int-truncate(input)
-  __ cvttss2si(out, inPlusPointFive);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  __ cvtsi2ss(t2, out);
+  __ comiss(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttss2si(out, t1);
   __ Bind(&done);
 }
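
Editor's note: a scalar restatement of the whole sequence (a sketch; the explicit negative clamp avoids C++ undefined behavior that cvttss2si resolves in hardware by producing 0x80000000):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int32_t RoundFloat(float in) {
      float t1 = std::floor(in);        // roundss with round-down mode
      if (in - t1 >= 0.5f) {            // comiss; NaN compares false
        t1 += 1.0f;
      }
      if (std::isnan(t1)) {
        return 0;                       // unordered maps to 0
      }
      if (t1 >= 2147483648.0f) {        // (float)kPrimIntMax
        return std::numeric_limits<int32_t>::max();
      }
      if (t1 <= -2147483648.0f) {
        return std::numeric_limits<int32_t>::min();
      }
      return static_cast<int32_t>(t1);  // cvttss2si
    }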
 
-void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
-  // The inputs plus one temp.
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+                                      HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCallOnMainOnly,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
 }
 
-void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
   LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->WillCall());
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  X86Assembler* assembler = codegen->GetAssembler();
 
-  // Location of reference to data array.
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count.
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  // We need some place to pass the parameters.
+  __ subl(ESP, Immediate(16));
+  __ cfi().AdjustCFAOffset(16);
 
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register idx = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  // Pass the parameters at the bottom of the stack.
+  __ movsd(Address(ESP, 0), XMM0);
 
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
+  // If we have a second parameter, pass it next.
+  if (invoke->GetNumberOfArguments() == 2) {
+    __ movsd(Address(ESP, 8), XMM1);
+  }
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
-  codegen_->AddSlowPath(slow_path);
+  // Now do the actual call.
+  __ fs()->call(Address::Absolute(GetThreadOffset<kX86PointerSize>(entry)));
 
-  X86Assembler* assembler = GetAssembler();
+  // Extract the return value from the FP stack.
+  __ fstpl(Address(ESP, 0));
+  __ movsd(XMM0, Address(ESP, 0));
 
-  __ cmpl(idx, Address(obj, count_offset));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ j(kAboveEqual, slow_path->GetEntryLabel());
+  // And clean up the stack.
+  __ addl(ESP, Immediate(16));
+  __ cfi().AdjustCFAOffset(-16);
 
-  // out = out[2*idx].
-  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
 
-  __ Bind(slow_path->GetExitLabel());
+void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+                                        HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCallOnMainOnly,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
 }
 
 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
@@ -867,30 +1176,45 @@
 static void CheckPosition(X86Assembler* assembler,
                           Location pos,
                           Register input,
-                          Register length,
+                          Location length,
                           SlowPathCode* slow_path,
-                          Register input_len,
-                          Register temp) {
-  // Where is the length in the String?
+                          Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      // Check that length(input) >= length.
-      __ cmpl(Address(input, length_offset), length);
-      __ j(kLess, slow_path->GetEntryLabel());
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        if (length.IsConstant()) {
+          __ cmpl(Address(input, length_offset),
+                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
+        }
+        __ j(kLess, slow_path->GetEntryLabel());
+      }
     } else {
       // Check that length(input) >= pos.
-      __ movl(input_len, Address(input, length_offset));
-      __ cmpl(input_len, Immediate(pos_const));
+      __ movl(temp, Address(input, length_offset));
+      __ subl(temp, Immediate(pos_const));
       __ j(kLess, slow_path->GetEntryLabel());
 
       // Check that (length(input) - pos) >= length.
-      __ leal(temp, Address(input_len, -pos_const));
-      __ cmpl(temp, length);
+      if (length.IsConstant()) {
+        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmpl(temp, length.AsRegister<Register>());
+      }
       __ j(kLess, slow_path->GetEntryLabel());
     }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     Register pos_reg = pos.AsRegister<Register>();
@@ -904,7 +1228,11 @@
     // Check that (length(input) - pos) >= length.
     __ movl(temp, Address(input, length_offset));
     __ subl(temp, pos_reg);
-    __ cmpl(temp, length);
+    if (length.IsConstant()) {
+      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(temp, length.AsRegister<Register>());
+    }
     __ j(kLess, slow_path->GetEntryLabel());
   }
 }
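
Editor's note: all of the emitted branch combinations reduce to one predicate; a compact restatement (illustrative helper, with `input_length` standing for the array's length field):

    #include <cstdint>

    // The copy proceeds only when this holds; otherwise the slow path
    // runs. The length_is_input_length case is subsumed: when
    // length == input_length, the predicate forces pos == 0.
    bool PositionOk(int32_t pos, int32_t length, int32_t input_length) {
      if (pos < 0) {
        return false;                        // pos >= 0
      }
      return input_length - pos >= length;   // length(input) - pos >= length
    }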
@@ -956,11 +1284,11 @@
     __ movl(count, length.AsRegister<Register>());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
+  // Validity checks: source. Use src_base as a temporary register.
+  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
 
-  // Validity checks: dest.
-  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
+  // Validity checks: dest. Use src_base as a temporary register.
+  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
 
   // Okay, everything checks out.  Finally time to do the copy.
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
@@ -994,7 +1322,7 @@
 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1015,7 +1343,7 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pStringCompareTo)));
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1061,11 +1389,11 @@
     __ j(kEqual, &return_false);
   }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
   if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
     __ movl(ecx, Address(str, class_offset));
     __ cmpl(ecx, Address(arg, class_offset));
     __ j(kNotEqual, &return_false);
@@ -1160,10 +1488,11 @@
   DCHECK_EQ(out, EDI);
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1173,7 +1502,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
     codegen->AddSlowPath(slow_path);
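
Editor's note: the resulting three-way decision can be summarized as follows (an illustrative enum and helper, not part of the source):

    #include <cstdint>

    enum class CodePointCheck { kNone, kAlwaysSlowPath, kRuntimeCheck };

    // Large constants always need the slow path; small constants and
    // char-typed values can never exceed 0xFFFF, so no check is emitted;
    // everything else is compared against 0xFFFF at runtime.
    CodePointCheck Classify(bool is_constant, uint32_t constant_value,
                            bool is_char_typed) {
      if (is_constant) {
        return constant_value > 0xFFFFu ? CodePointCheck::kAlwaysSlowPath
                                        : CodePointCheck::kNone;
      }
      return is_char_typed ? CodePointCheck::kNone
                           : CodePointCheck::kRuntimeCheck;
    }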
@@ -1249,24 +1578,25 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, true);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, false);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1286,14 +1616,15 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromBytes)));
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1305,13 +1636,20 @@
 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   X86Assembler* assembler = GetAssembler();
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromChars)));
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1328,7 +1666,9 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+  __ fs()->call(
+      Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromString)));
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1568,29 +1908,51 @@
 
 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
-  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
+  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
 }
 
-static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
-                         bool is_volatile, X86Assembler* assembler) {
-  Register base = locations->InAt(1).AsRegister<Register>();
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
-  Location output = locations->Out();
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorX86* codegen) {
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();
+  Location output_loc = locations->Out();
 
   switch (type) {
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      Register output = output_loc.AsRegister<Register>();
+      __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      break;
+    }
+
     case Primitive::kPrimNot: {
-      Register output_reg = output.AsRegister<Register>();
-      __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        __ MaybeUnpoisonHeapReference(output_reg);
+      Register output = output_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-        Register output_lo = output.AsRegisterPairLow<Register>();
-        Register output_hi = output.AsRegisterPairHigh<Register>();
+        Register output_lo = output_loc.AsRegisterPairLow<Register>();
+        Register output_hi = output_loc.AsRegisterPairHigh<Register>();
         if (is_volatile) {
           // Need to use a XMM to read atomically.
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
@@ -1611,64 +1973,77 @@
   }
 }
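
Editor's note: on x86 the volatile variants need no extra fence on the load side; an illustrative sketch of the int case using std::atomic (the helper and the acquire mapping are assumptions for exposition, not the runtime's actual implementation):

    #include <atomic>
    #include <cstdint>

    int32_t UnsafeGetInt(const void* base, intptr_t offset, bool is_volatile) {
      auto* addr = reinterpret_cast<const std::atomic<int32_t>*>(
          reinterpret_cast<const uint8_t*>(base) + offset);
      // A plain movl satisfies both orderings on x86; acquire only
      // restricts compiler reordering.
      return addr->load(is_volatile ? std::memory_order_acquire
                                    : std::memory_order_relaxed);
    }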
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
-                                          bool is_long, bool is_volatile) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type,
+                                          bool is_volatile) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  if (is_long) {
+  if (type == Primitive::kPrimLong) {
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
     } else {
       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
     }
   } else {
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(),
+                      can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  }
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, true, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
 }
 
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 
@@ -1695,31 +2070,40 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
 }
 
 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
@@ -1757,7 +2141,7 @@
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
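+    // (MemoryFence() abstracts the any-any barrier here; depending on CPU
+    // features it may lower to MFENCE or an equivalent LOCK-prefixed no-op.
+    // This is an assumption from the helper's name; its body is not shown.)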
   }
 
   if (type == Primitive::kPrimNot) {
@@ -1771,31 +2155,31 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
@@ -1837,6 +2221,17 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
 }
 
@@ -1888,15 +2283,24 @@
       __ PoisonHeapReference(value);
     }
 
+    // TODO: Add a read barrier for the reference stored in the object
+    // before attempting the CAS, similar to the one in the
+    // art::Unsafe_compareAndSwapObject JNI implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
+    DCHECK(!kEmitCompilerReadBarrier);
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
 
-    // locked cmpxchg has full barrier semantics, and we don't need
+    // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
     __ setb(kZero, out.AsRegister<Register>());
     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
 
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value` has been moved to a temporary register, no need to
@@ -1929,8 +2333,8 @@
       LOG(FATAL) << "Unexpected CAS type " << type;
     }
 
-    // locked cmpxchg has full barrier semantics, and we don't need
-    // scheduling barriers at this time.
+    // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
+    // don't need scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
     __ setb(kZero, out.AsRegister<Register>());
@@ -1947,6 +2351,15 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
@@ -1972,7 +2385,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg = locations->InAt(0).AsRegister<Register>();
@@ -2003,7 +2416,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
@@ -2030,6 +2443,79 @@
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support; this makes the intrinsic
+    // fall back to a regular call instead of inline code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
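+  // Location::Any() admits a register, a stack slot, or a constant here;
+  // GenBitCount below handles all three forms.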
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler,
+                        CodeGeneratorX86* codegen,
+                        HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
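+    // (For instance, Long.bitCount(0x00ff00ff00ff00ffL) folds to the
+    // constant 32 here, with no popcnt instruction emitted.)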
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    int32_t result = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    codegen->Load32BitValue(out, result);
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ popcntl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ popcntl(out, Address(ESP, src.GetStackIndex()));
+    }
+  } else {
+    // The 64-bit case has to handle the value as two 32-bit halves.
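+    // popcount(x) == popcount(x & 0xffffffff) + popcount(x >> 32), so each
+    // half is counted with popcntl and the two counts are added at the end.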
+    Register temp = locations->GetTemp(0).AsRegister<Register>();
+    if (src.IsRegisterPair()) {
+      __ popcntl(temp, src.AsRegisterPairLow<Register>());
+      __ popcntl(out, src.AsRegisterPairHigh<Register>());
+    } else {
+      DCHECK(src.IsDoubleStackSlot());
+      __ popcntl(temp, Address(ESP, src.GetStackIndex()));
+      __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
+    }
+    __ addl(out, temp);
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2042,7 +2528,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenLeadingZeros(X86Assembler* assembler,
+                            CodeGeneratorX86* codegen,
+                            HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2055,11 +2543,7 @@
     } else {
       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
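+    // (Load32BitValue presumably keeps the xorl-for-zero peephole that the
+    // removed branch spelled out by hand; this is an assumption based on
+    // the code being replaced, as the helper's body is not shown.)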
     return;
   }
 
@@ -2126,8 +2610,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2135,8 +2618,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2151,7 +2633,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenTrailingZeros(X86Assembler* assembler,
+                             CodeGeneratorX86* codegen,
+                             HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2164,11 +2648,7 @@
     } else {
       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2222,8 +2702,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ false);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2231,75 +2710,544 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ true);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
+void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1)));
   locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
-static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) {
+void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations = invoke->GetLocations();
-  Register first_reg = locations->InAt(0).AsRegister<Register>();
-  Location second = locations->InAt(1);
+  X86Assembler* assembler = GetAssembler();
 
-  if (second.IsRegister()) {
-    Register second_reg = second.AsRegister<Register>();
-    if (is_left) {
-      __ roll(first_reg, second_reg);
-    } else {
-      __ rorl(first_reg, second_reg);
-    }
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  Register temp = temp_loc.AsRegister<Register>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check the static flags that prevent us from using the intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
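+    // Both flags are single bytes and adjacent in memory, so one 16-bit
+    // compare against zero tests the two of them at once.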
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
-    Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-    if (is_left) {
-      __ roll(first_reg, imm);
-    } else {
-      __ rorl(first_reg, imm);
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
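+  // (The referent load above doubles as the implicit null check on `obj`;
+  // a fault at that instruction is turned into a NullPointerException.)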
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
+  return instruction->InputAt(input0) == instruction->InputAt(input1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  if (invoke->GetLocations() != nullptr) {
+    // Need a byte register for marking.
+    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
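+    // (On 32-bit x86 only EAX, EBX, ECX and EDX have byte-addressable low
+    // halves, hence a fixed byte register is requested.)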
+
+    static constexpr size_t kSrc = 0;
+    static constexpr size_t kSrcPos = 1;
+    static constexpr size_t kDest = 2;
+    static constexpr size_t kDestPos = 3;
+    static constexpr size_t kLength = 4;
+
+    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
+        !invoke->InputAt(kDestPos)->IsIntConstant() &&
+        !invoke->InputAt(kLength)->IsIntConstant()) {
+      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
+          !IsSameInput(invoke, kSrcPos, kLength) &&
+          !IsSameInput(invoke, kDestPos, kLength) &&
+          !IsSameInput(invoke, kSrc, kDest)) {
+        // Not enough registers; make the length also take a stack slot.
+        invoke->GetLocations()->SetInAt(kLength, Location::Any());
+      }
     }
   }
 }
 
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
+void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  Register src = locations->InAt(0).AsRegister<Register>();
+  Location src_pos = locations->InAt(1);
+  Register dest = locations->InAt(2).AsRegister<Register>();
+  Location dest_pos = locations->InAt(3);
+  Location length_arg = locations->InAt(4);
+  Location length = length_arg;
+  Location temp1_loc = locations->GetTemp(0);
+  Register temp1 = temp1_loc.AsRegister<Register>();
+  Location temp2_loc = locations->GetTemp(1);
+  Register temp2 = temp2_loc.AsRegister<Register>();
+
+  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
+
+  NearLabel conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to the slow path if we
+  // need to do forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ cmpl(src, dest);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ cmpl(src, dest);
+        __ j(kNotEqual, &conditions_on_positions_validated);
+      }
+      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
+      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ cmpl(src, dest);
+      __ j(kNotEqual, &conditions_on_positions_validated);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  Location temp3_loc = locations->GetTemp(2);
+  Register temp3 = temp3_loc.AsRegister<Register>();
+  if (length.IsStackSlot()) {
+    __ movl(temp3, Address(ESP, length.GetStackIndex()));
+    length = Location::RegisterLocation(temp3);
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
+    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+        // Bail out if the source is not a non-primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        __ testl(temp1, temp1);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        __ movl(temp1, Address(src, class_offset));
+        __ MaybeUnpoisonHeapReference(temp1);
+        // Bail out if the source is not a non-primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ movl(temp1, Address(temp1, component_offset));
+        __ testl(temp1, temp1);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
+      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+    }
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (length.Equals(Location::RegisterLocation(temp3))) {
+        // When Baker read barriers are enabled, register `temp3`,
+        // which in the present case contains the `length` parameter,
+        // will be overwritten below.  Make the `length` location
+        // reference the original stack location; it will be moved
+        // back to `temp3` later if necessary.
+        DCHECK(length_arg.IsStackSlot());
+        length = length_arg;
+      }
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non-primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ testl(temp2, temp2);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ cmpl(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        NearLabel do_copy;
+        __ j(kEqual, &do_copy);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ movl(temp1, Address(dest, class_offset));
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        __ MaybeUnpoisonHeapReference(temp1);
+        // Bail out if the destination is not a non-primitive array.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        __ movl(temp2, Address(temp1, component_offset));
+        __ testl(temp2, temp2);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp2);
+        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+        // Re-poison the heap reference to make the compare instruction below
+        // compare two poisoned references.
+        __ PoisonHeapReference(temp1);
+      }
+
+      // Note: if heap poisoning is on, we are comparing two poisoned references here.
+      __ cmpl(temp1, Address(src, class_offset));
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        NearLabel do_copy;
+        __ j(kEqual, &do_copy);
+        __ MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ movl(temp1, Address(temp1, component_offset));
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non-primitive array.
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+      __ testl(temp1, temp1);
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ movl(temp1, Address(src, class_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      __ movl(temp1, Address(temp1, component_offset));
+      __ testl(temp1, temp1);
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp1);
+    }
+    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
+    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Compute the base source address in `temp1`.
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  DCHECK_EQ(element_size, 4);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp1, Address(src, element_size * constant + offset));
+  } else {
+    __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // If it is needed (in the case of the fast-path loop), the base
+    // destination address is computed later, as `temp2` is used for
+    // intermediate computations.
+
+    // Compute the end source address in `temp3`.
+    if (length.IsConstant()) {
+      int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp3, Address(temp1, element_size * constant));
+    } else {
+      if (length.IsStackSlot()) {
+        // Location `length` is pointing at a stack slot again, as
+        // register `temp3` (which contained the length parameter
+        // earlier) has been overwritten; restore it now.
+        DCHECK(length.Equals(length_arg));
+        __ movl(temp3, Address(ESP, length.GetStackIndex()));
+        length = Location::RegisterLocation(temp3);
+      }
+      __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+    }
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       for (size_t i = 0; i != length; ++i) {
+    //         dest_array[dest_pos + i] =
+    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+    //       }
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr);
+    //     }
+    //   }
+
+    NearLabel loop, done;
+
+    // Don't enter the copy loop if `length == 0`.
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ movl(temp2, Address(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Load fence to prevent load-load reordering.
+    // Note that this is a no-op, thanks to the x86 memory model.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCode* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with SHR.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
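+    // SHR leaves the last bit shifted out in CF, so the gray bit can be
+    // tested with the single jump-on-carry below; no TEST is needed.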
+    __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+
+    // Set the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp2, Address(dest, element_size * constant + offset));
+    } else {
+      __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
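+    // x86 has no memory-to-memory MOV; the PUSH/POP pair below moves each
+    // 32-bit element through the stack without consuming a spare register.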
+    __ pushl(Address(temp1, 0));
+    __ cfi().AdjustCFAOffset(4);
+    __ popl(Address(temp2, 0));
+    __ cfi().AdjustCFAOffset(-4);
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp2, Address(dest, element_size * constant + offset));
+    } else {
+      __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+    }
+
+    // Compute the end source address in `temp3`.
+    if (length.IsConstant()) {
+      int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp3, Address(temp1, element_size * constant));
+    } else {
+      __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    NearLabel loop, done;
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+    __ Bind(&loop);
+    __ pushl(Address(temp1, 0));
+    __ cfi().AdjustCFAOffset(4);
+    __ popl(Address(temp2, 0));
+    __ cfi().AdjustCFAOffset(-4);
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+    __ Bind(&done);
+  }
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       Register(kNoRegister),
+                       /* value_can_be_null */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
 }
 
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ true);
-}
+UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
 
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
+// Intrinsics introduced in JDK 1.8.
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
 
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ false);
-}
-
-// Unimplemented intrinsics.
-
-#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
-void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}                                                                                       \
-void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
-}
-
-UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
-
-#undef UNIMPLEMENTED_INTRINSIC
+UNREACHABLE_INTRINSICS(X86)
 
 #undef __
 
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index fefe9c6..08bd197 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -61,7 +61,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 14c65c9..311e1cd 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -50,8 +50,11 @@
 
 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
-  const LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  LocationSummary* res = invoke->GetLocations();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
@@ -61,6 +64,65 @@
 
 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
 
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+      : SlowPathCode(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+    __ Bind(GetEntryLabel());
+    NearLabel loop;
+    __ Bind(&loop);
+    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    // TODO: Inline the mark bit check before calling the runtime?
+    // TMP = ReadBarrier::Mark(TMP);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ MaybePoisonHeapReference(CpuRegister(TMP));
+    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+    __ addl(src_curr_addr, Immediate(element_size));
+    __ addl(dst_curr_addr, Immediate(element_size));
+    __ cmpl(src_curr_addr, src_stop_addr);
+    __ j(kNotEqual, &loop);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
+
+#undef __
+
 #define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -99,10 +161,10 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -113,10 +175,10 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -214,7 +276,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -222,7 +284,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -261,7 +323,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
@@ -269,7 +331,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -372,7 +434,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -380,7 +443,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -388,7 +452,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -396,7 +461,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -445,7 +511,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
@@ -453,7 +519,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -461,7 +527,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -469,7 +535,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -519,7 +585,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -576,12 +642,13 @@
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+    locations->AddTemp(Location::RequiresFpuRegister());
     return;
   }
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -600,39 +667,36 @@
     return;
   }
 
-  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  NearLabel skip_incr, done;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Load 0.5 into inPlusPointFive.
-  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5f)
+  //    result = result + 1.0f;
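+  // (Worked example of why the removed floor(in + 0.5f) approach was wrong,
+  // assuming the default round-to-nearest-even mode: for in = 0.49999997f,
+  // in + 0.5f rounds up to exactly 1.0f, so the old code returned 1 while
+  // Math.round requires 0.)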
+  __ movss(t2, in);
+  __ roundss(t1, in, Immediate(1));
+  __ subss(t2, t1);
+  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
+  __ j(kBelow, &skip_incr);
+  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addss(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
-  // Load maxInt into out.
-  codegen_->Load64BitValue(out, kPrimIntMax);
-
-  // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = float-to-int-truncate(input)
-  __ cvttss2si(out, inPlusPointFive);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
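+  // (Note that cvtsi2ss rounds kPrimIntMax up to exactly 2^31 as a float,
+  // so the kAboveEqual branch below also clips an input of exactly 2^31.)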
+  codegen_->Load32BitValue(out, kPrimIntMax);
+  __ cvtsi2ss(t2, out);
+  __ comiss(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttss2si(out, t1);
   __ Bind(&done);
 }
 
@@ -647,83 +711,219 @@
     return;
   }
 
-  // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  NearLabel skip_incr, done;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Load 0.5 into inPlusPointFive.
-  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5)
+  //    result = result + 1.0;
+  __ movsd(t2, in);
+  __ roundsd(t1, in, Immediate(1));
+  __ subsd(t2, t1);
+  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
+  __ j(kBelow, &skip_incr);
+  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addsd(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
-
-  // Load maxLong into out.
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
   codegen_->Load64BitValue(out, kPrimLongMax);
-
-  // if inPlusPointFive >= maxLong goto done
-  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = double-to-long-truncate(input)
-  __ cvttsd2si(out, inPlusPointFive, true);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  __ cvtsi2sd(t2, out, /* is64bit */ true);
+  __ comisd(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttsd2si(out, t1, /* is64bit */ true);
   __ Bind(&done);
 }
 
-void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
-  // The inputs plus one temp.
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+                                      HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCallOnMainOnly,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // The native code may clobber XMM registers that are non-volatile for ART but
+  // volatile for native calls. Listing them as temporaries ensures that they are
+  // saved in the prologue and properly restored.
+  for (auto fp_reg : non_volatile_xmm_regs) {
+    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+  }
 }
 
-void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
+                          QuickEntrypointEnum entry) {
   LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->WillCall());
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  X86_64Assembler* assembler = codegen->GetAssembler();
 
-  // Location of reference to data array.
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count.
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64PointerSize>(entry), true));
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
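
For orientation, the gs()-relative absolute call above goes through the per-thread Quick entrypoint table: the GS segment base points at the current Thread, and each QuickEntrypointEnum value maps to a fixed offset holding a function pointer. A hypothetical illustration (the struct and field names here are invented, not ART's actual types):

// Hypothetical sketch of a per-thread entrypoint table call.
struct FakeThread {
  double (*quick_cos)(double);  // one slot per entrypoint, at a fixed offset
};

double CallCosEntrypoint(FakeThread* self, double arg) {
  // gs()->call(Address::Absolute(offset)) amounts to an indirect call
  // through the slot at `offset` from the thread pointer.
  return self->quick_cos(arg);
}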
 
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
 
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
+void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
-  codegen_->AddSlowPath(slow_path);
+void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
 
-  X86_64Assembler* assembler = GetAssembler();
+void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
 
-  __ cmpl(idx, Address(obj, count_offset));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ j(kAboveEqual, slow_path->GetEntryLabel());
+void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
 
-  // out = out[2*idx].
-  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
+void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
 
-  __ Bind(slow_path->GetExitLabel());
+void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+                                        HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCallOnMainOnly,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // The native code may clobber XMM registers that are non-volatile for ART but
+  // volatile for native calls. Listing them as temporaries ensures that they are
+  // saved in the prologue and properly restored.
+  for (auto fp_reg : non_volatile_xmm_regs) {
+    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
@@ -770,7 +970,6 @@
                           CpuRegister input,
                           Location length,
                           SlowPathCode* slow_path,
-                          CpuRegister input_len,
                           CpuRegister temp,
                           bool length_is_input_length = false) {
   // Where is the length in the Array?
@@ -791,12 +990,11 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ movl(input_len, Address(input, length_offset));
-      __ cmpl(input_len, Immediate(pos_const));
+      __ movl(temp, Address(input, length_offset));
+      __ subl(temp, Immediate(pos_const));
       __ j(kLess, slow_path->GetEntryLabel());
 
       // Check that (length(input) - pos) >= length.
-      __ leal(temp, Address(input_len, -pos_const));
       if (length.IsConstant()) {
         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
       } else {
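
The rewrite in this hunk folds the old cmpl + leal pair into a single subl: one subtraction produces both the sign test (position out of range) and the remaining-length value for the second comparison. A C++ sketch of the check being encoded, for the constant-position case:

#include <cstdint>

// Reference model of the position check above (true means in range).
bool PositionOk(int32_t input_length, int32_t pos, int32_t copy_length) {
  int32_t remaining = input_length - pos;  // movl temp, [input+length]; subl temp, pos
  if (remaining < 0) {
    return false;                          // j kLess -> slow path
  }
  return remaining >= copy_length;         // cmpl temp, length; j kLess -> slow path
}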
@@ -871,11 +1069,11 @@
     __ j(kLess, slow_path->GetEntryLabel());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);
+  // Validity checks: source. Use src_base as a temporary register.
+  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);
 
-  // Validity checks: dest.
-  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);
+  // Validity checks: dest. Use src_base as a temporary register.
+  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);
 
   // We need the count in RCX.
   if (length.IsConstant()) {
@@ -914,10 +1112,20 @@
 
 
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
   X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -925,69 +1133,75 @@
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
   Location src_pos = locations->InAt(1);
   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
   Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
-  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
-  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
-  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+  Location temp1_loc = locations->GetTemp(0);
+  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+  Location temp2_loc = locations->GetTemp(1);
+  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+  Location temp3_loc = locations->GetTemp(2);
+  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+  Location TMP_loc = Location::RegisterLocation(TMP);
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
 
-  NearLabel ok;
+  NearLabel conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
-  if (!optimizations.GetDestinationIsSource()) {
-    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
-      __ cmpl(src, dest);
-    }
-  }
-
   // If source and destination are the same, we go to slow path if we need to do
   // forward copying.
   if (src_pos.IsConstant()) {
     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
     if (dest_pos.IsConstant()) {
-      // Checked when building locations.
-      DCHECK(!optimizations.GetDestinationIsSource()
-             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ cmpl(src, dest);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      }
     } else {
       if (!optimizations.GetDestinationIsSource()) {
-        __ j(kNotEqual, &ok);
+        __ cmpl(src, dest);
+        __ j(kNotEqual, &conditions_on_positions_validated);
       }
       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
-      __ j(kGreater, slow_path->GetEntryLabel());
+      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
-      __ j(kNotEqual, &ok);
+      __ cmpl(src, dest);
+      __ j(kNotEqual, &conditions_on_positions_validated);
     }
     if (dest_pos.IsConstant()) {
       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
       __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
-      __ j(kLess, slow_path->GetEntryLabel());
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
     } else {
       __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
-      __ j(kLess, slow_path->GetEntryLabel());
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
     }
   }
 
-  __ Bind(&ok);
+  __ Bind(&conditions_on_positions_validated);
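
All the branches above encode one predicate: a forward element-by-element copy is only unsafe when source and destination are the same array and the destination window starts strictly after the source window. A compact model of the condition the constant and non-constant cases both test:

// Reference model of the position validation above: when this returns
// true the intrinsic bails out to the slow path, which copies safely.
bool NeedsSlowPathCopy(void* src, int32_t src_pos, void* dest, int32_t dest_pos) {
  return src == dest && src_pos < dest_pos;
}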
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
     __ testl(src, src);
-    __ j(kEqual, slow_path->GetEntryLabel());
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
     // Bail out if the destination is null.
     __ testl(dest, dest);
-    __ j(kEqual, slow_path->GetEntryLabel());
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
   // If the length is negative, bail out.
@@ -996,7 +1210,7 @@
       !optimizations.GetCountIsSourceLength() &&
       !optimizations.GetCountIsDestinationLength()) {
     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
-    __ j(kLess, slow_path->GetEntryLabel());
+    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   }
 
   // Validity checks: source.
@@ -1004,9 +1218,8 @@
                 src_pos,
                 src,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -1014,9 +1227,8 @@
                 dest_pos,
                 dest,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsDestinationLength());
 
   if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1024,36 +1236,80 @@
     // type of the destination array. We do two checks: the classes are the same,
     // or the destination is Object[]. If none of these checks succeed, we go to the
     // slow path.
-    __ movl(temp1, Address(dest, class_offset));
-    __ movl(temp2, Address(src, class_offset));
+
     bool did_unpoison = false;
-    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
-        !optimizations.GetSourceIsNonPrimitiveArray()) {
-      // One or two of the references need to be unpoisoned. Unpoisoned them
-      // both to make the identity check valid.
-      __ MaybeUnpoisonHeapReference(temp1);
-      __ MaybeUnpoisonHeapReference(temp2);
-      did_unpoison = true;
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Register `temp1` is not trashed by the read barrier emitted
+      // by GenerateFieldLoadWithBakerReadBarrier below, as that
+      // method produces a call to a ReadBarrierMarkRegX entry point,
+      // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // If heap poisoning is enabled, `temp1` and `temp2` have been
+      // unpoisoned by the previous calls to
+      // GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ movl(temp1, Address(dest, class_offset));
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ movl(temp2, Address(src, class_offset));
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
     }
 
     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
       // Bail out if the destination is not a non primitive array.
-      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
-      __ testl(CpuRegister(TMP), CpuRegister(TMP));
-      __ j(kEqual, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ TMP = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `TMP` has been unpoisoned by
+        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ TMP = temp1->component_type_
+        __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      }
       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
     }
 
     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
       // Bail out if the source is not a non primitive array.
-      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
-      __ testl(CpuRegister(TMP), CpuRegister(TMP));
-      __ j(kEqual, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // For the same reason given earlier, `temp1` is not trashed by the
+        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+        // /* HeapReference<Class> */ TMP = temp2->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `TMP` has been unpoisoned by
+        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ TMP = temp2->component_type_
+        __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      }
       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
     }
 
     __ cmpl(temp1, temp2);
@@ -1061,35 +1317,61 @@
     if (optimizations.GetDestinationIsTypedObjectArray()) {
       NearLabel do_copy;
       __ j(kEqual, &do_copy);
-      if (!did_unpoison) {
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
+      } else {
+        if (!did_unpoison) {
+          __ MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ movl(temp1, Address(temp1, component_offset));
         __ MaybeUnpoisonHeapReference(temp1);
+        // No need to unpoison the following heap reference load, as
+        // we're comparing against null.
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
       }
-      __ movl(temp1, Address(temp1, component_offset));
-      __ MaybeUnpoisonHeapReference(temp1);
-      __ movl(temp1, Address(temp1, super_offset));
-      // No need to unpoison the result, we're comparing against null.
-      __ testl(temp1, temp1);
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
       __ Bind(&do_copy);
     } else {
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
     }
   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
     // Bail out if the source is not a non primitive array.
-    __ movl(temp1, Address(src, class_offset));
-    __ MaybeUnpoisonHeapReference(temp1);
-    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
-    __ testl(CpuRegister(TMP), CpuRegister(TMP));
-    __ j(kEqual, slow_path->GetEntryLabel());
-    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ TMP = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ movl(temp1, Address(src, class_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ TMP = temp1->component_type_
+      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+      // No need to unpoison `TMP` now, as we're comparing against null.
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    }
     __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
   // Compute base source address, base destination address, and end source address.
 
-  uint32_t element_size = sizeof(int32_t);
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1112,34 +1394,100 @@
     __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
   }
 
-  // Iterate over the arrays and do a raw copy of the objects. We don't need to
-  // poison/unpoison, nor do any read barrier as the next uses of the destination
-  // array will do it.
-  NearLabel loop, done;
-  __ cmpl(temp1, temp3);
-  __ j(kEqual, &done);
-  __ Bind(&loop);
-  __ movl(CpuRegister(TMP), Address(temp1, 0));
-  __ movl(Address(temp2, 0), CpuRegister(TMP));
-  __ addl(temp1, Immediate(element_size));
-  __ addl(temp2, Immediate(element_size));
-  __ cmpl(temp1, temp3);
-  __ j(kNotEqual, &loop);
-  __ Bind(&done);
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    NearLabel loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ movl(CpuRegister(TMP), Address(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Load fence to prevent load-load reordering.
+    // Note that this is a no-op, thanks to the x86-64 memory model.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCode* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with SHR.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
+    __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+    __ movl(CpuRegister(TMP), Address(temp1, 0));
+    __ movl(Address(temp2, 0), CpuRegister(TMP));
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    NearLabel loop, done;
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+    __ Bind(&loop);
+    __ movl(CpuRegister(TMP), Address(temp1, 0));
+    __ movl(Address(temp2, 0), CpuRegister(TMP));
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+    __ Bind(&done);
+  }
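
The gray test above relies on an x86 flag trick: shifting the lock word right by (kReadBarrierStateShift + 1) drops the read barrier state's low bit into the carry flag, so a single jc selects the slow-path copy. A sketch of the bit being tested, assuming the gray state has value 1 as the static_asserts require:

#include <cstdint>

// Model of the SHR/carry check above: gray iff the low rb_state bit is set.
bool IsGray(uint32_t lock_word, uint32_t read_barrier_state_shift) {
  // shrl(TMP, shift + 1) moves exactly this bit into CF.
  return ((lock_word >> read_barrier_state_shift) & 1u) != 0;
}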
 
   // We only need one card marking on the destination array.
   codegen_->MarkGCCard(temp1,
                        temp2,
                        dest,
                        CpuRegister(kNoRegister),
-                       false);
+                       /* value_can_be_null */ false);
 
-  __ Bind(slow_path->GetExitLabel());
+  __ Bind(intrinsic_slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1160,8 +1508,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pStringCompareTo),
+                                  /* no_rip */ true));
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1200,17 +1548,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ testl(arg, arg);
-  __ j(kEqual, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ testl(arg, arg);
+    __ j(kEqual, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ movl(rcx, Address(str, class_offset));
-  __ cmpl(rcx, Address(arg, class_offset));
-  __ j(kNotEqual, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ movl(rcx, Address(str, class_offset));
+    __ cmpl(rcx, Address(arg, class_offset));
+    __ j(kNotEqual, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ cmpl(str, arg);
@@ -1301,10 +1654,11 @@
   DCHECK_EQ(out.AsRegister(), RDI);
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1314,7 +1668,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
     slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
     codegen->AddSlowPath(slow_path);
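
In outline, the dispatch in this hunk has three outcomes: a constant above 0xFFFF always takes the slow path, a char-typed argument (or a small constant) can never exceed 0xFFFF so both the runtime check and the slow path are omitted, and only a non-char int needs the cmpl against 0xFFFF. A sketch of that classification, with invented names:

#include <cstdint>

enum class Dispatch { kFastPathOnly, kAlwaysSlowPath, kRuntimeCheck };

// Hypothetical model of the code-point dispatch above.
Dispatch ClassifyCodePoint(bool is_constant, uint32_t constant_value, bool is_char) {
  if (is_constant) {
    return constant_value > 0xFFFFu ? Dispatch::kAlwaysSlowPath
                                    : Dispatch::kFastPathOnly;  // decided at compile time
  }
  return is_char ? Dispatch::kFastPathOnly : Dispatch::kRuntimeCheck;
}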
@@ -1352,7 +1706,7 @@
     // Ensure we have a start index >= 0;
     __ xorl(counter, counter);
     __ cmpl(start_index, Immediate(0));
-    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.
+    __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
 
     // Move to the start of the string: string_obj + value_offset + 2 * start_index.
     __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
@@ -1389,24 +1743,25 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, true);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, false);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1426,15 +1781,17 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+                                                          pAllocStringFromBytes),
+                                  /* no_rip */ true));
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1446,14 +1803,22 @@
 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   X86_64Assembler* assembler = GetAssembler();
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+                                                          pAllocStringFromChars),
+                                  /* no_rip */ true));
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1470,8 +1835,10 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+                                                          pAllocStringFromString),
+                                  /* no_rip */ true));
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1605,7 +1972,7 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
+  locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
 }
 
 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
@@ -1695,26 +2062,49 @@
 
 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
-  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
+                                                    /* no_rip */ true));
 }
 
-static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
-                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
-  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
-  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile ATTRIBUTE_UNUSED,
+                         CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+  Location base_loc = locations->InAt(1);
+  CpuRegister base = base_loc.AsRegister<CpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
+  Location output_loc = locations->Out();
+  CpuRegister output = output_loc.AsRegister<CpuRegister>();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        __ MaybeUnpoisonHeapReference(trg);
-      }
+      __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
       break;
 
+    case Primitive::kPrimNot: {
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
+      }
+      break;
+    }
+
     case Primitive::kPrimLong:
-      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
       break;
 
     default:
@@ -1723,53 +2113,66 @@
   }
 }
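
A note on why `is_volatile` is unused above: on x86-64 every aligned load already carries the acquire semantics a volatile get needs, so the volatile and non-volatile variants emit the same plain mov. A rough C++ equivalent for the int case (the atomic reinterpretation is illustrative, not ART's implementation):

#include <atomic>
#include <cstdint>

// Sketch: on x86-64 both orderings below compile to the same mov.
int32_t UnsafeGetInt(const void* base, intptr_t offset, bool is_volatile) {
  auto* addr = reinterpret_cast<const std::atomic<int32_t>*>(
      reinterpret_cast<const char*>(base) + offset);
  return addr->load(is_volatile ? std::memory_order_acquire
                                : std::memory_order_relaxed);
}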
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 
@@ -1839,7 +2242,7 @@
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
@@ -1853,31 +2256,31 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
@@ -1909,6 +2312,17 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
 }
 
@@ -1959,15 +2373,24 @@
       __ PoisonHeapReference(CpuRegister(value_reg));
     }
 
+    // TODO: Add a read barrier for the reference stored in the object
+    // before attempting the CAS, similar to the one in the
+    // art::Unsafe_compareAndSwapObject JNI implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
+    DCHECK(!kEmitCompilerReadBarrier);
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
 
-    // locked cmpxchg has full barrier semantics, and we don't need
+    // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
     __ setcc(kZero, out);
     __ movzxb(out, out);
 
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value_reg` has been moved to a temporary register, no need
@@ -1992,7 +2415,7 @@
       LOG(FATAL) << "Unexpected CAS type " << type;
     }
 
-    // locked cmpxchg has full barrier semantics, and we don't need
+    // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
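
What the LOCK CMPXCHG plus setcc/movzxb sequence computes can be stated with std::atomic: RAX implicitly holds the expected value, ZF records whether the exchange happened, and the boolean result is the zero-extended flag. A minimal model:

#include <atomic>
#include <cstdint>

// compare_exchange_strong is the C++ spelling of lock cmpxchg; its
// return value plays the role of setcc(kZero) + movzxb above.
bool Cas32(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
  return addr->compare_exchange_strong(expected, value);
}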
@@ -2010,6 +2433,15 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
@@ -2035,7 +2467,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
@@ -2079,7 +2511,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
@@ -2101,6 +2533,200 @@
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
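
The SwapBits64 calls above follow the classic divide-and-conquer bit reversal: each pass exchanges bit groups of width `shift` selected by `mask` (1-, 2-, then 4-bit groups), and a byte swap finishes the job. A reference model, using the GCC/Clang builtin as a stand-in for x86-64's bswap:

#include <cstdint>

// Reference model of the reversal built from SwapBits64 passes.
uint64_t ReverseBits64(uint64_t x) {
  x = ((x >> 1) & UINT64_C(0x5555555555555555)) | ((x & UINT64_C(0x5555555555555555)) << 1);
  x = ((x >> 2) & UINT64_C(0x3333333333333333)) | ((x & UINT64_C(0x3333333333333333)) << 2);
  x = ((x >> 4) & UINT64_C(0x0f0f0f0f0f0f0f0f)) | ((x & UINT64_C(0x0f0f0f0f0f0f0f0f)) << 4);
  return __builtin_bswap64(x);  // byte reversal completes the bit reversal
}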
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler,
+                        CodeGeneratorX86_64* codegen,
+                        HInvoke* invoke,
+                        bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    int32_t result = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    codegen->Load32BitValue(out, result);
+    return;
+  }
+
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ popcntq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ popcntl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+}
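
The constant branch above folds the popcount at compile time, so only a 32-bit immediate load is emitted; POPCOUNT in the patch is ART's utility for exactly this. A sketch of the folding, using the compiler builtins as stand-ins:

#include <cstdint>

// Model of the compile-time fold in GenBitCount above.
int32_t FoldBitCount(int64_t value, bool is_long) {
  return is_long ? __builtin_popcountll(static_cast<uint64_t>(value))
                 : __builtin_popcount(static_cast<uint32_t>(value));
}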
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
+}
+
+static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
+                             : Location::RequiresRegister());  // any will do
+}
+
+static void GenOneBit(X86_64Assembler* assembler,
+                      CodeGeneratorX86_64* codegen,
+                      HInvoke* invoke,
+                      bool is_high, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    if (value == 0) {
+      __ xorl(out, out);  // Clears upper bits too.
+      return;
+    }
+    // Nonzero value.
+    if (is_high) {
+      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
+                      : 31 - CLZ(static_cast<uint32_t>(value));
+    } else {
+      value = is_long ? CTZ(static_cast<uint64_t>(value))
+                      : CTZ(static_cast<uint32_t>(value));
+    }
+    if (is_long) {
+      codegen->Load64BitValue(out, 1ULL << value);
+    } else {
+      codegen->Load32BitValue(out, 1 << value);
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  if (is_high) {
+    // Use architectural support: basically 1 << bsr.
+    if (src.IsRegister()) {
+      if (is_long) {
+        __ bsrq(tmp, src.AsRegister<CpuRegister>());
+      } else {
+        __ bsrl(tmp, src.AsRegister<CpuRegister>());
+      }
+    } else if (is_long) {
+      DCHECK(src.IsDoubleStackSlot());
+      __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+    }
+    // BSR sets ZF if the input was zero.
+    NearLabel is_zero, done;
+    __ j(kEqual, &is_zero);
+    __ movl(out, Immediate(1));  // Clears upper bits too.
+    if (is_long) {
+      __ shlq(out, tmp);
+    } else {
+      __ shll(out, tmp);
+    }
+    __ jmp(&done);
+    __ Bind(&is_zero);
+    __ xorl(out, out);  // Clears upper bits too.
+    __ Bind(&done);
+  } else {
+    // Copy input into temporary.
+    if (src.IsRegister()) {
+      if (is_long) {
+        __ movq(tmp, src.AsRegister<CpuRegister>());
+      } else {
+        __ movl(tmp, src.AsRegister<CpuRegister>());
+      }
+    } else if (is_long) {
+      DCHECK(src.IsDoubleStackSlot());
+      __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+    }
+    // Do the bit twiddling: basically tmp & -tmp.
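+    // E.g. tmp == 0b0110 gives -tmp == ...11111010, so tmp & -tmp == 0b0010:
+    // only the lowest set bit survives (illustration).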
+    if (is_long) {
+      __ movq(out, tmp);
+      __ negq(tmp);
+      __ andq(out, tmp);
+    } else {
+      __ movl(out, tmp);
+      __ negl(tmp);
+      __ andl(out, tmp);
+    }
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2109,7 +2735,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenLeadingZeros(X86_64Assembler* assembler,
+                            CodeGeneratorX86_64* codegen,
+                            HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
@@ -2123,11 +2751,7 @@
     } else {
       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
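+    // (Assuming the usual helper behavior, Load32BitValue emits the same
+    // xorl for a zero value, so no separate zero check is needed here.)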
     return;
   }
 
@@ -2166,8 +2790,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2175,8 +2798,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -2187,7 +2809,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenTrailingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenTrailingZeros(X86_64Assembler* assembler,
+                             CodeGeneratorX86_64* codegen,
+                             HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
@@ -2201,11 +2825,7 @@
     } else {
       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2239,8 +2859,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ false);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2248,107 +2867,79 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ true);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
-  Location second = locations->InAt(1);
-
-  if (is_long) {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ rolq(first_reg, second_reg);
-      } else {
-        __ rorq(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
-      if (is_left) {
-        __ rolq(first_reg, imm);
-      } else {
-        __ rorq(first_reg, imm);
-      }
-    }
-  } else {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ roll(first_reg, second_reg);
-      } else {
-        __ rorl(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-      if (is_left) {
-        __ roll(first_reg, imm);
-      } else {
-        __ rorl(first_reg, imm);
-      }
-    }
+void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
   }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
+void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent us from using the intrinsic.
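+  // When the two byte-sized flags are adjacent (slow path flag at disable
+  // flag offset + 1), a single 16-bit compare against zero tests both bytes
+  // at once; otherwise two separate byte compares are needed.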
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
 }
 
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true);
-}
+UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
+UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
+// Java 1.8 intrinsics.
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
 
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false);
-}
-
-// Unimplemented intrinsics.
-
-#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
-void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}                                                                                       \
-void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
-}
-
-UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-
-#undef UNIMPLEMENTED_INTRINSIC
+UNREACHABLE_INTRINSICS(X86_64)
 
 #undef __
 
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 6894e1b..155ff65 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -61,7 +61,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..a0ded74 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -30,8 +30,8 @@
 static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
   DCHECK(instruction->IsInLoop());
   HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-  for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
-    HLoopInformation* input_loop = it.Current()->GetBlock()->GetLoopInformation();
+  for (const HInstruction* input : instruction->GetInputs()) {
+    HLoopInformation* input_loop = input->GetBlock()->GetLoopInformation();
     // We only need to check whether the input is defined in the loop. If it is not
     // it is defined before the loop.
     if (input_loop != nullptr && input_loop->IsIn(*info)) {
@@ -79,8 +79,15 @@
 
 void LICM::Run() {
   DCHECK(side_effects_.HasRun());
+
   // Only used during debug.
-  ArenaBitVector visited(graph_->GetArena(), graph_->GetBlocks().size(), false);
+  ArenaBitVector* visited = nullptr;
+  if (kIsDebugBuild) {
+    visited = new (graph_->GetArena()) ArenaBitVector(graph_->GetArena(),
+                                                      graph_->GetBlocks().size(),
+                                                      false,
+                                                      kArenaAllocLICM);
+  }
 
   // Post order visit to visit inner loops before outer loops.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -99,10 +106,19 @@
       DCHECK(inner->IsInLoop());
       if (inner->GetLoopInformation() != loop_info) {
         // Thanks to post order visit, inner loops were already visited.
-        DCHECK(visited.IsBitSet(inner->GetBlockId()));
+        DCHECK(visited->IsBitSet(inner->GetBlockId()));
         continue;
       }
-      visited.SetBit(inner->GetBlockId());
+      if (kIsDebugBuild) {
+        visited->SetBit(inner->GetBlockId());
+      }
+
+      if (loop_info->ContainsIrreducibleLoop()) {
+        // We cannot apply LICM in an irreducible loop, or in a natural loop
+        // containing an irreducible loop.
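+        // (An irreducible loop has multiple entry points, so there is no
+        // single preheader into which invariant instructions could be
+        // hoisted.)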
+        continue;
+      }
+      DCHECK(!loop_info->IsIrreducible());
 
       // We can move an instruction that can throw only if it is the first
       // throwing instruction in the loop. Note that the first potentially
@@ -121,8 +137,11 @@
           // phi in it.
           if (instruction->NeedsEnvironment()) {
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
+          } else {
+            DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
+          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow()) {
           // If `instruction` can throw, we cannot move further instructions
           // that can throw as well.
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 0b5a0f1..bf56f53 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -26,8 +26,9 @@
 
 class LICM : public HOptimization {
  public:
-  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects)
-      : HOptimization(graph, kLoopInvariantCodeMotionPassName), side_effects_(side_effects) {}
+  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats),
+        side_effects_(side_effects) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 47457de..2a62643 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -16,7 +16,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "licm.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -27,7 +26,7 @@
 /**
  * Fixture class for the LICM tests.
  */
-class LICMTest : public testing::Test {
+class LICMTest : public CommonCompilerTest {
  public:
   LICMTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -42,12 +41,14 @@
     loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
     loop_header_ = new (&allocator_) HBasicBlock(graph_);
     loop_body_ = new (&allocator_) HBasicBlock(graph_);
+    return_ = new (&allocator_) HBasicBlock(graph_);
     exit_ = new (&allocator_) HBasicBlock(graph_);
 
     graph_->AddBlock(entry_);
     graph_->AddBlock(loop_preheader_);
     graph_->AddBlock(loop_header_);
     graph_->AddBlock(loop_body_);
+    graph_->AddBlock(return_);
     graph_->AddBlock(exit_);
 
     graph_->SetEntryBlock(entry_);
@@ -57,26 +58,28 @@
     entry_->AddSuccessor(loop_preheader_);
     loop_preheader_->AddSuccessor(loop_header_);
     loop_header_->AddSuccessor(loop_body_);
-    loop_header_->AddSuccessor(exit_);
+    loop_header_->AddSuccessor(return_);
     loop_body_->AddSuccessor(loop_header_);
+    return_->AddSuccessor(exit_);
 
     // Provide boiler-plate instructions.
     parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
     entry_->AddInstruction(parameter_);
-    constant_ = graph_->GetIntConstant(42);
+    int_constant_ = graph_->GetIntConstant(42);
+    float_constant_ = graph_->GetFloatConstant(42.0f);
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
     loop_header_->AddInstruction(new (&allocator_) HIf(parameter_));
     loop_body_->AddInstruction(new (&allocator_) HGoto());
+    return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
   }
 
   // Performs LICM optimizations (after proper set up).
   void PerformLICM() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    graph_->BuildDominatorTree();
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
-    LICM licm(graph_, side_effects);
-    licm.Run();
+    LICM(graph_, side_effects, nullptr).Run();
   }
 
   // General building fields.
@@ -89,10 +92,12 @@
   HBasicBlock* loop_preheader_;
   HBasicBlock* loop_header_;
   HBasicBlock* loop_body_;
+  HBasicBlock* return_;
   HBasicBlock* exit_;
 
   HInstruction* parameter_;  // "this"
-  HInstruction* constant_;
+  HInstruction* int_constant_;
+  HInstruction* float_constant_;
 };
 
 //
@@ -103,7 +108,7 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get field with different types.
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
                                                                 Primitive::kPrimLong,
                                                                 MemberOffset(10),
@@ -115,7 +120,7 @@
                                                                 0);
   loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction());
   HInstruction* set_field = new (&allocator_) HInstanceFieldSet(
-      parameter_, constant_, Primitive::kPrimInt, MemberOffset(20),
+      parameter_, int_constant_, Primitive::kPrimInt, MemberOffset(20),
       false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction());
 
@@ -130,7 +135,7 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get field with same types.
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
                                                                 Primitive::kPrimLong,
                                                                 MemberOffset(10),
@@ -165,10 +170,10 @@
 
   // Populate the loop with instructions: set/get array with different types.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong, 0);
+      parameter_, int_constant_, Primitive::kPrimInt, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, constant_, constant_, Primitive::kPrimInt, 0);
+      parameter_, int_constant_, float_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
@@ -183,10 +188,10 @@
 
   // Populate the loop with instructions: set/get array with same types.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong, 0);
+      parameter_, int_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, get_array, constant_, Primitive::kPrimLong, 0);
+      parameter_, get_array, float_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index a059766..13e14c5 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -29,25 +29,17 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "pretty_printer.h"
-#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LinearizeTest : public CommonCompilerTest {};
+
 template <size_t number_of_blocks>
 static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-
-  graph->TryBuildingSsa();
-
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -60,7 +52,7 @@
   }
 }
 
-TEST(LinearizeTest, CFG1) {
+TEST_F(LinearizeTest, CFG1) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -85,7 +77,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG2) {
+TEST_F(LinearizeTest, CFG2) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -110,7 +102,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG3) {
+TEST_F(LinearizeTest, CFG3) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -137,7 +129,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG4) {
+TEST_F(LinearizeTest, CFG4) {
   /* Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -167,7 +159,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG5) {
+TEST_F(LinearizeTest, CFG5) {
   /* Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -197,7 +189,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG6) {
+TEST_F(LinearizeTest, CFG6) {
   //            Block0
   //              |
   //            Block1
@@ -223,7 +215,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG7) {
+TEST_F(LinearizeTest, CFG7) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 7f67560..f9a955f 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -27,25 +27,21 @@
 #include "prepare_for_register_allocation.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LiveRangesTest : public CommonCompilerTest {};
+
 static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
+  HGraph* graph = CreateCFG(allocator, data);
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  graph->TryBuildingSsa();
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   return graph;
 }
 
-TEST(LiveRangesTest, CFG1) {
+TEST_F(LiveRangesTest, CFG1) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -83,7 +79,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG2) {
+TEST_F(LiveRangesTest, CFG2) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -131,7 +127,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG3) {
+TEST_F(LiveRangesTest, CFG3) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -204,7 +200,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, Loop1) {
+TEST_F(LiveRangesTest, Loop1) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -215,8 +211,8 @@
    *
    * Which becomes the following graph (numbered by lifetime position):
    *       2: constant0
-   *       4: constant4
-   *       6: constant5
+   *       4: constant5
+   *       6: constant4
    *       8: goto
    *           |
    *       12: goto
@@ -251,7 +247,7 @@
   liveness.Analyze();
 
   // Test for the 0 constant.
-  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  LiveInterval* interval = graph->GetIntConstant(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the loop phi so instruction is live until
@@ -260,31 +256,31 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 4 constant.
-  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  interval = graph->GetIntConstant(4)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the end of the loop.
-  ASSERT_EQ(4u, range->GetStart());
+  ASSERT_EQ(6u, range->GetStart());
   ASSERT_EQ(24u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 5 constant.
-  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
+  interval = graph->GetIntConstant(5)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
-  ASSERT_EQ(6u, range->GetStart());
+  ASSERT_EQ(4u, range->GetStart());
   ASSERT_EQ(26u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the phi.
   interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
   range = interval->GetFirstRange();
-  // Instruction is consumed by the if.
+  // Instruction is input of non-materialized Equal and hence live until If.
   ASSERT_EQ(14u, range->GetStart());
-  ASSERT_EQ(17u, range->GetEnd());
+  ASSERT_EQ(19u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, Loop2) {
+TEST_F(LiveRangesTest, Loop2) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -303,13 +299,12 @@
    *       12: equal
    *       14: if +++++
    *        |       \ +
-   *        |     18: suspend
-   *        |     20: add
-   *        |     22: goto
+   *        |     18: add
+   *        |     20: goto
    *        |
-   *       26: return
+   *       24: return
    *         |
-   *       30: exit
+   *       28: exit
    *
    * We want to make sure the phi at 10 has a lifetime hole after the add at 20.
    */
@@ -345,22 +340,22 @@
   interval = phi->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(10u, range->GetStart());
-  ASSERT_EQ(21u, range->GetEnd());
+  ASSERT_EQ(19u, range->GetEnd());
   range = range->GetNext();
   ASSERT_TRUE(range != nullptr);
-  ASSERT_EQ(24u, range->GetStart());
-  ASSERT_EQ(26u, range->GetEnd());
+  ASSERT_EQ(22u, range->GetStart());
+  ASSERT_EQ(24u, range->GetEnd());
 
   // Test for the add instruction.
   HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd();
   interval = add->GetLiveInterval();
   range = interval->GetFirstRange();
-  ASSERT_EQ(20u, range->GetStart());
-  ASSERT_EQ(24u, range->GetEnd());
+  ASSERT_EQ(18u, range->GetStart());
+  ASSERT_EQ(22u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG4) {
+TEST_F(LiveRangesTest, CFG4) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -446,7 +441,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi();
-  ASSERT_TRUE(phi->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(phi->GetUses().HasExactlyOneElement());
   interval = phi->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(26u, range->GetStart());
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 9d7d0b6..bd74368 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -27,10 +27,10 @@
 #include "prepare_for_register_allocation.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LivenessTest : public CommonCompilerTest {};
+
 static void DumpBitVector(BitVector* vector,
                           std::ostream& buffer,
                           size_t count,
@@ -46,12 +46,7 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  graph->TryBuildingSsa();
+  HGraph* graph = CreateCFG(&allocator, data);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
@@ -75,7 +70,7 @@
   ASSERT_STREQ(expected, buffer.str().c_str());
 }
 
-TEST(LivenessTest, CFG1) {
+TEST_F(LivenessTest, CFG1) {
   const char* expected =
     "Block 0\n"
     "  live in: (0)\n"
@@ -98,7 +93,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG2) {
+TEST_F(LivenessTest, CFG2) {
   const char* expected =
     "Block 0\n"
     "  live in: (0)\n"
@@ -120,7 +115,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG3) {
+TEST_F(LivenessTest, CFG3) {
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (000)\n"
@@ -149,7 +144,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG4) {
+TEST_F(LivenessTest, CFG4) {
   // var a;
   // if (0 == 0) {
   //   a = 5;
@@ -159,7 +154,7 @@
   // return a;
   //
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (0000)\n"
@@ -170,11 +165,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"  // else block
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"  // then block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
@@ -197,7 +192,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG5) {
+TEST_F(LivenessTest, CFG5) {
   // var a = 0;
   // if (0 == 0) {
   // } else {
@@ -242,7 +237,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop1) {
+TEST_F(LivenessTest, Loop1) {
   // Simple loop with one preheader and one back edge.
   // var a = 0;
   // while (a == a) {
@@ -288,7 +283,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop3) {
+TEST_F(LivenessTest, Loop3) {
   // Test that the returned value stays live in a preceding loop.
   // var a = 0;
   // while (a == a) {
@@ -296,7 +291,7 @@
   // }
   // return 5;
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -315,7 +310,7 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 5\n"  // exit block
@@ -335,7 +330,7 @@
 }
 
 
-TEST(LivenessTest, Loop4) {
+TEST_F(LivenessTest, Loop4) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   // var a = 0;
@@ -387,11 +382,11 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop5) {
+TEST_F(LivenessTest, Loop5) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 8)
+  // (constant0, constant5, constant4, phi in block 8)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -402,11 +397,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // loop header
@@ -443,7 +438,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop6) {
+TEST_F(LivenessTest, Loop6) {
   // Bitsets are made of:
   // (constant0, constant4, constant5, phi in block 2)
   const char* expected =
@@ -494,7 +489,7 @@
 }
 
 
-TEST(LivenessTest, Loop7) {
+TEST_F(LivenessTest, Loop7) {
   // Bitsets are made of:
   // (constant0, constant4, constant5, phi in block 2, phi in block 6)
   const char* expected =
@@ -548,7 +543,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop8) {
+TEST_F(LivenessTest, Loop8) {
   // var a = 0;
   // while (a == a) {
   //   a = a + a;
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 90f28e5..7347686 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -43,31 +43,39 @@
 
     // Visit all uses to determine if this reference can spread into the heap,
     // a method call, etc.
-    for (HUseIterator<HInstruction*> use_it(reference_->GetUses());
-         !use_it.Done();
-         use_it.Advance()) {
-      HInstruction* use = use_it.Current()->GetUser();
-      DCHECK(!use->IsNullCheck()) << "NullCheck should have been eliminated";
-      if (use->IsBoundType()) {
+    for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) {
+      HInstruction* user = use.GetUser();
+      DCHECK(!user->IsNullCheck()) << "NullCheck should have been eliminated";
+      if (user->IsBoundType()) {
         // BoundType shouldn't normally be necessary for a NewInstance.
         // Just be conservative for the uncommon cases.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsPhi() || use->IsInvoke() ||
-          (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
-        // reference_ is merged to a phi, passed to a callee, or stored to heap.
+      if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
+          (user->IsInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsStaticFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsUnresolvedStaticFieldSet() && (reference_ == user->InputAt(0))) ||
+          (user->IsArraySet() && (reference_ == user->InputAt(2)))) {
+        // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
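+        // For example (illustration): in `o.f = ref; foo(ref);` the object
+        // referenced by ref escapes through both the heap store and the
+        // call, so it can no longer be treated as a singleton.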
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsReturn()) {
+      if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) ||
+          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) {
+        // The field is accessed in an unresolved way. We conservatively mark
+        // the object as a non-singleton to disable load/store optimizations on it.
+        // Note that we could optimize this case and still perform some optimizations until
+        // we hit the unresolved access, but disabling is the simplest.
+        is_singleton_ = false;
+        is_singleton_and_not_returned_ = false;
+        return;
+      }
+      if (user->IsReturn()) {
         is_singleton_and_not_returned_ = false;
       }
     }
@@ -120,17 +128,14 @@
         offset_(offset),
         index_(index),
         declaring_class_def_index_(declaring_class_def_index),
-        may_become_unknown_(true) {
+        value_killed_by_loop_side_effects_(true) {
     DCHECK(ref_info != nullptr);
     DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
            (offset != kInvalidFieldOffset && index == nullptr));
-
-    if (ref_info->IsSingletonAndNotReturned()) {
-      // We try to track stores to singletons that aren't returned to eliminate the stores
-      // since values in singleton's fields cannot be killed due to aliasing. Those values
-      // can still be killed due to merging values since we don't build phi for merging heap
-      // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible.
-      may_become_unknown_ = false;
+    if (ref_info->IsSingleton() && !IsArrayElement()) {
+      // Assume this location's value cannot be killed by loop side effects
+      // until proven otherwise.
+      value_killed_by_loop_side_effects_ = false;
     }
   }
 
@@ -148,13 +153,12 @@
     return index_ != nullptr;
   }
 
-  // Returns true if this heap location's value may become unknown after it's
-  // set to a value, due to merge of values, or killed due to aliasing.
-  bool MayBecomeUnknown() const {
-    return may_become_unknown_;
+  bool IsValueKilledByLoopSideEffects() const {
+    return value_killed_by_loop_side_effects_;
   }
-  void SetMayBecomeUnknown(bool val) {
-    may_become_unknown_ = val;
+
+  void SetValueKilledByLoopSideEffects(bool val) {
+    value_killed_by_loop_side_effects_ = val;
   }
 
  private:
@@ -162,7 +166,9 @@
   const size_t offset_;                // offset of static/instance field.
   HInstruction* const index_;          // index of an array element.
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
-  bool may_become_unknown_;            // value may become kUnknownHeapValue.
+  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
+                                             // side effects because this location is stored
+                                             // into inside a loop.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -188,7 +194,10 @@
       : HGraphVisitor(graph),
         ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        aliasing_matrix_(graph->GetArena(), kInitialAliasingMatrixBitVectorSize, true),
+        aliasing_matrix_(graph->GetArena(),
+                         kInitialAliasingMatrixBitVectorSize,
+                         true,
+                         kArenaAllocLSE),
         has_heap_stores_(false),
         has_volatile_(false),
         has_monitor_operations_(false),
@@ -354,16 +363,24 @@
     return true;
   }
 
-  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) {
-    ReferenceInfo* ref_info = FindReferenceInfoOf(ref);
+  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+    ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
     if (ref_info == nullptr) {
       size_t pos = ref_info_array_.size();
-      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos);
+      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
       ref_info_array_.push_back(ref_info);
     }
     return ref_info;
   }
 
+  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+    if (instruction->GetType() != Primitive::kPrimNot) {
+      return;
+    }
+    DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+    GetOrCreateReferenceInfo(instruction);
+  }
+
   HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
                                         size_t offset,
                                         HInstruction* index,
@@ -381,26 +398,13 @@
     return heap_locations_[heap_location_idx];
   }
 
-  void VisitFieldAccess(HInstruction* field_access,
-                        HInstruction* ref,
-                        const FieldInfo& field_info,
-                        bool is_store) {
+  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
     if (field_info.IsVolatile()) {
       has_volatile_ = true;
     }
     const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
     const size_t offset = field_info.GetFieldOffset().SizeValue();
-    HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
-    // A store of a value may be eliminated if all future loads for that value can be eliminated.
-    // For a value that's stored into a singleton field, the value will not be killed due
-    // to aliasing. However if the value is set in a block that doesn't post dominate the definition,
-    // the value may be killed due to merging later. Before we have post dominating info, we check
-    // if the store is in the same block as the definition just to be conservative.
-    if (is_store &&
-        location->GetReferenceInfo()->IsSingletonAndNotReturned() &&
-        field_access->GetBlock() != ref->GetBlock()) {
-      location->SetMayBecomeUnknown(true);
-    }
+    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
   }
 
   void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -409,20 +413,25 @@
   }
 
   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
+    if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
+      location->SetValueKilledByLoopSideEffects(true);
+    }
   }
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
   }
 
@@ -431,6 +440,7 @@
 
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
     VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitArraySet(HArraySet* instruction) OVERRIDE {
@@ -440,7 +450,27 @@
 
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
     // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
-    GetOrCreateReferenceInfo(new_instance);
+    CreateReferenceInfoForReferenceType(new_instance);
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitSelect(HSelect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
@@ -458,15 +488,20 @@
                             // alias analysis and won't be as effective.
   bool has_volatile_;       // If there are volatile field accesses.
   bool has_monitor_operations_;    // If there are monitor operations.
-  bool may_deoptimize_;
+  bool may_deoptimize_;     // Only true for HDeoptimize with single-frame deoptimization.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
 
 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
+// A heap location can be set to kUnknownHeapValue when:
+// - initially, before any value is set.
+// - killed due to aliasing, merging, invocation, or loop side effects.
 static HInstruction* const kUnknownHeapValue =
     reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1));
+
 // Default heap value after an allocation.
+// A heap location can be set to that value right after an allocation.
 static HInstruction* const kDefaultHeapValue =
     reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2));
 
@@ -484,29 +519,17 @@
                                                     kUnknownHeapValue,
                                                     graph->GetArena()->Adapter(kArenaAllocLSE)),
                          graph->GetArena()->Adapter(kArenaAllocLSE)),
-        removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
-    int block_id = block->GetBlockId();
-    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    // Populate the heap_values array for this block.
     // TODO: try to reuse the heap_values array from one predecessor if possible.
     if (block->IsLoopHeader()) {
-      // We do a single pass in reverse post order. For loops, use the side effects as a hint
-      // to see if the heap values should be killed.
-      if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
-        // Leave all values as kUnknownHeapValue.
-      } else {
-        // Inherit the values from pre-header.
-        HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
-        ArenaVector<HInstruction*>& pre_header_heap_values =
-            heap_values_for_[pre_header->GetBlockId()];
-        for (size_t i = 0; i < heap_values.size(); i++) {
-          heap_values[i] = pre_header_heap_values[i];
-        }
-      }
+      HandleLoopSideEffects(block);
     } else {
       MergePredecessorValues(block);
     }
@@ -515,32 +538,106 @@
 
   // Remove recorded instructions that should be eliminated.
   void RemoveInstructions() {
-    size_t size = removed_instructions_.size();
-    DCHECK_EQ(size, substitute_instructions_.size());
+    size_t size = removed_loads_.size();
+    DCHECK_EQ(size, substitute_instructions_for_loads_.size());
     for (size_t i = 0; i < size; i++) {
-      HInstruction* instruction = removed_instructions_[i];
-      DCHECK(instruction != nullptr);
-      HInstruction* substitute = substitute_instructions_[i];
-      if (substitute != nullptr) {
-        // Keep tracing substitute till one that's not removed.
-        HInstruction* sub_sub = FindSubstitute(substitute);
-        while (sub_sub != substitute) {
-          substitute = sub_sub;
-          sub_sub = FindSubstitute(substitute);
-        }
-        instruction->ReplaceWith(substitute);
+      HInstruction* load = removed_loads_[i];
+      DCHECK(load != nullptr);
+      DCHECK(load->IsInstanceFieldGet() ||
+             load->IsStaticFieldGet() ||
+             load->IsArrayGet());
+      HInstruction* substitute = substitute_instructions_for_loads_[i];
+      DCHECK(substitute != nullptr);
+      // Keep tracing substitute till one that's not removed.
+      HInstruction* sub_sub = FindSubstitute(substitute);
+      while (sub_sub != substitute) {
+        substitute = sub_sub;
+        sub_sub = FindSubstitute(substitute);
       }
-      instruction->GetBlock()->RemoveInstruction(instruction);
+      load->ReplaceWith(substitute);
+      load->GetBlock()->RemoveInstruction(load);
     }
-    // TODO: remove unnecessary allocations.
-    // Eliminate instructions in singleton_new_instances_ that:
-    // - don't have uses,
-    // - don't have finalizers,
-    // - are instantiable and accessible,
-    // - have no/separate clinit check.
+
+    // At this point, stores in possibly_removed_stores_ can be safely removed.
+    for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
+      HInstruction* store = possibly_removed_stores_[i];
+      DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
+      store->GetBlock()->RemoveInstruction(store);
+    }
+
+    // Eliminate allocations that are not used.
+    for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
+      HInstruction* new_instance = singleton_new_instances_[i];
+      if (!new_instance->HasNonEnvironmentUses()) {
+        new_instance->RemoveEnvironmentUsers();
+        new_instance->GetBlock()->RemoveInstruction(new_instance);
+      }
+    }
   }
 
  private:
+  // If heap_values[index] is an instance field store, the store needs to be
+  // kept. This is necessary if a heap value is killed due to merging, or to
+  // loop side effects (which are essentially a merge as well), since a later
+  // load from the location won't be eliminated.
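+  // E.g. if one predecessor stores 1 into a field and another stores 2, the
+  // merged heap value is unknown, so a later load cannot be eliminated and
+  // both stores must be kept (illustration).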
+  void KeepIfIsStore(HInstruction* heap_value) {
+    if (heap_value == kDefaultHeapValue ||
+        heap_value == kUnknownHeapValue ||
+        !heap_value->IsInstanceFieldSet()) {
+      return;
+    }
+    auto idx = std::find(possibly_removed_stores_.begin(),
+        possibly_removed_stores_.end(), heap_value);
+    if (idx != possibly_removed_stores_.end()) {
+      // Make sure the store is kept.
+      possibly_removed_stores_.erase(idx);
+    }
+  }
+
+  void HandleLoopSideEffects(HBasicBlock* block) {
+    DCHECK(block->IsLoopHeader());
+    int block_id = block->GetBlockId();
+    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+
+    // Don't eliminate loads in irreducible loops. This is safe for singletons, because
+    // they are always used by the non-eliminated loop-phi.
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      if (kIsDebugBuild) {
+        for (size_t i = 0; i < heap_values.size(); i++) {
+          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
+        }
+      }
+      return;
+    }
+
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    ArenaVector<HInstruction*>& pre_header_heap_values =
+        heap_values_for_[pre_header->GetBlockId()];
+
+    // Inherit the values from pre-header.
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      heap_values[i] = pre_header_heap_values[i];
+    }
+
+    // We do a single pass in reverse post order. For loops, use the side effects as a hint
+    // to see if the heap values should be killed.
+    if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
+      for (size_t i = 0; i < heap_values.size(); i++) {
+        HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+        ReferenceInfo* ref_info = location->GetReferenceInfo();
+        if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
+          // The heap value is killed by loop side effects (stored into
+          // directly, or due to aliasing).
+          KeepIfIsStore(pre_header_heap_values[i]);
+          heap_values[i] = kUnknownHeapValue;
+        } else {
+          // A singleton's field that's not stored into inside a loop is invariant throughout
+          // the loop.
+        }
+      }
+    }
+  }
+
   void MergePredecessorValues(HBasicBlock* block) {
     const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
     if (predecessors.size() == 0) {
@@ -548,16 +645,25 @@
     }
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
-      HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i];
-      if (value != kUnknownHeapValue) {
+      HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
+      heap_values[i] = pred0_value;
+      if (pred0_value != kUnknownHeapValue) {
         for (size_t j = 1; j < predecessors.size(); j++) {
-          if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) {
-            value = kUnknownHeapValue;
+          HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
+          if (pred_value != pred0_value) {
+            heap_values[i] = kUnknownHeapValue;
             break;
           }
         }
       }
-      heap_values[i] = value;
+
+      if (heap_values[i] == kUnknownHeapValue) {
+        // Keep the last store in each predecessor since future loads cannot be eliminated.
+        for (size_t j = 0; j < predecessors.size(); j++) {
+          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+          KeepIfIsStore(pred_values[i]);
+        }
+      }
     }
   }
 
@@ -616,25 +722,41 @@
     HInstruction* heap_value = heap_values[idx];
     if (heap_value == kDefaultHeapValue) {
       HInstruction* constant = GetDefaultValue(instruction->GetType());
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(constant);
+      removed_loads_.push_back(instruction);
+      substitute_instructions_for_loads_.push_back(constant);
       heap_values[idx] = constant;
       return;
     }
-    if ((heap_value != kUnknownHeapValue) &&
-        // Keep the load due to possible I/F, J/D array aliasing.
-        // See b/22538329 for details.
-        (heap_value->GetType() == instruction->GetType())) {
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(heap_value);
-      TryRemovingNullCheck(instruction);
-      return;
+    if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
+      HInstruction* store = heap_value;
+      // This load must be from a singleton since it reads the same field into
+      // which a "removed" store put the value; such stores only target a singleton's field.
+      DCHECK(ref_info->IsSingleton());
+      // Get the real heap value of the store.
+      heap_value = store->InputAt(1);
     }
-
     if (heap_value == kUnknownHeapValue) {
-      // Put the load as the value into the HeapLocation.
+      // Load isn't eliminated. Put the load as the value into the HeapLocation.
       // This acts like GVN but with better aliasing analysis.
       heap_values[idx] = instruction;
+    } else {
+      if (Primitive::PrimitiveKind(heap_value->GetType())
+              != Primitive::PrimitiveKind(instruction->GetType())) {
+        // The only situation where the same heap location has different type is when
+        // we do an array get on an instruction that originates from the null constant
+        // (the null could be behind a field access, an array access, a null check or
+        // a bound type).
+        // In order to stay properly typed on primitive types, we do not eliminate
+        // the array gets.
+        if (kIsDebugBuild) {
+          DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName();
+          DCHECK(instruction->IsArrayGet()) << instruction->DebugName();
+        }
+        return;
+      }
+      removed_loads_.push_back(instruction);
+      substitute_instructions_for_loads_.push_back(heap_value);
+      TryRemovingNullCheck(instruction);
     }
   }
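
The new branch above implements store-to-load forwarding: when the remembered heap value is a tentatively removed store, the load takes the store's value operand instead, but only when the primitive kinds match. A small sketch under assumed stand-in types (not ART's API):

    #include <optional>

    // A remembered heap value carries the type kind of the location it was
    // observed at.
    enum class TypeKind { kInt, kLong, kFloat, kDouble, kReference };

    struct Value {
      int id;
      TypeKind kind;
    };

    // Returns the substitute for a load of kind `load_kind`, or std::nullopt
    // when the load has to stay.
    std::optional<Value> ForwardToLoad(const std::optional<Value>& heap_value,
                                       TypeKind load_kind) {
      if (!heap_value.has_value()) {
        return std::nullopt;  // Unknown heap value: record the load instead.
      }
      if (heap_value->kind != load_kind) {
        // The same location was seen at two different primitive kinds (e.g.
        // int/float array aliasing reached through a null constant); replacing
        // the load would leave the code improperly typed.
        return std::nullopt;
      }
      return heap_value;  // The load can be replaced by the remembered value.
    }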
 
@@ -662,51 +784,66 @@
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     HInstruction* heap_value = heap_values[idx];
-    bool redundant_store = false;
+    bool same_value = false;
+    bool possibly_redundant = false;
     if (Equal(heap_value, value)) {
       // Store into the heap location with the same value.
-      redundant_store = true;
+      same_value = true;
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
     } else if (!heap_location_collector_.MayDeoptimize() &&
-               ref_info->IsSingletonAndNotReturned() &&
-               !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) {
-      // Store into a field of a singleton that's not returned. And that value cannot be
-      // killed due to merge. It's redundant since future loads will get the value
-      // set by this instruction.
-      Primitive::Type type = Primitive::kPrimVoid;
-      if (instruction->IsInstanceFieldSet()) {
-        type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType();
-      } else if (instruction->IsStaticFieldSet()) {
-        type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType();
+               ref_info->IsSingletonAndNotReturned()) {
+      // Store into a field of a singleton that's not returned. The value cannot be
+      // killed due to aliasing/invocation. It can be redundant since future loads can
+      // directly get the value set by this instruction. The value can still be killed due to
+      // merging or loop side effects. Stores whose values are killed due to merging/loop side
+      // effects later will be removed from possibly_removed_stores_ when that is detected.
+      possibly_redundant = true;
+      HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
+      DCHECK(new_instance != nullptr);
+      if (new_instance->IsFinalizable()) {
+        // Finalizable objects escape globally. Need to keep the store.
+        possibly_redundant = false;
       } else {
-        DCHECK(false) << "Must be an instance/static field set instruction.";
+        HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+        if (loop_info != nullptr) {
+          // instruction is a store in the loop, so the loop must do writes.
+          DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+          // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
+          DCHECK(!ref_info->IsSingleton() ||
+                 heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
+
+          if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
+            DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
+            // Keep the store since its value may be needed at the loop header.
+            possibly_redundant = false;
+          } else {
+            // The singleton is created inside the loop. The value stored into it
+            // isn't needed at the loop header. This also holds for outer loops.
+          }
+        }
       }
-      if (value->GetType() != type) {
-        // I/F, J/D aliasing should not happen for fields.
-        DCHECK(Primitive::IsIntegralType(value->GetType()));
-        DCHECK(!Primitive::Is64BitType(value->GetType()));
-        DCHECK(Primitive::IsIntegralType(type));
-        DCHECK(!Primitive::Is64BitType(type));
-        // Keep the store since the corresponding load isn't eliminated due to different types.
-        // TODO: handle the different int types so that we can eliminate this store.
-        redundant_store = false;
-      } else {
-        redundant_store = true;
-      }
-      // TODO: eliminate the store if the singleton object is not finalizable.
-      redundant_store = false;
     }
-    if (redundant_store) {
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(nullptr);
-      TryRemovingNullCheck(instruction);
+    if (same_value || possibly_redundant) {
+      possibly_removed_stores_.push_back(instruction);
     }
 
-    heap_values[idx] = value;
+    if (!same_value) {
+      if (possibly_redundant) {
+        DCHECK(instruction->IsInstanceFieldSet());
+        // Put the store as the heap value. If the value is loaded from heap
+        // by a load later, this store isn't really redundant.
+        heap_values[idx] = instruction;
+      } else {
+        heap_values[idx] = value;
+      }
+    }
     // This store may kill values in other heap locations due to aliasing.
     for (size_t i = 0; i < heap_values.size(); i++) {
+      if (i == idx) {
+        continue;
+      }
       if (heap_values[i] == value) {
         // Same value should be kept even if aliasing happens.
         continue;
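
The conditions that make a store `same_value` or `possibly_redundant` are spread across several branches above. Distilled into one predicate (a sketch with invented field names, not ART's API), the decision looks like this:

    // Facts about one store, gathered by the analysis above.
    struct StoreFacts {
      bool writes_value_already_there;  // Equal(heap_value, value).
      bool is_array_element;            // May alias through a non-constant index.
      bool singleton_not_returned;      // The target provably does not escape.
      bool finalizable;                 // Finalizers make the object escape.
      bool in_loop;                     // The store executes inside a loop.
      bool ref_defined_outside_loop;    // Singleton allocated before the loop.
    };

    enum class StoreFate { kKeep, kSameValue, kPossiblyRedundant };

    StoreFate Classify(const StoreFacts& f) {
      if (f.writes_value_already_there) return StoreFate::kSameValue;
      if (f.is_array_element) return StoreFate::kKeep;
      if (!f.singleton_not_returned || f.finalizable) return StoreFate::kKeep;
      if (f.in_loop && f.ref_defined_outside_loop) {
        // The stored value may still be needed at the loop header.
        return StoreFate::kKeep;
      }
      return StoreFate::kPossiblyRedundant;
    }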
@@ -834,8 +971,9 @@
       return;
     }
     if (!heap_location_collector_.MayDeoptimize() &&
-        ref_info->IsSingletonAndNotReturned()) {
-      // The allocation might be eliminated.
+        ref_info->IsSingletonAndNotReturned() &&
+        !new_instance->IsFinalizable() &&
+        !new_instance->NeedsAccessCheck()) {
       singleton_new_instances_.push_back(new_instance);
     }
     ArenaVector<HInstruction*>& heap_values =
@@ -854,10 +992,10 @@
   // Find an instruction's substitute if it should be removed.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
-    size_t size = removed_instructions_.size();
+    size_t size = removed_loads_.size();
     for (size_t i = 0; i < size; i++) {
-      if (removed_instructions_[i] == instruction) {
-        return substitute_instructions_[i];
+      if (removed_loads_[i] == instruction) {
+        return substitute_instructions_for_loads_[i];
       }
     }
     return instruction;
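
`FindSubstitute` relies on the two vectors staying parallel: entry `i` of `removed_loads_` maps to entry `i` of `substitute_instructions_for_loads_`. A generic sketch of the same lookup:

    #include <cstddef>
    #include <vector>

    // Parallel-vector substitution table: removed[i] is replaced by
    // substitutes[i]. A linear scan is fine; only a handful of loads are
    // eliminated per method.
    template <typename T>
    T* FindSubstituteIn(const std::vector<T*>& removed,
                        const std::vector<T*>& substitutes,
                        T* instruction) {
      for (size_t i = 0; i < removed.size(); ++i) {
        if (removed[i] == instruction) {
          return substitutes[i];
        }
      }
      return instruction;  // Not removed: the instruction stands for itself.
    }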
@@ -871,16 +1009,22 @@
 
   // We record the instructions that should be eliminated but may be
   // used by heap locations. They'll be removed in the end.
-  ArenaVector<HInstruction*> removed_instructions_;
-  ArenaVector<HInstruction*> substitute_instructions_;
+  ArenaVector<HInstruction*> removed_loads_;
+  ArenaVector<HInstruction*> substitute_instructions_for_loads_;
+
+  // Stores in this list may later be removed from it when we find that
+  // the store cannot be eliminated after all.
+  ArenaVector<HInstruction*> possibly_removed_stores_;
+
   ArenaVector<HInstruction*> singleton_new_instances_;
 
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
 };
 
 void LoadStoreElimination::Run() {
-  if (graph_->IsDebuggable()) {
+  if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
     // Debugger may set heap values or trigger deoptimization of callers.
+    // Try/catch support not implemented yet.
     // Skip this optimization.
     return;
   }
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ebdf7a2..83596da 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -17,6 +17,7 @@
 #include "locations.h"
 
 #include "nodes.h"
+#include "code_generator.h"
 
 namespace art {
 
@@ -36,7 +37,7 @@
 
   if (NeedsSafepoint()) {
     ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetArena();
-    stack_mask_ = new (arena) ArenaBitVector(arena, 0, true);
+    stack_mask_ = ArenaBitVector::Create(arena, 0, true, kArenaAllocLocationSummary);
   }
 }
 
@@ -47,18 +48,26 @@
       : Location::RequiresRegister();
 }
 
-Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
-  if (instruction->IsIntConstant() || instruction->IsNullConstant()) {
-    return Location::ConstantLocation(instruction->AsConstant());
-  } else if (instruction->IsLongConstant()) {
-    // Does the long constant fit in a 32 bit int?
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    return IsInt<32>(value)
-        ? Location::ConstantLocation(instruction->AsConstant())
-        : Location::RequiresRegister();
-  } else {
-    return Location::RequiresRegister();
+Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
+  HConstant* constant = instruction->AsConstant();
+  if (constant != nullptr) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+    if (IsInt<32>(value)) {
+      return Location::ConstantLocation(constant);
+    }
   }
+  return Location::RequiresRegister();
+}
+
+Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
+  HConstant* constant = instruction->AsConstant();
+  if (constant != nullptr) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+    if (IsInt<32>(value)) {
+      return Location::ConstantLocation(constant);
+    }
+  }
+  return Location::RequiresFpuRegister();
 }
 
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
@@ -67,6 +76,12 @@
       : Location::RegisterLocation(reg);
 }
 
+Location Location::FpuRegisterOrConstant(HInstruction* instruction) {
+  return instruction->IsConstant()
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresFpuRegister();
+}
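
Both Int32 helpers above hinge on whether the constant's 64-bit value fits in a signed 32-bit immediate. A standalone sketch of an `IsInt<32>`-style check (ART's real `IsInt<>` lives in `bit_utils.h`; this stand-in only illustrates the idea):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Does `value` fit in a kBits-wide signed immediate?
    template <size_t kBits>
    constexpr bool FitsInSignedBits(int64_t value) {
      static_assert(kBits > 0 && kBits < 64, "bit width out of range");
      const int64_t lo = -(int64_t{1} << (kBits - 1));
      const int64_t hi = (int64_t{1} << (kBits - 1)) - 1;
      return lo <= value && value <= hi;
    }

    int main() {
      std::cout << FitsInSignedBits<32>(0x7fffffff) << "\n";    // 1: constant location.
      std::cout << FitsInSignedBits<32>(0x80000000LL) << "\n";  // 0: needs a register.
    }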
+
 std::ostream& operator<<(std::ostream& os, const Location& location) {
   os << location.DebugString();
   if (location.IsRegister() || location.IsFpuRegister()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index d014379..5fdfb9b 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -38,7 +38,13 @@
 class Location : public ValueObject {
  public:
   enum OutputOverlap {
+    // The liveness of the output overlaps the liveness of one or
+    // several input(s); the register allocator cannot reuse an
+    // input's location for the output's location.
     kOutputOverlap,
+    // The liveness of the output does not overlap the liveness of any
+    // input; the register allocator is allowed to reuse an input's
+    // location for the output's location.
     kNoOutputOverlap
   };
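
The comments added above spell out the contract the register allocator relies on; in effect, `kNoOutputOverlap` is permission to assign the output to an input's register. A hedged sketch of how an allocator might consume the flag (invented types, not ART's allocator):

    // Half-open live range [start, end) in linearized instruction order.
    struct LiveInterval { int start; int end; };

    // With kNoOutputOverlap the output only becomes live once its inputs are
    // dead, so an input's register may be reused for the output.
    bool CanReuseInputRegister(const LiveInterval& input,
                               const LiveInterval& output,
                               bool output_overlaps) {
      if (output_overlaps) {
        return false;  // Liveness may intersect: be conservative.
      }
      return input.end <= output.start || output.end <= input.start;
    }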
 
@@ -354,8 +360,10 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
-  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
+  static Location RegisterOrInt32Constant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
+  static Location FpuRegisterOrConstant(HInstruction* instruction);
+  static Location FpuRegisterOrInt32Constant(HInstruction* instruction);
 
   // The location of the first input to the instruction will be
   // used to replace this unallocated location.
@@ -368,6 +376,10 @@
     return PolicyField::Decode(GetPayload());
   }
 
+  bool RequiresRegisterKind() const {
+    return GetPolicy() == kRequiresRegister || GetPolicy() == kRequiresFpuRegister;
+  }
+
   uintptr_t GetEncoding() const {
     return GetPayload();
   }
@@ -472,8 +484,9 @@
  public:
   enum CallKind {
     kNoCall,
+    kCallOnMainAndSlowPath,
     kCallOnSlowPath,
-    kCall
+    kCallOnMainOnly
   };
 
   LocationSummary(HInstruction* instruction,
@@ -492,6 +505,10 @@
     return inputs_.size();
   }
 
+  // Set the output location.  Argument `overlaps` tells whether the
+  // output overlaps any of the inputs (if so, it cannot share the
+  // same register as one of the inputs); it is set to
+  // `Location::kOutputOverlap` by default for safety.
   void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
     DCHECK(output_.IsInvalid());
     output_overlaps_ = overlaps;
@@ -528,10 +545,29 @@
 
   Location Out() const { return output_; }
 
-  bool CanCall() const { return call_kind_ != kNoCall; }
-  bool WillCall() const { return call_kind_ == kCall; }
-  bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
-  bool NeedsSafepoint() const { return CanCall(); }
+  bool CanCall() const {
+    return call_kind_ != kNoCall;
+  }
+
+  bool WillCall() const {
+    return call_kind_ == kCallOnMainOnly || call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool CallsOnSlowPath() const {
+    return call_kind_ == kCallOnSlowPath || call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool OnlyCallsOnSlowPath() const {
+    return call_kind_ == kCallOnSlowPath;
+  }
+
+  bool CallsOnMainAndSlowPath() const {
+    return call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool NeedsSafepoint() const {
+    return CanCall();
+  }
 
   void SetStackBit(uint32_t index) {
     stack_mask_->SetBit(index);
@@ -592,6 +628,10 @@
     return intrinsified_;
   }
 
+  void SetIntrinsified(bool intrinsified) {
+    intrinsified_ = intrinsified;
+  }
+
  private:
   ArenaVector<Location> inputs_;
   ArenaVector<Location> temps_;
@@ -611,10 +651,9 @@
   RegisterSet live_registers_;
 
   // Whether these are locations for an intrinsified call.
-  const bool intrinsified_;
+  bool intrinsified_;
 
-  ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
+  friend class RegisterAllocatorTest;
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 98c3096..2808e1b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -13,10 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include "nodes.h"
 
+#include <cfloat>
+
 #include "code_generator.h"
+#include "common_dominator.h"
 #include "ssa_builder.h"
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
@@ -27,6 +29,21 @@
 
 namespace art {
 
+// Enable floating-point static evaluation during constant folding
+// only if all floating-point operations and constants evaluate in the
+// range and precision of the type used (i.e., 32-bit float, 64-bit
+// double).
+static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == 0);
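
`FLT_EVAL_METHOD` (from `<cfloat>`) reports the precision at which the host evaluates floating-point expressions; only when it is 0 do `float` and `double` operations evaluate at their own precision, making host-side constant folding bit-exact with the target. A quick way to check a toolchain:

    #include <cfloat>
    #include <cstdio>

    // FLT_EVAL_METHOD == 0: float ops evaluate in float, double ops in double.
    // FLT_EVAL_METHOD == 2 (e.g. x87): expressions evaluate in long double, so
    // folding a float add at compile time could differ from the runtime result
    // by a double rounding.
    int main() {
      std::printf("FLT_EVAL_METHOD = %d\n", FLT_EVAL_METHOD);
      std::printf("static fp evaluation %s\n",
                  (FLT_EVAL_METHOD == 0) ? "enabled" : "disabled");
    }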
+
+void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) {
+  ScopedObjectAccess soa(Thread::Current());
+  // Create the inexact Object reference type and store it in the HGraph.
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  inexact_object_rti_ = ReferenceTypeInfo::Create(
+      handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+      /* is_exact */ false);
+}
+
 void HGraph::AddBlock(HBasicBlock* block) {
   block->SetBlockId(blocks_.size());
   blocks_.push_back(block);
@@ -37,11 +54,13 @@
   DCHECK_EQ(visited->GetHighestBitSet(), -1);
 
   // Nodes that we're currently visiting, indexed by block id.
-  ArenaBitVector visiting(arena_, blocks_.size(), false);
+  ArenaBitVector visiting(arena_, blocks_.size(), false, kArenaAllocGraphBuilder);
   // Number of successors visited from a given node, indexed by block id.
-  ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter());
+  ArenaVector<size_t> successors_visited(blocks_.size(),
+                                         0u,
+                                         arena_->Adapter(kArenaAllocGraphBuilder));
   // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
-  ArenaVector<HBasicBlock*> worklist(arena_->Adapter());
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
   constexpr size_t kDefaultWorklistSize = 8;
   worklist.reserve(kDefaultWorklistSize);
   visited->SetBit(entry_block_->GetBlockId());
@@ -69,11 +88,7 @@
   }
 }
 
-static void RemoveAsUser(HInstruction* instruction) {
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
-    instruction->RemoveAsUserOfInput(i);
-  }
-
+static void RemoveEnvironmentUses(HInstruction* instruction) {
   for (HEnvironment* environment = instruction->GetEnvironment();
        environment != nullptr;
        environment = environment->GetParent()) {
@@ -85,10 +100,16 @@
   }
 }
 
+static void RemoveAsUser(HInstruction* instruction) {
+  instruction->RemoveAsUserOfAllInputs();
+  RemoveEnvironmentUses(instruction);
+}
+
 void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_[i];
+      if (block == nullptr) continue;
       DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
       for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         RemoveAsUser(it.Current());
@@ -101,6 +122,7 @@
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_[i];
+      if (block == nullptr) continue;
       // We only need to update the successor, which might be live.
       for (HBasicBlock* successor : block->GetSuccessors()) {
         successor->RemovePredecessor(block);
@@ -108,37 +130,52 @@
       // Remove the block from the list of blocks, so that further analyses
       // never see it.
       blocks_[i] = nullptr;
+      if (block->IsExitBlock()) {
+        SetExitBlock(nullptr);
+      }
+      // Mark the block as removed. This is used by the HGraphBuilder to discard
+      // the block as a branch target.
+      block->SetGraph(nullptr);
     }
   }
 }
 
-void HGraph::BuildDominatorTree() {
-  // (1) Simplify the CFG so that catch blocks have only exceptional incoming
-  //     edges. This invariant simplifies building SSA form because Phis cannot
-  //     collect both normal- and exceptional-flow values at the same time.
-  SimplifyCatchBlocks();
+GraphAnalysisResult HGraph::BuildDominatorTree() {
+  ArenaBitVector visited(arena_, blocks_.size(), false, kArenaAllocGraphBuilder);
 
-  ArenaBitVector visited(arena_, blocks_.size(), false);
-
-  // (2) Find the back edges in the graph doing a DFS traversal.
+  // (1) Find the back edges in the graph doing a DFS traversal.
   FindBackEdges(&visited);
 
-  // (3) Remove instructions and phis from blocks not visited during
+  // (2) Remove instructions and phis from blocks not visited during
   //     the initial DFS as users from other instructions, so that
   //     users can be safely removed before uses later.
   RemoveInstructionsAsUsersFromDeadBlocks(visited);
 
-  // (4) Remove blocks not visited during the initial DFS.
-  //     Step (4) requires dead blocks to be removed from the
+  // (3) Remove blocks not visited during the initial DFS.
+  //     Step (5) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
-  // (5) Simplify the CFG now, so that we don't need to recompute
+  // (4) Simplify the CFG now, so that we don't need to recompute
   //     dominators and the reverse post order.
   SimplifyCFG();
 
-  // (6) Compute the dominance information and the reverse post order.
+  // (5) Compute the dominance information and the reverse post order.
   ComputeDominanceInformation();
+
+  // (6) Analyze loops discovered through back edge analysis, and
+  //     set the loop information on each block.
+  GraphAnalysisResult result = AnalyzeLoops();
+  if (result != kAnalysisSuccess) {
+    return result;
+  }
+
+  // (7) Precompute per-block try membership before entering the SSA builder,
+  //     which needs the information to build catch block phis from values of
+  //     locals at throwing instructions inside try blocks.
+  ComputeTryBlockInformation();
+
+  return kAnalysisSuccess;
 }
 
 void HGraph::ClearDominanceInformation() {
@@ -148,22 +185,55 @@
   reverse_post_order_.clear();
 }
 
+void HGraph::ClearLoopInformation() {
+  SetHasIrreducibleLoops(false);
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    it.Current()->SetLoopInformation(nullptr);
+  }
+}
+
 void HBasicBlock::ClearDominanceInformation() {
   dominated_blocks_.clear();
   dominator_ = nullptr;
 }
 
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+  HInstruction* instruction = GetFirstInstruction();
+  while (instruction->IsParallelMove()) {
+    instruction = instruction->GetNext();
+  }
+  return instruction;
+}
+
+static bool UpdateDominatorOfSuccessor(HBasicBlock* block, HBasicBlock* successor) {
+  DCHECK(ContainsElement(block->GetSuccessors(), successor));
+
+  HBasicBlock* old_dominator = successor->GetDominator();
+  HBasicBlock* new_dominator =
+      (old_dominator == nullptr) ? block
+                                 : CommonDominator::ForPair(old_dominator, block);
+
+  if (old_dominator == new_dominator) {
+    return false;
+  } else {
+    successor->SetDominator(new_dominator);
+    return true;
+  }
+}
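
`CommonDominator::ForPair` (from the newly included `common_dominator.h`) returns the closest block dominating both arguments. A sketch of the classic query, assuming each block caches its depth in the dominator tree (the depth field is an assumption of this sketch, not ART's representation):

    // Minimal dominator-tree node for the sketch.
    struct Block {
      Block* idom = nullptr;  // Immediate dominator; nullptr only for the entry.
      int depth = 0;          // Depth in the dominator tree (entry == 0).
    };

    Block* CommonDominatorForPair(Block* a, Block* b) {
      while (a->depth > b->depth) a = a->idom;  // Lift the deeper node first.
      while (b->depth > a->depth) b = b->idom;
      while (a != b) {  // Same depth: climb in lock step until the chains meet.
        a = a->idom;
        b = b->idom;
      }
      return a;
    }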
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
   reverse_post_order_.push_back(entry_block_);
 
   // Number of visits of a given node, indexed by block id.
-  ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter());
+  ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter(kArenaAllocGraphBuilder));
   // Number of successors visited from a given node, indexed by block id.
-  ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter());
+  ArenaVector<size_t> successors_visited(blocks_.size(),
+                                         0u,
+                                         arena_->Adapter(kArenaAllocGraphBuilder));
   // Nodes for which we need to visit successors.
-  ArenaVector<HBasicBlock*> worklist(arena_->Adapter());
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
   constexpr size_t kDefaultWorklistSize = 8;
   worklist.reserve(kDefaultWorklistSize);
   worklist.push_back(entry_block_);
@@ -175,47 +245,64 @@
       worklist.pop_back();
     } else {
       HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
-
-      if (successor->GetDominator() == nullptr) {
-        successor->SetDominator(current);
-      } else {
-        successor->SetDominator(FindCommonDominator(successor->GetDominator(), current));
-      }
+      UpdateDominatorOfSuccessor(current, successor);
 
       // Once all the forward edges have been visited, we know the immediate
       // dominator of the block. We can then start visiting its successors.
       if (++visits[successor->GetBlockId()] ==
           successor->GetPredecessors().size() - successor->NumberOfBackEdges()) {
-        successor->GetDominator()->AddDominatedBlock(successor);
         reverse_post_order_.push_back(successor);
         worklist.push_back(successor);
       }
     }
   }
-}
 
-HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const {
-  ArenaBitVector visited(arena_, blocks_.size(), false);
-  // Walk the dominator tree of the first block and mark the visited blocks.
-  while (first != nullptr) {
-    visited.SetBit(first->GetBlockId());
-    first = first->GetDominator();
-  }
-  // Walk the dominator tree of the second block until a marked block is found.
-  while (second != nullptr) {
-    if (visited.IsBitSet(second->GetBlockId())) {
-      return second;
+  // Check if the graph has back edges not dominated by their respective headers.
+  // If so, we need to update the dominators of those headers and recursively of
+  // their successors. We do that with a fix-point iteration over all blocks.
+  // The algorithm is guaranteed to terminate because it loops only if the sum
+  // of all dominator chains has decreased in the current iteration.
+  bool must_run_fix_point = false;
+  for (HBasicBlock* block : blocks_) {
+    if (block != nullptr &&
+        block->IsLoopHeader() &&
+        block->GetLoopInformation()->HasBackEdgeNotDominatedByHeader()) {
+      must_run_fix_point = true;
+      break;
     }
-    second = second->GetDominator();
   }
-  LOG(ERROR) << "Could not find common dominator";
-  return nullptr;
-}
+  if (must_run_fix_point) {
+    bool update_occurred = true;
+    while (update_occurred) {
+      update_occurred = false;
+      for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+        HBasicBlock* block = it.Current();
+        for (HBasicBlock* successor : block->GetSuccessors()) {
+          update_occurred |= UpdateDominatorOfSuccessor(block, successor);
+        }
+      }
+    }
+  }
 
-void HGraph::TransformToSsa() {
-  DCHECK(!reverse_post_order_.empty());
-  SsaBuilder ssa_builder(this);
-  ssa_builder.BuildSsa();
+  // Make sure that there are no remaining blocks whose dominator information
+  // needs to be updated.
+  if (kIsDebugBuild) {
+    for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HBasicBlock* successor : block->GetSuccessors()) {
+        DCHECK(!UpdateDominatorOfSuccessor(block, successor));
+      }
+    }
+  }
+
+  // Populate `dominated_blocks_` information after computing all dominators.
+  // The potential presence of irreducible loops requires doing it afterwards.
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->IsEntryBlock()) {
+      block->GetDominator()->AddDominatedBlock(block);
+    }
+  }
 }
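
The fix-point above is the iterative dominator algorithm in the style of Cooper, Harvey and Kennedy. A self-contained sketch over a CFG whose blocks are numbered in reverse post order (entry is block 0, `idom[0] == 0`, and `idom` is pre-seeded by the single forward pass, as in the code above):

    #include <cstddef>
    #include <vector>

    // Intersect two dominator chains; a higher RPO index means later in the order.
    int Intersect(const std::vector<int>& idom, int a, int b) {
      while (a != b) {
        while (a > b) a = idom[a];
        while (b > a) b = idom[b];
      }
      return a;
    }

    // Re-run the meet over all predecessors until nothing changes; this repairs
    // idom entries invalidated by back edges not dominated by their headers.
    void IterateToFixPoint(const std::vector<std::vector<int>>& preds,
                           std::vector<int>* idom) {
      for (bool changed = true; changed;) {
        changed = false;
        for (size_t b = 1; b < preds.size(); ++b) {  // The entry block stays fixed.
          int new_idom = preds[b][0];
          for (size_t i = 1; i < preds[b].size(); ++i) {
            new_idom = Intersect(*idom, new_idom, preds[b][i]);
          }
          if ((*idom)[b] != new_idom) {
            (*idom)[b] = new_idom;
            changed = true;
          }
        }
      }
    }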
 
 HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -247,9 +334,10 @@
 
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
-  // loop.
+  // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining
+  // this graph.
   size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
-  if (number_of_incomings != 1) {
+  if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) {
     HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
@@ -277,75 +365,10 @@
     }
   }
 
-  // Place the suspend check at the beginning of the header, so that live registers
-  // will be known when allocating registers. Note that code generation can still
-  // generate the suspend check at the back edge, but needs to be careful with
-  // loop phi spill slots (which are not written to at back edge).
   HInstruction* first_instruction = header->GetFirstInstruction();
-  if (!first_instruction->IsSuspendCheck()) {
-    HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc());
-    header->InsertInstructionBefore(check, first_instruction);
-    first_instruction = check;
-  }
-  info->SetSuspendCheck(first_instruction->AsSuspendCheck());
-}
-
-static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
-  HBasicBlock* predecessor = block.GetPredecessors()[pred_idx];
-  if (!predecessor->EndsWithTryBoundary()) {
-    // Only edges from HTryBoundary can be exceptional.
-    return false;
-  }
-  HTryBoundary* try_boundary = predecessor->GetLastInstruction()->AsTryBoundary();
-  if (try_boundary->GetNormalFlowSuccessor() == &block) {
-    // This block is the normal-flow successor of `try_boundary`, but it could
-    // also be one of its exception handlers if catch blocks have not been
-    // simplified yet. Predecessors are unordered, so we will consider the first
-    // occurrence to be the normal edge and a possible second occurrence to be
-    // the exceptional edge.
-    return !block.IsFirstIndexOfPredecessor(predecessor, pred_idx);
-  } else {
-    // This is not the normal-flow successor of `try_boundary`, hence it must be
-    // one of its exception handlers.
-    DCHECK(try_boundary->HasExceptionHandler(block));
-    return true;
-  }
-}
-
-void HGraph::SimplifyCatchBlocks() {
-  // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
-  // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
-  for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
-    HBasicBlock* catch_block = blocks_[block_id];
-    if (!catch_block->IsCatchBlock()) {
-      continue;
-    }
-
-    bool exceptional_predecessors_only = true;
-    for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-      if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-        exceptional_predecessors_only = false;
-        break;
-      }
-    }
-
-    if (!exceptional_predecessors_only) {
-      // Catch block has normal-flow predecessors and needs to be simplified.
-      // Splitting the block before its first instruction moves all its
-      // instructions into `normal_block` and links the two blocks with a Goto.
-      // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
-      // leaving `catch_block` with the exceptional edges only.
-      // Note that catch blocks with normal-flow predecessors cannot begin with
-      // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
-      DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
-      HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
-      for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-        if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
-          --j;
-        }
-      }
-    }
+  if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) {
+    // Called from DeadBlockElimination. Update SuspendCheck pointer.
+    info->SetSuspendCheck(first_instruction->AsSuspendCheck());
   }
 }
 
@@ -366,64 +389,111 @@
     HBasicBlock* first_predecessor = block->GetPredecessors()[0];
     DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
     const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
-    if (try_entry != nullptr) {
+    if (try_entry != nullptr &&
+        (block->GetTryCatchInformation() == nullptr ||
+         try_entry != &block->GetTryCatchInformation()->GetTryEntry())) {
+      // We are either setting try block membership for the first time or it
+      // has changed.
       block->SetTryCatchInformation(new (arena_) TryCatchInformation(*try_entry));
     }
   }
 }
 
 void HGraph::SimplifyCFG() {
-  // Simplify the CFG for future analysis, and code generation:
+  // Simplify the CFG for future analysis and code generation:
   // (1): Split critical edges.
-  // (2): Simplify loops by having only one back edge, and one preheader.
+  // (2): Simplify loops by having only one preheader.
   // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
   // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
   for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
     HBasicBlock* block = blocks_[block_id];
     if (block == nullptr) continue;
-    if (block->NumberOfNormalSuccessors() > 1) {
-      for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
-        HBasicBlock* successor = block->GetSuccessors()[j];
+    if (block->GetSuccessors().size() > 1) {
+      // Only split normal-flow edges. We cannot split exceptional edges, as they
+      // are synthesized (they only approximate real control flow), and we do not
+      // need to anyway: moves that would be inserted there are performed by the runtime.
+      ArrayRef<HBasicBlock* const> normal_successors = block->GetNormalSuccessors();
+      for (size_t j = 0, e = normal_successors.size(); j < e; ++j) {
+        HBasicBlock* successor = normal_successors[j];
         DCHECK(!successor->IsCatchBlock());
-        if (successor->GetPredecessors().size() > 1) {
+        if (successor == exit_block_) {
+          // (Throw/Return/ReturnVoid)->TryBoundary->Exit. Special case which we
+          // do not want to split because Goto->Exit is not allowed.
+          DCHECK(block->IsSingleTryBoundary());
+        } else if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
-          --j;
+          // SplitCriticalEdge could have invalidated the `normal_successors`
+          // ArrayRef. We must re-acquire it.
+          normal_successors = block->GetNormalSuccessors();
+          DCHECK_EQ(normal_successors[j]->GetSingleSuccessor(), successor);
+          DCHECK_EQ(e, normal_successors.size());
         }
       }
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
+    } else if (!block->IsEntryBlock() &&
+               block->GetFirstInstruction() != nullptr &&
+               block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
+      // a loop got dismantled. Just remove the suspend check.
+      block->RemoveInstruction(block->GetFirstInstruction());
     }
   }
 }
 
-bool HGraph::AnalyzeNaturalLoops() const {
-  // Order does not matter.
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+GraphAnalysisResult HGraph::AnalyzeLoops() const {
+  // We iterate post order to ensure we visit inner loops before outer loops.
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       if (block->IsCatchBlock()) {
         // TODO: Dealing with exceptional back edges could be tricky because
         //       they only approximate the real control flow. Bail out for now.
-        return false;
+        return kAnalysisFailThrowCatchLoop;
       }
-      HLoopInformation* info = block->GetLoopInformation();
-      if (!info->Populate()) {
-        // Abort if the loop is non natural. We currently bailout in such cases.
-        return false;
-      }
+      block->GetLoopInformation()->Populate();
     }
   }
-  return true;
+  return kAnalysisSuccess;
+}
+
+void HLoopInformation::Dump(std::ostream& os) {
+  os << "header: " << header_->GetBlockId() << std::endl;
+  os << "pre header: " << GetPreHeader()->GetBlockId() << std::endl;
+  for (HBasicBlock* block : back_edges_) {
+    os << "back edge: " << block->GetBlockId() << std::endl;
+  }
+  for (HBasicBlock* block : header_->GetPredecessors()) {
+    os << "predecessor: " << block->GetBlockId() << std::endl;
+  }
+  for (uint32_t idx : blocks_.Indexes()) {
+    os << "  in loop: " << idx << std::endl;
+  }
 }
 
 void HGraph::InsertConstant(HConstant* constant) {
-  // New constants are inserted before the final control-flow instruction
-  // of the graph, or at its end if called from the graph builder.
-  if (entry_block_->EndsWithControlFlowInstruction()) {
-    entry_block_->InsertInstructionBefore(constant, entry_block_->GetLastInstruction());
-  } else {
+  // New constants are inserted before the SuspendCheck at the bottom of the
+  // entry block. Note that this method can be called from the graph builder and
+  // the entry block therefore may not end with SuspendCheck->Goto yet.
+  HInstruction* insert_before = nullptr;
+
+  HInstruction* gota = entry_block_->GetLastInstruction();
+  if (gota != nullptr && gota->IsGoto()) {
+    HInstruction* suspend_check = gota->GetPrevious();
+    if (suspend_check != nullptr && suspend_check->IsSuspendCheck()) {
+      insert_before = suspend_check;
+    } else {
+      insert_before = gota;
+    }
+  }
+
+  if (insert_before == nullptr) {
     entry_block_->AddInstruction(constant);
+  } else {
+    entry_block_->InsertInstructionBefore(constant, insert_before);
   }
 }
 
@@ -433,8 +503,13 @@
   // id and/or any invariants the graph is assuming when adding new instructions.
   if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) {
     cached_null_constant_ = new (arena_) HNullConstant(dex_pc);
+    cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_);
     InsertConstant(cached_null_constant_);
   }
+  if (kIsDebugBuild) {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(cached_null_constant_->GetReferenceTypeInfo().IsValid());
+  }
   return cached_null_constant_;
 }
 
@@ -504,66 +579,126 @@
 
   blocks_.SetBit(block->GetBlockId());
   block->SetInLoop(this);
+  if (block->IsLoopHeader()) {
+    // We're visiting loops in post-order, so inner loops must have been
+    // populated already.
+    DCHECK(block->GetLoopInformation()->IsPopulated());
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      contains_irreducible_loop_ = true;
+    }
+  }
   for (HBasicBlock* predecessor : block->GetPredecessors()) {
     PopulateRecursive(predecessor);
   }
 }
 
-bool HLoopInformation::Populate() {
-  DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
-  for (HBasicBlock* back_edge : GetBackEdges()) {
-    DCHECK(back_edge->GetDominator() != nullptr);
-    if (!header_->Dominates(back_edge)) {
-      // This loop is not natural. Do not bother going further.
-      return false;
-    }
+void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block, ArenaBitVector* finalized) {
+  size_t block_id = block->GetBlockId();
 
-    // Populate this loop: starting with the back edge, recursively add predecessors
-    // that are not already part of that loop. Set the header as part of the loop
-    // to end the recursion.
-    // This is a recursive implementation of the algorithm described in
-    // "Advanced Compiler Design & Implementation" (Muchnick) p192.
-    blocks_.SetBit(header_->GetBlockId());
-    PopulateRecursive(back_edge);
+  // If `block` is in `finalized`, we know its membership in the loop has been
+  // decided and it does not need to be revisited.
+  if (finalized->IsBitSet(block_id)) {
+    return;
   }
-  return true;
-}
 
-void HLoopInformation::Update() {
-  HGraph* graph = header_->GetGraph();
-  for (uint32_t id : blocks_.Indexes()) {
-    HBasicBlock* block = graph->GetBlocks()[id];
-    // Reset loop information of non-header blocks inside the loop, except
-    // members of inner nested loops because those should already have been
-    // updated by their own LoopInformation.
-    if (block->GetLoopInformation() == this && block != header_) {
-      block->SetLoopInformation(nullptr);
-    }
-  }
-  blocks_.ClearAllBits();
+  bool is_finalized = false;
+  if (block->IsLoopHeader()) {
+    // If we hit a loop header in an irreducible loop, we first check if the
+    // pre header of that loop belongs to the currently analyzed loop. If it does,
+    // then we visit the back edges.
+    // Note that we cannot use GetPreHeader, as the loop may not have been
+    // populated yet.
+    HBasicBlock* pre_header = block->GetPredecessors()[0];
+    PopulateIrreducibleRecursive(pre_header, finalized);
+    if (blocks_.IsBitSet(pre_header->GetBlockId())) {
+      block->SetInLoop(this);
+      blocks_.SetBit(block_id);
+      finalized->SetBit(block_id);
+      is_finalized = true;
 
-  if (back_edges_.empty()) {
-    // The loop has been dismantled, delete its suspend check and remove info
-    // from the header.
-    DCHECK(HasSuspendCheck());
-    header_->RemoveInstruction(suspend_check_);
-    header_->SetLoopInformation(nullptr);
-    header_ = nullptr;
-    suspend_check_ = nullptr;
-  } else {
-    if (kIsDebugBuild) {
-      for (HBasicBlock* back_edge : back_edges_) {
-        DCHECK(header_->Dominates(back_edge));
+      HLoopInformation* info = block->GetLoopInformation();
+      for (HBasicBlock* back_edge : info->GetBackEdges()) {
+        PopulateIrreducibleRecursive(back_edge, finalized);
       }
     }
-    // This loop still has reachable back edges. Repopulate the list of blocks.
-    bool populate_successful = Populate();
-    DCHECK(populate_successful);
+  } else {
+    // Visit all predecessors. If one predecessor is part of the loop, this
+    // block is also part of this loop.
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
+      PopulateIrreducibleRecursive(predecessor, finalized);
+      if (!is_finalized && blocks_.IsBitSet(predecessor->GetBlockId())) {
+        block->SetInLoop(this);
+        blocks_.SetBit(block_id);
+        finalized->SetBit(block_id);
+        is_finalized = true;
+      }
+    }
+  }
+
+  // All predecessors have been recursively visited. Mark finalized if not marked yet.
+  if (!is_finalized) {
+    finalized->SetBit(block_id);
+  }
+}
+
+void HLoopInformation::Populate() {
+  DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
+  // Populate this loop: starting with the back edge, recursively add predecessors
+  // that are not already part of that loop. Set the header as part of the loop
+  // to end the recursion.
+  // This is a recursive implementation of the algorithm described in
+  // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+  HGraph* graph = header_->GetGraph();
+  blocks_.SetBit(header_->GetBlockId());
+  header_->SetInLoop(this);
+
+  bool is_irreducible_loop = HasBackEdgeNotDominatedByHeader();
+
+  if (is_irreducible_loop) {
+    ArenaBitVector visited(graph->GetArena(),
+                           graph->GetBlocks().size(),
+                           /* expandable */ false,
+                           kArenaAllocGraphBuilder);
+    // Stop marking blocks at the loop header.
+    visited.SetBit(header_->GetBlockId());
+
+    for (HBasicBlock* back_edge : GetBackEdges()) {
+      PopulateIrreducibleRecursive(back_edge, &visited);
+    }
+  } else {
+    for (HBasicBlock* back_edge : GetBackEdges()) {
+      PopulateRecursive(back_edge);
+    }
+  }
+
+  if (!is_irreducible_loop && graph->IsCompilingOsr()) {
+    // When compiling in OSR mode, all loops in the compiled method may be entered
+    // from the interpreter. We treat this OSR entry point just like an extra entry
+    // to an irreducible loop, so we need to mark the method's loops as irreducible.
+    // This does not apply to inlined loops which do not act as OSR entry points.
+    if (suspend_check_ == nullptr) {
+      // We are just building the graph in OSR mode, so this loop is not inlined. We never
+      // build an inner graph in OSR mode, as the OSR transition can only enter the outer method.
+      is_irreducible_loop = true;
+    } else {
+      // Look at the suspend check's environment to determine if the loop was inlined.
+      DCHECK(suspend_check_->HasEnvironment());
+      if (!suspend_check_->GetEnvironment()->IsFromInlinedInvoke()) {
+        is_irreducible_loop = true;
+      }
+    }
+  }
+  if (is_irreducible_loop) {
+    irreducible_ = true;
+    contains_irreducible_loop_ = true;
+    graph->SetHasIrreducibleLoops(true);
   }
 }
 
 HBasicBlock* HLoopInformation::GetPreHeader() const {
-  return header_->GetDominator();
+  HBasicBlock* block = header_->GetPredecessors()[0];
+  DCHECK(irreducible_ || (block == header_->GetDominator()));
+  return block;
 }
 
 bool HLoopInformation::Contains(const HBasicBlock& block) const {
@@ -574,6 +709,10 @@
   return other.blocks_.IsBitSet(header_->GetBlockId());
 }
 
+bool HLoopInformation::IsDefinedOutOfTheLoop(HInstruction* instruction) const {
+  return !blocks_.IsBitSet(instruction->GetBlock()->GetBlockId());
+}
+
 size_t HLoopInformation::GetLifetimeEnd() const {
   size_t last_position = 0;
   for (HBasicBlock* back_edge : GetBackEdges()) {
@@ -582,6 +721,25 @@
   return last_position;
 }
 
+bool HLoopInformation::HasBackEdgeNotDominatedByHeader() const {
+  for (HBasicBlock* back_edge : GetBackEdges()) {
+    DCHECK(back_edge->GetDominator() != nullptr);
+    if (!header_->Dominates(back_edge)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool HLoopInformation::DominatesAllBackEdges(HBasicBlock* block) {
+  for (HBasicBlock* back_edge : GetBackEdges()) {
+    if (!block->Dominates(back_edge)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -596,8 +754,9 @@
 }
 
 static void UpdateInputsUsers(HInstruction* instruction) {
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    instruction->InputAt(i)->AddUseAt(instruction, i);
+  HInputsRef inputs = instruction->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs[i]->AddUseAt(instruction, i);
   }
   // Environment should be created later.
   DCHECK(!instruction->HasEnvironment());
@@ -613,8 +772,8 @@
     DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid);
     DCHECK_EQ(initial->GetBlock(), this);
     DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid);
-    DCHECK(initial->GetUses().IsEmpty());
-    DCHECK(initial->GetEnvUses().IsEmpty());
+    DCHECK(initial->GetUses().empty());
+    DCHECK(initial->GetEnvUses().empty());
     replacement->SetBlock(this);
     replacement->SetId(GetGraph()->GetNextInstructionId());
     instructions_.InsertInstructionBefore(replacement, initial);
@@ -690,8 +849,8 @@
   instruction->SetBlock(nullptr);
   instruction_list->RemoveInstruction(instruction);
   if (ensure_safety) {
-    DCHECK(instruction->GetUses().IsEmpty());
-    DCHECK(instruction->GetEnvUses().IsEmpty());
+    DCHECK(instruction->GetUses().empty());
+    DCHECK(instruction->GetEnvUses().empty());
     RemoveAsUser(instruction);
   }
 }
@@ -746,7 +905,6 @@
       // At the end of the loop pre-header, the corresponding value for instruction
       // is the first input of the phi.
       HInstruction* initial = instruction->AsPhi()->InputAt(0);
-      DCHECK(initial->GetBlock()->Dominates(loop_header));
       SetRawEnvAt(i, initial);
       initial->AddEnvUseAt(this, i);
     } else {
@@ -756,8 +914,15 @@
 }
 
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
-  const HUserRecord<HEnvironment*>& user_record = vregs_[index];
-  user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
+  const HUserRecord<HEnvironment*>& env_use = vregs_[index];
+  HInstruction* user = env_use.GetInstruction();
+  auto before_env_use_node = env_use.GetBeforeUseNode();
+  user->env_uses_.erase_after(before_env_use_node);
+  user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
+}
+
+HInstruction::InstructionKind HInstruction::GetKind() const {
+  return GetKindInternal();
 }
 
 HInstruction* HInstruction::GetNextDisregardingMoves() const {
@@ -891,32 +1056,43 @@
   }
 }
 
+void HInstruction::RemoveEnvironment() {
+  RemoveEnvironmentUses(this);
+  environment_ = nullptr;
+}
+
 void HInstruction::ReplaceWith(HInstruction* other) {
   DCHECK(other != nullptr);
-  for (HUseIterator<HInstruction*> it(GetUses()); !it.Done(); it.Advance()) {
-    HUseListNode<HInstruction*>* current = it.Current();
-    HInstruction* user = current->GetUser();
-    size_t input_index = current->GetIndex();
-    user->SetRawInputAt(input_index, other);
-    other->AddUseAt(user, input_index);
-  }
+  // Note: fixup_end remains valid across splice_after().
+  auto fixup_end = other->uses_.empty() ? other->uses_.begin() : ++other->uses_.begin();
+  other->uses_.splice_after(other->uses_.before_begin(), uses_);
+  other->FixUpUserRecordsAfterUseInsertion(fixup_end);
 
-  for (HUseIterator<HEnvironment*> it(GetEnvUses()); !it.Done(); it.Advance()) {
-    HUseListNode<HEnvironment*>* current = it.Current();
-    HEnvironment* user = current->GetUser();
-    size_t input_index = current->GetIndex();
-    user->SetRawEnvAt(input_index, other);
-    other->AddEnvUseAt(user, input_index);
-  }
+  // Note: env_fixup_end remains valid across splice_after().
+  auto env_fixup_end =
+      other->env_uses_.empty() ? other->env_uses_.begin() : ++other->env_uses_.begin();
+  other->env_uses_.splice_after(other->env_uses_.before_begin(), env_uses_);
+  other->FixUpUserRecordsAfterEnvUseInsertion(env_fixup_end);
 
-  uses_.Clear();
-  env_uses_.Clear();
+  DCHECK(uses_.empty());
+  DCHECK(env_uses_.empty());
 }
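
The `++begin()` trick above is subtle: the use lists are intrusive singly-linked lists with `splice_after` semantics, and each user record stores an iterator to the node *before* its use node (so `erase_after` works). Splicing at the front therefore also invalidates the predecessor recorded for the old first node, which is why the fixup range runs one node past the spliced-in elements. The same behavior, demonstrated with `std::forward_list`:

    #include <forward_list>
    #include <iostream>
    #include <iterator>

    int main() {
      std::forward_list<int> uses = {100, 200};    // Existing uses of `other`.
      std::forward_list<int> donated = {1, 2, 3};  // Uses moved from `this`.

      // One past the old first node; stays valid across splice_after().
      auto fixup_end = uses.empty() ? uses.begin() : std::next(uses.begin());
      uses.splice_after(uses.before_begin(), donated);  // uses: 1 2 3 100 200

      // Every node in [begin, fixup_end) is either new or has a new predecessor.
      for (auto it = uses.begin(); it != fixup_end; ++it) {
        std::cout << "re-record before-use iterator for " << *it << "\n";  // 1 2 3 100
      }
    }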
 
 void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
-  RemoveAsUserOfInput(index);
-  SetRawInputAt(index, replacement);
-  replacement->AddUseAt(this, index);
+  HUserRecord<HInstruction*> input_use = InputRecordAt(index);
+  if (input_use.GetInstruction() == replacement) {
+    // Nothing to do.
+    return;
+  }
+  HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+  // Note: fixup_end remains valid across splice_after().
+  auto fixup_end =
+      replacement->uses_.empty() ? replacement->uses_.begin() : ++replacement->uses_.begin();
+  replacement->uses_.splice_after(replacement->uses_.before_begin(),
+                                  input_use.GetInstruction()->uses_,
+                                  before_use_node);
+  replacement->FixUpUserRecordsAfterUseInsertion(fixup_end);
+  input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
 }
 
 size_t HInstruction::EnvironmentSize() const {
@@ -932,9 +1108,10 @@
 void HPhi::RemoveInputAt(size_t index) {
   RemoveAsUserOfInput(index);
   inputs_.erase(inputs_.begin() + index);
-  for (size_t i = index, e = InputCount(); i < e; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -943,7 +1120,7 @@
   visitor->Visit##name(this);                                                  \
 }
 
-FOR_EACH_INSTRUCTION(DEFINE_ACCEPT)
+FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT)
 
 #undef DEFINE_ACCEPT
 
@@ -1054,23 +1231,37 @@
     return Evaluate(GetInput()->AsIntConstant());
   } else if (GetInput()->IsLongConstant()) {
     return Evaluate(GetInput()->AsLongConstant());
+  } else if (kEnableFloatingPointStaticEvaluation) {
+    if (GetInput()->IsFloatConstant()) {
+      return Evaluate(GetInput()->AsFloatConstant());
+    } else if (GetInput()->IsDoubleConstant()) {
+      return Evaluate(GetInput()->AsDoubleConstant());
+    }
   }
   return nullptr;
 }
 
 HConstant* HBinaryOperation::TryStaticEvaluation() const {
-  if (GetLeft()->IsIntConstant()) {
-    if (GetRight()->IsIntConstant()) {
-      return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant());
-    } else if (GetRight()->IsLongConstant()) {
-      return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsLongConstant());
-    }
+  if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) {
+    return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant());
   } else if (GetLeft()->IsLongConstant()) {
     if (GetRight()->IsIntConstant()) {
+      // The binop(long, int) case is only valid for shifts and rotations.
+      DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName();
       return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant());
     } else if (GetRight()->IsLongConstant()) {
       return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant());
     }
+  } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
+    // The binop(null, null) case is only valid for equal and not-equal conditions.
+    DCHECK(IsEqual() || IsNotEqual()) << DebugName();
+    return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
+  } else if (kEnableFloatingPointStaticEvaluation) {
+    if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) {
+      return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant());
+    } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) {
+      return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant());
+    }
   }
   return nullptr;
 }
@@ -1098,20 +1289,36 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
+  switch (rhs) {
+    case ComparisonBias::kNoBias:
+      return os << "no_bias";
+    case ComparisonBias::kGtBias:
+      return os << "gt_bias";
+    case ComparisonBias::kLtBias:
+      return os << "lt_bias";
+    default:
+      LOG(FATAL) << "Unknown ComparisonBias: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
   return this == instruction->GetPreviousDisregardingMoves();
 }
 
-bool HInstruction::Equals(HInstruction* other) const {
+bool HInstruction::Equals(const HInstruction* other) const {
   if (!InstructionTypeEquals(other)) return false;
   DCHECK_EQ(GetKind(), other->GetKind());
   if (!InstructionDataEquals(other)) return false;
   if (GetType() != other->GetType()) return false;
-  if (InputCount() != other->InputCount()) return false;
-
-  for (size_t i = 0, e = InputCount(); i < e; ++i) {
-    if (InputAt(i) != other->InputAt(i)) return false;
+  HConstInputsRef inputs = GetInputs();
+  HConstInputsRef other_inputs = other->GetInputs();
+  if (inputs.size() != other_inputs.size()) return false;
+  for (size_t i = 0; i != inputs.size(); ++i) {
+    if (inputs[i] != other_inputs[i]) return false;
   }
+
   DCHECK_EQ(ComputeHashCode(), other->ComputeHashCode());
   return true;
 }
@@ -1129,6 +1336,11 @@
 }
 
 void HInstruction::MoveBefore(HInstruction* cursor) {
+  DCHECK(!IsPhi());
+  DCHECK(!IsControlFlow());
+  DCHECK(CanBeMoved());
+  DCHECK(!cursor->IsPhi());
+
   next_->previous_ = previous_;
   if (previous_ != nullptr) {
     previous_->next_ = next_;
@@ -1151,8 +1363,62 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  auto uses_it = GetUses().begin();
+  auto uses_end = GetUses().end();
+  HBasicBlock* target_block = uses_it->GetUser()->GetBlock();
+  ++uses_it;
+  while (uses_it != uses_end && uses_it->GetUser()->GetBlock() == target_block) {
+    ++uses_it;
+  }
+  if (uses_it != uses_end) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; uses_it != uses_end; ++uses_it) {
+      finder.Update(uses_it->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (const HUseListNode<HInstruction*>& use : GetUses()) {
+    if (use.GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = use.GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
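
// The CommonDominator helper used above is defined elsewhere; as a rough,
// standalone sketch (hypothetical `Block` type), a common dominator can be
// found by intersecting the dominator chains of the two blocks:
#include <unordered_set>

struct Block { Block* dominator; };

Block* FindCommonDominator(Block* a, Block* b) {
  std::unordered_set<Block*> chain;
  for (Block* x = a; x != nullptr; x = x->dominator) {
    chain.insert(x);  // `a` and everything that dominates it.
  }
  for (Block* y = b; y != nullptr; y = y->dominator) {
    if (chain.count(y) != 0) {
      return y;  // First block on `b`'s chain that also dominates `a`.
    }
  }
  return nullptr;  // Cannot happen in a well-formed dominator tree.
}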
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
@@ -1182,7 +1448,7 @@
 }
 
 HBasicBlock* HBasicBlock::CreateImmediateDominator() {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented.";
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
@@ -1198,7 +1464,38 @@
   return new_block;
 }
 
-HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
+HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) {
+  DCHECK_EQ(cursor->GetBlock(), this);
+
+  HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
+                                                                    cursor->GetDexPc());
+  new_block->instructions_.first_instruction_ = cursor;
+  new_block->instructions_.last_instruction_ = instructions_.last_instruction_;
+  instructions_.last_instruction_ = cursor->previous_;
+  if (cursor->previous_ == nullptr) {
+    instructions_.first_instruction_ = nullptr;
+  } else {
+    cursor->previous_->next_ = nullptr;
+    cursor->previous_ = nullptr;
+  }
+
+  new_block->instructions_.SetBlockOfInstructions(new_block);
+
+  for (HBasicBlock* successor : GetSuccessors()) {
+    new_block->successors_.push_back(successor);
+    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
+  }
+  successors_.clear();
+
+  for (HBasicBlock* dominated : GetDominatedBlocks()) {
+    dominated->dominator_ = new_block;
+    new_block->dominated_blocks_.push_back(dominated);
+  }
+  dominated_blocks_.clear();
+  return new_block;
+}
+
+HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -1282,17 +1579,38 @@
   return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
 }
 
+ArrayRef<HBasicBlock* const> HBasicBlock::GetNormalSuccessors() const {
+  if (EndsWithTryBoundary()) {
+    // The normal-flow successor of HTryBoundary is always stored at index zero.
+    DCHECK_EQ(successors_[0], GetLastInstruction()->AsTryBoundary()->GetNormalFlowSuccessor());
+    return ArrayRef<HBasicBlock* const>(successors_).SubArray(0u, 1u);
+  } else {
+    // All successors of blocks not ending with TryBoundary are normal.
+    return ArrayRef<HBasicBlock* const>(successors_);
+  }
+}
+
+ArrayRef<HBasicBlock* const> HBasicBlock::GetExceptionalSuccessors() const {
+  if (EndsWithTryBoundary()) {
+    return GetLastInstruction()->AsTryBoundary()->GetExceptionHandlers();
+  } else {
+    // Blocks not ending with TryBoundary do not have exceptional successors.
+    return ArrayRef<HBasicBlock* const>();
+  }
+}
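
// A rough analog of the ArrayRef/SubArray views returned above, written with
// C++20 std::span purely for illustration (assumption: ART's ArrayRef behaves
// like a non-owning subview over the successor vector):
#include <span>
#include <vector>

std::span<int* const> NormalSuccessors(const std::vector<int*>& successors,
                                       bool ends_with_try_boundary) {
  std::span<int* const> all(successors);
  // A TryBoundary block exposes only its normal-flow successor at index 0.
  return ends_with_try_boundary ? all.subspan(0, 1) : all;
}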
+
 bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const {
-  if (GetBlock()->GetSuccessors().size() != other.GetBlock()->GetSuccessors().size()) {
+  ArrayRef<HBasicBlock* const> handlers1 = GetExceptionHandlers();
+  ArrayRef<HBasicBlock* const> handlers2 = other.GetExceptionHandlers();
+
+  size_t length = handlers1.size();
+  if (length != handlers2.size()) {
     return false;
   }
 
   // Exception handlers need to be stored in the same order.
-  for (HExceptionHandlerIterator it1(*this), it2(other);
-       !it1.Done();
-       it1.Advance(), it2.Advance()) {
-    DCHECK(!it2.Done());
-    if (it1.Current() != it2.Current()) {
+  for (size_t i = 0; i < length; ++i) {
+    if (handlers1[i] != handlers2[i]) {
       return false;
     }
   }
@@ -1330,6 +1648,20 @@
   }
 }
 
+void HInstructionList::AddBefore(HInstruction* cursor, const HInstructionList& instruction_list) {
+  DCHECK(Contains(cursor));
+  if (!instruction_list.IsEmpty()) {
+    if (cursor == first_instruction_) {
+      first_instruction_ = instruction_list.first_instruction_;
+    } else {
+      cursor->previous_->next_ = instruction_list.first_instruction_;
+    }
+    instruction_list.last_instruction_->next_ = cursor;
+    instruction_list.first_instruction_->previous_ = cursor->previous_;
+    cursor->previous_ = instruction_list.last_instruction_;
+  }
+}
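
// Standalone sketch of the splice performed by AddBefore(): insert the chain
// [first, last] immediately before `cursor` in a minimal doubly-linked list
// (hypothetical `ListNode` type).
struct ListNode { ListNode* prev = nullptr; ListNode* next = nullptr; };

void SpliceBefore(ListNode*& head, ListNode* cursor,
                  ListNode* first, ListNode* last) {
  if (cursor == head) {
    head = first;                // The spliced chain becomes the new head.
  } else {
    cursor->prev->next = first;  // Predecessor now points at the chain.
  }
  first->prev = cursor->prev;
  last->next = cursor;
  cursor->prev = last;
}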
+
 void HInstructionList::Add(const HInstructionList& instruction_list) {
   if (IsEmpty()) {
     first_instruction_ = instruction_list.first_instruction_;
@@ -1339,52 +1671,48 @@
   }
 }
 
+// Should be called on instructions in a dead block, in post order. This
+// method assumes `insn` has already been removed from all users except catch
+// phis, which may still reference it because exceptional edges are not
+// represented in the graph. It removes the instruction from those catch phi
+// uses, together with the inputs of all other catch phis in the same catch
+// block at the same index, as these must be dead too.
+static void RemoveUsesOfDeadInstruction(HInstruction* insn) {
+  DCHECK(!insn->HasEnvironmentUses());
+  while (insn->HasNonEnvironmentUses()) {
+    const HUseListNode<HInstruction*>& use = insn->GetUses().front();
+    size_t use_index = use.GetIndex();
+    HBasicBlock* user_block = use.GetUser()->GetBlock();
+    DCHECK(use.GetUser()->IsPhi() && user_block->IsCatchBlock());
+    for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+      phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
+    }
+  }
+}
+
 void HBasicBlock::DisconnectAndDelete() {
   // Dominators must be removed after all the blocks they dominate. This way
   // a loop header is removed last, a requirement for correct loop information
   // iteration.
   DCHECK(dominated_blocks_.empty());
 
-  // Remove the block from all loops it is included in.
-  for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
-    HLoopInformation* loop_info = it.Current();
-    loop_info->Remove(this);
-    if (loop_info->IsBackEdge(*this)) {
-      // If this was the last back edge of the loop, we deliberately leave the
-      // loop in an inconsistent state and will fail SSAChecker unless the
-      // entire loop is removed during the pass.
-      loop_info->RemoveBackEdge(this);
-    }
+  // The following steps gradually remove the block from all its dependents in
+  // post order (b/27683071).
+
+  // (1) Store a basic block that we'll use in step (5) to find loops to be updated.
+  //     We need to do this before step (4), which destroys the predecessor list.
+  HBasicBlock* loop_update_start = this;
+  if (IsLoopHeader()) {
+    HLoopInformation* loop_info = GetLoopInformation();
+    // All other blocks in this loop should have been removed because the header
+    // was their dominator.
+    // Note that we do not remove `this` from `loop_info` as it is unreachable.
+    DCHECK(!loop_info->IsIrreducible());
+    DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 1u);
+    DCHECK_EQ(static_cast<uint32_t>(loop_info->GetBlocks().GetHighestBitSet()), GetBlockId());
+    loop_update_start = loop_info->GetPreHeader();
   }
 
-  // Disconnect the block from its predecessors and update their control-flow
-  // instructions.
-  for (HBasicBlock* predecessor : predecessors_) {
-    HInstruction* last_instruction = predecessor->GetLastInstruction();
-    predecessor->RemoveSuccessor(this);
-    uint32_t num_pred_successors = predecessor->GetSuccessors().size();
-    if (num_pred_successors == 1u) {
-      // If we have one successor after removing one, then we must have
-      // had an HIf or HPackedSwitch, as they have more than one successor.
-      // Replace those with a HGoto.
-      DCHECK(last_instruction->IsIf() || last_instruction->IsPackedSwitch());
-      predecessor->RemoveInstruction(last_instruction);
-      predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc()));
-    } else if (num_pred_successors == 0u) {
-      // The predecessor has no remaining successors and therefore must be dead.
-      // We deliberately leave it without a control-flow instruction so that the
-      // SSAChecker fails unless it is not removed during the pass too.
-      predecessor->RemoveInstruction(last_instruction);
-    } else {
-      // There are multiple successors left.  This must come from a HPackedSwitch
-      // and we are in the middle of removing the HPackedSwitch. Like above, leave
-      // this alone, and the SSAChecker will fail if it is not removed as well.
-      DCHECK(last_instruction->IsPackedSwitch());
-    }
-  }
-  predecessors_.clear();
-
-  // Disconnect the block from its successors and update their phis.
+  // (2) Disconnect the block from its successors and update their phis.
   for (HBasicBlock* successor : successors_) {
     // Delete this block from the list of predecessors.
     size_t this_index = successor->GetPredecessorIndexOf(this);
@@ -1394,30 +1722,113 @@
     // dominator of `successor` which violates the order DCHECKed at the top.
     DCHECK(!successor->predecessors_.empty());
 
-    // Remove this block's entries in the successor's phis.
-    if (successor->predecessors_.size() == 1u) {
-      // The successor has just one predecessor left. Replace phis with the only
-      // remaining input.
-      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-        HPhi* phi = phi_it.Current()->AsPhi();
-        phi->ReplaceWith(phi->InputAt(1 - this_index));
-        successor->RemovePhi(phi);
-      }
-    } else {
-      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-        phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+    // Remove this block's entries in the successor's phis. Skip exceptional
+    // successors because catch phi inputs do not correspond to predecessor
+    // blocks but to throwing instructions. The inputs of the catch phis will
+    // be updated in step (3).
+    if (!successor->IsCatchBlock()) {
+      if (successor->predecessors_.size() == 1u) {
+        // The successor has just one predecessor left. Replace phis with the only
+        // remaining input.
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          HPhi* phi = phi_it.Current()->AsPhi();
+          phi->ReplaceWith(phi->InputAt(1 - this_index));
+          successor->RemovePhi(phi);
+        }
+      } else {
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+        }
       }
     }
   }
   successors_.clear();
 
-  // Disconnect from the dominator.
+  // (3) Remove instructions and phis. Instructions should have no remaining uses
+  //     except in catch phis. If an instruction is used by a catch phi at `index`,
+  //     remove the `index`-th input of all phis in the catch block since they are
+  //     guaranteed dead. Note that we may miss dead inputs this way, but the
+  //     graph will always remain consistent.
+  for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* insn = it.Current();
+    RemoveUsesOfDeadInstruction(insn);
+    RemoveInstruction(insn);
+  }
+  for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* insn = it.Current()->AsPhi();
+    RemoveUsesOfDeadInstruction(insn);
+    RemovePhi(insn);
+  }
+
+  // (4) Disconnect the block from its predecessors and update their
+  //     control-flow instructions.
+  for (HBasicBlock* predecessor : predecessors_) {
+    // We should not see any back edges as they would have been removed by step (3).
+    DCHECK(!IsInLoop() || !GetLoopInformation()->IsBackEdge(*predecessor));
+
+    HInstruction* last_instruction = predecessor->GetLastInstruction();
+    if (last_instruction->IsTryBoundary() && !IsCatchBlock()) {
+      // This block is the only normal-flow successor of the TryBoundary, which
+      // makes `predecessor` dead. Since DCE removes blocks in post order, the
+      // exception handlers of this TryBoundary were already visited and any
+      // remaining handlers must therefore be live. We remove `predecessor` from
+      // their list of predecessors.
+      DCHECK_EQ(last_instruction->AsTryBoundary()->GetNormalFlowSuccessor(), this);
+      while (predecessor->GetSuccessors().size() > 1) {
+        HBasicBlock* handler = predecessor->GetSuccessors()[1];
+        DCHECK(handler->IsCatchBlock());
+        predecessor->RemoveSuccessor(handler);
+        handler->RemovePredecessor(predecessor);
+      }
+    }
+
+    predecessor->RemoveSuccessor(this);
+    uint32_t num_pred_successors = predecessor->GetSuccessors().size();
+    if (num_pred_successors == 1u) {
+      // If we have one successor after removing one, then we must have
+      // had an HIf, an HPackedSwitch or an HTryBoundary, as those have more
+      // than one successor. Replace it with an HGoto.
+      DCHECK(last_instruction->IsIf() ||
+             last_instruction->IsPackedSwitch() ||
+             (last_instruction->IsTryBoundary() && IsCatchBlock()));
+      predecessor->RemoveInstruction(last_instruction);
+      predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc()));
+    } else if (num_pred_successors == 0u) {
+      // The predecessor has no remaining successors and therefore must be dead.
+      // We deliberately leave it without a control-flow instruction so that the
+      // GraphChecker fails unless it is also removed during the pass.
+      predecessor->RemoveInstruction(last_instruction);
+    } else {
+      // There are multiple successors left. The removed block might be a successor
+      // of a PackedSwitch which will be completely removed (perhaps replaced with
+      // a Goto), or we are deleting a catch block from a TryBoundary. In either
+      // case, leave `last_instruction` as is for now.
+      DCHECK(last_instruction->IsPackedSwitch() ||
+             (last_instruction->IsTryBoundary() && IsCatchBlock()));
+    }
+  }
+  predecessors_.clear();
+
+  // (5) Remove the block from all loops it is included in. Skip the innermost
+  //     loop if this is the loop header (see the definition of `loop_update_start`)
+  //     because the loop header's predecessor list was destroyed in step (4).
+  for (HLoopInformationOutwardIterator it(*loop_update_start); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    loop_info->Remove(this);
+    if (loop_info->IsBackEdge(*this)) {
+      // If this was the last back edge of the loop, we deliberately leave the
+      // loop in an inconsistent state and will fail GraphChecker unless the
+      // entire loop is removed during the pass.
+      loop_info->RemoveBackEdge(this);
+    }
+  }
+
+  // (6) Disconnect from the dominator.
   dominator_->RemoveDominatedBlock(this);
   SetDominator(nullptr);
 
-  // Delete from the graph. The function safely deletes remaining instructions
-  // and updates the reverse post order.
-  graph_->DeleteDeadBlock(this);
+  // (7) Delete from the graph, update reverse post order.
+  graph_->DeleteDeadEmptyBlock(this);
   SetGraph(nullptr);
 }
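
// Simplified, standalone sketch of the disconnection half of the method above
// (hypothetical `Blk` type): unlink a node from its neighbours while keeping
// the remaining predecessor/successor orders intact. The real method also
// patches phis, loop information and the dominator tree.
#include <algorithm>
#include <vector>

struct Blk {
  std::vector<Blk*> preds;
  std::vector<Blk*> succs;
};

void Disconnect(Blk* b) {
  for (Blk* s : b->succs) {
    s->preds.erase(std::find(s->preds.begin(), s->preds.end(), b));
  }
  b->succs.clear();
  for (Blk* p : b->preds) {
    p->succs.erase(std::find(p->succs.begin(), p->succs.end(), b));
  }
  b->preds.clear();
}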
 
@@ -1464,7 +1875,7 @@
   other->predecessors_.clear();
 
   // Delete `other` from the graph. The function updates reverse post order.
-  graph_->DeleteDeadBlock(other);
+  graph_->DeleteDeadEmptyBlock(other);
   other->SetGraph(nullptr);
 }
 
@@ -1516,38 +1927,58 @@
   graph_ = nullptr;
 }
 
-// Create space in `blocks` for adding `number_of_new_blocks` entries
-// starting at location `at`. Blocks after `at` are moved accordingly.
-static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
-                        size_t number_of_new_blocks,
-                        size_t after) {
-  DCHECK_LT(after, blocks->size());
-  size_t old_size = blocks->size();
-  size_t new_size = old_size + number_of_new_blocks;
-  blocks->resize(new_size);
-  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
-}
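
// MakeRoomFor() is removed here but still used below, so its definition
// presumably moves to a shared header. For reference, a standalone equivalent
// of what it does: grow the vector and shift the tail so that
// `number_of_new_blocks` reusable slots open up right after index `after`.
#include <algorithm>
#include <cstddef>
#include <vector>

template <typename T>
void MakeRoomFor(std::vector<T>* blocks, size_t number_of_new_blocks, size_t after) {
  size_t old_size = blocks->size();
  blocks->resize(old_size + number_of_new_blocks);
  std::copy_backward(blocks->begin() + after + 1u,
                     blocks->begin() + old_size,
                     blocks->end());
}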
-
-void HGraph::DeleteDeadBlock(HBasicBlock* block) {
+void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
   DCHECK_EQ(block->GetGraph(), this);
   DCHECK(block->GetSuccessors().empty());
   DCHECK(block->GetPredecessors().empty());
   DCHECK(block->GetDominatedBlocks().empty());
   DCHECK(block->GetDominator() == nullptr);
-
-  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    block->RemoveInstruction(it.Current());
-  }
-  for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    block->RemovePhi(it.Current()->AsPhi());
-  }
+  DCHECK(block->GetInstructions().IsEmpty());
+  DCHECK(block->GetPhis().IsEmpty());
 
   if (block->IsExitBlock()) {
-    exit_block_ = nullptr;
+    SetExitBlock(nullptr);
   }
 
   RemoveElement(reverse_post_order_, block);
   blocks_[block->GetBlockId()] = nullptr;
+  block->SetGraph(nullptr);
+}
+
+void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
+                                                   HBasicBlock* reference,
+                                                   bool replace_if_back_edge) {
+  if (block->IsLoopHeader()) {
+    // Clear the information of which blocks are contained in that loop. Since the
+    // information is stored as a bit vector based on block ids, we have to update
+    // it, as those block ids were specific to the callee graph and we are now adding
+    // these blocks to the caller graph.
+    block->GetLoopInformation()->ClearAllBlocks();
+  }
+
+  // If not already in a loop, update the loop information.
+  if (!block->IsInLoop()) {
+    block->SetLoopInformation(reference->GetLoopInformation());
+  }
+
+  // If the block is in a loop, update all its outward loops.
+  HLoopInformation* loop_info = block->GetLoopInformation();
+  if (loop_info != nullptr) {
+    for (HLoopInformationOutwardIterator loop_it(*block);
+         !loop_it.Done();
+         loop_it.Advance()) {
+      loop_it.Current()->Add(block);
+    }
+    if (replace_if_back_edge && loop_info->IsBackEdge(*reference)) {
+      loop_info->ReplaceBackEdge(reference, block);
+    }
+  }
+
+  // Copy the TryCatchInformation only if `reference` is a try block, not if it
+  // is a catch block.
+  TryCatchInformation* try_catch_info = reference->IsTryBlock()
+      ? reference->GetTryCatchInformation()
+      : nullptr;
+  block->SetTryCatchInformation(try_catch_info);
 }
 
 HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
@@ -1564,6 +1995,7 @@
            instr_it.Advance()) {
         HInstruction* current = instr_it.Current();
         if (current->NeedsEnvironment()) {
+          DCHECK(current->HasEnvironment());
           current->GetEnvironment()->SetAndCopyParentChain(
               outer_graph->GetArena(), invoke->GetEnvironment());
         }
@@ -1583,9 +2015,11 @@
     DCHECK(GetBlocks()[0]->IsEntryBlock());
     DCHECK(GetBlocks()[2]->IsExitBlock());
     DCHECK(!body->IsExitBlock());
+    DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
-    invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
+    // Note that we add instructions before the invoke only to simplify polymorphic inlining.
+    invoke->GetBlock()->instructions_.AddBefore(invoke, body->GetInstructions());
     body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock());
 
     // Replace the invoke with the return value of the inlined graph.
@@ -1603,13 +2037,59 @@
     // with the second half.
     ArenaAllocator* allocator = outer_graph->GetArena();
     HBasicBlock* at = invoke->GetBlock();
-    HBasicBlock* to = at->SplitAfter(invoke);
+    // Note that we split before the invoke only to simplify polymorphic inlining.
+    HBasicBlock* to = at->SplitBeforeForInlining(invoke);
 
     HBasicBlock* first = entry_block_->GetSuccessors()[0];
     DCHECK(!first->IsInLoop());
     at->MergeWithInlined(first);
     exit_block_->ReplaceWith(to);
 
+    // Update the meta information surrounding blocks:
+    // (1) the graph they are now in,
+    // (2) the reverse post order of that graph,
+    // (3) their potential loop information, inner and outer,
+    // (4) try block membership.
+    // Note that we do not need to update catch phi inputs because they
+    // correspond to the register file of the outer method, which the inlinee
+    // cannot modify.
+
+    // We don't add the entry block, the exit block, and the first block, which
+    // has been merged with `at`.
+    static constexpr int kNumberOfSkippedBlocksInCallee = 3;
+
+    // We add the `to` block.
+    static constexpr int kNumberOfNewBlocksInCaller = 1;
+    size_t blocks_added = (reverse_post_order_.size() - kNumberOfSkippedBlocksInCallee)
+        + kNumberOfNewBlocksInCaller;
+
+    // Find the location of `at` in the outer graph's reverse post order. The new
+    // blocks will be added after it.
+    size_t index_of_at = IndexOfElement(outer_graph->reverse_post_order_, at);
+    MakeRoomFor(&outer_graph->reverse_post_order_, blocks_added, index_of_at);
+
+    // Do a reverse post order of the blocks in the callee and do (1), (2), (3)
+    // and (4) to the blocks that apply.
+    for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+      HBasicBlock* current = it.Current();
+      if (current != exit_block_ && current != entry_block_ && current != first) {
+        DCHECK(current->GetTryCatchInformation() == nullptr);
+        DCHECK(current->GetGraph() == this);
+        current->SetGraph(outer_graph);
+        outer_graph->AddBlock(current);
+        outer_graph->reverse_post_order_[++index_of_at] = current;
+        UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge */ false);
+      }
+    }
+
+    // Do (1), (2), (3) and (4) to `to`.
+    to->SetGraph(outer_graph);
+    outer_graph->AddBlock(to);
+    outer_graph->reverse_post_order_[++index_of_at] = to;
+    // Only `to` can become a back edge, as the inlined blocks
+    // are predecessors of `to`.
+    UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge */ true);
+
     // Update all predecessors of the exit block (now the `to` block)
     // to not `HReturn` but `HGoto` instead.
     bool returns_void = to->GetPredecessors()[0]->GetLastInstruction()->IsReturnVoid();
@@ -1631,73 +2111,15 @@
       for (HBasicBlock* predecessor : to->GetPredecessors()) {
         HInstruction* last = predecessor->GetLastInstruction();
         if (!returns_void) {
+          DCHECK(last->IsReturn());
           return_value->AsPhi()->AddInput(last->InputAt(0));
         }
         predecessor->AddInstruction(new (allocator) HGoto(last->GetDexPc()));
         predecessor->RemoveInstruction(last);
       }
     }
-
-    // Update the meta information surrounding blocks:
-    // (1) the graph they are now in,
-    // (2) the reverse post order of that graph,
-    // (3) the potential loop information they are now in.
-
-    // We don't add the entry block, the exit block, and the first block, which
-    // has been merged with `at`.
-    static constexpr int kNumberOfSkippedBlocksInCallee = 3;
-
-    // We add the `to` block.
-    static constexpr int kNumberOfNewBlocksInCaller = 1;
-    size_t blocks_added = (reverse_post_order_.size() - kNumberOfSkippedBlocksInCallee)
-        + kNumberOfNewBlocksInCaller;
-
-    // Find the location of `at` in the outer graph's reverse post order. The new
-    // blocks will be added after it.
-    size_t index_of_at = IndexOfElement(outer_graph->reverse_post_order_, at);
-    MakeRoomFor(&outer_graph->reverse_post_order_, blocks_added, index_of_at);
-
-    // Do a reverse post order of the blocks in the callee and do (1), (2),
-    // and (3) to the blocks that apply.
-    HLoopInformation* info = at->GetLoopInformation();
-    for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-      HBasicBlock* current = it.Current();
-      if (current != exit_block_ && current != entry_block_ && current != first) {
-        DCHECK(!current->IsInLoop());
-        DCHECK(current->GetGraph() == this);
-        current->SetGraph(outer_graph);
-        outer_graph->AddBlock(current);
-        outer_graph->reverse_post_order_[++index_of_at] = current;
-        if (info != nullptr) {
-          current->SetLoopInformation(info);
-          for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
-            loop_it.Current()->Add(current);
-          }
-        }
-      }
-    }
-
-    // Do (1), (2), and (3) to `to`.
-    to->SetGraph(outer_graph);
-    outer_graph->AddBlock(to);
-    outer_graph->reverse_post_order_[++index_of_at] = to;
-    if (info != nullptr) {
-      to->SetLoopInformation(info);
-      for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
-        loop_it.Current()->Add(to);
-      }
-      if (info->IsBackEdge(*at)) {
-        // Only `to` can become a back edge, as the inlined blocks
-        // are predecessors of `to`.
-        info->ReplaceBackEdge(at, to);
-      }
-    }
   }
 
-  // Update the next instruction id of the outer graph, so that instructions
-  // added later get bigger ids than those in the inner graph.
-  outer_graph->SetCurrentInstructionId(GetNextInstructionId());
-
   // Walk over the entry block and:
   // - Move constants from the entry block to the outer_graph's entry block,
   // - Replace HParameterValue instructions with their real value.
@@ -1748,13 +2170,6 @@
     }
   }
 
-  if (return_value != nullptr) {
-    invoke->ReplaceWith(return_value);
-  }
-
-  // Finally remove the invoke from the caller.
-  invoke->GetBlock()->RemoveInstruction(invoke);
-
   return return_value;
 }
 
@@ -1764,7 +2179,7 @@
  *             |
  *          if_block
  *           /    \
- *  dummy_block   deopt_block
+ *  true_block   false_block
  *           \    /
  *       new_pre_header
  *             |
@@ -1772,61 +2187,69 @@
  */
 void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
   DCHECK(header->IsLoopHeader());
-  HBasicBlock* pre_header = header->GetDominator();
+  HBasicBlock* old_pre_header = header->GetDominator();
 
-  // Need this to avoid critical edge.
+  // Need an extra block to avoid a critical edge.
   HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  // Need this to avoid critical edge.
-  HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc());
   HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
   AddBlock(if_block);
-  AddBlock(dummy_block);
-  AddBlock(deopt_block);
+  AddBlock(true_block);
+  AddBlock(false_block);
   AddBlock(new_pre_header);
 
-  header->ReplacePredecessor(pre_header, new_pre_header);
-  pre_header->successors_.clear();
-  pre_header->dominated_blocks_.clear();
+  header->ReplacePredecessor(old_pre_header, new_pre_header);
+  old_pre_header->successors_.clear();
+  old_pre_header->dominated_blocks_.clear();
 
-  pre_header->AddSuccessor(if_block);
-  if_block->AddSuccessor(dummy_block);  // True successor
-  if_block->AddSuccessor(deopt_block);  // False successor
-  dummy_block->AddSuccessor(new_pre_header);
-  deopt_block->AddSuccessor(new_pre_header);
+  old_pre_header->AddSuccessor(if_block);
+  if_block->AddSuccessor(true_block);  // True successor
+  if_block->AddSuccessor(false_block);  // False successor
+  true_block->AddSuccessor(new_pre_header);
+  false_block->AddSuccessor(new_pre_header);
 
-  pre_header->dominated_blocks_.push_back(if_block);
-  if_block->SetDominator(pre_header);
-  if_block->dominated_blocks_.push_back(dummy_block);
-  dummy_block->SetDominator(if_block);
-  if_block->dominated_blocks_.push_back(deopt_block);
-  deopt_block->SetDominator(if_block);
+  old_pre_header->dominated_blocks_.push_back(if_block);
+  if_block->SetDominator(old_pre_header);
+  if_block->dominated_blocks_.push_back(true_block);
+  true_block->SetDominator(if_block);
+  if_block->dominated_blocks_.push_back(false_block);
+  false_block->SetDominator(if_block);
   if_block->dominated_blocks_.push_back(new_pre_header);
   new_pre_header->SetDominator(if_block);
   new_pre_header->dominated_blocks_.push_back(header);
   header->SetDominator(new_pre_header);
 
+  // Fix reverse post order.
   size_t index_of_header = IndexOfElement(reverse_post_order_, header);
   MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1);
   reverse_post_order_[index_of_header++] = if_block;
-  reverse_post_order_[index_of_header++] = dummy_block;
-  reverse_post_order_[index_of_header++] = deopt_block;
+  reverse_post_order_[index_of_header++] = true_block;
+  reverse_post_order_[index_of_header++] = false_block;
   reverse_post_order_[index_of_header++] = new_pre_header;
 
-  HLoopInformation* info = pre_header->GetLoopInformation();
-  if (info != nullptr) {
-    if_block->SetLoopInformation(info);
-    dummy_block->SetLoopInformation(info);
-    deopt_block->SetLoopInformation(info);
-    new_pre_header->SetLoopInformation(info);
-    for (HLoopInformationOutwardIterator loop_it(*pre_header);
-         !loop_it.Done();
-         loop_it.Advance()) {
-      loop_it.Current()->Add(if_block);
-      loop_it.Current()->Add(dummy_block);
-      loop_it.Current()->Add(deopt_block);
-      loop_it.Current()->Add(new_pre_header);
-    }
+  // The old pre-header can never be a back edge of a loop.
+  DCHECK((old_pre_header->GetLoopInformation() == nullptr) ||
+         !old_pre_header->GetLoopInformation()->IsBackEdge(*old_pre_header));
+  UpdateLoopAndTryInformationOfNewBlock(
+      if_block, old_pre_header, /* replace_if_back_edge */ false);
+  UpdateLoopAndTryInformationOfNewBlock(
+      true_block, old_pre_header, /* replace_if_back_edge */ false);
+  UpdateLoopAndTryInformationOfNewBlock(
+      false_block, old_pre_header, /* replace_if_back_edge */ false);
+  UpdateLoopAndTryInformationOfNewBlock(
+      new_pre_header, old_pre_header, /* replace_if_back_edge */ false);
+}
+
+static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (rti.IsValid()) {
+    DCHECK(upper_bound_rti.IsSupertypeOf(rti))
+        << " upper_bound_rti: " << upper_bound_rti
+        << " rti: " << rti;
+    DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact())
+        << " upper_bound_rti: " << upper_bound_rti
+        << " rti: " << rti;
   }
 }
 
@@ -1838,24 +2261,34 @@
     if (IsBoundType()) {
       // Having the test here spares us from making the method virtual just for
       // the sake of a DCHECK.
-      ReferenceTypeInfo upper_bound_rti = AsBoundType()->GetUpperBound();
-      DCHECK(upper_bound_rti.IsSupertypeOf(rti))
-          << " upper_bound_rti: " << upper_bound_rti
-          << " rti: " << rti;
-      DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact());
+      CheckAgainstUpperBound(rti, AsBoundType()->GetUpperBound());
     }
   }
-  reference_type_info_ = rti;
+  reference_type_handle_ = rti.GetTypeHandle();
+  SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact());
 }
 
-ReferenceTypeInfo::ReferenceTypeInfo() : type_handle_(TypeHandle()), is_exact_(false) {}
+void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) {
+  if (kIsDebugBuild) {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(upper_bound.IsValid());
+    DCHECK(!upper_bound_.IsValid()) << "Upper bound should only be set once.";
+    CheckAgainstUpperBound(GetReferenceTypeInfo(), upper_bound);
+  }
+  upper_bound_ = upper_bound;
+  SetPackedFlag<kFlagUpperCanBeNull>(can_be_null);
+}
 
-ReferenceTypeInfo::ReferenceTypeInfo(TypeHandle type_handle, bool is_exact)
-    : type_handle_(type_handle), is_exact_(is_exact) {
+ReferenceTypeInfo ReferenceTypeInfo::Create(TypeHandle type_handle, bool is_exact) {
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     DCHECK(IsValidHandle(type_handle));
+    if (!is_exact) {
+      DCHECK(!type_handle->CannotBeAssignedFromOtherTypes())
+          << "Callers of ReferenceTypeInfo::Create should ensure is_exact is properly computed";
+    }
   }
+  return ReferenceTypeInfo(type_handle, is_exact);
 }
 
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
@@ -1894,13 +2327,34 @@
 }
 
 void HInvoke::SetIntrinsic(Intrinsics intrinsic,
-                           IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) {
+                           IntrinsicNeedsEnvironmentOrCache needs_env_or_cache,
+                           IntrinsicSideEffects side_effects,
+                           IntrinsicExceptions exceptions) {
   intrinsic_ = intrinsic;
   IntrinsicOptimizations opt(this);
+
+  // Adjust the method's side effects from the intrinsic table.
+  switch (side_effects) {
+    case kNoSideEffects: SetSideEffects(SideEffects::None()); break;
+    case kReadSideEffects: SetSideEffects(SideEffects::AllReads()); break;
+    case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break;
+    case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break;
+  }
+
   if (needs_env_or_cache == kNoEnvironmentOrCache) {
     opt.SetDoesNotNeedDexCache();
     opt.SetDoesNotNeedEnvironment();
+  } else {
+    // If we need an environment, that means there will be a call, which can trigger GC.
+    SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC()));
   }
+  // Adjust the method's exception status from the intrinsic table.
+  SetCanThrow(exceptions == kCanThrow);
+}
+
+bool HNewInstance::IsStringAlloc() const {
+  ScopedObjectAccess soa(Thread::Current());
+  return GetReferenceTypeInfo().IsStringClass();
 }
 
 bool HInvoke::NeedsEnvironment() const {
@@ -1911,8 +2365,8 @@
   return !opt.GetDoesNotNeedEnvironment();
 }
 
-bool HInvokeStaticOrDirect::NeedsDexCache() const {
-  if (IsRecursive() || IsStringInit()) {
+bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const {
+  if (GetMethodLoadKind() != MethodLoadKind::kDexCacheViaMethod) {
     return false;
   }
   if (!IsIntrinsic()) {
@@ -1922,13 +2376,279 @@
   return !opt.GetDoesNotNeedDexCache();
 }
 
-void HInstruction::RemoveEnvironmentUsers() {
-  for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
-    HUseListNode<HEnvironment*>* user_node = use_it.Current();
-    HEnvironment* user = user_node->GetUser();
-    user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+void HInvokeStaticOrDirect::InsertInputAt(size_t index, HInstruction* input) {
+  inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
+  input->AddUseAt(this, index);
+  // Update indexes in use nodes of inputs that have been pushed further back by the insert().
+  for (size_t i = index + 1u, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i - 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
-  env_uses_.Clear();
+}
+
+void HInvokeStaticOrDirect::RemoveInputAt(size_t index) {
+  RemoveAsUserOfInput(index);
+  inputs_.erase(inputs_.begin() + index);
+  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
+  }
+}
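
// Standalone sketch of the index-fixup invariant maintained by the two methods
// above (hypothetical `Use` record): every use record caches its own position,
// so an insert or erase at `pos` must re-number the records at and after `pos`.
#include <cstddef>
#include <vector>

struct Use { size_t index; };

void FixupIndexesFrom(std::vector<Use>& uses, size_t pos) {
  for (size_t i = pos; i < uses.size(); ++i) {
    uses[i].index = i;  // Restore the cached-index == position invariant.
  }
}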
+
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      return os << "string_init";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      return os << "recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      return os << "direct";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      return os << "direct_fixup";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      return os << "dex_cache_pc_relative";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      return os << "dex_cache_via_method";
+    default:
+      LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
+      return os << "explicit";
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit:
+      return os << "implicit";
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
+      return os << "none";
+    default:
+      LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
+  const HLoadClass* other_load_class = other->AsLoadClass();
+  // TODO: To allow GVN for HLoadClass from different dex files, we should compare the type
+  // names rather than type indexes. However, we shall also have to re-think the hash code.
+  if (type_index_ != other_load_class->type_index_ ||
+      GetPackedFields() != other_load_class->GetPackedFields()) {
+    return false;
+  }
+  LoadKind load_kind = GetLoadKind();
+  if (HasAddress(load_kind)) {
+    return GetAddress() == other_load_class->GetAddress();
+  } else if (HasTypeReference(load_kind)) {
+    return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
+  } else {
+    DCHECK(HasDexCacheReference(load_kind)) << load_kind;
+    // If the type indexes and dex files are the same, dex cache element offsets
+    // must also be the same, so we don't need to compare them.
+    return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
+  }
+}
+
+void HLoadClass::SetLoadKindInternal(LoadKind load_kind) {
+  // Once sharpened, the load kind should not be changed again.
+  // Also, kReferrersClass should never be overwritten.
+  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  SetPackedField<LoadKindField>(load_kind);
+
+  if (load_kind != LoadKind::kDexCacheViaMethod) {
+    RemoveAsUserOfInput(0u);
+    SetRawInputAt(0u, nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
+  switch (rhs) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      return os << "ReferrersClass";
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      return os << "BootImageLinkTimeAddress";
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
+    case HLoadClass::LoadKind::kBootImageAddress:
+      return os << "BootImageAddress";
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      return os << "DexCacheAddress";
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      return os << "DexCachePcRelative";
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      return os << "DexCacheViaMethod";
+    default:
+      LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
+  const HLoadString* other_load_string = other->AsLoadString();
+  // TODO: To allow GVN for HLoadString from different dex files, we should compare the strings
+  // rather than their indexes. However, we shall also have to re-think the hash code.
+  if (string_index_ != other_load_string->string_index_ ||
+      GetPackedFields() != other_load_string->GetPackedFields()) {
+    return false;
+  }
+  LoadKind load_kind = GetLoadKind();
+  if (HasAddress(load_kind)) {
+    return GetAddress() == other_load_string->GetAddress();
+  } else if (HasStringReference(load_kind)) {
+    return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
+  } else {
+    DCHECK(HasDexCacheReference(load_kind)) << load_kind;
+    // If the string indexes and dex files are the same, dex cache element offsets
+    // must also be the same, so we don't need to compare them.
+    return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
+  }
+}
+
+void HLoadString::SetLoadKindInternal(LoadKind load_kind) {
+  // Once sharpened, the load kind should not be changed again.
+  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  SetPackedField<LoadKindField>(load_kind);
+
+  if (load_kind != LoadKind::kDexCacheViaMethod) {
+    RemoveAsUserOfInput(0u);
+    SetRawInputAt(0u, nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
+  switch (rhs) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      return os << "BootImageLinkTimeAddress";
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
+    case HLoadString::LoadKind::kBootImageAddress:
+      return os << "BootImageAddress";
+    case HLoadString::LoadKind::kDexCacheAddress:
+      return os << "DexCacheAddress";
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      return os << "DexCachePcRelative";
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      return os << "DexCacheViaMethod";
+    default:
+      LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+void HInstruction::RemoveEnvironmentUsers() {
+  for (const HUseListNode<HEnvironment*>& use : GetEnvUses()) {
+    HEnvironment* user = use.GetUser();
+    user->SetRawEnvAt(use.GetIndex(), nullptr);
+  }
+  env_uses_.clear();
+}
+
+// Returns an instruction with the opposite Boolean value from `cond`.
+HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) {
+  ArenaAllocator* allocator = GetArena();
+
+  if (cond->IsCondition() &&
+      !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) {
+    // Floating-point conditions cannot simply be reversed (NaN makes the
+    // inversion incorrect); those fall through to the HBooleanNot case below.
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    HInstruction* replacement = nullptr;
+    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
+      case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break;
+      case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break;
+      case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break;
+      case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break;
+      case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break;
+      case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break;
+      case kCondB:  replacement = new (allocator) HBelow(lhs, rhs); break;
+      case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break;
+      case kCondA:  replacement = new (allocator) HAbove(lhs, rhs); break;
+      case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break;
+      default:
+        LOG(FATAL) << "Unexpected condition";
+        UNREACHABLE();
+    }
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  } else if (cond->IsIntConstant()) {
+    HIntConstant* int_const = cond->AsIntConstant();
+    if (int_const->IsFalse()) {
+      return GetIntConstant(1);
+    } else {
+      DCHECK(int_const->IsTrue()) << int_const->GetValue();
+      return GetIntConstant(0);
+    }
+  } else {
+    HInstruction* replacement = new (allocator) HBooleanNot(cond);
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  }
+}
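
// Standalone sketch of the negation computed by GetOppositeCondition(), using
// a local copy of the condition kinds declared in nodes.h (assumption: the
// opposite of each condition is its plain logical negation):
enum Cond { kEQ, kNE, kLT, kLE, kGT, kGE, kB, kBE, kA, kAE };

Cond Opposite(Cond c) {
  switch (c) {
    case kEQ: return kNE;
    case kNE: return kEQ;
    case kLT: return kGE;  // !(a < b)  ==  a >= b
    case kLE: return kGT;
    case kGT: return kLE;
    case kGE: return kLT;
    case kB:  return kAE;  // The unsigned below/above variants negate likewise.
    case kBE: return kA;
    case kA:  return kBE;
    case kAE: return kB;
  }
  return kEQ;  // Not reached for valid values.
}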
+
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) {
+  os << "["
+     << " source=" << rhs.GetSource()
+     << " destination=" << rhs.GetDestination()
+     << " type=" << rhs.GetType()
+     << " instruction=";
+  if (rhs.GetInstruction() != nullptr) {
+    os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId();
+  } else {
+    os << "null";
+  }
+  os << " ]";
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
+  switch (rhs) {
+    case TypeCheckKind::kUnresolvedCheck:
+      return os << "unresolved_check";
+    case TypeCheckKind::kExactCheck:
+      return os << "exact_check";
+    case TypeCheckKind::kClassHierarchyCheck:
+      return os << "class_hierarchy_check";
+    case TypeCheckKind::kAbstractClassCheck:
+      return os << "abstract_class_check";
+    case TypeCheckKind::kInterfaceCheck:
+      return os << "interface_check";
+    case TypeCheckKind::kArrayObjectCheck:
+      return os << "array_object_check";
+    case TypeCheckKind::kArrayCheck:
+      return os << "array_check";
+    default:
+      LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) {
+  switch (kind) {
+    case MemBarrierKind::kAnyStore:
+      return os << "AnyStore";
+    case MemBarrierKind::kLoadAny:
+      return os << "LoadAny";
+    case MemBarrierKind::kStoreStore:
+      return os << "StoreStore";
+    case MemBarrierKind::kAnyAny:
+      return os << "AnyAny";
+    case MemBarrierKind::kNTStoreStore:
+      return os << "NTStoreStore";
+    default:
+      LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind);
+      UNREACHABLE();
+  }
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7aa933d..dfa8276 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,7 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/stl_util.h"
-#include "dex/compiler_enums.h"
+#include "dex_file.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -35,6 +35,9 @@
 #include "mirror/class.h"
 #include "offsets.h"
 #include "primitive.h"
+#include "utils/array_ref.h"
+#include "utils/intrusive_forward_list.h"
+#include "utils/transform_array_ref.h"
 
 namespace art {
 
@@ -43,7 +46,6 @@
 class HCurrentMethod;
 class HDoubleConstant;
 class HEnvironment;
-class HFakeString;
 class HFloatConstant;
 class HGraphBuilder;
 class HGraphVisitor;
@@ -71,8 +73,10 @@
 static const int kDefaultNumberOfDominatedBlocks = 1;
 static const int kDefaultNumberOfBackEdges = 1;
 
-static constexpr uint32_t kMaxIntShiftValue = 0x1f;
-static constexpr uint64_t kMaxLongShiftValue = 0x3f;
+// The maximum (meaningful) distance (31) that can be used in an integer shift/rotate operation.
+static constexpr int32_t kMaxIntShiftDistance = 0x1f;
+// The maximum (meaningful) distance (63) that can be used in a long shift/rotate operation.
+static constexpr int32_t kMaxLongShiftDistance = 0x3f;
 
 static constexpr uint32_t kUnknownFieldIndex = static_cast<uint32_t>(-1);
 static constexpr uint16_t kUnknownClassDefIndex = static_cast<uint16_t>(-1);
@@ -81,6 +85,16 @@
 
 static constexpr uint32_t kNoDexPc = -1;
 
+inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
+  // For the purposes of the compiler, the dex files must actually be the same object
+  // if we want to safely treat them as the same. This is especially important for the
+  // JIT, as custom class loaders can open the same underlying file (or memory) multiple
+  // times and provide different class resolution. No two class loaders should ever
+  // use the same DexFile object; doing so is an unsupported hack that can lead to
+  // all sorts of weird failures.
+  return &lhs == &rhs;
+}
+
 enum IfCondition {
   // All types.
   kCondEQ,  // ==
@@ -97,6 +111,14 @@
   kCondAE,  // >=
 };
 
+enum GraphAnalysisResult {
+  kAnalysisSkipped,
+  kAnalysisInvalidBytecode,
+  kAnalysisFailThrowCatchLoop,
+  kAnalysisFailAmbiguousArrayOp,
+  kAnalysisSuccess,
+};
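
// A minimal sketch of how a caller might report a GraphAnalysisResult
// (hypothetical helper; the real handling lives in the compiler driver):
inline const char* DescribeAnalysisResult(GraphAnalysisResult result) {
  switch (result) {
    case kAnalysisSkipped:              return "skipped";
    case kAnalysisInvalidBytecode:      return "invalid bytecode";
    case kAnalysisFailThrowCatchLoop:   return "throw-catch loop";
    case kAnalysisFailAmbiguousArrayOp: return "ambiguous array operation";
    case kAnalysisSuccess:              return "success";
  }
  return "unknown";
}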
+
 class HInstructionList : public ValueObject {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
@@ -124,6 +146,7 @@
   void SetBlockOfInstructions(HBasicBlock* block) const;
 
   void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
+  void AddBefore(HInstruction* cursor, const HInstructionList& instruction_list);
   void Add(const HInstructionList& instruction_list);
 
   // Return the number of instructions in the list. This is an expensive operation.
@@ -142,6 +165,128 @@
   DISALLOW_COPY_AND_ASSIGN(HInstructionList);
 };
 
+class ReferenceTypeInfo : ValueObject {
+ public:
+  typedef Handle<mirror::Class> TypeHandle;
+
+  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact);
+
+  static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes());
+  }
+
+  static ReferenceTypeInfo CreateUnchecked(TypeHandle type_handle, bool is_exact) {
+    return ReferenceTypeInfo(type_handle, is_exact);
+  }
+
+  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
+
+  static bool IsValidHandle(TypeHandle handle) {
+    return handle.GetReference() != nullptr;
+  }
+
+  bool IsValid() const {
+    return IsValidHandle(type_handle_);
+  }
+
+  bool IsExact() const { return is_exact_; }
+
+  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsObjectClass();
+  }
+
+  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsStringClass();
+  }
+
+  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
+  }
+
+  bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsInterface();
+  }
+
+  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass();
+  }
+
+  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    if (!rti.IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+        rti.GetTypeHandle()->GetComponentType());
+  }
+
+  Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
+
+  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
+        GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  // Returns true if the type information provides the same amount of detail.
+  // Note that it does not mean that the instructions have the same actual type
+  // (because the type can be the result of a merge).
+  bool IsEqual(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!IsValid() && !rti.IsValid()) {
+      // Invalid types are equal.
+      return true;
+    }
+    if (!IsValid() || !rti.IsValid()) {
+      // One is valid, the other not.
+      return false;
+    }
+    return IsExact() == rti.IsExact()
+        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+  }
+
+ private:
+  ReferenceTypeInfo() : type_handle_(TypeHandle()), is_exact_(false) {}
+  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact)
+      : type_handle_(type_handle), is_exact_(is_exact) { }
+
+  // The class of the object.
+  TypeHandle type_handle_;
+  // Whether the type is exact, as opposed to merely a superclass of the
+  // actual type.
+  bool is_exact_;
+};
+
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
+
 // Control-flow graph of a method. Contains a list of basic blocks.
 class HGraph : public ArenaObject<kArenaAllocGraph> {
  public:
@@ -152,6 +297,7 @@
          InstructionSet instruction_set,
          InvokeType invoke_type = kInvalidInvokeType,
          bool debuggable = false,
+         bool osr = false,
          int start_instruction_id = 0)
       : arena_(arena),
         blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -165,6 +311,7 @@
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_irreducible_loops_(false),
         debuggable_(debuggable),
         current_instruction_id_(start_instruction_id),
         dex_file_(dex_file),
@@ -178,14 +325,20 @@
         cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
-        cached_current_method_(nullptr) {
+        cached_current_method_(nullptr),
+        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+        osr_(osr) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
+  // Acquires and stores RTI of inexact Object to be used when creating HNullConstant.
+  void InitializeInexactObjectRTI(StackHandleScopeCollection* handles);
+
   ArenaAllocator* GetArena() const { return arena_; }
   const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; }
 
   bool IsInSsaForm() const { return in_ssa_form_; }
+  void SetInSsaForm() { in_ssa_form_ = true; }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
   HBasicBlock* GetExitBlock() const { return exit_block_; }
@@ -196,52 +349,43 @@
 
   void AddBlock(HBasicBlock* block);
 
-  // Try building the SSA form of this graph, with dominance computation and loop
-  // recognition. Returns whether it was successful in doing all these steps.
-  bool TryBuildingSsa() {
-    BuildDominatorTree();
-    // The SSA builder requires loops to all be natural. Specifically, the dead phi
-    // elimination phase checks the consistency of the graph when doing a post-order
-    // visit for eliminating dead phis: a dead phi can only have loop header phi
-    // users remaining when being visited.
-    if (!AnalyzeNaturalLoops()) return false;
-    // Precompute per-block try membership before entering the SSA builder,
-    // which needs the information to build catch block phis from values of
-    // locals at throwing instructions inside try blocks.
-    ComputeTryBlockInformation();
-    TransformToSsa();
-    in_ssa_form_ = true;
-    return true;
-  }
-
   void ComputeDominanceInformation();
   void ClearDominanceInformation();
-
-  void BuildDominatorTree();
-  void TransformToSsa();
+  void ClearLoopInformation();
+  void FindBackEdges(ArenaBitVector* visited);
+  GraphAnalysisResult BuildDominatorTree();
   void SimplifyCFG();
   void SimplifyCatchBlocks();
 
-  // Analyze all natural loops in this graph. Returns false if one
-  // loop is not natural, that is the header does not dominate the
-  // back edge.
-  bool AnalyzeNaturalLoops() const;
+  // Analyze all natural loops in this graph. Returns a code indicating
+  // success or the reason for failure. The method will fail if a loop
+  // is a throw-catch loop, i.e. the header is a catch block.
+  GraphAnalysisResult AnalyzeLoops() const;
 
   // Iterate over blocks to compute try block membership. Needs reverse post
   // order and loop information.
   void ComputeTryBlockInformation();
 
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
-  // Returns the instruction used to replace the invoke expression or null if the
-  // invoke is for a void method.
+  // Returns the instruction to replace the invoke expression or null if the
+  // invoke is for a void method. Note that the caller is responsible for replacing
+  // and removing the invoke instruction.
   HInstruction* InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
+  // Update the loop and try membership of `block`, which was spawned from `reference`.
+  // In case `reference` is a back edge, `replace_if_back_edge` indicates whether
+  // `block` should become the new back edge.
+  void UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
+                                             HBasicBlock* reference,
+                                             bool replace_if_back_edge);
+
   // Need to add a couple of blocks to test if the loop body is entered and
   // put deoptimization instructions, etc.
   void TransformLoopHeaderForBCE(HBasicBlock* header);
 
-  // Removes `block` from the graph.
-  void DeleteDeadBlock(HBasicBlock* block);
+  // Removes `block` from the graph. Assumes `block` has been disconnected from
+  // other blocks and has no instructions or phis.
+  void DeleteDeadEmptyBlock(HBasicBlock* block);
 
   // Splits the edge between `block` and `successor` while preserving the
   // indices in the predecessor/successor lists. If there are multiple edges
@@ -262,6 +406,7 @@
   }
 
   void SetCurrentInstructionId(int32_t id) {
+    DCHECK_GE(id, current_instruction_id_);
     current_instruction_id_ = id;
   }
 
@@ -298,6 +443,10 @@
     number_of_in_vregs_ = value;
   }
 
+  uint16_t GetNumberOfInVRegs() const {
+    return number_of_in_vregs_;
+  }
+
   uint16_t GetNumberOfLocalVRegs() const {
     DCHECK(!in_ssa_form_);
     return number_of_vregs_ - number_of_in_vregs_;
@@ -350,8 +499,6 @@
 
   HCurrentMethod* GetCurrentMethod();
 
-  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
-
   const DexFile& GetDexFile() const {
     return dex_file_;
   }
@@ -368,11 +515,25 @@
     return instruction_set_;
   }
 
+  bool IsCompilingOsr() const { return osr_; }
+
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasIrreducibleLoops() const { return has_irreducible_loops_; }
+  void SetHasIrreducibleLoops(bool value) { has_irreducible_loops_ = value; }
+
+  ArtMethod* GetArtMethod() const { return art_method_; }
+  void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+
+  // Returns an instruction with the opposite boolean value of `cond`. The
+  // returned instruction has been inserted into the graph, either as a
+  // constant or before `cursor`.
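+  // For instance (illustrative), for an HEqual condition the result may be an
+  // HNotEqual on the same inputs.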
+  HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
+
+  ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; }
+
  private:
-  void FindBackEdges(ArenaBitVector* visited);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
 
@@ -439,6 +600,9 @@
   // try/catch-related passes if false.
   bool has_try_catch_;
 
+  // Flag indicating whether the graph contains any irreducible loops.
+  bool has_irreducible_loops_;
+
   // Indicates whether the graph should be compiled in a way that
   // ensures full debuggability. If false, we can apply more
   // aggressive optimizations that may limit the level of debugging.
@@ -474,8 +638,23 @@
 
   HCurrentMethod* cached_current_method_;
 
+  // The ArtMethod this graph is for. Note that for AOT, it may be null,
+  // for example for methods whose declaring class could not be resolved
+  // (such as when the superclass could not be found).
+  ArtMethod* art_method_;
+
+  // Keep the RTI of inexact Object to avoid having to pass a stack handle
+  // collection pointer to passes which may create a NullConstant.
+  ReferenceTypeInfo inexact_object_rti_;
+
+  // Whether we are compiling this graph for on stack replacement: this will
+  // make all loops be treated as irreducible and will emit special stack maps
+  // to mark compiled code entries which the interpreter can directly jump to.
+  const bool osr_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
+  friend class HInliner;             // For the reverse post order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -485,12 +664,19 @@
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
         suspend_check_(nullptr),
+        irreducible_(false),
+        contains_irreducible_loop_(false),
         back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
         // Make bit vector growable, as the number of blocks may change.
-        blocks_(graph->GetArena(), graph->GetBlocks().size(), true) {
+        blocks_(graph->GetArena(), graph->GetBlocks().size(), true, kArenaAllocLoopInfoBackEdges) {
     back_edges_.reserve(kDefaultNumberOfBackEdges);
   }
 
+  bool IsIrreducible() const { return irreducible_; }
+  bool ContainsIrreducibleLoop() const { return contains_irreducible_loop_; }
+
+  void Dump(std::ostream& os);
+
   HBasicBlock* GetHeader() const {
     return header_;
   }
@@ -533,15 +719,8 @@
     ReplaceElement(back_edges_, existing, new_back_edge);
   }
 
-  // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
-  // that is the header dominates the back edge.
-  bool Populate();
-
-  // Reanalyzes the loop by removing loop info from its blocks and re-running
-  // Populate(). If there are no back edges left, the loop info is completely
-  // removed as well as its SuspendCheck instruction. It must be run on nested
-  // inner loops first.
-  void Update();
+  // Finds blocks that are part of this loop.
+  void Populate();
 
   // Returns whether this loop information contains `block`.
   // Note that this loop information *must* be populated before entering this function.
@@ -551,17 +730,35 @@
   // Note that `other` *must* be populated before entering this function.
   bool IsIn(const HLoopInformation& other) const;
 
+  // Returns true if `instruction` is not defined within this loop.
+  bool IsDefinedOutOfTheLoop(HInstruction* instruction) const;
+
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
   void Add(HBasicBlock* block);
   void Remove(HBasicBlock* block);
 
+  void ClearAllBlocks() {
+    blocks_.ClearAllBits();
+  }
+
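+  // A back edge that is not dominated by the loop header makes the loop
+  // irreducible, i.e. enterable somewhere other than through its header.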
+  bool HasBackEdgeNotDominatedByHeader() const;
+
+  bool IsPopulated() const {
+    return blocks_.GetHighestBitSet() != -1;
+  }
+
+  bool DominatesAllBackEdges(HBasicBlock* block);
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
+  void PopulateIrreducibleRecursive(HBasicBlock* block, ArenaBitVector* finalized);
 
   HBasicBlock* header_;
   HSuspendCheck* suspend_check_;
+  bool irreducible_;
+  bool contains_irreducible_loop_;
   ArenaVector<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;
 
@@ -655,6 +852,9 @@
     return successors_;
   }
 
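+  // The non-exceptional successors are stored at the beginning of the
+  // successor list, followed by the exceptional ones; these accessors return
+  // the two partitions.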
+  ArrayRef<HBasicBlock* const> GetNormalSuccessors() const;
+  ArrayRef<HBasicBlock* const> GetExceptionalSuccessors() const;
+
   bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) {
     return ContainsElement(successors_, block, start_from);
   }
@@ -722,6 +922,8 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
+  HInstruction* GetFirstInstructionDisregardMoves() const;
+
   void AddSuccessor(HBasicBlock* block) {
     successors_.push_back(block);
     block->predecessors_.push_back(this);
@@ -805,12 +1007,6 @@
     return GetPredecessorIndexOf(predecessor) == idx;
   }
 
-  // Returns the number of non-exceptional successors. SsaChecker ensures that
-  // these are stored at the beginning of the successor list.
-  size_t NumberOfNormalSuccessors() const {
-    return EndsWithTryBoundary() ? 1 : GetSuccessors().size();
-  }
-
   // Create a new block between this block and its predecessors. The new block
   // is added to the graph, all predecessor edges are relinked to it and an edge
   // is created to `this`. Returns the new empty block. Reverse post order or
@@ -824,12 +1020,15 @@
   // loop and try/catch information.
   HBasicBlock* SplitBefore(HInstruction* cursor);
 
-  // Split the block into two blocks just after `cursor`. Returns the newly
+  // Split the block into two blocks just before `cursor`. Returns the newly
   // created block. Note that this method just updates raw block information,
   // like predecessors, successors, dominators, and instruction list. It does not
   // update the graph, reverse post order, loop information, nor make sure the
   // blocks are consistent (for example ending with a control flow instruction).
-  HBasicBlock* SplitAfter(HInstruction* cursor);
+  HBasicBlock* SplitBeforeForInlining(HInstruction* cursor);
+
+  // Similar to `SplitBeforeForInlining` but does it after `cursor`.
+  HBasicBlock* SplitAfterForInlining(HInstruction* cursor);
 
   // Merge `other` at the end of `this`. Successors and dominated blocks of
   // `other` are changed to be successors and dominated blocks of `this`. Note
@@ -882,6 +1081,11 @@
     return GetPredecessors()[0] == GetLoopInformation()->GetPreHeader();
   }
 
+  bool IsFirstPredecessorBackEdge() const {
+    DCHECK(IsLoopHeader());
+    return GetLoopInformation()->IsBackEdge(*GetPredecessors()[0]);
+  }
+
   HLoopInformation* GetLoopInformation() const {
     return loop_information_;
   }
@@ -1009,10 +1213,10 @@
   M(BoundsCheck, Instruction)                                           \
   M(BoundType, Instruction)                                             \
   M(CheckCast, Instruction)                                             \
+  M(ClassTableGet, Instruction)                                         \
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
-  M(Condition, BinaryOperation)                                         \
   M(CurrentMethod, Instruction)                                         \
   M(Deoptimize, Instruction)                                            \
   M(Div, BinaryOperation)                                               \
@@ -1020,7 +1224,6 @@
   M(DoubleConstant, Constant)                                           \
   M(Equal, Condition)                                                   \
   M(Exit, Instruction)                                                  \
-  M(FakeString, Instruction)                                            \
   M(FloatConstant, Constant)                                            \
   M(Goto, Instruction)                                                  \
   M(GreaterThan, Condition)                                             \
@@ -1038,13 +1241,12 @@
   M(LessThanOrEqual, Condition)                                         \
   M(LoadClass, Instruction)                                             \
   M(LoadException, Instruction)                                         \
-  M(LoadLocal, Instruction)                                             \
   M(LoadString, Instruction)                                            \
-  M(Local, Instruction)                                                 \
   M(LongConstant, Constant)                                             \
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
   M(Mul, BinaryOperation)                                               \
+  M(NativeDebugInfo, Instruction)                                       \
   M(Neg, UnaryOperation)                                                \
   M(NewArray, Instruction)                                              \
   M(NewInstance, Instruction)                                           \
@@ -1060,6 +1262,7 @@
   M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Ror, BinaryOperation)                                               \
   M(Shl, BinaryOperation)                                               \
   M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
@@ -1068,26 +1271,48 @@
   M(UnresolvedInstanceFieldSet, Instruction)                            \
   M(UnresolvedStaticFieldGet, Instruction)                              \
   M(UnresolvedStaticFieldSet, Instruction)                              \
-  M(StoreLocal, Instruction)                                            \
+  M(Select, Instruction)                                                \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
-  M(Temporary, Instruction)                                             \
   M(Throw, Instruction)                                                 \
   M(TryBoundary, Instruction)                                           \
   M(TypeConversion, Instruction)                                        \
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+/*
+ * Instructions shared across several (but not all) architectures.
+ */
+#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64)
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
+  M(BitwiseNegatedRight, Instruction)                                   \
+  M(MultiplyAccumulate, Instruction)                                    \
+  M(IntermediateAddress, Instruction)
+#endif
+
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64DataProcWithShifterOp, Instruction)
 #endif
 
+#ifndef ART_ENABLE_CODEGEN_mips
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                           \
+  M(MipsComputeBaseMethodAddress, Instruction)                          \
+  M(MipsDexCacheArraysBase, Instruction)
+#endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
 
@@ -1097,6 +1322,7 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                            \
   M(X86ComputeBaseMethodAddress, Instruction)                           \
   M(X86LoadFromConstantTable, Instruction)                              \
+  M(X86FPNeg, Instruction)                                              \
   M(X86PackedSwitch, Instruction)
 #endif
 
@@ -1104,6 +1330,7 @@
 
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                               \
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                                 \
@@ -1111,148 +1338,59 @@
   FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
-#define FOR_EACH_INSTRUCTION(M)                                         \
-  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+#define FOR_EACH_ABSTRACT_INSTRUCTION(M)                                \
+  M(Condition, BinaryOperation)                                         \
   M(Constant, Instruction)                                              \
   M(UnaryOperation, Instruction)                                        \
   M(BinaryOperation, Instruction)                                       \
   M(Invoke, Instruction)
 
+#define FOR_EACH_INSTRUCTION(M)                                         \
+  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+  FOR_EACH_ABSTRACT_INSTRUCTION(M)
+
 #define FORWARD_DECLARATION(type, super) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
-#define DECLARE_INSTRUCTION(type)                                       \
-  InstructionKind GetKind() const OVERRIDE { return k##type; }          \
-  const char* DebugName() const OVERRIDE { return #type; }              \
-  const H##type* As##type() const OVERRIDE { return this; }             \
-  H##type* As##type() OVERRIDE { return this; }                         \
-  bool InstructionTypeEquals(HInstruction* other) const OVERRIDE {      \
-    return other->Is##type();                                           \
-  }                                                                     \
+#define DECLARE_INSTRUCTION(type)                                         \
+  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }    \
+  const char* DebugName() const OVERRIDE { return #type; }                \
+  bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE {  \
+    return other->Is##type();                                             \
+  }                                                                       \
   void Accept(HGraphVisitor* visitor) OVERRIDE
 
-template <typename T> class HUseList;
+#define DECLARE_ABSTRACT_INSTRUCTION(type)                              \
+  bool Is##type() const { return As##type() != nullptr; }               \
+  const H##type* As##type() const { return this; }                      \
+  H##type* As##type() { return this; }
 
 template <typename T>
 class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
-  HUseListNode* GetPrevious() const { return prev_; }
-  HUseListNode* GetNext() const { return next_; }
   T GetUser() const { return user_; }
   size_t GetIndex() const { return index_; }
   void SetIndex(size_t index) { index_ = index; }
 
+  // Hook for the IntrusiveForwardList<>.
+  // TODO: Hide this better.
+  IntrusiveForwardListHook hook;
+
  private:
   HUseListNode(T user, size_t index)
-      : user_(user), index_(index), prev_(nullptr), next_(nullptr) {}
+      : user_(user), index_(index) {}
 
   T const user_;
   size_t index_;
-  HUseListNode<T>* prev_;
-  HUseListNode<T>* next_;
 
-  friend class HUseList<T>;
+  friend class HInstruction;
 
   DISALLOW_COPY_AND_ASSIGN(HUseListNode);
 };
 
 template <typename T>
-class HUseList : public ValueObject {
- public:
-  HUseList() : first_(nullptr) {}
-
-  void Clear() {
-    first_ = nullptr;
-  }
-
-  // Adds a new entry at the beginning of the use list and returns
-  // the newly created node.
-  HUseListNode<T>* AddUse(T user, size_t index, ArenaAllocator* arena) {
-    HUseListNode<T>* new_node = new (arena) HUseListNode<T>(user, index);
-    if (IsEmpty()) {
-      first_ = new_node;
-    } else {
-      first_->prev_ = new_node;
-      new_node->next_ = first_;
-      first_ = new_node;
-    }
-    return new_node;
-  }
-
-  HUseListNode<T>* GetFirst() const {
-    return first_;
-  }
-
-  void Remove(HUseListNode<T>* node) {
-    DCHECK(node != nullptr);
-    DCHECK(Contains(node));
-
-    if (node->prev_ != nullptr) {
-      node->prev_->next_ = node->next_;
-    }
-    if (node->next_ != nullptr) {
-      node->next_->prev_ = node->prev_;
-    }
-    if (node == first_) {
-      first_ = node->next_;
-    }
-  }
-
-  bool Contains(const HUseListNode<T>* node) const {
-    if (node == nullptr) {
-      return false;
-    }
-    for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
-      if (current == node) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  bool IsEmpty() const {
-    return first_ == nullptr;
-  }
-
-  bool HasOnlyOneUse() const {
-    return first_ != nullptr && first_->next_ == nullptr;
-  }
-
-  size_t SizeSlow() const {
-    size_t count = 0;
-    for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
-      ++count;
-    }
-    return count;
-  }
-
- private:
-  HUseListNode<T>* first_;
-};
-
-template<typename T>
-class HUseIterator : public ValueObject {
- public:
-  explicit HUseIterator(const HUseList<T>& uses) : current_(uses.GetFirst()) {}
-
-  bool Done() const { return current_ == nullptr; }
-
-  void Advance() {
-    DCHECK(!Done());
-    current_ = current_->GetNext();
-  }
-
-  HUseListNode<T>* Current() const {
-    DCHECK(!Done());
-    return current_;
-  }
-
- private:
-  HUseListNode<T>* current_;
-
-  friend class HValue;
-};
+using HUseList = IntrusiveForwardList<HUseListNode<T>>;
 
 // This class is used by HEnvironment and HInstruction classes to record the
 // instructions they use and pointers to the corresponding HUseListNodes kept
@@ -1260,27 +1398,43 @@
 template <typename T>
 class HUserRecord : public ValueObject {
  public:
-  HUserRecord() : instruction_(nullptr), use_node_(nullptr) {}
-  explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {}
+  HUserRecord() : instruction_(nullptr), before_use_node_() {}
+  explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), before_use_node_() {}
 
-  HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node)
-    : instruction_(old_record.instruction_), use_node_(use_node) {
+  HUserRecord(const HUserRecord<T>& old_record, typename HUseList<T>::iterator before_use_node)
+      : HUserRecord(old_record.instruction_, before_use_node) {}
+  HUserRecord(HInstruction* instruction, typename HUseList<T>::iterator before_use_node)
+      : instruction_(instruction), before_use_node_(before_use_node) {
     DCHECK(instruction_ != nullptr);
-    DCHECK(use_node_ != nullptr);
-    DCHECK(old_record.use_node_ == nullptr);
   }
 
   HInstruction* GetInstruction() const { return instruction_; }
-  HUseListNode<T>* GetUseNode() const { return use_node_; }
+  typename HUseList<T>::iterator GetBeforeUseNode() const { return before_use_node_; }
+  typename HUseList<T>::iterator GetUseNode() const { return ++GetBeforeUseNode(); }
 
  private:
   // Instruction used by the user.
   HInstruction* instruction_;
 
-  // Corresponding entry in the use list kept by 'instruction_'.
-  HUseListNode<T>* use_node_;
+  // Iterator before the corresponding entry in the use list kept by 'instruction_'.
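+  // Keeping the iterator *before* the entry (rather than at it) allows O(1)
+  // removal via IntrusiveForwardList<>::erase_after(); a singly-linked list
+  // cannot otherwise reach a node's predecessor.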
+  typename HUseList<T>::iterator before_use_node_;
 };
 
+// Helper class that extracts the input instruction from HUserRecord<HInstruction*>.
+// This is used for HInstruction::GetInputs() to return a container wrapper providing
+// HInstruction* values even though the underlying container has HUserRecord<>s.
+struct HInputExtractor {
+  HInstruction* operator()(HUserRecord<HInstruction*>& record) const {
+    return record.GetInstruction();
+  }
+  const HInstruction* operator()(const HUserRecord<HInstruction*>& record) const {
+    return record.GetInstruction();
+  }
+};
+
+using HInputsRef = TransformArrayRef<HUserRecord<HInstruction*>, HInputExtractor>;
+using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInputExtractor>;
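+
+// Usage sketch (illustrative): with these aliases a pass can iterate over an
+// instruction's inputs directly as HInstruction* values, e.g.
+//   for (HInstruction* input : instruction->GetInputs()) { /* ... */ }
+// instead of unwrapping each HUserRecord<> by hand.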
+
 /**
  * Side-effects representation.
  *
@@ -1349,21 +1503,21 @@
   static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) {
     return is_volatile
         ? AllWritesAndReads()
-        : SideEffects(TypeFlagWithAlias(type, kFieldWriteOffset));
+        : SideEffects(TypeFlag(type, kFieldWriteOffset));
   }
 
   static SideEffects ArrayWriteOfType(Primitive::Type type) {
-    return SideEffects(TypeFlagWithAlias(type, kArrayWriteOffset));
+    return SideEffects(TypeFlag(type, kArrayWriteOffset));
   }
 
   static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) {
     return is_volatile
         ? AllWritesAndReads()
-        : SideEffects(TypeFlagWithAlias(type, kFieldReadOffset));
+        : SideEffects(TypeFlag(type, kFieldReadOffset));
   }
 
   static SideEffects ArrayReadOfType(Primitive::Type type) {
-    return SideEffects(TypeFlagWithAlias(type, kArrayReadOffset));
+    return SideEffects(TypeFlag(type, kArrayReadOffset));
   }
 
   static SideEffects CanTriggerGC() {
@@ -1424,7 +1578,7 @@
     return flags_ == (kAllChangeBits | kAllDependOnBits);
   }
 
-  // Returns true if this may read something written by other.
+  // Returns true if `this` may read something written by `other`.
   bool MayDependOn(SideEffects other) const {
     const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits;
     return (other.flags_ & depends_on_flags);
@@ -1490,23 +1644,6 @@
   static constexpr uint64_t kAllReads =
       ((1ULL << (kLastBitForReads + 1 - kFieldReadOffset)) - 1) << kFieldReadOffset;
 
-  // Work around the fact that HIR aliases I/F and J/D.
-  // TODO: remove this interceptor once HIR types are clean
-  static uint64_t TypeFlagWithAlias(Primitive::Type type, int offset) {
-    switch (type) {
-      case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
-        return TypeFlag(Primitive::kPrimInt, offset) |
-               TypeFlag(Primitive::kPrimFloat, offset);
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        return TypeFlag(Primitive::kPrimLong, offset) |
-               TypeFlag(Primitive::kPrimDouble, offset);
-      default:
-        return TypeFlag(type, offset);
-    }
-  }
-
   // Translates type to bit flag.
   static uint64_t TypeFlag(Primitive::Type type, int offset) {
     CHECK_NE(type, Primitive::kPrimVoid);
@@ -1614,15 +1751,12 @@
     return holder_;
   }
 
- private:
-  // Record instructions' use entries of this environment for constant-time removal.
-  // It should only be called by HInstruction when a new environment use is added.
-  void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
-    DCHECK(env_use->GetUser() == this);
-    size_t index = env_use->GetIndex();
-    vregs_[index] = HUserRecord<HEnvironment*>(vregs_[index], env_use);
+
+  bool IsFromInlinedInvoke() const {
+    return GetParent() != nullptr;
   }
 
+ private:
   ArenaVector<HUserRecord<HEnvironment*>> vregs_;
   ArenaVector<Location> locations_;
   HEnvironment* parent_;
@@ -1639,115 +1773,6 @@
   DISALLOW_COPY_AND_ASSIGN(HEnvironment);
 };
 
-class ReferenceTypeInfo : ValueObject {
- public:
-  typedef Handle<mirror::Class> TypeHandle;
-
-  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
-    // The constructor will check that the type_handle is valid.
-    return ReferenceTypeInfo(type_handle, is_exact);
-  }
-
-  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
-
-  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
-    return handle.GetReference() != nullptr;
-  }
-
-  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsValidHandle(type_handle_);
-  }
-
-  bool IsExact() const { return is_exact_; }
-
-  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsObjectClass();
-  }
-
-  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsStringClass();
-  }
-
-  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
-  }
-
-  bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsInterface();
-  }
-
-  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsArrayClass();
-  }
-
-  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsPrimitiveArray();
-  }
-
-  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
-  }
-
-  bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    if (!IsExact()) return false;
-    if (!IsArrayClass()) return false;
-    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    if (!IsExact()) return false;
-    if (!IsArrayClass()) return false;
-    if (!rti.IsArrayClass()) return false;
-    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
-        rti.GetTypeHandle()->GetComponentType());
-  }
-
-  Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
-
-  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    DCHECK(rti.IsValid());
-    return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  // Returns true if the type information provide the same amount of details.
-  // Note that it does not mean that the instructions have the same actual type
-  // (because the type can be the result of a merge).
-  bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!IsValid() && !rti.IsValid()) {
-      // Invalid types are equal.
-      return true;
-    }
-    if (!IsValid() || !rti.IsValid()) {
-      // One is valid, the other not.
-      return false;
-    }
-    return IsExact() == rti.IsExact()
-        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
-  }
-
- private:
-  ReferenceTypeInfo();
-  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
-
-  // The class of the object.
-  TypeHandle type_handle_;
-  // Whether or not the type is exact or a superclass of the actual type.
-  // Whether or not we have any information about this type.
-  bool is_exact_;
-};
-
-std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
-
 class HInstruction : public ArenaObject<kArenaAllocInstruction> {
  public:
   HInstruction(SideEffects side_effects, uint32_t dex_pc)
@@ -1757,12 +1782,15 @@
         dex_pc_(dex_pc),
         id_(-1),
         ssa_index_(-1),
+        packed_fields_(0u),
         environment_(nullptr),
         locations_(nullptr),
         live_interval_(nullptr),
         lifetime_position_(kNoLifetime),
         side_effects_(side_effects),
-        reference_type_info_(ReferenceTypeInfo::CreateInvalid()) {}
+        reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) {
+    SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact());
+  }
 
   virtual ~HInstruction() {}
 
@@ -1783,18 +1811,38 @@
   void SetBlock(HBasicBlock* block) { block_ = block; }
   bool IsInBlock() const { return block_ != nullptr; }
   bool IsInLoop() const { return block_->IsInLoop(); }
-  bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
+  bool IsLoopHeaderPhi() const { return IsPhi() && block_->IsLoopHeader(); }
+  bool IsIrreducibleLoopHeaderPhi() const {
+    return IsLoopHeaderPhi() && GetBlock()->GetLoopInformation()->IsIrreducible();
+  }
 
-  virtual size_t InputCount() const = 0;
+  virtual ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() = 0;
+
+  ArrayRef<const HUserRecord<HInstruction*>> GetInputRecords() const {
+    // One virtual method is enough, just const_cast<> and then re-add the const.
+    return ArrayRef<const HUserRecord<HInstruction*>>(
+        const_cast<HInstruction*>(this)->GetInputRecords());
+  }
+
+  HInputsRef GetInputs() {
+    return MakeTransformArrayRef(GetInputRecords(), HInputExtractor());
+  }
+
+  HConstInputsRef GetInputs() const {
+    return MakeTransformArrayRef(GetInputRecords(), HInputExtractor());
+  }
+
+  size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  void SetRawInputAt(size_t index, HInstruction* input) {
+    SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
+  }
+
   virtual void Accept(HGraphVisitor* visitor) = 0;
   virtual const char* DebugName() const = 0;
 
   virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; }
-  void SetRawInputAt(size_t index, HInstruction* input) {
-    SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
-  }
 
   virtual bool NeedsEnvironment() const { return false; }
 
@@ -1820,40 +1868,61 @@
     return false;
   }
 
+  virtual bool IsActualObject() const {
+    return GetType() == Primitive::kPrimNot;
+  }
+
   void SetReferenceTypeInfo(ReferenceTypeInfo rti);
 
   ReferenceTypeInfo GetReferenceTypeInfo() const {
     DCHECK_EQ(GetType(), Primitive::kPrimNot);
-    return reference_type_info_;
+    return ReferenceTypeInfo::CreateUnchecked(reference_type_handle_,
+                                              GetPackedFlag<kFlagReferenceTypeIsExact>());
   }
 
   void AddUseAt(HInstruction* user, size_t index) {
     DCHECK(user != nullptr);
-    HUseListNode<HInstruction*>* use =
-        uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
-    user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use));
+    // Note: fixup_end remains valid across push_front().
+    auto fixup_end = uses_.empty() ? uses_.begin() : ++uses_.begin();
+    HUseListNode<HInstruction*>* new_node =
+        new (GetBlock()->GetGraph()->GetArena()) HUseListNode<HInstruction*>(user, index);
+    uses_.push_front(*new_node);
+    FixUpUserRecordsAfterUseInsertion(fixup_end);
   }
 
   void AddEnvUseAt(HEnvironment* user, size_t index) {
     DCHECK(user != nullptr);
-    HUseListNode<HEnvironment*>* env_use =
-        env_uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
-    user->RecordEnvUse(env_use);
+    // Note: env_fixup_end remains valid across push_front().
+    auto env_fixup_end = env_uses_.empty() ? env_uses_.begin() : ++env_uses_.begin();
+    HUseListNode<HEnvironment*>* new_node =
+        new (GetBlock()->GetGraph()->GetArena()) HUseListNode<HEnvironment*>(user, index);
+    env_uses_.push_front(*new_node);
+    FixUpUserRecordsAfterEnvUseInsertion(env_fixup_end);
   }
 
   void RemoveAsUserOfInput(size_t input) {
     HUserRecord<HInstruction*> input_use = InputRecordAt(input);
-    input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode());
+    HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+    input_use.GetInstruction()->uses_.erase_after(before_use_node);
+    input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
+  }
+
+  void RemoveAsUserOfAllInputs() {
+    for (const HUserRecord<HInstruction*>& input_use : GetInputRecords()) {
+      HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+      input_use.GetInstruction()->uses_.erase_after(before_use_node);
+      input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
+    }
   }
 
   const HUseList<HInstruction*>& GetUses() const { return uses_; }
   const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; }
 
-  bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
-  bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
-  bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); }
+  bool HasUses() const { return !uses_.empty() || !env_uses_.empty(); }
+  bool HasEnvironmentUses() const { return !env_uses_.empty(); }
+  bool HasNonEnvironmentUses() const { return !uses_.empty(); }
   bool HasOnlyOneNonEnvironmentUse() const {
-    return !HasEnvironmentUses() && GetUses().HasOnlyOneUse();
+    return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
   }
 
   // Does this instruction strictly dominate `other_instruction`?
@@ -1878,6 +1947,16 @@
     environment_ = environment;
   }
 
+  void InsertRawEnvironment(HEnvironment* environment) {
+    DCHECK(environment_ != nullptr);
+    DCHECK_EQ(environment->GetHolder(), this);
+    DCHECK(environment->GetParent() == nullptr);
+    environment->parent_ = environment_;
+    environment_ = environment;
+  }
+
+  void RemoveEnvironment();
+
   // Set the environment of this instruction, copying it from `environment`. While
   // copying, the uses lists are being updated.
   void CopyEnvironmentFrom(HEnvironment* environment) {
@@ -1921,45 +2000,66 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the users' blocks.
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
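+  // For concrete instructions, Is##type()/As##type() are only declared here;
+  // their non-virtual definitions live outside the class. Abstract
+  // instructions keep the virtual defaults below.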
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  bool Is##type() const;                                                       \
+  const H##type* As##type() const;                                             \
+  H##type* As##type();
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
   virtual H##type* As##type() { return nullptr; }
-
-  FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+  FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
-  virtual bool InstructionTypeEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
+  virtual bool InstructionTypeEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether any data encoded in the two instructions is equal.
   // This method does not look at the inputs. Both instructions must be
   // of the same type, otherwise the method has undefined behavior.
-  virtual bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
+  virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether two instructions are equal, that is:
   // 1) They have the same type and contain the same data (InstructionDataEquals).
   // 2) Their inputs are identical.
-  bool Equals(HInstruction* other) const;
+  bool Equals(const HInstruction* other) const;
 
-  virtual InstructionKind GetKind() const = 0;
+  // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
+  // is adopted and implemented by our C++ compiler(s). For now, we need to hide
+  // the virtual function because __attribute__((__pure__)) is not honored for
+  // virtual functions, which prevents the intended optimizations.
+  InstructionKind GetKind() const PURE;
+  virtual InstructionKind GetKindInternal() const = 0;
 
   virtual size_t ComputeHashCode() const {
     size_t result = GetKind();
-    for (size_t i = 0, e = InputCount(); i < e; ++i) {
-      result = (result * 31) + InputAt(i)->GetId();
+    for (const HInstruction* input : GetInputs()) {
+      result = (result * 31) + input->GetId();
     }
     return result;
   }
 
   SideEffects GetSideEffects() const { return side_effects_; }
+  void SetSideEffects(SideEffects other) { side_effects_ = other; }
   void AddSideEffects(SideEffects other) { side_effects_.Add(other); }
 
   size_t GetLifetimePosition() const { return lifetime_position_; }
@@ -1980,7 +2080,9 @@
     return NeedsEnvironment() || IsLoadClass() || IsLoadString();
   }
 
-  virtual bool NeedsDexCache() const { return false; }
+  // Returns whether the code generation of the instruction will require access
+  // to the dex cache of the current method's declaring class via the current method.
+  virtual bool NeedsDexCacheOfDeclaringClass() const { return false; }
 
   // Does this instruction have any use in an environment before
   // control flow hits 'other'?
@@ -1990,12 +2092,91 @@
   // The caller must ensure that this is safe to do.
   void RemoveEnvironmentUsers();
 
+  bool IsEmittedAtUseSite() const { return GetPackedFlag<kFlagEmittedAtUseSite>(); }
+  void MarkEmittedAtUseSite() { SetPackedFlag<kFlagEmittedAtUseSite>(true); }
+
  protected:
-  virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
-  virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
+  // If set, the machine code for this instruction is assumed to be generated by
+  // its users. Used by liveness analysis to compute use positions accordingly.
+  static constexpr size_t kFlagEmittedAtUseSite = 0u;
+  static constexpr size_t kFlagReferenceTypeIsExact = kFlagEmittedAtUseSite + 1;
+  static constexpr size_t kNumberOfGenericPackedBits = kFlagReferenceTypeIsExact + 1;
+  static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte;
+
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const {
+    return GetInputRecords()[i];
+  }
+
+  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) {
+    ArrayRef<HUserRecord<HInstruction*>> input_records = GetInputRecords();
+    input_records[index] = input;
+  }
+
+  uint32_t GetPackedFields() const {
+    return packed_fields_;
+  }
+
+  template <size_t flag>
+  bool GetPackedFlag() const {
+    return (packed_fields_ & (1u << flag)) != 0u;
+  }
+
+  template <size_t flag>
+  void SetPackedFlag(bool value = true) {
+    packed_fields_ = (packed_fields_ & ~(1u << flag)) | ((value ? 1u : 0u) << flag);
+  }
+
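+  // Illustrative example: with kFlagEmittedAtUseSite == 0,
+  // SetPackedFlag<kFlagEmittedAtUseSite>(true) sets bit 0 of packed_fields_,
+  // and GetPackedFlag<kFlagEmittedAtUseSite>() reads (packed_fields_ & 1u) != 0u.
+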
+  template <typename BitFieldType>
+  typename BitFieldType::value_type GetPackedField() const {
+    return BitFieldType::Decode(packed_fields_);
+  }
+
+  template <typename BitFieldType>
+  void SetPackedField(typename BitFieldType::value_type value) {
+    DCHECK(IsUint<BitFieldType::size>(static_cast<uintptr_t>(value)));
+    packed_fields_ = BitFieldType::Update(value, packed_fields_);
+  }
 
  private:
-  void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
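+  // Called after new nodes have been inserted at the front of the use list:
+  // for every use in [begin(), fixup_end), re-point the user's input record
+  // at the node's current predecessor, since the `before` iterators stored in
+  // those records may have gone stale across the insertion.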
+  void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) {
+    auto before_use_node = uses_.before_begin();
+    for (auto use_node = uses_.begin(); use_node != fixup_end; ++use_node) {
+      HInstruction* user = use_node->GetUser();
+      size_t input_index = use_node->GetIndex();
+      user->SetRawInputRecordAt(input_index, HUserRecord<HInstruction*>(this, before_use_node));
+      before_use_node = use_node;
+    }
+  }
+
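+  // Called after erase_after(before_use_node): the node that now follows
+  // `before_use_node` (if any) has a new predecessor, so re-point its user's
+  // input record accordingly.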
+  void FixUpUserRecordsAfterUseRemoval(HUseList<HInstruction*>::iterator before_use_node) {
+    auto next = ++HUseList<HInstruction*>::iterator(before_use_node);
+    if (next != uses_.end()) {
+      HInstruction* next_user = next->GetUser();
+      size_t next_index = next->GetIndex();
+      DCHECK(next_user->InputRecordAt(next_index).GetInstruction() == this);
+      next_user->SetRawInputRecordAt(next_index, HUserRecord<HInstruction*>(this, before_use_node));
+    }
+  }
+
+  void FixUpUserRecordsAfterEnvUseInsertion(HUseList<HEnvironment*>::iterator env_fixup_end) {
+    auto before_env_use_node = env_uses_.before_begin();
+    for (auto env_use_node = env_uses_.begin(); env_use_node != env_fixup_end; ++env_use_node) {
+      HEnvironment* user = env_use_node->GetUser();
+      size_t input_index = env_use_node->GetIndex();
+      user->vregs_[input_index] = HUserRecord<HEnvironment*>(this, before_env_use_node);
+      before_env_use_node = env_use_node;
+    }
+  }
+
+  void FixUpUserRecordsAfterEnvUseRemoval(HUseList<HEnvironment*>::iterator before_env_use_node) {
+    auto next = ++HUseList<HEnvironment*>::iterator(before_env_use_node);
+    if (next != env_uses_.end()) {
+      HEnvironment* next_user = next->GetUser();
+      size_t next_index = next->GetIndex();
+      DCHECK(next_user->vregs_[next_index].GetInstruction() == this);
+      next_user->vregs_[next_index] = HUserRecord<HEnvironment*>(this, before_env_use_node);
+    }
+  }
 
   HInstruction* previous_;
   HInstruction* next_;
@@ -2010,6 +2191,9 @@
   // When doing liveness analysis, instructions that have uses get an SSA index.
   int ssa_index_;
 
+  // Packed fields.
+  uint32_t packed_fields_;
+
   // List of instructions that have this instruction as input.
   HUseList<HInstruction*> uses_;
 
@@ -2032,8 +2216,10 @@
 
   SideEffects side_effects_;
 
+  // The reference handle part of the reference type info.
+  // The IsExact() flag is stored in packed fields.
   // TODO: for primitive types this should be marked as invalid.
-  ReferenceTypeInfo reference_type_info_;
+  ReferenceTypeInfo::TypeHandle reference_type_handle_;
 
   friend class GraphChecker;
   friend class HBasicBlock;
@@ -2045,21 +2231,6 @@
 };
 std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs);
 
-class HInputIterator : public ValueObject {
- public:
-  explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {}
-
-  bool Done() const { return index_ == instruction_->InputCount(); }
-  HInstruction* Current() const { return instruction_->InputAt(index_); }
-  void Advance() { index_++; }
-
- private:
-  HInstruction* instruction_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HInputIterator);
-};
-
 class HInstructionIterator : public ValueObject {
  public:
   explicit HInstructionIterator(const HInstructionList& instructions)
@@ -2109,17 +2280,9 @@
       : HInstruction(side_effects, dex_pc), inputs_() {}
   virtual ~HTemplateInstruction() {}
 
-  size_t InputCount() const OVERRIDE { return N; }
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
-    DCHECK_LT(i, N);
-    return inputs_[i];
-  }
-
-  void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LT(i, N);
-    inputs_[i] = input;
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
   }
 
  private:
@@ -2137,18 +2300,9 @@
 
   virtual ~HTemplateInstruction() {}
 
-  size_t InputCount() const OVERRIDE { return 0; }
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-
-  void SetRawInputRecordAt(size_t i ATTRIBUTE_UNUSED,
-                           const HUserRecord<HInstruction*>& input ATTRIBUTE_UNUSED) OVERRIDE {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>();
   }
 
  private:
@@ -2159,18 +2313,28 @@
 class HExpression : public HTemplateInstruction<N> {
  public:
   HExpression<N>(Primitive::Type type, SideEffects side_effects, uint32_t dex_pc)
-      : HTemplateInstruction<N>(side_effects, dex_pc), type_(type) {}
+      : HTemplateInstruction<N>(side_effects, dex_pc) {
+    this->template SetPackedField<TypeField>(type);
+  }
   virtual ~HExpression() {}
 
-  Primitive::Type GetType() const OVERRIDE { return type_; }
+  Primitive::Type GetType() const OVERRIDE {
+    return TypeField::Decode(this->GetPackedFields());
+  }
 
  protected:
-  Primitive::Type type_;
+  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kNumberOfExpressionPackedBits = kFieldType + kFieldTypeSize;
+  static_assert(kNumberOfExpressionPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
 };
 
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
 // instruction that branches to the exit block.
-class HReturnVoid : public HTemplateInstruction<0> {
+class HReturnVoid FINAL : public HTemplateInstruction<0> {
  public:
   explicit HReturnVoid(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {}
@@ -2185,7 +2349,7 @@
 
 // Represents dex's RETURN opcodes. A HReturn is a control flow
 // instruction that branches to the exit block.
-class HReturn : public HTemplateInstruction<1> {
+class HReturn FINAL : public HTemplateInstruction<1> {
  public:
   explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2200,10 +2364,105 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
+class HPhi FINAL : public HInstruction {
+ public:
+  HPhi(ArenaAllocator* arena,
+       uint32_t reg_number,
+       size_t number_of_inputs,
+       Primitive::Type type,
+       uint32_t dex_pc = kNoDexPc)
+      : HInstruction(SideEffects::None(), dex_pc),
+        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
+        reg_number_(reg_number) {
+    SetPackedField<TypeField>(ToPhiType(type));
+    DCHECK_NE(GetType(), Primitive::kPrimVoid);
+    // Phis are constructed live and marked dead if conflicting or unused.
+    // Individual steps of SsaBuilder should assume that if a phi has been
+    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+    SetPackedFlag<kFlagIsLive>(true);
+    SetPackedFlag<kFlagCanBeNull>(true);
+  }
+
+  // Returns a type equivalent to the given `type`, but that an `HPhi` can hold.
+  static Primitive::Type ToPhiType(Primitive::Type type) {
+    return Primitive::PrimitiveKind(type);
+  }
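+  // (For example, kPrimBoolean, kPrimByte, kPrimShort and kPrimChar all map
+  // to kPrimInt: a phi only tracks the kind of register, not the exact
+  // narrow type.)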
+
+  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
+
+  void AddInput(HInstruction* input);
+  void RemoveInputAt(size_t index);
+
+  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
+  void SetType(Primitive::Type new_type) {
+    // Make sure that only valid type changes occur. The following are allowed:
+    //  (1) int  -> float/ref (primitive type propagation),
+    //  (2) long -> double (primitive type propagation).
+    DCHECK(GetType() == new_type ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+    SetPackedField<TypeField>(new_type);
+  }
+
+  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
+
+  uint32_t GetRegNumber() const { return reg_number_; }
+
+  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
+  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
+  bool IsDead() const { return !IsLive(); }
+  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
+
+  bool IsVRegEquivalentOf(const HInstruction* other) const {
+    return other != nullptr
+        && other->IsPhi()
+        && other->AsPhi()->GetBlock() == GetBlock()
+        && other->AsPhi()->GetRegNumber() == GetRegNumber();
+  }
+
+  // Returns the next equivalent phi (starting from the current one) or null if there is none.
+  // An equivalent phi is a phi having the same dex register and type.
+  // It assumes that phis with the same dex register are adjacent.
+  HPhi* GetNextEquivalentPhiWithSameType() {
+    HInstruction* next = GetNext();
+    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
+      if (next->GetType() == GetType()) {
+        return next->AsPhi();
+      }
+      next = next->GetNext();
+    }
+    return nullptr;
+  }
+
+  DECLARE_INSTRUCTION(Phi);
+
+ private:
+  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
+  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
+  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
+  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+  ArenaVector<HUserRecord<HInstruction*>> inputs_;
+  const uint32_t reg_number_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPhi);
+};
+
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
-class HExit : public HTemplateInstruction<0> {
+class HExit FINAL : public HTemplateInstruction<0> {
  public:
   explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2216,7 +2475,7 @@
 };
 
 // Jumps from one block to another.
-class HGoto : public HTemplateInstruction<0> {
+class HGoto FINAL : public HTemplateInstruction<0> {
  public:
   explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2239,21 +2498,26 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
+  // Is this constant -1 in the arithmetic sense?
   virtual bool IsMinusOne() const { return false; }
-  virtual bool IsZero() const { return false; }
+  // Is this constant 0 in the arithmetic sense?
+  virtual bool IsArithmeticZero() const { return false; }
+  // Is this constant a 0-bit pattern?
+  virtual bool IsZeroBitPattern() const { return false; }
+  // Is this constant 1 in the arithmetic sense?
   virtual bool IsOne() const { return false; }
 
   virtual uint64_t GetValueAsUint64() const = 0;
 
-  DECLARE_INSTRUCTION(Constant);
+  DECLARE_ABSTRACT_INSTRUCTION(Constant);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HConstant);
 };
 
-class HNullConstant : public HConstant {
+class HNullConstant FINAL : public HConstant {
  public:
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2261,6 +2525,9 @@
 
   size_t ComputeHashCode() const OVERRIDE { return 0; }
 
+  // The null constant representation is a 0-bit pattern.
+  bool IsZeroBitPattern() const OVERRIDE { return true; }
+
   DECLARE_INSTRUCTION(NullConstant);
 
  private:
@@ -2272,7 +2539,7 @@
 
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
-class HIntConstant : public HConstant {
+class HIntConstant FINAL : public HConstant {
  public:
   int32_t GetValue() const { return value_; }
 
@@ -2280,17 +2547,23 @@
     return static_cast<uint64_t>(static_cast<uint32_t>(value_));
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsIntConstant());
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsIntConstant()) << other->DebugName();
     return other->AsIntConstant()->value_ == value_;
   }
 
   size_t ComputeHashCode() const OVERRIDE { return GetValue(); }
 
   bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
-  bool IsZero() const OVERRIDE { return GetValue() == 0; }
+  bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; }
+  bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; }
   bool IsOne() const OVERRIDE { return GetValue() == 1; }
 
+  // Integer constants are used to encode Boolean values as well,
+  // where 1 means true and 0 means false.
+  bool IsTrue() const { return GetValue() == 1; }
+  bool IsFalse() const { return GetValue() == 0; }
+
   DECLARE_INSTRUCTION(IntConstant);
 
  private:
@@ -2307,21 +2580,22 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
-class HLongConstant : public HConstant {
+class HLongConstant FINAL : public HConstant {
  public:
   int64_t GetValue() const { return value_; }
 
   uint64_t GetValueAsUint64() const OVERRIDE { return value_; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsLongConstant());
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsLongConstant()) << other->DebugName();
     return other->AsLongConstant()->value_ == value_;
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
-  bool IsZero() const OVERRIDE { return GetValue() == 0; }
+  bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; }
+  bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; }
   bool IsOne() const OVERRIDE { return GetValue() == 1; }
 
   DECLARE_INSTRUCTION(LongConstant);
@@ -2336,9 +2610,113 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
+class HFloatConstant FINAL : public HConstant {
+ public:
+  float GetValue() const { return value_; }
+
+  uint64_t GetValueAsUint64() const OVERRIDE {
+    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
+  }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsFloatConstant()) << other->DebugName();
+    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+
+  bool IsMinusOne() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
+  }
+  bool IsArithmeticZero() const OVERRIDE {
+    return std::fpclassify(value_) == FP_ZERO;
+  }
+  bool IsArithmeticPositiveZero() const {
+    return IsArithmeticZero() && !std::signbit(value_);
+  }
+  bool IsArithmeticNegativeZero() const {
+    return IsArithmeticZero() && std::signbit(value_);
+  }
+  bool IsZeroBitPattern() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(0.0f);
+  }
+  bool IsOne() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
+  }
+
+  DECLARE_INSTRUCTION(FloatConstant);
+
+ private:
+  explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {}
+  explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {}
+
+  const float value_;
+
+  // Only the SsaBuilder and HGraph can create floating-point constants.
+  friend class SsaBuilder;
+  friend class HGraph;
+  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
+};
+
+class HDoubleConstant FINAL : public HConstant {
+ public:
+  double GetValue() const { return value_; }
+
+  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsDoubleConstant()) << other->DebugName();
+    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+
+  bool IsMinusOne() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
+  }
+  bool IsArithmeticZero() const OVERRIDE {
+    return std::fpclassify(value_) == FP_ZERO;
+  }
+  bool IsArithmeticPositiveZero() const {
+    return IsArithmeticZero() && !std::signbit(value_);
+  }
+  bool IsArithmeticNegativeZero() const {
+    return IsArithmeticZero() && std::signbit(value_);
+  }
+  bool IsZeroBitPattern() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((0.0));
+  }
+  bool IsOne() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
+  }
+
+  DECLARE_INSTRUCTION(DoubleConstant);
+
+ private:
+  explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {}
+  explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {}
+
+  const double value_;
+
+  // Only the SsaBuilder and HGraph can create floating-point constants.
+  friend class SsaBuilder;
+  friend class HGraph;
+  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
+};
+
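The split of the old IsZero() into IsArithmeticZero() and IsZeroBitPattern()
matters precisely for floating point: -0.0 is an arithmetic zero but not a
0-bit pattern. A standalone sketch of the distinction, independent of the HIR
classes:

  #include <cmath>
  #include <cstdint>
  #include <cstring>

  bool IsArithmeticZeroSketch(float v) { return std::fpclassify(v) == FP_ZERO; }
  bool IsZeroBitPatternSketch(float v) {
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));  // Portable stand-in for bit_cast.
    return bits == 0u;
  }
  // For v == -0.0f: IsArithmeticZeroSketch(v) is true,
  // IsZeroBitPatternSketch(v) is false (the bits are 0x80000000), and
  // std::signbit(v) tells the two zeros apart.
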
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
-class HIf : public HTemplateInstruction<1> {
+class HIf FINAL : public HTemplateInstruction<1> {
  public:
   explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2367,21 +2745,28 @@
 // non-exceptional control flow.
 // Normal-flow successor is stored at index zero, exception handlers under
 // higher indices in no particular order.
-class HTryBoundary : public HTemplateInstruction<0> {
+class HTryBoundary FINAL : public HTemplateInstruction<0> {
  public:
-  enum BoundaryKind {
+  enum class BoundaryKind {
     kEntry,
     kExit,
+    kLast = kExit
   };
 
   explicit HTryBoundary(BoundaryKind kind, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc), kind_(kind) {}
+      : HTemplateInstruction(SideEffects::None(), dex_pc) {
+    SetPackedField<BoundaryKindField>(kind);
+  }
 
   bool IsControlFlow() const OVERRIDE { return true; }
 
   // Returns the block's non-exceptional successor (index zero).
   HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; }
 
+  ArrayRef<HBasicBlock* const> GetExceptionHandlers() const {
+    return ArrayRef<HBasicBlock* const>(GetBlock()->GetSuccessors()).SubArray(1u);
+  }
+
   // Returns whether `handler` is among its exception handlers (non-zero index
   // successors).
   bool HasExceptionHandler(const HBasicBlock& handler) const {
@@ -2397,45 +2782,40 @@
     }
   }
 
-  bool IsEntry() const { return kind_ == BoundaryKind::kEntry; }
+  BoundaryKind GetBoundaryKind() const { return GetPackedField<BoundaryKindField>(); }
+  bool IsEntry() const { return GetBoundaryKind() == BoundaryKind::kEntry; }
 
   bool HasSameExceptionHandlersAs(const HTryBoundary& other) const;
 
   DECLARE_INSTRUCTION(TryBoundary);
 
  private:
-  const BoundaryKind kind_;
+  static constexpr size_t kFieldBoundaryKind = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldBoundaryKindSize =
+      MinimumBitsToStore(static_cast<size_t>(BoundaryKind::kLast));
+  static constexpr size_t kNumberOfTryBoundaryPackedBits =
+      kFieldBoundaryKind + kFieldBoundaryKindSize;
+  static_assert(kNumberOfTryBoundaryPackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using BoundaryKindField = BitField<BoundaryKind, kFieldBoundaryKind, kFieldBoundaryKindSize>;
 
   DISALLOW_COPY_AND_ASSIGN(HTryBoundary);
 };
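
HTryBoundary is the first of many classes in this change to move a dedicated
member (here `kind_`) into HInstruction's shared packed-bits storage. The
recurring recipe, as a minimal sketch inside a hypothetical HInstruction
subclass (BitField, MinimumBitsToStore and the packed-bit constants are the
ones used above):

  enum class Kind { kA, kB, kLast = kB };
  // Start where the base class's packed bits end; use the minimum width that
  // can store the enum's kLast value; guard the shared bit budget.
  static constexpr size_t kFieldKind = kNumberOfGenericPackedBits;
  static constexpr size_t kFieldKindSize =
      MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
  static_assert(kFieldKind + kFieldKindSize <= kMaxNumberOfPackedBits,
                "Too many packed fields.");
  using KindField = BitField<Kind, kFieldKind, kFieldKindSize>;
  // Constructors then call SetPackedField<KindField>(kind) and getters call
  // GetPackedField<KindField>().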
 
-// Iterator over exception handlers of a given HTryBoundary, i.e. over
-// exceptional successors of its basic block.
-class HExceptionHandlerIterator : public ValueObject {
- public:
-  explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary)
-    : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
-
-  bool Done() const { return index_ == block_.GetSuccessors().size(); }
-  HBasicBlock* Current() const { return block_.GetSuccessors()[index_]; }
-  size_t CurrentSuccessorIndex() const { return index_; }
-  void Advance() { ++index_; }
-
- private:
-  const HBasicBlock& block_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator);
-};
-
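With HExceptionHandlerIterator gone, callers iterate handlers through the new
GetExceptionHandlers() accessor, whose ArrayRef works directly in a
range-based for loop. A usage sketch:

  // Visits each exceptional successor (successor indices >= 1).
  for (HBasicBlock* handler : try_boundary.GetExceptionHandlers()) {
    // ... process `handler` ...
  }
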
 // Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HTemplateInstruction<1> {
  public:
-  explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
+  // We set CanTriggerGC to prevent any intermediate address from being
+  // live at the point of the `HDeoptimize`.
+  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
@@ -2448,7 +2828,7 @@
 // Represents the ArtMethod that was passed as a first argument to
 // the method. It is used by instructions that depend on it, like
 // instructions that work with the dex cache.
-class HCurrentMethod : public HExpression<0> {
+class HCurrentMethod FINAL : public HExpression<0> {
  public:
   explicit HCurrentMethod(Primitive::Type type, uint32_t dex_pc = kNoDexPc)
       : HExpression(type, SideEffects::None(), dex_pc) {}
@@ -2459,10 +2839,56 @@
   DISALLOW_COPY_AND_ASSIGN(HCurrentMethod);
 };
 
+// Fetches an ArtMethod from the virtual table or the interface method table
+// of a class.
+class HClassTableGet FINAL : public HExpression<1> {
+ public:
+  enum class TableKind {
+    kVTable,
+    kIMTable,
+    kLast = kIMTable
+  };
+  HClassTableGet(HInstruction* cls,
+                 Primitive::Type type,
+                 TableKind kind,
+                 size_t index,
+                 uint32_t dex_pc)
+      : HExpression(type, SideEffects::None(), dex_pc),
+        index_(index) {
+    SetPackedField<TableKindField>(kind);
+    SetRawInputAt(0, cls);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return other->AsClassTableGet()->GetIndex() == index_ &&
+        other->AsClassTableGet()->GetPackedFields() == GetPackedFields();
+  }
+
+  TableKind GetTableKind() const { return GetPackedField<TableKindField>(); }
+  size_t GetIndex() const { return index_; }
+
+  DECLARE_INSTRUCTION(ClassTableGet);
+
+ private:
+  static constexpr size_t kFieldTableKind = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFieldTableKindSize =
+      MinimumBitsToStore(static_cast<size_t>(TableKind::kLast));
+  static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize;
+  static_assert(kNumberOfClassTableGetPackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKindSize>;
+
+  // The index of the ArtMethod in the table.
+  const size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HClassTableGet);
+};
+
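A hypothetical construction site for HClassTableGet, e.g. when devirtualizing
through a known receiver class; `arena`, `cls`, `slot` and `dex_pc` are
illustrative names, not from this change:

  // Hedged sketch: fetch the ArtMethod* at vtable slot `slot` of `cls`.
  HClassTableGet* get = new (arena) HClassTableGet(
      cls,                                 // HInstruction* producing the class
      Primitive::kPrimNot,                 // type of the fetched method pointer
      HClassTableGet::TableKind::kVTable,  // virtual dispatch table
      slot,                                // index into the table
      dex_pc);
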
 // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
 // have one successor for each entry in the switch table, and the final successor
 // will be the block containing the next Dex opcode.
-class HPackedSwitch : public HTemplateInstruction<1> {
+class HPackedSwitch FINAL : public HTemplateInstruction<1> {
  public:
   HPackedSwitch(int32_t start_value,
                 uint32_t num_entries,
@@ -2504,20 +2930,22 @@
   Primitive::Type GetResultType() const { return GetType(); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
-  // Try to statically evaluate `operation` and return a HConstant
-  // containing the result of this evaluation.  If `operation` cannot
+  // Try to statically evaluate `this` and return a HConstant
+  // containing the result of this evaluation.  If `this` cannot
   // be evaluated as a constant, return null.
   HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x`.
   virtual HConstant* Evaluate(HIntConstant* x) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x) const = 0;
+  virtual HConstant* Evaluate(HFloatConstant* x) const = 0;
+  virtual HConstant* Evaluate(HDoubleConstant* x) const = 0;
 
-  DECLARE_INSTRUCTION(UnaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HUnaryOperation);
@@ -2574,28 +3002,30 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
-  // Try to statically evaluate `operation` and return a HConstant
-  // containing the result of this evaluation.  If `operation` cannot
+  // Try to statically evaluate `this` and return a HConstant
+  // containing the result of this evaluation.  If `this` cannot
   // be evaluated as a constant, return null.
   HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x` and `y`.
+  virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                              HNullConstant* y ATTRIBUTE_UNUSED) const {
+    LOG(FATAL) << DebugName() << " is not defined for the (null, null) case.";
+    UNREACHABLE();
+  }
   virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0;
-  virtual HConstant* Evaluate(HIntConstant* x ATTRIBUTE_UNUSED,
-                              HLongConstant* y ATTRIBUTE_UNUSED) const {
-    VLOG(compiler) << DebugName() << " is not defined for the (int, long) case.";
-    return nullptr;
-  }
   virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED,
                               HIntConstant* y ATTRIBUTE_UNUSED) const {
-    VLOG(compiler) << DebugName() << " is not defined for the (long, int) case.";
-    return nullptr;
+    LOG(FATAL) << DebugName() << " is not defined for the (long, int) case.";
+    UNREACHABLE();
   }
+  virtual HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const = 0;
+  virtual HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const = 0;
 
   // Returns an input that can legally be used as the right input and is
   // constant, or null.
@@ -2605,7 +3035,7 @@
   // one. Otherwise it returns null.
   HInstruction* GetLeastConstantLeft() const;
 
-  DECLARE_INSTRUCTION(BinaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
@@ -2617,74 +3047,118 @@
   kNoBias,  // bias is not applicable (i.e. for long operation)
   kGtBias,  // return 1 for NaN comparisons
   kLtBias,  // return -1 for NaN comparisons
+  kLast = kLtBias
 };
 
+std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs);
+
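The bias encodes how the Dex cmpg/cmpl instructions order NaN: with gt bias an
unordered comparison yields 1, with lt bias it yields -1. A standalone
restatement of the rule the CompareFP helpers below implement:

  #include <cmath>
  #include <cstdint>

  int32_t BiasedCompareSketch(float x, float y, bool gt_bias) {
    if (std::isunordered(x, y)) {
      return gt_bias ? 1 : -1;  // NaN involved: the bias decides the result.
    }
    return x > y ? 1 : (x < y ? -1 : 0);
  }
  // BiasedCompareSketch(NAN, 0.0f, /* gt_bias= */ true)  == 1
  // BiasedCompareSketch(NAN, 0.0f, /* gt_bias= */ false) == -1
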
 class HCondition : public HBinaryOperation {
  public:
   HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
-      : HBinaryOperation(Primitive::kPrimBoolean, first, second, SideEffects::None(), dex_pc),
-        needs_materialization_(true),
-        bias_(ComparisonBias::kNoBias) {}
-
-  bool NeedsMaterialization() const { return needs_materialization_; }
-  void ClearNeedsMaterialization() { needs_materialization_ = false; }
+      : HBinaryOperation(Primitive::kPrimBoolean, first, second, SideEffects::None(), dex_pc) {
+    SetPackedField<ComparisonBiasField>(ComparisonBias::kNoBias);
+  }
 
   // For code generation purposes, returns whether this instruction is just before
   // `instruction`, disregarding moves in between.
   bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
 
-  DECLARE_INSTRUCTION(Condition);
+  DECLARE_ABSTRACT_INSTRUCTION(Condition);
 
   virtual IfCondition GetCondition() const = 0;
 
   virtual IfCondition GetOppositeCondition() const = 0;
 
-  bool IsGtBias() const { return bias_ == ComparisonBias::kGtBias; }
+  bool IsGtBias() const { return GetBias() == ComparisonBias::kGtBias; }
+  bool IsLtBias() const { return GetBias() == ComparisonBias::kLtBias; }
 
-  void SetBias(ComparisonBias bias) { bias_ = bias; }
+  ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
+  void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bias_ == other->AsCondition()->bias_;
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return GetPackedFields() == other->AsCondition()->GetPackedFields();
   }
 
   bool IsFPConditionTrueIfNaN() const {
-    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType()));
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
-    return IsGtBias() ? ((if_cond == kCondGT) || (if_cond == kCondGE)) : (if_cond == kCondNE);
+    if (if_cond == kCondNE) {
+      return true;
+    } else if (if_cond == kCondEQ) {
+      return false;
+    }
+    return ((if_cond == kCondGT) || (if_cond == kCondGE)) && IsGtBias();
   }
 
   bool IsFPConditionFalseIfNaN() const {
-    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType()));
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
-    return IsGtBias() ? ((if_cond == kCondLT) || (if_cond == kCondLE)) : (if_cond == kCondEQ);
+    if (if_cond == kCondEQ) {
+      return true;
+    } else if (if_cond == kCondNE) {
+      return false;
+    }
+    return ((if_cond == kCondLT) || (if_cond == kCondLE)) && IsGtBias();
+  }
+
+ protected:
+  // Needed if we merge a HCompare into a HCondition.
+  static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFieldComparisonBiasSize =
+      MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast));
+  static constexpr size_t kNumberOfConditionPackedBits =
+      kFieldComparisonBias + kFieldComparisonBiasSize;
+  static_assert(kNumberOfConditionPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using ComparisonBiasField =
+      BitField<ComparisonBias, kFieldComparisonBias, kFieldComparisonBiasSize>;
+
+  template <typename T>
+  int32_t Compare(T x, T y) const { return x > y ? 1 : (x < y ? -1 : 0); }
+
+  template <typename T>
+  int32_t CompareFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    DCHECK_NE(GetBias(), ComparisonBias::kNoBias);
+    // Handle the bias.
+    return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compare(x, y);
+  }
+
+  // Return an integer constant containing the result of a condition evaluated at compile time.
+  HIntConstant* MakeConstantCondition(bool value, uint32_t dex_pc) const {
+    return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc);
   }
 
  private:
-  // For register allocation purposes, returns whether this instruction needs to be
-  // materialized (that is, not just be in the processor flags).
-  bool needs_materialization_;
-
-  // Needed if we merge a HCompare into a HCondition.
-  ComparisonBias bias_;
-
   DISALLOW_COPY_AND_ASSIGN(HCondition);
 };
 
 // Instruction to check if two inputs are equal to each other.
-class HEqual : public HCondition {
+class HEqual FINAL : public HCondition {
  public:
   HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                      HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    return MakeConstantCondition(true, GetDexPc());
   }
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HEqual instruction; evaluate it as
+  // `Compare(x, y) == 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0),
+                                 GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(Equal);
@@ -2698,25 +3172,36 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x == y; }
+  template <typename T> static bool Compute(T x, T y) { return x == y; }
 
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
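
The long/float/double Evaluate overloads above fold a previously merged
HCompare: the condition is applied to the three-way comparison result rather
than to the raw operands. A worked example of that scheme for HEqual:

  #include <cstdint>

  // Standalone restatement: Equal-after-Compare is Compare(x, y) == 0.
  int32_t CompareSketch(int64_t x, int64_t y) {
    return x > y ? 1 : (x < y ? -1 : 0);
  }
  bool EqualViaCompare(int64_t x, int64_t y) {
    return CompareSketch(x, y) == 0;
  }
  // EqualViaCompare(5, 7) is false (Compare yields -1);
  // EqualViaCompare(7, 7) is true (Compare yields 0).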
 
-class HNotEqual : public HCondition {
+class HNotEqual FINAL : public HCondition {
  public:
   HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                      HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    return MakeConstantCondition(false, GetDexPc());
   }
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HNotEqual instruction; evaluate it as
+  // `Compare(x, y) != 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(NotEqual);
@@ -2730,23 +3215,30 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x != y; }
+  template <typename T> static bool Compute(T x, T y) { return x != y; }
 
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
 
-class HLessThan : public HCondition {
+class HLessThan FINAL : public HCondition {
  public:
   HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HLessThan instruction; evaluate it as
+  // `Compare(x, y) < 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(LessThan);
@@ -2760,23 +3252,30 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
+  template <typename T> static bool Compute(T x, T y) { return x < y; }
 
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
 
-class HLessThanOrEqual : public HCondition {
+class HLessThanOrEqual FINAL : public HCondition {
  public:
   HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HLessThanOrEqual instruction; evaluate it as
+  // `Compare(x, y) <= 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(LessThanOrEqual);
@@ -2790,23 +3289,30 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+  template <typename T> static bool Compute(T x, T y) { return x <= y; }
 
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
 
-class HGreaterThan : public HCondition {
+class HGreaterThan FINAL : public HCondition {
  public:
   HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HGreaterThan instruction; evaluate it as
+  // `Compare(x, y) > 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(GreaterThan);
@@ -2820,23 +3326,30 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
+  template <typename T> static bool Compute(T x, T y) { return x > y; }
 
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
 
-class HGreaterThanOrEqual : public HCondition {
+class HGreaterThanOrEqual FINAL : public HCondition {
  public:
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HGreaterThanOrEqual instruction; evaluate it as
+  // `Compare(x, y) >= 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(GreaterThanOrEqual);
@@ -2850,25 +3363,31 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+  template <typename T> static bool Compute(T x, T y) { return x >= y; }
 
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
 
-class HBelow : public HCondition {
+class HBelow FINAL : public HCondition {
  public:
   HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Below);
@@ -2882,25 +3401,33 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
+  template <typename T> static bool Compute(T x, T y) {
+    return MakeUnsigned(x) < MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HBelow);
 };
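
Making Compute go through MakeUnsigned spells out the unsigned semantics of
Below/BelowOrEqual/Above/AboveOrEqual instead of casting at each call site.
A standalone illustration, with MakeUnsigned assumed to be the usual
bit-preserving signed-to-unsigned map:

  #include <cstdint>
  #include <type_traits>

  template <typename T>
  typename std::make_unsigned<T>::type MakeUnsignedSketch(T x) {
    return static_cast<typename std::make_unsigned<T>::type>(x);
  }
  // Signed:   -1 < 0 is true.
  // Unsigned: MakeUnsignedSketch(-1) is 0xFFFFFFFFu, so Below(-1, 0) is
  // false and Above(-1, 0) is true, which is the intended unsigned ordering.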
 
-class HBelowOrEqual : public HCondition {
+class HBelowOrEqual FINAL : public HCondition {
  public:
   HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(BelowOrEqual);
@@ -2914,25 +3441,33 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+  template <typename T> static bool Compute(T x, T y) {
+    return MakeUnsigned(x) <= MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
 };
 
-class HAbove : public HCondition {
+class HAbove FINAL : public HCondition {
  public:
   HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Above);
@@ -2946,25 +3481,33 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
+  template <typename T> static bool Compute(T x, T y) {
+    return MakeUnsigned(x) > MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HAbove);
 };
 
-class HAboveOrEqual : public HCondition {
+class HAboveOrEqual FINAL : public HCondition {
  public:
   HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(AboveOrEqual);
@@ -2978,16 +3521,21 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+  template <typename T> static bool Compute(T x, T y) {
+    return MakeUnsigned(x) >= MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HAboveOrEqual);
 };
 
 // Instruction to check how two inputs compare to each other.
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
-class HCompare : public HBinaryOperation {
+class HCompare FINAL : public HBinaryOperation {
  public:
-  HCompare(Primitive::Type type,
+  // Note that `comparison_type` is the type of comparison performed
+  // between the comparison's inputs, not the type of the instantiated
+  // HCompare instruction (which is always Primitive::kPrimInt).
+  HCompare(Primitive::Type comparison_type,
            HInstruction* first,
            HInstruction* second,
            ComparisonBias bias,
@@ -2995,186 +3543,145 @@
       : HBinaryOperation(Primitive::kPrimInt,
                          first,
                          second,
-                         SideEffectsForArchRuntimeCalls(type),
-                         dex_pc),
-        bias_(bias) {
-    DCHECK_EQ(type, first->GetType());
-    DCHECK_EQ(type, second->GetType());
+                         SideEffectsForArchRuntimeCalls(comparison_type),
+                         dex_pc) {
+    SetPackedField<ComparisonBiasField>(bias);
+    DCHECK_EQ(comparison_type, Primitive::PrimitiveKind(first->GetType()));
+    DCHECK_EQ(comparison_type, Primitive::PrimitiveKind(second->GetType()));
   }
 
   template <typename T>
-  int32_t Compute(T x, T y) const { return x == y ? 0 : x > y ? 1 : -1; }
+  int32_t Compute(T x, T y) const { return x > y ? 1 : (x < y ? -1 : 0); }
+
+  template <typename T>
+  int32_t ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    DCHECK_NE(GetBias(), ComparisonBias::kNoBias);
+    // Handle the bias.
+    return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y);
+  }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    // Note that there is no "cmp-int" Dex instruction, so we shouldn't
+    // reach this code path when processing a freshly built HIR
+    // graph. However, HCompare integer instructions can be synthesized
+    // by the instruction simplifier to implement the IntegerCompare and
+    // IntegerSignum intrinsics, so we have to handle this case.
+    return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bias_ == other->AsCompare()->bias_;
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return GetPackedFields() == other->AsCompare()->GetPackedFields();
   }
 
-  ComparisonBias GetBias() const { return bias_; }
+  ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
 
-  bool IsGtBias() { return bias_ == ComparisonBias::kGtBias; }
+  // Does this compare instruction have a "gt bias" (vs an "lt bias")?
+  // Only meaningful for floating-point comparisons.
+  bool IsGtBias() const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    return GetBias() == ComparisonBias::kGtBias;
+  }
 
-
-  static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type type) {
-    // MIPS64 uses a runtime call for FP comparisons.
-    return Primitive::IsFloatingPointType(type) ? SideEffects::CanTriggerGC() : SideEffects::None();
+  static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type type ATTRIBUTE_UNUSED) {
+    // Comparisons do not require a runtime call in any back end.
+    return SideEffects::None();
   }
 
   DECLARE_INSTRUCTION(Compare);
 
- private:
-  const ComparisonBias bias_;
+ protected:
+  static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFieldComparisonBiasSize =
+      MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast));
+  static constexpr size_t kNumberOfComparePackedBits =
+      kFieldComparisonBias + kFieldComparisonBiasSize;
+  static_assert(kNumberOfComparePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using ComparisonBiasField =
+      BitField<ComparisonBias, kFieldComparisonBias, kFieldComparisonBiasSize>;
 
+  // Return an integer constant containing the result of a comparison evaluated at compile time.
+  HIntConstant* MakeConstantComparison(int32_t value, uint32_t dex_pc) const {
+    DCHECK(value == -1 || value == 0 || value == 1) << value;
+    return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc);
+  }
+
+ private:
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
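
Putting the HCompare pieces together: the instruction itself is always
kPrimInt, while `comparison_type` and the bias describe its inputs. An
illustrative folding of a gt-biased float compare, as comments:

  // ComputeFP(NAN,  1.0f) -> unordered, gt bias -> 1
  // ComputeFP(1.0f, 2.0f) -> ordered, less      -> -1
  // ComputeFP(2.0f, 2.0f) -> ordered, equal     -> 0
  // Each result passes through MakeConstantComparison, which DCHECKs that
  // the value is in {-1, 0, 1} before interning it as an HIntConstant.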
 
-// A local in the graph. Corresponds to a Dex register.
-class HLocal : public HTemplateInstruction<0> {
+class HNewInstance FINAL : public HExpression<2> {
  public:
-  explicit HLocal(uint16_t reg_number)
-      : HTemplateInstruction(SideEffects::None(), kNoDexPc), reg_number_(reg_number) {}
+  HNewInstance(HInstruction* cls,
+               HCurrentMethod* current_method,
+               uint32_t dex_pc,
+               uint16_t type_index,
+               const DexFile& dex_file,
+               bool needs_access_check,
+               bool finalizable,
+               QuickEntrypointEnum entrypoint)
+      : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
+        type_index_(type_index),
+        dex_file_(dex_file),
+        entrypoint_(entrypoint) {
+    SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
+    SetPackedFlag<kFlagFinalizable>(finalizable);
+    SetRawInputAt(0, cls);
+    SetRawInputAt(1, current_method);
+  }
 
-  DECLARE_INSTRUCTION(Local);
+  uint16_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
 
-  uint16_t GetRegNumber() const { return reg_number_; }
+  // Calls runtime so needs an environment.
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+
+  // Can throw an error when out of memory or if the class is not instantiable/accessible.
+  bool CanThrow() const OVERRIDE { return true; }
+
+  // Needs to call into runtime to make sure it's instantiable/accessible.
+  bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
+
+  bool IsFinalizable() const { return GetPackedFlag<kFlagFinalizable>(); }
+
+  bool CanBeNull() const OVERRIDE { return false; }
+
+  QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
+
+  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+    entrypoint_ = entrypoint;
+  }
+
+  bool IsStringAlloc() const;
+
+  DECLARE_INSTRUCTION(NewInstance);
 
  private:
-  // The Dex register number.
-  const uint16_t reg_number_;
+  static constexpr size_t kFlagNeedsAccessCheck = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagFinalizable = kFlagNeedsAccessCheck + 1;
+  static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1;
+  static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
 
-  DISALLOW_COPY_AND_ASSIGN(HLocal);
-};
+  const uint16_t type_index_;
+  const DexFile& dex_file_;
+  QuickEntrypointEnum entrypoint_;
 
-// Load a given local. The local is an input of this instruction.
-class HLoadLocal : public HExpression<1> {
- public:
-  HLoadLocal(HLocal* local, Primitive::Type type, uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(LoadLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HLoadLocal);
-};
-
-// Store a value in a given local. This instruction has two inputs: the value
-// and the local.
-class HStoreLocal : public HTemplateInstruction<2> {
- public:
-  HStoreLocal(HLocal* local, HInstruction* value, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-    SetRawInputAt(1, value);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(StoreLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
-};
-
-class HFloatConstant : public HConstant {
- public:
-  float GetValue() const { return value_; }
-
-  uint64_t GetValueAsUint64() const OVERRIDE {
-    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
-  }
-
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsFloatConstant());
-    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
-  }
-
-  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
-
-  bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
-  }
-  bool IsZero() const OVERRIDE {
-    return value_ == 0.0f;
-  }
-  bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
-  }
-  bool IsNaN() const {
-    return std::isnan(value_);
-  }
-
-  DECLARE_INSTRUCTION(FloatConstant);
-
- private:
-  explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {}
-  explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {}
-
-  const float value_;
-
-  // Only the SsaBuilder and HGraph can create floating-point constants.
-  friend class SsaBuilder;
-  friend class HGraph;
-  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
-};
-
-class HDoubleConstant : public HConstant {
- public:
-  double GetValue() const { return value_; }
-
-  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
-
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsDoubleConstant());
-    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
-  }
-
-  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
-
-  bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
-  }
-  bool IsZero() const OVERRIDE {
-    return value_ == 0.0;
-  }
-  bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
-  }
-  bool IsNaN() const {
-    return std::isnan(value_);
-  }
-
-  DECLARE_INSTRUCTION(DoubleConstant);
-
- private:
-  explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {}
-  explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {}
-
-  const double value_;
-
-  // Only the SsaBuilder and HGraph can create floating-point constants.
-  friend class SsaBuilder;
-  friend class HGraph;
-  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
+  DISALLOW_COPY_AND_ASSIGN(HNewInstance);
 };
 
 enum class Intrinsics {
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) k ## Name,
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+  k ## Name,
 #include "intrinsics_list.h"
   kNone,
   INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -3188,12 +3695,27 @@
   kNeedsEnvironmentOrCache      // Intrinsic requires an environment or requires a dex cache.
 };
 
+enum IntrinsicSideEffects {
+  kNoSideEffects,     // Intrinsic does not have any heap memory side effects.
+  kReadSideEffects,   // Intrinsic may read heap memory.
+  kWriteSideEffects,  // Intrinsic may write heap memory.
+  kAllSideEffects     // Intrinsic may read or write heap memory, or trigger GC.
+};
+
+enum IntrinsicExceptions {
+  kNoThrow,  // Intrinsic does not throw any exceptions.
+  kCanThrow  // Intrinsic may throw exceptions.
+};
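
These two enums feed the widened SetIntrinsic below, letting the intrinsics
recognizer describe an intrinsic's heap behavior and throw behavior in one
call. A hypothetical call site; the intrinsic id and the
kNoEnvironmentOrCache enumerator are assumptions, not shown in this hunk:

  // Hedged sketch: tag a recognized invoke as side-effect-free, non-throwing.
  invoke->SetIntrinsic(Intrinsics::kMathAbsInt,  // assumed intrinsic id
                       kNoEnvironmentOrCache,    // assumed enumerator
                       kNoSideEffects,
                       kNoThrow);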
+
 class HInvoke : public HInstruction {
  public:
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
   bool NeedsEnvironment() const OVERRIDE;
 
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
+
   void SetArgumentAt(size_t index, HInstruction* argument) {
     SetRawInputAt(index, argument);
   }
@@ -3204,25 +3726,35 @@
   // inputs at the end of their list of inputs.
   uint32_t GetNumberOfArguments() const { return number_of_arguments_; }
 
-  Primitive::Type GetType() const OVERRIDE { return return_type_; }
-
+  Primitive::Type GetType() const OVERRIDE { return GetPackedField<ReturnTypeField>(); }
 
   uint32_t GetDexMethodIndex() const { return dex_method_index_; }
   const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); }
 
-  InvokeType GetOriginalInvokeType() const { return original_invoke_type_; }
+  InvokeType GetOriginalInvokeType() const {
+    return GetPackedField<OriginalInvokeTypeField>();
+  }
 
   Intrinsics GetIntrinsic() const {
     return intrinsic_;
   }
 
-  void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
+  void SetIntrinsic(Intrinsics intrinsic,
+                    IntrinsicNeedsEnvironmentOrCache needs_env_or_cache,
+                    IntrinsicSideEffects side_effects,
+                    IntrinsicExceptions exceptions);
 
   bool IsFromInlinedInvoke() const {
-    return GetEnvironment()->GetParent() != nullptr;
+    return GetEnvironment()->IsFromInlinedInvoke();
   }
 
-  bool CanThrow() const OVERRIDE { return true; }
+  bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
+
+  bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
+  }
 
   uint32_t* GetIntrinsicOptimizations() {
     return &intrinsic_optimizations_;
@@ -3234,9 +3766,23 @@
 
   bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
 
-  DECLARE_INSTRUCTION(Invoke);
+  DECLARE_ABSTRACT_INSTRUCTION(Invoke);
 
  protected:
+  static constexpr size_t kFieldOriginalInvokeType = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldOriginalInvokeTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType));
+  static constexpr size_t kFieldReturnType =
+      kFieldOriginalInvokeType + kFieldOriginalInvokeTypeSize;
+  static constexpr size_t kFieldReturnTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize;
+  static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1;
+  static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using OriginalInvokeTypeField =
+      BitField<InvokeType, kFieldOriginalInvokeType, kFieldOriginalInvokeTypeSize>;
+  using ReturnTypeField = BitField<Primitive::Type, kFieldReturnType, kFieldReturnTypeSize>;
+
   HInvoke(ArenaAllocator* arena,
           uint32_t number_of_arguments,
           uint32_t number_of_other_inputs,
@@ -3249,26 +3795,19 @@
       number_of_arguments_(number_of_arguments),
       inputs_(number_of_arguments + number_of_other_inputs,
               arena->Adapter(kArenaAllocInvokeInputs)),
-      return_type_(return_type),
       dex_method_index_(dex_method_index),
-      original_invoke_type_(original_invoke_type),
       intrinsic_(Intrinsics::kNone),
       intrinsic_optimizations_(0) {
+    SetPackedField<ReturnTypeField>(return_type);
+    SetPackedField<OriginalInvokeTypeField>(original_invoke_type);
+    SetPackedFlag<kFlagCanThrow>(true);
   }
 
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
+  void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
 
   uint32_t number_of_arguments_;
   ArenaVector<HUserRecord<HInstruction*>> inputs_;
-  const Primitive::Type return_type_;
   const uint32_t dex_method_index_;
-  const InvokeType original_invoke_type_;
   Intrinsics intrinsic_;
 
   // A magic word holding optimizations for intrinsics. See intrinsics.h.
@@ -3278,7 +3817,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
 };
 
-class HInvokeUnresolved : public HInvoke {
+class HInvokeUnresolved FINAL : public HInvoke {
  public:
   HInvokeUnresolved(ArenaAllocator* arena,
                     uint32_t number_of_arguments,
@@ -3301,7 +3840,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeUnresolved);
 };
 
-class HInvokeStaticOrDirect : public HInvoke {
+class HInvokeStaticOrDirect FINAL : public HInvoke {
  public:
   // Requirements of this method call regarding the class
   // initialization (clinit) check of its declaring class.
@@ -3309,6 +3848,7 @@
     kNone,      // Class already initialized.
     kExplicit,  // Static call having explicit clinit check as last input.
     kImplicit,  // Static call implicitly requiring a clinit check.
+    kLast = kImplicit
   };
 
   // Determines how to load the target ArtMethod*.
@@ -3329,7 +3869,7 @@
     // the image relocatable or not.
     kDirectAddressWithFixup,
 
-    // Load from resoved methods array in the dex cache using a PC-relative load.
+    // Load from resolved methods array in the dex cache using a PC-relative load.
     // Used when we need to use the dex cache, for example for invoke-static that
     // may cause class initialization (the entry may point to a resolution method),
     // and we know that we can access the dex cache arrays using a PC-relative load.
@@ -3368,15 +3908,15 @@
   };
 
   struct DispatchInfo {
-    const MethodLoadKind method_load_kind;
-    const CodePtrLocation code_ptr_location;
+    MethodLoadKind method_load_kind;
+    CodePtrLocation code_ptr_location;
     // The method load data holds
     //   - thread entrypoint offset for kStringInit method if this is a string init invoke.
     //     Note that there are multiple string init methods, each having its own offset.
     //   - the method address for kDirectAddress
-    //   - the dex cache arrays offset for kDexCachePcRel.
+    //   - the dex cache arrays offset for kDexCachePcRelative.
-    const uint64_t method_load_data;
-    const uint64_t direct_code_ptr;
+    uint64_t method_load_data;
+    uint64_t direct_code_ptr;
   };
 
   HInvokeStaticOrDirect(ArenaAllocator* arena,
@@ -3387,23 +3927,66 @@
                         MethodReference target_method,
                         DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type,
+                        InvokeType optimized_invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
-                // There is one extra argument for the HCurrentMethod node, and
-                // potentially one other if the clinit check is explicit, and one other
-                // if the method is a string factory.
-                1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u)
-                   + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+                // There is potentially one extra argument for the HCurrentMethod node, and
+                // potentially one other if the clinit check is explicit.
+                (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
+                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
                 return_type,
                 dex_pc,
                 method_index,
                 original_invoke_type),
-        invoke_type_(invoke_type),
-        clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
-        dispatch_info_(dispatch_info) {}
+        dispatch_info_(dispatch_info) {
+    SetPackedField<OptimizedInvokeTypeField>(optimized_invoke_type);
+    SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement);
+  }
+
+  void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+    bool had_current_method_input = HasCurrentMethodInput();
+    bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind);
+
+    // Using the current method is the default and once we find a better
+    // method load kind, we should not go back to using the current method.
+    DCHECK(had_current_method_input || !needs_current_method_input);
+
+    if (had_current_method_input && !needs_current_method_input) {
+      DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+      RemoveInputAt(GetSpecialInputIndex());
+    }
+    dispatch_info_ = dispatch_info;
+  }
+
+  void AddSpecialInput(HInstruction* input) {
+    // We allow only one special input.
+    DCHECK(!IsStringInit() && !HasCurrentMethodInput());
+    DCHECK(InputCount() == GetSpecialInputIndex() ||
+           (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
+    InsertInputAt(GetSpecialInputIndex(), input);
+  }
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords();
+    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck()) {
+      DCHECK(!input_records.empty());
+      DCHECK_GT(input_records.size(), GetNumberOfArguments());
+      HInstruction* last_input = input_records.back().GetInstruction();
+      // Note: `last_input` may be null during arguments setup.
+      if (last_input != nullptr) {
+        // `last_input` is the last input of a static invoke marked as having
+        // an explicit clinit check. It must either be:
+        // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
+        // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
+        DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName();
+      }
+    }
+    return input_records;
+  }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
@@ -3412,20 +3995,48 @@
   }
 
   bool CanBeNull() const OVERRIDE {
-    return return_type_ == Primitive::kPrimNot && !IsStringInit();
+    return GetPackedField<ReturnTypeField>() == Primitive::kPrimNot && !IsStringInit();
   }
 
-  InvokeType GetInvokeType() const { return invoke_type_; }
+  // Get the index of the special input, if any.
+  //
+  // If the invoke HasCurrentMethodInput(), the "special input" is the current
+  // method pointer; otherwise there may be one platform-specific special input,
+  // such as a PC-relative addressing base.
+  uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
+  bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); }
+
+  InvokeType GetOptimizedInvokeType() const {
+    return GetPackedField<OptimizedInvokeTypeField>();
+  }
+
+  void SetOptimizedInvokeType(InvokeType invoke_type) {
+    SetPackedField<OptimizedInvokeTypeField>(invoke_type);
+  }
+
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
-  bool NeedsDexCache() const OVERRIDE;
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
-  uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
-  bool HasPcRelDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; }
+  bool HasPcRelativeDexCache() const {
+    return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
+  }
+  bool HasCurrentMethodInput() const {
+    // This function can be called only after the invoke has been fully initialized by the builder.
+    if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
+      DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
+      return true;
+    } else {
+      DCHECK(InputCount() == GetSpecialInputIndex() ||
+             !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
+      return false;
+    }
+  }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
+  void SetTargetMethod(MethodReference method) { target_method_ = method; }
 
   int32_t GetStringInitOffset() const {
     DCHECK(IsStringInit());
@@ -3438,7 +4049,7 @@
   }
 
   uint32_t GetDexCacheArrayOffset() const {
-    DCHECK(HasPcRelDexCache());
+    DCHECK(HasPcRelativeDexCache());
     return dispatch_info_.method_load_data;
   }
 
@@ -3447,78 +4058,71 @@
     return dispatch_info_.direct_code_ptr;
   }
 
-  ClinitCheckRequirement GetClinitCheckRequirement() const { return clinit_check_requirement_; }
+  ClinitCheckRequirement GetClinitCheckRequirement() const {
+    return GetPackedField<ClinitCheckRequirementField>();
+  }
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
-    return GetInvokeType() == kStatic;
+    return GetOriginalInvokeType() == kStatic;
   }
 
-  // Remove the art::HLoadClass instruction set as last input by
-  // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of
-  // the initial art::HClinitCheck instruction (only relevant for
-  // static calls with explicit clinit check).
-  void RemoveLoadClassAsLastInput() {
+  // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
+  // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck)
+  // instruction; only relevant for static calls with explicit clinit check.
+  void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) {
     DCHECK(IsStaticWithExplicitClinitCheck());
-    size_t last_input_index = InputCount() - 1;
-    HInstruction* last_input = InputAt(last_input_index);
+    size_t last_input_index = inputs_.size() - 1u;
+    HInstruction* last_input = inputs_.back().GetInstruction();
     DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
+    DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName();
     RemoveAsUserOfInput(last_input_index);
     inputs_.pop_back();
-    clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
-    DCHECK(IsStaticWithImplicitClinitCheck());
-  }
-
-  bool IsStringFactoryFor(HFakeString* str) const {
-    if (!IsStringInit()) return false;
-    // +1 for the current method.
-    if (InputCount() == (number_of_arguments_ + 1)) return false;
-    return InputAt(InputCount() - 1)->AsFakeString() == str;
-  }
-
-  void RemoveFakeStringArgumentAsLastInput() {
-    DCHECK(IsStringInit());
-    size_t last_input_index = InputCount() - 1;
-    HInstruction* last_input = InputAt(last_input_index);
-    DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsFakeString()) << last_input->DebugName();
-    RemoveAsUserOfInput(last_input_index);
-    inputs_.pop_back();
+    SetPackedField<ClinitCheckRequirementField>(new_requirement);
+    DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
   // Is this a call to a static method whose declaring class has an
-  // explicit intialization check in the graph?
+  // explicit initialization check in the graph?
   bool IsStaticWithExplicitClinitCheck() const {
-    return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit);
+    return IsStatic() && (GetClinitCheckRequirement() == ClinitCheckRequirement::kExplicit);
   }
 
   // Is this a call to a static method whose declaring class has an
-  // implicit intialization check requirement?
+  // implicit initialization check requirement?
   bool IsStaticWithImplicitClinitCheck() const {
-    return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit);
+    return IsStatic() && (GetClinitCheckRequirement() == ClinitCheckRequirement::kImplicit);
+  }
+
+  // Does this method load kind need the current method as an input?
+  static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
+    return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod;
   }
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
-    const HUserRecord<HInstruction*> input_record = HInvoke::InputRecordAt(i);
-    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) {
-      HInstruction* input = input_record.GetInstruction();
-      // `input` is the last input of a static invoke marked as having
-      // an explicit clinit check. It must either be:
-      // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
-      // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
-      DCHECK(input != nullptr);
-      DCHECK(input->IsClinitCheck() || input->IsLoadClass()) << input->DebugName();
-    }
-    return input_record;
-  }
+  void InsertInputAt(size_t index, HInstruction* input);
+  void RemoveInputAt(size_t index);
 
  private:
-  const InvokeType invoke_type_;
-  ClinitCheckRequirement clinit_check_requirement_;
+  static constexpr size_t kFieldOptimizedInvokeType = kNumberOfInvokePackedBits;
+  static constexpr size_t kFieldOptimizedInvokeTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType));
+  static constexpr size_t kFieldClinitCheckRequirement =
+      kFieldOptimizedInvokeType + kFieldOptimizedInvokeTypeSize;
+  static constexpr size_t kFieldClinitCheckRequirementSize =
+      MinimumBitsToStore(static_cast<size_t>(ClinitCheckRequirement::kLast));
+  static constexpr size_t kNumberOfInvokeStaticOrDirectPackedBits =
+      kFieldClinitCheckRequirement + kFieldClinitCheckRequirementSize;
+  static_assert(kNumberOfInvokeStaticOrDirectPackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using OptimizedInvokeTypeField =
+      BitField<InvokeType, kFieldOptimizedInvokeType, kFieldOptimizedInvokeTypeSize>;
+  using ClinitCheckRequirementField = BitField<ClinitCheckRequirement,
+                                               kFieldClinitCheckRequirement,
+                                               kFieldClinitCheckRequirementSize>;
+
   // The target method may refer to different dex file or method index than the original
   // invoke. This happens for sharpened calls and for calls where a method was redeclared
   // in derived class to increase visibility.
@@ -3527,8 +4131,10 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
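
To make the new input layout concrete: argument inputs occupy indices [0, GetNumberOfArguments()), the optional special input (current method or a platform-specific base) sits at GetSpecialInputIndex(), and an explicit clinit check, when present, is last. A hypothetical mirror of that arithmetic follows; the struct and its names are invented for illustration only.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the HInvokeStaticOrDirect input layout:
// [arguments..., special input (optional), explicit clinit check (optional)].
struct InvokeLayoutSketch {
  uint32_t number_of_arguments;
  bool has_special_input;
  bool has_explicit_clinit_check;

  uint32_t SpecialInputIndex() const { return number_of_arguments; }
  uint32_t InputCount() const {
    return number_of_arguments + (has_special_input ? 1u : 0u) +
           (has_explicit_clinit_check ? 1u : 0u);
  }
};

int main() {
  InvokeLayoutSketch invoke{2u, true, true};
  assert(invoke.SpecialInputIndex() == 2u);
  assert(invoke.InputCount() == 4u);  // 2 args + special input + clinit check
  return 0;
}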
 
-class HInvokeVirtual : public HInvoke {
+class HInvokeVirtual FINAL : public HInvoke {
  public:
   HInvokeVirtual(ArenaAllocator* arena,
                  uint32_t number_of_arguments,
@@ -3554,7 +4160,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual);
 };
 
-class HInvokeInterface : public HInvoke {
+class HInvokeInterface FINAL : public HInvoke {
  public:
   HInvokeInterface(ArenaAllocator* arena,
                    uint32_t number_of_arguments,
@@ -3581,51 +4187,14 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNewInstance : public HExpression<1> {
- public:
-  HNewInstance(HCurrentMethod* current_method,
-               uint32_t dex_pc,
-               uint16_t type_index,
-               const DexFile& dex_file,
-               QuickEntrypointEnum entrypoint)
-      : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
-        type_index_(type_index),
-        dex_file_(dex_file),
-        entrypoint_(entrypoint) {
-    SetRawInputAt(0, current_method);
-  }
-
-  uint16_t GetTypeIndex() const { return type_index_; }
-  const DexFile& GetDexFile() const { return dex_file_; }
-
-  // Calls runtime so needs an environment.
-  bool NeedsEnvironment() const OVERRIDE { return true; }
-  // It may throw when called on:
-  //   - interfaces
-  //   - abstract/innaccessible/unknown classes
-  // TODO: optimize when possible.
-  bool CanThrow() const OVERRIDE { return true; }
-
-  bool CanBeNull() const OVERRIDE { return false; }
-
-  QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
-
-  DECLARE_INSTRUCTION(NewInstance);
-
- private:
-  const uint16_t type_index_;
-  const DexFile& dex_file_;
-  const QuickEntrypointEnum entrypoint_;
-
-  DISALLOW_COPY_AND_ASSIGN(HNewInstance);
-};
-
-class HNeg : public HUnaryOperation {
+class HNeg FINAL : public HUnaryOperation {
  public:
   HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
-      : HUnaryOperation(result_type, input, dex_pc) {}
+      : HUnaryOperation(result_type, input, dex_pc) {
+    DCHECK_EQ(result_type, Primitive::PrimitiveKind(input->GetType()));
+  }
 
-  template <typename T> T Compute(T x) const { return -x; }
+  template <typename T> static T Compute(T x) { return -x; }
 
   HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
@@ -3633,6 +4202,12 @@
   HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(Compute(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(Compute(x->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Neg);
 
@@ -3640,7 +4215,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNeg);
 };
 
-class HNewArray : public HExpression<2> {
+class HNewArray FINAL : public HExpression<2> {
  public:
   HNewArray(HInstruction* length,
             HCurrentMethod* current_method,
@@ -3679,7 +4254,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNewArray);
 };
 
-class HAdd : public HBinaryOperation {
+class HAdd FINAL : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type,
        HInstruction* left,
@@ -3689,7 +4264,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x + y; }
+  template <typename T> static T Compute(T x, T y) { return x + y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -3699,6 +4274,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Add);
 
@@ -3706,7 +4289,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAdd);
 };
 
-class HSub : public HBinaryOperation {
+class HSub FINAL : public HBinaryOperation {
  public:
   HSub(Primitive::Type result_type,
        HInstruction* left,
@@ -3714,7 +4297,7 @@
        uint32_t dex_pc = kNoDexPc)
       : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
 
-  template <typename T> T Compute(T x, T y) const { return x - y; }
+  template <typename T> static T Compute(T x, T y) { return x - y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -3724,6 +4307,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Sub);
 
@@ -3731,7 +4322,7 @@
   DISALLOW_COPY_AND_ASSIGN(HSub);
 };
 
-class HMul : public HBinaryOperation {
+class HMul FINAL : public HBinaryOperation {
  public:
   HMul(Primitive::Type result_type,
        HInstruction* left,
@@ -3741,7 +4332,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x * y; }
+  template <typename T> static T Compute(T x, T y) { return x * y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -3751,6 +4342,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Mul);
 
@@ -3758,7 +4357,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMul);
 };
 
-class HDiv : public HBinaryOperation {
+class HDiv FINAL : public HBinaryOperation {
  public:
   HDiv(Primitive::Type result_type,
        HInstruction* left,
@@ -3767,7 +4366,8 @@
       : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
 
   template <typename T>
-  T Compute(T x, T y) const {
+  T ComputeIntegral(T x, T y) const {
+    DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType();
     // Our graph structure ensures we never have 0 for `y` during
     // constant folding.
     DCHECK_NE(y, 0);
@@ -3775,13 +4375,27 @@
     return (y == -1) ? -x : x / y;
   }
 
+  template <typename T>
+  T ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+    return x / y;
+  }
+
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -3795,7 +4409,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
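
Why ComputeIntegral special-cases y == -1: Java defines Integer.MIN_VALUE / -1 to wrap back to Integer.MIN_VALUE, while the x86 idiv instruction traps (SIGFPE) on that operand pair and plain C++ leaves it undefined. A standalone sketch under those assumptions, not the ART code itself:

#include <cassert>
#include <cstdint>

static int32_t JavaDivSketch(int32_t x, int32_t y) {
  assert(y != 0);  // the graph guarantees a non-zero divisor during folding
  if (y == -1) {
    // Negate through unsigned arithmetic so INT32_MIN wraps to itself without
    // signed-overflow UB; ComputeIntegral spells this `-x` and relies on
    // two's-complement wrapping.
    return static_cast<int32_t>(0u - static_cast<uint32_t>(x));
  }
  return x / y;
}

int main() {
  assert(JavaDivSketch(7, 2) == 3);
  assert(JavaDivSketch(INT32_MIN, -1) == INT32_MIN);  // wraps, does not trap
  return 0;
}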
 
-class HRem : public HBinaryOperation {
+class HRem FINAL : public HBinaryOperation {
  public:
   HRem(Primitive::Type result_type,
        HInstruction* left,
@@ -3804,7 +4418,8 @@
       : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
 
   template <typename T>
-  T Compute(T x, T y) const {
+  T ComputeIntegral(T x, T y) const {
+    DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType();
     // Our graph structure ensures we never have 0 for `y` during
     // constant folding.
     DCHECK_NE(y, 0);
@@ -3812,15 +4427,28 @@
     return (y == -1) ? 0 : x % y;
   }
 
+  template <typename T>
+  T ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+    return std::fmod(x, y);
+  }
+
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
-
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -3832,10 +4460,12 @@
   DISALLOW_COPY_AND_ASSIGN(HRem);
 };
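
For the floating-point path, std::fmod (from <cmath>) already matches Java's remainder semantics: the result takes the sign of the dividend, and fmod(x, 0.0) yields NaN rather than trapping. A small self-check:

#include <cassert>
#include <cmath>

int main() {
  // The remainder keeps the dividend's sign, as Java's % does for doubles.
  assert(std::fmod(5.5, 2.0) == 1.5);
  assert(std::fmod(-5.5, 2.0) == -1.5);
  return 0;
}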
 
-class HDivZeroCheck : public HExpression<1> {
+class HDivZeroCheck FINAL : public HExpression<1> {
  public:
+  // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
+  // constructor.
   HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
-      : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+      : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
     SetRawInputAt(0, value);
   }
 
@@ -3843,7 +4473,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -3856,34 +4486,44 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
-class HShl : public HBinaryOperation {
+class HShl FINAL : public HBinaryOperation {
  public:
   HShl(Primitive::Type result_type,
-       HInstruction* left,
-       HInstruction* right,
+       HInstruction* value,
+       HInstruction* distance,
        uint32_t dex_pc = kNoDexPc)
-      : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
-
-  template <typename T, typename U, typename V>
-  T Compute(T x, U y, V max_shift_value) const {
-    static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
-                  "V is not the unsigned integer type corresponding to T");
-    return x << (y & max_shift_value);
+      : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) {
+    DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType()));
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType()));
   }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+  template <typename T>
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
+    return value << (distance & max_shift_distance);
+  }
+
+  HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+        Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
   }
-  // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this
-  // case is handled as `x << static_cast<int>(y)`.
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+  HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+        Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
   }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
+                      HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
+                      HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
+                      HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Shl);
@@ -3892,34 +4532,44 @@
   DISALLOW_COPY_AND_ASSIGN(HShl);
 };
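
The `distance & max_shift_distance` mask encodes Java's shift rule: only the low five bits of the distance apply to int shifts (six for long). A sketch assuming the same 0x1f mask:

#include <cassert>
#include <cstdint>

static int32_t ShlIntSketch(int32_t value, int32_t distance) {
  return value << (distance & 31);  // 31 mirrors kMaxIntShiftDistance (assumed)
}

int main() {
  assert(ShlIntSketch(1, 33) == ShlIntSketch(1, 1));  // 33 & 31 == 1, as in Java
  return 0;
}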
 
-class HShr : public HBinaryOperation {
+class HShr FINAL : public HBinaryOperation {
  public:
   HShr(Primitive::Type result_type,
-       HInstruction* left,
-       HInstruction* right,
+       HInstruction* value,
+       HInstruction* distance,
        uint32_t dex_pc = kNoDexPc)
-      : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
-
-  template <typename T, typename U, typename V>
-  T Compute(T x, U y, V max_shift_value) const {
-    static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
-                  "V is not the unsigned integer type corresponding to T");
-    return x >> (y & max_shift_value);
+      : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) {
+    DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType()));
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType()));
   }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+  template <typename T>
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
+    return value >> (distance & max_shift_distance);
+  }
+
+  HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+        Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
   }
-  // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this
-  // case is handled as `x >> static_cast<int>(y)`.
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+  HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+        Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
   }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
+                      HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
+                      HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
+                      HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Shr);
@@ -3928,35 +4578,46 @@
   DISALLOW_COPY_AND_ASSIGN(HShr);
 };
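
HShr is the arithmetic counterpart: the sign bit is replicated on the right shift, matching Java's >>. A sketch that assumes arithmetic shift for negative operands, which the compilers ART targets provide and C++20 mandates:

#include <cassert>
#include <cstdint>

static int32_t ShrSketch(int32_t value, int32_t distance) {
  return value >> (distance & 31);  // sign-extending shift
}

int main() {
  assert(ShrSketch(-8, 1) == -4);   // the sign bit is preserved
  assert(ShrSketch(-1, 31) == -1);  // all bits remain set
  return 0;
}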
 
-class HUShr : public HBinaryOperation {
+class HUShr FINAL : public HBinaryOperation {
  public:
   HUShr(Primitive::Type result_type,
-        HInstruction* left,
-        HInstruction* right,
+        HInstruction* value,
+        HInstruction* distance,
         uint32_t dex_pc = kNoDexPc)
-      : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
-
-  template <typename T, typename U, typename V>
-  T Compute(T x, U y, V max_shift_value) const {
-    static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
-                  "V is not the unsigned integer type corresponding to T");
-    V ux = static_cast<V>(x);
-    return static_cast<T>(ux >> (y & max_shift_value));
+      : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) {
+    DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType()));
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType()));
   }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+  template <typename T>
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
+    typedef typename std::make_unsigned<T>::type V;
+    V ux = static_cast<V>(value);
+    return static_cast<T>(ux >> (distance & max_shift_distance));
+  }
+
+  HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+        Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
   }
-  // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this
-  // case is handled as `x >>> static_cast<int>(y)`.
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+  HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+        Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
   }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
+                      HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
+                      HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
+                      HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(UShr);
@@ -3965,7 +4626,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUShr);
 };
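
HUShr, by contrast, round-trips the value through the unsigned type so the shift fills with zeros, which is exactly Java's >>>. Standalone sketch:

#include <cassert>
#include <cstdint>

static int32_t UShrSketch(int32_t value, int32_t distance) {
  // Cast to unsigned so the shift fills with zeros instead of the sign bit.
  uint32_t ux = static_cast<uint32_t>(value);
  return static_cast<int32_t>(ux >> (distance & 31));
}

int main() {
  assert(UShrSketch(-1, 28) == 15);  // Java: -1 >>> 28 == 15
  return 0;
}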
 
-class HAnd : public HBinaryOperation {
+class HAnd FINAL : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type,
        HInstruction* left,
@@ -3975,25 +4636,26 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x & y) { return x & y; }
+  template <typename T> static T Compute(T x, T y) { return x & y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
-  HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(And);
 
@@ -4001,7 +4663,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAnd);
 };
 
-class HOr : public HBinaryOperation {
+class HOr FINAL : public HBinaryOperation {
  public:
   HOr(Primitive::Type result_type,
       HInstruction* left,
@@ -4011,25 +4673,26 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x | y) { return x | y; }
+  template <typename T> static T Compute(T x, T y) { return x | y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
-  HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Or);
 
@@ -4037,7 +4700,7 @@
   DISALLOW_COPY_AND_ASSIGN(HOr);
 };
 
-class HXor : public HBinaryOperation {
+class HXor FINAL : public HBinaryOperation {
  public:
   HXor(Primitive::Type result_type,
        HInstruction* left,
@@ -4047,25 +4710,26 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x ^ y) { return x ^ y; }
+  template <typename T> static T Compute(T x, T y) { return x ^ y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
-  HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Xor);
 
@@ -4073,9 +4737,60 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
+class HRor FINAL : public HBinaryOperation {
+ public:
+  HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
+    : HBinaryOperation(result_type, value, distance) {
+    DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType()));
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType()));
+  }
+
+  template <typename T>
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
+    typedef typename std::make_unsigned<T>::type V;
+    V ux = static_cast<V>(value);
+    if ((distance & max_shift_distance) == 0) {
+      return static_cast<T>(ux);
+    } else {
+      const V reg_bits = sizeof(T) * 8;
+      return static_cast<T>(ux >> (distance & max_shift_distance)) |
+                           (value << (reg_bits - (distance & max_shift_distance)));
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
+                      HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
+                      HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
+                      HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+
+  DECLARE_INSTRUCTION(Ror);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HRor);
+};
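
A worked example of the rotate-right computation above, re-stated standalone; the 31 mask is an assumption consistent with the int shift nodes (kMaxIntShiftDistance):

#include <cassert>
#include <cstdint>

static int32_t RorSketch(int32_t value, int32_t distance) {
  uint32_t ux = static_cast<uint32_t>(value);
  int32_t d = distance & 31;  // assumed kMaxIntShiftDistance
  if (d == 0) {
    return static_cast<int32_t>(ux);
  }
  // Bits shifted out on the right re-enter on the left.
  return static_cast<int32_t>((ux >> d) | (ux << (32 - d)));
}

int main() {
  assert(RorSketch(0x00000001, 1) == static_cast<int32_t>(0x80000000u));
  assert(RorSketch(RorSketch(42, 7), 32 - 7) == 42);  // rotations compose
  return 0;
}

The sketch performs the left shift on the unsigned copy to sidestep signed-overflow concerns; the node's Compute shifts the signed value, with the same effect on two's-complement targets.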
+
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
-class HParameterValue : public HExpression<0> {
+class HParameterValue FINAL : public HExpression<0> {
  public:
   HParameterValue(const DexFile& dex_file,
                   uint16_t type_index,
@@ -4085,46 +4800,49 @@
       : HExpression(parameter_type, SideEffects::None(), kNoDexPc),
         dex_file_(dex_file),
         type_index_(type_index),
-        index_(index),
-        is_this_(is_this),
-        can_be_null_(!is_this) {}
+        index_(index) {
+    SetPackedFlag<kFlagIsThis>(is_this);
+    SetPackedFlag<kFlagCanBeNull>(!is_this);
+  }
 
   const DexFile& GetDexFile() const { return dex_file_; }
   uint16_t GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
-  bool IsThis() const { return is_this_; }
+  bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
 
-  bool CanBeNull() const OVERRIDE { return can_be_null_; }
-  void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
+  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
 
   DECLARE_INSTRUCTION(ParameterValue);
 
  private:
+  // Whether or not the parameter value corresponds to the 'this' argument.
+  static constexpr size_t kFlagIsThis = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagCanBeNull = kFlagIsThis + 1;
+  static constexpr size_t kNumberOfParameterValuePackedBits = kFlagCanBeNull + 1;
+  static_assert(kNumberOfParameterValuePackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   const DexFile& dex_file_;
   const uint16_t type_index_;
   // The index of this parameter in the parameters list. Must be less
   // than HGraph::number_of_in_vregs_.
   const uint8_t index_;
 
-  // Whether or not the parameter value corresponds to 'this' argument.
-  const bool is_this_;
-
-  bool can_be_null_;
-
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
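
The is_this/can_be_null booleans move into the same packed word as the multi-bit fields. A hedged sketch of the assumed one-bit-per-flag behavior behind SetPackedFlag/GetPackedFlag; the class and flag indices below are invented for illustration:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for HInstruction's packed flag storage (assumption:
// flags share one word, one bit per kFlag* index).
class PackedFlagsSketch {
 public:
  template <size_t kFlag> void SetFlag(bool value) {
    uint32_t mask = 1u << kFlag;
    bits_ = value ? (bits_ | mask) : (bits_ & ~mask);
  }
  template <size_t kFlag> bool GetFlag() const {
    return (bits_ & (1u << kFlag)) != 0u;
  }

 private:
  uint32_t bits_ = 0u;
};

int main() {
  constexpr size_t kFlagIsThis = 0, kFlagCanBeNull = 1;  // illustrative indices
  PackedFlagsSketch flags;
  flags.SetFlag<kFlagIsThis>(true);
  flags.SetFlag<kFlagCanBeNull>(!flags.GetFlag<kFlagIsThis>());
  assert(flags.GetFlag<kFlagIsThis>() && !flags.GetFlag<kFlagCanBeNull>());
  return 0;
}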
 
-class HNot : public HUnaryOperation {
+class HNot FINAL : public HUnaryOperation {
  public:
   HNot(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
-  template <typename T> T Compute(T x) const { return ~x; }
+  template <typename T> static T Compute(T x) { return ~x; }
 
   HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
@@ -4132,6 +4850,14 @@
   HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Not);
 
@@ -4139,18 +4865,18 @@
   DISALLOW_COPY_AND_ASSIGN(HNot);
 };
 
-class HBooleanNot : public HUnaryOperation {
+class HBooleanNot FINAL : public HUnaryOperation {
  public:
   explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
-  template <typename T> bool Compute(T x) const {
-    DCHECK(IsUint<1>(x));
+  template <typename T> static bool Compute(T x) {
+    DCHECK(IsUint<1>(x)) << x;
     return !x;
   }
 
@@ -4161,6 +4887,14 @@
     LOG(FATAL) << DebugName() << " is not defined for long values";
     UNREACHABLE();
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(BooleanNot);
 
@@ -4168,7 +4902,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBooleanNot);
 };
 
-class HTypeConversion : public HExpression<1> {
+class HTypeConversion FINAL : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
   HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
@@ -4176,18 +4910,18 @@
                     SideEffectsForArchRuntimeCalls(input->GetType(), result_type),
                     dex_pc) {
     SetRawInputAt(0, input);
-    DCHECK_NE(input->GetType(), result_type);
+    // Invariant: We should never generate a conversion to a Boolean value.
+    DCHECK_NE(Primitive::kPrimBoolean, result_type);
   }
 
   HInstruction* GetInput() const { return InputAt(0); }
   Primitive::Type GetInputType() const { return GetInput()->GetType(); }
   Primitive::Type GetResultType() const { return GetType(); }
 
-  // Required by the x86, ARM, MIPS and MIPS64 code generators when producing calls
-  // to the runtime.
-
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
 
   // Try to statically evaluate the conversion and return a HConstant
   // containing the result.  If the input cannot be converted, return nullptr.
@@ -4213,105 +4947,17 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HPhi : public HInstruction {
+class HNullCheck FINAL : public HExpression<1> {
  public:
-  HPhi(ArenaAllocator* arena,
-       uint32_t reg_number,
-       size_t number_of_inputs,
-       Primitive::Type type,
-       uint32_t dex_pc = kNoDexPc)
-      : HInstruction(SideEffects::None(), dex_pc),
-        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
-        reg_number_(reg_number),
-        type_(type),
-        is_live_(false),
-        can_be_null_(true) {
-  }
-
-  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
-  static Primitive::Type ToPhiType(Primitive::Type type) {
-    switch (type) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimChar:
-        return Primitive::kPrimInt;
-      default:
-        return type;
-    }
-  }
-
-  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
-
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
-  void AddInput(HInstruction* input);
-  void RemoveInputAt(size_t index);
-
-  Primitive::Type GetType() const OVERRIDE { return type_; }
-  void SetType(Primitive::Type type) { type_ = type; }
-
-  bool CanBeNull() const OVERRIDE { return can_be_null_; }
-  void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
-
-  uint32_t GetRegNumber() const { return reg_number_; }
-
-  void SetDead() { is_live_ = false; }
-  void SetLive() { is_live_ = true; }
-  bool IsDead() const { return !is_live_; }
-  bool IsLive() const { return is_live_; }
-
-  bool IsVRegEquivalentOf(HInstruction* other) const {
-    return other != nullptr
-        && other->IsPhi()
-        && other->AsPhi()->GetBlock() == GetBlock()
-        && other->AsPhi()->GetRegNumber() == GetRegNumber();
-  }
-
-  // Returns the next equivalent phi (starting from the current one) or null if there is none.
-  // An equivalent phi is a phi having the same dex register and type.
-  // It assumes that phis with the same dex register are adjacent.
-  HPhi* GetNextEquivalentPhiWithSameType() {
-    HInstruction* next = GetNext();
-    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
-      if (next->GetType() == GetType()) {
-        return next->AsPhi();
-      }
-      next = next->GetNext();
-    }
-    return nullptr;
-  }
-
-  DECLARE_INSTRUCTION(Phi);
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
- private:
-  ArenaVector<HUserRecord<HInstruction*> > inputs_;
-  const uint32_t reg_number_;
-  Primitive::Type type_;
-  bool is_live_;
-  bool can_be_null_;
-
-  DISALLOW_COPY_AND_ASSIGN(HPhi);
-};
-
-class HNullCheck : public HExpression<1> {
- public:
+  // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
+  // constructor.
   HNullCheck(HInstruction* value, uint32_t dex_pc)
-      : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+      : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
     SetRawInputAt(0, value);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4363,7 +5009,7 @@
   const Handle<mirror::DexCache> dex_cache_;
 };
 
-class HInstanceFieldGet : public HExpression<1> {
+class HInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HInstanceFieldGet(HInstruction* value,
                     Primitive::Type field_type,
@@ -4389,13 +5035,13 @@
 
   bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    const HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
     return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   size_t ComputeHashCode() const OVERRIDE {
@@ -4415,7 +5061,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet);
 };
 
-class HInstanceFieldSet : public HTemplateInstruction<2> {
+class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HInstanceFieldSet(HInstruction* object,
                     HInstruction* value,
@@ -4435,14 +5081,14 @@
                     field_idx,
                     declaring_class_def_index,
                     dex_file,
-                    dex_cache),
-        value_can_be_null_(true) {
+                    dex_cache) {
+    SetPackedFlag<kFlagValueCanBeNull>(true);
     SetRawInputAt(0, object);
     SetRawInputAt(1, value);
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   const FieldInfo& GetFieldInfo() const { return field_info_; }
@@ -4450,34 +5096,37 @@
   Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
   bool IsVolatile() const { return field_info_.IsVolatile(); }
   HInstruction* GetValue() const { return InputAt(1); }
-  bool GetValueCanBeNull() const { return value_can_be_null_; }
-  void ClearValueCanBeNull() { value_can_be_null_ = false; }
+  bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); }
+  void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
 
   DECLARE_INSTRUCTION(InstanceFieldSet);
 
  private:
+  static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
+  static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagValueCanBeNull + 1;
+  static_assert(kNumberOfInstanceFieldSetPackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   const FieldInfo field_info_;
-  bool value_can_be_null_;
 
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet);
 };
 
-class HArrayGet : public HExpression<2> {
+class HArrayGet FINAL : public HExpression<2> {
  public:
   HArrayGet(HInstruction* array,
             HInstruction* index,
             Primitive::Type type,
             uint32_t dex_pc,
-            SideEffects additional_side_effects = SideEffects::None())
-      : HExpression(type,
-                    SideEffects::ArrayReadOfType(type).Union(additional_side_effects),
-                    dex_pc) {
+            bool is_string_char_at = false)
+      : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
+    SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at);
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -4489,7 +5138,23 @@
     return false;
   }
 
-  void SetType(Primitive::Type type) { type_ = type; }
+  bool IsEquivalentOf(HArrayGet* other) const {
+    bool result = (GetDexPc() == other->GetDexPc());
+    if (kIsDebugBuild && result) {
+      DCHECK_EQ(GetBlock(), other->GetBlock());
+      DCHECK_EQ(GetArray(), other->GetArray());
+      DCHECK_EQ(GetIndex(), other->GetIndex());
+      if (Primitive::IsIntOrLongType(GetType())) {
+        DCHECK(Primitive::IsFloatingPointType(other->GetType())) << other->GetType();
+      } else {
+        DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+        DCHECK(Primitive::IsIntOrLongType(other->GetType())) << other->GetType();
+      }
+    }
+    return result;
+  }
+
+  bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -4497,39 +5162,45 @@
   DECLARE_INSTRUCTION(ArrayGet);
 
  private:
+  // We treat a String as an array, creating the HArrayGet from String.charAt()
+  // intrinsic in the instruction simplifier. We can always determine whether
+  // a particular HArrayGet is actually a String.charAt() by looking at the type
+  // of the input, but that requires holding the mutator lock, so we prefer to
+  // use a flag that code generators can check without taking the lock.
+  static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1;
+  static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayGet);
 };
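
// What "equivalent" means above, in miniature: the int- and float-typed
// HArrayGet pair read the same array slot, and their values are related by a
// bit-for-bit reinterpretation of the loaded bits. A self-contained sketch
// (not ART code):
#include <cstdint>
#include <cstring>

inline float AsFloatView(int32_t int_view) {
  float float_view;
  static_assert(sizeof(float_view) == sizeof(int_view), "views must be the same size");
  std::memcpy(&float_view, &int_view, sizeof(float_view));  // same bits, new type
  return float_view;
}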
 
-class HArraySet : public HTemplateInstruction<3> {
+class HArraySet FINAL : public HTemplateInstruction<3> {
  public:
   HArraySet(HInstruction* array,
             HInstruction* index,
             HInstruction* value,
             Primitive::Type expected_component_type,
-            uint32_t dex_pc,
-            SideEffects additional_side_effects = SideEffects::None())
-      : HTemplateInstruction(
-            SideEffects::ArrayWriteOfType(expected_component_type).Union(
-                SideEffectsForArchRuntimeCalls(value->GetType())).Union(
-                    additional_side_effects),
-            dex_pc),
-        expected_component_type_(expected_component_type),
-        needs_type_check_(value->GetType() == Primitive::kPrimNot),
-        value_can_be_null_(true),
-        static_type_of_array_is_object_array_(false) {
+            uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::None(), dex_pc) {
+    SetPackedField<ExpectedComponentTypeField>(expected_component_type);
+    SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == Primitive::kPrimNot);
+    SetPackedFlag<kFlagValueCanBeNull>(true);
+    SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false);
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
+    // Make a best guess now; it may be refined during SSA building.
+    ComputeSideEffects();
   }
 
   bool NeedsEnvironment() const OVERRIDE {
-    // We currently always call a runtime method to catch array store
-    // exceptions.
-    return needs_type_check_;
+    // We call a runtime method to throw ArrayStoreException.
+    return NeedsTypeCheck();
   }
 
   // Can throw ArrayStoreException.
-  bool CanThrow() const OVERRIDE { return needs_type_check_; }
+  bool CanThrow() const OVERRIDE { return NeedsTypeCheck(); }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // TODO: Same as for ArrayGet.
@@ -4537,20 +5208,22 @@
   }
 
   void ClearNeedsTypeCheck() {
-    needs_type_check_ = false;
+    SetPackedFlag<kFlagNeedsTypeCheck>(false);
   }
 
   void ClearValueCanBeNull() {
-    value_can_be_null_ = false;
+    SetPackedFlag<kFlagValueCanBeNull>(false);
   }
 
   void SetStaticTypeOfArrayIsObjectArray() {
-    static_type_of_array_is_object_array_ = true;
+    SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(true);
   }
 
-  bool GetValueCanBeNull() const { return value_can_be_null_; }
-  bool NeedsTypeCheck() const { return needs_type_check_; }
-  bool StaticTypeOfArrayIsObjectArray() const { return static_type_of_array_is_object_array_; }
+  bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); }
+  bool NeedsTypeCheck() const { return GetPackedFlag<kFlagNeedsTypeCheck>(); }
+  bool StaticTypeOfArrayIsObjectArray() const {
+    return GetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>();
+  }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -4564,11 +5237,17 @@
     Primitive::Type value_type = GetValue()->GetType();
     return ((value_type == Primitive::kPrimFloat) || (value_type == Primitive::kPrimDouble))
         ? value_type
-        : expected_component_type_;
+        : GetRawExpectedComponentType();
   }
 
   Primitive::Type GetRawExpectedComponentType() const {
-    return expected_component_type_;
+    return GetPackedField<ExpectedComponentTypeField>();
+  }
+
+  void ComputeSideEffects() {
+    Primitive::Type type = GetComponentType();
+    SetSideEffects(SideEffects::ArrayWriteOfType(type).Union(
+        SideEffectsForArchRuntimeCalls(type)));
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type value_type) {
@@ -4578,50 +5257,77 @@
   DECLARE_INSTRUCTION(ArraySet);
 
  private:
-  const Primitive::Type expected_component_type_;
-  bool needs_type_check_;
-  bool value_can_be_null_;
+  static constexpr size_t kFieldExpectedComponentType = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldExpectedComponentTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kFlagNeedsTypeCheck =
+      kFieldExpectedComponentType + kFieldExpectedComponentTypeSize;
+  static constexpr size_t kFlagValueCanBeNull = kFlagNeedsTypeCheck + 1;
   // Cached information for the reference_type_info_ so that codegen
   // does not need to inspect the static type.
-  bool static_type_of_array_is_object_array_;
+  static constexpr size_t kFlagStaticTypeOfArrayIsObjectArray = kFlagValueCanBeNull + 1;
+  static constexpr size_t kNumberOfArraySetPackedBits =
+      kFlagStaticTypeOfArrayIsObjectArray + 1;
+  static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using ExpectedComponentTypeField =
+      BitField<Primitive::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>;
 
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
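
// The constants above carve one packed word into adjacent bit ranges: the
// expected component type first, then three single-bit flags. A minimal,
// self-contained sketch of the same BitField pattern (names and helpers here
// are illustrative, not the real ART ones):
#include <cstddef>
#include <cstdint>

constexpr size_t MinimumBitsToStoreSketch(size_t max_value) {
  // Number of bits needed to represent max_value (0 needs 0 bits).
  return max_value == 0 ? 0 : 1 + MinimumBitsToStoreSketch(max_value >> 1);
}

template <typename T, size_t kShift, size_t kSize>
struct BitFieldSketch {
  static constexpr uint32_t kMask = ((1u << kSize) - 1u) << kShift;
  static uint32_t Encode(uint32_t word, T value) {
    return (word & ~kMask) | (static_cast<uint32_t>(value) << kShift);
  }
  static T Decode(uint32_t word) {
    return static_cast<T>((word & kMask) >> kShift);
  }
};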
 
-class HArrayLength : public HExpression<1> {
+class HArrayLength FINAL : public HExpression<1> {
  public:
-  HArrayLength(HInstruction* array, uint32_t dex_pc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+    SetPackedFlag<kFlagIsStringLength>(is_string_length);
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
     SetRawInputAt(0, array);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     return obj == InputAt(0);
   }
 
+  bool IsStringLength() const { return GetPackedFlag<kFlagIsStringLength>(); }
+
   DECLARE_INSTRUCTION(ArrayLength);
 
  private:
+  // We treat a String as an array, creating the HArrayLength from the String.length()
+  // or String.isEmpty() intrinsics in the instruction simplifier. We can always
+  // determine whether a particular HArrayLength is actually a String.length() by
+  // looking at the type of the input, but that requires holding the mutator lock, so
+  // we prefer to use a flag so that code generators don't need to do the locking.
+  static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1;
+  static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayLength);
 };
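
// Sketch of the simplifier rewrites the comment above refers to (pseudo-IR,
// shapes assumed here for illustration only):
//
//   InvokeVirtual String.length(s)   ==>  ArrayLength(s) [is_string_length]
//   InvokeVirtual String.isEmpty(s)  ==>  Equal(ArrayLength(s) [is_string_length], 0)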
 
-class HBoundsCheck : public HExpression<2> {
+class HBoundsCheck FINAL : public HExpression<2> {
  public:
-  HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
-      : HExpression(index->GetType(), SideEffects::None(), dex_pc) {
-    DCHECK(index->GetType() == Primitive::kPrimInt);
+  // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
+  // constructor.
+  HBoundsCheck(HInstruction* index,
+               HInstruction* length,
+               uint32_t dex_pc,
+               uint32_t string_char_at_method_index = DexFile::kDexNoIndex)
+      : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc),
+        string_char_at_method_index_(string_char_at_method_index) {
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType()));
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4629,44 +5335,29 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  bool IsStringCharAt() const { return GetStringCharAtMethodIndex() != DexFile::kDexNoIndex; }
+  uint32_t GetStringCharAtMethodIndex() const { return string_char_at_method_index_; }
+
   HInstruction* GetIndex() const { return InputAt(0); }
 
   DECLARE_INSTRUCTION(BoundsCheck);
 
  private:
+  // We treat a String as an array, creating the HBoundsCheck from the String.charAt()
+  // intrinsic in the instruction simplifier. We want the String.charAt() frame to
+  // appear in the stack trace if we actually throw the StringIndexOutOfBoundsException,
+  // so we need to create an HEnvironment which will be translated to an InlineInfo
+  // indicating the extra stack frame. Since we add this HEnvironment quite late,
+  // in the PrepareForRegisterAllocation pass, we need to remember the method index
+  // from the invoke as we don't want to look at the dex bytecode again.
+  uint32_t string_char_at_method_index_;  // DexFile::kDexNoIndex if regular array.
+
   DISALLOW_COPY_AND_ASSIGN(HBoundsCheck);
 };
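
// The method index above doubles as the IsStringCharAt() bit by reserving
// DexFile::kDexNoIndex as a sentinel. The pattern in miniature (the concrete
// sentinel value below is assumed for illustration):
#include <cstdint>

constexpr uint32_t kNoIndexSketch = 0xFFFFFFFFu;  // stand-in for DexFile::kDexNoIndex

struct BoundsCheckInfoSketch {
  uint32_t string_char_at_method_index = kNoIndexSketch;
  bool IsStringCharAt() const { return string_char_at_method_index != kNoIndexSketch; }
};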
 
-/**
- * Some DEX instructions are folded into multiple HInstructions that need
- * to stay live until the last HInstruction. This class
- * is used as a marker for the baseline compiler to ensure its preceding
- * HInstruction stays live. `index` represents the stack location index of the
- * instruction (the actual offset is computed as index * vreg_size).
- */
-class HTemporary : public HTemplateInstruction<0> {
+class HSuspendCheck FINAL : public HTemplateInstruction<0> {
  public:
-  explicit HTemporary(size_t index, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc), index_(index) {}
-
-  size_t GetIndex() const { return index_; }
-
-  Primitive::Type GetType() const OVERRIDE {
-    // The previous instruction is the one that will be stored in the temporary location.
-    DCHECK(GetPrevious() != nullptr);
-    return GetPrevious()->GetType();
-  }
-
-  DECLARE_INSTRUCTION(Temporary);
-
- private:
-  const size_t index_;
-  DISALLOW_COPY_AND_ASSIGN(HTemporary);
-};
-
-class HSuspendCheck : public HTemplateInstruction<0> {
- public:
-  explicit HSuspendCheck(uint32_t dex_pc)
+  explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {}
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -4686,70 +5377,142 @@
   DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
 };
 
+// Pseudo-instruction which provides the native debugger with mapping information.
+// It ensures that we can generate line numbers and local variable information at this point.
+class HNativeDebugInfo : public HTemplateInstruction<0> {
+ public:
+  explicit HNativeDebugInfo(uint32_t dex_pc)
+      : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {}
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return true;
+  }
+
+  DECLARE_INSTRUCTION(NativeDebugInfo);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo);
+};
+
 /**
  * Instruction to load a Class object.
  */
-class HLoadClass : public HExpression<1> {
+class HLoadClass FINAL : public HInstruction {
  public:
+  // Determines how to load the Class.
+  enum class LoadKind {
+    // Use the Class* from the method's own ArtMethod*.
+    kReferrersClass,
+
+    // Use boot image Class* address that will be known at link time.
+    // Used for boot image classes referenced by boot image code in non-PIC mode.
+    kBootImageLinkTimeAddress,
+
+    // Use PC-relative boot image Class* address that will be known at link time.
+    // Used for boot image classes referenced by boot image code in PIC mode.
+    kBootImageLinkTimePcRelative,
+
+    // Use a known boot image Class* address, embedded in the code by the codegen.
+    // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
+    // Note: codegen needs to emit a linker patch if indicated by compiler options'
+    // GetIncludePatchInformation().
+    kBootImageAddress,
+
+    // Load from the resolved types array at an absolute address.
+    // Used for classes outside the boot image referenced by JIT-compiled code.
+    kDexCacheAddress,
+
+    // Load from resolved types array in the dex cache using a PC-relative load.
+    // Used for classes outside boot image when we know that we can access
+    // the dex cache arrays using a PC-relative load.
+    kDexCachePcRelative,
+
+    // Load from resolved types array accessed through the class loaded from
+    // the compiled method's own ArtMethod*. This is the default access type when
+    // all other types are unavailable.
+    kDexCacheViaMethod,
+
+    kLast = kDexCacheViaMethod
+  };
+
   HLoadClass(HCurrentMethod* current_method,
              uint16_t type_index,
              const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc,
-             bool needs_access_check)
-      : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
+             bool needs_access_check,
+             bool is_in_dex_cache)
+      : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
+        special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
         dex_file_(dex_file),
-        is_referrers_class_(is_referrers_class),
-        generate_clinit_check_(false),
-        needs_access_check_(needs_access_check),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
-    DCHECK(!is_referrers_class_ || !needs_access_check_);
-    SetRawInputAt(0, current_method);
+    DCHECK(!is_referrers_class || !needs_access_check);
+
+    SetPackedField<LoadKindField>(
+        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
+    SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
+    SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache);
+    SetPackedFlag<kFlagGenerateClInitCheck>(false);
+  }
+
+  void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
+    DCHECK(HasAddress(load_kind));
+    load_data_.address = address;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithTypeReference(LoadKind load_kind,
+                                    const DexFile& dex_file,
+                                    uint32_t type_index) {
+    DCHECK(HasTypeReference(load_kind));
+    DCHECK(IsSameDexFile(dex_file_, dex_file));
+    DCHECK_EQ(type_index_, type_index);
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
+                                        const DexFile& dex_file,
+                                        uint32_t element_index) {
+    DCHECK(HasDexCacheReference(load_kind));
+    DCHECK(IsSameDexFile(dex_file_, dex_file));
+    load_data_.dex_cache_element_index = element_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  LoadKind GetLoadKind() const {
+    return GetPackedField<LoadKindField>();
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    // Note that we don't need to test for generate_clinit_check_.
-    // Whether or not we need to generate the clinit check is processed in
-    // prepare_for_register_allocator based on existing HInvokes and HClinitChecks.
-    return other->AsLoadClass()->type_index_ == type_index_ &&
-        other->AsLoadClass()->needs_access_check_ == needs_access_check_;
-  }
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
 
   size_t ComputeHashCode() const OVERRIDE { return type_index_; }
 
-  uint16_t GetTypeIndex() const { return type_index_; }
-  bool IsReferrersClass() const { return is_referrers_class_; }
   bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
-    // Will call runtime and load the class if the class is not loaded yet.
-    // TODO: finer grain decision.
-    return !is_referrers_class_;
+    return CanCallRuntime();
   }
 
-  bool MustGenerateClinitCheck() const {
-    return generate_clinit_check_;
-  }
   void SetMustGenerateClinitCheck(bool generate_clinit_check) {
-    generate_clinit_check_ = generate_clinit_check;
+    // The entrypoint the code generator is going to call does not perform
+    // the clinit of the class.
+    DCHECK(!NeedsAccessCheck());
+    SetPackedFlag<kFlagGenerateClInitCheck>(generate_clinit_check);
   }
 
   bool CanCallRuntime() const {
-    return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_;
+    return MustGenerateClinitCheck() ||
+           (!IsReferrersClass() && !IsInDexCache()) ||
+           NeedsAccessCheck();
   }
 
-  bool NeedsAccessCheck() const {
-    return needs_access_check_;
-  }
 
   bool CanThrow() const OVERRIDE {
-    // May call runtime and and therefore can throw.
-    // TODO: finer grain decision.
     return CanCallRuntime();
   }
 
@@ -4763,69 +5526,335 @@
     loaded_class_rti_ = rti;
   }
 
-  const DexFile& GetDexFile() { return dex_file_; }
+  uint32_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
 
-  bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; }
+  uint32_t GetDexCacheElementOffset() const;
+
+  uint64_t GetAddress() const {
+    DCHECK(HasAddress(GetLoadKind()));
+    return load_data_.address;
+  }
+
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return !IsReferrersClass(); }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
   }
 
+  bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; }
+  bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
+  bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
+  bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); }
+
+  void MarkInDexCache() {
+    SetPackedFlag<kFlagIsInDexCache>(true);
+    DCHECK(!NeedsEnvironment());
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+
+  void AddSpecialInput(HInstruction* special_input);
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(
+        &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
+  }
+
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimNot;
+  }
+
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
-  const uint16_t type_index_;
-  const DexFile& dex_file_;
-  const bool is_referrers_class_;
+  static constexpr size_t kFlagNeedsAccessCheck    = kNumberOfGenericPackedBits;
+  static constexpr size_t kFlagIsInDexCache        = kFlagNeedsAccessCheck + 1;
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
-  bool generate_clinit_check_;
-  bool needs_access_check_;
+  static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInDexCache + 1;
+  static constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;
+  static constexpr size_t kFieldLoadKindSize =
+      MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
+  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+  static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
+  using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
+
+  static bool HasTypeReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kDexCacheViaMethod ||
+        load_kind == LoadKind::kReferrersClass;
+  }
+
+  static bool HasAddress(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+  }
+
+  static bool HasDexCacheReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kDexCachePcRelative;
+  }
+
+  void SetLoadKindInternal(LoadKind load_kind);
+
+  // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
+  // For other load kinds it's empty or possibly some architecture-specific instruction
+  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  HUserRecord<HInstruction*> special_input_;
+
+  const uint16_t type_index_;
+  const DexFile& dex_file_;
+
+  union {
+    uint32_t dex_cache_element_index;   // Only for dex cache reference.
+    uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
+  } load_data_;
 
   ReferenceTypeInfo loaded_class_rti_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadClass);
 };
+std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
 
-class HLoadString : public HExpression<1> {
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline uint32_t HLoadClass::GetDexCacheElementOffset() const {
+  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
+  return load_data_.dex_cache_element_index;
+}
+
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
+  // The special input is used for PC-relative loads on some architectures,
+  // including literal pool loads, which are PC-relative too.
+  DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
+  DCHECK(special_input_.GetInstruction() == nullptr);
+  special_input_ = HUserRecord<HInstruction*>(special_input);
+  special_input->AddUseAt(this, 0);
+}
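
// load_data_ in HLoadClass is a union discriminated by the packed LoadKind:
// only the member matching the current kind is meaningful. The shape in
// miniature (a sketch with illustrative names):
#include <cstdint>

struct LoadDataSketch {
  enum class Kind { kAddress, kDexCacheElementIndex } kind;
  union {
    uint64_t address;                  // valid when kind == kAddress
    uint32_t dex_cache_element_index;  // valid when kind == kDexCacheElementIndex
  };
};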
+
+class HLoadString FINAL : public HInstruction {
  public:
-  HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
+  // Determines how to load the String.
+  enum class LoadKind {
+    // Use boot image String* address that will be known at link time.
+    // Used for boot image strings referenced by boot image code in non-PIC mode.
+    kBootImageLinkTimeAddress,
+
+    // Use PC-relative boot image String* address that will be known at link time.
+    // Used for boot image strings referenced by boot image code in PIC mode.
+    kBootImageLinkTimePcRelative,
+
+    // Use a known boot image String* address, embedded in the code by the codegen.
+    // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
+    // Note: codegen needs to emit a linker patch if indicated by compiler options'
+    // GetIncludePatchInformation().
+    kBootImageAddress,
+
+    // Load from the resolved strings array at an absolute address.
+    // Used for strings outside the boot image referenced by JIT-compiled code.
+    kDexCacheAddress,
+
+    // Load from resolved strings array in the dex cache using a PC-relative load.
+    // Used for strings outside boot image when we know that we can access
+    // the dex cache arrays using a PC-relative load.
+    kDexCachePcRelative,
+
+    // Load from resolved strings array accessed through the class loaded from
+    // the compiled method's own ArtMethod*. This is the default access type when
+    // all other types are unavailable.
+    kDexCacheViaMethod,
+
+    kLast = kDexCacheViaMethod
+  };
+
+  HLoadString(HCurrentMethod* current_method,
+              uint32_t string_index,
+              const DexFile& dex_file,
+              uint32_t dex_pc)
+      : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
+        special_input_(HUserRecord<HInstruction*>(current_method)),
         string_index_(string_index) {
-    SetRawInputAt(0, current_method);
+    SetPackedFlag<kFlagIsInDexCache>(false);
+    SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
+    load_data_.ref.dex_file = &dex_file;
+  }
+
+  void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
+    DCHECK(HasAddress(load_kind));
+    load_data_.address = address;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithStringReference(LoadKind load_kind,
+                                      const DexFile& dex_file,
+                                      uint32_t string_index) {
+    DCHECK(HasStringReference(load_kind));
+    load_data_.ref.dex_file = &dex_file;
+    string_index_ = string_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
+                                        const DexFile& dex_file,
+                                        uint32_t element_index) {
+    DCHECK(HasDexCacheReference(load_kind));
+    load_data_.ref.dex_file = &dex_file;
+    load_data_.ref.dex_cache_element_index = element_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  LoadKind GetLoadKind() const {
+    return GetPackedField<LoadKindField>();
+  }
+
+  const DexFile& GetDexFile() const;
+
+  uint32_t GetStringIndex() const {
+    DCHECK(HasStringReference(GetLoadKind()) || /* For slow paths. */ !IsInDexCache());
+    return string_index_;
+  }
+
+  uint32_t GetDexCacheElementOffset() const;
+
+  uint64_t GetAddress() const {
+    DCHECK(HasAddress(GetLoadKind()));
+    return load_data_.address;
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return other->AsLoadString()->string_index_ == string_index_;
-  }
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
 
   size_t ComputeHashCode() const OVERRIDE { return string_index_; }
 
-  uint32_t GetStringIndex() const { return string_index_; }
+  // Will call the runtime if we need to load the string through
+  // the dex cache and the string is not guaranteed to be there yet.
+  bool NeedsEnvironment() const OVERRIDE {
+    LoadKind load_kind = GetLoadKind();
+    if (load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kBootImageAddress) {
+      return false;
+    }
+    return !IsInDexCache();
+  }
 
-  // TODO: Can we deopt or debug when we resolve a string?
-  bool NeedsEnvironment() const OVERRIDE { return false; }
-  bool NeedsDexCache() const OVERRIDE { return true; }
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+  }
+
   bool CanBeNull() const OVERRIDE { return false; }
+  bool CanThrow() const OVERRIDE { return NeedsEnvironment(); }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
   }
 
+  bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
+
+  void MarkInDexCache() {
+    SetPackedFlag<kFlagIsInDexCache>(true);
+    DCHECK(!NeedsEnvironment());
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+
+  void AddSpecialInput(HInstruction* special_input);
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(
+        &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
+  }
+
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimNot;
+  }
+
   DECLARE_INSTRUCTION(LoadString);
 
  private:
-  const uint32_t string_index_;
+  static constexpr size_t kFlagIsInDexCache = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldLoadKind = kFlagIsInDexCache + 1;
+  static constexpr size_t kFieldLoadKindSize =
+      MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
+  static constexpr size_t kNumberOfLoadStringPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+  static_assert(kNumberOfLoadStringPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
+
+  static bool HasStringReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kDexCacheViaMethod;
+  }
+
+  static bool HasAddress(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+  }
+
+  static bool HasDexCacheReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kDexCachePcRelative;
+  }
+
+  void SetLoadKindInternal(LoadKind load_kind);
+
+  // The special input is the HCurrentMethod for kDexCacheViaMethod.
+  // For other load kinds it's empty or possibly some architecture-specific instruction
+  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  HUserRecord<HInstruction*> special_input_;
+
+  // The string index also serves as the hash code, and it is needed for slow paths,
+  // so it must not be overwritten with other load data.
+  uint32_t string_index_;
+
+  union {
+    struct {
+      const DexFile* dex_file;            // For string reference and dex cache reference.
+      uint32_t dex_cache_element_index;   // Only for dex cache reference.
+    } ref;
+    uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
+  } load_data_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadString);
 };
+std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs);
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline const DexFile& HLoadString::GetDexFile() const {
+  DCHECK(HasStringReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind()))
+      << GetLoadKind();
+  return *load_data_.ref.dex_file;
+}
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline uint32_t HLoadString::GetDexCacheElementOffset() const {
+  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
+  return load_data_.ref.dex_cache_element_index;
+}
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
+  // The special input is used for PC-relative loads on some architectures,
+  // including literal pool loads, which are PC-relative too.
+  DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
+  // HLoadString::GetInputRecords() returns an empty array at this point,
+  // so use the GetInputRecords() from the base class to set the input record.
+  DCHECK(special_input_.GetInstruction() == nullptr);
+  special_input_ = HUserRecord<HInstruction*>(special_input);
+  special_input->AddUseAt(this, 0);
+}
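
// Usage sketch: a later sharpening pass could upgrade the default
// kDexCacheViaMethod kind once the string's location is known (the address
// below is made up):
//
//   load_string->SetLoadKindWithAddress(
//       HLoadString::LoadKind::kBootImageAddress, /* address= */ 0x70123456u);
//
// For the boot image kinds NeedsEnvironment() then returns false, so the
// instruction no longer requires an environment.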
 
 /**
  * Performs an initialization check on its Class object input.
  */
-class HClinitCheck : public HExpression<1> {
+class HClinitCheck FINAL : public HExpression<1> {
  public:
   HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
       : HExpression(
@@ -4836,7 +5865,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4845,6 +5874,7 @@
     return true;
   }
 
+  bool CanThrow() const OVERRIDE { return true; }
 
   HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
 
@@ -4854,7 +5884,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClinitCheck);
 };
 
-class HStaticFieldGet : public HExpression<1> {
+class HStaticFieldGet FINAL : public HExpression<1> {
  public:
   HStaticFieldGet(HInstruction* cls,
                   Primitive::Type field_type,
@@ -4881,8 +5911,8 @@
 
   bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    HStaticFieldGet* other_get = other->AsStaticFieldGet();
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    const HStaticFieldGet* other_get = other->AsStaticFieldGet();
     return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
   }
 
@@ -4903,7 +5933,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldGet);
 };
 
-class HStaticFieldSet : public HTemplateInstruction<2> {
+class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HStaticFieldSet(HInstruction* cls,
                   HInstruction* value,
@@ -4923,8 +5953,8 @@
                     field_idx,
                     declaring_class_def_index,
                     dex_file,
-                    dex_cache),
-        value_can_be_null_(true) {
+                    dex_cache) {
+    SetPackedFlag<kFlagValueCanBeNull>(true);
     SetRawInputAt(0, cls);
     SetRawInputAt(1, value);
   }
@@ -4935,19 +5965,23 @@
   bool IsVolatile() const { return field_info_.IsVolatile(); }
 
   HInstruction* GetValue() const { return InputAt(1); }
-  bool GetValueCanBeNull() const { return value_can_be_null_; }
-  void ClearValueCanBeNull() { value_can_be_null_ = false; }
+  bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); }
+  void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
 
   DECLARE_INSTRUCTION(StaticFieldSet);
 
  private:
+  static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
+  static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1;
+  static_assert(kNumberOfStaticFieldSetPackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   const FieldInfo field_info_;
-  bool value_can_be_null_;
 
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet);
 };
 
-class HUnresolvedInstanceFieldGet : public HExpression<1> {
+class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HUnresolvedInstanceFieldGet(HInstruction* obj,
                               Primitive::Type field_type,
@@ -4972,7 +6006,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet);
 };
 
-class HUnresolvedInstanceFieldSet : public HTemplateInstruction<2> {
+class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HUnresolvedInstanceFieldSet(HInstruction* obj,
                               HInstruction* value,
@@ -4980,9 +6014,9 @@
                               uint32_t field_index,
                               uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
-        field_type_(field_type),
         field_index_(field_index) {
-    DCHECK_EQ(field_type, value->GetType());
+    SetPackedField<FieldTypeField>(field_type);
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, obj);
     SetRawInputAt(1, value);
   }
@@ -4990,19 +6024,27 @@
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
-  Primitive::Type GetFieldType() const { return field_type_; }
+  Primitive::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); }
   uint32_t GetFieldIndex() const { return field_index_; }
 
   DECLARE_INSTRUCTION(UnresolvedInstanceFieldSet);
 
  private:
-  const Primitive::Type field_type_;
+  static constexpr size_t kFieldFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kNumberOfUnresolvedInstanceFieldSetPackedBits =
+      kFieldFieldType + kFieldFieldTypeSize;
+  static_assert(kNumberOfUnresolvedInstanceFieldSetPackedBits <=
+                    HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using FieldTypeField = BitField<Primitive::Type, kFieldFieldType, kFieldFieldTypeSize>;
+
   const uint32_t field_index_;
 
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet);
 };
 
-class HUnresolvedStaticFieldGet : public HExpression<0> {
+class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
  public:
   HUnresolvedStaticFieldGet(Primitive::Type field_type,
                             uint32_t field_index,
@@ -5025,36 +6067,44 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet);
 };
 
-class HUnresolvedStaticFieldSet : public HTemplateInstruction<1> {
+class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> {
  public:
   HUnresolvedStaticFieldSet(HInstruction* value,
                             Primitive::Type field_type,
                             uint32_t field_index,
                             uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
-        field_type_(field_type),
         field_index_(field_index) {
-    DCHECK_EQ(field_type, value->GetType());
+    SetPackedField<FieldTypeField>(field_type);
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, value);
   }
 
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
-  Primitive::Type GetFieldType() const { return field_type_; }
+  Primitive::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); }
   uint32_t GetFieldIndex() const { return field_index_; }
 
   DECLARE_INSTRUCTION(UnresolvedStaticFieldSet);
 
  private:
-  const Primitive::Type field_type_;
+  static constexpr size_t kFieldFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kNumberOfUnresolvedStaticFieldSetPackedBits =
+      kFieldFieldType + kFieldFieldTypeSize;
+  static_assert(kNumberOfUnresolvedStaticFieldSetPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using FieldTypeField = BitField<Primitive::Type, kFieldFieldType, kFieldFieldTypeSize>;
+
   const uint32_t field_index_;
 
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldSet);
 };
 
 // Implement the move-exception DEX instruction.
-class HLoadException : public HExpression<0> {
+class HLoadException FINAL : public HExpression<0> {
  public:
   explicit HLoadException(uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc) {}
@@ -5069,7 +6119,7 @@
 
 // Implicit part of move-exception which clears thread-local exception storage.
 // Must not be removed because the runtime expects the TLS to get cleared.
-class HClearException : public HTemplateInstruction<0> {
+class HClearException FINAL : public HTemplateInstruction<0> {
  public:
   explicit HClearException(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {}
@@ -5080,7 +6130,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClearException);
 };
 
-class HThrow : public HTemplateInstruction<1> {
+class HThrow FINAL : public HTemplateInstruction<1> {
  public:
   HThrow(HInstruction* exception, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
@@ -5111,10 +6161,13 @@
   kAbstractClassCheck,    // Can just walk the super class chain, starting one up.
   kInterfaceCheck,        // No optimization yet when checking against an interface.
   kArrayObjectCheck,      // Can just check if the array is not primitive.
-  kArrayCheck             // No optimization yet when checking against a generic array.
+  kArrayCheck,            // No optimization yet when checking against a generic array.
+  kLast = kArrayCheck
 };
 
-class HInstanceOf : public HExpression<2> {
+std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
+
+class HInstanceOf FINAL : public HExpression<2> {
  public:
   HInstanceOf(HInstruction* object,
               HLoadClass* constant,
@@ -5122,109 +6175,112 @@
               uint32_t dex_pc)
       : HExpression(Primitive::kPrimBoolean,
                     SideEffectsForArchRuntimeCalls(check_kind),
-                    dex_pc),
-        check_kind_(check_kind),
-        must_do_null_check_(true) {
+                    dex_pc) {
+    SetPackedField<TypeCheckKindField>(check_kind);
+    SetPackedFlag<kFlagMustDoNullCheck>(true);
     SetRawInputAt(0, object);
     SetRawInputAt(1, constant);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
   bool NeedsEnvironment() const OVERRIDE {
-    return false;
+    return CanCallRuntime(GetTypeCheckKind());
   }
 
-  bool IsExactCheck() const { return check_kind_ == TypeCheckKind::kExactCheck; }
-
-  TypeCheckKind GetTypeCheckKind() const { return check_kind_; }
-
   // Used only in code generation.
-  bool MustDoNullCheck() const { return must_do_null_check_; }
-  void ClearMustDoNullCheck() { must_do_null_check_ = false; }
+  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
+  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
+  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
+  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
+
+  static bool CanCallRuntime(TypeCheckKind check_kind) {
+    // MIPS currently makes runtime calls for all checks other than the exact check.
+    return check_kind != TypeCheckKind::kExactCheck;
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
-    return (check_kind == TypeCheckKind::kExactCheck)
-        ? SideEffects::None()
-        // Mips currently does runtime calls for any other checks.
-        : SideEffects::CanTriggerGC();
+    return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
   }
 
   DECLARE_INSTRUCTION(InstanceOf);
 
  private:
-  const TypeCheckKind check_kind_;
-  bool must_do_null_check_;
+  static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFieldTypeCheckKindSize =
+      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
+  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
+  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
+  static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
 
   DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
 };
 
-class HBoundType : public HExpression<1> {
+class HBoundType FINAL : public HExpression<1> {
  public:
-  // Constructs an HBoundType with the given upper_bound.
-  // Ensures that the upper_bound is valid.
-  HBoundType(HInstruction* input,
-             ReferenceTypeInfo upper_bound,
-             bool upper_can_be_null,
-             uint32_t dex_pc = kNoDexPc)
+  HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc),
-        upper_bound_(upper_bound),
-        upper_can_be_null_(upper_can_be_null),
-        can_be_null_(upper_can_be_null) {
+        upper_bound_(ReferenceTypeInfo::CreateInvalid()) {
+    SetPackedFlag<kFlagUpperCanBeNull>(true);
+    SetPackedFlag<kFlagCanBeNull>(true);
     DCHECK_EQ(input->GetType(), Primitive::kPrimNot);
     SetRawInputAt(0, input);
-    SetReferenceTypeInfo(upper_bound_);
   }
 
-  // GetUpper* should only be used in reference type propagation.
+  // {Get,Set}Upper* should only be used in reference type propagation.
   const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; }
-  bool GetUpperCanBeNull() const { return upper_can_be_null_; }
+  bool GetUpperCanBeNull() const { return GetPackedFlag<kFlagUpperCanBeNull>(); }
+  void SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null);
 
   void SetCanBeNull(bool can_be_null) {
-    DCHECK(upper_can_be_null_ || !can_be_null);
-    can_be_null_ = can_be_null;
+    DCHECK(GetUpperCanBeNull() || !can_be_null);
+    SetPackedFlag<kFlagCanBeNull>(can_be_null);
   }
 
-  bool CanBeNull() const OVERRIDE { return can_be_null_; }
+  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
 
   DECLARE_INSTRUCTION(BoundType);
 
  private:
+  // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this
+  // is false then CanBeNull() cannot be true).
+  static constexpr size_t kFlagUpperCanBeNull = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagCanBeNull = kFlagUpperCanBeNull + 1;
+  static constexpr size_t kNumberOfBoundTypePackedBits = kFlagCanBeNull + 1;
+  static_assert(kNumberOfBoundTypePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
   // Encodes the uppermost class that this instruction can have. In other words
   // it is always the case that GetUpperBound().IsSupertypeOf(GetReferenceType()).
   // It is used to bound the type in cases like:
   //   if (x instanceof ClassX) {
   //     // upper_bound_ will be ClassX
   //   }
-  const ReferenceTypeInfo upper_bound_;
-  // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this
-  // is false then can_be_null_ cannot be true).
-  const bool upper_can_be_null_;
-  bool can_be_null_;
+  ReferenceTypeInfo upper_bound_;
 
   DISALLOW_COPY_AND_ASSIGN(HBoundType);
 };
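
// Where a BoundType comes from, in pseudo-IR (a sketch): after a successful
// instanceof, uses of `x` in the guarded block see a BoundType whose upper
// bound is ClassX and which is known to be non-null:
//
//   if (InstanceOf(x, ClassX)) {
//     y = BoundType(x)  // upper_bound_ == ClassX, CanBeNull() == false
//   }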
 
-class HCheckCast : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTemplateInstruction<2> {
  public:
   HCheckCast(HInstruction* object,
              HLoadClass* constant,
              TypeCheckKind check_kind,
              uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc),
-        check_kind_(check_kind),
-        must_do_null_check_(true) {
+      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
+    SetPackedField<TypeCheckKindField>(check_kind);
+    SetPackedFlag<kFlagMustDoNullCheck>(true);
     SetRawInputAt(0, object);
     SetRawInputAt(1, constant);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -5235,54 +6291,94 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
-  bool MustDoNullCheck() const { return must_do_null_check_; }
-  void ClearMustDoNullCheck() { must_do_null_check_ = false; }
-  TypeCheckKind GetTypeCheckKind() const { return check_kind_; }
-
-  bool IsExactCheck() const { return check_kind_ == TypeCheckKind::kExactCheck; }
+  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
+  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
+  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
+  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
 
   DECLARE_INSTRUCTION(CheckCast);
 
  private:
-  const TypeCheckKind check_kind_;
-  bool must_do_null_check_;
+  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeCheckKindSize =
+      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
+  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
+  static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
+  static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
 
   DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
-class HMemoryBarrier : public HTemplateInstruction<0> {
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
+ * primitives.  Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ *    class has final fields.
+ * -# Use NTStoreStore to order non-temporal stores with respect to all later
+ *    store-to-memory instructions.  Only generated together with non-temporal stores.
+ */
+enum MemBarrierKind {
+  kAnyStore,
+  kLoadAny,
+  kStoreStore,
+  kAnyAny,
+  kNTStoreStore,
+  kLastBarrierKind = kNTStoreStore
+};
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
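
// A minimal sketch of the volatile-access recipe from the comment above;
// EmitBarrier, EmitStore and EmitLoad are assumed stand-ins for codegen helpers.
inline void EmitVolatileStoreSketch(void (*EmitBarrier)(MemBarrierKind), void (*EmitStore)()) {
  EmitBarrier(kAnyStore);  // release: prior accesses may not sink below the store
  EmitStore();
  EmitBarrier(kAnyAny);    // full barrier: order the store against later accesses
}

inline void EmitVolatileLoadSketch(void (*EmitBarrier)(MemBarrierKind), void (*EmitLoad)()) {
  EmitLoad();
  EmitBarrier(kLoadAny);   // acquire: later accesses may not hoist above the load
}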
+
+class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(
-            SideEffects::AllWritesAndReads(), dex_pc),  // Assume write/read on all fields/arrays.
-        barrier_kind_(barrier_kind) {}
+            SideEffects::AllWritesAndReads(), dex_pc) {  // Assume write/read on all fields/arrays.
+    SetPackedField<BarrierKindField>(barrier_kind);
+  }
 
-  MemBarrierKind GetBarrierKind() { return barrier_kind_; }
+  MemBarrierKind GetBarrierKind() { return GetPackedField<BarrierKindField>(); }
 
   DECLARE_INSTRUCTION(MemoryBarrier);
 
  private:
-  const MemBarrierKind barrier_kind_;
+  static constexpr size_t kFieldBarrierKind = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldBarrierKindSize =
+      MinimumBitsToStore(static_cast<size_t>(kLastBarrierKind));
+  static constexpr size_t kNumberOfMemoryBarrierPackedBits =
+      kFieldBarrierKind + kFieldBarrierKindSize;
+  static_assert(kNumberOfMemoryBarrierPackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using BarrierKindField = BitField<MemBarrierKind, kFieldBarrierKind, kFieldBarrierKindSize>;
 
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
-class HMonitorOperation : public HTemplateInstruction<1> {
+class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
-  enum OperationKind {
+  enum class OperationKind {
     kEnter,
     kExit,
+    kLast = kExit
   };
 
   HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc)
     : HTemplateInstruction(
-          SideEffects::AllExceptGCDependency(), dex_pc),  // Assume write/read on all fields/arrays.
-      kind_(kind) {
+          SideEffects::AllExceptGCDependency(),  // Assume write/read on all fields/arrays.
+          dex_pc) {
+    SetPackedField<OperationKindField>(kind);
     SetRawInputAt(0, object);
   }
 
-  // Instruction may throw a Java exception, so we need an environment.
-  bool NeedsEnvironment() const OVERRIDE { return CanThrow(); }
+  // The instruction may enter the runtime, so we need an environment.
+  bool NeedsEnvironment() const OVERRIDE { return true; }
 
   bool CanThrow() const OVERRIDE {
     // Verifier guarantees that monitor-exit cannot throw.
@@ -5291,36 +6387,60 @@
     return IsEnter();
   }
 
-
-  bool IsEnter() const { return kind_ == kEnter; }
+  OperationKind GetOperationKind() const { return GetPackedField<OperationKindField>(); }
+  bool IsEnter() const { return GetOperationKind() == OperationKind::kEnter; }
 
   DECLARE_INSTRUCTION(MonitorOperation);
 
  private:
-  const OperationKind kind_;
+  static constexpr size_t kFieldOperationKind = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldOperationKindSize =
+      MinimumBitsToStore(static_cast<size_t>(OperationKind::kLast));
+  static constexpr size_t kNumberOfMonitorOperationPackedBits =
+      kFieldOperationKind + kFieldOperationKindSize;
+  static_assert(kNumberOfMonitorOperationPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using OperationKindField = BitField<OperationKind, kFieldOperationKind, kFieldOperationKindSize>;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
-/**
- * A HInstruction used as a marker for the replacement of new + <init>
- * of a String to a call to a StringFactory. Only baseline will see
- * the node at code generation, where it will be be treated as null.
- * When compiling non-baseline, `HFakeString` instructions are being removed
- * in the instruction simplifier.
- */
-class HFakeString : public HTemplateInstruction<0> {
+class HSelect FINAL : public HExpression<3> {
  public:
-  explicit HFakeString(uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {}
+  HSelect(HInstruction* condition,
+          HInstruction* true_value,
+          HInstruction* false_value,
+          uint32_t dex_pc)
+      : HExpression(HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) {
+    DCHECK_EQ(HPhi::ToPhiType(true_value->GetType()), HPhi::ToPhiType(false_value->GetType()));
 
-  Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimNot; }
+    // The first input must be `true_value` or `false_value` to allow codegens to
+    // use the SameAsFirstInput allocation policy. We make it `false_value` so
+    // that architectures which implement HSelect as a conditional move will
+    // also not need to invert the condition.
+    SetRawInputAt(0, false_value);
+    SetRawInputAt(1, true_value);
+    SetRawInputAt(2, condition);
+  }
 
-  DECLARE_INSTRUCTION(FakeString);
+  HInstruction* GetFalseValue() const { return InputAt(0); }
+  HInstruction* GetTrueValue() const { return InputAt(1); }
+  HInstruction* GetCondition() const { return InputAt(2); }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
+
+  bool CanBeNull() const OVERRIDE {
+    return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
+  }
+
+  DECLARE_INSTRUCTION(Select);
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(HFakeString);
+  DISALLOW_COPY_AND_ASSIGN(HSelect);
 };
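
// Why `false_value` is the first input, in miniature: with SameAsFirstInput,
// the destination register starts out holding input 0, and a conditional move
// overwrites it only when the condition holds, so no condition inversion is
// needed. A scalar sketch (not ART code):
inline int SelectSketch(bool condition, int true_value, int false_value) {
  int result = false_value;            // dst begins as the first input
  if (condition) result = true_value;  // cmov-style conditional overwrite
  return result;
}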
 
 class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
@@ -5352,8 +6472,8 @@
   }
 
   bool IsPending() const {
-    DCHECK(!source_.IsInvalid() || destination_.IsInvalid());
-    return destination_.IsInvalid() && !source_.IsInvalid();
+    DCHECK(source_.IsValid() || destination_.IsInvalid());
+    return destination_.IsInvalid() && source_.IsValid();
   }
 
   // True if this blocks a move from the given location.
@@ -5397,9 +6517,11 @@
   HInstruction* instruction_;
 };
 
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs);
+
 static constexpr size_t kDefaultNumberOfMoves = 4;
 
-class HParallelMove : public HTemplateInstruction<0> {
+class HParallelMove FINAL : public HTemplateInstruction<0> {
  public:
   explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc),
@@ -5457,9 +6579,18 @@
 
 }  // namespace art
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+#include "nodes_shared.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "nodes_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "nodes_x86.h"
 #endif
@@ -5669,19 +6800,38 @@
 };
 
 inline int64_t Int64FromConstant(HConstant* constant) {
-  DCHECK(constant->IsIntConstant() || constant->IsLongConstant());
-  return constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
-                                   : constant->AsLongConstant()->GetValue();
+  if (constant->IsIntConstant()) {
+    return constant->AsIntConstant()->GetValue();
+  } else if (constant->IsLongConstant()) {
+    return constant->AsLongConstant()->GetValue();
+  } else {
+    DCHECK(constant->IsNullConstant()) << constant->DebugName();
+    return 0;
+  }
 }
 
-inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
-  // For the purposes of the compiler, the dex files must actually be the same object
-  // if we want to safely treat them as the same. This is especially important for JIT
-  // as custom class loaders can open the same underlying file (or memory) multiple
-  // times and provide different class resolution but no two class loaders should ever
-  // use the same DexFile object - doing so is an unsupported hack that can lead to
-  // all sorts of weird failures.
-  return &lhs == &rhs;
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
+  inline const H##type* HInstruction::As##type() const {                       \
+    return Is##type() ? down_cast<const H##type*>(this) : nullptr;             \
+  }                                                                            \
+  inline H##type* HInstruction::As##type() {                                   \
+    return Is##type() ? static_cast<H##type*>(this) : nullptr;                 \
+  }
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
+// Create space in `blocks` for adding `number_of_new_blocks` entries
+// starting just after index `after`. Blocks following `after` are moved
+// back accordingly.
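+// For example, given blocks {A, B, C}, MakeRoomFor(&blocks, 2, 0) yields
+// {A, _, _, B, C}, where the two new slots are left for the caller to fill
+// (they initially hold stale copies of the shifted entries).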
+inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
+                        size_t number_of_new_blocks,
+                        size_t after) {
+  DCHECK_LT(after, blocks->size());
+  size_t old_size = blocks->size();
+  size_t new_size = old_size + number_of_new_blocks;
+  blocks->resize(new_size);
+  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..d9f9740e
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase FINAL : public HExpression<0> {
+ public:
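+  // element_offset_ starts at the maximum size_t value so that the first
+  // UpdateElementOffset() call always lowers it to a real offset.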
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. Such cases are expected to be rare, though, so we do
+    // not spend compile time on more elaborate offset calculations.)
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc
new file mode 100644
index 0000000..ac2f093
--- /dev/null
+++ b/compiler/optimizing/nodes_arm64.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_arm64.h"
+#include "nodes.h"
+
+namespace art {
+
+using arm64::helpers::CanFitInShifterOperand;
+
+void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+                                                           /*out*/OpKind* op_kind,
+                                                           /*out*/int* shift_amount) {
+  DCHECK(CanFitInShifterOperand(instruction));
+  if (instruction->IsShl()) {
+    *op_kind = kLSL;
+    *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsShr()) {
+    *op_kind = kASR;
+    *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsUShr()) {
+    *op_kind = kLSR;
+    *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue();
+  } else {
+    DCHECK(instruction->IsTypeConversion());
+    Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType();
+    Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType();
+    int result_size = Primitive::ComponentSize(result_type);
+    int input_size = Primitive::ComponentSize(input_type);
+    int min_size = std::min(result_size, input_size);
+    // This follows the logic in
+    // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+      // There is actually nothing to do. The register will be used as a W
+      // register, discarding the top bits. This is represented by the default
+      // encoding 'LSL 0'.
+      *op_kind = kLSL;
+      *shift_amount = 0;
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      *op_kind = kUXTH;
+    } else {
+      switch (min_size) {
+        case 1: *op_kind = kSXTB; break;
+        case 2: *op_kind = kSXTH; break;
+        case 4: *op_kind = kSXTW; break;
+        default:
+          LOG(FATAL) << "Unexpected min size " << min_size;
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+  switch (op) {
+    case HArm64DataProcWithShifterOp::kLSL:  return os << "LSL";
+    case HArm64DataProcWithShifterOp::kLSR:  return os << "LSR";
+    case HArm64DataProcWithShifterOp::kASR:  return os << "ASR";
+    case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
+    case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
+    case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
+    case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
+    case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
+    case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+    default:
+      LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 885d3a2..3f88717 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -17,31 +17,83 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 
+#include "nodes.h"
+
 namespace art {
 
-// This instruction computes an intermediate address pointing in the 'middle' of an object. The
-// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
-// never used across anything that can trigger GC.
-class HArm64IntermediateAddress : public HExpression<2> {
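+// Merges a shift or extension of the right operand into an ARM64
+// data-processing instruction, e.g. something like `add w0, w1, w2, LSL #5`
+// (an illustrative encoding; the exact instruction is chosen by the ARM64
+// code generator).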
+class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
  public:
-  HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
-    SetRawInputAt(0, base_address);
-    SetRawInputAt(1, offset);
+  enum OpKind {
+    kLSL,   // Logical shift left.
+    kLSR,   // Logical shift right.
+    kASR,   // Arithmetic shift right.
+    kUXTB,  // Unsigned extend byte.
+    kUXTH,  // Unsigned extend half-word.
+    kUXTW,  // Unsigned extend word.
+    kSXTB,  // Signed extend byte.
+    kSXTH,  // Signed extend half-word.
+    kSXTW,  // Signed extend word.
+
+    // Aliases.
+    kFirstShiftOp = kLSL,
+    kLastShiftOp = kASR,
+    kFirstExtensionOp = kUXTB,
+    kLastExtensionOp = kSXTW
+  };
+  HArm64DataProcWithShifterOp(HInstruction* instr,
+                              HInstruction* left,
+                              HInstruction* right,
+                              OpKind op,
+                              // The shift argument is unused if the operation
+                              // is an extension.
+                              int shift = 0,
+                              uint32_t dex_pc = kNoDexPc)
+      : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+        instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
+    DCHECK(!instr->HasSideEffects());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
+    const HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+    return instr_kind_ == other->instr_kind_ &&
+        op_kind_ == other->op_kind_ &&
+        shift_amount_ == other->shift_amount_;
+  }
 
-  HInstruction* GetBaseAddress() const { return InputAt(0); }
-  HInstruction* GetOffset() const { return InputAt(1); }
+  static bool IsShiftOp(OpKind op_kind) {
+    return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+  }
 
-  DECLARE_INSTRUCTION(Arm64IntermediateAddress);
+  static bool IsExtensionOp(OpKind op_kind) {
+    return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+  }
+
+  // Find the operation kind and shift amount from a bitfield move instruction.
+  static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+                                       /*out*/OpKind* op_kind,
+                                       /*out*/int* shift_amount);
+
+  InstructionKind GetInstrKind() const { return instr_kind_; }
+  OpKind GetOpKind() const { return op_kind_; }
+  int GetShiftAmount() const { return shift_amount_; }
+
+  DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
+  InstructionKind instr_kind_;
+  OpKind op_kind_;
+  int shift_amount_;
+
+  friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
 };
 
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
new file mode 100644
index 0000000..de77245
--- /dev/null
+++ b/compiler/optimizing/nodes_mips.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
+
+namespace art {
+
+// Compute the address of the method for MIPS Constant area support.
+class HMipsComputeBaseMethodAddress : public HExpression<0> {
+ public:
+  // Treat the value as an int32_t, but it is really a 32 bit native pointer.
+  HMipsComputeBaseMethodAddress()
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress);
+};
+
+class HMipsDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HMipsDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Maximize the range reachable by a single load instruction for dex cache
+    // array accesses by placing the base so that offset -32768 corresponds to
+    // the first (lowest) used element.
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(MipsDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
new file mode 100644
index 0000000..8bd8667
--- /dev/null
+++ b/compiler/optimizing/nodes_shared.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+
+namespace art {
+
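+// Fused multiply-accumulate: computes `accumulator op (mul_left * mul_right)`,
+// where `op` distinguishes a multiply-add from a multiply-subtract.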
+class HMultiplyAccumulate FINAL : public HExpression<3> {
+ public:
+  HMultiplyAccumulate(Primitive::Type type,
+                      InstructionKind op,
+                      HInstruction* accumulator,
+                      HInstruction* mul_left,
+                      HInstruction* mul_right,
+                      uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
+};
+
+class HBitwiseNegatedRight FINAL : public HBinaryOperation {
+ public:
+  HBitwiseNegatedRight(Primitive::Type result_type,
+                       InstructionKind op,
+                       HInstruction* left,
+                       HInstruction* right,
+                       uint32_t dex_pc = kNoDexPc)
+    : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
+      op_kind_(op) {
+    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
+  }
+
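+  // Computes `x op ~y` for the configured op kind. For example, with
+  // op_kind_ == HInstruction::kAnd, Compute(0b1100, 0b1010) yields 0b0100.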
+  template <typename T, typename U>
+  auto Compute(T x, U y) const -> decltype(x & ~y) {
+    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
+                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
+                  "Inconsistent negated bitwise types");
+    switch (op_kind_) {
+      case HInstruction::kAnd:
+        return x & ~y;
+      case HInstruction::kOr:
+        return x | ~y;
+      case HInstruction::kXor:
+        return x ^ ~y;
+      default:
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(BitwiseNegatedRight);
+
+ private:
+  // Specifies the bitwise operation; its right-hand operand is bitwise-negated
+  // before the operation is applied.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
+};
+
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HIntermediateAddress FINAL : public HExpression<2> {
+ public:
+  HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+    SetRawInputAt(0, base_address);
+    SetRawInputAt(1, offset);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
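+  // Although typed as `kPrimNot`, the result is not a real object reference.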
+  bool IsActualObject() const OVERRIDE { return false; }
+
+  HInstruction* GetBaseAddress() const { return InputAt(0); }
+  HInstruction* GetOffset() const { return InputAt(1); }
+
+  DECLARE_INSTRUCTION(IntermediateAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
+};
+
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 764f5fe..d4e2a58 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -91,7 +91,7 @@
   entry->InsertInstructionBefore(to_insert, parameter2);
 
   ASSERT_TRUE(parameter1->HasUses());
-  ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter1->GetUses().HasExactlyOneElement());
 }
 
 /**
@@ -115,7 +115,7 @@
   entry->AddInstruction(to_add);
 
   ASSERT_TRUE(parameter->HasUses());
-  ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter->GetUses().HasExactlyOneElement());
 }
 
 TEST(Node, ParentEnvironment) {
@@ -134,7 +134,7 @@
   entry->AddInstruction(new (&allocator) HExit());
 
   ASSERT_TRUE(parameter1->HasUses());
-  ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter1->GetUses().HasExactlyOneElement());
 
   HEnvironment* environment = new (&allocator) HEnvironment(
       &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0, kStatic, with_environment);
@@ -145,7 +145,7 @@
   with_environment->SetRawEnvironment(environment);
 
   ASSERT_TRUE(parameter1->HasEnvironmentUses());
-  ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter1->GetEnvUses().HasExactlyOneElement());
 
   HEnvironment* parent1 = new (&allocator) HEnvironment(
       &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0, kStatic, nullptr);
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 556217b..fa47976 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -20,12 +20,14 @@
 namespace art {
 
 // Compute the address of the method for X86 Constant area support.
-class HX86ComputeBaseMethodAddress : public HExpression<0> {
+class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
  public:
   // Treat the value as an int32_t, but it is really a 32 bit native pointer.
   HX86ComputeBaseMethodAddress()
       : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+
   DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress);
 
  private:
@@ -33,19 +35,15 @@
 };
 
 // Load a constant value from the constant table.
-class HX86LoadFromConstantTable : public HExpression<2> {
+class HX86LoadFromConstantTable FINAL : public HExpression<2> {
  public:
   HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
-                            HConstant* constant,
-                            bool needs_materialization = true)
-      : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc),
-        needs_materialization_(needs_materialization) {
+                            HConstant* constant)
+      : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc) {
     SetRawInputAt(0, method_base);
     SetRawInputAt(1, constant);
   }
 
-  bool NeedsMaterialization() const { return needs_materialization_; }
-
   HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const {
     return InputAt(0)->AsX86ComputeBaseMethodAddress();
   }
@@ -57,13 +55,30 @@
   DECLARE_INSTRUCTION(X86LoadFromConstantTable);
 
  private:
-  const bool needs_materialization_;
-
   DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
 };
 
+// Version of HNeg with access to the constant table for FP types.
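+// (On x86, an FP negation is typically done by XOR-ing with a sign-bit mask
+// loaded from the constant table via the method base.)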
+class HX86FPNeg FINAL : public HExpression<2> {
+ public:
+  HX86FPNeg(Primitive::Type result_type,
+            HInstruction* input,
+            HX86ComputeBaseMethodAddress* method_base,
+            uint32_t dex_pc)
+      : HExpression(result_type, SideEffects::None(), dex_pc) {
+    DCHECK(Primitive::IsFloatingPointType(result_type));
+    SetRawInputAt(0, input);
+    SetRawInputAt(1, method_base);
+  }
+
+  DECLARE_INSTRUCTION(X86FPNeg);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HX86FPNeg);
+};
+
 // X86 version of HPackedSwitch that holds a pointer to the base method address.
-class HX86PackedSwitch : public HTemplateInstruction<2> {
+class HX86PackedSwitch FINAL : public HTemplateInstruction<2> {
  public:
   HX86PackedSwitch(int32_t start_value,
                    int32_t num_entries,
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2f59d4c..0819fb0 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -37,7 +37,10 @@
 
   virtual ~HOptimization() {}
 
-  // Return the name of the pass.
+  // Return the name of the pass. Pass names for a single HOptimization should be of the form
+  // <optimization_name> or <optimization_name>$<pass_name>, where <optimization_name> is a
+  // prefix shared by related passes. Examples: 'instruction_simplifier',
+  // 'instruction_simplifier$after_bce', 'instruction_simplifier$before_codegen'.
   const char* GetPassName() const { return pass_name_; }
 
   // Perform the analysis itself.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 05c6b2c..8c0231e 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -23,65 +23,78 @@
 #include "optimizing/code_generator.h"
 #include "optimizing/optimizing_unit_test.h"
 #include "utils/assembler.h"
+#include "utils/arm/assembler_thumb2.h"
+#include "utils/mips/assembler_mips.h"
+#include "utils/mips64/assembler_mips64.h"
 
 #include "optimizing/optimizing_cfi_test_expected.inc"
 
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class OptimizingCFITest : public CFITest {
  public:
   // Enable this flag to generate the expected outputs.
   static constexpr bool kGenerateExpected = false;
 
-  void TestImpl(InstructionSet isa, const char* isa_str,
-                const std::vector<uint8_t>& expected_asm,
-                const std::vector<uint8_t>& expected_cfi) {
+  OptimizingCFITest()
+      : pool_(),
+        allocator_(&pool_),
+        opts_(),
+        isa_features_(),
+        graph_(nullptr),
+        code_gen_(),
+        blocks_(allocator_.Adapter()) {}
+
+  void SetUpFrame(InstructionSet isa) {
     // Setup simple context.
-    ArenaPool pool;
-    ArenaAllocator allocator(&pool);
-    CompilerOptions opts;
-    std::unique_ptr<const InstructionSetFeatures> isa_features;
     std::string error;
-    isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
-    HGraph* graph = CreateGraph(&allocator);
+    isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+    graph_ = CreateGraph(&allocator_);
     // Generate simple frame with some spills.
-    std::unique_ptr<CodeGenerator> code_gen(
-        CodeGenerator::Create(graph, isa, *isa_features.get(), opts));
+    code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
+    code_gen_->GetAssembler()->cfi().SetEnabled(true);
     const int frame_size = 64;
     int core_reg = 0;
     int fp_reg = 0;
     for (int i = 0; i < 2; i++) {  // Two registers of each kind.
       for (; core_reg < 32; core_reg++) {
-        if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+        if (code_gen_->IsCoreCalleeSaveRegister(core_reg)) {
           auto location = Location::RegisterLocation(core_reg);
-          code_gen->AddAllocatedRegister(location);
+          code_gen_->AddAllocatedRegister(location);
           core_reg++;
           break;
         }
       }
       for (; fp_reg < 32; fp_reg++) {
-        if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+        if (code_gen_->IsFloatingPointCalleeSaveRegister(fp_reg)) {
           auto location = Location::FpuRegisterLocation(fp_reg);
-          code_gen->AddAllocatedRegister(location);
+          code_gen_->AddAllocatedRegister(location);
           fp_reg++;
           break;
         }
       }
     }
-    ArenaVector<HBasicBlock*> blocks(allocator.Adapter());
-    code_gen->block_order_ = &blocks;
-    code_gen->ComputeSpillMask();
-    code_gen->SetFrameSize(frame_size);
-    code_gen->GenerateFrameEntry();
-    code_gen->GenerateFrameExit();
+    code_gen_->block_order_ = &blocks_;
+    code_gen_->ComputeSpillMask();
+    code_gen_->SetFrameSize(frame_size);
+    code_gen_->GenerateFrameEntry();
+  }
+
+  void Finish() {
+    code_gen_->GenerateFrameExit();
+    code_gen_->Finalize(&code_allocator_);
+  }
+
+  void Check(InstructionSet isa,
+             const char* isa_str,
+             const std::vector<uint8_t>& expected_asm,
+             const std::vector<uint8_t>& expected_cfi) {
     // Get the outputs.
-    InternalCodeAllocator code_allocator;
-    code_gen->Finalize(&code_allocator);
-    const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
-    Assembler* opt_asm = code_gen->GetAssembler();
+    const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory();
+    Assembler* opt_asm = code_gen_->GetAssembler();
     const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
 
     if (kGenerateExpected) {
@@ -92,6 +105,19 @@
     }
   }
 
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
+                const std::vector<uint8_t>& expected_asm,
+                const std::vector<uint8_t>& expected_cfi) {
+    SetUpFrame(isa);
+    Finish();
+    Check(isa, isa_str, expected_asm, expected_cfi);
+  }
+
+  CodeGenerator* GetCodeGenerator() {
+    return code_gen_.get();
+  }
+
  private:
   class InternalCodeAllocator : public CodeAllocator {
    public:
@@ -109,22 +135,130 @@
 
     DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
   };
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  CompilerOptions opts_;
+  std::unique_ptr<const InstructionSetFeatures> isa_features_;
+  HGraph* graph_;
+  std::unique_ptr<CodeGenerator> code_gen_;
+  ArenaVector<HBasicBlock*> blocks_;
+  InternalCodeAllocator code_allocator_;
 };
 
-#define TEST_ISA(isa) \
-  TEST_F(OptimizingCFITest, isa) { \
-    std::vector<uint8_t> expected_asm(expected_asm_##isa, \
-        expected_asm_##isa + arraysize(expected_asm_##isa)); \
-    std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
-        expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
-    TestImpl(isa, #isa, expected_asm, expected_cfi); \
+#define TEST_ISA(isa)                                         \
+  TEST_F(OptimizingCFITest, isa) {                            \
+    std::vector<uint8_t> expected_asm(                        \
+        expected_asm_##isa,                                   \
+        expected_asm_##isa + arraysize(expected_asm_##isa));  \
+    std::vector<uint8_t> expected_cfi(                        \
+        expected_cfi_##isa,                                   \
+        expected_cfi_##isa + arraysize(expected_cfi_##isa));  \
+    TestImpl(isa, #isa, expected_asm, expected_cfi);          \
   }
 
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
 TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
 TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
 TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+TEST_ISA(kMips64)
+#endif
 
-#endif  // __ANDROID__
+#ifdef ART_ENABLE_CODEGEN_arm
+TEST_F(OptimizingCFITest, kThumb2Adjust) {
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kThumb2_adjust,
+      expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kThumb2_adjust,
+      expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust));
+  SetUpFrame(kThumb2);
+#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())->
+  Label target;
+  __ CompareAndBranchIfZero(arm::R0, &target);
+  // Push the target out of range of CBZ.
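+  // (65 two-byte LDRs span ~130 bytes, beyond the +126-byte reach of CBZ, so
+  // the assembler must expand it, which is what this test exercises.)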
+  for (size_t i = 0; i != 65; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+TEST_F(OptimizingCFITest, kMipsAdjust) {
+  // One NOP in the delay slot plus (1 << 15) NOPs; at 4 bytes each they span
+  // 1 << 17 bytes, which exceeds the maximum 18-bit signed branch offset.
+  static constexpr size_t kNumNops = 1u + (1u << 15);
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kMips_adjust_head,
+      expected_asm_kMips_adjust_head + arraysize(expected_asm_kMips_adjust_head));
+  expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+  expected_asm.insert(
+      expected_asm.end(),
+      expected_asm_kMips_adjust_tail,
+      expected_asm_kMips_adjust_tail + arraysize(expected_asm_kMips_adjust_tail));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kMips_adjust,
+      expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust));
+  SetUpFrame(kMips);
+#define __ down_cast<mips::MipsAssembler*>(GetCodeGenerator()->GetAssembler())->
+  mips::MipsLabel target;
+  __ Beqz(mips::A0, &target);
+  // Push the target out of range of BEQZ.
+  for (size_t i = 0; i != kNumNops; ++i) {
+    __ Nop();
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+TEST_F(OptimizingCFITest, kMips64Adjust) {
+  // One NOP in the forbidden slot plus (1 << 15) NOPs; at 4 bytes each they
+  // span 1 << 17 bytes, which exceeds the maximum 18-bit signed branch offset.
+  static constexpr size_t kNumNops = 1u + (1u << 15);
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kMips64_adjust_head,
+      expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head));
+  expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+  expected_asm.insert(
+      expected_asm.end(),
+      expected_asm_kMips64_adjust_tail,
+      expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kMips64_adjust,
+      expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust));
+  SetUpFrame(kMips64);
+#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())->
+  mips64::Mips64Label target;
+  __ Beqc(mips64::A1, mips64::A2, &target);
+  // Push the target out of range of BEQC.
+  for (size_t i = 0; i != kNumNops; ++i) {
+    __ Nop();
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
+}
+#endif
+
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 2c2c55f..05eb063 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,21 +32,21 @@
 // 0x00000012: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9,
-    0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
+    0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: str x0, [sp, #-64]!
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp x20, x21, [sp, #40]
+// 0x00000004: str x20, [sp, #40]
 // 0x00000008: .cfi_offset: r20 at cfa-24
-// 0x00000008: .cfi_offset: r21 at cfa-16
-// 0x00000008: str lr, [sp, #56]
+// 0x00000008: stp x21, lr, [sp, #48]
+// 0x0000000c: .cfi_offset: r21 at cfa-16
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
 // 0x00000010: .cfi_offset_extended: r72 at cfa-40
@@ -55,10 +55,10 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldp x20, x21, [sp, #40]
+// 0x00000014: ldr x20, [sp, #40]
 // 0x00000018: .cfi_restore: r20
-// 0x00000018: .cfi_restore: r21
-// 0x00000018: ldr lr, [sp, #56]
+// 0x00000018: ldp x21, lr, [sp, #48]
+// 0x0000001c: .cfi_restore: r21
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
 // 0x00000020: .cfi_def_cfa_offset: 0
@@ -138,3 +138,321 @@
 // 0x0000002c: ret
 // 0x0000002d: .cfi_restore_state
 // 0x0000002d: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips[] = {
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
+    0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
+    0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
+    0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7,
+    0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48,
+    0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r17, +56(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-8
+// 0x0000000c: sw r16, +52(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-12
+// 0x00000010: sdc1 f22, +40(r29)
+// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: lw r31, +60(r29)
+// 0x00000020: .cfi_restore: r31
+// 0x00000020: lw r17, +56(r29)
+// 0x00000024: .cfi_restore: r17
+// 0x00000024: lw r16, +52(r29)
+// 0x00000028: .cfi_restore: r16
+// 0x00000028: ldc1 f22, +40(r29)
+// 0x0000002c: ldc1 f20, +32(r29)
+// 0x00000030: addiu r29, r29, 64
+// 0x00000034: .cfi_def_cfa_offset: 0
+// 0x00000034: jr r31
+// 0x00000038: nop
+// 0x0000003c: .cfi_restore_state
+// 0x0000003c: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67,
+    0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
+    0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
+    0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x44, 0x0A, 0x44,
+    0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
+    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: .cfi_offset: r57 at cfa-32
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: .cfi_offset: r56 at cfa-40
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: .cfi_remember_state
+// 0x00000020: daddiu r29, r29, 24
+// 0x00000024: .cfi_def_cfa_offset: 40
+// 0x00000024: ldc1 f24, +0(r29)
+// 0x00000028: .cfi_restore: r56
+// 0x00000028: ldc1 f25, +8(r29)
+// 0x0000002c: .cfi_restore: r57
+// 0x0000002c: ld r16, +16(r29)
+// 0x00000030: .cfi_restore: r16
+// 0x00000030: ld r17, +24(r29)
+// 0x00000034: .cfi_restore: r17
+// 0x00000034: ld r31, +32(r29)
+// 0x00000038: .cfi_restore: r31
+// 0x00000038: daddiu r29, r29, 40
+// 0x0000003c: .cfi_def_cfa_offset: 0
+// 0x0000003c: jr r31
+// 0x00000040: nop
+// 0x00000044: .cfi_restore_state
+// 0x00000044: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28,
+    0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x0B, 0xB0, 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
+    0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
+    0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
+    0x0E, 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: cmp r0, #0
+// 0x0000000c: beq +128 (0x00000090)
+// 0x0000000e: ldr r0, [r0, #0]
+// 0x00000010: ldr r0, [r0, #0]
+// 0x00000012: ldr r0, [r0, #0]
+// 0x00000014: ldr r0, [r0, #0]
+// 0x00000016: ldr r0, [r0, #0]
+// 0x00000018: ldr r0, [r0, #0]
+// 0x0000001a: ldr r0, [r0, #0]
+// 0x0000001c: ldr r0, [r0, #0]
+// 0x0000001e: ldr r0, [r0, #0]
+// 0x00000020: ldr r0, [r0, #0]
+// 0x00000022: ldr r0, [r0, #0]
+// 0x00000024: ldr r0, [r0, #0]
+// 0x00000026: ldr r0, [r0, #0]
+// 0x00000028: ldr r0, [r0, #0]
+// 0x0000002a: ldr r0, [r0, #0]
+// 0x0000002c: ldr r0, [r0, #0]
+// 0x0000002e: ldr r0, [r0, #0]
+// 0x00000030: ldr r0, [r0, #0]
+// 0x00000032: ldr r0, [r0, #0]
+// 0x00000034: ldr r0, [r0, #0]
+// 0x00000036: ldr r0, [r0, #0]
+// 0x00000038: ldr r0, [r0, #0]
+// 0x0000003a: ldr r0, [r0, #0]
+// 0x0000003c: ldr r0, [r0, #0]
+// 0x0000003e: ldr r0, [r0, #0]
+// 0x00000040: ldr r0, [r0, #0]
+// 0x00000042: ldr r0, [r0, #0]
+// 0x00000044: ldr r0, [r0, #0]
+// 0x00000046: ldr r0, [r0, #0]
+// 0x00000048: ldr r0, [r0, #0]
+// 0x0000004a: ldr r0, [r0, #0]
+// 0x0000004c: ldr r0, [r0, #0]
+// 0x0000004e: ldr r0, [r0, #0]
+// 0x00000050: ldr r0, [r0, #0]
+// 0x00000052: ldr r0, [r0, #0]
+// 0x00000054: ldr r0, [r0, #0]
+// 0x00000056: ldr r0, [r0, #0]
+// 0x00000058: ldr r0, [r0, #0]
+// 0x0000005a: ldr r0, [r0, #0]
+// 0x0000005c: ldr r0, [r0, #0]
+// 0x0000005e: ldr r0, [r0, #0]
+// 0x00000060: ldr r0, [r0, #0]
+// 0x00000062: ldr r0, [r0, #0]
+// 0x00000064: ldr r0, [r0, #0]
+// 0x00000066: ldr r0, [r0, #0]
+// 0x00000068: ldr r0, [r0, #0]
+// 0x0000006a: ldr r0, [r0, #0]
+// 0x0000006c: ldr r0, [r0, #0]
+// 0x0000006e: ldr r0, [r0, #0]
+// 0x00000070: ldr r0, [r0, #0]
+// 0x00000072: ldr r0, [r0, #0]
+// 0x00000074: ldr r0, [r0, #0]
+// 0x00000076: ldr r0, [r0, #0]
+// 0x00000078: ldr r0, [r0, #0]
+// 0x0000007a: ldr r0, [r0, #0]
+// 0x0000007c: ldr r0, [r0, #0]
+// 0x0000007e: ldr r0, [r0, #0]
+// 0x00000080: ldr r0, [r0, #0]
+// 0x00000082: ldr r0, [r0, #0]
+// 0x00000084: ldr r0, [r0, #0]
+// 0x00000086: ldr r0, [r0, #0]
+// 0x00000088: ldr r0, [r0, #0]
+// 0x0000008a: ldr r0, [r0, #0]
+// 0x0000008c: ldr r0, [r0, #0]
+// 0x0000008e: ldr r0, [r0, #0]
+// 0x00000090: .cfi_remember_state
+// 0x00000090: add sp, sp, #44
+// 0x00000092: .cfi_def_cfa_offset: 20
+// 0x00000092: vpop.f32 {s16-s17}
+// 0x00000096: .cfi_def_cfa_offset: 12
+// 0x00000096: .cfi_restore_extended: r80
+// 0x00000096: .cfi_restore_extended: r81
+// 0x00000096: pop {r5, r6, pc}
+// 0x00000098: .cfi_restore_state
+// 0x00000098: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
+    0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
+    0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+    0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
+    0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
+    0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
+};
+static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
+    0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F,
+    0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27,
+    0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips_adjust[] = {
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
+    0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
+    0x40,
+};
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r17, +56(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-8
+// 0x0000000c: sw r16, +52(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-12
+// 0x00000010: sdc1 f22, +40(r29)
+// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: bne r0, r4, 0x00000040 ; +36
+// 0x00000020: addiu r29, r29, -4
+// 0x00000024: .cfi_def_cfa_offset: 68
+// 0x00000024: sw r31, +0(r29)
+// 0x00000028: bltzal r0, 0x0000002c ; +4
+// 0x0000002c: lui r1, 0x20000
+// 0x00000030: ori r1, r1, 24
+// 0x00000034: addu r1, r1, r31
+// 0x00000038: lw r31, +0(r29)
+// 0x0000003c: jr r1
+// 0x00000040: addiu r29, r29, 4
+// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x00000044: nop
+//             ...
+// 0x00020044: nop
+// 0x00020048: .cfi_remember_state
+// 0x00020048: lw r31, +60(r29)
+// 0x0002004c: .cfi_restore: r31
+// 0x0002004c: lw r17, +56(r29)
+// 0x00020050: .cfi_restore: r17
+// 0x00020050: lw r16, +52(r29)
+// 0x00020054: .cfi_restore: r16
+// 0x00020054: ldc1 f22, +40(r29)
+// 0x00020058: ldc1 f20, +32(r29)
+// 0x0002005c: addiu r29, r29, 64
+// 0x00020060: .cfi_def_cfa_offset: 0
+// 0x00020060: jr r31
+// 0x00020064: nop
+// 0x00020068: .cfi_restore_state
+// 0x00020068: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
+    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60,
+    0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+};
+static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
+    0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
+    0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
+    0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
+    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x14, 0x00,
+    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
+    0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: .cfi_offset: r57 at cfa-32
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: .cfi_offset: r56 at cfa-40
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: bnec r5, r6, 0x0000002c ; +12
+// 0x00000024: auipc r1, 2
+// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080
+// 0x0000002c: nop
+//             ...
+// 0x0002002c: nop
+// 0x00020030: .cfi_remember_state
+// 0x00020030: daddiu r29, r29, 24
+// 0x00020034: .cfi_def_cfa_offset: 40
+// 0x00020034: ldc1 f24, +0(r29)
+// 0x00020038: .cfi_restore: r56
+// 0x00020038: ldc1 f25, +8(r29)
+// 0x0002003c: .cfi_restore: r57
+// 0x0002003c: ld r16, +16(r29)
+// 0x00020040: .cfi_restore: r16
+// 0x00020040: ld r17, +24(r29)
+// 0x00020044: .cfi_restore: r17
+// 0x00020044: ld r31, +32(r29)
+// 0x00020048: .cfi_restore: r31
+// 0x00020048: daddiu r29, r29, 40
+// 0x0002004c: .cfi_def_cfa_offset: 0
+// 0x0002004c: jr r31
+// 0x00020050: nop
+// 0x00020054: .cfi_restore_state
+// 0x00020054: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d6f2543..cc9cbda 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -17,14 +17,28 @@
 #include "optimizing_compiler.h"
 
 #include <fstream>
+#include <memory>
 #include <stdint.h>
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "dex_cache_array_fixups_mips.h"
+#include "pc_relative_fixups_mips.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_x86
-#include "constant_area_fixups_x86.h"
+#include "pc_relative_fixups_x86.h"
+#endif
+
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#include "x86_memory_gen.h"
 #endif
 
 #include "art_method-inl.h"
@@ -33,7 +47,6 @@
 #include "base/dumpable.h"
 #include "base/macros.h"
 #include "base/timing_logger.h"
-#include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
 #include "code_generator.h"
@@ -41,10 +54,11 @@
 #include "compiler.h"
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
+#include "debug/elf_debug_writer.h"
+#include "debug/method_debug_info.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/verified_method.h"
 #include "dex/verification_results.h"
-#include "driver/compiler_driver.h"
+#include "dex/verified_method.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
@@ -55,23 +69,34 @@
 #include "induction_var_analysis.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
+#include "instruction_simplifier_arm.h"
 #include "intrinsics.h"
-#include "licm.h"
+#include "jit/debugger_interface.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "jni/quick/jni_compiler.h"
+#include "licm.h"
 #include "load_store_elimination.h"
 #include "nodes.h"
+#include "oat_quick_method_header.h"
 #include "prepare_for_register_allocation.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
+#include "select_generator.h"
+#include "sharpening.h"
 #include "side_effects_analysis.h"
 #include "ssa_builder.h"
-#include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
+#include "ssa_phi_elimination.h"
 #include "utils/assembler.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
 
+static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
+
+static constexpr const char* kPassNameSeparator = "$";
+
 /**
  * Used by the code generator, to allocate the code in a vector.
  */
@@ -108,24 +133,23 @@
 class PassObserver : public ValueObject {
  public:
   PassObserver(HGraph* graph,
-               const char* method_name,
                CodeGenerator* codegen,
                std::ostream* visualizer_output,
                CompilerDriver* compiler_driver)
       : graph_(graph),
-        method_name_(method_name),
+        cached_method_name_(),
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
-        timing_logger_(method_name, true, true),
+        timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
-        visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
+        visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
-      if (!IsVerboseMethod(compiler_driver, method_name)) {
+      if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
         timing_logger_enabled_ = visualizer_enabled_ = false;
       }
       if (visualizer_enabled_) {
-        visualizer_.PrintHeader(method_name_);
+        visualizer_.PrintHeader(GetMethodName());
         codegen->SetDisassemblyInformation(&disasm_info_);
       }
     }
@@ -133,7 +157,7 @@
 
   ~PassObserver() {
     if (timing_logger_enabled_) {
-      LOG(INFO) << "TIMINGS " << method_name_;
+      LOG(INFO) << "TIMINGS " << GetMethodName();
       LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
     }
   }
@@ -146,8 +170,17 @@
 
   void SetGraphInBadState() { graph_in_bad_state_ = true; }
 
+  const char* GetMethodName() {
+    // PrettyMethod() is expensive, so we delay calling it until we actually have to.
+    if (cached_method_name_.empty()) {
+      cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+    }
+    return cached_method_name_.c_str();
+  }
+
  private:
   void StartPass(const char* pass_name) {
+    VLOG(compiler) << "Starting pass: " << pass_name;
     // Dump graph first, then start timer.
     if (visualizer_enabled_) {
       visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
@@ -169,18 +202,10 @@
     // Validate the HGraph if running in debug mode.
     if (kIsDebugBuild) {
       if (!graph_in_bad_state_) {
-        if (graph_->IsInSsaForm()) {
-          SSAChecker checker(graph_);
-          checker.Run();
-          if (!checker.IsValid()) {
-            LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<SSAChecker>(checker);
-          }
-        } else {
-          GraphChecker checker(graph_);
-          checker.Run();
-          if (!checker.IsValid()) {
-            LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
-          }
+        GraphChecker checker(graph_);
+        checker.Run();
+        if (!checker.IsValid()) {
+          LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
         }
       }
     }
@@ -204,7 +229,8 @@
   }
 
   HGraph* const graph_;
-  const char* method_name_;
+
+  std::string cached_method_name_;
 
   bool timing_logger_enabled_;
   TimingLogger timing_logger_;
@@ -243,10 +269,9 @@
 class OptimizingCompiler FINAL : public Compiler {
  public:
   explicit OptimizingCompiler(CompilerDriver* driver);
-  ~OptimizingCompiler();
+  ~OptimizingCompiler() OVERRIDE;
 
-  bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const
-      OVERRIDE;
+  bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
 
   CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
@@ -257,19 +282,15 @@
                           const DexFile& dex_file,
                           Handle<mirror::DexCache> dex_cache) const OVERRIDE;
 
-  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
-                             uint32_t access_flags,
-                             InvokeType invoke_type,
-                             uint16_t class_def_idx,
-                             uint32_t method_idx,
-                             jobject class_loader,
-                             const DexFile& dex_file,
-                             Handle<mirror::DexCache> dex_cache) const;
-
   CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
-                             const DexFile& dex_file) const OVERRIDE {
-    return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
+                             const DexFile& dex_file,
+                             JniOptimizationFlags optimization_flags) const OVERRIDE {
+    return ArtQuickJniCompileMethod(GetCompilerDriver(),
+                                    access_flags,
+                                    method_idx,
+                                    dex_file,
+                                    optimization_flags);
   }
 
   uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
@@ -278,8 +299,6 @@
         InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
   }
 
-  void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
-
   void Init() OVERRIDE;
 
   void UnInit() const OVERRIDE;
@@ -290,23 +309,61 @@
     }
   }
 
+  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
+      OVERRIDE
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
-  // Whether we should run any optimization or register allocation. If false, will
-  // just run the code generation after the graph was built.
-  const bool run_optimizations_;
+  void RunOptimizations(HGraph* graph,
+                        CodeGenerator* codegen,
+                        CompilerDriver* driver,
+                        const DexCompilationUnit& dex_compilation_unit,
+                        PassObserver* pass_observer,
+                        StackHandleScopeCollection* handles) const;
 
-  // Optimize and compile `graph`.
-  CompiledMethod* CompileOptimized(HGraph* graph,
-                                   CodeGenerator* codegen,
-                                   CompilerDriver* driver,
-                                   const DexCompilationUnit& dex_compilation_unit,
-                                   PassObserver* pass_observer) const;
+  void RunOptimizations(HOptimization* optimizations[],
+                        size_t length,
+                        PassObserver* pass_observer) const;
 
-  // Just compile without doing optimizations.
-  CompiledMethod* CompileBaseline(CodeGenerator* codegen,
-                                  CompilerDriver* driver,
-                                  const DexCompilationUnit& dex_compilation_unit,
-                                  PassObserver* pass_observer) const;
+ private:
+  // Create a 'CompiledMethod' for an optimized graph.
+  CompiledMethod* Emit(ArenaAllocator* arena,
+                       CodeVectorAllocator* code_allocator,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver,
+                       const DexFile::CodeItem* item) const;
+
+  // Try compiling a method and return the code generator used for
+  // compiling it.
+  // This method:
+  // 1) Builds the graph. Returns null if it failed to build it.
+  // 2) Transforms the graph to SSA. Returns null if it failed.
+  // 3) Runs optimizations on the graph, including register allocation.
+  // 4) Generates code with the `code_allocator` provided.
+  CodeGenerator* TryCompile(ArenaAllocator* arena,
+                            CodeVectorAllocator* code_allocator,
+                            const DexFile::CodeItem* code_item,
+                            uint32_t access_flags,
+                            InvokeType invoke_type,
+                            uint16_t class_def_idx,
+                            uint32_t method_idx,
+                            jobject class_loader,
+                            const DexFile& dex_file,
+                            Handle<mirror::DexCache> dex_cache,
+                            ArtMethod* method,
+                            bool osr) const;
+
+  void MaybeRunInliner(HGraph* graph,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver,
+                       const DexCompilationUnit& dex_compilation_unit,
+                       PassObserver* pass_observer,
+                       StackHandleScopeCollection* handles) const;
+
+  void RunArchOptimizations(InstructionSet instruction_set,
+                            HGraph* graph,
+                            CodeGenerator* codegen,
+                            PassObserver* pass_observer) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
@@ -318,21 +375,19 @@
 static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
 
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
-    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
-      run_optimizations_(
-          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
+    : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
 
 void OptimizingCompiler::Init() {
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
-  const std::string cfg_file_name = driver->GetDumpCfgFileName();
+  const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
   if (!cfg_file_name.empty()) {
     CHECK_EQ(driver->GetThreadCount(), 1U)
       << "Graph visualizer requires the compiler to run single-threaded. "
       << "Invoke the compiler with '-j1'.";
     std::ios_base::openmode cfg_file_mode =
-        driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+        driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
     visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
   }
   if (driver->GetDumpStats()) {
@@ -349,12 +404,8 @@
   }
 }
 
-void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const {
-}
-
 bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
-                                          const DexFile& dex_file ATTRIBUTE_UNUSED,
-                                          CompilationUnit* cu ATTRIBUTE_UNUSED) const {
+                                          const DexFile& dex_file ATTRIBUTE_UNUSED) const {
   return true;
 }
 
@@ -368,57 +419,212 @@
       || instruction_set == kX86_64;
 }
 
-static void RunOptimizations(HOptimization* optimizations[],
-                             size_t length,
-                             PassObserver* pass_observer) {
+// Read barriers are supported on ARM, ARM64, x86 and x86-64 at the moment.
+// TODO: Add support for other architectures and remove this function.
+static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
+  return instruction_set == kArm64
+      || instruction_set == kThumb2
+      || instruction_set == kX86
+      || instruction_set == kX86_64;
+}
+
+// Strip the pass name suffix to get the optimization name.
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
+  size_t pos = pass_name.find(kPassNameSeparator);
+  return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+}
+
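A self-contained sketch of the suffix-stripping rule, assuming kPassNameSeparator is '$' (consistent with pass names such as "dead_code_elimination$initial" used later in this change):

#include <cassert>
#include <string>

// Assumed separator; pass names look like "optimization$qualifier".
static const char kPassNameSeparator = '$';

static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
  size_t pos = pass_name.find(kPassNameSeparator);
  return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
}

int main() {
  assert(ConvertPassNameToOptimizationName("dead_code_elimination$initial")
         == "dead_code_elimination");
  // Names without a qualifier pass through unchanged.
  assert(ConvertPassNameToOptimizationName("side_effects") == "side_effects");
}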
+static HOptimization* BuildOptimization(
+    const std::string& pass_name,
+    ArenaAllocator* arena,
+    HGraph* graph,
+    OptimizingCompilerStats* stats,
+    CodeGenerator* codegen,
+    CompilerDriver* driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    StackHandleScopeCollection* handles,
+    SideEffectsAnalysis* most_recent_side_effects,
+    HInductionVarAnalysis* most_recent_induction) {
+  std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
+  if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
+    CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
+    return new (arena) BoundsCheckElimination(graph,
+                                              *most_recent_side_effects,
+                                              most_recent_induction);
+  } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str());
+  } else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
+    return new (arena) HConstantFolding(graph, pass_name.c_str());
+  } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
+    return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str());
+  } else if (opt_name == HInliner::kInlinerPassName) {
+    size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
+    return new (arena) HInliner(graph,                   // outer_graph
+                                graph,                   // outermost_graph
+                                codegen,
+                                dex_compilation_unit,    // outer_compilation_unit
+                                dex_compilation_unit,    // outermost_compilation_unit
+                                driver,
+                                handles,
+                                stats,
+                                number_of_dex_registers,
+                                /* depth */ 0);
+  } else if (opt_name == HSharpening::kSharpeningPassName) {
+    return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+  } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
+    return new (arena) HSelectGenerator(graph, stats);
+  } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+    return new (arena) HInductionVarAnalysis(graph);
+  } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
+    return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+  } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
+    return new (arena) IntrinsicsRecognizer(graph, driver, stats);
+  } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) LICM(graph, *most_recent_side_effects, stats);
+  } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+  } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+    return new (arena) SideEffectsAnalysis(graph);
+#ifdef ART_ENABLE_CODEGEN_arm
+  } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
+    return new (arena) arm::DexCacheArrayFixups(graph, stats);
+  } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
+    return new (arena) arm::InstructionSimplifierArm(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+  } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
+    return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+  } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
+    return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+  } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
+    return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+  } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
+    return new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+  } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) {
+    return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+#endif
+  }
+  return nullptr;
+}
+
+static ArenaVector<HOptimization*> BuildOptimizations(
+    const std::vector<std::string>& pass_names,
+    ArenaAllocator* arena,
+    HGraph* graph,
+    OptimizingCompilerStats* stats,
+    CodeGenerator* codegen,
+    CompilerDriver* driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    StackHandleScopeCollection* handles) {
+  // A few HOptimization constructors require SideEffectsAnalysis or HInductionVarAnalysis
+  // instances. This method assumes that each of them expects the nearest instance preceding it
+  // in the pass name list.
+  SideEffectsAnalysis* most_recent_side_effects = nullptr;
+  HInductionVarAnalysis* most_recent_induction = nullptr;
+  ArenaVector<HOptimization*> ret(arena->Adapter());
+  for (const std::string& pass_name : pass_names) {
+    HOptimization* opt = BuildOptimization(
+        pass_name,
+        arena,
+        graph,
+        stats,
+        codegen,
+        driver,
+        dex_compilation_unit,
+        handles,
+        most_recent_side_effects,
+        most_recent_induction);
+    CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
+    ret.push_back(opt);
+
+    std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
+    if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+      most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
+    } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+      most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+    }
+  }
+  return ret;
+}
+
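The "nearest preceding instance" rule in BuildOptimizations() can be modeled compactly. A toy sketch under assumed names (the Pass struct and Bind() helper are illustrative, not ART types): a consumer pass such as GVN binds to the most recent producer pass named before it in the list.

#include <cassert>
#include <string>
#include <vector>

struct Pass {
  std::string name;
  int depends_on_index;  // Index of the producer it bound to, or -1.
};

static std::vector<Pass> Bind(const std::vector<std::string>& names) {
  std::vector<Pass> result;
  int most_recent_producer = -1;
  for (const std::string& name : names) {
    if (name == "side_effects") {
      // Remember this producer for any consumer that follows.
      most_recent_producer = static_cast<int>(result.size());
      result.push_back({name, -1});
    } else if (name == "GVN") {
      // A consumer requires a preceding producer in the list.
      assert(most_recent_producer != -1);
      result.push_back({name, most_recent_producer});
    } else {
      result.push_back({name, -1});
    }
  }
  return result;
}

int main() {
  std::vector<Pass> passes = Bind({"side_effects", "GVN", "side_effects", "GVN"});
  assert(passes[1].depends_on_index == 0);  // First GVN binds to the first analysis.
  assert(passes[3].depends_on_index == 2);  // Second GVN binds to the nearest one.
}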
+void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
+                                          size_t length,
+                                          PassObserver* pass_observer) const {
   for (size_t i = 0; i < length; ++i) {
     PassScope scope(optimizations[i]->GetPassName(), pass_observer);
     optimizations[i]->Run();
   }
 }
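PassScope above is an RAII helper; a hedged sketch of the idea, modeling only the timing half (the real PassScope also drives graph dumping and validation). Because the bookkeeping lives in a destructor, no exit path through a pass can skip it.

#include <chrono>
#include <iostream>

class ScopedPassTimer {
 public:
  explicit ScopedPassTimer(const char* pass_name)
      : name_(pass_name), start_(std::chrono::steady_clock::now()) {}
  ~ScopedPassTimer() {
    auto end = std::chrono::steady_clock::now();
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start_);
    std::cout << name_ << ": " << us.count() << "us" << std::endl;
  }

 private:
  const char* name_;
  std::chrono::steady_clock::time_point start_;
};

struct Pass {
  const char* name;
  void Run() { /* pass body elided */ }
};

int main() {
  Pass passes[] = {{"constant_folding"}, {"dead_code_elimination"}};
  for (Pass& pass : passes) {
    ScopedPassTimer scope(pass.name);  // Bookkeeping bound to scope lifetime.
    pass.Run();
  }
}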
 
-static void MaybeRunInliner(HGraph* graph,
-                            CompilerDriver* driver,
-                            OptimizingCompilerStats* stats,
-                            const DexCompilationUnit& dex_compilation_unit,
-                            PassObserver* pass_observer,
-                            StackHandleScopeCollection* handles) {
+void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* driver,
+                                         const DexCompilationUnit& dex_compilation_unit,
+                                         PassObserver* pass_observer,
+                                         StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   const CompilerOptions& compiler_options = driver->GetCompilerOptions();
   bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
       && (compiler_options.GetInlineMaxCodeUnits() > 0);
   if (!should_inline) {
     return;
   }
-
-  ArenaAllocator* arena = graph->GetArena();
-  HInliner* inliner = new (arena) HInliner(
-    graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
-  ReferenceTypePropagation* type_propagation =
-    new (arena) ReferenceTypePropagation(graph, handles,
-        "reference_type_propagation_after_inlining");
-
-  HOptimization* optimizations[] = {
-    inliner,
-    // Run another type propagation phase: inlining will open up more opportunities
-    // to remove checkcast/instanceof and null checks.
-    type_propagation,
-  };
+  size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
+  HInliner* inliner = new (graph->GetArena()) HInliner(
+      graph,                   // outer_graph
+      graph,                   // outermost_graph
+      codegen,
+      dex_compilation_unit,    // outer_compilation_unit
+      dex_compilation_unit,    // outermost_compilation_unit
+      driver,
+      handles,
+      stats,
+      number_of_dex_registers,
+      /* depth */ 0);
+  HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
 }
 
-static void RunArchOptimizations(InstructionSet instruction_set,
-                                 HGraph* graph,
-                                 OptimizingCompilerStats* stats,
-                                 PassObserver* pass_observer) {
+void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
+                                              HGraph* graph,
+                                              CodeGenerator* codegen,
+                                              PassObserver* pass_observer) const {
+  UNUSED(codegen);  // To avoid a compilation error when compiling for svelte builds.
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      arm::InstructionSimplifierArm* simplifier =
+          new (arena) arm::InstructionSimplifierArm(graph, stats);
+      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+      HOptimization* arm_optimizations[] = {
+        simplifier,
+        side_effects,
+        gvn,
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
           new (arena) arm64::InstructionSimplifierArm64(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HOptimization* arm64_optimizations[] = {
         simplifier,
         side_effects,
@@ -428,123 +634,60 @@
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips: {
+      mips::PcRelativeFixups* pc_relative_fixups =
+          new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+      mips::DexCacheArrayFixups* dex_cache_array_fixups =
+          new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+      HOptimization* mips_optimizations[] = {
+          pc_relative_fixups,
+          dex_cache_array_fixups
+      };
+      RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
-      x86::ConstantAreaFixups* constant_area_fixups =
-          new (arena) x86::ConstantAreaFixups(graph, stats);
+      x86::PcRelativeFixups* pc_relative_fixups =
+          new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
       HOptimization* x86_optimizations[] = {
-        constant_area_fixups
+          pc_relative_fixups,
+          memory_gen
       };
       RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64: {
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+      HOptimization* x86_64_optimizations[] = {
+          memory_gen
+      };
+      RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer);
+      break;
+    }
+#endif
     default:
       break;
   }
 }
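The dispatch shape of RunArchOptimizations() — compile-time #ifdef guards plus a runtime switch — in a compilable miniature; the EXAMPLE_ENABLE_* macros and the messages are placeholders, not ART build flags:

#include <cstdio>

enum InstructionSet { kArm, kArm64, kX86, kX86_64, kMips };

static void RunArchPasses(InstructionSet isa) {
  switch (isa) {
#ifdef EXAMPLE_ENABLE_ARM64
    case kArm64:
      std::puts("running arm64 simplifier + GVN$after_arch");
      break;
#endif
#ifdef EXAMPLE_ENABLE_X86
    case kX86:
      std::puts("running x86 pc-relative fixups + memory operand generation");
      break;
#endif
    default:
      break;  // No architecture-specific passes for this ISA.
  }
}

int main() {
  RunArchPasses(kArm64);  // A no-op unless EXAMPLE_ENABLE_ARM64 is defined.
}

The #ifdef guards keep unsupported backends out of the binary entirely, while the switch picks the pass list for the target at runtime.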
 
-static void RunOptimizations(HGraph* graph,
-                             CompilerDriver* driver,
-                             OptimizingCompilerStats* stats,
-                             const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer,
-                             StackHandleScopeCollection* handles) {
-  ArenaAllocator* arena = graph->GetArena();
-  HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
-      graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
-  HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
-      graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
-  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
-  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
-  HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
-  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
-  SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
-  HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
-  ReferenceTypePropagation* type_propagation =
-      new (arena) ReferenceTypePropagation(graph, handles);
-  InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_after_types");
-  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_after_bce");
-  InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_before_codegen");
-
-  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
-
-  HOptimization* optimizations1[] = {
-    intrinsics,
-    fold1,
-    simplify1,
-    type_propagation,
-    dce1,
-    simplify2
-  };
-
-  RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
-
-  // TODO: Update passes incompatible with try/catch so we have the same
-  //       pipeline for all methods.
-  if (graph->HasTryCatch()) {
-    HOptimization* optimizations2[] = {
-      side_effects,
-      gvn,
-      dce2,
-      // The codegen has a few assumptions that only the instruction simplifier
-      // can satisfy. For example, the code generator does not expect to see a
-      // HTypeConversion from a type to the same type.
-      simplify4,
-    };
-
-    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-  } else {
-    MaybeRunInliner(graph, driver, stats, dex_compilation_unit, pass_observer, handles);
-
-    HOptimization* optimizations2[] = {
-      // BooleanSimplifier depends on the InstructionSimplifier removing
-      // redundant suspend checks to recognize empty blocks.
-      boolean_simplify,
-      fold2,  // TODO: if we don't inline we can also skip fold2.
-      side_effects,
-      gvn,
-      licm,
-      induction,
-      bce,
-      simplify3,
-      lse,
-      dce2,
-      // The codegen has a few assumptions that only the instruction simplifier
-      // can satisfy. For example, the code generator does not expect to see a
-      // HTypeConversion from a type to the same type.
-      simplify4,
-    };
-
-    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-  }
-
-  RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
-}
-
-// The stack map we generate must be 4-byte aligned on ARM. Since existing
-// maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
-  size_t size = vector.size();
-  size_t aligned_size = RoundUp(size, 4);
-  for (; size < aligned_size; ++size) {
-    vector.push_back(0);
-  }
-  return ArrayRef<const uint8_t>(vector);
-}
-
 NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
-                              PassObserver* pass_observer) {
-  PrepareForRegisterAllocation(graph).Run();
+                              PassObserver* pass_observer,
+                              RegisterAllocator::Strategy strategy) {
+  {
+    PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
+                    pass_observer);
+    PrepareForRegisterAllocation(graph).Run();
+  }
   SsaLivenessAnalysis liveness(graph, codegen);
   {
     PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
@@ -552,10 +695,90 @@
   }
   {
     PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+    RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
   }
 }
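RegisterAllocator::Create() above is a strategy factory. A minimal sketch of that shape under assumed names (Allocator, Strategy, and the two concrete classes are illustrative, not ART's actual API):

#include <iostream>
#include <memory>

class Allocator {
 public:
  virtual ~Allocator() = default;
  virtual void AllocateRegisters() = 0;
};

class LinearScanAllocator : public Allocator {
 public:
  void AllocateRegisters() override { std::cout << "linear scan" << std::endl; }
};

class GraphColorAllocator : public Allocator {
 public:
  void AllocateRegisters() override { std::cout << "graph coloring" << std::endl; }
};

enum class Strategy { kLinearScan, kGraphColor };

// Factory: the caller names a strategy and receives a concrete allocator.
static std::unique_ptr<Allocator> Create(Strategy strategy) {
  switch (strategy) {
    case Strategy::kLinearScan: return std::make_unique<LinearScanAllocator>();
    case Strategy::kGraphColor: return std::make_unique<GraphColorAllocator>();
  }
  return nullptr;
}

int main() {
  Create(Strategy::kLinearScan)->AllocateRegisters();
}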
 
+void OptimizingCompiler::RunOptimizations(HGraph* graph,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* driver,
+                                          const DexCompilationUnit& dex_compilation_unit,
+                                          PassObserver* pass_observer,
+                                          StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
+  ArenaAllocator* arena = graph->GetArena();
+  if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
+    ArenaVector<HOptimization*> optimizations = BuildOptimizations(
+        *driver->GetCompilerOptions().GetPassesToRun(),
+        arena,
+        graph,
+        stats,
+        codegen,
+        driver,
+        dex_compilation_unit,
+        handles);
+    RunOptimizations(&optimizations[0], optimizations.size(), pass_observer);
+    return;
+  }
+
+  HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
+      graph, stats, "dead_code_elimination$initial");
+  HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, "dead_code_elimination$final");
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
+  HConstantFolding* fold2 = new (arena) HConstantFolding(
+      graph, "constant_folding$after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
+  SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
+  HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
+  HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+  InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
+      graph, stats, "instruction_simplifier$after_bce");
+  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
+      graph, stats, "instruction_simplifier$before_codegen");
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
+
+  HOptimization* optimizations1[] = {
+    intrinsics,
+    sharpening,
+    fold1,
+    simplify1,
+    dce1,
+  };
+  RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
+
+  MaybeRunInliner(graph, codegen, driver, dex_compilation_unit, pass_observer, handles);
+
+  HOptimization* optimizations2[] = {
+    // SelectGenerator depends on the InstructionSimplifier removing
+    // redundant suspend checks to recognize empty blocks.
+    select_generator,
+    fold2,  // TODO: if we don't inline we can also skip fold2.
+    side_effects,
+    gvn,
+    licm,
+    induction,
+    bce,
+    fold3,  // Evaluates code generated by dynamic BCE.
+    simplify2,
+    lse,
+    dce2,
+    // The codegen has a few assumptions that only the instruction simplifier
+    // can satisfy. For example, the code generator does not expect to see a
+    // HTypeConversion from a type to the same type.
+    simplify3,
+  };
+  RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
+
+  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
+}
+
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
   ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
@@ -569,114 +792,52 @@
   return linker_patches;
 }
 
-CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
-                                                     CodeGenerator* codegen,
-                                                     CompilerDriver* compiler_driver,
-                                                     const DexCompilationUnit& dex_compilation_unit,
-                                                     PassObserver* pass_observer) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  soa.Self()->TransitionFromRunnableToSuspended(kNative);
-  RunOptimizations(graph, compiler_driver, compilation_stats_.get(),
-                   dex_compilation_unit, pass_observer, &handles);
-
-  AllocateRegisters(graph, codegen, pass_observer);
-
-  ArenaAllocator* arena = graph->GetArena();
-  CodeVectorAllocator allocator(arena);
-  DefaultSrcMap src_mapping_table;
-  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
-                         ? &src_mapping_table
-                         : nullptr);
-  codegen->CompileOptimized(&allocator);
-
+CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
+                                         CodeVectorAllocator* code_allocator,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* compiler_driver,
+                                         const DexFile::CodeItem* code_item) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
-  codegen->BuildStackMaps(&stack_map);
-
-  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
+  stack_map.resize(codegen->ComputeStackMapsSize());
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(allocator.GetMemory()),
+      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
       // Follow Quick's behavior and set the frame size to zero if it is
       // considered "empty" (see the definition of
       // art::CodeGenerator::HasEmptyFrame).
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      &src_mapping_table,
-      ArrayRef<const uint8_t>(),  // mapping_table.
+      ArrayRef<const SrcMapElem>(),
       ArrayRef<const uint8_t>(stack_map),
-      ArrayRef<const uint8_t>(),  // native_gc_map.
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
-  pass_observer->DumpDisassembly();
 
-  soa.Self()->TransitionFromSuspendedToRunnable();
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::CompileBaseline(
-    CodeGenerator* codegen,
-    CompilerDriver* compiler_driver,
-    const DexCompilationUnit& dex_compilation_unit,
-    PassObserver* pass_observer) const {
-  ArenaAllocator* arena = codegen->GetGraph()->GetArena();
-  CodeVectorAllocator allocator(arena);
-  DefaultSrcMap src_mapping_table;
-  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
-                         ? &src_mapping_table
-                         : nullptr);
-  codegen->CompileBaseline(&allocator);
-
-  ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
-  ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildMappingTable(&mapping_table);
-  ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildVMapTable(&vmap_table);
-  ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
-
-  MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
-  CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
-      compiler_driver,
-      codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(allocator.GetMemory()),
-      // Follow Quick's behavior and set the frame size to zero if it is
-      // considered "empty" (see the definition of
-      // art::CodeGenerator::HasEmptyFrame).
-      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
-      codegen->GetCoreSpillMask(),
-      codegen->GetFpuSpillMask(),
-      &src_mapping_table,
-      AlignVectorSize(mapping_table),
-      AlignVectorSize(vmap_table),
-      AlignVectorSize(gc_map),
-      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-      ArrayRef<const LinkerPatch>(linker_patches));
-  pass_observer->DumpDisassembly();
-  return compiled_method;
-}
-
-CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
-                                               uint32_t access_flags,
-                                               InvokeType invoke_type,
-                                               uint16_t class_def_idx,
-                                               uint32_t method_idx,
-                                               jobject class_loader,
-                                               const DexFile& dex_file,
-                                               Handle<mirror::DexCache> dex_cache) const {
-  std::string method_name = PrettyMethod(method_idx, dex_file);
+CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
+                                              CodeVectorAllocator* code_allocator,
+                                              const DexFile::CodeItem* code_item,
+                                              uint32_t access_flags,
+                                              InvokeType invoke_type,
+                                              uint16_t class_def_idx,
+                                              uint32_t method_idx,
+                                              jobject class_loader,
+                                              const DexFile& dex_file,
+                                              Handle<mirror::DexCache> dex_cache,
+                                              ArtMethod* method,
+                                              bool osr) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
-  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
-  // overflow checks) assume thumb2.
+  // Always use the Thumb-2 assembler: some runtime functionality
+  // (like implicit stack overflow checks) assumes Thumb-2.
   if (instruction_set == kArm) {
     instruction_set = kThumb2;
   }
@@ -687,6 +848,12 @@
     return nullptr;
   }
 
+  // When read barriers are enabled, do not attempt to compile for
+  // instruction sets that have no read barrier support.
+  if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) {
+    return nullptr;
+  }
+
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
@@ -696,140 +863,143 @@
   // code units is bigger than 128.
   static constexpr size_t kSpaceFilterOptimizingThreshold = 128;
   const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
-  if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
+  if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace)
       && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
     return nullptr;
   }
 
   DexCompilationUnit dex_compilation_unit(
-    nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item,
-    class_def_idx, method_idx, access_flags,
-    compiler_driver->GetVerifiedMethod(&dex_file, method_idx), dex_cache);
+      class_loader,
+      Runtime::Current()->GetClassLinker(),
+      dex_file,
+      code_item,
+      class_def_idx,
+      method_idx,
+      access_flags,
+      /* verified_method */ nullptr,
+      dex_cache);
 
   bool requires_barrier = dex_compilation_unit.IsConstructor()
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
                                                      dex_compilation_unit.GetDexFile(),
                                                      dex_compilation_unit.GetClassDefIndex());
-  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
-  HGraph* graph = new (&arena) HGraph(
-      &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
-      kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
 
-  bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
+  HGraph* graph = new (arena) HGraph(
+      arena,
+      dex_file,
+      method_idx,
+      requires_barrier,
+      compiler_driver->GetInstructionSet(),
+      kInvalidInvokeType,
+      compiler_driver->GetCompilerOptions().GetDebuggable(),
+      osr);
+
+  const uint8_t* interpreter_metadata = nullptr;
+  if (method == nullptr) {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::ClassLoader> loader(hs.NewHandle(
+        soa.Decode<mirror::ClassLoader*>(class_loader)));
+    method = compiler_driver->ResolveMethod(
+        soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
+  }
+  // For AOT compilation, we may not get a method, for example if its class is erroneous.
+  // JIT should always have a method.
+  DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr);
+  if (method != nullptr) {
+    graph->SetArtMethod(method);
+    ScopedObjectAccess soa(Thread::Current());
+    interpreter_metadata = method->GetQuickenedInfo();
+    uint16_t type_index = method->GetDeclaringClass()->GetDexTypeIndex();
+
+    // Update the dex cache if the type is not in it yet. Note that under AOT,
+    // the verifier must have set it, but under JIT, there's no guarantee, as we
+    // don't necessarily run the verifier.
+    // The compiler and the compiler driver assume the compiling class is
+    // in the dex cache.
+    if (dex_cache->GetResolvedType(type_index) == nullptr) {
+      dex_cache->SetResolvedType(type_index, method->GetDeclaringClass());
+    }
+  }
 
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
                             instruction_set,
                             *compiler_driver->GetInstructionSetFeatures(),
-                            compiler_driver->GetCompilerOptions()));
+                            compiler_driver->GetCompilerOptions(),
+                            compilation_stats_.get()));
   if (codegen.get() == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
-      compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
+      compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   PassObserver pass_observer(graph,
-                             method_name.c_str(),
                              codegen.get(),
                              visualizer_output_.get(),
                              compiler_driver);
 
-  const uint8_t* interpreter_metadata = nullptr;
+  VLOG(compiler) << "Building " << pass_observer.GetMethodName();
+
   {
     ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::ClassLoader> loader(hs.NewHandle(
-        soa.Decode<mirror::ClassLoader*>(class_loader)));
-    ArtMethod* art_method = compiler_driver->ResolveMethod(
-        soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
-    // We may not get a method, for example if its class is erroneous.
-    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
-    if (art_method != nullptr) {
-      interpreter_metadata = art_method->GetQuickenedInfo();
-    }
-  }
-  HGraphBuilder builder(graph,
-                        &dex_compilation_unit,
-                        &dex_compilation_unit,
-                        &dex_file,
-                        compiler_driver,
-                        compilation_stats_.get(),
-                        interpreter_metadata,
-                        dex_cache);
-
-  VLOG(compiler) << "Building " << method_name;
-
-  {
-    PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
-    if (!builder.BuildGraph(*code_item)) {
-      pass_observer.SetGraphInBadState();
-      return nullptr;
-    }
-  }
-
-  bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
-
-  // `run_optimizations_` is set explicitly (either through a compiler filter
-  // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
-  // to Quick.
-  bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
-  CompiledMethod* compiled_method = nullptr;
-  if (run_optimizations_ && can_allocate_registers) {
-    VLOG(compiler) << "Optimizing " << method_name;
+    StackHandleScopeCollection handles(soa.Self());
+    // Do not hold `mutator_lock_` between optimizations.
+    ScopedThreadSuspension sts(soa.Self(), kNative);
 
     {
-      PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-      if (!graph->TryBuildingSsa()) {
-        // We could not transform the graph to SSA, bailout.
-        LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
+      PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
+      HGraphBuilder builder(graph,
+                            &dex_compilation_unit,
+                            &dex_compilation_unit,
+                            &dex_file,
+                            *code_item,
+                            compiler_driver,
+                            compilation_stats_.get(),
+                            interpreter_metadata,
+                            dex_cache,
+                            &handles);
+      GraphAnalysisResult result = builder.BuildGraph();
+      if (result != kAnalysisSuccess) {
+        switch (result) {
+          case kAnalysisSkipped:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+            break;
+          case kAnalysisInvalidBytecode:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
+            break;
+          case kAnalysisFailThrowCatchLoop:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+            break;
+          case kAnalysisFailAmbiguousArrayOp:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+            break;
+          case kAnalysisSuccess:
+            UNREACHABLE();
+        }
         pass_observer.SetGraphInBadState();
         return nullptr;
       }
     }
 
-    compiled_method = CompileOptimized(graph,
-                                       codegen.get(),
-                                       compiler_driver,
-                                       dex_compilation_unit,
-                                       &pass_observer);
-  } else if (shouldOptimize && can_allocate_registers) {
-    LOG(FATAL) << "Could not allocate registers in optimizing compiler";
-    UNREACHABLE();
-  } else if (can_use_baseline) {
-    VLOG(compiler) << "Compile baseline " << method_name;
+    RunOptimizations(graph,
+                     codegen.get(),
+                     compiler_driver,
+                     dex_compilation_unit,
+                     &pass_observer,
+                     &handles);
 
-    if (!run_optimizations_) {
-      MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled);
-    } else if (!can_allocate_registers) {
-      MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
-    }
+    RegisterAllocator::Strategy regalloc_strategy =
+      compiler_options.GetRegisterAllocationStrategy();
+    AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
 
-    compiled_method = CompileBaseline(codegen.get(),
-                                      compiler_driver,
-                                      dex_compilation_unit,
-                                      &pass_observer);
+    codegen->Compile(code_allocator);
+    pass_observer.DumpDisassembly();
   }
 
-  if (kArenaAllocatorCountAllocations) {
-    if (arena.BytesAllocated() > 4 * MB) {
-      MemStats mem_stats(arena.GetMemStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
-    }
-  }
-
-  return compiled_method;
-}
-
-static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
-  // For access errors the compiler will use the unresolved helpers (e.g. HInvokeUnresolved).
-  uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
-  return (verified_method->GetEncounteredVerificationFailures() & (~unresolved_mask)) == 0;
+  return codegen.release();
 }
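TryCompile() ends with codegen.release(), handing ownership of the code generator back to the caller. A tiny illustration of that unique_ptr handoff:

#include <memory>

struct CodeGen { int frame_size = 64; };

static CodeGen* Make() {
  std::unique_ptr<CodeGen> codegen(new CodeGen());
  // ... work with codegen; any early return would free it automatically ...
  return codegen.release();  // Caller now owns the object.
}

int main() {
  std::unique_ptr<CodeGen> owned(Make());  // Re-wrapped at the call site.
  return owned->frame_size == 64 ? 0 : 1;
}

This is exactly what Compile() and JitCompile() below do: they re-wrap the returned raw pointer in their own unique_ptr.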
 
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
@@ -842,33 +1012,56 @@
                                             Handle<mirror::DexCache> dex_cache) const {
   CompilerDriver* compiler_driver = GetCompilerDriver();
   CompiledMethod* method = nullptr;
-  if (Runtime::Current()->IsAotCompiler()) {
-    const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
-    DCHECK(!verified_method->HasRuntimeThrow());
-    if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-        || CanHandleVerificationFailure(verified_method)) {
-       method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                           method_idx, jclass_loader, dex_file, dex_cache);
-    } else {
-      if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
-      } else {
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+  DCHECK(!verified_method->HasRuntimeThrow());
+  if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
+      || verifier::MethodVerifier::CanCompilerHandleVerificationFailure(
+            verified_method->GetEncounteredVerificationFailures())) {
+    ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+    CodeVectorAllocator code_allocator(&arena);
+    std::unique_ptr<CodeGenerator> codegen(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   dex_file,
+                   dex_cache,
+                   nullptr,
+                   /* osr */ false));
+    if (codegen.get() != nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kCompiled);
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
+
+      if (kArenaAllocatorCountAllocations) {
+        if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
+          MemStats mem_stats(arena.GetMemStats());
+          LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+        }
       }
     }
   } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                        method_idx, jclass_loader, dex_file, dex_cache);
+    if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+    } else {
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
+    }
   }
 
   if (kIsDebugBuild &&
       IsCompilingWithCoreImage() &&
-      IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
-    // For testing purposes, we put a special marker on method names that should be compiled
-    // with this compiler. This makes sure we're not regressing.
+      IsInstructionSetSupported(compiler_driver->GetInstructionSet()) &&
+      (!kEmitCompilerReadBarrier ||
+       InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) {
+    // For testing purposes, we put a special marker on method names
+    // that should be compiled with this compiler (when the
+    // instruction set is supported -- and has support for read
+    // barriers, if they are enabled). This makes sure we're not
+    // regressing.
     std::string method_name = PrettyMethod(method_idx, dex_file);
     bool shouldCompile = method_name.find("$opt$") != std::string::npos;
     DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
@@ -883,7 +1076,116 @@
 
 bool IsCompilingWithCoreImage() {
   const std::string& image = Runtime::Current()->GetImageLocation();
-  return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art");
+  // TODO: This is under-approximating...
+  if (EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art")) {
+    return true;
+  }
+  return false;
+}
+
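EndsWith() is an ART utility; a minimal equivalent for reference, with the core.art check from above as the usage example:

#include <cassert>
#include <string>

static bool EndsWith(const std::string& s, const std::string& suffix) {
  return s.size() >= suffix.size() &&
         s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main() {
  assert(EndsWith("/data/art-test/core.art", "core.art"));
  assert(!EndsWith("/data/art-test/boot.art", "core.art"));
}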
+bool OptimizingCompiler::JitCompile(Thread* self,
+                                    jit::JitCodeCache* code_cache,
+                                    ArtMethod* method,
+                                    bool osr) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      method->GetDeclaringClass()->GetClassLoader()));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+  DCHECK(method->IsCompilable());
+
+  jobject jclass_loader = class_loader.ToJObject();
+  const DexFile* dex_file = method->GetDexFile();
+  const uint16_t class_def_idx = method->GetClassDefIndex();
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+  const uint32_t method_idx = method->GetDexMethodIndex();
+  const uint32_t access_flags = method->GetAccessFlags();
+  const InvokeType invoke_type = method->GetInvokeType();
+
+  ArenaAllocator arena(Runtime::Current()->GetJitArenaPool());
+  CodeVectorAllocator code_allocator(&arena);
+  std::unique_ptr<CodeGenerator> codegen;
+  {
+    // Go to native so that we don't block GC during compilation.
+    ScopedThreadSuspension sts(self, kNative);
+    codegen.reset(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   *dex_file,
+                   dex_cache,
+                   method,
+                   osr));
+    if (codegen.get() == nullptr) {
+      return false;
+    }
+
+    if (kArenaAllocatorCountAllocations) {
+      if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
+        MemStats mem_stats(arena.GetMemStats());
+        LOG(INFO) << PrettyMethod(method_idx, *dex_file) << " " << Dumpable<MemStats>(mem_stats);
+      }
+    }
+  }
+
+  size_t stack_map_size = codegen->ComputeStackMapsSize();
+  uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size, method);
+  if (stack_map_data == nullptr) {
+    return false;
+  }
+  MaybeRecordStat(MethodCompilationStat::kCompiled);
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
+  const void* code = code_cache->CommitCode(
+      self,
+      method,
+      stack_map_data,
+      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+      codegen->GetCoreSpillMask(),
+      codegen->GetFpuSpillMask(),
+      code_allocator.GetMemory().data(),
+      code_allocator.GetSize(),
+      osr);
+
+  if (code == nullptr) {
+    code_cache->ClearData(self, stack_map_data);
+    return false;
+  }
+
+  const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+  if (compiler_options.GetGenerateDebugInfo()) {
+    const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
+    const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
+    debug::MethodDebugInfo info = debug::MethodDebugInfo();
+    info.trampoline_name = nullptr;
+    info.dex_file = dex_file;
+    info.class_def_index = class_def_idx;
+    info.dex_method_index = method_idx;
+    info.access_flags = access_flags;
+    info.code_item = code_item;
+    info.isa = codegen->GetInstructionSet();
+    info.deduped = false;
+    info.is_native_debuggable = compiler_options.GetNativeDebuggable();
+    info.is_optimized = true;
+    info.is_code_address_text_relative = false;
+    info.code_address = code_address;
+    info.code_size = code_allocator.GetSize();
+    info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
+    info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
+    info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
+    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+        GetCompilerDriver()->GetInstructionSet(),
+        GetCompilerDriver()->GetInstructionSetFeatures(),
+        ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+    CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+  }
+
+  Runtime::Current()->GetJit()->AddMemoryUsage(method, arena.BytesUsed());
+
+  return true;
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..c8d1ce0 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
-#include <sstream>
+#include <iomanip>
 #include <string>
 #include <type_traits>
 
@@ -27,18 +27,21 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
-  kCompiledBaseline,
-  kCompiledOptimized,
+  kCompiled,
   kInlinedInvoke,
+  kReplacedInvokeWithSimplePattern,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
   kUnresolvedField,
   kUnresolvedFieldNotAFastAccess,
-  kNotCompiledBranchOutsideMethodCode,
-  kNotCompiledCannotBuildSSA,
-  kNotCompiledCantAccesType,
-  kNotCompiledClassNotVerified,
+  kRemovedCheckedCast,
+  kRemovedDeadInstruction,
+  kRemovedNullCheck,
+  kNotCompiledSkipped,
+  kNotCompiledInvalidBytecode,
+  kNotCompiledThrowCatchLoop,
+  kNotCompiledAmbiguousArrayOp,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -47,13 +50,22 @@
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
   kNotCompiledUnsupportedIsa,
+  kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
-  kNotOptimizedDisabled,
-  kNotOptimizedRegisterAllocator,
-  kNotOptimizedTryCatch,
-  kRemovedCheckedCast,
-  kRemovedDeadInstruction,
-  kRemovedNullCheck,
+  kInlinedMonomorphicCall,
+  kInlinedPolymorphicCall,
+  kMonomorphicCall,
+  kPolymorphicCall,
+  kMegamorphicCall,
+  kBooleanSimplified,
+  kIntrinsicRecognized,
+  kLoopInvariantMoved,
+  kSelectGenerated,
+  kRemovedInstanceOf,
+  kInlinedInvokeVirtualOrInterface,
+  kImplicitNullCheckGenerated,
+  kExplicitNullCheckGenerated,
+  kSimplifyIf,
   kLastStat
 };
 
@@ -66,20 +78,19 @@
   }
 
   void Log() const {
+    if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+      // Log only in debug builds or if the compiler is verbose.
+      return;
+    }
+
     if (compile_stats_[kAttemptCompilation] == 0) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      size_t unoptimized_percent =
-          compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
-      size_t optimized_percent =
-          compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      std::ostringstream oss;
-      oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
-      oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
-      oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
-      LOG(INFO) << oss.str();
+      float compiled_percent =
+          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+          << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
       for (int i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
@@ -92,41 +103,55 @@
 
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+    std::string name;
     switch (stat) {
-      case kAttemptCompilation : return "kAttemptCompilation";
-      case kCompiledBaseline : return "kCompiledBaseline";
-      case kCompiledOptimized : return "kCompiledOptimized";
-      case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
-      case kUnresolvedMethod : return "kUnresolvedMethod";
-      case kUnresolvedField : return "kUnresolvedField";
-      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
-      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
-      case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
-      case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
-      case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
-      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
-      case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kAttemptCompilation: name = "AttemptCompilation"; break;
+      case kCompiled: name = "Compiled"; break;
+      case kInlinedInvoke: name = "InlinedInvoke"; break;
+      case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break;
+      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+      case kUnresolvedMethod: name = "UnresolvedMethod"; break;
+      case kUnresolvedField: name = "UnresolvedField"; break;
+      case kUnresolvedFieldNotAFastAccess: name = "UnresolvedFieldNotAFastAccess"; break;
+      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+      case kNotCompiledSkipped: name = "NotCompiledSkipped"; break;
+      case kNotCompiledInvalidBytecode: name = "NotCompiledInvalidBytecode"; break;
+      case kNotCompiledThrowCatchLoop: name = "NotCompiledThrowCatchLoop"; break;
+      case kNotCompiledAmbiguousArrayOp: name = "NotCompiledAmbiguousArrayOp"; break;
+      case kNotCompiledHugeMethod: name = "NotCompiledHugeMethod"; break;
+      case kNotCompiledLargeMethodNoBranches: name = "NotCompiledLargeMethodNoBranches"; break;
+      case kNotCompiledMalformedOpcode: name = "NotCompiledMalformedOpcode"; break;
+      case kNotCompiledNoCodegen: name = "NotCompiledNoCodegen"; break;
+      case kNotCompiledPathological: name = "NotCompiledPathological"; break;
+      case kNotCompiledSpaceFilter: name = "NotCompiledSpaceFilter"; break;
+      case kNotCompiledUnhandledInstruction: name = "NotCompiledUnhandledInstruction"; break;
+      case kNotCompiledUnsupportedIsa: name = "NotCompiledUnsupportedIsa"; break;
+      case kNotCompiledVerificationError: name = "NotCompiledVerificationError"; break;
+      case kNotCompiledVerifyAtRuntime: name = "NotCompiledVerifyAtRuntime"; break;
+      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
+      case kMonomorphicCall: name = "MonomorphicCall"; break;
+      case kPolymorphicCall: name = "PolymorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
+      case kBooleanSimplified: name = "BooleanSimplified"; break;
+      case kIntrinsicRecognized: name = "IntrinsicRecognized"; break;
+      case kLoopInvariantMoved: name = "LoopInvariantMoved"; break;
+      case kSelectGenerated: name = "SelectGenerated"; break;
+      case kRemovedInstanceOf: name = "RemovedInstanceOf"; break;
+      case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
+      case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
+      case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
+      case kSimplifyIf: name = "SimplifyIf"; break;
 
-      case kLastStat: break;  // Invalid to print out.
+      case kLastStat:
+        LOG(FATAL) << "invalid stat "
+            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+        UNREACHABLE();
     }
-    LOG(FATAL) << "invalid stat "
-        << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-    UNREACHABLE();
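+    // The "OptStat#" prefix makes the individual counters easy to grep in logs.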
+    return "OptStat#" + name;
   }
 
   AtomicInteger compile_stats_[kLastStat];
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 350f0b1..dd5cb1c 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -19,9 +19,12 @@
 
 #include "nodes.h"
 #include "builder.h"
-#include "compiler/dex/pass_manager.h"
+#include "common_compiler_test.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 
 #include "gtest/gtest.h"
@@ -42,7 +45,6 @@
 #define FIVE_REGISTERS_CODE_ITEM(...)  N_REGISTERS_CODE_ITEM(5, __VA_ARGS__)
 #define SIX_REGISTERS_CODE_ITEM(...)   N_REGISTERS_CODE_ITEM(6, __VA_ARGS__)
 
-
 LiveInterval* BuildInterval(const size_t ranges[][2],
                             size_t number_of_ranges,
                             ArenaAllocator* allocator,
@@ -61,10 +63,12 @@
 
 void RemoveSuspendChecks(HGraph* graph) {
   for (HBasicBlock* block : graph->GetBlocks()) {
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* current = it.Current();
-      if (current->IsSuspendCheck()) {
-        current->GetBlock()->RemoveInstruction(current);
+    if (block != nullptr) {
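+      // Skip removed (null) blocks left in the block list by graph simplification.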
+      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* current = it.Current();
+        if (current->IsSuspendCheck()) {
+          current->GetBlock()->RemoveInstruction(current);
+        }
       }
     }
   }
@@ -80,12 +84,17 @@
 inline HGraph* CreateCFG(ArenaAllocator* allocator,
                          const uint16_t* data,
                          Primitive::Type return_type = Primitive::kPrimInt) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph, return_type);
   const DexFile::CodeItem* item =
     reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  return graph_built ? graph : nullptr;
+  HGraph* graph = CreateGraph(allocator);
+
+  {
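+    // Graph building now resolves types through the runtime, which requires
+    // scoped object access and a handle scope.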
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScopeCollection handles(soa.Self());
+    HGraphBuilder builder(graph, *item, &handles, return_type);
+    bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
+    return graph_built ? graph : nullptr;
+  }
 }
 
 // Naive string diff data type.
@@ -100,7 +109,8 @@
   std::string result = original;
   for (const auto& p : diff) {
     std::string::size_type pos = result.find(p.first);
-    EXPECT_NE(pos, std::string::npos);
+    DCHECK_NE(pos, std::string::npos)
+        << "Could not find: \"" << p.first << "\" in \"" << result << "\"";
     result.replace(pos, p.first.size(), p.second);
   }
   return result;
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 30bcf19..be470cc 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <iostream>
 
 #include "parallel_move_resolver.h"
 
@@ -169,10 +168,10 @@
         // If `other_move` was swapped, we iterate again to find a new
         // potential cycle.
         required_swap = nullptr;
-        i = 0;
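+        // The loop increment then restarts the scan at index 0.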
+        i = -1;
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
-        // move by just returning from this `PerforrmMove`.
+        // move by just returning from this `PerformMove`.
         moves_[index]->ClearPending(destination);
         return required_swap;
       }
@@ -201,7 +200,7 @@
   } else {
     for (MoveOperands* other_move : moves_) {
       if (other_move->Blocks(destination)) {
-        DCHECK(other_move->IsPending());
+        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
         if (!move->Is64BitMove() && other_move->Is64BitMove()) {
           // We swap 64bits moves before swapping 32bits moves. Go back from the
           // cycle by returning the move that must be swapped.
@@ -505,7 +504,7 @@
 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
   // This function is used to reduce the dependencies in the graph after
   // (from -> to) has been performed. Since we ensure there is no move with the same
-  // destination, (to -> X) can not be blocked while (from -> X) might still be
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
   // a dependency between the two. If we update the source location from 1 to 2, we
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 46e6f3e..5e8fe37 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -609,4 +609,36 @@
   }
 }
 
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
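+  // Checks cycle resolution when a 64-bit register-pair move overlaps the
+  // registers of two 32-bit moves.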
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TypeParam resolver(&allocator);
+    HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+    moves->AddMove(
+        Location::RegisterLocation(0),
+        Location::RegisterLocation(3),
+        Primitive::kPrimInt,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterPairLocation(2, 3),
+        Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterLocation(7),
+        Location::RegisterLocation(2),
+        Primitive::kPrimInt,
+        nullptr);
+    resolver.EmitNativeCode(moves);
+    if (TestFixture::has_swap) {
+      ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 -> 2)", resolver.GetMessage().c_str());
+    } else {
+      ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)",
+          resolver.GetMessage().c_str());
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
new file mode 100644
index 0000000..c6acc45
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pc_relative_fixups_mips.h"
+#include "code_generator_mips.h"
+#include "intrinsics_mips.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the constant area base as an input.
+ */
+class PCRelativeHandlerVisitor : public HGraphVisitor {
+ public:
+  PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
+        base_(nullptr) {}
+
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+      // Computing the base for PC-relative literals will clobber RA with
+      // the NAL instruction on R2. Take note of this before generating
+      // the method entry.
+      codegen_->ClobberRA();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void InitializePCRelativeBasePointer() {
+    // Ensure we only initialize the pointer once.
+    if (base_ != nullptr) {
+      return;
+    }
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
+    DCHECK(base_ != nullptr);
+  }
+
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+    switch (load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+        // Add a base register for PC-relative literals on R2.
+        InitializePCRelativeBasePointer();
+        load_class->AddSpecialInput(base_);
+        break;
+      default:
+        break;
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    HLoadString::LoadKind load_kind = load_string->GetLoadKind();
+    switch (load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+        // Add a base register for PC-relative literals on R2.
+        InitializePCRelativeBasePointer();
+        load_string->AddSpecialInput(base_);
+        break;
+      default:
+        break;
+    }
+  }
+
+  void HandleInvoke(HInvoke* invoke) {
+    // If this is an invoke-static/-direct whose method address or code pointer
+    // needs a fixup, we need the PC-relative address base.
+    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+    if (invoke_static_or_direct != nullptr) {
+      HInvokeStaticOrDirect::MethodLoadKind method_load_kind =
+          invoke_static_or_direct->GetMethodLoadKind();
+      HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+          invoke_static_or_direct->GetCodePtrLocation();
+
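+      // Both "with fixup" kinds load a PC-relative literal and therefore take
+      // the base as an extra input.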
+      bool has_extra_input =
+          (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+          (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup);
+
+      // We can't add a pointer to the constant area if we already have a current
+      // method pointer. This may arise when sharpening doesn't remove the current
+      // method pointer from the invoke.
+      if (invoke_static_or_direct->HasCurrentMethodInput()) {
+        DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+        CHECK(!has_extra_input);
+        return;
+      }
+
+      if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
+        InitializePCRelativeBasePointer();
+        // Add the extra parameter base_.
+        invoke_static_or_direct->AddSpecialInput(base_);
+      }
+    }
+  }
+
+  bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
+    if (invoke->GetIntrinsic() != Intrinsics::kNone) {
+      // This invoke may have intrinsic code generation defined. However, we must
+      // now also determine if this code generation is truly there and call-free
+      // (i.e. not unimplemented, no bailout on instruction features, and no call
+      // on the slow path). This is done by actually calling the locations builder
+      // on the instruction and clearing out the locations once the result is
+      // known. We assume this
+      // call only has creating locations as side effects!
+      IntrinsicLocationsBuilderMIPS builder(codegen_);
+      bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
+      invoke->SetLocations(nullptr);
+      return success;
+    }
+    return false;
+  }
+
+  CodeGeneratorMIPS* codegen_;
+
+  // The generated HMipsComputeBaseMethodAddress in the entry block needed as an
+  // input to the HMipsLoadFromConstantTable instructions.
+  HMipsComputeBaseMethodAddress* base_;
+};
+
+void PcRelativeFixups::Run() {
+  CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
+  if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
+    // Do nothing for R6 because it has PC-relative addressing.
+    return;
+  }
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  PCRelativeHandlerVisitor visitor(graph_, codegen_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
new file mode 100644
index 0000000..5a7397b
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace mips {
+
+class PcRelativeFixups : public HOptimization {
+ public:
+  PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "pc_relative_fixups_mips", stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips";
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
new file mode 100644
index 0000000..ad0921d
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
+#include "intrinsics_x86.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Finds instructions that need the constant area base as an input.
+ */
+class PCRelativeHandlerVisitor : public HGraphVisitor {
+ public:
+  PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorX86*>(codegen)),
+        base_(nullptr) {}
+
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitAdd(HAdd* add) OVERRIDE {
+    BinaryFP(add);
+  }
+
+  void VisitSub(HSub* sub) OVERRIDE {
+    BinaryFP(sub);
+  }
+
+  void VisitMul(HMul* mul) OVERRIDE {
+    BinaryFP(mul);
+  }
+
+  void VisitDiv(HDiv* div) OVERRIDE {
+    BinaryFP(div);
+  }
+
+  void VisitCompare(HCompare* compare) OVERRIDE {
+    BinaryFP(compare);
+  }
+
+  void VisitReturn(HReturn* ret) OVERRIDE {
+    HConstant* value = ret->InputAt(0)->AsConstant();
+    if (value != nullptr && Primitive::IsFloatingPointType(value->GetType())) {
+      ReplaceInput(ret, value, 0, true);
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+    if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+      InitializePCRelativeBasePointer();
+      load_class->AddSpecialInput(base_);
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    HLoadString::LoadKind load_kind = load_string->GetLoadKind();
+    if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+      InitializePCRelativeBasePointer();
+      load_string->AddSpecialInput(base_);
+    }
+  }
+
+  void BinaryFP(HBinaryOperation* bin) {
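+    // x86 has no floating-point immediates, so FP constants must be loaded
+    // from the constant area.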
+    HConstant* rhs = bin->InputAt(1)->AsConstant();
+    if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) {
+      ReplaceInput(bin, rhs, 1, false);
+    }
+  }
+
+  void VisitEqual(HEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitNotEqual(HNotEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitLessThan(HLessThan* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitGreaterThan(HGreaterThan* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitNeg(HNeg* neg) OVERRIDE {
+    if (Primitive::IsFloatingPointType(neg->GetType())) {
+      // We need to replace the HNeg with a HX86FPNeg in order to address the constant area.
+      InitializePCRelativeBasePointer();
+      HGraph* graph = GetGraph();
+      HBasicBlock* block = neg->GetBlock();
+      HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg(
+          neg->GetType(),
+          neg->InputAt(0),
+          base_,
+          neg->GetDexPc());
+      block->ReplaceAndRemoveInstructionWith(neg, x86_fp_neg);
+    }
+  }
+
+  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
+    // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
+    // address the constant area.
+    InitializePCRelativeBasePointer();
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = switch_insn->GetBlock();
+    HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
+        switch_insn->GetStartValue(),
+        switch_insn->GetNumEntries(),
+        switch_insn->InputAt(0),
+        base_,
+        switch_insn->GetDexPc());
+    block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
+  }
+
+  void InitializePCRelativeBasePointer() {
+    // Ensure we only initialize the pointer once.
+    if (base_ != nullptr) {
+      return;
+    }
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
+    DCHECK(base_ != nullptr);
+  }
+
+  void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
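+    // Replace a constant input with a load from the constant area. A load that
+    // is not materialized is emitted directly at its use site.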
+    InitializePCRelativeBasePointer();
+    HX86LoadFromConstantTable* load_constant =
+        new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value);
+    if (!materialize) {
+      load_constant->MarkEmittedAtUseSite();
+    }
+    insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
+    insn->ReplaceInput(load_constant, input_index);
+  }
+
+  void HandleInvoke(HInvoke* invoke) {
+    // If this is an invoke-static/-direct with PC-relative dex cache array
+    // addressing, we need the PC-relative address base.
+    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+    // We can't add a pointer to the constant area if we already have a current
+    // method pointer. This may arise when sharpening doesn't remove the current
+    // method pointer from the invoke.
+    if (invoke_static_or_direct != nullptr &&
+        invoke_static_or_direct->HasCurrentMethodInput()) {
+      DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+      return;
+    }
+
+    bool base_added = false;
+    if (invoke_static_or_direct != nullptr &&
+        invoke_static_or_direct->HasPcRelativeDexCache() &&
+        !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
+      InitializePCRelativeBasePointer();
+      // Add the extra parameter base_.
+      invoke_static_or_direct->AddSpecialInput(base_);
+      base_added = true;
+    }
+
+    // Ensure that we can load FP arguments from the constant area.
+    HInputsRef inputs = invoke->GetInputs();
+    for (size_t i = 0; i < inputs.size(); i++) {
+      HConstant* input = inputs[i]->AsConstant();
+      if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
+        ReplaceInput(invoke, input, i, true);
+      }
+    }
+
+    // These intrinsics need the constant area.
+    switch (invoke->GetIntrinsic()) {
+      case Intrinsics::kMathAbsDouble:
+      case Intrinsics::kMathAbsFloat:
+      case Intrinsics::kMathMaxDoubleDouble:
+      case Intrinsics::kMathMaxFloatFloat:
+      case Intrinsics::kMathMinDoubleDouble:
+      case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathRoundFloat:
+        if (!base_added) {
+          DCHECK(invoke_static_or_direct != nullptr);
+          DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
+          InitializePCRelativeBasePointer();
+          invoke_static_or_direct->AddSpecialInput(base_);
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
+    if (invoke->GetIntrinsic() != Intrinsics::kNone) {
+      // This invoke may have intrinsic code generation defined. However, we must
+      // now also determine if this code generation is truly there and call-free
+      // (i.e. not unimplemented, no bailout on instruction features, and no call
+      // on the slow path). This is done by actually calling the locations builder
+      // on the instruction and clearing out the locations once the result is
+      // known. We assume this
+      // call only has creating locations as side effects!
+      IntrinsicLocationsBuilderX86 builder(codegen_);
+      bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
+      invoke->SetLocations(nullptr);
+      return success;
+    }
+    return false;
+  }
+
+  CodeGeneratorX86* codegen_;
+
+  // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
+  // input to the HX86LoadFromConstantTable instructions.
+  HX86ComputeBaseMethodAddress* base_;
+};
+
+void PcRelativeFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  PCRelativeHandlerVisitor visitor(graph_, codegen_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
+}
+
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
new file mode 100644
index 0000000..72fa71e
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
+#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace x86 {
+
+class PcRelativeFixups : public HOptimization {
+ public:
+  PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kPcRelativeFixupsX86PassName, stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86";
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index ca928ae..8fb5396 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,22 @@
 
 void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   check->ReplaceWith(check->InputAt(0));
+  if (check->IsStringCharAt()) {
+    // Add a fake environment for String.charAt() inline info as we want
+    // the exception to appear as being thrown from there.
+    const DexFile& dex_file = check->GetEnvironment()->GetDexFile();
+    DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(),
+                 "char java.lang.String.charAt(int)");
+    ArenaAllocator* arena = GetGraph()->GetArena();
+    HEnvironment* environment = new (arena) HEnvironment(arena,
+                                                         /* number_of_vregs */ 0u,
+                                                         dex_file,
+                                                         check->GetStringCharAtMethodIndex(),
+                                                         /* dex_pc */ DexFile::kDexNoIndex,
+                                                         kVirtual,
+                                                         check);
+    check->InsertRawEnvironment(environment);
+  }
 }
 
 void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) {
@@ -47,69 +63,192 @@
   bound_type->GetBlock()->RemoveInstruction(bound_type);
 }
 
+void PrepareForRegisterAllocation::VisitArraySet(HArraySet* instruction) {
+  HInstruction* value = instruction->GetValue();
+  // PrepareForRegisterAllocation::VisitBoundType may have replaced a
+  // BoundType (as value input of this ArraySet) with a NullConstant.
+  // If so, this ArraySet no longer needs a type check.
+  if (value->IsNullConstant()) {
+    DCHECK_EQ(value->GetType(), Primitive::kPrimNot);
+    if (instruction->NeedsTypeCheck()) {
+      instruction->ClearNeedsTypeCheck();
+    }
+  }
+}
+
 void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
-  HLoadClass* cls = check->GetLoadClass();
-  check->ReplaceWith(cls);
-  if (check->GetPrevious() == cls) {
+  // Try to find a static invoke or a new-instance from which this check originated.
+  HInstruction* implicit_clinit = nullptr;
+  for (const HUseListNode<HInstruction*>& use : check->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) &&
+        CanMoveClinitCheck(check, user)) {
+      implicit_clinit = user;
+      if (user->IsInvokeStaticOrDirect()) {
+        DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck());
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+      } else {
+        DCHECK(user->IsNewInstance());
+        // We delegate the initialization duty to the allocation.
+        if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) {
+          user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved);
+        }
+      }
+      break;
+    }
+  }
+  // If we found a static invoke or new-instance for merging, remove the check
+  // from dominated static invokes.
+  if (implicit_clinit != nullptr) {
+    const HUseList<HInstruction*>& uses = check->GetUses();
+    for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+      HInstruction* user = it->GetUser();
+      // All other uses must be dominated.
+      DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user));
+      ++it;  // Advance before we remove the node; the reference to the next node is preserved.
+      if (user->IsInvokeStaticOrDirect()) {
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+      }
+    }
+  }
+
+  HLoadClass* load_class = check->GetLoadClass();
+  bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check);
+
+  check->ReplaceWith(load_class);
+
+  if (implicit_clinit != nullptr) {
+    // Remove the check from the graph. It has been merged into the invoke or new-instance.
+    check->GetBlock()->RemoveInstruction(check);
+    // Check if we can merge the load class as well.
+    if (can_merge_with_load_class && !load_class->HasUses()) {
+      load_class->GetBlock()->RemoveInstruction(load_class);
+    }
+  } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
     // Pass the initialization duty to the `HLoadClass` instruction,
     // and remove the instruction from the graph.
-    cls->SetMustGenerateClinitCheck(true);
+    load_class->SetMustGenerateClinitCheck(true);
     check->GetBlock()->RemoveInstruction(check);
   }
 }
 
-void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
-  bool needs_materialization = false;
-  if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
-    needs_materialization = true;
-  } else {
-    HInstruction* user = condition->GetUses().GetFirst()->GetUser();
-    if (!user->IsIf() && !user->IsDeoptimize()) {
-      needs_materialization = true;
-    } else {
-      // TODO: if there is no intervening instructions with side-effect between this condition
-      // and the If instruction, we should move the condition just before the If.
-      if (condition->GetNext() != user) {
-        needs_materialization = true;
+void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
+  HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
+  bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+  // Change the entrypoint to kQuickAllocObject if either:
+  // - the class is finalizable (only kQuickAllocObject handles finalizable classes),
+  // - the class needs access checks (we do not know if it's finalizable),
+  // - or the load class has only one use.
+  if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
+    instruction->SetEntrypoint(kQuickAllocObject);
+    instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
+    // The allocation entry point that deals with access checks does not work with inlined
+    // methods, so we need to check whether this allocation comes from an inlined method.
+    // We also need to make the same check as for moving a clinit check: whether
+    // the HLoadClass has the clinit check responsibility or not (the HLoadClass
+    // can throw anyway).
+    if (has_only_one_use &&
+        !instruction->GetEnvironment()->IsFromInlinedInvoke() &&
+        CanMoveClinitCheck(load_class, instruction)) {
+      // We can remove the load class from the graph. If it needed access checks, we delegate
+      // the access check to the allocation.
+      if (load_class->NeedsAccessCheck()) {
+        instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
       }
+      load_class->GetBlock()->RemoveInstruction(load_class);
     }
   }
-  if (!needs_materialization) {
-    condition->ClearNeedsMaterialization();
+}
+
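+// A condition can be emitted at its use site when its user immediately follows
+// it and consumes it as an If, a Deoptimize, or a Select condition input.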
+bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition,
+                                                      HInstruction* user) const {
+  if (condition->GetNext() != user) {
+    return false;
+  }
+
+  if (user->IsIf() || user->IsDeoptimize()) {
+    return true;
+  }
+
+  if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) {
+    return true;
+  }
+
+  return false;
+}
+
+void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
+  if (condition->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* user = condition->GetUses().front().GetUser();
+    if (CanEmitConditionAt(condition, user)) {
+      condition->MarkEmittedAtUseSite();
+    }
   }
 }
 
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    size_t last_input_index = invoke->InputCount() - 1;
-    HLoadClass* last_input = invoke->InputAt(last_input_index)->AsLoadClass();
+    HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
-    // Remove a load class instruction as last input of a static
-    // invoke, which has been added (along with a clinit check,
-    // removed by PrepareForRegisterAllocation::VisitClinitCheck
-    // previously) by the graph builder during the creation of the
-    // static invoke instruction, but is no longer required at this
-    // stage (i.e., after inlining has been performed).
-    invoke->RemoveLoadClassAsLastInput();
+    // Detach the explicit class initialization check from the invoke.
+    // Keeping track of the initializing instruction is no longer required
+    // at this stage (i.e., after inlining has been performed).
+    invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
 
-    // The static call will initialize the class so there's no need for a clinit check if
-    // it's the first user.
-    // There is one special case where we still need the clinit check, when inlining. Because
-    // currently the callee is responsible for reporting parameters to the GC, the code
-    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
-    // Therefore we cannot allocate any object in that code, including loading a new class.
-    if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) {
-      last_input->SetMustGenerateClinitCheck(false);
+    // Merging with load class should have happened in VisitClinitCheck().
+    DCHECK(!CanMoveClinitCheck(last_input, invoke));
+  }
+}
 
-      // If the load class instruction is no longer used, remove it from
-      // the graph.
-      if (!last_input->HasUses()) {
-        last_input->GetBlock()->RemoveInstruction(last_input);
-      }
+bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input,
+                                                      HInstruction* user) const {
+  // Determine if input and user come from the same dex instruction, so that we can move
+  // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user)
+  // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user),
+  // or from HLoadClass (input) to HNewInstance (user).
+
+  // Start with a quick dex pc check.
+  if (user->GetDexPc() != input->GetDexPc()) {
+    return false;
+  }
+
+  // Now do a thorough environment check that this is really coming from the same instruction in
+  // the same inlined graph. Unfortunately, we have to go through the whole environment chain.
+  HEnvironment* user_environment = user->GetEnvironment();
+  HEnvironment* input_environment = input->GetEnvironment();
+  while (user_environment != nullptr || input_environment != nullptr) {
+    if (user_environment == nullptr || input_environment == nullptr) {
+      // Different environment chain length. This happens when a method is called
+      // once directly and once indirectly through another inlined method.
+      return false;
+    }
+    if (user_environment->GetDexPc() != input_environment->GetDexPc() ||
+        user_environment->GetMethodIdx() != input_environment->GetMethodIdx() ||
+        !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) {
+      return false;
+    }
+    user_environment = user_environment->GetParent();
+    input_environment = input_environment->GetParent();
+  }
+
+  // Check for code motion taking the input to a different block.
+  if (user->GetBlock() != input->GetBlock()) {
+    return false;
+  }
+
+  // In debug mode, check that we have not inserted a throwing instruction
+  // or an instruction with side effects between input and user.
+  if (kIsDebugBuild) {
+    for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+      CHECK(between != nullptr);  // User must be after input in the same block.
+      CHECK(!between->CanThrow());
+      CHECK(!between->HasSideEffects());
     }
   }
+  return true;
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index d7f277f..a679148 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -32,14 +32,22 @@
 
   void Run();
 
+  static constexpr const char* kPrepareForRegisterAllocationPassName =
+      "prepare_for_register_allocation";
+
  private:
   void VisitNullCheck(HNullCheck* check) OVERRIDE;
   void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
   void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
   void VisitBoundType(HBoundType* bound_type) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitNewInstance(HNewInstance* instruction) OVERRIDE;
+
+  bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
+  bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
 
   DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
 };
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 429e6e3..5891350 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -39,29 +39,30 @@
   }
 
   void PrintPostInstruction(HInstruction* instruction) {
-    if (instruction->InputCount() != 0) {
+    HConstInputsRef inputs = instruction->GetInputs();
+    if (!inputs.empty()) {
       PrintString("(");
       bool first = true;
-      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
+      for (const HInstruction* input : inputs) {
         if (first) {
           first = false;
         } else {
           PrintString(", ");
         }
-        PrintInt(it.Current()->GetId());
+        PrintInt(input->GetId());
       }
       PrintString(")");
     }
     if (instruction->HasUses()) {
       PrintString(" [");
       bool first = true;
-      for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
         if (first) {
           first = false;
         } else {
           PrintString(", ");
         }
-        PrintInt(it.Current()->GetUser()->GetId());
+        PrintInt(use.GetUser()->GetId());
       }
       PrintString("]");
     }
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index c56100d..951cdfb 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -30,43 +30,41 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
+  HGraph* graph = CreateCFG(&allocator, data);
   StringPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
   ASSERT_STREQ(expected, printer.str().c_str());
 }
 
-TEST(PrettyPrinterTest, ReturnVoid) {
+class PrettyPrinterTest : public CommonCompilerTest {};
+
+TEST_F(PrettyPrinterTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::RETURN_VOID);
 
   const char* expected =
       "BasicBlock 0, succ: 1\n"
-      "  2: SuspendCheck\n"
-      "  3: Goto 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  0: ReturnVoid\n"
+      "  2: ReturnVoid\n"
       "BasicBlock 2, pred: 1\n"
-      "  1: Exit\n";
+      "  3: Exit\n";
 
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG1) {
+TEST_F(PrettyPrinterTest, CFG1) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  0: Goto 2\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  1: ReturnVoid\n"
-    "BasicBlock 3, pred: 2\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Goto 2\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  3: ReturnVoid\n"
+      "BasicBlock 3, pred: 2\n"
+      "  4: Exit\n";
 
   const uint16_t data[] =
     ZERO_REGISTER_CODE_ITEM(
@@ -76,19 +74,19 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG2) {
+TEST_F(PrettyPrinterTest, CFG2) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  4: SuspendCheck\n"
-    "  5: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  0: Goto 2\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  1: Goto 3\n"
-    "BasicBlock 3, pred: 2, succ: 4\n"
-    "  2: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  3: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Goto 2\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  3: Goto 3\n"
+      "BasicBlock 3, pred: 2, succ: 4\n"
+      "  4: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  5: Exit\n";
 
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
@@ -98,19 +96,19 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG3) {
+TEST_F(PrettyPrinterTest, CFG3) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  4: SuspendCheck\n"
-    "  5: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  0: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  1: ReturnVoid\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  2: Goto 2\n"
-    "BasicBlock 4, pred: 2\n"
-    "  3: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  2: Goto 3\n"
+      "BasicBlock 2, pred: 3, succ: 4\n"
+      "  4: ReturnVoid\n"
+      "BasicBlock 3, pred: 1, succ: 2\n"
+      "  3: Goto 2\n"
+      "BasicBlock 4, pred: 2\n"
+      "  5: Exit\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
@@ -134,16 +132,16 @@
   TestCode(data3, expected);
 }
 
-TEST(PrettyPrinterTest, CFG4) {
+TEST_F(PrettyPrinterTest, CFG4) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, 1, succ: 1\n"
-    "  0: SuspendCheck\n"
-    "  1: Goto 1\n"
-    "BasicBlock 2\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 3\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 3\n"
+      "BasicBlock 1, pred: 3, 1, succ: 1\n"
+      "  3: SuspendCheck\n"
+      "  4: Goto 1\n"
+      "BasicBlock 3, pred: 0, succ: 1\n"
+      "  0: Goto 1\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
@@ -157,17 +155,15 @@
   TestCode(data2, expected);
 }
 
-TEST(PrettyPrinterTest, CFG5) {
+TEST_F(PrettyPrinterTest, CFG5) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, 2, succ: 3\n"
-    "  0: ReturnVoid\n"
-    "BasicBlock 2, succ: 1\n"
-    "  1: Goto 1\n"
-    "BasicBlock 3, pred: 1\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  2: ReturnVoid\n"
+      "BasicBlock 3, pred: 1\n"
+      "  3: Exit\n";
 
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
@@ -177,25 +173,23 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG6) {
+TEST_F(PrettyPrinterTest, CFG6) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  0: Local [4, 3, 2]\n"
-    "  1: IntConstant [2]\n"
-    "  10: SuspendCheck\n"
-    "  11: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
-    "  2: StoreLocal(0, 1)\n"
-    "  3: LoadLocal(0) [5]\n"
-    "  4: LoadLocal(0) [5]\n"
-    "  5: Equal(3, 4) [6]\n"
-    "  6: If(5)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  7: Goto 3\n"
-    "BasicBlock 3, pred: 1, 2, succ: 4\n"
-    "  8: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  9: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [4, 4]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  4: Equal(3, 3) [5]\n"
+      "  5: If(4)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  6: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  7: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  8: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -206,26 +200,24 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG7) {
+TEST_F(PrettyPrinterTest, CFG7) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  0: Local [4, 3, 2]\n"
-    "  1: IntConstant [2]\n"
-    "  11: SuspendCheck\n"
-    "  12: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
-    "  2: StoreLocal(0, 1)\n"
-    "  3: LoadLocal(0) [5]\n"
-    "  4: LoadLocal(0) [5]\n"
-    "  5: Equal(3, 4) [6]\n"
-    "  6: If(5)\n"
-    "BasicBlock 2, pred: 1, 3, succ: 3\n"
-    "  7: Goto 3\n"
-    "BasicBlock 3, pred: 1, 2, succ: 2\n"
-    "  8: SuspendCheck\n"
-    "  9: Goto 2\n"
-    "BasicBlock 4\n"
-    "  10: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  4: IntConstant [5, 5]\n"
+      "  2: SuspendCheck\n"
+      "  3: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 6\n"
+      "  5: Equal(4, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 6, 3, succ: 3\n"
+      "  11: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 2\n"
+      "  8: SuspendCheck\n"
+      "  9: Goto 2\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n"
+      "BasicBlock 6, pred: 1, succ: 2\n"
+      "  1: Goto 2\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -236,18 +228,16 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, IntConstant) {
+TEST_F(PrettyPrinterTest, IntConstant) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  0: Local [2]\n"
-    "  1: IntConstant [2]\n"
-    "  5: SuspendCheck\n"
-    "  6: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  2: StoreLocal(0, 1)\n"
-    "  3: ReturnVoid\n"
-    "BasicBlock 2, pred: 1\n"
-    "  4: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  3: ReturnVoid\n"
+      "BasicBlock 2, pred: 1\n"
+      "  4: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
deleted file mode 100644
index c98f43e..0000000
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "primitive_type_propagation.h"
-
-#include "nodes.h"
-#include "ssa_builder.h"
-
-namespace art {
-
-static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
-  // We trust the verifier has already done the necessary checking.
-  switch (existing) {
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimNot:
-      return existing;
-    default:
-      // Phis are initialized with a void type, so if we are asked
-      // to merge with a void type, we should use the existing one.
-      return new_type == Primitive::kPrimVoid
-          ? existing
-          : HPhi::ToPhiType(new_type);
-  }
-}
-
-// Re-compute and update the type of the instruction. Returns
-// whether or not the type was changed.
-bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
-  DCHECK(phi->IsLive());
-  Primitive::Type existing = phi->GetType();
-
-  Primitive::Type new_type = existing;
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    Primitive::Type input_type = phi->InputAt(i)->GetType();
-    new_type = MergeTypes(new_type, input_type);
-  }
-  phi->SetType(new_type);
-
-  if (new_type == Primitive::kPrimDouble
-      || new_type == Primitive::kPrimFloat
-      || new_type == Primitive::kPrimNot) {
-    // If the phi is of floating point type, we need to update its inputs to that
-    // type. For inputs that are phis, we need to recompute their types.
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      HInstruction* input = phi->InputAt(i);
-      if (input->GetType() != new_type) {
-        HInstruction* equivalent = (new_type == Primitive::kPrimNot)
-            ? SsaBuilder::GetReferenceTypeEquivalent(input)
-            : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
-        phi->ReplaceInput(equivalent, i);
-        if (equivalent->IsPhi()) {
-          equivalent->AsPhi()->SetLive();
-          AddToWorklist(equivalent->AsPhi());
-        } else if (equivalent == input) {
-          // The input has changed its type. It can be an input of other phis,
-          // so we need to put phi users in the work list.
-          AddDependentInstructionsToWorklist(equivalent);
-        }
-      }
-    }
-  }
-
-  return existing != new_type;
-}
-
-void PrimitiveTypePropagation::Run() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-}
-
-void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  if (block->IsLoopHeader()) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      if (phi->IsLive()) {
-        AddToWorklist(phi);
-      }
-    }
-  } else {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      // Eagerly compute the type of the phi, for quicker convergence. Note
-      // that we don't need to add users to the worklist because we are
-      // doing a reverse post-order visit, therefore either the phi users are
-      // non-loop phi and will be visited later in the visit, or are loop-phis,
-      // and they are already in the work list.
-      HPhi* phi = it.Current()->AsPhi();
-      if (phi->IsLive()) {
-        UpdateType(phi);
-      }
-    }
-  }
-}
-
-void PrimitiveTypePropagation::ProcessWorklist() {
-  while (!worklist_.empty()) {
-    HPhi* instruction = worklist_.back();
-    worklist_.pop_back();
-    if (UpdateType(instruction)) {
-      AddDependentInstructionsToWorklist(instruction);
-    }
-  }
-}
-
-void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
-  DCHECK(instruction->IsLive());
-  worklist_.push_back(instruction);
-}
-
-void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) {
-      AddToWorklist(phi);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h
deleted file mode 100644
index 212fcfc..0000000
--- a/compiler/optimizing/primitive_type_propagation.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-#define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-
-#include "base/arena_containers.h"
-#include "nodes.h"
-
-namespace art {
-
-// Compute and propagate primitive types of phis in the graph.
-class PrimitiveTypePropagation : public ValueObject {
- public:
-  explicit PrimitiveTypePropagation(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) {
-    worklist_.reserve(kDefaultWorklistSize);
-  }
-
-  void Run();
-
- private:
-  void VisitBasicBlock(HBasicBlock* block);
-  void ProcessWorklist();
-  void AddToWorklist(HPhi* phi);
-  void AddDependentInstructionsToWorklist(HInstruction* instruction);
-  bool UpdateType(HPhi* phi);
-
-  HGraph* const graph_;
-  ArenaVector<HPhi*> worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
-
-  DISALLOW_COPY_AND_ASSIGN(PrimitiveTypePropagation);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 26a05da..e96ab19 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,6 +16,7 @@
 
 #include "reference_type_propagation.h"
 
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
@@ -23,24 +24,58 @@
 
 namespace art {
 
-class RTPVisitor : public HGraphDelegateVisitor {
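+// Returns `hint_dex_cache` if it belongs to `dex_file`; otherwise falls back
+// to the more expensive ClassLinker::FindDexCache() lookup.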
+static inline mirror::DexCache* FindDexCacheWithHint(Thread* self,
+                                                     const DexFile& dex_file,
+                                                     Handle<mirror::DexCache> hint_dex_cache)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(hint_dex_cache->GetDexFile() == &dex_file)) {
+    return hint_dex_cache.Get();
+  } else {
+    return Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file);
+  }
+}
+
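+// Returns the cached type handle for the given class root, creating and
+// caching it on first use.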
+static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollection* handles,
+                                                          ClassLinker::ClassRoot class_root,
+                                                          ReferenceTypeInfo::TypeHandle* cache) {
+  if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
+    // Mutator lock is required for NewHandle.
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+    ScopedObjectAccess soa(Thread::Current());
+    *cache = handles->NewHandle(linker->GetClassRoot(class_root));
+  }
+  return *cache;
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() {
+  return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_);
+}
+
+class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
  public:
   RTPVisitor(HGraph* graph,
-             StackHandleScopeCollection* handles,
+             Handle<mirror::DexCache> hint_dex_cache,
+             HandleCache* handle_cache,
              ArenaVector<HInstruction*>* worklist,
-             ReferenceTypeInfo::TypeHandle object_class_handle,
-             ReferenceTypeInfo::TypeHandle class_class_handle,
-             ReferenceTypeInfo::TypeHandle string_class_handle,
-             ReferenceTypeInfo::TypeHandle throwable_class_handle)
+             bool is_first_run)
     : HGraphDelegateVisitor(graph),
-      handles_(handles),
-      object_class_handle_(object_class_handle),
-      class_class_handle_(class_class_handle),
-      string_class_handle_(string_class_handle),
-      throwable_class_handle_(throwable_class_handle),
-      worklist_(worklist) {}
+      hint_dex_cache_(hint_dex_cache),
+      handle_cache_(handle_cache),
+      worklist_(worklist),
+      is_first_run_(is_first_run) {}
 
-  void VisitNullConstant(HNullConstant* null_constant) OVERRIDE;
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
@@ -49,7 +84,8 @@
   void VisitNewArray(HNewArray* instr) OVERRIDE;
   void VisitParameterValue(HParameterValue* instr) OVERRIDE;
   void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
-  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact);
+  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
   void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE;
@@ -57,59 +93,35 @@
   void VisitInvoke(HInvoke* instr) OVERRIDE;
   void VisitArrayGet(HArrayGet* instr) OVERRIDE;
   void VisitCheckCast(HCheckCast* instr) OVERRIDE;
+  void VisitBoundType(HBoundType* instr) OVERRIDE;
   void VisitNullCheck(HNullCheck* instr) OVERRIDE;
-  void VisitFakeString(HFakeString* instr) OVERRIDE;
   void UpdateReferenceTypeInfo(HInstruction* instr,
                                uint16_t type_idx,
                                const DexFile& dex_file,
                                bool is_exact);
 
  private:
-  StackHandleScopeCollection* handles_;
-  ReferenceTypeInfo::TypeHandle object_class_handle_;
-  ReferenceTypeInfo::TypeHandle class_class_handle_;
-  ReferenceTypeInfo::TypeHandle string_class_handle_;
-  ReferenceTypeInfo::TypeHandle throwable_class_handle_;
+  Handle<mirror::DexCache> hint_dex_cache_;
+  HandleCache* handle_cache_;
   ArenaVector<HInstruction*>* worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
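+  // Whether this visitor runs as part of the first RTP pass over the graph.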
+  const bool is_first_run_;
 };
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
+                                                   Handle<mirror::DexCache> hint_dex_cache,
                                                    StackHandleScopeCollection* handles,
+                                                   bool is_first_run,
                                                    const char* name)
     : HOptimization(graph, name),
-      handles_(handles),
-      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) {
-  worklist_.reserve(kDefaultWorklistSize);
-  // Mutator lock is required for NewHandle, but annotalysis ignores constructors.
-  ScopedObjectAccess soa(Thread::Current());
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  object_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject));
-  string_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangString));
-  class_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangClass));
-  throwable_class_handle_ =
-      handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangThrowable));
-
-  if (kIsDebugBuild) {
-    DCHECK(ReferenceTypeInfo::IsValidHandle(object_class_handle_));
-    DCHECK(ReferenceTypeInfo::IsValidHandle(class_class_handle_));
-    DCHECK(ReferenceTypeInfo::IsValidHandle(string_class_handle_));
-    DCHECK(ReferenceTypeInfo::IsValidHandle(throwable_class_handle_));
-  }
+      hint_dex_cache_(hint_dex_cache),
+      handle_cache_(handles),
+      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)),
+      is_first_run_(is_first_run) {
 }
 
-void ReferenceTypePropagation::Run() {
-  // To properly propagate type info we need to visit in the dominator-based order.
-  // Reverse post order guarantees a node's dominators are visited first.
-  // We take advantage of this order in `VisitBasicBlock`.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-
+void ReferenceTypePropagation::ValidateTypes() {
+  // TODO: move this to the graph checker.
   if (kIsDebugBuild) {
-    // TODO: move this to the graph checker.
     ScopedObjectAccess soa(Thread::Current());
     for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
       HBasicBlock* block = it.Current();
@@ -135,14 +147,27 @@
   }
 }
 
+void ReferenceTypePropagation::Visit(HInstruction* instruction) {
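+  // Type a single instruction in isolation with a temporary visitor.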
+  RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_);
+  instruction->Accept(&visitor);
+}
+
+void ReferenceTypePropagation::Run() {
+  worklist_.reserve(kDefaultWorklistSize);
+
+  // To properly propagate type info we need to visit in the dominator-based order.
+  // Reverse post order guarantees a node's dominators are visited first.
+  // We take advantage of this order in `VisitBasicBlock`.
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
+  }
+
+  ProcessWorklist();
+  ValidateTypes();
+}
+
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  RTPVisitor visitor(graph_,
-                     handles_,
-                     &worklist_,
-                     object_class_handle_,
-                     class_class_handle_,
-                     string_class_handle_,
-                     throwable_class_handle_);
+  RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_);
   // Handle Phis first as there might be instructions in the same block who depend on them.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     VisitPhi(it.Current()->AsPhi());
@@ -159,34 +184,6 @@
   BoundTypeForIfInstanceOf(block);
 }
 
-// Create a bound type for the given object narrowing the type as much as possible.
-// The BoundType upper values for the super type and can_be_null will be taken from
-// load_class.GetLoadedClassRTI() and upper_can_be_null.
-static HBoundType* CreateBoundType(ArenaAllocator* arena,
-                                   HInstruction* obj,
-                                   HLoadClass* load_class,
-                                   bool upper_can_be_null)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-  ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
-  DCHECK(class_rti.IsValid());
-  HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null);
-  // Narrow the type as much as possible.
-  if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) {
-    bound_type->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true));
-  } else if (obj_rti.IsValid() && class_rti.IsSupertypeOf(obj_rti)) {
-    bound_type->SetReferenceTypeInfo(obj_rti);
-  } else {
-    bound_type->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
-  }
-  if (upper_can_be_null) {
-    bound_type->SetCanBeNull(obj->CanBeNull());
-  }
-  return bound_type;
-}
-
 // Check if we should create a bound type for the given object at the specified
 // position. Because of inlining and the fact that we run RTP more than once, we
 // might have an HBoundType already. If we do, we should not create a new one.
@@ -208,8 +205,8 @@
   if (existing_bound_type->GetUpperBound().IsSupertypeOf(upper_bound)) {
     if (kIsDebugBuild) {
       // Check that the existing HBoundType dominates all the uses.
-      for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-        HInstruction* user = it.Current()->GetUser();
+      for (const HUseListNode<HInstruction*>& use : obj->GetUses()) {
+        HInstruction* user = use.GetUser();
         if (dominator_instr != nullptr) {
           DCHECK(!dominator_instr->StrictlyDominates(user)
               || user == existing_bound_type
@@ -263,17 +260,21 @@
       ? ifInstruction->IfTrueSuccessor()
       : ifInstruction->IfFalseSuccessor();
 
-  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  const HUseList<HInstruction*>& uses = obj->GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+    ++it;
     if (notNullBlock->Dominates(user->GetBlock())) {
       if (bound_type == nullptr) {
         ScopedObjectAccess soa(Thread::Current());
         HInstruction* insert_point = notNullBlock->GetFirstInstruction();
         ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create(
-            object_class_handle_, /* is_exact */ true);
+            handle_cache_.GetObjectClassHandle(), /* is_exact */ true);
         if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) {
-          bound_type = new (graph_->GetArena()) HBoundType(
-              obj, object_rti, /* bound_can_be_null */ false);
+          bound_type = new (graph_->GetArena()) HBoundType(obj);
+          bound_type->SetUpperBound(object_rti, /* bound_can_be_null */ false);
           if (obj->GetReferenceTypeInfo().IsValid()) {
             bound_type->SetReferenceTypeInfo(obj->GetReferenceTypeInfo());
           }
@@ -285,11 +286,80 @@
           break;
         }
       }
-      user->ReplaceInput(bound_type, it.Current()->GetIndex());
+      user->ReplaceInput(bound_type, index);
     }
   }
 }
 
+// Returns true if one of the patterns below has been recognized. If so, the
+// InstanceOf instruction together with the true branch of `ifInstruction` will
+// be returned using the out parameters.
+// Recognized patterns:
+//   (1) patterns equivalent to `if (obj instanceof X)`
+//     (a) InstanceOf -> Equal to 1 -> If
+//     (b) InstanceOf -> NotEqual to 0 -> If
+//     (c) InstanceOf -> If
+//   (2) patterns equivalent to `if (!(obj instanceof X))`
+//     (a) InstanceOf -> Equal to 0 -> If
+//     (b) InstanceOf -> NotEqual to 1 -> If
+//     (c) InstanceOf -> BooleanNot -> If
+static bool MatchIfInstanceOf(HIf* ifInstruction,
+                              /* out */ HInstanceOf** instanceOf,
+                              /* out */ HBasicBlock** trueBranch) {
+  HInstruction* input = ifInstruction->InputAt(0);
+
+  if (input->IsEqual()) {
+    HInstruction* rhs = input->AsEqual()->GetConstantRight();
+    if (rhs != nullptr) {
+      HInstruction* lhs = input->AsEqual()->GetLeastConstantLeft();
+      if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+        if (rhs->AsIntConstant()->IsTrue()) {
+          // Case (1a)
+          *trueBranch = ifInstruction->IfTrueSuccessor();
+        } else {
+          // Case (2a)
+          DCHECK(rhs->AsIntConstant()->IsFalse()) << rhs->AsIntConstant()->GetValue();
+          *trueBranch = ifInstruction->IfFalseSuccessor();
+        }
+        *instanceOf = lhs->AsInstanceOf();
+        return true;
+      }
+    }
+  } else if (input->IsNotEqual()) {
+    HInstruction* rhs = input->AsNotEqual()->GetConstantRight();
+    if (rhs != nullptr) {
+      HInstruction* lhs = input->AsNotEqual()->GetLeastConstantLeft();
+      if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+        if (rhs->AsIntConstant()->IsFalse()) {
+          // Case (1b)
+          *trueBranch = ifInstruction->IfTrueSuccessor();
+        } else {
+          // Case (2b)
+          DCHECK(rhs->AsIntConstant()->IsTrue()) << rhs->AsIntConstant()->GetValue();
+          *trueBranch = ifInstruction->IfFalseSuccessor();
+        }
+        *instanceOf = lhs->AsInstanceOf();
+        return true;
+      }
+    }
+  } else if (input->IsInstanceOf()) {
+    // Case (1c)
+    *instanceOf = input->AsInstanceOf();
+    *trueBranch = ifInstruction->IfTrueSuccessor();
+    return true;
+  } else if (input->IsBooleanNot()) {
+    HInstruction* not_input = input->InputAt(0);
+    if (not_input->IsInstanceOf()) {
+      // Case (2c)
+      *instanceOf = not_input->AsInstanceOf();
+      *trueBranch = ifInstruction->IfFalseSuccessor();
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // Detects if `block` is the True block for the pattern
 // `if (x instanceof ClassX) { }`
 // If that's the case, insert an HBoundType instruction to bound the type of `x`
@@ -299,29 +369,17 @@
   if (ifInstruction == nullptr) {
     return;
   }
-  HInstruction* ifInput = ifInstruction->InputAt(0);
-  HInstruction* instanceOf = nullptr;
-  HBasicBlock* instanceOfTrueBlock = nullptr;
 
-  // The instruction simplifier has transformed:
-  //   - `if (a instanceof A)` into an HIf with an HInstanceOf input
-  //   - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn
-  //     has an HInstanceOf input)
-  // So we should not see the usual HEqual here.
-  if (ifInput->IsInstanceOf()) {
-    instanceOf = ifInput;
-    instanceOfTrueBlock = ifInstruction->IfTrueSuccessor();
-  } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) {
-    instanceOf = ifInput->InputAt(0);
-    instanceOfTrueBlock = ifInstruction->IfFalseSuccessor();
-  } else {
+  // Try to recognize common `if (instanceof)` and `if (!instanceof)` patterns.
+  HInstanceOf* instanceOf = nullptr;
+  HBasicBlock* instanceOfTrueBlock = nullptr;
+  if (!MatchIfInstanceOf(ifInstruction, &instanceOf, &instanceOfTrueBlock)) {
     return;
   }
 
   HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
   ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
   {
-    ScopedObjectAccess soa(Thread::Current());
     if (!class_rti.IsValid()) {
       // We have loaded an unresolved class. Don't bother bounding the type.
       return;
@@ -342,18 +400,19 @@
     return;
   }
   DCHECK(!obj->IsLoadClass()) << "We should not replace HLoadClass instructions";
-  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  const HUseList<HInstruction*>& uses = obj->GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+    ++it;
     if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
       if (bound_type == nullptr) {
         ScopedObjectAccess soa(Thread::Current());
         HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction();
         if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) {
-          bound_type = CreateBoundType(
-              graph_->GetArena(),
-              obj,
-              load_class,
-              false /* InstanceOf ensures the object is not null. */);
+          bound_type = new (graph_->GetArena()) HBoundType(obj);
+          bound_type->SetUpperBound(class_rti, /* InstanceOf fails for null. */ false);
           instanceOfTrueBlock->InsertInstructionBefore(bound_type, insert_point);
         } else {
           // We already have a bound type on the position we would need to insert
@@ -362,23 +421,29 @@
           break;
         }
       }
-      user->ReplaceInput(bound_type, it.Current()->GetIndex());
+      user->ReplaceInput(bound_type, index);
     }
   }
 }
 
-void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
-                                    mirror::Class* klass,
-                                    bool is_exact) {
+void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
+                                                              mirror::Class* klass,
+                                                              bool is_exact) {
   if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
     // Calls to String.<init> are replaced with a StringFactory.
     if (kIsDebugBuild) {
-      ScopedObjectAccess soa(Thread::Current());
+      HInvoke* invoke = instr->AsInvoke();
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = cl->FindDexCache(
-          soa.Self(), instr->AsInvoke()->GetDexFile(), false);
-      ArtMethod* method = dex_cache->GetResolvedMethod(
-          instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
+      Thread* self = Thread::Current();
+      StackHandleScope<2> hs(self);
+      Handle<mirror::DexCache> dex_cache(
+          hs.NewHandle(FindDexCacheWithHint(self, invoke->GetDexFile(), hint_dex_cache_)));
+      // Use a null loader. We should probably use the compiling method's class loader,
+      // but then we would need to pass it to RTPVisitor just for this debug check. Since
+      // the method is from the String class, the null loader is good enough.
+      Handle<mirror::ClassLoader> loader;
+      ArtMethod* method = cl->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+          invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
       DCHECK(declaring_class != nullptr);
@@ -388,131 +453,136 @@
           << "Expected String.<init>: " << PrettyMethod(method);
     }
     instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
-  } else if (klass != nullptr) {
-    ScopedObjectAccess soa(Thread::Current());
-    ReferenceTypeInfo::TypeHandle handle = handles_->NewHandle(klass);
-    is_exact = is_exact || klass->CannotBeAssignedFromOtherTypes();
+        ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
+  } else if (IsAdmissible(klass)) {
+    ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
+    is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
   } else {
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
-void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
-                                         uint16_t type_idx,
-                                         const DexFile& dex_file,
-                                         bool is_exact) {
+void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
+                                                                   uint16_t type_idx,
+                                                                   const DexFile& dex_file,
+                                                                   bool is_exact) {
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
 
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
+  mirror::DexCache* dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
   // Get type from dex cache assuming it was populated by the verifier.
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
 
-void RTPVisitor::VisitNullConstant(HNullConstant* instr) {
-  // TODO: The null constant could be bound contextually (e.g. based on return statements)
-  // to a more precise type.
-  instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
-}
-
-void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
-void RTPVisitor::VisitNewArray(HNewArray* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
-static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
+static mirror::Class* GetClassFromDexCache(Thread* self,
+                                           const DexFile& dex_file,
+                                           uint16_t type_idx,
+                                           Handle<mirror::DexCache> hint_dex_cache)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false);
+  mirror::DexCache* dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache);
   // Get type from dex cache assuming it was populated by the verifier.
   return dex_cache->GetResolvedType(type_idx);
 }
 
-void RTPVisitor::VisitParameterValue(HParameterValue* instr) {
-  ScopedObjectAccess soa(Thread::Current());
+void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) {
   // We check if the existing type is valid: the inliner may have set it.
   if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-    mirror::Class* resolved_class =
-        GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::Class* resolved_class = GetClassFromDexCache(soa.Self(),
+                                                         instr->GetDexFile(),
+                                                         instr->GetTypeIndex(),
+                                                         hint_dex_cache_);
     SetClassAsTypeInfo(instr, resolved_class, /* is_exact */ false);
   }
 }
 
-void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
-                                           const FieldInfo& info) {
-  // The field index is unknown only during tests.
-  if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) {
+void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
+                                                                     const FieldInfo& info) {
+  if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
 
   ScopedObjectAccess soa(Thread::Current());
-  ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
-  // TODO: There are certain cases where we can't resolve the field.
-  // b/21914925 is open to keep track of a repro case for this issue.
-  mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>();
+  mirror::Class* klass = nullptr;
+
+  // The field index is unknown only during tests.
+  if (info.GetFieldIndex() != kUnknownFieldIndex) {
+    ClassLinker* cl = Runtime::Current()->GetClassLinker();
+    ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
+    // TODO: There are certain cases where we can't resolve the field.
+    // b/21914925 is open to keep track of a repro case for this issue.
+    if (field != nullptr) {
+      klass = field->GetType<false>();
+    }
+  }
+
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
-void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
-void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) {
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
-void RTPVisitor::VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
-    instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
-void RTPVisitor::VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
-    instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
-void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   // Get type from dex cache assuming it was populated by the verifier.
-  mirror::Class* resolved_class =
-      GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
-  if (resolved_class != nullptr) {
+  mirror::Class* resolved_class = GetClassFromDexCache(soa.Self(),
+                                                       instr->GetDexFile(),
+                                                       instr->GetTypeIndex(),
+                                                       hint_dex_cache_);
+  if (IsAdmissible(resolved_class)) {
     instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
-        handles_->NewHandle(resolved_class), /* is_exact */ true));
+        handle_cache_->NewHandle(resolved_class), /* is_exact */ true));
   }
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_class_handle_, /* is_exact */ true));
+  instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
 }
 
-void RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
   instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
 }
 
-void RTPVisitor::VisitLoadString(HLoadString* instr) {
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
+void ReferenceTypePropagation::RTPVisitor::VisitLoadString(HLoadString* instr) {
+  instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
 }
 
-void RTPVisitor::VisitLoadException(HLoadException* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitLoadException(HLoadException* instr) {
   DCHECK(instr->GetBlock()->IsCatchBlock());
   TryCatchInformation* catch_info = instr->GetBlock()->GetTryCatchInformation();
 
   if (catch_info->IsCatchAllTypeIndex()) {
-    instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(throwable_class_handle_,
-                                                          /* is_exact */ false));
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact */ false));
   } else {
     UpdateReferenceTypeInfo(instr,
                             catch_info->GetCatchTypeIndex(),
@@ -521,68 +591,84 @@
   }
 }
 
-void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
-  ScopedObjectAccess soa(Thread::Current());
+void ReferenceTypePropagation::RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
-void RTPVisitor::VisitFakeString(HFakeString* instr) {
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
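+// Narrows the type of a BoundType as much as its upper bound and the type of
+// its input allow; a BoundType left over from an unresolved class is removed.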
+void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
+  ReferenceTypeInfo class_rti = instr->GetUpperBound();
+  if (class_rti.IsValid()) {
+    ScopedObjectAccess soa(Thread::Current());
+    // Narrow the type as much as possible.
+    HInstruction* obj = instr->InputAt(0);
+    ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
+    if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) {
+      instr->SetReferenceTypeInfo(
+          ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true));
+    } else if (obj_rti.IsValid()) {
+      if (class_rti.IsSupertypeOf(obj_rti)) {
+        // Object type is more specific.
+        instr->SetReferenceTypeInfo(obj_rti);
+      } else {
+        // Upper bound is more specific.
+        instr->SetReferenceTypeInfo(
+            ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+      }
+    } else {
+      // Object not typed yet. Leave the BoundType untyped for now rather than
+      // assigning a conservative type.
+    }
+    instr->SetCanBeNull(obj->CanBeNull() && instr->GetUpperCanBeNull());
+  } else {
+    // The owner of the BoundType was already visited. If the class is unresolved,
+    // the BoundType should have been removed from the data flow and this method
+    // should remove it from the graph.
+    DCHECK(!instr->HasUses());
+    instr->GetBlock()->RemoveInstruction(instr);
+  }
 }
 
-void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
+void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
   ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    if (!class_rti.IsValid()) {
-      // He have loaded an unresolved class. Don't bother bounding the type.
-      return;
-    }
+  HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
+  if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
+    // The next instruction is not an uninitialized BoundType. This must be
+    // an RTP pass after SsaBuilder and we do not need to do anything.
+    return;
   }
-  HInstruction* obj = check_cast->InputAt(0);
-  HBoundType* bound_type = nullptr;
-  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
-    if (check_cast->StrictlyDominates(user)) {
-      if (bound_type == nullptr) {
-        ScopedObjectAccess soa(Thread::Current());
-        if (ShouldCreateBoundType(check_cast->GetNext(), obj, class_rti, check_cast, nullptr)) {
-          bound_type = CreateBoundType(
-              GetGraph()->GetArena(),
-              obj,
-              load_class,
-              true /* CheckCast succeeds for nulls. */);
-          check_cast->GetBlock()->InsertInstructionAfter(bound_type, check_cast);
-        } else {
-          // Update nullability of the existing bound type, which may not have known
-          // that its input was not null when it was being created.
-          bound_type = check_cast->GetNext()->AsBoundType();
-          bound_type->SetCanBeNull(obj->CanBeNull());
-          // We already have a bound type on the position we would need to insert
-          // the new one. The existing bound type should dominate all the users
-          // (dchecked) so there's no need to continue.
-          break;
-        }
-      }
-      user->ReplaceInput(bound_type, it.Current()->GetIndex());
-    }
+  DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
+
+  if (class_rti.IsValid()) {
+    DCHECK(is_first_run_);
+    // This is the first run of RTP and class is resolved.
+    bound_type->SetUpperBound(class_rti, /* CheckCast succeeds for nulls. */ true);
+  } else {
+    // This is the first run of RTP and class is unresolved. Remove the binding.
+    // The instruction itself is removed in VisitBoundType so as to not
+    // invalidate HInstructionIterator.
+    bound_type->ReplaceWith(bound_type->InputAt(0));
   }
 }
 
 void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
-  if (phi->GetType() != Primitive::kPrimNot) {
+  if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) {
     return;
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -604,46 +690,57 @@
   }
 
   bool is_exact = a.IsExact() && b.IsExact();
-  Handle<mirror::Class> type_handle;
+  ReferenceTypeInfo::TypeHandle result_type_handle;
+  ReferenceTypeInfo::TypeHandle a_type_handle = a.GetTypeHandle();
+  ReferenceTypeInfo::TypeHandle b_type_handle = b.GetTypeHandle();
+  bool a_is_interface = a_type_handle->IsInterface();
+  bool b_is_interface = b_type_handle->IsInterface();
 
   if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
-    type_handle = a.GetTypeHandle();
+    result_type_handle = a_type_handle;
   } else if (a.IsSupertypeOf(b)) {
-    type_handle = a.GetTypeHandle();
+    result_type_handle = a_type_handle;
     is_exact = false;
   } else if (b.IsSupertypeOf(a)) {
-    type_handle = b.GetTypeHandle();
+    result_type_handle = b_type_handle;
+    is_exact = false;
+  } else if (!a_is_interface && !b_is_interface) {
+    result_type_handle =
+        handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
     is_exact = false;
   } else {
-    // TODO: Find the first common super class.
-    type_handle = object_class_handle_;
+    // This can happen if:
+    //    - both types are interfaces. TODO(calin): implement
+    //    - one is an interface, the other a class, and the type does not implement the interface
+    //      e.g:
+    //        void foo(Interface i, boolean cond) {
+    //          Object o = cond ? i : new Object();
+    //        }
+    result_type_handle = handle_cache_.GetObjectClassHandle();
     is_exact = false;
   }
 
-  return ReferenceTypeInfo::Create(type_handle, is_exact);
+  return ReferenceTypeInfo::Create(result_type_handle, is_exact);
 }
 
-static void UpdateArrayGet(HArrayGet* instr,
-                           StackHandleScopeCollection* handles,
-                           ReferenceTypeInfo::TypeHandle object_class_handle)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
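+// Recomputes the type of an ArrayGet from the type of its array input, falling
+// back to inexact Object when the component type is not admissible.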
+void ReferenceTypePropagation::UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache) {
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-
-  Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
-  if (handle->IsObjectArrayClass()) {
-    ReferenceTypeInfo::TypeHandle component_handle = handles->NewHandle(handle->GetComponentType());
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(component_handle, /* is_exact */ false));
-  } else {
-    // We don't know what the parent actually is, so we fallback to object.
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
+  if (!parent_rti.IsValid()) {
+    return;
   }
 
-  return;
+  Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
+  if (handle->IsObjectArrayClass() && IsAdmissible(handle->GetComponentType())) {
+    ReferenceTypeInfo::TypeHandle component_handle =
+        handle_cache->NewHandle(handle->GetComponentType());
+    bool is_exact = component_handle->CannotBeAssignedFromOtherTypes();
+    instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(component_handle, is_exact));
+  } else {
+    // We don't know what the parent actually is, so we fallback to object.
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
+  }
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -660,9 +757,9 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
-    UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
+    UpdateArrayGet(instr->AsArrayGet(), &handle_cache_);
   } else {
     LOG(FATAL) << "Invalid instruction (should not get here)";
   }
@@ -670,26 +767,28 @@
   return !previous_rti.IsEqual(instr->GetReferenceTypeInfo());
 }
 
-void RTPVisitor::VisitInvoke(HInvoke* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) {
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
 
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  mirror::DexCache* dex_cache = cl->FindDexCache(soa.Self(), instr->GetDexFile());
-  size_t pointer_size = cl->GetImagePointerSize();
+  mirror::DexCache* dex_cache =
+      FindDexCacheWithHint(soa.Self(), instr->GetDexFile(), hint_dex_cache_);
+  PointerSize pointer_size = cl->GetImagePointerSize();
   ArtMethod* method = dex_cache->GetResolvedMethod(instr->GetDexMethodIndex(), pointer_size);
   mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false, pointer_size);
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
-void RTPVisitor::VisitArrayGet(HArrayGet* instr) {
+void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) {
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
+
   ScopedObjectAccess soa(Thread::Current());
-  UpdateArrayGet(instr, handles_, object_class_handle_);
+  UpdateArrayGet(instr, handle_cache_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
     worklist_->push_back(instr);
   }
@@ -704,20 +803,46 @@
   // Make sure that we don't go over the bounded type.
   ReferenceTypeInfo upper_bound_rti = instr->GetUpperBound();
   if (!upper_bound_rti.IsSupertypeOf(new_rti)) {
-    new_rti = upper_bound_rti;
+    // Note that the input might be exact, in which case we know the branch leading
+    // to the bound type is dead. We play it safe by not marking the bound type as
+    // exact.
+    bool is_exact = upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
+    new_rti = ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), is_exact);
   }
   instr->SetReferenceTypeInfo(new_rti);
 }
 
+// NullConstant inputs are ignored during merging as they do not provide any useful information.
+// If all the inputs are NullConstants then the type of the phi will be set to Object.
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
-  ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+  DCHECK(instr->IsLive());
+
+  HInputsRef inputs = instr->GetInputs();
+  size_t first_input_index_not_null = 0;
+  while (first_input_index_not_null < inputs.size() &&
+         inputs[first_input_index_not_null]->IsNullConstant()) {
+    first_input_index_not_null++;
+  }
+  if (first_input_index_not_null == inputs.size()) {
+    // All inputs are NullConstants, set the type to object.
+    // This may happen in the presence of inlining.
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
+    return;
+  }
+
+  ReferenceTypeInfo new_rti = instr->InputAt(first_input_index_not_null)->GetReferenceTypeInfo();
+
   if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) {
     // Early return if we are Object and inexact.
     instr->SetReferenceTypeInfo(new_rti);
     return;
   }
-  for (size_t i = 1; i < instr->InputCount(); i++) {
-    new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
+
+  for (size_t i = first_input_index_not_null + 1; i < inputs.size(); i++) {
+    if (inputs[i]->IsNullConstant()) {
+      continue;
+    }
+    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo());
     if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
         break;
@@ -726,13 +851,16 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
 // not the nullability was changed.
 bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
-  DCHECK(instr->IsPhi()
+  DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive())
       || instr->IsBoundType()
       || instr->IsNullCheck()
       || instr->IsArrayGet());
@@ -745,8 +873,8 @@
   if (instr->IsPhi()) {
     HPhi* phi = instr->AsPhi();
     bool new_can_be_null = false;
-    for (size_t i = 0; i < phi->InputCount(); i++) {
-      if (phi->InputAt(i)->CanBeNull()) {
+    for (HInstruction* input : phi->GetInputs()) {
+      if (input->CanBeNull()) {
         new_can_be_null = true;
         break;
       }
@@ -778,9 +906,9 @@
 }
 
 void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
-    if (user->IsPhi()
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if ((user->IsPhi() && user->AsPhi()->IsLive())
        || user->IsBoundType()
        || user->IsNullCheck()
        || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) {
@@ -788,4 +916,5 @@
     }
   }
 }
+
 }  // namespace art
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5493601..edd83bf 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -32,14 +32,52 @@
 class ReferenceTypePropagation : public HOptimization {
  public:
   ReferenceTypePropagation(HGraph* graph,
+                           Handle<mirror::DexCache> hint_dex_cache,
                            StackHandleScopeCollection* handles,
+                           bool is_first_run,
                            const char* name = kReferenceTypePropagationPassName);
 
+  // Visit a single instruction.
+  void Visit(HInstruction* instruction);
+
   void Run() OVERRIDE;
 
+  // Returns true if klass is admissible to the propagation: non-null and resolved.
+  // For an array type, we also check if the component type is admissible.
+  static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return klass != nullptr &&
+           klass->IsResolved() &&
+           (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
+  }
+
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
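+  // Lazily-populated cache of type handles for commonly used class roots
+  // (Object, Class, String, Throwable).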
+  class HandleCache {
+   public:
+    explicit HandleCache(StackHandleScopeCollection* handles) : handles_(handles) { }
+
+    template <typename T>
+    MutableHandle<T> NewHandle(T* object) SHARED_REQUIRES(Locks::mutator_lock_) {
+      return handles_->NewHandle(object);
+    }
+
+    ReferenceTypeInfo::TypeHandle GetObjectClassHandle();
+    ReferenceTypeInfo::TypeHandle GetClassClassHandle();
+    ReferenceTypeInfo::TypeHandle GetStringClassHandle();
+    ReferenceTypeInfo::TypeHandle GetThrowableClassHandle();
+
+   private:
+    StackHandleScopeCollection* handles_;
+
+    ReferenceTypeInfo::TypeHandle object_class_handle_;
+    ReferenceTypeInfo::TypeHandle class_class_handle_;
+    ReferenceTypeInfo::TypeHandle string_class_handle_;
+    ReferenceTypeInfo::TypeHandle throwable_class_handle_;
+  };
+
+  class RTPVisitor;
+
   void VisitPhi(HPhi* phi);
   void VisitBasicBlock(HBasicBlock* block);
   void UpdateBoundType(HBoundType* bound_type) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -53,20 +91,29 @@
   bool UpdateNullability(HInstruction* instr);
   bool UpdateReferenceTypeInfo(HInstruction* instr);
 
+  static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  StackHandleScopeCollection* handles_;
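+  // Debug-build validation of the computed reference type info; no-op otherwise.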
+  void ValidateTypes();
+
+  // Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with
+  // graph_->GetDexFile(). Since we may also look up classes in other dex files, it's used only
+  // as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache().
+  Handle<mirror::DexCache> hint_dex_cache_;
+  HandleCache handle_cache_;
 
   ArenaVector<HInstruction*> worklist_;
 
-  ReferenceTypeInfo::TypeHandle object_class_handle_;
-  ReferenceTypeInfo::TypeHandle class_class_handle_;
-  ReferenceTypeInfo::TypeHandle string_class_handle_;
-  ReferenceTypeInfo::TypeHandle throwable_class_handle_;
+  // Whether this is the first run of reference type propagation.
+  const bool is_first_run_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
+  friend class ReferenceTypePropagationTest;
+
   DISALLOW_COPY_AND_ASSIGN(ReferenceTypePropagation);
 };
 
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
new file mode 100644
index 0000000..7649b50
--- /dev/null
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "builder.h"
+#include "nodes.h"
+#include "object_lock.h"
+#include "optimizing_unit_test.h"
+#include "reference_type_propagation.h"
+
+namespace art {
+
+/**
+ * Fixture class for unit testing the ReferenceTypePropagation phase. Used to verify the
+ * functionality of methods and situations that are hard to set up with checker tests.
+ */
+class ReferenceTypePropagationTest : public CommonCompilerTest {
+ public:
+  ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
+  }
+
+  ~ReferenceTypePropagationTest() { }
+
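+  // Initializes the graph's inexact Object RTI and creates the propagation pass under test.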
+  void SetupPropagation(StackHandleScopeCollection* handles) {
+    graph_->InitializeInexactObjectRTI(handles);
+    propagation_ = new (&allocator_) ReferenceTypePropagation(graph_,
+                                                              Handle<mirror::DexCache>(),
+                                                              handles,
+                                                              true,
+                                                              "test_prop");
+  }
+
+  // Relay method to merge types in reference type propagation.
+  ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
+                               const ReferenceTypeInfo& b) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return propagation_->MergeTypes(a, b);
+  }
+
+  // Helper method to construct an invalid type.
+  ReferenceTypeInfo InvalidType() {
+    return ReferenceTypeInfo::CreateInvalid();
+  }
+
+  // Helper method to construct the Object type.
+  ReferenceTypeInfo ObjectType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetObjectClassHandle(), is_exact);
+  }
+
+  // Helper method to construct the String type.
+  ReferenceTypeInfo StringType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetStringClassHandle(), is_exact);
+  }
+
+  // General building fields.
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+
+  ReferenceTypePropagation* propagation_;
+};
+
+//
+// The actual ReferenceTypePropagation unit tests.
+//
+
+TEST_F(ReferenceTypePropagationTest, ProperSetup) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  SetupPropagation(&handles);
+
+  EXPECT_TRUE(propagation_ != nullptr);
+  EXPECT_TRUE(graph_->GetInexactObjectRti().IsEqual(ObjectType(false)));
+}
+
+TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  SetupPropagation(&handles);
+
+  // Two invalid types.
+  ReferenceTypeInfo t1(MergeTypes(InvalidType(), InvalidType()));
+  EXPECT_FALSE(t1.IsValid());
+  EXPECT_FALSE(t1.IsExact());
+  EXPECT_TRUE(t1.IsEqual(InvalidType()));
+
+  // Valid type on right.
+  ReferenceTypeInfo t2(MergeTypes(InvalidType(), ObjectType()));
+  EXPECT_TRUE(t2.IsValid());
+  EXPECT_TRUE(t2.IsExact());
+  EXPECT_TRUE(t2.IsEqual(ObjectType()));
+  ReferenceTypeInfo t3(MergeTypes(InvalidType(), StringType()));
+  EXPECT_TRUE(t3.IsValid());
+  EXPECT_TRUE(t3.IsExact());
+  EXPECT_TRUE(t3.IsEqual(StringType()));
+
+  // Valid type on left.
+  ReferenceTypeInfo t4(MergeTypes(ObjectType(), InvalidType()));
+  EXPECT_TRUE(t4.IsValid());
+  EXPECT_TRUE(t4.IsExact());
+  EXPECT_TRUE(t4.IsEqual(ObjectType()));
+  ReferenceTypeInfo t5(MergeTypes(StringType(), InvalidType()));
+  EXPECT_TRUE(t5.IsValid());
+  EXPECT_TRUE(t5.IsExact());
+  EXPECT_TRUE(t5.IsEqual(StringType()));
+}
+
+TEST_F(ReferenceTypePropagationTest, MergeValidTypes) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  SetupPropagation(&handles);
+
+  // Same types.
+  ReferenceTypeInfo t1(MergeTypes(ObjectType(), ObjectType()));
+  EXPECT_TRUE(t1.IsValid());
+  EXPECT_TRUE(t1.IsExact());
+  EXPECT_TRUE(t1.IsEqual(ObjectType()));
+  ReferenceTypeInfo t2(MergeTypes(StringType(), StringType()));
+  EXPECT_TRUE(t2.IsValid());
+  EXPECT_TRUE(t2.IsExact());
+  EXPECT_TRUE(t2.IsEqual(StringType()));
+
+  // Left is super class of right.
+  ReferenceTypeInfo t3(MergeTypes(ObjectType(), StringType()));
+  EXPECT_TRUE(t3.IsValid());
+  EXPECT_FALSE(t3.IsExact());
+  EXPECT_TRUE(t3.IsEqual(ObjectType(false)));
+
+  // Right is super class of left.
+  ReferenceTypeInfo t4(MergeTypes(StringType(), ObjectType()));
+  EXPECT_TRUE(t4.IsValid());
+  EXPECT_FALSE(t4.IsExact());
+  EXPECT_TRUE(t4.IsEqual(ObjectType(false)));
+
+  // Same types, but one or both are inexact.
+  ReferenceTypeInfo t5(MergeTypes(ObjectType(false), ObjectType()));
+  EXPECT_TRUE(t5.IsValid());
+  EXPECT_FALSE(t5.IsExact());
+  EXPECT_TRUE(t5.IsEqual(ObjectType(false)));
+  ReferenceTypeInfo t6(MergeTypes(ObjectType(), ObjectType(false)));
+  EXPECT_TRUE(t6.IsValid());
+  EXPECT_FALSE(t6.IsExact());
+  EXPECT_TRUE(t6.IsEqual(ObjectType(false)));
+  ReferenceTypeInfo t7(MergeTypes(ObjectType(false), ObjectType(false)));
+  EXPECT_TRUE(t7.IsValid());
+  EXPECT_FALSE(t7.IsExact());
+  EXPECT_TRUE(t7.IsEqual(ObjectType(false)));
+}
+
+}  // namespace art
+
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
new file mode 100644
index 0000000..3450286
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocation_resolver.h"
+
+#include "code_generator.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator,
+                                                       CodeGenerator* codegen,
+                                                       const SsaLivenessAnalysis& liveness)
+      : allocator_(allocator),
+        codegen_(codegen),
+        liveness_(liveness) {}
+
+void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
+                                         size_t max_safepoint_live_fp_regs,
+                                         size_t reserved_out_slots,
+                                         size_t int_spill_slots,
+                                         size_t long_spill_slots,
+                                         size_t float_spill_slots,
+                                         size_t double_spill_slots,
+                                         size_t catch_phi_spill_slots,
+                                         const ArenaVector<LiveInterval*>& temp_intervals) {
+  size_t spill_slots = int_spill_slots
+                     + long_spill_slots
+                     + float_spill_slots
+                     + double_spill_slots
+                     + catch_phi_spill_slots;
+
+  // Computes frame size and spill mask.
+  codegen_->InitializeCodeGeneration(spill_slots,
+                                     max_safepoint_live_core_regs,
+                                     max_safepoint_live_fp_regs,
+                                     reserved_out_slots,  // Includes slot(s) for the art method.
+                                     codegen_->GetGraph()->GetLinearOrder());
+
+  // Resolve outputs, including stack locations.
+  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    LiveInterval* current = instruction->GetLiveInterval();
+    LocationSummary* locations = instruction->GetLocations();
+    Location location = locations->Out();
+    if (instruction->IsParameterValue()) {
+      // Now that we know the frame size, adjust the parameter's location.
+      if (location.IsStackSlot()) {
+        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->UpdateOut(location);
+      } else if (location.IsDoubleStackSlot()) {
+        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->UpdateOut(location);
+      } else if (current->HasSpillSlot()) {
+        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
+      }
+    } else if (instruction->IsCurrentMethod()) {
+      // The current method is always at offset 0.
+      DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
+    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+      DCHECK(current->HasSpillSlot());
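+      // Catch phi spill slots are laid out above all other spill slots (see the stack
+      // layout in the branch below), so skip past the other spill slots and the
+      // reserved out slots.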
+      size_t slot = current->GetSpillSlot()
+                    + spill_slots
+                    + reserved_out_slots
+                    - catch_phi_spill_slots;
+      current->SetSpillSlot(slot * kVRegSize);
+    } else if (current->HasSpillSlot()) {
+      // Adjust the stack slot, now that we know the number of them for each type.
+      // The way this implementation lays out the stack is the following:
+      // [parameter slots       ]
+      // [catch phi spill slots ]
+      // [double spill slots    ]
+      // [long spill slots      ]
+      // [float spill slots     ]
+      // [int/ref values        ]
+      // [maximum out values    ] (number of arguments for calls)
+      // [art method            ].
+      size_t slot = current->GetSpillSlot();
+      switch (current->GetType()) {
+        case Primitive::kPrimDouble:
+          slot += long_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimLong:
+          slot += float_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimFloat:
+          slot += int_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimNot:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimShort:
+          slot += reserved_out_slots;
+          break;
+        case Primitive::kPrimVoid:
+          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+      }
+      current->SetSpillSlot(slot * kVRegSize);
+    }
+
+    Location source = current->ToLocation();
+
+    if (location.IsUnallocated()) {
+      if (location.GetPolicy() == Location::kSameAsFirstInput) {
+        if (locations->InAt(0).IsUnallocated()) {
+          locations->SetInAt(0, source);
+        } else {
+          DCHECK(locations->InAt(0).Equals(source));
+        }
+      }
+      locations->UpdateOut(source);
+    } else {
+      DCHECK(source.Equals(location));
+    }
+  }
+
+  // Connect siblings and resolve inputs.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    ConnectSiblings(instruction->GetLiveInterval(),
+                    max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+  }
+
+  // Resolve non-linear control flow across branches. Order does not matter.
+  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // Instructions live at the top of catch blocks or irreducible loop headers
+      // were forced to spill.
+      if (kIsDebugBuild) {
+        BitVector* live = liveness_.GetLiveInSet(*block);
+        for (uint32_t idx : live->Indexes()) {
+          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+          LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
+          // `GetSiblingAt` returns the sibling that contains a position, but there could be
+          // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
+          // position.
+          if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
+            DCHECK(!sibling->HasRegister());
+          }
+        }
+      }
+    } else {
+      BitVector* live = liveness_.GetLiveInSet(*block);
+      for (uint32_t idx : live->Indexes()) {
+        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+        for (HBasicBlock* predecessor : block->GetPredecessors()) {
+          ConnectSplitSiblings(interval, predecessor, block);
+        }
+      }
+    }
+  }
+
+  // Resolve phi inputs. Order does not matter.
+  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* current = it.Current();
+    if (current->IsCatchBlock()) {
+      // Catch phi values are set at runtime by the exception delivery mechanism.
+    } else {
+      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* phi = inst_it.Current();
+        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
+          HBasicBlock* predecessor = current->GetPredecessors()[i];
+          DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+          HInstruction* input = phi->InputAt(i);
+          Location source = input->GetLiveInterval()->GetLocationAt(
+              predecessor->GetLifetimeEnd() - 1);
+          Location destination = phi->GetLiveInterval()->ToLocation();
+          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+        }
+      }
+    }
+  }
+
+  // Resolve temp locations.
+  for (LiveInterval* temp : temp_intervals) {
+    if (temp->IsHighInterval()) {
+      // High intervals can be skipped, they are already handled by the low interval.
+      continue;
+    }
+    HInstruction* at = liveness_.GetTempUser(temp);
+    size_t temp_index = liveness_.GetTempIndex(temp);
+    LocationSummary* locations = at->GetLocations();
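+    // Temporaries are requested either as core registers (int) or FP registers (double).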
+    switch (temp->GetType()) {
+      case Primitive::kPrimInt:
+        locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
+        break;
+
+      case Primitive::kPrimDouble:
+        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+          Location location = Location::FpuRegisterPairLocation(
+              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+          locations->SetTempAt(temp_index, location);
+        } else {
+          locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
+        }
+        break;
+
+      default:
+        LOG(FATAL) << "Unexpected type for temporary location "
+                   << temp->GetType();
+    }
+  }
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
+                                                 size_t max_safepoint_live_regs) {
+  LiveInterval* current = interval;
+  if (current->HasSpillSlot()
+      && current->HasRegister()
+      // Currently, the code generators unconditionally spill the current method.
+      && !interval->GetDefinedBy()->IsCurrentMethod()) {
+    // We spill eagerly, so move must be at definition.
+    InsertMoveAfter(interval->GetDefinedBy(),
+                    interval->ToLocation(),
+                    interval->NeedsTwoSpillSlots()
+                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+  }
+  UsePosition* use = current->GetFirstUse();
+  UsePosition* env_use = current->GetFirstEnvironmentUse();
+
+  // Walk over all siblings, updating locations of use positions, and
+  // connecting them when they are adjacent.
+  do {
+    Location source = current->ToLocation();
+
+    // Walk over all uses covered by this interval, and update the location
+    // information.
+
+    LiveRange* range = current->GetFirstRange();
+    while (range != nullptr) {
+      while (use != nullptr && use->GetPosition() < range->GetStart()) {
+        DCHECK(use->IsSynthesized());
+        use = use->GetNext();
+      }
+      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+        DCHECK(!use->GetIsEnvironment());
+        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
+          Location expected_location = locations->InAt(use->GetInputIndex());
+          // The expected (actual) location may be invalid if the input is unused. Currently
+          // this only happens for intrinsics.
+          if (expected_location.IsValid()) {
+            if (expected_location.IsUnallocated()) {
+              locations->SetInAt(use->GetInputIndex(), source);
+            } else if (!expected_location.IsConstant()) {
+              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+            }
+          } else {
+            DCHECK(use->GetUser()->IsInvoke());
+            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+          }
+        }
+        use = use->GetNext();
+      }
+
+      // Walk over the environment uses, and update their locations.
+      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+        env_use = env_use->GetNext();
+      }
+
+      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+        DCHECK(current->CoversSlow(env_use->GetPosition())
+               || (env_use->GetPosition() == range->GetEnd()));
+        HEnvironment* environment = env_use->GetEnvironment();
+        environment->SetLocationAt(env_use->GetInputIndex(), source);
+        env_use = env_use->GetNext();
+      }
+
+      range = range->GetNext();
+    }
+
+    // If the next interval starts just after this one, and has a register,
+    // insert a move.
+    LiveInterval* next_sibling = current->GetNextSibling();
+    if (next_sibling != nullptr
+        && next_sibling->HasRegister()
+        && current->GetEnd() == next_sibling->GetStart()) {
+      Location destination = next_sibling->ToLocation();
+      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
+    }
+
+    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+         safepoint_position != nullptr;
+         safepoint_position = safepoint_position->GetNext()) {
+      DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+
+      LocationSummary* locations = safepoint_position->GetLocations();
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+        DCHECK(interval->GetDefinedBy()->IsActualObject())
+            << interval->GetDefinedBy()->DebugName()
+            << "@" << safepoint_position->GetInstruction()->DebugName();
+        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
+      }
+
+      switch (source.GetKind()) {
+        case Location::kRegister: {
+          locations->AddLiveRegister(source);
+          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
+            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                      max_safepoint_live_regs);
+          }
+          if (current->GetType() == Primitive::kPrimNot) {
+            DCHECK(interval->GetDefinedBy()->IsActualObject())
+                << interval->GetDefinedBy()->DebugName()
+                << "@" << safepoint_position->GetInstruction()->DebugName();
+            locations->SetRegisterBit(source.reg());
+          }
+          break;
+        }
+        case Location::kFpuRegister: {
+          locations->AddLiveRegister(source);
+          break;
+        }
+
+        case Location::kRegisterPair:
+        case Location::kFpuRegisterPair: {
+          locations->AddLiveRegister(source.ToLow());
+          locations->AddLiveRegister(source.ToHigh());
+          break;
+        }
+        case Location::kStackSlot:  // Fall-through
+        case Location::kDoubleStackSlot:  // Fall-through
+        case Location::kConstant: {
+          // Nothing to do.
+          break;
+        }
+        default: {
+          LOG(FATAL) << "Unexpected location for object";
+        }
+      }
+    }
+    current = next_sibling;
+  } while (current != nullptr);
+
+  if (kIsDebugBuild) {
+    // Any remaining uses can only be synthesized uses.
+    while (use != nullptr) {
+      DCHECK(use->IsSynthesized());
+      use = use->GetNext();
+    }
+  }
+}
+
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+    HInstruction* instruction) {
+  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+         (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
+void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
+                                                      HBasicBlock* from,
+                                                      HBasicBlock* to) const {
+  if (interval->GetNextSibling() == nullptr) {
+    // Nothing to connect. The whole range was allocated to the same location.
+    return;
+  }
+
+  // Find the intervals that cover `from` and `to`.
+  size_t destination_position = to->GetLifetimeStart();
+  size_t source_position = from->GetLifetimeEnd() - 1;
+  LiveInterval* destination = interval->GetSiblingAt(destination_position);
+  LiveInterval* source = interval->GetSiblingAt(source_position);
+
+  if (destination == source) {
+    // Interval was not split.
+    return;
+  }
+
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (destination == nullptr || !destination->CoversSlow(destination_position))) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation however does not compute a fixed point, and
+    // therefore will not have a location for that instruction for `to`.
+    // Because the instruction is a constant or the ArtMethod, we don't need to
+    // do anything: it will be materialized in the irreducible loop.
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+        << defined_by->DebugName() << ":" << defined_by->GetId()
+        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
+    return;
+  }
+
+  if (!destination->HasRegister()) {
+    // Values are eagerly spilled. Spill slot already contains appropriate value.
+    return;
+  }
+
+  Location location_source;
+  // `GetSiblingAt` returns the interval whose start and end cover `position`,
+  // but does not check whether the interval is inactive at that position.
+  // The only situation where it can be inactive there is in the presence of
+  // irreducible loops, for constants and the ArtMethod.
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (source == nullptr || !source->CoversSlow(source_position))) {
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    if (defined_by->IsConstant()) {
+      location_source = defined_by->GetLocations()->Out();
+    } else {
+      DCHECK(defined_by->IsCurrentMethod());
+      location_source = parent->NeedsTwoSpillSlots()
+          ? Location::DoubleStackSlot(parent->GetSpillSlot())
+          : Location::StackSlot(parent->GetSpillSlot());
+    }
+  } else {
+    DCHECK(source != nullptr);
+    DCHECK(source->CoversSlow(source_position));
+    DCHECK(destination->CoversSlow(destination_position));
+    location_source = source->ToLocation();
+  }
+
+  // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
+  // we need to put the moves at the entry of `to`.
+  if (from->GetNormalSuccessors().size() == 1) {
+    InsertParallelMoveAtExitOf(from,
+                               defined_by,
+                               location_source,
+                               destination->ToLocation());
+  } else {
+    DCHECK_EQ(to->GetPredecessors().size(), 1u);
+    InsertParallelMoveAtEntryOf(to,
+                                defined_by,
+                                location_source,
+                                destination->ToLocation());
+  }
+}
+
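+// A move destination must be a concrete register, register pair, or stack slot.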
+static bool IsValidDestination(Location destination) {
+  return destination.IsRegister()
+      || destination.IsRegisterPair()
+      || destination.IsFpuRegister()
+      || destination.IsFpuRegisterPair()
+      || destination.IsStackSlot()
+      || destination.IsDoubleStackSlot();
+}
+
+void RegisterAllocationResolver::AddMove(HParallelMove* move,
+                                         Location source,
+                                         Location destination,
+                                         HInstruction* instruction,
+                                         Primitive::Type type) const {
+  if (type == Primitive::kPrimLong
+      && codegen_->ShouldSplitLongMoves()
+      // The parallel move resolver knows how to deal with long constants.
+      && !source.IsConstant()) {
+    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
+    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
+  } else {
+    move->AddMove(source, destination, type, instruction);
+  }
+}
+
+void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
+                                                 HInstruction* user,
+                                                 Location source,
+                                                 Location destination) const {
+  if (source.Equals(destination)) return;
+
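+  // Phi inputs are resolved separately, via moves at the exit of each predecessor block.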
+  DCHECK(!user->IsPhi());
+
+  HInstruction* previous = user->GetPrevious();
+  HParallelMove* move = nullptr;
+  if (previous == nullptr
+      || !previous->IsParallelMove()
+      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(user->GetLifetimePosition());
+    user->GetBlock()->InsertInstructionBefore(move, user);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
+  AddMove(move, source, destination, nullptr, input->GetType());
+}
+
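+// Lifetime positions come in pairs: even positions mark the start of an instruction,
+// odd positions its end.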
+static bool IsInstructionStart(size_t position) {
+  return (position & 1) == 0;
+}
+
+static bool IsInstructionEnd(size_t position) {
+  return (position & 1) == 1;
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
+                                                      HInstruction* instruction,
+                                                      Location source,
+                                                      Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+  HParallelMove* move;
+  if (at == nullptr) {
+    if (IsInstructionStart(position)) {
+      // Block boundary: don't do anything, the connection of split siblings will handle it.
+      return;
+    } else {
+      // Move must happen before the first instruction of the block.
+      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
+      // Note that parallel moves may have already been inserted, so we explicitly
+      // ask for the first instruction of the block: `GetInstructionFromPosition` does
+      // not contain the `HParallelMove` instructions.
+      at = at->GetBlock()->GetFirstInstruction();
+
+      if (at->GetLifetimePosition() < position) {
+        // We may insert moves for split siblings and phi spills at the beginning of the block.
+        // Since this is a different lifetime position, we need to go to the next instruction.
+        DCHECK(at->IsParallelMove());
+        at = at->GetNext();
+      }
+
+      if (at->GetLifetimePosition() != position) {
+        DCHECK_GT(at->GetLifetimePosition(), position);
+        move = new (allocator_) HParallelMove(allocator_);
+        move->SetLifetimePosition(position);
+        at->GetBlock()->InsertInstructionBefore(move, at);
+      } else {
+        DCHECK(at->IsParallelMove());
+        move = at->AsParallelMove();
+      }
+    }
+  } else if (IsInstructionEnd(position)) {
+    // Move must happen after the instruction.
+    DCHECK(!at->IsControlFlow());
+    move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings within the same block. We need to
+    // distinguish it from moves for connecting blocks, and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() > position) {
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+    }
+  } else {
+    // Move must happen before the instruction.
+    HInstruction* previous = at->GetPrevious();
+    if (previous == nullptr
+        || !previous->IsParallelMove()
+        || previous->GetLifetimePosition() != position) {
+      // If the previous is a parallel move, then its position must be lower
+      // than the given `position`: it was added just after the non-parallel
+      // move instruction that precedes `instruction`.
+      DCHECK(previous == nullptr
+             || !previous->IsParallelMove()
+             || previous->GetLifetimePosition() < position);
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at);
+    } else {
+      move = previous->AsParallelMove();
+    }
+  }
+  DCHECK_EQ(move->GetLifetimePosition(), position);
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                                            HInstruction* instruction,
+                                                            Location source,
+                                                            Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
+  HInstruction* last = block->GetLastInstruction();
+  // We insert moves at exit for phi predecessors and connecting blocks.
+  // A block ending with an if or a packed switch cannot branch to a block
+  // with phis because we do not allow critical edges. Nor can it connect
+  // a split interval between two blocks: the move has to happen in the successor.
+  HInstruction* previous = last->GetPrevious();
+  HParallelMove* move;
+  // This is a parallel move for connecting blocks. We need to distinguish it
+  // from moves for connecting siblings within the same block, and from output moves.
+  size_t position = last->GetLifetimePosition();
+  if (previous == nullptr || !previous->IsParallelMove()
+      || previous->AsParallelMove()->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    block->InsertInstructionBefore(move, last);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                                             HInstruction* instruction,
+                                                             Location source,
+                                                             Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  HInstruction* first = block->GetFirstInstruction();
+  HParallelMove* move = first->AsParallelMove();
+  size_t position = block->GetLifetimeStart();
+  // This is a parallel move for connecting blocks. We need to distinguish it
+  // from moves for connecting siblings within the same block, and from input moves.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    block->InsertInstructionBefore(move, first);
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
+                                                 Location source,
+                                                 Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  if (instruction->IsPhi()) {
+    InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
+    return;
+  }
+
+  size_t position = instruction->GetLifetimePosition() + 1;
+  HParallelMove* move = instruction->GetNext()->AsParallelMove();
+  // This is a parallel move for moving the output of an instruction. We need to
+  // distinguish it from input moves, from moves for connecting siblings within the
+  // same block, and from moves for connecting blocks.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
new file mode 100644
index 0000000..6ceb9bc
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+
+#include "base/arena_containers.h"
+#include "base/value_object.h"
+#include "primitive.h"
+
+namespace art {
+
+class ArenaAllocator;
+class CodeGenerator;
+class HBasicBlock;
+class HInstruction;
+class HParallelMove;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * Reconciles the locations assigned to live intervals with the location
+ * summary of each instruction, and inserts moves to resolve split intervals,
+ * nonlinear control flow, and phi inputs.
+ */
+class RegisterAllocationResolver : ValueObject {
+ public:
+  RegisterAllocationResolver(ArenaAllocator* allocator,
+                             CodeGenerator* codegen,
+                             const SsaLivenessAnalysis& liveness);
+
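+  // Adjusts spill slots, connects split siblings, and resolves phi and temp locations.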
+  void Resolve(size_t max_safepoint_live_core_regs,
+               size_t max_safepoint_live_fp_regs,
+               size_t reserved_out_slots,  // Includes slot(s) for the art method.
+               size_t int_spill_slots,
+               size_t long_spill_slots,
+               size_t float_spill_slots,
+               size_t double_spill_slots,
+               size_t catch_phi_spill_slots,
+               const ArenaVector<LiveInterval*>& temp_intervals);
+
+ private:
+  // Connect adjacent siblings within blocks, and resolve inputs along the way.
+  // Uses max_safepoint_live_regs to check that we did not underestimate the
+  // number of live registers at safepoints.
+  void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+
+  // Connect siblings between block entries and exits.
+  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
+
+  // Helper methods for inserting parallel moves in the graph.
+  void InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                  HInstruction* instruction,
+                                  Location source,
+                                  Location destination) const;
+  void InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                   HInstruction* instruction,
+                                   Location source,
+                                   Location destination) const;
+  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
+  void AddInputMoveFor(HInstruction* input,
+                       HInstruction* user,
+                       Location source,
+                       Location destination) const;
+  void InsertParallelMoveAt(size_t position,
+                            HInstruction* instruction,
+                            Location source,
+                            Location destination) const;
+  void AddMove(HParallelMove* move,
+               Location source,
+               Location destination,
+               HInstruction* instruction,
+               Primitive::Type type) const;
+
+  ArenaAllocator* const allocator_;
+  CodeGenerator* const codegen_;
+  const SsaLivenessAnalysis& liveness_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocationResolver);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index ef22c81..5b768d5 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,66 +21,33 @@
 
 #include "base/bit_vector-inl.h"
 #include "code_generator.h"
+#include "register_allocator_graph_color.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 
 namespace art {
 
-static constexpr size_t kMaxLifetimePosition = -1;
-static constexpr size_t kDefaultNumberOfSpillSlots = 4;
-
-// For simplicity, we implement register pairs as (reg, reg + 1).
-// Note that this is a requirement for double registers on ARM, since we
-// allocate SRegister.
-static int GetHighForLowRegister(int reg) { return reg + 1; }
-static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
-static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
-  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
-}
-
 RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
                                      CodeGenerator* codegen,
                                      const SsaLivenessAnalysis& liveness)
-      : allocator_(allocator),
-        codegen_(codegen),
-        liveness_(liveness),
-        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        unhandled_(nullptr),
-        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        catch_phi_spill_slots_(0),
-        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        processing_core_registers_(false),
-        number_of_registers_(-1),
-        registers_array_(nullptr),
-        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
-        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
-        reserved_out_slots_(0),
-        maximum_number_of_live_core_registers_(0),
-        maximum_number_of_live_fp_registers_(0) {
-  temp_intervals_.reserve(4);
-  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+    : allocator_(allocator),
+      codegen_(codegen),
+      liveness_(liveness) {}
 
-  static constexpr bool kIsBaseline = false;
-  codegen->SetupBlockedRegisters(kIsBaseline);
-  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
-  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
-  // Always reserve for the current method and the graph's max out registers.
-  // TODO: compute it instead.
-  // ArtMethod* takes 2 vregs for 64 bits.
-  reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
-      codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
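+// Factory method that instantiates the concrete allocator for the requested strategy.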
+RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
+                                             CodeGenerator* codegen,
+                                             const SsaLivenessAnalysis& analysis,
+                                             Strategy strategy) {
+  switch (strategy) {
+    case kRegisterAllocatorLinearScan:
+      return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+    case kRegisterAllocatorGraphColor:
+      return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis);
+    default:
+      LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
+      UNREACHABLE();
+  }
 }
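+
+// Usage sketch (hypothetical call site; assumes the base class exposes a virtual
+// AllocateRegisters(), as the concrete allocators do):
+//
+//   RegisterAllocator* allocator = RegisterAllocator::Create(
+//       arena, codegen, liveness, RegisterAllocator::kRegisterAllocatorLinearScan);
+//   allocator->AllocateRegisters();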
 
 bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
@@ -94,326 +61,6 @@
       || instruction_set == kX86_64;
 }
 
-static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
-  if (interval == nullptr) return false;
-  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
-      && (interval->GetType() != Primitive::kPrimFloat);
-  return processing_core_registers == is_core_register;
-}
-
-void RegisterAllocator::AllocateRegisters() {
-  AllocateRegistersInternal();
-  Resolve();
-
-  if (kIsDebugBuild) {
-    processing_core_registers_ = true;
-    ValidateInternal(true);
-    processing_core_registers_ = false;
-    ValidateInternal(true);
-    // Check that the linear order is still correct with regards to lifetime positions.
-    // Since only parallel moves have been inserted during the register allocation,
-    // these checks are mostly for making sure these moves have been added correctly.
-    size_t current_liveness = 0;
-    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
-      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* instruction = inst_it.Current();
-        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
-        current_liveness = instruction->GetLifetimePosition();
-      }
-      for (HInstructionIterator inst_it(block->GetInstructions());
-           !inst_it.Done();
-           inst_it.Advance()) {
-        HInstruction* instruction = inst_it.Current();
-        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
-        current_liveness = instruction->GetLifetimePosition();
-      }
-    }
-  }
-}
-
-void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
-  int reg = location.reg();
-  DCHECK(location.IsRegister() || location.IsFpuRegister());
-  LiveInterval* interval = location.IsRegister()
-      ? physical_core_register_intervals_[reg]
-      : physical_fp_register_intervals_[reg];
-  Primitive::Type type = location.IsRegister()
-      ? Primitive::kPrimInt
-      : Primitive::kPrimFloat;
-  if (interval == nullptr) {
-    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
-    if (location.IsRegister()) {
-      physical_core_register_intervals_[reg] = interval;
-    } else {
-      physical_fp_register_intervals_[reg] = interval;
-    }
-  }
-  DCHECK(interval->GetRegister() == reg);
-  interval->AddRange(start, end);
-}
-
-void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
-  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
-    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
-      BlockRegister(Location::RegisterLocation(i), start, end);
-    }
-  }
-  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
-    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
-      BlockRegister(Location::FpuRegisterLocation(i), start, end);
-    }
-  }
-}
-
-void RegisterAllocator::AllocateRegistersInternal() {
-  // Iterate post-order, to ensure the list is sorted, and the last added interval
-  // is the one with the lowest start position.
-  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
-         back_it.Advance()) {
-      ProcessInstruction(back_it.Current());
-    }
-    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-      ProcessInstruction(inst_it.Current());
-    }
-
-    if (block->IsCatchBlock()) {
-      // By blocking all registers at the top of each catch block, we force
-      // intervals used after catch to spill.
-      size_t position = block->GetLifetimeStart();
-      BlockRegisters(position, position + 1);
-    }
-  }
-
-  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
-                                                    kArenaAllocRegisterAllocator);
-  processing_core_registers_ = true;
-  unhandled_ = &unhandled_core_intervals_;
-  for (LiveInterval* fixed : physical_core_register_intervals_) {
-    if (fixed != nullptr) {
-      // Fixed interval is added to inactive_ instead of unhandled_.
-      // It's also the only type of inactive interval whose start position
-      // can be after the current interval during linear scan.
-      // Fixed interval is never split and never moves to unhandled_.
-      inactive_.push_back(fixed);
-    }
-  }
-  LinearScan();
-
-  inactive_.clear();
-  active_.clear();
-  handled_.clear();
-
-  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
-                                                    kArenaAllocRegisterAllocator);
-  processing_core_registers_ = false;
-  unhandled_ = &unhandled_fp_intervals_;
-  for (LiveInterval* fixed : physical_fp_register_intervals_) {
-    if (fixed != nullptr) {
-      // Fixed interval is added to inactive_ instead of unhandled_.
-      // It's also the only type of inactive interval whose start position
-      // can be after the current interval during linear scan.
-      // Fixed interval is never split and never moves to unhandled_.
-      inactive_.push_back(fixed);
-    }
-  }
-  LinearScan();
-}
-
-void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  size_t position = instruction->GetLifetimePosition();
-
-  if (locations == nullptr) return;
-
-  // Create synthesized intervals for temporaries.
-  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
-    Location temp = locations->GetTemp(i);
-    if (temp.IsRegister() || temp.IsFpuRegister()) {
-      BlockRegister(temp, position, position + 1);
-      // Ensure that an explicit temporary register is marked as being allocated.
-      codegen_->AddAllocatedRegister(temp);
-    } else {
-      DCHECK(temp.IsUnallocated());
-      switch (temp.GetPolicy()) {
-        case Location::kRequiresRegister: {
-          LiveInterval* interval =
-              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
-          temp_intervals_.push_back(interval);
-          interval->AddTempUse(instruction, i);
-          unhandled_core_intervals_.push_back(interval);
-          break;
-        }
-
-        case Location::kRequiresFpuRegister: {
-          LiveInterval* interval =
-              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
-          temp_intervals_.push_back(interval);
-          interval->AddTempUse(instruction, i);
-          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
-            interval->AddHighInterval(/* is_temp */ true);
-            LiveInterval* high = interval->GetHighInterval();
-            temp_intervals_.push_back(high);
-            unhandled_fp_intervals_.push_back(high);
-          }
-          unhandled_fp_intervals_.push_back(interval);
-          break;
-        }
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << temp.GetPolicy();
-      }
-    }
-  }
-
-  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
-
-  if (locations->NeedsSafepoint()) {
-    if (codegen_->IsLeafMethod()) {
-      // TODO: We do this here because we do not want the suspend check to artificially
-      // create live registers. We should find another place, but this is currently the
-      // simplest.
-      DCHECK(instruction->IsSuspendCheckEntry());
-      instruction->GetBlock()->RemoveInstruction(instruction);
-      return;
-    }
-    safepoints_.push_back(instruction);
-    if (locations->OnlyCallsOnSlowPath()) {
-      // We add a synthesized range at this position to record the live registers
-      // at this position. Ideally, we could just update the safepoints when locations
-      // are updated, but we currently need to know the full stack size before updating
-      // locations (because of parameters and the fact that we don't have a frame pointer).
-      // And knowing the full stack size requires to know the maximum number of live
-      // registers at calls in slow paths.
-      // By adding the following interval in the algorithm, we can compute this
-      // maximum before updating locations.
-      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
-      interval->AddRange(position, position + 1);
-      AddSorted(&unhandled_core_intervals_, interval);
-      AddSorted(&unhandled_fp_intervals_, interval);
-    }
-  }
-
-  if (locations->WillCall()) {
-    BlockRegisters(position, position + 1, /* caller_save_only */ true);
-  }
-
-  for (size_t i = 0; i < instruction->InputCount(); ++i) {
-    Location input = locations->InAt(i);
-    if (input.IsRegister() || input.IsFpuRegister()) {
-      BlockRegister(input, position, position + 1);
-    } else if (input.IsPair()) {
-      BlockRegister(input.ToLow(), position, position + 1);
-      BlockRegister(input.ToHigh(), position, position + 1);
-    }
-  }
-
-  LiveInterval* current = instruction->GetLiveInterval();
-  if (current == nullptr) return;
-
-  ArenaVector<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
-  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
-
-  if (codegen_->NeedsTwoRegisters(current->GetType())) {
-    current->AddHighInterval();
-  }
-
-  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
-    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
-    size_t safepoint_position = safepoint->GetLifetimePosition();
-
-    // Test that safepoints are ordered in the optimal way.
-    DCHECK(safepoint_index == safepoints_.size() ||
-           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
-    if (safepoint_position == current->GetStart()) {
-      // The safepoint is for this instruction, so the location of the instruction
-      // does not need to be saved.
-      DCHECK_EQ(safepoint_index, safepoints_.size());
-      DCHECK_EQ(safepoint, instruction);
-      continue;
-    } else if (current->IsDeadAt(safepoint_position)) {
-      break;
-    } else if (!current->Covers(safepoint_position)) {
-      // Hole in the interval.
-      continue;
-    }
-    current->AddSafepoint(safepoint);
-  }
-  current->ResetSearchCache();
-
-  // Some instructions define their output in fixed register/stack slot. We need
-  // to ensure we know these locations before doing register allocation. For a
-  // given register, we create an interval that covers these locations. The register
-  // will be unavailable at these locations when trying to allocate one for an
-  // interval.
-  //
-  // The backwards walking ensures the ranges are ordered on increasing start positions.
-  Location output = locations->Out();
-  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
-    Location first = locations->InAt(0);
-    if (first.IsRegister() || first.IsFpuRegister()) {
-      current->SetFrom(position + 1);
-      current->SetRegister(first.reg());
-    } else if (first.IsPair()) {
-      current->SetFrom(position + 1);
-      current->SetRegister(first.low());
-      LiveInterval* high = current->GetHighInterval();
-      high->SetRegister(first.high());
-      high->SetFrom(position + 1);
-    }
-  } else if (output.IsRegister() || output.IsFpuRegister()) {
-    // Shift the interval's start by one to account for the blocked register.
-    current->SetFrom(position + 1);
-    current->SetRegister(output.reg());
-    BlockRegister(output, position, position + 1);
-  } else if (output.IsPair()) {
-    current->SetFrom(position + 1);
-    current->SetRegister(output.low());
-    LiveInterval* high = current->GetHighInterval();
-    high->SetRegister(output.high());
-    high->SetFrom(position + 1);
-    BlockRegister(output.ToLow(), position, position + 1);
-    BlockRegister(output.ToHigh(), position, position + 1);
-  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
-    current->SetSpillSlot(output.GetStackIndex());
-  } else {
-    DCHECK(output.IsUnallocated() || output.IsConstant());
-  }
-
-  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
-    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
-  }
-
-  // If needed, add interval to the list of unhandled intervals.
-  if (current->HasSpillSlot() || instruction->IsConstant()) {
-    // Split just before first register use.
-    size_t first_register_use = current->FirstRegisterUse();
-    if (first_register_use != kNoLifetime) {
-      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      // Don't add directly to `unhandled`, it needs to be sorted and the start
-      // of this new interval might be after intervals already in the list.
-      AddSorted(&unhandled, split);
-    } else {
-      // Nothing to do, we won't allocate a register for this value.
-    }
-  } else {
-    // Don't add directly to `unhandled`, temp or safepoint intervals
-    // for this instruction may have been added, and those can be
-    // processed first.
-    AddSorted(&unhandled, current);
-  }
-}
-
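(For illustration, a minimal sketch of the sorted-worklist invariant relied on above, using a hypothetical `Interval` stand-in rather than ART's `LiveInterval`; the real `AddSorted` below must additionally keep high intervals adjacent to their lows.)

#include <algorithm>
#include <cstddef>
#include <vector>

struct Interval { size_t start; };

// Keep `unhandled` sorted by decreasing start position so that the interval
// with the lowest start can be popped from the back in O(1).
void AddSortedSketch(std::vector<Interval*>* unhandled, Interval* interval) {
  auto pos = std::upper_bound(
      unhandled->begin(), unhandled->end(), interval,
      [](const Interval* lhs, const Interval* rhs) { return lhs->start > rhs->start; });
  unhandled->insert(pos, interval);
}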
 class AllRangesIterator : public ValueObject {
  public:
   explicit AllRangesIterator(LiveInterval* interval)
@@ -441,36 +88,6 @@
   DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
 };
 
-bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
-  // To simplify unit testing, we eagerly create the array of intervals, and
-  // call the helper method.
-  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
-  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
-      intervals.push_back(instruction->GetLiveInterval());
-    }
-  }
-
-  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
-      ? &physical_core_register_intervals_
-      : &physical_fp_register_intervals_;
-  for (LiveInterval* fixed : *physical_register_intervals) {
-    if (fixed != nullptr) {
-      intervals.push_back(fixed);
-    }
-  }
-
-  for (LiveInterval* temp : temp_intervals_) {
-    if (ShouldProcess(processing_core_registers_, temp)) {
-      intervals.push_back(temp);
-    }
-  }
-
-  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
-                           allocator_, processing_core_registers_, log_fatal_on_failure);
-}
-
 bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                           size_t number_of_spill_slots,
                                           size_t number_of_out_slots,
@@ -482,13 +99,21 @@
       ? codegen.GetNumberOfCoreRegisters()
       : codegen.GetNumberOfFloatingPointRegisters();
   ArenaVector<ArenaBitVector*> liveness_of_values(
-      allocator->Adapter(kArenaAllocRegisterAllocator));
+      allocator->Adapter(kArenaAllocRegisterAllocatorValidate));
   liveness_of_values.reserve(number_of_registers + number_of_spill_slots);
 
+  size_t max_end = 0u;
+  for (LiveInterval* start_interval : intervals) {
+    for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
+      max_end = std::max(max_end, it.CurrentRange()->GetEnd());
+    }
+  }
+
   // Allocate a bit vector per register. A live interval that has a register
   // allocated will populate the associated bit vector based on its live ranges.
   for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
-    liveness_of_values.push_back(new (allocator) ArenaBitVector(allocator, 0, true));
+    liveness_of_values.push_back(
+        ArenaBitVector::Create(allocator, max_end, false, kArenaAllocRegisterAllocatorValidate));
   }
 
   for (LiveInterval* start_interval : intervals) {
@@ -541,6 +166,19 @@
               } else {
                 codegen.DumpFloatingPointRegister(message, current->GetRegister());
               }
+              for (LiveInterval* interval : intervals) {
+                if (interval->HasRegister()
+                    && interval->GetRegister() == current->GetRegister()
+                    && interval->CoversSlow(j)) {
+                  message << std::endl;
+                  if (interval->GetDefinedBy() != nullptr) {
+                    message << interval->GetDefinedBy()->GetKind() << " ";
+                  } else {
+                    message << "physical ";
+                  }
+                  interval->Dump(message);
+                }
+              }
               LOG(FATAL) << message.str();
             } else {
               return false;
@@ -555,641 +193,30 @@
   return true;
 }
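(The check above can be pictured with a toy version: one liveness table per register, a conflict being two intervals claiming the same register at the same position. `Range` is a hypothetical stand-in, not ART's LiveInterval/ArenaBitVector types.)

#include <cstddef>
#include <vector>

struct Range { size_t start; size_t end; int reg; };

// Returns false if two ranges assigned to the same register overlap at any
// lifetime position in [0, max_end).
bool ValidateNoOverlap(const std::vector<Range>& ranges, size_t num_regs, size_t max_end) {
  std::vector<std::vector<bool>> live(num_regs, std::vector<bool>(max_end, false));
  for (const Range& r : ranges) {
    for (size_t pos = r.start; pos < r.end; ++pos) {
      if (live[r.reg][pos]) return false;  // Register already claimed here.
      live[r.reg][pos] = true;
    }
  }
  return true;
}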
 
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
-  interval->Dump(stream);
-  stream << ": ";
-  if (interval->HasRegister()) {
-    if (interval->IsFloatingPoint()) {
-      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
-    } else {
-      codegen_->DumpCoreRegister(stream, interval->GetRegister());
-    }
-  } else {
-    stream << "spilled";
-  }
-  stream << std::endl;
-}
-
-void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
-  stream << "inactive: " << std::endl;
-  for (LiveInterval* inactive_interval : inactive_) {
-    DumpInterval(stream, inactive_interval);
-  }
-  stream << "active: " << std::endl;
-  for (LiveInterval* active_interval : active_) {
-    DumpInterval(stream, active_interval);
-  }
-  stream << "unhandled: " << std::endl;
-  auto unhandled = (unhandled_ != nullptr) ?
-      unhandled_ : &unhandled_core_intervals_;
-  for (LiveInterval* unhandled_interval : *unhandled) {
-    DumpInterval(stream, unhandled_interval);
-  }
-  stream << "handled: " << std::endl;
-  for (LiveInterval* handled_interval : handled_) {
-    DumpInterval(stream, handled_interval);
-  }
-}
-
-// By the book implementation of a linear scan register allocator.
-void RegisterAllocator::LinearScan() {
-  while (!unhandled_->empty()) {
-    // (1) Remove interval with the lowest start position from unhandled.
-    LiveInterval* current = unhandled_->back();
-    unhandled_->pop_back();
-
-    // Make sure the interval is in an expected state.
-    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
-    // Make sure we are going in the right order.
-    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
-    // Make sure a low interval is always with a high.
-    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
-    // Make sure a high interval is always with a low.
-    DCHECK(current->IsLowInterval() ||
-           unhandled_->empty() ||
-           !unhandled_->back()->IsHighInterval());
-
-    size_t position = current->GetStart();
-
-    // Remember the inactive_ size here since the ones moved to inactive_ from
-    // active_ below shouldn't need to be re-checked.
-    size_t inactive_intervals_to_handle = inactive_.size();
-
-    // (2) Remove currently active intervals that are dead at this position.
-    //     Move active intervals that have a lifetime hole at this position
-    //     to inactive.
-    auto active_kept_end = std::remove_if(
-        active_.begin(),
-        active_.end(),
-        [this, position](LiveInterval* interval) {
-          if (interval->IsDeadAt(position)) {
-            handled_.push_back(interval);
-            return true;
-          } else if (!interval->Covers(position)) {
-            inactive_.push_back(interval);
-            return true;
-          } else {
-            return false;  // Keep this interval.
-          }
-        });
-    active_.erase(active_kept_end, active_.end());
-
-    // (3) Remove currently inactive intervals that are dead at this position.
-    //     Move inactive intervals that cover this position to active.
-    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
-    auto inactive_kept_end = std::remove_if(
-        inactive_.begin(),
-        inactive_to_handle_end,
-        [this, position](LiveInterval* interval) {
-          DCHECK(interval->GetStart() < position || interval->IsFixed());
-          if (interval->IsDeadAt(position)) {
-            handled_.push_back(interval);
-            return true;
-          } else if (interval->Covers(position)) {
-            active_.push_back(interval);
-            return true;
-          } else {
-            return false;  // Keep this interval.
-          }
-        });
-    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
-
-    if (current->IsSlowPathSafepoint()) {
-      // Synthesized interval to record the maximum number of live registers
-      // at safepoints. No need to allocate a register for it.
-      if (processing_core_registers_) {
-        maximum_number_of_live_core_registers_ =
-          std::max(maximum_number_of_live_core_registers_, active_.size());
-      } else {
-        maximum_number_of_live_fp_registers_ =
-          std::max(maximum_number_of_live_fp_registers_, active_.size());
-      }
-      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
-      continue;
-    }
-
-    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
-      DCHECK(!current->HasRegister());
-      // Allocating the low part was unsuccessful. The split interval for the high part
-      // will be handled next (it is in the `unhandled_` list).
-      continue;
-    }
-
-    // (4) Try to find an available register.
-    bool success = TryAllocateFreeReg(current);
-
-    // (5) If no register could be found, we need to spill.
-    if (!success) {
-      success = AllocateBlockedReg(current);
-    }
-
-    // (6) If the interval had a register allocated, add it to the list of active
-    //     intervals.
-    if (success) {
-      codegen_->AddAllocatedRegister(processing_core_registers_
-          ? Location::RegisterLocation(current->GetRegister())
-          : Location::FpuRegisterLocation(current->GetRegister()));
-      active_.push_back(current);
-      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
-        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
-      }
-    }
-  }
-}
-
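(For readers new to linear scan, a compact sketch of steps (2) and (3) with hypothetical simplified types; the removed code above does the same with remove_if and avoids re-checking intervals it has just deactivated.)

#include <cstddef>
#include <utility>
#include <vector>

struct LI {
  size_t start;
  size_t end;
  std::vector<std::pair<size_t, size_t>> holes;  // Sorted lifetime gaps [from, to).
  bool DeadAt(size_t p) const { return p >= end; }
  bool Covers(size_t p) const {
    if (p < start || p >= end) return false;
    for (const std::pair<size_t, size_t>& h : holes) {
      if (h.first <= p && p < h.second) return false;
    }
    return true;
  }
};

// One scan step: retire dead intervals, and move intervals between the
// active and inactive sets depending on whether they cover `position`.
void UpdateSets(size_t position,
                std::vector<LI*>* active,
                std::vector<LI*>* inactive,
                std::vector<LI*>* handled) {
  auto sweep = [&](std::vector<LI*>* from, std::vector<LI*>* to, bool want_covering) {
    for (size_t i = 0; i < from->size();) {
      LI* li = (*from)[i];
      if (li->DeadAt(position)) {
        handled->push_back(li);
        from->erase(from->begin() + i);
      } else if (li->Covers(position) != want_covering) {
        to->push_back(li);
        from->erase(from->begin() + i);
      } else {
        ++i;
      }
    }
  };
  sweep(active, inactive, /*want_covering=*/ true);   // Active intervals must cover it.
  sweep(inactive, active, /*want_covering=*/ false);  // Inactive ones must not.
}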
-static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
-  DCHECK(!interval->IsHighInterval());
-  // Note that the same instruction may occur multiple times in the input list,
-  // so `free_until` may have changed already.
-  // Since `position` is not the current scan position, we need to use CoversSlow.
-  if (interval->IsDeadAt(position)) {
-    // Set the register to be free. Note that inactive intervals might later
-    // update this.
-    free_until[interval->GetRegister()] = kMaxLifetimePosition;
+LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
+  DCHECK_GE(position, interval->GetStart());
+  DCHECK(!interval->IsDeadAt(position));
+  if (position == interval->GetStart()) {
+    // Spill slot will be allocated when handling `interval` again.
+    interval->ClearRegister();
     if (interval->HasHighInterval()) {
-      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
-      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+      interval->GetHighInterval()->ClearRegister();
+    } else if (interval->HasLowInterval()) {
+      interval->GetLowInterval()->ClearRegister();
     }
-  } else if (!interval->CoversSlow(position)) {
-    // The interval becomes inactive at `defined_by`. We make its register
-    // available only until the next use strictly after `defined_by`.
-    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+    return interval;
+  } else {
+    LiveInterval* new_interval = interval->SplitAt(position);
     if (interval->HasHighInterval()) {
-      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
-      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+      new_interval->SetHighInterval(high);
+      high->SetLowInterval(new_interval);
+    } else if (interval->HasLowInterval()) {
+      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+      new_interval->SetLowInterval(low);
+      low->SetHighInterval(new_interval);
     }
-  }
-}
-
-// Find a free register. If multiple are found, pick the register that
-// is free the longest.
-bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
-  size_t* free_until = registers_array_;
-
-  // First set all registers to be free.
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    free_until[i] = kMaxLifetimePosition;
-  }
-
-  // For each active interval, set its register to not free.
-  for (LiveInterval* interval : active_) {
-    DCHECK(interval->HasRegister());
-    free_until[interval->GetRegister()] = 0;
-  }
-
-  // An interval that starts at an instruction (that is, it is not split) may
-  // re-use the registers used by the inputs of that instruction, based on the
-  // location summary.
-  HInstruction* defined_by = current->GetDefinedBy();
-  if (defined_by != nullptr && !current->IsSplit()) {
-    LocationSummary* locations = defined_by->GetLocations();
-    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
-      for (size_t i = 0, e = defined_by->InputCount(); i < e; ++i) {
-        // Take the last interval of the input. It is the location of that interval
-        // that will be used at `defined_by`.
-        LiveInterval* interval = defined_by->InputAt(i)->GetLiveInterval()->GetLastSibling();
-        // Note that interval may have not been processed yet.
-        // TODO: Handle non-split intervals last in the work list.
-        if (locations->InAt(i).IsValid()
-            && interval->HasRegister()
-            && interval->SameRegisterKind(*current)) {
-          // The input must be live until the end of `defined_by`, to comply to
-          // the linear scan algorithm. So we use `defined_by`'s end lifetime
-          // position to check whether the input is dead or is inactive after
-          // `defined_by`.
-          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
-          size_t position = defined_by->GetLifetimePosition() + 1;
-          FreeIfNotCoverAt(interval, position, free_until);
-        }
-      }
-    }
-  }
-
-  // For each inactive interval, set its register to be free until
-  // the next intersection with `current`.
-  for (LiveInterval* inactive : inactive_) {
-    // Temp/slow-path-safepoint intervals have no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
-    if (!current->IsSplit() && !inactive->IsFixed()) {
-      // Neither current nor inactive are fixed.
-      // Thanks to SSA, a non-split interval starting in a hole of an
-      // inactive interval should never intersect with that inactive interval.
-      // Only if it's not fixed though, because fixed intervals don't come from SSA.
-      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-      continue;
-    }
-
-    DCHECK(inactive->HasRegister());
-    if (free_until[inactive->GetRegister()] == 0) {
-      // Already used by some active interval. No need to intersect.
-      continue;
-    }
-    size_t next_intersection = inactive->FirstIntersectionWith(current);
-    if (next_intersection != kNoLifetime) {
-      free_until[inactive->GetRegister()] =
-          std::min(free_until[inactive->GetRegister()], next_intersection);
-    }
-  }
-
-  int reg = kNoRegister;
-  if (current->HasRegister()) {
-    // Some instructions have a fixed register output.
-    reg = current->GetRegister();
-    if (free_until[reg] == 0) {
-      DCHECK(current->IsHighInterval());
-      // AllocateBlockedReg will spill the holder of the register.
-      return false;
-    }
-  } else {
-    DCHECK(!current->IsHighInterval());
-    int hint = current->FindFirstRegisterHint(free_until, liveness_);
-    if ((hint != kNoRegister)
-        // For simplicity, if the hint we are getting for a pair cannot be used,
-        // we are just going to allocate a new pair.
-        && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
-      DCHECK(!IsBlocked(hint));
-      reg = hint;
-    } else if (current->IsLowInterval()) {
-      reg = FindAvailableRegisterPair(free_until, current->GetStart());
-    } else {
-      reg = FindAvailableRegister(free_until, current);
-    }
-  }
-
-  DCHECK_NE(reg, kNoRegister);
-  // If we could not find a register, we need to spill.
-  if (free_until[reg] == 0) {
-    return false;
-  }
-
-  if (current->IsLowInterval()) {
-    // If the high register of this interval is not available, we need to spill.
-    int high_reg = current->GetHighInterval()->GetRegister();
-    if (high_reg == kNoRegister) {
-      high_reg = GetHighForLowRegister(reg);
-    }
-    if (free_until[high_reg] == 0) {
-      return false;
-    }
-  }
-
-  current->SetRegister(reg);
-  if (!current->IsDeadAt(free_until[reg])) {
-    // If the register is only available for a subset of live ranges
-    // covered by `current`, split `current` before the position where
-    // the register is not available anymore.
-    LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
-    DCHECK(split != nullptr);
-    AddSorted(unhandled_, split);
-  }
-  return true;
-}
-
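(The selection above reduces to: compute, per register, the position until which it stays free, then take the register that is free the longest. A standalone sketch with hypothetical inputs, one flag per currently active register and one precomputed next-intersection position per register held by an inactive interval:)

#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

const size_t kMaxPos = std::numeric_limits<size_t>::max();

// Returns the register free the longest, or -1 when none is free right now.
int PickFreeRegister(const std::vector<bool>& active_regs,
                     const std::vector<size_t>& inactive_next_intersection) {
  size_t n = active_regs.size();
  std::vector<size_t> free_until(n, kMaxPos);
  for (size_t i = 0; i < n; ++i) {
    if (active_regs[i]) free_until[i] = 0;  // In use at the current position.
    free_until[i] = std::min(free_until[i], inactive_next_intersection[i]);
  }
  int best = -1;
  for (size_t i = 0; i < n; ++i) {
    if (free_until[i] != 0 && (best < 0 || free_until[i] > free_until[best])) {
      best = static_cast<int>(i);
    }
  }
  return best;
}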
-bool RegisterAllocator::IsBlocked(int reg) const {
-  return processing_core_registers_
-      ? blocked_core_registers_[reg]
-      : blocked_fp_registers_[reg];
-}
-
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
-  int reg = kNoRegister;
-  // Pick the register pair that is used last.
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) continue;
-    if (!IsLowRegister(i)) continue;
-    int high_register = GetHighForLowRegister(i);
-    if (IsBlocked(high_register)) continue;
-    int existing_high_register = GetHighForLowRegister(reg);
-    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
-                        && next_use[high_register] >= next_use[existing_high_register])) {
-      reg = i;
-      if (next_use[i] == kMaxLifetimePosition
-          && next_use[high_register] == kMaxLifetimePosition) {
-        break;
-      }
-    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
-      // If one of the current registers is known to be unavailable, just unconditionally
-      // try a new one.
-      reg = i;
-    }
-  }
-  return reg;
-}
-
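(A sketch of the pair search, assuming for illustration that a pair's high half is always low + 1 and that pairs start at even register numbers; `blocked` and `free_until` are hypothetical per-register inputs.)

#include <algorithm>
#include <cstddef>
#include <vector>

// Pick the aligned pair whose two halves both stay free the longest.
int FindPairSketch(const std::vector<size_t>& free_until, const std::vector<bool>& blocked) {
  int best = -1;
  for (size_t low = 0; low + 1 < free_until.size(); low += 2) {
    size_t high = low + 1;
    if (blocked[low] || blocked[high]) continue;
    size_t pair_free = std::min(free_until[low], free_until[high]);
    if (best < 0 || pair_free > std::min(free_until[best], free_until[best + 1])) {
      best = static_cast<int>(low);
    }
  }
  return best;  // -1 when no aligned pair is usable.
}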
-bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
-  return processing_core_registers_
-      ? !codegen_->IsCoreCalleeSaveRegister(reg)
-      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
-  // We special case intervals that do not span a safepoint to try to find a caller-save
-  // register if one is available. We iterate from 0 to the number of registers,
-  // so if there are caller-save registers available at the end, we continue the iteration.
-  bool prefers_caller_save = !current->HasWillCallSafepoint();
-  int reg = kNoRegister;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) {
-      // Register cannot be used. Continue.
-      continue;
-    }
-
-    // Best case: we found a register fully available.
-    if (next_use[i] == kMaxLifetimePosition) {
-      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
-        // We can get shorter encodings on some platforms by using
-        // small register numbers. So only update the candidate if the previous
-        // one was not available for the whole method.
-        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
-          reg = i;
-        }
-        // Continue the iteration in the hope of finding a caller save register.
-        continue;
-      } else {
-        reg = i;
-        // We know the register is good enough. Return it.
-        break;
-      }
-    }
-
-    // If we had no register before, take this one as a reference.
-    if (reg == kNoRegister) {
-      reg = i;
-      continue;
-    }
-
-    // Pick the register that is used last.
-    if (next_use[i] > next_use[reg]) {
-      reg = i;
-      continue;
-    }
-  }
-  return reg;
-}
-
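(The caller-save preference above, boiled down to a sketch with hypothetical inputs: among the fully free registers, take the first caller-save one when the interval spans no call, falling back to a callee-save register otherwise.)

#include <cstddef>
#include <vector>

int PickWithCallerSaveBias(const std::vector<bool>& fully_free,
                           const std::vector<bool>& caller_save,
                           bool prefers_caller_save) {
  int fallback = -1;
  for (size_t i = 0; i < fully_free.size(); ++i) {
    if (!fully_free[i]) continue;
    if (!prefers_caller_save || caller_save[i]) return static_cast<int>(i);
    if (fallback < 0) fallback = static_cast<int>(i);  // Callee-save; keep looking.
  }
  return fallback;  // -1 when nothing is fully free.
}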
-// Remove interval and its other half if any. Return iterator to the following element.
-static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
-    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
-  DCHECK(intervals->begin() <= pos && pos < intervals->end());
-  LiveInterval* interval = *pos;
-  if (interval->IsLowInterval()) {
-    DCHECK(pos + 1 < intervals->end());
-    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
-    return intervals->erase(pos, pos + 2);
-  } else if (interval->IsHighInterval()) {
-    DCHECK(intervals->begin() < pos);
-    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
-    return intervals->erase(pos - 1, pos + 1);
-  } else {
-    return intervals->erase(pos);
-  }
-}
-
-bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
-                                                                 size_t first_register_use,
-                                                                 size_t* next_use) {
-  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-    LiveInterval* active = *it;
-    DCHECK(active->HasRegister());
-    if (active->IsFixed()) continue;
-    if (active->IsHighInterval()) continue;
-    if (first_register_use > next_use[active->GetRegister()]) continue;
-
-    // Split the first interval found that is either:
-    // 1) A non-pair interval.
-    // 2) A pair interval whose high is not low + 1.
-    // 3) A pair interval whose low is not even.
-    if (!active->IsLowInterval() ||
-        IsLowOfUnalignedPairInterval(active) ||
-        !IsLowRegister(active->GetRegister())) {
-      LiveInterval* split = Split(active, position);
-      if (split != active) {
-        handled_.push_back(active);
-      }
-      RemoveIntervalAndPotentialOtherHalf(&active_, it);
-      AddSorted(unhandled_, split);
-      return true;
-    }
-  }
-  return false;
-}
-
-// Find the register that is used last, and spill the interval
-// that holds it. If the first use of `current` comes after the next use of
-// that register, we spill `current` instead.
-bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
-  size_t first_register_use = current->FirstRegisterUse();
-  if (current->HasRegister()) {
-    DCHECK(current->IsHighInterval());
-    // The low interval has allocated the register for the high interval. In
-    // case the low interval had to split both intervals, we may end up in a
-    // situation where the high interval does not have a register use anymore.
-    // We must still proceed in order to split currently active and inactive
-    // uses of the high interval's register, and put the high interval in the
-    // active set.
-    DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
-  } else if (first_register_use == kNoLifetime) {
-    AllocateSpillSlotFor(current);
-    return false;
-  }
-
-  // We use the first use to compare with other intervals. If this interval
-  // is used after any active intervals, we will spill this interval.
-  size_t first_use = current->FirstUseAfter(current->GetStart());
-
-  // First set all registers as not being used.
-  size_t* next_use = registers_array_;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    next_use[i] = kMaxLifetimePosition;
-  }
-
-  // For each active interval, find the next use of its register after the
-  // start of current.
-  for (LiveInterval* active : active_) {
-    DCHECK(active->HasRegister());
-    if (active->IsFixed()) {
-      next_use[active->GetRegister()] = current->GetStart();
-    } else {
-      size_t use = active->FirstUseAfter(current->GetStart());
-      if (use != kNoLifetime) {
-        next_use[active->GetRegister()] = use;
-      }
-    }
-  }
-
-  // For each inactive interval, find the next use of its register after the
-  // start of current.
-  for (LiveInterval* inactive : inactive_) {
-    // Temp/slow-path-safepoint intervals have no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
-    if (!current->IsSplit() && !inactive->IsFixed()) {
-      // Neither current nor inactive are fixed.
-      // Thanks to SSA, a non-split interval starting in a hole of an
-      // inactive interval should never intersect with that inactive interval.
-      // Only if it's not fixed though, because fixed intervals don't come from SSA.
-      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-      continue;
-    }
-    DCHECK(inactive->HasRegister());
-    size_t next_intersection = inactive->FirstIntersectionWith(current);
-    if (next_intersection != kNoLifetime) {
-      if (inactive->IsFixed()) {
-        next_use[inactive->GetRegister()] =
-            std::min(next_intersection, next_use[inactive->GetRegister()]);
-      } else {
-        size_t use = inactive->FirstUseAfter(current->GetStart());
-        if (use != kNoLifetime) {
-          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
-        }
-      }
-    }
-  }
-
-  int reg = kNoRegister;
-  bool should_spill = false;
-  if (current->HasRegister()) {
-    DCHECK(current->IsHighInterval());
-    reg = current->GetRegister();
-    // When allocating the low part, we made sure the high register was available.
-    DCHECK_LT(first_use, next_use[reg]);
-  } else if (current->IsLowInterval()) {
-    reg = FindAvailableRegisterPair(next_use, first_use);
-    // We should spill if both registers are not available.
-    should_spill = (first_use >= next_use[reg])
-      || (first_use >= next_use[GetHighForLowRegister(reg)]);
-  } else {
-    DCHECK(!current->IsHighInterval());
-    reg = FindAvailableRegister(next_use, current);
-    should_spill = (first_use >= next_use[reg]);
-  }
-
-  DCHECK_NE(reg, kNoRegister);
-  if (should_spill) {
-    DCHECK(!current->IsHighInterval());
-    bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
-    if (is_allocation_at_use_site) {
-      if (!current->IsLowInterval()) {
-        DumpInterval(std::cerr, current);
-        DumpAllIntervals(std::cerr);
-        // This situation has the potential to loop infinitely, so we make it a non-debug CHECK.
-        HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
-        CHECK(false) << "There is not enough registers available for "
-          << current->GetParent()->GetDefinedBy()->DebugName() << " "
-          << current->GetParent()->GetDefinedBy()->GetId()
-          << " at " << first_register_use - 1 << " "
-          << (at == nullptr ? "" : at->DebugName());
-      }
-
-      // If we're allocating a register for `current` because the instruction at
-      // that position requires it, but we think we should spill, then there are
-      // non-pair intervals or unaligned pair intervals blocking the allocation.
-      // We split the first interval found, and put ourselves first in the
-      // `unhandled_` list.
-      bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
-                                                              first_register_use,
-                                                              next_use);
-      DCHECK(success);
-      LiveInterval* existing = unhandled_->back();
-      DCHECK(existing->IsHighInterval());
-      DCHECK_EQ(existing->GetLowInterval(), current);
-      unhandled_->push_back(current);
-    } else {
-      // If the first use of that instruction is after the last use of the found
-      // register, we split this interval just before its first register use.
-      AllocateSpillSlotFor(current);
-      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      DCHECK(current != split);
-      AddSorted(unhandled_, split);
-    }
-    return false;
-  } else {
-    // Use this register and spill the active and inactive intervals that
-    // hold that register.
-    current->SetRegister(reg);
-
-    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-      LiveInterval* active = *it;
-      if (active->GetRegister() == reg) {
-        DCHECK(!active->IsFixed());
-        LiveInterval* split = Split(active, current->GetStart());
-        if (split != active) {
-          handled_.push_back(active);
-        }
-        RemoveIntervalAndPotentialOtherHalf(&active_, it);
-        AddSorted(unhandled_, split);
-        break;
-      }
-    }
-
-    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
-    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
-      LiveInterval* inactive = *it;
-      bool erased = false;
-      if (inactive->GetRegister() == reg) {
-        if (!current->IsSplit() && !inactive->IsFixed()) {
-          // Neither current nor inactive are fixed.
-          // Thanks to SSA, a non-split interval starting in a hole of an
-          // inactive interval should never intersect with that inactive interval.
-          // Only if it's not fixed though, because fixed intervals don't come from SSA.
-          DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-        } else {
-          size_t next_intersection = inactive->FirstIntersectionWith(current);
-          if (next_intersection != kNoLifetime) {
-            if (inactive->IsFixed()) {
-              LiveInterval* split = Split(current, next_intersection);
-              DCHECK_NE(split, current);
-              AddSorted(unhandled_, split);
-            } else {
-              // Split at the start of `current`, which will lead to splitting
-              // at the end of the lifetime hole of `inactive`.
-              LiveInterval* split = Split(inactive, current->GetStart());
-              // If it's inactive, it must start before the current interval.
-              DCHECK_NE(split, inactive);
-              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
-              erased = true;
-              handled_.push_back(inactive);
-              AddSorted(unhandled_, split);
-            }
-          }
-        }
-      }
-      // If we have erased the element, `it` already points to the next element.
-      // Otherwise we need to move to the next element.
-      if (!erased) {
-        ++it;
-      }
-    }
-
-    return true;
-  }
-}
-
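(The heart of the routine above is a single comparison; a sketch, with `best_next_use` standing for the next use of the register whose holder is used farthest in the future:)

#include <cstddef>

enum class Decision { kSpillCurrent, kEvictHolder };

// Spill `current` when even the best register's holder is used again before
// `current` itself needs a register; otherwise evict that holder.
Decision Decide(size_t current_first_use, size_t best_next_use) {
  return current_first_use >= best_next_use ? Decision::kSpillCurrent
                                            : Decision::kEvictHolder;
}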
-void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
-  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
-  size_t insert_at = 0;
-  for (size_t i = array->size(); i > 0; --i) {
-    LiveInterval* current = (*array)[i - 1u];
-    // High intervals must be processed right after their low equivalent.
-    if (current->StartsAfter(interval) && !current->IsHighInterval()) {
-      insert_at = i;
-      break;
-    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
-      // Ensure the slow path interval is the last to be processed at its location: we want the
-      // interval to know all live registers at this location.
-      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
-      insert_at = i;
-      break;
-    }
-  }
-
-  // Insert the high interval before the low, to ensure the low is processed first.
-  auto insert_pos = array->begin() + insert_at;
-  if (interval->HasHighInterval()) {
-    array->insert(insert_pos, { interval->GetHighInterval(), interval });
-  } else if (interval->HasLowInterval()) {
-    array->insert(insert_pos, { interval, interval->GetLowInterval() });
-  } else {
-    array->insert(insert_pos, interval);
+    return new_interval;
   }
 }
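(A sketch of the SplitAt semantics the code above relies on: the original interval keeps [start, position) and a fresh sibling takes over [position, end). `Interval` here is a hypothetical stand-in.)

#include <cstddef>

struct Interval {
  size_t start;
  size_t end;
  Interval* next_sibling;
};

Interval* SplitAtSketch(Interval* interval, size_t position) {
  Interval* sibling = new Interval{position, interval->end, interval->next_sibling};
  interval->end = position;  // The original is truncated at the split point.
  interval->next_sibling = sibling;
  return sibling;
}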
 
@@ -1252,688 +279,4 @@
   return Split(interval, block_to->GetLifetimeStart());
 }
 
-LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
-  DCHECK_GE(position, interval->GetStart());
-  DCHECK(!interval->IsDeadAt(position));
-  if (position == interval->GetStart()) {
-    // Spill slot will be allocated when handling `interval` again.
-    interval->ClearRegister();
-    if (interval->HasHighInterval()) {
-      interval->GetHighInterval()->ClearRegister();
-    } else if (interval->HasLowInterval()) {
-      interval->GetLowInterval()->ClearRegister();
-    }
-    return interval;
-  } else {
-    LiveInterval* new_interval = interval->SplitAt(position);
-    if (interval->HasHighInterval()) {
-      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
-      new_interval->SetHighInterval(high);
-      high->SetLowInterval(new_interval);
-    } else if (interval->HasLowInterval()) {
-      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
-      new_interval->SetLowInterval(low);
-      low->SetHighInterval(new_interval);
-    }
-    return new_interval;
-  }
-}
-
-void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
-  if (interval->IsHighInterval()) {
-    // The low interval already took care of allocating the spill slot.
-    DCHECK(!interval->GetLowInterval()->HasRegister());
-    DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
-    return;
-  }
-
-  LiveInterval* parent = interval->GetParent();
-
-  // An instruction gets a spill slot for its entire lifetime. If the parent
-  // of this interval already has a spill slot, there is nothing to do.
-  if (parent->HasSpillSlot()) {
-    return;
-  }
-
-  HInstruction* defined_by = parent->GetDefinedBy();
-  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
-
-  if (defined_by->IsParameterValue()) {
-    // Parameters have their own stack slot.
-    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
-    return;
-  }
-
-  if (defined_by->IsCurrentMethod()) {
-    parent->SetSpillSlot(0);
-    return;
-  }
-
-  if (defined_by->IsConstant()) {
-    // Constants don't need a spill slot.
-    return;
-  }
-
-  ArenaVector<size_t>* spill_slots = nullptr;
-  switch (interval->GetType()) {
-    case Primitive::kPrimDouble:
-      spill_slots = &double_spill_slots_;
-      break;
-    case Primitive::kPrimLong:
-      spill_slots = &long_spill_slots_;
-      break;
-    case Primitive::kPrimFloat:
-      spill_slots = &float_spill_slots_;
-      break;
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimShort:
-      spill_slots = &int_spill_slots_;
-      break;
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
-  }
-
-  // Find an available spill slot.
-  size_t slot = 0;
-  for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()
-        && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) {
-      break;
-    }
-  }
-
-  size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
-  }
-
-  // Note that the exact spill slot location will be computed when we resolve,
-  // that is when we know the number of spill slots for each type.
-  parent->SetSpillSlot(slot);
-}
-
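(A sketch of the slot search above: each entry records the lifetime position at which the slot frees up, so a slot is reusable once its recorded end is at or before the new tenant's start. Double-width values, which need two adjacent slots, are left out of the sketch.)

#include <cstddef>
#include <vector>

size_t TakeSpillSlot(std::vector<size_t>* slots, size_t start, size_t end) {
  for (size_t i = 0; i < slots->size(); ++i) {
    if ((*slots)[i] <= start) {  // Previous tenant is gone; reuse slot i.
      (*slots)[i] = end;
      return i;
    }
  }
  slots->push_back(end);  // No free slot; grow the frame by one slot.
  return slots->size() - 1;
}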
-static bool IsValidDestination(Location destination) {
-  return destination.IsRegister()
-      || destination.IsRegisterPair()
-      || destination.IsFpuRegister()
-      || destination.IsFpuRegisterPair()
-      || destination.IsStackSlot()
-      || destination.IsDoubleStackSlot();
-}
-
-void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
-  LiveInterval* interval = phi->GetLiveInterval();
-
-  HInstruction* previous_phi = phi->GetPrevious();
-  DCHECK(previous_phi == nullptr ||
-         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
-      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
-
-  if (phi->IsVRegEquivalentOf(previous_phi)) {
-    // This is an equivalent of the previous phi. We need to assign the same
-    // catch phi slot.
-    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
-    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
-  } else {
-    // Allocate a new spill slot for this catch phi.
-    // TODO: Reuse spill slots when intervals of phis from different catch
-    //       blocks do not overlap.
-    interval->SetSpillSlot(catch_phi_spill_slots_);
-    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
-  }
-}
-
-void RegisterAllocator::AddMove(HParallelMove* move,
-                                Location source,
-                                Location destination,
-                                HInstruction* instruction,
-                                Primitive::Type type) const {
-  if (type == Primitive::kPrimLong
-      && codegen_->ShouldSplitLongMoves()
-      // The parallel move resolver knows how to deal with long constants.
-      && !source.IsConstant()) {
-    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
-    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
-  } else {
-    move->AddMove(source, destination, type, instruction);
-  }
-}
-
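(On targets that split long moves, a 64-bit move simply becomes two independent 32-bit moves that the parallel move resolver may reorder; a sketch with a hypothetical `Loc` stand-in:)

#include <utility>
#include <vector>

struct Loc { int index; };

void AddLongMove(std::vector<std::pair<Loc, Loc>>* moves,
                 Loc src_lo, Loc src_hi, Loc dst_lo, Loc dst_hi) {
  moves->emplace_back(src_lo, dst_lo);  // Low 32 bits.
  moves->emplace_back(src_hi, dst_hi);  // High 32 bits.
}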
-void RegisterAllocator::AddInputMoveFor(HInstruction* input,
-                                        HInstruction* user,
-                                        Location source,
-                                        Location destination) const {
-  if (source.Equals(destination)) return;
-
-  DCHECK(!user->IsPhi());
-
-  HInstruction* previous = user->GetPrevious();
-  HParallelMove* move = nullptr;
-  if (previous == nullptr
-      || !previous->IsParallelMove()
-      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(user->GetLifetimePosition());
-    user->GetBlock()->InsertInstructionBefore(move, user);
-  } else {
-    move = previous->AsParallelMove();
-  }
-  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
-  AddMove(move, source, destination, nullptr, input->GetType());
-}
-
-static bool IsInstructionStart(size_t position) {
-  return (position & 1) == 0;
-}
-
-static bool IsInstructionEnd(size_t position) {
-  return (position & 1) == 1;
-}
-
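(These two helpers encode the numbering scheme: instruction i occupies lifetime positions 2*i, its start, and 2*i + 1, its end, so parity distinguishes the two and dividing by two recovers the instruction index, as in this small sketch:)

#include <cstddef>

size_t PositionToInstructionIndex(size_t position) { return position / 2; }
size_t InstructionStartPosition(size_t index) { return 2 * index; }
size_t InstructionEndPosition(size_t index) { return 2 * index + 1; }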
-void RegisterAllocator::InsertParallelMoveAt(size_t position,
-                                             HInstruction* instruction,
-                                             Location source,
-                                             Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
-  HParallelMove* move;
-  if (at == nullptr) {
-    if (IsInstructionStart(position)) {
-      // Block boundary; don't do anything, the connection of split siblings will handle it.
-      return;
-    } else {
-      // Move must happen before the first instruction of the block.
-      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
-      // Note that parallel moves may have already been inserted, so we explicitly
-      // ask for the first instruction of the block: `GetInstructionFromPosition` does
-      // not contain the `HParallelMove` instructions.
-      at = at->GetBlock()->GetFirstInstruction();
-
-      if (at->GetLifetimePosition() < position) {
-        // We may insert moves for split siblings and phi spills at the beginning of the block.
-        // Since this is a different lifetime position, we need to go to the next instruction.
-        DCHECK(at->IsParallelMove());
-        at = at->GetNext();
-      }
-
-      if (at->GetLifetimePosition() != position) {
-        DCHECK_GT(at->GetLifetimePosition(), position);
-        move = new (allocator_) HParallelMove(allocator_);
-        move->SetLifetimePosition(position);
-        at->GetBlock()->InsertInstructionBefore(move, at);
-      } else {
-        DCHECK(at->IsParallelMove());
-        move = at->AsParallelMove();
-      }
-    }
-  } else if (IsInstructionEnd(position)) {
-    // Move must happen after the instruction.
-    DCHECK(!at->IsControlFlow());
-    move = at->GetNext()->AsParallelMove();
-    // This is a parallel move for connecting siblings in the same block. We need to
-    // differentiate it from moves for connecting blocks, and from input moves.
-    if (move == nullptr || move->GetLifetimePosition() > position) {
-      move = new (allocator_) HParallelMove(allocator_);
-      move->SetLifetimePosition(position);
-      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
-    }
-  } else {
-    // Move must happen before the instruction.
-    HInstruction* previous = at->GetPrevious();
-    if (previous == nullptr
-        || !previous->IsParallelMove()
-        || previous->GetLifetimePosition() != position) {
-      // If the previous is a parallel move, then its position must be lower
-      // than the given `position`: it was added just after the non-parallel
-      // move instruction that precedes `instruction`.
-      DCHECK(previous == nullptr
-             || !previous->IsParallelMove()
-             || previous->GetLifetimePosition() < position);
-      move = new (allocator_) HParallelMove(allocator_);
-      move->SetLifetimePosition(position);
-      at->GetBlock()->InsertInstructionBefore(move, at);
-    } else {
-      move = previous->AsParallelMove();
-    }
-  }
-  DCHECK_EQ(move->GetLifetimePosition(), position);
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
-                                                   HInstruction* instruction,
-                                                   Location source,
-                                                   Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u);
-  HInstruction* last = block->GetLastInstruction();
-  // We insert moves at exit for phi predecessors and connecting blocks.
-  // A block ending with an if or a packed switch cannot branch to a block
-  // with phis because we do not allow critical edges. It can also not connect
-  // a split interval between two blocks: the move has to happen in the successor.
-  DCHECK(!last->IsIf() && !last->IsPackedSwitch());
-  HInstruction* previous = last->GetPrevious();
-  HParallelMove* move;
-  // This is a parallel move for connecting blocks. We need to differentiate it
-  // from moves for connecting siblings in the same block, and from output moves.
-  size_t position = last->GetLifetimePosition();
-  if (previous == nullptr || !previous->IsParallelMove()
-      || previous->AsParallelMove()->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    block->InsertInstructionBefore(move, last);
-  } else {
-    move = previous->AsParallelMove();
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
-                                                    HInstruction* instruction,
-                                                    Location source,
-                                                    Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  HInstruction* first = block->GetFirstInstruction();
-  HParallelMove* move = first->AsParallelMove();
-  size_t position = block->GetLifetimeStart();
-  // This is a parallel move for connecting blocks. We need to differentiate it
-  // from moves for connecting siblings in the same block, and from input moves.
-  if (move == nullptr || move->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    block->InsertInstructionBefore(move, first);
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
-                                        Location source,
-                                        Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  if (instruction->IsPhi()) {
-    InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
-    return;
-  }
-
-  size_t position = instruction->GetLifetimePosition() + 1;
-  HParallelMove* move = instruction->GetNext()->AsParallelMove();
-  // This is a parallel move for moving the output of an instruction. We need
-  // to differentiate it from input moves, moves for connecting siblings in the
-  // same block, and moves for connecting blocks.
-  if (move == nullptr || move->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
-  LiveInterval* current = interval;
-  if (current->HasSpillSlot()
-      && current->HasRegister()
-      // Currently, we unconditionally spill the current method in the code generators.
-      && !interval->GetDefinedBy()->IsCurrentMethod()) {
-    // We spill eagerly, so move must be at definition.
-    InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->ToLocation(),
-                    interval->NeedsTwoSpillSlots()
-                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
-  }
-  UsePosition* use = current->GetFirstUse();
-  UsePosition* env_use = current->GetFirstEnvironmentUse();
-
-  // Walk over all siblings, updating locations of use positions, and
-  // connecting them when they are adjacent.
-  do {
-    Location source = current->ToLocation();
-
-    // Walk over all uses covered by this interval, and update the location
-    // information.
-
-    LiveRange* range = current->GetFirstRange();
-    while (range != nullptr) {
-      while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->IsSynthesized());
-        use = use->GetNext();
-      }
-      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
-        DCHECK(!use->GetIsEnvironment());
-        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        if (!use->IsSynthesized()) {
-          LocationSummary* locations = use->GetUser()->GetLocations();
-          Location expected_location = locations->InAt(use->GetInputIndex());
-          // The expected (actual) location may be invalid in case the input is unused. Currently
-          // this only happens for intrinsics.
-          if (expected_location.IsValid()) {
-            if (expected_location.IsUnallocated()) {
-              locations->SetInAt(use->GetInputIndex(), source);
-            } else if (!expected_location.IsConstant()) {
-              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
-            }
-          } else {
-            DCHECK(use->GetUser()->IsInvoke());
-            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
-          }
-        }
-        use = use->GetNext();
-      }
-
-      // Walk over the environment uses, and update their locations.
-      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
-        env_use = env_use->GetNext();
-      }
-
-      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
-        DCHECK(current->CoversSlow(env_use->GetPosition())
-               || (env_use->GetPosition() == range->GetEnd()));
-        HEnvironment* environment = env_use->GetEnvironment();
-        environment->SetLocationAt(env_use->GetInputIndex(), source);
-        env_use = env_use->GetNext();
-      }
-
-      range = range->GetNext();
-    }
-
-    // If the next interval starts just after this one, and has a register,
-    // insert a move.
-    LiveInterval* next_sibling = current->GetNextSibling();
-    if (next_sibling != nullptr
-        && next_sibling->HasRegister()
-        && current->GetEnd() == next_sibling->GetStart()) {
-      Location destination = next_sibling->ToLocation();
-      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
-    }
-
-    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
-         safepoint_position != nullptr;
-         safepoint_position = safepoint_position->GetNext()) {
-      DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
-
-      LocationSummary* locations = safepoint_position->GetLocations();
-      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
-        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
-      }
-
-      switch (source.GetKind()) {
-        case Location::kRegister: {
-          locations->AddLiveRegister(source);
-          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
-            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
-                      maximum_number_of_live_core_registers_ +
-                      maximum_number_of_live_fp_registers_);
-          }
-          if (current->GetType() == Primitive::kPrimNot) {
-            locations->SetRegisterBit(source.reg());
-          }
-          break;
-        }
-        case Location::kFpuRegister: {
-          locations->AddLiveRegister(source);
-          break;
-        }
-
-        case Location::kRegisterPair:
-        case Location::kFpuRegisterPair: {
-          locations->AddLiveRegister(source.ToLow());
-          locations->AddLiveRegister(source.ToHigh());
-          break;
-        }
-        case Location::kStackSlot:  // Fall-through
-        case Location::kDoubleStackSlot:  // Fall-through
-        case Location::kConstant: {
-          // Nothing to do.
-          break;
-        }
-        default: {
-          LOG(FATAL) << "Unexpected location for object";
-        }
-      }
-    }
-    current = next_sibling;
-  } while (current != nullptr);
-
-  if (kIsDebugBuild) {
-    // Following uses can only be synthesized uses.
-    while (use != nullptr) {
-      DCHECK(use->IsSynthesized());
-      use = use->GetNext();
-    }
-  }
-}
-
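(A sketch of the adjacency rule driving the sibling moves: whenever two consecutive siblings touch and live in different locations, a move is needed at the boundary. `Sib` and its integer `location` are hypothetical simplifications.)

#include <cstddef>
#include <utility>
#include <vector>

struct Sib {
  size_t start;
  size_t end;
  int location;  // Register number or spill slot id, simplified to one int.
  Sib* next;
};

// Collects (position, (from, to)) move requests along the sibling chain.
std::vector<std::pair<size_t, std::pair<int, int>>> CollectSiblingMoves(Sib* first) {
  std::vector<std::pair<size_t, std::pair<int, int>>> moves;
  for (Sib* s = first; s != nullptr && s->next != nullptr; s = s->next) {
    if (s->end == s->next->start && s->location != s->next->location) {
      moves.push_back(std::make_pair(s->end, std::make_pair(s->location, s->next->location)));
    }
  }
  return moves;
}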
-void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
-                                             HBasicBlock* from,
-                                             HBasicBlock* to) const {
-  if (interval->GetNextSibling() == nullptr) {
-    // Nothing to connect. The whole range was allocated to the same location.
-    return;
-  }
-
-  // Find the intervals that cover `from` and `to`.
-  LiveInterval* destination = interval->GetSiblingAt(to->GetLifetimeStart());
-  LiveInterval* source = interval->GetSiblingAt(from->GetLifetimeEnd() - 1);
-
-  if (destination == source) {
-    // Interval was not split.
-    return;
-  }
-  DCHECK(destination != nullptr && source != nullptr);
-
-  if (!destination->HasRegister()) {
-    // Values are eagerly spilled. The spill slot already contains the appropriate value.
-    return;
-  }
-
-  // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
-  // we need to put the moves at the entry of `to`.
-  if (from->NumberOfNormalSuccessors() == 1) {
-    InsertParallelMoveAtExitOf(from,
-                               interval->GetParent()->GetDefinedBy(),
-                               source->ToLocation(),
-                               destination->ToLocation());
-  } else {
-    DCHECK_EQ(to->GetPredecessors().size(), 1u);
-    InsertParallelMoveAtEntryOf(to,
-                                interval->GetParent()->GetDefinedBy(),
-                                source->ToLocation(),
-                                destination->ToLocation());
-  }
-}
-
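(The placement rule above in one sketch: a fix-up move on the edge from `from` to `to` sits at the end of `from` only when `from` has a single normal successor; otherwise `to` must have a single predecessor, because critical edges were split beforehand, and the move sits at its entry.)

#include <cassert>
#include <cstddef>

enum class MoveSite { kExitOfFrom, kEntryOfTo };

MoveSite PlaceEdgeMove(size_t from_num_successors, size_t to_num_predecessors) {
  if (from_num_successors == 1) return MoveSite::kExitOfFrom;
  assert(to_num_predecessors == 1);  // Guaranteed by critical-edge splitting.
  return MoveSite::kEntryOfTo;
}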
-void RegisterAllocator::Resolve() {
-  codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
-                                     maximum_number_of_live_core_registers_,
-                                     maximum_number_of_live_fp_registers_,
-                                     reserved_out_slots_,
-                                     codegen_->GetGraph()->GetLinearOrder());
-
-  // Adjust the Out Location of instructions.
-  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
-  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    LiveInterval* current = instruction->GetLiveInterval();
-    LocationSummary* locations = instruction->GetLocations();
-    Location location = locations->Out();
-    if (instruction->IsParameterValue()) {
-      // Now that we know the frame size, adjust the parameter's location.
-      if (location.IsStackSlot()) {
-        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
-        current->SetSpillSlot(location.GetStackIndex());
-        locations->UpdateOut(location);
-      } else if (location.IsDoubleStackSlot()) {
-        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
-        current->SetSpillSlot(location.GetStackIndex());
-        locations->UpdateOut(location);
-      } else if (current->HasSpillSlot()) {
-        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
-      }
-    } else if (instruction->IsCurrentMethod()) {
-      // The current method is always at offset 0.
-      DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
-    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
-      DCHECK(current->HasSpillSlot());
-      size_t slot = current->GetSpillSlot()
-                    + GetNumberOfSpillSlots()
-                    + reserved_out_slots_
-                    - catch_phi_spill_slots_;
-      current->SetSpillSlot(slot * kVRegSize);
-    } else if (current->HasSpillSlot()) {
-      // Adjust the stack slot, now that we know the number of them for each type.
-      // The way this implementation lays out the stack is the following:
-      // [parameter slots       ]
-      // [catch phi spill slots ]
-      // [double spill slots    ]
-      // [long spill slots      ]
-      // [float spill slots     ]
-      // [int/ref values        ]
-      // [maximum out values    ] (number of arguments for calls)
-      // [art method            ].
-      size_t slot = current->GetSpillSlot();
-      switch (current->GetType()) {
-        case Primitive::kPrimDouble:
-          slot += long_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimLong:
-          slot += float_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimFloat:
-          slot += int_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimNot:
-        case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
-        case Primitive::kPrimByte:
-        case Primitive::kPrimBoolean:
-        case Primitive::kPrimShort:
-          slot += reserved_out_slots_;
-          break;
-        case Primitive::kPrimVoid:
-          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
-      }
-      current->SetSpillSlot(slot * kVRegSize);
-    }
-
-    Location source = current->ToLocation();
-
-    if (location.IsUnallocated()) {
-      if (location.GetPolicy() == Location::kSameAsFirstInput) {
-        if (locations->InAt(0).IsUnallocated()) {
-          locations->SetInAt(0, source);
-        } else {
-          DCHECK(locations->InAt(0).Equals(source));
-        }
-      }
-      locations->UpdateOut(source);
-    } else {
-      DCHECK(source.Equals(location));
-    }
-  }
-
-  // Connect siblings.
-  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    ConnectSiblings(instruction->GetLiveInterval());
-  }
-
-  // Resolve non-linear control flow across branches. Order does not matter.
-  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    if (block->IsCatchBlock()) {
-      // Instructions live at the top of catch blocks were forced to spill.
-      if (kIsDebugBuild) {
-        BitVector* live = liveness_.GetLiveInSet(*block);
-        for (uint32_t idx : live->Indexes()) {
-          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
-          DCHECK(!interval->GetSiblingAt(block->GetLifetimeStart())->HasRegister());
-        }
-      }
-    } else {
-      BitVector* live = liveness_.GetLiveInSet(*block);
-      for (uint32_t idx : live->Indexes()) {
-        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
-        for (HBasicBlock* predecessor : block->GetPredecessors()) {
-          ConnectSplitSiblings(interval, predecessor, block);
-        }
-      }
-    }
-  }
-
-  // Resolve phi inputs. Order does not matter.
-  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
-    if (current->IsCatchBlock()) {
-      // Catch phi values are set at runtime by the exception delivery mechanism.
-    } else {
-      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* phi = inst_it.Current();
-        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
-          HBasicBlock* predecessor = current->GetPredecessors()[i];
-          DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u);
-          HInstruction* input = phi->InputAt(i);
-          Location source = input->GetLiveInterval()->GetLocationAt(
-              predecessor->GetLifetimeEnd() - 1);
-          Location destination = phi->GetLiveInterval()->ToLocation();
-          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
-        }
-      }
-    }
-  }
-
-  // Assign temp locations.
-  for (LiveInterval* temp : temp_intervals_) {
-    if (temp->IsHighInterval()) {
-      // High intervals can be skipped, they are already handled by the low interval.
-      continue;
-    }
-    HInstruction* at = liveness_.GetTempUser(temp);
-    size_t temp_index = liveness_.GetTempIndex(temp);
-    LocationSummary* locations = at->GetLocations();
-    switch (temp->GetType()) {
-      case Primitive::kPrimInt:
-        locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
-        break;
-
-      case Primitive::kPrimDouble:
-        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
-          Location location = Location::FpuRegisterPairLocation(
-              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
-          locations->SetTempAt(temp_index, location);
-        } else {
-          locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
-        }
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type for temporary location "
-                   << temp->GetType();
-    }
-  }
-}
-
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 58600b7..7e1fff8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/arena_containers.h"
+#include "base/arena_object.h"
 #include "base/macros.h"
 #include "primitive.h"
 
@@ -29,36 +30,41 @@
 class HGraph;
 class HInstruction;
 class HParallelMove;
-class HPhi;
 class LiveInterval;
 class Location;
 class SsaLivenessAnalysis;
 
 /**
- * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ * Base class for any register allocator.
  */
-class RegisterAllocator {
+class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
  public:
-  RegisterAllocator(ArenaAllocator* allocator,
-                    CodeGenerator* codegen,
-                    const SsaLivenessAnalysis& analysis);
+  enum Strategy {
+    kRegisterAllocatorLinearScan,
+    kRegisterAllocatorGraphColor
+  };
+
+  static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
+
+  static RegisterAllocator* Create(ArenaAllocator* allocator,
+                                   CodeGenerator* codegen,
+                                   const SsaLivenessAnalysis& analysis,
+                                   Strategy strategy = kRegisterAllocatorDefault);
+
+  virtual ~RegisterAllocator() = default;
 
   // Main entry point for the register allocator. Given the liveness analysis,
   // allocates registers to live intervals.
-  void AllocateRegisters();
+  virtual void AllocateRegisters() = 0;
 
   // Validate that the register allocator did not allocate the same register to
-  // intervals that intersect each other. Returns false if it did not.
-  bool Validate(bool log_fatal_on_failure) {
-    processing_core_registers_ = true;
-    if (!ValidateInternal(log_fatal_on_failure)) {
-      return false;
-    }
-    processing_core_registers_ = false;
-    return ValidateInternal(log_fatal_on_failure);
-  }
+  // intervals that intersect each other. Returns false if it failed.
+  virtual bool Validate(bool log_fatal_on_failure) = 0;
 
-  // Helper method for validation. Used by unit testing.
+  static bool CanAllocateRegistersFor(const HGraph& graph,
+                                      InstructionSet instruction_set);
+
+  // Verifies that live intervals do not conflict. Used by unit testing.
   static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                 size_t number_of_spill_slots,
                                 size_t number_of_out_slots,
@@ -67,178 +73,25 @@
                                 bool processing_core_registers,
                                 bool log_fatal_on_failure);
 
-  static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
-
-  size_t GetNumberOfSpillSlots() const {
-    return int_spill_slots_.size()
-        + long_spill_slots_.size()
-        + float_spill_slots_.size()
-        + double_spill_slots_.size()
-        + catch_phi_spill_slots_;
-  }
-
   static constexpr const char* kRegisterAllocatorPassName = "register";
 
- private:
-  // Main methods of the allocator.
-  void LinearScan();
-  bool TryAllocateFreeReg(LiveInterval* interval);
-  bool AllocateBlockedReg(LiveInterval* interval);
-  void Resolve();
-
-  // Add `interval` in the given sorted list.
-  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+ protected:
+  RegisterAllocator(ArenaAllocator* allocator,
+                    CodeGenerator* codegen,
+                    const SsaLivenessAnalysis& analysis);
 
   // Split `interval` at the position `position`. The new interval starts at `position`.
-  LiveInterval* Split(LiveInterval* interval, size_t position);
+  // If `position` is at the start of `interval`, returns `interval` with its
+  // register location(s) cleared.
+  static LiveInterval* Split(LiveInterval* interval, size_t position);
 
   // Split `interval` at a position between `from` and `to`. The method will try
   // to find an optimal split position.
   LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
 
-  // Returns whether `reg` is blocked by the code generator.
-  bool IsBlocked(int reg) const;
-
-  // Update the interval for the register in `location` to cover [start, end).
-  void BlockRegister(Location location, size_t start, size_t end);
-  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
-  // Allocate a spill slot for the given interval. Should be called in linear
-  // order of interval starting positions.
-  void AllocateSpillSlotFor(LiveInterval* interval);
-
-  // Allocate a spill slot for the given catch phi. Will allocate the same slot
-  // for phis which share the same vreg. Must be called in reverse linear order
-  // of lifetime positions and ascending vreg numbers for correctness.
-  void AllocateSpillSlotForCatchPhi(HPhi* phi);
-
-  // Connect adjacent siblings within blocks.
-  void ConnectSiblings(LiveInterval* interval);
-
-  // Connect siblings between block entries and exits.
-  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
-
-  // Helper methods to insert parallel moves in the graph.
-  void InsertParallelMoveAtExitOf(HBasicBlock* block,
-                                  HInstruction* instruction,
-                                  Location source,
-                                  Location destination) const;
-  void InsertParallelMoveAtEntryOf(HBasicBlock* block,
-                                   HInstruction* instruction,
-                                   Location source,
-                                   Location destination) const;
-  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
-  void AddInputMoveFor(HInstruction* input,
-                       HInstruction* user,
-                       Location source,
-                       Location destination) const;
-  void InsertParallelMoveAt(size_t position,
-                            HInstruction* instruction,
-                            Location source,
-                            Location destination) const;
-
-  void AddMove(HParallelMove* move,
-               Location source,
-               Location destination,
-               HInstruction* instruction,
-               Primitive::Type type) const;
-
-  // Helper methods.
-  void AllocateRegistersInternal();
-  void ProcessInstruction(HInstruction* instruction);
-  bool ValidateInternal(bool log_fatal_on_failure) const;
-  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
-  void DumpAllIntervals(std::ostream& stream) const;
-  int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
-  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
-  bool IsCallerSaveRegister(int reg) const;
-
-  // Try splitting an active non-pair or unaligned pair interval at the given `position`.
-  // Returns whether it was successful at finding such an interval.
-  bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
-                                                size_t first_register_use,
-                                                size_t* next_use);
-
   ArenaAllocator* const allocator_;
   CodeGenerator* const codegen_;
   const SsaLivenessAnalysis& liveness_;
-
-  // List of intervals for core registers that must be processed, ordered by start
-  // position. Last entry is the interval that has the lowest start position.
-  // This list is initially populated before doing the linear scan.
-  ArenaVector<LiveInterval*> unhandled_core_intervals_;
-
-  // List of intervals for floating-point registers. Same comments as above.
-  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
-
-  // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
-  // or `unhandled_fp_intervals_`.
-  ArenaVector<LiveInterval*>* unhandled_;
-
-  // List of intervals that have been processed.
-  ArenaVector<LiveInterval*> handled_;
-
-  // List of intervals that are currently active when processing a new live interval.
-  // That is, they have a live range that spans the start of the new interval.
-  ArenaVector<LiveInterval*> active_;
-
-  // List of intervals that are currently inactive when processing a new live interval.
-  // That is, they have a lifetime hole that spans the start of the new interval.
-  ArenaVector<LiveInterval*> inactive_;
-
-  // Fixed intervals for physical registers. Such intervals cover the positions
-  // where an instruction requires a specific register.
-  ArenaVector<LiveInterval*> physical_core_register_intervals_;
-  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
-
-  // Intervals for temporaries. Such intervals cover the positions
-  // where an instruction requires a temporary.
-  ArenaVector<LiveInterval*> temp_intervals_;
-
-  // The spill slots allocated for live intervals. We ensure spill slots
-  // are typed to avoid (1) doing moves and swaps between two different kinds
-  // of registers, and (2) swapping between a single stack slot and a double
-  // stack slot. This simplifies the parallel move resolver.
-  ArenaVector<size_t> int_spill_slots_;
-  ArenaVector<size_t> long_spill_slots_;
-  ArenaVector<size_t> float_spill_slots_;
-  ArenaVector<size_t> double_spill_slots_;
-
-  // Spill slots allocated to catch phis. This category is special-cased because
-  // (1) slots are allocated prior to linear scan and in reverse linear order,
-  // (2) equivalent phis need to share slots despite having different types.
-  size_t catch_phi_spill_slots_;
-
-  // Instructions that need a safepoint.
-  ArenaVector<HInstruction*> safepoints_;
-
-  // True if processing core registers. False if processing floating
-  // point registers.
-  bool processing_core_registers_;
-
-  // Number of registers for the current register kind (core or floating point).
-  size_t number_of_registers_;
-
-  // Temporary array, allocated ahead of time for simplicity.
-  size_t* registers_array_;
-
-  // Blocked registers, as decided by the code generator.
-  bool* const blocked_core_registers_;
-  bool* const blocked_fp_registers_;
-
-  // Slots reserved for out arguments.
-  size_t reserved_out_slots_;
-
-  // The maximum live core registers at safepoints.
-  size_t maximum_number_of_live_core_registers_;
-
-  // The maximum live FP registers at safepoints.
-  size_t maximum_number_of_live_fp_registers_;
-
-  ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
-
-  DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
new file mode 100644
index 0000000..a21595f
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -0,0 +1,2105 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_graph_color.h"
+
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// Highest number of registers that we support for any platform. This can be used for std::bitset,
+// for example, which needs to know its size at compile time.
+static constexpr size_t kMaxNumRegs = 32;
+
+// The maximum number of graph coloring attempts before triggering a DCHECK.
+// This is meant to catch changes to the graph coloring algorithm that undermine its forward
+// progress guarantees. Forward progress for the algorithm means splitting live intervals on
+// every graph coloring attempt so that eventually the interference graph will be sparse enough
+// to color. The main threat to forward progress is trying to split short intervals which cannot be
+// split further; this could cause infinite looping because the interference graph would never
+// change. This is avoided by prioritizing short intervals before long ones, so that long
+// intervals are split when coloring fails.
+static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
+
+// We always want to avoid spilling inside loops.
+static constexpr size_t kLoopSpillWeightMultiplier = 10;
+
+// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
+static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
+
+// We avoid moves in blocks that dominate the exit block, since these blocks will
+// be executed on every path through the method.
+static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
+
+enum class CoalesceKind {
+  kAdjacentSibling,       // Prevents moves at interval split points.
+  kFixedOutputSibling,    // Prevents moves from a fixed output location.
+  kFixedInput,            // Prevents moves into a fixed input location.
+  kNonlinearControlFlow,  // Prevents moves between blocks.
+  kPhi,                   // Prevents phi resolution moves.
+  kFirstInput,            // Prevents a single input move.
+  kAnyInput,              // May lead to better instruction selection / smaller encodings.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
+  return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
+}
+
+static size_t LoopDepthAt(HBasicBlock* block) {
+  HLoopInformation* loop_info = block->GetLoopInformation();
+  size_t depth = 0;
+  while (loop_info != nullptr) {
+    ++depth;
+    loop_info = loop_info->GetPreHeader()->GetLoopInformation();
+  }
+  return depth;
+}
+
+// Return the runtime cost of inserting a move instruction at the specified location.
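+// For example, under the multipliers above, a move in a single-jump block that
+// dominates the exit block and sits two loops deep costs 1 * 2 * 2 * 10 * 10 = 400,
+// while a move in a block with none of these properties costs 1.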
+static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
+  HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
+  DCHECK(block != nullptr);
+  size_t cost = 1;
+  if (block->IsSingleJump()) {
+    cost *= kSingleJumpBlockWeightMultiplier;
+  }
+  if (block->Dominates(block->GetGraph()->GetExitBlock())) {
+    cost *= kDominatesExitBlockWeightMultiplier;
+  }
+  for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
+    cost *= kLoopSpillWeightMultiplier;
+  }
+  return cost;
+}
+
+// In general, we estimate coalesce priority by whether it will definitely avoid a move,
+// and by how likely it is to create an interference graph that's harder to color.
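+// For example, a kPhi opportunity positioned inside a loop receives the (high)
+// move cost for that position, while kAnyInput opportunities always receive
+// priority 0 because no move is necessarily avoided.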
+static size_t ComputeCoalescePriority(CoalesceKind kind,
+                                      size_t position,
+                                      const SsaLivenessAnalysis& liveness) {
+  if (kind == CoalesceKind::kAnyInput) {
+    // This type of coalescing can affect instruction selection, but not moves, so we
+    // give it the lowest priority.
+    return 0;
+  } else {
+    return CostForMoveAt(position, liveness);
+  }
+}
+
+enum class CoalesceStage {
+  kWorklist,  // Currently in the iterative coalescing worklist.
+  kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
+  kInactive,  // No longer considered until last-chance coalescing.
+  kDefunct,   // Either the two nodes interfere, or have already been coalesced.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
+  return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
+}
+
+// Represents a coalesce opportunity between two nodes.
+struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
+  CoalesceOpportunity(InterferenceNode* a,
+                      InterferenceNode* b,
+                      CoalesceKind kind,
+                      size_t position,
+                      const SsaLivenessAnalysis& liveness)
+        : node_a(a),
+          node_b(b),
+          stage(CoalesceStage::kWorklist),
+          priority(ComputeCoalescePriority(kind, position, liveness)) {}
+
+  // Compare two coalesce opportunities based on their priority.
+  // Return true if lhs has a lower priority than that of rhs.
+  static bool CmpPriority(const CoalesceOpportunity* lhs,
+                          const CoalesceOpportunity* rhs) {
+    return lhs->priority < rhs->priority;
+  }
+
+  InterferenceNode* const node_a;
+  InterferenceNode* const node_b;
+
+  // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
+  // and whether it should still be considered.
+  CoalesceStage stage;
+
+  // The priority of this coalesce opportunity, based on heuristics.
+  const size_t priority;
+};
+
+enum class NodeStage {
+  kInitial,           // Uninitialized.
+  kPrecolored,        // Marks fixed nodes.
+  kSafepoint,         // Marks safepoint nodes.
+  kPrunable,          // Marks uncolored nodes in the interference graph.
+  kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
+  kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
+  kSpillWorklist,     // Marks nodes with degree greater than or equal to the number of registers.
+  kPruned             // Marks nodes already pruned from the interference graph.
+};
+
+std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
+  return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
+}
+
+// Returns the estimated cost of spilling a particular live interval.
+static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
+  if (interval->HasRegister()) {
+    // Intervals with a fixed register cannot be spilled.
+    return std::numeric_limits<float>::min();
+  }
+
+  size_t length = interval->GetLength();
+  if (length == 1) {
+    // Tiny intervals should have maximum priority, since they cannot be split any further.
+    return std::numeric_limits<float>::max();
+  }
+
+  size_t use_weight = 0;
+  if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
+    // Cost for spilling at a register definition point.
+    use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
+  }
+
+  UsePosition* use = interval->GetFirstUse();
+  while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
+    // Skip uses before the start of this live interval.
+    use = use->GetNext();
+  }
+
+  while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
+    if (use->GetUser() != nullptr && use->RequiresRegister()) {
+      // Cost for spilling at a register use point.
+      use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
+    }
+    use = use->GetNext();
+  }
+
+  // We divide by the length of the interval because we want to prioritize
+  // short intervals; we do not benefit much if we split them further.
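+  // For example, ignoring the definition-point cost above: an interval of
+  // length 20 whose single register use sits two loops deep weighs
+  // (10 * 10) / 20 = 5.0, whereas a length-4 interval with one register use
+  // in straight-line code weighs 1 / 4 = 0.25.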
+  return static_cast<float>(use_weight) / static_cast<float>(length);
+}
+
+// Interference nodes make up the interference graph, which is the primary data structure in
+// graph coloring register allocation. Each node represents a single live interval, and contains
+// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
+// pre-colored nodes never contain outgoing edges (only incoming ones).
+//
+// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
+// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
+//
+// Note that a pair interval is represented by a single node in the interference graph, which
+// essentially requires two colors. One consequence of this is that the degree of a node is not
+// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
+// number of colors with which a node could interfere. We model this by giving edges different
+// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
+// For example, the edge between two single nodes will have weight 1. On the other hand,
+// the edge between a single node and a pair node will have weight 2. This is because the pair
+// node could block up to two colors for the single node, and because the single node could
+// block an entire two-register aligned slot for the pair node.
+// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
+// and thus whether it is safe to prune it from the interference graph early on.
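+//
+// For example, a pair node with two single-register neighbors has degree
+// 2 + 2 = 4, since every edge incident to a pair node has weight 2; each of
+// those single nodes likewise counts 2 toward its own degree for the pair
+// neighbor. Defining degree this way keeps the usual "degree < k" pruning
+// test conservative even when pairs are involved.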
+class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
+ public:
+  InterferenceNode(ArenaAllocator* allocator,
+                   LiveInterval* interval,
+                   const SsaLivenessAnalysis& liveness)
+        : stage(NodeStage::kInitial),
+          interval_(interval),
+          adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
+          alias_(this),
+          spill_weight_(ComputeSpillWeight(interval, liveness)),
+          requires_color_(interval->RequiresRegister()),
+          needs_spill_slot_(false) {
+    DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
+  }
+
+  void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
+    DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
+    DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
+    DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
+    DCHECK_NE(stage, NodeStage::kPruned);
+    DCHECK_NE(other->stage, NodeStage::kPruned);
+    if (guaranteed_not_interfering_yet) {
+      DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
+             == adjacent_nodes_.end());
+      adjacent_nodes_.push_back(other);
+      out_degree_ += EdgeWeightWith(other);
+    } else {
+      auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+      if (it == adjacent_nodes_.end()) {
+        adjacent_nodes_.push_back(other);
+        out_degree_ += EdgeWeightWith(other);
+      }
+    }
+  }
+
+  void RemoveInterference(InterferenceNode* other) {
+    DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
+    DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
+    auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+    if (it != adjacent_nodes_.end()) {
+      adjacent_nodes_.erase(it);
+      out_degree_ -= EdgeWeightWith(other);
+    }
+  }
+
+  bool ContainsInterference(InterferenceNode* other) const {
+    DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
+    DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
+    auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+    return it != adjacent_nodes_.end();
+  }
+
+  LiveInterval* GetInterval() const {
+    return interval_;
+  }
+
+  const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
+    return adjacent_nodes_;
+  }
+
+  size_t GetOutDegree() const {
+    // Pre-colored nodes have infinite degree.
+    DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
+    return out_degree_;
+  }
+
+  void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
+    coalesce_opportunities_.push_back(opportunity);
+  }
+
+  void ClearCoalesceOpportunities() {
+    coalesce_opportunities_.clear();
+  }
+
+  bool IsMoveRelated() const {
+    for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
+      if (opportunity->stage == CoalesceStage::kWorklist ||
+          opportunity->stage == CoalesceStage::kActive) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Return whether this node already has a color.
+  // Used to find fixed nodes in the interference graph before coloring.
+  bool IsPrecolored() const {
+    return interval_->HasRegister();
+  }
+
+  bool IsPair() const {
+    return interval_->HasHighInterval();
+  }
+
+  void SetAlias(InterferenceNode* rep) {
+    DCHECK_NE(rep->stage, NodeStage::kPruned);
+    DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
+    alias_ = rep;
+  }
+
+  InterferenceNode* GetAlias() {
+    if (alias_ != this) {
+      // Recurse in order to flatten tree of alias pointers.
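+      // This works like union-find with path compression: if this node was
+      // coalesced into b, and b into c, the recursion rewrites alias_ to
+      // point directly at c, so later lookups skip the intermediate node.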
+      alias_ = alias_->GetAlias();
+    }
+    return alias_;
+  }
+
+  const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
+    return coalesce_opportunities_;
+  }
+
+  float GetSpillWeight() const {
+    return spill_weight_;
+  }
+
+  bool RequiresColor() const {
+    return requires_color_;
+  }
+
+  // We give extra weight to edges adjacent to pair nodes. See the general comment on the
+  // interference graph above.
+  size_t EdgeWeightWith(const InterferenceNode* other) const {
+    return (IsPair() || other->IsPair()) ? 2 : 1;
+  }
+
+  bool NeedsSpillSlot() const {
+    return needs_spill_slot_;
+  }
+
+  void SetNeedsSpillSlot() {
+    needs_spill_slot_ = true;
+  }
+
+  // The current stage of this node, indicating which worklist it belongs to.
+  NodeStage stage;
+
+ private:
+  // The live interval that this node represents.
+  LiveInterval* const interval_;
+
+  // All nodes interfering with this one.
+  // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
+  // set sizes that we encounter. Using a vector leads to much better performance.
+  ArenaVector<InterferenceNode*> adjacent_nodes_;
+
+  // Interference nodes that this node should be coalesced with to reduce moves.
+  ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
+
+  // The maximum number of colors with which this node could interfere. This could be more than
+  // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
+  // We use "out" degree because incoming edges come from nodes already pruned from the graph,
+  // and do not affect the coloring of this node.
+  // Pre-colored nodes are treated as having infinite degree.
+  size_t out_degree_;
+
+  // The node representing this node in the interference graph.
+  // Initially set to `this`, and only changed if this node is coalesced into another.
+  InterferenceNode* alias_;
+
+  // The cost of splitting and spilling this interval to the stack.
+  // Nodes with a higher spill weight should be prioritized when assigning registers.
+  // This is essentially based on use density and location; short intervals with many uses inside
+  // deeply nested loops have a high spill weight.
+  const float spill_weight_;
+
+  const bool requires_color_;
+
+  bool needs_spill_slot_;
+
+  DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
+};
+
+// The order in which we color nodes is important. To guarantee forward progress,
+// we prioritize intervals that require registers, and after that we prioritize
+// short intervals. That way, if we fail to color a node, it either won't require a
+// register, or it will be a long interval that can be split in order to make the
+// interference graph sparser.
+// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
+// (This metric is secondary to the forward progress requirements above.)
+// TODO: May also want to consider:
+// - Constants (since they can be rematerialized)
+// - Allocated spill slots
+static bool HasGreaterNodePriority(const InterferenceNode* lhs,
+                                   const InterferenceNode* rhs) {
+  // (1) Prioritize the node that requires a color.
+  if (lhs->RequiresColor() != rhs->RequiresColor()) {
+    return lhs->RequiresColor();
+  }
+
+  // (2) Prioritize the interval that has a higher spill weight.
+  return lhs->GetSpillWeight() > rhs->GetSpillWeight();
+}
+
+// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
+// and provides methods for each phase of the attempt.
+class ColoringIteration {
+ public:
+  ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
+                    ArenaAllocator* allocator,
+                    bool processing_core_regs,
+                    size_t num_regs)
+        : register_allocator_(register_allocator),
+          allocator_(allocator),
+          processing_core_regs_(processing_core_regs),
+          num_regs_(num_regs),
+          interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
+          coalesce_worklist_(CoalesceOpportunity::CmpPriority,
+                             allocator->Adapter(kArenaAllocRegisterAllocator)) {}
+
+  // Use the intervals collected from instructions to construct an
+  // interference graph mapping intervals to adjacency lists.
+  // Also, collect synthesized safepoint nodes, used to keep
+  // track of live intervals across safepoints.
+  // TODO: Should build safepoints elsewhere.
+  void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
+                              const ArenaVector<InterferenceNode*>& physical_nodes,
+                              ArenaVector<InterferenceNode*>* safepoints);
+
+  // Add coalesce opportunities to interference nodes.
+  void FindCoalesceOpportunities();
+
+  // Prune nodes from the interference graph to be colored later. Build
+  // a stack (pruned_nodes) containing these intervals in an order determined
+  // by various heuristics.
+  void PruneInterferenceGraph();
+
+  // Process pruned_intervals_ to color the interference graph, spilling when
+  // necessary. Returns true if successful. Else, some intervals have been
+  // split, and the interference graph should be rebuilt for another attempt.
+  bool ColorInterferenceGraph();
+
+  // Return prunable nodes.
+  // The register allocator will need to access prunable nodes after coloring
+  // in order to tell the code generator which registers have been assigned.
+  const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
+    return prunable_nodes_;
+  }
+
+ private:
+  // Create a coalesce opportunity between two nodes.
+  void CreateCoalesceOpportunity(InterferenceNode* a,
+                                 InterferenceNode* b,
+                                 CoalesceKind kind,
+                                 size_t position);
+
+  // Add an edge in the interference graph, if valid.
+  // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
+  // when possible.
+  void AddPotentialInterference(InterferenceNode* from,
+                                InterferenceNode* to,
+                                bool guaranteed_not_interfering_yet,
+                                bool both_directions = true);
+
+  // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
+  // may be pruned from the interference graph.
+  void FreezeMoves(InterferenceNode* node);
+
+  // Prune a node from the interference graph, updating worklists if necessary.
+  void PruneNode(InterferenceNode* node);
+
+  // Add coalesce opportunities associated with this node to the coalesce worklist.
+  void EnableCoalesceOpportunities(InterferenceNode* node);
+
+  // If needed, move `node` from the freeze worklist to the simplify worklist.
+  void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
+
+  // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
+  bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+  // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
+  bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
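+  // (Conservative coalescing tests of this shape are known in the literature
+  // as the "George" test, for the case where one node is precolored, and the
+  // "Briggs" test, for the case where both nodes are uncolored.)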
+
+  void Coalesce(CoalesceOpportunity* opportunity);
+
+  // Merge `from` into `into` in the interference graph.
+  void Combine(InterferenceNode* from, InterferenceNode* into);
+
+  // A reference to the register allocator instance,
+  // needed to split intervals and assign spill slots.
+  RegisterAllocatorGraphColor* register_allocator_;
+
+  // An arena allocator used for a single graph coloring attempt.
+  ArenaAllocator* allocator_;
+
+  const bool processing_core_regs_;
+
+  const size_t num_regs_;
+
+  // A map from live intervals to interference nodes.
+  ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
+
+  // Uncolored nodes that should be pruned from the interference graph.
+  ArenaVector<InterferenceNode*> prunable_nodes_;
+
+  // A stack of nodes pruned from the interference graph, waiting to be colored.
+  ArenaStdStack<InterferenceNode*> pruned_nodes_;
+
+  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
+  ArenaDeque<InterferenceNode*> simplify_worklist_;
+
+  // A queue containing low degree, move-related nodes.
+  ArenaDeque<InterferenceNode*> freeze_worklist_;
+
+  // A queue containing high degree nodes.
+  // If we have to prune from the spill worklist, we cannot guarantee
+  // the pruned node a color, so we order the worklist by priority.
+  ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
+
+  // A queue containing coalesce opportunities.
+  // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
+  // inside of loops) are more important than others.
+  ArenaPriorityQueue<CoalesceOpportunity*,
+                     decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
+
+  DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
+};
+
+static bool IsCoreInterval(LiveInterval* interval) {
+  return !Primitive::IsFloatingPointType(interval->GetType());
+}
+
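+// The ArtMethod* of the current method sits at the bottom of the frame; with
+// 4-byte vregs this reserves 8 / 4 = 2 slots on a 64-bit target and
+// 4 / 4 = 1 slot on a 32-bit one.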
+static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
+  return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
+}
+
+RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+                                                         CodeGenerator* codegen,
+                                                         const SsaLivenessAnalysis& liveness,
+                                                         bool iterative_move_coalescing)
+      : RegisterAllocator(allocator, codegen, liveness),
+        iterative_move_coalescing_(iterative_move_coalescing),
+        core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        num_int_spill_slots_(0),
+        num_double_spill_slots_(0),
+        num_float_spill_slots_(0),
+        num_long_spill_slots_(0),
+        catch_phi_spill_slot_counter_(0),
+        reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
+        reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()),
+        number_of_globally_blocked_core_regs_(0),
+        number_of_globally_blocked_fp_regs_(0),
+        max_safepoint_live_core_regs_(0),
+        max_safepoint_live_fp_regs_(0) {
+  // Before we ask for blocked registers, set them up in the code generator.
+  codegen->SetupBlockedRegisters();
+
+  // Initialize physical core register live intervals and blocked registers.
+  // This includes globally blocked registers, such as the stack pointer.
+  physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
+    physical_core_nodes_[i] =
+        new (allocator_) InterferenceNode(allocator_, interval, liveness);
+    physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
+    core_intervals_.push_back(interval);
+    if (codegen_->IsBlockedCoreRegister(i)) {
+      ++number_of_globally_blocked_core_regs_;
+      interval->AddRange(0, liveness.GetMaxLifetimePosition());
+    }
+  }
+  // Initialize physical floating point register live intervals and blocked registers.
+  physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
+    physical_fp_nodes_[i] =
+        new (allocator_) InterferenceNode(allocator_, interval, liveness);
+    physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
+    fp_intervals_.push_back(interval);
+    if (codegen_->IsBlockedFloatingPointRegister(i)) {
+      ++number_of_globally_blocked_fp_regs_;
+      interval->AddRange(0, liveness.GetMaxLifetimePosition());
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::AllocateRegisters() {
+  // (1) Collect and prepare live intervals.
+  ProcessInstructions();
+
+  for (bool processing_core_regs : {true, false}) {
+    ArenaVector<LiveInterval*>& intervals = processing_core_regs
+        ? core_intervals_
+        : fp_intervals_;
+    size_t num_registers = processing_core_regs
+        ? codegen_->GetNumberOfCoreRegisters()
+        : codegen_->GetNumberOfFloatingPointRegisters();
+
+    size_t attempt = 0;
+    while (true) {
+      ++attempt;
+      DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
+          << "Exceeded debug max graph coloring register allocation attempts. "
+          << "This could indicate that the register allocator is not making forward progress, "
+          << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
+          << "should be prioritized over long ones, because they cannot be split further.)";
+
+      // Many data structures are cleared between graph coloring attempts, so we reduce
+      // total memory usage by using a new arena allocator for each attempt.
+      ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
+      ColoringIteration iteration(this,
+                                  &coloring_attempt_allocator,
+                                  processing_core_regs,
+                                  num_registers);
+
+      // (2) Build the interference graph. Also gather safepoints.
+      ArenaVector<InterferenceNode*> safepoints(
+          coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
+      ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+          ? physical_core_nodes_
+          : physical_fp_nodes_;
+      iteration.BuildInterferenceGraph(intervals, physical_nodes, &safepoints);
+
+      // (3) Add coalesce opportunities.
+      //     If we have tried coloring the graph a suspiciously high number of times, give
+      //     up on move coalescing, just in case the coalescing heuristics are not conservative.
+      //     (This situation will be caught if DCHECKs are turned on.)
+      if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
+        iteration.FindCoalesceOpportunities();
+      }
+
+      // (4) Prune all uncolored nodes from interference graph.
+      iteration.PruneInterferenceGraph();
+
+      // (5) Color pruned nodes based on interferences.
+      bool successful = iteration.ColorInterferenceGraph();
+
+      // We manually clear coalesce opportunities for physical nodes,
+      // since they persist across coloring attempts.
+      for (InterferenceNode* node : physical_core_nodes_) {
+        node->ClearCoalesceOpportunities();
+      }
+      for (InterferenceNode* node : physical_fp_nodes_) {
+        node->ClearCoalesceOpportunities();
+      }
+
+      if (successful) {
+        // Assign spill slots.
+        AllocateSpillSlots(iteration.GetPrunableNodes());
+
+        // Compute the maximum number of live registers across safepoints.
+        // Notice that we do not count globally blocked registers, such as the stack pointer.
+        if (safepoints.size() > 0) {
+          size_t max_safepoint_live_regs = ComputeMaxSafepointLiveRegisters(safepoints);
+          if (processing_core_regs) {
+            max_safepoint_live_core_regs_ =
+                max_safepoint_live_regs - number_of_globally_blocked_core_regs_;
+          } else {
+            max_safepoint_live_fp_regs_ =
+                max_safepoint_live_regs - number_of_globally_blocked_fp_regs_;
+          }
+        }
+
+        // Tell the code generator which registers were allocated.
+        // We only look at prunable_nodes because we already told the code generator about
+        // fixed intervals while processing instructions. We also ignore the fixed intervals
+        // placed at the top of catch blocks.
+        for (InterferenceNode* node : iteration.GetPrunableNodes()) {
+          LiveInterval* interval = node->GetInterval();
+          if (interval->HasRegister()) {
+            Location low_reg = processing_core_regs
+                ? Location::RegisterLocation(interval->GetRegister())
+                : Location::FpuRegisterLocation(interval->GetRegister());
+            codegen_->AddAllocatedRegister(low_reg);
+            if (interval->HasHighInterval()) {
+              LiveInterval* high = interval->GetHighInterval();
+              DCHECK(high->HasRegister());
+              Location high_reg = processing_core_regs
+                  ? Location::RegisterLocation(high->GetRegister())
+                  : Location::FpuRegisterLocation(high->GetRegister());
+              codegen_->AddAllocatedRegister(high_reg);
+            }
+          } else {
+            DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
+          }
+        }
+
+        break;
+      }
+    }  // while unsuccessful
+  }  // for processing_core_regs
+
+  // (6) Resolve locations and deconstruct SSA form.
+  RegisterAllocationResolver(allocator_, codegen_, liveness_)
+      .Resolve(max_safepoint_live_core_regs_,
+               max_safepoint_live_fp_regs_,
+               reserved_art_method_slots_ + reserved_out_slots_,
+               num_int_spill_slots_,
+               num_long_spill_slots_,
+               num_float_spill_slots_,
+               num_double_spill_slots_,
+               catch_phi_spill_slot_counter_,
+               temp_intervals_);
+
+  if (kIsDebugBuild) {
+    Validate(/*log_fatal_on_failure*/ true);
+  }
+}
+
+bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
+  for (bool processing_core_regs : {true, false}) {
+    ArenaVector<LiveInterval*> intervals(
+        allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+    for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+      HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+      LiveInterval* interval = instruction->GetLiveInterval();
+      if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
+        intervals.push_back(instruction->GetLiveInterval());
+      }
+    }
+
+    ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+        ? physical_core_nodes_
+        : physical_fp_nodes_;
+    for (InterferenceNode* fixed : physical_nodes) {
+      LiveInterval* interval = fixed->GetInterval();
+      if (interval->GetFirstRange() != nullptr) {
+        // Ideally we would check fixed ranges as well, but currently there are times when
+        // two fixed intervals for the same register will overlap. For example, a fixed input
+        // and a fixed output may sometimes share the same register, in which case there will be
+        // two fixed intervals for the same place.
+      }
+    }
+
+    for (LiveInterval* temp : temp_intervals_) {
+      if (IsCoreInterval(temp) == processing_core_regs) {
+        intervals.push_back(temp);
+      }
+    }
+
+    size_t spill_slots = num_int_spill_slots_
+                       + num_long_spill_slots_
+                       + num_float_spill_slots_
+                       + num_double_spill_slots_
+                       + catch_phi_spill_slot_counter_;
+    bool ok = ValidateIntervals(intervals,
+                                spill_slots,
+                                reserved_art_method_slots_ + reserved_out_slots_,
+                                *codegen_,
+                                allocator_,
+                                processing_core_regs,
+                                log_fatal_on_failure);
+    if (!ok) {
+      return false;
+    }
+  }  // for processing_core_regs
+
+  return true;
+}
+
+void RegisterAllocatorGraphColor::ProcessInstructions() {
+  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    // Note that we currently depend on this ordering, since some helper
+    // code is designed for linear scan register allocation.
+    for (HBackwardInstructionIterator instr_it(block->GetInstructions());
+          !instr_it.Done();
+          instr_it.Advance()) {
+      ProcessInstruction(instr_it.Current());
+    }
+
+    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+      ProcessInstruction(phi_it.Current());
+    }
+
+    if (block->IsCatchBlock()
+        || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in registers.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+  if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
+    // We do this here because we do not want the suspend check to artificially
+    // create live registers.
+    DCHECK(instruction->IsSuspendCheckEntry());
+    DCHECK_EQ(locations->GetTempCount(), 0u);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
+  }
+
+  CheckForTempLiveIntervals(instruction);
+  CheckForSafepoint(instruction);
+  if (instruction->GetLocations()->WillCall()) {
+    // If a call will happen, create fixed intervals for caller-save registers.
+    // TODO: Note that it may be beneficial to later split intervals at this point,
+    //       so that we allow last-minute moves from a caller-save register
+    //       to a callee-save register.
+    BlockRegisters(instruction->GetLifetimePosition(),
+                   instruction->GetLifetimePosition() + 1,
+                   /*caller_save_only*/ true);
+  }
+  CheckForFixedInputs(instruction);
+
+  LiveInterval* interval = instruction->GetLiveInterval();
+  if (interval == nullptr) {
+    // Instructions lacking a valid output location do not have a live interval.
+    DCHECK(!locations->Out().IsValid());
+    return;
+  }
+
+  // Low intervals act as representatives for their corresponding high interval.
+  DCHECK(!interval->IsHighInterval());
+  if (codegen_->NeedsTwoRegisters(interval->GetType())) {
+    interval->AddHighInterval();
+  }
+  AddSafepointsFor(instruction);
+  CheckForFixedOutput(instruction);
+  AllocateSpillSlotForCatchPhi(instruction);
+
+  ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
+      ? core_intervals_
+      : fp_intervals_;
+  if (interval->HasSpillSlot() || instruction->IsConstant()) {
+    // Note that if an interval already has a spill slot, then its value currently resides
+    // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
+    // register use. This is also true for constants, which can be materialized at any point.
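+    // For example, a stack-passed parameter whose first register use is at
+    // position 20 is split just before that use; only the split interval
+    // competes for a register, while earlier positions keep reading the
+    // existing stack slot.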
+    size_t first_register_use = interval->FirstRegisterUse();
+    if (first_register_use != kNoLifetime) {
+      LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
+      intervals.push_back(split);
+    } else {
+      // We won't allocate a register for this value.
+    }
+  } else {
+    intervals.push_back(interval);
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
+  // We simply block physical registers where necessary.
+  // TODO: Ideally we would coalesce the physical register with the register
+  //       allocated to the input value, but this can be tricky if, e.g., there
+  //       could be multiple physical register uses of the same value at the
+  //       same instruction. Furthermore, there's currently no distinction between
+  //       fixed inputs to a call (which will be clobbered) and other fixed inputs (which
+  //       may not be clobbered).
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+    Location input = locations->InAt(i);
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
+      codegen_->AddAllocatedRegister(input);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
+      codegen_->AddAllocatedRegister(input.ToLow());
+      codegen_->AddAllocatedRegister(input.ToHigh());
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
+  // If an instruction has a fixed output location, we give the live interval a register and then
+  // proactively split it just after the definition point to avoid creating too many interferences
+  // with a fixed node.
+  LiveInterval* interval = instruction->GetLiveInterval();
+  Location out = interval->GetDefinedBy()->GetLocations()->Out();
+  size_t position = instruction->GetLifetimePosition();
+  DCHECK_GE(interval->GetEnd() - position, 2u);
+
+  if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+    out = instruction->GetLocations()->InAt(0);
+  }
+
+  if (out.IsRegister() || out.IsFpuRegister()) {
+    interval->SetRegister(out.reg());
+    codegen_->AddAllocatedRegister(out);
+    Split(interval, position + 1);
+  } else if (out.IsPair()) {
+    interval->SetRegister(out.low());
+    interval->GetHighInterval()->SetRegister(out.high());
+    codegen_->AddAllocatedRegister(out.ToLow());
+    codegen_->AddAllocatedRegister(out.ToHigh());
+    Split(interval, position + 1);
+  } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
+    interval->SetSpillSlot(out.GetStackIndex());
+  } else {
+    DCHECK(out.IsUnallocated() || out.IsConstant());
+  }
+}
+
+void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
+  LiveInterval* interval = instruction->GetLiveInterval();
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+    size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Test that safepoints_ are ordered by decreasing lifetime position.
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+    if (safepoint_position == interval->GetStart()) {
+      // The safepoint is for this instruction, so the location of the instruction
+      // does not need to be saved.
+      DCHECK_EQ(safepoint_index, safepoints_.size());
+      DCHECK_EQ(safepoint, instruction);
+      continue;
+    } else if (interval->IsDeadAt(safepoint_position)) {
+      break;
+    } else if (!interval->Covers(safepoint_position)) {
+      // Hole in the interval.
+      continue;
+    }
+    interval->AddSafepoint(safepoint);
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+    Location temp = locations->GetTemp(i);
+    if (temp.IsRegister() || temp.IsFpuRegister()) {
+      BlockRegister(temp, position, position + 1);
+      codegen_->AddAllocatedRegister(temp);
+    } else {
+      DCHECK(temp.IsUnallocated());
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          interval->AddTempUse(instruction, i);
+          core_intervals_.push_back(interval);
+          temp_intervals_.push_back(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          interval->AddTempUse(instruction, i);
+          fp_intervals_.push_back(interval);
+          temp_intervals_.push_back(interval);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(/*is_temp*/ true);
+            temp_intervals_.push_back(interval->GetHighInterval());
+          }
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+
+  if (locations->NeedsSafepoint()) {
+    safepoints_.push_back(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires knowing the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      core_intervals_.push_back(interval);
+      fp_intervals_.push_back(interval);
+    }
+  }
+}
+
+LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
+  if (interval->GetStart() < position && position < interval->GetEnd()) {
+    return Split(interval, position);
+  } else {
+    return interval;
+  }
+}
+
+void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
+  DCHECK(!interval->IsHighInterval());
+
+  // Split just after a register definition.
+  if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
+    interval = TrySplit(interval, interval->GetStart() + 1);
+  }
+
+  UsePosition* use = interval->GetFirstUse();
+  while (use != nullptr && use->GetPosition() < interval->GetStart()) {
+    use = use->GetNext();
+  }
+
+  // Split around register uses.
+  size_t end = interval->GetEnd();
+  while (use != nullptr && use->GetPosition() <= end) {
+    if (use->RequiresRegister()) {
+      size_t position = use->GetPosition();
+      interval = TrySplit(interval, position - 1);
+      if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
+        // If we are at the very end of a basic block, we cannot split right
+        // at the use. Split just after instead.
+        interval = TrySplit(interval, position + 1);
+      } else {
+        interval = TrySplit(interval, position);
+      }
+    }
+    use = use->GetNext();
+  }
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    HPhi* phi = instruction->AsPhi();
+    LiveInterval* interval = phi->GetLiveInterval();
+
+    HInstruction* previous_phi = phi->GetPrevious();
+    DCHECK(previous_phi == nullptr ||
+           previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+        << "Phis expected to be sorted by vreg number, "
+        << "so that equivalent phis are adjacent.";
+
+    if (phi->IsVRegEquivalentOf(previous_phi)) {
+      // Assign the same spill slot.
+      DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+      interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+    } else {
+      interval->SetSpillSlot(catch_phi_spill_slot_counter_);
+      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::BlockRegister(Location location,
+                                                size_t start,
+                                                size_t end) {
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  int reg = location.reg();
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_nodes_[reg]->GetInterval()
+      : physical_fp_nodes_[reg]->GetInterval();
+  DCHECK_EQ(interval->GetRegister(), reg);
+  bool blocked_by_codegen = location.IsRegister()
+      ? codegen_->IsBlockedCoreRegister(reg)
+      : codegen_->IsBlockedFloatingPointRegister(reg);
+  if (blocked_by_codegen) {
+    // We've already blocked this register for the entire method. (And adding a
+    // range inside another range violates the preconditions of AddRange).
+  } else {
+    interval->AddRange(start, end);
+  }
+}
+
+void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
+void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
+                                                 InterferenceNode* to,
+                                                 bool guaranteed_not_interfering_yet,
+                                                 bool both_directions) {
+  if (from->IsPrecolored()) {
+    // We save space by ignoring outgoing edges from fixed nodes.
+  } else if (to->GetInterval()->IsSlowPathSafepoint()) {
+    // Safepoint intervals are only there to count max live registers,
+    // so no need to give them incoming interference edges.
+    // This is also necessary for correctness, because we don't want nodes
+    // to remove themselves from safepoint adjacency sets when they're pruned.
+  } else if (to->IsPrecolored()) {
+    // It is important that only a single node represents a given fixed register in the
+    // interference graph. We retrieve that node here.
+    const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
+        ? register_allocator_->physical_fp_nodes_
+        : register_allocator_->physical_core_nodes_;
+    InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
+    from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
+    DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
+    DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
+
+    // If a node interferes with a fixed pair node, the weight of the edge may
+    // be inaccurate after using the alias of the pair node, because the alias of the pair node
+    // is a singular node.
+    // We could make special pair fixed nodes, but that ends up being too conservative because
+    // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
+    // three rather than two.
+    // Instead, we explicitly add an interference with the high node of the fixed pair node.
+    // TODO: This is at times too conservative for pair nodes, but the fact that fixed pair
+    //       intervals can be unaligned on x86 complicates things.
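+    // (For example, a node interfering with the fixed pair {r1,r2} receives edges to both
+    // the canonical r1 node and the canonical r2 node.)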
+    if (to->IsPair()) {
+      InterferenceNode* high_node =
+          physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
+      DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
+                high_node->GetInterval()->GetRegister());
+      from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
+    }
+  } else {
+    // Standard interference between two uncolored nodes.
+    from->AddInterference(to, guaranteed_not_interfering_yet);
+  }
+
+  if (both_directions) {
+    AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
+  }
+}
+
+// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
+// is allowed to have the same register as the input interval.
+// TODO: Ideally we should just produce correct intervals in liveness analysis.
+//       We would need to refactor the current live interval layout to do so, which is
+//       no small task.
+static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
+  LiveInterval* output_interval = out_node->GetInterval();
+  HInstruction* defined_by = output_interval->GetDefinedBy();
+  if (defined_by == nullptr) {
+    // There is no defining instruction, so this is not a definition point.
+    return false;
+  }
+
+  LocationSummary* locations = defined_by->GetLocations();
+  if (locations->OutputCanOverlapWithInputs()) {
+    // The output is marked as possibly overlapping its inputs in time, so it is not
+    // allowed to reuse a register from an input.
+    return false;
+  }
+
+  LiveInterval* input_interval = in_node->GetInterval();
+  LiveInterval* next_sibling = input_interval->GetNextSibling();
+  size_t def_position = defined_by->GetLifetimePosition();
+  size_t use_position = def_position + 1;
+  if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
+    // The next sibling starts at the use position, so reusing the input register in the output
+    // would clobber the input before it's moved into the sibling interval location.
+    return false;
+  }
+
+  if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
+    // The input interval is still live at the use position, so its register cannot be reused.
+    return false;
+  }
+
+  HInputsRef inputs = defined_by->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
+      DCHECK(input_interval->SameRegisterKind(*output_interval));
+      return true;
+    }
+  }
+
+  // The input interval was not an input for this instruction.
+  return false;
+}
+
+void ColoringIteration::BuildInterferenceGraph(
+    const ArenaVector<LiveInterval*>& intervals,
+    const ArenaVector<InterferenceNode*>& physical_nodes,
+    ArenaVector<InterferenceNode*>* safepoints) {
+  DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
+  // Build the interference graph efficiently by ordering range endpoints
+  // by position and doing a linear sweep to find interferences. (That is, we
+  // jump from endpoint to endpoint, maintaining a set of intervals live at each
+  // point. If two nodes are ever in the live set at the same time, then they
+  // interfere with each other.)
+  //
+  // We order by both position and (secondarily) by whether the endpoint
+  // begins or ends a range; we want to process range endings before range
+  // beginnings at the same position because they should not conflict.
+  //
+  // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
+  // Tuple contents: (position, is_range_beginning, node).
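+  // (For example, a node `n` whose interval has ranges [2,6) and [8,10) contributes the
+  // tuples (2, true, n), (6, false, n), (8, true, n), and (10, false, n).)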
+  ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // We reserve plenty of space up front to avoid excessive copying. Note that
+  // `prunable_nodes_` is still empty at this point, so we base the estimate on the
+  // number of input intervals instead.
+  range_endpoints.reserve(4 * intervals.size());
+
+  for (LiveInterval* parent : intervals) {
+    for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
+      LiveRange* range = sibling->GetFirstRange();
+      if (range != nullptr) {
+        InterferenceNode* node = new (allocator_) InterferenceNode(
+            allocator_, sibling, register_allocator_->liveness_);
+        interval_node_map_.Insert(std::make_pair(sibling, node));
+
+        if (sibling->HasRegister()) {
+          // Fixed nodes should alias the canonical node for the corresponding register.
+          node->stage = NodeStage::kPrecolored;
+          InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
+          node->SetAlias(physical_node);
+          DCHECK_EQ(node->GetInterval()->GetRegister(),
+                    physical_node->GetInterval()->GetRegister());
+        } else if (sibling->IsSlowPathSafepoint()) {
+          // Safepoint intervals are synthesized to count max live registers.
+          // They will be processed separately after coloring.
+          node->stage = NodeStage::kSafepoint;
+          safepoints->push_back(node);
+        } else {
+          node->stage = NodeStage::kPrunable;
+          prunable_nodes_.push_back(node);
+        }
+
+        while (range != nullptr) {
+          range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
+          range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
+          range = range->GetNext();
+        }
+      }
+    }
+  }
+
+  // Sort the endpoints.
+  // We explicitly ignore the third entry of each tuple (the node pointer) in order
+  // to maintain determinism.
+  std::sort(range_endpoints.begin(), range_endpoints.end(),
+            [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
+                const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
+    return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+         < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+  });
+
+  // Nodes live at the current position in the linear sweep.
+  ArenaVector<InterferenceNode*> live(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
+  // live set. When we encounter the end of a range, we remove the corresponding node
+  // from the live set. Nodes interfere if they are in the live set at the same time.
+  for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
+    bool is_range_beginning;
+    InterferenceNode* node;
+    size_t position;
+    // Extract information from the tuple, including the node this tuple represents.
+    std::tie(position, is_range_beginning, node) = *it;
+
+    if (is_range_beginning) {
+      bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
+      for (InterferenceNode* conflicting : live) {
+        DCHECK_NE(node, conflicting);
+        if (CheckInputOutputCanOverlap(conflicting, node)) {
+          // We do not add an interference, because the instruction represented by `node` allows
+          // its output to share a register with an input, represented here by `conflicting`.
+        } else {
+          AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
+        }
+      }
+      DCHECK(std::find(live.begin(), live.end(), node) == live.end());
+      live.push_back(node);
+    } else {
+      // End of range.
+      auto live_it = std::find(live.begin(), live.end(), node);
+      DCHECK(live_it != live.end());
+      live.erase(live_it);
+    }
+  }
+  DCHECK(live.empty());
+}
+
+void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
+                                                  InterferenceNode* b,
+                                                  CoalesceKind kind,
+                                                  size_t position) {
+  DCHECK_EQ(a->IsPair(), b->IsPair())
+      << "Nodes of different memory widths should never be coalesced";
+  CoalesceOpportunity* opportunity =
+      new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
+  a->AddCoalesceOpportunity(opportunity);
+  b->AddCoalesceOpportunity(opportunity);
+  coalesce_worklist_.push(opportunity);
+}
+
+// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
+// corresponding to an interval. Note that not all intervals are in this map, notably the parents
+// of constants and stack arguments. (However, these intervals should not be involved in coalesce
+// opportunities anyway, because they're not going to be in registers.)
+void ColoringIteration::FindCoalesceOpportunities() {
+  DCHECK(coalesce_worklist_.empty());
+
+  for (InterferenceNode* node : prunable_nodes_) {
+    LiveInterval* interval = node->GetInterval();
+
+    // Coalesce siblings.
+    LiveInterval* next_sibling = interval->GetNextSibling();
+    if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
+      auto it = interval_node_map_.Find(next_sibling);
+      if (it != interval_node_map_.end()) {
+        InterferenceNode* sibling_node = it->second;
+        CreateCoalesceOpportunity(node,
+                                  sibling_node,
+                                  CoalesceKind::kAdjacentSibling,
+                                  interval->GetEnd());
+      }
+    }
+
+    // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
+    LiveInterval* parent = interval->GetParent();
+    if (parent->HasRegister()
+        && parent->GetNextSibling() == interval
+        && parent->GetEnd() == interval->GetStart()) {
+      auto it = interval_node_map_.Find(parent);
+      if (it != interval_node_map_.end()) {
+        InterferenceNode* parent_node = it->second;
+        CreateCoalesceOpportunity(node,
+                                  parent_node,
+                                  CoalesceKind::kFixedOutputSibling,
+                                  parent->GetEnd());
+      }
+    }
+
+    // Try to prevent moves across blocks.
+    // Note that this does not lead to many successful coalesce attempts, so it could be
+    // removed if it is found to add to compile time.
+    const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
+    if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
+      // If the start of this interval is at a block boundary, we look at the
+      // location of the interval in blocks preceding the block this interval
+      // starts at. This can avoid a move between the two blocks.
+      HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        size_t position = predecessor->GetLifetimeEnd() - 1;
+        LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
+        if (existing != nullptr) {
+          auto it = interval_node_map_.Find(existing);
+          if (it != interval_node_map_.end()) {
+            InterferenceNode* existing_node = it->second;
+            CreateCoalesceOpportunity(node,
+                                      existing_node,
+                                      CoalesceKind::kNonlinearControlFlow,
+                                      position);
+          }
+        }
+      }
+    }
+
+    // Coalesce phi inputs with the corresponding output.
+    HInstruction* defined_by = interval->GetDefinedBy();
+    if (defined_by != nullptr && defined_by->IsPhi()) {
+      const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
+      HInputsRef inputs = defined_by->GetInputs();
+
+      for (size_t i = 0, e = inputs.size(); i < e; ++i) {
+        // We want the sibling at the end of the appropriate predecessor block.
+        size_t position = predecessors[i]->GetLifetimeEnd() - 1;
+        LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
+
+        auto it = interval_node_map_.Find(input_interval);
+        if (it != interval_node_map_.end()) {
+          InterferenceNode* input_node = it->second;
+          CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
+        }
+      }
+    }
+
+    // Coalesce output with first input when policy is kSameAsFirstInput.
+    if (defined_by != nullptr) {
+      Location out = defined_by->GetLocations()->Out();
+      if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+        LiveInterval* input_interval
+            = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
+        // TODO: Could we consider lifetime holes here?
+        if (input_interval->GetEnd() == interval->GetStart()) {
+          auto it = interval_node_map_.Find(input_interval);
+          if (it != interval_node_map_.end()) {
+            InterferenceNode* input_node = it->second;
+            CreateCoalesceOpportunity(node,
+                                      input_node,
+                                      CoalesceKind::kFirstInput,
+                                      interval->GetStart());
+          }
+        }
+      }
+    }
+
+    // An interval that starts at an instruction (that is, it is not split) may
+    // reuse the registers used by that instruction's inputs, depending on the
+    // location summary.
+    if (defined_by != nullptr) {
+      DCHECK(!interval->IsSplit());
+      LocationSummary* locations = defined_by->GetLocations();
+      if (!locations->OutputCanOverlapWithInputs()) {
+        HInputsRef inputs = defined_by->GetInputs();
+        for (size_t i = 0; i < inputs.size(); ++i) {
+          size_t def_point = defined_by->GetLifetimePosition();
+          // TODO: Getting the sibling at the def_point might not be quite what we want
+          //       for fixed inputs, since the use will be *at* the def_point rather than after.
+          LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
+          if (input_interval != nullptr &&
+              input_interval->HasHighInterval() == interval->HasHighInterval()) {
+            auto it = interval_node_map_.Find(input_interval);
+            if (it != interval_node_map_.end()) {
+              InterferenceNode* input_node = it->second;
+              CreateCoalesceOpportunity(node,
+                                        input_node,
+                                        CoalesceKind::kAnyInput,
+                                        interval->GetStart());
+            }
+          }
+        }
+      }
+    }
+
+    // Try to prevent moves into fixed input locations.
+    UsePosition* use = interval->GetFirstUse();
+    for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
+      // Skip past uses before the start of this interval.
+    }
+    for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
+      HInstruction* user = use->GetUser();
+      if (user == nullptr) {
+        // User may be null for certain intervals, such as temp intervals.
+        continue;
+      }
+      LocationSummary* locations = user->GetLocations();
+      Location input = locations->InAt(use->GetInputIndex());
+      if (input.IsRegister() || input.IsFpuRegister()) {
+        // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
+        //       is currently not supported.
+        InterferenceNode* fixed_node = input.IsRegister()
+            ? register_allocator_->physical_core_nodes_[input.reg()]
+            : register_allocator_->physical_fp_nodes_[input.reg()];
+        CreateCoalesceOpportunity(node,
+                                  fixed_node,
+                                  CoalesceKind::kFixedInput,
+                                  user->GetLifetimePosition());
+      }
+    }
+  }  // for node in prunable_nodes
+}
+
+static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
+  return node->GetOutDegree() < num_regs;
+}
+
+static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
+  return !IsLowDegreeNode(node, num_regs);
+}
+
+void ColoringIteration::PruneInterferenceGraph() {
+  DCHECK(pruned_nodes_.empty()
+      && simplify_worklist_.empty()
+      && freeze_worklist_.empty()
+      && spill_worklist_.empty());
+  // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
+  // and all others as high degree nodes. The distinction is important: low degree nodes are
+  // guaranteed a color, while high degree nodes are not.
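+  // (For example, with num_regs == 4, a node with three neighbors is always colorable,
+  // since its neighbors can use at most three of the four available colors.)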
+
+  // Build worklists. Note that the coalesce worklist has already been
+  // filled by FindCoalesceOpportunities().
+  for (InterferenceNode* node : prunable_nodes_) {
+    DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
+    DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned";
+    if (IsLowDegreeNode(node, num_regs_)) {
+      if (node->GetCoalesceOpportunities().empty()) {
+        // Simplify Worklist.
+        node->stage = NodeStage::kSimplifyWorklist;
+        simplify_worklist_.push_back(node);
+      } else {
+        // Freeze Worklist.
+        node->stage = NodeStage::kFreezeWorklist;
+        freeze_worklist_.push_back(node);
+      }
+    } else {
+      // Spill worklist.
+      node->stage = NodeStage::kSpillWorklist;
+      spill_worklist_.push(node);
+    }
+  }
+
+  // Prune graph.
+  // Note that we do not remove a node from its current worklist if it moves to another, so it may
+  // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
+  while (true) {
+    if (!simplify_worklist_.empty()) {
+      // Prune low-degree nodes.
+      // TODO: pop_back() should work as well, but it doesn't; we get a
+      //       failed check while pruning. We should look into this.
+      InterferenceNode* node = simplify_worklist_.front();
+      simplify_worklist_.pop_front();
+      DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
+      DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
+      DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
+      PruneNode(node);
+    } else if (!coalesce_worklist_.empty()) {
+      // Coalesce.
+      CoalesceOpportunity* opportunity = coalesce_worklist_.top();
+      coalesce_worklist_.pop();
+      if (opportunity->stage == CoalesceStage::kWorklist) {
+        Coalesce(opportunity);
+      }
+    } else if (!freeze_worklist_.empty()) {
+      // Freeze moves and prune a low-degree move-related node.
+      InterferenceNode* node = freeze_worklist_.front();
+      freeze_worklist_.pop_front();
+      if (node->stage == NodeStage::kFreezeWorklist) {
+        DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
+        DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
+        FreezeMoves(node);
+        PruneNode(node);
+      }
+    } else if (!spill_worklist_.empty()) {
+      // We spill the lowest-priority node, because pruning a node earlier
+      // gives it a higher chance of being spilled.
+      InterferenceNode* node = spill_worklist_.top();
+      spill_worklist_.pop();
+      if (node->stage == NodeStage::kSpillWorklist) {
+        DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
+        FreezeMoves(node);
+        PruneNode(node);
+      }
+    } else {
+      // Pruning complete.
+      break;
+    }
+  }
+  DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
+}
+
+void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
+  for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+    if (opportunity->stage == CoalesceStage::kActive) {
+      opportunity->stage = CoalesceStage::kWorklist;
+      coalesce_worklist_.push(opportunity);
+    }
+  }
+}
+
+void ColoringIteration::PruneNode(InterferenceNode* node) {
+  DCHECK_NE(node->stage, NodeStage::kPruned);
+  DCHECK(!node->IsPrecolored());
+  node->stage = NodeStage::kPruned;
+  pruned_nodes_.push(node);
+
+  for (InterferenceNode* adj : node->GetAdjacentNodes()) {
+    DCHECK(!adj->GetInterval()->IsSlowPathSafepoint())
+        << "Nodes should never interfere with synthesized safepoint nodes";
+    DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
+
+    if (adj->IsPrecolored()) {
+      // No effect on pre-colored nodes; they're never pruned.
+    } else {
+      // Remove the interference.
+      bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
+      DCHECK(adj->ContainsInterference(node))
+          << "Missing reflexive interference from non-fixed node";
+      adj->RemoveInterference(node);
+
+      // Handle transitions from high degree to low degree.
+      if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
+        EnableCoalesceOpportunities(adj);
+        for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
+          EnableCoalesceOpportunities(adj_adj);
+        }
+
+        DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
+        if (adj->IsMoveRelated()) {
+          adj->stage = NodeStage::kFreezeWorklist;
+          freeze_worklist_.push_back(adj);
+        } else {
+          adj->stage = NodeStage::kSimplifyWorklist;
+          simplify_worklist_.push_back(adj);
+        }
+      }
+    }
+  }
+}
+
+void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
+  if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
+    DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
+    node->stage = NodeStage::kSimplifyWorklist;
+    simplify_worklist_.push_back(node);
+  }
+}
+
+void ColoringIteration::FreezeMoves(InterferenceNode* node) {
+  for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+    if (opportunity->stage == CoalesceStage::kDefunct) {
+      // Constrained moves should remain constrained, since they will not be considered
+      // during last-chance coalescing.
+    } else {
+      opportunity->stage = CoalesceStage::kInactive;
+    }
+    InterferenceNode* other = opportunity->node_a->GetAlias() == node
+        ? opportunity->node_b->GetAlias()
+        : opportunity->node_a->GetAlias();
+    if (other != node && other->stage == NodeStage::kFreezeWorklist) {
+      DCHECK(IsLowDegreeNode(node, num_regs_));
+      CheckTransitionFromFreezeWorklist(other);
+    }
+  }
+}
+
+bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
+                                            InterferenceNode* into) {
+  if (!into->IsPrecolored()) {
+    // The uncolored heuristic will cover this case.
+    return false;
+  }
+  if (from->IsPair() || into->IsPair()) {
+    // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
+    //       are currently represented as two single fixed nodes in the graph, and `into` is
+    //       only one of them. (We may lose the implicit connections to the second one in a merge.)
+    return false;
+  }
+
+  // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
+  // Reasons an adjacent node `adj` can be "ok":
+  // (1) If `adj` is low degree, interference with `into` will not affect its existing
+  //     colorable guarantee. (Notice that coalescing cannot increase its degree.)
+  // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
+  // (3) If there's already an interference with `into`, coalescing will not add interferences.
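+  // (This is essentially the "George" coalescing test from the register allocation
+  // literature, restricted here to merges into pre-colored nodes.)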
+  for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+    if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
+      // Ok.
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
+                                           InterferenceNode* into) {
+  if (into->IsPrecolored()) {
+    // The pre-colored heuristic will handle this case.
+    return false;
+  }
+
+  // Arbitrary cap to improve compile time. Tests show that this has a negligible effect
+  // on generated code.
+  if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
+    return false;
+  }
+
+  // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
+  // of high degree. (Low degree neighbors can be ignored, because they will eventually be
+  // pruned from the interference graph in the simplify stage.)
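+  // (This is the conservative "Briggs" test: the combined node is guaranteed a color if
+  // it has fewer than num_regs neighbors of high degree.)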
+  size_t high_degree_interferences = 0;
+  for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+    if (IsHighDegreeNode(adj, num_regs_)) {
+      high_degree_interferences += from->EdgeWeightWith(adj);
+    }
+  }
+  for (InterferenceNode* adj : into->GetAdjacentNodes()) {
+    if (IsHighDegreeNode(adj, num_regs_)) {
+      if (from->ContainsInterference(adj)) {
+        // We've already counted this adjacent node.
+        // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
+        // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
+        // but remains conservative.)
+        if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
+          high_degree_interferences -= from->EdgeWeightWith(adj);
+        }
+      } else {
+        high_degree_interferences += into->EdgeWeightWith(adj);
+      }
+    }
+  }
+
+  return high_degree_interferences < num_regs_;
+}
+
+void ColoringIteration::Combine(InterferenceNode* from,
+                                InterferenceNode* into) {
+  from->SetAlias(into);
+
+  // Add interferences.
+  for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+    bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
+    AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
+    if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
+      // This is a (temporary) transition to a high degree node. Its degree will decrease again
+      // when we prune `from`, but it's best to be consistent about the current worklist.
+      adj->stage = NodeStage::kSpillWorklist;
+      spill_worklist_.push(adj);
+    }
+  }
+
+  // Add coalesce opportunities.
+  for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
+    if (opportunity->stage != CoalesceStage::kDefunct) {
+      into->AddCoalesceOpportunity(opportunity);
+    }
+  }
+  EnableCoalesceOpportunities(from);
+
+  // Prune and update worklists.
+  PruneNode(from);
+  if (IsLowDegreeNode(into, num_regs_)) {
+    // Coalesce(...) takes care of checking for a transition to the simplify worklist.
+    DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
+  } else if (into->stage == NodeStage::kFreezeWorklist) {
+    // This is a transition to a high degree node.
+    into->stage = NodeStage::kSpillWorklist;
+    spill_worklist_.push(into);
+  } else {
+    DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
+  }
+}
+
+void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
+  InterferenceNode* from = opportunity->node_a->GetAlias();
+  InterferenceNode* into = opportunity->node_b->GetAlias();
+  DCHECK_NE(from->stage, NodeStage::kPruned);
+  DCHECK_NE(into->stage, NodeStage::kPruned);
+
+  if (from->IsPrecolored()) {
+    // If we have one pre-colored node, make sure it's the `into` node.
+    std::swap(from, into);
+  }
+
+  if (from == into) {
+    // These nodes have already been coalesced.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+  } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
+    // These nodes interfere.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+    CheckTransitionFromFreezeWorklist(into);
+  } else if (PrecoloredHeuristic(from, into)
+          || UncoloredHeuristic(from, into)) {
+    // We can coalesce these nodes.
+    opportunity->stage = CoalesceStage::kDefunct;
+    Combine(from, into);
+    CheckTransitionFromFreezeWorklist(into);
+  } else {
+    // We cannot coalesce, but we may be able to later.
+    opportunity->stage = CoalesceStage::kActive;
+  }
+}
+
+// Build a mask with a bit set for each register assigned to some
+// interval in `intervals`.
+template <typename Container>
+static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
+  std::bitset<kMaxNumRegs> conflict_mask;
+  for (InterferenceNode* adjacent : intervals) {
+    LiveInterval* conflicting = adjacent->GetInterval();
+    if (conflicting->HasRegister()) {
+      conflict_mask.set(conflicting->GetRegister());
+      if (conflicting->HasHighInterval()) {
+        DCHECK(conflicting->GetHighInterval()->HasRegister());
+        conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
+      }
+    } else {
+      DCHECK(!conflicting->HasHighInterval()
+          || !conflicting->GetHighInterval()->HasRegister());
+    }
+  }
+  return conflict_mask;
+}
+
+bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
+  return processing_core_regs
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+static bool RegisterIsAligned(size_t reg) {
+  return reg % 2 == 0;
+}
+
+static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
+  // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
+  // Note that CTZ is undefined if all bits are 0, so we special-case it.
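+  // For example, if conflict_mask is 0b0111 (registers 0-2 taken), ~conflict_mask has
+  // bit 3 as its lowest set bit, so CTZ returns 3: the first free register.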
+  return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+}
+
+bool ColoringIteration::ColorInterferenceGraph() {
+  DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
+  ArenaVector<LiveInterval*> colored_intervals(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+  bool successful = true;
+
+  while (!pruned_nodes_.empty()) {
+    InterferenceNode* node = pruned_nodes_.top();
+    pruned_nodes_.pop();
+    LiveInterval* interval = node->GetInterval();
+    size_t reg = 0;
+
+    InterferenceNode* alias = node->GetAlias();
+    if (alias != node) {
+      // This node was coalesced with another.
+      LiveInterval* alias_interval = alias->GetInterval();
+      if (alias_interval->HasRegister()) {
+        reg = alias_interval->GetRegister();
+        DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
+            << "This node conflicts with the register it was coalesced with";
+      } else {
+        DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
+            << "Move coalescing was not conservative, causing a node to be coalesced "
+            << "with another node that could not be colored";
+        if (interval->RequiresRegister()) {
+          successful = false;
+        }
+      }
+    } else {
+      // Search for free register(s).
+      std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
+      if (interval->HasHighInterval()) {
+        // Note that the graph coloring allocator assumes that pair intervals are aligned here,
+        // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
+        // change the alignment requirements here, we will have to update the algorithm (e.g.,
+        // be more conservative about the weight of edges adjacent to pair nodes.)
+        while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
+          reg += 2;
+        }
+
+        // Try to use a caller-save register first.
+        for (size_t i = 0; i < num_regs_ - 1; i += 2) {
+          bool low_caller_save  = register_allocator_->IsCallerSave(i, processing_core_regs_);
+          bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
+          if (!conflict_mask[i] && !conflict_mask[i + 1]) {
+            if (low_caller_save && high_caller_save) {
+              reg = i;
+              break;
+            } else if (low_caller_save || high_caller_save) {
+              reg = i;
+              // Keep looking to try to get both parts in caller-save registers.
+            }
+          }
+        }
+      } else {
+        // Not a pair interval.
+        reg = FindFirstZeroInConflictMask(conflict_mask);
+
+        // Try to use caller-save registers first.
+        for (size_t i = 0; i < num_regs_; ++i) {
+          if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
+            reg = i;
+            break;
+          }
+        }
+      }
+
+      // Last-chance coalescing.
+      for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+        if (opportunity->stage == CoalesceStage::kDefunct) {
+          continue;
+        }
+        LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
+            ? opportunity->node_b->GetAlias()->GetInterval()
+            : opportunity->node_a->GetAlias()->GetInterval();
+        if (other_interval->HasRegister()) {
+          size_t coalesce_register = other_interval->GetRegister();
+          if (interval->HasHighInterval()) {
+            if (!conflict_mask[coalesce_register] &&
+                !conflict_mask[coalesce_register + 1] &&
+                RegisterIsAligned(coalesce_register)) {
+              reg = coalesce_register;
+              break;
+            }
+          } else if (!conflict_mask[coalesce_register]) {
+            reg = coalesce_register;
+            break;
+          }
+        }
+      }
+    }
+
+    if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
+      // Assign register.
+      DCHECK(!interval->HasRegister());
+      interval->SetRegister(reg);
+      colored_intervals.push_back(interval);
+      if (interval->HasHighInterval()) {
+        DCHECK(!interval->GetHighInterval()->HasRegister());
+        interval->GetHighInterval()->SetRegister(reg + 1);
+        colored_intervals.push_back(interval->GetHighInterval());
+      }
+    } else if (interval->RequiresRegister()) {
+      // The interference graph is too dense to color. Make it sparser by
+      // splitting this live interval.
+      successful = false;
+      register_allocator_->SplitAtRegisterUses(interval);
+      // We continue coloring, because there may be additional intervals that cannot
+      // be colored, and that we should split.
+    } else {
+      // Spill.
+      node->SetNeedsSpillSlot();
+    }
+  }
+
+  // If unsuccessful, reset all register assignments.
+  if (!successful) {
+    for (LiveInterval* interval : colored_intervals) {
+      interval->ClearRegister();
+    }
+  }
+
+  return successful;
+}
+
+size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters(
+    const ArenaVector<InterferenceNode*>& safepoints) {
+  size_t max_safepoint_live_regs = 0;
+  for (InterferenceNode* safepoint : safepoints) {
+    DCHECK(safepoint->GetInterval()->IsSlowPathSafepoint());
+    std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(safepoint->GetAdjacentNodes());
+    size_t live_regs = conflict_mask.count();
+    max_safepoint_live_regs = std::max(max_safepoint_live_regs, live_regs);
+  }
+  return max_safepoint_live_regs;
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
+  // The register allocation resolver will organize the stack based on value type,
+  // so we assign stack slots for each value type separately.
+  ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+  ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+  ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+  ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // The set of parent intervals already handled.
+  ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // Find nodes that need spill slots.
+  for (InterferenceNode* node : nodes) {
+    if (!node->NeedsSpillSlot()) {
+      continue;
+    }
+
+    LiveInterval* parent = node->GetInterval()->GetParent();
+    if (seen.find(parent) != seen.end()) {
+      // We've already handled this interval.
+      // This can happen if multiple siblings of the same interval request a stack slot.
+      continue;
+    }
+    seen.insert(parent);
+
+    HInstruction* defined_by = parent->GetDefinedBy();
+    if (parent->HasSpillSlot()) {
+      // We already have a spill slot for this value that we can reuse.
+    } else if (defined_by->IsParameterValue()) {
+      // Parameters already have a stack slot.
+      parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+    } else if (defined_by->IsCurrentMethod()) {
+      // The current method is always at stack slot 0.
+      parent->SetSpillSlot(0);
+    } else if (defined_by->IsConstant()) {
+      // Constants don't need a spill slot.
+    } else {
+      // We need to find a spill slot for this interval. Place it in the correct
+      // worklist to be processed later.
+      switch (node->GetInterval()->GetType()) {
+        case Primitive::kPrimDouble:
+          double_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimLong:
+          long_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimFloat:
+          float_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimNot:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimShort:
+          int_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimVoid:
+          LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
+          UNREACHABLE();
+      }
+    }
+  }
+
+  // Color spill slots for each value type.
+  ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
+  ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
+  ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
+  ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
+}
+
+void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+                                                  size_t* num_stack_slots_used) {
+  // We cannot use the original interference graph here because spill slots are assigned to
+  // all of the siblings of an interval, whereas an interference node represents only a single
+  // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
+  // by position, and assigning the lowest spill slot available when we encounter an interval
+  // beginning. We ignore lifetime holes for simplicity.
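+  // (For example, intervals spanning [0,10) and [4,20) receive slots 0 and 1 respectively;
+  // a third interval beginning at position 12 can then reuse slot 0.)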
+  ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
+    LiveInterval* parent_interval = *it;
+    DCHECK(parent_interval->IsParent());
+    DCHECK(!parent_interval->HasSpillSlot());
+    size_t start = parent_interval->GetStart();
+    size_t end = parent_interval->GetLastSibling()->GetEnd();
+    DCHECK_LT(start, end);
+    interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
+    interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
+  }
+
+  // Sort by position.
+  // We explicitly ignore the third entry of each tuple (the interval pointer) in order
+  // to maintain determinism.
+  std::sort(interval_endpoints.begin(), interval_endpoints.end(),
+            [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
+                const std::tuple<size_t, bool, LiveInterval*>& rhs) {
+    return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+         < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+  });
+
+  ArenaBitVector taken(allocator_, 0, true);
+  for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
+    // Extract information from the current tuple.
+    LiveInterval* parent_interval;
+    bool is_interval_beginning;
+    size_t position;
+    std::tie(position, is_interval_beginning, parent_interval) = *it;
+
+    bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+
+    if (is_interval_beginning) {
+      DCHECK(!parent_interval->HasSpillSlot());
+      DCHECK_EQ(position, parent_interval->GetStart());
+
+      // Find a free stack slot.
+      size_t slot = 0;
+      for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
+        // Skip taken slots.
+      }
+      parent_interval->SetSpillSlot(slot);
+
+      *num_stack_slots_used = std::max(*num_stack_slots_used,
+                                       needs_two_slots ? slot + 2 : slot + 1);
+      if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+        // The parallel move resolver requires that there be an even number of spill slots
+        // allocated for pair value types.
+        ++(*num_stack_slots_used);
+      }
+
+      taken.SetBit(slot);
+      if (needs_two_slots) {
+        taken.SetBit(slot + 1);
+      }
+    } else {
+      DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
+      DCHECK(parent_interval->HasSpillSlot());
+
+      // Free up the stack slot used by this interval.
+      size_t slot = parent_interval->GetSpillSlot();
+      DCHECK(taken.IsBitSet(slot));
+      DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
+      taken.ClearBit(slot);
+      if (needs_two_slots) {
+        taken.ClearBit(slot + 1);
+      }
+    }
+  }
+  DCHECK_EQ(taken.NumSetBits(), 0u);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
new file mode 100644
index 0000000..ed12561
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class Location;
+class SsaLivenessAnalysis;
+class InterferenceNode;
+struct CoalesceOpportunity;
+enum class CoalesceKind;
+
+/**
+ * A graph coloring register allocator.
+ *
+ * The algorithm proceeds as follows:
+ * (1) Build an interference graph, where nodes represent live intervals, and edges represent
+ *     interferences between two intervals. Coloring this graph with k colors is isomorphic to
+ *     finding a valid register assignment with k registers.
+ * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
+ *     guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
+ *     different color.) As we prune nodes from the graph, more nodes may drop below degree k,
+ *     enabling further pruning. The key is to maintain the pruning order in a stack, so that we
+ *     can color the nodes in the reverse order.
+ *     When there are no more nodes with degree less than k, we start pruning alternate nodes based
+ *     on heuristics. Since these nodes are not guaranteed a color, we are careful to
+ *     prioritize nodes that require a register. We also prioritize short intervals, because
+ *     short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
+ *     a node amounts to pruning it later, since it will have fewer interferences if we prune other
+ *     nodes first.
+ * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
+ *     a node a color, we do one of two things:
+ *     - If the node requires a register, we consider the current coloring attempt a failure.
+ *       However, we split the node's live interval in order to make the interference graph
+ *       sparser, so that future coloring attempts may succeed.
+ *     - If the node does not require a register, we simply assign it a location on the stack.
+ *
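+ * For example, with k = 2 and three mutually interfering intervals, no node has degree
+ * less than k, so one node is pruned heuristically; the remaining two then drop below
+ * degree k and are guaranteed colors, while the heuristically pruned node may fail to
+ * get a color when popped, and will then be spilled or split.
+ *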
+ * If iterative move coalescing is enabled, the algorithm also attempts to conservatively
+ * combine nodes in the graph that would prefer to have the same color. (For example, the output
+ * of a phi instruction would prefer to have the same register as at least one of its inputs.)
+ * There are several additional steps involved with this:
+ * - We look for coalesce opportunities by examining each live interval, a step similar to that
+ *   used by linear scan when looking for register hints.
+ * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist
+ *   of low degree nodes that have associated coalesce opportunities. Only when we run out of
+ *   coalesce opportunities do we start pruning coalesce-associated nodes.
+ * - When pruning a node, if any nodes transition from high degree to low degree, we add
+ *   associated coalesce opportunities to the worklist, since these opportunities may now succeed.
+ * - Whether two nodes can be combined is decided by two different heuristics--one used when
+ *   coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
+ *   It is vital that we only combine two nodes if the node that remains is guaranteed to receive
+ *   a color. This is because additional spilling is more costly than failing to coalesce.
+ * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
+ *   to be used as last-chance register hints when coloring. If nothing else, we try to use
+ *   caller-save registers before callee-save registers.
+ *
+ * A good reference for graph coloring register allocation is
+ * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
+ */
+class RegisterAllocatorGraphColor : public RegisterAllocator {
+ public:
+  RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+                              CodeGenerator* codegen,
+                              const SsaLivenessAnalysis& analysis,
+                              bool iterative_move_coalescing = true);
+  ~RegisterAllocatorGraphColor() OVERRIDE {}
+
+  void AllocateRegisters() OVERRIDE;
+
+  bool Validate(bool log_fatal_on_failure);
+
+ private:
+  // Collect all intervals and prepare for register allocation.
+  void ProcessInstructions();
+  void ProcessInstruction(HInstruction* instruction);
+
+  // If any inputs require specific registers, block those registers
+  // at the position of this instruction.
+  void CheckForFixedInputs(HInstruction* instruction);
+
+  // If the output of an instruction requires a specific register, split
+  // the interval and assign the register to the first part.
+  void CheckForFixedOutput(HInstruction* instruction);
+
+  // Add all applicable safepoints to a live interval.
+  // Currently depends on instruction processing order.
+  void AddSafepointsFor(HInstruction* instruction);
+
+  // Collect all live intervals associated with the temporary locations
+  // needed by an instruction.
+  void CheckForTempLiveIntervals(HInstruction* instruction);
+
+  // If a safepoint is needed, add a synthesized interval to later record
+  // the number of live registers at this point.
+  void CheckForSafepoint(HInstruction* instruction);
+
+  // Split an interval, but only if `position` is inside of `interval`.
+  // Return either the new interval, or the original interval if not split.
+  static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
+
+  // To ensure every graph can be colored, split live intervals
+  // at their register defs and uses. This creates short intervals with low
+  // degree in the interference graph, which are prioritized during graph
+  // coloring.
+  void SplitAtRegisterUses(LiveInterval* interval);
+
+  // If the given instruction is a catch phi, give it a spill slot.
+  void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
+
+  // Ensure that the given register cannot be allocated for a given range.
+  void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+  bool IsCallerSave(size_t reg, bool processing_core_regs);
+
+  // Return the maximum number of registers live at safepoints,
+  // based on the outgoing interference edges of safepoint nodes.
+  size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints);
+
+  // Assign stack slots to a list of intervals, ensuring that interfering intervals are not
+  // assigned the same stack slot.
+  void ColorSpillSlots(ArenaVector<LiveInterval*>* nodes,
+                       size_t* num_stack_slots_used);
+
+  // Provide stack slots to nodes that need them.
+  void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes);
+
+  // Whether iterative move coalescing should be performed. Iterative move coalescing
+  // improves code quality, but increases compile time.
+  const bool iterative_move_coalescing_;
+
+  // Live intervals, split by kind (core and floating point).
+  // These should not contain high intervals, as those are represented by
+  // the corresponding low interval throughout register allocation.
+  ArenaVector<LiveInterval*> core_intervals_;
+  ArenaVector<LiveInterval*> fp_intervals_;
+
+  // Intervals for temporaries, saved for special handling in the resolution phase.
+  ArenaVector<LiveInterval*> temp_intervals_;
+
+  // Safepoints, saved for special handling while processing instructions.
+  ArenaVector<HInstruction*> safepoints_;
+
+  // Interference nodes representing specific registers. These are "pre-colored" nodes
+  // in the interference graph.
+  ArenaVector<InterferenceNode*> physical_core_nodes_;
+  ArenaVector<InterferenceNode*> physical_fp_nodes_;
+
+  // Allocated stack slot counters.
+  size_t num_int_spill_slots_;
+  size_t num_double_spill_slots_;
+  size_t num_float_spill_slots_;
+  size_t num_long_spill_slots_;
+  size_t catch_phi_spill_slot_counter_;
+
+  // Number of stack slots needed for the pointer to the current method.
+  // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
+  const size_t reserved_art_method_slots_;
+
+  // Number of stack slots needed for outgoing arguments.
+  const size_t reserved_out_slots_;
+
+  // The number of globally blocked core and floating point registers, such as the stack pointer.
+  size_t number_of_globally_blocked_core_regs_;
+  size_t number_of_globally_blocked_fp_regs_;
+
+  // The maximum number of registers live at safepoints. Needed by the code generator.
+  size_t max_safepoint_live_core_regs_;
+  size_t max_safepoint_live_fp_regs_;
+
+  friend class ColoringIteration;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
new file mode 100644
index 0000000..768ed2d
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -0,0 +1,1225 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_linear_scan.h"
+
+#include <iostream>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+#include "base/enums.h"
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
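+// Note: assigning -1 to a size_t wraps around to its maximum value, so
+// kMaxLifetimePosition effectively means "free/unused for the rest of the method".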
+static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegister.
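+// For example, on ARM the double register D0 overlaps the pair (S0, S1): S0 is
+// the aligned low half and S1 the high half. A low half in S2 whose high half
+// ended up in S5 instead of S3 would be an unaligned pair.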
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
+  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
+}
+
+RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+                                                         CodeGenerator* codegen,
+                                                         const SsaLivenessAnalysis& liveness)
+      : RegisterAllocator(allocator, codegen, liveness),
+        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_(nullptr),
+        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        catch_phi_spill_slots_(0),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        processing_core_registers_(false),
+        number_of_registers_(-1),
+        registers_array_(nullptr),
+        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
+        reserved_out_slots_(0),
+        maximum_number_of_live_core_registers_(0),
+        maximum_number_of_live_fp_registers_(0) {
+  temp_intervals_.reserve(4);
+  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+
+  codegen->SetupBlockedRegisters();
+  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
+  // Always reserve for the current method and the graph's max out registers.
+  // TODO: compute it instead.
+  // ArtMethod* takes 2 vregs for 64 bits.
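+  // For example, on a 64-bit target: an 8-byte ArtMethod* over 4-byte vregs
+  // gives 8 / 4 = 2 reserved slots, plus the graph's maximum out vregs.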
+  size_t ptr_size = static_cast<size_t>(InstructionSetPointerSize(codegen->GetInstructionSet()));
+  reserved_out_slots_ = ptr_size / kVRegSize + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+}
+
+static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
+  if (interval == nullptr) return false;
+  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
+      && (interval->GetType() != Primitive::kPrimFloat);
+  return processing_core_registers == is_core_register;
+}
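+// For example, kPrimInt and kPrimLong intervals are handled in the core pass,
+// while kPrimFloat and kPrimDouble intervals are handled in the FP pass.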
+
+void RegisterAllocatorLinearScan::AllocateRegisters() {
+  AllocateRegistersInternal();
+  RegisterAllocationResolver(allocator_, codegen_, liveness_)
+      .Resolve(maximum_number_of_live_core_registers_,
+               maximum_number_of_live_fp_registers_,
+               reserved_out_slots_,
+               int_spill_slots_.size(),
+               long_spill_slots_.size(),
+               float_spill_slots_.size(),
+               double_spill_slots_.size(),
+               catch_phi_spill_slots_,
+               temp_intervals_);
+
+  if (kIsDebugBuild) {
+    processing_core_registers_ = true;
+    ValidateInternal(true);
+    processing_core_registers_ = false;
+    ValidateInternal(true);
+    // Check that the linear order is still correct with regard to lifetime positions.
+    // Since only parallel moves have been inserted during the register allocation,
+    // these checks are mostly for making sure these moves have been added correctly.
+    size_t current_liveness = 0;
+    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
+        current_liveness = instruction->GetLifetimePosition();
+      }
+      for (HInstructionIterator inst_it(block->GetInstructions());
+           !inst_it.Done();
+           inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
+        current_liveness = instruction->GetLifetimePosition();
+      }
+    }
+  }
+}
+
+void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
+  int reg = location.reg();
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_register_intervals_[reg]
+      : physical_fp_register_intervals_[reg];
+  Primitive::Type type = location.IsRegister()
+      ? Primitive::kPrimInt
+      : Primitive::kPrimFloat;
+  if (interval == nullptr) {
+    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
+    if (location.IsRegister()) {
+      physical_core_register_intervals_[reg] = interval;
+    } else {
+      physical_fp_register_intervals_[reg] = interval;
+    }
+  }
+  DCHECK(interval->GetRegister() == reg);
+  interval->AddRange(start, end);
+}
+
+void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
+void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
+  // Iterate in post order, so that the unhandled list stays sorted, with the last
+  // added interval being the one with the lowest start position.
+  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
+         back_it.Advance()) {
+      ProcessInstruction(back_it.Current());
+    }
+    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      ProcessInstruction(inst_it.Current());
+    }
+
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in registers.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
+  }
+
+  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
+  processing_core_registers_ = true;
+  unhandled_ = &unhandled_core_intervals_;
+  for (LiveInterval* fixed : physical_core_register_intervals_) {
+    if (fixed != nullptr) {
+      // A fixed interval is added to inactive_ instead of unhandled_.
+      // It is also the only type of inactive interval whose start position
+      // can be after the current interval during linear scan.
+      // A fixed interval is never split and never moves to unhandled_.
+      inactive_.push_back(fixed);
+    }
+  }
+  LinearScan();
+
+  inactive_.clear();
+  active_.clear();
+  handled_.clear();
+
+  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
+  processing_core_registers_ = false;
+  unhandled_ = &unhandled_fp_intervals_;
+  for (LiveInterval* fixed : physical_fp_register_intervals_) {
+    if (fixed != nullptr) {
+      // A fixed interval is added to inactive_ instead of unhandled_.
+      // It is also the only type of inactive interval whose start position
+      // can be after the current interval during linear scan.
+      // A fixed interval is never split and never moves to unhandled_.
+      inactive_.push_back(fixed);
+    }
+  }
+  LinearScan();
+}
+
+void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+
+  if (locations == nullptr) return;
+
+  // Create synthesized intervals for temporaries.
+  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+    Location temp = locations->GetTemp(i);
+    if (temp.IsRegister() || temp.IsFpuRegister()) {
+      BlockRegister(temp, position, position + 1);
+      // Ensure that an explicit temporary register is marked as being allocated.
+      codegen_->AddAllocatedRegister(temp);
+    } else {
+      DCHECK(temp.IsUnallocated());
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          temp_intervals_.push_back(interval);
+          interval->AddTempUse(instruction, i);
+          unhandled_core_intervals_.push_back(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          temp_intervals_.push_back(interval);
+          interval->AddTempUse(instruction, i);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(/* is_temp */ true);
+            LiveInterval* high = interval->GetHighInterval();
+            temp_intervals_.push_back(high);
+            unhandled_fp_intervals_.push_back(high);
+          }
+          unhandled_fp_intervals_.push_back(interval);
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
+    }
+  }
+
+  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+      && (instruction->GetType() != Primitive::kPrimFloat);
+
+  if (locations->NeedsSafepoint()) {
+    if (codegen_->IsLeafMethod()) {
+      // TODO: We do this here because we do not want the suspend check to artificially
+      // create live registers. We should find another place, but this is currently the
+      // simplest.
+      DCHECK(instruction->IsSuspendCheckEntry());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+    safepoints_.push_back(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires to know the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      AddSorted(&unhandled_core_intervals_, interval);
+      AddSorted(&unhandled_fp_intervals_, interval);
+    }
+  }
+
+  if (locations->WillCall()) {
+    BlockRegisters(position, position + 1, /* caller_save_only */ true);
+  }
+
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+    Location input = locations->InAt(i);
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
+    }
+  }
+
+  LiveInterval* current = instruction->GetLiveInterval();
+  if (current == nullptr) return;
+
+  ArenaVector<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
+  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
+
+  if (codegen_->NeedsTwoRegisters(current->GetType())) {
+    current->AddHighInterval();
+  }
+
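+  // Walk the safepoints from the most recently collected to the oldest. Since
+  // instructions are processed backwards, this visits safepoints in order of
+  // increasing lifetime position.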
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+    size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Check that safepoints_ is sorted by strictly decreasing lifetime position.
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+    if (safepoint_position == current->GetStart()) {
+      // The safepoint is for this instruction, so the location of the instruction
+      // does not need to be saved.
+      DCHECK_EQ(safepoint_index, safepoints_.size());
+      DCHECK_EQ(safepoint, instruction);
+      continue;
+    } else if (current->IsDeadAt(safepoint_position)) {
+      break;
+    } else if (!current->Covers(safepoint_position)) {
+      // Hole in the interval.
+      continue;
+    }
+    current->AddSafepoint(safepoint);
+  }
+  current->ResetSearchCache();
+
+  // Some instructions define their output in a fixed register or stack slot. We need
+  // to ensure we know these locations before doing register allocation. For a
+  // given register, we create an interval that covers these locations. The register
+  // will be unavailable at these locations when trying to allocate one for an
+  // interval.
+  //
+  // The backward walk ensures the ranges are ordered by increasing start position.
+  Location output = locations->Out();
+  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
+    Location first = locations->InAt(0);
+    if (first.IsRegister() || first.IsFpuRegister()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.reg());
+    } else if (first.IsPair()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.low());
+      LiveInterval* high = current->GetHighInterval();
+      high->SetRegister(first.high());
+      high->SetFrom(position + 1);
+    }
+  } else if (output.IsRegister() || output.IsFpuRegister()) {
+    // Shift the interval's start by one to account for the blocked register.
+    current->SetFrom(position + 1);
+    current->SetRegister(output.reg());
+    BlockRegister(output, position, position + 1);
+  } else if (output.IsPair()) {
+    current->SetFrom(position + 1);
+    current->SetRegister(output.low());
+    LiveInterval* high = current->GetHighInterval();
+    high->SetRegister(output.high());
+    high->SetFrom(position + 1);
+    BlockRegister(output.ToLow(), position, position + 1);
+    BlockRegister(output.ToHigh(), position, position + 1);
+  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
+    current->SetSpillSlot(output.GetStackIndex());
+  } else {
+    DCHECK(output.IsUnallocated() || output.IsConstant());
+  }
+
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
+  }
+
+  // If needed, add interval to the list of unhandled intervals.
+  if (current->HasSpillSlot() || instruction->IsConstant()) {
+    // Split just before first register use.
+    size_t first_register_use = current->FirstRegisterUse();
+    if (first_register_use != kNoLifetime) {
+      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+      // Don't add directly to `unhandled`; it needs to stay sorted, and the start
+      // of this new interval might be after intervals already in the list.
+      AddSorted(&unhandled, split);
+    } else {
+      // Nothing to do, we won't allocate a register for this value.
+    }
+  } else {
+    // Don't add directly to `unhandled`; temp or safepoint intervals
+    // for this instruction may have been added, and those can be
+    // processed first.
+    AddSorted(&unhandled, current);
+  }
+}
+
+class AllRangesIterator : public ValueObject {
+ public:
+  explicit AllRangesIterator(LiveInterval* interval)
+      : current_interval_(interval),
+        current_range_(interval->GetFirstRange()) {}
+
+  bool Done() const { return current_interval_ == nullptr; }
+  LiveRange* CurrentRange() const { return current_range_; }
+  LiveInterval* CurrentInterval() const { return current_interval_; }
+
+  void Advance() {
+    current_range_ = current_range_->GetNext();
+    if (current_range_ == nullptr) {
+      current_interval_ = current_interval_->GetNextSibling();
+      if (current_interval_ != nullptr) {
+        current_range_ = current_interval_->GetFirstRange();
+      }
+    }
+  }
+
+ private:
+  LiveInterval* current_interval_;
+  LiveRange* current_range_;
+
+  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
+  // To simplify unit testing, we eagerly create the array of intervals, and
+  // call the helper method.
+  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
+      intervals.push_back(instruction->GetLiveInterval());
+    }
+  }
+
+  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
+      ? &physical_core_register_intervals_
+      : &physical_fp_register_intervals_;
+  for (LiveInterval* fixed : *physical_register_intervals) {
+    if (fixed != nullptr) {
+      intervals.push_back(fixed);
+    }
+  }
+
+  for (LiveInterval* temp : temp_intervals_) {
+    if (ShouldProcess(processing_core_registers_, temp)) {
+      intervals.push_back(temp);
+    }
+  }
+
+  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
+                           allocator_, processing_core_registers_, log_fatal_on_failure);
+}
+
+void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
+  interval->Dump(stream);
+  stream << ": ";
+  if (interval->HasRegister()) {
+    if (interval->IsFloatingPoint()) {
+      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+    } else {
+      codegen_->DumpCoreRegister(stream, interval->GetRegister());
+    }
+  } else {
+    stream << "spilled";
+  }
+  stream << std::endl;
+}
+
+void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const {
+  stream << "inactive: " << std::endl;
+  for (LiveInterval* inactive_interval : inactive_) {
+    DumpInterval(stream, inactive_interval);
+  }
+  stream << "active: " << std::endl;
+  for (LiveInterval* active_interval : active_) {
+    DumpInterval(stream, active_interval);
+  }
+  stream << "unhandled: " << std::endl;
+  auto unhandled = (unhandled_ != nullptr) ?
+      unhandled_ : &unhandled_core_intervals_;
+  for (LiveInterval* unhandled_interval : *unhandled) {
+    DumpInterval(stream, unhandled_interval);
+  }
+  stream << "handled: " << std::endl;
+  for (LiveInterval* handled_interval : handled_) {
+    DumpInterval(stream, handled_interval);
+  }
+}
+
+// By the book implementation of a linear scan register allocator.
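+// Intervals move between four sets: unhandled_ (sorted by start position, not yet
+// reached), active_ (covering the current position), inactive_ (started earlier but
+// currently in a lifetime hole) and handled_ (expired).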
+void RegisterAllocatorLinearScan::LinearScan() {
+  while (!unhandled_->empty()) {
+    // (1) Remove interval with the lowest start position from unhandled.
+    LiveInterval* current = unhandled_->back();
+    unhandled_->pop_back();
+
+    // Make sure the interval is in an expected state.
+    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+    // Make sure we are going in the right order.
+    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
+    // Make sure a low interval is always with a high.
+    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
+    // Make sure a high interval is always with a low.
+    DCHECK(current->IsLowInterval() ||
+           unhandled_->empty() ||
+           !unhandled_->back()->IsHighInterval());
+
+    size_t position = current->GetStart();
+
+    // Remember the inactive_ size here since the ones moved to inactive_ from
+    // active_ below shouldn't need to be re-checked.
+    size_t inactive_intervals_to_handle = inactive_.size();
+
+    // (2) Remove currently active intervals that are dead at this position.
+    //     Move active intervals that have a lifetime hole at this position
+    //     to inactive.
+    auto active_kept_end = std::remove_if(
+        active_.begin(),
+        active_.end(),
+        [this, position](LiveInterval* interval) {
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (!interval->Covers(position)) {
+            inactive_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
+    active_.erase(active_kept_end, active_.end());
+
+    // (3) Remove currently inactive intervals that are dead at this position.
+    //     Move inactive intervals that cover this position to active.
+    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+    auto inactive_kept_end = std::remove_if(
+        inactive_.begin(),
+        inactive_to_handle_end,
+        [this, position](LiveInterval* interval) {
+          DCHECK(interval->GetStart() < position || interval->IsFixed());
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (interval->Covers(position)) {
+            active_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
+    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
+
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      if (processing_core_registers_) {
+        maximum_number_of_live_core_registers_ =
+          std::max(maximum_number_of_live_core_registers_, active_.size());
+      } else {
+        maximum_number_of_live_fp_registers_ =
+          std::max(maximum_number_of_live_fp_registers_, active_.size());
+      }
+      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
+      continue;
+    }
+
+    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+      DCHECK(!current->HasRegister());
+      // Allocating the low part was unsuccessful. The split interval for the high part
+      // will be handled next (it is in the `unhandled_` list).
+      continue;
+    }
+
+    // (4) Try to find an available register.
+    bool success = TryAllocateFreeReg(current);
+
+    // (5) If no register could be found, we need to spill.
+    if (!success) {
+      success = AllocateBlockedReg(current);
+    }
+
+    // (6) If the interval had a register allocated, add it to the list of active
+    //     intervals.
+    if (success) {
+      codegen_->AddAllocatedRegister(processing_core_registers_
+          ? Location::RegisterLocation(current->GetRegister())
+          : Location::FpuRegisterLocation(current->GetRegister()));
+      active_.push_back(current);
+      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+      }
+    }
+  }
+}
+
+static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
+  DCHECK(!interval->IsHighInterval());
+  // Note that the same instruction may occur multiple times in the input list,
+  // so `free_until` may have changed already.
+  // Since `position` is not the current scan position, we need to use CoversSlow.
+  if (interval->IsDeadAt(position)) {
+    // Set the register to be free. Note that inactive intervals might later
+    // update this.
+    free_until[interval->GetRegister()] = kMaxLifetimePosition;
+    if (interval->HasHighInterval()) {
+      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
+      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+    }
+  } else if (!interval->CoversSlow(position)) {
+    // The interval becomes inactive at `defined_by`. We make its register
+    // available only until the next use strictly after `defined_by`.
+    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+    if (interval->HasHighInterval()) {
+      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
+      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+    }
+  }
+}
+
+// Find a free register. If multiple are found, pick the register that
+// is free the longest.
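+// For example (illustrative values, ignoring hints and the caller-save preference),
+// with free_until = {10, 40, 0}, register 1 is picked: register 2 is already in
+// use, and register 0 becomes unavailable sooner.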
+bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
+  size_t* free_until = registers_array_;
+
+  // First set all registers to be free.
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    free_until[i] = kMaxLifetimePosition;
+  }
+
+  // For each active interval, set its register to not free.
+  for (LiveInterval* interval : active_) {
+    DCHECK(interval->HasRegister());
+    free_until[interval->GetRegister()] = 0;
+  }
+
+  // An interval that starts at an instruction (that is, one that is not split) may
+  // reuse the registers used by the inputs of that instruction, based on the
+  // location summary.
+  HInstruction* defined_by = current->GetDefinedBy();
+  if (defined_by != nullptr && !current->IsSplit()) {
+    LocationSummary* locations = defined_by->GetLocations();
+    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
+      HInputsRef inputs = defined_by->GetInputs();
+      for (size_t i = 0; i < inputs.size(); ++i) {
+        // Take the last interval of the input. It is the location of that interval
+        // that will be used at `defined_by`.
+        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+        // Note that the interval may not have been processed yet.
+        // TODO: Handle non-split intervals last in the work list.
+        if (locations->InAt(i).IsValid()
+            && interval->HasRegister()
+            && interval->SameRegisterKind(*current)) {
+          // The input must be live until the end of `defined_by`, to comply with
+          // the linear scan algorithm. So we use `defined_by`'s end lifetime
+          // position to check whether the input is dead or is inactive after
+          // `defined_by`.
+          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+          size_t position = defined_by->GetLifetimePosition() + 1;
+          FreeIfNotCoverAt(interval, position, free_until);
+        }
+      }
+    }
+  }
+
+  // For each inactive interval, set its register to be free until
+  // the next intersection with `current`.
+  for (LiveInterval* inactive : inactive_) {
+    // Temp and slow-path-safepoint intervals have no holes.
+    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    if (!current->IsSplit() && !inactive->IsFixed()) {
+      // Neither current nor inactive are fixed.
+      // Thanks to SSA, a non-split interval starting in a hole of an
+      // inactive interval should never intersect with that inactive interval.
+      // This only holds for non-fixed intervals, because fixed intervals don't come from SSA.
+      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+      continue;
+    }
+
+    DCHECK(inactive->HasRegister());
+    if (free_until[inactive->GetRegister()] == 0) {
+      // Already used by some active interval. No need to intersect.
+      continue;
+    }
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      free_until[inactive->GetRegister()] =
+          std::min(free_until[inactive->GetRegister()], next_intersection);
+    }
+  }
+
+  int reg = kNoRegister;
+  if (current->HasRegister()) {
+    // Some instructions have a fixed register output.
+    reg = current->GetRegister();
+    if (free_until[reg] == 0) {
+      DCHECK(current->IsHighInterval());
+      // AllocateBlockedReg will spill the holder of the register.
+      return false;
+    }
+  } else {
+    DCHECK(!current->IsHighInterval());
+    int hint = current->FindFirstRegisterHint(free_until, liveness_);
+    if ((hint != kNoRegister)
+        // For simplicity, if the hint we are getting for a pair cannot be used,
+        // we are just going to allocate a new pair.
+        && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
+      DCHECK(!IsBlocked(hint));
+      reg = hint;
+    } else if (current->IsLowInterval()) {
+      reg = FindAvailableRegisterPair(free_until, current->GetStart());
+    } else {
+      reg = FindAvailableRegister(free_until, current);
+    }
+  }
+
+  DCHECK_NE(reg, kNoRegister);
+  // If we could not find a register, we need to spill.
+  if (free_until[reg] == 0) {
+    return false;
+  }
+
+  if (current->IsLowInterval()) {
+    // If the high register of this interval is not available, we need to spill.
+    int high_reg = current->GetHighInterval()->GetRegister();
+    if (high_reg == kNoRegister) {
+      high_reg = GetHighForLowRegister(reg);
+    }
+    if (free_until[high_reg] == 0) {
+      return false;
+    }
+  }
+
+  current->SetRegister(reg);
+  if (!current->IsDeadAt(free_until[reg])) {
+    // If the register is only available for a subset of live ranges
+    // covered by `current`, split `current` before the position where
+    // the register is not available anymore.
+    LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
+    DCHECK(split != nullptr);
+    AddSorted(unhandled_, split);
+  }
+  return true;
+}
+
+bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
+  return processing_core_registers_
+      ? blocked_core_registers_[reg]
+      : blocked_fp_registers_[reg];
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+  int reg = kNoRegister;
+  // Pick the register pair that is used the last.
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) continue;
+    if (!IsLowRegister(i)) continue;
+    int high_register = GetHighForLowRegister(i);
+    if (IsBlocked(high_register)) continue;
+    int existing_high_register = GetHighForLowRegister(reg);
+    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
+                        && next_use[high_register] >= next_use[existing_high_register])) {
+      reg = i;
+      if (next_use[i] == kMaxLifetimePosition
+          && next_use[high_register] == kMaxLifetimePosition) {
+        break;
+      }
+    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+      // If one of the current registers is known to be unavailable, just unconditionally
+      // try a new one.
+      reg = i;
+    }
+  }
+  return reg;
+}
+
+bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
+  return processing_core_registers_
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+  // We special case intervals that do not span a safepoint to try to find a caller-save
+  // register if one is available. We iterate from 0 to the number of registers,
+  // so if there are caller-save registers available at the end, we continue the iteration.
+  bool prefers_caller_save = !current->HasWillCallSafepoint();
+  int reg = kNoRegister;
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) {
+      // Register cannot be used. Continue.
+      continue;
+    }
+
+    // Best case: we found a register fully available.
+    if (next_use[i] == kMaxLifetimePosition) {
+      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
+        // We can get shorter encodings on some platforms by using
+        // small register numbers. So only update the candidate if the previous
+        // one was not available for the whole method.
+        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
+          reg = i;
+        }
+        // Continue the iteration in the hope of finding a caller save register.
+        continue;
+      } else {
+        reg = i;
+        // We know the register is good enough. Return it.
+        break;
+      }
+    }
+
+    // If we had no register before, take this one as a reference.
+    if (reg == kNoRegister) {
+      reg = i;
+      continue;
+    }
+
+    // Pick the register that is used the last.
+    if (next_use[i] > next_use[reg]) {
+      reg = i;
+      continue;
+    }
+  }
+  return reg;
+}
+
+// Remove interval and its other half if any. Return iterator to the following element.
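+// A low interval and its high half always occupy adjacent slots in these vectors
+// (the DCHECKs below verify this), so both can be erased with a single call.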
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
+    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
+  DCHECK(intervals->begin() <= pos && pos < intervals->end());
+  LiveInterval* interval = *pos;
+  if (interval->IsLowInterval()) {
+    DCHECK(pos + 1 < intervals->end());
+    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
+    return intervals->erase(pos, pos + 2);
+  } else if (interval->IsHighInterval()) {
+    DCHECK(intervals->begin() < pos);
+    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
+    return intervals->erase(pos - 1, pos + 1);
+  } else {
+    return intervals->erase(pos);
+  }
+}
+
+bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+                                                                           size_t first_register_use,
+                                                                           size_t* next_use) {
+  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+    LiveInterval* active = *it;
+    DCHECK(active->HasRegister());
+    if (active->IsFixed()) continue;
+    if (active->IsHighInterval()) continue;
+    if (first_register_use > next_use[active->GetRegister()]) continue;
+
+    // Split the first interval found that is either:
+    // 1) A non-pair interval.
+    // 2) A pair interval whose high is not low + 1.
+    // 3) A pair interval whose low is not even.
+    if (!active->IsLowInterval() ||
+        IsLowOfUnalignedPairInterval(active) ||
+        !IsLowRegister(active->GetRegister())) {
+      LiveInterval* split = Split(active, position);
+      if (split != active) {
+        handled_.push_back(active);
+      }
+      RemoveIntervalAndPotentialOtherHalf(&active_, it);
+      AddSorted(unhandled_, split);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Find the register whose next use is the furthest away, and spill the interval
+// that holds it. If the first use of `current` comes after that register's next
+// use, we spill `current` instead.
+bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
+  size_t first_register_use = current->FirstRegisterUse();
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    // The low interval has allocated the register for the high interval. If
+    // allocating the low interval required splitting both halves, we may end up
+    // in a situation where the high interval no longer has a register use.
+    // We must still proceed in order to split currently active and inactive
+    // uses of the high interval's register, and put the high interval in the
+    // active set.
+    DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
+  } else if (first_register_use == kNoLifetime) {
+    AllocateSpillSlotFor(current);
+    return false;
+  }
+
+  // First set all registers as not being used.
+  size_t* next_use = registers_array_;
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    next_use[i] = kMaxLifetimePosition;
+  }
+
+  // For each active interval, find the next use of its register after the
+  // start of current.
+  for (LiveInterval* active : active_) {
+    DCHECK(active->HasRegister());
+    if (active->IsFixed()) {
+      next_use[active->GetRegister()] = current->GetStart();
+    } else {
+      size_t use = active->FirstRegisterUseAfter(current->GetStart());
+      if (use != kNoLifetime) {
+        next_use[active->GetRegister()] = use;
+      }
+    }
+  }
+
+  // For each inactive interval, find the next use of its register after the
+  // start of current.
+  for (LiveInterval* inactive : inactive_) {
+    // Temp and slow-path-safepoint intervals have no holes.
+    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    if (!current->IsSplit() && !inactive->IsFixed()) {
+      // Neither current nor inactive are fixed.
+      // Thanks to SSA, a non-split interval starting in a hole of an
+      // inactive interval should never intersect with that inactive interval.
+      // This only holds for non-fixed intervals, because fixed intervals don't come from SSA.
+      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+      continue;
+    }
+    DCHECK(inactive->HasRegister());
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      if (inactive->IsFixed()) {
+        next_use[inactive->GetRegister()] =
+            std::min(next_intersection, next_use[inactive->GetRegister()]);
+      } else {
+        size_t use = inactive->FirstUseAfter(current->GetStart());
+        if (use != kNoLifetime) {
+          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+        }
+      }
+    }
+  }
+
+  int reg = kNoRegister;
+  bool should_spill = false;
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    reg = current->GetRegister();
+    // When allocating the low part, we made sure the high register was available.
+    DCHECK_LT(first_register_use, next_use[reg]);
+  } else if (current->IsLowInterval()) {
+    reg = FindAvailableRegisterPair(next_use, first_register_use);
+    // We should spill if both registers are not available.
+    should_spill = (first_register_use >= next_use[reg])
+      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+  } else {
+    DCHECK(!current->IsHighInterval());
+    reg = FindAvailableRegister(next_use, current);
+    should_spill = (first_register_use >= next_use[reg]);
+  }
+
+  DCHECK_NE(reg, kNoRegister);
+  if (should_spill) {
+    DCHECK(!current->IsHighInterval());
+    bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
+    if (is_allocation_at_use_site) {
+      if (!current->IsLowInterval()) {
+        DumpInterval(std::cerr, current);
+        DumpAllIntervals(std::cerr);
+        // This situation has the potential to loop infinitely, so we make it a non-debug CHECK.
+        HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
+        CHECK(false) << "There are not enough registers available for "
+          << current->GetParent()->GetDefinedBy()->DebugName() << " "
+          << current->GetParent()->GetDefinedBy()->GetId()
+          << " at " << first_register_use - 1 << " "
+          << (at == nullptr ? "" : at->DebugName());
+      }
+
+      // If we're allocating a register for `current` because the instruction at
+      // that position requires it, but we think we should spill, then there are
+      // non-pair intervals or unaligned pair intervals blocking the allocation.
+      // We split the first interval found, and put ourselves first in the
+      // `unhandled_` list.
+      bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+                                                              first_register_use,
+                                                              next_use);
+      DCHECK(success);
+      LiveInterval* existing = unhandled_->back();
+      DCHECK(existing->IsHighInterval());
+      DCHECK_EQ(existing->GetLowInterval(), current);
+      unhandled_->push_back(current);
+    } else {
+      // If the first use of that instruction is after the last use of the found
+      // register, we split this interval just before its first register use.
+      AllocateSpillSlotFor(current);
+      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+      DCHECK(current != split);
+      AddSorted(unhandled_, split);
+    }
+    return false;
+  } else {
+    // Use this register and spill the active and inactive intervals that
+    // hold that register.
+    current->SetRegister(reg);
+
+    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+      LiveInterval* active = *it;
+      if (active->GetRegister() == reg) {
+        DCHECK(!active->IsFixed());
+        LiveInterval* split = Split(active, current->GetStart());
+        if (split != active) {
+          handled_.push_back(active);
+        }
+        RemoveIntervalAndPotentialOtherHalf(&active_, it);
+        AddSorted(unhandled_, split);
+        break;
+      }
+    }
+
+    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
+    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
+      LiveInterval* inactive = *it;
+      bool erased = false;
+      if (inactive->GetRegister() == reg) {
+        if (!current->IsSplit() && !inactive->IsFixed()) {
+          // Neither current nor inactive are fixed.
+          // Thanks to SSA, a non-split interval starting in a hole of an
+          // inactive interval should never intersect with that inactive interval.
+          // This only holds for non-fixed intervals, because fixed intervals don't come from SSA.
+          DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+        } else {
+          size_t next_intersection = inactive->FirstIntersectionWith(current);
+          if (next_intersection != kNoLifetime) {
+            if (inactive->IsFixed()) {
+              LiveInterval* split = Split(current, next_intersection);
+              DCHECK_NE(split, current);
+              AddSorted(unhandled_, split);
+            } else {
+              // Split at the start of `current`, which will lead to splitting
+              // at the end of the lifetime hole of `inactive`.
+              LiveInterval* split = Split(inactive, current->GetStart());
+              // If it's inactive, it must start before the current interval.
+              DCHECK_NE(split, inactive);
+              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
+              erased = true;
+              handled_.push_back(inactive);
+              AddSorted(unhandled_, split);
+            }
+          }
+        }
+      }
+      // If we have erased the element, `it` already points to the next element.
+      // Otherwise we need to move to the next element.
+      if (!erased) {
+        ++it;
+      }
+    }
+
+    return true;
+  }
+}
+
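+// `array` is kept sorted by decreasing start position from front to back, so the
+// interval with the lowest start position is at the back and is popped first by
+// LinearScan.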
+void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
+  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
+  size_t insert_at = 0;
+  for (size_t i = array->size(); i > 0; --i) {
+    LiveInterval* current = (*array)[i - 1u];
+    // High intervals must be processed right after their low equivalent.
+    if (current->StartsAfter(interval) && !current->IsHighInterval()) {
+      insert_at = i;
+      break;
+    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
+      // Ensure the slow path interval is the last to be processed at its location: we want the
+      // interval to know all live registers at this location.
+      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
+      insert_at = i;
+      break;
+    }
+  }
+
+  // Insert the high interval before the low, to ensure the low is processed first.
+  auto insert_pos = array->begin() + insert_at;
+  if (interval->HasHighInterval()) {
+    array->insert(insert_pos, { interval->GetHighInterval(), interval });
+  } else if (interval->HasLowInterval()) {
+    array->insert(insert_pos, { interval, interval->GetLowInterval() });
+  } else {
+    array->insert(insert_pos, interval);
+  }
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
+  if (interval->IsHighInterval()) {
+    // The low interval already took care of allocating the spill slot.
+    DCHECK(!interval->GetLowInterval()->HasRegister());
+    DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
+    return;
+  }
+
+  LiveInterval* parent = interval->GetParent();
+
+  // An instruction gets a spill slot for its entire lifetime. If the parent
+  // of this interval already has a spill slot, there is nothing to do.
+  if (parent->HasSpillSlot()) {
+    return;
+  }
+
+  HInstruction* defined_by = parent->GetDefinedBy();
+  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
+
+  if (defined_by->IsParameterValue()) {
+    // Parameters have their own stack slot.
+    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+    return;
+  }
+
+  if (defined_by->IsCurrentMethod()) {
+    parent->SetSpillSlot(0);
+    return;
+  }
+
+  if (defined_by->IsConstant()) {
+    // Constants don't need a spill slot.
+    return;
+  }
+
+  ArenaVector<size_t>* spill_slots = nullptr;
+  switch (interval->GetType()) {
+    case Primitive::kPrimDouble:
+      spill_slots = &double_spill_slots_;
+      break;
+    case Primitive::kPrimLong:
+      spill_slots = &long_spill_slots_;
+      break;
+    case Primitive::kPrimFloat:
+      spill_slots = &float_spill_slots_;
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimShort:
+      spill_slots = &int_spill_slots_;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+  }
+
+  // Find an available spill slot.
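+  // Each entry of `spill_slots` holds the end position of the last interval placed
+  // in that slot, so a slot is reusable once its entry is <= `parent`'s start.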
+  size_t slot = 0;
+  for (size_t e = spill_slots->size(); slot < e; ++slot) {
+    if ((*spill_slots)[slot] <= parent->GetStart()) {
+      if (!parent->NeedsTwoSpillSlots()) {
+        // One spill slot is sufficient.
+        break;
+      }
+      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
+        // Two spill slots are available.
+        break;
+      }
+    }
+  }
+
+  size_t end = interval->GetLastSibling()->GetEnd();
+  if (parent->NeedsTwoSpillSlots()) {
+    if (slot + 2u > spill_slots->size()) {
+      // We need a new spill slot.
+      spill_slots->resize(slot + 2u, end);
+    }
+    (*spill_slots)[slot] = end;
+    (*spill_slots)[slot + 1] = end;
+  } else {
+    if (slot == spill_slots->size()) {
+      // We need a new spill slot.
+      spill_slots->push_back(end);
+    } else {
+      (*spill_slots)[slot] = end;
+    }
+  }
+
+  // Note that the exact spill slot location will be computed when we resolve,
+  // that is, once we know the number of spill slots for each type.
+  parent->SetSpillSlot(slot);
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+  LiveInterval* interval = phi->GetLiveInterval();
+
+  HInstruction* previous_phi = phi->GetPrevious();
+  DCHECK(previous_phi == nullptr ||
+         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
+
+  if (phi->IsVRegEquivalentOf(previous_phi)) {
+    // This is an equivalent of the previous phi. We need to assign the same
+    // catch phi slot.
+    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+  } else {
+    // Allocate a new spill slot for this catch phi.
+    // TODO: Reuse spill slots when intervals of phis from different catch
+    //       blocks do not overlap.
+    interval->SetSpillSlot(catch_phi_spill_slots_);
+    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
new file mode 100644
index 0000000..1a643a0
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class HPhi;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ */
+class RegisterAllocatorLinearScan : public RegisterAllocator {
+ public:
+  RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+                              CodeGenerator* codegen,
+                              const SsaLivenessAnalysis& analysis);
+  ~RegisterAllocatorLinearScan() OVERRIDE {}
+
+  void AllocateRegisters() OVERRIDE;
+
+  bool Validate(bool log_fatal_on_failure) OVERRIDE {
+    processing_core_registers_ = true;
+    if (!ValidateInternal(log_fatal_on_failure)) {
+      return false;
+    }
+    processing_core_registers_ = false;
+    return ValidateInternal(log_fatal_on_failure);
+  }
+
+  size_t GetNumberOfSpillSlots() const {
+    return int_spill_slots_.size()
+        + long_spill_slots_.size()
+        + float_spill_slots_.size()
+        + double_spill_slots_.size()
+        + catch_phi_spill_slots_;
+  }
+
+ private:
+  // Main methods of the allocator.
+  void LinearScan();
+  bool TryAllocateFreeReg(LiveInterval* interval);
+  bool AllocateBlockedReg(LiveInterval* interval);
+
+  // Add `interval` to the given sorted list, keeping the list sorted.
+  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+
+  // Returns whether `reg` is blocked by the code generator.
+  bool IsBlocked(int reg) const;
+
+  // Update the interval for the register in `location` to cover [start, end).
+  void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+  // Allocate a spill slot for the given interval. Should be called in linear
+  // order of interval starting positions.
+  void AllocateSpillSlotFor(LiveInterval* interval);
+
+  // Allocate a spill slot for the given catch phi. Will allocate the same slot
+  // for phis which share the same vreg. Must be called in reverse linear order
+  // of lifetime positions and ascending vreg numbers for correctness.
+  void AllocateSpillSlotForCatchPhi(HPhi* phi);
+
+  // Helper methods.
+  void AllocateRegistersInternal();
+  void ProcessInstruction(HInstruction* instruction);
+  bool ValidateInternal(bool log_fatal_on_failure) const;
+  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
+  void DumpAllIntervals(std::ostream& stream) const;
+  int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
+  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
+  bool IsCallerSaveRegister(int reg) const;
+
+  // Try splitting an active non-pair or unaligned pair interval at the given `position`.
+  // Returns whether it was successful at finding such an interval.
+  bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+                                                size_t first_register_use,
+                                                size_t* next_use);
+
+  // List of intervals for core registers that must be processed, ordered by start
+  // position. Last entry is the interval that has the lowest start position.
+  // This list is initially populated before doing the linear scan.
+  ArenaVector<LiveInterval*> unhandled_core_intervals_;
+
+  // List of intervals for floating-point registers. Same comments as above.
+  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
+
+  // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
+  // or `unhandled_fp_intervals_`.
+  ArenaVector<LiveInterval*>* unhandled_;
+
+  // List of intervals that have been processed.
+  ArenaVector<LiveInterval*> handled_;
+
+  // List of intervals that are currently active when processing a new live interval.
+  // That is, they have a live range that spans the start of the new interval.
+  ArenaVector<LiveInterval*> active_;
+
+  // List of intervals that are currently inactive when processing a new live interval.
+  // That is, they have a lifetime hole that spans the start of the new interval.
+  ArenaVector<LiveInterval*> inactive_;
+
+  // Fixed intervals for physical registers. Such intervals cover the positions
+  // where an instruction requires a specific register.
+  ArenaVector<LiveInterval*> physical_core_register_intervals_;
+  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
+
+  // Intervals for temporaries. Such intervals cover the positions
+  // where an instruction requires a temporary.
+  ArenaVector<LiveInterval*> temp_intervals_;
+
+  // The spill slots allocated for live intervals. We ensure spill slots
+  // are typed to avoid (1) doing moves and swaps between two different kinds
+  // of registers, and (2) swapping between a single stack slot and a double
+  // stack slot. This simplifies the parallel move resolver.
+  ArenaVector<size_t> int_spill_slots_;
+  ArenaVector<size_t> long_spill_slots_;
+  ArenaVector<size_t> float_spill_slots_;
+  ArenaVector<size_t> double_spill_slots_;
+
+  // Spill slots allocated to catch phis. This category is special-cased because
+  // (1) slots are allocated prior to linear scan and in reverse linear order,
+  // (2) equivalent phis need to share slots despite having different types.
+  size_t catch_phi_spill_slots_;
+
+  // Instructions that need a safepoint.
+  ArenaVector<HInstruction*> safepoints_;
+
+  // True if processing core registers. False if processing floating
+  // point registers.
+  bool processing_core_registers_;
+
+  // Number of registers for the current register kind (core or floating point).
+  size_t number_of_registers_;
+
+  // Temporary array, allocated ahead of time for simplicity.
+  size_t* registers_array_;
+
+  // Blocked registers, as decided by the code generator.
+  bool* const blocked_core_registers_;
+  bool* const blocked_fp_registers_;
+
+  // Slots reserved for out arguments.
+  size_t reserved_out_slots_;
+
+  // The maximum number of live core registers at safepoints.
+  size_t maximum_number_of_live_core_registers_;
+
+  // The maximum number of live FP registers at safepoints.
+  size_t maximum_number_of_live_fp_registers_;
+
+  ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 080f970..55ea99e 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -25,39 +25,54 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "ssa_phi_elimination.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+using Strategy = RegisterAllocator::Strategy;
+
 // Note: the register allocator tests rely on the fact that constants have live
 // intervals and registers get allocated to them.
 
-static bool Check(const uint16_t* data) {
+class RegisterAllocatorTest : public CommonCompilerTest {
+ protected:
+  // These functions need to access private members of LocationSummary, so we
+  // declare them as members of RegisterAllocatorTest, which LocationSummary
+  // declares as a friend class.
+  static void SameAsFirstInputHint(Strategy strategy);
+  static void ExpectedInRegisterHint(Strategy strategy);
+};
+
+// This macro should list every register allocation strategy that needs to be tested.
+#define TEST_ALL_STRATEGIES(test_name)\
+TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
+  test_name(Strategy::kRegisterAllocatorLinearScan);\
+}\
+TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+  test_name(Strategy::kRegisterAllocatorGraphColor);\
+}
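+// For instance, TEST_ALL_STRATEGIES(CFG1) expands into the two gtests
+// RegisterAllocatorTest.CFG1_LinearScan and RegisterAllocatorTest.CFG1_GraphColor,
+// each forwarding its strategy to the static CFG1() helper.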
+
+static bool Check(const uint16_t* data, Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
-  graph->TryBuildingSsa();
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  return register_allocator.Validate(false);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+  register_allocator->AllocateRegisters();
+  return register_allocator->Validate(false);
 }
 
 /**
  * Unit testing of RegisterAllocator::ValidateIntervals. Register allocator
  * tests are based on this validation method.
  */
-TEST(RegisterAllocatorTest, ValidateIntervals) {
+TEST_F(RegisterAllocatorTest, ValidateIntervals) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateGraph(&allocator);
@@ -146,7 +161,7 @@
   }
 }
 
-TEST(RegisterAllocatorTest, CFG1) {
+static void CFG1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -163,10 +178,12 @@
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST(RegisterAllocatorTest, Loop1) {
+TEST_ALL_STRATEGIES(CFG1);
+
+static void Loop1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -202,10 +219,12 @@
     Instruction::CONST_4 | 5 << 12 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST(RegisterAllocatorTest, Loop2) {
+TEST_ALL_STRATEGIES(Loop1);
+
+static void Loop2(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -251,19 +270,12 @@
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
-  graph->TryBuildingSsa();
-  return graph;
-}
+TEST_ALL_STRATEGIES(Loop2);
 
-TEST(RegisterAllocatorTest, Loop3) {
+static void Loop3(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0
@@ -302,15 +314,16 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  ASSERT_TRUE(register_allocator.Validate(false));
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+  register_allocator->AllocateRegisters();
+  ASSERT_TRUE(register_allocator->Validate(false));
 
   HBasicBlock* loop_header = graph->GetBlocks()[2];
   HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -326,7 +339,9 @@
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
-TEST(RegisterAllocatorTest, FirstRegisterUse) {
+TEST_ALL_STRATEGIES(Loop3);
+
+TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8,
@@ -336,7 +351,7 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -366,7 +381,7 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
-TEST(RegisterAllocatorTest, DeadPhi) {
+static void DeadPhi(Strategy strategy) {
   /* Test for a dead loop phi taking as back-edge input a phi that also has
    * this loop phi as input. Walking backwards in SsaDeadPhiElimination
    * does not solve the problem because the loop phi will be visited last.
@@ -390,38 +405,42 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   SsaDeadPhiElimination(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  ASSERT_TRUE(register_allocator.Validate(false));
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+  register_allocator->AllocateRegisters();
+  ASSERT_TRUE(register_allocator->Validate(false));
 }
 
+TEST_ALL_STRATEGIES(DeadPhi);
+
 /**
  * Test that the TryAllocateFreeReg method works in the presence of inactive intervals
  * that share the same register. It should split the interval it is currently
  * allocating for at the minimum lifetime position between the two inactive intervals.
+ * This test only applies to the linear scan allocator.
  */
-TEST(RegisterAllocatorTest, FreeUntil) {
+TEST_F(RegisterAllocatorTest, FreeUntil) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   SsaDeadPhiElimination(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
 
   // Add an artificial range to cover the temps that will be put in the unhandled list.
   LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
@@ -472,7 +491,7 @@
                                   HInstruction** input2) {
   HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
@@ -519,15 +538,15 @@
                                               graph->GetDexFile(),
                                               dex_cache,
                                               0);
-*input2 = new (allocator) HInstanceFieldGet(parameter,
-                                            Primitive::kPrimInt,
-                                            MemberOffset(42),
-                                            false,
-                                            kUnknownFieldIndex,
-                                            kUnknownClassDefIndex,
-                                            graph->GetDexFile(),
-                                            dex_cache,
-                                            0);
+  *input2 = new (allocator) HInstanceFieldGet(parameter,
+                                              Primitive::kPrimInt,
+                                              MemberOffset(42),
+                                              false,
+                                              kUnknownFieldIndex,
+                                              kUnknownClassDefIndex,
+                                              graph->GetDexFile(),
+                                              dex_cache,
+                                              0);
   then->AddInstruction(*input1);
   else_->AddInstruction(*input2);
   join->AddInstruction(new (allocator) HExit());
@@ -535,11 +554,11 @@
   (*phi)->AddInput(*input2);
 
   graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
+  graph->AnalyzeLoops();
   return graph;
 }
 
-TEST(RegisterAllocatorTest, PhiHint) {
+static void PhiHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HPhi *phi;
@@ -554,8 +573,9 @@
     liveness.Analyze();
 
     // Check that the register allocator is deterministic.
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
@@ -573,8 +593,9 @@
     // Set the phi to a specific register, and check that the inputs get allocated
     // the same register.
     phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -592,8 +613,9 @@
     // Set input1 to a specific register, and check that the phi and other input get allocated
     // the same register.
     input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -611,8 +633,9 @@
     // Set input2 to a specific register, and check that the phi and other input get allocated
     // the same register.
     input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -620,11 +643,17 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) {
+  PhiHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
                                 HInstruction** field,
                                 HInstruction** ret) {
   HGraph* graph = CreateGraph(allocator);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -658,7 +687,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
+void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *field, *ret;
@@ -671,8 +700,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
@@ -690,13 +720,20 @@
     // Don't use SetInAt because we are overriding an already allocated location.
     ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) {
+  ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
                             HInstruction** first_sub,
                             HInstruction** second_sub) {
@@ -726,7 +763,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
+void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *first_sub, *second_sub;
@@ -739,8 +776,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the registers are the same.
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
@@ -761,14 +799,21 @@
     ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
     ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) {
+  SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildDiv(ArenaAllocator* allocator,
                         HInstruction** div) {
   HGraph* graph = CreateGraph(allocator);
@@ -795,7 +840,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *div;
@@ -808,18 +853,26 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     // div on x86 requires its first input in eax and the output be the same as the first input.
     ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) {
+  ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 // Test a bug in the register allocator, where allocating a blocked
 // register would lead to spilling an inactive interval at the wrong
 // position.
-TEST(RegisterAllocatorTest, SpillInactive) {
+// This test only applies to the linear scan allocator.
+TEST_F(RegisterAllocatorTest, SpillInactive) {
   ArenaPool pool;
 
   // Create a synthesized graph to please the register_allocator and
@@ -905,7 +958,7 @@
     liveness.instructions_from_lifetime_position_.push_back(user);
   }
 
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
   register_allocator.unhandled_core_intervals_.push_back(fourth);
   register_allocator.unhandled_core_intervals_.push_back(third);
   register_allocator.unhandled_core_intervals_.push_back(second);
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
new file mode 100644
index 0000000..e409035
--- /dev/null
+++ b/compiler/optimizing/select_generator.cc
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "select_generator.h"
+
+namespace art {
+
+static constexpr size_t kMaxInstructionsInBranch = 1u;
+
+// Returns true if `block` has only one predecessor, ends with a Goto and
+// contains at most `kMaxInstructionsInBranch` other movable instructions
+// with no side effects.
+static bool IsSimpleBlock(HBasicBlock* block) {
+  if (block->GetPredecessors().size() != 1u) {
+    return false;
+  }
+  DCHECK(block->GetPhis().IsEmpty());
+
+  size_t num_instructions = 0u;
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* instruction = it.Current();
+    if (instruction->IsControlFlow()) {
+      return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch;
+    } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
+      num_instructions++;
+    } else {
+      return false;
+    }
+  }
+
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
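+// For example (illustrative): a branch holding a single HAdd followed by a Goto
+// qualifies, whereas a branch containing a field store does not, since stores
+// have side effects and cannot be moved in front of the If.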
+
+// Returns true if `block1` and `block2` have the same single successor, i.e.
+// the true and false branches of the diamond merge together.
+static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
+  return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
+}
+
+// Returns the single phi in `block` whose inputs at `index1` and `index2`
+// differ, or nullptr if there is no such phi or if there is more than one.
+static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) {
+  DCHECK_NE(index1, index2);
+
+  HPhi* select_phi = nullptr;
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* phi = it.Current()->AsPhi();
+    if (phi->InputAt(index1) != phi->InputAt(index2)) {
+      if (select_phi == nullptr) {
+        // First phi with different inputs for the two indices found.
+        select_phi = phi;
+      } else {
+        // More than one phi has different inputs for the two indices.
+        return nullptr;
+      }
+    }
+  }
+  return select_phi;
+}
+
+void HSelectGenerator::Run() {
+  // Iterate in post order in the unlikely case that removing one occurrence of
+  // the selection pattern empties a branch block of another occurrence.
+  // Otherwise the order does not matter.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->EndsWithIf()) continue;
+
+    // Find elements of the diamond pattern.
+    HIf* if_instruction = block->GetLastInstruction()->AsIf();
+    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+    DCHECK_NE(true_block, false_block);
+    if (!IsSimpleBlock(true_block) ||
+        !IsSimpleBlock(false_block) ||
+        !BlocksMergeTogether(true_block, false_block)) {
+      continue;
+    }
+    HBasicBlock* merge_block = true_block->GetSingleSuccessor();
+
+    // If the branches are not empty, move instructions in front of the If.
+    // TODO(dbrazdil): This puts an instruction between If and its condition.
+    //                 Implement moving of conditions to first users if possible.
+    if (!true_block->IsSingleGoto()) {
+      true_block->GetFirstInstruction()->MoveBefore(if_instruction);
+    }
+    if (!false_block->IsSingleGoto()) {
+      false_block->GetFirstInstruction()->MoveBefore(if_instruction);
+    }
+    DCHECK(true_block->IsSingleGoto());
+    DCHECK(false_block->IsSingleGoto());
+
+    // Find the resulting true/false values.
+    size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
+    size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
+    DCHECK_NE(predecessor_index_true, predecessor_index_false);
+
+    HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
+    if (phi == nullptr) {
+      continue;
+    }
+    HInstruction* true_value = phi->InputAt(predecessor_index_true);
+    HInstruction* false_value = phi->InputAt(predecessor_index_false);
+
+    // Create the Select instruction and insert it in front of the If.
+    HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0),
+                                                       true_value,
+                                                       false_value,
+                                                       if_instruction->GetDexPc());
+    if (phi->GetType() == Primitive::kPrimNot) {
+      select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
+    }
+    block->InsertInstructionBefore(select, if_instruction);
+
+    // Remove the true branch, which also removes the corresponding Phi input.
+    // If the Phi is left with only the false branch input, it is removed automatically.
+    phi->ReplaceInput(select, predecessor_index_false);
+    bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
+    true_block->DisconnectAndDelete();
+    DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
+
+    // Merge the remaining blocks, which are now connected by a Goto.
+    DCHECK_EQ(block->GetSingleSuccessor(), false_block);
+    block->MergeWith(false_block);
+    if (only_two_predecessors) {
+      DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
+      block->MergeWith(merge_block);
+    }
+
+    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+
+    // No need to update dominance information, as we are simplifying
+    // a simple diamond shape, where the join block is merged with the
+    // entry block. Any following blocks would have had the join block
+    // as a dominator, and `MergeWith` handles changing that to the
+    // entry block.
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
new file mode 100644
index 0000000..c6dca58
--- /dev/null
+++ b/compiler/optimizing/select_generator.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This optimization recognizes the common diamond selection pattern and
+ * replaces it with an instance of the HSelect instruction.
+ *
+ * Recognized pattern:
+ *
+ *          If [ Condition ]
+ *            /          \
+ *      false branch  true branch
+ *            \          /
+ *     Phi [FalseValue, TrueValue]
+ *
+ * The pattern will be simplified if `true_branch` and `false_branch` each
+ * contain at most one side-effect-free instruction.
+ *
+ * Blocks are merged into one and Select replaces the If and the Phi:
+ *              true branch
+ *              false branch
+ *              Select [FalseValue, TrueValue, Condition]
+ *
+ * Note: In order to recognize no side-effect blocks, this optimization must be
+ * run after the instruction simplifier has removed redundant suspend checks.
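+ *
+ * A typical source of this shape is a ternary expression such as
+ * `x = cond ? a : b`, which is rewritten into a single HSelect.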
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
+#define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class HSelectGenerator : public HOptimization {
+ public:
+  HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, kSelectGeneratorPassName, stats) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kSelectGeneratorPassName = "select_generator";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
new file mode 100644
index 0000000..6effc30
--- /dev/null
+++ b/compiler/optimizing/sharpening.cc
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sharpening.h"
+
+#include "base/casts.h"
+#include "base/enums.h"
+#include "class_linker.h"
+#include "code_generator.h"
+#include "driver/dex_compilation_unit.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+#include "driver/compiler_driver.h"
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "handle_scope-inl.h"
+#include "mirror/dex_cache.h"
+#include "mirror/string.h"
+#include "nodes.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+void HSharpening::Run() {
+  // We don't care about the order of the blocks here.
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInvokeStaticOrDirect()) {
+        ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+      } else if (instruction->IsLoadClass()) {
+        ProcessLoadClass(instruction->AsLoadClass());
+      } else if (instruction->IsLoadString()) {
+        ProcessLoadString(instruction->AsLoadString());
+      }
+      // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
+      //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
+      //       because we know the type better when inlining.
+    }
+  }
+}
+
+void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  if (invoke->IsStringInit()) {
+    // Not using the dex cache arrays, but we could still try to use a better dispatch...
+    // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
+    return;
+  }
+
+  // TODO: Avoid CompilerDriver.
+  InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType optimized_invoke_type = original_invoke_type;
+  MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
+  int vtable_idx;
+  uintptr_t direct_code, direct_method;
+  bool success = compiler_driver_->ComputeInvokeInfo(
+      &compilation_unit_,
+      invoke->GetDexPc(),
+      false /* update_stats: already updated in builder */,
+      true /* enable_devirtualization */,
+      &optimized_invoke_type,
+      &target_method,
+      &vtable_idx,
+      &direct_code,
+      &direct_method);
+  if (!success) {
+    // TODO: Try using kDexCachePcRelative. It's always a valid method load
+    // kind as long as it's supported by the codegen.
+    return;
+  }
+  invoke->SetOptimizedInvokeType(optimized_invoke_type);
+  invoke->SetTargetMethod(target_method);
+
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
+  uint64_t method_load_data = 0u;
+  uint64_t direct_code_ptr = 0u;
+
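+  // The decision below prefers, in order: a self-call for direct recursion, a
+  // direct method/code pointer when the driver resolved one, a PC-relative dex
+  // cache array load when the target's dex file is in the oat file being
+  // compiled, and finally the generic load through the ArtMethod*.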
+  HGraph* outer_graph = codegen_->GetGraph();
+  if (target_method.dex_file == &outer_graph->GetDexFile() &&
+      target_method.dex_method_index == outer_graph->GetMethodIdx()) {
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
+  } else {
+    bool use_pc_relative_instructions =
+        ((direct_method == 0u || direct_code == static_cast<uintptr_t>(-1))) &&
+        ContainsElement(compiler_driver_->GetDexFilesForOatFile(), target_method.dex_file);
+    if (direct_method != 0u) {  // Should we use a direct pointer to the method?
+      // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while
+      // kDirectAddress would be fine for image methods, we don't support it at the moment.
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
+        method_load_data = direct_method;
+      } else {  // The direct pointer will be known at link time.
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup;
+      }
+    } else {  // Use dex cache.
+      DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile());
+      if (use_pc_relative_instructions) {  // Can we use PC-relative access to the dex cache arrays?
+        DCHECK(!Runtime::Current()->UseJitCompilation());
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
+        DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
+                                    &graph_->GetDexFile());
+        method_load_data = layout.MethodOffset(target_method.dex_method_index);
+      } else {  // We must go through the ArtMethod's pointer to resolved methods.
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+      }
+    }
+    if (direct_code != 0u) {  // Should we use a direct pointer to the code?
+      // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
+      // while kCallDirect would be fine for image methods, we don't support it at the moment.
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
+        direct_code_ptr = direct_code;
+      } else if (use_pc_relative_instructions) {
+        // Use PC-relative calls for invokes within a multi-dex oat file.
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
+      } else {  // The direct pointer will be known at link time.
+        // NOTE: This is used for app->boot calls when compiling an app against
+        // a relocatable but not yet relocated image.
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup;
+      }
+    } else {  // We must use the code pointer from the ArtMethod.
+      code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    }
+  }
+
+  if (graph_->IsDebuggable()) {
+    // For debuggable apps always use the code pointer from ArtMethod
+    // so that we don't circumvent instrumentation stubs if installed.
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+  }
+
+  HInvokeStaticOrDirect::DispatchInfo desired_dispatch_info = {
+      method_load_kind, code_ptr_location, method_load_data, direct_code_ptr
+  };
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info =
+      codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info,
+                                                         invoke->GetTargetMethod());
+  invoke->SetDispatchInfo(dispatch_info);
+}
+
+void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
+  if (load_class->NeedsAccessCheck()) {
+    // We need to call the runtime anyway, so we simply get the class as that call's return value.
+    return;
+  }
+  if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
+    // Loading from the ArtMethod* is the most efficient retrieval.
+    // TODO: This may not actually be true for all architectures and
+    // locations of target classes. The additional register pressure
+    // for using the ArtMethod* should be considered.
+    return;
+  }
+
+  DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening.";
+
+  const DexFile& dex_file = load_class->GetDexFile();
+  uint32_t type_index = load_class->GetTypeIndex();
+
+  bool is_in_dex_cache = false;
+  HLoadClass::LoadKind desired_load_kind;
+  uint64_t address = 0u;  // Class or dex cache element address.
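+  // The desired load kind depends on the compilation mode: boot image compiles
+  // may use link-time references, JIT compiles may embed runtime pointers
+  // directly, and AOT app compiles fall back to dex cache loads unless the
+  // class already lives in the boot image.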
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+        ? compilation_unit_.GetDexCache()
+        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+    mirror::Class* klass = dex_cache->GetResolvedType(type_index);
+
+    if (compiler_driver_->IsBootImage()) {
+      // Compiling boot image. Check if the class is a boot image class.
+      DCHECK(!runtime->UseJitCompilation());
+      if (!compiler_driver_->GetSupportBootImageFixup()) {
+        // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+      } else {
+        if (klass != nullptr &&
+            compiler_driver_->IsImageClass(
+                dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+          is_in_dex_cache = true;
+          desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
+              ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+              : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+        } else {
+          // Not a boot image class. We must go through the dex cache.
+          DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+          desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
+        }
+      }
+    } else if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      is_in_dex_cache = (klass != nullptr);
+      if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+        // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(klass);
+      } else {
+        // Note: If the class is not in the dex cache or isn't initialized, the
+        // instruction needs an environment and will not be inlined across dex files.
+        // Within a dex file, the slow-path helper loads the correct class and
+        // inlined frames are used correctly for the OOM stack trace.
+        // TODO: Write a test for this. Bug: 29416588
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
+        void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index];
+        address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+      }
+    } else {
+      // AOT app compilation. Check if the class is in the boot image.
+      if ((klass != nullptr) &&
+          runtime->GetHeap()->ObjectIsInBootImageSpace(klass) &&
+          !codegen_->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(klass);
+      } else {
+        // Not JIT, and either the class is not in the boot image or we are compiling in PIC mode.
+        // Use PC-relative load from the dex cache if the dex file belongs
+        // to the oat file that we're currently compiling.
+        desired_load_kind =
+            ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile())
+                ? HLoadClass::LoadKind::kDexCachePcRelative
+                : HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+    }
+  }
+  if (is_in_dex_cache) {
+    load_class->MarkInDexCache();
+  }
+
+  HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind);
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index);
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK_NE(address, 0u);
+      load_class->SetLoadKindWithAddress(load_kind, address);
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      DexCacheArraysLayout layout(pointer_size, &dex_file);
+      size_t element_index = layout.TypeOffset(type_index);
+      load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load_kind;
+      UNREACHABLE();
+  }
+}
+
+void HSharpening::ProcessLoadString(HLoadString* load_string) {
+  DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(!load_string->IsInDexCache());
+
+  const DexFile& dex_file = load_string->GetDexFile();
+  uint32_t string_index = load_string->GetStringIndex();
+
+  HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+  uint64_t address = 0u;  // String or dex cache element address.
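+  // Mirrors the per-mode selection done for classes, except that boot image
+  // compiles currently keep the default kDexCacheViaMethod kind (see the TODO
+  // below about re-adding the PcRelative and Address kinds).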
+  {
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+        ? compilation_unit_.GetDexCache()
+        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+
+    if (compiler_driver_->IsBootImage()) {
+      // Compiling boot image. Resolve the string and allocate it if needed.
+      DCHECK(!runtime->UseJitCompilation());
+      mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
+      CHECK(string != nullptr);
+      // TODO: In follow up CL, add PcRelative and Address back in.
+    } else if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      mirror::String* string = dex_cache->GetResolvedString(string_index);
+      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(string);
+      }
+    } else {
+      // AOT app compilation. Try to look up the string without allocating it if not found.
+      mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      if (string != nullptr &&
+          runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
+          !codegen_->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(string);
+      }
+    }
+  }
+
+  HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK_NE(address, 0u);
+      load_string->SetLoadKindWithAddress(load_kind, address);
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      DexCacheArraysLayout layout(pointer_size, &dex_file);
+      size_t element_index = layout.StringOffset(string_index);
+      load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
+      break;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
new file mode 100644
index 0000000..d35ae66
--- /dev/null
+++ b/compiler/optimizing/sharpening.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_
+#define ART_COMPILER_OPTIMIZING_SHARPENING_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+class CompilerDriver;
+class DexCompilationUnit;
+class HInvokeStaticOrDirect;
+
+// Optimization that tries to improve the way we dispatch methods and access types,
+// fields, etc. Besides actual method sharpening based on receiver type (for example
+// virtual->direct), this includes selecting the best available dispatch for
+// invoke-static/-direct based on code generator support.
+class HSharpening : public HOptimization {
+ public:
+  HSharpening(HGraph* graph,
+              CodeGenerator* codegen,
+              const DexCompilationUnit& compilation_unit,
+              CompilerDriver* compiler_driver)
+      : HOptimization(graph, kSharpeningPassName),
+        codegen_(codegen),
+        compilation_unit_(compilation_unit),
+        compiler_driver_(compiler_driver) { }
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kSharpeningPassName = "sharpening";
+
+ private:
+  void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
+  void ProcessLoadClass(HLoadClass* load_class);
+  void ProcessLoadString(HLoadString* load_string);
+
+  CodeGenerator* codegen_;
+  const DexCompilationUnit& compilation_unit_;
+  CompilerDriver* compiler_driver_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SHARPENING_H_
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index ec45d6b..b01bc1c 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -129,13 +129,13 @@
 
 TEST(SideEffectsTest, VolatileDependences) {
   SideEffects volatile_write =
-      SideEffects::FieldWriteOfType(Primitive::kPrimInt, true);
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true);
   SideEffects any_write =
-      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false);
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false);
   SideEffects volatile_read =
-      SideEffects::FieldReadOfType(Primitive::kPrimByte, true);
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true);
   SideEffects any_read =
-      SideEffects::FieldReadOfType(Primitive::kPrimByte, false);
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false);
 
   EXPECT_FALSE(volatile_write.MayDependOn(any_read));
   EXPECT_TRUE(any_read.MayDependOn(volatile_write));
@@ -148,19 +148,19 @@
   EXPECT_FALSE(any_write.MayDependOn(volatile_read));
 }
 
-TEST(SideEffectsTest, SameWidthTypes) {
+TEST(SideEffectsTest, SameWidthTypesNoAlias) {
   // Type I/F.
-  testWriteAndReadDependence(
-      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false),
-      SideEffects::FieldReadOfType(Primitive::kPrimFloat, false));
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false),
+      SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false));
+  testNoWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
       SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   // Type L/D.
-  testWriteAndReadDependence(
-      SideEffects::FieldWriteOfType(Primitive::kPrimLong, false),
-      SideEffects::FieldReadOfType(Primitive::kPrimDouble, false));
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
+      SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false),
+      SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false));
+  testNoWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimLong),
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
 }
@@ -171,9 +171,9 @@
   for (Primitive::Type type = Primitive::kPrimNot;
         type < Primitive::kPrimVoid;
         type = Primitive::Type(type + 1)) {
-    s = s.Union(SideEffects::FieldWriteOfType(type, false));
+    s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false));
     s = s.Union(SideEffects::ArrayWriteOfType(type));
-    s = s.Union(SideEffects::FieldReadOfType(type, false));
+    s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false));
     s = s.Union(SideEffects::ArrayReadOfType(type));
   }
   EXPECT_TRUE(s.DoesAllReadWrite());
@@ -216,24 +216,40 @@
       "||||||L|",
       SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str());
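+  // A volatile field write conservatively aliases reads and writes of all types.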
   EXPECT_STREQ(
+      "||DFJISCBZL|DFJISCBZL||DFJISCBZL|DFJISCBZL|",
+      SideEffects::FieldWriteOfType(Primitive::kPrimNot, true).ToString().c_str());
+  EXPECT_STREQ(
       "|||||Z||",
       SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str());
   EXPECT_STREQ(
+      "|||||C||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimChar).ToString().c_str());
+  EXPECT_STREQ(
+      "|||||S||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimShort).ToString().c_str());
+  EXPECT_STREQ(
       "|||B||||",
       SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str());
   EXPECT_STREQ(
-      "||DJ|||||",  // note: DJ alias
+      "||D|||||",
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str());
+  EXPECT_STREQ(
+      "||J|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimLong).ToString().c_str());
+  EXPECT_STREQ(
+      "||F|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimFloat).ToString().c_str());
+  EXPECT_STREQ(
+      "||I|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimInt).ToString().c_str());
   SideEffects s = SideEffects::None();
-  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false));
-  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false));
+  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false));
+  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false));
   s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort));
-  s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false));
+  s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
-  EXPECT_STREQ(
-      "||DFJI|FI||S|DJC|",   // note: DJ/FI alias.
-      s.ToString().c_str());
+  EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str());
 }
 
 }  // namespace art
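
For readers decoding the expected strings: ToString() prints one '|'-separated group per side-effect class, with one letter per primitive type (D double, F float, J long, I int, S short, C char, B byte, Z boolean, L reference). The updated expectations drop the old "DJ/FI alias" notes because same-width types now get distinct bits. A toy sketch of the underlying idea, deliberately independent of the ART classes:

    #include <cassert>
    #include <cstdint>

    // Toy model: write bits in the low 16 bits, read bits shifted up by 16.
    // `type` is a small index standing in for Primitive::Type.
    struct ToySideEffects {
      uint64_t bits;
      static ToySideEffects Write(int type) { return {1uLL << type}; }
      static ToySideEffects Read(int type) { return {1uLL << (type + 16)}; }
      ToySideEffects Union(ToySideEffects other) const { return {bits | other.bits}; }
      // A read may depend on a write only if they touch the same type bit.
      bool MayDependOn(ToySideEffects writes) const {
        return ((bits >> 16) & writes.bits) != 0;
      }
    };

    int main() {
      const int kInt = 0, kFloat = 1;
      // With one bit per type, an int write no longer aliases a float read.
      assert(!ToySideEffects::Read(kFloat).MayDependOn(ToySideEffects::Write(kInt)));
      assert(ToySideEffects::Read(kInt).MayDependOn(ToySideEffects::Write(kInt)));
      return 0;
    }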
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 4565590..5a574d9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -16,213 +16,16 @@
 
 #include "ssa_builder.h"
 
+#include "bytecode_utils.h"
 #include "nodes.h"
-#include "primitive_type_propagation.h"
+#include "reference_type_propagation.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
-/**
- * A debuggable application may require to reviving phis, to ensure their
- * associated DEX register is available to a debugger. This class implements
- * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It
- * also makes sure that phis with incompatible input types are not revived
- * (statement (b) of the SsaBuilder).
- *
- * This phase must be run after detecting dead phis through the
- * DeadPhiElimination phase, and before deleting the dead phis.
- */
-class DeadPhiHandling : public ValueObject {
- public:
-  explicit DeadPhiHandling(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    worklist_.reserve(kDefaultWorklistSize);
-  }
-
-  void Run();
-
- private:
-  void VisitBasicBlock(HBasicBlock* block);
-  void ProcessWorklist();
-  void AddToWorklist(HPhi* phi);
-  void AddDependentInstructionsToWorklist(HPhi* phi);
-  bool UpdateType(HPhi* phi);
-
-  HGraph* const graph_;
-  ArenaVector<HPhi*> worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
-
-  DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
-};
-
-static bool HasConflictingEquivalent(HPhi* phi) {
-  if (phi->GetNext() == nullptr) {
-    return false;
-  }
-  HPhi* next = phi->GetNext()->AsPhi();
-  if (next->GetRegNumber() == phi->GetRegNumber()) {
-    if (next->GetType() == Primitive::kPrimVoid) {
-      // We only get a void type for an equivalent phi we processed and found out
-      // it was conflicting.
-      return true;
-    } else {
-      // Go to the next phi, in case it is also an equivalent.
-      return HasConflictingEquivalent(next);
-    }
-  }
-  return false;
-}
-
-bool DeadPhiHandling::UpdateType(HPhi* phi) {
-  if (phi->IsDead()) {
-    // Phi was rendered dead while waiting in the worklist because it was replaced
-    // with an equivalent.
-    return false;
-  }
-
-  Primitive::Type existing = phi->GetType();
-
-  bool conflict = false;
-  Primitive::Type new_type = existing;
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    HInstruction* input = phi->InputAt(i);
-    if (input->IsPhi() && input->AsPhi()->IsDead()) {
-      // We are doing a reverse post order visit of the graph, reviving
-      // phis that have environment uses and updating their types. If an
-      // input is a phi, and it is dead (because its input types are
-      // conflicting), this phi must be marked dead as well.
-      conflict = true;
-      break;
-    }
-    Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
-
-    // The only acceptable transitions are:
-    // - From void to typed: first time we update the type of this phi.
-    // - From int to reference (or reference to int): the phi has to change
-    //   to reference type. If the integer input cannot be converted to a
-    //   reference input, the phi will remain dead.
-    if (new_type == Primitive::kPrimVoid) {
-      new_type = input_type;
-    } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
-      if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) {
-        // If we already asked for an equivalent of the input phi, but that equivalent
-        // ended up conflicting, make this phi conflicting too.
-        conflict = true;
-        break;
-      }
-      HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
-      if (equivalent == nullptr) {
-        conflict = true;
-        break;
-      }
-      phi->ReplaceInput(equivalent, i);
-      if (equivalent->IsPhi()) {
-        DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
-        // We created a new phi, but that phi has the same inputs as the old phi. We
-        // add it to the worklist to ensure its inputs can also be converted to reference.
-        // If not, it will remain dead, and the algorithm will make the current phi dead
-        // as well.
-        equivalent->AsPhi()->SetLive();
-        AddToWorklist(equivalent->AsPhi());
-      }
-    } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
-      new_type = Primitive::kPrimNot;
-      // Start over, we may request reference equivalents for the inputs of the phi.
-      i = -1;
-    } else if (new_type != input_type) {
-      conflict = true;
-      break;
-    }
-  }
-
-  if (conflict) {
-    phi->SetType(Primitive::kPrimVoid);
-    phi->SetDead();
-    return true;
-  } else if (existing == new_type) {
-    return false;
-  }
-
-  DCHECK(phi->IsLive());
-  phi->SetType(new_type);
-
-  // There might exist a `new_type` equivalent of `phi` already. In that case,
-  // we replace the equivalent with the, now live, `phi`.
-  HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType();
-  if (equivalent != nullptr) {
-    // There cannot be more than two equivalents with the same type.
-    DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr);
-    // If doing fix-point iteration, the equivalent might be in `worklist_`.
-    // Setting it dead will make UpdateType skip it.
-    equivalent->SetDead();
-    equivalent->ReplaceWith(phi);
-  }
-
-  return true;
-}
-
-void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
-  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->AsPhi();
-    if (phi->IsDead() && phi->HasEnvironmentUses()) {
-      phi->SetLive();
-      if (block->IsLoopHeader()) {
-        // Give a type to the loop phi to guarantee convergence of the algorithm.
-        // Note that the dead phi may already have a type if it is an equivalent
-        // generated for a typed LoadLocal. In that case we do not change the
-        // type because it could lead to an unsupported PrimNot/Float/Double ->
-        // PrimInt/Long transition and create same type equivalents.
-        if (phi->GetType() == Primitive::kPrimVoid) {
-          phi->SetType(phi->InputAt(0)->GetType());
-        }
-        AddToWorklist(phi);
-      } else {
-        // Because we are doing a reverse post order visit, all inputs of
-        // this phi have been visited and therefore had their (initial) type set.
-        UpdateType(phi);
-      }
-    }
-  }
-}
-
-void DeadPhiHandling::ProcessWorklist() {
-  while (!worklist_.empty()) {
-    HPhi* instruction = worklist_.back();
-    worklist_.pop_back();
-    // Note that the same equivalent phi can be added multiple times in the work list, if
-    // used by multiple phis. The first call to `UpdateType` will know whether the phi is
-    // dead or live.
-    if (instruction->IsLive() && UpdateType(instruction)) {
-      AddDependentInstructionsToWorklist(instruction);
-    }
-  }
-}
-
-void DeadPhiHandling::AddToWorklist(HPhi* instruction) {
-  DCHECK(instruction->IsLive());
-  worklist_.push_back(instruction);
-}
-
-void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr && !phi->IsDead()) {
-      AddToWorklist(phi);
-    }
-  }
-}
-
-void DeadPhiHandling::Run() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-}
-
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* equality_instr = it.Current();
       if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
@@ -245,24 +48,25 @@
       // should be replaced with a null constant.
       // Both type propagation and redundant phi elimination ensure `int_operand`
       // can only be the 0 constant.
-      DCHECK(int_operand->IsIntConstant());
+      DCHECK(int_operand->IsIntConstant()) << int_operand->DebugName();
       DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
-      equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
+      equality_instr->ReplaceInput(graph_->GetNullConstant(), int_operand == right ? 1 : 0);
     }
   }
 }
 
 void SsaBuilder::EquivalentPhisCleanup() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
       if (next != nullptr) {
-        // Make sure we do not replace a live phi with a dead phi. A live phi has been
-        // handled by the type propagation phase, unlike a dead phi.
+        // Make sure we do not replace a live phi with a dead phi. A live phi
+        // has been handled by the type propagation phase, unlike a dead phi.
         if (next->IsLive()) {
           phi->ReplaceWith(next);
+          phi->SetDead();
         } else {
           next->ReplaceWith(phi);
         }
@@ -274,73 +78,8 @@
   }
 }
 
-void SsaBuilder::BuildSsa() {
-  // 1) Visit in reverse post order. We need to have all predecessors of a block visited
-  // (with the exception of loops) in order to create the right environment for that
-  // block. For loops, we create phis whose inputs will be set in 2).
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-
-  // 2) Set inputs of loop phis.
-  for (HBasicBlock* block : loop_headers_) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber());
-        phi->AddInput(input);
-      }
-    }
-  }
-
-  // 3) Mark dead phis. This will mark phis that are only used by environments:
-  // at the DEX level, the type of these phis does not need to be consistent, but
-  // our code generator will complain if the inputs of a phi do not have the same
-  // type. The marking allows the type propagation to know which phis it needs
-  // to handle. We mark but do not eliminate: the elimination will be done in
-  // step 9).
-  SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph());
-  dead_phis_for_type_propagation.MarkDeadPhis();
-
-  // 4) Propagate types of phis. At this point, phis are typed void in the general
-  // case, or float/double/reference when we created an equivalent phi. So we
-  // need to propagate the types across phis to give them a correct type.
-  PrimitiveTypePropagation type_propagation(GetGraph());
-  type_propagation.Run();
-
-  // 5) When creating equivalent phis we copy the inputs of the original phi which
-  // may be improperly typed. This was fixed during the type propagation in 4) but
-  // as a result we may end up with two equivalent phis with the same type for
-  // the same dex register. This pass cleans them up.
-  EquivalentPhisCleanup();
-
-  // 6) Mark dead phis again. Step 4) may have introduced new phis.
-  // Step 5) might enable the death of new phis.
-  SsaDeadPhiElimination dead_phis(GetGraph());
-  dead_phis.MarkDeadPhis();
-
-  // 7) Now that the graph is correctly typed, we can get rid of redundant phis.
-  // Note that we cannot do this phase before type propagation, otherwise
-  // we could get rid of phi equivalents, whose presence is a requirement for the
-  // type propagation phase. Note that this is to satisfy statement (a) of the
-  // SsaBuilder (see ssa_builder.h).
-  SsaRedundantPhiElimination redundant_phi(GetGraph());
-  redundant_phi.Run();
-
-  // 8) Fix the type for null constants which are part of an equality comparison.
-  // We need to do this after redundant phi elimination, to ensure the only cases
-  // that we can see are reference comparison against 0. The redundant phi
-  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
-  // or HNotEqual.
-  FixNullConstantType();
-
-  // 9) Make sure environments use the right phi "equivalent": a phi marked dead
-  // can have a phi equivalent that is not dead. We must therefore update
-  // all environment uses of the dead phi to use its equivalent. Note that there
-  // can be multiple phis for the same Dex register that are live (for example
-  // when merging constants), in which case it is OK for the environments
-  // to just reference one.
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+void SsaBuilder::FixEnvironmentPhis() {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
       HPhi* phi = it_phis.Current()->AsPhi();
@@ -360,153 +99,457 @@
       phi->ReplaceWith(next);
     }
   }
-
-  // 10) Deal with phis to guarantee liveness of phis in case of a debuggable
-  // application. This is for satisfying statement (c) of the SsaBuilder
-  // (see ssa_builder.h).
-  if (GetGraph()->IsDebuggable()) {
-    DeadPhiHandling dead_phi_handler(GetGraph());
-    dead_phi_handler.Run();
-  }
-
-  // 11) Now that the right phis are used for the environments, and we
-  // have potentially revived dead phis in case of a debuggable application,
-  // we can eliminate phis we do not need. Regardless of the debuggable status,
-  // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h),
-  // as well as for the code generation, which does not deal with phis of conflicting
-  // input types.
-  dead_phis.EliminateDeadPhis();
-
-  // 12) Clear locals.
-  for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
-       !it.Done();
-       it.Advance()) {
-    HInstruction* current = it.Current();
-    if (current->IsLocal()) {
-      current->GetBlock()->RemoveInstruction(current);
-    }
-  }
 }
 
-ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
-  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
-  const size_t vregs = GetGraph()->GetNumberOfVRegs();
-  if (locals->empty() && vregs != 0u) {
-    locals->resize(vregs, nullptr);
-
-    if (block->IsCatchBlock()) {
-      ArenaAllocator* arena = GetGraph()->GetArena();
-      // We record incoming inputs of catch phis at throwing instructions and
-      // must therefore eagerly create the phis. Phis for undefined vregs will
-      // be deleted when the first throwing instruction with the vreg undefined
-      // is encountered. Unused phis will be removed by dead phi analysis.
-      for (size_t i = 0; i < vregs; ++i) {
-        // No point in creating the catch phi if it is already undefined at
-        // the first throwing instruction.
-        if ((*current_locals_)[i] != nullptr) {
-          HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid);
-          block->AddPhi(phi);
-          (*locals)[i] = phi;
-        }
+static void AddDependentInstructionsToWorklist(HInstruction* instruction,
+                                               ArenaVector<HPhi*>* worklist) {
+  // If `instruction` is a dead phi, a type conflict was just identified. All of
+  // its live phi users, and, transitively, the users of those users, need to be
+  // marked dead/conflicting too, so we add them to the worklist. Otherwise we
+  // add users whose types do not match and need to be updated.
+  bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead();
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (user->IsPhi() && user->AsPhi()->IsLive()) {
+      if (add_all_live_phis || user->GetType() != instruction->GetType()) {
+        worklist->push_back(user->AsPhi());
       }
     }
   }
-  return locals;
 }
 
-HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
-  return (*locals)[local];
+// Find a candidate primitive type for `phi` by merging the types of its inputs.
+// Return false if a conflict is identified.
+static bool TypePhiFromInputs(HPhi* phi) {
+  Primitive::Type common_type = phi->GetType();
+
+  for (HInstruction* input : phi->GetInputs()) {
+    if (input->IsPhi() && input->AsPhi()->IsDead()) {
+      // Phis are constructed live so if an input is a dead phi, it must have
+      // been made dead due to type conflict. Mark this phi conflicting too.
+      return false;
+    }
+
+    Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
+    if (common_type == input_type) {
+      // No change in type.
+    } else if (Primitive::Is64BitType(common_type) != Primitive::Is64BitType(input_type)) {
+      // Types are of different sizes, e.g. int vs. long. Must be a conflict.
+      return false;
+    } else if (Primitive::IsIntegralType(common_type)) {
+      // Previous inputs were integral, this one is not, but it is of the same
+      // size. This does not imply a conflict, since some bytecode instruction
+      // types are ambiguous. TypeInputsOfPhi will either type them or detect a
+      // conflict.
+      DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot);
+      common_type = input_type;
+    } else if (Primitive::IsIntegralType(input_type)) {
+      // Input is integral, common type is not. Same as in the previous case, if
+      // there is a conflict, it will be detected during TypeInputsOfPhi.
+      DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot);
+    } else {
+      // Combining float and reference types. Clearly a conflict.
+      DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) ||
+             (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat));
+      return false;
+    }
+  }
+
+  // We have found a candidate type for the phi. Set it and return true. We may
+  // still discover a conflict while typing the individual inputs in TypeInputsOfPhi.
+  phi->SetType(common_type);
+  return true;
 }
 
-void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
-  current_locals_ = GetLocalsFor(block);
-
-  if (block->IsCatchBlock()) {
-    // Catch phis were already created and inputs collected from throwing sites.
+// Replace the inputs of `phi` to match its type. Return false if a conflict is identified.
+bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+  Primitive::Type common_type = phi->GetType();
+  if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) {
+    // The phi either has only other untyped phis as inputs (common_type ==
+    // kPrimVoid), or `common_type` is integral and we do not need to retype
+    // ambiguous inputs because they are always constructed with the integral
+    // type candidate.
     if (kIsDebugBuild) {
-      // Make sure there was at least one throwing instruction which initialized
-      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
-      // visited already (from HTryBoundary scoping and reverse post order).
-      bool throwing_instruction_found = false;
-      bool catch_block_visited = false;
-      for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-        HBasicBlock* current = it.Current();
-        if (current == block) {
-          catch_block_visited = true;
-        } else if (current->IsTryBlock() &&
-                   current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) {
-          DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
-          throwing_instruction_found |= current->HasThrowingInstructions();
+      for (HInstruction* input : phi->GetInputs()) {
+        if (common_type == Primitive::kPrimVoid) {
+          DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
+        } else {
+          DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) ||
+                 HPhi::ToPhiType(input->GetType()) == common_type);
         }
       }
-      DCHECK(throwing_instruction_found) << "No instructions throwing into a live catch block.";
-    }
-  } else if (block->IsLoopHeader()) {
-    // If the block is a loop header, we know we only have visited the pre header
-    // because we are visiting in reverse post order. We create phis for all initialized
-    // locals from the pre header. Their inputs will be populated at the end of
-    // the analysis.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
-      if (incoming != nullptr) {
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
-        block->AddPhi(phi);
-        (*current_locals_)[local] = phi;
-      }
     }
-    // Save the loop header so that the last phase of the analysis knows which
-    // blocks need to be updated.
-    loop_headers_.push_back(block);
-  } else if (block->GetPredecessors().size() > 0) {
-    // All predecessors have already been visited because we are visiting in reverse post order.
-    // We merge the values of all locals, creating phis if those values differ.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      bool one_predecessor_has_no_value = false;
-      bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessors()[0], local);
+    // Inputs did not need to be replaced, hence no conflict. Report success.
+    return true;
+  } else {
+    DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
+    HInputsRef inputs = phi->GetInputs();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      HInstruction* input = inputs[i];
+      if (input->GetType() != common_type) {
+        // Input type does not match phi's type. Try to retype the input or
+        // generate a suitably typed equivalent.
+        HInstruction* equivalent = (common_type == Primitive::kPrimNot)
+            ? GetReferenceTypeEquivalent(input)
+            : GetFloatOrDoubleEquivalent(input, common_type);
+        if (equivalent == nullptr) {
+          // Input could not be typed. Report conflict.
+          return false;
+        }
+        // Make sure the input did not change its type and we do not need to
+        // update its users.
+        DCHECK_NE(input, equivalent);
 
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* current = ValueOfLocal(predecessor, local);
-        if (current == nullptr) {
-          one_predecessor_has_no_value = true;
-          break;
-        } else if (current != value) {
-          is_different = true;
+        phi->ReplaceInput(equivalent, i);
+        if (equivalent->IsPhi()) {
+          worklist->push_back(equivalent->AsPhi());
         }
       }
+    }
+    // All inputs either matched the type of the phi or we successfully replaced
+    // them with a suitable equivalent. Report success.
+    return true;
+  }
+}
 
-      if (one_predecessor_has_no_value) {
-        // If one predecessor has no value for this local, we trust the verifier has
-        // successfully checked that there is a store dominating any read after this block.
-        continue;
-      }
+// Attempt to set the primitive type of `phi` to match its inputs. Return whether
+// it was changed by the algorithm or not.
+bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+  DCHECK(phi->IsLive());
+  Primitive::Type original_type = phi->GetType();
 
-      if (is_different) {
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
-        for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
-          phi->SetRawInputAt(i, pred_value);
+  // Try to type the phi in two stages:
+  // (1) find a candidate type for the phi by merging types of all its inputs,
+  // (2) try to type the phi's inputs to that candidate type.
+  // Either of these stages may detect a type conflict and fail, in which case
+  // we immediately abort.
+  if (!TypePhiFromInputs(phi) || !TypeInputsOfPhi(phi, worklist)) {
+    // Conflict detected. Mark the phi dead and return true because it changed.
+    phi->SetDead();
+    return true;
+  }
+
+  // Return true if the type of the phi has changed.
+  return phi->GetType() != original_type;
+}
+
+void SsaBuilder::RunPrimitiveTypePropagation() {
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
+
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsLoopHeader()) {
+      for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          worklist.push_back(phi);
         }
-        block->AddPhi(phi);
-        value = phi;
       }
-      (*current_locals_)[local] = value;
+    } else {
+      for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        // Eagerly compute the type of the phi, for quicker convergence. Note
+        // that we don't need to add users to the worklist: because we are
+        // doing a reverse post-order visit, the phi's users are either
+        // non-loop phis, which will be visited later in the traversal, or
+        // loop phis, which are already in the worklist.
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          UpdatePrimitiveType(phi, &worklist);
+        }
+      }
     }
   }
 
-  // Visit all instructions. The instructions of interest are:
-  // - HLoadLocal: replace them with the current value of the local.
-  // - HStoreLocal: update current value of the local and remove the instruction.
-  // - Instructions that require an environment: populate their environment
-  //   with the current values of the locals.
-  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    it.Current()->Accept(this);
+  ProcessPrimitiveTypePropagationWorklist(&worklist);
+  EquivalentPhisCleanup();
+}
+
+void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) {
+  // Process the worklist until a fixed point is reached.
+  while (!worklist->empty()) {
+    HPhi* phi = worklist->back();
+    worklist->pop_back();
+    // The phi could have been made dead as a result of conflicts while in the
+    // worklist. If it is now dead, there is no point in updating its type.
+    if (phi->IsLive() && UpdatePrimitiveType(phi, worklist)) {
+      AddDependentInstructionsToWorklist(phi, worklist);
+    }
   }
 }
 
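+// An int/long ArrayGet may have a float/double "equivalent": a second HArrayGet
+// on the same array and index, kept directly after the original instruction.
+// The helpers below locate or create such an equivalent.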
+static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  HInstruction* next = aget->GetNext();
+  if (next != nullptr && next->IsArrayGet()) {
+    HArrayGet* next_aget = next->AsArrayGet();
+    if (next_aget->IsEquivalentOf(aget)) {
+      return next_aget;
+    }
+  }
+  return nullptr;
+}
+
+static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr);
+
+  HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet(
+      aget->GetArray(),
+      aget->GetIndex(),
+      type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble,
+      aget->GetDexPc());
+  aget->GetBlock()->InsertInstructionAfter(equivalent, aget);
+  return equivalent;
+}
+
+static Primitive::Type GetPrimitiveArrayComponentType(HInstruction* array)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ReferenceTypeInfo array_type = array->GetReferenceTypeInfo();
+  DCHECK(array_type.IsPrimitiveArrayClass());
+  return array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
+}
+
+bool SsaBuilder::FixAmbiguousArrayOps() {
+  if (ambiguous_agets_.empty() && ambiguous_asets_.empty()) {
+    return true;
+  }
+
+  // The wrong ArrayGet equivalent may still have Phi uses coming from ArraySet
+  // uses (because they are untyped) and environment uses (if --debuggable).
+  // After resolving all ambiguous ArrayGets, we will re-run primitive type
+  // propagation on the Phis which need to be updated.
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+
+    for (HArrayGet* aget_int : ambiguous_agets_) {
+      HInstruction* array = aget_int->GetArray();
+      if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+        // RTP did not type the input array. Bail.
+        return false;
+      }
+
+      HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int);
+      Primitive::Type array_type = GetPrimitiveArrayComponentType(array);
+      DCHECK_EQ(Primitive::Is64BitType(aget_int->GetType()), Primitive::Is64BitType(array_type));
+
+      if (Primitive::IsIntOrLongType(array_type)) {
+        if (aget_float != nullptr) {
+          // There is a float/double equivalent. We must replace it and re-run
+          // primitive type propagation on all dependent instructions.
+          aget_float->ReplaceWith(aget_int);
+          aget_float->GetBlock()->RemoveInstruction(aget_float);
+          AddDependentInstructionsToWorklist(aget_int, &worklist);
+        }
+      } else {
+        DCHECK(Primitive::IsFloatingPointType(array_type));
+        if (aget_float == nullptr) {
+          // This is a float/double ArrayGet but there were no typed uses which
+          // would create the typed equivalent. Create it now.
+          aget_float = CreateFloatOrDoubleEquivalentOfArrayGet(aget_int);
+        }
+        // Replace the original int/long instruction. Note that it may have phi
+        // uses, environment uses, as well as real uses (from untyped ArraySets).
+        // We need to re-run primitive type propagation on its dependent instructions.
+        aget_int->ReplaceWith(aget_float);
+        aget_int->GetBlock()->RemoveInstruction(aget_int);
+        AddDependentInstructionsToWorklist(aget_float, &worklist);
+      }
+    }
+
+    // Set a flag stating that types of ArrayGets have been resolved. Requesting
+    // an equivalent of the wrong type with GetFloatOrDoubleEquivalentOfArrayGet
+    // will fail from now on.
+    agets_fixed_ = true;
+
+    for (HArraySet* aset : ambiguous_asets_) {
+      HInstruction* array = aset->GetArray();
+      if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+        // RTP did not type the input array. Bail.
+        return false;
+      }
+
+      HInstruction* value = aset->GetValue();
+      Primitive::Type value_type = value->GetType();
+      Primitive::Type array_type = GetPrimitiveArrayComponentType(array);
+      DCHECK_EQ(Primitive::Is64BitType(value_type), Primitive::Is64BitType(array_type));
+
+      if (Primitive::IsFloatingPointType(array_type)) {
+        if (!Primitive::IsFloatingPointType(value_type)) {
+          DCHECK(Primitive::IsIntegralType(value_type));
+          // Array elements are floating-point but the value has not been replaced
+          // with its floating-point equivalent. The replacement must always
+          // succeed in code validated by the verifier.
+          HInstruction* equivalent = GetFloatOrDoubleEquivalent(value, array_type);
+          DCHECK(equivalent != nullptr);
+          aset->ReplaceInput(equivalent, /* input_index */ 2);
+          if (equivalent->IsPhi()) {
+            // Returned equivalent is a phi which may not have had its inputs
+            // replaced yet. We need to run primitive type propagation on it.
+            worklist.push_back(equivalent->AsPhi());
+          }
+        }
+        // Refine the side effects of this floating-point aset. Note that we do this
+        // even if no replacement occurs, since the right-hand side may have been
+        // corrected already.
+        aset->ComputeSideEffects();
+      } else {
+        // Array elements are integral and the value initially assigned to them
+        // was integral too. Nothing to do.
+        DCHECK(Primitive::IsIntegralType(array_type));
+        DCHECK(Primitive::IsIntegralType(value_type));
+      }
+    }
+  }
+
+  if (!worklist.empty()) {
+    ProcessPrimitiveTypePropagationWorklist(&worklist);
+    EquivalentPhisCleanup();
+  }
+
+  return true;
+}
+
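+// Environments may reference the same instruction in several slots. Uses by the
+// same environment are linked next to each other, so one linear scan over the
+// environment uses is enough to detect such an alias.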
+static bool HasAliasInEnvironments(HInstruction* instruction) {
+  HEnvironment* last_user = nullptr;
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    DCHECK(use.GetUser() != nullptr);
+    // Note: The first comparison (against the initial nullptr) always fails.
+    if (use.GetUser() == last_user) {
+      return true;
+    }
+    last_user = use.GetUser();
+  }
+
+  if (kIsDebugBuild) {
+    // Do a quadratic search to ensure that uses by the same environment are
+    // next to each other.
+    const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+    for (auto current = env_uses.begin(), end = env_uses.end(); current != end; ++current) {
+      auto next = current;
+      for (++next; next != end; ++next) {
+        DCHECK(next->GetUser() != current->GetUser());
+      }
+    }
+  }
+  return false;
+}
+
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+  if (graph_->IsDebuggable()) {
+    // Do not perform the optimization for consistency with the interpreter
+    // which always allocates an object for new-instance of String.
+    return;
+  }
+
+  for (HNewInstance* new_instance : uninitialized_strings_) {
+    DCHECK(new_instance->IsInBlock());
+    DCHECK(new_instance->IsStringAlloc());
+
+    // Replace the NewInstance of String with NullConstant if it is not used
+    // prior to calling StringFactory. In case of deoptimization, the interpreter
+    // is expected to skip the null check on the `this` argument of the
+    // StringFactory call.
+    if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
+      new_instance->ReplaceWith(graph_->GetNullConstant());
+      new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+      // Remove LoadClass if not needed any more.
+      HInstruction* input = new_instance->InputAt(0);
+      HLoadClass* load_class = nullptr;
+
+      // If the class was not present in the dex cache at the point of building
+      // the graph, the builder inserted a HClinitCheck in between. Since the String
+      // class is always initialized at the point of running Java code, we can remove
+      // that check.
+      if (input->IsClinitCheck()) {
+        load_class = input->InputAt(0)->AsLoadClass();
+        input->ReplaceWith(load_class);
+        input->GetBlock()->RemoveInstruction(input);
+      } else {
+        load_class = input->AsLoadClass();
+        DCHECK(new_instance->IsStringAlloc());
+        DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+      }
+      DCHECK(load_class != nullptr);
+      if (!load_class->HasUses()) {
+        // Even if the HLoadClass needs access check, we can remove it, as we know the
+        // String class does not need it.
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
+    }
+  }
+}
+
+GraphAnalysisResult SsaBuilder::BuildSsa() {
+  DCHECK(!graph_->IsInSsaForm());
+
+  // 1) Propagate types of phis. At this point, phis are typed void in the general
+  // case, or float/double/reference if we created an equivalent phi. So we need
+  // to propagate the types across phis to give them a correct type. If a type
+  // conflict is detected in this stage, the phi is marked dead.
+  RunPrimitiveTypePropagation();
+
+  // 2) Now that the correct primitive types have been assigned, we can get rid
+  // of redundant phis. Note that we cannot do this phase before type propagation,
+  // otherwise we could get rid of phi equivalents, whose presence is a requirement
+  // for the type propagation phase. Note that this is to satisfy statement (a)
+  // of the SsaBuilder (see ssa_builder.h).
+  SsaRedundantPhiElimination(graph_).Run();
+
+  // 3) Fix the type for null constants which are part of an equality comparison.
+  // We need to do this after redundant phi elimination, to ensure the only cases
+  // that we can see are reference comparison against 0. The redundant phi
+  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
+  // or HNotEqual.
+  FixNullConstantType();
+
+  // 4) Compute type of reference type instructions. The pass assumes that
+  // NullConstant has been fixed up.
+  ReferenceTypePropagation(graph_, dex_cache_, handles_, /* is_first_run */ true).Run();
+
+  // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
+  // (int/float or long/double) and marked ArraySets with ambiguous input type.
+  // Now that RTP computed the type of the array input, the ambiguity can be
+  // resolved and the correct equivalents kept.
+  if (!FixAmbiguousArrayOps()) {
+    return kAnalysisFailAmbiguousArrayOp;
+  }
+
+  // 6) Mark dead phis. This will mark phis which are not used by instructions
+  // or other live phis. If compiling as debuggable code, phis will also be kept
+  // live if they have an environment use.
+  SsaDeadPhiElimination dead_phi_elimination(graph_);
+  dead_phi_elimination.MarkDeadPhis();
+
+  // 7) Make sure environments use the right phi equivalent: a phi marked dead
+  // can have a phi equivalent that is not dead. In that case we have to replace
+  // it with the live equivalent because deoptimization and try/catch rely on
+  // environments containing values of all live vregs at that point. Note that
+  // there can be multiple phis for the same Dex register that are live
+  // (for example when merging constants), in which case it is okay for the
+  // environments to just reference one.
+  FixEnvironmentPhis();
+
+  // 8) Now that the right phis are used for the environments, we can eliminate
+  // phis we do not need. Regardless of the debuggable status, this phase is
+  // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
+  // as for the code generation, which does not deal with phis of conflicting
+  // input types.
+  dead_phi_elimination.EliminateDeadPhis();
+
+  // 9) HInstructionBuilder replaced uses of NewInstances of String with the
+  // results of their corresponding StringFactory calls. Unless the String
+  // objects are used before they are initialized, they can be replaced with
+  // NullConstant. Note that this optimization is valid only if unsimplified
+  // code does not use the uninitialized value because we assume execution can
+  // be deoptimized at any safepoint. We must therefore perform it before any
+  // other optimizations.
+  RemoveRedundantUninitializedStrings();
+
+  graph_->SetInSsaForm();
+  return kAnalysisSuccess;
+}
+
 /**
  * Constants in the Dex format are not typed. So the builder types them as
  * integers, but when doing the SSA form, we might realize the constant
@@ -517,11 +560,10 @@
   // We place the floating point constant next to this constant.
   HFloatConstant* result = constant->GetNext()->AsFloatConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
+    float value = bit_cast<float, int32_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HFloatConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheFloatConstant(result);
+    graph_->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -540,11 +582,10 @@
   // We place the floating point constant next to this constant.
   HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
+    double value = bit_cast<double, int64_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HDoubleConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheDoubleConstant(result);
+    graph_->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -561,6 +602,8 @@
  * phi with a floating point / reference type.
  */
 HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one.";
+
   // We place the floating point /reference phi next to this phi.
   HInstruction* next = phi->GetNext();
   if (next != nullptr
@@ -573,30 +616,56 @@
   if (next == nullptr
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
-    ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
-    HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
-      // but the type propagation phase will fix it.
-      new_phi->SetRawInputAt(i, phi->InputAt(i));
+    ArenaAllocator* allocator = graph_->GetArena();
+    HInputsRef inputs = phi->GetInputs();
+    HPhi* new_phi =
+        new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type);
+    // Copy the inputs. Note that the graph may not be correctly typed
+    // by doing this copy, but the type propagation phase will fix it.
+    ArrayRef<HUserRecord<HInstruction*>> new_input_records = new_phi->GetInputRecords();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      new_input_records[i] = HUserRecord<HInstruction*>(inputs[i]);
     }
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+    DCHECK(new_phi->IsLive());
     return new_phi;
   } else {
-    DCHECK_EQ(next->GetType(), type);
-    return next->AsPhi();
+    // An existing equivalent was found. If it is dead, a conflict was previously
+    // identified and we return nullptr instead.
+    HPhi* next_phi = next->AsPhi();
+    DCHECK_EQ(next_phi->GetType(), type);
+    return next_phi->IsLive() ? next_phi : nullptr;
   }
 }
 
-HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
-                                                     HInstruction* value,
-                                                     Primitive::Type type) {
+HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  DCHECK(Primitive::IsIntegralType(aget->GetType()));
+
+  if (!Primitive::IsIntOrLongType(aget->GetType())) {
+    // Cannot type boolean, char, byte, short to float/double.
+    return nullptr;
+  }
+
+  DCHECK(ContainsElement(ambiguous_agets_, aget));
+  if (agets_fixed_) {
+    // This used to be an ambiguous ArrayGet but its type has been resolved to
+    // int/long. Requesting a float/double equivalent should lead to a conflict.
+    if (kIsDebugBuild) {
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(Primitive::IsIntOrLongType(GetPrimitiveArrayComponentType(aget->GetArray())));
+    }
+    return nullptr;
+  } else {
+    // This is an ambiguous ArrayGet which has not been resolved yet. Return an
+    // equivalent float/double instruction to use until it is resolved.
+    HArrayGet* equivalent = FindFloatOrDoubleEquivalentOfArrayGet(aget);
+    return (equivalent == nullptr) ? CreateFloatOrDoubleEquivalentOfArrayGet(aget) : equivalent;
+  }
+}
+
+HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) {
   if (value->IsArrayGet()) {
-    // The verifier has checked that values in arrays cannot be used for both
-    // floating point and non-floating point operations. It is therefore safe to just
-    // change the type of the operation.
-    value->AsArrayGet()->SetType(type);
-    return value;
+    return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet());
   } else if (value->IsLongConstant()) {
     return GetDoubleEquivalent(value->AsLongConstant());
   } else if (value->IsIntConstant()) {
@@ -604,18 +673,13 @@
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
   } else {
-    // For other instructions, we assume the verifier has checked that the dex format is correctly
-    // typed and the value in a dex register will not be used for both floating point and
-    // non-floating point operations. So the only reason an instruction would want a floating
-    // point equivalent is for an unused phi that will be removed by the dead phi elimination phase.
-    DCHECK(user->IsPhi()) << "is actually " << user->DebugName() << " (" << user->GetId() << ")";
-    return value;
+    return nullptr;
   }
 }
 
 HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
   if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) {
-    return value->GetBlock()->GetGraph()->GetNullConstant();
+    return graph_->GetNullConstant();
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
   } else {
@@ -623,75 +687,4 @@
   }
 }
 
-void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
-  // If the operation requests a specific type, we make sure its input is of that type.
-  if (load->GetType() != value->GetType()) {
-    if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
-      value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
-    } else if (load->GetType() == Primitive::kPrimNot) {
-      value = GetReferenceTypeEquivalent(value);
-    }
-  }
-  load->ReplaceWith(value);
-  load->GetBlock()->RemoveInstruction(load);
-}
-
-void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
-  store->GetBlock()->RemoveInstruction(store);
-}
-
-void SsaBuilder::VisitInstruction(HInstruction* instruction) {
-  if (instruction->NeedsEnvironment()) {
-    HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
-        GetGraph()->GetArena(),
-        current_locals_->size(),
-        GetGraph()->GetDexFile(),
-        GetGraph()->GetMethodIdx(),
-        instruction->GetDexPc(),
-        GetGraph()->GetInvokeType(),
-        instruction);
-    environment->CopyFrom(*current_locals_);
-    instruction->SetRawEnvironment(environment);
-  }
-
-  // If in a try block, propagate values of locals into catch blocks.
-  if (instruction->CanThrowIntoCatchBlock()) {
-    const HTryBoundary& try_entry =
-        instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
-    for (HExceptionHandlerIterator it(try_entry); !it.Done(); it.Advance()) {
-      HBasicBlock* catch_block = it.Current();
-      ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
-      DCHECK_EQ(handler_locals->size(), current_locals_->size());
-      for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-        HInstruction* handler_value = (*handler_locals)[vreg];
-        if (handler_value == nullptr) {
-          // Vreg was undefined at a previously encountered throwing instruction
-          // and the catch phi was deleted. Do not record the local value.
-          continue;
-        }
-        DCHECK(handler_value->IsPhi());
-
-        HInstruction* local_value = (*current_locals_)[vreg];
-        if (local_value == nullptr) {
-          // This is the first instruction throwing into `catch_block` where
-          // `vreg` is undefined. Delete the catch phi.
-          catch_block->RemovePhi(handler_value->AsPhi());
-          (*handler_locals)[vreg] = nullptr;
-        } else {
-          // Vreg has been defined at all instructions throwing into `catch_block`
-          // encountered so far. Record the local value in the catch phi.
-          handler_value->AsPhi()->AddInput(local_value);
-        }
-      }
-    }
-  }
-}
-
-void SsaBuilder::VisitTemporary(HTemporary* temp) {
-  // Temporaries are only used by the baseline register allocator.
-  temp->GetBlock()->RemoveInstruction(temp);
-}
-
 }  // namespace art
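
The typing machinery added above is a worklist-driven fixed point: a phi is retyped from its inputs, and whenever its type changes (or it is marked dead on a conflict), its phi users are re-queued. A self-contained sketch of that control structure, with toy types standing in for the ART classes:

    #include <vector>

    struct ToyPhi {
      bool live = true;
      int type = 0;  // 0 plays the role of the untyped kPrimVoid.
      std::vector<ToyPhi*> inputs;
      std::vector<ToyPhi*> users;
    };

    // Greatly simplified stand-in for TypePhiFromInputs/TypeInputsOfPhi:
    // merges input types and reports whether `phi` changed. A conflict
    // (incompatible input types or a dead input) marks the phi dead.
    bool UpdateType(ToyPhi* phi) {
      int merged = phi->type;
      for (ToyPhi* input : phi->inputs) {
        if (!input->live) { phi->live = false; return true; }
        if (merged == 0) {
          merged = input->type;
        } else if (input->type != 0 && input->type != merged) {
          phi->live = false;
          return true;
        }
      }
      if (merged == phi->type) return false;
      phi->type = merged;
      return true;
    }

    void ProcessWorklist(std::vector<ToyPhi*>* worklist) {
      while (!worklist->empty()) {
        ToyPhi* phi = worklist->back();
        worklist->pop_back();
        // A phi may have been made dead while queued; skip it in that case.
        if (phi->live && UpdateType(phi)) {
          for (ToyPhi* user : phi->users) {
            if (user->live) worklist->push_back(user);
          }
        }
      }
    }

    int main() {
      ToyPhi a, b;
      a.type = 2;  // Pretend `a` was typed float by an equivalent.
      b.inputs.push_back(&a);
      a.users.push_back(&b);
      std::vector<ToyPhi*> worklist = {&b};
      ProcessWorklist(&worklist);
      return b.type == 2 ? 0 : 1;  // `b` converges to `a`'s type.
    }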
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 79f1a28..d7360ad 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -23,8 +23,6 @@
 
 namespace art {
 
-static constexpr int kDefaultNumberOfLoops = 2;
-
 /**
  * Transforms a graph into SSA form. The liveness guarantees of
  * this transformation are listed below. A DEX register
@@ -47,56 +45,85 @@
  *     is not set, values of Dex registers only used by environments
  *     are killed.
  */
-class SsaBuilder : public HGraphVisitor {
+class SsaBuilder : public ValueObject {
  public:
-  explicit SsaBuilder(HGraph* graph)
-      : HGraphVisitor(graph),
-        current_locals_(nullptr),
-        loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        locals_for_(graph->GetBlocks().size(),
-                    ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-                    graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    loop_headers_.reserve(kDefaultNumberOfLoops);
+  SsaBuilder(HGraph* graph,
+             Handle<mirror::DexCache> dex_cache,
+             StackHandleScopeCollection* handles)
+      : graph_(graph),
+        dex_cache_(dex_cache),
+        handles_(handles),
+        agets_fixed_(false),
+        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    graph_->InitializeInexactObjectRTI(handles);
   }
 
-  void BuildSsa();
+  GraphAnalysisResult BuildSsa();
 
-  // Returns locals vector for `block`. If it is a catch block, the vector will be
-  // prepopulated with catch phis for vregs which are defined in `current_locals_`.
-  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
-  HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
+  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
 
-  void VisitBasicBlock(HBasicBlock* block);
-  void VisitLoadLocal(HLoadLocal* load);
-  void VisitStoreLocal(HStoreLocal* store);
-  void VisitInstruction(HInstruction* instruction);
-  void VisitTemporary(HTemporary* instruction);
+  void MaybeAddAmbiguousArrayGet(HArrayGet* aget) {
+    Primitive::Type type = aget->GetType();
+    DCHECK(!Primitive::IsFloatingPointType(type));
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_agets_.push_back(aget);
+    }
+  }
 
-  static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user,
-                                                  HInstruction* instruction,
-                                                  Primitive::Type type);
+  void MaybeAddAmbiguousArraySet(HArraySet* aset) {
+    Primitive::Type type = aset->GetValue()->GetType();
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_asets_.push_back(aset);
+    }
+  }
 
-  static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
-
-  static constexpr const char* kSsaBuilderPassName = "ssa_builder";
+  void AddUninitializedString(HNewInstance* string) {
+    // In some rare cases (b/27847265), the same NewInstance may be seen
+    // multiple times. We should only consider it once for removal, so we
+    // ensure it is not added more than once.
+    // Note that we cannot check whether this really is a NewInstance of String
+    // before RTP. We DCHECK that in RemoveRedundantUninitializedStrings.
+    if (!ContainsElement(uninitialized_strings_, string)) {
+      uninitialized_strings_.push_back(string);
+    }
+  }
 
  private:
+  void SetLoopHeaderPhiInputs();
+  void FixEnvironmentPhis();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
+  void RunPrimitiveTypePropagation();
 
-  static HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
-  static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
-  static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+  // Attempts to resolve types of aget(-wide) instructions and type values passed
+  // to aput(-wide) instructions from reference type information on the array
+  // input. Returns false if the type of an array is unknown.
+  bool FixAmbiguousArrayOps();
 
-  // Locals for the current block being visited.
-  ArenaVector<HInstruction*>* current_locals_;
+  bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist);
+  bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
+  void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
 
-  // Keep track of loop headers found. The last phase of the analysis iterates
-  // over these blocks to set the inputs of their phis.
-  ArenaVector<HBasicBlock*> loop_headers_;
+  HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
+  HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
+  HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+  HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
 
-  // HEnvironment for each block.
-  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
+  void RemoveRedundantUninitializedStrings();
+
+  HGraph* graph_;
+  Handle<mirror::DexCache> dex_cache_;
+  StackHandleScopeCollection* const handles_;
+
+  // True if types of ambiguous ArrayGets have been resolved.
+  bool agets_fixed_;
+
+  ArenaVector<HArrayGet*> ambiguous_agets_;
+  ArenaVector<HArraySet*> ambiguous_asets_;
+  ArenaVector<HNewInstance*> uninitialized_strings_;
 
   DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
 };
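
The ambiguous-operation bookkeeping in the SsaBuilder above exists because dex bytecode does not distinguish int from float (or long from double) array accesses; the builder records such agets and asets and later resolves them from the reference type of the array operand. A minimal standalone sketch of that resolution step, using toy types rather than the ART API:

    #include <cassert>
    #include <iostream>

    // Hypothetical toy types, not the ART API: an array access is ambiguous
    // when only its width is known (32- or 64-bit); the component type of the
    // array operand resolves it to int/float or long/double.
    enum class Type { kInt, kFloat, kLong, kDouble, kUnknown };

    Type ResolveAmbiguousGet(Type array_component, bool is_wide) {
      if (array_component == Type::kUnknown) {
        return Type::kUnknown;  // Array type not known yet; resolution fails.
      }
      if (is_wide) {
        assert(array_component == Type::kLong || array_component == Type::kDouble);
      } else {
        assert(array_component == Type::kInt || array_component == Type::kFloat);
      }
      return array_component;
    }

    int main() {
      // An aget-wide from a double[] must produce a double-typed value.
      std::cout << (ResolveAmbiguousGet(Type::kDouble, /*is_wide=*/true) == Type::kDouble)
                << "\n";
    }
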
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b9d8731..a01e107 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -174,6 +174,39 @@
   ComputeLiveInAndLiveOutSets();
 }
 
+static void RecursivelyProcessInputs(HInstruction* current,
+                                     HInstruction* actual_user,
+                                     BitVector* live_in) {
+  HInputsRef inputs = current->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    HInstruction* input = inputs[i];
+    bool has_in_location = current->GetLocations()->InAt(i).IsValid();
+    bool has_out_location = input->GetLocations()->Out().IsValid();
+
+    if (has_in_location) {
+      DCHECK(has_out_location)
+          << "Instruction " << current->DebugName() << current->GetId()
+          << " expects an input value at index " << i << " but "
+          << input->DebugName() << input->GetId() << " does not produce one.";
+      DCHECK(input->HasSsaIndex());
+      // `input` generates a result used by `current`. Add use and update
+      // the live-in set.
+      input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i, actual_user);
+      live_in->SetBit(input->GetSsaIndex());
+    } else if (has_out_location) {
+      // `input` generates a result but it is not used by `current`.
+    } else {
+      // `input` is inlined into `current`. Walk over its inputs and record
+      // uses at `current`.
+      DCHECK(input->IsEmittedAtUseSite());
+      // Check that the inlined input is not a phi. Recursing on loop phis could
+      // lead to an infinite loop.
+      DCHECK(!input->IsPhi());
+      RecursivelyProcessInputs(input, actual_user, live_in);
+    }
+  }
+}
+
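
To illustrate the recursion above: an instruction that is emitted at its use site (an HCondition folded into an HIf, for instance) has no output location of its own, so the uses of its inputs must be attributed to the lifetime position of the materialized outer user. A self-contained toy version, with hypothetical node types rather than ART's HInstruction:

    #include <cstdio>
    #include <vector>

    // Toy IR node, not ART's HInstruction: `materialized` stands in for
    // "has a valid output location".
    struct Node {
      const char* name;
      bool materialized;
      std::vector<Node*> inputs;
    };

    // Record a use of every materialized (transitive) input at `actual_user`'s
    // position, mirroring the shape of RecursivelyProcessInputs above.
    void RecordUses(Node* current, Node* actual_user) {
      for (Node* input : current->inputs) {
        if (input->materialized) {
          std::printf("use of %s recorded at %s\n", input->name, actual_user->name);
        } else {
          // Inlined input: walk its inputs, still attributing uses to actual_user.
          RecordUses(input, actual_user);
        }
      }
    }

    int main() {
      Node a{"a", true, {}};
      Node b{"b", true, {}};
      Node cond{"cond", false, {&a, &b}};  // Emitted at use site, like HCondition.
      Node branch{"if", true, {&cond}};
      RecordUses(&branch, &branch);  // Uses of a and b land at the branch.
    }
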
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
@@ -194,7 +227,7 @@
         // The only instructions which may not be recorded in the environments
         // are constants created by the SSA builder as typed equivalents of
         // untyped constants from the bytecode, or phis with only such constants
-        // as inputs (verified by SSAChecker). Their raw binary value must
+        // as inputs (verified by GraphChecker). Their raw binary value must
         // therefore be the same and we only need to keep alive one.
       } else {
         size_t phi_input_index = successor->GetPredecessorIndexOf(block);
@@ -242,20 +275,24 @@
           }
           if (instruction != nullptr) {
             instruction->GetLiveInterval()->AddUse(
-                current, environment, i, should_be_live);
+                current, environment, i, /* actual_user */ nullptr, should_be_live);
           }
         }
       }
 
-      // All inputs of an instruction must be live.
-      for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-        HInstruction* input = current->InputAt(i);
-        // Some instructions 'inline' their inputs, that is they do not need
-        // to be materialized.
-        if (input->HasSsaIndex() && current->GetLocations()->InAt(i).IsValid()) {
-          live_in->SetBit(input->GetSsaIndex());
-          input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
+      // Process inputs of instructions.
+      if (current->IsEmittedAtUseSite()) {
+        if (kIsDebugBuild) {
+          DCHECK(!current->GetLocations()->Out().IsValid());
+          for (const HUseListNode<HInstruction*>& use : current->GetUses()) {
+            HInstruction* user = use.GetUser();
+            size_t index = use.GetIndex();
+            DCHECK(!user->GetLocations()->InAt(index).IsValid());
+          }
+          DCHECK(!current->HasEnvironmentUses());
         }
+      } else {
+        RecursivelyProcessInputs(current, current, live_in);
       }
     }
 
@@ -273,6 +310,9 @@
     }
 
     if (block->IsLoopHeader()) {
+      if (kIsDebugBuild) {
+        CheckNoLiveInIrreducibleLoop(*block);
+      }
       size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
@@ -296,6 +336,9 @@
       // change in this loop), and the live_out set.  If the live_out
       // set does not change, there is no need to update the live_in set.
       if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+        if (kIsDebugBuild) {
+          CheckNoLiveInIrreducibleLoop(block);
+        }
         changed = true;
       }
     }
@@ -325,6 +368,27 @@
   return live_in->UnionIfNotIn(live_out, kill);
 }
 
+void LiveInterval::DumpWithContext(std::ostream& stream,
+                                   const CodeGenerator& codegen) const {
+  Dump(stream);
+  if (IsFixed()) {
+    stream << ", register:" << GetRegister() << "(";
+    if (IsFloatingPoint()) {
+      codegen.DumpFloatingPointRegister(stream, GetRegister());
+    } else {
+      codegen.DumpCoreRegister(stream, GetRegister());
+    }
+    stream << ")";
+  } else {
+    stream << ", spill slot:" << GetSpillSlot();
+  }
+  stream << ", requires_register:" << (GetDefinedBy() != nullptr && RequiresRegister());
+  if (GetParent()->GetDefinedBy() != nullptr) {
+    stream << ", defined_by:" << GetParent()->GetDefinedBy()->GetKind();
+    stream << "(" << GetParent()->GetDefinedBy()->GetLifetimePosition() << ")";
+  }
+}
+
 static int RegisterOrLowRegister(Location location) {
   return location.IsPair() ? location.low() : location.reg();
 }
@@ -388,12 +452,12 @@
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
         if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) {
-          for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
+          HInputsRef inputs = user->GetInputs();
+          for (size_t i = 0; i < inputs.size(); ++i) {
             if (i == input_index) {
               continue;
             }
-            HInstruction* input = user->InputAt(i);
-            Location location = input->GetLiveInterval()->GetLocationAt(
+            Location location = inputs[i]->GetLiveInterval()->GetLocationAt(
                 user->GetBlock()->GetPredecessors()[i]->GetLifetimeEnd() - 1);
             if (location.IsRegisterKind()) {
               int reg = RegisterOrLowRegister(location);
@@ -429,10 +493,10 @@
   if (defined_by_->IsPhi()) {
     // Try to use the same register as one of the inputs.
     const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
-    for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
-      HInstruction* input = defined_by_->InputAt(i);
+    HInputsRef inputs = defined_by_->GetInputs();
+    for (size_t i = 0; i < inputs.size(); ++i) {
       size_t end = predecessors[i]->GetLifetimeEnd();
-      LiveInterval* input_interval = input->GetLiveInterval()->GetSiblingAt(end - 1);
+      LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(end - 1);
       if (input_interval->GetEnd() == end) {
         // If the input dies at the end of the predecessor, we know its register can
         // be reused.
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 572a7b6..92788fe 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -31,9 +31,9 @@
  public:
   BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
       : block_(block),
-        live_in_(allocator, number_of_ssa_values, false),
-        live_out_(allocator, number_of_ssa_values, false),
-        kill_(allocator, number_of_ssa_values, false) {
+        live_in_(allocator, number_of_ssa_values, false, kArenaAllocSsaLiveness),
+        live_out_(allocator, number_of_ssa_values, false, kArenaAllocSsaLiveness),
+        kill_(allocator, number_of_ssa_values, false, kArenaAllocSsaLiveness) {
     UNUSED(block_);
     live_in_.ClearAllBits();
     live_out_.ClearAllBits();
@@ -113,10 +113,6 @@
         input_index_(input_index),
         position_(position),
         next_(next) {
-    DCHECK((user == nullptr)
-        || user->IsPhi()
-        || (GetPosition() == user->GetLifetimePosition() + 1)
-        || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(environment == nullptr || user == nullptr);
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
@@ -154,9 +150,7 @@
     if (GetIsEnvironment()) return false;
     if (IsSynthesized()) return false;
     Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
-    return location.IsUnallocated()
-        && (location.GetPolicy() == Location::kRequiresRegister
-            || location.GetPolicy() == Location::kRequiresFpuRegister);
+    return location.IsUnallocated() && location.RequiresRegisterKind();
   }
 
  private:
@@ -243,21 +237,30 @@
     AddRange(position, position + 1);
   }
 
+  // Records a use of an input. The use is recorded as an environment use if
+  // `environment` is not null, and as a register use otherwise. If `actual_user`
+  // is specified, the use is recorded at `actual_user`'s lifetime position.
   void AddUse(HInstruction* instruction,
               HEnvironment* environment,
               size_t input_index,
+              HInstruction* actual_user = nullptr,
               bool keep_alive = false) {
-    // Set the use within the instruction.
     bool is_environment = (environment != nullptr);
-    size_t position = instruction->GetLifetimePosition() + 1;
     LocationSummary* locations = instruction->GetLocations();
+    if (actual_user == nullptr) {
+      actual_user = instruction;
+    }
+
+    // Set the use within the instruction.
+    size_t position = actual_user->GetLifetimePosition() + 1;
     if (!is_environment) {
       if (locations->IsFixedInput(input_index) || locations->OutputUsesSameAs(input_index)) {
         // For fixed inputs and output same as input, the register allocator
         // requires to have inputs die at the instruction, so that input moves use the
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
-        position = instruction->GetLifetimePosition();
+        DCHECK_EQ(instruction, actual_user);
+        position = actual_user->GetLifetimePosition();
       } else if (!locations->InAt(input_index).IsValid()) {
         return;
       }
@@ -267,11 +270,8 @@
       AddBackEdgeUses(*instruction->GetBlock());
     }
 
-    DCHECK(position == instruction->GetLifetimePosition()
-           || position == instruction->GetLifetimePosition() + 1);
-
     if ((first_use_ != nullptr)
-        && (first_use_->GetUser() == instruction)
+        && (first_use_->GetUser() == actual_user)
         && (first_use_->GetPosition() < position)) {
       // The user uses the instruction multiple times, and one use dies before the other.
       // We update the use list so that the latter is first.
@@ -479,6 +479,10 @@
     return last_range_->GetEnd();
   }
 
+  size_t GetLength() const {
+    return GetEnd() - GetStart();
+  }
+
   size_t FirstRegisterUseAfter(size_t position) const {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
@@ -502,10 +506,18 @@
     return kNoLifetime;
   }
 
+  // Returns the lifetime position of the first register use for this live
+  // interval, including a register definition if applicable.
   size_t FirstRegisterUse() const {
     return FirstRegisterUseAfter(GetStart());
   }
 
+  // Whether the interval requires a register rather than a stack location.
+  // If needed for performance, this could be cached.
+  bool RequiresRegister() const {
+    return !HasRegister() && FirstRegisterUse() != kNoLifetime;
+  }
+
   size_t FirstUseAfter(size_t position) const {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
@@ -691,6 +703,10 @@
     stream << " is_high: " << IsHighInterval();
   }
 
+  // Same as Dump, but adds context such as the instruction defining this interval, and
+  // the register currently assigned to this interval.
+  void DumpWithContext(std::ostream& stream, const CodeGenerator& codegen) const;
+
   LiveInterval* GetNextSibling() const { return next_sibling_; }
   LiveInterval* GetLastSibling() {
     LiveInterval* result = this;
@@ -795,8 +811,8 @@
   bool IsUsingInputRegister() const {
     CHECK(kIsDebugBuild) << "Function should be used only for DCHECKs";
     if (defined_by_ != nullptr && !IsSplit()) {
-      for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
-        LiveInterval* interval = it.Current()->GetLiveInterval();
+      for (const HInstruction* input : defined_by_->GetInputs()) {
+        LiveInterval* interval = input->GetLiveInterval();
 
        // Find the interval that covers `defined_by_`. Calls to this function
         // are made outside the linear scan, hence we need to use CoversSlow.
@@ -826,8 +842,8 @@
       if (locations->OutputCanOverlapWithInputs()) {
         return false;
       }
-      for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
-        LiveInterval* interval = it.Current()->GetLiveInterval();
+      for (const HInstruction* input : defined_by_->GetInputs()) {
+        LiveInterval* interval = input->GetLiveInterval();
 
        // Find the interval that covers `defined_by_`. Calls to this function
         // are made outside the linear scan, hence we need to use CoversSlow.
@@ -869,6 +885,33 @@
     range_search_start_ = first_range_;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. If the output
+    // of the instruction requires a register, we return the position of that instruction
+    // as the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+           || (location.GetPolicy() == Location::kSameAsFirstInput
+               && (locations->InAt(0).IsRegister()
+                   || locations->InAt(0).IsRegisterPair()
+                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
  private:
   LiveInterval(ArenaAllocator* allocator,
                Primitive::Type type,
@@ -923,33 +966,6 @@
     return range;
   }
 
-  bool DefinitionRequiresRegister() const {
-    DCHECK(IsParent());
-    LocationSummary* locations = defined_by_->GetLocations();
-    Location location = locations->Out();
-    // This interval is the first interval of the instruction. If the output
-    // of the instruction requires a register, we return the position of that instruction
-    // as the first register use.
-    if (location.IsUnallocated()) {
-      if ((location.GetPolicy() == Location::kRequiresRegister)
-           || (location.GetPolicy() == Location::kSameAsFirstInput
-               && (locations->InAt(0).IsRegister()
-                   || locations->InAt(0).IsRegisterPair()
-                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-        return true;
-      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                 || (location.GetPolicy() == Location::kSameAsFirstInput
-                     && (locations->InAt(0).IsFpuRegister()
-                         || locations->InAt(0).IsFpuRegisterPair()
-                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
-        return true;
-      }
-    } else if (location.IsRegister() || location.IsRegisterPair()) {
-      return true;
-    }
-    return false;
-  }
-
   bool IsDefiningPosition(size_t position) const {
     return IsParent() && (position == GetStart());
   }
@@ -967,8 +983,49 @@
     return false;
   }
 
+  bool IsLinearOrderWellFormed(const HGraph& graph) {
+    for (HBasicBlock* header : graph.GetBlocks()) {
+      if (header == nullptr || !header->IsLoopHeader()) {
+        continue;
+      }
+
+      HLoopInformation* loop = header->GetLoopInformation();
+      size_t num_blocks = loop->GetBlocks().NumSetBits();
+      size_t found_blocks = 0u;
+
+      for (HLinearOrderIterator it(graph); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (loop->Contains(*current)) {
+          found_blocks++;
+          if (found_blocks == 1u && current != header) {
+            // First block is not the header.
+            return false;
+          } else if (found_blocks == num_blocks && !loop->IsBackEdge(*current)) {
+            // Last block is not a back edge.
+            return false;
+          }
+        } else if (found_blocks != 0u && found_blocks != num_blocks) {
+          // Blocks are not adjacent.
+          return false;
+        }
+      }
+      DCHECK_EQ(found_blocks, num_blocks);
+    }
+
+    return true;
+  }
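
As a worked example of the property checked above (block numbers are made up): for a loop {2, 3, 4} with header 2 and back edge 4, the linear order 1 2 3 4 5 is well formed, while 1 2 5 3 4 separates the loop blocks and 1 3 2 4 5 does not start with the header. A standalone sketch of the same test over plain block ids:

    #include <cassert>
    #include <cstddef>
    #include <set>
    #include <vector>

    // Toy version of IsLinearOrderWellFormed for a single loop: its blocks must
    // appear contiguously, starting with the header and ending with a back edge.
    bool LoopIsContiguous(const std::vector<int>& linear_order,
                          const std::set<int>& loop_blocks,
                          int header,
                          const std::set<int>& back_edges) {
      size_t found = 0;
      for (int block : linear_order) {
        if (loop_blocks.count(block) != 0) {
          ++found;
          if (found == 1 && block != header) return false;  // Header must be first.
          if (found == loop_blocks.size() && back_edges.count(block) == 0) {
            return false;  // The last loop block must be a back edge.
          }
        } else if (found != 0 && found != loop_blocks.size()) {
          return false;  // Loop blocks are not adjacent in the linear order.
        }
      }
      return found == loop_blocks.size();
    }

    int main() {
      std::set<int> loop = {2, 3, 4};
      std::set<int> back_edges = {4};
      assert(LoopIsContiguous({1, 2, 3, 4, 5}, loop, 2, back_edges));
      assert(!LoopIsContiguous({1, 2, 5, 3, 4}, loop, 2, back_edges));
      assert(!LoopIsContiguous({1, 3, 2, 4, 5}, loop, 2, back_edges));
    }
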
+
   void AddBackEdgeUses(const HBasicBlock& block_at_use) {
     DCHECK(block_at_use.IsInLoop());
+    if (block_at_use.GetGraph()->HasIrreducibleLoops()) {
+      // Linear order may not be well formed when irreducible loops are present,
+      // i.e. loop blocks may not be adjacent and a back edge may not be last,
+      // which violates assumptions made in this method.
+      return;
+    }
+
+    DCHECK(IsLinearOrderWellFormed(*block_at_use.GetGraph()));
+
     // Add synthesized uses at the back edge of loops to help the register allocator.
    // Note that this method is called in decreasing liveness order, to facilitate adding
     // uses at the head of the `first_use_` linked list. Because below
@@ -997,8 +1054,8 @@
         break;
       }
 
-      DCHECK(last_in_new_list == nullptr
-             || back_edge_use_position > last_in_new_list->GetPosition());
+      DCHECK(last_in_new_list == nullptr ||
+             back_edge_use_position > last_in_new_list->GetPosition());
 
       UsePosition* new_use = new (allocator_) UsePosition(
           /* user */ nullptr,
@@ -1217,6 +1274,23 @@
     return instruction->GetType() == Primitive::kPrimNot;
   }
 
+  void CheckNoLiveInIrreducibleLoop(const HBasicBlock& block) const {
+    if (!block.IsLoopHeader() || !block.GetLoopInformation()->IsIrreducible()) {
+      return;
+    }
+    BitVector* live_in = GetLiveInSet(block);
+    // To satisfy our liveness algorithm, we need to ensure loop headers of
+    // irreducible loops do not have any live-in instructions, except constants
+    // and the current method, which can be trivially re-materialized.
+    for (uint32_t idx : live_in->Indexes()) {
+      HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+      DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+      DCHECK(!instruction->IsParameterValue());
+      DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+          << instruction->DebugName();
+    }
+  }
+
   HGraph* const graph_;
   CodeGenerator* const codegen_;
   ArenaVector<BlockInfo*> block_infos_;
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 72f9ddd..b1ec99a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -16,6 +16,10 @@
 
 #include "ssa_phi_elimination.h"
 
+#include "base/arena_containers.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
+
 namespace art {
 
 void SsaDeadPhiElimination::Run() {
@@ -24,20 +28,36 @@
 }
 
 void SsaDeadPhiElimination::MarkDeadPhis() {
+  // Phis are constructed live and should not be revived if previously marked
+  // dead. This algorithm temporarily breaks that invariant but we DCHECK that
+  // only phis which were initially live are revived.
+  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
+
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
-      // Set dead ahead of running through uses. The phi may have no use.
-      phi->SetDead();
-      for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction*>* current = use_it.Current();
-        HInstruction* user = current->GetUser();
-        if (!user->IsPhi()) {
-          worklist_.push_back(phi);
-          phi->SetLive();
-          break;
+      if (phi->IsDead()) {
+        continue;
+      }
+
+      bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses());
+      if (!keep_alive) {
+        for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+          if (!use.GetUser()->IsPhi()) {
+            keep_alive = true;
+            break;
+          }
+        }
+      }
+
+      if (keep_alive) {
+        worklist_.push_back(phi);
+      } else {
+        phi->SetDead();
+        if (kIsDebugBuild) {
+          initially_live.insert(phi);
         }
       }
     }
@@ -47,11 +67,14 @@
   while (!worklist_.empty()) {
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
-    for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HInstruction* input = it.Current();
-      if (input->IsPhi() && input->AsPhi()->IsDead()) {
-        worklist_.push_back(input->AsPhi());
-        input->AsPhi()->SetLive();
+    for (HInstruction* raw_input : phi->GetInputs()) {
+      HPhi* input = raw_input->AsPhi();
+      if (input != nullptr && input->IsDead()) {
+        // Input is a dead phi. Revive it and add it to the worklist. We make sure
+        // that the phi was not dead initially (see definition of `initially_live`).
+        DCHECK(ContainsElement(initially_live, input));
+        input->SetLive();
+        worklist_.push_back(input);
       }
     }
   }
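
The marking pass above is a standard backwards liveness worklist over the phi graph: phis with a non-phi user (or, in debuggable graphs, an environment use) seed the worklist, and liveness then propagates through phi inputs, reviving any phi that a live phi depends on. A self-contained sketch with toy nodes instead of HPhi:

    #include <vector>

    // Toy phi node, not ART's HPhi.
    struct Phi {
      bool live = false;
      bool used_by_non_phi = false;
      std::vector<Phi*> inputs;  // Only phi inputs matter for propagation.
    };

    // Mark live exactly the phis reachable (through phi inputs) from a phi
    // that has a non-phi user, mirroring the shape of MarkDeadPhis above.
    void MarkLivePhis(std::vector<Phi*>& phis) {
      std::vector<Phi*> worklist;
      for (Phi* phi : phis) {
        if (phi->used_by_non_phi) {
          phi->live = true;
          worklist.push_back(phi);
        }
      }
      while (!worklist.empty()) {
        Phi* phi = worklist.back();
        worklist.pop_back();
        for (Phi* input : phi->inputs) {
          if (!input->live) {
            input->live = true;  // Revive: a live phi depends on it.
            worklist.push_back(input);
          }
        }
      }
    }
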
@@ -72,23 +95,18 @@
       if (phi->IsDead()) {
         // Make sure the phi is only used by other dead phis.
         if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
-               use_it.Advance()) {
-            HInstruction* user = use_it.Current()->GetUser();
-            DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
-            DCHECK(user->AsPhi()->IsDead()) << user->GetId();
+          for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+            HInstruction* user = use.GetUser();
+            DCHECK(user->IsLoopHeaderPhi());
+            DCHECK(user->AsPhi()->IsDead());
           }
         }
         // Remove the phi from use lists of its inputs.
-        for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-          phi->RemoveAsUserOfInput(i);
-        }
+        phi->RemoveAsUserOfAllInputs();
         // Remove the phi from environments that use it.
-        for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done();
-             use_it.Advance()) {
-          HUseListNode<HEnvironment*>* user_node = use_it.Current();
-          HEnvironment* user = user_node->GetUser();
-          user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+        for (const HUseListNode<HEnvironment*>& use : phi->GetEnvUses()) {
+          HEnvironment* user = use.GetUser();
+          user->SetRawEnvAt(use.GetIndex(), nullptr);
         }
         // Delete it from the instruction list.
         block->RemovePhi(phi, /*ensure_safety=*/ false);
@@ -108,6 +126,12 @@
     }
   }
 
+  ArenaBitVector visited_phis_in_cycle(graph_->GetArena(),
+                                       graph_->GetCurrentInstructionId(),
+                                       /* expandable */ false,
+                                       kArenaAllocSsaPhiElimination);
+  ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
+
   while (!worklist_.empty()) {
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
@@ -117,52 +141,106 @@
       continue;
     }
 
-    if (phi->InputCount() == 0) {
-      DCHECK(phi->IsCatchPhi());
-      DCHECK(phi->IsDead());
+    // If the phi is dead, we know we won't revive it and it will be removed,
+    // so don't process it.
+    if (phi->IsDead()) {
       continue;
     }
 
-    // Find if the inputs of the phi are the same instruction.
-    HInstruction* candidate = phi->InputAt(0);
-    // A loop phi cannot have itself as the first phi. Note that this
-    // check relies on our simplification pass ensuring the pre-header
-    // block is first in the list of predecessors of the loop header.
-    DCHECK(!phi->IsLoopHeaderPhi() || phi->GetBlock()->IsLoopPreHeaderFirstPredecessor());
-    DCHECK_NE(phi, candidate);
+    HInstruction* candidate = nullptr;
+    visited_phis_in_cycle.ClearAllBits();
+    cycle_worklist.clear();
 
-    for (size_t i = 1; i < phi->InputCount(); ++i) {
-      HInstruction* input = phi->InputAt(i);
-      // For a loop phi, if the input is the phi, the phi is still candidate for
-      // elimination.
-      if (input != candidate && input != phi) {
+    cycle_worklist.push_back(phi);
+    visited_phis_in_cycle.SetBit(phi->GetId());
+    bool catch_phi_in_cycle = phi->IsCatchPhi();
+    bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();
+
+    // First do a simple loop over inputs and check if they are all the same.
+    for (HInstruction* input : phi->GetInputs()) {
+      if (input == phi) {
+        continue;
+      } else if (candidate == nullptr) {
+        candidate = input;
+      } else if (candidate != input) {
         candidate = nullptr;
         break;
       }
     }
 
-    // If the inputs are not the same, continue.
+    // If we haven't found a candidate, check for a phi cycle. Note that we need to detect
+    // such cycles to avoid having reference and non-reference equivalents. We check this
+    // invariant in the graph checker.
+    if (candidate == nullptr) {
+      // We iterate over the array as long as it grows.
+      for (size_t i = 0; i < cycle_worklist.size(); ++i) {
+        HPhi* current = cycle_worklist[i];
+        DCHECK(!current->IsLoopHeaderPhi() ||
+               current->GetBlock()->IsLoopPreHeaderFirstPredecessor());
+
+        for (HInstruction* input : current->GetInputs()) {
+          if (input == current) {
+            continue;
+          } else if (input->IsPhi()) {
+            if (!visited_phis_in_cycle.IsBitSet(input->GetId())) {
+              cycle_worklist.push_back(input->AsPhi());
+              visited_phis_in_cycle.SetBit(input->GetId());
+              catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi();
+              irreducible_loop_phi_in_cycle |= input->IsIrreducibleLoopHeaderPhi();
+            } else {
+              // Already visited, nothing to do.
+            }
+          } else if (candidate == nullptr) {
+            candidate = input;
+          } else if (candidate != input) {
+            candidate = nullptr;
+            // Clear the cycle worklist to break out of the outer loop.
+            cycle_worklist.clear();
+            break;
+          }
+        }
+      }
+    }
+
     if (candidate == nullptr) {
       continue;
     }
 
-    // The candidate may not dominate a phi in a catch block.
-    if (phi->IsCatchPhi() && !candidate->StrictlyDominates(phi)) {
+    if (irreducible_loop_phi_in_cycle && !candidate->IsConstant()) {
+      // For irreducible loops, we need to keep the phis to satisfy our linear scan
+      // algorithm.
+      // There is one exception for constants, as the type propagation requires redundant
+      // cyclic phis of a constant to be removed. This is ok for the linear scan as it
+      // has to deal with constants anyway, and they can trivially be rematerialized.
       continue;
     }
 
-    // Because we're updating the users of this phi, we may have new candidates
-    // for elimination. Add phis that use this phi to the worklist.
-    for (HUseIterator<HInstruction*> it(phi->GetUses()); !it.Done(); it.Advance()) {
-      HUseListNode<HInstruction*>* current = it.Current();
-      HInstruction* user = current->GetUser();
-      if (user->IsPhi()) {
-        worklist_.push_back(user->AsPhi());
+    for (HPhi* current : cycle_worklist) {
+      // The candidate may not dominate a phi in a catch block: there may be
+      // non-throwing instructions at the beginning of a try range that may be
+      // the first input of catch phis.
+      // TODO(dbrazdil): Remove this situation by moving those non-throwing instructions
+      // before the try entry.
+      if (catch_phi_in_cycle) {
+        if (!candidate->StrictlyDominates(current)) {
+          continue;
+        }
+      } else {
+        DCHECK(candidate->StrictlyDominates(current));
       }
-    }
 
-    phi->ReplaceWith(candidate);
-    phi->GetBlock()->RemovePhi(phi);
+      // Because we're updating the users of this phi, we may have new candidates
+      // for elimination. Add phis that use this phi to the worklist.
+      for (const HUseListNode<HInstruction*>& use : current->GetUses()) {
+        HInstruction* user = use.GetUser();
+        if (user->IsPhi() && !visited_phis_in_cycle.IsBitSet(user->GetId())) {
+          worklist_.push_back(user->AsPhi());
+        }
+      }
+      DCHECK(candidate->StrictlyDominates(current));
+      current->ReplaceWith(candidate);
+      current->GetBlock()->RemovePhi(current);
+    }
   }
 }
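
The cycle handling above generalizes plain redundant-phi removal: a set of phis that reference only each other and a single outside value computes that value, so the whole cycle can be replaced by it. A minimal standalone sketch of the candidate search, with a toy value type rather than ART's worklist:

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Value {
      bool is_phi;
      std::vector<Value*> inputs;  // Empty for non-phis in this toy model.
    };

    // Returns the unique non-phi value feeding the phi cycle rooted at `phi`,
    // or nullptr if the cycle has two or more distinct outside inputs.
    Value* FindCycleCandidate(Value* phi) {
      Value* candidate = nullptr;
      std::set<Value*> visited{phi};
      std::vector<Value*> worklist{phi};
      for (size_t i = 0; i < worklist.size(); ++i) {  // Grows as phis are found.
        for (Value* input : worklist[i]->inputs) {
          if (input == worklist[i]) continue;  // Self-reference, ignore.
          if (input->is_phi) {
            if (visited.insert(input).second) worklist.push_back(input);
          } else if (candidate == nullptr) {
            candidate = input;
          } else if (candidate != input) {
            return nullptr;  // Two distinct outside values: not redundant.
          }
        }
      }
      return candidate;
    }
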
 
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 024278f..4297634 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+class SsaTest : public CommonCompilerTest {};
+
 class SsaPrettyPrinter : public HPrettyPrinter {
  public:
   explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
@@ -77,17 +79,10 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-
-  graph->BuildDominatorTree();
+  HGraph* graph = CreateCFG(&allocator, data);
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  graph->TransformToSsa();
   ReNumberInstructions(graph);
 
   // Test that phis had their type set.
@@ -103,7 +98,7 @@
   ASSERT_STREQ(expected, printer.str().c_str());
 }
 
-TEST(SsaTest, CFG1) {
+TEST_F(SsaTest, CFG1) {
   // Test that we get rid of loads and stores.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -131,7 +126,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, CFG2) {
+TEST_F(SsaTest, CFG2) {
   // Test that we create a phi for the join block of an if control flow instruction
   // when there is only code in the else branch.
   const char* expected =
@@ -162,14 +157,14 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, CFG3) {
+TEST_F(SsaTest, CFG3) {
   // Test that we create a phi for the join block of an if control flow instruction
   // when both branches update a local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [8]\n"
-    "  2: IntConstant 5 [8]\n"
+    "  1: IntConstant 5 [8]\n"
+    "  2: IntConstant 4 [8]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -179,7 +174,7 @@
     "BasicBlock 3, pred: 1, succ: 4\n"
     "  7: Goto\n"
     "BasicBlock 4, pred: 2, 3, succ: 5\n"
-    "  8: Phi(1, 2) [9]\n"
+    "  8: Phi(2, 1) [9]\n"
     "  9: Return(8)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
@@ -195,7 +190,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop1) {
+TEST_F(SsaTest, Loop1) {
   // Test that we create a phi for an initialized local at entry of a loop.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -228,7 +223,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop2) {
+TEST_F(SsaTest, Loop2) {
   // Simple loop with one preheader and one back edge.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -258,24 +253,24 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop3) {
+TEST_F(SsaTest, Loop3) {
   // Test that a local not yet defined at the entry of a loop is handled properly.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [5]\n"
-    "  2: IntConstant 5 [9]\n"
+    "  1: IntConstant 5 [9]\n"
+    "  2: IntConstant 4 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
     "BasicBlock 2, pred: 1, 3, succ: 4, 3\n"
-    "  5: Phi(0, 1) [6, 6]\n"
+    "  5: Phi(0, 2) [6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 2\n"
     "  8: Goto\n"
     "BasicBlock 4, pred: 2, succ: 5\n"
-    "  9: Return(2)\n"
+    "  9: Return(1)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
 
@@ -290,7 +285,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop4) {
+TEST_F(SsaTest, Loop4) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   const char* expected =
@@ -325,14 +320,14 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop5) {
+TEST_F(SsaTest, Loop5) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [13]\n"
-    "  2: IntConstant 5 [13]\n"
+    "  1: IntConstant 5 [13]\n"
+    "  2: IntConstant 4 [13]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -351,7 +346,7 @@
     "BasicBlock 7, pred: 6\n"
     "  12: Exit\n"
     "BasicBlock 8, pred: 2, 3, succ: 4\n"
-    "  13: Phi(1, 2) [8, 8, 11]\n"
+    "  13: Phi(2, 1) [11, 8, 8]\n"
     "  14: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -367,7 +362,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop6) {
+TEST_F(SsaTest, Loop6) {
   // Test a loop with one preheader and two back edges (e.g. continue).
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -406,7 +401,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop7) {
+TEST_F(SsaTest, Loop7) {
   // Test a loop with one preheader, one back edge, and two exit edges (e.g. break).
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -448,7 +443,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, DeadLocal) {
+TEST_F(SsaTest, DeadLocal) {
   // Test that we correctly handle a local not being used.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -466,7 +461,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, LocalInIf) {
+TEST_F(SsaTest, LocalInIf) {
   // Test that we do not create a phi in the join block when one predecessor
   // does not update the local.
   const char* expected =
@@ -496,12 +491,12 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, MultiplePredecessors) {
+TEST_F(SsaTest, MultiplePredecessors) {
   // Test that we do not create a phi when one predecessor
   // does not update the local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n"
+    "  0: IntConstant 0 [4, 4, 8, 8, 6, 6, 2, 2]\n"
     "  1: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  2: Equal(0, 0) [3]\n"
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c60a4ea..fc8af64 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -37,7 +37,7 @@
   current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
   if (num_dex_registers != 0) {
     current_entry_.live_dex_registers_mask =
-        new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
+        ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
   } else {
     current_entry_.live_dex_registers_mask = nullptr;
   }
@@ -63,8 +63,7 @@
 void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
   if (kind != DexRegisterLocation::Kind::kNone) {
     // Ensure we only use non-compressed location kind at this stage.
-    DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
-        << DexRegisterLocation::PrettyDescriptor(kind);
+    DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << kind;
     DexRegisterLocation location(kind, value);
 
     // Look for Dex register `location` in the location catalog (using the
@@ -112,7 +111,7 @@
   current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size();
   if (num_dex_registers != 0) {
     current_inline_info_.live_dex_registers_mask =
-        new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
+        ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
   } else {
     current_inline_info_.live_dex_registers_mask = nullptr;
   }
@@ -138,34 +137,43 @@
 
 size_t StackMapStream::PrepareForFillIn() {
   int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
-  stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
-  inline_info_size_ = ComputeInlineInfoSize();
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  ComputeInlineInfoEncoding();  // needs dex_register_maps_size_.
+  inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
   uint32_t max_native_pc_offset = ComputeMaxNativePcOffset();
-  stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_,
-                                                          inline_info_size_,
-                                                          dex_register_maps_size_,
-                                                          dex_pc_max_,
-                                                          max_native_pc_offset,
-                                                          register_mask_max_);
-  stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize();
+  size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset,
+                                                           dex_pc_max_,
+                                                           dex_register_maps_size_,
+                                                           inline_info_size_,
+                                                           register_mask_max_,
+                                                           stack_mask_number_of_bits);
+  stack_maps_size_ = stack_maps_.size() * stack_map_size;
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
 
-  // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
-  needed_size_ = CodeInfo::kFixedSize
-      + stack_maps_size_
-      + dex_register_location_catalog_size_
-      + dex_register_maps_size_
-      + inline_info_size_;
+  size_t non_header_size =
+      stack_maps_size_ +
+      dex_register_location_catalog_size_ +
+      dex_register_maps_size_ +
+      inline_info_size_;
 
-  stack_maps_start_ = CodeInfo::kFixedSize;
+  // Prepare the CodeInfo variable-sized encoding.
+  CodeInfoEncoding code_info_encoding;
+  code_info_encoding.non_header_size = non_header_size;
+  code_info_encoding.number_of_stack_maps = stack_maps_.size();
+  code_info_encoding.stack_map_size_in_bytes = stack_map_size;
+  code_info_encoding.stack_map_encoding = stack_map_encoding_;
+  code_info_encoding.inline_info_encoding = inline_info_encoding_;
+  code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
+  code_info_encoding.Compress(&code_info_encoding_);
+
   // TODO: Move the catalog at the end. It is currently too expensive at runtime
   // to compute its size (note that we do not encode that size in the CodeInfo).
-  dex_register_location_catalog_start_ = stack_maps_start_ + stack_maps_size_;
+  dex_register_location_catalog_start_ = code_info_encoding_.size() + stack_maps_size_;
   dex_register_maps_start_ =
       dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
   inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_;
 
+  needed_size_ = code_info_encoding_.size() + non_header_size;
   return needed_size_;
 }
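
The SetFromSizes calls above size each field from the maximum value it has to hold instead of using fixed byte counts. A hedged sketch of that bit-budgeting idea, with a hypothetical helper rather than the ART encoding classes:

    #include <cstddef>
    #include <cstdint>

    // Number of bits needed to store any value in [0, max_value].
    static size_t MinimumBitsToStore(uint32_t max_value) {
      size_t bits = 0;
      while (max_value != 0) {
        ++bits;
        max_value >>= 1;
      }
      return bits;
    }

    // Toy per-entry size computation: each field gets exactly the bits its
    // maximum observed value requires, and the entry is rounded up to bytes.
    size_t ComputeEntrySizeInBytes(uint32_t max_native_pc,
                                   uint32_t max_dex_pc,
                                   uint32_t max_register_mask,
                                   size_t stack_mask_bits) {
      size_t bits = MinimumBitsToStore(max_native_pc) +
                    MinimumBitsToStore(max_dex_pc) +
                    MinimumBitsToStore(max_register_mask) +
                    stack_mask_bits;
      return (bits + 7) / 8;  // Round up to whole bytes.
    }
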
 
@@ -218,19 +226,42 @@
   return size;
 }
 
-size_t StackMapStream::ComputeInlineInfoSize() const {
-  return inline_infos_.size() * InlineInfo::SingleEntrySize()
-    // For encoding the depth.
-    + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
+void StackMapStream::ComputeInlineInfoEncoding() {
+  uint32_t method_index_max = 0;
+  uint32_t dex_pc_max = DexFile::kDexNoIndex;
+  uint32_t invoke_type_max = 0;
+
+  uint32_t inline_info_index = 0;
+  for (const StackMapEntry& entry : stack_maps_) {
+    for (size_t j = 0; j < entry.inlining_depth; ++j) {
+      InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
+      method_index_max = std::max(method_index_max, inline_entry.method_index);
+      if (inline_entry.dex_pc != DexFile::kDexNoIndex &&
+          (dex_pc_max == DexFile::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) {
+        dex_pc_max = inline_entry.dex_pc;
+      }
+      invoke_type_max = std::max(invoke_type_max, static_cast<uint32_t>(inline_entry.invoke_type));
+    }
+  }
+  DCHECK_EQ(inline_info_index, inline_infos_.size());
+
+  inline_info_encoding_.SetFromSizes(method_index_max,
+                                     dex_pc_max,
+                                     invoke_type_max,
+                                     dex_register_maps_size_);
 }
 
 void StackMapStream::FillIn(MemoryRegion region) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
   DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
 
-  CodeInfo code_info(region);
   DCHECK_EQ(region.size(), needed_size_);
-  code_info.SetOverallSize(region.size());
+
+  // Note that the memory region does not have to be zeroed when we JIT code
+  // because we do not use the arena allocator there.
+
+  // Write the CodeInfo header.
+  region.CopyFrom(0, MemoryRegion(code_info_encoding_.data(), code_info_encoding_.size()));
 
   MemoryRegion dex_register_locations_region = region.Subregion(
       dex_register_maps_start_, dex_register_maps_size_);
@@ -238,12 +269,11 @@
   MemoryRegion inline_infos_region = region.Subregion(
       inline_infos_start_, inline_info_size_);
 
-  code_info.SetEncoding(stack_map_encoding_);
-  code_info.SetNumberOfStackMaps(stack_maps_.size());
-  DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_);
+  CodeInfo code_info(region);
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  DCHECK_EQ(code_info.GetStackMapsSize(encoding), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
       dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
@@ -257,20 +287,29 @@
   // Ensure we reached the end of the Dex registers location_catalog.
   DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
 
+  ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false, kArenaAllocStackMapStream);
   uintptr_t next_dex_register_map_offset = 0;
   uintptr_t next_inline_info_offset = 0;
   for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
-    StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_);
+    StackMap stack_map = code_info.GetStackMapAt(i, encoding);
     StackMapEntry entry = stack_maps_[i];
 
     stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
     stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset);
     stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
+    size_t number_of_stack_mask_bits = stack_map.GetNumberOfStackMaskBits(stack_map_encoding_);
     if (entry.sp_mask != nullptr) {
-      stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
+      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
+        stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit));
+      }
+    } else {
+      // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
+      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
+        stack_map.SetStackMaskBit(stack_map_encoding_, bit, false);
+      }
     }
 
-    if (entry.num_dex_registers == 0) {
+    if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
       // No dex map available.
       stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap);
     } else {
@@ -279,7 +318,7 @@
         // If we have a hit reuse the offset.
         stack_map.SetDexRegisterMapOffset(
             stack_map_encoding_,
-            code_info.GetStackMapAt(entry.same_dex_register_map_as_, stack_map_encoding_)
+            code_info.GetStackMapAt(entry.same_dex_register_map_as_, encoding)
                 .GetDexRegisterMapOffset(stack_map_encoding_));
       } else {
         // New dex registers maps should be added to the stack map.
@@ -303,7 +342,7 @@
     if (entry.inlining_depth != 0) {
       MemoryRegion inline_region = inline_infos_region.Subregion(
           next_inline_info_offset,
-          InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize());
+          entry.inlining_depth * inline_info_encoding_.GetEntrySize());
       next_inline_info_offset += inline_region.size();
       InlineInfo inline_info(inline_region);
 
@@ -311,16 +350,18 @@
       stack_map.SetInlineDescriptorOffset(
           stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
 
-      inline_info.SetDepth(entry.inlining_depth);
+      inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
       DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
         InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
-        inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index);
-        inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc);
-        inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type);
+        inline_info.SetMethodIndexAtDepth(inline_info_encoding_, depth, inline_entry.method_index);
+        inline_info.SetDexPcAtDepth(inline_info_encoding_, depth, inline_entry.dex_pc);
+        inline_info.SetInvokeTypeAtDepth(inline_info_encoding_, depth, inline_entry.invoke_type);
         if (inline_entry.num_dex_registers == 0) {
           // No dex map available.
-          inline_info.SetDexRegisterMapOffsetAtDepth(depth, StackMap::kNoDexRegisterMap);
+          inline_info.SetDexRegisterMapOffsetAtDepth(inline_info_encoding_,
+                                                     depth,
+                                                     StackMap::kNoDexRegisterMap);
           DCHECK(inline_entry.live_dex_registers_mask == nullptr);
         } else {
           MemoryRegion register_region = dex_register_locations_region.Subregion(
@@ -330,7 +371,8 @@
           next_dex_register_map_offset += register_region.size();
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
-            depth, register_region.start() - dex_register_locations_region.start());
+              inline_info_encoding_,
+              depth, register_region.start() - dex_register_locations_region.start());
 
           FillInDexRegisterMap(dex_register_map,
                                inline_entry.num_dex_registers,
@@ -344,6 +386,11 @@
       }
     }
   }
+
+  // Verify all of the written data in debug builds.
+  if (kIsDebugBuild) {
+    CheckCodeInfo(region);
+  }
 }
 
 void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
@@ -423,4 +470,94 @@
   return true;
 }
 
+// Helper for CheckCodeInfo - check that register map has the expected content.
+void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
+                                         const DexRegisterMap& dex_register_map,
+                                         size_t num_dex_registers,
+                                         BitVector* live_dex_registers_mask,
+                                         size_t dex_register_locations_index) const {
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  for (size_t reg = 0; reg < num_dex_registers; reg++) {
+    // Find the location we tried to encode.
+    DexRegisterLocation expected = DexRegisterLocation::None();
+    if (live_dex_registers_mask->IsBitSet(reg)) {
+      size_t catalog_index = dex_register_locations_[dex_register_locations_index++];
+      expected = location_catalog_entries_[catalog_index];
+    }
+    // Compare to the seen location.
+    if (expected.GetKind() == DexRegisterLocation::Kind::kNone) {
+      DCHECK(!dex_register_map.IsValid() || !dex_register_map.IsDexRegisterLive(reg));
+    } else {
+      DCHECK(dex_register_map.IsDexRegisterLive(reg));
+      DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(
+          reg, num_dex_registers, code_info, encoding);
+      DCHECK_EQ(expected.GetKind(), seen.GetKind());
+      DCHECK_EQ(expected.GetValue(), seen.GetValue());
+    }
+  }
+  if (num_dex_registers == 0) {
+    DCHECK(!dex_register_map.IsValid());
+  }
+}
+
+// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
+void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
+  CodeInfo code_info(region);
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  DCHECK_EQ(code_info.GetNumberOfStackMaps(encoding), stack_maps_.size());
+  for (size_t s = 0; s < stack_maps_.size(); ++s) {
+    const StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+    const StackMapEncoding& stack_map_encoding = encoding.stack_map_encoding;
+    StackMapEntry entry = stack_maps_[s];
+
+    // Check main stack map fields.
+    DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding), entry.native_pc_offset);
+    DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
+    DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
+    size_t num_stack_mask_bits = stack_map.GetNumberOfStackMaskBits(stack_map_encoding);
+    if (entry.sp_mask != nullptr) {
+      DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits());
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b));
+      }
+    } else {
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u);
+      }
+    }
+
+    CheckDexRegisterMap(code_info,
+                        code_info.GetDexRegisterMapOf(
+                            stack_map, encoding, entry.num_dex_registers),
+                        entry.num_dex_registers,
+                        entry.live_dex_registers_mask,
+                        entry.dex_register_locations_start_index);
+
+    // Check inline info.
+    DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0));
+    if (entry.inlining_depth != 0) {
+      InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+      DCHECK_EQ(inline_info.GetDepth(encoding.inline_info_encoding), entry.inlining_depth);
+      for (size_t d = 0; d < entry.inlining_depth; ++d) {
+        size_t inline_info_index = entry.inline_infos_start_index + d;
+        DCHECK_LT(inline_info_index, inline_infos_.size());
+        InlineInfoEntry inline_entry = inline_infos_[inline_info_index];
+        DCHECK_EQ(inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.dex_pc);
+        DCHECK_EQ(inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.method_index);
+        DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.invoke_type);
+
+        CheckDexRegisterMap(code_info,
+                            code_info.GetDexRegisterMapAtDepth(
+                                d, inline_info, encoding, inline_entry.num_dex_registers),
+                            inline_entry.num_dex_registers,
+                            inline_entry.live_dex_registers_mask,
+                            inline_entry.dex_register_locations_start_index);
+      }
+    }
+  }
+}
+
 }  // namespace art
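
CheckCodeInfo above applies a write-then-read-back discipline that is cheap insurance for any hand-rolled binary encoder: in debug builds, immediately decode everything just written and assert it matches the original inputs. A generic self-contained sketch of the same pattern:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Toy encoder: little-endian u32 values appended to a byte buffer.
    void Encode(std::vector<uint8_t>* out, uint32_t value) {
      for (int i = 0; i < 4; ++i) {
        out->push_back(static_cast<uint8_t>(value >> (8 * i)));
      }
    }

    uint32_t Decode(const std::vector<uint8_t>& in, size_t index) {
      uint32_t value = 0;
      std::memcpy(&value, &in[index * 4], 4);  // Assumes a little-endian host.
      return value;
    }

    int main() {
      std::vector<uint32_t> inputs = {0, 42, 0xdeadbeef};
      std::vector<uint8_t> buffer;
      for (uint32_t v : inputs) Encode(&buffer, v);
      // Debug-build style verification: read back and compare to the inputs.
      for (size_t i = 0; i < inputs.size(); ++i) {
        assert(Decode(buffer, i) == inputs[i]);
      }
    }
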
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index fc27a2b..53a9795 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -74,13 +74,12 @@
                                            allocator->Adapter(kArenaAllocStackMapStream)),
         current_entry_(),
         current_inline_info_(),
-        stack_mask_size_(0),
+        code_info_encoding_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_info_size_(0),
         dex_register_maps_size_(0),
         stack_maps_size_(0),
         dex_register_location_catalog_size_(0),
         dex_register_location_catalog_start_(0),
-        stack_maps_start_(0),
         dex_register_maps_start_(0),
         inline_infos_start_(0),
         needed_size_(0),
@@ -90,6 +89,7 @@
     location_catalog_entries_.reserve(4);
     dex_register_locations_.reserve(10 * 4);
     inline_infos_.reserve(2);
+    code_info_encoding_.reserve(16);
   }
 
   // See runtime/stack_map.h to know what these fields contain.
@@ -108,7 +108,7 @@
   };
 
   struct InlineInfoEntry {
-    uint32_t dex_pc;
+    uint32_t dex_pc;  // DexFile::kDexNoIndex for intrinsified native methods.
     uint32_t method_index;
     InvokeType invoke_type;
     uint32_t num_dex_registers;
@@ -156,7 +156,7 @@
   size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
                                    const BitVector* live_dex_registers_mask) const;
   size_t ComputeDexRegisterMapsSize() const;
-  size_t ComputeInlineInfoSize() const;
+  void ComputeInlineInfoEncoding();
 
   // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
@@ -167,6 +167,13 @@
                             const BitVector& live_dex_registers_mask,
                             uint32_t start_index_in_dex_register_locations) const;
 
+  void CheckDexRegisterMap(const CodeInfo& code_info,
+                           const DexRegisterMap& dex_register_map,
+                           size_t num_dex_registers,
+                           BitVector* live_dex_registers_mask,
+                           size_t dex_register_locations_index) const;
+  void CheckCodeInfo(MemoryRegion region) const;
+
   ArenaAllocator* allocator_;
   ArenaVector<StackMapEntry> stack_maps_;
 
@@ -193,13 +200,13 @@
   StackMapEntry current_entry_;
   InlineInfoEntry current_inline_info_;
   StackMapEncoding stack_map_encoding_;
-  size_t stack_mask_size_;
+  InlineInfoEncoding inline_info_encoding_;
+  ArenaVector<uint8_t> code_info_encoding_;
   size_t inline_info_size_;
   size_t dex_register_maps_size_;
   size_t stack_maps_size_;
   size_t dex_register_location_catalog_size_;
   size_t dex_register_location_catalog_start_;
-  size_t stack_maps_start_;
   size_t dex_register_maps_start_;
   size_t inline_infos_start_;
   size_t needed_size_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 560502f..967fd96 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -23,9 +23,18 @@
 
 namespace art {
 
-static bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
-  for (size_t i = 0; i < region.size_in_bits(); ++i) {
-    if (region.LoadBit(i) != bit_vector.IsBitSet(i)) {
+// Check that the stack mask of the given stack map is identical
+// to the given bit vector. Returns true if they are the same.
+static bool CheckStackMask(
+    const StackMap& stack_map,
+    StackMapEncoding& encoding,
+    const BitVector& bit_vector) {
+  int number_of_bits = stack_map.GetNumberOfStackMaskBits(encoding);
+  if (bit_vector.GetHighestBitSet() >= number_of_bits) {
+    return false;
+  }
+  for (int i = 0; i < number_of_bits; ++i) {
+    if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) {
       return false;
     }
   }
@@ -52,12 +61,11 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(2u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(2u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
   // - one 1-byte short Dex register location, and
@@ -68,14 +76,13 @@
   StackMap stack_map = code_info.GetStackMapAt(0, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-  ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-  ASSERT_TRUE(SameBits(stack_mask, sp_mask));
+  ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask));
 
-  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
   DexRegisterMap dex_register_map =
       code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
   ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -100,9 +107,9 @@
   ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
 
   size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_location_catalog_entries);
+      0, number_of_dex_registers, number_of_catalog_entries);
   size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_location_catalog_entries);
+      1, number_of_dex_registers, number_of_catalog_entries);
   ASSERT_EQ(0u, index0);
   ASSERT_EQ(1u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -114,7 +121,7 @@
   ASSERT_EQ(0, location0.GetValue());
   ASSERT_EQ(-2, location1.GetValue());
 
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 }
 
 TEST(StackMapTest, Test2) {
@@ -166,12 +173,11 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(2u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(4u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(4u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(7u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(7u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
   // - six 1-byte short Dex register locations, and
@@ -184,14 +190,13 @@
     StackMap stack_map = code_info.GetStackMapAt(0, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-    ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask1));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -216,9 +221,9 @@
     ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(0u, index0);
     ASSERT_EQ(1u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -230,15 +235,15 @@
     ASSERT_EQ(0, location0.GetValue());
     ASSERT_EQ(-2, location1.GetValue());
 
-    ASSERT_TRUE(stack_map.HasInlineInfo(encoding));
+    ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
     InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-    ASSERT_EQ(2u, inline_info.GetDepth());
-    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(0));
-    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(1));
-    ASSERT_EQ(kDirect, inline_info.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(kStatic, inline_info.GetInvokeTypeAtDepth(1));
+    ASSERT_EQ(2u, inline_info.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kDirect, inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kStatic, inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
   }
 
   // Second stack map.
@@ -246,14 +251,13 @@
     StackMap stack_map = code_info.GetStackMapAt(1, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding)));
-    ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask2));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -279,9 +283,9 @@
                   1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(2u, index0);
     ASSERT_EQ(3u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -293,7 +297,7 @@
     ASSERT_EQ(18, location0.GetValue());
     ASSERT_EQ(3, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+    ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
   }
 
   // Third stack map.
@@ -301,14 +305,13 @@
     StackMap stack_map = code_info.GetStackMapAt(2, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding)));
-    ASSERT_EQ(2u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask3));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask3));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -334,9 +337,9 @@
                   1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(4u, index0);
     ASSERT_EQ(5u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -348,7 +351,7 @@
     ASSERT_EQ(6, location0.GetValue());
     ASSERT_EQ(8, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+    ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
   }
 
   // Fourth stack map.
@@ -356,14 +359,13 @@
     StackMap stack_map = code_info.GetStackMapAt(3, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding)));
-    ASSERT_EQ(3u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask4));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask4));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -389,9 +391,9 @@
                   1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(3u, index0);  // Shared with second stack map.
     ASSERT_EQ(6u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -403,7 +405,7 @@
     ASSERT_EQ(3, location0.GetValue());
     ASSERT_EQ(1, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+    ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
   }
 }
 
@@ -425,12 +427,11 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(1u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(1u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
   // - one 5-byte large Dex register location.
@@ -440,11 +441,11 @@
   StackMap stack_map = code_info.GetStackMapAt(0, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-  ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
   DexRegisterMap dex_register_map =
       code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
   ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0));
@@ -467,9 +468,9 @@
   ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
 
   size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_location_catalog_entries);
+      0, number_of_dex_registers, number_of_catalog_entries);
   size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_location_catalog_entries);
+      1, number_of_dex_registers, number_of_catalog_entries);
   ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
   ASSERT_EQ(0u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -481,7 +482,7 @@
   ASSERT_EQ(0, location0.GetValue());
   ASSERT_EQ(-2, location1.GetValue());
 
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 }
 
 // Generate a stack map whose dex register offset is
@@ -518,13 +519,13 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
   // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
   // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index
   // has a size of 1 bit.
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(2u, number_of_location_catalog_entries);
-  ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries));
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(2u, number_of_catalog_entries);
+  ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_catalog_entries));
 
   // The first Dex register map contains:
   // - a live register bit mask for 1024 registers (that is, 128 bytes of
@@ -537,16 +538,17 @@
   DexRegisterMap dex_register_map0 =
       code_info.GetDexRegisterMapOf(stack_map0, encoding, number_of_dex_registers);
   ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
-                                                               number_of_location_catalog_entries));
+                                                               number_of_catalog_entries));
   ASSERT_EQ(255u, dex_register_map0.Size());
 
   StackMap stack_map1 = code_info.GetStackMapAt(1, encoding);
-  ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding));
+  ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding.stack_map_encoding));
   // ...the offset of the second Dex register map (relative to the
   // beginning of the Dex register maps region) is 255 (i.e.,
   // kNoDexRegisterMapSmallEncoding).
-  ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding), StackMap::kNoDexRegisterMap);
-  ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding), 0xFFu);
+  ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            StackMap::kNoDexRegisterMap);
+  ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding.stack_map_encoding), 0xFFu);
 }
 
 TEST(StackMapTest, TestShareDexRegisterMap) {
@@ -578,7 +580,7 @@
   stream.FillIn(region);
 
   CodeInfo ci(region);
-  StackMapEncoding encoding = ci.ExtractEncoding();
+  CodeInfoEncoding encoding = ci.ExtractEncoding();
 
   // Verify first stack map.
   StackMap sm0 = ci.GetStackMapAt(0, encoding);
@@ -599,9 +601,12 @@
   ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci, encoding));
 
   // Verify dex register map offsets.
-  ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding), sm1.GetDexRegisterMapOffset(encoding));
-  ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding), sm2.GetDexRegisterMapOffset(encoding));
-  ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding), sm2.GetDexRegisterMapOffset(encoding));
+  ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            sm1.GetDexRegisterMapOffset(encoding.stack_map_encoding));
+  ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            sm2.GetDexRegisterMapOffset(encoding.stack_map_encoding));
+  ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            sm2.GetDexRegisterMapOffset(encoding.stack_map_encoding));
 }
 
 TEST(StackMapTest, TestNoDexRegisterMap) {
@@ -614,30 +619,43 @@
   stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.EndStackMapEntry();
 
+  number_of_dex_registers = 1;
+  stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0);
+  stream.EndStackMapEntry();
+
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(0u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(0u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   ASSERT_EQ(0u, location_catalog.Size());
 
   StackMap stack_map = code_info.GetStackMapAt(0, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-  ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
+
+  stack_map = code_info.GetStackMapAt(1, encoding);
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding)));
+  ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 }
 
 TEST(StackMapTest, InlineTest) {
@@ -712,7 +730,7 @@
   stream.FillIn(region);
 
   CodeInfo ci(region);
-  StackMapEncoding encoding = ci.ExtractEncoding();
+  CodeInfoEncoding encoding = ci.ExtractEncoding();
 
   {
     // Verify first stack map.
@@ -723,13 +741,13 @@
     ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if0 = ci.GetInlineInfoOf(sm0, encoding);
-    ASSERT_EQ(2u, if0.GetDepth());
-    ASSERT_EQ(2u, if0.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(3u, if0.GetDexPcAtDepth(1));
-    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(1));
+    ASSERT_EQ(2u, if0.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if0.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(3u, if0.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, encoding, 1);
     ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
@@ -749,16 +767,16 @@
     ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if1 = ci.GetInlineInfoOf(sm1, encoding);
-    ASSERT_EQ(3u, if1.GetDepth());
-    ASSERT_EQ(2u, if1.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kDirect, if1.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(3u, if1.GetDexPcAtDepth(1));
-    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kStatic, if1.GetInvokeTypeAtDepth(1));
-    ASSERT_EQ(5u, if1.GetDexPcAtDepth(2));
-    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(2));
-    ASSERT_EQ(kVirtual, if1.GetInvokeTypeAtDepth(2));
+    ASSERT_EQ(3u, if1.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kDirect, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(3u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kStatic, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(5u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(kVirtual, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 2));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, encoding, 1);
     ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
@@ -768,7 +786,7 @@
     ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci, encoding));
     ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci, encoding));
 
-    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2));
+    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, 2));
   }
 
   {
@@ -778,7 +796,7 @@
     DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, encoding, 2);
     ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0));
     ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
-    ASSERT_FALSE(sm2.HasInlineInfo(encoding));
+    ASSERT_FALSE(sm2.HasInlineInfo(encoding.stack_map_encoding));
   }
 
   {
@@ -790,18 +808,18 @@
     ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if2 = ci.GetInlineInfoOf(sm3, encoding);
-    ASSERT_EQ(3u, if2.GetDepth());
-    ASSERT_EQ(2u, if2.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kVirtual, if2.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(5u, if2.GetDexPcAtDepth(1));
-    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kInterface, if2.GetInvokeTypeAtDepth(1));
-    ASSERT_EQ(10u, if2.GetDexPcAtDepth(2));
-    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(2));
-    ASSERT_EQ(kStatic, if2.GetInvokeTypeAtDepth(2));
+    ASSERT_EQ(3u, if2.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kVirtual, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(5u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kInterface, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(10u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(kStatic, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 2));
 
-    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0));
+    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, 0));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, encoding, 1);
     ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci, encoding));
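
Taken together, the changes in this test file show the new read pattern: ExtractEncoding() now yields a CodeInfoEncoding aggregate, and the per-table accessors take its nested stack_map_encoding or inline_info_encoding member. A condensed sketch, assuming only calls exercised in the tests above:

// Sketch of the updated read path; every call below appears in the tests above.
static uint32_t FirstDexPc(MemoryRegion region) {
  CodeInfo code_info(region);
  CodeInfoEncoding encoding = code_info.ExtractEncoding();
  StackMap stack_map = code_info.GetStackMapAt(0, encoding);
  if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
    InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
    // Inline-info accessors take the nested inline_info_encoding.
    DCHECK_GE(inline_info.GetDepth(encoding.inline_info_encoding), 1u);
  }
  return stack_map.GetDexPc(encoding.stack_map_encoding);
}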
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index b6c704c..15cd4e8 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -18,6 +18,7 @@
 #include "dex_instruction.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "pretty_printer.h"
 
 #include "gtest/gtest.h"
 
@@ -30,20 +31,17 @@
 static void TestCode(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-
-  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors()[0];
-  HInstruction* first_instruction = first_block->GetFirstInstruction();
-  // Account for some tests having a store local as first instruction.
-  ASSERT_TRUE(first_instruction->IsSuspendCheck()
-              || first_instruction->GetNext()->IsSuspendCheck());
+  HGraph* graph = CreateCFG(&allocator, data);
+  HBasicBlock* first_block = graph->GetEntryBlock()->GetSingleSuccessor();
+  HBasicBlock* loop_header = first_block->GetSingleSuccessor();
+  ASSERT_TRUE(loop_header->IsLoopHeader());
+  ASSERT_EQ(loop_header->GetLoopInformation()->GetPreHeader(), first_block);
+  ASSERT_TRUE(loop_header->GetFirstInstruction()->IsSuspendCheck());
 }
 
-TEST(CodegenTest, CFG1) {
+class SuspendCheckTest : public CommonCompilerTest {};
+
+TEST_F(SuspendCheckTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
     Instruction::GOTO | 0xFF00);
@@ -51,14 +49,14 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG2) {
+TEST_F(SuspendCheckTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 0, 0);
 
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG3) {
+TEST_F(SuspendCheckTest, CFG3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 0xFFFF,
@@ -67,7 +65,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG4) {
+TEST_F(SuspendCheckTest, CFG4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_NE, 0xFFFF,
@@ -76,7 +74,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG5) {
+TEST_F(SuspendCheckTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQZ, 0xFFFF,
@@ -85,7 +83,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG6) {
+TEST_F(SuspendCheckTest, CFG6) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_NEZ, 0xFFFF,
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
new file mode 100644
index 0000000..8aa315a
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "x86_memory_gen.h"
+#include "code_generator.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Replace instructions with memory operand forms.
+ */
+class MemoryOperandVisitor : public HGraphVisitor {
+ public:
+  MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
+      : HGraphVisitor(graph),
+        do_implicit_null_checks_(do_implicit_null_checks) {}
+
+ private:
+  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE {
+    // Replace the length with the array itself, so that we can compare against memory.
+    HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+
+    // We only want to replace an ArrayLength.
+    if (array_len == nullptr) {
+      return;
+    }
+
+    HInstruction* array = array_len->InputAt(0);
+    DCHECK_EQ(array->GetType(), Primitive::kPrimNot);
+
+    // Don't apply this optimization when the array is nullptr.
+    if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) {
+      return;
+    }
+
+    // Is there a null check that could be an implicit check?
+    if (array->IsNullCheck() && do_implicit_null_checks_) {
+      // The ArrayLength may generate the implicit null check.  Can the
+      // bounds check do so as well?
+      if (array_len->GetNextDisregardingMoves() != check) {
+        // No, it won't.  Leave as is.
+        return;
+      }
+    }
+
+    // Can we suppress the ArrayLength and generate it at the BoundsCheck?
+    if (array_len->HasOnlyOneNonEnvironmentUse()) {
+      array_len->MarkEmittedAtUseSite();
+      // We need the ArrayLength just before the BoundsCheck.
+      array_len->MoveBefore(check);
+    }
+  }
+
+  bool do_implicit_null_checks_;
+};
+
+X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
+                                                       CodeGenerator* codegen,
+                                                       OptimizingCompilerStats* stats)
+    : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
+      do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
+}
+
+void X86MemoryOperandGeneration::Run() {
+  MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_);
+  visitor.VisitInsertionOrder();
+}
+
+}  // namespace x86
+}  // namespace art
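
The pass is an ordinary HOptimization, so wiring it into the optimizing compiler is a one-liner at pass-setup time. The surrounding pipeline code is not part of this diff, so treat the following as a hypothetical sketch:

// Hypothetical wiring; only X86MemoryOperandGeneration itself is added here.
static void RunX86MemoryOperandGeneration(HGraph* graph,
                                          CodeGenerator* codegen,
                                          OptimizingCompilerStats* stats) {
  x86::X86MemoryOperandGeneration memory_gen(graph, codegen, stats);
  memory_gen.Run();  // Marks eligible HArrayLength nodes as emitted at use site.
}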
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
new file mode 100644
index 0000000..5f15d9f
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+class CodeGenerator;
+
+namespace x86 {
+
+class X86MemoryOperandGeneration : public HOptimization {
+ public:
+  X86MemoryOperandGeneration(HGraph* graph,
+                             CodeGenerator* codegen,
+                             OptimizingCompilerStats* stats);
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kX86MemoryOperandGenerationPassName =
+          "x86_memory_operand_generation";
+
+ private:
+  bool do_implicit_null_checks_;
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
deleted file mode 100644
index 4d30b83..0000000
--- a/compiler/output_stream.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OUTPUT_STREAM_H_
-#define ART_COMPILER_OUTPUT_STREAM_H_
-
-#include <ostream>
-#include <string>
-
-#include "base/macros.h"
-
-namespace art {
-
-enum Whence {
-  kSeekSet = SEEK_SET,
-  kSeekCurrent = SEEK_CUR,
-  kSeekEnd = SEEK_END,
-};
-std::ostream& operator<<(std::ostream& os, const Whence& rhs);
-
-class OutputStream {
- public:
-  explicit OutputStream(const std::string& location) : location_(location) {}
-
-  virtual ~OutputStream() {}
-
-  const std::string& GetLocation() const {
-    return location_;
-  }
-
-  virtual bool WriteFully(const void* buffer, size_t byte_count) = 0;
-
-  virtual off_t Seek(off_t offset, Whence whence) = 0;
-
- private:
-  const std::string location_;
-
-  DISALLOW_COPY_AND_ASSIGN(OutputStream);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc
deleted file mode 100644
index 6104ccd..0000000
--- a/compiler/output_stream_test.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "file_output_stream.h"
-#include "vector_output_stream.h"
-
-#include "base/unix_file/fd_file.h"
-#include "base/logging.h"
-#include "buffered_output_stream.h"
-#include "common_runtime_test.h"
-
-namespace art {
-
-class OutputStreamTest : public CommonRuntimeTest {
- protected:
-  void CheckOffset(off_t expected) {
-    off_t actual = output_stream_->Seek(0, kSeekCurrent);
-    EXPECT_EQ(expected, actual);
-  }
-
-  void SetOutputStream(OutputStream& output_stream) {
-    output_stream_ = &output_stream;
-  }
-
-  void GenerateTestOutput() {
-    EXPECT_EQ(3, output_stream_->Seek(3, kSeekCurrent));
-    CheckOffset(3);
-    EXPECT_EQ(2, output_stream_->Seek(2, kSeekSet));
-    CheckOffset(2);
-    uint8_t buf[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
-    EXPECT_TRUE(output_stream_->WriteFully(buf, 2));
-    CheckOffset(4);
-    EXPECT_EQ(6, output_stream_->Seek(2, kSeekEnd));
-    CheckOffset(6);
-    EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
-    CheckOffset(10);
-    EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
-  }
-
-  void CheckTestOutput(const std::vector<uint8_t>& actual) {
-    uint8_t expected[] = {
-        0, 0, 1, 2, 0, 0, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6
-    };
-    EXPECT_EQ(sizeof(expected), actual.size());
-    EXPECT_EQ(0, memcmp(expected, &actual[0], actual.size()));
-  }
-
-  OutputStream* output_stream_;
-};
-
-TEST_F(OutputStreamTest, File) {
-  ScratchFile tmp;
-  FileOutputStream output_stream(tmp.GetFile());
-  SetOutputStream(output_stream);
-  GenerateTestOutput();
-  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
-  EXPECT_TRUE(in.get() != nullptr);
-  std::vector<uint8_t> actual(in->GetLength());
-  bool readSuccess = in->ReadFully(&actual[0], actual.size());
-  EXPECT_TRUE(readSuccess);
-  CheckTestOutput(actual);
-}
-
-TEST_F(OutputStreamTest, Buffered) {
-  ScratchFile tmp;
-  {
-    std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
-    CHECK(file_output_stream.get() != nullptr);
-    BufferedOutputStream buffered_output_stream(file_output_stream.release());
-    SetOutputStream(buffered_output_stream);
-    GenerateTestOutput();
-  }
-  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
-  EXPECT_TRUE(in.get() != nullptr);
-  std::vector<uint8_t> actual(in->GetLength());
-  bool readSuccess = in->ReadFully(&actual[0], actual.size());
-  EXPECT_TRUE(readSuccess);
-  CheckTestOutput(actual);
-}
-
-TEST_F(OutputStreamTest, Vector) {
-  std::vector<uint8_t> output;
-  VectorOutputStream output_stream("test vector output", &output);
-  SetOutputStream(output_stream);
-  GenerateTestOutput();
-  CheckTestOutput(output);
-}
-
-}  // namespace art
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 39e5259..304e56b 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -16,6 +16,7 @@
 
 #include "trampoline_compiler.h"
 
+#include "base/arena_allocator.h"
 #include "jni_env_ext.h"
 
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -48,16 +49,16 @@
 
 #ifdef ART_ENABLE_CODEGEN_arm
 namespace arm {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<4> offset) {
-  Thumb2Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset32 offset) {
+  Thumb2Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
       __ LoadFromOffset(kLoadWord, PC, R0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (R0).
-      __ LoadFromOffset(kLoadWord, IP, R0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadWord, IP, R0, JNIEnvExt::SelfOffset(4).Int32Value());
       __ LoadFromOffset(kLoadWord, PC, IP, offset.Int32Value());
       break;
     case kQuickAbi:  // R9 holds Thread*.
@@ -68,19 +69,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace arm
 #endif  // ART_ENABLE_CODEGEN_arm
 
 #ifdef ART_ENABLE_CODEGEN_arm64
 namespace arm64 {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<8> offset) {
-  Arm64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset64 offset) {
+  Arm64Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
@@ -91,7 +92,7 @@
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (X0).
       __ LoadRawPtr(Arm64ManagedRegister::FromXRegister(IP1),
                       Arm64ManagedRegister::FromXRegister(X0),
-                      Offset(JNIEnvExt::SelfOffset().Int32Value()));
+                      Offset(JNIEnvExt::SelfOffset(8).Int32Value()));
 
       __ JumpTo(Arm64ManagedRegister::FromXRegister(IP1), Offset(offset.Int32Value()),
                 Arm64ManagedRegister::FromXRegister(IP0));
@@ -107,26 +108,26 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace arm64
 #endif  // ART_ENABLE_CODEGEN_arm64
 
 #ifdef ART_ENABLE_CODEGEN_mips
 namespace mips {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<4> offset) {
-  MipsAssembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset32 offset) {
+  MipsAssembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
       __ LoadFromOffset(kLoadWord, T9, A0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
-      __ LoadFromOffset(kLoadWord, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadWord, T9, A0, JNIEnvExt::SelfOffset(4).Int32Value());
       __ LoadFromOffset(kLoadWord, T9, T9, offset.Int32Value());
       break;
     case kQuickAbi:  // S1 holds Thread*.
@@ -139,26 +140,26 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace mips
 #endif  // ART_ENABLE_CODEGEN_mips
 
 #ifdef ART_ENABLE_CODEGEN_mips64
 namespace mips64 {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<8> offset) {
-  Mips64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset64 offset) {
+  Mips64Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
       __ LoadFromOffset(kLoadDoubleword, T9, A0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
-      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset(8).Int32Value());
       __ LoadFromOffset(kLoadDoubleword, T9, T9, offset.Int32Value());
       break;
     case kQuickAbi:  // Fall-through.
@@ -171,18 +172,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace mips64
 #endif  // ART_ENABLE_CODEGEN_mips64
 
 #ifdef ART_ENABLE_CODEGEN_x86
 namespace x86 {
-static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
-  X86Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
+                                                                    ThreadOffset32 offset) {
+  X86Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in fs.
   __ fs()->jmp(Address::Absolute(offset));
@@ -191,18 +193,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace x86
 #endif  // ART_ENABLE_CODEGEN_x86
 
 #ifdef ART_ENABLE_CODEGEN_x86_64
 namespace x86_64 {
-static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
-  x86_64::X86_64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
+                                                                    ThreadOffset64 offset) {
+  x86_64::X86_64Assembler assembler(arena);
 
   // All x86-64 trampolines call via the Thread* held in gs.
   __ gs()->jmp(x86_64::Address::Absolute(offset, true));
@@ -211,28 +214,31 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace x86_64
 #endif  // ART_ENABLE_CODEGEN_x86_64
 
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<8> offset) {
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset64 offset) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
   switch (isa) {
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return arm64::CreateTrampoline(abi, offset);
+      return arm64::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return mips64::CreateTrampoline(abi, offset);
+      return mips64::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return x86_64::CreateTrampoline(offset);
+      return x86_64::CreateTrampoline(&arena, offset);
 #endif
     default:
       UNUSED(abi);
@@ -242,22 +248,25 @@
   }
 }
 
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<4> offset) {
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset32 offset) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
   switch (isa) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return arm::CreateTrampoline(abi, offset);
+      return arm::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return mips::CreateTrampoline(abi, offset);
+      return mips::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       UNUSED(abi);
-      return x86::CreateTrampoline(offset);
+      return x86::CreateTrampoline(&arena, offset);
 #endif
     default:
       LOG(FATAL) << "Unexpected InstructionSet: " << isa;
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 9fb2245..1a10e4c 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -25,12 +25,12 @@
 namespace art {
 
 // Create code that will invoke the function held in thread local storage.
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<4> entry_point_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<8> entry_point_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset32 entry_point_offset);
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset64 entry_point_offset);
 
 }  // namespace art
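
Since the factories now hand back std::unique_ptr<const std::vector<uint8_t>> instead of a raw owning pointer, call sites drop their manual delete. A minimal sketch of the updated usage (the offset value and the WriteCode consumer are illustrative, not part of this diff):

// Sketch: ownership transfers via unique_ptr; no manual delete needed.
void EmitQuickTrampoline() {
  std::unique_ptr<const std::vector<uint8_t>> stub =
      CreateTrampoline64(kArm64, kQuickAbi, ThreadOffset64(0u));  // Offset is illustrative.
  if (stub != nullptr) {
    WriteCode(stub->data(), stub->size());  // WriteCode is a hypothetical consumer.
  }
}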
 
diff --git a/compiler/utils/arena_allocator_test.cc b/compiler/utils/arena_allocator_test.cc
deleted file mode 100644
index 7f67ef1..0000000
--- a/compiler/utils/arena_allocator_test.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/arena_allocator.h"
-#include "base/arena_bit_vector.h"
-#include "gtest/gtest.h"
-
-namespace art {
-
-TEST(ArenaAllocator, Test) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  ArenaBitVector bv(&arena, 10, true);
-  bv.SetBit(5);
-  EXPECT_EQ(1U, bv.GetStorageSize());
-  bv.SetBit(35);
-  EXPECT_EQ(2U, bv.GetStorageSize());
-}
-
-}  // namespace art
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 807beda..d5cd59d 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -16,6 +16,8 @@
 
 #include "assembler_arm.h"
 
+#include <algorithm>
+
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -340,9 +342,9 @@
       return IsAbsoluteUint<12>(offset);
     case kLoadSWord:
     case kLoadDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kLoadWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -358,9 +360,9 @@
       return IsAbsoluteUint<12>(offset);
     case kStoreSWord:
     case kStoreDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kStoreWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
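
Both hunks above tighten the load/store offset checks for the same reason: the VFP (and LDRD/STRD word-pair) addressing mode encodes an 8-bit immediate scaled by 4, so an offset is only representable if it both fits in 10 bits and is word-aligned. A standalone restatement of the tightened predicate, assuming nothing beyond what the diff shows:

#include <cstdint>
#include <cstdlib>

// Sketch: the VFP/word-pair offset predicate after this change, equivalent
// to IsAbsoluteUint<10>(offset) && (offset & 3) == 0.
static bool CanHoldVfpOffset(int32_t offset) {
  uint32_t magnitude = static_cast<uint32_t>(std::abs(offset));
  return magnitude < (1u << 10) && (offset & 3) == 0;
}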
@@ -374,500 +376,6 @@
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::ArmCore(static_cast<int>(reg));
-}
-
-static dwarf::Reg DWARFReg(SRegister reg) {
-  return dwarf::Reg::ArmFp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = kArmPointerSize;
-
-void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& callee_save_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
-
-  // Push callee saves and link register.
-  RegList core_spill_mask = 1 << LR;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-  PushList(core_spill_mask);
-  cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
-  cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
-  if (fp_spill_mask != 0) {
-    vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
-    cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
-  }
-
-  // Increase frame to required size.
-  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
-  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
-
-  // Write out Method*.
-  StoreToOffset(kStoreWord, R0, SP, 0);
-
-  // Write out entry spills.
-  int32_t offset = frame_size + kFramePointerSize;
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ArmManagedRegister reg = entry_spills.at(i).AsArm();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsCoreRegister()) {
-      StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    }
-  }
-}
-
-void ArmAssembler::RemoveFrame(size_t frame_size,
-                              const std::vector<ManagedRegister>& callee_save_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-
-  // Compute callee saves to pop and PC.
-  RegList core_spill_mask = 1 << PC;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-
-  // Decrease frame to start of callee saves.
-  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pop_values * kFramePointerSize);
-  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
-
-  if (fp_spill_mask != 0) {
-    vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
-    cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
-  }
-
-  // Pop callee saves and PC.
-  PopList(core_spill_mask);
-
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void ArmAssembler::IncreaseFrameSize(size_t adjust) {
-  AddConstant(SP, -adjust);
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void ArmAssembler::DecreaseFrameSize(size_t adjust) {
-  AddConstant(SP, adjust);
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
-  ArmManagedRegister src = msrc.AsArm();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCoreRegister()) {
-    CHECK_EQ(4u, size);
-    StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
-    StoreToOffset(kStoreWord, src.AsRegisterPairHigh(),
-                  SP, dest.Int32Value() + 4);
-  } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
-  } else {
-    CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
-  }
-}
-
-void ArmAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  ArmManagedRegister src = msrc.AsArm();
-  CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  ArmManagedRegister src = msrc.AsArm();
-  CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc,
-                              FrameOffset in_off, ManagedRegister mscratch) {
-  ArmManagedRegister src = msrc.AsArm();
-  ArmManagedRegister scratch = mscratch.AsArm();
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
-}
-
-void ArmAssembler::CopyRef(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool unpoison_reference) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
-                 base.AsArm().AsCoreRegister(), offs.Int32Value());
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dst.AsCoreRegister());
-  }
-}
-
-void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset  src) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value());
-}
-
-void ArmAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                           Offset offs) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
-                 base.AsArm().AsCoreRegister(), offs.Int32Value());
-}
-
-void ArmAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                      ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                       ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, dest.Int32Value());
-}
-
-static void EmitLoad(ArmAssembler* assembler, ManagedRegister m_dst,
-                     Register src_register, int32_t src_offset, size_t size) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  if (dst.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dst;
-  } else if (dst.IsCoreRegister()) {
-    CHECK_EQ(4u, size) << dst;
-    assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
-  } else if (dst.IsRegisterPair()) {
-    CHECK_EQ(8u, size) << dst;
-    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
-    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
-  } else if (dst.IsSRegister()) {
-    assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset);
-  } else {
-    CHECK(dst.IsDRegister()) << dst;
-    assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
-  }
-}
-
-void ArmAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
-  return EmitLoad(this, m_dst, SP, src.Int32Value(), size);
-}
-
-void ArmAssembler::LoadFromThread32(ManagedRegister m_dst, ThreadOffset<4> src, size_t size) {
-  return EmitLoad(this, m_dst, TR, src.Int32Value(), size);
-}
-
-void ArmAssembler::LoadRawPtrFromThread32(ManagedRegister m_dst, ThreadOffset<4> offs) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
-}
-
-void ArmAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                        ThreadOffset<4> thr_offs,
-                                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 TR, thr_offs.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                SP, fr_offs.Int32Value());
-}
-
-void ArmAssembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs,
-                                      FrameOffset fr_offs,
-                                      ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, fr_offs.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                            FrameOffset fr_offs,
-                                            ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) {
-  StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
-}
-
-void ArmAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
-}
-
-void ArmAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  ArmManagedRegister src = m_src.AsArm();
-  if (!dst.Equals(src)) {
-    if (dst.IsCoreRegister()) {
-      CHECK(src.IsCoreRegister()) << src;
-      mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
-    } else if (dst.IsDRegister()) {
-      CHECK(src.IsDRegister()) << src;
-      vmovd(dst.AsDRegister(), src.AsDRegister());
-    } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      vmovs(dst.AsSRegister(), src.AsSRegister());
-    } else {
-      CHECK(dst.IsRegisterPair()) << dst;
-      CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second.
-      if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
-        mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
-        mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
-      } else {
-        mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
-        mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
-      }
-    }
-  }
-}
-
-void ArmAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
-  }
-}
-
-void ArmAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  CHECK_EQ(size, 4u);
-  LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
-  StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
-}
-
-void ArmAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  CHECK_EQ(size, 4u);
-  LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
-  StoreToOffset(kStoreWord, scratch, dest_base.AsArm().AsCoreRegister(), dest_offset.Int32Value());
-}
-
-void ArmAssembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value());
-  StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value());
-}
-
-void ArmAssembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  ArmManagedRegister out_reg = mout_reg.AsArm();
-  ArmManagedRegister in_reg = min_reg.AsArm();
-  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
-  CHECK(out_reg.IsCoreRegister()) << out_reg;
-  if (null_allowed) {
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
-    if (in_reg.IsNoRegister()) {
-      LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                     SP, handle_scope_offset.Int32Value());
-      in_reg = out_reg;
-    }
-    cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
-    if (!out_reg.Equals(in_reg)) {
-      it(EQ, kItElse);
-      LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
-    } else {
-      it(NE);
-    }
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
-  } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
-  }
-}
-
-void ArmAssembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  if (null_allowed) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   handle_scope_offset.Int32Value());
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-    it(NE);
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
-  } else {
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
-  }
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
-}
-
-void ArmAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  ArmManagedRegister out_reg = mout_reg.AsArm();
-  ArmManagedRegister in_reg = min_reg.AsArm();
-  CHECK(out_reg.IsCoreRegister()) << out_reg;
-  CHECK(in_reg.IsCoreRegister()) << in_reg;
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
-  }
-  cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
-  it(NE);
-  LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                 in_reg.AsCoreRegister(), 0, NE);
-}
-
-void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void ArmAssembler::Call(ManagedRegister mbase, Offset offset,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister base = mbase.AsArm();
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(base.IsCoreRegister()) << base;
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 base.AsCoreRegister(), offset.Int32Value());
-  blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call.
-}
-
-void ArmAssembler::Call(FrameOffset base, Offset offset,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, base.Int32Value());
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 scratch.AsCoreRegister(), offset.Int32Value());
-  blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call
-}
-
-void ArmAssembler::CallFromThread32(ThreadOffset<4> /*offset*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::GetCurrentThread(ManagedRegister tr) {
-  mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
-}
-
-void ArmAssembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister /*scratch*/) {
-  StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL);
-}
-
-void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 TR, Thread::ExceptionOffset<4>().Int32Value());
-  cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-  b(slow->Entry(), NE);
-}
-
-void ArmExceptionSlowPath::Emit(Assembler* sasm) {
-  ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception object as argument.
-  // Don't care about preserving R0 as this call won't return.
-  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
-  // Set up call to Thread::Current()->pDeliverException.
-  __ LoadFromOffset(kLoadWord, R12, TR, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
-  __ blx(R12);
-#undef __
-}
-
-
 static int LeadingZeros(uint32_t val) {
   uint32_t alt;
   int32_t n;
@@ -922,5 +430,24 @@
   return value | i << 26 | imm3 << 12 | a << 7;
 }
 
+void ArmAssembler::FinalizeTrackedLabels() {
+  if (!tracked_labels_.empty()) {
+    // This array should be sorted, as assembly is generated in linearized order. Sorting isn't
+    // technically required, but GetAdjustedPosition(), used by AdjustLabelPosition(), can take
+    // advantage of it, so verify that it actually holds.
+    DCHECK(std::is_sorted(
+        tracked_labels_.begin(),
+        tracked_labels_.end(),
+        [](const Label* lhs, const Label* rhs) { return lhs->Position() < rhs->Position(); }));
+
+    Label* last_label = nullptr;  // Track duplicates; we must not adjust a label twice.
+    for (Label* label : tracked_labels_) {
+      DCHECK_NE(label, last_label);
+      AdjustLabelPosition(label);
+      last_label = label;
+    }
+  }
+}
+
 }  // namespace arm
 }  // namespace art
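
FinalizeTrackedLabels above relies on the labels arriving in ascending position order so each is adjusted exactly once. A hedged model of what such an adjustment computes; the Expansion list here is hypothetical bookkeeping, not the assembler's actual fixup data structure:

    #include <cstdint>
    #include <vector>

    // Hypothetical model: every fixup that expanded by `delta` bytes at
    // `location` shifts all labels bound after it. A real implementation can
    // exploit the sorted label order to scan such a list incrementally.
    struct Expansion {
      uint32_t location;
      uint32_t delta;
    };

    uint32_t AdjustPosition(uint32_t position, const std::vector<Expansion>& expansions) {
      uint32_t adjusted = position;
      for (const Expansion& e : expansions) {
        if (e.location < position) {
          adjusted += e.delta;
        }
      }
      return adjusted;
    }
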
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index d59bc6b..c52a5a9 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -20,12 +20,17 @@
 #include <type_traits>
 #include <vector>
 
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "offsets.h"
 
 namespace art {
@@ -77,6 +82,45 @@
   DISALLOW_COPY_AND_ASSIGN(Literal);
 };
 
+// Jump table: table of labels emitted after the literals. Similar to literals.
+class JumpTable {
+ public:
+  explicit JumpTable(std::vector<Label*>&& labels)
+      : label_(), anchor_label_(), labels_(std::move(labels)) {
+  }
+
+  uint32_t GetSize() const {
+    return static_cast<uint32_t>(labels_.size()) * sizeof(uint32_t);
+  }
+
+  const std::vector<Label*>& GetData() const {
+    return labels_;
+  }
+
+  Label* GetLabel() {
+    return &label_;
+  }
+
+  const Label* GetLabel() const {
+    return &label_;
+  }
+
+  Label* GetAnchorLabel() {
+    return &anchor_label_;
+  }
+
+  const Label* GetAnchorLabel() const {
+    return &anchor_label_;
+  }
+
+ private:
+  Label label_;
+  Label anchor_label_;
+  std::vector<Label*> labels_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTable);
+};
+
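
JumpTable takes its label list by rvalue reference and stores raw pointers, so the case labels must outlive the table. A minimal sketch of the size bookkeeping, using a stub Label type for illustration only:

    #include <cstdint>
    #include <vector>

    class Label {};  // stub standing in for the assembler's Label type

    int main() {
      Label case0, case1, case2;
      std::vector<Label*> targets = {&case0, &case1, &case2};
      // JumpTable(std::move(targets)) would adopt this list; its emitted size
      // is one 32-bit offset per target, matching GetSize() above.
      uint32_t table_size = static_cast<uint32_t>(targets.size()) * sizeof(uint32_t);
      return static_cast<int>(table_size);  // 12 bytes for three targets
    }
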
 class ShifterOperand {
  public:
   ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
@@ -461,6 +505,8 @@
 
   virtual void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
+  // Note: CMN updates flags based on addition of its operands. Do not confuse
+  // the "N" suffix with bitwise inversion performed by MVN.
   virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
@@ -503,6 +549,9 @@
   virtual void movw(Register rd, uint16_t imm16, Condition cond = AL) = 0;
   virtual void movt(Register rd, uint16_t imm16, Condition cond = AL) = 0;
   virtual void rbit(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void rev(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void rev16(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void revsh(Register rd, Register rm, Condition cond = AL) = 0;
 
   // Multiply instructions.
   virtual void mul(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
@@ -624,10 +673,15 @@
   virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
   virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
 
+  virtual void vcntd(DRegister dd, DRegister dm) = 0;
+  virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0;
+
   virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpopd(DRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0;
 
   // Branch instructions.
   virtual void b(Label* label, Condition cond = AL) = 0;
@@ -658,10 +712,9 @@
   // Most of these are pure virtual as they need to be implemented per instruction set.
 
   // Create a new literal with a given value.
-  // NOTE: Force the template parameter to be explicitly specified. In the absence of
-  // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
+  // NOTE: Force the template parameter to be explicitly specified. Wrapping the parameter type
+  // in Identity<T> makes it a non-deduced context, so T cannot be inferred from the argument.
   template <typename T>
-  Literal* NewLiteral(typename std::decay<T>::type value) {
+  Literal* NewLiteral(typename Identity<T>::type value) {
     static_assert(std::is_integral<T>::value, "T must be an integral type.");
     return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
   }
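
A self-contained illustration of the non-deduced-context trick used above; RoundTrip is a toy stand-in for NewLiteral:

    #include <cstdint>

    template <typename T>
    struct Identity { using type = T; };

    // T appears only inside Identity<T>::type, a non-deduced context, so the
    // compiler cannot infer it from the call argument.
    template <typename T>
    T RoundTrip(typename Identity<T>::type value) {
      return value;
    }

    int main() {
      // RoundTrip(42);  // would not compile: T cannot be deduced
      return RoundTrip<int32_t>(42) == 42 ? 0 : 1;  // explicit argument required
    }
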
@@ -685,6 +738,8 @@
     AddConstant(rd, rd, value, cond, set_cc);
   }
 
+  virtual void CmpConstant(Register rn, int32_t value, Condition cond = AL) = 0;
+
   // Load and Store. May clobber IP.
   virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
   void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {
@@ -703,32 +758,7 @@
     }
   }
 
-  void LoadDImmediate(DRegister sd, double value, Condition cond = AL) {
-    if (!vmovd(sd, value, cond)) {
-      uint64_t int_value = bit_cast<uint64_t, double>(value);
-      if (int_value == bit_cast<uint64_t, double>(0.0)) {
-        // 0.0 is quite common, so we special case it by loading
-        // 2.0 in `sd` and then substracting it.
-        bool success = vmovd(sd, 2.0, cond);
-        CHECK(success);
-        vsubd(sd, sd, sd, cond);
-      } else {
-        if (sd < 16) {
-          SRegister low = static_cast<SRegister>(sd << 1);
-          SRegister high = static_cast<SRegister>(low + 1);
-          LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
-          if (High32Bits(int_value) == Low32Bits(int_value)) {
-            vmovs(high, low);
-          } else {
-            LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
-          }
-        } else {
-          LOG(FATAL) << "Unimplemented loading of double into a D register "
-                     << "that cannot be split into two S registers";
-        }
-      }
-    }
-  }
+  virtual void LoadDImmediate(DRegister dd, double value, Condition cond = AL) = 0;
 
   virtual void MarkExceptionHandler(Label* label) = 0;
   virtual void LoadFromOffset(LoadOperandType type,
@@ -837,7 +867,15 @@
                                      Register rn,
                                      Opcode opcode,
                                      uint32_t immediate,
+                                     SetCc set_cc,
                                      ShifterOperand* shifter_op) = 0;
+  bool ShifterOperandCanHold(Register rd,
+                             Register rn,
+                             Opcode opcode,
+                             uint32_t immediate,
+                             ShifterOperand* shifter_op) {
+    return ShifterOperandCanHold(rd, rn, opcode, immediate, kCcDontCare, shifter_op);
+  }
 
   virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0;
 
@@ -846,121 +884,6 @@
   virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
   virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
 
-  //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-    OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   static uint32_t ModifiedImmediate(uint32_t value);
 
   static bool IsLowRegister(Register r) {
@@ -985,6 +908,12 @@
     // reg = -reg.
     rsb(reg, reg, ShifterOperand(0));
   }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(Register reg) {
     if (kPoisonHeapReferences) {
@@ -996,23 +925,46 @@
     b(label);
   }
 
+  // Jump table support. This is split into three functions:
+  //
+  // * CreateJumpTable creates the internal metadata to track the jump targets, and emits code to
+  // load the base address of the jump table.
+  //
+  // * EmitJumpTableDispatch emits the code to actually jump, assuming that the right table value
+  // has been loaded into a register already.
+  //
+  // * FinalizeTables emits the jump table into the literal pool. This can only be called after the
+  // labels for the jump targets have been finalized.
+
+  // Create a jump table for the given labels that will be emitted when finalizing. Create a load
+  // sequence (or placeholder) that loads the base address into the given register. When the table
+  // is emitted, its offsets will be relative to the location at which EmitJumpTableDispatch was
+  // emitted (the anchor).
+  virtual JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) = 0;
+
+  // Emit the jump-table jump, assuming that the right value was loaded into displacement_reg.
+  virtual void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) = 0;
+
+  // Bind a Label that needs to be updated by the assembler in FinalizeCode() if its position
+  // changes due to branch/literal fixup.
+  void BindTrackedLabel(Label* label) {
+    Bind(label);
+    tracked_labels_.push_back(label);
+  }
+
  protected:
+  explicit ArmAssembler(ArenaAllocator* arena)
+      : Assembler(arena), tracked_labels_(arena->Adapter(kArenaAllocAssembler)) {}
+
   // Comparator for registers: returns the difference of the two register numbers.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
   }
-};
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class ArmExceptionSlowPath FINAL : public SlowPath {
- public:
-  ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-  }
-  void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const ArmManagedRegister scratch_;
-  const size_t stack_adjust_;
+  void FinalizeTrackedLabels();
+
+  // Tracked labels. Use a vector, as we need to sort before adjusting.
+  ArenaVector<Label*> tracked_labels_;
 };
 
 }  // namespace arm
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6e7c828..b8eb60c 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -57,6 +57,7 @@
                                            Register rn ATTRIBUTE_UNUSED,
                                            Opcode opcode ATTRIBUTE_UNUSED,
                                            uint32_t immediate,
+                                           SetCc set_cc ATTRIBUTE_UNUSED,
                                            ShifterOperand* shifter_op) {
   return ShifterOperandCanHoldArm32(immediate, shifter_op);
 }
@@ -749,6 +750,35 @@
 }
 
 
+void Arm32Assembler::EmitMiscellaneous(Condition cond, uint8_t op1,
+                                       uint8_t op2, uint32_t a_part,
+                                       uint32_t rest) {
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                      B26 | B25 | B23 |
+                      (op1 << 20) |
+                      (a_part << 16) |
+                      (op2 << 5) |
+                      B4 |
+                      rest;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitReverseBytes(Register rd, Register rm, Condition cond,
+                                      uint8_t op1, uint8_t op2) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+
+  int32_t encoding = (static_cast<int32_t>(rd) << kRdShift) |
+                     (0b1111 << 8) |
+                     static_cast<int32_t>(rm);
+  EmitMiscellaneous(cond, op1, op2, 0b1111, encoding);
+}
+
+
 void Arm32Assembler::rbit(Register rd, Register rm, Condition cond) {
   CHECK_NE(rd, kNoRegister);
   CHECK_NE(rm, kNoRegister);
@@ -763,6 +793,21 @@
 }
 
 
+void Arm32Assembler::rev(Register rd, Register rm, Condition cond) {
+  EmitReverseBytes(rd, rm, cond, 0b011, 0b001);
+}
+
+
+void Arm32Assembler::rev16(Register rd, Register rm, Condition cond) {
+  EmitReverseBytes(rd, rm, cond, 0b011, 0b101);
+}
+
+
+void Arm32Assembler::revsh(Register rd, Register rm, Condition cond) {
+  EmitReverseBytes(rd, rm, cond, 0b111, 0b101);
+}
+
+
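
Scalar models of what the three new byte-reverse instructions compute (a sketch of the semantics, not of the emitted encodings): REV reverses all four bytes, REV16 swaps the bytes within each halfword, and REVSH byte-swaps the low halfword and sign-extends it.

    #include <cstdint>

    uint32_t Rev(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
             ((x << 8) & 0x00FF0000u) | (x << 24);
    }

    uint32_t Rev16(uint32_t x) {
      return ((x >> 8) & 0x00FF00FFu) | ((x << 8) & 0xFF00FF00u);
    }

    int32_t Revsh(uint32_t x) {
      uint16_t swapped =
          static_cast<uint16_t>(((x & 0xFFu) << 8) | ((x >> 8) & 0xFFu));
      return static_cast<int16_t>(swapped);  // sign-extend to 32 bits
    }
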
 void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode,
                                Register rd, Register rn,
                                Register rm, Register rs) {
@@ -1061,6 +1106,18 @@
 }
 
 
+void Arm32Assembler::vldmiad(Register, DRegister, int, Condition) {
+  LOG(FATAL) << "Unimplemented.";
+  UNREACHABLE();
+}
+
+
+void Arm32Assembler::vstmiad(Register, DRegister, int, Condition) {
+  LOG(FATAL) << "Unimplemented.";
+  UNREACHABLE();
+}
+
+
 void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
   CHECK_NE(cond, kNoCondition);
   CHECK_GT(nregs, 0);
@@ -1219,6 +1276,31 @@
   Emit(encoding);
 }
 
+void Arm32Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
+void Arm32Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
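
Scalar sketches of the two NEON operations whose encodings are added above: VCNT.8 counts set bits in each byte lane of a D register, and VPADDL adds adjacent lane pairs, widening each result to twice the lane size. __builtin_popcount is a GCC/Clang builtin.

    #include <cstdint>

    // VCNT.8: per-byte population count across the eight lanes.
    void VcntD(uint8_t dst[8], const uint8_t src[8]) {
      for (int i = 0; i < 8; ++i) {
        dst[i] = static_cast<uint8_t>(__builtin_popcount(src[i]));
      }
    }

    // VPADDL.U8: eight 8-bit lanes become four 16-bit lanes; the signed
    // variant (VPADDL.S8) does the same with int8 -> int16.
    void VpaddlU8(uint16_t dst[4], const uint8_t src[8]) {
      for (int i = 0; i < 4; ++i) {
        dst[i] = static_cast<uint16_t>(src[2 * i]) + src[2 * i + 1];
      }
    }
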
 
 void Arm32Assembler::svc(uint32_t imm24) {
   CHECK(IsUint<24>(imm24)) << imm24;
@@ -1385,6 +1467,21 @@
   }
 }
 
+void Arm32Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHoldArm32(-value, &shifter_op)) {
+    // CMN sets flags from an addition, so it pairs with the negated constant.
+    cmn(rn, shifter_op, cond);
+  } else {
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
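
When the constant has no shifter-operand encoding in either form, CmpConstant materializes it in IP via movw/movt, skipping movt when the high halfword is zero. A scalar model of that split:

    #include <cstdint>

    // Scalar model of the movw/movt fallback: movw writes the low halfword
    // and zeroes the top; movt is only emitted when the high half is non-zero.
    uint32_t MaterializeConstant(int32_t value) {
      uint32_t bits = static_cast<uint32_t>(value);
      uint32_t reg = bits & 0xFFFFu;  // movw IP, #low16
      uint16_t value_high = static_cast<uint16_t>(bits >> 16);
      if (value_high != 0) {
        reg |= static_cast<uint32_t>(value_high) << 16;  // movt IP, #high16
      }
      return reg;
    }
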
 
 void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
@@ -1401,6 +1498,34 @@
   }
 }
 
+void Arm32Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+  if (!vmovd(dd, value, cond)) {
+    uint64_t int_value = bit_cast<uint64_t, double>(value);
+    if (int_value == bit_cast<uint64_t, double>(0.0)) {
+      // 0.0 is quite common, so we special case it by loading
+      // 2.0 in `dd` and then subtracting it.
+      bool success = vmovd(dd, 2.0, cond);
+      CHECK(success);
+      vsubd(dd, dd, dd, cond);
+    } else {
+      if (dd < 16) {
+        // Note: Depending on the particular CPU, this may cause a register
+        // forwarding hazard, negatively impacting performance.
+        SRegister low = static_cast<SRegister>(dd << 1);
+        SRegister high = static_cast<SRegister>(low + 1);
+        LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
+        if (High32Bits(int_value) == Low32Bits(int_value)) {
+          vmovs(high, low);
+        } else {
+          LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
+        }
+      } else {
+        LOG(FATAL) << "Unimplemented loading of double into a D register "
+                   << "that cannot be split into two S registers";
+      }
+    }
+  }
+}
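
A portable sketch of the bit split performed by the S-register fallback above: the double's 64 bits are divided into the two 32-bit halves loaded into the overlapping S registers (low = 2*d, high = 2*d + 1). memcpy is the portable spelling of bit_cast here.

    #include <cstdint>
    #include <cstring>

    void SplitDouble(double value, uint32_t* low_bits, uint32_t* high_bits) {
      uint64_t int_value;
      std::memcpy(&int_value, &value, sizeof(int_value));
      *low_bits = static_cast<uint32_t>(int_value);         // goes into S(2*d)
      *high_bits = static_cast<uint32_t>(int_value >> 32);  // goes into S(2*d+1)
    }
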
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetArm.
@@ -1551,12 +1676,6 @@
 }
 
 
-void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-  dmb(SY);
-}
-
-
 void Arm32Assembler::dmb(DmbOptions flavor) {
   int32_t encoding = 0xf57ff05f;  // dmb
   Emit(encoding | flavor);
@@ -1584,6 +1703,23 @@
   b(label, NE);
 }
 
+JumpTable* Arm32Assembler::CreateJumpTable(std::vector<Label*>&& labels ATTRIBUTE_UNUSED,
+                                           Register base_reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "CreateJumpTable is not supported on ARM32";
+  UNREACHABLE();
+}
+
+void Arm32Assembler::EmitJumpTableDispatch(JumpTable* jump_table ATTRIBUTE_UNUSED,
+                                           Register displacement_reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "EmitJumpTableDispatch is not supported on ARM32";
+  UNREACHABLE();
+}
+
+void Arm32Assembler::FinalizeCode() {
+  ArmAssembler::FinalizeCode();
+  // Currently the arm32 assembler does not support fixups, and thus does not track labels. We
+  // must not call FinalizeTrackedLabels(), which would lead to an abort.
+}
 
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 4646538..0cb6b17 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -30,8 +30,7 @@
 
 class Arm32Assembler FINAL : public ArmAssembler {
  public:
-  Arm32Assembler() {
-  }
+  explicit Arm32Assembler(ArenaAllocator* arena) : ArmAssembler(arena) {}
   virtual ~Arm32Assembler() {}
 
   bool IsThumb() const OVERRIDE {
@@ -91,6 +90,9 @@
   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   // Multiply instructions.
   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
@@ -203,10 +205,15 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
 
   // Branch instructions.
   void b(Label* label, Condition cond = AL) OVERRIDE;
@@ -261,8 +268,11 @@
   void AddConstant(Register rd, Register rn, int32_t value,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
   void LoadFromOffset(LoadOperandType type,
                       Register reg,
@@ -295,7 +305,9 @@
                              Register rn,
                              Opcode opcode,
                              uint32_t immediate,
+                             SetCc set_cc,
                              ShifterOperand* shifter_op) OVERRIDE;
+  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
 
   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
@@ -306,7 +318,10 @@
   void Emit(int32_t value);
   void Bind(Label* label) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
+  void FinalizeCode() OVERRIDE;
 
  private:
   void EmitType01(Condition cond,
@@ -379,11 +394,14 @@
 
   void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
 
+  void EmitMiscellaneous(Condition cond, uint8_t op1, uint8_t op2,
+                         uint32_t a_part, uint32_t rest);
+  void EmitReverseBytes(Register rd, Register rm, Condition cond,
+                        uint8_t op1, uint8_t op2);
+
   void EmitBranch(Condition cond, Label* label, bool link);
   static int32_t EncodeBranchOffset(int offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op);
 };
 
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index 4380596..b214062 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -887,4 +887,55 @@
   T3Helper(&arm::Arm32Assembler::rbit, true, "rbit{cond} {reg1}, {reg2}", "rbit");
 }
 
+TEST_F(AssemblerArm32Test, rev) {
+  T3Helper(&arm::Arm32Assembler::rev, true, "rev{cond} {reg1}, {reg2}", "rev");
+}
+
+TEST_F(AssemblerArm32Test, rev16) {
+  T3Helper(&arm::Arm32Assembler::rev16, true, "rev16{cond} {reg1}, {reg2}", "rev16");
+}
+
+TEST_F(AssemblerArm32Test, revsh) {
+  T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh");
+}
+
+TEST_F(AssemblerArm32Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm; destination register number is encoded as D:Vd.
+  // For source and destination registers that use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers that use D16..D31, the M bit and D bit should be 1.
+  GetAssembler()->vcntd(arm::D0, arm::D1);
+  GetAssembler()->vcntd(arm::D19, arm::D20);
+  GetAssembler()->vcntd(arm::D0, arm::D9);
+  GetAssembler()->vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerArm32Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm; destination register number is encoded as D:Vd.
+  // For source and destination registers that use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers that use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  GetAssembler()->vpaddld(arm::D0, arm::D0, 8, true);
+  GetAssembler()->vpaddld(arm::D20, arm::D20, 8, false);
+  GetAssembler()->vpaddld(arm::D0, arm::D20, 16, false);
+  GetAssembler()->vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index cc87856..ebdfc98 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <type_traits>
+
 #include "assembler_thumb2.h"
 
 #include "base/bit_utils.h"
@@ -25,6 +27,30 @@
 namespace art {
 namespace arm {
 
+template <typename Function>
+void Thumb2Assembler::Fixup::ForExpandableDependencies(Thumb2Assembler* assembler, Function fn) {
+  static_assert(
+      std::is_same<typename std::result_of<Function(FixupId, FixupId)>::type, void>::value,
+      "Incorrect signature for argument `fn`: expected (FixupId, FixupId) -> void");
+  Fixup* fixups = assembler->fixups_.data();
+  for (FixupId fixup_id = 0u, end_id = assembler->fixups_.size(); fixup_id != end_id; ++fixup_id) {
+    uint32_t target = fixups[fixup_id].target_;
+    if (target > fixups[fixup_id].location_) {
+      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
+        if (fixups[id].CanExpand()) {
+          fn(id, fixup_id);
+        }
+      }
+    } else {
+      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
+        if (fixups[id - 1u].CanExpand()) {
+          fn(id - 1u, fixup_id);
+        }
+      }
+    }
+  }
+}
+
 void Thumb2Assembler::Fixup::PrepareDependents(Thumb2Assembler* assembler) {
   // For each Fixup, it's easy to find the Fixups that it depends on as they are either
   // the following or the preceding Fixups until we find the target. However, for fixup
@@ -34,24 +60,16 @@
   // index and count. (Instead of having a per-fixup vector.)
 
   // Count the number of dependents of each Fixup.
-  const FixupId end_id = assembler->fixups_.size();
   Fixup* fixups = assembler->fixups_.data();
-  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
-    uint32_t target = fixups[fixup_id].target_;
-    if (target > fixups[fixup_id].location_) {
-      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
-        fixups[id].dependents_count_ += 1u;
-      }
-    } else {
-      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
-        fixups[id - 1u].dependents_count_ += 1u;
-      }
-    }
-  }
+  ForExpandableDependencies(
+      assembler,
+      [fixups](FixupId dependency, FixupId dependent ATTRIBUTE_UNUSED) {
+        fixups[dependency].dependents_count_ += 1u;
+      });
   // Assign index ranges in fixup_dependents_ to individual fixups. Record the end of the
   // range in dependents_start_, we shall later decrement it as we fill in fixup_dependents_.
   uint32_t number_of_dependents = 0u;
-  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
+  for (FixupId fixup_id = 0u, end_id = assembler->fixups_.size(); fixup_id != end_id; ++fixup_id) {
     number_of_dependents += fixups[fixup_id].dependents_count_;
     fixups[fixup_id].dependents_start_ = number_of_dependents;
   }
@@ -59,22 +77,14 @@
     return;
   }
   // Create and fill in the fixup_dependents_.
-  assembler->fixup_dependents_.reset(new FixupId[number_of_dependents]);
-  FixupId* dependents = assembler->fixup_dependents_.get();
-  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
-    uint32_t target = fixups[fixup_id].target_;
-    if (target > fixups[fixup_id].location_) {
-      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
-        fixups[id].dependents_start_ -= 1u;
-        dependents[fixups[id].dependents_start_] = fixup_id;
-      }
-    } else {
-      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
-        fixups[id - 1u].dependents_start_ -= 1u;
-        dependents[fixups[id - 1u].dependents_start_] = fixup_id;
-      }
-    }
-  }
+  assembler->fixup_dependents_.resize(number_of_dependents);
+  FixupId* dependents = assembler->fixup_dependents_.data();
+  ForExpandableDependencies(
+      assembler,
+      [fixups, dependents](FixupId dependency, FixupId dependent) {
+        fixups[dependency].dependents_start_ -= 1u;
+        dependents[fixups[dependency].dependents_start_] = dependent;
+      });
 }
 
 void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) {
@@ -92,7 +102,7 @@
   label->BindTo(bound_pc);
 }
 
-void Thumb2Assembler::BindLiterals() {
+uint32_t Thumb2Assembler::BindLiterals() {
   // We don't add the padding here; that's done only after adjusting the Fixup sizes.
   uint32_t code_size = buffer_.Size();
   for (Literal& lit : literals_) {
@@ -100,12 +110,22 @@
     BindLabel(label, code_size);
     code_size += lit.GetSize();
   }
+  return code_size;
+}
+
+void Thumb2Assembler::BindJumpTables(uint32_t code_size) {
+  for (JumpTable& table : jump_tables_) {
+    Label* label = table.GetLabel();
+    BindLabel(label, code_size);
+    code_size += table.GetSize();
+  }
 }
 
 void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                                           std::deque<FixupId>* fixups_to_recalculate) {
   uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size);
   if (adjustment != 0u) {
+    DCHECK(fixup->CanExpand());
     *current_code_size += adjustment;
     for (FixupId dependent_id : fixup->Dependents(*this)) {
       Fixup* dependent = GetFixup(dependent_id);
@@ -144,7 +164,7 @@
       AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate);
     } while (!fixups_to_recalculate.empty());
 
-    if ((current_code_size & 2) != 0 && !literals_.empty()) {
+    if ((current_code_size & 2) != 0 && (!literals_.empty() || !jump_tables_.empty())) {
       // If we need to add padding before literals, this may just push some out of range,
       // so recalculate all load literals. This makes up for the fact that we don't mark
       // load literal as a dependency of all previous Fixups even though it actually is.
@@ -173,6 +193,13 @@
       label->Reinitialize();
       label->BindTo(old_position + literals_adjustment);
     }
+    for (JumpTable& table : jump_tables_) {
+      Label* label = table.GetLabel();
+      DCHECK(label->IsBound());
+      int old_position = label->Position();
+      label->Reinitialize();
+      label->BindTo(old_position + literals_adjustment);
+    }
   }
 
   return current_code_size;
@@ -229,6 +256,72 @@
   }
 }
 
+void Thumb2Assembler::EmitJumpTables() {
+  if (!jump_tables_.empty()) {
+    // Jump tables require 4-byte alignment. (We don't support byte or half-word jump tables.)
+    uint32_t code_size = buffer_.Size();
+    DCHECK_ALIGNED(code_size, 2);
+    if ((code_size & 2u) != 0u) {
+      Emit16(0);
+    }
+    for (JumpTable& table : jump_tables_) {
+      // Bulk ensure capacity, as this may be large.
+      size_t orig_size = buffer_.Size();
+      size_t required_capacity = orig_size + table.GetSize();
+      if (required_capacity > buffer_.Capacity()) {
+        buffer_.ExtendCapacity(required_capacity);
+      }
+#ifndef NDEBUG
+      buffer_.has_ensured_capacity_ = true;
+#endif
+
+      DCHECK_EQ(static_cast<size_t>(table.GetLabel()->Position()), buffer_.Size());
+      int32_t anchor_position = table.GetAnchorLabel()->Position() + 4;  // PC reads as anchor + 4.
+
+      for (Label* target : table.GetData()) {
+        // Ensure that the label was tracked, so that it will have the right position.
+        DCHECK(std::find(tracked_labels_.begin(), tracked_labels_.end(), target) !=
+                   tracked_labels_.end());
+
+        int32_t offset = target->Position() - anchor_position;
+        buffer_.Emit<int32_t>(offset);
+      }
+
+#ifndef NDEBUG
+      buffer_.has_ensured_capacity_ = false;
+#endif
+      size_t new_size = buffer_.Size();
+      DCHECK_LE(new_size - orig_size, table.GetSize());
+    }
+  }
+}
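
Each table entry written above is target - (anchor + 4): the dispatch instruction ADD pc, pc, rX reads PC as the anchor address plus 4 in Thumb state. A toy check of that arithmetic, with made-up addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t anchor = 0x100;                 // Address of the ADD pc, pc, rX.
      const int32_t target = 0x180;                 // Address of the case's first insn.
      const int32_t entry = target - (anchor + 4);  // Stored table entry: 0x7c.
      // At dispatch: new_pc = (anchor + 4) + entry == target.
      assert((anchor + 4) + entry == target);
      return 0;
    }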
+
+void Thumb2Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Refill our data buffer with patched opcodes.
+  cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16);
+  size_t stream_pos = 0;
+  for (const DelayedAdvancePC& advance : advances) {
+    DCHECK_GE(advance.stream_pos, stream_pos);
+    // Copy old data up to the point where advance was issued.
+    cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+    stream_pos = advance.stream_pos;
+    // Insert the advance command with its final offset.
+    size_t final_pc = GetAdjustedPosition(advance.pc);
+    cfi().AdvancePC(final_pc);
+  }
+  // Copy the final segment if any.
+  cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
+
 inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
   DCHECK_ALIGNED(offset, 2);
   int16_t encoding = B15 | B14;
@@ -382,12 +475,35 @@
   return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset;
 }
 
+inline int16_t Thumb2Assembler::AdrEncoding16(Register rd, int32_t offset) {
+  DCHECK(IsUint<10>(offset));
+  DCHECK(IsAligned<4>(offset));
+  DCHECK(!IsHighRegister(rd));
+  return B15 | B13 | (rd << 8) | (offset >> 2);
+}
+
+inline int32_t Thumb2Assembler::AdrEncoding32(Register rd, int32_t offset) {
+  DCHECK(IsUint<12>(offset));
+  // Bit     26: offset[11]
+  // Bits 14-12: offset[10-8]
+  // Bits   7-0: offset[7-0]
+  int32_t immediate_mask =
+      ((offset & (1 << 11)) << (26 - 11)) |
+      ((offset & (7 << 8)) << (12 - 8)) |
+      (offset & 0xFF);
+  return B31 | B30 | B29 | B28 | B25 | B19 | B18 | B17 | B16 | (rd << 8) | immediate_mask;
+}
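
As a concrete instance of the bit layout documented above: offset 0xABC has offset[11] = 1, offset[10-8] = 0b010 and offset[7-0] = 0xBC, so the pieces land in bits 26, 14-12 and 7-0 respectively. A standalone check of the mask arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t offset = 0xABC;  // Any value that passes IsUint<12>.
      const int32_t immediate_mask =
          ((offset & (1 << 11)) << (26 - 11)) |  // offset[11]   -> bit 26
          ((offset & (7 << 8)) << (12 - 8)) |    // offset[10-8] -> bits 14-12
          (offset & 0xFF);                       // offset[7-0]  -> bits 7-0
      assert(((immediate_mask >> 26) & 1) == 1);  // i
      assert(((immediate_mask >> 12) & 7) == 2);  // imm3
      assert((immediate_mask & 0xFF) == 0xBC);    // imm8
      return 0;
    }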
+
 void Thumb2Assembler::FinalizeCode() {
   ArmAssembler::FinalizeCode();
-  BindLiterals();
+  uint32_t size_after_literals = BindLiterals();
+  BindJumpTables(size_after_literals);
   uint32_t adjusted_code_size = AdjustFixups();
   EmitFixups(adjusted_code_size);
   EmitLiterals();
+  FinalizeTrackedLabels();
+  EmitJumpTables();
+  PatchCFI();
 }
 
 bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
@@ -398,6 +514,7 @@
                                             Register rn ATTRIBUTE_UNUSED,
                                             Opcode opcode,
                                             uint32_t immediate,
+                                            SetCc set_cc,
                                             ShifterOperand* shifter_op) {
   shifter_op->type_ = ShifterOperand::kImmediate;
   shifter_op->immed_ = immediate;
@@ -406,7 +523,8 @@
   switch (opcode) {
     case ADD:
     case SUB:
-      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+      // Immediates that fit in 12 bits can be encoded if we don't need to set condition codes.
+      if (immediate < (1 << 12) && set_cc != kCcSet) {
         return true;
       }
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
@@ -1137,7 +1255,10 @@
       // The only thumb1 instructions with a register and an immediate are ADD and SUB
       // with a 3-bit immediate, and RSB with zero immediate.
       if (opcode == ADD || opcode == SUB) {
-        if (!IsUint<3>(so.GetImmediate())) {
+        if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+          return true;  // Cannot match "setflags".
+        }
+        if (!IsUint<3>(so.GetImmediate()) && !IsUint<3>(-so.GetImmediate())) {
           return true;
         }
       } else {
@@ -1147,8 +1268,12 @@
       // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
       if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
         return true;
+      } else if (opcode != CMP && ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
+        return true;  // Cannot match "setflags" for ADD, SUB or MOV.
       } else {
-        if (!IsUint<8>(so.GetImmediate())) {
+        // For ADD and SUB also allow a negative 8-bit immediate as we emit the opposite opcode.
+        if (!IsUint<8>(so.GetImmediate()) &&
+            (opcode == MOV || opcode == CMP || !IsUint<8>(-so.GetImmediate()))) {
           return true;
         }
       }
@@ -1247,7 +1372,8 @@
   int32_t encoding = 0;
   if (so.IsImmediate()) {
     // Check special cases.
-    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
+    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) &&
+        /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) {
       if (set_cc != kCcSet) {
         if (opcode == SUB) {
           thumb_opcode = 5U;
@@ -1499,12 +1625,18 @@
   uint8_t rn_shift = 3;
   uint8_t immediate_shift = 0;
   bool use_immediate = false;
-  uint32_t immediate = 0;  // Should be at most 9 bits but keep the full immediate for CHECKs.
+  uint32_t immediate = 0;  // Should be at most 10 bits but keep the full immediate for CHECKs.
   uint8_t thumb_opcode;
 
   if (so.IsImmediate()) {
     use_immediate = true;
     immediate = so.GetImmediate();
+    if (!IsUint<10>(immediate)) {
+      // Flip ADD/SUB.
+      opcode = (opcode == ADD) ? SUB : ADD;
+      immediate = -immediate;
+      DCHECK(IsUint<10>(immediate));  // More stringent checks below.
+    }
   }
 
   switch (opcode) {
@@ -1541,7 +1673,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 3U /* 0b11 */;
           opcode_shift = 12;
-          CHECK_LT(immediate, (1u << 9));
+          CHECK(IsUint<9>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1555,7 +1687,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 5U /* 0b101 */;
           opcode_shift = 11;
-          CHECK_LT(immediate, (1u << 10));
+          CHECK(IsUint<10>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rn from instruction.
@@ -1565,11 +1697,13 @@
           immediate >>= 2;
         } else if (rn != rd) {
           // Must use T1.
+          CHECK(IsUint<3>(immediate));
           opcode_shift = 9;
           thumb_opcode = 14U /* 0b01110 */;
           immediate_shift = 6;
         } else {
           // T2 encoding.
+          CHECK(IsUint<8>(immediate));
           opcode_shift = 11;
           thumb_opcode = 6U /* 0b110 */;
           rd_shift = 8;
@@ -1599,7 +1733,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 0x61 /* 0b1100001 */;
           opcode_shift = 7;
-          CHECK_LT(immediate, (1u << 9));
+          CHECK(IsUint<9>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1610,11 +1744,13 @@
           immediate >>= 2;
         } else if (rn != rd) {
           // Must use T1.
+          CHECK(IsUint<3>(immediate));
           opcode_shift = 9;
           thumb_opcode = 15U /* 0b01111 */;
           immediate_shift = 6;
         } else {
           // T2 encoding.
+          CHECK(IsUint<8>(immediate));
           opcode_shift = 11;
           thumb_opcode = 7U /* 0b111 */;
           rd_shift = 8;
@@ -1770,9 +1906,18 @@
     case kLiteralFar:
       return 14u;
 
+    case kLiteralAddr1KiB:
+      return 2u;
+    case kLiteralAddr4KiB:
+      return 4u;
+    case kLiteralAddr64KiB:
+      return 6u;
+    case kLiteralAddrFar:
+      return 10u;
+
     case kLongOrFPLiteral1KiB:
       return 4u;
-    case kLongOrFPLiteral256KiB:
+    case kLongOrFPLiteral64KiB:
       return 10u;
     case kLongOrFPLiteralFar:
       return 14u;
@@ -1831,6 +1976,8 @@
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLongOrFPLiteral1KiB:
+    case kLiteralAddr1KiB:
+    case kLiteralAddr4KiB:
       DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2));
       diff += LiteralPoolPaddingSize(current_code_size);
       // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC
@@ -1842,13 +1989,15 @@
       break;
     case kLiteral1MiB:
     case kLiteral64KiB:
-    case kLongOrFPLiteral256KiB:
+    case kLongOrFPLiteral64KiB:
+    case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 4;        // One extra 32-bit MOV.
       diff += LiteralPoolPaddingSize(current_code_size);
       break;
     case kLiteralFar:
     case kLongOrFPLiteralFar:
+    case kLiteralAddrFar:
       DCHECK_GE(diff, 8);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 8;        // Extra MOVW+MOVT; both 32-bit.
       diff += LiteralPoolPaddingSize(current_code_size);
@@ -1929,14 +2078,37 @@
       // This encoding can reach any target.
       break;
 
+    case kLiteralAddr1KiB:
+      DCHECK(!IsHighRegister(rn_));
+      if (IsUint<10>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddr4KiB);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddr4KiB:
+      if (IsUint<12>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddr64KiB);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddr64KiB:
+      if (IsUint<16>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddrFar);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddrFar:
+      // This encoding can reach any target.
+      break;
+
     case kLongOrFPLiteral1KiB:
       if (IsUint<10>(GetOffset(current_code_size))) {
         break;
       }
-      current_code_size += IncreaseSize(kLongOrFPLiteral256KiB);
+      current_code_size += IncreaseSize(kLongOrFPLiteral64KiB);
       FALLTHROUGH_INTENDED;
-    case kLongOrFPLiteral256KiB:
-      if (IsUint<18>(GetOffset(current_code_size))) {
+    case kLongOrFPLiteral64KiB:
+      if (IsUint<16>(GetOffset(current_code_size))) {
         break;
       }
       current_code_size += IncreaseSize(kLongOrFPLiteralFar);
@@ -2055,17 +2227,52 @@
       break;
     }
 
+    case kLiteralAddr1KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(location_, encoding);
+      break;
+    }
+    case kLiteralAddr4KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(location_, encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      break;
+    }
+    case kLiteralAddr64KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
+      int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+      buffer->Store<int16_t>(location_, mov_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+      break;
+    }
+    case kLiteralAddrFar: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t offset = GetOffset(code_size);
+      int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
+      int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
+      int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+      buffer->Store<int16_t>(location_, movw_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+      break;
+    }
+
     case kLongOrFPLiteral1KiB: {
       int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size));  // DCHECKs type_.
       buffer->Store<int16_t>(location_, encoding >> 16);
       buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
-    case kLongOrFPLiteral256KiB: {
-      int32_t offset = GetOffset(code_size);
-      int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff);
+    case kLongOrFPLiteral64KiB: {
+      int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
-      int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff);    // DCHECKs type_.
+      int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u);    // DCHECKs type_.
       buffer->Store<int16_t>(location_, mov_encoding >> 16);
       buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
       buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
@@ -2118,7 +2325,7 @@
   }
 
   Register rn = ad.GetRegister();
-  if (IsHighRegister(rn) && rn != SP && rn != PC) {
+  if (IsHighRegister(rn) && (byte || half || (rn != SP && rn != PC))) {
     must_be_32bit = true;
   }
 
@@ -2130,24 +2337,24 @@
     // Immediate offset
     int32_t offset = ad.GetOffset();
 
-    // The 16 bit SP relative instruction can only have a 10 bit offset.
-    if (rn == SP && offset >= (1 << 10)) {
-      must_be_32bit = true;
-    }
-
     if (byte) {
       // 5 bit offset, no shift.
-      if (offset >= (1 << 5)) {
+      if ((offset & ~0x1f) != 0) {
         must_be_32bit = true;
       }
     } else if (half) {
-      // 6 bit offset, shifted by 1.
-      if (offset >= (1 << 6)) {
+      // 5 bit offset, shifted by 1.
+      if ((offset & ~(0x1f << 1)) != 0) {
+        must_be_32bit = true;
+      }
+    } else if (rn == SP || rn == PC) {
+      // The 16 bit SP/PC relative instruction can only have an (imm8 << 2) offset.
+      if ((offset & ~(0xff << 2)) != 0) {
         must_be_32bit = true;
       }
     } else {
-      // 7 bit offset, shifted by 2.
-      if (offset >= (1 << 7)) {
+      // 5 bit offset, shifted by 2.
+      if ((offset & ~(0x1f << 2)) != 0) {
         must_be_32bit = true;
       }
     }
@@ -2163,7 +2370,7 @@
     } else {
       // 16 bit thumb1.
       uint8_t opA = 0;
-      bool sp_relative = false;
+      bool sp_or_pc_relative = false;
 
       if (byte) {
         opA = 7U /* 0b0111 */;
@@ -2172,7 +2379,10 @@
       } else {
         if (rn == SP) {
           opA = 9U /* 0b1001 */;
-          sp_relative = true;
+          sp_or_pc_relative = true;
+        } else if (rn == PC) {
+          opA = 4U;
+          sp_or_pc_relative = true;
         } else {
           opA = 6U /* 0b0110 */;
         }
@@ -2181,7 +2391,7 @@
           (load ? B11 : 0);
 
       CHECK_GE(offset, 0);
-      if (sp_relative) {
+      if (sp_or_pc_relative) {
         // SP/PC relative, 10 bit offset.
         CHECK_LT(offset, (1 << 10));
         CHECK_ALIGNED(offset, 4);
@@ -2249,6 +2459,9 @@
         } else if (!byte) {
           encoding |= B22;
         }
+        if (load && is_signed && (byte || half)) {
+          encoding |= B24;
+        }
         Emit32(encoding);
       } else {
         // 16 bit register offset.
@@ -2349,9 +2562,19 @@
       }
     } else {
       branch_type = Fixup::kUnconditional;             // B.
+      // The T2 encoding offset is `SignExtend(imm11:'0', 32)` and there is a PC adjustment of 4.
+      static constexpr size_t kMaxT2BackwardDistance = (1u << 11) - 4u;
+      if (!use32bit && label->IsBound() && pc - label->Position() > kMaxT2BackwardDistance) {
+        use32bit = true;
+      }
     }
   } else {
     branch_type = Fixup::kConditional;                 // B<cond>.
+    // The T1 encoding offset is `SignExtend(imm8:'0', 32)` and there is a PC adjustment of 4.
+    static constexpr size_t kMaxT1BackwardDistance = (1u << 8) - 4u;
+    if (!use32bit && label->IsBound() && pc - label->Position() > kMaxT1BackwardDistance) {
+      use32bit = true;
+    }
   }
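
The "- 4u" in both constants is the Thumb PC adjustment: the encoded offset is target - (pc + 4), so a 16-bit B (imm11:'0', range [-2048, 2046]) can reach back at most 2044 bytes from the branch location, and a 16-bit B<cond> (imm8:'0', range [-256, 254]) at most 252. A quick standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t pc = 0x1000;                       // Location of the branch.
      const uint32_t kMaxT2Backward = (1u << 11) - 4u;  // 2044
      const uint32_t kMaxT1Backward = (1u << 8) - 4u;   // 252
      // The farthest backward targets map to the most negative encodable offsets:
      assert(static_cast<int32_t>((pc - kMaxT2Backward) - (pc + 4)) == -2048);
      assert(static_cast<int32_t>((pc - kMaxT1Backward) - (pc + 4)) == -256);
      return 0;
    }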
 
   Fixup::Size size = use32bit ? Fixup::kBranch32Bit : Fixup::kBranch16Bit;
@@ -2375,49 +2598,54 @@
 }
 
 
+void Thumb2Assembler::Emit32Miscellaneous(uint8_t op1,
+                                          uint8_t op2,
+                                          uint32_t rest_encoding) {
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B23 |
+      op1 << 20 |
+      0xf << 12 |
+      B7 |
+      op2 << 4 |
+      rest_encoding;
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::Emit16Miscellaneous(uint32_t rest_encoding) {
+  int16_t encoding = B15 | B13 | B12 |
+      rest_encoding;
+  Emit16(encoding);
+}
+
 void Thumb2Assembler::clz(Register rd, Register rm, Condition cond) {
   CHECK_NE(rd, kNoRegister);
   CHECK_NE(rm, kNoRegister);
   CheckCondition(cond);
   CHECK_NE(rd, PC);
   CHECK_NE(rm, PC);
-  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
-      B25 | B23 | B21 | B20 |
+  int32_t encoding =
       static_cast<uint32_t>(rm) << 16 |
-      0xf << 12 |
       static_cast<uint32_t>(rd) << 8 |
-      B7 |
       static_cast<uint32_t>(rm);
-  Emit32(encoding);
+  Emit32Miscellaneous(0b11, 0b00, encoding);
 }
 
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
 
 
@@ -2447,14 +2675,55 @@
   CHECK_NE(rm, PC);
   CHECK_NE(rd, SP);
   CHECK_NE(rm, SP);
-  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
-      B25 | B23 | B20 |
+  int32_t encoding =
       static_cast<uint32_t>(rm) << 16 |
-      0xf << 12 |
       static_cast<uint32_t>(rd) << 8 |
-      B7 | B5 |
       static_cast<uint32_t>(rm);
-  Emit32(encoding);
+
+  Emit32Miscellaneous(0b01, 0b10, encoding);
+}
+
+
+void Thumb2Assembler::EmitReverseBytes(Register rd, Register rm,
+                                       uint32_t op) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  CHECK_NE(rd, SP);
+  CHECK_NE(rm, SP);
+
+  if (!IsHighRegister(rd) && !IsHighRegister(rm) && !force_32bit_) {
+    uint16_t t1_op = B11 | B9 | (op << 6);
+    int16_t encoding = t1_op |
+        static_cast<uint16_t>(rm) << 3 |
+        static_cast<uint16_t>(rd);
+    Emit16Miscellaneous(encoding);
+  } else {
+    int32_t encoding =
+        static_cast<uint32_t>(rm) << 16 |
+        static_cast<uint32_t>(rd) << 8 |
+        static_cast<uint32_t>(rm);
+    Emit32Miscellaneous(0b01, op, encoding);
+  }
+}
+
+
+void Thumb2Assembler::rev(Register rd, Register rm, Condition cond) {
+  CheckCondition(cond);
+  EmitReverseBytes(rd, rm, 0b00);
+}
+
+
+void Thumb2Assembler::rev16(Register rd, Register rm, Condition cond) {
+  CheckCondition(cond);
+  EmitReverseBytes(rd, rm, 0b01);
+}
+
+
+void Thumb2Assembler::revsh(Register rd, Register rm, Condition cond) {
+  CheckCondition(cond);
+  EmitReverseBytes(rd, rm, 0b11);
 }
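
For reference, the three instructions added here differ only in which bytes they swap; a standalone scalar model of their semantics (not part of the patch):

    #include <cassert>
    #include <cstdint>

    uint32_t rev(uint32_t x) { return __builtin_bswap32(x); }  // Reverse all 4 bytes.
    uint32_t rev16(uint32_t x) {  // Swap the two bytes within each halfword.
      return ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
    }
    int32_t revsh(uint32_t x) {   // Byte-swap the low halfword, then sign-extend.
      return static_cast<int16_t>(((x & 0xffu) << 8) | ((x >> 8) & 0xffu));
    }

    int main() {
      assert(rev(0xAABBCCDDu) == 0xDDCCBBAAu);
      assert(rev16(0xAABBCCDDu) == 0xBBAADDCCu);
      assert(revsh(0x000080FFu) == -128);  // 0x80FF byte-swapped is 0xFF80.
      return 0;
    }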
 
 
@@ -2751,9 +3020,49 @@
 }
 
 
+void Thumb2Assembler::vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond) {
+  int32_t rest = B23;
+  EmitVLdmOrStm(rest,
+                static_cast<uint32_t>(reg),
+                nregs,
+                base_reg,
+                /*is_load*/ true,
+                /*dbl*/ true,
+                cond);
+}
+
+
+void Thumb2Assembler::vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond) {
+  int32_t rest = B23;
+  EmitVLdmOrStm(rest,
+                static_cast<uint32_t>(reg),
+                nregs,
+                base_reg,
+                /*is_load*/ false,
+                /*dbl*/ true,
+                cond);
+}
+
+
 void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
+  int32_t rest = B21 | (push ? B24 : B23);
+  EmitVLdmOrStm(rest, reg, nregs, SP, /*is_load*/ !push, dbl, cond);
+}
+
+
+void Thumb2Assembler::EmitVLdmOrStm(int32_t rest,
+                                    uint32_t reg,
+                                    int nregs,
+                                    Register rn,
+                                    bool is_load,
+                                    bool dbl,
+                                    Condition cond) {
   CheckCondition(cond);
 
+  DCHECK_GT(nregs, 0);
+  DCHECK_LE(reg + nregs, 32u);
+  DCHECK(!dbl || (nregs <= 16));
+
   uint32_t D;
   uint32_t Vd;
   if (dbl) {
@@ -2765,14 +3074,17 @@
     D = reg & 1;
     Vd = (reg >> 1) & 15U /* 0b1111 */;
   }
-  int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 |
-                    B11 | B9 |
-        (dbl ? B8 : 0) |
-        (push ? B24 : (B23 | B20)) |
-        14U /* 0b1110 */ << 28 |
-        nregs << (dbl ? 1 : 0) |
-        D << 22 |
-        Vd << 12;
+
+  int32_t encoding = rest |
+                     14U /* 0b1110 */ << 28 |
+                     B27 | B26 | B11 | B9 |
+                     (is_load ? B20 : 0) |
+                     static_cast<int16_t>(rn) << 16 |
+                     D << 22 |
+                     Vd << 12 |
+                     (dbl ? B8 : 0) |
+                     nregs << (dbl ? 1 : 0);
+
   Emit32(encoding);
 }
 
@@ -2853,6 +3165,30 @@
   Emit32(encoding);
 }
 
+void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
+
+void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
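
VCNT and VPADDL together form the usual NEON population-count reduction: VCNT.8 produces per-byte bit counts and VPADDL.U8/U16/U32 fold them pairwise into one 64-bit sum (e.g. vcntd(d0, d0) followed by vpaddld(d0, d0, 8/16/32, true)). A standalone scalar model of that reduction, to make the data flow concrete:

    #include <cassert>
    #include <cstdint>

    uint64_t popcount64_neon_style(uint64_t x) {
      // VCNT.8 equivalent: per-byte popcount via SWAR.
      x = x - ((x >> 1) & 0x5555555555555555ull);
      x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull);
      x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0full;  // 8 x u8 counts
      // VPADDL.U8 -> 4 x u16, VPADDL.U16 -> 2 x u32, VPADDL.U32 -> 1 x u64.
      x = (x & 0x00ff00ff00ff00ffull) + ((x >> 8) & 0x00ff00ff00ff00ffull);
      x = (x & 0x0000ffff0000ffffull) + ((x >> 16) & 0x0000ffff0000ffffull);
      x = (x & 0x00000000ffffffffull) + (x >> 32);
      return x;
    }

    int main() {
      assert(popcount64_neon_style(0xffffffffffffffffull) == 64);
      assert(popcount64_neon_style(0x8000000000000001ull) == 2);
      return 0;
    }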
 
 void Thumb2Assembler::svc(uint32_t imm8) {
   CHECK(IsUint<8>(imm8)) << imm8;
@@ -3046,7 +3382,7 @@
 
 void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, RRX, rm, cond, set_cc);
+  EmitShift(rd, rm, RRX, 0, cond, set_cc);
 }
 
 
@@ -3237,30 +3573,64 @@
   // positive values and sub for negative ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
+  if (ShifterOperandCanHold(rd, rn, ADD, value, set_cc, &shifter_op)) {
     add(rd, rn, shifter_op, cond, set_cc);
-  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, set_cc, &shifter_op)) {
     sub(rd, rn, shifter_op, cond, set_cc);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
+    // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP.
+    Register temp = (rd != rn) ? rd : IP;
+    if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) {
+      mvn(temp, shifter_op, cond, kCcKeep);
+      add(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) {
+      mvn(temp, shifter_op, cond, kCcKeep);
+      sub(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else if (High16Bits(-value) == 0) {
+      movw(temp, Low16Bits(-value), cond);
+      sub(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else {
+      movw(temp, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(temp, value_high, cond);
+      }
+      add(rd, rn, ShifterOperand(temp), cond, set_cc);
+    }
+  }
+}
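
The fallback order above is: ADD of a modified immediate, SUB of the negation, MVN of the complement (then ADD) or of the negation's complement (then SUB), a single MOVW when -value fits in 16 bits, and MOVW(+MOVT) of the full value otherwise. For instance, value -65535 fails the modified-immediate tests but has -value == 0xFFFF, so it takes the single-MOVW-plus-SUB path. A standalone check of the helpers gating that path (assumed semantics of ART's Low16Bits/High16Bits):

    #include <cassert>
    #include <cstdint>

    // Assumed models of ART's helpers: low and high halfwords of a 32-bit value.
    uint16_t Low16Bits(int32_t v) { return static_cast<uint16_t>(v & 0xffff); }
    uint16_t High16Bits(int32_t v) {
      return static_cast<uint16_t>(static_cast<uint32_t>(v) >> 16);
    }

    int main() {
      const int32_t value = -65535;
      // -value == 0xFFFF fits in 16 bits: MOVW temp, #0xFFFF; SUB rd, rn, temp.
      assert(High16Bits(-value) == 0 && Low16Bits(-value) == 0xFFFF);
      // A general value needs MOVW plus, when the high half is non-zero, MOVT:
      const int32_t big = 0x12345678;
      assert(High16Bits(big) == 0x1234 && Low16Bits(big) == 0x5678);
      return 0;
    }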
+
+void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  // We prefer the shorter code sequence here, even though using plain cmp and cmn
+  // would slightly improve the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, -value, kCcSet, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) {
       mvn(IP, shifter_op, cond, kCcKeep);
-      add(rd, rn, ShifterOperand(IP), cond, set_cc);
-    } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
+      cmp(rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) {
       mvn(IP, shifter_op, cond, kCcKeep);
-      sub(rd, rn, ShifterOperand(IP), cond, set_cc);
+      cmn(rn, ShifterOperand(IP), cond);
+    } else if (High16Bits(-value) == 0) {
+      movw(IP, Low16Bits(-value), cond);
+      cmn(rn, ShifterOperand(IP), cond);
     } else {
       movw(IP, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
         movt(IP, value_high, cond);
       }
-      add(rd, rn, ShifterOperand(IP), cond, set_cc);
+      cmp(rn, ShifterOperand(IP), cond);
     }
   }
 }
 
-
 void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
   if (ShifterOperandCanHold(rd, R0, MOV, value, &shifter_op)) {
@@ -3276,6 +3646,91 @@
   }
 }
 
+void Thumb2Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+  if (!vmovd(dd, value, cond)) {
+    uint64_t int_value = bit_cast<uint64_t, double>(value);
+    if (int_value == bit_cast<uint64_t, double>(0.0)) {
+      // 0.0 is quite common, so we special case it by loading
+      // 2.0 in `dd` and then subtracting it.
+      bool success = vmovd(dd, 2.0, cond);
+      CHECK(success);
+      vsubd(dd, dd, dd, cond);
+    } else {
+      Literal* literal = literal64_dedupe_map_.GetOrCreate(
+          int_value,
+          [this, int_value]() { return NewLiteral<uint64_t>(int_value); });
+      LoadLiteral(dd, literal);
+    }
+  }
+}
+
+int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
+  switch (type) {
+    case kLoadSignedByte:
+    case kLoadSignedHalfword:
+    case kLoadUnsignedHalfword:
+    case kLoadUnsignedByte:
+    case kLoadWord:
+      // We can encode imm12 offset.
+      return 0xfffu;
+    case kLoadSWord:
+    case kLoadDWord:
+    case kLoadWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
+  switch (type) {
+    case kStoreHalfword:
+    case kStoreByte:
+    case kStoreWord:
+      // We can encode imm12 offset.
+      return 0xfff;
+    case kStoreSWord:
+    case kStoreDWord:
+    case kStoreWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                                              int32_t offset,
+                                              /*out*/ int32_t* add_to_base,
+                                              /*out*/ int32_t* offset_for_load_store) {
+  int32_t other_bits = offset & ~allowed_offset_bits;
+  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
+    *add_to_base = offset & ~allowed_offset_bits;
+    *offset_for_load_store = offset & allowed_offset_bits;
+    return true;
+  }
+  return false;
+}
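
The split succeeds whenever the bits outside the encodable field form a modified immediate (or minus one does). For a word load, whose imm12 field gives allowed_offset_bits == 0xfff, offset 0x12ffc splits into add_to_base == 0x12000 and offset_for_load_store == 0xffc. A standalone sketch of the mask arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t allowed_offset_bits = 0xfff;  // imm12 field of 32-bit LDR/STR.
      const int32_t offset = 0x12ffc;
      const int32_t add_to_base = offset & ~allowed_offset_bits;           // 0x12000
      const int32_t offset_for_load_store = offset & allowed_offset_bits;  // 0xffc
      assert(add_to_base + offset_for_load_store == offset);
      // The split is only usable when add_to_base (or -add_to_base) is itself
      // encodable as a modified immediate for a single ADD/SUB on the base.
      return 0;
    }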
+
+int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                               Register temp,
+                                               Register base,
+                                               int32_t offset,
+                                               Condition cond) {
+  DCHECK_NE(offset & ~allowed_offset_bits, 0);
+  int32_t add_to_base, offset_for_load;
+  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+    AddConstant(temp, base, add_to_base, cond, kCcKeep);
+    return offset_for_load;
+  } else {
+    LoadImmediate(temp, offset, cond);
+    add(temp, temp, ShifterOperand(base), cond, kCcKeep);
+    return 0;
+  }
+}
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb.
@@ -3286,12 +3741,26 @@
                                      Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
+    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
+    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
+    DCHECK_NE(offset & ~allowed_offset_bits, 0);
+    int32_t add_to_base, offset_for_load;
+    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      AddConstant(reg, base, add_to_base, cond, kCcKeep);
+      base = reg;
+      offset = offset_for_load;
+    } else {
+      Register temp = (reg == base) ? IP : reg;
+      LoadImmediate(temp, offset, cond);
+      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep);
+      base = reg;
+      offset = 0;
+    }
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(type, offset));
   switch (type) {
     case kLoadSignedByte:
       ldrsb(reg, Address(base, offset), cond);
@@ -3317,7 +3786,6 @@
   }
 }
 
-
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
 void Thumb2Assembler::LoadSFromOffset(SRegister reg,
@@ -3326,12 +3794,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
   vldrs(reg, Address(base, offset), cond);
 }
 
@@ -3344,12 +3810,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
   vldrd(reg, Address(base, offset), cond);
 }
 
@@ -3380,12 +3844,12 @@
         offset += kRegisterSize;
       }
     }
-    LoadImmediate(tmp_reg, offset, cond);
-    add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
+    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
+    // and in the "unsplittable" path get rid of the "add" by using the indexed store instead.
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond);
     base = tmp_reg;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(type, offset));
   switch (type) {
     case kStoreByte:
       strb(reg, Address(base, offset), cond);
@@ -3418,12 +3882,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
   vstrs(reg, Address(base, offset), cond);
 }
 
@@ -3436,22 +3898,14 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
   vstrd(reg, Address(base, offset), cond);
 }
 
 
-void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-  dmb(SY);
-}
-
-
 void Thumb2Assembler::dmb(DmbOptions flavor) {
   int32_t encoding = 0xf3bf8f50;  // dmb in T1 encoding.
   Emit32(encoding | flavor);
@@ -3476,5 +3930,39 @@
     b(label, NE);
   }
 }
+
+JumpTable* Thumb2Assembler::CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) {
+  jump_tables_.emplace_back(std::move(labels));
+  JumpTable* table = &jump_tables_.back();
+  DCHECK(!table->GetLabel()->IsBound());
+
+  bool use32bit = IsForced32Bit() || IsHighRegister(base_reg);
+  uint32_t location = buffer_.Size();
+  Fixup::Size size = use32bit ? Fixup::kLiteralAddr4KiB : Fixup::kLiteralAddr1KiB;
+  FixupId fixup_id = AddFixup(Fixup::LoadLiteralAddress(location, base_reg, size));
+  Emit16(static_cast<uint16_t>(table->GetLabel()->position_));
+  table->GetLabel()->LinkTo(fixup_id);
+  if (use32bit) {
+    Emit16(0);
+  }
+  DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
+
+  return table;
+}
+
+void Thumb2Assembler::EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) {
+  CHECK(!IsForced32Bit()) << "Forced 32-bit dispatch not implemented yet";
+  // 32-bit ADD doesn't support PC as an input, so we need a two-instruction sequence:
+  //   SUB ip, ip, #0
+  //   ADD pc, ip, reg
+  // TODO: Implement.
+
+  // The anchor's position needs to be fixed up before we can compute offsets, so make it a
+  // tracked label.
+  BindTrackedLabel(jump_table->GetAnchorLabel());
+
+  add(PC, PC, ShifterOperand(displacement_reg));
+}
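
Taken together with CreateJumpTable() above, a code generator would drive the two entry points roughly like this (a hypothetical sketch: the register choices and the scaled-index load are illustrative, not part of this patch):

    // Emit the ADR fixup; r2 will receive the table's address.
    std::vector<Label*> case_labels = {&case0, &case1, &case2};
    JumpTable* table = assembler->CreateJumpTable(std::move(case_labels), arm::R2);
    // ... emit a scaled-index load, e.g. ldr r3, [r2, r1, lsl #2], so that r3
    // holds the stored displacement for case index r1 ...
    // ADD pc, pc, r3 at the anchor; entries were written as target - (anchor + 4).
    assembler->EmitJumpTableDispatch(table, arm::R3);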
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 055b137..13f3bec 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -18,8 +18,10 @@
 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
 
 #include <deque>
+#include <utility>
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/logging.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
@@ -32,17 +34,21 @@
 
 class Thumb2Assembler FINAL : public ArmAssembler {
  public:
-  explicit Thumb2Assembler(bool can_relocate_branches = true)
-      : can_relocate_branches_(can_relocate_branches),
+  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
+      : ArmAssembler(arena),
+        can_relocate_branches_(can_relocate_branches),
         force_32bit_(false),
         it_cond_index_(kNoItCondition),
         next_condition_(AL),
-        fixups_(),
-        fixup_dependents_(),
-        literals_(),
+        fixups_(arena->Adapter(kArenaAllocAssembler)),
+        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
+        literals_(arena->Adapter(kArenaAllocAssembler)),
+        literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
+        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0u),
         last_old_position_(0u),
         last_fixup_id_(0u) {
+    cfi().DelayEmittingAdvancePCs();
   }
 
   virtual ~Thumb2Assembler() {
@@ -115,6 +121,9 @@
   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   // Multiply instructions.
   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
@@ -242,10 +251,15 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
 
   // Branch instructions.
   void b(Label* label, Condition cond = AL);
@@ -304,8 +318,11 @@
   void AddConstant(Register rd, Register rn, int32_t value,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
   void LoadFromOffset(LoadOperandType type,
                       Register reg,
@@ -338,7 +355,9 @@
                              Register rn,
                              Opcode opcode,
                              uint32_t immediate,
+                             SetCc set_cc,
                              ShifterOperand* shifter_op) OVERRIDE;
+  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
 
   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
@@ -351,13 +370,17 @@
   void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
   void Bind(Label* label) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   // Force the assembler to generate 32 bit instructions.
   void Force32Bit() {
     force_32bit_ = true;
   }
 
+  // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
+  // will generate a fixup.
+  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+  // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
+  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
  private:
   typedef uint16_t FixupId;
 
@@ -399,6 +422,7 @@
       kCompareAndBranchXZero,     // cbz/cbnz.
       kLoadLiteralNarrow,         // Load narrow integer literal.
       kLoadLiteralWide,           // Load wide integer literal.
+      kLoadLiteralAddr,           // Load address of literal (used for jump table).
       kLoadFPLiteralSingle,       // Load FP literal single.
       kLoadFPLiteralDouble,       // Load FP literal double.
     };
@@ -429,11 +453,21 @@
       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
       kLiteralFar,
 
+      // Load literal base addr.
+      // ADR rX, label; X < 8; 8-bit immediate shifted left by 2 (10-bit range). 2 bytes.
+      kLiteralAddr1KiB,
+      // ADR rX, label; 4KiB offset. 4 bytes.
+      kLiteralAddr4KiB,
+      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
+      kLiteralAddr64KiB,
+      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
+      kLiteralAddrFar,
+
       // Load long or FP literal variants.
       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
       kLongOrFPLiteral1KiB,
-      // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
-      kLongOrFPLiteral256KiB,
+      // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
+      kLongOrFPLiteral64KiB,
       // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
       kLongOrFPLiteralFar,
     };
@@ -457,7 +491,7 @@
     }
 
     // Load narrow literal.
-    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) {
+    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
       DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
              size == kLiteral1MiB || size == kLiteralFar);
       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
@@ -468,7 +502,7 @@
     // Load wide literal.
     static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                  Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
       return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
@@ -478,7 +512,7 @@
     // Load FP single literal.
     static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                    Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                    AL, kLoadFPLiteralSingle, size, location);
@@ -487,12 +521,20 @@
     // Load FP double literal.
     static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                    Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                    AL, kLoadFPLiteralDouble, size, location);
     }
 
+    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
+      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
+             size == kLiteralAddrFar);
+      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
+      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+                   AL, kLoadLiteralAddr, size, location);
+    }
+
     Type GetType() const {
       return type_;
     }
@@ -501,6 +543,20 @@
       return GetType() >= kLoadLiteralNarrow;
     }
 
+    // Returns whether the Fixup can expand from the original size.
+    bool CanExpand() const {
+      switch (GetOriginalSize()) {
+        case kBranch32Bit:
+        case kCbxz48Bit:
+        case kLiteralFar:
+        case kLiteralAddrFar:
+        case kLongOrFPLiteralFar:
+          return false;
+        default:
+          return true;
+      }
+    }
+
     Size GetOriginalSize() const {
       return original_size_;
     }
@@ -524,9 +580,9 @@
     // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
     static void PrepareDependents(Thumb2Assembler* assembler);
 
-    ArrayRef<FixupId> Dependents(const Thumb2Assembler& assembler) const {
-      return ArrayRef<FixupId>(assembler.fixup_dependents_.get() + dependents_start_,
-                               dependents_count_);
+    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
+      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
+                                                                           dependents_count_);
     }
 
     // Resolve a branch when the target is known.
@@ -574,6 +630,7 @@
           dependents_count_(0u),
           dependents_start_(0u) {
     }
+
     static size_t SizeInBytes(Size size);
 
     // The size of padding added before the literal pool.
@@ -586,6 +643,9 @@
 
     int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;
 
+    template <typename Function>
+    static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);
+
     static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.
 
     const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
@@ -613,6 +673,17 @@
                           Register rd,
                           const ShifterOperand& so);
 
+  // Emit a single 32 bit miscellaneous instruction.
+  void Emit32Miscellaneous(uint8_t op1,
+                           uint8_t op2,
+                           uint32_t rest_encoding);
+
+  // Emit reverse byte instructions: rev, rev16, revsh.
+  void EmitReverseBytes(Register rd, Register rm, uint32_t op);
+
+  // Emit a single 16 bit miscellaneous instruction.
+  void Emit16Miscellaneous(uint32_t rest_encoding);
+
   // Must the instruction be 32 bits or can it possibly be encoded
   // in 16 bits?
   bool Is32BitDataProcessing(Condition cond,
@@ -679,6 +750,14 @@
                   SRegister sn,
                   SRegister sm);
 
+  void EmitVLdmOrStm(int32_t rest,
+                     uint32_t reg,
+                     int nregs,
+                     Register rn,
+                     bool is_load,
+                     bool dbl,
+                     Condition cond);
+
   void EmitVFPddd(Condition cond,
                   int32_t opcode,
                   DRegister dd,
@@ -700,13 +779,23 @@
   void EmitBranch(Condition cond, Label* label, bool link, bool x);
   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
 
+  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
+  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
+  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                               int32_t offset,
+                               /*out*/ int32_t* add_to_base,
+                               /*out*/ int32_t* offset_for_load_store);
+  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                Register temp,
+                                Register base,
+                                int32_t offset,
+                                Condition cond);
+
   // Whether the assembler can relocate branches. If false, unresolved branches will be
   // emitted on 32bits.
   bool can_relocate_branches_;
@@ -756,12 +845,15 @@
   }
 
   void BindLabel(Label* label, uint32_t bound_pc);
-  void BindLiterals();
+  uint32_t BindLiterals();
+  void BindJumpTables(uint32_t code_size);
   void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                            std::deque<FixupId>* fixups_to_recalculate);
   uint32_t AdjustFixups();
   void EmitFixups(uint32_t adjusted_code_size);
   void EmitLiterals();
+  void EmitJumpTables();
+  void PatchCFI();
 
   static int16_t BEncoding16(int32_t offset, Condition cond);
   static int32_t BEncoding32(int32_t offset, Condition cond);
@@ -778,13 +870,21 @@
   static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
   static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
+  static int16_t AdrEncoding16(Register rd, int32_t offset);
+  static int32_t AdrEncoding32(Register rd, int32_t offset);
 
-  std::vector<Fixup> fixups_;
-  std::unique_ptr<FixupId[]> fixup_dependents_;
+  ArenaVector<Fixup> fixups_;
+  ArenaVector<FixupId> fixup_dependents_;
 
   // Use a deque for literal labels to allow insertions at the end
   // without invalidating pointers and references to existing elements.
-  std::deque<Literal> literals_;
+  ArenaDeque<Literal> literals_;
+
+  // Deduplication map for 64-bit literals, used for LoadDImmediate().
+  ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;
+
+  // Jump table list.
+  ArenaDeque<JumpTable> jump_tables_;
 
   // Data for AdjustedPosition(); see the description there.
   uint32_t last_position_adjustment_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 9c08ce0..d0799d6 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -17,6 +17,7 @@
 #include "assembler_thumb2.h"
 
 #include "base/stl_util.h"
+#include "base/stringprintf.h"
 #include "utils/assembler_test.h"
 
 namespace art {
@@ -242,7 +243,7 @@
 
   const char* expected =
       "subs r1, r0, #42\n"
-      "subw r1, r0, #42\n"
+      "sub.w r1, r0, #42\n"
       "subs r1, r0, r2, asr #31\n"
       "sub r1, r0, r2, asr #31\n";
   DriverStr(expected, "sub");
@@ -256,7 +257,7 @@
 
   const char* expected =
       "adds r1, r0, #42\n"
-      "addw r1, r0, #42\n"
+      "add.w r1, r0, #42\n"
       "adds r1, r0, r2, asr #31\n"
       "add r1, r0, r2, asr #31\n";
   DriverStr(expected, "add");
@@ -278,6 +279,148 @@
   DriverStr(expected, "smull");
 }
 
+TEST_F(AssemblerThumb2Test, LoadByteFromThumbOffset) {
+  arm::LoadOperandType type = arm::kLoadUnsignedByte;
+
+  __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+  __ LoadFromOffset(type, arm::R1, arm::R7, 31);
+  __ LoadFromOffset(type, arm::R2, arm::R7, 32);
+  __ LoadFromOffset(type, arm::R3, arm::R7, 4095);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+
+  const char* expected =
+      "ldrb r0, [r7, #0]\n"
+      "ldrb r1, [r7, #31]\n"
+      "ldrb.w r2, [r7, #32]\n"
+      "ldrb.w r3, [r7, #4095]\n"
+      "ldrb.w r4, [sp, #0]\n";
+  DriverStr(expected, "LoadByteFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreByteToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreByte;
+
+  __ StoreToOffset(type, arm::R0, arm::R7, 0);
+  __ StoreToOffset(type, arm::R1, arm::R7, 31);
+  __ StoreToOffset(type, arm::R2, arm::R7, 32);
+  __ StoreToOffset(type, arm::R3, arm::R7, 4095);
+  __ StoreToOffset(type, arm::R4, arm::SP, 0);
+
+  const char* expected =
+      "strb r0, [r7, #0]\n"
+      "strb r1, [r7, #31]\n"
+      "strb.w r2, [r7, #32]\n"
+      "strb.w r3, [r7, #4095]\n"
+      "strb.w r4, [sp, #0]\n";
+  DriverStr(expected, "StoreByteToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadHalfFromThumbOffset) {
+  arm::LoadOperandType type = arm::kLoadUnsignedHalfword;
+
+  __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+  __ LoadFromOffset(type, arm::R1, arm::R7, 62);
+  __ LoadFromOffset(type, arm::R2, arm::R7, 64);
+  __ LoadFromOffset(type, arm::R3, arm::R7, 4094);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+  __ LoadFromOffset(type, arm::R5, arm::R7, 1);  // Unaligned
+
+  const char* expected =
+      "ldrh r0, [r7, #0]\n"
+      "ldrh r1, [r7, #62]\n"
+      "ldrh.w r2, [r7, #64]\n"
+      "ldrh.w r3, [r7, #4094]\n"
+      "ldrh.w r4, [sp, #0]\n"
+      "ldrh.w r5, [r7, #1]\n";
+  DriverStr(expected, "LoadHalfFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreHalfToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreHalfword;
+
+  __ StoreToOffset(type, arm::R0, arm::R7, 0);
+  __ StoreToOffset(type, arm::R1, arm::R7, 62);
+  __ StoreToOffset(type, arm::R2, arm::R7, 64);
+  __ StoreToOffset(type, arm::R3, arm::R7, 4094);
+  __ StoreToOffset(type, arm::R4, arm::SP, 0);
+  __ StoreToOffset(type, arm::R5, arm::R7, 1);  // Unaligned
+
+  const char* expected =
+      "strh r0, [r7, #0]\n"
+      "strh r1, [r7, #62]\n"
+      "strh.w r2, [r7, #64]\n"
+      "strh.w r3, [r7, #4094]\n"
+      "strh.w r4, [sp, #0]\n"
+      "strh.w r5, [r7, #1]\n";
+  DriverStr(expected, "StoreHalfToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromSpPlusOffset) {
+  arm::LoadOperandType type = arm::kLoadWord;
+
+  __ LoadFromOffset(type, arm::R0, arm::SP, 0);
+  __ LoadFromOffset(type, arm::R1, arm::SP, 124);
+  __ LoadFromOffset(type, arm::R2, arm::SP, 128);
+  __ LoadFromOffset(type, arm::R3, arm::SP, 1020);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 1024);
+  __ LoadFromOffset(type, arm::R5, arm::SP, 4092);
+  __ LoadFromOffset(type, arm::R6, arm::SP, 1);  // Unaligned
+
+  const char* expected =
+      "ldr r0, [sp, #0]\n"
+      "ldr r1, [sp, #124]\n"
+      "ldr r2, [sp, #128]\n"
+      "ldr r3, [sp, #1020]\n"
+      "ldr.w r4, [sp, #1024]\n"
+      "ldr.w r5, [sp, #4092]\n"
+      "ldr.w r6, [sp, #1]\n";
+  DriverStr(expected, "LoadWordFromSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToSpPlusOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+
+  __ StoreToOffset(type, arm::R0, arm::SP, 0);
+  __ StoreToOffset(type, arm::R1, arm::SP, 124);
+  __ StoreToOffset(type, arm::R2, arm::SP, 128);
+  __ StoreToOffset(type, arm::R3, arm::SP, 1020);
+  __ StoreToOffset(type, arm::R4, arm::SP, 1024);
+  __ StoreToOffset(type, arm::R5, arm::SP, 4092);
+  __ StoreToOffset(type, arm::R6, arm::SP, 1);  // Unaligned
+
+  const char* expected =
+      "str r0, [sp, #0]\n"
+      "str r1, [sp, #124]\n"
+      "str r2, [sp, #128]\n"
+      "str r3, [sp, #1020]\n"
+      "str.w r4, [sp, #1024]\n"
+      "str.w r5, [sp, #4092]\n"
+      "str.w r6, [sp, #1]\n";
+  DriverStr(expected, "StoreWordToSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromPcPlusOffset) {
+  arm::LoadOperandType type = arm::kLoadWord;
+
+  __ LoadFromOffset(type, arm::R0, arm::PC, 0);
+  __ LoadFromOffset(type, arm::R1, arm::PC, 124);
+  __ LoadFromOffset(type, arm::R2, arm::PC, 128);
+  __ LoadFromOffset(type, arm::R3, arm::PC, 1020);
+  __ LoadFromOffset(type, arm::R4, arm::PC, 1024);
+  __ LoadFromOffset(type, arm::R5, arm::PC, 4092);
+  __ LoadFromOffset(type, arm::R6, arm::PC, 1);  // Unaligned
+
+  const char* expected =
+      "ldr r0, [pc, #0]\n"
+      "ldr r1, [pc, #124]\n"
+      "ldr r2, [pc, #128]\n"
+      "ldr r3, [pc, #1020]\n"
+      "ldr.w r4, [pc, #1024]\n"
+      "ldr.w r5, [pc, #4092]\n"
+      "ldr.w r6, [pc, #1]\n";
+  DriverStr(expected, "LoadWordFromPcPlusOffset");
+}
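+
+// Editor's note (summary of the 16-bit encoding limits exercised above): the
+// narrow forms take a 5-bit offset scaled by the access size (byte: 0-31,
+// halfword: 0-62, word: 0-124), while SP- and PC-relative word loads/stores
+// get an 8-bit offset scaled by 4 (0-1020). Larger or unaligned offsets fall
+// back to the 32-bit ".w" encodings.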
+
 TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
   arm::StoreOperandType type = arm::kStoreWord;
   int32_t offset = 4092;
@@ -304,21 +447,18 @@
   __ StoreToOffset(type, arm::IP, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #4096\n"   // AddConstant(ip, sp, 4096)
       "str r0, [ip, #0]\n"
 
-      "str r5, [sp, #-4]!\n"  // Push(r5)
-      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "str ip, [r5, #0]\n"
-      "ldr r5, [sp], #4\n"    // Pop(r5)
+      "str r5, [sp, #-4]!\n"    // Push(r5)
+      "add.w r5, sp, #4096\n"   // AddConstant(r5, 4100 & ~0xfff)
+      "str ip, [r5, #4]\n"      // StoreToOffset(type, ip, r5, 4100 & 0xfff)
+      "ldr r5, [sp], #4\n"      // Pop(r5)
 
-      "str r6, [sp, #-4]!\n"  // Push(r6)
-      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
-      "add r6, r6, r5\n"
-      "str ip, [r6, #0]\n"
-      "ldr r6, [sp], #4\n";   // Pop(r6)
+      "str r6, [sp, #-4]!\n"    // Push(r6)
+      "add.w r6, r5, #4096\n"   // AddConstant(r6, r5, 4096 & ~0xfff)
+      "str ip, [r6, #0]\n"      // StoreToOffset(type, ip, r6, 4096 & 0xfff)
+      "ldr r6, [sp], #4\n";     // Pop(r6)
   DriverStr(expected, "StoreWordToNonThumbOffset");
 }
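+
+// Editor's sketch (illustrative, not part of the original change): AddConstant
+// splits an out-of-range offset into a 4 KiB-aligned base plus the low bits,
+// as the expected strings above assume for 4100:
+static_assert((4100 & ~0xfff) == 4096, "base part handled by add.w");
+static_assert((4100 & 0xfff) == 4, "remainder encoded in the str offset");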
 
@@ -359,24 +499,46 @@
   __ StoreToOffset(type, arm::R11, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #1024\n"           // LoadImmediate(ip, 1024)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #1024\n"     // AddConstant(ip, sp, 1024)
       "strd r0, r1, [ip, #0]\n"
 
       "str r5, [sp, #-4]!\n"      // Push(r5)
-      "movw r5, #1028\n"          // LoadImmediate(r5, 1024 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "strd r11, ip, [r5, #0]\n"
+      "add.w r5, sp, #1024\n"     // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc)
+      "strd r11, ip, [r5, #4]\n"  // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc)
       "ldr r5, [sp], #4\n"        // Pop(r5)
 
       "str r6, [sp, #-4]!\n"      // Push(r6)
-      "mov r6, #1024\n"           // LoadImmediate(r6, 1024)
-      "add r6, r6, r5\n"
-      "strd r11, ip, [r6, #0]\n"
+      "add.w r6, r5, #1024\n"     // AddConstant(r6, r5, 1024 & ~0x3fc)
+      "strd r11, ip, [r6, #0]\n"  // StoreToOffset(type, r11, r6, 1024 & 0x3fc)
       "ldr r6, [sp], #4\n";       // Pop(r6)
   DriverStr(expected, "StoreWordPairToNonThumbOffset");
 }
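+
+// Editor's sketch (illustrative): strd can only encode an 8-bit offset scaled
+// by 4 (mask 0x3fc), so 1028 splits as the expected strings above assume:
+static_assert((1028 & ~0x3fc) == 1024, "base part handled by add.w");
+static_assert((1028 & 0x3fc) == 4, "imm8*4 remainder encoded in strd");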
 
+TEST_F(AssemblerThumb2Test, DistantBackBranch) {
+  Label start, end;
+  __ Bind(&start);
+  constexpr size_t kLdrR0R0Count1 = 256;
+  for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ b(&end, arm::EQ);
+  __ b(&start, arm::LT);
+  constexpr size_t kLdrR0R0Count2 = 256;
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ Bind(&end);
+
+  std::string expected =
+      "0:\n" +
+      RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
+      "beq 1f\n"
+      "blt 0b\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      "1:\n";
+  DriverStr(expected, "DistantBackBranch");
+}
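+
+// Editor's note: the 256 two-byte loads put the branch targets beyond the
+// roughly +-254-byte reach of 16-bit conditional branches, so both "beq" and
+// "blt" above must assemble to their 32-bit encodings.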
+
 TEST_F(AssemblerThumb2Test, TwoCbzMaxOffset) {
   Label label0, label1, label2;
   __ cbz(arm::R0, &label1);
@@ -849,10 +1011,11 @@
   }
 
   std::string expected =
-      "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #(0x408 - 0x4 - 4)\n"
       "1:\n"
       "add ip, pc\n"
-      "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
+      "ldrd r1, r3, [ip, #0]\n" +
       RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
       ".align 2, 0\n"
       "2:\n"
@@ -864,48 +1027,78 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
-TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) {
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB) {
   // The literal size must match but the type doesn't, so use an int32_t rather than float.
   arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
   __ LoadLiteral(arm::S3, literal);
   Label label;
   __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 3u;
-  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
-    __ ldr(arm::R0, arm::Address(arm::R0));
-  }
-
-  std::string expected =
-      "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
-      "1:\n"
-      "add ip, pc\n"
-      "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
-      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
-      ".align 2, 0\n"
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadLiteralSingleMax256KiB");
-
-  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
-            __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) {
-  // The literal size must match but the type doesn't, so use an int64_t rather than double.
-  arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
-  __ LoadLiteral(arm::D3, literal);
-  Label label;
-  __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 2u;
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 3u;
   for (size_t i = 0; i != kLdrR0R0Count; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
 
   std::string expected =
       // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
-      "movw ip, #(0x40000 & 0xffff)\n"
+      "movw ip, #(0x10004 - 0x4 - 4)\n"
+      "1:\n"
+      "add ip, pc\n"
+      "vldr s3, [ip, #0]\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2, 0\n"
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadLiteralSingleMax64KiB");
+
+  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+            __ GetAdjustedPosition(label.Position()));
+}
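+
+// Editor's note (worked example for the movw immediate above): the loaded
+// value is literal_position - anchor_position - 4, where 4 is the Thumb PC
+// read-ahead at "add ip, pc"; here 0x10004 - 0x4 - 4 = 0xfffc, the largest
+// offset that still fits movw's 16-bit immediate.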
+
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB_UnalignedPC) {
+  // The literal size must match but the type doesn't, so use an int32_t rather than float.
+  arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
+  __ ldr(arm::R0, arm::Address(arm::R0));
+  __ LoadLiteral(arm::S3, literal);
+  Label label;
+  __ Bind(&label);
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 4u;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  std::string expected =
+      "ldr r0, [r0]\n"
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #(0x10004 - 0x6 - 4)\n"
+      "1:\n"
+      "add ip, pc\n"
+      "vldr s3, [ip, #0]\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2, 0\n"
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadLiteralSingleMax64KiB_UnalignedPC");
+
+  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+            __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax64KiB) {
+  // The literal size must match but the type doesn't, so use an int64_t rather than double.
+  arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
+  __ LoadLiteral(arm::D3, literal);
+  Label label;
+  __ Bind(&label);
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 2u;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  std::string expected =
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #((0x1000c - 0x8 - 4) & 0xffff)\n"
       // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
-      "movt ip, #(0x40000 >> 16)\n"
+      "movt ip, #((0x1000c - 0x8 - 4) >> 16)\n"
       "1:\n"
       "add ip, pc\n"
       "vldr d3, [ip, #0]\n" +
@@ -914,7 +1107,7 @@
       "2:\n"
       ".word 0x87654321\n"
       ".word 0x12345678\n";
-  DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB");
+  DriverStr(expected, "LoadLiteralDoubleBeyondMax64KiB");
 
   EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u,
             __ GetAdjustedPosition(label.Position()));
@@ -926,16 +1119,16 @@
   __ LoadLiteral(arm::D3, literal);
   Label label;
   __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234;
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 2u + 0x1234;
   for (size_t i = 0; i != kLdrR0R0Count; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
 
   std::string expected =
       // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
-      "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n"
+      "movw ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) & 0xffff)\n"
       // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
-      "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n"
+      "movt ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) >> 16)\n"
       "1:\n"
       "add ip, pc\n"
       "vldr d3, [ip, #0]\n" +
@@ -1011,6 +1204,315 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
+TEST_F(AssemblerThumb2Test, BindTrackedLabel) {
+  Label non_tracked, tracked, branch_target;
+
+  // A few dummy loads on entry.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // A branch that will need to be fixed up.
+  __ cbz(arm::R0, &branch_target);
+
+  // Some more dummy loads.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Now insert a tracked and an untracked label.
+  __ Bind(&non_tracked);
+  __ BindTrackedLabel(&tracked);
+
+  // A lot of dummy loads, to ensure the branch needs resizing.
+  constexpr size_t kLdrR0R0CountLong = 60;
+  for (size_t i = 0; i != kLdrR0R0CountLong; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Bind the branch target.
+  __ Bind(&branch_target);
+
+  // One more load.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "cmp r0, #0\n"                                                       // cbz r0, 1f
+      "beq.n 1f\n" +
+      RepeatInsn(kLdrR0R0Count + kLdrR0R0CountLong, "ldr r0, [r0]\n") +
+      "1:\n"
+      "ldr r0, [r0]\n";
+  DriverStr(expected, "BindTrackedLabel");
+
+  // The tracked label should have moved with the branch fixup, while the untracked label
+  // keeps its original position.
+  EXPECT_LT(non_tracked.Position(), tracked.Position());
+}
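+
+// Editor's note: cbz can only branch forward 0-126 bytes; the 65 two-byte
+// loads between the cbz and its target exceed that, so the assembler must
+// rewrite it as the "cmp r0, #0; beq" pair checked above.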
+
+TEST_F(AssemblerThumb2Test, JumpTable) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table and emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump table dispatch.
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {          // Note: odd count, so no alignment
+    __ ldr(arm::R0, arm::Address(arm::R0));              //       padding is needed; gcc's "as"
+  }                                                      //       pads with nops, while we emit 0.
+
+  static_assert((kLdrR0R0Count + 3) * 2 < 1 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "adr r1, .Ljump_table\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable");
+}
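+
+// Editor's note: each table entry holds ".L<n> - .Lbase - 4"; the PC read by
+// "add pc, r1" is .Lbase + 4 (Thumb read-ahead), so once the dispatch fetches
+// an entry into r1 (stubbed out by the dummy load here), "add pc, r1" lands
+// exactly on .L<n>.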
+
+// Test for >1K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable4K) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table and emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump table dispatch.
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 600;               // Note: even count, so no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       padding is needed; gcc's "as"
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       pads with nops, while we emit 0.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 1 * KB, "Not enough offset");
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 4 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "adr r1, .Ljump_table\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable4K");
+}
+
+// Test for >4K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable64K) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table and emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump table dispatch.
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 2601;              // Note: odd count, so no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       padding is needed; gcc's "as"
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       pads with nops, while we emit 0.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 4 * KB, "Not enough offset");
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 64 * KB, "Too much offset");
+
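+  // Editor's note: the movw immediate below is the byte distance from the PC
+  // read at "add r1, pc" to .Ljump_table: three 16-bit instructions (add r1,
+  // pc; ldr; add pc, r1) plus the trailing loads, minus the 4-byte Thumb PC
+  // read-ahead (assumed layout, matching the gas-assembled reference).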
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      // Roughly "adr r1, .Ljump_table"; gcc's "as" can't seem to fix up a large offset itself.
+      // (Note: we have to use constants here, as labels aren't accepted.)
+      "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) & 0xFFFF)\n"
+      "add r1, pc\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable64K");
+}
+
+// Test for >64K fixup.
+TEST_F(AssemblerThumb2Test, JumpTableFar) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table and emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump table dispatch.
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 70001;             // Note: odd count, so no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       padding is needed; gcc's "as"
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       pads with nops, while we emit 0.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 64 * KB, "Not enough offset");
+
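+  // Editor's note: beyond 64 KiB the displacement no longer fits movw alone,
+  // so the expected code below pairs movw (low 16 bits) with movt (high 16
+  // bits) before adding the PC.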
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      // Roughly "adr r1, .Ljump_table"; gcc's "as" can't seem to fix up a large offset itself.
+      // (Note: we have to use constants here, as labels aren't accepted.)
+      "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) & 0xFFFF)\n"
+      "movt r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) >> 16)\n"
+      ".Lhelp:"
+      "add r1, pc\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTableFar");
+}
+
 TEST_F(AssemblerThumb2Test, Clz) {
   __ clz(arm::R0, arm::R1);
 
@@ -1027,4 +1529,128 @@
   DriverStr(expected, "rbit");
 }
 
+TEST_F(AssemblerThumb2Test, rev) {
+  __ rev(arm::R1, arm::R0);
+
+  const char* expected = "rev r1, r0\n";
+
+  DriverStr(expected, "rev");
+}
+
+TEST_F(AssemblerThumb2Test, rev16) {
+  __ rev16(arm::R1, arm::R0);
+
+  const char* expected = "rev16 r1, r0\n";
+
+  DriverStr(expected, "rev16");
+}
+
+TEST_F(AssemblerThumb2Test, revsh) {
+  __ revsh(arm::R1, arm::R0);
+
+  const char* expected = "revsh r1, r0\n";
+
+  DriverStr(expected, "revsh");
+}
+
+TEST_F(AssemblerThumb2Test, vcnt) {
+  // Different D register numbers are used here to test register encoding.
+  // The source register number is encoded as M:Vm and the destination register number as D:Vd.
+  // For source and destination registers in D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers in D16..D31, the M bit and D bit should be 1.
+  __ vcntd(arm::D0, arm::D1);
+  __ vcntd(arm::D19, arm::D20);
+  __ vcntd(arm::D0, arm::D9);
+  __ vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
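+
+// Editor's sketch (illustrative of the encoding described above): a D-register
+// number splits into the extension bit and a 4-bit field, e.g. for D19:
+static_assert((19 & 0xf) == 3, "low 4 bits give the Vd/Vm field of D19");
+static_assert((19 >> 4) == 1, "bit 4 gives the D/M bit of D19");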
+
+TEST_F(AssemblerThumb2Test, vpaddl) {
+  // Different D register numbers are used here to test register encoding.
+  // The source register number is encoded as M:Vm and the destination register number as D:Vd.
+  // For source and destination registers in D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers in D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  __ vpaddld(arm::D0, arm::D0, 8, true);
+  __ vpaddld(arm::D20, arm::D20, 8, false);
+  __ vpaddld(arm::D0, arm::D20, 16, false);
+  __ vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
+TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) {
+  arm::Address mem_address(arm::R0, arm::R1, arm::Shift::LSL, 2);
+
+  __ ldrsb(arm::R2, mem_address);
+  __ ldrb(arm::R2, mem_address);
+  __ ldrsh(arm::R2, mem_address);
+  __ ldrh(arm::R2, mem_address);
+  __ ldr(arm::R2, mem_address);
+
+  std::string expected =
+      "ldrsb r2, [r0, r1, LSL #2]\n"
+      "ldrb r2, [r0, r1, LSL #2]\n"
+      "ldrsh r2, [r0, r1, LSL #2]\n"
+      "ldrh r2, [r0, r1, LSL #2]\n"
+      "ldr r2, [r0, r1, LSL #2]\n";
+
+  DriverStr(expected, "LoadFromShiftedRegOffset");
+}
+
+TEST_F(AssemblerThumb2Test, VStmLdmPushPop) {
+  // Different D register numbers are used here to test register encoding.
+  // The source register number is encoded as M:Vm and the destination register number as D:Vd.
+  // For source and destination registers in D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers in D16..D31, the M bit and D bit should be 1.
+  // Register lists of various lengths and starting points are also tested.
+  __ vstmiad(arm::R0, arm::D0, 4);
+  __ vldmiad(arm::R1, arm::D9, 5);
+  __ vpopd(arm::D0, 4);
+  __ vpushd(arm::D9, 5);
+  __ vpops(arm::S0, 4);
+  __ vpushs(arm::S9, 5);
+  __ vpushs(arm::S16, 5);
+  __ vpushd(arm::D0, 16);
+  __ vpushd(arm::D1, 15);
+  __ vpushd(arm::D8, 16);
+  __ vpushd(arm::D31, 1);
+  __ vpushs(arm::S0, 32);
+  __ vpushs(arm::S1, 31);
+  __ vpushs(arm::S16, 16);
+  __ vpushs(arm::S31, 1);
+
+  std::string expected =
+      "vstmia r0, {d0 - d3}\n"
+      "vldmia r1, {d9 - d13}\n"
+      "vpop {d0 - d3}\n"
+      "vpush {d9 - d13}\n"
+      "vpop {s0 - s3}\n"
+      "vpush {s9 - s13}\n"
+      "vpush {s16 - s20}\n"
+      "vpush {d0 - d15}\n"
+      "vpush {d1 - d15}\n"
+      "vpush {d8 - d23}\n"
+      "vpush {d31}\n"
+      "vpush {s0 - s31}\n"
+      "vpush {s1 - s31}\n"
+      "vpush {s16 - s31}\n"
+      "vpush {s31}\n";
+
+  DriverStr(expected, "VStmLdmPushPop");
+}
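+
+// Editor's note: the integer operand of these helpers is the register count,
+// so vpushd(D9, 5) pushes the contiguous range {d9 - d13}; the encodings cap
+// D-register lists at 16 and S-register lists at 32 registers, which the
+// boundary cases above exercise.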
+
 }  // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc
new file mode 100644
index 0000000..af5ebb4
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.cc
@@ -0,0 +1,618 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_arm.h"
+
+#include <algorithm>
+
+#include "assembler_arm32.h"
+#include "assembler_thumb2.h"
+#include "base/arena_allocator.h"
+#include "base/bit_utils.h"
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+
+namespace art {
+namespace arm {
+
+constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class ArmExceptionSlowPath FINAL : public SlowPath {
+ public:
+  ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {
+  }
+  void Emit(Assembler* sp_asm) OVERRIDE;
+ private:
+  const ArmManagedRegister scratch_;
+  const size_t stack_adjust_;
+};
+
+ArmJNIMacroAssembler::ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      asm_.reset(new (arena) Arm32Assembler(arena));
+      break;
+
+    case kThumb2:
+      asm_.reset(new (arena) Thumb2Assembler(arena));
+      break;
+
+    default:
+      LOG(FATAL) << isa;
+      UNREACHABLE();
+  }
+}
+
+ArmJNIMacroAssembler::~ArmJNIMacroAssembler() {
+}
+
+size_t ArmJNIMacroAssembler::CodeSize() const {
+  return asm_->CodeSize();
+}
+
+DebugFrameOpCodeWriterForAssembler& ArmJNIMacroAssembler::cfi() {
+  return asm_->cfi();
+}
+
+void ArmJNIMacroAssembler::FinalizeCode() {
+  asm_->FinalizeCode();
+}
+
+void ArmJNIMacroAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  asm_->FinalizeInstructions(region);
+}
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+  return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
+#define __ asm_->
+
+void ArmJNIMacroAssembler::BuildFrame(size_t frame_size,
+                                      ManagedRegister method_reg,
+                                      ArrayRef<const ManagedRegister> callee_save_regs,
+                                      const ManagedRegisterEntrySpills& entry_spills) {
+  CHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
+
+  // Push callee saves and link register.
+  RegList core_spill_mask = 1 << LR;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+    } else {
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+    }
+  }
+  __ PushList(core_spill_mask);
+  cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
+  cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
+  if (fp_spill_mask != 0) {
+    __ vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
+    cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
+  }
+
+  // Increase frame to required size.
+  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
+  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
+
+  // Write out Method*.
+  __ StoreToOffset(kStoreWord, R0, SP, 0);
+
+  // Write out entry spills.
+  int32_t offset = frame_size + kFramePointerSize;
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ArmManagedRegister reg = entry_spills.at(i).AsArm();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsCoreRegister()) {
+      __ StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsSRegister()) {
+      __ StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      __ StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    }
+  }
+}
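+
+// Editor's sketch (hypothetical register set, not part of the original
+// change): for callee saves {R5, R6, R11} the mask loop above yields
+//   core_spill_mask == (1u << R5) | (1u << R6) | (1u << R11) | (1u << LR),
+// PushList then emits a single "push {r5, r6, r11, lr}", and the CFA offset
+// grows by POPCOUNT(core_spill_mask) * kFramePointerSize == 16 bytes.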
+
+void ArmJNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                       ArrayRef<const ManagedRegister> callee_save_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+
+  // Compute callee saves to pop and PC.
+  RegList core_spill_mask = 1 << PC;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+    } else {
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+    }
+  }
+
+  // Decrease frame to start of callee saves.
+  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GT(frame_size, pop_values * kFramePointerSize);
+  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
+
+  if (fp_spill_mask != 0) {
+    __ vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
+    cfi().RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
+  }
+
+  // Pop callee saves and PC.
+  __ PopList(core_spill_mask);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void ArmJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  __ AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(ArmAssembler* assembler, size_t adjust) {
+  assembler->AddConstant(SP, adjust);
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void ArmJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(asm_.get(), adjust);
+}
+
+void ArmJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
+  ArmManagedRegister src = msrc.AsArm();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCoreRegister()) {
+    CHECK_EQ(4u, size);
+    __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
+    __ StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), SP, dest.Int32Value() + 4);
+  } else if (src.IsSRegister()) {
+    __ StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
+  } else {
+    CHECK(src.IsDRegister()) << src;
+    __ StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
+  }
+}
+
+void ArmJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  ArmManagedRegister src = msrc.AsArm();
+  CHECK(src.IsCoreRegister()) << src;
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  ArmManagedRegister src = msrc.AsArm();
+  CHECK(src.IsCoreRegister()) << src;
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreSpanning(FrameOffset dest,
+                                         ManagedRegister msrc,
+                                         FrameOffset in_off,
+                                         ManagedRegister mscratch) {
+  ArmManagedRegister src = msrc.AsArm();
+  ArmManagedRegister scratch = mscratch.AsArm();
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + sizeof(uint32_t));
+}
+
+void ArmJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                   ManagedRegister mbase,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord,
+                    dst.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offs.Int32Value());
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dst.AsCoreRegister());
+  }
+}
+
+void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                      ManagedRegister mbase,
+                                      Offset offs) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord,
+                    dst.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
+                                                 uint32_t imm,
+                                                 ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadImmediate(scratch.AsCoreRegister(), imm);
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+static void EmitLoad(ArmAssembler* assembler,
+                     ManagedRegister m_dst,
+                     Register src_register,
+                     int32_t src_offset,
+                     size_t size) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  if (dst.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dst;
+  } else if (dst.IsCoreRegister()) {
+    CHECK_EQ(4u, size) << dst;
+    assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
+  } else if (dst.IsRegisterPair()) {
+    CHECK_EQ(8u, size) << dst;
+    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
+    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
+  } else if (dst.IsSRegister()) {
+    assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset);
+  } else {
+    CHECK(dst.IsDRegister()) << dst;
+    assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
+  }
+}
+
+void ArmJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
+  EmitLoad(asm_.get(), m_dst, SP, src.Int32Value(), size);
+}
+
+void ArmJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) {
+  EmitLoad(asm_.get(), m_dst, TR, src.Int32Value(), size);
+}
+
+void ArmJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                ThreadOffset32 thr_offs,
+                                                ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                              FrameOffset fr_offs,
+                                              ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                                    FrameOffset fr_offs,
+                                                    ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
+  __ StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
+}
+
+void ArmJNIMacroAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
+}
+
+void ArmJNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  ArmManagedRegister src = m_src.AsArm();
+  if (!dst.Equals(src)) {
+    if (dst.IsCoreRegister()) {
+      CHECK(src.IsCoreRegister()) << src;
+      __ mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
+    } else if (dst.IsDRegister()) {
+      CHECK(src.IsDRegister()) << src;
+      __ vmovd(dst.AsDRegister(), src.AsDRegister());
+    } else if (dst.IsSRegister()) {
+      CHECK(src.IsSRegister()) << src;
+      __ vmovs(dst.AsSRegister(), src.AsSRegister());
+    } else {
+      CHECK(dst.IsRegisterPair()) << dst;
+      CHECK(src.IsRegisterPair()) << src;
+      // Ensure that the first move doesn't clobber the input of the second.
+      if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
+        __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
+        __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
+      } else {
+        __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
+        __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
+      }
+    }
+  }
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+  }
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset dest,
+                                ManagedRegister src_base,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  __ LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  __ LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
+  __ StoreToOffset(kStoreWord,
+                   scratch,
+                   dest_base.AsArm().AsCoreRegister(),
+                   dest_offset.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                FrameOffset /*src_base*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*mscratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::Copy(ManagedRegister dest,
+                                Offset dest_offset,
+                                ManagedRegister src,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  CHECK_EQ(size, 4u);
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  __ LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                Offset /*dest_offset*/,
+                                FrameOffset /*src*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*scratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister min_reg,
+                                                  bool null_allowed) {
+  ArmManagedRegister out_reg = mout_reg.AsArm();
+  ArmManagedRegister in_reg = min_reg.AsArm();
+  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
+  CHECK(out_reg.IsCoreRegister()) << out_reg;
+  if (null_allowed) {
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+      in_reg = out_reg;
+    }
+    __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+    if (!out_reg.Equals(in_reg)) {
+      __ it(EQ, kItElse);
+      __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    } else {
+      __ it(NE);
+    }
+    __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
+  } else {
+    __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
+  }
+}
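+
+// Editor's note: the IT blocks above predicate the following instructions;
+// it(EQ, kItElse) assembles to "ITE EQ", so the EQ-guarded LoadImmediate runs
+// for a null handle and the NE-guarded AddConstant runs otherwise.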
+
+void ArmJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister mscratch,
+                                                  bool null_allowed) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  if (null_allowed) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    __ cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+    __ it(NE);
+    __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
+  } else {
+    __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
+  }
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                        ManagedRegister min_reg) {
+  ArmManagedRegister out_reg = mout_reg.AsArm();
+  ArmManagedRegister in_reg = min_reg.AsArm();
+  CHECK(out_reg.IsCoreRegister()) << out_reg;
+  CHECK(in_reg.IsCoreRegister()) << in_reg;
+  __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+  if (!out_reg.Equals(in_reg)) {
+    // Zero the output for a null handle; the condition is now defined by the cmp above
+    // (the original code issued this EQ-conditional move before setting the flags).
+    __ it(EQ, kItElse);
+    __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+  } else {
+    __ it(NE);
+  }
+  __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), in_reg.AsCoreRegister(), 0, NE);
+}
+
+void ArmJNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void ArmJNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void ArmJNIMacroAssembler::Call(ManagedRegister mbase,
+                                Offset offset,
+                                ManagedRegister mscratch) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offset.Int32Value());
+  __ blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call.
+}
+
+void ArmJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value());
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    scratch.AsCoreRegister(),
+                    offset.Int32Value());
+  __ blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call.
+}
+
+void ArmJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                          ManagedRegister scratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
+}
+
+void ArmJNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /*scratch*/) {
+  __ StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL);
+}
+
+void ArmJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  ArmExceptionSlowPath* slow = new (__ GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    TR,
+                    Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
+  __ cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+  __ b(slow->Entry(), NE);
+}
+
+#undef __
+
+void ArmExceptionSlowPath::Emit(Assembler* sasm) {
+  ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(sp_asm, stack_adjust_);
+  }
+  // Pass exception object as argument.
+  // Don't care about preserving R0 as this call won't return.
+  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
+  // Set up call to Thread::Current()->pDeliverException.
+  __ LoadFromOffset(kLoadWord,
+                    R12,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value());
+  __ blx(R12);
+#undef __
+}
+
+void ArmJNIMacroAssembler::MemoryBarrier(ManagedRegister mscratch) {
+  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
+  asm_->dmb(SY);
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.h b/compiler/utils/arm/jni_macro_assembler_arm.h
new file mode 100644
index 0000000..4471906
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
+#define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
+
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "utils/jni_macro_assembler.h"
+#include "offsets.h"
+
+namespace art {
+namespace arm {
+
+class ArmAssembler;
+
+class ArmJNIMacroAssembler : public JNIMacroAssembler<PointerSize::k32> {
+ public:
+  ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa);
+  virtual ~ArmJNIMacroAssembler();
+
+  size_t CodeSize() const OVERRIDE;
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE;
+  void FinalizeCode() OVERRIDE;
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE;
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+    OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed is true. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed is true.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
+                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to `this`) we
+  // know that src cannot be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+ private:
+  std::unique_ptr<ArmAssembler> asm_;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
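The ARM header above declares every JNI stub operation as an OVERRIDE while keeping the actual instruction encoder behind a std::unique_ptr<ArmAssembler>. As a rough illustration of that delegation pattern — a minimal sketch using hypothetical stand-in types, not the real ART classes — the shape is:

#include <cstdint>
#include <memory>

struct FrameOffset { int32_t value; };     // hypothetical stand-in

class Encoder {                            // plays the role of ArmAssembler
 public:
  void EmitStoreToSp(int /*reg*/, int32_t /*offset*/) { /* emit STR */ }
};

class JniAsm {                             // plays the role of JNIMacroAssembler
 public:
  virtual ~JniAsm() {}
  virtual void StoreRawPtr(FrameOffset dest, int src) = 0;
};

class ArmJniAsm final : public JniAsm {    // mirrors the asm_ member above
 public:
  explicit ArmJniAsm(std::unique_ptr<Encoder> enc) : asm_(std::move(enc)) {}
  void StoreRawPtr(FrameOffset dest, int src) override {
    asm_->EmitStoreToSp(src, dest.value);  // forward to the owned encoder
  }
 private:
  std::unique_ptr<Encoder> asm_;
};

int main() {
  ArmJniAsm jni(std::unique_ptr<Encoder>(new Encoder()));
  jni.StoreRawPtr(FrameOffset{16}, /*src=*/0);
  return 0;
}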
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index 5fde9e8..276db44 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -19,7 +19,7 @@
 
 #include "base/logging.h"
 #include "constants_arm.h"
-#include "dwarf/register.h"
+#include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -85,34 +85,34 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class ArmManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds);
   }
 
-  SRegister AsOverlappingDRegisterLow() const {
+  constexpr SRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2);
   }
 
-  SRegister AsOverlappingDRegisterHigh() const {
+  constexpr SRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2 + 1);
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     Register reg_low = AsRegisterPairLow();
     if (reg_low == R1) {
@@ -122,50 +122,50 @@
     }
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows the use of AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows the use of AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps SRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfPairRegIds);
   }
 
-  bool IsSameType(ArmManagedRegister test) const {
+  constexpr bool IsSameType(ArmManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsCoreRegister() && test.IsCoreRegister()) ||
@@ -182,29 +182,29 @@
 
   void Print(std::ostream& os) const;
 
-  static ArmManagedRegister FromCoreRegister(Register r) {
+  static constexpr ArmManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static ArmManagedRegister FromSRegister(SRegister r) {
+  static constexpr ArmManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static ArmManagedRegister FromDRegister(DRegister r) {
+  static constexpr ArmManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfSRegIds));
   }
 
-  static ArmManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr ArmManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds +
                           kNumberOfSRegIds + kNumberOfDRegIds));
   }
 
   // Return a RegisterPair consisting of Register r_low and r_low + 1.
-  static ArmManagedRegister FromCoreRegisterPair(Register r_low) {
+  static constexpr ArmManagedRegister FromCoreRegisterPair(Register r_low) {
     if (r_low != R1) {  // not the dalvik special case
       CHECK_NE(r_low, kNoRegister);
       CHECK_EQ(0, (r_low % 2));
@@ -217,7 +217,7 @@
   }
 
   // Return a DRegister overlapping SRegister r_low and r_low + 1.
-  static ArmManagedRegister FromSRegisterPair(SRegister r_low) {
+  static constexpr ArmManagedRegister FromSRegisterPair(SRegister r_low) {
     CHECK_NE(r_low, kNoSRegister);
     CHECK_EQ(0, (r_low % 2));
     const int r = r_low / 2;
@@ -226,7 +226,7 @@
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
@@ -251,9 +251,9 @@
 
   friend class ManagedRegister;
 
-  explicit ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static ArmManagedRegister FromRegId(int reg_id) {
+  static constexpr ArmManagedRegister FromRegId(int reg_id) {
     ArmManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -264,7 +264,7 @@
 
 }  // namespace arm
 
-inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
+constexpr inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
   arm::ArmManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
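The only substantive change in managed_register_arm.h is marking the accessors, predicates, and factories constexpr so register mappings can be folded at compile time. A reduced sketch of the payoff, using a hypothetical Reg wrapper rather than the real ArmManagedRegister:

#include <cassert>

class Reg {  // hypothetical reduced model of a managed register id
 public:
  static constexpr Reg FromId(int id) { return Reg(id); }
  constexpr bool IsCore() const { return 0 <= id_ && id_ < 16; }
  constexpr int Id() const { return id_; }
 private:
  explicit constexpr Reg(int id) : id_(id) {}
  int id_;
};

// With constexpr accessors, checks like these cost nothing at runtime;
// they are evaluated entirely by the compiler.
static_assert(Reg::FromId(3).IsCore(), "r3 must be a core register");
constexpr int kMethodRegId = Reg::FromId(0).Id();

int main() {
  assert(kMethodRegId == 0);
  return 0;
}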
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 0e17512..f91bcfa 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -20,7 +20,7 @@
 #include "offsets.h"
 #include "thread.h"
 
-using namespace vixl;  // NOLINT(build/namespaces)
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 namespace art {
 namespace arm64 {
@@ -28,630 +28,71 @@
 #ifdef ___
 #error "ARM64 Assembler macro already defined."
 #else
-#define ___   vixl_masm_->
+#define ___   vixl_masm_.
 #endif
 
 void Arm64Assembler::FinalizeCode() {
-  if (!exception_blocks_.empty()) {
-    for (size_t i = 0; i < exception_blocks_.size(); i++) {
-      EmitExceptionPoll(exception_blocks_.at(i));
-    }
-  }
   ___ FinalizeCode();
 }
 
 size_t Arm64Assembler::CodeSize() const {
-  return vixl_masm_->BufferCapacity() - vixl_masm_->RemainingBufferSpace();
+  return vixl_masm_.GetSizeOfCodeGenerated();
 }
 
 const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
-  return vixl_masm_->GetStartAddress<uint8_t*>();
+  return vixl_masm_.GetStartAddress<uint8_t*>();
 }
 
 void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) {
   // Copy the instructions from the buffer.
-  MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize());
+  MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize());
   region.CopyFrom(0, from);
 }
 
-void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
-}
-
-void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(TR, SP, offset.Int32Value());
-}
-
-// See Arm64 PCS Section 5.2.2.1.
-void Arm64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, -adjust);
-  cfi().AdjustCFAOffset(adjust);
-}
-
-// See Arm64 PCS Section 5.2.2.1.
-void Arm64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, adjust);
-  cfi().AdjustCFAOffset(-adjust);
-}
-
-void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
-  AddConstant(rd, rd, value, cond);
-}
-
-void Arm64Assembler::AddConstant(XRegister rd, XRegister rn, int32_t value,
-                                 Condition cond) {
-  if ((cond == al) || (cond == nv)) {
-    // VIXL macro-assembler handles all variants.
-    ___ Add(reg_x(rd), reg_x(rn), value);
-  } else {
-    // temp = rd + value
-    // rd = cond ? temp : rn
-    vixl::UseScratchRegisterScope temps(vixl_masm_);
-    temps.Exclude(reg_x(rd), reg_x(rn));
-    vixl::Register temp = temps.AcquireX();
-    ___ Add(temp, reg_x(rn), value);
-    ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
-  }
-}
-
-void Arm64Assembler::StoreWToOffset(StoreOperandType type, WRegister source,
-                                    XRegister base, int32_t offset) {
-  switch (type) {
-    case kStoreByte:
-      ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    case kStoreHalfword:
-      ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    case kStoreWord:
-      ___ Str(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
-}
-
-void Arm64Assembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) {
-  CHECK_NE(source, SP);
-  ___ Str(reg_x(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) {
-  ___ Str(reg_s(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) {
-  ___ Str(reg_d(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsWRegister()) {
-    CHECK_EQ(4u, size);
-    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
-  } else if (src.IsXRegister()) {
-    CHECK_EQ(8u, size);
-    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
-  } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
-  } else {
-    CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
-  }
-}
-
-void Arm64Assembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  CHECK(src.IsXRegister()) << src;
-  StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
-}
-
-void Arm64Assembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  CHECK(src.IsXRegister()) << src;
-  StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm,
-                                           ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
-}
-
-void Arm64Assembler::StoreImmediateToThread64(ThreadOffset<8> offs, uint32_t imm,
-                                            ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
-                                              FrameOffset fr_offs,
-                                              ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
-  vixl::Register temp = temps.AcquireX();
-  ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
-}
-
-void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
-                                   FrameOffset in_off, ManagedRegister m_scratch) {
-  Arm64ManagedRegister source = m_source.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
-}
-
-// Load routines.
-void Arm64Assembler::LoadImmediate(XRegister dest, int32_t value,
-                                   Condition cond) {
-  if ((cond == al) || (cond == nv)) {
-    ___ Mov(reg_x(dest), value);
-  } else {
-    // temp = value
-    // rd = cond ? temp : rd
-    if (value != 0) {
-      vixl::UseScratchRegisterScope temps(vixl_masm_);
-      temps.Exclude(reg_x(dest));
-      vixl::Register temp = temps.AcquireX();
-      ___ Mov(temp, value);
-      ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
-    } else {
-      ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond);
-    }
-  }
-}
-
-void Arm64Assembler::LoadWFromOffset(LoadOperandType type, WRegister dest,
-                                     XRegister base, int32_t offset) {
-  switch (type) {
-    case kLoadSignedByte:
-      ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadSignedHalfword:
-      ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadUnsignedByte:
-      ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadUnsignedHalfword:
-      ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadWord:
-      ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    default:
-        LOG(FATAL) << "UNREACHABLE";
-  }
-}
-
-// Note: We can extend this member by adding load type info - see
-// sign extended A64 load variants.
-void Arm64Assembler::LoadFromOffset(XRegister dest, XRegister base,
-                                    int32_t offset) {
-  CHECK_NE(dest, SP);
-  ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::LoadSFromOffset(SRegister dest, XRegister base,
-                                     int32_t offset) {
-  ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::LoadDFromOffset(DRegister dest, XRegister base,
-                                     int32_t offset) {
-  ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::Load(Arm64ManagedRegister dest, XRegister base,
-                          int32_t offset, size_t size) {
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dest;
-  } else if (dest.IsWRegister()) {
-    CHECK_EQ(4u, size) << dest;
-    ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
-  } else if (dest.IsXRegister()) {
-    CHECK_NE(dest.AsXRegister(), SP) << dest;
-    if (size == 4u) {
-      ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
-    } else {
-      CHECK_EQ(8u, size) << dest;
-      ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
-    }
-  } else if (dest.IsSRegister()) {
-    ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
-  } else {
-    CHECK(dest.IsDRegister()) << dest;
-    ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset));
-  }
-}
-
-void Arm64Assembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
-  return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
-}
-
-void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
-  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
-}
-
-void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  CHECK(dst.IsXRegister()) << dst;
-  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
-}
-
-void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, MemberOffset offs,
-                             bool unpoison_reference) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  Arm64ManagedRegister base = m_base.AsArm64();
-  CHECK(dst.IsXRegister() && base.IsXRegister());
-  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
-                  offs.Int32Value());
-  if (unpoison_reference) {
-    WRegister ref_reg = dst.AsOverlappingWRegister();
-    MaybeUnpoisonHeapReference(reg_w(ref_reg));
-  }
-}
-
 void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsXRegister() && base.IsXRegister());
   // Remove dst and base from the temp list - higher level API uses IP1, IP0.
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(&vixl_masm_);
   temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
   ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
 }
 
-void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  CHECK(dst.IsXRegister()) << dst;
-  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
-}
-
-// Copying routines.
-void Arm64Assembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  Arm64ManagedRegister src = m_src.AsArm64();
-  if (!dst.Equals(src)) {
-    if (dst.IsXRegister()) {
-      if (size == 4) {
-        CHECK(src.IsWRegister());
-        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
-      } else {
-        if (src.IsXRegister()) {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
-        } else {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
-        }
-      }
-    } else if (dst.IsWRegister()) {
-      CHECK(src.IsWRegister()) << src;
-      ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
-    } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister()));
-    } else {
-      CHECK(dst.IsDRegister()) << dst;
-      CHECK(src.IsDRegister()) << src;
-      ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister()));
-    }
-  }
-}
-
-void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                          ThreadOffset<8> tr_offs,
-                                          ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-}
-
-void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset<8> tr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
-                             ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
-                  SP, src.Int32Value());
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
-                 SP, dest.Int32Value());
-}
-
-void Arm64Assembler::Copy(FrameOffset dest, FrameOffset src,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister base = src_base.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
-                   src_offset.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(ManagedRegister m_dest_base, Offset dest_offs, FrameOffset src,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister base = m_dest_base.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
-                   dest_offs.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                          ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64Assembler::Copy(ManagedRegister m_dest, Offset dest_offset,
-                          ManagedRegister m_src, Offset src_offset,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister src = m_src.AsArm64();
-  Arm64ManagedRegister dest = m_dest.AsArm64();
-  CHECK(dest.IsXRegister()) << dest;
-  CHECK(src.IsXRegister()) << src;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    if (scratch.IsWRegister()) {
-      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
-                    src_offset.Int32Value());
-      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
-                   dest_offset.Int32Value());
-    } else {
-      LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
-                    src_offset.Int32Value());
-      StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
-                   dest_offset.Int32Value());
-    }
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/,
-                          FrameOffset /*src*/, Offset /*src_offset*/,
-                          ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
-  // TODO: Should we check that m_scratch is IP? - see arm.
-  ___ Dmb(vixl::InnerShareable, vixl::BarrierAll);
-}
-
-void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  Arm64ManagedRegister reg = mreg.AsArm64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsWRegister()) << reg;
-  if (size == 1) {
-    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  } else {
-    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  }
-}
-
-void Arm64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  Arm64ManagedRegister reg = mreg.AsArm64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsWRegister()) << reg;
-  if (size == 1) {
-    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  } else {
-    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  }
-}
-
-void Arm64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void Arm64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void Arm64Assembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
-}
-
 void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
   Arm64ManagedRegister base = m_base.AsArm64();
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
   CHECK(scratch.IsXRegister()) << scratch;
   // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(&vixl_masm_);
   temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
   ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsXRegister()));
 }
 
-void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  // Call *(*(SP + base) + offset)
-  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
-}
-
-void Arm64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
-}
-
-void Arm64Assembler::CreateHandleScopeEntry(
-    ManagedRegister m_out_reg, FrameOffset handle_scope_offs, ManagedRegister m_in_reg,
-    bool null_allowed) {
-  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
-  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  // For now we only hold stale handle scope entries in x registers.
-  CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
-  CHECK(out_reg.IsXRegister()) << out_reg;
-  if (null_allowed) {
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
-    if (in_reg.IsNoRegister()) {
-      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
-                      handle_scope_offs.Int32Value());
-      in_reg = out_reg;
-    }
-    ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
-    if (!out_reg.Equals(in_reg)) {
-      LoadImmediate(out_reg.AsXRegister(), 0, eq);
-    }
-    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne);
-  } else {
-    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al);
-  }
-}
-
-void Arm64Assembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset,
-                                            ManagedRegister m_scratch, bool null_allowed) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  if (null_allowed) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
-                    handle_scope_offset.Int32Value());
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
-    // Move this logic in add constants with flags.
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
-  } else {
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
-  }
-  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
-}
-
-void Arm64Assembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
-                                                  ManagedRegister m_in_reg) {
-  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
-  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  CHECK(out_reg.IsXRegister()) << out_reg;
-  CHECK(in_reg.IsXRegister()) << in_reg;
-  vixl::Label exit;
-  if (!out_reg.Equals(in_reg)) {
-    // FIXME: Who sets the flags here?
-    LoadImmediate(out_reg.AsXRegister(), 0, eq);
-  }
-  ___ Cbz(reg_x(in_reg.AsXRegister()), &exit);
-  LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0);
-  ___ Bind(&exit);
-}
-
-void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
-  CHECK_ALIGNED(stack_adjust, kStackAlignment);
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
-  exception_blocks_.push_back(current_exception);
-  LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value());
-  ___ Cbnz(reg_x(scratch.AsXRegister()), current_exception->Entry());
-}
-
-void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
-  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
-  vixl::Register temp = temps.AcquireX();
-
-  // Bind exception poll entry.
-  ___ Bind(exception->Entry());
-  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
-    DecreaseFrameSize(exception->stack_adjust_);
-  }
-  // Pass exception object as argument.
-  // Don't care about preserving X0 as this won't return.
-  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp, MEM_OP(reg_x(TR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
-
-  ___ Blr(temp);
-  // Call should never return.
-  ___ Brk();
-}
-
 static inline dwarf::Reg DWARFReg(CPURegister reg) {
   if (reg.IsFPRegister()) {
-    return dwarf::Reg::Arm64Fp(reg.code());
+    return dwarf::Reg::Arm64Fp(reg.GetCode());
   } else {
-    DCHECK_LT(reg.code(), 31u);  // X0 - X30.
-    return dwarf::Reg::Arm64Core(reg.code());
+    DCHECK_LT(reg.GetCode(), 31u);  // X0 - X30.
+    return dwarf::Reg::Arm64Core(reg.GetCode());
   }
 }
 
-void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) {
-  int size = registers.RegisterSizeInBytes();
-  const Register sp = vixl_masm_->StackPointer();
-  while (registers.Count() >= 2) {
+void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) {
+  int size = registers.GetRegisterSizeInBytes();
+  const Register sp = vixl_masm_.StackPointer();
+  // Since we are operating on register pairs, we would like to align on
+  // double the standard size; on the other hand, peeling off a single
+  // register would insert an extra store if the register count were even.
+  if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Str(dst0, MemOperand(sp, offset));
+    cfi_.RelOffset(DWARFReg(dst0), offset);
+    offset += size;
+  }
+  while (registers.GetCount() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     ___ Stp(dst0, dst1, MemOperand(sp, offset));
@@ -667,10 +108,17 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) {
-  int size = registers.RegisterSizeInBytes();
-  const Register sp = vixl_masm_->StackPointer();
-  while (registers.Count() >= 2) {
+void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) {
+  int size = registers.GetRegisterSizeInBytes();
+  const Register sp = vixl_masm_.StackPointer();
+  // Be consistent with the logic for spilling registers.
+  if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Ldr(dst0, MemOperand(sp, offset));
+    cfi_.Restore(DWARFReg(dst0));
+    offset += size;
+  }
+  while (registers.GetCount() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     ___ Ldp(dst0, dst1, MemOperand(sp, offset));
@@ -686,117 +134,25 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                const std::vector<ManagedRegister>& callee_save_regs,
-                                const ManagedRegisterEntrySpills& entry_spills) {
-  // Setup VIXL CPURegList for callee-saves.
-  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
-  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
-  for (auto r : callee_save_regs) {
-    Arm64ManagedRegister reg = r.AsArm64();
-    if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).code());
-    } else {
-      DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).code());
-    }
-  }
-  size_t core_reg_size = core_reg_list.TotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.TotalSizeInBytes();
-
-  // Increase frame to required size.
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + kArm64PointerSize);
-  IncreaseFrameSize(frame_size);
-
-  // Save callee-saves.
-  SpillRegisters(core_reg_list, frame_size - core_reg_size);
-  SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  // Write ArtMethod*
-  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
-  StoreToOffset(X0, SP, 0);
-
-  // Write out entry spills
-  int32_t offset = frame_size + kArm64PointerSize;
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsXRegister()) {
-      StoreToOffset(reg.AsXRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsWRegister()) {
-      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    }
-  }
-}
-
-void Arm64Assembler::RemoveFrame(size_t frame_size,
-                                 const std::vector<ManagedRegister>& callee_save_regs) {
-  // Setup VIXL CPURegList for callee-saves.
-  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
-  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
-  for (auto r : callee_save_regs) {
-    Arm64ManagedRegister reg = r.AsArm64();
-    if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).code());
-    } else {
-      DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).code());
-    }
-  }
-  size_t core_reg_size = core_reg_list.TotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.TotalSizeInBytes();
-
-  // For now we only check that the size of the frame is large enough to hold spills and method
-  // reference.
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + kArm64PointerSize);
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  cfi_.RememberState();
-
-  // Restore callee-saves.
-  UnspillRegisters(core_reg_list, frame_size - core_reg_size);
-  UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
-
-  // Decrease frame size to start of callee saved regs.
-  DecreaseFrameSize(frame_size);
-
-  // Pop callee saved and return to LR.
-  ___ Ret();
-
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void Arm64Assembler::PoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::PoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
-  ___ Neg(reg, vixl::Operand(reg));
+  ___ Neg(reg, Operand(reg));
 }
 
-void Arm64Assembler::UnpoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::UnpoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
-  ___ Neg(reg, vixl::Operand(reg));
+  ___ Neg(reg, Operand(reg));
 }
 
-void Arm64Assembler::MaybeUnpoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::MaybePoisonHeapReference(Register reg) {
+  if (kPoisonHeapReferences) {
+    PoisonHeapReference(reg);
+  }
+}
+
+void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
   if (kPoisonHeapReferences) {
     UnpoisonHeapReference(reg);
   }
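The new guard at the top of SpillRegisters (mirrored in UnspillRegisters) peels off a single STR only when the starting offset is not pair-aligned and the register count is odd, so pairing via STP never costs an extra store. A stand-alone model of that ordering decision — hypothetical, printing mnemonics instead of emitting code:

#include <cstdio>
#include <deque>

void SpillOrder(std::deque<int> regs, int offset, int size) {
  // Peel one register only when the offset is misaligned for a pair and
  // the count is odd; peeling from an even count would add a store.
  if (offset % (2 * size) != 0 && regs.size() % 2 != 0) {
    std::printf("str x%d, [sp, #%d]\n", regs.front(), offset);
    regs.pop_front();
    offset += size;
  }
  while (regs.size() >= 2) {
    int lo = regs.front(); regs.pop_front();
    int hi = regs.front(); regs.pop_front();
    std::printf("stp x%d, x%d, [sp, #%d]\n", lo, hi, offset);
    offset += 2 * size;
  }
  if (!regs.empty()) {  // leftover register from an odd, aligned start
    std::printf("str x%d, [sp, #%d]\n", regs.front(), offset);
  }
}

int main() {
  SpillOrder({19, 20, 21}, /*offset=*/8, /*size=*/8);  // misaligned + odd
  return 0;
}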
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 7b25b8f..66a7fed 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -21,25 +21,23 @@
 #include <memory>
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/logging.h"
-#include "constants_arm64.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/assembler.h"
 #include "offsets.h"
 
-// TODO: make vixl clean wrt -Wshadow.
+// TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunknown-pragmas"
 #pragma GCC diagnostic ignored "-Wshadow"
-#pragma GCC diagnostic ignored "-Wmissing-noreturn"
-#include "vixl/a64/macro-assembler-a64.h"
-#include "vixl/a64/disasm-a64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
 
-#define MEM_OP(...)      vixl::MemOperand(__VA_ARGS__)
+#define MEM_OP(...)      vixl::aarch64::MemOperand(__VA_ARGS__)
 
 enum LoadOperandType {
   kLoadSignedByte,
@@ -61,17 +59,13 @@
   kStoreDWord
 };
 
-class Arm64Exception;
-
 class Arm64Assembler FINAL : public Assembler {
  public:
-  // We indicate the size of the initial code generation buffer to the VIXL
-  // assembler. From there it will automatically manage the buffer.
-  Arm64Assembler() : vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
+  explicit Arm64Assembler(ArenaAllocator* arena) : Assembler(arena) {}
 
-  virtual ~Arm64Assembler() {
-    delete vixl_masm_;
-  }
+  virtual ~Arm64Assembler() {}
+
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
 
   // Finalize the code.
   void FinalizeCode() OVERRIDE;
@@ -83,115 +77,26 @@
   // Copy instructions out of assembly buffer into the given region of memory.
   void FinalizeInstructions(const MemoryRegion& region);
 
-  void SpillRegisters(vixl::CPURegList registers, int offset);
-  void UnspillRegisters(vixl::CPURegList registers, int offset);
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs);
 
-  // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines.
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines.
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;
-
-  // Copying routines.
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
-  // Sign extension.
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension.
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                       ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                       ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst.
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE;
+  void SpillRegisters(vixl::aarch64::CPURegList registers, int offset);
+  void UnspillRegisters(vixl::aarch64::CPURegList registers, int offset);
 
   // Jump to address (not setting link register)
   void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch);
 
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   //
   // Heap poisoning.
   //
 
   // Poison a heap reference contained in `reg`.
-  void PoisonHeapReference(vixl::Register reg);
+  void PoisonHeapReference(vixl::aarch64::Register reg);
   // Unpoison a heap reference contained in `reg`.
-  void UnpoisonHeapReference(vixl::Register reg);
+  void UnpoisonHeapReference(vixl::aarch64::Register reg);
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(vixl::aarch64::Register reg);
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
-  void MaybeUnpoisonHeapReference(vixl::Register reg);
+  void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
 
   void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64";
@@ -200,85 +105,42 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM64";
   }
 
- private:
-  static vixl::Register reg_x(int code) {
+  static vixl::aarch64::Register reg_x(int code) {
     CHECK(code < kNumberOfXRegisters) << code;
     if (code == SP) {
-      return vixl::sp;
+      return vixl::aarch64::sp;
     } else if (code == XZR) {
-      return vixl::xzr;
+      return vixl::aarch64::xzr;
     }
-    return vixl::Register::XRegFromCode(code);
+    return vixl::aarch64::Register::GetXRegFromCode(code);
   }
 
-  static vixl::Register reg_w(int code) {
+  static vixl::aarch64::Register reg_w(int code) {
     CHECK(code < kNumberOfWRegisters) << code;
     if (code == WSP) {
-      return vixl::wsp;
+      return vixl::aarch64::wsp;
     } else if (code == WZR) {
-      return vixl::wzr;
+      return vixl::aarch64::wzr;
     }
-    return vixl::Register::WRegFromCode(code);
+    return vixl::aarch64::Register::GetWRegFromCode(code);
   }
 
-  static vixl::FPRegister reg_d(int code) {
-    return vixl::FPRegister::DRegFromCode(code);
+  static vixl::aarch64::FPRegister reg_d(int code) {
+    return vixl::aarch64::FPRegister::GetDRegFromCode(code);
   }
 
-  static vixl::FPRegister reg_s(int code) {
-    return vixl::FPRegister::SRegFromCode(code);
+  static vixl::aarch64::FPRegister reg_s(int code) {
+    return vixl::aarch64::FPRegister::GetSRegFromCode(code);
   }
 
-  // Emits Exception block.
-  void EmitExceptionPoll(Arm64Exception *exception);
-
-  void StoreWToOffset(StoreOperandType type, WRegister source,
-                      XRegister base, int32_t offset);
-  void StoreToOffset(XRegister source, XRegister base, int32_t offset);
-  void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
-  void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
-
-  void LoadImmediate(XRegister dest, int32_t value, vixl::Condition cond = vixl::al);
-  void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
-  void LoadWFromOffset(LoadOperandType type, WRegister dest,
-                      XRegister base, int32_t offset);
-  void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
-  void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
-  void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
-  void AddConstant(XRegister rd, int32_t value, vixl::Condition cond = vixl::al);
-  void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al);
-
-  // List of exception blocks to generate at the end of the code cache.
-  std::vector<Arm64Exception*> exception_blocks_;
-
- public:
-  // Vixl assembler.
-  vixl::MacroAssembler* const vixl_masm_;
+ private:
+  // VIXL assembler.
+  vixl::aarch64::MacroAssembler vixl_masm_;
 
   // Used for testing.
   friend class Arm64ManagedRegister_VixlRegisters_Test;
 };
 
-class Arm64Exception {
- private:
-  Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-    }
-
-  vixl::Label* Entry() { return &exception_entry_; }
-
-  // Register used for passing Thread::Current()->exception_ .
-  const Arm64ManagedRegister scratch_;
-
-  // Stack adjust for ExceptionPool.
-  const size_t stack_adjust_;
-
-  vixl::Label exception_entry_;
-
-  friend class Arm64Assembler;
-  DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
-};
-
 }  // namespace arm64
 }  // namespace art
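In this header the VIXL macro-assembler becomes a by-value member instead of a new-ed pointer deleted in the destructor, exposed through GetVIXLAssembler(). A hedged before/after sketch of that ownership change, with a hypothetical Masm stand-in for the real VIXL type:

struct Masm {  // hypothetical stand-in for vixl::aarch64::MacroAssembler
  void FinalizeCode() {}
};

class OldStyle {  // the pattern being removed: manual new/delete
 public:
  OldStyle() : masm_(new Masm()) {}
  ~OldStyle() { delete masm_; }   // copying is disabled in the real code
 private:
  Masm* const masm_;
};

class NewStyle {  // the pattern being introduced: value member
 public:
  Masm* GetVIXLAssembler() { return &masm_; }
 private:
  Masm masm_;     // destroyed automatically with the assembler
};

int main() {
  NewStyle a;
  a.GetVIXLAssembler()->FinalizeCode();
  return 0;
}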
 
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
deleted file mode 100644
index 01e8be9..0000000
--- a/compiler/utils/arm64/constants_arm64.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-#define ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-
-#include <stdint.h>
-#include <iosfwd>
-#include "arch/arm64/registers_arm64.h"
-#include "base/casts.h"
-#include "base/logging.h"
-#include "globals.h"
-
-// TODO: Extend this file by adding missing functionality.
-
-namespace art {
-namespace arm64 {
-
-constexpr size_t kArm64BaseBufferSize = 4096;
-
-}  // namespace arm64
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
new file mode 100644
index 0000000..dfdcd11
--- /dev/null
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -0,0 +1,754 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_arm64.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "managed_register_arm64.h"
+#include "offsets.h"
+#include "thread.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+#ifdef ___
+#error "ARM64 Assembler macro already defined."
+#else
+#define ___   asm_.GetVIXLAssembler()->
+#endif
+
+#define reg_x(X) Arm64Assembler::reg_x(X)
+#define reg_w(W) Arm64Assembler::reg_w(W)
+#define reg_d(D) Arm64Assembler::reg_d(D)
+#define reg_s(S) Arm64Assembler::reg_s(S)
+
+Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() {
+}
+
+void Arm64JNIMacroAssembler::FinalizeCode() {
+  for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) {
+    EmitExceptionPoll(exception.get());
+  }
+  ___ FinalizeCode();
+}
+
+void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
+}
+
+void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
+  StoreToOffset(TR, SP, offset.Int32Value());
+}
+
+// See Arm64 PCS Section 5.2.2.1.
+void Arm64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
+}
+
+// See Arm64 PCS Section 5.2.2.1.
+void Arm64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant(SP, adjust);
+  cfi().AdjustCFAOffset(-adjust);
+}
+
+void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
+  AddConstant(rd, rd, value, cond);
+}
+
+void Arm64JNIMacroAssembler::AddConstant(XRegister rd,
+                                         XRegister rn,
+                                         int32_t value,
+                                         Condition cond) {
+  if ((cond == al) || (cond == nv)) {
+    // VIXL macro-assembler handles all variants.
+    ___ Add(reg_x(rd), reg_x(rn), value);
+  } else {
+    // temp = rd + value
+    // rd = cond ? temp : rn
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    temps.Exclude(reg_x(rd), reg_x(rn));
+    Register temp = temps.AcquireX();
+    ___ Add(temp, reg_x(rn), value);
+    ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreWToOffset(StoreOperandType type,
+                                            WRegister source,
+                                            XRegister base,
+                                            int32_t offset) {
+  switch (type) {
+    case kStoreByte:
+      ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    case kStoreHalfword:
+      ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    case kStoreWord:
+      ___ Str(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) {
+  CHECK_NE(source, SP);
+  ___ Str(reg_x(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) {
+  ___ Str(reg_s(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) {
+  ___ Str(reg_d(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsWRegister()) {
+    CHECK_EQ(4u, size);
+    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
+  } else if (src.IsXRegister()) {
+    CHECK_EQ(8u, size);
+    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+  } else if (src.IsSRegister()) {
+    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
+  } else {
+    CHECK(src.IsDRegister()) << src;
+    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  CHECK(src.IsXRegister()) << src;
+  StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
+                 offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  CHECK(src.IsXRegister()) << src;
+  StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs,
+                                                   uint32_t imm,
+                                                   ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadImmediate(scratch.AsXRegister(), imm);
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
+                 offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs,
+                                                      FrameOffset fr_offs,
+                                                      ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register temp = temps.AcquireX();
+  ___ Mov(temp, reg_x(SP));
+  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+}
+
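+// Store `m_source` at `dest_off` and copy the 8-byte word at `in_off` to
+// `dest_off + 8`, using `m_scratch` for the copy.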
+void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off,
+                                           ManagedRegister m_source,
+                                           FrameOffset in_off,
+                                           ManagedRegister m_scratch) {
+  Arm64ManagedRegister source = m_source.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
+}
+
+// Load routines.
+void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) {
+  if ((cond == al) || (cond == nv)) {
+    ___ Mov(reg_x(dest), value);
+  } else {
+    // temp = value
+    // rd = cond ? temp : rd
+    if (value != 0) {
+      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+      temps.Exclude(reg_x(dest));
+      Register temp = temps.AcquireX();
+      ___ Mov(temp, value);
+      ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
+    } else {
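+      // A zero value selects directly from XZR; no scratch register is needed.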
+      ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond);
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::LoadWFromOffset(LoadOperandType type,
+                                             WRegister dest,
+                                             XRegister base,
+                                             int32_t offset) {
+  switch (type) {
+    case kLoadSignedByte:
+      ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadSignedHalfword:
+      ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadUnsignedByte:
+      ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadUnsignedHalfword:
+      ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadWord:
+      ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+// Note: This method can be extended with load-type info - see the
+// sign-extended A64 load variants.
+void Arm64JNIMacroAssembler::LoadFromOffset(XRegister dest, XRegister base, int32_t offset) {
+  CHECK_NE(dest, SP);
+  ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::LoadSFromOffset(SRegister dest, XRegister base, int32_t offset) {
+  ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::LoadDFromOffset(DRegister dest, XRegister base, int32_t offset) {
+  ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset));
+}
+
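+// Dispatch on the managed register kind; an X register destination also
+// accepts a 4-byte load into its overlapping W register.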
+void Arm64JNIMacroAssembler::Load(Arm64ManagedRegister dest,
+                                  XRegister base,
+                                  int32_t offset,
+                                  size_t size) {
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dest;
+  } else if (dest.IsWRegister()) {
+    CHECK_EQ(4u, size) << dest;
+    ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
+  } else if (dest.IsXRegister()) {
+    CHECK_NE(dest.AsXRegister(), SP) << dest;
+    if (size == 4u) {
+      ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
+    } else {
+      CHECK_EQ(8u, size) << dest;
+      ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
+    }
+  } else if (dest.IsSRegister()) {
+    ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
+  } else {
+    CHECK(dest.IsDRegister()) << dest;
+    ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset));
+  }
+}
+
+void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
+  return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
+}
+
+void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
+                                            ThreadOffset64 src,
+                                            size_t size) {
+  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
+}
+
+void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  CHECK(dst.IsXRegister()) << dst;
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst,
+                                     ManagedRegister m_base,
+                                     MemberOffset offs,
+                                     bool unpoison_reference) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister base = m_base.AsArm64();
+  CHECK(dst.IsXRegister() && base.IsXRegister());
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
+                  offs.Int32Value());
+  if (unpoison_reference) {
+    WRegister ref_reg = dst.AsOverlappingWRegister();
+    asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg));
+  }
+}
+
+void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst,
+                                        ManagedRegister m_base,
+                                        Offset offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister base = m_base.AsArm64();
+  CHECK(dst.IsXRegister() && base.IsXRegister());
+  // Remove dst and base from the temp list - the higher-level API uses IP1, IP0.
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
+  ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+}
+
+void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  CHECK(dst.IsXRegister()) << dst;
+  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
+}
+
+// Copying routines.
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister src = m_src.AsArm64();
+  if (!dst.Equals(src)) {
+    if (dst.IsXRegister()) {
+      if (size == 4) {
+        CHECK(src.IsWRegister());
+        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
+      } else {
+        if (src.IsXRegister()) {
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
+        } else {
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
+        }
+      }
+    } else if (dst.IsWRegister()) {
+      CHECK(src.IsWRegister()) << src;
+      ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
+    } else if (dst.IsSRegister()) {
+      CHECK(src.IsSRegister()) << src;
+      ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister()));
+    } else {
+      CHECK(dst.IsDRegister()) << dst;
+      CHECK(src.IsDRegister()) << src;
+      ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister()));
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                  ThreadOffset64 tr_offs,
+                                                  ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
+                                                FrameOffset fr_offs,
+                                                ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
+                  SP, src.Int32Value());
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
+                 SP, dest.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
+                                  FrameOffset src,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
+                                  ManagedRegister src_base,
+                                  Offset src_offset,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister base = src_base.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
+                    src_offset.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base,
+                                  Offset dest_offs,
+                                  FrameOffset src,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister base = m_dest_base.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
+                   dest_offs.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                  FrameOffset /*src_base*/,
+                                  Offset /*src_offset*/,
+                                  ManagedRegister /*mscratch*/,
+                                  size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
+}
+
+void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest,
+                                  Offset dest_offset,
+                                  ManagedRegister m_src,
+                                  Offset src_offset,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister src = m_src.AsArm64();
+  Arm64ManagedRegister dest = m_dest.AsArm64();
+  CHECK(dest.IsXRegister()) << dest;
+  CHECK(src.IsXRegister()) << src;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    if (scratch.IsWRegister()) {
+      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
+                      src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
+                     dest_offset.Int32Value());
+    } else {
+      LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
+                      src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
+                     dest_offset.Int32Value());
+    }
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                  Offset /*dest_offset*/,
+                                  FrameOffset /*src*/,
+                                  Offset /*src_offset*/,
+                                  ManagedRegister /*scratch*/,
+                                  size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
+}
+
+void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
+  // TODO: Should we check that m_scratch is IP? - see arm.
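+  // `Dmb(InnerShareable, BarrierAll)` emits a full `dmb ish` barrier.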
+  ___ Dmb(InnerShareable, BarrierAll);
+}
+
+void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
+}
+
+void Arm64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
+}
+
+void Arm64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void Arm64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister base = m_base.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+  ___ Blr(reg_x(scratch.AsXRegister()));
+}
+
+void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
+  ___ Blr(reg_x(scratch.AsXRegister()));
+}
+
+void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                            ManagedRegister scratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
+}
+
+void Arm64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister m_out_reg,
+                                                    FrameOffset handle_scope_offs,
+                                                    ManagedRegister m_in_reg,
+                                                    bool null_allowed) {
+  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
+  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
+  // For now we only hold stale handle scope entries in x registers.
+  CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
+  CHECK(out_reg.IsXRegister()) << out_reg;
+  if (null_allowed) {
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
+                      handle_scope_offs.Int32Value());
+      in_reg = out_reg;
+    }
+    ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
+    if (!out_reg.Equals(in_reg)) {
+      LoadImmediate(out_reg.AsXRegister(), 0, eq);
+    }
+    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne);
+  } else {
+    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al);
+  }
+}
+
+void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                    FrameOffset handle_scope_offset,
+                                                    ManagedRegister m_scratch,
+                                                    bool null_allowed) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  if (null_allowed) {
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
+                    handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
+    // TODO: Move this logic into an AddConstant() variant that sets flags.
+    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
+  } else {
+    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
+  }
+  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
+}
+
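+// Dereference a handle scope entry: dst = (src == null) ? null : *src.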
+void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
+                                                          ManagedRegister m_in_reg) {
+  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
+  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
+  CHECK(out_reg.IsXRegister()) << out_reg;
+  CHECK(in_reg.IsXRegister()) << in_reg;
+  vixl::aarch64::Label exit;
+  if (!out_reg.Equals(in_reg)) {
+    // FIXME: Who sets the flags here?
+    LoadImmediate(out_reg.AsXRegister(), 0, eq);
+  }
+  ___ Cbz(reg_x(in_reg.AsXRegister()), &exit);
+  LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0);
+  ___ Bind(&exit);
+}
+
+void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
+  CHECK_ALIGNED(stack_adjust, kStackAlignment);
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
+  LoadFromOffset(scratch.AsXRegister(),
+                 TR,
+                 Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
+  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
+}
+
+void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
+  Register temp = temps.AcquireX();
+
+  // Bind exception poll entry.
+  ___ Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
+  }
+  // Pass exception object as argument.
+  // Don't care about preserving X0 as this won't return.
+  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
+  ___ Ldr(temp,
+          MEM_OP(reg_x(TR),
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value()));
+
+  ___ Blr(temp);
+  // Call should never return.
+  ___ Brk();
+}
+
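+// Build the managed frame: bump SP down by `frame_size`, spill callee-saves at
+// the top of the new frame, store the ArtMethod* at SP + 0, and write entry
+// spills just above the new frame.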
+void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                        ManagedRegister method_reg,
+                                        ArrayRef<const ManagedRegister> callee_save_regs,
+                                        const ManagedRegisterEntrySpills& entry_spills) {
+  // Setup VIXL CPURegList for callee-saves.
+  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
+  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
+  for (auto r : callee_save_regs) {
+    Arm64ManagedRegister reg = r.AsArm64();
+    if (reg.IsXRegister()) {
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
+    } else {
+      DCHECK(reg.IsDRegister());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
+    }
+  }
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
+
+  // Increase frame to required size.
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  IncreaseFrameSize(frame_size);
+
+  // Save callee-saves.
+  asm_.SpillRegisters(core_reg_list, frame_size - core_reg_size);
+  asm_.SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
+
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+
+  // Write ArtMethod*
+  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
+  StoreToOffset(X0, SP, 0);
+
+  // Write out entry spills
+  int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsXRegister()) {
+      StoreToOffset(reg.AsXRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsWRegister()) {
+      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsSRegister()) {
+      StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    }
+  }
+}
+
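+// Tear down the managed frame: restore callee-saves, drop the frame, and
+// return to the caller.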
+void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                         ArrayRef<const ManagedRegister> callee_save_regs) {
+  // Setup VIXL CPURegList for callee-saves.
+  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
+  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
+  for (auto r : callee_save_regs) {
+    Arm64ManagedRegister reg = r.AsArm64();
+    if (reg.IsXRegister()) {
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
+    } else {
+      DCHECK(reg.IsDRegister());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
+    }
+  }
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
+
+  // For now we only check that the frame size is large enough to hold the
+  // spills and the method reference.
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
+
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+
+  cfi().RememberState();
+
+  // Restore callee-saves.
+  asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size);
+  asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
+
+  // Decrease frame size to start of callee saved regs.
+  DecreaseFrameSize(frame_size);
+
+  // Pop callee-saves and return to LR.
+  ___ Ret();
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+#undef ___
+
+}  // namespace arm64
+}  // namespace art
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
new file mode 100644
index 0000000..b9f6854
--- /dev/null
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
+#define ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
+
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include "assembler_arm64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+#include "offsets.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm64 {
+
+class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> {
+ public:
+  explicit Arm64JNIMacroAssembler(ArenaAllocator* arena)
+      : JNIMacroAssemblerFwd(arena),
+        exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {}
+
+  ~Arm64JNIMacroAssembler();
+
+  // Finalize the code.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit code that will create an activation on the stack.
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack.
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines.
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister src,
+                     FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines.
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+  void LoadRef(ManagedRegister dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+
+  // Copying routines.
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+  // Sign extension.
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension.
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current().
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load it into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset].
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  class Arm64Exception {
+   public:
+    Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
+        : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+    vixl::aarch64::Label* Entry() { return &exception_entry_; }
+
+    // Register used for passing Thread::Current()->exception_.
+    const Arm64ManagedRegister scratch_;
+
+    // Stack adjustment for the ExceptionPoll.
+    const size_t stack_adjust_;
+
+    vixl::aarch64::Label exception_entry_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
+  };
+
+  // Emits the exception poll block.
+  void EmitExceptionPoll(Arm64Exception* exception);
+
+  void StoreWToOffset(StoreOperandType type,
+                      WRegister source,
+                      XRegister base,
+                      int32_t offset);
+  void StoreToOffset(XRegister source, XRegister base, int32_t offset);
+  void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
+  void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
+
+  void LoadImmediate(XRegister dest,
+                     int32_t value,
+                     vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
+  void LoadWFromOffset(LoadOperandType type,
+                       WRegister dest,
+                       XRegister base,
+                       int32_t offset);
+  void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
+  void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
+  void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
+  void AddConstant(XRegister rd,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void AddConstant(XRegister rd,
+                   XRegister rn,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+
+  // List of exception blocks to generate at the end of the code cache.
+  ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index dbcd8c5..7378a0a 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -17,9 +17,9 @@
 #ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 #define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 
+#include "arch/arm64/registers_arm64.h"
 #include "base/logging.h"
-#include "constants_arm64.h"
-#include "dwarf/register.h"
+#include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -56,80 +56,80 @@
 
 class Arm64ManagedRegister : public ManagedRegister {
  public:
-  XRegister AsXRegister() const {
+  constexpr XRegister AsXRegister() const {
     CHECK(IsXRegister());
     return static_cast<XRegister>(id_);
   }
 
-  WRegister AsWRegister() const {
+  constexpr WRegister AsWRegister() const {
     CHECK(IsWRegister());
     return static_cast<WRegister>(id_ - kNumberOfXRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds -
                                   kNumberOfDRegIds);
   }
 
-  WRegister AsOverlappingWRegister() const {
+  constexpr WRegister AsOverlappingWRegister() const {
     CHECK(IsValidManagedRegister());
     if (IsZeroRegister()) return WZR;
     return static_cast<WRegister>(AsXRegister());
   }
 
-  XRegister AsOverlappingXRegister() const {
+  constexpr XRegister AsOverlappingXRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<XRegister>(AsWRegister());
   }
 
-  SRegister AsOverlappingSRegister() const {
+  constexpr SRegister AsOverlappingSRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<SRegister>(AsDRegister());
   }
 
-  DRegister AsOverlappingDRegister() const {
+  constexpr DRegister AsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<DRegister>(AsSRegister());
   }
 
-  bool IsXRegister() const {
+  constexpr bool IsXRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfXRegIds);
   }
 
-  bool IsWRegister() const {
+  constexpr bool IsWRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfXRegIds;
     return (0 <= test) && (test < kNumberOfWRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsGPRegister() const {
+  constexpr bool IsGPRegister() const {
     return IsXRegister() || IsWRegister();
   }
 
-  bool IsFPRegister() const {
+  constexpr bool IsFPRegister() const {
     return IsDRegister() || IsSRegister();
   }
 
-  bool IsSameType(Arm64ManagedRegister test) const {
+  constexpr bool IsSameType(Arm64ManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsXRegister() && test.IsXRegister()) ||
@@ -145,53 +145,53 @@
 
   void Print(std::ostream& os) const;
 
-  static Arm64ManagedRegister FromXRegister(XRegister r) {
+  static constexpr Arm64ManagedRegister FromXRegister(XRegister r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static Arm64ManagedRegister FromWRegister(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegister(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r + kNumberOfXRegIds);
   }
 
-  static Arm64ManagedRegister FromDRegister(DRegister r) {
+  static constexpr Arm64ManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
-  static Arm64ManagedRegister FromSRegister(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds +
                           kNumberOfDRegIds));
   }
 
   // Returns the X register overlapping W register r.
-  static Arm64ManagedRegister FromWRegisterX(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegisterX(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r);
   }
 
   // Return the D register overlapping S register r.
-  static Arm64ManagedRegister FromSRegisterD(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegisterD(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  bool IsStackPointer() const {
+  constexpr bool IsStackPointer() const {
     return IsXRegister() && (id_ == SP);
   }
 
-  bool IsZeroRegister() const {
+  constexpr bool IsZeroRegister() const {
     return IsXRegister() && (id_ == XZR);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Arm64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Arm64ManagedRegister FromRegId(int reg_id) {
     Arm64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace arm64
 
-inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
+constexpr inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
   arm64::Arm64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index e27115d..79076b8 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -591,149 +591,149 @@
 
 TEST(Arm64ManagedRegister, VixlRegisters) {
   // X Registers.
-  EXPECT_TRUE(vixl::x0.Is(Arm64Assembler::reg_x(X0)));
-  EXPECT_TRUE(vixl::x1.Is(Arm64Assembler::reg_x(X1)));
-  EXPECT_TRUE(vixl::x2.Is(Arm64Assembler::reg_x(X2)));
-  EXPECT_TRUE(vixl::x3.Is(Arm64Assembler::reg_x(X3)));
-  EXPECT_TRUE(vixl::x4.Is(Arm64Assembler::reg_x(X4)));
-  EXPECT_TRUE(vixl::x5.Is(Arm64Assembler::reg_x(X5)));
-  EXPECT_TRUE(vixl::x6.Is(Arm64Assembler::reg_x(X6)));
-  EXPECT_TRUE(vixl::x7.Is(Arm64Assembler::reg_x(X7)));
-  EXPECT_TRUE(vixl::x8.Is(Arm64Assembler::reg_x(X8)));
-  EXPECT_TRUE(vixl::x9.Is(Arm64Assembler::reg_x(X9)));
-  EXPECT_TRUE(vixl::x10.Is(Arm64Assembler::reg_x(X10)));
-  EXPECT_TRUE(vixl::x11.Is(Arm64Assembler::reg_x(X11)));
-  EXPECT_TRUE(vixl::x12.Is(Arm64Assembler::reg_x(X12)));
-  EXPECT_TRUE(vixl::x13.Is(Arm64Assembler::reg_x(X13)));
-  EXPECT_TRUE(vixl::x14.Is(Arm64Assembler::reg_x(X14)));
-  EXPECT_TRUE(vixl::x15.Is(Arm64Assembler::reg_x(X15)));
-  EXPECT_TRUE(vixl::x16.Is(Arm64Assembler::reg_x(X16)));
-  EXPECT_TRUE(vixl::x17.Is(Arm64Assembler::reg_x(X17)));
-  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(X18)));
-  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(X19)));
-  EXPECT_TRUE(vixl::x20.Is(Arm64Assembler::reg_x(X20)));
-  EXPECT_TRUE(vixl::x21.Is(Arm64Assembler::reg_x(X21)));
-  EXPECT_TRUE(vixl::x22.Is(Arm64Assembler::reg_x(X22)));
-  EXPECT_TRUE(vixl::x23.Is(Arm64Assembler::reg_x(X23)));
-  EXPECT_TRUE(vixl::x24.Is(Arm64Assembler::reg_x(X24)));
-  EXPECT_TRUE(vixl::x25.Is(Arm64Assembler::reg_x(X25)));
-  EXPECT_TRUE(vixl::x26.Is(Arm64Assembler::reg_x(X26)));
-  EXPECT_TRUE(vixl::x27.Is(Arm64Assembler::reg_x(X27)));
-  EXPECT_TRUE(vixl::x28.Is(Arm64Assembler::reg_x(X28)));
-  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29)));
-  EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30)));
+  EXPECT_TRUE(vixl::aarch64::x0.Is(Arm64Assembler::reg_x(X0)));
+  EXPECT_TRUE(vixl::aarch64::x1.Is(Arm64Assembler::reg_x(X1)));
+  EXPECT_TRUE(vixl::aarch64::x2.Is(Arm64Assembler::reg_x(X2)));
+  EXPECT_TRUE(vixl::aarch64::x3.Is(Arm64Assembler::reg_x(X3)));
+  EXPECT_TRUE(vixl::aarch64::x4.Is(Arm64Assembler::reg_x(X4)));
+  EXPECT_TRUE(vixl::aarch64::x5.Is(Arm64Assembler::reg_x(X5)));
+  EXPECT_TRUE(vixl::aarch64::x6.Is(Arm64Assembler::reg_x(X6)));
+  EXPECT_TRUE(vixl::aarch64::x7.Is(Arm64Assembler::reg_x(X7)));
+  EXPECT_TRUE(vixl::aarch64::x8.Is(Arm64Assembler::reg_x(X8)));
+  EXPECT_TRUE(vixl::aarch64::x9.Is(Arm64Assembler::reg_x(X9)));
+  EXPECT_TRUE(vixl::aarch64::x10.Is(Arm64Assembler::reg_x(X10)));
+  EXPECT_TRUE(vixl::aarch64::x11.Is(Arm64Assembler::reg_x(X11)));
+  EXPECT_TRUE(vixl::aarch64::x12.Is(Arm64Assembler::reg_x(X12)));
+  EXPECT_TRUE(vixl::aarch64::x13.Is(Arm64Assembler::reg_x(X13)));
+  EXPECT_TRUE(vixl::aarch64::x14.Is(Arm64Assembler::reg_x(X14)));
+  EXPECT_TRUE(vixl::aarch64::x15.Is(Arm64Assembler::reg_x(X15)));
+  EXPECT_TRUE(vixl::aarch64::x16.Is(Arm64Assembler::reg_x(X16)));
+  EXPECT_TRUE(vixl::aarch64::x17.Is(Arm64Assembler::reg_x(X17)));
+  EXPECT_TRUE(vixl::aarch64::x18.Is(Arm64Assembler::reg_x(X18)));
+  EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(X19)));
+  EXPECT_TRUE(vixl::aarch64::x20.Is(Arm64Assembler::reg_x(X20)));
+  EXPECT_TRUE(vixl::aarch64::x21.Is(Arm64Assembler::reg_x(X21)));
+  EXPECT_TRUE(vixl::aarch64::x22.Is(Arm64Assembler::reg_x(X22)));
+  EXPECT_TRUE(vixl::aarch64::x23.Is(Arm64Assembler::reg_x(X23)));
+  EXPECT_TRUE(vixl::aarch64::x24.Is(Arm64Assembler::reg_x(X24)));
+  EXPECT_TRUE(vixl::aarch64::x25.Is(Arm64Assembler::reg_x(X25)));
+  EXPECT_TRUE(vixl::aarch64::x26.Is(Arm64Assembler::reg_x(X26)));
+  EXPECT_TRUE(vixl::aarch64::x27.Is(Arm64Assembler::reg_x(X27)));
+  EXPECT_TRUE(vixl::aarch64::x28.Is(Arm64Assembler::reg_x(X28)));
+  EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(X29)));
+  EXPECT_TRUE(vixl::aarch64::x30.Is(Arm64Assembler::reg_x(X30)));
 
-  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR)));
-  EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0)));
-  EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1)));
-  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP)));
-  EXPECT_TRUE(vixl::lr.Is(Arm64Assembler::reg_x(LR)));
-  EXPECT_TRUE(vixl::sp.Is(Arm64Assembler::reg_x(SP)));
-  EXPECT_TRUE(vixl::xzr.Is(Arm64Assembler::reg_x(XZR)));
+  EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(TR)));
+  EXPECT_TRUE(vixl::aarch64::ip0.Is(Arm64Assembler::reg_x(IP0)));
+  EXPECT_TRUE(vixl::aarch64::ip1.Is(Arm64Assembler::reg_x(IP1)));
+  EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(FP)));
+  EXPECT_TRUE(vixl::aarch64::lr.Is(Arm64Assembler::reg_x(LR)));
+  EXPECT_TRUE(vixl::aarch64::sp.Is(Arm64Assembler::reg_x(SP)));
+  EXPECT_TRUE(vixl::aarch64::xzr.Is(Arm64Assembler::reg_x(XZR)));
 
   // W Registers.
-  EXPECT_TRUE(vixl::w0.Is(Arm64Assembler::reg_w(W0)));
-  EXPECT_TRUE(vixl::w1.Is(Arm64Assembler::reg_w(W1)));
-  EXPECT_TRUE(vixl::w2.Is(Arm64Assembler::reg_w(W2)));
-  EXPECT_TRUE(vixl::w3.Is(Arm64Assembler::reg_w(W3)));
-  EXPECT_TRUE(vixl::w4.Is(Arm64Assembler::reg_w(W4)));
-  EXPECT_TRUE(vixl::w5.Is(Arm64Assembler::reg_w(W5)));
-  EXPECT_TRUE(vixl::w6.Is(Arm64Assembler::reg_w(W6)));
-  EXPECT_TRUE(vixl::w7.Is(Arm64Assembler::reg_w(W7)));
-  EXPECT_TRUE(vixl::w8.Is(Arm64Assembler::reg_w(W8)));
-  EXPECT_TRUE(vixl::w9.Is(Arm64Assembler::reg_w(W9)));
-  EXPECT_TRUE(vixl::w10.Is(Arm64Assembler::reg_w(W10)));
-  EXPECT_TRUE(vixl::w11.Is(Arm64Assembler::reg_w(W11)));
-  EXPECT_TRUE(vixl::w12.Is(Arm64Assembler::reg_w(W12)));
-  EXPECT_TRUE(vixl::w13.Is(Arm64Assembler::reg_w(W13)));
-  EXPECT_TRUE(vixl::w14.Is(Arm64Assembler::reg_w(W14)));
-  EXPECT_TRUE(vixl::w15.Is(Arm64Assembler::reg_w(W15)));
-  EXPECT_TRUE(vixl::w16.Is(Arm64Assembler::reg_w(W16)));
-  EXPECT_TRUE(vixl::w17.Is(Arm64Assembler::reg_w(W17)));
-  EXPECT_TRUE(vixl::w18.Is(Arm64Assembler::reg_w(W18)));
-  EXPECT_TRUE(vixl::w19.Is(Arm64Assembler::reg_w(W19)));
-  EXPECT_TRUE(vixl::w20.Is(Arm64Assembler::reg_w(W20)));
-  EXPECT_TRUE(vixl::w21.Is(Arm64Assembler::reg_w(W21)));
-  EXPECT_TRUE(vixl::w22.Is(Arm64Assembler::reg_w(W22)));
-  EXPECT_TRUE(vixl::w23.Is(Arm64Assembler::reg_w(W23)));
-  EXPECT_TRUE(vixl::w24.Is(Arm64Assembler::reg_w(W24)));
-  EXPECT_TRUE(vixl::w25.Is(Arm64Assembler::reg_w(W25)));
-  EXPECT_TRUE(vixl::w26.Is(Arm64Assembler::reg_w(W26)));
-  EXPECT_TRUE(vixl::w27.Is(Arm64Assembler::reg_w(W27)));
-  EXPECT_TRUE(vixl::w28.Is(Arm64Assembler::reg_w(W28)));
-  EXPECT_TRUE(vixl::w29.Is(Arm64Assembler::reg_w(W29)));
-  EXPECT_TRUE(vixl::w30.Is(Arm64Assembler::reg_w(W30)));
-  EXPECT_TRUE(vixl::w31.Is(Arm64Assembler::reg_w(WZR)));
-  EXPECT_TRUE(vixl::wzr.Is(Arm64Assembler::reg_w(WZR)));
-  EXPECT_TRUE(vixl::wsp.Is(Arm64Assembler::reg_w(WSP)));
+  EXPECT_TRUE(vixl::aarch64::w0.Is(Arm64Assembler::reg_w(W0)));
+  EXPECT_TRUE(vixl::aarch64::w1.Is(Arm64Assembler::reg_w(W1)));
+  EXPECT_TRUE(vixl::aarch64::w2.Is(Arm64Assembler::reg_w(W2)));
+  EXPECT_TRUE(vixl::aarch64::w3.Is(Arm64Assembler::reg_w(W3)));
+  EXPECT_TRUE(vixl::aarch64::w4.Is(Arm64Assembler::reg_w(W4)));
+  EXPECT_TRUE(vixl::aarch64::w5.Is(Arm64Assembler::reg_w(W5)));
+  EXPECT_TRUE(vixl::aarch64::w6.Is(Arm64Assembler::reg_w(W6)));
+  EXPECT_TRUE(vixl::aarch64::w7.Is(Arm64Assembler::reg_w(W7)));
+  EXPECT_TRUE(vixl::aarch64::w8.Is(Arm64Assembler::reg_w(W8)));
+  EXPECT_TRUE(vixl::aarch64::w9.Is(Arm64Assembler::reg_w(W9)));
+  EXPECT_TRUE(vixl::aarch64::w10.Is(Arm64Assembler::reg_w(W10)));
+  EXPECT_TRUE(vixl::aarch64::w11.Is(Arm64Assembler::reg_w(W11)));
+  EXPECT_TRUE(vixl::aarch64::w12.Is(Arm64Assembler::reg_w(W12)));
+  EXPECT_TRUE(vixl::aarch64::w13.Is(Arm64Assembler::reg_w(W13)));
+  EXPECT_TRUE(vixl::aarch64::w14.Is(Arm64Assembler::reg_w(W14)));
+  EXPECT_TRUE(vixl::aarch64::w15.Is(Arm64Assembler::reg_w(W15)));
+  EXPECT_TRUE(vixl::aarch64::w16.Is(Arm64Assembler::reg_w(W16)));
+  EXPECT_TRUE(vixl::aarch64::w17.Is(Arm64Assembler::reg_w(W17)));
+  EXPECT_TRUE(vixl::aarch64::w18.Is(Arm64Assembler::reg_w(W18)));
+  EXPECT_TRUE(vixl::aarch64::w19.Is(Arm64Assembler::reg_w(W19)));
+  EXPECT_TRUE(vixl::aarch64::w20.Is(Arm64Assembler::reg_w(W20)));
+  EXPECT_TRUE(vixl::aarch64::w21.Is(Arm64Assembler::reg_w(W21)));
+  EXPECT_TRUE(vixl::aarch64::w22.Is(Arm64Assembler::reg_w(W22)));
+  EXPECT_TRUE(vixl::aarch64::w23.Is(Arm64Assembler::reg_w(W23)));
+  EXPECT_TRUE(vixl::aarch64::w24.Is(Arm64Assembler::reg_w(W24)));
+  EXPECT_TRUE(vixl::aarch64::w25.Is(Arm64Assembler::reg_w(W25)));
+  EXPECT_TRUE(vixl::aarch64::w26.Is(Arm64Assembler::reg_w(W26)));
+  EXPECT_TRUE(vixl::aarch64::w27.Is(Arm64Assembler::reg_w(W27)));
+  EXPECT_TRUE(vixl::aarch64::w28.Is(Arm64Assembler::reg_w(W28)));
+  EXPECT_TRUE(vixl::aarch64::w29.Is(Arm64Assembler::reg_w(W29)));
+  EXPECT_TRUE(vixl::aarch64::w30.Is(Arm64Assembler::reg_w(W30)));
+  EXPECT_TRUE(vixl::aarch64::w31.Is(Arm64Assembler::reg_w(WZR)));
+  EXPECT_TRUE(vixl::aarch64::wzr.Is(Arm64Assembler::reg_w(WZR)));
+  EXPECT_TRUE(vixl::aarch64::wsp.Is(Arm64Assembler::reg_w(WSP)));
 
   // D Registers.
-  EXPECT_TRUE(vixl::d0.Is(Arm64Assembler::reg_d(D0)));
-  EXPECT_TRUE(vixl::d1.Is(Arm64Assembler::reg_d(D1)));
-  EXPECT_TRUE(vixl::d2.Is(Arm64Assembler::reg_d(D2)));
-  EXPECT_TRUE(vixl::d3.Is(Arm64Assembler::reg_d(D3)));
-  EXPECT_TRUE(vixl::d4.Is(Arm64Assembler::reg_d(D4)));
-  EXPECT_TRUE(vixl::d5.Is(Arm64Assembler::reg_d(D5)));
-  EXPECT_TRUE(vixl::d6.Is(Arm64Assembler::reg_d(D6)));
-  EXPECT_TRUE(vixl::d7.Is(Arm64Assembler::reg_d(D7)));
-  EXPECT_TRUE(vixl::d8.Is(Arm64Assembler::reg_d(D8)));
-  EXPECT_TRUE(vixl::d9.Is(Arm64Assembler::reg_d(D9)));
-  EXPECT_TRUE(vixl::d10.Is(Arm64Assembler::reg_d(D10)));
-  EXPECT_TRUE(vixl::d11.Is(Arm64Assembler::reg_d(D11)));
-  EXPECT_TRUE(vixl::d12.Is(Arm64Assembler::reg_d(D12)));
-  EXPECT_TRUE(vixl::d13.Is(Arm64Assembler::reg_d(D13)));
-  EXPECT_TRUE(vixl::d14.Is(Arm64Assembler::reg_d(D14)));
-  EXPECT_TRUE(vixl::d15.Is(Arm64Assembler::reg_d(D15)));
-  EXPECT_TRUE(vixl::d16.Is(Arm64Assembler::reg_d(D16)));
-  EXPECT_TRUE(vixl::d17.Is(Arm64Assembler::reg_d(D17)));
-  EXPECT_TRUE(vixl::d18.Is(Arm64Assembler::reg_d(D18)));
-  EXPECT_TRUE(vixl::d19.Is(Arm64Assembler::reg_d(D19)));
-  EXPECT_TRUE(vixl::d20.Is(Arm64Assembler::reg_d(D20)));
-  EXPECT_TRUE(vixl::d21.Is(Arm64Assembler::reg_d(D21)));
-  EXPECT_TRUE(vixl::d22.Is(Arm64Assembler::reg_d(D22)));
-  EXPECT_TRUE(vixl::d23.Is(Arm64Assembler::reg_d(D23)));
-  EXPECT_TRUE(vixl::d24.Is(Arm64Assembler::reg_d(D24)));
-  EXPECT_TRUE(vixl::d25.Is(Arm64Assembler::reg_d(D25)));
-  EXPECT_TRUE(vixl::d26.Is(Arm64Assembler::reg_d(D26)));
-  EXPECT_TRUE(vixl::d27.Is(Arm64Assembler::reg_d(D27)));
-  EXPECT_TRUE(vixl::d28.Is(Arm64Assembler::reg_d(D28)));
-  EXPECT_TRUE(vixl::d29.Is(Arm64Assembler::reg_d(D29)));
-  EXPECT_TRUE(vixl::d30.Is(Arm64Assembler::reg_d(D30)));
-  EXPECT_TRUE(vixl::d31.Is(Arm64Assembler::reg_d(D31)));
+  EXPECT_TRUE(vixl::aarch64::d0.Is(Arm64Assembler::reg_d(D0)));
+  EXPECT_TRUE(vixl::aarch64::d1.Is(Arm64Assembler::reg_d(D1)));
+  EXPECT_TRUE(vixl::aarch64::d2.Is(Arm64Assembler::reg_d(D2)));
+  EXPECT_TRUE(vixl::aarch64::d3.Is(Arm64Assembler::reg_d(D3)));
+  EXPECT_TRUE(vixl::aarch64::d4.Is(Arm64Assembler::reg_d(D4)));
+  EXPECT_TRUE(vixl::aarch64::d5.Is(Arm64Assembler::reg_d(D5)));
+  EXPECT_TRUE(vixl::aarch64::d6.Is(Arm64Assembler::reg_d(D6)));
+  EXPECT_TRUE(vixl::aarch64::d7.Is(Arm64Assembler::reg_d(D7)));
+  EXPECT_TRUE(vixl::aarch64::d8.Is(Arm64Assembler::reg_d(D8)));
+  EXPECT_TRUE(vixl::aarch64::d9.Is(Arm64Assembler::reg_d(D9)));
+  EXPECT_TRUE(vixl::aarch64::d10.Is(Arm64Assembler::reg_d(D10)));
+  EXPECT_TRUE(vixl::aarch64::d11.Is(Arm64Assembler::reg_d(D11)));
+  EXPECT_TRUE(vixl::aarch64::d12.Is(Arm64Assembler::reg_d(D12)));
+  EXPECT_TRUE(vixl::aarch64::d13.Is(Arm64Assembler::reg_d(D13)));
+  EXPECT_TRUE(vixl::aarch64::d14.Is(Arm64Assembler::reg_d(D14)));
+  EXPECT_TRUE(vixl::aarch64::d15.Is(Arm64Assembler::reg_d(D15)));
+  EXPECT_TRUE(vixl::aarch64::d16.Is(Arm64Assembler::reg_d(D16)));
+  EXPECT_TRUE(vixl::aarch64::d17.Is(Arm64Assembler::reg_d(D17)));
+  EXPECT_TRUE(vixl::aarch64::d18.Is(Arm64Assembler::reg_d(D18)));
+  EXPECT_TRUE(vixl::aarch64::d19.Is(Arm64Assembler::reg_d(D19)));
+  EXPECT_TRUE(vixl::aarch64::d20.Is(Arm64Assembler::reg_d(D20)));
+  EXPECT_TRUE(vixl::aarch64::d21.Is(Arm64Assembler::reg_d(D21)));
+  EXPECT_TRUE(vixl::aarch64::d22.Is(Arm64Assembler::reg_d(D22)));
+  EXPECT_TRUE(vixl::aarch64::d23.Is(Arm64Assembler::reg_d(D23)));
+  EXPECT_TRUE(vixl::aarch64::d24.Is(Arm64Assembler::reg_d(D24)));
+  EXPECT_TRUE(vixl::aarch64::d25.Is(Arm64Assembler::reg_d(D25)));
+  EXPECT_TRUE(vixl::aarch64::d26.Is(Arm64Assembler::reg_d(D26)));
+  EXPECT_TRUE(vixl::aarch64::d27.Is(Arm64Assembler::reg_d(D27)));
+  EXPECT_TRUE(vixl::aarch64::d28.Is(Arm64Assembler::reg_d(D28)));
+  EXPECT_TRUE(vixl::aarch64::d29.Is(Arm64Assembler::reg_d(D29)));
+  EXPECT_TRUE(vixl::aarch64::d30.Is(Arm64Assembler::reg_d(D30)));
+  EXPECT_TRUE(vixl::aarch64::d31.Is(Arm64Assembler::reg_d(D31)));
 
   // S Registers.
-  EXPECT_TRUE(vixl::s0.Is(Arm64Assembler::reg_s(S0)));
-  EXPECT_TRUE(vixl::s1.Is(Arm64Assembler::reg_s(S1)));
-  EXPECT_TRUE(vixl::s2.Is(Arm64Assembler::reg_s(S2)));
-  EXPECT_TRUE(vixl::s3.Is(Arm64Assembler::reg_s(S3)));
-  EXPECT_TRUE(vixl::s4.Is(Arm64Assembler::reg_s(S4)));
-  EXPECT_TRUE(vixl::s5.Is(Arm64Assembler::reg_s(S5)));
-  EXPECT_TRUE(vixl::s6.Is(Arm64Assembler::reg_s(S6)));
-  EXPECT_TRUE(vixl::s7.Is(Arm64Assembler::reg_s(S7)));
-  EXPECT_TRUE(vixl::s8.Is(Arm64Assembler::reg_s(S8)));
-  EXPECT_TRUE(vixl::s9.Is(Arm64Assembler::reg_s(S9)));
-  EXPECT_TRUE(vixl::s10.Is(Arm64Assembler::reg_s(S10)));
-  EXPECT_TRUE(vixl::s11.Is(Arm64Assembler::reg_s(S11)));
-  EXPECT_TRUE(vixl::s12.Is(Arm64Assembler::reg_s(S12)));
-  EXPECT_TRUE(vixl::s13.Is(Arm64Assembler::reg_s(S13)));
-  EXPECT_TRUE(vixl::s14.Is(Arm64Assembler::reg_s(S14)));
-  EXPECT_TRUE(vixl::s15.Is(Arm64Assembler::reg_s(S15)));
-  EXPECT_TRUE(vixl::s16.Is(Arm64Assembler::reg_s(S16)));
-  EXPECT_TRUE(vixl::s17.Is(Arm64Assembler::reg_s(S17)));
-  EXPECT_TRUE(vixl::s18.Is(Arm64Assembler::reg_s(S18)));
-  EXPECT_TRUE(vixl::s19.Is(Arm64Assembler::reg_s(S19)));
-  EXPECT_TRUE(vixl::s20.Is(Arm64Assembler::reg_s(S20)));
-  EXPECT_TRUE(vixl::s21.Is(Arm64Assembler::reg_s(S21)));
-  EXPECT_TRUE(vixl::s22.Is(Arm64Assembler::reg_s(S22)));
-  EXPECT_TRUE(vixl::s23.Is(Arm64Assembler::reg_s(S23)));
-  EXPECT_TRUE(vixl::s24.Is(Arm64Assembler::reg_s(S24)));
-  EXPECT_TRUE(vixl::s25.Is(Arm64Assembler::reg_s(S25)));
-  EXPECT_TRUE(vixl::s26.Is(Arm64Assembler::reg_s(S26)));
-  EXPECT_TRUE(vixl::s27.Is(Arm64Assembler::reg_s(S27)));
-  EXPECT_TRUE(vixl::s28.Is(Arm64Assembler::reg_s(S28)));
-  EXPECT_TRUE(vixl::s29.Is(Arm64Assembler::reg_s(S29)));
-  EXPECT_TRUE(vixl::s30.Is(Arm64Assembler::reg_s(S30)));
-  EXPECT_TRUE(vixl::s31.Is(Arm64Assembler::reg_s(S31)));
+  EXPECT_TRUE(vixl::aarch64::s0.Is(Arm64Assembler::reg_s(S0)));
+  EXPECT_TRUE(vixl::aarch64::s1.Is(Arm64Assembler::reg_s(S1)));
+  EXPECT_TRUE(vixl::aarch64::s2.Is(Arm64Assembler::reg_s(S2)));
+  EXPECT_TRUE(vixl::aarch64::s3.Is(Arm64Assembler::reg_s(S3)));
+  EXPECT_TRUE(vixl::aarch64::s4.Is(Arm64Assembler::reg_s(S4)));
+  EXPECT_TRUE(vixl::aarch64::s5.Is(Arm64Assembler::reg_s(S5)));
+  EXPECT_TRUE(vixl::aarch64::s6.Is(Arm64Assembler::reg_s(S6)));
+  EXPECT_TRUE(vixl::aarch64::s7.Is(Arm64Assembler::reg_s(S7)));
+  EXPECT_TRUE(vixl::aarch64::s8.Is(Arm64Assembler::reg_s(S8)));
+  EXPECT_TRUE(vixl::aarch64::s9.Is(Arm64Assembler::reg_s(S9)));
+  EXPECT_TRUE(vixl::aarch64::s10.Is(Arm64Assembler::reg_s(S10)));
+  EXPECT_TRUE(vixl::aarch64::s11.Is(Arm64Assembler::reg_s(S11)));
+  EXPECT_TRUE(vixl::aarch64::s12.Is(Arm64Assembler::reg_s(S12)));
+  EXPECT_TRUE(vixl::aarch64::s13.Is(Arm64Assembler::reg_s(S13)));
+  EXPECT_TRUE(vixl::aarch64::s14.Is(Arm64Assembler::reg_s(S14)));
+  EXPECT_TRUE(vixl::aarch64::s15.Is(Arm64Assembler::reg_s(S15)));
+  EXPECT_TRUE(vixl::aarch64::s16.Is(Arm64Assembler::reg_s(S16)));
+  EXPECT_TRUE(vixl::aarch64::s17.Is(Arm64Assembler::reg_s(S17)));
+  EXPECT_TRUE(vixl::aarch64::s18.Is(Arm64Assembler::reg_s(S18)));
+  EXPECT_TRUE(vixl::aarch64::s19.Is(Arm64Assembler::reg_s(S19)));
+  EXPECT_TRUE(vixl::aarch64::s20.Is(Arm64Assembler::reg_s(S20)));
+  EXPECT_TRUE(vixl::aarch64::s21.Is(Arm64Assembler::reg_s(S21)));
+  EXPECT_TRUE(vixl::aarch64::s22.Is(Arm64Assembler::reg_s(S22)));
+  EXPECT_TRUE(vixl::aarch64::s23.Is(Arm64Assembler::reg_s(S23)));
+  EXPECT_TRUE(vixl::aarch64::s24.Is(Arm64Assembler::reg_s(S24)));
+  EXPECT_TRUE(vixl::aarch64::s25.Is(Arm64Assembler::reg_s(S25)));
+  EXPECT_TRUE(vixl::aarch64::s26.Is(Arm64Assembler::reg_s(S26)));
+  EXPECT_TRUE(vixl::aarch64::s27.Is(Arm64Assembler::reg_s(S27)));
+  EXPECT_TRUE(vixl::aarch64::s28.Is(Arm64Assembler::reg_s(S28)));
+  EXPECT_TRUE(vixl::aarch64::s29.Is(Arm64Assembler::reg_s(S29)));
+  EXPECT_TRUE(vixl::aarch64::s30.Is(Arm64Assembler::reg_s(S30)));
+  EXPECT_TRUE(vixl::aarch64::s31.Is(Arm64Assembler::reg_s(S31)));
 }
 
 }  // namespace arm64
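A note on the mechanical change above: VIXL moved its AArch64 interface into the
nested vixl::aarch64 namespace, which is why every register constant gains the
extra qualifier. A minimal sketch of how call sites can keep the short
spellings, assuming the VIXL headers are already in scope (not part of the patch):

    using vixl::aarch64::d0;  // previously spelled vixl::d0
    using vixl::aarch64::s0;  // previously spelled vixl::s0

    bool SameRegister() {
      // Is() compares register code and size, as the EXPECT_TRUE checks above do.
      return d0.Is(d0);
    }
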
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index 5c33639..8dc9ab4 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -39,9 +39,6 @@
  */
 template <typename T>
 class ArrayRef {
- private:
-  struct tag { };
-
  public:
   typedef T value_type;
   typedef T& reference;
@@ -63,14 +60,14 @@
 
   template <size_t size>
   explicit constexpr ArrayRef(T (&array)[size])
-    : array_(array), size_(size) {
+      : array_(array), size_(size) {
   }
 
-  template <typename U, size_t size>
-  explicit constexpr ArrayRef(U (&array)[size],
-                              typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-                                  t ATTRIBUTE_UNUSED = tag())
-    : array_(array), size_(size) {
+  template <typename U,
+            size_t size,
+            typename = typename std::enable_if<std::is_same<T, const U>::value>::type>
+  explicit constexpr ArrayRef(U (&array)[size])
+      : array_(array), size_(size) {
   }
 
   constexpr ArrayRef(T* array_in, size_t size_in)
@@ -165,13 +162,21 @@
   value_type* data() { return array_; }
   const value_type* data() const { return array_; }
 
-  ArrayRef SubArray(size_type pos) const {
-    return SubArray(pos, size_ - pos);
+  ArrayRef SubArray(size_type pos) {
+    return SubArray(pos, size() - pos);
   }
-  ArrayRef SubArray(size_type pos, size_type length) const {
+  ArrayRef<const T> SubArray(size_type pos) const {
+    return SubArray(pos, size() - pos);
+  }
+  ArrayRef SubArray(size_type pos, size_type length) {
     DCHECK_LE(pos, size());
     DCHECK_LE(length, size() - pos);
-    return ArrayRef(array_ + pos, length);
+    return ArrayRef(data() + pos, length);
+  }
+  ArrayRef<const T> SubArray(size_type pos, size_type length) const {
+    DCHECK_LE(pos, size());
+    DCHECK_LE(length, size() - pos);
+    return ArrayRef<const T>(data() + pos, length);
   }
 
  private:
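The SubArray() rework above is a const-correctness fix: the non-const overloads
keep returning ArrayRef<T>, while the new const overloads return
ArrayRef<const T>, so slicing a const view no longer hands out mutable element
access. A small sketch of the resulting behavior (the helper function is
illustrative, not from the patch):

    void UseSubArray(ArrayRef<int> mut, const ArrayRef<int>& ro) {
      ArrayRef<int> head = mut.SubArray(0u, 2u);   // non-const overload
      ArrayRef<const int> tail = ro.SubArray(2u);  // const overload
      head[0] = 42;     // fine: elements stay mutable through a mutable ref
      // tail[0] = 42;  // would not compile: elements are const int now
    }
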
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index b01b0fe..81159e6 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -38,19 +38,16 @@
 #ifdef ART_ENABLE_CODEGEN_x86_64
 #include "x86_64/assembler_x86_64.h"
 #endif
+#include "base/casts.h"
 #include "globals.h"
 #include "memory_region.h"
 
 namespace art {
 
-static uint8_t* NewContents(size_t capacity) {
-  return new uint8_t[capacity];
-}
-
-
-AssemblerBuffer::AssemblerBuffer() {
+AssemblerBuffer::AssemblerBuffer(ArenaAllocator* arena)
+    : arena_(arena) {
   static const size_t kInitialBufferCapacity = 4 * KB;
-  contents_ = NewContents(kInitialBufferCapacity);
+  contents_ = arena_->AllocArray<uint8_t>(kInitialBufferCapacity, kArenaAllocAssembler);
   cursor_ = contents_;
   limit_ = ComputeLimit(contents_, kInitialBufferCapacity);
   fixup_ = nullptr;
@@ -67,7 +64,9 @@
 
 
 AssemblerBuffer::~AssemblerBuffer() {
-  delete[] contents_;
+  if (arena_->IsRunningOnMemoryTool()) {
+    arena_->MakeInaccessible(contents_, Capacity());
+  }
 }
 
 
@@ -95,23 +94,17 @@
 void AssemblerBuffer::ExtendCapacity(size_t min_capacity) {
   size_t old_size = Size();
   size_t old_capacity = Capacity();
+  DCHECK_GT(min_capacity, old_capacity);
   size_t new_capacity = std::min(old_capacity * 2, old_capacity + 1 * MB);
   new_capacity = std::max(new_capacity, min_capacity);
 
   // Allocate the new data area and copy contents of the old one to it.
-  uint8_t* new_contents = NewContents(new_capacity);
-  memmove(reinterpret_cast<void*>(new_contents),
-          reinterpret_cast<void*>(contents_),
-          old_size);
-
-  // Compute the relocation delta and switch to the new contents area.
-  ptrdiff_t delta = new_contents - contents_;
-  delete[] contents_;
-  contents_ = new_contents;
+  contents_ = reinterpret_cast<uint8_t*>(
+      arena_->Realloc(contents_, old_capacity, new_capacity, kArenaAllocAssembler));
 
   // Update the cursor and recompute the limit.
-  cursor_ += delta;
-  limit_ = ComputeLimit(new_contents, new_capacity);
+  cursor_ = contents_ + old_size;
+  limit_ = ComputeLimit(contents_, new_capacity);
 
   // Verify internal state.
   CHECK_EQ(Capacity(), new_capacity);
@@ -119,132 +112,13 @@
 }
 
 void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() {
-  this->AdvancePC(assembler_->CodeSize());
-}
-
-Assembler* Assembler::Create(InstructionSet instruction_set,
-                             const InstructionSetFeatures* instruction_set_features) {
-  switch (instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
-    case kArm:
-      return new arm::Arm32Assembler();
-    case kThumb2:
-      return new arm::Thumb2Assembler();
-#endif
-#ifdef ART_ENABLE_CODEGEN_arm64
-    case kArm64:
-      return new arm64::Arm64Assembler();
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips
-    case kMips:
-      return new mips::MipsAssembler(instruction_set_features != nullptr
-                                         ? instruction_set_features->AsMipsInstructionSetFeatures()
-                                         : nullptr);
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips64
-    case kMips64:
-      return new mips64::Mips64Assembler();
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86
-    case kX86:
-      return new x86::X86Assembler();
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86_64
-    case kX86_64:
-      return new x86_64::X86_64Assembler();
-#endif
-    default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+  uint32_t pc = dchecked_integral_cast<uint32_t>(assembler_->CodeSize());
+  if (delay_emitting_advance_pc_) {
+    uint32_t stream_pos = dchecked_integral_cast<uint32_t>(opcodes_.size());
+    delayed_advance_pcs_.push_back(DelayedAdvancePC {stream_pos, pc});
+  } else {
+    AdvancePC(pc);
   }
 }
 
-void Assembler::StoreImmediateToThread32(ThreadOffset<4> dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreImmediateToThread64(ThreadOffset<8> dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED,
-                                           FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                           ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED,
-                                           FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                           ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset<4> src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset<8> src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset<4> offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset<8> offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread32(ThreadOffset<4> offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread64(ThreadOffset<8> offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
 }  // namespace art
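Two things change in ExtendCapacity() above: growth is now arena-backed, and
because ArenaAllocator::Realloc() copies the old contents itself, the buffer
re-derives cursor_ from the old size instead of relocating it by a pointer
delta. The growth policy itself is unchanged; a standalone sketch of it:

    #include <algorithm>
    #include <cstddef>

    // Doubling growth, capped at +1 MB per step, never below the request.
    std::size_t NextCapacity(std::size_t old_capacity, std::size_t min_capacity) {
      const std::size_t kMaxStep = 1u << 20;  // 1 MB, as in `old_capacity + 1 * MB`
      std::size_t new_capacity = std::min(old_capacity * 2, old_capacity + kMaxStep);
      return std::max(new_capacity, min_capacity);
    }
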
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index d97a2a4..8981776 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -21,17 +21,21 @@
 
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "arm/constants_arm.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
-#include "arm/constants_arm.h"
+#include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "label.h"
 #include "managed_register.h"
 #include "memory_region.h"
 #include "mips/constants_mips.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "x86/constants_x86.h"
 #include "x86_64/constants_x86_64.h"
-#include "dwarf/debug_frame_opcode_writer.h"
 
 namespace art {
 
@@ -60,7 +64,7 @@
 };
 
 // Parent of all queued slow paths, emitted during finalization
-class SlowPath {
+class SlowPath : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
   SlowPath() : next_(nullptr) {}
   virtual ~SlowPath() {}
@@ -85,9 +89,13 @@
 
 class AssemblerBuffer {
  public:
-  AssemblerBuffer();
+  explicit AssemblerBuffer(ArenaAllocator* arena);
   ~AssemblerBuffer();
 
+  ArenaAllocator* GetArena() {
+    return arena_;
+  }
+
   // Basic support for emitting, loading, and storing.
   template<typename T> void Emit(T value) {
     CHECK(HasEnsuredCapacity());
@@ -172,8 +180,8 @@
   class EnsureCapacity {
    public:
     explicit EnsureCapacity(AssemblerBuffer* buffer) {
-      if (buffer->cursor() >= buffer->limit()) {
-        buffer->ExtendCapacity();
+      if (buffer->cursor() > buffer->limit()) {
+        buffer->ExtendCapacity(buffer->Size() + kMinimumGap);
       }
       // In debug mode, we save the assembler buffer along with the gap
       // size before we start emitting to the buffer. This allows us to
@@ -213,7 +221,9 @@
   class EnsureCapacity {
    public:
     explicit EnsureCapacity(AssemblerBuffer* buffer) {
-      if (buffer->cursor() >= buffer->limit()) buffer->ExtendCapacity();
+      if (buffer->cursor() > buffer->limit()) {
+        buffer->ExtendCapacity(buffer->Size() + kMinimumGap);
+      }
     }
   };
 
@@ -227,12 +237,22 @@
   // Returns the position in the instruction stream.
   int GetPosition() { return cursor_ - contents_; }
 
+  size_t Capacity() const {
+    CHECK_GE(limit_, contents_);
+    return (limit_ - contents_) + kMinimumGap;
+  }
+
+  // Unconditionally increase the capacity.
+  // The provided `min_capacity` must be higher than current `Capacity()`.
+  void ExtendCapacity(size_t min_capacity);
+
  private:
   // The limit is set to kMinimumGap bytes before the end of the data area.
   // This leaves enough space for the longest possible instruction and allows
   // for a single, fast space check per instruction.
   static const int kMinimumGap = 32;
 
+  ArenaAllocator* arena_;
   uint8_t* contents_;
   uint8_t* cursor_;
   uint8_t* limit_;
@@ -246,10 +266,6 @@
 
   uint8_t* cursor() const { return cursor_; }
   uint8_t* limit() const { return limit_; }
-  size_t Capacity() const {
-    CHECK_GE(limit_, contents_);
-    return (limit_ - contents_) + kMinimumGap;
-  }
 
   // Process the fixup chain starting at the given fixup. The offset is
   // non-zero for fixups in the body if the preamble is non-empty.
@@ -261,8 +277,6 @@
     return data + capacity - kMinimumGap;
   }
 
-  void ExtendCapacity(size_t min_capacity = 0u);
-
   friend class AssemblerFixup;
 };
 
@@ -271,23 +285,77 @@
 class DebugFrameOpCodeWriterForAssembler FINAL
     : public dwarf::DebugFrameOpCodeWriter<> {
  public:
+  struct DelayedAdvancePC {
+    uint32_t stream_pos;
+    uint32_t pc;
+  };
+
   // This method is called by the opcode writers.
   virtual void ImplicitlyAdvancePC() FINAL;
 
   explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer)
-      : dwarf::DebugFrameOpCodeWriter<>(),
-        assembler_(buffer) {
+      : dwarf::DebugFrameOpCodeWriter<>(false /* enabled */),
+        assembler_(buffer),
+        delay_emitting_advance_pc_(false),
+        delayed_advance_pcs_() {
+  }
+
+  ~DebugFrameOpCodeWriterForAssembler() {
+    DCHECK(delayed_advance_pcs_.empty());
+  }
+
+  // Tell the writer to delay emitting advance PC info.
+  // The assembler must explicitly process all the delayed advances.
+  void DelayEmittingAdvancePCs() {
+    delay_emitting_advance_pc_ = true;
+  }
+
+  // Override the last delayed PC. The new PC can be out of order.
+  void OverrideDelayedPC(size_t pc) {
+    DCHECK(delay_emitting_advance_pc_);
+    if (enabled_) {
+      DCHECK(!delayed_advance_pcs_.empty());
+      delayed_advance_pcs_.back().pc = pc;
+    }
+  }
+
+  // Return the number of delayed advance PC entries.
+  size_t NumberOfDelayedAdvancePCs() const {
+    return delayed_advance_pcs_.size();
+  }
+
+  // Release the CFI stream and advance PC infos so that the assembler can patch it.
+  std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>>
+  ReleaseStreamAndPrepareForDelayedAdvancePC() {
+    DCHECK(delay_emitting_advance_pc_);
+    delay_emitting_advance_pc_ = false;
+    std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>> result;
+    result.first.swap(opcodes_);
+    result.second.swap(delayed_advance_pcs_);
+    return result;
+  }
+
+  // Reserve space for the CFI stream.
+  void ReserveCFIStream(size_t capacity) {
+    opcodes_.reserve(capacity);
+  }
+
+  // Append raw data to the CFI stream.
+  void AppendRawData(const std::vector<uint8_t>& raw_data, size_t first, size_t last) {
+    DCHECK_LE(0u, first);
+    DCHECK_LE(first, last);
+    DCHECK_LE(last, raw_data.size());
+    opcodes_.insert(opcodes_.end(), raw_data.begin() + first, raw_data.begin() + last);
   }
 
  private:
   Assembler* assembler_;
+  bool delay_emitting_advance_pc_;
+  std::vector<DelayedAdvancePC> delayed_advance_pcs_;
 };
 
-class Assembler {
+class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
-  static Assembler* Create(InstructionSet instruction_set,
-                           const InstructionSetFeatures* instruction_set_features = nullptr);
-
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
 
@@ -303,140 +371,6 @@
   // TODO: Implement with disassembler.
   virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
-  // Emit code that will create an activation on the stack
-  virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs,
-                          const ManagedRegisterEntrySpills& entry_spills) = 0;
-
-  // Emit code that will remove an activation from the stack
-  virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& callee_save_regs) = 0;
-
-  virtual void IncreaseFrameSize(size_t adjust) = 0;
-  virtual void DecreaseFrameSize(size_t adjust) = 0;
-
-  // Store routines
-  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
-  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
-  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
-
-  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                     ManagedRegister scratch) = 0;
-
-  virtual void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                        ManagedRegister scratch);
-  virtual void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                        ManagedRegister scratch);
-
-  virtual void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-  virtual void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-
-  virtual void StoreStackPointerToThread32(ThreadOffset<4> thr_offs);
-  virtual void StoreStackPointerToThread64(ThreadOffset<8> thr_offs);
-
-  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
-                             FrameOffset in_off, ManagedRegister scratch) = 0;
-
-  // Load routines
-  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
-
-  virtual void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size);
-  virtual void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size);
-
-  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
-  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
-  virtual void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-                       bool unpoison_reference) = 0;
-
-  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
-
-  virtual void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs);
-  virtual void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs);
-
-  // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
-
-  virtual void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                                      ManagedRegister scratch);
-  virtual void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                                      ManagedRegister scratch);
-
-  virtual void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-  virtual void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-
-  virtual void CopyRef(FrameOffset dest, FrameOffset src,
-                       ManagedRegister scratch) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest, Offset dest_offset,
-                    ManagedRegister src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
-
-  // Sign extension
-  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Zero extension
-  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Exploit fast access in managed code to Thread::Current()
-  virtual void GetCurrentThread(ManagedRegister tr) = 0;
-  virtual void GetCurrentThread(FrameOffset dest_offset,
-                                ManagedRegister scratch) = 0;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  virtual void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                               ManagedRegister in_reg, bool null_allowed) = 0;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  virtual void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                               ManagedRegister scratch, bool null_allowed) = 0;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
-                                     ManagedRegister src) = 0;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
-  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
-
-  // Call to address held at [base+offset]
-  virtual void Call(ManagedRegister base, Offset offset,
-                    ManagedRegister scratch) = 0;
-  virtual void Call(FrameOffset base, Offset offset,
-                    ManagedRegister scratch) = 0;
-  virtual void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch);
-  virtual void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch);
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
-
   virtual void Bind(Label* label) = 0;
   virtual void Jump(Label* label) = 0;
 
@@ -448,8 +382,16 @@
    */
   DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
 
+  ArenaAllocator* GetArena() {
+    return buffer_.GetArena();
+  }
+
+  AssemblerBuffer* GetBuffer() {
+    return &buffer_;
+  }
+
  protected:
-  Assembler() : buffer_(), cfi_(this) {}
+  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
 
   AssemblerBuffer buffer_;
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index f1233ca..92b4c8e 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -147,7 +147,7 @@
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               std::string fmt) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
@@ -188,14 +188,66 @@
     return str;
   }
 
+  template <typename ImmType, typename Reg1, typename Reg2>
+  std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2),
+                                              const std::vector<Reg1*> reg1_registers,
+                                              const std::vector<Reg2*> reg2_registers,
+                                              std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                              std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                              int imm_bits,
+                                              std::string fmt) {
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
+
+    WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+
+    std::string str;
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        for (int64_t imm : imms) {
+          ImmType new_imm = CreateImmediate(imm);
+          (assembler_.get()->*f)(new_imm, *reg1, *reg2);
+          std::string base = fmt;
+
+          std::string reg1_string = (this->*GetName1)(*reg1);
+          size_t reg1_index;
+          while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+            base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+          }
+
+          std::string reg2_string = (this->*GetName2)(*reg2);
+          size_t reg2_index;
+          while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+            base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+          }
+
+          size_t imm_index = base.find(IMM_TOKEN);
+          if (imm_index != std::string::npos) {
+            std::ostringstream sreg;
+            sreg << imm;
+            std::string imm_string = sreg.str();
+            base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+          }
+
+          if (str.size() > 0) {
+            str += "\n";
+          }
+          str += base;
+        }
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
   template <typename RegType, typename ImmType>
   std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
-                                              int imm_bits,
-                                              const std::vector<Reg*> registers,
-                                              std::string (AssemblerTest::*GetName)(const RegType&),
-                                              std::string fmt) {
+                                             int imm_bits,
+                                             const std::vector<Reg*> registers,
+                                             std::string (AssemblerTest::*GetName)(const RegType&),
+                                             std::string fmt) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     for (auto reg : registers) {
       for (int64_t imm : imms) {
@@ -291,6 +343,28 @@
                                                      fmt);
   }
 
+  template <typename ImmType>
+  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f,
+                                                                  imm_bits,
+                                                                  GetFPRegisters(),
+                                                                  GetFPRegisters(),
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  fmt);
+  }
+
+  template <typename ImmType>
+  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
+    return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
+                                                                  GetFPRegisters(),
+                                                                  GetFPRegisters(),
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  imm_bits,
+                                                                  fmt);
+  }
+
   std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
@@ -397,7 +471,8 @@
   explicit AssemblerTest() {}
 
   void SetUp() OVERRIDE {
-    assembler_.reset(new Ass());
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(CreateAssembler(arena_.get()));
     test_helper_.reset(
         new AssemblerTestInfrastructure(GetArchitectureString(),
                                         GetAssemblerCmdName(),
@@ -413,6 +488,13 @@
 
   void TearDown() OVERRIDE {
     test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
+  }
+
+  // Override this to set up any architecture-specific things, e.g., CPU revision.
+  virtual Ass* CreateAssembler(ArenaAllocator* arena) {
+    return new (arena) Ass(arena);
   }
 
   // Override this to set up any architecture-specific things, e.g., register vectors.
@@ -840,17 +922,24 @@
     return str;
   }
 
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
   void DriverWrapper(std::string assembly_text, std::string test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
+    Pad(*data);
     test_helper_->Driver(*data, assembly_text, test_name);
   }
 
   static constexpr size_t kWarnManyCombinationsThreshold = 500;
 
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
   std::unique_ptr<Ass> assembler_;
   std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2ae8841..9c9271d 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -32,7 +32,7 @@
 // Include results file (generated manually)
 #include "assembler_thumb_test_expected.cc.inc"
 
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 // This controls whether the results are printed to the
 // screen or compared against the expected output.
 // To generate new expected output, set this to true and
@@ -72,7 +72,7 @@
 }
 
 std::string GetToolsDir() {
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
   // This will only work on the host.  There is no as, objcopy or objdump on the device.
   static std::string toolsdir;
 
@@ -89,7 +89,7 @@
 }
 
 void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) {
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
   static std::string toolsdir = GetToolsDir();
 
   ScratchFile file;
@@ -135,7 +135,8 @@
     toolsdir.c_str(), filename);
   if (kPrintResults) {
     // Print the results only, don't check. This is used to generate new output for inserting
-    // into the .inc file.
+    // into the .inc file, so let's add the appropriate prefix/suffix needed in the C++ code.
+    strcat(cmd, " | sed '-es/^/  \"/' | sed '-es/$/\\\\n\",/'");
     int cmd_result3 = system(cmd);
     ASSERT_EQ(cmd_result3, 0) << strerror(errno);
   } else {
@@ -168,7 +169,7 @@
 
   snprintf(buf, sizeof(buf), "%s.oo", filename);
   unlink(buf);
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 #define __ assembler->
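
The sed pipeline added to the kPrintResults path above rewrites each
disassembly line into the C string-literal form used by the expected-output
.inc file. Expressed in C++ (the function name is illustrative):

    #include <string>

    // `movs r0, r1`  ->  `  "movs r0, r1\n",`
    std::string WrapForIncFile(const std::string& line) {
      return "  \"" + line + "\\n\",";
    }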
@@ -194,11 +195,18 @@
 
 #undef __
 
+class Thumb2AssemblerTest : public ::testing::Test {
+ public:
+  Thumb2AssemblerTest() : pool(), arena(&pool), assembler(&arena) { }
+
+  ArenaPool pool;
+  ArenaAllocator arena;
+  arm::Thumb2Assembler assembler;
+};
+
 #define __ assembler.
 
-TEST(Thumb2AssemblerTest, SimpleMov) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleMov) {
   __ movs(R0, ShifterOperand(R1));
   __ mov(R0, ShifterOperand(R1));
   __ mov(R8, ShifterOperand(R9));
@@ -209,8 +217,7 @@
   EmitAndCheck(&assembler, "SimpleMov");
 }
 
-TEST(Thumb2AssemblerTest, SimpleMov32) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, SimpleMov32) {
   __ Force32Bit();
 
   __ mov(R0, ShifterOperand(R1));
@@ -219,9 +226,7 @@
   EmitAndCheck(&assembler, "SimpleMov32");
 }
 
-TEST(Thumb2AssemblerTest, SimpleMovAdd) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleMovAdd) {
   __ mov(R0, ShifterOperand(R1));
   __ adds(R0, R1, ShifterOperand(R2));
   __ add(R0, R1, ShifterOperand(0));
@@ -229,9 +234,7 @@
   EmitAndCheck(&assembler, "SimpleMovAdd");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingRegister) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingRegister) {
   // 32 bit variants using low registers.
   __ mvn(R0, ShifterOperand(R1), AL, kCcKeep);
   __ add(R0, R1, ShifterOperand(R2), AL, kCcKeep);
@@ -363,9 +366,7 @@
   EmitAndCheck(&assembler, "DataProcessingRegister");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingImmediate) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingImmediate) {
   __ mov(R0, ShifterOperand(0x55));
   __ mvn(R0, ShifterOperand(0x55));
   __ add(R0, R1, ShifterOperand(0x55));
@@ -396,9 +397,7 @@
   EmitAndCheck(&assembler, "DataProcessingImmediate");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
   __ mov(R0, ShifterOperand(0x550055));
   __ mvn(R0, ShifterOperand(0x550055));
   __ add(R0, R1, ShifterOperand(0x550055));
@@ -421,9 +420,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
   __ mov(R0, ShifterOperand(0x550055));
   __ mov(R0, ShifterOperand(0x55005500));
   __ mov(R0, ShifterOperand(0x55555555));
@@ -435,9 +432,7 @@
   EmitAndCheck(&assembler, "DataProcessingModifiedImmediates");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
   // 16-bit variants.
   __ movs(R3, ShifterOperand(R4, LSL, 4));
   __ movs(R3, ShifterOperand(R4, LSR, 5));
@@ -466,10 +461,39 @@
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
 
+TEST_F(Thumb2AssemblerTest, ShiftImmediate) {
+  // Note: This test produces the same results as DataProcessingShiftedRegister
+  // but it does so using shift functions instead of mov().
 
-TEST(Thumb2AssemblerTest, BasicLoad) {
-  arm::Thumb2Assembler assembler;
+  // 16-bit variants.
+  __ Lsl(R3, R4, 4);
+  __ Lsr(R3, R4, 5);
+  __ Asr(R3, R4, 6);
 
+  // 32-bit ROR because ROR immediate doesn't have the same 16-bit version as other shifts.
+  __ Ror(R3, R4, 7);
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ Rrx(R3, R4);
+
+  // 32 bit variants (not setting condition codes).
+  __ Lsl(R3, R4, 4, AL, kCcKeep);
+  __ Lsr(R3, R4, 5, AL, kCcKeep);
+  __ Asr(R3, R4, 6, AL, kCcKeep);
+  __ Ror(R3, R4, 7, AL, kCcKeep);
+  __ Rrx(R3, R4, AL, kCcKeep);
+
+  // 32 bit variants (high registers).
+  __ Lsls(R8, R4, 4);
+  __ Lsrs(R8, R4, 5);
+  __ Asrs(R8, R4, 6);
+  __ Rors(R8, R4, 7);
+  __ Rrxs(R8, R4);
+
+  EmitAndCheck(&assembler, "ShiftImmediate");
+}
+
+TEST_F(Thumb2AssemblerTest, BasicLoad) {
   __ ldr(R3, Address(R4, 24));
   __ ldrb(R3, Address(R4, 24));
   __ ldrh(R3, Address(R4, 24));
@@ -489,9 +513,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, BasicStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicStore) {
   __ str(R3, Address(R4, 24));
   __ strb(R3, Address(R4, 24));
   __ strh(R3, Address(R4, 24));
@@ -506,9 +528,7 @@
   EmitAndCheck(&assembler, "BasicStore");
 }
 
-TEST(Thumb2AssemblerTest, ComplexLoad) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexLoad) {
   __ ldr(R3, Address(R4, 24, Address::Mode::Offset));
   __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex));
   __ ldr(R3, Address(R4, 24, Address::Mode::PostIndex));
@@ -548,9 +568,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, ComplexStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexStore) {
   __ str(R3, Address(R4, 24, Address::Mode::Offset));
   __ str(R3, Address(R4, 24, Address::Mode::PreIndex));
   __ str(R3, Address(R4, 24, Address::Mode::PostIndex));
@@ -575,9 +593,7 @@
   EmitAndCheck(&assembler, "ComplexStore");
 }
 
-TEST(Thumb2AssemblerTest, NegativeLoadStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, NegativeLoadStore) {
   __ ldr(R3, Address(R4, -24, Address::Mode::Offset));
   __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex));
   __ ldr(R3, Address(R4, -24, Address::Mode::PostIndex));
@@ -637,18 +653,14 @@
   EmitAndCheck(&assembler, "NegativeLoadStore");
 }
 
-TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleLoadStoreDual) {
   __ strd(R2, Address(R0, 24, Address::Mode::Offset));
   __ ldrd(R2, Address(R0, 24, Address::Mode::Offset));
 
   EmitAndCheck(&assembler, "SimpleLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexLoadStoreDual) {
   __ strd(R2, Address(R0, 24, Address::Mode::Offset));
   __ strd(R2, Address(R0, 24, Address::Mode::PreIndex));
   __ strd(R2, Address(R0, 24, Address::Mode::PostIndex));
@@ -666,9 +678,7 @@
   EmitAndCheck(&assembler, "ComplexLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, NegativeLoadStoreDual) {
   __ strd(R2, Address(R0, -24, Address::Mode::Offset));
   __ strd(R2, Address(R0, -24, Address::Mode::PreIndex));
   __ strd(R2, Address(R0, -24, Address::Mode::PostIndex));
@@ -686,9 +696,7 @@
   EmitAndCheck(&assembler, "NegativeLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, SimpleBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleBranch) {
   Label l1;
   __ mov(R0, ShifterOperand(2));
   __ Bind(&l1);
@@ -724,8 +732,7 @@
   EmitAndCheck(&assembler, "SimpleBranch");
 }
 
-TEST(Thumb2AssemblerTest, LongBranch) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, LongBranch) {
   __ Force32Bit();
   // 32 bit branches.
   Label l1;
@@ -764,9 +771,7 @@
   EmitAndCheck(&assembler, "LongBranch");
 }
 
-TEST(Thumb2AssemblerTest, LoadMultiple) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadMultiple) {
   // 16 bit.
   __ ldm(DB_W, R4, (1 << R0 | 1 << R3));
 
@@ -780,9 +785,7 @@
   EmitAndCheck(&assembler, "LoadMultiple");
 }
 
-TEST(Thumb2AssemblerTest, StoreMultiple) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StoreMultiple) {
   // 16 bit.
   __ stm(IA_W, R4, (1 << R0 | 1 << R3));
 
@@ -797,14 +800,13 @@
   EmitAndCheck(&assembler, "StoreMultiple");
 }
 
-TEST(Thumb2AssemblerTest, MovWMovT) {
-  arm::Thumb2Assembler assembler;
-
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+TEST_F(Thumb2AssemblerTest, MovWMovT) {
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
@@ -814,41 +816,84 @@
   EmitAndCheck(&assembler, "MovWMovT");
 }
 
-TEST(Thumb2AssemblerTest, SpecialAddSub) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SpecialAddSub) {
   __ add(R2, SP, ShifterOperand(0x50));   // 16 bit.
   __ add(SP, SP, ShifterOperand(0x50));   // 16 bit.
   __ add(R8, SP, ShifterOperand(0x50));   // 32 bit.
 
   __ add(R2, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
   __ add(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
+  __ add(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
-  __ sub(SP, SP, ShifterOperand(0x50));     // 16 bit
-  __ sub(R0, SP, ShifterOperand(0x50));     // 32 bit
-  __ sub(R8, SP, ShifterOperand(0x50));     // 32 bit.
+  __ sub(SP, SP, ShifterOperand(0x50));   // 16 bit
+  __ sub(R0, SP, ShifterOperand(0x50));   // 32 bit
+  __ sub(R8, SP, ShifterOperand(0x50));   // 32 bit.
 
-  __ sub(SP, SP, ShifterOperand(0xf00));   // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
   EmitAndCheck(&assembler, "SpecialAddSub");
 }
 
-TEST(Thumb2AssemblerTest, StoreToOffset) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, LoadFromOffset) {
+  __ LoadFromOffset(kLoadWord, R2, R4, 12);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadWord, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 12);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400);
+  __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400);
 
-  __ StoreToOffset(kStoreWord, R2, R4, 12);     // Simple
-  __ StoreToOffset(kStoreWord, R2, R4, 0x2000);     // Offset too big.
-  __ StoreToOffset(kStoreWord, R0, R12, 12);
-  __ StoreToOffset(kStoreHalfword, R0, R12, 12);
-  __ StoreToOffset(kStoreByte, R2, R12, 12);
+  __ LoadFromOffset(kLoadWord, R0, R12, 12);  // 32-bit because of R12.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000);
+
+  __ LoadFromOffset(kLoadSignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12);
+
+  EmitAndCheck(&assembler, "LoadFromOffset");
+}
+
+TEST_F(Thumb2AssemblerTest, StoreToOffset) {
+  __ StoreToOffset(kStoreWord, R2, R4, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreWord, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 12);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 12);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400);
+  __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400);
+
+  __ StoreToOffset(kStoreWord, R0, R12, 12);  // 32-bit because of R12.
+  __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000);
+
+  __ StoreToOffset(kStoreByte, R2, R4, 12);
 
   EmitAndCheck(&assembler, "StoreToOffset");
 }
 
-
-TEST(Thumb2AssemblerTest, IfThen) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, IfThen) {
   __ it(EQ);
   __ mov(R1, ShifterOperand(1), EQ);
 
@@ -879,9 +924,7 @@
   EmitAndCheck(&assembler, "IfThen");
 }
 
-TEST(Thumb2AssemblerTest, CbzCbnz) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CbzCbnz) {
   Label l1;
   __ cbz(R2, &l1);
   __ mov(R1, ShifterOperand(3));
@@ -899,9 +942,7 @@
   EmitAndCheck(&assembler, "CbzCbnz");
 }
 
-TEST(Thumb2AssemblerTest, Multiply) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Multiply) {
   __ mul(R0, R1, R0);
   __ mul(R0, R1, R2);
   __ mul(R8, R9, R8);
@@ -919,9 +960,7 @@
   EmitAndCheck(&assembler, "Multiply");
 }
 
-TEST(Thumb2AssemblerTest, Divide) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Divide) {
   __ sdiv(R0, R1, R2);
   __ sdiv(R8, R9, R10);
 
@@ -931,9 +970,7 @@
   EmitAndCheck(&assembler, "Divide");
 }
 
-TEST(Thumb2AssemblerTest, VMov) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, VMov) {
   __ vmovs(S1, 1.0);
   __ vmovd(D1, 1.0);
 
@@ -944,9 +981,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, BasicFloatingPoint) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicFloatingPoint) {
   __ vadds(S0, S1, S2);
   __ vsubs(S0, S1, S2);
   __ vmuls(S0, S1, S2);
@@ -970,9 +1005,7 @@
   EmitAndCheck(&assembler, "BasicFloatingPoint");
 }
 
-TEST(Thumb2AssemblerTest, FloatingPointConversions) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, FloatingPointConversions) {
   __ vcvtsd(S2, D2);
   __ vcvtds(D2, S2);
 
@@ -991,9 +1024,7 @@
   EmitAndCheck(&assembler, "FloatingPointConversions");
 }
 
-TEST(Thumb2AssemblerTest, FloatingPointComparisons) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, FloatingPointComparisons) {
   __ vcmps(S0, S1);
   __ vcmpd(D0, D1);
 
@@ -1003,35 +1034,27 @@
   EmitAndCheck(&assembler, "FloatingPointComparisons");
 }
 
-TEST(Thumb2AssemblerTest, Calls) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Calls) {
   __ blx(LR);
   __ bx(LR);
 
   EmitAndCheck(&assembler, "Calls");
 }
 
-TEST(Thumb2AssemblerTest, Breakpoint) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Breakpoint) {
   __ bkpt(0);
 
   EmitAndCheck(&assembler, "Breakpoint");
 }
 
-TEST(Thumb2AssemblerTest, StrR1) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StrR1) {
   __ str(R1, Address(SP, 68));
   __ str(R1, Address(SP, 1068));
 
   EmitAndCheck(&assembler, "StrR1");
 }
 
-TEST(Thumb2AssemblerTest, VPushPop) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, VPushPop) {
   __ vpushs(S2, 4);
   __ vpushd(D2, 4);
 
@@ -1041,9 +1064,7 @@
   EmitAndCheck(&assembler, "VPushPop");
 }
 
-TEST(Thumb2AssemblerTest, Max16BitBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Max16BitBranch) {
   Label l1;
   __ b(&l1);
   for (int i = 0 ; i < (1 << 11) ; i += 2) {
@@ -1055,9 +1076,7 @@
   EmitAndCheck(&assembler, "Max16BitBranch");
 }
 
-TEST(Thumb2AssemblerTest, Branch32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Branch32) {
   Label l1;
   __ b(&l1);
   for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
@@ -1069,9 +1088,7 @@
   EmitAndCheck(&assembler, "Branch32");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchMax) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchMax) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 7) ; i += 2) {
@@ -1083,9 +1100,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchMax");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 7) + 2 ; i += 2) {
@@ -1097,9 +1112,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchRelocation16");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
@@ -1111,9 +1124,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchRelocation32");
 }
 
-TEST(Thumb2AssemblerTest, MixedBranch32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, MixedBranch32) {
   Label l1;
   Label l2;
   __ b(&l1);      // Forwards.
@@ -1130,9 +1141,7 @@
   EmitAndCheck(&assembler, "MixedBranch32");
 }
 
-TEST(Thumb2AssemblerTest, Shifts) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Shifts) {
   // 16 bit selected for CcDontCare.
   __ Lsl(R0, R1, 5);
   __ Lsr(R0, R1, 5);
@@ -1207,9 +1216,7 @@
   EmitAndCheck(&assembler, "Shifts");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreRegOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreRegOffset) {
   // 16 bit.
   __ ldr(R0, Address(R1, R2));
   __ str(R0, Address(R1, R2));
@@ -1234,9 +1241,7 @@
   EmitAndCheck(&assembler, "LoadStoreRegOffset");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreLiteral) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreLiteral) {
   __ ldr(R0, Address(4));
   __ str(R0, Address(4));
 
@@ -1252,9 +1257,7 @@
   EmitAndCheck(&assembler, "LoadStoreLiteral");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreLimits) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreLimits) {
   __ ldr(R0, Address(R4, 124));     // 16 bit.
   __ ldr(R0, Address(R4, 128));     // 32 bit.
 
@@ -1282,9 +1285,7 @@
   EmitAndCheck(&assembler, "LoadStoreLimits");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranch) {
   Label label;
   __ CompareAndBranchIfZero(arm::R0, &label);
   __ CompareAndBranchIfZero(arm::R11, &label);
@@ -1295,6 +1296,318 @@
   EmitAndCheck(&assembler, "CompareAndBranch");
 }
 
+TEST_F(Thumb2AssemblerTest, AddConstant) {
+  // Low registers, Rd != Rn.
+  __ AddConstant(R0, R1, 0);                          // MOV.
+  __ AddConstant(R0, R1, 1);                          // 16-bit ADDS, encoding T1.
+  __ AddConstant(R0, R1, 7);                          // 16-bit ADDS, encoding T1.
+  __ AddConstant(R0, R1, 8);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 255);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R0, R1, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R0, R1, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R0, R1, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R1, -1);                         // 16-bit SUBS.
+  __ AddConstant(R0, R1, -7);                         // 16-bit SUBS.
+  __ AddConstant(R0, R1, -8);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -255);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R0, R1, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R0, R1, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R1, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // Low registers, Rd == Rn.
+  __ AddConstant(R0, R0, 0);                          // Nothing.
+  __ AddConstant(R1, R1, 1);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 7);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 8);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 255);                        // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R1, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R0, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R0, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R1, R1, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R0, R0, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R1, R1, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R0, -1);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -7);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -8);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -255);                       // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R0, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R1, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R1, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R0, R0, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R1, R1, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R0, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // High registers.
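+  // The 16-bit immediate encodings only reach R0-R7, so operations involving
+  // a high register use 32-bit encodings or a constant synthesized in IP.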
+  __ AddConstant(R8, R8, 0);                          // Nothing.
+  __ AddConstant(R8, R1, 1);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 7);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 8);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, 255);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R8, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R8, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R0, R8, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R8, R8, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R0, R8, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R8, R8, -1);                         // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, -7);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -8);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R8, -255);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R1, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R8, R8, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R8, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R8, R1, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R0, R8, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R8, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R8, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // Low registers, Rd != Rn, kCcKeep.
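+  // Outside an IT block the 16-bit ADDS/SUBS encodings always set flags,
+  // so kCcKeep forces 32-bit encodings (except the plain MOV emitted for 0).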
+  __ AddConstant(R0, R1, 0, AL, kCcKeep);             // MOV.
+  __ AddConstant(R0, R1, 1, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 7, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 8, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 255, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 256, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 257, AL, kCcKeep);           // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0xfff, AL, kCcKeep);         // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0x1000, AL, kCcKeep);        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x1001, AL, kCcKeep);        // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R1, 0xffff, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R1, 0x10000, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10001, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10002, AL, kCcKeep);       // MVN+SUB.
+  __ AddConstant(R0, R1, 0x10003, AL, kCcKeep);       // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R1, -1, AL, kCcKeep);            // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, -7, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -8, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -255, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -256, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -257, AL, kCcKeep);          // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0xfff, AL, kCcKeep);        // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0x1000, AL, kCcKeep);       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x1001, AL, kCcKeep);       // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R1, -0xffff, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R1, -0x10000, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10001, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10002, AL, kCcKeep);      // MVN+ADD.
+  __ AddConstant(R0, R1, -0x10003, AL, kCcKeep);      // MOVW+MOVT+ADD.
+
+  // Low registers, Rd == Rn, kCcKeep.
+  __ AddConstant(R0, R0, 0, AL, kCcKeep);             // Nothing.
+  __ AddConstant(R1, R1, 1, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 7, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 8, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 255, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 256, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 257, AL, kCcKeep);           // 32-bit ADD, encoding T4.
+  __ AddConstant(R1, R1, 0xfff, AL, kCcKeep);         // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R0, 0x1000, AL, kCcKeep);        // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x1001, AL, kCcKeep);        // MVN+SUB.
+  __ AddConstant(R0, R0, 0x1002, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R1, R1, 0xffff, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R0, 0x10000, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x10001, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 0x10002, AL, kCcKeep);       // MVN+SUB.
+  __ AddConstant(R1, R1, 0x10003, AL, kCcKeep);       // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R0, -1, AL, kCcKeep);            // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, -7, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -8, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -255, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -256, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -257, AL, kCcKeep);          // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R0, -0xfff, AL, kCcKeep);        // 32-bit SUB, encoding T4.
+  __ AddConstant(R1, R1, -0x1000, AL, kCcKeep);       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x1001, AL, kCcKeep);       // MVN+ADD.
+  __ AddConstant(R1, R1, -0x1002, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R0, -0xffff, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R1, R1, -0x10000, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x10001, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -0x10002, AL, kCcKeep);      // MVN+ADD.
+  __ AddConstant(R0, R0, -0x10003, AL, kCcKeep);      // MOVW+MOVT+ADD.
+
+  // Low registers, Rd != Rn, kCcSet.
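+  // kCcSet needs flag-setting encodings; ADDW/SUBW (T4) have no S variant,
+  // so 12-bit constants fall back to MVN/MOVW plus a flag-setting ADDS/SUBS.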
+  __ AddConstant(R0, R1, 0, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 1, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 7, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 8, AL, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 255, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 256, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 257, AL, kCcSet);            // MVN+SUBS.
+  __ AddConstant(R0, R1, 0xfff, AL, kCcSet);          // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0x1000, AL, kCcSet);         // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x1001, AL, kCcSet);         // MVN+SUBS.
+  __ AddConstant(R0, R1, 0x1002, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0xffff, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0x10000, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x10001, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x10002, AL, kCcSet);        // MVN+SUBS.
+  __ AddConstant(R0, R1, 0x10003, AL, kCcSet);        // MOVW+MOVT+ADDS.
+  __ AddConstant(R0, R1, -1, AL, kCcSet);             // 16-bit SUBS.
+  __ AddConstant(R0, R1, -7, AL, kCcSet);             // 16-bit SUBS.
+  __ AddConstant(R0, R1, -8, AL, kCcSet);             // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -255, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -256, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -257, AL, kCcSet);           // MVN+ADDS.
+  __ AddConstant(R0, R1, -0xfff, AL, kCcSet);         // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0x1000, AL, kCcSet);        // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x1001, AL, kCcSet);        // MVN+ADDS.
+  __ AddConstant(R0, R1, -0x1002, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0xffff, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0x10000, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x10001, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x10002, AL, kCcSet);       // MVN+ADDS.
+  __ AddConstant(R0, R1, -0x10003, AL, kCcSet);       // MOVW+MOVT+ADDS.
+
+  // Low registers, Rd == Rn, kCcSet.
+  __ AddConstant(R0, R0, 0, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 1, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 7, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 8, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 255, AL, kCcSet);            // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 256, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R0, 257, AL, kCcSet);            // MVN+SUBS.
+  __ AddConstant(R1, R1, 0xfff, AL, kCcSet);          // MOVW+ADDS.
+  __ AddConstant(R0, R0, 0x1000, AL, kCcSet);         // 32-bit ADDS, encoding T3.
+  __ AddConstant(R1, R1, 0x1001, AL, kCcSet);         // MVN+SUBS.
+  __ AddConstant(R0, R0, 0x1002, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R1, R1, 0xffff, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R0, 0x10000, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R1, R1, 0x10001, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R0, 0x10002, AL, kCcSet);        // MVN+SUBS.
+  __ AddConstant(R1, R1, 0x10003, AL, kCcSet);        // MOVW+MOVT+ADDS.
+  __ AddConstant(R0, R0, -1, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -7, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -8, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -255, AL, kCcSet);           // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -256, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R1, R1, -257, AL, kCcSet);           // MVN+ADDS.
+  __ AddConstant(R0, R0, -0xfff, AL, kCcSet);         // MOVW+SUBS.
+  __ AddConstant(R1, R1, -0x1000, AL, kCcSet);        // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R0, -0x1001, AL, kCcSet);        // MVN+ADDS.
+  __ AddConstant(R1, R1, -0x1002, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R0, -0xffff, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R1, R1, -0x10000, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R0, -0x10001, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R1, R1, -0x10002, AL, kCcSet);       // MVN+ADDS.
+  __ AddConstant(R0, R0, -0x10003, AL, kCcSet);       // MOVW+MOVT+ADDS.
+
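+  // Inside an IT block the 16-bit ADDS/SUBS encodings do not set flags, so
+  // there kCcKeep can use them while kCcSet must use the 32-bit S-forms.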
+  __ it(EQ);
+  __ AddConstant(R0, R1, 1, EQ, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ it(NE);
+  __ AddConstant(R0, R1, 1, NE, kCcKeep);             // 16-bit ADDS, encoding T1.
+  __ it(GE);
+  __ AddConstant(R0, R0, 1, GE, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ it(LE);
+  __ AddConstant(R0, R0, 1, LE, kCcKeep);             // 16-bit ADDS, encoding T2.
+
+  EmitAndCheck(&assembler, "AddConstant");
+}
+
+TEST_F(Thumb2AssemblerTest, CmpConstant) {
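+  // A CMP with a negative constant is emitted as CMN with the negated value
+  // (and vice versa) whenever that fits an immediate encoding; constants that
+  // fit neither are materialized in IP with MVN or MOVW(+MOVT) and compared
+  // register-register.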
+  __ CmpConstant(R0, 0);                              // 16-bit CMP.
+  __ CmpConstant(R1, 1);                              // 16-bit CMP.
+  __ CmpConstant(R0, 7);                              // 16-bit CMP.
+  __ CmpConstant(R1, 8);                              // 16-bit CMP.
+  __ CmpConstant(R0, 255);                            // 16-bit CMP.
+  __ CmpConstant(R1, 256);                            // 32-bit CMP.
+  __ CmpConstant(R0, 257);                            // MVN+CMN.
+  __ CmpConstant(R1, 0xfff);                          // MOVW+CMP.
+  __ CmpConstant(R0, 0x1000);                         // 32-bit CMP.
+  __ CmpConstant(R1, 0x1001);                         // MVN+CMN.
+  __ CmpConstant(R0, 0x1002);                         // MOVW+CMP.
+  __ CmpConstant(R1, 0xffff);                         // MOVW+CMP.
+  __ CmpConstant(R0, 0x10000);                        // 32-bit CMP.
+  __ CmpConstant(R1, 0x10001);                        // 32-bit CMP.
+  __ CmpConstant(R0, 0x10002);                        // MVN+CMN.
+  __ CmpConstant(R1, 0x10003);                        // MOVW+MOVT+CMP.
+  __ CmpConstant(R0, -1);                             // 32-bit CMP.
+  __ CmpConstant(R1, -7);                             // CMN.
+  __ CmpConstant(R0, -8);                             // CMN.
+  __ CmpConstant(R1, -255);                           // CMN.
+  __ CmpConstant(R0, -256);                           // CMN.
+  __ CmpConstant(R1, -257);                           // MVN+CMP.
+  __ CmpConstant(R0, -0xfff);                         // MOVW+CMN.
+  __ CmpConstant(R1, -0x1000);                        // CMN.
+  __ CmpConstant(R0, -0x1001);                        // MVN+CMP.
+  __ CmpConstant(R1, -0x1002);                        // MOVW+CMN.
+  __ CmpConstant(R0, -0xffff);                        // MOVW+CMN.
+  __ CmpConstant(R1, -0x10000);                       // CMN.
+  __ CmpConstant(R0, -0x10001);                       // CMN.
+  __ CmpConstant(R1, -0x10002);                       // MVN+CMP.
+  __ CmpConstant(R0, -0x10003);                       // MOVW+MOVT+CMP.
+
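+  // 16-bit CMP-immediate only encodes R0-R7; high registers use 32-bit CMP.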
+  __ CmpConstant(R8, 0);                              // 32-bit CMP.
+  __ CmpConstant(R9, 1);                              // 32-bit CMP.
+  __ CmpConstant(R8, 7);                              // 32-bit CMP.
+  __ CmpConstant(R9, 8);                              // 32-bit CMP.
+  __ CmpConstant(R8, 255);                            // 32-bit CMP.
+  __ CmpConstant(R9, 256);                            // 32-bit CMP.
+  __ CmpConstant(R8, 257);                            // MVN+CMN.
+  __ CmpConstant(R9, 0xfff);                          // MOVW+CMP.
+  __ CmpConstant(R8, 0x1000);                         // 32-bit CMP.
+  __ CmpConstant(R9, 0x1001);                         // MVN+CMN.
+  __ CmpConstant(R8, 0x1002);                         // MOVW+CMP.
+  __ CmpConstant(R9, 0xffff);                         // MOVW+CMP.
+  __ CmpConstant(R8, 0x10000);                        // 32-bit CMP.
+  __ CmpConstant(R9, 0x10001);                        // 32-bit CMP.
+  __ CmpConstant(R8, 0x10002);                        // MVN+CMN.
+  __ CmpConstant(R9, 0x10003);                        // MOVW+MOVT+CMP.
+  __ CmpConstant(R8, -1);                             // 32-bit CMP.
+  __ CmpConstant(R9, -7);                             // CMN.
+  __ CmpConstant(R8, -8);                             // CMN.
+  __ CmpConstant(R9, -255);                           // CMN.
+  __ CmpConstant(R8, -256);                           // CMN.
+  __ CmpConstant(R9, -257);                           // MVN+CMP.
+  __ CmpConstant(R8, -0xfff);                         // MOVW+CMN.
+  __ CmpConstant(R9, -0x1000);                        // CMN.
+  __ CmpConstant(R8, -0x1001);                        // MVN+CMP.
+  __ CmpConstant(R9, -0x1002);                        // MOVW+CMN.
+  __ CmpConstant(R8, -0xffff);                        // MOVW+CMN.
+  __ CmpConstant(R9, -0x10000);                       // CMN.
+  __ CmpConstant(R8, -0x10001);                       // CMN.
+  __ CmpConstant(R9, -0x10002);                       // MVN+CMP.
+  __ CmpConstant(R8, -0x10003);                       // MOVW+MOVT+CMP.
+
+  EmitAndCheck(&assembler, "CmpConstant");
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index b79c2e4..6736015 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -1,4 +1,4 @@
-const char* SimpleMovResults[] = {
+const char* const SimpleMovResults[] = {
   "   0:	0008      	movs	r0, r1\n",
   "   2:	4608      	mov	r0, r1\n",
   "   4:	46c8      	mov	r8, r9\n",
@@ -6,18 +6,18 @@
   "   8:	f04f 0809 	mov.w	r8, #9\n",
   nullptr
 };
-const char* SimpleMov32Results[] = {
+const char* const SimpleMov32Results[] = {
   "   0:	ea4f 0001 	mov.w	r0, r1\n",
   "   4:	ea4f 0809 	mov.w	r8, r9\n",
   nullptr
 };
-const char* SimpleMovAddResults[] = {
+const char* const SimpleMovAddResults[] = {
   "   0:	4608      	mov	r0, r1\n",
   "   2:	1888      	adds	r0, r1, r2\n",
   "   4:	1c08      	adds	r0, r1, #0\n",
   nullptr
 };
-const char* DataProcessingRegisterResults[] = {
+const char* const DataProcessingRegisterResults[] = {
   "   0:	ea6f 0001 	mvn.w	r0, r1\n",
   "   4:	eb01 0002 	add.w	r0, r1, r2\n",
   "   8:	eba1 0002 	sub.w	r0, r1, r2\n",
@@ -129,11 +129,11 @@
   " 120:	eb01 0c00 	add.w	ip, r1, r0\n",
   nullptr
 };
-const char* DataProcessingImmediateResults[] = {
+const char* const DataProcessingImmediateResults[] = {
   "   0:	2055      	movs	r0, #85	; 0x55\n",
   "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
-  "   6:	f201 0055 	addw	r0, r1, #85	; 0x55\n",
-  "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
+  "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
+  "   a:	f1a1 0055 	sub.w	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
   "  16:	f061 0055 	orn	r0, r1, #85	; 0x55\n",
@@ -154,7 +154,7 @@
   "  48:	1f48      	subs	r0, r1, #5\n",
   nullptr
 };
-const char* DataProcessingModifiedImmediateResults[] = {
+const char* const DataProcessingModifiedImmediateResults[] = {
   "   0:	f04f 1055 	mov.w	r0, #5570645	; 0x550055\n",
   "   4:	f06f 1055 	mvn.w	r0, #5570645	; 0x550055\n",
   "   8:	f101 1055 	add.w	r0, r1, #5570645	; 0x550055\n",
@@ -173,7 +173,7 @@
   "  3c:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
   nullptr
 };
-const char* DataProcessingModifiedImmediatesResults[] = {
+const char* const DataProcessingModifiedImmediatesResults[] = {
   "   0:	f04f 1055 	mov.w	r0, #5570645	; 0x550055\n",
   "   4:	f04f 2055 	mov.w	r0, #1426085120	; 0x55005500\n",
   "   8:	f04f 3055 	mov.w	r0, #1431655765	; 0x55555555\n",
@@ -183,7 +183,7 @@
   "  18:	f44f 70d4 	mov.w	r0, #424	; 0x1a8\n",
   nullptr
 };
-const char* DataProcessingShiftedRegisterResults[] = {
+const char* const DataProcessingShiftedRegisterResults[] = {
   "   0:	0123      	lsls	r3, r4, #4\n",
   "   2:	0963      	lsrs	r3, r4, #5\n",
   "   4:	11a3      	asrs	r3, r4, #6\n",
@@ -201,7 +201,25 @@
   "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
-const char* BasicLoadResults[] = {
+const char* const ShiftImmediateResults[] = {
+  "   0:  0123        lsls  r3, r4, #4\n",
+  "   2:  0963        lsrs  r3, r4, #5\n",
+  "   4:  11a3        asrs  r3, r4, #6\n",
+  "   6:  ea4f 13f4   mov.w  r3, r4, ror #7\n",
+  "   a:  ea4f 0334   mov.w  r3, r4, rrx\n",
+  "   e:  ea4f 1304   mov.w r3, r4, lsl #4\n",
+  "  12:  ea4f 1354   mov.w r3, r4, lsr #5\n",
+  "  16:  ea4f 13a4   mov.w r3, r4, asr #6\n",
+  "  1a:  ea4f 13f4   mov.w r3, r4, ror #7\n",
+  "  1e:  ea4f 0334   mov.w r3, r4, rrx\n",
+  "  22:  ea5f 1804   movs.w  r8, r4, lsl #4\n",
+  "  26:  ea5f 1854   movs.w  r8, r4, lsr #5\n",
+  "  2a:  ea5f 18a4   movs.w  r8, r4, asr #6\n",
+  "  2e:  ea5f 18f4   movs.w  r8, r4, ror #7\n",
+  "  32:  ea5f 0834   movs.w  r8, r4, rrx\n",
+  nullptr
+};
+const char* const BasicLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	7e23      	ldrb	r3, [r4, #24]\n",
   "   4:	8b23      	ldrh	r3, [r4, #24]\n",
@@ -215,7 +233,7 @@
   "  20:	f9b4 8018 	ldrsh.w	r8, [r4, #24]\n",
   nullptr
 };
-const char* BasicStoreResults[] = {
+const char* const BasicStoreResults[] = {
   "   0:	61a3      	str	r3, [r4, #24]\n",
   "   2:	7623      	strb	r3, [r4, #24]\n",
   "   4:	8323      	strh	r3, [r4, #24]\n",
@@ -225,7 +243,7 @@
   "  10:	f8a4 8018 	strh.w	r8, [r4, #24]\n",
   nullptr
 };
-const char* ComplexLoadResults[] = {
+const char* const ComplexLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	f854 3f18 	ldr.w	r3, [r4, #24]!\n",
   "   6:	f854 3b18 	ldr.w	r3, [r4], #24\n",
@@ -258,7 +276,7 @@
   "  6e:	f934 3918 	ldrsh.w	r3, [r4], #-24\n",
   nullptr
 };
-const char* ComplexStoreResults[] = {
+const char* const ComplexStoreResults[] = {
   "   0:	61a3      	str	r3, [r4, #24]\n",
   "   2:	f844 3f18 	str.w	r3, [r4, #24]!\n",
   "   6:	f844 3b18 	str.w	r3, [r4], #24\n",
@@ -279,7 +297,7 @@
   "  3e:	f824 3918 	strh.w	r3, [r4], #-24\n",
   nullptr
 };
-const char* NegativeLoadStoreResults[] = {
+const char* const NegativeLoadStoreResults[] = {
   "   0:	f854 3c18 	ldr.w	r3, [r4, #-24]\n",
   "   4:	f854 3d18 	ldr.w	r3, [r4, #-24]!\n",
   "   8:	f854 3918 	ldr.w	r3, [r4], #-24\n",
@@ -330,12 +348,12 @@
   "  bc:	f824 3b18 	strh.w	r3, [r4], #24\n",
   nullptr
 };
-const char* SimpleLoadStoreDualResults[] = {
+const char* const SimpleLoadStoreDualResults[] = {
   "   0:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
   "   4:	e9d0 2306 	ldrd	r2, r3, [r0, #24]\n",
   nullptr
 };
-const char* ComplexLoadStoreDualResults[] = {
+const char* const ComplexLoadStoreDualResults[] = {
   "   0:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
   "   4:	e9e0 2306 	strd	r2, r3, [r0, #24]!\n",
   "   8:	e8e0 2306 	strd	r2, r3, [r0], #24\n",
@@ -350,7 +368,7 @@
   "  2c:	e870 2306 	ldrd	r2, r3, [r0], #-24\n",
   nullptr
 };
-const char* NegativeLoadStoreDualResults[] = {
+const char* const NegativeLoadStoreDualResults[] = {
   "   0:	e940 2306 	strd	r2, r3, [r0, #-24]\n",
   "   4:	e960 2306 	strd	r2, r3, [r0, #-24]!\n",
   "   8:	e860 2306 	strd	r2, r3, [r0], #-24\n",
@@ -365,7 +383,7 @@
   "  2c:	e8f0 2306 	ldrd	r2, r3, [r0], #24\n",
   nullptr
 };
-const char* SimpleBranchResults[] = {
+const char* const SimpleBranchResults[] = {
   "   0:	2002      	movs	r0, #2\n",
   "   2:	2101      	movs	r1, #1\n",
   "   4:	e7fd      	b.n	2 <SimpleBranch+0x2>\n",
@@ -385,7 +403,7 @@
   "  20:	2006      	movs	r0, #6\n",
   nullptr
 };
-const char* LongBranchResults[] = {
+const char* const LongBranchResults[] = {
   "   0:	f04f 0002 	mov.w	r0, #2\n",
   "   4:	f04f 0101 	mov.w	r1, #1\n",
   "   8:	f7ff bffc 	b.w	4 <LongBranch+0x4>\n",
@@ -405,14 +423,14 @@
   "  40:	f04f 0006 	mov.w	r0, #6\n",
   nullptr
 };
-const char* LoadMultipleResults[] = {
+const char* const LoadMultipleResults[] = {
   "   0:	cc09      	ldmia	r4!, {r0, r3}\n",
   "   2:	e934 4800 	ldmdb	r4!, {fp, lr}\n",
   "   6:	e914 4800 	ldmdb	r4, {fp, lr}\n",
   "   a:	f854 5b04 	ldr.w	r5, [r4], #4\n",
   nullptr
 };
-const char* StoreMultipleResults[] = {
+const char* const StoreMultipleResults[] = {
   "   0:	c409      	stmia	r4!, {r0, r3}\n",
   "   2:	e8a4 4800 	stmia.w	r4!, {fp, lr}\n",
   "   6:	e884 4800 	stmia.w	r4, {fp, lr}\n",
@@ -420,40 +438,132 @@
   "   e:	f844 5d04 	str.w	r5, [r4, #-4]!\n",
   nullptr
 };
-const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+const char* const MovWMovTResults[] = {
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
-const char* SpecialAddSubResults[] = {
+const char* const SpecialAddSubResults[] = {
   "   0:	aa14      	add	r2, sp, #80	; 0x50\n",
   "   2:	b014      	add	sp, #80		; 0x50\n",
-  "   4:	f20d 0850 	addw	r8, sp, #80	; 0x50\n",
-  "   8:	f60d 7200 	addw	r2, sp, #3840	; 0xf00\n",
-  "   c:	f60d 7d00 	addw	sp, sp, #3840	; 0xf00\n",
-  "  10:	b094      	sub	sp, #80		; 0x50\n",
-  "  12:	f2ad 0050 	subw	r0, sp, #80	; 0x50\n",
-  "  16:	f2ad 0850 	subw	r8, sp, #80	; 0x50\n",
-  "  1a:	f6ad 7d00 	subw	sp, sp, #3840	; 0xf00\n",
+  "   4:	f10d 0850 	add.w	r8, sp, #80	; 0x50\n",
+  "   8:	f50d 6270 	add.w	r2, sp, #3840	; 0xf00\n",
+  "   c:	f50d 6d70 	add.w	sp, sp, #3840	; 0xf00\n",
+  "  10:	f60d 7dfc 	addw	sp, sp, #4092	; 0xffc\n",
+  "  14:	b094      	sub	sp, #80		; 0x50\n",
+  "  16:	f1ad 0050 	sub.w	r0, sp, #80	; 0x50\n",
+  "  1a:	f1ad 0850 	sub.w	r8, sp, #80	; 0x50\n",
+  "  1e:	f5ad 6d70 	sub.w	sp, sp, #3840	; 0xf00\n",
+  "  22:	f6ad 7dfc 	subw	sp, sp, #4092	; 0xffc\n",
   nullptr
 };
-const char* StoreToOffsetResults[] = {
+const char* const LoadFromOffsetResults[] = {
+  "   0:	68e2      	ldr	r2, [r4, #12]\n",
+  "   2:	f8d4 2fff 	ldr.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "   a:	6812      	ldr	r2, [r2, #0]\n",
+  "   c:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  10:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  14:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  18:	f2c0 0210 	movt	r2, #16\n",
+  "  1c:	4422      	add	r2, r4\n",
+  "  1e:	6812      	ldr	r2, [r2, #0]\n",
+  "  20:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  24:	f2c0 0c10 	movt	ip, #16\n",
+  "  28:	4464      	add	r4, ip\n",
+  "  2a:	6824      	ldr	r4, [r4, #0]\n",
+  "  2c:	89a2      	ldrh	r2, [r4, #12]\n",
+  "  2e:	f8b4 2fff 	ldrh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  32:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "  36:	8812      	ldrh	r2, [r2, #0]\n",
+  "  38:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  3c:	f8b2 20a4 	ldrh.w	r2, [r2, #164]	; 0xa4\n",
+  "  40:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  44:	f2c0 0210 	movt	r2, #16\n",
+  "  48:	4422      	add	r2, r4\n",
+  "  4a:	8812      	ldrh	r2, [r2, #0]\n",
+  "  4c:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  50:	f2c0 0c10 	movt	ip, #16\n",
+  "  54:	4464      	add	r4, ip\n",
+  "  56:	8824      	ldrh	r4, [r4, #0]\n",
+  "  58:	e9d4 2303 	ldrd	r2, r3, [r4, #12]\n",
+  "  5c:	e9d4 23ff 	ldrd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  60:	f504 6280 	add.w	r2, r4, #1024	; 0x400\n",
+  "  64:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  68:	f504 2280 	add.w	r2, r4, #262144	; 0x40000\n",
+  "  6c:	e9d2 2329 	ldrd	r2, r3, [r2, #164];	0xa4\n",
+  "  70:	f240 4200 	movw	r2, #1024	; 0x400\n",
+  "  74:	f2c0 0204 	movt	r2, #4\n",
+  "  78:	4422      	add	r2, r4\n",
+  "  7a:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  7e:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  82:	f2c0 0c04 	movt	ip, #4\n",
+  "  86:	4464      	add	r4, ip\n",
+  "  88:	e9d4 4500 	ldrd	r4, r5, [r4]\n",
+  "  8c:	f8dc 000c 	ldr.w	r0, [ip, #12]\n",
+  "  90:	f5a4 1280 	sub.w	r2, r4, #1048576	; 0x100000\n",
+  "  94:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  98:	f994 200c 	ldrsb.w	r2, [r4, #12]\n",
+  "  9c:	7b22      	ldrb	r2, [r4, #12]\n",
+  "  9e:	f9b4 200c 	ldrsh.w	r2, [r4, #12]\n",
+  nullptr
+};
+const char* const StoreToOffsetResults[] = {
   "   0:	60e2      	str	r2, [r4, #12]\n",
-  "   2:	f44f 5c00 	mov.w	ip, #8192	; 0x2000\n",
-  "   6:	44a4      	add	ip, r4\n",
-  "   8:	f8cc 2000 	str.w	r2, [ip]\n",
-  "   c:	f8cc 000c 	str.w	r0, [ip, #12]\n",
-  "   10:	f8ac 000c 	strh.w	r0, [ip, #12]\n",
-  "   14:	f88c 200c 	strb.w	r2, [ip, #12]\n",
+  "   2:	f8c4 2fff 	str.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "   a:	f8cc 2000 	str.w	r2, [ip]\n",
+  "   e:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  12:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  16:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  1a:	f2c0 0c10 	movt	ip, #16\n",
+  "  1e:	44a4      	add	ip, r4\n",
+  "  20:	f8cc 2000 	str.w	r2, [ip]\n",
+  "  24:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  28:	f2c0 0c10 	movt	ip, #16\n",
+  "  2c:	44a4      	add	ip, r4\n",
+  "  2e:	f8cc 4000 	str.w	r4, [ip]\n",
+  "  32:	81a2      	strh	r2, [r4, #12]\n",
+  "  34:	f8a4 2fff 	strh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  38:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "  3c:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  40:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  44:	f8ac 20a4 	strh.w	r2, [ip, #164]	; 0xa4\n",
+  "  48:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  4c:	f2c0 0c10 	movt	ip, #16\n",
+  "  50:	44a4      	add	ip, r4\n",
+  "  52:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  56:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  5a:	f2c0 0c10 	movt	ip, #16\n",
+  "  5e:	44a4      	add	ip, r4\n",
+  "  60:	f8ac 4000 	strh.w	r4, [ip]\n",
+  "  64:	e9c4 2303 	strd	r2, r3, [r4, #12]\n",
+  "  68:	e9c4 23ff 	strd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  6c:	f504 6c80 	add.w	ip, r4, #1024	; 0x400\n",
+  "  70:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  74:	f504 2c80 	add.w	ip, r4, #262144	; 0x40000\n",
+  "  78:	e9cc 2329 	strd	r2, r3, [ip, #164];	0xa4\n",
+  "  7c:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  80:	f2c0 0c04 	movt	ip, #4\n",
+  "  84:	44a4      	add	ip, r4\n",
+  "  86:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  8a:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  8e:	f2c0 0c04 	movt	ip, #4\n",
+  "  92:	44a4      	add	ip, r4\n",
+  "  94:	e9cc 4500 	strd	r4, r5, [ip]\n",
+  "  98:	f8cc 000c 	str.w	r0, [ip, #12]\n",
+  "  9c:	f5a4 1c80 	sub.w	ip, r4, #1048576	; 0x100000\n",
+  "  a0:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  a4:	7322      	strb	r2, [r4, #12]\n",
   nullptr
 };
-const char* IfThenResults[] = {
+const char* const IfThenResults[] = {
   "   0:	bf08      	it	eq\n",
   "   2:	2101      	moveq	r1, #1\n",
   "   4:	bf04      	itt	eq\n",
@@ -477,7 +587,7 @@
   "  28:	2404      	movne	r4, #4\n",
   nullptr
 };
-const char* CbzCbnzResults[] = {
+const char* const CbzCbnzResults[] = {
   "   0:	b10a      	cbz	r2, 6 <CbzCbnz+0x6>\n",
   "   2:	2103      	movs	r1, #3\n",
   "   4:	2203      	movs	r2, #3\n",
@@ -488,7 +598,7 @@
   "  10:	2204      	movs	r2, #4\n",
   nullptr
 };
-const char* MultiplyResults[] = {
+const char* const MultiplyResults[] = {
   "   0:	4348      	muls	r0, r1\n",
   "   2:	fb01 f002 	mul.w	r0, r1, r2\n",
   "   6:	fb09 f808 	mul.w	r8, r9, r8\n",
@@ -501,21 +611,21 @@
   "  22:	fbaa 890b 	umull	r8, r9, sl, fp\n",
   nullptr
 };
-const char* DivideResults[] = {
+const char* const DivideResults[] = {
   "   0:	fb91 f0f2 	sdiv	r0, r1, r2\n",
   "   4:	fb99 f8fa 	sdiv	r8, r9, sl\n",
   "   8:	fbb1 f0f2 	udiv	r0, r1, r2\n",
   "   c:	fbb9 f8fa 	udiv	r8, r9, sl\n",
   nullptr
 };
-const char* VMovResults[] = {
+const char* const VMovResults[] = {
   "   0:	eef7 0a00 	vmov.f32	s1, #112	; 0x70\n",
   "   4:	eeb7 1b00 	vmov.f64	d1, #112	; 0x70\n",
   "   8:	eef0 0a41 	vmov.f32	s1, s2\n",
   "   c:	eeb0 1b42 	vmov.f64	d1, d2\n",
   nullptr
 };
-const char* BasicFloatingPointResults[] = {
+const char* const BasicFloatingPointResults[] = {
   "   0:	ee30 0a81 	vadd.f32	s0, s1, s2\n",
   "   4:	ee30 0ac1 	vsub.f32	s0, s1, s2\n",
   "   8:	ee20 0a81 	vmul.f32	s0, s1, s2\n",
@@ -536,7 +646,7 @@
   "  44:	eeb1 0bc1 	vsqrt.f64	d0, d1\n",
   nullptr
 };
-const char* FloatingPointConversionsResults[] = {
+const char* const FloatingPointConversionsResults[] = {
   "   0:	eeb7 1bc2 	vcvt.f32.f64	s2, d2\n",
   "   4:	eeb7 2ac1 	vcvt.f64.f32	d2, s2\n",
   "   8:	eefd 0ac1 	vcvt.s32.f32	s1, s2\n",
@@ -549,35 +659,35 @@
   "  24:	eeb8 1b41 	vcvt.f64.u32	d1, s2\n",
   nullptr
 };
-const char* FloatingPointComparisonsResults[] = {
+const char* const FloatingPointComparisonsResults[] = {
   "   0:	eeb4 0a60 	vcmp.f32	s0, s1\n",
   "   4:	eeb4 0b41 	vcmp.f64	d0, d1\n",
   "   8:	eeb5 1a40 	vcmp.f32	s2, #0.0\n",
   "   c:	eeb5 2b40 	vcmp.f64	d2, #0.0\n",
   nullptr
 };
-const char* CallsResults[] = {
+const char* const CallsResults[] = {
   "   0:	47f0      	blx	lr\n",
   "   2:	4770      	bx	lr\n",
   nullptr
 };
-const char* BreakpointResults[] = {
+const char* const BreakpointResults[] = {
   "   0:	be00      	bkpt	0x0000\n",
   nullptr
 };
-const char* StrR1Results[] = {
+const char* const StrR1Results[] = {
   "   0:	9111      	str	r1, [sp, #68]	; 0x44\n",
   "   2:	f8cd 142c 	str.w	r1, [sp, #1068]	; 0x42c\n",
   nullptr
 };
-const char* VPushPopResults[] = {
+const char* const VPushPopResults[] = {
   "   0:	ed2d 1a04 	vpush	{s2-s5}\n",
   "   4:	ed2d 2b08 	vpush	{d2-d5}\n",
   "   8:	ecbd 1a04 	vpop	{s2-s5}\n",
   "   c:	ecbd 2b08 	vpop	{d2-d5}\n",
   nullptr
 };
-const char* Max16BitBranchResults[] = {
+const char* const Max16BitBranchResults[] = {
   "   0:	e3ff      	b.n	802 <Max16BitBranch+0x802>\n",
   "   2:	2300      	movs	r3, #0\n",
   "   4:	2302      	movs	r3, #2\n",
@@ -1606,7 +1716,7 @@
   " 802:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* Branch32Results[] = {
+const char* const Branch32Results[] = {
   "   0:	f000 bc01 	b.w	806 <Branch32+0x806>\n",
   "   4:	2300      	movs	r3, #0\n",
   "   6:	2302      	movs	r3, #2\n",
@@ -2636,7 +2746,7 @@
   " 806:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* CompareAndBranchMaxResults[] = {
+const char* const CompareAndBranchMaxResults[] = {
   "   0:	b3fc      	cbz	r4, 82 <CompareAndBranchMax+0x82>\n",
   "   2:	2300      	movs	r3, #0\n",
   "   4:	2302      	movs	r3, #2\n",
@@ -2705,7 +2815,7 @@
   "  82:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* CompareAndBranchRelocation16Results[] = {
+const char* const CompareAndBranchRelocation16Results[] = {
   "   0:	2c00      	cmp	r4, #0\n",
   "   2:	d040      	beq.n	86 <CompareAndBranchRelocation16+0x86>\n",
   "   4:	2300      	movs	r3, #0\n",
@@ -2776,7 +2886,7 @@
   "  86:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* CompareAndBranchRelocation32Results[] = {
+const char* const CompareAndBranchRelocation32Results[] = {
   "   0:	2c00      	cmp	r4, #0\n",
   "   2:	f000 8401 	beq.w	808 <CompareAndBranchRelocation32+0x808>\n",
   "   6:	2300      	movs	r3, #0\n",
@@ -3807,7 +3917,7 @@
   " 808:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* MixedBranch32Results[] = {
+const char* const MixedBranch32Results[] = {
   "   0:	f000 bc03 	b.w	80a <MixedBranch32+0x80a>\n",
   "   4:	2300      	movs	r3, #0\n",
   "   6:	2302      	movs	r3, #2\n",
@@ -4838,7 +4948,7 @@
   " 80a:	4611      	mov	r1, r2\n",
   nullptr
 };
-const char* ShiftsResults[] = {
+const char* const ShiftsResults[] = {
   "   0:	0148      	lsls	r0, r1, #5\n",
   "   2:	0948      	lsrs	r0, r1, #5\n",
   "   4:	1148      	asrs	r0, r1, #5\n",
@@ -4887,7 +4997,7 @@
   "  98:	fa51 f008 	asrs.w	r0, r1, r8\n",
   nullptr
 };
-const char* LoadStoreRegOffsetResults[] = {
+const char* const LoadStoreRegOffsetResults[] = {
   "   0:	5888      	ldr	r0, [r1, r2]\n",
   "   2:	5088      	str	r0, [r1, r2]\n",
   "   4:	f851 0012 	ldr.w	r0, [r1, r2, lsl #1]\n",
@@ -4902,7 +5012,7 @@
   "  28:	f841 0008 	str.w	r0, [r1, r8]\n",
   nullptr
 };
-const char* LoadStoreLiteralResults[] = {
+const char* const LoadStoreLiteralResults[] = {
   "   0:   4801            ldr     r0, [pc, #4]    ; (8 <LoadStoreLiteral+0x8>)\n",
   "   2:   f8cf 0004       str.w   r0, [pc, #4]    ; 8 <LoadStoreLiteral+0x8>\n",
   "   6:   f85f 0008       ldr.w   r0, [pc, #-8]   ; 0 <LoadStoreLiteral>\n",
@@ -4913,7 +5023,7 @@
   "  18:   f8cf 07ff       str.w   r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n",
   nullptr
 };
-const char* LoadStoreLimitsResults[] = {
+const char* const LoadStoreLimitsResults[] = {
   "   0:   6fe0            ldr     r0, [r4, #124]  ; 0x7c\n",
   "   2:   f8d4 0080       ldr.w   r0, [r4, #128]  ; 0x80\n",
   "   6:   7fe0            ldrb    r0, [r4, #31]\n",
@@ -4932,7 +5042,7 @@
   "  30:   f8a4 0040       strh.w  r0, [r4, #64]   ; 0x40\n",
   nullptr
 };
-const char* CompareAndBranchResults[] = {
+const char* const CompareAndBranchResults[] = {
   "  0: b130        cbz r0, 10 <CompareAndBranch+0x10>\n",
   "  2: f1bb 0f00   cmp.w fp, #0\n",
   "  6: d003        beq.n 10 <CompareAndBranch+0x10>\n",
@@ -4942,6 +5052,422 @@
   nullptr
 };
 
+const char* const AddConstantResults[] = {
+  "   0:	4608      	mov	r0, r1\n",
+  "   2:	1c48      	adds	r0, r1, #1\n",
+  "   4:	1dc8      	adds	r0, r1, #7\n",
+  "   6:	f101 0008 	add.w	r0, r1, #8\n",
+  "   a:	f101 00ff 	add.w	r0, r1, #255	; 0xff\n",
+  "   e:	f501 7080 	add.w	r0, r1, #256	; 0x100\n",
+  "  12:	f201 1001 	addw	r0, r1, #257	; 0x101\n",
+  "  16:	f601 70ff 	addw	r0, r1, #4095	; 0xfff\n",
+  "  1a:	f501 5080 	add.w	r0, r1, #4096	; 0x1000\n",
+  "  1e:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  "  22:	1a08      	subs	r0, r1, r0\n",
+  "  24:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  "  28:	1808      	adds	r0, r1, r0\n",
+  "  2a:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  "  2e:	1808      	adds	r0, r1, r0\n",
+  "  30:	f501 3080 	add.w	r0, r1, #65536	; 0x10000\n",
+  "  34:	f101 1001 	add.w	r0, r1, #65537	; 0x10001\n",
+  "  38:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  "  3c:	1a08      	subs	r0, r1, r0\n",
+  "  3e:	f240 0003 	movw	r0, #3\n",
+  "  42:	f2c0 0001 	movt	r0, #1\n",
+  "  46:	1808      	adds	r0, r1, r0\n",
+  "  48:	1e48      	subs	r0, r1, #1\n",
+  "  4a:	1fc8      	subs	r0, r1, #7\n",
+  "  4c:	f1a1 0008 	sub.w	r0, r1, #8\n",
+  "  50:	f1a1 00ff 	sub.w	r0, r1, #255	; 0xff\n",
+  "  54:	f5a1 7080 	sub.w	r0, r1, #256	; 0x100\n",
+  "  58:	f2a1 1001 	subw	r0, r1, #257	; 0x101\n",
+  "  5c:	f6a1 70ff 	subw	r0, r1, #4095	; 0xfff\n",
+  "  60:	f5a1 5080 	sub.w	r0, r1, #4096	; 0x1000\n",
+  "  64:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  "  68:	1808      	adds	r0, r1, r0\n",
+  "  6a:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  "  6e:	1a08      	subs	r0, r1, r0\n",
+  "  70:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  "  74:	1a08      	subs	r0, r1, r0\n",
+  "  76:	f5a1 3080 	sub.w	r0, r1, #65536	; 0x10000\n",
+  "  7a:	f1a1 1001 	sub.w	r0, r1, #65537	; 0x10001\n",
+  "  7e:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  "  82:	1808      	adds	r0, r1, r0\n",
+  "  84:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  "  88:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  "  8c:	1808      	adds	r0, r1, r0\n",
+  "  8e:	3101      	adds	r1, #1\n",
+  "  90:	3007      	adds	r0, #7\n",
+  "  92:	3108      	adds	r1, #8\n",
+  "  94:	30ff      	adds	r0, #255	; 0xff\n",
+  "  96:	f501 7180 	add.w	r1, r1, #256	; 0x100\n",
+  "  9a:	f200 1001 	addw	r0, r0, #257	; 0x101\n",
+  "  9e:	f601 71ff 	addw	r1, r1, #4095	; 0xfff\n",
+  "  a2:	f500 5080 	add.w	r0, r0, #4096	; 0x1000\n",
+  "  a6:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  aa:	eba1 010c 	sub.w	r1, r1, ip\n",
+  "  ae:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  b2:	4460      	add	r0, ip\n",
+  "  b4:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  b8:	4461      	add	r1, ip\n",
+  "  ba:	f500 3080 	add.w	r0, r0, #65536	; 0x10000\n",
+  "  be:	f101 1101 	add.w	r1, r1, #65537	; 0x10001\n",
+  "  c2:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  c6:	eba0 000c 	sub.w	r0, r0, ip\n",
+  "  ca:	f240 0c03 	movw	ip, #3\n",
+  "  ce:	f2c0 0c01 	movt	ip, #1\n",
+  "  d2:	4461      	add	r1, ip\n",
+  "  d4:	3801      	subs	r0, #1\n",
+  "  d6:	3907      	subs	r1, #7\n",
+  "  d8:	3808      	subs	r0, #8\n",
+  "  da:	39ff      	subs	r1, #255	; 0xff\n",
+  "  dc:	f5a0 7080 	sub.w	r0, r0, #256	; 0x100\n",
+  "  e0:	f2a1 1101 	subw	r1, r1, #257	; 0x101\n",
+  "  e4:	f6a0 70ff 	subw	r0, r0, #4095	; 0xfff\n",
+  "  e8:	f5a1 5180 	sub.w	r1, r1, #4096	; 0x1000\n",
+  "  ec:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  f0:	4460      	add	r0, ip\n",
+  "  f2:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  f6:	eba1 010c 	sub.w	r1, r1, ip\n",
+  "  fa:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  fe:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 102:	f5a1 3180 	sub.w	r1, r1, #65536	; 0x10000\n",
+  " 106:	f1a0 1001 	sub.w	r0, r0, #65537	; 0x10001\n",
+  " 10a:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 10e:	4461      	add	r1, ip\n",
+  " 110:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 114:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 118:	4460      	add	r0, ip\n",
+  " 11a:	f101 0801 	add.w	r8, r1, #1\n",
+  " 11e:	f108 0007 	add.w	r0, r8, #7\n",
+  " 122:	f108 0808 	add.w	r8, r8, #8\n",
+  " 126:	f101 08ff 	add.w	r8, r1, #255	; 0xff\n",
+  " 12a:	f508 7080 	add.w	r0, r8, #256	; 0x100\n",
+  " 12e:	f208 1801 	addw	r8, r8, #257	; 0x101\n",
+  " 132:	f601 78ff 	addw	r8, r1, #4095	; 0xfff\n",
+  " 136:	f508 5080 	add.w	r0, r8, #4096	; 0x1000\n",
+  " 13a:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 13e:	eba8 080c 	sub.w	r8, r8, ip\n",
+  " 142:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 146:	1808      	adds	r0, r1, r0\n",
+  " 148:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 14c:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 150:	f508 3880 	add.w	r8, r8, #65536	; 0x10000\n",
+  " 154:	f101 1801 	add.w	r8, r1, #65537	; 0x10001\n",
+  " 158:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 15c:	eba8 0000 	sub.w	r0, r8, r0\n",
+  " 160:	f240 0003 	movw	r0, #3\n",
+  " 164:	f2c0 0001 	movt	r0, #1\n",
+  " 168:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 16c:	f108 38ff 	add.w	r8, r8, #4294967295	; 0xffffffff\n",
+  " 170:	f1a1 0807 	sub.w	r8, r1, #7\n",
+  " 174:	f1a8 0008 	sub.w	r0, r8, #8\n",
+  " 178:	f1a8 08ff 	sub.w	r8, r8, #255	; 0xff\n",
+  " 17c:	f5a1 7880 	sub.w	r8, r1, #256	; 0x100\n",
+  " 180:	f2a8 1001 	subw	r0, r8, #257	; 0x101\n",
+  " 184:	f6a8 78ff 	subw	r8, r8, #4095	; 0xfff\n",
+  " 188:	f5a1 5880 	sub.w	r8, r1, #4096	; 0x1000\n",
+  " 18c:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 190:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 194:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 198:	1a08      	subs	r0, r1, r0\n",
+  " 19a:	f64f 78ff 	movw	r8, #65535	; 0xffff\n",
+  " 19e:	eba1 0808 	sub.w	r8, r1, r8\n",
+  " 1a2:	f5a8 3080 	sub.w	r0, r8, #65536	; 0x10000\n",
+  " 1a6:	f1a8 1801 	sub.w	r8, r8, #65537	; 0x10001\n",
+  " 1aa:	f06f 1801 	mvn.w	r8, #65537	; 0x10001\n",
+  " 1ae:	eb01 0808 	add.w	r8, r1, r8\n",
+  " 1b2:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 1b6:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 1ba:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 1be:	4608      	mov	r0, r1\n",
+  " 1c0:	f101 0001 	add.w	r0, r1, #1\n",
+  " 1c4:	f101 0007 	add.w	r0, r1, #7\n",
+  " 1c8:	f101 0008 	add.w	r0, r1, #8\n",
+  " 1cc:	f101 00ff 	add.w	r0, r1, #255	; 0xff\n",
+  " 1d0:	f501 7080 	add.w	r0, r1, #256	; 0x100\n",
+  " 1d4:	f201 1001 	addw	r0, r1, #257	; 0x101\n",
+  " 1d8:	f601 70ff 	addw	r0, r1, #4095	; 0xfff\n",
+  " 1dc:	f501 5080 	add.w	r0, r1, #4096	; 0x1000\n",
+  " 1e0:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 1e4:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 1e8:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 1ec:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 1f0:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 1f4:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 1f8:	f501 3080 	add.w	r0, r1, #65536	; 0x10000\n",
+  " 1fc:	f101 1001 	add.w	r0, r1, #65537	; 0x10001\n",
+  " 200:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 204:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 208:	f240 0003 	movw	r0, #3\n",
+  " 20c:	f2c0 0001 	movt	r0, #1\n",
+  " 210:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 214:	f101 30ff 	add.w	r0, r1, #4294967295	; 0xffffffff\n",
+  " 218:	f1a1 0007 	sub.w	r0, r1, #7\n",
+  " 21c:	f1a1 0008 	sub.w	r0, r1, #8\n",
+  " 220:	f1a1 00ff 	sub.w	r0, r1, #255	; 0xff\n",
+  " 224:	f5a1 7080 	sub.w	r0, r1, #256	; 0x100\n",
+  " 228:	f2a1 1001 	subw	r0, r1, #257	; 0x101\n",
+  " 22c:	f6a1 70ff 	subw	r0, r1, #4095	; 0xfff\n",
+  " 230:	f5a1 5080 	sub.w	r0, r1, #4096	; 0x1000\n",
+  " 234:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 238:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 23c:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 240:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 244:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 248:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 24c:	f5a1 3080 	sub.w	r0, r1, #65536	; 0x10000\n",
+  " 250:	f1a1 1001 	sub.w	r0, r1, #65537	; 0x10001\n",
+  " 254:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 258:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 25c:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 260:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 264:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 268:	f101 0101 	add.w	r1, r1, #1\n",
+  " 26c:	f100 0007 	add.w	r0, r0, #7\n",
+  " 270:	f101 0108 	add.w	r1, r1, #8\n",
+  " 274:	f100 00ff 	add.w	r0, r0, #255	; 0xff\n",
+  " 278:	f501 7180 	add.w	r1, r1, #256	; 0x100\n",
+  " 27c:	f200 1001 	addw	r0, r0, #257	; 0x101\n",
+  " 280:	f601 71ff 	addw	r1, r1, #4095	; 0xfff\n",
+  " 284:	f500 5080 	add.w	r0, r0, #4096	; 0x1000\n",
+  " 288:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 28c:	eba1 010c 	sub.w	r1, r1, ip\n",
+  " 290:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 294:	4460      	add	r0, ip\n",
+  " 296:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 29a:	4461      	add	r1, ip\n",
+  " 29c:	f500 3080 	add.w	r0, r0, #65536	; 0x10000\n",
+  " 2a0:	f101 1101 	add.w	r1, r1, #65537	; 0x10001\n",
+  " 2a4:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 2a8:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 2ac:	f240 0c03 	movw	ip, #3\n",
+  " 2b0:	f2c0 0c01 	movt	ip, #1\n",
+  " 2b4:	4461      	add	r1, ip\n",
+  " 2b6:	f100 30ff 	add.w	r0, r0, #4294967295	; 0xffffffff\n",
+  " 2ba:	f1a1 0107 	sub.w	r1, r1, #7\n",
+  " 2be:	f1a0 0008 	sub.w	r0, r0, #8\n",
+  " 2c2:	f1a1 01ff 	sub.w	r1, r1, #255	; 0xff\n",
+  " 2c6:	f5a0 7080 	sub.w	r0, r0, #256	; 0x100\n",
+  " 2ca:	f2a1 1101 	subw	r1, r1, #257	; 0x101\n",
+  " 2ce:	f6a0 70ff 	subw	r0, r0, #4095	; 0xfff\n",
+  " 2d2:	f5a1 5180 	sub.w	r1, r1, #4096	; 0x1000\n",
+  " 2d6:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 2da:	4460      	add	r0, ip\n",
+  " 2dc:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 2e0:	eba1 010c 	sub.w	r1, r1, ip\n",
+  " 2e4:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 2e8:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 2ec:	f5a1 3180 	sub.w	r1, r1, #65536	; 0x10000\n",
+  " 2f0:	f1a0 1001 	sub.w	r0, r0, #65537	; 0x10001\n",
+  " 2f4:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 2f8:	4461      	add	r1, ip\n",
+  " 2fa:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 2fe:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 302:	4460      	add	r0, ip\n",
+  " 304:	1c08      	adds	r0, r1, #0\n",
+  " 306:	1c48      	adds	r0, r1, #1\n",
+  " 308:	1dc8      	adds	r0, r1, #7\n",
+  " 30a:	f111 0008 	adds.w	r0, r1, #8\n",
+  " 30e:	f111 00ff 	adds.w	r0, r1, #255	; 0xff\n",
+  " 312:	f511 7080 	adds.w	r0, r1, #256	; 0x100\n",
+  " 316:	f46f 7080 	mvn.w	r0, #256	; 0x100\n",
+  " 31a:	1a08      	subs	r0, r1, r0\n",
+  " 31c:	f640 70ff 	movw	r0, #4095	; 0xfff\n",
+  " 320:	1808      	adds	r0, r1, r0\n",
+  " 322:	f511 5080 	adds.w	r0, r1, #4096	; 0x1000\n",
+  " 326:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 32a:	1a08      	subs	r0, r1, r0\n",
+  " 32c:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 330:	1808      	adds	r0, r1, r0\n",
+  " 332:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 336:	1808      	adds	r0, r1, r0\n",
+  " 338:	f511 3080 	adds.w	r0, r1, #65536	; 0x10000\n",
+  " 33c:	f111 1001 	adds.w	r0, r1, #65537	; 0x10001\n",
+  " 340:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 344:	1a08      	subs	r0, r1, r0\n",
+  " 346:	f240 0003 	movw	r0, #3\n",
+  " 34a:	f2c0 0001 	movt	r0, #1\n",
+  " 34e:	1808      	adds	r0, r1, r0\n",
+  " 350:	1e48      	subs	r0, r1, #1\n",
+  " 352:	1fc8      	subs	r0, r1, #7\n",
+  " 354:	f1b1 0008 	subs.w	r0, r1, #8\n",
+  " 358:	f1b1 00ff 	subs.w	r0, r1, #255	; 0xff\n",
+  " 35c:	f5b1 7080 	subs.w	r0, r1, #256	; 0x100\n",
+  " 360:	f46f 7080 	mvn.w	r0, #256	; 0x100\n",
+  " 364:	1808      	adds	r0, r1, r0\n",
+  " 366:	f640 70ff 	movw	r0, #4095	; 0xfff\n",
+  " 36a:	1a08      	subs	r0, r1, r0\n",
+  " 36c:	f5b1 5080 	subs.w	r0, r1, #4096	; 0x1000\n",
+  " 370:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 374:	1808      	adds	r0, r1, r0\n",
+  " 376:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 37a:	1a08      	subs	r0, r1, r0\n",
+  " 37c:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 380:	1a08      	subs	r0, r1, r0\n",
+  " 382:	f5b1 3080 	subs.w	r0, r1, #65536	; 0x10000\n",
+  " 386:	f1b1 1001 	subs.w	r0, r1, #65537	; 0x10001\n",
+  " 38a:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 38e:	1808      	adds	r0, r1, r0\n",
+  " 390:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 394:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 398:	1808      	adds	r0, r1, r0\n",
+  " 39a:	3000      	adds	r0, #0\n",
+  " 39c:	3101      	adds	r1, #1\n",
+  " 39e:	3007      	adds	r0, #7\n",
+  " 3a0:	3108      	adds	r1, #8\n",
+  " 3a2:	30ff      	adds	r0, #255	; 0xff\n",
+  " 3a4:	f511 7180 	adds.w	r1, r1, #256	; 0x100\n",
+  " 3a8:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 3ac:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 3b0:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 3b4:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3b8:	f510 5080 	adds.w	r0, r0, #4096	; 0x1000\n",
+  " 3bc:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 3c0:	ebb1 010c 	subs.w	r1, r1, ip\n",
+  " 3c4:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 3c8:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 3cc:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 3d0:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3d4:	f510 3080 	adds.w	r0, r0, #65536	; 0x10000\n",
+  " 3d8:	f111 1101 	adds.w	r1, r1, #65537	; 0x10001\n",
+  " 3dc:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 3e0:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 3e4:	f240 0c03 	movw	ip, #3\n",
+  " 3e8:	f2c0 0c01 	movt	ip, #1\n",
+  " 3ec:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3f0:	3801      	subs	r0, #1\n",
+  " 3f2:	3907      	subs	r1, #7\n",
+  " 3f4:	3808      	subs	r0, #8\n",
+  " 3f6:	39ff      	subs	r1, #255	; 0xff\n",
+  " 3f8:	f5b0 7080 	subs.w	r0, r0, #256	; 0x100\n",
+  " 3fc:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 400:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 404:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 408:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 40c:	f5b1 5180 	subs.w	r1, r1, #4096	; 0x1000\n",
+  " 410:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 414:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 418:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 41c:	ebb1 010c 	subs.w	r1, r1, ip\n",
+  " 420:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 424:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 428:	f5b1 3180 	subs.w	r1, r1, #65536	; 0x10000\n",
+  " 42c:	f1b0 1001 	subs.w	r0, r0, #65537	; 0x10001\n",
+  " 430:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 434:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 438:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 43c:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 440:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 444:	bf08      	it	eq\n",
+  " 446:	f111 0001 	addseq.w	r0, r1, #1\n",
+  " 44a:	bf18      	it	ne\n",
+  " 44c:	1c48      	addne	r0, r1, #1\n",
+  " 44e:	bfa8      	it	ge\n",
+  " 450:	f110 0001 	addsge.w	r0, r0, #1\n",
+  " 454:	bfd8      	it	le\n",
+  " 456:	3001      	addle	r0, #1\n",
+  nullptr
+};
+
+const char* const CmpConstantResults[] = {
+  "   0:	2800      	cmp	r0, #0\n",
+  "   2:	2901      	cmp	r1, #1\n",
+  "   4:	2807      	cmp	r0, #7\n",
+  "   6:	2908      	cmp	r1, #8\n",
+  "   8:	28ff      	cmp	r0, #255	; 0xff\n",
+  "   a:	f5b1 7f80 	cmp.w	r1, #256	; 0x100\n",
+  "   e:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  "  12:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  16:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  "  1a:	4561      	cmp	r1, ip\n",
+  "  1c:	f5b0 5f80 	cmp.w	r0, #4096	; 0x1000\n",
+  "  20:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  24:	eb11 0f0c 	cmn.w	r1, ip\n",
+  "  28:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  2c:	4560      	cmp	r0, ip\n",
+  "  2e:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  32:	4561      	cmp	r1, ip\n",
+  "  34:	f5b0 3f80 	cmp.w	r0, #65536	; 0x10000\n",
+  "  38:	f1b1 1f01 	cmp.w	r1, #65537	; 0x10001\n",
+  "  3c:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  40:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  44:	f240 0c03 	movw	ip, #3\n",
+  "  48:	f2c0 0c01 	movt	ip, #1\n",
+  "  4c:	4561      	cmp	r1, ip\n",
+  "  4e:	f1b0 3fff 	cmp.w	r0, #4294967295	; 0xffffffff\n",
+  "  52:	f111 0f07 	cmn.w	r1, #7\n",
+  "  56:	f110 0f08 	cmn.w	r0, #8\n",
+  "  5a:	f111 0fff 	cmn.w	r1, #255	; 0xff\n",
+  "  5e:	f510 7f80 	cmn.w	r0, #256	; 0x100\n",
+  "  62:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  "  66:	4561      	cmp	r1, ip\n",
+  "  68:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  "  6c:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  70:	f511 5f80 	cmn.w	r1, #4096	; 0x1000\n",
+  "  74:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  78:	4560      	cmp	r0, ip\n",
+  "  7a:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  7e:	eb11 0f0c 	cmn.w	r1, ip\n",
+  "  82:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  86:	eb10 0f0c 	cmn.w	r0, ip\n",
+  "  8a:	f511 3f80 	cmn.w	r1, #65536	; 0x10000\n",
+  "  8e:	f110 1f01 	cmn.w	r0, #65537	; 0x10001\n",
+  "  92:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  96:	4561      	cmp	r1, ip\n",
+  "  98:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  "  9c:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  "  a0:	4560      	cmp	r0, ip\n",
+  "  a2:	f1b8 0f00 	cmp.w	r8, #0\n",
+  "  a6:	f1b9 0f01 	cmp.w	r9, #1\n",
+  "  aa:	f1b8 0f07 	cmp.w	r8, #7\n",
+  "  ae:	f1b9 0f08 	cmp.w	r9, #8\n",
+  "  b2:	f1b8 0fff 	cmp.w	r8, #255	; 0xff\n",
+  "  b6:	f5b9 7f80 	cmp.w	r9, #256	; 0x100\n",
+  "  ba:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  "  be:	eb18 0f0c 	cmn.w	r8, ip\n",
+  "  c2:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  "  c6:	45e1      	cmp	r9, ip\n",
+  "  c8:	f5b8 5f80 	cmp.w	r8, #4096	; 0x1000\n",
+  "  cc:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  d0:	eb19 0f0c 	cmn.w	r9, ip\n",
+  "  d4:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  d8:	45e0      	cmp	r8, ip\n",
+  "  da:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  de:	45e1      	cmp	r9, ip\n",
+  "  e0:	f5b8 3f80 	cmp.w	r8, #65536	; 0x10000\n",
+  "  e4:	f1b9 1f01 	cmp.w	r9, #65537	; 0x10001\n",
+  "  e8:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  ec:	eb18 0f0c 	cmn.w	r8, ip\n",
+  "  f0:	f240 0c03 	movw	ip, #3\n",
+  "  f4:	f2c0 0c01 	movt	ip, #1\n",
+  "  f8:	45e1      	cmp	r9, ip\n",
+  "  fa:	f1b8 3fff 	cmp.w	r8, #4294967295	; 0xffffffff\n",
+  "  fe:	f119 0f07 	cmn.w	r9, #7\n",
+  " 102:	f118 0f08 	cmn.w	r8, #8\n",
+  " 106:	f119 0fff 	cmn.w	r9, #255	; 0xff\n",
+  " 10a:	f518 7f80 	cmn.w	r8, #256	; 0x100\n",
+  " 10e:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 112:	45e1      	cmp	r9, ip\n",
+  " 114:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 118:	eb18 0f0c 	cmn.w	r8, ip\n",
+  " 11c:	f519 5f80 	cmn.w	r9, #4096	; 0x1000\n",
+  " 120:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 124:	45e0      	cmp	r8, ip\n",
+  " 126:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 12a:	eb19 0f0c 	cmn.w	r9, ip\n",
+  " 12e:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 132:	eb18 0f0c 	cmn.w	r8, ip\n",
+  " 136:	f519 3f80 	cmn.w	r9, #65536	; 0x10000\n",
+  " 13a:	f118 1f01 	cmn.w	r8, #65537	; 0x10001\n",
+  " 13e:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 142:	45e1      	cmp	r9, ip\n",
+  " 144:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 148:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 14c:	45e0      	cmp	r8, ip\n",
+  nullptr
+};
+
 std::map<std::string, const char* const*> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -4952,6 +5478,7 @@
     test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults;
     test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults;
     test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults;
+    test_results["ShiftImmediate"] = ShiftImmediateResults;
     test_results["BasicLoad"] = BasicLoadResults;
     test_results["BasicStore"] = BasicStoreResults;
     test_results["ComplexLoad"] = ComplexLoadResults;
@@ -4966,6 +5493,7 @@
     test_results["StoreMultiple"] = StoreMultipleResults;
     test_results["MovWMovT"] = MovWMovTResults;
     test_results["SpecialAddSub"] = SpecialAddSubResults;
+    test_results["LoadFromOffset"] = LoadFromOffsetResults;
     test_results["StoreToOffset"] = StoreToOffsetResults;
     test_results["IfThen"] = IfThenResults;
     test_results["CbzCbnz"] = CbzCbnzResults;
@@ -4990,4 +5518,6 @@
     test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
     test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
     test_results["CompareAndBranch"] = CompareAndBranchResults;
+    test_results["AddConstant"] = AddConstantResults;
+    test_results["CmpConstant"] = CmpConstantResults;
 }
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
new file mode 100644
index 0000000..ac54813
--- /dev/null
+++ b/compiler/utils/dedupe_set-inl.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+#define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+
+#include "dedupe_set.h"
+
+#include <inttypes.h>
+
+#include <algorithm>
+#include <unordered_map>
+
+#include "base/hash_set.h"
+#include "base/mutex.h"
+#include "base/stl_util.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+
+namespace art {
+
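+// Aggregated statistics collected from all shards by DumpStats().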
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
+  size_t collision_sum = 0u;
+  size_t collision_max = 0u;
+  size_t total_probe_distance = 0u;
+  size_t total_size = 0u;
+};
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
+ public:
+  Shard(const Alloc& alloc, const std::string& lock_name)
+      : alloc_(alloc),
+        lock_name_(lock_name),
+        lock_(lock_name_.c_str()),
+        keys_() {
+  }
+
+  ~Shard() {
+    for (const HashedKey<StoreKey>& key : keys_) {
+      DCHECK(key.Key() != nullptr);
+      alloc_.Destroy(key.Key());
+    }
+  }
+
+  const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
+    MutexLock lock(self, lock_);
+    HashedKey<InKey> hashed_in_key(hash, &in_key);
+    auto it = keys_.Find(hashed_in_key);
+    if (it != keys_.end()) {
+      DCHECK(it->Key() != nullptr);
+      return it->Key();
+    }
+    const StoreKey* store_key = alloc_.Copy(in_key);
+    keys_.Insert(HashedKey<StoreKey> { hash, store_key });
+    return store_key;
+  }
+
+  void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
+    // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
+    // for bookkeeping while collecting the stats.
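+    // For example (illustrative): if three stored keys share one hash value and a
+    // fourth key has a unique hash, this pass adds 2 to collision_sum and raises
+    // collision_max to 3.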
+    std::unordered_map<HashType, size_t> stats;
+    {
+      MutexLock lock(self, lock_);
+      // Note: The total_probe_distance will be updated with the current state.
+      // It may have been higher before a re-hash.
+      global_stats->total_probe_distance += keys_.TotalProbeDistance();
+      global_stats->total_size += keys_.Size();
+      for (const HashedKey<StoreKey>& key : keys_) {
+        auto it = stats.find(key.Hash());
+        if (it == stats.end()) {
+          stats.insert({key.Hash(), 1u});
+        } else {
+          ++it->second;
+        }
+      }
+    }
+    for (const auto& entry : stats) {
+      size_t number_of_entries = entry.second;
+      if (number_of_entries > 1u) {
+        global_stats->collision_sum += number_of_entries - 1u;
+        global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
+      }
+    }
+  }
+
+ private:
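+  // A key bundled with its hash. A null key marks an empty slot in the
+  // open-addressing HashSet<> below.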
+  template <typename T>
+  class HashedKey {
+   public:
+    HashedKey() : hash_(0u), key_(nullptr) { }
+    HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
+
+    size_t Hash() const {
+      return hash_;
+    }
+
+    const T* Key() const {
+      return key_;
+    }
+
+    bool IsEmpty() const {
+      return Key() == nullptr;
+    }
+
+    void MakeEmpty() {
+      key_ = nullptr;
+    }
+
+   private:
+    size_t hash_;
+    const T* key_;
+  };
+
+  class ShardEmptyFn {
+   public:
+    bool IsEmpty(const HashedKey<StoreKey>& key) const {
+      return key.IsEmpty();
+    }
+
+    void MakeEmpty(HashedKey<StoreKey>& key) {
+      key.MakeEmpty();
+    }
+  };
+
+  struct ShardHashFn {
+    template <typename T>
+    size_t operator()(const HashedKey<T>& key) const {
+      return key.Hash();
+    }
+  };
+
+  struct ShardPred {
+    typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
+    operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
+      DCHECK(lhs.Key() != nullptr);
+      DCHECK(rhs.Key() != nullptr);
+      // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
+      return lhs.Key() == rhs.Key();
+    }
+
+    template <typename LeftT, typename RightT>
+    bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
+      DCHECK(lhs.Key() != nullptr);
+      DCHECK(rhs.Key() != nullptr);
+      return lhs.Hash() == rhs.Hash() &&
+          lhs.Key()->size() == rhs.Key()->size() &&
+          std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
+    }
+  };
+
+  Alloc alloc_;
+  const std::string lock_name_;
+  Mutex lock_;
+  HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
+};
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
+    Thread* self, const InKey& key) {
+  uint64_t hash_start = 0u;  // Only read when kIsDebugBuild; initialized to keep release builds warning-free.
+  if (kIsDebugBuild) {
+    hash_start = NanoTime();
+  }
+  HashType raw_hash = HashFunc()(key);
+  if (kIsDebugBuild) {
+    uint64_t hash_end = NanoTime();
+    hash_time_ += hash_end - hash_start;
+  }
+  HashType shard_hash = raw_hash / kShard;
+  HashType shard_bin = raw_hash % kShard;
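+  // Illustrative example: with kShard == 4 and raw_hash == 0x2b, shard_bin == 3
+  // selects the shard and shard_hash == 0x0a is the hash used inside that shard.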
+  return shards_[shard_bin]->Add(self, shard_hash, key);
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
+                                                                         const Alloc& alloc)
+    : hash_time_(0) {
+  for (HashType i = 0; i < kShard; ++i) {
+    std::ostringstream oss;
+    oss << set_name << " lock " << i;
+    shards_[i].reset(new Shard(alloc, oss.str()));
+  }
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
+  // Everything done by member destructors.
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
+    Thread* self) const {
+  Stats stats;
+  for (HashType shard = 0; shard < kShard; ++shard) {
+    shards_[shard]->UpdateStats(self, &stats);
+  }
+  return StringPrintf("%zu collisions, %zu max hash collisions, "
+                      "%zu/%zu probe distance, %" PRIu64 " ns hash time",
+                      stats.collision_sum,
+                      stats.collision_max,
+                      stats.total_probe_distance,
+                      stats.total_size,
+                      hash_time_);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 2c4a689..b62f216 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -17,151 +17,41 @@
 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_
 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
 
-#include <algorithm>
-#include <inttypes.h>
 #include <memory>
-#include <set>
+#include <stdint.h>
 #include <string>
 
-#include "base/mutex.h"
-#include "base/stl_util.h"
-#include "base/stringprintf.h"
-#include "base/time_utils.h"
-#include "utils/swap_space.h"
+#include "base/macros.h"
 
 namespace art {
 
+class Thread;
+
 // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
 // Add method. The data structure is thread-safe through the use of internal locks; it also
 // supports sharding of the locks.
-template <typename InKey, typename StoreKey, typename HashType, typename HashFunc,
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
           HashType kShard = 1>
 class DedupeSet {
-  typedef std::pair<HashType, const InKey*> HashedInKey;
-  struct HashedKey {
-    StoreKey* store_ptr;
-    union {
-      HashType store_hash;        // Valid if store_ptr != null.
-      const HashedInKey* in_key;  // Valid if store_ptr == null.
-    };
-  };
-
-  class Comparator {
-   public:
-    bool operator()(const HashedKey& a, const HashedKey& b) const {
-      HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first;
-      HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first;
-      if (a_hash != b_hash) {
-        return a_hash < b_hash;
-      }
-      if (a.store_ptr != nullptr && b.store_ptr != nullptr) {
-        return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
-                                            b.store_ptr->begin(), b.store_ptr->end());
-      } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) {
-        return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
-                                            b.in_key->second->begin(), b.in_key->second->end());
-      } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) {
-        return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
-                                            b.store_ptr->begin(), b.store_ptr->end());
-      } else {
-        return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
-                                            b.in_key->second->begin(), b.in_key->second->end());
-      }
-    }
-  };
-
  public:
-  StoreKey* Add(Thread* self, const InKey& key) {
-    uint64_t hash_start;
-    if (kIsDebugBuild) {
-      hash_start = NanoTime();
-    }
-    HashType raw_hash = HashFunc()(key);
-    if (kIsDebugBuild) {
-      uint64_t hash_end = NanoTime();
-      hash_time_ += hash_end - hash_start;
-    }
-    HashType shard_hash = raw_hash / kShard;
-    HashType shard_bin = raw_hash % kShard;
-    HashedInKey hashed_in_key(shard_hash, &key);
-    HashedKey hashed_key;
-    hashed_key.store_ptr = nullptr;
-    hashed_key.in_key = &hashed_in_key;
-    MutexLock lock(self, *lock_[shard_bin]);
-    auto it = keys_[shard_bin].find(hashed_key);
-    if (it != keys_[shard_bin].end()) {
-      DCHECK(it->store_ptr != nullptr);
-      return it->store_ptr;
-    }
-    hashed_key.store_ptr = CreateStoreKey(key);
-    hashed_key.store_hash = shard_hash;
-    keys_[shard_bin].insert(hashed_key);
-    return hashed_key.store_ptr;
-  }
+  // Add a new key to the dedupe set if not present. Return the equivalent deduplicated stored key.
+  const StoreKey* Add(Thread* self, const InKey& key);
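+  //
+  // Hypothetical usage sketch (client code, not part of this change; MyAlloc and
+  // MyHashFunc stand in for a real allocator and hash functor):
+  //
+  //   DedupeSet<ArrayRef<const uint8_t>, std::vector<uint8_t>,
+  //             MyAlloc, size_t, MyHashFunc> set("byte arrays", alloc);
+  //   const std::vector<uint8_t>* stored = set.Add(Thread::Current(), data);
+  //   // Adding an equal key again returns the same pointer.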
 
-  DedupeSet(const char* set_name, SwapAllocator<void>& alloc)
-      : allocator_(alloc), hash_time_(0) {
-    for (HashType i = 0; i < kShard; ++i) {
-      std::ostringstream oss;
-      oss << set_name << " lock " << i;
-      lock_name_[i] = oss.str();
-      lock_[i].reset(new Mutex(lock_name_[i].c_str()));
-    }
-  }
+  DedupeSet(const char* set_name, const Alloc& alloc);
 
-  ~DedupeSet() {
-    // Have to manually free all pointers.
-    for (auto& shard : keys_) {
-      for (const auto& hashed_key : shard) {
-        DCHECK(hashed_key.store_ptr != nullptr);
-        DeleteStoreKey(hashed_key.store_ptr);
-      }
-    }
-  }
+  ~DedupeSet();
 
-  std::string DumpStats() const {
-    size_t collision_sum = 0;
-    size_t collision_max = 0;
-    for (HashType shard = 0; shard < kShard; ++shard) {
-      HashType last_hash = 0;
-      size_t collision_cur_max = 0;
-      for (const HashedKey& key : keys_[shard]) {
-        DCHECK(key.store_ptr != nullptr);
-        if (key.store_hash == last_hash) {
-          collision_cur_max++;
-          if (collision_cur_max > 1) {
-            collision_sum++;
-            if (collision_cur_max > collision_max) {
-              collision_max = collision_cur_max;
-            }
-          }
-        } else {
-          collision_cur_max = 1;
-          last_hash = key.store_hash;
-        }
-      }
-    }
-    return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time",
-                        collision_sum, collision_max, hash_time_);
-  }
+  std::string DumpStats(Thread* self) const;
 
  private:
-  StoreKey* CreateStoreKey(const InKey& key) {
-    StoreKey* ret = allocator_.allocate(1);
-    allocator_.construct(ret, key.begin(), key.end(), allocator_);
-    return ret;
-  }
+  struct Stats;
+  class Shard;
 
-  void DeleteStoreKey(StoreKey* key) {
-    SwapAllocator<StoreKey> alloc(allocator_);
-    alloc.destroy(key);
-    alloc.deallocate(key, 1);
-  }
-
-  std::string lock_name_[kShard];
-  std::unique_ptr<Mutex> lock_[kShard];
-  std::set<HashedKey, Comparator> keys_[kShard];
-  SwapAllocator<StoreKey> allocator_;
+  std::unique_ptr<Shard> shards_[kShard];
   uint64_t hash_time_;
 
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 637964e..60a891d 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -18,15 +18,18 @@
 
 #include <algorithm>
 #include <cstdio>
+#include <vector>
 
+#include "dedupe_set-inl.h"
 #include "gtest/gtest.h"
 #include "thread-inl.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class DedupeHashFunc {
+class DedupeSetTestHashFunc {
  public:
-  size_t operator()(const std::vector<uint8_t>& array) const {
+  size_t operator()(const ArrayRef<const uint8_t>& array) const {
     size_t hash = 0;
     for (uint8_t c : array) {
       hash += c;
@@ -36,46 +39,52 @@
     return hash;
   }
 };
+
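+// Test allocator: Copy() clones the input bytes into a heap-allocated vector and
+// Destroy() deletes it; the DedupeSet shards call Destroy() on destruction.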
+class DedupeSetTestAlloc {
+ public:
+  const std::vector<uint8_t>* Copy(const ArrayRef<const uint8_t>& src) {
+    return new std::vector<uint8_t>(src.begin(), src.end());
+  }
+
+  void Destroy(const std::vector<uint8_t>* key) {
+    delete key;
+  }
+};
+
 TEST(DedupeSetTest, Test) {
   Thread* self = Thread::Current();
-  typedef std::vector<uint8_t> ByteArray;
-  SwapAllocator<void> swap(nullptr);
-  DedupeSet<ByteArray, SwapVector<uint8_t>, size_t, DedupeHashFunc> deduplicator("test", swap);
-  SwapVector<uint8_t>* array1;
+  DedupeSetTestAlloc alloc;
+  DedupeSet<ArrayRef<const uint8_t>,
+            std::vector<uint8_t>,
+            DedupeSetTestAlloc,
+            size_t,
+            DedupeSetTestHashFunc> deduplicator("test", alloc);
+  const std::vector<uint8_t>* array1;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(20);
-    test1.push_back(30);
-    test1.push_back(45);
-
+    uint8_t raw_test1[] = { 10u, 20u, 30u, 45u };
+    ArrayRef<const uint8_t> test1(raw_test1);
     array1 = deduplicator.Add(self, test1);
     ASSERT_NE(array1, nullptr);
     ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array1->begin()));
   }
 
-  SwapVector<uint8_t>* array2;
+  const std::vector<uint8_t>* array2;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(20);
-    test1.push_back(30);
-    test1.push_back(45);
-    array2 = deduplicator.Add(self, test1);
+    uint8_t raw_test2[] = { 10u, 20u, 30u, 45u };
+    ArrayRef<const uint8_t> test2(raw_test2);
+    array2 = deduplicator.Add(self, test2);
     ASSERT_EQ(array2, array1);
-    ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array2->begin()));
+    ASSERT_TRUE(std::equal(test2.begin(), test2.end(), array2->begin()));
   }
 
-  SwapVector<uint8_t>* array3;
+  const std::vector<uint8_t>* array3;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(22);
-    test1.push_back(30);
-    test1.push_back(47);
-    array3 = deduplicator.Add(self, test1);
+    uint8_t raw_test3[] = { 10u, 22u, 30u, 47u };
+    ArrayRef<const uint8_t> test3(raw_test3);
+    array3 = deduplicator.Add(self, test3);
     ASSERT_NE(array3, nullptr);
-    ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array3->begin()));
+    ASSERT_NE(array3, array1);
+    ASSERT_TRUE(std::equal(test3.begin(), test3.end(), array3->begin()));
   }
 }
 
diff --git a/compiler/utils/intrusive_forward_list.h b/compiler/utils/intrusive_forward_list.h
new file mode 100644
index 0000000..ec2c087
--- /dev/null
+++ b/compiler/utils/intrusive_forward_list.h
@@ -0,0 +1,452 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
+#define ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
+
+#include <stdint.h>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace art {
+
+struct IntrusiveForwardListHook {
+  IntrusiveForwardListHook() : next_hook(nullptr) { }
+  explicit IntrusiveForwardListHook(const IntrusiveForwardListHook* hook) : next_hook(hook) { }
+
+  // Allow copyable values, but do not copy the hook; it is not part of the value.
+  IntrusiveForwardListHook(const IntrusiveForwardListHook& other ATTRIBUTE_UNUSED)
+      : next_hook(nullptr) { }
+  IntrusiveForwardListHook& operator=(const IntrusiveForwardListHook& src ATTRIBUTE_UNUSED) {
+    return *this;
+  }
+
+  mutable const IntrusiveForwardListHook* next_hook;
+};
+
+template <typename T, IntrusiveForwardListHook T::* NextPtr = &T::hook>
+class IntrusiveForwardListMemberHook;
+
+template <typename T, typename HookTraits = IntrusiveForwardListMemberHook<T>>
+class IntrusiveForwardList;
+
+template <typename T, typename HookTraits>
+class IntrusiveForwardListIterator : public std::iterator<std::forward_iterator_tag, T> {
+ public:
+  // Construct/copy/destroy (except the private constructor used by IntrusiveForwardList<>).
+  IntrusiveForwardListIterator() : hook_(nullptr) { }
+  IntrusiveForwardListIterator(const IntrusiveForwardListIterator& src) = default;
+  IntrusiveForwardListIterator& operator=(const IntrusiveForwardListIterator& src) = default;
+
+  // Conversion from iterator to const_iterator.
+  template <typename OtherT,
+            typename = typename std::enable_if<std::is_same<T, const OtherT>::value>::type>
+  IntrusiveForwardListIterator(const IntrusiveForwardListIterator<OtherT, HookTraits>& src)
+      : hook_(src.hook_) { }
+
+  // Iteration.
+  IntrusiveForwardListIterator& operator++() {
+    DCHECK(hook_ != nullptr);
+    hook_ = hook_->next_hook;
+    return *this;
+  }
+  IntrusiveForwardListIterator operator++(int) {
+    IntrusiveForwardListIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+
+  // Dereference
+  T& operator*() const {
+    DCHECK(hook_ != nullptr);
+    return *HookTraits::GetValue(hook_);
+  }
+  T* operator->() const {
+    return &**this;
+  }
+
+ private:
+  explicit IntrusiveForwardListIterator(const IntrusiveForwardListHook* hook) : hook_(hook) { }
+
+  const IntrusiveForwardListHook* hook_;
+
+  template <typename OtherT, typename OtherTraits>
+  friend class IntrusiveForwardListIterator;
+
+  template <typename OtherT, typename OtherTraits>
+  friend class IntrusiveForwardList;
+
+  template <typename OtherT1, typename OtherT2, typename OtherTraits>
+  friend typename std::enable_if<std::is_same<const OtherT1, const OtherT2>::value, bool>::type
+  operator==(const IntrusiveForwardListIterator<OtherT1, OtherTraits>& lhs,
+             const IntrusiveForwardListIterator<OtherT2, OtherTraits>& rhs);
+};
+
+template <typename T, typename OtherT, typename HookTraits>
+typename std::enable_if<std::is_same<const T, const OtherT>::value, bool>::type operator==(
+    const IntrusiveForwardListIterator<T, HookTraits>& lhs,
+    const IntrusiveForwardListIterator<OtherT, HookTraits>& rhs) {
+  return lhs.hook_ == rhs.hook_;
+}
+
+template <typename T, typename OtherT, typename HookTraits>
+typename std::enable_if<std::is_same<const T, const OtherT>::value, bool>::type operator!=(
+    const IntrusiveForwardListIterator<T, HookTraits>& lhs,
+    const IntrusiveForwardListIterator<OtherT, HookTraits>& rhs) {
+  return !(lhs == rhs);
+}
+
+// Intrusive version of std::forward_list<>. See also slist<> in Boost.Intrusive.
+//
+// This class template provides the same interface as std::forward_list<> as long
+// as the functions are meaningful for an intrusive container; this excludes emplace
+// functions and functions taking an std::initializer_list<> as the container does
+// not construct elements.
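+//
+// Hypothetical usage sketch (the value type embeds the hook; the list never
+// owns, copies or destroys elements):
+//
+//   struct Node {
+//     IntrusiveForwardListHook hook;  // Found by the default HookTraits as T::hook.
+//     int value;
+//   };
+//   Node node;                        // Storage is owned by the caller.
+//   IntrusiveForwardList<Node> list;
+//   list.push_front(node);            // Links `node` in place; no allocation or copy.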
+template <typename T, typename HookTraits>
+class IntrusiveForwardList {
+ public:
+  typedef HookTraits hook_traits;
+  typedef       T  value_type;
+  typedef       T& reference;
+  typedef const T& const_reference;
+  typedef       T* pointer;
+  typedef const T* const_pointer;
+  typedef IntrusiveForwardListIterator<      T, hook_traits> iterator;
+  typedef IntrusiveForwardListIterator<const T, hook_traits> const_iterator;
+
+  // Construct/copy/destroy.
+  IntrusiveForwardList() = default;
+  template <typename InputIterator>
+  IntrusiveForwardList(InputIterator first, InputIterator last) : IntrusiveForwardList() {
+    insert_after(before_begin(), first, last);
+  }
+  IntrusiveForwardList(IntrusiveForwardList&& src) : first_(src.first_.next_hook) {
+    src.first_.next_hook = nullptr;
+  }
+  IntrusiveForwardList& operator=(const IntrusiveForwardList& src) = delete;
+  IntrusiveForwardList& operator=(IntrusiveForwardList&& src) {
+    IntrusiveForwardList tmp(std::move(src));
+    tmp.swap(*this);
+    return *this;
+  }
+  ~IntrusiveForwardList() = default;
+
+  // Iterators.
+  iterator before_begin() { return iterator(&first_); }
+  const_iterator before_begin() const { return const_iterator(&first_); }
+  iterator begin() { return iterator(first_.next_hook); }
+  const_iterator begin() const { return const_iterator(first_.next_hook); }
+  iterator end() { return iterator(nullptr); }
+  const_iterator end() const { return const_iterator(nullptr); }
+  const_iterator cbefore_begin() const { return const_iterator(&first_); }
+  const_iterator cbegin() const { return const_iterator(first_.next_hook); }
+  const_iterator cend() const { return const_iterator(nullptr); }
+
+  // Capacity.
+  bool empty() const { return begin() == end(); }
+  size_t max_size() const { return static_cast<size_t>(-1); }
+
+  // Element access.
+  reference front() { return *begin(); }
+  const_reference front() const { return *begin(); }
+
+  // Modifiers.
+  template <typename InputIterator>
+  void assign(InputIterator first, InputIterator last) {
+    IntrusiveForwardList tmp(first, last);
+    tmp.swap(*this);
+  }
+  void push_front(value_type& value) {
+    insert_after(before_begin(), value);
+  }
+  void pop_front() {
+    DCHECK(!empty());
+    erase_after(before_begin());
+  }
+  iterator insert_after(const_iterator position, value_type& value) {
+    const IntrusiveForwardListHook* new_hook = hook_traits::GetHook(&value);
+    new_hook->next_hook = position.hook_->next_hook;
+    position.hook_->next_hook = new_hook;
+    return iterator(new_hook);
+  }
+  template <typename InputIterator>
+  iterator insert_after(const_iterator position, InputIterator first, InputIterator last) {
+    while (first != last) {
+      position = insert_after(position, *first++);
+    }
+    return iterator(position.hook_);
+  }
+  iterator erase_after(const_iterator position) {
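+    // Erase exactly one element: the open range (position, position + 2).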
+    const_iterator last = position;
+    std::advance(last, 2);
+    return erase_after(position, last);
+  }
+  iterator erase_after(const_iterator position, const_iterator last) {
+    DCHECK(position != last);
+    position.hook_->next_hook = last.hook_;
+    return iterator(last.hook_);
+  }
+  void swap(IntrusiveForwardList& other) {
+    std::swap(first_.next_hook, other.first_.next_hook);
+  }
+  void clear() {
+    first_.next_hook = nullptr;
+  }
+
+  // Operations.
+  void splice_after(const_iterator position, IntrusiveForwardList& src) {
+    DCHECK(position != end());
+    splice_after(position, src, src.before_begin(), src.end());
+  }
+  void splice_after(const_iterator position, IntrusiveForwardList&& src) {
+    splice_after(position, src);  // Use l-value overload.
+  }
+  // Splice the element after `i`.
+  void splice_after(const_iterator position, IntrusiveForwardList& src, const_iterator i) {
+    // The standard specifies that this version does nothing if `position == i`
+    // or `position == ++i`. We must handle the latter here because the overload
+    // `splice_after(position, src, first, last)` does not allow `position` inside
+    // the range `(first, last)`.
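+    // (Illustrative: in a list {a, b, c}, splice_after(it_b, src, it_a) would move
+    // `b` to the position after `b` itself, so we must return without relinking.)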
+    if (++const_iterator(i) == position) {
+      return;
+    }
+    const_iterator last = i;
+    std::advance(last, 2);
+    splice_after(position, src, i, last);
+  }
+  // Splice the element after `i`.
+  void splice_after(const_iterator position, IntrusiveForwardList&& src, const_iterator i) {
+    splice_after(position, src, i);  // Use l-value overload.
+  }
+  // Splice elements between `first` and `last`, i.e. open range `(first, last)`.
+  void splice_after(const_iterator position,
+                    IntrusiveForwardList& src,
+                    const_iterator first,
+                    const_iterator last) {
+    DCHECK(position != end());
+    DCHECK(first != last);
+    if (++const_iterator(first) == last) {
+      // Nothing to do.
+      return;
+    }
+    // If position is just before end() and last is src.end(), we can finish this quickly.
+    if (++const_iterator(position) == end() && last == src.end()) {
+      position.hook_->next_hook = first.hook_->next_hook;
+      first.hook_->next_hook = nullptr;
+      return;
+    }
+    // Otherwise we need to find the position before last to fix up the hook.
+    const_iterator before_last = first;
+    while (++const_iterator(before_last) != last) {
+      ++before_last;
+    }
+    // Detach (first, last).
+    const IntrusiveForwardListHook* first_taken = first.hook_->next_hook;
+    first.hook_->next_hook = last.hook_;
+    // Attach the sequence to the new position.
+    before_last.hook_->next_hook = position.hook_->next_hook;
+    position.hook_->next_hook = first_taken;
+  }
+  // Splice elements between `first` and `last`, i.e. open range `(first, last)`.
+  void splice_after(const_iterator position,
+                    IntrusiveForwardList&& src,
+                    const_iterator first,
+                    const_iterator last) {
+    splice_after(position, src, first, last);  // Use l-value overload.
+  }
+  void remove(const value_type& value) {
+    remove_if([value](const value_type& v) { return value == v; });
+  }
+  template <typename Predicate>
+  void remove_if(Predicate pred) {
+    iterator prev = before_begin();
+    for (iterator current = begin(); current != end(); ++current) {
+      if (pred(*current)) {
+        erase_after(prev);
+        current = prev;
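+        // erase_after() unlinked the node `current` pointed to; resume from
+        // `prev` so the loop's ++current visits the next remaining element.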
+      } else {
+        prev = current;
+      }
+    }
+  }
+  void unique() {
+    unique(std::equal_to<value_type>());
+  }
+  template <typename BinaryPredicate>
+  void unique(BinaryPredicate pred) {
+    if (!empty()) {
+      iterator prev = begin();
+      iterator current = prev;
+      ++current;
+      for (; current != end(); ++current) {
+        if (pred(*prev, *current)) {
+          erase_after(prev);
+          current = prev;
+        } else {
+          prev = current;
+        }
+      }
+    }
+  }
+  void merge(IntrusiveForwardList& other) {
+    merge(other, std::less<value_type>());
+  }
+  void merge(IntrusiveForwardList&& other) {
+    merge(other);  // Use l-value overload.
+  }
+  template <typename Compare>
+  void merge(IntrusiveForwardList& other, Compare cmp) {
+    iterator prev = before_begin();
+    iterator current = begin();
+    iterator other_prev = other.before_begin();
+    iterator other_current = other.begin();
+    while (current != end() && other_current != other.end()) {
+      if (cmp(*other_current, *current)) {
+        ++other_current;
+        splice_after(prev, other, other_prev);
+        ++prev;
+      } else {
+        prev = current;
+        ++current;
+      }
+      DCHECK(++const_iterator(prev) == current);
+      DCHECK(++const_iterator(other_prev) == other_current);
+    }
+    splice_after(prev, other);
+  }
+  template <typename Compare>
+  void merge(IntrusiveForwardList&& other, Compare cmp) {
+    merge(other, cmp);  // Use l-value overload.
+  }
+  void sort() {
+    sort(std::less<value_type>());
+  }
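+  // Merge sort: split the list in half, sort both halves recursively and merge
+  // them. merge() favors elements of the first half on ties, keeping the sort
+  // stable like std::forward_list<>::sort().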
+  template <typename Compare>
+  void sort(Compare cmp) {
+    size_t n = std::distance(begin(), end());
+    if (n >= 2u) {
+      const_iterator middle = before_begin();
+      std::advance(middle, n / 2u);
+      IntrusiveForwardList second_half;
+      second_half.splice_after(second_half.before_begin(), *this, middle, end());
+      sort(cmp);
+      second_half.sort(cmp);
+      merge(second_half, cmp);
+    }
+  }
+  void reverse() {
+    IntrusiveForwardList reversed;
+    while (!empty()) {
+      value_type& value = front();
+      erase_after(before_begin());
+      reversed.insert_after(reversed.before_begin(), value);
+    }
+    reversed.swap(*this);
+  }
+
+  // Extensions.
+  bool HasExactlyOneElement() const {
+    return !empty() && ++begin() == end();
+  }
+  size_t SizeSlow() const {
+    return std::distance(begin(), end());
+  }
+  bool ContainsNode(const_reference node) const {
+    for (auto&& n : *this) {
+      if (std::addressof(n) == std::addressof(node)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private:
+  static IntrusiveForwardListHook* ModifiableHook(const IntrusiveForwardListHook* hook) {
+    return const_cast<IntrusiveForwardListHook*>(hook);
+  }
+
+  IntrusiveForwardListHook first_;
+};
+
+template <typename T, typename HookTraits>
+void swap(IntrusiveForwardList<T, HookTraits>& lhs, IntrusiveForwardList<T, HookTraits>& rhs) {
+  lhs.swap(rhs);
+}
+
+template <typename T, typename HookTraits>
+bool operator==(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  auto lit = lhs.begin();
+  auto rit = rhs.begin();
+  for (; lit != lhs.end() && rit != rhs.end(); ++lit, ++rit) {
+    if (*lit != *rit) {
+      return false;
+    }
+  }
+  return lit == lhs.end() && rit == rhs.end();
+}
+
+template <typename T, typename HookTraits>
+bool operator!=(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename T, typename HookTraits>
+bool operator<(const IntrusiveForwardList<T, HookTraits>& lhs,
+               const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+template <typename T, typename HookTraits>
+bool operator>(const IntrusiveForwardList<T, HookTraits>& lhs,
+               const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return rhs < lhs;
+}
+
+template <typename T, typename HookTraits>
+bool operator<=(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return !(rhs < lhs);
+}
+
+template <typename T, typename HookTraits>
+bool operator>=(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return !(lhs < rhs);
+}
+
+template <typename T, IntrusiveForwardListHook T::* NextPtr>
+class IntrusiveForwardListMemberHook {
+ public:
+  static const IntrusiveForwardListHook* GetHook(const T* value) {
+    return &(value->*NextPtr);
+  }
+
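+  // Recover the enclosing value from its embedded hook by subtracting the
+  // member offset; this is the inverse of GetHook().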
+  static T* GetValue(const IntrusiveForwardListHook* hook) {
+    return reinterpret_cast<T*>(
+        reinterpret_cast<uintptr_t>(hook) - OFFSETOF_MEMBERPTR(T, NextPtr));
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
diff --git a/compiler/utils/intrusive_forward_list_test.cc b/compiler/utils/intrusive_forward_list_test.cc
new file mode 100644
index 0000000..517142e
--- /dev/null
+++ b/compiler/utils/intrusive_forward_list_test.cc
@@ -0,0 +1,505 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <forward_list>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "intrusive_forward_list.h"
+
+namespace art {
+
+struct IFLTestValue {
+  // Deliberately not explicit.
+  IFLTestValue(int v) : hook(), value(v) { }  // NOLINT(runtime/explicit)
+
+  IntrusiveForwardListHook hook;
+  int value;
+};
+
+bool operator==(const IFLTestValue& lhs, const IFLTestValue& rhs) {
+  return lhs.value == rhs.value;
+}
+
+bool operator<(const IFLTestValue& lhs, const IFLTestValue& rhs) {
+  return lhs.value < rhs.value;
+}
+
+#define ASSERT_LISTS_EQUAL(expected, value)                                   \
+  do {                                                                        \
+    ASSERT_EQ(expected.empty(), value.empty());                               \
+    ASSERT_EQ(std::distance(expected.begin(), expected.end()),                \
+              std::distance(value.begin(), value.end()));                     \
+    ASSERT_TRUE(std::equal(expected.begin(), expected.end(), value.begin())); \
+  } while (false)
+
+TEST(IntrusiveForwardList, IteratorToConstIterator) {
+  IntrusiveForwardList<IFLTestValue> ifl;
+  IntrusiveForwardList<IFLTestValue>::iterator begin = ifl.begin();
+  IntrusiveForwardList<IFLTestValue>::const_iterator cbegin = ifl.cbegin();
+  IntrusiveForwardList<IFLTestValue>::const_iterator converted_begin = begin;
+  ASSERT_TRUE(converted_begin == cbegin);
+}
+
+TEST(IntrusiveForwardList, IteratorOperators) {
+  IntrusiveForwardList<IFLTestValue> ifl;
+  ASSERT_TRUE(ifl.begin() == ifl.cbegin());
+  ASSERT_FALSE(ifl.begin() != ifl.cbegin());
+  ASSERT_TRUE(ifl.end() == ifl.cend());
+  ASSERT_FALSE(ifl.end() != ifl.cend());
+
+  ASSERT_TRUE(ifl.begin() == ifl.end());  // Empty.
+  ASSERT_FALSE(ifl.begin() != ifl.end());  // Empty.
+
+  IFLTestValue value(1);
+  ifl.insert_after(ifl.cbefore_begin(), value);
+
+  ASSERT_FALSE(ifl.begin() == ifl.end());  // Not empty.
+  ASSERT_TRUE(ifl.begin() != ifl.end());  // Not empty.
+}
+
+TEST(IntrusiveForwardList, ConstructRange) {
+  std::forward_list<int> ref({ 1, 2, 7 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, Assign) {
+  std::forward_list<int> ref1({ 2, 8, 5 });
+  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl;
+  ifl.assign(storage1.begin(), storage1.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl);
+  std::forward_list<int> ref2({ 7, 1, 3 });
+  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  ifl.assign(storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref2, ifl);
+}
+
+TEST(IntrusiveForwardList, PushPop) {
+  IFLTestValue value3(3);
+  IFLTestValue value7(7);
+  std::forward_list<int> ref;
+  IntrusiveForwardList<IFLTestValue> ifl;
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.push_front(3);
+  ifl.push_front(value3);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(3, ifl.front());
+  ref.push_front(7);
+  ifl.push_front(value7);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(7, ifl.front());
+  ref.pop_front();
+  ifl.pop_front();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(3, ifl.front());
+  ref.pop_front();
+  ifl.pop_front();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, InsertAfter1) {
+  IFLTestValue value4(4);
+  IFLTestValue value8(8);
+  IFLTestValue value5(5);
+  IFLTestValue value3(3);
+  std::forward_list<int> ref;
+  IntrusiveForwardList<IFLTestValue> ifl;
+
+  auto ref_it = ref.insert_after(ref.before_begin(), 4);
+  auto ifl_it = ifl.insert_after(ifl.before_begin(), value4);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+  CHECK(ref_it == ref.begin());
+  ASSERT_TRUE(ifl_it == ifl.begin());
+
+  ref_it = ref.insert_after(ref.begin(), 8);
+  ifl_it = ifl.insert_after(ifl.begin(), value8);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+  CHECK(ref_it != ref.end());
+  ASSERT_TRUE(ifl_it != ifl.end());
+  CHECK(++ref_it == ref.end());
+  ASSERT_TRUE(++ifl_it == ifl.end());
+
+  ref_it = ref.insert_after(ref.begin(), 5);
+  ifl_it = ifl.insert_after(ifl.begin(), value5);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+
+  ref_it = ref.insert_after(ref_it, 3);
+  ifl_it = ifl.insert_after(ifl_it, value3);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+}
+
+TEST(IntrusiveForwardList, InsertAfter2) {
+  std::forward_list<int> ref;
+  IntrusiveForwardList<IFLTestValue> ifl;
+
+  auto ref_it = ref.insert_after(ref.before_begin(), { 2, 8, 5 });
+  std::vector<IFLTestValue> storage1({ { 2 }, { 8 }, { 5 } });
+  auto ifl_it = ifl.insert_after(ifl.before_begin(), storage1.begin(), storage1.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+
+  std::vector<IFLTestValue> storage2({ { 7 }, { 2 } });
+  ref_it = ref.insert_after(ref.begin(), { 7, 2 });
+  ifl_it = ifl.insert_after(ifl.begin(), storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+
+  std::vector<IFLTestValue> storage3({ { 1 }, { 3 }, { 4 }, { 9 } });
+  ref_it = ref.begin();
+  ifl_it = ifl.begin();
+  std::advance(ref_it, std::distance(ref.begin(), ref.end()) - 1);
+  std::advance(ifl_it, std::distance(ifl.begin(), ifl.end()) - 1);
+  ref_it = ref.insert_after(ref_it, { 1, 3, 4, 9 });
+  ifl_it = ifl.insert_after(ifl_it, storage3.begin(), storage3.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, EraseAfter1) {
+  std::forward_list<int> ref({ 1, 2, 7, 4, 5 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 5);
+
+  auto ref_it = ref.begin();
+  auto ifl_it = ifl.begin();
+  std::advance(ref_it, 2);
+  std::advance(ifl_it, 2);
+  ref_it = ref.erase_after(ref_it);
+  ifl_it = ifl.erase_after(ifl_it);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 4);
+  CHECK(ref_it != ref.end());
+  ASSERT_TRUE(ifl_it != ifl.end());
+  CHECK(++ref_it == ref.end());
+  ASSERT_TRUE(++ifl_it == ifl.end());
+
+  ref_it = ref.begin();
+  ifl_it = ifl.begin();
+  std::advance(ref_it, 2);
+  std::advance(ifl_it, 2);
+  ref_it = ref.erase_after(ref_it);
+  ifl_it = ifl.erase_after(ifl_it);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 3);
+  CHECK(ref_it == ref.end());
+  ASSERT_TRUE(ifl_it == ifl.end());
+
+  ref_it = ref.erase_after(ref.begin());
+  ifl_it = ifl.erase_after(ifl.begin());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 2);
+  CHECK(ref_it != ref.end());
+  ASSERT_TRUE(ifl_it != ifl.end());
+  CHECK(++ref_it == ref.end());
+  ASSERT_TRUE(++ifl_it == ifl.end());
+
+  ref_it = ref.erase_after(ref.before_begin());
+  ifl_it = ifl.erase_after(ifl.before_begin());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 1);
+  CHECK(ref_it == ref.begin());
+  ASSERT_TRUE(ifl_it == ifl.begin());
+
+  ref_it = ref.erase_after(ref.before_begin());
+  ifl_it = ifl.erase_after(ifl.before_begin());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 0);
+  CHECK(ref_it == ref.begin());
+  ASSERT_TRUE(ifl_it == ifl.begin());
+}
+
+TEST(IntrusiveForwardList, EraseAfter2) {
+  std::forward_list<int> ref({ 1, 2, 7, 4, 5, 3, 2, 8, 9 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 9);
+
+  auto ref_it = ref.begin();
+  auto ifl_it = ifl.begin();
+  std::advance(ref_it, 3);
+  std::advance(ifl_it, 3);
+  ref_it = ref.erase_after(ref.begin(), ref_it);
+  ifl_it = ifl.erase_after(ifl.begin(), ifl_it);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(std::distance(ref.begin(), ref_it), std::distance(ifl.begin(), ifl_it));
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 7);
+
+  ref_it = ref.erase_after(ref_it, ref.end());
+  ifl_it = ifl.erase_after(ifl_it, ifl.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(ref_it == ref.end());
+  ASSERT_TRUE(ifl_it == ifl.end());
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 2);
+
+  ref_it = ref.erase_after(ref.before_begin(), ref.end());
+  ifl_it = ifl.erase_after(ifl.before_begin(), ifl.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(ref_it == ref.end());
+  ASSERT_TRUE(ifl_it == ifl.end());
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 0);
+}
+
+TEST(IntrusiveForwardList, SwapClear) {
+  std::forward_list<int> ref1({ 1, 2, 7 });
+  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl1(storage1.begin(), storage1.end());
+  std::forward_list<int> ref2({ 3, 8, 6 });
+  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  IntrusiveForwardList<IFLTestValue> ifl2(storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ref1.swap(ref2);
+  ifl1.swap(ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ref1.clear();
+  ifl1.clear();
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  swap(ref1, ref2);
+  swap(ifl1, ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ref1.clear();
+  ifl1.clear();
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+}
+
+TEST(IntrusiveForwardList, SpliceAfter) {
+  std::forward_list<int> ref1({ 3, 1, 2, 7, 4, 5, 4, 8, 7 });
+  std::forward_list<int> ref2;
+  std::vector<IFLTestValue> storage(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl1(storage.begin(), storage.end());
+  IntrusiveForwardList<IFLTestValue> ifl2;
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move everything to ref2/ifl2.
+  ref2.splice_after(ref2.before_begin(), ref1);
+  ifl2.splice_after(ifl2.before_begin(), ifl1);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move first element (3) to ref1/ifl1.
+  ref1.splice_after(ref1.before_begin(), ref2, ref2.before_begin());
+  ifl1.splice_after(ifl1.before_begin(), ifl2, ifl2.before_begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move second element (2) to ref1/ifl1 after the first element (3).
+  ref1.splice_after(ref1.begin(), ref2, ref2.begin());
+  ifl1.splice_after(ifl1.begin(), ifl2, ifl2.begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move everything from ref2/ifl2 between the 2 elements now in ref1/ifl1.
+  ref1.splice_after(ref1.begin(), ref2);
+  ifl1.splice_after(ifl1.begin(), ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  std::forward_list<int> check({ 3, 1, 7, 4, 5, 4, 8, 7, 2 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+  ASSERT_TRUE(ifl2.empty());
+
+  // Empty splice_after().
+  ref2.splice_after(ref2.before_begin(), ref1, ref1.before_begin(), ref1.begin());
+  ifl2.splice_after(ifl2.before_begin(), ifl1, ifl1.before_begin(), ifl1.begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move { 1, 7 } to ref2/ifl2.
+  auto ref_it = ref1.begin();
+  auto ifl_it = ifl1.begin();
+  std::advance(ref_it, 3);
+  std::advance(ifl_it, 3);
+  ref2.splice_after(ref2.before_begin(), ref1, ref1.begin(), ref_it);
+  ifl2.splice_after(ifl2.before_begin(), ifl1, ifl1.begin(), ifl_it);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move { 8, 7, 2 } to the beginning of ref1/ifl1.
+  ref_it = ref1.begin();
+  ifl_it = ifl1.begin();
+  std::advance(ref_it, 3);
+  std::advance(ifl_it, 3);
+  ref1.splice_after(ref1.before_begin(), ref1, ref_it, ref1.end());
+  ifl1.splice_after(ifl1.before_begin(), ifl1, ifl_it, ifl1.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+
+  check.assign({ 8, 7, 2, 3, 4, 5, 4 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+  check.assign({ 1, 7 });
+  ASSERT_LISTS_EQUAL(check, ifl2);
+
+  // Move all but the first element to ref2/ifl2.
+  ref_it = ref2.begin();
+  ifl_it = ifl2.begin();
+  std::advance(ref_it, 1);
+  std::advance(ifl_it, 1);
+  ref2.splice_after(ref_it, ref1, ref1.begin(), ref1.end());
+  ifl2.splice_after(ifl_it, ifl1, ifl1.begin(), ifl1.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  check.assign({8});
+  ASSERT_LISTS_EQUAL(check, ifl1);
+
+  // Move the first element of ref1/ifl1 to the beginning of ref1/ifl1 (do nothing).
+  ref1.splice_after(ref1.before_begin(), ref1, ref1.before_begin());
+  ifl1.splice_after(ifl1.before_begin(), ifl1, ifl1.before_begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(check, ifl1);
+
+  // Move the first element of ref1/ifl1 after itself (do nothing).
+  ref1.splice_after(ref1.begin(), ref1, ref1.before_begin());
+  ifl1.splice_after(ifl1.begin(), ifl1, ifl1.before_begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(check, ifl1);
+
+  check.assign({ 1, 7, 7, 2, 3, 4, 5, 4 });
+  ASSERT_LISTS_EQUAL(check, ifl2);
+
+  // Move the first element of ref2/ifl2 to the beginning of ref2/ifl2 (do nothing).
+  ref2.splice_after(ref2.before_begin(), ref2, ref2.before_begin());
+  ifl2.splice_after(ifl2.before_begin(), ifl2, ifl2.before_begin());
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ASSERT_LISTS_EQUAL(check, ifl2);
+
+  // Move the first element of ref2/ifl2 after itself (do nothing).
+  ref2.splice_after(ref2.begin(), ref2, ref2.before_begin());
+  ifl2.splice_after(ifl2.begin(), ifl2, ifl2.before_begin());
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ASSERT_LISTS_EQUAL(check, ifl2);
+}
+
+TEST(IntrusiveForwardList, Remove) {
+  std::forward_list<int> ref({ 3, 1, 2, 7, 4, 5, 4, 8, 7 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.remove(1);
+  ifl.remove(1);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.remove(4);
+  ifl.remove(4);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  auto odd = [](IFLTestValue value) { return (value.value & 1) != 0; };  // NOLINT(readability/braces)
+  ref.remove_if(odd);
+  ifl.remove_if(odd);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  auto all = [](IFLTestValue value ATTRIBUTE_UNUSED) { return true; };  // NOLINT(readability/braces)
+  ref.remove_if(all);
+  ifl.remove_if(all);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, Unique) {
+  std::forward_list<int> ref({ 3, 1, 1, 2, 3, 3, 7, 7, 4, 4, 5, 7 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.unique();
+  ifl.unique();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 3, 1, 2, 3, 7, 4, 5, 7 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+
+  auto bin_pred = [](IFLTestValue lhs, IFLTestValue rhs) {
+    return (lhs.value & ~1) == (rhs.value & ~1);
+  };
+  ref.unique(bin_pred);
+  ifl.unique(bin_pred);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  check.assign({ 3, 1, 2, 7, 4, 7 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+TEST(IntrusiveForwardList, Merge) {
+  std::forward_list<int> ref1({ 1, 4, 8, 8, 12 });
+  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl1(storage1.begin(), storage1.end());
+  std::forward_list<int> ref2({ 3, 5, 6, 7, 9 });
+  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  IntrusiveForwardList<IFLTestValue> ifl2(storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  CHECK(std::is_sorted(ref1.begin(), ref1.end()));
+  CHECK(std::is_sorted(ref2.begin(), ref2.end()));
+  ref1.merge(ref2);
+  ifl1.merge(ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  CHECK(ref2.empty());
+  std::forward_list<int> check({ 1, 3, 4, 5, 6, 7, 8, 8, 9, 12 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+}
+
+TEST(IntrusiveForwardList, Sort1) {
+  std::forward_list<int> ref({ 2, 9, 8, 3, 7, 4, 1, 5, 3, 0 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(!std::is_sorted(ref.begin(), ref.end()));
+  ref.sort();
+  ifl.sort();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 0, 1, 2, 3, 3, 4, 5, 7, 8, 9 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+TEST(IntrusiveForwardList, Sort2) {
+  std::forward_list<int> ref({ 2, 9, 8, 3, 7, 4, 1, 5, 3, 0 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  auto cmp = [](IFLTestValue lhs, IFLTestValue rhs) {
+    return (lhs.value & ~1) < (rhs.value & ~1);
+  };
+  CHECK(!std::is_sorted(ref.begin(), ref.end(), cmp));
+  ref.sort(cmp);
+  ifl.sort(cmp);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 1, 0, 2, 3, 3, 4, 5, 7, 9, 8 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+TEST(IntrusiveForwardList, Reverse) {
+  std::forward_list<int> ref({ 8, 3, 5, 4, 1, 3 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(!std::is_sorted(ref.begin(), ref.end()));
+  ref.reverse();
+  ifl.reverse();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 3, 1, 4, 5, 3, 8 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+}  // namespace art
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
new file mode 100644
index 0000000..1b74313
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler.h"
+
+#include <algorithm>
+#include <vector>
+
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "arm/jni_macro_assembler_arm.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "arm64/jni_macro_assembler_arm64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "mips/assembler_mips.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "mips64/assembler_mips64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "x86/jni_macro_assembler_x86.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "x86_64/jni_macro_assembler_x86_64.h"
+#endif
+#include "base/casts.h"
+#include "globals.h"
+#include "memory_region.h"
+
+namespace art {
+
+using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>;
+
+template <>
+MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features) {
+#ifndef ART_ENABLE_CODEGEN_mips
+  UNUSED(instruction_set_features);
+#endif
+
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kArm:
+    case kThumb2:
+      return MacroAsm32UniquePtr(new (arena) arm::ArmJNIMacroAssembler(arena, instruction_set));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return MacroAsm32UniquePtr(new (arena) mips::MipsAssembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMipsInstructionSetFeatures()
+              : nullptr));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+    case kX86:
+      return MacroAsm32UniquePtr(new (arena) x86::X86JNIMacroAssembler(arena));
+#endif
+    default:
+      LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
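+
+// A minimal usage sketch (the surrounding setup is an assumption, not part of
+// this file):
+//
+//   ArenaPool pool;
+//   ArenaAllocator arena(&pool);
+//   std::unique_ptr<JNIMacroAssembler<PointerSize::k32>> jni_asm =
+//       JNIMacroAssembler<PointerSize::k32>::Create(&arena, kThumb2);
+//   ... emit code through the JNIMacroAssembler interface ...
+//   jni_asm->FinalizeCode();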
+
+using MacroAsm64UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k64>>;
+
+template <>
+MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) {
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+    case kArm64:
+      return MacroAsm64UniquePtr(new (arena) arm64::Arm64JNIMacroAssembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+    case kMips64:
+      return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64:
+      return MacroAsm64UniquePtr(new (arena) x86_64::X86_64JNIMacroAssembler(arena));
+#endif
+    default:
+      UNUSED(arena);
+      LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
new file mode 100644
index 0000000..6f45bd6
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "managed_register.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+
+namespace art {
+
+class ArenaAllocator;
+class DebugFrameOpCodeWriterForAssembler;
+class InstructionSetFeatures;
+class MemoryRegion;
+
+template <PointerSize kPointerSize>
+class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
+ public:
+  static std::unique_ptr<JNIMacroAssembler<kPointerSize>> Create(
+      ArenaAllocator* arena,
+      InstructionSet instruction_set,
+      const InstructionSetFeatures* instruction_set_features = nullptr);
+
+  // Finalize the code; emit slow paths, fix up branches, add the literal pool, etc.
+  virtual void FinalizeCode() = 0;
+
+  // Size of generated code
+  virtual size_t CodeSize() const = 0;
+
+  // Copy instructions out of assembly buffer into the given region of memory
+  virtual void FinalizeInstructions(const MemoryRegion& region) = 0;
+
+  // Emit code that will create an activation on the stack
+  virtual void BuildFrame(size_t frame_size,
+                          ManagedRegister method_reg,
+                          ArrayRef<const ManagedRegister> callee_save_regs,
+                          const ManagedRegisterEntrySpills& entry_spills) = 0;
+
+  // Emit code that will remove an activation from the stack
+  virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0;
+
+  virtual void IncreaseFrameSize(size_t adjust) = 0;
+  virtual void DecreaseFrameSize(size_t adjust) = 0;
+
+  // Store routines
+  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
+  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
+  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
+
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
+
+  virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
+                                        FrameOffset fr_offs,
+                                        ManagedRegister scratch) = 0;
+
+  virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
+
+  virtual void StoreSpanning(FrameOffset dest,
+                             ManagedRegister src,
+                             FrameOffset in_off,
+                             ManagedRegister scratch) = 0;
+
+  // Load routines
+  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
+
+  virtual void LoadFromThread(ManagedRegister dest,
+                              ThreadOffset<kPointerSize> src,
+                              size_t size) = 0;
+
+  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
+  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
+  virtual void LoadRef(ManagedRegister dest,
+                       ManagedRegister base,
+                       MemberOffset offs,
+                       bool unpoison_reference) = 0;
+
+  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
+
+  virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
+
+  // Copying routines
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
+
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs,
+                                    ThreadOffset<kPointerSize> thr_offs,
+                                    ManagedRegister scratch) = 0;
+
+  virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
+                                  FrameOffset fr_offs,
+                                  ManagedRegister scratch) = 0;
+
+  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
+
+  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    ManagedRegister src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest_base,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    FrameOffset src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest,
+                    Offset dest_offset,
+                    ManagedRegister src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
+
+  // Sign extension
+  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Zero extension
+  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Exploit fast access in managed code to Thread::Current()
+  virtual void GetCurrentThread(ManagedRegister tr) = 0;
+  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  virtual void CreateHandleScopeEntry(ManagedRegister out_reg,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister in_reg,
+                                      bool null_allowed) = 0;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  virtual void CreateHandleScopeEntry(FrameOffset out_off,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister scratch,
+                                      bool null_allowed) = 0;
+
+  // src holds a handle scope entry (Object**); load it into dst.
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to `this`) we
+  // know that src cannot be null.
+  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
+  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
+
+  // Call to address held at [base+offset]
+  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to a ExceptionSlowPath if it is.
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
+
+  virtual ~JNIMacroAssembler() {}
+
+  /**
+   * @brief Buffer of DWARF's Call Frame Information opcodes.
+   * @details It is used by debuggers and other tools to unwind the call stack.
+   */
+  virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
+
+ protected:
+  explicit JNIMacroAssembler() {}
+};
+
+template <typename T, PointerSize kPointerSize>
+class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> {
+ public:
+  void FinalizeCode() OVERRIDE {
+    asm_.FinalizeCode();
+  }
+
+  size_t CodeSize() const OVERRIDE {
+    return asm_.CodeSize();
+  }
+
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+    asm_.FinalizeInstructions(region);
+  }
+
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE {
+    return asm_.cfi();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerFwd(ArenaAllocator* arena) : asm_(arena) {}
+
+  T asm_;
+};
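+
+// An architecture back end derives from JNIMacroAssemblerFwd so that buffer
+// management is forwarded to the wrapped assembler in asm_, e.g. (hypothetical
+// sketch):
+//
+//   class X86JNIMacroAssembler FINAL
+//       : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> { ... };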
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
new file mode 100644
index 0000000..829f34b
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+
+#include "jni_macro_assembler.h"
+
+#include "assembler_test_base.h"
+#include "common_runtime_test.h"  // For ScratchFile
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+template<typename Ass>
+class JNIMacroAssemblerTest : public testing::Test {
+ public:
+  Ass* GetAssembler() {
+    return assembler_.get();
+  }
+
+  typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
+
+  void DriverFn(TestFn f, std::string test_name) {
+    DriverWrapper(f(this, assembler_.get()), test_name);
+  }
+
+  // This driver assumes the code under test has already been emitted into the assembler.
+  void DriverStr(std::string assembly_string, std::string test_name) {
+    DriverWrapper(assembly_string, test_name);
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    return test_helper_->CheckTools();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerTest() {}
+
+  void SetUp() OVERRIDE {
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(CreateAssembler(arena_.get()));
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
+
+    SetUpHelpers();
+  }
+
+  void TearDown() OVERRIDE {
+    test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
+  }
+
+  // Override this to set up any architecture-specific things, e.g., CPU revision.
+  virtual Ass* CreateAssembler(ArenaAllocator* arena) {
+    return new (arena) Ass(arena);
+  }
+
+  // Override this to set up any architecture-specific things, e.g., register vectors.
+  virtual void SetUpHelpers() {}
+
+  // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
+  virtual std::string GetArchitectureString() = 0;
+
+  // Get the name of the assembler, e.g., "as" by default.
+  virtual std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches passed to the assembler command. Default: none.
+  virtual std::string GetAssemblerParameters() {
+    return "";
+  }
+
+  // Get the name of the objdump, e.g., "objdump" by default.
+  virtual std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches passed to the objdump command. Default is " -h".
+  virtual std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Get the name of the disassembler, e.g., "objdump" by default.
+  virtual std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches passed to objdump for disassembling. As the input is a binary, the
+  // architecture and related options must be passed to objdump, so this is
+  // architecture-specific and there is no default.
+  virtual std::string GetDisassembleParameters() = 0;
+
+  // If the assembly file needs a header, return it in a sub-class.
+  virtual const char* GetAssemblyHeader() {
+    return nullptr;
+  }
+
+ private:
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
+  void DriverWrapper(std::string assembly_text, std::string test_name) {
+    assembler_->FinalizeCode();
+    size_t cs = assembler_->CodeSize();
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+    Pad(*data);
+    test_helper_->Driver(*data, assembly_text, test_name);
+  }
+
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
+  std::unique_ptr<Ass> assembler_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+
+  DISALLOW_COPY_AND_ASSIGN(JNIMacroAssemblerTest);
+};
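+
+// A concrete test fixture only needs to supply the architecture specifics,
+// e.g. (hypothetical sketch):
+//
+//   class JNIMacroAssemblerX86Test
+//       : public JNIMacroAssemblerTest<x86::X86Assembler> {
+//    protected:
+//     std::string GetArchitectureString() OVERRIDE { return "x86"; }
+//     std::string GetDisassembleParameters() OVERRIDE {
+//       return " -D -bbinary -mi386";
+//     }
+//   };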
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 893daff..46adb3f 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -47,40 +47,40 @@
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
 
   ManagedRegister& operator=(const ManagedRegister& other) {
     id_ = other.id_;
     return *this;
   }
 
-  arm::ArmManagedRegister AsArm() const;
-  arm64::Arm64ManagedRegister AsArm64() const;
-  mips::MipsManagedRegister AsMips() const;
-  mips64::Mips64ManagedRegister AsMips64() const;
-  x86::X86ManagedRegister AsX86() const;
-  x86_64::X86_64ManagedRegister AsX86_64() const;
+  constexpr arm::ArmManagedRegister AsArm() const;
+  constexpr arm64::Arm64ManagedRegister AsArm64() const;
+  constexpr mips::MipsManagedRegister AsMips() const;
+  constexpr mips64::Mips64ManagedRegister AsMips64() const;
+  constexpr x86::X86ManagedRegister AsX86() const;
+  constexpr x86_64::X86_64ManagedRegister AsX86_64() const;
 
   // It is valid to invoke Equals on and with a NoRegister.
-  bool Equals(const ManagedRegister& other) const {
+  constexpr bool Equals(const ManagedRegister& other) const {
     return id_ == other.id_;
   }
 
-  bool IsNoRegister() const {
+  constexpr bool IsNoRegister() const {
     return id_ == kNoRegister;
   }
 
-  static ManagedRegister NoRegister() {
+  static constexpr ManagedRegister NoRegister() {
     return ManagedRegister();
   }
 
-  int RegId() const { return id_; }
-  explicit ManagedRegister(int reg_id) : id_(reg_id) { }
+  constexpr int RegId() const { return id_; }
+  explicit constexpr ManagedRegister(int reg_id) : id_(reg_id) { }
 
  protected:
   static const int kNoRegister = -1;
 
-  ManagedRegister() : id_(kNoRegister) { }
+  constexpr ManagedRegister() : id_(kNoRegister) { }
 
   int id_;
 };
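+
+// With the constexpr constructors and accessors above, managed registers can
+// be formed and checked at compile time, e.g. (sketch):
+//
+//   static_assert(ManagedRegister::NoRegister().IsNoRegister(),
+//                 "NoRegister() must produce the no-register sentinel");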
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 6f35e9e..8b7da3f 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -26,6 +26,11 @@
 namespace art {
 namespace mips {
 
+static_assert(static_cast<size_t>(kMipsPointerSize) == kMipsWordSize,
+              "Unexpected Mips pointer size.");
+static_assert(kMipsPointerSize == PointerSize::k32, "Unexpected Mips pointer size.");
+
 std::ostream& operator<<(std::ostream& os, const DRegister& rhs) {
   if (rhs >= D0 && rhs < kNumberOfDRegisters) {
     os << "d" << static_cast<int>(rhs);
@@ -39,12 +44,65 @@
   for (auto& exception_block : exception_blocks_) {
     EmitExceptionPoll(&exception_block);
   }
+  EmitLiterals();
   PromoteBranches();
 }
 
 void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  size_t number_of_delayed_adjust_pcs = cfi().NumberOfDelayedAdvancePCs();
   EmitBranches();
   Assembler::FinalizeInstructions(region);
+  PatchCFI(number_of_delayed_adjust_pcs);
+}
+
+void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    DCHECK_EQ(number_of_delayed_adjust_pcs, 0u);
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // PCs recorded before EmitBranches() need to be adjusted.
+  // PCs recorded during EmitBranches() are already adjusted.
+  // Both ranges are separately sorted but they may overlap.
+  if (kIsDebugBuild) {
+    auto cmp = [](const DelayedAdvancePC& lhs, const DelayedAdvancePC& rhs) {
+      return lhs.pc < rhs.pc;
+    };
+    CHECK(std::is_sorted(advances.begin(), advances.begin() + number_of_delayed_adjust_pcs, cmp));
+    CHECK(std::is_sorted(advances.begin() + number_of_delayed_adjust_pcs, advances.end(), cmp));
+  }
+
+  // Append initial CFI data if any.
+  size_t size = advances.size();
+  DCHECK_NE(size, 0u);
+  cfi().AppendRawData(old_stream, 0u, advances[0].stream_pos);
+  // Emit PC adjustments interleaved with the old CFI stream.
+  size_t adjust_pos = 0u;
+  size_t late_emit_pos = number_of_delayed_adjust_pcs;
+  while (adjust_pos != number_of_delayed_adjust_pcs || late_emit_pos != size) {
+    size_t adjusted_pc = (adjust_pos != number_of_delayed_adjust_pcs)
+        ? GetAdjustedPosition(advances[adjust_pos].pc)
+        : static_cast<size_t>(-1);
+    size_t late_emit_pc = (late_emit_pos != size)
+        ? advances[late_emit_pos].pc
+        : static_cast<size_t>(-1);
+    size_t advance_pc = std::min(adjusted_pc, late_emit_pc);
+    DCHECK_NE(advance_pc, static_cast<size_t>(-1));
+    size_t entry = (adjusted_pc <= late_emit_pc) ? adjust_pos : late_emit_pos;
+    if (adjusted_pc <= late_emit_pc) {
+      ++adjust_pos;
+    } else {
+      ++late_emit_pos;
+    }
+    cfi().AdvancePC(advance_pc);
+    size_t end_pos = (entry + 1u == size) ? old_stream.size() : advances[entry + 1u].stream_pos;
+    cfi().AppendRawData(old_stream, advances[entry].stream_pos, end_pos);
+  }
 }
 
 void MipsAssembler::EmitBranches() {
@@ -197,6 +255,11 @@
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void MipsAssembler::MuhR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
   EmitR(0, rs, rt, rd, 3, 0x19);
@@ -250,6 +313,46 @@
   EmitR(0, rs, rt, rd, 0, 0x27);
 }
 
+void MipsAssembler::Movz(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x0A);
+}
+
+void MipsAssembler::Movn(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x0B);
+}
+
+void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x35);
+}
+
+void MipsAssembler::Selnez(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x37);
+}
+
+void MipsAssembler::ClzR6(Register rd, Register rs) {
+  CHECK(IsR6());
+  EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10);
+}
+
+void MipsAssembler::ClzR2(Register rd, Register rs) {
+  CHECK(!IsR6());
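+  // Pre-R6 CLZ (SPECIAL2, opcode 0x1C) requires the same GPR in the rt and rd
+  // fields, hence rd is encoded twice; the same applies to CLO in CloR2 below.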
+  EmitR(0x1C, rs, rd, rd, 0, 0x20);
+}
+
+void MipsAssembler::CloR6(Register rd, Register rs) {
+  CHECK(IsR6());
+  EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11);
+}
+
+void MipsAssembler::CloR2(Register rd, Register rs) {
+  CHECK(!IsR6());
+  EmitR(0x1C, rs, rd, rd, 0, 0x21);
+}
+
 void MipsAssembler::Seb(Register rd, Register rt) {
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20);
 }
@@ -258,15 +361,32 @@
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
 }
 
+void MipsAssembler::Wsbh(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
+}
+
+void MipsAssembler::Bitswap(Register rd, Register rt) {
+  CHECK(IsR6());
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20);
+}
+
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
 }
 
 void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
 }
 
+void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
+  EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02);
+}
+
 void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
 }
 
@@ -278,10 +398,28 @@
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
 
+void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) {
+  EmitR(0, rs, rt, rd, 1, 0x06);
+}
+
 void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
 
+void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(0 < size && size <= 32) << size;
+  CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
+  EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00);
+}
+
+void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(0 < size && size <= 32) << size;
+  CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
+  EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04);
+}
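+
+// For example, Ext(A0, A1, 3, 8) extracts bits [10:3] of A1 into bits [7:0] of
+// A0 (zero-extended), while Ins(A0, A1, 3, 8) inserts bits [7:0] of A1 into
+// bits [10:3] of A0, leaving the other bits of A0 unchanged.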
+
 void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x20, rs, rt, imm16);
 }
@@ -294,6 +432,16 @@
   EmitI(0x23, rs, rt, imm16);
 }
 
+void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x22, rs, rt, imm16);
+}
+
+void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x26, rs, rt, imm16);
+}
+
 void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x24, rs, rt, imm16);
 }
@@ -302,10 +450,21 @@
   EmitI(0x25, rs, rt, imm16);
 }
 
+void MipsAssembler::Lwpc(Register rs, uint32_t imm19) {
+  CHECK(IsR6());
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, (0x01 << 19) | imm19);
+}
+
 void MipsAssembler::Lui(Register rt, uint16_t imm16) {
   EmitI(0xf, static_cast<Register>(0), rt, imm16);
 }
 
+void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0xf, rs, rt, imm16);
+}
+
 void MipsAssembler::Sync(uint32_t stype) {
   EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0),
         stype & 0x1f, 0xf);
@@ -333,6 +492,38 @@
   EmitI(0x2b, rs, rt, imm16);
 }
 
+void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x2a, rs, rt, imm16);
+}
+
+void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x2e, rs, rt, imm16);
+}
+
+void MipsAssembler::LlR2(Register rt, Register base, int16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x30, base, rt, imm16);
+}
+
+void MipsAssembler::ScR2(Register rt, Register base, int16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x38, base, rt, imm16);
+}
+
+void MipsAssembler::LlR6(Register rt, Register base, int16_t imm9) {
+  CHECK(IsR6());
+  CHECK(IsInt<9>(imm9));
+  EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36);
+}
+
+void MipsAssembler::ScR6(Register rt, Register base, int16_t imm9) {
+  CHECK(IsR6());
+  CHECK(IsInt<9>(imm9));
+  EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26);
+}
+
 void MipsAssembler::Slt(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x2a);
 }
@@ -353,6 +544,10 @@
   EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
 }
 
+void MipsAssembler::Bal(uint16_t imm16) {
+  EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16);
+}
+
 void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x4, rs, rt, imm16);
 }
@@ -385,6 +580,26 @@
   EmitI(0x7, rt, static_cast<Register>(0), imm16);
 }
 
+void MipsAssembler::Bc1f(uint16_t imm16) {
+  Bc1f(0, imm16);
+}
+
+void MipsAssembler::Bc1f(int cc, uint16_t imm16) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16);
+}
+
+void MipsAssembler::Bc1t(uint16_t imm16) {
+  Bc1t(0, imm16);
+}
+
+void MipsAssembler::Bc1t(int cc, uint16_t imm16) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16);
+}
+
 void MipsAssembler::J(uint32_t addr26) {
   EmitI26(0x2, addr26);
 }
@@ -425,6 +640,11 @@
   EmitI26(0x32, imm26);
 }
 
+void MipsAssembler::Balc(uint32_t imm26) {
+  CHECK(IsR6());
+  EmitI26(0x3A, imm26);
+}
+
 void MipsAssembler::Jic(Register rt, uint16_t imm16) {
   CHECK(IsR6());
   EmitI(0x36, static_cast<Register>(0), rt, imm16);
@@ -519,7 +739,17 @@
   EmitI21(0x3E, rs, imm21);
 }
 
-void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
+void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitFI(0x11, 0xD, ft, imm16);
+}
+
+void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
   switch (cond) {
     case kCondLTZ:
       CHECK_EQ(rt, ZERO);
@@ -551,6 +781,14 @@
       CHECK_EQ(rt, ZERO);
       Bnez(rs, imm16);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1f(static_cast<int>(rs), imm16);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1t(static_cast<int>(rs), imm16);
+      break;
     case kCondLT:
     case kCondGE:
     case kCondLE:
@@ -565,7 +803,7 @@
   }
 }
 
-void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
+void MipsAssembler::EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
   switch (cond) {
     case kCondLT:
       Bltc(rs, rt, imm16_21);
@@ -615,6 +853,14 @@
     case kCondGEU:
       Bgeuc(rs, rt, imm16_21);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FRegister>(rs), imm16_21);
+      break;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
       UNREACHABLE();
@@ -653,6 +899,22 @@
   EmitFR(0x11, 0x11, ft, fs, fd, 0x3);
 }
 
+void MipsAssembler::SqrtS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4);
+}
+
+void MipsAssembler::SqrtD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4);
+}
+
+void MipsAssembler::AbsS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5);
+}
+
+void MipsAssembler::AbsD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5);
+}
+
 void MipsAssembler::MovS(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6);
 }
@@ -669,6 +931,338 @@
   EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7);
 }
 
+void MipsAssembler::CunS(FRegister fs, FRegister ft) {
+  CunS(0, fs, ft);
+}
+
+void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+}
+
+void MipsAssembler::CeqS(FRegister fs, FRegister ft) {
+  CeqS(0, fs, ft);
+}
+
+void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+}
+
+void MipsAssembler::CueqS(FRegister fs, FRegister ft) {
+  CueqS(0, fs, ft);
+}
+
+void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+}
+
+void MipsAssembler::ColtS(FRegister fs, FRegister ft) {
+  ColtS(0, fs, ft);
+}
+
+void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+}
+
+void MipsAssembler::CultS(FRegister fs, FRegister ft) {
+  CultS(0, fs, ft);
+}
+
+void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+}
+
+void MipsAssembler::ColeS(FRegister fs, FRegister ft) {
+  ColeS(0, fs, ft);
+}
+
+void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+}
+
+void MipsAssembler::CuleS(FRegister fs, FRegister ft) {
+  CuleS(0, fs, ft);
+}
+
+void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+}
+
+void MipsAssembler::CunD(FRegister fs, FRegister ft) {
+  CunD(0, fs, ft);
+}
+
+void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+}
+
+void MipsAssembler::CeqD(FRegister fs, FRegister ft) {
+  CeqD(0, fs, ft);
+}
+
+void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+}
+
+void MipsAssembler::CueqD(FRegister fs, FRegister ft) {
+  CueqD(0, fs, ft);
+}
+
+void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+}
+
+void MipsAssembler::ColtD(FRegister fs, FRegister ft) {
+  ColtD(0, fs, ft);
+}
+
+void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+}
+
+void MipsAssembler::CultD(FRegister fs, FRegister ft) {
+  CultD(0, fs, ft);
+}
+
+void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+}
+
+void MipsAssembler::ColeD(FRegister fs, FRegister ft) {
+  ColeD(0, fs, ft);
+}
+
+void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+}
+
+void MipsAssembler::CuleD(FRegister fs, FRegister ft) {
+  CuleD(0, fs, ft);
+}
+
+void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+}
+
+void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
+void MipsAssembler::Movf(Register rd, Register rs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01);
+}
+
+void MipsAssembler::Movt(Register rd, Register rs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
+}
+
+void MipsAssembler::MovfS(FRegister fd, FRegister fs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11);
+}
+
+void MipsAssembler::MovfD(FRegister fd, FRegister fs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11);
+}
+
+void MipsAssembler::MovtS(FRegister fd, FRegister fs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11);
+}
+
+void MipsAssembler::MovtD(FRegister fd, FRegister fs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11);
+}
+
+void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x10);
+}
+
+void MipsAssembler::SelD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x10);
+}
+
+void MipsAssembler::ClassS(FRegister fd, FRegister fs) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b);
+}
+
+void MipsAssembler::ClassD(FRegister fd, FRegister fs) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b);
+}
+
+void MipsAssembler::MinS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x1c);
+}
+
+void MipsAssembler::MinD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x1c);
+}
+
+void MipsAssembler::MaxS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x10, ft, fs, fd, 0x1e);
+}
+
+void MipsAssembler::MaxD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
+}
+
+void MipsAssembler::TruncLS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncLD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncWS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
+void MipsAssembler::TruncWD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
 void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
 }
@@ -685,6 +1279,22 @@
   EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
 }
 
+void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21);
+}
+
+void MipsAssembler::FloorWS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf);
+}
+
+void MipsAssembler::FloorWD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf);
+}
+
 void MipsAssembler::Mfc1(Register rt, FRegister fs) {
   EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
@@ -701,6 +1311,24 @@
   EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
+void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) {
+  if (Is32BitFPU()) {
+    CHECK_EQ(fs % 2, 0) << fs;
+    Mfc1(rt, static_cast<FRegister>(fs + 1));
+  } else {
+    Mfhc1(rt, fs);
+  }
+}
+
+void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) {
+  if (Is32BitFPU()) {
+    CHECK_EQ(fs % 2, 0) << fs;
+    Mtc1(rt, static_cast<FRegister>(fs + 1));
+  } else {
+    Mthc1(rt, fs);
+  }
+}
+
 void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
   EmitI(0x31, rs, static_cast<Register>(ft), imm16);
 }
@@ -769,22 +1397,27 @@
 }
 
 void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) {
-  LoadConst32(reg_lo, Low32Bits(value));
-  LoadConst32(reg_hi, High32Bits(value));
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  LoadConst32(reg_lo, low);
+  if (high != low) {
+    LoadConst32(reg_hi, high);
+  } else {
+    Move(reg_hi, reg_lo);
+  }
 }
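+// For example, loading INT64_C(0x0000000500000005) now emits a LoadConst32 for
+// the low word and a single Move for the high word instead of materializing
+// the same 32-bit constant twice.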
 
 void MipsAssembler::StoreConst32ToOffset(int32_t value,
                                          Register base,
                                          int32_t offset,
                                          Register temp) {
-  if (!IsInt<16>(offset)) {
-    CHECK_NE(temp, AT);  //  Must not use AT as temp, as not to overwrite the loaded value.
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
+  CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ false);
+  if (value == 0) {
+    temp = ZERO;
+  } else {
+    LoadConst32(temp, value);
   }
-  LoadConst32(temp, value);
   Sw(temp, base, offset);
 }
 
@@ -792,38 +1425,82 @@
                                          Register base,
                                          int32_t offset,
                                          Register temp) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) || !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize))) {
-    CHECK_NE(temp, AT);  //  Must not use AT as temp, as not to overwrite the loaded value.
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
+  CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ true);
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  if (low == 0) {
+    Sw(ZERO, base, offset);
+  } else {
+    LoadConst32(temp, low);
+    Sw(temp, base, offset);
   }
-  LoadConst32(temp, Low32Bits(value));
-  Sw(temp, base, offset);
-  LoadConst32(temp, High32Bits(value));
-  Sw(temp, base, offset + kMipsWordSize);
+  if (high == 0) {
+    Sw(ZERO, base, offset + kMipsWordSize);
+  } else {
+    if (high != low) {
+      LoadConst32(temp, high);
+    }
+    Sw(temp, base, offset + kMipsWordSize);
+  }
 }
 
 void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) {
-  LoadConst32(temp, value);
+  if (value == 0) {
+    temp = ZERO;
+  } else {
+    LoadConst32(temp, value);
+  }
   Mtc1(temp, r);
 }
 
 void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) {
-  LoadConst32(temp, Low32Bits(value));
-  Mtc1(temp, rd);
-  LoadConst32(temp, High32Bits(value));
-  Mthc1(temp, rd);
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  if (low == 0) {
+    Mtc1(ZERO, rd);
+  } else {
+    LoadConst32(temp, low);
+    Mtc1(temp, rd);
+  }
+  if (high == 0) {
+    MoveToFpuHigh(ZERO, rd);
+  } else {
+    LoadConst32(temp, high);
+    MoveToFpuHigh(temp, rd);
+  }
 }
 
 void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) {
+  CHECK_NE(rs, temp);  // Must not overwrite the register `rs` while loading `value`.
   if (IsInt<16>(value)) {
     Addiu(rt, rs, value);
+  } else if (IsR6()) {
+    int16_t high = High16Bits(value);
+    int16_t low = Low16Bits(value);
+    high += (low < 0) ? 1 : 0;  // Account for sign extension in addiu.
+    if (low != 0) {
+      Aui(temp, rs, high);
+      Addiu(rt, temp, low);
+    } else {
+      Aui(rt, rs, high);
+    }
   } else {
-    LoadConst32(temp, value);
-    Addu(rt, rs, temp);
+    // Do not load the whole 32-bit `value` if it can be represented as
+    // a sum of two 16-bit signed values. This can save an instruction.
+    constexpr int32_t kMinValueForSimpleAdjustment = std::numeric_limits<int16_t>::min() * 2;
+    constexpr int32_t kMaxValueForSimpleAdjustment = std::numeric_limits<int16_t>::max() * 2;
+    if (0 <= value && value <= kMaxValueForSimpleAdjustment) {
+      Addiu(temp, rs, kMaxValueForSimpleAdjustment / 2);
+      Addiu(rt, temp, value - kMaxValueForSimpleAdjustment / 2);
+    } else if (kMinValueForSimpleAdjustment <= value && value < 0) {
+      Addiu(temp, rs, kMinValueForSimpleAdjustment / 2);
+      Addiu(rt, temp, value - kMinValueForSimpleAdjustment / 2);
+    } else {
+      // Now that all shorter options have been exhausted, load the full 32-bit value.
+      LoadConst32(temp, value);
+      Addu(rt, rs, temp);
+    }
   }
 }
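+// For example, on R2, Addiu32(A0, A1, -40000, AT) emits Addiu(AT, A1, -32768)
+// followed by Addiu(A0, AT, -7232): two instructions instead of the three
+// (here, LUI + ORI + ADDU) that the generic LoadConst32 path would need.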
 
@@ -833,30 +1510,47 @@
   type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
 }
 
-void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) {
+void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) {
+  CHECK_EQ(is_call && is_literal, false);
   OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
   if (is_r6) {
     // R6
-    if (is_call) {
+    if (is_literal) {
+      CHECK(!IsResolved());
+      type_ = kR6Literal;
+    } else if (is_call) {
       InitShortOrLong(offset_size, kR6Call, kR6LongCall);
-    } else if (condition_ == kUncond) {
-      InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
     } else {
-      if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
-        // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
-        type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
-      } else {
-        InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+          break;
+        case kCondEQZ:
+        case kCondNEZ:
+          // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+          type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+          break;
+        default:
+          InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+          break;
       }
     }
   } else {
     // R2
-    if (is_call) {
+    if (is_literal) {
+      CHECK(!IsResolved());
+      type_ = kLiteral;
+    } else if (is_call) {
       InitShortOrLong(offset_size, kCall, kLongCall);
-    } else if (condition_ == kUncond) {
-      InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
     } else {
-      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+          break;
+        default:
+          InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+          break;
+      }
     }
   }
   old_type_ = type_;
@@ -888,14 +1582,14 @@
   }
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call)
     : old_location_(location),
       location_(location),
       target_(target),
       lhs_reg_(0),
       rhs_reg_(0),
       condition_(kUncond) {
-  InitializeType(false, is_r6);
+  InitializeType(is_call, /* is_literal */ false, is_r6);
 }
 
 MipsAssembler::Branch::Branch(bool is_r6,
@@ -940,6 +1634,10 @@
       CHECK_NE(lhs_reg, ZERO);
       CHECK_EQ(rhs_reg, ZERO);
       break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
     case kUncond:
       UNREACHABLE();
   }
@@ -948,19 +1646,23 @@
     // Branch condition is always true, make the branch unconditional.
     condition_ = kUncond;
   }
-  InitializeType(false, is_r6);
+  InitializeType(/* is_call */ false, /* is_literal */ false, is_r6);
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg)
     : old_location_(location),
       location_(location),
-      target_(target),
-      lhs_reg_(indirect_reg),
-      rhs_reg_(0),
+      target_(kUnresolved),
+      lhs_reg_(dest_reg),
+      rhs_reg_(base_reg),
       condition_(kUncond) {
-  CHECK_NE(indirect_reg, ZERO);
-  CHECK_NE(indirect_reg, AT);
-  InitializeType(true, is_r6);
+  CHECK_NE(dest_reg, ZERO);
+  if (is_r6) {
+    CHECK_EQ(base_reg, ZERO);
+  } else {
+    CHECK_NE(base_reg, ZERO);
+  }
+  InitializeType(/* is_call */ false, /* is_literal */ true, is_r6);
 }
 
 MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition(
@@ -994,6 +1696,10 @@
       return kCondGEU;
     case kCondGEU:
       return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
   }
@@ -1058,19 +1764,27 @@
     case kUncondBranch:
     case kCondBranch:
     case kCall:
+    // R2 near literal.
+    case kLiteral:
     // R6 short branches.
     case kR6UncondBranch:
     case kR6CondBranch:
     case kR6Call:
+    // R6 near literal.
+    case kR6Literal:
       return false;
     // R2 long branches.
     case kLongUncondBranch:
     case kLongCondBranch:
     case kLongCall:
+    // R2 far literal.
+    case kFarLiteral:
     // R6 long branches.
     case kR6LongUncondBranch:
     case kR6LongCondBranch:
     case kR6LongCall:
+    // R6 far literal.
+    case kR6FarLiteral:
       return true;
   }
   UNREACHABLE();
@@ -1139,6 +1853,10 @@
     case kCall:
       type_ = kLongCall;
       break;
+    // R2 near literal.
+    case kLiteral:
+      type_ = kFarLiteral;
+      break;
     // R6 short branches.
     case kR6UncondBranch:
       type_ = kR6LongUncondBranch;
@@ -1149,6 +1867,10 @@
     case kR6Call:
       type_ = kR6LongCall;
       break;
+    // R6 near literal.
+    case kR6Literal:
+      type_ = kR6FarLiteral;
+      break;
     default:
       // Note: 'type_' is already long.
       break;
@@ -1156,14 +1878,26 @@
   CHECK(IsLong());
 }
 
-uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const {
+  switch (branch->GetType()) {
+    case Branch::kLiteral:
+    case Branch::kFarLiteral:
+      return GetLabelLocation(&pc_rel_base_label_);
+    default:
+      return branch->GetLocation();
+  }
+}
+
+uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) {
+  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+  // `this->GetLocation()` for everything else.
   // If the branch is still unresolved or already long, nothing to do.
   if (IsLong() || !IsResolved()) {
     return 0;
   }
   // Promote the short branch to long if the offset size is too small
-  // to hold the distance between location_ and target_.
-  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+  // to hold the distance between location and target_.
+  if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) {
     PromoteToLong();
     uint32_t old_size = GetOldSize();
     uint32_t new_size = GetSize();
@@ -1173,7 +1907,7 @@
   // The following logic is for debugging/testing purposes.
   // Promote some short branches to long when it's not really required.
   if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
-    int64_t distance = static_cast<int64_t>(target_) - location_;
+    int64_t distance = static_cast<int64_t>(target_) - location;
     distance = (distance >= 0) ? distance : -distance;
     if (distance >= max_short_distance) {
       PromoteToLong();
@@ -1190,12 +1924,26 @@
   return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
 }
 
-uint32_t MipsAssembler::Branch::GetOffset() const {
+uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const {
+  switch (branch->GetType()) {
+    case Branch::kLiteral:
+    case Branch::kFarLiteral:
+      return GetLabelLocation(&pc_rel_base_label_);
+    default:
+      return branch->GetOffsetLocation() +
+          Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t);
+  }
+}
+
+uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const {
+  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+  // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)`
+  // for everything else.
   CHECK(IsResolved());
   uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
   // Calculate the byte distance between instructions and also account for
   // different PC-relative origins.
-  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  uint32_t offset = target_ - location;
   // Prepare the offset for encoding into the instruction(s).
   offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
   return offset;
@@ -1242,7 +1990,7 @@
   label->BindTo(bound_pc);
 }
 
-uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const {
+uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
   CHECK(label->IsBound());
   uint32_t target = label->Position();
   if (label->prev_branch_id_plus_one_) {
@@ -1277,6 +2025,14 @@
   return old_position + last_position_adjustment_;
 }
 
+void MipsAssembler::BindPcRelBaseLabel() {
+  Bind(&pc_rel_base_label_);
+}
+
+uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const {
+  return GetLabelLocation(&pc_rel_base_label_);
+}
+
 void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
   uint32_t length = branches_.back().GetLength();
   if (!label->IsBound()) {
@@ -1298,7 +2054,7 @@
 
 void MipsAssembler::Buncond(MipsLabel* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target);
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
   FinalizeLabeledBranch(label);
 }
 
@@ -1312,12 +2068,46 @@
   FinalizeLabeledBranch(label);
 }
 
-void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) {
+void MipsAssembler::Call(MipsLabel* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg);
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
   FinalizeLabeledBranch(label);
 }
 
+Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) {
+  DCHECK(size == 4u || size == 8u) << size;
+  literals_.emplace_back(size, data);
+  return &literals_.back();
+}
+
+void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) {
+  // Literal loads are treated as pseudo branches since they require very similar handling.
+  DCHECK_EQ(literal->GetSize(), 4u);
+  MipsLabel* label = literal->GetLabel();
+  DCHECK(!label->IsBound());
+  branches_.emplace_back(IsR6(),
+                         buffer_.Size(),
+                         dest_reg,
+                         base_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::EmitLiterals() {
+  if (!literals_.empty()) {
+    // We don't support byte and half-word literals.
+    // TODO: proper alignment for 64-bit literals when they're implemented.
+    for (Literal& literal : literals_) {
+      MipsLabel* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+}
+
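Taken together with NewLiteral() and LoadLiteral() above, the flow a code generator would follow is roughly the sketch below. This is hedged: it assumes an already-constructed R6 assembler named `assembler`, and that FinalizeCode() drives branch promotion and EmitLiterals(), consistent with literals being handled as pseudo branches; on R6 the base register must be ZERO, per the header comment.

    mips::Literal* lit = assembler.NewLiteral<int32_t>(0x12345678);
    assembler.LoadLiteral(mips::V0, mips::ZERO, lit);  // Near: lwpc; far: auipc + lw.
    assembler.FinalizeCode();  // Lays out code, branches, and the literal pool.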
 void MipsAssembler::PromoteBranches() {
   // Promote short branches to long as necessary.
   bool changed;
@@ -1325,7 +2115,8 @@
     changed = false;
     for (auto& branch : branches_) {
       CHECK(branch.IsResolved());
-      uint32_t delta = branch.PromoteIfNeeded();
+      uint32_t base = GetBranchLocationOrPcRelBase(&branch);
+      uint32_t delta = branch.PromoteIfNeeded(base);
       // If this branch has been promoted and needs to expand in size,
       // relocate all branches by the expansion size.
       if (delta) {
@@ -1363,27 +2154,35 @@
   // R2 short branches.
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kUncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCondBranch
-  {  5, 2, 0, MipsAssembler::Branch::kOffset16, 0 },  // kCall
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCall
+  // R2 near literal.
+  {  1, 0, 0, MipsAssembler::Branch::kOffset16, 0 },  // kLiteral
   // R2 long branches.
   {  9, 3, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongUncondBranch
   { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCondBranch
   {  6, 1, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCall
+  // R2 far literal.
+  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kFarLiteral
   // R6 short branches.
   {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6UncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6CondBranch
                                                       // Exception: kOffset23 for beqzc/bnezc.
-  {  2, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Call
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6Call
+  // R6 near literal.
+  {  1, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Literal
   // R6 long branches.
   {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongUncondBranch
   {  3, 1, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCondBranch
-  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+  // R6 far literal.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6FarLiteral
 };
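For readers decoding these rows: matching the accessors used in this file (instr_offset in GetOffsetLocation(), pc_org in GetBranchOrPcRelBaseForEncoding(), offset_shift in GetOffset()), the field order appears to be { length, instr_offset, pc_org, offset_size, offset_shift }. For example:

    // { 2, 0, 1, kOffset18, 2 }  // kUncondBranch (inferred reading):
    //   2 instructions (branch + delay slot); the offset lives in instruction 0;
    //   the PC origin is 1 instruction past it (the delay slot); offsets span
    //   18 bits and are encoded at 4-byte (1 << 2) granularity.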
 
-// Note: make sure branch_info_[] and mitBranch() are kept synchronized.
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
 void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
   CHECK_EQ(overwriting_, true);
   overwrite_location_ = branch->GetLocation();
-  uint32_t offset = branch->GetOffset();
+  uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
   BranchCondition condition = branch->GetCondition();
   Register lhs = branch->GetLeftRegister();
   Register rhs = branch->GetRightRegister();
@@ -1396,16 +2195,19 @@
       break;
     case Branch::kCondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcond(condition, lhs, rhs, offset);
+      EmitBcondR2(condition, lhs, rhs, offset);
       Nop();  // TODO: improve by filling the delay slot.
       break;
     case Branch::kCall:
-      Nal();
-      Nop();  // TODO: is this NOP really needed here?
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiu(lhs, RA, offset);
-      Jalr(lhs);
-      Nop();
+      Bal(offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+
+    // R2 near literal.
+    case Branch::kLiteral:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lw(lhs, rhs, offset);
       break;
 
     // R2 long branches.
@@ -1443,7 +2245,7 @@
       // Note: the opposite condition branch encodes 8 as the distance, which is equal to the
       // number of instructions skipped:
       // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR).
-      EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, 8);
+      EmitBcondR2(Branch::OppositeCondition(condition), lhs, rhs, 8);
       Push(RA);
       Nal();
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1459,11 +2261,20 @@
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Lui(AT, High16Bits(offset));
       Ori(AT, AT, Low16Bits(offset));
-      Addu(lhs, AT, RA);
-      Jalr(lhs);
+      Addu(AT, AT, RA);
+      Jalr(AT);
       Nop();
       break;
 
+    // R2 far literal.
+    case Branch::kFarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Addu(AT, AT, rhs);
+      Lw(lhs, AT, Low16Bits(offset));
+      break;
+
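The `offset += (offset & 0x8000) << 1` idiom above (and in the long-branch cases below) compensates for the sign extension of the 16-bit immediate. A hedged arithmetic check with an illustrative offset:

    #include <cstdint>
    // lw/jic/jialc sign-extend their 16-bit immediate, so when bit 15 of the
    // low half is set the high half must be pre-incremented.
    constexpr uint32_t kOffset = 0x12348000u;
    constexpr uint32_t kFixed = kOffset + ((kOffset & 0x8000u) << 1);  // 0x12358000
    static_assert((kFixed >> 16) == 0x1235u, "High16Bits after compensation");
    // 0x12350000 + sign_extend(0x8000) reproduces the original offset:
    static_assert(0x12350000u + 0xFFFF8000u == 0x12348000u, "round-trip");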
     // R6 short branches.
     case Branch::kR6UncondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1471,13 +2282,18 @@
       break;
     case Branch::kR6CondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcondc(condition, lhs, rhs, offset);
-      Nop();  // TODO: improve by filling the forbidden slot.
+      EmitBcondR6(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
       break;
     case Branch::kR6Call:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiupc(lhs, offset);
-      Jialc(lhs, 0);
+      Balc(offset);
+      break;
+
+    // R6 near literal.
+    case Branch::kR6Literal:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lwpc(lhs, offset);
       break;
 
     // R6 long branches.
@@ -1488,18 +2304,25 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kR6LongCondBranch:
-      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2);
       offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Auipc(AT, High16Bits(offset));
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kR6LongCall:
-      offset += (offset & 0x8000) << 1;  // Account for sign extension in addiu.
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jialc.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Auipc(lhs, High16Bits(offset));
-      Addiu(lhs, lhs, Low16Bits(offset));
-      Jialc(lhs, 0);
+      Auipc(AT, High16Bits(offset));
+      Jialc(AT, Low16Bits(offset));
+      break;
+
+    // R6 far literal.
+    case Branch::kR6FarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Lw(lhs, AT, Low16Bits(offset));
       break;
   }
   CHECK_EQ(overwrite_location_, branch->GetEndLocation());
@@ -1510,8 +2333,8 @@
   Buncond(label);
 }
 
-void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) {
-  Call(label, indirect_reg);
+void MipsAssembler::Bal(MipsLabel* label) {
+  Call(label);
 }
 
 void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
@@ -1590,17 +2413,129 @@
   }
 }
 
-void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
-                                   int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (type == kLoadDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
+void MipsAssembler::Bc1f(MipsLabel* label) {
+  Bc1f(0, label);
+}
+
+void MipsAssembler::Bc1f(int cc, MipsLabel* label) {
+  CHECK(IsUint<3>(cc)) << cc;
+  Bcond(label, kCondF, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc1t(MipsLabel* label) {
+  Bc1t(0, label);
+}
+
+void MipsAssembler::Bc1t(int cc, MipsLabel* label) {
+  CHECK(IsUint<3>(cc)) << cc;
+  Bcond(label, kCondT, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) {
+  Bcond(label, kCondF, static_cast<Register>(ft), ZERO);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) {
+  Bcond(label, kCondT, static_cast<Register>(ft), ZERO);
+}
+
+void MipsAssembler::AdjustBaseAndOffset(Register& base,
+                                        int32_t& offset,
+                                        bool is_doubleword,
+                                        bool is_float) {
+  // This method is used to adjust the base register and offset pair
+  // for a load/store when the offset doesn't fit into int16_t.
+  // It is assumed that `base + offset` is sufficiently aligned for memory
+  // operands that are machine word in size or smaller. For doubleword-sized
+  // operands it's assumed that `base` is a multiple of 8, while `offset`
+  // may be a multiple of 4 (e.g. 4-byte-aligned long and double arguments
+  // and spilled variables on the stack accessed relative to the stack
+  // pointer register).
+  // We preserve the "alignment" of `offset` by adjusting it by a multiple of 8.
+  CHECK_NE(base, AT);  // Must not overwrite the register `base` while loading `offset`.
+
+  bool doubleword_aligned = IsAligned<kMipsDoublewordSize>(offset);
+  bool two_accesses = is_doubleword && (!is_float || !doubleword_aligned);
+
+  // IsInt<16> must be passed a signed value, hence the static cast below.
+  if (IsInt<16>(offset) &&
+      (!two_accesses || IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    // Nothing to do: `offset` (and, if needed, `offset + 4`) fits into int16_t.
+    return;
   }
 
+  // Remember the "(mis)alignment" of `offset`; it will be checked at the end.
+  uint32_t misalignment = offset & (kMipsDoublewordSize - 1);
+
+  // Do not load the whole 32-bit `offset` if it can be represented as
+  // a sum of two 16-bit signed offsets. This can save an instruction or two.
+  // To simplify matters, only do this for a symmetric range of offsets from
+  // about -64KB to about +64KB, allowing further addition of 4 when accessing
+  // 64-bit variables with two 32-bit accesses.
+  constexpr int32_t kMinOffsetForSimpleAdjustment = 0x7ff8;  // Max int16_t that's a multiple of 8.
+  constexpr int32_t kMaxOffsetForSimpleAdjustment = 2 * kMinOffsetForSimpleAdjustment;
+  if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) {
+    Addiu(AT, base, kMinOffsetForSimpleAdjustment);
+    offset -= kMinOffsetForSimpleAdjustment;
+  } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) {
+    Addiu(AT, base, -kMinOffsetForSimpleAdjustment);
+    offset += kMinOffsetForSimpleAdjustment;
+  } else if (IsR6()) {
+    // On R6 take advantage of the aui instruction, e.g.:
+    //   aui   AT, base, offset_high
+    //   lw    reg_lo, offset_low(AT)
+    //   lw    reg_hi, (offset_low+4)(AT)
+    // or when offset_low+4 overflows int16_t:
+    //   aui   AT, base, offset_high
+    //   addiu AT, AT, 8
+    //   lw    reg_lo, (offset_low-8)(AT)
+    //   lw    reg_hi, (offset_low-4)(AT)
+    int16_t offset_high = High16Bits(offset);
+    int16_t offset_low = Low16Bits(offset);
+    offset_high += (offset_low < 0) ? 1 : 0;  // Account for offset sign extension in load/store.
+    Aui(AT, base, offset_high);
+    if (two_accesses && !IsInt<16>(static_cast<int32_t>(offset_low + kMipsWordSize))) {
+      // Avoid overflow in the 16-bit offset of the load/store instruction when adding 4.
+      Addiu(AT, AT, kMipsDoublewordSize);
+      offset_low -= kMipsDoublewordSize;
+    }
+    offset = offset_low;
+  } else {
+    // Do not load the whole 32-bit `offset` if it can be represented as
+    // a sum of three 16-bit signed offsets. This can save an instruction.
+    // To simplify matters, only do this for a symmetric range of offsets from
+    // about -96KB to about +96KB, allowing further addition of 4 when accessing
+    // 64-bit variables with two 32-bit accesses.
+    constexpr int32_t kMinOffsetForMediumAdjustment = 2 * kMinOffsetForSimpleAdjustment;
+    constexpr int32_t kMaxOffsetForMediumAdjustment = 3 * kMinOffsetForSimpleAdjustment;
+    if (0 <= offset && offset <= kMaxOffsetForMediumAdjustment) {
+      Addiu(AT, base, kMinOffsetForMediumAdjustment / 2);
+      Addiu(AT, AT, kMinOffsetForMediumAdjustment / 2);
+      offset -= kMinOffsetForMediumAdjustment;
+    } else if (-kMaxOffsetForMediumAdjustment <= offset && offset < 0) {
+      Addiu(AT, base, -kMinOffsetForMediumAdjustment / 2);
+      Addiu(AT, AT, -kMinOffsetForMediumAdjustment / 2);
+      offset += kMinOffsetForMediumAdjustment;
+    } else {
+      // Now that all shorter options have been exhausted, load the full 32-bit offset.
+      int32_t loaded_offset = RoundDown(offset, kMipsDoublewordSize);
+      LoadConst32(AT, loaded_offset);
+      Addu(AT, AT, base);
+      offset -= loaded_offset;
+    }
+  }
+  base = AT;
+
+  CHECK(IsInt<16>(offset));
+  if (two_accesses) {
+    CHECK(IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)));
+  }
+  CHECK_EQ(misalignment, offset & (kMipsDoublewordSize - 1));
+}
+
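A worked instance of the "simple adjustment" path above, with an illustrative offset that just misses the int16_t range:

    #include <cstdint>
    constexpr int32_t kMinSimple = 0x7ff8;  // kMinOffsetForSimpleAdjustment
    constexpr int32_t kOffset = 0x8000;     // Does not fit into int16_t.
    static_assert(kOffset <= 2 * kMinSimple, "within the simple-adjustment range");
    // Emitted: addiu AT, base, 0x7ff8; the access then uses 8(AT) (and 12(AT)
    // for the second word of a doubleword).
    static_assert(kOffset - kMinSimple == 8, "residual offset fits int16_t");
    static_assert(((kOffset - kMinSimple) & 7) == (kOffset & 7), "alignment mod 8 preserved");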
+void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
+                                   int32_t offset) {
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
   switch (type) {
     case kLoadSignedByte:
       Lb(reg, base, offset);
@@ -1634,27 +2569,12 @@
 }
 
 void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true);
   Lwc1(reg, base, offset);
 }
 
 void MipsAssembler::LoadDFromOffset(FRegister reg, Register base, int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (!IsAligned<kMipsDoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true);
   if (offset & 0x7) {
     if (Is32BitFPU()) {
       Lwc1(reg, base, offset);
@@ -1693,15 +2613,10 @@
 
 void MipsAssembler::StoreToOffset(StoreOperandType type, Register reg, Register base,
                                   int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (type == kStoreDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  // Must not use AT as `reg`, so as not to overwrite the value being stored
+  // with the adjusted `base`.
+  CHECK_NE(reg, AT);
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
   switch (type) {
     case kStoreByte:
       Sb(reg, base, offset);
@@ -1724,27 +2639,12 @@
 }
 
 void MipsAssembler::StoreSToOffset(FRegister reg, Register base, int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true);
   Swc1(reg, base, offset);
 }
 
 void MipsAssembler::StoreDToOffset(FRegister reg, Register base, int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (!IsAligned<kMipsDoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true);
   if (offset & 0x7) {
     if (Is32BitFPU()) {
       Swc1(reg, base, offset);
@@ -1766,10 +2666,12 @@
 
 constexpr size_t kFramePointerSize = 4;
 
-void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                               const std::vector<ManagedRegister>& callee_save_regs,
+void MipsAssembler::BuildFrame(size_t frame_size,
+                               ManagedRegister method_reg,
+                               ArrayRef<const ManagedRegister> callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
 
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
@@ -1780,7 +2682,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     StoreToOffset(kStoreWord, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -1809,14 +2711,15 @@
 }
 
 void MipsAssembler::RemoveFrame(size_t frame_size,
-                                const std::vector<ManagedRegister>& callee_save_regs) {
+                                ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
   cfi_.RememberState();
 
   // Pop callee saves and return address.
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     LoadFromOffset(kLoadWord, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
@@ -1840,12 +2743,18 @@
   CHECK_ALIGNED(adjust, kFramePointerSize);
   Addiu32(SP, SP, -adjust);
   cfi_.AdjustCFAOffset(adjust);
+  if (overwriting_) {
+    cfi_.OverrideDelayedPC(overwrite_location_);
+  }
 }
 
 void MipsAssembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
   Addiu32(SP, SP, adjust);
   cfi_.AdjustCFAOffset(-adjust);
+  if (overwriting_) {
+    cfi_.OverrideDelayedPC(overwrite_location_);
+  }
 }
 
 void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
@@ -1890,26 +2799,17 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, uint32_t imm,
+void MipsAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                             FrameOffset fr_offs,
                                              ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
-}
-
-void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
-                                               FrameOffset fr_offs,
-                                               ManagedRegister mscratch) {
-  MipsManagedRegister scratch = mscratch.AsMips();
-  CHECK(scratch.IsCoreRegister()) << scratch;
   Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) {
+void MipsAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
   StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value());
 }
 
@@ -1926,8 +2826,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void MipsAssembler::LoadFromThread32(ManagedRegister mdest,
-                                     ThreadOffset<kMipsWordSize> src, size_t size) {
+void MipsAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -1955,8 +2854,7 @@
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
 }
 
-void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                           ThreadOffset<kMipsWordSize> offs) {
+void MipsAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value());
@@ -2010,9 +2908,9 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                           ThreadOffset<kMipsWordSize> thr_offs,
-                                           ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                         ThreadOffset32 thr_offs,
+                                         ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -2021,9 +2919,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
-                                         FrameOffset fr_offs,
-                                         ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                       FrameOffset fr_offs,
+                                       ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -2195,8 +3093,8 @@
   // TODO: place reference map on call.
 }
 
-void MipsAssembler::CallFromThread32(ThreadOffset<kMipsWordSize> offset ATTRIBUTE_UNUSED,
-                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void MipsAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                   ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
@@ -2213,7 +3111,7 @@
   MipsManagedRegister scratch = mscratch.AsMips();
   exception_blocks_.emplace_back(scratch, stack_adjust);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 S1, Thread::ExceptionOffset<kMipsWordSize>().Int32Value());
+                 S1, Thread::ExceptionOffset<kMipsPointerSize>().Int32Value());
   // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), slow.Entry());
   // as the NAL instruction (occurring in long R2 branches) may become deprecated.
   // For now use common for R2 and R6 instructions as this code must execute on both.
@@ -2231,7 +3129,7 @@
   Move(A0, exception->scratch_.AsCoreRegister());
   // Set up call to Thread::Current()->pDeliverException.
   LoadFromOffset(kLoadWord, T9, S1,
-    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pDeliverException).Int32Value());
+    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pDeliverException).Int32Value());
   Jr(T9);
   Nop();
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index aa187b8..41b6c6b 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -17,16 +17,20 @@
 #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 
+#include <deque>
 #include <utility>
 #include <vector>
 
 #include "arch/mips/instruction_set_features_mips.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -51,6 +55,20 @@
   kStoreDoubleword
 };
 
+// Used to test the values returned by ClassS/ClassD.
+enum FPClassMaskType {
+  kSignalingNaN      = 0x001,
+  kQuietNaN          = 0x002,
+  kNegativeInfinity  = 0x004,
+  kNegativeNormal    = 0x008,
+  kNegativeSubnormal = 0x010,
+  kNegativeZero      = 0x020,
+  kPositiveInfinity  = 0x040,
+  kPositiveNormal    = 0x080,
+  kPositiveSubnormal = 0x100,
+  kPositiveZero      = 0x200,
+};
+
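As a rough sketch of how these masks might be consumed (hedged: it assumes an assembler instance `a` and an Andi() emitter on this class, which this excerpt does not show):

    // ClassS writes the class bitmask for F1 into F0 (R6 only).
    a.ClassS(mips::F0, mips::F1);
    // Move the mask to a GPR and test for any NaN; T0 != 0 iff F1 held a NaN.
    a.Mfc1(mips::T0, mips::F0);
    a.Andi(mips::T0, mips::T0, kSignalingNaN | kQuietNaN);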
 class MipsLabel : public Label {
  public:
   MipsLabel() : prev_branch_id_plus_one_(0) {}
@@ -65,6 +83,49 @@
   DISALLOW_COPY_AND_ASSIGN(MipsLabel);
 };
 
+// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
+class Literal {
+ public:
+  static constexpr size_t kMaxSize = 8;
+
+  Literal(uint32_t size, const uint8_t* data)
+      : label_(), size_(size) {
+    DCHECK_LE(size, Literal::kMaxSize);
+    memcpy(data_, data, size);
+  }
+
+  template <typename T>
+  T GetValue() const {
+    DCHECK_EQ(size_, sizeof(T));
+    T value;
+    memcpy(&value, data_, sizeof(T));
+    return value;
+  }
+
+  uint32_t GetSize() const {
+    return size_;
+  }
+
+  const uint8_t* GetData() const {
+    return data_;
+  }
+
+  MipsLabel* GetLabel() {
+    return &label_;
+  }
+
+  const MipsLabel* GetLabel() const {
+    return &label_;
+  }
+
+ private:
+  MipsLabel label_;
+  const uint32_t size_;
+  uint8_t data_[kMaxSize];
+
+  DISALLOW_COPY_AND_ASSIGN(Literal);
+};
+
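A minimal round-trip through this class (hedged sketch; `Literal` is non-copyable but stack-constructible, and the sizes satisfy the DCHECKs above):

    void LiteralRoundTrip() {
      int32_t value = 0x12345678;
      Literal lit(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
      int32_t back = lit.GetValue<int32_t>();  // memcpy out of data_.
      DCHECK_EQ(back, value);
    }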
 // Slowpath entered when Thread::Current()->_exception is non-null.
 class MipsExceptionSlowPath {
  public:
@@ -72,8 +133,8 @@
       : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
   MipsExceptionSlowPath(MipsExceptionSlowPath&& src)
-      : scratch_(std::move(src.scratch_)),
-        stack_adjust_(std::move(src.stack_adjust_)),
+      : scratch_(src.scratch_),
+        stack_adjust_(src.stack_adjust_),
         exception_entry_(std::move(src.exception_entry_)) {}
 
  private:
@@ -86,15 +147,23 @@
   DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath);
 };
 
-class MipsAssembler FINAL : public Assembler {
+class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
-  explicit MipsAssembler(const MipsInstructionSetFeatures* instruction_set_features = nullptr)
-      : overwriting_(false),
+  explicit MipsAssembler(ArenaAllocator* arena,
+                         const MipsInstructionSetFeatures* instruction_set_features = nullptr)
+      : Assembler(arena),
+        overwriting_(false),
         overwrite_location_(0),
+        literals_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
-        isa_features_(instruction_set_features) {}
+        isa_features_(instruction_set_features) {
+    cfi().DelayEmittingAdvancePCs();
+  }
+
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
 
   virtual ~MipsAssembler() {
     for (auto& branch : branches_) {
@@ -117,6 +186,7 @@
   void DivuR2(Register rd, Register rs, Register rt);  // R2
   void ModuR2(Register rd, Register rs, Register rt);  // R2
   void MulR6(Register rd, Register rs, Register rt);  // R6
+  void MuhR6(Register rd, Register rs, Register rt);  // R6
   void MuhuR6(Register rd, Register rs, Register rt);  // R6
   void DivR6(Register rd, Register rs, Register rt);  // R6
   void ModR6(Register rd, Register rs, Register rt);  // R6
@@ -131,22 +201,41 @@
   void Xori(Register rt, Register rs, uint16_t imm16);
   void Nor(Register rd, Register rs, Register rt);
 
+  void Movz(Register rd, Register rs, Register rt);  // R2
+  void Movn(Register rd, Register rs, Register rt);  // R2
+  void Seleqz(Register rd, Register rs, Register rt);  // R6
+  void Selnez(Register rd, Register rs, Register rt);  // R6
+  void ClzR6(Register rd, Register rs);
+  void ClzR2(Register rd, Register rs);
+  void CloR6(Register rd, Register rs);
+  void CloR2(Register rd, Register rs);
+
   void Seb(Register rd, Register rt);  // R2+
   void Seh(Register rd, Register rt);  // R2+
+  void Wsbh(Register rd, Register rt);  // R2+
+  void Bitswap(Register rd, Register rt);  // R6
 
   void Sll(Register rd, Register rt, int shamt);
   void Srl(Register rd, Register rt, int shamt);
+  void Rotr(Register rd, Register rt, int shamt);  // R2+
   void Sra(Register rd, Register rt, int shamt);
   void Sllv(Register rd, Register rt, Register rs);
   void Srlv(Register rd, Register rt, Register rs);
+  void Rotrv(Register rd, Register rt, Register rs);  // R2+
   void Srav(Register rd, Register rt, Register rs);
+  void Ext(Register rd, Register rt, int pos, int size);  // R2+
+  void Ins(Register rd, Register rt, int pos, int size);  // R2+
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
   void Lw(Register rt, Register rs, uint16_t imm16);
+  void Lwl(Register rt, Register rs, uint16_t imm16);
+  void Lwr(Register rt, Register rs, uint16_t imm16);
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
+  void Lwpc(Register rs, uint32_t imm19);  // R6
   void Lui(Register rt, uint16_t imm16);
+  void Aui(Register rt, Register rs, uint16_t imm16);  // R6
   void Sync(uint32_t stype);
   void Mfhi(Register rd);  // R2
   void Mflo(Register rd);  // R2
@@ -154,6 +243,13 @@
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
   void Sw(Register rt, Register rs, uint16_t imm16);
+  void Swl(Register rt, Register rs, uint16_t imm16);
+  void Swr(Register rt, Register rs, uint16_t imm16);
+
+  void LlR2(Register rt, Register base, int16_t imm16 = 0);
+  void ScR2(Register rt, Register base, int16_t imm16 = 0);
+  void LlR6(Register rt, Register base, int16_t imm9 = 0);
+  void ScR6(Register rt, Register base, int16_t imm9 = 0);
 
   void Slt(Register rd, Register rs, Register rt);
   void Sltu(Register rd, Register rs, Register rt);
@@ -161,6 +257,7 @@
   void Sltiu(Register rt, Register rs, uint16_t imm16);
 
   void B(uint16_t imm16);
+  void Bal(uint16_t imm16);
   void Beq(Register rs, Register rt, uint16_t imm16);
   void Bne(Register rs, Register rt, uint16_t imm16);
   void Beqz(Register rt, uint16_t imm16);
@@ -169,6 +266,10 @@
   void Bgez(Register rt, uint16_t imm16);
   void Blez(Register rt, uint16_t imm16);
   void Bgtz(Register rt, uint16_t imm16);
+  void Bc1f(uint16_t imm16);  // R2
+  void Bc1f(int cc, uint16_t imm16);  // R2
+  void Bc1t(uint16_t imm16);  // R2
+  void Bc1t(int cc, uint16_t imm16);  // R2
   void J(uint32_t addr26);
   void Jal(uint32_t addr26);
   void Jalr(Register rd, Register rs);
@@ -178,6 +279,7 @@
   void Auipc(Register rs, uint16_t imm16);  // R6
   void Addiupc(Register rs, uint32_t imm19);  // R6
   void Bc(uint32_t imm26);  // R6
+  void Balc(uint32_t imm26);  // R6
   void Jic(Register rt, uint16_t imm16);  // R6
   void Jialc(Register rt, uint16_t imm16);  // R6
   void Bltc(Register rs, Register rt, uint16_t imm16);  // R6
@@ -192,6 +294,8 @@
   void Bnec(Register rs, Register rt, uint16_t imm16);  // R6
   void Beqzc(Register rs, uint32_t imm21);  // R6
   void Bnezc(Register rs, uint32_t imm21);  // R6
+  void Bc1eqz(FRegister ft, uint16_t imm16);  // R6
+  void Bc1nez(FRegister ft, uint16_t imm16);  // R6
 
   void AddS(FRegister fd, FRegister fs, FRegister ft);
   void SubS(FRegister fd, FRegister fs, FRegister ft);
@@ -201,20 +305,97 @@
   void SubD(FRegister fd, FRegister fs, FRegister ft);
   void MulD(FRegister fd, FRegister fs, FRegister ft);
   void DivD(FRegister fd, FRegister fs, FRegister ft);
+  void SqrtS(FRegister fd, FRegister fs);
+  void SqrtD(FRegister fd, FRegister fs);
+  void AbsS(FRegister fd, FRegister fs);
+  void AbsD(FRegister fd, FRegister fs);
   void MovS(FRegister fd, FRegister fs);
   void MovD(FRegister fd, FRegister fs);
   void NegS(FRegister fd, FRegister fs);
   void NegD(FRegister fd, FRegister fs);
 
+  void CunS(FRegister fs, FRegister ft);  // R2
+  void CunS(int cc, FRegister fs, FRegister ft);  // R2
+  void CeqS(FRegister fs, FRegister ft);  // R2
+  void CeqS(int cc, FRegister fs, FRegister ft);  // R2
+  void CueqS(FRegister fs, FRegister ft);  // R2
+  void CueqS(int cc, FRegister fs, FRegister ft);  // R2
+  void ColtS(FRegister fs, FRegister ft);  // R2
+  void ColtS(int cc, FRegister fs, FRegister ft);  // R2
+  void CultS(FRegister fs, FRegister ft);  // R2
+  void CultS(int cc, FRegister fs, FRegister ft);  // R2
+  void ColeS(FRegister fs, FRegister ft);  // R2
+  void ColeS(int cc, FRegister fs, FRegister ft);  // R2
+  void CuleS(FRegister fs, FRegister ft);  // R2
+  void CuleS(int cc, FRegister fs, FRegister ft);  // R2
+  void CunD(FRegister fs, FRegister ft);  // R2
+  void CunD(int cc, FRegister fs, FRegister ft);  // R2
+  void CeqD(FRegister fs, FRegister ft);  // R2
+  void CeqD(int cc, FRegister fs, FRegister ft);  // R2
+  void CueqD(FRegister fs, FRegister ft);  // R2
+  void CueqD(int cc, FRegister fs, FRegister ft);  // R2
+  void ColtD(FRegister fs, FRegister ft);  // R2
+  void ColtD(int cc, FRegister fs, FRegister ft);  // R2
+  void CultD(FRegister fs, FRegister ft);  // R2
+  void CultD(int cc, FRegister fs, FRegister ft);  // R2
+  void ColeD(FRegister fs, FRegister ft);  // R2
+  void ColeD(int cc, FRegister fs, FRegister ft);  // R2
+  void CuleD(FRegister fs, FRegister ft);  // R2
+  void CuleD(int cc, FRegister fs, FRegister ft);  // R2
+  void CmpUnS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpEqS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUeqS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLtS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUltS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLeS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUleS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpOrS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUneS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpNeS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUnD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpEqD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUeqD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLtD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUltD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLeD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUleD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpOrD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUneD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpNeD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void Movf(Register rd, Register rs, int cc = 0);  // R2
+  void Movt(Register rd, Register rs, int cc = 0);  // R2
+  void MovfS(FRegister fd, FRegister fs, int cc = 0);  // R2
+  void MovfD(FRegister fd, FRegister fs, int cc = 0);  // R2
+  void MovtS(FRegister fd, FRegister fs, int cc = 0);  // R2
+  void MovtD(FRegister fd, FRegister fs, int cc = 0);  // R2
+  void SelS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void SelD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void ClassS(FRegister fd, FRegister fs);  // R6
+  void ClassD(FRegister fd, FRegister fs);  // R6
+  void MinS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void MinD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void MaxS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void MaxD(FRegister fd, FRegister fs, FRegister ft);  // R6
+
+  void TruncLS(FRegister fd, FRegister fs);  // R2+, FR=1
+  void TruncLD(FRegister fd, FRegister fs);  // R2+, FR=1
+  void TruncWS(FRegister fd, FRegister fs);
+  void TruncWD(FRegister fd, FRegister fs);
   void Cvtsw(FRegister fd, FRegister fs);
   void Cvtdw(FRegister fd, FRegister fs);
   void Cvtsd(FRegister fd, FRegister fs);
   void Cvtds(FRegister fd, FRegister fs);
+  void Cvtsl(FRegister fd, FRegister fs);  // R2+, FR=1
+  void Cvtdl(FRegister fd, FRegister fs);  // R2+, FR=1
+  void FloorWS(FRegister fd, FRegister fs);
+  void FloorWD(FRegister fd, FRegister fs);
 
   void Mfc1(Register rt, FRegister fs);
   void Mtc1(Register rt, FRegister fs);
   void Mfhc1(Register rt, FRegister fs);
   void Mthc1(Register rt, FRegister fs);
+  void MoveFromFpuHigh(Register rt, FRegister fs);
+  void MoveToFpuHigh(Register rt, FRegister fs);
   void Lwc1(FRegister ft, Register rs, uint16_t imm16);
   void Ldc1(FRegister ft, Register rs, uint16_t imm16);
   void Swc1(FRegister ft, Register rs, uint16_t imm16);
@@ -238,7 +419,7 @@
   // These will generate R2 branches or R6 branches as appropriate.
   void Bind(MipsLabel* label);
   void B(MipsLabel* label);
-  void Jalr(MipsLabel* label, Register indirect_reg);
+  void Bal(MipsLabel* label);
   void Beq(Register rs, Register rt, MipsLabel* label);
   void Bne(Register rs, Register rt, MipsLabel* label);
   void Beqz(Register rt, MipsLabel* label);
@@ -251,8 +432,18 @@
   void Bge(Register rs, Register rt, MipsLabel* label);
   void Bltu(Register rs, Register rt, MipsLabel* label);
   void Bgeu(Register rs, Register rt, MipsLabel* label);
+  void Bc1f(MipsLabel* label);  // R2
+  void Bc1f(int cc, MipsLabel* label);  // R2
+  void Bc1t(MipsLabel* label);  // R2
+  void Bc1t(int cc, MipsLabel* label);  // R2
+  void Bc1eqz(FRegister ft, MipsLabel* label);  // R6
+  void Bc1nez(FRegister ft, MipsLabel* label);  // R6
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
+  void AdjustBaseAndOffset(Register& base,
+                           int32_t& offset,
+                           bool is_doubleword,
+                           bool is_float = false);
   void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset);
   void LoadSFromOffset(FRegister reg, Register base, int32_t offset);
   void LoadDFromOffset(FRegister reg, Register base, int32_t offset);
@@ -275,17 +466,33 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
   }
 
+  // Create a new literal with a given value.
+  // NOTE: Force the template parameter to be explicitly specified.
+  template <typename T>
+  Literal* NewLiteral(typename Identity<T>::type value) {
+    static_assert(std::is_integral<T>::value, "T must be an integral type.");
+    return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
+  }
+
+  // Create a new literal with the given data.
+  Literal* NewLiteral(size_t size, const uint8_t* data);
+
+  // Load literal using the base register (for R2 only) or using PC-relative loads
+  // (for R6 only; base_reg must be ZERO).
+  void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal);
+
   //
   // Overridden common assembler high-level functionality.
   //
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
@@ -298,58 +505,79 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister mscratch)
-      OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
 
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister msrc,
+                     FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
-  void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
+  void LoadRef(ManagedRegister mdest,
+               ManagedRegister base,
+               MemberOffset offs,
                bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<4> offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
 
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister mscratch,
             size_t size) OVERRIDE;
 
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister mscratch,
             size_t size) OVERRIDE;
 
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
 
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
@@ -367,13 +595,17 @@
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
 
   // Set up out_off to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister mscratch, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister mscratch,
+                              bool null_allowed) OVERRIDE;
 
   // src holds a handle scope entry (Object**) load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
@@ -386,7 +618,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
@@ -400,12 +632,25 @@
 
   // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS,
   // must be used instead of MipsLabel::GetPosition()).
-  uint32_t GetLabelLocation(MipsLabel* label) const;
+  uint32_t GetLabelLocation(const MipsLabel* label) const;
 
   // Get the final position of a label after local fixup based on the old position
   // recorded before FinalizeCode().
   uint32_t GetAdjustedPosition(uint32_t old_position);
 
+  // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by
+  // reading the PC value into a general-purpose register with the NAL instruction and then loading
+  // literals through this base register. The code generator calls this method (at most once per
+  // method being compiled) to bind a label to the location for which the PC value is acquired.
+  // The assembler then computes literal offsets relative to this label.
+  void BindPcRelBaseLabel();
+
+  // Returns the location of the label bound with BindPcRelBaseLabel().
+  uint32_t GetPcRelBaseLabelLocation() const;
+
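One plausible R2 sequence tying these together (hedged: `base_reg`, `dest_reg` and `lit` are illustrative, and it uses the Move() helper declared elsewhere in this class):

    assembler.Nal();                 // RA <- address of the instruction at PC + 8.
    assembler.Move(base_reg, RA);    // Executes in the delay slot; RA is already set.
    assembler.BindPcRelBaseLabel();  // Bound exactly where RA points.
    // ... later, any number of times:
    assembler.LoadLiteral(dest_reg, base_reg, lit);  // R2: lw dest, offset(base).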
+  // Note that PC-relative literal loads are handled as pseudo branches because they need very
+  // similar relocation and may similarly expand in size to accommodate larger offsets relative
+  // to PC.
   enum BranchCondition {
     kCondLT,
     kCondGE,
@@ -421,6 +666,8 @@
     kCondNEZ,
     kCondLTU,
     kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
     kUncond,
   };
   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
@@ -433,18 +680,26 @@
       kUncondBranch,
       kCondBranch,
       kCall,
+      // R2 near literal.
+      kLiteral,
       // R2 long branches.
       kLongUncondBranch,
       kLongCondBranch,
       kLongCall,
+      // R2 far literal.
+      kFarLiteral,
       // R6 short branches.
       kR6UncondBranch,
       kR6CondBranch,
       kR6Call,
+      // R6 near literal.
+      kR6Literal,
       // R6 long branches.
       kR6LongUncondBranch,
       kR6LongCondBranch,
       kR6LongCall,
+      // R6 far literal.
+      kR6FarLiteral,
     };
     // Bit sizes of offsets defined as enums to minimize chance of typos.
     enum OffsetBits {
@@ -479,17 +734,17 @@
     };
     static const BranchInfo branch_info_[/* Type */];
 
-    // Unconditional branch.
-    Branch(bool is_r6, uint32_t location, uint32_t target);
+    // Unconditional branch or call.
+    Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call);
     // Conditional branch.
     Branch(bool is_r6,
            uint32_t location,
            uint32_t target,
            BranchCondition condition,
            Register lhs_reg,
-           Register rhs_reg = ZERO);
-    // Call (branch and link) that stores the target address in a given register (i.e. T9).
-    Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg);
+           Register rhs_reg);
+    // Literal.
+    Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg);
 
     // Some conditional branches with lhs = rhs are effectively NOPs, while some
     // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
@@ -527,7 +782,22 @@
     //
     // Composite branches (made of several instructions) with longer reach have 32-bit
     // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
-    // The composite branches cover the range of PC + +/-2GB.
+    // The composite branches cover the range of PC +/- 2GB on MIPS32 CPUs. However,
+    // the range is not end-to-end on MIPS64 (unless addresses are forced to zero- or
+    // sign-extend from 32 to 64 bits by the appropriate CPU configuration).
+    // Consider the following implementation of a long unconditional branch, for
+    // example:
+    //
+    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
+    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
+    //
+    // Both of the above instructions take 16-bit signed offsets as immediate operands.
+    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+    // due to sign extension. This must be compensated for by incrementing offset_31_16
+    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+    // Therefore, the long branch range is something like from PC - 0x80000000 to
+    // PC + 0x7FFF7FFF; in other words, the range is shorter by 32KB on one side.
     //
     // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
     // case with the addiu instruction and a 16 bit offset.
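A hedged boundary check of the asymmetry described in the comment above:

    #include <cstdint>
    constexpr uint32_t kMaxForward = 0x7FFF7FFFu;  // Largest encodable forward offset.
    static_assert((kMaxForward & 0x8000u) == 0u, "no compensation needed here");
    // Just beyond it, bit 15 of the low half is set, and compensating would push
    // the high half from 0x7FFF to 0x8000 -- into the negative range:
    constexpr uint32_t kTooFar = 0x7FFF8000u;
    static_assert(kTooFar + ((kTooFar & 0x8000u) << 1) == 0x80008000u, "wraps negative");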
@@ -550,31 +820,32 @@
     // that is allowed for short branches. This is for debugging/testing purposes.
     // max_short_distance = 0 forces all short branches to become long.
     // Use the implicit default argument when not debugging/testing.
-    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+    uint32_t PromoteIfNeeded(uint32_t location,
+                             uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
 
     // Returns the location of the instruction(s) containing the offset.
     uint32_t GetOffsetLocation() const;
 
     // Calculates and returns the offset ready for encoding in the branch instruction(s).
-    uint32_t GetOffset() const;
+    uint32_t GetOffset(uint32_t location) const;
 
    private:
     // Completes branch construction by determining and recording its type.
-    void InitializeType(bool is_call, bool is_r6);
+    void InitializeType(bool is_call, bool is_literal, bool is_r6);
     // Helper for the above.
     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
 
-    uint32_t old_location_;          // Offset into assembler buffer in bytes.
-    uint32_t location_;              // Offset into assembler buffer in bytes.
-    uint32_t target_;                // Offset into assembler buffer in bytes.
+    uint32_t old_location_;      // Offset into assembler buffer in bytes.
+    uint32_t location_;          // Offset into assembler buffer in bytes.
+    uint32_t target_;            // Offset into assembler buffer in bytes.
 
-    uint32_t lhs_reg_ : 5;           // Left-hand side register in conditional branches or
-                                     // indirect call register.
-    uint32_t rhs_reg_ : 5;           // Right-hand side register in conditional branches.
-    BranchCondition condition_ : 5;  // Condition for conditional branches.
+    uint32_t lhs_reg_;           // Left-hand side register in conditional branches or
+                                 // destination register in literals.
+    uint32_t rhs_reg_;           // Right-hand side register in conditional branches or
+                                 // base register in literals.
+    BranchCondition condition_;  // Condition for conditional branches.
 
-    Type type_ : 5;                  // Current type of the branch.
-    Type old_type_ : 5;              // Initial type of the branch.
+    Type type_;                  // Current type of the branch.
+    Type old_type_;              // Initial type of the branch.
   };
   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
@@ -585,20 +856,24 @@
   void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
-  void EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
-  void EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);  // R6
+  void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
+  void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
-  void Call(MipsLabel* label, Register indirect_reg);
+  void Call(MipsLabel* label);
   void FinalizeLabeledBranch(MipsLabel* label);
 
   Branch* GetBranch(uint32_t branch_id);
   const Branch* GetBranch(uint32_t branch_id) const;
+  uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
+  uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
 
+  void EmitLiterals();
   void PromoteBranches();
   void EmitBranch(Branch* branch);
   void EmitBranches();
+  void PatchCFI(size_t number_of_delayed_adjust_pcs);
 
   // Emits exception block.
   void EmitExceptionPoll(MipsExceptionSlowPath* exception);
@@ -629,6 +904,15 @@
   // The current overwrite location.
   uint32_t overwrite_location_;
 
+  // Use std::deque<> for literal labels to allow insertions at the end
+  // without invalidating pointers and references to existing elements.
+  ArenaDeque<Literal> literals_;
+
+  // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC
+  // we get PC using the NAL instruction. This label marks the position within the assembler buffer
+  // that PC (from NAL) points to.
+  MipsLabel pc_rel_base_label_;
+
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
   uint32_t last_old_position_;
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
new file mode 100644
index 0000000..49ef272
--- /dev/null
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -0,0 +1,714 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_mips.h"
+
+#include <map>
+
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+#define __ GetAssembler()->
+
+namespace art {
+
+struct MIPSCpuRegisterCompare {
+  bool operator()(const mips::Register& a, const mips::Register& b) const {
+    return a < b;
+  }
+};
+
+class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,
+                                                   mips::Register,
+                                                   mips::FRegister,
+                                                   uint32_t> {
+ public:
+  typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base;
+
+  AssemblerMIPS32r6Test() :
+    instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) {
+  }
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "mips";
+  }
+
+  std::string GetAssemblerCmdName() OVERRIDE {
+    // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details.
+    return "gcc";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6
+    // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative
+    // branches in the .text section and so they require a relocation pass (there's a relocation
+    // section, .rela.text, that has the needed info to fix up the branches).
+    // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers.
+    // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the
+    // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily).
+    return " -march=mips32r6 -modd-spreg -Wa,--no-warn"
+        " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib";
+  }
+
+  void Pad(std::vector<uint8_t>& data) OVERRIDE {
+    // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+    // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+    // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+    // NOP is encoded as four zero bytes on MIPS.
+    size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+    data.insert(data.end(), pad_size, 0);
+  }
+
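A minimal standalone check of the padding arithmetic above (RoundUp is re-stated here under the assumption that it matches ART's round-up-to-a-multiple semantics):

    #include <cassert>
    #include <cstddef>

    // Assumed to behave like ART's RoundUp: round n up to the next multiple of m.
    constexpr size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; }

    int main() {
      size_t code_size = 88;  // e.g. 22 four-byte instructions
      size_t pad_size = RoundUp(code_size, 16) - code_size;
      assert(pad_size == 8);  // eight zero bytes, i.e. two NOPs, as the linker would add
      return 0;
    }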
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mmips:isa32r6";
+  }
+
+  mips::MipsAssembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE {
+    return new (arena) mips::MipsAssembler(arena, instruction_set_features_.get());
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.push_back(new mips::Register(mips::ZERO));
+      registers_.push_back(new mips::Register(mips::AT));
+      registers_.push_back(new mips::Register(mips::V0));
+      registers_.push_back(new mips::Register(mips::V1));
+      registers_.push_back(new mips::Register(mips::A0));
+      registers_.push_back(new mips::Register(mips::A1));
+      registers_.push_back(new mips::Register(mips::A2));
+      registers_.push_back(new mips::Register(mips::A3));
+      registers_.push_back(new mips::Register(mips::T0));
+      registers_.push_back(new mips::Register(mips::T1));
+      registers_.push_back(new mips::Register(mips::T2));
+      registers_.push_back(new mips::Register(mips::T3));
+      registers_.push_back(new mips::Register(mips::T4));
+      registers_.push_back(new mips::Register(mips::T5));
+      registers_.push_back(new mips::Register(mips::T6));
+      registers_.push_back(new mips::Register(mips::T7));
+      registers_.push_back(new mips::Register(mips::S0));
+      registers_.push_back(new mips::Register(mips::S1));
+      registers_.push_back(new mips::Register(mips::S2));
+      registers_.push_back(new mips::Register(mips::S3));
+      registers_.push_back(new mips::Register(mips::S4));
+      registers_.push_back(new mips::Register(mips::S5));
+      registers_.push_back(new mips::Register(mips::S6));
+      registers_.push_back(new mips::Register(mips::S7));
+      registers_.push_back(new mips::Register(mips::T8));
+      registers_.push_back(new mips::Register(mips::T9));
+      registers_.push_back(new mips::Register(mips::K0));
+      registers_.push_back(new mips::Register(mips::K1));
+      registers_.push_back(new mips::Register(mips::GP));
+      registers_.push_back(new mips::Register(mips::SP));
+      registers_.push_back(new mips::Register(mips::FP));
+      registers_.push_back(new mips::Register(mips::RA));
+
+      secondary_register_names_.emplace(mips::Register(mips::ZERO), "zero");
+      secondary_register_names_.emplace(mips::Register(mips::AT), "at");
+      secondary_register_names_.emplace(mips::Register(mips::V0), "v0");
+      secondary_register_names_.emplace(mips::Register(mips::V1), "v1");
+      secondary_register_names_.emplace(mips::Register(mips::A0), "a0");
+      secondary_register_names_.emplace(mips::Register(mips::A1), "a1");
+      secondary_register_names_.emplace(mips::Register(mips::A2), "a2");
+      secondary_register_names_.emplace(mips::Register(mips::A3), "a3");
+      secondary_register_names_.emplace(mips::Register(mips::T0), "t0");
+      secondary_register_names_.emplace(mips::Register(mips::T1), "t1");
+      secondary_register_names_.emplace(mips::Register(mips::T2), "t2");
+      secondary_register_names_.emplace(mips::Register(mips::T3), "t3");
+      secondary_register_names_.emplace(mips::Register(mips::T4), "t4");
+      secondary_register_names_.emplace(mips::Register(mips::T5), "t5");
+      secondary_register_names_.emplace(mips::Register(mips::T6), "t6");
+      secondary_register_names_.emplace(mips::Register(mips::T7), "t7");
+      secondary_register_names_.emplace(mips::Register(mips::S0), "s0");
+      secondary_register_names_.emplace(mips::Register(mips::S1), "s1");
+      secondary_register_names_.emplace(mips::Register(mips::S2), "s2");
+      secondary_register_names_.emplace(mips::Register(mips::S3), "s3");
+      secondary_register_names_.emplace(mips::Register(mips::S4), "s4");
+      secondary_register_names_.emplace(mips::Register(mips::S5), "s5");
+      secondary_register_names_.emplace(mips::Register(mips::S6), "s6");
+      secondary_register_names_.emplace(mips::Register(mips::S7), "s7");
+      secondary_register_names_.emplace(mips::Register(mips::T8), "t8");
+      secondary_register_names_.emplace(mips::Register(mips::T9), "t9");
+      secondary_register_names_.emplace(mips::Register(mips::K0), "k0");
+      secondary_register_names_.emplace(mips::Register(mips::K1), "k1");
+      secondary_register_names_.emplace(mips::Register(mips::GP), "gp");
+      secondary_register_names_.emplace(mips::Register(mips::SP), "sp");
+      secondary_register_names_.emplace(mips::Register(mips::FP), "fp");
+      secondary_register_names_.emplace(mips::Register(mips::RA), "ra");
+
+      fp_registers_.push_back(new mips::FRegister(mips::F0));
+      fp_registers_.push_back(new mips::FRegister(mips::F1));
+      fp_registers_.push_back(new mips::FRegister(mips::F2));
+      fp_registers_.push_back(new mips::FRegister(mips::F3));
+      fp_registers_.push_back(new mips::FRegister(mips::F4));
+      fp_registers_.push_back(new mips::FRegister(mips::F5));
+      fp_registers_.push_back(new mips::FRegister(mips::F6));
+      fp_registers_.push_back(new mips::FRegister(mips::F7));
+      fp_registers_.push_back(new mips::FRegister(mips::F8));
+      fp_registers_.push_back(new mips::FRegister(mips::F9));
+      fp_registers_.push_back(new mips::FRegister(mips::F10));
+      fp_registers_.push_back(new mips::FRegister(mips::F11));
+      fp_registers_.push_back(new mips::FRegister(mips::F12));
+      fp_registers_.push_back(new mips::FRegister(mips::F13));
+      fp_registers_.push_back(new mips::FRegister(mips::F14));
+      fp_registers_.push_back(new mips::FRegister(mips::F15));
+      fp_registers_.push_back(new mips::FRegister(mips::F16));
+      fp_registers_.push_back(new mips::FRegister(mips::F17));
+      fp_registers_.push_back(new mips::FRegister(mips::F18));
+      fp_registers_.push_back(new mips::FRegister(mips::F19));
+      fp_registers_.push_back(new mips::FRegister(mips::F20));
+      fp_registers_.push_back(new mips::FRegister(mips::F21));
+      fp_registers_.push_back(new mips::FRegister(mips::F22));
+      fp_registers_.push_back(new mips::FRegister(mips::F23));
+      fp_registers_.push_back(new mips::FRegister(mips::F24));
+      fp_registers_.push_back(new mips::FRegister(mips::F25));
+      fp_registers_.push_back(new mips::FRegister(mips::F26));
+      fp_registers_.push_back(new mips::FRegister(mips::F27));
+      fp_registers_.push_back(new mips::FRegister(mips::F28));
+      fp_registers_.push_back(new mips::FRegister(mips::F29));
+      fp_registers_.push_back(new mips::FRegister(mips::F30));
+      fp_registers_.push_back(new mips::FRegister(mips::F31));
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
+  }
+
+  std::vector<mips::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+  std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE {
+    CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+    return secondary_register_names_[reg];
+  }
+
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                              mips::Register,
+                                                              mips::MipsLabel*),
+                               std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+ private:
+  std::vector<mips::Register*> registers_;
+  std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
+
+  std::vector<mips::FRegister*> fp_registers_;
+  std::unique_ptr<const MipsInstructionSetFeatures> instruction_set_features_;
+};
+
+
+TEST_F(AssemblerMIPS32r6Test, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MulR6, "mul ${reg1}, ${reg2}, ${reg3}"), "MulR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MuhR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MuhR6, "muh ${reg1}, ${reg2}, ${reg3}"), "MuhR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MuhuR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MuhuR6, "muhu ${reg1}, ${reg2}, ${reg3}"), "MuhuR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, DivR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivR6, "div ${reg1}, ${reg2}, ${reg3}"), "DivR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ModR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModR6, "mod ${reg1}, ${reg2}, ${reg3}"), "ModR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, DivuR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivuR6, "divu ${reg1}, ${reg2}, ${reg3}"), "DivuR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ModuR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModuR6, "modu ${reg1}, ${reg2}, ${reg3}"), "ModuR6");
+}
+
+//////////
+// MISC //
+//////////
+
+TEST_F(AssemblerMIPS32r6Test, Aui) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Auipc) {
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Lwpc) {
+  // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset,
+  // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`.
+  // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right
+  // by 2 positions when encoding, hence `<< 2` to compensate for that shift.
+  // We capture the value of the immediate with `.set imm, {imm}` because the value is needed
+  // twice for the sign extension, but `{imm}` is substituted only once.
+  const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)";
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc");
+}
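Restated as plain C++, the offset computation from the comment above works out like this (the helper name is ours, purely for illustration):

    #include <cassert>
    #include <cstdint>

    // Sign-extend a 19-bit lwpc immediate from bit 18 and scale it to bytes.
    int32_t LwpcByteOffset(uint32_t imm19) {
      int32_t simm = (imm19 & 0x40000) != 0
          ? static_cast<int32_t>(imm19) - 0x80000  // bit 18 set: subtract 2^19
          : static_cast<int32_t>(imm19);
      return simm * 4;  // the GNU assembler divides the byte offset by 4 when encoding
    }

    int main() {
      assert(LwpcByteOffset(0x00001) == 4);   // one word past $pc
      assert(LwpcByteOffset(0x7FFFF) == -4);  // all ones: one word before $pc
      return 0;
    }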
+
+TEST_F(AssemblerMIPS32r6Test, Bitswap) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Seleqz) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"),
+            "seleqz");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Selnez) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"),
+            "selnez");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ClzR6) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::ClzR6, "clz ${reg1}, ${reg2}"), "clzR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CloR6) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::CloR6, "clo ${reg1}, ${reg2}"), "cloR6");
+}
+
+////////////////////
+// FLOATING POINT //
+////////////////////
+
+TEST_F(AssemblerMIPS32r6Test, SelS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SelS, "sel.s ${reg1}, ${reg2}, ${reg3}"), "sel.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SelD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ClassS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::ClassS, "class.s ${reg1}, ${reg2}"), "class.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ClassD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::ClassD, "class.d ${reg1}, ${reg2}"), "class.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MinS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MinS, "min.s ${reg1}, ${reg2}, ${reg3}"), "min.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MinD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MinD, "min.d ${reg1}, ${reg2}, ${reg3}"), "min.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MaxS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MaxS, "max.s ${reg1}, ${reg2}, ${reg3}"), "max.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MaxD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUnS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpEqS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUeqS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLtS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUltS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLeS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUleS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpOrS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUneS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpNeS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUnD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpEqD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUeqD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLtD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUltD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLeD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUleD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpOrD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUneD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpNeD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadDFromOffset) {
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFB);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFC);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFF);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8008);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8001);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x12345678);
+
+  const char* expected =
+      "ldc1 $f0, -0x8000($a0)\n"
+      "ldc1 $f0, 0($a0)\n"
+      "ldc1 $f0, 0x7FF8($a0)\n"
+      "lwc1 $f0, 0x7FFB($a0)\n"
+      "lw $t8, 0x7FFF($a0)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t8, 8($at)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 7($at)\n"
+      "lw $t8, 11($at)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f0, -9($at)\n"
+      "lw $t8, -5($at)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 0x7FF8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "ldc1 $f0, -0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "ldc1 $f0, 0x8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "lwc1 $f0, 0xF($at)\n"
+      "lw $t8, 0x13($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "lwc1 $f0, -0xF($at)\n"
+      "lw $t8, -0xB($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "ldc1 $f0, -0x8($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "ldc1 $f0, 0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "ldc1 $f0, -0x7FF0($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "lwc1 $f0, -0x7FE9($at)\n"
+      "lw $t8, -0x7FE5($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "lwc1 $f0, 0x7FE9($at)\n"
+      "lw $t8, 0x7FED($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "ldc1 $f0, 0x7FF0($at)\n"
+      "aui $at, $a0, 0x1234\n"
+      "ldc1 $f0, 0x5678($at)\n";
+  DriverStr(expected, "LoadDFromOffset");
+}
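The pattern in the expected output is worth spelling out. This classification is read off the test, not taken from the MipsAssembler source, so treat it as a sketch of what the output implies:

    #include <cassert>
    #include <cstdint>

    enum class LoadDShape { kLdc1, kTwoWords, kAdjustBaseFirst };

    bool IsInt16(int32_t v) { return -0x8000 <= v && v <= 0x7FFF; }

    LoadDShape ClassifyLoadD(int32_t offset) {
      if (IsInt16(offset) && IsInt16(offset + 4)) {
        // Doubleword-aligned offsets appear to get a single ldc1; others are
        // split into lwc1 + lw + mthc1, presumably because alignment of the
        // final address cannot be assumed.
        return (offset % 8 == 0) ? LoadDShape::kLdc1 : LoadDShape::kTwoWords;
      }
      // Fold the bulk of the offset into $at first (addiu once or twice, or
      // aui for larger values), then reclassify the small residual offset.
      return LoadDShape::kAdjustBaseFirst;
    }

    int main() {
      assert(ClassifyLoadD(0x7FF8) == LoadDShape::kLdc1);
      assert(ClassifyLoadD(0x7FFB) == LoadDShape::kTwoWords);
      assert(ClassifyLoadD(0x7FFC) == LoadDShape::kAdjustBaseFirst);
      return 0;
    }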
+
+TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) {
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFB);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFC);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFF);
+  __ StoreDToOffset(mips::F0, mips::A0, -0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8008);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8001);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x12345678);
+
+  const char* expected =
+      "sdc1 $f0, -0x8000($a0)\n"
+      "sdc1 $f0, 0($a0)\n"
+      "sdc1 $f0, 0x7FF8($a0)\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 0x7FFB($a0)\n"
+      "sw $t8, 0x7FFF($a0)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t8, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 7($at)\n"
+      "sw $t8, 11($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, -9($at)\n"
+      "sw $t8, -5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 0x7FF8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "sdc1 $f0, -0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "sdc1 $f0, 0x8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 0xF($at)\n"
+      "sw $t8, 0x13($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, -0xF($at)\n"
+      "sw $t8, -0xB($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "sdc1 $f0, -0x8($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "sdc1 $f0, 0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "sdc1 $f0, -0x7FF0($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, -0x7FE9($at)\n"
+      "sw $t8, -0x7FE5($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 0x7FE9($at)\n"
+      "sw $t8, 0x7FED($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "sdc1 $f0, 0x7FF0($at)\n"
+      "aui $at, $a0, 0x1234\n"
+      "sdc1 $f0, 0x5678($at)\n";
+  DriverStr(expected, "StoreDToOffset");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDE;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "lwpc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
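Taken together, these two tests pin down the boundary the assembler uses for lwpc's reach: at kAdduCount = 0x3FFDE the literal still counts as near and a single lwpc is emitted, while one additional addu pushes it past the range the assembler is willing to guarantee and the load falls back to the two-instruction auipc + lw sequence. Why the threshold sits at exactly 0x3FFDE rather than the raw 19-bit limit is not stated here; presumably the assembler reserves some slack.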
+
+//////////////
+// BRANCHES //
+//////////////
+
+// TODO: MipsAssembler::Addiupc
+//       MipsAssembler::Bc
+//       MipsAssembler::Jic
+//       MipsAssembler::Jialc
+//       MipsAssembler::Bltc
+//       MipsAssembler::Bltzc
+//       MipsAssembler::Bgtzc
+//       MipsAssembler::Bgec
+//       MipsAssembler::Bgezc
+//       MipsAssembler::Blezc
+//       MipsAssembler::Bltuc
+//       MipsAssembler::Bgeuc
+//       MipsAssembler::Beqc
+//       MipsAssembler::Bnec
+//       MipsAssembler::Beqzc
+//       MipsAssembler::Bnezc
+//       MipsAssembler::Bc1eqz
+//       MipsAssembler::Bc1nez
+//       MipsAssembler::Buncond
+//       MipsAssembler::Bcond
+//       MipsAssembler::Call
+
+// TODO:  AssemblerMIPS32r6Test.B
+//        AssemblerMIPS32r6Test.Beq
+//        AssemblerMIPS32r6Test.Bne
+//        AssemblerMIPS32r6Test.Beqz
+//        AssemblerMIPS32r6Test.Bnez
+//        AssemblerMIPS32r6Test.Bltz
+//        AssemblerMIPS32r6Test.Bgez
+//        AssemblerMIPS32r6Test.Blez
+//        AssemblerMIPS32r6Test.Bgtz
+//        AssemblerMIPS32r6Test.Blt
+//        AssemblerMIPS32r6Test.Bge
+//        AssemblerMIPS32r6Test.Bltu
+//        AssemblerMIPS32r6Test.Bgeu
+
+#undef __
+
+}  // namespace art
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 063d8bd..50a8dc2 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -21,6 +21,8 @@
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
+#define __ GetAssembler()->
+
 namespace art {
 
 struct MIPSCpuRegisterCompare {
@@ -184,6 +186,63 @@
     return result;
   }
 
+  void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                             mips::MipsLabel*),
+                              std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A1, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a1, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                              mips::Register,
+                                                              mips::MipsLabel*),
+                               std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
  private:
   std::vector<mips::Register*> registers_;
   std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
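These helpers are presumably consumed by the branch tests added further down in this file; a hypothetical consumer, mirroring the file's TEST_F idiom (test names and mnemonics illustrative only), would look like:

    TEST_F(AssemblerMIPSTest, Bltz) {
      BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "bltz");
    }

    TEST_F(AssemblerMIPSTest, Beq) {
      BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "beq");
    }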
@@ -196,8 +255,6 @@
   EXPECT_TRUE(CheckTools());
 }
 
-#define __ GetAssembler()->
-
 TEST_F(AssemblerMIPSTest, Addu) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "Addu");
 }
@@ -278,6 +335,18 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "Nor");
 }
 
+//////////
+// MISC //
+//////////
+
+TEST_F(AssemblerMIPSTest, Movz) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Movz, "movz ${reg1}, ${reg2}, ${reg3}"), "Movz");
+}
+
+TEST_F(AssemblerMIPSTest, Movn) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Movn, "movn ${reg1}, ${reg2}, ${reg3}"), "Movn");
+}
+
 TEST_F(AssemblerMIPSTest, Seb) {
   DriverStr(RepeatRR(&mips::MipsAssembler::Seb, "seb ${reg1}, ${reg2}"), "Seb");
 }
@@ -306,10 +375,60 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srlv, "srlv ${reg1}, ${reg2}, ${reg3}"), "Srlv");
 }
 
+TEST_F(AssemblerMIPSTest, Rotrv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Rotrv, "rotrv ${reg1}, ${reg2}, ${reg3}"), "rotrv");
+}
+
 TEST_F(AssemblerMIPSTest, Srav) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
 }
 
+TEST_F(AssemblerMIPSTest, Ins) {
+  std::vector<mips::Register*> regs = GetRegisters();
+  WarnOnCombinations(regs.size() * regs.size() * 33 * 16);
+  std::string expected;
+  for (mips::Register* reg1 : regs) {
+    for (mips::Register* reg2 : regs) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; pos + size <= 32; size++) {
+          __ Ins(*reg1, *reg2, pos, size);
+          std::ostringstream instr;
+          instr << "ins $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+          expected += instr.str();
+        }
+      }
+    }
+  }
+  DriverStr(expected, "Ins");
+}
+
+TEST_F(AssemblerMIPSTest, Ext) {
+  std::vector<mips::Register*> regs = GetRegisters();
+  WarnOnCombinations(regs.size() * regs.size() * 33 * 16);
+  std::string expected;
+  for (mips::Register* reg1 : regs) {
+    for (mips::Register* reg2 : regs) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; pos + size <= 32; size++) {
+          __ Ext(*reg1, *reg2, pos, size);
+          std::ostringstream instr;
+          instr << "ext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+          expected += instr.str();
+        }
+      }
+    }
+  }
+  DriverStr(expected, "Ext");
+}
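The 33 * 16 factor passed to WarnOnCombinations in both tests is just the closed form of the loop bounds; a quick sanity check:

    #include <cassert>

    int main() {
      // Count the (pos, size) pairs enumerated by the Ins/Ext loops above.
      int count = 0;
      for (int pos = 0; pos < 32; ++pos) {
        for (int size = 1; pos + size <= 32; ++size) {
          ++count;
        }
      }
      assert(count == 33 * 16);  // 32 + 31 + ... + 1 = 528 pairs per register pair
      return 0;
    }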
+
+TEST_F(AssemblerMIPSTest, ClzR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::ClzR2, "clz ${reg1}, ${reg2}"), "clzR2");
+}
+
+TEST_F(AssemblerMIPSTest, CloR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::CloR2, "clo ${reg1}, ${reg2}"), "cloR2");
+}
+
 TEST_F(AssemblerMIPSTest, Lb) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
 }
@@ -318,10 +437,18 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "Lh");
 }
 
+TEST_F(AssemblerMIPSTest, Lwl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lwl, -16, "lwl ${reg1}, {imm}(${reg2})"), "Lwl");
+}
+
 TEST_F(AssemblerMIPSTest, Lw) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "Lw");
 }
 
+TEST_F(AssemblerMIPSTest, Lwr) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lwr, -16, "lwr ${reg1}, {imm}(${reg2})"), "Lwr");
+}
+
 TEST_F(AssemblerMIPSTest, Lbu) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "Lbu");
 }
@@ -350,10 +477,26 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "Sh");
 }
 
+TEST_F(AssemblerMIPSTest, Swl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Swl, -16, "swl ${reg1}, {imm}(${reg2})"), "Swl");
+}
+
 TEST_F(AssemblerMIPSTest, Sw) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "Sw");
 }
 
+TEST_F(AssemblerMIPSTest, Swr) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Swr, -16, "swr ${reg1}, {imm}(${reg2})"), "Swr");
+}
+
+TEST_F(AssemblerMIPSTest, LlR2) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::LlR2, -16, "ll ${reg1}, {imm}(${reg2})"), "LlR2");
+}
+
+TEST_F(AssemblerMIPSTest, ScR2) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::ScR2, -16, "sc ${reg1}, {imm}(${reg2})"), "ScR2");
+}
+
 TEST_F(AssemblerMIPSTest, Slt) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "Slt");
 }
@@ -418,6 +561,112 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
 }
 
+TEST_F(AssemblerMIPSTest, FloorWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s");
+}
+
+TEST_F(AssemblerMIPSTest, FloorWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d");
+}
+
+TEST_F(AssemblerMIPSTest, CunS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CunS");
+}
+
+TEST_F(AssemblerMIPSTest, CeqS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqS, 3, "c.eq.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CeqS");
+}
+
+TEST_F(AssemblerMIPSTest, CueqS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqS, 3, "c.ueq.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CueqS");
+}
+
+TEST_F(AssemblerMIPSTest, ColtS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtS, 3, "c.olt.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColtS");
+}
+
+TEST_F(AssemblerMIPSTest, CultS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CultS, 3, "c.ult.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CultS");
+}
+
+TEST_F(AssemblerMIPSTest, ColeS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeS, 3, "c.ole.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColeS");
+}
+
+TEST_F(AssemblerMIPSTest, CuleS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleS, 3, "c.ule.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CuleS");
+}
+
+TEST_F(AssemblerMIPSTest, CunD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CunD, 3, "c.un.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CunD");
+}
+
+TEST_F(AssemblerMIPSTest, CeqD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqD, 3, "c.eq.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CeqD");
+}
+
+TEST_F(AssemblerMIPSTest, CueqD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqD, 3, "c.ueq.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CueqD");
+}
+
+TEST_F(AssemblerMIPSTest, ColtD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtD, 3, "c.olt.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColtD");
+}
+
+TEST_F(AssemblerMIPSTest, CultD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CultD, 3, "c.ult.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CultD");
+}
+
+TEST_F(AssemblerMIPSTest, ColeD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeD, 3, "c.ole.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColeD");
+}
+
+TEST_F(AssemblerMIPSTest, CuleD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleD, 3, "c.ule.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CuleD");
+}
+
+TEST_F(AssemblerMIPSTest, Movf) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Movf, 3, "movf ${reg1}, ${reg2}, $fcc{imm}"), "Movf");
+}
+
+TEST_F(AssemblerMIPSTest, Movt) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), "Movt");
+}
+
+TEST_F(AssemblerMIPSTest, MovfS) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfS, 3, "movf.s ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovfS");
+}
+
+TEST_F(AssemblerMIPSTest, MovfD) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfD, 3, "movf.d ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovfD");
+}
+
+TEST_F(AssemblerMIPSTest, MovtS) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtS, 3, "movt.s ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovtS");
+}
+
+TEST_F(AssemblerMIPSTest, MovtD) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtD, 3, "movt.d ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovtD");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSW) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
 }
@@ -426,6 +675,14 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW");
 }
 
+TEST_F(AssemblerMIPSTest, CvtSL) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDL) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSD) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD");
 }
@@ -434,6 +691,22 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS");
 }
 
+TEST_F(AssemblerMIPSTest, TruncWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD");
+}
+
 TEST_F(AssemblerMIPSTest, Mfc1) {
   DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
 }
@@ -478,212 +751,538 @@
   DriverStr(RepeatRR(&mips::MipsAssembler::Not, "nor ${reg1}, ${reg2}, $zero"), "Not");
 }
 
-TEST_F(AssemblerMIPSTest, LoadFromOffset) {
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A1, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 256);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 1000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x8000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x10000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x12345678);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -256);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+TEST_F(AssemblerMIPSTest, Addiu32) {
+  __ Addiu32(mips::A1, mips::A2, -0x8000);
+  __ Addiu32(mips::A1, mips::A2, +0);
+  __ Addiu32(mips::A1, mips::A2, +0x7FFF);
+  __ Addiu32(mips::A1, mips::A2, -0x10000);
+  __ Addiu32(mips::A1, mips::A2, -0x8001);
+  __ Addiu32(mips::A1, mips::A2, +0x8000);
+  __ Addiu32(mips::A1, mips::A2, +0xFFFE);
+  __ Addiu32(mips::A1, mips::A2, -0x10001);
+  __ Addiu32(mips::A1, mips::A2, +0xFFFF);
+  __ Addiu32(mips::A1, mips::A2, +0x10000);
+  __ Addiu32(mips::A1, mips::A2, +0x10001);
+  __ Addiu32(mips::A1, mips::A2, +0x12345678);
 
   const char* expected =
-      "lb $a0, 0($a0)\n"
-      "lb $a0, 0($a1)\n"
-      "lb $a0, 256($a1)\n"
-      "lb $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
+      "addiu $a1, $a2, -0x8000\n"
+      "addiu $a1, $a2, 0\n"
+      "addiu $a1, $a2, 0x7FFF\n"
+      "addiu $at, $a2, -0x8000\n"
+      "addiu $a1, $at, -0x8000\n"
+      "addiu $at, $a2, -0x8000\n"
+      "addiu $a1, $at, -1\n"
+      "addiu $at, $a2, 0x7FFF\n"
+      "addiu $a1, $at, 1\n"
+      "addiu $at, $a2, 0x7FFF\n"
+      "addiu $a1, $at, 0x7FFF\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0xFFFF\n"
+      "addu $a1, $a2, $at\n"
+      "ori $at, $zero, 0xFFFF\n"
+      "addu $a1, $a2, $at\n"
       "lui $at, 1\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-      "lb $a0, -256($a1)\n"
-      "lb $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-
-      "lbu $a0, 0($a0)\n"
-      "lbu $a0, 0($a1)\n"
-      "lbu $a0, 256($a1)\n"
-      "lbu $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "addu $a1, $a2, $at\n"
       "lui $at, 1\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "ori $at, $at, 1\n"
+      "addu $a1, $a2, $at\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
-      "lbu $a0, -256($a1)\n"
-      "lbu $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "ori $at, $at, 0x5678\n"
+      "addu $a1, $a2, $at\n";
+  DriverStr(expected, "Addiu32");
+}
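Read off the expected output (not from the MipsAssembler source), Addiu32 appears to pick among three shapes; a sketch of that classification:

    #include <cassert>
    #include <cstdint>

    enum class Addiu32Shape { kOneAddiu, kTwoAddius, kMaterializeAndAddu };

    Addiu32Shape ClassifyAddiu32(int32_t imm) {
      if (-0x8000 <= imm && imm <= 0x7FFF) {
        return Addiu32Shape::kOneAddiu;          // fits the 16-bit signed field
      }
      if (-0x10000 <= imm && imm <= 0xFFFE) {
        return Addiu32Shape::kTwoAddius;         // sum of two 16-bit signed halves via $at
      }
      return Addiu32Shape::kMaterializeAndAddu;  // lui and/or ori into $at, then addu
    }

    int main() {
      assert(ClassifyAddiu32(-0x8000) == Addiu32Shape::kOneAddiu);
      assert(ClassifyAddiu32(+0xFFFE) == Addiu32Shape::kTwoAddius);
      assert(ClassifyAddiu32(+0xFFFF) == Addiu32Shape::kMaterializeAndAddu);
      assert(ClassifyAddiu32(+0x12345678) == Addiu32Shape::kMaterializeAndAddu);
      return 0;
    }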
 
-      "lh $a0, 0($a0)\n"
-      "lh $a0, 0($a1)\n"
-      "lh $a0, 256($a1)\n"
-      "lh $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+TEST_F(AssemblerMIPSTest, LoadFromOffset) {
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8008);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8001);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x12345678);
+
+  const char* expected =
+      "lb $a3, -0x8000($a1)\n"
+      "lb $a3, 0($a1)\n"
+      "lb $a3, 0x7FF8($a1)\n"
+      "lb $a3, 0x7FFB($a1)\n"
+      "lb $a3, 0x7FFC($a1)\n"
+      "lb $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lb $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lb $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lb $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lb $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lb $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lb $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lb $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "lb $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
+      "lb $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lb $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lb $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
-      "lh $a0, -256($a1)\n"
-      "lh $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
+      "lb $a3, 0($at)\n"
 
-      "lhu $a0, 0($a0)\n"
-      "lhu $a0, 0($a1)\n"
-      "lhu $a0, 256($a1)\n"
-      "lhu $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lbu $a3, -0x8000($a1)\n"
+      "lbu $a3, 0($a1)\n"
+      "lbu $a3, 0x7FF8($a1)\n"
+      "lbu $a3, 0x7FFB($a1)\n"
+      "lbu $a3, 0x7FFC($a1)\n"
+      "lbu $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lbu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lbu $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lbu $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lbu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lbu $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lbu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lbu $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lbu $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lbu $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lbu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lbu $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "lbu $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
+      "lbu $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
-      "lhu $a0, -256($a1)\n"
-      "lhu $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
+      "lbu $a3, 0($at)\n"
 
-      "lw $a0, 0($a0)\n"
-      "lw $a0, 0($a1)\n"
-      "lw $a0, 256($a1)\n"
-      "lw $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lh $a3, -0x8000($a1)\n"
+      "lh $a3, 0($a1)\n"
+      "lh $a3, 0x7FF8($a1)\n"
+      "lh $a3, 0x7FFB($a1)\n"
+      "lh $a3, 0x7FFC($a1)\n"
+      "lh $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lh $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lh $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lh $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lh $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lh $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lh $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lh $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "lh $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
+      "lh $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lh $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lh $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
-      "lw $a0, -256($a1)\n"
-      "lw $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
+      "lh $a3, 0($at)\n"
 
-      "lw $a1, 4($a0)\n"
-      "lw $a0, 0($a0)\n"
-      "lw $a0, 0($a1)\n"
-      "lw $a1, 4($a1)\n"
-      "lw $a1, 0($a0)\n"
-      "lw $a2, 4($a0)\n"
+      "lhu $a3, -0x8000($a1)\n"
+      "lhu $a3, 0($a1)\n"
+      "lhu $a3, 0x7FF8($a1)\n"
+      "lhu $a3, 0x7FFB($a1)\n"
+      "lhu $a3, 0x7FFC($a1)\n"
+      "lhu $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lhu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lhu $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lhu $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lhu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lhu $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lhu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lhu $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lhu $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lhu $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lhu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lhu $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 0($at)\n"
+
+      "lw $a3, -0x8000($a1)\n"
+      "lw $a3, 0($a1)\n"
+      "lw $a3, 0x7FF8($a1)\n"
+      "lw $a3, 0x7FFB($a1)\n"
+      "lw $a3, 0x7FFC($a1)\n"
+      "lw $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lw $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lw $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lw $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 0($at)\n"
+
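+      // A kLoadDoubleword into general-purpose registers loads the pair
+      // with two lw instructions, the second at offset + 4.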
+      "lw $a0, -0x8000($a2)\n"
+      "lw $a1, -0x7FFC($a2)\n"
       "lw $a0, 0($a2)\n"
       "lw $a1, 4($a2)\n"
-      "lw $a0, 256($a2)\n"
-      "lw $a1, 260($a2)\n"
-      "lw $a0, 1000($a2)\n"
-      "lw $a1, 1004($a2)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lw $a0, 0x7FF8($a2)\n"
+      "lw $a1, 0x7FFC($a2)\n"
+      "lw $a0, 0x7FFB($a2)\n"
+      "lw $a1, 0x7FFF($a2)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 4($at)\n"
+      "lw $a1, 8($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 7($at)\n"
+      "lw $a1, 11($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "lw $a0, -0x7FF8($at)\n"
+      "lw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "lw $a0, -0x10($at)\n"
+      "lw $a1, -0xC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "lw $a0, -9($at)\n"
+      "lw $a1, -5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 8($at)\n"
+      "lw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 0x7FF8($at)\n"
+      "lw $a1, 0x7FFC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a0, -0x7FF8($at)\n"
+      "lw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a0, -8($at)\n"
+      "lw $a1, -4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a0, -1($at)\n"
+      "lw $a1, 3($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a0, 1($at)\n"
+      "lw $a1, 5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a0, 8($at)\n"
+      "lw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a0, 0x7FF8($at)\n"
+      "lw $a1, 0x7FFC($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a2\n"
       "lw $a0, 0($at)\n"
       "lw $a1, 4($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 7($at)\n"
+      "lw $a1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 1($at)\n"
+      "lw $a1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a2\n"
       "lw $a0, 0($at)\n"
       "lw $a1, 4($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a2\n"
-      "lw $a0, 0($at)\n"
-      "lw $a1, 4($at)\n"
-      "lw $a0, -256($a2)\n"
-      "lw $a1, -252($a2)\n"
-      "lw $a0, 0xFFFF8000($a2)\n"
-      "lw $a1, 0xFFFF8004($a2)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a2\n"
       "lw $a0, 0($at)\n"
       "lw $a1, 4($at)\n";
@@ -691,208 +1290,513 @@
 }
 
 TEST_F(AssemblerMIPSTest, LoadSFromOffset) {
-  __ LoadSFromOffset(mips::F0, mips::A0, 0);
-  __ LoadSFromOffset(mips::F0, mips::A0, 4);
-  __ LoadSFromOffset(mips::F0, mips::A0, 256);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0x8000);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0x10000);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0x12345678);
-  __ LoadSFromOffset(mips::F0, mips::A0, -256);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x8000);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FF8);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFB);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFC);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFF);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0xFFF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x8008);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x8001);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x8000);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0xFFF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x17FE8);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x0FFF8);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x0FFF1);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x0FFF1);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x0FFF8);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x17FE8);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x17FF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x17FE9);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x17FE9);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x17FF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x12345678);
 
   const char* expected =
-      "lwc1 $f0, 0($a0)\n"
-      "lwc1 $f0, 4($a0)\n"
-      "lwc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lwc1 $f2, -0x8000($a0)\n"
+      "lwc1 $f2, 0($a0)\n"
+      "lwc1 $f2, 0x7FF8($a0)\n"
+      "lwc1 $f2, 0x7FFB($a0)\n"
+      "lwc1 $f2, 0x7FFC($a0)\n"
+      "lwc1 $f2, 0x7FFF($a0)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f2, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f2, -9($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f2, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f2, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f2, -1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f2, 1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f2, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "lwc1 $f2, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f2, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f2, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f2, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
-      "lwc1 $f0, -256($a0)\n"
-      "lwc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n";
+      "lwc1 $f2, 0($at)\n";
   DriverStr(expected, "LoadSFromOffset");
 }
 
-
 TEST_F(AssemblerMIPSTest, LoadDFromOffset) {
-  __ LoadDFromOffset(mips::F0, mips::A0, 0);
-  __ LoadDFromOffset(mips::F0, mips::A0, 4);
-  __ LoadDFromOffset(mips::F0, mips::A0, 256);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0x8000);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0x10000);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0x12345678);
-  __ LoadDFromOffset(mips::F0, mips::A0, -256);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFB);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFC);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFF);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8008);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8001);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x12345678);
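+
+  // Judging by the expected output, a 64-bit FP load uses a single ldc1
+  // only when the final offset is in range and a multiple of 8; otherwise
+  // the two words are fetched with a pair of lwc1 instructions.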
 
   const char* expected =
+      "ldc1 $f0, -0x8000($a0)\n"
       "ldc1 $f0, 0($a0)\n"
-      "lwc1 $f0, 4($a0)\n"
-      "lwc1 $f1, 8($a0)\n"
-      "ldc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "ldc1 $f0, 0x7FF8($a0)\n"
+      "lwc1 $f0, 0x7FFB($a0)\n"
+      "lwc1 $f1, 0x7FFF($a0)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 4($at)\n"
+      "lwc1 $f1, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 7($at)\n"
+      "lwc1 $f1, 11($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f0, -9($at)\n"
+      "lwc1 $f1, -5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "ldc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "ldc1 $f0, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f0, -1($at)\n"
+      "lwc1 $f1, 3($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f0, 1($at)\n"
+      "lwc1 $f1, 5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "ldc1 $f0, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
       "ldc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 7($at)\n"
+      "lwc1 $f1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 1($at)\n"
+      "lwc1 $f1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a0\n"
       "ldc1 $f0, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a0\n"
-      "ldc1 $f0, 0($at)\n"
-      "ldc1 $f0, -256($a0)\n"
-      "ldc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
       "ldc1 $f0, 0($at)\n";
   DriverStr(expected, "LoadDFromOffset");
 }
 
 TEST_F(AssemblerMIPSTest, StoreToOffset) {
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A0, 0);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 256);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 1000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x8000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x10000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x12345678);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, -256);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FF8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFB);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFC);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFF);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0xFFF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8008);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8001);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0xFFF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FE8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FE8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FE9);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FE9);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x12345678);
 
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A0, 0);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 256);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 1000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x8000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x10000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x12345678);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, -256);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FF8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFB);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFC);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFF);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0xFFF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8008);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8001);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0xFFF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FE8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FE8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FE9);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FE9);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x12345678);
 
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A0, 0);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 256);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 1000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x8000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x10000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x12345678);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, -256);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FF8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFB);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFC);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFF);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0xFFF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8008);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8001);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0xFFF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FE8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FE8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FE9);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FE9);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x12345678);
 
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 256);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 1000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x8000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x10000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x12345678);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -256);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FF8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFB);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFC);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFF);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0xFFF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8008);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8001);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0xFFF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FE8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FE8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FE9);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FE9);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x12345678);
 
   const char* expected =
-      "sb $a0, 0($a0)\n"
-      "sb $a0, 0($a1)\n"
-      "sb $a0, 256($a1)\n"
-      "sb $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sb $a3, -0x8000($a1)\n"
+      "sb $a3, 0($a1)\n"
+      "sb $a3, 0x7FF8($a1)\n"
+      "sb $a3, 0x7FFB($a1)\n"
+      "sb $a3, 0x7FFC($a1)\n"
+      "sb $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sb $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sb $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sb $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sb $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sb $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sb $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sb $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "sb $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
+      "sb $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "sb $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "sb $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
-      "sb $a0, -256($a1)\n"
-      "sb $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
+      "sb $a3, 0($at)\n"
 
-      "sh $a0, 0($a0)\n"
-      "sh $a0, 0($a1)\n"
-      "sh $a0, 256($a1)\n"
-      "sh $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sh $a3, -0x8000($a1)\n"
+      "sh $a3, 0($a1)\n"
+      "sh $a3, 0x7FF8($a1)\n"
+      "sh $a3, 0x7FFB($a1)\n"
+      "sh $a3, 0x7FFC($a1)\n"
+      "sh $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sh $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sh $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sh $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sh $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sh $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sh $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sh $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "sh $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
+      "sh $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "sh $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "sh $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
-      "sh $a0, -256($a1)\n"
-      "sh $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
+      "sh $a3, 0($at)\n"
 
-      "sw $a0, 0($a0)\n"
-      "sw $a0, 0($a1)\n"
-      "sw $a0, 256($a1)\n"
-      "sw $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sw $a3, -0x8000($a1)\n"
+      "sw $a3, 0($a1)\n"
+      "sw $a3, 0x7FF8($a1)\n"
+      "sw $a3, 0x7FFB($a1)\n"
+      "sw $a3, 0x7FFC($a1)\n"
+      "sw $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sw $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sw $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sw $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "sw $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
+      "sw $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "sw $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "sw $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
-      "sw $a0, -256($a1)\n"
-      "sw $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
+      "sw $a3, 0($at)\n"
 
+      "sw $a0, -0x8000($a2)\n"
+      "sw $a1, -0x7FFC($a2)\n"
       "sw $a0, 0($a2)\n"
       "sw $a1, 4($a2)\n"
-      "sw $a0, 256($a2)\n"
-      "sw $a1, 260($a2)\n"
-      "sw $a0, 1000($a2)\n"
-      "sw $a1, 1004($a2)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sw $a0, 0x7FF8($a2)\n"
+      "sw $a1, 0x7FFC($a2)\n"
+      "sw $a0, 0x7FFB($a2)\n"
+      "sw $a1, 0x7FFF($a2)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 4($at)\n"
+      "sw $a1, 8($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 7($at)\n"
+      "sw $a1, 11($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "sw $a0, -0x7FF8($at)\n"
+      "sw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "sw $a0, -0x10($at)\n"
+      "sw $a1, -0xC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "sw $a0, -9($at)\n"
+      "sw $a1, -5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 8($at)\n"
+      "sw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 0x7FF8($at)\n"
+      "sw $a1, 0x7FFC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a0, -0x7FF8($at)\n"
+      "sw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a0, -8($at)\n"
+      "sw $a1, -4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a0, -1($at)\n"
+      "sw $a1, 3($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a0, 1($at)\n"
+      "sw $a1, 5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a0, 8($at)\n"
+      "sw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a0, 0x7FF8($at)\n"
+      "sw $a1, 0x7FFC($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a2\n"
       "sw $a0, 0($at)\n"
       "sw $a1, 4($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 7($at)\n"
+      "sw $a1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 1($at)\n"
+      "sw $a1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a2\n"
       "sw $a0, 0($at)\n"
       "sw $a1, 4($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a2\n"
-      "sw $a0, 0($at)\n"
-      "sw $a1, 4($at)\n"
-      "sw $a0, -256($a2)\n"
-      "sw $a1, -252($a2)\n"
-      "sw $a0, 0xFFFF8000($a2)\n"
-      "sw $a1, 0xFFFF8004($a2)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a2\n"
       "sw $a0, 0($at)\n"
       "sw $a1, 4($at)\n";
@@ -900,69 +1804,174 @@
 }
 
 TEST_F(AssemblerMIPSTest, StoreSToOffset) {
-  __ StoreSToOffset(mips::F0, mips::A0, 0);
-  __ StoreSToOffset(mips::F0, mips::A0, 4);
-  __ StoreSToOffset(mips::F0, mips::A0, 256);
-  __ StoreSToOffset(mips::F0, mips::A0, 0x8000);
-  __ StoreSToOffset(mips::F0, mips::A0, 0x10000);
-  __ StoreSToOffset(mips::F0, mips::A0, 0x12345678);
-  __ StoreSToOffset(mips::F0, mips::A0, -256);
-  __ StoreSToOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ StoreSToOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x8000);
+  __ StoreSToOffset(mips::F2, mips::A0, +0);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FF8);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FFB);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FFC);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FFF);
+  __ StoreSToOffset(mips::F2, mips::A0, -0xFFF0);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x8008);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x8001);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x8000);
+  __ StoreSToOffset(mips::F2, mips::A0, +0xFFF0);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x17FE8);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x0FFF8);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x0FFF1);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x0FFF1);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x0FFF8);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x17FE8);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x17FF0);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x17FE9);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x17FE9);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x17FF0);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x12345678);
 
   const char* expected =
-      "swc1 $f0, 0($a0)\n"
-      "swc1 $f0, 4($a0)\n"
-      "swc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "swc1 $f2, -0x8000($a0)\n"
+      "swc1 $f2, 0($a0)\n"
+      "swc1 $f2, 0x7FF8($a0)\n"
+      "swc1 $f2, 0x7FFB($a0)\n"
+      "swc1 $f2, 0x7FFC($a0)\n"
+      "swc1 $f2, 0x7FFF($a0)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f2, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f2, -9($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f2, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f2, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f2, -1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f2, 1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f2, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "swc1 $f2, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
+      "swc1 $f2, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f2, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f2, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
-      "swc1 $f0, -256($a0)\n"
-      "swc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n";
+      "swc1 $f2, 0($at)\n";
   DriverStr(expected, "StoreSToOffset");
 }
 
 TEST_F(AssemblerMIPSTest, StoreDToOffset) {
-  __ StoreDToOffset(mips::F0, mips::A0, 0);
-  __ StoreDToOffset(mips::F0, mips::A0, 4);
-  __ StoreDToOffset(mips::F0, mips::A0, 256);
-  __ StoreDToOffset(mips::F0, mips::A0, 0x8000);
-  __ StoreDToOffset(mips::F0, mips::A0, 0x10000);
-  __ StoreDToOffset(mips::F0, mips::A0, 0x12345678);
-  __ StoreDToOffset(mips::F0, mips::A0, -256);
-  __ StoreDToOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ StoreDToOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFB);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFC);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFF);
+  __ StoreDToOffset(mips::F0, mips::A0, -0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8008);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8001);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x12345678);
 
   const char* expected =
+      "sdc1 $f0, -0x8000($a0)\n"
       "sdc1 $f0, 0($a0)\n"
-      "swc1 $f0, 4($a0)\n"
-      "swc1 $f1, 8($a0)\n"
-      "sdc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sdc1 $f0, 0x7FF8($a0)\n"
+      "swc1 $f0, 0x7FFB($a0)\n"
+      "swc1 $f1, 0x7FFF($a0)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f0, 4($at)\n"
+      "swc1 $f1, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f0, 7($at)\n"
+      "swc1 $f1, 11($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f0, -9($at)\n"
+      "swc1 $f1, -5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sdc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sdc1 $f0, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f0, -1($at)\n"
+      "swc1 $f1, 3($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f0, 1($at)\n"
+      "swc1 $f1, 5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sdc1 $f0, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
       "sdc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 7($at)\n"
+      "swc1 $f1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 1($at)\n"
+      "swc1 $f1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a0\n"
       "sdc1 $f0, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a0\n"
-      "sdc1 $f0, 0($at)\n"
-      "sdc1 $f0, -256($a0)\n"
-      "sdc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
       "sdc1 $f0, 0($at)\n";
   DriverStr(expected, "StoreDToOffset");
@@ -1000,55 +2009,11 @@
 }
 
 TEST_F(AssemblerMIPSTest, Beq) {
-  mips::MipsLabel label;
-  __ Beq(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Beq(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "beq $a0, $a1, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "beq $a2, $a3, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Beq");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
 }
 
 TEST_F(AssemblerMIPSTest, Bne) {
-  mips::MipsLabel label;
-  __ Bne(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bne(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bne $a0, $a1, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bne $a2, $a3, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bne");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
 }
 
 TEST_F(AssemblerMIPSTest, Beqz) {
@@ -1104,107 +2069,19 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bltz) {
-  mips::MipsLabel label;
-  __ Bltz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bltz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bltz $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bltz $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bltz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
 }
 
 TEST_F(AssemblerMIPSTest, Bgez) {
-  mips::MipsLabel label;
-  __ Bgez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bgez $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bgez $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
 }
 
 TEST_F(AssemblerMIPSTest, Blez) {
-  mips::MipsLabel label;
-  __ Blez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Blez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "blez $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "blez $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Blez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
 }
 
 TEST_F(AssemblerMIPSTest, Bgtz) {
-  mips::MipsLabel label;
-  __ Bgtz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgtz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bgtz $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bgtz $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgtz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
 }
 
 TEST_F(AssemblerMIPSTest, Blt) {
@@ -1319,6 +2196,141 @@
   DriverStr(expected, "Bgeu");
 }
 
+TEST_F(AssemblerMIPSTest, Bc1f) {
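+  // bc1f branches when the selected FP condition-code bit ($fcc0-$fcc7)
+  // is clear; the Bc1t test below covers the branch-on-set counterpart.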
+  mips::MipsLabel label;
+  __ Bc1f(0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bc1f(7, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1f $fcc0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1f $fcc7, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1f");
+}
+
+TEST_F(AssemblerMIPSTest, Bc1t) {
+  mips::MipsLabel label;
+  __ Bc1t(0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bc1t(7, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1t $fcc0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1t $fcc7, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1t");
+}
+
+///////////////////////
+// Loading Constants //
+///////////////////////
+
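+// LoadConst32 is expected to use the shortest sequence for each value:
+// ori from $zero for unsigned 16-bit constants, addiu from $zero for
+// signed 16-bit constants, and lui (plus ori when the low half-word is
+// non-zero) for everything else.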
+TEST_F(AssemblerMIPSTest, LoadConst32) {
+  // IsUint<16>(value)
+  __ LoadConst32(mips::V0, 0);
+  __ LoadConst32(mips::V0, 65535);
+  // IsInt<16>(value)
+  __ LoadConst32(mips::V0, -1);
+  __ LoadConst32(mips::V0, -32768);
+  // Everything else
+  __ LoadConst32(mips::V0, 65536);
+  __ LoadConst32(mips::V0, 65537);
+  __ LoadConst32(mips::V0, 2147483647);
+  __ LoadConst32(mips::V0, -32769);
+  __ LoadConst32(mips::V0, -65536);
+  __ LoadConst32(mips::V0, -65537);
+  __ LoadConst32(mips::V0, -2147483647);
+  __ LoadConst32(mips::V0, -2147483648);
+
+  const char* expected =
+      // IsUint<16>(value)
+      "ori $v0, $zero, 0\n"         // __ LoadConst32(mips::V0, 0);
+      "ori $v0, $zero, 65535\n"     // __ LoadConst32(mips::V0, 65535);
+      // IsInt<16>(value)
+      "addiu $v0, $zero, -1\n"      // __ LoadConst32(mips::V0, -1);
+      "addiu $v0, $zero, -32768\n"  // __ LoadConst32(mips::V0, -32768);
+      // Everything else
+      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65536);
+      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65537);
+      "ori $v0, 1\n"                //                 "
+      "lui $v0, 32767\n"            // __ LoadConst32(mips::V0, 2147483647);
+      "ori $v0, 65535\n"            //                 "
+      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -32769);
+      "ori $v0, 32767\n"            //                 "
+      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -65536);
+      "lui $v0, 65534\n"            // __ LoadConst32(mips::V0, -65537);
+      "ori $v0, 65535\n"            //                 "
+      "lui $v0, 32768\n"            // __ LoadConst32(mips::V0, -2147483647);
+      "ori $v0, 1\n"                //                 "
+      "lui $v0, 32768\n";           // __ LoadConst32(mips::V0, -2147483648);
+  DriverStr(expected, "LoadConst32");
+}
+
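+// The next two tests bracket the reach of a PC-relative literal load:
+// 0x1FDE filler instructions still leave the literal reachable with a
+// single lw and a 16-bit offset from the base register, while one more
+// filler (0x1FDF) forces the lui/addu/lw sequence.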
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lw $v0, %lo(2f - 1b)($v1)\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lui $at, %hi(2f - 1b)\n"
+      "addu $at, $at, $v1\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h
index 40d39e3..66204e7 100644
--- a/compiler/utils/mips/managed_register_mips.h
+++ b/compiler/utils/mips/managed_register_mips.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_
 
 #include "constants_mips.h"
-#include "dwarf/register.h"
+#include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -87,70 +87,70 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class MipsManagedRegister : public ManagedRegister {
  public:
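+  // The accessors and predicates below become constexpr so that
+  // MipsManagedRegister values can be created and queried at compile time.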
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  FRegister AsFRegister() const {
+  constexpr FRegister AsFRegister() const {
     CHECK(IsFRegister());
     return static_cast<FRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfFRegIds);
   }
 
-  FRegister AsOverlappingDRegisterLow() const {
+  constexpr FRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2);
   }
 
-  FRegister AsOverlappingDRegisterHigh() const {
+  constexpr FRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2 + 1);
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows the use of AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows the use of AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsFRegister() const {
+  constexpr bool IsFRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfFRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps FRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds);
@@ -164,32 +164,32 @@
   // then false is returned.
   bool Overlaps(const MipsManagedRegister& other) const;
 
-  static MipsManagedRegister FromCoreRegister(Register r) {
+  static constexpr MipsManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static MipsManagedRegister FromFRegister(FRegister r) {
+  static constexpr MipsManagedRegister FromFRegister(FRegister r) {
     CHECK_NE(r, kNoFRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static MipsManagedRegister FromDRegister(DRegister r) {
+  static constexpr MipsManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + kNumberOfCoreRegIds + kNumberOfFRegIds);
   }
 
-  static MipsManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr MipsManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -205,9 +205,9 @@
 
   friend class ManagedRegister;
 
-  explicit MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static MipsManagedRegister FromRegId(int reg_id) {
+  static constexpr MipsManagedRegister FromRegId(int reg_id) {
     MipsManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -218,7 +218,7 @@
 
 }  // namespace mips
 
-inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
+constexpr inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
   mips::MipsManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 00e8995..a2621cb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -19,15 +19,78 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "memory_region.h"
 #include "thread.h"
 
 namespace art {
 namespace mips64 {
 
+static_assert(static_cast<size_t>(kMips64PointerSize) == kMips64DoublewordSize,
+              "Unexpected Mips64 pointer size.");
+static_assert(kMips64PointerSize == PointerSize::k64, "Unexpected Mips64 pointer size.");
+
+
+void Mips64Assembler::FinalizeCode() {
+  for (auto& exception_block : exception_blocks_) {
+    EmitExceptionPoll(&exception_block);
+  }
+  PromoteBranches();
+}
+
+void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) {
+  EmitBranches();
+  Assembler::FinalizeInstructions(region);
+  PatchCFI();
+}
+
+void Mips64Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Refill our data buffer with patched opcodes.
+  cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16);
+  size_t stream_pos = 0;
+  for (const DelayedAdvancePC& advance : advances) {
+    DCHECK_GE(advance.stream_pos, stream_pos);
+    // Copy old data up to the point where advance was issued.
+    cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+    stream_pos = advance.stream_pos;
+    // Insert the advance command with its final offset.
+    size_t final_pc = GetAdjustedPosition(advance.pc);
+    cfi().AdvancePC(final_pc);
+  }
+  // Copy the final segment if any.
+  cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
+
+void Mips64Assembler::EmitBranches() {
+  CHECK(!overwriting_);
+  // Switch from appending instructions at the end of the buffer to overwriting
+  // existing instructions (branch placeholders) in the buffer.
+  overwriting_ = true;
+  for (auto& branch : branches_) {
+    EmitBranch(&branch);
+  }
+  overwriting_ = false;
+}
+
 void Mips64Assembler::Emit(uint32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<uint32_t>(value);
+  if (overwriting_) {
+    // Branches to labels are emitted into their placeholders here.
+    buffer_.Store<uint32_t>(overwrite_location_, value);
+    overwrite_location_ += sizeof(uint32_t);
+  } else {
+    // Other instructions are simply appended at the end here.
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    buffer_.Emit<uint32_t>(value);
+  }
 }
 
 void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd,
@@ -82,15 +145,16 @@
 
 void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) {
   CHECK_NE(rs, kNoGpuRegister);
+  CHECK(IsUint<21>(imm21)) << imm21;
   uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
                       static_cast<uint32_t>(rs) << kRsShift |
-                      (imm21 & 0x1FFFFF);
+                      imm21;
   Emit(encoding);
 }
 
-void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) {
-  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
-                      (addr26 & 0x3FFFFFF);
+void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) {
+  CHECK(IsUint<26>(imm26)) << imm26;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
   Emit(encoding);
 }
 
@@ -117,14 +181,6 @@
   Emit(encoding);
 }
 
-void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
-void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
@@ -141,10 +197,6 @@
   EmitI(0x19, rs, rt, imm16);
 }
 
-void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
@@ -153,50 +205,14 @@
   EmitR(0, rs, rt, rd, 0, 0x2f);
 }
 
-void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
-}
-
-void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
-}
-
-void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0x1c, rs, rt, rd, 0, 2);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mfhi(rd);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mfhi(rd);
-}
-
 void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void Mips64Assembler::MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1a);
 }
@@ -217,6 +233,10 @@
   EmitR(0, rs, rt, rd, 2, 0x1c);
 }
 
+void Mips64Assembler::Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1c);
+}
+
 void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1e);
 }
@@ -285,10 +305,17 @@
   EmitRtd(0x1f, rt, rd, 0x5, 0x24);
 }
 
-void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size_less_one) {
-  DCHECK(0 <= pos && pos < 32) << pos;
-  DCHECK(0 <= size_less_one && size_less_one < 32) << size_less_one;
-  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size_less_one), pos, 3);
+void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(IsUint<5>(size - 1)) << size;
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size - 1), pos, 0x3);
+}
+
+void Mips64Assembler::Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos - 32)) << pos;
+  CHECK(IsUint<5>(size - 1)) << size;
+  CHECK(IsUint<5>(pos + size - 33)) << pos << " + " << size;
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(pos + size - 33), pos - 32, 0x6);
 }
 
 void Mips64Assembler::Wsbh(GpuRegister rd, GpuRegister rt) {
@@ -296,22 +323,22 @@
 }
 
 void Mips64Assembler::Sc(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x26);
 }
 
 void Mips64Assembler::Scd(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x27);
 }
 
 void Mips64Assembler::Ll(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x36);
 }
 
 void Mips64Assembler::Lld(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x37);
 }
 
@@ -440,14 +467,6 @@
            static_cast<GpuRegister>(0), stype & 0x1f, 0xf);
 }
 
-void Mips64Assembler::Mfhi(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
-}
-
-void Mips64Assembler::Mflo(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12);
-}
-
 void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x28, rs, rt, imm16);
 }
@@ -480,26 +499,6 @@
   EmitI(0xb, rs, rt, imm16);
 }
 
-void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
-  EmitI(0x4, rs, rt, imm16);
-  Nop();
-}
-
-void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
-  EmitI(0x5, rs, rt, imm16);
-  Nop();
-}
-
-void Mips64Assembler::J(uint32_t addr26) {
-  EmitJ(0x2, addr26);
-  Nop();
-}
-
-void Mips64Assembler::Jal(uint32_t addr26) {
-  EmitJ(0x3, addr26);
-  Nop();
-}
-
 void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x35);
 }
@@ -526,7 +525,6 @@
 
 void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) {
   EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09);
-  Nop();
 }
 
 void Mips64Assembler::Jalr(GpuRegister rs) {
@@ -541,6 +539,15 @@
   EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16);
 }
 
+void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) {
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, imm19);
+}
+
+void Mips64Assembler::Bc(uint32_t imm26) {
+  EmitI26(0x32, imm26);
+}
+
 void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) {
   EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -601,14 +608,14 @@
   CHECK_NE(rs, ZERO);
   CHECK_NE(rt, ZERO);
   CHECK_NE(rs, rt);
-  EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+  EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
 }
 
 void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
   CHECK_NE(rs, ZERO);
   CHECK_NE(rt, ZERO);
   CHECK_NE(rs, rt);
-  EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+  EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
 }
 
 void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) {
@@ -621,6 +628,81 @@
   EmitI21(0x3E, rs, imm21);
 }
 
+void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) {
+  EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) {
+  EmitFI(0x11, 0xD, ft, imm16);
+}
+
+void Mips64Assembler::EmitBcondc(BranchCondition cond,
+                                 GpuRegister rs,
+                                 GpuRegister rt,
+                                 uint32_t imm16_21) {
+  switch (cond) {
+    case kCondLT:
+      Bltc(rs, rt, imm16_21);
+      break;
+    case kCondGE:
+      Bgec(rs, rt, imm16_21);
+      break;
+    case kCondLE:
+      Bgec(rt, rs, imm16_21);
+      break;
+    case kCondGT:
+      Bltc(rt, rs, imm16_21);
+      break;
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltzc(rs, imm16_21);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgezc(rs, imm16_21);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blezc(rs, imm16_21);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtzc(rs, imm16_21);
+      break;
+    case kCondEQ:
+      Beqc(rs, rt, imm16_21);
+      break;
+    case kCondNE:
+      Bnec(rs, rt, imm16_21);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqzc(rs, imm16_21);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnezc(rs, imm16_21);
+      break;
+    case kCondLTU:
+      Bltuc(rs, rt, imm16_21);
+      break;
+    case kCondGEU:
+      Bgeuc(rs, rt, imm16_21);
+      break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FpuRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FpuRegister>(rs), imm16_21);
+      break;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
+}
+
 void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
   EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
 }
@@ -701,6 +783,22 @@
   EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc);
 }
 
+void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
+void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
 void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa);
 }
@@ -773,6 +871,86 @@
   EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
 }
 
+void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
 void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20);
 }
@@ -801,10 +979,18 @@
   EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mfhc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x03, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Mtc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x04, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mthc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x07, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Dmfc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x01, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
@@ -892,45 +1078,58 @@
   } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
     Lui(rd, value >> 16);
     Dati(rd, (value >> 48) + bit31);
+  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
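+    // value + 1 being a power of two means value is a contiguous mask of
+    // low-order ones, so materialize it by shifting an all-ones register right.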
+    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+    Daddiu(rd, ZERO, -1);
+    if (shift_cnt < 32) {
+      Dsrl(rd, rd, shift_cnt);
+    } else {
+      Dsrl32(rd, rd, shift_cnt & 31);
+    }
   } else {
     int shift_cnt = CTZ(value);
     int64_t tmp = value >> shift_cnt;
     if (IsUint<16>(tmp)) {
       Ori(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<16>(tmp)) {
       Daddiu(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<32>(tmp)) {
       // Loads with 3 instructions.
       Lui(rd, tmp >> 16);
       Ori(rd, rd, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else {
       shift_cnt = 16 + CTZ(value >> 16);
       tmp = value >> shift_cnt;
       if (IsUint<16>(tmp)) {
         Ori(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else if (IsInt<16>(tmp)) {
         Daddiu(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else {
         // Loads with 3-4 instructions.
@@ -941,10 +1140,11 @@
           used_lui = true;
         }
         if ((tmp2 & 0xFFFF) != 0) {
-          if (used_lui)
+          if (used_lui) {
             Ori(rd, rd, tmp2);
-          else
+          } else {
             Ori(rd, ZERO, tmp2);
+          }
         }
         if (bit31) {
           tmp2 += UINT64_C(0x100000000);
@@ -963,15 +1163,6 @@
   }
 }
 
-void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) {
-  if (IsInt<16>(value)) {
-    Addiu(rt, rs, value);
-  } else {
-    LoadConst32(rtmp, value);
-    Addu(rt, rs, rtmp);
-  }
-}
-
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
   if (IsInt<16>(value)) {
     Daddiu(rt, rs, value);
@@ -981,186 +1172,648 @@
   }
 }
 
-//
-// MIPS64R6 branches
-//
-//
-// Unconditional (pc + 32-bit signed offset):
-//
-//   auipc    at, ofs_high
-//   jic      at, ofs_low
-//   // no delay/forbidden slot
-//
-//
-// Conditional (pc + 32-bit signed offset):
-//
-//   b<cond>c   reg, +2      // skip next 2 instructions
-//   auipc      at, ofs_high
-//   jic        at, ofs_low
-//   // no delay/forbidden slot
-//
-//
-// Unconditional (pc + 32-bit signed offset) and link:
-//
-//   auipc    reg, ofs_high
-//   daddiu   reg, ofs_low
-//   jialc    reg, 0
-//   // no delay/forbidden slot
-//
-//
-// TODO: use shorter instruction sequences whenever possible.
-//
+void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size,
+                                              Mips64Assembler::Branch::Type short_type,
+                                              Mips64Assembler::Branch::Type long_type) {
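+  // Pick the short variant if its offset encoding can hold the needed distance;
+  // otherwise fall back to the long variant.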
+  type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
+}
 
-void Mips64Assembler::Bind(Label* label) {
+void Mips64Assembler::Branch::InitializeType(bool is_call) {
+  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+  if (is_call) {
+    InitShortOrLong(offset_size, kCall, kLongCall);
+  } else if (condition_ == kUncond) {
+    InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+  } else {
+    if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
+      // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+      type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
+    } else {
+      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+    }
+  }
+  old_type_ = type_;
+}
+
+bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) {
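+  // With identical registers, the strict conditions (lt, gt, ne, ltu) can never
+  // hold, so such a branch is never taken.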
+  switch (condition) {
+    case kCondLT:
+    case kCondGT:
+    case kCondNE:
+    case kCondLTU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+bool Mips64Assembler::Branch::IsUncond(BranchCondition condition,
+                                       GpuRegister lhs,
+                                       GpuRegister rhs) {
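+  // With identical registers, the non-strict conditions (ge, le, eq, geu) always
+  // hold, so such a branch is effectively unconditional.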
+  switch (condition) {
+    case kUncond:
+      return true;
+    case kCondGE:
+    case kCondLE:
+    case kCondEQ:
+    case kCondGEU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(ZERO),
+      rhs_reg_(ZERO),
+      condition_(kUncond) {
+  InitializeType(false);
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location,
+                                uint32_t target,
+                                Mips64Assembler::BranchCondition condition,
+                                GpuRegister lhs_reg,
+                                GpuRegister rhs_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(lhs_reg),
+      rhs_reg_(rhs_reg),
+      condition_(condition) {
+  CHECK_NE(condition, kUncond);
+  switch (condition) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_NE(rhs_reg, ZERO);
+      break;
+    case kCondLTZ:
+    case kCondGEZ:
+    case kCondLEZ:
+    case kCondGTZ:
+    case kCondEQZ:
+    case kCondNEZ:
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kUncond:
+      UNREACHABLE();
+  }
+  CHECK(!IsNop(condition, lhs_reg, rhs_reg));
+  if (IsUncond(condition, lhs_reg, rhs_reg)) {
+    // Branch condition is always true, make the branch unconditional.
+    condition_ = kUncond;
+  }
+  InitializeType(false);
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(indirect_reg),
+      rhs_reg_(ZERO),
+      condition_(kUncond) {
+  CHECK_NE(indirect_reg, ZERO);
+  CHECK_NE(indirect_reg, AT);
+  InitializeType(true);
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
+    Mips64Assembler::BranchCondition cond) {
+  switch (cond) {
+    case kCondLT:
+      return kCondGE;
+    case kCondGE:
+      return kCondLT;
+    case kCondLE:
+      return kCondGT;
+    case kCondGT:
+      return kCondLE;
+    case kCondLTZ:
+      return kCondGEZ;
+    case kCondGEZ:
+      return kCondLTZ;
+    case kCondLEZ:
+      return kCondGTZ;
+    case kCondGTZ:
+      return kCondLEZ;
+    case kCondEQ:
+      return kCondNE;
+    case kCondNE:
+      return kCondEQ;
+    case kCondEQZ:
+      return kCondNEZ;
+    case kCondNEZ:
+      return kCondEQZ;
+    case kCondLTU:
+      return kCondGEU;
+    case kCondGEU:
+      return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+  }
+  UNREACHABLE();
+}
+
+Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const {
+  return type_;
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const {
+  return condition_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetLeftRegister() const {
+  return lhs_reg_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetRightRegister() const {
+  return rhs_reg_;
+}
+
+uint32_t Mips64Assembler::Branch::GetTarget() const {
+  return target_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLocation() const {
+  return location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLocation() const {
+  return old_location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLength() const {
+  return branch_info_[type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLength() const {
+  return branch_info_[old_type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetSize() const {
+  return GetLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOldSize() const {
+  return GetOldLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetEndLocation() const {
+  return GetLocation() + GetSize();
+}
+
+uint32_t Mips64Assembler::Branch::GetOldEndLocation() const {
+  return GetOldLocation() + GetOldSize();
+}
+
+bool Mips64Assembler::Branch::IsLong() const {
+  switch (type_) {
+    // Short branches.
+    case kUncondBranch:
+    case kCondBranch:
+    case kCall:
+      return false;
+    // Long branches.
+    case kLongUncondBranch:
+    case kLongCondBranch:
+    case kLongCall:
+      return true;
+  }
+  UNREACHABLE();
+}
+
+bool Mips64Assembler::Branch::IsResolved() const {
+  return target_ != kUnresolved;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const {
+  OffsetBits offset_size =
+      (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+          ? kOffset23
+          : branch_info_[type_].offset_size;
+  return offset_size;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location,
+                                                                                 uint32_t target) {
+  // For unresolved targets assume the shortest encoding
+  // (later it will be made longer if needed).
+  if (target == kUnresolved) {
+    return kOffset16;
+  }
+  int64_t distance = static_cast<int64_t>(target) - location;
+  // To simplify calculations in composite branches consisting of multiple instructions,
+  // bump up the distance by a value larger than the max byte size of a composite branch.
+  distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
+  if (IsInt<kOffset16>(distance)) {
+    return kOffset16;
+  } else if (IsInt<kOffset18>(distance)) {
+    return kOffset18;
+  } else if (IsInt<kOffset21>(distance)) {
+    return kOffset21;
+  } else if (IsInt<kOffset23>(distance)) {
+    return kOffset23;
+  } else if (IsInt<kOffset28>(distance)) {
+    return kOffset28;
+  }
+  return kOffset32;
+}
+
+void Mips64Assembler::Branch::Resolve(uint32_t target) {
+  target_ = target;
+}
+
+void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
+  if (location_ > expand_location) {
+    location_ += delta;
+  }
+  if (!IsResolved()) {
+    return;  // Don't know the target yet.
+  }
+  if (target_ > expand_location) {
+    target_ += delta;
+  }
+}
+
+void Mips64Assembler::Branch::PromoteToLong() {
+  switch (type_) {
+    // Short branches.
+    case kUncondBranch:
+      type_ = kLongUncondBranch;
+      break;
+    case kCondBranch:
+      type_ = kLongCondBranch;
+      break;
+    case kCall:
+      type_ = kLongCall;
+      break;
+    default:
+      // Note: 'type_' is already long.
+      break;
+  }
+  CHECK(IsLong());
+}
+
+uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+  // If the branch is still unresolved or already long, nothing to do.
+  if (IsLong() || !IsResolved()) {
+    return 0;
+  }
+  // Promote the short branch to long if the offset size is too small
+  // to hold the distance between location_ and target_.
+  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+    PromoteToLong();
+    uint32_t old_size = GetOldSize();
+    uint32_t new_size = GetSize();
+    CHECK_GT(new_size, old_size);
+    return new_size - old_size;
+  }
+  // The following logic is for debugging/testing purposes.
+  // Promote some short branches to long when it's not really required.
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+    int64_t distance = static_cast<int64_t>(target_) - location_;
+    distance = (distance >= 0) ? distance : -distance;
+    if (distance >= max_short_distance) {
+      PromoteToLong();
+      uint32_t old_size = GetOldSize();
+      uint32_t new_size = GetSize();
+      CHECK_GT(new_size, old_size);
+      return new_size - old_size;
+    }
+  }
+  return 0;
+}
+
+uint32_t Mips64Assembler::Branch::GetOffsetLocation() const {
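+  // Location of the instruction (within a possibly multi-instruction branch
+  // sequence) that actually encodes the PC-relative offset.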
+  return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOffset() const {
+  CHECK(IsResolved());
+  uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
+  // Calculate the byte distance between instructions and also account for
+  // different PC-relative origins.
+  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  // Prepare the offset for encoding into the instruction(s).
+  offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
+  return offset;
+}
+
+Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+void Mips64Assembler::Bind(Mips64Label* label) {
   CHECK(!label->IsBound());
-  int32_t bound_pc = buffer_.Size();
+  uint32_t bound_pc = buffer_.Size();
 
-  // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label.
-  // Embed the previously unknown pc-relative addresses in them.
+  // Walk the list of branches referring to and preceding this label.
+  // Store the previously unknown target addresses in them.
   while (label->IsLinked()) {
-    int32_t position = label->Position();
-    // Extract the branch (instruction pair)
-    uint32_t auipc = buffer_.Load<uint32_t>(position);
-    uint32_t jic = buffer_.Load<uint32_t>(position + 4);  // actually, jic or daddiu
+    uint32_t branch_id = label->Position();
+    Branch* branch = GetBranch(branch_id);
+    branch->Resolve(bound_pc);
 
-    // Extract the location of the previous pair in the list (walking the list backwards;
-    // the previous pair location was stored in the immediate operands of the instructions)
-    int32_t prev = (auipc << 16) | (jic & 0xFFFF);
-
-    // Get the pc-relative address
-    uint32_t offset = bound_pc - position;
-    offset += (offset & 0x8000) << 1;  // account for sign extension in jic/daddiu
-
-    // Embed it in the two instructions
-    auipc = (auipc & 0xFFFF0000) | (offset >> 16);
-    jic = (jic & 0xFFFF0000) | (offset & 0xFFFF);
-
-    // Save the adjusted instructions
-    buffer_.Store<uint32_t>(position, auipc);
-    buffer_.Store<uint32_t>(position + 4, jic);
+    uint32_t branch_location = branch->GetLocation();
+    // Extract the location of the previous branch in the list (walking the list backwards;
+    // the previous branch ID was stored in the space reserved for this branch).
+    uint32_t prev = buffer_.Load<uint32_t>(branch_location);
 
     // On to the previous branch in the list...
     label->position_ = prev;
   }
 
-  // Now make the label object contain its own location
-  // (it will be used by the branches referring to and following this label)
+  // Now make the label object contain its own location (relative to the end of the preceding
+  // branch, if any; it will be used by the branches referring to and following this label).
+  label->prev_branch_id_plus_one_ = branches_.size();
+  if (label->prev_branch_id_plus_one_) {
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    bound_pc -= branch->GetEndLocation();
+  }
   label->BindTo(bound_pc);
 }
 
-void Mips64Assembler::B(Label* label) {
-  if (label->IsBound()) {
-    // Branch backwards (to a preceding label), distance is known
-    uint32_t offset = label->Position() - buffer_.Size();
-    CHECK_LE(static_cast<int32_t>(offset), 0);
-    offset += (offset & 0x8000) << 1;  // account for sign extension in jic
-    Auipc(AT, offset >> 16);
-    Jic(AT, offset);
-  } else {
-    // Branch forward (to a following label), distance is unknown
-    int32_t position = buffer_.Size();
-    // The first branch forward will have 0 in its pc-relative address (copied from label's
-    // position). It will be the terminator of the list of forward-reaching branches.
-    uint32_t prev = label->position_;
-    Auipc(AT, prev >> 16);
-    Jic(AT, prev);
-    // Now make the link object point to the location of this branch
-    // (this forms a linked list of branches preceding this label)
-    label->LinkTo(position);
+uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const {
+  CHECK(label->IsBound());
+  uint32_t target = label->Position();
+  if (label->prev_branch_id_plus_one_) {
+    // Get label location based on the branch preceding it.
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    target += branch->GetEndLocation();
+  }
+  return target;
+}
+
+uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) {
+  // We can reconstruct the adjustment by going through all the branches from the beginning
+  // up to the old_position. Since we expect GetAdjustedPosition() to be called in a loop
+  // with increasing old_position, we can use the data from the last GetAdjustedPosition()
+  // call to continue where we left off; the whole loop is then O(m+n), where m is the number
+  // of positions to adjust and n is the number of branches.
+  if (old_position < last_old_position_) {
+    last_position_adjustment_ = 0;
+    last_old_position_ = 0;
+    last_branch_id_ = 0;
+  }
+  while (last_branch_id_ != branches_.size()) {
+    const Branch* branch = GetBranch(last_branch_id_);
+    if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+      break;
+    }
+    last_position_adjustment_ += branch->GetSize() - branch->GetOldSize();
+    ++last_branch_id_;
+  }
+  last_old_position_ = old_position;
+  return old_position + last_position_adjustment_;
+}
+
+void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) {
+  uint32_t length = branches_.back().GetLength();
+  if (!label->IsBound()) {
+    // Branch forward (to a following label), distance is unknown.
+    // The first branch forward will contain 0, serving as the terminator of
+    // the list of forward-reaching branches.
+    Emit(label->position_);
+    length--;
+    // Now make the label object point to this branch
+    // (this forms a linked list of branches preceding this label).
+    uint32_t branch_id = branches_.size() - 1;
+    label->LinkTo(branch_id);
+  }
+  // Reserve space for the branch.
+  while (length--) {
+    Nop();
   }
 }
 
-void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) {
-  if (label->IsBound()) {
-    // Branch backwards (to a preceding label), distance is known
-    uint32_t offset = label->Position() - buffer_.Size();
-    CHECK_LE(static_cast<int32_t>(offset), 0);
-    offset += (offset & 0x8000) << 1;  // account for sign extension in daddiu
-    Auipc(indirect_reg, offset >> 16);
-    Daddiu(indirect_reg, indirect_reg, offset);
-    Jialc(indirect_reg, 0);
-  } else {
-    // Branch forward (to a following label), distance is unknown
-    int32_t position = buffer_.Size();
-    // The first branch forward will have 0 in its pc-relative address (copied from label's
-    // position). It will be the terminator of the list of forward-reaching branches.
-    uint32_t prev = label->position_;
-    Auipc(indirect_reg, prev >> 16);
-    Daddiu(indirect_reg, indirect_reg, prev);
-    Jialc(indirect_reg, 0);
-    // Now make the link object point to the location of this branch
-    // (this forms a linked list of branches preceding this label)
-    label->LinkTo(position);
+void Mips64Assembler::Buncond(Mips64Label* label) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Bcond(Mips64Label* label,
+                            BranchCondition condition,
+                            GpuRegister lhs,
+                            GpuRegister rhs) {
+  // If lhs = rhs, this can be a NOP.
+  if (Branch::IsNop(condition, lhs, rhs)) {
+    return;
+  }
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target, indirect_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::PromoteBranches() {
+  // Promote short branches to long as necessary.
+  bool changed;
+  do {
+    changed = false;
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+      uint32_t delta = branch.PromoteIfNeeded();
+      // If this branch has been promoted and needs to expand in size,
+      // relocate all branches by the expansion size.
+      if (delta) {
+        changed = true;
+        uint32_t expand_location = branch.GetLocation();
+        for (auto& branch2 : branches_) {
+          branch2.Relocate(expand_location, delta);
+        }
+      }
+    }
+  } while (changed);
+
+  // Account for branch expansion by resizing the code buffer
+  // and moving the code in it to its final location.
+  size_t branch_count = branches_.size();
+  if (branch_count > 0) {
+    // Resize.
+    Branch& last_branch = branches_[branch_count - 1];
+    uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();
+    uint32_t old_size = buffer_.Size();
+    buffer_.Resize(old_size + size_delta);
+    // Move the code residing between branch placeholders.
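+    // Iterate over the branches in reverse so each code chunk reaches its final
+    // (higher) location before the chunks that precede it are moved.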
+    uint32_t end = old_size;
+    for (size_t i = branch_count; i > 0; ) {
+      Branch& branch = branches_[--i];
+      uint32_t size = end - branch.GetOldEndLocation();
+      buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+      end = branch.GetOldLocation();
+    }
   }
 }
 
-void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bgec(rs, rt, 2);
-  B(label);
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = {
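+  // Fields: { length (in instructions), instr_offset (index of the instruction
+  // holding the offset), pc_org (PC-relative origin adjustment, in instructions),
+  // offset_size, offset_shift }.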
+  // Short branches.
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kUncondBranch
+  {  2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kCondBranch
+                                                        // Exception: kOffset23 for beqzc/bnezc
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kCall
+  // Long branches.
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongUncondBranch
+  {  3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCondBranch
+  {  3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCall
+};
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
+  CHECK(overwriting_);
+  overwrite_location_ = branch->GetLocation();
+  uint32_t offset = branch->GetOffset();
+  BranchCondition condition = branch->GetCondition();
+  GpuRegister lhs = branch->GetLeftRegister();
+  GpuRegister rhs = branch->GetRightRegister();
+  switch (branch->GetType()) {
+    // Short branches.
+    case Branch::kUncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondc(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
+      break;
+    case Branch::kCall:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiupc(lhs, offset);
+      Jialc(lhs, 0);
+      break;
+
+    // Long branches.
+    case Branch::kLongUncondBranch:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kLongCondBranch:
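+      // On the opposite condition, skip over the following auipc/jic pair
+      // (an offset of 2 instructions).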
+      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kLongCall:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in daddiu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(lhs, High16Bits(offset));
+      Daddiu(lhs, lhs, Low16Bits(offset));
+      Jialc(lhs, 0);
+      break;
+  }
+  CHECK_EQ(overwrite_location_, branch->GetEndLocation());
+  CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
 }
 
-void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) {
-  Bgezc(rt, 2);
-  B(label);
+void Mips64Assembler::Bc(Mips64Label* label) {
+  Buncond(label);
 }
 
-void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) {
-  Blezc(rt, 2);
-  B(label);
+void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) {
+  Call(label, indirect_reg);
 }
 
-void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bltc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLT, rs, rt);
 }
 
-void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) {
-  Bltzc(rt, 2);
-  B(label);
+void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLTZ, rt);
 }
 
-void Mips64Assembler::Blezc(GpuRegister rt, Label* label) {
-  Bgtzc(rt, 2);
-  B(label);
+void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGTZ, rt);
 }
 
-void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bgeuc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGE, rs, rt);
 }
 
-void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bltuc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGEZ, rt);
 }
 
-void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bnec(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLEZ, rt);
 }
 
-void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) {
-  Beqc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLTU, rs, rt);
 }
 
-void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) {
-  Bnezc(rs, 2);
-  B(label);
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGEU, rs, rt);
 }
 
-void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) {
-  Beqzc(rs, 2);
-  B(label);
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondEQ, rs, rt);
+}
+
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondNE, rs, rt);
+}
+
+void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) {
+  Bcond(label, kCondEQZ, rs);
+}
+
+void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
+  Bcond(label, kCondNEZ, rs);
+}
+
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {
+  Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
+  Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
 }
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
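+  // If the offset does not fit in 16 bits (or a split doubleword access would
+  // overflow them), fold its doubleword-aligned part into AT, leaving only the
+  // low bits as the effective offset.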
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1177,32 +1830,51 @@
       Lhu(reg, base, offset);
       break;
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lw(reg, base, offset);
       break;
     case kLoadUnsignedWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwu(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ld(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
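+        // Only word-aligned: assemble the doubleword from two 32-bit loads;
+        // Dinsu inserts the high word into bits 32..63 of reg.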
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwu(reg, base, offset);
+        Lwu(TMP2, base, offset + kMips64WordSize);
+        Dinsu(reg, TMP2, 32, 32);
+      } else {
+        Ld(reg, base, offset);
+      }
       break;
   }
 }
 
 void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
                                         int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwc1(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ldc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
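+        // Only word-aligned: load the low word into the FPU register and move
+        // the high word into its upper half with Mthc1.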
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwc1(reg, base, offset);
+        Lw(TMP2, base, offset + kMips64WordSize);
+        Mthc1(TMP2, reg);
+      } else {
+        Ldc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1238,11 +1910,13 @@
 
 void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
                                     int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1253,10 +1927,18 @@
       Sh(reg, base, offset);
       break;
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Sw(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sd(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
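+        // Only word-aligned: store as two words; Dsrl32 extracts the high word.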
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Sw(reg, base, offset);
+        Dsrl32(TMP2, reg, 0);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sd(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1265,19 +1947,29 @@
 
 void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
                                        int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Swc1(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sdc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Mfhc1(TMP2, reg);
+        Swc1(reg, base, offset);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sdc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1290,10 +1982,12 @@
 
 constexpr size_t kFramePointerSize = 8;
 
-void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& callee_save_regs,
+void Mips64Assembler::BuildFrame(size_t frame_size,
+                                 ManagedRegister method_reg,
+                                 ArrayRef<const ManagedRegister> callee_save_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
 
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
@@ -1304,7 +1998,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -1315,7 +2009,7 @@
   // Write out entry spills.
   int32_t offset = frame_size + kFramePointerSize;
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Mips64ManagedRegister reg = entry_spills.at(i).AsMips64();
+    Mips64ManagedRegister reg = entry_spills[i].AsMips64();
     ManagedRegisterSpill spill = entry_spills.at(i);
     int32_t size = spill.getSize();
     if (reg.IsNoRegister()) {
@@ -1334,14 +2028,15 @@
 }
 
 void Mips64Assembler::RemoveFrame(size_t frame_size,
-                                  const std::vector<ManagedRegister>& callee_save_regs) {
+                                  ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
   cfi_.RememberState();
 
   // Pop callee saves and return address
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
@@ -1354,6 +2049,7 @@
 
   // Then jump to the return address.
   Jr(RA);
+  Nop();
 
   // The CFI should be restored for any code that follows the exit block.
   cfi_.RestoreState();
@@ -1362,12 +2058,14 @@
 
 void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
+  DCHECK(!overwriting_);
   Daddiu64(SP, SP, static_cast<int32_t>(-adjust));
   cfi_.AdjustCFAOffset(adjust);
 }
 
 void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
+  DCHECK(!overwriting_);
   Daddiu64(SP, SP, static_cast<int32_t>(adjust));
   cfi_.AdjustCFAOffset(-adjust);
 }
@@ -1417,26 +2115,16 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
+void Mips64Assembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                               FrameOffset fr_offs,
                                                ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
-  // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?).
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsGpuRegister(), imm);
-  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value());
-}
-
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
-  Mips64ManagedRegister scratch = mscratch.AsMips64();
-  CHECK(scratch.IsGpuRegister()) << scratch;
   Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -1453,7 +2141,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
+void Mips64Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -1474,7 +2162,7 @@
     // Negate the 32-bit ref
     Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
     // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 31);
+    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
   }
 }
 
@@ -1486,19 +2174,20 @@
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
 }
 
-void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<8> offs) {
+void Mips64Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
 }
 
-void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips";
+void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+                                 size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64";
 }
 
-void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
+void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+                                 size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64";
 }
 
 void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
@@ -1529,18 +2218,18 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<8> thr_offs,
-                                             ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                           ThreadOffset64 thr_offs,
+                                           ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                         FrameOffset fr_offs,
+                                         ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
@@ -1599,9 +2288,12 @@
   }
 }
 
-void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                           FrameOffset src_base ATTRIBUTE_UNUSED,
+                           Offset src_offset ATTRIBUTE_UNUSED,
+                           ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                           size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
@@ -1622,15 +2314,18 @@
   }
 }
 
-void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset
-/*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                           Offset dest_offset ATTRIBUTE_UNUSED,
+                           FrameOffset src ATTRIBUTE_UNUSED,
+                           Offset src_offset ATTRIBUTE_UNUSED,
+                           ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                           size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
-void Mips64Assembler::MemoryBarrier(ManagedRegister) {
+void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) {
   // TODO: sync?
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
@@ -1642,7 +2337,7 @@
   CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg;
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   if (null_allowed) {
-    Label null_arg;
+    Mips64Label null_arg;
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
@@ -1669,7 +2364,7 @@
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   if (null_allowed) {
-    Label null_arg;
+    Mips64Label null_arg;
     LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
                    handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
@@ -1691,7 +2386,7 @@
   Mips64ManagedRegister in_reg = min_reg.AsMips64();
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   CHECK(in_reg.IsGpuRegister()) << in_reg;
-  Label null_arg;
+  Mips64Label null_arg;
   if (!out_reg.Equals(in_reg)) {
     LoadConst32(out_reg.AsGpuRegister(), 0);
   }
@@ -1701,11 +2396,13 @@
   Bind(&null_arg);
 }
 
-void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
+                                   bool could_be_null ATTRIBUTE_UNUSED) {
   // TODO: not validating references
 }
 
-void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
+                                   bool could_be_null ATTRIBUTE_UNUSED) {
   // TODO: not validating references
 }
 
@@ -1717,6 +2414,7 @@
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  base.AsGpuRegister(), offset.Int32Value());
   Jalr(scratch.AsGpuRegister());
+  Nop();
   // TODO: place reference map on call
 }
 
@@ -1729,11 +2427,13 @@
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  scratch.AsGpuRegister(), offset.Int32Value());
   Jalr(scratch.AsGpuRegister());
+  Nop();
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
@@ -1741,37 +2441,39 @@
 }
 
 void Mips64Assembler::GetCurrentThread(FrameOffset offset,
-                                       ManagedRegister /*mscratch*/) {
+                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
 }
 
 void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
-  Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
-                 S1, Thread::ExceptionOffset<8>().Int32Value());
-  Bnezc(scratch.AsGpuRegister(), slow->Entry());
+  exception_blocks_.emplace_back(scratch, stack_adjust);
+  LoadFromOffset(kLoadDoubleword,
+                 scratch.AsGpuRegister(),
+                 S1,
+                 Thread::ExceptionOffset<kMips64PointerSize>().Int32Value());
+  Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
-void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
-  Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
+void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) {
+  Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving A0 as this call won't return
-  __ Move(A0, scratch_.AsGpuRegister());
+  // Pass exception object as argument.
+  // Don't care about preserving A0 as this call won't return.
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+  Move(A0, exception->scratch_.AsGpuRegister());
   // Set up call to Thread::Current()->pDeliverException
-  __ LoadFromOffset(kLoadDoubleword, T9, S1,
-                    QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
-  // TODO: check T9 usage
-  __ Jr(T9);
+  LoadFromOffset(kLoadDoubleword,
+                 T9,
+                 S1,
+                 QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pDeliverException).Int32Value());
+  Jr(T9);
+  Nop();
+
   // Call never returns
-  __ Break();
-#undef __
+  Break();
 }
 
 }  // namespace mips64
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 33f22d2..a7d350c 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -17,18 +17,25 @@
 #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
 #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
 
+#include <utility>
 #include <vector>
 
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
-#include "utils/assembler.h"
 #include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+#include "utils/label.h"
 
 namespace art {
 namespace mips64 {
 
+static constexpr size_t kMips64WordSize = 4;
+static constexpr size_t kMips64DoublewordSize = 8;
+
 enum LoadOperandType {
   kLoadSignedByte,
   kLoadUnsignedByte,
@@ -60,41 +67,82 @@
   kPositiveZero      = 0x200,
 };
 
-class Mips64Assembler FINAL : public Assembler {
+class Mips64Label : public Label {
  public:
-  Mips64Assembler() {}
-  virtual ~Mips64Assembler() {}
+  Mips64Label() : prev_branch_id_plus_one_(0) {}
+
+  Mips64Label(Mips64Label&& src)
+      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+
+ private:
+  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
+
+  friend class Mips64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Mips64Label);
+};
+
+// Slowpath entered when Thread::Current()->exception_ is non-null.
+class Mips64ExceptionSlowPath {
+ public:
+  explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+  Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src)
+      : scratch_(src.scratch_),
+        stack_adjust_(src.stack_adjust_),
+        exception_entry_(std::move(src.exception_entry_)) {}
+
+ private:
+  Mips64Label* Entry() { return &exception_entry_; }
+  const Mips64ManagedRegister scratch_;
+  const size_t stack_adjust_;
+  Mips64Label exception_entry_;
+
+  friend class Mips64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
+};
+
+class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
+ public:
+  explicit Mips64Assembler(ArenaAllocator* arena)
+      : Assembler(arena),
+        overwriting_(false),
+        overwrite_location_(0),
+        last_position_adjustment_(0),
+        last_old_position_(0),
+        last_branch_id_(0) {
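+    // Branch promotion in FinalizeCode() may move already-emitted code, so put
+    // off emitting CFI advance-PC opcodes until the final positions are known
+    // (the offsets are then fixed up in PatchCFI()).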
+    cfi().DelayEmittingAdvancePCs();
+  }
+
+  virtual ~Mips64Assembler() {
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+    }
+  }
+
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
 
   // Emit Machine Instructions.
-  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
   void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
-  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
-  void MultR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MultuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -104,13 +152,14 @@
   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
 
-  void Bitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Dbitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Seb(GpuRegister rd, GpuRegister rt);  // R2+
-  void Seh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dsbh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dshd(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
+  void Bitswap(GpuRegister rd, GpuRegister rt);
+  void Dbitswap(GpuRegister rd, GpuRegister rt);
+  void Seb(GpuRegister rd, GpuRegister rt);
+  void Seh(GpuRegister rd, GpuRegister rt);
+  void Dsbh(GpuRegister rd, GpuRegister rt);
+  void Dshd(GpuRegister rd, GpuRegister rt);
+  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size);  // MIPS64
+  void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
   void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -146,11 +195,9 @@
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
-  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
-  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
+  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
-  void Mfhi(GpuRegister rd);  // R2
-  void Mflo(GpuRegister rd);  // R2
 
   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -168,28 +215,28 @@
   void Dclz(GpuRegister rd, GpuRegister rs);
   void Dclo(GpuRegister rd, GpuRegister rs);
 
-  void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);
-  void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);
-  void J(uint32_t addr26);
-  void Jal(uint32_t addr26);
   void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
   void Jr(GpuRegister rs);
-  void Auipc(GpuRegister rs, uint16_t imm16);  // R6
-  void Jic(GpuRegister rt, uint16_t imm16);  // R6
-  void Jialc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bltzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgtzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Blezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqzc(GpuRegister rs, uint32_t imm21);  // R6
-  void Bnezc(GpuRegister rs, uint32_t imm21);  // R6
+  void Auipc(GpuRegister rs, uint16_t imm16);
+  void Addiupc(GpuRegister rs, uint32_t imm19);
+  void Bc(uint32_t imm26);
+  void Jic(GpuRegister rt, uint16_t imm16);
+  void Jialc(GpuRegister rt, uint16_t imm16);
+  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bltzc(GpuRegister rt, uint16_t imm16);
+  void Bgtzc(GpuRegister rt, uint16_t imm16);
+  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgezc(GpuRegister rt, uint16_t imm16);
+  void Blezc(GpuRegister rt, uint16_t imm16);
+  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqzc(GpuRegister rs, uint32_t imm21);
+  void Bnezc(GpuRegister rs, uint32_t imm21);
+  void Bc1eqz(FpuRegister ft, uint16_t imm16);
+  void Bc1nez(FpuRegister ft, uint16_t imm16);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -211,6 +258,10 @@
   void RoundLD(FpuRegister fd, FpuRegister fs);
   void RoundWS(FpuRegister fd, FpuRegister fs);
   void RoundWD(FpuRegister fd, FpuRegister fs);
+  void TruncLS(FpuRegister fd, FpuRegister fs);
+  void TruncLD(FpuRegister fd, FpuRegister fs);
+  void TruncWS(FpuRegister fd, FpuRegister fs);
+  void TruncWD(FpuRegister fd, FpuRegister fs);
   void CeilLS(FpuRegister fd, FpuRegister fs);
   void CeilLD(FpuRegister fd, FpuRegister fs);
   void CeilWS(FpuRegister fd, FpuRegister fs);
@@ -229,6 +280,26 @@
   void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
 
   void Cvtsw(FpuRegister fd, FpuRegister fs);
   void Cvtdw(FpuRegister fd, FpuRegister fs);
@@ -238,7 +309,9 @@
   void Cvtdl(FpuRegister fd, FpuRegister fs);
 
   void Mfc1(GpuRegister rt, FpuRegister fs);
+  void Mfhc1(GpuRegister rt, FpuRegister fs);
   void Mtc1(GpuRegister rt, FpuRegister fs);
+  void Mthc1(GpuRegister rt, FpuRegister fs);
   void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
@@ -252,32 +325,36 @@
   void Clear(GpuRegister rd);
   void Not(GpuRegister rd, GpuRegister rs);
 
-  // Higher level composite instructions
+  // Higher level composite instructions.
   void LoadConst32(GpuRegister rd, int32_t value);
   void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64
 
-  void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
-  void Bind(Label* label) OVERRIDE;  // R6
-  void Jump(Label* label) OVERRIDE {
-    B(label);
+  void Bind(Label* label) OVERRIDE {
+    Bind(down_cast<Mips64Label*>(label));
   }
-  void B(Label* label);  // R6
-  void Jalr(Label* label, GpuRegister indirect_reg = RA);  // R6
-  // TODO: implement common for R6 and non-R6 interface for conditional branches?
-  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bltzc(GpuRegister rt, Label* label);  // R6
-  void Bgtzc(GpuRegister rt, Label* label);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgezc(GpuRegister rt, Label* label);  // R6
-  void Blezc(GpuRegister rt, Label* label);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqzc(GpuRegister rs, Label* label);  // R6
-  void Bnezc(GpuRegister rs, Label* label);  // R6
+  void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64";
+  }
+
+  void Bind(Mips64Label* label);
+  void Bc(Mips64Label* label);
+  void Jialc(Mips64Label* label, GpuRegister indirect_reg);
+  void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bltzc(GpuRegister rt, Mips64Label* label);
+  void Bgtzc(GpuRegister rt, Mips64Label* label);
+  void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bgezc(GpuRegister rt, Mips64Label* label);
+  void Blezc(GpuRegister rt, Mips64Label* label);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Beqzc(GpuRegister rs, Mips64Label* label);
+  void Bnezc(GpuRegister rs, Mips64Label* label);
+  void Bc1eqz(FpuRegister ft, Mips64Label* label);
+  void Bc1nez(FpuRegister ft, Mips64Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -289,43 +366,41 @@
   void Emit(uint32_t value);
 
   //
-  // Overridden common assembler high-level functionality
+  // Overridden common assembler high-level functionality.
   //
 
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  // Emit code that will create an activation on the stack.
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size,
-                   const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
+  // Emit code that will remove an activation from the stack.
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
 
-  // Store routines
+  // Store routines.
   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
                                 ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
-
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
-  // Load routines
+  // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -334,17 +409,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
 
-  // Copying routines
+  // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -366,13 +443,13 @@
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
-  // Sign extension
+  // Sign extension.
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Zero extension
+  // Zero extension.
   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Exploit fast access in managed code to Thread::Current()
+  // Exploit fast access in managed code to Thread::Current().
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
 
@@ -388,7 +465,7 @@
   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
                               mscratch, bool null_allowed) OVERRIDE;
 
-  // src holds a handle scope entry (Object**) load this into dst
+  // src holds a handle scope entry (Object**) load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
@@ -396,39 +473,256 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
 
-  // Call to address held at [base+offset]
+  // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
 
+  // Emit slow paths queued during assembly and promote short branches to long if needed.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit branches and finalize all instructions.
+  void FinalizeInstructions(const MemoryRegion& region);
+
+  // Returns the current (always up-to-date) location of a label. It can be used in class
+  // CodeGeneratorMIPS64 and must be used instead of Mips64Label::GetPosition().
+  uint32_t GetLabelLocation(Mips64Label* label) const;
+
+  // Get the final position of a label after local fixup based on the old position
+  // recorded before FinalizeCode().
+  uint32_t GetAdjustedPosition(uint32_t old_position);
+
+  enum BranchCondition {
+    kCondLT,
+    kCondGE,
+    kCondLE,
+    kCondGT,
+    kCondLTZ,
+    kCondGEZ,
+    kCondLEZ,
+    kCondGTZ,
+    kCondEQ,
+    kCondNE,
+    kCondEQZ,
+    kCondNEZ,
+    kCondLTU,
+    kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
+    kUncond,
+  };
+  friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+
  private:
+  class Branch {
+   public:
+    enum Type {
+      // Short branches.
+      kUncondBranch,
+      kCondBranch,
+      kCall,
+      // Long branches.
+      kLongUncondBranch,
+      kLongCondBranch,
+      kLongCall,
+    };
+
+    // Bit sizes of offsets defined as enums to minimize chance of typos.
+    enum OffsetBits {
+      kOffset16 = 16,
+      kOffset18 = 18,
+      kOffset21 = 21,
+      kOffset23 = 23,
+      kOffset28 = 28,
+      kOffset32 = 32,
+    };
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
+    static constexpr int32_t kMaxBranchLength = 32;
+    static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+
+    struct BranchInfo {
+      // Branch length as a number of 4-byte-long instructions.
+      uint32_t length;
+      // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
+      // PC-relative offset (or its most significant 16-bit half, which goes first).
+      uint32_t instr_offset;
+      // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
+      // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
+      // instructions) from the instruction containing the offset.
+      uint32_t pc_org;
+      // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
+      // an exception: use kOffset23 for beqzc/bnezc).
+      OffsetBits offset_size;
+      // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
+      // count.
+      int offset_shift;
+    };
+    static const BranchInfo branch_info_[/* Type */];
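+    // For illustration (the actual table lives in the .cc file): a short
+    // unconditional branch (bc) would be { 1, 0, 1, kOffset28, 2 }, i.e. one
+    // instruction, offset in instruction 0, origin PC + 4, and a 26-bit offset
+    // shifted left by 2.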
+
+    // Unconditional branch.
+    Branch(uint32_t location, uint32_t target);
+    // Conditional branch.
+    Branch(uint32_t location,
+           uint32_t target,
+           BranchCondition condition,
+           GpuRegister lhs_reg,
+           GpuRegister rhs_reg = ZERO);
+    // Call (branch and link) that stores the target address in a given register (i.e. T9).
+    Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg);
+
+    // Some conditional branches with lhs = rhs are effectively NOPs, while others are
+    // effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
+    // So, we need a way to identify such branches in order to emit no instructions for them
+    // or change them to unconditional.
+    static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+    static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+
+    static BranchCondition OppositeCondition(BranchCondition cond);
+
+    Type GetType() const;
+    BranchCondition GetCondition() const;
+    GpuRegister GetLeftRegister() const;
+    GpuRegister GetRightRegister() const;
+    uint32_t GetTarget() const;
+    uint32_t GetLocation() const;
+    uint32_t GetOldLocation() const;
+    uint32_t GetLength() const;
+    uint32_t GetOldLength() const;
+    uint32_t GetSize() const;
+    uint32_t GetOldSize() const;
+    uint32_t GetEndLocation() const;
+    uint32_t GetOldEndLocation() const;
+    bool IsLong() const;
+    bool IsResolved() const;
+
+    // Returns the bit size of the signed offset that the branch instruction can handle.
+    OffsetBits GetOffsetSize() const;
+
+    // Calculates the distance between two byte locations in the assembler buffer and
+    // returns the number of bits needed to represent the distance as a signed integer.
+    //
+    // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
+    // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
+    //
+    // Composite branches (made of several instructions) with longer reach have 32-bit
+    // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
+    // The composite branches cover a range of roughly PC +/- 2GB, but the range is not
+    // end-to-end. Consider, for example, the following implementation of a long
+    // unconditional branch:
+    //
+    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
+    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
+    //
+    // Both of the above instructions take 16-bit signed offsets as immediate operands.
+    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+    // due to sign extension. This must be compensated for by incrementing offset_31_16
+    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+    // Therefore, the long branch range is something like from PC - 0x80000000 to
+    // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side.
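+    // For instance, reaching PC + 0x12348000 takes offset_31_16 = 0x1235 and
+    // offset_15_0 = 0x8000, since (0x1235 << 16) + sign_extend(0x8000) =
+    // 0x12350000 - 0x8000 = 0x12348000.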
+    //
+    // The returned values are therefore 18, 21, 23, 28, and 32. There's also a special
+    // case with the addiu instruction and a 16-bit offset.
+    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
+
+    // Resolve a branch when the target is known.
+    void Resolve(uint32_t target);
+
+    // Relocate a branch by a given delta if needed due to expansion of this or another
+    // branch at a given location by this delta (just changes location_ and target_).
+    void Relocate(uint32_t expand_location, uint32_t delta);
+
+    // If the branch is short, changes its type to long.
+    void PromoteToLong();
+
+    // If necessary, updates the type by promoting a short branch to a long branch
+    // based on the branch location and target. Returns the amount (in bytes) by
+    // which the branch size has increased.
+    // max_short_distance caps the maximum distance between location_ and target_
+    // that is allowed for short branches. This is for debugging/testing purposes.
+    // max_short_distance = 0 forces all short branches to become long.
+    // Use the implicit default argument when not debugging/testing.
+    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+
+    // Returns the location of the instruction(s) containing the offset.
+    uint32_t GetOffsetLocation() const;
+
+    // Calculates and returns the offset ready for encoding in the branch instruction(s).
+    uint32_t GetOffset() const;
+
+   private:
+    // Completes branch construction by determining and recording its type.
+    void InitializeType(bool is_call);
+    // Helper for the above.
+    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
+
+    uint32_t old_location_;      // Offset into assembler buffer in bytes.
+    uint32_t location_;          // Offset into assembler buffer in bytes.
+    uint32_t target_;            // Offset into assembler buffer in bytes.
+
+    GpuRegister lhs_reg_;        // Left-hand side register in conditional branches or
+                                 // indirect call register.
+    GpuRegister rhs_reg_;        // Right-hand side register in conditional branches.
+    BranchCondition condition_;  // Condition for conditional branches.
+
+    Type type_;                  // Current type of the branch.
+    Type old_type_;              // Initial type of the branch.
+  };
+  friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
+  friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
+
   void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
   void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
   void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
-  void EmitJ(int opcode, uint32_t addr26);
+  void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
+  void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+
+  void Buncond(Mips64Label* label);
+  void Bcond(Mips64Label* label,
+             BranchCondition condition,
+             GpuRegister lhs,
+             GpuRegister rhs = ZERO);
+  void Call(Mips64Label* label, GpuRegister indirect_reg);
+  void FinalizeLabeledBranch(Mips64Label* label);
+
+  Branch* GetBranch(uint32_t branch_id);
+  const Branch* GetBranch(uint32_t branch_id) const;
+
+  void PromoteBranches();
+  void EmitBranch(Branch* branch);
+  void EmitBranches();
+  void PatchCFI();
+
+  // Emits exception block.
+  void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
+
+  // List of exception blocks to generate at the end of the code cache.
+  std::vector<Mips64ExceptionSlowPath> exception_blocks_;
+
+  std::vector<Branch> branches_;
+
+  // Whether appending instructions at the end of the buffer or overwriting the existing ones.
+  bool overwriting_;
+  // The current overwrite location.
+  uint32_t overwrite_location_;
+
+  // Data for GetAdjustedPosition(); see the description there.
+  uint32_t last_position_adjustment_;
+  uint32_t last_old_position_;
+  uint32_t last_branch_id_;
 
   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class Mips64ExceptionSlowPath FINAL : public SlowPath {
- public:
-  Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const Mips64ManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace mips64
 }  // namespace art
 
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 4413906..b758d64 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -24,6 +24,8 @@
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
+#define __ GetAssembler()->
+
 namespace art {
 
 struct MIPS64CpuRegisterCompare {
@@ -48,8 +50,26 @@
     return "mips64";
   }
 
+  std::string GetAssemblerCmdName() OVERRIDE {
+    // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details.
+    return "gcc";
+  }
+
   std::string GetAssemblerParameters() OVERRIDE {
-    return " --no-warn -march=mips64r6";
+    // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6
+    // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative
+    // branches in the .text section and so they require a relocation pass (there's a relocation
+    // section, .rela.text, that has the needed info to fix up the branches).
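+    // In addition, -Wl,-Ttext=0 links the .text section at address 0, and
+    // -Wl,-e0 with -nostdlib drops the usual entry point and startup objects,
+    // leaving just the raw code to compare against.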
+    return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
+  }
+
+  void Pad(std::vector<uint8_t>& data) OVERRIDE {
+    // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+    // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+    // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+    // NOP is encoded as four zero bytes on MIPS.
+    size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+    data.insert(data.end(), pad_size, 0);
   }
 
   std::string GetDisassembleParameters() OVERRIDE {
@@ -182,6 +202,71 @@
     return secondary_register_names_[reg];
   }
 
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+  void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+                                                                 mips64::Mips64Label*),
+                              std::string instr_name) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::A0, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::A1, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a1, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+                                                                  mips64::GpuRegister,
+                                                                  mips64::Mips64Label*),
+                               std::string instr_name) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
  private:
   std::vector<mips64::GpuRegister*> registers_;
   std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;
@@ -194,7 +279,6 @@
   EXPECT_TRUE(CheckTools());
 }
 
-
 ///////////////////
 // FP Operations //
 ///////////////////
@@ -319,6 +403,106 @@
   DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
 }
 
+TEST_F(AssemblerMIPS64Test, CmpUnS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUnD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.d");
+}
+
 TEST_F(AssemblerMIPS64Test, CvtDL) {
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
 }
@@ -343,12 +527,300 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w");
 }
 
+TEST_F(AssemblerMIPS64Test, TruncWS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncWD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Mfc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mfhc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfhc1, "mfhc1 ${reg1}, ${reg2}"), "Mfhc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mtc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mtc1, "mtc1 ${reg1}, ${reg2}"), "Mtc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mthc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mthc1, "mthc1 ${reg1}, ${reg2}"), "Mthc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmfc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmfc1, "dmfc1 ${reg1}, ${reg2}"), "Dmfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmtc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmtc1, "dmtc1 ${reg1}, ${reg2}"), "Dmtc1");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
 
 TEST_F(AssemblerMIPS64Test, Jalr) {
-  DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+  DriverStr(".set noreorder\n" +
+            RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+}
+
+TEST_F(AssemblerMIPS64Test, Jialc) {
+  mips64::Mips64Label label1, label2;
+  __ Jialc(&label1, mips64::T9);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Jialc(&label2, mips64::T9);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Jialc(&label1, mips64::T9);
+
+  std::string expected =
+      ".set noreorder\n"
+      "lapc $t9, 1f\n"
+      "jialc $t9, 0\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "lapc $t9, 2f\n"
+      "jialc $t9, 0\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "lapc $t9, 1b\n"
+      "jialc $t9, 0\n";
+  DriverStr(expected, "Jialc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongJialc) {
+  mips64::Mips64Label label1, label2;
+  __ Jialc(&label1, mips64::T9);
+  constexpr uint32_t kAdduCount1 = (1u << 18) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Jialc(&label2, mips64::T9);
+  constexpr uint32_t kAdduCount2 = (1u << 18) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Jialc(&label1, mips64::T9);
+
+  uint32_t offset_forward1 = 3 + kAdduCount1;  // 3: account for auipc, daddiu and jialc.
+  offset_forward1 <<= 2;
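+  // daddiu sign-extends its 16-bit immediate: when bit 15 of the low half is
+  // set, it effectively subtracts 0x10000. Pre-adding (offset & 0x8000) << 1
+  // (which is 0x10000 in that case) compensates, here and below.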
+  offset_forward1 += (offset_forward1 & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  uint32_t offset_forward2 = 3 + kAdduCount2;  // 3: account for auipc, daddiu and jialc.
+  offset_forward2 <<= 2;
+  offset_forward2 += (offset_forward2 & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  uint32_t offset_back = -(3 + kAdduCount2);  // 3: account for auipc, daddiu and jialc.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+      "jialc $t9, 0\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "1:\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+      "jialc $t9, 0\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "2:\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "jialc $t9, 0\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongJialc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc) {
+  mips64::Mips64Label label1, label2;
+  __ Bc(&label1);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Bc(&label2);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Bc(&label1);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc 1f\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bc 2f\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "bc 1b\n";
+  DriverStr(expected, "Bc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Blezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgtzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgeuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1eqz) {
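+  // bc1eqz is a compact branch with a forbidden slot; the expected output
+  // below shows the assembler padding it with a nop.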
+  mips64::Mips64Label label;
+  __ Bc1eqz(mips64::F0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bc1eqz(mips64::F31, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1eqz $f0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1eqz $f31, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1nez) {
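+  // Mirror of the Bc1eqz test for the complementary FPU condition.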
+  mips64::Mips64Label label;
+  __ Bc1nez(mips64::F0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bc1nez(mips64::F31, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1nez $f0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1nez $f31, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1nez");
+}
+
+TEST_F(AssemblerMIPS64Test, LongBeqc) {
+  mips64::Mips64Label label;
+  __ Beqc(mips64::A0, mips64::A1, &label);
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Beqc(mips64::A2, mips64::A3, &label);
+
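+  // A far beqc is expanded into the opposite branch (bnec) around an
+  // auipc/jic pair; the offsets below are in instructions until the <<= 2.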
+  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kAdduCount2 + 1);  // 1: account for bnec.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "bnec $a0, $a1, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "2:\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "bnec $a2, $a3, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqc");
 }
 
 //////////
@@ -379,6 +851,44 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dshd, "dshd ${reg1}, ${reg2}"), "dshd");
 }
 
+TEST_F(AssemblerMIPS64Test, Dext) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
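+  // pos ranges over [0, 31] and size over [1, 32]: 32 * 32 combinations
+  // per register pair.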
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 32 * 32);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; size <= 32; size++) {
+          __ Dext(*reg1, *reg2, pos, size);
+          expected << "dext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dext");
+}
+
+TEST_F(AssemblerMIPS64Test, Dinsu) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
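+  // For each pos in [32, 63], size ranges over [1, 64 - pos], giving
+  // 32 + 31 + ... + 1 = 528 (= 33 * 16) combinations per register pair.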
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 33 * 16);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 32; pos < 64; pos++) {
+        for (int32_t size = 1; pos + size <= 64; size++) {
+          __ Dinsu(*reg1, *reg2, pos, size);
+          expected << "dinsu $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dinsu");
+}
+
 TEST_F(AssemblerMIPS64Test, Wsbh) {
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh");
 }
@@ -494,4 +1004,638 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclo, "dclo ${reg1}, ${reg2}"), "dclo");
 }
 
+TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
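+  // Offsets that fit the load's signed 16-bit immediate are used directly;
+  // others are first materialized in $at. 64-bit loads at offsets that are
+  // only word-aligned are split into two 32-bit loads plus a dins.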
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =
+      "lb $a0, 0($a0)\n"
+      "lb $a0, 0($a1)\n"
+      "lb $a0, 1($a1)\n"
+      "lb $a0, 256($a1)\n"
+      "lb $a0, 1000($a1)\n"
+      "lb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lb $a0, -256($a1)\n"
+      "lb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+
+      "lbu $a0, 0($a0)\n"
+      "lbu $a0, 0($a1)\n"
+      "lbu $a0, 1($a1)\n"
+      "lbu $a0, 256($a1)\n"
+      "lbu $a0, 1000($a1)\n"
+      "lbu $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lbu $a0, -256($a1)\n"
+      "lbu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+
+      "lh $a0, 0($a0)\n"
+      "lh $a0, 0($a1)\n"
+      "lh $a0, 2($a1)\n"
+      "lh $a0, 256($a1)\n"
+      "lh $a0, 1000($a1)\n"
+      "lh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lh $a0, -256($a1)\n"
+      "lh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+
+      "lhu $a0, 0($a0)\n"
+      "lhu $a0, 0($a1)\n"
+      "lhu $a0, 2($a1)\n"
+      "lhu $a0, 256($a1)\n"
+      "lhu $a0, 1000($a1)\n"
+      "lhu $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lhu $a0, -256($a1)\n"
+      "lhu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a0, 4($a1)\n"
+      "lw $a0, 256($a1)\n"
+      "lw $a0, 1000($a1)\n"
+      "lw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lw $a0, -256($a1)\n"
+      "lw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+
+      "lwu $a0, 0($a0)\n"
+      "lwu $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $a0, 256($a1)\n"
+      "lwu $a0, 1000($a1)\n"
+      "lwu $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lwu $a0, -256($a1)\n"
+      "lwu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+
+      "ld $a0, 0($a0)\n"
+      "ld $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $t3, 8($a1)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ld $a0, 256($a1)\n"
+      "ld $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ld $a0, -256($a1)\n"
+      "ld $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n";
+  DriverStr(expected, "LoadFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
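+  // Same offset handling as in LoadFromOffset; a split 64-bit FPU load uses
+  // lwc1 for the low word and lw/mthc1 for the high word.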
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =
+      "lwc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f0, 256($a0)\n"
+      "lwc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f0, -256($a0)\n"
+      "lwc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+
+      "ldc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lw $t3, 8($a0)\n"
+      "mthc1 $t3, $f0\n"
+      "ldc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ldc1 $f0, -256($a0)\n"
+      "ldc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadFpuFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreToOffset) {
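+  // Mirrors LoadFromOffset: out-of-range offsets go through $at, and a split
+  // 64-bit store writes the low word with sw and the high word via dsrl32/sw.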
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8001);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 2);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =
+      "sb $a0, 0($a0)\n"
+      "sb $a0, 0($a1)\n"
+      "sb $a0, 1($a1)\n"
+      "sb $a0, 256($a1)\n"
+      "sb $a0, 1000($a1)\n"
+      "sb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "sb $a0, -256($a1)\n"
+      "sb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+
+      "sh $a0, 0($a0)\n"
+      "sh $a0, 0($a1)\n"
+      "sh $a0, 2($a1)\n"
+      "sh $a0, 256($a1)\n"
+      "sh $a0, 1000($a1)\n"
+      "sh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "sh $a0, -256($a1)\n"
+      "sh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+
+      "sw $a0, 0($a0)\n"
+      "sw $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "sw $a0, 256($a1)\n"
+      "sw $a0, 1000($a1)\n"
+      "sw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "sw $a0, -256($a1)\n"
+      "sw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+
+      "sd $a0, 0($a0)\n"
+      "sd $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($a1)\n"
+      "sd $a0, 256($a1)\n"
+      "sd $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "sd $a0, -256($a1)\n"
+      "sd $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n";
+  DriverStr(expected, "StoreToOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
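+  // A split 64-bit FPU store extracts the high word with mfhc1 and writes the
+  // two halves with swc1/sw.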
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =
+      "swc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f0, 256($a0)\n"
+      "swc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "swc1 $f0, -256($a0)\n"
+      "swc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+
+      "sdc1 $f0, 0($a0)\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($a0)\n"
+      "sw $t3, 8($a0)\n"
+      "sdc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "sdc1 $f0, -256($a0)\n"
+      "sdc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreFpuToOffset");
+}
+
+#undef __
+
 }  // namespace art
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index 4c4705b..c9f9556 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_
 
 #include "constants_mips64.h"
-#include "dwarf/register.h"
+#include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -39,22 +39,22 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class Mips64ManagedRegister : public ManagedRegister {
  public:
-  GpuRegister AsGpuRegister() const {
+  constexpr GpuRegister AsGpuRegister() const {
     CHECK(IsGpuRegister());
     return static_cast<GpuRegister>(id_);
   }
 
-  FpuRegister AsFpuRegister() const {
+  constexpr FpuRegister AsFpuRegister() const {
     CHECK(IsFpuRegister());
     return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
   }
 
-  bool IsGpuRegister() const {
+  constexpr bool IsGpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
   }
 
-  bool IsFpuRegister() const {
+  constexpr bool IsFpuRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfGpuRegIds;
     return (0 <= test) && (test < kNumberOfFpuRegIds);
@@ -67,22 +67,22 @@
   // then false is returned.
   bool Overlaps(const Mips64ManagedRegister& other) const;
 
-  static Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
+  static constexpr Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
     CHECK_NE(r, kNoGpuRegister);
     return FromRegId(r);
   }
 
-  static Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
+  static constexpr Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
     CHECK_NE(r, kNoFpuRegister);
     return FromRegId(r + kNumberOfGpuRegIds);
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -98,9 +98,9 @@
 
   friend class ManagedRegister;
 
-  explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Mips64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Mips64ManagedRegister FromRegId(int reg_id) {
     Mips64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -111,7 +111,7 @@
 
 }  // namespace mips64
 
-inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
+constexpr inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
   mips64::Mips64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
new file mode 100644
index 0000000..0fd9e5b
--- /dev/null
+++ b/compiler/utils/string_reference_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "string_reference.h"
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "utils/test_dex_file_builder.h"
+
+namespace art {
+
+TEST(StringReference, ValueComparator) {
+  // This is a regression test for StringReferenceValueComparator using the wrong
+  // dex file to get the string data from a StringId. We construct two dex files,
+  // each containing a single string of the same length but a different value.
+  // The dex files therefore have the same layout, so the byte offset read from
+  // the StringId in one dex file, when used in the other dex file, still points
+  // to valid string data, except that it is the wrong string. Without the fix,
+  // the strings would compare equal.
+  TestDexFileBuilder builder1;
+  builder1.AddString("String1");
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(1u, dex_file1->NumStringIds());
+  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0)));
+  StringReference sr1(dex_file1.get(), 0);
+
+  TestDexFileBuilder builder2;
+  builder2.AddString("String2");
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(1u, dex_file2->NumStringIds());
+  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0)));
+  StringReference sr2(dex_file2.get(), 0);
+
+  StringReferenceValueComparator cmp;
+  EXPECT_TRUE(cmp(sr1, sr2));  // "String1" < "String2" is true.
+  EXPECT_FALSE(cmp(sr2, sr1));  // "String2" < "String1" is false.
+}
+
+TEST(StringReference, ValueComparator2) {
+  const char* const kDexFile1Strings[] = {
+      "",
+      "abc",
+      "abcxyz",
+  };
+  const char* const kDexFile2Strings[] = {
+      "a",
+      "abc",
+      "abcdef",
+      "def",
+  };
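+  // expectedCmp12[i][j] records whether kDexFile1Strings[i] is
+  // lexicographically less than kDexFile2Strings[j]; expectedCmp21 records
+  // the comparisons in the opposite direction.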
+  const bool expectedCmp12[arraysize(kDexFile1Strings)][arraysize(kDexFile2Strings)] = {
+      { true, true, true, true },
+      { false, false, true, true },
+      { false, false, false, true },
+  };
+  const bool expectedCmp21[arraysize(kDexFile2Strings)][arraysize(kDexFile1Strings)] = {
+      { false, true, true },
+      { false, false, true },
+      { false, false, true },
+      { false, false, false },
+  };
+
+  TestDexFileBuilder builder1;
+  for (const char* s : kDexFile1Strings) {
+    builder1.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) {
+    ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index)));
+  }
+
+  TestDexFileBuilder builder2;
+  for (const char* s : kDexFile2Strings) {
+    builder2.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) {
+    ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index)));
+  }
+
+  StringReferenceValueComparator cmp;
+  for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) {
+    for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) {
+      StringReference sr1(dex_file1.get(), index1);
+      StringReference sr2(dex_file2.get(), index2);
+      EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2;
+      EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 42ed881..1a8f567 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -18,6 +18,7 @@
 
 #include <algorithm>
 #include <numeric>
+#include <sys/mman.h>
 
 #include "base/logging.h"
 #include "base/macros.h"
@@ -44,23 +45,17 @@
   }
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void RemoveChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        typename FreeBySizeSet::const_iterator free_by_size_pos) {
+void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
   auto free_by_start_pos = free_by_size_pos->second;
-  free_by_size->erase(free_by_size_pos);
-  free_by_start->erase(free_by_start_pos);
+  free_by_size_.erase(free_by_size_pos);
+  free_by_start_.erase(free_by_start_pos);
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void InsertChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        const SpaceChunk& chunk) {
+inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) {
   DCHECK_NE(chunk.size, 0u);
-  auto insert_result = free_by_start->insert(chunk);
+  auto insert_result = free_by_start_.insert(chunk);
   DCHECK(insert_result.second);
-  free_by_size->emplace(chunk.size, insert_result.first);
+  free_by_size_.emplace(chunk.size, insert_result.first);
 }
 
 SwapSpace::SwapSpace(int fd, size_t initial_size)
@@ -69,10 +64,18 @@
       lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) {
   // Assume that the file is unlinked.
 
-  InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size));
+  InsertChunk(NewFileChunk(initial_size));
 }
 
 SwapSpace::~SwapSpace() {
+  // Unmap all mmapped chunks. Nothing should be allocated anymore at
+  // this point, so free_by_start_ should contain only full-size chunks.
+  for (const SpaceChunk& chunk : free_by_start_) {
+    if (munmap(chunk.ptr, chunk.size) != 0) {
+      PLOG(ERROR) << "Failed to unmap swap space chunk at "
+          << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size;
+    }
+  }
   // All arenas are backed by the same file. Just close the descriptor.
   close(fd_);
 }
@@ -113,7 +116,7 @@
       : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
   if (it != free_by_size_.end()) {
     old_chunk = *it->second;
-    RemoveChunk(&free_by_start_, &free_by_size_, it);
+    RemoveChunk(it);
   } else {
     // Not a big enough free chunk, need to increase file size.
     old_chunk = NewFileChunk(size);
@@ -124,13 +127,13 @@
   if (old_chunk.size != size) {
     // Insert the remainder.
     SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
-    InsertChunk(&free_by_start_, &free_by_size_, new_chunk);
+    InsertChunk(new_chunk);
   }
 
   return ret;
 }
 
-SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
+SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
 #if !defined(__APPLE__)
   size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize));
   int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part));
@@ -149,7 +152,6 @@
   }
   size_ += next_part;
   SpaceChunk new_chunk = {ptr, next_part};
-  maps_.push_back(new_chunk);
   return new_chunk;
 #else
   UNUSED(min_size, kMininumMapSize);
@@ -159,7 +161,7 @@
 }
 
 // TODO: Full coalescing.
-void SwapSpace::Free(void* ptrV, size_t size) {
+void SwapSpace::Free(void* ptr, size_t size) {
   MutexLock lock(Thread::Current(), lock_);
   size = RoundUp(size, 8U);
 
@@ -168,7 +170,7 @@
     free_before = CollectFree(free_by_start_, free_by_size_);
   }
 
-  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size };
+  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size };
   auto it = free_by_start_.lower_bound(chunk);
   if (it != free_by_start_.begin()) {
     auto prev = it;
@@ -180,7 +182,7 @@
       chunk.ptr -= prev->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "prev" is invalidated but "it" remains valid.
     }
   }
@@ -191,11 +193,11 @@
       chunk.size += it->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "it" is invalidated but we don't need it anymore.
     }
   }
-  InsertChunk(&free_by_start_, &free_by_size_, chunk);
+  InsertChunk(chunk);
 
   if (kCheckFreeMaps) {
     size_t free_after = CollectFree(free_by_start_, free_by_size_);
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9127b6b..bf06675 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -19,42 +19,17 @@
 
 #include <cstdlib>
 #include <list>
+#include <vector>
 #include <set>
 #include <stdint.h>
 #include <stddef.h>
 
-#include "base/debug_stack.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "mem_map.h"
 
 namespace art {
 
-// Chunk of space.
-struct SpaceChunk {
-  uint8_t* ptr;
-  size_t size;
-
-  uintptr_t Start() const {
-    return reinterpret_cast<uintptr_t>(ptr);
-  }
-  uintptr_t End() const {
-    return reinterpret_cast<uintptr_t>(ptr) + size;
-  }
-};
-
-inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) {
-  return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr);
-}
-
-class SortChunkByPtr {
- public:
-  bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
-    return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
-  }
-};
-
 // An arena pool that creates arenas backed by an mmaped file.
 class SwapSpace {
  public:
@@ -68,17 +43,27 @@
   }
 
  private:
-  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+  // Chunk of space.
+  struct SpaceChunk {
+    uint8_t* ptr;
+    size_t size;
 
-  int fd_;
-  size_t size_;
-  std::list<SpaceChunk> maps_;
+    uintptr_t Start() const {
+      return reinterpret_cast<uintptr_t>(ptr);
+    }
+    uintptr_t End() const {
+      return reinterpret_cast<uintptr_t>(ptr) + size;
+    }
+  };
 
-  // NOTE: Boost.Bimap would be useful for the two following members.
+  class SortChunkByPtr {
+   public:
+    bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
+      return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
+    }
+  };
 
-  // Map start of a free chunk to its size.
   typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
-  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
 
   // Map size to an iterator to free_by_start_'s entry.
   typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
@@ -92,6 +77,20 @@
     }
   };
   typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet;
+
+  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+
+  void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_);
+  void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_);
+
+  int fd_;
+  size_t size_;
+
+  // NOTE: Boost.Bimap would be useful for the two following members.
+
+  // Map start of a free chunk to its size.
+  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
+  // Free chunks ordered by size.
   FreeBySizeSet free_by_size_ GUARDED_BY(lock_);
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -126,6 +125,9 @@
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
@@ -201,9 +203,22 @@
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
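+// Allocators compare equal exactly when they point to the same swap space;
+// this equality is what lets containers exchange memory between allocators.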
 template <typename T>
+inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return lhs.swap_space_ == rhs.swap_space_;
+}
+
+template <typename T>
+inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename T>
 using SwapVector = std::vector<T, SwapAllocator<T>>;
 template <typename T, typename Comparator>
 using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b6a228c..6921780 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -21,6 +21,7 @@
 #include <set>
 #include <map>
 #include <vector>
+#include <zlib.h>
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
@@ -87,12 +88,13 @@
     std::memset(header_data.data, 0, sizeof(header_data.data));
     DexFile::Header* header = reinterpret_cast<DexFile::Header*>(&header_data.data);
     std::copy_n(DexFile::kDexMagic, 4u, header->magic_);
-    std::copy_n(DexFile::kDexMagicVersion, 4u, header->magic_ + 4u);
-    header->header_size_ = sizeof(header);
+    std::copy_n(DexFile::kDexMagicVersions[0], 4u, header->magic_ + 4u);
+    header->header_size_ = sizeof(DexFile::Header);
     header->endian_tag_ = DexFile::kDexEndianConstant;
     header->link_size_ = 0u;  // Unused.
     header->link_off_ = 0u;  // Unused.
-    header->map_off_ = 0u;  // Unused.
+    header->map_off_ = 0u;  // Unused. TODO: This is wrong. Dex files created by this builder
+                            //               cannot be verified. b/26808512
 
     uint32_t data_section_size = 0u;
 
@@ -161,7 +163,6 @@
     uint32_t total_size = data_section_offset + data_section_size;
 
     dex_file_data_.resize(total_size);
-    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     for (const auto& entry : strings_) {
       CHECK_LT(entry.first.size(), 128u);
@@ -210,11 +211,34 @@
       Write32(raw_offset + 4u, GetStringIdx(entry.first.name));
     }
 
-    // Leave checksum and signature as zeros.
+    // Leave signature as zeros.
 
+    header->file_size_ = dex_file_data_.size();
+
+    // Write the complete header early, as part of it needs to be checksummed.
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
+
+    // Checksum starts after the checksum field.
+    size_t skip = sizeof(header->magic_) + sizeof(header->checksum_);
+    header->checksum_ = adler32(adler32(0L, Z_NULL, 0),
+                                dex_file_data_.data() + skip,
+                                dex_file_data_.size() - skip);
+
+    // Write the complete header again, now that the checksum has been filled in.
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
+
+    static constexpr bool kVerify = false;
+    static constexpr bool kVerifyChecksum = false;
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
-        &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, &error_msg));
+        &dex_file_data_[0],
+        dex_file_data_.size(),
+        dex_location,
+        0u,
+        nullptr,
+        kVerify,
+        kVerifyChecksum,
+        &error_msg));
     CHECK(dex_file != nullptr) << error_msg;
     return dex_file;
   }
diff --git a/compiler/utils/transform_array_ref.h b/compiler/utils/transform_array_ref.h
new file mode 100644
index 0000000..a6da34f
--- /dev/null
+++ b/compiler/utils/transform_array_ref.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
+#define ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
+
+#include <type_traits>
+
+#include "utils/array_ref.h"
+#include "utils/transform_iterator.h"
+
+namespace art {
+
+/**
+ * @brief An ArrayRef<> wrapper that uses a transformation function for element access.
+ */
+template <typename BaseType, typename Function>
+class TransformArrayRef {
+ private:
+  using Iter = TransformIterator<typename ArrayRef<BaseType>::iterator, Function>;
+
+  // The Function may take a non-const reference, so const_iterator may not exist.
+  using FallbackConstIter = std::iterator<std::random_access_iterator_tag, void, void, void, void>;
+  using PreferredConstIter =
+      TransformIterator<typename ArrayRef<BaseType>::const_iterator, Function>;
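+  // Overload resolution prefers the int& overload for the lvalue argument of
+  // the decltype() below; SFINAE disables it when Function is not callable
+  // with a const BaseType&.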
+  template <typename F, typename = typename std::result_of<F(const BaseType&)>::type>
+  static PreferredConstIter ConstIterHelper(int&);
+  template <typename F>
+  static FallbackConstIter ConstIterHelper(const int&);
+
+  using ConstIter = decltype(ConstIterHelper<Function>(*reinterpret_cast<int*>(0)));
+
+ public:
+  using value_type = typename Iter::value_type;
+  using reference = typename Iter::reference;
+  using const_reference = typename ConstIter::reference;
+  using pointer = typename Iter::pointer;
+  using const_pointer = typename ConstIter::pointer;
+  using iterator = Iter;
+  using const_iterator = typename std::conditional<
+      std::is_same<ConstIter, FallbackConstIter>::value,
+      void,
+      ConstIter>::type;
+  using reverse_iterator = std::reverse_iterator<Iter>;
+  using const_reverse_iterator = typename std::conditional<
+      std::is_same<ConstIter, FallbackConstIter>::value,
+      void,
+      std::reverse_iterator<ConstIter>>::type;
+  using difference_type = typename ArrayRef<BaseType>::difference_type;
+  using size_type = typename ArrayRef<BaseType>::size_type;
+
+  // Constructors.
+
+  TransformArrayRef(const TransformArrayRef& other) = default;
+
+  template <typename OtherBT>
+  TransformArrayRef(const ArrayRef<OtherBT>& base, Function fn)
+      : data_(base, fn) { }
+
+  template <typename OtherBT,
+            typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type>
+  TransformArrayRef(const TransformArrayRef<OtherBT, Function>& other)
+      : TransformArrayRef(other.base(), other.GetFunction()) { }
+
+  // Assignment operators.
+
+  TransformArrayRef& operator=(const TransformArrayRef& other) = default;
+
+  template <typename OtherBT,
+            typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type>
+  TransformArrayRef& operator=(const TransformArrayRef<OtherBT, Function>& other) {
+    return *this = TransformArrayRef(other.base(), other.GetFunction());
+  }
+
+  // Destructor.
+  ~TransformArrayRef() = default;
+
+  // Iterators.
+  iterator begin() { return MakeIterator(base().begin()); }
+  const_iterator begin() const { return MakeIterator(base().cbegin()); }
+  const_iterator cbegin() const { return MakeIterator(base().cbegin()); }
+  iterator end() { return MakeIterator(base().end()); }
+  const_iterator end() const { return MakeIterator(base().cend()); }
+  const_iterator cend() const { return MakeIterator(base().cend()); }
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
+  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+  const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+  const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
+
+  // Size.
+  size_type size() const { return base().size(); }
+  bool empty() const { return base().empty(); }
+
+  // Element access. NOTE: Not providing data().
+
+  reference operator[](size_type n) { return GetFunction()(base()[n]); }
+  const_reference operator[](size_type n) const { return GetFunction()(base()[n]); }
+
+  reference front() { return GetFunction()(base().front()); }
+  const_reference front() const { return GetFunction()(base().front()); }
+
+  reference back() { return GetFunction()(base().back()); }
+  const_reference back() const { return GetFunction()(base().back()); }
+
+  TransformArrayRef SubArray(size_type pos) {
+    return TransformArrayRef(base().subarray(pos), GetFunction());
+  }
+  TransformArrayRef<const BaseType, Function> SubArray(size_type pos) const {
+    return TransformArrayRef<const BaseType, Function>(base().subarray(pos), GetFunction());
+  }
+  TransformArrayRef SubArray(size_type pos, size_type length) {
+    return TransformArrayRef(base().subarray(pos, length), GetFunction());
+  }
+  TransformArrayRef<const BaseType, Function> SubArray(size_type pos, size_type length) const {
+    return TransformArrayRef<const BaseType, Function>(base().subarray(pos, length),
+                                                       GetFunction());
+  }
+
+  // Retrieve the base ArrayRef<>.
+  ArrayRef<BaseType> base() {
+    return data_.base_;
+  }
+  ArrayRef<const BaseType> base() const {
+    return ArrayRef<const BaseType>(data_.base_);
+  }
+
+ private:
+  // Allow EBO for state-less Function.
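+  // Deriving from Function, rather than storing it as a member, lets an empty
+  // (state-less) functor occupy no storage in Data.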
+  struct Data : Function {
+   public:
+    Data(ArrayRef<BaseType> base, Function fn) : Function(fn), base_(base) { }
+
+    ArrayRef<BaseType> base_;
+  };
+
+  const Function& GetFunction() const {
+    return static_cast<const Function&>(data_);
+  }
+
+  template <typename BaseIterator>
+  auto MakeIterator(BaseIterator base) const {
+    return MakeTransformIterator(base, GetFunction());
+  }
+
+  Data data_;
+
+  template <typename OtherBT, typename OtherFunction>
+  friend class TransformArrayRef;
+};
+
+template <typename BaseType, typename Function>
+bool operator==(const TransformArrayRef<BaseType, Function>& lhs,
+                const TransformArrayRef<BaseType, Function>& rhs) {
+  return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+}
+
+template <typename BaseType, typename Function>
+bool operator!=(const TransformArrayRef<BaseType, Function>& lhs,
+                const TransformArrayRef<BaseType, Function>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename ValueType, typename Function>
+TransformArrayRef<ValueType, Function> MakeTransformArrayRef(
+    ArrayRef<ValueType> container, Function f) {
+  return TransformArrayRef<ValueType, Function>(container, f);
+}
+
+template <typename Container, typename Function>
+TransformArrayRef<typename Container::value_type, Function> MakeTransformArrayRef(
+    Container& container, Function f) {
+  return TransformArrayRef<typename Container::value_type, Function>(
+      ArrayRef<typename Container::value_type>(container.data(), container.size()), f);
+}
+
+template <typename Container, typename Function>
+TransformArrayRef<const typename Container::value_type, Function> MakeTransformArrayRef(
+    const Container& container, Function f) {
+  return TransformArrayRef<const typename Container::value_type, Function>(
+      ArrayRef<const typename Container::value_type>(container.data(), container.size()), f);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
diff --git a/compiler/utils/transform_array_ref_test.cc b/compiler/utils/transform_array_ref_test.cc
new file mode 100644
index 0000000..8d71fd7
--- /dev/null
+++ b/compiler/utils/transform_array_ref_test.cc
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "utils/transform_array_ref.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+struct ValueHolder {
+  // Deliberately not explicit.
+  ValueHolder(int v) : value(v) { }  // NOLINT
+  int value;
+};
+
+ATTRIBUTE_UNUSED bool operator==(const ValueHolder& lhs, const ValueHolder& rhs) {
+  return lhs.value == rhs.value;
+}
+
+}  // anonymous namespace
+
+TEST(TransformArrayRef, ConstRefAdd1) {
+  auto add1 = [](const ValueHolder& h) { return h.value + 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 6, 4, 0 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, add1);
+  using TarefIter = decltype(taref)::iterator;
+  using ConstTarefIter = decltype(taref)::const_iterator;
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<TarefIter, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<ConstTarefIter, decltype(taref)::const_pointer>::value,
+                "const_pointer");
+  static_assert(std::is_same<int, decltype(taref)::const_reference>::value, "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 7, 5, 1 }), output);
+  output.clear();
+
+  std::copy(taref.cbegin(), taref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 7, 5, 1 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 5, 7, 8 }), output);
+  output.clear();
+
+  std::copy(taref.crbegin(), taref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 5, 7, 8 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value + 1, taref.front());
+  ASSERT_EQ(input.back().value + 1, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value + 1, taref[i]);
+  }
+}
+
+TEST(TransformArrayRef, NonConstRefSub1) {
+  auto sub1 = [](ValueHolder& h) { return h.value - 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 4, 4, 5, 7, 10 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, sub1);
+  using TarefIter = decltype(taref)::iterator;
+  static_assert(std::is_same<void, decltype(taref)::const_iterator>::value, "const_iterator");
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<TarefIter, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<void, decltype(taref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<void, decltype(taref)::const_reference>::value, "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 4, 6, 9 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 6, 4, 3, 3 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value - 1, taref.front());
+  ASSERT_EQ(input.back().value - 1, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value - 1, taref[i]);
+  }
+}
+
+TEST(TransformArrayRef, ConstAndNonConstRef) {
+  struct Ref {
+    int& operator()(ValueHolder& h) const { return h.value; }
+    const int& operator()(const ValueHolder& h) const { return h.value; }
+  };
+  Ref ref;
+  std::vector<ValueHolder> input({ 1, 0, 1, 0, 3, 1 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, ref);
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<const int*, decltype(taref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<const int&, decltype(taref)::const_reference>::value,
+                "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(taref.cbegin(), taref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  std::copy(taref.crbegin(), taref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value, taref.front());
+  ASSERT_EQ(input.back().value, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, taref[i]);
+  }
+
+  // Test writing through the transform iterator.
+  std::vector<int> transform_input({ 24, 37, 11, 71 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  taref = MakeTransformArrayRef(transformed, ref);
+  for (size_t i = 0; i != transform_input.size(); ++i) {
+    taref[i] = transform_input[i];
+  }
+  ASSERT_EQ(std::vector<ValueHolder>({ 24, 37, 11, 71 }), transformed);
+
+  const std::vector<ValueHolder>& cinput = input;
+
+  auto ctaref = MakeTransformArrayRef(cinput, ref);
+  static_assert(std::is_same<int, decltype(ctaref)::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, decltype(ctaref)::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, decltype(ctaref)::reference>::value, "reference");
+  static_assert(std::is_same<const int*, decltype(ctaref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<const int&, decltype(ctaref)::const_reference>::value,
+                "const_reference");
+
+  std::copy(ctaref.begin(), ctaref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.cbegin(), ctaref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.rbegin(), ctaref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.crbegin(), ctaref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  ASSERT_EQ(cinput.size(), ctaref.size());
+  ASSERT_EQ(cinput.empty(), ctaref.empty());
+  ASSERT_EQ(cinput.front().value, ctaref.front());
+  ASSERT_EQ(cinput.back().value, ctaref.back());
+
+  for (size_t i = 0; i != cinput.size(); ++i) {
+    ASSERT_EQ(cinput[i].value, ctaref[i]);
+  }
+
+  // Test conversion adding const.
+  decltype(ctaref) ctaref2 = taref;
+  ASSERT_EQ(taref.size(), ctaref2.size());
+  for (size_t i = 0; i != taref.size(); ++i) {
+    ASSERT_EQ(taref[i], ctaref2[i]);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/utils/transform_iterator.h b/compiler/utils/transform_iterator.h
new file mode 100644
index 0000000..3bc9046
--- /dev/null
+++ b/compiler/utils/transform_iterator.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
+#define ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
+
+#include <iterator>
+#include <type_traits>
+
+#include "base/iteration_range.h"
+
+namespace art {
+
+// The transform iterator transforms values from the base iterator with a given
+// transformation function. It can serve as a replacement for std::transform(), i.e.
+//    std::copy(MakeTransformIterator(begin, f), MakeTransformIterator(end, f), out)
+// is equivalent to
+//    std::transform(begin, end, out, f)
+// If the function returns an l-value reference or a wrapper that supports assignment,
+// the TransformIterator can also be used as an output iterator, i.e.
+//    std::copy(begin, end, MakeTransformIterator(out, f))
+// is equivalent to
+//    for (auto it = begin; it != end; ++it) {
+//      f(*out++) = *it;
+//    }
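+//
+// A minimal input-side sketch (illustrative only; the names are placeholders):
+//
+//    std::vector<int> values = { 1, 2, 3 };
+//    auto square = [](int v) { return v * v; };
+//    std::vector<int> squared;
+//    std::copy(MakeTransformIterator(values.begin(), square),
+//              MakeTransformIterator(values.end(), square),
+//              std::back_inserter(squared));  // squared == { 1, 4, 9 }.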
+template <typename BaseIterator, typename Function>
+class TransformIterator {
+ private:
+  static_assert(std::is_base_of<
+                    std::input_iterator_tag,
+                    typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+                "Transform iterator base must be an input iterator.");
+
+  using InputType = typename std::iterator_traits<BaseIterator>::reference;
+  using ResultType = typename std::result_of<Function(InputType)>::type;
+
+ public:
+  using iterator_category = typename std::iterator_traits<BaseIterator>::iterator_category;
+  using value_type =
+      typename std::remove_const<typename std::remove_reference<ResultType>::type>::type;
+  using difference_type = typename std::iterator_traits<BaseIterator>::difference_type;
+  using pointer = typename std::conditional<
+      std::is_reference<ResultType>::value,
+      typename std::add_pointer<typename std::remove_reference<ResultType>::type>::type,
+      TransformIterator>::type;
+  using reference = ResultType;
+
+  TransformIterator(BaseIterator base, Function fn)
+      : data_(base, fn) { }
+
+  template <typename OtherBI>
+  TransformIterator(const TransformIterator<OtherBI, Function>& other)
+      : data_(other.base(), other.GetFunction()) {
+  }
+
+  TransformIterator& operator++() {
+    ++data_.base_;
+    return *this;
+  }
+
+  TransformIterator operator++(int) {
+    TransformIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+
+  TransformIterator& operator--() {
+    static_assert(
+        std::is_base_of<std::bidirectional_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be bidirectional iterator to use operator--()");
+    --data_.base_;
+    return *this;
+  }
+
+  TransformIterator operator--(int) {
+    TransformIterator tmp(*this);
+    --*this;
+    return tmp;
+  }
+
+  reference operator*() const {
+    return GetFunction()(*base());
+  }
+
+  reference operator[](difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator[]");
+    return GetFunction()(base()[n]);
+  }
+
+  TransformIterator operator+(difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator+");
+    return TransformIterator(base() + n, GetFunction());
+  }
+
+  TransformIterator operator-(difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator-");
+    return TransformIterator(base() - n, GetFunction());
+  }
+
+  difference_type operator-(const TransformIterator& other) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator-");
+    return base() - other.base();
+  }
+
+  // Retrieve the base iterator.
+  BaseIterator base() const {
+    return data_.base_;
+  }
+
+  // Retrieve the transformation function.
+  const Function& GetFunction() const {
+    return static_cast<const Function&>(data_);
+  }
+
+ private:
+  // Allow EBO for state-less Function.
+  struct Data : Function {
+   public:
+    Data(BaseIterator base, Function fn) : Function(fn), base_(base) { }
+
+    BaseIterator base_;
+  };
+
+  Data data_;
+};
+
+template <typename BaseIterator1, typename BaseIterator2, typename Function>
+bool operator==(const TransformIterator<BaseIterator1, Function>& lhs,
+                const TransformIterator<BaseIterator2, Function>& rhs) {
+  return lhs.base() == rhs.base();
+}
+
+template <typename BaseIterator1, typename BaseIterator2, typename Function>
+bool operator!=(const TransformIterator<BaseIterator1, Function>& lhs,
+                const TransformIterator<BaseIterator2, Function>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename BaseIterator, typename Function>
+TransformIterator<BaseIterator, Function> MakeTransformIterator(BaseIterator base, Function f) {
+  return TransformIterator<BaseIterator, Function>(base, f);
+}
+
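+// Make an iteration range that applies `f` to each element of `range`. With
+// `values` and `square` as in the sketch above, the loop
+//    for (int v : MakeTransformRange(values, square)) { ... }
+// visits 1, 4 and 9.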
+template <typename BaseRange, typename Function>
+auto MakeTransformRange(BaseRange& range, Function f) {
+  return MakeIterationRange(MakeTransformIterator(range.begin(), f),
+                            MakeTransformIterator(range.end(), f));
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
diff --git a/compiler/utils/transform_iterator_test.cc b/compiler/utils/transform_iterator_test.cc
new file mode 100644
index 0000000..57ff0a6
--- /dev/null
+++ b/compiler/utils/transform_iterator_test.cc
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <forward_list>
+#include <list>
+#include <type_traits>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "utils/transform_iterator.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+struct ValueHolder {
+  // Deliberately not explicit.
+  ValueHolder(int v) : value(v) { }  // NOLINT
+  int value;
+};
+
+bool operator==(const ValueHolder& lhs, const ValueHolder& rhs) {
+  return lhs.value == rhs.value;
+}
+
+}  // anonymous namespace
+
+TEST(TransformIterator, VectorAdd1) {
+  auto add1 = [](const ValueHolder& h) { return h.value + 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 1, 7, 3, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_titer, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_ctiter, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_rtiter, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_crtiter, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), add1),
+            MakeTransformIterator(input.end(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 2, 8, 4, 9 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), add1),
+            MakeTransformIterator(input.cend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 2, 8, 4, 9 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), add1),
+            MakeTransformIterator(input.rend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 4, 8, 2 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), add1),
+            MakeTransformIterator(input.crend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 4, 8, 2 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.begin(), add1)[i]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.cbegin(), add1)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.rbegin(), add1)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.crbegin(), add1)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.end(), add1)[index_from_end]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.cend(), add1)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.rend(), add1)[index_from_rend]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.crend(), add1)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), add1) + i,
+              MakeTransformIterator(input.begin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), add1) + i,
+              MakeTransformIterator(input.cbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), add1) + i,
+              MakeTransformIterator(input.rbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), add1) + i,
+              MakeTransformIterator(input.crbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.end(), add1) - i,
+              MakeTransformIterator(input.end() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), add1) - i,
+              MakeTransformIterator(input.cend() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), add1) - i,
+              MakeTransformIterator(input.rend() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), add1) - i,
+              MakeTransformIterator(input.crend() - i, add1));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), add1) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), add1) - MakeTransformIterator(input.begin(), add1),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), add1);
+  decltype(MakeTransformIterator(input.cbegin(), add1)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+  auto rit = MakeTransformIterator(input.rbegin(), add1);
+  decltype(MakeTransformIterator(input.crbegin(), add1)) crit(rit);
+  static_assert(!std::is_same<decltype(rit), decltype(crit)>::value, "Types must be different");
+  ASSERT_EQ(rit, crit);
+}
+
+TEST(TransformIterator, ListSub1) {
+  auto sub1 = [](const ValueHolder& h) { return h.value - 1; };  // NOLINT [readability/braces]
+  std::list<ValueHolder> input({ 2, 3, 5, 7, 11 });
+  std::vector<int> output;
+
+  using list_titer = decltype(MakeTransformIterator(input.begin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<list_titer, list_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_titer::reference>::value, "reference");
+
+  using list_ctiter = decltype(MakeTransformIterator(input.cbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_ctiter, list_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_ctiter::reference>::value, "reference");
+
+  using list_rtiter = decltype(MakeTransformIterator(input.rbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_rtiter, list_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_rtiter::reference>::value, "reference");
+
+  using list_crtiter = decltype(MakeTransformIterator(input.crbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_crtiter, list_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), sub1),
+            MakeTransformIterator(input.end(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 2, 4, 6, 10 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), sub1),
+            MakeTransformIterator(input.cend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 2, 4, 6, 10 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), sub1),
+            MakeTransformIterator(input.rend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 10, 6, 4, 2, 1 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), sub1),
+            MakeTransformIterator(input.crend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 10, 6, 4, 2, 1 }), output);
+  output.clear();
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), sub1);
+  decltype(MakeTransformIterator(input.cbegin(), sub1)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+}
+
+TEST(TransformIterator, ForwardListMul3) {
+  auto mul3 = [](const ValueHolder& h) { return h.value * 3; };  // NOLINT [readability/braces]
+  std::forward_list<ValueHolder> input({ 1, 1, 2, 3, 5, 8 });
+  std::vector<int> output;
+
+  using flist_titer = decltype(MakeTransformIterator(input.begin(), mul3));
+  static_assert(std::is_same<std::forward_iterator_tag,
+                             flist_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, flist_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<flist_titer, flist_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, flist_titer::reference>::value, "reference");
+
+  using flist_ctiter = decltype(MakeTransformIterator(input.cbegin(), mul3));
+  static_assert(std::is_same<std::forward_iterator_tag,
+                             flist_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, flist_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<flist_ctiter, flist_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, flist_ctiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), mul3),
+            MakeTransformIterator(input.end(), mul3),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 6, 9, 15, 24 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), mul3),
+            MakeTransformIterator(input.cend(), mul3),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 6, 9, 15, 24 }), output);
+  output.clear();
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), mul3);
+  decltype(MakeTransformIterator(input.cbegin(), mul3)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+}
+
+TEST(TransformIterator, VectorConstReference) {
+  auto ref = [](const ValueHolder& h) -> const int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), ref),
+            MakeTransformIterator(input.cend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), ref),
+            MakeTransformIterator(input.crend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cbegin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cend(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), ref) + i,
+              MakeTransformIterator(input.cbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), ref) + i,
+              MakeTransformIterator(input.crbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), ref) - i,
+              MakeTransformIterator(input.cend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), ref) - i,
+              MakeTransformIterator(input.crend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+}
+
+TEST(TransformIterator, VectorNonConstReference) {
+  auto ref = [](ValueHolder& h) -> int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_titer::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_rtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test writing through the transform iterator.
+  std::list<int> transform_input({ 1, -1, 2, -2, 3, -3 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  std::transform(transform_input.begin(),
+                 transform_input.end(),
+                 MakeTransformIterator(transformed.begin(), ref),
+                 [](int v) { return -2 * v; });
+  ASSERT_EQ(std::vector<ValueHolder>({ -2, 2, -4, 4, -6, 6 }), transformed);
+}
+
+TEST(TransformIterator, VectorConstAndNonConstReference) {
+  struct Ref {
+    int& operator()(ValueHolder& h) const { return h.value; }
+    const int& operator()(const ValueHolder& h) const { return h.value; }
+  };
+  Ref ref;
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  // static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  // static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), ref),
+            MakeTransformIterator(input.cend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), ref),
+            MakeTransformIterator(input.crend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cbegin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cend(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), ref) + i,
+              MakeTransformIterator(input.cbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), ref) + i,
+              MakeTransformIterator(input.crbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), ref) - i,
+              MakeTransformIterator(input.cend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), ref) - i,
+              MakeTransformIterator(input.crend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), ref);
+  decltype(MakeTransformIterator(input.cbegin(), ref)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+  auto rit = MakeTransformIterator(input.rbegin(), ref);
+  decltype(MakeTransformIterator(input.crbegin(), ref)) crit(rit);
+  static_assert(!std::is_same<decltype(rit), decltype(crit)>::value, "Types must be different");
+  ASSERT_EQ(rit, crit);
+
+  // Test writing through the transform iterator.
+  std::list<int> transform_input({ 42, 73, 11, 17 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  std::transform(transform_input.begin(),
+                 transform_input.end(),
+                 MakeTransformIterator(transformed.begin(), ref),
+                 [](int v) { return -v; });
+  ASSERT_EQ(std::vector<ValueHolder>({ -42, -73, -11, -17 }), transformed);
+}
+
+TEST(TransformIterator, TransformRange) {
+  auto ref = [](ValueHolder& h) -> int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> data({ 1, 0, 1, 3, 1, 0 });
+
+  for (int& v : MakeTransformRange(data, ref)) {
+    v += 11;
+  }
+  ASSERT_EQ(std::vector<ValueHolder>({ 12, 11, 12, 14, 12, 11 }), data);
+}
+
+}  // namespace art
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
new file mode 100644
index 0000000..d0c1656
--- /dev/null
+++ b/compiler/utils/type_reference.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "string_reference.h"
+
+namespace art {
+
+class DexFile;
+
+// A type is located by its DexFile and the type_ids_ table index into that DexFile.
+struct TypeReference {
+  TypeReference(const DexFile* file, uint32_t index) : dex_file(file), type_index(index) { }
+
+  const DexFile* dex_file;
+  uint32_t type_index;
+};
+
+// Compare the actual referenced type names. Used for type reference deduplication.
+struct TypeReferenceValueComparator {
+  bool operator()(TypeReference tr1, TypeReference tr2) const {
+    // Note that we want to deduplicate identical boot image types even if they are
+    // referenced by different dex files, so we simply compare the descriptors.
+    StringReference sr1(tr1.dex_file, tr1.dex_file->GetTypeId(tr1.type_index).descriptor_idx_);
+    StringReference sr2(tr2.dex_file, tr2.dex_file->GetTypeId(tr2.type_index).descriptor_idx_);
+    return StringReferenceValueComparator()(sr1, sr2);
+  }
+};
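+
+// For example (illustrative only), the comparator can key an ordered container
+// used for deduplication:
+//    std::map<TypeReference, uint32_t, TypeReferenceValueComparator> type_map;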
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TYPE_REFERENCE_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5347bf0..f1a9915 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@
   EmitOperand(dst, src);
 }
 
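+// POPCNT r32, r/m32 is encoded F3 0F B8 /r (register and memory source forms
+// below).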
+void X86Assembler::popcntl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst, src);
+}
+
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -310,6 +326,14 @@
 }
 
 
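+// CMOVcc r32, r/m32 is encoded 0F 40+cc /r.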
+void X86Assembler::cmovl(Condition condition, Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + condition);
+  EmitOperand(dst, src);
+}
+
+
 void X86Assembler::setb(Condition condition, Register dst) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -703,6 +727,14 @@
 }
 
 
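+// The scalar compare instructions below set EFLAGS: COMISS is 0F 2F and
+// UCOMISS is 0F 2E; the double-precision COMISD/UCOMISD forms add a 66 prefix.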
+void X86Assembler::comiss(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::comisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -712,6 +744,15 @@
 }
 
 
+void X86Assembler::comisd(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::ucomiss(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -720,6 +761,14 @@
 }
 
 
+void X86Assembler::ucomiss(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -729,6 +778,15 @@
 }
 
 
+void X86Assembler::ucomisd(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -989,6 +1047,14 @@
 }
 
 
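+// CMP r/m8, imm8 is encoded 80 /7 ib.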
+void X86Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1877,491 +1943,8 @@
   EmitOperand(reg_or_opcode, operand);
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::X86Core(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = 4;
-
-void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
-  cfi_.SetCurrentCFAOffset(4);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
-    pushl(spill);
-    gpr_count++;
-    cfi_.AdjustCFAOffset(kFramePointerSize);
-    cfi_.RelOffset(DWARFReg(spill), 0);
-  }
-
-  // return address then method on stack.
-  int32_t adjust = frame_size - gpr_count * kFramePointerSize -
-      kFramePointerSize /*method*/ -
-      kFramePointerSize /*return address*/;
-  addl(ESP, Immediate(-adjust));
-  cfi_.AdjustCFAOffset(adjust);
-  pushl(method_reg.AsX86().AsCpuRegister());
-  cfi_.AdjustCFAOffset(kFramePointerSize);
-  DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size);
-
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86().IsCpuRegister()) {
-      int offset = frame_size + spill.getSpillOffset();
-      movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
-  }
-}
-
-void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-  // -kFramePointerSize for ArtMethod*.
-  int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
-  addl(ESP, Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
-    popl(spill);
-    cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
-    cfi_.Restore(DWARFReg(spill));
-  }
-  ret();
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void X86Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addl(ESP, Immediate(-adjust));
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void X86Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addl(ESP, Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
-  X86ManagedRegister src = msrc.AsX86();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    movl(Address(ESP, offs), src.AsCpuRegister());
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    movl(Address(ESP, offs), src.AsRegisterPairLow());
-    movl(Address(ESP, FrameOffset(offs.Int32Value()+4)),
-         src.AsRegisterPairHigh());
-  } else if (src.IsX87Register()) {
-    if (size == 4) {
-      fstps(Address(ESP, offs));
-    } else {
-      fstpl(Address(ESP, offs));
-    }
-  } else {
-    CHECK(src.IsXmmRegister());
-    if (size == 4) {
-      movss(Address(ESP, offs), src.AsXmmRegister());
-    } else {
-      movsd(Address(ESP, offs), src.AsXmmRegister());
-    }
-  }
-}
-
-void X86Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  X86ManagedRegister src = msrc.AsX86();
-  CHECK(src.IsCpuRegister());
-  movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
-void X86Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  X86ManagedRegister src = msrc.AsX86();
-  CHECK(src.IsCpuRegister());
-  movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
-void X86Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                         ManagedRegister) {
-  movl(Address(ESP, dest), Immediate(imm));
-}
-
-void X86Assembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                          ManagedRegister) {
-  fs()->movl(Address::Absolute(dest), Immediate(imm));
-}
-
-void X86Assembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                            FrameOffset fr_offs,
-                                            ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
-}
-
-void X86Assembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) {
-  fs()->movl(Address::Absolute(thr_offs), ESP);
-}
-
-void X86Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
-                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
-}
-
-void X86Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    movl(dest.AsCpuRegister(), Address(ESP, src));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    movl(dest.AsRegisterPairLow(), Address(ESP, src));
-    movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      flds(Address(ESP, src));
-    } else {
-      fldl(Address(ESP, src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      movss(dest.AsXmmRegister(), Address(ESP, src));
-    } else {
-      movsd(dest.AsXmmRegister(), Address(ESP, src));
-    }
-  }
-}
-
-void X86Assembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
-    fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset<4>(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      fs()->flds(Address::Absolute(src));
-    } else {
-      fs()->fldl(Address::Absolute(src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
-    } else {
-      fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
-    }
-  }
-}
-
-void X86Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(ESP, src));
-}
-
-void X86Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool unpoison_reference) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
-  }
-}
-
-void X86Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                              Offset offs) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-}
-
-void X86Assembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                        ThreadOffset<4> offs) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister());
-  fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
-}
-
-void X86Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  X86ManagedRegister reg = mreg.AsX86();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
-  } else {
-    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  X86ManagedRegister reg = mreg.AsX86();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
-  } else {
-    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  X86ManagedRegister src = msrc.AsX86();
-  if (!dest.Equals(src)) {
-    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
-      movl(dest.AsCpuRegister(), src.AsCpuRegister());
-    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
-      // Pass via stack and pop X87 register
-      subl(ESP, Immediate(16));
-      if (size == 4) {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstps(Address(ESP, 0));
-        movss(dest.AsXmmRegister(), Address(ESP, 0));
-      } else {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstpl(Address(ESP, 0));
-        movsd(dest.AsXmmRegister(), Address(ESP, 0));
-      }
-      addl(ESP, Immediate(16));
-    } else {
-      // TODO: x87, SSE
-      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
-    }
-  }
-}
-
-void X86Assembler::CopyRef(FrameOffset dest, FrameOffset src,
-                           ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  movl(scratch.AsCpuRegister(), Address(ESP, src));
-  movl(Address(ESP, dest), scratch.AsCpuRegister());
-}
-
-void X86Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                        ThreadOffset<4> thr_offs,
-                                        ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
-  Store(fr_offs, scratch, 4);
-}
-
-void X86Assembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs,
-                                      FrameOffset fr_offs,
-                                      ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  Load(scratch, fr_offs, 4);
-  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
-}
-
-void X86Assembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch,
-                        size_t size) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
-  }
-}
-
-void X86Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void X86Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister scratch, size_t size) {
-  CHECK(scratch.IsNoRegister());
-  CHECK_EQ(size, 4u);
-  pushl(Address(ESP, src));
-  popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  movl(scratch, Address(ESP, src_base));
-  movl(scratch, Address(scratch, src_offset));
-  movl(Address(ESP, dest), scratch);
-}
-
-void X86Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister scratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  CHECK(scratch.IsNoRegister());
-  pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
-  popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  CHECK_EQ(dest.Int32Value(), src.Int32Value());
-  movl(scratch, Address(ESP, src));
-  pushl(Address(scratch, src_offset));
-  popl(Address(scratch, dest_offset));
-}
-
-void X86Assembler::MemoryBarrier(ManagedRegister) {
-  mfence();
-}
-
-void X86Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  X86ManagedRegister out_reg = mout_reg.AsX86();
-  X86ManagedRegister in_reg = min_reg.AsX86();
-  CHECK(in_reg.IsCpuRegister());
-  CHECK(out_reg.IsCpuRegister());
-  VerifyObject(in_reg, null_allowed);
-  if (null_allowed) {
-    Label null_arg;
-    if (!out_reg.Equals(in_reg)) {
-      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-    }
-    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-    j(kZero, &null_arg);
-    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
-  }
-}
-
-void X86Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  if (null_allowed) {
-    Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
-    j(kZero, &null_arg);
-    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-  }
-  Store(out_off, scratch, 4);
-}
-
-// Given a handle scope entry, load the associated reference.
-void X86Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  X86ManagedRegister out_reg = mout_reg.AsX86();
-  X86ManagedRegister in_reg = min_reg.AsX86();
-  CHECK(out_reg.IsCpuRegister());
-  CHECK(in_reg.IsCpuRegister());
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-  }
-  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-  j(kZero, &null_arg);
-  movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
-  Bind(&null_arg);
-}
-
-void X86Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
-  X86ManagedRegister base = mbase.AsX86();
-  CHECK(base.IsCpuRegister());
-  call(Address(base.AsCpuRegister(), offset.Int32Value()));
-  // TODO: place reference map on call
-}
-
-void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  movl(scratch, Address(ESP, base));
-  call(Address(scratch, offset));
-}
-
-void X86Assembler::CallFromThread32(ThreadOffset<4> offset, ManagedRegister /*mscratch*/) {
-  fs()->call(Address::Absolute(offset));
-}
-
-void X86Assembler::GetCurrentThread(ManagedRegister tr) {
-  fs()->movl(tr.AsX86().AsCpuRegister(),
-             Address::Absolute(Thread::SelfOffset<4>()));
-}
-
-void X86Assembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<4>()));
-  movl(Address(ESP, offset), scratch.AsCpuRegister());
-}
-
-void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86ExceptionSlowPath* slow = new X86ExceptionSlowPath(stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<4>()), Immediate(0));
-  j(kNotEqual, slow->Entry());
-}
-
-void X86ExceptionSlowPath::Emit(Assembler *sasm) {
-  X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  // Note: the return value is dead
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception as argument in EAX
-  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<4>()));
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException)));
-  // this call should never return
-  __ int3();
-#undef __
-}
-
 void X86Assembler::AddConstantArea() {
-  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   // Generate the data for the literal area.
   for (size_t i = 0, e = area.size(); i < e; i++) {
     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
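The return type change above swaps an owning-container reference for a non-owning view, so callers of GetBuffer() no longer depend on whether the constant area is backed by std::vector or ArenaVector. A hedged sketch of such a view (the real art::ArrayRef in utils/array_ref.h is richer; the name ViewSketch is illustrative):

#include <cstddef>

template <typename T>
class ViewSketch {
 public:
  template <typename Container>
  explicit ViewSketch(const Container& c) : data_(c.data()), size_(c.size()) {}
  size_t size() const { return size_; }
  const T& operator[](size_t i) const { return data_[i]; }

 private:
  const T* data_;
  size_t size_;
};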
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index b50fda9..63aa4a4 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -18,12 +18,16 @@
 #define ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
 
 #include <vector>
+
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_x86.h"
 #include "globals.h"
 #include "managed_register_x86.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -192,7 +196,7 @@
     return result;
   }
 
-  static Address Absolute(ThreadOffset<4> addr) {
+  static Address Absolute(ThreadOffset32 addr) {
     return Absolute(addr.Int32Value());
   }
 
@@ -260,7 +264,7 @@
  */
 class ConstantArea {
  public:
-  ConstantArea() {}
+  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
@@ -290,18 +294,18 @@
     return buffer_.size() * elem_size_;
   }
 
-  const std::vector<int32_t>& GetBuffer() const {
-    return buffer_;
+  ArrayRef<const int32_t> GetBuffer() const {
+    return ArrayRef<const int32_t>(buffer_);
   }
 
  private:
   static constexpr size_t elem_size_ = sizeof(int32_t);
-  std::vector<int32_t> buffer_;
+  ArenaVector<int32_t> buffer_;
 };
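With the arena-backed buffer above, the constant area's storage now comes from the compiler's arena and is released in bulk with it. Minimal usage, following the same API the updated CreateBuffer test exercises later in this diff:

#include "base/arena_allocator.h"
#include "base/arena_containers.h"

void Example() {  // illustrative scaffolding, not from this change
  ArenaPool pool;
  ArenaAllocator arena(&pool);
  ArenaVector<int32_t> buffer(arena.Adapter(kArenaAllocAssembler));
  buffer.push_back(42);  // arena-owned memory, released with the arena
}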
 
 class X86Assembler FINAL : public Assembler {
  public:
-  X86Assembler() {}
+  explicit X86Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86Assembler() {}
 
   /*
@@ -330,11 +334,15 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+
   void bsfl(Register dst, Register src);
   void bsfl(Register dst, const Address& src);
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void popcntl(Register dst, Register src);
+  void popcntl(Register dst, const Address& src);
+
   void rorl(Register reg, const Immediate& imm);
   void rorl(Register operand, Register shifter);
   void roll(Register reg, const Immediate& imm);
@@ -359,6 +367,7 @@
   void leal(Register dst, const Address& src);
 
   void cmovl(Condition condition, Register dst, Register src);
+  void cmovl(Condition condition, Register dst, const Address& src);
 
   void setb(Condition condition, Register dst);
 
@@ -415,9 +424,13 @@
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
+  void comiss(XmmRegister a, const Address& b);
   void comisd(XmmRegister a, XmmRegister b);
+  void comisd(XmmRegister a, const Address& b);
   void ucomiss(XmmRegister a, XmmRegister b);
+  void ucomiss(XmmRegister a, const Address& b);
   void ucomisd(XmmRegister a, XmmRegister b);
+  void ucomisd(XmmRegister a, const Address& b);
 
   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
@@ -469,6 +482,7 @@
   void xchgl(Register dst, Register src);
   void xchgl(Register reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(Register reg, const Immediate& imm);
@@ -618,123 +632,6 @@
   void Bind(NearLabel* label);
 
   //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void MemoryBarrier(ManagedRegister) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
-  //
   // Heap poisoning.
   //
 
@@ -742,6 +639,12 @@
   void PoisonHeapReference(Register reg) { negl(reg); }
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(Register reg) { negl(reg); }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(Register reg) {
     if (kPoisonHeapReferences) {
@@ -831,15 +734,6 @@
   EmitUint8(0x66);
 }
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class X86ExceptionSlowPath FINAL : public SlowPath {
- public:
-  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const size_t stack_adjust_;
-};
-
 }  // namespace x86
 }  // namespace art
 
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 16f9db4..307e034 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -16,13 +16,16 @@
 
 #include "assembler_x86.h"
 
+#include "base/arena_allocator.h"
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
 namespace art {
 
 TEST(AssemblerX86, CreateBuffer) {
-  AssemblerBuffer buffer;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  AssemblerBuffer buffer(&arena);
   AssemblerBuffer::EnsureCapacity ensured(&buffer);
   buffer.Emit<uint8_t>(0x42);
   ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
@@ -260,6 +263,19 @@
   DriverStr(expected, "bsrl_address");
 }
 
+TEST_F(AssemblerX86Test, Popcntl) {
+  DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
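For reference, the value popcntl computes, as a portable C++ sketch (Kernighan's loop; PopCount32 is an illustrative name):

#include <cstdint>

int PopCount32(uint32_t x) {
  int count = 0;
  for (; x != 0; x &= x - 1) {  // clears the lowest set bit each pass
    ++count;
  }
  return count;
}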
 // Rorl only allows CL as the shift count.
 std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
   std::ostringstream str;
@@ -306,6 +322,59 @@
   DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
 }
 
+TEST_F(AssemblerX86Test, ComissAddr) {
+  GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "comiss 0(%EAX), %xmm0\n";
+  DriverStr(expected, "comiss");
+}
+
+TEST_F(AssemblerX86Test, UComissAddr) {
+  GetAssembler()->ucomiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "ucomiss 0(%EAX), %xmm0\n";
+  DriverStr(expected, "ucomiss");
+}
+
+TEST_F(AssemblerX86Test, ComisdAddr) {
+  GetAssembler()->comisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "comisd 0(%EAX), %xmm0\n";
+  DriverStr(expected, "comisd");
+}
+
+TEST_F(AssemblerX86Test, UComisdAddr) {
+  GetAssembler()->ucomisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "ucomisd 0(%EAX), %xmm0\n";
+  DriverStr(expected, "ucomisd");
+}
+
+TEST_F(AssemblerX86Test, RoundSS) {
+  GetAssembler()->roundss(
+      x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1), x86::Immediate(1));
+  const char* expected = "roundss $1, %xmm1, %xmm0\n";
+  DriverStr(expected, "roundss");
+}
+
+TEST_F(AssemblerX86Test, RoundSD) {
+  GetAssembler()->roundsd(
+      x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1), x86::Immediate(1));
+  const char* expected = "roundsd $1, %xmm1, %xmm0\n";
+  DriverStr(expected, "roundsd");
+}
+
+TEST_F(AssemblerX86Test, CmovlAddress) {
+  GetAssembler()->cmovl(x86::kEqual, x86::Register(x86::EAX), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  GetAssembler()->cmovl(x86::kNotEqual, x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::ESI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  GetAssembler()->cmovl(x86::kEqual, x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EAX), x86::TIMES_4, 12));
+  const char* expected =
+    "cmovzl 0xc(%EDI,%EBX,4), %eax\n"
+    "cmovnzl 0xc(%ESI,%EBX,4), %edi\n"
+    "cmovzl 0xc(%EDI,%EAX,4), %edi\n";
+
+  DriverStr(expected, "cmovl_address");
+}
+
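The register-value semantics of the new memory-operand cmovl, sketched in C++. One caveat worth keeping in mind: on x86 the memory access happens regardless of the condition, so a faulting address faults either way:

#include <cstdint>

// cmovzl dst, [src]: dst is replaced only when ZF is set.
uint32_t CmovZero(bool zf, uint32_t dst, const uint32_t* src) {
  uint32_t loaded = *src;  // the load is unconditional on x86
  return zf ? loaded : dst;
}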
 /////////////////
 // Near labels //
 /////////////////
@@ -345,4 +414,10 @@
   DriverStr(expected, "near_label");
 }
 
+TEST_F(AssemblerX86Test, Cmpb) {
+  GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0));
+  const char* expected = "cmpb $0, 128(%EDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h
index 2dfb65c..0bc1560 100644
--- a/compiler/utils/x86/constants_x86.h
+++ b/compiler/utils/x86/constants_x86.h
@@ -97,6 +97,8 @@
   kNotZero      = kNotEqual,
   kNegative     = kSign,
   kPositive     = kNotSign,
+  kCarrySet     = kBelow,
+  kCarryClear   = kAboveEqual,
   kUnordered    = kParityEven
 };
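The two aliases added above are exact synonyms at the encoding level: unsigned "below" is defined by the carry flag, so JB/JC share one condition code, as do JAE/JNC. That makes the aliases checkable at compile time:

static_assert(x86::kCarrySet == x86::kBelow, "CF set is the 'below' condition");
static_assert(x86::kCarryClear == x86::kAboveEqual, "CF clear is 'above or equal'");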
 
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
new file mode 100644
index 0000000..77af885
--- /dev/null
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_x86.h"
+
+#include "utils/assembler.h"
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "thread.h"
+
+namespace art {
+namespace x86 {
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class X86ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const size_t stack_adjust_;
+};
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
+constexpr size_t kFramePointerSize = 4;
+
+#define __ asm_.
+
+void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                      ManagedRegister method_reg,
+                                      ArrayRef<const ManagedRegister> spill_regs,
+                                      const ManagedRegisterEntrySpills& entry_spills) {
+  DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
+  cfi().SetCurrentCFAOffset(4);  // Return address on stack.
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
+    __ pushl(spill);
+    gpr_count++;
+    cfi().AdjustCFAOffset(kFramePointerSize);
+    cfi().RelOffset(DWARFReg(spill), 0);
+  }
+
+  // Account for the return address and the method already on the stack.
+  int32_t adjust = frame_size - gpr_count * kFramePointerSize -
+      kFramePointerSize /*method*/ -
+      kFramePointerSize /*return address*/;
+  __ addl(ESP, Immediate(-adjust));
+  cfi().AdjustCFAOffset(adjust);
+  __ pushl(method_reg.AsX86().AsCpuRegister());
+  cfi().AdjustCFAOffset(kFramePointerSize);
+  DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86().IsCpuRegister()) {
+      int offset = frame_size + spill.getSpillOffset();
+      __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(spill.AsX86().IsXmmRegister());
+      if (spill.getSize() == 8) {
+        __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      }
+    }
+  }
+}
+
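For orientation, the frame BuildFrame leaves behind, reconstructed from the pushes above (a sketch; offsets are from ESP once BuildFrame finishes):

//   ESP + frame_size - 4 : return address, pushed by the caller's call
//   below that           : callee-save GPRs, pushed from last to first
//   ...                  : the `adjust` bytes of locals and out-args
//   ESP + 0              : ArtMethod* from method_reg, pushed last
// Entry spills are then stored at ESP + frame_size + getSpillOffset(),
// i.e. into the caller's outgoing-argument area.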
+void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                       ArrayRef<const ManagedRegister> spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+  // -kFramePointerSize for ArtMethod*.
+  int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
+  __ addl(ESP, Immediate(adjust));
+  cfi().AdjustCFAOffset(-adjust);
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
+    __ popl(spill);
+    cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+    cfi().Restore(DWARFReg(spill));
+  }
+  __ ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void X86JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  __ addl(ESP, Immediate(-adjust));
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(X86Assembler* assembler, size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  assembler->addl(ESP, Immediate(adjust));
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void X86JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(&asm_, adjust);
+}
+
+void X86JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86ManagedRegister src = msrc.AsX86();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ movl(Address(ESP, offs), src.AsCpuRegister());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ movl(Address(ESP, offs), src.AsRegisterPairLow());
+    __ movl(Address(ESP, FrameOffset(offs.Int32Value()+4)), src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      __ fstps(Address(ESP, offs));
+    } else {
+      __ fstpl(Address(ESP, offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      __ movss(Address(ESP, offs), src.AsXmmRegister());
+    } else {
+      __ movsd(Address(ESP, offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86ManagedRegister src = msrc.AsX86();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(ESP, dest), src.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86ManagedRegister src = msrc.AsX86();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(ESP, dest), src.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) {
+  __ movl(Address(ESP, dest), Immediate(imm));
+}
+
+void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                                    FrameOffset fr_offs,
+                                                    ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
+  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
+  __ fs()->movl(Address::Absolute(thr_offs), ESP);
+}
+
+void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
+                                         ManagedRegister /*src*/,
+                                         FrameOffset /*in_off*/,
+                                         ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // This case currently only exists for ARM.
+}
+
+void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ movl(dest.AsCpuRegister(), Address(ESP, src));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ movl(dest.AsRegisterPairLow(), Address(ESP, src));
+    __ movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ flds(Address(ESP, src));
+    } else {
+      __ fldl(Address(ESP, src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ movss(dest.AsXmmRegister(), Address(ESP, src));
+    } else {
+      __ movsd(dest.AsXmmRegister(), Address(ESP, src));
+    }
+  }
+}
+
+void X86JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
+    __ fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ fs()->flds(Address::Absolute(src));
+    } else {
+      __ fs()->fldl(Address::Absolute(src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
+    } else {
+      __ fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
+    }
+  }
+}
+
+void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(ESP, src));
+}
+
+void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                   ManagedRegister base,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister() && base.AsX86().IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                      ManagedRegister base,
+                                      Offset offs) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister() && base.AsX86().IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+}
+
+void X86JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister());
+  __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
+}
+
+void X86JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86ManagedRegister reg = mreg.AsX86();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86ManagedRegister reg = mreg.AsX86();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  X86ManagedRegister src = msrc.AsX86();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      __ movl(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      __ subl(ESP, Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstps(Address(ESP, 0));
+        __ movss(dest.AsXmmRegister(), Address(ESP, 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstpl(Address(ESP, 0));
+        __ movsd(dest.AsXmmRegister(), Address(ESP, 0));
+      }
+      __ addl(ESP, Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
+
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ movl(scratch.AsCpuRegister(), Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                ThreadOffset32 thr_offs,
+                                                ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
+  Store(fr_offs, scratch, 4);
+}
+
+void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                              FrameOffset fr_offs,
+                                              ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 4);
+  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                ManagedRegister /*src_base*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*scratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86JNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                ManagedRegister scratch,
+                                size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  __ pushl(Address(ESP, src));
+  __ popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src_base,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  __ movl(scratch, Address(ESP, src_base));
+  __ movl(scratch, Address(scratch, src_offset));
+  __ movl(Address(ESP, dest), scratch);
+}
+
+void X86JNIMacroAssembler::Copy(ManagedRegister dest,
+                                Offset dest_offset,
+                                ManagedRegister src,
+                                Offset src_offset,
+                                ManagedRegister scratch,
+                                size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  __ pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
+  __ popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  __ movl(scratch, Address(ESP, src));
+  __ pushl(Address(scratch, src_offset));
+  __ popl(Address(scratch, dest_offset));
+}
+
+void X86JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
+  __ mfence();
+}
+
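mfence above is the full hardware barrier. The closest portable C++ analogue, as a sketch (typically compiled to mfence on x86, though it is not a drop-in replacement at this level):

#include <atomic>

void FullBarrier() {
  std::atomic_thread_fence(std::memory_order_seq_cst);
}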
+void X86JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister min_reg,
+                                                  bool null_allowed) {
+  X86ManagedRegister out_reg = mout_reg.AsX86();
+  X86ManagedRegister in_reg = min_reg.AsX86();
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
+  }
+}
+
+void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister mscratch,
+                                                  bool null_allowed) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+  }
+  Store(out_off, scratch, 4);
+}
+
+// Given a handle scope entry, load the associated reference.
+void X86JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                        ManagedRegister min_reg) {
+  X86ManagedRegister out_reg = mout_reg.AsX86();
+  X86ManagedRegister in_reg = min_reg.AsX86();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  __ j(kZero, &null_arg);
+  __ movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  __ Bind(&null_arg);
+}
+
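The branch-around-load pattern above has simple reference semantics: a handle scope entry is an Object**, and a null entry must produce a null reference without touching memory. As a C++ sketch (the template and name are illustrative):

template <typename Object>
Object* DerefEntry(Object** entry) {
  return (entry != nullptr) ? *entry : nullptr;  // j(kZero) skips the movl
}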
+void X86JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86ManagedRegister base = mbase.AsX86();
+  CHECK(base.IsCpuRegister());
+  __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  __ movl(scratch, Address(ESP, base));
+  __ call(Address(scratch, offset));
+}
+
+void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+  __ fs()->call(Address::Absolute(offset));
+}
+
+void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ fs()->movl(tr.AsX86().AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+}
+
+void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset,
+                                            ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+  __ movl(Address(ESP, offset), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86ExceptionSlowPath* slow = new (__ GetArena()) X86ExceptionSlowPath(stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
+  __ j(kNotEqual, slow->Entry());
+}
+
+#undef __
+
+void X86ExceptionSlowPath::Emit(Assembler *sasm) {
+  X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead.
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(sp_asm, stack_adjust_);
+  }
+  // Pass the exception as the argument in EAX.
+  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pDeliverException)));
+  // This call should never return.
+  __ int3();
+#undef __
+}
+
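Taken together, ExceptionPoll and the slow path above generate roughly this control flow (a sketch in assembly-style comments):

//   cmpl fs:[exception_offset], 0    ; pending exception?
//   jne  slow_path                   ; taken only on the exceptional path
//   ...                              ; fast path continues
// slow_path:
//   (undo any extra frame adjustment)
//   movl eax, fs:[exception_offset]  ; pass the exception in EAX
//   call fs:[pDeliverException]      ; never returns
//   int3                             ; trap if it somehow does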
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
new file mode 100644
index 0000000..3f07ede
--- /dev/null
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
+#define ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
+
+#include <vector>
+
+#include "assembler_x86.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art {
+namespace x86 {
+
+class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> {
+ public:
+  explicit X86JNIMacroAssembler(ArenaAllocator* arena) : JNIMacroAssemblerFwd(arena) {}
+  virtual ~X86JNIMacroAssembler() {}
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
+                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load it into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler);
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
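The class above derives from JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32>; the assumed shape of that forwarding base, sketched for orientation (the real template lives in utils/jni_macro_assembler.h and carries more machinery):

template <typename AsmType>
class JNIMacroAssemblerFwdSketch {
 protected:
  explicit JNIMacroAssemblerFwdSketch(ArenaAllocator* arena) : asm_(arena) {}
  AsmType asm_;  // `#define __ asm_.` in the .cc keeps call sites terse
};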
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 4e8c41e..c0c2b65 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_
 
 #include "constants_x86.h"
-#include "dwarf/register.h"
+#include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -89,64 +89,64 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
-  ByteRegister AsByteRegister() const {
+  constexpr ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
     return static_cast<ByteRegister>(id_);
   }
 
-  Register AsCpuRegister() const {
+  constexpr Register AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return static_cast<Register>(id_);
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return static_cast<XmmRegister>(id_ - kNumberOfCpuRegIds);
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     return static_cast<RegisterPair>(id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds));
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -160,33 +160,33 @@
   // then false is returned.
   bool Overlaps(const X86ManagedRegister& other) const;
 
-  static X86ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86ManagedRegister FromXmmRegister(XmmRegister r) {
+  static constexpr X86ManagedRegister FromXmmRegister(XmmRegister r) {
     CHECK_NE(r, kNoXmmRegister);
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86ManagedRegister FromRegId(int reg_id) {
     X86ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace x86
 
-inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
+constexpr inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
   x86::X86ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
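
Making these accessors and factories constexpr lets register mappings fold at compile time. A hedged example of the payoff, assuming ART's CHECK macros remain usable in constant expressions and the x86::EAX enumerator from constants_x86.h:

    constexpr x86::X86ManagedRegister kScratch =
        x86::X86ManagedRegister::FromCpuRegister(x86::EAX);
    static_assert(kScratch.IsCpuRegister(), "EAX must map onto a CPU register id");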
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 9eb5e67..ddc8244 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -223,6 +223,19 @@
 }
 
 
+void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (is64bit) {
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + c);
+  EmitOperand(dst.LowBits(), src);
+}
+
+
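The memory-operand cmov reuses the 0F 4x opcode family, where the low nibble of the second opcode byte is the condition code; that is what EmitUint8(0x40 + c) encodes. One concrete byte sequence, worked out by hand (illustrative):

    // cmovzl 0xc(%rdi,%rbx,4), %eax
    //   0F 44      opcode: 0x40 + kEqual(4)
    //   44         ModRM: mod=01 (disp8), reg=eax, rm=SIB
    //   9F         SIB: scale=4, index=rbx, base=rdi
    //   0C         disp8 = 12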
 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalByteRegNormalizingRex32(dst, src);
@@ -1211,8 +1224,19 @@
 }
 
 
+void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
+  EmitOptionalRex32(address);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
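cmpb is the group-1 immediate form: opcode 80 with the CMP extension (7) in the ModRM reg field, followed by a single immediate byte. Note that the guard is only CHECK(imm.is_int32()) while just one byte is emitted, so values outside int8 range are silently truncated to their low byte. Worked encoding for the test added further down (cmpb $0, 128(%rdi)):

    80 BF 80 00 00 00 00   // 80 = opcode, BF = ModRM (mod=10/disp32, reg=7, rm=rdi),
                           // 80 00 00 00 = disp32(128), 00 = imm8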
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOperandSizeOverride();
   EmitOptionalRex32(address);
   EmitComplex(7, address, imm);
@@ -1221,6 +1245,7 @@
 
 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOptionalRex32(reg);
   EmitComplex(7, Operand(reg), imm);
 }
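
The new CHECK(imm.is_int32()) guards matter because Immediate carries a 64-bit value on x86-64 while cmpw/cmpl can encode at most a 32-bit immediate. A hypothetical call that now fails the CHECK at assembly time instead of silently truncating:

    assembler->cmpl(CpuRegister(RAX), Immediate(INT64_C(0x100000000)));  // is_int32() is false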
@@ -1252,6 +1277,7 @@
 
 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOptionalRex32(address);
   EmitComplex(7, address, imm);
 }
@@ -2244,6 +2270,42 @@
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
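All four popcnt emitters share the F3 0F B8 /r encoding; the l and q forms differ only in REX handling. A self-contained sketch of the register-direct 32-bit case for low registers (no REX needed; illustrative, not patch code):

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> EncodePopcntl(uint8_t dst, uint8_t src) {
      // POPCNT r32, r/m32 = F3 0F B8 /r; ModRM mod=11 selects register-direct.
      return {0xF3, 0x0F, 0xB8,
              static_cast<uint8_t>(0xC0 | (dst << 3) | src)};
    }
    // EncodePopcntl(1, 0) yields F3 0F B8 C8, i.e. popcntl %eax, %ecx.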
 void X86_64Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -2577,545 +2639,8 @@
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
-}
-static dwarf::Reg DWARFReg(FloatRegister reg) {
-  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = 8;
-
-void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& spill_regs,
-                                 const ManagedRegisterEntrySpills& entry_spills) {
-  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
-  cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsCpuRegister()) {
-      pushq(spill.AsCpuRegister());
-      gpr_count++;
-      cfi_.AdjustCFAOffset(kFramePointerSize);
-      cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
-    }
-  }
-  // return address then method on stack.
-  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
-                          - (gpr_count * kFramePointerSize)
-                          - kFramePointerSize /*return address*/;
-  subq(CpuRegister(RSP), Immediate(rest_of_frame));
-  cfi_.AdjustCFAOffset(rest_of_frame);
-
-  // spill xmms
-  int64_t offset = rest_of_frame;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsXmmRegister()) {
-      offset -= sizeof(double);
-      movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
-      cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
-    }
-  }
-
-  DCHECK_EQ(kX86_64PointerSize, kFramePointerSize);
-
-  movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
-
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86_64().IsCpuRegister()) {
-      if (spill.getSize() == 8) {
-        movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-             spill.AsX86_64().AsCpuRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister());
-      }
-    } else {
-      if (spill.getSize() == 8) {
-        movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
-      }
-    }
-  }
-}
-
-void X86_64Assembler::RemoveFrame(size_t frame_size,
-                            const std::vector<ManagedRegister>& spill_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-  int gpr_count = 0;
-  // unspill xmms
-  int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsXmmRegister()) {
-      offset += sizeof(double);
-      movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
-      cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
-    } else {
-      gpr_count++;
-    }
-  }
-  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
-  addq(CpuRegister(RSP), Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsCpuRegister()) {
-      popq(spill.AsCpuRegister());
-      cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
-      cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
-    }
-  }
-  ret();
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCpuRegister()) {
-    if (size == 4) {
-      CHECK_EQ(4u, size);
-      movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
-    } else {
-      CHECK_EQ(8u, size);
-      movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
-    }
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
-    movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
-         src.AsRegisterPairHigh());
-  } else if (src.IsX87Register()) {
-    if (size == 4) {
-      fstps(Address(CpuRegister(RSP), offs));
-    } else {
-      fstpl(Address(CpuRegister(RSP), offs));
-    }
-  } else {
-    CHECK(src.IsXmmRegister());
-    if (size == 4) {
-      movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
-    } else {
-      movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
-    }
-  }
-}
-
-void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  CHECK(src.IsCpuRegister());
-  movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  CHECK(src.IsCpuRegister());
-  movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                            ManagedRegister) {
-  movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                               ManagedRegister) {
-  gs()->movl(Address::Absolute(dest, true), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
-  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
-  gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
-}
-
-void X86_64Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
-                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
-}
-
-void X86_64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    if (size == 4) {
-      CHECK_EQ(4u, size);
-      movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-    } else {
-      CHECK_EQ(8u, size);
-      movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-    }
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
-    movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      flds(Address(CpuRegister(RSP), src));
-    } else {
-      fldl(Address(CpuRegister(RSP), src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
-    } else {
-      movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
-    }
-  }
-}
-
-void X86_64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      gs()->flds(Address::Absolute(src, true));
-    } else {
-      gs()->fldl(Address::Absolute(src, true));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
-    } else {
-      gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
-    }
-  }
-}
-
-void X86_64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-}
-
-void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                              bool unpoison_reference) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                              Offset offs) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-}
-
-void X86_64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister());
-  gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
-}
-
-void X86_64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  X86_64ManagedRegister reg = mreg.AsX86_64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
-  } else {
-    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  X86_64ManagedRegister reg = mreg.AsX86_64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
-  } else {
-    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  if (!dest.Equals(src)) {
-    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
-      movq(dest.AsCpuRegister(), src.AsCpuRegister());
-    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
-      // Pass via stack and pop X87 register
-      subl(CpuRegister(RSP), Immediate(16));
-      if (size == 4) {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstps(Address(CpuRegister(RSP), 0));
-        movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
-      } else {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstpl(Address(CpuRegister(RSP), 0));
-        movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
-      }
-      addq(CpuRegister(RSP), Immediate(16));
-    } else {
-      // TODO: x87, SSE
-      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
-    }
-  }
-}
-
-void X86_64Assembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
-  movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<8> thr_offs,
-                                             ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
-  Store(fr_offs, scratch, 8);
-}
-
-void X86_64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  Load(scratch, fr_offs, 8);
-  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch,
-                           size_t size) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
-  }
-}
-
-void X86_64Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void X86_64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister scratch, size_t size) {
-  CHECK(scratch.IsNoRegister());
-  CHECK_EQ(size, 4u);
-  pushq(Address(CpuRegister(RSP), src));
-  popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  movq(scratch, Address(CpuRegister(RSP), src_base));
-  movq(scratch, Address(scratch, src_offset));
-  movq(Address(CpuRegister(RSP), dest), scratch);
-}
-
-void X86_64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister scratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  CHECK(scratch.IsNoRegister());
-  pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
-  popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  CHECK_EQ(dest.Int32Value(), src.Int32Value());
-  movq(scratch, Address(CpuRegister(RSP), src));
-  pushq(Address(scratch, src_offset));
-  popq(Address(scratch, dest_offset));
-}
-
-void X86_64Assembler::MemoryBarrier(ManagedRegister) {
-  mfence();
-}
-
-void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
-  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
-  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
-    // Use out_reg as indicator of null.
-    in_reg = out_reg;
-    // TODO: movzwl
-    movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-  CHECK(in_reg.IsCpuRegister());
-  CHECK(out_reg.IsCpuRegister());
-  VerifyObject(in_reg, null_allowed);
-  if (null_allowed) {
-    Label null_arg;
-    if (!out_reg.Equals(in_reg)) {
-      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-    }
-    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-    j(kZero, &null_arg);
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-}
-
-void X86_64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  if (null_allowed) {
-    Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
-    j(kZero, &null_arg);
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-  Store(out_off, scratch, 8);
-}
-
-// Given a handle scope entry, load the associated reference.
-void X86_64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
-  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
-  CHECK(out_reg.IsCpuRegister());
-  CHECK(in_reg.IsCpuRegister());
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-  }
-  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-  j(kZero, &null_arg);
-  movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
-  Bind(&null_arg);
-}
-
-void X86_64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86_64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
-  X86_64ManagedRegister base = mbase.AsX86_64();
-  CHECK(base.IsCpuRegister());
-  call(Address(base.AsCpuRegister(), offset.Int32Value()));
-  // TODO: place reference map on call
-}
-
-void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  movq(scratch, Address(CpuRegister(RSP), base));
-  call(Address(scratch, offset));
-}
-
-void X86_64Assembler::CallFromThread64(ThreadOffset<8> offset, ManagedRegister /*mscratch*/) {
-  gs()->call(Address::Absolute(offset, true));
-}
-
-void X86_64Assembler::GetCurrentThread(ManagedRegister tr) {
-  gs()->movq(tr.AsX86_64().AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true));
-}
-
-void X86_64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true));
-  movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
-}
-
-// Slowpath entered when Thread::Current()->_exception is non-null
-class X86_64ExceptionSlowPath FINAL : public SlowPath {
- public:
-  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const size_t stack_adjust_;
-};
-
-void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86_64ExceptionSlowPath* slow = new X86_64ExceptionSlowPath(stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<8>(), true), Immediate(0));
-  j(kNotEqual, slow->Entry());
-}
-
-void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
-  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  // Note: the return value is dead
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception as argument in RDI
-  __ gs()->movq(CpuRegister(RDI), Address::Absolute(Thread::ExceptionOffset<8>(), true));
-  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), true));
-  // this call should never return
-  __ int3();
-#undef __
-}
-
 void X86_64Assembler::AddConstantArea() {
-  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   for (size_t i = 0, e = area.size(); i < e; i++) {
     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     EmitInt32(area[i]);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e3..a4166f9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -19,13 +19,16 @@
 
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/macros.h"
 #include "constants_x86_64.h"
 #include "globals.h"
 #include "managed_register_x86_64.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 namespace art {
 namespace x86_64 {
@@ -256,7 +259,7 @@
   }
 
   // If no_rip is true then the Absolute address isn't RIP relative.
-  static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
+  static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
     return Absolute(addr.Int32Value(), no_rip);
   }
 
@@ -270,7 +273,7 @@
  */
 class ConstantArea {
  public:
-  ConstantArea() {}
+  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
@@ -296,13 +299,13 @@
     return buffer_.size() * elem_size_;
   }
 
-  const std::vector<int32_t>& GetBuffer() const {
-    return buffer_;
+  ArrayRef<const int32_t> GetBuffer() const {
+    return ArrayRef<const int32_t>(buffer_);
   }
 
  private:
   static constexpr size_t elem_size_ = sizeof(int32_t);
-  std::vector<int32_t> buffer_;
+  ArenaVector<int32_t> buffer_;
 };
 
 
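GetBuffer now hands out a non-owning view rather than exposing the underlying vector, which keeps the arena-backed storage an implementation detail. Typical traversal, as AddConstantArea does above (a sketch, assuming ArrayRef's usual iterator interface):

    ArrayRef<const int32_t> area = constant_area_.GetBuffer();
    for (int32_t value : area) {
      EmitInt32(value);
    }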
@@ -332,7 +335,7 @@
 
 class X86_64Assembler FINAL : public Assembler {
  public:
-  X86_64Assembler() {}
+  explicit X86_64Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86_64Assembler() {}
 
   /*
@@ -366,6 +369,7 @@
 
   void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
   void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
+  void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
 
   void movzxb(CpuRegister dst, CpuRegister src);
   void movzxb(CpuRegister dst, const Address& src);
@@ -503,6 +507,7 @@
   void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
@@ -647,6 +652,11 @@
   void bsrq(CpuRegister dst, CpuRegister src);
   void bsrq(CpuRegister dst, const Address& src);
 
+  void popcntl(CpuRegister dst, CpuRegister src);
+  void popcntl(CpuRegister dst, const Address& src);
+  void popcntq(CpuRegister dst, CpuRegister src);
+  void popcntq(CpuRegister dst, const Address& src);
+
   void rorl(CpuRegister reg, const Immediate& imm);
   void rorl(CpuRegister operand, CpuRegister shifter);
   void roll(CpuRegister reg, const Immediate& imm);
@@ -690,124 +700,6 @@
   }
   void Bind(NearLabel* label);
 
-  //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size);
-
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void MemoryBarrier(ManagedRegister) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
-                                     ManagedRegister src);
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
@@ -849,6 +741,12 @@
   void PoisonHeapReference(CpuRegister reg) { negl(reg); }
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(CpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(CpuRegister reg) {
     if (kPoisonHeapReferences) {
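
MaybePoisonHeapReference completes the pair: both poisoning and unpoisoning are negl, and two's-complement negation is its own inverse, so a poison followed by an unpoison restores the reference. The invariant in plain C++ (sketch, not patch code):

    #include <cstdint>

    uint32_t Poison(uint32_t ref) {
      return 0u - ref;  // what negl computes, without signed-overflow UB
    }
    // For every ref: Poison(Poison(ref)) == ref.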
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca..36c966b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -22,12 +22,16 @@
 
 #include "base/bit_utils.h"
 #include "base/stl_util.h"
+#include "jni_macro_assembler_x86_64.h"
 #include "utils/assembler_test.h"
+#include "utils/jni_macro_assembler_test.h"
 
 namespace art {
 
 TEST(AssemblerX86_64, CreateBuffer) {
-  AssemblerBuffer buffer;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  AssemblerBuffer buffer(&arena);
   AssemblerBuffer::EnsureCapacity ensured(&buffer);
   buffer.Emit<uint8_t>(0x42);
   ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
@@ -35,7 +39,7 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static constexpr size_t kRandomIterations = 1000;  // Devices might be puny, don't stress them...
 #else
 static constexpr size_t kRandomIterations = 100000;  // Hosts are pretty powerful.
@@ -1333,6 +1337,75 @@
   DriverStr(expected, "bsrq_address");
 }
 
+TEST_F(AssemblerX86_64Test, Popcntl) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+    "popcntl 0xc(%R10,%RBX,4), %edi\n"
+    "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+    "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+    "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "popcntq_address");
+}
+
+TEST_F(AssemblerX86_64Test, CmovlAddress) {
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), false);
+  GetAssembler()->cmov(x86_64::kNotEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), false);
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), false);
+  const char* expected =
+    "cmovzl 0xc(%RDI,%RBX,4), %R10d\n"
+    "cmovnzl 0xc(%R10,%RBX,4), %edi\n"
+    "cmovzl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "cmovl_address");
+}
+
+TEST_F(AssemblerX86_64Test, CmovqAddress) {
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), true);
+  GetAssembler()->cmov(x86_64::kNotEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), true);
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), true);
+  const char* expected =
+    "cmovzq 0xc(%RDI,%RBX,4), %R10\n"
+    "cmovnzq 0xc(%R10,%RBX,4), %rdi\n"
+    "cmovzq 0xc(%RDI,%R9,4), %rdi\n";
+
+  DriverStr(expected, "cmovq_address");
+}
+
+
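The expected strings are AT&T syntax, so operands read source first, destination last, and Address(base, index, TIMES_4, 12) disassembles as 0xc(%base,%index,4). Spelled out for the first cmov case above (illustrative):

    // cmov(kEqual, R10, Address(RDI, RBX, TIMES_4, 12), /*is64bit=*/ false)
    //   disassembles as: cmovzl 0xc(%RDI,%RBX,4), %R10d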
 /////////////////
 // Near labels //
 /////////////////
@@ -1414,6 +1487,62 @@
   DriverFn(&setcc_test_fn, "setcc");
 }
 
+TEST_F(AssemblerX86_64Test, MovzxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
+}
+
+TEST_F(AssemblerX86_64Test, Repnescasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "Repnescasw");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsw) {
+  GetAssembler()->repe_cmpsw();
+  const char* expected = "repe cmpsw\n";
+  DriverStr(expected, "Repecmpsw");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsl) {
+  GetAssembler()->repe_cmpsl();
+  const char* expected = "repe cmpsl\n";
+  DriverStr(expected, "Repecmpsl");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsq) {
+  GetAssembler()->repe_cmpsq();
+  const char* expected = "repe cmpsq\n";
+  DriverStr(expected, "Repecmpsq");
+}
+
+TEST_F(AssemblerX86_64Test, Cmpb) {
+  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+                       x86_64::Immediate(0));
+  const char* expected = "cmpb $0, 128(%RDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
+class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
+ public:
+  using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>;
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "x86_64";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn";
+  }
+
+ private:
+};
+
 static x86_64::X86_64ManagedRegister ManagedFromCpu(x86_64::Register r) {
   return x86_64::X86_64ManagedRegister::FromCpuRegister(r);
 }
@@ -1422,14 +1551,16 @@
   return x86_64::X86_64ManagedRegister::FromXmmRegister(r);
 }
 
-std::string buildframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                               x86_64::X86_64Assembler* assembler) {
+std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                               x86_64::X86_64JNIMacroAssembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   // Three random entry spills.
   ManagedRegisterEntrySpills entry_spills;
@@ -1463,18 +1594,20 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, BuildFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) {
   DriverFn(&buildframe_test_fn, "BuildFrame");
 }
 
-std::string removeframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                x86_64::X86_64Assembler* assembler) {
+std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                                x86_64::X86_64JNIMacroAssembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   size_t frame_size = 10 * kStackAlignment;
   assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
@@ -1492,12 +1625,13 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, RemoveFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) {
   DriverFn(&removeframe_test_fn, "RemoveFrame");
 }
 
-std::string increaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                  x86_64::X86_64Assembler* assembler) {
+std::string increaseframe_test_fn(
+    JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+    x86_64::X86_64JNIMacroAssembler* assembler) {
   assembler->IncreaseFrameSize(0U);
   assembler->IncreaseFrameSize(kStackAlignment);
   assembler->IncreaseFrameSize(10 * kStackAlignment);
@@ -1511,12 +1645,13 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, IncreaseFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) {
   DriverFn(&increaseframe_test_fn, "IncreaseFrame");
 }
 
-std::string decreaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                  x86_64::X86_64Assembler* assembler) {
+std::string decreaseframe_test_fn(
+    JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+    x86_64::X86_64JNIMacroAssembler* assembler) {
   assembler->DecreaseFrameSize(0U);
   assembler->DecreaseFrameSize(kStackAlignment);
   assembler->DecreaseFrameSize(10 * kStackAlignment);
@@ -1530,40 +1665,8 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, DecreaseFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, DecreaseFrame) {
   DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
 }
 
-TEST_F(AssemblerX86_64Test, MovzxbRegs) {
-  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
-}
-
-TEST_F(AssemblerX86_64Test, MovsxbRegs) {
-  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
-}
-
-TEST_F(AssemblerX86_64Test, Repnescasw) {
-  GetAssembler()->repne_scasw();
-  const char* expected = "repne scasw\n";
-  DriverStr(expected, "Repnescasw");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsw) {
-  GetAssembler()->repe_cmpsw();
-  const char* expected = "repe cmpsw\n";
-  DriverStr(expected, "Repecmpsw");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsl) {
-  GetAssembler()->repe_cmpsl();
-  const char* expected = "repe cmpsl\n";
-  DriverStr(expected, "Repecmpsl");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsq) {
-  GetAssembler()->repe_cmpsq();
-  const char* expected = "repe cmpsq\n";
-  DriverStr(expected, "Repecmpsq");
-}
-
 }  // namespace art
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 0c782d4..cc508a1 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -29,15 +29,15 @@
 
 class CpuRegister {
  public:
-  explicit CpuRegister(Register r) : reg_(r) {}
-  explicit CpuRegister(int r) : reg_(Register(r)) {}
-  Register AsRegister() const {
+  explicit constexpr CpuRegister(Register r) : reg_(r) {}
+  explicit constexpr CpuRegister(int r) : reg_(Register(r)) {}
+  constexpr Register AsRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
@@ -47,15 +47,15 @@
 
 class XmmRegister {
  public:
-  explicit XmmRegister(FloatRegister r) : reg_(r) {}
-  explicit XmmRegister(int r) : reg_(FloatRegister(r)) {}
-  FloatRegister AsFloatRegister() const {
+  explicit constexpr XmmRegister(FloatRegister r) : reg_(r) {}
+  explicit constexpr XmmRegister(int r) : reg_(FloatRegister(r)) {}
+  constexpr FloatRegister AsFloatRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
@@ -106,6 +106,8 @@
   kNotZero      = kNotEqual,
   kNegative     = kSign,
   kPositive     = kNotSign,
+  kCarrySet     = kBelow,
+  kCarryClear   = kAboveEqual,
   kUnordered    = kParityEven
 };
 
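kCarrySet and kCarryClear alias kBelow and kAboveEqual because an unsigned borrow on x86 sets CF, making jb and jc the same branch. A hypothetical call site using the new names:

    // After an addl that may carry out of 32 bits:
    __ j(x86_64::kCarrySet, &overflow_label);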
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
new file mode 100644
index 0000000..3e687a7
--- /dev/null
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -0,0 +1,609 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_x86_64.h"
+
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "memory_region.h"
+#include "thread.h"
+
+namespace art {
+namespace x86_64 {
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
+constexpr size_t kFramePointerSize = 8;
+
+#define __ asm_.
+
+void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                         ManagedRegister method_reg,
+                                         ArrayRef<const ManagedRegister> spill_regs,
+                                         const ManagedRegisterEntrySpills& entry_spills) {
+  DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
+  cfi().SetCurrentCFAOffset(8);  // Return address on stack.
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsCpuRegister()) {
+      __ pushq(spill.AsCpuRegister());
+      gpr_count++;
+      cfi().AdjustCFAOffset(kFramePointerSize);
+      cfi().RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
+    }
+  }
+  // The return address and spilled GPRs are already on the stack; reserve the rest of the frame.
+  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
+                          - (gpr_count * kFramePointerSize)
+                          - kFramePointerSize /*return address*/;
+  __ subq(CpuRegister(RSP), Immediate(rest_of_frame));
+  cfi().AdjustCFAOffset(rest_of_frame);
+
+  // Spill XMM registers at the top of the newly reserved area.
+  int64_t offset = rest_of_frame;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset -= sizeof(double);
+      __ movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+      cfi().RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
+    }
+  }
+
+  static_assert(static_cast<size_t>(kX86_64PointerSize) == kFramePointerSize,
+                "Unexpected frame pointer size.");
+
+  __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86_64().IsCpuRegister()) {
+      if (spill.getSize() == 8) {
+        __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                spill.AsX86_64().AsCpuRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                spill.AsX86_64().AsCpuRegister());
+      }
+    } else {
+      if (spill.getSize() == 8) {
+        __ movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                 spill.AsX86_64().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                 spill.AsX86_64().AsXmmRegister());
+      }
+    }
+  }
+}
+
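Worked example of the frame arithmetic above: with frame_size = 64, two GPR spills and kFramePointerSize = 8, rest_of_frame = 64 - 2*8 - 8 = 40. After the two pushq and the subq, the frame reads, bottom up: ArtMethod* at [RSP + 0], 32 bytes of XMM/outgoing area, the two saved GPRs at [RSP + 40] and [RSP + 48], and the return address at [RSP + 56].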
+void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                          ArrayRef<const ManagedRegister> spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+  int gpr_count = 0;
+  // Unspill (restore) XMM registers.
+  int64_t offset = static_cast<int64_t>(frame_size)
+      - (spill_regs.size() * kFramePointerSize)
+      - 2 * kFramePointerSize;
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset += sizeof(double);
+      __ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+      cfi().Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
+    } else {
+      gpr_count++;
+    }
+  }
+  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
+  __ addq(CpuRegister(RSP), Immediate(adjust));
+  cfi().AdjustCFAOffset(-adjust);
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsCpuRegister()) {
+      __ popq(spill.AsCpuRegister());
+      cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+      cfi().Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
+    }
+  }
+  __ ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void X86_64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  __ addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(size_t adjust, X86_64Assembler* assembler) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  assembler->addq(CpuRegister(RSP), Immediate(adjust));
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void X86_64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(adjust, &asm_);
+}
+
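IncreaseFrameSize grows the frame by adding a negated immediate rather than using subq; the net RSP effect is identical, e.g. for a 32-byte adjustment (sketch):

    addq $-32, %rsp    # same result as: subq $32, %rsp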
+void X86_64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      __ movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+    } else {
+      CHECK_EQ(8u, size);
+      __ movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+    }
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    __ movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
+    __ movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
+            src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      __ fstps(Address(CpuRegister(RSP), offs));
+    } else {
+      __ fstpl(Address(CpuRegister(RSP), offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      __ movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+    } else {
+      __ movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
+                                                    uint32_t imm,
+                                                    ManagedRegister) {
+  __ movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
+}
+
+void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                                       FrameOffset fr_offs,
+                                                       ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
+  __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
+}
+
+void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
+                                            ManagedRegister /*src*/,
+                                            FrameOffset /*in_off*/,
+                                            ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // This case currently only exists for ARM.
+}
+
+void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    if (size == 4) {
+      __ movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+    } else {
+      CHECK_EQ(8u, size);
+      __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+    }
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    __ movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
+    __ movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ flds(Address(CpuRegister(RSP), src));
+    } else {
+      __ fldl(Address(CpuRegister(RSP), src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+    } else {
+      __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadFromThread(ManagedRegister mdest,
+                                             ThreadOffset64 src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ gs()->flds(Address::Absolute(src, true));
+    } else {
+      __ gs()->fldl(Address::Absolute(src, true));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
+    } else {
+      __ gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
+    }
+  }
+}
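The *Thread load/store routines above all rely on the runtime pointing the
%gs segment base at the current Thread on x86-64, so that
Address::Absolute(offs, true) encodes a gs-relative operand. A minimal
standalone sketch of the resulting access (GCC/Clang inline asm; assumes %gs
has already been set up that way, which this patch does not show):

    #include <cstdint>

    // Reads the 64-bit field at gs:[offset] -- the access that
    // `__ gs()->movq(reg, Address::Absolute(offset, true))` emits.
    static inline uint64_t LoadThreadField(uintptr_t offset) {
      uint64_t value;
      asm("movq %%gs:(%1), %0" : "=r"(value) : "r"(offset));
      return value;
    }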
+
+void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+}
+
+void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                      ManagedRegister mbase,
+                                      MemberOffset offs,
+                                      bool unpoison_reference) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  CHECK(dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                         ManagedRegister mbase,
+                                         Offset offs) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  CHECK(dest.IsCpuRegister());
+  __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
+}
+
+void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  __ gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
+}
+
+void X86_64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
+  } else {
+    __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
+  } else {
+    __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
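The two extensions differ only in how the upper bits are filled; a small
standalone C++ illustration (not ART code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t byte = 0xF0;
      // movsxb: replicate the sign bit into the upper bits.
      int64_t sign_extended = static_cast<int8_t>(byte);  // -16
      // movzxb: fill the upper bits with zeros.
      uint64_t zero_extended = byte;                       // 240
      printf("%lld %llu\n", (long long)sign_extended,
             (unsigned long long)zero_extended);
      return 0;
    }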
+
+void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      __ movq(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via the stack and pop the X87 register.
+      __ subq(CpuRegister(RSP), Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstps(Address(CpuRegister(RSP), 0));
+        __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstpl(Address(CpuRegister(RSP), 0));
+        __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
+      }
+      __ addq(CpuRegister(RSP), Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
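There is no direct X87-to-XMM move, so the value round-trips through a
16-byte stack slot; for size == 4 the sequence emitted above is:

    subq  $16, %rsp      // carve out a scratch slot
    fstps (%rsp)         // pop ST0, storing it as a 32-bit float
    movss (%rsp), %xmmN  // reload into the destination SSE register
    addq  $16, %rsp      // release the slot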
+
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
+  __ movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                   ThreadOffset64 thr_offs,
+                                                   ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
+  Store(fr_offs, scratch, 8);
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                                 FrameOffset fr_offs,
+                                                 ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 8);
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   FrameOffset src,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                   ManagedRegister /*src_base*/,
+                                   Offset /*src_offset*/,
+                                   ManagedRegister /*scratch*/,
+                                   size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86_64JNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                   Offset dest_offset,
+                                   FrameOffset src,
+                                   ManagedRegister scratch,
+                                   size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  __ pushq(Address(CpuRegister(RSP), src));
+  __ popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   FrameOffset src_base,
+                                   Offset src_offset,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  __ movq(scratch, Address(CpuRegister(RSP), src_base));
+  __ movq(scratch, Address(scratch, src_offset));
+  __ movq(Address(CpuRegister(RSP), dest), scratch);
+}
+
+void X86_64JNIMacroAssembler::Copy(ManagedRegister dest,
+                                   Offset dest_offset,
+                                   ManagedRegister src,
+                                   Offset src_offset,
+                                   ManagedRegister scratch,
+                                   size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  __ pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
+  __ popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   Offset dest_offset,
+                                   FrameOffset src,
+                                   Offset src_offset,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  __ movq(scratch, Address(CpuRegister(RSP), src));
+  __ pushq(Address(scratch, src_offset));
+  __ popq(Address(scratch, dest_offset));
+}
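The pushq/popq pairs in the Copy variants above move 8 bytes
memory-to-memory without consuming a scratch GPR. A minimal standalone
sketch of the same trick (GCC/Clang on x86-64; not ART code):

    #include <cstdint>
    #include <cstdio>

    // Copies 8 bytes from *src to *dst via the stack; clobbers no GPR.
    static void CopyViaStack(uint64_t* dst, const uint64_t* src) {
      asm volatile("pushq %1\n\t"
                   "popq %0"
                   : "=m"(*dst)
                   : "m"(*src));
    }

    int main() {
      uint64_t a = 0x1122334455667788ULL, b = 0;
      CopyViaStack(&b, &a);
      printf("%016llx\n", (unsigned long long)b);  // 1122334455667788
      return 0;
    }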
+
+void X86_64JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
+  __ mfence();
+}
+
+void X86_64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                     FrameOffset handle_scope_offset,
+                                                     ManagedRegister min_reg,
+                                                     bool null_allowed) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
+    // Use out_reg as an indicator of null.
+    in_reg = out_reg;
+    // TODO: movzwl
+    __ movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+}
+
+void X86_64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                     FrameOffset handle_scope_offset,
+                                                     ManagedRegister mscratch,
+                                                     bool null_allowed) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+  Store(out_off, scratch, 8);
+}
+
+// Given a handle scope entry, load the associated reference.
+void X86_64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                           ManagedRegister min_reg) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  __ j(kZero, &null_arg);
+  __ movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  __ Bind(&null_arg);
+}
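In C++ terms the sequence above is a null-tolerant double dereference: a
handle scope entry is an Object**, and a null handle yields a null
reference. Sketch (the template parameter stands in for mirror::Object):

    template <typename Object>
    static Object* LoadReferenceFromHandle(Object** handle) {
      return (handle == nullptr) ? nullptr : *handle;
    }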
+
+void X86_64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  __ movq(scratch, Address(CpuRegister(RSP), base));
+  __ call(Address(scratch, offset));
+}
+
+void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
+  __ gs()->call(Address::Absolute(offset, true));
+}
+
+void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ gs()->movq(tr.AsX86_64().AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+}
+
+void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  __ gs()->movq(scratch.AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+  __ movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
+}
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class X86_64ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  void Emit(Assembler* sp_asm) OVERRIDE;
+ private:
+  const size_t stack_adjust_;
+};
+
+void X86_64JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86_64ExceptionSlowPath* slow = new (__ GetArena()) X86_64ExceptionSlowPath(stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true),
+                Immediate(0));
+  __ j(kNotEqual, slow->Entry());
+}
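ExceptionPoll therefore costs one gs-relative compare plus a conditional
jump on the fast path. In pseudo-C++ (ThreadSketch and DeliverException are
illustrative stand-ins, not the real declarations):

    struct ThreadSketch { void* exception; };      // hypothetical stand-in
    [[noreturn]] void DeliverException(void* e);   // hypothetical stand-in

    inline void ExceptionPollEquivalent(ThreadSketch* self) {
      // cmpl gs:[ExceptionOffset], $0 ; jne slow_path
      if (self->exception != nullptr) {
        DeliverException(self->exception);  // slow path; never returns
      }
    }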
+
+#undef __
+
+void X86_64ExceptionSlowPath::Emit(Assembler* sasm) {
+  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(stack_adjust_, sp_asm);
+  }
+  // Pass exception as argument in RDI
+  __ gs()->movq(CpuRegister(RDI),
+                Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true));
+  __ gs()->call(
+      Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pDeliverException), true));
+  // This call should never return.
+  __ int3();
+#undef __
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
new file mode 100644
index 0000000..cc4e57c
--- /dev/null
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
+#define ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
+
+#include <vector>
+
+#include "assembler_x86_64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art {
+namespace x86_64 {
+
+class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assembler,
+                                                                  PointerSize::k64> {
+ public:
+  explicit X86_64JNIMacroAssembler(ArenaAllocator* arena)
+      : JNIMacroAssemblerFwd<X86_64Assembler, PointerSize::k64>(arena) {}
+  virtual ~X86_64JNIMacroAssembler() {}
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister src,
+                     FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
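A hypothetical usage sketch of this interface (arena setup follows the
usual ART pattern; only methods declared above are called, and the
generated code is not emitted anywhere in this sketch):

    ArenaPool pool;
    ArenaAllocator arena(&pool);
    x86_64::X86_64JNIMacroAssembler jasm(&arena);

    jasm.IncreaseFrameSize(32);   // addq $-32, %rsp, plus CFI bookkeeping
    jasm.StoreImmediateToFrame(FrameOffset(0), 42, ManagedRegister::NoRegister());
    jasm.DecreaseFrameSize(32);   // addq $32, %rsp, plus CFI bookkeeping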
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index 47bbb44..32af672 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
 
 #include "constants_x86_64.h"
-#include "dwarf/register.h"
+#include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -88,52 +88,52 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
-  CpuRegister AsCpuRegister() const {
+  constexpr CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return XmmRegister(static_cast<FloatRegister>(id_ - kNumberOfCpuRegIds));
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  CpuRegister AsRegisterPairLow() const {
+  constexpr CpuRegister AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows the use of AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  CpuRegister AsRegisterPairHigh() const {
+  constexpr CpuRegister AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows the use of AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -147,32 +147,32 @@
   // then false is returned.
   bool Overlaps(const X86_64ManagedRegister& other) const;
 
-  static X86_64ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86_64ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
+  static constexpr X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86_64ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86_64ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -188,9 +188,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86_64ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86_64ManagedRegister FromRegId(int reg_id) {
     X86_64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -201,7 +201,7 @@
 
 }  // namespace x86_64
 
-inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
+constexpr inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
   x86_64::X86_64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
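Making these accessors constexpr allows register-mapping invariants to be
checked at compile time, e.g. (a sketch; assumes the embedded CHECKs are
usable in constant evaluation, as under C++14 relaxed constexpr):

    static_assert(
        x86_64::X86_64ManagedRegister::FromCpuRegister(x86_64::RDI).IsCpuRegister(),
        "CPU register ids must map into the CPU register range");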
diff --git a/compiler/vector_output_stream.cc b/compiler/vector_output_stream.cc
deleted file mode 100644
index 3d33673..0000000
--- a/compiler/vector_output_stream.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "vector_output_stream.h"
-
-#include "base/logging.h"
-
-namespace art {
-
-VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector)
-  : OutputStream(location), offset_(vector->size()), vector_(vector) {}
-
-off_t VectorOutputStream::Seek(off_t offset, Whence whence) {
-  CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence;
-  off_t new_offset = 0;
-  switch (whence) {
-    case kSeekSet: {
-      new_offset = offset;
-      break;
-    }
-    case kSeekCurrent: {
-      new_offset = offset_ + offset;
-      break;
-    }
-    case kSeekEnd: {
-      new_offset = vector_->size() + offset;
-      break;
-    }
-  }
-  EnsureCapacity(new_offset);
-  offset_ = new_offset;
-  return offset_;
-}
-
-}  // namespace art
diff --git a/compiler/vector_output_stream.h b/compiler/vector_output_stream.h
deleted file mode 100644
index 3c5877c..0000000
--- a/compiler/vector_output_stream.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
-#define ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
-
-#include "output_stream.h"
-
-#include <string>
-#include <string.h>
-#include <vector>
-
-namespace art {
-
-class VectorOutputStream FINAL : public OutputStream {
- public:
-  VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
-
-  virtual ~VectorOutputStream() {}
-
-  bool WriteFully(const void* buffer, size_t byte_count) {
-    if (static_cast<size_t>(offset_) == vector_->size()) {
-      const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
-      vector_->insert(vector_->end(), &start[0], &start[byte_count]);
-      offset_ += byte_count;
-    } else {
-      off_t new_offset = offset_ + byte_count;
-      EnsureCapacity(new_offset);
-      memcpy(&(*vector_)[offset_], buffer, byte_count);
-      offset_ = new_offset;
-    }
-    return true;
-  }
-
-  off_t Seek(off_t offset, Whence whence);
-
- private:
-  void EnsureCapacity(off_t new_offset) {
-    if (new_offset > static_cast<off_t>(vector_->size())) {
-      vector_->resize(new_offset);
-    }
-  }
-
-  off_t offset_;
-  std::vector<uint8_t>* const vector_;
-
-  DISALLOW_COPY_AND_ASSIGN(VectorOutputStream);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index e252765..37acef6 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -38,9 +38,10 @@
   dex2oat_target_arch := 32
 endif
 
-# We need to explcitly give the arch, as giving 'both' will make the
-# build-art-executable rule compile dex2oat for 64bits.
 ifeq ($(HOST_PREFER_32_BIT),true)
+  # We need to explicitly restrict the host arch to 32-bit only, as
+  # giving 'both' would make build-art-executable generate a build
+  # rule for a 64-bit dex2oat executable too.
   dex2oat_host_arch := 32
 else
   dex2oat_host_arch := both
@@ -54,21 +55,46 @@
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain,art/compiler,target,debug,$(dex2oat_target_arch)))
 endif
 
+# Note: the order is important because of static linking resolution.
+DEX2OAT_STATIC_DEPENDENCIES := \
+  libziparchive-host \
+  libnativehelper \
+  libnativebridge \
+  libnativeloader \
+  libsigchain_dummy \
+  liblog \
+  libz \
+  libbacktrace \
+  libLLVMObject \
+  libLLVMBitReader \
+  libLLVMMC \
+  libLLVMMCParser \
+  libLLVMCore \
+  libLLVMSupport \
+  libcutils \
+  libunwindbacktrace \
+  libutils \
+  libbase \
+  liblz4 \
+  liblzma
+
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libvixl-arm64 $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libvixld-arm64 $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
+# Clear locals now that they've served their purpose.
+dex2oat_target_arch :=
+dex2oat_host_arch :=
+
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 384b879..cfcfe1c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -22,6 +22,7 @@
 
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <unordered_set>
@@ -32,14 +33,12 @@
 #include <sys/utsname.h>
 #endif
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include <cutils/trace.h>
-
-#include "art_method-inl.h"
 #include "arch/instruction_set_features.h"
 #include "arch/mips/instruction_set_features_mips.h"
+#include "art_method-inl.h"
 #include "base/dumpable.h"
 #include "base/macros.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
@@ -48,36 +47,44 @@
 #include "class_linker.h"
 #include "compiler.h"
 #include "compiler_callbacks.h"
-#include "dex_file-inl.h"
-#include "dex/pass_manager.h"
-#include "dex/verification_results.h"
-#include "dex/quick_compiler_callbacks.h"
+#include "debug/elf_debug_writer.h"
+#include "debug/method_debug_info.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "dex/verification_results.h"
+#include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "elf_file.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
 #include "interpreter/unstarted_runtime.h"
+#include "jit/offline_profiling_info.h"
 #include "leb128.h"
+#include "linker/multi_oat_relative_patcher.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file_assistant.h"
 #include "oat_writer.h"
 #include "os.h"
 #include "runtime.h"
+#include "runtime_options.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
 namespace art {
 
+static constexpr size_t kDefaultMinDexFilesForSwap = 2;
+static constexpr size_t kDefaultMinDexFileCumulativeSizeForSwap = 20 * MB;
+
 static int original_argc;
 static char** original_argv;
 
@@ -122,15 +129,21 @@
       continue;
     }
 
+    // The image format is dropped.
+    if (StartsWith(original_argv[i], "--image-format=")) {
+      continue;
+    }
+
     // This should leave any dex-file and oat-file options, describing what we compiled.
 
     // However, we prefer to drop this when we saw --zip-fd.
     if (saw_zip_fd) {
+      // Drop anything --zip-X, --dex-X, --oat-X, --swap-X, or --app-image-X.
+      // Drop anything --zip-X, --dex-X, --oat-X, --swap-X, or --app-image-X
       if (StartsWith(original_argv[i], "--zip-") ||
           StartsWith(original_argv[i], "--dex-") ||
           StartsWith(original_argv[i], "--oat-") ||
-          StartsWith(original_argv[i], "--swap-")) {
+          StartsWith(original_argv[i], "--swap-") ||
+          StartsWith(original_argv[i], "--app-image-")) {
         continue;
       }
     }
@@ -190,7 +203,7 @@
   UsageError("      corresponding to the file descriptor specified by --zip-fd.");
   UsageError("      Example: --zip-location=/system/app/Calculator.apk");
   UsageError("");
-  UsageError("  --oat-file=<file.oat>: specifies the oat output destination via a filename.");
+  UsageError("  --oat-file=<file.oat>: specifies an oat output destination via a filename.");
   UsageError("      Example: --oat-file=/system/framework/boot.oat");
   UsageError("");
   UsageError("  --oat-fd=<number>: specifies the oat output destination via a file descriptor.");
@@ -200,12 +213,17 @@
   UsageError("      to the file descriptor specified by --oat-fd.");
   UsageError("      Example: --oat-location=/data/dalvik-cache/system@app@Calculator.apk.oat");
   UsageError("");
-  UsageError("  --oat-symbols=<file.oat>: specifies the oat output destination with full symbols.");
+  UsageError("  --oat-symbols=<file.oat>: specifies an oat output destination with full symbols.");
   UsageError("      Example: --oat-symbols=/symbols/system/framework/boot.oat");
   UsageError("");
-  UsageError("  --image=<file.art>: specifies the output image filename.");
+  UsageError("  --image=<file.art>: specifies an output image filename.");
   UsageError("      Example: --image=/system/framework/boot.art");
   UsageError("");
+  UsageError("  --image-format=(uncompressed|lz4|lz4hc):");
+  UsageError("      specifies the format in which to store the image.");
+  UsageError("      Example: --image-format=lz4");
+  UsageError("      Default: uncompressed");
+  UsageError("");
   UsageError("  --image-classes=<classname-file>: specifies classes to include in an image.");
   UsageError("      Example: --image=frameworks/base/preloaded-classes");
   UsageError("");
@@ -213,7 +231,9 @@
   UsageError("      Example: --base=0x50000000");
   UsageError("");
   UsageError("  --boot-image=<file.art>: provide the image file for the boot class path.");
+  UsageError("      Do not include the arch as part of the name; it is added automatically.");
   UsageError("      Example: --boot-image=/system/framework/boot.art");
+  UsageError("               (specifies /system/framework/<arch>/boot.art as the image file)");
   UsageError("      Default: $ANDROID_ROOT/system/framework/boot.art");
   UsageError("");
   UsageError("  --android-root=<path>: used to locate libraries for portable linking.");
@@ -239,13 +259,19 @@
   UsageError("");
   UsageError("  --compiler-filter="
                 "(verify-none"
+                "|verify-at-runtime"
+                "|verify-profile"
                 "|interpret-only"
+                "|time"
+                "|space-profile"
                 "|space"
                 "|balanced"
+                "|speed-profile"
                 "|speed"
-                "|everything"
-                "|time):");
+                "|everything-profile"
+                "|everything):");
   UsageError("      select compiler filter.");
+  UsageError("      verify-profile requires --profile-file(-fd) to also be passed in.");
   UsageError("      Example: --compiler-filter=everything");
   UsageError("      Default: speed");
   UsageError("");
@@ -271,8 +297,8 @@
   UsageError("");
   UsageError("  --num-dex-methods=<method-count>: threshold size for a small dex file for");
   UsageError("      compiler filter tuning. If the input has fewer than this many methods");
-  UsageError("      and the filter is not interpret-only or verify-none, overrides the");
-  UsageError("      filter to use speed");
+  UsageError("      and the filter is not interpret-only, verify-none, or verify-at-runtime,");
+  UsageError("      overrides the filter to use speed");
   UsageError("      Example: --num-dex-method=%d", CompilerOptions::kDefaultNumDexMethodsThreshold);
   UsageError("      Default: %d", CompilerOptions::kDefaultNumDexMethodsThreshold);
   UsageError("");
@@ -301,14 +327,18 @@
   UsageError("  -g");
   UsageError("  --generate-debug-info: Generate debug information for native debugging,");
   UsageError("      such as stack unwinding information, ELF symbols and DWARF sections.");
-  UsageError("      This generates all the available information. Unneeded parts can be");
-  UsageError("      stripped using standard command line tools such as strip or objcopy.");
-  UsageError("      (enabled by default in debug builds, disabled by default otherwise)");
-  UsageError("");
-  UsageError("  --debuggable: Produce debuggable code. Implies --generate-debug-info.");
+  UsageError("      If used without --debuggable, it will be best-effort only.");
+  UsageError("      This option does not affect the generated code. (disabled by default)");
   UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
+  UsageError("  --generate-mini-debug-info: Generate a minimal amount of LZMA-compressed");
+  UsageError("      debug information necessary to print backtraces. (disabled by default)");
+  UsageError("");
+  UsageError("  --no-generate-mini-debug-info: Do not generate backtrace info.");
+  UsageError("");
+  UsageError("  --debuggable: Produce code debuggable with a Java debugger.");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
@@ -316,19 +346,8 @@
   UsageError("");
   UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
   UsageError("");
-  UsageError("  --print-pass-names: print a list of pass names");
-  UsageError("");
-  UsageError("  --disable-passes=<pass-names>:  disable one or more passes separated by comma.");
-  UsageError("      Example: --disable-passes=UseCount,BBOptimizations");
-  UsageError("");
-  UsageError("  --print-pass-options: print a list of passes that have configurable options along "
-             "with the setting.");
-  UsageError("      Will print default if no overridden setting exists.");
-  UsageError("");
-  UsageError("  --pass-options=Pass1Name:Pass1OptionName:Pass1Option#,"
-             "Pass2Name:Pass2OptionName:Pass2Option#");
-  UsageError("      Used to specify a pass specific option. The setting itself must be integer.");
-  UsageError("      Separator used between options is a comma.");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
   UsageError("");
   UsageError("  --swap-file=<file-name>:  specifies a file to use for swap.");
   UsageError("      Example: --swap-file=/data/tmp/swap.001");
@@ -336,6 +355,33 @@
   UsageError("  --swap-fd=<file-descriptor>:  specifies a file to use for swap (by descriptor).");
   UsageError("      Example: --swap-fd=10");
   UsageError("");
+  UsageError("  --swap-dex-size-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("      bytes to allow the use of swap.");
+  UsageError("      Example: --swap-dex-size-threshold=1000000");
+  UsageError("      Default: %zu", kDefaultMinDexFileCumulativeSizeForSwap);
+  UsageError("");
+  UsageError("  --swap-dex-count-threshold=<count>:  specifies the minimum number of dex files to");
+  UsageError("      allow the use of swap.");
+  UsageError("      Example: --swap-dex-count-threshold=10");
+  UsageError("      Default: %zu", kDefaultMinDexFilesForSwap);
+  UsageError("");
+  UsageError("  --very-large-app-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("      bytes to consider the input \"very large\" and punt on the compilation.");
+  UsageError("      Example: --very-large-app-threshold=100000000");
+  UsageError("");
+  UsageError("  --app-image-fd=<file-descriptor>: specify output file descriptor for app image.");
+  UsageError("      Example: --app-image-fd=10");
+  UsageError("");
+  UsageError("  --app-image-file=<file-name>: specify a file name for app image.");
+  UsageError("      Example: --app-image-file=/data/dalvik-cache/system@app@Calculator.apk.art");
+  UsageError("");
+  UsageError("  --multi-image: specify that separate oat and image files be generated for each "
+             "input dex file.");
+  UsageError("");
+  UsageError("  --force-determinism: force the compiler to emit a deterministic output.");
+  UsageError("      This option is incompatible with read barriers (e.g., if dex2oat has been");
+  UsageError("      built with the environment variable `ART_USE_READ_BARRIER` set to `true`).");
+  UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
 }
@@ -445,63 +491,16 @@
   pthread_t pthread_;
 };
 
-static void ParseStringAfterChar(const std::string& s, char c, std::string* parsed_value) {
-  std::string::size_type colon = s.find(c);
-  if (colon == std::string::npos) {
-    Usage("Missing char %c in option %s\n", c, s.c_str());
-  }
-  // Add one to remove the char we were trimming until.
-  *parsed_value = s.substr(colon + 1);
-}
-
-static void ParseDouble(const std::string& option, char after_char, double min, double max,
-                        double* parsed_value) {
-  std::string substring;
-  ParseStringAfterChar(option, after_char, &substring);
-  bool sane_val = true;
-  double value;
-  if (false) {
-    // TODO: this doesn't seem to work on the emulator.  b/15114595
-    std::stringstream iss(substring);
-    iss >> value;
-    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
-    sane_val = iss.eof() && (value >= min) && (value <= max);
-  } else {
-    char* end = nullptr;
-    value = strtod(substring.c_str(), &end);
-    sane_val = *end == '\0' && value >= min && value <= max;
-  }
-  if (!sane_val) {
-    Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
-  }
-  *parsed_value = value;
-}
-
-static constexpr size_t kMinDexFilesForSwap = 2;
-static constexpr size_t kMinDexFileCumulativeSizeForSwap = 20 * MB;
-
-static bool UseSwap(bool is_image, std::vector<const DexFile*>& dex_files) {
-  if (is_image) {
-    // Don't use swap, we know generation should succeed, and we don't want to slow it down.
-    return false;
-  }
-  if (dex_files.size() < kMinDexFilesForSwap) {
-    // If there are less dex files than the threshold, assume it's gonna be fine.
-    return false;
-  }
-  size_t dex_files_size = 0;
-  for (const auto* dex_file : dex_files) {
-    dex_files_size += dex_file->GetHeader().file_size_;
-  }
-  return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
-}
-
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
       compiler_kind_(Compiler::kOptimizing),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0),
+      image_patch_delta_(0),
+      key_value_store_(nullptr),
       verification_results_(nullptr),
       method_inliner_map_(),
       runtime_(nullptr),
@@ -512,27 +511,37 @@
       image_base_(0U),
       image_classes_zip_filename_(nullptr),
       image_classes_filename_(nullptr),
+      image_storage_mode_(ImageHeader::kStorageModeUncompressed),
       compiled_classes_zip_filename_(nullptr),
       compiled_classes_filename_(nullptr),
       compiled_methods_zip_filename_(nullptr),
       compiled_methods_filename_(nullptr),
-      image_(false),
+      passes_to_run_filename_(nullptr),
+      app_image_(false),
+      boot_image_(false),
+      multi_image_(false),
       is_host_(false),
+      class_loader_(nullptr),
+      elf_writers_(),
+      oat_writers_(),
+      rodata_(),
+      image_writer_(nullptr),
       driver_(nullptr),
+      opened_dex_files_maps_(),
+      opened_dex_files_(),
+      no_inline_from_dex_files_(),
       dump_stats_(false),
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
-      dump_cfg_append_(false),
-      swap_fd_(-1),
-      timings_(timings) {}
+      swap_fd_(kInvalidFd),
+      app_image_fd_(kInvalidFd),
+      profile_file_fd_(kInvalidFd),
+      timings_(timings),
+      force_determinism_(false)
+      {}
 
   ~Dex2Oat() {
-    // Free opened dex files before deleting the runtime_, because ~DexFile
-    // uses MemMap, which is shut down by ~Runtime.
-    class_path_files_.clear();
-    opened_dex_files_.clear();
-
     // Log completion time before deleting the runtime_, because this accesses
     // the runtime.
     LogCompletionTime();
@@ -545,7 +554,12 @@
       for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
         dex_file.release();
       }
-      oat_file_.release();
+      for (std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+        map.release();
+      }
+      for (std::unique_ptr<File>& oat_file : oat_files_) {
+        oat_file.release();
+      }
       runtime_.release();
       verification_results_.release();
       key_value_store_.release();
@@ -553,68 +567,31 @@
   }
 
   struct ParserOptions {
-    std::string oat_symbols;
+    std::vector<const char*> oat_symbols;
     std::string boot_image_filename;
-    const char* compiler_filter_string = nullptr;
-    CompilerOptions::CompilerFilter compiler_filter = CompilerOptions::kDefaultCompilerFilter;
-    bool compile_pic = false;
-    int huge_method_threshold = CompilerOptions::kDefaultHugeMethodThreshold;
-    int large_method_threshold = CompilerOptions::kDefaultLargeMethodThreshold;
-    int small_method_threshold = CompilerOptions::kDefaultSmallMethodThreshold;
-    int tiny_method_threshold = CompilerOptions::kDefaultTinyMethodThreshold;
-    int num_dex_methods_threshold = CompilerOptions::kDefaultNumDexMethodsThreshold;
-    static constexpr int kUnsetInlineDepthLimit = -1;
-    int inline_depth_limit = kUnsetInlineDepthLimit;
-    static constexpr int kUnsetInlineMaxCodeUnits = -1;
-    int inline_max_code_units = kUnsetInlineMaxCodeUnits;
-
-    // Profile file to use
-    double top_k_profile_threshold = CompilerOptions::kDefaultTopKProfileThreshold;
-
-    bool debuggable = false;
-    bool include_patch_information = CompilerOptions::kDefaultIncludePatchInformation;
-    bool generate_debug_info = kIsDebugBuild;
     bool watch_dog_enabled = true;
-    bool abort_on_hard_verifier_error = false;
     bool requested_specific_compiler = false;
-
-    bool implicit_null_checks = false;
-    bool implicit_so_checks = false;
-    bool implicit_suspend_checks = false;
-
-    PassManagerOptions pass_manager_options;
-
     std::string error_msg;
   };
 
-  template <typename T>
-  static void ParseUintOption(const StringPiece& option,
-                              const std::string& option_name,
-                              T* out,
-                              bool is_long_option = true) {
-    std::string option_prefix = option_name + (is_long_option ? "=" : "");
-    DCHECK(option.starts_with(option_prefix));
-    const char* value_string = option.substr(option_prefix.size()).data();
-    int64_t parsed_integer_value;
-    if (!ParseInt(value_string, &parsed_integer_value)) {
-      Usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
-    }
-    if (parsed_integer_value < 0) {
-      Usage("%s passed a negative value %d", option_name.c_str(), parsed_integer_value);
-    }
-    *out = dchecked_integral_cast<T>(parsed_integer_value);
-  }
-
   void ParseZipFd(const StringPiece& option) {
-    ParseUintOption(option, "--zip-fd", &zip_fd_);
+    ParseUintOption(option, "--zip-fd", &zip_fd_, Usage);
   }
 
   void ParseOatFd(const StringPiece& option) {
-    ParseUintOption(option, "--oat-fd", &oat_fd_);
+    ParseUintOption(option, "--oat-fd", &oat_fd_, Usage);
+  }
+
+  void ParseFdForCollection(const StringPiece& option,
+                            const char* arg_name,
+                            std::vector<uint32_t>* fds) {
+    uint32_t fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
   }
 
   void ParseJ(const StringPiece& option) {
-    ParseUintOption(option, "-j", &thread_count_, /* is_long_option */ false);
+    ParseUintOption(option, "-j", &thread_count_, Usage, /* is_long_option */ false);
   }
 
   void ParseBase(const StringPiece& option) {
@@ -685,87 +662,34 @@
     }
   }
 
-  void ParseHugeMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--huge-method-max", &parser_options->huge_method_threshold);
-  }
-
-  void ParseLargeMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--large-method-max", &parser_options->large_method_threshold);
-  }
-
-  void ParseSmallMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--small-method-max", &parser_options->small_method_threshold);
-  }
-
-  void ParseTinyMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--tiny-method-max", &parser_options->tiny_method_threshold);
-  }
-
-  void ParseNumDexMethods(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--num-dex-methods", &parser_options->num_dex_methods_threshold);
-  }
-
-  void ParseInlineDepthLimit(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--inline-depth-limit", &parser_options->inline_depth_limit);
-  }
-
-  void ParseInlineMaxCodeUnits(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--inline-max-code-units=", &parser_options->inline_max_code_units);
-  }
-
-  void ParseDisablePasses(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--disable-passes="));
-    const std::string disable_passes = option.substr(strlen("--disable-passes=")).data();
-    parser_options->pass_manager_options.SetDisablePassList(disable_passes);
-  }
-
-  void ParsePrintPasses(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--print-passes="));
-    const std::string print_passes = option.substr(strlen("--print-passes=")).data();
-    parser_options->pass_manager_options.SetPrintPassList(print_passes);
-  }
-
-  void ParseDumpCfgPasses(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--dump-cfg-passes="));
-    const std::string dump_passes_string = option.substr(strlen("--dump-cfg-passes=")).data();
-    parser_options->pass_manager_options.SetDumpPassList(dump_passes_string);
-  }
-
-  void ParsePassOptions(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--pass-options="));
-    const std::string pass_options = option.substr(strlen("--pass-options=")).data();
-    parser_options->pass_manager_options.SetOverriddenPassOptions(pass_options);
-  }
-
-  void ParseDumpInitFailures(const StringPiece& option) {
-    DCHECK(option.starts_with("--dump-init-failures="));
-    std::string file_name = option.substr(strlen("--dump-init-failures=")).data();
-    init_failure_output_.reset(new std::ofstream(file_name));
-    if (init_failure_output_.get() == nullptr) {
-      LOG(ERROR) << "Failed to allocate ofstream";
-    } else if (init_failure_output_->fail()) {
-      LOG(ERROR) << "Failed to open " << file_name << " for writing the initialization "
-                 << "failures.";
-      init_failure_output_.reset();
+  void ParseImageFormat(const StringPiece& option) {
+    const StringPiece substr("--image-format=");
+    DCHECK(option.starts_with(substr));
+    const StringPiece format_str = option.substr(substr.length());
+    if (format_str == "lz4") {
+      image_storage_mode_ = ImageHeader::kStorageModeLZ4;
+    } else if (format_str == "lz4hc") {
+      image_storage_mode_ = ImageHeader::kStorageModeLZ4HC;
+    } else if (format_str == "uncompressed") {
+      image_storage_mode_ = ImageHeader::kStorageModeUncompressed;
+    } else {
+      Usage("Unknown image format: %s", format_str.data());
     }
   }
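
// ParseImageFormat above is a chain of string comparisons; the same mapping
// could be table-driven. A sketch with a simplified, hypothetical enum (the
// real storage modes live on ImageHeader):
#include <map>
#include <string>

enum class StorageMode { kUncompressed, kLZ4, kLZ4HC };

static bool ParseImageFormatValue(const std::string& value, StorageMode* out) {
  static const std::map<std::string, StorageMode> kModes = {
      {"uncompressed", StorageMode::kUncompressed},
      {"lz4", StorageMode::kLZ4},
      {"lz4hc", StorageMode::kLZ4HC},
  };
  const auto it = kModes.find(value);
  if (it == kModes.end()) {
    return false;  // Caller reports Usage("Unknown image format: ...").
  }
  *out = it->second;
  return true;
}

int main() {
  StorageMode mode;
  return ParseImageFormatValue("lz4hc", &mode) ? 0 : 1;
}
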
 
-  void ParseSwapFd(const StringPiece& option) {
-    ParseUintOption(option, "--swap-fd", &swap_fd_);
-  }
-
   void ProcessOptions(ParserOptions* parser_options) {
-    image_ = (!image_filename_.empty());
-    if (image_) {
-      // We need the boot image to always be debuggable.
-      parser_options->debuggable = true;
+    boot_image_ = !image_filenames_.empty();
+    app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
+
+    if (IsAppImage() && IsBootImage()) {
+      Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
     }
 
-    if (oat_filename_.empty() && oat_fd_ == -1) {
+    if (oat_filenames_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
 
-    if (!oat_filename_.empty() && oat_fd_ != -1) {
+    if (!oat_filenames_.empty() && oat_fd_ != -1) {
       Usage("--oat-file should not be used with --oat-fd");
     }
 
@@ -777,10 +701,19 @@
       Usage("--oat-symbols should not be used with --host");
     }
 
-    if (oat_fd_ != -1 && !image_filename_.empty()) {
+    if (oat_fd_ != -1 && !image_filenames_.empty()) {
       Usage("--oat-fd should not be used with --image");
     }
 
+    if (!parser_options->oat_symbols.empty() &&
+        parser_options->oat_symbols.size() != oat_filenames_.size()) {
+      Usage("--oat-file arguments do not match --oat-symbols arguments");
+    }
+
+    if (!image_filenames_.empty() && image_filenames_.size() != oat_filenames_.size()) {
+      Usage("--oat-file arguments do not match --image arguments");
+    }
+
     if (android_root_.empty()) {
       const char* android_root_env_var = getenv("ANDROID_ROOT");
       if (android_root_env_var == nullptr) {
@@ -789,20 +722,19 @@
       android_root_ += android_root_env_var;
     }
 
-    if (!image_ && parser_options->boot_image_filename.empty()) {
+    if (!boot_image_ && parser_options->boot_image_filename.empty()) {
       parser_options->boot_image_filename += android_root_;
       parser_options->boot_image_filename += "/framework/boot.art";
     }
     if (!parser_options->boot_image_filename.empty()) {
-      boot_image_option_ += "-Ximage:";
-      boot_image_option_ += parser_options->boot_image_filename;
+      boot_image_filename_ = parser_options->boot_image_filename;
     }
 
-    if (image_classes_filename_ != nullptr && !image_) {
+    if (image_classes_filename_ != nullptr && !IsBootImage()) {
       Usage("--image-classes should only be used with --image");
     }
 
-    if (image_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+    if (image_classes_filename_ != nullptr && !boot_image_filename_.empty()) {
       Usage("--image-classes should not be used with --boot-image");
     }
 
@@ -810,11 +742,11 @@
       Usage("--image-classes-zip should be used with --image-classes");
     }
 
-    if (compiled_classes_filename_ != nullptr && !image_) {
+    if (compiled_classes_filename_ != nullptr && !IsBootImage()) {
       Usage("--compiled-classes should only be used with --image");
     }
 
-    if (compiled_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+    if (compiled_classes_filename_ != nullptr && !boot_image_filename_.empty()) {
       Usage("--compiled-classes should not be used with --boot-image");
     }
 
@@ -842,21 +774,30 @@
       Usage("--dex-location arguments do not match --dex-file arguments");
     }
 
+    if (!dex_filenames_.empty() && !oat_filenames_.empty()) {
+      if (oat_filenames_.size() != 1 && oat_filenames_.size() != dex_filenames_.size()) {
+        Usage("--oat-file arguments must be singular or match --dex-file arguments");
+      }
+    }
+
     if (zip_fd_ != -1 && zip_location_.empty()) {
       Usage("--zip-location should be supplied with --zip-fd");
     }
 
-    if (boot_image_option_.empty()) {
+    if (boot_image_filename_.empty()) {
       if (image_base_ == 0) {
         Usage("Non-zero --base not specified");
       }
     }
 
-    oat_stripped_ = oat_filename_;
+    const bool have_profile_file = !profile_file_.empty();
+    const bool have_profile_fd = profile_file_fd_ != kInvalidFd;
+    if (have_profile_file && have_profile_fd) {
+      Usage("Profile file should not be specified with both --profile-file-fd and --profile-file");
+    }
+
     if (!parser_options->oat_symbols.empty()) {
-      oat_unstripped_ = parser_options->oat_symbols;
-    } else {
-      oat_unstripped_ = oat_filename_;
+      oat_unstripped_ = std::move(parser_options->oat_symbols);
     }
 
     // If no instruction set feature was given, use the default one for the target
@@ -882,44 +823,19 @@
       }
     }
 
-    if (parser_options->compiler_filter_string == nullptr) {
-      parser_options->compiler_filter_string = "speed";
-    }
-
-    CHECK(parser_options->compiler_filter_string != nullptr);
-    if (strcmp(parser_options->compiler_filter_string, "verify-none") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kVerifyNone;
-    } else if (strcmp(parser_options->compiler_filter_string, "interpret-only") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kInterpretOnly;
-    } else if (strcmp(parser_options->compiler_filter_string, "verify-at-runtime") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kVerifyAtRuntime;
-    } else if (strcmp(parser_options->compiler_filter_string, "space") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kSpace;
-    } else if (strcmp(parser_options->compiler_filter_string, "balanced") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kBalanced;
-    } else if (strcmp(parser_options->compiler_filter_string, "speed") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kSpeed;
-    } else if (strcmp(parser_options->compiler_filter_string, "everything") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kEverything;
-    } else if (strcmp(parser_options->compiler_filter_string, "time") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kTime;
-    } else {
-      Usage("Unknown --compiler-filter value %s", parser_options->compiler_filter_string);
-    }
-
     // If they are not set, use default values for inlining settings.
     // TODO: We should rethink the compiler filter. We mostly save
     // time here, which is orthogonal to space.
-    if (parser_options->inline_depth_limit == ParserOptions::kUnsetInlineDepthLimit) {
-      parser_options->inline_depth_limit =
-          (parser_options->compiler_filter == CompilerOptions::kSpace)
+    if (compiler_options_->inline_depth_limit_ == CompilerOptions::kUnsetInlineDepthLimit) {
+      compiler_options_->inline_depth_limit_ =
+          (compiler_options_->compiler_filter_ == CompilerFilter::kSpace)
           // Implementation of the space filter: limit inlining depth.
           ? CompilerOptions::kSpaceFilterInlineDepthLimit
           : CompilerOptions::kDefaultInlineDepthLimit;
     }
-    if (parser_options->inline_max_code_units == ParserOptions::kUnsetInlineMaxCodeUnits) {
-      parser_options->inline_max_code_units =
-          (parser_options->compiler_filter == CompilerOptions::kSpace)
+    if (compiler_options_->inline_max_code_units_ == CompilerOptions::kUnsetInlineMaxCodeUnits) {
+      compiler_options_->inline_max_code_units_ =
+          (compiler_options_->compiler_filter_ == CompilerFilter::kSpace)
           // Implementation of the space filter: limit inlining max code units.
           ? CompilerOptions::kSpaceFilterInlineMaxCodeUnits
           : CompilerOptions::kDefaultInlineMaxCodeUnits;
@@ -935,8 +851,8 @@
       case kX86_64:
       case kMips:
       case kMips64:
-        parser_options->implicit_null_checks = true;
-        parser_options->implicit_so_checks = true;
+        compiler_options_->implicit_null_checks_ = true;
+        compiler_options_->implicit_so_checks_ = true;
         break;
 
       default:
@@ -944,29 +860,21 @@
         break;
     }
 
-    compiler_options_.reset(new CompilerOptions(parser_options->compiler_filter,
-                                                parser_options->huge_method_threshold,
-                                                parser_options->large_method_threshold,
-                                                parser_options->small_method_threshold,
-                                                parser_options->tiny_method_threshold,
-                                                parser_options->num_dex_methods_threshold,
-                                                parser_options->inline_depth_limit,
-                                                parser_options->inline_max_code_units,
-                                                parser_options->include_patch_information,
-                                                parser_options->top_k_profile_threshold,
-                                                parser_options->debuggable,
-                                                parser_options->generate_debug_info,
-                                                parser_options->implicit_null_checks,
-                                                parser_options->implicit_so_checks,
-                                                parser_options->implicit_suspend_checks,
-                                                parser_options->compile_pic,
-                                                verbose_methods_.empty() ?
-                                                    nullptr :
-                                                    &verbose_methods_,
-                                                new PassManagerOptions(
-                                                    parser_options->pass_manager_options),
-                                                init_failure_output_.get(),
-                                                parser_options->abort_on_hard_verifier_error));
+    compiler_options_->verbose_methods_ = verbose_methods_.empty() ? nullptr : &verbose_methods_;
+
+    if (!IsBootImage() && multi_image_) {
+      Usage("--multi-image can only be used when creating boot images");
+    }
+    if (IsBootImage() && multi_image_ && image_filenames_.size() > 1) {
+      Usage("--multi-image cannot be used with multiple image names");
+    }
+
+    // For now, if we're on the host and compile the boot image, *always* use multiple image files.
+    if (!kIsTargetBuild && IsBootImage()) {
+      if (image_filenames_.size() == 1) {
+        multi_image_ = true;
+      }
+    }
 
     // Done with usage checks, enable watchdog if requested
     if (parser_options->watch_dog_enabled) {
@@ -975,9 +883,136 @@
 
     // Fill some values into the key-value store for the oat header.
     key_value_store_.reset(new SafeMap<std::string, std::string>());
+
+    // Automatically force determinism for the boot image in a host build if the default GC is
+    // CMS or MS and read barriers are not enabled, as forcing determinism switches the GC to a
+    // non-concurrent one by passing the option `-Xgc:nonconcurrent` (see below).
+    if (!kIsTargetBuild && IsBootImage()) {
+      if (SupportsDeterministicCompilation()) {
+        force_determinism_ = true;
+      } else {
+        LOG(WARNING) << "Deterministic compilation is disabled.";
+      }
+    }
+    compiler_options_->force_determinism_ = force_determinism_;
+
+    if (passes_to_run_filename_ != nullptr) {
+      passes_to_run_.reset(ReadCommentedInputFromFile<std::vector<std::string>>(
+          passes_to_run_filename_,
+          nullptr));         // No post-processing.
+      if (passes_to_run_.get() == nullptr) {
+        Usage("Failed to read list of passes to run.");
+      }
+    }
+    compiler_options_->passes_to_run_ = passes_to_run_.get();
   }
 
-  void InsertCompileOptions(int argc, char** argv, ParserOptions* parser_options) {
+  static bool SupportsDeterministicCompilation() {
+    return (gc::kCollectorTypeDefault == gc::kCollectorTypeCMS ||
+            gc::kCollectorTypeDefault == gc::kCollectorTypeMS) &&
+        !kEmitCompilerReadBarrier;
+  }
+
+  void ExpandOatAndImageFilenames() {
+    std::string base_oat = oat_filenames_[0];
+    size_t last_oat_slash = base_oat.rfind('/');
+    if (last_oat_slash == std::string::npos) {
+      Usage("--multi-image used with unusable oat filename %s", base_oat.c_str());
+    }
+    // We also need to honor path components that were encoded through '@'. Otherwise the loading
+    // code won't be able to find the images.
+    if (base_oat.find('@', last_oat_slash) != std::string::npos) {
+      last_oat_slash = base_oat.rfind('@');
+    }
+    base_oat = base_oat.substr(0, last_oat_slash + 1);
+
+    std::string base_img = image_filenames_[0];
+    size_t last_img_slash = base_img.rfind('/');
+    if (last_img_slash == std::string::npos) {
+      Usage("--multi-image used with unusable image filename %s", base_img.c_str());
+    }
+    // We also need to honor path components that were encoded through '@'. Otherwise the loading
+    // code won't be able to find the images.
+    if (base_img.find('@', last_img_slash) != std::string::npos) {
+      last_img_slash = base_img.rfind('@');
+    }
+
+    // Get the prefix, which is the primary image name (without path components). Strip the
+    // extension.
+    std::string prefix = base_img.substr(last_img_slash + 1);
+    if (prefix.rfind('.') != std::string::npos) {
+      prefix = prefix.substr(0, prefix.rfind('.'));
+    }
+    if (!prefix.empty()) {
+      prefix = prefix + "-";
+    }
+
+    base_img = base_img.substr(0, last_img_slash + 1);
+
+    // Note: we have a special case here for our testing. We have to inject the differentiating
+    //       parts for the different core images.
+    std::string infix;  // Empty infix by default.
+    {
+      // Check the first name.
+      std::string dex_file = oat_filenames_[0];
+      size_t last_dex_slash = dex_file.rfind('/');
+      if (last_dex_slash != std::string::npos) {
+        dex_file = dex_file.substr(last_dex_slash + 1);
+      }
+      size_t last_dex_dot = dex_file.rfind('.');
+      if (last_dex_dot != std::string::npos) {
+        dex_file = dex_file.substr(0, last_dex_dot);
+      }
+      if (StartsWith(dex_file, "core-")) {
+        infix = dex_file.substr(strlen("core"));
+      }
+    }
+
+    // Now create the other names. Use a counted loop to skip the first one.
+    for (size_t i = 1; i < dex_locations_.size(); ++i) {
+      // TODO: Make everything properly std::string.
+      std::string image_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".art");
+      char_backing_storage_.push_back(base_img + image_name);
+      image_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+
+      std::string oat_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".oat");
+      char_backing_storage_.push_back(base_oat + oat_name);
+      oat_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+    }
+  }
+
+  // Modify the input string in the following way:
+  //   0) Assume input is /a/b/c.d
+  //   1) Strip the path  -> c.d
+  //   2) Inject prefix p -> pc.d
+  //   3) Inject infix i  -> pci.d
+  //   4) Replace suffix with s if it's "jar"  -> d == "jar" -> pci.s
+  static std::string CreateMultiImageName(std::string in,
+                                          const std::string& prefix,
+                                          const std::string& infix,
+                                          const char* replace_suffix) {
+    size_t last_dex_slash = in.rfind('/');
+    if (last_dex_slash != std::string::npos) {
+      in = in.substr(last_dex_slash + 1);
+    }
+    if (!prefix.empty()) {
+      in = prefix + in;
+    }
+    if (!infix.empty()) {
+      // Inject infix.
+      size_t last_dot = in.rfind('.');
+      if (last_dot != std::string::npos) {
+        in.insert(last_dot, infix);
+      }
+    }
+    if (EndsWith(in, ".jar")) {
+      in = in.substr(0, in.length() - strlen(".jar")) +
+          (replace_suffix != nullptr ? replace_suffix : "");
+    }
+    return in;
+  }
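
// A standalone walk-through of the four steps documented above (strip path,
// inject prefix, inject infix, replace a ".jar" suffix); a sketch that mirrors
// the transformation, not the exact ART helper.
#include <cstdio>
#include <string>

static std::string MultiImageName(std::string in, const std::string& prefix,
                                  const std::string& infix,
                                  const std::string& new_suffix) {
  const size_t slash = in.rfind('/');
  if (slash != std::string::npos) in = in.substr(slash + 1);  // 1) strip path
  in = prefix + in;                                           // 2) inject prefix
  const size_t dot = in.rfind('.');
  if (dot != std::string::npos) in.insert(dot, infix);        // 3) inject infix
  const std::string jar = ".jar";
  if (in.size() >= jar.size() &&
      in.compare(in.size() - jar.size(), jar.size(), jar) == 0) {
    in = in.substr(0, in.size() - jar.size()) + new_suffix;   // 4) replace suffix
  }
  return in;
}

int main() {
  // "/a/b/c.jar" with prefix "p", infix "i", suffix ".art" -> "pci.art".
  std::printf("%s\n", MultiImageName("/a/b/c.jar", "p", "i", ".art").c_str());
}
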
+
+  void InsertCompileOptions(int argc, char** argv) {
     std::ostringstream oss;
     for (int i = 0; i < argc; ++i) {
       if (i > 0) {
@@ -991,10 +1026,18 @@
     key_value_store_->Put(OatHeader::kDex2OatHostKey, oss.str());
     key_value_store_->Put(
         OatHeader::kPicKey,
-        parser_options->compile_pic ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+        compiler_options_->compile_pic_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
     key_value_store_->Put(
         OatHeader::kDebuggableKey,
-        parser_options->debuggable ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+        compiler_options_->debuggable_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+    key_value_store_->Put(
+        OatHeader::kNativeDebuggableKey,
+        compiler_options_->GetNativeDebuggable() ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+    key_value_store_->Put(OatHeader::kCompilerFilter,
+        CompilerFilter::NameOfFilter(compiler_options_->GetCompilerFilter()));
+    key_value_store_->Put(OatHeader::kHasPatchInfoKey,
+        compiler_options_->GetIncludePatchInformation() ? OatHeader::kTrueValue
+                                                        : OatHeader::kFalseValue);
   }
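
// The loop above joins argv back into a single string for the oat header
// key-value store; the joining idiom in isolation (a sketch that prints the
// result instead of storing it):
#include <iostream>
#include <sstream>

int main(int argc, char** argv) {
  std::ostringstream oss;
  for (int i = 0; i < argc; ++i) {
    if (i > 0) {
      oss << ' ';  // Single space between arguments.
    }
    oss << argv[i];
  }
  std::cout << oss.str() << '\n';  // e.g. "dex2oat --oat-file=out.oat"
}
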
 
   // Parse the arguments from the command line. In case of an unrecognized option or impossible
@@ -1015,6 +1058,7 @@
     }
 
     std::unique_ptr<ParserOptions> parser_options(new ParserOptions());
+    compiler_options_.reset(new CompilerOptions());
 
     for (int i = 0; i < argc; i++) {
       const StringPiece option(argv[i]);
@@ -1031,9 +1075,9 @@
       } else if (option.starts_with("--zip-location=")) {
         zip_location_ = option.substr(strlen("--zip-location=")).data();
       } else if (option.starts_with("--oat-file=")) {
-        oat_filename_ = option.substr(strlen("--oat-file=")).data();
+        oat_filenames_.push_back(option.substr(strlen("--oat-file=")).data());
       } else if (option.starts_with("--oat-symbols=")) {
-        parser_options->oat_symbols = option.substr(strlen("--oat-symbols=")).data();
+        parser_options->oat_symbols.push_back(option.substr(strlen("--oat-symbols=")).data());
       } else if (option.starts_with("--oat-fd=")) {
         ParseOatFd(option);
       } else if (option == "--watch-dog") {
@@ -1045,11 +1089,13 @@
       } else if (option.starts_with("--oat-location=")) {
         oat_location_ = option.substr(strlen("--oat-location=")).data();
       } else if (option.starts_with("--image=")) {
-        image_filename_ = option.substr(strlen("--image=")).data();
+        image_filenames_.push_back(option.substr(strlen("--image=")).data());
       } else if (option.starts_with("--image-classes=")) {
         image_classes_filename_ = option.substr(strlen("--image-classes=")).data();
       } else if (option.starts_with("--image-classes-zip=")) {
         image_classes_zip_filename_ = option.substr(strlen("--image-classes-zip=")).data();
+      } else if (option.starts_with("--image-format=")) {
+        ParseImageFormat(option);
       } else if (option.starts_with("--compiled-classes=")) {
         compiled_classes_filename_ = option.substr(strlen("--compiled-classes=")).data();
       } else if (option.starts_with("--compiled-classes-zip=")) {
@@ -1058,6 +1104,8 @@
         compiled_methods_filename_ = option.substr(strlen("--compiled-methods=")).data();
       } else if (option.starts_with("--compiled-methods-zip=")) {
         compiled_methods_zip_filename_ = option.substr(strlen("--compiled-methods-zip=")).data();
+      } else if (option.starts_with("--run-passes=")) {
+        passes_to_run_filename_ = option.substr(strlen("--run-passes=")).data();
       } else if (option.starts_with("--base=")) {
         ParseBase(option);
       } else if (option.starts_with("--boot-image=")) {
@@ -1072,24 +1120,10 @@
         ParseInstructionSetFeatures(option, parser_options.get());
       } else if (option.starts_with("--compiler-backend=")) {
         ParseCompilerBackend(option, parser_options.get());
-      } else if (option.starts_with("--compiler-filter=")) {
-        parser_options->compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
-      } else if (option == "--compile-pic") {
-        parser_options->compile_pic = true;
-      } else if (option.starts_with("--huge-method-max=")) {
-        ParseHugeMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--large-method-max=")) {
-        ParseLargeMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--small-method-max=")) {
-        ParseSmallMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--tiny-method-max=")) {
-        ParseTinyMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--num-dex-methods=")) {
-        ParseNumDexMethods(option, parser_options.get());
-      } else if (option.starts_with("--inline-depth-limit=")) {
-        ParseInlineDepthLimit(option, parser_options.get());
-      } else if (option.starts_with("--inline-max-code-units=")) {
-        ParseInlineMaxCodeUnits(option, parser_options.get());
+      } else if (option.starts_with("--profile-file=")) {
+        profile_file_ = option.substr(strlen("--profile-file=")).ToString();
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseUintOption(option, "--profile-file-fd", &profile_file_fd_, Usage);
       } else if (option == "--host") {
         is_host_ = true;
       } else if (option == "--runtime-arg") {
@@ -1104,58 +1138,46 @@
         dump_timing_ = true;
       } else if (option == "--dump-passes") {
         dump_passes_ = true;
-      } else if (option.starts_with("--dump-cfg=")) {
-        dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-      } else if (option.starts_with("--dump-cfg-append")) {
-        dump_cfg_append_ = true;
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
-      } else if (option == "--generate-debug-info" || option == "-g") {
-        parser_options->generate_debug_info = true;
-      } else if (option == "--no-generate-debug-info") {
-        parser_options->generate_debug_info = false;
-      } else if (option == "--debuggable") {
-        parser_options->debuggable = true;
-        parser_options->generate_debug_info = true;
-      } else if (option.starts_with("--profile-file=")) {
-        profile_file_ = option.substr(strlen("--profile-file=")).data();
-        VLOG(compiler) << "dex2oat: profile file is " << profile_file_;
-      } else if (option == "--no-profile-file") {
-        // No profile
-      } else if (option.starts_with("--top-k-profile-threshold=")) {
-        ParseDouble(option.data(), '=', 0.0, 100.0, &parser_options->top_k_profile_threshold);
-      } else if (option == "--print-pass-names") {
-        parser_options->pass_manager_options.SetPrintPassNames(true);
-      } else if (option.starts_with("--disable-passes=")) {
-        ParseDisablePasses(option, parser_options.get());
-      } else if (option.starts_with("--print-passes=")) {
-        ParsePrintPasses(option, parser_options.get());
-      } else if (option == "--print-all-passes") {
-        parser_options->pass_manager_options.SetPrintAllPasses();
-      } else if (option.starts_with("--dump-cfg-passes=")) {
-        ParseDumpCfgPasses(option, parser_options.get());
-      } else if (option == "--print-pass-options") {
-        parser_options->pass_manager_options.SetPrintPassOptions(true);
-      } else if (option.starts_with("--pass-options=")) {
-        ParsePassOptions(option, parser_options.get());
-      } else if (option == "--include-patch-information") {
-        parser_options->include_patch_information = true;
-      } else if (option == "--no-include-patch-information") {
-        parser_options->include_patch_information = false;
+      } else if (option.starts_with("--swap-file=")) {
+        swap_file_name_ = option.substr(strlen("--swap-file=")).data();
+      } else if (option.starts_with("--swap-fd=")) {
+        ParseUintOption(option, "--swap-fd", &swap_fd_, Usage);
+      } else if (option.starts_with("--swap-dex-size-threshold=")) {
+        ParseUintOption(option,
+                        "--swap-dex-size-threshold",
+                        &min_dex_file_cumulative_size_for_swap_,
+                        Usage);
+      } else if (option.starts_with("--swap-dex-count-threshold=")) {
+        ParseUintOption(option,
+                        "--swap-dex-count-threshold",
+                        &min_dex_files_for_swap_,
+                        Usage);
+      } else if (option.starts_with("--very-large-app-threshold=")) {
+        ParseUintOption(option,
+                        "--very-large-app-threshold",
+                        &very_large_threshold_,
+                        Usage);
+      } else if (option.starts_with("--app-image-file=")) {
+        app_image_file_name_ = option.substr(strlen("--app-image-file=")).data();
+      } else if (option.starts_with("--app-image-fd=")) {
+        ParseUintOption(option, "--app-image-fd", &app_image_fd_, Usage);
       } else if (option.starts_with("--verbose-methods=")) {
         // TODO: rather than switch off compiler logging, make all VLOG(compiler) messages
        //       conditional on having verbose methods.
         gLogVerbosity.compiler = false;
         Split(option.substr(strlen("--verbose-methods=")).ToString(), ',', &verbose_methods_);
-      } else if (option.starts_with("--dump-init-failures=")) {
-        ParseDumpInitFailures(option);
-      } else if (option.starts_with("--swap-file=")) {
-        swap_file_name_ = option.substr(strlen("--swap-file=")).data();
-      } else if (option.starts_with("--swap-fd=")) {
-        ParseSwapFd(option);
-      } else if (option == "--abort-on-hard-verifier-error") {
-        parser_options->abort_on_hard_verifier_error = true;
-      } else {
+      } else if (option == "--multi-image") {
+        multi_image_ = true;
+      } else if (option.starts_with("--no-inline-from=")) {
+        no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
+      } else if (option == "--force-determinism") {
+        if (!SupportsDeterministicCompilation()) {
+          Usage("Cannot use --force-determinism with read barriers or non-CMS garbage collector");
+        }
+        force_determinism_ = true;
+      } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
     }
@@ -1163,33 +1185,52 @@
     ProcessOptions(parser_options.get());
 
     // Insert some compiler things.
-    InsertCompileOptions(argc, argv, parser_options.get());
+    InsertCompileOptions(argc, argv);
   }
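
// The argument loop above dispatches on option prefixes via StringPiece's
// starts_with, with compiler_options_->ParseCompilerOption as the fallback
// before reporting an unknown argument. The same dispatch shape with C++17
// std::string_view (a sketch; ART uses its own StringPiece class):
#include <cstdio>
#include <string_view>

static bool StartsWith(std::string_view s, std::string_view prefix) {
  return s.substr(0, prefix.size()) == prefix;
}

int main(int argc, char** argv) {
  for (int i = 1; i < argc; ++i) {
    const std::string_view option(argv[i]);
    if (StartsWith(option, "--oat-file=")) {
      const std::string_view value = option.substr(sizeof("--oat-file=") - 1);
      std::printf("oat file: %.*s\n", static_cast<int>(value.size()), value.data());
    } else if (option == "--multi-image") {
      std::printf("multi-image enabled\n");
    } else {
      std::fprintf(stderr, "Unknown argument %.*s\n",
                   static_cast<int>(option.size()), option.data());
      return 1;
    }
  }
  return 0;
}
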
 
-  // Check whether the oat output file is writable, and open it for later. Also open a swap file,
-  // if a name is given.
+  // Check whether the oat output files are writable, and open them for later. Also open a swap
+  // file, if a name is given.
   bool OpenFile() {
-    bool create_file = !oat_unstripped_.empty();  // as opposed to using open file descriptor
+    // Prune non-existent dex files now so that we don't create empty oat files for multi-image.
+    PruneNonExistentDexFiles();
+
+    // Expand oat and image filenames for multi image.
+    if (IsBootImage() && multi_image_) {
+      ExpandOatAndImageFilenames();
+    }
+
+    bool create_file = oat_fd_ == -1;  // as opposed to using open file descriptor
     if (create_file) {
-      oat_file_.reset(OS::CreateEmptyFile(oat_unstripped_.c_str()));
-      if (oat_location_.empty()) {
-        oat_location_ = oat_filename_;
+      for (const char* oat_filename : oat_filenames_) {
+        std::unique_ptr<File> oat_file(OS::CreateEmptyFile(oat_filename));
+        if (oat_file.get() == nullptr) {
+          PLOG(ERROR) << "Failed to create oat file: " << oat_filename;
+          return false;
+        }
+        if (create_file && fchmod(oat_file->Fd(), 0644) != 0) {
+          PLOG(ERROR) << "Failed to make oat file world readable: " << oat_filename;
+          oat_file->Erase();
+          return false;
+        }
+        oat_files_.push_back(std::move(oat_file));
       }
     } else {
-      oat_file_.reset(new File(oat_fd_, oat_location_, true));
-      oat_file_->DisableAutoClose();
-      if (oat_file_->SetLength(0) != 0) {
+      std::unique_ptr<File> oat_file(new File(oat_fd_, oat_location_, true));
+      oat_file->DisableAutoClose();
+      if (oat_file->SetLength(0) != 0) {
         PLOG(WARNING) << "Truncating oat file " << oat_location_ << " failed.";
       }
-    }
-    if (oat_file_.get() == nullptr) {
-      PLOG(ERROR) << "Failed to create oat file: " << oat_location_;
-      return false;
-    }
-    if (create_file && fchmod(oat_file_->Fd(), 0644) != 0) {
-      PLOG(ERROR) << "Failed to make oat file world readable: " << oat_location_;
-      oat_file_->Erase();
-      return false;
+      if (oat_file.get() == nullptr) {
+        PLOG(ERROR) << "Failed to create oat file: " << oat_location_;
+        return false;
+      }
+      if (create_file && fchmod(oat_file->Fd(), 0644) != 0) {
+        PLOG(ERROR) << "Failed to make oat file world readable: " << oat_location_;
+        oat_file->Erase();
+        return false;
+      }
+      oat_filenames_.push_back(oat_location_.c_str());
+      oat_files_.push_back(std::move(oat_file));
     }
 
     // Swap file handling.
@@ -1215,10 +1256,12 @@
     return true;
   }
 
-  void EraseOatFile() {
-    DCHECK(oat_file_.get() != nullptr);
-    oat_file_->Erase();
-    oat_file_.reset();
+  void EraseOatFiles() {
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      DCHECK(oat_files_[i].get() != nullptr);
+      oat_files_[i]->Erase();
+      oat_files_[i].reset();
+    }
   }
 
   void Shutdown() {
@@ -1229,192 +1272,166 @@
     dex_caches_.clear();
   }
 
+  void LoadClassProfileDescriptors() {
+    if (profile_compilation_info_ != nullptr && app_image_) {
+      Runtime* runtime = Runtime::Current();
+      CHECK(runtime != nullptr);
+      std::set<DexCacheResolvedClasses> resolved_classes(
+          profile_compilation_info_->GetResolvedClasses());
+
+      // Filter out class path classes since we don't want to include these in the image.
+      std::unordered_set<std::string> dex_files_locations;
+      for (const DexFile* dex_file : dex_files_) {
+        dex_files_locations.insert(dex_file->GetLocation());
+      }
+      for (auto it = resolved_classes.begin(); it != resolved_classes.end(); ) {
+        if (dex_files_locations.find(it->GetDexLocation()) == dex_files_locations.end()) {
+          VLOG(compiler) << "Removed profile samples for non-app dex file " << it->GetDexLocation();
+          it = resolved_classes.erase(it);
+        } else {
+          ++it;
+        }
+      }
+
+      image_classes_.reset(new std::unordered_set<std::string>(
+          runtime->GetClassLinker()->GetClassDescriptorsForProfileKeys(resolved_classes)));
+      VLOG(compiler) << "Loaded " << image_classes_->size()
+                     << " image class descriptors from profile";
+      if (VLOG_IS_ON(compiler)) {
+        for (const std::string& s : *image_classes_) {
+          LOG(INFO) << "Image class " << s;
+        }
+      }
+    }
+  }
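
// LoadClassProfileDescriptors above filters a std::set in place while
// iterating; the erase-returns-next-iterator idiom it relies on, shown in
// isolation with hypothetical data:
#include <cstdio>
#include <set>
#include <string>
#include <unordered_set>

int main() {
  std::set<std::string> resolved = {"app.dex", "classpath.dex"};
  const std::unordered_set<std::string> app_locations = {"app.dex"};
  for (auto it = resolved.begin(); it != resolved.end(); ) {
    if (app_locations.find(*it) == app_locations.end()) {
      it = resolved.erase(it);  // erase() yields the iterator past the removed element.
    } else {
      ++it;
    }
  }
  std::printf("%zu entry kept\n", resolved.size());  // Prints: 1 entry kept
}
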
+
   // Set up the environment for compilation. Includes starting the runtime and loading/opening the
   // boot class path.
   bool Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
-    RuntimeOptions runtime_options;
     art::MemMap::Init();  // For ZipEntry::ExtractToMemMap.
-    if (boot_image_option_.empty()) {
-      std::string boot_class_path = "-Xbootclasspath:";
-      boot_class_path += Join(dex_filenames_, ':');
-      runtime_options.push_back(std::make_pair(boot_class_path, nullptr));
-      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
-      boot_class_path_locations += Join(dex_locations_, ':');
-      runtime_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
-    } else {
-      runtime_options.push_back(std::make_pair(boot_image_option_, nullptr));
-    }
-    for (size_t i = 0; i < runtime_args_.size(); i++) {
-      runtime_options.push_back(std::make_pair(runtime_args_[i], nullptr));
+
+    if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) {
+      return false;
     }
 
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     callbacks_.reset(new QuickCompilerCallbacks(
         verification_results_.get(),
         &method_inliner_map_,
-        image_ ?
+        IsBootImage() ?
             CompilerCallbacks::CallbackMode::kCompileBootImage :
             CompilerCallbacks::CallbackMode::kCompileApp));
-    runtime_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-    runtime_options.push_back(
-        std::make_pair("imageinstructionset", GetInstructionSetString(instruction_set_)));
 
-    // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
-    // we don't want fallback mode, it will abort as we do not push a boot classpath (it might
-    // have been stripped in preopting, anyways).
-    if (!image_) {
-      runtime_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
+    RuntimeArgumentMap runtime_options;
+    if (!PrepareRuntimeOptions(&runtime_options)) {
+      return false;
     }
-    // Disable libsigchain. We don't need it during compilation and it prevents us
-    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
-    runtime_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
 
+    CreateOatWriters();
+    if (!AddDexFileSources()) {
+      return false;
+    }
+
+    if (IsBootImage() && image_filenames_.size() > 1) {
+      // If we're compiling the boot image, store the boot classpath into the Key-Value store.
+      // We need this for the multi-image case.
+      key_value_store_->Put(OatHeader::kBootClassPathKey, GetMultiImageBootClassPath());
+    }
+
+    if (!IsBootImage()) {
+      // When compiling an app, create the runtime early to retrieve
+      // the image location key needed for the oat header.
+      if (!CreateRuntime(std::move(runtime_options))) {
+        return false;
+      }
+
+      if (CompilerFilter::DependsOnImageChecksum(compiler_options_->GetCompilerFilter())) {
+        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        image_file_location_oat_checksum_ = OatFileAssistant::CalculateCombinedImageChecksum();
+        image_file_location_oat_data_begin_ =
+            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
+        image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
+        // Store the boot image filename(s).
+        std::vector<std::string> image_filenames;
+        for (const gc::space::ImageSpace* image_space : image_spaces) {
+          image_filenames.push_back(image_space->GetImageFilename());
+        }
+        std::string image_file_location = Join(image_filenames, ':');
+        if (!image_file_location.empty()) {
+          key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
+        }
+      } else {
+        image_file_location_oat_checksum_ = 0u;
+        image_file_location_oat_data_begin_ = 0u;
+        image_patch_delta_ = 0;
+      }
+
+      // Open dex files for class path.
+      const std::vector<std::string> class_path_locations =
+          GetClassPathLocations(runtime_->GetClassPathString());
+      OpenClassPathFiles(class_path_locations,
+                         &class_path_files_,
+                         &opened_oat_files_,
+                         runtime_->GetInstructionSet());
+
+      // Store the classpath we have right now.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      std::string encoded_class_path;
+      if (class_path_locations.size() == 1 &&
+          class_path_locations[0] == OatFile::kSpecialSharedLibrary) {
+        // When passing the special shared library as the classpath, it is the only path.
+        encoded_class_path = OatFile::kSpecialSharedLibrary;
+      } else {
+        encoded_class_path = OatFile::EncodeDexFileDependencies(class_path_files);
+      }
+      key_value_store_->Put(OatHeader::kClassPathKey, encoded_class_path);
+    }
+
+    // Now that we have finalized key_value_store_, start writing the oat file.
     {
-      TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
-      if (!CreateRuntime(runtime_options)) {
-        return false;
-      }
-    }
-
-    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-    // Runtime::Start, give it away now so that we don't starve GC.
-    Thread* self = Thread::Current();
-    self->TransitionFromRunnableToSuspended(kNative);
-    // If we're doing the image, override the compiler filter to force full compilation. Must be
-    // done ahead of WellKnownClasses::Init that causes verification.  Note: doesn't force
-    // compilation of class initializers.
-    // Whilst we're in native take the opportunity to initialize well known classes.
-    WellKnownClasses::Init(self->GetJniEnv());
-
-    // If --image-classes was specified, calculate the full list of classes to include in the image
-    if (image_classes_filename_ != nullptr) {
-      std::string error_msg;
-      if (image_classes_zip_filename_ != nullptr) {
-        image_classes_.reset(ReadImageClassesFromZip(image_classes_zip_filename_,
-                                                     image_classes_filename_,
-                                                     &error_msg));
-      } else {
-        image_classes_.reset(ReadImageClassesFromFile(image_classes_filename_));
-      }
-      if (image_classes_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of image classes from '" << image_classes_filename_ <<
-            "': " << error_msg;
-        return false;
-      }
-    } else if (image_) {
-      image_classes_.reset(new std::unordered_set<std::string>);
-    }
-    // If --compiled-classes was specified, calculate the full list of classes to compile in the
-    // image.
-    if (compiled_classes_filename_ != nullptr) {
-      std::string error_msg;
-      if (compiled_classes_zip_filename_ != nullptr) {
-        compiled_classes_.reset(ReadImageClassesFromZip(compiled_classes_zip_filename_,
-                                                        compiled_classes_filename_,
-                                                        &error_msg));
-      } else {
-        compiled_classes_.reset(ReadImageClassesFromFile(compiled_classes_filename_));
-      }
-      if (compiled_classes_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of compiled classes from '"
-                   << compiled_classes_filename_ << "': " << error_msg;
-        return false;
-      }
-    } else {
-      compiled_classes_.reset(nullptr);  // By default compile everything.
-    }
-    // If --compiled-methods was specified, read the methods to compile from the given file(s).
-    if (compiled_methods_filename_ != nullptr) {
-      std::string error_msg;
-      if (compiled_methods_zip_filename_ != nullptr) {
-        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
-                                                          compiled_methods_filename_,
-                                                          nullptr,            // No post-processing.
-                                                          &error_msg));
-      } else {
-        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
-                                                           nullptr));         // No post-processing.
-      }
-      if (compiled_methods_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of compiled methods from '"
-            << compiled_methods_filename_ << "': " << error_msg;
-        return false;
-      }
-    } else {
-      compiled_methods_.reset(nullptr);  // By default compile everything.
-    }
-
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_option_.empty()) {
-      dex_files_ = class_linker->GetBootClassPath();
-    } else {
-      TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
-      if (dex_filenames_.empty()) {
-        ATRACE_BEGIN("Opening zip archive from file descriptor");
-        std::string error_msg;
-        std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd_,
-                                                                       zip_location_.c_str(),
-                                                                       &error_msg));
-        if (zip_archive.get() == nullptr) {
-          LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location_ << "': "
-              << error_msg;
+      TimingLogger::ScopedTiming t_dex("Writing and opening dex files", timings_);
+      rodata_.reserve(oat_writers_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        rodata_.push_back(elf_writers_[i]->StartRoData());
+        // Unzip or copy dex files straight to the oat file.
+        std::unique_ptr<MemMap> opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+        if (!oat_writers_[i]->WriteAndOpenDexFiles(rodata_.back(),
+                                                   oat_files_[i].get(),
+                                                   instruction_set_,
+                                                   instruction_set_features_.get(),
+                                                   key_value_store_.get(),
+                                                   /* verify */ true,
+                                                   &opened_dex_files_map,
+                                                   &opened_dex_files)) {
           return false;
         }
-        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &opened_dex_files_)) {
-          LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location_
-              << "': " << error_msg;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-        ATRACE_END();
-      } else {
-        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, &opened_dex_files_);
-        if (failure_count > 0) {
-          LOG(ERROR) << "Failed to open some dex files: " << failure_count;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-      }
-
-      constexpr bool kSaveDexInput = false;
-      if (kSaveDexInput) {
-        for (size_t i = 0; i < dex_files_.size(); ++i) {
-          const DexFile* dex_file = dex_files_[i];
-          std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex",
-                                                 getpid(), i));
-          std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
-          if (tmp_file.get() == nullptr) {
-            PLOG(ERROR) << "Failed to open file " << tmp_file_name
-                << ". Try: adb shell chmod 777 /data/local/tmp";
-            continue;
+        dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files));
+        if (opened_dex_files_map != nullptr) {
+          opened_dex_files_maps_.push_back(std::move(opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+            dex_file_oat_index_map_.emplace(dex_file.get(), i);
+            opened_dex_files_.push_back(std::move(dex_file));
           }
-          // This is just dumping files for debugging. Ignore errors, and leave remnants.
-          UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
-          UNUSED(tmp_file->Flush());
-          UNUSED(tmp_file->Close());
-          LOG(INFO) << "Wrote input to " << tmp_file_name;
+        } else {
+          DCHECK(opened_dex_files.empty());
         }
       }
     }
-    // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
-    // the dex caches stay live since we don't want class unloading to occur during compilation.
-    for (const auto& dex_file : dex_files_) {
-      if (!dex_file->EnableWrite()) {
-        PLOG(ERROR) << "Failed to make .dex file writeable '" << dex_file->GetLocation() << "'\n";
-      }
-      ScopedObjectAccess soa(self);
-      dex_caches_.push_back(soa.AddLocalReference<jobject>(
-          class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
-    }
 
+    dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
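
// MakeNonOwningPointerVector turns the vector of owning unique_ptrs into raw
// pointers for code that only observes the dex files; a sketch of such a
// helper (hypothetical implementation, matching how it is used here):
#include <memory>
#include <vector>

template <typename T>
static std::vector<T*> MakeNonOwningPointers(
    const std::vector<std::unique_ptr<T>>& src) {
  std::vector<T*> result;
  result.reserve(src.size());
  for (const std::unique_ptr<T>& ptr : src) {
    result.push_back(ptr.get());  // Borrow; ownership stays with `src`.
  }
  return result;
}

int main() {
  std::vector<std::unique_ptr<int>> owned;
  owned.push_back(std::unique_ptr<int>(new int(42)));
  const std::vector<int*> borrowed = MakeNonOwningPointers(owned);
  return *borrowed[0] == 42 ? 0 : 1;
}
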
+
+    // We had to postpone the swap decision till now, as this is the point when we actually
+    // know about the dex files we're going to use.
+
+    // Make sure that we haven't created the driver yet.
+    CHECK(driver_ == nullptr);
     // If we use a swap file, ensure we are above the threshold to make it necessary.
     if (swap_fd_ != -1) {
-      if (!UseSwap(image_, dex_files_)) {
+      if (!UseSwap(IsBootImage(), dex_files_)) {
         close(swap_fd_);
         swap_fd_ = -1;
         VLOG(compiler) << "Decided to run without swap.";
@@ -1424,56 +1441,129 @@
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
 
-    /*
-     * If we're not in interpret-only or verify-none mode, go ahead and compile small applications.
-     * Don't bother to check if we're doing the image.
-     */
-    if (!image_ &&
-        compiler_options_->IsCompilationEnabled() &&
-        compiler_kind_ == Compiler::kQuick) {
-      size_t num_methods = 0;
-      for (size_t i = 0; i != dex_files_.size(); ++i) {
-        const DexFile* dex_file = dex_files_[i];
-        CHECK(dex_file != nullptr);
-        num_methods += dex_file->NumMethodIds();
-      }
-      if (num_methods <= compiler_options_->GetNumDexMethodsThreshold()) {
-        compiler_options_->SetCompilerFilter(CompilerOptions::kSpeed);
-        VLOG(compiler) << "Below method threshold, compiling anyways";
+    // If we need to downgrade the compiler-filter for size reasons, do that check now.
+    if (!IsBootImage() && IsVeryLarge(dex_files_)) {
+      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime,
+                                      compiler_options_->GetCompilerFilter())) {
+        LOG(INFO) << "Very large app, downgrading to verify-at-runtime.";
+        // Note: this change won't be reflected in the key-value store, as that had to be
+        //       finalized before loading the dex files. This setup is currently required
+        //       to get the size from the DexFile objects.
+        // TODO: refactor. b/29790079
+        compiler_options_->SetCompilerFilter(CompilerFilter::kVerifyAtRuntime);
       }
     }
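
// CompilerFilter::IsAsGoodAs compares filters by how much compilation they
// imply; the downgrade check above asks whether verify-at-runtime already
// satisfies the requested filter. A sketch with a simplified, hypothetical
// ordering (the real enum has more values):
#include <cstdio>

enum class Filter { kVerifyAtRuntime = 0, kInterpretOnly = 1, kSpace = 2, kSpeed = 3 };

static bool IsAsGoodAs(Filter current, Filter target) {
  return static_cast<int>(current) >= static_cast<int>(target);
}

int main() {
  // A very large app requested at kSpeed gets downgraded, because
  // kVerifyAtRuntime is not as good as kSpeed.
  if (!IsAsGoodAs(Filter::kVerifyAtRuntime, Filter::kSpeed)) {
    std::printf("downgrading to verify-at-runtime\n");
  }
  return 0;
}
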
 
+    if (IsBootImage()) {
+      // For boot image, pass opened dex files to the Runtime::Create().
+      // Note: Runtime acquires ownership of these dex files.
+      runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_);
+      if (!CreateRuntime(std::move(runtime_options))) {
+        return false;
+      }
+    }
+
+    // If we're doing the image, override the compiler filter to force full compilation. This
+    // must be done ahead of WellKnownClasses::Init, which causes verification. Note: it doesn't
+    // force compilation of class initializers.
+    // Whilst we're in native, take the opportunity to initialize well known classes.
+    Thread* self = Thread::Current();
+    WellKnownClasses::Init(self->GetJniEnv());
+
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+    if (!IsBootImage()) {
+      constexpr bool kSaveDexInput = false;
+      if (kSaveDexInput) {
+        SaveDexInput();
+      }
+
+      // Handle and ClassLoader creation needs to come after Runtime::Create.
+      ScopedObjectAccess soa(self);
+
+      // Classpath: first the class-path given.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+
+      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
+      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+
+      class_loader_ = class_linker->CreatePathClassLoader(self, class_path_files);
+    }
+
+    // Ensure opened dex files are writable for dex-to-dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Protect(PROT_READ | PROT_WRITE)) {
+        PLOG(ERROR) << "Failed to make .dex files writeable.";
+        return false;
+      }
+    }
+
+    // Ensure that the dex caches stay live since we don't want class unloading
+    // to occur during compilation.
+    for (const auto& dex_file : dex_files_) {
+      ScopedObjectAccess soa(self);
+      dex_caches_.push_back(soa.AddLocalReference<jobject>(
+          class_linker->RegisterDexFile(*dex_file,
+                                        soa.Decode<mirror::ClassLoader*>(class_loader_))));
+    }
+
     return true;
   }
 
+  // If we need to keep the oat file open for the image writer.
+  bool ShouldKeepOatFileOpen() const {
+    return IsImage() && oat_fd_ != kInvalidFd;
+  }
+
   // Create and invoke the compiler driver. This will compile all the dex files.
   void Compile() {
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
     compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
-    // Handle and ClassLoader creation needs to come after Runtime::Create
-    jobject class_loader = nullptr;
-    Thread* self = Thread::Current();
+    // Find the dex files we should not inline from.
 
-    if (!boot_image_option_.empty()) {
+    std::vector<std::string> no_inline_filters;
+    Split(no_inline_from_string_, ',', &no_inline_filters);
+
+    // For now, on the host always have core-oj removed.
+    const std::string core_oj = "core-oj";
+    if (!kIsTargetBuild && !ContainsElement(no_inline_filters, core_oj)) {
+      no_inline_filters.push_back(core_oj);
+    }
+
+    if (!no_inline_filters.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
-      ScopedObjectAccess soa(self);
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      std::vector<const std::vector<const DexFile*>*> dex_file_vectors = {
+          &class_linker->GetBootClassPath(),
+          &class_path_files,
+          &dex_files_
+      };
+      for (const std::vector<const DexFile*>* dex_file_vector : dex_file_vectors) {
+        for (const DexFile* dex_file : *dex_file_vector) {
+          for (const std::string& filter : no_inline_filters) {
+            // Use dex_file->GetLocation() rather than dex_file->GetBaseLocation(). This
+            // allows tests to specify <test-dexfile>:classes2.dex if needed; and if the
+            // base location passes the StartsWith() test, so do all extra locations.
+            std::string dex_location = dex_file->GetLocation();
+            if (filter.find('/') == std::string::npos) {
+              // The filter does not contain the path. Remove the path from dex_location as well.
+              size_t last_slash = dex_file->GetLocation().rfind('/');
+              if (last_slash != std::string::npos) {
+                dex_location = dex_location.substr(last_slash + 1);
+              }
+            }
 
-      // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files;
-      for (auto& class_path_file : class_path_files_) {
-        class_path_files.push_back(class_path_file.get());
+            if (StartsWith(dex_location, filter.c_str())) {
+              VLOG(compiler) << "Disabling inlining from " << dex_file->GetLocation();
+              no_inline_from_dex_files_.push_back(dex_file);
+              break;
+            }
+          }
+        }
       }
-
-      // Store the classpath we have right now.
-      key_value_store_->Put(OatHeader::kClassPathKey,
-                            OatFile::EncodeDexFileDependencies(class_path_files));
-
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
-
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
+      if (!no_inline_from_dex_files_.empty()) {
+        compiler_options_->no_inline_from_ = &no_inline_from_dex_files_;
+      }
     }
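
// The filter match above compares basenames when the filter contains no '/',
// and full locations otherwise; that decision in isolation (a sketch, with a
// local starts-with check standing in for ART's StartsWith helper):
#include <cstdio>
#include <string>

static bool MatchesNoInlineFilter(std::string dex_location, const std::string& filter) {
  if (filter.find('/') == std::string::npos) {
    const size_t last_slash = dex_location.rfind('/');
    if (last_slash != std::string::npos) {
      dex_location = dex_location.substr(last_slash + 1);  // Compare basenames only.
    }
  }
  return dex_location.compare(0, filter.size(), filter) == 0;  // Starts-with test.
}

int main() {
  const bool match = MatchesNoInlineFilter("/system/framework/core-oj.jar", "core-oj");
  std::printf("%s\n", match ? "inlining disabled" : "no match");
  return 0;
}
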
 
     driver_.reset(new CompilerDriver(compiler_options_.get(),
@@ -1482,72 +1572,71 @@
                                      compiler_kind_,
                                      instruction_set_,
                                      instruction_set_features_.get(),
-                                     image_,
+                                     IsBootImage(),
+                                     IsAppImage(),
                                      image_classes_.release(),
                                      compiled_classes_.release(),
-                                     nullptr,
+                                     compiled_methods_.release(),
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
-                                     dump_cfg_file_name_,
-                                     dump_cfg_append_,
                                      compiler_phases_timings_.get(),
                                      swap_fd_,
-                                     profile_file_));
-
-    driver_->CompileAll(class_loader, dex_files_, timings_);
+                                     profile_compilation_info_.get()));
+    driver_->SetDexFilesForOatFile(dex_files_);
+    driver_->CompileAll(class_loader_, dex_files_, timings_);
   }
 
-  // Notes on the interleaving of creating the image and oat file to
+  // Notes on the interleaving of creating the images and oat files to
   // ensure the references between the two are correct.
   //
   // Currently we have a memory layout that looks something like this:
   //
   // +--------------+
-  // | image        |
+  // | images       |
   // +--------------+
-  // | boot oat     |
+  // | oat files    |
   // +--------------+
   // | alloc spaces |
   // +--------------+
   //
-  // There are several constraints on the loading of the image and boot.oat.
+  // There are several constraints on the loading of the images and oat files.
   //
-  // 1. The image is expected to be loaded at an absolute address and
-  // contains Objects with absolute pointers within the image.
+  // 1. The images are expected to be loaded at an absolute address and
+  // contain Objects with absolute pointers within the images.
   //
-  // 2. There are absolute pointers from Methods in the image to their
-  // code in the oat.
+  // 2. There are absolute pointers from Methods in the images to their
+  // code in the oat files.
   //
-  // 3. There are absolute pointers from the code in the oat to Methods
-  // in the image.
+  // 3. There are absolute pointers from the code in the oat files to Methods
+  // in the images.
   //
-  // 4. There are absolute pointers from code in the oat to other code
-  // in the oat.
+  // 4. There are absolute pointers from code in the oat files to other code
+  // in the oat files.
   //
   // To get this all correct, we go through several steps.
   //
-  // 1. We prepare offsets for all data in the oat file and calculate
+  // 1. We prepare offsets for all data in the oat files and calculate
   // the oat data size and code size. During this stage, we also set
   // oat code offsets in methods for use by the image writer.
   //
-  // 2. We prepare offsets for the objects in the image and calculate
-  // the image size.
+  // 2. We prepare offsets for the objects in the images and calculate
+  // the image sizes.
   //
-  // 3. We create the oat file. Originally this was just our own proprietary
+  // 3. We create the oat files. Originally this was just our own proprietary
   // file but now it is contained within an ELF dynamic object (aka an .so
-  // file). Since we know the image size and oat data size and code size we
+  // file). Since we know the image sizes and oat data sizes and code sizes we
   // can prepare the ELF headers and we then know the ELF memory segment
   // layout and we can now resolve all references. The compiler provides
   // LinkerPatch information in each CompiledMethod and we resolve these,
   // using the layout information and image object locations provided by
   // image writer, as we're writing the method code.
   //
-  // 4. We create the image file. It needs to know where the oat file
-  // will be loaded after itself. Originally when oat file was simply
-  // memory mapped so we could predict where its contents were based
-  // on the file size. Now that it is an ELF file, we need to inspect
-  // the ELF file to understand the in memory segment layout including
+  // 4. We create the image files. They need to know where the oat files
+  // will be loaded after themselves. Originally oat files were simply
+  // memory mapped so we could predict where their contents were based
+  // on the file size. Now that they are ELF files, we need to inspect
+  // the ELF files to understand the in memory segment layout including
   // where the oat header is located within.
   // TODO: We could just remember this information from step 3.
   //
@@ -1559,51 +1648,49 @@
   // Steps 1.-3. are done by the CreateOatFile() above, steps 4.-5.
   // are done by the CreateImageFile() below.
 
-
   // Write out the generated code part. Calls the OatWriter and ElfBuilder. Also prepares the
   // ImageWriter, if necessary.
   // Note: Flushing (and closing) the file is the caller's responsibility, except for the failure
   //       case (when the file will be explicitly erased).
-  bool CreateOatFile() {
-    CHECK(key_value_store_.get() != nullptr);
-
+  bool WriteOatFiles() {
     TimingLogger::ScopedTiming t("dex2oat Oat", timings_);
 
-    std::unique_ptr<OatWriter> oat_writer;
-    {
-      TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings_);
-      std::string image_file_location;
-      uint32_t image_file_location_oat_checksum = 0;
-      uintptr_t image_file_location_oat_data_begin = 0;
-      int32_t image_patch_delta = 0;
-      if (image_) {
-        PrepareImageWriter(image_base_);
-      } else {
-        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-        image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
-        image_file_location_oat_data_begin =
-            reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatDataBegin());
-        image_file_location = image_space->GetImageFilename();
-        image_patch_delta = image_space->GetImageHeader().GetPatchDelta();
+    // Sync the data to the file, in case we did dex2dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Sync()) {
+        PLOG(ERROR) << "Failed to Sync() dex2dex output. Map: " << map->GetName();
+        return false;
       }
-
-      if (!image_file_location.empty()) {
-        key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
-      }
-
-      oat_writer.reset(new OatWriter(dex_files_, image_file_location_oat_checksum,
-                                     image_file_location_oat_data_begin,
-                                     image_patch_delta,
-                                     driver_.get(),
-                                     image_writer_.get(),
-                                     timings_,
-                                     key_value_store_.get()));
     }
 
-    if (image_) {
-      // The OatWriter constructor has already updated offsets in methods and we need to
-      // prepare method offsets in the image address space for direct method patching.
+    if (IsImage()) {
+      if (app_image_ && image_base_ == 0) {
+        gc::Heap* const heap = Runtime::Current()->GetHeap();
+        for (gc::space::ImageSpace* image_space : heap->GetBootImageSpaces()) {
+          image_base_ = std::max(image_base_, RoundUp(
+              reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
+              kPageSize));
+        }
+        // The non moving space is right after the oat file. Put the preferred app image location
+        // right after the non moving space so that we ideally get a continuous immune region for
+        // the GC.
+        // Use the default non moving space capacity since dex2oat does not have a separate non-
+        // moving space. This means the runtime's non moving space size will be as large
+        // as the growth limit for dex2oat, but smaller in the zygote.
+        const size_t non_moving_space_capacity = gc::Heap::kDefaultNonMovingSpaceCapacity;
+        image_base_ += non_moving_space_capacity;
+        VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
+      }
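As a worked example of the base computation above (the constants here are illustrative assumptions; the real values of kPageSize and gc::Heap::kDefaultNonMovingSpaceCapacity may differ):

#include <cstdint>

// Hedged arithmetic sketch: assume a 4 KiB page size and a 64 MiB non moving
// space capacity (both are assumptions for illustration only).
constexpr uintptr_t kExamplePageSize = 4096;
constexpr uintptr_t kExampleNonMovingCapacity = 64u * 1024 * 1024;

constexpr uintptr_t RoundUpTo(uintptr_t x, uintptr_t n) {
  return (x + n - 1) & ~(n - 1);
}

// A boot oat ending at 0x71234567 yields an app image base of
// 0x71235000 + 0x04000000 = 0x75235000.
static_assert(RoundUpTo(0x71234567u, kExamplePageSize) + kExampleNonMovingCapacity
                  == 0x75235000u,
              "example app image base");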
+
+      image_writer_.reset(new ImageWriter(*driver_,
+                                          image_base_,
+                                          compiler_options_->GetCompilePic(),
+                                          IsAppImage(),
+                                          image_storage_mode_,
+                                          oat_filenames_,
+                                          dex_file_oat_index_map_));
+
+      // We need to prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
       if (!image_writer_->PrepareImageAddressSpace()) {
         LOG(ERROR) << "Failed to prepare image address space.";
@@ -1611,90 +1698,181 @@
       }
     }
 
+    linker::MultiOatRelativePatcher patcher(instruction_set_, instruction_set_features_.get());
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
-                             oat_file_.get())) {
-        LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
-        return false;
+      for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
+        std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
+
+        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+        oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files, &patcher);
+
+        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+        size_t text_size = oat_writer->GetSize() - rodata_size;
+        elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer->GetBssSize());
+
+        if (IsImage()) {
+          // Update oat layout.
+          DCHECK(image_writer_ != nullptr);
+          DCHECK_LT(i, oat_filenames_.size());
+          image_writer_->UpdateOatFileLayout(i,
+                                             elf_writer->GetLoadedSize(),
+                                             oat_writer->GetOatDataOffset(),
+                                             oat_writer->GetSize());
+        }
+      }
+
+      for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
+        std::unique_ptr<File>& oat_file = oat_files_[i];
+        std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
+
+        oat_writer->AddMethodDebugInfos(debug::MakeTrampolineInfos(oat_writer->GetOatHeader()));
+
+        // We need to mirror the layout of the ELF file in the compressed debug-info.
+        // Therefore PrepareDebugInfo() relies on the SetLoadedSectionSizes() call further above.
+        elf_writer->PrepareDebugInfo(oat_writer->GetMethodDebugInfo());
+
+        OutputStream*& rodata = rodata_[i];
+        DCHECK(rodata != nullptr);
+        if (!oat_writer->WriteRodata(rodata)) {
+          LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+        elf_writer->EndRoData(rodata);
+        rodata = nullptr;
+
+        OutputStream* text = elf_writer->StartText();
+        if (!oat_writer->WriteCode(text)) {
+          LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+        elf_writer->EndText(text);
+
+        if (!oat_writer->WriteHeader(elf_writer->GetStream(),
+                                     image_file_location_oat_checksum_,
+                                     image_file_location_oat_data_begin_,
+                                     image_patch_delta_)) {
+          LOG(ERROR) << "Failed to write oat header to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+
+        if (IsImage()) {
+          // Update oat header information.
+          DCHECK(image_writer_ != nullptr);
+          DCHECK_LT(i, oat_filenames_.size());
+          image_writer_->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
+        }
+
+        elf_writer->WriteDynamicSection();
+        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+        elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
+
+        if (!elf_writer->End()) {
+          LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
+          return false;
+        }
+
+        // Flush the oat file.
+        if (oat_files_[i] != nullptr) {
+          if (oat_files_[i]->Flush() != 0) {
+            PLOG(ERROR) << "Failed to flush oat file: " << oat_filenames_[i];
+            return false;
+          }
+        }
+
+        VLOG(compiler) << "Oat file written successfully: " << oat_filenames_[i];
+
+        oat_writer.reset();
+        elf_writer.reset();
       }
     }
 
-    VLOG(compiler) << "Oat file written successfully (unstripped): " << oat_location_;
     return true;
   }
 
   // If we are compiling an image, invoke the image creation routine. Else just skip.
   bool HandleImage() {
-    if (image_) {
+    if (IsImage()) {
       TimingLogger::ScopedTiming t("dex2oat ImageWriter", timings_);
       if (!CreateImageFile()) {
         return false;
       }
-      VLOG(compiler) << "Image written successfully: " << image_filename_;
+      VLOG(compiler) << "Images written successfully";
     }
     return true;
   }
 
-  // Create a copy from unstripped to stripped.
-  bool CopyUnstrippedToStripped() {
-    // If we don't want to strip in place, copy from unstripped location to stripped location.
-    // We need to strip after image creation because FixupElf needs to use .strtab.
-    if (oat_unstripped_ != oat_stripped_) {
-      // If the oat file is still open, flush it.
-      if (oat_file_.get() != nullptr && oat_file_->IsOpened()) {
-        if (!FlushCloseOatFile()) {
+  // Create a copy from stripped to unstripped.
+  bool CopyStrippedToUnstripped() {
+    for (size_t i = 0; i < oat_unstripped_.size(); ++i) {
+      // If we don't want to strip in place, copy from stripped location to unstripped location.
+      // We need to strip after image creation because FixupElf needs to use .strtab.
+      if (strcmp(oat_unstripped_[i], oat_filenames_[i]) != 0) {
+        // If the oat file is still open, flush it.
+        if (oat_files_[i].get() != nullptr && oat_files_[i]->IsOpened()) {
+          if (!FlushCloseOatFile(i)) {
+            return false;
+          }
+        }
+
+        TimingLogger::ScopedTiming t("dex2oat OatFile copy", timings_);
+        std::unique_ptr<File> in(OS::OpenFileForReading(oat_filenames_[i]));
+        std::unique_ptr<File> out(OS::CreateEmptyFile(oat_unstripped_[i]));
+        size_t buffer_size = 8192;
+        std::unique_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
+        while (true) {
+          int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
+          if (bytes_read <= 0) {
+            break;
+          }
+          bool write_ok = out->WriteFully(buffer.get(), bytes_read);
+          CHECK(write_ok);
+        }
+        if (out->FlushCloseOrErase() != 0) {
+          PLOG(ERROR) << "Failed to flush and close copied oat file: " << oat_unstripped_[i];
+          return false;
+        }
+        VLOG(compiler) << "Oat file copied successfully (unstripped): " << oat_unstripped_[i];
+      }
+    }
+    return true;
+  }
+
+  bool FlushOatFiles() {
+    TimingLogger::ScopedTiming t2("dex2oat Flush ELF", timings_);
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      if (oat_files_[i].get() != nullptr) {
+        if (oat_files_[i]->Flush() != 0) {
+          PLOG(ERROR) << "Failed to flush oat file: " << oat_filenames_[i];
+          oat_files_[i]->Erase();
           return false;
         }
       }
-
-      TimingLogger::ScopedTiming t("dex2oat OatFile copy", timings_);
-      std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped_.c_str()));
-      std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped_.c_str()));
-      size_t buffer_size = 8192;
-      std::unique_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
-      while (true) {
-        int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
-        if (bytes_read <= 0) {
-          break;
-        }
-        bool write_ok = out->WriteFully(buffer.get(), bytes_read);
-        CHECK(write_ok);
-      }
-      if (out->FlushCloseOrErase() != 0) {
-        PLOG(ERROR) << "Failed to flush and close copied oat file: " << oat_stripped_;
-        return false;
-      }
-      VLOG(compiler) << "Oat file copied successfully (stripped): " << oat_stripped_;
     }
     return true;
   }
 
-  bool FlushOatFile() {
-    if (oat_file_.get() != nullptr) {
-      TimingLogger::ScopedTiming t2("dex2oat Flush ELF", timings_);
-      if (oat_file_->Flush() != 0) {
-        PLOG(ERROR) << "Failed to flush oat file: " << oat_location_ << " / "
-            << oat_filename_;
-        oat_file_->Erase();
-        return false;
-      }
-    }
-    return true;
-  }
-
-  bool FlushCloseOatFile() {
-    if (oat_file_.get() != nullptr) {
-      std::unique_ptr<File> tmp(oat_file_.release());
+  bool FlushCloseOatFile(size_t i) {
+    if (oat_files_[i].get() != nullptr) {
+      std::unique_ptr<File> tmp(oat_files_[i].release());
       if (tmp->FlushCloseOrErase() != 0) {
-        PLOG(ERROR) << "Failed to flush and close oat file: " << oat_location_ << " / "
-            << oat_filename_;
+        PLOG(ERROR) << "Failed to flush and close oat file: " << oat_filenames_[i];
         return false;
       }
     }
     return true;
   }
 
+  bool FlushCloseOatFiles() {
+    bool result = true;
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      result &= FlushCloseOatFile(i);
+    }
+    return result;
+  }
+
   void DumpTiming() {
     if (dump_timing_ || (dump_slow_timing_ && timings_->GetTotalNs() > MsToNs(1000))) {
       LOG(INFO) << Dumpable<TimingLogger>(*timings_);
@@ -1704,83 +1882,405 @@
     }
   }
 
-  CompilerOptions* GetCompilerOptions() const {
-    return compiler_options_.get();
+  bool IsImage() const {
+    return IsAppImage() || IsBootImage();
   }
 
-  bool IsImage() const {
-    return image_;
+  bool IsAppImage() const {
+    return app_image_;
+  }
+
+  bool IsBootImage() const {
+    return boot_image_;
   }
 
   bool IsHost() const {
     return is_host_;
   }
 
+  bool UseProfileGuidedCompilation() const {
+    return CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter());
+  }
+
+  bool LoadProfile() {
+    DCHECK(UseProfileGuidedCompilation());
+
+    profile_compilation_info_.reset(new ProfileCompilationInfo());
+    ScopedFlock flock;
+    bool success = true;
+    std::string error;
+    if (profile_file_fd_ != -1) {
+      // The file doesn't need to be flushed so don't check the usage.
+      // Pass a bogus path so that we can easily attribute any reported error.
+      File file(profile_file_fd_, "profile", /*check_usage*/ false, /*read_only_mode*/ true);
+      if (flock.Init(&file, &error)) {
+        success = profile_compilation_info_->Load(profile_file_fd_);
+      }
+    } else if (profile_file_ != "") {
+      if (flock.Init(profile_file_.c_str(), O_RDONLY, /* block */ true, &error)) {
+        success = profile_compilation_info_->Load(flock.GetFile()->Fd());
+      }
+    }
+    if (!error.empty()) {
+      LOG(WARNING) << "Cannot lock profiles: " << error;
+    }
+
+    if (!success) {
+      profile_compilation_info_.reset(nullptr);
+    }
+
+    return success;
+  }
+
  private:
-  static size_t OpenDexFiles(const std::vector<const char*>& dex_filenames,
-                             const std::vector<const char*>& dex_locations,
-                             std::vector<std::unique_ptr<const DexFile>>* dex_files) {
-    DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is nullptr";
-    size_t failure_count = 0;
-    for (size_t i = 0; i < dex_filenames.size(); i++) {
-      const char* dex_filename = dex_filenames[i];
-      const char* dex_location = dex_locations[i];
-      ATRACE_BEGIN(StringPrintf("Opening dex file '%s'", dex_filenames[i]).c_str());
-      std::string error_msg;
-      if (!OS::FileExists(dex_filename)) {
-        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
-        continue;
-      }
-      if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
-        LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
-        ++failure_count;
-      }
-      ATRACE_END();
+  bool UseSwap(bool is_image, const std::vector<const DexFile*>& dex_files) {
+    if (is_image) {
+      // Don't use swap; we know generation should succeed, and we don't want to slow it down.
+      return false;
     }
-    return failure_count;
+    if (dex_files.size() < min_dex_files_for_swap_) {
+      // If there are fewer dex files than the threshold, assume it will be fine.
+      return false;
+    }
+    size_t dex_files_size = 0;
+    for (const auto* dex_file : dex_files) {
+      dex_files_size += dex_file->GetHeader().file_size_;
+    }
+    return dex_files_size >= min_dex_file_cumulative_size_for_swap_;
   }
 
-  // Returns true if dex_files has a dex with the named location. We compare canonical locations,
-  // so that relative and absolute paths will match. Not caching for the dex_files isn't very
-  // efficient, but under normal circumstances the list is neither large nor is this part too
-  // sensitive.
-  static bool DexFilesContains(const std::vector<const DexFile*>& dex_files,
-                               const std::string& location) {
-    std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str()));
-    for (size_t i = 0; i < dex_files.size(); ++i) {
-      if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) ==
-          canonical_location) {
-        return true;
-      }
+  bool IsVeryLarge(std::vector<const DexFile*>& dex_files) {
+    size_t dex_files_size = 0;
+    for (const auto* dex_file : dex_files) {
+      dex_files_size += dex_file->GetHeader().file_size_;
     }
-    return false;
+    return dex_files_size >= very_large_threshold_;
   }
 
-  // Appends to opened_dex_files any elements of class_path that dex_files
-  // doesn't already contain. This will open those dex files as necessary.
-  static void OpenClassPathFiles(const std::string& class_path,
-                                 std::vector<const DexFile*> dex_files,
-                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
-    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
+  template <typename T>
+  static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
+    std::vector<T*> result;
+    result.reserve(src.size());
+    for (const std::unique_ptr<T>& t : src) {
+      result.push_back(t.get());
+    }
+    return result;
+  }
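Usage of the helper above is as expected; for instance (an illustrative sketch assuming a free-standing copy of MakeNonOwningPointerVector is in scope, since the real one is a private member):

#include <memory>
#include <string>
#include <vector>

// Illustrative use: the result borrows raw pointers and must not outlive
// the owning vector.
void Example() {
  std::vector<std::unique_ptr<std::string>> owned;
  owned.emplace_back(new std::string("classes.dex"));
  std::vector<std::string*> borrowed = MakeNonOwningPointerVector(owned);
  // `owned` still owns the string; `borrowed[0]` is a non-owning view of it.
}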
+
+  std::string GetMultiImageBootClassPath() {
+    DCHECK(IsBootImage());
+    DCHECK_GT(oat_filenames_.size(), 1u);
+    // If the image filename was adapted (e.g., for our tests), we need to change this here,
+    // too, but need to strip all path components (they will be re-established when loading).
+    std::ostringstream bootcp_oss;
+    bool first_bootcp = true;
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      if (!first_bootcp) {
+        bootcp_oss << ":";
+      }
+
+      std::string dex_loc = dex_locations_[i];
+      std::string image_filename = image_filenames_[i];
+
+      // Use the dex_loc path, but the image_filename name (without path elements).
+      size_t dex_last_slash = dex_loc.rfind('/');
+
+      // npos is max(size_t). That makes this a bit ugly.
+      size_t image_last_slash = image_filename.rfind('/');
+      size_t image_last_at = image_filename.rfind('@');
+      size_t image_last_sep = (image_last_slash == std::string::npos)
+                                  ? image_last_at
+                                  : (image_last_at == std::string::npos)
+                                        ? std::string::npos
+                                        : std::max(image_last_slash, image_last_at);
+      // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
+
+      if (dex_last_slash == std::string::npos) {
+        dex_loc = image_filename.substr(image_last_sep + 1);
+      } else {
+        dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
+            image_filename.substr(image_last_sep + 1);
+      }
+
+      // Image filenames already end with .art, no need to replace.
+
+      bootcp_oss << dex_loc;
+      first_bootcp = false;
+    }
+    return bootcp_oss.str();
+  }
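The "+1 overflow" noted above is well defined: std::string::npos is the maximum size_t, so unsigned wraparound gives npos + 1 == 0, and substr(0) returns the whole string. A minimal demonstration:

#include <cassert>
#include <string>

int main() {
  const std::string name = "boot.art";  // No '/' or '@' separator.
  size_t sep = name.rfind('/');         // == std::string::npos.
  assert(sep == std::string::npos);
  // Unsigned wraparound: npos + 1 == 0, so this keeps the full string.
  assert(name.substr(sep + 1) == "boot.art");
}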
+
+  std::vector<std::string> GetClassPathLocations(const std::string& class_path) {
+    // This function is used only for apps and for an app we have exactly one oat file.
+    DCHECK(!IsBootImage());
+    DCHECK_EQ(oat_writers_.size(), 1u);
+    std::vector<std::string> dex_files_canonical_locations;
+    for (const char* location : oat_writers_[0]->GetSourceLocations()) {
+      dex_files_canonical_locations.push_back(DexFile::GetDexCanonicalLocation(location));
+    }
+
     std::vector<std::string> parsed;
     Split(class_path, ':', &parsed);
-    // Take Locks::mutator_lock_ so that lock ordering on the ClassLinker::dex_lock_ is maintained.
-    ScopedObjectAccess soa(Thread::Current());
-    for (size_t i = 0; i < parsed.size(); ++i) {
-      if (DexFilesContains(dex_files, parsed[i])) {
-        continue;
+    auto kept_it = std::remove_if(parsed.begin(),
+                                  parsed.end(),
+                                  [dex_files_canonical_locations](const std::string& location) {
+      return ContainsElement(dex_files_canonical_locations,
+                             DexFile::GetDexCanonicalLocation(location.c_str()));
+    });
+    parsed.erase(kept_it, parsed.end());
+    return parsed;
+  }
+
+  // Opens requested class path files and appends them to opened_dex_files. If the dex files have
+  // been stripped, this opens them from their oat files and appends them to opened_oat_files.
+  static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files,
+                                 std::vector<std::unique_ptr<OatFile>>* opened_oat_files,
+                                 InstructionSet isa) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles dex out-param is nullptr";
+    DCHECK(opened_oat_files != nullptr) << "OpenClassPathFiles oat out-param is nullptr";
+    for (const std::string& location : class_path_locations) {
+      // Stop early if we detect the special shared library, which may be passed as the classpath
+      // for dex2oat when we want to skip the shared libraries check.
+      if (location == OatFile::kSpecialSharedLibrary) {
+        break;
       }
+      static constexpr bool kVerifyChecksum = true;
       std::string error_msg;
-      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, opened_dex_files)) {
-        LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
+      if (!DexFile::Open(
+          location.c_str(), location.c_str(), kVerifyChecksum, &error_msg, opened_dex_files)) {
+        // If we fail to open the dex file because it's been stripped, try to open the dex file
+        // from its corresponding oat file.
+        OatFileAssistant oat_file_assistant(location.c_str(), isa, false);
+        std::unique_ptr<OatFile> oat_file(oat_file_assistant.GetBestOatFile());
+        if (oat_file == nullptr) {
+          LOG(WARNING) << "Failed to open dex file and associated oat file for '" << location
+                       << "': " << error_msg;
+        } else {
+          std::vector<std::unique_ptr<const DexFile>> oat_dex_files =
+              oat_file_assistant.LoadDexFiles(*oat_file, location.c_str());
+          opened_oat_files->push_back(std::move(oat_file));
+          opened_dex_files->insert(opened_dex_files->end(),
+                                   std::make_move_iterator(oat_dex_files.begin()),
+                                   std::make_move_iterator(oat_dex_files.end()));
+        }
       }
     }
   }
 
+  bool PrepareImageClasses() {
+    // If --image-classes was specified, calculate the full list of classes to include in the image.
+    if (image_classes_filename_ != nullptr) {
+      image_classes_ =
+          ReadClasses(image_classes_zip_filename_, image_classes_filename_, "image");
+      if (image_classes_ == nullptr) {
+        return false;
+      }
+    } else if (IsBootImage()) {
+      image_classes_.reset(new std::unordered_set<std::string>);
+    }
+    return true;
+  }
+
+  bool PrepareCompiledClasses() {
+    // If --compiled-classes was specified, calculate the full list of classes to compile in the
+    // image.
+    if (compiled_classes_filename_ != nullptr) {
+      compiled_classes_ =
+          ReadClasses(compiled_classes_zip_filename_, compiled_classes_filename_, "compiled");
+      if (compiled_classes_ == nullptr) {
+        return false;
+      }
+    } else {
+      compiled_classes_.reset(nullptr);  // By default compile everything.
+    }
+    return true;
+  }
+
+  static std::unique_ptr<std::unordered_set<std::string>> ReadClasses(const char* zip_filename,
+                                                                      const char* classes_filename,
+                                                                      const char* tag) {
+    std::unique_ptr<std::unordered_set<std::string>> classes;
+    std::string error_msg;
+    if (zip_filename != nullptr) {
+      classes.reset(ReadImageClassesFromZip(zip_filename, classes_filename, &error_msg));
+    } else {
+      classes.reset(ReadImageClassesFromFile(classes_filename));
+    }
+    if (classes == nullptr) {
+      LOG(ERROR) << "Failed to create list of " << tag << " classes from '"
+                 << classes_filename << "': " << error_msg;
+    }
+    return classes;
+  }
+
+  bool PrepareCompiledMethods() {
+    // If --compiled-methods was specified, read the methods to compile from the given file(s).
+    if (compiled_methods_filename_ != nullptr) {
+      std::string error_msg;
+      if (compiled_methods_zip_filename_ != nullptr) {
+        compiled_methods_.reset(ReadCommentedInputFromZip<std::unordered_set<std::string>>(
+            compiled_methods_zip_filename_,
+            compiled_methods_filename_,
+            nullptr,            // No post-processing.
+            &error_msg));
+      } else {
+        compiled_methods_.reset(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
+            compiled_methods_filename_,
+            nullptr));          // No post-processing.
+      }
+      if (compiled_methods_.get() == nullptr) {
+        LOG(ERROR) << "Failed to create list of compiled methods from '"
+            << compiled_methods_filename_ << "': " << error_msg;
+        return false;
+      }
+    } else {
+      compiled_methods_.reset(nullptr);  // By default compile everything.
+    }
+    return true;
+  }
+
+  void PruneNonExistentDexFiles() {
+    DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+    size_t kept = 0u;
+    for (size_t i = 0, size = dex_filenames_.size(); i != size; ++i) {
+      if (!OS::FileExists(dex_filenames_[i])) {
+        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filenames_[i] << "'";
+      } else {
+        dex_filenames_[kept] = dex_filenames_[i];
+        dex_locations_[kept] = dex_locations_[i];
+        ++kept;
+      }
+    }
+    dex_filenames_.resize(kept);
+    dex_locations_.resize(kept);
+  }
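The explicit write index above keeps the two parallel vectors in sync, which std::remove_if cannot do directly. For comparison, a hedged sketch of the equivalent single-vector form (PruneMissing and the pair layout are illustrative, not ART code):

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// If filename and location lived in one vector of pairs, the erase-remove
// idiom would do the same stable compaction.
void PruneMissing(std::vector<std::pair<std::string, std::string>>* files,
                  bool (*exists)(const std::string&)) {
  files->erase(std::remove_if(files->begin(), files->end(),
                              [exists](const std::pair<std::string, std::string>& f) {
                                return !exists(f.first);
                              }),
               files->end());
}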
+
+  bool AddDexFileSources() {
+    TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
+    if (zip_fd_ != -1) {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      if (!oat_writers_[0]->AddZippedDexFilesSource(File(zip_fd_, /* check_usage */ false),
+                                                    zip_location_.c_str())) {
+        return false;
+      }
+    } else if (oat_writers_.size() > 1u) {
+      // Multi-image.
+      DCHECK_EQ(oat_writers_.size(), dex_filenames_.size());
+      DCHECK_EQ(oat_writers_.size(), dex_locations_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        if (!oat_writers_[i]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    } else {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+      DCHECK_NE(dex_filenames_.size(), 0u);
+      for (size_t i = 0; i != dex_filenames_.size(); ++i) {
+        if (!oat_writers_[0]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  void CreateOatWriters() {
+    TimingLogger::ScopedTiming t2("CreateOatWriters", timings_);
+    elf_writers_.reserve(oat_files_.size());
+    oat_writers_.reserve(oat_files_.size());
+    for (const std::unique_ptr<File>& oat_file : oat_files_) {
+      elf_writers_.emplace_back(CreateElfWriterQuick(instruction_set_,
+                                                     instruction_set_features_.get(),
+                                                     compiler_options_.get(),
+                                                     oat_file.get()));
+      elf_writers_.back()->Start();
+      oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
+    }
+  }
+
+  void SaveDexInput() {
+    for (size_t i = 0; i < dex_files_.size(); ++i) {
+      const DexFile* dex_file = dex_files_[i];
+      std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex",
+                                             getpid(), i));
+      std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
+      if (tmp_file.get() == nullptr) {
+        PLOG(ERROR) << "Failed to open file " << tmp_file_name
+            << ". Try: adb shell chmod 777 /data/local/tmp";
+        continue;
+      }
+      // This is just dumping files for debugging. Ignore errors, and leave remnants.
+      UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
+      UNUSED(tmp_file->Flush());
+      UNUSED(tmp_file->Close());
+      LOG(INFO) << "Wrote input to " << tmp_file_name;
+    }
+  }
+
+  bool PrepareRuntimeOptions(RuntimeArgumentMap* runtime_options) {
+    RuntimeOptions raw_options;
+    if (boot_image_filename_.empty()) {
+      std::string boot_class_path = "-Xbootclasspath:";
+      boot_class_path += Join(dex_filenames_, ':');
+      raw_options.push_back(std::make_pair(boot_class_path, nullptr));
+      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
+      boot_class_path_locations += Join(dex_locations_, ':');
+      raw_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
+    } else {
+      std::string boot_image_option = "-Ximage:";
+      boot_image_option += boot_image_filename_;
+      raw_options.push_back(std::make_pair(boot_image_option, nullptr));
+    }
+    for (size_t i = 0; i < runtime_args_.size(); i++) {
+      raw_options.push_back(std::make_pair(runtime_args_[i], nullptr));
+    }
+
+    raw_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+    raw_options.push_back(
+        std::make_pair("imageinstructionset", GetInstructionSetString(instruction_set_)));
+
+    // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
+    // we don't want fallback mode; it would abort, as we do not push a boot classpath (it might
+    // have been stripped during preopting, anyway).
+    if (!IsBootImage()) {
+      raw_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
+    }
+    // Disable libsigchain. We don't need it during compilation and it prevents us
+    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
+    raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
+    // Disable HSpace compaction to save heap virtual address space.
+    // We only need to disable HSpace compaction for OOM because the background
+    // collector is the same as the foreground collector by default for dex2oat.
+    raw_options.push_back(std::make_pair("-XX:DisableHSpaceCompactForOOM", nullptr));
+
+    // If we're asked to be deterministic, ensure non-concurrent GC for determinism. Also
+    // force the free-list implementation for large objects.
+    if (compiler_options_->IsForceDeterminism()) {
+      raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr));
+      raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr));
+
+      // We also need to turn off the nonmoving space. For that, we need to disable HSpace
+      // compaction (done above) and ensure that neither foreground nor background collectors
+      // are concurrent.
+      raw_options.push_back(std::make_pair("-XX:BackgroundGC=nonconcurrent", nullptr));
+
+      // To make identity hashcode deterministic, set a known seed.
+      mirror::Object::SetHashCodeSeed(987654321U);
+    }
+
+    if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
+      LOG(ERROR) << "Failed to parse runtime options";
+      return false;
+    }
+    return true;
+  }
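The raw_options built above follow a simple shape: each entry pairs an option string with optional out-of-band data (nullptr for plain flags, a pointer such as callbacks_.get() otherwise). A minimal sketch under that assumption (RawOptions and BuildMinimalOptions are illustrative names):

#include <string>
#include <utility>
#include <vector>

// Assumed shape of the raw options list: (option string, extra data) pairs,
// with nullptr for options that carry no extra data.
using RawOptions = std::vector<std::pair<std::string, const void*>>;

RawOptions BuildMinimalOptions(const std::string& boot_image) {
  RawOptions raw;
  raw.push_back(std::make_pair("-Ximage:" + boot_image, nullptr));
  raw.push_back(std::make_pair("-Xno-sig-chain", nullptr));
  return raw;
}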
+
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(const RuntimeOptions& runtime_options)
-      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
-    if (!Runtime::Create(runtime_options, false)) {
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options) {
+    TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
+    if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
     }
@@ -1800,43 +2300,57 @@
 
     runtime_->GetClassLinker()->RunRootClinits();
 
+    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
+    // Runtime::Start, give it away now so that we don't starve GC.
+    Thread* self = Thread::Current();
+    self->TransitionFromRunnableToSuspended(kNative);
+
     return true;
   }
 
-  void PrepareImageWriter(uintptr_t image_base) {
-    image_writer_.reset(new ImageWriter(*driver_, image_base, compiler_options_->GetCompilePic()));
-  }
-
-  // Let the ImageWriter write the image file. If we do not compile PIC, also fix up the oat file.
+  // Let the ImageWriter write the image files. If we do not compile PIC, also fix up the oat files.
   bool CreateImageFile()
       REQUIRES(!Locks::mutator_lock_) {
     CHECK(image_writer_ != nullptr);
-    if (!image_writer_->Write(image_filename_, oat_unstripped_, oat_location_)) {
-      LOG(ERROR) << "Failed to create image file " << image_filename_;
+    if (!IsBootImage()) {
+      CHECK(image_filenames_.empty());
+      image_filenames_.push_back(app_image_file_name_.c_str());
+    }
+    if (!image_writer_->Write(app_image_fd_,
+                              image_filenames_,
+                              oat_filenames_)) {
+      LOG(ERROR) << "Failure during image file creation";
       return false;
     }
-    uintptr_t oat_data_begin = image_writer_->GetOatDataBegin();
 
+    // We need the OatDataBegin entries.
+    dchecked_vector<uintptr_t> oat_data_begins;
+    for (size_t i = 0, size = oat_filenames_.size(); i != size; ++i) {
+      oat_data_begins.push_back(image_writer_->GetOatDataBegin(i));
+    }
     // Destroy ImageWriter before doing FixupElf.
     image_writer_.reset();
 
-    // Do not fix up the ELF file if we are --compile-pic
-    if (!compiler_options_->GetCompilePic()) {
-      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_unstripped_.c_str()));
-      if (oat_file.get() == nullptr) {
-        PLOG(ERROR) << "Failed to open ELF file: " << oat_unstripped_;
-        return false;
-      }
+    for (size_t i = 0, size = oat_filenames_.size(); i != size; ++i) {
+      const char* oat_filename = oat_filenames_[i];
+      // Do not fix up the ELF file if we are compiling with --compile-pic or compiling an app image.
+      if (!compiler_options_->GetCompilePic() && IsBootImage()) {
+        std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+        if (oat_file.get() == nullptr) {
+          PLOG(ERROR) << "Failed to open ELF file: " << oat_filename;
+          return false;
+        }
 
-      if (!ElfWriter::Fixup(oat_file.get(), oat_data_begin)) {
-        oat_file->Erase();
-        LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
-        return false;
-      }
+        if (!ElfWriter::Fixup(oat_file.get(), oat_data_begins[i])) {
+          oat_file->Erase();
+          LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
+          return false;
+        }
 
-      if (oat_file->FlushCloseOrErase()) {
-        PLOG(ERROR) << "Failed to flush and close fixed ELF file " << oat_file->GetPath();
-        return false;
+        if (oat_file->FlushCloseOrErase()) {
+          PLOG(ERROR) << "Failed to flush and close fixed ELF file " << oat_file->GetPath();
+          return false;
+        }
       }
     }
 
@@ -1847,7 +2361,8 @@
   static std::unordered_set<std::string>* ReadImageClassesFromFile(
       const char* image_classes_filename) {
     std::function<std::string(const char*)> process = DotToDescriptor;
-    return ReadCommentedInputFromFile(image_classes_filename, &process);
+    return ReadCommentedInputFromFile<std::unordered_set<std::string>>(image_classes_filename,
+                                                                       &process);
   }
 
   // Reads the class names (java.lang.Object) and returns a set of descriptors (Ljava/lang/Object;)
@@ -1856,27 +2371,32 @@
         const char* image_classes_filename,
         std::string* error_msg) {
     std::function<std::string(const char*)> process = DotToDescriptor;
-    return ReadCommentedInputFromZip(zip_filename, image_classes_filename, &process, error_msg);
+    return ReadCommentedInputFromZip<std::unordered_set<std::string>>(zip_filename,
+                                                                      image_classes_filename,
+                                                                      &process,
+                                                                      error_msg);
   }
 
   // Read lines from the given file, dropping comments and empty lines. Post-process each line with
   // the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputFromFile(
+  template <typename T>
+  static T* ReadCommentedInputFromFile(
       const char* input_filename, std::function<std::string(const char*)>* process) {
     std::unique_ptr<std::ifstream> input_file(new std::ifstream(input_filename, std::ifstream::in));
     if (input_file.get() == nullptr) {
       LOG(ERROR) << "Failed to open input file " << input_filename;
       return nullptr;
     }
-    std::unique_ptr<std::unordered_set<std::string>> result(
-        ReadCommentedInputStream(*input_file, process));
+    std::unique_ptr<T> result(
+        ReadCommentedInputStream<T>(*input_file, process));
     input_file->close();
     return result.release();
   }
 
   // Read lines from the given file from the given zip file, dropping comments and empty lines.
   // Post-process each line with the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputFromZip(
+  template <typename T>
+  static T* ReadCommentedInputFromZip(
       const char* zip_filename,
       const char* input_filename,
       std::function<std::string(const char*)>* process,
@@ -1902,16 +2422,16 @@
     const std::string input_string(reinterpret_cast<char*>(input_file->Begin()),
                                    input_file->Size());
     std::istringstream input_stream(input_string);
-    return ReadCommentedInputStream(input_stream, process);
+    return ReadCommentedInputStream<T>(input_stream, process);
   }
 
   // Read lines from the given stream, dropping comments and empty lines. Post-process each line
   // with the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputStream(
+  template <typename T>
+  static T* ReadCommentedInputStream(
       std::istream& in_stream,
       std::function<std::string(const char*)>* process) {
-    std::unique_ptr<std::unordered_set<std::string>> image_classes(
-        new std::unordered_set<std::string>);
+    std::unique_ptr<T> output(new T());
     while (in_stream.good()) {
       std::string dot;
       std::getline(in_stream, dot);
@@ -1920,12 +2440,12 @@
       }
       if (process != nullptr) {
         std::string descriptor((*process)(dot.c_str()));
-        image_classes->insert(descriptor);
+        output->insert(output->end(), descriptor);
       } else {
-        image_classes->insert(dot);
+        output->insert(output->end(), dot);
       }
     }
-    return image_classes.release();
+    return output.release();
   }
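The templated readers above rely only on T::insert(iterator, value), which std::unordered_set (via its hint overload) and sequence containers both provide. A stripped-down sketch of the same pattern (ReadLines is illustrative and omits the comment/empty-line filtering of the real reader):

#include <istream>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>

// Same insert(end, value) contract as ReadCommentedInputStream<T>.
template <typename T>
T ReadLines(std::istream& in) {
  T out;
  std::string line;
  while (std::getline(in, line)) {
    out.insert(out.end(), line);
  }
  return out;
}

// One body serves both set-like and sequence-like outputs:
//   std::istringstream in1("LFoo;\nLBar;");
//   auto as_set = ReadLines<std::unordered_set<std::string>>(in1);
//   std::istringstream in2("LFoo;\nLBar;");
//   auto as_vec = ReadLines<std::vector<std::string>>(in2);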
 
   void LogCompletionTime() {
@@ -1939,12 +2459,34 @@
                   "");
   }
 
+  std::string StripIsaFrom(const char* image_filename, InstructionSet isa) {
+    std::string res(image_filename);
+    size_t last_slash = res.rfind('/');
+    if (last_slash == std::string::npos || last_slash == 0) {
+      return res;
+    }
+    size_t penultimate_slash = res.rfind('/', last_slash - 1);
+    if (penultimate_slash == std::string::npos) {
+      return res;
+    }
+    // Check that the string in-between is the expected one.
+    if (res.substr(penultimate_slash + 1, last_slash - penultimate_slash - 1) !=
+            GetInstructionSetString(isa)) {
+      LOG(WARNING) << "Unexpected string when trying to strip isa: " << res;
+      return res;
+    }
+    return res.substr(0, penultimate_slash) + res.substr(last_slash);
+  }
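For illustration, the stripping behaves like this (a standalone sketch with the ISA name passed directly as a string; StripDirComponent and the paths are made up):

#include <cassert>
#include <string>

// Drop the penultimate path component iff it equals the ISA name.
std::string StripDirComponent(const std::string& path, const std::string& isa) {
  size_t last = path.rfind('/');
  if (last == std::string::npos || last == 0) return path;
  size_t penult = path.rfind('/', last - 1);
  if (penult == std::string::npos) return path;
  if (path.substr(penult + 1, last - penult - 1) != isa) return path;
  return path.substr(0, penult) + path.substr(last);
}

int main() {
  assert(StripDirComponent("/system/framework/arm/boot.art", "arm") ==
         "/system/framework/boot.art");
  assert(StripDirComponent("/system/framework/boot.art", "arm") ==
         "/system/framework/boot.art");  // No "arm" component: unchanged.
}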
+
   std::unique_ptr<CompilerOptions> compiler_options_;
   Compiler::Kind compiler_kind_;
 
   InstructionSet instruction_set_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
+  uint32_t image_file_location_oat_checksum_;
+  uintptr_t image_file_location_oat_data_begin_;
+  int32_t image_patch_delta_;
   std::unique_ptr<SafeMap<std::string, std::string> > key_value_store_;
 
   std::unique_ptr<VerificationResults> verification_results_;
@@ -1952,60 +2494,87 @@
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 
+  std::unique_ptr<Runtime> runtime_;
+
   // Ownership for the class path files.
   std::vector<std::unique_ptr<const DexFile>> class_path_files_;
 
-  std::unique_ptr<Runtime> runtime_;
-
   size_t thread_count_;
   uint64_t start_ns_;
   std::unique_ptr<WatchDog> watchdog_;
-  std::unique_ptr<File> oat_file_;
-  std::string oat_stripped_;
-  std::string oat_unstripped_;
+  std::vector<std::unique_ptr<File>> oat_files_;
   std::string oat_location_;
-  std::string oat_filename_;
+  std::vector<const char*> oat_filenames_;
+  std::vector<const char*> oat_unstripped_;
   int oat_fd_;
   std::vector<const char*> dex_filenames_;
   std::vector<const char*> dex_locations_;
   int zip_fd_;
   std::string zip_location_;
-  std::string boot_image_option_;
+  std::string boot_image_filename_;
   std::vector<const char*> runtime_args_;
-  std::string image_filename_;
+  std::vector<const char*> image_filenames_;
   uintptr_t image_base_;
   const char* image_classes_zip_filename_;
   const char* image_classes_filename_;
+  ImageHeader::StorageMode image_storage_mode_;
   const char* compiled_classes_zip_filename_;
   const char* compiled_classes_filename_;
   const char* compiled_methods_zip_filename_;
   const char* compiled_methods_filename_;
+  const char* passes_to_run_filename_;
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_methods_;
-  bool image_;
+  std::unique_ptr<std::vector<std::string>> passes_to_run_;
+  bool app_image_;
+  bool boot_image_;
+  bool multi_image_;
   bool is_host_;
   std::string android_root_;
+  // Dex files we are compiling, does not include the class path dex files.
   std::vector<const DexFile*> dex_files_;
+  std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  jobject class_loader_;
 
+  std::vector<std::unique_ptr<ElfWriter>> elf_writers_;
+  std::vector<std::unique_ptr<OatWriter>> oat_writers_;
+  std::vector<OutputStream*> rodata_;
   std::unique_ptr<ImageWriter> image_writer_;
   std::unique_ptr<CompilerDriver> driver_;
 
+  std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_;
+  std::vector<std::unique_ptr<OatFile>> opened_oat_files_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+
+  std::vector<const DexFile*> no_inline_from_dex_files_;
+
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
   bool dump_passes_;
   bool dump_timing_;
   bool dump_slow_timing_;
-  std::string dump_cfg_file_name_;
-  bool dump_cfg_append_;
   std::string swap_file_name_;
   int swap_fd_;
-  std::string profile_file_;  // Profile file to use
+  size_t min_dex_files_for_swap_ = kDefaultMinDexFilesForSwap;
+  size_t min_dex_file_cumulative_size_for_swap_ = kDefaultMinDexFileCumulativeSizeForSwap;
+  size_t very_large_threshold_ = std::numeric_limits<size_t>::max();
+  std::string app_image_file_name_;
+  int app_image_fd_;
+  std::string profile_file_;
+  int profile_file_fd_;
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
-  std::unique_ptr<std::ostream> init_failure_output_;
+  std::vector<std::vector<const DexFile*>> dex_files_per_oat_file_;
+  std::unordered_map<const DexFile*, size_t> dex_file_oat_index_map_;
+
+  // Backing storage.
+  std::vector<std::string> char_backing_storage_;
+
+  // See CompilerOptions.force_determinism_.
+  bool force_determinism_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
@@ -2031,21 +2600,26 @@
 }
 
 static int CompileImage(Dex2Oat& dex2oat) {
+  dex2oat.LoadClassProfileDescriptors();
   dex2oat.Compile();
 
-  // Create the boot.oat.
-  if (!dex2oat.CreateOatFile()) {
-    dex2oat.EraseOatFile();
+  if (!dex2oat.WriteOatFiles()) {
+    dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
 
-  // Flush and close the boot.oat. We always expect the output file by name, and it will be
-  // re-opened from the unstripped name.
-  if (!dex2oat.FlushCloseOatFile()) {
+  // Flush boot.oat. We always expect the output file by name, and it will be re-opened from the
+  // unstripped name. Do not close the file if we are compiling the image with an oat fd since the
+  // image writer will require this fd to generate the image.
+  if (dex2oat.ShouldKeepOatFileOpen()) {
+    if (!dex2oat.FlushOatFiles()) {
+      return EXIT_FAILURE;
+    }
+  } else if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
-  // Creates the boot.art and patches the boot.oat.
+  // Creates the boot.art and patches the oat files.
   if (!dex2oat.HandleImage()) {
     return EXIT_FAILURE;
   }
@@ -2056,13 +2630,13 @@
     return EXIT_SUCCESS;
   }
 
-  // Copy unstripped to stripped location, if necessary.
-  if (!dex2oat.CopyUnstrippedToStripped()) {
+  // Copy stripped to unstripped location, if necessary.
+  if (!dex2oat.CopyStrippedToUnstripped()) {
     return EXIT_FAILURE;
   }
 
-  // FlushClose again, as stripping might have re-opened the oat file.
-  if (!dex2oat.FlushCloseOatFile()) {
+  // FlushClose again, as stripping might have re-opened the oat files.
+  if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
@@ -2073,21 +2647,17 @@
 static int CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  // Create the app oat.
-  if (!dex2oat.CreateOatFile()) {
-    dex2oat.EraseOatFile();
+  if (!dex2oat.WriteOatFiles()) {
+    dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
 
-  // Do not close the oat file here. We might haven gotten the output file by file descriptor,
+  // Do not close the oat files here. We might have gotten the output file by file descriptor,
   // which we would lose.
-  if (!dex2oat.FlushOatFile()) {
-    return EXIT_FAILURE;
-  }
 
   // When given --host, finish early without stripping.
   if (dex2oat.IsHost()) {
-    if (!dex2oat.FlushCloseOatFile()) {
+    if (!dex2oat.FlushCloseOatFiles()) {
       return EXIT_FAILURE;
     }
 
@@ -2095,14 +2665,14 @@
     return EXIT_SUCCESS;
   }
 
-  // Copy unstripped to stripped location, if necessary. This will implicitly flush & close the
-  // unstripped version. If this is given, we expect to be able to open writable files by name.
-  if (!dex2oat.CopyUnstrippedToStripped()) {
+  // Copy stripped to unstripped location, if necessary. This will implicitly flush & close the
+  // stripped versions. If this is given, we expect to be able to open writable files by name.
+  if (!dex2oat.CopyStrippedToUnstripped()) {
     return EXIT_FAILURE;
   }
 
-  // Flush and close the file.
-  if (!dex2oat.FlushCloseOatFile()) {
+  // Flush and close the files.
+  if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
@@ -2115,13 +2685,26 @@
 
   TimingLogger timings("compiler", false, false);
 
-  Dex2Oat dex2oat(&timings);
+  // Allocate `dex2oat` on the heap instead of on the stack, as Clang
+  // might produce a stack frame too large for this function or for
+  // functions inlining it (such as main), which would violate the
+  // limit imposed by the `-Wframe-larger-than` option.
+  std::unique_ptr<Dex2Oat> dex2oat = MakeUnique<Dex2Oat>(&timings);
 
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
-  dex2oat.ParseArgs(argc, argv);
+  dex2oat->ParseArgs(argc, argv);
+
+  // If needed, process profile information for profile guided compilation.
+  // This operation involves I/O.
+  if (dex2oat->UseProfileGuidedCompilation()) {
+    if (!dex2oat->LoadProfile()) {
+      LOG(ERROR) << "Failed to process profile file";
+      return EXIT_FAILURE;
+    }
+  }
 
   // Check early that the result of compilation can be written
-  if (!dex2oat.OpenFile()) {
+  if (!dex2oat->OpenFile()) {
     return EXIT_FAILURE;
   }
 
@@ -2131,25 +2714,25 @@
   //   3) Compiling with --host
   //   4) Compiling on the host (not a target build)
   // Otherwise, print a stripped command line.
-  if (kIsDebugBuild || dex2oat.IsImage() || dex2oat.IsHost() || !kIsTargetBuild) {
+  if (kIsDebugBuild || dex2oat->IsBootImage() || dex2oat->IsHost() || !kIsTargetBuild) {
     LOG(INFO) << CommandLine();
   } else {
     LOG(INFO) << StrippedCommandLine();
   }
 
-  if (!dex2oat.Setup()) {
-    dex2oat.EraseOatFile();
+  if (!dex2oat->Setup()) {
+    dex2oat->EraseOatFiles();
     return EXIT_FAILURE;
   }
 
   bool result;
-  if (dex2oat.IsImage()) {
-    result = CompileImage(dex2oat);
+  if (dex2oat->IsImage()) {
+    result = CompileImage(*dex2oat);
   } else {
-    result = CompileApp(dex2oat);
+    result = CompileApp(*dex2oat);
   }
 
-  dex2oat.Shutdown();
+  dex2oat->Shutdown();
   return result;
 }
 }  // namespace art
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
new file mode 100644
index 0000000..58dd047
--- /dev/null
+++ b/dex2oat/dex2oat_test.cc
@@ -0,0 +1,556 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <regex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "common_runtime_test.h"
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/stringprintf.h"
+#include "dex2oat_environment_test.h"
+#include "oat.h"
+#include "oat_file.h"
+#include "utils.h"
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+namespace art {
+
+class Dex2oatTest : public Dex2oatEnvironmentTest {
+ public:
+  virtual void TearDown() OVERRIDE {
+    Dex2oatEnvironmentTest::TearDown();
+
+    output_ = "";
+    error_msg_ = "";
+    success_ = false;
+  }
+
+ protected:
+  void GenerateOdexForTest(const std::string& dex_location,
+                           const std::string& odex_location,
+                           CompilerFilter::Filter filter,
+                           const std::vector<std::string>& extra_args = {},
+                           bool expect_success = true) {
+    std::vector<std::string> args;
+    args.push_back("--dex-file=" + dex_location);
+    args.push_back("--oat-file=" + odex_location);
+    args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
+    args.push_back("--runtime-arg");
+    args.push_back("-Xnorelocate");
+
+    args.insert(args.end(), extra_args.begin(), extra_args.end());
+
+    std::string error_msg;
+    bool success = Dex2Oat(args, &error_msg);
+
+    if (expect_success) {
+      ASSERT_TRUE(success) << error_msg;
+
+      // Verify the odex file was generated as expected.
+      std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                       odex_location.c_str(),
+                                                       nullptr,
+                                                       nullptr,
+                                                       false,
+                                                       /*low_4gb*/false,
+                                                       dex_location.c_str(),
+                                                       &error_msg));
+      ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+
+      CheckFilter(filter, odex_file->GetCompilerFilter());
+    } else {
+      ASSERT_FALSE(success) << output_;
+
+      error_msg_ = error_msg;
+
+      // Verify there's no loadable odex file.
+      std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                       odex_location.c_str(),
+                                                       nullptr,
+                                                       nullptr,
+                                                       false,
+                                                       /*low_4gb*/false,
+                                                       dex_location.c_str(),
+                                                       &error_msg));
+      ASSERT_TRUE(odex_file.get() == nullptr);
+    }
+  }
+
+  // Check the input compiler filter against the generated oat file's filter. May be overridden
+  // in subclasses when equality is not expected.
+  virtual void CheckFilter(CompilerFilter::Filter expected, CompilerFilter::Filter actual) {
+    EXPECT_EQ(expected, actual);
+  }
+
+  bool Dex2Oat(const std::vector<std::string>& dex2oat_args, std::string* error_msg) {
+    Runtime* runtime = Runtime::Current();
+
+    const std::vector<gc::space::ImageSpace*>& image_spaces =
+        runtime->GetHeap()->GetBootImageSpaces();
+    if (image_spaces.empty()) {
+      *error_msg = "No image location found for Dex2Oat.";
+      return false;
+    }
+    std::string image_location = image_spaces[0]->GetImageLocation();
+
+    std::vector<std::string> argv;
+    argv.push_back(runtime->GetCompilerExecutable());
+    argv.push_back("--runtime-arg");
+    argv.push_back("-classpath");
+    argv.push_back("--runtime-arg");
+    std::string class_path = runtime->GetClassPathString();
+    if (class_path == "") {
+      class_path = OatFile::kSpecialSharedLibrary;
+    }
+    argv.push_back(class_path);
+    if (runtime->IsDebuggable()) {
+      argv.push_back("--debuggable");
+    }
+    runtime->AddCurrentRuntimeFeaturesAsDex2OatArguments(&argv);
+
+    if (!runtime->IsVerificationEnabled()) {
+      argv.push_back("--compiler-filter=verify-none");
+    }
+
+    if (runtime->MustRelocateIfPossible()) {
+      argv.push_back("--runtime-arg");
+      argv.push_back("-Xrelocate");
+    } else {
+      argv.push_back("--runtime-arg");
+      argv.push_back("-Xnorelocate");
+    }
+
+    if (!kIsTargetBuild) {
+      argv.push_back("--host");
+    }
+
+    argv.push_back("--boot-image=" + image_location);
+
+    std::vector<std::string> compiler_options = runtime->GetCompilerOptions();
+    argv.insert(argv.end(), compiler_options.begin(), compiler_options.end());
+
+    argv.insert(argv.end(), dex2oat_args.begin(), dex2oat_args.end());
+
+    // We must set --android-root.
+    const char* android_root = getenv("ANDROID_ROOT");
+    CHECK(android_root != nullptr);
+    argv.push_back("--android-root=" + std::string(android_root));
+
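+    // Create a pipe to capture the child's stderr: link[0] is the read end,
+    // link[1] the write end.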
+    int link[2];
+
+    if (pipe(link) == -1) {
+      return false;
+    }
+
+    pid_t pid = fork();
+    if (pid == -1) {
+      return false;
+    }
+
+    if (pid == 0) {
+      // We need dex2oat to actually log things.
+      setenv("ANDROID_LOG_TAGS", "*:d", 1);
+      dup2(link[1], STDERR_FILENO);
+      close(link[0]);
+      close(link[1]);
+      std::vector<const char*> c_args;
+      for (const std::string& str : argv) {
+        c_args.push_back(str.c_str());
+      }
+      c_args.push_back(nullptr);
+      execv(c_args[0], const_cast<char* const*>(c_args.data()));
+      exit(1);
+    } else {
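+      // Parent process: close the unused write end; otherwise the read loop
+      // below would never see EOF after the child exits.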
+      close(link[1]);
+      char buffer[128];
+      memset(buffer, 0, 128);
+      ssize_t bytes_read = 0;
+
+      while (TEMP_FAILURE_RETRY(bytes_read = read(link[0], buffer, 128)) > 0) {
+        output_ += std::string(buffer, bytes_read);
+      }
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        success_ = (status == 0);
+      }
+    }
+    return success_;
+  }
+
+  std::string output_ = "";
+  std::string error_msg_ = "";
+  bool success_ = false;
+};
+
+class Dex2oatSwapTest : public Dex2oatTest {
+ protected:
+  void RunTest(bool use_fd, bool expect_use, const std::vector<std::string>& extra_args = {}) {
+    std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
+    std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
+
+    Copy(GetTestDexFileName(), dex_location);
+
+    std::vector<std::string> copy(extra_args);
+
+    std::unique_ptr<ScratchFile> sf;
+    if (use_fd) {
+      sf.reset(new ScratchFile());
+      copy.push_back(StringPrintf("--swap-fd=%d", sf->GetFd()));
+    } else {
+      std::string swap_location = GetOdexDir() + "/Dex2OatSwapTest.odex.swap";
+      copy.push_back("--swap-file=" + swap_location);
+    }
+    GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed, copy);
+
+    CheckValidity();
+    ASSERT_TRUE(success_);
+    CheckResult(expect_use);
+  }
+
+  virtual std::string GetTestDexFileName() {
+    return GetDexSrc1();
+  }
+
+  virtual void CheckResult(bool expect_use) {
+    if (kIsTargetBuild) {
+      CheckTargetResult(expect_use);
+    } else {
+      CheckHostResult(expect_use);
+    }
+  }
+
+  virtual void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
+    // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
+    //       something for the file-descriptor variants, where we can control the lifetime of
+    //       the swap file and thus take a look at it.
+  }
+
+  virtual void CheckHostResult(bool expect_use) {
+    if (!kIsTargetBuild) {
+      if (expect_use) {
+        EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  // Check whether the dex2oat run was really successful.
+  virtual void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
+    }
+  }
+
+  virtual void CheckTargetValidity() {
+    // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
+    //       something for the file-descriptor variants, where we can control the lifetime of
+    //       the swap file and thus take a look at it.
+  }
+
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took".
+  virtual void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
+
+TEST_F(Dex2oatSwapTest, DoNotUseSwapDefaultSingleSmall) {
+  RunTest(false /* use_fd */, false /* expect_use */);
+  RunTest(true /* use_fd */, false /* expect_use */);
+}
+
+TEST_F(Dex2oatSwapTest, DoNotUseSwapSingle) {
+  RunTest(false /* use_fd */, false /* expect_use */, { "--swap-dex-size-threshold=0" });
+  RunTest(true /* use_fd */, false /* expect_use */, { "--swap-dex-size-threshold=0" });
+}
+
+TEST_F(Dex2oatSwapTest, DoNotUseSwapSmall) {
+  RunTest(false /* use_fd */, false /* expect_use */, { "--swap-dex-count-threshold=0" });
+  RunTest(true /* use_fd */, false /* expect_use */, { "--swap-dex-count-threshold=0" });
+}
+
+TEST_F(Dex2oatSwapTest, DoUseSwapSingleSmall) {
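+  // Judging by the tests above, both thresholds must be crossed before
+  // dex2oat accepts a swap file, so zero both of them here.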
+  RunTest(false /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+  RunTest(true /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+}
+
+class Dex2oatSwapUseTest : public Dex2oatSwapTest {
+ protected:
+  void CheckHostResult(bool expect_use) OVERRIDE {
+    if (!kIsTargetBuild) {
+      if (expect_use) {
+        EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  std::string GetTestDexFileName() OVERRIDE {
+    // Use Statics as it has a handful of functions.
+    return CommonRuntimeTest::GetTestDexFileName("Statics");
+  }
+
+  void GrabResult1() {
+    if (!kIsTargetBuild) {
+      native_alloc_1_ = ParseNativeAlloc();
+      swap_1_ = ParseSwap(false /* expected */);
+    } else {
+      native_alloc_1_ = std::numeric_limits<size_t>::max();
+      swap_1_ = 0;
+    }
+  }
+
+  void GrabResult2() {
+    if (!kIsTargetBuild) {
+      native_alloc_2_ = ParseNativeAlloc();
+      swap_2_ = ParseSwap(true /* expected */);
+    } else {
+      native_alloc_2_ = 0;
+      swap_2_ = std::numeric_limits<size_t>::max();
+    }
+  }
+
+ private:
+  size_t ParseNativeAlloc() {
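+    // Hypothetical shape of a line this regex should match (inferred from the
+    // pattern itself, not from documented dex2oat output):
+    //   dex2oat took 250.123ms ... native alloc=4MB (4194304B) ...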
+    std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch native_alloc_match;
+    bool found = std::regex_search(output_, native_alloc_match, native_alloc_regex);
+    if (!found) {
+      EXPECT_TRUE(found);
+      return 0;
+    }
+    if (native_alloc_match.size() != 2U) {
+      EXPECT_EQ(native_alloc_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(native_alloc_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+  size_t ParseSwap(bool expected) {
+    std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch swap_match;
+    bool found = std::regex_search(output_, swap_match, swap_regex);
+    if (found != expected) {
+      EXPECT_EQ(expected, found);
+      return 0;
+    }
+
+    if (!found) {
+      return 0;
+    }
+
+    if (swap_match.size() != 2U) {
+      EXPECT_EQ(swap_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(swap_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+ protected:
+  size_t native_alloc_1_;
+  size_t native_alloc_2_;
+
+  size_t swap_1_;
+  size_t swap_2_;
+};
+
+TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  // The `native_alloc_2_ >= native_alloc_1_` assertion below may not
+  // hold true on some x86 systems when read barriers are enabled;
+  // disable this test while we investigate (b/29259363).
+  TEST_DISABLED_FOR_READ_BARRIER_ON_X86();
+
+  RunTest(false /* use_fd */,
+          false /* expect_use */);
+  GrabResult1();
+  std::string output_1 = output_;
+
+  output_ = "";
+
+  RunTest(false /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+  GrabResult2();
+  std::string output_2 = output_;
+
+  if (native_alloc_2_ >= native_alloc_1_ || swap_1_ >= swap_2_) {
+    EXPECT_LT(native_alloc_2_, native_alloc_1_);
+    EXPECT_LT(swap_1_, swap_2_);
+
+    LOG(ERROR) << output_1;
+    LOG(ERROR) << output_2;
+  }
+}
+
+class Dex2oatVeryLargeTest : public Dex2oatTest {
+ protected:
+  void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
+                   CompilerFilter::Filter result ATTRIBUTE_UNUSED) OVERRIDE {
+    // Ignore, we'll do our own checks.
+  }
+
+  void RunTest(CompilerFilter::Filter filter,
+               bool expect_large,
+               const std::vector<std::string>& extra_args = {}) {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+
+    Copy(GetDexSrc1(), dex_location);
+
+    std::vector<std::string> copy(extra_args);
+
+    GenerateOdexForTest(dex_location, odex_location, filter, copy);
+
+    CheckValidity();
+    ASSERT_TRUE(success_);
+    CheckResult(dex_location, odex_location, filter, expect_large);
+  }
+
+  void CheckResult(const std::string& dex_location,
+                   const std::string& odex_location,
+                   CompilerFilter::Filter filter,
+                   bool expect_large) {
+    // Host/target independent checks.
+    std::string error_msg;
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                     odex_location.c_str(),
+                                                     nullptr,
+                                                     nullptr,
+                                                     false,
+                                                     /*low_4gb*/false,
+                                                     dex_location.c_str(),
+                                                     &error_msg));
+    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+    if (expect_large) {
+      // Note: we cannot check the following:
+      //   EXPECT_TRUE(CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime,
+      //                                          odex_file->GetCompilerFilter()));
+      // The reason is that the filter override currently happens when the dex files are
+      // loaded in dex2oat, which is after the oat file has been started. Thus, the filter
+      // stored in the header cannot be changed, and the original filter is set in stone.
+
+      for (const OatDexFile* oat_dex_file : odex_file->GetOatDexFiles()) {
+        std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+        ASSERT_TRUE(dex_file != nullptr);
+        uint32_t class_def_count = dex_file->NumClassDefs();
+        ASSERT_LT(class_def_count, std::numeric_limits<uint16_t>::max());
+        for (uint16_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+          OatFile::OatClass oat_class = oat_dex_file->GetOatClass(class_def_index);
+          EXPECT_EQ(oat_class.GetType(), OatClassType::kOatClassNoneCompiled);
+        }
+      }
+
+      // If the input filter was "below," it should have been used.
+      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime, filter)) {
+        EXPECT_EQ(odex_file->GetCompilerFilter(), filter);
+      }
+    } else {
+      EXPECT_EQ(odex_file->GetCompilerFilter(), filter);
+    }
+
+    // Host/target dependent checks.
+    if (kIsTargetBuild) {
+      CheckTargetResult(expect_large);
+    } else {
+      CheckHostResult(expect_large);
+    }
+  }
+
+  void CheckTargetResult(bool expect_large ATTRIBUTE_UNUSED) {
+    // TODO: Ignore for now. May do something for fd things.
+  }
+
+  void CheckHostResult(bool expect_large) {
+    if (!kIsTargetBuild) {
+      if (expect_large) {
+        EXPECT_NE(output_.find("Very large app, downgrading to verify-at-runtime."),
+                  std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Very large app, downgrading to verify-at-runtime."),
+                  std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  // Check whether the dex2oat run was really successful.
+  void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
+    }
+  }
+
+  void CheckTargetValidity() {
+    // TODO: Ignore for now.
+  }
+
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took".
+  void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
+
+TEST_F(Dex2oatVeryLargeTest, DontUseVeryLarge) {
+  RunTest(CompilerFilter::kVerifyNone, false);
+  RunTest(CompilerFilter::kVerifyAtRuntime, false);
+  RunTest(CompilerFilter::kInterpretOnly, false);
+  RunTest(CompilerFilter::kSpeed, false);
+
+  RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kInterpretOnly, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kSpeed, false, { "--very-large-app-threshold=1000000" });
+}
+
+TEST_F(Dex2oatVeryLargeTest, UseVeryLarge) {
+  RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kInterpretOnly, true, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
+}
+
+}  // namespace art
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 282db5d..2042934 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -17,8 +17,8 @@
  *
  * This is a re-implementation of the original dexdump utility that was
  * based on Dalvik functions in libdex into a new dexdump that is now
- * based on Art functions in libart instead. The output is identical to
- * the original for correct DEX files. Error messages may differ, however.
+ * based on Art functions in libart instead. The output is very similar to
+ * the original for correct DEX files. Error messages may differ, however.
  * Also, ODEX files are no longer supported.
  *
  * The dexdump tool is intended to mimic objdump.  When possible, use
@@ -27,7 +27,6 @@
  * Differences between XML output and the "current.xml" file:
  * - classes in same package are not all grouped together; nothing is sorted
  * - no "deprecated" on fields and methods
- * - no "value" on fields
  * - no parameter names
  * - no generic signatures on parameters, e.g. type="java.lang.Class&lt;?&gt;"
  * - class shows declared fields and methods; does not show inherited fields
@@ -66,6 +65,8 @@
 typedef uint16_t u2;
 typedef uint32_t u4;
 typedef uint64_t u8;
+typedef int8_t   s1;
+typedef int16_t  s2;
 typedef int32_t  s4;
 typedef int64_t  s8;
 
@@ -117,7 +118,7 @@
  * "[I" becomes "int[]".  Also converts '$' to '.', which means this
  * form can't be converted back to a descriptor.
  */
-static char* descriptorToDot(const char* str) {
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
   int targetLen = strlen(str);
   int offset = 0;
 
@@ -144,8 +145,7 @@
   }
 
   // Copy class name over.
-  char* newStr = reinterpret_cast<char*>(
-      malloc(targetLen + arrayDepth * 2 + 1));
+  std::unique_ptr<char[]> newStr(new char[targetLen + arrayDepth * 2 + 1]);
   int i = 0;
   for (; i < targetLen; i++) {
     const char ch = str[offset + i];
@@ -164,12 +164,10 @@
 
 /*
  * Converts the class name portion of a type descriptor to human-readable
- * "dotted" form.
- *
- * Returns a newly-allocated string.
+ * "dotted" form. For example, "Ljava/lang/String;" becomes "String".
  */
-static char* descriptorClassToDot(const char* str) {
-  // Reduce to just the class name, trimming trailing ';'.
+static std::unique_ptr<char[]> descriptorClassToDot(const char* str) {
+  // Find the start of the class name, skipping any package components.
   const char* lastSlash = strrchr(str, '/');
   if (lastSlash == nullptr) {
     lastSlash = str + 1;  // start past 'L'
@@ -177,17 +175,25 @@
     lastSlash++;          // start past '/'
   }
 
-  char* newStr = strdup(lastSlash);
-  newStr[strlen(lastSlash) - 1] = '\0';
-  for (char* cp = newStr; *cp != '\0'; cp++) {
-    if (*cp == '$') {
-      *cp = '.';
-    }
+  // Copy class name over, trimming trailing ';'.
+  const int targetLen = strlen(lastSlash);
+  std::unique_ptr<char[]> newStr(new char[targetLen]);
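+  // targetLen counts the trailing ';', whose slot is reused for the '\0'.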
+  for (int i = 0; i < targetLen - 1; i++) {
+    const char ch = lastSlash[i];
+    newStr[i] = ch == '$' ? '.' : ch;
   }  // for
+  newStr[targetLen - 1] = '\0';
   return newStr;
 }
 
 /*
+ * Returns a string representing the boolean value.
+ */
+static const char* strBool(bool val) {
+  return val ? "true" : "false";
+}
+
+/*
  * Returns a quoted string representing the boolean value.
  */
 static const char* quotedBool(bool val) {
@@ -347,10 +353,197 @@
 }
 
 /*
+ * Dumps a string value, escaping special characters.
+ */
+static void dumpEscapedString(const char* p) {
+  fputs("\"", gOutFile);
+  for (; *p; p++) {
+    switch (*p) {
+      case '\\':
+        fputs("\\\\", gOutFile);
+        break;
+      case '\"':
+        fputs("\\\"", gOutFile);
+        break;
+      case '\t':
+        fputs("\\t", gOutFile);
+        break;
+      case '\n':
+        fputs("\\n", gOutFile);
+        break;
+      case '\r':
+        fputs("\\r", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }  // switch
+  }  // for
+  fputs("\"", gOutFile);
+}
+
+/*
+ * Dumps a string as an XML attribute value.
+ */
+static void dumpXmlAttribute(const char* p) {
+  for (; *p; p++) {
+    switch (*p) {
+      case '&':
+        fputs("&amp;", gOutFile);
+        break;
+      case '<':
+        fputs("&lt;", gOutFile);
+        break;
+      case '>':
+        fputs("&gt;", gOutFile);
+        break;
+      case '"':
+        fputs("&quot;", gOutFile);
+        break;
+      case '\t':
+        fputs("&#x9;", gOutFile);
+        break;
+      case '\n':
+        fputs("&#xA;", gOutFile);
+        break;
+      case '\r':
+        fputs("&#xD;", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }  // switch
+  }  // for
+}
+
+/*
+ * Reads a variable-width value, sign-extending from the last encoded byte if requested.
+ */
+static u8 readVarWidth(const u1** data, u1 arg, bool sign_extend) {
+  u8 value = 0;
+  for (u4 i = 0; i <= arg; i++) {
+    value |= static_cast<u8>(*(*data)++) << (i * 8);
+  }
+  if (sign_extend) {
+    int shift = (7 - arg) * 8;
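+    // E.g. a one-byte value (arg == 0) of 0x80 gives shift == 56; the
+    // arithmetic shift pair below yields 0xFFFFFFFFFFFFFF80, i.e. -128.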
+    return (static_cast<s8>(value) << shift) >> shift;
+  }
+  return value;
+}
+
+/*
+ * Dumps encoded value.
+ */
+static void dumpEncodedValue(const DexFile* pDexFile, const u1** data);  // forward
+static void dumpEncodedValue(const DexFile* pDexFile, const u1** data, u1 type, u1 arg) {
+  switch (type) {
+    case DexFile::kDexAnnotationByte:
+      fprintf(gOutFile, "%" PRId8, static_cast<s1>(readVarWidth(data, arg, false)));
+      break;
+    case DexFile::kDexAnnotationShort:
+      fprintf(gOutFile, "%" PRId16, static_cast<s2>(readVarWidth(data, arg, true)));
+      break;
+    case DexFile::kDexAnnotationChar:
+      fprintf(gOutFile, "%" PRIu16, static_cast<u2>(readVarWidth(data, arg, false)));
+      break;
+    case DexFile::kDexAnnotationInt:
+      fprintf(gOutFile, "%" PRId32, static_cast<s4>(readVarWidth(data, arg, true)));
+      break;
+    case DexFile::kDexAnnotationLong:
+      fprintf(gOutFile, "%" PRId64, static_cast<s8>(readVarWidth(data, arg, true)));
+      break;
+    case DexFile::kDexAnnotationFloat: {
+      // Fill on right.
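+      // (Encoded floats omit trailing zero bytes, so the bytes read must be
+      // shifted into the high end of the 32-bit representation.)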
+      union {
+        float f;
+        u4 data;
+      } conv;
+      conv.data = static_cast<u4>(readVarWidth(data, arg, false)) << (3 - arg) * 8;
+      fprintf(gOutFile, "%g", conv.f);
+      break;
+    }
+    case DexFile::kDexAnnotationDouble: {
+      // Fill on right.
+      union {
+        double d;
+        u8 data;
+      } conv;
+      conv.data = readVarWidth(data, arg, false) << (7 - arg) * 8;
+      fprintf(gOutFile, "%g", conv.d);
+      break;
+    }
+    case DexFile::kDexAnnotationString: {
+      const u4 idx = static_cast<u4>(readVarWidth(data, arg, false));
+      if (gOptions.outputFormat == OUTPUT_PLAIN) {
+        dumpEscapedString(pDexFile->StringDataByIdx(idx));
+      } else {
+        dumpXmlAttribute(pDexFile->StringDataByIdx(idx));
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationType: {
+      const u4 str_idx = static_cast<u4>(readVarWidth(data, arg, false));
+      fputs(pDexFile->StringByTypeIdx(str_idx), gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationField:
+    case DexFile::kDexAnnotationEnum: {
+      const u4 field_idx = static_cast<u4>(readVarWidth(data, arg, false));
+      const DexFile::FieldId& pFieldId = pDexFile->GetFieldId(field_idx);
+      fputs(pDexFile->StringDataByIdx(pFieldId.name_idx_), gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationMethod: {
+      const u4 method_idx = static_cast<u4>(readVarWidth(data, arg, false));
+      const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(method_idx);
+      fputs(pDexFile->StringDataByIdx(pMethodId.name_idx_), gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationArray: {
+      fputc('{', gOutFile);
+      // Decode and display all elements.
+      const u4 size = DecodeUnsignedLeb128(data);
+      for (u4 i = 0; i < size; i++) {
+        fputc(' ', gOutFile);
+        dumpEncodedValue(pDexFile, data);
+      }
+      fputs(" }", gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationAnnotation: {
+      const u4 type_idx = DecodeUnsignedLeb128(data);
+      fputs(pDexFile->StringByTypeIdx(type_idx), gOutFile);
+      // Decode and display all name=value pairs.
+      const u4 size = DecodeUnsignedLeb128(data);
+      for (u4 i = 0; i < size; i++) {
+        const u4 name_idx = DecodeUnsignedLeb128(data);
+        fputc(' ', gOutFile);
+        fputs(pDexFile->StringDataByIdx(name_idx), gOutFile);
+        fputc('=', gOutFile);
+        dumpEncodedValue(pDexFile, data);
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationNull:
+      fputs("null", gOutFile);
+      break;
+    case DexFile::kDexAnnotationBoolean:
+      fputs(strBool(arg), gOutFile);
+      break;
+    default:
+      fputs("????", gOutFile);
+      break;
+  }  // switch
+}
+
+/*
+ * Dumps an encoded value, decoding the leading type/arg header byte first.
+ */
+static void dumpEncodedValue(const DexFile* pDexFile, const u1** data) {
+  const u1 enc = *(*data)++;
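+  // Per the DEX encoded_value format, the low 5 bits of the header byte hold
+  // the value type and the high 3 bits hold the optional value argument.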
+  dumpEncodedValue(pDexFile, data, enc & 0x1f, enc >> 5);
+}
+
+/*
  * Dumps the file header.
- *
- * Note that some of the : are misaligned on purpose to preserve
- * the exact output of the original Dalvik dexdump.
  */
 static void dumpFileHeader(const DexFile* pDexFile) {
   const DexFile::Header& pHeader = pDexFile->GetHeader();
@@ -374,8 +567,8 @@
   fprintf(gOutFile, "type_ids_size       : %d\n", pHeader.type_ids_size_);
   fprintf(gOutFile, "type_ids_off        : %d (0x%06x)\n",
           pHeader.type_ids_off_, pHeader.type_ids_off_);
-  fprintf(gOutFile, "proto_ids_size       : %d\n", pHeader.proto_ids_size_);
-  fprintf(gOutFile, "proto_ids_off        : %d (0x%06x)\n",
+  fprintf(gOutFile, "proto_ids_size      : %d\n", pHeader.proto_ids_size_);
+  fprintf(gOutFile, "proto_ids_off       : %d (0x%06x)\n",
           pHeader.proto_ids_off_, pHeader.proto_ids_off_);
   fprintf(gOutFile, "field_ids_size      : %d\n", pHeader.field_ids_size_);
   fprintf(gOutFile, "field_ids_off       : %d (0x%06x)\n",
@@ -427,6 +620,99 @@
   fprintf(gOutFile, "\n");
 }
 
+/*
+ * Dumps an annotation set item.
+ */
+static void dumpAnnotationSetItem(const DexFile* pDexFile,
+                                  const DexFile::AnnotationSetItem* set_item) {
+  if (set_item == nullptr || set_item->size_ == 0) {
+    fputs("  empty-annotation-set\n", gOutFile);
+    return;
+  }
+  for (u4 i = 0; i < set_item->size_; i++) {
+    const DexFile::AnnotationItem* annotation = pDexFile->GetAnnotationItem(set_item, i);
+    if (annotation == nullptr) {
+      continue;
+    }
+    fputs("  ", gOutFile);
+    switch (annotation->visibility_) {
+      case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   gOutFile); break;
+      case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", gOutFile); break;
+      case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  gOutFile); break;
+      default:                             fputs("VISIBILITY_UNKNOWN ", gOutFile); break;
+    }  // switch
+    // Decode raw bytes in annotation.
+    const u1* rData = annotation->annotation_;
+    dumpEncodedValue(pDexFile, &rData, DexFile::kDexAnnotationAnnotation, 0);
+    fputc('\n', gOutFile);
+  }
+}
+
+/*
+ * Dumps class annotations.
+ */
+static void dumpClassAnnotations(const DexFile* pDexFile, int idx) {
+  const DexFile::ClassDef& pClassDef = pDexFile->GetClassDef(idx);
+  const DexFile::AnnotationsDirectoryItem* dir = pDexFile->GetAnnotationsDirectory(pClassDef);
+  if (dir == nullptr) {
+    return;  // none
+  }
+
+  fprintf(gOutFile, "Class #%d annotations:\n", idx);
+
+  const DexFile::AnnotationSetItem* class_set_item = pDexFile->GetClassAnnotationSet(dir);
+  const DexFile::FieldAnnotationsItem* fields = pDexFile->GetFieldAnnotations(dir);
+  const DexFile::MethodAnnotationsItem* methods = pDexFile->GetMethodAnnotations(dir);
+  const DexFile::ParameterAnnotationsItem* pars = pDexFile->GetParameterAnnotations(dir);
+
+  // Annotations on the class itself.
+  if (class_set_item != nullptr) {
+    fprintf(gOutFile, "Annotations on class\n");
+    dumpAnnotationSetItem(pDexFile, class_set_item);
+  }
+
+  // Annotations on fields.
+  if (fields != nullptr) {
+    for (u4 i = 0; i < dir->fields_size_; i++) {
+      const u4 field_idx = fields[i].field_idx_;
+      const DexFile::FieldId& pFieldId = pDexFile->GetFieldId(field_idx);
+      const char* field_name = pDexFile->StringDataByIdx(pFieldId.name_idx_);
+      fprintf(gOutFile, "Annotations on field #%u '%s'\n", field_idx, field_name);
+      dumpAnnotationSetItem(pDexFile, pDexFile->GetFieldAnnotationSetItem(fields[i]));
+    }
+  }
+
+  // Annotations on methods.
+  if (methods != nullptr) {
+    for (u4 i = 0; i < dir->methods_size_; i++) {
+      const u4 method_idx = methods[i].method_idx_;
+      const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(method_idx);
+      const char* method_name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
+      fprintf(gOutFile, "Annotations on method #%u '%s'\n", method_idx, method_name);
+      dumpAnnotationSetItem(pDexFile, pDexFile->GetMethodAnnotationSetItem(methods[i]));
+    }
+  }
+
+  // Annotations on method parameters.
+  if (pars != nullptr) {
+    for (u4 i = 0; i < dir->parameters_size_; i++) {
+      const u4 method_idx = pars[i].method_idx_;
+      const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(method_idx);
+      const char* method_name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
+      fprintf(gOutFile, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
+      const DexFile::AnnotationSetRefList* list =
+          pDexFile->GetParameterAnnotationSetRefList(&pars[i]);
+      if (list != nullptr) {
+        for (u4 j = 0; j < list->size_; j++) {
+          fprintf(gOutFile, "#%u\n", j);
+          dumpAnnotationSetItem(pDexFile, pDexFile->GetSetRefItemItem(&list->list_[j]));
+        }
+      }
+    }
+  }
+
+  fputc('\n', gOutFile);
+}
+
 /*
  * Dumps an interface that a class declares to implement.
  */
@@ -435,9 +721,8 @@
   if (gOptions.outputFormat == OUTPUT_PLAIN) {
     fprintf(gOutFile, "    #%d              : '%s'\n", i, interfaceName);
   } else {
-    char* dotted = descriptorToDot(interfaceName);
-    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dotted);
-    free(dotted);
+    std::unique_ptr<char[]> dot(descriptorToDot(interfaceName));
+    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dot.get());
   }
 }
 
@@ -472,31 +757,30 @@
 /*
  * Callback for dumping each positions table entry.
  */
-static bool dumpPositionsCb(void* /*context*/, u4 address, u4 lineNum) {
-  fprintf(gOutFile, "        0x%04x line=%d\n", address, lineNum);
+static bool dumpPositionsCb(void* /*context*/, const DexFile::PositionInfo& entry) {
+  fprintf(gOutFile, "        0x%04x line=%d\n", entry.address_, entry.line_);
   return false;
 }
 
 /*
  * Callback for dumping locals table entry.
  */
-static void dumpLocalsCb(void* /*context*/, u2 slot, u4 startAddress, u4 endAddress,
-                         const char* name, const char* descriptor, const char* signature) {
+static void dumpLocalsCb(void* /*context*/, const DexFile::LocalInfo& entry) {
+  const char* signature = entry.signature_ != nullptr ? entry.signature_ : "";
   fprintf(gOutFile, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
-          startAddress, endAddress, slot, name, descriptor, signature);
+          entry.start_address_, entry.end_address_, entry.reg_,
+          entry.name_, entry.descriptor_, signature);
 }
 
 /*
  * Helper for dumpInstruction(), which builds the string
- * representation for the index in the given instruction. This will
- * first try to use the given buffer, but if the result won't fit,
- * then this will allocate a new buffer to hold the result. A pointer
- * to the buffer which holds the full result is always returned, and
- * this can be compared with the one passed in, to see if the result
- * needs to be free()d.
+ * representation for the index in the given instruction.
+ * Returns a pointer to a buffer of sufficient size.
  */
-static char* indexString(const DexFile* pDexFile,
-                         const Instruction* pDecInsn, char* buf, size_t bufSize) {
+static std::unique_ptr<char[]> indexString(const DexFile* pDexFile,
+                                           const Instruction* pDecInsn,
+                                           size_t bufSize) {
+  std::unique_ptr<char[]> buf(new char[bufSize]);
   // Determine index and width of the string.
   u4 index = 0;
   u4 width = 4;
@@ -532,27 +816,27 @@
     case Instruction::kIndexUnknown:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<unknown-index>");
+      outSize = snprintf(buf.get(), bufSize, "<unknown-index>");
       break;
     case Instruction::kIndexNone:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<no-index>");
+      outSize = snprintf(buf.get(), bufSize, "<no-index>");
       break;
     case Instruction::kIndexTypeRef:
       if (index < pDexFile->GetHeader().type_ids_size_) {
         const char* tp = pDexFile->StringByTypeIdx(index);
-        outSize = snprintf(buf, bufSize, "%s // type@%0*x", tp, width, index);
+        outSize = snprintf(buf.get(), bufSize, "%s // type@%0*x", tp, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<type?> // type@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<type?> // type@%0*x", width, index);
       }
       break;
     case Instruction::kIndexStringRef:
       if (index < pDexFile->GetHeader().string_ids_size_) {
         const char* st = pDexFile->StringDataByIdx(index);
-        outSize = snprintf(buf, bufSize, "\"%s\" // string@%0*x", st, width, index);
+        outSize = snprintf(buf.get(), bufSize, "\"%s\" // string@%0*x", st, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<string?> // string@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<string?> // string@%0*x", width, index);
       }
       break;
     case Instruction::kIndexMethodRef:
@@ -561,10 +845,10 @@
         const char* name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
         const Signature signature = pDexFile->GetMethodSignature(pMethodId);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // method@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // method@%0*x",
                            backDescriptor, name, signature.ToString().c_str(), width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<method?> // method@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<method?> // method@%0*x", width, index);
       }
       break;
     case Instruction::kIndexFieldRef:
@@ -573,38 +857,33 @@
         const char* name = pDexFile->StringDataByIdx(pFieldId.name_idx_);
         const char* typeDescriptor = pDexFile->StringByTypeIdx(pFieldId.type_idx_);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pFieldId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // field@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // field@%0*x",
                            backDescriptor, name, typeDescriptor, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<field?> // field@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<field?> // field@%0*x", width, index);
       }
       break;
     case Instruction::kIndexVtableOffset:
-      outSize = snprintf(buf, bufSize, "[%0*x] // vtable #%0*x",
+      outSize = snprintf(buf.get(), bufSize, "[%0*x] // vtable #%0*x",
                          width, index, width, index);
       break;
     case Instruction::kIndexFieldOffset:
-      outSize = snprintf(buf, bufSize, "[obj+%0*x]", width, index);
+      outSize = snprintf(buf.get(), bufSize, "[obj+%0*x]", width, index);
       break;
     // SOME NOT SUPPORTED:
     // case Instruction::kIndexVaries:
     // case Instruction::kIndexInlineMethod:
     default:
-      outSize = snprintf(buf, bufSize, "<?>");
+      outSize = snprintf(buf.get(), bufSize, "<?>");
       break;
   }  // switch
 
   // Determine success of string construction.
   if (outSize >= bufSize) {
-    // The buffer wasn't big enough; allocate and retry. Note:
-    // snprintf() doesn't count the '\0' as part of its returned
-    // size, so we add explicit space for it here.
-    outSize++;
-    buf = reinterpret_cast<char*>(malloc(outSize));
-    if (buf == nullptr) {
-      return nullptr;
-    }
-    return indexString(pDexFile, pDecInsn, buf, outSize);
+    // The buffer wasn't big enough; retry with computed size. Note: snprintf()
+    // doesn't count the '\0' as part of its returned size, so we add explicit
+    // space for it here.
+    return indexString(pDexFile, pDecInsn, outSize + 1);
   }
   return buf;
 }
@@ -652,11 +931,9 @@
   }
 
   // Set up additional argument.
-  char indexBufChars[200];
-  char *indexBuf = indexBufChars;
+  std::unique_ptr<char[]> indexBuf;
   if (Instruction::IndexTypeOf(pDecInsn->Opcode()) != Instruction::kIndexNone) {
-    indexBuf = indexString(pDexFile, pDecInsn,
-                           indexBufChars, sizeof(indexBufChars));
+    indexBuf = indexString(pDexFile, pDecInsn, 200);
   }
 
   // Dump the instruction.
@@ -677,27 +954,25 @@
       fprintf(gOutFile, " v%d", pDecInsn->VRegA());
       break;
     case Instruction::k10t:        // op +AA
-    case Instruction::k20t:        // op +AAAA
-      {
-        const s4 targ = (s4) pDecInsn->VRegA();
-        fprintf(gOutFile, " %04x // %c%04x",
-                insnIdx + targ,
-                (targ < 0) ? '-' : '+',
-                (targ < 0) ? -targ : targ);
-      }
+    case Instruction::k20t: {      // op +AAAA
+      const s4 targ = (s4) pDecInsn->VRegA();
+      fprintf(gOutFile, " %04x // %c%04x",
+              insnIdx + targ,
+              (targ < 0) ? '-' : '+',
+              (targ < 0) ? -targ : targ);
       break;
+    }
     case Instruction::k22x:        // op vAA, vBBBB
       fprintf(gOutFile, " v%d, v%d", pDecInsn->VRegA(), pDecInsn->VRegB());
       break;
-    case Instruction::k21t:        // op vAA, +BBBB
-      {
-        const s4 targ = (s4) pDecInsn->VRegB();
-        fprintf(gOutFile, " v%d, %04x // %c%04x", pDecInsn->VRegA(),
-                insnIdx + targ,
-                (targ < 0) ? '-' : '+',
-                (targ < 0) ? -targ : targ);
-      }
+    case Instruction::k21t: {      // op vAA, +BBBB
+      const s4 targ = (s4) pDecInsn->VRegB();
+      fprintf(gOutFile, " v%d, %04x // %c%04x", pDecInsn->VRegA(),
+              insnIdx + targ,
+              (targ < 0) ? '-' : '+',
+              (targ < 0) ? -targ : targ);
       break;
+    }
     case Instruction::k21s:        // op vAA, #+BBBB
       fprintf(gOutFile, " v%d, #int %d // #%x",
               pDecInsn->VRegA(), (s4) pDecInsn->VRegB(), (u2)pDecInsn->VRegB());
@@ -716,7 +991,7 @@
       break;
     case Instruction::k21c:        // op vAA, thing@BBBB
     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
-      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf);
+      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf.get());
       break;
     case Instruction::k23x:        // op vAA, vBB, vCC
       fprintf(gOutFile, " v%d, v%d, v%d",
@@ -727,16 +1002,15 @@
               pDecInsn->VRegA(), pDecInsn->VRegB(),
               (s4) pDecInsn->VRegC(), (u1) pDecInsn->VRegC());
       break;
-    case Instruction::k22t:        // op vA, vB, +CCCC
-      {
-        const s4 targ = (s4) pDecInsn->VRegC();
-        fprintf(gOutFile, " v%d, v%d, %04x // %c%04x",
-                pDecInsn->VRegA(), pDecInsn->VRegB(),
-                insnIdx + targ,
-                (targ < 0) ? '-' : '+',
-                (targ < 0) ? -targ : targ);
-      }
+    case Instruction::k22t: {      // op vA, vB, +CCCC
+      const s4 targ = (s4) pDecInsn->VRegC();
+      fprintf(gOutFile, " v%d, v%d, %04x // %c%04x",
+              pDecInsn->VRegA(), pDecInsn->VRegB(),
+              insnIdx + targ,
+              (targ < 0) ? '-' : '+',
+              (targ < 0) ? -targ : targ);
       break;
+    }
     case Instruction::k22s:        // op vA, vB, #+CCCC
       fprintf(gOutFile, " v%d, v%d, #int %d // #%04x",
               pDecInsn->VRegA(), pDecInsn->VRegB(),
@@ -746,23 +1020,22 @@
     // NOT SUPPORTED:
     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
       fprintf(gOutFile, " v%d, v%d, %s",
-              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf);
+              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf.get());
       break;
     case Instruction::k30t:
       fprintf(gOutFile, " #%08x", pDecInsn->VRegA());
       break;
-    case Instruction::k31i:        // op vAA, #+BBBBBBBB
-      {
-        // This is often, but not always, a float.
-        union {
-          float f;
-          u4 i;
-        } conv;
-        conv.i = pDecInsn->VRegB();
-        fprintf(gOutFile, " v%d, #float %f // #%08x",
-                pDecInsn->VRegA(), conv.f, pDecInsn->VRegB());
-      }
+    case Instruction::k31i: {      // op vAA, #+BBBBBBBB
+      // This is often, but not always, a float.
+      union {
+        float f;
+        u4 i;
+      } conv;
+      conv.i = pDecInsn->VRegB();
+      fprintf(gOutFile, " v%d, #float %g // #%08x",
+              pDecInsn->VRegA(), conv.f, pDecInsn->VRegB());
       break;
+    }
     case Instruction::k31t:       // op vAA, offset +BBBBBBBB
       fprintf(gOutFile, " v%d, %08x // +%08x",
               pDecInsn->VRegA(), insnIdx + pDecInsn->VRegB(), pDecInsn->VRegB());
@@ -770,24 +1043,23 @@
     case Instruction::k32x:        // op vAAAA, vBBBB
       fprintf(gOutFile, " v%d, v%d", pDecInsn->VRegA(), pDecInsn->VRegB());
       break;
-    case Instruction::k35c:        // op {vC, vD, vE, vF, vG}, thing@BBBB
+    case Instruction::k35c: {      // op {vC, vD, vE, vF, vG}, thing@BBBB
     // NOT SUPPORTED:
     // case Instruction::k35ms:       // [opt] invoke-virtual+super
     // case Instruction::k35mi:       // [opt] inline invoke
-      {
-        u4 arg[5];
-        pDecInsn->GetVarArgs(arg);
-        fputs(" {", gOutFile);
-        for (int i = 0, n = pDecInsn->VRegA(); i < n; i++) {
-          if (i == 0) {
-            fprintf(gOutFile, "v%d", arg[i]);
-          } else {
-            fprintf(gOutFile, ", v%d", arg[i]);
-          }
-        }  // for
-        fprintf(gOutFile, "}, %s", indexBuf);
-      }
+      u4 arg[Instruction::kMaxVarArgRegs];
+      pDecInsn->GetVarArgs(arg);
+      fputs(" {", gOutFile);
+      for (int i = 0, n = pDecInsn->VRegA(); i < n; i++) {
+        if (i == 0) {
+          fprintf(gOutFile, "v%d", arg[i]);
+        } else {
+          fprintf(gOutFile, ", v%d", arg[i]);
+        }
+      }  // for
+      fprintf(gOutFile, "}, %s", indexBuf.get());
       break;
+    }
     case Instruction::k3rc:        // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
     // NOT SUPPORTED:
     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
@@ -803,21 +1075,20 @@
             fprintf(gOutFile, ", v%d", pDecInsn->VRegC() + i);
           }
         }  // for
-        fprintf(gOutFile, "}, %s", indexBuf);
+        fprintf(gOutFile, "}, %s", indexBuf.get());
       }
       break;
-    case Instruction::k51l:        // op vAA, #+BBBBBBBBBBBBBBBB
-      {
-        // This is often, but not always, a double.
-        union {
-          double d;
-          u8 j;
-        } conv;
-        conv.j = pDecInsn->WideVRegB();
-        fprintf(gOutFile, " v%d, #double %f // #%016" PRIx64,
-                pDecInsn->VRegA(), conv.d, pDecInsn->WideVRegB());
-      }
+    case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
+      // This is often, but not always, a double.
+      union {
+        double d;
+        u8 j;
+      } conv;
+      conv.j = pDecInsn->WideVRegB();
+      fprintf(gOutFile, " v%d, #double %g // #%016" PRIx64,
+              pDecInsn->VRegA(), conv.d, pDecInsn->WideVRegB());
       break;
+    }
     // NOT SUPPORTED:
     // case Instruction::k00x:        // unknown op or breakpoint
     //    break;
@@ -827,10 +1098,6 @@
   }  // switch
 
   fputc('\n', gOutFile);
-
-  if (indexBuf != indexBufChars) {
-    free(indexBuf);
-  }
 }
 
 /*
@@ -844,11 +1111,9 @@
   const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
 
   // Generate header.
-  char* tmp = descriptorToDot(backDescriptor);
-  fprintf(gOutFile, "%06x:                                        "
-          "|[%06x] %s.%s:%s\n",
-          codeOffset, codeOffset, tmp, name, signature.ToString().c_str());
-  free(tmp);
+  std::unique_ptr<char[]> dot(descriptorToDot(backDescriptor));
+  fprintf(gOutFile, "%06x:                                        |[%06x] %s.%s:%s\n",
+          codeOffset, codeOffset, dot.get(), name, signature.ToString().c_str());
 
   // Iterate over all instructions.
   const u2* insns = pCode->insns_;
@@ -886,11 +1151,9 @@
   // Positions and locals table in the debug info.
   bool is_static = (flags & kAccStatic) != 0;
   fprintf(gOutFile, "      positions     : \n");
-  pDexFile->DecodeDebugInfo(
-      pCode, is_static, idx, dumpPositionsCb, nullptr, nullptr);
+  pDexFile->DecodeDebugPositionInfo(pCode, dumpPositionsCb, nullptr);
   fprintf(gOutFile, "      locals        : \n");
-  pDexFile->DecodeDebugInfo(
-      pCode, is_static, idx, nullptr, dumpLocalsCb, nullptr);
+  pDexFile->DecodeDebugLocalInfo(pCode, is_static, idx, dumpLocalsCb, nullptr);
 }
 
 /*
@@ -929,12 +1192,10 @@
 
     // Method name and prototype.
     if (constructor) {
-      char* tmp = descriptorClassToDot(backDescriptor);
-      fprintf(gOutFile, "<constructor name=\"%s\"\n", tmp);
-      free(tmp);
-      tmp = descriptorToDot(backDescriptor);
-      fprintf(gOutFile, " type=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorClassToDot(backDescriptor));
+      fprintf(gOutFile, "<constructor name=\"%s\"\n", dot.get());
+      dot = descriptorToDot(backDescriptor);
+      fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     } else {
       fprintf(gOutFile, "<method name=\"%s\"\n", name);
       const char* returnType = strrchr(typeDescriptor, ')');
@@ -942,9 +1203,8 @@
         fprintf(stderr, "bad method type descriptor '%s'\n", typeDescriptor);
         goto bail;
       }
-      char* tmp = descriptorToDot(returnType+1);
-      fprintf(gOutFile, " return=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorToDot(returnType + 1));
+      fprintf(gOutFile, " return=\"%s\"\n", dot.get());
       fprintf(gOutFile, " abstract=%s\n", quotedBool((flags & kAccAbstract) != 0));
       fprintf(gOutFile, " native=%s\n", quotedBool((flags & kAccNative) != 0));
       fprintf(gOutFile, " synchronized=%s\n", quotedBool(
@@ -977,18 +1237,17 @@
         } while (*cp++ != ';');
       } else {
         // Primitive char, copy it.
-        if (strchr("ZBCSIFJD", *base) == NULL) {
+        if (strchr("ZBCSIFJD", *base) == nullptr) {
           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
-          goto bail;
+          break;  // while
         }
         *cp++ = *base++;
       }
       // Null terminate and display.
       *cp++ = '\0';
-      char* tmp = descriptorToDot(tmpBuf);
+      std::unique_ptr<char[]> dot(descriptorToDot(tmpBuf));
       fprintf(gOutFile, "<parameter name=\"arg%d\" type=\"%s\">\n"
-                        "</parameter>\n", argNum++, tmp);
-      free(tmp);
+                        "</parameter>\n", argNum++, dot.get());
     }  // while
     free(tmpBuf);
     if (constructor) {
@@ -1006,7 +1265,7 @@
 /*
  * Dumps a static (class) field.
  */
-static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
+static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i, const u1** data) {
   // Bail for anything private if export only requested.
   if (gOptions.exportsOnly && (flags & (kAccPublic | kAccProtected)) == 0) {
     return;
@@ -1023,11 +1282,15 @@
     fprintf(gOutFile, "      name          : '%s'\n", name);
     fprintf(gOutFile, "      type          : '%s'\n", typeDescriptor);
     fprintf(gOutFile, "      access        : 0x%04x (%s)\n", flags, accessStr);
+    if (data != nullptr) {
+      fputs("      value         : ", gOutFile);
+      dumpEncodedValue(pDexFile, data);
+      fputs("\n", gOutFile);
+    }
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "<field name=\"%s\"\n", name);
-    char *tmp = descriptorToDot(typeDescriptor);
-    fprintf(gOutFile, " type=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorToDot(typeDescriptor));
+    fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     fprintf(gOutFile, " transient=%s\n", quotedBool((flags & kAccTransient) != 0));
     fprintf(gOutFile, " volatile=%s\n", quotedBool((flags & kAccVolatile) != 0));
     // The "value=" is not knowable w/o parsing annotations.
@@ -1035,7 +1298,12 @@
     fprintf(gOutFile, " final=%s\n", quotedBool((flags & kAccFinal) != 0));
     // The "deprecated=" is not knowable w/o parsing annotations.
     fprintf(gOutFile, " visibility=%s\n", quotedVisibility(flags));
-    fprintf(gOutFile, ">\n</field>\n");
+    if (data != nullptr) {
+      fputs(" value=\"", gOutFile);
+      dumpEncodedValue(pDexFile, data);
+      fputs("\"\n", gOutFile);
+    }
+    fputs(">\n</field>\n", gOutFile);
   }
 
   free(accessStr);
@@ -1045,7 +1313,7 @@
  * Dumps an instance field.
  */
 static void dumpIField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
-  dumpSField(pDexFile, idx, flags, i);
+  dumpSField(pDexFile, idx, flags, i, nullptr);
 }
 
 /*
@@ -1055,7 +1323,7 @@
  */
 
 static void dumpCfg(const DexFile* dex_file,
-                    uint32_t dex_method_idx,
+                    u4 dex_method_idx,
                     const DexFile::CodeItem* code_item) {
   if (code_item != nullptr) {
     std::ostringstream oss;
@@ -1066,7 +1334,7 @@
 
 static void dumpCfg(const DexFile* dex_file, int idx) {
   const DexFile::ClassDef& class_def = dex_file->GetClassDef(idx);
-  const uint8_t* class_data = dex_file->GetClassData(class_def);
+  const u1* class_data = dex_file->GetClassData(class_def);
   if (class_data == nullptr) {  // empty class such as a marker interface?
     return;
   }
@@ -1107,7 +1375,15 @@
     return;
   }
 
-  if (gOptions.cfg) {
+  if (gOptions.showSectionHeaders) {
+    dumpClassDef(pDexFile, idx);
+  }
+
+  if (gOptions.showAnnotations) {
+    dumpClassAnnotations(pDexFile, idx);
+  }
+
+  if (gOptions.showCfg) {
     dumpCfg(pDexFile, idx);
     return;
   }
@@ -1169,14 +1445,14 @@
     }
     fprintf(gOutFile, "  Interfaces        -\n");
   } else {
-    char* tmp = descriptorClassToDot(classDescriptor);
-    fprintf(gOutFile, "<class name=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorClassToDot(classDescriptor));
+    fprintf(gOutFile, "<class name=\"%s\"\n", dot.get());
     if (superclassDescriptor != nullptr) {
-      tmp = descriptorToDot(superclassDescriptor);
-      fprintf(gOutFile, " extends=\"%s\"\n", tmp);
-      free(tmp);
+      dot = descriptorToDot(superclassDescriptor);
+      fprintf(gOutFile, " extends=\"%s\"\n", dot.get());
     }
+    fprintf(gOutFile, " interface=%s\n",
+            quotedBool((pClassDef.access_flags_ & kAccInterface) != 0));
     fprintf(gOutFile, " abstract=%s\n", quotedBool((pClassDef.access_flags_ & kAccAbstract) != 0));
     fprintf(gOutFile, " static=%s\n", quotedBool((pClassDef.access_flags_ & kAccStatic) != 0));
     fprintf(gOutFile, " final=%s\n", quotedBool((pClassDef.access_flags_ & kAccFinal) != 0));
@@ -1204,20 +1480,35 @@
     }
   } else {
     ClassDataItemIterator pClassData(*pDexFile, pEncodedData);
+
+    // Prepare data for static fields.
+    const u1* sData = pDexFile->GetEncodedStaticFieldValuesArray(pClassDef);
+    const u4 sSize = sData != nullptr ? DecodeUnsignedLeb128(&sData) : 0;
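+    // Only the first sSize static fields have explicit initializers in the
+    // DEX file; the remaining ones default to zero/null.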
+
+    // Static fields.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Static fields     -\n");
     }
-    for (int i = 0; pClassData.HasNextStaticField(); i++, pClassData.Next()) {
-      dumpSField(pDexFile, pClassData.GetMemberIndex(),
-                           pClassData.GetRawMemberAccessFlags(), i);
+    for (u4 i = 0; pClassData.HasNextStaticField(); i++, pClassData.Next()) {
+      dumpSField(pDexFile,
+                 pClassData.GetMemberIndex(),
+                 pClassData.GetRawMemberAccessFlags(),
+                 i,
+                 i < sSize ? &sData : nullptr);
     }  // for
+
+    // Instance fields.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Instance fields   -\n");
     }
-    for (int i = 0; pClassData.HasNextInstanceField(); i++, pClassData.Next()) {
-      dumpIField(pDexFile, pClassData.GetMemberIndex(),
-                          pClassData.GetRawMemberAccessFlags(), i);
+    for (u4 i = 0; pClassData.HasNextInstanceField(); i++, pClassData.Next()) {
+      dumpIField(pDexFile,
+                 pClassData.GetMemberIndex(),
+                 pClassData.GetRawMemberAccessFlags(),
+                 i);
     }  // for
+
+    // Direct methods.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Direct methods    -\n");
     }
@@ -1227,6 +1518,8 @@
                            pClassData.GetMethodCodeItem(),
                            pClassData.GetMethodCodeItemOffset(), i);
     }  // for
+
+    // Virtual methods.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Virtual methods   -\n");
     }
@@ -1278,9 +1571,6 @@
   char* package = nullptr;
   const u4 classDefsSize = pDexFile->GetHeader().class_defs_size_;
   for (u4 i = 0; i < classDefsSize; i++) {
-    if (gOptions.showSectionHeaders) {
-      dumpClassDef(pDexFile, i);
-    }
     dumpClass(pDexFile, i, &package);
   }  // for
 
@@ -1305,17 +1595,11 @@
   }
 
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
-  // all of which are Zip archives with "classes.dex" inside. The compressed
-  // data needs to be extracted to a temp file, the location of which varies.
-  //
-  // TODO(ajcbik): fix following issues
-  //
-  // (1) gOptions.tempFileName is not accounted for
-  // (2) gOptions.ignoreBadChecksum is not accounted for
-  //
+  // all of which are Zip archives with "classes.dex" inside.
+  const bool kVerifyChecksum = !gOptions.ignoreBadChecksum;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!DexFile::Open(fileName, fileName, &error_msg, &dex_files)) {
+  if (!DexFile::Open(fileName, fileName, kVerifyChecksum, &error_msg, &dex_files)) {
     // Display returned error message to user. Note that this error behavior
     // differs from the error messages shown by the original Dalvik dexdump.
     fputs(error_msg.c_str(), stderr);
diff --git a/dexdump/dexdump.h b/dexdump/dexdump.h
index 50280a9..6939f90 100644
--- a/dexdump/dexdump.h
+++ b/dexdump/dexdump.h
@@ -42,13 +42,13 @@
   bool disassemble;
   bool exportsOnly;
   bool ignoreBadChecksum;
+  bool showAnnotations;
+  bool showCfg;
   bool showFileHeaders;
   bool showSectionHeaders;
   bool verbose;
-  bool cfg;
   OutputFormat outputFormat;
   const char* outputFileName;
-  const char* tempFileName;
 };
 
 /* Prototypes. */
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 2466f33..f716ba8 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -17,8 +17,8 @@
  *
  * This is a re-implementation of the original dexdump utility that was
  * based on Dalvik functions in libdex into a new dexdump that is now
- * based on Art functions in libart instead. The output is identical to
- * the original for correct DEX files. Error messages may differ, however.
+ * based on Art functions in libart instead. The output is very similar to
+ * the original for correct DEX files. Error messages may differ, however.
  * Also, ODEX files are no longer supported.
  */
 
@@ -40,18 +40,18 @@
  */
 static void usage(void) {
   fprintf(stderr, "Copyright (C) 2007 The Android Open Source Project\n\n");
-  fprintf(stderr, "%s: [-c] [-d] [-f] [-h] [-i] [-l layout] [-o outfile]"
-                  " [-t tempfile] dexfile...\n", gProgName);
-  fprintf(stderr, "\n");
+  fprintf(stderr, "%s: [-a] [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile]"
+                  " dexfile...\n\n", gProgName);
+  fprintf(stderr, " -a : display annotations\n");
   fprintf(stderr, " -c : verify checksum and exit\n");
   fprintf(stderr, " -d : disassemble code sections\n");
+  fprintf(stderr, " -e : display exported items only\n");
   fprintf(stderr, " -f : display summary information from file header\n");
-  fprintf(stderr, " -g : dump CFG for dex\n");
+  fprintf(stderr, " -g : display CFG for dex\n");
   fprintf(stderr, " -h : display file header details\n");
   fprintf(stderr, " -i : ignore checksum failures\n");
   fprintf(stderr, " -l : output layout, either 'plain' or 'xml'\n");
   fprintf(stderr, " -o : output file name (defaults to stdout)\n");
-  fprintf(stderr, " -t : temp file name (defaults to /sdcard/dex-temp-*)\n");
 }
 
 /*
@@ -69,24 +69,30 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "cdfghil:t:o:");
+    const int ic = getopt(argc, argv, "acdefghil:o:");
     if (ic < 0) {
       break;  // done
     }
     switch (ic) {
+      case 'a':  // display annotations
+        gOptions.showAnnotations = true;
+        break;
       case 'c':  // verify the checksum then exit
         gOptions.checksumOnly = true;
         break;
       case 'd':  // disassemble Dalvik instructions
         gOptions.disassemble = true;
         break;
-      case 'f':  // dump outer file header
+      case 'e':  // exported items only
+        gOptions.exportsOnly = true;
+        break;
+      case 'f':  // display outer file header
         gOptions.showFileHeaders = true;
         break;
-      case 'g':  // dump cfg
-        gOptions.cfg = true;
+      case 'g':  // display cfg
+        gOptions.showCfg = true;
         break;
-      case 'h':  // dump section headers, i.e. all meta-data
+      case 'h':  // display section headers, i.e. all meta-data
         gOptions.showSectionHeaders = true;
         break;
       case 'i':  // continue even if checksum is bad
@@ -98,14 +104,10 @@
         } else if (strcmp(optarg, "xml") == 0) {
           gOptions.outputFormat = OUTPUT_XML;
           gOptions.verbose = false;
-          gOptions.exportsOnly = true;
         } else {
           wantUsage = true;
         }
         break;
-      case 't':  // temp file, used when opening compressed Jar
-        gOptions.tempFileName = optarg;
-        break;
       case 'o':  // output file
         gOptions.outputFileName = optarg;
         break;
diff --git a/dexdump/dexdump_test.cc b/dexdump/dexdump_test.cc
index 4230cb2..9819233 100644
--- a/dexdump/dexdump_test.cc
+++ b/dexdump/dexdump_test.cc
@@ -37,7 +37,7 @@
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
     // Dogfood our own lib core dex file.
-    dex_file_ = GetLibCoreDexFileName();
+    dex_file_ = GetLibCoreDexFileNames()[0];
   }
 
   // Runs test with given arguments.
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 1d0f75e..a1bde0e 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -60,18 +60,17 @@
  * final ";" (if any) have been removed and all occurrences of '/'
  * have been changed to '.'.
  */
-static char* descriptorToDot(const char* str) {
-  size_t at = strlen(str);
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
+  size_t len = strlen(str);
   if (str[0] == 'L') {
-    at -= 2;  // Two fewer chars to copy.
-    str++;
+    len -= 2;  // Two fewer chars to copy (trims L and ;).
+    str++;     // Start past 'L'.
   }
-  char* newStr = reinterpret_cast<char*>(malloc(at + 1));
-  newStr[at] = '\0';
-  while (at > 0) {
-    at--;
-    newStr[at] = (str[at] == '/') ? '.' : str[at];
+  std::unique_ptr<char[]> newStr(new char[len + 1]);
+  for (size_t i = 0; i < len; i++) {
+    newStr[i] = (str[i] == '/') ? '.' : str[i];
   }
+  newStr[len] = '\0';
   return newStr;
 }
 
@@ -80,10 +79,10 @@
  * first line in the method, which *should* correspond to the first
  * entry from the table.  (Could also use "min" here.)
  */
-static bool positionsCb(void* context, u4 /*address*/, u4 lineNum) {
+static bool positionsCb(void* context, const DexFile::PositionInfo& entry) {
   int* pFirstLine = reinterpret_cast<int *>(context);
   if (*pFirstLine == -1) {
-    *pFirstLine = lineNum;
+    *pFirstLine = entry.line_;
   }
   return 0;
 }
@@ -92,7 +91,7 @@
  * Dumps a method.
  */
 static void dumpMethod(const DexFile* pDexFile,
-                       const char* fileName, u4 idx, u4 flags,
+                       const char* fileName, u4 idx, u4 flags ATTRIBUTE_UNUSED,
                        const DexFile::CodeItem* pCode, u4 codeOffset) {
   // Abstract and native methods don't get listed.
   if (pCode == nullptr || codeOffset == 0) {
@@ -103,14 +102,13 @@
   const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(idx);
   const char* methodName = pDexFile->StringDataByIdx(pMethodId.name_idx_);
   const char* classDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-  char* className = descriptorToDot(classDescriptor);
+  std::unique_ptr<char[]> className(descriptorToDot(classDescriptor));
   const u4 insnsOff = codeOffset + 0x10;
 
   // Don't list methods that do not match a particular query.
   if (gOptions.methodToFind != nullptr &&
-      (strcmp(gOptions.classToFind, className) != 0 ||
+      (strcmp(gOptions.classToFind, className.get()) != 0 ||
        strcmp(gOptions.methodToFind, methodName) != 0)) {
-    free(className);
     return;
   }
 
@@ -121,9 +119,7 @@
 
   // Find the first line.
   int firstLine = -1;
-  bool is_static = (flags & kAccStatic) != 0;
-  pDexFile->DecodeDebugInfo(
-     pCode, is_static, idx, positionsCb, nullptr, &firstLine);
+  pDexFile->DecodeDebugPositionInfo(pCode, positionsCb, &firstLine);
 
   // Method signature.
   const Signature signature = pDexFile->GetMethodSignature(pMethodId);
@@ -132,10 +128,9 @@
   // Dump actual method information.
   fprintf(gOutFile, "0x%08x %d %s %s %s %s %d\n",
           insnsOff, pCode->insns_size_in_code_units_ * 2,
-          className, methodName, typeDesc, fileName, firstLine);
+          className.get(), methodName, typeDesc, fileName, firstLine);
 
   free(typeDesc);
-  free(className);
 }
 
 /*
@@ -182,9 +177,10 @@
 static int processFile(const char* fileName) {
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
   // all of which are Zip archives with "classes.dex" inside.
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!DexFile::Open(fileName, fileName, &error_msg, &dex_files)) {
+  if (!DexFile::Open(fileName, fileName, kVerifyChecksum, &error_msg, &dex_files)) {
     fputs(error_msg.c_str(), stderr);
     fputc('\n', stderr);
     return -1;
diff --git a/dexlist/dexlist_test.cc b/dexlist/dexlist_test.cc
index 82179de..9a65ba6 100644
--- a/dexlist/dexlist_test.cc
+++ b/dexlist/dexlist_test.cc
@@ -37,7 +37,7 @@
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
     // Dogfood our own lib core dex file.
-    dex_file_ = GetLibCoreDexFileName();
+    dex_file_ = GetLibCoreDexFileNames()[0];
   }
 
   // Runs test with given arguments.
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index 039986c..db327fc 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -59,17 +59,18 @@
   LOCAL_SRC_FILES := $$(LIBART_DISASSEMBLER_SRC_FILES)
 
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
@@ -89,9 +90,9 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For disassembler_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixld
+    LOCAL_SHARED_LIBRARIES += libvixld-arm64
   else
-     LOCAL_SHARED_LIBRARIES += libvixl
+    LOCAL_SHARED_LIBRARIES += libvixl-arm64
   endif
   ifeq ($$(art_target_or_host),target)
     include $(BUILD_SHARED_LIBRARY)
diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc
index e604c1f..bcd0d16 100644
--- a/disassembler/disassembler.cc
+++ b/disassembler/disassembler.cc
@@ -32,10 +32,8 @@
     return new arm::DisassemblerArm(options);
   } else if (instruction_set == kArm64) {
     return new arm64::DisassemblerArm64(options);
-  } else if (instruction_set == kMips) {
-    return new mips::DisassemblerMips(options, false);
-  } else if (instruction_set == kMips64) {
-    return new mips::DisassemblerMips(options, true);
+  } else if (instruction_set == kMips || instruction_set == kMips64) {
+    return new mips::DisassemblerMips(options);
   } else if (instruction_set == kX86) {
     return new x86::DisassemblerX86(options, false);
   } else if (instruction_set == kX86_64) {
diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h
index b99e5c2..86793cc 100644
--- a/disassembler/disassembler.h
+++ b/disassembler/disassembler.h
@@ -28,19 +28,32 @@
 
 class DisassemblerOptions {
  public:
+  using ThreadOffsetNameFunction = void (*)(std::ostream& os, uint32_t offset);
+
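+  // Callback that prints a symbolic name for an offset from the thread register.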
+  ThreadOffsetNameFunction thread_offset_name_function_;
+
+  // Base address for calculating relative code offsets when absolute_addresses_ is false.
+  const uint8_t* const base_address_;
+
+  // End address (exclusive).
+  const uint8_t* const end_address_;
+
   // Should the disassembler print absolute or relative addresses.
   const bool absolute_addresses_;
 
-  // Base addess for calculating relative code offsets when absolute_addresses_ is false.
-  const uint8_t* const base_address_;
-
   // If set, the disassembler is allowed to look at load targets in literal
   // pools.
   const bool can_read_literals_;
 
-  DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address,
-                      bool can_read_literals)
-      : absolute_addresses_(absolute_addresses), base_address_(base_address),
+  DisassemblerOptions(bool absolute_addresses,
+                      const uint8_t* base_address,
+                      const uint8_t* end_address,
+                      bool can_read_literals,
+                      ThreadOffsetNameFunction fn)
+      : thread_offset_name_function_(fn),
+        base_address_(base_address),
+        end_address_(end_address),
+        absolute_addresses_(absolute_addresses),
         can_read_literals_(can_read_literals) {}
 
  private:
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 5e2cf6b..a47b6ad 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -25,7 +25,6 @@
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
 
 namespace art {
 namespace arm {
@@ -329,7 +328,7 @@
           }
           if (rn.r == 9) {
             args << "  ; ";
-            Thread::DumpThreadOffset<4>(args, offset);
+            GetDisassemblerOptions()->thread_offset_name_function_(args, offset);
           }
         }
       }
@@ -418,7 +417,12 @@
   return os << static_cast<int>(type);
 }
 
-void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32,
+void DumpThumb2Literal(std::ostream& args,
+                       const uint8_t* instr_ptr,
+                       const uintptr_t lo_adr,
+                       const uintptr_t hi_adr,
+                       uint32_t U,
+                       uint32_t imm32,
                        T2LitType type) {
   // Literal offsets (imm32) are not required to be aligned so we may need unaligned access.
   typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1)));
@@ -428,8 +432,16 @@
   typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1)));
   typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1)));
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4);
   uintptr_t lit_adr = U ? pc + imm32 : pc - imm32;
+  if (lit_adr < lo_adr || lit_adr >= hi_adr) {
+    args << "  ; (?)";
+    return;
+  }
+
   args << "  ; ";
   switch (type) {
     case kT2LitUByte:
@@ -482,6 +494,10 @@
     return DumpThumb16(os, instr_ptr);
   }
 
+  // Set valid address range of backing buffer.
+  const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
+  const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
+
   uint32_t op2 = (instr >> 20) & 0x7F;
   std::ostringstream opcode;
   std::ostringstream args;
@@ -765,23 +781,13 @@
         args << Rm;
 
         // Shift operand.
-        bool noShift = (imm5 == 0 && shift_type != 0x3);
+        bool noShift = (imm5 == 0 && shift_type == 0x0);
         if (!noShift) {
           args << ", ";
-          switch (shift_type) {
-            case 0x0: args << "lsl"; break;
-            case 0x1: args << "lsr"; break;
-            case 0x2: args << "asr"; break;
-            case 0x3:
-              if (imm5 == 0) {
-                args << "rrx";
-              } else {
-                args << "ror";
-              }
-              break;
-          }
-          if (shift_type != 0x3 /* rrx */) {
-            args << StringPrintf(" #%d", (0 != imm5 || 0 == shift_type) ? imm5 : 32);
+          if (shift_type == 0x3u && imm5 == 0u) {
+            args << "rrx";
+          } else {
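+            // For lsr/asr, an imm5 of 0 encodes a shift of 32.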
+            args << kThumb2ShiftOperations[shift_type] << " #" << ((0 != imm5) ? imm5 : 32);
           }
         }
 
@@ -824,7 +830,7 @@
                 args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
                      << (imm8 << 2) << "]";
                 if (Rn.r == 15 && U == 1) {
-                  DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong);
+                  DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm8 << 2, kT2LitHexLong);
                 }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
@@ -934,17 +940,11 @@
                 opcode << (op != 0 ? "vsqrt" : "vneg") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", " << m;
               } else if (op5 == 4) {
-                opcode << "vcmp" << (S != 0 ? ".f64" : ".f32");
+                opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", " << m;
-                if (op != 0) {
-                  args << " (quiet nan)";
-                }
               } else if (op5 == 5) {
-                opcode << "vcmpe" << (S != 0 ? ".f64" : ".f32");
+                opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", #0.0";
-                if (op != 0) {
-                  args << " (quiet nan)";
-                }
                 if ((instr & 0x2f) != 0) {
                   args << " (UNPREDICTABLE)";
                 }
@@ -1152,8 +1152,10 @@
             args << Rd << ", #" << imm16;
             break;
           }
-          case 0x16: {
+          case 0x16: case 0x14: case 0x1C: {
             // BFI Rd, Rn, #lsb, #width - 111 10 0 11 011 0 nnnn 0 iii dddd ii 0 iiiii
+            // SBFX Rd, Rn, #lsb, #width - 111 10 0 11 010 0 nnnn 0 iii dddd ii 0 iiiii
+            // UBFX Rd, Rn, #lsb, #width - 111 10 0 11 110 0 nnnn 0 iii dddd ii 0 iiiii
             ArmRegister Rd(instr, 8);
             ArmRegister Rn(instr, 16);
             uint32_t msb = instr & 0x1F;
@@ -1161,12 +1163,21 @@
             uint32_t imm3 = (instr >> 12) & 0x7;
             uint32_t lsb = (imm3 << 2) | imm2;
             uint32_t width = msb - lsb + 1;
-            if (Rn.r != 0xF) {
-              opcode << "bfi";
-              args << Rd << ", " << Rn << ", #" << lsb << ", #" << width;
+            if (op3 == 0x16) {
+              if (Rn.r != 0xF) {
+                opcode << "bfi";
+                args << Rd << ", " << Rn << ", #" << lsb << ", #" << width;
+              } else {
+                opcode << "bfc";
+                args << Rd << ", #" << lsb << ", #" << width;
+              }
             } else {
-              opcode << "bfc";
-              args << Rd << ", #" << lsb << ", #" << width;
+              opcode << ((op3 & 0x8) != 0u ? "ubfx" : "sbfx");
+              args << Rd << ", " << Rn << ", #" << lsb << ", #" << width;
+              if (Rd.r == 13 || Rd.r == 15 || Rn.r == 13 || Rn.r == 15 ||
+                  (instr & 0x04000020) != 0u) {
+                args << " (UNPREDICTABLE)";
+              }
             }
             break;
           }
@@ -1251,10 +1262,10 @@
               imm32 = (S << 20) | (J2 << 19) | (J1 << 18) | (imm6 << 12) | (imm11 << 1);
               imm32 = (imm32 << 11) >> 11;  // sign extend 21 bit immediate.
             } else {
-              uint32_t I1 = ~(J1 ^ S);
-              uint32_t I2 = ~(J2 ^ S);
+              uint32_t I1 = (J1 ^ S) ^ 1;
+              uint32_t I2 = (J2 ^ S) ^ 1;
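+              // "^ 1" flips only the low bit; "~" would also set the upper bits.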
               imm32 = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
-              imm32 = (imm32 << 8) >> 8;  // sign extend 24 bit immediate.
+              imm32 = (imm32 << 7) >> 7;  // sign extend 25 bit immediate.
             }
             opcode << ".w";
             DumpBranchTarget(args, instr_ptr + 4, imm32);
@@ -1389,7 +1400,7 @@
             args << Rt << ", [" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]";
             if (Rn.r == TR && is_load) {
               args << "  ; ";
-              Thread::DumpThreadOffset<4>(args, imm12);
+              GetDisassemblerOptions()->thread_offset_name_function_(args, imm12);
             } else if (Rn.r == PC) {
               T2LitType lit_type[] = {
                   kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid,
@@ -1399,7 +1410,7 @@
               };
               DCHECK_LT(op2 >> 1, arraysize(lit_type));
               DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid);
-              DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]);
+              DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm12, lit_type[op2 >> 1]);
             }
           } else if ((instr & 0xFC0) == 0) {
             opcode << ldr_str << sign << type << ".w";
@@ -1469,82 +1480,101 @@
           }
           break;
         }
-      default:      // more formats
-        if ((op2 >> 4) == 2) {      // 010xxxx
-          // data processing (register)
-          if ((instr & 0x0080f0f0) == 0x0000f000) {
-            // LSL, LSR, ASR, ROR
-            uint32_t shift_op = (instr >> 21) & 3;
-            uint32_t S = (instr >> 20) & 1;
-            ArmRegister Rd(instr, 8);
+        case 0x7B: case 0x7F: {
+          FpRegister d(instr, 12, 22);
+          FpRegister m(instr, 0, 5);
+          uint32_t sz = (instr >> 18) & 0x3;  // Decode size bits.
+          uint32_t size = (sz == 0) ? 8 : sz << 4;
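+          // sz of 0/1/2 gives element size 8/16/32; sz of 3 yields 48, which
+          // fails both size checks below and falls through to UNKNOWN.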
+          uint32_t opc2 = (instr >> 7) & 0xF;
+          uint32_t Q = (instr >> 6) & 1;
+          if (Q == 0 && opc2 == 0xA && size == 8) {  // 1010, VCNT
+            opcode << "vcnt." << size;
+            args << d << ", " << m;
+          } else if (Q == 0 && (opc2 == 0x4 || opc2 == 0x5) && size <= 32) {  // 010x, VPADDL
+            bool op = HasBitSet(instr, 7);
+            opcode << "vpaddl." << (op ? "u" : "s") << size;
+            args << d << ", " << m;
+          } else {
+            opcode << "UNKNOWN " << op2;
+          }
+          break;
+        }
+        default:      // more formats
+          if ((op2 >> 4) == 2) {      // 010xxxx
+            // data processing (register)
+            if ((instr & 0x0080f0f0) == 0x0000f000) {
+              // LSL, LSR, ASR, ROR
+              uint32_t shift_op = (instr >> 21) & 3;
+              uint32_t S = (instr >> 20) & 1;
+              ArmRegister Rd(instr, 8);
+              ArmRegister Rn(instr, 16);
+              ArmRegister Rm(instr, 0);
+              opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
+              args << Rd << ", " << Rn << ", " << Rm;
+            }
+          } else if ((op2 >> 3) == 6) {       // 0110xxx
+            // Multiply, multiply accumulate, and absolute difference
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0x1;
+            ArmRegister Ra(instr, 12);
             ArmRegister Rn(instr, 16);
             ArmRegister Rm(instr, 0);
-            opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
-            args << Rd << ", " << Rn << ", " << Rm;
-          }
-        } else if ((op2 >> 3) == 6) {       // 0110xxx
-          // Multiply, multiply accumulate, and absolute difference
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0x1;
-          ArmRegister Ra(instr, 12);
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          switch (op1) {
-          case 0:
-            if (op2 == 0) {
-              if (Ra.r == 0xf) {
-                opcode << "mul";
-                args << Rd << ", " << Rn << ", " << Rm;
+            ArmRegister Rd(instr, 8);
+            switch (op1) {
+            case 0:
+              if (op2 == 0) {
+                if (Ra.r == 0xf) {
+                  opcode << "mul";
+                  args << Rd << ", " << Rn << ", " << Rm;
+                } else {
+                  opcode << "mla";
+                  args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+                }
               } else {
-                opcode << "mla";
+                opcode << "mls";
                 args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
               }
-            } else {
-              opcode << "mls";
-              args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+              break;
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+                break;        // do these sometime
             }
-            break;
-          case 1:
-          case 2:
-          case 3:
-          case 4:
-          case 5:
-          case 6:
-              break;        // do these sometime
+          } else if ((op2 >> 3) == 7) {       // 0111xxx
+            // Long multiply, long multiply accumulate, and divide
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0xf;
+            ArmRegister Rn(instr, 16);
+            ArmRegister Rm(instr, 0);
+            ArmRegister Rd(instr, 8);
+            ArmRegister RdHi(instr, 8);
+            ArmRegister RdLo(instr, 12);
+            switch (op1) {
+            case 0:
+              opcode << "smull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 1:
+              opcode << "sdiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 2:
+              opcode << "umull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 3:
+              opcode << "udiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 4:
+            case 5:
+            case 6:
+              break;      // TODO: when we generate these...
+            }
           }
-        } else if ((op2 >> 3) == 7) {       // 0111xxx
-          // Long multiply, long multiply accumulate, and divide
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0xf;
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          ArmRegister RdHi(instr, 8);
-          ArmRegister RdLo(instr, 12);
-          switch (op1) {
-          case 0:
-            opcode << "smull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 1:
-            opcode << "sdiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 2:
-            opcode << "umull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 3:
-            opcode << "udiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 4:
-          case 5:
-          case 6:
-            break;      // TODO: when we generate these...
-          }
-        }
       }
       break;
     default:
@@ -1700,10 +1730,13 @@
           break;
       }
     } else if (opcode1 == 0x12 || opcode1 == 0x13) {  // 01001x
+      const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
+      const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
       ThumbRegister Rt(instr, 8);
       uint16_t imm8 = instr & 0xFF;
       opcode << "ldr";
       args << Rt << ", [pc, #" << (imm8 << 2) << "]";
+      DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, /*U*/ 1u, imm8 << 2, kT2LitHexWord);
     } else if ((opcode1 >= 0x14 && opcode1 <= 0x17) ||  // 0101xx
                (opcode1 >= 0x18 && opcode1 <= 0x1f) ||  // 011xxx
                (opcode1 >= 0x20 && opcode1 <= 0x27)) {  // 100xxx
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 5f88714..80bacb2 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -22,7 +22,8 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 namespace art {
 namespace arm64 {
@@ -38,15 +39,14 @@
   LR  = 30
 };
 
-void CustomDisassembler::AppendRegisterNameToOutput(
-    const vixl::Instruction* instr,
-    const vixl::CPURegister& reg) {
+void CustomDisassembler::AppendRegisterNameToOutput(const Instruction* instr,
+                                                    const CPURegister& reg) {
   USE(instr);
   if (reg.IsRegister() && reg.Is64Bits()) {
-    if (reg.code() == TR) {
+    if (reg.GetCode() == TR) {
       AppendToOutput("tr");
       return;
-    } else if (reg.code() == LR) {
+    } else if (reg.GetCode() == LR) {
       AppendToOutput("lr");
       return;
     }
@@ -56,29 +56,37 @@
   Disassembler::AppendRegisterNameToOutput(instr, reg);
 }
 
-void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) {
+void CustomDisassembler::VisitLoadLiteral(const Instruction* instr) {
   Disassembler::VisitLoadLiteral(instr);
 
   if (!read_literals_) {
     return;
   }
 
-  void* data_address = instr->LiteralAddress<void*>();
-  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
+  void* data_address = instr->GetLiteralAddress<void*>();
+  if (data_address < base_address_ || data_address >= end_address_) {
+    AppendToOutput(" (?)");
+    return;
+  }
 
+  // Output information on literal.
+  Instr op = instr->Mask(LoadLiteralMask);
   switch (op) {
-    case vixl::LDR_w_lit:
-    case vixl::LDR_x_lit:
-    case vixl::LDRSW_x_lit: {
-      int64_t data = op == vixl::LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address)
-                                           : *reinterpret_cast<int32_t*>(data_address);
+    case LDR_w_lit:
+    case LDR_x_lit:
+    case LDRSW_x_lit: {
+      int64_t data = op == LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address)
+                                     : *reinterpret_cast<int32_t*>(data_address);
       AppendToOutput(" (0x%" PRIx64 " / %" PRId64 ")", data, data);
       break;
     }
-    case vixl::LDR_s_lit:
-    case vixl::LDR_d_lit: {
-      double data = (op == vixl::LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
-                                            : *reinterpret_cast<double*>(data_address);
+    case LDR_s_lit:
+    case LDR_d_lit: {
+      double data = (op == LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
+                                      : *reinterpret_cast<double*>(data_address);
       AppendToOutput(" (%g)", data);
       break;
     }
@@ -87,27 +95,27 @@
   }
 }
 
-void CustomDisassembler::VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) {
+void CustomDisassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   Disassembler::VisitLoadStoreUnsignedOffset(instr);
 
-  if (instr->Rn() == TR) {
-    int64_t offset = instr->ImmLSUnsigned() << instr->SizeLS();
+  if (instr->GetRn() == TR) {
+    int64_t offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
     std::ostringstream tmp_stream;
-    Thread::DumpThreadOffset<8>(tmp_stream, static_cast<uint32_t>(offset));
+    options_->thread_offset_name_function_(tmp_stream, static_cast<uint32_t>(offset));
     AppendToOutput(" ; %s", tmp_stream.str().c_str());
   }
 }
 
 size_t DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin) {
-  const vixl::Instruction* instr = reinterpret_cast<const vixl::Instruction*>(begin);
+  const Instruction* instr = reinterpret_cast<const Instruction*>(begin);
   decoder.Decode(instr);
     os << FormatInstructionPointer(begin)
-     << StringPrintf(": %08x\t%s\n", instr->InstructionBits(), disasm.GetOutput());
-  return vixl::kInstructionSize;
+     << StringPrintf(": %08x\t%s\n", instr->GetInstructionBits(), disasm.GetOutput());
+  return kInstructionSize;
 }
 
 void DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) {
-  for (const uint8_t* cur = begin; cur < end; cur += vixl::kInstructionSize) {
+  for (const uint8_t* cur = begin; cur < end; cur += kInstructionSize) {
     Dump(os, cur);
   }
 }
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index 44fa53f..19e4dfb 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -19,33 +19,39 @@
 
 #include "disassembler.h"
 
+// TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
-#include "vixl/a64/decoder-a64.h"
-#include "vixl/a64/disasm-a64.h"
+#include "aarch64/decoder-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
 
-class CustomDisassembler FINAL : public vixl::Disassembler {
+class CustomDisassembler FINAL : public vixl::aarch64::Disassembler {
  public:
-  explicit CustomDisassembler(DisassemblerOptions* options) :
-      vixl::Disassembler(), read_literals_(options->can_read_literals_) {
+  explicit CustomDisassembler(DisassemblerOptions* options)
+      : vixl::aarch64::Disassembler(),
+        read_literals_(options->can_read_literals_),
+        base_address_(options->base_address_),
+        end_address_(options->end_address_),
+        options_(options) {
     if (!options->absolute_addresses_) {
-      MapCodeAddress(0, reinterpret_cast<const vixl::Instruction*>(options->base_address_));
+      MapCodeAddress(0,
+                     reinterpret_cast<const vixl::aarch64::Instruction*>(options->base_address_));
     }
   }
 
   // Use register aliases in the disassembly.
-  void AppendRegisterNameToOutput(const vixl::Instruction* instr,
-                                  const vixl::CPURegister& reg) OVERRIDE;
+  void AppendRegisterNameToOutput(const vixl::aarch64::Instruction* instr,
+                                  const vixl::aarch64::CPURegister& reg) OVERRIDE;
 
   // Improve the disassembly of literal load instructions.
-  void VisitLoadLiteral(const vixl::Instruction* instr) OVERRIDE;
+  void VisitLoadLiteral(const vixl::aarch64::Instruction* instr) OVERRIDE;
 
   // Improve the disassembly of thread offset.
-  void VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) OVERRIDE;
+  void VisitLoadStoreUnsignedOffset(const vixl::aarch64::Instruction* instr) OVERRIDE;
 
  private:
   // Indicate if the disassembler should read data loaded from literal pools.
@@ -55,6 +61,12 @@
   //           true | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0)
   //          false | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0) (3.40282e+38)
   const bool read_literals_;
+
+  // Valid address range: [base_address_, end_address_)
+  const void* const base_address_;
+  const void* const end_address_;
+
+  DisassemblerOptions* options_;
 };
 
 class DisassemblerArm64 FINAL : public Disassembler {
@@ -68,7 +80,7 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
-  vixl::Decoder decoder;
+  vixl::aarch64::Decoder decoder;
   CustomDisassembler disasm;
 
   DISALLOW_COPY_AND_ASSIGN(DisassemblerArm64);
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index c2f23aa..02c6d71 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -21,7 +21,6 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
 
 namespace art {
 namespace mips {
@@ -56,7 +55,7 @@
   // R-type instructions.
   { kRTypeMask, 0, "sll", "DTA", },
   // 0, 1, movci
-  { kRTypeMask, 2, "srl", "DTA", },
+  { kRTypeMask | (0x1f << 21), 2, "srl", "DTA", },
   { kRTypeMask, 3, "sra", "DTA", },
   { kRTypeMask | (0x1f << 6), 4, "sllv", "DTS", },
   { kRTypeMask | (0x1f << 6), 6, "srlv", "DTS", },
@@ -111,6 +110,8 @@
   { kRTypeMask | (0x1f << 21), 63, "dsra32", "DTA", },
 
   // SPECIAL0
+  { kSpecial0Mask | 0x307ff, 1, "movf", "DSc" },
+  { kSpecial0Mask | 0x307ff, 0x10001, "movt", "DSc" },
   { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
   { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" },
   { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" },
@@ -139,6 +140,7 @@
   // SPECIAL2
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 2, "mul", "DST" },
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 32, "clz", "DS" },
+  { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 33, "clo", "DS" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 0, "madd", "ST" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 1, "maddu", "ST" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 2, "mul", "DST" },
@@ -147,14 +149,37 @@
   { kSpecial2Mask | 0x3f, (28 << kOpcodeShift) | 0x3f, "sdbbp", "" },  // TODO: code
 
   // SPECIAL3
+  { kSpecial3Mask | 0x3f, (31 << kOpcodeShift), "ext", "TSAZ", },
   { kSpecial3Mask | 0x3f, (31 << kOpcodeShift) | 3, "dext", "TSAZ", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (16 << 6) | 32, "seb", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (24 << 6) | 32, "seh", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | 32, "bitswap", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | 36, "dbitswap", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (2 << 6) | 36, "dsbh", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (5 << 6) | 36, "dshd", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (2 << 6) | 32, "wsbh", "DT", },
+  { kSpecial3Mask | 0x3f, (31 << kOpcodeShift) | 4, "ins", "TSAz", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (16 << 6) | 32,
+    "seb",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (24 << 6) | 32,
+    "seh",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | 32,
+    "bitswap",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | 36,
+    "dbitswap",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (2 << 6) | 36,
+    "dsbh",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (5 << 6) | 36,
+    "dshd",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (2 << 6) | 32,
+    "wsbh",
+    "DT", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x26, "sc", "Tl", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x27, "scd", "Tl", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x36, "ll", "Tl", },
@@ -194,6 +219,11 @@
   { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", },
   { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", },
 
+  { kITypeMask | (0x3e3 << 16), (17 << kOpcodeShift) | (8 << 21), "bc1f", "cB" },
+  { kITypeMask | (0x3e3 << 16), (17 << kOpcodeShift) | (8 << 21) | (1 << 16), "bc1t", "cB" },
+  { kITypeMask | (0x1f << 21), (17 << kOpcodeShift) | (9 << 21), "bc1eqz", "tB" },
+  { kITypeMask | (0x1f << 21), (17 << kOpcodeShift) | (13 << 21), "bc1nez", "tB" },
+
   { kITypeMask | (0x1f << 21), 22 << kOpcodeShift, "blezc", "TB" },
 
   // TODO: de-dup
@@ -275,13 +305,18 @@
 
   { kITypeMask, 32u << kOpcodeShift, "lb", "TO", },
   { kITypeMask, 33u << kOpcodeShift, "lh", "TO", },
+  { kITypeMask, 34u << kOpcodeShift, "lwl", "TO", },
   { kITypeMask, 35u << kOpcodeShift, "lw", "TO", },
   { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", },
   { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", },
+  { kITypeMask, 38u << kOpcodeShift, "lwr", "TO", },
   { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", },
   { kITypeMask, 40u << kOpcodeShift, "sb", "TO", },
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
+  { kITypeMask, 42u << kOpcodeShift, "swl", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
+  { kITypeMask, 46u << kOpcodeShift, "swr", "TO", },
+  { kITypeMask, 48u << kOpcodeShift, "ll", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
   { kJTypeMask, 50u << kOpcodeShift, "bc", "P" },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
@@ -292,9 +327,12 @@
   { kITypeMask | (1 << 24), (54u << kOpcodeShift) | (1 << 24), "beqzc", "Sb" },
   { kITypeMask | (1 << 25), (54u << kOpcodeShift) | (1 << 25), "beqzc", "Sb" },
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
+  { kITypeMask, 56u << kOpcodeShift, "sc", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
+  { kJTypeMask, 58u << kOpcodeShift, "balc", "P" },
   { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
   { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" },
+  { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
   { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
   { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
@@ -311,6 +349,26 @@
   { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" },
   { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" },
   { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21), "mthc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 1, "cmp.un.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 2, "cmp.eq.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 3, "cmp.ueq.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 4, "cmp.lt.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 5, "cmp.ult.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 6, "cmp.le.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 7, "cmp.ule.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 17, "cmp.or.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 18, "cmp.une.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 19, "cmp.ne.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 1, "cmp.un.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 2, "cmp.eq.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 3, "cmp.ueq.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 4, "cmp.lt.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 5, "cmp.ult.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 6, "cmp.le.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 7, "cmp.ule.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 17, "cmp.or.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 18, "cmp.une.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 19, "cmp.ne.d", "adt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" },
@@ -327,6 +385,8 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 13, "trunc.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 14, "ceil.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 15, "floor.w", "fad" },
+  { kFpMask | (0x201 << 16), kCop1 | (0x200 << 16) | 17, "movf", "fadc" },
+  { kFpMask | (0x201 << 16), kCop1 | (0x201 << 16) | 17, "movt", "fadc" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 26, "rint", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 27, "class", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 32, "cvt.s", "fad" },
@@ -334,6 +394,13 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 36, "cvt.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 37, "cvt.l", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 38, "cvt.ps", "fad" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 49, "c.un", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 50, "c.eq", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 51, "c.ueq", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 52, "c.olt", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 53, "c.ult", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 54, "c.ole", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 55, "c.ule", "fCdt" },
   { kFpMask, kCop1 | 0x10, "sel", "fadt" },
   { kFpMask, kCop1 | 0x1e, "max", "fadt" },
   { kFpMask, kCop1 | 0x1c, "min", "fadt" },
@@ -365,7 +432,7 @@
       opcode = gMipsInstructions[i].name;
       for (const char* args_fmt = gMipsInstructions[i].args_fmt; *args_fmt; ++args_fmt) {
         switch (*args_fmt) {
-          case 'A':  // sa (shift amount or [d]ext position).
+          case 'A':  // sa (shift amount or [d]ins/[d]ext position).
             args << sa;
             break;
           case 'B':  // Branch offset.
@@ -386,6 +453,12 @@
                    << StringPrintf("  ; %+d", offset);
             }
             break;
+          case 'C':  // Floating-point condition code flag in c.<cond>.fmt.
+            args << "cc" << (sa >> 2);
+            break;
+          case 'c':  // Floating-point condition code flag in bc1f/bc1t and movf/movt.
+            args << "cc" << (rt >> 2);
+            break;
           case 'D': args << 'r' << rd; break;
           case 'd': args << 'f' << rd; break;
           case 'a': args << 'f' << sa; break;
@@ -429,15 +502,19 @@
               args << StringPrintf("%+d(r%d)", offset, rs);
               if (rs == 17) {
                 args << "  ; ";
-                if (is64bit_) {
-                  Thread::DumpThreadOffset<8>(args, offset);
-                } else {
-                  Thread::DumpThreadOffset<4>(args, offset);
-                }
+                GetDisassemblerOptions()->thread_offset_name_function_(args, offset);
               }
             }
             break;
-          case 'P':  // 26-bit offset in bc.
+          case 'o':  // 19-bit offset in lwpc.
+            {
+              int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
+              offset <<= 2;
+              args << FormatInstructionPointer(instr_ptr + offset);
+              args << StringPrintf("  ; %+d", offset);
+            }
+            break;
+          case 'P':  // 26-bit offset in bc and balc.
             {
               int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1);
               offset <<= 2;
@@ -457,7 +534,8 @@
           case 's': args << 'f' << rs; break;
           case 'T': args << 'r' << rt; break;
           case 't': args << 'f' << rt; break;
-          case 'Z': args << rd; break;   // sz ([d]ext size).
+          case 'Z': args << (rd + 1); break;  // sz ([d]ext size).
+          case 'z': args << (rd - sa + 1); break;  // sz ([d]ins size).
         }
         if (*(args_fmt + 1)) {
           args << ", ";
@@ -467,6 +545,7 @@
     }
   }
 
+  // TODO: Simplify this once these sequences are simplified in the compiler.
   // Special cases for sequences of:
   //   pc-relative +/- 2GB branch:
   //     auipc  reg, imm
diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h
index b0e49b3..6342f22 100644
--- a/disassembler/disassembler_mips.h
+++ b/disassembler/disassembler_mips.h
@@ -26,9 +26,8 @@
 
 class DisassemblerMips FINAL : public Disassembler {
  public:
-  DisassemblerMips(DisassemblerOptions* options, bool is64bit)
+  explicit DisassemblerMips(DisassemblerOptions* options)
       : Disassembler(options),
-        is64bit_(is64bit),
         last_ptr_(nullptr),
         last_instr_(0) {}
 
@@ -36,8 +35,6 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
-  const bool is64bit_;
-
   // Address and encoding of the last disassembled instruction.
   // Needed to produce more readable disassembly of certain 2-instruction sequences.
   const uint8_t* last_ptr_;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4bef0f..2ca84e5 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -23,7 +23,6 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
 
 namespace art {
 namespace x86 {
@@ -243,7 +242,38 @@
   return address.str();
 }
 
+size_t DisassemblerX86::DumpNops(std::ostream& os, const uint8_t* instr) {
+  static constexpr uint8_t kNops[][10] = {
+      { },
+      { 0x90 },
+      { 0x66, 0x90 },
+      { 0x0f, 0x1f, 0x00 },
+      { 0x0f, 0x1f, 0x40, 0x00 },
+      { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+      { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+      { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
+      { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
+  };
+
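+  // Compare the instruction stream against each known NOP encoding;
+  // row i of kNops holds an i-byte encoding, zero-padded to 10 bytes.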
+  for (size_t i = 1; i < arraysize(kNops); ++i) {
+    if (memcmp(instr, kNops[i], i) == 0) {
+      os << FormatInstructionPointer(instr)
+         << StringPrintf(": %22s    \t       nop \n", DumpCodeHex(instr, instr + i).c_str());
+      return i;
+    }
+  }
+
+  return 0;
+}
+
 size_t DisassemblerX86::DumpInstruction(std::ostream& os, const uint8_t* instr) {
+  size_t nop_size = DumpNops(os, instr);
+  if (nop_size != 0u) {
+    return nop_size;
+  }
+
   const uint8_t* begin_instr = instr;
   bool have_prefixes = true;
   uint8_t prefix[4] = {0, 0, 0, 0};
@@ -400,6 +430,7 @@
   case 0x89: opcode1 = "mov"; store = true; has_modrm = true; break;
   case 0x8A: opcode1 = "mov"; load = true; has_modrm = true; byte_operand = true; break;
   case 0x8B: opcode1 = "mov"; load = true; has_modrm = true; break;
+  case 0x9D: opcode1 = "popf"; break;
 
   case 0x0F:  // 2 byte extended opcode
     instr++;
@@ -565,7 +596,7 @@
               opcode1 = "roundss";
               prefix[2] = 0;
               has_modrm = true;
-              store = true;
+              load = true;
               src_reg_file = SSE;
               dst_reg_file = SSE;
               immediate_bytes = 1;
@@ -574,7 +605,7 @@
               opcode1 = "roundsd";
               prefix[2] = 0;
               has_modrm = true;
-              store = true;
+              load = true;
               src_reg_file = SSE;
               dst_reg_file = SSE;
               immediate_bytes = 1;
@@ -938,6 +969,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xB8:
+        opcode1 = "popcnt";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBE:
         opcode1 = "movsxb";
         has_modrm = true;
@@ -1372,11 +1408,11 @@
   }
   if (prefix[1] == kFs && !supports_rex_) {
     args << "  ; ";
-    Thread::DumpThreadOffset<4>(args, address_bits);
+    GetDisassemblerOptions()->thread_offset_name_function_(args, address_bits);
   }
   if (prefix[1] == kGs && supports_rex_) {
     args << "  ; ";
-    Thread::DumpThreadOffset<8>(args, address_bits);
+    GetDisassemblerOptions()->thread_offset_name_function_(args, address_bits);
   }
   const char* prefix_str;
   switch (prefix[0]) {
diff --git a/disassembler/disassembler_x86.h b/disassembler/disassembler_x86.h
index 71c3e41..31b62bc 100644
--- a/disassembler/disassembler_x86.h
+++ b/disassembler/disassembler_x86.h
@@ -33,6 +33,7 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
+  size_t DumpNops(std::ostream& os, const uint8_t* instr);
   size_t DumpInstruction(std::ostream& os, const uint8_t* instr);
 
   std::string DumpAddress(uint8_t mod, uint8_t rm, uint8_t rex64, uint8_t rex_w, bool no_ops,
diff --git a/imgdiag/Android.mk b/imgdiag/Android.mk
index d5d7c22..83315be 100644
--- a/imgdiag/Android.mk
+++ b/imgdiag/Android.mk
@@ -25,4 +25,8 @@
 # that the image it's analyzing be the same ISA as the runtime ISA.
 
 # Build variants {target,host} x {debug,ndebug} x {32,64}
-$(eval $(call build-art-multi-executable,imgdiag,$(IMGDIAG_SRC_FILES),libart-compiler libbacktrace,libcutils,libziparchive-host,art/compiler,both))
+#
+# Honor HOST_PREFER_32_BIT, as building a 64-bit imgdiag executable
+# when HOST_PREFER_32_BIT is true would require an unmet dependency on
+# 64-bit libbacktrace.
+$(eval $(call build-art-multi-executable,imgdiag,$(IMGDIAG_SRC_FILES),libart-compiler libbacktrace,libcutils,libziparchive-host,art/compiler,both,$(HOST_PREFER_32_BIT)))
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 304d4e5..21a0ca0 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -18,11 +18,13 @@
 #include <stdlib.h>
 
 #include <fstream>
+#include <functional>
 #include <iostream>
 #include <string>
 #include <vector>
 #include <set>
 #include <map>
+#include <unordered_set>
 
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -34,7 +36,6 @@
 #include "image.h"
 #include "scoped_thread_state_change.h"
 #include "os.h"
-#include "gc_map.h"
 
 #include "cmdline.h"
 #include "backtrace/BacktraceMap.h"
@@ -48,16 +49,20 @@
 class ImgDiagDumper {
  public:
   explicit ImgDiagDumper(std::ostream* os,
-                       const ImageHeader& image_header,
-                       const char* image_location,
-                       pid_t image_diff_pid)
+                         const ImageHeader& image_header,
+                         const std::string& image_location,
+                         pid_t image_diff_pid,
+                         pid_t zygote_diff_pid)
       : os_(os),
         image_header_(image_header),
         image_location_(image_location),
-        image_diff_pid_(image_diff_pid) {}
+        image_diff_pid_(image_diff_pid),
+        zygote_diff_pid_(zygote_diff_pid) {}
 
   bool Dump() SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
+    os << "IMAGE LOCATION: " << image_location_ << "\n\n";
+
     os << "MAGIC: " << image_header_.GetMagic() << "\n\n";
 
     os << "IMAGE BEGIN: " << reinterpret_cast<void*>(image_header_.GetImageBegin()) << "\n\n";
@@ -65,7 +70,7 @@
     bool ret = true;
     if (image_diff_pid_ >= 0) {
       os << "IMAGE DIFF PID (" << image_diff_pid_ << "): ";
-      ret = DumpImageDiff(image_diff_pid_);
+      ret = DumpImageDiff(image_diff_pid_, zygote_diff_pid_);
       os << "\n\n";
     } else {
       os << "IMAGE DIFF PID: disabled\n\n";
@@ -92,7 +97,8 @@
     return str.substr(idx + 1);
   }
 
-  bool DumpImageDiff(pid_t image_diff_pid) SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool DumpImageDiff(pid_t image_diff_pid, pid_t zygote_diff_pid)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
 
     {
@@ -135,14 +141,151 @@
     }
 
     // Future idea: diff against zygote so we can ignore the shared dirty pages.
-    return DumpImageDiffMap(image_diff_pid, boot_map);
+    return DumpImageDiffMap(image_diff_pid, zygote_diff_pid, boot_map);
   }
 
-    // Look at /proc/$pid/mem and only diff the things from there
-  bool DumpImageDiffMap(pid_t image_diff_pid, const backtrace_map_t& boot_map)
+  static std::string PrettyFieldValue(ArtField* field, mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    std::ostringstream oss;
+    switch (field->GetTypeAsPrimitiveType()) {
+      case Primitive::kPrimNot: {
+        oss << obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
+            field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimBoolean: {
+        oss << static_cast<bool>(obj->GetFieldBoolean<kVerifyNone>(field->GetOffset()));
+        break;
+      }
+      case Primitive::kPrimByte: {
+        oss << static_cast<int32_t>(obj->GetFieldByte<kVerifyNone>(field->GetOffset()));
+        break;
+      }
+      case Primitive::kPrimChar: {
+        oss << obj->GetFieldChar<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimShort: {
+        oss << obj->GetFieldShort<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimInt: {
+        oss << obj->GetField32<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimLong: {
+        oss << obj->GetField64<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimFloat: {
+        oss << obj->GetField32<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimDouble: {
+        oss << obj->GetField64<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimVoid: {
+        oss << "void";
+        break;
+      }
+    }
+    return oss.str();
+  }
+
+  // Aggregate and detail class data from an image diff.
+  struct ClassData {
+    int dirty_object_count = 0;
+
+    // Track only the byte-per-byte dirtiness (in bytes)
+    int dirty_object_byte_count = 0;
+
+    // Track the object-by-object dirtiness (in bytes)
+    int dirty_object_size_in_bytes = 0;
+
+    int clean_object_count = 0;
+
+    std::string descriptor;
+
+    int false_dirty_byte_count = 0;
+    int false_dirty_object_count = 0;
+    std::vector<mirror::Object*> false_dirty_objects;
+
+    // Remote pointers to dirty objects
+    std::vector<mirror::Object*> dirty_objects;
+  };
+
+  void DiffObjectContents(mirror::Object* obj,
+                          uint8_t* remote_bytes,
+                          std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_) {
+    const char* tabs = "    ";
+    // Attempt to find fields for all dirty bytes.
+    mirror::Class* klass = obj->GetClass();
+    if (obj->IsClass()) {
+      os << tabs << "Class " << PrettyClass(obj->AsClass()) << " " << obj << "\n";
+    } else {
+      os << tabs << "Instance of " << PrettyClass(klass) << " " << obj << "\n";
+    }
+
+    std::unordered_set<ArtField*> dirty_instance_fields;
+    std::unordered_set<ArtField*> dirty_static_fields;
+    const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+    mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(remote_bytes);
+    for (size_t i = 0, count = obj->SizeOf(); i < count; ++i) {
+      if (obj_bytes[i] != remote_bytes[i]) {
+        ArtField* field = ArtField::FindInstanceFieldWithOffset</*exact*/false>(klass, i);
+        if (field != nullptr) {
+          dirty_instance_fields.insert(field);
+        } else if (obj->IsClass()) {
+          field = ArtField::FindStaticFieldWithOffset</*exact*/false>(obj->AsClass(), i);
+          if (field != nullptr) {
+            dirty_static_fields.insert(field);
+          }
+        }
+        if (field == nullptr) {
+          if (klass->IsArrayClass()) {
+            mirror::Class* component_type = klass->GetComponentType();
+            Primitive::Type primitive_type = component_type->GetPrimitiveType();
+            size_t component_size = Primitive::ComponentSize(primitive_type);
+            size_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+            if (i >= data_offset) {
+              os << tabs << "Dirty array element " << (i - data_offset) / component_size << "\n";
+              // Skip to next element to prevent spam.
+              i += component_size - 1;
+              continue;
+            }
+          }
+          os << tabs << "No field for byte offset " << i << "\n";
+        }
+      }
+    }
+    // Dump different fields. TODO: Dump field contents.
+    if (!dirty_instance_fields.empty()) {
+      os << tabs << "Dirty instance fields " << dirty_instance_fields.size() << "\n";
+      for (ArtField* field : dirty_instance_fields) {
+        os << tabs << PrettyField(field)
+           << " original=" << PrettyFieldValue(field, obj)
+           << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+      }
+    }
+    if (!dirty_static_fields.empty()) {
+      os << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
+      for (ArtField* field : dirty_static_fields) {
+        os << tabs << PrettyField(field)
+           << " original=" << PrettyFieldValue(field, obj)
+           << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+      }
+    }
+    os << "\n";
+  }
+
+  // Look at /proc/$pid/mem and diff only the memory mapped there.
+  bool DumpImageDiffMap(pid_t image_diff_pid,
+                        pid_t zygote_diff_pid,
+                        const backtrace_map_t& boot_map)
     SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
-    const size_t pointer_size = InstructionSetPointerSize(
+    const PointerSize pointer_size = InstructionSetPointerSize(
         Runtime::Current()->GetInstructionSet());
 
     std::string file_name =
@@ -163,7 +306,7 @@
     std::string error_msg;
 
     // Walk the bytes and diff against our boot image
-    const ImageHeader& boot_image_header = GetBootImageHeader();
+    const ImageHeader& boot_image_header = image_header_;
 
     os << "\nObserving boot image header at address "
        << reinterpret_cast<const void*>(&boot_image_header)
@@ -198,6 +341,20 @@
       return false;
     }
 
+    std::vector<uint8_t> zygote_contents;
+    std::unique_ptr<File> zygote_map_file;
+    if (zygote_diff_pid != -1) {
+      std::string zygote_file_name =
+          StringPrintf("/proc/%ld/mem", static_cast<long>(zygote_diff_pid));  // NOLINT [runtime/int]
+      zygote_map_file.reset(OS::OpenFileForReading(zygote_file_name.c_str()));
+      zygote_contents.resize(boot_map_size);  // The boot map should be at the same address.
+      if (zygote_map_file == nullptr ||
+          !zygote_map_file->PreadFully(&zygote_contents[0], boot_map_size, boot_map.start)) {
+        LOG(WARNING) << "Could not fully read zygote file " << zygote_file_name;
+        zygote_contents.clear();
+      }
+    }
+
     std::string page_map_file_name = StringPrintf(
         "/proc/%ld/pagemap", static_cast<long>(image_diff_pid));  // NOLINT [runtime/int]
     auto page_map_file = std::unique_ptr<File>(OS::OpenFileForReading(page_map_file_name.c_str()));
@@ -322,16 +479,10 @@
       }
     }
 
+    std::map<mirror::Class*, ClassData> class_data;
+
     // Walk each object in the remote image space and compare it against ours
     size_t different_objects = 0;
-    std::map<mirror::Class*, int /*count*/> dirty_object_class_map;
-    // Track only the byte-per-byte dirtiness (in bytes)
-    std::map<mirror::Class*, int /*byte_count*/> dirty_object_byte_count;
-    // Track the object-by-object dirtiness (in bytes)
-    std::map<mirror::Class*, int /*byte_count*/> dirty_object_size_in_bytes;
-    std::map<mirror::Class*, int /*count*/> clean_object_class_map;
-
-    std::map<mirror::Class*, std::string> class_to_descriptor_map;
 
     std::map<off_t /* field offset */, int /* count */> art_method_field_dirty_count;
     std::vector<ArtMethod*> art_method_dirty_objects;
@@ -341,131 +492,139 @@
 
     // List of local objects that are clean, but located on dirty pages.
     std::vector<mirror::Object*> false_dirty_objects;
-    std::map<mirror::Class*, int /*byte_count*/> false_dirty_byte_count;
-    std::map<mirror::Class*, int /*object_count*/> false_dirty_object_count;
-    std::map<mirror::Class*, std::vector<mirror::Object*>> false_dirty_objects_map;
     size_t false_dirty_object_bytes = 0;
 
-    // Remote pointers to dirty objects
-    std::map<mirror::Class*, std::vector<mirror::Object*>> dirty_objects_by_class;
     // Look up remote classes by their descriptor
     std::map<std::string, mirror::Class*> remote_class_map;
     // Look up local classes by their descriptor
     std::map<std::string, mirror::Class*> local_class_map;
 
+    // Objects that are dirty against the image (possibly shared or private dirty).
+    std::set<mirror::Object*> image_dirty_objects;
+
+    // Objects that are dirty against the zygote (probably private dirty).
+    std::set<mirror::Object*> zygote_dirty_objects;
+
     size_t dirty_object_bytes = 0;
-    {
-      const uint8_t* begin_image_ptr = image_begin_unaligned;
-      const uint8_t* end_image_ptr = image_mirror_end_unaligned;
+    const uint8_t* begin_image_ptr = image_begin_unaligned;
+    const uint8_t* end_image_ptr = image_mirror_end_unaligned;
 
-      const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
-      while (reinterpret_cast<const uintptr_t>(current)
-             < reinterpret_cast<const uintptr_t>(end_image_ptr)) {
-        CHECK_ALIGNED(current, kObjectAlignment);
-        mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));
+    const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
+    while (reinterpret_cast<uintptr_t>(current) < reinterpret_cast<uintptr_t>(end_image_ptr)) {
+      CHECK_ALIGNED(current, kObjectAlignment);
+      mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));
 
-        // Sanity check that we are reading a real object
-        CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-        if (kUseBakerOrBrooksReadBarrier) {
-          obj->AssertReadBarrierPointer();
+      // Sanity check that we are reading a real object
+      CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
+      if (kUseBakerOrBrooksReadBarrier) {
+        obj->AssertReadBarrierPointer();
+      }
+
+      // Iterate every page this object belongs to
+      bool on_dirty_page = false;
+      size_t page_off = 0;
+      size_t current_page_idx;
+      uintptr_t object_address;
+      do {
+        object_address = reinterpret_cast<uintptr_t>(current);
+        current_page_idx = object_address / kPageSize + page_off;
+
+        if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
+          // This object is on a dirty page
+          on_dirty_page = true;
         }
 
-        // Iterate every page this object belongs to
-        bool on_dirty_page = false;
-        size_t page_off = 0;
-        size_t current_page_idx;
-        uintptr_t object_address;
-        do {
-          object_address = reinterpret_cast<uintptr_t>(current);
-          current_page_idx = object_address / kPageSize + page_off;
+        page_off++;
+      } while ((current_page_idx * kPageSize) <
+               RoundUp(object_address + obj->SizeOf(), kObjectAlignment));
 
-          if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
-            // This object is on a dirty page
-            on_dirty_page = true;
+      mirror::Class* klass = obj->GetClass();
+
+      // Check against the other object and see if they are different
+      ptrdiff_t offset = current - begin_image_ptr;
+      const uint8_t* current_remote = &remote_contents[offset];
+      mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
+          const_cast<uint8_t*>(current_remote));
+
+      bool different_image_object = memcmp(current, current_remote, obj->SizeOf()) != 0;
+      if (different_image_object) {
+        bool different_zygote_object = false;
+        if (!zygote_contents.empty()) {
+          const uint8_t* zygote_ptr = &zygote_contents[offset];
+          different_zygote_object = memcmp(current, zygote_ptr, obj->SizeOf()) != 0;
+        }
+        if (different_zygote_object) {
+          // Different from zygote.
+          zygote_dirty_objects.insert(obj);
+        } else {
+          // Just different from image.
+          image_dirty_objects.insert(obj);
+        }
+
+        different_objects++;
+        dirty_object_bytes += obj->SizeOf();
+
+        ++class_data[klass].dirty_object_count;
+
+        // Go byte-by-byte and figure out what exactly got dirtied
+        size_t dirty_byte_count_per_object = 0;
+        for (size_t i = 0; i < obj->SizeOf(); ++i) {
+          if (current[i] != current_remote[i]) {
+            dirty_byte_count_per_object++;
           }
+        }
+        class_data[klass].dirty_object_byte_count += dirty_byte_count_per_object;
+        class_data[klass].dirty_object_size_in_bytes += obj->SizeOf();
+        class_data[klass].dirty_objects.push_back(remote_obj);
+      } else {
+        ++class_data[klass].clean_object_count;
+      }
 
-          page_off++;
-        } while ((current_page_idx * kPageSize) <
-                 RoundUp(object_address + obj->SizeOf(), kObjectAlignment));
+      std::string descriptor = GetClassDescriptor(klass);
+      if (different_image_object) {
+        if (klass->IsClassClass()) {
+          // this is a "Class"
+          mirror::Class* obj_as_class  = reinterpret_cast<mirror::Class*>(remote_obj);
 
-        mirror::Class* klass = obj->GetClass();
-
-        bool different_object = false;
-
-        // Check against the other object and see if they are different
-        ptrdiff_t offset = current - begin_image_ptr;
-        const uint8_t* current_remote = &remote_contents[offset];
-        mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
-            const_cast<uint8_t*>(current_remote));
-        if (memcmp(current, current_remote, obj->SizeOf()) != 0) {
-          different_objects++;
-          dirty_object_bytes += obj->SizeOf();
-
-          ++dirty_object_class_map[klass];
-
-          // Go byte-by-byte and figure out what exactly got dirtied
-          size_t dirty_byte_count_per_object = 0;
+          // Print the fields that are dirty.
           for (size_t i = 0; i < obj->SizeOf(); ++i) {
             if (current[i] != current_remote[i]) {
-              dirty_byte_count_per_object++;
+              class_field_dirty_count[i]++;
             }
           }
-          dirty_object_byte_count[klass] += dirty_byte_count_per_object;
-          dirty_object_size_in_bytes[klass] += obj->SizeOf();
 
-          different_object = true;
+          class_dirty_objects.push_back(obj_as_class);
+        } else if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
+          // This is an ArtMethod.
+          ArtMethod* art_method = reinterpret_cast<ArtMethod*>(remote_obj);
 
-          dirty_objects_by_class[klass].push_back(remote_obj);
-        } else {
-          ++clean_object_class_map[klass];
-        }
-
-        std::string descriptor = GetClassDescriptor(klass);
-        if (different_object) {
-          if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
-            // this is a "Class"
-            mirror::Class* obj_as_class  = reinterpret_cast<mirror::Class*>(remote_obj);
-
-            // print the fields that are dirty
-            for (size_t i = 0; i < obj->SizeOf(); ++i) {
-              if (current[i] != current_remote[i]) {
-                class_field_dirty_count[i]++;
-              }
+          // Print the fields that are dirty.
+          for (size_t i = 0; i < obj->SizeOf(); ++i) {
+            if (current[i] != current_remote[i]) {
+              art_method_field_dirty_count[i]++;
             }
-
-            class_dirty_objects.push_back(obj_as_class);
-          } else if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
-            // this is an ArtMethod
-            ArtMethod* art_method = reinterpret_cast<ArtMethod*>(remote_obj);
-
-            // print the fields that are dirty
-            for (size_t i = 0; i < obj->SizeOf(); ++i) {
-              if (current[i] != current_remote[i]) {
-                art_method_field_dirty_count[i]++;
-              }
-            }
-
-            art_method_dirty_objects.push_back(art_method);
           }
-        } else if (on_dirty_page) {
-          // This object was either never mutated or got mutated back to the same value.
-          // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
-          false_dirty_objects.push_back(obj);
-          false_dirty_objects_map[klass].push_back(obj);
-          false_dirty_object_bytes += obj->SizeOf();
-          false_dirty_byte_count[obj->GetClass()] += obj->SizeOf();
-          false_dirty_object_count[obj->GetClass()] += 1;
-        }
 
-        if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
-          local_class_map[descriptor] = reinterpret_cast<mirror::Class*>(obj);
-          remote_class_map[descriptor] = reinterpret_cast<mirror::Class*>(remote_obj);
+          art_method_dirty_objects.push_back(art_method);
         }
-
-        // Unconditionally store the class descriptor in case we need it later
-        class_to_descriptor_map[klass] = descriptor;
-        current += RoundUp(obj->SizeOf(), kObjectAlignment);
+      } else if (on_dirty_page) {
+        // This object was either never mutated or got mutated back to the same value.
+        // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
+        false_dirty_objects.push_back(obj);
+        class_data[klass].false_dirty_objects.push_back(obj);
+        false_dirty_object_bytes += obj->SizeOf();
+        class_data[obj->GetClass()].false_dirty_byte_count += obj->SizeOf();
+        class_data[obj->GetClass()].false_dirty_object_count += 1;
       }
+
+      if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
+        local_class_map[descriptor] = reinterpret_cast<mirror::Class*>(obj);
+        remote_class_map[descriptor] = reinterpret_cast<mirror::Class*>(remote_obj);
+      }
+
+      // Unconditionally store the class descriptor in case we need it later
+      class_data[klass].descriptor = descriptor;
+      current += RoundUp(obj->SizeOf(), kObjectAlignment);
     }
 
     // Looking at only dirty pages, figure out how many of those bytes belong to dirty objects.
@@ -489,17 +648,39 @@
        << "";
 
     // vector of pairs (int count, Class*)
-    auto dirty_object_class_values = SortByValueDesc(dirty_object_class_map);
-    auto clean_object_class_values = SortByValueDesc(clean_object_class_map);
+    auto dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.dirty_object_count; });
+    auto clean_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.clean_object_count; });
+
+    if (!zygote_dirty_objects.empty()) {
+      os << "\n" << "  Dirty objects compared to zygote (probably private dirty): "
+         << zygote_dirty_objects.size() << "\n";
+      for (mirror::Object* obj : zygote_dirty_objects) {
+        const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+        ptrdiff_t offset = obj_bytes - begin_image_ptr;
+        uint8_t* remote_bytes = &zygote_contents[offset];
+        DiffObjectContents(obj, remote_bytes, os);
+      }
+    }
+    os << "\n" << "  Dirty objects compared to image (private or shared dirty): "
+       << image_dirty_objects.size() << "\n";
+    for (mirror::Object* obj : image_dirty_objects) {
+      const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+      ptrdiff_t offset = obj_bytes - begin_image_ptr;
+      uint8_t* remote_bytes = &remote_contents[offset];
+      DiffObjectContents(obj, remote_bytes, os);
+    }
 
     os << "\n" << "  Dirty object count by class:\n";
     for (const auto& vk_pair : dirty_object_class_values) {
       int dirty_object_count = vk_pair.first;
       mirror::Class* klass = vk_pair.second;
-      int object_sizes = dirty_object_size_in_bytes[klass];
-      float avg_dirty_bytes_per_class = dirty_object_byte_count[klass] * 1.0f / object_sizes;
+      int object_sizes = class_data[klass].dirty_object_size_in_bytes;
+      float avg_dirty_bytes_per_class =
+          class_data[klass].dirty_object_byte_count * 1.0f / object_sizes;
       float avg_object_size = object_sizes * 1.0f / dirty_object_count;
-      const std::string& descriptor = class_to_descriptor_map[klass];
+      const std::string& descriptor = class_data[klass].descriptor;
       os << "    " << PrettyClass(klass) << " ("
          << "objects: " << dirty_object_count << ", "
          << "avg dirty bytes: " << avg_dirty_bytes_per_class << ", "
@@ -518,7 +699,8 @@
         os << "\n";
 
         os << "      dirty byte +offset:count list = ";
-        auto art_method_field_dirty_count_sorted = SortByValueDesc(art_method_field_dirty_count);
+        auto art_method_field_dirty_count_sorted =
+            SortByValueDesc<off_t, int, int>(art_method_field_dirty_count);
         for (auto pair : art_method_field_dirty_count_sorted) {
           off_t offset = pair.second;
           int count = pair.first;
@@ -529,7 +711,7 @@
         os << "\n";
 
         os << "      field contents:\n";
-        const auto& dirty_objects_list = dirty_objects_by_class[klass];
+        const auto& dirty_objects_list = class_data[klass].dirty_objects;
         for (mirror::Object* obj : dirty_objects_list) {
           // remote method
           auto art_method = reinterpret_cast<ArtMethod*>(obj);
@@ -547,7 +729,7 @@
           os << "        " << reinterpret_cast<void*>(obj) << " ";
           os << "  entryPointFromJni: "
              << reinterpret_cast<const void*>(
-                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+                    art_method->GetDataPtrSize(pointer_size)) << ", ";
           os << "  entryPointFromQuickCompiledCode: "
              << reinterpret_cast<const void*>(
                     art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
@@ -568,7 +750,8 @@
         os << "\n";
 
         os << "       dirty byte +offset:count list = ";
-        auto class_field_dirty_count_sorted = SortByValueDesc(class_field_dirty_count);
+        auto class_field_dirty_count_sorted =
+            SortByValueDesc<off_t, int, int>(class_field_dirty_count);
         for (auto pair : class_field_dirty_count_sorted) {
           off_t offset = pair.second;
           int count = pair.first;
@@ -578,7 +761,7 @@
         os << "\n";
 
         os << "      field contents:\n";
-        const auto& dirty_objects_list = dirty_objects_by_class[klass];
+        const auto& dirty_objects_list = class_data[klass].dirty_objects;
         for (mirror::Object* obj : dirty_objects_list) {
           // remote class object
           auto remote_klass = reinterpret_cast<mirror::Class*>(obj);
@@ -596,15 +779,16 @@
       }
     }
 
-    auto false_dirty_object_class_values = SortByValueDesc(false_dirty_object_count);
+    auto false_dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.false_dirty_object_count; });
 
     os << "\n" << "  False-dirty object count by class:\n";
     for (const auto& vk_pair : false_dirty_object_class_values) {
       int object_count = vk_pair.first;
       mirror::Class* klass = vk_pair.second;
-      int object_sizes = false_dirty_byte_count[klass];
+      int object_sizes = class_data[klass].false_dirty_byte_count;
       float avg_object_size = object_sizes * 1.0f / object_count;
-      const std::string& descriptor = class_to_descriptor_map[klass];
+      const std::string& descriptor = class_data[klass].descriptor;
       os << "    " << PrettyClass(klass) << " ("
          << "objects: " << object_count << ", "
          << "avg object size: " << avg_object_size << ", "
@@ -613,7 +797,7 @@
          << ")\n";
 
       if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
-        auto& art_method_false_dirty_objects = false_dirty_objects_map[klass];
+        auto& art_method_false_dirty_objects = class_data[klass].false_dirty_objects;
 
         os << "      field contents:\n";
         for (mirror::Object* obj : art_method_false_dirty_objects) {
@@ -626,7 +810,7 @@
           os << "        " << reinterpret_cast<void*>(obj) << " ";
           os << "  entryPointFromJni: "
              << reinterpret_cast<const void*>(
-                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+                    art_method->GetDataPtrSize(pointer_size)) << ", ";
           os << "  entryPointFromQuickCompiledCode: "
              << reinterpret_cast<const void*>(
                     art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
@@ -692,14 +876,16 @@
     return std::string(descriptor_str);
   }
 
-  template <typename K, typename V>
-  static std::vector<std::pair<V, K>> SortByValueDesc(const std::map<K, V> map) {
+  template <typename K, typename V, typename D>
+  static std::vector<std::pair<V, K>> SortByValueDesc(
+      const std::map<K, D> map,
+      std::function<V(const D&)> value_mapper = [](const D& d) { return static_cast<V>(d); }) {
     // Store value->key so that we can use the default sort from pair which
     // sorts by value first and then key
     std::vector<std::pair<V, K>> value_key_vector;
 
     for (const auto& kv_pair : map) {
-      value_key_vector.push_back(std::make_pair(kv_pair.second, kv_pair.first));
+      value_key_vector.push_back(std::make_pair(value_mapper(kv_pair.second), kv_pair.first));
     }
 
     // Sort in reverse (descending order)
@@ -812,14 +998,6 @@
     return page_frame_number != page_frame_number_clean;
   }
 
-  static const ImageHeader& GetBootImageHeader() {
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    gc::space::ImageSpace* image_space = heap->GetImageSpace();
-    CHECK(image_space != nullptr);
-    const ImageHeader& image_header = image_space->GetImageHeader();
-    return image_header;
-  }
-
  private:
   // Return the image location, stripped of any directories, e.g. "boot.art" or "core.art"
   std::string GetImageLocationBaseName() const {
@@ -828,28 +1006,38 @@
 
   std::ostream* os_;
   const ImageHeader& image_header_;
-  const char* image_location_;
+  const std::string image_location_;
   pid_t image_diff_pid_;  // Dump image diff against boot.art if pid is non-negative
+  pid_t zygote_diff_pid_;  // Dump image diff against zygote boot.art if pid is non-negative
 
   DISALLOW_COPY_AND_ASSIGN(ImgDiagDumper);
 };
 
-static int DumpImage(Runtime* runtime, const char* image_location,
-                     std::ostream* os, pid_t image_diff_pid) {
+static int DumpImage(Runtime* runtime,
+                     std::ostream* os,
+                     pid_t image_diff_pid,
+                     pid_t zygote_diff_pid) {
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
-  gc::space::ImageSpace* image_space = heap->GetImageSpace();
-  CHECK(image_space != nullptr);
-  const ImageHeader& image_header = image_space->GetImageHeader();
-  if (!image_header.IsValid()) {
-    fprintf(stderr, "Invalid image header %s\n", image_location);
-    return EXIT_FAILURE;
+  std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+  CHECK(!image_spaces.empty());
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    const ImageHeader& image_header = image_space->GetImageHeader();
+    if (!image_header.IsValid()) {
+      fprintf(stderr, "Invalid image header %s\n", image_space->GetImageLocation().c_str());
+      return EXIT_FAILURE;
+    }
+
+    ImgDiagDumper img_diag_dumper(os,
+                                  image_header,
+                                  image_space->GetImageLocation(),
+                                  image_diff_pid,
+                                  zygote_diff_pid);
+    if (!img_diag_dumper.Dump()) {
+      return EXIT_FAILURE;
+    }
   }
-
-  ImgDiagDumper img_diag_dumper(os, image_header, image_location, image_diff_pid);
-
-  bool success = img_diag_dumper.Dump();
-  return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
+  return EXIT_SUCCESS;
 }
 
 struct ImgDiagArgs : public CmdlineArgs {
@@ -872,6 +1060,13 @@
         *error_msg = "Image diff pid out of range";
         return kParseError;
       }
+    } else if (option.starts_with("--zygote-diff-pid=")) {
+      const char* zygote_diff_pid = option.substr(strlen("--zygote-diff-pid=")).data();
+
+      if (!ParseInt(zygote_diff_pid, &zygote_diff_pid_)) {
+        *error_msg = "Zygote diff pid out of range";
+        return kParseError;
+      }
     } else {
       return kParseUnknownArgument;
     }
@@ -921,6 +1116,9 @@
     usage +=  // Optional.
         "  --image-diff-pid=<pid>: provide the PID of a process whose boot.art you want to diff.\n"
         "      Example: --image-diff-pid=$(pid zygote)\n"
+        "  --zygote-diff-pid=<pid>: provide the PID of the zygote whose boot.art you want to diff "
+        "against.\n"
+        "      Example: --zygote-diff-pid=$(pid zygote)\n"
         "\n";
 
     return usage;
@@ -928,6 +1126,7 @@
 
  public:
   pid_t image_diff_pid_ = -1;
+  pid_t zygote_diff_pid_ = -1;
 };
 
 struct ImgDiagMain : public CmdlineMain<ImgDiagArgs> {
@@ -935,9 +1134,9 @@
     CHECK(args_ != nullptr);
 
     return DumpImage(runtime,
-                     args_->boot_image_location_,
                      args_->os_,
-                     args_->image_diff_pid_) == EXIT_SUCCESS;
+                     args_->image_diff_pid_,
+                     args_->zygote_diff_pid_) == EXIT_SUCCESS;
   }
 };
 
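With the zygote flag wired through, both diffs can be requested in a single run. A
hedged sketch of an invocation (the target package is hypothetical; the `$(pid ...)`
helper and the boot-image path mirror the usage text above and may differ per device):

    imgdiag --boot-image=/system/framework/boot.art \
            --image-diff-pid=$(pid com.example.app) \
            --zygote-diff-pid=$(pid zygote)

Objects that differ from the zygote's copy are reported as "probably private dirty";
objects that differ only from the image file may still be shared with the zygote.
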
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 82bc8b9..9f771ba 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,15 +36,23 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+static const char* kImgDiagZygoteDiffPid = "--zygote-diff-pid";
+
+// from kernel <include/linux/threads.h>
+#define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
+
+static const pid_t kImgDiagGuaranteedBadPid = (PID_MAX_LIMIT + 1);
+
 class ImgDiagTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
 
     // We loaded the runtime with an explicit image. Therefore the image space must exist.
-    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-    ASSERT_TRUE(image_space != nullptr);
-    boot_image_location_ = image_space->GetImageLocation();
+    std::vector<gc::space::ImageSpace*> image_spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    ASSERT_TRUE(!image_spaces.empty());
+    boot_image_location_ = image_spaces[0]->GetImageLocation();
   }
 
   virtual void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
@@ -84,17 +92,25 @@
 
     // Run imgdiag --image-diff-pid=$image_diff_pid and wait until it's done with a 0 exit code.
     std::string diff_pid_args;
+    std::string zygote_diff_pid_args;
     {
       std::stringstream diff_pid_args_ss;
       diff_pid_args_ss << kImgDiagDiffPid << "=" << image_diff_pid;
       diff_pid_args = diff_pid_args_ss.str();
     }
-    std::string boot_image_args;
     {
-      boot_image_args = boot_image_args + kImgDiagBootImage + "=" + boot_image;
+      std::stringstream zygote_pid_args_ss;
+      zygote_pid_args_ss << kImgDiagZygoteDiffPid << "=" << image_diff_pid;
+      zygote_diff_pid_args = zygote_pid_args_ss.str();
     }
+    std::string boot_image_args = std::string(kImgDiagBootImage) + "=" + boot_image;
 
-    std::vector<std::string> exec_argv = { file_path, diff_pid_args, boot_image_args };
+    std::vector<std::string> exec_argv = {
+        file_path,
+        diff_pid_args,
+        zygote_diff_pid_args,
+        boot_image_args
+    };
 
     return ::art::Exec(exec_argv, error_msg);
   }
@@ -132,7 +148,8 @@
 
   // Run imgdiag --image-diff-pid=some_bad_pid and wait until it's done with a 0 exit code.
   std::string error_msg;
-  ASSERT_FALSE(ExecDefaultBootImage(-12345, &error_msg)) << "Incorrectly executed";
+  ASSERT_FALSE(ExecDefaultBootImage(kImgDiagGuaranteedBadPid,
+                                    &error_msg)) << "Incorrectly executed";
   UNUSED(error_msg);
 }
 
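A note on the bad-pid change: since -1 is the "disabled" sentinel for both
--image-diff-pid and --zygote-diff-pid, a negative test value would read as "diff
disabled" rather than as an invalid pid, so the test needs a positive pid that can
never name a live process. As a hedged sanity check (the value is a typical default,
not a guarantee), the real ceiling on a device is far smaller than PID_MAX_LIMIT:

    adb shell cat /proc/sys/kernel/pid_max    # commonly 32768, well below 4*1024*1024 + 1
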
diff --git a/libart_fake/Android.mk b/libart_fake/Android.mk
new file mode 100644
index 0000000..ed868a5
--- /dev/null
+++ b/libart_fake/Android.mk
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libart_fake
+LOCAL_INSTALLED_MODULE_STEM := libart.so
+LOCAL_SDK_VERSION := 9
+LOCAL_CPP_EXTENSION := .cc
+LOCAL_SRC_FILES := fake.cc
+LOCAL_SHARED_LIBRARIES := liblog
+
+ifdef TARGET_2ND_ARCH
+    LOCAL_MODULE_PATH_32 := $(TARGET_OUT)/fake-libs
+    LOCAL_MODULE_PATH_64 := $(TARGET_OUT)/fake-libs64
+else
+    LOCAL_MODULE_PATH := $(TARGET_OUT)/fake-libs
+endif
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/libart_fake/README.md b/libart_fake/README.md
new file mode 100644
index 0000000..6e3621e
--- /dev/null
+++ b/libart_fake/README.md
@@ -0,0 +1,5 @@
+libart_fake
+====
+
+A fake libart made to satisfy misbehaving apps that attempt to link against
+libart.so.
diff --git a/libart_fake/fake.cc b/libart_fake/fake.cc
new file mode 100644
index 0000000..8842421
--- /dev/null
+++ b/libart_fake/fake.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "libart_fake"
+
+#include <android/log.h>
+
+#define LOGIT(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
+namespace art {
+class Dbg {
+ public:
+  void SuspendVM();
+  void ResumeVM();
+};
+
+class FaultManager {
+ public:
+  void EnsureArtActionInFrontOfSignalChain();
+};
+
+void Dbg::SuspendVM() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+void Dbg::ResumeVM() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+void FaultManager::EnsureArtActionInFrontOfSignalChain() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+}  // namespace art
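
The three stubs above cover the ART internals that such apps try to call. A minimal
sketch of the pattern being defused (hypothetical app-side code; the string is the
Itanium-mangled name of art::Dbg::SuspendVM()):

    #include <dlfcn.h>

    void* handle = dlopen("libart.so", RTLD_NOW);
    if (handle != nullptr) {
      // With libart_fake installed as the libart.so visible to the app, this
      // resolves to the stub, which only logs an error when invoked.
      void* suspend_vm = dlsym(handle, "_ZN3art3Dbg9SuspendVMEv");
    }
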
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index a3ef38d..5c75f20 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -74,14 +74,14 @@
 .PHONY: dump-oat-boot-$(TARGET_ARCH)
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot-$(TARGET_ARCH): $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) \
+	$(OATDUMP) $(addprefix --image=,$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION)) \
 	  --output=$(ART_DUMP_OAT_PATH)/boot.$(TARGET_ARCH).oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.$(TARGET_ARCH).oatdump.txt
 endif
 
 ifdef TARGET_2ND_ARCH
 dump-oat-boot-$(TARGET_2ND_ARCH): $(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP)
-	$(OATDUMP) --image=$(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) \
+	$(OATDUMP) $(addprefix --image=,$(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION)) \
 	  --output=$(ART_DUMP_OAT_PATH)/boot.$(TARGET_2ND_ARCH).oatdump.txt --instruction-set=$(TARGET_2ND_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.$(TARGET_2ND_ARCH).oatdump.txt
 endif
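
The $(addprefix ...) form matters once the boot image can be split across several
locations: it emits one --image= flag per word in the variable, instead of a single
flag whose value would swallow the whole list. A small illustration (the variable
contents here are hypothetical):

    # If DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION = /a/boot.art /a/boot-framework.art,
    # then $(addprefix --image=,$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION)) expands to
    #     --image=/a/boot.art --image=/a/boot-framework.art
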
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index ea61b43..96c8e94 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -28,20 +28,23 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
+#include "debug/elf_debug_writer.h"
+#include "debug/method_debug_info.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
 #include "disassembler.h"
 #include "elf_builder.h"
-#include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "image.h"
+#include "image-inl.h"
 #include "indenter.h"
-#include "mapping_table.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -51,15 +54,14 @@
 #include "oat_file-inl.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "stack_map.h"
 #include "ScopedLocalRef.h"
+#include "stack_map.h"
+#include "string_reference.h"
 #include "thread_list.h"
-#include "verifier/dex_gc_map.h"
+#include "type_lookup_table.h"
 #include "verifier/method_verifier.h"
-#include "vmap_table.h"
 #include "well_known_classes.h"
 
 #include <sys/stat.h>
@@ -71,9 +73,10 @@
   "kResolutionMethod",
   "kImtConflictMethod",
   "kImtUnimplementedMethod",
-  "kCalleeSaveMethod",
-  "kRefsOnlySaveMethod",
-  "kRefsAndArgsSaveMethod",
+  "kSaveAllCalleeSavesMethod",
+  "kSaveRefsOnlyMethod",
+  "kSaveRefsAndArgsMethod",
+  "kSaveEverythingMethod",
 };
 
 const char* image_roots_descriptions_[] = {
@@ -96,86 +99,88 @@
   return ret;
 }
 
+template <typename ElfTypes>
 class OatSymbolizer FINAL {
  public:
-  class RodataWriter FINAL : public CodeOutput {
-   public:
-    explicit RodataWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
-
-    bool Write(OutputStream* out) OVERRIDE {
-      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-      return out->WriteFully(oat_file_->Begin(), rodata_size);
-    }
-
-   private:
-    const OatFile* oat_file_;
-  };
-
-  class TextWriter FINAL : public CodeOutput {
-   public:
-    explicit TextWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
-
-    bool Write(OutputStream* out) OVERRIDE {
-      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-      const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
-      return out->WriteFully(text_begin, oat_file_->End() - text_begin);
-    }
-
-   private:
-    const OatFile* oat_file_;
-  };
-
-  OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
-      oat_file_(oat_file), builder_(nullptr),
-      output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
+  OatSymbolizer(const OatFile* oat_file, const std::string& output_name, bool no_bits) :
+      oat_file_(oat_file),
+      builder_(nullptr),
+      output_name_(output_name.empty() ? "symbolized.oat" : output_name),
+      no_bits_(no_bits) {
   }
 
-  typedef void (OatSymbolizer::*Callback)(const DexFile::ClassDef&,
-                                          uint32_t,
-                                          const OatFile::OatMethod&,
-                                          const DexFile&,
-                                          uint32_t,
-                                          const DexFile::CodeItem*,
-                                          uint32_t);
-
   bool Symbolize() {
-    Elf32_Word rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-    uint32_t size = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
-    uint32_t text_size = size - rodata_size;
-    uint32_t bss_size = oat_file_->BssSize();
-    RodataWriter rodata_writer(oat_file_);
-    TextWriter text_writer(oat_file_);
-    builder_.reset(new ElfBuilder<ElfTypes32>(
-        oat_file_->GetOatHeader().GetInstructionSet(),
-        rodata_size, &rodata_writer,
-        text_size, &text_writer,
-        bss_size));
+    const InstructionSet isa = oat_file_->GetOatHeader().GetInstructionSet();
+    const InstructionSetFeatures* features = InstructionSetFeatures::FromBitmap(
+        isa, oat_file_->GetOatHeader().GetInstructionSetFeaturesBitmap());
 
-    Walk(&art::OatSymbolizer::RegisterForDedup);
+    File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
+    std::unique_ptr<BufferedOutputStream> output_stream(
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
+    builder_.reset(new ElfBuilder<ElfTypes>(isa, features, output_stream.get()));
 
-    NormalizeState();
+    builder_->Start();
 
-    Walk(&art::OatSymbolizer::AddSymbol);
+    auto* rodata = builder_->GetRoData();
+    auto* text = builder_->GetText();
+    auto* bss = builder_->GetBss();
 
-    File* elf_output = OS::CreateEmptyFile(output_name_.c_str());
-    bool result = builder_->Write(elf_output);
+    const uint8_t* rodata_begin = oat_file_->Begin();
+    const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+    if (no_bits_) {
+      rodata->WriteNoBitsSection(rodata_size);
+    } else {
+      rodata->Start();
+      rodata->WriteFully(rodata_begin, rodata_size);
+      rodata->End();
+    }
 
-    // Ignore I/O errors.
-    UNUSED(elf_output->FlushClose());
+    const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
+    const size_t text_size = oat_file_->End() - text_begin;
+    if (no_bits_) {
+      text->WriteNoBitsSection(text_size);
+    } else {
+      text->Start();
+      text->WriteFully(text_begin, text_size);
+      text->End();
+    }
 
-    return result;
+    if (oat_file_->BssSize() != 0) {
+      bss->WriteNoBitsSection(oat_file_->BssSize());
+    }
+
+    if (isa == kMips || isa == kMips64) {
+      builder_->WriteMIPSabiflagsSection();
+    }
+    builder_->PrepareDynamicSection(
+        elf_file->GetPath(), rodata_size, text_size, oat_file_->BssSize());
+    builder_->WriteDynamicSection();
+
+    Walk();
+    for (const auto& trampoline : debug::MakeTrampolineInfos(oat_file_->GetOatHeader())) {
+      method_debug_infos_.push_back(trampoline);
+    }
+
+    debug::WriteDebugInfo(builder_.get(),
+                          ArrayRef<const debug::MethodDebugInfo>(method_debug_infos_),
+                          dwarf::DW_DEBUG_FRAME_FORMAT,
+                          true /* write_oat_patches */);
+
+    builder_->End();
+
+    return builder_->Good();
   }
 
-  void Walk(Callback callback) {
+  void Walk() {
     std::vector<const OatFile::OatDexFile*> oat_dex_files = oat_file_->GetOatDexFiles();
     for (size_t i = 0; i < oat_dex_files.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files[i];
       CHECK(oat_dex_file != nullptr);
-      WalkOatDexFile(oat_dex_file, callback);
+      WalkOatDexFile(oat_dex_file);
     }
   }
 
-  void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file, Callback callback) {
+  void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file) {
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
     if (dex_file == nullptr) {
@@ -184,13 +189,12 @@
     for (size_t class_def_index = 0;
         class_def_index < dex_file->NumClassDefs();
         class_def_index++) {
-      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
       const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(class_def_index);
       OatClassType type = oat_class.GetType();
       switch (type) {
         case kOatClassAllCompiled:
         case kOatClassSomeCompiled:
-          WalkOatClass(oat_class, *dex_file, class_def, callback);
+          WalkOatClass(oat_class, *dex_file, class_def_index);
           break;
 
         case kOatClassNoneCompiled:
@@ -201,8 +205,10 @@
     }
   }
 
-  void WalkOatClass(const OatFile::OatClass& oat_class, const DexFile& dex_file,
-                    const DexFile::ClassDef& class_def, Callback callback) {
+  void WalkOatClass(const OatFile::OatClass& oat_class,
+                    const DexFile& dex_file,
+                    uint32_t class_def_index) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
     const uint8_t* class_data = dex_file.GetClassData(class_def);
     if (class_data == nullptr) {  // empty class such as a marker interface?
       return;
@@ -210,120 +216,73 @@
     // Note: even if this is an interface or a native class, we still have to walk it, as there
     //       might be a static initializer.
     ClassDataItemIterator it(dex_file, class_data);
-    SkipAllFields(&it);
     uint32_t class_method_idx = 0;
-    while (it.HasNextDirectMethod()) {
-      const OatFile::OatMethod oat_method = oat_class.GetOatMethod(class_method_idx);
-      WalkOatMethod(class_def, class_method_idx, oat_method, dex_file, it.GetMemberIndex(),
-                    it.GetMethodCodeItem(), it.GetMethodAccessFlags(), callback);
-      class_method_idx++;
-      it.Next();
-    }
-    while (it.HasNextVirtualMethod()) {
-      const OatFile::OatMethod oat_method = oat_class.GetOatMethod(class_method_idx);
-      WalkOatMethod(class_def, class_method_idx, oat_method, dex_file, it.GetMemberIndex(),
-                    it.GetMethodCodeItem(), it.GetMethodAccessFlags(), callback);
-      class_method_idx++;
-      it.Next();
+    for (; it.HasNextStaticField(); it.Next()) { /* skip */ }
+    for (; it.HasNextInstanceField(); it.Next()) { /* skip */ }
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      WalkOatMethod(oat_class.GetOatMethod(class_method_idx++),
+                    dex_file,
+                    class_def_index,
+                    it.GetMemberIndex(),
+                    it.GetMethodCodeItem(),
+                    it.GetMethodAccessFlags());
     }
     DCHECK(!it.HasNext());
   }
 
-  void WalkOatMethod(const DexFile::ClassDef& class_def, uint32_t class_method_index,
-                     const OatFile::OatMethod& oat_method, const DexFile& dex_file,
-                     uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
-                     uint32_t method_access_flags, Callback callback) {
+  void WalkOatMethod(const OatFile::OatMethod& oat_method,
+                     const DexFile& dex_file,
+                     uint32_t class_def_index,
+                     uint32_t dex_method_index,
+                     const DexFile::CodeItem* code_item,
+                     uint32_t method_access_flags) {
     if ((method_access_flags & kAccAbstract) != 0) {
       // Abstract method, no code.
       return;
     }
-    if (oat_method.GetCodeOffset() == 0) {
+    const OatHeader& oat_header = oat_file_->GetOatHeader();
+    const OatQuickMethodHeader* method_header = oat_method.GetOatQuickMethodHeader();
+    if (method_header == nullptr || method_header->GetCodeSize() == 0) {
       // No code.
       return;
     }
 
-    (this->*callback)(class_def, class_method_index, oat_method, dex_file, dex_method_idx, code_item,
-                      method_access_flags);
-  }
+    uint32_t entry_point = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    // Clear Thumb2 bit.
+    const void* code_address = EntryPointToCodePointer(reinterpret_cast<void*>(entry_point));
 
-  void RegisterForDedup(const DexFile::ClassDef& class_def ATTRIBUTE_UNUSED,
-                        uint32_t class_method_index ATTRIBUTE_UNUSED,
-                        const OatFile::OatMethod& oat_method,
-                        const DexFile& dex_file ATTRIBUTE_UNUSED,
-                        uint32_t dex_method_idx ATTRIBUTE_UNUSED,
-                        const DexFile::CodeItem* code_item ATTRIBUTE_UNUSED,
-                        uint32_t method_access_flags ATTRIBUTE_UNUSED) {
-    state_[oat_method.GetCodeOffset()]++;
-  }
-
-  void NormalizeState() {
-    for (auto& x : state_) {
-      if (x.second == 1) {
-        state_[x.first] = 0;
-      }
-    }
-  }
-
-  enum class DedupState {  // private
-    kNotDeduplicated,
-    kDeduplicatedFirst,
-    kDeduplicatedOther
-  };
-  DedupState IsDuplicated(uint32_t offset) {
-    if (state_[offset] == 0) {
-      return DedupState::kNotDeduplicated;
-    }
-    if (state_[offset] == 1) {
-      return DedupState::kDeduplicatedOther;
-    }
-    state_[offset] = 1;
-    return DedupState::kDeduplicatedFirst;
-  }
-
-  void AddSymbol(const DexFile::ClassDef& class_def ATTRIBUTE_UNUSED,
-                 uint32_t class_method_index ATTRIBUTE_UNUSED,
-                 const OatFile::OatMethod& oat_method,
-                 const DexFile& dex_file,
-                 uint32_t dex_method_idx,
-                 const DexFile::CodeItem* code_item ATTRIBUTE_UNUSED,
-                 uint32_t method_access_flags ATTRIBUTE_UNUSED) {
-    DedupState dedup = IsDuplicated(oat_method.GetCodeOffset());
-    if (dedup != DedupState::kDeduplicatedOther) {
-      std::string pretty_name = PrettyMethod(dex_method_idx, dex_file, true);
-
-      if (dedup == DedupState::kDeduplicatedFirst) {
-        pretty_name = "[Dedup]" + pretty_name;
-      }
-
-      auto* symtab = builder_->GetSymtab();
-
-      symtab->AddSymbol(pretty_name, builder_->GetText(),
-          oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(),
-          true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC);
-    }
+    debug::MethodDebugInfo info = debug::MethodDebugInfo();
+    info.trampoline_name = nullptr;
+    info.dex_file = &dex_file;
+    info.class_def_index = class_def_index;
+    info.dex_method_index = dex_method_index;
+    info.access_flags = method_access_flags;
+    info.code_item = code_item;
+    info.isa = oat_header.GetInstructionSet();
+    info.deduped = !seen_offsets_.insert(oat_method.GetCodeOffset()).second;
+    info.is_native_debuggable = oat_header.IsNativeDebuggable();
+    info.is_optimized = method_header->IsOptimized();
+    info.is_code_address_text_relative = true;
+    info.code_address = reinterpret_cast<uintptr_t>(code_address);
+    info.code_size = method_header->GetCodeSize();
+    info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
+    info.code_info = info.is_optimized ? method_header->GetOptimizedCodeInfoPtr() : nullptr;
+    info.cfi = ArrayRef<uint8_t>();
+    method_debug_infos_.push_back(info);
   }
 
  private:
-  static void SkipAllFields(ClassDataItemIterator* it) {
-    while (it->HasNextStaticField()) {
-      it->Next();
-    }
-    while (it->HasNextInstanceField()) {
-      it->Next();
-    }
-  }
-
   const OatFile* oat_file_;
-  std::unique_ptr<ElfBuilder<ElfTypes32> > builder_;
-  std::unordered_map<uint32_t, uint32_t> state_;
+  std::unique_ptr<ElfBuilder<ElfTypes> > builder_;
+  std::vector<debug::MethodDebugInfo> method_debug_infos_;
+  std::unordered_set<uint32_t> seen_offsets_;
   const std::string output_name_;
+  bool no_bits_;
 };
 
 class OatDumperOptions {
  public:
-  OatDumperOptions(bool dump_raw_mapping_table,
-                   bool dump_raw_gc_map,
-                   bool dump_vmap,
+  OatDumperOptions(bool dump_vmap,
                    bool dump_code_info_stack_maps,
                    bool disassemble_code,
                    bool absolute_addresses,
@@ -331,11 +290,12 @@
                    const char* method_filter,
                    bool list_classes,
                    bool list_methods,
+                   bool dump_header_only,
                    const char* export_dex_location,
+                   const char* app_image,
+                   const char* app_oat,
                    uint32_t addr2instr)
-    : dump_raw_mapping_table_(dump_raw_mapping_table),
-      dump_raw_gc_map_(dump_raw_gc_map),
-      dump_vmap_(dump_vmap),
+    : dump_vmap_(dump_vmap),
       dump_code_info_stack_maps_(dump_code_info_stack_maps),
       disassemble_code_(disassemble_code),
       absolute_addresses_(absolute_addresses),
@@ -343,12 +303,13 @@
       method_filter_(method_filter),
       list_classes_(list_classes),
       list_methods_(list_methods),
+      dump_header_only_(dump_header_only),
       export_dex_location_(export_dex_location),
+      app_image_(app_image),
+      app_oat_(app_oat),
       addr2instr_(addr2instr),
       class_loader_(nullptr) {}
 
-  const bool dump_raw_mapping_table_;
-  const bool dump_raw_gc_map_;
   const bool dump_vmap_;
   const bool dump_code_info_stack_maps_;
   const bool disassemble_code_;
@@ -357,7 +318,10 @@
   const char* const method_filter_;
   const bool list_classes_;
   const bool list_methods_;
+  const bool dump_header_only_;
   const char* const export_dex_location_;
+  const char* const app_image_;
+  const char* const app_oat_;
   uint32_t addr2instr_;
   Handle<mirror::ClassLoader>* class_loader_;
 };
@@ -371,9 +335,14 @@
       resolved_addr2instr_(0),
       instruction_set_(oat_file_.GetOatHeader().GetInstructionSet()),
       disassembler_(Disassembler::Create(instruction_set_,
-                                         new DisassemblerOptions(options_.absolute_addresses_,
-                                                                 oat_file.Begin(),
-                                                                 true /* can_read_literals_ */))) {
+                                         new DisassemblerOptions(
+                                             options_.absolute_addresses_,
+                                             oat_file.Begin(),
+                                             oat_file.End(),
+                                             true /* can_read_literals_ */,
+                                             Is64BitInstructionSet(instruction_set_)
+                                                 ? &Thread::DumpThreadOffset<PointerSize::k64>
+                                                 : &Thread::DumpThreadOffset<PointerSize::k32>))) {
     CHECK(options_.class_loader_ != nullptr);
     CHECK(options_.class_filter_ != nullptr);
     CHECK(options_.method_filter_ != nullptr);
@@ -395,6 +364,9 @@
     os << "MAGIC:\n";
     os << oat_header.GetMagic() << "\n\n";
 
+    os << "LOCATION:\n";
+    os << oat_file_.GetLocation() << "\n\n";
+
     os << "CHECKSUM:\n";
     os << StringPrintf("0x%08x\n\n", oat_header.GetChecksum());
 
@@ -481,21 +453,46 @@
       os << StringPrintf("0x%08x\n\n", resolved_addr2instr_);
     }
 
+    // The dex file overview is compact enough to dump even in header-only mode.
+    DexFileData cumulative;
     for (size_t i = 0; i < oat_dex_files_.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
+      std::string error_msg;
+      const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+      if (dex_file == nullptr) {
+        os << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation() << "': "
+           << error_msg;
+        continue;
+      }
+      DexFileData data(*dex_file);
+      os << "Dex file data for " << dex_file->GetLocation() << "\n";
+      data.Dump(os);
+      os << "\n";
+      cumulative.Add(data);
+    }
+    os << "Cumulative dex file data\n";
+    cumulative.Dump(os);
+    os << "\n";
 
-      // If file export selected skip file analysis
-      if (options_.export_dex_location_) {
-        if (!ExportDexFile(os, *oat_dex_file)) {
-          success = false;
-        }
-      } else {
-        if (!DumpOatDexFile(os, *oat_dex_file)) {
-          success = false;
+    if (!options_.dump_header_only_) {
+      for (size_t i = 0; i < oat_dex_files_.size(); i++) {
+        const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
+        CHECK(oat_dex_file != nullptr);
+
+        // If file export is selected, skip file analysis.
+        if (options_.export_dex_location_) {
+          if (!ExportDexFile(os, *oat_dex_file)) {
+            success = false;
+          }
+        } else {
+          if (!DumpOatDexFile(os, *oat_dex_file)) {
+            success = false;
+          }
         }
       }
     }
+
     os << std::flush;
     return success;
   }
@@ -596,11 +593,125 @@
       code_offset &= ~0x1;
     }
     offsets_.insert(code_offset);
-    offsets_.insert(oat_method.GetMappingTableOffset());
     offsets_.insert(oat_method.GetVmapTableOffset());
-    offsets_.insert(oat_method.GetGcMapOffset());
   }
 
+  // Dex file data; may aggregate data from multiple dex files.
+  class DexFileData {
+   public:
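+    // A default-constructed instance is an empty accumulator for Add().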
+    DexFileData() {}
+
+    explicit DexFileData(const DexFile& dex_file)
+        : num_string_ids_(dex_file.NumStringIds()),
+          num_method_ids_(dex_file.NumMethodIds()),
+          num_field_ids_(dex_file.NumFieldIds()),
+          num_type_ids_(dex_file.NumTypeIds()),
+          num_class_defs_(dex_file.NumClassDefs()) {
+      for (size_t class_def_index = 0; class_def_index < num_class_defs_; ++class_def_index) {
+        const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
+        WalkClass(dex_file, class_def);
+      }
+    }
+
+    void Add(const DexFileData& other) {
+      AddAll(unique_string_ids_from_code_, other.unique_string_ids_from_code_);
+      num_string_ids_from_code_ += other.num_string_ids_from_code_;
+      AddAll(dex_code_item_ptrs_, other.dex_code_item_ptrs_);
+      dex_code_bytes_ += other.dex_code_bytes_;
+      num_string_ids_ += other.num_string_ids_;
+      num_method_ids_ += other.num_method_ids_;
+      num_field_ids_ += other.num_field_ids_;
+      num_type_ids_ += other.num_type_ids_;
+      num_class_defs_ += other.num_class_defs_;
+    }
+
+    void Dump(std::ostream& os) {
+      os << "Num string ids: " << num_string_ids_ << "\n";
+      os << "Num method ids: " << num_method_ids_ << "\n";
+      os << "Num field ids: " << num_field_ids_ << "\n";
+      os << "Num type ids: " << num_type_ids_ << "\n";
+      os << "Num class defs: " << num_class_defs_ << "\n";
+      os << "Unique strings loaded from dex code: " << unique_string_ids_from_code_.size() << "\n";
+      os << "Total strings loaded from dex code: " << num_string_ids_from_code_ << "\n";
+      os << "Number of unique dex code items: " << dex_code_item_ptrs_.size() << "\n";
+      os << "Total number of dex code bytes: " << dex_code_bytes_ << "\n";
+    }
+
+  private:
+    void WalkClass(const DexFile& dex_file, const DexFile::ClassDef& class_def) {
+      const uint8_t* class_data = dex_file.GetClassData(class_def);
+      if (class_data == nullptr) {  // empty class such as a marker interface?
+        return;
+      }
+      ClassDataItemIterator it(dex_file, class_data);
+      SkipAllFields(it);
+      while (it.HasNextDirectMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      while (it.HasNextVirtualMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+
+    void WalkCodeItem(const DexFile& dex_file, const DexFile::CodeItem* code_item) {
+      if (code_item == nullptr) {
+        return;
+      }
+      const size_t code_item_size = code_item->insns_size_in_code_units_;
+      const uint16_t* code_ptr = code_item->insns_;
+      const uint16_t* code_end = code_item->insns_ + code_item_size;
+
+      // If we inserted a new dex code item pointer, add to total code bytes.
+      if (dex_code_item_ptrs_.insert(code_ptr).second) {
+        dex_code_bytes_ += code_item_size * sizeof(code_ptr[0]);
+      }
+
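+      // Scan the 16-bit code units for string loads, counting total and unique uses.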
+      while (code_ptr < code_end) {
+        const Instruction* inst = Instruction::At(code_ptr);
+        switch (inst->Opcode()) {
+          case Instruction::CONST_STRING: {
+            const uint32_t string_index = inst->VRegB_21c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          case Instruction::CONST_STRING_JUMBO: {
+            const uint32_t string_index = inst->VRegB_31c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          default:
+            break;
+        }
+
+        code_ptr += inst->SizeInCodeUnits();
+      }
+    }
+
+    // Unique string ids loaded from dex code.
+    std::set<StringReference, StringReferenceComparator> unique_string_ids_from_code_;
+
+    // Total string ids loaded from dex code.
+    size_t num_string_ids_from_code_ = 0;
+
+    // Unique code pointers.
+    std::set<const void*> dex_code_item_ptrs_;
+
+    // Total "unique" dex code bytes.
+    size_t dex_code_bytes_ = 0;
+
+    // Other dex ids.
+    size_t num_string_ids_ = 0;
+    size_t num_method_ids_ = 0;
+    size_t num_field_ids_ = 0;
+    size_t num_type_ids_ = 0;
+    size_t num_class_defs_ = 0;
+  };
+
   bool DumpOatDexFile(std::ostream& os, const OatFile::OatDexFile& oat_dex_file) {
     bool success = true;
     bool stop_analysis = false;
@@ -608,8 +719,15 @@
     os << StringPrintf("location: %s\n", oat_dex_file.GetDexFileLocation().c_str());
     os << StringPrintf("checksum: 0x%08x\n", oat_dex_file.GetDexFileLocationChecksum());
 
-    // Create the verifier early.
+    // Print embedded dex file data range.
+    const uint8_t* const oat_file_begin = oat_dex_file.GetOatFile()->Begin();
+    const uint8_t* const dex_file_pointer = oat_dex_file.GetDexFilePointer();
+    uint32_t dex_offset = dchecked_integral_cast<uint32_t>(dex_file_pointer - oat_file_begin);
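+    // Both ends of the printed range are inclusive.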
+    os << StringPrintf("dex-file: 0x%08x..0x%08x\n",
+                       dex_offset,
+                       dchecked_integral_cast<uint32_t>(dex_offset + oat_dex_file.FileSize() - 1));
 
+    // Create the dex file early. A lot of print-out things depend on it.
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(&oat_dex_file, &error_msg);
     if (dex_file == nullptr) {
@@ -618,6 +736,16 @@
       return false;
     }
 
+    // Print lookup table, if it exists.
+    if (oat_dex_file.GetLookupTableData() != nullptr) {
+      uint32_t table_offset = dchecked_integral_cast<uint32_t>(
+          oat_dex_file.GetLookupTableData() - oat_file_begin);
+      uint32_t table_size = TypeLookupTable::RawDataLength(*dex_file);
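+      // RawDataLength() computes the table size from the dex file rather than the raw data.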
+      os << StringPrintf("type-table: 0x%08x..0x%08x\n",
+                         table_offset,
+                         table_offset + table_size - 1);
+    }
+
     VariableIndentationOutputStream vios(&os);
     ScopedIndentation indent1(&vios);
     for (size_t class_def_index = 0;
@@ -638,7 +766,9 @@
          << " (" << oat_class.GetStatus() << ")"
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
-      if (options_.list_classes_) continue;
+      if (options_.list_classes_) {
+        continue;
+      }
       if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis)) {
         success = false;
       }
@@ -647,7 +777,7 @@
         return success;
       }
     }
-
+    os << "\n";
     os << std::flush;
     return success;
   }
@@ -867,22 +997,6 @@
         success = false;
       }
       vios->Stream() << "\n";
-
-      vios->Stream() << "gc_map: ";
-      if (options_.absolute_addresses_) {
-        vios->Stream() << StringPrintf("%p ", oat_method.GetGcMap());
-      }
-      uint32_t gc_map_offset = oat_method.GetGcMapOffset();
-      vios->Stream() << StringPrintf("(offset=0x%08x)\n", gc_map_offset);
-      if (gc_map_offset > oat_file_.Size()) {
-        vios->Stream() << StringPrintf("WARNING: "
-                           "gc map table offset 0x%08x is past end of file 0x%08zx.\n",
-                           gc_map_offset, oat_file_.Size());
-        success = false;
-      } else if (options_.dump_raw_gc_map_) {
-        ScopedIndentation indent3(vios);
-        DumpGcMap(vios->Stream(), oat_method, code_item);
-      }
     }
     {
       vios->Stream() << "OatQuickMethodHeader ";
@@ -903,24 +1017,6 @@
       }
 
       ScopedIndentation indent2(vios);
-      vios->Stream() << "mapping_table: ";
-      if (options_.absolute_addresses_) {
-        vios->Stream() << StringPrintf("%p ", oat_method.GetMappingTable());
-      }
-      uint32_t mapping_table_offset = oat_method.GetMappingTableOffset();
-      vios->Stream() << StringPrintf("(offset=0x%08x)\n", oat_method.GetMappingTableOffset());
-      if (mapping_table_offset > oat_file_.Size()) {
-        vios->Stream() << StringPrintf("WARNING: "
-                                       "mapping table offset 0x%08x is past end of file 0x%08zx. "
-                                       "mapping table offset was loaded from offset 0x%08x.\n",
-                                       mapping_table_offset, oat_file_.Size(),
-                                       oat_method.GetMappingTableOffsetOffset());
-        success = false;
-      } else if (options_.dump_raw_mapping_table_) {
-        ScopedIndentation indent3(vios);
-        DumpMappingTable(vios, oat_method);
-      }
-
       vios->Stream() << "vmap_table: ";
       if (options_.absolute_addresses_) {
         vios->Stream() << StringPrintf("%p ", oat_method.GetVmapTable());
@@ -997,7 +1093,7 @@
           success = false;
           if (options_.disassemble_code_) {
             if (code_size_offset + kPrologueBytes <= oat_file_.Size()) {
-              DumpCode(vios, verifier.get(), oat_method, code_item, true, kPrologueBytes);
+              DumpCode(vios, oat_method, code_item, true, kPrologueBytes);
             }
           }
         } else if (code_size > kMaxCodeSize) {
@@ -1010,11 +1106,11 @@
           success = false;
           if (options_.disassemble_code_) {
             if (code_size_offset + kPrologueBytes <= oat_file_.Size()) {
-              DumpCode(vios, verifier.get(), oat_method, code_item, true, kPrologueBytes);
+              DumpCode(vios, oat_method, code_item, true, kPrologueBytes);
             }
           }
         } else if (options_.disassemble_code_) {
-          DumpCode(vios, verifier.get(), oat_method, code_item, !success, 0);
+          DumpCode(vios, oat_method, code_item, !success, 0);
         }
       }
     }
@@ -1064,12 +1160,7 @@
       ScopedIndentation indent(vios);
       vios->Stream() << "quickened data\n";
     } else {
-      // Otherwise, display the vmap table.
-      const uint8_t* raw_table = oat_method.GetVmapTable();
-      if (raw_table != nullptr) {
-        VmapTable vmap_table(raw_table);
-        DumpVmapTable(vios->Stream(), oat_method, vmap_table);
-      }
+      // Otherwise, there is nothing to display.
     }
   }
 
@@ -1084,32 +1175,6 @@
                    options_.dump_code_info_stack_maps_);
   }
 
-  // Display a vmap table.
-  void DumpVmapTable(std::ostream& os,
-                     const OatFile::OatMethod& oat_method,
-                     const VmapTable& vmap_table) {
-    bool first = true;
-    bool processing_fp = false;
-    uint32_t spill_mask = oat_method.GetCoreSpillMask();
-    for (size_t i = 0; i < vmap_table.Size(); i++) {
-      uint16_t dex_reg = vmap_table[i];
-      uint32_t cpu_reg = vmap_table.ComputeRegister(spill_mask, i,
-                                                    processing_fp ? kFloatVReg : kIntVReg);
-      os << (first ? "v" : ", v")  << dex_reg;
-      if (!processing_fp) {
-        os << "/r" << cpu_reg;
-      } else {
-        os << "/fr" << cpu_reg;
-      }
-      first = false;
-      if (!processing_fp && dex_reg == 0xFFFF) {
-        processing_fp = true;
-        spill_mask = oat_method.GetFpSpillMask();
-      }
-    }
-    os << "\n";
-  }
-
   void DumpVregLocations(std::ostream& os, const OatFile::OatMethod& oat_method,
                          const DexFile::CodeItem* code_item) {
     if (code_item != nullptr) {
@@ -1152,207 +1217,6 @@
     }
   }
 
-  void DescribeVReg(std::ostream& os, const OatFile::OatMethod& oat_method,
-                    const DexFile::CodeItem* code_item, size_t reg, VRegKind kind) {
-    const uint8_t* raw_table = oat_method.GetVmapTable();
-    if (raw_table != nullptr) {
-      const VmapTable vmap_table(raw_table);
-      uint32_t vmap_offset;
-      if (vmap_table.IsInContext(reg, kind, &vmap_offset)) {
-        bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-        uint32_t spill_mask = is_float ? oat_method.GetFpSpillMask()
-                                       : oat_method.GetCoreSpillMask();
-        os << (is_float ? "fr" : "r") << vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-      } else {
-        uint32_t offset = StackVisitor::GetVRegOffsetFromQuickCode(
-            code_item,
-            oat_method.GetCoreSpillMask(),
-            oat_method.GetFpSpillMask(),
-            oat_method.GetFrameSizeInBytes(),
-            reg,
-            GetInstructionSet());
-        os << "[sp + #" << offset << "]";
-      }
-    }
-  }
-
-  void DumpGcMapRegisters(std::ostream& os, const OatFile::OatMethod& oat_method,
-                          const DexFile::CodeItem* code_item,
-                          size_t num_regs, const uint8_t* reg_bitmap) {
-    bool first = true;
-    for (size_t reg = 0; reg < num_regs; reg++) {
-      if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
-        if (first) {
-          os << "  v" << reg << " (";
-          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-          os << ")";
-          first = false;
-        } else {
-          os << ", v" << reg << " (";
-          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-          os << ")";
-        }
-      }
-    }
-    if (first) {
-      os << "No registers in GC map\n";
-    } else {
-      os << "\n";
-    }
-  }
-  void DumpGcMap(std::ostream& os, const OatFile::OatMethod& oat_method,
-                 const DexFile::CodeItem* code_item) {
-    const uint8_t* gc_map_raw = oat_method.GetGcMap();
-    if (gc_map_raw == nullptr) {
-      return;  // No GC map.
-    }
-    const void* quick_code = oat_method.GetQuickCode();
-    NativePcOffsetToReferenceMap map(gc_map_raw);
-    for (size_t entry = 0; entry < map.NumEntries(); entry++) {
-      const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(quick_code) +
-          map.GetNativePcOffset(entry);
-      os << StringPrintf("%p", native_pc);
-      DumpGcMapRegisters(os, oat_method, code_item, map.RegWidth() * 8, map.GetBitMap(entry));
-    }
-  }
-
-  void DumpMappingTable(VariableIndentationOutputStream* vios,
-                        const OatFile::OatMethod& oat_method) {
-    const void* quick_code = oat_method.GetQuickCode();
-    if (quick_code == nullptr) {
-      return;
-    }
-    MappingTable table(oat_method.GetMappingTable());
-    if (table.TotalSize() != 0) {
-      if (table.PcToDexSize() != 0) {
-        typedef MappingTable::PcToDexIterator It;
-        vios->Stream() << "suspend point mappings {\n";
-        for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-          ScopedIndentation indent1(vios);
-          vios->Stream() << StringPrintf("0x%04x -> 0x%04x\n", cur.NativePcOffset(), cur.DexPc());
-        }
-        vios->Stream() << "}\n";
-      }
-      if (table.DexToPcSize() != 0) {
-        typedef MappingTable::DexToPcIterator It;
-        vios->Stream() << "catch entry mappings {\n";
-        for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-          ScopedIndentation indent1(vios);
-          vios->Stream() << StringPrintf("0x%04x -> 0x%04x\n", cur.NativePcOffset(), cur.DexPc());
-        }
-        vios->Stream() << "}\n";
-      }
-    }
-  }
-
-  uint32_t DumpInformationAtOffset(VariableIndentationOutputStream* vios,
-                                   const OatFile::OatMethod& oat_method,
-                                   const DexFile::CodeItem* code_item,
-                                   size_t offset,
-                                   bool suspend_point_mapping) {
-    if (IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
-      if (suspend_point_mapping) {
-        ScopedIndentation indent1(vios);
-        DumpDexRegisterMapAtOffset(vios, oat_method, code_item, offset);
-      }
-      // The return value is not used in the case of a method compiled
-      // with the optimizing compiler.
-      return DexFile::kDexNoIndex;
-    } else {
-      return DumpMappingAtOffset(vios->Stream(), oat_method, offset, suspend_point_mapping);
-    }
-  }
-
-  uint32_t DumpMappingAtOffset(std::ostream& os, const OatFile::OatMethod& oat_method,
-                               size_t offset, bool suspend_point_mapping) {
-    MappingTable table(oat_method.GetMappingTable());
-    if (suspend_point_mapping && table.PcToDexSize() > 0) {
-      typedef MappingTable::PcToDexIterator It;
-      for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-        if (offset == cur.NativePcOffset()) {
-          os << StringPrintf("suspend point dex PC: 0x%04x\n", cur.DexPc());
-          return cur.DexPc();
-        }
-      }
-    } else if (!suspend_point_mapping && table.DexToPcSize() > 0) {
-      typedef MappingTable::DexToPcIterator It;
-      for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-        if (offset == cur.NativePcOffset()) {
-          os << StringPrintf("catch entry dex PC: 0x%04x\n", cur.DexPc());
-          return cur.DexPc();
-        }
-      }
-    }
-    return DexFile::kDexNoIndex;
-  }
-
-  void DumpGcMapAtNativePcOffset(std::ostream& os, const OatFile::OatMethod& oat_method,
-                                 const DexFile::CodeItem* code_item, size_t native_pc_offset) {
-    const uint8_t* gc_map_raw = oat_method.GetGcMap();
-    if (gc_map_raw != nullptr) {
-      NativePcOffsetToReferenceMap map(gc_map_raw);
-      if (map.HasEntry(native_pc_offset)) {
-        size_t num_regs = map.RegWidth() * 8;
-        const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
-        bool first = true;
-        for (size_t reg = 0; reg < num_regs; reg++) {
-          if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
-            if (first) {
-              os << "GC map objects:  v" << reg << " (";
-              DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-              os << ")";
-              first = false;
-            } else {
-              os << ", v" << reg << " (";
-              DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-              os << ")";
-            }
-          }
-        }
-        if (!first) {
-          os << "\n";
-        }
-      }
-    }
-  }
-
-  void DumpVRegsAtDexPc(std::ostream& os, verifier::MethodVerifier* verifier,
-                        const OatFile::OatMethod& oat_method,
-                        const DexFile::CodeItem* code_item, uint32_t dex_pc) {
-    DCHECK(verifier != nullptr);
-    std::vector<int32_t> kinds = verifier->DescribeVRegs(dex_pc);
-    bool first = true;
-    for (size_t reg = 0; reg < code_item->registers_size_; reg++) {
-      VRegKind kind = static_cast<VRegKind>(kinds.at(reg * 2));
-      if (kind != kUndefined) {
-        if (first) {
-          os << "VRegs:  v";
-          first = false;
-        } else {
-          os << ", v";
-        }
-        os << reg << " (";
-        switch (kind) {
-          case kImpreciseConstant:
-            os << "Imprecise Constant: " << kinds.at((reg * 2) + 1) << ", ";
-            DescribeVReg(os, oat_method, code_item, reg, kind);
-            break;
-          case kConstant:
-            os << "Constant: " << kinds.at((reg * 2) + 1);
-            break;
-          default:
-            DescribeVReg(os, oat_method, code_item, reg, kind);
-            break;
-        }
-        os << ")";
-      }
-    }
-    if (!first) {
-      os << "\n";
-    }
-  }
-
-
   void DumpDexCode(std::ostream& os, const DexFile& dex_file, const DexFile::CodeItem* code_item) {
     if (code_item != nullptr) {
       size_t i = 0;
@@ -1373,7 +1237,7 @@
     // null, then this method has been compiled with the optimizing
     // compiler.
     return oat_method.GetQuickCode() != nullptr &&
-           oat_method.GetGcMap() == nullptr &&
+           oat_method.GetVmapTable() != nullptr &&
            code_item != nullptr;
   }
 
@@ -1389,27 +1253,6 @@
            code_item != nullptr;
   }
 
-  void DumpDexRegisterMapAtOffset(VariableIndentationOutputStream* vios,
-                                  const OatFile::OatMethod& oat_method,
-                                  const DexFile::CodeItem* code_item,
-                                  size_t offset) {
-    // This method is only relevant for oat methods compiled with the
-    // optimizing compiler.
-    DCHECK(IsMethodGeneratedByOptimizingCompiler(oat_method, code_item));
-
-    // The optimizing compiler outputs its CodeInfo data in the vmap table.
-    const void* raw_code_info = oat_method.GetVmapTable();
-    if (raw_code_info != nullptr) {
-      CodeInfo code_info(raw_code_info);
-      StackMapEncoding encoding = code_info.ExtractEncoding();
-      StackMap stack_map = code_info.GetStackMapForNativePcOffset(offset, encoding);
-      if (stack_map.IsValid()) {
-        stack_map.Dump(vios, code_info, encoding, oat_method.GetCodeOffset(),
-                       code_item->registers_size_);
-      }
-    }
-  }
-
   verifier::MethodVerifier* DumpVerifier(VariableIndentationOutputStream* vios,
                                          StackHandleScope<1>* hs,
                                          uint32_t dex_method_idx,
@@ -1421,8 +1264,7 @@
       ScopedObjectAccess soa(Thread::Current());
       Runtime* const runtime = Runtime::Current();
       Handle<mirror::DexCache> dex_cache(
-          hs->NewHandle(runtime->GetClassLinker()->RegisterDexFile(*dex_file,
-                                                                   runtime->GetLinearAlloc())));
+          hs->NewHandle(runtime->GetClassLinker()->RegisterDexFile(*dex_file, nullptr)));
       DCHECK(options_.class_loader_ != nullptr);
       return verifier::MethodVerifier::VerifyMethodAndDump(
           soa.Self(), vios, dex_method_idx, dex_file, dex_cache, *options_.class_loader_,
@@ -1432,8 +1274,92 @@
     return nullptr;
   }
 
+  // The StackMapsHelper provides the stack maps in the native PC order.
+  // For identical native PCs, the order from the CodeInfo is preserved.
+  class StackMapsHelper {
+   public:
+    explicit StackMapsHelper(const uint8_t* raw_code_info)
+        : code_info_(raw_code_info),
+          encoding_(code_info_.ExtractEncoding()),
+          number_of_stack_maps_(code_info_.GetNumberOfStackMaps(encoding_)),
+          indexes_(),
+          offset_(static_cast<size_t>(-1)),
+          stack_map_index_(0u) {
+      if (number_of_stack_maps_ != 0u) {
+        // Check if native PCs are ordered.
+        bool ordered = true;
+        StackMap last = code_info_.GetStackMapAt(0u, encoding_);
+        for (size_t i = 1; i != number_of_stack_maps_; ++i) {
+          StackMap current = code_info_.GetStackMapAt(i, encoding_);
+          if (last.GetNativePcOffset(encoding_.stack_map_encoding) >
+              current.GetNativePcOffset(encoding_.stack_map_encoding)) {
+            ordered = false;
+            break;
+          }
+          last = current;
+        }
+        if (!ordered) {
+          // Create indirection indexes for access in native PC order. We do not optimize
+          // for the fact that there can currently be only two separately ordered ranges,
+          // namely normal stack maps and catch-point stack maps.
+          indexes_.resize(number_of_stack_maps_);
+          std::iota(indexes_.begin(), indexes_.end(), 0u);
+          std::sort(indexes_.begin(),
+                    indexes_.end(),
+                    [this](size_t lhs, size_t rhs) {
+                      StackMap left = code_info_.GetStackMapAt(lhs, encoding_);
+                      uint32_t left_pc = left.GetNativePcOffset(encoding_.stack_map_encoding);
+                      StackMap right = code_info_.GetStackMapAt(rhs, encoding_);
+                      uint32_t right_pc = right.GetNativePcOffset(encoding_.stack_map_encoding);
+                      // If the PCs are the same, compare indexes to preserve the original order.
+                      return (left_pc < right_pc) || (left_pc == right_pc && lhs < rhs);
+                    });
+        }
+        offset_ = GetStackMapAt(0).GetNativePcOffset(encoding_.stack_map_encoding);
+      }
+    }
+
+    const CodeInfo& GetCodeInfo() const {
+      return code_info_;
+    }
+
+    const CodeInfoEncoding& GetEncoding() const {
+      return encoding_;
+    }
+
+    size_t GetOffset() const {
+      return offset_;
+    }
+
+    StackMap GetStackMap() const {
+      return GetStackMapAt(stack_map_index_);
+    }
+
+    void Next() {
+      ++stack_map_index_;
+      offset_ = (stack_map_index_ == number_of_stack_maps_)
+          ? static_cast<size_t>(-1)
+          : GetStackMapAt(stack_map_index_).GetNativePcOffset(encoding_.stack_map_encoding);
+    }
+
+   private:
+    StackMap GetStackMapAt(size_t i) const {
+      if (!indexes_.empty()) {
+        i = indexes_[i];
+      }
+      DCHECK_LT(i, number_of_stack_maps_);
+      return code_info_.GetStackMapAt(i, encoding_);
+    }
+
+    const CodeInfo code_info_;
+    const CodeInfoEncoding encoding_;
+    const size_t number_of_stack_maps_;
+    dchecked_vector<size_t> indexes_;  // Used if stack map native PCs are not ordered.
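+    // Native PC of the current stack map, or size_t(-1) once all maps are consumed.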
+    size_t offset_;
+    size_t stack_map_index_;
+  };
+
   void DumpCode(VariableIndentationOutputStream* vios,
-                verifier::MethodVerifier* verifier,
                 const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item,
                 bool bad_input, size_t code_size) {
     const void* quick_code = oat_method.GetQuickCode();
@@ -1444,24 +1370,34 @@
     if (code_size == 0 || quick_code == nullptr) {
       vios->Stream() << "NO CODE!\n";
       return;
+    } else if (!bad_input && IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
+      // The optimizing compiler outputs its CodeInfo data in the vmap table.
+      StackMapsHelper helper(oat_method.GetVmapTable());
+      const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
+      size_t offset = 0;
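+      // Interleave the stack map dump with the disassembly at matching native PCs.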
+      while (offset < code_size) {
+        offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset);
+        if (offset == helper.GetOffset()) {
+          ScopedIndentation indent1(vios);
+          StackMap stack_map = helper.GetStackMap();
+          DCHECK(stack_map.IsValid());
+          stack_map.Dump(vios,
+                         helper.GetCodeInfo(),
+                         helper.GetEncoding(),
+                         oat_method.GetCodeOffset(),
+                         code_item->registers_size_);
+          // There may be multiple stack maps at a given PC; only the first one is displayed.
+          do {
+            helper.Next();
+          } while (offset == helper.GetOffset());
+        }
+        DCHECK_LT(offset, helper.GetOffset());
+      }
     } else {
       const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
       size_t offset = 0;
       while (offset < code_size) {
-        if (!bad_input) {
-          DumpInformationAtOffset(vios, oat_method, code_item, offset, false);
-        }
         offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset);
-        if (!bad_input) {
-          uint32_t dex_pc =
-              DumpInformationAtOffset(vios, oat_method, code_item, offset, true);
-          if (dex_pc != DexFile::kDexNoIndex) {
-            DumpGcMapAtNativePcOffset(vios->Stream(), oat_method, code_item, offset);
-            if (verifier != nullptr) {
-              DumpVRegsAtDexPc(vios->Stream(), verifier, oat_method, code_item, dex_pc);
-            }
-          }
-        }
       }
     }
   }
@@ -1470,15 +1406,17 @@
   const std::vector<const OatFile::OatDexFile*> oat_dex_files_;
   const OatDumperOptions& options_;
   uint32_t resolved_addr2instr_;
-  InstructionSet instruction_set_;
+  const InstructionSet instruction_set_;
   std::set<uintptr_t> offsets_;
   Disassembler* disassembler_;
 };
 
 class ImageDumper {
  public:
-  ImageDumper(std::ostream* os, gc::space::ImageSpace& image_space,
-              const ImageHeader& image_header, OatDumperOptions* oat_dumper_options)
+  ImageDumper(std::ostream* os,
+              gc::space::ImageSpace& image_space,
+              const ImageHeader& image_header,
+              OatDumperOptions* oat_dumper_options)
       : os_(os),
         vios_(os),
         indent1_(&vios_),
@@ -1492,6 +1430,8 @@
 
     os << "MAGIC: " << image_header_.GetMagic() << "\n\n";
 
+    os << "IMAGE LOCATION: " << image_space_.GetImageLocation() << "\n\n";
+
     os << "IMAGE BEGIN: " << reinterpret_cast<void*>(image_header_.GetImageBegin()) << "\n\n";
 
     os << "IMAGE SIZE: " << image_header_.GetImageSize() << "\n\n";
@@ -1574,16 +1514,23 @@
     os << "OAT LOCATION: " << oat_location;
     os << "\n";
     std::string error_msg;
-    const OatFile* oat_file = runtime->GetOatFileManager().FindOpenedOatFileFromOatLocation(
-        oat_location);
+    const OatFile* oat_file = image_space_.GetOatFile();
     if (oat_file == nullptr) {
-      oat_file = OatFile::Open(oat_location, oat_location,
-                               nullptr, nullptr, false, nullptr,
+      oat_file = runtime->GetOatFileManager().FindOpenedOatFileFromOatLocation(oat_location);
+    }
+    if (oat_file == nullptr) {
+      oat_file = OatFile::Open(oat_location,
+                               oat_location,
+                               nullptr,
+                               nullptr,
+                               false,
+                               /*low_4gb*/false,
+                               nullptr,
                                &error_msg);
-      if (oat_file == nullptr) {
-        os << "NOT FOUND: " << error_msg << "\n";
-        return false;
-      }
+    }
+    if (oat_file == nullptr) {
+      os << "OAT FILE NOT FOUND: " << error_msg << "\n";
+      return false;
     }
     os << "\n";
 
@@ -1599,9 +1546,8 @@
 
     os << "OBJECTS:\n" << std::flush;
 
-    // Loop through all the image spaces and dump their objects.
+    // Loop through the image space and dump its objects.
     gc::Heap* heap = runtime->GetHeap();
-    const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
     Thread* self = Thread::Current();
     {
       {
@@ -1620,66 +1566,98 @@
       dex_caches_.clear();
       {
         ReaderMutexLock mu(self, *class_linker->DexLock());
-        for (jobject weak_root : class_linker->GetDexCaches()) {
+        for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
           mirror::DexCache* dex_cache =
-              down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+              down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
           if (dex_cache != nullptr) {
             dex_caches_.insert(dex_cache);
           }
         }
       }
       ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      for (const auto& space : spaces) {
-        if (space->IsImageSpace()) {
-          auto* image_space = space->AsImageSpace();
-          // Dump the normal objects before ArtMethods.
-          image_space->GetLiveBitmap()->Walk(ImageDumper::Callback, this);
-          indent_os << "\n";
-          // TODO: Dump fields.
-          // Dump methods after.
-          const auto& methods_section = image_header_.GetMethodsSection();
-          const size_t pointer_size =
-              InstructionSetPointerSize(oat_dumper_->GetOatInstructionSet());
-          DumpArtMethodVisitor visitor(this);
-          methods_section.VisitPackedArtMethods(&visitor, image_space->Begin(), pointer_size);
-        }
-      }
+      // Dump the normal objects before ArtMethods.
+      image_space_.GetLiveBitmap()->Walk(ImageDumper::Callback, this);
+      indent_os << "\n";
+      // TODO: Dump fields.
+      // Dump methods after.
+      DumpArtMethodVisitor visitor(this);
+      image_header_.VisitPackedArtMethods(&visitor,
+                                          image_space_.Begin(),
+                                          image_header_.GetPointerSize());
       // Dump the large objects separately.
       heap->GetLargeObjectsSpace()->GetLiveBitmap()->Walk(ImageDumper::Callback, this);
       indent_os << "\n";
     }
     os << "STATS:\n" << std::flush;
     std::unique_ptr<File> file(OS::OpenFileForReading(image_filename.c_str()));
-    if (file.get() == nullptr) {
+    size_t data_size = image_header_.GetDataSize();  // stored size in file.
+    if (file == nullptr) {
       LOG(WARNING) << "Failed to find image in " << image_filename;
-    }
-    if (file.get() != nullptr) {
+    } else {
       stats_.file_bytes = file->GetLength();
+      // If the image is compressed, adjust to decompressed size.
+      size_t uncompressed_size = image_header_.GetImageSize() - sizeof(ImageHeader);
+      if (image_header_.GetStorageMode() == ImageHeader::kStorageModeUncompressed) {
+        DCHECK_EQ(uncompressed_size, data_size) << "Sizes should match for uncompressed image";
+      }
+      stats_.file_bytes += uncompressed_size - data_size;
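+      // This replaces the stored (possibly compressed) data size with the decompressed size.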
     }
     size_t header_bytes = sizeof(ImageHeader);
-    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+    const auto& object_section = image_header_.GetImageSection(ImageHeader::kSectionObjects);
     const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields);
     const auto& method_section = image_header_.GetMethodsSection();
     const auto& dex_cache_arrays_section = image_header_.GetImageSection(
         ImageHeader::kSectionDexCacheArrays);
     const auto& intern_section = image_header_.GetImageSection(
         ImageHeader::kSectionInternedStrings);
+    const auto& class_table_section = image_header_.GetImageSection(
+        ImageHeader::kSectionClassTable);
+    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+
     stats_.header_bytes = header_bytes;
-    stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
-    // Add padding between the field and method section.
-    // (Field section is 4-byte aligned, method section is 8-byte aligned on 64-bit targets.)
-    stats_.alignment_bytes += method_section.Offset() -
-        (field_section.Offset() + field_section.Size());
-    // Add padding between the dex cache arrays section and the intern table. (Dex cache
-    // arrays section is 4-byte aligned on 32-bit targets, intern table is 8-byte aligned.)
-    stats_.alignment_bytes += intern_section.Offset() -
-        (dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size());
-    stats_.alignment_bytes += bitmap_section.Offset() - image_header_.GetImageSize();
+
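+    // Padding between consecutive sections below is attributed to alignment_bytes.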
+    // Objects are kObjectAlignment-aligned.
+    // CHECK_EQ(RoundUp(header_bytes, kObjectAlignment), object_section.Offset());
+    if (object_section.Offset() > header_bytes) {
+      stats_.alignment_bytes += object_section.Offset() - header_bytes;
+    }
+
+    // Field section is 4-byte aligned.
+    constexpr size_t kFieldSectionAlignment = 4U;
+    uint32_t end_objects = object_section.Offset() + object_section.Size();
+    CHECK_EQ(RoundUp(end_objects, kFieldSectionAlignment), field_section.Offset());
+    stats_.alignment_bytes += field_section.Offset() - end_objects;
+
+    // Method section is 4/8 byte aligned depending on target. Just check for 4-byte alignment.
+    uint32_t end_fields = field_section.Offset() + field_section.Size();
+    CHECK_ALIGNED(method_section.Offset(), 4);
+    stats_.alignment_bytes += method_section.Offset() - end_fields;
+
+    // Dex cache arrays section is aligned depending on the target. Just check for 4-byte alignment.
+    uint32_t end_methods = method_section.Offset() + method_section.Size();
+    CHECK_ALIGNED(dex_cache_arrays_section.Offset(), 4);
+    stats_.alignment_bytes += dex_cache_arrays_section.Offset() - end_methods;
+
+    // Intern table is 8-byte aligned.
+    uint32_t end_caches = dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size();
+    CHECK_EQ(RoundUp(end_caches, 8U), intern_section.Offset());
+    stats_.alignment_bytes += intern_section.Offset() - end_caches;
+
+    // Add space between intern table and class table.
+    uint32_t end_intern = intern_section.Offset() + intern_section.Size();
+    stats_.alignment_bytes += class_table_section.Offset() - end_intern;
+
+    // Add space between end of image data and bitmap. Expect the bitmap to be page-aligned.
+    const size_t bitmap_offset = sizeof(ImageHeader) + data_size;
+    CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
+    stats_.alignment_bytes += RoundUp(bitmap_offset, kPageSize) - bitmap_offset;
+
     stats_.bitmap_bytes += bitmap_section.Size();
     stats_.art_field_bytes += field_section.Size();
     stats_.art_method_bytes += method_section.Size();
     stats_.dex_cache_arrays_bytes += dex_cache_arrays_section.Size();
     stats_.interned_strings_bytes += intern_section.Size();
+    stats_.class_table_bytes += class_table_section.Size();
     stats_.Dump(os, indent_os);
     os << "\n";
 
@@ -1696,7 +1674,7 @@
     virtual void Visit(ArtMethod* method) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
       std::ostream& indent_os = image_dumper_->vios_.Stream();
       indent_os << method << " " << " ArtMethod: " << PrettyMethod(method) << "\n";
-      image_dumper_->DumpMethod(method, image_dumper_, indent_os);
+      image_dumper_->DumpMethod(method, indent_os);
       indent_os << "\n";
     }
 
@@ -1789,10 +1767,9 @@
     return image_space_.Contains(object);
   }
 
-  const void* GetQuickOatCodeBegin(ArtMethod* m)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  const void* GetQuickOatCodeBegin(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
     const void* quick_code = m->GetEntryPointFromQuickCompiledCodePtrSize(
-        InstructionSetPointerSize(oat_dumper_->GetOatInstructionSet()));
+        image_header_.GetPointerSize());
     if (Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(quick_code)) {
       quick_code = oat_dumper_->GetQuickOatCode(m);
     }
@@ -1851,8 +1828,7 @@
     }
     ScopedIndentation indent1(&state->vios_);
     DumpFields(os, obj, obj_class);
-    const auto image_pointer_size =
-        InstructionSetPointerSize(state->oat_dumper_->GetOatInstructionSet());
+    const PointerSize image_pointer_size = state->image_header_.GetPointerSize();
     if (obj->IsObjectArray()) {
       auto* obj_array = obj->AsObjectArray<mirror::Object>();
       for (int32_t i = 0, length = obj_array->GetLength(); i < length; i++) {
@@ -1893,17 +1869,21 @@
         const auto& method_section = state->image_header_.GetMethodsSection();
         size_t num_methods = dex_cache->NumResolvedMethods();
         if (num_methods != 0u) {
-          os << "Methods (size=" << num_methods << "):";
+          os << "Methods (size=" << num_methods << "):\n";
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_methods = dex_cache->GetResolvedMethods();
           for (size_t i = 0, length = dex_cache->NumResolvedMethods(); i < length; ++i) {
-            auto* elem = mirror::DexCache::GetElementPtrSize(resolved_methods, i, image_pointer_size);
+            auto* elem = mirror::DexCache::GetElementPtrSize(resolved_methods,
+                                                             i,
+                                                             image_pointer_size);
             size_t run = 0;
             for (size_t j = i + 1;
-                j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods,
-                                                                           j,
-                                                                           image_pointer_size);
-                ++j, ++run) {}
+                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods,
+                                                                            j,
+                                                                            image_pointer_size);
+                 ++j) {
+              ++run;
+            }
             if (run == 0) {
               os << StringPrintf("%zd: ", i);
             } else {
@@ -1924,17 +1904,20 @@
         }
         size_t num_fields = dex_cache->NumResolvedFields();
         if (num_fields != 0u) {
-          os << "Fields (size=" << num_fields << "):";
+          os << "Fields (size=" << num_fields << "):\n";
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_fields = dex_cache->GetResolvedFields();
           for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) {
-            auto* elem = mirror::DexCache::GetElementPtrSize(resolved_fields, i, image_pointer_size);
+            auto* elem = mirror::DexCache::GetElementPtrSize(
+                resolved_fields, i, image_pointer_size);
             size_t run = 0;
             for (size_t j = i + 1;
-                j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
-                                                                           j,
-                                                                           image_pointer_size);
-                ++j, ++run) {}
+                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
+                                                                            j,
+                                                                            image_pointer_size);
+                 ++j) {
+              ++run;
+            }
             if (run == 0) {
               os << StringPrintf("%zd: ", i);
             } else {
@@ -1953,96 +1936,112 @@
             os << StringPrintf("%p   %s\n", elem, msg.c_str());
           }
         }
+        size_t num_types = dex_cache->NumResolvedTypes();
+        if (num_types != 0u) {
+          os << "Types (size=" << num_types << "):\n";
+          ScopedIndentation indent2(&state->vios_);
+          auto* resolved_types = dex_cache->GetResolvedTypes();
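+          // Runs of identical entries are printed as "i to j: value", as for methods and fields.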
+          for (size_t i = 0; i < num_types; ++i) {
+            auto* elem = resolved_types[i].Read();
+            size_t run = 0;
+            for (size_t j = i + 1; j != num_types && elem == resolved_types[j].Read(); ++j) {
+              ++run;
+            }
+            if (run == 0) {
+              os << StringPrintf("%zd: ", i);
+            } else {
+              os << StringPrintf("%zd to %zd: ", i, i + run);
+              i = i + run;
+            }
+            std::string msg;
+            if (elem == nullptr) {
+              msg = "null";
+            } else {
+              msg = PrettyClass(elem);
+            }
+            os << StringPrintf("%p   %s\n", elem, msg.c_str());
+          }
+        }
       }
     }
     std::string temp;
     state->stats_.Update(obj_class->GetDescriptor(&temp), object_bytes);
   }
 
-  void DumpMethod(ArtMethod* method, ImageDumper* state, std::ostream& indent_os)
+  void DumpMethod(ArtMethod* method, std::ostream& indent_os)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(method != nullptr);
-    const auto image_pointer_size =
-        InstructionSetPointerSize(state->oat_dumper_->GetOatInstructionSet());
-    const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
-    const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
+    const void* quick_oat_code_begin = GetQuickOatCodeBegin(method);
+    const void* quick_oat_code_end = GetQuickOatCodeEnd(method);
+    const PointerSize pointer_size = image_header_.GetPointerSize();
     OatQuickMethodHeader* method_header = reinterpret_cast<OatQuickMethodHeader*>(
         reinterpret_cast<uintptr_t>(quick_oat_code_begin) - sizeof(OatQuickMethodHeader));
     if (method->IsNative()) {
-      if (!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(quick_oat_code_begin)) {
-        DCHECK(method_header->GetNativeGcMap() == nullptr) << PrettyMethod(method);
-        DCHECK(method_header->GetMappingTable() == nullptr) << PrettyMethod(method);
-      }
       bool first_occurrence;
-      uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
-      state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
+      uint32_t quick_oat_code_size = GetQuickOatCodeSize(method);
+      ComputeOatSize(quick_oat_code_begin, &first_occurrence);
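+      // ComputeOatSize() de-duplicates, so shared code blobs are counted only once.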
       if (first_occurrence) {
-        state->stats_.native_to_managed_code_bytes += quick_oat_code_size;
+        stats_.native_to_managed_code_bytes += quick_oat_code_size;
       }
-      if (quick_oat_code_begin !=
-            method->GetEntryPointFromQuickCompiledCodePtrSize(image_pointer_size)) {
+      if (quick_oat_code_begin != method->GetEntryPointFromQuickCompiledCodePtrSize(
+          image_header_.GetPointerSize())) {
         indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code_begin);
       }
-    } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
-      method->IsResolutionMethod() || method->IsImtConflictMethod() ||
-      method->IsImtUnimplementedMethod() || method->IsClassInitializer()) {
+    } else if (method->IsAbstract() || method->IsClassInitializer()) {
+      // Don't print information for these.
+    } else if (method->IsRuntimeMethod()) {
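+      // Runtime methods have no managed code to account for, but may hold an IMT conflict table.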
+      ImtConflictTable* table = method->GetImtConflictTable(image_header_.GetPointerSize());
+      if (table != nullptr) {
+        indent_os << "IMT conflict table " << table << " method: ";
+        for (size_t i = 0, count = table->NumEntries(pointer_size); i < count; ++i) {
+          indent_os << PrettyMethod(table->GetImplementationMethod(i, pointer_size)) << " ";
+        }
+      }
     } else {
       const DexFile::CodeItem* code_item = method->GetCodeItem();
       size_t dex_instruction_bytes = code_item->insns_size_in_code_units_ * 2;
-      state->stats_.dex_instruction_bytes += dex_instruction_bytes;
+      stats_.dex_instruction_bytes += dex_instruction_bytes;
 
       bool first_occurrence;
-      size_t gc_map_bytes = state->ComputeOatSize(
-          method_header->GetNativeGcMap(), &first_occurrence);
-      if (first_occurrence) {
-        state->stats_.gc_map_bytes += gc_map_bytes;
-      }
-
-      size_t pc_mapping_table_bytes = state->ComputeOatSize(
-          method_header->GetMappingTable(), &first_occurrence);
-      if (first_occurrence) {
-        state->stats_.pc_mapping_table_bytes += pc_mapping_table_bytes;
-      }
-
       size_t vmap_table_bytes = 0u;
       if (!method_header->IsOptimized()) {
         // Methods compiled with the optimizing compiler have no vmap table.
-        vmap_table_bytes = state->ComputeOatSize(
-            method_header->GetVmapTable(), &first_occurrence);
+        vmap_table_bytes = ComputeOatSize(method_header->GetVmapTable(), &first_occurrence);
         if (first_occurrence) {
-          state->stats_.vmap_table_bytes += vmap_table_bytes;
+          stats_.vmap_table_bytes += vmap_table_bytes;
         }
       }
 
-      uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
-      state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
+      uint32_t quick_oat_code_size = GetQuickOatCodeSize(method);
+      ComputeOatSize(quick_oat_code_begin, &first_occurrence);
       if (first_occurrence) {
-        state->stats_.managed_code_bytes += quick_oat_code_size;
+        stats_.managed_code_bytes += quick_oat_code_size;
         if (method->IsConstructor()) {
           if (method->IsStatic()) {
-            state->stats_.class_initializer_code_bytes += quick_oat_code_size;
+            stats_.class_initializer_code_bytes += quick_oat_code_size;
           } else if (dex_instruction_bytes > kLargeConstructorDexBytes) {
-            state->stats_.large_initializer_code_bytes += quick_oat_code_size;
+            stats_.large_initializer_code_bytes += quick_oat_code_size;
           }
         } else if (dex_instruction_bytes > kLargeMethodDexBytes) {
-          state->stats_.large_method_code_bytes += quick_oat_code_size;
+          stats_.large_method_code_bytes += quick_oat_code_size;
         }
       }
-      state->stats_.managed_code_bytes_ignoring_deduplication += quick_oat_code_size;
+      stats_.managed_code_bytes_ignoring_deduplication += quick_oat_code_size;
 
       uint32_t method_access_flags = method->GetAccessFlags();
 
       indent_os << StringPrintf("OAT CODE: %p-%p\n", quick_oat_code_begin, quick_oat_code_end);
-      indent_os << StringPrintf("SIZE: Dex Instructions=%zd GC=%zd Mapping=%zd AccessFlags=0x%x\n",
-                                dex_instruction_bytes, gc_map_bytes, pc_mapping_table_bytes,
+      indent_os << StringPrintf("SIZE: Dex Instructions=%zd StackMaps=%zd AccessFlags=0x%x\n",
+                                dex_instruction_bytes,
+                                vmap_table_bytes,
                                 method_access_flags);
 
-      size_t total_size = dex_instruction_bytes + gc_map_bytes + pc_mapping_table_bytes +
-          vmap_table_bytes + quick_oat_code_size + ArtMethod::Size(image_pointer_size);
+      size_t total_size = dex_instruction_bytes +
+          vmap_table_bytes + quick_oat_code_size + ArtMethod::Size(image_header_.GetPointerSize());
 
       double expansion =
           static_cast<double>(quick_oat_code_size) / static_cast<double>(dex_instruction_bytes);
-      state->stats_.ComputeOutliers(total_size, expansion, method);
+      stats_.ComputeOutliers(total_size, expansion, method);
     }
   }
 
@@ -2070,6 +2069,7 @@
     size_t art_method_bytes;
     size_t dex_cache_arrays_bytes;
     size_t interned_strings_bytes;
+    size_t class_table_bytes;
     size_t bitmap_bytes;
     size_t alignment_bytes;
 
@@ -2081,8 +2081,6 @@
     size_t large_initializer_code_bytes;
     size_t large_method_code_bytes;
 
-    size_t gc_map_bytes;
-    size_t pc_mapping_table_bytes;
     size_t vmap_table_bytes;
 
     size_t dex_instruction_bytes;
@@ -2101,6 +2099,7 @@
           art_method_bytes(0),
           dex_cache_arrays_bytes(0),
           interned_strings_bytes(0),
+          class_table_bytes(0),
           bitmap_bytes(0),
           alignment_bytes(0),
           managed_code_bytes(0),
@@ -2110,8 +2109,6 @@
           class_initializer_code_bytes(0),
           large_initializer_code_bytes(0),
           large_method_code_bytes(0),
-          gc_map_bytes(0),
-          pc_mapping_table_bytes(0),
           vmap_table_bytes(0),
           dex_instruction_bytes(0) {}
 
@@ -2158,6 +2155,9 @@
       size_t sum_of_expansion = 0;
       size_t sum_of_expansion_squared = 0;
       size_t n = method_outlier_size.size();
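+      // Outlier deviation statistics below need at least two samples.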
+      if (n <= 1) {
+        return;
+      }
       for (size_t i = 0; i < n; i++) {
         size_t cur_size = method_outlier_size[i];
         sum_of_sizes += cur_size;
@@ -2263,6 +2263,7 @@
                                   "art_method_bytes       =  %8zd (%2.0f%% of art file bytes)\n"
                                   "dex_cache_arrays_bytes =  %8zd (%2.0f%% of art file bytes)\n"
                                   "interned_string_bytes  =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "class_table_bytes      =  %8zd (%2.0f%% of art file bytes)\n"
                                   "bitmap_bytes           =  %8zd (%2.0f%% of art file bytes)\n"
                                   "alignment_bytes        =  %8zd (%2.0f%% of art file bytes)\n\n",
                                   header_bytes, PercentOfFileBytes(header_bytes),
@@ -2273,11 +2274,14 @@
                                   PercentOfFileBytes(dex_cache_arrays_bytes),
                                   interned_strings_bytes,
                                   PercentOfFileBytes(interned_strings_bytes),
+                                  class_table_bytes, PercentOfFileBytes(class_table_bytes),
                                   bitmap_bytes, PercentOfFileBytes(bitmap_bytes),
                                   alignment_bytes, PercentOfFileBytes(alignment_bytes))
             << std::flush;
-        CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes +
-                 dex_cache_arrays_bytes + interned_strings_bytes + bitmap_bytes + alignment_bytes);
+        CHECK_EQ(file_bytes,
+                 header_bytes + object_bytes + art_field_bytes + art_method_bytes +
+                 dex_cache_arrays_bytes + interned_strings_bytes + class_table_bytes +
+                 bitmap_bytes + alignment_bytes);
       }
 
       os << "object_bytes breakdown:\n";
@@ -2323,11 +2327,7 @@
                            PercentOfOatBytes(oat_dex_file_size.second));
       }
 
-      os << "\n" << StringPrintf("gc_map_bytes           = %7zd (%2.0f%% of oat file bytes)\n"
-                                 "pc_mapping_table_bytes = %7zd (%2.0f%% of oat file bytes)\n"
-                                 "vmap_table_bytes       = %7zd (%2.0f%% of oat file bytes)\n\n",
-                                 gc_map_bytes, PercentOfOatBytes(gc_map_bytes),
-                                 pc_mapping_table_bytes, PercentOfOatBytes(pc_mapping_table_bytes),
+      os << "\n" << StringPrintf("vmap_table_bytes       = %7zd (%2.0f%% of oat file bytes)\n\n",
                                  vmap_table_bytes, PercentOfOatBytes(vmap_table_bytes))
          << std::flush;
 
@@ -2368,26 +2368,77 @@
   DISALLOW_COPY_AND_ASSIGN(ImageDumper);
 };
 
-static int DumpImage(Runtime* runtime, const char* image_location, OatDumperOptions* options,
-                     std::ostream* os) {
+static int DumpImage(gc::space::ImageSpace* image_space,
+                     OatDumperOptions* options,
+                     std::ostream* os) SHARED_REQUIRES(Locks::mutator_lock_) {
+  const ImageHeader& image_header = image_space->GetImageHeader();
+  if (!image_header.IsValid()) {
+    fprintf(stderr, "Invalid image header %s\n", image_space->GetImageLocation().c_str());
+    return EXIT_FAILURE;
+  }
+  ImageDumper image_dumper(os, *image_space, image_header, options);
+  if (!image_dumper.Dump()) {
+    return EXIT_FAILURE;
+  }
+  return EXIT_SUCCESS;
+}
+
+static int DumpImages(Runtime* runtime, OatDumperOptions* options, std::ostream* os) {
   // Dumping the image, no explicit class loader.
-  NullHandle<mirror::ClassLoader> null_class_loader;
+  ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   ScopedObjectAccess soa(Thread::Current());
-  gc::Heap* heap = runtime->GetHeap();
-  gc::space::ImageSpace* image_space = heap->GetImageSpace();
-  CHECK(image_space != nullptr);
-  const ImageHeader& image_header = image_space->GetImageHeader();
-  if (!image_header.IsValid()) {
-    fprintf(stderr, "Invalid image header %s\n", image_location);
-    return EXIT_FAILURE;
+  if (options->app_image_ != nullptr) {
+    if (options->app_oat_ == nullptr) {
+      LOG(ERROR) << "Can not dump app image without app oat file";
+      return EXIT_FAILURE;
+    }
+    // We can't know yet whether the app image is 32-bit, but it contains pointers into the oat
+    // file. We need to map the oat file in the low 4GB, or else the fixup won't be able to fit
+    // oat file pointers into 32-bit pointer-sized ArtMethods.
+    std::string error_msg;
+    std::unique_ptr<OatFile> oat_file(OatFile::Open(options->app_oat_,
+                                                    options->app_oat_,
+                                                    nullptr,
+                                                    nullptr,
+                                                    false,
+                                                    /*low_4gb*/true,
+                                                    nullptr,
+                                                    &error_msg));
+    if (oat_file == nullptr) {
+      LOG(ERROR) << "Failed to open oat file " << options->app_oat_ << " with error " << error_msg;
+      return EXIT_FAILURE;
+    }
+    std::unique_ptr<gc::space::ImageSpace> space(
+        gc::space::ImageSpace::CreateFromAppImage(options->app_image_, oat_file.get(), &error_msg));
+    if (space == nullptr) {
+      LOG(ERROR) << "Failed to open app image " << options->app_image_ << " with error "
+                 << error_msg;
+      // Bail out: the space is dereferenced below.
+      return EXIT_FAILURE;
+    }
+    // Open dex files for the image.
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    if (!runtime->GetClassLinker()->OpenImageDexFiles(space.get(), &dex_files, &error_msg)) {
+      LOG(ERROR) << "Failed to open app image dex files " << options->app_image_ << " with error "
+                 << error_msg;
+    }
+    // Dump the actual image.
+    int result = DumpImage(space.get(), options, os);
+    if (result != EXIT_SUCCESS) {
+      return result;
+    }
+    // Fall through to dump the boot images.
   }
 
-  ImageDumper image_dumper(os, *image_space, image_header, options);
-
-  bool success = image_dumper.Dump();
-  return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
+  gc::Heap* heap = runtime->GetHeap();
+  CHECK(heap->HasBootImageSpace()) << "No image spaces";
+  for (gc::space::ImageSpace* image_space : heap->GetBootImageSpaces()) {
+    int result = DumpImage(image_space, options, os);
+    if (result != EXIT_SUCCESS) {
+      return result;
+    }
+  }
+  return EXIT_SUCCESS;
 }
 
 static int DumpOatWithRuntime(Runtime* runtime, OatFile* oat_file, OatDumperOptions* options,
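
The low-4GB constraint in DumpImages above comes down to pointer-width arithmetic. A
minimal standalone sketch (not ART code; FitsIn32BitSlot is a hypothetical name) of the
invariant the fixup relies on:

    #include <cstdint>

    // A 32-bit app image stores native pointers in 32-bit ArtMethod slots, so any
    // address written back during fixup must fit in uint32_t. Mapping the oat file
    // with low_4gb guarantees this for every pointer into it.
    inline bool FitsIn32BitSlot(const void* mapped_address) {
      return reinterpret_cast<uintptr_t>(mapped_address) <= UINT32_MAX;
    }
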
@@ -2408,7 +2459,7 @@
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(odf, &error_msg);
     CHECK(dex_file != nullptr) << error_msg;
-    class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    class_linker->RegisterDexFile(*dex_file, nullptr);
     class_path.push_back(dex_file);
   }
 
@@ -2430,7 +2481,7 @@
 static int DumpOatWithoutRuntime(OatFile* oat_file, OatDumperOptions* options, std::ostream* os) {
   CHECK(oat_file != nullptr && options != nullptr);
   // No image = no class loader.
-  NullHandle<mirror::ClassLoader> null_class_loader;
+  ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   OatDumper oat_dumper(*oat_file, *options);
@@ -2441,8 +2492,14 @@
 static int DumpOat(Runtime* runtime, const char* oat_filename, OatDumperOptions* options,
                    std::ostream* os) {
   std::string error_msg;
-  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, nullptr, false,
-                                    nullptr, &error_msg);
+  OatFile* oat_file = OatFile::Open(oat_filename,
+                                    oat_filename,
+                                    nullptr,
+                                    nullptr,
+                                    false,
+                                    /*low_4gb*/false,
+                                    nullptr,
+                                    &error_msg);
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
@@ -2455,17 +2512,32 @@
   }
 }
 
-static int SymbolizeOat(const char* oat_filename, std::string& output_name) {
+static int SymbolizeOat(const char* oat_filename, std::string& output_name, bool no_bits) {
   std::string error_msg;
-  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, nullptr, false,
-                                    nullptr, &error_msg);
+  OatFile* oat_file = OatFile::Open(oat_filename,
+                                    oat_filename,
+                                    nullptr,
+                                    nullptr,
+                                    false,
+                                    /*low_4gb*/false,
+                                    nullptr,
+                                    &error_msg);
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
   }
 
-  OatSymbolizer oat_symbolizer(oat_file, output_name);
-  if (!oat_symbolizer.Symbolize()) {
+  bool result;
+  // Try to produce an ELF file of the same type. This is finicky, as we have used 32-bit ELF
+  // files for 64-bit code in the past.
+  if (Is64BitInstructionSet(oat_file->GetOatHeader().GetInstructionSet())) {
+    OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file, output_name, no_bits);
+    result = oat_symbolizer.Symbolize();
+  } else {
+    OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file, output_name, no_bits);
+    result = oat_symbolizer.Symbolize();
+  }
+  if (!result) {
     fprintf(stderr, "Failed to symbolize\n");
     return EXIT_FAILURE;
   }
@@ -2490,19 +2562,19 @@
       oat_filename_ = option.substr(strlen("--oat-file=")).data();
     } else if (option.starts_with("--image=")) {
       image_location_ = option.substr(strlen("--image=")).data();
-    } else if (option =="--dump:raw_mapping_table") {
-      dump_raw_mapping_table_ = true;
-    } else if (option == "--dump:raw_gc_map") {
-      dump_raw_gc_map_ = true;
     } else if (option == "--no-dump:vmap") {
       dump_vmap_ = false;
     } else if (option =="--dump:code_info_stack_maps") {
       dump_code_info_stack_maps_ = true;
     } else if (option == "--no-disassemble") {
       disassemble_code_ = false;
+    } else if (option =="--header-only") {
+      dump_header_only_ = true;
     } else if (option.starts_with("--symbolize=")) {
       oat_filename_ = option.substr(strlen("--symbolize=")).data();
       symbolize_ = true;
+    } else if (option.starts_with("--only-keep-debug")) {
+      only_keep_debug_ = true;
     } else if (option.starts_with("--class-filter=")) {
       class_filter_ = option.substr(strlen("--class-filter=")).data();
     } else if (option.starts_with("--method-filter=")) {
@@ -2518,6 +2590,10 @@
         *error_msg = "Address conversion failed";
         return kParseError;
       }
+    } else if (option.starts_with("--app-image=")) {
+      app_image_ = option.substr(strlen("--app-image=")).data();
+    } else if (option.starts_with("--app-oat=")) {
+      app_oat_ = option.substr(strlen("--app-oat=")).data();
     } else {
       return kParseUnknownArgument;
     }
@@ -2563,17 +2639,18 @@
         "\n"
         "  --image=<file.art>: specifies an input image location.\n"
         "      Example: --image=/system/framework/boot.art\n"
+        "\n"
+        "  --app-image=<file.art>: specifies an input app image. Must also have a specified\n"
+        " boot image and app oat file.\n"
+        "      Example: --app-image=app.art\n"
+        "\n"
+        "  --app-oat=<file.odex>: specifies an input app oat.\n"
+        "      Example: --app-oat=app.odex\n"
         "\n";
 
     usage += Base::GetUsage();
 
     usage +=  // Optional.
-        "  --dump:raw_mapping_table enables dumping of the mapping table.\n"
-        "      Example: --dump:raw_mapping_table\n"
-        "\n"
-        "  --dump:raw_gc_map enables dumping of the GC map.\n"
-        "      Example: --dump:raw_gc_map\n"
-        "\n"
         "  --no-dump:vmap may be used to disable vmap dumping.\n"
         "      Example: --no-dump:vmap\n"
         "\n"
@@ -2583,6 +2660,9 @@
         "  --no-disassemble may be used to disable disassembly.\n"
         "      Example: --no-disassemble\n"
         "\n"
+        "  --header-only may be used to print only the oat header.\n"
+        "      Example: --header-only\n"
+        "\n"
         "  --list-classes may be used to list target file classes (can be used with filters).\n"
         "      Example: --list-classes\n"
         "      Example: --list-classes --class-filter=com.example.foo\n"
@@ -2594,6 +2674,10 @@
         "  --symbolize=<file.oat>: output a copy of file.oat with elf symbols included.\n"
         "      Example: --symbolize=/system/framework/boot.oat\n"
         "\n"
+        "  --only-keep-debug<file.oat>: Modifies the behaviour of --symbolize so that\n"
+        "      .rodata and .text sections are omitted in the output file to save space.\n"
+        "      Example: --symbolize=/system/framework/boot.oat --only-keep-debug\n"
+        "\n"
         "  --class-filter=<class name>: only dumps classes that contain the filter.\n"
         "      Example: --class-filter=com.example.foo\n"
         "\n"
@@ -2617,16 +2701,18 @@
   const char* method_filter_ = "";
   const char* image_location_ = nullptr;
   std::string elf_filename_prefix_;
-  bool dump_raw_mapping_table_ = false;
-  bool dump_raw_gc_map_ = false;
   bool dump_vmap_ = true;
   bool dump_code_info_stack_maps_ = false;
   bool disassemble_code_ = true;
   bool symbolize_ = false;
+  bool only_keep_debug_ = false;
   bool list_classes_ = false;
   bool list_methods_ = false;
+  bool dump_header_only_ = false;
   uint32_t addr2instr_ = 0;
   const char* export_dex_location_ = nullptr;
+  const char* app_image_ = nullptr;
+  const char* app_oat_ = nullptr;
 };
 
 struct OatdumpMain : public CmdlineMain<OatdumpArgs> {
@@ -2636,9 +2722,7 @@
     // If we are only doing the oat file, disable absolute_addresses. Keep them for image dumping.
     bool absolute_addresses = (args_->oat_filename_ == nullptr);
 
-    oat_dumper_options_ = std::unique_ptr<OatDumperOptions>(new OatDumperOptions(
-        args_->dump_raw_mapping_table_,
-        args_->dump_raw_gc_map_,
+    oat_dumper_options_.reset(new OatDumperOptions(
         args_->dump_vmap_,
         args_->dump_code_info_stack_maps_,
         args_->disassemble_code_,
@@ -2647,7 +2731,10 @@
         args_->method_filter_,
         args_->list_classes_,
         args_->list_methods_,
+        args_->dump_header_only_,
         args_->export_dex_location_,
+        args_->app_image_,
+        args_->app_oat_,
         args_->addr2instr_));
 
     return (args_->boot_image_location_ != nullptr || args_->image_location_ != nullptr) &&
@@ -2661,7 +2748,12 @@
     MemMap::Init();
 
     if (args_->symbolize_) {
-      return SymbolizeOat(args_->oat_filename_, args_->output_name_) == EXIT_SUCCESS;
+      // ELF has a special kind of section called SHT_NOBITS, which allows us to create
+      // sections that exist but whose data is omitted from the ELF file to save space.
+      // This is what "strip --only-keep-debug" does when it creates a separate ELF file
+      // with only debug data. We use it in a similar way to exclude .rodata and .text.
+      bool no_bits = args_->only_keep_debug_;
+      return SymbolizeOat(args_->oat_filename_, args_->output_name_, no_bits) == EXIT_SUCCESS;
     } else {
       return DumpOat(nullptr,
                      args_->oat_filename_,
@@ -2680,8 +2772,7 @@
                      args_->os_) == EXIT_SUCCESS;
     }
 
-    return DumpImage(runtime, args_->image_location_, oat_dumper_options_.get(), args_->os_)
-      == EXIT_SUCCESS;
+    return DumpImages(runtime, oat_dumper_options_.get(), args_->os_) == EXIT_SUCCESS;
   }
 
   std::unique_ptr<OatDumperOptions> oat_dumper_options_;
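
For context on the SHT_NOBITS trick used by --only-keep-debug above: the section header
survives with its address and size, and only the backing bytes are dropped. An
illustrative sketch using the standard ELF headers (StripSectionBytes is a hypothetical
helper, not the OatSymbolizer API):

    #include <elf.h>

    // Marking a section SHT_NOBITS keeps its header, sh_addr, and sh_size in the
    // output, so debuggers can still resolve addresses, while its contents are
    // omitted from the file, which is exactly the effect wanted for .rodata and .text.
    static void StripSectionBytes(Elf64_Shdr* shdr) {
      shdr->sh_type = SHT_NOBITS;
    }
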
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index b34bc84..db97055 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
 #include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
 #include "runtime/arch/instruction_set.h"
 #include "runtime/gc/heap.h"
 #include "runtime/gc/space/image_space.h"
@@ -58,25 +59,141 @@
   };
 
   // Run the test with custom arguments.
-  bool Exec(Mode mode, const std::vector<std::string>& args, std::string* error_msg) {
+  bool Exec(Mode mode,
+            const std::vector<std::string>& args,
+            bool list_only,
+            std::string* error_msg) {
     std::string file_path = GetOatDumpFilePath();
 
     EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
 
     std::vector<std::string> exec_argv = { file_path };
+    std::vector<std::string> expected_prefixes;
     if (mode == kModeSymbolize) {
       exec_argv.push_back("--symbolize=" + core_oat_location_);
       exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
-    } else if (mode == kModeArt) {
-      exec_argv.push_back("--image=" + core_art_location_);
-      exec_argv.push_back("--output=/dev/null");
     } else {
-      CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
-      exec_argv.push_back("--oat-file=" + core_oat_location_);
-      exec_argv.push_back("--output=/dev/null");
+      expected_prefixes.push_back("Dex file data for");
+      expected_prefixes.push_back("Num string ids:");
+      expected_prefixes.push_back("Num field ids:");
+      expected_prefixes.push_back("Num method ids:");
+      expected_prefixes.push_back("LOCATION:");
+      expected_prefixes.push_back("MAGIC:");
+      expected_prefixes.push_back("DEX FILE COUNT:");
+      if (!list_only) {
+        // Code and dex code do not show up if list only.
+        expected_prefixes.push_back("DEX CODE:");
+        expected_prefixes.push_back("CODE:");
+      }
+      if (mode == kModeArt) {
+        exec_argv.push_back("--image=" + core_art_location_);
+        exec_argv.push_back("--instruction-set=" + std::string(
+            GetInstructionSetString(kRuntimeISA)));
+        expected_prefixes.push_back("IMAGE LOCATION:");
+        expected_prefixes.push_back("IMAGE BEGIN:");
+        expected_prefixes.push_back("kDexCaches:");
+      } else {
+        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
+        exec_argv.push_back("--oat-file=" + core_oat_location_);
+      }
     }
     exec_argv.insert(exec_argv.end(), args.begin(), args.end());
-    return ::art::Exec(exec_argv, error_msg);
+
+    bool result = true;
+    // We must set --android-root.
+    int link[2];
+    if (pipe(link) == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    if (pid == 0) {
+      dup2(link[1], STDOUT_FILENO);
+      close(link[0]);
+      close(link[1]);
+      // Change process groups so we don't get reaped by ProcessManager.
+      setpgid(0, 0);
+      // Use execv here rather than art::Exec to avoid blocking on waitpid here.
+      std::vector<char*> argv;
+      for (size_t i = 0; i < exec_argv.size(); ++i) {
+        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
+      }
+      argv.push_back(nullptr);
+      UNUSED(execv(argv[0], &argv[0]));
+      const std::string command_line(Join(exec_argv, ' '));
+      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
+      // _exit to avoid atexit handlers in child.
+      _exit(1);
+    } else {
+      close(link[1]);
+      static const size_t kLineMax = 256;
+      char line[kLineMax] = {};
+      size_t line_len = 0;
+      size_t total = 0;
+      std::vector<bool> found(expected_prefixes.size(), false);
+      while (true) {
+        while (true) {
+          size_t spaces = 0;
+          // Trim spaces at the start of the line.
+          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
+          if (spaces > 0) {
+            line_len -= spaces;
+            memmove(&line[0], &line[spaces], line_len);
+          }
+          ssize_t bytes_read =
+              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
+          if (bytes_read <= 0) {
+            break;
+          }
+          line_len += bytes_read;
+          total += bytes_read;
+        }
+        if (line_len == 0) {
+          break;
+        }
+        // Check contents.
+        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+          const std::string& expected = expected_prefixes[i];
+          if (!found[i] &&
+              line_len >= expected.length() &&
+              memcmp(line, expected.c_str(), expected.length()) == 0) {
+            found[i] = true;
+          }
+        }
+        // Skip to next line.
+        size_t next_line = 0;
+        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
+        line_len -= next_line + 1;
+        memmove(&line[0], &line[next_line + 1], line_len);
+      }
+      if (mode == kModeSymbolize) {
+        EXPECT_EQ(total, 0u);
+      } else {
+        EXPECT_GT(total, 0u);
+      }
+      LOG(INFO) << "Processed bytes " << total;
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        result = (status == 0);
+      }
+
+      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+        if (!found[i]) {
+          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
+          result = false;
+        }
+      }
+    }
+
+    return result;
   }
 
  private:
@@ -84,49 +201,41 @@
   std::string core_oat_location_;
 };
 
+// Disable tests on arm and mips as they are taking too long to run. b/27824283.
+#if !defined(__arm__) && !defined(__mips__)
 TEST_F(OatDumpTest, TestImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestOatImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeOat, {}, &error_msg)) << error_msg;
-}
-
-TEST_F(OatDumpTest, TestDumpRawMappingTable) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--dump:raw_mapping_table"}, &error_msg)) << error_msg;
-}
-
-TEST_F(OatDumpTest, TestDumpRawGcMap) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--dump:raw_gc_map"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeOat, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDisassemble) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListClasses) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListMethods) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestSymbolize) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeSymbolize, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeSymbolize, {}, /*list_only*/ true, &error_msg)) << error_msg;
 }
-
+#endif
 }  // namespace art
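
The rewritten Exec() above captures oatdump's stdout through a pipe and ticks off
expected output prefixes. A condensed sketch of that scanning idea, reading from a plain
FILE* instead of a raw fd (AllPrefixesSeen is a hypothetical helper, not test code):

    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>

    // Read output line by line; mark each expected prefix the first time a line
    // starts with it, and succeed only if every prefix was seen.
    static bool AllPrefixesSeen(FILE* out, const std::vector<std::string>& prefixes) {
      std::vector<bool> found(prefixes.size(), false);
      char line[256];
      while (fgets(line, sizeof(line), out) != nullptr) {
        for (size_t i = 0; i < prefixes.size(); ++i) {
          if (!found[i] && strncmp(line, prefixes[i].c_str(), prefixes[i].size()) == 0) {
            found[i] = true;
          }
        }
      }
      for (bool f : found) {
        if (!f) return false;
      }
      return true;
    }
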
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 88622cc..3f6531b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -35,8 +35,9 @@
 #include "elf_file.h"
 #include "elf_file_impl.h"
 #include "gc/space/image_space.h"
-#include "image.h"
+#include "image-inl.h"
 #include "mirror/abstract_method.h"
+#include "mirror/dex_cache.h"
 #include "mirror/object-inl.h"
 #include "mirror/method.h"
 #include "mirror/reference.h"
@@ -118,140 +119,50 @@
   return true;
 }
 
-bool PatchOat::Patch(const std::string& image_location, off_t delta,
-                     File* output_image, InstructionSet isa,
-                     TimingLogger* timings) {
-  CHECK(Runtime::Current() == nullptr);
-  CHECK(output_image != nullptr);
-  CHECK_GE(output_image->Fd(), 0);
-  CHECK(!image_location.empty()) << "image file must have a filename.";
-  CHECK_NE(isa, kNone);
-
-  TimingLogger::ScopedTiming t("Runtime Setup", timings);
-  const char *isa_name = GetInstructionSetString(isa);
-  std::string image_filename;
-  if (!LocationToFilename(image_location, isa, &image_filename)) {
-    LOG(ERROR) << "Unable to find image at location " << image_location;
-    return false;
+static File* CreateOrOpen(const char* name, bool* created) {
+  if (OS::FileExists(name)) {
+    *created = false;
+    return OS::OpenFileReadWrite(name);
+  } else {
+    *created = true;
+    std::unique_ptr<File> f(OS::CreateEmptyFile(name));
+    if (f.get() != nullptr) {
+      if (fchmod(f->Fd(), 0644) != 0) {
+        PLOG(ERROR) << "Unable to make " << name << " world readable";
+        unlink(name);
+        return nullptr;
+      }
+    }
+    return f.release();
   }
-  std::unique_ptr<File> input_image(OS::OpenFileForReading(image_filename.c_str()));
-  if (input_image.get() == nullptr) {
-    LOG(ERROR) << "unable to open input image file at " << image_filename
-               << " for location " << image_location;
-    return false;
-  }
-
-  int64_t image_len = input_image->GetLength();
-  if (image_len < 0) {
-    LOG(ERROR) << "Error while getting image length";
-    return false;
-  }
-  ImageHeader image_header;
-  if (sizeof(image_header) != input_image->Read(reinterpret_cast<char*>(&image_header),
-                                                sizeof(image_header), 0)) {
-    LOG(ERROR) << "Unable to read image header from image file " << input_image->GetPath();
-    return false;
-  }
-
-  /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
-  // Nothing special to do right now since the image always needs to get patched.
-  // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
-
-  // Set up the runtime
-  RuntimeOptions options;
-  NoopCompilerCallbacks callbacks;
-  options.push_back(std::make_pair("compilercallbacks", &callbacks));
-  std::string img = "-Ximage:" + image_location;
-  options.push_back(std::make_pair(img.c_str(), nullptr));
-  options.push_back(std::make_pair("imageinstructionset", reinterpret_cast<const void*>(isa_name)));
-  options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
-  if (!Runtime::Create(options, false)) {
-    LOG(ERROR) << "Unable to initialize runtime";
-    return false;
-  }
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more manageable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
-  ScopedObjectAccess soa(Thread::Current());
-
-  t.NewTiming("Image and oat Patching setup");
-  // Create the map where we will write the image patches to.
-  std::string error_msg;
-  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len, PROT_READ | PROT_WRITE, MAP_PRIVATE,
-                                                input_image->Fd(), 0,
-                                                input_image->GetPath().c_str(),
-                                                &error_msg));
-  if (image.get() == nullptr) {
-    LOG(ERROR) << "unable to map image file " << input_image->GetPath() << " : " << error_msg;
-    return false;
-  }
-  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetImageSpace();
-
-  PatchOat p(isa, image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
-             delta, timings);
-  t.NewTiming("Patching files");
-  if (!p.PatchImage()) {
-    LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
-    return false;
-  }
-
-  t.NewTiming("Writing files");
-  if (!p.WriteImage(output_image)) {
-    return false;
-  }
-  return true;
 }
 
-bool PatchOat::Patch(File* input_oat, const std::string& image_location, off_t delta,
-                     File* output_oat, File* output_image, InstructionSet isa,
-                     TimingLogger* timings,
-                     bool output_oat_opened_from_fd,
-                     bool new_oat_out) {
+// Either flush and close the file (close == true), or erase it. Returns true only if
+// the file was successfully flushed and closed.
+static bool FinishFile(File* file, bool close) {
+  if (close) {
+    if (file->FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Failed to flush and close file.";
+      return false;
+    }
+    return true;
+  } else {
+    file->Erase();
+    return false;
+  }
+}
+
+bool PatchOat::Patch(const std::string& image_location,
+                     off_t delta,
+                     const std::string& output_directory,
+                     InstructionSet isa,
+                     TimingLogger* timings) {
   CHECK(Runtime::Current() == nullptr);
-  CHECK(output_image != nullptr);
-  CHECK_GE(output_image->Fd(), 0);
-  CHECK(input_oat != nullptr);
-  CHECK(output_oat != nullptr);
-  CHECK_GE(input_oat->Fd(), 0);
-  CHECK_GE(output_oat->Fd(), 0);
   CHECK(!image_location.empty()) << "image file must have a filename.";
 
   TimingLogger::ScopedTiming t("Runtime Setup", timings);
 
-  if (isa == kNone) {
-    Elf32_Ehdr elf_hdr;
-    if (sizeof(elf_hdr) != input_oat->Read(reinterpret_cast<char*>(&elf_hdr), sizeof(elf_hdr), 0)) {
-      LOG(ERROR) << "unable to read elf header";
-      return false;
-    }
-    isa = GetInstructionSetFromELF(elf_hdr.e_machine, elf_hdr.e_flags);
-  }
+  CHECK_NE(isa, kNone);
   const char* isa_name = GetInstructionSetString(isa);
-  std::string image_filename;
-  if (!LocationToFilename(image_location, isa, &image_filename)) {
-    LOG(ERROR) << "Unable to find image at location " << image_location;
-    return false;
-  }
-  std::unique_ptr<File> input_image(OS::OpenFileForReading(image_filename.c_str()));
-  if (input_image.get() == nullptr) {
-    LOG(ERROR) << "unable to open input image file at " << image_filename
-               << " for location " << image_location;
-    return false;
-  }
-  int64_t image_len = input_image->GetLength();
-  if (image_len < 0) {
-    LOG(ERROR) << "Error while getting image length";
-    return false;
-  }
-  ImageHeader image_header;
-  if (sizeof(image_header) != input_image->Read(reinterpret_cast<char*>(&image_header),
-                                              sizeof(image_header), 0)) {
-    LOG(ERROR) << "Unable to read image header from image file " << input_image->GetPath();
-  }
-
-  /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
-  // Nothing special to do right now since the image always needs to get patched.
-  // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
 
   // Set up the runtime
   RuntimeOptions options;
@@ -271,65 +182,167 @@
   ScopedObjectAccess soa(Thread::Current());
 
   t.NewTiming("Image and oat Patching setup");
-  // Create the map where we will write the image patches to.
-  std::string error_msg;
-  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len, PROT_READ | PROT_WRITE, MAP_PRIVATE,
-                                                input_image->Fd(), 0,
-                                                input_image->GetPath().c_str(),
-                                                &error_msg));
-  if (image.get() == nullptr) {
-    LOG(ERROR) << "unable to map image file " << input_image->GetPath() << " : " << error_msg;
-    return false;
-  }
-  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetImageSpace();
+  std::vector<gc::space::ImageSpace*> spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  std::map<gc::space::ImageSpace*, std::unique_ptr<File>> space_to_file_map;
+  std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>> space_to_memmap_map;
+  std::map<gc::space::ImageSpace*, PatchOat> space_to_patchoat_map;
+  std::map<gc::space::ImageSpace*, bool> space_to_skip_patching_map;
 
-  std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat,
-                                             PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
-  if (elf.get() == nullptr) {
-    LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
-    return false;
-  }
-
-  bool skip_patching_oat = false;
-  MaybePic is_oat_pic = IsOatPic(elf.get());
-  if (is_oat_pic >= ERROR_FIRST) {
-    // Error logged by IsOatPic
-    return false;
-  } else if (is_oat_pic == PIC) {
-    // Do not need to do ELF-file patching. Create a symlink and skip the ELF patching.
-    if (!ReplaceOatFileWithSymlink(input_oat->GetPath(),
-                                   output_oat->GetPath(),
-                                   output_oat_opened_from_fd,
-                                   new_oat_out)) {
-      // Errors already logged by above call.
+  for (size_t i = 0; i < spaces.size(); ++i) {
+    gc::space::ImageSpace* space = spaces[i];
+    std::string input_image_filename = space->GetImageFilename();
+    std::unique_ptr<File> input_image(OS::OpenFileForReading(input_image_filename.c_str()));
+    if (input_image.get() == nullptr) {
+      LOG(ERROR) << "Unable to open input image file at " << input_image_filename;
       return false;
     }
-    // Don't patch the OAT, since we just symlinked it. Image still needs patching.
-    skip_patching_oat = true;
-  } else {
-    CHECK(is_oat_pic == NOT_PIC);
+
+    int64_t image_len = input_image->GetLength();
+    if (image_len < 0) {
+      LOG(ERROR) << "Error while getting image length";
+      return false;
+    }
+    ImageHeader image_header;
+    if (sizeof(image_header) != input_image->Read(reinterpret_cast<char*>(&image_header),
+                                                  sizeof(image_header), 0)) {
+      LOG(ERROR) << "Unable to read image header from image file " << input_image->GetPath();
+    }
+
+    /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
+    // Nothing special to do right now since the image always needs to get patched.
+    // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
+
+    // Create the map where we will write the image patches to.
+    std::string error_msg;
+    std::unique_ptr<MemMap> image(MemMap::MapFile(image_len,
+                                                  PROT_READ | PROT_WRITE,
+                                                  MAP_PRIVATE,
+                                                  input_image->Fd(),
+                                                  0,
+                                                  /*low_4gb*/false,
+                                                  input_image->GetPath().c_str(),
+                                                  &error_msg));
+    if (image.get() == nullptr) {
+      LOG(ERROR) << "Unable to map image file " << input_image->GetPath() << " : " << error_msg;
+      return false;
+    }
+    space_to_file_map.emplace(space, std::move(input_image));
+    space_to_memmap_map.emplace(space, std::move(image));
   }
 
-  PatchOat p(isa, elf.release(), image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
-             delta, timings);
-  t.NewTiming("Patching files");
-  if (!skip_patching_oat && !p.PatchElf()) {
-    LOG(ERROR) << "Failed to patch oat file " << input_oat->GetPath();
-    return false;
-  }
-  if (!p.PatchImage()) {
-    LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
-    return false;
+  for (size_t i = 0; i < spaces.size(); ++i) {
+    gc::space::ImageSpace* space = spaces[i];
+    std::string input_image_filename = space->GetImageFilename();
+    std::string input_oat_filename =
+        ImageHeader::GetOatLocationFromImageLocation(input_image_filename);
+    std::unique_ptr<File> input_oat_file(OS::OpenFileForReading(input_oat_filename.c_str()));
+    if (input_oat_file.get() == nullptr) {
+      LOG(ERROR) << "Unable to open input oat file at " << input_oat_filename;
+      return false;
+    }
+    std::string error_msg;
+    std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat_file.get(),
+                                               PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
+    if (elf.get() == nullptr) {
+      LOG(ERROR) << "Unable to open oat file " << input_oat_file->GetPath() << " : " << error_msg;
+      return false;
+    }
+
+    bool skip_patching_oat = false;
+    MaybePic is_oat_pic = IsOatPic(elf.get());
+    if (is_oat_pic >= ERROR_FIRST) {
+      // Error logged by IsOatPic
+      return false;
+    } else if (is_oat_pic == PIC) {
+      // Do not need to do ELF-file patching. Create a symlink and skip the ELF patching.
+
+      std::string converted_image_filename = space->GetImageLocation();
+      std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
+      std::string output_image_filename = output_directory +
+                                          (StartsWith(converted_image_filename, "/") ? "" : "/") +
+                                          converted_image_filename;
+      std::string output_oat_filename =
+          ImageHeader::GetOatLocationFromImageLocation(output_image_filename);
+
+      if (!ReplaceOatFileWithSymlink(input_oat_file->GetPath(),
+                                     output_oat_filename,
+                                     false,
+                                     true)) {
+        // Errors already logged by above call.
+        return false;
+      }
+      // Don't patch the OAT, since we just symlinked it. Image still needs patching.
+      skip_patching_oat = true;
+    } else {
+      CHECK(is_oat_pic == NOT_PIC);
+    }
+
+    PatchOat& p = space_to_patchoat_map.emplace(space,
+                                                PatchOat(
+                                                    isa,
+                                                    elf.release(),
+                                                    space_to_memmap_map.find(space)->second.get(),
+                                                    space->GetLiveBitmap(),
+                                                    space->GetMemMap(),
+                                                    delta,
+                                                    &space_to_memmap_map,
+                                                    timings)).first->second;
+
+    t.NewTiming("Patching files");
+    if (!skip_patching_oat && !p.PatchElf()) {
+      LOG(ERROR) << "Failed to patch oat file " << input_oat_file->GetPath();
+      return false;
+    }
+    if (!p.PatchImage(i == 0)) {
+      LOG(ERROR) << "Failed to patch image file " << input_image_filename;
+      return false;
+    }
+
+    space_to_skip_patching_map.emplace(space, skip_patching_oat);
   }
 
-  t.NewTiming("Writing files");
-  if (!skip_patching_oat && !p.WriteElf(output_oat)) {
-    LOG(ERROR) << "Failed to write oat file " << input_oat->GetPath();
-    return false;
-  }
-  if (!p.WriteImage(output_image)) {
-    LOG(ERROR) << "Failed to write image file " << input_image->GetPath();
-    return false;
+  for (size_t i = 0; i < spaces.size(); ++i) {
+    gc::space::ImageSpace* space = spaces[i];
+    std::string input_image_filename = space->GetImageFilename();
+
+    t.NewTiming("Writing files");
+    std::string converted_image_filename = space->GetImageLocation();
+    std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
+    std::string output_image_filename = output_directory +
+                                        (StartsWith(converted_image_filename, "/") ? "" : "/") +
+                                        converted_image_filename;
+    bool new_oat_out;
+    std::unique_ptr<File>
+        output_image_file(CreateOrOpen(output_image_filename.c_str(), &new_oat_out));
+    if (output_image_file.get() == nullptr) {
+      LOG(ERROR) << "Failed to open output image file at " << output_image_filename;
+      return false;
+    }
+
+    PatchOat& p = space_to_patchoat_map.find(space)->second;
+
+    bool success = p.WriteImage(output_image_file.get());
+    success = FinishFile(output_image_file.get(), success);
+    if (!success) {
+      return false;
+    }
+
+    bool skip_patching_oat = space_to_skip_patching_map.find(space)->second;
+    if (!skip_patching_oat) {
+      std::string output_oat_filename =
+          ImageHeader::GetOatLocationFromImageLocation(output_image_filename);
+      std::unique_ptr<File>
+          output_oat_file(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
+      if (output_oat_file.get() == nullptr) {
+        LOG(ERROR) << "Failed to open output oat file at " << output_oat_filename;
+        return false;
+      }
+      success = p.WriteElf(output_oat_file.get());
+      success = FinishFile(output_oat_file.get(), success);
+      if (!success) {
+        return false;
+      }
+    }
   }
   return true;
 }
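
The image-location mangling repeated in the loops above follows the dalvik-cache naming
convention: every '/' after the leading one becomes '@'. A standalone sketch of the
transformation (ToOutputImageFilename is a hypothetical helper):

    #include <algorithm>
    #include <string>

    // "/system/framework/boot.art" with output_dir "/data/dalvik-cache/arm64"
    // becomes "/data/dalvik-cache/arm64/system@framework@boot.art".
    static std::string ToOutputImageFilename(const std::string& output_dir,
                                             std::string location) {
      if (location.size() > 1) {
        std::replace(location.begin() + 1, location.end(), '/', '@');
      }
      bool is_absolute = !location.empty() && location[0] == '/';
      return output_dir + (is_absolute ? "" : "/") + location;
    }
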
@@ -428,7 +441,7 @@
   }
 
   // Delete the original file, since we won't need it.
-  TEMP_FAILURE_RETRY(unlink(output_oat_filename.c_str()));
+  unlink(output_oat_filename.c_str());
 
   // Create a symlink from the old oat to the new oat
   if (symlink(input_oat_filename.c_str(), output_oat_filename.c_str()) < 0) {
@@ -460,8 +473,7 @@
 
 void PatchOat::PatchArtFields(const ImageHeader* image_header) {
   PatchOatArtFieldVisitor visitor(this);
-  const auto& section = image_header->GetImageSection(ImageHeader::kSectionArtFields);
-  section.VisitPackedArtFields(&visitor, heap_->Begin());
+  image_header->VisitPackedArtFields(&visitor, heap_->Begin());
 }
 
 class PatchOatArtMethodVisitor : public ArtMethodVisitor {
@@ -478,10 +490,31 @@
 };
 
 void PatchOat::PatchArtMethods(const ImageHeader* image_header) {
-  const auto& section = image_header->GetMethodsSection();
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   PatchOatArtMethodVisitor visitor(this);
-  section.VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
+  image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
+}
+
+void PatchOat::PatchImTables(const ImageHeader* image_header) {
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
+  // We can safely walk the target image since the im tables are independent.
+  image_header->VisitPackedImTables(
+      [this](ArtMethod* method) {
+        return RelocatedAddressOfPointer(method);
+      },
+      image_->Begin(),
+      pointer_size);
+}
+
+void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
+  // We can safely walk the target image since the conflict tables are independent.
+  image_header->VisitPackedImtConflictTables(
+      [this](ArtMethod* method) {
+        return RelocatedAddressOfPointer(method);
+      },
+      image_->Begin(),
+      pointer_size);
 }
 
 class FixupRootVisitor : public RootVisitor {
@@ -514,32 +547,59 @@
   // Note that we require that ReadFromMemory does not make an internal copy of the elements.
   // This also relies on visit roots not doing any verification which could fail after we update
   // the roots to be the image addresses.
-  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  temp_table.AddTableFromMemory(image_->Begin() + section.Offset());
   FixupRootVisitor visitor(this);
   temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
 }
 
+void PatchOat::PatchClassTable(const ImageHeader* image_header) {
+  const auto& section = image_header->GetImageSection(ImageHeader::kSectionClassTable);
+  if (section.Size() == 0) {
+    return;
+  }
+  // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+  // This also relies on visit roots not doing any verification which could fail after we update
+  // the roots to be the image addresses.
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  ClassTable temp_table;
+  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  FixupRootVisitor visitor(this);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&visitor, RootInfo(kRootUnknown));
+  temp_table.VisitRoots(buffered_visitor);
+}
+
+
+class RelocatedPointerVisitor {
+ public:
+  explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
+
+  template <typename T>
+  T* operator()(T* ptr) const {
+    return patch_oat_->RelocatedAddressOfPointer(ptr);
+  }
+
+ private:
+  PatchOat* const patch_oat_;
+};
+
 void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) {
   auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>(
       img_roots->Get(ImageHeader::kDexCaches));
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   for (size_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
     auto* orig_dex_cache = dex_caches->GetWithoutChecks(i);
     auto* copy_dex_cache = RelocatedCopyOf(orig_dex_cache);
-    const size_t pointer_size = InstructionSetPointerSize(isa_);
     // Though the DexCache array fields are usually treated as native pointers, we set the full
     // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
     // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
     //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-    GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
-    GcRoot<mirror::String>* relocated_strings = RelocatedAddressOfPointer(orig_strings);
+    mirror::StringDexCacheType* orig_strings = orig_dex_cache->GetStrings();
+    mirror::StringDexCacheType* relocated_strings = RelocatedAddressOfPointer(orig_strings);
     copy_dex_cache->SetField64<false>(
         mirror::DexCache::StringsOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_strings)));
     if (orig_strings != nullptr) {
-      GcRoot<mirror::String>* copy_strings = RelocatedCopyOf(orig_strings);
-      for (size_t j = 0, num = orig_dex_cache->NumStrings(); j != num; ++j) {
-        copy_strings[j] = GcRoot<mirror::String>(RelocatedAddressOfPointer(orig_strings[j].Read()));
-      }
+      orig_dex_cache->FixupStrings(RelocatedCopyOf(orig_strings), RelocatedPointerVisitor(this));
     }
     GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
     GcRoot<mirror::Class>* relocated_types = RelocatedAddressOfPointer(orig_types);
@@ -547,10 +607,8 @@
         mirror::DexCache::ResolvedTypesOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_types)));
     if (orig_types != nullptr) {
-      GcRoot<mirror::Class>* copy_types = RelocatedCopyOf(orig_types);
-      for (size_t j = 0, num = orig_dex_cache->NumResolvedTypes(); j != num; ++j) {
-        copy_types[j] = GcRoot<mirror::Class>(RelocatedAddressOfPointer(orig_types[j].Read()));
-      }
+      orig_dex_cache->FixupResolvedTypes(RelocatedCopyOf(orig_types),
+                                         RelocatedPointerVisitor(this));
     }
     ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
     ArtMethod** relocated_methods = RelocatedAddressOfPointer(orig_methods);
@@ -581,26 +639,7 @@
   }
 }
 
-void PatchOat::FixupNativePointerArray(mirror::PointerArray* object) {
-  if (object->IsIntArray()) {
-    mirror::IntArray* arr = object->AsIntArray();
-    mirror::IntArray* copy_arr = down_cast<mirror::IntArray*>(RelocatedCopyOf(arr));
-    for (size_t j = 0, count2 = arr->GetLength(); j < count2; ++j) {
-      copy_arr->SetWithoutChecks<false>(
-          j, RelocatedAddressOfIntPointer(arr->GetWithoutChecks(j)));
-    }
-  } else {
-    CHECK(object->IsLongArray());
-    mirror::LongArray* arr = object->AsLongArray();
-    mirror::LongArray* copy_arr = down_cast<mirror::LongArray*>(RelocatedCopyOf(arr));
-    for (size_t j = 0, count2 = arr->GetLength(); j < count2; ++j) {
-      copy_arr->SetWithoutChecks<false>(
-          j, RelocatedAddressOfIntPointer(arr->GetWithoutChecks(j)));
-    }
-  }
-}
-
-bool PatchOat::PatchImage() {
+bool PatchOat::PatchImage(bool primary_image) {
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   CHECK_GT(image_->Size(), sizeof(ImageHeader));
   // These are the roots from the original file.
@@ -609,13 +648,19 @@
 
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
+  PatchImTables(image_header);
+  PatchImtConflictTables(image_header);
   PatchInternedStrings(image_header);
+  PatchClassTable(image_header);
   // Patch dex file int/long arrays which point to ArtFields.
   PatchDexFileArrays(img_roots);
 
-  VisitObject(img_roots);
+  if (primary_image) {
+    VisitObject(img_roots);
+  }
+
   if (!image_header->IsValid()) {
-    LOG(ERROR) << "reloction renders image header invalid";
+    LOG(ERROR) << "relocation renders image header invalid";
     return false;
   }
 
@@ -628,17 +673,10 @@
   return true;
 }
 
-bool PatchOat::InHeap(mirror::Object* o) {
-  uintptr_t begin = reinterpret_cast<uintptr_t>(heap_->Begin());
-  uintptr_t end = reinterpret_cast<uintptr_t>(heap_->End());
-  uintptr_t obj = reinterpret_cast<uintptr_t>(o);
-  return o == nullptr || (begin <= obj && obj < end);
-}
 
 void PatchOat::PatchVisitor::operator() (mirror::Object* obj, MemberOffset off,
                                          bool is_static_unused ATTRIBUTE_UNUSED) const {
   mirror::Object* referent = obj->GetFieldObject<mirror::Object, kVerifyNone>(off);
-  DCHECK(patcher_->InHeap(referent)) << "Referent is not in the heap.";
   mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
@@ -647,7 +685,8 @@
                                          mirror::Reference* ref) const {
   MemberOffset off = mirror::Reference::ReferentOffset();
   mirror::Object* referent = ref->GetReferent();
-  DCHECK(patcher_->InHeap(referent)) << "Referent is not in the heap.";
+  DCHECK(referent == nullptr ||
+         Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(referent)) << referent;
   mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
@@ -667,17 +706,14 @@
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<kVerifyNone>(visitor, visitor);
   if (object->IsClass<kVerifyNone>()) {
-    auto* klass = object->AsClass();
-    auto* copy_klass = down_cast<mirror::Class*>(copy);
-    copy_klass->SetDexCacheStrings(RelocatedAddressOfPointer(klass->GetDexCacheStrings()));
-    copy_klass->SetSFieldsPtrUnchecked(RelocatedAddressOfPointer(klass->GetSFieldsPtr()));
-    copy_klass->SetIFieldsPtrUnchecked(RelocatedAddressOfPointer(klass->GetIFieldsPtr()));
-    copy_klass->SetDirectMethodsPtrUnchecked(
-        RelocatedAddressOfPointer(klass->GetDirectMethodsPtr()));
-    copy_klass->SetVirtualMethodsPtr(RelocatedAddressOfPointer(klass->GetVirtualMethodsPtr()));
+    const PointerSize pointer_size = InstructionSetPointerSize(isa_);
+    mirror::Class* klass = object->AsClass();
+    mirror::Class* copy_klass = down_cast<mirror::Class*>(copy);
+    RelocatedPointerVisitor native_visitor(this);
+    klass->FixupNativePointers(copy_klass, pointer_size, native_visitor);
     auto* vtable = klass->GetVTable();
     if (vtable != nullptr) {
-      FixupNativePointerArray(vtable);
+      vtable->Fixup(RelocatedCopyOfFollowImages(vtable), pointer_size, native_visitor);
     }
     auto* iftable = klass->GetIfTable();
     if (iftable != nullptr) {
@@ -685,24 +721,14 @@
         if (iftable->GetMethodArrayCount(i) > 0) {
           auto* method_array = iftable->GetMethodArray(i);
           CHECK(method_array != nullptr);
-          FixupNativePointerArray(method_array);
+          method_array->Fixup(RelocatedCopyOfFollowImages(method_array),
+                              pointer_size,
+                              native_visitor);
         }
       }
     }
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      const size_t pointer_size = InstructionSetPointerSize(isa_);
-      for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
-        copy_klass->SetEmbeddedVTableEntryUnchecked(i, RelocatedAddressOfPointer(
-            klass->GetEmbeddedVTableEntry(i, pointer_size)), pointer_size);
-      }
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        copy_klass->SetEmbeddedImTableEntry(i, RelocatedAddressOfPointer(
-            klass->GetEmbeddedImTableEntry(i, pointer_size)), pointer_size);
-      }
-    }
-  }
-  if (object->GetClass() == mirror::Method::StaticClass() ||
-      object->GetClass() == mirror::Constructor::StaticClass()) {
+  } else if (object->GetClass() == mirror::Method::StaticClass() ||
+             object->GetClass() == mirror::Constructor::StaticClass()) {
     // Need to go update the ArtMethod.
     auto* dest = down_cast<mirror::AbstractMethod*>(copy);
     auto* src = down_cast<mirror::AbstractMethod*>(object);
@@ -711,7 +737,7 @@
 }
 
 void PatchOat::FixupMethod(ArtMethod* object, ArtMethod* copy) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   copy->CopyFrom(object, pointer_size);
   // Just update the entry points if it looks like we should.
   // TODO: sanity check all the pointers' values
@@ -722,8 +748,9 @@
       RelocatedAddressOfPointer(object->GetDexCacheResolvedTypes(pointer_size)), pointer_size);
   copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer(
       object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size);
-  copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer(
-      object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size);
+  // No special handling for IMT conflict table since all pointers are moved by the same offset.
+  copy->SetDataPtrSize(RelocatedAddressOfPointer(
+      object->GetDataPtrSize(pointer_size)), pointer_size);
 }
 
 bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings,
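
The Relocated* helpers used throughout FixupMethod and the visitors above all reduce to
shifting an address by the one image-wide delta. A minimal sketch of that arithmetic
(RelocatedSketch is a hypothetical name; the real helpers also translate between the
source and destination copies of the image):

    #include <cstdint>
    #include <sys/types.h>  // off_t

    // Every native pointer in the image moves by the same fixed delta, which is
    // why no per-field special handling is needed for e.g. IMT conflict tables.
    template <typename T>
    static T* RelocatedSketch(T* ptr, off_t delta) {
      if (ptr == nullptr) {
        return nullptr;  // Null stays null across relocation.
      }
      return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(ptr) + delta);
    }
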
@@ -910,21 +937,9 @@
   UsageError("  --output-image-file=<file.art>: Specifies the exact file to write the patched");
   UsageError("      image file to.");
   UsageError("");
-  UsageError("  --output-image-fd=<file-descriptor>: Specifies the file-descriptor to write the");
-  UsageError("      the patched image file to.");
-  UsageError("");
-  UsageError("  --orig-base-offset=<original-base-offset>: Specify the base offset the input file");
-  UsageError("      was compiled with. This is needed if one is specifying a --base-offset");
-  UsageError("");
-  UsageError("  --base-offset=<new-base-offset>: Specify the base offset we will repatch the");
-  UsageError("      given files to use. This requires that --orig-base-offset is also given.");
-  UsageError("");
   UsageError("  --base-offset-delta=<delta>: Specify the amount to change the old base-offset by.");
   UsageError("      This value may be negative.");
   UsageError("");
-  UsageError("  --patched-image-file=<file.art>: Relocate the oat file to be the same as the");
-  UsageError("      given image file.");
-  UsageError("");
   UsageError("  --patched-image-location=<file.art>: Relocate the oat file to be the same as the");
   UsageError("      image at the given location. If used one must also specify the");
   UsageError("      --instruction-set flag. It will search for this image in the same way that");
@@ -970,36 +985,246 @@
   return true;
 }
 
-static File* CreateOrOpen(const char* name, bool* created) {
-  if (OS::FileExists(name)) {
-    *created = false;
-    return OS::OpenFileReadWrite(name);
-  } else {
-    *created = true;
-    std::unique_ptr<File> f(OS::CreateEmptyFile(name));
-    if (f.get() != nullptr) {
-      if (fchmod(f->Fd(), 0644) != 0) {
-        PLOG(ERROR) << "Unable to make " << name << " world readable";
-        TEMP_FAILURE_RETRY(unlink(name));
-        return nullptr;
-      }
-    }
-    return f.release();
+static int patchoat_image(TimingLogger& timings,
+                          InstructionSet isa,
+                          const std::string& input_image_location,
+                          const std::string& output_image_filename,
+                          off_t base_delta,
+                          bool base_delta_set,
+                          bool debug) {
+  CHECK(!input_image_location.empty());
+  if (output_image_filename.empty()) {
+    Usage("Image patching requires --output-image-file");
   }
+
+  if (!base_delta_set) {
+    Usage("Must supply a desired new offset or delta.");
+  }
+
+  if (!IsAligned<kPageSize>(base_delta)) {
+    Usage("Base offset/delta must be aligned to a pagesize (0x%08x) boundary.", kPageSize);
+  }
+
+  if (debug) {
+    LOG(INFO) << "moving offset by " << base_delta
+        << " (0x" << std::hex << base_delta << ") bytes or "
+        << std::dec << (base_delta/kPageSize) << " pages.";
+  }
+
+  TimingLogger::ScopedTiming pt("patch image and oat", &timings);
+
+  std::string output_directory =
+      output_image_filename.substr(0, output_image_filename.find_last_of("/"));
+  bool ret = PatchOat::Patch(input_image_location, base_delta, output_directory, isa, &timings);
+
+  if (kIsDebugBuild) {
+    LOG(INFO) << "Exiting with return ... " << ret;
+  }
+  return ret ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
-// Either try to close the file (close=true), or erase it.
-static bool FinishFile(File* file, bool close) {
-  if (close) {
-    if (file->FlushCloseOrErase() != 0) {
-      PLOG(ERROR) << "Failed to flush and close file.";
+static int patchoat_oat(TimingLogger& timings,
+                        InstructionSet isa,
+                        const std::string& patched_image_location,
+                        off_t base_delta,
+                        bool base_delta_set,
+                        int input_oat_fd,
+                        const std::string& input_oat_location,
+                        std::string input_oat_filename,
+                        bool have_input_oat,
+                        int output_oat_fd,
+                        std::string output_oat_filename,
+                        bool have_output_oat,
+                        bool lock_output,
+                        bool debug) {
+  {
+    // Only 1 of these may be set.
+    uint32_t cnt = 0;
+    cnt += (base_delta_set) ? 1 : 0;
+    cnt += (!patched_image_location.empty()) ? 1 : 0;
+    if (cnt > 1) {
+      Usage("Only one of --base-offset-delta or --patched-image-location may be used.");
+    } else if (cnt == 0) {
+      Usage("Must specify --base-offset-delta or --patched-image-location.");
+    }
+  }
+
+  if (!have_input_oat || !have_output_oat) {
+    Usage("Both input and output oat must be supplied to patch an app odex.");
+  }
+
+  if (!input_oat_location.empty()) {
+    if (!LocationToFilename(input_oat_location, isa, &input_oat_filename)) {
+      Usage("Unable to find filename for input oat location %s", input_oat_location.c_str());
+    }
+    if (debug) {
+      LOG(INFO) << "Using input-oat-file " << input_oat_filename;
+    }
+  }
+
+  bool match_delta = false;
+  if (!patched_image_location.empty()) {
+    std::string system_filename;
+    bool has_system = false;
+    std::string cache_filename;
+    bool has_cache = false;
+    bool has_android_data_unused = false;
+    bool is_global_cache = false;
+    if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
+                                                  &system_filename, &has_system, &cache_filename,
+                                                  &has_android_data_unused, &has_cache,
+                                                  &is_global_cache)) {
+      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
+    }
+    std::string patched_image_filename;
+    if (has_cache) {
+      patched_image_filename = cache_filename;
+    } else if (has_system) {
+      LOG(WARNING) << "Only image file found was in /system for image location "
+          << patched_image_location;
+      patched_image_filename = system_filename;
+    } else {
+      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
+    }
+    if (debug) {
+      LOG(INFO) << "Using patched-image-file " << patched_image_filename;
+    }
+
+    base_delta_set = true;
+    match_delta = true;
+    std::string error_msg;
+    if (!ReadBaseDelta(patched_image_filename.c_str(), &base_delta, &error_msg)) {
+      Usage(error_msg.c_str(), patched_image_filename.c_str());
+    }
+  }
+
+  if (!IsAligned<kPageSize>(base_delta)) {
+    Usage("Base offset/delta must be alligned to a pagesize (0x%08x) boundary.", kPageSize);
+  }
+
+  // Do we need to cleanup output files if we fail?
+  bool new_oat_out = false;
+
+  std::unique_ptr<File> input_oat;
+  std::unique_ptr<File> output_oat;
+
+  if (input_oat_fd != -1) {
+    if (input_oat_filename.empty()) {
+      input_oat_filename = "input-oat-file";
+    }
+    input_oat.reset(new File(input_oat_fd, input_oat_filename, false));
+    if (input_oat_fd == output_oat_fd) {
+      input_oat.get()->DisableAutoClose();
+    }
+    if (input_oat == nullptr) {
+      // Unlikely, but ensure exhaustive logging in non-0 exit code case
+      LOG(ERROR) << "Failed to open input oat file by its FD" << input_oat_fd;
+      return EXIT_FAILURE;
+    }
+  } else {
+    CHECK(!input_oat_filename.empty());
+    input_oat.reset(OS::OpenFileForReading(input_oat_filename.c_str()));
+    if (input_oat == nullptr) {
+      int err = errno;
+      LOG(ERROR) << "Failed to open input oat file " << input_oat_filename
+          << ": " << strerror(err) << "(" << err << ")";
+      return EXIT_FAILURE;
+    }
+  }
+
+  std::string error_msg;
+  std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat.get(), PROT_READ, MAP_PRIVATE, &error_msg));
+  if (elf.get() == nullptr) {
+    LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
+    return EXIT_FAILURE;
+  }
+  if (!elf->HasSection(".text.oat_patches")) {
+    LOG(ERROR) << "missing oat patch section in input oat file " << input_oat->GetPath();
+    return EXIT_FAILURE;
+  }
+
+  if (output_oat_fd != -1) {
+    if (output_oat_filename.empty()) {
+      output_oat_filename = "output-oat-file";
+    }
+    output_oat.reset(new File(output_oat_fd, output_oat_filename, true));
+    if (output_oat == nullptr) {
+      // Unlikely, but ensure exhaustive logging in non-0 exit code case
+      LOG(ERROR) << "Failed to open output oat file by its FD" << output_oat_fd;
+    }
+  } else {
+    CHECK(!output_oat_filename.empty());
+    output_oat.reset(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
+    if (output_oat == nullptr) {
+      int err = errno;
+      LOG(ERROR) << "Failed to open output oat file " << output_oat_filename
+          << ": " << strerror(err) << "(" << err << ")";
+    }
+  }
+
+  // TODO: get rid of this.
+  auto cleanup = [&output_oat_filename, &new_oat_out](bool success) {
+    if (!success) {
+      if (new_oat_out) {
+        CHECK(!output_oat_filename.empty());
+        unlink(output_oat_filename.c_str());
+      }
+    }
+
+    if (kIsDebugBuild) {
+      LOG(INFO) << "Cleaning up.. success? " << success;
+    }
+  };
+
+  if (output_oat.get() == nullptr) {
+    cleanup(false);
+    return EXIT_FAILURE;
+  }
+
+  if (match_delta) {
+    // Figure out what the current delta is so we can match it to the desired delta.
+    off_t current_delta = 0;
+    if (!ReadOatPatchDelta(elf.get(), &current_delta, &error_msg)) {
+      LOG(ERROR) << "Unable to get current delta: " << error_msg;
+      cleanup(false);
+      return EXIT_FAILURE;
+    }
+    // Up to this point base_delta holds the desired final delta. Subtract the
+    // current delta so that it becomes the amount everything must actually be
+    // shifted by.
+    base_delta -= current_delta;
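+    // For example, if the desired final delta is 0x3000 and the input oat was
+    // already relocated by 0x1000, only 0x2000 still needs to be applied here.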
+    if (!IsAligned<kPageSize>(base_delta)) {
+      LOG(ERROR) << "Given image file was relocated by an illegal delta";
+      cleanup(false);
       return false;
     }
-    return true;
-  } else {
-    file->Erase();
-    return false;
   }
+
+  if (debug) {
+    LOG(INFO) << "moving offset by " << base_delta
+        << " (0x" << std::hex << base_delta << ") bytes or "
+        << std::dec << (base_delta/kPageSize) << " pages.";
+  }
+
+  ScopedFlock output_oat_lock;
+  if (lock_output) {
+    if (!output_oat_lock.Init(output_oat.get(), &error_msg)) {
+      LOG(ERROR) << "Unable to lock output oat " << output_oat->GetPath() << ": " << error_msg;
+      cleanup(false);
+      return EXIT_FAILURE;
+    }
+  }
+
+  TimingLogger::ScopedTiming pt("patch oat", &timings);
+  bool ret = PatchOat::Patch(input_oat.get(), base_delta, output_oat.get(), &timings,
+                             output_oat_fd >= 0,  // was it opened from FD?
+                             new_oat_out);
+  ret = FinishFile(output_oat.get(), ret);
+
+  if (kIsDebugBuild) {
+    LOG(INFO) << "Exiting with return ... " << ret;
+  }
+  cleanup(ret);
+  return ret ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
 static int patchoat(int argc, char **argv) {
@@ -1034,15 +1259,8 @@
   int output_oat_fd = -1;
   bool have_output_oat = false;
   std::string output_image_filename;
-  int output_image_fd = -1;
-  bool have_output_image = false;
-  uintptr_t base_offset = 0;
-  bool base_offset_set = false;
-  uintptr_t orig_base_offset = 0;
-  bool orig_base_offset_set = false;
   off_t base_delta = 0;
   bool base_delta_set = false;
-  bool match_delta = false;
   std::string patched_image_filename;
   std::string patched_image_location;
   bool dump_timings = kIsDebugBuild;
@@ -1106,36 +1324,7 @@
         Usage("--output-oat-fd pass a negative value %d", output_oat_fd);
       }
     } else if (option.starts_with("--output-image-file=")) {
-      if (have_output_image) {
-        Usage("Only one of --output-image-file, and --output-image-fd may be used.");
-      }
-      have_output_image = true;
       output_image_filename = option.substr(strlen("--output-image-file=")).data();
-    } else if (option.starts_with("--output-image-fd=")) {
-      if (have_output_image) {
-        Usage("Only one of --output-image-file, and --output-image-fd may be used.");
-      }
-      have_output_image = true;
-      const char* image_fd_str = option.substr(strlen("--output-image-fd=")).data();
-      if (!ParseInt(image_fd_str, &output_image_fd)) {
-        Usage("Failed to parse --output-image-fd argument '%s' as an integer", image_fd_str);
-      }
-      if (output_image_fd < 0) {
-        Usage("--output-image-fd pass a negative value %d", output_image_fd);
-      }
-    } else if (option.starts_with("--orig-base-offset=")) {
-      const char* orig_base_offset_str = option.substr(strlen("--orig-base-offset=")).data();
-      orig_base_offset_set = true;
-      if (!ParseUint(orig_base_offset_str, &orig_base_offset)) {
-        Usage("Failed to parse --orig-base-offset argument '%s' as an uintptr_t",
-              orig_base_offset_str);
-      }
-    } else if (option.starts_with("--base-offset=")) {
-      const char* base_offset_str = option.substr(strlen("--base-offset=")).data();
-      base_offset_set = true;
-      if (!ParseUint(base_offset_str, &base_offset)) {
-        Usage("Failed to parse --base-offset argument '%s' as an uintptr_t", base_offset_str);
-      }
     } else if (option.starts_with("--base-offset-delta=")) {
       const char* base_delta_str = option.substr(strlen("--base-offset-delta=")).data();
       base_delta_set = true;
@@ -1144,8 +1333,6 @@
       }
     } else if (option.starts_with("--patched-image-location=")) {
       patched_image_location = option.substr(strlen("--patched-image-location=")).data();
-    } else if (option.starts_with("--patched-image-file=")) {
-      patched_image_filename = option.substr(strlen("--patched-image-file=")).data();
     } else if (option == "--lock-output") {
       lock_output = true;
     } else if (option == "--no-lock-output") {
@@ -1159,284 +1346,43 @@
     }
   }
 
-  {
-    // Only 1 of these may be set.
-    uint32_t cnt = 0;
-    cnt += (base_delta_set) ? 1 : 0;
-    cnt += (base_offset_set && orig_base_offset_set) ? 1 : 0;
-    cnt += (!patched_image_filename.empty()) ? 1 : 0;
-    cnt += (!patched_image_location.empty()) ? 1 : 0;
-    if (cnt > 1) {
-      Usage("Only one of --base-offset/--orig-base-offset, --base-offset-delta, "
-            "--patched-image-filename or --patched-image-location may be used.");
-    } else if (cnt == 0) {
-      Usage("Must specify --base-offset-delta, --base-offset and --orig-base-offset, "
-            "--patched-image-location or --patched-image-file");
-    }
+  // The instruction set is mandatory; resolving oat and image locations to
+  // filenames requires it.
+  if (!isa_set) {
+    Usage("Instruction set must be set.");
   }
 
-  if (have_input_oat != have_output_oat) {
-    Usage("Either both input and output oat must be supplied or niether must be.");
-  }
-
-  if ((!input_image_location.empty()) != have_output_image) {
-    Usage("Either both input and output image must be supplied or niether must be.");
-  }
-
-  // We know we have both the input and output so rename for clarity.
-  bool have_image_files = have_output_image;
-  bool have_oat_files = have_output_oat;
-
-  if (!have_oat_files && !have_image_files) {
-    Usage("Must be patching either an oat or an image file or both.");
-  }
-
-  if (!have_oat_files && !isa_set) {
-    Usage("Must include ISA if patching an image file without an oat file.");
-  }
-
-  if (!input_oat_location.empty()) {
-    if (!isa_set) {
-      Usage("specifying a location requires specifying an instruction set");
-    }
-    if (!LocationToFilename(input_oat_location, isa, &input_oat_filename)) {
-      Usage("Unable to find filename for input oat location %s", input_oat_location.c_str());
-    }
-    if (debug) {
-      LOG(INFO) << "Using input-oat-file " << input_oat_filename;
-    }
-  }
-  if (!patched_image_location.empty()) {
-    if (!isa_set) {
-      Usage("specifying a location requires specifying an instruction set");
-    }
-    std::string system_filename;
-    bool has_system = false;
-    std::string cache_filename;
-    bool has_cache = false;
-    bool has_android_data_unused = false;
-    bool is_global_cache = false;
-    if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
-                                                  &system_filename, &has_system, &cache_filename,
-                                                  &has_android_data_unused, &has_cache,
-                                                  &is_global_cache)) {
-      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
-    }
-    if (has_cache) {
-      patched_image_filename = cache_filename;
-    } else if (has_system) {
-      LOG(WARNING) << "Only image file found was in /system for image location "
-                   << patched_image_location;
-      patched_image_filename = system_filename;
-    } else {
-      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
-    }
-    if (debug) {
-      LOG(INFO) << "Using patched-image-file " << patched_image_filename;
-    }
-  }
-
-  if (!base_delta_set) {
-    if (orig_base_offset_set && base_offset_set) {
-      base_delta_set = true;
-      base_delta = base_offset - orig_base_offset;
-    } else if (!patched_image_filename.empty()) {
-      if (have_image_files) {
-        Usage("--patched-image-location should not be used when patching other images");
-      }
-      base_delta_set = true;
-      match_delta = true;
-      std::string error_msg;
-      if (!ReadBaseDelta(patched_image_filename.c_str(), &base_delta, &error_msg)) {
-        Usage(error_msg.c_str(), patched_image_filename.c_str());
-      }
-    } else {
-      if (base_offset_set) {
-        Usage("Unable to determine original base offset.");
-      } else {
-        Usage("Must supply a desired new offset or delta.");
-      }
-    }
-  }
-
-  if (!IsAligned<kPageSize>(base_delta)) {
-    Usage("Base offset/delta must be alligned to a pagesize (0x%08x) boundary.", kPageSize);
-  }
-
-  // Do we need to cleanup output files if we fail?
-  bool new_image_out = false;
-  bool new_oat_out = false;
-
-  std::unique_ptr<File> input_oat;
-  std::unique_ptr<File> output_oat;
-  std::unique_ptr<File> output_image;
-
-  if (have_image_files) {
-    CHECK(!input_image_location.empty());
-
-    if (output_image_fd != -1) {
-      if (output_image_filename.empty()) {
-        output_image_filename = "output-image-file";
-      }
-      output_image.reset(new File(output_image_fd, output_image_filename, true));
-    } else {
-      CHECK(!output_image_filename.empty());
-      output_image.reset(CreateOrOpen(output_image_filename.c_str(), &new_image_out));
-    }
+  int ret;
+  if (!input_image_location.empty()) {
+    ret = patchoat_image(timings,
+                         isa,
+                         input_image_location,
+                         output_image_filename,
+                         base_delta,
+                         base_delta_set,
+                         debug);
   } else {
-    CHECK(output_image_filename.empty() && output_image_fd == -1 && input_image_location.empty());
+    ret = patchoat_oat(timings,
+                       isa,
+                       patched_image_location,
+                       base_delta,
+                       base_delta_set,
+                       input_oat_fd,
+                       input_oat_location,
+                       input_oat_filename,
+                       have_input_oat,
+                       output_oat_fd,
+                       output_oat_filename,
+                       have_output_oat,
+                       lock_output,
+                       debug);
   }
 
-  if (have_oat_files) {
-    if (input_oat_fd != -1) {
-      if (input_oat_filename.empty()) {
-        input_oat_filename = "input-oat-file";
-      }
-      input_oat.reset(new File(input_oat_fd, input_oat_filename, false));
-      if (input_oat_fd == output_oat_fd) {
-        input_oat.get()->DisableAutoClose();
-      }
-      if (input_oat == nullptr) {
-        // Unlikely, but ensure exhaustive logging in non-0 exit code case
-        LOG(ERROR) << "Failed to open input oat file by its FD" << input_oat_fd;
-      }
-    } else {
-      CHECK(!input_oat_filename.empty());
-      input_oat.reset(OS::OpenFileForReading(input_oat_filename.c_str()));
-      if (input_oat == nullptr) {
-        int err = errno;
-        LOG(ERROR) << "Failed to open input oat file " << input_oat_filename
-                   << ": " << strerror(err) << "(" << err << ")";
-      }
-    }
-
-    if (output_oat_fd != -1) {
-      if (output_oat_filename.empty()) {
-        output_oat_filename = "output-oat-file";
-      }
-      output_oat.reset(new File(output_oat_fd, output_oat_filename, true));
-      if (output_oat == nullptr) {
-        // Unlikely, but ensure exhaustive logging in non-0 exit code case
-        LOG(ERROR) << "Failed to open output oat file by its FD" << output_oat_fd;
-      }
-    } else {
-      CHECK(!output_oat_filename.empty());
-      output_oat.reset(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
-      if (output_oat == nullptr) {
-        int err = errno;
-        LOG(ERROR) << "Failed to open output oat file " << output_oat_filename
-                   << ": " << strerror(err) << "(" << err << ")";
-      }
-    }
+  timings.EndTiming();
+  if (dump_timings) {
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 
-  // TODO: get rid of this.
-  auto cleanup = [&output_image_filename, &output_oat_filename,
-                  &new_oat_out, &new_image_out, &timings, &dump_timings](bool success) {
-    timings.EndTiming();
-    if (!success) {
-      if (new_oat_out) {
-        CHECK(!output_oat_filename.empty());
-        TEMP_FAILURE_RETRY(unlink(output_oat_filename.c_str()));
-      }
-      if (new_image_out) {
-        CHECK(!output_image_filename.empty());
-        TEMP_FAILURE_RETRY(unlink(output_image_filename.c_str()));
-      }
-    }
-    if (dump_timings) {
-      LOG(INFO) << Dumpable<TimingLogger>(timings);
-    }
-
-    if (kIsDebugBuild) {
-      LOG(INFO) << "Cleaning up.. success? " << success;
-    }
-  };
-
-  if (have_oat_files && (input_oat.get() == nullptr || output_oat.get() == nullptr)) {
-    LOG(ERROR) << "Failed to open input/output oat files";
-    cleanup(false);
-    return EXIT_FAILURE;
-  } else if (have_image_files && output_image.get() == nullptr) {
-    LOG(ERROR) << "Failed to open output image file";
-    cleanup(false);
-    return EXIT_FAILURE;
-  }
-
-  if (match_delta) {
-    CHECK(!have_image_files);  // We will not do this with images.
-    std::string error_msg;
-    // Figure out what the current delta is so we can match it to the desired delta.
-    std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat.get(), PROT_READ, MAP_PRIVATE,
-                                               &error_msg));
-    off_t current_delta = 0;
-    if (elf.get() == nullptr) {
-      LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    } else if (!ReadOatPatchDelta(elf.get(), &current_delta, &error_msg)) {
-      LOG(ERROR) << "Unable to get current delta: " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    }
-    // Before this line base_delta is the desired final delta. We need it to be the actual amount to
-    // change everything by. We subtract the current delta from it to make it this.
-    base_delta -= current_delta;
-    if (!IsAligned<kPageSize>(base_delta)) {
-      LOG(ERROR) << "Given image file was relocated by an illegal delta";
-      cleanup(false);
-      return false;
-    }
-  }
-
-  if (debug) {
-    LOG(INFO) << "moving offset by " << base_delta
-              << " (0x" << std::hex << base_delta << ") bytes or "
-              << std::dec << (base_delta/kPageSize) << " pages.";
-  }
-
-  // TODO: is it going to be promatic to unlink a file that was flock-ed?
-  ScopedFlock output_oat_lock;
-  if (lock_output) {
-    std::string error_msg;
-    if (have_oat_files && !output_oat_lock.Init(output_oat.get(), &error_msg)) {
-      LOG(ERROR) << "Unable to lock output oat " << output_image->GetPath() << ": " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    }
-  }
-
-  bool ret;
-  if (have_image_files && have_oat_files) {
-    TimingLogger::ScopedTiming pt("patch image and oat", &timings);
-    ret = PatchOat::Patch(input_oat.get(), input_image_location, base_delta,
-                          output_oat.get(), output_image.get(), isa, &timings,
-                          output_oat_fd >= 0,  // was it opened from FD?
-                          new_oat_out);
-    // The order here doesn't matter. If the first one is successfully saved and the second one
-    // erased, ImageSpace will still detect a problem and not use the files.
-    ret = FinishFile(output_image.get(), ret);
-    ret = FinishFile(output_oat.get(), ret);
-  } else if (have_oat_files) {
-    TimingLogger::ScopedTiming pt("patch oat", &timings);
-    ret = PatchOat::Patch(input_oat.get(), base_delta, output_oat.get(), &timings,
-                          output_oat_fd >= 0,  // was it opened from FD?
-                          new_oat_out);
-    ret = FinishFile(output_oat.get(), ret);
-  } else if (have_image_files) {
-    TimingLogger::ScopedTiming pt("patch image", &timings);
-    ret = PatchOat::Patch(input_image_location, base_delta, output_image.get(), isa, &timings);
-    ret = FinishFile(output_image.get(), ret);
-  } else {
-    CHECK(false);
-    ret = true;
-  }
-
-  if (kIsDebugBuild) {
-    LOG(INFO) << "Exiting with return ... " << ret;
-  }
-  cleanup(ret);
-  return (ret) ? EXIT_SUCCESS : EXIT_FAILURE;
+  return ret;
 }
 
 }  // namespace art
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 87ecc61..64efea9d 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -18,13 +18,16 @@
 #define ART_PATCHOAT_PATCHOAT_H_
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "gc/accounting/space_bitmap.h"
+#include "gc/space/image_space.h"
 #include "gc/heap.h"
 #include "os.h"
+#include "runtime.h"
 
 namespace art {
 
@@ -51,27 +54,29 @@
                     TimingLogger* timings);
 
   // Patch both the image and the oat file
-  static bool Patch(File* oat_in, const std::string& art_location,
-                    off_t delta, File* oat_out, File* art_out, InstructionSet isa,
-                    TimingLogger* timings,
-                    bool output_oat_opened_from_fd,  // Was this using --oatput-oat-fd ?
-                    bool new_oat_out);               // Output oat was a new file created by us?
+  static bool Patch(const std::string& art_location,
+                    off_t delta,
+                    const std::string& output_directory,
+                    InstructionSet isa,
+                    TimingLogger* timings);
+
+  ~PatchOat() {}
+  PatchOat(PatchOat&&) = default;
 
  private:
   // Takes ownership only of the ElfFile. All other pointers are only borrowed.
   PatchOat(ElfFile* oat_file, off_t delta, TimingLogger* timings)
       : oat_file_(oat_file), image_(nullptr), bitmap_(nullptr), heap_(nullptr), delta_(delta),
-        isa_(kNone), timings_(timings) {}
+        isa_(kNone), space_map_(nullptr), timings_(timings) {}
   PatchOat(InstructionSet isa, MemMap* image, gc::accounting::ContinuousSpaceBitmap* bitmap,
            MemMap* heap, off_t delta, TimingLogger* timings)
       : image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), isa_(isa), timings_(timings) {}
+        delta_(delta), isa_(isa), space_map_(nullptr), timings_(timings) {}
   PatchOat(InstructionSet isa, ElfFile* oat_file, MemMap* image,
            gc::accounting::ContinuousSpaceBitmap* bitmap, MemMap* heap, off_t delta,
-           TimingLogger* timings)
+           std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>>* map, TimingLogger* timings)
       : oat_file_(oat_file), image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), isa_(isa), timings_(timings) {}
-  ~PatchOat() {}
+        delta_(delta), isa_(isa), space_map_(map), timings_(timings) {}
 
   // Was the .art image at image_path made with --compile-pic ?
   static bool IsImagePic(const ImageHeader& image_header, const std::string& image_path);
@@ -102,9 +107,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupMethod(ArtMethod* object, ArtMethod* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void FixupNativePointerArray(mirror::PointerArray* object)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool InHeap(mirror::Object*);
 
   // Patches oat in place, modifying the oat_file given to the constructor.
   bool PatchElf();
@@ -113,11 +115,16 @@
   template <typename ElfFileImpl>
   bool PatchOatHeader(ElfFileImpl* oat_file);
 
-  bool PatchImage() SHARED_REQUIRES(Locks::mutator_lock_);
+  bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchImTables(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchImtConflictTables(const ImageHeader* image_header)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchClassTable(const ImageHeader* image_header)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -138,13 +145,31 @@
   }
 
   template <typename T>
+  T* RelocatedCopyOfFollowImages(T* obj) const {
+    if (obj == nullptr) {
+      return nullptr;
+    }
+    // Find ImageSpace this belongs to.
+    auto image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      if (image_space->Contains(obj)) {
+        uintptr_t heap_off = reinterpret_cast<uintptr_t>(obj) -
+                             reinterpret_cast<uintptr_t>(image_space->GetMemMap()->Begin());
+        return reinterpret_cast<T*>(space_map_->find(image_space)->second->Begin() + heap_off);
+      }
+    }
+    LOG(FATAL) << "Did not find object in boot image space " << obj;
+    UNREACHABLE();
+  }
+
+  template <typename T>
   T* RelocatedAddressOfPointer(T* obj) const {
     if (obj == nullptr) {
       return obj;
     }
     auto ret = reinterpret_cast<uintptr_t>(obj) + delta_;
     // Trim off high bits in case negative relocation with 64 bit patchoat.
-    if (InstructionSetPointerSize(isa_) == sizeof(uint32_t)) {
+    if (Is32BitISA()) {
       ret = static_cast<uintptr_t>(static_cast<uint32_t>(ret));
     }
     return reinterpret_cast<T*>(ret);
@@ -157,12 +182,16 @@
     }
     T ret = obj + delta_;
     // Trim off high bits in case negative relocation with 64 bit patchoat.
-    if (InstructionSetPointerSize(isa_) == 4) {
+    if (Is32BitISA()) {
       ret = static_cast<T>(static_cast<uint32_t>(ret));
     }
     return ret;
   }
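+  // Note: when patching a 32-bit image with a 64-bit patchoat, the helpers
+  // above truncate their results back to 32 bits so that, for example, a
+  // negative delta still yields a value representable on the target ISA.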
 
+  bool Is32BitISA() const {
+    return InstructionSetPointerSize(isa_) == PointerSize::k32;
+  }
+
   // Walks through the old image and patches the mmap'd copy of it to the new offset. It does not
   // change the heap.
   class PatchVisitor {
@@ -197,9 +226,12 @@
   // Active instruction set, used to know the entrypoint size.
   const InstructionSet isa_;
 
+  const std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>>* space_map_;
+
   TimingLogger* timings_;
 
   friend class FixupRootVisitor;
+  friend class RelocatedPointerVisitor;
   friend class PatchOatArtFieldVisitor;
   friend class PatchOatArtMethodVisitor;
   DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat);
diff --git a/profman/Android.mk b/profman/Android.mk
new file mode 100644
index 0000000..d38d107
--- /dev/null
+++ b/profman/Android.mk
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.executable.mk
+
+PROFMAN_SRC_FILES := \
+	profman.cc \
+	profile_assistant.cc
+
+# TODO: Remove this when the framework (installd) supports pushing the
+# right instruction-set parameter for the primary architecture.
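+# For example, a product whose default property overrides contain
+# ro.zygote=zygote64 boots a 64-bit primary zygote, so profman is built
+# 64-bit there; all other products get the 32-bit binary.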
+ifneq ($(filter ro.zygote=zygote64,$(PRODUCT_DEFAULT_PROPERTY_OVERRIDES)),)
+  profman_arch := 64
+else
+  profman_arch := 32
+endif
+
+ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,target,ndebug,$(profman_arch)))
+endif
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,target,debug,$(profman_arch)))
+endif
+
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,host,debug))
+endif
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
new file mode 100644
index 0000000..a25460e
--- /dev/null
+++ b/profman/profile_assistant.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profile_assistant.h"
+
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
+namespace art {
+
+// Minimum number of new methods/classes that profiles
+// must contain to enable recompilation.
+static constexpr uint32_t kMinNewMethodsForCompilation = 10;
+static constexpr uint32_t kMinNewClassesForCompilation = 10;
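+// For example, merged profiles that add only 9 new methods and 9 new classes
+// over the reference profile do not trigger recompilation.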
+
+ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
+        const std::vector<ScopedFlock>& profile_files,
+        const ScopedFlock& reference_profile_file) {
+  DCHECK(!profile_files.empty());
+
+  ProfileCompilationInfo info;
+  // Load the reference profile.
+  if (!info.Load(reference_profile_file.GetFile()->Fd())) {
+    LOG(WARNING) << "Could not load reference profile file";
+    return kErrorBadProfiles;
+  }
+
+  // Store the current state of the reference profile before merging with the current profiles.
+  uint32_t number_of_methods = info.GetNumberOfMethods();
+  uint32_t number_of_classes = info.GetNumberOfResolvedClasses();
+
+  // Merge all current profiles.
+  for (size_t i = 0; i < profile_files.size(); i++) {
+    if (!info.Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
+      return kErrorBadProfiles;
+    }
+  }
+
+  // Check if there is enough new information added by the current profiles.
+  if (((info.GetNumberOfMethods() - number_of_methods) < kMinNewMethodsForCompilation) &&
+      ((info.GetNumberOfResolvedClasses() - number_of_classes) < kMinNewClassesForCompilation)) {
+    return kSkipCompilation;
+  }
+
+  // We were successful in merging all profile information. Update the reference profile.
+  if (!reference_profile_file.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear reference profile file";
+    return kErrorIO;
+  }
+  if (!info.Save(reference_profile_file.GetFile()->Fd())) {
+    LOG(WARNING) << "Could not save reference profile file";
+    return kErrorIO;
+  }
+
+  return kCompile;
+}
+
+static bool InitFlock(const std::string& filename, ScopedFlock& flock, std::string* error) {
+  return flock.Init(filename.c_str(), O_RDWR, /* block */ true, error);
+}
+
+static bool InitFlock(int fd, ScopedFlock& flock, std::string* error) {
+  DCHECK_GE(fd, 0);
+  // We do not own the descriptor, so disable auto-close and don't check usage.
+  File file(fd, false);
+  file.DisableAutoClose();
+  return flock.Init(&file, error);
+}
+
+class ScopedCollectionFlock {
+ public:
+  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (!InitFlock(filenames[i], flocks_[i], error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<int>& fds, /* out */ std::string* error) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      DCHECK_GE(fds[i], 0);
+      if (!InitFlock(fds[i], flocks_[i], error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+  std::vector<ScopedFlock> flocks_;
+};
+
+ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
+        const std::vector<int>& profile_files_fd,
+        int reference_profile_file_fd) {
+  DCHECK_GE(reference_profile_file_fd, 0);
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return kErrorCannotLock;
+  }
+  ScopedFlock reference_profile_file_flock;
+  if (!InitFlock(reference_profile_file_fd, reference_profile_file_flock, &error)) {
+    LOG(WARNING) << "Could not lock reference profiled files: " << error;
+    return kErrorCannotLock;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_file_flock);
+}
+
+ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
+        const std::vector<std::string>& profile_files,
+        const std::string& reference_profile_file) {
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files.size());
+  if (!profile_files_flocks.Init(profile_files, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return kErrorCannotLock;
+  }
+  ScopedFlock reference_profile_file_flock;
+  if (!InitFlock(reference_profile_file, reference_profile_file_flock, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return kErrorCannotLock;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_file_flock);
+}
+
+}  // namespace art
diff --git a/profman/profile_assistant.h b/profman/profile_assistant.h
new file mode 100644
index 0000000..d3c75b8
--- /dev/null
+++ b/profman/profile_assistant.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_PROFMAN_PROFILE_ASSISTANT_H_
+#define ART_PROFMAN_PROFILE_ASSISTANT_H_
+
+#include <string>
+#include <vector>
+
+#include "base/scoped_flock.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistant {
+ public:
+  // These also serve as return codes of profman and are processed by installd
+  // (frameworks/native/cmds/installd/commands.cpp)
+  enum ProcessingResult {
+    kCompile = 0,
+    kSkipCompilation = 1,
+    kErrorBadProfiles = 2,
+    kErrorIO = 3,
+    kErrorCannotLock = 4
+  };
+
+  // Process the profile information present in the given files. Returns one of
+  // ProcessingResult values depending on profile information and whether or not
+  // the analysis ended up successfully (i.e. no errors during reading,
+  // merging or writing of profile files).
+  //
+  // When the returned value is kCompile there is a significant difference
+  // between profile_files and the reference profile. In this case the
+  // reference profile will be updated with the profiling info obtained after
+  // merging all profiles.
+  //
+  // When the returned value is kSkipCompilation, the difference between the
+  // merge of the current profiles and the reference one is insignificant. In
+  // this case no file will be updated.
+  //
+  static ProcessingResult ProcessProfiles(
+      const std::vector<std::string>& profile_files,
+      const std::string& reference_profile_file);
+
+  static ProcessingResult ProcessProfiles(
+      const std::vector<int>& profile_files_fd_,
+      int reference_profile_file_fd);
+
+ private:
+  static ProcessingResult ProcessProfilesInternal(
+      const std::vector<ScopedFlock>& profile_files,
+      const ScopedFlock& reference_profile_file);
+
+  DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
+};
+
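+// A minimal caller sketch (hypothetical paths, assuming the file-based
+// overload above):
+//
+//   std::vector<std::string> profiles = {"/data/misc/profiles/cur/0/app.prof"};
+//   ProfileAssistant::ProcessingResult result =
+//       ProfileAssistant::ProcessProfiles(profiles,
+//                                         "/data/misc/profiles/ref/app.prof");
+//   // result == ProfileAssistant::kCompile means the merge added enough new
+//   // methods/classes that recompilation is worthwhile.
+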
+}  // namespace art
+
+#endif  // ART_PROFMAN_PROFILE_ASSISTANT_H_
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
new file mode 100644
index 0000000..cd0aa6f
--- /dev/null
+++ b/profman/profile_assistant_test.cc
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+#include "utils.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+  void SetupProfile(const std::string& id,
+                    uint32_t checksum,
+                    uint16_t number_of_methods,
+                    uint16_t number_of_classes,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0) {
+    std::string dex_location1 = "location1" + id;
+    uint32_t dex_location_checksum1 = checksum;
+    std::string dex_location2 = "location2" + id;
+    uint32_t dex_location_checksum2 = 10 * checksum;
+    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+      ASSERT_TRUE(info->AddMethodIndex(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddMethodIndex(dex_location2, dex_location_checksum2, i));
+    }
+    for (uint16_t i = 0; i < number_of_classes; i++) {
+      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, i));
+    }
+
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  int GetFd(const ScratchFile& file) const {
+    return static_cast<int>(file.GetFd());
+  }
+
+  void CheckProfileInfo(ScratchFile& file, const ProfileCompilationInfo& info) {
+    ProfileCompilationInfo file_info;
+    ASSERT_TRUE(file.GetFile()->ResetOffset());
+    ASSERT_TRUE(file_info.Load(GetFd(file)));
+    ASSERT_TRUE(file_info.Equals(info));
+  }
+
+  std::string GetProfmanCmd() {
+    std::string file_path = GetTestAndroidRoot();
+    file_path += "/bin/profman";
+    if (kIsDebugBuild) {
+      file_path += "d";
+    }
+    EXPECT_TRUE(OS::FileExists(file_path.c_str()))
+        << file_path << " should be a valid file path";
+    return file_path;
+  }
+
+  // Runs profman on the given profile file descriptors and returns its exit code.
+  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    for (size_t k = 0; k < profiles_fd.size(); k++) {
+      argv_str.push_back("--profile-file-fd=" + std::to_string(profiles_fd[k]));
+    }
+    argv_str.push_back("--reference-profile-file-fd=" + std::to_string(reference_profile_fd));
+
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
+
+  bool GenerateTestProfile(const std::string& filename) {
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    argv_str.push_back("--generate-test-profile=" + filename);
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.MergeWith(info2));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+}
+
+// TODO(calin): Add more tests for classes.
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferencesBecauseOfClasses) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfClassesToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, 0, kNumberOfClassesToEnableCompilation, profile1, &info1);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  // The new profile info will contain the methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
+
+  // The reference profile info will contain the methods with indices 50-149.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, 0, reference_profile,
+      &reference_info, kNumberOfMethodsToEnableCompilation / 2);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.MergeWith(info2));
+  ASSERT_TRUE(expected.MergeWith(reference_info));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, 0, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, 0, profile2, &info2);
+
+  // We should not advise compilation.
+  ASSERT_EQ(ProfileAssistant::kSkipCompilation,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The information from profiles must remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must remain empty.
+  ASSERT_EQ(0, reference_profile.GetFile()->GetLength());
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of the information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
+
+  // We should fail processing.
+  ASSERT_EQ(ProfileAssistant::kErrorBadProfiles,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+
+  // Reference profile files must still remain empty.
+  ASSERT_EQ(0, reference_profile.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of the information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, 0, reference_profile, &reference_info);
+
+  // We should fail processing because of the mismatched checksums.
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_EQ(ProfileAssistant::kErrorBadProfiles,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+}
+
+TEST_F(ProfileAssistantTest, TestProfileGeneration) {
+  ScratchFile profile;
+  // Generate a test profile.
+  GenerateTestProfile(profile.GetFilename());
+
+  // Verify that the generated profile is valid and can be loaded.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(info.Load(GetFd(profile)));
+}
+
+}  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
new file mode 100644
index 0000000..a5fefa7
--- /dev/null
+++ b/profman/profman.cc
@@ -0,0 +1,444 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "errno.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "base/dumpable.h"
+#include "base/scoped_flock.h"
+#include "base/stringpiece.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+#include "base/unix_file/fd_file.h"
+#include "dex_file.h"
+#include "jit/offline_profiling_info.h"
+#include "utils.h"
+#include "zip_archive.h"
+#include "profile_assistant.h"
+
+namespace art {
+
+static int original_argc;
+static char** original_argv;
+
+static std::string CommandLine() {
+  std::vector<std::string> command;
+  for (int i = 0; i < original_argc; ++i) {
+    command.push_back(original_argv[i]);
+  }
+  return Join(command, ' ');
+}
+
+static constexpr int kInvalidFd = -1;
+
+static bool FdIsValid(int fd) {
+  return fd != kInvalidFd;
+}
+
+static void UsageErrorV(const char* fmt, va_list ap) {
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  LOG(ERROR) << error;
+}
+
+static void UsageError(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  UsageErrorV(fmt, ap);
+  va_end(ap);
+}
+
+NO_RETURN static void Usage(const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  UsageErrorV(fmt, ap);
+  va_end(ap);
+
+  UsageError("Command: %s", CommandLine().c_str());
+  UsageError("Usage: profman [options]...");
+  UsageError("");
+  UsageError("  --dump-only: dumps the content of the specified profile files");
+  UsageError("      to standard output (default) in a human readable form.");
+  UsageError("");
+  UsageError("  --dump-output-to-fd=<number>: redirects --dump-info-for output to a file");
+  UsageError("      descriptor.");
+  UsageError("");
+  UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
+  UsageError("      Can be specified multiple time, in which case the data from the different");
+  UsageError("      profiles will be aggregated.");
+  UsageError("");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
+  UsageError("");
+  UsageError("  --reference-profile-file=<filename>: specify a reference profile.");
+  UsageError("      The data in this file will be compared with the data obtained by merging");
+  UsageError("      all the files specified with --profile-file or --profile-file-fd.");
+  UsageError("      If the exit code is EXIT_COMPILE then all --profile-file will be merged into");
+  UsageError("      --reference-profile-file. ");
+  UsageError("");
+  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
+  UsageError("      accepts a file descriptor. Cannot be used together with");
+  UsageError("      --reference-profile-file.");
+  UsageError("  --generate-test-profile=<filename>: generates a random profile file for testing.");
+  UsageError("  --generate-test-profile-num-dex=<number>: number of dex files that should be");
+  UsageError("      included in the generated profile. Defaults to 20.");
+  UsageError("  --generate-test-profile-method-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of methods that should be generated. Defaults to 5.");
+  UsageError("  --generate-test-profile-class-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of classes that should be generated. Defaults to 5.");
+  UsageError("");
+  UsageError("");
+  UsageError("  --dex-location=<string>: location string to use with corresponding");
+  UsageError("      apk-fd to find dex files");
+  UsageError("");
+  UsageError("  --apk-fd=<number>: file descriptor containing an open APK to");
+  UsageError("      search for dex files");
+  UsageError("");
+
+  exit(EXIT_FAILURE);
+}
+
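+// For illustration (hypothetical descriptor numbers), installd-style merging
+// uses the fd variants:
+//   profman --profile-file-fd=3 --profile-file-fd=4 --reference-profile-file-fd=5
+// while a human-readable dump of a profile against its APK looks like:
+//   profman --dump-only --profile-file=/tmp/app.prof --apk-fd=6 \
+//       --dex-location=base.apk
+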
+// Note: make sure you update the Usage if you change these values.
+static constexpr uint16_t kDefaultTestProfileNumDex = 20;
+static constexpr uint16_t kDefaultTestProfileMethodRatio = 5;
+static constexpr uint16_t kDefaultTestProfileClassRatio = 5;
+
+class ProfMan FINAL {
+ public:
+  ProfMan() :
+      reference_profile_file_fd_(kInvalidFd),
+      dump_only_(false),
+      dump_output_to_fd_(kInvalidFd),
+      test_profile_num_dex_(kDefaultTestProfileNumDex),
+      test_profile_method_ratio_(kDefaultTestProfileMethodRatio),
+      test_profile_class_ratio_(kDefaultTestProfileClassRatio),
+      start_ns_(NanoTime()) {}
+
+  ~ProfMan() {
+    LogCompletionTime();
+  }
+
+  void ParseArgs(int argc, char **argv) {
+    original_argc = argc;
+    original_argv = argv;
+
+    InitLogging(argv);
+
+    // Skip over the command name.
+    argv++;
+    argc--;
+
+    if (argc == 0) {
+      Usage("No arguments specified");
+    }
+
+    for (int i = 0; i < argc; ++i) {
+      const StringPiece option(argv[i]);
+      const bool log_options = false;
+      if (log_options) {
+        LOG(INFO) << "profman: option[" << i << "]=" << argv[i];
+      }
+      if (option == "--dump-only") {
+        dump_only_ = true;
+      } else if (option.starts_with("--dump-output-to-fd=")) {
+        ParseUintOption(option, "--dump-output-to-fd", &dump_output_to_fd_, Usage);
+      } else if (option.starts_with("--profile-file=")) {
+        profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
+      } else if (option.starts_with("--reference-profile-file=")) {
+        reference_profile_file_ = option.substr(strlen("--reference-profile-file=")).ToString();
+      } else if (option.starts_with("--reference-profile-file-fd=")) {
+        ParseUintOption(option, "--reference-profile-file-fd", &reference_profile_file_fd_, Usage);
+      } else if (option.starts_with("--dex-location=")) {
+        dex_locations_.push_back(option.substr(strlen("--dex-location=")).ToString());
+      } else if (option.starts_with("--apk-fd=")) {
+        ParseFdForCollection(option, "--apk-fd", &apks_fd_);
+      } else if (option.starts_with("--generate-test-profile=")) {
+        test_profile_ = option.substr(strlen("--generate-test-profile=")).ToString();
+      } else if (option.starts_with("--generate-test-profile-num-dex=")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-num-dex",
+                        &test_profile_num_dex_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-method-ratio")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-method-ratio",
+                        &test_profile_method_ratio_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-class-ratio")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-class-ratio",
+                        &test_profile_class_ratio_,
+                        Usage);
+      } else {
+        Usage("Unknown argument '%s'", option.data());
+      }
+    }
+
+    bool has_profiles = !profile_files_.empty() || !profile_files_fd_.empty();
+    bool has_reference_profile = !reference_profile_file_.empty() ||
+        FdIsValid(reference_profile_file_fd_);
+
+    if (!test_profile_.empty()) {
+      if (test_profile_method_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-method-ratio");
+      }
+      if (test_profile_class_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-class-ratio");
+      }
+      return;
+    }
+    // --dump-only may be specified with only a reference profile present.
+    if (!dump_only_ && !has_profiles) {
+      Usage("No profile files specified.");
+    }
+    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
+      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
+    }
+    if (!dump_only_ && !has_reference_profile) {
+      Usage("No reference profile file specified.");
+    }
+    if (!reference_profile_file_.empty() && FdIsValid(reference_profile_file_fd_)) {
+      Usage("Reference profile should not be specified with both "
+            "--reference-profile-file-fd and --reference-profile-file");
+    }
+    if ((!profile_files_.empty() && FdIsValid(reference_profile_file_fd_)) ||
+        (!dump_only_ && !profile_files_fd_.empty() && !FdIsValid(reference_profile_file_fd_))) {
+      Usage("Options --profile-file-fd and --reference-profile-file-fd "
+            "should only be used together");
+    }
+  }
+
+  ProfileAssistant::ProcessingResult ProcessProfiles() {
+    ProfileAssistant::ProcessingResult result;
+    if (profile_files_.empty()) {
+      // The file doesn't need to be flushed here (ProcessProfiles will do it)
+      // so don't check the usage.
+      File file(reference_profile_file_fd_, false);
+      result = ProfileAssistant::ProcessProfiles(profile_files_fd_, reference_profile_file_fd_);
+      CloseAllFds(profile_files_fd_, "profile_files_fd_");
+    } else {
+      result = ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_file_);
+    }
+    return result;
+  }
+
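+  // Append a textual dump of a single profile to |dump|. The profile is read from |filename|
+  // when one is given, otherwise from |fd|. Returns 0 on success and -1 on error.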
+  int DumpOneProfile(const std::string& banner, const std::string& filename, int fd,
+                     const std::vector<const DexFile*>* dex_files, std::string* dump) {
+    if (!filename.empty()) {
+      fd = open(filename.c_str(), O_RDWR);
+      if (fd < 0) {
+        std::cerr << "Cannot open " << filename << strerror(errno);
+        return -1;
+      }
+    }
+    ProfileCompilationInfo info;
+    if (!info.Load(fd)) {
+      std::cerr << "Cannot load profile info from fd=" << fd << "\n";
+      return -1;
+    }
+    std::string this_dump = banner + "\n" + info.DumpInfo(dex_files) + "\n";
+    *dump += this_dump;
+    if (close(fd) < 0) {
+      PLOG(WARNING) << "Failed to close descriptor";
+    }
+    return 0;
+  }
+
+  int DumpProfileInfo() {
+    static const char* kEmptyString = "";
+    static const char* kOrdinaryProfile = "=== profile ===";
+    static const char* kReferenceProfile = "=== reference profile ===";
+
+    // Open apk/zip files and read dex files.
+    MemMap::Init();  // for ZipArchive::OpenFromFd
+    std::vector<const DexFile*> dex_files;
+    assert(dex_locations_.size() == apks_fd_.size());
+    static constexpr bool kVerifyChecksum = true;
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      std::string error_msg;
+      std::vector<std::unique_ptr<const DexFile>> dex_files_for_location;
+      std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(apks_fd_[i],
+                                                                     dex_locations_[i].c_str(),
+                                                                     &error_msg));
+      if (zip_archive == nullptr) {
+        LOG(WARNING) << "OpenFromFd failed for '" << dex_locations_[i] << "' " << error_msg;
+        continue;
+      }
+      if (!DexFile::OpenFromZip(*zip_archive,
+                                dex_locations_[i],
+                                kVerifyChecksum,
+                                &error_msg,
+                                &dex_files_for_location)) {
+        LOG(WARNING) << "OpenFromZip failed for '" << dex_locations_[i] << "' " << error_msg;
+        continue;
+      }
+      for (std::unique_ptr<const DexFile>& dex_file : dex_files_for_location) {
+        dex_files.push_back(dex_file.release());
+      }
+    }
+
+    std::string dump;
+    // Dump individual profile files.
+    if (!profile_files_fd_.empty()) {
+      for (int profile_file_fd : profile_files_fd_) {
+        int ret = DumpOneProfile(kOrdinaryProfile,
+                                 kEmptyString,
+                                 profile_file_fd,
+                                 &dex_files,
+                                 &dump);
+        if (ret != 0) {
+          return ret;
+        }
+      }
+    }
+    if (!profile_files_.empty()) {
+      for (const std::string& profile_file : profile_files_) {
+        int ret = DumpOneProfile(kOrdinaryProfile, profile_file, kInvalidFd, &dex_files, &dump);
+        if (ret != 0) {
+          return ret;
+        }
+      }
+    }
+    // Dump reference profile file.
+    if (FdIsValid(reference_profile_file_fd_)) {
+      int ret = DumpOneProfile(kReferenceProfile,
+                               kEmptyString,
+                               reference_profile_file_fd_,
+                               &dex_files,
+                               &dump);
+      if (ret != 0) {
+        return ret;
+      }
+    }
+    if (!reference_profile_file_.empty()) {
+      int ret = DumpOneProfile(kReferenceProfile,
+                               reference_profile_file_,
+                               kInvalidFd,
+                               &dex_files,
+                               &dump);
+      if (ret != 0) {
+        return ret;
+      }
+    }
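+    // Write the dump to the output fd when one was provided, otherwise to stdout.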
+    if (!FdIsValid(dump_output_to_fd_)) {
+      std::cout << dump;
+    } else {
+      unix_file::FdFile out_fd(dump_output_to_fd_, false /*check_usage*/);
+      if (!out_fd.WriteFully(dump.c_str(), dump.length())) {
+        return -1;
+      }
+    }
+    return 0;
+  }
+
+  bool ShouldOnlyDumpProfile() {
+    return dump_only_;
+  }
+
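+  // Create |test_profile_| and fill it with randomly generated data using the configured
+  // number of dex files and the method/class ratios.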
+  int GenerateTestProfile() {
+    // O_CREAT requires an explicit mode; use 0644 for the generated test profile.
+    int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
+    if (profile_test_fd < 0) {
+      std::cerr << "Cannot open " << test_profile_ << strerror(errno);
+      return -1;
+    }
+
+    bool result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                             test_profile_num_dex_,
+                                                             test_profile_method_ratio_,
+                                                             test_profile_class_ratio_);
+    close(profile_test_fd);  // ignore close result.
+    return result ? 0 : -1;
+  }
+
+  bool ShouldGenerateTestProfile() {
+    return !test_profile_.empty();
+  }
+
+ private:
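+  // Parse an integer file descriptor from |option| and append it to |fds|.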
+  static void ParseFdForCollection(const StringPiece& option,
+                                   const char* arg_name,
+                                   std::vector<int>* fds) {
+    int fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
+  }
+
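+  // Close every descriptor in |fds|, logging a warning for each close failure.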
+  static void CloseAllFds(const std::vector<int>& fds, const char* descriptor) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      if (close(fds[i]) < 0) {
+        PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
+      }
+    }
+  }
+
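+  // Log a warning when profman runs longer than kLogThresholdTime.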
+  void LogCompletionTime() {
+    static constexpr uint64_t kLogThresholdTime = MsToNs(100);  // 100ms
+    uint64_t time_taken = NanoTime() - start_ns_;
+    if (time_taken > kLogThresholdTime) {
+      LOG(WARNING) << "profman took " << PrettyDuration(time_taken);
+    }
+  }
+
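+  // Inputs and options populated by ParseArgs().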
+  std::vector<std::string> profile_files_;
+  std::vector<int> profile_files_fd_;
+  std::vector<std::string> dex_locations_;
+  std::vector<int> apks_fd_;
+  std::string reference_profile_file_;
+  int reference_profile_file_fd_;
+  bool dump_only_;
+  int dump_output_to_fd_;
+  std::string test_profile_;
+  uint16_t test_profile_num_dex_;
+  uint16_t test_profile_method_ratio_;
+  uint16_t test_profile_class_ratio_;
+  uint64_t start_ns_;
+};
+
+// See ProfileAssistant::ProcessingResult for return codes.
+static int profman(int argc, char** argv) {
+  ProfMan profman;
+
+  // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
+  profman.ParseArgs(argc, argv);
+
+  if (profman.ShouldGenerateTestProfile()) {
+    return profman.GenerateTestProfile();
+  }
+  if (profman.ShouldOnlyDumpProfile()) {
+    return profman.DumpProfileInfo();
+  }
+  // Process profile information and assess whether we need to do a profile-guided compilation.
+  // This operation involves I/O.
+  return profman.ProcessProfiles();
+}
+
+}  // namespace art
+
+int main(int argc, char **argv) {
+  return art::profman(argc, argv);
+}
+
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 09d7311..0e50eeb 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -27,6 +27,7 @@
   base/arena_allocator.cc \
   base/arena_bit_vector.cc \
   base/bit_vector.cc \
+  base/file_magic.cc \
   base/hex_dump.cc \
   base/logging.cc \
   base/mutex.cc \
@@ -41,12 +42,15 @@
   check_jni.cc \
   class_linker.cc \
   class_table.cc \
+  code_simulator_container.cc \
   common_throws.cc \
+  compiler_filter.cc \
   debugger.cc \
   dex_file.cc \
   dex_file_verifier.cc \
   dex_instruction.cc \
   elf_file.cc \
+  fault_handler.cc \
   gc/allocation_record.cc \
   gc/allocator/dlmalloc.cc \
   gc/allocator/rosalloc.cc \
@@ -59,6 +63,7 @@
   gc/collector/concurrent_copying.cc \
   gc/collector/garbage_collector.cc \
   gc/collector/immune_region.cc \
+  gc/collector/immune_spaces.cc \
   gc/collector/mark_compact.cc \
   gc/collector/mark_sweep.cc \
   gc/collector/partial_mark_sweep.cc \
@@ -98,15 +103,12 @@
   jdwp/jdwp_socket.cc \
   jdwp/object_registry.cc \
   jni_env_ext.cc \
+  jit/debugger_interface.cc \
   jit/jit.cc \
   jit/jit_code_cache.cc \
-  jit/jit_instrumentation.cc \
+  jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
-  lambda/art_lambda_method.cc \
-  lambda/box_table.cc \
-  lambda/closure.cc \
-  lambda/closure_builder.cc \
-  lambda/leaking_allocator.cc \
+  jit/profile_saver.cc  \
   jni_internal.cc \
   jobject_comparator.cc \
   linear_alloc.cc \
@@ -125,6 +127,7 @@
   mirror/throwable.cc \
   monitor.cc \
   native_bridge_art_interface.cc \
+  native_stack_dump.cc \
   native/dalvik_system_DexFile.cc \
   native/dalvik_system_VMDebug.cc \
   native/dalvik_system_VMRuntime.cc \
@@ -133,7 +136,6 @@
   native/java_lang_Class.cc \
   native/java_lang_DexCache.cc \
   native/java_lang_Object.cc \
-  native/java_lang_Runtime.cc \
   native/java_lang_String.cc \
   native/java_lang_StringFactory.cc \
   native/java_lang_System.cc \
@@ -142,6 +144,7 @@
   native/java_lang_VMClassLoader.cc \
   native/java_lang_ref_FinalizerReference.cc \
   native/java_lang_ref_Reference.cc \
+  native/java_lang_reflect_AbstractMethod.cc \
   native/java_lang_reflect_Array.cc \
   native/java_lang_reflect_Constructor.cc \
   native/java_lang_reflect_Field.cc \
@@ -161,6 +164,7 @@
   offsets.cc \
   os_linux.cc \
   parsed_options.cc \
+  plugin.cc \
   primitive.cc \
   quick_exception_handler.cc \
   quick/inline_method_analyser.cc \
@@ -174,13 +178,12 @@
   thread.cc \
   thread_list.cc \
   thread_pool.cc \
+  ti/agent.cc \
   trace.cc \
   transaction.cc \
-  profiler.cc \
-  fault_handler.cc \
+  type_lookup_table.cc \
   utf.cc \
   utils.cc \
-  verifier/dex_gc_map.cc \
   verifier/instruction_flags.cc \
   verifier/method_verifier.cc \
   verifier/reg_type.cc \
@@ -225,6 +228,17 @@
 LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
+# Keep the __jit_debug_register_code symbol as a unique symbol during ICF for architectures where
+# we use gold as the linker (arm, x86, x86_64). The symbol is used by debuggers to detect when
+# new JIT code is generated. We don't want it to be folded with a different function that has the
+# same (empty) body, since the debuggers would then be notified on unrelated calls.
+JIT_DEBUG_REGISTER_CODE_LDFLAGS := -Wl,--keep-unique,__jit_debug_register_code
+LIBART_TARGET_LDFLAGS_arm    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_arm64  := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_x86    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_x86_64 := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+JIT_DEBUG_REGISTER_CODE_LDFLAGS :=
+
 LIBART_TARGET_SRC_FILES := \
   $(LIBART_COMMON_SRC_FILES) \
   jdwp/jdwp_adb.cc \
@@ -233,6 +247,8 @@
   thread_android.cc
 
 LIBART_TARGET_SRC_FILES_arm := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_arm.S \
   arch/arm/context_arm.cc.arm \
   arch/arm/entrypoints_init_arm.cc \
   arch/arm/instruction_set_features_assembly_tests.S \
@@ -244,6 +260,8 @@
   arch/arm/fault_handler_arm.cc
 
 LIBART_TARGET_SRC_FILES_arm64 := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_arm64.S \
   arch/arm64/context_arm64.cc \
   arch/arm64/entrypoints_init_arm64.cc \
   arch/arm64/jni_entrypoints_arm64.S \
@@ -254,6 +272,8 @@
   arch/arm64/fault_handler_arm64.cc
 
 LIBART_SRC_FILES_x86 := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_x86.S \
   arch/x86/context_x86.cc \
   arch/x86/entrypoints_init_x86.cc \
   arch/x86/jni_entrypoints_x86.S \
@@ -268,6 +288,8 @@
 # Note that the fault_handler_x86.cc is not a mistake.  This file is
 # shared between the x86 and x86_64 architectures.
 LIBART_SRC_FILES_x86_64 := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_x86_64.S \
   arch/x86_64/context_x86_64.cc \
   arch/x86_64/entrypoints_init_x86_64.cc \
   arch/x86_64/jni_entrypoints_x86_64.S \
@@ -281,6 +303,8 @@
   $(LIBART_SRC_FILES_x86_64) \
 
 LIBART_TARGET_SRC_FILES_mips := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_mips.S \
   arch/mips/context_mips.cc \
   arch/mips/entrypoints_init_mips.cc \
   arch/mips/jni_entrypoints_mips.S \
@@ -290,6 +314,8 @@
   arch/mips/fault_handler_mips.cc
 
 LIBART_TARGET_SRC_FILES_mips64 := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_mips64.S \
   arch/mips64/context_mips64.cc \
   arch/mips64/entrypoints_init_mips64.cc \
   arch/mips64/jni_entrypoints_mips64.S \
@@ -314,6 +340,7 @@
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
   arch/instruction_set.h \
   base/allocator.h \
+  base/enums.h \
   base/mutex.h \
   debugger.h \
   base/unix_file/fd_file.h \
@@ -339,52 +366,28 @@
   mirror/class.h \
   oat.h \
   object_callbacks.h \
-  profiler_options.h \
+  process_state.h \
   quick/inline_method_analyser.h \
   runtime.h \
   stack.h \
   thread.h \
   thread_state.h \
+  ti/agent.h \
   verifier/method_verifier.h
 
+LIBOPENJDKJVM_SRC_FILES := openjdkjvm/OpenjdkJvm.cc
+LIBOPENJDKJVMTI_SRC_FILES := openjdkjvmti/OpenjdkJvmTi.cc
+
 LIBART_CFLAGS := -DBUILDING_LIBART=1
 
 LIBART_TARGET_CFLAGS :=
 LIBART_HOST_CFLAGS :=
 
-ifeq ($(MALLOC_IMPL),dlmalloc)
-  LIBART_TARGET_CFLAGS += -DUSE_DLMALLOC
-else
-  LIBART_TARGET_CFLAGS += -DUSE_JEMALLOC
-endif
-
-# Default dex2oat instruction set features.
-LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES := default
-LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
-2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
-ifeq ($(DEX2OAT_TARGET_ARCH),arm)
-  ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
-    LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
-  else
-    ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
-      LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
-    endif
-  endif
-endif
-ifeq ($(2ND_DEX2OAT_TARGET_ARCH),arm)
-  ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
-    2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
-  else
-    ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
-      2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
-    endif
-  endif
-endif
-
 # $(1): target or host
 # $(2): ndebug or debug
-# $(3): static or shared (empty means shared, applies only for host)
-define build-libart
+# $(3): static or shared (note that static only applies for host)
+# $(4): module name : either libart, libopenjdkjvm, or libopenjdkjvmti
+define build-runtime-library
   ifneq ($(1),target)
     ifneq ($(1),host)
       $$(error expected target or host for argument 1, received $(1))
@@ -395,6 +398,13 @@
       $$(error expected ndebug or debug for argument 2, received $(2))
     endif
   endif
+  ifneq ($(4),libart)
+    ifneq ($(4),libopenjdkjvm)
+      ifneq ($(4),libopenjdkjvmti)
+        $$(error expected libart, libopenjdkjvmti, or libopenjdkjvm for argument 4, received $(4))
+      endif
+    endif
+  endif
 
   art_target_or_host := $(1)
   art_ndebug_or_debug := $(2)
@@ -403,12 +413,12 @@
   include $$(CLEAR_VARS)
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
-    LOCAL_MODULE := libart
+    LOCAL_MODULE := $(4)
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
     endif
   else # debug
-    LOCAL_MODULE := libartd
+    LOCAL_MODULE := $(4)d
   endif
 
   LOCAL_MODULE_TAGS := optional
@@ -419,17 +429,29 @@
     LOCAL_MODULE_CLASS := SHARED_LIBRARIES
   endif
 
-  ifeq ($$(art_target_or_host),target)
-    LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
-    $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
-      $$(eval LOCAL_SRC_FILES_$$(arch) := $$$$(LIBART_TARGET_SRC_FILES_$$(arch))))
-  else # host
-    LOCAL_SRC_FILES := $$(LIBART_HOST_SRC_FILES)
-    LOCAL_SRC_FILES_32 := $$(LIBART_HOST_SRC_FILES_32)
-    LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
-    LOCAL_IS_HOST_MODULE := true
+  ifeq ($(4),libart)
+    ifeq ($$(art_target_or_host),target)
+      LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
+      $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
+        $$(eval LOCAL_SRC_FILES_$$(arch) := $$$$(LIBART_TARGET_SRC_FILES_$$(arch))))
+    else # host
+      LOCAL_SRC_FILES := $$(LIBART_HOST_SRC_FILES)
+      LOCAL_SRC_FILES_32 := $$(LIBART_HOST_SRC_FILES_32)
+      LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
+      LOCAL_IS_HOST_MODULE := true
+    endif
+  else
+    ifeq ($(4),libopenjdkjvmti)
+      LOCAL_SRC_FILES := $$(LIBOPENJDKJVMTI_SRC_FILES)
+    else # libopenjdkjvm
+      LOCAL_SRC_FILES := $$(LIBOPENJDKJVM_SRC_FILES)
+    endif
+    ifeq ($$(art_target_or_host),host)
+      LOCAL_IS_HOST_MODULE := true
+    endif
   endif
 
+ifeq ($(4),libart)
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
   ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_ENUM_OPERATOR_OUT_HEADER_FILES))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
@@ -440,12 +462,15 @@
 	$$(transform-generated-source)
 
   LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN)
+endif
 
   LOCAL_CFLAGS := $$(LIBART_CFLAGS)
   LOCAL_LDFLAGS := $$(LIBART_LDFLAGS)
   ifeq ($$(art_target_or_host),target)
     LOCAL_CFLAGS += $$(LIBART_TARGET_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_TARGET_LDFLAGS)
+    $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
+      $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
   else #host
     LOCAL_CFLAGS += $$(LIBART_HOST_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_HOST_LDFLAGS)
@@ -453,30 +478,27 @@
       LOCAL_LDFLAGS += -static
     endif
   endif
-  $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
-    $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
 
   # Clang usage
   ifeq ($$(art_target_or_host),target)
-    $$(eval $$(call set-target-local-clang-vars))
+    $$(eval LOCAL_CLANG := $$(ART_TARGET_CLANG))
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
-    LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
-    LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
+    LOCAL_ASFLAGS_arm += -no-integrated-as
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
-    LOCAL_LDLIBS := $$(ART_HOST_LDLIBS)
     LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($$(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS)
-    LOCAL_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
 
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $$(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $$(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_MULTILIB := both
   endif
@@ -487,9 +509,19 @@
   LOCAL_C_INCLUDES += art
 
   ifeq ($$(art_static_or_shared),static)
-    LOCAL_STATIC_LIBRARIES := libnativehelper libnativebridge libsigchain_dummy libbacktrace
+    LOCAL_STATIC_LIBRARIES := libnativehelper
+    LOCAL_STATIC_LIBRARIES += libnativebridge
+    LOCAL_STATIC_LIBRARIES += libnativeloader
+    LOCAL_STATIC_LIBRARIES += libsigchain_dummy
+    LOCAL_STATIC_LIBRARIES += libbacktrace
+    LOCAL_STATIC_LIBRARIES += liblz4
   else
-    LOCAL_SHARED_LIBRARIES := libnativehelper libnativebridge libsigchain libbacktrace
+    LOCAL_SHARED_LIBRARIES := libnativehelper
+    LOCAL_SHARED_LIBRARIES += libnativebridge
+    LOCAL_SHARED_LIBRARIES += libnativeloader
+    LOCAL_SHARED_LIBRARIES += libsigchain
+    LOCAL_SHARED_LIBRARIES += libbacktrace
+    LOCAL_SHARED_LIBRARIES += liblz4
   endif
 
   ifeq ($$(art_target_or_host),target)
@@ -511,6 +543,24 @@
       LOCAL_SHARED_LIBRARIES += libcutils
     endif
   endif
+
+  ifeq ($(4),libopenjdkjvm)
+    ifeq ($$(art_ndebug_or_debug),ndebug)
+      LOCAL_SHARED_LIBRARIES += libart
+    else
+      LOCAL_SHARED_LIBRARIES += libartd
+    endif
+    LOCAL_NOTICE_FILE := $(LOCAL_PATH)/openjdkjvm/NOTICE
+  else
+    ifeq ($(4),libopenjdkjvmti)
+      ifeq ($$(art_ndebug_or_debug),ndebug)
+        LOCAL_SHARED_LIBRARIES += libart
+      else
+        LOCAL_SHARED_LIBRARIES += libartd
+      endif
+      LOCAL_NOTICE_FILE := $(LOCAL_PATH)/openjdkjvmti/NOTICE
+    endif
+  endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
 
@@ -547,34 +597,49 @@
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
 # they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-libart,host,ndebug))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libart))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libopenjdkjvmti))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-libart,host,ndebug,static))
+    $(eval $(call build-runtime-library,host,ndebug,static,libart))
+    $(eval $(call build-runtime-library,host,ndebug,static,libopenjdkjvm))
+    $(eval $(call build-runtime-library,host,ndebug,static,libopenjdkjvmti))
   endif
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-libart,host,debug))
+  $(eval $(call build-runtime-library,host,debug,shared,libart))
+  $(eval $(call build-runtime-library,host,debug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,host,debug,shared,libopenjdkjvmti))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-libart,host,debug,static))
+    $(eval $(call build-runtime-library,host,debug,static,libart))
+    $(eval $(call build-runtime-library,host,debug,static,libopenjdkjvm))
+    $(eval $(call build-runtime-library,host,debug,static,libopenjdkjvmti))
   endif
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-#  $(error $(call build-libart,target,ndebug))
-  $(eval $(call build-libart,target,ndebug))
+#  $(error $(call build-runtime-library,target,ndebug))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libart))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libopenjdkjvmti))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-libart,target,debug))
+  $(eval $(call build-runtime-library,target,debug,shared,libart))
+  $(eval $(call build-runtime-library,target,debug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,target,debug,shared,libopenjdkjvmti))
 endif
 
 # Clear locally defined variables.
 LOCAL_PATH :=
 LIBART_COMMON_SRC_FILES :=
-LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES :=
-LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
-2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
-LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
+LIBART_TARGET_LDFLAGS :=
+LIBART_TARGET_LDFLAGS_arm :=
+LIBART_TARGET_LDFLAGS_arm64 :=
+LIBART_TARGET_LDFLAGS_x86 :=
+LIBART_TARGET_LDFLAGS_x86_64 :=
+LIBART_TARGET_LDFLAGS_mips :=
+LIBART_TARGET_LDFLAGS_mips64 :=
 LIBART_TARGET_SRC_FILES :=
 LIBART_TARGET_SRC_FILES_arm :=
 LIBART_TARGET_SRC_FILES_arm64 :=
@@ -589,4 +654,4 @@
 LIBART_CFLAGS :=
 LIBART_TARGET_CFLAGS :=
 LIBART_HOST_CFLAGS :=
-build-libart :=
+build-runtime-library :=
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index d6ba304..a857976 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -19,6 +19,9 @@
 #include "art_method-inl.h"
 #include "common_runtime_test.h"
 #include "quick/quick_method_frame_info.h"
+// Common tests are declared next to the constants.
+#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y);
+#include "asm_support.h"
 
 namespace art {
 
@@ -30,6 +33,13 @@
     options->push_back(std::make_pair("imageinstructionset", "x86_64"));
   }
 
+  // Do not do any of the finalization. We don't want to run any code and we don't need the
+  // heap prepared; with the instruction set forced to x86_64 in SetUpRuntimeOptions,
+  // finalization would actually be a problem.
+  void FinalizeSetup() OVERRIDE {
+    ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
+  }
+
   static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* const runtime = Runtime::Current();
@@ -46,10 +56,6 @@
   }
 };
 
-// Common tests are declared next to the constants.
-#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y);
-#include "asm_support.h"
-
 TEST_F(ArchTest, CheckCommonOffsetsAndSizes) {
   CheckAsmSupportOffsetsAndSizes();
 }
@@ -57,104 +63,97 @@
 // Grab architecture specific constants.
 namespace arm {
 #include "arch/arm/asm_support_arm.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace arm
 
 namespace arm64 {
 #include "arch/arm64/asm_support_arm64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace arm64
 
 namespace mips {
 #include "arch/mips/asm_support_mips.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace mips
 
 namespace mips64 {
 #include "arch/mips64/asm_support_mips64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace mips64
 
 namespace x86 {
 #include "arch/x86/asm_support_x86.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace x86
 
 namespace x86_64 {
 #include "arch/x86_64/asm_support_x86_64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace x86_64
 
 // Check architecture specific constants are sound.
-TEST_F(ArchTest, ARM) {
-  CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, arm::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, arm::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, arm::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-
-TEST_F(ArchTest, ARM64) {
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kSaveAll, arm64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, arm64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs,
-                 arm64::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, MIPS) {
-  CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, mips::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, mips::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs,
-                 mips::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, MIPS64) {
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kSaveAll, mips64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsOnly, mips64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsAndArgs,
-                 mips64::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, X86) {
-  CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, x86::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, x86::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, x86::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, X86_64) {
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, x86_64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, x86_64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs,
-                 x86_64::kFrameSizeRefsAndArgsCalleeSave);
-}
+#define TEST_ARCH(Arch, arch)                             \
+  TEST_F(ArchTest, Arch) {                                \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveAllCalleeSaves,          \
+                   arch::kFrameSizeSaveAllCalleeSaves);   \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveRefsOnly,                \
+                   arch::kFrameSizeSaveRefsOnly);         \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveRefsAndArgs,             \
+                   arch::kFrameSizeSaveRefsAndArgs);      \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveEverything,              \
+                   arch::kFrameSizeSaveEverything);       \
+  }
+TEST_ARCH(Arm, arm)
+TEST_ARCH(Arm64, arm64)
+TEST_ARCH(Mips, mips)
+TEST_ARCH(Mips64, mips64)
+TEST_ARCH(X86, x86)
+TEST_ARCH(X86_64, x86_64)
 
 }  // namespace art
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index 44c7649..38ca76a 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -30,18 +30,17 @@
 .arch armv7-a
 .thumb
 
-// Macro to generate the value of Runtime::Current into rDest clobbering rTemp. As it uses labels
+// Macro to generate the value of Runtime::Current into rDest. As it uses labels
 // then the labels need to be unique. We bind these to the function name in the ENTRY macros.
-.macro RUNTIME_CURRENT name, num, rDest, rTemp
+.macro RUNTIME_CURRENT name, num, rDest
     .if .Lruntime_current\num\()_used
          .error
     .endif
     .set .Lruntime_current\num\()_used, 1
-    ldr \rDest, .Lgot_\name\()_\num               @ Load offset of the GOT.
-    ldr \rTemp, .Lruntime_instance_\name\()_\num  @ Load GOT offset of Runtime::instance_.
+    ldr \rDest, .Lruntime_instance_\name\()_\num  @ Load GOT_PREL offset of Runtime::instance_.
 .Lload_got_\name\()_\num\():
-    add \rDest, pc                                @ Fixup GOT address.
-    ldr \rDest, [\rDest, \rTemp]                  @ Load address of Runtime::instance_.
+    add \rDest, pc                                @ Fixup GOT_PREL address.
+    ldr \rDest, [\rDest]                          @ Load address of Runtime::instance_.
     ldr \rDest, [\rDest]                          @ Load Runtime::instance_.
 .endm
 
@@ -90,26 +89,20 @@
     DEF_ENTRY .arm, \name
 .endm
 
-// Terminate an ENTRY and generate GOT references.
+// Terminate an ENTRY and generate GOT_PREL references.
 .macro END name
      // Generate offsets of GOT and Runtime::instance_ used in RUNTIME_CURRENT.
      .if .Lruntime_current1_used
-         .Lgot_\name\()_1:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_1+4)
          .Lruntime_instance_\name\()_1:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_1+4)
      .endif
      .if .Lruntime_current2_used
-         .Lgot_\name\()_2:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_2+4)
          .Lruntime_instance_\name\()_2:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_2+4)
     .endif
      .if .Lruntime_current3_used
-         .Lgot_\name\()_3:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_3+4)
          .Lruntime_instance_\name\()_3:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_3+4)
     .endif
     // Remove the RUNTIME_CURRENTx macros so they get rebound in the next function entry.
     .purgem RUNTIME_CURRENT1
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 1fa566b..c03bcae 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -19,9 +19,10 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 112
+#define FRAME_SIZE_SAVE_REFS_ONLY 32
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
+#define FRAME_SIZE_SAVE_EVERYTHING 192
 
 // Flag for enabling R4 optimization in arm runtime
 // #define ARM_R4_SUSPEND_FLAG
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 76c7c4f..492a12d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
@@ -26,9 +27,27 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
 
 // Used by soft float.
 // Single-precision FP arithmetics.
@@ -47,67 +66,12 @@
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // Math
   qpoints->pIdivmod = __aeabi_idivmod;
   qpoints->pLdiv = __aeabi_ldivmod;
@@ -130,43 +94,66 @@
     qpoints->pL2f = art_quick_l2f;
   }
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimization from compiled code.
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = nullptr;  // Cannot use register 12 (IP) to pass arguments.
+  qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
+  qpoints->pReadBarrierMarkReg14 = nullptr;  // Cannot use register 14 (LR) to pass arguments.
+  qpoints->pReadBarrierMarkReg15 = nullptr;  // Cannot use register 15 (PC) to pass arguments.
+  // ARM has only 16 core registers.
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index d81e0a9..befdd48 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -20,6 +20,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/hex_dump.h"
 #include "globals.h"
@@ -34,7 +35,7 @@
 
 namespace art {
 
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_implicit_suspend();
 
@@ -107,8 +108,10 @@
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // ARM PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.  However the mapping table has
@@ -122,7 +125,10 @@
 
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -139,7 +145,8 @@
                                void* context) {
   // These are the instructions to check for.  The first one is the ldr r0,[r9,#xxx]
   // where xxx is the offset of the suspend trigger.
-  uint32_t checkinst1 = 0xf8d90000 + Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+  uint32_t checkinst1 = 0xf8d90000
+      + Thread::ThreadSuspendTriggerOffset<PointerSize::k32>().Int32Value();
   uint16_t checkinst2 = 0x6800;
 
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index 28d1942..c3a5829 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -16,7 +16,7 @@
 
 #include "instruction_set_features_arm.h"
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(ART_TARGET_ANDROID) && defined(__arm__)
 #include <sys/auxv.h>
 #include <asm/hwcap.h>
 #endif
@@ -42,15 +42,15 @@
   // Look for variants that have divide support.
   static const char* arm_variants_with_div[] = {
           "cortex-a7", "cortex-a12", "cortex-a15", "cortex-a17", "cortex-a53", "cortex-a57",
-          "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
-          "cyclone", "denver", "krait", "swift"};
+          "cortex-a53.a57", "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
+          "cyclone", "denver", "krait", "swift" };
 
   bool has_div = FindVariantInArray(arm_variants_with_div, arraysize(arm_variants_with_div),
                                     variant);
 
   // Look for variants that have LPAE support.
   static const char* arm_variants_with_lpae[] = {
-      "cortex-a7", "cortex-a15", "krait", "denver"
+      "cortex-a7", "cortex-a15", "krait", "denver", "cortex-a53", "cortex-a57", "cortex-a53.a57"
   };
   bool has_lpae = FindVariantInArray(arm_variants_with_lpae, arraysize(arm_variants_with_lpae),
                                      variant);
@@ -166,7 +166,7 @@
   bool has_div = false;
   bool has_lpae = false;
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(ART_TARGET_ANDROID) && defined(__arm__)
   uint64_t hwcaps = getauxval(AT_HWCAP);
   LOG(INFO) << "hwcaps=" << hwcaps;
   if ((hwcaps & HWCAP_IDIVT) != 0) {
@@ -206,6 +206,7 @@
   struct sigaction sa, osa;
   sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
   sa.sa_sigaction = bad_divide_inst_handle;
+  sigemptyset(&sa.sa_mask);
   sigaction(SIGILL, &sa, &osa);
 
   bool has_div = false;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 631b784..e25e93f 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -42,30 +42,31 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
     SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
     vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
     .cfi_adjust_cfa_offset 64
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .cfi_adjust_cfa_offset 12
-    RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
+    @ Load kSaveAllCalleeSaves Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
 #endif
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
     push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
     .cfi_adjust_cfa_offset 28
     .cfi_rel_offset r5, 0
@@ -77,18 +78,19 @@
     .cfi_rel_offset lr, 24
     sub sp, #4                                    @ bottom word will hold Method*
     .cfi_adjust_cfa_offset 4
-    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
+    @ Load kSaveRefsOnly Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
 #endif
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     add sp, #4               @ bottom word holds Method*
     .cfi_adjust_cfa_offset -4
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
@@ -102,16 +104,16 @@
     .cfi_adjust_cfa_offset -28
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bx  lr                   @ return
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
     .cfi_adjust_cfa_offset 40
     .cfi_rel_offset r1, 0
@@ -126,30 +128,30 @@
     .cfi_rel_offset lr, 36
     vpush {s0-s15}                     @ 16 words of float args.
     .cfi_adjust_cfa_offset 64
-    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
+    sub sp, #8                         @ 2 words of space, alignment padding and Method*
     .cfi_adjust_cfa_offset 8
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
 #endif
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
-    RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
-     @ rTemp1 is kRefsAndArgs Method*.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
+    @ Load kSaveRefsAndArgs Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
-    str r0, [sp, #0]                   @ Store ArtMethod* to bottom of stack.
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     add  sp, #8                      @ rewind sp
     .cfi_adjust_cfa_offset -8
     vpop {s0-s15}
@@ -168,6 +170,65 @@
     .cfi_adjust_cfa_offset -40
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME rTemp
+    push {r0-r12, lr}                   @ 14 words: every core register except sp/pc.
+    .cfi_adjust_cfa_offset 56
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    .cfi_rel_offset r10, 40
+    .cfi_rel_offset r11, 44
+    .cfi_rel_offset ip, 48
+    .cfi_rel_offset lr, 52
+    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
+    .cfi_adjust_cfa_offset 128
+    sub sp, #8                          @ 2 words of space, alignment padding and Method*
+    .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
+    @ Load kSaveEverything Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
+#endif
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    add  sp, #8                         @ rewind sp
+    .cfi_adjust_cfa_offset -8
+    vpop {d0-d15}
+    .cfi_adjust_cfa_offset -128
+    pop {r0-r12, lr}                    @ 14 words: every core register except sp/pc.
+    .cfi_restore r0
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r9
+    .cfi_restore r10
+    .cfi_restore r11
+    .cfi_restore r12
+    .cfi_restore lr
+    .cfi_adjust_cfa_offset -56
+.endm
+
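
As a cross-check of the 56 + 128 + 8 arithmetic in the save-everything pair above, a minimal C++ sketch (numbers taken straight from the pushes, not from any runtime API):

    #include <cstddef>

    // 14 core registers (r0-r12, lr), 16 doubles (d0-d15, i.e. 32 words),
    // plus one word of alignment padding and one for the ArtMethod* slot.
    constexpr size_t kCoreBytes = 14 * 4;  // 56
    constexpr size_t kFpBytes   = 32 * 4;  // 128
    constexpr size_t kExtra     = 2 * 4;   // 8
    static_assert(kCoreBytes + kFpBytes + kExtra == 192,
                  "matches FRAME_SIZE_SAVE_EVERYTHING on ARM");
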
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -187,7 +248,7 @@
 .macro DELIVER_PENDING_EXCEPTION
     .fnend
     .fnstart
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1    @ save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
     mov    r0, r9                              @ pass Thread::Current
     b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
 .endm
@@ -195,7 +256,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r0, r1 // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
     mov r0, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -204,7 +265,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1, r2  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
     mov r1, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -213,7 +274,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r2, r3  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -245,11 +306,11 @@
 .macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case of GC
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -257,11 +318,11 @@
 .macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
-    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -269,14 +330,14 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12  @ save callee saves in case of GC
-    ldr    r3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
+    ldr    r3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     \return
 END \name
 .endm
@@ -293,6 +354,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Invoked via a call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -304,6 +370,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
@@ -330,12 +402,12 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
     mov    r2, r9                         @ pass Thread::Current
     mov    r3, sp
     bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
     mov    r12, r1                        @ save Method*->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
     bx     r12                            @ tail call to target
 1:
@@ -429,6 +501,56 @@
 END art_quick_invoke_stub_internal
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   r0 = stack to copy
+     *   r1 = size of stack
+     *   r2 = pc to call
+     *   r3 = JValue* result
+     *   [sp] = shorty
+     *   [sp + 4] = thread
+     */
+ENTRY art_quick_osr_stub
+    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill callee-save GPRs r4-r11 and LR (9 words)
+    mov    r11, sp                         @ Save the stack pointer
+    mov    r10, r1                         @ Save size of stack
+    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
+    mov    r8, r2                          @ Save the pc to call
+    sub    r7, sp, #12                     @ Reserve space for stack pointer,
+                                           @    JValue* result, and ArtMethod* slot.
+    and    r7, #0xFFFFFFF0                 @ Align stack pointer
+    mov    sp, r7                          @ Update stack pointer
+    str    r11, [sp, #4]                   @ Save old stack pointer
+    str    r3, [sp, #8]                    @ Save JValue* result
+    mov    ip, #0
+    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
+    sub    sp, sp, r1                      @ Reserve space for callee stack
+    mov    r2, r1
+    mov    r1, r0
+    mov    r0, sp
+    bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
+    bl     .Losr_entry                     @ Call the method
+    ldr    r10, [sp, #8]                   @ Restore JValue* result
+    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
+    ldr    r4, [sp, #36]                   @ load shorty
+    ldrb   r4, [r4, #0]                    @ load return type
+    cmp    r4, #68                         @ Test if result type char == 'D'.
+    beq    .Losr_fp_result
+    cmp    r4, #70                         @ Test if result type char == 'F'.
+    beq    .Losr_fp_result
+    strd r0, [r10]                         @ Store r0/r1 into result pointer
+    b    .Losr_exit
+.Losr_fp_result:
+    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
+.Losr_exit:
+    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+.Losr_entry:
+    sub r10, r10, #4
+    str lr, [sp, r10]                     @ Store link register per the compiler ABI
+    bx r8
+END art_quick_osr_stub
+
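
The stub's result handling branches on the first shorty character (68 is 'D', 70 is 'F'); the same dispatch in C++, with JValue modeled as a bare union for the sketch:

    #include <cstdint>

    union JValue { int64_t j; double d; };  // simplified stand-in

    // Store the OSR call's result according to the method's return type.
    void StoreOsrResult(char return_type, int64_t gpr_pair, double fpr, JValue* result) {
      if (return_type == 'D' || return_type == 'F') {
        result->d = fpr;       // vstr d0, [r10]
      } else {
        result->j = gpr_pair;  // strd r0, [r10]
      }
    }
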
+    /*
      * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
      */
 ARM_ENTRY art_quick_do_long_jump
@@ -459,7 +581,7 @@
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     mov    r3, r1
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     cbnz   r3, .Lnot_unlocked         @ already thin locked
     @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
     orr    r2, r1, r2                 @ r2 holds thread id with count of 0 with preserved read barrier bits
@@ -475,9 +597,9 @@
     cbnz   r2, .Lslow_lock            @ lock word and self thread id's match -> recursive lock
                                       @ else contention, go to slow path
     mov    r3, r1                     @ copy the lock word to check count overflow.
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits.
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits.
     add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
-    lsr    r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  @ if either of the upper two bits (28-29) are set, we overflowed.
+    lsr    r3, r2, #LOCK_WORD_GC_STATE_SHIFT    @ if the first gc state bit is set, we overflowed.
     cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
     add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
     strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
@@ -486,14 +608,23 @@
 .Llock_strex_fail:
     b      .Lretry_lock               @ retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
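
For reference, the thin-lock fast path above corresponds roughly to the following C++, with the ldrex/strex retry loop collapsed into a single CAS (the bit-layout constants are illustrative; the real values live in the lock word definition):

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kGcStateMaskShifted = 0x30000000u;  // illustrative
    constexpr uint32_t kGcStateShift       = 28u;          // illustrative
    constexpr uint32_t kThinLockCountOne   = 1u << 16;     // illustrative

    // Returns true on success; false means "take the slow path".
    bool TryThinLock(std::atomic<uint32_t>* lock_word, uint32_t thread_id) {
      uint32_t lock = lock_word->load(std::memory_order_relaxed);  // ldrex
      uint32_t state = lock & ~kGcStateMaskShifted;                // zero the gc bits
      if (state == 0) {                                            // unlocked
        // Install our thread id with a count of zero, preserving the gc bits.
        return lock_word->compare_exchange_strong(lock, lock | thread_id);
      }
      if (((state ^ thread_id) & 0xFFFFu) == 0) {                  // we already own it
        uint32_t bumped = state + kThinLockCountOne;               // check for overflow
        if ((bumped >> kGcStateShift) != 0) return false;          // count overflowed
        return lock_word->compare_exchange_strong(lock, lock + kThinLockCountOne);
      }
      return false;                                                // contended
    }
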
 
+ENTRY art_quick_lock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
+    mov    r1, r9                     @ pass Thread::Current
+    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      * r0 holds the possibly null object to lock.
@@ -511,17 +642,17 @@
     cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     mov    r3, r1                     @ copy lock word to check thread id equality
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r3, r3                     @ zero top 16 bits
     cbnz   r3, .Lslow_unlock          @ do lock word and self thread id's match?
     mov    r3, r1                     @ copy lock word to detect transition to unlocked
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     @ transition to unlocked
     mov    r3, r1
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK  @ r3: zero except for the preserved read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  @ r3: zero except for the preserved gc bits
     dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
 #ifndef USE_READ_BARRIER
     str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
@@ -543,14 +674,24 @@
     b      .Lretry_unlock             @ retry
 .Lslow_unlock:
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
+    SETUP_SAVE_REFS_ONLY_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    @ save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1
+    mov    r1, r9                     @ pass Thread::Current
+    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artIsAssignableFromCode and on failure calls
      * artThrowClassCastException.
@@ -578,7 +719,7 @@
     .cfi_restore r0
     .cfi_restore r1
     .cfi_restore lr
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2, r3  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   artThrowClassCastException  @ (Class*, Class*, Thread*)
     bkpt
@@ -592,6 +733,12 @@
     .endif
 .endm
 
+// Save rReg's value to [sp, #offset].
+.macro PUSH_REG rReg, offset
+    str \rReg, [sp, #\offset]       @ save rReg
+    .cfi_rel_offset \rReg, \offset
+.endm
+
     /*
      * Macro to insert read barrier, only used in art_quick_aput_obj.
      * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
@@ -714,7 +861,7 @@
 .Lthrow_array_store_exception:
     pop {r0-r2, lr}
     /* No need to repeat restore cfi directives, the ones above apply here. */
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3, ip
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
     mov r1, r2
     mov r2, r9                     @ pass Thread::Current
     b artThrowArrayStoreException  @ (Class*, Class*, Thread*)
@@ -725,10 +872,10 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r1, r2  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
     mov    r1, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -737,10 +884,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -749,11 +896,11 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
     @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -762,14 +909,13 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
-    .pad #16
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -792,12 +938,12 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -818,12 +964,12 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
-    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -843,15 +989,15 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r12   @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_static
@@ -868,9 +1014,9 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
-    ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    ldr    r12, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 12
     str    r12, [sp, #-4]!               @ expand the frame and pass the referrer
@@ -878,21 +1024,59 @@
     bl     artSet64InstanceFromCode      @ (field_idx, Object*, new_val, Method* referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_instance
 
     /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the string index. The fast
-     * path check for hit in strings cache has already been performed.
+     * Entry from managed code to resolve a string. This stub first
+     * checks the dex cache for a matching string (the fast path); on a miss
+     * it calls into the runtime to allocate the String, delivering an
+     * exception on error. On success the String is returned. R0 holds the
+     * string index.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+    ldr    r1, [sp]                                              @ load referrer
+    ldr    r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET]          @ load declaring class
+    ldr    r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]   @ load string dex cache
+    ubfx   r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS               @ r2 := cache slot (low bits of the string index)
+    add    r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT  @ r1 := address of the cache entry
+    ldrd   r2, r3, [r1]                                    @ load index into r3 and pointer into r2
+    cmp    r0, r3
+    bne    .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    ldr    r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   r3, .Lart_quick_resolve_string_marking
+#endif
+    mov    r0, r2
+    bx     lr
+// Slow path case, the index did not match
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME r2                    @ save callee saves in case of GC
+    mov    r1, r9                                    @ pass Thread::Current
+    mov    r3, sp
+    bl     artResolveStringFromCode                  @ (uint32_t string_idx, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+    ldr    r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst    r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    mov    r0, r2
+    bne    .Lart_quick_resolve_string_no_rb
+    push   {r1, r2, r3, lr}                          @ Save r1-r3 and LR; r0 already holds the string
+    .cfi_adjust_cfa_offset 16
+    bl     artReadBarrierMark                        @ Get the marked string back.
+    pop    {r1, r2, r3, lr}                          @ Restore registers.
+    .cfi_adjust_cfa_offset -16
+.Lart_quick_resolve_string_no_rb:
+    bx     lr
+END art_quick_resolve_string
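
The fast path is a direct-mapped, one-entry-per-slot cache lookup keyed on the low bits of the string index; schematically, with the entry modeled as a plain struct:

    #include <cstdint>

    // Simplified model of one dex-cache string entry, as loaded by the ldrd.
    struct StringDexCachePair {
      void*    string_ptr;  // mirror::String*, ends up in r2
      uint32_t string_idx;  // ends up in r3
    };

    void* LookupStringFast(const StringDexCachePair* cache, uint32_t string_idx,
                           uint32_t hash_bits) {
      uint32_t slot = string_idx & ((1u << hash_bits) - 1);  // ubfx
      StringDexCachePair entry = cache[slot];                // ldrd r2, r3, [r1]
      if (entry.string_idx != string_idx) return nullptr;    // miss -> slow path
      return entry.string_ptr;                               // hit (read barrier elided)
    }
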
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -963,6 +1147,18 @@
 #endif
     POISON_HEAP_REF r2
     str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that it also ensures ordering of
+                                                              // the class status load with respect
+                                                              // to later accesses to the class
+                                                              // object. Alternatively we could use
+                                                              // "ishst" if we use load-acquire for
+                                                              // the class status load.
+                                                              // Needs to be done before pushing on
+                                                              // allocation since Heap::VisitObjects
+                                                              // relies on seeing the class pointer.
+                                                              // b/28790624
+    dmb    ish
                                                               // Push the new object onto the thread
                                                               // local allocation stack and
                                                               // increment the thread local
@@ -977,6 +1173,71 @@
                                                               // and the list head store above using
                                                               // strd.
     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+
+    mov    r0, r3                                             // Set the return value and return.
+    bx     lr
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
+    mov    r2, r9                     @ pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_rosalloc
+
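
Only the tail of the rosalloc fast path is visible in this hunk; its overall shape is a pop from a thread-local run's free list, roughly as below (the types are illustrative, not rosalloc's real declarations):

    #include <cstdint>

    struct FreeSlot { FreeSlot* next; };
    struct FreeList { FreeSlot* head; uint32_t size; };

    // Pop one slot from the run's free list, or fail over to the slow path.
    void* RosAllocFastPath(FreeList* list) {
      FreeSlot* slot = list->head;
      if (slot == nullptr) return nullptr;  // empty -> artAllocObjectFromCodeRosAlloc
      list->head = slot->next;              // unlink the head
      list->size -= 1;                      // mirrors the size store above
      return slot;                          // the slot becomes the new object
    }
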
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free.
+// r0 and r1 must be preserved for the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    r2, \slowPathLabel                                 // Check null class
+                                                              // Check class status.
+    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    \slowPathLabel
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    r3, r3, r3
+    add    r2, r2, r3
+                                                              // Check whether the access
+                                                              // flags include
+                                                              // kAccClassIsFinalizable.
+    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    \slowPathLabel
+                                                              // Load thread_local_pos (r12) and
+                                                              // thread_local_end (r3) with ldrd.
+                                                              // Check constraints for ldrd.
+#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
+#error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
+#endif
+    ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
+    sub    r12, r3, r12                                       // Compute the remaining buf size.
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3).
+    cmp    r3, r12                                            // Check if it fits. OK to do this
+                                                              // before rounding up the object size
+                                                              // assuming the buf size alignment.
+    bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
+                                                              // Round up the object size by the
+                                                              // object alignment. (addr + 7) & ~7.
+    add    r3, r3, #OBJECT_ALIGNMENT_MASK
+    and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
+                                                              // Reload old thread_local_pos (r0)
+                                                              // for the return value.
+    ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
+    add    r1, r0, r3
+    str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
+    ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
+    add    r1, r1, #1
+    str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF r2
+    str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that the code after this allocation
                                                               // site will see the right values in
@@ -985,16 +1246,70 @@
                                                               // if we use load-acquire for the
                                                              // class status load.
     dmb    ish
-    mov    r0, r3                                             // Set the return value and return.
     bx     lr
+.endm
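
The macro is a textbook bump-pointer allocation over the thread-local buffer; the same control flow in C++ (field names simplified from Thread's TLAB bookkeeping):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    struct Tlab { uint8_t* pos; uint8_t* end; size_t objects; };

    // Returns the new object, or nullptr to fall back to the slow path.
    void* TlabAllocFastPath(Tlab* tlab, void* klass, bool initialized,
                            bool finalizable, size_t object_size) {
      if (klass == nullptr || !initialized || finalizable) return nullptr;
      size_t remaining = tlab->end - tlab->pos;         // ldrd + sub
      if (object_size > remaining) return nullptr;      // cmp/bhi, pre-rounding is fine
      size_t rounded = (object_size + 7) & ~size_t{7};  // (size + 7) & ~7
      uint8_t* obj = tlab->pos;                         // old thread_local_pos
      tlab->pos = obj + rounded;                        // bump the pointer
      tlab->objects += 1;                               // thread_local_objects++
      std::memcpy(obj, &klass, sizeof(klass));          // store the class pointer
      return obj;                                       // then dmb ish, bx lr
    }
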
 
-.Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
+    // r2, r3, r12: free.
+#if defined(USE_READ_BARRIER)
+    mvn    r0, #0                                             // Read barrier not supported here.
+    bx     lr                                                 // Return -1.
+#endif
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_rosalloc
+END art_quick_alloc_object_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+ENTRY art_quick_alloc_object_region_tlab
+    // Fast path tlab allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free.
+#if !defined(USE_READ_BARRIER)
+    eor    r0, r0, r0                                         // Read barrier must be enabled here.
+    sub    r0, r0, #1                                         // Return -1.
+    bx     lr
+#endif
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+                                                              // Read barrier for class load.
+    ldr    r3, [r9, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+    cbz    r2, .Lart_quick_alloc_object_region_tlab_slow_path  // Null check for loading lock word.
+    // Check lock word for mark bit, if marked do the allocation.
+    ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    ands r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    bne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    push   {r0, r1, r3, lr}                                   // Save registers. r3 is pushed only
+                                                              // to align sp by 16 bytes.
+    mov    r0, r2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    r2, r0                                             // Get the (marked) class back.
+    pop    {r0, r1, r3, lr}
+    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_region_tlab
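
Under the concurrent copying collector the class loaded from the dex cache must satisfy the to-space invariant before the fast path may allocate against it; the gating logic above, schematically (the mark-bit constant is illustrative):

    #include <cstdint>

    constexpr uint32_t kMarkBitMaskShifted = 1u << 31;  // illustrative bit position

    // Returns the class pointer the allocation fast path may use.
    void* MarkClassIfNeeded(void* klass, bool gc_is_marking, uint32_t lock_word,
                            void* (*mark)(void*)) {
      if (!gc_is_marking) return klass;                   // no barrier needed right now
      if (klass == nullptr) return klass;                 // slow path handles null
      if (lock_word & kMarkBitMaskShifted) return klass;  // already marked
      return mark(klass);                                 // artReadBarrierMark(klass)
    }
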
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
@@ -1002,24 +1317,25 @@
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
 #ifdef ARM_R4_SUSPEND_FLAG
-    ldrh   r0, [rSELF, #THREAD_FLAGS_OFFSET]
-    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL  @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
-    cbnz   r0, 1f                             @ check Thread::Current()->suspend_count_ == 0
-    bx     lr                                 @ return if suspend_count_ == 0
+    ldrh   rSUSPEND, [rSELF, #THREAD_FLAGS_OFFSET]
+    cbnz   rSUSPEND, 1f                         @ check Thread::Current()->suspend_count_ == 0
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
+    bx     lr                                   @ return if suspend_count_ == 0
 1:
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
 #endif
+    SETUP_SAVE_EVERYTHING_FRAME r0              @ save everything for GC stack crawl
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for GC stack crawl
-    @ TODO: save FPRs to enable access in the debugger?
-    bl     artTestSuspendFromCode             @ (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    bl     artTestSuspendFromCode               @ (Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    bx     lr
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for stack crawl
+    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
     bl     artTestSuspendFromCode             @ (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
     /*
@@ -1029,15 +1345,15 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
@@ -1046,27 +1362,51 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. r12 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * r0 is the conflict ArtMethod.
+     * r12 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to r0, r4, and r12.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    mov    r0, r12
+    ldr r4, [sp, #0]  // Load referrer
+    ldr r4, [r4, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_32]   // Load dex cache methods array
+    ldr r12, [r4, r12, lsl #POINTER_SIZE_SHIFT]  // Load interface method
+    ldr r0, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
+    ldr r4, [r0]  // Load first entry in ImtConflictTable.
+.Limt_table_iterate:
+    cmp r4, r12
+    // Branch if found. Benchmarks have shown doing a branch here is better.
+    beq .Limt_table_found
+    // If the entry is null, the interface method is not in the ImtConflictTable.
+    cbz r4, .Lconflict_trampoline
+    // Iterate over the entries of the ImtConflictTable.
+    ldr r4, [r0, #(2 * __SIZEOF_POINTER__)]!
+    b .Limt_table_iterate
+.Limt_table_found:
+    // We successfully hit an entry in the table. Load the target method
+    // and jump to it.
+    ldr r0, [r0, #__SIZEOF_POINTER__]
+    ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
+.Lconflict_trampoline:
+    // Call the runtime stub to populate the ImtConflictTable and jump to the
+    // resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
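
The trampoline walks a null-terminated array of (interface method, implementation) pairs; the same lookup in C++, with the table layout inferred from the pointer arithmetic above:

    #include <cstddef>

    // Entries are [interface_method, implementation] pairs, terminated by a
    // null interface_method.
    void* LookupImtConflictTable(void** table, void* interface_method) {
      for (size_t i = 0; table[2 * i] != nullptr; ++i) {  // cbz -> runtime stub
        if (table[2 * i] == interface_method) {           // cmp/beq
          return table[2 * i + 1];                        // the resolved ArtMethod*
        }
      }
      return nullptr;  // not found: artInvokeInterfaceTrampoline fills the table
    }
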
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
     cbz     r0, 1f                 @ is code pointer null? goto exception
     mov     r12, r0
     ldr  r0, [sp, #0]              @ load resolved method in r0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bx      r12                    @ tail-call into actual code
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1074,7 +1414,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
 
     // Save rSELF
     mov r11, rSELF
@@ -1141,16 +1481,16 @@
     .cfi_def_cfa_register sp
 
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
 
     // store into fpr, for when it's a fpr return...
     vmov d0, r0, r1
     bx lr      // ret
     // Undo the unwinding information from above since it doesn't apply below.
     .cfi_def_cfa_register r10
-    .cfi_adjust_cfa_offset FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
 
 .Lexception_in_native:
     ldr sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -1161,15 +1501,15 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1, r2
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
     mov     r1, r9                 @ pass Thread::Current
     mov     r2, sp                 @ pass SP
     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
@@ -1184,22 +1524,22 @@
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
     @ Make stack crawlable and clobber r2 and r3 (post saving)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
-    @ preserve r0 (not normally an arg) knowing there is a spare slot in kRefsAndArgs.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
+    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
     mov   r3, lr         @ pass LR
     blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     blx   r12            @ call method with lr set to art_quick_instrumentation_exit
 @ Deliberate fall-through into art_quick_instrumentation_exit.
     .type art_quick_instrumentation_exit, #function
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
     mov   lr, #0         @ link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ set up frame knowing r2 and r3 must be dead on exit
+    SETUP_SAVE_REFS_ONLY_FRAME r2  @ set up frame knowing r2 and r3 must be dead on exit
     mov   r12, sp        @ remember bottom of caller's frame
     push  {r0-r1}        @ save return value
     .cfi_adjust_cfa_offset 8
@@ -1238,7 +1578,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
     mov    r0, r9         @ Set up args.
     blx    artDeoptimize  @ artDeoptimize(Thread*)
 END art_quick_deoptimize
@@ -1249,7 +1589,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
     mov    r0, r9                         @ Set up args.
     blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
 END art_quick_deoptimize_from_compiled_code
@@ -1457,145 +1797,6 @@
     pop {r4, r10-r11, pc}
 END art_quick_indexof
 
-   /*
-     * String's compareTo.
-     *
-     * Requires rARG0/rARG1 to have been previously checked for null.  Will
-     * return negative if this's string is < comp, 0 if they are the
-     * same and positive if >.
-     *
-     * On entry:
-     *    r0:   this object pointer
-     *    r1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    r2, r0         @ this to r2, opening up r0 for return value
-    sub    r0, r2, r1     @ Same?
-    cbnz   r0,1f
-    bx     lr
-1:                        @ Same strings, return.
-
-    push {r4, r7-r12, lr} @ 8 words - keep alignment
-    .cfi_adjust_cfa_offset 32
-    .cfi_rel_offset r4, 0
-    .cfi_rel_offset r7, 4
-    .cfi_rel_offset r8, 8
-    .cfi_rel_offset r9, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset r12, 24
-    .cfi_rel_offset lr, 28
-
-    ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
-    add    r2, #MIRROR_STRING_VALUE_OFFSET
-    add    r1, #MIRROR_STRING_VALUE_OFFSET
-
-    /*
-     * At this point, we have:
-     *    value:  r2/r1
-     *    offset: r4/r9
-     *    count:  r7/r10
-     * We're going to compute
-     *    r11 <- countDiff
-     *    r10 <- minCount
-     */
-     subs  r11, r7, r10
-     it    ls
-     movls r10, r7
-
-     /*
-      * Note: data pointers point to previous element so we can use pre-index
-      * mode with base writeback.
-      */
-     subs  r2, #2   @ offset to contents[-1]
-     subs  r1, #2   @ offset to contents[-1]
-
-     /*
-      * At this point we have:
-      *   r2: *this string data
-      *   r1: *comp string data
-      *   r10: iteration count for comparison
-      *   r11: value to return if the first part of the string is equal
-      *   r0: reserved for result
-      *   r3, r4, r7, r8, r9, r12 available for loading string data
-      */
-
-    subs  r10, #2
-    blt   .Ldo_remainder2
-
-      /*
-       * Unroll the first two checks so we can quickly catch early mismatch
-       * on long strings (but preserve incoming alignment)
-       */
-
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    bne   .Ldone
-    cmp   r10, #28
-    bgt   .Ldo_memcmp16
-    subs  r10, #3
-    blt   .Ldo_remainder
-
-.Lloopback_triple:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    ldrh  r9, [r2, #2]!
-    ldrh  r12,[r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    it    eq
-    subseq  r0, r9, r12
-    bne   .Ldone
-    subs  r10, #3
-    bge   .Lloopback_triple
-
-.Ldo_remainder:
-    adds  r10, #3
-    beq   .Lreturn_diff
-
-.Lloopback_single:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    subs  r0, r3, r4
-    bne   .Ldone
-    subs  r10, #1
-    bne   .Lloopback_single
-
-.Lreturn_diff:
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-.Ldo_remainder2:
-    adds  r10, #2
-    bne   .Lloopback_single
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-    /* Long string case */
-.Ldo_memcmp16:
-    mov   r7, r11
-    add   r0, r2, #2
-    add   r1, r1, #2
-    mov   r2, r10
-    bl    __memcmp16
-    cmp   r0, #0
-    it    eq
-    moveq r0, r7
-.Ldone:
-    pop   {r4, r7-r12, pc}
-END art_quick_string_compareto
-
     /* Assembly routines used to handle ABI differences. */
 
     /* double fmod(double a, double b) */
@@ -1629,7 +1830,7 @@
     add   sp, #4
     .cfi_adjust_cfa_offset -4
     pop   {pc}
-END art_quick_fmod
+END art_quick_fmodf
 
     /* int64_t art_d2l(double d) */
     .extern art_d2l
@@ -1659,3 +1860,104 @@
     .cfi_adjust_cfa_offset -4
     pop   {pc}
 END art_quick_l2f
+
+.macro CONDITIONAL_CBZ reg, reg_if, dest
+.ifc \reg, \reg_if
+    cbz \reg, \dest
+.endif
+.endm
+
+.macro CONDITIONAL_CMPBZ reg, reg_if, dest
+.ifc \reg, \reg_if
+    cmp \reg, #0
+    beq \dest
+.endif
+.endm
+
+// Use CBZ if the register is in {r0-r7}, the only registers CBZ can encode; otherwise compare and branch.
+.macro SMART_CBZ reg, dest
+    CONDITIONAL_CBZ \reg, r0, \dest
+    CONDITIONAL_CBZ \reg, r1, \dest
+    CONDITIONAL_CBZ \reg, r2, \dest
+    CONDITIONAL_CBZ \reg, r3, \dest
+    CONDITIONAL_CBZ \reg, r4, \dest
+    CONDITIONAL_CBZ \reg, r5, \dest
+    CONDITIONAL_CBZ \reg, r6, \dest
+    CONDITIONAL_CBZ \reg, r7, \dest
+    CONDITIONAL_CMPBZ \reg, r8, \dest
+    CONDITIONAL_CMPBZ \reg, r9, \dest
+    CONDITIONAL_CMPBZ \reg, r10, \dest
+    CONDITIONAL_CMPBZ \reg, r11, \dest
+    CONDITIONAL_CMPBZ \reg, r12, \dest
+    CONDITIONAL_CMPBZ \reg, r13, \dest
+    CONDITIONAL_CMPBZ \reg, r14, \dest
+    CONDITIONAL_CMPBZ \reg, r15, \dest
+.endm
+
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     *
+     * If `reg` is different from `r0`, the generated function follows a
+     * non-standard runtime calling convention:
+     * - register `reg` is used to pass the (sole) argument of this
+     *   function (instead of R0);
+     * - register `reg` is used to return the result of this function
+     *   (instead of R0);
+     * - R0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. standard callee-save registers are preserved).
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    // Null check so that we can load the lock word.
+    SMART_CBZ \reg, .Lret_rb_\name
+    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
+    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq .Lslow_rb_\name
+    // Already marked, return right away.
+    bx lr
+
+.Lslow_rb_\name:
+    push  {r0-r5, r9, lr}               @ save return address and core caller-save registers
+                                        @ also save callee save r5 for 16 byte alignment
+    .cfi_adjust_cfa_offset 32
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r9, 24
+    .cfi_rel_offset lr, 28
+    vpush {s0-s15}                      @ save floating-point caller-save registers
+    .cfi_adjust_cfa_offset 64
+
+    .ifnc \reg, r0
+      mov   r0, \reg                    @ pass arg1 - obj from `reg`
+    .endif
+    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
+    mov ip, r0                          @ Save result in IP
+    vpop {s0-s15}                       @ restore floating-point registers
+    .cfi_adjust_cfa_offset -64
+    pop   {r0-r5, r9, lr}               @ restore caller-save registers
+    mov \reg, ip                        @ copy result to reg
+.Lret_rb_\name:
+    bx lr
+END \name
+.endm
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
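
Behaviorally, each stub generated above is the function below with both the parameter and the return value pinned to one specific register, a constraint C++ cannot express; the lock-word load is left to the caller for brevity, and the mark-bit constant is illustrative:

    #include <cstdint>

    constexpr uint32_t kMarkBitMaskShifted = 1u << 31;  // illustrative bit position

    extern "C" void* artReadBarrierMark(void* obj);

    void* ReadBarrierMarkReg(void* obj, uint32_t lock_word) {
      if (obj == nullptr) return nullptr;               // SMART_CBZ null check
      if (lock_word & kMarkBitMaskShifted) return obj;  // already marked: fast return
      return artReadBarrierMark(obj);                   // spill, call, restore
    }
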
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 5580ee4..4b23c77 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -34,6 +34,9 @@
     (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
 static constexpr uint32_t kArmCalleeSaveAllSpills =
     (1 << art::arm::R4) | (1 << art::arm::R9);
+static constexpr uint32_t kArmCalleeSaveEverythingSpills =
+    (1 << art::arm::R0) | (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3) |
+    (1 << art::arm::R4) | (1 << art::arm::R9) | (1 << art::arm::R12);
 
 static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
@@ -47,23 +50,27 @@
     (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
     (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
     (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
+static constexpr uint32_t kArmCalleeSaveFpEverythingSpills =
+    kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills;
 
 constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArmCalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(ArmCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(ArmCalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kArmPointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kArmPointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
@@ -75,17 +82,17 @@
 constexpr size_t ArmCalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
          (POPCOUNT(ArmCalleeSaveCoreSpills(type)) +
-          POPCOUNT(ArmCalleeSaveFpSpills(type))) * kArmPointerSize;
+          POPCOUNT(ArmCalleeSaveFpSpills(type))) * static_cast<size_t>(kArmPointerSize);
 }
 
 constexpr size_t ArmCalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
-         POPCOUNT(ArmCalleeSaveCoreSpills(type)) * kArmPointerSize;
+         POPCOUNT(ArmCalleeSaveCoreSpills(type)) * static_cast<size_t>(kArmPointerSize);
 }
 
 constexpr size_t ArmCalleeSaveLrOffset(Runtime::CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
-      POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * kArmPointerSize;
+      POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * static_cast<size_t>(kArmPointerSize);
 }
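+
+// Worked example (a sketch, assuming kArmCalleeSaveAlwaysSpills covers LR and
+// kArmCalleeSaveRefSpills covers R5-R8, R10 and R11, as defined earlier in
+// this file): for kSaveEverything, ArmCalleeSaveCoreSpills() has 14 bits set
+// and ArmCalleeSaveFpSpills() has 32, so ArmCalleeSaveFrameSize() evaluates to
+// RoundUp((14 + 32 + 1 /* Method* */) * 4, kStackAlignment) = 192 bytes.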
 
 }  // namespace arm
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 2a551a8..ff4f81b 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -17,15 +17,16 @@
 #include "thread.h"
 
 #include "asm_support_arm.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<PointerSize::k32>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 989ecc6..5e7b51d 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 176
+#define FRAME_SIZE_SAVE_REFS_ONLY 96
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 224
+#define FRAME_SIZE_SAVE_EVERYTHING 512
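+// (SAVE_EVERYTHING frame: ArtMethod* slot (8) + d0-d31 (256) + x0-x29 and LR (248) = 512.)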
 
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 371cbb2..55b09c3 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
@@ -26,71 +27,54 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
+
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg16(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg23(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg24(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg25(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg26(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg27(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg28(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // Math
   // TODO null entrypoints not needed for ARM64 - generate inline.
   qpoints->pCmpgDouble = nullptr;
@@ -113,43 +97,74 @@
   qpoints->pShrLong = nullptr;
   qpoints->pUshrLong = nullptr;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM64 StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimization from compiled code.
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  // ARM64 is the architecture with the largest number of core
+  // registers (32) that supports the read barrier configuration.
+  // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
+  // arguments, only define ReadBarrierMarkRegX entrypoints for the
+  // first 30 registers.  This limitation is not a problem on other
+  // supported architectures (ARM, x86 and x86-64) either, as they
+// have fewer core registers (resp. 16, 8 and 16).  (We may have to
+  // revise that design choice if read barrier support is added for
+  // MIPS and/or MIPS64.)
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  qpoints->pReadBarrierMarkReg16 = nullptr;  // IP0 is used as a temp by the asm stub.
+  qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17;
+  qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18;
+  qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19;
+  qpoints->pReadBarrierMarkReg20 = art_quick_read_barrier_mark_reg20;
+  qpoints->pReadBarrierMarkReg21 = art_quick_read_barrier_mark_reg21;
+  qpoints->pReadBarrierMarkReg22 = art_quick_read_barrier_mark_reg22;
+  qpoints->pReadBarrierMarkReg23 = art_quick_read_barrier_mark_reg23;
+  qpoints->pReadBarrierMarkReg24 = art_quick_read_barrier_mark_reg24;
+  qpoints->pReadBarrierMarkReg25 = art_quick_read_barrier_mark_reg25;
+  qpoints->pReadBarrierMarkReg26 = art_quick_read_barrier_mark_reg26;
+  qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
+  qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
+  qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
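+
+// Sketch (illustrative, not part of this patch): because the pReadBarrierMarkRegXX
+// fields are laid out consecutively, a code generator can locate the entrypoint
+// for core register `n` arithmetically instead of switching over all 30 fields:
+//   int32_t entry_point_offset =
+//       QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pReadBarrierMarkReg00).Int32Value() +
+//       n * static_cast<int32_t>(PointerSize::k64);
+//   // ...then load from TR + entry_point_offset and branch-and-link to it.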
 
 }  // namespace art
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 3e9ad0d..6724d6d 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -20,6 +20,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
@@ -29,7 +30,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 //
@@ -84,8 +85,10 @@
   *out_return_pc = sc->pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -95,7 +98,10 @@
 
   sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
 
-  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -112,7 +118,8 @@
                                void* context) {
   // These are the instructions to check for.  The first one is the ldr x0,[r18,#xxx]
   // where xxx is the offset of the suspend trigger.
-  uint32_t checkinst1 = 0xf9400240 | (Thread::ThreadSuspendTriggerOffset<8>().Int32Value() << 7);
+  uint32_t checkinst1 = 0xf9400240 |
+      (Thread::ThreadSuspendTriggerOffset<PointerSize::k64>().Int32Value() << 7);
   uint32_t checkinst2 = 0xf9400000;
 
   struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index 395cee8..cad13b2 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -39,7 +39,7 @@
   if (!needs_a53_835769_fix) {
     // Check to see if this is an expected variant.
     static const char* arm64_known_variants[] = {
-        "denver64"
+        "denver64", "kryo", "exynos-m1"
     };
     if (!FindVariantInArray(arm64_known_variants, arraysize(arm64_known_variants), variant)) {
       std::ostringstream os;
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index 805131f..abd7e83 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -66,14 +66,6 @@
       return fix_cortex_a53_843419_;
   }
 
-  // NOTE: This flag can be tunned on a CPU basis. In general all ARMv8 CPUs
-  // should prefer the Acquire-Release semantics over the explicit DMBs when
-  // handling load/store-volatile. For a specific use case see the ARM64
-  // Optimizing backend.
-  bool PreferAcquireRelease() const {
-    return true;
-  }
-
   virtual ~Arm64InstructionSetFeatures() {}
 
  protected:
diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
index 599f24e..027e59c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64_test.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
@@ -30,8 +30,6 @@
   EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
   EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
   EXPECT_EQ(arm64_features->AsBitmap(), 3U);
-  // See the comments in instruction_set_features_arm64.h.
-  EXPECT_TRUE(arm64_features->AsArm64InstructionSetFeatures()->PreferAcquireRelease());
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index be5a15e..202846a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -21,25 +21,25 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    // Loads appropriate callee-save-method.
-    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
 
     sub sp, sp, #176
     .cfi_adjust_cfa_offset 176
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 176)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
 #endif
 
     // Stack alignment filler [sp, #8].
@@ -74,7 +74,7 @@
     .cfi_rel_offset x29, 160
     .cfi_rel_offset x30, 168
 
-    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs].
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
     str xIP0, [sp]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
@@ -83,25 +83,25 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefOnly]  .
-    // Loads appropriate callee-save-method.
-    ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
 
     sub sp, sp, #96
     .cfi_adjust_cfa_offset 96
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
 #endif
 
     // GP callee-saves.
@@ -126,7 +126,7 @@
     .cfi_rel_offset x29, 80
     .cfi_rel_offset x30, 88
 
-    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly].
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
     stp xIP0, x20, [sp]
     .cfi_rel_offset x20, 8
 
@@ -136,7 +136,7 @@
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     // Callee-saves.
     ldr x20, [sp, #8]
     .cfi_restore x20
@@ -165,24 +165,24 @@
     .cfi_adjust_cfa_offset -96
 .endm
 
-.macro POP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro POP_SAVE_REFS_ONLY_FRAME
     add sp, sp, #96
     .cfi_adjust_cfa_offset - 96
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     ret
 .endm
 
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     sub sp, sp, #224
     .cfi_adjust_cfa_offset 224
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 224)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
 #endif
 
     // Stack alignment filler [sp, #8].
@@ -235,30 +235,31 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      *
      * TODO This is probably too conservative - saving FP & LR.
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
 
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
 
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
@@ -266,7 +267,7 @@
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     // FP args.
     ldp d0, d1, [sp, #16]
     ldp d2, d3, [sp, #32]
@@ -316,6 +317,208 @@
     .cfi_adjust_cfa_offset -224
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    sub sp, sp, #512
+    .cfi_adjust_cfa_offset 512
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
+#endif
+
+    // Save FP registers.
+    // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
+    str d0,       [sp, #8]
+    stp d1, d2,   [sp, #16]
+    stp d3, d4,   [sp, #32]
+    stp d5, d6,   [sp, #48]
+    stp d7, d8,   [sp, #64]
+    stp d9, d10,  [sp, #80]
+    stp d11, d12, [sp, #96]
+    stp d13, d14, [sp, #112]
+    stp d15, d16, [sp, #128]
+    stp d17, d18, [sp, #144]
+    stp d19, d20, [sp, #160]
+    stp d21, d22, [sp, #176]
+    stp d23, d24, [sp, #192]
+    stp d25, d26, [sp, #208]
+    stp d27, d28, [sp, #224]
+    stp d29, d30, [sp, #240]
+    str d31,      [sp, #256]
+
+    // Save core registers.
+    str x0,       [sp, #264]
+    .cfi_rel_offset x0, 264
+
+    stp x1, x2,   [sp, #272]
+    .cfi_rel_offset x1, 272
+    .cfi_rel_offset x2, 280
+
+    stp x3, x4,   [sp, #288]
+    .cfi_rel_offset x3, 288
+    .cfi_rel_offset x4, 296
+
+    stp x5, x6,   [sp, #304]
+    .cfi_rel_offset x5, 304
+    .cfi_rel_offset x6, 312
+
+    stp x7, x8,   [sp, #320]
+    .cfi_rel_offset x7, 320
+    .cfi_rel_offset x8, 328
+
+    stp x9, x10,  [sp, #336]
+    .cfi_rel_offset x9, 336
+    .cfi_rel_offset x10, 344
+
+    stp x11, x12, [sp, #352]
+    .cfi_rel_offset x11, 352
+    .cfi_rel_offset x12, 360
+
+    stp x13, x14, [sp, #368]
+    .cfi_rel_offset x13, 368
+    .cfi_rel_offset x14, 376
+
+    stp x15, x16, [sp, #384]
+    .cfi_rel_offset x15, 384
+    .cfi_rel_offset x16, 392
+
+    stp x17, x18, [sp, #400]
+    .cfi_rel_offset x17, 400
+    .cfi_rel_offset x18, 408
+
+    stp x19, x20, [sp, #416]
+    .cfi_rel_offset x19, 416
+    .cfi_rel_offset x20, 424
+
+    stp x21, x22, [sp, #432]
+    .cfi_rel_offset x21, 432
+    .cfi_rel_offset x22, 440
+
+    stp x23, x24, [sp, #448]
+    .cfi_rel_offset x23, 448
+    .cfi_rel_offset x24, 456
+
+    stp x25, x26, [sp, #464]
+    .cfi_rel_offset x25, 464
+    .cfi_rel_offset x26, 472
+
+    stp x27, x28, [sp, #480]
+    .cfi_rel_offset x27, 480
+    .cfi_rel_offset x28, 488
+
+    stp x29, xLR, [sp, #496]
+    .cfi_rel_offset x29, 496
+    .cfi_rel_offset x30, 504
+
+    // art::Runtime** xIP0 = &art::Runtime::instance_
+    adrp xIP0, :got:_ZN3art7Runtime9instance_E
+    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
+
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
+    str xIP0, [sp]
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
+.endm
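+
+// Resulting kSaveEverything frame layout (summary of the stores above):
+//   [sp, #0]        ArtMethod* (the kSaveEverything runtime method)
+//   [sp, #8]        d0-d31      (ends at #264)
+//   [sp, #264]      x0-x29, xLR (ends at #512)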
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    // Restore FP registers.
+    // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
+    ldr d0,       [sp, #8]
+    ldp d1, d2,   [sp, #16]
+    ldp d3, d4,   [sp, #32]
+    ldp d5, d6,   [sp, #48]
+    ldp d7, d8,   [sp, #64]
+    ldp d9, d10,  [sp, #80]
+    ldp d11, d12, [sp, #96]
+    ldp d13, d14, [sp, #112]
+    ldp d15, d16, [sp, #128]
+    ldp d17, d18, [sp, #144]
+    ldp d19, d20, [sp, #160]
+    ldp d21, d22, [sp, #176]
+    ldp d23, d24, [sp, #192]
+    ldp d25, d26, [sp, #208]
+    ldp d27, d28, [sp, #224]
+    ldp d29, d30, [sp, #240]
+    ldr d31,      [sp, #256]
+
+    // Restore core registers.
+    ldr x0,       [sp, #264]
+    .cfi_restore x0
+
+    ldp x1, x2,   [sp, #272]
+    .cfi_restore x1
+    .cfi_restore x2
+
+    ldp x3, x4,   [sp, #288]
+    .cfi_restore x3
+    .cfi_restore x4
+
+    ldp x5, x6,   [sp, #304]
+    .cfi_restore x5
+    .cfi_restore x6
+
+    ldp x7, x8,   [sp, #320]
+    .cfi_restore x7
+    .cfi_restore x8
+
+    ldp x9, x10,  [sp, #336]
+    .cfi_restore x9
+    .cfi_restore x10
+
+    ldp x11, x12, [sp, #352]
+    .cfi_restore x11
+    .cfi_restore x12
+
+    ldp x13, x14, [sp, #368]
+    .cfi_restore x13
+    .cfi_restore x14
+
+    ldp x15, x16, [sp, #384]
+    .cfi_restore x15
+    .cfi_restore x16
+
+    ldp x17, x18, [sp, #400]
+    .cfi_restore x17
+    .cfi_restore x18
+
+    ldp x19, x20, [sp, #416]
+    .cfi_restore x19
+    .cfi_restore x20
+
+    ldp x21, x22, [sp, #432]
+    .cfi_restore x21
+    .cfi_restore x22
+
+    ldp x23, x24, [sp, #448]
+    .cfi_restore x23
+    .cfi_restore x24
+
+    ldp x25, x26, [sp, #464]
+    .cfi_restore x25
+    .cfi_restore x26
+
+    ldp x27, x28, [sp, #480]
+    .cfi_restore x27
+    .cfi_restore x28
+
+    ldp x29, xLR, [sp, #496]
+    .cfi_restore x29
+    .cfi_restore x30
+
+    add sp, sp, #512
+    .cfi_adjust_cfa_offset -512
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz x0, 1f                // result non-zero branch over
     ret                        // return
@@ -333,7 +536,7 @@
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov x0, xSELF
 
     // Point of no return.
@@ -368,7 +571,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x0, xSELF                     // pass Thread::Current
     b   \cxx_name                     // \cxx_name(Thread*)
 END \c_name
@@ -377,7 +580,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
     mov x1, xSELF                     // pass Thread::Current.
     b   \cxx_name                     // \cxx_name(arg, Thread*).
     brk 0
@@ -387,7 +590,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
     brk 0
@@ -406,6 +609,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -417,6 +625,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
@@ -447,7 +661,7 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
@@ -455,7 +669,7 @@
     mov    x3, sp
     bl     \cxx_name                      // (method_idx, this, Thread*, SP)
     mov    xIP0, x1                       // save Method*->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     cbz    x0, 1f                         // did we find the target? if not go to exception delivery
     br     xIP0                           // tail call to target
 1:
@@ -915,6 +1129,105 @@
 
 
 
+/*  extern"C" void art_quick_osr_stub(void** stack,                x0
+ *                                    size_t stack_size_in_bytes,  x1
+ *                                    const uin8_t* native_pc,     x2
+ *                                    JValue *result,              x3
+ *                                    char   *shorty,              x4
+ *                                    Thread *self)                x5
+ */
+ENTRY art_quick_osr_stub
+SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
+    mov x9, sp                             // Save stack pointer.
+    .cfi_register sp,x9
+
+    sub x10, sp, # SAVE_SIZE
+    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
+    mov sp, x10                            // Set new SP.
+
+    str x28, [sp, #112]
+    stp x26, x27, [sp, #96]
+    stp x24, x25, [sp, #80]
+    stp x22, x23, [sp, #64]
+    stp x20, x21, [sp, #48]
+    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
+    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
+    stp xFP, xLR, [sp]                    // Store LR & FP.
+    mov xSELF, x5                         // Move thread pointer into SELF register.
+
+    sub sp, sp, #16
+    str xzr, [sp]                         // Store null for ArtMethod* slot
+    // Branch to stub.
+    bl .Losr_entry
+    add sp, sp, #16
+
+    // Restore return value address and shorty address.
+    ldp x3, x4, [sp, #16]
+    ldr x28, [sp, #112]
+    ldp x26, x27, [sp, #96]
+    ldp x24, x25, [sp, #80]
+    ldp x22, x23, [sp, #64]
+    ldp x20, x21, [sp, #48]
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x4]
+
+    // Check the return type and store the correct register into the jvalue in memory.
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Losr_exit
+
+    // Is it a double?
+    cmp w10, #'D'
+    bne .Lno_double
+    str d0, [x3]
+    b .Losr_exit
+
+.Lno_double:  // Is it a float?
+    cmp w10, #'F'
+    bne .Lno_float
+    str s0, [x3]
+    b .Losr_exit
+
+.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
+    str x0, [x3]
+
+.Losr_exit:  // Finish up.
+    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
+    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
+    mov sp, x2
+    ret
+
+.Losr_entry:
+    // Update stack pointer for the callee
+    sub sp, sp, x1
+
+    // Update link register slot expected by the callee.
+    sub w1, w1, #8
+    str lr, [sp, x1]
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X0 - source address
+    // W1 - args length
+    // SP - destination address.
+    // W10 - temporary
+.Losr_loop_entry:
+    cmp w1, #0
+    beq .Losr_loop_exit
+    sub w1, w1, #4
+    ldr w10, [x0, x1]
+    str w10, [sp, x1]
+    b .Losr_loop_entry
+
+.Losr_loop_exit:
+    // Branch to the OSR entry point.
+    br x2
+
+END art_quick_osr_stub
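+
+// After the link-register slot is written, the .Losr_entry copy loop above is
+// equivalent to this C sketch (illustrative only; 4 bytes per vreg slot,
+// copied from the highest slot downwards):
+//   while (stack_size_in_bytes != 0) {
+//     stack_size_in_bytes -= 4;
+//     *(uint32_t*)((uint8_t*)sp + stack_size_in_bytes) =
+//         *(uint32_t*)((uint8_t*)stack + stack_size_in_bytes);
+//   }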
+
     /*
      * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
      */
@@ -980,7 +1293,7 @@
     ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
     ldxr   w1, [x4]
     mov    x3, x1
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cbnz   w3, .Lnot_unlocked         // already thin locked
     // unlocked case - x1: original lock word that's zero except for the read barrier bits.
     orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
@@ -996,9 +1309,9 @@
     cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
                                       // else contention, go to slow path
     mov    x3, x1                     // copy the lock word to check count overflow.
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits.
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
     add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
-    lsr    w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  // if either of the upper two bits (28-29) are set, we overflowed.
+    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
     cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
     add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
     stxr   w3, w2, [x4]
@@ -1007,13 +1320,21 @@
 .Llock_stxr_fail:
     b      .Lretry_lock               // retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
+    mov    x1, xSELF                  // pass Thread::Current
+    bl     artLockObjectFromCode      // (Object* obj, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      * x0 holds the possibly null object to lock.
@@ -1034,17 +1355,17 @@
     cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
     ldr    w2, [xSELF, #THREAD_ID_OFFSET]
     mov    x3, x1                     // copy lock word to check thread id equality
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
     uxth   w3, w3                     // zero top 16 bits
     cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
     mov    x3, x1                     // copy lock word to detect transition to unlocked
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     // transition to unlocked
     mov    x3, x1
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK  // w3: zero except for the preserved read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved gc bits
     dmb    ish                        // full (LoadStore|StoreStore) memory barrier
 #ifndef USE_READ_BARRIER
     str    w3, [x4]
@@ -1065,13 +1386,21 @@
 .Lunlock_stxr_fail:
     b      .Lretry_unlock               // retry
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
+    mov    x1, xSELF                  // pass Thread::Current
+    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artIsAssignableFromCode and on failure calls
      * artThrowClassCastException.
@@ -1113,7 +1442,7 @@
     .cfi_restore x1
     .cfi_adjust_cfa_offset -32
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     b artThrowClassCastException      // (Class*, Class*, Thread*)
     brk 0                             // We should not return here...
@@ -1127,6 +1456,22 @@
     .endif
 .endm
 
+// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
+// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
+.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
+    .ifc \xReg1, \xExclude
+        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
+    .else
+        .ifc \xReg2, \xExclude
+            ldr \xReg1, [sp, #\offset]          // restore xReg1
+        .else
+            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
+        .endif
+    .endif
+    .cfi_restore \xReg1
+    .cfi_restore \xReg2
+.endm
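+// For example, `POP_REGS_NE x0, x1, 0, x1` reloads only x0 from [sp, #0],
+// leaving the excluded x1 (e.g. a register holding a result) untouched.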
+
     /*
      * Macro to insert read barrier, only used in art_quick_aput_obj.
      * xDest, wDest and xObj are registers, offset is a defined literal such as
@@ -1134,8 +1479,18 @@
      * name mismatch between instructions. This macro uses the lower 32b of register when possible.
      * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
      */
-.macro READ_BARRIER xDest, wDest, xObj, offset
+.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
 #ifdef USE_READ_BARRIER
+#ifdef USE_BAKER_READ_BARRIER
+    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
+    // False dependency to avoid needing load/load fence.
+    add \xObj, \xObj, \xTemp, lsr #32
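+    // (The 32-bit ldr above zero-extends into \xTemp, so `\xTemp, lsr #32`
+    // is 0: \xObj is unchanged, but the reference load below now carries a
+    // data dependency on the lock word load.)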
+    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
+    UNPOISON_HEAP_REF \wDest
+    b .Lrb_exit\number
+#endif
+.Lrb_slowpath\number:
     // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
     stp x0, x1, [sp, #-48]!
     .cfi_adjust_cfa_offset 48
@@ -1169,6 +1524,7 @@
     .cfi_restore x30
     add sp, sp, #48
     .cfi_adjust_cfa_offset -48
+.Lrb_exit\number:
 #else
     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
     UNPOISON_HEAP_REF \wDest
@@ -1207,12 +1563,12 @@
 #endif
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    READ_BARRIER x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x3
-    READ_BARRIER x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x4
-    READ_BARRIER x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET // Heap reference = 32b
-                                                         // This also zero-extends to x3
+    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
+                                                                    // This also zero-extends to x3
+    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
+                                                                    // This also zero-extends to x3
+    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
+                                                                    // This also zero-extends to x4
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
@@ -1274,7 +1630,7 @@
     .cfi_restore x1
     .cfi_adjust_cfa_offset -32
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov x1, x2                    // Pass value.
     mov x2, xSELF                 // Pass Thread::Current.
     b artThrowArrayStoreException // (Object*, Object*, Thread*).
@@ -1285,10 +1641,10 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1297,10 +1653,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1309,10 +1665,10 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1321,10 +1677,10 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint                //
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -1334,11 +1690,11 @@
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1346,11 +1702,11 @@
 .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1358,11 +1714,11 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1420,45 +1776,468 @@
 // This is separated out as the argument order is different.
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
                                       // x2 contains the parameter
     mov    x3, xSELF                  // pass Thread::Current
     bl     artSet64StaticFromCode
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_set64_static
 
     /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. w0 holds the string index. The fast
-     * path check for hit in strings cache has already been performed.
+     * Entry from managed code to resolve a string. This stub will
+     * check the dex cache for a matching string (the fast path); if not found,
+     * it will allocate a String and deliver an exception on error.
+     * On success the String is returned. w0 holds the string index.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+    ldr   x1, [sp]                                               // load referrer
+    ldr   w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET]           // load declaring class
+    ldr   x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]    // load string dex cache
+    and   x2, x0, #STRING_DEX_CACHE_SIZE_MINUS_ONE               // get masked string index into x2
+    ldr   x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x2
+    cmp   x0, x2, lsr #32                                         // compare against upper 32 bits
+    bne   .Lart_quick_resolve_string_slow_path
+    ubfx  x0, x2, #0, #32                                        // extract lower 32 bits into x0
+#ifdef USE_READ_BARRIER
+    // Most common case: GC is not marking.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lart_quick_resolve_string_marking
+#endif
+    ret
+
+// Slow path: the index did not match.
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
+    mov   x1, xSELF                                 // pass Thread::Current
+    bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+    ldr   x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz  x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
+    // Save LR so that we can return, also x1 for alignment purposes.
+    stp    x1, xLR, [sp, #-16]!                     // Save x1, LR.
+    bl     artReadBarrierMark                       // Get the marked string back.
+    ldp    x1, xLR, [sp], #16                       // Restore registers.
+.Lart_quick_resolve_string_no_rb:
+    ret
+
+END art_quick_resolve_string
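+
+// C++ sketch of the fast path above (illustrative; the read-barrier marking
+// check is omitted and names are assumed): each cache element packs a 32-bit
+// String pointer in its low half and the string index in its high half:
+//   uint64_t pair = dex_cache_strings[string_idx & STRING_DEX_CACHE_SIZE_MINUS_ONE];
+//   if ((pair >> 32) == string_idx) {
+//     return reinterpret_cast<mirror::String*>(static_cast<uintptr_t>(pair & 0xffffffffu));
+//   }
+//   return artResolveStringFromCode(string_idx, self);  // slow path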
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+// Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
+                                                              // Check class status.
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    x3, x3, x3
+    add    x2, x2, x3
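+                                                              // (The eor produces zero while
+                                                              // preserving a data dependency on
+                                                              // the status value in x3; the add
+                                                              // leaves x2 unchanged but gives
+                                                              // later loads based on x2 an
+                                                              // address dependency on the
+                                                              // status load.)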
+                                                              // Check whether the access flags
+                                                              // have kAccClassIsFinalizable set.
+    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
+                                                              // allocation stack has room.
+                                                              // ldp won't work due to large offset.
+    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
+    cmp    x3, x4
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x3)
+    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
+                                                              // local allocation
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Compute the rosalloc bracket index
+                                                              // from the size.
+                                                              // Round the size up to the bracket
+                                                              // quantum, divide by the quantum
+                                                              // size, and subtract 1. The code
+                                                              // below is a shorter but
+                                                              // equivalent version.
+    sub    x3, x3, #1
+    lsr    x3, x3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
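+                                                              // Sketch: with quantum Q = 1 << SHIFT,
+                                                              // ((size + Q - 1) / Q) - 1
+                                                              //   == (size - 1) >> SHIFT,
+                                                              // e.g. Q = 16, size = 24 -> index 1.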
+                                                              // Load the rosalloc run (x4)
+    add    x4, xSELF, x3, lsl #POINTER_SIZE_SHIFT
+    ldr    x4, [x4, #THREAD_ROSALLOC_RUNS_OFFSET]
+                                                              // Load the free list head (x3). This
+                                                              // will be the return val.
+    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
+                                                              // and update the list head with the
+                                                              // next pointer.
+    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+                                                              // Store the class pointer in the
+                                                              // header. This also overwrites the
+                                                              // next pointer. The offsets are
+                                                              // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF w2
+    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that it also ensures ordering of
+                                                              // the class status load with respect
+                                                              // to later accesses to the class
+                                                              // object. Alternatively we could use
+                                                              // "ishst" if we use load-acquire for
+                                                              // the class status load.
+                                                              // Needs to be done before pushing on
+                                                              // allocation since Heap::VisitObjects
+                                                              // relies on seeing the class pointer.
+                                                              // b/28790624
+    dmb    ish
+                                                              // Push the new object onto the thread
+                                                              // local allocation stack and
+                                                              // increment the thread local
+                                                              // allocation stack top.
+    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
+    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+                                                              // Decrement the size of the free list
+    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+    sub    x1, x1, #1
+                                                              // TODO: consider combining this store
+                                                              // and the list head store above using
+                                                              // stp.
+    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+
+    mov    x0, x3                                             // Set the return value and return.
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
+    mov    x2, xSELF                       // pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_rosalloc
+
+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Check null class
+    cbz    \wClass, \slowPathLabel
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
+.endm
+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Array classes are never finalizable or uninitialized, so there is no need to check.
+    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
+    UNPOISON_HEAP_REF \wTemp0
+    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
+                                                              // bits.
+                                                              // xCount holds a 32-bit value; the
+                                                              // shift cannot overflow.
+    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
+    // Add array data offset and alignment.
+    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+
+    add    \xTemp0, \xTemp0, #1                               // Add 4 to the size only if the
+                                                              // component size shift is 3
+                                                              // (for 64-bit alignment).
+    and    \xTemp0, \xTemp0, #4
+    add    \xTemp1, \xTemp1, \xTemp0
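+                                                              // Sketch: the shift is in {0..3},
+                                                              // and (shift + 1) & 4 is 4 only
+                                                              // when shift == 3, so only 64-bit
+                                                              // components get the extra 4 bytes.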
+    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
+                                                              // (addr + 7) & ~7. The mask must
+                                                              // be 64 bits to keep high bits in
+                                                              // case of overflow.
+    // Negative-sized arrays are handled here since xCount holds a zero-extended 32-bit value.
+    // Negative ints become large 64-bit unsigned ints which will always be larger than the max
+    // signed 32-bit int. Since the max shift for arrays is 3, the size cannot become a negative
+    // 64-bit int.
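+    // E.g. count = -1 is 0xFFFFFFFF zero-extended; even at shift 0 the computed size is far
+    // above MIN_LARGE_OBJECT_THRESHOLD, so it takes the slow path.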
+    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
+    bhs    \slowPathLabel                                     // path.
+
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space; note that
+                                                              // we use (end - begin) to handle
+                                                              // negative-size arrays. It is
+                                                              // assumed that a negative size
+                                                              // will always be greater,
+                                                              // unsigned, than the region size.
+    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
+    sub    \xTemp2, \xTemp2, \xTemp0
+    cmp    \xTemp1, \xTemp2
+    bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+                                                              // Move old thread_local_pos to x0
+                                                              // for the return value.
+    mov    x0, \xTemp0
+    add    \xTemp0, \xTemp0, \xTemp1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
+    add    \xTemp0, \xTemp0, #1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF \wClass
+    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
+    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
+                                                              // Fence.
+    dmb    ishst
+    ret
+.endm
+
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
+// x3-x7: free.
+// Need to preserve x0 and x1 to the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    x2, \slowPathLabel                                 // Check null class
+    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
+    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    \slowPathLabel
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    x3, x3, x3
+    add    x2, x2, x3
+    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
+                                                              // Check whether the access flags
+                                                              // have kAccClassIsFinalizable set.
+    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tbnz   x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT, \slowPathLabel
+                                                              // Load thread_local_pos (x4) and
+                                                              // thread_local_end (x5).
+    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
+    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
+    sub    x6, x5, x4                                         // Compute the remaining buf size.
+    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x7).
+    cmp    x7, x6                                             // Check if it fits. OK to do this
+                                                              // before rounding up the object size
+                                                              // assuming the buf size alignment.
+    bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+                                                              // Round up the object size by the
+                                                              // object alignment. (addr + 7) & ~7.
+    add    x7, x7, #OBJECT_ALIGNMENT_MASK
+    and    x7, x7, #OBJECT_ALIGNMENT_MASK_TOGGLED
+                                                              // Move old thread_local_pos to x0
+                                                              // for the return value.
+    mov    x0, x4
+    add    x5, x0, x7
+    str    x5, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
+    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
+    add    x5, x5, #1
+    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF w2
+    str    w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that the code after this allocation
+                                                              // site will see the right values in
+                                                              // the fields of the class.
+                                                              // Alternatively we could use "ishst"
+                                                              // if we use load-acquire for the
+                                                              // class status load.
+    dmb    ish
+    ret
+.endm
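+
+// The TLAB fast path above, as a hedged C-like sketch (field names illustrative):
+//   if (klass->object_size > self->tlab_end - self->tlab_pos) goto slow_path;
+//   size_t size = RoundUp(klass->object_size, kObjectAlignment);
+//   obj = self->tlab_pos; self->tlab_pos += size; self->tlab_objects++;
+//   obj->klass = klass; /* dmb ish */ return obj;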
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+#if defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier not supported here.
+    ret                                                       // Return -1.
+#endif
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
+    mov    x2, xSELF                     // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_tlab
+
+// The common code for art_quick_alloc_object_*region_tlab
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
+    // Fast path region tlab allocation.
+    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
+    // x2-x7: free.
+#if !defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier must be enabled here.
+    ret                                                       // Return -1.
+#endif
+.if \is_resolved
+    mov    x2, x0 // class is actually stored in x0 already
+.else
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+.endif
+    // Most common case: GC is not marking.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w2, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
+    str    xLR, [sp, #16]                                     // 32-byte frame keeps sp 16-aligned.
+    mov    x0, x2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x2, x0                                             // Get the (marked) class back.
+    ldp    x0, x1, [sp, #0]                                   // Restore registers.
+    ldr    xLR, [sp, #16]
+    add    sp, sp, #32
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
+    mov    x2, xSELF                           // Pass Thread::Current.
+    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \name
+.endm
+
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
+
+// The common code for art_quick_alloc_array_*region_tlab
+.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
+    // Fast path array allocation for region tlab allocation.
+    // x0: uint32_t type_idx
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3-x7: free.
+#if !defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier must be enabled here.
+    ret                                                       // Return -1.
+#endif
+.if \is_resolved
+    mov    x3, x0
+    // If already resolved, class is stored in x0
+.else
+    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x3)
+    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+.endif
+    // Most common case: GC is not marking.
+    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x4, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w3, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
+    stp    x2, xLR, [sp, #16]
+    mov    x0, x3                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x3, x0                                             // Get the (marked) class back.
+    ldp    x2, xLR, [sp, #16]
+    ldp    x0, x1, [sp], #32                                  // Restore registers.
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    // x0: uint32_t type_idx / mirror::Class* klass (if resolved)
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3: Thread* self
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    mov    x3, xSELF                  // pass Thread::Current
+    bl     \entrypoint
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \name
+.endm
+
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
+// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1
 
     /*
      * Called by managed code when the thread has been asked to suspend.
      */
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
-    ldrh   w0, [xSELF, #THREAD_FLAGS_OFFSET]  // get xSELF->state_and_flags.as_struct.flags
-    cbnz   w0, .Lneed_suspend                 // check flags == 0
-    ret                                       // return if flags == 0
-.Lneed_suspend:
+    SETUP_SAVE_EVERYTHING_FRAME               // save everything for stack crawl
     mov    x0, xSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    x0, xSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
      /*
@@ -1468,42 +2247,67 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
     ret                                 // return on success
 .Lexception_in_proxy:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. xIP1 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * x0 is the conflict ArtMethod.
+     * xIP1 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to xIP0, xIP1, and x0.
      */
     .extern artInvokeInterfaceTrampoline
 ENTRY art_quick_imt_conflict_trampoline
-    mov    x0, xIP1
+    ldr xIP0, [sp, #0]  // Load referrer
+    ldr xIP0, [xIP0, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_64]   // Load dex cache methods array
+    ldr xIP0, [xIP0, xIP1, lsl #POINTER_SIZE_SHIFT]  // Load interface method
+    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
+    ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
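+    // Table layout, as a hedged sketch: pointer pairs terminated by a null entry:
+    //   { iface_method0, impl0, iface_method1, impl1, ..., null }
+    // xIP1 points at the current pair; x0 holds that pair's interface method.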
+.Limt_table_iterate:
+    cmp x0, xIP0
+    // Branch if found. Benchmarks have shown doing a branch here is better.
+    beq .Limt_table_found
+    // If the entry is null, the interface method is not in the ImtConflictTable.
+    cbz x0, .Lconflict_trampoline
+    // Iterate over the entries of the ImtConflictTable.
+    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
+    b .Limt_table_iterate
+.Limt_table_found:
+    // We successfully hit an entry in the table. Load the target method
+    // and jump to it.
+    ldr x0, [xIP1, #__SIZEOF_POINTER__]
+    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
+    br xIP0
+.Lconflict_trampoline:
+    // Call the runtime stub to populate the ImtConflictTable and jump to the
+    // resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     mov x2, xSELF
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
     cbz x0, 1f
     mov xIP0, x0            // Remember returned code pointer in xIP0.
     ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     br xIP0
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1563,7 +2367,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
 
     // Save SP , so we can have static CFI info.
     mov x28, sp
@@ -1635,7 +2439,7 @@
     .cfi_def_cfa_register sp
 
     // Tear down the callee-save frame.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     // store into fpr, for when it's a fpr return...
     fmov d0, x0
@@ -1657,7 +2461,7 @@
  * x1..x7, d0..d7 = arguments to that method.
  */
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
 
     //  x0 will contain mirror::ArtMethod* method.
     mov x1, xSELF                          // How to get Thread::Current() ???
@@ -1667,7 +2471,7 @@
     //                                      mirror::ArtMethod** sp)
     bl   artQuickToInterpreterBridge
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
 
     fmov d0, x0
 
@@ -1680,7 +2484,7 @@
 //
     .extern artInstrumentationMethodEntryFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
 
     mov   x20, x0             // Preserve method reference in a callee-save.
 
@@ -1691,7 +2495,7 @@
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
 END art_quick_instrumentation_entry
@@ -1700,7 +2504,7 @@
 ENTRY art_quick_instrumentation_exit
     mov   xLR, #0             // Clobber LR for later checks.
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
 
     // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
@@ -1723,7 +2527,7 @@
     ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
     .cfi_adjust_cfa_offset 16
 
-    POP_REFS_ONLY_CALLEE_SAVE_FRAME
+    POP_SAVE_REFS_ONLY_FRAME
 
     br    xIP0                // Tail-call out.
 END art_quick_instrumentation_exit
@@ -1734,7 +2538,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF          // Pass thread.
     bl     artDeoptimize      // artDeoptimize(Thread*)
     brk 0
@@ -1746,7 +2550,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF                      // Pass thread.
     bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
     brk 0
@@ -1845,107 +2649,158 @@
     ret
 END art_quick_indexof
 
-   /*
-     * String's compareTo.
-     *
-     * TODO: Not very optimized.
-     *
-     * On entry:
-     *    x0:   this object pointer
-     *    x1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    x2, x0         // x0 is return, use x2 for first input.
-    sub    x0, x2, x1     // Same string object?
-    cbnz   x0,1f
-    ret
-1:                        // Different string objects.
-
-    ldr    w4, [x2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    w3, [x1, #MIRROR_STRING_COUNT_OFFSET]
-    add    x2, x2, #MIRROR_STRING_VALUE_OFFSET
-    add    x1, x1, #MIRROR_STRING_VALUE_OFFSET
-
     /*
-     * Now:           Data*  Count
-     *    first arg    x2      w4
-     *   second arg    x1      w3
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through W register
+     * `wreg` (corresponding to X register `xreg`), saving and restoring
+     * all caller-save registers.
+     *
+     * If `wreg` is different from `w0`, the generated function follows a
+     * non-standard runtime calling convention:
+     * - register `wreg` is used to pass the (sole) argument of this
+     *   function (instead of W0);
+     * - register `wreg` is used to return the result of this function
+     *   (instead of W0);
+     * - W0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. standard callee-save registers are preserved).
      */
-
-    // x0 := str1.length(w4) - str2.length(w3). ldr zero-extended w3/w4 into x3/x4.
-    subs x0, x4, x3
-    // Min(count1, count2) into w3.
-    csel x3, x3, x4, ge
-
-    // TODO: Tune this value.
-    // Check for long string, do memcmp16 for them.
-    cmp w3, #28  // Constant from arm32.
-    bgt .Ldo_memcmp16
-
+.macro READ_BARRIER_MARK_REG name, wreg, xreg
+ENTRY \name
+    // If the reference is null, there is no work to do at all.
+    cbz \wreg, .Lret_rb_\name
     /*
-     * Now:
-     *   x2: *first string data
-     *   x1: *second string data
-     *   w3: iteration count
-     *   x0: return value if comparison equal
-     *   x4, x5, x6, x7: free
+     * Allocate 46 stack slots * 8 = 368 bytes:
+     * - 20 slots for core registers X0-X19
+     * - 24 slots for floating-point registers D0-D7 and D16-D31
+     * -  1 slot for return address register XLR
+     * -  1 padding slot for 16-byte stack alignment
      */
-
-    // Do a simple unrolled loop.
-.Lloop:
-    // At least two more elements?
-    subs w3, w3, #2
-    b.lt .Lremainder_or_done
-
-    ldrh w4, [x2], #2
-    ldrh w5, [x1], #2
-
-    ldrh w6, [x2], #2
-    ldrh w7, [x1], #2
-
-    subs w4, w4, w5
-    b.ne .Lw4_result
-
-    subs w6, w6, w7
-    b.ne .Lw6_result
-
-    b .Lloop
-
-.Lremainder_or_done:
-    adds w3, w3, #1
-    b.eq .Lremainder
+    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
+    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
     ret
+.Lslow_path_rb_\name:
+    // Save all potentially live caller-save core registers.
+    stp   x0, x1,   [sp, #-368]!
+    .cfi_adjust_cfa_offset 368
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
+    stp   x2, x3,   [sp, #16]
+    .cfi_rel_offset x2, 16
+    .cfi_rel_offset x3, 24
+    stp   x4, x5,   [sp, #32]
+    .cfi_rel_offset x4, 32
+    .cfi_rel_offset x5, 40
+    stp   x6, x7,   [sp, #48]
+    .cfi_rel_offset x6, 48
+    .cfi_rel_offset x7, 56
+    stp   x8, x9,   [sp, #64]
+    .cfi_rel_offset x8, 64
+    .cfi_rel_offset x9, 72
+    stp   x10, x11, [sp, #80]
+    .cfi_rel_offset x10, 80
+    .cfi_rel_offset x11, 88
+    stp   x12, x13, [sp, #96]
+    .cfi_rel_offset x12, 96
+    .cfi_rel_offset x13, 104
+    stp   x14, x15, [sp, #112]
+    .cfi_rel_offset x14, 112
+    .cfi_rel_offset x15, 120
+    stp   x16, x17, [sp, #128]
+    .cfi_rel_offset x16, 128
+    .cfi_rel_offset x17, 136
+    stp   x18, x19, [sp, #144]
+    .cfi_rel_offset x18, 144
+    .cfi_rel_offset x19, 152
+    // Save all potentially live caller-save floating-point registers.
+    stp   d0, d1,   [sp, #160]
+    stp   d2, d3,   [sp, #176]
+    stp   d4, d5,   [sp, #192]
+    stp   d6, d7,   [sp, #208]
+    stp   d16, d17, [sp, #224]
+    stp   d18, d19, [sp, #240]
+    stp   d20, d21, [sp, #256]
+    stp   d22, d23, [sp, #272]
+    stp   d24, d25, [sp, #288]
+    stp   d26, d27, [sp, #304]
+    stp   d28, d29, [sp, #320]
+    stp   d30, d31, [sp, #336]
+    // Save return address.
+    str   xLR,      [sp, #352]
+    .cfi_rel_offset x30, 352
+    // (sp + #360 is a padding slot)
 
-.Lremainder:
-    ldrh w4, [x2], #2
-    ldrh w5, [x1], #2
-    subs w4, w4, w5
-    b.ne .Lw4_result
+    .ifnc \wreg, w0
+      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
+    .endif
+    bl    artReadBarrierMark            // artReadBarrierMark(obj)
+    .ifnc \wreg, w0
+      mov   \wreg, w0                   // Return result into `wreg`
+    .endif
+
+    // Restore core regs, except `xreg`, as `wreg` is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REGS_NE x0, x1,   0,   \xreg
+    POP_REGS_NE x2, x3,   16,  \xreg
+    POP_REGS_NE x4, x5,   32,  \xreg
+    POP_REGS_NE x6, x7,   48,  \xreg
+    POP_REGS_NE x8, x9,   64,  \xreg
+    POP_REGS_NE x10, x11, 80,  \xreg
+    POP_REGS_NE x12, x13, 96,  \xreg
+    POP_REGS_NE x14, x15, 112, \xreg
+    POP_REGS_NE x16, x17, 128, \xreg
+    POP_REGS_NE x18, x19, 144, \xreg
+    // Restore floating-point registers.
+    ldp   d0, d1,   [sp, #160]
+    ldp   d2, d3,   [sp, #176]
+    ldp   d4, d5,   [sp, #192]
+    ldp   d6, d7,   [sp, #208]
+    ldp   d16, d17, [sp, #224]
+    ldp   d18, d19, [sp, #240]
+    ldp   d20, d21, [sp, #256]
+    ldp   d22, d23, [sp, #272]
+    ldp   d24, d25, [sp, #288]
+    ldp   d26, d27, [sp, #304]
+    ldp   d28, d29, [sp, #320]
+    ldp   d30, d31, [sp, #336]
+    // Restore return address and remove padding.
+    ldr   xLR,      [sp, #352]
+    .cfi_restore x30
+    add sp, sp, #368
+    .cfi_adjust_cfa_offset -368
+.Lret_rb_\name:
     ret
+END \name
+.endm
 
-// Result is in w4
-.Lw4_result:
-    sxtw x0, w4
-    ret
-
-// Result is in w6
-.Lw6_result:
-    sxtw x0, w6
-    ret
-
-.Ldo_memcmp16:
-    mov x14, x0                  // Save x0 and LR. __memcmp16 does not use these temps.
-    mov x15, xLR                 //                 TODO: Codify and check that?
-
-    mov x0, x2
-    uxtw x2, w3
-    bl __memcmp16
-
-    mov xLR, x15                 // Restore LR.
-
-    cmp x0, #0                   // Check the memcmp difference.
-    csel x0, x0, x14, ne         // x0 := x0 != 0 ? x14(prev x0=length diff) : x1.
-    ret
-END art_quick_string_compareto
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
+// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 -- x16 (ip0) is blocked.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
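+
+// Usage sketch: for a reference live in, say, w5, the compiler calls
+// art_quick_read_barrier_mark_reg05; both the argument and the result stay in
+// w5 under the non-standard convention documented above.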
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index b525309..36f283b 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -29,7 +29,7 @@
 static constexpr uint32_t kArm64CalleeSaveAlwaysSpills =
     // Note: ArtMethod::GetReturnPcOffsetInBytes() relies on the assumption that
     // LR is always saved on the top of the frame for all targets.
-    // That is, lr = *(sp + framesize - pointsize).
+    // That is, lr = *(sp + framesize - pointer_size).
     (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
@@ -44,6 +44,14 @@
     (1 << art::arm64::X7);
 static constexpr uint32_t kArm64CalleeSaveAllSpills =
     (1 << art::arm64::X19);
+static constexpr uint32_t kArm64CalleeSaveEverythingSpills =
+    (1 << art::arm64::X0) | (1 << art::arm64::X1) | (1 << art::arm64::X2) |
+    (1 << art::arm64::X3) | (1 << art::arm64::X4) | (1 << art::arm64::X5) |
+    (1 << art::arm64::X6) | (1 << art::arm64::X7) | (1 << art::arm64::X8) |
+    (1 << art::arm64::X9) | (1 << art::arm64::X10) | (1 << art::arm64::X11) |
+    (1 << art::arm64::X12) | (1 << art::arm64::X13) | (1 << art::arm64::X14) |
+    (1 << art::arm64::X15) | (1 << art::arm64::X16) | (1 << art::arm64::X17) |
+    (1 << art::arm64::X18) | (1 << art::arm64::X19);
 
 static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
@@ -55,23 +63,37 @@
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
     (1 << art::arm64::D14)  | (1 << art::arm64::D15);
+static constexpr uint32_t kArm64CalleeSaveFpEverythingSpills =
+    (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
+    (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
+    (1 << art::arm64::D6) | (1 << art::arm64::D7) | (1 << art::arm64::D8) |
+    (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) |
+    (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) |
+    (1 << art::arm64::D15) | (1 << art::arm64::D16) | (1 << art::arm64::D17) |
+    (1 << art::arm64::D18) | (1 << art::arm64::D19) | (1 << art::arm64::D20) |
+    (1 << art::arm64::D21) | (1 << art::arm64::D22) | (1 << art::arm64::D23) |
+    (1 << art::arm64::D24) | (1 << art::arm64::D25) | (1 << art::arm64::D26) |
+    (1 << art::arm64::D27) | (1 << art::arm64::D28) | (1 << art::arm64::D29) |
+    (1 << art::arm64::D30) | (1 << art::arm64::D31);
 
 constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArm64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(Arm64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Arm64CalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kArm64PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kArm64PointerSize), kStackAlignment);
 }
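+// Worked example (a sketch, assuming the usual arm64 ref-spill mask of
+// X20-X29 plus LR): for kSaveRefsOnly, POPCOUNT(core) = 11, fprs = 0, so
+// (11 + 0 + 1) * 8 = 96 bytes, already a multiple of kStackAlignment.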
 
 constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
@@ -83,17 +105,18 @@
 constexpr size_t Arm64CalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
          (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) +
-          POPCOUNT(Arm64CalleeSaveFpSpills(type))) * kArm64PointerSize;
+          POPCOUNT(Arm64CalleeSaveFpSpills(type))) * static_cast<size_t>(kArm64PointerSize);
 }
 
 constexpr size_t Arm64CalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
-         POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * kArm64PointerSize;
+         POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * static_cast<size_t>(kArm64PointerSize);
 }
 
 constexpr size_t Arm64CalleeSaveLrOffset(Runtime::CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
-      POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) * kArm64PointerSize;
+      POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) *
+      static_cast<size_t>(kArm64PointerSize);
 }
 
 }  // namespace arm64
diff --git a/runtime/arch/arm64/thread_arm64.cc b/runtime/arch/arm64/thread_arm64.cc
index 564dced..3483b70 100644
--- a/runtime/arch/arm64/thread_arm64.cc
+++ b/runtime/arch/arm64/thread_arm64.cc
@@ -17,15 +17,16 @@
 #include "thread.h"
 
 #include "asm_support_arm64.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<8>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<PointerSize::k64>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/instruction_set.cc b/runtime/arch/instruction_set.cc
index 81ca010..b35e088 100644
--- a/runtime/arch/instruction_set.cc
+++ b/runtime/arch/instruction_set.cc
@@ -18,6 +18,7 @@
 
 // Explicitly include our own elf.h to avoid Linux and other dependencies.
 #include "../elf.h"
+#include "base/bit_utils.h"
 #include "globals.h"
 
 namespace art {
@@ -113,14 +114,44 @@
   }
 }
 
-static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
-static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
-static constexpr size_t kMips64StackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+#if !defined(ART_STACK_OVERFLOW_GAP_arm) || !defined(ART_STACK_OVERFLOW_GAP_arm64) || \
+    !defined(ART_STACK_OVERFLOW_GAP_mips) || !defined(ART_STACK_OVERFLOW_GAP_mips64) || \
+    !defined(ART_STACK_OVERFLOW_GAP_x86) || !defined(ART_STACK_OVERFLOW_GAP_x86_64)
+#error "Missing defines for stack overflow gap"
+#endif
 
-static constexpr size_t kArmStackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kArm64StackOverflowReservedBytes =  8 * KB;
-static constexpr size_t kX86StackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes    = ART_STACK_OVERFLOW_GAP_arm;
+static constexpr size_t kArm64StackOverflowReservedBytes  = ART_STACK_OVERFLOW_GAP_arm64;
+static constexpr size_t kMipsStackOverflowReservedBytes   = ART_STACK_OVERFLOW_GAP_mips;
+static constexpr size_t kMips64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_mips64;
+static constexpr size_t kX86StackOverflowReservedBytes    = ART_STACK_OVERFLOW_GAP_x86;
+static constexpr size_t kX86_64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_x86_64;
+
+static_assert(IsAligned<kPageSize>(kArmStackOverflowReservedBytes), "ARM gap not page aligned");
+static_assert(IsAligned<kPageSize>(kArm64StackOverflowReservedBytes), "ARM64 gap not page aligned");
+static_assert(IsAligned<kPageSize>(kMipsStackOverflowReservedBytes), "Mips gap not page aligned");
+static_assert(IsAligned<kPageSize>(kMips64StackOverflowReservedBytes),
+              "Mips64 gap not page aligned");
+static_assert(IsAligned<kPageSize>(kX86StackOverflowReservedBytes), "X86 gap not page aligned");
+static_assert(IsAligned<kPageSize>(kX86_64StackOverflowReservedBytes),
+              "X86_64 gap not page aligned");
+
+#if !defined(ART_FRAME_SIZE_LIMIT)
+#error "ART frame size limit missing"
+#endif
+
+// TODO: Should we require an extra page (RoundUp(SIZE) + kPageSize)?
+static_assert(ART_FRAME_SIZE_LIMIT < kArmStackOverflowReservedBytes, "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kArm64StackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kMipsStackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kMips64StackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kX86StackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kX86_64StackOverflowReservedBytes,
+              "Frame size limit too large");
 
 size_t GetStackOverflowReservedBytes(InstructionSet isa) {
   switch (isa) {
diff --git a/runtime/arch/instruction_set.h b/runtime/arch/instruction_set.h
index ff9c0b3..917acc9 100644
--- a/runtime/arch/instruction_set.h
+++ b/runtime/arch/instruction_set.h
@@ -20,6 +20,7 @@
 #include <iosfwd>
 #include <string>
 
+#include "base/enums.h"
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 
 namespace art {
@@ -53,12 +54,12 @@
 #endif
 
 // Architecture-specific pointer sizes
-static constexpr size_t kArmPointerSize = 4;
-static constexpr size_t kArm64PointerSize = 8;
-static constexpr size_t kMipsPointerSize = 4;
-static constexpr size_t kMips64PointerSize = 8;
-static constexpr size_t kX86PointerSize = 4;
-static constexpr size_t kX86_64PointerSize = 8;
+static constexpr PointerSize kArmPointerSize = PointerSize::k32;
+static constexpr PointerSize kArm64PointerSize = PointerSize::k64;
+static constexpr PointerSize kMipsPointerSize = PointerSize::k32;
+static constexpr PointerSize kMips64PointerSize = PointerSize::k64;
+static constexpr PointerSize kX86PointerSize = PointerSize::k32;
+static constexpr PointerSize kX86_64PointerSize = PointerSize::k64;
 
 // ARM instruction alignment. ARM processors require code to be 4-byte aligned,
 // but ARM ELF requires 8.
@@ -82,7 +83,7 @@
 
 InstructionSet GetInstructionSetFromELF(uint16_t e_machine, uint32_t e_flags);
 
-static inline size_t GetInstructionSetPointerSize(InstructionSet isa) {
+static inline PointerSize GetInstructionSetPointerSize(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -147,8 +148,8 @@
   }
 }
 
-static inline size_t InstructionSetPointerSize(InstructionSet isa) {
-  return Is64BitInstructionSet(isa) ? 8U : 4U;
+static inline PointerSize InstructionSetPointerSize(InstructionSet isa) {
+  return Is64BitInstructionSet(isa) ? PointerSize::k64 : PointerSize::k32;
 }
 
 static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
diff --git a/runtime/arch/instruction_set_features_test.cc b/runtime/arch/instruction_set_features_test.cc
index 99c2d4d..fb38b47 100644
--- a/runtime/arch/instruction_set_features_test.cc
+++ b/runtime/arch/instruction_set_features_test.cc
@@ -18,7 +18,7 @@
 
 #include <gtest/gtest.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/properties.h"
 #endif
 
@@ -26,7 +26,7 @@
 
 namespace art {
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #if defined(__aarch64__)
 TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromSystemPropertyVariant) {
   LOG(WARNING) << "Test disabled due to no CPP define for A53 erratum 835769";
@@ -111,7 +111,7 @@
 }
 #endif
 
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 TEST(InstructionSetFeaturesTest, HostFeaturesFromCppDefines) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> default_features(
diff --git a/runtime/arch/instruction_set_test.cc b/runtime/arch/instruction_set_test.cc
index 2f3cf18..5dfc4b4 100644
--- a/runtime/arch/instruction_set_test.cc
+++ b/runtime/arch/instruction_set_test.cc
@@ -18,6 +18,7 @@
 
 #include <gtest/gtest.h>
 
+#include "base/enums.h"
 #include "base/stringprintf.h"
 
 namespace art {
@@ -49,7 +50,7 @@
 }
 
 TEST(InstructionSetTest, PointerSize) {
-  EXPECT_EQ(sizeof(void*), GetInstructionSetPointerSize(kRuntimeISA));
+  EXPECT_EQ(kRuntimePointerSize, GetInstructionSetPointerSize(kRuntimeISA));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index 51e224c..801f708 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -129,4 +129,43 @@
 #endif  // USE_HEAP_POISONING
 .endm
 
+// Based on the contents of creg, select the minimum integer.
+// At the end of the macro, the original value of creg is lost.
+.macro MINint dreg,rreg,sreg,creg
+  .set push
+  .set noat
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+  .ifc \dreg, \rreg
+  selnez \dreg, \rreg, \creg
+  seleqz \creg, \sreg, \creg
+  .else
+  seleqz \dreg, \sreg, \creg
+  selnez \creg, \rreg, \creg
+  .endif
+  or     \dreg, \dreg, \creg
+#else
+  movn   \dreg, \rreg, \creg
+  movz   \dreg, \sreg, \creg
+#endif
+  .set pop
+.endm
+
+// Find minimum of two signed registers
+.macro MINs dreg,rreg,sreg
+  .set push
+  .set noat
+  slt    $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
+
+// Find minimum of two unsigned registers
+.macro MINu dreg,rreg,sreg
+  .set push
+  .set noat
+  sltu   $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
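+
+// Usage sketch (registers hypothetical): $t0 = unsigned min($t1, $t2).
+//   MINu   $t0, $t1, $t2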
+
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_S_
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 453056d..135b074 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 96
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 48
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96
+#define FRAME_SIZE_SAVE_REFS_ONLY 48
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 80
+#define FRAME_SIZE_SAVE_EVERYTHING 256
 
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 4dedb33..375a03a 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -28,11 +28,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[RA] = &ra_;
+  gprs_[T9] = &t9_;
   gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = MipsContext::kBadGprBase + SP;
-  ra_ = MipsContext::kBadGprBase + RA;
+  t9_ = MipsContext::kBadGprBase + T9;
   arg0_ = 0;
 }
 
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index f1e2905..7dcff63 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -41,7 +41,7 @@
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    SetGPR(RA, new_pc);
+    SetGPR(T9, new_pc);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
@@ -86,9 +86,10 @@
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfFRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame, as
-  // well as the first argument.
-  uintptr_t sp_, ra_, arg0_;
+  // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
+  // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+  // also need the first argument for single-frame deopt.
+  uintptr_t sp_, t9_, arg0_;
 };
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index f9c5315..937cd1e 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -45,7 +45,9 @@
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
-      entrypoint == kQuickReadBarrierSlow;
+      entrypoint == kQuickReadBarrierJni ||
+      entrypoint == kQuickReadBarrierSlow ||
+      entrypoint == kQuickReadBarrierForRootSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 59421dd..09f8849 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -28,8 +28,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
@@ -59,6 +59,9 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
+  // Note: MIPS has asserts checking the type of each entrypoint. Don't move this
+  //       code to InitDefaultEntryPoints().
+
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
@@ -167,9 +170,14 @@
                 "Non-direct C stub marked direct.");
 
   // Locks
-  qpoints->pLockObject = art_quick_lock_object;
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    qpoints->pLockObject = art_quick_lock_object_no_inline;
+    qpoints->pUnlockObject = art_quick_unlock_object_no_inline;
+  } else {
+    qpoints->pLockObject = art_quick_lock_object;
+    qpoints->pUnlockObject = art_quick_unlock_object;
+  }
   static_assert(!IsDirectEntrypoint(kQuickLockObject), "Non-direct C stub marked direct.");
-  qpoints->pUnlockObject = art_quick_unlock_object;
   static_assert(!IsDirectEntrypoint(kQuickUnlockObject), "Non-direct C stub marked direct.");
 
   // Math
@@ -260,6 +268,8 @@
   static_assert(!IsDirectEntrypoint(kQuickThrowNullPointer), "Non-direct C stub marked direct.");
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
   static_assert(!IsDirectEntrypoint(kQuickThrowStackOverflow), "Non-direct C stub marked direct.");
+  qpoints->pThrowStringBounds = art_quick_throw_string_bounds;
+  static_assert(!IsDirectEntrypoint(kQuickThrowStringBounds), "Non-direct C stub marked direct.");
 
   // Deoptimization from compiled code.
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
@@ -271,10 +281,106 @@
   qpoints->pA64Store = QuasiAtomic::Write64;
   static_assert(IsDirectEntrypoint(kQuickA64Store), "Non-direct C stub marked direct.");
 
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct.");
+  // Read barriers (and these entry points in particular) are not
+  // supported in the compiler on MIPS32.
+  qpoints->pReadBarrierMarkReg00 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg01 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg02 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg03 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg04 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg05 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg06 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg07 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg15),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg16),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg23),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg24),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg25),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg26),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg27),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg28),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
+                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierForRootSlow),
+                "Direct C stub not marked direct.");
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 8ea78eb..7969a8f 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips specific fault handler functions.
@@ -44,7 +44,7 @@
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips::SP]);
   VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
@@ -56,7 +56,7 @@
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips));
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips::A0]);
   } else {
     // The method is at the top of the stack.
     *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
@@ -71,8 +71,10 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -80,9 +82,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -111,7 +116,7 @@
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uintptr_t sp = sc->sc_regs[29];  // SP register
+  uintptr_t sp = sc->sc_regs[mips::SP];
   VLOG(signals) << "sp: " << std::hex << sp;
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
@@ -134,7 +139,7 @@
   // caused this fault.  This will be inserted into a callee save frame by
   // the function to which this handler returns (art_quick_throw_stack_overflow).
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
 
   // The kernel will now return to the address in sc->arm_pc.
   return true;
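
The rewritten handler converts a SIGSEGV from an implicit null check into a managed NullPointerException by editing the saved context: RA is pointed just past the faulting instruction so the unwinder finds the right GC map, PC and T9 are both redirected to the new _from_signal stub (T9 because of the PIC calling convention), and A0 carries the faulting address. A compressed C++ model of that rewrite (illustrative; the struct is a stand-in, not the kernel's real sigcontext):

    #include <cstdint>

    struct FakeSigcontext {
      uintptr_t pc;
      uintptr_t regs[32];  // GPRs: A0 = 4, T9 = 25, RA = 31
    };

    void RedirectToThrowStub(FakeSigcontext* sc, uintptr_t stub, uintptr_t fault_addr) {
      sc->regs[31] = sc->pc + 4;   // RA: points at the GC map location
      sc->pc = stub;               // resume in the throw stub...
      sc->regs[25] = stub;         // ...with T9 holding the callee address
      sc->regs[4] = fault_addr;    // A0: first argument of the stub
    }
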
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 93d79b7..b3a9866 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -76,21 +76,22 @@
   GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   // Override defaults based on variant string.
-  // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit.
+    // We only care whether it is R1, R2, R5 or R6, and we assume all CPUs have an FP unit.
   constexpr const char* kMips32Prefix = "mips32r";
   const size_t kPrefixLength = strlen(kMips32Prefix);
   if (variant.compare(0, kPrefixLength, kMips32Prefix, kPrefixLength) == 0 &&
       variant.size() > kPrefixLength) {
-    if (variant[kPrefixLength] >= '6') {
-      fpu_32bit = false;
-      r6 = true;
-    }
-    if (variant[kPrefixLength] >= '2') {
-      mips_isa_gte2 = true;
-    }
+    r6 = (variant[kPrefixLength] >= '6');
+    fpu_32bit = (variant[kPrefixLength] < '5');
+    mips_isa_gte2 = (variant[kPrefixLength] >= '2');
   } else if (variant == "default") {
-    // Default variant is: smp = true, has fpu, is gte2, is not r6. This is the traditional
-    // setting.
+    // Default variant is: smp = true, has FPU, is gte2. This is the traditional setting.
+    //
+    // Note, we get FPU bitness and R6-ness from the build (using cpp defines, see above)
+    // and don't override them because many things depend on the "default" variant being
+    // sufficient for most purposes. That is, "default" should work for both R2 and R6.
+    // Use "mips32r#" to get a specific configuration, possibly not matching the runtime
+    // ISA (e.g. for ISA-specific testing of dex2oat internals).
     mips_isa_gte2 = true;
   } else {
     LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index aac436e..120dc1c 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -81,8 +81,19 @@
 
  private:
   MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2, bool r6)
-      : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit),  mips_isa_gte2_(mips_isa_gte2), r6_(r6)
-  {}
+      : InstructionSetFeatures(smp),
+        fpu_32bit_(fpu_32bit),
+        mips_isa_gte2_(mips_isa_gte2),
+        r6_(r6) {
+    // Sanity checks.
+    if (r6) {
+      CHECK(mips_isa_gte2);
+      CHECK(!fpu_32bit);
+    }
+    if (!mips_isa_gte2) {
+      CHECK(fpu_32bit);
+    }
+  }
 
   // Bitmap positions for encoding features as a bitmap.
   enum {
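
The new parsing collapses the old cascading ifs into three independent comparisons against the revision digit, and the constructor now enforces the implied invariants: R6 implies gte2 and a 64-bit FPU, and anything below R2 implies a 32-bit FPU. A standalone sketch of the combined logic (hypothetical names, not the real API):

    #include <cassert>
    #include <string>

    struct MipsFeatures { bool fpu_32bit, mips_isa_gte2, r6; };

    MipsFeatures ParseMips32Variant(const std::string& variant) {
      MipsFeatures f{/*fpu_32bit=*/ true, /*mips_isa_gte2=*/ false, /*r6=*/ false};
      const std::string prefix = "mips32r";
      if (variant.compare(0, prefix.size(), prefix) == 0 &&
          variant.size() > prefix.size()) {
        const char rev = variant[prefix.size()];
        f.r6 = (rev >= '6');
        f.fpu_32bit = (rev < '5');       // 64-bit FPU from r5 onwards
        f.mips_isa_gte2 = (rev >= '2');
      }
      // Invariants mirrored by the CHECKs in the constructor above.
      if (f.r6) { assert(f.mips_isa_gte2 && !f.fpu_32bit); }
      if (!f.mips_isa_gte2) { assert(f.fpu_32bit); }
      return f;
    }

For example, "mips32r2" yields {fpu_32bit=true, gte2=true, r6=false}, "mips32r5" yields {false, true, false}, and "mips32r6" yields {false, true, true}.
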
diff --git a/runtime/arch/mips/jni_entrypoints_mips.S b/runtime/arch/mips/jni_entrypoints_mips.S
index 3558efd..5c95071 100644
--- a/runtime/arch/mips/jni_entrypoints_mips.S
+++ b/runtime/arch/mips/jni_entrypoints_mips.S
@@ -38,7 +38,8 @@
     .cfi_rel_offset 5, 4
     sw    $a0, 0($sp)
     .cfi_rel_offset 4, 0
-    jal   artFindNativeMethod   # (Thread*)
+    la    $t9, artFindNativeMethod
+    jalr  $t9                   # (Thread*)
     move  $a0, $s1              # pass Thread::Current()
     lw    $a0, 0($sp)           # restore registers from stack
     lw    $a1, 4($sp)
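
This jal to la/jalr rewrite, repeated throughout the MIPS entrypoints below, follows the o32 PIC ABI: a position-independent callee recomputes its $gp from $t9 (that is what .cpload does), so every call must arrive with $t9 holding the callee's own address, which a bare jal does not guarantee. A conceptual C++ model of the invariant (illustrative only):

    #include <cstdint>

    // o32 PIC: the callee derives its $gp from $t9 plus a link-time constant,
    // so the caller must load the callee's address into $t9 before jalr.
    struct PicCallee {
      uintptr_t entry;    // value the caller must place in $t9 (la $t9, fn)
      intptr_t gp_disp;   // link-time displacement applied by .cpload
      uintptr_t Gp() const { return entry + gp_disp; }  // $gp seen by the callee
    };
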
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index ba58c3f..3d393f6 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -30,19 +30,19 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      * Callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word for Method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVES + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     addiu  $sp, $sp, -96
     .cfi_adjust_cfa_offset 96
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 96)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 96)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS) size not as expected."
 #endif
 
     sw     $ra, 92($sp)
@@ -79,7 +79,7 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -88,20 +88,20 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes non-moving GC.
      * Does not include rSUSPEND or rSELF
      * callee-save: $s2-$s8 + $gp + $ra, 9 total + 2 words padding + 1 word to hold Method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_ONLY_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_ONLY + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
     addiu  $sp, $sp, -48
     .cfi_adjust_cfa_offset 48
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 48)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 48)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS) size not as expected."
 #endif
 
     sw     $ra, 44($sp)
@@ -126,14 +126,14 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 44($sp)
@@ -158,24 +158,24 @@
     .cfi_adjust_cfa_offset -48
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     jalr   $zero, $ra
     nop
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     addiu  $sp, $sp, -80
     .cfi_adjust_cfa_offset 80
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 80)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 80)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
 #endif
 
     sw     $ra, 76($sp)
@@ -209,17 +209,17 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -228,22 +228,22 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      * Clobbers $sp
      * Use $a0 as the Method* and loads it into bottom of stack.
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     sw $a0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 76($sp)
@@ -277,11 +277,208 @@
 .endm
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp, $ra, $f0-$f31;
+     *              28(GPR) + 32(FPR) + 3 words for padding and 1 word for Method*
+     * Clobbers $t0 and $t1.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, -256
+    .cfi_adjust_cfa_offset 256
+
+     // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 256)
+#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
+#endif
+
+    sw     $ra, 252($sp)
+    .cfi_rel_offset 31, 252
+    sw     $fp, 248($sp)
+    .cfi_rel_offset 30, 248
+    sw     $gp, 244($sp)
+    .cfi_rel_offset 28, 244
+    sw     $t9, 240($sp)
+    .cfi_rel_offset 25, 240
+    sw     $t8, 236($sp)
+    .cfi_rel_offset 24, 236
+    sw     $s7, 232($sp)
+    .cfi_rel_offset 23, 232
+    sw     $s6, 228($sp)
+    .cfi_rel_offset 22, 228
+    sw     $s5, 224($sp)
+    .cfi_rel_offset 21, 224
+    sw     $s4, 220($sp)
+    .cfi_rel_offset 20, 220
+    sw     $s3, 216($sp)
+    .cfi_rel_offset 19, 216
+    sw     $s2, 212($sp)
+    .cfi_rel_offset 18, 212
+    sw     $s1, 208($sp)
+    .cfi_rel_offset 17, 208
+    sw     $s0, 204($sp)
+    .cfi_rel_offset 16, 204
+    sw     $t7, 200($sp)
+    .cfi_rel_offset 15, 200
+    sw     $t6, 196($sp)
+    .cfi_rel_offset 14, 196
+    sw     $t5, 192($sp)
+    .cfi_rel_offset 13, 192
+    sw     $t4, 188($sp)
+    .cfi_rel_offset 12, 188
+    sw     $t3, 184($sp)
+    .cfi_rel_offset 11, 184
+    sw     $t2, 180($sp)
+    .cfi_rel_offset 10, 180
+    sw     $t1, 176($sp)
+    .cfi_rel_offset 9, 176
+    sw     $t0, 172($sp)
+    .cfi_rel_offset 8, 172
+    sw     $a3, 168($sp)
+    .cfi_rel_offset 7, 168
+    sw     $a2, 164($sp)
+    .cfi_rel_offset 6, 164
+    sw     $a1, 160($sp)
+    .cfi_rel_offset 5, 160
+    sw     $a0, 156($sp)
+    .cfi_rel_offset 4, 156
+    sw     $v1, 152($sp)
+    .cfi_rel_offset 3, 152
+    sw     $v0, 148($sp)
+    .cfi_rel_offset 2, 148
+
+    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+    bal 1f
+    .set push
+    .set noat
+    sw     $at, 144($sp)
+    .cfi_rel_offset 1, 144
+    .set pop
+1:
+    .cpload $ra
+
+    SDu $f30, $f31, 136, $sp, $t1
+    SDu $f28, $f29, 128, $sp, $t1
+    SDu $f26, $f27, 120, $sp, $t1
+    SDu $f24, $f25, 112, $sp, $t1
+    SDu $f22, $f23, 104, $sp, $t1
+    SDu $f20, $f21, 96,  $sp, $t1
+    SDu $f18, $f19, 88,  $sp, $t1
+    SDu $f16, $f17, 80,  $sp, $t1
+    SDu $f14, $f15, 72,  $sp, $t1
+    SDu $f12, $f13, 64,  $sp, $t1
+    SDu $f10, $f11, 56,  $sp, $t1
+    SDu $f8, $f9, 48,  $sp, $t1
+    SDu $f6, $f7, 40,  $sp, $t1
+    SDu $f4, $f5, 32,  $sp, $t1
+    SDu $f2, $f3, 24,  $sp, $t1
+    SDu $f0, $f1, 16,  $sp, $t1
+
+    # 3 words of padding and 1 word for holding Method*
+
+    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+    lw $t0, 0($t0)
+    lw $t0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t0)
+    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+
+    LDu $f30, $f31, 136, $sp, $t1
+    LDu $f28, $f29, 128, $sp, $t1
+    LDu $f26, $f27, 120, $sp, $t1
+    LDu $f24, $f25, 112, $sp, $t1
+    LDu $f22, $f23, 104, $sp, $t1
+    LDu $f20, $f21, 96,  $sp, $t1
+    LDu $f18, $f19, 88,  $sp, $t1
+    LDu $f16, $f17, 80,  $sp, $t1
+    LDu $f14, $f15, 72,  $sp, $t1
+    LDu $f12, $f13, 64,  $sp, $t1
+    LDu $f10, $f11, 56,  $sp, $t1
+    LDu $f8, $f9, 48,  $sp, $t1
+    LDu $f6, $f7, 40,  $sp, $t1
+    LDu $f4, $f5, 32,  $sp, $t1
+    LDu $f2, $f3, 24,  $sp, $t1
+    LDu $f0, $f1, 16,  $sp, $t1
+
+    lw     $ra, 252($sp)
+    .cfi_restore 31
+    lw     $fp, 248($sp)
+    .cfi_restore 30
+    lw     $gp, 244($sp)
+    .cfi_restore 28
+    lw     $t9, 240($sp)
+    .cfi_restore 25
+    lw     $t8, 236($sp)
+    .cfi_restore 24
+    lw     $s7, 232($sp)
+    .cfi_restore 23
+    lw     $s6, 228($sp)
+    .cfi_restore 22
+    lw     $s5, 224($sp)
+    .cfi_restore 21
+    lw     $s4, 220($sp)
+    .cfi_restore 20
+    lw     $s3, 216($sp)
+    .cfi_restore 19
+    lw     $s2, 212($sp)
+    .cfi_restore 18
+    lw     $s1, 208($sp)
+    .cfi_restore 17
+    lw     $s0, 204($sp)
+    .cfi_restore 16
+    lw     $t7, 200($sp)
+    .cfi_restore 15
+    lw     $t6, 196($sp)
+    .cfi_restore 14
+    lw     $t5, 192($sp)
+    .cfi_restore 13
+    lw     $t4, 188($sp)
+    .cfi_restore 12
+    lw     $t3, 184($sp)
+    .cfi_restore 11
+    lw     $t2, 180($sp)
+    .cfi_restore 10
+    lw     $t1, 176($sp)
+    .cfi_restore 9
+    lw     $t0, 172($sp)
+    .cfi_restore 8
+    lw     $a3, 168($sp)
+    .cfi_restore 7
+    lw     $a2, 164($sp)
+    .cfi_restore 6
+    lw     $a1, 160($sp)
+    .cfi_restore 5
+    lw     $a0, 156($sp)
+    .cfi_restore 4
+    lw     $v1, 152($sp)
+    .cfi_restore 3
+    lw     $v0, 148($sp)
+    .cfi_restore 2
+    .set push
+    .set noat
+    lw     $at, 144($sp)
+    .cfi_restore 1
+    .set pop
+
+    addiu  $sp, $sp, 256            # pop frame
+    .cfi_adjust_cfa_offset -256
+.endm
+
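
The 256-byte figure in the compile-time check above follows directly from the register list in the macro comment: 28 GPRs (everything but $zero, $k0, $k1 and $sp), all 32 FPRs, 3 words of padding and 1 word for Method*, each 4 bytes wide on MIPS32. A quick cross-check (illustrative only, not part of the patch):

    constexpr int kSavedGprs = 28;
    constexpr int kSavedFprs = 32;
    constexpr int kPaddingWords = 3;
    constexpr int kMethodWords = 1;
    static_assert(4 * (kSavedGprs + kSavedFprs + kPaddingWords + kMethodWords) == 256,
                  "112 + 128 + 12 + 4 == 256");
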
+    /*
     * Macro that sets up a call through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
     la      $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
@@ -289,7 +486,7 @@
 
 .macro RETURN_IF_NO_EXCEPTION
     lw     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bnez   $t0, 1f                       # success if no exception is pending
     nop
     jalr   $zero, $ra
@@ -299,7 +496,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bnez   $v0, 1f                       # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -309,7 +506,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     beqz   $v0, 1f                       # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -319,6 +516,111 @@
 .endm
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack
+     *   a2 = pc to call
+     *   a3 = JValue* result
+     *   [sp + 16] = shorty
+     *   [sp + 20] = thread
+     */
+ENTRY art_quick_osr_stub
+    // Save callee general purpose registers, RA and GP.
+    addiu  $sp, $sp, -48
+    .cfi_adjust_cfa_offset 48
+    sw     $ra, 44($sp)
+    .cfi_rel_offset 31, 44
+    sw     $s8, 40($sp)
+    .cfi_rel_offset 30, 40
+    sw     $gp, 36($sp)
+    .cfi_rel_offset 28, 36
+    sw     $s7, 32($sp)
+    .cfi_rel_offset 23, 32
+    sw     $s6, 28($sp)
+    .cfi_rel_offset 22, 28
+    sw     $s5, 24($sp)
+    .cfi_rel_offset 21, 24
+    sw     $s4, 20($sp)
+    .cfi_rel_offset 20, 20
+    sw     $s3, 16($sp)
+    .cfi_rel_offset 19, 16
+    sw     $s2, 12($sp)
+    .cfi_rel_offset 18, 12
+    sw     $s1, 8($sp)
+    .cfi_rel_offset 17, 8
+    sw     $s0, 4($sp)
+    .cfi_rel_offset 16, 4
+
+    move   $s8, $sp                        # Save the stack pointer
+    move   $s7, $a1                        # Save size of stack
+    move   $s6, $a2                        # Save the pc to call
+    lw     rSELF, 48+20($sp)               # Save managed thread pointer into rSELF
+    addiu  $t0, $sp, -12                   # Reserve space for stack pointer,
+                                           #    JValue* result, and ArtMethod* slot.
+    srl    $t0, $t0, 4                     # Align stack pointer to 16 bytes
+    sll    $sp, $t0, 4                     # Update stack pointer
+    sw     $s8, 4($sp)                     # Save old stack pointer
+    sw     $a3, 8($sp)                     # Save JValue* result
+    sw     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    subu   $sp, $a1                        # Reserve space for callee stack
+    move   $a2, $a1
+    move   $a1, $a0
+    move   $a0, $sp
+    la     $t9, memcpy
+    jalr   $t9                             # memcpy (dest a0, src a1, bytes a2)
+    addiu  $sp, $sp, -16                   # make space for argument slots for memcpy
+    bal    .Losr_entry                     # Call the method
+    addiu  $sp, $sp, 16                    # restore stack after memcpy
+    lw     $a2, 8($sp)                     # Restore JValue* result
+    lw     $sp, 4($sp)                     # Restore saved stack pointer
+    lw     $a0, 48+16($sp)                 # load shorty
+    lbu    $a0, 0($a0)                     # load return type
+    li     $a1, 'D'                        # put char 'D' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'D'
+    li     $a1, 'F'                        # put char 'F' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'F'
+    nop
+    sw     $v0, 0($a2)
+    b      .Losr_exit
+    sw     $v1, 4($a2)                     # store v0/v1 into result
+.Losr_fp_result:
+    SDu    $f0, $f1, 0, $a2, $t0           # store f0/f1 into result
+.Losr_exit:
+    lw     $ra, 44($sp)
+    .cfi_restore 31
+    lw     $s8, 40($sp)
+    .cfi_restore 30
+    lw     $gp, 36($sp)
+    .cfi_restore 28
+    lw     $s7, 32($sp)
+    .cfi_restore 23
+    lw     $s6, 28($sp)
+    .cfi_restore 22
+    lw     $s5, 24($sp)
+    .cfi_restore 21
+    lw     $s4, 20($sp)
+    .cfi_restore 20
+    lw     $s3, 16($sp)
+    .cfi_restore 19
+    lw     $s2, 12($sp)
+    .cfi_restore 18
+    lw     $s1, 8($sp)
+    .cfi_restore 17
+    lw     $s0, 4($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra
+    addiu  $sp, $sp, 48
+    .cfi_adjust_cfa_offset -48
+.Losr_entry:
+    addiu  $s7, $s7, -4
+    addu   $t0, $s7, $sp
+    move   $t9, $s6
+    jalr   $zero, $t9
+    sw     $ra, 0($t0)                     # Store RA per the compiler ABI
+END art_quick_osr_stub
+
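
The pointer arithmetic at the top of the OSR stub (addiu, srl, sll on $sp) reserves three words for the old stack pointer, the JValue* result and the null ArtMethod*, then rounds the stack down to a 16-byte boundary using two shifts. The same computation in C++ (sketch):

    #include <cstdint>

    // addiu $t0, $sp, -12 ; srl $t0, $t0, 4 ; sll $sp, $t0, 4
    uintptr_t ReserveAndAlign16(uintptr_t sp, uintptr_t reserve) {
      return ((sp - reserve) >> 4) << 4;  // same as (sp - reserve) & ~uintptr_t{15}
    }
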
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
@@ -373,8 +675,8 @@
     lw      $fp, 120($a0)
     lw      $ra, 124($a0)
     lw      $a0, 16($a0)
-    move    $v0, $zero          # clear result registers r0 and r1
-    jalr    $zero, $ra          # do long jump
+    move    $v0, $zero          # clear result registers v0 and v1 (in branch delay slot)
+    jalr    $zero, $t9          # do long jump
     move    $v1, $zero
 END art_quick_do_long_jump
 
@@ -384,7 +686,7 @@
      * the bottom of the thread. On entry a0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artDeliverExceptionFromCode
     jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -395,18 +697,30 @@
      */
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNullPointerExceptionFromCode
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
+
+    /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la   $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
     /*
      * Called by managed code to create and deliver an ArithmeticException
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowDivZeroFromCode
     jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -417,18 +731,30 @@
      */
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowArrayBoundsFromCode
     jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_throw_array_bounds
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt().
+     */
+    .extern artThrowStringBoundsFromCode
+ENTRY art_quick_throw_string_bounds
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la   $t9, artThrowStringBoundsFromCode
+    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
+END art_quick_throw_string_bounds
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowStackOverflowFromCode
     jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -439,7 +765,7 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNoSuchMethodFromCode
     jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -462,12 +788,13 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
-    jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
+    la    $t9, \cxx_name
+    jalr  $t9                              # (method_idx, this, Thread*, $sp)
     addiu $a3, $sp, ARG_SLOT_SIZE          # pass $sp (remove arg slots)
     move  $a0, $v0                         # save target Method*
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     beqz  $v0, 1f
     move  $t9, $v1                         # save $v0->code_
     jalr  $zero, $t9
@@ -514,6 +841,8 @@
     addiu $\index, 1
 .endm
 
+#define SPILL_SIZE    32
+
     /*
      * Invocation stub for quick code.
      * On entry:
@@ -526,8 +855,9 @@
      */
 ENTRY art_quick_invoke_stub
     sw    $a0, 0($sp)           # save out a0
-    addiu $sp, $sp, -16         # spill s0, s1, fp, ra
-    .cfi_adjust_cfa_offset 16
+    addiu $sp, $sp, -SPILL_SIZE # spill s0, s1, fp, ra and gp
+    .cfi_adjust_cfa_offset SPILL_SIZE
+    sw    $gp, 16($sp)
     sw    $ra, 12($sp)
     .cfi_rel_offset 31, 12
     sw    $fp, 8($sp)
@@ -545,16 +875,18 @@
     srl   $t0, $t0, 4           #   native calling convention only aligns to 8B,
     sll   $sp, $t0, 4           #   so we have to ensure ART 16B alignment ourselves.
     addiu $a0, $sp, 4           # pass stack pointer + ArtMethod* as dest for memcpy
-    jal   memcpy                # (dest, src, bytes)
+    la    $t9, memcpy
+    jalr  $t9                   # (dest, src, bytes)
     addiu $sp, $sp, -16         # make space for argument slots for memcpy
     addiu $sp, $sp, 16          # restore stack after memcpy
-    lw    $a0, 16($fp)          # restore ArtMethod*
+    lw    $gp, 16($fp)          # restore $gp
+    lw    $a0, SPILL_SIZE($fp)  # restore ArtMethod*
     lw    $a1, 4($sp)           # a1 = this*
     addiu $t0, $sp, 8           # t0 = pointer to the current argument (skip ArtMethod* and this*)
     li    $t3, 2                # t3 = gpr_index = 2 (skip A0 and A1)
     move  $t4, $zero            # t4 = fp_index = 0
-    lw    $t1, 20+16($fp)       # get shorty (20 is offset from the $sp on entry + 16 as the $fp is
-                                # 16 bytes below the $sp on entry)
+    lw    $t1, 20 + SPILL_SIZE($fp)  # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
+                                # as the $fp is SPILL_SIZE bytes below the $sp on entry)
     addiu $t1, 1                # t1 = shorty + 1 (skip 1 for return type)
 loop:
     lbu   $t2, 0($t1)           # t2 = shorty[i]
@@ -619,8 +951,8 @@
     .cfi_restore 30
     lw    $ra, 12($sp)
     .cfi_restore 31
-    addiu $sp, $sp, 16
-    .cfi_adjust_cfa_offset -16
+    addiu $sp, $sp, SPILL_SIZE
+    .cfi_adjust_cfa_offset -SPILL_SIZE
     lw    $t0, 16($sp)          # get result pointer
     lw    $t1, 20($sp)          # get shorty
     lb    $t1, 0($t1)           # get result type char
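
The 20 + SPILL_SIZE offset is the entry-frame layout shifted by the new prologue: the shorty sits at [sp + 20] on entry, and after spilling, $fp ends up SPILL_SIZE bytes below that original $sp. A one-line cross-check (illustrative):

    constexpr int kSpillSize = 32;        // matches SPILL_SIZE above
    constexpr int kShortySpOffset = 20;   // [sp + 20] on entry
    static_assert(kShortySpOffset + kSpillSize == 52, "shorty is at $fp + 52");
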
@@ -649,8 +981,9 @@
      */
 ENTRY art_quick_invoke_static_stub
     sw    $a0, 0($sp)           # save out a0
-    addiu $sp, $sp, -16         # spill s0, s1, fp, ra
-    .cfi_adjust_cfa_offset 16
+    addiu $sp, $sp, -SPILL_SIZE # spill s0, s1, fp, ra and gp
+    .cfi_adjust_cfa_offset SPILL_SIZE
+    sw    $gp, 16($sp)
     sw    $ra, 12($sp)
     .cfi_rel_offset 31, 12
     sw    $fp, 8($sp)
@@ -668,15 +1001,17 @@
     srl   $t0, $t0, 4           #   native calling convention only aligns to 8B,
     sll   $sp, $t0, 4           #   so we have to ensure ART 16B alignment ourselves.
     addiu $a0, $sp, 4           # pass stack pointer + ArtMethod* as dest for memcpy
-    jal   memcpy                # (dest, src, bytes)
+    la    $t9, memcpy
+    jalr  $t9                   # (dest, src, bytes)
     addiu $sp, $sp, -16         # make space for argument slots for memcpy
     addiu $sp, $sp, 16          # restore stack after memcpy
-    lw    $a0, 16($fp)          # restore ArtMethod*
+    lw    $gp, 16($fp)          # restore $gp
+    lw    $a0, SPILL_SIZE($fp)  # restore ArtMethod*
     addiu $t0, $sp, 4           # t0 = pointer to the current argument (skip ArtMethod*)
     li    $t3, 1                # t3 = gpr_index = 1 (skip A0)
     move  $t4, $zero            # t4 = fp_index = 0
-    lw    $t1, 20+16($fp)       # get shorty (20 is offset from the $sp on entry + 16 as the $fp is
-                                # 16 bytes below the $sp on entry)
+    lw    $t1, 20 + SPILL_SIZE($fp)  # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
+                                # as the $fp is SPILL_SIZE bytes below the $sp on entry)
     addiu $t1, 1                # t1 = shorty + 1 (skip 1 for return type)
 loopS:
     lbu   $t2, 0($t1)           # t2 = shorty[i]
@@ -744,8 +1079,8 @@
     .cfi_restore 30
     lw    $ra, 12($sp)
     .cfi_restore 31
-    addiu $sp, $sp, 16
-    .cfi_adjust_cfa_offset -16
+    addiu $sp, $sp, SPILL_SIZE
+    .cfi_adjust_cfa_offset -SPILL_SIZE
     lw    $t0, 16($sp)          # get result pointer
     lw    $t1, 20($sp)          # get shorty
     lb    $t1, 0($t1)           # get result type char
@@ -762,16 +1097,19 @@
     nop
 END art_quick_invoke_static_stub
 
+#undef SPILL_SIZE
+
     /*
      * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
      * failure.
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    lw     $a2, 0($sp)                    # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case exception allocation triggers GC
-    jal    artHandleFillArrayDataFromCode # (payload offset, Array*, method, Thread*)
-    move   $a3, rSELF                     # pass Thread::Current
+    lw     $a2, 0($sp)                # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
+    la     $t9, artHandleFillArrayDataFromCode
+    jalr   $t9                        # (payload offset, Array*, method, Thread*)
+    move   $a3, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_handle_fill_data
 
@@ -782,12 +1120,23 @@
 ENTRY art_quick_lock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
-    jal     artLockObjectFromCode         # (Object* obj, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
+    la      $t9, artLockObjectFromCode
+    jalr    $t9                           # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
+    la      $t9, artLockObjectFromCode
+    jalr    $t9                           # (Object* obj, Thread*)
+    move    $a1, rSELF                    # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      */
@@ -795,39 +1144,53 @@
 ENTRY art_quick_unlock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
-    jal     artUnlockObjectFromCode   # (Object* obj, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
+    la      $t9, artUnlockObjectFromCode
+    jalr    $t9                       # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
+    la      $t9, artUnlockObjectFromCode
+    jalr    $t9                       # (Object* obj, Thread*)
+    move    $a1, rSELF                # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
      */
     .extern artThrowClassCastException
 ENTRY art_quick_check_cast
-    addiu  $sp, $sp, -16
-    .cfi_adjust_cfa_offset 16
+    addiu  $sp, $sp, -32
+    .cfi_adjust_cfa_offset 32
+    sw     $gp, 16($sp)
     sw     $ra, 12($sp)
     .cfi_rel_offset 31, 12
     sw     $t9, 8($sp)
     sw     $a1, 4($sp)
     sw     $a0, 0($sp)
-    jal    artIsAssignableFromCode
+    la     $t9, artIsAssignableFromCode
+    jalr   $t9
     addiu  $sp, $sp, -16             # reserve argument slots on the stack
     addiu  $sp, $sp, 16
+    lw     $gp, 16($sp)
     beqz   $v0, .Lthrow_class_cast_exception
     lw     $ra, 12($sp)
     jalr   $zero, $ra
-    addiu  $sp, $sp, 16
-    .cfi_adjust_cfa_offset -16
+    addiu  $sp, $sp, 32
+    .cfi_adjust_cfa_offset -32
 .Lthrow_class_cast_exception:
     lw     $t9, 8($sp)
     lw     $a1, 4($sp)
     lw     $a0, 0($sp)
-    addiu  $sp, $sp, 16
-    .cfi_adjust_cfa_offset -16
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    addiu  $sp, $sp, 32
+    .cfi_adjust_cfa_offset -32
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -873,8 +1236,9 @@
     .ifnc \rObj, $a1
         move $a1, \rObj             # pass rObj
     .endif
-    addiu $a2, $zero, \offset       # pass offset
-    jal artReadBarrierSlow          # artReadBarrierSlow(ref, rObj, offset)
+    addiu  $a2, $zero, \offset      # pass offset
+    la     $t9, artReadBarrierSlow
+    jalr   $t9                      # artReadBarrierSlow(ref, rObj, offset)
     addiu  $sp, $sp, -16            # Use branch delay slot to reserve argument slots on the stack
                                     # before the call to artReadBarrierSlow.
     addiu  $sp, $sp, 16             # restore stack after call to artReadBarrierSlow
@@ -958,16 +1322,19 @@
     .cfi_adjust_cfa_offset 32
     sw     $ra, 28($sp)
     .cfi_rel_offset 31, 28
+    sw     $gp, 16($sp)
     sw     $t9, 12($sp)
     sw     $a2, 8($sp)
     sw     $a1, 4($sp)
     sw     $a0, 0($sp)
     move   $a1, $t1
     move   $a0, $t0
-    jal    artIsAssignableFromCode  # (Class*, Class*)
-    addiu $sp, $sp, -16     # reserve argument slots on the stack
-    addiu $sp, $sp, 16
+    la     $t9, artIsAssignableFromCode
+    jalr   $t9               # (Class*, Class*)
+    addiu  $sp, $sp, -16     # reserve argument slots on the stack
+    addiu  $sp, $sp, 16
     lw     $ra, 28($sp)
+    lw     $gp, 16($sp)
     lw     $t9, 12($sp)
     lw     $a2, 8($sp)
     lw     $a1, 4($sp)
@@ -976,7 +1343,7 @@
     .cfi_adjust_cfa_offset -32
     bnez   $v0, .Ldo_aput
     nop
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     move $a1, $a2
     la   $t9, artThrowArrayStoreException
     jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
@@ -989,8 +1356,9 @@
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetBooleanStaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_boolean_static
@@ -1000,8 +1368,9 @@
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetByteStaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_byte_static
@@ -1012,8 +1381,9 @@
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetCharStaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_char_static
@@ -1023,8 +1393,9 @@
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetShortStaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_short_static
@@ -1035,8 +1406,9 @@
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGet32StaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get32_static
@@ -1047,8 +1419,9 @@
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGet64StaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get64_static
@@ -1059,8 +1432,9 @@
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetObjStaticFromCode
+    jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_obj_static
@@ -1071,8 +1445,9 @@
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetBooleanInstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_boolean_instance
@@ -1082,8 +1457,9 @@
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetByteInstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_byte_instance
@@ -1094,8 +1470,9 @@
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetCharInstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_char_instance
@@ -1105,8 +1482,9 @@
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetShortInstanceFromCode   # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetShortInstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_short_instance
@@ -1117,8 +1495,9 @@
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGet32InstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get32_instance
@@ -1129,8 +1508,9 @@
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGet64InstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get64_instance
@@ -1141,8 +1521,9 @@
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artGetObjInstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_obj_instance
@@ -1153,8 +1534,9 @@
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet8StaticFromCode
+    jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set8_static
@@ -1165,8 +1547,9 @@
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*, $sp)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet16StaticFromCode
+    jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set16_static
@@ -1177,8 +1560,9 @@
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet32StaticFromCode
+    jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set32_static
@@ -1190,8 +1574,9 @@
 ENTRY art_quick_set64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
                                          # 64 bit new_val is in a2:a3 pair
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet64StaticFromCode
+    jalr   $t9                           # (field_idx, referrer, new_val, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set64_static
@@ -1202,9 +1587,10 @@
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSetObjStaticFromCode
+    jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
     RETURN_IF_ZERO
 END art_quick_set_obj_static
 
@@ -1214,8 +1600,9 @@
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet8InstanceFromCode       # (field_idx, Object*, new_val, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet8InstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set8_instance
@@ -1226,8 +1613,9 @@
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet16InstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set16_instance
@@ -1238,8 +1626,9 @@
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSet32InstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set32_instance
@@ -1251,9 +1640,10 @@
 ENTRY art_quick_set64_instance
     lw     $t1, 0($sp)                   # load referrer's Method*
                                          # 64 bit new_val is in a2:a3 pair
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     sw     rSELF, 20($sp)                # pass Thread::Current
-    jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
+    la     $t9, artSet64InstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     $t1, 16($sp)                  # pass referrer's Method*
     RETURN_IF_ZERO
 END art_quick_set64_instance
@@ -1264,8 +1654,9 @@
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*)
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    la     $t9, artSetObjInstanceFromCode
+    jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set_obj_instance
@@ -1274,8 +1665,9 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
-    jal     \entrypoint
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
+    la      $t9, \entrypoint
+    jalr    $t9
     move    $a1, rSELF                # pass Thread::Current
     \return
 END \name
@@ -1284,8 +1676,9 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
-    jal     \entrypoint
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
+    la      $t9, \entrypoint
+    jalr    $t9
     move    $a2, rSELF                # pass Thread::Current
     \return
 END \name
@@ -1294,8 +1687,9 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
-    jal     \entrypoint
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
+    la      $t9, \entrypoint
+    jalr    $t9
     move    $a3, rSELF                # pass Thread::Current
     \return
 END \name
@@ -1304,15 +1698,127 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
-    jal     \entrypoint
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
+    la      $t9, \entrypoint
+    jalr    $t9
     sw      rSELF, 16($sp)            # pass Thread::Current
     \return
 END \name
 .endm
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread stack top offset
+    # t4: thread stack bottom offset
+    # v0: free list head
+    #
+    # t5, t6 : temps
+
+    lw    $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1)       # Load dex cache resolved types
+                                                               # array.
+
+    sll   $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT            # Scale type_idx to a byte offset.
+    addu  $t5, $t0, $t5                                        # Compute the element address.
+    lw    $t0, 0($t5)                                          # Load class (t0).
+    beqz  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li    $t6, MIRROR_CLASS_STATUS_INITIALIZED
+    lw    $t5, MIRROR_CLASS_STATUS_OFFSET($t0)                 # Check class status.
+    bne   $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flags and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor   $t5, $t5, $t5
+    addu  $t0, $t0, $t5
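+    # ($t5 is zero, but because it is computed from the just-loaded status value,
+    # every use of $t0 below now carries a data dependence on that load.)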
+
+    lw    $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)           # Check if the access flags have
+    li    $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE               # kAccClassIsFinalizable set.
+    and   $t6, $t5, $t6
+    bnez  $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)        # Check if thread local allocation
+    lw    $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)        # stack has any room left.
+    bgeu  $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)            # Load object size (t1).
+    li    $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE          # Check if size is for a thread local
+                                                               # allocation.
+    bgtu  $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size: align the size up to the rosalloc bracket
+    # quantum, divide by the quantum size and subtract 1.
+
+    addiu $t1, $t1, -1                                         # Decrease obj size and shift right
+    srl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT        # by quantum.
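+    # Illustrative example, assuming an 8-byte bracket quantum (shift 3): an object
+    # size of 24 gives (24 - 1) >> 3 = 2, i.e. the run for the 24-byte bracket.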
+
+    sll   $t2, $t1, POINTER_SIZE_SHIFT
+    addu  $t2, $t2, $s1
+    lw    $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)                # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+
+    lw    $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqz  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+    nop
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+
+    lw    $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
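+    # Rough C sketch of this free-list pop (field names illustrative):
+    #   obj = run->free_list.head; run->free_list.head = obj->next;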
+
+    # Store the class pointer in the header. This also overwrites the free-list next pointer. The
+    # offsets are asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw    $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+
+    sw    $v0, 0($t3)
+    addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+
+    lw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu $t5, $t5, -1
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                                          # Fence.
+
+    jalr  $zero, $ra
+    nop
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+
+    SETUP_SAVE_REFS_ONLY_FRAME
+    la    $t9, artAllocObjectFromCodeRosAlloc
+    jalr  $t9
+    move  $a2, $s1                                                # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
@@ -1343,31 +1849,35 @@
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
-    lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
-    bnez   $a0, 1f
+ENTRY_NO_GP art_quick_test_suspend
+    lh     rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+    bnez   rSUSPEND, 1f
     addiu  rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
     nop
 1:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves for stack crawl
-    jal    artTestSuspendFromCode              # (Thread*)
+    SETUP_SAVE_EVERYTHING_FRAME                      # save everything for stack crawl
+    la     $t9, artTestSuspendFromCode
+    jalr   $t9                                       # (Thread*)
     move   $a0, rSELF
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    jalr   $zero, $ra
+    nop
 END art_quick_test_suspend
 
     /*
      * Called by managed code that is attempting to call a method on a proxy class. On entry
-     * r0 holds the proxy method; r1, r2 and r3 may contain arguments.
+     * a0 holds the proxy method; a1, a2 and a3 may contain arguments.
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF                  # pass Thread::Current
-    jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
+    la      $t9, artQuickProxyInvokeHandler
+    jalr    $t9                         # (Method* proxy method, receiver, Thread*, SP)
     addiu   $a3, $sp, ARG_SLOT_SIZE     # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1          # move float value to return value
@@ -1378,42 +1888,74 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * a0 is the conflict ArtMethod.
+     * t0 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to a0, t0 and t1.
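+     *
+     * The ImtConflictTable walked below is, roughly, an array of
+     * (interface method, implementation method) pointer pairs terminated
+     * by a null interface-method entry.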
      */
 ENTRY art_quick_imt_conflict_trampoline
-    move    $a0, $t0
+    lw      $t1, 0($sp)                                      # Load referrer.
+    lw      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array.
+    sll     $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
+    addu    $t0, $t1, $t0                                    # Add offset to base.
+    lw      $t0, 0($t0)                                      # Load interface method.
+    lw      $a0, ART_METHOD_JNI_OFFSET_32($a0)               # Load ImtConflictTable.
+
+.Limt_table_iterate:
+    lw      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    # Branch if found.
+    beq     $t1, $t0, .Limt_table_found
+    nop
+    # If the entry is null, the interface method is not in the ImtConflictTable.
+    beqz    $t1, .Lconflict_trampoline
+    nop
+    # Iterate over the entries of the ImtConflictTable.
+    b       .Limt_table_iterate
+    addiu   $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+
+.Limt_table_found:
+    # We successfully hit an entry in the table. Load the target method and jump to it.
+    lw      $a0, __SIZEOF_POINTER__($a0)
+    lw      $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
+    jr      $t9
+    nop
+
+.Lconflict_trampoline:
+    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a2, rSELF                    # pass Thread::Current
-    jal     artQuickResolutionTrampoline  # (Method* called, receiver, Thread*, SP)
+    la      $t9, artQuickResolutionTrampoline
+    jalr    $t9                           # (Method* called, receiver, Thread*, SP)
     addiu   $a3, $sp, ARG_SLOT_SIZE       # pass $sp (remove arg slots)
     beqz    $v0, 1f
     lw      $a0, ARG_SLOT_SIZE($sp)       # load resolved method to $a0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jalr    $zero, $t9             # tail call to method
     nop
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp to $s8
     move    $s3, $gp               # save $gp to $s3
 
     # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
     move    $a0, rSELF                     # pass Thread::Current
     addiu   $a1, $sp, ARG_SLOT_SIZE        # save $sp (remove arg slots)
-    jal     artQuickGenericJniTrampoline   # (Thread*, SP)
+    la      $t9, artQuickGenericJniTrampoline
+    jalr    $t9                            # (Thread*, SP)
     addiu   $sp, $sp, -5120                # reserve space on the stack
 
     # The C call will have registered the complete save-frame on success.
@@ -1442,7 +1984,8 @@
     move    $a2, $v0               # pass result
     move    $a3, $v1
     addiu   $sp, $sp, -24          # reserve arg slots
-    jal     artQuickGenericJniEndTrampoline
+    la      $t9, artQuickGenericJniEndTrampoline
+    jalr    $t9
     s.d     $f0, 16($sp)           # pass result_f
 
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
@@ -1451,7 +1994,7 @@
     move    $sp, $s8               # tear down the alloca
 
     # tear down the callee-save frame
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     MTD     $v0, $v1, $f0, $f1     # move float value to return value
     jalr    $zero, $ra
@@ -1465,12 +2008,13 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a1, rSELF                          # pass Thread::Current
-    jal     artQuickToInterpreterBridge         # (Method* method, Thread*, SP)
+    la      $t9, artQuickToInterpreterBridge
+    jalr    $t9                                 # (Method* method, Thread*, SP)
     addiu   $a2, $sp, ARG_SLOT_SIZE             # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1                  # move float value to return value
@@ -1486,14 +2030,15 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     sw       $a0, 28($sp)   # save arg0 in free arg slot
     move     $a3, $ra       # pass $ra
-    jal      artInstrumentationMethodEntryFromCode  # (Method*, Object*, Thread*, LR)
+    la       $t9, artInstrumentationMethodEntryFromCode
+    jalr     $t9            # (Method*, Object*, Thread*, LR)
     move     $a2, rSELF     # pass Thread::Current
     move     $t9, $v0       # $t9 holds reference to code
     lw       $a0, 28($sp)   # restore arg0 from free arg slot
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     jalr     $t9            # call method
     nop
 END art_quick_instrumentation_entry
@@ -1505,7 +2050,7 @@
     .cpload  $t9
     move     $ra, $zero     # the link register points here, so clobber it with 0 for later checks
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     addiu    $sp, $sp, -16  # allocate temp storage on the stack
     .cfi_adjust_cfa_offset 16
     sw       $v0, ARG_SLOT_SIZE+12($sp)
@@ -1517,7 +2062,8 @@
     move     $a2, $v0       # pass gpr result
     move     $a3, $v1
     addiu    $a1, $sp, ARG_SLOT_SIZE+16   # pass $sp (remove arg slots and temp storage)
-    jal      artInstrumentationMethodExitFromCode  # (Thread*, SP, gpr_res, fpr_res)
+    la       $t9, artInstrumentationMethodExitFromCode
+    jalr     $t9            # (Thread*, SP, gpr_res, fpr_res)
     move     $a0, rSELF     # pass Thread::Current
     move     $t9, $v0       # set aside returned link register
     move     $ra, $v1       # set link register for deoptimization
@@ -1525,8 +2071,8 @@
     lw       $v1, ARG_SLOT_SIZE+8($sp)
     l.d      $f0, ARG_SLOT_SIZE($sp)
     jalr     $zero, $t9     # return
-    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+16  # restore stack 
-    .cfi_adjust_cfa_offset -(ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+16)
+    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_SAVE_REFS_ONLY+16  # restore stack
+    .cfi_adjust_cfa_offset -(ARG_SLOT_SIZE+FRAME_SIZE_SAVE_REFS_ONLY+16)
 END art_quick_instrumentation_exit
 
     /*
@@ -1535,8 +2081,9 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    jal      artDeoptimize  # artDeoptimize(Thread*)
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la       $t9, artDeoptimize
+    jalr     $t9            # artDeoptimize(Thread*)
                             # Returns caller method's frame size.
     move     $a0, rSELF     # pass Thread::current
 END art_quick_deoptimize
@@ -1547,8 +2094,9 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    jal      artDeoptimizeFromCompiledCode  # artDeoptimizeFromCompiledCode(Thread*)
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la       $t9, artDeoptimizeFromCompiledCode
+    jalr     $t9                            # artDeoptimizeFromCompiledCode(Thread*)
                                             # Returns caller method's frame size.
     move     $a0, rSELF                     # pass Thread::current
 END art_quick_deoptimize_from_compiled_code
@@ -1615,9 +2163,9 @@
      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
      * 6 bits.
      * On entry:
-     *   r0: low word
-     *   r1: high word
-     *   r2: shift count
+     *   $a0: low word
+     *   $a1: high word
+     *   $a2: shift count
      */
     /* ushr-long vAA, vBB, vCC */
 ENTRY_NO_GP art_quick_ushr_long
@@ -1637,5 +2185,70 @@
     nop
 END art_quick_ushr_long
 
-UNIMPLEMENTED art_quick_indexof
-UNIMPLEMENTED art_quick_string_compareto
+/* java.lang.String.indexOf(int ch, int fromIndex=0) */
+ENTRY_NO_GP art_quick_indexof
+/* $a0 holds address of "this" */
+/* $a1 holds "ch" */
+/* $a2 holds "fromIndex" */
+  lw    $t0, MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
+  slt   $t1, $a2, $zero # if fromIndex < 0
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+  seleqz $a2, $a2, $t1  #     fromIndex = 0;
+#else
+  movn   $a2, $zero, $t1 #    fromIndex = 0;
+#endif
+  subu  $t0, $t0, $a2   # this.length() - fromIndex
+  blez  $t0, 6f         # if this.length()-fromIndex <= 0
+  li    $v0, -1         #     return -1;
+
+  sll   $v0, $a2, 1     # $a0 += $a2 * 2
+  addu  $a0, $a0, $v0   #  "  "   "  " "
+  move  $v0, $a2        # Set i to fromIndex.
+
+1:
+  lhu   $t3, MIRROR_STRING_VALUE_OFFSET($a0)    # if this.charAt(i) == ch
+  beq   $t3, $a1, 6f                            #     return i;
+  addu  $a0, $a0, 2     # i++
+  subu  $t0, $t0, 1     # this.length() - i
+  bnez  $t0, 1b         # while this.length() - i > 0
+  addu  $v0, $v0, 1     # i++
+
+  li    $v0, -1         # if this.length() - i <= 0
+                        #     return -1;
+
+6:
+  j     $ra
+  nop
+END art_quick_indexof
+
+/* java.lang.String.compareTo(String anotherString) */
+ENTRY_NO_GP art_quick_string_compareto
+/* $a0 holds address of "this" */
+/* $a1 holds address of "anotherString" */
+  beq    $a0, $a1, 9f   # this and anotherString are the same object
+  move   $v0, $zero
+
+  lw     $a2, MIRROR_STRING_COUNT_OFFSET($a0)   # this.length()
+  lw     $a3, MIRROR_STRING_COUNT_OFFSET($a1)   # anotherString.length()
+  MINu   $t2, $a2, $a3
+# $t2 now holds min(this.length(),anotherString.length())
+
+  beqz   $t2, 9f        # while min(this.length(),anotherString.length())-i != 0
+  subu   $v0, $a2, $a3  # if $t2==0 return
+                        #     (this.length() - anotherString.length())
+1:
+  lhu    $t0, MIRROR_STRING_VALUE_OFFSET($a0)   # while this.charAt(i) == anotherString.charAt(i)
+  lhu    $t1, MIRROR_STRING_VALUE_OFFSET($a1)
+  bne    $t0, $t1, 9f   # if this.charAt(i) != anotherString.charAt(i)
+  subu   $v0, $t0, $t1  #     return (this.charAt(i) - anotherString.charAt(i))
+  addiu  $a0, $a0, 2    # point at this.charAt(i++)
+  subu   $t2, $t2, 1    # new value of
+                        # min(this.length(),anotherString.length())-i
+  bnez   $t2, 1b
+  addiu  $a1, $a1, 2    # point at anotherString.charAt(i++)
+  subu   $v0, $a2, $a3
+
+9:
+  j      $ra
+  nop
+END art_quick_string_compareto
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index f5d13c2..90e7b20 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -34,6 +34,12 @@
     (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
 static constexpr uint32_t kMipsCalleeSaveAllSpills =
     (1 << art::mips::S0) | (1 << art::mips::S1);
+static constexpr uint32_t kMipsCalleeSaveEverythingSpills =
+    (1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) |
+    (1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) |
+    (1 << art::mips::T0) | (1 << art::mips::T1) | (1 << art::mips::T2) | (1 << art::mips::T3) |
+    (1 << art::mips::T4) | (1 << art::mips::T5) | (1 << art::mips::T6) | (1 << art::mips::T7) |
+    (1 << art::mips::S0) | (1 << art::mips::S1) | (1 << art::mips::T8) | (1 << art::mips::T9);
 
 static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0;
@@ -43,23 +49,34 @@
     (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
     (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
     (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
+static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills =
+    (1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) |
+    (1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) |
+    (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) |
+    (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) |
+    (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) |
+    (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
+    (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
+    (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
 
 constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMipsCalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t MipsCalleeSaveFPSpills(Runtime::CalleeSaveType type) {
   return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllFPSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(MipsCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(MipsCalleeSaveFPSpills(type))   /* fprs */ +
-                  1 /* Method* */) * kMipsPointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kMipsPointerSize), kStackAlignment);
 }
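+// Illustrative example: with 8 core spills, no FP spills and the Method* slot,
+// this is RoundUp((8 + 0 + 1) * 4, kStackAlignment) = RoundUp(36, 16) = 48 bytes.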
 
 constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 0f784ed..ae01bd5 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -59,6 +59,8 @@
   SP   = 29,  // Stack pointer.
   FP   = 30,  // Saved value/frame pointer.
   RA   = 31,  // Return address.
+  TR   = S1,  // ART Thread Register
+  TMP  = T8,  // scratch register (in addition to AT)
   kNumberOfCoreRegisters = 32,
   kNoRegister = -1  // Signals an illegal register.
 };
@@ -98,6 +100,7 @@
   F29 = 29,
   F30 = 30,
   F31 = 31,
+  FTMP = F8,  // scratch register
   kNumberOfFRegisters = 32,
   kNoFRegister = -1,
 };
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index a451496..0a9ab7a 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -17,14 +17,15 @@
 #include "thread.h"
 
 #include "asm_support_mips.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index b859c70..786e860 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -83,4 +83,38 @@
 #endif  // USE_HEAP_POISONING
 .endm
 
+// Based on the contents of creg, select the minimum of rreg and sreg.
+// The original value of creg is clobbered by this macro.
+.macro MINint dreg,rreg,sreg,creg
+  .set push
+  .set noat
+  .ifc \dreg, \rreg
+  selnez \dreg, \rreg, \creg
+  seleqz \creg, \sreg, \creg
+  .else
+  seleqz \dreg, \sreg, \creg
+  selnez \creg, \rreg, \creg
+  .endif
+  or     \dreg, \dreg, \creg
+  .set pop
+.endm
+
+// Find minimum of two signed registers
+.macro MINs dreg,rreg,sreg
+  .set push
+  .set noat
+  slt    $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
+
+// Find minimum of two unsigned registers
+.macro MINu dreg,rreg,sreg
+  .set push
+  .set noat
+  sltu   $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
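+
+// Example (illustrative): MINu $v0, $a0, $a1 with $a0 = 3 and $a1 = 7 computes
+// sltu $at, 3, 7 -> $at = 1; the selects then zero the larger operand's term and
+// the final or leaves $v0 = 3.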
+
 #endif  // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_S_
diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h
index 995fcf3..9063d20 100644
--- a/runtime/arch/mips64/asm_support_mips64.h
+++ b/runtime/arch/mips64/asm_support_mips64.h
@@ -20,10 +20,12 @@
 #include "asm_support.h"
 
 // 64 ($f24-$f31) + 64 ($s0-$s7) + 8 ($gp) + 8 ($s8) + 8 ($ra) + 1x8 bytes padding
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 160
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 160
 // 48 ($s2-$s7) + 8 ($gp) + 8 ($s8) + 8 ($ra) + 1x8 bytes padding
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_REFS_ONLY 80
 // $f12-$f19, $a1-$a7, $s2-$s7 + $gp + $s8 + $ra, 16 total + 1x8 bytes padding + method*
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 208
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 208
+// $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method*
+#define FRAME_SIZE_SAVE_EVERYTHING 496
 
 #endif  // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_H_
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 417d5fc..34b0638 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -18,6 +18,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
@@ -27,8 +28,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -57,67 +58,12 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // Math
   qpoints->pCmpgDouble = CmpgDouble;
   qpoints->pCmpgFloat = CmpgFloat;
@@ -144,43 +90,47 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimization from compiled code.
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
   // TODO - use lld/scd instructions for Mips64
   // Atomic 64-bit load/store
   qpoints->pA64Load = QuasiAtomic::Read64;
   qpoints->pA64Store = QuasiAtomic::Write64;
 
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  // Read barriers (and these entry points in particular) are not
+  // supported in the compiler on MIPS64.
+  qpoints->pReadBarrierMarkReg00 = nullptr;
+  qpoints->pReadBarrierMarkReg01 = nullptr;
+  qpoints->pReadBarrierMarkReg02 = nullptr;
+  qpoints->pReadBarrierMarkReg03 = nullptr;
+  qpoints->pReadBarrierMarkReg04 = nullptr;
+  qpoints->pReadBarrierMarkReg05 = nullptr;
+  qpoints->pReadBarrierMarkReg06 = nullptr;
+  qpoints->pReadBarrierMarkReg07 = nullptr;
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 4abfcf1..0bbb6e1 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips64 specific fault handler functions.
@@ -44,7 +44,7 @@
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips64::SP]);
   VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
@@ -56,7 +56,7 @@
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips64));
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips64::A0]);
   } else {
     // The method is at the top of the stack.
     *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
@@ -71,8 +71,11 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
+
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -80,9 +83,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips64::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -111,7 +117,7 @@
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uintptr_t sp = sc->sc_regs[29];  // SP register
+  uintptr_t sp = sc->sc_regs[mips64::SP];
   VLOG(signals) << "sp: " << std::hex << sp;
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
@@ -134,7 +140,7 @@
   // caused this fault.  This will be inserted into a callee save frame by
   // the function to which this handler returns (art_quick_throw_stack_overflow).
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
 
   // The kernel will now return to the address in sc->sc_pc.
   return true;
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 66c8aad..9774eb9 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -41,16 +41,16 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      * callee-save: padding + $f24-$f31 + $s0-$s7 + $gp + $ra + $s8 = 19 total + 1x8 bytes padding
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     daddiu $sp, $sp, -160
     .cfi_adjust_cfa_offset 160
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 160)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 160)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 152($sp)
@@ -89,25 +89,25 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes
      * non-moving GC.
      * Does not include rSUSPEND or rSELF
      * callee-save: padding + $s2-$s7 + $gp + $ra + $s8 = 9 total + 1x8 bytes padding
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
     daddiu $sp, $sp, -80
     .cfi_adjust_cfa_offset 80
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 80)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 80)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 72($sp)
@@ -131,12 +131,12 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     ld     $ra, 72($sp)
     .cfi_restore 31
     ld     $s8, 64($sp)
@@ -160,7 +160,7 @@
     .cpreturn
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
     ld     $ra, 72($sp)
     .cfi_restore 31
     ld     $s8, 64($sp)
@@ -186,15 +186,15 @@
 .endm
 
 // This assumes the top part of these stack frame types is identical.
-#define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+#define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     daddiu  $sp, $sp, -208
     .cfi_adjust_cfa_offset 208
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 208)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 208)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 200($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_LrOffset
@@ -244,27 +244,27 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes
      * non-moving GC.
     * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1x8 bytes padding + Method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     sd      $a0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     ld     $ra, 200($sp)
     .cfi_restore 31
     ld     $s8, 192($sp)
@@ -314,13 +314,235 @@
 .endm
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra,
+     *              $f0-$f31; 28 (GPR) + 32 (FPR) + 1x8 bytes padding + method*
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    daddiu $sp, $sp, -496
+    .cfi_adjust_cfa_offset 496
+
+     // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 496)
+#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
+#endif
+
+    // Save core registers.
+    sd     $ra, 488($sp)
+    .cfi_rel_offset 31, 488
+    sd     $s8, 480($sp)
+    .cfi_rel_offset 30, 480
+    sd     $t9, 464($sp)
+    .cfi_rel_offset 25, 464
+    sd     $t8, 456($sp)
+    .cfi_rel_offset 24, 456
+    sd     $s7, 448($sp)
+    .cfi_rel_offset 23, 448
+    sd     $s6, 440($sp)
+    .cfi_rel_offset 22, 440
+    sd     $s5, 432($sp)
+    .cfi_rel_offset 21, 432
+    sd     $s4, 424($sp)
+    .cfi_rel_offset 20, 424
+    sd     $s3,  416($sp)
+    .cfi_rel_offset 19, 416
+    sd     $s2,  408($sp)
+    .cfi_rel_offset 18, 408
+    sd     $s1,  400($sp)
+    .cfi_rel_offset 17, 400
+    sd     $s0,  392($sp)
+    .cfi_rel_offset 16, 392
+    sd     $t3,  384($sp)
+    .cfi_rel_offset 15, 384
+    sd     $t2,  376($sp)
+    .cfi_rel_offset 14, 376
+    sd     $t1,  368($sp)
+    .cfi_rel_offset 13, 368
+    sd     $t0,  360($sp)
+    .cfi_rel_offset 12, 360
+    sd     $a7, 352($sp)
+    .cfi_rel_offset 11, 352
+    sd     $a6, 344($sp)
+    .cfi_rel_offset 10, 344
+    sd     $a5, 336($sp)
+    .cfi_rel_offset 9, 336
+    sd     $a4, 328($sp)
+    .cfi_rel_offset 8, 328
+    sd     $a3,  320($sp)
+    .cfi_rel_offset 7, 320
+    sd     $a2,  312($sp)
+    .cfi_rel_offset 6, 312
+    sd     $a1,  304($sp)
+    .cfi_rel_offset 5, 304
+    sd     $a0,  296($sp)
+    .cfi_rel_offset 4, 296
+    sd     $v1,  288($sp)
+    .cfi_rel_offset 3, 288
+    sd     $v0,  280($sp)
+    .cfi_rel_offset 2, 280
+
+    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+    bal 1f
+    .set push
+    .set noat
+    sd     $at,  272($sp)
+    .cfi_rel_offset 1, 272
+    .set pop
+1:
+    .cpsetup $ra, 472, 1b
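+    // Note: .cpsetup spills the old $gp to 472($sp) and recomputes $gp from the address of
+    // label 1 (now in $ra); RESTORE_SAVE_EVERYTHING_FRAME undoes this with .cpreturn.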
+
+    // Save FP registers.
+    s.d    $f31, 264($sp)
+    s.d    $f30, 256($sp)
+    s.d    $f29, 248($sp)
+    s.d    $f28, 240($sp)
+    s.d    $f27, 232($sp)
+    s.d    $f26, 224($sp)
+    s.d    $f25, 216($sp)
+    s.d    $f24, 208($sp)
+    s.d    $f23, 200($sp)
+    s.d    $f22, 192($sp)
+    s.d    $f21, 184($sp)
+    s.d    $f20, 176($sp)
+    s.d    $f19, 168($sp)
+    s.d    $f18, 160($sp)
+    s.d    $f17, 152($sp)
+    s.d    $f16, 144($sp)
+    s.d    $f15, 136($sp)
+    s.d    $f14, 128($sp)
+    s.d    $f13, 120($sp)
+    s.d    $f12, 112($sp)
+    s.d    $f11, 104($sp)
+    s.d    $f10, 96($sp)
+    s.d    $f9, 88($sp)
+    s.d    $f8, 80($sp)
+    s.d    $f7, 72($sp)
+    s.d    $f6, 64($sp)
+    s.d    $f5, 56($sp)
+    s.d    $f4, 48($sp)
+    s.d    $f3, 40($sp)
+    s.d    $f2, 32($sp)
+    s.d    $f1, 24($sp)
+    s.d    $f0, 16($sp)
+
+    # load appropriate callee-save-method
+    ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
+    ld      $t1, 0($t1)
+    ld      $t1, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t1)
+    sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
+    # Place sp in Thread::Current()->top_quick_frame.
+    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    // Restore FP registers.
+    l.d    $f31, 264($sp)
+    l.d    $f30, 256($sp)
+    l.d    $f29, 248($sp)
+    l.d    $f28, 240($sp)
+    l.d    $f27, 232($sp)
+    l.d    $f26, 224($sp)
+    l.d    $f25, 216($sp)
+    l.d    $f24, 208($sp)
+    l.d    $f23, 200($sp)
+    l.d    $f22, 192($sp)
+    l.d    $f21, 184($sp)
+    l.d    $f20, 176($sp)
+    l.d    $f19, 168($sp)
+    l.d    $f18, 160($sp)
+    l.d    $f17, 152($sp)
+    l.d    $f16, 144($sp)
+    l.d    $f15, 136($sp)
+    l.d    $f14, 128($sp)
+    l.d    $f13, 120($sp)
+    l.d    $f12, 112($sp)
+    l.d    $f11, 104($sp)
+    l.d    $f10, 96($sp)
+    l.d    $f9, 88($sp)
+    l.d    $f8, 80($sp)
+    l.d    $f7, 72($sp)
+    l.d    $f6, 64($sp)
+    l.d    $f5, 56($sp)
+    l.d    $f4, 48($sp)
+    l.d    $f3, 40($sp)
+    l.d    $f2, 32($sp)
+    l.d    $f1, 24($sp)
+    l.d    $f0, 16($sp)
+
+    // Restore core registers.
+    .cpreturn
+    ld     $ra, 488($sp)
+    .cfi_restore 31
+    ld     $s8, 480($sp)
+    .cfi_restore 30
+    ld     $t9, 464($sp)
+    .cfi_restore 25
+    ld     $t8, 456($sp)
+    .cfi_restore 24
+    ld     $s7, 448($sp)
+    .cfi_restore 23
+    ld     $s6, 440($sp)
+    .cfi_restore 22
+    ld     $s5, 432($sp)
+    .cfi_restore 21
+    ld     $s4, 424($sp)
+    .cfi_restore 20
+    ld     $s3,  416($sp)
+    .cfi_restore 19
+    ld     $s2,  408($sp)
+    .cfi_restore 18
+    ld     $s1,  400($sp)
+    .cfi_restore 17
+    ld     $s0,  392($sp)
+    .cfi_restore 16
+    ld     $t3,  384($sp)
+    .cfi_restore 15
+    ld     $t2,  376($sp)
+    .cfi_restore 14
+    ld     $t1,  368($sp)
+    .cfi_restore 13
+    ld     $t0,  360($sp)
+    .cfi_restore 12
+    ld     $a7, 352($sp)
+    .cfi_restore 11
+    ld     $a6, 344($sp)
+    .cfi_restore 10
+    ld     $a5, 336($sp)
+    .cfi_restore 9
+    ld     $a4, 328($sp)
+    .cfi_restore 8
+    ld     $a3,  320($sp)
+    .cfi_restore 7
+    ld     $a2,  312($sp)
+    .cfi_restore 6
+    ld     $a1,  304($sp)
+    .cfi_restore 5
+    ld     $a0,  296($sp)
+    .cfi_restore 4
+    ld     $v1,  288($sp)
+    .cfi_restore 3
+    ld     $v0,  280($sp)
+    .cfi_restore 2
+    .set push
+    .set noat
+    ld     $at,  272($sp)
+    .cfi_restore 1
+    .set pop
+
+    daddiu $sp, $sp, 496
+    .cfi_adjust_cfa_offset -496
+.endm
+
+    /*
     * Macro that sets up a call through to artDeliverPendingExceptionFromCode,
     * where the pending exception is Thread::Current()->exception_.
      */
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_GP
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
     dla     $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
@@ -328,7 +550,7 @@
 
 .macro RETURN_IF_NO_EXCEPTION
     ld     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne    $t0, $zero, 1f                      # success if no exception is pending
     nop
     jalr   $zero, $ra
@@ -338,7 +560,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne    $v0, $zero, 1f                # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -348,7 +570,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     beq    $v0, $zero, 1f                # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -358,6 +580,138 @@
 .endm
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack
+     *   a2 = pc to call
+     *   a3 = JValue* result
+     *   a4 = shorty
+     *   a5 = thread
+     */
+ENTRY art_quick_osr_stub
+    move   $t0, $sp               # save stack pointer
+    daddiu $t1, $sp, -112         # reserve stack space
+    dsrl   $t1, $t1, 4            # enforce 16 byte stack alignment
+    dsll   $sp, $t1, 4            # update stack pointer
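+    # The dsrl/dsll pair clears the low 4 bits, i.e. $sp = ($sp - 112) & ~0xf.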
+
+    // Save callee-saved general-purpose registers, SP, T8(GP), RA, A3, and A4 (14 x 8 bytes).
+    sd     $ra, 104($sp)
+    .cfi_rel_offset 31, 104
+    sd     $s8, 96($sp)
+    .cfi_rel_offset 30, 96
+    sd     $t0, 88($sp)           # save original stack pointer stored in t0
+    .cfi_rel_offset 29, 88
+    sd     $t8, 80($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 80        # Value from gp is pushed, so set the cfi offset accordingly.
+    sd     $s7, 72($sp)
+    .cfi_rel_offset 23, 72
+    sd     $s6, 64($sp)
+    .cfi_rel_offset 22, 64
+    sd     $s5, 56($sp)
+    .cfi_rel_offset 21, 56
+    sd     $s4, 48($sp)
+    .cfi_rel_offset 20, 48
+    sd     $s3, 40($sp)
+    .cfi_rel_offset 19, 40
+    sd     $s2, 32($sp)
+    .cfi_rel_offset 18, 32
+    sd     $s1, 24($sp)
+    .cfi_rel_offset 17, 24
+    sd     $s0, 16($sp)
+    .cfi_rel_offset 16, 16
+    sd     $a4, 8($sp)
+    .cfi_rel_offset 8, 8
+    sd     $a3, 0($sp)
+    .cfi_rel_offset 7, 0
+    move   rSELF, $a5                      # Save managed thread pointer into rSELF
+
+    daddiu $sp, $sp, -16
+    jal    .Losr_entry
+    sd     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    daddiu $sp, $sp, 16
+
+    // Restore return value address and shorty address
+    ld     $a4, 8($sp)                     # shorty address
+    .cfi_restore 8
+    ld     $a3, 0($sp)                     # result value address
+    .cfi_restore 7
+
+    lbu    $t1, 0($a4)                     # load return type
+    li     $t2, 'D'                        # put char 'D' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'D'
+    li     $t2, 'F'                        # put char 'F' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'F'
+    nop
+    b      .Losr_exit
+    dsrl   $v1, $v0, 32                    # put high half of result in v1
+.Losr_fp_result:
+    mfc1   $v0, $f0
+    mfhc1  $v1, $f0                        # put high half of FP result in v1
+.Losr_exit:
+    sw     $v0, 0($a3)                     # store low half of result
+    sw     $v1, 4($a3)                     # store high half of result
+
+    // Restore callee registers
+    ld     $ra, 104($sp)
+    .cfi_restore 31
+    ld     $s8, 96($sp)
+    .cfi_restore 30
+    ld     $t0, 88($sp)                    # save SP into t0 for now
+    .cfi_restore 29
+    ld     $t8, 80($sp)                    # Restore gp back to its temp storage.
+    .cfi_restore 28
+    ld     $s7, 72($sp)
+    .cfi_restore 23
+    ld     $s6, 64($sp)
+    .cfi_restore 22
+    ld     $s5, 56($sp)
+    .cfi_restore 21
+    ld     $s4, 48($sp)
+    .cfi_restore 20
+    ld     $s3, 40($sp)
+    .cfi_restore 19
+    ld     $s2, 32($sp)
+    .cfi_restore 18
+    ld     $s1, 24($sp)
+    .cfi_restore 17
+    ld     $s0, 16($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra
+    move   $sp, $t0
+
+.Losr_entry:
+    dsubu  $sp, $sp, $a1                   # Reserve space for callee stack
+    daddiu $a1, $a1, -8
+    daddu  $t0, $a1, $sp
+    sw     $ra, 0($t0)                     # Store low half of RA per compiler ABI
+    dsrl   $t1, $ra, 32
+    sw     $t1, 4($t0)                     # Store high half of RA per compiler ABI
+
+    // Copy arguments into callee stack
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // a0 = source address
+    // a1 = args length in bytes (does not include 8 bytes for RA)
+    // sp = destination address
+    beqz   $a1, .Losr_loop_exit
+    daddiu $a1, $a1, -4
+    daddu  $t1, $a0, $a1
+    daddu  $t2, $sp, $a1
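+    # With n = args length in bytes, the loop below is roughly:
+    #   for (i = n - 4; i >= 0; i -= 4) *(int32_t*)(sp + i) = *(int32_t*)(a0 + i);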
+.Losr_loop_entry:
+    lw     $t0, 0($t1)
+    daddiu $t1, $t1, -4
+    sw     $t0, 0($t2)
+    bne    $sp, $t2, .Losr_loop_entry
+    daddiu $t2, $t2, -4
+
+.Losr_loop_exit:
+    move   $t9, $a2
+    jalr   $zero, $t9                      # Jump to the OSR entry point.
+    nop
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
@@ -442,7 +796,7 @@
      * the bottom of the thread. On entry a0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artDeliverExceptionFromCode
     jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -454,18 +808,29 @@
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
 .Lart_quick_throw_null_pointer_exception_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNullPointerExceptionFromCode
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    dla  $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowDivZeroFromCode
     jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -478,18 +843,31 @@
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
 .Lart_quick_throw_array_bounds_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowArrayBoundsFromCode
     jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_throw_array_bounds
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt().
+     */
+    .extern artThrowStringBoundsFromCode
+ENTRY art_quick_throw_string_bounds
+.Lart_quick_throw_string_bounds_gp_set:
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    dla  $t9, artThrowStringBoundsFromCode
+    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
+END art_quick_throw_string_bounds
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowStackOverflowFromCode
     jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -500,7 +878,7 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNoSuchMethodFromCode
     jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -524,13 +902,13 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
     move  $a3, $sp                         # pass $sp
     move  $a0, $v0                         # save target Method*
     move  $t9, $v1                         # save $v0->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     beq   $v0, $zero, 1f
     nop
     jalr  $zero, $t9
@@ -819,8 +1197,8 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    ld      $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
+    ld      $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)         # pass referrer's Method*
     jal     artHandleFillArrayDataFromCode              # (payload offset, Array*, method, Thread*)
     move    $a3, rSELF                                  # pass Thread::Current
     RETURN_IF_ZERO
@@ -833,12 +1211,21 @@
 ENTRY art_quick_lock_object
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
+    jal     artLockObjectFromCode         # (Object* obj, Thread*)
+    move    $a1, rSELF                    # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      */
@@ -846,12 +1233,21 @@
 ENTRY art_quick_unlock_object
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
     move    $a1, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
+    jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
+    move    $a1, rSELF                 # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
      */
@@ -879,7 +1275,7 @@
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
     SETUP_GP
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -1027,7 +1423,7 @@
     SETUP_GP
     bne    $v0, $zero, .Ldo_aput
     nop
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     move   $a1, $a2
     dla  $t9, artThrowArrayStoreException
     jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
@@ -1039,8 +1435,8 @@
      */
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1051,8 +1447,8 @@
      */
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1063,8 +1459,8 @@
      */
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1075,8 +1471,8 @@
      */
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1087,8 +1483,8 @@
      */
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1099,8 +1495,8 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1111,8 +1507,8 @@
      */
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1123,8 +1519,8 @@
      */
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1135,8 +1531,8 @@
      */
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1147,8 +1543,8 @@
      */
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1159,8 +1555,8 @@
      */
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetShortInstanceFromCode   # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1171,8 +1567,8 @@
      */
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1183,8 +1579,8 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1195,8 +1591,8 @@
      */
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1207,8 +1603,8 @@
      */
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1219,8 +1615,8 @@
      */
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1231,8 +1627,8 @@
      */
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1243,9 +1639,9 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
                                          # a2 contains the new val
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1256,8 +1652,8 @@
      */
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1268,8 +1664,8 @@
      */
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet8InstanceFromCode       # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1280,8 +1676,8 @@
      */
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1292,8 +1688,8 @@
      */
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1304,8 +1700,8 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1316,8 +1712,8 @@
      */
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1327,7 +1723,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a1, rSELF                 # pass Thread::Current
     \return
@@ -1338,7 +1734,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a2, rSELF                 # pass Thread::Current
     \return
@@ -1348,7 +1744,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a3, rSELF                 # pass Thread::Current
     \return
@@ -1358,7 +1754,7 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a4, rSELF                 # pass Thread::Current
     \return
@@ -1366,7 +1762,109 @@
 .endm
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread stack top offset
+    # a4: thread stack bottom offset
+    # v0: free list head
+    #
+    # a5, a6 : temps
+
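+    # Fast path sketch: resolve the class from the dex cache and bail to the slow path unless
+    # the class is initialized, non-finalizable, its size fits a thread-local rosalloc bracket,
+    # and both the thread-local allocation stack and the run's free list have room; then pop a
+    # slot off the free list, install the class pointer, and push the object on the alloc stack.
+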
+    ld     $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_64($a1)   # Load dex cache resolved types array.
+
+    dsll   $a5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT        # Compute byte offset from type_idx.
+    daddu  $a5, $t0, $a5                                    # Compute address of the entry.
+    lwu    $t0, 0($a5)                                      # Load class (t0).
+    beqzc  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li     $a6, MIRROR_CLASS_STATUS_INITIALIZED
+    lwu    $a5, MIRROR_CLASS_STATUS_OFFSET($t0)             # Check class status.
+    bnec   $a5, $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor    $a5, $a5, $a5
+    daddu  $t0, $t0, $a5
+
+    lwu    $a5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)       # Check if the access flags include
+    li     $a6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE           # kAccClassIsFinalizable.
+    and    $a6, $a5, $a6
+    bnezc  $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ld     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)    # Check if thread local allocation stack
+    ld     $a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)    # has any room left.
+    bgeuc  $t3, $a4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lwu    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)        # Load object size (t1).
+    li     $a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE      # Check if size is for a thread local
+                                                            # allocation.
+    bltuc  $a5, $t1, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size: align the size up to the rosalloc bracket
+    # quantum size, divide by the quantum size, and subtract 1.
+    daddiu $t1, $t1, -1                                     # Decrease obj size and shift right by
+    dsrl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT    # quantum.
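+    # This computes bracket_index = (size - 1) >> quantum_shift; for a quantum of Q bytes,
+    # object sizes 1..Q land in bracket 0, Q+1..2Q in bracket 1, and so on.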
+
+    dsll   $t2, $t1, POINTER_SIZE_SHIFT
+    daddu  $t2, $t2, $s1
+    ld     $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)            # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+    ld     $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqzc  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+    ld     $a5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sd     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the free-list next pointer.
+    # The offsets are asserted to match below.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw     $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+    sd     $v0, 0($t3)
+    daddiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sd     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+    lw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu  $a5, $a5, -1
+    sw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                         # Fence.
+
+    jalr   $zero, $ra
+    .cpreturn                                    # Restore gp from t8 in branch delay slot.
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME
+    jal    artAllocObjectFromCodeRosAlloc
+    move   $a2, $s1                              # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
@@ -1397,17 +1895,19 @@
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
-    lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
-    bne    $a0, $zero, 1f
+ENTRY_NO_GP art_quick_test_suspend
+    lh     rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+    bne    rSUSPEND, $zero, 1f
     daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
-    .cpreturn                                 # Restore gp from t8 in branch delay slot.
+    nop
 1:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME         # save callee saves for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME               # save everything for stack crawl
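+    # A SaveEverything frame is used so compiled code does not need to treat any register as
+    # clobbered across the suspend check.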
     jal    artTestSuspendFromCode             # (Thread*)
     move   $a0, rSELF
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    jalr   $zero, $ra
+    nop
 END art_quick_test_suspend
 
     /*
@@ -1416,13 +1916,13 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne     $t0, $zero, 1f
     dmtc1   $v0, $f0               # place return value to FP return value
     jalr    $zero, $ra
@@ -1432,36 +1932,65 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * a0 is the conflict ArtMethod.
+     * t0 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to a0, t0, and t1.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    move    $a0, $t0
+    ld      $t1, 0($sp)                                      # Load referrer.
+    ld      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_64($t1) # Load dex cache methods array.
+    dsll    $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
+    daddu   $t0, $t1, $t0                                    # Add offset to base.
+    ld      $t0, 0($t0)                                      # Load interface method.
+    ld      $a0, ART_METHOD_JNI_OFFSET_64($a0)               # Load ImtConflictTable.
+
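+    # The ImtConflictTable is a null-terminated array of { interface method, implementation }
+    # pointer pairs; the loop below scans the first slot of each pair for the method in $t0.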
+.Limt_table_iterate:
+    ld      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    # Branch if found.
+    beq     $t1, $t0, .Limt_table_found
+    nop
+    # If the entry is null, the interface method is not in the ImtConflictTable.
+    beqzc   $t1, .Lconflict_trampoline
+    # Iterate over the entries of the ImtConflictTable.
+    daddiu  $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+    bc      .Limt_table_iterate
+
+.Limt_table_found:
+    # We successfully hit an entry in the table. Load the target method and jump to it.
+    ld      $a0, __SIZEOF_POINTER__($a0)
+    ld      $t9, ART_METHOD_QUICK_CODE_OFFSET_64($a0)
+    jr      $t9
+    .cpreturn                      # Restore gp from t8 in branch delay slot.
+
+.Lconflict_trampoline:
+    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickResolutionTrampoline  # (Method* called, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     beq     $v0, $zero, 1f
     ld      $a0, 0($sp)            # load resolved method in $a0
                                    # artQuickResolutionTrampoline puts resolved method in *SP
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jalr    $zero, $t9             # tail call to method
     nop
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp
 
     # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
@@ -1511,7 +2040,7 @@
     move    $sp, $s8               # tear down the alloca
 
     # tear down the callee-save frame
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jalr    $zero, $ra
     dmtc1   $v0, $f0               # place return value to FP return value
@@ -1524,13 +2053,13 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a1, rSELF             # pass Thread::Current
     jal     artQuickToInterpreterBridge    # (Method* method, Thread*, SP)
     move    $a2, $sp               # pass $sp
     ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne     $t0, $zero, 1f
     dmtc1   $v0, $f0               # place return value to FP return value
     jalr    $zero, $ra
@@ -1545,7 +2074,7 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     daddiu   $sp, $sp, -16     # space for saving arg0
     .cfi_adjust_cfa_offset 16
     sd       $a0, 0($sp)       # save arg0
@@ -1556,7 +2085,7 @@
     ld       $a0, 0($sp)       # restore arg0
     daddiu   $sp, $sp, 16      # remove args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     jalr     $t9               # call method
     nop
 END art_quick_instrumentation_entry
@@ -1566,7 +2095,7 @@
     .cfi_startproc
     SETUP_GP
     move     $ra, $zero        # link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     move     $t0, $sp          # remember bottom of caller's frame
     daddiu   $sp, $sp, -16     # save return values and set up args
     .cfi_adjust_cfa_offset 16
@@ -1586,8 +2115,9 @@
     ld       $v0, 0($sp)       # restore return values
     l.d      $f0, 8($sp)
     jalr     $zero, $t9        # return
-    daddiu   $sp, $sp, 16+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE  # 16 bytes of saved values + ref_only callee save frame
-    .cfi_adjust_cfa_offset -(16+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    # restore stack, 16 bytes of saved values + ref_only callee save frame
+    daddiu   $sp, $sp, 16+FRAME_SIZE_SAVE_REFS_ONLY
+    .cfi_adjust_cfa_offset -(16+FRAME_SIZE_SAVE_REFS_ONLY)
 END art_quick_instrumentation_exit
 
     /*
@@ -1597,7 +2127,7 @@
     .extern artDeoptimize
     .extern artEnterInterpreterFromDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     jal      artDeoptimize     # artDeoptimize(Thread*, SP)
                                # Returns caller method's frame size.
     move     $a0, rSELF        # pass Thread::current
@@ -1609,7 +2139,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     jal      artDeoptimizeFromCompiledCode    # artDeoptimizeFromCompiledCode(Thread*, SP)
                                               # Returns caller method's frame size.
     move     $a0, rSELF                       # pass Thread::current
@@ -1626,10 +2156,8 @@
 
   lw     $a2,MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
   lw     $a3,MIRROR_STRING_COUNT_OFFSET($a1)    # anotherString.length()
-  sltu   $at,$a2,$a3
-  seleqz $t2,$a3,$at
-  selnez $at,$a2,$at
-  or     $t2,$t2,$at    # $t2 now holds min(this.length(),anotherString.length())
+  MINu   $t2, $a2, $a3
+  # $t2 now holds min(this.length(), anotherString.length())
 
   beqz   $t2,9f         # while min(this.length(),anotherString.length())-i != 0
   subu   $v0,$a2,$a3    # if $t2==0 return
@@ -1654,16 +2182,18 @@
 /* java.lang.String.indexOf(int ch, int fromIndex=0) */
 ENTRY_NO_GP art_quick_indexof
 /* $a0 holds address of "this" */
-/* $a1 holds address of "ch" */
-/* $a2 holds address of "fromIndex" */
+/* $a1 holds "ch" */
+/* $a2 holds "fromIndex" */
   lw    $t0,MIRROR_STRING_COUNT_OFFSET($a0)     # this.length()
-  subu  $t0,$t0,$a2     # this.length() - offset
-  blez  $t0,6f          # if this.length()-offset <= 0
+  slt   $at, $a2, $zero # if fromIndex < 0
+  seleqz $a2, $a2, $at  #     fromIndex = 0;
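+  # Branchless clamp: $at = (fromIndex < 0); seleqz keeps $a2 if $at is 0, zeroes it otherwise.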
+  subu  $t0,$t0,$a2     # this.length() - fromIndex
+  blez  $t0,6f          # if this.length()-fromIndex <= 0
   li    $v0,-1          #     return -1;
 
   sll   $v0,$a2,1       # $a0 += $a2 * 2
   daddu $a0,$a0,$v0     #  "  "   "  " "
-  move  $v0,$a2         # Set i to offset.
+  move  $v0,$a2         # Set i to fromIndex.
 
 1:
   lhu   $t3,MIRROR_STRING_VALUE_OFFSET($a0)     # if this.charAt(i) == ch
diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h
index f967be0..397776e 100644
--- a/runtime/arch/mips64/quick_method_frame_info_mips64.h
+++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h
@@ -25,6 +25,8 @@
 namespace art {
 namespace mips64 {
 
+static constexpr uint32_t kMips64CalleeSaveAlwaysSpills =
+    (1 << art::mips64::RA);
 static constexpr uint32_t kMips64CalleeSaveRefSpills =
     (1 << art::mips64::S2) | (1 << art::mips64::S3) | (1 << art::mips64::S4) |
     (1 << art::mips64::S5) | (1 << art::mips64::S6) | (1 << art::mips64::S7) |
@@ -35,6 +37,14 @@
     (1 << art::mips64::A7);
 static constexpr uint32_t kMips64CalleeSaveAllSpills =
     (1 << art::mips64::S0) | (1 << art::mips64::S1);
+static constexpr uint32_t kMips64CalleeSaveEverythingSpills =
+    (1 << art::mips64::AT) | (1 << art::mips64::V0) | (1 << art::mips64::V1) |
+    (1 << art::mips64::A0) | (1 << art::mips64::A1) | (1 << art::mips64::A2) |
+    (1 << art::mips64::A3) | (1 << art::mips64::A4) | (1 << art::mips64::A5) |
+    (1 << art::mips64::A6) | (1 << art::mips64::A7) | (1 << art::mips64::T0) |
+    (1 << art::mips64::T1) | (1 << art::mips64::T2) | (1 << art::mips64::T3) |
+    (1 << art::mips64::S0) | (1 << art::mips64::S1) | (1 << art::mips64::T8) |
+    (1 << art::mips64::T9);
 
 static constexpr uint32_t kMips64CalleeSaveFpRefSpills = 0;
 static constexpr uint32_t kMips64CalleeSaveFpArgSpills =
@@ -46,23 +56,37 @@
     (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
     (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
     (1 << art::mips64::F30) | (1 << art::mips64::F31);
+static constexpr uint32_t kMips64CalleeSaveFpEverythingSpills =
+    (1 << art::mips64::F0) | (1 << art::mips64::F1) | (1 << art::mips64::F2) |
+    (1 << art::mips64::F3) | (1 << art::mips64::F4) | (1 << art::mips64::F5) |
+    (1 << art::mips64::F6) | (1 << art::mips64::F7) | (1 << art::mips64::F8) |
+    (1 << art::mips64::F9) | (1 << art::mips64::F10) | (1 << art::mips64::F11) |
+    (1 << art::mips64::F12) | (1 << art::mips64::F13) | (1 << art::mips64::F14) |
+    (1 << art::mips64::F15) | (1 << art::mips64::F16) | (1 << art::mips64::F17) |
+    (1 << art::mips64::F18) | (1 << art::mips64::F19) | (1 << art::mips64::F20) |
+    (1 << art::mips64::F21) | (1 << art::mips64::F22) | (1 << art::mips64::F23) |
+    (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
+    (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
+    (1 << art::mips64::F30) | (1 << art::mips64::F31);
 
 constexpr uint32_t Mips64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kMips64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) | (1 << art::mips64::RA);
+  return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMips64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t Mips64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kMips64CalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t Mips64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(Mips64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Mips64CalleeSaveFpSpills(type))   /* fprs */ +
-                  + 1 /* Method* */) * kMips64PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kMips64PointerSize), kStackAlignment);
 }
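+
+// For example, for Runtime::kSaveEverything this is (28 GPRs + 32 FPRs + 1 Method*) * 8 = 488
+// bytes, rounded up to the 16-byte stack alignment, i.e. 496, matching the assembly-side
+// FRAME_SIZE_SAVE_EVERYTHING check.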
 
 constexpr QuickMethodFrameInfo Mips64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 1d07d47..81fae72 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -61,6 +61,7 @@
   RA   = 31,  // Return address.
   TR   = S1,  // ART Thread Register
   TMP  = T8,  // scratch register (in addition to AT)
+  TMP2 = T3,  // scratch register (in addition to AT, reserved for assembler)
   kNumberOfGpuRegisters = 32,
   kNoGpuRegister = -1  // Signals an illegal register.
 };
@@ -100,6 +101,7 @@
   F29 = 29,
   F30 = 30,
   F31 = 31,
+  FTMP = F8,  // scratch register
   kNumberOfFpuRegisters = 32,
   kNoFpuRegister = -1,
 };
diff --git a/runtime/arch/mips64/thread_mips64.cc b/runtime/arch/mips64/thread_mips64.cc
index c55537c..3ce5e50 100644
--- a/runtime/arch/mips64/thread_mips64.cc
+++ b/runtime/arch/mips64/thread_mips64.cc
@@ -17,14 +17,15 @@
 #include "thread.h"
 
 #include "asm_support_mips64.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<8>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fbacdbc..fa86bf4 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -87,6 +87,27 @@
   ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -219,19 +240,6 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 1d10e5d..10adb3a 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -18,9 +18,11 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
@@ -175,12 +177,16 @@
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
-        "sub sp, sp, #64\n\t"
-        ".cfi_adjust_cfa_offset 64\n\t"
+        "sub sp, sp, #80\n\t"
+        ".cfi_adjust_cfa_offset 80\n\t"
         "stp x0, x1, [sp]\n\t"
         "stp x2, x3, [sp, #16]\n\t"
         "stp x4, x5, [sp, #32]\n\t"
         "stp x6, x7, [sp, #48]\n\t"
+        // To be extra defensive, store x20. We do this because some of the stubs might make a
+        // transition into the runtime via the blr instruction below and *not* save x20.
+        "str x20, [sp, #64]\n\t"
+        // 8 bytes of padding to keep the 80-byte frame 16-byte aligned.
 
         "sub sp, sp, #16\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 16\n\t"
@@ -279,8 +285,9 @@
         "ldp x2, x3, [sp, #16]\n\t"
         "ldp x4, x5, [sp, #32]\n\t"
         "ldp x6, x7, [sp, #48]\n\t"
-        "add sp, sp, #64\n\t"         // Free stack space, now sp as on entry
-        ".cfi_adjust_cfa_offset -64\n\t"
+        "ldr x20, [sp, #64]\n\t"
+        "add sp, sp, #80\n\t"         // Free stack space, now sp as on entry
+        ".cfi_adjust_cfa_offset -80\n\t"
 
         "str x9, %[fpr_result]\n\t"   // Store the FPR comparison result
         "mov %[result], x8\n\t"              // Store the call result
@@ -298,13 +305,17 @@
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [hidden] "r"(hidden), [fpr_result] "m" (fpr_result)
-        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20",
+          // Leave one register unclobbered, which is needed for compiling with
+          // -fstack-protector-strong. According to AAPCS64, registers x9-x15 are
+          // caller-saved, so the register left out of the clobber list should instead
+          // be one of the otherwise unused callee-saved registers. Here we use x20.
+        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19",
           "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30",
           "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
           "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
-          "memory");  // clobber.
+          "memory");
 #elif defined(__mips__) && !defined(__LP64__)
     __asm__ __volatile__ (
         // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args.
@@ -429,11 +440,13 @@
         : [result] "=r" (result)
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [hidden] "r"(hidden)
-        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-          "t8", "t9", "k0", "k1", "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
+        // Instead of the aliases t0-t3, the register names $12-$15 have been used in
+        // the clobber list because t0-t3 are ambiguous.
+        : "at", "v0", "v1", "$12", "$13", "$14", "$15", "s0", "s1", "s2", "s3", "s4", "s5", "s6",
+          "s7", "t8", "t9", "k0", "k1", "fp", "ra",
+          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
+          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
           "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__)
 #define PUSH(reg) "pushq " # reg "\n\t .cfi_adjust_cfa_offset 8\n\t"
@@ -517,11 +530,7 @@
 
   static uintptr_t GetEntrypoint(Thread* self, QuickEntrypointEnum entrypoint) {
     int32_t offset;
-#ifdef __LP64__
-    offset = GetThreadOffset<8>(entrypoint).Int32Value();
-#else
-    offset = GetThreadOffset<4>(entrypoint).Int32Value();
-#endif
+    offset = GetThreadOffset<kRuntimePointerSize>(entrypoint).Int32Value();
     return *reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(self) + offset);
   }
 
@@ -1004,7 +1013,7 @@
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             // arbitrary
-                            reinterpret_cast<size_t>(c->GetVirtualMethod(0, sizeof(void*))),
+                            reinterpret_cast<size_t>(c->GetVirtualMethod(0, kRuntimePointerSize)),
                             0U,
                             StubTest::GetEntrypoint(self, kQuickAllocObject),
                             self);
@@ -1135,12 +1144,13 @@
 
   if ((false)) {
     // Use an arbitrary method from c to use as referrer
-    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
-                            10U,
-                            // arbitrary
-                            reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0, sizeof(void*))),
-                            StubTest::GetEntrypoint(self, kQuickAllocArray),
-                            self);
+    size_t result = Invoke3(
+        static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
+        10U,
+        // arbitrary
+        reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0, kRuntimePointerSize)),
+        StubTest::GetEntrypoint(self, kQuickAllocArray),
+        self);
 
     EXPECT_FALSE(self->IsExceptionPending());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
@@ -1193,7 +1203,10 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+  TEST_DISABLED_FOR_STRING_COMPRESSION();
+  // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__.
+#if defined(__i386__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -1785,7 +1798,7 @@
   Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(o)));
   Handle<mirror::Class> c(hs.NewHandle(obj->GetClass()));
   // Need a method as a referrer
-  ArtMethod* m = c->GetDirectMethod(0, sizeof(void*));
+  ArtMethod* m = c->GetDirectMethod(0, kRuntimePointerSize);
 
   // Play with it...
 
@@ -1921,7 +1934,12 @@
   TestFields(self, this, Primitive::Type::kPrimLong);
 }
 
-TEST_F(StubTest, IMT) {
+// Disabled, b/27991555.
+// FIXME: Hacking the entry point to point to art_quick_to_interpreter_bridge is broken.
+// The bridge calls through to GetCalleeSaveMethodCaller(), which looks up the pre-header,
+// gets a bogus OatQuickMethodHeader* pointing into our assembly code just before the
+// bridge, and uses that to check for inlined frames, crashing in the process.
+TEST_F(StubTest, DISABLED_IMT) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
@@ -1988,11 +2006,26 @@
 
   // Contains.
 
+  // We construct the ImtConflictTable ourselves, as we cannot go into the runtime stub
+  // that will create it: the runtime stub expects to be called by compiled code.
+  LinearAlloc* linear_alloc = Runtime::Current()->GetLinearAlloc();
+  ArtMethod* conflict_method = Runtime::Current()->CreateImtConflictMethod(linear_alloc);
+  ImtConflictTable* empty_conflict_table =
+      Runtime::Current()->GetClassLinker()->CreateImtConflictTable(/*count*/0u, linear_alloc);
+  void* data = linear_alloc->Alloc(
+      self,
+      ImtConflictTable::ComputeSizeWithOneMoreEntry(empty_conflict_table, kRuntimePointerSize));
+  ImtConflictTable* new_table = new (data) ImtConflictTable(
+      empty_conflict_table, inf_contains, contains_amethod, kRuntimePointerSize);
+  conflict_method->SetImtConflictTable(new_table, kRuntimePointerSize);
+
   size_t result =
-      Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()),
+      Invoke3WithReferrerAndHidden(reinterpret_cast<size_t>(conflict_method),
+                                   reinterpret_cast<size_t>(array_list.Get()),
                                    reinterpret_cast<size_t>(obj.Get()),
                                    StubTest::GetEntrypoint(self, kQuickQuickImtConflictTrampoline),
-                                   self, contains_amethod,
+                                   self,
+                                   contains_amethod,
                                    static_cast<size_t>(inf_contains->GetDexMethodIndex()));
 
   ASSERT_FALSE(self->IsExceptionPending());
@@ -2006,10 +2039,14 @@
 
   // Contains.
 
-  result = Invoke3WithReferrerAndHidden(
-      0U, reinterpret_cast<size_t>(array_list.Get()), reinterpret_cast<size_t>(obj.Get()),
-      StubTest::GetEntrypoint(self, kQuickQuickImtConflictTrampoline), self, contains_amethod,
-      static_cast<size_t>(inf_contains->GetDexMethodIndex()));
+  result =
+      Invoke3WithReferrerAndHidden(reinterpret_cast<size_t>(conflict_method),
+                                   reinterpret_cast<size_t>(array_list.Get()),
+                                   reinterpret_cast<size_t>(obj.Get()),
+                                   StubTest::GetEntrypoint(self, kQuickQuickImtConflictTrampoline),
+                                   self,
+                                   contains_amethod,
+                                   static_cast<size_t>(inf_contains->GetDexMethodIndex()));
 
   ASSERT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
@@ -2042,7 +2079,7 @@
 }
 
 TEST_F(StubTest, StringIndexOf) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
@@ -2115,6 +2152,8 @@
 #endif
 }
 
+// TODO: Exercise the ReadBarrierMarkRegX entry points.
+
 TEST_F(StubTest, ReadBarrier) {
 #if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
       defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
@@ -2151,4 +2190,39 @@
 #endif
 }
 
+TEST_F(StubTest, ReadBarrierForRoot) {
+#if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
+      defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
+  Thread* self = Thread::Current();
+
+  const uintptr_t readBarrierForRootSlow =
+      StubTest::GetEntrypoint(self, kQuickReadBarrierForRootSlow);
+
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  StackHandleScope<1> hs(soa.Self());
+
+  Handle<mirror::String> obj(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!")));
+
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  GcRoot<mirror::Class>& root = mirror::String::java_lang_String_;
+  size_t result = Invoke3(reinterpret_cast<size_t>(&root), 0U, 0U, readBarrierForRootSlow, self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+  mirror::Class* klass = reinterpret_cast<mirror::Class*>(result);
+  EXPECT_EQ(klass, obj->GetClass());
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping read_barrier_for_root_slow";
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping read_barrier_for_root_slow" << std::endl;
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 77b8e87..3e47209 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -142,6 +142,10 @@
     CFI_RESTORE(REG_VAR(reg))
 END_MACRO
 
+MACRO1(CFI_RESTORE_REG, reg)
+    CFI_RESTORE(REG_VAR(reg))
+END_MACRO
+
 #define UNREACHABLE int3
 
 MACRO1(UNIMPLEMENTED,name)
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index b0a6017..2bba08d 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -19,10 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-
-// 32 bytes for GPRs and 32 bytes for FPRs.
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32)
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 32
+#define FRAME_SIZE_SAVE_REFS_ONLY 32
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS (32 + 32)
+#define FRAME_SIZE_SAVE_EVERYTHING (48 + 64)
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 019546f..bdf11da 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
@@ -24,73 +25,49 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
 
   // Math
   qpoints->pD2l = art_quick_d2l;
@@ -107,38 +84,41 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  // x86 has only 8 core registers.
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
+  qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
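As a sanity aid, here is a hedged sketch (CheckReadBarrierMarkTable is hypothetical, not ART code) of the invariant the table above establishes on x86:

    #include <cassert>
    #include <cstddef>

    void CheckReadBarrierMarkTable(void* const stubs[], size_t num_slots) {
      for (size_t reg = 0; reg < num_slots; ++reg) {
        // x86 has only eight core registers, and register 4 (ESP) cannot
        // carry the argument, so those slots must stay nullptr.
        if (reg >= 8 || reg == 4) {
          assert(stubs[reg] == nullptr);
        }
      }
    }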
 
 }  // namespace art
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index d7c4cb1..3efeb40 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -20,6 +20,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
@@ -36,6 +37,7 @@
 #define CTX_EIP uc_mcontext->__ss.__rip
 #define CTX_EAX uc_mcontext->__ss.__rax
 #define CTX_METHOD uc_mcontext->__ss.__rdi
+#define CTX_RDI uc_mcontext->__ss.__rdi
 #define CTX_JMP_BUF uc_mcontext->__ss.__rdi
 #else
 // 32 bit mac build.
@@ -69,18 +71,9 @@
 
 namespace art {
 
-#if defined(__APPLE__) && defined(__x86_64__)
-// mac symbols have a prefix of _ on x86_64
-extern "C" void _art_quick_throw_null_pointer_exception();
-extern "C" void _art_quick_throw_stack_overflow();
-extern "C" void _art_quick_test_suspend();
-#define EXT_SYM(sym) _ ## sym
-#else
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
-#define EXT_SYM(sym) sym
-#endif
 
 // Note this is different from the others (no underscore on 64 bit mac) due to
 // the way the symbol is defined in the .S file.
@@ -292,7 +285,10 @@
   *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
-bool NullPointerHandler::Action(int, siginfo_t*, void* context) {
+bool NullPointerHandler::Action(int, siginfo_t* sig, void* context) {
+  if (!IsValidImplicitCheck(sig)) {
+    return false;
+  }
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
@@ -314,7 +310,15 @@
   *next_sp = retaddr;
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_null_pointer_exception));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(
+      art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+#if defined(__x86_64__)
+  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
+#else
+  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
+#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
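The handler body above is an instance of a general signal-redirection technique. A minimal sketch under stated assumptions (glibc on x86-64 with _GNU_SOURCE providing the REG_* indices; HandleNullFault is a hypothetical target, and a real handler must also push a return address, as the ART code above does):

    #include <csignal>
    #include <cstdint>
    #include <ucontext.h>

    extern "C" void HandleNullFault(void* fault_address);  // hypothetical

    static void SegvHandler(int, siginfo_t* info, void* context) {
    #if defined(__x86_64__)
      ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
      // Pass the faulting address in the first-argument register (RDI) and
      // point the saved PC at the recovery routine, so that returning from
      // the signal handler "calls" it.
      uc->uc_mcontext.gregs[REG_RDI] =
          static_cast<greg_t>(reinterpret_cast<uintptr_t>(info->si_addr));
      uc->uc_mcontext.gregs[REG_RIP] =
          static_cast<greg_t>(reinterpret_cast<uintptr_t>(&HandleNullFault));
    #else
      (void)info;
      (void)context;
    #endif
    }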
@@ -335,11 +339,7 @@
 bool SuspensionHandler::Action(int, siginfo_t*, void* context) {
   // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
   // where xxx is the offset of the suspend trigger.
-#if defined(__x86_64__)
-  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<8>().Int32Value();
-#else
-  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
-#endif
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<kRuntimePointerSize>().Int32Value();
 
   VLOG(signals) << "Checking for suspension point";
 #if defined(__x86_64__)
@@ -388,7 +388,7 @@
     *next_sp = retaddr;
     uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-    uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_test_suspend));
+    uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
@@ -434,7 +434,7 @@
   // the previous frame.
 
   // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_stack_overflow));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
 
   return true;
 }
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index ef39999..0093e82 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,6 +45,10 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_with_popcnt[] = {
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -60,6 +64,11 @@
   bool has_AVX = false;
   bool has_AVX2 = false;
 
+  bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
+                                       arraysize(x86_variants_with_popcnt),
+                                       variant);
+
+  // Verify that the variant is known.
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -68,10 +77,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, has_POPCNT);
   }
 }
 
@@ -83,11 +92,13 @@
   bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvx2Bitfield) != 0;
+  bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                            has_AVX, has_AVX2, has_POPCNT);
   } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                         has_AVX, has_AVX2, has_POPCNT);
   }
 }
 
@@ -124,11 +135,18 @@
   const bool has_AVX2 = true;
 #endif
 
+#ifndef __POPCNT__
+  const bool has_POPCNT = false;
+#else
+  const bool has_POPCNT = true;
+#endif
+
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
@@ -141,6 +159,7 @@
   bool has_SSE4_2 = false;
   bool has_AVX = false;
   bool has_AVX2 = false;
+  bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -166,6 +185,9 @@
           if (line.find("avx2") != std::string::npos) {
             has_AVX2 = true;
           }
+          if (line.find("popcnt") != std::string::npos) {
+            has_POPCNT = true;
+          }
         } else if (line.find("processor") != std::string::npos &&
             line.find(": 1") != std::string::npos) {
           smp = true;
@@ -177,10 +199,11 @@
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                         has_AVX2, has_POPCNT);
   }
 }
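The cpuinfo scan above follows the usual pattern; a self-contained sketch that probes only the popcnt flag (CpuHasPopcnt is illustrative, assuming Linux's lowercase feature names on the flags line):

    #include <fstream>
    #include <string>

    bool CpuHasPopcnt() {
      std::ifstream in("/proc/cpuinfo");
      std::string line;
      while (std::getline(in, line)) {
        // The "flags" line lists lowercase feature names, e.g. "... popcnt ...".
        if (line.find("flags") != std::string::npos &&
            line.find("popcnt") != std::string::npos) {
          return true;
        }
      }
      return false;
    }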
 
@@ -204,7 +227,8 @@
       (has_SSE4_1_ == other_as_x86->has_SSE4_1_) &&
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
-      (has_AVX2_ == other_as_x86->has_AVX2_);
+      (has_AVX2_ == other_as_x86->has_AVX2_) &&
+      (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -213,7 +237,8 @@
       (has_SSE4_1_ ? kSse4_1Bitfield : 0) |
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
-      (has_AVX2_ ? kAvx2Bitfield : 0);
+      (has_AVX2_ ? kAvx2Bitfield : 0) |
+      (has_POPCNT_ ? kPopCntBitfield : 0);
 }
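A worked check of the new constant (bit values per the enum in instruction_set_features_x86.h further down, with smp = 1, ssse3 = 2, and sse4.1 = 4 implied by the existing tests): a silvermont has smp, ssse3, sse4.1, sse4.2, and popcnt, so its bitmap is 1 | 2 | 4 | 8 | 64 = 79, which is exactly the 79U the updated tests below expect.

    static_assert((1u | 2u | 4u | 8u | 64u) == 79u, "silvermont feature bitmap");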
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -248,6 +273,11 @@
   } else {
     result += ",-avx2";
   }
+  if (has_POPCNT_) {
+    result += ",popcnt";
+  } else {
+    result += ",-popcnt";
+  }
   return result;
 }
 
@@ -259,6 +289,7 @@
   bool has_SSE4_2 = has_SSE4_2_;
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
+  bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -281,6 +312,10 @@
       has_AVX2 = true;
     } else if (feature == "-avx2") {
       has_AVX2 = false;
+    } else if (feature == "popcnt") {
+      has_POPCNT = true;
+    } else if (feature == "-popcnt") {
+      has_POPCNT = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -288,10 +323,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                         has_AVX2, has_POPCNT);
   }
 }
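For reference, a standalone sketch of the "feature"/"-feature" comma-list syntax the loop above parses (ParsePopcntFlag is illustrative and mirrors only the popcnt branch; the real code also trims whitespace and rejects unknown names):

    #include <sstream>
    #include <string>

    bool ParsePopcntFlag(const std::string& features, bool current) {
      std::stringstream ss(features);
      std::string feature;
      while (std::getline(ss, feature, ',')) {
        if (feature == "popcnt") {
          current = true;
        } else if (feature == "-popcnt") {
          current = false;
        }
      }
      return current;  // Last occurrence wins, as in the loop above.
    }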
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 7b61245..2aa8ae6 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,6 +60,8 @@
 
   bool HasSSE4_1() const { return has_SSE4_1_; }
 
+  bool HasPopCnt() const { return has_POPCNT_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -73,9 +75,14 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2)
-      : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2) {
+                            bool has_AVX, bool has_AVX2, bool has_POPCNT)
+      : InstructionSetFeatures(smp),
+        has_SSSE3_(has_SSSE3),
+        has_SSE4_1_(has_SSE4_1),
+        has_SSE4_2_(has_SSE4_2),
+        has_AVX_(has_AVX),
+        has_AVX2_(has_AVX2),
+        has_POPCNT_(has_POPCNT) {
   }
 
  private:
@@ -87,6 +94,7 @@
     kSse4_2Bitfield = 8,
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
+    kPopCntBitfield = 64,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -94,6 +102,7 @@
   const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
+  const bool has_POPCNT_;  // x86 population count instruction.
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 25a406b..9e154c6 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,8 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
+               x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
 
@@ -39,7 +40,8 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
+               x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 3U);
 
   // Build features for a 32-bit x86 default processor.
@@ -48,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -58,7 +60,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
 
@@ -75,8 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 15U);
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 79U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -84,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -94,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 15U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2f485ae..282f10d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -22,9 +22,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-MACRO2(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -35,22 +35,22 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      */
-MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_ONLY_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -61,19 +61,52 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86) size not as expected."
 #endif
 END_MACRO
 
-MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
+     * and preserves the value of got_reg at entry.
+     */
+MACRO2(SETUP_SAVE_REFS_ONLY_FRAME_PRESERVE_GOT_REG, got_reg, temp_reg)
+    PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
+    PUSH esi
+    PUSH ebp
+    PUSH RAW_VAR(got_reg)  // Save got_reg
+    subl MACRO_LITERAL(8), %esp  // Grow stack by 2 words.
+    CFI_ADJUST_CFA_OFFSET(8)
+
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
+    // Load Runtime::instance_ from GOT.
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+    // Push save refs only callee-save method.
+    pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+    // Restore got_reg.
+    movl 12(%esp), REG_VAR(got_reg)
+    CFI_RESTORE_REG RAW_VAR(got_reg)
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +4: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86) size not as expected."
+#endif
+END_MACRO
+
+MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to saved values
     CFI_ADJUST_CFA_OFFSET(-16)
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
@@ -83,9 +116,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
      */
-MACRO2(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_AND_ARGS_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves
     PUSH esi
     PUSH ebp
@@ -106,23 +139,23 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4*8 + 4)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 7*4 + 4*8 + 4)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs) where the method is passed in EAX.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs) where the method is passed in EAX.
      */
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH edi  // Save callee saves
     PUSH esi
@@ -146,7 +179,7 @@
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
     // Restore FPRs. EAX is still on the stack.
     movsd 4(%esp), %xmm0
     movsd 12(%esp), %xmm1
@@ -167,7 +200,7 @@
 // Restore register and jump to routine
 // Inputs:  EDI contains pointer to code.
 // Notes: Need to pop EAX too (restores Method*)
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME_AND_JUMP)
     POP eax  // Restore Method*
 
     // Restore FPRs.
@@ -189,13 +222,81 @@
 END_MACRO
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+    // Save core registers.
+    PUSH edi
+    PUSH esi
+    PUSH ebp
+    PUSH ebx
+    PUSH edx
+    PUSH ecx
+    PUSH eax
+    // Create space for FPR registers and stack alignment padding.
+    subl MACRO_LITERAL(12 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(12 + 8 * 8)
+    // Save FPRs.
+    movsd %xmm0, 12(%esp)
+    movsd %xmm1, 20(%esp)
+    movsd %xmm2, 28(%esp)
+    movsd %xmm3, 36(%esp)
+    movsd %xmm4, 44(%esp)
+    movsd %xmm5, 52(%esp)
+    movsd %xmm6, 60(%esp)
+    movsd %xmm7, 68(%esp)
+
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
+    // Load Runtime::instance_ from GOT.
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+    // Push save everything callee-save method.
+    pushl RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(REG_VAR(temp_reg))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +4: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 7*4 + 8*8 + 12 + 4 + 4)
+#error "FRAME_SIZE_SAVE_EVERYTHING(X86) size not as expected."
+#endif
+END_MACRO
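A quick cross-check of the size assertion above against the layout the macro builds: 7 GPR pushes (7*4 = 28 bytes) + 12 bytes of alignment padding + 8 FPR slots (8*8 = 64 bytes) + 4 bytes for the pushed method pointer + 4 bytes for the implicit return address = 112 bytes, matching FRAME_SIZE_SAVE_EVERYTHING = 48 + 64 in asm_support_x86.h.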
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    // Restore FPRs. Method and padding is still on the stack.
+    movsd 16(%esp), %xmm0
+    movsd 24(%esp), %xmm1
+    movsd 32(%esp), %xmm2
+    movsd 40(%esp), %xmm3
+    movsd 48(%esp), %xmm4
+    movsd 56(%esp), %xmm5
+    movsd 64(%esp), %xmm6
+    movsd 72(%esp), %xmm7
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addl MACRO_LITERAL(16 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
+
+    // Restore core registers.
+    POP eax
+    POP ecx
+    POP edx
+    POP ebx
+    POP ebp
+    POP esi
+    POP edi
+END_MACRO
+
+    /*
      * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp              // Alignment padding
+    subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -205,23 +306,23 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp                // alignment padding
+    subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
+    call CALLVAR(cxx_name)                     // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
-    subl MACRO_LITERAL(8), %esp               // alignment padding
+    subl MACRO_LITERAL(8), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -233,7 +334,7 @@
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                                   // alignment padding
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
@@ -251,6 +352,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -278,6 +384,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
      * the method_idx.  This wrapper will save arg1-arg3 and call the appropriate C helper.
@@ -293,7 +405,7 @@
      * pointing back to the original caller.
      */
 MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx
     movl %esp, %edx  // remember SP
 
     // Outgoing argument set up
@@ -617,25 +729,9 @@
     ret
 END_FUNCTION art_quick_invoke_static_stub
 
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
-    // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp                // push padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
-    addl MACRO_LITERAL(16), %esp                // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME         // restore frame up to return address
-    CALL_MACRO(return_macro)                    // return or deliver exception
-    END_FUNCTION VAR(c_name)
-END_MACRO
-
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                  // push padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -645,14 +741,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     PUSH eax                                     // push padding
     pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
@@ -662,14 +758,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -679,14 +775,15 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME_PRESERVE_GOT_REG ebx, ebx  // save ref containing registers for GC
+
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
@@ -699,16 +796,16 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, arg4, Thread*)
     addl MACRO_LITERAL(32), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx       // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx         // get referrer
     PUSH eax                                          // push padding
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -717,16 +814,16 @@
     call CALLVAR(cxx_name)                            // cxx_name(arg1, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %edx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %edx         // get referrer
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                                          // pass referrer
@@ -735,16 +832,16 @@
     call CALLVAR(cxx_name)                            // cxx_name(arg1, arg2, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ebx         // get referrer
     subl MACRO_LITERAL(12), %esp                      // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
@@ -757,7 +854,7 @@
                                                       //          Thread*)
     addl LITERAL(32), %esp                            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -788,10 +885,277 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // eax: uint32_t type_idx/return value, ecx: ArtMethod*
+    // ebx, edx: free
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx  // Load dex cache resolved types array
+                                                        // Load the class (edx)
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    testl %edx, %edx                                    // Check null class
+    jz   .Lart_quick_alloc_object_rosalloc_slow_path
+                                                        // Check class status
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
+    jne  .Lart_quick_alloc_object_rosalloc_slow_path
+                                                        // No fake dependence needed on x86
+                                                        // between the status and flags loads,
+                                                        // since each load is a load-acquire,
+                                                        // so loads are not reordered.
+                                                        // Check access flags has
+                                                        // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
+    jnz   .Lart_quick_alloc_object_rosalloc_slow_path
+
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+                                                        // Check if the thread local allocation
+                                                        // stack has room
+    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi
+    cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %edi
+    jae  .Lart_quick_alloc_object_rosalloc_slow_path
+
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %edi    // Load the object size (edi)
+                                                        // Check if the size is for a thread
+                                                        // local allocation
+    cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %edi
+    ja   .Lart_quick_alloc_object_rosalloc_slow_path
+    decl %edi
+    shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %edi // Calculate the rosalloc bracket index
+                                                            // from object size.
+                                                            // This is a shorter, equivalent
+                                                            // version of: align the size up
+                                                            // to the bracket quantum size,
+                                                            // divide by the quantum size,
+                                                            // and subtract 1.
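+                                                            // Worked example, assuming a
+                                                            // 16-byte quantum (shift 4):
+                                                            // size 24 gives (24-1)>>4 = 1,
+                                                            // i.e. RoundUp(24, 16)/16 - 1.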
+                                                        // Load thread local rosalloc run (ebx)
+    movl THREAD_ROSALLOC_RUNS_OFFSET(%ebx, %edi, __SIZEOF_POINTER__), %ebx
+                                                        // Load free_list head (edi),
+                                                        // this will be the return value.
+    movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %edi
+    test %edi, %edi
+    jz   .Lart_quick_alloc_object_rosalloc_slow_path
+                                                        // Point of no return for the slow
+                                                        // path: it cannot be taken from
+                                                        // here on, so it is OK to clobber
+                                                        // eax and ecx.
+    movl %edi, %eax
+                                                        // Load the next pointer of the head
+                                                        // and update head of free list with
+                                                        // next pointer
+    movl ROSALLOC_SLOT_NEXT_OFFSET(%eax), %edi
+    movl %edi, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx)
+                                                        // Decrement size of free list by 1
+    decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%ebx)
+                                                        // Store the class pointer in the
+                                                        // header. This also overwrites the
+                                                        // next pointer. The offsets are
+                                                        // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+                                                        // Push the new object onto the thread
+                                                        // local allocation stack and
+                                                        // increment the thread local
+                                                        // allocation stack top.
+    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi
+    movl %eax, (%edi)
+    addl LITERAL(COMPRESSED_REFERENCE_SIZE), %edi
+    movl %edi, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx)
+                                                        // No fence needed for x86.
+    POP edi
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    POP edi
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx          // save ref containing registers for GC
+    // Outgoing argument set up
+    PUSH eax                      // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
+    PUSH eax
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)  // cxx_name(arg0, arg1, Thread*)
+    addl LITERAL(16), %esp                       // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
+END_FUNCTION art_quick_alloc_object_rosalloc
+
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class.
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                    // Check null class
+    jz   VAR(slowPathLabel)
+                                                        // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
+    jne  VAR(slowPathLabel)
+                                                        // No fake dependence needed on x86
+                                                        // between the status and flags loads,
+                                                        // since each load is a load-acquire,
+                                                        // so loads are not reordered.
+                                                        // Check access flags has
+                                                        // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
+    jnz  VAR(slowPathLabel)
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+    movl THREAD_LOCAL_END_OFFSET(%ebx), %edi            // Load thread_local_end.
+    subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi            // Compute the remaining buffer size.
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi    // Load the object size.
+    cmpl %edi, %esi                                     // Check if it fits. It is safe to do
+                                                        // this before rounding up the object
+                                                        // size because the buffer size is
+                                                        // aligned.
+    ja   VAR(slowPathLabel)
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi           // Align the size to 8: (size + 7) & ~7.
+    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %esi
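+                                                        // e.g. (assuming 8-byte object
+                                                        // alignment) a 13-byte object is
+                                                        // padded to (13 + 7) & ~7 = 16 bytes.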
+    movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax            // Load thread_local_pos
+                                                        // as allocated object.
+    addl %eax, %esi                                     // Add the object size.
+    movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)            // Update thread_local_pos.
+    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx)  // Increase thread_local_objects.
+                                                        // Store the class pointer in the header.
+                                                        // No fence needed for x86.
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
+    POP edi
+    POP esi
+    ret                                                 // Fast path succeeded.
+END_MACRO
+
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
+    POP edi
+    POP esi
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx                 // save ref containing registers for GC
+    // Outgoing argument set up
+    PUSH eax                                            // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET                        // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
+    PUSH eax
+    call CALLVAR(cxx_name)                              // cxx_name(arg0, arg1, Thread*)
+    addl LITERAL(16), %esp
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME                        // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
+END_MACRO
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+#if defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    PUSH esi
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                            // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
+END_FUNCTION art_quick_alloc_object_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    PUSH esi
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                            // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+                                                            // Read barrier for class load.
+    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH eax
+    PUSH ecx
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                             // Alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH edx                                                // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                         // cxx_name(mirror::Object* obj)
+    movl %eax, %edx
+    addl MACRO_LITERAL(12), %esp
+    CFI_ADJUST_CFA_OFFSET(-12)
+    POP ecx
+    POP eax
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
+
+DEFINE_FUNCTION art_quick_resolve_string
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx
+    movl FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx                   // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx           // get declaring class
+    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx    // get string dex cache
+    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx
+    andl %eax, %edx
+    shl LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %edx
+    addl %ecx, %edx
+    movlps (%edx), %xmm0                                     // load string idx and pointer to xmm0
+    movd %xmm0, %ecx                                         // extract pointer
+    pshufd LITERAL(0x55), %xmm0, %xmm0                       // shuffle index into lowest bits
+    movd %xmm0, %edx                                         // extract index
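+    // Note: this assumes each dex cache entry is an 8-byte pair with the
+    // 32-bit String* in the low word and the 32-bit dex string index in the
+    // high word, so the single movlps above fetches both.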
+    cmp %edx, %eax
+    jne .Lart_quick_resolve_string_slow_path
+    movl %ecx, %eax
+#ifdef USE_READ_BARRIER
+    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_resolve_string_marking
+#endif
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    ret
+.Lart_quick_resolve_string_slow_path:
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                                        // push padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                                                     // pass arg1
+    call SYMBOL(artResolveStringFromCode)
+    addl LITERAL(16), %esp                                       // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+.Lart_quick_resolve_string_marking:
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+    jnz .Lart_quick_resolve_string_no_rb
+    subl LITERAL(12), %esp                                   // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    PUSH eax                                                 // Pass the string as the first param.
+    call SYMBOL(artReadBarrierMark)
+    addl LITERAL(16), %esp
+    CFI_ADJUST_CFA_OFFSET(-16)
+.Lart_quick_resolve_string_no_rb:
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    ret
+END_FUNCTION art_quick_resolve_string
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -806,7 +1170,7 @@
     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // test the 2 high bits.
     jne  .Lslow_lock                      // slow path if either of the two high bits are set.
     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // lock word contains a thin lock
     // unlocked case - edx: original lock word, eax: obj.
@@ -822,9 +1186,9 @@
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
     movl %edx, %ecx                       // copy the lock word to check count overflow.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count for overflow check.
-    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set.
+    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // overflowed if the first gc state bit is set.
     jne  .Lslow_lock                      // count overflowed so go slow
     movl %eax, %ecx                       // save obj to use eax for cmpxchg.
     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
@@ -837,7 +1201,7 @@
     movl  %ecx, %eax                      // restore eax
     jmp  .Lretry_lock
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -847,10 +1211,26 @@
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
+DEFINE_FUNCTION art_quick_lock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                 // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                              // pass object
+    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
+    addl LITERAL(16), %esp                // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object_no_inline
+
+
 DEFINE_FUNCTION art_quick_unlock_object
     testl %eax, %eax                      // null check object/eax
     jz   .Lslow_unlock
@@ -862,13 +1242,13 @@
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
     // update lockword, cmpxchg necessary for read barrier bits.
     movl %eax, %edx                       // edx: obj
     movl %ecx, %eax                       // eax: old lock word.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
 #ifndef USE_READ_BARRIER
     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
 #else
@@ -892,7 +1272,7 @@
     movl %edx, %eax                       // restore eax
     jmp  .Lretry_unlock
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -902,10 +1282,25 @@
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
+DEFINE_FUNCTION art_quick_unlock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                 // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                              // pass object
+    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
+    addl LITERAL(16), %esp                // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object_no_inline
+
 DEFINE_FUNCTION art_quick_is_assignable
     PUSH eax                              // alignment padding
     PUSH ecx                              // pass arg2 - obj->klass
@@ -933,7 +1328,7 @@
     POP ecx
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-4)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                              // alignment padding
     pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
@@ -1085,7 +1480,7 @@
     POP  edx
     POP  ecx
     POP  eax
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1107,7 +1502,19 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx              // save everything for GC
+    // Outgoing argument set up
+    subl MACRO_LITERAL(12), %esp                      // push padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artTestSuspendFromCode)               // (Thread*)
+    addl MACRO_LITERAL(16), %esp                      // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_EVERYTHING_FRAME                     // restore frame up to return address
+    ret                                               // return
+END_FUNCTION art_quick_test_suspend
 
 DEFINE_FUNCTION art_quick_d2l
     subl LITERAL(12), %esp        // alignment padding, room for argument
@@ -1232,14 +1639,14 @@
 // Call artSet64InstanceFromCode with 4 word size arguments and the referrer.
 DEFINE_FUNCTION art_quick_set64_instance
     movd %ebx, %xmm0
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx  // save ref containing registers for GC
     movd %xmm0, %ebx
     // Outgoing argument set up
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    pushl (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+12)(%esp)  // pass referrer
+    pushl (FRAME_SIZE_SAVE_REFS_ONLY+12)(%esp)  // pass referrer
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                      // pass high half of new_val
     PUSH edx                      // pass low half of new_val
@@ -1248,7 +1655,7 @@
     call SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_instance
 
@@ -1257,9 +1664,9 @@
 DEFINE_FUNCTION art_quick_set64_static
     // TODO: Implement SETUP_GOT_NOSAVE for got_reg = ecx to avoid moving around the registers.
     movd %ebx, %xmm0
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     movd %xmm0, %ebx
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx  // get referrer
     subl LITERAL(12), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1271,12 +1678,12 @@
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX
     PUSH esp                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1286,23 +1693,54 @@
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
-    addl LITERAL(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %esp
-    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE))
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    addl LITERAL(16 + FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY))
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. xmm7 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * eax is the conflict ArtMethod.
+     * xmm7 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to eax.
+     * Because of the lack of free registers, it also saves and restores edi.
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
+    PUSH EDI
+    movl 8(%esp), %edi // Load referrer
+    movl ART_METHOD_DEX_CACHE_METHODS_OFFSET_32(%edi), %edi   // Load dex cache methods array
+    pushl ART_METHOD_JNI_OFFSET_32(%eax)  // Push ImtConflictTable.
+    CFI_ADJUST_CFA_OFFSET(4)
     movd %xmm7, %eax              // get target method index stored in xmm7
+    movl 0(%edi, %eax, __SIZEOF_POINTER__), %edi  // Load interface method
+    popl %eax  // Pop ImtConflictTable.
+    CFI_ADJUST_CFA_OFFSET(-4)
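+    // The ImtConflictTable in eax is a null-terminated array of
+    // {interface method, implementation method} pointer pairs, which is why
+    // the iteration below terminates on a null entry and steps by
+    // 2 * __SIZEOF_POINTER__.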
+.Limt_table_iterate:
+    cmpl %edi, 0(%eax)
+    jne .Limt_table_next_entry
+    // We successfully hit an entry in the table. Load the target method
+    // and jump to it.
+    POP EDI
+    movl __SIZEOF_POINTER__(%eax), %eax
+    jmp *ART_METHOD_QUICK_CODE_OFFSET_32(%eax)
+.Limt_table_next_entry:
+    // If the entry is null, the interface method is not in the ImtConflictTable.
+    cmpl LITERAL(0), 0(%eax)
+    jz .Lconflict_trampoline
+    // Iterate over the entries of the ImtConflictTable.
+    addl LITERAL(2 * __SIZEOF_POINTER__), %eax
+    jmp .Limt_table_iterate
+.Lconflict_trampoline:
+    // Call the runtime stub to populate the ImtConflictTable and jump to the
+    // resolved method.
+    POP EDI
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx
     movl %esp, %edi
     PUSH EDI                      // pass SP. do not just PUSH ESP; that messes up unwinding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1315,14 +1753,14 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     test %eax, %eax               // if code pointer is null goto deliver pending exception
     jz 1f
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_AND_JUMP
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX
     movl %esp, %ebp               // save SP at callee-save frame
     CFI_DEF_CFA_REGISTER(ebp)
     subl LITERAL(5120), %esp
@@ -1401,7 +1839,7 @@
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  ebx, ebx  // save frame
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  ebx, ebx  // save frame
     mov %esp, %edx                // remember SP
     PUSH eax                      // alignment padding
     PUSH edx                      // pass SP
@@ -1431,11 +1869,11 @@
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, edx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-4+16(%esp)  // Pass LR.
+    pushl FRAME_SIZE_SAVE_REFS_AND_ARGS-4+16(%esp)  // Pass LR.
     CFI_ADJUST_CFA_OFFSET(4)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1470,7 +1908,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushl LITERAL(0)              // Push a fake return PC as there will be none on the stack.
     CFI_ADJUST_CFA_OFFSET(4)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
     mov  %esp, %ecx               // Remember SP
     subl LITERAL(8), %esp         // Save float return value.
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1496,7 +1934,7 @@
     movq (%esp), %xmm0            // Restore fpr return value.
     addl LITERAL(8), %esp
     CFI_ADJUST_CFA_OFFSET(-8)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     addl LITERAL(4), %esp         // Remove fake return pc.
     CFI_ADJUST_CFA_OFFSET(-4)
     jmp   *%ecx                   // Return.
@@ -1508,7 +1946,7 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize
     PUSH ebx                      // Entry point for a jump. Fake that we were called.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
@@ -1522,7 +1960,7 @@
      * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
     subl LITERAL(12), %esp                      // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET                // Pass Thread::Current().
@@ -1587,15 +2025,170 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+// Create a function `name` that calls the ReadBarrier::Mark routine,
+// taking its argument and returning its result through register
+// `reg`, saving and restoring all caller-save registers.
+//
+// If `reg` is different from `eax`, the generated function follows a
+// non-standard runtime calling convention:
+// - register `reg` is used to pass the (sole) argument of this function
+//   (instead of EAX);
+// - register `reg` is used to return the result of this function
+//   (instead of EAX);
+// - EAX is treated like a normal (non-argument) caller-save register;
+// - everything else is the same as in the standard runtime calling
+//   convention (e.g. standard callee-save registers are preserved).
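+//
+// For example, art_quick_read_barrier_mark_reg01 below takes the object to
+// mark in ECX and returns the marked object in ECX; every other register,
+// EAX included, is saved and restored around the call.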
+MACRO2(READ_BARRIER_MARK_REG, name, reg)
+    DEFINE_FUNCTION VAR(name)
+    // Null check so that we can load the lock word.
+    test REG_VAR(reg), REG_VAR(reg)
+    jz .Lret_rb_\name
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+    jz .Lslow_rb_\name
+    ret
+.Lslow_rb_\name:
+    // Save all potentially live caller-save core registers.
+    PUSH eax
+    PUSH ecx
+    PUSH edx
+    PUSH ebx
+    // 8-byte align the stack for (8-byte) XMM register saving and restoring,
+    // and create space for the caller-save floating-point registers.
+    subl MACRO_LITERAL(4 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(4 + 8 * 8)
+    // Save all potentially live caller-save floating-point registers.
+    movsd %xmm0, 0(%esp)
+    movsd %xmm1, 8(%esp)
+    movsd %xmm2, 16(%esp)
+    movsd %xmm3, 24(%esp)
+    movsd %xmm4, 32(%esp)
+    movsd %xmm5, 40(%esp)
+    movsd %xmm6, 48(%esp)
+    movsd %xmm7, 56(%esp)
+
+    subl LITERAL(4), %esp            // alignment padding
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH RAW_VAR(reg)                // pass arg1 - obj from `reg`
+    call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
+    .ifnc RAW_VAR(reg), eax
+      movl %eax, REG_VAR(reg)        // return result into `reg`
+    .endif
+    addl LITERAL(8), %esp            // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-8)
+
+    // Restore floating-point registers.
+    movsd 0(%esp), %xmm0
+    movsd 8(%esp), %xmm1
+    movsd 16(%esp), %xmm2
+    movsd 24(%esp), %xmm3
+    movsd 32(%esp), %xmm4
+    movsd 40(%esp), %xmm5
+    movsd 48(%esp), %xmm6
+    movsd 56(%esp), %xmm7
+    // Remove floating-point registers and padding.
+    addl MACRO_LITERAL(8 * 8 + 4), %esp
+    CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4))
+    // Restore core regs, except `reg`, as it is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REG_NE ebx, RAW_VAR(reg)
+    POP_REG_NE edx, RAW_VAR(reg)
+    POP_REG_NE ecx, RAW_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
+.Lret_rb_\name:
+    ret
+    END_FUNCTION VAR(name)
+END_MACRO
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx
+// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (ESP)
+// cannot be used to pass arguments.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, ebp
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, esi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, edi
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
-    PUSH edx                        // pass arg3 - offset
-    PUSH ecx                        // pass arg2 - obj
-    PUSH eax                        // pass arg1 - ref
-    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
-    addl LITERAL(12), %esp          // pop arguments
+    PUSH edx                         // pass arg3 - offset
+    PUSH ecx                         // pass arg2 - obj
+    PUSH eax                         // pass arg1 - ref
+    call SYMBOL(artReadBarrierSlow)  // artReadBarrierSlow(ref, obj, offset)
+    addl LITERAL(12), %esp           // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_read_barrier_slow
 
+DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
+    subl LITERAL(8), %esp                   // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH eax                                // pass arg1 - root
+    call SYMBOL(artReadBarrierForRootSlow)  // artReadBarrierForRootSlow(root)
+    addl LITERAL(12), %esp                  // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-12)
+    ret
+END_FUNCTION art_quick_read_barrier_for_root_slow
+
+    /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   [sp + 4] = stack to copy
+     *   [sp + 8] = size of stack
+     *   [sp + 12] = pc to call
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = shorty
+     *   [sp + 24] = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save native callee saves.
+    PUSH ebp
+    PUSH ebx
+    PUSH esi
+    PUSH edi
+    mov 4+16(%esp), %esi           // ESI = argument array
+    mov 8+16(%esp), %ecx           // ECX = size of args
+    mov 12+16(%esp), %ebx          // EBX = pc to call
+    mov %esp, %ebp                 // Save stack pointer
+    andl LITERAL(0xFFFFFFF0), %esp // Align stack
+    PUSH ebp                       // Save old stack pointer
+    subl LITERAL(12), %esp         // Align stack
+    movl LITERAL(0), (%esp)        // Store null for ArtMethod* slot
+    call .Losr_entry
+
+    // Restore stack pointer.
+    addl LITERAL(12), %esp
+    POP ebp
+    mov %ebp, %esp
+
+    // Restore callee saves.
+    POP edi
+    POP esi
+    POP ebx
+    POP ebp
+    mov 16(%esp), %ecx            // Get JValue result
+    mov %eax, (%ecx)              // Store the result assuming it is a long, int or Object*
+    mov %edx, 4(%ecx)             // Store the other half of the result
+    mov 20(%esp), %edx            // Get the shorty
+    cmpb LITERAL(68), (%edx)      // Test if result type char == 'D'
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%edx)      // Test if result type char == 'F'
+    je .Losr_return_float_quick
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_entry:
+    subl LITERAL(4), %ecx         // The given stack size includes the pushed frame pointer; subtract it.
+    subl %ecx, %esp
+    mov %esp, %edi                // EDI = beginning of stack
+    rep movsb                     // while (ecx--) { *edi++ = *esi++ }
+    jmp *%ebx
+END_FUNCTION art_quick_osr_stub
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index ed1d860..9fcde35 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -36,27 +36,39 @@
   XMM7 = 7,
 };
 
+static constexpr uint32_t kX86CalleeSaveAlwaysSpills =
+    (1 << art::x86::kNumberOfCpuRegisters);  // Fake return address callee save.
 static constexpr uint32_t kX86CalleeSaveRefSpills =
     (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
 static constexpr uint32_t kX86CalleeSaveArgSpills =
     (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+static constexpr uint32_t kX86CalleeSaveEverythingSpills =
+    (1 << art::x86::EAX) | (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+
 static constexpr uint32_t kX86CalleeSaveFpArgSpills =
     (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
     (1 << art::x86::XMM2) | (1 << art::x86::XMM3);
+static constexpr uint32_t kX86CalleeSaveFpEverythingSpills =
+    (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
+    (1 << art::x86::XMM2) | (1 << art::x86::XMM3) |
+    (1 << art::x86::XMM4) | (1 << art::x86::XMM5) |
+    (1 << art::x86::XMM6) | (1 << art::x86::XMM7);
 
 constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
-      (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
+  return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-    return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0;
+    return (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
+        (type == Runtime::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ +
                   2 * POPCOUNT(X86CalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kX86PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kX86PointerSize), kStackAlignment);
 }
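+
+// Worked example: for kSaveRefsOnly the core spills are EBP, ESI and EDI plus
+// the fake return address (4 GPRs) and there are no FP spills, so the frame is
+// (4 + 0 + 1 /* Method* */) * 4 = 20 bytes, which RoundUp pads to 32 under the
+// (assumed) 16-byte kStackAlignment.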
 
 constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 3d19f06..241650e 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -20,6 +20,7 @@
 #include <sys/types.h>
 
 #include "asm_support_x86.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "thread-inl.h"
 #include "thread_list.h"
@@ -45,16 +46,17 @@
   MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
-  const size_t limit = kPageSize;
+  const size_t limit = sizeof(Thread);
 
   const int contents = MODIFY_LDT_CONTENTS_DATA;
   const int seg_32bit = 1;
   const int read_exec_only = 0;
-  const int limit_in_pages = 0;
+  const int limit_in_pages = 1;
   const int seg_not_present = 0;
   const int useable = 1;
 
-  int entry_number = -1;
+  int entry_number;
+  uint16_t table_indicator;
 
 #if defined(__APPLE__)
   descriptor_table_entry_t entry;
@@ -77,41 +79,52 @@
   if (entry_number == -1) {
     PLOG(FATAL) << "i386_set_ldt failed";
   }
+
+  table_indicator = 1 << 2;  // LDT
 #else
-  // Read current LDT entries.
-  static_assert(static_cast<size_t>(LDT_ENTRY_SIZE) == sizeof(uint64_t),
-                "LDT_ENTRY_SIZE is different from sizeof(uint64_t).");
-  std::vector<uint64_t> ldt(LDT_ENTRIES);
-  size_t ldt_size(sizeof(uint64_t) * ldt.size());
-  memset(&ldt[0], 0, ldt_size);
-  // TODO: why doesn't this return LDT_ENTRY_SIZE * LDT_ENTRIES for the main thread?
-  syscall(__NR_modify_ldt, 0, &ldt[0], ldt_size);
+  // We use a GDT entry on Linux.
+  user_desc gdt_entry;
+  memset(&gdt_entry, 0, sizeof(gdt_entry));
 
-  // Find the first empty slot.
-  for (entry_number = 0; entry_number < LDT_ENTRIES && ldt[entry_number] != 0; ++entry_number) {
-  }
-  if (entry_number >= LDT_ENTRIES) {
-    LOG(FATAL) << "Failed to find a free LDT slot";
-  }
+  // On Linux, there are 3 TLS GDT entries. We use one of those to store our segment descriptor
+  // data.
+  //
+  // This entry must be shared, as the kernel only guarantees three TLS entries. For simplicity
+  // (and locality), use this function-local static, which effectively becomes read-only after
+  // the first (startup) thread of the runtime has been initialized (during Runtime::Start()).
+  //
+  // We also share this between all runtimes in the process. This is both for simplicity (one
+  // well-known slot) and to avoid the three-slot limitation. The downside is that we cannot
+  // free the slot when a runtime is known to have stopped.
+  static unsigned int gdt_entry_number = -1;
 
-  // Update LDT entry.
-  user_desc ldt_entry;
-  memset(&ldt_entry, 0, sizeof(ldt_entry));
-  ldt_entry.entry_number = entry_number;
-  ldt_entry.base_addr = base;
-  ldt_entry.limit = limit;
-  ldt_entry.seg_32bit = seg_32bit;
-  ldt_entry.contents = contents;
-  ldt_entry.read_exec_only = read_exec_only;
-  ldt_entry.limit_in_pages = limit_in_pages;
-  ldt_entry.seg_not_present = seg_not_present;
-  ldt_entry.useable = useable;
-  CHECK_EQ(0, syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry)));
-  entry_number = ldt_entry.entry_number;
+  if (gdt_entry_number == static_cast<unsigned int>(-1)) {
+    gdt_entry.entry_number = -1;  // Let the kernel choose.
+  } else {
+    gdt_entry.entry_number = gdt_entry_number;
+  }
+  gdt_entry.base_addr = base;
+  gdt_entry.limit = limit;
+  gdt_entry.seg_32bit = seg_32bit;
+  gdt_entry.contents = contents;
+  gdt_entry.read_exec_only = read_exec_only;
+  gdt_entry.limit_in_pages = limit_in_pages;
+  gdt_entry.seg_not_present = seg_not_present;
+  gdt_entry.useable = useable;
+  int rc = syscall(__NR_set_thread_area, &gdt_entry);
+  if (rc != -1) {
+    entry_number = gdt_entry.entry_number;
+    if (gdt_entry_number == static_cast<unsigned int>(-1)) {
+      gdt_entry_number = entry_number;  // Save the kernel-assigned entry number.
+    }
+  } else {
+    PLOG(FATAL) << "set_thread_area failed";
+    UNREACHABLE();
+  }
+  table_indicator = 0;  // GDT
 #endif
 
-  // Change %fs to be new LDT entry.
-  uint16_t table_indicator = 1 << 2;  // LDT
+  // Change %fs to point at the new descriptor table entry.
   uint16_t rpl = 3;  // Requested privilege level
   uint16_t selector = (entry_number << 3) | table_indicator | rpl;
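+  // For example, a kernel-assigned GDT entry 12 yields selector
+  // (12 << 3) | 0 /* GDT */ | 3 /* RPL */ = 0x63 (illustrative; the actual
+  // entry number is chosen by the kernel).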
   __asm__ __volatile__("movw %w0, %%fs"
@@ -124,7 +137,7 @@
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
-  CHECK_EQ(THREAD_SELF_OFFSET, SelfOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_SELF_OFFSET, SelfOffset<PointerSize::k32>().Int32Value());
   __asm__ __volatile__("movl %%fs:(%1), %0"
       : "=r"(self_check)  // output
       : "r"(THREAD_SELF_OFFSET)  // input
@@ -132,9 +145,9 @@
   CHECK_EQ(self_check, this);
 
   // Sanity check other offsets.
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<PointerSize::k32>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
@@ -163,13 +176,18 @@
   UNUSED(selector);
   // i386_set_ldt(selector >> 3, 0, 1);
 #else
-  user_desc ldt_entry;
-  memset(&ldt_entry, 0, sizeof(ldt_entry));
-  ldt_entry.entry_number = selector >> 3;
-  ldt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
-  ldt_entry.seg_not_present = 1;
-
-  syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry));
+  // Note: if we wanted to clean up the GDT entry, we would do that here, when the *last* thread
+  // is being deleted. But see the comment on gdt_entry_number. The code would look like this:
+  //
+  // user_desc gdt_entry;
+  // memset(&gdt_entry, 0, sizeof(gdt_entry));
+  // gdt_entry.entry_number = selector >> 3;
+  // gdt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
+  // // "Empty" = Delete = seg_not_present==1 && read_exec_only==1.
+  // gdt_entry.seg_not_present = 1;
+  // gdt_entry.read_exec_only = 1;
+  // syscall(__NR_set_thread_area, &gdt_entry);
+  UNUSED(selector);
 #endif
 }
 
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index cf0039c..0728f99 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -31,7 +31,8 @@
     // Clang/llvm does not support .altmacro. However, the clang/llvm preprocessor doesn't
     // separate the backslash and parameter by a space. Everything just works.
     #define RAW_VAR(name) \name
-    #define VAR(name) SYMBOL(\name)
+    #define VAR(name) \name
+    #define CALLVAR(name) SYMBOL(\name)
     #define PLT_VAR(name) \name@PLT
     #define REG_VAR(name) %\name
     #define CALL_MACRO(name) \name
@@ -45,6 +46,7 @@
     .altmacro
     #define RAW_VAR(name) name&
     #define VAR(name) name&
+    #define CALLVAR(name) SYMBOL(name&)
     #define PLT_VAR(name) name&@PLT
     #define REG_VAR(name) %name
     #define CALL_MACRO(name) name&
@@ -52,7 +54,7 @@
 
 #define LITERAL(value) $value
 #if defined(__APPLE__)
-    #define MACRO_LITERAL(value) $$(value)
+    #define MACRO_LITERAL(value) $(value)
 #else
     #define MACRO_LITERAL(value) $value
 #endif
@@ -110,10 +112,10 @@
 // for mac builds.
 MACRO1(DEFINE_FUNCTION, c_name)
     FUNCTION_TYPE(SYMBOL(\c_name))
-    ASM_HIDDEN SYMBOL(\c_name)
-    .globl VAR(c_name)
+    ASM_HIDDEN CALLVAR(c_name)
+    .globl CALLVAR(c_name)
     ALIGN_FUNCTION_ENTRY
-VAR(c_name):
+CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(rsp, 8)
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index eddd172..a4446d3 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES (64 + 4*8)
+#define FRAME_SIZE_SAVE_REFS_ONLY (64 + 4*8)
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS (112 + 12*8)
+#define FRAME_SIZE_SAVE_EVERYTHING (144 + 16*8)
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index eae09ee..42b9699 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -17,6 +17,9 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#if !defined(__APPLE__)
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
+#endif
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/math_entrypoints.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
@@ -25,77 +28,61 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
-                                                   const mirror::Class* ref_class);
+extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
+                                                 const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
   UNUSED(jpoints, qpoints);
   UNIMPLEMENTED(FATAL);
 #else
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
 
   // Math
   qpoints->pD2l = art_d2l;
@@ -111,38 +98,41 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  // x86-64 has only 16 core registers.
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
+  qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
 };
 
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index 3280177..0840f89 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,8 +74,9 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2)
-      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2) {
+                               bool has_AVX, bool has_AVX2, bool has_POPCNT)
+      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                  has_AVX2, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 5171080..f2b2cd8 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 95f0ccb..62808ab 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -42,9 +42,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -68,7 +68,7 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
-    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -76,17 +76,17 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      */
-MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -110,7 +110,7 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
-    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -118,13 +118,13 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
     movq 8(%rsp), %xmm12
     movq 16(%rsp), %xmm13
     movq 24(%rsp), %xmm14
@@ -142,9 +142,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
      */
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -165,10 +165,10 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots for ArtMethod*.
-    subq MACRO_LITERAL(80 + 4 * 8), %rsp
-    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
+    subq MACRO_LITERAL(16 + 12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
-    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
@@ -189,13 +189,13 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH r15  // Callee save.
     PUSH r14  // Callee save.
@@ -230,7 +230,7 @@
     movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
     // Restore FPRs.
     movq 16(%rsp), %xmm0
     movq 24(%rsp), %xmm1
@@ -260,13 +260,115 @@
     POP r15
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+#if defined(__APPLE__)
+    int3
+    int3
+#else
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    PUSH r15
+    PUSH r14
+    PUSH r13
+    PUSH r12
+    PUSH r11
+    PUSH r10
+    PUSH r9
+    PUSH r8
+    PUSH rdi
+    PUSH rsi
+    PUSH rbp
+    PUSH rbx
+    PUSH rdx
+    PUSH rcx
+    PUSH rax
+    // Create space for FPRs and stack alignment padding.
+    subq MACRO_LITERAL(8 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
+    // R10 := Runtime::Current()
+    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
+    movq (%r10), %r10
+    // Save FPRs.
+    movq %xmm0, 8(%rsp)
+    movq %xmm1, 16(%rsp)
+    movq %xmm2, 24(%rsp)
+    movq %xmm3, 32(%rsp)
+    movq %xmm4, 40(%rsp)
+    movq %xmm5, 48(%rsp)
+    movq %xmm6, 56(%rsp)
+    movq %xmm7, 64(%rsp)
+    movq %xmm8, 72(%rsp)
+    movq %xmm9, 80(%rsp)
+    movq %xmm10, 88(%rsp)
+    movq %xmm11, 96(%rsp)
+    movq %xmm12, 104(%rsp)
+    movq %xmm13, 112(%rsp)
+    movq %xmm14, 120(%rsp)
+    movq %xmm15, 128(%rsp)
+    // Push ArtMethod* for save everything frame method.
+    pushq RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(%r10)
+    CFI_ADJUST_CFA_OFFSET(8)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +8: implicit return address pushed on stack when caller made call.
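+    // Worked total: 15 GPR pushes (15 * 8) + 16 FPR slots (16 * 8) + 8 bytes of
+    // padding + 8 for the ArtMethod* + 8 for the return address = 272 bytes,
+    // matching FRAME_SIZE_SAVE_EVERYTHING = 144 + 16 * 8.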
+#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
+#endif
+#endif  // __APPLE__
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    // Restore FPRs. Method and padding are still on the stack.
+    movq 16(%rsp), %xmm0
+    movq 24(%rsp), %xmm1
+    movq 32(%rsp), %xmm2
+    movq 40(%rsp), %xmm3
+    movq 48(%rsp), %xmm4
+    movq 56(%rsp), %xmm5
+    movq 64(%rsp), %xmm6
+    movq 72(%rsp), %xmm7
+    movq 80(%rsp), %xmm8
+    movq 88(%rsp), %xmm9
+    movq 96(%rsp), %xmm10
+    movq 104(%rsp), %xmm11
+    movq 112(%rsp), %xmm12
+    movq 120(%rsp), %xmm13
+    movq 128(%rsp), %xmm14
+    movq 136(%rsp), %xmm15
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addq MACRO_LITERAL(16 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
+    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
+    POP rax
+    POP rcx
+    POP rdx
+    POP rbx
+    POP rbp
+    POP rsi
+    POP rdi
+    POP r8
+    POP r9
+    POP r10
+    POP r11
+    POP r12
+    POP r13
+    POP r14
+    POP r15
+END_MACRO
+
 
     /*
      * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
     // (Thread*) setup
     movq %gs:THREAD_SELF_OFFSET, %rdi
     call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
@@ -275,30 +377,30 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(arg1, Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -309,6 +411,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -336,6 +443,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
      * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
@@ -353,18 +466,18 @@
      * Adapted from x86 code.
      */
 MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
     movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
     movq %rsp, %rcx                                        // pass SP
 
-    call VAR(cxx_name)                                     // cxx_name(arg1, arg2, Thread*, SP)
+    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                            // save the code pointer
     movq %rax, %rdi
     movq %rdx, %rax
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     testq %rdi, %rdi
     jz 1f
@@ -691,57 +804,46 @@
 #endif  // __APPLE__
 END_FUNCTION art_quick_do_long_jump
 
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
-    // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro)             // return or deliver exception
-    END_FUNCTION VAR(c_name)
-END_MACRO
-
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(arg0, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     CALL_MACRO(return_macro)             // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     CALL_MACRO(return_macro)             // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg1, arg2, arg3, arg4, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -749,11 +851,11 @@
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rsi                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0 is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -761,11 +863,11 @@
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rdx                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0 and arg1 are in rdi/rsi
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name)                  // (arg0, arg1, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // (arg0, arg1, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -773,11 +875,11 @@
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rcx                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -808,59 +910,472 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
-// A handle-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+// Comment out allocators that have x86_64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // RDI: type_idx, RSI: ArtMethod*, RAX: return value
+    // RDX, RCX, R8, R9: free.
+    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+                                                             // Load the class (edx)
+    movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
+    testl  %edx, %edx                                      // Check null class
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
+                                                           // Check class status.
+    cmpl   LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                           // We don't need a fence (between
+                                                           // the status and the access flag
+                                                           // loads) here because every load is
+                                                           // a load acquire on x86.
+                                                           // Check whether the access flags
+                                                           // have kAccClassIsFinalizable.
+    testl  LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                           // Check if the thread local
+                                                           // allocation stack has room.
+    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
+    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
+    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
+    jae    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                           // Load the object size
+    movl   MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax
+                                                           // Check if the size is for a thread
+                                                           // local allocation
+    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
+    ja     .Lart_quick_alloc_object_rosalloc_slow_path
+                                                           // Compute the rosalloc bracket index
+                                                           // from the size.
+                                                           // Round the size up to the
+                                                           // rosalloc bracket quantum,
+                                                           // divide by the quantum size,
+                                                           // and subtract 1. This is a
+                                                           // shorter, equivalent version.
+    subq   LITERAL(1), %rax
+    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
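+                                                           // E.g., assuming an 8-byte
+                                                           // quantum (shift 3), a 24-byte
+                                                           // object maps to bracket
+                                                           // (24 - 1) >> 3 = 2.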
+                                                           // Load the rosalloc run (r9)
+    movq   THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9
+                                                           // Load the free list head (rax). This
+                                                           // will be the return val.
+    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
+    testq  %rax, %rax
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
+                                                           // Push the new object onto the thread
+                                                           // local allocation stack and
+                                                           // increment the thread local
+                                                           // allocation stack top.
+    movl   %eax, (%rcx)
+    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
+    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
+                                                           // Load the next pointer of the head
+                                                           // and update the list head with the
+                                                           // next pointer.
+    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
+    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
+                                                           // Store the class pointer in the
+                                                           // header. This also overwrites the
+                                                           // next pointer. The offsets are
+                                                           // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF edx
+    movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+                                                           // Decrement the size of the free list
+    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
+                                                           // No fence necessary for x86.
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)            // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
+END_FUNCTION art_quick_alloc_object_rosalloc
+
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                       // Check null class
+    jz   RAW_VAR(slowPathLabel)
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
+END_MACRO
+
+// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
+                                                           // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne  RAW_VAR(slowPathLabel)
+                                                           // No fake dependence needed on x86
+                                                           // between the status and flags
+                                                           // loads, since each load is a
+                                                           // load-acquire and loads are
+                                                           // not reordered.
+                                                           // Check whether the access flags
+                                                           // have kAccClassIsFinalizable.
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz  RAW_VAR(slowPathLabel)
+    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
+END_MACRO
+
+// The fast path code for art_quick_alloc_object_initialized_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
+    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
+    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
+    leaq OBJECT_ALIGNMENT_MASK(%rax, %rcx), %rcx               // Add size to pos, note that these
+                                                               // are both 32 bit ints, overflow
+                                                               // will cause the add to be past the
+                                                               // end of the thread local region.
+                                                               // Also sneak in alignment mask add.
+    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %rcx        // Align the size by 8. (addr + 7) &
+                                                               // ~7.
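+                                                               // E.g., pos 0x1000 and object
+                                                               // size 12 give (0x1000 + 12 + 7)
+                                                               // & ~7 = 0x1010 as the new pos.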
+    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
+    ja   RAW_VAR(slowPathLabel)
+    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increase thread_local_objects.
+                                                               // Store the class pointer in the
+                                                               // header.
+                                                               // No fence needed for x86.
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+END_MACRO
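+
+// Note on structure: the three object fast-path macros above nest. The unresolved
+// path null-checks the class, the resolved path checks initialization status and
+// finalizability, and the initialized path performs the TLAB bump-pointer allocation.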
+
+// The fast path code for art_quick_alloc_array_region_tlab.
+// Inputs: RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod* method
+// Temps: RCX: the class, r8, r9
+// Output: RAX: return value.
+MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel)
+    movq %rcx, %r8                                             // Save class for later
+    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx        // Load component type.
+    UNPOISON_HEAP_REF ecx
+    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
+    shrq LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx        // Get component size shift.
+    movq %rsi, %r9
+    salq %cl, %r9                                              // Calculate array count shifted.
+    // Add array header + alignment rounding.
+    addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
+    // Add 4 extra bytes if we are doing a long array.
+    addq LITERAL(1), %rcx
+    andq LITERAL(4), %rcx
+    addq %rcx, %r9
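+    // E.g., for a long array the component size shift is 3, so (3 + 1) & 4 = 4 extra
+    // bytes are added; for an int array the shift is 2 and (2 + 1) & 4 = 0.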
+    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
+#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+    // Mask out the unaligned part to make sure we are 8 byte aligned.
+    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
+    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
+    addq %rax, %r9
+    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
+    ja   RAW_VAR(slowPathLabel)
+    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
+                                                               // Store the class pointer in the
+                                                               // header.
+                                                               // No fence needed for x86.
+    POISON_HEAP_REF r8d
+    movl %r8d, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+END_MACRO
+
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
+    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call CALLVAR(cxx_name)                                 // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
+END_MACRO
+
+// The slow path code for art_quick_alloc_array_region_tlab.
+MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name)
+    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rcx                          // pass Thread::Current()
+    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, arg2, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+END_MACRO
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-    // TODO: Add read barrier when this function is used.
-    // Note this function can/should implement read barrier fast path only
-    // (no read barrier slow path) because this is the fast path of tlab allocation.
-    // We can fall back to the allocation slow path to do the read barrier slow path.
 #if defined(USE_READ_BARRIER)
     int3
     int3
 #endif
     // Might need a special macro since rsi and edx are 32b/64b mismatched.
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
-    // TODO: Add read barrier when this function is used.
     // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
-    testl %edx, %edx                                           // Check null class
-    jz   .Lart_quick_alloc_object_tlab_slow_path
-                                                               // Check class status.
-    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
-    jne  .Lart_quick_alloc_object_tlab_slow_path
-                                                               // Check access flags has kAccClassIsFinalizable
-    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
-    jnz  .Lart_quick_alloc_object_tlab_slow_path
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
-    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
-    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
-    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
-    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
-    addq %rax, %rcx                                            // Add the object size.
-    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
-    ja   .Lart_quick_alloc_object_tlab_slow_path
-    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
-    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
-                                                               // Store the class pointer in the header.
-                                                               // No fence needed for x86.
-    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
-    ret                                                        // Fast path succeeded.
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
-    // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeTLAB)                    // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
 END_FUNCTION art_quick_alloc_object_tlab
 
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_array_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: klass, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
+    movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
+    // Null check so that we can load the lock word.
+    testl %ecx, %ecx
+    jz .Lart_quick_alloc_array_region_tlab_slow_path
+
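+    // THREAD_IS_GC_MARKING is nonzero only while the concurrent collector is in its
+    // marking phase; otherwise the class reference can be used as-is and the read
+    // barrier is skipped.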
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit; if it is 1, skip the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    // Outgoing argument set up
+    movq %rcx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rcx
+    POP rdx
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_region_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_array_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq %rdi, %rcx
+    // Already resolved, no null check.
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit; if it is 1, skip the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    // Outgoing argument set up
+    movq %rcx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rcx
+    POP rdx
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_resolved_region_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
+END_FUNCTION art_quick_alloc_array_resolved_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+    movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx    // Load the class
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_slow_path
+    // Test if the GC is marking.
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit; if it is 1, avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
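+    // Two 8-byte pushes plus the return address leave RSP at 8 mod 16; the extra
+    // 8 bytes restore the 16-byte alignment the ABI requires at the call below.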
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq %rdi, %rdx
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
+.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit; if it is 1, avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
+END_FUNCTION art_quick_alloc_object_resolved_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    // Might need a special macro since rsi and edx are 32b/64b mismatched.
+    movq %rdi, %rdx
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
+.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit; if it is 1, avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
+END_FUNCTION art_quick_alloc_object_initialized_region_tlab
+
+DEFINE_FUNCTION art_quick_resolve_string
+    movq 8(%rsp), %rcx                                         // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx         // get declaring class
+    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx  // get string dex cache
+    movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx       // Compute the cache slot:
+    andq %rdi, %rdx                                            // index & (cache size - 1),
+    shlq LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %rdx    // scaled by the element size
+    addq %rcx, %rdx                                            // and added to the cache base.
+    movq %rax, %rcx                                            // Save RAX (restored on the slow path).
+    movq (%rdx), %rdx                                          // Load the packed 64-bit entry.
+    movq %rdx, %rax
+    movl %eax, %eax                                            // Low 32 bits: the String reference.
+    shrq LITERAL(32), %rdx                                     // High 32 bits: the cached string index.
+    cmp %rdx, %rdi                                             // Does the cached index match?
+    jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_resolve_string_marking
+#endif
+    ret
+// Slow path: the cached index did not match.
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME
+    movq %rcx, %rax
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
+    call SYMBOL(artResolveStringFromCode)       // artResolveStringFromCode(arg0, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// GC is marking case: check the mark bit.
+.Lart_quick_resolve_string_marking:
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax)
+    jnz .Lart_quick_resolve_string_no_rb
+    // Save RDI and RSI so that we can restore them after the call.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp                         // 16 byte alignment
+    movq %rax, %rdi
+    call SYMBOL(artReadBarrierMark)
+    addq LITERAL(8), %rsp
+    POP  rsi
+    POP  rdi
+.Lart_quick_resolve_string_no_rb:
+    ret
+END_FUNCTION art_quick_resolve_string
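
The probe above relies on each dex-cache slot packing the 32-bit compressed String reference and the 32-bit dex string index into a single 64-bit word, so one load fetches both. A hedged C++ sketch of the lookup, assuming a power-of-two cache size (1024 here, standing in for STRING_DEX_CACHE_SIZE_MINUS_ONE) and plain integers in place of the real compressed references:

    #include <cstdint>

    namespace sketch {

    // Assumption: power-of-two cache size.
    constexpr uint64_t kCacheSizeMinusOne = 1024 - 1;

    // Each 64-bit slot packs the compressed String reference in the low half
    // and the dex string index in the high half.
    uint32_t ResolveString(const uint64_t* cache,
                           uint32_t string_idx,
                           uint32_t (*slow_path)(uint32_t)) {
      uint64_t entry = cache[string_idx & kCacheSizeMinusOne];
      uint32_t cached_ref = static_cast<uint32_t>(entry);        // movl %eax, %eax
      uint32_t cached_idx = static_cast<uint32_t>(entry >> 32);  // shrq $32, %rdx
      if (cached_idx == string_idx) {
        return cached_ref;           // hit: no runtime call needed
      }
      return slow_path(string_idx);  // miss: artResolveStringFromCode
    }

    }  // namespace sketch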
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -875,7 +1390,7 @@
     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
     jne  .Lslow_lock                      // Slow path if either of the two high bits is set.
     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // Lock word contains a thin lock.
     // unlocked case - edx: original lock word, edi: obj.
@@ -890,9 +1405,9 @@
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
     movl %edx, %ecx                       // copy the lock word to check count overflow.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
-    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set
+    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
     jne  .Lslow_lock                      // count overflowed so go slow
     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
@@ -901,13 +1416,21 @@
     jnz  .Lretry_lock                     // cmpxchg failed retry
     ret
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
+DEFINE_FUNCTION art_quick_lock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
+    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object_no_inline
+
 DEFINE_FUNCTION art_quick_unlock_object
     testl %edi, %edi                      // null check object/edi
     jz   .Lslow_unlock
@@ -919,12 +1442,12 @@
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
     // update lockword, cmpxchg necessary for read barrier bits.
     movl %ecx, %eax                       // eax: old lock word.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
 #ifndef USE_READ_BARRIER
     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
 #else
@@ -944,13 +1467,21 @@
 #endif
     ret
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
+DEFINE_FUNCTION art_quick_unlock_object_no_inline
+    SETUP_SAVE_REFS_ONLY_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
+    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object_no_inline
+
 DEFINE_FUNCTION art_quick_check_cast
     PUSH rdi                          // Save args for exc
     PUSH rsi
@@ -973,7 +1504,7 @@
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rsi                           // Pop arguments
     POP rdi
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
     call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
     UNREACHABLE
@@ -1150,7 +1681,7 @@
     POP  rsi
     POP  rdi
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
 
     // Outgoing argument set up.
     movq %rdx, %rsi                         // Pass arg 2 = value.
@@ -1166,7 +1697,14 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+    SETUP_SAVE_EVERYTHING_FRAME                 // save everything for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
+    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
+    ret
+END_FUNCTION art_quick_test_suspend
 
 UNIMPLEMENTED art_quick_ldiv
 UNIMPLEMENTED art_quick_lmod
@@ -1206,48 +1744,71 @@
 DEFINE_FUNCTION art_quick_set64_static
                                          // new_val is already in %rdx
     movq 8(%rsp), %rsi                   // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                          // field_idx is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     RETURN_IF_EAX_ZERO                   // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
 
     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
     movq %rsp, %rcx                         // Pass SP.
     call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     movq %rax, %xmm0                        // Copy return value in case of float returns.
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
 
     /*
      * Called to resolve an imt conflict.
-     * rax is a hidden argument that holds the target method's dex method index.
+     * rdi is the conflict ArtMethod.
+     * rax is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to r10 and rdi.
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
 #if defined(__APPLE__)
     int3
     int3
 #else
-    movq %rax, %rdi
+    movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer
+    movq ART_METHOD_DEX_CACHE_METHODS_OFFSET_64(%r10), %r10   // Load dex cache methods array
+    movq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load interface method
+    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
+.Limt_table_iterate:
+    cmpq %r10, 0(%rdi)
+    jne .Limt_table_next_entry
+    // We successfully hit an entry in the table. Load the target method
+    // and jump to it.
+    movq __SIZEOF_POINTER__(%rdi), %rdi
+    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
+.Limt_table_next_entry:
+    // If the entry is null, the interface method is not in the ImtConflictTable.
+    cmpq LITERAL(0), 0(%rdi)
+    jz .Lconflict_trampoline
+    // Iterate over the entries of the ImtConflictTable.
+    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
+    jmp .Limt_table_iterate
+.Lconflict_trampoline:
+    // Call the runtime stub to populate the ImtConflictTable and jump to the
+    // resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 #endif  // __APPLE__
 END_FUNCTION art_quick_imt_conflict_trampoline
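
The .Limt_table_iterate loop above is a linear scan over a null-terminated array of (interface method, implementation) pointer pairs. The same logic as a C++ sketch, with stand-in types:

    namespace sketch {

    struct ArtMethodStub;  // opaque stand-in for ArtMethod

    // An ImtConflictTable entry: the interface method is the key, the
    // implementation is the dispatch target. A null key terminates the table.
    struct ImtEntry {
      ArtMethodStub* interface_method;
      ArtMethodStub* implementation;
    };

    ArtMethodStub* LookupImt(const ImtEntry* table, ArtMethodStub* imethod) {
      for (const ImtEntry* e = table; e->interface_method != nullptr; ++e) {
        if (e->interface_method == imethod) {
          return e->implementation;  // hit: tail-call its quick code
        }
      }
      return nullptr;  // miss: fall back to artInvokeInterfaceTrampoline
    }

    }  // namespace sketch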
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rdx
     movq %rsp, %rcx
     call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
     movq %rax, %r10               // Remember returned code pointer in R10.
     movq (%rsp), %rdi             // Load called method into RDI.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     testq %r10, %r10              // If code pointer is null goto deliver pending exception.
     jz 1f
     jmp *%r10                     // Tail call into method.
@@ -1332,7 +1893,7 @@
      * Called to do a generic JNI down-call
      */
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
 
     movq %rsp, %rbp                 // save SP at (old) callee-save frame
     CFI_DEF_CFA_REGISTER(rbp)
@@ -1465,11 +2026,11 @@
      * RSI, RDX, RCX, R8, R9 are arguments to that method.
      */
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
-    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
-    movq %rsp, %rdx                        // RDX := sp
+    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
+    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
+    movq %rsp, %rdx                    // RDX := sp
     call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
     movq %rax, %xmm0                   // Place return value also into floating point return value.
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
@@ -1482,12 +2043,12 @@
     int3
     int3
 #else
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
 
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.
+    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.
 
     call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
 
@@ -1495,9 +2056,9 @@
     movq %r12, %rdi               // Reload method pointer.
 
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
-    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
+    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jmp *%rax                     // Tail call to intended method.
 #endif  // __APPLE__
@@ -1506,7 +2067,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
 
     // We need to save rax and xmm0. We could use a callee-save from SETUP_SAVE_REFS_ONLY_FRAME, but then
     // we would need to fully restore it. As there are a good number of callee-save registers, it
@@ -1533,7 +2094,7 @@
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rax                   // Restore integer result.
 
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
 
     addq LITERAL(8), %rsp     // Drop fake return pc.
 
@@ -1547,7 +2108,7 @@
 DEFINE_FUNCTION art_quick_deoptimize
     pushq %rsi                         // Entry point for a jump. Fake that we were called.
                                        // Use hidden arg.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                        // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
     call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
@@ -1559,7 +2120,7 @@
      * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                                 // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
     call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
@@ -1629,6 +2190,119 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+// Create a function `name` calling the ReadBarrier::Mark routine,
+// getting its argument and returning its result through register
+// `reg`, saving and restoring all caller-save registers.
+//
+// The generated function follows a non-standard runtime calling
+// convention:
+// - register `reg` (which may be different from RDI) is used to pass
+//   the (sole) argument of this function;
+// - register `reg` (which may be different from RAX) is used to return
+//   the result of this function (instead of RAX);
+// - if `reg` is different from `rdi`, RDI is treated like a normal
+//   (non-argument) caller-save register;
+// - if `reg` is different from `rax`, RAX is treated like a normal
+//   (non-result) caller-save register;
+// - everything else is the same as in the standard runtime calling
+//   convention (e.g. standard callee-save registers are preserved).
+MACRO2(READ_BARRIER_MARK_REG, name, reg)
+    DEFINE_FUNCTION VAR(name)
+    // Null check so that we can load the lock word.
+    testq REG_VAR(reg), REG_VAR(reg)
+    jz .Lret_rb_\name
+    // Check the mark bit; if it is 1, return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+    jz .Lslow_rb_\name
+    ret
+.Lslow_rb_\name:
+    // Save all potentially live caller-save core registers.
+    PUSH rax
+    PUSH rcx
+    PUSH rdx
+    PUSH rsi
+    PUSH rdi
+    PUSH r8
+    PUSH r9
+    PUSH r10
+    PUSH r11
+    // Create space for caller-save floating-point registers.
+    subq MACRO_LITERAL(12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(12 * 8)
+    // Save all potentially live caller-save floating-point registers.
+    movq %xmm0, 0(%rsp)
+    movq %xmm1, 8(%rsp)
+    movq %xmm2, 16(%rsp)
+    movq %xmm3, 24(%rsp)
+    movq %xmm4, 32(%rsp)
+    movq %xmm5, 40(%rsp)
+    movq %xmm6, 48(%rsp)
+    movq %xmm7, 56(%rsp)
+    movq %xmm8, 64(%rsp)
+    movq %xmm9, 72(%rsp)
+    movq %xmm10, 80(%rsp)
+    movq %xmm11, 88(%rsp)
+    SETUP_FP_CALLEE_SAVE_FRAME
+
+    .ifnc RAW_VAR(reg), rdi
+      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
+    .endif
+    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
+    .ifnc RAW_VAR(reg), rax
+      movq %rax, REG_VAR(reg)       // Return result into `reg`.
+    .endif
+
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    // Restore floating-point registers.
+    movq 0(%rsp), %xmm0
+    movq 8(%rsp), %xmm1
+    movq 16(%rsp), %xmm2
+    movq 24(%rsp), %xmm3
+    movq 32(%rsp), %xmm4
+    movq 40(%rsp), %xmm5
+    movq 48(%rsp), %xmm6
+    movq 56(%rsp), %xmm7
+    movq 64(%rsp), %xmm8
+    movq 72(%rsp), %xmm9
+    movq 80(%rsp), %xmm10
+    movq 88(%rsp), %xmm11
+    // Remove floating-point registers.
+    addq MACRO_LITERAL(12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
+    // Restore core regs, except `reg`, as it is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REG_NE r11, RAW_VAR(reg)
+    POP_REG_NE r10, RAW_VAR(reg)
+    POP_REG_NE r9, RAW_VAR(reg)
+    POP_REG_NE r8, RAW_VAR(reg)
+    POP_REG_NE rdi, RAW_VAR(reg)
+    POP_REG_NE rsi, RAW_VAR(reg)
+    POP_REG_NE rdx, RAW_VAR(reg)
+    POP_REG_NE rcx, RAW_VAR(reg)
+    POP_REG_NE rax, RAW_VAR(reg)
+.Lret_rb_\name:
+    ret
+    END_FUNCTION VAR(name)
+END_MACRO
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
+// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
+// cannot be used to pass arguments.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
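
One stub is generated per addressable general-purpose register, so a code generator can pick the stub by the register number holding the reference. A sketch of that selection with illustrative names; the real entrypoint wiring is not part of this patch:

    #include <cstddef>

    namespace sketch {

    // Non-standard convention: argument and result both live in the stub's
    // dedicated register, so the C-level type carries no parameters.
    using MarkStub = void (*)();

    constexpr size_t kNumRegs = 16;  // reg00..reg15; reg04 (RSP) unused
    MarkStub mark_stubs[kNumRegs];   // assumed to be populated elsewhere

    MarkStub StubForRegister(size_t reg_number) {
      // RSP (register 4) cannot hold the reference, so it has no stub.
      return (reg_number == 4) ? nullptr : mark_stubs[reg_number];
    }

    }  // namespace sketch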
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     SETUP_FP_CALLEE_SAVE_FRAME
     subq LITERAL(8), %rsp           // Alignment padding.
@@ -1639,3 +2313,75 @@
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
 END_FUNCTION art_quick_read_barrier_slow
+
+DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp                  // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_for_root_slow
+
+    /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = stack to copy
+     *   rsi = size of stack
+     *   rdx = pc to call
+     *   rcx = JValue* result
+     *   r8 = shorty
+     *   r9 = thread
+     *
+     * Note that the native C ABI has already aligned the stack to 16 bytes.
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save the non-volatiles.
+    PUSH rbp                      // Save rbp.
+    PUSH rcx                      // Save rcx/result*.
+    PUSH r8                       // Save r8/shorty*.
+
+    // Save callee saves.
+    PUSH rbx
+    PUSH r12
+    PUSH r13
+    PUSH r14
+    PUSH r15
+
+    pushq LITERAL(0)              // Push null for ArtMethod*.
+    movl %esi, %ecx               // rcx := size of stack
+    movq %rdi, %rsi               // rsi := stack to copy
+    call .Losr_entry
+
+    // Restore stack and callee-saves.
+    addq LITERAL(8), %rsp
+    POP r15
+    POP r14
+    POP r13
+    POP r12
+    POP rbx
+    POP r8
+    POP rcx
+    POP rbp
+    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
+    je .Losr_return_float_quick
+    movq %rax, (%rcx)              // Store the result, assuming it's a long, int or Object*.
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%rcx)            // Store the double floating point result.
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%rcx)            // Store the floating point result.
+    ret
+.Losr_entry:
+    subl LITERAL(8), %ecx         // The given stack size includes the pushed frame pointer; subtract it.
+    subq %rcx, %rsp
+    movq %rsp, %rdi               // rdi := beginning of stack
+    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
+    jmp *%rdx
+END_FUNCTION art_quick_osr_stub
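
At the C level, .Losr_entry amounts to carving out the (frame-pointer-adjusted) stack size and copying the prepared frame before transferring control; rep movsb plays the role of memcpy. A rough, non-portable sketch of just the copy step, under those assumptions:

    #include <cstdint>
    #include <cstring>

    namespace sketch {

    // Illustrative only: the real entry adjusts rsp and jumps to the OSR
    // pc, which portable C++ cannot express.
    void OsrCopyFrame(uint8_t* sp, const uint8_t* frame, uint32_t stack_size) {
      uint32_t copy_size = stack_size - 8;  // frame pointer slot already pushed
      std::memcpy(sp - copy_size, frame, copy_size);
    }

    }  // namespace sketch
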
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 72d7e99..867522f 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -25,12 +25,19 @@
 namespace art {
 namespace x86_64 {
 
+static constexpr uint32_t kX86_64CalleeSaveAlwaysSpills =
+    (1 << art::x86_64::kNumberOfCpuRegisters);  // Fake return address callee save.
 static constexpr uint32_t kX86_64CalleeSaveRefSpills =
     (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
     (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
 static constexpr uint32_t kX86_64CalleeSaveArgSpills =
     (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
     (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
+static constexpr uint32_t kX86_64CalleeSaveEverythingSpills =
+    (1 << art::x86_64::RAX) | (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) |
+    (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) | (1 << art::x86_64::R8) |
+    (1 << art::x86_64::R9) | (1 << art::x86_64::R10) | (1 << art::x86_64::R11);
+
 static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
     (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
     (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
@@ -38,22 +45,30 @@
 static constexpr uint32_t kX86_64CalleeSaveFpSpills =
     (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
     (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
+static constexpr uint32_t kX86_64CalleeSaveFpEverythingSpills =
+    (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) |
+    (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) |
+    (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
+    (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7) |
+    (1 << art::x86_64::XMM8) | (1 << art::x86_64::XMM9) |
+    (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11);
 
 constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kX86_64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
-      (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save;
+  return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kX86_64CalleeSaveFpSpills |
-      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86_64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(X86_64CalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kX86_64PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kX86_64PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo X86_64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
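
For concreteness, the frame sizes these definitions produce can be checked by hand, assuming 8-byte pointers and the usual 16-byte kStackAlignment:

    #include <cstdint>

    constexpr uint32_t RoundUp(uint32_t x, uint32_t n) {  // n: power of two
      return (x + n - 1) & ~(n - 1);
    }

    // kSaveRefsOnly: fake return address + RBX/RBP/R12-R15 = 7 gprs,
    // XMM12-XMM15 = 4 fprs, plus the ArtMethod* slot.
    static_assert(RoundUp((7 + 4 + 1) * 8, 16) == 96, "SaveRefsOnly frame");

    // kSaveEverything adds 9 core and 12 fp spills: 16 gprs, 16 fprs, Method*.
    static_assert(RoundUp((16 + 16 + 1) * 8, 16) == 272, "SaveEverything frame");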
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 4166e22..a102858 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -33,11 +33,12 @@
 
 namespace art {
 
+template<ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtField::GetDeclaringClass() {
   GcRootSource gc_root_source(this);
-  mirror::Class* result = declaring_class_.Read(&gc_root_source);
+  mirror::Class* result = declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
   DCHECK(result != nullptr);
-  DCHECK(result->IsLoaded() || result->IsErroneous());
+  DCHECK(result->IsLoaded() || result->IsErroneous()) << result->GetStatus();
   return result;
 }
 
@@ -122,21 +123,21 @@
 
 #define FIELD_GET(object, type) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    return object->GetField ## type ## Volatile(GetOffset()); \
+    return (object)->GetField ## type ## Volatile(GetOffset()); \
   } \
-  return object->GetField ## type(GetOffset());
+  return (object)->GetField ## type(GetOffset());
 
 #define FIELD_SET(object, type, value) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    object->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
   } else { \
-    object->SetField ## type<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type<kTransactionActive>(GetOffset(), value); \
   }
 
 inline uint8_t ArtField::GetBoolean(mirror::Object* object) {
@@ -334,6 +335,58 @@
   visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
 }
 
+template <typename Visitor>
+inline void ArtField::UpdateObjects(const Visitor& visitor) {
+  mirror::Class* old_class = DeclaringClassRoot().Read<kWithoutReadBarrier>();
+  mirror::Class* new_class = visitor(old_class);
+  if (old_class != new_class) {
+    SetDeclaringClass(new_class);
+  }
+}
+
+// If kExactOffset is true, we only match a field whose offset equals field_offset exactly;
+// otherwise we also match the field whose storage merely contains the offset.
+template <bool kExactOffset>
+static inline ArtField* FindFieldWithOffset(
+    const IterationRange<StrideIterator<ArtField>>& fields,
+    uint32_t field_offset) SHARED_REQUIRES(Locks::mutator_lock_) {
+  for (ArtField& field : fields) {
+    if (kExactOffset) {
+      if (field.GetOffset().Uint32Value() == field_offset) {
+        return &field;
+      }
+    } else {
+      const uint32_t offset = field.GetOffset().Uint32Value();
+      Primitive::Type type = field.GetTypeAsPrimitiveType();
+      const size_t field_size = Primitive::ComponentSize(type);
+      DCHECK_GT(field_size, 0u);
+      if (offset <= field_offset && field_offset < offset + field_size) {
+        return &field;
+      }
+    }
+  }
+  return nullptr;
+}
+
+template <bool kExactOffset>
+inline ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass,
+                                                       uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  ArtField* field = FindFieldWithOffset<kExactOffset>(klass->GetIFields(), field_offset);
+  if (field != nullptr) {
+    return field;
+  }
+  // We did not find the field in this class: look in the superclass.
+  return (klass->GetSuperClass() != nullptr) ?
+      FindInstanceFieldWithOffset<kExactOffset>(klass->GetSuperClass(), field_offset) : nullptr;
+}
+
+template <bool kExactOffset>
+inline ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  return FindFieldWithOffset<kExactOffset>(klass->GetSFields(), field_offset);
+}
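
A hedged usage sketch of the new kExactOffset parameter; the helper below is hypothetical and assumes a valid klass with the mutator lock held, as the real declarations require:

    #include "art_field-inl.h"

    namespace art {

    // Hypothetical helper: recover the enclosing field from an interior
    // offset, e.g. byte 4 of a long field. kExactOffset = false makes the
    // lookup accept any offset inside the field's storage.
    ArtField* FindEnclosingInstanceField(mirror::Class* klass, uint32_t byte_offset)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      return ArtField::FindInstanceFieldWithOffset</*kExactOffset=*/ false>(
          klass, byte_offset);
    }

    }  // namespace art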
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_FIELD_INL_H_
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 3737e0d..ea5078e 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -47,28 +47,6 @@
   offset_ = num_bytes.Uint32Value();
 }
 
-ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  for (ArtField& field : klass->GetIFields()) {
-    if (field.GetOffset().Uint32Value() == field_offset) {
-      return &field;
-    }
-  }
-  // We did not find field in the class: look into superclass.
-  return (klass->GetSuperClass() != nullptr) ?
-      FindInstanceFieldWithOffset(klass->GetSuperClass(), field_offset) : nullptr;
-}
-
-ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  for (ArtField& field : klass->GetSFields()) {
-    if (field.GetOffset().Uint32Value() == field_offset) {
-      return &field;
-    }
-  }
-  return nullptr;
-}
-
 mirror::Class* ArtField::ProxyFindSystemClass(const char* descriptor) {
   DCHECK(GetDeclaringClass()->IsProxyClass());
   return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
diff --git a/runtime/art_field.h b/runtime/art_field.h
index a943a34..aaccbf3 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -41,6 +41,7 @@
  public:
   ArtField();
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetDeclaringClass(mirror::Class *new_declaring_class)
@@ -159,9 +160,16 @@
   }
 
   // Returns an instance field with this offset in the given class or null if not found.
+  // If kExactOffset is true, we only match a field whose offset equals field_offset exactly;
+  // otherwise we also match the field whose storage merely contains the offset.
+  template <bool kExactOffset = true>
   static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Returns a static field with this offset in the given class or null if not found.
+  // If kExactOffset is true, we only match a field whose offset equals field_offset exactly;
+  // otherwise we also match the field whose storage merely contains the offset.
+  template <bool kExactOffset = true>
   static ArtField* FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -190,6 +198,11 @@
     return declaring_class_;
   }
 
+  // Update the declaring class with the passed in visitor. Does not use read barrier.
+  template <typename Visitor>
+  ALWAYS_INLINE void UpdateObjects(const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   mirror::Class* ProxyFindSystemClass(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index f741732..1659f33 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -41,17 +41,15 @@
 
 namespace art {
 
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClassUnchecked() {
   GcRootSource gc_root_source(this);
-  return declaring_class_.Read(&gc_root_source);
+  return declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
 }
 
-inline mirror::Class* ArtMethod::GetDeclaringClassNoBarrier() {
-  return declaring_class_.Read<kWithoutReadBarrier>();
-}
-
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClass() {
-  mirror::Class* result = GetDeclaringClassUnchecked();
+  mirror::Class* result = GetDeclaringClassUnchecked<kReadBarrierOption>();
   if (kIsDebugBuild) {
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
@@ -79,24 +77,30 @@
 
 // AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
 // TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
-ALWAYS_INLINE
-static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
-  CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() ||
-        method->GetDeclaringClass()->IsErroneous());
+template <ReadBarrierOption kReadBarrierOption>
+ALWAYS_INLINE static inline void DoGetAccessFlagsHelper(ArtMethod* method)
+    NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
 }
 
+template <ReadBarrierOption kReadBarrierOption>
 inline uint32_t ArtMethod::GetAccessFlags() {
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
-      ScopedObjectAccess soa(self);
-      CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-            GetDeclaringClass()->IsErroneous());
+      if (self->IsThreadSuspensionAllowable()) {
+        ScopedObjectAccess soa(self);
+        CHECK(IsRuntimeMethod() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+      }
     } else {
       // We cannot use SOA in this case. We might be holding the lock, but may not be in the
       // runnable state (e.g., during GC).
       Locks::mutator_lock_->AssertSharedHeld(self);
-      DoGetAccessFlagsHelper(this);
+      DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
   return access_flags_;
@@ -118,20 +122,25 @@
   return dex_method_index_;
 }
 
-inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
+inline uint32_t ArtMethod::GetImtIndex() {
+  return GetDexMethodIndex() % ImTable::kSize;
+}
+
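A tiny illustration of the mapping, assuming the conventional ImTable::kSize of 64 (an assumption here; the constant is build-configurable): dex method indices 3, 64 and 67 land in IMT slots 3, 0 and 3, so indices 3 and 67 would conflict and share a conflict table.

    static_assert(3 % 64 == 3 && 64 % 64 == 0 && 67 % 64 == 3, "IMT slot arithmetic");
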
+inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(PointerSize pointer_size) {
   return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
                                        pointer_size);
 }
 
-inline ArtMethod* ArtMethod::GetDexCacheResolvedMethod(uint16_t method_index, size_t ptr_size) {
+inline ArtMethod* ArtMethod::GetDexCacheResolvedMethod(uint16_t method_index,
+                                                       PointerSize pointer_size) {
   // NOTE: Unchecked, i.e. not throwing AIOOB. We don't even know the length here
   // without accessing the DexCache and we don't want to do that in release build.
   DCHECK_LT(method_index,
-            GetInterfaceMethodIfProxy(ptr_size)->GetDeclaringClass()
+            GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()
                 ->GetDexCache()->NumResolvedMethods());
-  ArtMethod* method = mirror::DexCache::GetElementPtrSize(GetDexCacheResolvedMethods(ptr_size),
+  ArtMethod* method = mirror::DexCache::GetElementPtrSize(GetDexCacheResolvedMethods(pointer_size),
                                                           method_index,
-                                                          ptr_size);
+                                                          pointer_size);
   if (LIKELY(method != nullptr)) {
     auto* declaring_class = method->GetDeclaringClass();
     if (LIKELY(declaring_class == nullptr || !declaring_class->IsErroneous())) {
@@ -141,70 +150,72 @@
   return nullptr;
 }
 
-inline void ArtMethod::SetDexCacheResolvedMethod(uint16_t method_index, ArtMethod* new_method,
-                                                 size_t ptr_size) {
+inline void ArtMethod::SetDexCacheResolvedMethod(uint16_t method_index,
+                                                 ArtMethod* new_method,
+                                                 PointerSize pointer_size) {
   // NOTE: Unchecked, i.e. not throwing AIOOB. We don't even know the length here
   // without accessing the DexCache and we don't want to do that in release build.
   DCHECK_LT(method_index,
-            GetInterfaceMethodIfProxy(ptr_size)->GetDeclaringClass()
+            GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()
                 ->GetDexCache()->NumResolvedMethods());
   DCHECK(new_method == nullptr || new_method->GetDeclaringClass() != nullptr);
-  mirror::DexCache::SetElementPtrSize(GetDexCacheResolvedMethods(ptr_size),
+  mirror::DexCache::SetElementPtrSize(GetDexCacheResolvedMethods(pointer_size),
                                       method_index,
                                       new_method,
-                                      ptr_size);
+                                      pointer_size);
 }
 
-inline bool ArtMethod::HasDexCacheResolvedMethods(size_t pointer_size) {
+inline bool ArtMethod::HasDexCacheResolvedMethods(PointerSize pointer_size) {
   return GetDexCacheResolvedMethods(pointer_size) != nullptr;
 }
 
 inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod** other_cache,
-                                                      size_t pointer_size) {
+                                                      PointerSize pointer_size) {
   return GetDexCacheResolvedMethods(pointer_size) == other_cache;
 }
 
-inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod* other, size_t pointer_size) {
+inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod* other, PointerSize pointer_size) {
   return GetDexCacheResolvedMethods(pointer_size) ==
       other->GetDexCacheResolvedMethods(pointer_size);
 }
 
-inline GcRoot<mirror::Class>* ArtMethod::GetDexCacheResolvedTypes(size_t pointer_size) {
+inline GcRoot<mirror::Class>* ArtMethod::GetDexCacheResolvedTypes(PointerSize pointer_size) {
   return GetNativePointer<GcRoot<mirror::Class>*>(DexCacheResolvedTypesOffset(pointer_size),
                                                   pointer_size);
 }
 
 template <bool kWithCheck>
-inline mirror::Class* ArtMethod::GetDexCacheResolvedType(uint32_t type_index, size_t ptr_size) {
+inline mirror::Class* ArtMethod::GetDexCacheResolvedType(uint32_t type_index,
+                                                         PointerSize pointer_size) {
   if (kWithCheck) {
     mirror::DexCache* dex_cache =
-        GetInterfaceMethodIfProxy(ptr_size)->GetDeclaringClass()->GetDexCache();
+        GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()->GetDexCache();
     if (UNLIKELY(type_index >= dex_cache->NumResolvedTypes())) {
       ThrowArrayIndexOutOfBoundsException(type_index, dex_cache->NumResolvedTypes());
       return nullptr;
     }
   }
-  mirror::Class* klass = GetDexCacheResolvedTypes(ptr_size)[type_index].Read();
+  mirror::Class* klass = GetDexCacheResolvedTypes(pointer_size)[type_index].Read();
   return (klass != nullptr && !klass->IsErroneous()) ? klass : nullptr;
 }
 
-inline bool ArtMethod::HasDexCacheResolvedTypes(size_t pointer_size) {
+inline bool ArtMethod::HasDexCacheResolvedTypes(PointerSize pointer_size) {
   return GetDexCacheResolvedTypes(pointer_size) != nullptr;
 }
 
 inline bool ArtMethod::HasSameDexCacheResolvedTypes(GcRoot<mirror::Class>* other_cache,
-                                                    size_t pointer_size) {
+                                                    PointerSize pointer_size) {
   return GetDexCacheResolvedTypes(pointer_size) == other_cache;
 }
 
-inline bool ArtMethod::HasSameDexCacheResolvedTypes(ArtMethod* other, size_t pointer_size) {
+inline bool ArtMethod::HasSameDexCacheResolvedTypes(ArtMethod* other, PointerSize pointer_size) {
   return GetDexCacheResolvedTypes(pointer_size) == other->GetDexCacheResolvedTypes(pointer_size);
 }
 
 inline mirror::Class* ArtMethod::GetClassFromTypeIndex(uint16_t type_idx,
                                                        bool resolve,
-                                                       size_t ptr_size) {
-  mirror::Class* type = GetDexCacheResolvedType(type_idx, ptr_size);
+                                                       PointerSize pointer_size) {
+  mirror::Class* type = GetDexCacheResolvedType(type_idx, pointer_size);
   if (type == nullptr && resolve) {
     type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, this);
     CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
@@ -225,8 +236,7 @@
     }
     case kSuper:
       // Constructors and static methods are called with invoke-direct.
-      // Interface methods cannot be invoked with invoke-super.
-      return IsConstructor() || IsStatic() || GetDeclaringClass()->IsInterface();
+      return IsConstructor() || IsStatic();
     case kInterface: {
       mirror::Class* methods_class = GetDeclaringClass();
       return IsDirect() || !(methods_class->IsInterface() || methods_class->IsObjectClass());
@@ -263,13 +273,6 @@
   return result;
 }
 
-inline bool ArtMethod::IsImtConflictMethod() {
-  bool result = this == Runtime::Current()->GetImtConflictMethod();
-  // Check that if we do think it is phony it looks like the imt conflict method.
-  DCHECK(!result || IsRuntimeMethod());
-  return result;
-}
-
 inline bool ArtMethod::IsImtUnimplementedMethod() {
   bool result = this == Runtime::Current()->GetImtUnimplementedMethod();
   // Check that if we do think it is phony it looks like the imt unimplemented method.
@@ -319,11 +322,11 @@
     return "<runtime internal resolution method>";
   } else if (this == runtime->GetImtConflictMethod()) {
     return "<runtime internal imt conflict method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
     return "<runtime internal callee-save all registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kRefsOnly)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly)) {
     return "<runtime internal callee-save reference registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs)) {
     return "<runtime internal callee-save reference and argument registers method>";
   } else {
     return "<unknown runtime internal method>";
@@ -334,9 +337,9 @@
   return GetDeclaringClass()->GetDexFile().GetCodeItem(GetCodeItemOffset());
 }
 
-inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx, size_t ptr_size) {
+inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx, PointerSize pointer_size) {
   DCHECK(!IsProxyMethod());
-  return GetDexCacheResolvedType(type_idx, ptr_size) != nullptr;
+  return GetDexCacheResolvedType(type_idx, pointer_size) != nullptr;
 }
 
 inline int32_t ArtMethod::GetLineNumFromDexPC(uint32_t dex_pc) {
@@ -401,11 +404,12 @@
   return GetDeclaringClass()->GetDexCache();
 }
 
+template<ReadBarrierOption kReadBarrierOption>
 inline bool ArtMethod::IsProxyMethod() {
-  return GetDeclaringClass()->IsProxyClass();
+  return GetDeclaringClass<kReadBarrierOption>()->IsProxyClass();
 }
 
-inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy(size_t pointer_size) {
+inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy(PointerSize pointer_size) {
   if (LIKELY(!IsProxyMethod())) {
     return this;
   }
@@ -421,22 +425,24 @@
 }
 
 inline void ArtMethod::SetDexCacheResolvedMethods(ArtMethod** new_dex_cache_methods,
-                                                  size_t ptr_size) {
-  SetNativePointer(DexCacheResolvedMethodsOffset(ptr_size), new_dex_cache_methods, ptr_size);
+                                                  PointerSize pointer_size) {
+  SetNativePointer(DexCacheResolvedMethodsOffset(pointer_size),
+                   new_dex_cache_methods,
+                   pointer_size);
 }
 
 inline void ArtMethod::SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types,
-                                                size_t ptr_size) {
-  SetNativePointer(DexCacheResolvedTypesOffset(ptr_size), new_dex_cache_types, ptr_size);
+                                                PointerSize pointer_size) {
+  SetNativePointer(DexCacheResolvedTypesOffset(pointer_size), new_dex_cache_types, pointer_size);
 }
 
-inline mirror::Class* ArtMethod::GetReturnType(bool resolve, size_t ptr_size) {
+inline mirror::Class* ArtMethod::GetReturnType(bool resolve, PointerSize pointer_size) {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(GetDexMethodIndex());
   const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
   uint16_t return_type_idx = proto_id.return_type_idx_;
-  mirror::Class* type = GetDexCacheResolvedType(return_type_idx, ptr_size);
+  mirror::Class* type = GetDexCacheResolvedType(return_type_idx, pointer_size);
   if (type == nullptr && resolve) {
     type = Runtime::Current()->GetClassLinker()->ResolveType(return_type_idx, this);
     CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
@@ -444,34 +450,77 @@
   return type;
 }
 
-template<typename RootVisitorType>
-void ArtMethod::VisitRoots(RootVisitorType& visitor, size_t pointer_size) {
-  ArtMethod* interface_method = nullptr;
-  mirror::Class* klass = declaring_class_.Read();
-  if (UNLIKELY(klass != nullptr && klass->IsProxyClass())) {
-    // For normal methods, dex cache shortcuts will be visited through the declaring class.
-    // However, for proxies we need to keep the interface method alive, so we visit its roots.
-    interface_method = mirror::DexCache::GetElementPtrSize(
-        GetDexCacheResolvedMethods(pointer_size),
-        GetDexMethodIndex(),
-        pointer_size);
-    DCHECK(interface_method != nullptr);
-    DCHECK_EQ(interface_method,
-              Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
-    interface_method->VisitRoots(visitor, pointer_size);
-  }
-
-  visitor.VisitRootIfNonNull(declaring_class_.AddressWithoutBarrier());
-  ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
-  if (hotness_count_ != 0 && !IsNative() && profiling_info != nullptr) {
-    profiling_info->VisitRoots(visitor);
+template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
+void ArtMethod::VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) {
+  if (LIKELY(!declaring_class_.IsNull())) {
+    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
+    mirror::Class* klass = declaring_class_.Read<kReadBarrierOption>();
+    if (UNLIKELY(klass->IsProxyClass())) {
+      // For normal methods, dex cache shortcuts will be visited through the declaring class.
+      // However, for proxies we need to keep the interface method alive, so we visit its roots.
+      ArtMethod* interface_method = mirror::DexCache::GetElementPtrSize(
+          GetDexCacheResolvedMethods(pointer_size),
+          GetDexMethodIndex(),
+          pointer_size);
+      DCHECK(interface_method != nullptr);
+      DCHECK_EQ(interface_method,
+                Runtime::Current()->GetClassLinker()->FindMethodForProxy<kReadBarrierOption>(
+                    klass, this));
+      interface_method->VisitRoots(visitor, pointer_size);
+    }
+    // We know we don't have profiling information if the class hasn't been verified. Note
+    // that this check also ensures the IsNative call can be made, as IsNative expects a fully
+    // created class (and not a retired one).
+    if (klass->IsVerified()) {
+      // Runtime methods and native methods use the same field as the profiling info for
+      // storing their own data (jni entrypoint for native methods, and ImtConflictTable for
+      // some runtime methods).
+      if (!IsNative<kReadBarrierOption>() && !IsRuntimeMethod()) {
+        ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
+        if (profiling_info != nullptr) {
+          profiling_info->VisitRoots(visitor);
+        }
+      }
+    }
   }
 }
 
-inline void ArtMethod::CopyFrom(const ArtMethod* src, size_t image_pointer_size) {
-  memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
-         Size(image_pointer_size));
-  declaring_class_ = GcRoot<mirror::Class>(const_cast<ArtMethod*>(src)->GetDeclaringClass());
+template <typename Visitor>
+inline void ArtMethod::UpdateObjectsForImageRelocation(const Visitor& visitor,
+                                                       PointerSize pointer_size) {
+  mirror::Class* old_class = GetDeclaringClassUnchecked<kWithoutReadBarrier>();
+  mirror::Class* new_class = visitor(old_class);
+  if (old_class != new_class) {
+    SetDeclaringClass(new_class);
+  }
+  ArtMethod** old_methods = GetDexCacheResolvedMethods(pointer_size);
+  ArtMethod** new_methods = visitor(old_methods);
+  if (old_methods != new_methods) {
+    SetDexCacheResolvedMethods(new_methods, pointer_size);
+  }
+  GcRoot<mirror::Class>* old_types = GetDexCacheResolvedTypes(pointer_size);
+  GcRoot<mirror::Class>* new_types = visitor(old_types);
+  if (old_types != new_types) {
+    SetDexCacheResolvedTypes(new_types, pointer_size);
+  }
+}
+
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
+inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor, PointerSize pointer_size) {
+  if (IsNative<kReadBarrierOption>()) {
+    const void* old_native_code = GetEntryPointFromJniPtrSize(pointer_size);
+    const void* new_native_code = visitor(old_native_code);
+    if (old_native_code != new_native_code) {
+      SetEntryPointFromJniPtrSize(new_native_code, pointer_size);
+    }
+  } else {
+    DCHECK(GetDataPtrSize(pointer_size) == nullptr);
+  }
+  const void* old_code = GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+  const void* new_code = visitor(old_code);
+  if (old_code != new_code) {
+    SetEntryPointFromQuickCompiledCodePtrSize(new_code, pointer_size);
+  }
 }
 
 }  // namespace art
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f5befdf..f9bc249 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -16,6 +16,8 @@
 
 #include "art_method.h"
 
+#include <cstddef>
+
 #include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
@@ -24,7 +26,6 @@
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "interpreter/interpreter.h"
@@ -32,7 +33,6 @@
 #include "jit/jit_code_cache.h"
 #include "jit/profiling_info.h"
 #include "jni_internal.h"
-#include "mapping_table.h"
 #include "mirror/abstract_method.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
@@ -67,12 +67,24 @@
                                                              dex_cache);
 }
 
+void ArtMethod::ThrowInvocationTimeError() {
+  DCHECK(!IsInvokable());
+  // NOTE: IsDefaultConflicting must be first since the actual method might or might not be abstract
+  //       due to the way we select it.
+  if (IsDefaultConflicting()) {
+    ThrowIncompatibleClassChangeErrorForMethodConflict(this);
+  } else {
+    DCHECK(IsAbstract());
+    ThrowAbstractMethodError(this);
+  }
+}
+
 InvokeType ArtMethod::GetInvokeType() {
   // TODO: kSuper?
-  if (GetDeclaringClass()->IsInterface()) {
-    return kInterface;
-  } else if (IsStatic()) {
+  if (IsStatic()) {
     return kStatic;
+  } else if (GetDeclaringClass()->IsInterface()) {
+    return kInterface;
   } else if (IsDirect()) {
     return kDirect;
   } else {
@@ -94,24 +106,23 @@
   return num_registers;
 }
 
-static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ArtMethod::HasSameNameAndSignature(ArtMethod* other) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
-  const DexFile* dex_file = method1->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
-  if (method1->GetDexCache() == method2->GetDexCache()) {
-    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetDexMethodIndex());
+  if (GetDexCache() == other->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(other->GetDexMethodIndex());
     return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
   }
-  const DexFile* dex_file2 = method2->GetDexFile();
-  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file2 = other->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(other->GetDexMethodIndex());
   if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
     return false;  // Name mismatch.
   }
   return dex_file->GetMethodSignature(mid) == dex_file2->GetMethodSignature(mid2);
 }
 
-ArtMethod* ArtMethod::FindOverriddenMethod(size_t pointer_size) {
+ArtMethod* ArtMethod::FindOverriddenMethod(PointerSize pointer_size) {
   if (IsStatic()) {
     return nullptr;
   }
@@ -135,19 +146,18 @@
       mirror::IfTable* iftable = GetDeclaringClass()->GetIfTable();
       for (size_t i = 0; i < iftable->Count() && result == nullptr; i++) {
         mirror::Class* interface = iftable->GetInterface(i);
-        for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-          ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(
-              this, interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
-            result = interface_method;
+        for (ArtMethod& interface_method : interface->GetVirtualMethods(pointer_size)) {
+          if (HasSameNameAndSignature(interface_method.GetInterfaceMethodIfProxy(pointer_size))) {
+            result = &interface_method;
             break;
           }
         }
       }
     }
   }
-  DCHECK(result == nullptr || HasSameNameAndSignature(
-      GetInterfaceMethodIfProxy(sizeof(void*)), result->GetInterfaceMethodIfProxy(sizeof(void*))));
+  DCHECK(result == nullptr ||
+         GetInterfaceMethodIfProxy(pointer_size)->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(pointer_size)));
   return result;
 }
 
@@ -163,18 +173,13 @@
     return dex_method_idx;
   }
   const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
-  const DexFile::StringId* other_descriptor =
-      other_dexfile.FindStringId(mid_declaring_class_descriptor);
-  if (other_descriptor != nullptr) {
-    const DexFile::TypeId* other_type_id =
-        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
-    if (other_type_id != nullptr) {
-      const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
-          *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
-          other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
-      if (other_mid != nullptr) {
-        return other_dexfile.GetIndexForMethodId(*other_mid);
-      }
+  const DexFile::TypeId* other_type_id = other_dexfile.FindTypeId(mid_declaring_class_descriptor);
+  if (other_type_id != nullptr) {
+    const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
+        *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
+        other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
+    if (other_mid != nullptr) {
+      return other_dexfile.GetIndexForMethodId(*other_mid);
     }
   }
   return DexFile::kDexNoIndex;
@@ -191,7 +196,7 @@
   // Default to handler not found.
   uint32_t found_dex_pc = DexFile::kDexNoIndex;
   // Iterate over the catch handlers associated with dex_pc.
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   for (CatchHandlerIterator it(*code_item, dex_pc); it.HasNext(); it.Next()) {
     uint16_t iter_type_idx = it.GetHandlerTypeIndex();
     // Catch all case
@@ -240,7 +245,7 @@
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
-    CHECK_STREQ(GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(), shorty);
+    CHECK_STREQ(GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(), shorty);
   }
 
   // Push a transition back into managed code onto the linked list in thread.
@@ -250,17 +255,20 @@
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
   // If the runtime is not yet started or it is required by the debugger, then perform the
-  // Invocation by the interpreter.
+  // invocation by the interpreter, explicitly forcing interpretation over JIT to prevent
+  // cycling around the various JIT/Interpreter methods that handle method invocation.
   if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
-      art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
+      art::interpreter::EnterInterpreterFromInvoke(
+          self, this, nullptr, args, result, /*stay_in_interpreter*/ true);
     } else {
       mirror::Object* receiver =
           reinterpret_cast<StackReference<mirror::Object>*>(&args[0])->AsMirrorPtr();
-      art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1, result);
+      art::interpreter::EnterInterpreterFromInvoke(
+          self, this, receiver, args + 1, result, /*stay_in_interpreter*/ true);
     }
   } else {
-    DCHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
 
     constexpr bool kLogInvocationStartAndReturn = false;
     bool have_quick_code = GetEntryPointFromQuickCompiledCode() != nullptr;
@@ -273,7 +281,7 @@
 
       // Ensure that we won't be accidentally calling quick compiled code when -Xint.
       if (kIsDebugBuild && runtime->GetInstrumentation()->IsForcedInterpretOnly()) {
-        CHECK(!runtime->UseJit());
+        CHECK(!runtime->UseJitCompilation());
         const void* oat_quick_code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(this);
         CHECK(oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode())
             << "Don't call compiled code when -Xint " << PrettyMethod(this);
@@ -288,20 +296,7 @@
         // Unusual case where we were running generated code and an
         // exception was thrown to force the activations to be removed from the
         // stack. Continue execution in the interpreter.
-        self->ClearException();
-        ShadowFrame* shadow_frame =
-            self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
-        mirror::Throwable* pending_exception = nullptr;
-        self->PopDeoptimizationContext(result, &pending_exception);
-        self->SetTopOfStack(nullptr);
-        self->SetTopOfShadowStack(shadow_frame);
-
-        // Restore the exception that was pending before deoptimization then interpret the
-        // deoptimized frames.
-        if (pending_exception != nullptr) {
-          self->SetException(pending_exception);
-        }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
+        self->DeoptimizeWithDeoptimizationException(result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
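
The new Thread::DeoptimizeWithDeoptimizationException call consolidates the inline sequence deleted above. Reconstructed from those removed lines (a sketch; the actual helper may differ in detail), it would look roughly like this:

void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
  ClearException();
  ShadowFrame* shadow_frame =
      PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
  mirror::Throwable* pending_exception = nullptr;
  PopDeoptimizationContext(result, &pending_exception);
  SetTopOfStack(nullptr);
  SetTopOfShadowStack(shadow_frame);
  // Restore the exception that was pending before deoptimization, then
  // interpret the deoptimized frames.
  if (pending_exception != nullptr) {
    SetException(pending_exception);
  }
  interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, result);
}
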
@@ -335,6 +330,27 @@
   RegisterNative(GetJniDlsymLookupStub(), false);
 }
 
+bool ArtMethod::IsOverridableByDefaultMethod() {
+  return GetDeclaringClass()->IsInterface();
+}
+
+bool ArtMethod::IsAnnotatedWithFastNative() {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  StackHandleScope<1> shs(self);
+
+  const DexFile& dex_file = GetDeclaringClass()->GetDexFile();
+
+  mirror::Class* fast_native_annotation =
+      soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_annotation_optimization_FastNative);
+  Handle<mirror::Class> fast_native_handle(shs.NewHandle(fast_native_annotation));
+
+  // Note: Resolves any method annotations' classes as a side-effect.
+  // -- This seems allowed by the spec since it says we can preload any classes
+  //    referenced by another class's constant pool table.
+  return dex_file.IsMethodAnnotationPresent(this, fast_native_handle, DexFile::kDexVisibilityBuild);
+}
+
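Since the check deliberately ignores the kAccFastNative access flag, a caller that wants annotation-driven behavior has to invoke it explicitly. A hypothetical caller sketch (the helper name is invented; this is not actual ART code):

// Hypothetical helper, for illustration only: decide whether a native method
// may use a cheaper JNI transition, based on the annotation rather than the
// kAccFastNative access flag.
bool UsesFastJniTransition(ArtMethod* method) {
  return method->IsNative() && method->IsAnnotatedWithFastNative();
}
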
 bool ArtMethod::EqualParameters(Handle<mirror::ObjectArray<mirror::Class>> params) {
   auto* dex_cache = GetDexCache();
   auto* dex_file = dex_cache->GetDexFile();
@@ -361,19 +377,6 @@
   return true;
 }
 
-ProfilingInfo* ArtMethod::CreateProfilingInfo() {
-  DCHECK(!Runtime::Current()->IsAotCompiler());
-  ProfilingInfo* info = ProfilingInfo::Create(this);
-  MemberOffset offset = ArtMethod::EntryPointFromJniOffset(sizeof(void*));
-  uintptr_t pointer = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-  if (!reinterpret_cast<Atomic<ProfilingInfo*>*>(pointer)->
-          CompareExchangeStrongSequentiallyConsistent(nullptr, info)) {
-    return GetProfilingInfo(sizeof(void*));
-  } else {
-    return info;
-  }
-}
-
 const uint8_t* ArtMethod::GetQuickenedInfo() {
   bool found = false;
   OatFile::OatMethod oat_method =
@@ -385,27 +388,154 @@
 }
 
 const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
-  if (IsRuntimeMethod() || IsProxyMethod()) {
+  // Our callers should make sure they don't pass the instrumentation exit pc,
+  // as this method does not look at the side instrumentation stack.
+  DCHECK_NE(pc, reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()));
+
+  if (IsRuntimeMethod()) {
     return nullptr;
   }
 
   Runtime* runtime = Runtime::Current();
-  const void* code = runtime->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*));
-  DCHECK(code != nullptr);
+  const void* existing_entry_point = GetEntryPointFromQuickCompiledCode();
+  CHECK(existing_entry_point != nullptr) << PrettyMethod(this) << "@" << this;
+  ClassLinker* class_linker = runtime->GetClassLinker();
 
-  if (runtime->GetClassLinker()->IsQuickGenericJniStub(code)) {
+  if (class_linker->IsQuickGenericJniStub(existing_entry_point)) {
     // The generic JNI does not have any method header.
     return nullptr;
   }
 
-  code = EntryPointToCodePointer(code);
-  OatQuickMethodHeader* method_header = reinterpret_cast<OatQuickMethodHeader*>(
-      reinterpret_cast<uintptr_t>(code) - sizeof(OatQuickMethodHeader));
+  if (existing_entry_point == GetQuickProxyInvokeHandler()) {
+    DCHECK(IsProxyMethod() && !IsConstructor());
+    // The proxy entry point does not have any method header.
+    return nullptr;
+  }
 
-  // TODO(ngeoffray): validate the pc. Note that unit tests can give unrelated pcs (for
-  // example arch_test).
-  UNUSED(pc);
+  // Check whether the current entry point contains this pc.
+  if (!class_linker->IsQuickResolutionStub(existing_entry_point) &&
+      !class_linker->IsQuickToInterpreterBridge(existing_entry_point)) {
+    OatQuickMethodHeader* method_header =
+        OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+
+    if (method_header->Contains(pc)) {
+      return method_header;
+    }
+  }
+
+  // Check whether the pc is in the JIT code cache.
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit::JitCodeCache* code_cache = jit->GetCodeCache();
+    OatQuickMethodHeader* method_header = code_cache->LookupMethodHeader(pc, this);
+    if (method_header != nullptr) {
+      DCHECK(method_header->Contains(pc));
+      return method_header;
+    } else {
+      DCHECK(!code_cache->ContainsPc(reinterpret_cast<const void*>(pc)))
+          << PrettyMethod(this)
+          << ", pc=" << std::hex << pc
+          << ", entry_point=" << std::hex << reinterpret_cast<uintptr_t>(existing_entry_point)
+          << ", copy=" << std::boolalpha << IsCopied()
+          << ", proxy=" << std::boolalpha << IsProxyMethod();
+    }
+  }
+
+  // The code has to be in an oat file.
+  bool found;
+  OatFile::OatMethod oat_method = class_linker->FindOatMethodFor(this, &found);
+  if (!found) {
+    if (class_linker->IsQuickResolutionStub(existing_entry_point)) {
+      // We are running the generic JNI stub, but the entry point of the method has not
+      // been updated yet.
+      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      DCHECK(IsNative());
+      return nullptr;
+    }
+    if (existing_entry_point == GetQuickInstrumentationEntryPoint()) {
+      // We are running the generic JNI stub, but the method is being instrumented.
+      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      DCHECK(IsNative());
+      return nullptr;
+    }
+    // Only for unit tests.
+    // TODO(ngeoffray): Update these tests to pass the right pc?
+    return OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+  }
+  const void* oat_entry_point = oat_method.GetQuickCode();
+  if (oat_entry_point == nullptr || class_linker->IsQuickGenericJniStub(oat_entry_point)) {
+    DCHECK(IsNative()) << PrettyMethod(this);
+    return nullptr;
+  }
+
+  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromEntryPoint(oat_entry_point);
+  if (pc == 0) {
+    // This is a downcall; it can only happen for a native method.
+    DCHECK(IsNative());
+    return method_header;
+  }
+
+  DCHECK(method_header->Contains(pc))
+      << PrettyMethod(this)
+      << " " << std::hex << pc << " " << oat_entry_point
+      << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
   return method_header;
 }
 
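OatQuickMethodHeader::FromEntryPoint, used throughout the function above, presumably encapsulates the pointer arithmetic that the removed code performed inline: the method header is laid out immediately before the compiled code it describes. A sketch under that assumption:

// Sketch based on the removed inline computation above (not the verified
// implementation): strip any entry-point tagging, then step back over the
// header that precedes the code.
static OatQuickMethodHeader* FromEntryPoint(const void* entry_point) {
  const void* code = EntryPointToCodePointer(entry_point);
  return reinterpret_cast<OatQuickMethodHeader*>(
      reinterpret_cast<uintptr_t>(code) - sizeof(OatQuickMethodHeader));
}
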
+bool ArtMethod::HasAnyCompiledCode() {
+  // Check whether the JIT has compiled it.
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsMethod(this)) {
+    return true;
+  }
+
+  // Check whether we have AOT code.
+  return Runtime::Current()->GetClassLinker()->GetOatMethodQuickCodeFor(this) != nullptr;
+}
+
+void ArtMethod::CopyFrom(ArtMethod* src, PointerSize image_pointer_size) {
+  memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
+         Size(image_pointer_size));
+  declaring_class_ = GcRoot<mirror::Class>(const_cast<ArtMethod*>(src)->GetDeclaringClass());
+
+  // If the entry point of the method we are copying from is JIT code, we simply
+  // set the entry point of the new method to the interpreter bridge. We could
+  // keep the JIT code instead, but that would require taking the JIT code cache
+  // lock to notify it, which we do not want at this level.
+  Runtime* runtime = Runtime::Current();
+  if (runtime->UseJitCompilation()) {
+    if (runtime->GetJit()->GetCodeCache()->ContainsPc(GetEntryPointFromQuickCompiledCode())) {
+      SetEntryPointFromQuickCompiledCodePtrSize(GetQuickToInterpreterBridge(), image_pointer_size);
+    }
+  }
+  // Clear the profiling info for the same reasons as the JIT code.
+  if (!src->IsNative()) {
+    SetProfilingInfoPtrSize(nullptr, image_pointer_size);
+  }
+  // Clear hotness to let the JIT properly decide when to compile this method.
+  hotness_count_ = 0;
+}
+
+bool ArtMethod::IsImagePointerSize(PointerSize pointer_size) {
+  // Hijack this function to get access to PtrSizedFieldsOffset.
+  //
+  // Ensure that PtrSizedFieldsOffset is correct. We rely here on usually having both 32-bit and
+  // 64-bit builds.
+  static_assert(std::is_standard_layout<ArtMethod>::value, "ArtMethod is not standard layout.");
+  static_assert(
+      (sizeof(void*) != 4) ||
+          (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(PointerSize::k32)),
+      "Unexpected 32-bit class layout.");
+  static_assert(
+      (sizeof(void*) != 8) ||
+          (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(PointerSize::k64)),
+      "Unexpected 64-bit class layout.");
+
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr) {
+    return true;
+  }
+  return runtime->GetClassLinker()->GetImagePointerSize() == pointer_size;
+}
+
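The size_t-to-PointerSize migration visible throughout this change is a type-safety measure: with a scoped enum, callers can no longer pass sizeof(void*) or an arbitrary integer where a validated pointer size is expected, and every byte computation needs an explicit cast. A sketch of the presumed shape of the enum from base/enums.h (the k32/k64 enumerators and their byte values are implied by the code above; the underlying type is an assumption):

#include <cstddef>

// Presumed shape of art::PointerSize (see base/enums.h for the real one).
enum class PointerSize : size_t {
  k32 = 4,
  k64 = 8,
};

// The scoped enum forces explicit conversion for size arithmetic, as in
// Size() and Alignment() above.
inline size_t PointerBytes(PointerSize pointer_size) {
  return static_cast<size_t>(pointer_size);
}
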
 }  // namespace art
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 9f1495c..a90ef23 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -17,8 +17,11 @@
 #ifndef ART_RUNTIME_ART_METHOD_H_
 #define ART_RUNTIME_ART_METHOD_H_
 
+#include <cstddef>
+
 #include "base/bit_utils.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "invoke_type.h"
@@ -41,27 +44,196 @@
 namespace mirror {
 class Array;
 class Class;
+class IfTable;
 class PointerArray;
 }  // namespace mirror
 
+// Table used to resolve IMT conflicts at runtime. The table is attached to
+// the JNI entrypoint of IMT conflict ArtMethods.
+// It contains a list of { interface_method, implementation_method } pairs,
+// terminated by a null entry so that an assembly implementation of the lookup
+// can iterate without an explicit length.
+class ImtConflictTable {
+  enum MethodIndex {
+    kMethodInterface,
+    kMethodImplementation,
+    kMethodCount,  // Number of elements in enum.
+  };
+
+ public:
+  // Build a new table by copying `other` and appending the new entry formed by
+  // the pair { `interface_method`, `implementation_method` }.
+  ImtConflictTable(ImtConflictTable* other,
+                   ArtMethod* interface_method,
+                   ArtMethod* implementation_method,
+                   PointerSize pointer_size) {
+    const size_t count = other->NumEntries(pointer_size);
+    for (size_t i = 0; i < count; ++i) {
+      SetInterfaceMethod(i, pointer_size, other->GetInterfaceMethod(i, pointer_size));
+      SetImplementationMethod(i, pointer_size, other->GetImplementationMethod(i, pointer_size));
+    }
+    SetInterfaceMethod(count, pointer_size, interface_method);
+    SetImplementationMethod(count, pointer_size, implementation_method);
+    // Add the null marker.
+    SetInterfaceMethod(count + 1, pointer_size, nullptr);
+    SetImplementationMethod(count + 1, pointer_size, nullptr);
+  }
+
+  // num_entries excludes the null end marker.
+  ImtConflictTable(size_t num_entries, PointerSize pointer_size) {
+    SetInterfaceMethod(num_entries, pointer_size, nullptr);
+    SetImplementationMethod(num_entries, pointer_size, nullptr);
+  }
+
+  // Set an entry at an index.
+  void SetInterfaceMethod(size_t index, PointerSize pointer_size, ArtMethod* method) {
+    SetMethod(index * kMethodCount + kMethodInterface, pointer_size, method);
+  }
+
+  void SetImplementationMethod(size_t index, PointerSize pointer_size, ArtMethod* method) {
+    SetMethod(index * kMethodCount + kMethodImplementation, pointer_size, method);
+  }
+
+  ArtMethod* GetInterfaceMethod(size_t index, PointerSize pointer_size) const {
+    return GetMethod(index * kMethodCount + kMethodInterface, pointer_size);
+  }
+
+  ArtMethod* GetImplementationMethod(size_t index, PointerSize pointer_size) const {
+    return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
+  }
+
+  // Return true if two conflict tables are the same.
+  bool Equals(ImtConflictTable* other, PointerSize pointer_size) const {
+    size_t num = NumEntries(pointer_size);
+    if (num != other->NumEntries(pointer_size)) {
+      return false;
+    }
+    for (size_t i = 0; i < num; ++i) {
+      if (GetInterfaceMethod(i, pointer_size) != other->GetInterfaceMethod(i, pointer_size) ||
+          GetImplementationMethod(i, pointer_size) !=
+              other->GetImplementationMethod(i, pointer_size)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Visit all of the entries.
+  // NO_THREAD_SAFETY_ANALYSIS so this can be called with locks held. The visitor is passed a
+  // pair of ArtMethod* and returns an updated pair. The order is <interface, implementation>.
+  template<typename Visitor>
+  void Visit(const Visitor& visitor, PointerSize pointer_size) NO_THREAD_SAFETY_ANALYSIS {
+    uint32_t table_index = 0;
+    for (;;) {
+      ArtMethod* interface_method = GetInterfaceMethod(table_index, pointer_size);
+      if (interface_method == nullptr) {
+        break;
+      }
+      ArtMethod* implementation_method = GetImplementationMethod(table_index, pointer_size);
+      auto input = std::make_pair(interface_method, implementation_method);
+      std::pair<ArtMethod*, ArtMethod*> updated = visitor(input);
+      if (input.first != updated.first) {
+        SetInterfaceMethod(table_index, pointer_size, updated.first);
+      }
+      if (input.second != updated.second) {
+        SetImplementationMethod(table_index, pointer_size, updated.second);
+      }
+      ++table_index;
+    }
+  }
+
+  // Look up the implementation ArtMethod associated with `interface_method`. Return null
+  // if not found.
+  ArtMethod* Lookup(ArtMethod* interface_method, PointerSize pointer_size) const {
+    uint32_t table_index = 0;
+    for (;;) {
+      ArtMethod* current_interface_method = GetInterfaceMethod(table_index, pointer_size);
+      if (current_interface_method == nullptr) {
+        break;
+      }
+      if (current_interface_method == interface_method) {
+        return GetImplementationMethod(table_index, pointer_size);
+      }
+      ++table_index;
+    }
+    return nullptr;
+  }
+
+  // Compute the number of entries in this table.
+  size_t NumEntries(PointerSize pointer_size) const {
+    uint32_t table_index = 0;
+    while (GetInterfaceMethod(table_index, pointer_size) != nullptr) {
+      ++table_index;
+    }
+    return table_index;
+  }
+
+  // Compute the size in bytes taken by this table.
+  size_t ComputeSize(PointerSize pointer_size) const {
+    // Add the end marker.
+    return ComputeSize(NumEntries(pointer_size), pointer_size);
+  }
+
+  // Compute the size in bytes needed to copy the given `table` with one more
+  // entry added.
+  static size_t ComputeSizeWithOneMoreEntry(ImtConflictTable* table, PointerSize pointer_size) {
+    return table->ComputeSize(pointer_size) + EntrySize(pointer_size);
+  }
+
+  // Compute size with a fixed number of entries.
+  static size_t ComputeSize(size_t num_entries, PointerSize pointer_size) {
+    return (num_entries + 1) * EntrySize(pointer_size);  // Add one for null terminator.
+  }
+
+  static size_t EntrySize(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size) * static_cast<size_t>(kMethodCount);
+  }
+
+ private:
+  ArtMethod* GetMethod(size_t index, PointerSize pointer_size) const {
+    if (pointer_size == PointerSize::k64) {
+      return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data64_[index]));
+    } else {
+      return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data32_[index]));
+    }
+  }
+
+  void SetMethod(size_t index, PointerSize pointer_size, ArtMethod* method) {
+    if (pointer_size == PointerSize::k64) {
+      data64_[index] = dchecked_integral_cast<uint64_t>(reinterpret_cast<uintptr_t>(method));
+    } else {
+      data32_[index] = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(method));
+    }
+  }
+
+  // Array of entries that the assembly stubs will iterate over. Note that this is
+  // not a fixed-size array; the backing storage is allocated before the
+  // ImtConflictTable constructor is invoked.
+  union {
+    uint32_t data32_[0];
+    uint64_t data64_[0];
+  };
+
+  DISALLOW_COPY_AND_ASSIGN(ImtConflictTable);
+};
+
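Because the entry array is not a fixed-size member, callers must size the backing storage with ComputeSize before the constructor runs. A minimal allocation sketch (the helper is hypothetical and uses plain calloc; the runtime presumably allocates such tables from an arena):

#include <cstdlib>
#include <new>

// Hypothetical helper, for illustration only: allocate zeroed storage large
// enough for `num_entries` pairs plus the null end marker, then construct the
// table in place. Entries are filled in afterwards via SetInterfaceMethod and
// SetImplementationMethod.
ImtConflictTable* AllocEmptyConflictTable(size_t num_entries,
                                          PointerSize pointer_size) {
  const size_t size = ImtConflictTable::ComputeSize(num_entries, pointer_size);
  void* storage = calloc(1, size);
  return new (storage) ImtConflictTable(num_entries, pointer_size);
}
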
 class ArtMethod FINAL {
  public:
   ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
-      method_index_(0) { }
+      method_index_(0), hotness_count_(0) { }
 
-  ArtMethod(const ArtMethod& src, size_t image_pointer_size) {
-    CopyFrom(&src, image_pointer_size);
+  ArtMethod(ArtMethod* src, PointerSize image_pointer_size) {
+    CopyFrom(src, image_pointer_size);
   }
 
   static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                         jobject jlr_method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE mirror::Class* GetDeclaringClassNoBarrier()
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClassUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -77,6 +249,7 @@
 
   // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for
   // a proxy method.
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
   void SetAccessFlags(uint32_t new_access_flags) {
@@ -132,17 +305,43 @@
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
+  bool IsCopied() {
+    const bool copied = (GetAccessFlags() & kAccCopied) != 0;
+    // (IsMiranda() || IsDefaultConflicting()) implies copied
+    DCHECK(!(IsMiranda() || IsDefaultConflicting()) || copied)
+        << "Miranda or default-conflict methods must always be copied.";
+    return copied;
+  }
+
   bool IsMiranda() {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
+  // Returns true if invoking this method will not throw an AbstractMethodError or
+  // IncompatibleClassChangeError.
+  bool IsInvokable() {
+    return !IsAbstract() && !IsDefaultConflicting();
+  }
+
+  bool IsCompilable() {
+    return (GetAccessFlags() & kAccCompileDontBother) == 0;
+  }
+
+  // A default conflict method is a special sentinel method that stands for a conflict between
+  // multiple default methods. It cannot be invoked, throwing an IncompatibleClassChangeError if one
+  // attempts to do so.
+  bool IsDefaultConflicting() {
+    return (GetAccessFlags() & kAccDefaultConflict) != 0u;
+  }
+
   // This is set by the class linker.
   bool IsDefault() {
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
-    return (GetAccessFlags() & kAccNative) != 0;
+    return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
   }
 
   bool IsFastNative() {
@@ -158,24 +357,37 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsPreverified() {
-    return (GetAccessFlags() & kAccPreverified) != 0;
+  bool SkipAccessChecks() {
+    return (GetAccessFlags() & kAccSkipAccessChecks) != 0;
   }
 
-  void SetPreverified() {
-    DCHECK(!IsPreverified());
-    SetAccessFlags(GetAccessFlags() | kAccPreverified);
+  void SetSkipAccessChecks() {
+    DCHECK(!SkipAccessChecks());
+    SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
   }
 
+  // Should this method be run in the interpreter and count locks (e.g., failed structured-
+  // locking verification)?
+  bool MustCountLocks() {
+    return (GetAccessFlags() & kAccMustCountLocks) != 0;
+  }
+
+  // Checks to see if the method was annotated with @dalvik.annotation.optimization.FastNative
+  // -- Independent of the kAccFastNative access flag.
+  bool IsAnnotatedWithFastNative();
+
   // Returns true if this method could be overridden by a default method.
-  bool IsOverridableByDefaultMethod() {
-    return IsDefault() || IsAbstract();
-  }
+  bool IsOverridableByDefaultMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CheckIncompatibleClassChange(InvokeType type) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Throws the error that would result from trying to invoke this method (i.e.
+  // IncompatibleClassChangeError or AbstractMethodError). Only call if !IsInvokable().
+  void ThrowInvocationTimeError() SHARED_REQUIRES(Locks::mutator_lock_);
+
   uint16_t GetMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Doesn't do erroneous / unresolved class checks.
@@ -212,44 +424,53 @@
 
   ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetDexMethodIndex(uint32_t new_idx) {
     // Not called within a transaction.
     dex_method_index_ = new_idx;
   }
 
-  ALWAYS_INLINE ArtMethod** GetDexCacheResolvedMethods(size_t pointer_size)
+  ALWAYS_INLINE ArtMethod** GetDexCacheResolvedMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_index, size_t ptr_size)
+  ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_index,
+                                                     PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
   ALWAYS_INLINE void SetDexCacheResolvedMethod(uint16_t method_index,
                                                ArtMethod* new_method,
-                                               size_t ptr_size)
+                                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE void SetDexCacheResolvedMethods(ArtMethod** new_dex_cache_methods, size_t ptr_size)
+  ALWAYS_INLINE void SetDexCacheResolvedMethods(ArtMethod** new_dex_cache_methods,
+                                                PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasDexCacheResolvedMethods(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedMethods(ArtMethod* other, size_t pointer_size)
+  bool HasDexCacheResolvedMethods(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedMethods(ArtMethod* other, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedMethods(ArtMethod** other_cache, size_t pointer_size)
+  bool HasSameDexCacheResolvedMethods(ArtMethod** other_cache, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool kWithCheck = true>
-  mirror::Class* GetDexCacheResolvedType(uint32_t type_idx, size_t ptr_size)
+  mirror::Class* GetDexCacheResolvedType(uint32_t type_idx, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types, size_t ptr_size)
+  void SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types,
+                                PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasDexCacheResolvedTypes(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedTypes(ArtMethod* other, size_t pointer_size)
+  bool HasDexCacheResolvedTypes(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedTypes(ArtMethod* other, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedTypes(GcRoot<mirror::Class>* other_cache, size_t pointer_size)
+  bool HasSameDexCacheResolvedTypes(GcRoot<mirror::Class>* other_cache, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the Class* from the type index into this method's dex cache.
-  mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
+  mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns true if this method has the same name and signature as the other method.
+  bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Find the method that this method overrides.
-  ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* FindOverriddenMethod(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Find the method index for this method within other_dexfile. If this method isn't present then
   // return DexFile::kDexNoIndex. The name_and_signature_idx MUST refer to a MethodId with the same
@@ -263,21 +484,22 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   const void* GetEntryPointFromQuickCompiledCode() {
-    return GetEntryPointFromQuickCompiledCodePtrSize(sizeof(void*));
+    return GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize);
   }
-  ALWAYS_INLINE const void* GetEntryPointFromQuickCompiledCodePtrSize(size_t pointer_size) {
+  ALWAYS_INLINE const void* GetEntryPointFromQuickCompiledCodePtrSize(PointerSize pointer_size) {
     return GetNativePointer<const void*>(
         EntryPointFromQuickCompiledCodeOffset(pointer_size), pointer_size);
   }
 
   void SetEntryPointFromQuickCompiledCode(const void* entry_point_from_quick_compiled_code) {
     SetEntryPointFromQuickCompiledCodePtrSize(entry_point_from_quick_compiled_code,
-                                              sizeof(void*));
+                                              kRuntimePointerSize);
   }
   ALWAYS_INLINE void SetEntryPointFromQuickCompiledCodePtrSize(
-      const void* entry_point_from_quick_compiled_code, size_t pointer_size) {
+      const void* entry_point_from_quick_compiled_code, PointerSize pointer_size) {
     SetNativePointer(EntryPointFromQuickCompiledCodeOffset(pointer_size),
-                     entry_point_from_quick_compiled_code, pointer_size);
+                     entry_point_from_quick_compiled_code,
+                     pointer_size);
   }
 
   void RegisterNative(const void* native_method, bool is_fast)
@@ -285,47 +507,86 @@
 
   void UnregisterNative() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static MemberOffset DexCacheResolvedMethodsOffset(size_t pointer_size) {
+  static MemberOffset DexCacheResolvedMethodsOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, dex_cache_resolved_methods_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, dex_cache_resolved_methods_) / sizeof(void*)
+            * static_cast<size_t>(pointer_size));
   }
 
-  static MemberOffset DexCacheResolvedTypesOffset(size_t pointer_size) {
+  static MemberOffset DexCacheResolvedTypesOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*)
+            * static_cast<size_t>(pointer_size));
   }
 
-  static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+  static MemberOffset DataOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, data_) / sizeof(void*) * static_cast<size_t>(pointer_size));
   }
 
-  static MemberOffset EntryPointFromQuickCompiledCodeOffset(size_t pointer_size) {
-    return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
+  static MemberOffset EntryPointFromJniOffset(PointerSize pointer_size) {
+    return DataOffset(pointer_size);
   }
 
-  ProfilingInfo* CreateProfilingInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+  static MemberOffset EntryPointFromQuickCompiledCodeOffset(PointerSize pointer_size) {
+    return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
+        PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*)
+            * static_cast<size_t>(pointer_size));
+  }
 
-  ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
-    return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
+  ImtConflictTable* GetImtConflictTable(PointerSize pointer_size) {
+    DCHECK(IsRuntimeMethod());
+    return reinterpret_cast<ImtConflictTable*>(GetDataPtrSize(pointer_size));
+  }
+
+  ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, PointerSize pointer_size) {
+    DCHECK(IsRuntimeMethod());
+    SetDataPtrSize(table, pointer_size);
+  }
+
+  ProfilingInfo* GetProfilingInfo(PointerSize pointer_size) {
+    return reinterpret_cast<ProfilingInfo*>(GetDataPtrSize(pointer_size));
+  }
+
+  ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
+    SetDataPtrSize(info, kRuntimePointerSize);
+  }
+
+  ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, PointerSize pointer_size) {
+    SetDataPtrSize(info, pointer_size);
+  }
+
+  static MemberOffset ProfilingInfoOffset() {
+    DCHECK(IsImagePointerSize(kRuntimePointerSize));
+    return DataOffset(kRuntimePointerSize);
   }
 
   void* GetEntryPointFromJni() {
-    return GetEntryPointFromJniPtrSize(sizeof(void*));
+    DCHECK(IsNative());
+    return GetEntryPointFromJniPtrSize(kRuntimePointerSize);
   }
 
-  ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) {
-    return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
+  ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(PointerSize pointer_size) {
+    return GetDataPtrSize(pointer_size);
   }
 
   void SetEntryPointFromJni(const void* entrypoint) {
     DCHECK(IsNative());
-    SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*));
+    SetEntryPointFromJniPtrSize(entrypoint, kRuntimePointerSize);
   }
 
-  ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) {
-    SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
+  ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, PointerSize pointer_size) {
+    SetDataPtrSize(entrypoint, pointer_size);
+  }
+
+  ALWAYS_INLINE void* GetDataPtrSize(PointerSize pointer_size) {
+    DCHECK(IsImagePointerSize(pointer_size));
+    return GetNativePointer<void*>(DataOffset(pointer_size), pointer_size);
+  }
+
+  ALWAYS_INLINE void SetDataPtrSize(const void* data, PointerSize pointer_size) {
+    DCHECK(IsImagePointerSize(pointer_size));
+    SetNativePointer(DataOffset(pointer_size), data, pointer_size);
   }
 
   // Is this a CalleeSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
@@ -337,8 +598,6 @@
 
   bool IsResolutionMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsImtConflictMethod() SHARED_REQUIRES(Locks::mutator_lock_);
-
   bool IsImtUnimplementedMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   MethodReference ToMethodReference() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -353,8 +612,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
-  template<typename RootVisitorType>
-  void VisitRoots(RootVisitorType& visitor, size_t pointer_size) NO_THREAD_SAFETY_ANALYSIS;
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename RootVisitorType>
+  void VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) NO_THREAD_SAFETY_ANALYSIS;
 
   const DexFile* GetDexFile() SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -375,7 +634,8 @@
 
   const DexFile::CodeItem* GetCodeItem() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsResolvedTypeIdx(uint16_t type_idx, size_t ptr_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool IsResolvedTypeIdx(uint16_t type_idx, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   int32_t GetLineNumFromDexPC(uint32_t dex_pc) SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -396,14 +656,14 @@
 
   // May cause thread suspension due to GetClassFromTypeIdx calling ResolveType; this has caused a
   // large number of bugs at call sites.
-  mirror::Class* GetReturnType(bool resolve, size_t ptr_size)
+  mirror::Class* GetReturnType(bool resolve, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::ClassLoader* GetClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::DexCache* GetDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // May cause thread suspension due to class resolution.
@@ -411,28 +671,43 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Size of an instance of this native class.
-  static size_t Size(size_t pointer_size) {
-    return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size) +
-        (sizeof(PtrSizedFields) / sizeof(void*)) * pointer_size;
+  static size_t Size(PointerSize pointer_size) {
+    return PtrSizedFieldsOffset(pointer_size) +
+        (sizeof(PtrSizedFields) / sizeof(void*)) * static_cast<size_t>(pointer_size);
   }
 
   // Alignment of an instance of this native class.
-  static size_t Alignment(size_t pointer_size) {
+  static size_t Alignment(PointerSize pointer_size) {
     // The ArtMethod alignment is the same as image pointer size. This differs from
     // alignof(ArtMethod) if cross-compiling with pointer_size != sizeof(void*).
-    return pointer_size;
+    return static_cast<size_t>(pointer_size);
   }
 
-  void CopyFrom(const ArtMethod* src, size_t image_pointer_size)
+  void CopyFrom(ArtMethod* src, PointerSize image_pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
+  ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Note: hotness_count_ updates are non-atomic, but they do not need to be precise. Also,
+  // given that the counter is only 16 bits wide, we can expect wrap-around in some
+  // situations. Consumers of hotness_count_ must be able to deal with that.
   uint16_t IncrementCounter() {
     return ++hotness_count_;
   }
 
+  void ClearCounter() {
+    hotness_count_ = 0;
+  }
+
+  void SetCounter(uint16_t hotness_count) {
+    hotness_count_ = hotness_count;
+  }
+
+  uint16_t GetCounter() const {
+    return hotness_count_;
+  }
+
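Given those caveats, consumers should treat the counter as a noisy hint rather than an exact count. A sketch of wrap-around-tolerant threshold logic (the threshold value and the policy are hypothetical, not the actual JIT heuristics):

#include <cstdint>

// Hypothetical consumer: missing increments and 16-bit wrap-around are
// tolerated because a truly hot method will cross the threshold again on a
// later pass through its hot path.
constexpr uint16_t kHotMethodThreshold = 10000;  // Invented value.

bool ShouldConsiderForCompilation(ArtMethod* method) {
  return method->GetCounter() >= kHotMethodThreshold;
}
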
   const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the method header for the compiled code containing 'pc'. Note that runtime
@@ -440,6 +715,21 @@
   const OatQuickMethodHeader* GetOatQuickMethodHeader(uintptr_t pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns whether the method has any compiled code, JIT or AOT.
+  bool HasAnyCompiledCode() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Update heap objects and non-entrypoint pointers by the passed in visitor for image relocation.
+  // Does not use read barrier.
+  template <typename Visitor>
+  ALWAYS_INLINE void UpdateObjectsForImageRelocation(const Visitor& visitor,
+                                                     PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Update entry points by passing them through the visitor.
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
+  ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor, PointerSize pointer_size);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
@@ -463,16 +753,14 @@
   // ifTable.
   uint16_t method_index_;
 
-  // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow
+  // The hotness we measure for this method. Managed by the interpreter. Not atomic, as we allow
   // missing increments: if the method is hot, we will see it eventually.
   uint16_t hotness_count_;
 
   // Fake padding field gets inserted here.
 
   // Must be the last fields in the method.
-  // PACKED(4) is necessary for the correctness of
-  // RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size).
-  struct PACKED(4) PtrSizedFields {
+  struct PtrSizedFields {
     // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
     ArtMethod** dex_cache_resolved_methods_;
 
@@ -480,8 +768,8 @@
     GcRoot<mirror::Class>* dex_cache_resolved_types_;
 
     // Pointer to JNI function registered to this method, or a function to resolve the JNI function,
-    // or the profiling data for non-native methods.
-    void* entry_point_from_jni_;
+    // or the profiling data for non-native methods, or an ImtConflictTable.
+    void* data_;
 
     // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
     // the interpreter.
@@ -489,17 +777,20 @@
   } ptr_sized_fields_;
 
  private:
-  static size_t PtrSizedFieldsOffset(size_t pointer_size) {
-    // Round up to pointer size for padding field.
-    return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size);
+  static constexpr size_t PtrSizedFieldsOffset(PointerSize pointer_size) {
+    // Round up to pointer size for padding field. Tested in art_method.cc.
+    return RoundUp(offsetof(ArtMethod, hotness_count_) + sizeof(hotness_count_),
+                   static_cast<size_t>(pointer_size));
   }
 
+  // Compare given pointer size to the image pointer size.
+  static bool IsImagePointerSize(PointerSize pointer_size);
+
   template<typename T>
-  ALWAYS_INLINE T GetNativePointer(MemberOffset offset, size_t pointer_size) const {
+  ALWAYS_INLINE T GetNativePointer(MemberOffset offset, PointerSize pointer_size) const {
     static_assert(std::is_pointer<T>::value, "T must be a pointer type");
-    DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
     const auto addr = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-    if (pointer_size == sizeof(uint32_t)) {
+    if (pointer_size == PointerSize::k32) {
       return reinterpret_cast<T>(*reinterpret_cast<const uint32_t*>(addr));
     } else {
       auto v = *reinterpret_cast<const uint64_t*>(addr);
@@ -508,11 +799,10 @@
   }
 
   template<typename T>
-  ALWAYS_INLINE void SetNativePointer(MemberOffset offset, T new_value, size_t pointer_size) {
+  ALWAYS_INLINE void SetNativePointer(MemberOffset offset, T new_value, PointerSize pointer_size) {
     static_assert(std::is_pointer<T>::value, "T must be a pointer type");
-    DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
     const auto addr = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-    if (pointer_size == sizeof(uint32_t)) {
+    if (pointer_size == PointerSize::k32) {
       uintptr_t ptr = reinterpret_cast<uintptr_t>(new_value);
       *reinterpret_cast<uint32_t*>(addr) = dchecked_integral_cast<uint32_t>(ptr);
     } else {
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 69f6fe9..102b993 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,10 +19,15 @@
 
 #if defined(__cplusplus)
 #include "art_method.h"
+#include "base/bit_utils.h"
 #include "gc/allocator/rosalloc.h"
+#include "gc/heap.h"
+#include "jit/jit.h"
 #include "lock_word.h"
 #include "mirror/class.h"
+#include "mirror/dex_cache.h"
 #include "mirror/string.h"
+#include "utils/dex_cache_arrays_layout.h"
 #include "runtime.h"
 #include "thread.h"
 #endif
@@ -56,101 +61,105 @@
 
 #if defined(__LP64__)
 #define POINTER_SIZE_SHIFT 3
+#define POINTER_SIZE art::PointerSize::k64
 #else
 #define POINTER_SIZE_SHIFT 2
+#define POINTER_SIZE art::PointerSize::k32
 #endif
 ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT),
             static_cast<size_t>(__SIZEOF_POINTER__))
 
-// Size of references to the heap on the stack.
-#define STACK_REFERENCE_SIZE 4
-ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
-
-// Size of heap references
-#define COMPRESSED_REFERENCE_SIZE 4
-ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
-            sizeof(art::mirror::CompressedReference<art::mirror::Object>))
-
-#define COMPRESSED_REFERENCE_SIZE_SHIFT 2
-ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT),
-            static_cast<size_t>(COMPRESSED_REFERENCE_SIZE))
-
-// Note: these callee save methods loads require read barriers.
-// Offset of field Runtime::callee_save_methods_[kSaveAll]
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kSaveAll))
-
-// Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsOnly))
-
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * 8)
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsAndArgs))
-
-// Offset of field Thread::tls32_.state_and_flags.
-#define THREAD_FLAGS_OFFSET 0
-ADD_TEST_EQ(THREAD_FLAGS_OFFSET,
-            art::Thread::ThreadFlagsOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tls32_.thin_lock_thread_id.
-#define THREAD_ID_OFFSET 12
-ADD_TEST_EQ(THREAD_ID_OFFSET,
-            art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 128
-ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
-            art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
+// Import platform-independent constant defines from our autogenerated list.
+// Export new defines (for assembly use) by editing cpp-define-generator def files.
+#define DEFINE_CHECK_EQ ADD_TEST_EQ
+#include "generated/asm_support_gen.h"
 
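ADD_TEST_EQ (here also aliased as DEFINE_CHECK_EQ for the generated list) pairs every assembly-visible #define with the authoritative C++ expression, so the two cannot silently diverge. A self-contained sketch of the pattern, with invented MY_* names; the real macro is defined earlier in asm_support.h and may check at run time instead, since many of these expressions are not constant expressions:

#if defined(__cplusplus)
#define MY_ADD_TEST_EQ(x, y) static_assert((x) == (y), "asm constant drift");
#else
#define MY_ADD_TEST_EQ(x, y)  // Expands to nothing for assembly includes.
#endif

#define MY_POINTER_SIZE_SHIFT 3  // Assumes a 64-bit build, as under __LP64__.
MY_ADD_TEST_EQ(1u << MY_POINTER_SIZE_SHIFT, sizeof(void*))
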
 // Offset of field Thread::tlsPtr_.exception.
 #define THREAD_EXCEPTION_OFFSET (THREAD_CARD_TABLE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_EXCEPTION_OFFSET,
-            art::Thread::ExceptionOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ExceptionOffset<POINTER_SIZE>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.managed_stack.top_quick_frame_.
 #define THREAD_TOP_QUICK_FRAME_OFFSET (THREAD_CARD_TABLE_OFFSET + (3 * __SIZEOF_POINTER__))
 ADD_TEST_EQ(THREAD_TOP_QUICK_FRAME_OFFSET,
-            art::Thread::TopOfManagedStackOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::TopOfManagedStackOffset<POINTER_SIZE>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.self.
 #define THREAD_SELF_OFFSET (THREAD_CARD_TABLE_OFFSET + (9 * __SIZEOF_POINTER__))
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
-            art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::SelfOffset<POINTER_SIZE>().Int32Value())
 
+// Offset of field Thread::tlsPtr_.thread_local_objects.
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 199 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+            art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
-            art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalPosOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
-            art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
-// Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
-            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_current_ibase.
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
+            art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_default_ibase.
+#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_CURRENT_IBASE_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_DEFAULT_IBASE_OFFSET,
+            art::Thread::MterpDefaultIBaseOffset<POINTER_SIZE>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_alt_ibase.
+#define THREAD_ALT_IBASE_OFFSET (THREAD_DEFAULT_IBASE_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_ALT_IBASE_OFFSET,
+            art::Thread::MterpAltIBaseOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_ALT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
-            art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::RosAllocRunsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
-#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 34 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 16 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
-            art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalAllocStackTopOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end.
-#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 35 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
-            art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value())
 
-// Offsets within java.lang.Object.
-#define MIRROR_OBJECT_CLASS_OFFSET 0
-ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
-#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
-ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
+// Offsets within ShadowFrame.
+#define SHADOWFRAME_LINK_OFFSET 0
+ADD_TEST_EQ(SHADOWFRAME_LINK_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::LinkOffset()))
+#define SHADOWFRAME_METHOD_OFFSET (SHADOWFRAME_LINK_OFFSET + 1 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_METHOD_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::MethodOffset()))
+#define SHADOWFRAME_RESULT_REGISTER_OFFSET (SHADOWFRAME_LINK_OFFSET + 2 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_RESULT_REGISTER_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::ResultRegisterOffset()))
+#define SHADOWFRAME_DEX_PC_PTR_OFFSET (SHADOWFRAME_LINK_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_DEX_PC_PTR_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::DexPCPtrOffset()))
+#define SHADOWFRAME_CODE_ITEM_OFFSET (SHADOWFRAME_LINK_OFFSET + 4 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_CODE_ITEM_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::CodeItemOffset()))
+#define SHADOWFRAME_LOCK_COUNT_DATA_OFFSET (SHADOWFRAME_LINK_OFFSET + 5 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_LOCK_COUNT_DATA_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::LockCountDataOffset()))
+#define SHADOWFRAME_NUMBER_OF_VREGS_OFFSET (SHADOWFRAME_LINK_OFFSET + 6 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::NumberOfVRegsOffset()))
+#define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4)
+ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::DexPCOffset()))
+#define SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+ADD_TEST_EQ(SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::CachedHotnessCountdownOffset()))
+#define SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 10)
+ADD_TEST_EQ(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::HotnessCountdownOffset()))
+#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 12)
+ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
 
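The SHADOWFRAME_* offsets above imply a concrete field layout: six pointer-sized fields, two 32-bit fields, two 16-bit countdowns, and then the vreg array. A hypothetical mirror struct encoding the same layout (field names are invented and are not the real ShadowFrame members):

#include <cstddef>
#include <cstdint>

struct ShadowFrameLayout {
  void* link_;                        // SHADOWFRAME_LINK_OFFSET
  void* method_;                      // SHADOWFRAME_METHOD_OFFSET
  void* result_register_;             // SHADOWFRAME_RESULT_REGISTER_OFFSET
  void* dex_pc_ptr_;                  // SHADOWFRAME_DEX_PC_PTR_OFFSET
  void* code_item_;                   // SHADOWFRAME_CODE_ITEM_OFFSET
  void* lock_count_data_;             // SHADOWFRAME_LOCK_COUNT_DATA_OFFSET
  uint32_t number_of_vregs_;          // SHADOWFRAME_NUMBER_OF_VREGS_OFFSET
  uint32_t dex_pc_;                   // NUMBER_OF_VREGS_OFFSET + 4
  int16_t cached_hotness_countdown_;  // NUMBER_OF_VREGS_OFFSET + 8
  int16_t hotness_countdown_;         // NUMBER_OF_VREGS_OFFSET + 10
  uint32_t vregs_[1];                 // SHADOWFRAME_VREGS_OFFSET (+ 12)
};

static_assert(offsetof(ShadowFrameLayout, number_of_vregs_) == 6 * sizeof(void*),
              "pointer-sized prefix mismatch");
static_assert(offsetof(ShadowFrameLayout, vregs_) ==
                  offsetof(ShadowFrameLayout, number_of_vregs_) + 12,
              "trailing field packing mismatch");
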
 #if defined(USE_BROOKS_READ_BARRIER)
 #define MIRROR_OBJECT_HEADER_SIZE 16
@@ -163,22 +172,22 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (72 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (116 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
+            art::mirror::Class::PrimitiveTypeOffset().Int32Value())
+#define MIRROR_CLASS_STATUS_OFFSET (108 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
-#define MIRROR_CLASS_STATUS_INITIALIZED 10
-ADD_TEST_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED),
-            static_cast<uint32_t>(art::mirror::Class::kStatusInitialized))
-#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
-ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
-            static_cast<uint32_t>(art::kAccClassIsFinalizable))
+#define PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT 16
+ADD_TEST_EQ(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT,
+            static_cast<int>(art::mirror::Class::kPrimitiveTypeSizeShiftShift))
 
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
@@ -188,6 +197,26 @@
 ADD_TEST_EQ(MIRROR_CHAR_ARRAY_DATA_OFFSET,
             art::mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value())
 
+#define MIRROR_BOOLEAN_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_BOOLEAN_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint8_t)).Int32Value())
+
+#define MIRROR_BYTE_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_BYTE_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int8_t)).Int32Value())
+
+#define MIRROR_SHORT_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_SHORT_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int16_t)).Int32Value())
+
+#define MIRROR_INT_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_INT_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int32_t)).Int32Value())
+
+#define MIRROR_WIDE_ARRAY_DATA_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_WIDE_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint64_t)).Int32Value())
+
 #define MIRROR_OBJECT_ARRAY_DATA_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_OBJECT_ARRAY_DATA_OFFSET,
     art::mirror::Array::DataOffset(
@@ -208,96 +237,7 @@
 #define MIRROR_STRING_VALUE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
-// Offsets within java.lang.reflect.ArtMethod.
-#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_METHODS_OFFSET_32,
-            art::ArtMethod::DexCacheResolvedMethodsOffset(4).Int32Value())
 
-#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_64 24
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_METHODS_OFFSET_64,
-            art::ArtMethod::DexCacheResolvedMethodsOffset(8).Int32Value())
-
-#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_32 24
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET_32,
-            art::ArtMethod::DexCacheResolvedTypesOffset(4).Int32Value())
-
-#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_64 32
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET_64,
-            art::ArtMethod::DexCacheResolvedTypesOffset(8).Int32Value())
-
-#define ART_METHOD_QUICK_CODE_OFFSET_32 32
-ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_32,
-            art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
-
-#define ART_METHOD_QUICK_CODE_OFFSET_64 48
-ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_64,
-            art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value())
-
-#define LOCK_WORD_STATE_SHIFT 30
-ADD_TEST_EQ(LOCK_WORD_STATE_SHIFT, static_cast<int32_t>(art::LockWord::kStateShift))
-
-#define LOCK_WORD_STATE_MASK 0xC0000000
-ADD_TEST_EQ(LOCK_WORD_STATE_MASK, static_cast<uint32_t>(art::LockWord::kStateMaskShifted))
-
-#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_SHIFT,
-            static_cast<int32_t>(art::LockWord::kReadBarrierStateShift))
-
-#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK,
-            static_cast<int32_t>(art::LockWord::kReadBarrierStateMaskShifted))
-
-#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xCFFFFFFF
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED,
-            static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled))
-
-#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
-ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
-
-#define OBJECT_ALIGNMENT_MASK 7
-ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - 1)
-
-#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xFFFFFFF8
-ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
-            ~static_cast<uint32_t>(art::kObjectAlignment - 1))
-
-#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
-ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 4
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSizeShift))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 15
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff0
-ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
-            ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff0
-ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
-            ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
-
-#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))
-
-#define ROSALLOC_SLOT_NEXT_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
-// Assert this so that we can avoid zeroing the next field by installing the class pointer.
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
 
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
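
The aliasing of the boolean, byte, short, and int array data offsets to MIRROR_CHAR_ARRAY_DATA_OFFSET above follows from Array::DataOffset() rounding the array header up to the component size. A minimal sketch of that arithmetic, assuming the 12-byte header (8-byte object header plus 4-byte length field) implied by the macros above:

#include <cstddef>
#include <cstdint>

// Sketch of the offset arithmetic behind the aliased *_ARRAY_DATA_OFFSET
// macros; the 12-byte header value is an assumption derived from the
// MIRROR_OBJECT_HEADER_SIZE usage above, not part of this change.
constexpr size_t RoundUpTo(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }
constexpr size_t kArrayHeaderSize = 8 + 4;  // Object header + length field.

static_assert(RoundUpTo(kArrayHeaderSize, sizeof(uint8_t)) == 12, "boolean/byte data at 12");
static_assert(RoundUpTo(kArrayHeaderSize, sizeof(uint16_t)) == 12, "char/short data at 12");
static_assert(RoundUpTo(kArrayHeaderSize, sizeof(uint32_t)) == 12, "int data at 12");
static_assert(RoundUpTo(kArrayHeaderSize, sizeof(uint64_t)) == 16, "wide data at 8 + header");
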
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index e766a8d..d5ae570 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -28,7 +28,7 @@
 }
 
 void QuasiAtomic::Startup() {
-  if (kNeedSwapMutexes) {
+  if (NeedSwapMutexes(kRuntimeISA)) {
     gSwapMutexes = new std::vector<Mutex*>;
     for (size_t i = 0; i < kSwapMutexCount; ++i) {
       gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe", kSwapMutexesLock));
@@ -37,7 +37,7 @@
 }
 
 void QuasiAtomic::Shutdown() {
-  if (kNeedSwapMutexes) {
+  if (NeedSwapMutexes(kRuntimeISA)) {
     STLDeleteElements(gSwapMutexes);
     delete gSwapMutexes;
   }
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 87de506..e2a7259 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -22,6 +22,7 @@
 #include <limits>
 #include <vector>
 
+#include "arch/instruction_set.h"
 #include "base/logging.h"
 #include "base/macros.h"
 
@@ -44,14 +45,10 @@
 // quasiatomic operations that are performed on partially-overlapping
 // memory.
 class QuasiAtomic {
-#if defined(__mips__) && !defined(__LP64__)
-  static constexpr bool kNeedSwapMutexes = true;
-#elif defined(__mips__) && defined(__LP64__)
-  // TODO - mips64 still need this for Cas64 ???
-  static constexpr bool kNeedSwapMutexes = true;
-#else
-  static constexpr bool kNeedSwapMutexes = false;
-#endif
+  static constexpr bool NeedSwapMutexes(InstructionSet isa) {
+    // TODO: Does mips64 still need this for Cas64?
+    return (isa == kMips) || (isa == kMips64);
+  }
 
  public:
   static void Startup();
@@ -60,7 +57,7 @@
 
   // Reads the 64-bit value at "addr" without tearing.
   static int64_t Read64(volatile const int64_t* addr) {
-    if (!kNeedSwapMutexes) {
+    if (!NeedSwapMutexes(kRuntimeISA)) {
       int64_t value;
 #if defined(__LP64__)
       value = *addr;
@@ -96,7 +93,7 @@
 
   // Writes to the 64-bit value at "addr" without tearing.
   static void Write64(volatile int64_t* addr, int64_t value) {
-    if (!kNeedSwapMutexes) {
+    if (!NeedSwapMutexes(kRuntimeISA)) {
 #if defined(__LP64__)
       *addr = value;
 #else
@@ -142,7 +139,7 @@
   // at some point during the execution of Cas64, *addr was not equal to
   // old_value.
   static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
-    if (!kNeedSwapMutexes) {
+    if (!NeedSwapMutexes(kRuntimeISA)) {
       return __sync_bool_compare_and_swap(addr, old_value, new_value);
     } else {
       return SwapMutexCas64(old_value, new_value, addr);
@@ -150,8 +147,8 @@
   }
 
   // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
-  static bool LongAtomicsUseMutexes() {
-    return kNeedSwapMutexes;
+  static bool LongAtomicsUseMutexes(InstructionSet isa) {
+    return NeedSwapMutexes(isa);
   }
 
   static void ThreadFenceAcquire() {
@@ -199,6 +196,11 @@
     return this->load(std::memory_order_relaxed);
   }
 
+  // Load from memory with acquire ordering.
+  T LoadAcquire() const {
+    return this->load(std::memory_order_acquire);
+  }
+
   // Word tearing allowed, but may race.
   // TODO: Optimize?
   // There has been some discussion of eventually disallowing word
@@ -273,6 +275,10 @@
     return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
   }
 
+  T FetchAndAddRelaxed(const T value) {
+    return this->fetch_add(value, std::memory_order_relaxed);  // Return old_value.
+  }
+
   T FetchAndSubSequentiallyConsistent(const T value) {
     return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
   }
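
The new LoadAcquire() and FetchAndAddRelaxed() members round out the named memory-order wrappers. A hedged usage sketch; the counter and its callers below are hypothetical, not part of this change:

// Hypothetical users of the Atomic<T> wrappers added above.
Atomic<uint64_t> gAllocationCount;

void RecordAllocation() {
  // A pure statistics counter needs atomicity but no ordering guarantees.
  gAllocationCount.FetchAndAddRelaxed(1u);
}

uint64_t ReadPublished(Atomic<uint64_t>* published) {
  // Acquire pairs with a release store by the publisher, ordering the
  // reader's subsequent loads after the publication.
  return published->LoadAcquire();
}
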
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index ad255b8..e48eca9 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -22,6 +22,7 @@
 #include <unordered_map>
 
 #include "atomic.h"
+#include "base/hash_map.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/type_static_if.h"
@@ -51,7 +52,6 @@
   kAllocatorTagMonitorList,
   kAllocatorTagClassTable,
   kAllocatorTagInternTable,
-  kAllocatorTagLambdaBoxTable,
   kAllocatorTagMaps,
   kAllocatorTagLOS,
   kAllocatorTagSafeMap,
@@ -157,7 +157,7 @@
 
 template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
 using AllocationTrackingMultiMap = std::multimap<
-    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>>;
+    Key, T, Compare, TrackingAllocator<std::pair<const Key, T>, kTag>>;
 
 template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
 using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
@@ -170,6 +170,14 @@
 using AllocationTrackingUnorderedMap = std::unordered_map<
     Key, T, Hash, Pred, TrackingAllocator<std::pair<const Key, T>, kTag>>;
 
+template<class Key,
+         class T,
+         class EmptyFn,
+         AllocatorTag kTag,
+         class Hash = std::hash<Key>,
+         class Pred = std::equal_to<Key>>
+using AllocationTrackingHashMap = HashMap<
+    Key, T, EmptyFn, Hash, Pred, TrackingAllocator<std::pair<Key, T>, kTag>>;
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ALLOCATOR_H_
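
The std::pair<const Key, T> fix above matters because std::multimap's value_type carries a const key; an allocator instantiated for std::pair<Key, T> is a different type that the container would have to rebind. A short sketch of the tracking aliases in use; the tag choices are illustrative only:

// Illustrative use of the tracking aliases; any AllocatorTag works.
AllocationTrackingMultiMap<int, const char*, kAllocatorTagMaps> names;
names.emplace(1, "one");
names.emplace(1, "uno");  // Multimaps may hold duplicate keys.

AllocationTrackingUnorderedMap<int, int, kAllocatorTagMaps> cache;
cache[42] = 7;  // Charged to kAllocatorTagMaps when allocation tracking is on.
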
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 71afa0f..aeb990c 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -23,6 +23,7 @@
 #include "mem_map.h"
 #include "mutex.h"
 #include "thread-inl.h"
+#include "systrace.h"
 
 namespace art {
 
@@ -32,27 +33,9 @@
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
   "Misc         ",
-  "BBList       ",
-  "BBPreds      ",
-  "DfsPreOrd    ",
-  "DfsPostOrd   ",
-  "DomPostOrd   ",
-  "TopoOrd      ",
-  "Lowering     ",
-  "LIR          ",
-  "LIR masks    ",
   "SwitchTbl    ",
-  "FillArray    ",
   "SlowPaths    ",
-  "MIR          ",
-  "DataFlow     ",
-  "GrowList     ",
   "GrowBitMap   ",
-  "SSA2Dalvik   ",
-  "Dalvik2SSA   ",
-  "DebugInfo    ",
-  "RegAlloc     ",
-  "Data         ",
   "STL          ",
   "GraphBuilder ",
   "Graph        ",
@@ -79,23 +62,26 @@
   "MoveOperands ",
   "CodeBuffer   ",
   "StackMaps    ",
-  "BaselineMaps ",
   "Optimization ",
   "GVN          ",
   "InductionVar ",
   "BCE          ",
+  "DCE          ",
+  "LSE          ",
+  "LICM         ",
   "SsaLiveness  ",
   "SsaPhiElim   ",
   "RefTypeProp  ",
-  "PrimTypeProp ",
   "SideEffects  ",
   "RegAllocator ",
+  "RegAllocVldt ",
   "StackMapStm  ",
   "CodeGen      ",
+  "Assembler    ",
   "ParallelMove ",
   "GraphChecker ",
-  "LSE          ",
   "Verifier     ",
+  "CallingConv  ",
 };
 
 template <bool kCount>
@@ -176,6 +162,8 @@
 
 MallocArena::MallocArena(size_t size) {
   memory_ = reinterpret_cast<uint8_t*>(calloc(1, size));
+  CHECK(memory_ != nullptr);  // Abort on OOM.
+  DCHECK_ALIGNED(memory_, ArenaAllocator::kAlignment);
   size_ = size;
 }
 
@@ -183,10 +171,10 @@
   free(reinterpret_cast<void*>(memory_));
 }
 
-MemMapArena::MemMapArena(size_t size, bool low_4gb) {
+MemMapArena::MemMapArena(size_t size, bool low_4gb, const char* name) {
   std::string error_msg;
   map_.reset(MemMap::MapAnonymous(
-      "LinearAlloc", nullptr, size, PROT_READ | PROT_WRITE, low_4gb, false, &error_msg));
+      name, nullptr, size, PROT_READ | PROT_WRITE, low_4gb, false, &error_msg));
   CHECK(map_.get() != nullptr) << error_msg;
   memory_ = map_->Begin();
   size_ = map_->Size();
@@ -210,9 +198,12 @@
   }
 }
 
-ArenaPool::ArenaPool(bool use_malloc, bool low_4gb)
-    : use_malloc_(use_malloc), lock_("Arena pool lock", kArenaPoolLock), free_arenas_(nullptr),
-      low_4gb_(low_4gb) {
+ArenaPool::ArenaPool(bool use_malloc, bool low_4gb, const char* name)
+    : use_malloc_(use_malloc),
+      lock_("Arena pool lock", kArenaPoolLock),
+      free_arenas_(nullptr),
+      low_4gb_(low_4gb),
+      name_(name) {
   if (low_4gb) {
     CHECK(!use_malloc) << "low4gb must use map implementation";
   }
@@ -222,6 +213,10 @@
 }
 
 ArenaPool::~ArenaPool() {
+  ReclaimMemory();
+}
+
+void ArenaPool::ReclaimMemory() {
   while (free_arenas_ != nullptr) {
     auto* arena = free_arenas_;
     free_arenas_ = free_arenas_->next_;
@@ -229,6 +224,11 @@
   }
 }
 
+void ArenaPool::LockReclaimMemory() {
+  MutexLock lock(Thread::Current(), lock_);
+  ReclaimMemory();
+}
+
 Arena* ArenaPool::AllocArena(size_t size) {
   Thread* self = Thread::Current();
   Arena* ret = nullptr;
@@ -241,7 +241,7 @@
   }
   if (ret == nullptr) {
     ret = use_malloc_ ? static_cast<Arena*>(new MallocArena(size)) :
-        new MemMapArena(size, low_4gb_);
+        new MemMapArena(size, low_4gb_, name_);
   }
   ret->Reset();
   return ret;
@@ -249,6 +249,7 @@
 
 void ArenaPool::TrimMaps() {
   if (!use_malloc_) {
+    ScopedTrace trace(__PRETTY_FUNCTION__);
     // Doesn't work for malloc.
     MutexLock lock(Thread::Current(), lock_);
     for (auto* arena = free_arenas_; arena != nullptr; arena = arena->next_) {
@@ -316,22 +317,34 @@
 }
 
 void* ArenaAllocator::AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind) {
+  // We mark all memory for a newly retrieved arena as inaccessible and then
+  // mark only the actually allocated memory as defined. That leaves red zones
+  // and padding between allocations marked as inaccessible.
   size_t rounded_bytes = RoundUp(bytes + kMemoryToolRedZoneBytes, 8);
-  if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
-    // Obtain a new block.
-    ObtainNewArenaForAllocation(rounded_bytes);
-    CHECK(ptr_ != nullptr);
-    MEMORY_TOOL_MAKE_UNDEFINED(ptr_, end_ - ptr_);
-  }
   ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
-  uint8_t* ret = ptr_;
-  ptr_ += rounded_bytes;
-  // Check that the memory is already zeroed out.
-  for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) {
-    CHECK_EQ(*ptr, 0U);
+  uint8_t* ret;
+  if (UNLIKELY(rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
+    ret = AllocFromNewArena(rounded_bytes);
+    uint8_t* noaccess_begin = ret + bytes;
+    uint8_t* noaccess_end;
+    if (ret == arena_head_->Begin()) {
+      DCHECK(ptr_ - rounded_bytes == ret);
+      noaccess_end = end_;
+    } else {
+      // We're still using the old arena but `ret` comes from a new one just after it.
+      DCHECK(arena_head_->next_ != nullptr);
+      DCHECK(ret == arena_head_->next_->Begin());
+      DCHECK_EQ(rounded_bytes, arena_head_->next_->GetBytesAllocated());
+      noaccess_end = arena_head_->next_->End();
+    }
+    MEMORY_TOOL_MAKE_NOACCESS(noaccess_begin, noaccess_end - noaccess_begin);
+  } else {
+    ret = ptr_;
+    ptr_ += rounded_bytes;
   }
   MEMORY_TOOL_MAKE_DEFINED(ret, bytes);
-  MEMORY_TOOL_MAKE_NOACCESS(ret + bytes, rounded_bytes - bytes);
+  // Check that the memory is already zeroed out.
+  DCHECK(std::all_of(ret, ret + bytes, [](uint8_t val) { return val == 0u; }));
   return ret;
 }
 
@@ -341,14 +354,28 @@
   pool_->FreeArenaChain(arena_head_);
 }
 
-void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) {
-  UpdateBytesAllocated();
-  Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, allocation_size));
-  new_arena->next_ = arena_head_;
-  arena_head_ = new_arena;
-  // Update our internal data structures.
-  ptr_ = begin_ = new_arena->Begin();
-  end_ = new_arena->End();
+uint8_t* ArenaAllocator::AllocFromNewArena(size_t bytes) {
+  Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, bytes));
+  DCHECK(new_arena != nullptr);
+  DCHECK_LE(bytes, new_arena->Size());
+  if (static_cast<size_t>(end_ - ptr_) > new_arena->Size() - bytes) {
+    // The old arena has more space remaining than the new one, so keep using it.
+    // This can happen when the requested size is over half of the default size.
+    DCHECK(arena_head_ != nullptr);
+    new_arena->bytes_allocated_ = bytes;  // UpdateBytesAllocated() on the new_arena.
+    new_arena->next_ = arena_head_->next_;
+    arena_head_->next_ = new_arena;
+  } else {
+    UpdateBytesAllocated();
+    new_arena->next_ = arena_head_;
+    arena_head_ = new_arena;
+    // Update our internal data structures.
+    begin_ = new_arena->Begin();
+    DCHECK_ALIGNED(begin_, kAlignment);
+    ptr_ = begin_ + bytes;
+    end_ = new_arena->End();
+  }
+  return new_arena->Begin();
 }
 
 bool ArenaAllocator::Contains(const void* ptr) const {
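
The arena-retirement rule in AllocFromNewArena() above can be stated compactly: the current arena stays the bump-allocation source whenever it still has more free space than the new arena would have left after serving the oversized request. A standalone restatement of just that predicate, as a sketch:

// Sketch of the decision in AllocFromNewArena(): keep the old arena current
// when its remaining space beats what the new arena would retain.
bool KeepOldArenaAsCurrent(size_t old_remaining, size_t new_arena_size, size_t bytes) {
  return old_remaining > new_arena_size - bytes;  // bytes <= new_arena_size holds here.
}
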
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index ace6c38..3fad96b 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -44,27 +44,9 @@
 // Type of allocation for memory tuning.
 enum ArenaAllocKind {
   kArenaAllocMisc,
-  kArenaAllocBBList,
-  kArenaAllocBBPredecessors,
-  kArenaAllocDfsPreOrder,
-  kArenaAllocDfsPostOrder,
-  kArenaAllocDomPostOrder,
-  kArenaAllocTopologicalSortOrder,
-  kArenaAllocLoweringInfo,
-  kArenaAllocLIR,
-  kArenaAllocLIRResourceMask,
   kArenaAllocSwitchTable,
-  kArenaAllocFillArrayData,
   kArenaAllocSlowPaths,
-  kArenaAllocMIR,
-  kArenaAllocDFInfo,
-  kArenaAllocGrowableArray,
   kArenaAllocGrowableBitMap,
-  kArenaAllocSSAToDalvikMap,
-  kArenaAllocDalvikToSSAMap,
-  kArenaAllocDebugInfo,
-  kArenaAllocRegAlloc,
-  kArenaAllocData,
   kArenaAllocSTL,
   kArenaAllocGraphBuilder,
   kArenaAllocGraph,
@@ -91,23 +73,26 @@
   kArenaAllocMoveOperands,
   kArenaAllocCodeBuffer,
   kArenaAllocStackMaps,
-  kArenaAllocBaselineMaps,
   kArenaAllocOptimization,
   kArenaAllocGvn,
   kArenaAllocInductionVarAnalysis,
   kArenaAllocBoundsCheckElimination,
+  kArenaAllocDCE,
+  kArenaAllocLSE,
+  kArenaAllocLICM,
   kArenaAllocSsaLiveness,
   kArenaAllocSsaPhiElimination,
   kArenaAllocReferenceTypePropagation,
-  kArenaAllocPrimitiveTypePropagation,
   kArenaAllocSideEffectsAnalysis,
   kArenaAllocRegisterAllocator,
+  kArenaAllocRegisterAllocatorValidate,
   kArenaAllocStackMapStream,
   kArenaAllocCodeGenerator,
+  kArenaAllocAssembler,
   kArenaAllocParallelMoveResolver,
   kArenaAllocGraphChecker,
-  kArenaAllocLSE,
   kArenaAllocVerifier,
+  kArenaAllocCallingConvention,
   kNumArenaAllocKinds
 };
 
@@ -249,6 +234,8 @@
   friend class ScopedArenaAllocator;
   template <bool kCount> friend class ArenaAllocatorStatsImpl;
 
+  friend class ArenaAllocatorTest;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(Arena);
 };
@@ -261,7 +248,7 @@
 
 class MemMapArena FINAL : public Arena {
  public:
-  MemMapArena(size_t size, bool low_4gb);
+  MemMapArena(size_t size, bool low_4gb, const char* name);
   virtual ~MemMapArena();
   void Release() OVERRIDE;
 
@@ -271,11 +258,15 @@
 
 class ArenaPool {
  public:
-  explicit ArenaPool(bool use_malloc = true, bool low_4gb = false);
+  ArenaPool(bool use_malloc = true,
+            bool low_4gb = false,
+            const char* name = "LinearAlloc");
   ~ArenaPool();
   Arena* AllocArena(size_t size) REQUIRES(!lock_);
   void FreeArenaChain(Arena* first) REQUIRES(!lock_);
   size_t GetBytesAllocated() const REQUIRES(!lock_);
+  void ReclaimMemory() NO_THREAD_SAFETY_ANALYSIS;
+  void LockReclaimMemory() REQUIRES(!lock_);
   // Trim the maps in arenas by madvising, used by JIT to reduce memory usage. This only works
   // if use_malloc is false.
   void TrimMaps() REQUIRES(!lock_);
@@ -285,9 +276,15 @@
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   Arena* free_arenas_ GUARDED_BY(lock_);
   const bool low_4gb_;
+  const char* name_;
   DISALLOW_COPY_AND_ASSIGN(ArenaPool);
 };
 
+// Fast single-threaded allocator for zero-initialized memory chunks.
+//
+// Memory is allocated from ArenaPool in large chunks and then rationed through
+// the ArenaAllocator. It's returned to the ArenaPool only when the ArenaAllocator
+// is destroyed.
 class ArenaAllocator
     : private DebugStackRefCounter, private ArenaAllocatorStats, private ArenaAllocatorMemoryTool {
  public:
@@ -308,15 +305,12 @@
       return AllocWithMemoryTool(bytes, kind);
     }
     bytes = RoundUp(bytes, kAlignment);
-    if (UNLIKELY(ptr_ + bytes > end_)) {
-      // Obtain a new block.
-      ObtainNewArenaForAllocation(bytes);
-      if (UNLIKELY(ptr_ == nullptr)) {
-        return nullptr;
-      }
-    }
     ArenaAllocatorStats::RecordAlloc(bytes, kind);
+    if (UNLIKELY(bytes > static_cast<size_t>(end_ - ptr_))) {
+      return AllocFromNewArena(bytes);
+    }
     uint8_t* ret = ptr_;
+    DCHECK_ALIGNED(ret, kAlignment);
     ptr_ += bytes;
     return ret;
   }
@@ -326,34 +320,39 @@
                 ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE {
     DCHECK_GE(new_size, ptr_size);
     DCHECK_EQ(ptr == nullptr, ptr_size == 0u);
-    auto* end = reinterpret_cast<uint8_t*>(ptr) + ptr_size;
+    // We always allocate aligned.
+    const size_t aligned_ptr_size = RoundUp(ptr_size, kAlignment);
+    auto* end = reinterpret_cast<uint8_t*>(ptr) + aligned_ptr_size;
     // If we haven't allocated anything else, we can safely extend.
     if (end == ptr_) {
       DCHECK(!IsRunningOnMemoryTool());  // Red zone prevents end == ptr_.
-      const size_t size_delta = new_size - ptr_size;
+      const size_t aligned_new_size = RoundUp(new_size, kAlignment);
+      const size_t size_delta = aligned_new_size - aligned_ptr_size;
       // Check remain space.
       const size_t remain = end_ - ptr_;
       if (remain >= size_delta) {
         ptr_ += size_delta;
         ArenaAllocatorStats::RecordAlloc(size_delta, kind);
+        DCHECK_ALIGNED(ptr_, kAlignment);
         return ptr;
       }
     }
-    auto* new_ptr = Alloc(new_size, kind);
+    auto* new_ptr = Alloc(new_size, kind);  // Note: Alloc will take care of aligning new_size.
     memcpy(new_ptr, ptr, ptr_size);
     // TODO: Call free on ptr if linear alloc supports free.
     return new_ptr;
   }
 
   template <typename T>
+  T* Alloc(ArenaAllocKind kind = kArenaAllocMisc) {
+    return AllocArray<T>(1, kind);
+  }
+
+  template <typename T>
   T* AllocArray(size_t length, ArenaAllocKind kind = kArenaAllocMisc) {
     return static_cast<T*>(Alloc(length * sizeof(T), kind));
   }
 
-  void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
-
-  void ObtainNewArenaForAllocation(size_t allocation_size);
-
   size_t BytesAllocated() const;
 
   MemStats GetMemStats() const;
@@ -368,9 +367,13 @@
 
   bool Contains(const void* ptr) const;
 
- private:
   static constexpr size_t kAlignment = 8;
 
+ private:
+  void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
+  uint8_t* AllocFromNewArena(size_t bytes);
+
   void UpdateBytesAllocated();
 
   ArenaPool* pool_;
@@ -382,6 +385,8 @@
   template <typename U>
   friend class ArenaAllocatorAdapter;
 
+  friend class ArenaAllocatorTest;
+
   DISALLOW_COPY_AND_ASSIGN(ArenaAllocator);
 };  // ArenaAllocator
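
A minimal usage sketch matching the doc comment above; the pool name is illustrative. Allocators ration arena memory single-threadedly and hand it back to the pool on destruction:

ArenaPool pool(/* use_malloc */ true, /* low_4gb */ false, "ExampleAllocs");
{
  ArenaAllocator allocator(&pool);
  int32_t* regs = allocator.AllocArray<int32_t>(16, kArenaAllocMisc);
  regs[0] = 1;  // Chunks are handed out zero-initialized.
}  // The allocator's arena chain returns to `pool` here for reuse.
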
 
diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc
new file mode 100644
index 0000000..fd48a3f
--- /dev/null
+++ b/runtime/base/arena_allocator_test.cc
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
+#include "base/memory_tool.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class ArenaAllocatorTest : public testing::Test {
+ protected:
+  size_t NumberOfArenas(ArenaAllocator* arena) {
+    size_t result = 0u;
+    for (Arena* a = arena->arena_head_; a != nullptr; a = a->next_) {
+      ++result;
+    }
+    return result;
+  }
+};
+
+TEST_F(ArenaAllocatorTest, Test) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  ArenaBitVector bv(&arena, 10, true);
+  bv.SetBit(5);
+  EXPECT_EQ(1U, bv.GetStorageSize());
+  bv.SetBit(35);
+  EXPECT_EQ(2U, bv.GetStorageSize());
+}
+
+TEST_F(ArenaAllocatorTest, MakeDefined) {
+  // Regression test to make sure we mark the allocated area defined.
+  ArenaPool pool;
+  static constexpr size_t kSmallArraySize = 10;
+  static constexpr size_t kLargeArraySize = 50;
+  uint32_t* small_array;
+  {
+    // Allocate a small array from an arena and release it.
+    ArenaAllocator arena(&pool);
+    small_array = arena.AllocArray<uint32_t>(kSmallArraySize);
+    ASSERT_EQ(0u, small_array[kSmallArraySize - 1u]);
+  }
+  {
+    // Reuse the previous arena and allocate more than previous allocation including red zone.
+    ArenaAllocator arena(&pool);
+    uint32_t* large_array = arena.AllocArray<uint32_t>(kLargeArraySize);
+    ASSERT_EQ(0u, large_array[kLargeArraySize - 1u]);
+    // Verify that the allocation was made on the same arena.
+    ASSERT_EQ(small_array, large_array);
+  }
+}
+
+TEST_F(ArenaAllocatorTest, LargeAllocations) {
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    // Note: Leaving some space for memory tool red zones.
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 5 / 8);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 2 / 8);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(1u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 11 / 16);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 7 / 16);
+    ASSERT_NE(alloc1, alloc3);
+    ASSERT_NE(alloc2, alloc3);
+    ASSERT_EQ(3u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 9 / 16);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+    // Note: Leaving some space for memory tool red zones.
+    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 5 / 16);
+    ASSERT_NE(alloc1, alloc3);
+    ASSERT_NE(alloc2, alloc3);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 9 / 16);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+    // Note: Leaving some space for memory tool red zones.
+    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 5 / 16);
+    ASSERT_NE(alloc1, alloc3);
+    ASSERT_NE(alloc2, alloc3);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    // Note: Leaving some space for memory tool red zones.
+    for (size_t i = 0; i != 15; ++i) {
+      arena.Alloc(Arena::kDefaultSize * 1 / 16);    // Allocate 15 times from the same arena.
+      ASSERT_EQ(i + 1u, NumberOfArenas(&arena));
+      arena.Alloc(Arena::kDefaultSize * 17 / 16);   // Allocate a separate arena.
+      ASSERT_EQ(i + 2u, NumberOfArenas(&arena));
+    }
+  }
+}
+
+TEST_F(ArenaAllocatorTest, AllocAlignment) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  for (size_t iterations = 0; iterations <= 10; ++iterations) {
+    for (size_t size = 1; size <= ArenaAllocator::kAlignment + 1; ++size) {
+      void* allocation = arena.Alloc(size);
+      EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(allocation))
+          << reinterpret_cast<uintptr_t>(allocation);
+    }
+  }
+}
+
+TEST_F(ArenaAllocatorTest, ReallocReuse) {
+  // Realloc does not reuse arenas when running under sanitization, so we
+  // cannot do those reuse checks here.
+  if (RUNNING_ON_MEMORY_TOOL != 0) {
+    printf("WARNING: TEST DISABLED FOR MEMORY_TOOL\n");
+    return;
+  }
+
+  {
+    // Case 1: small aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_EQ(original_allocation, realloc_allocation);
+  }
+
+  {
+    // Case 2: small aligned allocation, non-aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+
+    const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_EQ(original_allocation, realloc_allocation);
+  }
+
+  {
+    // Case 3: small non-aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+
+    const size_t new_size = ArenaAllocator::kAlignment * 4;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_EQ(original_allocation, realloc_allocation);
+  }
+
+  {
+    // Case 4: small non-aligned allocation, aligned non-extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_EQ(original_allocation, realloc_allocation);
+  }
+
+  // The next part is brittle, as the default arena size is variable and we
+  // do not know how sanitization (red zones) changes the effective capacity.
+
+  {
+    // Case 5: large allocation, aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+    void* original_allocation = arena.Alloc(original_size);
+
+    const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_NE(original_allocation, realloc_allocation);
+  }
+
+  {
+    // Case 6: large allocation, non-aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize -
+        ArenaAllocator::kAlignment * 4 -
+        ArenaAllocator::kAlignment / 2;
+    void* original_allocation = arena.Alloc(original_size);
+
+    const size_t new_size = Arena::kDefaultSize +
+        ArenaAllocator::kAlignment * 2 +
+        ArenaAllocator::kAlignment / 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_NE(original_allocation, realloc_allocation);
+  }
+}
+
+TEST_F(ArenaAllocatorTest, ReallocAlignment) {
+  {
+    // Case 1: small aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 2: small aligned allocation, non-aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 3: small non-aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 4;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 4: small non-aligned allocation, aligned non-extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  // The next part is brittle, as the default arena size is variable and we
+  // do not know how sanitization (red zones) changes the effective capacity.
+
+  {
+    // Case 5: large allocation, aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 6: large allocation, non-aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize -
+        ArenaAllocator::kAlignment * 4 -
+        ArenaAllocator::kAlignment / 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = Arena::kDefaultSize +
+        ArenaAllocator::kAlignment * 2 +
+        ArenaAllocator::kAlignment / 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+}
+
+}  // namespace art
diff --git a/runtime/base/arena_bit_vector.cc b/runtime/base/arena_bit_vector.cc
index fbbfd84..5f8f5d2 100644
--- a/runtime/base/arena_bit_vector.cc
+++ b/runtime/base/arena_bit_vector.cc
@@ -21,36 +21,78 @@
 
 namespace art {
 
-template <typename ArenaAlloc>
-class ArenaBitVectorAllocator FINAL : public Allocator,
-    public ArenaObject<kArenaAllocGrowableBitMap> {
+template <bool kCount>
+class ArenaBitVectorAllocatorKindImpl;
+
+template <>
+class ArenaBitVectorAllocatorKindImpl<false> {
  public:
-  explicit ArenaBitVectorAllocator(ArenaAlloc* arena) : arena_(arena) {}
-  ~ArenaBitVectorAllocator() {}
+  // Not tracking allocations; ignore the supplied kind and arbitrarily
+  // provide kArenaAllocGrowableBitMap to match Kind() below.
+  explicit ArenaBitVectorAllocatorKindImpl(ArenaAllocKind kind ATTRIBUTE_UNUSED) {}
+  ArenaBitVectorAllocatorKindImpl(const ArenaBitVectorAllocatorKindImpl&) = default;
+  ArenaBitVectorAllocatorKindImpl& operator=(const ArenaBitVectorAllocatorKindImpl&) = default;
+  ArenaAllocKind Kind() { return kArenaAllocGrowableBitMap; }
+};
+
+template <bool kCount>
+class ArenaBitVectorAllocatorKindImpl {
+ public:
+  explicit ArenaBitVectorAllocatorKindImpl(ArenaAllocKind kind) : kind_(kind) { }
+  ArenaBitVectorAllocatorKindImpl(const ArenaBitVectorAllocatorKindImpl&) = default;
+  ArenaBitVectorAllocatorKindImpl& operator=(const ArenaBitVectorAllocatorKindImpl&) = default;
+  ArenaAllocKind Kind() { return kind_; }
+
+ private:
+  ArenaAllocKind kind_;
+};
+
+using ArenaBitVectorAllocatorKind =
+    ArenaBitVectorAllocatorKindImpl<kArenaAllocatorCountAllocations>;
+
+template <typename ArenaAlloc>
+class ArenaBitVectorAllocator FINAL : public Allocator, private ArenaBitVectorAllocatorKind {
+ public:
+  static ArenaBitVectorAllocator* Create(ArenaAlloc* arena, ArenaAllocKind kind) {
+    void* storage = arena->template Alloc<ArenaBitVectorAllocator>(kind);
+    return new (storage) ArenaBitVectorAllocator(arena, kind);
+  }
+
+  ~ArenaBitVectorAllocator() {
+    LOG(FATAL) << "UNREACHABLE";
+    UNREACHABLE();
+  }
 
   virtual void* Alloc(size_t size) {
-    return arena_->Alloc(size, kArenaAllocGrowableBitMap);
+    return arena_->Alloc(size, this->Kind());
   }
 
   virtual void Free(void*) {}  // Nop.
 
  private:
+  ArenaBitVectorAllocator(ArenaAlloc* arena, ArenaAllocKind kind)
+      : ArenaBitVectorAllocatorKind(kind), arena_(arena) { }
+
   ArenaAlloc* const arena_;
+
   DISALLOW_COPY_AND_ASSIGN(ArenaBitVectorAllocator);
 };
 
-ArenaBitVector::ArenaBitVector(ArenaAllocator* arena, unsigned int start_bits,
-                               bool expandable, OatBitMapKind kind)
-  :  BitVector(start_bits, expandable,
-               new (arena) ArenaBitVectorAllocator<ArenaAllocator>(arena)), kind_(kind) {
-  UNUSED(kind_);
+ArenaBitVector::ArenaBitVector(ArenaAllocator* arena,
+                               unsigned int start_bits,
+                               bool expandable,
+                               ArenaAllocKind kind)
+  :  BitVector(start_bits,
+               expandable,
+               ArenaBitVectorAllocator<ArenaAllocator>::Create(arena, kind)) {
 }
 
-ArenaBitVector::ArenaBitVector(ScopedArenaAllocator* arena, unsigned int start_bits,
-                               bool expandable, OatBitMapKind kind)
-  :  BitVector(start_bits, expandable,
-               new (arena) ArenaBitVectorAllocator<ScopedArenaAllocator>(arena)), kind_(kind) {
-  UNUSED(kind_);
+ArenaBitVector::ArenaBitVector(ScopedArenaAllocator* arena,
+                               unsigned int start_bits,
+                               bool expandable,
+                               ArenaAllocKind kind)
+  :  BitVector(start_bits,
+               expandable,
+               ArenaBitVectorAllocator<ScopedArenaAllocator>::Create(arena, kind)) {
 }
 
 }  // namespace art
diff --git a/runtime/base/arena_bit_vector.h b/runtime/base/arena_bit_vector.h
index d606166..d86d622 100644
--- a/runtime/base/arena_bit_vector.h
+++ b/runtime/base/arena_bit_vector.h
@@ -25,44 +25,34 @@
 class ArenaAllocator;
 class ScopedArenaAllocator;
 
-// Type of growable bitmap for memory tuning.
-enum OatBitMapKind {
-  kBitMapMisc = 0,
-  kBitMapUse,
-  kBitMapDef,
-  kBitMapLiveIn,
-  kBitMapBMatrix,
-  kBitMapDominators,
-  kBitMapIDominated,
-  kBitMapDomFrontier,
-  kBitMapRegisterV,
-  kBitMapTempSSARegisterV,
-  kBitMapNullCheck,
-  kBitMapClInitCheck,
-  kBitMapPredecessors,
-  kNumBitMapKinds
-};
-
-std::ostream& operator<<(std::ostream& os, const OatBitMapKind& kind);
-
 /*
  * A BitVector implementation that uses Arena allocation.
  */
 class ArenaBitVector : public BitVector, public ArenaObject<kArenaAllocGrowableBitMap> {
  public:
-  ArenaBitVector(ArenaAllocator* arena, uint32_t start_bits, bool expandable,
-                 OatBitMapKind kind = kBitMapMisc);
-  ArenaBitVector(ScopedArenaAllocator* arena, uint32_t start_bits, bool expandable,
-                 OatBitMapKind kind = kBitMapMisc);
+  template <typename Allocator>
+  static ArenaBitVector* Create(Allocator* arena,
+                                uint32_t start_bits,
+                                bool expandable,
+                                ArenaAllocKind kind = kArenaAllocGrowableBitMap) {
+    void* storage = arena->template Alloc<ArenaBitVector>(kind);
+    return new (storage) ArenaBitVector(arena, start_bits, expandable, kind);
+  }
+
+  ArenaBitVector(ArenaAllocator* arena,
+                 uint32_t start_bits,
+                 bool expandable,
+                 ArenaAllocKind kind = kArenaAllocGrowableBitMap);
+  ArenaBitVector(ScopedArenaAllocator* arena,
+                 uint32_t start_bits,
+                 bool expandable,
+                 ArenaAllocKind kind = kArenaAllocGrowableBitMap);
   ~ArenaBitVector() {}
 
  private:
-  const OatBitMapKind kind_;      // for memory use tuning. TODO: currently unused.
-
   DISALLOW_COPY_AND_ASSIGN(ArenaBitVector);
 };
 
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ARENA_BIT_VECTOR_H_
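
With OatBitMapKind gone, the allocation kind now flows straight through to the arena statistics. A sketch of the two construction paths available after this change:

ArenaPool pool;
ArenaAllocator arena(&pool);

// Direct construction; the backing storage comes from `arena`.
ArenaBitVector local(&arena, /* start_bits */ 64, /* expandable */ true);
local.SetBit(63);

// Create() additionally places the ArenaBitVector object itself in the
// arena and tags the allocation with the supplied kind.
ArenaBitVector* embedded =
    ArenaBitVector::Create(&arena, 64, /* expandable */ true, kArenaAllocGrowableBitMap);
embedded->SetBit(5);
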
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index e2d4c24..68cacd5 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -20,6 +20,7 @@
 #include <deque>
 #include <queue>
 #include <set>
+#include <stack>
 #include <utility>
 
 #include "arena_allocator.h"
@@ -54,6 +55,12 @@
 using ArenaVector = dchecked_vector<T, ArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
+using ArenaPriorityQueue = std::priority_queue<T, ArenaVector<T>, Comparator>;
+
+template <typename T>
+using ArenaStdStack = std::stack<T, ArenaDeque<T>>;
+
+template <typename T, typename Comparator = std::less<T>>
 using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>;
 
 template <typename K, typename V, typename Comparator = std::less<K>>
diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h
index 56e35d8..2d8e7d8 100644
--- a/runtime/base/arena_object.h
+++ b/runtime/base/arena_object.h
@@ -48,7 +48,6 @@
 
 
 // Parent for arena allocated objects that get deleted, gives appropriate new and delete operators.
-// Currently this is used by the quick compiler for debug reference counting arena allocations.
 template<enum ArenaAllocKind kAllocKind>
 class DeletableArenaObject {
  public:
diff --git a/runtime/base/array_slice.h b/runtime/base/array_slice.h
new file mode 100644
index 0000000..32283d0
--- /dev/null
+++ b/runtime/base/array_slice.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ARRAY_SLICE_H_
+#define ART_RUNTIME_BASE_ARRAY_SLICE_H_
+
+#include "length_prefixed_array.h"
+#include "stride_iterator.h"
+#include "base/bit_utils.h"
+#include "base/casts.h"
+#include "base/iteration_range.h"
+
+namespace art {
+
+// An ArraySlice is an abstraction over an array or a part of an array of a particular type. It does
+// bounds checking and can be made from several common array-like structures in ART.
+template<typename T>
+class ArraySlice {
+ public:
+  // Create an empty array slice.
+  ArraySlice() : array_(nullptr), size_(0), element_size_(0) {}
+
+  // Create an array slice of the first 'length' elements of the array, with each element being
+  // element_size bytes long.
+  ArraySlice(T* array,
+             size_t length,
+             size_t element_size = sizeof(T))
+      : array_(array),
+        size_(dchecked_integral_cast<uint32_t>(length)),
+        element_size_(element_size) {
+    DCHECK(array_ != nullptr || length == 0);
+  }
+
+  // Create an array slice of the elements between start_offset and end_offset of the array with
+  // each element being element_size bytes long. Both start_offset and end_offset are in
+  // element_size units.
+  ArraySlice(T* array,
+             uint32_t start_offset,
+             uint32_t end_offset,
+             size_t element_size = sizeof(T))
+      : array_(nullptr),
+        size_(end_offset - start_offset),
+        element_size_(element_size) {
+    DCHECK(array != nullptr || size_ == 0);
+    DCHECK_LE(start_offset, end_offset);
+    if (size_ != 0) {
+      uintptr_t offset = start_offset * element_size_;
+      array_ = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array) + offset);
+    }
+  }
+
+  // Create an array slice of the elements between start_offset and end_offset of the array with
+  // each element being element_size bytes long and having the given alignment. Both start_offset
+  // and end_offset are in element_size units.
+  ArraySlice(LengthPrefixedArray<T>* array,
+             uint32_t start_offset,
+             uint32_t end_offset,
+             size_t element_size = sizeof(T),
+             size_t alignment = alignof(T))
+      : array_(nullptr),
+        size_(end_offset - start_offset),
+        element_size_(element_size) {
+    DCHECK(array != nullptr || size_ == 0);
+    if (size_ != 0) {
+      DCHECK_LE(start_offset, end_offset);
+      DCHECK_LE(start_offset, array->size());
+      DCHECK_LE(end_offset, array->size());
+      array_ = &array->At(start_offset, element_size_, alignment);
+    }
+  }
+
+  T& At(size_t index) {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index);
+  }
+
+  const T& At(size_t index) const {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index);
+  }
+
+  T& operator[](size_t index) {
+    return At(index);
+  }
+
+  const T& operator[](size_t index) const {
+    return At(index);
+  }
+
+  StrideIterator<T> begin() {
+    return StrideIterator<T>(&AtUnchecked(0), element_size_);
+  }
+
+  StrideIterator<const T> begin() const {
+    return StrideIterator<const T>(&AtUnchecked(0), element_size_);
+  }
+
+  StrideIterator<T> end() {
+    return StrideIterator<T>(&AtUnchecked(size_), element_size_);
+  }
+
+  StrideIterator<const T> end() const {
+    return StrideIterator<const T>(&AtUnchecked(size_), element_size_);
+  }
+
+  IterationRange<StrideIterator<T>> AsRange() {
+    return size() != 0 ? MakeIterationRange(begin(), end())
+                       : MakeEmptyIterationRange(StrideIterator<T>(nullptr, 0));
+  }
+
+  size_t size() const {
+    return size_;
+  }
+
+  size_t ElementSize() const {
+    return element_size_;
+  }
+
+  bool Contains(const T* element) const {
+    return &AtUnchecked(0) <= element && element < &AtUnchecked(size_);
+  }
+
+ private:
+  T& AtUnchecked(size_t index) {
+    return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
+  }
+
+  const T& AtUnchecked(size_t index) const {
+    return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
+  }
+
+  T* array_;
+  size_t size_;
+  size_t element_size_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ARRAY_SLICE_H_
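
A brief usage sketch: a whole-array view, a sub-range view, and iteration via the stride iterators. Plain uint32_t elements are used here, so element_size defaults to sizeof(T):

uint32_t values[8] = {0, 1, 2, 3, 4, 5, 6, 7};
ArraySlice<uint32_t> whole(values, /* length */ 8);
ArraySlice<uint32_t> tail(values, /* start_offset */ 4, /* end_offset */ 8);

size_t sum = 0;
for (uint32_t v : whole) {  // begin()/end() walk element_size_ bytes per step.
  sum += v;
}
// tail[0] == 4; tail.At(4) would fail the DCHECK_LT bounds check.
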
diff --git a/runtime/base/bit_field.h b/runtime/base/bit_field.h
index fd65d50..a80ca28 100644
--- a/runtime/base/bit_field.h
+++ b/runtime/base/bit_field.h
@@ -26,9 +26,18 @@
 
 // BitField is a template for encoding and decoding a bit field inside
 // an unsigned machine word.
-template<typename T, int position, int size>
+template<typename T, size_t kPosition, size_t kSize>
 class BitField {
  public:
+  typedef T value_type;
+  static constexpr size_t position = kPosition;
+  static constexpr size_t size = kSize;
+
+  static_assert(position < sizeof(uintptr_t) * kBitsPerByte, "Invalid position.");
+  static_assert(size != 0u, "Invalid size.");
+  static_assert(size <= sizeof(uintptr_t) * kBitsPerByte, "Invalid size.");
+  static_assert(size + position <= sizeof(uintptr_t) * kBitsPerByte, "Invalid position + size.");
+
   // Tells whether the provided value fits into the bit field.
   static bool IsValid(T value) {
     return (static_cast<uintptr_t>(value) & ~((kUintPtrTOne << size) - 1)) == 0;
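
The template parameters are now named constants with compile-time layout checks. A sketch of a 2-bit field at bit 30, in the spirit of the lock word's state field; the customary Encode()/Decode() members alongside IsValid() are assumed, not shown in this hunk:

// A 2-bit field occupying bits 30-31 of a 32-bit word.
using StateField = BitField<uint32_t, /* kPosition */ 30, /* kSize */ 2>;
static_assert(StateField::position == 30, "layout");
static_assert(StateField::size == 2, "layout");
// BitField<uint32_t, 31, 2> would now trip the "Invalid position + size."
// static_assert on a target with 32-bit uintptr_t.
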
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 9c78ee5..f279f45 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -23,6 +23,7 @@
 
 #include "base/logging.h"
 #include "base/iteration_range.h"
+#include "base/stl_util.h"
 
 namespace art {
 
@@ -53,6 +54,7 @@
           : __builtin_ctzll(x);
 }
 
+// Return the number of 1-bits in `x`.
 template<typename T>
 static constexpr int POPCOUNT(T x) {
   return (sizeof(T) == sizeof(uint32_t))
@@ -107,12 +109,12 @@
 }
 
 // For rounding integers.
-// NOTE: In the absence of std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
+// Note: Omit `n` from T type deduction; deduce T only from the `x` argument.
 template<typename T>
-static constexpr T RoundDown(T x, typename std::decay<T>::type n) WARN_UNUSED;
+static constexpr T RoundDown(T x, typename Identity<T>::type n) WARN_UNUSED;
 
 template<typename T>
-static constexpr T RoundDown(T x, typename std::decay<T>::type n) {
+static constexpr T RoundDown(T x, typename Identity<T>::type n) {
   return
       DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0))
       (x & -n);
@@ -267,7 +269,7 @@
 template <typename T>
 static constexpr T MaxInt(size_t bits) {
   return
-      DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0)
+      DCHECK_CONSTEXPR(std::is_unsigned<T>::value || bits > 0, "bits cannot be zero for signed", 0)
       DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0)
       bits == BitSizeOf<T>()
           ? std::numeric_limits<T>::max()
@@ -281,7 +283,7 @@
 template <typename T>
 static constexpr T MinInt(size_t bits) {
   return
-      DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0)
+      DCHECK_CONSTEXPR(std::is_unsigned<T>::value || bits > 0, "bits cannot be zero for signed", 0)
       DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0)
       bits == BitSizeOf<T>()
           ? std::numeric_limits<T>::min()
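
The Identity<T> change makes `n` a non-deduced context, so T is deduced from `x` alone and mixed-type calls stay well-formed. A small illustration:

size_t offset = 13u;
// T deduces to size_t from `offset`; the literal 8 simply converts.
size_t aligned = RoundDown(offset, 8);   // == 8
// With both parameters deduced (T x, T n), this call would deduce T as
// both size_t and int and fail to compile.
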
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 9b55e70..5609067 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -111,6 +111,20 @@
     const BitVector* const bit_vector_;
   };
 
+  // MoveConstructible but not MoveAssignable, CopyConstructible or CopyAssignable.
+
+  BitVector(const BitVector& other) = delete;
+  BitVector& operator=(const BitVector& other) = delete;
+
+  BitVector(BitVector&& other)
+      : storage_(other.storage_),
+        storage_size_(other.storage_size_),
+        allocator_(other.allocator_),
+        expandable_(other.expandable_) {
+    other.storage_ = nullptr;
+    other.storage_size_ = 0u;
+  }
+
   BitVector(uint32_t start_bits,
             bool expandable,
             Allocator* allocator);
@@ -229,6 +243,11 @@
    */
   int GetHighestBitSet() const;
 
+  // Minimum number of bits required to store this vector, 0 if none are set.
+  size_t GetNumberOfBits() const {
+    return GetHighestBitSet() + 1;
+  }
+
   // Is bit set in storage. (No range check.)
   static bool IsBitSet(const uint32_t* storage, uint32_t idx) {
     return (storage[WordIndex(idx)] & BitMask(idx)) != 0;
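
One edge case worth spelling out for the new GetNumberOfBits(): GetHighestBitSet() returns -1 for an all-zero vector, so the sum yields 0 as documented. A sketch, assuming the malloc-backed Allocator from base/allocator.h:

BitVector bv(/* start_bits */ 32, /* expandable */ false, Allocator::GetMallocAllocator());
CHECK_EQ(bv.GetNumberOfBits(), 0u);   // No bit set: -1 + 1 == 0.
bv.SetBit(17);
CHECK_EQ(bv.GetNumberOfBits(), 18u);  // Bits [0, 17] must be representable.
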
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index f884649..6b67864 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -19,6 +19,7 @@
 
 #include <assert.h>
 #include <limits>
+#include <stdint.h>
 #include <string.h>
 #include <type_traits>
 
@@ -34,7 +35,7 @@
 // When you use implicit_cast, the compiler checks that the cast is safe.
 // Such explicit implicit_casts are necessary in surprisingly many
 // situations where C++ demands an exact type match instead of an
-// argument type convertable to a target type.
+// argument type convertible to a target type.
 //
 // The From type can be inferred, so the preferred syntax for using
 // implicit_cast is the same as for static_cast etc.:
@@ -102,6 +103,29 @@
   return static_cast<Dest>(source);
 }
 
+// A version of reinterpret_cast<>() between pointers and int64_t/uint64_t
+// that goes through uintptr_t to avoid treating the pointer as "signed."
+
+template <typename Dest, typename Source>
+inline Dest reinterpret_cast64(Source source) {
+  // This is the overload for casting from int64_t/uint64_t to a pointer.
+  static_assert(std::is_same<Source, int64_t>::value || std::is_same<Source, uint64_t>::value,
+                "Source must be int64_t or uint64_t.");
+  static_assert(std::is_pointer<Dest>::value, "Dest must be a pointer.");
+  // Check that we don't lose any non-0 bits here.
+  DCHECK_EQ(static_cast<Source>(static_cast<uintptr_t>(source)), source);
+  return reinterpret_cast<Dest>(static_cast<uintptr_t>(source));
+}
+
+template <typename Dest, typename Source>
+inline Dest reinterpret_cast64(Source* ptr) {
+  // This is the overload for casting from a pointer to int64_t/uint64_t.
+  static_assert(std::is_same<Dest, int64_t>::value || std::is_same<Dest, uint64_t>::value,
+                "Dest must be int64_t or uint64_t.");
+  static_assert(sizeof(uintptr_t) <= sizeof(Dest), "Expecting at most 64-bit pointers.");
+  return static_cast<Dest>(reinterpret_cast<uintptr_t>(ptr));
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_CASTS_H_
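
A round-trip sketch: going through uintptr_t means a 32-bit pointer widens zero-extended into the 64-bit slot, and the DCHECK on the way back catches any truncation:

int object = 42;
uint64_t stored = reinterpret_cast64<uint64_t>(&object);  // Pointer -> integer.
int* recovered = reinterpret_cast64<int*>(stored);        // Integer -> pointer.
CHECK_EQ(recovered, &object);
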
diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h
index 6ec573a..77f0ea2 100644
--- a/runtime/base/dchecked_vector.h
+++ b/runtime/base/dchecked_vector.h
@@ -33,7 +33,7 @@
 // but we do not use exceptions, so this accessor is deliberately hidden.
 // Note: The common pattern &v[0] used to retrieve pointer to the data is not
 // valid for an empty dchecked_vector<>. Use data() to avoid checking empty().
-template <typename T, typename Alloc>
+template <typename T, typename Alloc = std::allocator<T>>
 class dchecked_vector : private std::vector<T, Alloc> {
  private:
   // std::vector<> has a slightly different specialization for bool. We don't provide that.
diff --git a/runtime/base/enums.h b/runtime/base/enums.h
new file mode 100644
index 0000000..51b86ea
--- /dev/null
+++ b/runtime/base/enums.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ENUMS_H_
+#define ART_RUNTIME_BASE_ENUMS_H_
+
+#include <cstddef>
+#include <ostream>
+
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace art {
+
+enum class PointerSize : size_t {
+  k32 = 4,
+  k64 = 8
+};
+std::ostream& operator<<(std::ostream& os, const PointerSize& rhs);
+
+static constexpr PointerSize kRuntimePointerSize = sizeof(void*) == 8U
+                                                       ? PointerSize::k64
+                                                       : PointerSize::k32;
+
+template <typename T>
+static constexpr PointerSize ConvertToPointerSize(T any) {
+  if (any == 4 || any == 8) {
+    return static_cast<PointerSize>(any);
+  } else {
+    LOG(FATAL);
+    UNREACHABLE();
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ENUMS_H_
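
A small sketch of the typed pointer width replacing raw size_t values:

PointerSize image_pointer_size = ConvertToPointerSize(sizeof(void*));
CHECK_EQ(image_pointer_size, kRuntimePointerSize);
size_t raw = static_cast<size_t>(kRuntimePointerSize);  // Back to 4 or 8 for arithmetic.
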
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
new file mode 100644
index 0000000..de6f423
--- /dev/null
+++ b/runtime/base/file_magic.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_magic.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "base/logging.h"
+#include "base/unix_file/fd_file.h"
+#include "dex_file.h"
+#include "stringprintf.h"
+
+namespace art {
+
+File OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
+  CHECK(magic != nullptr);
+  File fd(filename, O_RDONLY, /* check_usage */ false);
+  if (fd.Fd() == -1) {
+    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
+    return File();
+  }
+  int n = TEMP_FAILURE_RETRY(read(fd.Fd(), magic, sizeof(*magic)));
+  if (n != sizeof(*magic)) {
+    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
+    return File();
+  }
+  if (lseek(fd.Fd(), 0, SEEK_SET) != 0) {
+    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
+                              strerror(errno));
+    return File();
+  }
+  return fd;
+}
+
+bool IsZipMagic(uint32_t magic) {
+  return (('P' == ((magic >> 0) & 0xff)) &&
+          ('K' == ((magic >> 8) & 0xff)));
+}
+
+bool IsDexMagic(uint32_t magic) {
+  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
+}
+
+}  // namespace art
diff --git a/runtime/base/file_magic.h b/runtime/base/file_magic.h
new file mode 100644
index 0000000..4b5d2f5
--- /dev/null
+++ b/runtime/base/file_magic.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_FILE_MAGIC_H_
+#define ART_RUNTIME_BASE_FILE_MAGIC_H_
+
+#include <stdint.h>
+#include <string>
+
+#include "os.h"
+
+namespace art {
+
+// Opens a file and reads its magic number.
+File OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
+
+// Check whether the given magic matches a known file type.
+bool IsZipMagic(uint32_t magic);
+bool IsDexMagic(uint32_t magic);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_FILE_MAGIC_H_
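A sketch of the intended call sequence (the path is hypothetical); OpenAndReadMagic() returns the file rewound to offset 0 so the caller can re-read the header:

    std::string error_msg;
    uint32_t magic = 0u;
    File file = OpenAndReadMagic("/data/app/example.apk", &magic, &error_msg);
    if (file.Fd() == -1) {
      LOG(ERROR) << error_msg;
    } else if (IsZipMagic(magic)) {
      // Open as a zip archive containing dex files.
    } else if (IsDexMagic(magic)) {
      // Open as a raw dex file.
    }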
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 4819f06..12d3be7 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -140,7 +140,7 @@
 
   HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
 
-  HashSet(double min_load_factor, double max_load_factor)
+  HashSet(double min_load_factor, double max_load_factor) noexcept
       : num_elements_(0u),
         num_buckets_(0u),
         elements_until_expand_(0u),
@@ -152,7 +152,7 @@
     DCHECK_LT(max_load_factor, 1.0);
   }
 
-  explicit HashSet(const allocator_type& alloc)
+  explicit HashSet(const allocator_type& alloc) noexcept
       : allocfn_(alloc),
         hashfn_(),
         emptyfn_(),
@@ -166,7 +166,7 @@
         max_load_factor_(kDefaultMaxLoadFactor) {
   }
 
-  HashSet(const HashSet& other)
+  HashSet(const HashSet& other) noexcept
       : allocfn_(other.allocfn_),
         hashfn_(other.hashfn_),
         emptyfn_(other.emptyfn_),
@@ -184,7 +184,9 @@
     }
   }
 
-  HashSet(HashSet&& other)
+  // noexcept required so that the move constructor is used instead of the copy constructor.
+  // b/27860101
+  HashSet(HashSet&& other) noexcept
       : allocfn_(std::move(other.allocfn_)),
         hashfn_(std::move(other.hashfn_)),
         emptyfn_(std::move(other.emptyfn_)),
@@ -206,7 +208,7 @@
   // Construct from existing data.
   // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
   // passed in ptr_.
-  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) {
+  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
     uint64_t temp;
     size_t offset = 0;
     offset = ReadFromBytes(ptr, offset, &temp);
@@ -236,7 +238,7 @@
 
   // Returns how large the table is after being written. If target is null, then no writing happens
   // but the size is still returned. Target must be 8 byte aligned.
-  size_t WriteToMemory(uint8_t* ptr) {
+  size_t WriteToMemory(uint8_t* ptr) const {
     size_t offset = 0;
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
@@ -256,12 +258,12 @@
     DeallocateStorage();
   }
 
-  HashSet& operator=(HashSet&& other) {
+  HashSet& operator=(HashSet&& other) noexcept {
     HashSet(std::move(other)).swap(*this);
     return *this;
   }
 
-  HashSet& operator=(const HashSet& other) {
+  HashSet& operator=(const HashSet& other) noexcept {
     HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
     return *this;
   }
@@ -298,6 +300,11 @@
     return Size() == 0;
   }
 
+  // Return true if the hash set has ownership of the underlying data.
+  bool OwnsData() const {
+    return owns_data_;
+  }
+
   // Erase algorithm:
   // Make an empty slot where the iterator is pointing.
   // Scan forwards until we hit another empty slot.
@@ -420,6 +427,19 @@
     Resize(Size() / max_load_factor_);
   }
 
+  // Reserve enough room to insert until Size() == num_elements without needing to grow the
+  // hash set. No-op if the hash set is already large enough for this.
+  void Reserve(size_t num_elements) {
+    size_t num_buckets = num_elements / max_load_factor_;
+    // Deal with rounding errors. Add one for rounding.
+    while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
+      ++num_buckets;
+    }
+    if (num_buckets > NumBuckets()) {
+      Resize(num_buckets);
+    }
+  }
+
   // Total distance that inserted elements were probed. Used for measuring how good hash
   // functions are.
   size_t TotalProbeDistance() const {
@@ -444,7 +464,7 @@
   }
 
   // Make sure that everything reinserts in the right spot. Returns the number of errors.
-  size_t Verify() {
+  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
     size_t errors = 0;
     for (size_t i = 0; i < num_buckets_; ++i) {
       T& element = data_[i];
@@ -488,6 +508,15 @@
     }
   }
 
+  // The hash set expands when Size() reaches ElementsUntilExpand().
+  size_t ElementsUntilExpand() const {
+    return elements_until_expand_;
+  }
+
+  size_t NumBuckets() const {
+    return num_buckets_;
+  }
+
  private:
   T& ElementForIndex(size_t index) {
     DCHECK_LT(index, NumBuckets());
@@ -543,10 +572,6 @@
     return emptyfn_.IsEmpty(ElementForIndex(index));
   }
 
-  size_t NumBuckets() const {
-    return num_buckets_;
-  }
-
   // Allocate a number of buckets.
   void AllocateStorage(size_t num_buckets) {
     num_buckets_ = num_buckets;
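The noexcept annotations matter because standard containers reallocate via std::move_if_noexcept; a sketch of the effect with a stand-in element type:

    struct Element {
      Element() = default;
      Element(Element&&) noexcept = default;  // chosen during vector growth
      Element(const Element&) = default;      // fallback if move could throw
    };
    std::vector<Element> v(1);
    v.reserve(v.capacity() + 1);  // relocates via the noexcept move constructor
    // Marking HashSet's move constructor noexcept gets the same cheap
    // relocation for containers of hash sets (b/27860101).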
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index 743e98e..8254063 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -333,4 +333,25 @@
   ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
 }
 
+TEST_F(HashSetTest, TestReserve) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
+  for (size_t size : sizes) {
+    hash_set.Reserve(size);
+    const size_t buckets_before = hash_set.NumBuckets();
+    // Check that we expanded enough.
+    CHECK_GE(hash_set.ElementsUntilExpand(), size);
+    // Try inserting elements until we are at our reserve size and ensure the hash set did not
+    // expand.
+    while (hash_set.Size() < size) {
+      hash_set.Insert(std::to_string(hash_set.Size()));
+    }
+    CHECK_EQ(hash_set.NumBuckets(), buckets_before);
+  }
+  // Check the behaviour for shrinking; Reserve() does not necessarily resize down.
+  constexpr size_t size = 100;
+  hash_set.Reserve(size);
+  CHECK_GE(hash_set.ElementsUntilExpand(), size);
+}
+
 }  // namespace art
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index 03980e3..4af47d1 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -26,6 +26,7 @@
 
 #include "base/bit_utils.h"
 #include "base/time_utils.h"
+#include "utils.h"
 
 namespace art {
 
@@ -200,6 +201,17 @@
 }
 
 template <class Value>
+inline void Histogram<Value>::PrintMemoryUse(std::ostream &os) const {
+  os << Name();
+  if (sample_size_ != 0u) {
+    os << ": Avg: " << PrettySize(Mean()) << " Max: "
+       << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  } else {
+    os << ": <no data>\n";
+  }
+}
+
+template <class Value>
 inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) const {
   DCHECK_GT(sample_size_, 0ull);
   out_data->freq_.clear();
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index ef3a5d7..0e3bc8e 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -59,6 +59,7 @@
   double Percentile(double per, const CumulativeData& data) const;
   void PrintConfidenceIntervals(std::ostream& os, double interval,
                                 const CumulativeData& data) const;
+  void PrintMemoryUse(std::ostream& os) const;
   void PrintBins(std::ostream& os, const CumulativeData& data) const;
   void DumpBins(std::ostream& os) const;
   Value GetRange(size_t bucket_idx) const;
@@ -84,6 +85,10 @@
     return max_value_added_;
   }
 
+  Value BucketWidth() const {
+    return bucket_width_;
+  }
+
   const std::string& Name() const {
     return name_;
   }
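For reference, PrintMemoryUse() emits one summary line per histogram; given some Histogram<uint64_t> histogram of byte counts (name and values are illustrative):

    std::ostringstream oss;
    histogram.PrintMemoryUse(oss);
    // With samples:     "ImageSpaces: Avg: 12MB Max: 24MB Min: 4MB"
    // Without samples:  "ImageSpaces: <no data>"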
diff --git a/runtime/base/length_prefixed_array.h b/runtime/base/length_prefixed_array.h
new file mode 100644
index 0000000..8060263
--- /dev/null
+++ b/runtime/base/length_prefixed_array.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
+#define ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
+
+#include <stddef.h>  // for offsetof()
+#include <string.h>  // for memset()
+
+#include "stride_iterator.h"
+#include "base/bit_utils.h"
+#include "base/casts.h"
+#include "base/iteration_range.h"
+
+namespace art {
+
+template<typename T>
+class LengthPrefixedArray {
+ public:
+  explicit LengthPrefixedArray(size_t length)
+      : size_(dchecked_integral_cast<uint32_t>(length)) {}
+
+  T& At(size_t index, size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index, element_size, alignment);
+  }
+
+  const T& At(size_t index, size_t element_size = sizeof(T), size_t alignment = alignof(T)) const {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index, element_size, alignment);
+  }
+
+  StrideIterator<T> begin(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+    return StrideIterator<T>(&AtUnchecked(0, element_size, alignment), element_size);
+  }
+
+  StrideIterator<const T> begin(size_t element_size = sizeof(T),
+                                size_t alignment = alignof(T)) const {
+    return StrideIterator<const T>(&AtUnchecked(0, element_size, alignment), element_size);
+  }
+
+  StrideIterator<T> end(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+    return StrideIterator<T>(&AtUnchecked(size_, element_size, alignment), element_size);
+  }
+
+  StrideIterator<const T> end(size_t element_size = sizeof(T),
+                              size_t alignment = alignof(T)) const {
+    return StrideIterator<const T>(&AtUnchecked(size_, element_size, alignment), element_size);
+  }
+
+  static size_t OffsetOfElement(size_t index,
+                                size_t element_size = sizeof(T),
+                                size_t alignment = alignof(T)) {
+    DCHECK_ALIGNED_PARAM(element_size, alignment);
+    return RoundUp(offsetof(LengthPrefixedArray<T>, data), alignment) + index * element_size;
+  }
+
+  static size_t ComputeSize(size_t num_elements,
+                            size_t element_size = sizeof(T),
+                            size_t alignment = alignof(T)) {
+    size_t result = OffsetOfElement(num_elements, element_size, alignment);
+    DCHECK_ALIGNED_PARAM(result, alignment);
+    return result;
+  }
+
+  size_t size() const {
+    return size_;
+  }
+
+  // Updates the length but does not reallocate storage.
+  void SetSize(size_t length) {
+    size_ = dchecked_integral_cast<uint32_t>(length);
+  }
+
+  // Clear the potentially uninitialized padding between the size_ and actual data.
+  void ClearPadding(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+    size_t gap_offset = offsetof(LengthPrefixedArray<T>, data);
+    size_t gap_size = OffsetOfElement(0, element_size, alignment) - gap_offset;
+    memset(reinterpret_cast<uint8_t*>(this) + gap_offset, 0, gap_size);
+  }
+
+ private:
+  T& AtUnchecked(size_t index, size_t element_size, size_t alignment) {
+    return *reinterpret_cast<T*>(
+        reinterpret_cast<uintptr_t>(this) + OffsetOfElement(index, element_size, alignment));
+  }
+
+  const T& AtUnchecked(size_t index, size_t element_size, size_t alignment) const {
+    return *reinterpret_cast<T*>(
+        reinterpret_cast<uintptr_t>(this) + OffsetOfElement(index, element_size, alignment));
+  }
+
+  uint32_t size_;
+  uint8_t data[0];
+};
+
+// Returns an empty iteration range if the array is null.
+template<typename T>
+IterationRange<StrideIterator<T>> MakeIterationRangeFromLengthPrefixedArray(
+    LengthPrefixedArray<T>* arr, size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+  return arr != nullptr ?
+      MakeIterationRange(arr->begin(element_size, alignment), arr->end(element_size, alignment)) :
+      MakeEmptyIterationRange(StrideIterator<T>(nullptr, 0));
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
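A sketch of the in-place construction pattern the class is designed for (storage lifetime is the caller's responsibility):

    #include <new>  // placement new
    const size_t num = 16;
    void* storage = malloc(LengthPrefixedArray<uint32_t>::ComputeSize(num));
    auto* array = new (storage) LengthPrefixedArray<uint32_t>(num);
    array->ClearPadding();  // zero the gap between size_ and the first element
    for (uint32_t& value : MakeIterationRangeFromLengthPrefixedArray(array)) {
      value = 0u;  // stride iteration touches all 16 slots
    }
    array->At(0) = 42u;  // At() DCHECKs the index against size()
    free(storage);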
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 7a620e3..28352cb 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -26,7 +26,7 @@
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/log.h"
 #else
 #include <sys/types.h>
@@ -47,7 +47,7 @@
 // Print INTERNAL_FATAL messages directly instead of at destruction time. This only works on the
 // host right now: for the device, a stream buf collating output into lines and calling LogLine or
 // lower-level logging is necessary.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static constexpr bool kPrintInternalFatalDirectly = false;
 #else
 static constexpr bool kPrintInternalFatalDirectly = !kIsTargetBuild;
@@ -185,14 +185,18 @@
 LogMessage::LogMessage(const char* file, unsigned int line, LogSeverity severity, int error)
   : data_(new LogMessageData(file, line, severity, error)) {
   if (PrintDirectly(severity)) {
-    static const char* log_characters = "VDIWEFF";
-    CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
-    stream() << ProgramInvocationShortName() << " " << log_characters[static_cast<size_t>(severity)]
+    static constexpr char kLogCharacters[] = { 'N', 'V', 'D', 'I', 'W', 'E', 'F', 'F' };
+    static_assert(arraysize(kLogCharacters) == static_cast<size_t>(INTERNAL_FATAL) + 1,
+                  "Wrong character array size");
+    stream() << ProgramInvocationShortName() << " " << kLogCharacters[static_cast<size_t>(severity)]
              << " " << getpid() << " " << ::art::GetTid() << " " << file << ":" <<  line << "]";
   }
 }
 LogMessage::~LogMessage() {
-  if (!PrintDirectly(data_->GetSeverity())) {
+  if (PrintDirectly(data_->GetSeverity())) {
+    // Add a newline at the end to match the behavior when not printing directly.
+    std::cerr << '\n';
+  } else if (data_->GetSeverity() != LogSeverity::NONE) {
     if (data_->GetSeverity() < gMinimumLogSeverity) {
       return;  // No need to format something we're not going to output.
     }
@@ -234,8 +238,9 @@
   return data_->GetBuffer();
 }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static const android_LogPriority kLogSeverityToAndroidLogPriority[] = {
+  ANDROID_LOG_VERBOSE,  // NONE, use verbose as stand-in, will never be printed.
   ANDROID_LOG_VERBOSE, ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, ANDROID_LOG_WARN,
   ANDROID_LOG_ERROR, ANDROID_LOG_FATAL, ANDROID_LOG_FATAL
 };
@@ -245,16 +250,20 @@
 
 void LogMessage::LogLine(const char* file, unsigned int line, LogSeverity log_severity,
                          const char* message) {
-#ifdef __ANDROID__
+  if (log_severity == LogSeverity::NONE) {
+    return;
+  }
+
+#ifdef ART_TARGET_ANDROID
   const char* tag = ProgramInvocationShortName();
-  int priority = kLogSeverityToAndroidLogPriority[log_severity];
+  int priority = kLogSeverityToAndroidLogPriority[static_cast<size_t>(log_severity)];
   if (priority == ANDROID_LOG_FATAL) {
     LOG_PRI(priority, tag, "%s:%u] %s", file, line, message);
   } else {
     LOG_PRI(priority, tag, "%s", message);
   }
 #else
-  static const char* log_characters = "VDIWEFF";
+  static const char* log_characters = "NVDIWEFF";
   CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
   char severity = log_characters[log_severity];
   fprintf(stderr, "%s %c %5d %5d %s:%u] %s\n",
@@ -264,10 +273,14 @@
 
 void LogMessage::LogLineLowStack(const char* file, unsigned int line, LogSeverity log_severity,
                                  const char* message) {
-#ifdef __ANDROID__
+  if (log_severity == LogSeverity::NONE) {
+    return;
+  }
+
+#ifdef ART_TARGET_ANDROID
   // Use android_writeLog() to avoid stack-based buffers used by android_printLog().
   const char* tag = ProgramInvocationShortName();
-  int priority = kLogSeverityToAndroidLogPriority[log_severity];
+  int priority = kLogSeverityToAndroidLogPriority[static_cast<size_t>(log_severity)];
   char* buf = nullptr;
   size_t buf_size = 0u;
   if (priority == ANDROID_LOG_FATAL) {
@@ -285,13 +298,14 @@
     android_writeLog(priority, tag, message);
   }
 #else
-  static const char* log_characters = "VDIWEFF";
-  CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
+  static constexpr char kLogCharacters[] = { 'N', 'V', 'D', 'I', 'W', 'E', 'F', 'F' };
+  static_assert(arraysize(kLogCharacters) == static_cast<size_t>(INTERNAL_FATAL) + 1,
+                "Wrong character array size");
 
   const char* program_name = ProgramInvocationShortName();
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, program_name, strlen(program_name)));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, " ", 1));
-  TEMP_FAILURE_RETRY(write(STDERR_FILENO, &log_characters[log_severity], 1));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, &kLogCharacters[static_cast<size_t>(log_severity)], 1));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, " ", 1));
   // TODO: pid and tid.
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, file, strlen(file)));
@@ -300,7 +314,7 @@
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, "] ", 2));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, message, strlen(message)));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, "\n", 1));
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 ScopedLogSeverity::ScopedLogSeverity(LogSeverity level) {
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 2cd1a4d..ac21a3f 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -24,6 +24,7 @@
 namespace art {
 
 enum LogSeverity {
+  NONE,            // Fake level, don't log at all.
   VERBOSE,
   DEBUG,
   INFO,
@@ -37,6 +38,7 @@
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
   bool class_linker;  // Enabled with "-verbose:class".
+  bool collector;
   bool compiler;
   bool deopt;
   bool gc;
@@ -48,10 +50,14 @@
   bool oat;
   bool profiler;
   bool signals;
+  bool simulator;
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
   bool verifier;
+  bool image;
+  bool systrace_lock_logging;  // Enabled with "-verbose:sys-locks".
+  bool agents;
 };
 
 // Global log verbosity setting, initialized by InitLogging.
@@ -135,11 +141,11 @@
 
 // Helper for CHECK_STRxx(s1,s2) macros.
 #define CHECK_STROP(s1, s2, sense) \
-  if (UNLIKELY((strcmp(s1, s2) == 0) != sense)) \
+  if (UNLIKELY((strcmp(s1, s2) == 0) != (sense))) \
     LOG(::art::FATAL) << "Check failed: " \
-        << "\"" << s1 << "\"" \
-        << (sense ? " == " : " != ") \
-        << "\"" << s2 << "\""
+        << "\"" << (s1) << "\"" \
+        << ((sense) ? " == " : " != ") \
+        << "\"" << (s2) << "\""
 
 // Check for string (const char*) equality between s1 and s2, LOG(FATAL) if not.
 #define CHECK_STREQ(s1, s2) CHECK_STROP(s1, s2, true)
@@ -151,7 +157,7 @@
     int rc = call args; \
     if (rc != 0) { \
       errno = rc; \
-      PLOG(::art::FATAL) << # call << " failed for " << what; \
+      PLOG(::art::FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
@@ -193,14 +199,14 @@
 // types of LHS and RHS.
 template <typename LHS, typename RHS>
 struct EagerEvaluator {
-  EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
+  constexpr EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
   LHS lhs;
   RHS rhs;
 };
 
 // Helper function for CHECK_xx.
 template <typename LHS, typename RHS>
-static inline EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
+static inline constexpr EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
   return EagerEvaluator<LHS, RHS>(lhs, rhs);
 }
 
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index dc692d2..5a50247 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -75,7 +75,7 @@
     ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } \
     ALWAYS_INLINE void operator delete(void*, void*) noexcept { } \
   private: \
-    void* operator new(size_t) = delete
+    void* operator new(size_t) = delete  // NOLINT
 
 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
@@ -135,13 +135,13 @@
 #define ARRAYSIZE_UNSAFE(a) \
   ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
 
-#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)
+#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)  // NOLINT
 
 #define OFFSETOF_MEMBER(t, f) \
-  (reinterpret_cast<const char*>(&reinterpret_cast<t*>(16)->f) - reinterpret_cast<const char*>(16)) // NOLINT
+  (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u))  // NOLINT
 
-#define OFFSETOF_VOLATILE_MEMBER(t, f) \
-  (reinterpret_cast<volatile char*>(&reinterpret_cast<t*>(16)->f) - reinterpret_cast<volatile char*>(16)) // NOLINT
+#define OFFSETOF_MEMBERPTR(t, f) \
+  (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16))  // NOLINT
 
 #define PACKED(x) __attribute__ ((__aligned__(x), __packed__))
 
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index bd8de87..1c32024 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -73,6 +73,11 @@
           level == kThreadListLock ||
           // Ignore logging which may or may not have set up thread data structures.
           level == kLoggingLock ||
+          // When transitioning from suspended to runnable, a daemon thread might be in
+          // a situation where the runtime is shutting down. To avoid crashing our debug locking
+          // mechanism, we just pass a null Thread* to the MutexLock during that transition
+          // (see Thread::TransitionFromSuspendedToRunnable).
+          level == kThreadSuspendCountLock ||
           // Avoid recursive death.
           level == kAbortLock) << level;
   }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 70bd398..264a530 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -19,12 +19,10 @@
 #include <errno.h>
 #include <sys/time.h>
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/trace.h"
-
 #include "atomic.h"
 #include "base/logging.h"
 #include "base/time_utils.h"
+#include "base/systrace.h"
 #include "base/value_object.h"
 #include "mutex-inl.h"
 #include "runtime.h"
@@ -51,7 +49,7 @@
 MutatorMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
 ReaderWriterMutex* Locks::oat_file_manager_lock_ = nullptr;
-ReaderWriterMutex* Locks::oat_file_count_lock_ = nullptr;
+Mutex* Locks::host_dlopen_handles_lock_ = nullptr;
 Mutex* Locks::reference_processor_lock_ = nullptr;
 Mutex* Locks::reference_queue_cleared_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_finalizer_references_lock_ = nullptr;
@@ -64,7 +62,6 @@
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::unexpected_signal_lock_ = nullptr;
-Mutex* Locks::lambda_table_lock_ = nullptr;
 Uninterruptible Roles::uninterruptible_;
 
 struct AllMutexData {
@@ -855,6 +852,18 @@
       PLOG(FATAL) << "futex wait failed for " << name_;
     }
   }
+  if (self != nullptr) {
+    JNIEnvExt* const env = self->GetJniEnv();
+    if (UNLIKELY(env != nullptr && env->runtime_deleted)) {
+      CHECK(self->IsDaemon());
+      // If the runtime has been deleted, then we cannot proceed. Just sleep forever. This may
+      // occur for user daemon threads that get a spurious wakeup. This occurs for test 132 with
+      // --host and --gdb.
+      // After we wake up, the runtime may have been shut down, which means that this condition
+      // have been deleted. It is not safe to retry the wait.
+      SleepForever();
+    }
+  }
   guard_.ExclusiveLock(self);
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
@@ -943,7 +952,7 @@
     DCHECK(deoptimization_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
     DCHECK(oat_file_manager_lock_ != nullptr);
-    DCHECK(oat_file_count_lock_ != nullptr);
+    DCHECK(host_dlopen_handles_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
     DCHECK(jni_libraries_lock_ != nullptr);
     DCHECK(logging_lock_ != nullptr);
@@ -953,7 +962,6 @@
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(unexpected_signal_lock_ != nullptr);
-    DCHECK(lambda_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -961,7 +969,7 @@
     instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
-      if (new_level >= current_lock_level) { \
+      if ((new_level) >= current_lock_level) { \
         /* Do not use CHECKs or FATAL here, abort_lock_ is not setup yet. */ \
         fprintf(stderr, "New local level %d is not less than current level %d\n", \
                 new_level, current_lock_level); \
@@ -997,10 +1005,6 @@
     DCHECK(alloc_tracker_lock_ == nullptr);
     alloc_tracker_lock_ = new Mutex("AllocTracker lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kInterpreterStringInitMapLock);
-    DCHECK(interpreter_string_init_map_lock_ == nullptr);
-    interpreter_string_init_map_lock_ = new Mutex("Interpreter String initializer reference map lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
     DCHECK(thread_list_lock_ == nullptr);
     thread_list_lock_ = new Mutex("thread list lock", current_lock_level);
@@ -1036,9 +1040,9 @@
     DCHECK(oat_file_manager_lock_ == nullptr);
     oat_file_manager_lock_ = new ReaderWriterMutex("OatFile manager lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kOatFileCountLock);
-    DCHECK(oat_file_count_lock_ == nullptr);
-    oat_file_count_lock_ = new ReaderWriterMutex("OatFile count lock", current_lock_level);
+    UPDATE_CURRENT_LOCK_LEVEL(kHostDlOpenHandlesLock);
+    DCHECK(host_dlopen_handles_lock_ == nullptr);
+    host_dlopen_handles_lock_ = new Mutex("host dlopen handles lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
@@ -1068,10 +1072,6 @@
     DCHECK(reference_queue_soft_references_lock_ == nullptr);
     reference_queue_soft_references_lock_ = new Mutex("ReferenceQueue soft references lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kLambdaTableLock);
-    DCHECK(lambda_table_lock_ == nullptr);
-    lambda_table_lock_ = new Mutex("lambda table lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
     DCHECK(abort_lock_ == nullptr);
     abort_lock_ = new Mutex("abort lock", current_lock_level, true);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index d4c9057..d0dc886 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -60,11 +60,13 @@
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
-  kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
-  kTransactionLogLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kMarkSweepMarkStackLock,
+  kTransactionLogLock,
   kJniWeakGlobalsLock,
   kReferenceQueueSoftReferencesLock,
   kReferenceQueuePhantomReferencesLock,
@@ -72,10 +74,7 @@
   kReferenceQueueWeakReferencesLock,
   kReferenceQueueClearedReferencesLock,
   kReferenceProcessorLock,
-  kJitCodeCacheLock,
-  kRosAllocGlobalLock,
-  kRosAllocBracketLock,
-  kRosAllocBulkFreeLock,
+  kJitDebugInterfaceLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
@@ -83,10 +82,12 @@
   kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
-  kOatFileCountLock,
+  kHostDlOpenHandlesLock,
   kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
+  kDeoptimizedMethodsLock,
+  kClassLoaderClassesLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
   kPinTableLock,
@@ -95,13 +96,13 @@
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
   kMethodVerifiersLock,
-  kClassLinkerClassesLock,
   kClassLinkerClassesLock,  // TODO: rename.
+  kJitCodeCacheLock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
   kJniLoadLibraryLock,
   kThreadListLock,
-  kInterpreterStringInitMapLock,
   kAllocTrackerLock,
   kDeoptimizationLock,
   kProfilerLock,
@@ -649,11 +650,11 @@
   // Guards opened oat files in OatFileManager.
   static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
-  // Guards opened oat files in OatFileManager.
-  static ReaderWriterMutex* oat_file_count_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
+  // Guards dlopen_handles_ in DlOpenOatFile.
+  static Mutex* host_dlopen_handles_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
 
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(oat_file_count_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(host_dlopen_handles_lock_);
 
   // Guards reference processor.
   static Mutex* reference_processor_lock_ ACQUIRED_AFTER(intern_table_lock_);
@@ -688,10 +689,6 @@
 
   // Have an exclusive logging thread.
   static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
-
-  // Allow reader-writer mutual exclusion on the boxed table of lambda objects.
-  // TODO: this should be a RW mutex lock, except that ConditionVariables don't work with it.
-  static Mutex* lambda_table_lock_ ACQUIRED_AFTER(mutator_lock_);
 };
 
 class Roles {
diff --git a/runtime/base/scoped_arena_allocator.cc b/runtime/base/scoped_arena_allocator.cc
index 31f96e4..7d04fa0 100644
--- a/runtime/base/scoped_arena_allocator.cc
+++ b/runtime/base/scoped_arena_allocator.cc
@@ -91,16 +91,19 @@
 }
 
 void* ArenaStack::AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind) {
+  // We mark all memory for a newly retrieved arena as inaccessible and then
+  // mark only the actually allocated memory as defined. That leaves red zones
+  // and padding between allocations marked as inaccessible.
   size_t rounded_bytes = RoundUp(bytes + kMemoryToolRedZoneBytes, 8);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
     CHECK(ptr != nullptr) << "Failed to allocate memory";
+    MEMORY_TOOL_MAKE_NOACCESS(ptr, top_end_ - ptr);
   }
   CurrentStats()->RecordAlloc(bytes, kind);
   top_ptr_ = ptr + rounded_bytes;
   MEMORY_TOOL_MAKE_UNDEFINED(ptr, bytes);
-  MEMORY_TOOL_MAKE_NOACCESS(ptr + bytes, rounded_bytes - bytes);
   return ptr;
 }
 
diff --git a/runtime/base/scoped_arena_allocator.h b/runtime/base/scoped_arena_allocator.h
index a30c73d..55044b3 100644
--- a/runtime/base/scoped_arena_allocator.h
+++ b/runtime/base/scoped_arena_allocator.h
@@ -42,6 +42,7 @@
 static constexpr size_t kArenaAlignment = 8;
 
 // Holds a list of Arenas for use by ScopedArenaAllocator stack.
+// The memory is returned to the ArenaPool when the ArenaStack is destroyed.
 class ArenaStack : private DebugStackRefCounter, private ArenaAllocatorMemoryTool {
  public:
   explicit ArenaStack(ArenaPool* arena_pool);
@@ -121,6 +122,12 @@
   DISALLOW_COPY_AND_ASSIGN(ArenaStack);
 };
 
+// Fast single-threaded allocator. Allocated chunks are _not_ guaranteed to be zero-initialized.
+//
+// Unlike the ArenaAllocator, ScopedArenaAllocator is intended for relatively short-lived
+// objects and allows nesting multiple allocators. Only the top allocator can be used, but
+// once it's destroyed, its memory can be reused by the next ScopedArenaAllocator on the
+// stack. This is facilitated by returning the memory to the ArenaStack.
 class ScopedArenaAllocator
     : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats {
  public:
@@ -145,6 +152,11 @@
   }
 
   template <typename T>
+  T* Alloc(ArenaAllocKind kind = kArenaAllocMisc) {
+    return AllocArray<T>(1, kind);
+  }
+
+  template <typename T>
   T* AllocArray(size_t length, ArenaAllocKind kind = kArenaAllocMisc) {
     return static_cast<T*>(Alloc(length * sizeof(T), kind));
   }
diff --git a/runtime/base/scoped_arena_containers.h b/runtime/base/scoped_arena_containers.h
index 1236585..bd19d00 100644
--- a/runtime/base/scoped_arena_containers.h
+++ b/runtime/base/scoped_arena_containers.h
@@ -201,20 +201,29 @@
 template <typename T>
 class ArenaDelete {
   static constexpr uint8_t kMagicFill = 0xCE;
- public:
-  void operator()(T* ptr) const {
-    ptr->~T();
+
+ protected:
+  // Used for variable sized objects such as RegisterLine.
+  ALWAYS_INLINE void ProtectMemory(T* ptr, size_t size) const {
     if (RUNNING_ON_MEMORY_TOOL > 0) {
-      // Writing to the memory will fail if it we already destroyed the pointer with
+      // Writing to the memory will fail if we already destroyed the pointer with
       // DestroyOnlyDelete since we make it no access.
-      memset(ptr, kMagicFill, sizeof(T));
-      MEMORY_TOOL_MAKE_NOACCESS(ptr, sizeof(T));
+      memset(ptr, kMagicFill, size);
+      MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
     } else if (kIsDebugBuild) {
       CHECK(ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) == ArenaFreeTag::kUsed)
           << "Freeing invalid object " << ptr;
       ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) = ArenaFreeTag::kFree;
       // Write a magic value to try and catch use after free error.
-      memset(ptr, kMagicFill, sizeof(T));
+      memset(ptr, kMagicFill, size);
+    }
+  }
+
+ public:
+  void operator()(T* ptr) const {
+    if (ptr != nullptr) {
+      ptr->~T();
+      ProtectMemory(ptr, sizeof(T));
     }
   }
 };
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 71e0590..0e8031f 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -26,16 +26,25 @@
 namespace art {
 
 bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  return Init(filename, O_CREAT | O_RDWR, true, error_msg);
+}
+
+bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) {
   while (true) {
     if (file_.get() != nullptr) {
       UNUSED(file_->FlushCloseOrErase());  // Ignore result.
     }
-    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
+    file_.reset(OS::OpenFileWithFlags(filename, flags));
     if (file_.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
       return false;
     }
-    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
+    int operation = block ? LOCK_EX : (LOCK_EX | LOCK_NB);
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), operation));
+    if (flock_result == -1 && errno == EWOULDBLOCK) {
+      // File is locked by someone else and we are required not to block.
+      return false;
+    }
     if (flock_result != 0) {
       *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
       return false;
@@ -51,18 +60,30 @@
     if (stat_result != 0) {
       PLOG(WARNING) << "Failed to stat, will retry: " << filename;
       // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // Note that in theory we could race with someone here for a long time and end up retrying
+        // over and over again. This potential behavior does not fit well with the non-blocking
+        // semantics. Thus, if we are not required to block, return failure when racing.
+        return false;
+      }
     }
     if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
       LOG(WARNING) << "File changed while locking, will retry: " << filename;
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // See comment above.
+        return false;
+      }
     }
     return true;
   }
 }
 
 bool ScopedFlock::Init(File* file, std::string* error_msg) {
-  file_.reset(new File(dup(file->Fd()), true));
+  file_.reset(new File(dup(file->Fd()), file->GetPath(), file->CheckUsage(), file->ReadOnlyMode()));
   if (file_->Fd() == -1) {
     file_.reset();
     *error_msg = StringPrintf("Failed to duplicate open file '%s': %s",
@@ -78,7 +99,7 @@
   return true;
 }
 
-File* ScopedFlock::GetFile() {
+File* ScopedFlock::GetFile() const {
   CHECK(file_.get() != nullptr);
   return file_.get();
 }
@@ -93,7 +114,13 @@
   if (file_.get() != nullptr) {
     int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
     CHECK_EQ(0, flock_result);
-    if (file_->FlushCloseOrErase() != 0) {
+    int close_result = -1;
+    if (file_->ReadOnlyMode()) {
+      close_result = file_->Close();
+    } else {
+      close_result = file_->FlushCloseOrErase();
+    }
+    if (close_result != 0) {
       PLOG(WARNING) << "Could not close scoped file lock file.";
     }
   }
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 08612e3..cc22056 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -32,10 +32,15 @@
   // Attempts to acquire an exclusive file lock (see flock(2)) on the file
   // at filename, and blocks until it can do so.
   //
-  // Returns true if the lock could be acquired, or false if an error
-  // occurred. It is an error if the file does not exist, or if its inode
-  // changed (usually due to a new file being created at the same path)
-  // between attempts to lock it.
+  // Returns true if the lock could be acquired, or false if an error occurred.
+  // It is an error if its inode changed (usually due to a new file being
+  // created at the same path) between attempts to lock it. In blocking mode,
+  // locking will be retried if the file changed. In non-blocking mode, false
+  // is returned and no attempt is made to re-acquire the lock.
+  //
+  // The file is opened with the provided flags.
+  bool Init(const char* filename, int flags, bool block, std::string* error_msg);
+  // Calls Init(filename, O_CREAT | O_RDWR, true, error_msg).
   bool Init(const char* filename, std::string* error_msg);
   // Attempt to acquire an exclusive file lock (see flock(2)) on 'file'.
   // Returns true if the lock could be acquired or false if an error
@@ -43,7 +48,7 @@
   bool Init(File* file, std::string* error_msg);
 
   // Returns the (locked) file associated with this instance.
-  File* GetFile();
+  File* GetFile() const;
 
   // Returns whether a file is held.
   bool HasFile();
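A sketch of the new non-blocking mode (the path is hypothetical); when the lock is already held elsewhere, Init() simply returns false without retrying:

    ScopedFlock flock;
    std::string error_msg;
    if (!flock.Init("/data/dalvik-cache/profile.lock",
                    O_CREAT | O_RDWR, /* block */ false, &error_msg)) {
      return;  // Held by another process or failed to open; try again later.
    }
    File* file = flock.GetFile();  // Unlocked when `flock` goes out of scope.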
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index 0949619..a53dcea 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -149,6 +149,44 @@
   return it != container.end();
 }
 
+// const char* compare function suitable for std::map or std::set.
+struct CStringLess {
+  bool operator()(const char* lhs, const char* rhs) const {
+    return strcmp(lhs, rhs) < 0;
+  }
+};
+
+// 32-bit FNV-1a hash function suitable for std::unordered_map.
+// It can be used with any container which works with range-based for loop.
+// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+template <typename Vector>
+struct FNVHash {
+  size_t operator()(const Vector& vector) const {
+    uint32_t hash = 2166136261u;
+    for (const auto& value : vector) {
+      hash = (hash ^ value) * 16777619u;
+    }
+    return hash;
+  }
+};
+
+// Use to suppress type deduction for a function argument.
+// See std::identity<> for more background:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
+//
+// e.g. "template <typename X> void bar(typename Identity<X>::type foo);
+//     bar(5); // compilation error
+//     bar<int>(5); // ok"
+// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);
+//     Base b;
+//     Derived d;
+//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
+// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
+template <typename T>
+struct Identity {
+  using type = T;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
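Quick sketches of the three new helpers (contents are illustrative):

    // Order C strings by their characters rather than by pointer identity.
    std::set<const char*, CStringLess> names;

    // FNV-1a over the bytes of each key vector.
    std::unordered_map<std::vector<uint8_t>, int,
                       FNVHash<std::vector<uint8_t>>> checksums;

    // T is deduced from `dest` only; `value` merely has to convert to T.
    template <typename T>
    void Assign(T* dest, typename Identity<T>::type value) {
      *dest = value;
    }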
diff --git a/runtime/base/systrace.h b/runtime/base/systrace.h
new file mode 100644
index 0000000..3901f96
--- /dev/null
+++ b/runtime/base/systrace.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_SYSTRACE_H_
+#define ART_RUNTIME_BASE_SYSTRACE_H_
+
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include <cutils/trace.h>
+#include <string>
+#include <utils/Trace.h>
+
+namespace art {
+
+class ScopedTrace {
+ public:
+  explicit ScopedTrace(const char* name) {
+    ATRACE_BEGIN(name);
+  }
+
+  explicit ScopedTrace(const std::string& name) : ScopedTrace(name.c_str()) {}
+
+  ~ScopedTrace() {
+    ATRACE_END();
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_SYSTRACE_H_
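Typical use of the RAII wrapper; the matching ATRACE_END runs even on early return (function shown is illustrative):

    void Heap::CollectGarbage() {
      ScopedTrace trace(__FUNCTION__);  // ATRACE_BEGIN("CollectGarbage")
      // ... work shows up as one slice in systrace ...
    }  // ATRACE_END in ~ScopedTrace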
diff --git a/runtime/base/time_utils.cc b/runtime/base/time_utils.cc
index 48b0a09..3e5bac8 100644
--- a/runtime/base/time_utils.cc
+++ b/runtime/base/time_utils.cc
@@ -15,6 +15,7 @@
  */
 
 #include <inttypes.h>
+#include <limits>
 #include <sstream>
 
 #include "time_utils.h"
@@ -174,8 +175,6 @@
 }
 
 void InitTimeSpec(bool absolute, int clock, int64_t ms, int32_t ns, timespec* ts) {
-  int64_t endSec;
-
   if (absolute) {
 #if !defined(__APPLE__)
     clock_gettime(clock, ts);
@@ -190,13 +189,20 @@
     ts->tv_sec = 0;
     ts->tv_nsec = 0;
   }
-  endSec = ts->tv_sec + ms / 1000;
-  if (UNLIKELY(endSec >= 0x7fffffff)) {
-    std::ostringstream ss;
-    LOG(INFO) << "Note: end time exceeds epoch: " << ss.str();
-    endSec = 0x7ffffffe;
+
+  int64_t end_sec = ts->tv_sec + ms / 1000;
+  constexpr int32_t int32_max = std::numeric_limits<int32_t>::max();
+  if (UNLIKELY(end_sec >= int32_max)) {
+    // Either ms was intended to denote an infinite timeout, or we have a
+    // problem. The former generally uses the largest possible millisecond
+    // or nanosecond value. Log only in the latter case.
+    constexpr int64_t int64_max = std::numeric_limits<int64_t>::max();
+    if (ms != int64_max && ms != int64_max / (1000 * 1000)) {
+      LOG(INFO) << "Note: end time exceeds INT32_MAX: " << end_sec;
+    }
+    end_sec = int32_max - 1;  // Allow for increment below.
   }
-  ts->tv_sec = endSec;
+  ts->tv_sec = end_sec;
   ts->tv_nsec = (ts->tv_nsec + (ms % 1000) * 1000000) + ns;
 
   // Catch rollover.
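A sketch of the behavior above (a 500 ms absolute deadline on the monotonic clock):

    timespec ts;
    InitTimeSpec(true, CLOCK_MONOTONIC, 500, 0, &ts);  // now + 500 ms
    // Passing ms == INT64_MAX (or its nanosecond-derived equivalent) is
    // treated as an infinite timeout: tv_sec is clamped without logging.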
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index f1f6f9b..9a0e0d0 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -15,15 +15,14 @@
  */
 
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <stdio.h>
-#include <cutils/trace.h>
 
 #include "timing_logger.h"
 
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/histogram-inl.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "thread-inl.h"
 
@@ -125,7 +124,7 @@
     histogram->CreateHistogram(&cumulative_data);
     histogram->PrintConfidenceIntervals(os, 0.99, cumulative_data);
   }
-  os << "Done Dumping histograms \n";
+  os << "Done Dumping histograms\n";
 }
 
 TimingLogger::TimingLogger(const char* name, bool precise, bool verbose)
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 07cadc4..48e3ceb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -17,29 +17,51 @@
 #include "base/unix_file/fd_file.h"
 
 #include <errno.h>
+#include <limits>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include "base/logging.h"
 
+// Includes needed for FdFile::Copy().
+#ifdef __linux__
+#include <sys/sendfile.h>
+#else
+#include <algorithm>
+#include "base/stl_util.h"
+#include "globals.h"
+#endif
+
 namespace unix_file {
 
-FdFile::FdFile() : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true) {
+FdFile::FdFile()
+    : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true), read_only_mode_(false) {
 }
 
 FdFile::FdFile(int fd, bool check_usage)
     : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
-      fd_(fd), auto_close_(true) {
+      fd_(fd), auto_close_(true), read_only_mode_(false) {
 }
 
 FdFile::FdFile(int fd, const std::string& path, bool check_usage)
-    : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
-      fd_(fd), file_path_(path), auto_close_(true) {
-  CHECK_NE(0U, path.size());
+    : FdFile(fd, path, check_usage, false) {
 }
 
-FdFile::~FdFile() {
+FdFile::FdFile(int fd, const std::string& path, bool check_usage, bool read_only_mode)
+    : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
+      fd_(fd), file_path_(path), auto_close_(true), read_only_mode_(read_only_mode) {
+}
+
+FdFile::FdFile(const std::string& path, int flags, mode_t mode, bool check_usage)
+    : fd_(-1), auto_close_(true) {
+  Open(path, flags, mode);
+  if (!check_usage || !IsOpened()) {
+    guard_state_ = GuardState::kNoCheck;
+  }
+}
+
+void FdFile::Destroy() {
   if (kCheckSafeUsage && (guard_state_ < GuardState::kNoCheck)) {
     if (guard_state_ < GuardState::kFlushed) {
       LOG(::art::ERROR) << "File " << file_path_ << " wasn't explicitly flushed before destruction.";
@@ -56,6 +78,28 @@
   }
 }
 
+FdFile& FdFile::operator=(FdFile&& other) {
+  if (this == &other) {
+    return *this;
+  }
+
+  if (this->fd_ != other.fd_) {
+    Destroy();  // Free old state.
+  }
+
+  guard_state_ = other.guard_state_;
+  fd_ = other.fd_;
+  file_path_ = std::move(other.file_path_);
+  auto_close_ = other.auto_close_;
+  read_only_mode_ = other.read_only_mode_;
+  other.Release();  // Release other.
+
+  return *this;
+}
+
+FdFile::~FdFile() {
+  Destroy();
+}
+
 void FdFile::moveTo(GuardState target, GuardState warn_threshold, const char* warning) {
   if (kCheckSafeUsage) {
     if (guard_state_ < GuardState::kNoCheck) {
@@ -88,13 +132,14 @@
 }
 
 bool FdFile::Open(const std::string& path, int flags, mode_t mode) {
+  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
   CHECK_EQ(fd_, -1) << path;
+  read_only_mode_ = ((flags & O_ACCMODE) == O_RDONLY);
   fd_ = TEMP_FAILURE_RETRY(open(path.c_str(), flags, mode));
   if (fd_ == -1) {
     return false;
   }
   file_path_ = path;
-  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
   if (kCheckSafeUsage && (flags & (O_RDWR | O_CREAT | O_WRONLY)) != 0) {
     // Start in the base state (not flushed, not closed).
     guard_state_ = GuardState::kBase;
@@ -126,6 +171,7 @@
 }
 
 int FdFile::Flush() {
+  DCHECK(!read_only_mode_);
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(fdatasync(fd_));
 #else
@@ -145,6 +191,7 @@
 }
 
 int FdFile::SetLength(int64_t new_length) {
+  DCHECK(!read_only_mode_);
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(ftruncate64(fd_, new_length));
 #else
@@ -161,6 +208,7 @@
 }
 
 int64_t FdFile::Write(const char* buf, int64_t byte_count, int64_t offset) {
+  DCHECK(!read_only_mode_);
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(pwrite64(fd_, buf, byte_count, offset));
 #else
@@ -174,6 +222,14 @@
   return fd_;
 }
 
+bool FdFile::ReadOnlyMode() const {
+  return read_only_mode_;
+}
+
+bool FdFile::CheckUsage() const {
+  return guard_state_ != GuardState::kNoCheck;
+}
+
 bool FdFile::IsOpened() const {
   return fd_ >= 0;
 }
@@ -208,27 +264,90 @@
   return ReadFullyGeneric<pread>(fd_, buffer, byte_count, offset);
 }
 
-bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
-  const char* ptr = static_cast<const char*>(buffer);
+template <bool kUseOffset>
+bool FdFile::WriteFullyGeneric(const void* buffer, size_t byte_count, size_t offset) {
+  DCHECK(!read_only_mode_);
   moveTo(GuardState::kBase, GuardState::kClosed, "Writing into closed file.");
+  DCHECK(kUseOffset || offset == 0u);
+  const char* ptr = static_cast<const char*>(buffer);
   while (byte_count > 0) {
-    ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
+    ssize_t bytes_written = kUseOffset
+        ? TEMP_FAILURE_RETRY(pwrite(fd_, ptr, byte_count, offset))
+        : TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
     if (bytes_written == -1) {
       return false;
     }
     byte_count -= bytes_written;  // Reduce the number of remaining bytes.
     ptr += bytes_written;  // Move the buffer forward.
+    offset += static_cast<size_t>(bytes_written);
   }
   return true;
 }
 
+bool FdFile::PwriteFully(const void* buffer, size_t byte_count, size_t offset) {
+  return WriteFullyGeneric<true>(buffer, byte_count, offset);
+}
+
+bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
+  return WriteFullyGeneric<false>(buffer, byte_count, 0u);
+}
+
+bool FdFile::Copy(FdFile* input_file, int64_t offset, int64_t size) {
+  DCHECK(!read_only_mode_);
+  off_t off = static_cast<off_t>(offset);
+  off_t sz = static_cast<off_t>(size);
+  if (offset < 0 || static_cast<int64_t>(off) != offset ||
+      size < 0 || static_cast<int64_t>(sz) != size ||
+      sz > std::numeric_limits<off_t>::max() - off) {
+    errno = EINVAL;
+    return false;
+  }
+  if (size == 0) {
+    return true;
+  }
+#ifdef __linux__
+  // Use sendfile(), available for files since Linux kernel 2.6.33.
+  off_t end = off + sz;
+  while (off != end) {
+    int result = TEMP_FAILURE_RETRY(
+        sendfile(Fd(), input_file->Fd(), &off, end - off));
+    if (result == -1) {
+      return false;
+    }
+    // Ignore the number of bytes in `result`; sendfile() already updated `off`.
+  }
+#else
+  if (lseek(input_file->Fd(), off, SEEK_SET) != off) {
+    return false;
+  }
+  constexpr size_t kMaxBufferSize = 4 * ::art::kPageSize;
+  const size_t buffer_size = std::min<uint64_t>(size, kMaxBufferSize);
+  art::UniqueCPtr<void> buffer(malloc(buffer_size));
+  if (buffer == nullptr) {
+    errno = ENOMEM;
+    return false;
+  }
+  while (size != 0) {
+    size_t chunk_size = std::min<uint64_t>(buffer_size, size);
+    if (!input_file->ReadFully(buffer.get(), chunk_size) ||
+        !WriteFully(buffer.get(), chunk_size)) {
+      return false;
+    }
+    size -= chunk_size;
+  }
+#endif
+  return true;
+}
+
 void FdFile::Erase() {
+  DCHECK(!read_only_mode_);
   TEMP_FAILURE_RETRY(SetLength(0));
   TEMP_FAILURE_RETRY(Flush());
   TEMP_FAILURE_RETRY(Close());
 }
 
 int FdFile::FlushCloseOrErase() {
+  DCHECK(!read_only_mode_);
   int flush_result = TEMP_FAILURE_RETRY(Flush());
   if (flush_result != 0) {
     LOG(::art::ERROR) << "CloseOrErase failed while flushing a file.";
@@ -245,6 +364,7 @@
 }
 
 int FdFile::FlushClose() {
+  DCHECK(!read_only_mode_);
   int flush_result = TEMP_FAILURE_RETRY(Flush());
   if (flush_result != 0) {
     LOG(::art::ERROR) << "FlushClose failed while flushing a file.";
@@ -260,4 +380,23 @@
   guard_state_ = GuardState::kNoCheck;
 }
 
+bool FdFile::ClearContent() {
+  DCHECK(!read_only_mode_);
+  if (SetLength(0) < 0) {
+    PLOG(art::ERROR) << "Failed to reset the length";
+    return false;
+  }
+  return ResetOffset();
+}
+
+bool FdFile::ResetOffset() {
+  DCHECK(!read_only_mode_);
+  off_t rc = TEMP_FAILURE_RETRY(lseek(fd_, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(art::ERROR) << "Failed to reset the offset";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace unix_file
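A sketch of the new Copy() API (paths are hypothetical); on Linux it streams through sendfile(), elsewhere through a page-sized read/write loop:

    unix_file::FdFile src("/tmp/in.art", O_RDONLY, /* check_usage */ false);
    unix_file::FdFile dst("/tmp/out.art", O_CREAT | O_RDWR, 0644,
                          /* check_usage */ false);
    // Copies src's bytes [0, length) to dst's current offset.
    if (!dst.Copy(&src, /* offset */ 0, /* size */ src.GetLength())) {
      PLOG(art::ERROR) << "Copy failed";
    }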
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index f47368b..d896ee9 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -18,7 +18,9 @@
 #define ART_RUNTIME_BASE_UNIX_FILE_FD_FILE_H_
 
 #include <fcntl.h>
+
 #include <string>
+
 #include "base/unix_file/random_access_file.h"
 #include "base/macros.h"
 
@@ -37,6 +39,47 @@
   // file descriptor. (Use DisableAutoClose to retain ownership.)
   FdFile(int fd, bool checkUsage);
   FdFile(int fd, const std::string& path, bool checkUsage);
+  FdFile(int fd, const std::string& path, bool checkUsage, bool read_only_mode);
+
+  FdFile(const std::string& path, int flags, bool checkUsage)
+      : FdFile(path, flags, 0640, checkUsage) {}
+  FdFile(const std::string& path, int flags, mode_t mode, bool checkUsage);
+
+  // Move constructor.
+  FdFile(FdFile&& other)
+      : guard_state_(other.guard_state_),
+        fd_(other.fd_),
+        file_path_(std::move(other.file_path_)),
+        auto_close_(other.auto_close_),
+        read_only_mode_(other.read_only_mode_) {
+    other.Release();  // Release the src.
+  }
+
+  // Move assignment operator.
+  FdFile& operator=(FdFile&& other);
+
+  // Release the file descriptor. This will make further accesses to this FdFile invalid. Disables
+  // all further state checking.
+  int Release() {
+    int tmp_fd = fd_;
+    fd_ = -1;
+    guard_state_ = GuardState::kNoCheck;
+    auto_close_ = false;
+    return tmp_fd;
+  }
+
+  void Reset(int fd, bool check_usage) {
+    if (fd_ != -1 && fd_ != fd) {
+      Destroy();
+    }
+    fd_ = fd;
+    if (check_usage) {
+      guard_state_ = fd == -1 ? GuardState::kNoCheck : GuardState::kBase;
+    } else {
+      guard_state_ = GuardState::kNoCheck;
+    }
+    // Keep the auto_close_ state.
+  }
 
   // Destroys an FdFile, closing the file descriptor if Close hasn't already
   // been called. (If you care about the return value of Close, call it
@@ -45,17 +88,13 @@
   // guarantee that data actually made it to stable storage.)
   virtual ~FdFile();
 
-  // Opens file 'file_path' using 'flags' and 'mode'.
-  bool Open(const std::string& file_path, int flags);
-  bool Open(const std::string& file_path, int flags, mode_t mode);
-
   // RandomAccessFile API.
-  virtual int Close() WARN_UNUSED;
-  virtual int64_t Read(char* buf, int64_t byte_count, int64_t offset) const WARN_UNUSED;
-  virtual int SetLength(int64_t new_length) WARN_UNUSED;
-  virtual int64_t GetLength() const;
-  virtual int64_t Write(const char* buf, int64_t byte_count, int64_t offset) WARN_UNUSED;
-  virtual int Flush() WARN_UNUSED;
+  int Close() OVERRIDE WARN_UNUSED;
+  int64_t Read(char* buf, int64_t byte_count, int64_t offset) const OVERRIDE WARN_UNUSED;
+  int SetLength(int64_t new_length) OVERRIDE WARN_UNUSED;
+  int64_t GetLength() const OVERRIDE;
+  int64_t Write(const char* buf, int64_t byte_count, int64_t offset) OVERRIDE WARN_UNUSED;
+  int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
   void Erase();
@@ -68,6 +107,8 @@
 
   // Bonus API.
   int Fd() const;
+  bool ReadOnlyMode() const;
+  bool CheckUsage() const;
   bool IsOpened() const;
   const std::string& GetPath() const {
     return file_path_;
@@ -76,6 +117,15 @@
   bool ReadFully(void* buffer, size_t byte_count) WARN_UNUSED;
   bool PreadFully(void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
   bool WriteFully(const void* buffer, size_t byte_count) WARN_UNUSED;
+  bool PwriteFully(const void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
+
+  // Copy `size` bytes from `input_file`, starting at `offset`, into this file.
+  bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+  // Clears the file content and resets the file offset to 0.
+  // Returns true upon success, false otherwise.
+  bool ClearContent();
+  // Resets the file offset to the beginning of the file.
+  bool ResetOffset();
 
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
@@ -107,10 +157,20 @@
 
   GuardState guard_state_;
 
+  // Opens file 'file_path' using 'flags' and 'mode'.
+  bool Open(const std::string& file_path, int flags);
+  bool Open(const std::string& file_path, int flags, mode_t mode);
+
  private:
+  template <bool kUseOffset>
+  bool WriteFullyGeneric(const void* buffer, size_t byte_count, size_t offset);
+
+  void Destroy();  // For ~FdFile and operator=(&&).
+
   int fd_;
   std::string file_path_;
   bool auto_close_;
+  bool read_only_mode_;
 
   DISALLOW_COPY_AND_ASSIGN(FdFile);
 };
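
The move constructor and Release() above implement single-owner descriptor semantics: the moved-from object forgets its fd, so exactly one destructor ever closes it. A minimal sketch of the idiom, reduced to the ownership mechanics (simplified illustration, not the actual class):

#include <unistd.h>

class UniqueFd {
 public:
  explicit UniqueFd(int fd) : fd_(fd) {}
  UniqueFd(UniqueFd&& other) : fd_(other.Release()) {}  // Steal ownership.
  ~UniqueFd() {
    if (fd_ != -1) {
      close(fd_);  // Only the current owner closes.
    }
  }
  // Hand the descriptor to the caller; this object no longer owns it.
  int Release() {
    int tmp = fd_;
    fd_ = -1;
    return tmp;
  }
  int Fd() const { return fd_; }

 private:
  int fd_;
  UniqueFd(const UniqueFd&) = delete;
  UniqueFd& operator=(const UniqueFd&) = delete;
};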
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 388f717..99ef6f7 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -49,29 +49,31 @@
 
 TEST_F(FdFileTest, OpenClose) {
   std::string good_path(GetTmpPath("some-file.txt"));
-  FdFile file;
-  ASSERT_TRUE(file.Open(good_path, O_CREAT | O_WRONLY));
+  FdFile file(good_path, O_CREAT | O_WRONLY, true);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
   EXPECT_EQ(0, file.Flush());
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
-  EXPECT_TRUE(file.Open(good_path,  O_RDONLY));
-  EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
+  FdFile file2(good_path, O_RDONLY, true);
+  EXPECT_TRUE(file2.IsOpened());
+  EXPECT_TRUE(file2.ReadOnlyMode());
+  EXPECT_GE(file2.Fd(), 0);
 
-  ASSERT_EQ(file.Close(), 0);
+  ASSERT_EQ(file2.Close(), 0);
   ASSERT_EQ(unlink(good_path.c_str()), 0);
 }
 
 TEST_F(FdFileTest, ReadFullyEmptyFile) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDONLY));
+  FdFile file(tmp.GetFilename(), O_RDONLY, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_TRUE(file.ReadOnlyMode());
   EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
   uint8_t buffer[16];
   EXPECT_FALSE(file.ReadFully(&buffer, 4));
 }
@@ -84,10 +86,10 @@
 TEST_F(FdFileTest, ReadFullyWithOffset) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDWR));
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
 
   char ignore_prefix[20] = {'a', };
   NullTerminateCharArray(ignore_prefix);
@@ -110,4 +112,78 @@
   ASSERT_EQ(file.Close(), 0);
 }
 
+TEST_F(FdFileTest, ReadWriteFullyWithOffset) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_GE(file.Fd(), 0);
+  EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
+
+  const char* test_string = "This is a test string";
+  size_t length = strlen(test_string) + 1;
+  const size_t offset = 12;
+  std::unique_ptr<char[]> offset_read_string(new char[length]);
+  std::unique_ptr<char[]> read_string(new char[length]);
+
+  // Write scratch data to the file so we can read it back.
+  EXPECT_TRUE(file.PwriteFully(test_string, length, offset));
+  ASSERT_EQ(file.Flush(), 0);
+
+  // Test reading at both offsets: the written region and the file start.
+  EXPECT_TRUE(file.PreadFully(&offset_read_string[0], length, offset));
+  EXPECT_STREQ(test_string, &offset_read_string[0]);
+
+  EXPECT_TRUE(file.PreadFully(&read_string[0], length, 0u));
+  EXPECT_NE(memcmp(&read_string[0], test_string, length), 0);
+
+  ASSERT_EQ(file.Close(), 0);
+}
+
+TEST_F(FdFileTest, Copy) {
+  art::ScratchFile src_tmp;
+  FdFile src(src_tmp.GetFilename(), O_RDWR, false);
+  ASSERT_GE(src.Fd(), 0);
+  ASSERT_TRUE(src.IsOpened());
+
+  char src_data[] = "Some test data.";
+  ASSERT_TRUE(src.WriteFully(src_data, sizeof(src_data)));  // Including the zero terminator.
+  ASSERT_EQ(0, src.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), src.GetLength());
+
+  art::ScratchFile dest_tmp;
+  FdFile dest(dest_tmp.GetFilename(), O_RDWR, false);
+  ASSERT_GE(dest.Fd(), 0);
+  ASSERT_TRUE(dest.IsOpened());
+
+  ASSERT_TRUE(dest.Copy(&src, 0, sizeof(src_data)));
+  ASSERT_EQ(0, dest.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), dest.GetLength());
+
+  char check_data[sizeof(src_data)];
+  ASSERT_TRUE(dest.PreadFully(check_data, sizeof(src_data), 0u));
+  CHECK_EQ(0, memcmp(check_data, src_data, sizeof(src_data)));
+
+  ASSERT_EQ(0, dest.Close());
+  ASSERT_EQ(0, src.Close());
+}
+
+TEST_F(FdFileTest, MoveConstructor) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_GE(file.Fd(), 0);
+
+  int old_fd = file.Fd();
+
+  FdFile file2(std::move(file));
+  EXPECT_FALSE(file.IsOpened());
+  EXPECT_TRUE(file2.IsOpened());
+  EXPECT_EQ(old_fd, file2.Fd());
+
+  ASSERT_EQ(file2.Flush(), 0);
+  ASSERT_EQ(file2.Close(), 0);
+}
+
 }  // namespace unix_file
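
The new PwriteFully exercised above presumably follows the standard pwrite(2) retry loop, where a partial write advances both the buffer pointer and the file offset; a minimal sketch under that assumption (the function name is hypothetical):

#include <unistd.h>

#include <cstddef>

bool PwriteFullySketch(int fd, const void* buffer, size_t byte_count, off_t offset) {
  const char* ptr = static_cast<const char*>(buffer);
  while (byte_count > 0) {
    ssize_t n = pwrite(fd, ptr, byte_count, offset);
    if (n <= 0) {
      return false;  // Error (or no progress); the write cannot complete.
    }
    ptr += n;     // Advance through the caller's buffer ...
    offset += n;  // ... and through the file, in lockstep.
    byte_count -= static_cast<size_t>(n);
  }
  return true;
}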
diff --git a/runtime/base/variant_map.h b/runtime/base/variant_map.h
index 82e5d2e..531cb37 100644
--- a/runtime/base/variant_map.h
+++ b/runtime/base/variant_map.h
@@ -19,8 +19,11 @@
 
 #include <memory.h>
 #include <map>
+#include <type_traits>
 #include <utility>
 
+#include "base/stl_util.h"
+
 namespace art {
 
 //
@@ -268,8 +271,9 @@
   }
 
   // Set a value for a given key, overwriting the previous value if any.
+  // Note: Omit `value` from TValue type deduction; deduce only from the `key` argument.
   template <typename TValue>
-  void Set(const TKey<TValue>& key, const TValue& value) {
+  void Set(const TKey<TValue>& key, const typename Identity<TValue>::type& value) {
     // Clone the value first, to protect against &value == GetValuePtr(key).
     auto* new_value = new TValue(value);
 
@@ -279,8 +283,9 @@
 
   // Set a value for a given key, only if there was no previous value before.
   // Returns true if the value was set, false if a previous value existed.
+  // Note: Omit `value` from TValue type deduction; deduce only from the `key` argument.
   template <typename TValue>
-  bool SetIfMissing(const TKey<TValue>& key, const TValue& value) {
+  bool SetIfMissing(const TKey<TValue>& key, const typename Identity<TValue>::type& value) {
     TValue* ptr = Get(key);
     if (ptr == nullptr) {
       Set(key, value);
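
The Identity<TValue>::type rewrite above relies on a standard trick: a dependent type is a non-deduced context, so TValue comes from `key` alone and `value` merely has to be convertible to it. A minimal sketch, assuming base/stl_util.h defines Identity along these lines:

#include <string>

template <typename T>
struct Identity {
  using type = T;
};

template <typename TValue>
struct Key {};

template <typename TValue>
void Set(const Key<TValue>&, const typename Identity<TValue>::type&) {}

int main() {
  Key<std::string> key;
  // TValue is deduced as std::string from `key` only; the string literal
  // merely converts. With a plain `const TValue&` parameter, deduction would
  // see conflicting candidates (std::string vs. const char[8]) and fail.
  Set(key, "literal");
  return 0;
}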
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index beabce3..96fa53c 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -1176,14 +1176,16 @@
       return false;
     }
 
-    // Get the *correct* JNIEnv by going through our TLS pointer.
+    // Get the current thread's JNIEnv by going through our TLS pointer.
     JNIEnvExt* threadEnv = self->GetJniEnv();
 
     // Verify that the current thread is (a) attached and (b) associated with
     // this particular instance of JNIEnv.
     if (env != threadEnv) {
+      // Get the thread owning the JNIEnv that's being used.
+      Thread* envThread = reinterpret_cast<JNIEnvExt*>(env)->self;
       AbortF("thread %s using JNIEnv* from thread %s",
-             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*self).c_str());
+             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*envThread).c_str());
       return false;
     }
 
@@ -2427,19 +2429,20 @@
                                                      Primitive::kPrimDouble));
   }
 
+// NOLINT added to avoid wrong warning/fix from clang-tidy.
 #define PRIMITIVE_ARRAY_FUNCTIONS(ctype, name, ptype) \
-  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { \
-    return reinterpret_cast<ctype*>( \
+  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { /* NOLINT */ \
+    return reinterpret_cast<ctype*>( /* NOLINT */ \
         GetPrimitiveArrayElements(__FUNCTION__, ptype, env, array, is_copy)); \
   } \
   \
-  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, \
+  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, /* NOLINT */ \
                                            jint mode) { \
     ReleasePrimitiveArrayElements(__FUNCTION__, ptype, env, array, elems, mode); \
   } \
   \
   static void Get##name##ArrayRegion(JNIEnv* env, ctype##Array array, jsize start, jsize len, \
-                                     ctype* buf) { \
+                                     ctype* buf) { /* NOLINT */ \
     GetPrimitiveArrayRegion(__FUNCTION__, ptype, env, array, start, len, buf); \
   } \
   \
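
The fixed check above compares the passed-in JNIEnv against the one reached through TLS, so a mismatch can name the thread whose env is being misused. A minimal sketch of the shape of that check, with hypothetical types (not the ART ones):

struct Env {
  int owner_tid;  // Stand-in for JNIEnvExt::self.
};

thread_local Env* tls_env = nullptr;  // Each thread registers its own env.

// Returns the owner's tid on misuse, so the error can name both threads,
// or -1 when `env` really belongs to the calling thread.
int CheckEnvOwnership(Env* env) {
  if (env != tls_env) {
    return env->owner_tid;  // Another thread's env is being used here.
  }
  return -1;
}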
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index b9ea475..0e2f9f2 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -18,7 +18,6 @@
 #define ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
 
 #include "art_method-inl.h"
-#include "gc_map.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -54,11 +53,8 @@
 
   void CheckReferences(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-      CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
-    } else {
-      CheckQuickMethod(registers, number_of_references, native_pc_offset);
-    }
+    CHECK(GetCurrentOatQuickMethodHeader()->IsOptimized());
+    CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
   }
 
  private:
@@ -66,13 +62,12 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
     CodeInfo code_info = GetCurrentOatQuickMethodHeader()->GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     uint16_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    uint32_t register_mask = stack_map.GetRegisterMask(encoding);
+    uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
     for (int i = 0; i < number_of_references; ++i) {
       int reg = registers[i];
       CHECK(reg < m->GetCodeItem()->registers_size_);
@@ -85,7 +80,8 @@
           break;
         case DexRegisterLocation::Kind::kInStack:
           DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0);
-          CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize));
+          CHECK(stack_map.GetStackMaskBit(encoding.stack_map_encoding,
+                                          location.GetValue() / kFrameSlotSize));
           break;
         case DexRegisterLocation::Kind::kInRegister:
         case DexRegisterLocation::Kind::kInRegisterHigh:
@@ -100,25 +96,10 @@
           CHECK_EQ(location.GetValue(), 0);
           break;
         default:
-          LOG(FATAL) << "Unexpected location kind"
-                     << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind());
+          LOG(FATAL) << "Unexpected location kind " << location.GetInternalKind();
       }
     }
   }
-
-  void CheckQuickMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    NativePcOffsetToReferenceMap map(GetCurrentOatQuickMethodHeader()->GetNativeGcMap());
-    const uint8_t* ref_bitmap = map.FindBitMap(native_pc_offset);
-    CHECK(ref_bitmap);
-    for (int i = 0; i < number_of_references; ++i) {
-      int reg = registers[i];
-      CHECK(reg < m->GetCodeItem()->registers_size_);
-      CHECK((*((ref_bitmap) + reg / 8) >> (reg % 8) ) & 0x01)
-          << "Error: Reg @" << i << " is not in GC map";
-    }
-  }
 };
 
 }  // namespace art
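
Both mask checks above are single bit tests: the register mask is a word indexed by machine register, and the stack mask is indexed by frame slot (the same layout the removed CheckQuickMethod read by hand from its byte array). A minimal sketch:

#include <cstdint>

// True if the GC map records a reference in the given machine register.
inline bool RegisterHoldsReference(uint32_t register_mask, int machine_reg) {
  return ((register_mask >> machine_reg) & 1u) != 0;
}

// True if the GC map records a reference in the given stack frame slot.
inline bool StackSlotHoldsReference(const uint8_t* stack_mask, int slot) {
  return ((stack_mask[slot / 8] >> (slot % 8)) & 1u) != 0;
}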
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 88a3996..97aa499 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -27,10 +27,12 @@
 #include "mirror/object_array.h"
 #include "handle_scope-inl.h"
 
+#include <atomic>
+
 namespace art {
 
 inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
-  return FindClass(self, descriptor, NullHandle<mirror::ClassLoader>());
+  return FindClass(self, descriptor, ScopedNullHandle<mirror::ClassLoader>());
 }
 
 inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class** element_class) {
@@ -63,18 +65,21 @@
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx, ArtMethod* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  mirror::String* resolved_string = declaring_class->GetDexCacheStrings()[string_idx].Read();
-  if (UNLIKELY(resolved_string == nullptr)) {
+  DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());
+  mirror::String* string =
+        mirror::StringDexCachePair::LookupString(declaring_class->GetDexCacheStrings(),
+                                                 string_idx,
+                                                 mirror::DexCache::kDexCacheStringCacheSize).Read();
+  if (UNLIKELY(string == nullptr)) {
     StackHandleScope<1> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
-    resolved_string = ResolveString(dex_file, string_idx, dex_cache);
-    if (resolved_string != nullptr) {
-      DCHECK_EQ(dex_cache->GetResolvedString(string_idx), resolved_string);
+    string = ResolveString(dex_file, string_idx, dex_cache);
+    if (string != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedString(string_idx), string);
     }
   }
-  return resolved_string;
+  return string;
 }
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtMethod* referrer) {
@@ -116,6 +121,28 @@
   return resolved_method;
 }
 
+inline mirror::Class* ClassLinker::ResolveReferencedClassOfMethod(
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader) {
+  // NB: We cannot simply use `GetResolvedMethod(method_idx, ...)->GetDeclaringClass()`. This is
+  // because if we did so, then an invoke-super could be incorrectly dispatched in cases where
+  // GetMethodId(method_idx).class_idx_ refers to a non-interface, non-direct-superclass
+  // (super*-class?) of the referrer and the direct superclass of the referrer contains a concrete
+  // implementation of the method. If this class's implementation of the method is copied from an
+  // interface (either miranda, default or conflict), we would incorrectly assume that it is the
+  // method we want to invoke, instead of the 'concrete' implementation that the direct superclass
+  // contains.
+  const DexFile* dex_file = dex_cache->GetDexFile();
+  const DexFile::MethodId& method = dex_file->GetMethodId(method_idx);
+  mirror::Class* resolved_type = dex_cache->GetResolvedType(method.class_idx_);
+  if (UNLIKELY(resolved_type == nullptr)) {
+    resolved_type = ResolveType(*dex_file, method.class_idx_, dex_cache, class_loader);
+  }
+  return resolved_type;
+}
+
+template <ClassLinker::ResolveMode kResolveMode>
 inline ArtMethod* ClassLinker::ResolveMethod(Thread* self,
                                              uint32_t method_idx,
                                              ArtMethod* referrer,
@@ -127,12 +154,12 @@
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile* dex_file = h_dex_cache->GetDexFile();
-    resolved_method = ResolveMethod(*dex_file,
-                                    method_idx,
-                                    h_dex_cache,
-                                    h_class_loader,
-                                    referrer,
-                                    type);
+    resolved_method = ResolveMethod<kResolveMode>(*dex_file,
+                                                  method_idx,
+                                                  h_dex_cache,
+                                                  h_class_loader,
+                                                  referrer,
+                                                  type);
   }
   // Note: We cannot check here to see whether we added the method to the cache. It
   //       might be an erroneous class, which results in it being hidden from us.
@@ -203,6 +230,34 @@
   return klass;
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
+  DCHECK(proxy_class->IsProxyClass());
+  DCHECK(proxy_method->IsProxyMethod<kReadBarrierOption>());
+  {
+    Thread* const self = Thread::Current();
+    ReaderMutexLock mu(self, dex_lock_);
+    // Locate the dex cache of the original interface/Object.
+    for (const DexCacheData& data : dex_caches_) {
+      if (!self->IsJWeakCleared(data.weak_root) &&
+          proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
+                                                     image_pointer_size_)) {
+        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+            self->DecodeJObject(data.weak_root));
+        if (dex_cache != nullptr) {
+          ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
+              proxy_method->GetDexMethodIndex(), image_pointer_size_);
+          CHECK(resolved_method != nullptr);
+          return resolved_method;
+        }
+      }
+    }
+  }
+  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
+      << PrettyMethod(proxy_method);
+  UNREACHABLE();
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_LINKER_INL_H_
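
The LookupString call in ResolveString above reads a fixed-size, direct-mapped cache: the slot is chosen by string_idx modulo the cache size, and each slot records which index it currently holds, so a colliding entry reads back as a miss. A minimal sketch with hypothetical types (the real layout lives in mirror/dex_cache.h):

#include <cstddef>
#include <cstdint>

struct StringCachePair {
  uint32_t index;  // Which string_idx this slot currently caches.
  void* string;    // Stand-in for a GcRoot<mirror::String>.
};

// Returns the cached string, or nullptr on a miss (empty slot or collision).
void* LookupString(const StringCachePair* cache, uint32_t string_idx, size_t cache_size) {
  const StringCachePair& slot = cache[string_idx % cache_size];
  return (slot.string != nullptr && slot.index == string_idx) ? slot.string : nullptr;
}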
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 81622e1..1a3bba5 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -36,6 +36,7 @@
 #include "base/scoped_arena_containers.h"
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "base/value_object.h"
@@ -46,21 +47,26 @@
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "experimental_flags.h"
 #include "gc_root-inl.h"
 #include "gc/accounting/card_table-inl.h"
-#include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
+#include "image-inl.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/offline_profiling_info.h"
 #include "leb128.h"
 #include "linear_alloc.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/field.h"
 #include "mirror/iftable-inl.h"
@@ -92,6 +98,7 @@
 namespace art {
 
 static constexpr bool kSanityCheckObjects = kIsDebugBuild;
+static constexpr bool kVerifyArtMethodDeclaringClasses = kIsDebugBuild;
 
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
@@ -104,12 +111,13 @@
   va_end(args);
 }
 
-bool ClassLinker::HasInitWithString(Thread* self, const char* descriptor) {
+static bool HasInitWithString(Thread* self, ClassLinker* class_linker, const char* descriptor)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = self->GetCurrentMethod(nullptr);
   StackHandleScope<1> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(method != nullptr ?
       method->GetDeclaringClass()->GetClassLoader() : nullptr));
-  mirror::Class* exception_class = FindClass(self, descriptor, class_loader);
+  mirror::Class* exception_class = class_linker->FindClass(self, descriptor, class_loader);
 
   if (exception_class == nullptr) {
     // No exc class ~ no <init>-with-string.
@@ -119,11 +127,40 @@
   }
 
   ArtMethod* exception_init_method = exception_class->FindDeclaredDirectMethod(
-      "<init>", "(Ljava/lang/String;)V", image_pointer_size_);
+      "<init>", "(Ljava/lang/String;)V", class_linker->GetImagePointerSize());
   return exception_init_method != nullptr;
 }
 
-void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c) {
+// Helper for ThrowEarlierClassFailure. Throws the stored error.
+static void HandleEarlierVerifyError(Thread* self, ClassLinker* class_linker, mirror::Class* c)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = c->GetVerifyError();
+  DCHECK(obj != nullptr);
+  self->AssertNoPendingException();
+  if (obj->IsClass()) {
+    // Previous error has been stored as class. Create a new exception of that type.
+
+    // It's possible the exception doesn't have a <init>(String).
+    std::string temp;
+    const char* descriptor = obj->AsClass()->GetDescriptor(&temp);
+
+    if (HasInitWithString(self, class_linker, descriptor)) {
+      self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
+    } else {
+      self->ThrowNewException(descriptor, nullptr);
+    }
+  } else {
+    // Previous error has been stored as an instance. Just rethrow.
+    mirror::Class* throwable_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_Throwable)->AsClass();
+    mirror::Class* error_class = obj->GetClass();
+    CHECK(throwable_class->IsAssignableFrom(error_class));
+    self->SetException(obj->AsThrowable());
+  }
+  self->AssertPendingException();
+}
+
+void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def) {
   // The class failed to initialize on a previous attempt, so we want to throw
   // a NoClassDefFoundError (v2 2.17.5).  The exception to this rule is if we
   // failed in verification, in which case v2 5.4.1 says we need to re-throw
@@ -131,8 +168,13 @@
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
     std::string extra;
-    if (c->GetVerifyErrorClass() != nullptr) {
-      extra = PrettyDescriptor(c->GetVerifyErrorClass());
+    if (c->GetVerifyError() != nullptr) {
+      mirror::Object* verify_error = c->GetVerifyError();
+      if (verify_error->IsClass()) {
+        extra = PrettyDescriptor(verify_error->AsClass());
+      } else {
+        extra = verify_error->AsThrowable()->Dump();
+      }
     }
     LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra;
   }
@@ -144,20 +186,16 @@
     mirror::Throwable* pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
     self->SetException(pre_allocated);
   } else {
-    if (c->GetVerifyErrorClass() != nullptr) {
-      // TODO: change the verifier to store an _instance_, with a useful detail message?
-      // It's possible the exception doesn't have a <init>(String).
-      std::string temp;
-      const char* descriptor = c->GetVerifyErrorClass()->GetDescriptor(&temp);
-
-      if (HasInitWithString(self, descriptor)) {
-        self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
-      } else {
-        self->ThrowNewException(descriptor, nullptr);
-      }
-    } else {
-      self->ThrowNewException("Ljava/lang/NoClassDefFoundError;",
-                              PrettyDescriptor(c).c_str());
+    if (c->GetVerifyError() != nullptr) {
+      // Rethrow stored error.
+      HandleEarlierVerifyError(self, this, c);
+    }
+    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+      // If there isn't a recorded earlier error, or this is a repeat throw from initialization,
+      // the top-level exception must be a NoClassDefFoundError. The potentially already pending
+      // exception will be a cause.
+      self->ThrowNewWrappedException("Ljava/lang/NoClassDefFoundError;",
+                                     PrettyDescriptor(c).c_str());
     }
   }
 }
@@ -274,7 +312,7 @@
 ClassLinker::ClassLinker(InternTable* intern_table)
     // dex_lock_ is recursive as it may be used in stack dumping.
     : dex_lock_("ClassLinker dex lock", kDefaultMutexLevel),
-      dex_cache_image_class_lookup_required_(false),
+      dex_cache_boot_image_class_lookup_required_(false),
       failed_dex_cache_class_lookups_(0),
       class_roots_(nullptr),
       array_iftable_(nullptr),
@@ -286,21 +324,40 @@
       quick_imt_conflict_trampoline_(nullptr),
       quick_generic_jni_trampoline_(nullptr),
       quick_to_interpreter_bridge_trampoline_(nullptr),
-      image_pointer_size_(sizeof(void*)) {
+      image_pointer_size_(kRuntimePointerSize) {
   CHECK(intern_table_ != nullptr);
   static_assert(kFindArrayCacheSize == arraysize(find_array_class_cache_),
                 "Array cache size wrong.");
   std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr));
 }
 
-void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) {
+void ClassLinker::CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor) {
+  mirror::Class* c2 = FindSystemClass(self, descriptor);
+  if (c2 == nullptr) {
+    LOG(FATAL) << "Could not find class " << descriptor;
+    UNREACHABLE();
+  }
+  if (c1.Get() != c2) {
+    std::ostringstream os1, os2;
+    c1->DumpClass(os1, mirror::Class::kDumpClassFullDetail);
+    c2->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
+    LOG(FATAL) << "InitWithoutImage: Class mismatch for " << descriptor
+               << ". This is most likely the result of a broken build. Make sure that "
+               << "libcore and art projects match.\n\n"
+               << os1.str() << "\n\n" << os2.str();
+    UNREACHABLE();
+  }
+}
+
+bool ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
+                                   std::string* error_msg) {
   VLOG(startup) << "ClassLinker::Init";
 
   Thread* const self = Thread::Current();
   Runtime* const runtime = Runtime::Current();
   gc::Heap* const heap = runtime->GetHeap();
 
-  CHECK(!heap->HasImageSpace()) << "Runtime has image. We should use it.";
+  CHECK(!heap->HasBootImageSpace()) << "Runtime has image. We should use it.";
   CHECK(!init_done_);
 
   // Use the pointer size from the runtime since we are probably creating the image.
@@ -339,10 +396,12 @@
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusLoaded, self);
 
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
-  runtime->SetSentinel(heap->AllocObject<true>(self,
-                                               java_lang_Object.Get(),
-                                               java_lang_Object->GetObjectSize(),
-                                               VoidFunctor()));
+  // Allocate in non-movable so that it's possible to check if a JNI weak global ref has been
+  // cleared without triggering the read barrier and unintentionally marking the sentinel alive.
+  runtime->SetSentinel(heap->AllocNonMovableObject<true>(self,
+                                                         java_lang_Object.Get(),
+                                                         java_lang_Object->GetObjectSize(),
+                                                         VoidFunctor()));
 
   // Object[] next to hold class roots.
   Handle<mirror::Class> object_array_class(hs.NewHandle(
@@ -438,19 +497,26 @@
   object_array_string->SetComponentType(java_lang_String.Get());
   SetClassRoot(kJavaLangStringArrayClass, object_array_string.Get());
 
+  LinearAlloc* linear_alloc = runtime->GetLinearAlloc();
   // Create runtime resolution and imt conflict methods.
   runtime->SetResolutionMethod(runtime->CreateResolutionMethod());
-  runtime->SetImtConflictMethod(runtime->CreateImtConflictMethod());
-  runtime->SetImtUnimplementedMethod(runtime->CreateImtConflictMethod());
+  runtime->SetImtConflictMethod(runtime->CreateImtConflictMethod(linear_alloc));
+  runtime->SetImtUnimplementedMethod(runtime->CreateImtConflictMethod(linear_alloc));
 
   // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
   // these roots.
-  CHECK_NE(0U, boot_class_path.size());
+  if (boot_class_path.empty()) {
+    *error_msg = "Boot classpath is empty.";
+    return false;
+  }
   for (auto& dex_file : boot_class_path) {
-    CHECK(dex_file.get() != nullptr);
+    if (dex_file.get() == nullptr) {
+      *error_msg = "Null dex file.";
+      return false;
+    }
     AppendToBootClassPath(self, *dex_file);
-    opened_dex_files_.push_back(std::move(dex_file));
+    boot_dex_files_.push_back(std::move(dex_file));
   }
 
   // now we can use FindSystemClass
@@ -471,18 +537,12 @@
 
   // Object, String and DexCache need to be rerun through FindSystemClass to finish init
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusNotReady, self);
-  CHECK_EQ(java_lang_Object.Get(), FindSystemClass(self, "Ljava/lang/Object;"));
+  CheckSystemClass(self, java_lang_Object, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
   mirror::Class::SetStatus(java_lang_String, mirror::Class::kStatusNotReady, self);
-  mirror::Class* String_class = FindSystemClass(self, "Ljava/lang/String;");
-  if (java_lang_String.Get() != String_class) {
-    std::ostringstream os1, os2;
-    java_lang_String->DumpClass(os1, mirror::Class::kDumpClassFullDetail);
-    String_class->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
-    LOG(FATAL) << os1.str() << "\n\n" << os2.str();
-  }
+  CheckSystemClass(self, java_lang_String, "Ljava/lang/String;");
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
-  CHECK_EQ(java_lang_DexCache.Get(), FindSystemClass(self, "Ljava/lang/DexCache;"));
+  CheckSystemClass(self, java_lang_DexCache, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
@@ -492,14 +552,13 @@
   SetClassRoot(kByteArrayClass, FindSystemClass(self, "[B"));
   mirror::ByteArray::SetArrayClass(GetClassRoot(kByteArrayClass));
 
-  CHECK_EQ(char_array_class.Get(), FindSystemClass(self, "[C"));
+  CheckSystemClass(self, char_array_class, "[C");
 
   SetClassRoot(kShortArrayClass, FindSystemClass(self, "[S"));
   mirror::ShortArray::SetArrayClass(GetClassRoot(kShortArrayClass));
 
-  CHECK_EQ(int_array_class.Get(), FindSystemClass(self, "[I"));
-
-  CHECK_EQ(long_array_class.Get(), FindSystemClass(self, "[J"));
+  CheckSystemClass(self, int_array_class, "[I");
+  CheckSystemClass(self, long_array_class, "[J");
 
   SetClassRoot(kFloatArrayClass, FindSystemClass(self, "[F"));
   mirror::FloatArray::SetArrayClass(GetClassRoot(kFloatArrayClass));
@@ -507,9 +566,12 @@
   SetClassRoot(kDoubleArrayClass, FindSystemClass(self, "[D"));
   mirror::DoubleArray::SetArrayClass(GetClassRoot(kDoubleArrayClass));
 
-  CHECK_EQ(class_array_class.Get(), FindSystemClass(self, "[Ljava/lang/Class;"));
+  // Run Class through FindSystemClass. This initializes the dex_cache_ fields and registers it
+  // in class_table_.
+  CheckSystemClass(self, java_lang_Class, "Ljava/lang/Class;");
 
-  CHECK_EQ(object_array_class.Get(), FindSystemClass(self, "[Ljava/lang/Object;"));
+  CheckSystemClass(self, class_array_class, "[Ljava/lang/Class;");
+  CheckSystemClass(self, object_array_class, "[Ljava/lang/Object;");
 
   // Setup the single, global copy of "iftable".
   auto java_lang_Cloneable = hs.NewHandle(FindSystemClass(self, "Ljava/lang/Cloneable;"));
@@ -531,14 +593,11 @@
            mirror::Class::GetDirectInterface(self, object_array_class, 0));
   CHECK_EQ(java_io_Serializable.Get(),
            mirror::Class::GetDirectInterface(self, object_array_class, 1));
-  // Run Class, ArtField, and ArtMethod through FindSystemClass. This initializes their
-  // dex_cache_ fields and register them in class_table_.
-  CHECK_EQ(java_lang_Class.Get(), FindSystemClass(self, "Ljava/lang/Class;"));
 
   CHECK_EQ(object_array_string.Get(),
            FindSystemClass(self, GetClassRootDescriptor(kJavaLangStringArrayClass)));
 
-  // End of special init trickery, subsequent classes may be loaded via FindSystemClass.
+  // End of special init trickery, all subsequent classes may be loaded via FindSystemClass.
 
   // Create java.lang.reflect.Proxy root.
   SetClassRoot(kJavaLangReflectProxy, FindSystemClass(self, "Ljava/lang/reflect/Proxy;"));
@@ -578,7 +637,7 @@
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
   // finish initializing Reference class
   mirror::Class::SetStatus(java_lang_ref_Reference, mirror::Class::kStatusNotReady, self);
-  CHECK_EQ(java_lang_ref_Reference.Get(), FindSystemClass(self, "Ljava/lang/ref/Reference;"));
+  CheckSystemClass(self, java_lang_ref_Reference, "Ljava/lang/ref/Reference;");
   CHECK_EQ(java_lang_ref_Reference->GetObjectSize(), mirror::Reference::InstanceSize());
   CHECK_EQ(java_lang_ref_Reference->GetClassSize(),
            mirror::Reference::ClassSize(image_pointer_size_));
@@ -616,10 +675,7 @@
   // initialized.
   {
     const DexFile& dex_file = java_lang_Object->GetDexFile();
-    const DexFile::StringId* void_string_id = dex_file.FindStringId("V");
-    CHECK(void_string_id != nullptr);
-    uint32_t void_string_index = dex_file.GetIndexForStringId(*void_string_id);
-    const DexFile::TypeId* void_type_id = dex_file.FindTypeId(void_string_index);
+    const DexFile::TypeId* void_type_id = dex_file.FindTypeId("V");
     CHECK(void_type_id != nullptr);
     uint16_t void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
     // Now we resolve void type so the dex cache contains it. We use java.lang.Object class
@@ -629,9 +685,14 @@
     self->AssertNoPendingException();
   }
 
+  // Create conflict tables that depend on the class linker.
+  runtime->FixupConflictTables();
+
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromCompiler exiting";
+
+  return true;
 }
 
 void ClassLinker::FinishInit(Thread* self) {
@@ -699,27 +760,36 @@
 
 static void SanityCheckArtMethod(ArtMethod* m,
                                  mirror::Class* expected_class,
-                                 gc::space::ImageSpace* space)
+                                 const std::vector<gc::space::ImageSpace*>& spaces)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
-    CHECK(m->GetDeclaringClass() == nullptr) << PrettyMethod(m);
-  } else if (m->IsMiranda()) {
+    mirror::Class* declaring_class = m->GetDeclaringClassUnchecked();
+    CHECK(declaring_class == nullptr) << declaring_class << " " << PrettyMethod(m);
+  } else if (m->IsCopied()) {
     CHECK(m->GetDeclaringClass() != nullptr) << PrettyMethod(m);
   } else if (expected_class != nullptr) {
     CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << PrettyMethod(m);
   }
-  if (space != nullptr) {
-    auto& header = space->GetImageHeader();
-    auto& methods = header.GetMethodsSection();
-    auto offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
-    CHECK(methods.Contains(offset)) << m << " not in " << methods;
+  if (!spaces.empty()) {
+    bool contains = false;
+    for (gc::space::ImageSpace* space : spaces) {
+      auto& header = space->GetImageHeader();
+      size_t offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
+
+      const ImageSection& methods = header.GetMethodsSection();
+      contains = contains || methods.Contains(offset);
+
+      const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
+      contains = contains || runtime_methods.Contains(offset);
+    }
+    CHECK(contains) << m << " not found";
   }
 }
 
 static void SanityCheckArtMethodPointerArray(mirror::PointerArray* arr,
                                              mirror::Class* expected_class,
-                                             size_t pointer_size,
-                                             gc::space::ImageSpace* space)
+                                             PointerSize pointer_size,
+                                             const std::vector<gc::space::ImageSpace*>& spaces)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(arr != nullptr);
   for (int32_t j = 0; j < arr->GetLength(); ++j) {
@@ -729,27 +799,34 @@
       CHECK(method != nullptr);
     }
     if (method != nullptr) {
-      SanityCheckArtMethod(method, expected_class, space);
+      SanityCheckArtMethod(method, expected_class, spaces);
     }
   }
 }
 
-static void SanityCheckArtMethodPointerArray(
-    ArtMethod** arr,
-    size_t size,
-    size_t pointer_size,
-    gc::space::ImageSpace* space) SHARED_REQUIRES(Locks::mutator_lock_) {
+static void SanityCheckArtMethodPointerArray(ArtMethod** arr,
+                                             size_t size,
+                                             PointerSize pointer_size,
+                                             const std::vector<gc::space::ImageSpace*>& spaces)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK_EQ(arr != nullptr, size != 0u);
   if (arr != nullptr) {
-    auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
-    CHECK(space->GetImageHeader().GetImageSection(
-        ImageHeader::kSectionDexCacheArrays).Contains(offset));
+    bool contains = false;
+    for (auto space : spaces) {
+      auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
+      if (space->GetImageHeader().GetImageSection(
+          ImageHeader::kSectionDexCacheArrays).Contains(offset)) {
+        contains = true;
+        break;
+      }
+    }
+    CHECK(contains);
   }
   for (size_t j = 0; j < size; ++j) {
     ArtMethod* method = mirror::DexCache::GetElementPtrSize(arr, j, pointer_size);
     // expected_class == null means we are a dex cache.
     if (method != nullptr) {
-      SanityCheckArtMethod(method, nullptr, space);
+      SanityCheckArtMethod(method, nullptr, spaces);
     }
   }
 }
@@ -768,32 +845,32 @@
       CHECK_EQ(field.GetDeclaringClass(), klass);
     }
     auto* runtime = Runtime::Current();
-    auto* image_space = runtime->GetHeap()->GetImageSpace();
+    auto image_spaces = runtime->GetHeap()->GetBootImageSpaces();
     auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : klass->GetDirectMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_space);
-    }
-    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_space);
+    for (auto& m : klass->GetMethods(pointer_size)) {
+      SanityCheckArtMethod(&m, klass, image_spaces);
     }
     auto* vtable = klass->GetVTable();
     if (vtable != nullptr) {
-      SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_space);
+      SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
     }
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        SanityCheckArtMethod(klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_space);
+    if (klass->ShouldHaveImt()) {
+      ImTable* imt = klass->GetImt(pointer_size);
+      for (size_t i = 0; i < ImTable::kSize; ++i) {
+        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
       }
+    }
+    if (klass->ShouldHaveEmbeddedVTable()) {
       for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
-        SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_space);
+        SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
     }
     auto* iftable = klass->GetIfTable();
     if (iftable != nullptr) {
       for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
         if (iftable->GetMethodArrayCount(i) > 0) {
-          SanityCheckArtMethodPointerArray(iftable->GetMethodArray(i), nullptr, pointer_size,
-                                           image_space);
+          SanityCheckArtMethodPointerArray(
+              iftable->GetMethodArray(i), nullptr, pointer_size, image_spaces);
         }
       }
     }
@@ -803,7 +880,7 @@
 // Set image methods' entry point to interpreter.
 class SetInterpreterEntrypointArtMethodVisitor : public ArtMethodVisitor {
  public:
-  explicit SetInterpreterEntrypointArtMethodVisitor(size_t image_pointer_size)
+  explicit SetInterpreterEntrypointArtMethodVisitor(PointerSize image_pointer_size)
     : image_pointer_size_(image_pointer_size) {}
 
   void Visit(ArtMethod* method) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -817,42 +894,120 @@
   }
 
  private:
-  const size_t image_pointer_size_;
+  const PointerSize image_pointer_size_;
 
   DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor);
 };
 
-void ClassLinker::InitFromImage() {
-  VLOG(startup) << "ClassLinker::InitFromImage entering";
+struct TrampolineCheckData {
+  const void* quick_resolution_trampoline;
+  const void* quick_imt_conflict_trampoline;
+  const void* quick_generic_jni_trampoline;
+  const void* quick_to_interpreter_bridge_trampoline;
+  PointerSize pointer_size;
+  ArtMethod* m;
+  bool error;
+};
+
+static void CheckTrampolines(mirror::Object* obj, void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  if (obj->IsClass()) {
+    mirror::Class* klass = obj->AsClass();
+    TrampolineCheckData* d = reinterpret_cast<TrampolineCheckData*>(arg);
+    for (ArtMethod& m : klass->GetMethods(d->pointer_size)) {
+      const void* entrypoint = m.GetEntryPointFromQuickCompiledCodePtrSize(d->pointer_size);
+      if (entrypoint == d->quick_resolution_trampoline ||
+          entrypoint == d->quick_imt_conflict_trampoline ||
+          entrypoint == d->quick_generic_jni_trampoline ||
+          entrypoint == d->quick_to_interpreter_bridge_trampoline) {
+        d->m = &m;
+        d->error = true;
+        return;
+      }
+    }
+  }
+}
+
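+
The CheckTrampolines callback above follows the C-style walk convention: per-walk state travels through a void* argument, the callback casts it back and records an error flag, and the caller inspects the struct after the traversal. A minimal sketch of the pattern (hypothetical names):

#include <vector>

struct ForbiddenCheckData {
  int forbidden;
  bool error = false;
  int found = 0;  // The offending value, valid only when `error` is set.
};

static void CheckValue(int value, void* arg) {
  ForbiddenCheckData* data = static_cast<ForbiddenCheckData*>(arg);
  if (value == data->forbidden) {
    data->found = value;
    data->error = true;
  }
}

// Stand-in for a bitmap/heap Walk(callback, arg) traversal.
void Walk(const std::vector<int>& values, void (*visitor)(int, void*), void* arg) {
  for (int value : values) {
    visitor(value, arg);
  }
}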
+bool ClassLinker::InitFromBootImage(std::string* error_msg) {
+  VLOG(startup) << __FUNCTION__ << " entering";
   CHECK(!init_done_);
 
   Runtime* const runtime = Runtime::Current();
   Thread* const self = Thread::Current();
   gc::Heap* const heap = runtime->GetHeap();
-  gc::space::ImageSpace* const space = heap->GetImageSpace();
-  CHECK(space != nullptr);
-  image_pointer_size_ = space->GetImageHeader().GetPointerSize();
-  dex_cache_image_class_lookup_required_ = true;
-  const OatFile* oat_file = runtime->GetOatFileManager().RegisterImageOatFile(space);
-  DCHECK(oat_file != nullptr);
-  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
-  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
-  const char* image_file_location = oat_file->GetOatHeader().
+  std::vector<gc::space::ImageSpace*> spaces = heap->GetBootImageSpaces();
+  CHECK(!spaces.empty());
+  uint32_t pointer_size_unchecked = spaces[0]->GetImageHeader().GetPointerSizeUnchecked();
+  if (!ValidPointerSize(pointer_size_unchecked)) {
+    *error_msg = StringPrintf("Invalid image pointer size: %u", pointer_size_unchecked);
+    return false;
+  }
+  image_pointer_size_ = spaces[0]->GetImageHeader().GetPointerSize();
+  if (!runtime->IsAotCompiler()) {
+    // Only the Aot compiler supports having an image with a different pointer size than the
+    // runtime. This happens on the host for compiling 32 bit tests since we use a 64 bit libart
+    // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
+    if (image_pointer_size_ != kRuntimePointerSize) {
+      *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu",
+                                static_cast<size_t>(image_pointer_size_),
+                                sizeof(void*));
+      return false;
+    }
+  }
+  dex_cache_boot_image_class_lookup_required_ = true;
+  std::vector<const OatFile*> oat_files =
+      runtime->GetOatFileManager().RegisterImageOatFiles(spaces);
+  DCHECK(!oat_files.empty());
+  const OatHeader& default_oat_header = oat_files[0]->GetOatHeader();
+  CHECK_EQ(default_oat_header.GetImageFileLocationOatChecksum(), 0U);
+  CHECK_EQ(default_oat_header.GetImageFileLocationOatDataBegin(), 0U);
+  const char* image_file_location = oat_files[0]->GetOatHeader().
       GetStoreValueByKey(OatHeader::kImageLocationKey);
   CHECK(image_file_location == nullptr || *image_file_location == 0);
-  quick_resolution_trampoline_ = oat_file->GetOatHeader().GetQuickResolutionTrampoline();
-  quick_imt_conflict_trampoline_ = oat_file->GetOatHeader().GetQuickImtConflictTrampoline();
-  quick_generic_jni_trampoline_ = oat_file->GetOatHeader().GetQuickGenericJniTrampoline();
-  quick_to_interpreter_bridge_trampoline_ = oat_file->GetOatHeader().GetQuickToInterpreterBridge();
-  StackHandleScope<2> hs(self);
-  mirror::Object* dex_caches_object = space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
-      hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
+  quick_resolution_trampoline_ = default_oat_header.GetQuickResolutionTrampoline();
+  quick_imt_conflict_trampoline_ = default_oat_header.GetQuickImtConflictTrampoline();
+  quick_generic_jni_trampoline_ = default_oat_header.GetQuickGenericJniTrampoline();
+  quick_to_interpreter_bridge_trampoline_ = default_oat_header.GetQuickToInterpreterBridge();
+  if (kIsDebugBuild) {
+    // Check that the other images use the same trampoline.
+    for (size_t i = 1; i < oat_files.size(); ++i) {
+      const OatHeader& ith_oat_header = oat_files[i]->GetOatHeader();
+      const void* ith_quick_resolution_trampoline =
+          ith_oat_header.GetQuickResolutionTrampoline();
+      const void* ith_quick_imt_conflict_trampoline =
+          ith_oat_header.GetQuickImtConflictTrampoline();
+      const void* ith_quick_generic_jni_trampoline =
+          ith_oat_header.GetQuickGenericJniTrampoline();
+      const void* ith_quick_to_interpreter_bridge_trampoline =
+          ith_oat_header.GetQuickToInterpreterBridge();
+      if (ith_quick_resolution_trampoline != quick_resolution_trampoline_ ||
+          ith_quick_imt_conflict_trampoline != quick_imt_conflict_trampoline_ ||
+          ith_quick_generic_jni_trampoline != quick_generic_jni_trampoline_ ||
+          ith_quick_to_interpreter_bridge_trampoline != quick_to_interpreter_bridge_trampoline_) {
+        // Make sure that all methods in this image do not contain those trampolines as
+        // entrypoints. Otherwise the class-linker won't be able to work with a single set.
+        TrampolineCheckData data;
+        data.error = false;
+        data.pointer_size = GetImagePointerSize();
+        data.quick_resolution_trampoline = ith_quick_resolution_trampoline;
+        data.quick_imt_conflict_trampoline = ith_quick_imt_conflict_trampoline;
+        data.quick_generic_jni_trampoline = ith_quick_generic_jni_trampoline;
+        data.quick_to_interpreter_bridge_trampoline = ith_quick_to_interpreter_bridge_trampoline;
+        ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+        spaces[i]->GetLiveBitmap()->Walk(CheckTrampolines, &data);
+        if (data.error) {
+          ArtMethod* m = data.m;
+          LOG(ERROR) << "Found a broken ArtMethod: " << PrettyMethod(m);
+          *error_msg = "Found an ArtMethod with a bad entrypoint";
+          return false;
+        }
+      }
+    }
+  }
 
-  Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
-          space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
-          AsObjectArray<mirror::Class>()));
-  class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
+  class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(
+      down_cast<mirror::ObjectArray<mirror::Class>*>(
+          spaces[0]->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)));
+  mirror::Class::SetClassClass(class_roots_.Read()->Get(kJavaLangClass));
 
   // Special case of setting up the String class early so that we can test arbitrary objects
   // as being Strings or not
@@ -860,77 +1015,10 @@
 
   mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
-  Runtime::Current()->SetSentinel(Runtime::Current()->GetHeap()->AllocObject<true>(self,
-                                                          java_lang_Object,
-                                                          java_lang_Object->GetObjectSize(),
-                                                          VoidFunctor()));
-
-  CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(),
-           static_cast<uint32_t>(dex_caches->GetLength()));
-  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-    StackHandleScope<1> hs2(self);
-    Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
-    const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
-    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
-                                                                      nullptr);
-    CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location;
-    std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file == nullptr) {
-      LOG(FATAL) << "Failed to open dex file " << dex_file_location
-                 << " from within oat file " << oat_file->GetLocation()
-                 << " error '" << error_msg << "'";
-      UNREACHABLE();
-    }
-
-    if (kSanityCheckObjects) {
-      SanityCheckArtMethodPointerArray(dex_cache->GetResolvedMethods(),
-                                       dex_cache->NumResolvedMethods(),
-                                       image_pointer_size_,
-                                       space);
-    }
-
-    CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
-
-    AppendToBootClassPath(*dex_file.get(), dex_cache);
-    opened_dex_files_.push_back(std::move(dex_file));
-  }
-
-  CHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_;
-
-  // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live
-  // bitmap walk.
-  if (!runtime->IsAotCompiler()) {
-    // Only the Aot compiler supports having an image with a different pointer size than the
-    // runtime. This happens on the host for compile 32 bit tests since we use a 64 bit libart
-    // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
-    CHECK_EQ(image_pointer_size_, sizeof(void*));
-  }
-
-  if (kSanityCheckObjects) {
-    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-      auto* dex_cache = dex_caches->Get(i);
-      for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
-        auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
-        if (field != nullptr) {
-          CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
-        }
-      }
-    }
-    heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
-  }
-
-  // Set entry point to interpreter if in InterpretOnly mode.
-  if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
-    const ImageHeader& header = space->GetImageHeader();
-    const ImageSection& methods = header.GetMethodsSection();
-    SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
-    methods.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
-  }
-
-  // reinit class_roots_
-  mirror::Class::SetClassClass(class_roots->Get(kJavaLangClass));
-  class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
+  // Allocate in non-movable so that it's possible to check if a JNI weak global ref has been
+  // cleared without triggering the read barrier and unintentionally marking the sentinel alive.
+  runtime->SetSentinel(heap->AllocNonMovableObject<true>(
+      self, java_lang_Object, java_lang_Object->GetObjectSize(), VoidFunctor()));
 
   // reinit array_iftable_ from any array class instance, they should be ==
   array_iftable_ = GcRoot<mirror::IfTable>(GetClassRoot(kObjectArrayClass)->GetIfTable());
@@ -954,9 +1042,777 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
+  for (gc::space::ImageSpace* image_space : spaces) {
+    // Boot class loader, use a null handle.
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    if (!AddImageSpace(image_space,
+                       ScopedNullHandle<mirror::ClassLoader>(),
+                       /*dex_elements*/nullptr,
+                       /*dex_location*/nullptr,
+                       /*out*/&dex_files,
+                       error_msg)) {
+      return false;
+    }
+    // Append opened dex files at the end.
+    boot_dex_files_.insert(boot_dex_files_.end(),
+                           std::make_move_iterator(dex_files.begin()),
+                           std::make_move_iterator(dex_files.end()));
+  }
   FinishInit(self);
 
-  VLOG(startup) << "ClassLinker::InitFromImage exiting";
+  VLOG(startup) << __FUNCTION__ << " exiting";
+  return true;
+}
+
+bool ClassLinker::IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                    mirror::ClassLoader* class_loader) {
+  return class_loader == nullptr ||
+      class_loader->GetClass() ==
+          soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader);
+}
+
+static mirror::String* GetDexPathListElementName(ScopedObjectAccessUnchecked& soa,
+                                                 mirror::Object* element)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* const dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  ArtField* const dex_file_name_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  DCHECK(dex_file_field != nullptr);
+  DCHECK(dex_file_name_field != nullptr);
+  DCHECK(element != nullptr);
+  CHECK_EQ(dex_file_field->GetDeclaringClass(), element->GetClass()) << PrettyTypeOf(element);
+  mirror::Object* dex_file = dex_file_field->GetObject(element);
+  if (dex_file == nullptr) {
+    return nullptr;
+  }
+  mirror::Object* const name_object = dex_file_name_field->GetObject(dex_file);
+  if (name_object != nullptr) {
+    return name_object->AsString();
+  }
+  return nullptr;
+}
+
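+// Walks the given class loader chain up to the boot class loader, collecting the dex file name
+// of every element in resolution order. Only PathClassLoader instances are supported.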
+static bool FlattenPathClassLoader(mirror::ClassLoader* class_loader,
+                                   std::list<mirror::String*>* out_dex_file_names,
+                                   std::string* error_msg)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(out_dex_file_names != nullptr);
+  DCHECK(error_msg != nullptr);
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  ArtField* const dex_path_list_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+  ArtField* const dex_elements_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+  CHECK(dex_path_list_field != nullptr);
+  CHECK(dex_elements_field != nullptr);
+  while (!ClassLinker::IsBootClassLoader(soa, class_loader)) {
+    if (class_loader->GetClass() !=
+        soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_system_PathClassLoader)) {
+      *error_msg = StringPrintf("Unknown class loader type %s", PrettyTypeOf(class_loader).c_str());
+      // Unsupported class loader.
+      return false;
+    }
+    mirror::Object* dex_path_list = dex_path_list_field->GetObject(class_loader);
+    if (dex_path_list != nullptr) {
+      // DexPathList has a dexElements array of Element objects, each of which contains a dex
+      // file.
+      mirror::Object* dex_elements_obj = dex_elements_field->GetObject(dex_path_list);
+      // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and
+      // collect its file name.
+      if (dex_elements_obj != nullptr) {
+        mirror::ObjectArray<mirror::Object>* dex_elements =
+            dex_elements_obj->AsObjectArray<mirror::Object>();
+        // Reverse order since we insert the parent at the front.
+        for (int32_t i = dex_elements->GetLength() - 1; i >= 0; --i) {
+          mirror::Object* const element = dex_elements->GetWithoutChecks(i);
+          if (element == nullptr) {
+            *error_msg = StringPrintf("Null dex element at index %d", i);
+            return false;
+          }
+          mirror::String* const name = GetDexPathListElementName(soa, element);
+          if (name == nullptr) {
+            *error_msg = StringPrintf("Null name for dex element at index %d", i);
+            return false;
+          }
+          out_dex_file_names->push_front(name);
+        }
+      }
+    }
+    class_loader = class_loader->GetParent();
+  }
+  return true;
+}
+
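+// Visitor that redirects the dex cache arrays of each ArtMethod in an app image to their
+// relocated copies, following the forwarding pointer stored at the old array location instead
+// of performing a slow map lookup.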
+class FixupArtMethodArrayVisitor : public ArtMethodVisitor {
+ public:
+  explicit FixupArtMethodArrayVisitor(const ImageHeader& header) : header_(header) {}
+
+  virtual void Visit(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
+    GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(kRuntimePointerSize);
+    const bool is_copied = method->IsCopied();
+    if (resolved_types != nullptr) {
+      bool in_image_space = false;
+      if (kIsDebugBuild || is_copied) {
+        in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+            reinterpret_cast<const uint8_t*>(resolved_types) - header_.GetImageBegin());
+      }
+      // Must be in image space for non-copied methods.
+      DCHECK(is_copied || in_image_space)
+          << resolved_types << " is not in image starting at "
+          << reinterpret_cast<void*>(header_.GetImageBegin());
+      if (!is_copied || in_image_space) {
+        // Go through the array so that we don't need to do a slow map lookup.
+        method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
+                                         kRuntimePointerSize);
+      }
+    }
+    ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(kRuntimePointerSize);
+    if (resolved_methods != nullptr) {
+      bool in_image_space = false;
+      if (kIsDebugBuild || is_copied) {
+        in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+              reinterpret_cast<const uint8_t*>(resolved_methods) - header_.GetImageBegin());
+      }
+      // Must be in image space for non-copied methods.
+      DCHECK(is_copied || in_image_space)
+          << resolved_methods << " is not in image starting at "
+          << reinterpret_cast<void*>(header_.GetImageBegin());
+      if (!is_copied || in_image_space) {
+        // Go through the array so that we don't need to do a slow map lookup.
+        method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
+                                           kRuntimePointerSize);
+      }
+    }
+  }
+
+ private:
+  const ImageHeader& header_;
+};
+
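+// Debug visitor that checks that the declaring class of every visited method, unless it lives
+// in the boot image, is present in the given class table.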
+class VerifyClassInTableArtMethodVisitor : public ArtMethodVisitor {
+ public:
+  explicit VerifyClassInTableArtMethodVisitor(ClassTable* table) : table_(table) {}
+
+  virtual void Visit(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_) {
+    mirror::Class* klass = method->GetDeclaringClass();
+    if (klass != nullptr && !Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      CHECK_EQ(table_->LookupByDescriptor(klass), klass) << PrettyClass(klass);
+    }
+  }
+
+ private:
+  ClassTable* const table_;
+};
+
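+// Debug visitor that checks that the declaring class of every image method is marked in the
+// heap's live bitmap.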
+class VerifyDeclaringClassVisitor : public ArtMethodVisitor {
+ public:
+  VerifyDeclaringClassVisitor() SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_)
+      : live_bitmap_(Runtime::Current()->GetHeap()->GetLiveBitmap()) {}
+
+  virtual void Visit(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    mirror::Class* klass = method->GetDeclaringClassUnchecked();
+    if (klass != nullptr) {
+      CHECK(live_bitmap_->Test(klass)) << "Image method has unmarked declaring class";
+    }
+  }
+
+ private:
+  gc::accounting::HeapBitmap* const live_bitmap_;
+};
+
+bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
+    gc::space::ImageSpace* space,
+    Handle<mirror::ClassLoader> class_loader,
+    Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
+    ClassTable::ClassSet* new_class_set,
+    bool* out_forward_dex_cache_array,
+    std::string* out_error_msg) {
+  DCHECK(out_forward_dex_cache_array != nullptr);
+  DCHECK(out_error_msg != nullptr);
+  Thread* const self = Thread::Current();
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  const ImageHeader& header = space->GetImageHeader();
+  {
+    // Add image classes into the class table for the class loader, and fixup the dex caches and
+    // class loader fields.
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
+    // Dex cache array fixup is all or nothing: we must reject app images that have a mix, since
+    // we rely on clobbering the dex cache arrays in the image to forward to the bss.
+    size_t num_dex_caches_with_bss_arrays = 0;
+    const size_t num_dex_caches = dex_caches->GetLength();
+    for (size_t i = 0; i < num_dex_caches; i++) {
+      mirror::DexCache* const dex_cache = dex_caches->Get(i);
+      const DexFile* const dex_file = dex_cache->GetDexFile();
+      const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+      if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+        ++num_dex_caches_with_bss_arrays;
+      }
+    }
+    *out_forward_dex_cache_array = num_dex_caches_with_bss_arrays != 0;
+    if (*out_forward_dex_cache_array) {
+      if (num_dex_caches_with_bss_arrays != num_dex_caches) {
+        // Reject application image since we cannot forward only some of the dex cache arrays.
+        // TODO: We could get around this by having a dedicated forwarding slot. It should be an
+        // uncommon case.
+        *out_error_msg = StringPrintf("Dex caches in bss does not match total: %zu vs %zu",
+                                      num_dex_caches_with_bss_arrays,
+                                      num_dex_caches);
+        return false;
+      }
+    }
+    // Only add the classes to the class loader after the points where we can return false.
+    for (size_t i = 0; i < num_dex_caches; i++) {
+      mirror::DexCache* const dex_cache = dex_caches->Get(i);
+      const DexFile* const dex_file = dex_cache->GetDexFile();
+      const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+      if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+        // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
+        // copy over the arrays.
+        DCHECK(dex_file != nullptr);
+        size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+        if (dex_file->NumStringIds() < num_strings) {
+          num_strings = dex_file->NumStringIds();
+        }
+        const size_t num_types = dex_file->NumTypeIds();
+        const size_t num_methods = dex_file->NumMethodIds();
+        const size_t num_fields = dex_file->NumFieldIds();
+        CHECK_EQ(num_strings, dex_cache->NumStrings());
+        CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
+        CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
+        CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
+        DexCacheArraysLayout layout(image_pointer_size_, dex_file);
+        uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
+        if (num_strings != 0u) {
+          mirror::StringDexCacheType* const image_resolved_strings = dex_cache->GetStrings();
+          mirror::StringDexCacheType* const strings =
+              reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset());
+          for (size_t j = 0; j < num_strings; ++j) {
+            DCHECK_EQ(strings[j].load(std::memory_order_relaxed).string_index, 0u);
+            DCHECK(strings[j].load(std::memory_order_relaxed).string_pointer.IsNull());
+            strings[j].store(image_resolved_strings[j].load(std::memory_order_relaxed),
+                             std::memory_order_relaxed);
+          }
+          mirror::StringDexCachePair::Initialize(strings);
+          dex_cache->SetStrings(strings);
+        }
+        if (num_types != 0u) {
+          GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
+          GcRoot<mirror::Class>* const types =
+              reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
+          for (size_t j = 0; kIsDebugBuild && j < num_types; ++j) {
+            DCHECK(types[j].IsNull());
+          }
+          std::copy_n(image_resolved_types, num_types, types);
+          // Store a pointer to the new location for fast ArtMethod patching without a map lookup.
+          // This leaves random garbage at the start of the dex cache array, but nobody should ever
+          // read from it again.
+          *reinterpret_cast<GcRoot<mirror::Class>**>(image_resolved_types) = types;
+          dex_cache->SetResolvedTypes(types);
+        }
+        if (num_methods != 0u) {
+          ArtMethod** const methods = reinterpret_cast<ArtMethod**>(
+              raw_arrays + layout.MethodsOffset());
+          ArtMethod** const image_resolved_methods = dex_cache->GetResolvedMethods();
+          for (size_t j = 0; kIsDebugBuild && j < num_methods; ++j) {
+            DCHECK(methods[j] == nullptr);
+          }
+          std::copy_n(image_resolved_methods, num_methods, methods);
+          // Store a pointer to the new location for fast ArtMethod patching without a map lookup.
+          *reinterpret_cast<ArtMethod***>(image_resolved_methods) = methods;
+          dex_cache->SetResolvedMethods(methods);
+        }
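+        // Unlike types and methods, no forwarding pointer is stored for fields: ArtMethods only
+        // cache the resolved-types and resolved-methods arrays, so the fields array needs no
+        // later patching and a plain copy into the BSS suffices.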
+        if (num_fields != 0u) {
+          ArtField** const fields =
+              reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+          for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
+            DCHECK(fields[j] == nullptr);
+          }
+          std::copy_n(dex_cache->GetResolvedFields(), num_fields, fields);
+          dex_cache->SetResolvedFields(fields);
+        }
+      }
+      {
+        WriterMutexLock mu2(self, dex_lock_);
+        // Make sure to do this after we update the arrays since we store the resolved types array
+        // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
+        // BSS.
+        mirror::DexCache* existing_dex_cache = FindDexCacheLocked(self,
+                                                                  *dex_file,
+                                                                  /*allow_failure*/true);
+        CHECK(existing_dex_cache == nullptr);
+        StackHandleScope<1> hs3(self);
+        RegisterDexFileLocked(*dex_file, hs3.NewHandle(dex_cache));
+      }
+      GcRoot<mirror::Class>* const types = dex_cache->GetResolvedTypes();
+      const size_t num_types = dex_cache->NumResolvedTypes();
+      if (new_class_set == nullptr) {
+        for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
+          // The image space is not yet added to the heap; avoid read barriers.
+          mirror::Class* klass = types[j].Read();
+          // There may also be boot image classes.
+          if (space->HasAddress(klass)) {
+            DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+            // Update the class loader from the one in the image to the one that loaded the app
+            // image.
+            klass->SetClassLoader(class_loader.Get());
+            // The resolved type could be from another dex cache, go through the dex cache just in
+            // case. May be null for array classes.
+            if (klass->GetDexCacheStrings() != nullptr) {
+              DCHECK(!klass->IsArrayClass());
+              klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
+            }
+            // If there are multiple dex caches, there may be the same class multiple times
+            // in different dex caches. Check for this since inserting will add duplicates
+            // otherwise.
+            if (num_dex_caches > 1) {
+              mirror::Class* existing = table->LookupByDescriptor(klass);
+              if (existing != nullptr) {
+                DCHECK_EQ(existing, klass) << PrettyClass(klass);
+              } else {
+                table->Insert(klass);
+              }
+            } else {
+              table->Insert(klass);
+            }
+            // Double-checked VLOG to avoid overhead.
+            if (VLOG_IS_ON(image)) {
+              VLOG(image) << PrettyClass(klass) << " " << klass->GetStatus();
+              if (!klass->IsArrayClass()) {
+                VLOG(image) << "From " << klass->GetDexCache()->GetDexFile()->GetBaseLocation();
+              }
+              VLOG(image) << "Direct methods";
+              for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
+                VLOG(image) << PrettyMethod(&m);
+              }
+              VLOG(image) << "Virtual methods";
+              for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
+                VLOG(image) << PrettyMethod(&m);
+              }
+            }
+          } else {
+            DCHECK(klass == nullptr || heap->ObjectIsInBootImageSpace(klass))
+                << klass << " " << PrettyClass(klass);
+          }
+        }
+      }
+      if (kIsDebugBuild) {
+        for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
+          // The image space is not yet added to the heap; avoid read barriers.
+          mirror::Class* klass = types[j].Read();
+          if (space->HasAddress(klass)) {
+            DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+            if (kIsDebugBuild) {
+              if (new_class_set != nullptr) {
+                auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
+                DCHECK(it != new_class_set->end());
+                DCHECK_EQ(it->Read(), klass);
+                mirror::Class* super_class = klass->GetSuperClass();
+                if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
+                  auto it2 = new_class_set->Find(GcRoot<mirror::Class>(super_class));
+                  DCHECK(it2 != new_class_set->end());
+                  DCHECK_EQ(it2->Read(), super_class);
+                }
+              } else {
+                DCHECK_EQ(table->LookupByDescriptor(klass), klass);
+                mirror::Class* super_class = klass->GetSuperClass();
+                if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
+                  CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
+                }
+              }
+            }
+            if (kIsDebugBuild) {
+              for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
+                const void* code = m.GetEntryPointFromQuickCompiledCode();
+                const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+                if (!IsQuickResolutionStub(code) &&
+                    !IsQuickGenericJniStub(code) &&
+                    !IsQuickToInterpreterBridge(code) &&
+                    !m.IsNative()) {
+                  DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+                }
+              }
+              for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
+                const void* code = m.GetEntryPointFromQuickCompiledCode();
+                const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+                if (!IsQuickResolutionStub(code) &&
+                    !IsQuickGenericJniStub(code) &&
+                    !IsQuickToInterpreterBridge(code) &&
+                    !m.IsNative()) {
+                  DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  if (*out_forward_dex_cache_array) {
+    ScopedTrace timing("Fixup ArtMethod dex cache arrays");
+    FixupArtMethodArrayVisitor visitor(header);
+    header.VisitPackedArtMethods(&visitor, space->Begin(), kRuntimePointerSize);
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
+  }
+  if (kVerifyArtMethodDeclaringClasses) {
+    ScopedTrace timing("Verify declaring classes");
+    ReaderMutexLock rmu(self, *Locks::heap_bitmap_lock_);
+    VerifyDeclaringClassVisitor visitor;
+    header.VisitPackedArtMethods(&visitor, space->Begin(), kRuntimePointerSize);
+  }
+  return true;
+}
+
+// Update the class loader and resolved string dex cache array of classes. Should only be used on
+// classes in the image space.
+class UpdateClassLoaderAndResolvedStringsVisitor {
+ public:
+  UpdateClassLoaderAndResolvedStringsVisitor(gc::space::ImageSpace* space,
+                                             mirror::ClassLoader* class_loader,
+                                             bool forward_strings)
+      : space_(space),
+        class_loader_(class_loader),
+        forward_strings_(forward_strings) {}
+
+  bool operator()(mirror::Class* klass) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (forward_strings_) {
+      mirror::StringDexCacheType* strings = klass->GetDexCacheStrings();
+      if (strings != nullptr) {
+        DCHECK(
+            space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+                reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
+            << "String dex cache array for " << PrettyClass(klass) << " is not in app image";
+        // Dex caches have already been updated, so take the strings pointer from there.
+        mirror::StringDexCacheType* new_strings = klass->GetDexCache()->GetStrings();
+        DCHECK_NE(strings, new_strings);
+        klass->SetDexCacheStrings(new_strings);
+      }
+    }
+    // Finally, update class loader.
+    klass->SetClassLoader(class_loader_);
+    return true;
+  }
+
+  gc::space::ImageSpace* const space_;
+  mirror::ClassLoader* const class_loader_;
+  const bool forward_strings_;
+};
+
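+// Opens the dex file named |location| from within |oat_file|, checking that its location
+// checksum matches the value recorded in the oat file.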
+static std::unique_ptr<const DexFile> OpenOatDexFile(const OatFile* oat_file,
+                                                     const char* location,
+                                                     std::string* error_msg)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(error_msg != nullptr);
+  std::unique_ptr<const DexFile> dex_file;
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(location, nullptr, error_msg);
+  if (oat_dex_file == nullptr) {
+    return std::unique_ptr<const DexFile>();
+  }
+  std::string inner_error_msg;
+  dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
+  if (dex_file == nullptr) {
+    *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'",
+                              location,
+                              oat_file->GetLocation().c_str(),
+                              inner_error_msg.c_str());
+    return std::unique_ptr<const DexFile>();
+  }
+
+  if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
+    *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
+                              location,
+                              dex_file->GetLocationChecksum(),
+                              oat_dex_file->GetDexFileLocationChecksum());
+    return std::unique_ptr<const DexFile>();
+  }
+  return dex_file;
+}
+
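+// Opens the dex files referenced by an image space's dex caches and points each dex cache at
+// its newly opened dex file.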
+bool ClassLinker::OpenImageDexFiles(gc::space::ImageSpace* space,
+                                    std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
+                                    std::string* error_msg) {
+  ScopedAssertNoThreadSuspension nts(Thread::Current(), __FUNCTION__);
+  const ImageHeader& header = space->GetImageHeader();
+  mirror::Object* dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
+  DCHECK(dex_caches_object != nullptr);
+  mirror::ObjectArray<mirror::DexCache>* dex_caches =
+      dex_caches_object->AsObjectArray<mirror::DexCache>();
+  const OatFile* oat_file = space->GetOatFile();
+  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+    mirror::DexCache* dex_cache = dex_caches->Get(i);
+    std::string dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
+    std::unique_ptr<const DexFile> dex_file = OpenOatDexFile(oat_file,
+                                                             dex_file_location.c_str(),
+                                                             error_msg);
+    if (dex_file == nullptr) {
+      return false;
+    }
+    dex_cache->SetDexFile(dex_file.get());
+    out_dex_files->push_back(std::move(dex_file));
+  }
+  return true;
+}
+
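+// Adds a boot or app image space to the class linker: verifies the image against the runtime,
+// opens its dex files, and populates the class table for |class_loader|.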
+bool ClassLinker::AddImageSpace(
+    gc::space::ImageSpace* space,
+    Handle<mirror::ClassLoader> class_loader,
+    jobjectArray dex_elements,
+    const char* dex_location,
+    std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
+    std::string* error_msg) {
+  DCHECK(out_dex_files != nullptr);
+  DCHECK(error_msg != nullptr);
+  const uint64_t start_time = NanoTime();
+  const bool app_image = class_loader.Get() != nullptr;
+  const ImageHeader& header = space->GetImageHeader();
+  mirror::Object* dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
+  DCHECK(dex_caches_object != nullptr);
+  Runtime* const runtime = Runtime::Current();
+  gc::Heap* const heap = runtime->GetHeap();
+  Thread* const self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
+      hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
+  Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
+      header.GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>()));
+  const OatFile* oat_file = space->GetOatFile();
+  std::unordered_set<mirror::ClassLoader*> image_class_loaders;
+  // Check that the image is what we are expecting.
+  if (image_pointer_size_ != space->GetImageHeader().GetPointerSize()) {
+    *error_msg = StringPrintf("Application image pointer size does not match runtime: %zu vs %zu",
+                              static_cast<size_t>(space->GetImageHeader().GetPointerSize()),
+                              image_pointer_size_);
+    return false;
+  }
+  DCHECK(class_roots.Get() != nullptr);
+  if (class_roots->GetLength() != static_cast<int32_t>(kClassRootsMax)) {
+    *error_msg = StringPrintf("Expected %d class roots but got %d",
+                              class_roots->GetLength(),
+                              static_cast<int32_t>(kClassRootsMax));
+    return false;
+  }
+  // Check against existing class roots to make sure they match the ones in the boot image.
+  for (size_t i = 0; i < kClassRootsMax; i++) {
+    if (class_roots->Get(i) != GetClassRoot(static_cast<ClassRoot>(i))) {
+      *error_msg = "App image class roots must have pointer equality with runtime ones.";
+      return false;
+    }
+  }
+  if (oat_file->GetOatHeader().GetDexFileCount() !=
+      static_cast<uint32_t>(dex_caches->GetLength())) {
+    *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from "
+                 "image";
+    return false;
+  }
+
+  StackHandleScope<1> hs2(self);
+  MutableHandle<mirror::DexCache> h_dex_cache(hs2.NewHandle<mirror::DexCache>(nullptr));
+  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+    h_dex_cache.Assign(dex_caches->Get(i));
+    std::string dex_file_location(h_dex_cache->GetLocation()->ToModifiedUtf8());
+    // TODO: Only store qualified paths.
+    // If non-qualified, qualify it.
+    if (dex_file_location.find('/') == std::string::npos) {
+      std::string dex_location_path = dex_location;
+      const size_t pos = dex_location_path.find_last_of('/');
+      CHECK_NE(pos, std::string::npos);
+      dex_location_path = dex_location_path.substr(0, pos + 1);  // Keep trailing '/'
+      dex_file_location = dex_location_path + dex_file_location;
+    }
+    std::unique_ptr<const DexFile> dex_file = OpenOatDexFile(oat_file,
+                                                             dex_file_location.c_str(),
+                                                             error_msg);
+    if (dex_file == nullptr) {
+      return false;
+    }
+
+    if (app_image) {
+      // The current dex file field is bogus; overwrite it so that we can get the dex file in
+      // the loop below.
+      h_dex_cache->SetDexFile(dex_file.get());
+      // Check that each class loader resolved the same way.
+      // TODO: Store image class loaders as image roots.
+      GcRoot<mirror::Class>* const types = h_dex_cache->GetResolvedTypes();
+      for (int32_t j = 0, num_types = h_dex_cache->NumResolvedTypes(); j < num_types; j++) {
+        mirror::Class* klass = types[j].Read();
+        if (klass != nullptr) {
+          DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+          mirror::ClassLoader* image_class_loader = klass->GetClassLoader();
+          image_class_loaders.insert(image_class_loader);
+        }
+      }
+    } else {
+      if (kSanityCheckObjects) {
+        SanityCheckArtMethodPointerArray(h_dex_cache->GetResolvedMethods(),
+                                         h_dex_cache->NumResolvedMethods(),
+                                         image_pointer_size_,
+                                         heap->GetBootImageSpaces());
+      }
+      // Register dex files, keep track of existing ones that are conflicts.
+      AppendToBootClassPath(*dex_file.get(), h_dex_cache);
+    }
+    out_dex_files->push_back(std::move(dex_file));
+  }
+
+  if (app_image) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    // Check that the class loader resolves the same way as the ones in the image.
+    // Image class loader [A][B][C][image dex files]
+    // Class loader = [???][dex_elements][image dex files]
+    // Need to ensure that [???][dex_elements] == [A][B][C].
+    // Each PathClassLoader checks its parent first; the logic for PathClassLoader then loops
+    // through its array of dex files. To ensure both hierarchies resolve the same way, simply
+    // flatten each one in resolution order and check that the dex file names are the same.
+    for (mirror::ClassLoader* image_class_loader : image_class_loaders) {
+      if (IsBootClassLoader(soa, image_class_loader)) {
+        // The dex cache can reference types from the boot class loader.
+        continue;
+      }
+      std::list<mirror::String*> image_dex_file_names;
+      std::string temp_error_msg;
+      if (!FlattenPathClassLoader(image_class_loader, &image_dex_file_names, &temp_error_msg)) {
+        *error_msg = StringPrintf("Failed to flatten image class loader hierarchy '%s'",
+                                  temp_error_msg.c_str());
+        return false;
+      }
+      std::list<mirror::String*> loader_dex_file_names;
+      if (!FlattenPathClassLoader(class_loader.Get(), &loader_dex_file_names, &temp_error_msg)) {
+        *error_msg = StringPrintf("Failed to flatten class loader hierarchy '%s'",
+                                  temp_error_msg.c_str());
+        return false;
+      }
+      // Add the temporary dex path list elements at the end.
+      auto* elements = soa.Decode<mirror::ObjectArray<mirror::Object>*>(dex_elements);
+      for (size_t i = 0, num_elems = elements->GetLength(); i < num_elems; ++i) {
+        mirror::Object* element = elements->GetWithoutChecks(i);
+        if (element != nullptr) {
+          // If we are somewhere in the middle of the array, there may be nulls at the end.
+          loader_dex_file_names.push_back(GetDexPathListElementName(soa, element));
+        }
+      }
+      // Ignore the number of image dex files since we are adding those to the class loader anyway.
+      CHECK_GE(static_cast<size_t>(image_dex_file_names.size()),
+               static_cast<size_t>(dex_caches->GetLength()));
+      size_t image_count = image_dex_file_names.size() - dex_caches->GetLength();
+      // Check that the dex file names match.
+      bool equal = image_count == loader_dex_file_names.size();
+      if (equal) {
+        auto it1 = image_dex_file_names.begin();
+        auto it2 = loader_dex_file_names.begin();
+        for (size_t i = 0; equal && i < image_count; ++i, ++it1, ++it2) {
+          equal = equal && (*it1)->Equals(*it2);
+        }
+      }
+      if (!equal) {
+        VLOG(image) << "Image dex files " << image_dex_file_names.size();
+        for (mirror::String* name : image_dex_file_names) {
+          VLOG(image) << name->ToModifiedUtf8();
+        }
+        VLOG(image) << "Loader dex files " << loader_dex_file_names.size();
+        for (mirror::String* name : loader_dex_file_names) {
+          VLOG(image) << name->ToModifiedUtf8();
+        }
+        *error_msg = "Rejecting application image due to class loader mismatch";
+        // Ignore class loader mismatch for now since these would just use possibly incorrect
+        // oat code anyway. The structural class check should be done in the parent.
+      }
+    }
+  }
+
+  if (kSanityCheckObjects) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      auto* dex_cache = dex_caches->Get(i);
+      for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
+        auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
+        if (field != nullptr) {
+          CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
+        }
+      }
+    }
+    if (!app_image) {
+      heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
+    }
+  }
+
+  // Set entry point to interpreter if in InterpretOnly mode.
+  if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
+    SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
+    header.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
+  }
+
+  ClassTable* class_table = nullptr;
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    class_table = InsertClassTableForClassLoader(class_loader.Get());
+  }
+  // If we have a class table section, read it and use it for verification in
+  // UpdateAppImageClassLoadersAndDexCaches.
+  ClassTable::ClassSet temp_set;
+  const ImageSection& class_table_section = header.GetImageSection(ImageHeader::kSectionClassTable);
+  const bool added_class_table = class_table_section.Size() > 0u;
+  if (added_class_table) {
+    const uint64_t start_time2 = NanoTime();
+    size_t read_count = 0;
+    temp_set = ClassTable::ClassSet(space->Begin() + class_table_section.Offset(),
+                                    /*make copy*/false,
+                                    &read_count);
+    if (!app_image) {
+      dex_cache_boot_image_class_lookup_required_ = false;
+    }
+    VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
+  }
+  if (app_image) {
+    bool forward_dex_cache_arrays = false;
+    if (!UpdateAppImageClassLoadersAndDexCaches(space,
+                                                class_loader,
+                                                dex_caches,
+                                                added_class_table ? &temp_set : nullptr,
+                                                /*out*/&forward_dex_cache_arrays,
+                                                /*out*/error_msg)) {
+      return false;
+    }
+    // Update class loader and resolved strings. If added_class_table is false, the resolved
+    // strings were forwarded in UpdateAppImageClassLoadersAndDexCaches.
+    UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
+                                                       class_loader.Get(),
+                                                       forward_dex_cache_arrays);
+    if (added_class_table) {
+      for (GcRoot<mirror::Class>& root : temp_set) {
+        visitor(root.Read());
+      }
+    }
+    // forward_dex_cache_arrays is true iff we copied all of the dex cache arrays into the .bss.
+    // In this case, madvise away the dex cache arrays section of the image to reduce RAM usage and
+    // mark as PROT_NONE to catch any invalid accesses.
+    if (forward_dex_cache_arrays) {
+      const ImageSection& dex_cache_section = header.GetImageSection(
+          ImageHeader::kSectionDexCacheArrays);
+      uint8_t* section_begin = AlignUp(space->Begin() + dex_cache_section.Offset(), kPageSize);
+      uint8_t* section_end = AlignDown(space->Begin() + dex_cache_section.End(), kPageSize);
+      if (section_begin < section_end) {
+        madvise(section_begin, section_end - section_begin, MADV_DONTNEED);
+        mprotect(section_begin, section_end - section_begin, PROT_NONE);
+        VLOG(image) << "Released and protected dex cache array image section from "
+                    << reinterpret_cast<const void*>(section_begin) << "-"
+                    << reinterpret_cast<const void*>(section_end);
+      }
+    }
+  }
+  if (added_class_table) {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    class_table->AddClassSet(std::move(temp_set));
+  }
+  if (kIsDebugBuild && app_image) {
+    // This verification needs to happen after the classes have been added to the class loader,
+    // since it ensures classes are in the class table.
+    VerifyClassInTableArtMethodVisitor visitor2(class_table);
+    header.VisitPackedArtMethods(&visitor2, space->Begin(), kRuntimePointerSize);
+  }
+  VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
+  return true;
 }
 
 bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
@@ -1040,7 +1896,7 @@
   void Visit(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
-    if (!done_ && class_table != nullptr && !class_table->Visit(visitor_)) {
+    if (!done_ && class_table != nullptr && !class_table->Visit(*visitor_)) {
       // If the visitor ClassTable returns false it means that we don't need to continue.
       done_ = true;
     }
@@ -1053,15 +1909,15 @@
 };
 
 void ClassLinker::VisitClassesInternal(ClassVisitor* visitor) {
-  if (boot_class_table_.Visit(visitor)) {
+  if (boot_class_table_.Visit(*visitor)) {
     VisitClassLoaderClassesVisitor loader_visitor(visitor);
     VisitClassLoaders(&loader_visitor);
   }
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor) {
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -1076,7 +1932,7 @@
 
 class GetClassesInToVector : public ClassVisitor {
  public:
-  bool Visit(mirror::Class* klass) OVERRIDE {
+  bool operator()(mirror::Class* klass) OVERRIDE {
     classes_.push_back(klass);
     return true;
   }
@@ -1088,7 +1944,7 @@
   explicit GetClassInToObjectArray(mirror::ObjectArray<mirror::Class>* arr)
       : arr_(arr), index_(0) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     ++index_;
     if (index_ <= arr_->GetLength()) {
       arr_->Set(index_ - 1, klass);
@@ -1109,16 +1965,17 @@
 void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor) {
   // TODO: it may be possible to avoid secondary storage if we iterate over dex caches. The problem
   // is avoiding duplicates.
+  Thread* const self = Thread::Current();
   if (!kMovingClasses) {
+    ScopedAssertNoThreadSuspension nts(self, __FUNCTION__);
     GetClassesInToVector accumulator;
     VisitClasses(&accumulator);
     for (mirror::Class* klass : accumulator.classes_) {
-      if (!visitor->Visit(klass)) {
+      if (!visitor->operator()(klass)) {
         return;
       }
     }
   } else {
-    Thread* const self = Thread::Current();
     StackHandleScope<1> hs(self);
     auto classes = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
     // We size the array assuming classes won't be added to the class table during the visit.
@@ -1146,7 +2003,7 @@
       // the class table grew then the loop repeats. If classes are created after the loop has
       // finished then we don't visit.
       mirror::Class* klass = classes->Get(i);
-      if (klass != nullptr && !visitor->Visit(klass)) {
+      if (klass != nullptr && !visitor->operator()(klass)) {
         return;
       }
     }
@@ -1174,19 +2031,32 @@
   mirror::LongArray::ResetArrayClass();
   mirror::ShortArray::ResetArrayClass();
   Thread* const self = Thread::Current();
-  JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
   for (const ClassLoaderData& data : class_loaders_) {
-    vm->DeleteWeakGlobalRef(self, data.weak_root);
-    delete data.allocator;
-    delete data.class_table;
+    DeleteClassLoader(self, data);
   }
   class_loaders_.clear();
 }
 
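+// Frees the native resources owned by a class loader: its JNI weak global root, any JIT code
+// compiled for its methods, its LinearAlloc, and its class table.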
+void ClassLinker::DeleteClassLoader(Thread* self, const ClassLoaderData& data) {
+  Runtime* const runtime = Runtime::Current();
+  JavaVMExt* const vm = runtime->GetJavaVM();
+  vm->DeleteWeakGlobalRef(self, data.weak_root);
+  // Notify the JIT that we need to remove the methods and/or profiling info.
+  if (runtime->GetJit() != nullptr) {
+    jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache();
+    if (code_cache != nullptr) {
+      code_cache->RemoveMethodsIn(self, *data.allocator);
+    }
+  }
+  delete data.allocator;
+  delete data.class_table;
+}
+
 mirror::PointerArray* ClassLinker::AllocPointerArray(Thread* self, size_t length) {
-  return down_cast<mirror::PointerArray*>(image_pointer_size_ == 8u ?
-      static_cast<mirror::Array*>(mirror::LongArray::Alloc(self, length)) :
-      static_cast<mirror::Array*>(mirror::IntArray::Alloc(self, length)));
+  return down_cast<mirror::PointerArray*>(
+      image_pointer_size_ == PointerSize::k64
+          ? static_cast<mirror::Array*>(mirror::LongArray::Alloc(self, length))
+          : static_cast<mirror::Array*>(mirror::IntArray::Alloc(self, length)));
 }
 
 mirror::DexCache* ClassLinker::AllocDexCache(Thread* self,
@@ -1211,23 +2081,52 @@
     raw_arrays = dex_file.GetOatDexFile()->GetDexCacheArrays();
   } else if (dex_file.NumStringIds() != 0u || dex_file.NumTypeIds() != 0u ||
       dex_file.NumMethodIds() != 0u || dex_file.NumFieldIds() != 0u) {
-    // NOTE: We "leak" the raw_arrays because we never destroy the dex cache.
-    DCHECK(image_pointer_size_ == 4u || image_pointer_size_ == 8u);
     // Zero-initialized.
     raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
   }
-  GcRoot<mirror::String>* strings = (dex_file.NumStringIds() == 0u) ? nullptr :
-      reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
+  mirror::StringDexCacheType* strings = (dex_file.NumStringIds() == 0u) ? nullptr :
+      reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset());
   GcRoot<mirror::Class>* types = (dex_file.NumTypeIds() == 0u) ? nullptr :
       reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
   ArtMethod** methods = (dex_file.NumMethodIds() == 0u) ? nullptr :
       reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
   ArtField** fields = (dex_file.NumFieldIds() == 0u) ? nullptr :
       reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+  size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+  if (dex_file.NumStringIds() < num_strings) {
+    num_strings = dex_file.NumStringIds();
+  }
+  DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
+                 "Expected raw_arrays to align to StringDexCacheType.";
+  DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) <<
+                 "Expected StringsOffset() to align to StringDexCacheType.";
+  DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) <<
+                 "Expected strings to align to StringDexCacheType.";
+  static_assert(alignof(mirror::StringDexCacheType) == 8u,
+                "Expected StringDexCacheType to have align of 8.");
+  if (kIsDebugBuild) {
+    // Sanity check to make sure all the dex cache arrays are empty. b/28992179
+    for (size_t i = 0; i < num_strings; ++i) {
+      CHECK_EQ(strings[i].load(std::memory_order_relaxed).string_index, 0u);
+      CHECK(strings[i].load(std::memory_order_relaxed).string_pointer.IsNull());
+    }
+    for (size_t i = 0; i < dex_file.NumTypeIds(); ++i) {
+      CHECK(types[i].Read<kWithoutReadBarrier>() == nullptr);
+    }
+    for (size_t i = 0; i < dex_file.NumMethodIds(); ++i) {
+      CHECK(mirror::DexCache::GetElementPtrSize(methods, i, image_pointer_size_) == nullptr);
+    }
+    for (size_t i = 0; i < dex_file.NumFieldIds(); ++i) {
+      CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size_) == nullptr);
+    }
+  }
+  if (strings != nullptr) {
+    mirror::StringDexCachePair::Initialize(strings);
+  }
   dex_cache->Init(&dex_file,
                   location.Get(),
                   strings,
-                  dex_file.NumStringIds(),
+                  num_strings,
                   types,
                   dex_file.NumTypeIds(),
                   methods,
@@ -1294,20 +2193,37 @@
   }
 
   // Wait for the class if it has not already been linked.
-  if (!klass->IsResolved() && !klass->IsErroneous()) {
+  size_t index = 0;
+  // Maximum number of yield iterations until we start sleeping.
+  static const size_t kNumYieldIterations = 1000;
+  // How long each sleep is in us.
+  static const size_t kSleepDurationUS = 1000;  // 1 ms.
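+  // Poll until the class becomes resolved or erroneous, yielding at first and then sleeping;
+  // monitor-waiting is avoided because it could deadlock with a thread holding klass's lock.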
+  while (!klass->IsResolved() && !klass->IsErroneous()) {
     StackHandleScope<1> hs(self);
     HandleWrapper<mirror::Class> h_class(hs.NewHandleWrapper(&klass));
-    ObjectLock<mirror::Class> lock(self, h_class);
-    // Check for circular dependencies between classes.
-    if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
-      ThrowClassCircularityError(h_class.Get());
-      mirror::Class::SetStatus(h_class, mirror::Class::kStatusError, self);
-      return nullptr;
+    {
+      ObjectTryLock<mirror::Class> lock(self, h_class);
+      // Cannot use a monitor wait here since it may block when returning and deadlock if another
+      // thread has locked klass.
+      if (lock.Acquired()) {
+        // Check for circular dependencies between classes, the lock is required for SetStatus.
+        if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
+          ThrowClassCircularityError(h_class.Get());
+          mirror::Class::SetStatus(h_class, mirror::Class::kStatusError, self);
+          return nullptr;
+        }
+      }
     }
-    // Wait for the pending initialization to complete.
-    while (!h_class->IsResolved() && !h_class->IsErroneous()) {
-      lock.WaitIgnoringInterrupts();
+    {
+      // Handle wrapper deals with klass moving.
+      ScopedThreadSuspension sts(self, kSuspended);
+      if (index < kNumYieldIterations) {
+        sched_yield();
+      } else {
+        usleep(kSleepDurationUS);
+      }
     }
+    ++index;
   }
 
   if (klass->IsErroneous()) {
@@ -1334,14 +2250,6 @@
   return ClassPathEntry(nullptr, nullptr);
 }
 
-static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                              mirror::ClassLoader* class_loader)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return class_loader == nullptr ||
-      class_loader->GetClass() ==
-          soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader);
-}
-
 bool ClassLinker::FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
                                              Thread* self,
                                              const char* descriptor,
@@ -1357,8 +2265,12 @@
       if (klass != nullptr) {
         *result = EnsureResolved(self, descriptor, klass);
       } else {
-        *result = DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(),
-                              *pair.first, *pair.second);
+        *result = DefineClass(self,
+                              descriptor,
+                              hash,
+                              ScopedNullHandle<mirror::ClassLoader>(),
+                              *pair.first,
+                              *pair.second);
       }
       if (*result == nullptr) {
         CHECK(self->IsExceptionPending()) << descriptor;
@@ -1483,7 +2395,11 @@
     // The boot class loader, search the boot class path.
     ClassPathEntry pair = FindInClassPath(descriptor, hash, boot_class_path_);
     if (pair.second != nullptr) {
-      return DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(), *pair.first,
+      return DefineClass(self,
+                         descriptor,
+                         hash,
+                         ScopedNullHandle<mirror::ClassLoader>(),
+                         *pair.first,
                          *pair.second);
     } else {
       // The boot class loader is searched ahead of the application class loader, failures are
@@ -1585,9 +2501,7 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  mirror::DexCache* dex_cache = RegisterDexFile(
-      dex_file,
-      GetOrCreateAllocatorForClassLoader(class_loader.Get()));
+  mirror::DexCache* dex_cache = RegisterDexFile(dex_file, class_loader.Get());
   if (dex_cache == nullptr) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -1619,6 +2533,7 @@
   // inserted before we allocate / fill in these fields.
   LoadClass(self, dex_file, dex_class_def, klass);
   if (self->IsExceptionPending()) {
+    VLOG(class_linker) << self->GetException()->Dump();
     // An exception occurred during load; set status to erroneous while holding klass' lock in case
     // notification is necessary.
     if (!klass->IsErroneous()) {
@@ -1654,6 +2569,10 @@
   CHECK(h_new_class.Get() != nullptr) << descriptor;
   CHECK(h_new_class->IsResolved()) << descriptor;
 
+  // Update the dex cache of where the class is defined. Inlining depends on having
+  // this filled.
+  h_new_class->GetDexCache()->SetResolvedType(h_new_class->GetDexTypeIndex(), h_new_class.Get());
+
   // Instrumentation may have updated entrypoints for all methods of all
   // classes. However it could not update methods of this class while we
   // were loading it. Now the class is resolved, we can update entrypoints
@@ -1679,6 +2598,9 @@
    */
   Dbg::PostClassPrepare(h_new_class.Get());
 
+  // Notify native debugger of the new class and its layout.
+  jit::Jit::NewTypeLoadedIfUsingJit(h_new_class.Get());
+
   return h_new_class.Get();
 }
 
@@ -1691,8 +2613,18 @@
   size_t num_32 = 0;
   size_t num_64 = 0;
   if (class_data != nullptr) {
+    // We allow duplicate definitions of the same field in a class_data_item
+    // but ignore the repeated indexes here, b/21868015.
+    uint32_t last_field_idx = DexFile::kDexNoIndex;
     for (ClassDataItemIterator it(dex_file, class_data); it.HasNextStaticField(); it.Next()) {
-      const DexFile::FieldId& field_id = dex_file.GetFieldId(it.GetMemberIndex());
+      uint32_t field_idx = it.GetMemberIndex();
+      // Ordering enforced by DexFileVerifier.
+      DCHECK(last_field_idx == DexFile::kDexNoIndex || last_field_idx <= field_idx);
+      if (UNLIKELY(field_idx == last_field_idx)) {
+        continue;
+      }
+      last_field_idx = field_idx;
+      const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
       const char* descriptor = dex_file.GetFieldTypeDescriptor(field_id);
       char c = descriptor[0];
       switch (c) {
@@ -1792,12 +2724,10 @@
     // We're invoking a virtual method directly (thanks to sharpening); compute the oat_method_index
     // by searching for its position in the declared virtual methods.
     oat_method_index = declaring_class->NumDirectMethods();
-    size_t end = declaring_class->NumVirtualMethods();
     bool found_virtual = false;
-    for (size_t i = 0; i < end; i++) {
+    for (ArtMethod& art_method : declaring_class->GetVirtualMethods(image_pointer_size_)) {
       // Check method index instead of identity in case of duplicate method definitions.
-      if (method->GetDexMethodIndex() ==
-          declaring_class->GetVirtualMethod(i, image_pointer_size_)->GetDexMethodIndex()) {
+      if (method->GetDexMethodIndex() == art_method.GetDexMethodIndex()) {
         found_virtual = true;
         break;
       }
@@ -1821,7 +2751,7 @@
 
 // Special case to get oat code without overwriting a trampoline.
 const void* ClassLinker::GetQuickOatCodeFor(ArtMethod* method) {
-  CHECK(!method->IsAbstract()) << PrettyMethod(method);
+  CHECK(method->IsInvokable()) << PrettyMethod(method);
   if (method->IsProxyMethod()) {
     return GetQuickProxyInvokeHandler();
   }
@@ -1833,13 +2763,6 @@
       return code;
     }
   }
-  jit::Jit* const jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    auto* code = jit->GetCodeCache()->GetCodeFor(method);
-    if (code != nullptr) {
-      return code;
-    }
-  }
   if (method->IsNative()) {
     // No code and native? Use generic trampoline.
     return GetQuickGenericJniStub();
@@ -1848,7 +2771,7 @@
 }
 
 const void* ClassLinker::GetOatMethodQuickCodeFor(ArtMethod* method) {
-  if (method->IsNative() || method->IsAbstract() || method->IsProxyMethod()) {
+  if (method->IsNative() || !method->IsInvokable() || method->IsProxyMethod()) {
     return nullptr;
   }
   bool found;
@@ -1856,41 +2779,50 @@
   if (found) {
     return oat_method.GetQuickCode();
   }
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    auto* code = jit->GetCodeCache()->GetCodeFor(method);
-    if (code != nullptr) {
-      return code;
-    }
-  }
   return nullptr;
 }
 
-const void* ClassLinker::GetQuickOatCodeFor(const DexFile& dex_file,
-                                            uint16_t class_def_idx,
-                                            uint32_t method_idx) {
-  bool found;
-  OatFile::OatClass oat_class = FindOatClass(dex_file, class_def_idx, &found);
-  if (!found) {
-    return nullptr;
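+// Returns true if |method| should run through the interpreter instead of |quick_code|, e.g.
+// when there is no code, in interpret-only mode, or when a debugger forces interpretation.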
+bool ClassLinker::ShouldUseInterpreterEntrypoint(ArtMethod* method, const void* quick_code) {
+  if (UNLIKELY(method->IsNative() || method->IsProxyMethod())) {
+    return false;
   }
-  uint32_t oat_method_idx = GetOatMethodIndexFromMethodIndex(dex_file, class_def_idx, method_idx);
-  return oat_class.GetOatMethod(oat_method_idx).GetQuickCode();
-}
 
-// Returns true if the method must run with interpreter, false otherwise.
-static bool NeedsInterpreter(ArtMethod* method, const void* quick_code)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
   if (quick_code == nullptr) {
-    // No code: need interpreter.
-    // May return true for native code, in the case of generic JNI
-    // DCHECK(!method->IsNative());
     return true;
   }
-  // If interpreter mode is enabled, every method (except native and proxy) must
-  // be run with interpreter.
-  return Runtime::Current()->GetInstrumentation()->InterpretOnly() &&
-         !method->IsNative() && !method->IsProxyMethod();
+
+  Runtime* runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  if (instr->InterpretOnly()) {
+    return true;
+  }
+
+  if (runtime->GetClassLinker()->IsQuickToInterpreterBridge(quick_code)) {
+    // This check avoids compiled/interpreter transitions.
+    return true;
+  }
+
+  if (Dbg::IsForcedInterpreterNeededForCalling(Thread::Current(), method)) {
+    // Force the use of interpreter when it is required by the debugger.
+    return true;
+  }
+
+  if (runtime->IsNativeDebuggable()) {
+    DCHECK(runtime->UseJitCompilation() && runtime->GetJit()->JitAtFirstUse());
+    // If we are doing native debugging, ignore the application's AOT code,
+    // since we want to JIT it with extra stackmaps for native debugging.
+    // On the other hand, keep all AOT code from the boot image, since the
+    // blocking JIT would result in a non-negligible performance impact.
+    return !runtime->GetHeap()->IsInBootImageOatFile(quick_code);
+  }
+
+  if (Dbg::IsDebuggerActive()) {
+    // Boot image classes may be AOT-compiled as non-debuggable.
+    // This is not suitable for the Java debugger, so ignore the AOT code.
+    return runtime->GetHeap()->IsInBootImageOatFile(quick_code);
+  }
+
+  return false;
 }
 
 void ClassLinker::FixupStaticTrampolines(mirror::Class* klass) {
@@ -1900,7 +2832,7 @@
   }
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsStarted()) {
-    if (runtime->IsAotCompiler() || runtime->GetHeap()->HasImageSpace()) {
+    if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) {
       return;  // OAT file unavailable.
     }
   }
@@ -1935,21 +2867,25 @@
       OatFile::OatMethod oat_method = oat_class.GetOatMethod(method_index);
       quick_code = oat_method.GetQuickCode();
     }
-    const bool enter_interpreter = NeedsInterpreter(method, quick_code);
-    if (enter_interpreter) {
+    // Check whether the method is native, in which case it's generic JNI.
+    if (quick_code == nullptr && method->IsNative()) {
+      quick_code = GetQuickGenericJniStub();
+    } else if (ShouldUseInterpreterEntrypoint(method, quick_code)) {
       // Use interpreter entry point.
-      // Check whether the method is native, in which case it's generic JNI.
-      if (quick_code == nullptr && method->IsNative()) {
-        quick_code = GetQuickGenericJniStub();
-      } else {
-        quick_code = GetQuickToInterpreterBridge();
-      }
+      quick_code = GetQuickToInterpreterBridge();
     }
     runtime->GetInstrumentation()->UpdateMethodsCode(method, quick_code);
   }
   // Ignore virtual methods on the iterator.
 }
 
+void ClassLinker::EnsureThrowsInvocationError(ArtMethod* method) {
+  DCHECK(method != nullptr);
+  DCHECK(!method->IsInvokable());
+  method->SetEntryPointFromQuickCompiledCodePtrSize(quick_to_interpreter_bridge_trampoline_,
+                                                    image_pointer_size_);
+}
+
 void ClassLinker::LinkCode(ArtMethod* method, const OatFile::OatClass* oat_class,
                            uint32_t class_def_method_index) {
   Runtime* const runtime = Runtime::Current();
@@ -1967,10 +2903,11 @@
   }
 
   // Install entry point from interpreter.
-  bool enter_interpreter = NeedsInterpreter(method, method->GetEntryPointFromQuickCompiledCode());
+  const void* quick_code = method->GetEntryPointFromQuickCompiledCode();
+  bool enter_interpreter = ShouldUseInterpreterEntrypoint(method, quick_code);
 
-  if (method->IsAbstract()) {
-    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+  if (!method->IsInvokable()) {
+    EnsureThrowsInvocationError(method);
     return;
   }
 
@@ -1979,20 +2916,18 @@
     // It will be replaced by the proper entry point by ClassLinker::FixupStaticTrampolines
     // after initializing class (see ClassLinker::InitializeClass method).
     method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionStub());
+  } else if (quick_code == nullptr && method->IsNative()) {
+    method->SetEntryPointFromQuickCompiledCode(GetQuickGenericJniStub());
   } else if (enter_interpreter) {
-    if (!method->IsNative()) {
-      // Set entry point from compiled code if there's no code or in interpreter only mode.
-      method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
-    } else {
-      method->SetEntryPointFromQuickCompiledCode(GetQuickGenericJniStub());
-    }
+    // Set entry point from compiled code if there's no code or in interpreter only mode.
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
   }
 
   if (method->IsNative()) {
     // Unregistering restores the dlsym lookup stub.
     method->UnregisterNative();
 
-    if (enter_interpreter) {
+    if (enter_interpreter || quick_code == nullptr) {
       // We have a native method here without code. Then it should have either the generic JNI
       // trampoline as entrypoint (non-static), or the resolution trampoline (static).
       // TODO: this doesn't handle all the cases where trampolines may be installed.
@@ -2097,8 +3032,9 @@
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   LinearAlloc* allocator = class_loader->GetAllocator();
   if (allocator == nullptr) {
-    allocator = Runtime::Current()->CreateLinearAlloc();
-    class_loader->SetAllocator(allocator);
+    RegisterClassLoader(class_loader);
+    allocator = class_loader->GetAllocator();
+    CHECK(allocator != nullptr);
   }
   return allocator;
 }
@@ -2132,8 +3068,6 @@
         last_field_idx = field_idx;
       }
     }
-    klass->SetSFieldsPtr(sfields);
-    DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     // Load instance fields.
     LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self,
                                                                 allocator,
@@ -2155,16 +3089,28 @@
       LOG(WARNING) << "Duplicate fields in class " << PrettyDescriptor(klass.Get())
           << " (unique static fields: " << num_sfields << "/" << it.NumStaticFields()
           << ", unique instance fields: " << num_ifields << "/" << it.NumInstanceFields() << ")";
-      // NOTE: Not shrinking the over-allocated sfields/ifields.
+      // NOTE: Not shrinking the over-allocated sfields/ifields, just setting the size.
+      if (sfields != nullptr) {
+        sfields->SetSize(num_sfields);
+      }
+      if (ifields != nullptr) {
+        ifields->SetSize(num_ifields);
+      }
     }
+    // Set the field arrays.
+    klass->SetSFieldsPtr(sfields);
+    DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     klass->SetIFieldsPtr(ifields);
     DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
     // Load methods.
-    klass->SetDirectMethodsPtr(AllocArtMethodArray(self, allocator, it.NumDirectMethods()));
-    klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, allocator, it.NumVirtualMethods()));
+    klass->SetMethodsPtr(
+        AllocArtMethodArray(self, allocator, it.NumDirectMethods() + it.NumVirtualMethods()),
+        it.NumDirectMethods(),
+        it.NumVirtualMethods());
     size_t class_def_method_index = 0;
     uint32_t last_dex_method_index = DexFile::kDexNoIndex;
     size_t last_class_def_method_index = 0;
+    // TODO: These should really use the iterators.
     for (size_t i = 0; it.HasNextDirectMethod(); i++, it.Next()) {
       ArtMethod* method = klass->GetDirectMethodUnchecked(i, image_pointer_size_);
       LoadMethod(self, dex_file, it, klass, method);
@@ -2282,25 +3228,44 @@
   Thread* const self = Thread::Current();
   dex_lock_.AssertExclusiveHeld(self);
   CHECK(dex_cache.Get() != nullptr) << dex_file.GetLocation();
-  CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()))
-      << dex_cache->GetLocation()->ToModifiedUtf8() << " " << dex_file.GetLocation();
+  // For app images, the dex cache location may be a suffix of the dex file location since the
+  // dex file location is an absolute path.
+  const std::string dex_cache_location = dex_cache->GetLocation()->ToModifiedUtf8();
+  const size_t dex_cache_length = dex_cache_location.length();
+  CHECK_GT(dex_cache_length, 0u) << dex_file.GetLocation();
+  std::string dex_file_location = dex_file.GetLocation();
+  CHECK_GE(dex_file_location.length(), dex_cache_length)
+      << dex_cache_location << " " << dex_file.GetLocation();
+  // Take suffix.
+  const std::string dex_file_suffix = dex_file_location.substr(
+      dex_file_location.length() - dex_cache_length,
+      dex_cache_length);
+  // Example dex_cache location is SettingsProvider.apk and
+  // dex file location is /system/priv-app/SettingsProvider/SettingsProvider.apk
+  CHECK_EQ(dex_cache_location, dex_file_suffix);
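
The relaxed check above accepts a dex cache whose recorded location is a suffix of the dex file's absolute path, as in the SettingsProvider.apk example. A self-contained sketch of the same comparison using only std::string; LocationsMatch is a hypothetical helper, not ART code.

    #include <string>

    // True when dex_cache_location is a non-empty suffix of dex_file_location.
    bool LocationsMatch(const std::string& dex_cache_location,
                        const std::string& dex_file_location) {
      if (dex_cache_location.empty() ||
          dex_file_location.length() < dex_cache_location.length()) {
        return false;
      }
      return dex_file_location.compare(
          dex_file_location.length() - dex_cache_location.length(),
          dex_cache_location.length(),
          dex_cache_location) == 0;
    }

    // LocationsMatch("SettingsProvider.apk",
    //                "/system/priv-app/SettingsProvider/SettingsProvider.apk") -> true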
   // Clean up pass to remove null dex caches.
   // Null dex caches can occur due to class unloading and we are lazily removing null entries.
   JavaVMExt* const vm = self->GetJniEnv()->vm;
-  for (auto it = dex_caches_.begin(); it != dex_caches_.end();) {
-    mirror::Object* dex_cache_root = self->DecodeJObject(*it);
-    if (dex_cache_root == nullptr) {
-      vm->DeleteWeakGlobalRef(self, *it);
+  for (auto it = dex_caches_.begin(); it != dex_caches_.end(); ) {
+    DexCacheData data = *it;
+    if (self->IsJWeakCleared(data.weak_root)) {
+      vm->DeleteWeakGlobalRef(self, data.weak_root);
       it = dex_caches_.erase(it);
     } else {
       ++it;
     }
   }
-  dex_caches_.push_back(vm->AddWeakGlobalRef(self, dex_cache.Get()));
+  jweak dex_cache_jweak = vm->AddWeakGlobalRef(self, dex_cache.Get());
   dex_cache->SetDexFile(&dex_file);
+  DexCacheData data;
+  data.weak_root = dex_cache_jweak;
+  data.dex_file = dex_cache->GetDexFile();
+  data.resolved_types = dex_cache->GetResolvedTypes();
+  dex_caches_.push_back(data);
 }
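
The cleanup pass above prunes entries whose weak global reference was cleared by class unloading, lazily, on each registration. A generic sketch of the erase-while-iterating idiom it uses; IsCleared and Release are hypothetical stand-ins for IsJWeakCleared and DeleteWeakGlobalRef.

    #include <list>

    // Remove cleared entries in one pass; erase() hands back the next iterator.
    template <typename Entry, typename IsCleared, typename Release>
    void PruneClearedEntries(std::list<Entry>& entries, IsCleared cleared, Release release) {
      for (auto it = entries.begin(); it != entries.end(); ) {
        if (cleared(*it)) {
          release(*it);            // Drop the weak global reference first.
          it = entries.erase(it);
        } else {
          ++it;
        }
      }
    }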
 
-mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc) {
+mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file,
+                                               mirror::ClassLoader* class_loader) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
@@ -2309,21 +3274,31 @@
       return dex_cache;
     }
   }
+  LinearAlloc* const linear_alloc = GetOrCreateAllocatorForClassLoader(class_loader);
+  DCHECK(linear_alloc != nullptr);
+  ClassTable* table;
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    table = InsertClassTableForClassLoader(class_loader);
+  }
   // Don't alloc while holding the lock, since allocation may need to
   // suspend all threads and another thread may need the dex_lock_ to
   // get to a suspend point.
   StackHandleScope<1> hs(self);
   Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(self, dex_file, linear_alloc)));
-  WriterMutexLock mu(self, dex_lock_);
-  mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
-  if (dex_cache != nullptr) {
-    return dex_cache;
+  {
+    WriterMutexLock mu(self, dex_lock_);
+    mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
+    if (dex_cache != nullptr) {
+      return dex_cache;
+    }
+    if (h_dex_cache.Get() == nullptr) {
+      self->AssertPendingOOMException();
+      return nullptr;
+    }
+    RegisterDexFileLocked(dex_file, h_dex_cache);
   }
-  if (h_dex_cache.Get() == nullptr) {
-    self->AssertPendingOOMException();
-    return nullptr;
-  }
-  RegisterDexFileLocked(dex_file, h_dex_cache);
+  table->InsertStrongRoot(h_dex_cache.Get());
   return h_dex_cache.Get();
 }
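
RegisterDexFile now checks under dex_lock_, allocates with the lock dropped (allocation may need to suspend all threads, and a suspending thread may itself need dex_lock_), then re-checks before publishing. A minimal sketch of that check/allocate/re-check pattern with standard-library types; DexCacheStub and the map are stand-ins, not ART types.

    #include <mutex>
    #include <string>
    #include <unordered_map>

    struct DexCacheStub {};  // Hypothetical stand-in for mirror::DexCache.

    DexCacheStub* FindOrCreate(std::unordered_map<std::string, DexCacheStub*>& caches,
                               std::mutex& mu,
                               const std::string& location) {
      {
        std::lock_guard<std::mutex> lock(mu);
        auto it = caches.find(location);
        if (it != caches.end()) return it->second;
      }
      DexCacheStub* fresh = new DexCacheStub();  // Allocated without holding the lock.
      std::lock_guard<std::mutex> lock(mu);
      auto it = caches.find(location);
      if (it != caches.end()) {
        delete fresh;  // Lost the race; in ART the GC reclaims the unused cache.
        return it->second;
      }
      caches[location] = fresh;
      return fresh;
    }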
 
@@ -2344,10 +3319,16 @@
                                                   const DexFile& dex_file,
                                                   bool allow_failure) {
   // Search assuming unique-ness of dex file.
-  for (jweak weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) {
-      return dex_cache;
+  for (const DexCacheData& data : dex_caches_) {
+    // Avoid decoding (and the read barriers that entails) for unrelated dex caches.
+    if (data.dex_file == &dex_file) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache != nullptr) {
+        return dex_cache;
+      } else {
+        break;
+      }
     }
   }
   if (allow_failure) {
@@ -2355,8 +3336,8 @@
   }
   std::string location(dex_file.GetLocation());
   // Failure, dump diagnostic and abort.
-  for (jobject weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+  for (const DexCacheData& data : dex_caches_) {
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
     if (dex_cache != nullptr) {
       LOG(ERROR) << "Registered dex file " << dex_cache->GetDexFile()->GetLocation();
     }
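
Both loops above now compare the raw DexFile pointer stored in DexCacheData before decoding the weak root, so unrelated dex caches never pay for the decode or its read barrier. A small sketch of the cheap-key-first lookup; CacheEntry and decode are hypothetical.

    #include <vector>

    struct CacheEntry {
      const void* key;  // Raw pointer, comparable without any decoding.
      int weak_root;    // Stand-in for a handle that is expensive to decode.
    };

    // Decode the handle only after the cheap key comparison matches.
    template <typename Decode>
    void* FindByKey(const std::vector<CacheEntry>& entries, const void* key, Decode decode) {
      for (const CacheEntry& e : entries) {
        if (e.key == key) {
          return decode(e.weak_root);  // May be null if the root was cleared.
        }
      }
      return nullptr;
    }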
@@ -2368,10 +3349,13 @@
 void ClassLinker::FixupDexCaches(ArtMethod* resolution_method) {
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, dex_lock_);
-  for (jobject weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache != nullptr) {
-      dex_cache->Fixup(resolution_method, image_pointer_size_);
+  for (const DexCacheData& data : dex_caches_) {
+    if (!self->IsJWeakCleared(data.weak_root)) {
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+          self->DecodeJObject(data.weak_root));
+      if (dex_cache != nullptr) {
+        dex_cache->Fixup(resolution_method, image_pointer_size_);
+      }
     }
   }
 }
@@ -2509,16 +3493,13 @@
     new_class->SetClassFlags(mirror::kClassFlagObjectArray);
   }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusLoaded, self);
-  {
-    ArtMethod* imt[mirror::Class::kImtSize];
-    std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-    new_class->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
-  }
+  new_class->PopulateEmbeddedVTable(image_pointer_size_);
+  ImTable* object_imt = java_lang_Object->GetImt(image_pointer_size_);
+  new_class->SetImt(object_imt, image_pointer_size_);
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
 
-
   // All arrays have java/lang/Cloneable and java/io/Serializable as
   // interfaces.  We need to set that up here, so that stuff like
   // "instanceof" works right.
@@ -2550,6 +3531,7 @@
 
   mirror::Class* existing = InsertClass(descriptor, new_class.Get(), hash);
   if (existing == nullptr) {
+    jit::Jit::NewTypeLoadedIfUsingJit(new_class.Get());
     return new_class.Get();
   }
   // Another thread must have loaded the class after we
@@ -2599,37 +3581,50 @@
     }
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  mirror::ClassLoader* const class_loader = klass->GetClassLoader();
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
-  mirror::Class* existing = class_table->Lookup(descriptor, hash);
-  if (existing != nullptr) {
-    return existing;
-  }
-  if (kIsDebugBuild && !klass->IsTemp() && class_loader == nullptr &&
-      dex_cache_image_class_lookup_required_) {
-    // Check a class loaded with the system class loader matches one in the image if the class
-    // is in the image.
-    existing = LookupClassFromImage(descriptor);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+    mirror::Class* existing = class_table->Lookup(descriptor, hash);
     if (existing != nullptr) {
-      CHECK_EQ(klass, existing);
+      return existing;
+    }
+    if (kIsDebugBuild &&
+        !klass->IsTemp() &&
+        class_loader == nullptr &&
+        dex_cache_boot_image_class_lookup_required_) {
+      // Check a class loaded with the system class loader matches one in the image if the class
+      // is in the image.
+      existing = LookupClassFromBootImage(descriptor);
+      if (existing != nullptr) {
+        CHECK_EQ(klass, existing);
+      }
+    }
+    VerifyObject(klass);
+    class_table->InsertWithHash(klass, hash);
+    if (class_loader != nullptr) {
+      // This is necessary because we need to have the card dirtied for remembered sets.
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+    }
+    if (log_new_class_table_roots_) {
+      new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
     }
   }
-  VerifyObject(klass);
-  class_table->InsertWithHash(klass, hash);
-  if (class_loader != nullptr) {
-    // This is necessary because we need to have the card dirtied for remembered sets.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
-  }
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+  if (kIsDebugBuild) {
+    // Test that copied methods correctly can find their holder.
+    for (ArtMethod& method : klass->GetCopiedMethods(image_pointer_size_)) {
+      CHECK_EQ(GetHoldingClassOfCopiedMethod(&method), klass);
+    }
   }
   return nullptr;
 }
 
-void ClassLinker::UpdateClassVirtualMethods(mirror::Class* klass,
-                                            LengthPrefixedArray<ArtMethod>* new_methods) {
-  klass->SetVirtualMethodsPtr(new_methods);
+// TODO This should really be in mirror::Class.
+void ClassLinker::UpdateClassMethods(mirror::Class* klass,
+                                     LengthPrefixedArray<ArtMethod>* new_methods) {
+  klass->SetMethodsPtrUnchecked(new_methods,
+                                klass->NumDirectMethods(),
+                                klass->NumDeclaredVirtualMethods());
   // Need to mark the card so that the remembered sets and mod union tables get updated.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(klass);
 }
@@ -2654,11 +3649,11 @@
       }
     }
   }
-  if (class_loader != nullptr || !dex_cache_image_class_lookup_required_) {
+  if (class_loader != nullptr || !dex_cache_boot_image_class_lookup_required_) {
     return nullptr;
   }
   // Lookup failed but need to search dex_caches_.
-  mirror::Class* result = LookupClassFromImage(descriptor);
+  mirror::Class* result = LookupClassFromBootImage(descriptor);
   if (result != nullptr) {
     result = InsertClass(descriptor, result, hash);
   } else {
@@ -2667,53 +3662,67 @@
     // classes into the class table.
     constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
     if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
-      MoveImageClassesToClassTable();
+      AddBootImageClassesToClassTable();
     }
   }
   return result;
 }
 
-static mirror::ObjectArray<mirror::DexCache>* GetImageDexCaches()
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetImageSpace();
-  CHECK(image != nullptr);
-  mirror::Object* root = image->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  return root->AsObjectArray<mirror::DexCache>();
+static std::vector<mirror::ObjectArray<mirror::DexCache>*> GetImageDexCaches(
+    std::vector<gc::space::ImageSpace*> image_spaces) SHARED_REQUIRES(Locks::mutator_lock_) {
+  CHECK(!image_spaces.empty());
+  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector;
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+    DCHECK(root != nullptr);
+    dex_caches_vector.push_back(root->AsObjectArray<mirror::DexCache>());
+  }
+  return dex_caches_vector;
 }
 
-void ClassLinker::MoveImageClassesToClassTable() {
+void ClassLinker::AddBootImageClassesToClassTable() {
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddImageClassesToClassTable(Runtime::Current()->GetHeap()->GetBootImageSpaces(),
+                                /*class_loader*/nullptr);
+    dex_cache_boot_image_class_lookup_required_ = false;
+  }
+}
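
AddBootImageClassesToClassTable turns the old unconditional move into a one-shot operation guarded by dex_cache_boot_image_class_lookup_required_. A trivial sketch of that guard; the names are illustrative.

    // Merge boot-image classes at most once; the flag records completion.
    struct BootClassTableState {
      bool lookup_required = true;
    };

    template <typename MergeFn>
    void EnsureBootClassesAdded(BootClassTableState& state, MergeFn merge) {
      if (state.lookup_required) {
        merge();                        // Expensive: walks every boot dex cache.
        state.lookup_required = false;  // Later calls become no-ops.
      }
    }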
+
+void ClassLinker::AddImageClassesToClassTable(std::vector<gc::space::ImageSpace*> image_spaces,
+                                              mirror::ClassLoader* class_loader) {
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  if (!dex_cache_image_class_lookup_required_) {
-    return;  // All dex cache classes are already in the class table.
-  }
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
+
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+
   std::string temp;
-  ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
-  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
-    GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
-    for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
-      mirror::Class* klass = types[j].Read();
-      if (klass != nullptr) {
-        DCHECK(klass->GetClassLoader() == nullptr);
-        const char* descriptor = klass->GetDescriptor(&temp);
-        size_t hash = ComputeModifiedUtf8Hash(descriptor);
-        mirror::Class* existing = class_table->Lookup(descriptor, hash);
-        if (existing != nullptr) {
-          CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
-              << PrettyClassAndClassLoader(klass);
-        } else {
-          class_table->Insert(klass);
-          if (log_new_class_table_roots_) {
-            new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector =
+      GetImageDexCaches(image_spaces);
+  for (mirror::ObjectArray<mirror::DexCache>* dex_caches : dex_caches_vector) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
+      for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
+        mirror::Class* klass = types[j].Read();
+        if (klass != nullptr) {
+          DCHECK_EQ(klass->GetClassLoader(), class_loader);
+          const char* descriptor = klass->GetDescriptor(&temp);
+          size_t hash = ComputeModifiedUtf8Hash(descriptor);
+          mirror::Class* existing = class_table->Lookup(descriptor, hash);
+          if (existing != nullptr) {
+            CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
+                << PrettyClassAndClassLoader(klass);
+          } else {
+            class_table->Insert(klass);
+            if (log_new_class_table_roots_) {
+              new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+            }
           }
         }
       }
     }
   }
-  dex_cache_image_class_lookup_required_ = false;
 }
 
 class MoveClassTableToPreZygoteVisitor : public ClassLoaderVisitor {
@@ -2737,17 +3746,16 @@
   VisitClassLoaders(&visitor);
 }
 
-mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) {
+mirror::Class* ClassLinker::LookupClassFromBootImage(const char* descriptor) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "Image class lookup");
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
-  for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
-    const DexFile* dex_file = dex_cache->GetDexFile();
-    // Try binary searching the string/type index.
-    const DexFile::StringId* string_id = dex_file->FindStringId(descriptor);
-    if (string_id != nullptr) {
-      const DexFile::TypeId* type_id =
-          dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
+  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector =
+      GetImageDexCaches(Runtime::Current()->GetHeap()->GetBootImageSpaces());
+  for (mirror::ObjectArray<mirror::DexCache>* dex_caches : dex_caches_vector) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      // Try binary searching the type index by descriptor.
+      const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor);
       if (type_id != nullptr) {
         uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
         mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
@@ -2785,8 +3793,8 @@
 
 void ClassLinker::LookupClasses(const char* descriptor, std::vector<mirror::Class*>& result) {
   result.clear();
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -2799,108 +3807,170 @@
   VisitClassLoaders(&visitor);
 }
 
-void ClassLinker::VerifyClass(Thread* self, Handle<mirror::Class> klass) {
-  // TODO: assert that the monitor on the Class is held
-  ObjectLock<mirror::Class> lock(self, klass);
+bool ClassLinker::AttemptSupertypeVerification(Thread* self,
+                                               Handle<mirror::Class> klass,
+                                               Handle<mirror::Class> supertype) {
+  DCHECK(self != nullptr);
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(supertype.Get() != nullptr);
 
-  // Don't attempt to re-verify if already sufficiently verified.
-  if (klass->IsVerified()) {
-    EnsurePreverifiedMethods(klass);
-    return;
+  if (!supertype->IsVerified() && !supertype->IsErroneous()) {
+    VerifyClass(self, supertype);
   }
-  if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
-    return;
+  if (supertype->IsCompileTimeVerified()) {
+    // Either we are verified or we soft failed and need to retry at runtime.
+    return true;
   }
+  // If we got this far then we have a hard failure.
+  std::string error_msg =
+      StringPrintf("Rejecting class %s that attempts to sub-type erroneous class %s",
+                   PrettyDescriptor(klass.Get()).c_str(),
+                   PrettyDescriptor(supertype.Get()).c_str());
+  LOG(WARNING) << error_msg << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
+  if (cause.Get() != nullptr) {
+    // Set during VerifyClass call (if at all).
+    self->ClearException();
+  }
+  // Change into a verify error.
+  ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
+  if (cause.Get() != nullptr) {
+    self->GetException()->SetCause(cause.Get());
+  }
+  ClassReference ref(klass->GetDexCache()->GetDexFile(), klass->GetDexClassDefIndex());
+  if (Runtime::Current()->IsAotCompiler()) {
+    Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
+  }
+  // Need to grab the lock to change status.
+  ObjectLock<mirror::Class> super_lock(self, klass);
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
+  return false;
+}
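
AttemptSupertypeVerification preserves any exception raised while verifying the supertype: it is saved, cleared, a VerifyError is thrown, and the saved exception is re-attached as the cause. The standard library has an analogous cause-chaining facility; a sketch, under the assumption that modeling Throwable::SetCause with std::nested_exception is an acceptable stand-in.

    #include <exception>
    #include <stdexcept>
    #include <string>

    // Throws a new error; std::throw_with_nested captures std::current_exception()
    // (the cause, possibly empty) and attaches it, much like SetCause above.
    [[noreturn]] void ThrowVerifyErrorWithCause(const std::string& msg) {
      std::throw_with_nested(std::runtime_error("VerifyError: " + msg));
    }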
 
-  // The class might already be erroneous, for example at compile time if we attempted to verify
-  // this class as a parent to another.
-  if (klass->IsErroneous()) {
-    ThrowEarlierClassFailure(klass.Get());
-    return;
-  }
+void ClassLinker::VerifyClass(Thread* self, Handle<mirror::Class> klass, LogSeverity log_level) {
+  {
+    // TODO: assert that the monitor on the Class is held
+    ObjectLock<mirror::Class> lock(self, klass);
 
-  if (klass->GetStatus() == mirror::Class::kStatusResolved) {
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifying, self);
-  } else {
-    CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
-        << PrettyClass(klass.Get());
-    CHECK(!Runtime::Current()->IsAotCompiler());
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
-  }
+    // Is somebody verifying this now?
+    mirror::Class::Status old_status = klass->GetStatus();
+    while (old_status == mirror::Class::kStatusVerifying ||
+        old_status == mirror::Class::kStatusVerifyingAtRuntime) {
+      lock.WaitIgnoringInterrupts();
+      CHECK(klass->IsErroneous() || (klass->GetStatus() > old_status))
+          << "Class '" << PrettyClass(klass.Get()) << "' performed an illegal verification state "
+          << "transition from " << old_status << " to " << klass->GetStatus();
+      old_status = klass->GetStatus();
+    }
 
-  // Skip verification if we are forcing a soft fail.
-  // This has to be before the normal verification enabled check,
-  // since technically verification is disabled in this mode.
-  if (UNLIKELY(Runtime::Current()->IsVerificationSoftFail())) {
-    // Force verification to be a 'soft failure'.
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-    // As this is a fake verified status, make sure the methods are _not_ marked preverified
-    // later.
-    klass->SetPreverified();
-    return;
-  }
+    // The class might already be erroneous, for example at compile time if we attempted to verify
+    // this class as a parent to another.
+    if (klass->IsErroneous()) {
+      ThrowEarlierClassFailure(klass.Get());
+      return;
+    }
 
-  // Skip verification if disabled.
-  if (!Runtime::Current()->IsVerificationEnabled()) {
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-    EnsurePreverifiedMethods(klass);
-    return;
+    // Don't attempt to re-verify if already sufficiently verified.
+    if (klass->IsVerified()) {
+      EnsureSkipAccessChecksMethods(klass);
+      return;
+    }
+    if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
+      return;
+    }
+
+    if (klass->GetStatus() == mirror::Class::kStatusResolved) {
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifying, self);
+    } else {
+      CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
+          << PrettyClass(klass.Get());
+      CHECK(!Runtime::Current()->IsAotCompiler());
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
+    }
+
+    // Skip verification if disabled.
+    if (!Runtime::Current()->IsVerificationEnabled()) {
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
+      EnsureSkipAccessChecksMethods(klass);
+      return;
+    }
   }
 
   // Verify super class.
   StackHandleScope<2> hs(self);
-  Handle<mirror::Class> super(hs.NewHandle(klass->GetSuperClass()));
-  if (super.Get() != nullptr) {
-    // Acquire lock to prevent races on verifying the super class.
-    ObjectLock<mirror::Class> super_lock(self, super);
+  MutableHandle<mirror::Class> supertype(hs.NewHandle(klass->GetSuperClass()));
+  // If we have a superclass and we get a hard verification failure, we can return immediately.
+  if (supertype.Get() != nullptr && !AttemptSupertypeVerification(self, klass, supertype)) {
+    CHECK(self->IsExceptionPending()) << "Verification error should be pending.";
+    return;
+  }
 
-    if (!super->IsVerified() && !super->IsErroneous()) {
-      VerifyClass(self, super);
-    }
-    if (!super->IsCompileTimeVerified()) {
-      std::string error_msg(
-          StringPrintf("Rejecting class %s that attempts to sub-class erroneous class %s",
-                       PrettyDescriptor(klass.Get()).c_str(),
-                       PrettyDescriptor(super.Get()).c_str()));
-      LOG(WARNING) << error_msg  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
-      Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
-      if (cause.Get() != nullptr) {
-        self->ClearException();
+  // Verify all default super-interfaces.
+  //
+  // (1) Don't bother if the superclass has already had a soft verification failure.
+  //
+  // (2) Interfaces shouldn't bother to do this recursive verification because they cannot cause
+  //     recursive initialization by themselves. This is because when an interface is initialized
+  //     directly it must not initialize its superinterfaces. We are allowed to verify regardless
+  //     but choose not to as an optimization. If the interface is being verified due to a class
+  //     initialization (which would need all the default interfaces to be verified) the class code
+  //     will trigger the recursive verification anyway.
+  if ((supertype.Get() == nullptr || supertype->IsVerified())  // See (1)
+      && !klass->IsInterface()) {                              // See (2)
+    int32_t iftable_count = klass->GetIfTableCount();
+    MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
+    // Loop through all interfaces this class has defined. It doesn't matter the order.
+    for (int32_t i = 0; i < iftable_count; i++) {
+      iface.Assign(klass->GetIfTable()->GetInterface(i));
+      DCHECK(iface.Get() != nullptr);
+      // We only care about interfaces with default methods; skip any that are already verified.
+      if (LIKELY(!iface->HasDefaultMethods() || iface->IsVerified())) {
+        continue;
+      } else if (UNLIKELY(!AttemptSupertypeVerification(self, klass, iface))) {
+        // We had a hard failure while verifying this interface. Just return immediately.
+        CHECK(self->IsExceptionPending()) << "Verification error should be pending.";
+        return;
+      } else if (UNLIKELY(!iface->IsVerified())) {
+        // We softly failed to verify the iface. Stop checking and clean up.
+        // Put the iface into the supertype handle so we know what caused us to fail.
+        supertype.Assign(iface.Get());
+        break;
       }
-      ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
-      if (cause.Get() != nullptr) {
-        self->GetException()->SetCause(cause.Get());
-      }
-      ClassReference ref(klass->GetDexCache()->GetDexFile(), klass->GetDexClassDefIndex());
-      if (Runtime::Current()->IsAotCompiler()) {
-        Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
-      }
-      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
-      return;
     }
   }
 
+  // At this point, if verification failed, then supertype is the "first" supertype that failed
+  // verification (in no particular order). If verification succeeded, then supertype is either
+  // null or the original superclass of klass and is verified.
+  DCHECK(supertype.Get() == nullptr ||
+         supertype.Get() == klass->GetSuperClass() ||
+         !supertype->IsVerified());
+
   // Try to use verification information from the oat file, otherwise do runtime verification.
   const DexFile& dex_file = *klass->GetDexCache()->GetDexFile();
   mirror::Class::Status oat_file_class_status(mirror::Class::kStatusNotReady);
   bool preverified = VerifyClassUsingOatFile(dex_file, klass.Get(), oat_file_class_status);
-  if (oat_file_class_status == mirror::Class::kStatusError) {
-    VLOG(class_linker) << "Skipping runtime verification of erroneous class "
-        << PrettyDescriptor(klass.Get()) << " in "
-        << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
-    ThrowVerifyError(klass.Get(), "Rejecting class %s because it failed compile-time verification",
-                     PrettyDescriptor(klass.Get()).c_str());
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
-    return;
-  }
+  // If the oat file says the class had an error, re-run the verifier. That way we will get a
+  // precise error message. To ensure a rerun, test:
+  //     oat_file_class_status == mirror::Class::kStatusError => !preverified
+  DCHECK(!(oat_file_class_status == mirror::Class::kStatusError) || !preverified);
+
   verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   std::string error_msg;
   if (!preverified) {
+    Runtime* runtime = Runtime::Current();
     verifier_failure = verifier::MethodVerifier::VerifyClass(self,
                                                              klass.Get(),
-                                                             Runtime::Current()->IsAotCompiler(),
+                                                             runtime->GetCompilerCallbacks(),
+                                                             runtime->IsAotCompiler(),
+                                                             log_level,
                                                              &error_msg);
   }
+
+  // Verification is done, grab the lock again.
+  ObjectLock<mirror::Class> lock(self, klass);
+
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
     if (!preverified && verifier_failure != verifier::MethodVerifier::kNoFailure) {
       VLOG(class_linker) << "Soft verification failure in class " << PrettyDescriptor(klass.Get())
@@ -2909,16 +3979,16 @@
     }
     self->AssertNoPendingException();
     // Make sure all classes referenced by catch blocks are resolved.
-    ResolveClassExceptionHandlerTypes(dex_file, klass);
+    ResolveClassExceptionHandlerTypes(klass);
     if (verifier_failure == verifier::MethodVerifier::kNoFailure) {
-      // Even though there were no verifier failures we need to respect whether the super-class
-      // was verified or requiring runtime reverification.
-      if (super.Get() == nullptr || super->IsVerified()) {
+      // Even though there were no verifier failures we need to respect whether the super-class and
+      // super-default-interfaces were verified or requiring runtime reverification.
+      if (supertype.Get() == nullptr || supertype->IsVerified()) {
         mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
       } else {
-        CHECK_EQ(super->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
+        CHECK_EQ(supertype->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
         mirror::Class::SetStatus(klass, mirror::Class::kStatusRetryVerificationAtRuntime, self);
-        // Pretend a soft failure occured so that we don't consider the class verified below.
+        // Pretend a soft failure occurred so that we don't consider the class verified below.
         verifier_failure = verifier::MethodVerifier::kSoftFailure;
       }
     } else {
@@ -2930,34 +4000,41 @@
         mirror::Class::SetStatus(klass, mirror::Class::kStatusRetryVerificationAtRuntime, self);
       } else {
         mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-        // As this is a fake verified status, make sure the methods are _not_ marked preverified
-        // later.
-        klass->SetPreverified();
+        // As this is a fake verified status, make sure the methods are _not_ marked
+        // kAccSkipAccessChecks later.
+        klass->SetVerificationAttempted();
       }
     }
   } else {
-    LOG(WARNING) << "Verification failed on class " << PrettyDescriptor(klass.Get())
-        << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
-        << " because: " << error_msg;
+    VLOG(verifier) << "Verification failed on class " << PrettyDescriptor(klass.Get())
+                  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
+                  << " because: " << error_msg;
     self->AssertNoPendingException();
     ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
     mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
   }
   if (preverified || verifier_failure == verifier::MethodVerifier::kNoFailure) {
     // Class is verified so we don't need to do any access check on its methods.
-    // Let the interpreter know it by setting the kAccPreverified flag onto each
+    // Let the interpreter know it by setting the kAccSkipAccessChecks flag onto each
     // method.
     // Note: we're going here during compilation and at runtime. When we set the
-    // kAccPreverified flag when compiling image classes, the flag is recorded
+    // kAccSkipAccessChecks flag when compiling image classes, the flag is recorded
     // in the image and is set when loading the image.
-    EnsurePreverifiedMethods(klass);
+
+    if (UNLIKELY(Runtime::Current()->IsVerificationSoftFail())) {
+      // Never skip access checks if the verification soft fail is forced.
+      // Mark the class as having a verification attempt to avoid re-running the verifier.
+      klass->SetVerificationAttempted();
+    } else {
+      EnsureSkipAccessChecksMethods(klass);
+    }
   }
 }
 
-void ClassLinker::EnsurePreverifiedMethods(Handle<mirror::Class> klass) {
-  if (!klass->IsPreverified()) {
-    klass->SetPreverifiedFlagOnAllMethods(image_pointer_size_);
-    klass->SetPreverified();
+void ClassLinker::EnsureSkipAccessChecksMethods(Handle<mirror::Class> klass) {
+  if (!klass->WasVerificationAttempted()) {
+    klass->SetSkipAccessChecksFlagOnAllMethods(image_pointer_size_);
+    klass->SetVerificationAttempted();
   }
 }
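
The rename from preverified to verification-attempted keeps the same idempotence: one bit enables the fast path and a second records that the propagation already happened, so repeat calls are cheap. A sketch with hypothetical flag bits.

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kSkipAccessChecks = 1u << 0;       // Hypothetical bit values.
    constexpr uint32_t kVerificationAttempted = 1u << 1;

    void EnsureSkipAccessChecks(std::atomic<uint32_t>& class_flags) {
      uint32_t flags = class_flags.load(std::memory_order_relaxed);
      if ((flags & kVerificationAttempted) == 0) {
        // In ART this also sets the bit on every method of the class.
        class_flags.fetch_or(kSkipAccessChecks | kVerificationAttempted,
                             std::memory_order_relaxed);
      }
    }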
 
@@ -2988,14 +4065,14 @@
   }
 
   // We may be running with a preopted oat file but without image. In this case,
-  // we don't skip verification of preverified classes to ensure we initialize
+  // we don't skip verification of kAccSkipAccessChecks classes, to ensure we initialize
   // dex caches with all types resolved during verification.
   // We need to trust image classes, as these might be coming out of a pre-opted, quickened boot
   // image (that we just failed loading), and the verifier can't be run on quickened opcodes when
   // the runtime isn't started. On the other hand, app classes can be re-verified even if they are
   // already pre-opted, as then the runtime is started.
   if (!Runtime::Current()->IsAotCompiler() &&
-      !Runtime::Current()->GetHeap()->HasImageSpace() &&
+      !Runtime::Current()->GetHeap()->HasBootImageSpace() &&
       klass->GetClassLoader() != nullptr) {
     return false;
   }
@@ -3004,7 +4081,12 @@
   oat_file_class_status = oat_dex_file->GetOatClass(class_def_index).GetStatus();
   if (oat_file_class_status == mirror::Class::kStatusVerified ||
       oat_file_class_status == mirror::Class::kStatusInitialized) {
-      return true;
+    return true;
+  }
+  // If we only verified a subset of the classes at compile time, we can end up with classes that
+  // were resolved by the verifier.
+  if (oat_file_class_status == mirror::Class::kStatusResolved) {
+    return false;
   }
   if (oat_file_class_status == mirror::Class::kStatusRetryVerificationAtRuntime) {
     // Compile time verification failed with a soft error. Compile time verification can fail
@@ -3045,20 +4127,16 @@
   UNREACHABLE();
 }
 
-void ClassLinker::ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
-                                                    Handle<mirror::Class> klass) {
-  for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
-    ResolveMethodExceptionHandlerTypes(dex_file, klass->GetDirectMethod(i, image_pointer_size_));
-  }
-  for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
-    ResolveMethodExceptionHandlerTypes(dex_file, klass->GetVirtualMethod(i, image_pointer_size_));
+void ClassLinker::ResolveClassExceptionHandlerTypes(Handle<mirror::Class> klass) {
+  for (ArtMethod& method : klass->GetMethods(image_pointer_size_)) {
+    ResolveMethodExceptionHandlerTypes(&method);
   }
 }
 
-void ClassLinker::ResolveMethodExceptionHandlerTypes(const DexFile& dex_file,
-                                                     ArtMethod* method) {
+void ClassLinker::ResolveMethodExceptionHandlerTypes(ArtMethod* method) {
   // similar to DexVerifier::ScanTryCatchBlocks and dex2oat's ResolveExceptionsForMethod.
-  const DexFile::CodeItem* code_item = dex_file.GetCodeItem(method->GetCodeItemOffset());
+  const DexFile::CodeItem* code_item =
+      method->GetDexFile()->GetCodeItem(method->GetCodeItemOffset());
   if (code_item == nullptr) {
     return;  // native or abstract method
   }
@@ -3100,8 +4178,9 @@
   }
   DCHECK(klass->GetClass() != nullptr);
   klass->SetObjectSize(sizeof(mirror::Proxy));
-  // Set the class access flags incl. preverified, so we do not try to set the flag on the methods.
-  klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccPreverified);
+  // Set the class access flags incl. VerificationAttempted, so we do not try to set the flag on
+  // the methods.
+  klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccVerificationAttempted);
   klass->SetClassLoader(soa.Decode<mirror::ClassLoader*>(loader));
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
   klass->SetName(soa.Decode<mirror::String*>(name));
@@ -3139,29 +4218,30 @@
   throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // Proxies have 1 direct method, the constructor
-  LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, allocator, 1);
-  // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
-  // want to throw OOM in the future.
-  if (UNLIKELY(directs == nullptr)) {
-    self->AssertPendingOOMException();
-    return nullptr;
-  }
-  klass->SetDirectMethodsPtr(directs);
-  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+  const size_t num_direct_methods = 1;
 
-  // Create virtual method using specified prototypes.
+  // They have as many virtual methods as there are entries in the methods array.
   auto h_methods = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Method>*>(methods));
   DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass())
       << PrettyClass(h_methods->GetClass());
   const size_t num_virtual_methods = h_methods->GetLength();
-  auto* virtuals = AllocArtMethodArray(self, allocator, num_virtual_methods);
+
+  // Create the methods array.
+  LengthPrefixedArray<ArtMethod>* proxy_class_methods = AllocArtMethodArray(
+        self, allocator, num_direct_methods + num_virtual_methods);
   // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
   // want to throw OOM in the future.
-  if (UNLIKELY(virtuals == nullptr)) {
+  if (UNLIKELY(proxy_class_methods == nullptr)) {
     self->AssertPendingOOMException();
     return nullptr;
   }
-  klass->SetVirtualMethodsPtr(virtuals);
+  klass->SetMethodsPtr(proxy_class_methods, num_direct_methods, num_virtual_methods);
+
+  // Create the single direct method.
+  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+
+  // Create virtual method using specified prototypes.
+  // TODO: These should really use the iterators.
   for (size_t i = 0; i < num_virtual_methods; ++i) {
     auto* virtual_method = klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
     auto* prototype = h_methods->Get(i)->GetArtMethod();
@@ -3242,35 +4322,12 @@
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
 
-ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
-  DCHECK(proxy_class->IsProxyClass());
-  DCHECK(proxy_method->IsProxyMethod());
-  {
-    Thread* const self = Thread::Current();
-    ReaderMutexLock mu(self, dex_lock_);
-    // Locate the dex cache of the original interface/Object
-    for (jobject weak_root : dex_caches_) {
-      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-      if (dex_cache != nullptr &&
-          proxy_method->HasSameDexCacheResolvedTypes(dex_cache->GetResolvedTypes(),
-                                                     image_pointer_size_)) {
-        ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
-            proxy_method->GetDexMethodIndex(), image_pointer_size_);
-        CHECK(resolved_method != nullptr);
-        return resolved_method;
-      }
-    }
-  }
-  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
-      << PrettyMethod(proxy_method);
-  UNREACHABLE();
-}
-
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
-  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 16u);
+  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 18u);
   ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
       2, image_pointer_size_);
+  DCHECK_EQ(std::string(proxy_constructor->GetName()), "<init>");
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
   // constructor method.
   GetClassRoot(kJavaLangReflectProxy)->GetDexCache()->SetResolvedMethod(
@@ -3308,10 +4365,18 @@
   DCHECK(out != nullptr);
   out->CopyFrom(prototype, image_pointer_size_);
 
-  // Set class to be the concrete proxy class and clear the abstract flag, modify exceptions to
-  // the intersection of throw exceptions as defined in Proxy
+  // Set class to be the concrete proxy class.
   out->SetDeclaringClass(klass.Get());
-  out->SetAccessFlags((out->GetAccessFlags() & ~kAccAbstract) | kAccFinal);
+  // Clear the abstract, default and conflict flags to ensure that defaults aren't picked in
+  // preference to the invocation handler.
+  const uint32_t kRemoveFlags = kAccAbstract | kAccDefault | kAccDefaultConflict;
+  // Make the method final.
+  const uint32_t kAddFlags = kAccFinal;
+  out->SetAccessFlags((out->GetAccessFlags() & ~kRemoveFlags) | kAddFlags);
+
+  // Clear the dex_code_item_offset_. It needs to be 0 since proxy methods have no CodeItems but the
+  // method they copy might (if it's a default method).
+  out->SetCodeItemOffset(0);
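
The proxy method's access flags are rewritten with a clear-then-set mask so that default-method bits can never be preferred over the invocation handler. A sketch of the same bit surgery; kAccAbstract (0x0400) and kAccFinal (0x0010) are standard dex access flags, while the kDefault and kDefaultConflict values below are purely illustrative.

    #include <cstdint>

    constexpr uint32_t kAbstract = 0x0400;
    constexpr uint32_t kFinal = 0x0010;
    constexpr uint32_t kDefault = 0x00400000;          // Illustrative, runtime-internal.
    constexpr uint32_t kDefaultConflict = 0x00800000;  // Illustrative, runtime-internal.

    // Clear the unwanted bits first, then add the final bit.
    uint32_t MakeProxyMethodFlags(uint32_t prototype_flags) {
      const uint32_t remove = kAbstract | kDefault | kDefaultConflict;
      const uint32_t add = kFinal;
      return (prototype_flags & ~remove) | add;
    }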
 
   // At runtime the method looks like a reference and argument saving method, clone the code
   // related parameters from this method.
@@ -3322,7 +4387,7 @@
   // Basic sanity
   CHECK(!prototype->IsFinal());
   CHECK(method->IsFinal());
-  CHECK(!method->IsAbstract());
+  CHECK(method->IsInvokable());
 
   // The proxy method doesn't have its own dex cache or dex file and so it steals those of its
   // interface prototype. The exception to this are Constructors and the Class of the Proxy itself.
@@ -3409,7 +4474,7 @@
 
     // Was the class already found to be erroneous? Done under the lock to match the JLS.
     if (klass->IsErroneous()) {
-      ThrowEarlierClassFailure(klass.Get());
+      ThrowEarlierClassFailure(klass.Get(), true);
       VlogClassInitializationFailure(klass);
       return false;
     }
@@ -3422,7 +4487,20 @@
         // We failed to verify, expect either the klass to be erroneous or verification failed at
         // compile time.
         if (klass->IsErroneous()) {
-          CHECK(self->IsExceptionPending());
+          // The class is erroneous. This may be a verifier error, or another thread attempted
+          // verification and/or initialization and failed. We can distinguish those cases by
+          // whether an exception is already pending.
+          if (self->IsExceptionPending()) {
+            // Check that it's a VerifyError.
+            DCHECK_EQ("java.lang.Class<java.lang.VerifyError>",
+                      PrettyClass(self->GetException()->GetClass()));
+          } else {
+            // Check that another thread attempted initialization.
+            DCHECK_NE(0, klass->GetClinitThreadId());
+            DCHECK_NE(self->GetTid(), klass->GetClinitThreadId());
+            // Need to rethrow the previous failure now.
+            ThrowEarlierClassFailure(klass.Get(), true);
+          }
           VlogClassInitializationFailure(klass);
         } else {
           CHECK(Runtime::Current()->IsAotCompiler());
@@ -3432,6 +4510,14 @@
       } else {
         self->AssertNoPendingException();
       }
+
+      // A separate thread could have moved us all the way to initialized. A "simple" example
+      // involves a subclass of the current class being initialized at the same time (which
+      // will implicitly initialize the superclass, if scheduled that way). b/28254258
+      DCHECK_NE(mirror::Class::kStatusError, klass->GetStatus());
+      if (klass->IsInitialized()) {
+        return true;
+      }
     }
 
     // If the class is kStatusInitializing, either this thread is
@@ -3565,18 +4651,23 @@
         } else {
           value_it.ReadValueToField<false>(field);
         }
+        if (self->IsExceptionPending()) {
+          break;
+        }
         DCHECK(!value_it.HasNext() || field_it.HasNextStaticField());
       }
     }
   }
 
-  ArtMethod* clinit = klass->FindClassInitializer(image_pointer_size_);
-  if (clinit != nullptr) {
-    CHECK(can_init_statics);
-    JValue result;
-    clinit->Invoke(self, nullptr, 0, &result, "V");
-  }
 
+  if (!self->IsExceptionPending()) {
+    ArtMethod* clinit = klass->FindClassInitializer(image_pointer_size_);
+    if (clinit != nullptr) {
+      CHECK(can_init_statics);
+      JValue result;
+      clinit->Invoke(self, nullptr, 0, &result, "V");
+    }
+  }
   self->AllowThreadSuspension();
   uint64_t t1 = NanoTime();
 
@@ -3766,7 +4857,7 @@
 }
 
 static bool HasSameSignatureWithDifferentClassLoaders(Thread* self,
-                                                      size_t pointer_size,
+                                                      PointerSize pointer_size,
                                                       Handle<mirror::Class> klass,
                                                       Handle<mirror::Class> super_klass,
                                                       ArtMethod* method1,
@@ -3905,7 +4996,8 @@
                                     bool can_init_parents) {
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
-    EnsurePreverifiedMethods(c);
+    EnsureSkipAccessChecksMethods(c);
+    self->AssertNoPendingException();
     return true;
   }
   const bool success = InitializeClass(self, c, can_init_fields, can_init_parents);
@@ -3936,14 +5028,8 @@
   }
 
   DCHECK_EQ(temp_class->NumDirectMethods(), 0u);
-  for (auto& method : new_class->GetDirectMethods(image_pointer_size_)) {
-    if (method.GetDeclaringClass() == temp_class) {
-      method.SetDeclaringClass(new_class);
-    }
-  }
-
   DCHECK_EQ(temp_class->NumVirtualMethods(), 0u);
-  for (auto& method : new_class->GetVirtualMethods(image_pointer_size_)) {
+  for (auto& method : new_class->GetMethods(image_pointer_size_)) {
     if (method.GetDeclaringClass() == temp_class) {
       method.SetDeclaringClass(new_class);
     }
@@ -3954,24 +5040,31 @@
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(new_class);
 }
 
+void ClassLinker::RegisterClassLoader(mirror::ClassLoader* class_loader) {
+  CHECK(class_loader->GetAllocator() == nullptr);
+  CHECK(class_loader->GetClassTable() == nullptr);
+  Thread* const self = Thread::Current();
+  ClassLoaderData data;
+  data.weak_root = self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader);
+  // Create and set the class table.
+  data.class_table = new ClassTable;
+  class_loader->SetClassTable(data.class_table);
+  // Create and set the linear allocator.
+  data.allocator = Runtime::Current()->CreateLinearAlloc();
+  class_loader->SetAllocator(data.allocator);
+  // Add to the list so that we know to free the data later.
+  class_loaders_.push_back(data);
+}
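
RegisterClassLoader now creates the weak root, class table, and linear allocator together and records them in a single ClassLoaderData entry, so none of the three can exist without the others and all can be freed when the loader is unloaded. A structural sketch with stub types.

    #include <vector>

    struct ClassTableStub {};    // Hypothetical stand-ins, not ART types.
    struct LinearAllocStub {};

    struct LoaderData {
      int weak_root;             // Stand-in for the jweak handle.
      ClassTableStub* class_table;
      LinearAllocStub* allocator;
    };

    void Register(std::vector<LoaderData>& registry, int weak_root) {
      LoaderData data;
      data.weak_root = weak_root;
      data.class_table = new ClassTableStub();  // Created together, so the
      data.allocator = new LinearAllocStub();   // invariants checked above hold.
      registry.push_back(data);                 // Remembered for later cleanup.
    }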
+
 ClassTable* ClassLinker::InsertClassTableForClassLoader(mirror::ClassLoader* class_loader) {
   if (class_loader == nullptr) {
     return &boot_class_table_;
   }
   ClassTable* class_table = class_loader->GetClassTable();
   if (class_table == nullptr) {
-    class_table = new ClassTable;
-    Thread* const self = Thread::Current();
-    ClassLoaderData data;
-    data.weak_root = self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader);
-    data.class_table = class_table;
-    // Don't already have a class table, add it to the class loader.
-    CHECK(class_loader->GetClassTable() == nullptr);
-    class_loader->SetClassTable(data.class_table);
-    // Should have been set when we registered the dex file.
-    data.allocator = class_loader->GetAllocator();
-    CHECK(data.allocator != nullptr);
-    class_loaders_.push_back(data);
+    RegisterClassLoader(class_loader);
+    class_table = class_loader->GetClassTable();
+    DCHECK(class_table != nullptr);
   }
   return class_table;
 }
@@ -3980,6 +5073,17 @@
   return class_loader == nullptr ? &boot_class_table_ : class_loader->GetClassTable();
 }
 
+static ImTable* FindSuperImt(mirror::Class* klass, PointerSize pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  while (klass->HasSuperClass()) {
+    klass = klass->GetSuperClass();
+    if (klass->ShouldHaveImt()) {
+      return klass->GetImt(pointer_size);
+    }
+  }
+  return nullptr;
+}
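
FindSuperImt walks up the superclass chain to the nearest ancestor that owns an IMT. A sketch of the same walk over a hypothetical Node type.

    struct Node {       // Hypothetical stand-in for mirror::Class.
      Node* super;
      bool has_imt;
      int* imt;
    };

    // First ancestor (excluding klass itself) that owns an IMT, or null.
    int* FindInheritedImt(Node* klass) {
      while (klass->super != nullptr) {
        klass = klass->super;
        if (klass->has_imt) {
          return klass->imt;
        }
      }
      return nullptr;
    }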
+
 bool ClassLinker::LinkClass(Thread* self,
                             const char* descriptor,
                             Handle<mirror::Class> klass,
@@ -3990,9 +5094,11 @@
   if (!LinkSuperClass(klass)) {
     return false;
   }
-  ArtMethod* imt[mirror::Class::kImtSize];
-  std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-  if (!LinkMethods(self, klass, interfaces, imt)) {
+  ArtMethod* imt_data[ImTable::kSize];
+  // Whether there are any new conflicts compared to the super class.
+  bool new_conflict = false;
+  std::fill_n(imt_data, arraysize(imt_data), Runtime::Current()->GetImtUnimplementedMethod());
+  if (!LinkMethods(self, klass, interfaces, &new_conflict, imt_data)) {
     return false;
   }
   if (!LinkInstanceFields(self, klass)) {
@@ -4005,15 +5111,47 @@
   CreateReferenceInstanceOffsets(klass);
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
+  ImTable* imt = nullptr;
+  if (klass->ShouldHaveImt()) {
+    // If there are any new conflicts compared to the super class, we cannot reuse its IMT. Both
+    // classes can have a conflict method in the same slot without having the same set of
+    // conflicts, in which case sharing the IMT is unsafe: the conflict table slow path could
+    // build a table that is incorrect for one of the two classes.
+    // Same IMT with new_conflict does not happen very often.
+    if (!new_conflict) {
+      ImTable* super_imt = FindSuperImt(klass.Get(), image_pointer_size_);
+      if (super_imt != nullptr) {
+        bool imt_equals = true;
+        for (size_t i = 0; i < ImTable::kSize && imt_equals; ++i) {
+          imt_equals = imt_equals && (super_imt->Get(i, image_pointer_size_) == imt_data[i]);
+        }
+        if (imt_equals) {
+          imt = super_imt;
+        }
+      }
+    }
+    if (imt == nullptr) {
+      LinearAlloc* allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
+      imt = reinterpret_cast<ImTable*>(
+          allocator->Alloc(self, ImTable::SizeInBytes(image_pointer_size_)));
+      if (imt == nullptr) {
+        return false;
+      }
+      imt->Populate(imt_data, image_pointer_size_);
+    }
+  }
+
   if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
     // We don't need to retire this class as it has no embedded tables or it was created the
     // correct size during class linker initialization.
     CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
 
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      klass->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
+    if (klass->ShouldHaveEmbeddedVTable()) {
+      klass->PopulateEmbeddedVTable(image_pointer_size_);
     }
-
+    if (klass->ShouldHaveImt()) {
+      klass->SetImt(imt, image_pointer_size_);
+    }
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -4027,8 +5165,7 @@
     // ArtMethod array pointers. If this occurs, it causes bugs in remembered sets since the GC
     // may not see any references to the target space and clean the card for a class if another
     // class had the same array pointer.
-    klass->SetDirectMethodsPtrUnchecked(nullptr);
-    klass->SetVirtualMethodsPtr(nullptr);
+    klass->SetMethodsPtrUnchecked(nullptr, 0, 0);
     klass->SetSFieldsPtrUnchecked(nullptr);
     klass->SetIFieldsPtrUnchecked(nullptr);
     if (UNLIKELY(h_new_class.Get() == nullptr)) {
@@ -4053,10 +5190,10 @@
         Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
       }
       CHECK_EQ(existing, klass.Get());
-      if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) {
+      if (kIsDebugBuild && class_loader == nullptr && dex_cache_boot_image_class_lookup_required_) {
         // Check a class loaded with the system class loader matches one in the image if the class
         // is in the image.
-        mirror::Class* const image_class = LookupClassFromImage(descriptor);
+        mirror::Class* const image_class = LookupClassFromBootImage(descriptor);
         if (image_class != nullptr) {
           CHECK_EQ(klass.Get(), existing) << descriptor;
         }
@@ -4278,6 +5415,19 @@
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
   uint16_t super_class_idx = class_def.superclass_idx_;
   if (super_class_idx != DexFile::kDexNoIndex16) {
+    // Check that a class does not inherit from itself directly.
+    //
+    // TODO: This is a cheap check to detect the straightforward case
+    // of a class extending itself (b/28685551), but we should do a
+    // proper cycle detection on loaded classes, to detect all cases
+    // of class circularity errors (b/28830038).
+    if (super_class_idx == class_def.class_idx_) {
+      ThrowClassCircularityError(klass.Get(),
+                                 "Class %s extends itself",
+                                 PrettyDescriptor(klass.Get()).c_str());
+      return false;
+    }
+
     mirror::Class* super_class = ResolveType(dex_file, super_class_idx, klass.Get());
     if (super_class == nullptr) {
       DCHECK(Thread::Current()->IsExceptionPending());
@@ -4393,18 +5543,19 @@
 bool ClassLinker::LinkMethods(Thread* self,
                               Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                              bool* out_new_conflict,
                               ArtMethod** out_imt) {
   self->AllowThreadSuspension();
   // A map from vtable indexes to the method they need to be updated to point to. Used because we
   // need to have default methods be in the virtuals array of each class but we don't set that up
   // until LinkInterfaceMethods.
-  std::unordered_map<size_t, ArtMethod*> default_translations;
+  std::unordered_map<size_t, ClassLinker::MethodTranslation> default_translations;
   // Link virtual methods then interface methods.
   // We set up the interface lookup table first because we need it to determine if we need to update
   // any vtable entries with new default method implementations.
   return SetupInterfaceLookupTable(self, klass, interfaces)
           && LinkVirtualMethods(self, klass, /*out*/ &default_translations)
-          && LinkInterfaceMethods(self, klass, default_translations, out_imt);
+          && LinkInterfaceMethods(self, klass, default_translations, out_new_conflict, out_imt);
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -4460,7 +5611,7 @@
   LinkVirtualHashTable(Handle<mirror::Class> klass,
                        size_t hash_size,
                        uint32_t* hash_table,
-                       size_t image_pointer_size)
+                       PointerSize image_pointer_size)
      : klass_(klass),
        hash_size_(hash_size),
        hash_table_(hash_table),
@@ -4522,16 +5673,23 @@
   Handle<mirror::Class> klass_;
   const size_t hash_size_;
   uint32_t* const hash_table_;
-  const size_t image_pointer_size_;
+  const PointerSize image_pointer_size_;
 };
 
 const uint32_t LinkVirtualHashTable::invalid_index_ = std::numeric_limits<uint32_t>::max();
 const uint32_t LinkVirtualHashTable::removed_index_ = std::numeric_limits<uint32_t>::max() - 1;
 
-bool ClassLinker::LinkVirtualMethods(
+// b/30419309
+#if defined(__i386__)
+#define X86_OPTNONE __attribute__((optnone))
+#else
+#define X86_OPTNONE
+#endif
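+// optnone disables compiler optimizations for the annotated function; used here to work around
+// b/30419309 on x86.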
+
+X86_OPTNONE bool ClassLinker::LinkVirtualMethods(
     Thread* self,
     Handle<mirror::Class> klass,
-    /*out*/std::unordered_map<size_t, ArtMethod*>* default_translations) {
+    /*out*/std::unordered_map<size_t, ClassLinker::MethodTranslation>* default_translations) {
   const size_t num_virtual_methods = klass->NumVirtualMethods();
   if (klass->IsInterface()) {
     // No vtable.
@@ -4540,7 +5698,6 @@
       return false;
     }
     bool has_defaults = false;
-    // TODO May need to replace this with real VTable for invoke_super
     // Assign each method an IMT index and set the default flag.
     for (size_t i = 0; i < num_virtual_methods; ++i) {
       ArtMethod* m = klass->GetVirtualMethodDuringLinking(i, image_pointer_size_);
@@ -4563,7 +5720,7 @@
     StackHandleScope<2> hs(self);
     Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
     MutableHandle<mirror::PointerArray> vtable;
-    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+    if (super_class->ShouldHaveEmbeddedVTable()) {
       vtable = hs.NewHandle(AllocPointerArray(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         self->AssertPendingOOMException();
@@ -4654,46 +5811,55 @@
                        << " would have incorrectly overridden the package-private method in "
                        << PrettyDescriptor(super_method->GetDeclaringClassDescriptor());
         }
-      } else if (super_method->IsDefault()) {
+      } else if (super_method->IsOverridableByDefaultMethod()) {
         // We didn't directly override this method but we might through default methods...
         // Check for default method update.
         ArtMethod* default_method = nullptr;
-        std::string icce_message;
-        if (!FindDefaultMethodImplementation(self,
-                                             super_method,
-                                             klass,
-                                             /*out*/&default_method,
-                                             /*out*/&icce_message)) {
-          // An error occurred while finding default methods.
-          // TODO This should actually be thrown when we attempt to invoke this method.
-          ThrowIncompatibleClassChangeError(klass.Get(), "%s", icce_message.c_str());
-          return false;
-        }
-        // This should always work because we inherit superclass interfaces. We should either get
-        //  1) An IncompatibleClassChangeError because of conflicting default method
-        //     implementations.
-        //  2) The same default method implementation as the superclass.
-        //  3) A default method that overrides the superclass's.
-        // Therefore this check should never fail.
-        CHECK(default_method != nullptr);
-        if (UNLIKELY(default_method->GetDeclaringClass() != super_method->GetDeclaringClass())) {
-          // TODO Refactor this add default methods to virtuals here and not in
-          //      LinkInterfaceMethods maybe.
-          //      The problem is default methods might override previously present default-method or
-          //      miranda-method vtable entries from the superclass. Unfortunately we need these to
-          //      be entries in this class's virtuals. We do not give these entries there until
-          //      LinkInterfaceMethods so we pass this map around to let it know which vtable
-          //      entries need to be updated.
-          // Make a note that vtable entry j must be updated, store what it needs to be updated to.
-          // We will allocate a virtual method slot in LinkInterfaceMethods and fix it up then.
-          default_translations->insert({j, default_method});
-          VLOG(class_linker) << "Method " << PrettyMethod(super_method) << " overridden by default "
-                             << PrettyMethod(default_method) << " in " << PrettyClass(klass.Get());
-        } else {
-          // They are the same method/no override
-          // Cannot do direct comparison because we had to copy the ArtMethod object into the
-          // superclass's vtable.
-          continue;
+        switch (FindDefaultMethodImplementation(self,
+                                                super_method,
+                                                klass,
+                                                /*out*/&default_method)) {
+          case DefaultMethodSearchResult::kDefaultConflict: {
+            // A conflict was found looking for default methods. Note this (assuming it wasn't
+            // pre-existing) in the translations map.
+            if (UNLIKELY(!super_method->IsDefaultConflicting())) {
+              // Don't generate another conflict method to reduce memory use as an optimization.
+              default_translations->insert(
+                  {j, ClassLinker::MethodTranslation::CreateConflictingMethod()});
+            }
+            break;
+          }
+          case DefaultMethodSearchResult::kAbstractFound: {
+            // No conflict but method is abstract.
+            // We note that this vtable entry must be made abstract.
+            if (UNLIKELY(!super_method->IsAbstract())) {
+              default_translations->insert(
+                  {j, ClassLinker::MethodTranslation::CreateAbstractMethod()});
+            }
+            break;
+          }
+          case DefaultMethodSearchResult::kDefaultFound: {
+            if (UNLIKELY(super_method->IsDefaultConflicting() ||
+                        default_method->GetDeclaringClass() != super_method->GetDeclaringClass())) {
+              // Found a default method implementation that is new.
+              // TODO Refactor this add default methods to virtuals here and not in
+              //      LinkInterfaceMethods maybe.
+              //      The problem is default methods might override previously present
+              //      default-method or miranda-method vtable entries from the superclass.
+              //      Unfortunately we need these to be entries in this class's virtuals. We do not
+              //      give these entries there until LinkInterfaceMethods so we pass this map around
+              //      to let it know which vtable entries need to be updated.
+              // Make a note that vtable entry j must be updated, store what it needs to be updated
+              // to. We will allocate a virtual method slot in LinkInterfaceMethods and fix it up
+              // then.
+              default_translations->insert(
+                  {j, ClassLinker::MethodTranslation::CreateTranslatedMethod(default_method)});
+              VLOG(class_linker) << "Method " << PrettyMethod(super_method)
+                                 << " overridden by default " << PrettyMethod(default_method)
+                                 << " in " << PrettyClass(klass.Get());
+            }
+            break;
+          }
         }
       }
     }
@@ -4746,23 +5912,73 @@
   return true;
 }
 
+// Determine if the given iface has any subinterface in the given list that declares the method
+// specified by 'target'.
+//
+// Arguments
+// - self:    The thread we are running on
+// - target:  A comparator that will match any method that overrides the method we are checking for
+// - iftable: The iftable we are searching for an overriding method on.
+// - ifstart: The index of 'iface' within 'iftable'.
+// - iface:   The interface we are checking to see if anything overrides.
+// - image_pointer_size:
+//            The image pointer size.
+//
+// Returns
+// - True:  There is some method that matches the target comparator defined in an interface that
+//          is a subtype of iface.
+// - False: There is no method that matches the target comparator in any interface that is a subtype
+//          of iface.
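+//
+// For example, given an iftable [I, J] where J extends I and J re-declares I's method m(), a
+// search with ifstart at I's index and a comparator for m() returns true, since J overrides it.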
+static bool ContainsOverridingMethodOf(Thread* self,
+                                       MethodNameAndSignatureComparator& target,
+                                       Handle<mirror::IfTable> iftable,
+                                       size_t ifstart,
+                                       Handle<mirror::Class> iface,
+                                       PointerSize image_pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(self != nullptr);
+  DCHECK(iface.Get() != nullptr);
+  DCHECK(iftable.Get() != nullptr);
+  DCHECK_GE(ifstart, 0u);
+  DCHECK_LT(ifstart, iftable->Count());
+  DCHECK_EQ(iface.Get(), iftable->GetInterface(ifstart));
+  DCHECK(iface->IsInterface());
+
+  size_t iftable_count = iftable->Count();
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Class> current_iface(hs.NewHandle<mirror::Class>(nullptr));
+  for (size_t k = ifstart + 1; k < iftable_count; k++) {
+    // Skip ifstart since our current interface obviously cannot override itself.
+    current_iface.Assign(iftable->GetInterface(k));
+    // Iterate through every method on this interface. The order does not matter.
+    for (ArtMethod& current_method : current_iface->GetDeclaredVirtualMethods(image_pointer_size)) {
+      if (UNLIKELY(target.HasSameNameAndSignature(
+                      current_method.GetInterfaceMethodIfProxy(image_pointer_size)))) {
+        // Only treat it as an override if current_iface is actually a subinterface of iface.
+        if (iface->IsAssignableFrom(current_iface.Get())) {
+          return true;
+        }
+        break;
+      }
+    }
+  }
+  return false;
+}
+
 // Find the default method implementation for 'interface_method' in 'klass'. Stores it into
-// out_default_method and returns true on success. If no default method was found stores nullptr
-// into out_default_method and returns true. If an error occurs (such as a default_method conflict)
-// it will fill the icce_message with an appropriate message for an IncompatibleClassChangeError,
-// which should then be thrown by the caller.
-bool ClassLinker::FindDefaultMethodImplementation(Thread* self,
-                                                  ArtMethod* target_method,
-                                                  Handle<mirror::Class> klass,
-                                                  /*out*/ArtMethod** out_default_method,
-                                                  /*out*/std::string* icce_message) const {
+// out_default_method and returns kDefaultFound on success. If no default method was found return
+// kAbstractFound and store nullptr into out_default_method. If an error occurs (such as a
+// default_method conflict) it will return kDefaultConflict.
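+//
+// For example, if klass implements two unrelated interfaces that both declare a default
+// implementation of the target method, neither masks the other and kDefaultConflict is
+// returned; if one interface extends the other, the subinterface's implementation masks the
+// other and is returned as kDefaultFound.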
+ClassLinker::DefaultMethodSearchResult ClassLinker::FindDefaultMethodImplementation(
+    Thread* self,
+    ArtMethod* target_method,
+    Handle<mirror::Class> klass,
+    /*out*/ArtMethod** out_default_method) const {
   DCHECK(self != nullptr);
   DCHECK(target_method != nullptr);
   DCHECK(out_default_method != nullptr);
-  DCHECK(icce_message != nullptr);
 
   *out_default_method = nullptr;
-  mirror::Class* chosen_iface = nullptr;
 
   // We organize the interface table so that, for interface I any subinterfaces J follow it in the
   // table. This lets us walk the table backwards when searching for default methods.  The first one
@@ -4773,81 +5989,343 @@
   // The order of unrelated interfaces does not matter and is not defined.
   size_t iftable_count = klass->GetIfTableCount();
   if (iftable_count == 0) {
-    // No interfaces. We have already reset out to null so just return true.
-    return true;
+    // No interfaces. We have already reset out to null so just return kAbstractFound.
+    return DefaultMethodSearchResult::kAbstractFound;
   }
 
-  StackHandleScope<1> hs(self);
+  StackHandleScope<3> hs(self);
+  MutableHandle<mirror::Class> chosen_iface(hs.NewHandle<mirror::Class>(nullptr));
   MutableHandle<mirror::IfTable> iftable(hs.NewHandle(klass->GetIfTable()));
+  MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
   MethodNameAndSignatureComparator target_name_comparator(
       target_method->GetInterfaceMethodIfProxy(image_pointer_size_));
   // Iterates over the klass's iftable in reverse
-  // We have a break at the end because size_t is unsigned.
-  for (size_t k = iftable_count - 1; /* break if k == 0 at end */; --k) {
+  for (size_t k = iftable_count; k != 0; ) {
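+    // Decrement inside the body so the unsigned index cannot wrap below zero.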
+    --k;
+
     DCHECK_LT(k, iftable->Count());
-    mirror::Class* iface = iftable->GetInterface(k);
-    size_t num_instance_methods = iface->NumVirtualMethods();
-    // Iterate through every method on this interface. The order does not matter so we go forwards.
-    for (size_t m = 0; m < num_instance_methods; m++) {
-      ArtMethod* current_method = iface->GetVirtualMethodUnchecked(m, image_pointer_size_);
+
+    iface.Assign(iftable->GetInterface(k));
+    // Iterate through every declared method on this interface. The order does not matter.
+    for (auto& method_iter : iface->GetDeclaredVirtualMethods(image_pointer_size_)) {
+      ArtMethod* current_method = &method_iter;
       // Skip abstract methods and methods with different names.
       if (current_method->IsAbstract() ||
           !target_name_comparator.HasSameNameAndSignature(
               current_method->GetInterfaceMethodIfProxy(image_pointer_size_))) {
         continue;
+      } else if (!current_method->IsPublic()) {
+        // The verifier should have caught the non-public method for dex version 37. Just warn and
+        // skip it, since this predates default methods, so we don't really need to care that it
+        // has code.
+        LOG(WARNING) << "Interface method " << PrettyMethod(current_method) << " is not public! "
+                     << "This will be a fatal error in subsequent versions of Android. "
+                     << "Continuing anyway.";
       }
-      // The verifier should have caught the non-public method.
-      DCHECK(current_method->IsPublic()) << "Interface method is not public!";
-      if (UNLIKELY(chosen_iface != nullptr)) {
-        // We have multiple default impls of the same method. We need to check they do not
-        // conflict and throw an error if they do. Conflicting means that the current iface is not
-        // masked by the chosen interface.
-        if (!iface->IsAssignableFrom(chosen_iface)) {
-          *icce_message = StringPrintf("Conflicting default method implementations: '%s' and '%s'",
-                                       PrettyMethod(current_method).c_str(),
-                                       PrettyMethod(*out_default_method).c_str());
-          return false;
+      if (UNLIKELY(chosen_iface.Get() != nullptr)) {
+        // We have multiple default impls of the same method. This is a potential default conflict.
+        // We need to check if this possibly conflicting method is either a superclass of the chosen
+        // default implementation or is overridden by a non-default interface method. In either case
+        // there is no conflict.
+        if (!iface->IsAssignableFrom(chosen_iface.Get()) &&
+            !ContainsOverridingMethodOf(self,
+                                        target_name_comparator,
+                                        iftable,
+                                        k,
+                                        iface,
+                                        image_pointer_size_)) {
+          VLOG(class_linker) << "Conflicting default method implementations found: "
+                             << PrettyMethod(current_method) << " and "
+                             << PrettyMethod(*out_default_method) << " in class "
+                             << PrettyClass(klass.Get());
+          *out_default_method = nullptr;
+          return DefaultMethodSearchResult::kDefaultConflict;
         } else {
           break;  // Continue checking at the next interface.
         }
       } else {
-        *out_default_method = current_method;
-        chosen_iface = iface;
-        // We should now finish traversing the graph to find if we have default methods that
-        // conflict.
-        break;
+        // chosen_iface == null
+        if (!ContainsOverridingMethodOf(self,
+                                        target_name_comparator,
+                                        iftable,
+                                        k,
+                                        iface,
+                                        image_pointer_size_)) {
+          // Don't set this as the chosen interface if something else is overriding it (because that
+          // other interface would be potentially chosen instead if it was default). If the other
+          // interface was abstract then we wouldn't select this interface as chosen anyway since
+          // the abstract method masks it.
+          *out_default_method = current_method;
+          chosen_iface.Assign(iface.Get());
+          // We should now finish traversing the graph to find if we have default methods that
+          // conflict.
+        } else {
+          VLOG(class_linker) << "A default method '" << PrettyMethod(current_method) << "' was "
+                            << "skipped because it was overridden by an abstract method in a "
+                            << "subinterface on class '" << PrettyClass(klass.Get()) << "'";
+        }
       }
-    }
-    if (k == 0) {
       break;
     }
   }
-  return true;
+  if (*out_default_method != nullptr) {
+    VLOG(class_linker) << "Default method '" << PrettyMethod(*out_default_method) << "' selected "
+                       << "as the implementation for '" << PrettyMethod(target_method) << "' "
+                       << "in '" << PrettyClass(klass.Get()) << "'";
+    return DefaultMethodSearchResult::kDefaultFound;
+  } else {
+    return DefaultMethodSearchResult::kAbstractFound;
+  }
 }
 
-// Sets imt_ref appropriately for LinkInterfaceMethods.
-// If there is no method in the imt location of imt_ref it will store the given method there.
-// Otherwise it will set the conflict method which will figure out which method to use during
-// runtime.
-static void SetIMTRef(ArtMethod* unimplemented_method,
-                      ArtMethod* conflict_method,
-                      size_t image_pointer_size,
-                      ArtMethod* current_method,
-                      /*out*/ArtMethod** imt_ref)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
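+// Copies the conflict method's existing table into a new table with room for one more entry,
+// appends the (interface method, implementation method) pair, and returns a conflict method
+// pointing at the new table (the original method is returned on allocation failure).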
+ArtMethod* ClassLinker::AddMethodToConflictTable(mirror::Class* klass,
+                                                 ArtMethod* conflict_method,
+                                                 ArtMethod* interface_method,
+                                                 ArtMethod* method,
+                                                 bool force_new_conflict_method) {
+  ImtConflictTable* current_table = conflict_method->GetImtConflictTable(kRuntimePointerSize);
+  Runtime* const runtime = Runtime::Current();
+  LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
+  bool new_entry = conflict_method == runtime->GetImtConflictMethod() || force_new_conflict_method;
+
+  // Create a new entry if the existing one is the shared conflict method.
+  ArtMethod* new_conflict_method = new_entry
+      ? runtime->CreateImtConflictMethod(linear_alloc)
+      : conflict_method;
+
+  // Allocate a new table. Note that we will leak this table at the next conflict,
+  // but that's a tradeoff compared to making the table fixed size.
+  void* data = linear_alloc->Alloc(
+      Thread::Current(), ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table,
+                                                                       image_pointer_size_));
+  if (data == nullptr) {
+    LOG(ERROR) << "Failed to allocate conflict table";
+    return conflict_method;
+  }
+  ImtConflictTable* new_table = new (data) ImtConflictTable(current_table,
+                                                            interface_method,
+                                                            method,
+                                                            image_pointer_size_);
+
+  // Do a fence to ensure threads see the data in the table before it is assigned
+  // to the conflict method.
+  // Note that there is a race in the presence of multiple threads and we may leak
+  // memory from the LinearAlloc, but that's a tradeoff compared to using
+  // atomic operations.
+  QuasiAtomic::ThreadFenceRelease();
+  new_conflict_method->SetImtConflictTable(new_table, image_pointer_size_);
+  return new_conflict_method;
+}
+
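+// Sets imt_ref appropriately for LinkInterfaceMethods. If the IMT slot is still unimplemented,
+// the given method is stored there. If the slot already holds a different real method, a
+// conflict method is stored instead and *new_conflict is set so callers know the IMT can no
+// longer be shared with the super class.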
+void ClassLinker::SetIMTRef(ArtMethod* unimplemented_method,
+                            ArtMethod* imt_conflict_method,
+                            ArtMethod* current_method,
+                            /*out*/bool* new_conflict,
+                            /*out*/ArtMethod** imt_ref) {
   // Place method in imt if entry is empty, place conflict otherwise.
   if (*imt_ref == unimplemented_method) {
     *imt_ref = current_method;
-  } else if (*imt_ref != conflict_method) {
+  } else if (!(*imt_ref)->IsRuntimeMethod()) {
     // If we are not a conflict and we have the same signature and name as the imt
     // entry, it must be that we overwrote a superclass vtable entry.
+    // Note that we have checked IsRuntimeMethod, as there may be multiple different
+    // conflict methods.
     MethodNameAndSignatureComparator imt_comparator(
-        (*imt_ref)->GetInterfaceMethodIfProxy(image_pointer_size));
+        (*imt_ref)->GetInterfaceMethodIfProxy(image_pointer_size_));
     if (imt_comparator.HasSameNameAndSignature(
-          current_method->GetInterfaceMethodIfProxy(image_pointer_size))) {
+          current_method->GetInterfaceMethodIfProxy(image_pointer_size_))) {
       *imt_ref = current_method;
     } else {
-      *imt_ref = conflict_method;
+      *imt_ref = imt_conflict_method;
+      *new_conflict = true;
+    }
+  } else {
+    // Place the default conflict method. Note that there may be an existing conflict
+    // method in the IMT, but it could be one tailored to the super class, with a
+    // specific ImtConflictTable.
+    *imt_ref = imt_conflict_method;
+    *new_conflict = true;
+  }
+}
+
+void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
+  DCHECK(klass->ShouldHaveImt()) << PrettyClass(klass);
+  DCHECK(!klass->IsTemp()) << PrettyClass(klass);
+  ArtMethod* imt_data[ImTable::kSize];
+  Runtime* const runtime = Runtime::Current();
+  ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
+  ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
+  std::fill_n(imt_data, arraysize(imt_data), unimplemented_method);
+  if (klass->GetIfTable() != nullptr) {
+    bool new_conflict = false;
+    FillIMTFromIfTable(klass->GetIfTable(),
+                       unimplemented_method,
+                       conflict_method,
+                       klass,
+                       /*create_conflict_tables*/true,
+                       /*ignore_copied_methods*/false,
+                       &new_conflict,
+                       &imt_data[0]);
+  }
+  if (!klass->ShouldHaveImt()) {
+    return;
+  }
+  // Compare the IMT with the super class including the conflict methods. If they are equivalent,
+  // we can just use the same pointer.
+  ImTable* imt = nullptr;
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class != nullptr && super_class->ShouldHaveImt()) {
+    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
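+    // The IMTs are considered equal if every slot matches directly, or if both slots hold
+    // conflict methods whose conflict tables are equal.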
+    bool same = true;
+    for (size_t i = 0; same && i < ImTable::kSize; ++i) {
+      ArtMethod* method = imt_data[i];
+      ArtMethod* super_method = super_imt->Get(i, image_pointer_size_);
+      if (method != super_method) {
+        // Check whether both the method and the super method are conflict methods, and if so
+        // verify the conflict table contents.
+        bool is_conflict_table = method->IsRuntimeMethod() &&
+                                 method != unimplemented_method &&
+                                 method != conflict_method;
+        bool super_conflict_table = super_method->IsRuntimeMethod() &&
+                                    super_method != unimplemented_method &&
+                                    super_method != conflict_method;
+        if (!is_conflict_table || !super_conflict_table) {
+          same = false;
+        } else {
+          ImtConflictTable* table1 = method->GetImtConflictTable(image_pointer_size_);
+          ImtConflictTable* table2 = super_method->GetImtConflictTable(image_pointer_size_);
+          same = same && table1->Equals(table2, image_pointer_size_);
+        }
+      }
+    }
+    if (same) {
+      imt = super_imt;
+    }
+  }
+  if (imt == nullptr) {
+    imt = klass->GetImt(image_pointer_size_);
+    DCHECK(imt != nullptr);
+    imt->Populate(imt_data, image_pointer_size_);
+  } else {
+    klass->SetImt(imt, image_pointer_size_);
+  }
+}
+
+ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
+                                                      LinearAlloc* linear_alloc,
+                                                      PointerSize image_pointer_size) {
+  void* data = linear_alloc->Alloc(Thread::Current(),
+                                   ImtConflictTable::ComputeSize(count,
+                                                                 image_pointer_size));
+  return (data != nullptr) ? new (data) ImtConflictTable(count, image_pointer_size) : nullptr;
+}
+
+ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count, LinearAlloc* linear_alloc) {
+  return CreateImtConflictTable(count, linear_alloc, image_pointer_size_);
+}
+
+void ClassLinker::FillIMTFromIfTable(mirror::IfTable* if_table,
+                                     ArtMethod* unimplemented_method,
+                                     ArtMethod* imt_conflict_method,
+                                     mirror::Class* klass,
+                                     bool create_conflict_tables,
+                                     bool ignore_copied_methods,
+                                     /*out*/bool* new_conflict,
+                                     /*out*/ArtMethod** imt) {
+  uint32_t conflict_counts[ImTable::kSize] = {};
+  for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
+    mirror::Class* interface = if_table->GetInterface(i);
+    const size_t num_virtuals = interface->NumVirtualMethods();
+    const size_t method_array_count = if_table->GetMethodArrayCount(i);
+    // There can be more virtual methods than if-table methods when there are default methods.
+    DCHECK_GE(num_virtuals, method_array_count);
+    if (kIsDebugBuild) {
+      if (klass->IsInterface()) {
+        DCHECK_EQ(method_array_count, 0u);
+      } else {
+        DCHECK_EQ(interface->NumDeclaredVirtualMethods(), method_array_count);
+      }
+    }
+    if (method_array_count == 0) {
+      continue;
+    }
+    auto* method_array = if_table->GetMethodArray(i);
+    for (size_t j = 0; j < method_array_count; ++j) {
+      ArtMethod* implementation_method =
+          method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+      if (ignore_copied_methods && implementation_method->IsCopied()) {
+        continue;
+      }
+      DCHECK(implementation_method != nullptr);
+      // Miranda methods cannot be used to implement an interface method, but they are safe to put
+      // in the IMT since their entrypoint is the interface trampoline. If we put any copied methods
+      // or interface methods in the IMT here they will not create extra conflicts since we compare
+      // names and signatures in SetIMTRef.
+      ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
+      const uint32_t imt_index = interface_method->GetImtIndex();
+
+      // An IMT slot only has a conflict if the interface methods mapping to it do not all share
+      // the same implementation method. Keep a per-slot count so that a conflict table of the
+      // right size can be created later if one turns out to be needed.
+      ++conflict_counts[imt_index];
+
+      SetIMTRef(unimplemented_method,
+                imt_conflict_method,
+                implementation_method,
+                /*out*/new_conflict,
+                /*out*/&imt[imt_index]);
+    }
+  }
+
+  if (create_conflict_tables) {
+    // Create the conflict tables.
+    LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      size_t conflicts = conflict_counts[i];
+      if (imt[i] == imt_conflict_method) {
+        ImtConflictTable* new_table = CreateImtConflictTable(conflicts, linear_alloc);
+        if (new_table != nullptr) {
+          ArtMethod* new_conflict_method =
+              Runtime::Current()->CreateImtConflictMethod(linear_alloc);
+          new_conflict_method->SetImtConflictTable(new_table, image_pointer_size_);
+          imt[i] = new_conflict_method;
+        } else {
+          LOG(ERROR) << "Failed to allocate conflict table";
+          imt[i] = imt_conflict_method;
+        }
+      } else {
+        DCHECK_NE(imt[i], imt_conflict_method);
+      }
+    }
+
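+    // Second pass: append each (interface method, implementation method) pair to the conflict
+    // table of its IMT slot, skipping slots that did not get a per-class conflict method above.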
+    for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
+      mirror::Class* interface = if_table->GetInterface(i);
+      const size_t method_array_count = if_table->GetMethodArrayCount(i);
+      // There can be more virtual methods than if-table methods when there are default methods.
+      if (method_array_count == 0) {
+        continue;
+      }
+      auto* method_array = if_table->GetMethodArray(i);
+      for (size_t j = 0; j < method_array_count; ++j) {
+        ArtMethod* implementation_method =
+            method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+        if (ignore_copied_methods && implementation_method->IsCopied()) {
+          continue;
+        }
+        DCHECK(implementation_method != nullptr);
+        ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
+        const uint32_t imt_index = interface_method->GetImtIndex();
+        if (!imt[imt_index]->IsRuntimeMethod() ||
+            imt[imt_index] == unimplemented_method ||
+            imt[imt_index] == imt_conflict_method) {
+          continue;
+        }
+        ImtConflictTable* table = imt[imt_index]->GetImtConflictTable(image_pointer_size_);
+        const size_t num_entries = table->NumEntries(image_pointer_size_);
+        table->SetInterfaceMethod(num_entries, image_pointer_size_, interface_method);
+        table->SetImplementationMethod(num_entries, image_pointer_size_, implementation_method);
+      }
     }
   }
 }
@@ -5054,14 +6532,81 @@
   return true;
 }
 
+// Finds the method with a name/signature that matches cmp in the given list of methods. The
+// name/signature pairs in the list must be unique.
+static ArtMethod* FindSameNameAndSignature(MethodNameAndSignatureComparator& cmp,
+                                           const ScopedArenaVector<ArtMethod*>& list)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  for (ArtMethod* method : list) {
+    if (cmp.HasSameNameAndSignature(method)) {
+      return method;
+    }
+  }
+  return nullptr;
+}
+
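+// Check that every vtable entry either comes from the super class's vtable at the same index or
+// is declared by this class.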
+static void SanityCheckVTable(Handle<mirror::Class> klass, PointerSize pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::PointerArray* check_vtable = klass->GetVTableDuringLinking();
+  mirror::Class* superclass = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
+  int32_t super_vtable_length = (superclass != nullptr) ? superclass->GetVTableLength() : 0;
+  for (int32_t i = 0; i < check_vtable->GetLength(); ++i) {
+    ArtMethod* m = check_vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size);
+    CHECK(m != nullptr);
+
+    ArraySlice<ArtMethod> virtuals = klass->GetVirtualMethodsSliceUnchecked(pointer_size);
+    auto is_same_method = [m] (const ArtMethod& meth) {
+      return &meth == m;
+    };
+    CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
+          std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())
+        << "While linking class '" << PrettyClass(klass.Get()) << "' unable to find owning class "
+        << "of '" << PrettyMethod(m) << "' (vtable index: " << i << ").";
+  }
+}
+
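+// Seed the IMT for klass from its super class: either copy the super class's ImTable directly,
+// or, if the super class has none, reconstruct the entries from its iftable.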
+void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
+                                        ArtMethod* unimplemented_method,
+                                        ArtMethod* imt_conflict_method,
+                                        bool* new_conflict,
+                                        ArtMethod** imt) {
+  DCHECK(klass->HasSuperClass());
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class->ShouldHaveImt()) {
+    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      imt[i] = super_imt->Get(i, image_pointer_size_);
+    }
+  } else {
+    // No imt in the super class, need to reconstruct from the iftable.
+    mirror::IfTable* if_table = super_class->GetIfTable();
+    if (if_table != nullptr) {
+      // Ignore copied methods since we will handle these in LinkInterfaceMethods.
+      FillIMTFromIfTable(if_table,
+                         unimplemented_method,
+                         imt_conflict_method,
+                         klass.Get(),
+                         /*create_conflict_table*/false,
+                         /*ignore_copied_methods*/true,
+                         /*out*/new_conflict,
+                         /*out*/imt);
+    }
+  }
+}
+
+// TODO This method needs to be split up into several smaller methods.
 bool ClassLinker::LinkInterfaceMethods(
     Thread* self,
     Handle<mirror::Class> klass,
-    const std::unordered_map<size_t, ArtMethod*>& default_translations,
+    const std::unordered_map<size_t, ClassLinker::MethodTranslation>& default_translations,
+    bool* out_new_conflict,
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
+
+  const bool is_interface = klass->IsInterface();
   const bool has_superclass = klass->HasSuperClass();
+  const bool fill_tables = !is_interface;
   const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
   const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
   const size_t method_size = ArtMethod::Size(image_pointer_size_);
@@ -5069,10 +6614,6 @@
 
   MutableHandle<mirror::IfTable> iftable(hs.NewHandle(klass->GetIfTable()));
 
-  // If we're an interface, we don't need the vtable pointers, so we're done.
-  if (klass->IsInterface()) {
-    return true;
-  }
   // These are allocated on the heap to begin, we then transfer to linear alloc when we re-create
   // the virtual methods array.
   // Need to use low 4GB arenas for compiler or else the pointers wont fit in 32 bit method array
@@ -5080,111 +6621,107 @@
   // Use the linear alloc pool since this one is in the low 4gb for the compiler.
   ArenaStack stack(runtime->GetLinearAlloc()->GetArenaPool());
   ScopedArenaAllocator allocator(&stack);
+
+  ScopedArenaVector<ArtMethod*> default_conflict_methods(allocator.Adapter());
   ScopedArenaVector<ArtMethod*> miranda_methods(allocator.Adapter());
   ScopedArenaVector<ArtMethod*> default_methods(allocator.Adapter());
 
   MutableHandle<mirror::PointerArray> vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
-  ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
+  ArtMethod* const imt_conflict_method = runtime->GetImtConflictMethod();
   // Copy the IMT from the super class if possible.
-  bool extend_super_iftable = false;
-  if (has_superclass) {
-    mirror::Class* super_class = klass->GetSuperClass();
-    extend_super_iftable = true;
-    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        out_imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
-      }
-    } else {
-      // No imt in the super class, need to reconstruct from the iftable.
-      mirror::IfTable* if_table = super_class->GetIfTable();
-      const size_t length = super_class->GetIfTableCount();
-      for (size_t i = 0; i < length; ++i) {
-        mirror::Class* interface = iftable->GetInterface(i);
-        const size_t num_virtuals = interface->NumVirtualMethods();
-        const size_t method_array_count = if_table->GetMethodArrayCount(i);
-        DCHECK_EQ(num_virtuals, method_array_count);
-        if (method_array_count == 0) {
-          continue;
-        }
-        auto* method_array = if_table->GetMethodArray(i);
-        for (size_t j = 0; j < num_virtuals; ++j) {
-          auto method = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
-          DCHECK(method != nullptr) << PrettyClass(super_class);
-          // Miranda methods cannot be used to implement an interface method and defaults should be
-          // skipped in case we override it.
-          if (method->IsDefault() || method->IsMiranda()) {
-            continue;
-          }
-          ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-          uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-          auto** imt_ref = &out_imt[imt_index];
-          if (*imt_ref == unimplemented_method) {
-            *imt_ref = method;
-          } else if (*imt_ref != conflict_method) {
-            *imt_ref = conflict_method;
-          }
-        }
-      }
-    }
+  const bool extend_super_iftable = has_superclass;
+  if (has_superclass && fill_tables) {
+    FillImtFromSuperClass(klass,
+                          unimplemented_method,
+                          imt_conflict_method,
+                          out_new_conflict,
+                          out_imt);
   }
   // Allocate method arrays before since we don't want miss visiting miranda method roots due to
   // thread suspension.
-  for (size_t i = 0; i < ifcount; ++i) {
-    size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
-    if (num_methods > 0) {
-      const bool is_super = i < super_ifcount;
-      // This is an interface implemented by a super-class. Therefore we can just copy the method
-      // array from the superclass.
-      const bool super_interface = is_super && extend_super_iftable;
-      mirror::PointerArray* method_array;
-      if (super_interface) {
-        mirror::IfTable* if_table = klass->GetSuperClass()->GetIfTable();
-        DCHECK(if_table != nullptr);
-        DCHECK(if_table->GetMethodArray(i) != nullptr);
-        // If we are working on a super interface, try extending the existing method array.
-        method_array = down_cast<mirror::PointerArray*>(if_table->GetMethodArray(i)->Clone(self));
-      } else {
-        method_array = AllocPointerArray(self, num_methods);
+  if (fill_tables) {
+    for (size_t i = 0; i < ifcount; ++i) {
+      size_t num_methods = iftable->GetInterface(i)->NumDeclaredVirtualMethods();
+      if (num_methods > 0) {
+        const bool is_super = i < super_ifcount;
+        // This is an interface implemented by a super-class. Therefore we can just copy the method
+        // array from the superclass.
+        const bool super_interface = is_super && extend_super_iftable;
+        mirror::PointerArray* method_array;
+        if (super_interface) {
+          mirror::IfTable* if_table = klass->GetSuperClass()->GetIfTable();
+          DCHECK(if_table != nullptr);
+          DCHECK(if_table->GetMethodArray(i) != nullptr);
+          // If we are working on a super interface, try extending the existing method array.
+          method_array = down_cast<mirror::PointerArray*>(if_table->GetMethodArray(i)->Clone(self));
+        } else {
+          method_array = AllocPointerArray(self, num_methods);
+        }
+        if (UNLIKELY(method_array == nullptr)) {
+          self->AssertPendingOOMException();
+          return false;
+        }
+        iftable->SetMethodArray(i, method_array);
       }
-      if (UNLIKELY(method_array == nullptr)) {
-        self->AssertPendingOOMException();
-        return false;
-      }
-      iftable->SetMethodArray(i, method_array);
     }
   }
 
   auto* old_cause = self->StartAssertNoThreadSuspension(
       "Copying ArtMethods for LinkInterfaceMethods");
-  for (size_t i = 0; i < ifcount; ++i) {
-    size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
+  // Going in reverse to ensure that we will hit abstract methods that override defaults before the
+  // defaults. This means we don't need to do any trickery when creating the Miranda methods, since
+  // they will already be null. This has the additional benefit that the declarer of a miranda
+  // method will actually declare an abstract method.
+  for (size_t i = ifcount; i != 0; ) {
+    --i;
+
+    DCHECK_GE(i, 0u);
+    DCHECK_LT(i, ifcount);
+
+    size_t num_methods = iftable->GetInterface(i)->NumDeclaredVirtualMethods();
     if (num_methods > 0) {
       StackHandleScope<2> hs2(self);
       const bool is_super = i < super_ifcount;
       const bool super_interface = is_super && extend_super_iftable;
-      auto method_array(hs2.NewHandle(iftable->GetMethodArray(i)));
+      // We don't actually create or fill these tables for interfaces; we just copy some methods
+      // for conflict methods. Set this to nullptr in those cases.
+      Handle<mirror::PointerArray> method_array(fill_tables
+                                                ? hs2.NewHandle(iftable->GetMethodArray(i))
+                                                : hs2.NewHandle<mirror::PointerArray>(nullptr));
 
-      LengthPrefixedArray<ArtMethod>* input_virtual_methods = nullptr;
-      Handle<mirror::PointerArray> input_vtable_array = NullHandle<mirror::PointerArray>();
+      ArraySlice<ArtMethod> input_virtual_methods;
+      ScopedNullHandle<mirror::PointerArray> null_handle;
+      Handle<mirror::PointerArray> input_vtable_array(null_handle);
       int32_t input_array_length = 0;
-      if (super_interface) {
-        // We are overwriting a super class interface, try to only virtual methods instead of the
+
+      // TODO Cleanup Needed: In the presence of default methods this optimization is rather dirty
+      //      and confusing. Default methods should always look through all the superclasses
+      //      because they are the last choice of an implementation. We get around this by looking
+      //      at the super class's iftable methods (copied into method_array previously) when we are
+      //      looking for the implementation of a super-interface method but that is rather dirty.
+      bool using_virtuals;
+      if (super_interface || is_interface) {
+        // If we are overwriting a super class interface, search only the declared methods instead of the
         // whole vtable.
-        input_virtual_methods = klass->GetVirtualMethodsPtr();
-        input_array_length = klass->NumVirtualMethods();
+        using_virtuals = true;
+        input_virtual_methods = klass->GetDeclaredMethodsSlice(image_pointer_size_);
+        input_array_length = input_virtual_methods.size();
       } else {
-        // A new interface, we need the whole vtable in case a new interface method is implemented
-        // in the whole superclass.
+        // For a new interface, however, we need the whole vtable in case an interface method is
+        // implemented anywhere in the superclass hierarchy.
+        using_virtuals = false;
+        DCHECK(vtable.Get() != nullptr);
         input_vtable_array = vtable;
         input_array_length = input_vtable_array->GetLength();
       }
+
       // For each method in interface
       for (size_t j = 0; j < num_methods; ++j) {
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+        uint32_t imt_index = interface_method->GetImtIndex();
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -5197,11 +6734,10 @@
         //
         // To find defaults we need to do the same but also go over interfaces.
         bool found_impl = false;
-        ArtMethod* default_impl = nullptr;
-        bool found_default_impl = false;
+        ArtMethod* vtable_impl = nullptr;
         for (int32_t k = input_array_length - 1; k >= 0; --k) {
-          ArtMethod* vtable_method = input_virtual_methods != nullptr ?
-              &input_virtual_methods->At(k, method_size, method_alignment) :
+          ArtMethod* vtable_method = using_virtuals ?
+              &input_virtual_methods[k] :
               input_vtable_array->GetElementPtrSize<ArtMethod*>(k, image_pointer_size_);
           ArtMethod* vtable_method_for_name_comparison =
               vtable_method->GetInterfaceMethodIfProxy(image_pointer_size_);
@@ -5215,119 +6751,215 @@
                   "Method '%s' implementing interface method '%s' is not public",
                   PrettyMethod(vtable_method).c_str(), PrettyMethod(interface_method).c_str());
               return false;
-            } else if (vtable_method->IsDefault()) {
+            } else if (UNLIKELY(vtable_method->IsOverridableByDefaultMethod())) {
               // We might have a newer, better, default method for this, so we just skip it. If we
               // are still using this we will select it again when scanning for default methods. To
               // obviate the need to copy the method again we will make a note that we already found
               // a default here.
               // TODO This should be much cleaner.
-              found_default_impl = true;
-              default_impl = vtable_method;
+              vtable_impl = vtable_method;
               break;
             } else {
               found_impl = true;
-            }
-            method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
-            // Place method in imt if entry is empty, place conflict otherwise.
-            SetIMTRef(unimplemented_method,
-                      conflict_method,
-                      image_pointer_size_,
-                      vtable_method,
-                      /*out*/imt_ptr);
-            break;
-          }
-        }
-        // We should only search for default implementations when the class does not implement the
-        // method directly and either (1) the interface is newly implemented on this class and not
-        // on any of its superclasses, (2) the superclass's implementation is a default method, or
-        // (3) the superclass does not have an implementation.
-        if (!found_impl && (!super_interface ||
-                            method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_)
-                                ->IsOverridableByDefaultMethod())) {
-          ArtMethod* current_method = nullptr;
-          std::string icce_message;
-          if (!FindDefaultMethodImplementation(self,
-                                               interface_method,
-                                               klass,
-                                               /*out*/&current_method,
-                                               /*out*/&icce_message)) {
-            // There was a conflict with default method implementations.
-            self->EndAssertNoThreadSuspension(old_cause);
-            // TODO This should actually be thrown when we attempt to invoke this method.
-            ThrowIncompatibleClassChangeError(klass.Get(), "%s", icce_message.c_str());
-            return false;
-          } else if (current_method != nullptr) {
-            if (found_default_impl &&
-                current_method->GetDeclaringClass() == default_impl->GetDeclaringClass()) {
-              // We found a default method but it was the same one we already have from our
-              // superclass. Don't bother adding it to our vtable again.
-              current_method = default_impl;
-            } else {
-              // We found a default method implementation and there were no conflicts.
-              // Save the default method. We need to add it to the vtable.
-              default_methods.push_back(current_method);
-            }
-            method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
-            SetIMTRef(unimplemented_method,
-                      conflict_method,
-                      image_pointer_size_,
-                      current_method,
-                      /*out*/imt_ptr);
-            found_impl = true;
-          }
-        }
-        if (!found_impl && !super_interface) {
-          // It is defined in this class or any of its subclasses.
-          ArtMethod* miranda_method = nullptr;
-          for (auto& mir_method : miranda_methods) {
-            if (interface_name_comparator.HasSameNameAndSignature(mir_method)) {
-              miranda_method = mir_method;
+              if (LIKELY(fill_tables)) {
+                method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
+                // Place method in imt if entry is empty, place conflict otherwise.
+                SetIMTRef(unimplemented_method,
+                          imt_conflict_method,
+                          vtable_method,
+                          /*out*/out_new_conflict,
+                          /*out*/imt_ptr);
+              }
               break;
             }
           }
-          if (miranda_method == nullptr) {
-            miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
-            CHECK(miranda_method != nullptr);
-            // Point the interface table at a phantom slot.
-            new(miranda_method) ArtMethod(*interface_method, image_pointer_size_);
-            miranda_methods.push_back(miranda_method);
-          }
-          method_array->SetElementPtrSize(j, miranda_method, image_pointer_size_);
         }
-      }
-    }
-  }
-  if (!miranda_methods.empty() || !default_methods.empty()) {
-    const size_t old_method_count = klass->NumVirtualMethods();
-    const size_t new_method_count =
-        old_method_count + miranda_methods.size() + default_methods.size();
+        // Continue on to the next method if we are done.
+        if (LIKELY(found_impl)) {
+          continue;
+        } else if (LIKELY(super_interface)) {
+          // Don't look for a default implementation when the super-method is implemented directly
+          // by the class.
+          //
+          // See if we can use the superclass's method and skip searching everything else.
+          // Note: !found_impl && super_interface
+          CHECK(extend_super_iftable);
+          // If this is a super_interface method it is possible we shouldn't override it because a
+          // superclass could have implemented it directly. We fetch the method the superclass used
+          // to implement it so we know whether we may override it with a default method. Doing
+          // this is safe since we know the super_iftable is already filled in, so we can simply
+          // pull the method from there. We don't bother if this is not a superclass's interface,
+          // since in that case we have scanned the entire vtable anyway and would have found it.
+          // TODO This is rather dirty but it is faster than searching through the entire vtable
+          //      every time.
+          ArtMethod* supers_method =
+              method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+          DCHECK(supers_method != nullptr);
+          DCHECK(interface_name_comparator.HasSameNameAndSignature(supers_method));
+          if (LIKELY(!supers_method->IsOverridableByDefaultMethod())) {
+            // The method is not overridable by a default method (i.e. it is directly implemented
+            // in some class). Therefore move onto the next interface method.
+            continue;
+          } else {
+            // If the superclass's method is overridable by a default method, we need to keep
+            // track of it: even though it is overridable, it is not guaranteed to actually be
+            // overridden. If it turns out not to be overridden and we did not keep track of it,
+            // we might add it to the vtable twice, causing corruption (vtable entries having
+            // inconsistent and illegal states, incorrect vtable size, and incorrect or
+            // inconsistent iftable entries) in this class and any subclasses.
+            DCHECK(vtable_impl == nullptr || vtable_impl == supers_method)
+                << "vtable_impl was " << PrettyMethod(vtable_impl) << " and not 'nullptr' or "
+                << PrettyMethod(supers_method) << " as expected. IFTable appears to be corrupt!";
+            vtable_impl = supers_method;
+          }
+        }
+        // If we haven't found it yet, search through the interfaces for default methods.
+        ArtMethod* current_method = nullptr;
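+        // FindDefaultMethodImplementation reports one of three outcomes: a single usable default
+        // (kDefaultFound), several conflicting defaults (kDefaultConflict), or an abstract method
+        // that masks all defaults (kAbstractFound). Each case is handled below.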
+        switch (FindDefaultMethodImplementation(self,
+                                                interface_method,
+                                                klass,
+                                                /*out*/&current_method)) {
+          case DefaultMethodSearchResult::kDefaultConflict: {
+            // Default method conflict.
+            DCHECK(current_method == nullptr);
+            ArtMethod* default_conflict_method = nullptr;
+            if (vtable_impl != nullptr && vtable_impl->IsDefaultConflicting()) {
+              // We can reuse the method from the superclass, don't bother adding it to virtuals.
+              default_conflict_method = vtable_impl;
+            } else {
+              // See if we already have a conflict method for this method.
+              ArtMethod* preexisting_conflict = FindSameNameAndSignature(interface_name_comparator,
+                                                                         default_conflict_methods);
+              if (LIKELY(preexisting_conflict != nullptr)) {
+                // We already have another conflict we can reuse.
+                default_conflict_method = preexisting_conflict;
+              } else {
+                // Note that we do this even if we are an interface, since we need to create this
+                // conflict method and cannot reuse another class's.
+                // Create a new conflict method for this to use.
+                default_conflict_method =
+                    reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
+                new(default_conflict_method) ArtMethod(interface_method, image_pointer_size_);
+                default_conflict_methods.push_back(default_conflict_method);
+              }
+            }
+            current_method = default_conflict_method;
+            break;
+          }  // case kDefaultConflict
+          case DefaultMethodSearchResult::kDefaultFound: {
+            DCHECK(current_method != nullptr);
+            // Found a default method.
+            if (vtable_impl != nullptr &&
+                current_method->GetDeclaringClass() == vtable_impl->GetDeclaringClass()) {
+              // We found a default method but it was the same one we already have from our
+              // superclass. Don't bother adding it to our vtable again.
+              current_method = vtable_impl;
+            } else if (LIKELY(fill_tables)) {
+              // Interfaces don't need to copy default methods since they don't have vtables.
+              // To save space, only record this default method if it is new.
+              // TODO It might be worthwhile to copy default methods on interfaces anyway since it
+              //      would make lookup for interface super much faster. (We would only need to scan
+              //      the iftable to find if there is a NSME or AME.)
+              ArtMethod* old = FindSameNameAndSignature(interface_name_comparator, default_methods);
+              if (old == nullptr) {
+                // We found a default method implementation and there were no conflicts.
+                // Save the default method. We need to add it to the vtable.
+                default_methods.push_back(current_method);
+              } else {
+                CHECK(old == current_method) << "Multiple default implementations selected!";
+              }
+            }
+            break;
+          }  // case kDefaultFound
+          case DefaultMethodSearchResult::kAbstractFound: {
+            DCHECK(current_method == nullptr);
+            // Abstract method masks all defaults.
+            if (vtable_impl != nullptr &&
+                vtable_impl->IsAbstract() &&
+                !vtable_impl->IsDefaultConflicting()) {
+              // We need to make this an abstract method, but the version in the vtable already
+              // is one, so don't do anything.
+              current_method = vtable_impl;
+            }
+            break;
+          }  // case kAbstractFound
+        }
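+        // At this point current_method is the implementation selected above, or null if no
+        // usable implementation was found (a miranda method may still be created below).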
+        if (LIKELY(fill_tables)) {
+          if (current_method == nullptr && !super_interface) {
+            // We could not find an implementation for this method; since this is a brand new
+            // interface, we searched the entire vtable (and all default methods) for an
+            // implementation without finding one. We therefore need to make a miranda method.
+            //
+            // Find out if there is already a miranda method we can use.
+            ArtMethod* miranda_method = FindSameNameAndSignature(interface_name_comparator,
+                                                                 miranda_methods);
+            if (miranda_method == nullptr) {
+              DCHECK(interface_method->IsAbstract()) << PrettyMethod(interface_method);
+              miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
+              CHECK(miranda_method != nullptr);
+              // Point the interface table at a phantom slot.
+              new(miranda_method) ArtMethod(interface_method, image_pointer_size_);
+              miranda_methods.push_back(miranda_method);
+            }
+            current_method = miranda_method;
+          }
+
+          if (current_method != nullptr) {
+            // We found an implementation for this method. Record it in the iftable and IMT.
+            method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
+            SetIMTRef(unimplemented_method,
+                      imt_conflict_method,
+                      current_method,
+                      /*out*/out_new_conflict,
+                      /*out*/imt_ptr);
+          }
+        }
+      }  // For each method in interface end.
+    }  // if (num_methods > 0)
+  }  // For each interface.
+  const bool has_new_virtuals = !(miranda_methods.empty() &&
+                                  default_methods.empty() &&
+                                  default_conflict_methods.empty());
+  // TODO don't extend virtuals of interface unless necessary (when is it?).
+  if (has_new_virtuals) {
+    DCHECK(!is_interface || (default_methods.empty() && miranda_methods.empty()))
+        << "Interfaces should only have default-conflict methods appended to them.";
+    VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
+                       << " default_methods=" << default_methods.size()
+                       << " default_conflict_methods=" << default_conflict_methods.size();
+    const size_t old_method_count = klass->NumMethods();
+    const size_t new_method_count = old_method_count +
+                                    miranda_methods.size() +
+                                    default_methods.size() +
+                                    default_conflict_methods.size();
     // Attempt to realloc to save RAM if possible.
-    LengthPrefixedArray<ArtMethod>* old_virtuals = klass->GetVirtualMethodsPtr();
-    // The Realloced virtual methods aren't visiblef from the class roots, so there is no issue
+    LengthPrefixedArray<ArtMethod>* old_methods = klass->GetMethodsPtr();
+    // The Realloced virtual methods aren't visible from the class roots, so there is no issue
     // where GCs could attempt to mark stale pointers due to memcpy. And since we overwrite the
     // realloced memory with out->CopyFrom, we are guaranteed to have objects in the to space since
     // CopyFrom has internal read barriers.
-    const size_t old_size = old_virtuals != nullptr
-        ? LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count,
-                                                      method_size,
-                                                      method_alignment)
-        : 0u;
+    //
+    // TODO We should maybe move some of this into mirror::Class or at least into another method.
+    const size_t old_size = LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count,
+                                                                        method_size,
+                                                                        method_alignment);
     const size_t new_size = LengthPrefixedArray<ArtMethod>::ComputeSize(new_method_count,
                                                                         method_size,
                                                                         method_alignment);
-    auto* virtuals = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-        runtime->GetLinearAlloc()->Realloc(self, old_virtuals, old_size, new_size));
-    if (UNLIKELY(virtuals == nullptr)) {
+    const size_t old_methods_ptr_size = (old_methods != nullptr) ? old_size : 0;
+    auto* methods = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
+        runtime->GetLinearAlloc()->Realloc(self, old_methods, old_methods_ptr_size, new_size));
+    if (UNLIKELY(methods == nullptr)) {
       self->AssertPendingOOMException();
       self->EndAssertNoThreadSuspension(old_cause);
       return false;
     }
     ScopedArenaUnorderedMap<ArtMethod*, ArtMethod*> move_table(allocator.Adapter());
-    if (virtuals != old_virtuals) {
+    if (methods != old_methods) {
       // Maps from heap allocated miranda method to linear alloc miranda method.
-      StrideIterator<ArtMethod> out = virtuals->Begin(method_size, method_alignment);
-      // Copy over the old methods + miranda methods.
-      for (auto& m : klass->GetVirtualMethods(image_pointer_size_)) {
+      StrideIterator<ArtMethod> out = methods->begin(method_size, method_alignment);
+      // Copy over the old methods.
+      for (auto& m : klass->GetMethods(image_pointer_size_)) {
         move_table.emplace(&m, &*out);
         // The CopyFrom is only necessary to not miss read barriers since Realloc won't do read
         // barriers when it copies.
@@ -5335,22 +6967,20 @@
         ++out;
       }
     }
-    StrideIterator<ArtMethod> out(virtuals->Begin(method_size, method_alignment)
-                                      + old_method_count);
+    StrideIterator<ArtMethod> out(methods->begin(method_size, method_alignment) + old_method_count);
     // Copy over miranda methods before copying vtable since CopyOf may cause thread suspension and
     // we want the roots of the miranda methods to get visited.
     for (ArtMethod* mir_method : miranda_methods) {
       ArtMethod& new_method = *out;
       new_method.CopyFrom(mir_method, image_pointer_size_);
-      new_method.SetAccessFlags(new_method.GetAccessFlags() | kAccMiranda);
+      new_method.SetAccessFlags(new_method.GetAccessFlags() | kAccMiranda | kAccCopied);
       DCHECK_NE(new_method.GetAccessFlags() & kAccAbstract, 0u)
           << "Miranda method should be abstract!";
       move_table.emplace(mir_method, &new_method);
       ++out;
     }
-    // We need to copy the default methods into our own virtual method table since the runtime
-    // requires that every method on a class's vtable be in that respective class's virtual method
-    // table.
+    // We need to copy the default methods into our own method table since the runtime requires that
+    // every method on a class's vtable be in that respective class's virtual method table.
     // NOTE This means that two classes might have the same implementation of a method from the same
     // interface but will have different ArtMethod*s for them. This also means we cannot compare a
     // default method found on a class with one found on the declaring interface directly and must
@@ -5358,112 +6988,167 @@
     for (ArtMethod* def_method : default_methods) {
       ArtMethod& new_method = *out;
       new_method.CopyFrom(def_method, image_pointer_size_);
-      new_method.SetAccessFlags(new_method.GetAccessFlags() | kAccDefault);
-      // Clear the preverified flag if it is present. Since this class hasn't been verified yet it
-      // shouldn't have methods that are preverified.
+      // Clear the kAccSkipAccessChecks flag if it is present. Since this class hasn't been
+      // verified yet, it shouldn't have methods that are skipping access checks.
       // TODO This is rather arbitrary. We should maybe support classes where only some of its
-      // methods are preverified.
-      new_method.SetAccessFlags(new_method.GetAccessFlags() & ~kAccPreverified);
+      // methods are skip_access_checks.
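+      // kAccCopied marks the method as a copy, which lets the runtime recover its true holder
+      // later (see GetHoldingClassOfCopiedMethod).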
+      constexpr uint32_t kSetFlags = kAccDefault | kAccCopied;
+      constexpr uint32_t kMaskFlags = ~kAccSkipAccessChecks;
+      new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
       move_table.emplace(def_method, &new_method);
       ++out;
     }
-    virtuals->SetLength(new_method_count);
-    UpdateClassVirtualMethods(klass.Get(), virtuals);
+    for (ArtMethod* conf_method : default_conflict_methods) {
+      ArtMethod& new_method = *out;
+      new_method.CopyFrom(conf_method, image_pointer_size_);
+      // This is a type of default method (there are default method implementations, just a
+      // conflict), so mark this as a default, non-abstract method, since that's what it is. Also
+      // clear the kAccSkipAccessChecks bit: since this class hasn't been verified yet, it
+      // shouldn't have methods that are skipping access checks.
+      constexpr uint32_t kSetFlags = kAccDefault | kAccDefaultConflict | kAccCopied;
+      constexpr uint32_t kMaskFlags = ~(kAccAbstract | kAccSkipAccessChecks);
+      new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
+      DCHECK(new_method.IsDefaultConflicting());
+      // The actual method might or might not be marked abstract since we just copied it from a
+      // (possibly default) interface method. We need to set its entry point to be the bridge so
+      // that the compiler will not invoke the implementation of whatever method we copied from.
+      EnsureThrowsInvocationError(&new_method);
+      move_table.emplace(conf_method, &new_method);
+      ++out;
+    }
+    methods->SetSize(new_method_count);
+    UpdateClassMethods(klass.Get(), methods);
     // Done copying methods, they are all roots in the class now, so we can end the no thread
     // suspension assert.
     self->EndAssertNoThreadSuspension(old_cause);
 
-    const size_t old_vtable_count = vtable->GetLength();
-    const size_t new_vtable_count =
-        old_vtable_count + miranda_methods.size() + default_methods.size();
-    miranda_methods.clear();
-    vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
-    if (UNLIKELY(vtable.Get() == nullptr)) {
-      self->AssertPendingOOMException();
-      return false;
-    }
-    out = virtuals->Begin(method_size, method_alignment) + old_method_count;
-    size_t vtable_pos = old_vtable_count;
-    for (size_t i = old_method_count; i < new_method_count; ++i) {
-      // Leave the declaring class alone as type indices are relative to it
-      out->SetMethodIndex(0xFFFF & vtable_pos);
-      vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_);
-      ++out;
-      ++vtable_pos;
-    }
-    CHECK_EQ(vtable_pos, new_vtable_count);
-    // Update old vtable methods. We use the default_translations map to figure out what each vtable
-    // entry should be updated to, if they need to be at all.
-    for (size_t i = 0; i < old_vtable_count; ++i) {
-      ArtMethod* translated_method = vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_);
-      // Try and find what we need to change this method to.
-      auto translation_it = default_translations.find(i);
-      bool found_translation = false;
-      if (translation_it != default_translations.end()) {
-        size_t vtable_index;
-        std::tie(vtable_index, translated_method) = *translation_it;
-        DCHECK_EQ(vtable_index, i);
-        found_translation = true;
+    if (fill_tables) {
+      // Update the vtable to the new method structures. We can skip this for interfaces since they
+      // do not have vtables.
+      const size_t old_vtable_count = vtable->GetLength();
+      const size_t new_vtable_count = old_vtable_count +
+                                      miranda_methods.size() +
+                                      default_methods.size() +
+                                      default_conflict_methods.size();
+      vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
+      if (UNLIKELY(vtable.Get() == nullptr)) {
+        self->AssertPendingOOMException();
+        return false;
       }
-      DCHECK(translated_method != nullptr);
-      auto it = move_table.find(translated_method);
-      if (it != move_table.end()) {
-        auto* new_method = it->second;
-        DCHECK(new_method != nullptr);
-        vtable->SetElementPtrSize(i, new_method, image_pointer_size_);
-      } else {
-        // If it was not going to be updated we wouldn't have put it into the default_translations
-        // map.
-        CHECK(!found_translation) << "We were asked to update this vtable entry. Must not fail.";
+      out = methods->begin(method_size, method_alignment) + old_method_count;
+      size_t vtable_pos = old_vtable_count;
+      // Update all the newly copied methods' indices so they denote their placement in the vtable.
+      for (size_t i = old_method_count; i < new_method_count; ++i) {
+        // Leave the declaring class alone; the method's dex_code_item_offset_ and
+        // dex_method_index_ fields are references into the dex file the method was defined in.
+        // Since the ArtMethod does not store that information itself, it uses
+        // declaring_class_->dex_cache_.
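+        // The method index is stored in a 16-bit field, hence the 0xFFFF mask.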
+        out->SetMethodIndex(0xFFFF & vtable_pos);
+        vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_);
+        ++out;
+        ++vtable_pos;
       }
-    }
-
-    klass->SetVTable(vtable.Get());
-    // Go fix up all the stale miranda pointers.
-    for (size_t i = 0; i < ifcount; ++i) {
-      for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
-        auto* method_array = iftable->GetMethodArray(i);
-        auto* m = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
-        DCHECK(m != nullptr) << PrettyClass(klass.Get());
-        auto it = move_table.find(m);
+      CHECK_EQ(vtable_pos, new_vtable_count);
+      // Update old vtable methods. We use the default_translations map to figure out what each
+      // vtable entry should be updated to, if they need to be at all.
+      for (size_t i = 0; i < old_vtable_count; ++i) {
+        ArtMethod* translated_method = vtable->GetElementPtrSize<ArtMethod*>(
+            i, image_pointer_size_);
+        // Try and find what we need to change this method to.
+        auto translation_it = default_translations.find(i);
+        bool found_translation = false;
+        if (translation_it != default_translations.end()) {
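+          // A translation records one of three outcomes for the old vtable slot: it became a
+          // default conflict, it stayed abstract (now backed by a miranda method), or it was
+          // translated directly to a new default implementation.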
+          if (translation_it->second.IsInConflict()) {
+            // Find which conflict method we are to use for this method.
+            MethodNameAndSignatureComparator old_method_comparator(
+                translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+            ArtMethod* new_conflict_method = FindSameNameAndSignature(old_method_comparator,
+                                                                      default_conflict_methods);
+            CHECK(new_conflict_method != nullptr) << "Expected a conflict method!";
+            translated_method = new_conflict_method;
+          } else if (translation_it->second.IsAbstract()) {
+            // Find which miranda method we are to use for this method.
+            MethodNameAndSignatureComparator old_method_comparator(
+                translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+            ArtMethod* miranda_method = FindSameNameAndSignature(old_method_comparator,
+                                                                 miranda_methods);
+            DCHECK(miranda_method != nullptr);
+            translated_method = miranda_method;
+          } else {
+            // Normal default method (changed from an older default or abstract interface method).
+            DCHECK(translation_it->second.IsTranslation());
+            translated_method = translation_it->second.GetTranslation();
+          }
+          found_translation = true;
+        }
+        DCHECK(translated_method != nullptr);
+        auto it = move_table.find(translated_method);
         if (it != move_table.end()) {
-          auto* new_m = it->second;
-          DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
-          method_array->SetElementPtrSize(j, new_m, image_pointer_size_);
+          auto* new_method = it->second;
+          DCHECK(new_method != nullptr);
+          vtable->SetElementPtrSize(i, new_method, image_pointer_size_);
+        } else {
+          // If it was not going to be updated we wouldn't have put it into the default_translations
+          // map.
+          CHECK(!found_translation) << "We were asked to update this vtable entry. Must not fail.";
+        }
+      }
+      klass->SetVTable(vtable.Get());
+
+      // Go fix up all the stale iftable pointers.
+      for (size_t i = 0; i < ifcount; ++i) {
+        for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
+          auto* method_array = iftable->GetMethodArray(i);
+          auto* m = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+          DCHECK(m != nullptr) << PrettyClass(klass.Get());
+          auto it = move_table.find(m);
+          if (it != move_table.end()) {
+            auto* new_m = it->second;
+            DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
+            method_array->SetElementPtrSize(j, new_m, image_pointer_size_);
+          }
+        }
+      }
+
+      // Fix up the IMT next.
+      for (size_t i = 0; i < ImTable::kSize; ++i) {
+        auto it = move_table.find(out_imt[i]);
+        if (it != move_table.end()) {
+          out_imt[i] = it->second;
         }
       }
     }
-    // Fix up IMT in case it has any miranda methods in it.
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      auto it = move_table.find(out_imt[i]);
-      if (it != move_table.end()) {
-        out_imt[i] = it->second;
-      }
-    }
+
     // Check that there are no stale methods in the dex cache array.
     if (kIsDebugBuild) {
       auto* resolved_methods = klass->GetDexCache()->GetResolvedMethods();
       for (size_t i = 0, count = klass->GetDexCache()->NumResolvedMethods(); i < count; ++i) {
         auto* m = mirror::DexCache::GetElementPtrSize(resolved_methods, i, image_pointer_size_);
-        // We don't remove default methods from the move table since we need them to update the
-        // vtable. Therefore just skip them for this check.
-        if (!m->IsDefault()) {
-          CHECK(move_table.find(m) == move_table.end()) << PrettyMethod(m);
-        }
+        CHECK(move_table.find(m) == move_table.end() ||
+              // The original versions of copied methods will still be present so allow those too.
+              // Note that if the first check passes this might fail to GetDeclaringClass().
+              std::find_if(m->GetDeclaringClass()->GetMethods(image_pointer_size_).begin(),
+                           m->GetDeclaringClass()->GetMethods(image_pointer_size_).end(),
+                           [m] (ArtMethod& meth) {
+                             return &meth == m;
+                           }) != m->GetDeclaringClass()->GetMethods(image_pointer_size_).end())
+            << "Obsolete methods " << PrettyMethod(m) << " is in dex cache!";
       }
     }
-    // Put some random garbage in old virtuals to help find stale pointers.
-    if (virtuals != old_virtuals) {
-      memset(old_virtuals, 0xFEu, old_size);
+    // Put some random garbage in old methods to help find stale pointers.
+    if (methods != old_methods && old_methods != nullptr && kIsDebugBuild) {
+      // Need to make sure the GC is not running since it could be scanning the methods we are
+      // about to overwrite.
+      ScopedThreadStateChange tsc(self, kSuspended);
+      gc::ScopedGCCriticalSection gcs(self,
+                                      gc::kGcCauseClassLinker,
+                                      gc::kCollectorTypeClassLinker);
+      memset(old_methods, 0xFEu, old_size);
     }
   } else {
     self->EndAssertNoThreadSuspension(old_cause);
   }
-  if (kIsDebugBuild) {
-    auto* check_vtable = klass->GetVTableDuringLinking();
-    for (int i = 0; i < check_vtable->GetLength(); ++i) {
-      CHECK(check_vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_) != nullptr);
-    }
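+  // Interfaces do not have vtables, so there is nothing to check for them.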
+  if (kIsDebugBuild && !is_interface) {
+    SanityCheckVTable(klass, image_pointer_size_);
   }
   return true;
 }
@@ -5538,6 +7223,20 @@
 
   // we want a relatively stable order so that adding new fields
   // minimizes disruption of C++ versions such as Class and Method.
+  //
+  // The overall sort order is:
+  // 1) All object reference fields, sorted alphabetically.
+  // 2) All java long (64-bit) integer fields, sorted alphabetically.
+  // 3) All java double (64-bit) floating point fields, sorted alphabetically.
+  // 4) All java int (32-bit) integer fields, sorted alphabetically.
+  // 5) All java float (32-bit) floating point fields, sorted alphabetically.
+  // 6) All java char (16-bit) integer fields, sorted alphabetically.
+  // 7) All java short (16-bit) integer fields, sorted alphabetically.
+  // 8) All java boolean (8-bit) integer fields, sorted alphabetically.
+  // 9) All java byte (8-bit) integer fields, sorted alphabetically.
+  //
+  // Once the fields are sorted in this order we will attempt to fill any gaps that might be present
+  // in the memory layout of the structure. See ShuffleForward for how this is done.
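+  //
+  // For example (ignoring gap-filling), a class declaring {int count; Object next; byte flag;
+  // long id;} would have its fields laid out in the order: next, id, count, flag.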
   std::deque<ArtField*> grouped_and_sorted_fields;
   const char* old_no_suspend_cause = self->StartAssertNoThreadSuspension(
       "Naked ArtField references in deque");
@@ -5728,6 +7427,23 @@
   return string;
 }
 
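+// Unlike ResolveString, this is a pure lookup: it consults the dex cache and the strong intern
+// table but never allocates or interns a new string.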
+mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
+                                          uint32_t string_idx,
+                                          Handle<mirror::DexCache> dex_cache) {
+  DCHECK(dex_cache.Get() != nullptr);
+  mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
+  if (resolved != nullptr) {
+    return resolved;
+  }
+  uint32_t utf16_length;
+  const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
+  mirror::String* string = intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
+  if (string != nullptr) {
+    dex_cache->SetResolvedString(string_idx, string);
+  }
+  return string;
+}
+
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
                                         uint16_t type_idx,
                                         mirror::Class* referrer) {
@@ -5771,6 +7487,7 @@
   return resolved;
 }
 
+template <ClassLinker::ResolveMode kResolveMode>
 ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
                                       uint32_t method_idx,
                                       Handle<mirror::DexCache> dex_cache,
@@ -5782,6 +7499,12 @@
   ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx, image_pointer_size_);
   if (resolved != nullptr && !resolved->IsRuntimeMethod()) {
     DCHECK(resolved->GetDeclaringClassUnchecked() != nullptr) << resolved->GetDexMethodIndex();
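+    // Even on a dex cache hit, callers may force an IncompatibleClassChangeError check to catch
+    // invoke-type mismatches against the cached method.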
+    if (kResolveMode == ClassLinker::kForceICCECheck) {
+      if (resolved->CheckIncompatibleClassChange(type)) {
+        ThrowIncompatibleClassChangeError(type, resolved->GetInvokeType(), resolved, referrer);
+        return nullptr;
+      }
+    }
     return resolved;
   }
   // Fail, get the declaring class.
@@ -5800,10 +7523,44 @@
       DCHECK(resolved == nullptr || resolved->GetDeclaringClassUnchecked() != nullptr);
       break;
     case kInterface:
-      resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
-      DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
+      // We have to check whether the method id really belongs to an interface (dex static bytecode
+      // constraint A15). Otherwise you must not invoke-interface on it.
+      //
+      // This is not symmetric to A12-A14 (direct, static, virtual), as using FindInterfaceMethod
+      // assumes that the given type is an interface, and will check the interface table if the
+      // method isn't declared in the class. So it may find an interface method (usually by name
+      // in the handling below, but we do the constraint check early). In that case,
+      // CheckIncompatibleClassChange will succeed (as it is called on an interface method)
+      // unexpectedly.
+      // Example:
+      //    interface I {
+      //      foo()
+      //    }
+      //    class A implements I {
+      //      ...
+      //    }
+      //    class B extends A {
+      //      ...
+      //    }
+      //    invoke-interface B.foo
+      //      -> FindInterfaceMethod finds I.foo (interface method), not A.foo (miranda method)
+      if (UNLIKELY(!klass->IsInterface())) {
+        ThrowIncompatibleClassChangeError(klass,
+                                          "Found class %s, but interface was expected",
+                                          PrettyDescriptor(klass).c_str());
+        return nullptr;
+      } else {
+        resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+        DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
+      }
       break;
-    case kSuper:  // Fall-through.
+    case kSuper:
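+      // Interfaces have no vtable. An invoke-super that resolves through an interface (a
+      // default method super call) must therefore search the interface's declared methods.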
+      if (klass->IsInterface()) {
+        resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+      } else {
+        resolved = klass->FindVirtualMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+      }
+      break;
     case kVirtual:
       resolved = klass->FindVirtualMethod(dex_cache.Get(), method_idx, image_pointer_size_);
       break;
@@ -5825,7 +7582,13 @@
         resolved = klass->FindInterfaceMethod(name, signature, image_pointer_size_);
         DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
         break;
-      case kSuper:  // Fall-through.
+      case kSuper:
+        if (klass->IsInterface()) {
+          resolved = klass->FindInterfaceMethod(name, signature, image_pointer_size_);
+        } else {
+          resolved = klass->FindVirtualMethod(name, signature, image_pointer_size_);
+        }
+        break;
       case kVirtual:
         resolved = klass->FindVirtualMethod(name, signature, image_pointer_size_);
         break;
@@ -5983,7 +7746,7 @@
   }
 
   if (is_static) {
-    resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+    resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
   } else {
     resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
@@ -6050,7 +7813,7 @@
  public:
   explicit DumpClassVisitor(int flags) : flags_(flags) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     klass->DumpClass(LOG(ERROR), flags_);
     return true;
   }
@@ -6109,8 +7872,8 @@
 
 void ClassLinker::DumpForSigQuit(std::ostream& os) {
   ScopedObjectAccess soa(Thread::Current());
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   ReaderMutexLock mu(soa.Self(), *Locks::classlinker_classes_lock_);
   os << "Zygote loaded classes=" << NumZygoteClasses() << " post zygote classes="
@@ -6147,8 +7910,8 @@
 }
 
 size_t ClassLinker::NumLoadedClasses() {
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   // Only return non-zygote classes since these are the ones which apps care about.
@@ -6223,35 +7986,8 @@
   return descriptor;
 }
 
-bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) {
-  Runtime* const runtime = Runtime::Current();
-  if (runtime->UseJit()) {
-    // JIT can have direct code pointers from any method to any other method.
-    return true;
-  }
-  // Non-image methods don't use direct code pointer.
-  if (!m->GetDeclaringClass()->IsBootStrapClassLoaded()) {
-    return false;
-  }
-  if (m->IsPrivate()) {
-    // The method can only be called inside its own oat file. Therefore it won't be called using
-    // its direct code if the oat file has been compiled in PIC mode.
-    const DexFile& dex_file = m->GetDeclaringClass()->GetDexFile();
-    const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
-    if (oat_dex_file == nullptr) {
-      // No oat file: the method has not been compiled.
-      return false;
-    }
-    const OatFile* oat_file = oat_dex_file->GetOatFile();
-    return oat_file != nullptr && !oat_file->IsPic();
-  } else {
-    // The method can be called outside its own oat file. Therefore it won't be called using its
-    // direct code pointer only if all loaded oat files have been compiled in PIC mode.
-    return runtime->GetOatFileManager().HaveNonPicOatFile();
-  }
-}
-
-jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self,
+                                           const std::vector<const DexFile*>& dex_files) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
@@ -6277,10 +8013,13 @@
   ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
   DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
+  ArtField* file_name_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->GetType<false>());
+
   // Fill the elements array.
   int32_t index = 0;
   for (const DexFile* dex_file : dex_files) {
-    StackHandleScope<3> hs2(self);
+    StackHandleScope<4> hs2(self);
 
     // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
     // oat file but we can leave it null.
@@ -6295,6 +8034,11 @@
     DCHECK(h_dex_file.Get() != nullptr);
     cookie_field->SetObject<false>(h_dex_file.Get(), h_long_array.Get());
 
+    Handle<mirror::String> h_file_name = hs2.NewHandle(
+        mirror::String::AllocFromModifiedUtf8(self, dex_file->GetLocation().c_str()));
+    DCHECK(h_file_name.Get() != nullptr);
+    file_name_field->SetObject<false>(h_dex_file.Get(), h_file_name.Get());
+
     Handle<mirror::Object> h_element = hs2.NewHandle(h_dex_element_class->AllocObject(self));
     DCHECK(h_element.Get() != nullptr);
     element_file_field->SetObject<false>(h_element.Get(), h_dex_file.Get());
@@ -6339,12 +8083,12 @@
   return soa.Env()->NewGlobalRef(local_ref.get());
 }
 
-ArtMethod* ClassLinker::CreateRuntimeMethod() {
+ArtMethod* ClassLinker::CreateRuntimeMethod(LinearAlloc* linear_alloc) {
   const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
   const size_t method_size = ArtMethod::Size(image_pointer_size_);
   LengthPrefixedArray<ArtMethod>* method_array = AllocArtMethodArray(
       Thread::Current(),
-      Runtime::Current()->GetLinearAlloc(),
+      linear_alloc,
       1);
   ArtMethod* method = &method_array->At(0, method_size, method_alignment);
   CHECK(method != nullptr);
@@ -6358,6 +8102,16 @@
   find_array_class_cache_next_victim_ = 0;
 }
 
+void ClassLinker::ClearClassTableStrongRoots() const {
+  Thread* const self = Thread::Current();
+  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  for (const ClassLoaderData& data : class_loaders_) {
+    if (data.class_table != nullptr) {
+      data.class_table->ClearStrongRoots();
+    }
+  }
+}
+
 void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const {
   Thread* const self = Thread::Current();
   for (const ClassLoaderData& data : class_loaders_) {
@@ -6372,12 +8126,11 @@
 void ClassLinker::InsertDexFileInToClassLoader(mirror::Object* dex_file,
                                                mirror::ClassLoader* class_loader) {
   DCHECK(dex_file != nullptr);
-  DCHECK(class_loader != nullptr);
   Thread* const self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  ClassTable* const table = class_loader->GetClassTable();
+  ClassTable* const table = ClassTableForClassLoader(class_loader);
   DCHECK(table != nullptr);
-  if (table->InsertDexFile(dex_file)) {
+  if (table->InsertStrongRoot(dex_file) && class_loader != nullptr) {
     // It was not already inserted, perform the write barrier to let the GC know the class loader's
     // class table was modified.
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
@@ -6386,22 +8139,186 @@
 
 void ClassLinker::CleanupClassLoaders() {
   Thread* const self = Thread::Current();
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
-  for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
-    const ClassLoaderData& data = *it;
-    // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
-    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
-    if (class_loader != nullptr) {
-      ++it;
-    } else {
-      // Weak reference was cleared, delete the data associated with this class loader.
-      delete data.class_table;
-      delete data.allocator;
-      vm->DeleteWeakGlobalRef(self, data.weak_root);
-      it = class_loaders_.erase(it);
+  std::vector<ClassLoaderData> to_delete;
+  // Do the delete outside the lock to avoid a lock violation in the JIT code cache.
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
+      const ClassLoaderData& data = *it;
+      // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
+      auto* const class_loader =
+          down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
+      if (class_loader != nullptr) {
+        ++it;
+      } else {
+        VLOG(class_linker) << "Freeing class loader";
+        to_delete.push_back(data);
+        it = class_loaders_.erase(it);
+      }
     }
   }
+  for (ClassLoaderData& data : to_delete) {
+    DeleteClassLoader(self, data);
+  }
 }
 
+std::set<DexCacheResolvedClasses> ClassLinker::GetResolvedClasses(bool ignore_boot_classes) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  ScopedObjectAccess soa(Thread::Current());
+  ScopedAssertNoThreadSuspension ants(soa.Self(), __FUNCTION__);
+  std::set<DexCacheResolvedClasses> ret;
+  VLOG(class_linker) << "Collecting resolved classes";
+  const uint64_t start_time = NanoTime();
+  ReaderMutexLock mu(soa.Self(), *DexLock());
+  // Loop through all the dex caches and inspect resolved classes.
+  for (const ClassLinker::DexCacheData& data : GetDexCachesData()) {
+    if (soa.Self()->IsJWeakCleared(data.weak_root)) {
+      continue;
+    }
+    mirror::DexCache* dex_cache =
+        down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root));
+    if (dex_cache == nullptr) {
+      continue;
+    }
+    const DexFile* dex_file = dex_cache->GetDexFile();
+    const std::string& location = dex_file->GetLocation();
+    const size_t num_class_defs = dex_file->NumClassDefs();
+    // Use the resolved types; this will miss array classes.
+    const size_t num_types = dex_file->NumTypeIds();
+    VLOG(class_linker) << "Collecting class profile for dex file " << location
+                       << " types=" << num_types << " class_defs=" << num_class_defs;
+    DexCacheResolvedClasses resolved_classes(dex_file->GetLocation(),
+                                             dex_file->GetBaseLocation(),
+                                             dex_file->GetLocationChecksum());
+    size_t num_resolved = 0;
+    std::unordered_set<uint16_t> class_set;
+    CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
+    for (size_t i = 0; i < num_types; ++i) {
+      mirror::Class* klass = dex_cache->GetResolvedType(i);
+      // Filter out null class loader since that is the boot class loader.
+      if (klass == nullptr || (ignore_boot_classes && klass->GetClassLoader() == nullptr)) {
+        continue;
+      }
+      ++num_resolved;
+      DCHECK(!klass->IsProxyClass());
+      if (!klass->IsResolved()) {
+        DCHECK(klass->IsErroneous());
+        continue;
+      }
+      mirror::DexCache* klass_dex_cache = klass->GetDexCache();
+      if (klass_dex_cache == dex_cache) {
+        const size_t class_def_idx = klass->GetDexClassDefIndex();
+        DCHECK(klass->IsResolved());
+        CHECK_LT(class_def_idx, num_class_defs);
+        class_set.insert(class_def_idx);
+      }
+    }
+
+    if (!class_set.empty()) {
+      auto it = ret.find(resolved_classes);
+      if (it != ret.end()) {
+        // Already have the key, union the class def idxs.
+        it->AddClasses(class_set.begin(), class_set.end());
+      } else {
+        resolved_classes.AddClasses(class_set.begin(), class_set.end());
+        ret.insert(resolved_classes);
+      }
+    }
+
+    VLOG(class_linker) << "Dex location " << location << " has " << num_resolved << " / "
+                       << num_class_defs << " resolved classes";
+  }
+  VLOG(class_linker) << "Collecting class profile took " << PrettyDuration(NanoTime() - start_time);
+  return ret;
+}
+
+std::unordered_set<std::string> ClassLinker::GetClassDescriptorsForProfileKeys(
+    const std::set<DexCacheResolvedClasses>& classes) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  std::unordered_set<std::string> ret;
+  Thread* const self = Thread::Current();
+  std::unordered_map<std::string, const DexFile*> location_to_dex_file;
+  ScopedObjectAccess soa(self);
+  ScopedAssertNoThreadSuspension ants(soa.Self(), __FUNCTION__);
+  ReaderMutexLock mu(self, *DexLock());
+  for (const ClassLinker::DexCacheData& data : GetDexCachesData()) {
+    if (!self->IsJWeakCleared(data.weak_root)) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root));
+      if (dex_cache != nullptr) {
+        const DexFile* dex_file = dex_cache->GetDexFile();
+        // There could be duplicates if two dex files with the same location are mapped.
+        location_to_dex_file.emplace(
+            ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation()), dex_file);
+      }
+    }
+  }
+  for (const DexCacheResolvedClasses& info : classes) {
+    const std::string& profile_key = info.GetDexLocation();
+    auto found = location_to_dex_file.find(profile_key);
+    if (found != location_to_dex_file.end()) {
+      const DexFile* dex_file = found->second;
+      VLOG(profiler) << "Found opened dex file for " << dex_file->GetLocation() << " with "
+                     << info.GetClasses().size() << " classes";
+      DCHECK_EQ(dex_file->GetLocationChecksum(), info.GetLocationChecksum());
+      for (uint16_t class_def_idx : info.GetClasses()) {
+        if (class_def_idx >= dex_file->NumClassDefs()) {
+          LOG(WARNING) << "Class def index " << class_def_idx << " >= " << dex_file->NumClassDefs();
+          continue;
+        }
+        const DexFile::TypeId& type_id = dex_file->GetTypeId(
+            dex_file->GetClassDef(class_def_idx).class_idx_);
+        const char* descriptor = dex_file->GetTypeDescriptor(type_id);
+        ret.insert(descriptor);
+      }
+    } else {
+      VLOG(class_linker) << "Failed to find opened dex file for profile key " << profile_key;
+    }
+  }
+  return ret;
+}
+
+class ClassLinker::FindVirtualMethodHolderVisitor : public ClassVisitor {
+ public:
+  FindVirtualMethodHolderVisitor(const ArtMethod* method, PointerSize pointer_size)
+      : method_(method),
+        pointer_size_(pointer_size) {}
+
+  bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE {
+    if (klass->GetVirtualMethodsSliceUnchecked(pointer_size_).Contains(method_)) {
+      holder_ = klass;
+    }
+    // Return false to stop searching if holder_ is not null.
+    return holder_ == nullptr;
+  }
+
+  mirror::Class* holder_ = nullptr;
+  const ArtMethod* const method_;
+  const PointerSize pointer_size_;
+};
+
+mirror::Class* ClassLinker::GetHoldingClassOfCopiedMethod(ArtMethod* method) {
+  ScopedTrace trace(__FUNCTION__);  // Since this function is slow, have a trace to notify people.
+  CHECK(method->IsCopied());
+  FindVirtualMethodHolderVisitor visitor(method, image_pointer_size_);
+  VisitClasses(&visitor);
+  return visitor.holder_;
+}
+
+// Explicitly instantiate ResolveMethod for both resolve modes.
+template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
+    const DexFile& dex_file,
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    ArtMethod* referrer,
+    InvokeType type);
+template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+    const DexFile& dex_file,
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    ArtMethod* referrer,
+    InvokeType type);
+
 }  // namespace art
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index a2d38ac..c3ab8c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -17,16 +17,20 @@
 #ifndef ART_RUNTIME_CLASS_LINKER_H_
 #define ART_RUNTIME_CLASS_LINKER_H_
 
+#include <set>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
 #include "base/allocator.h"
+#include "base/enums.h"
 #include "base/hash_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "class_table.h"
+#include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "jni.h"
@@ -50,6 +54,7 @@
   class StackTraceElement;
 }  // namespace mirror
 
+class ImtConflictTable;
 template<class T> class Handle;
 template<class T> class MutableHandle;
 class InternTable;
@@ -60,6 +65,13 @@
 
 enum VisitRootFlags : uint8_t;
 
+class ClassVisitor {
+ public:
+  virtual ~ClassVisitor() {}
+  // Return true to continue visiting.
+  virtual bool operator()(mirror::Class* klass) = 0;
+};
+
 class ClassLoaderVisitor {
  public:
   virtual ~ClassLoaderVisitor() {}
@@ -115,12 +127,35 @@
   ~ClassLinker();
 
   // Initialize class linker by bootstrapping from dex files.
-  void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path)
+  bool InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
+                        std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  // Initialize class linker from one or more images.
-  void InitFromImage() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+  // Initialize class linker from one or more boot images.
+  bool InitFromBootImage(std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
+
+  // Add an image space to the class linker; this may fix up class loader fields and dex cache
+  // fields. The dex files that were newly opened for the space are placed in the out argument
+  // out_dex_files. Returns true if the operation succeeded.
+  // The space must already be added to the heap before calling AddImageSpace, since we need to
+  // properly handle read barriers and object marking.
+  bool AddImageSpace(gc::space::ImageSpace* space,
+                     Handle<mirror::ClassLoader> class_loader,
+                     jobjectArray dex_elements,
+                     const char* dex_location,
+                     std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
+                     std::string* error_msg)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool OpenImageDexFiles(gc::space::ImageSpace* space,
+                         std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
+                         std::string* error_msg)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
@@ -214,8 +249,14 @@
                                 Handle<mirror::DexCache> dex_cache)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Find a String with the given index from the DexFile, storing the
+  // result in the DexCache if found. Return null if not found.
+  mirror::String* LookupString(const DexFile& dex_file, uint32_t string_idx,
+                               Handle<mirror::DexCache> dex_cache)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Resolve a Type with the given index from the DexFile, storing the
-  // result in the DexCache. The referrer is used to identity the
+  // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -243,11 +284,19 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
+  // Determine whether a dex cache result should be trusted, or an IncompatibleClassChangeError
+  // check should be performed even after a hit.
+  enum ResolveMode {  // private.
+    kNoICCECheckForCache,
+    kForceICCECheck
+  };
+
   // Resolve a method with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. What is unique is the method type argument which
   // is used to determine if this method is a direct, static, or
   // virtual method.
+  template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(const DexFile& dex_file,
                            uint32_t method_idx,
                            Handle<mirror::DexCache> dex_cache,
@@ -259,6 +308,16 @@
 
   ArtMethod* GetResolvedMethod(uint32_t method_idx, ArtMethod* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // This returns the class referred to by GetMethodId(method_idx).class_idx_. This might be
+  // different from the declaring class of the resolved method due to copied
+  // miranda/default/conflict methods.
+  mirror::Class* ResolveReferencedClassOfMethod(uint32_t method_idx,
+                                                Handle<mirror::DexCache> dex_cache,
+                                                Handle<mirror::ClassLoader> class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+  template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, ArtMethod* referrer, InvokeType type)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
@@ -319,7 +378,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  mirror::DexCache* RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc)
+  mirror::DexCache* RegisterDexFile(const DexFile& dex_file,
+                                    mirror::ClassLoader* class_loader)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
@@ -398,7 +458,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  void VerifyClass(Thread* self, Handle<mirror::Class> klass)
+  void VerifyClass(Thread* self,
+                   Handle<mirror::Class> klass,
+                   LogSeverity log_level = LogSeverity::NONE)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file,
@@ -406,11 +468,10 @@
                                mirror::Class::Status& oat_file_class_status)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
-  void ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
-                                         Handle<mirror::Class> klass)
+  void ResolveClassExceptionHandlerTypes(Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
-  void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, ArtMethod* klass)
+  void ResolveMethodExceptionHandlerTypes(ArtMethod* klass)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -423,6 +484,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ArtMethod* FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -431,12 +493,6 @@
   const void* GetQuickOatCodeFor(ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Get the oat code for a method from a method index.
-  const void* GetQuickOatCodeFor(const DexFile& dex_file,
-                                 uint16_t class_def_idx,
-                                 uint32_t method_idx)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Get compiled code for a method, return null if no code
   // exists. This is unlike Get..OatCodeFor which will return a bridge
   // or interpreter entrypoint.
@@ -487,10 +543,17 @@
     return class_roots;
   }
 
-  // Move all of the image classes into the class table for faster lookups.
-  void MoveImageClassesToClassTable()
+  // Move all of the boot image classes into the class table for faster lookups.
+  void AddBootImageClassesToClassTable()
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Add image classes to the class table.
+  void AddImageClassesToClassTable(std::vector<gc::space::ImageSpace*> image_spaces,
+                                   mirror::ClassLoader* class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Move the class table to the pre-zygote table to reduce memory usage. This works by ensuring
   // that no more classes are ever added to the pre-zygote table, which ensures that its pages
   // always remain shared dirty instead of private dirty.
@@ -498,18 +561,13 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Returns true if the method can be called with its direct code pointer, false otherwise.
-  bool MayBeCalledWithDirectCodePointer(ArtMethod* m)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
-
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
+  jobject CreatePathClassLoader(Thread* self, const std::vector<const DexFile*>& dex_files)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  size_t GetImagePointerSize() const {
-    DCHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_;
+  PointerSize GetImagePointerSize() const {
     return image_pointer_size_;
   }
 
@@ -518,7 +576,7 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* CreateRuntimeMethod();
+  ArtMethod* CreateRuntimeMethod(LinearAlloc* linear_alloc);
 
   // Clear the ArrayClass cache. This is necessary when cleaning up for the image, as the cache
   // entries are roots, but potentially not image classes.
@@ -531,19 +589,80 @@
 
   // Unlike GetOrCreateAllocatorForClassLoader, GetAllocatorForClassLoader asserts that the
   // allocator for this class loader is already created.
-  static LinearAlloc* GetAllocatorForClassLoader(mirror::ClassLoader* class_loader)
+  LinearAlloc* GetAllocatorForClassLoader(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return the linear alloc for a class loader if it is already allocated, otherwise allocate and
   // set it. TODO: Consider using a lock other than classlinker_classes_lock_.
-  static LinearAlloc* GetOrCreateAllocatorForClassLoader(mirror::ClassLoader* class_loader)
+  LinearAlloc* GetOrCreateAllocatorForClassLoader(mirror::ClassLoader* class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // May be called with null class_loader due to legacy code. b/27954959
   void InsertDexFileInToClassLoader(mirror::Object* dex_file, mirror::ClassLoader* class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static bool ShouldUseInterpreterEntrypoint(ArtMethod* method, const void* quick_code)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  std::set<DexCacheResolvedClasses> GetResolvedClasses(bool ignore_boot_classes)
+      REQUIRES(!dex_lock_);
+
+  std::unordered_set<std::string> GetClassDescriptorsForProfileKeys(
+      const std::set<DexCacheResolvedClasses>& classes)
+      REQUIRES(!dex_lock_);
+
+  static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ArtMethod* AddMethodToConflictTable(mirror::Class* klass,
+                                      ArtMethod* conflict_method,
+                                      ArtMethod* interface_method,
+                                      ArtMethod* method,
+                                      bool force_new_conflict_method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Create a conflict table with a specified capacity.
+  ImtConflictTable* CreateImtConflictTable(size_t count, LinearAlloc* linear_alloc);
+
+  // Static version for when the class linker is not yet created.
+  static ImtConflictTable* CreateImtConflictTable(size_t count,
+                                                  LinearAlloc* linear_alloc,
+                                                  PointerSize pointer_size);
+
+  // Create the IMT and conflict tables for a class.
+  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Clear class table strong roots (other than classes themselves). This is done by dex2oat to
+  // allow pruning dex caches.
+  void ClearClassTableStrongRoots() const
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Throw the class initialization failure recorded when first trying to initialize the given
+  // class.
+  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
+
+  // Get the actual holding class for a copied method. Pretty slow, don't call often.
+  mirror::Class* GetHoldingClassOfCopiedMethod(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  struct DexCacheData {
+    // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
+    // not work properly.
+    jweak weak_root;
+    // The following two fields are caches of the DexCache's fields, kept here to avoid the
+    // unnecessary jweak decode that triggers read barriers (which would mark the caches alive
+    // unnecessarily and interfere with class unloading).
+    const DexFile* dex_file;
+    GcRoot<mirror::Class>* resolved_types;
+  };
+
  private:
   struct ClassLoaderData {
     jweak weak_root;  // Weak root to enable class unloading.
@@ -551,6 +670,18 @@
     LinearAlloc* allocator;
   };
 
+  // Ensures that the supertype of 'klass' ('supertype') is verified. Returns false and throws
+  // appropriate exceptions if verification failed hard. Returns true for successful verification or
+  // soft-failures.
+  bool AttemptSupertypeVerification(Thread* self,
+                                    Handle<mirror::Class> klass,
+                                    Handle<mirror::Class> supertype)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void VisitClassLoaders(ClassLoaderVisitor* visitor) const
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
@@ -568,7 +699,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FinishInit(Thread* self)
-  SHARED_REQUIRES(Locks::mutator_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // For early bootstrapping by Init
@@ -705,9 +836,87 @@
   bool LinkMethods(Thread* self,
                    Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                   bool* out_new_conflict,
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Does whatever is needed to ensure that the compiler will not generate a direct invoke to this
+  // method. Should only be called on non-invokable methods.
+  void EnsureThrowsInvocationError(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // A wrapper class representing the result of a method translation used for linking methods and
+  // updating superclass default methods. For each method in a class's vtable there are 4 states it
+  // could be in:
+  // 1) No translation is necessary. In this case there is no MethodTranslation object for it. This
+  //    is the standard case and is true when the method is not overridable by a default method,
+  //    the class defines a concrete implementation of the method, the default method implementation
+  //    remains the same, or an abstract method stayed abstract.
+  // 2) The method must be translated to a different default method. We note this with
+  //    CreateTranslatedMethod.
+  // 3) The method must be replaced with a conflict method. This happens when a superclass
+  //    implements an interface with a default method and this class implements an unrelated
+  //    interface that also defines that default method. We note this with CreateConflictingMethod.
+  // 4) The method must be replaced with an abstract miranda method. This happens when a superclass
+  //    implements an interface with a default method and this class implements a subinterface of
+  //    the superclass's interface which declares the default method abstract. We note this with
+  //    CreateAbstractMethod.
+  //
+  // When a method translation is unnecessary (case #1), we don't put it into the
+  // default_translations map. So an instance of MethodTranslation must be in one of #2-#4.
+  class MethodTranslation {
+   public:
+    // This slot must become a default conflict method.
+    static MethodTranslation CreateConflictingMethod() {
+      return MethodTranslation(Type::kConflict, /*translation*/nullptr);
+    }
+
+    // This slot must become an abstract method.
+    static MethodTranslation CreateAbstractMethod() {
+      return MethodTranslation(Type::kAbstract, /*translation*/nullptr);
+    }
+
+    // Use the given method as the current value for this vtable slot during translation.
+    static MethodTranslation CreateTranslatedMethod(ArtMethod* new_method) {
+      return MethodTranslation(Type::kTranslation, new_method);
+    }
+
+    // Returns true if this is a method that must become a conflict method.
+    bool IsInConflict() const {
+      return type_ == Type::kConflict;
+    }
+
+    // Returns true if this is a method that must become an abstract method.
+    bool IsAbstract() const {
+      return type_ == Type::kAbstract;
+    }
+
+    // Returns true if this is a method that must become a different method.
+    bool IsTranslation() const {
+      return type_ == Type::kTranslation;
+    }
+
+    // Get the translated version of this method.
+    ArtMethod* GetTranslation() const {
+      DCHECK(IsTranslation());
+      DCHECK(translation_ != nullptr);
+      return translation_;
+    }
+
+   private:
+    enum class Type {
+      kTranslation,
+      kConflict,
+      kAbstract,
+    };
+
+    MethodTranslation(Type type, ArtMethod* translation)
+        : translation_(translation), type_(type) {}
+
+    ArtMethod* const translation_;
+    const Type type_;
+  };
+
   // Links the virtual methods for the given class and records any default methods that will need to
   // be updated later.
   //
@@ -724,9 +933,10 @@
   //                          scan, we therefore store the vtable indices that might need to be
   //                          updated with the method they will turn into.
   // TODO This whole default_translations thing is very dirty. There should be a better way.
-  bool LinkVirtualMethods(Thread* self,
-                          Handle<mirror::Class> klass,
-                          /*out*/std::unordered_map<size_t, ArtMethod*>* default_translations)
+  bool LinkVirtualMethods(
+      Thread* self,
+      Handle<mirror::Class> klass,
+      /*out*/std::unordered_map<size_t, MethodTranslation>* default_translations)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets up the interface lookup table (IFTable) in the correct order to allow searching for
@@ -736,6 +946,13 @@
                                  Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  enum class DefaultMethodSearchResult {
+    kDefaultFound,
+    kAbstractFound,
+    kDefaultConflict
+  };
+
   // Find the default method implementation for 'interface_method' in 'klass', if one exists.
   //
   // Arguments:
@@ -743,31 +960,32 @@
   // * target_method - The method we are trying to find a default implementation for.
   // * klass - The class we are searching for a definition of target_method.
   // * out_default_method - The pointer we will store the found default method to on success.
-  // * icce_message - A string we will store an appropriate IncompatibleClassChangeError message
-  //                  into in case of failure. Note we must do it this way since we do not know
-  //                  whether we can allocate the exception object, which could cause us to go to
-  //                  sleep.
   //
   // Return value:
-  // * True - There were no conflicting method implementations found in the class while searching
-  //          for target_method. The default method implementation is stored into out_default_method
-  //          if it was found.  Otherwise *out_default_method will be set to nullptr.
-  // * False - Conflicting method implementations were found when searching for target_method. The
-  //           value of *out_default_method is undefined and *icce_message is a string that should
-  //           be used to create an IncompatibleClassChangeError as soon as possible.
-  bool FindDefaultMethodImplementation(Thread* self,
-                                       ArtMethod* target_method,
-                                       Handle<mirror::Class> klass,
-                                       /*out*/ArtMethod** out_default_method,
-                                       /*out*/std::string* icce_message) const
+  // * kDefaultFound - There were no conflicting method implementations found in the class while
+  //                   searching for target_method. The default method implementation is stored into
+  //                   out_default_method.
+  // * kAbstractFound - There were no conflicting method implementations found in the class while
+  //                   searching for target_method, but no default implementation was found either.
+  //                   out_default_method is set to null and the method should be considered not
+  //                   implemented.
+  // * kDefaultConflict - Conflicting method implementations were found when searching for
+  //                      target_method. The value of *out_default_method is null.
+  DefaultMethodSearchResult FindDefaultMethodImplementation(
+      Thread* self,
+      ArtMethod* target_method,
+      Handle<mirror::Class> klass,
+      /*out*/ArtMethod** out_default_method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
   // methods. See LinkVirtualMethods for an explanation of what default_translations is.
-  bool LinkInterfaceMethods(Thread* self,
-                            Handle<mirror::Class> klass,
-                            const std::unordered_map<size_t, ArtMethod*>& default_translations,
-                            ArtMethod** out_imt)
+  bool LinkInterfaceMethods(
+      Thread* self,
+      Handle<mirror::Class> klass,
+      const std::unordered_map<size_t, MethodTranslation>& default_translations,
+      bool* out_new_conflict,
+      ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
@@ -797,7 +1015,8 @@
   size_t GetDexCacheCount() SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
     return dex_caches_.size();
   }
-  const std::list<jweak>& GetDexCaches() SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
+  const std::list<DexCacheData>& GetDexCachesData()
+      SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
     return dex_caches_;
   }
 
@@ -806,17 +1025,25 @@
   void CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Ensures that methods have the kAccPreverified bit set. We use the kAccPreverfied bit on the
-  // class access flags to determine whether this has been done before.
-  void EnsurePreverifiedMethods(Handle<mirror::Class> c)
+  // Ensures that methods have the kAccSkipAccessChecks bit set. We use the
+  // kAccVerificationAttempted bit on the class access flags to determine whether this has been done
+  // before.
+  void EnsureSkipAccessChecksMethods(Handle<mirror::Class> c)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::Class* LookupClassFromImage(const char* descriptor)
+  mirror::Class* LookupClassFromBootImage(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Register a class loader and create its class table and allocator. Should not be called if
+  // these are already created.
+  void RegisterClassLoader(mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(Locks::classlinker_classes_lock_);
+
   // Returns null if not found.
   ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Insert a new class table if not found.
   ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -842,32 +1069,63 @@
   // Return the quick generic JNI stub for testing.
   const void* GetRuntimeQuickGenericJniStub() const;
 
-  // Throw the class initialization failure recorded when first trying to initialize the given
-  // class.
-  // Note: Currently we only store the descriptor, so we cannot throw the exact throwable, only
-  //       a recreation with a custom string.
-  void ThrowEarlierClassFailure(mirror::Class* c)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
-
-  bool HasInitWithString(Thread* self, const char* descriptor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
-
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void UpdateClassVirtualMethods(mirror::Class* klass,
-                                 LengthPrefixedArray<ArtMethod>* new_methods)
+  void UpdateClassMethods(mirror::Class* klass,
+                          LengthPrefixedArray<ArtMethod>* new_methods)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
+  // new_class_set is the set of classes that were read from the class table section in the image.
+  // If there was no class table section, it is null.
+  bool UpdateAppImageClassLoadersAndDexCaches(
+      gc::space::ImageSpace* space,
+      Handle<mirror::ClassLoader> class_loader,
+      Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
+      ClassTable::ClassSet* new_class_set,
+      bool* out_forward_dex_cache_array,
+      std::string* out_error_msg)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Check that c1 == FindSystemClass(self, descriptor). Abort with class dumps otherwise.
+  void CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Sets imt_ref appropriately for LinkInterfaceMethods.
+  // If there is no method in the imt location of imt_ref, it will store the given method there.
+  // Otherwise it will set the conflict method, which will figure out which method to use at
+  // runtime.
+  void SetIMTRef(ArtMethod* unimplemented_method,
+                 ArtMethod* imt_conflict_method,
+                 ArtMethod* current_method,
+                 /*out*/bool* new_conflict,
+                 /*out*/ArtMethod** imt_ref) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void FillIMTFromIfTable(mirror::IfTable* if_table,
+                          ArtMethod* unimplemented_method,
+                          ArtMethod* imt_conflict_method,
+                          mirror::Class* klass,
+                          bool create_conflict_tables,
+                          bool ignore_copied_methods,
+                          /*out*/bool* new_conflict,
+                          /*out*/ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void FillImtFromSuperClass(Handle<mirror::Class> klass,
+                             ArtMethod* unimplemented_method,
+                             ArtMethod* imt_conflict_method,
+                             bool* new_conflict,
+                             ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+
   std::vector<const DexFile*> boot_class_path_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  std::vector<std::unique_ptr<const DexFile>> boot_dex_files_;
 
   mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  // JNI weak globals to allow dex caches to get unloaded. We lazily delete weak globals when we
-  // register new dex files.
-  std::list<jweak> dex_caches_ GUARDED_BY(dex_lock_);
+  // JNI weak globals and side data to allow dex caches to get unloaded. We lazily delete weak
+  // globals when we register new dex files.
+  std::list<DexCacheData> dex_caches_ GUARDED_BY(dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
   // InsertClassTableForClassLoader.
@@ -880,8 +1138,8 @@
   // New class roots, only used by CMS since the GC needs to mark these in the pause.
   std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
-  // Do we need to search dex caches to find image classes?
-  bool dex_cache_image_class_lookup_required_;
+  // Do we need to search dex caches to find boot image classes?
+  bool dex_cache_boot_image_class_lookup_required_;
   // Number of times we've searched dex caches for a class. After a certain number of misses we move
   // the classes into the class_table_ to avoid dex cache based searches.
   Atomic<uint32_t> failed_dex_cache_class_lookups_;
@@ -911,14 +1169,15 @@
   const void* quick_to_interpreter_bridge_trampoline_;
 
   // Image pointer size.
-  size_t image_pointer_size_;
+  PointerSize image_pointer_size_;
 
+  class FindVirtualMethodHolderVisitor;
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
+  ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for DexLock and RegisterDexFileLocked
   ART_FRIEND_TEST(mirror::DexCacheTest, Open);  // for AllocDexCache
-
   DISALLOW_COPY_AND_ASSIGN(ClassLinker);
 };
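
The hunks above replace FindDefaultMethodImplementation's bool-plus-icce_message
contract with the tri-state DefaultMethodSearchResult, letting callers defer
exception allocation until it is safe. A minimal standalone sketch of that
refactor, using invented names that are not part of this patch:

    #include <cstdio>

    // Stand-in for DefaultMethodSearchResult: the return value itself says
    // whether a default was found, the slot stays abstract, or it conflicts.
    enum class SearchResult { kDefaultFound, kAbstractFound, kDefaultConflict };

    SearchResult FindImpl(int num_candidates, bool conflicting, int* out) {
      *out = 0;
      if (conflicting) {
        return SearchResult::kDefaultConflict;  // Caller throws ICCE lazily.
      }
      if (num_candidates == 0) {
        return SearchResult::kAbstractFound;    // Considered not implemented.
      }
      *out = num_candidates;                    // Pretend a default was found.
      return SearchResult::kDefaultFound;
    }

    int main() {
      int out = 0;
      switch (FindImpl(2, /*conflicting=*/false, &out)) {
        case SearchResult::kDefaultFound:    std::printf("found %d\n", out); break;
        case SearchResult::kAbstractFound:   std::printf("abstract\n");      break;
        case SearchResult::kDefaultConflict: std::printf("conflict\n");      break;
      }
      return 0;
    }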
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 0926ce3..7999aef 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -21,9 +21,11 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
+#include "experimental_flags.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/heap.h"
 #include "mirror/abstract_method.h"
@@ -99,6 +101,62 @@
     EXPECT_EQ(kAccPublic | kAccFinal | kAccAbstract, primitive->GetAccessFlags());
   }
 
+  void AssertObjectClass(mirror::Class* JavaLangObject)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    ASSERT_TRUE(JavaLangObject != nullptr);
+    ASSERT_TRUE(JavaLangObject->GetClass() != nullptr);
+    ASSERT_EQ(JavaLangObject->GetClass(),
+              JavaLangObject->GetClass()->GetClass());
+    EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
+    std::string temp;
+    ASSERT_STREQ(JavaLangObject->GetDescriptor(&temp), "Ljava/lang/Object;");
+    EXPECT_TRUE(JavaLangObject->GetSuperClass() == nullptr);
+    EXPECT_FALSE(JavaLangObject->HasSuperClass());
+    EXPECT_TRUE(JavaLangObject->GetClassLoader() == nullptr);
+    EXPECT_EQ(mirror::Class::kStatusInitialized, JavaLangObject->GetStatus());
+    EXPECT_FALSE(JavaLangObject->IsErroneous());
+    EXPECT_TRUE(JavaLangObject->IsLoaded());
+    EXPECT_TRUE(JavaLangObject->IsResolved());
+    EXPECT_TRUE(JavaLangObject->IsVerified());
+    EXPECT_TRUE(JavaLangObject->IsInitialized());
+    EXPECT_FALSE(JavaLangObject->IsArrayInstance());
+    EXPECT_FALSE(JavaLangObject->IsArrayClass());
+    EXPECT_TRUE(JavaLangObject->GetComponentType() == nullptr);
+    EXPECT_FALSE(JavaLangObject->IsInterface());
+    EXPECT_TRUE(JavaLangObject->IsPublic());
+    EXPECT_FALSE(JavaLangObject->IsFinal());
+    EXPECT_FALSE(JavaLangObject->IsPrimitive());
+    EXPECT_FALSE(JavaLangObject->IsSynthetic());
+    EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
+    EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
+    if (!kUseBrooksReadBarrier) {
+      EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
+    } else {
+      EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
+    }
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(),
+                 "shadow$_klass_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(),
+                 "shadow$_monitor_");
+    if (kUseBrooksReadBarrier) {
+      EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(),
+                   "shadow$_x_rb_ptr_");
+      EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(),
+                   "shadow$_x_xpadding_");
+    }
+
+    EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
+    EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
+
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
+    ArtMethod* unimplemented = runtime_->GetImtUnimplementedMethod();
+    ImTable* imt = JavaLangObject->GetImt(pointer_size);
+    ASSERT_NE(nullptr, imt);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      ASSERT_EQ(unimplemented, imt->Get(i, pointer_size));
+    }
+  }
+
   void AssertArrayClass(const std::string& array_descriptor,
                         const std::string& component_type,
                         mirror::ClassLoader* class_loader)
@@ -147,7 +205,8 @@
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
     EXPECT_EQ(2U, array->NumDirectInterfaces());
-    EXPECT_TRUE(array->ShouldHaveEmbeddedImtAndVTable());
+    EXPECT_TRUE(array->ShouldHaveImt());
+    EXPECT_TRUE(array->ShouldHaveEmbeddedVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != nullptr);
     mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
@@ -157,6 +216,13 @@
     EXPECT_STREQ(direct_interface1->GetDescriptor(&temp), "Ljava/io/Serializable;");
     mirror::Class* array_ptr = array->GetComponentType();
     EXPECT_EQ(class_linker_->FindArrayClass(self, &array_ptr), array.Get());
+
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
+    mirror::Class* JavaLangObject =
+        class_linker_->FindSystemClass(self, "Ljava/lang/Object;");
+    ImTable* JavaLangObject_imt = JavaLangObject->GetImt(pointer_size);
+    // The IMT of an array class should be shared with the IMT of java.lang.Object.
+    ASSERT_EQ(JavaLangObject_imt, array->GetImt(pointer_size));
   }
 
   void AssertMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -165,14 +231,14 @@
     EXPECT_TRUE(method->GetName() != nullptr);
     EXPECT_TRUE(method->GetSignature() != Signature::NoSignature());
 
-    EXPECT_TRUE(method->HasDexCacheResolvedMethods(sizeof(void*)));
-    EXPECT_TRUE(method->HasDexCacheResolvedTypes(sizeof(void*)));
+    EXPECT_TRUE(method->HasDexCacheResolvedMethods(kRuntimePointerSize));
+    EXPECT_TRUE(method->HasDexCacheResolvedTypes(kRuntimePointerSize));
     EXPECT_TRUE(method->HasSameDexCacheResolvedMethods(
         method->GetDeclaringClass()->GetDexCache()->GetResolvedMethods(),
-        sizeof(void*)));
+        kRuntimePointerSize));
     EXPECT_TRUE(method->HasSameDexCacheResolvedTypes(
         method->GetDeclaringClass()->GetDexCache()->GetResolvedTypes(),
-        sizeof(void*)));
+        kRuntimePointerSize));
   }
 
   void AssertField(mirror::Class* klass, ArtField* field)
@@ -209,11 +275,10 @@
                                                klass->GetDescriptor(&temp2)));
     if (klass->IsInterface()) {
       EXPECT_TRUE(klass->IsAbstract());
-      if (klass->NumDirectMethods() == 1) {
-        EXPECT_TRUE(klass->GetDirectMethod(0, sizeof(void*))->IsClassInitializer());
-        EXPECT_TRUE(klass->GetDirectMethod(0, sizeof(void*))->IsDirect());
-      } else {
-        EXPECT_EQ(0U, klass->NumDirectMethods());
+      // Check that all direct methods are static (either <clinit> or a regular static method).
+      for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
+        EXPECT_TRUE(m.IsStatic());
+        EXPECT_TRUE(m.IsDirect());
       }
     } else {
       if (!klass->IsSynthetic()) {
@@ -228,7 +293,7 @@
       if (klass->IsInterface()) {
         EXPECT_EQ(0U, iftable->GetMethodArrayCount(i));
       } else {
-        EXPECT_EQ(interface->NumVirtualMethods(), iftable->GetMethodArrayCount(i));
+        EXPECT_EQ(interface->NumDeclaredVirtualMethods(), iftable->GetMethodArrayCount(i));
       }
     }
     if (klass->IsAbstract()) {
@@ -248,16 +313,26 @@
     EXPECT_FALSE(klass->IsPrimitive());
     EXPECT_TRUE(klass->CanAccess(klass.Get()));
 
-    for (ArtMethod& method : klass->GetDirectMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetDirectMethods(kRuntimePointerSize)) {
       AssertMethod(&method);
       EXPECT_TRUE(method.IsDirect());
       EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
     }
 
-    for (ArtMethod& method : klass->GetVirtualMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
-      EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()));
+      EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
+    }
+
+    for (ArtMethod& method : klass->GetCopiedMethods(kRuntimePointerSize)) {
+      AssertMethod(&method);
+      EXPECT_FALSE(method.IsDirect());
+      EXPECT_TRUE(method.IsCopied());
+      EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
+          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
+      EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
+          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
     }
 
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
@@ -361,7 +436,7 @@
     auto* resolved_methods = dex_cache->GetResolvedMethods();
     for (size_t i = 0, num_methods = dex_cache->NumResolvedMethods(); i != num_methods; ++i) {
       EXPECT_TRUE(
-          mirror::DexCache::GetElementPtrSize(resolved_methods, i, sizeof(void*)) != nullptr)
+          mirror::DexCache::GetElementPtrSize(resolved_methods, i, kRuntimePointerSize) != nullptr)
           << dex.GetLocation() << " i=" << i;
     }
   }
@@ -496,13 +571,14 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_size_), "classSize");
     addOffset(OFFSETOF_MEMBER(mirror::Class, clinit_thread_id_), "clinitThreadId");
     addOffset(OFFSETOF_MEMBER(mirror::Class, component_type_), "componentType");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, copied_methods_offset_), "copiedMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_), "dexCache");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, direct_methods_), "directMethods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ifields_), "iFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, iftable_), "ifTable");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, methods_), "methods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, name_), "name");
     addOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_instance_fields_),
               "numReferenceInstanceFields");
@@ -515,8 +591,8 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, sfields_), "sFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_class_), "verifyErrorClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_), "virtualMethods");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_offset_), "virtualMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
 };
@@ -524,15 +600,15 @@
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
-    addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hashCode");
+    addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hash");
   };
 };
 
 struct ThrowableOffsets : public CheckOffsets<mirror::Throwable> {
   ThrowableOffsets() : CheckOffsets<mirror::Throwable>(false, "Ljava/lang/Throwable;") {
+    addOffset(OFFSETOF_MEMBER(mirror::Throwable, backtrace_), "backtrace");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, cause_), "cause");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, detail_message_), "detailMessage");
-    addOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_state_), "stackState");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_trace_), "stackTrace");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, suppressed_exceptions_), "suppressedExceptions");
   };
@@ -601,7 +677,7 @@
 struct AccessibleObjectOffsets : public CheckOffsets<mirror::AccessibleObject> {
   AccessibleObjectOffsets() : CheckOffsets<mirror::AccessibleObject>(
       false, "Ljava/lang/reflect/AccessibleObject;") {
-    addOffset(mirror::AccessibleObject::FlagOffset().Uint32Value(), "flag");
+    addOffset(mirror::AccessibleObject::FlagOffset().Uint32Value(), "override");
   };
 };
 
@@ -701,45 +777,7 @@
 TEST_F(ClassLinkerTest, FindClass) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::Class* JavaLangObject = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
-  ASSERT_TRUE(JavaLangObject != nullptr);
-  ASSERT_TRUE(JavaLangObject->GetClass() != nullptr);
-  ASSERT_EQ(JavaLangObject->GetClass(), JavaLangObject->GetClass()->GetClass());
-  EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
-  std::string temp;
-  ASSERT_STREQ(JavaLangObject->GetDescriptor(&temp), "Ljava/lang/Object;");
-  EXPECT_TRUE(JavaLangObject->GetSuperClass() == nullptr);
-  EXPECT_FALSE(JavaLangObject->HasSuperClass());
-  EXPECT_TRUE(JavaLangObject->GetClassLoader() == nullptr);
-  EXPECT_EQ(mirror::Class::kStatusInitialized, JavaLangObject->GetStatus());
-  EXPECT_FALSE(JavaLangObject->IsErroneous());
-  EXPECT_TRUE(JavaLangObject->IsLoaded());
-  EXPECT_TRUE(JavaLangObject->IsResolved());
-  EXPECT_TRUE(JavaLangObject->IsVerified());
-  EXPECT_TRUE(JavaLangObject->IsInitialized());
-  EXPECT_FALSE(JavaLangObject->IsArrayInstance());
-  EXPECT_FALSE(JavaLangObject->IsArrayClass());
-  EXPECT_TRUE(JavaLangObject->GetComponentType() == nullptr);
-  EXPECT_FALSE(JavaLangObject->IsInterface());
-  EXPECT_TRUE(JavaLangObject->IsPublic());
-  EXPECT_FALSE(JavaLangObject->IsFinal());
-  EXPECT_FALSE(JavaLangObject->IsPrimitive());
-  EXPECT_FALSE(JavaLangObject->IsSynthetic());
-  EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
-  EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
-  if (!kUseBrooksReadBarrier) {
-    EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
-  } else {
-    EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
-  }
-  EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
-  EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
-  if (kUseBrooksReadBarrier) {
-    EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
-    EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
-  }
-
-  EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
-  EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
+  AssertObjectClass(JavaLangObject);
 
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader(
@@ -750,6 +788,7 @@
   ASSERT_TRUE(MyClass->GetClass() != nullptr);
   ASSERT_EQ(MyClass->GetClass(), MyClass->GetClass()->GetClass());
   EXPECT_EQ(JavaLangObject, MyClass->GetClass()->GetSuperClass());
+  std::string temp;
   ASSERT_STREQ(MyClass->GetDescriptor(&temp), "LMyClass;");
   EXPECT_TRUE(MyClass->GetSuperClass() == JavaLangObject);
   EXPECT_TRUE(MyClass->HasSuperClass());
@@ -844,7 +883,7 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
-  NullHandle<mirror::ClassLoader> class_loader;
+  ScopedNullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
   EXPECT_STREQ("value", c->GetIFieldsPtr()->At(0).GetName());
@@ -890,7 +929,7 @@
   // Static final primitives that are initialized by a compile-time constant
   // expression resolve to a copy of a constant value from the constant pool.
   // So <clinit> should be null.
-  ArtMethod* clinit = statics->FindDirectMethod("<clinit>", "()V", sizeof(void*));
+  ArtMethod* clinit = statics->FindDirectMethod("<clinit>", "()V", kRuntimePointerSize);
   EXPECT_TRUE(clinit == nullptr);
 
   EXPECT_EQ(9U, statics->NumStaticFields());
@@ -977,15 +1016,15 @@
   EXPECT_TRUE(J->IsAssignableFrom(B.Get()));
 
   const Signature void_sig = I->GetDexCache()->GetDexFile()->CreateSignature("()V");
-  ArtMethod* Ii = I->FindVirtualMethod("i", void_sig, sizeof(void*));
-  ArtMethod* Jj1 = J->FindVirtualMethod("j1", void_sig, sizeof(void*));
-  ArtMethod* Jj2 = J->FindVirtualMethod("j2", void_sig, sizeof(void*));
-  ArtMethod* Kj1 = K->FindInterfaceMethod("j1", void_sig, sizeof(void*));
-  ArtMethod* Kj2 = K->FindInterfaceMethod("j2", void_sig, sizeof(void*));
-  ArtMethod* Kk = K->FindInterfaceMethod("k", void_sig, sizeof(void*));
-  ArtMethod* Ai = A->FindVirtualMethod("i", void_sig, sizeof(void*));
-  ArtMethod* Aj1 = A->FindVirtualMethod("j1", void_sig, sizeof(void*));
-  ArtMethod* Aj2 = A->FindVirtualMethod("j2", void_sig, sizeof(void*));
+  ArtMethod* Ii = I->FindVirtualMethod("i", void_sig, kRuntimePointerSize);
+  ArtMethod* Jj1 = J->FindVirtualMethod("j1", void_sig, kRuntimePointerSize);
+  ArtMethod* Jj2 = J->FindVirtualMethod("j2", void_sig, kRuntimePointerSize);
+  ArtMethod* Kj1 = K->FindInterfaceMethod("j1", void_sig, kRuntimePointerSize);
+  ArtMethod* Kj2 = K->FindInterfaceMethod("j2", void_sig, kRuntimePointerSize);
+  ArtMethod* Kk = K->FindInterfaceMethod("k", void_sig, kRuntimePointerSize);
+  ArtMethod* Ai = A->FindVirtualMethod("i", void_sig, kRuntimePointerSize);
+  ArtMethod* Aj1 = A->FindVirtualMethod("j1", void_sig, kRuntimePointerSize);
+  ArtMethod* Aj2 = A->FindVirtualMethod("j2", void_sig, kRuntimePointerSize);
   ASSERT_TRUE(Ii != nullptr);
   ASSERT_TRUE(Jj1 != nullptr);
   ASSERT_TRUE(Jj2 != nullptr);
@@ -1000,12 +1039,12 @@
   EXPECT_NE(Jj2, Aj2);
   EXPECT_EQ(Kj1, Jj1);
   EXPECT_EQ(Kj2, Jj2);
-  EXPECT_EQ(Ai, A->FindVirtualMethodForInterface(Ii, sizeof(void*)));
-  EXPECT_EQ(Aj1, A->FindVirtualMethodForInterface(Jj1, sizeof(void*)));
-  EXPECT_EQ(Aj2, A->FindVirtualMethodForInterface(Jj2, sizeof(void*)));
-  EXPECT_EQ(Ai, A->FindVirtualMethodForVirtualOrInterface(Ii, sizeof(void*)));
-  EXPECT_EQ(Aj1, A->FindVirtualMethodForVirtualOrInterface(Jj1, sizeof(void*)));
-  EXPECT_EQ(Aj2, A->FindVirtualMethodForVirtualOrInterface(Jj2, sizeof(void*)));
+  EXPECT_EQ(Ai, A->FindVirtualMethodForInterface(Ii, kRuntimePointerSize));
+  EXPECT_EQ(Aj1, A->FindVirtualMethodForInterface(Jj1, kRuntimePointerSize));
+  EXPECT_EQ(Aj2, A->FindVirtualMethodForInterface(Jj2, kRuntimePointerSize));
+  EXPECT_EQ(Ai, A->FindVirtualMethodForVirtualOrInterface(Ii, kRuntimePointerSize));
+  EXPECT_EQ(Aj1, A->FindVirtualMethodForVirtualOrInterface(Jj1, kRuntimePointerSize));
+  EXPECT_EQ(Aj2, A->FindVirtualMethodForVirtualOrInterface(Jj2, kRuntimePointerSize));
 
   ArtField* Afoo = mirror::Class::FindStaticField(soa.Self(), A, "foo", "Ljava/lang/String;");
   ArtField* Bfoo = mirror::Class::FindStaticField(soa.Self(), B, "foo", "Ljava/lang/String;");
@@ -1030,11 +1069,9 @@
   Handle<mirror::ClassLoader> class_loader(
       hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", class_loader);
-  ArtMethod* clinit = klass->FindClassInitializer(sizeof(void*));
-  ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;", sizeof(void*));
-  const DexFile::StringId* string_id = dex_file->FindStringId("LStaticsFromCode;");
-  ASSERT_TRUE(string_id != nullptr);
-  const DexFile::TypeId* type_id = dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
+  ArtMethod* clinit = klass->FindClassInitializer(kRuntimePointerSize);
+  ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;", kRuntimePointerSize);
+  const DexFile::TypeId* type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(type_id != nullptr);
   uint32_t type_idx = dex_file->GetIndexForTypeId(*type_id);
   mirror::Class* uninit = ResolveVerifyAndClinit(type_idx, clinit, soa.Self(), true, false);
@@ -1092,42 +1129,39 @@
 
 TEST_F(ClassLinkerTest, ValidatePredefinedClassSizes) {
   ScopedObjectAccess soa(Thread::Current());
-  NullHandle<mirror::ClassLoader> class_loader;
+  ScopedNullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Class;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::Class::ClassClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::Class::ClassClassSize(kRuntimePointerSize));
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Object;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::Object::ClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::Object::ClassSize(kRuntimePointerSize));
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/String;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::String::ClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::String::ClassSize(kRuntimePointerSize));
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/DexCache;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::DexCache::ClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::DexCache::ClassSize(kRuntimePointerSize));
 }
 
 static void CheckMethod(ArtMethod* method, bool verified)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (!method->IsNative() && !method->IsAbstract()) {
-    EXPECT_EQ((method->GetAccessFlags() & kAccPreverified) != 0U, verified)
+    EXPECT_EQ((method->GetAccessFlags() & kAccSkipAccessChecks) != 0U, verified)
         << PrettyMethod(method, true);
   }
 }
 
-static void CheckPreverified(mirror::Class* c, bool preverified)
+static void CheckVerificationAttempted(mirror::Class* c, bool preverified)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  EXPECT_EQ((c->GetAccessFlags() & kAccPreverified) != 0U, preverified)
+  EXPECT_EQ((c->GetAccessFlags() & kAccVerificationAttempted) != 0U, preverified)
       << "Class " << PrettyClass(c) << " not as expected";
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    CheckMethod(&m, preverified);
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     CheckMethod(&m, preverified);
   }
 }
@@ -1139,7 +1173,7 @@
   ASSERT_TRUE(JavaLangObject != nullptr);
   EXPECT_TRUE(JavaLangObject->IsInitialized()) << "Not testing already initialized class from the "
                                                   "core";
-  CheckPreverified(JavaLangObject, true);
+  CheckVerificationAttempted(JavaLangObject, true);
 }
 
 TEST_F(ClassLinkerTest, Preverified_UninitializedBoot) {
@@ -1152,10 +1186,10 @@
   EXPECT_FALSE(security_manager->IsInitialized()) << "Not testing uninitialized class from the "
                                                      "core";
 
-  CheckPreverified(security_manager.Get(), false);
+  CheckVerificationAttempted(security_manager.Get(), false);
 
   class_linker_->EnsureInitialized(soa.Self(), security_manager, true, true);
-  CheckPreverified(security_manager.Get(), true);
+  CheckVerificationAttempted(security_manager.Get(), true);
 }
 
 TEST_F(ClassLinkerTest, Preverified_App) {
@@ -1167,10 +1201,10 @@
   Handle<mirror::Class> statics(
       hs.NewHandle(class_linker_->FindClass(soa.Self(), "LStatics;", class_loader)));
 
-  CheckPreverified(statics.Get(), false);
+  CheckVerificationAttempted(statics.Get(), false);
 
   class_linker_->EnsureInitialized(soa.Self(), statics, true, true);
-  CheckPreverified(statics.Get(), true);
+  CheckVerificationAttempted(statics.Get(), true);
 }
 
 TEST_F(ClassLinkerTest, IsBootStrapClassLoaded) {
@@ -1193,4 +1227,42 @@
   EXPECT_FALSE(statics.Get()->IsBootStrapClassLoaded());
 }
 
+// Regression test for b/26799552.
+TEST_F(ClassLinkerTest, RegisterDexFileName) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+  {
+    ReaderMutexLock mu(soa.Self(), *class_linker->DexLock());
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      dex_cache.Assign(down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root)));
+      if (dex_cache.Get() != nullptr) {
+        break;
+      }
+    }
+    ASSERT_TRUE(dex_cache.Get() != nullptr);
+  }
+  // Make a copy of the dex cache and change the name.
+  dex_cache.Assign(dex_cache->Clone(soa.Self())->AsDexCache());
+  const uint16_t data[] = { 0x20AC, 0x20A1 };
+  Handle<mirror::String> location(hs.NewHandle(mirror::String::AllocFromUtf16(soa.Self(),
+                                                                              arraysize(data),
+                                                                              data)));
+  dex_cache->SetLocation(location.Get());
+  const DexFile* old_dex_file = dex_cache->GetDexFile();
+
+  std::unique_ptr<DexFile> dex_file(new DexFile(old_dex_file->Begin(),
+                                                old_dex_file->Size(),
+                                                location->ToModifiedUtf8(),
+                                                0u,
+                                                nullptr,
+                                                nullptr));
+  {
+    WriterMutexLock mu(soa.Self(), *class_linker->DexLock());
+    // Check that inserting with a UTF16 name works.
+    class_linker->RegisterDexFileLocked(*dex_file, dex_cache);
+  }
+}
+
 }  // namespace art
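
The test updates above replace every raw sizeof(void*) argument with
kRuntimePointerSize from the newly included base/enums.h. A sketch of the idea
(an assumed reconstruction, not the patch's actual header): making pointer
width a distinct enum type so it cannot be confused with an arbitrary size_t:

    #include <cstddef>

    // A distinct type for pointer width; passing a plain byte count where a
    // PointerSize is expected now fails to compile.
    enum class PointerSize : size_t { k32 = 4, k64 = 8 };

    constexpr size_t ToBytes(PointerSize size) {
      return static_cast<size_t>(size);
    }

    constexpr PointerSize kRuntimePointerSize =
        sizeof(void*) == 8U ? PointerSize::k64 : PointerSize::k32;

    static_assert(ToBytes(kRuntimePointerSize) == sizeof(void*),
                  "runtime pointer size must match the host pointer width");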
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index aef02b6..d52365d 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -23,25 +23,44 @@
 
 template<class Visitor>
 void ClassTable::VisitRoots(Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
+  for (GcRoot<mirror::Object>& root : strong_roots_) {
+    visitor.VisitRoot(root.AddressWithoutBarrier());
+  }
 }
 
 template<class Visitor>
 void ClassTable::VisitRoots(const Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
-  for (GcRoot<mirror::Object>& root : dex_files_) {
+  for (GcRoot<mirror::Object>& root : strong_roots_) {
     visitor.VisitRoot(root.AddressWithoutBarrier());
   }
 }
 
+template <typename Visitor>
+bool ClassTable::Visit(Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      if (!visitor(root.Read())) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_TABLE_INL_H_
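
class_table-inl.h now implements Visit() as a template over the visitor type
rather than the old virtual ClassVisitor interface, so callers can pass a
lambda and the compiler can inline the callback. A self-contained sketch of
the early-exit visitation pattern, with a simplified container and invented
names:

    #include <vector>

    // Visits every element across all snapshots and stops as soon as the
    // visitor returns false, mirroring ClassTable::Visit's contract.
    template <typename T, typename Visitor>
    bool VisitAll(const std::vector<std::vector<T>>& snapshots, Visitor&& visit) {
      for (const std::vector<T>& snapshot : snapshots) {
        for (const T& element : snapshot) {
          if (!visit(element)) {
            return false;  // The visitor requested an early exit.
          }
        }
      }
      return true;  // Every element was visited.
    }

    int main() {
      std::vector<std::vector<int>> snapshots = {{1, 2}, {3, 4}};
      int seen = 0;
      // Stop visiting once the value 3 has been seen.
      VisitAll(snapshots, [&seen](int value) { ++seen; return value != 3; });
      return seen == 3 ? 0 : 1;
    }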
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 3ed1c95..e9154cb 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -20,17 +20,19 @@
 
 namespace art {
 
-ClassTable::ClassTable() {
+ClassTable::ClassTable() : lock_("Class loader classes", kClassLoaderClassesLock) {
   Runtime* const runtime = Runtime::Current();
   classes_.push_back(ClassSet(runtime->GetHashTableMinLoadFactor(),
                               runtime->GetHashTableMaxLoadFactor()));
 }
 
 void ClassTable::FreezeSnapshot() {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.push_back(ClassSet());
 }
 
 bool ClassTable::Contains(mirror::Class* klass) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
     if (it != class_set.end()) {
@@ -40,7 +42,19 @@
   return false;
 }
 
+mirror::Class* ClassTable::LookupByDescriptor(mirror::Class* klass) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.Find(GcRoot<mirror::Class>(klass));
+    if (it != class_set.end()) {
+      return it->Read();
+    }
+  }
+  return nullptr;
+}
+
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   // Should only be updating latest table.
   auto existing_it = classes_.back().FindWithHash(descriptor, hash);
   if (kIsDebugBuild && existing_it == classes_.back().end()) {
@@ -63,18 +77,8 @@
   return existing;
 }
 
-bool ClassTable::Visit(ClassVisitor* visitor) {
-  for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      if (!visitor->Visit(root.Read())) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 size_t ClassTable::NumZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   size_t sum = 0;
   for (size_t i = 0; i < classes_.size() - 1; ++i) {
     sum += classes_[i].Size();
@@ -83,10 +87,12 @@
 }
 
 size_t ClassTable::NumNonZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   return classes_.back().Size();
 }
 
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.FindWithHash(descriptor, hash);
     if (it != class_set.end()) {
@@ -97,14 +103,17 @@
 }
 
 void ClassTable::Insert(mirror::Class* klass) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().Insert(GcRoot<mirror::Class>(klass));
 }
 
 void ClassTable::InsertWithHash(mirror::Class* klass, size_t hash) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
 }
 
 bool ClassTable::Remove(const char* descriptor) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(descriptor);
     if (it != class_set.end()) {
@@ -115,7 +124,7 @@
   return false;
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
     const {
   std::string temp;
   return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
@@ -133,19 +142,55 @@
   return a.Read()->DescriptorEquals(descriptor);
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
-bool ClassTable::InsertDexFile(mirror::Object* dex_file) {
-  DCHECK(dex_file != nullptr);
-  for (GcRoot<mirror::Object>& root : dex_files_) {
-    if (root.Read() == dex_file) {
+bool ClassTable::InsertStrongRoot(mirror::Object* obj) {
+  WriterMutexLock mu(Thread::Current(), lock_);
+  DCHECK(obj != nullptr);
+  for (GcRoot<mirror::Object>& root : strong_roots_) {
+    if (root.Read() == obj) {
       return false;
     }
   }
-  dex_files_.push_back(GcRoot<mirror::Object>(dex_file));
+  strong_roots_.push_back(GcRoot<mirror::Object>(obj));
   return true;
 }
 
+size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  ClassSet combined;
+  // Combine all the class sets in case there are multiple; this also adjusts the load factor
+  // back to the default in case classes were pruned.
+  for (const ClassSet& class_set : classes_) {
+    for (const GcRoot<mirror::Class>& root : class_set) {
+      combined.Insert(root);
+    }
+  }
+  const size_t ret = combined.WriteToMemory(ptr);
+  // Sanity check.
+  if (kIsDebugBuild && ptr != nullptr) {
+    size_t read_count;
+    ClassSet class_set(ptr, /*make copy*/false, &read_count);
+    class_set.Verify();
+  }
+  return ret;
+}
+
+size_t ClassTable::ReadFromMemory(uint8_t* ptr) {
+  size_t read_count = 0;
+  AddClassSet(ClassSet(ptr, /*make copy*/false, &read_count));
+  return read_count;
+}
+
+void ClassTable::AddClassSet(ClassSet&& set) {
+  WriterMutexLock mu(Thread::Current(), lock_);
+  classes_.insert(classes_.begin(), std::move(set));
+}
+
+void ClassTable::ClearStrongRoots() {
+  WriterMutexLock mu(Thread::Current(), lock_);
+  strong_roots_.clear();
+}
 }  // namespace art
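
The class_table.cc changes above guard every operation with the new per-table
lock_ instead of the global classlinker_classes_lock_: lookups take a reader
lock, mutations a writer lock. A sketch of that discipline, with
std::shared_mutex standing in for ART's ReaderWriterMutex:

    #include <mutex>
    #include <shared_mutex>
    #include <string>
    #include <unordered_set>

    class Table {
     public:
      // Read path: many threads may hold the shared lock concurrently.
      bool Contains(const std::string& key) const {
        std::shared_lock<std::shared_mutex> lock(lock_);
        return set_.count(key) != 0;
      }

      // Write path: the exclusive lock excludes both readers and writers.
      void Insert(std::string key) {
        std::unique_lock<std::shared_mutex> lock(lock_);
        set_.insert(std::move(key));
      }

     private:
      mutable std::shared_mutex lock_;
      std::unordered_set<std::string> set_;
    };

Making the lock per-table rather than global lets one class loader's lookups
proceed while another loader's table is being modified.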
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 002bb56..6fb4206 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -36,85 +36,21 @@
   class ClassLoader;
 }  // namespace mirror
 
-class ClassVisitor {
- public:
-  virtual ~ClassVisitor() {}
-  // Return true to continue visiting.
-  virtual bool Visit(mirror::Class* klass) = 0;
-};
-
 // Each loader has a ClassTable
 class ClassTable {
  public:
-  ClassTable();
-
-  // Used by image writer for checking.
-  bool Contains(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Freeze the current class tables by allocating a new table and never updating or modifying the
-  // existing table. This helps prevents dirty pages after caused by inserting after zygote fork.
-  void FreezeSnapshot()
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns the number of classes in previous snapshots.
-  size_t NumZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
-
-  // Returns all off the classes in the lastest snapshot.
-  size_t NumNonZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
-
-  // Update a class in the table with the new class. Returns the existing class which was replaced.
-  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock.
-  template<class Visitor>
-  void VisitRoots(Visitor& visitor)
-      NO_THREAD_SAFETY_ANALYSIS
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
-  template<class Visitor>
-  void VisitRoots(const Visitor& visitor)
-      NO_THREAD_SAFETY_ANALYSIS
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
-
-  // Return false if the callback told us to exit.
-  bool Visit(ClassVisitor* visitor)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
-
-  mirror::Class* Lookup(const char* descriptor, size_t hash)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
-
-  void Insert(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  void InsertWithHash(mirror::Class* klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns true if the class was found and removed, false otherwise.
-  bool Remove(const char* descriptor)
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Return true if we inserted the dex file, false if it already exists.
-  bool InsertDexFile(mirror::Object* dex_file)
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
   class ClassDescriptorHashEquals {
    public:
+    // uint32_t for cross compilation.
+    uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
-    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
         NO_THREAD_SAFETY_ANALYSIS;
     // Same descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
-    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+    // uint32_t for cross compilation.
+    uint32_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
   };
   class GcRootEmptyFn {
    public:
@@ -131,12 +67,110 @@
       ClassDescriptorHashEquals, TrackingAllocator<GcRoot<mirror::Class>, kAllocatorTagClassTable>>
       ClassSet;
 
-  // TODO: shard lock to have one per class loader.
+  ClassTable();
+
+  // Used by image writer for checking.
+  bool Contains(mirror::Class* klass)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Freeze the current class tables by allocating a new table and never updating or modifying the
+  // existing table. This helps prevent dirty pages caused by inserting after the zygote forks.
+  void FreezeSnapshot()
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of classes in previous snapshots.
+  size_t NumZygoteClasses() const REQUIRES(!lock_);
+
+  // Returns the number of classes in the latest snapshot.
+  size_t NumNonZygoteClasses() const REQUIRES(!lock_);
+
+  // Update a class in the table with the new class. Returns the existing class which was replaced.
+  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock.
+  template<class Visitor>
+  void VisitRoots(Visitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<class Visitor>
+  void VisitRoots(const Visitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Stops the visit if the visitor returns false.
+  template <typename Visitor>
+  bool Visit(Visitor& visitor)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return the first class that matches the descriptor. Returns null if there are none.
+  mirror::Class* Lookup(const char* descriptor, size_t hash)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return the first class that matches the descriptor of klass. Returns null if there are none.
+  mirror::Class* LookupByDescriptor(mirror::Class* klass)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void Insert(mirror::Class* klass)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void InsertWithHash(mirror::Class* klass, size_t hash)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns true if the class was found and removed, false otherwise.
+  bool Remove(const char* descriptor)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if we inserted the strong root, false if it already exists.
+  bool InsertStrongRoot(mirror::Object* obj)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Combines all of the tables into one class set.
+  size_t WriteToMemory(uint8_t* ptr) const
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Read a table from ptr and put it at the front of classes_.
+  size_t ReadFromMemory(uint8_t* ptr)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Add a class set to the front of classes.
+  void AddClassSet(ClassSet&& set)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Clear strong roots (other than classes themselves).
+  void ClearStrongRoots()
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ReaderWriterMutex& GetLock() {
+    return lock_;
+  }
+
+ private:
+  // Lock to guard inserting and removing.
+  mutable ReaderWriterMutex lock_;
   // We have a vector to help prevent dirty pages after the zygote forks by calling FreezeSnapshot.
-  std::vector<ClassSet> classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  // Dex files used by the class loader which may not be owned by the class loader. We keep these
-  // live so that we do not have issues closing any of the dex files.
-  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::vector<ClassSet> classes_ GUARDED_BY(lock_);
+  // Extra strong roots that can be either dex files or dex caches. Dex files used by the class
+  // loader but not owned by it must be kept strongly live. Dex caches are also held live to
+  // prevent them from being unloaded once they have classes in them.
+  std::vector<GcRoot<mirror::Object>> strong_roots_ GUARDED_BY(lock_);
 };
 
 }  // namespace art
diff --git a/runtime/code_simulator_container.cc b/runtime/code_simulator_container.cc
new file mode 100644
index 0000000..d884c58
--- /dev/null
+++ b/runtime/code_simulator_container.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dlfcn.h>
+
+#include "code_simulator_container.h"
+#include "globals.h"
+
+namespace art {
+
+CodeSimulatorContainer::CodeSimulatorContainer(InstructionSet target_isa)
+    : libart_simulator_handle_(nullptr),
+      simulator_(nullptr) {
+  const char* libart_simulator_so_name =
+      kIsDebugBuild ? "libartd-simulator.so" : "libart-simulator.so";
+  libart_simulator_handle_ = dlopen(libart_simulator_so_name, RTLD_NOW);
+  // It is not a real error when libart-simulator does not exist, e.g., on target.
+  if (libart_simulator_handle_ == nullptr) {
+    VLOG(simulator) << "Could not load " << libart_simulator_so_name << ": " << dlerror();
+  } else {
+    typedef CodeSimulator* (*create_code_simulator_ptr_)(InstructionSet target_isa);
+    create_code_simulator_ptr_ create_code_simulator_ =
+        reinterpret_cast<create_code_simulator_ptr_>(
+            dlsym(libart_simulator_handle_, "CreateCodeSimulator"));
+    DCHECK(create_code_simulator_ != nullptr) << "Failed to find symbol CreateCodeSimulator: "
+        << dlerror();
+    simulator_ = create_code_simulator_(target_isa);
+  }
+}
+
+CodeSimulatorContainer::~CodeSimulatorContainer() {
+  // Free the simulator object before closing libart-simulator, because the
+  // CodeSimulator destructor lives in that library.
+  if (simulator_ != nullptr) {
+    delete simulator_;
+  }
+  if (libart_simulator_handle_ != nullptr) {
+    dlclose(libart_simulator_handle_);
+  }
+}
+
+}  // namespace art
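
For the dlsym lookup above to succeed, libart-simulator must export CreateCodeSimulator with C linkage so the symbol name is not mangled. A sketch of what the exporting side plausibly looks like; the real definition lives in the simulator library and may differ:

    // In libart-simulator (sketch): extern "C" disables C++ name mangling so
    // dlsym(handle, "CreateCodeSimulator") can resolve the symbol by name.
    extern "C" CodeSimulator* CreateCodeSimulator(InstructionSet target_isa) {
      return CodeSimulator::CreateCodeSimulator(target_isa);  // assumed factory
    }
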
diff --git a/runtime/code_simulator_container.h b/runtime/code_simulator_container.h
new file mode 100644
index 0000000..655a247
--- /dev/null
+++ b/runtime/code_simulator_container.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
+#define ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
+
+#include "arch/instruction_set.h"
+#include "simulator/code_simulator.h"
+
+namespace art {
+
+// This container dynamically opens and closes libart-simulator.
+class CodeSimulatorContainer {
+ public:
+  explicit CodeSimulatorContainer(InstructionSet target_isa);
+  ~CodeSimulatorContainer();
+
+  bool CanSimulate() const {
+    return simulator_ != nullptr;
+  }
+
+  CodeSimulator* Get() {
+    DCHECK(CanSimulate());
+    return simulator_;
+  }
+
+  const CodeSimulator* Get() const {
+    DCHECK(CanSimulate());
+    return simulator_;
+  }
+
+ private:
+  void* libart_simulator_handle_;
+  CodeSimulator* simulator_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulatorContainer);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
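
Intended usage follows from the API: construct the container, check CanSimulate() (false whenever the library could not be loaded, e.g. on target), and only then call Get(). An illustrative sketch; RunFrom and code_entry are assumed names, not guaranteed by this header:

    CodeSimulatorContainer simulator(kArm64);
    if (simulator.CanSimulate()) {
      // Get() DCHECKs CanSimulate(), so it must stay behind this guard.
      simulator.Get()->RunFrom(code_entry);  // hypothetical simulator entry point
    } else {
      // Fall back to executing the code natively.
    }
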
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index b6b5141..741b682 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -72,7 +72,7 @@
   filename_ = getenv("ANDROID_DATA");
   filename_ += "/TmpFile-XXXXXX";
   int fd = mkstemp(&filename_[0]);
-  CHECK_NE(-1, fd);
+  CHECK_NE(-1, fd) << strerror(errno) << " for " << filename_;
   file_.reset(new File(fd, GetFilename(), true));
 }
 
@@ -117,14 +117,15 @@
 
 static bool unstarted_initialized_ = false;
 
-CommonRuntimeTest::CommonRuntimeTest() {}
-CommonRuntimeTest::~CommonRuntimeTest() {
+CommonRuntimeTestImpl::CommonRuntimeTestImpl() {}
+
+CommonRuntimeTestImpl::~CommonRuntimeTestImpl() {
   // Ensure the dex files are cleaned up before the runtime.
   loaded_dex_files_.clear();
   runtime_.reset();
 }
 
-void CommonRuntimeTest::SetUpAndroidRoot() {
+void CommonRuntimeTestImpl::SetUpAndroidRoot() {
   if (IsHost()) {
     // $ANDROID_ROOT is set on the device, but not necessarily on the host.
     // But it needs to be set so that icu4c can find its locale data.
@@ -166,7 +167,7 @@
   }
 }
 
-void CommonRuntimeTest::SetUpAndroidData(std::string& android_data) {
+void CommonRuntimeTestImpl::SetUpAndroidData(std::string& android_data) {
   // On target, cannot use /mnt/sdcard because it is mounted noexec, so use a subdir of dalvik-cache.
   if (IsHost()) {
     const char* tmpdir = getenv("TMPDIR");
@@ -185,7 +186,8 @@
   setenv("ANDROID_DATA", android_data.c_str(), 1);
 }
 
-void CommonRuntimeTest::TearDownAndroidData(const std::string& android_data, bool fail_on_error) {
+void CommonRuntimeTestImpl::TearDownAndroidData(const std::string& android_data,
+                                                bool fail_on_error) {
   if (fail_on_error) {
     ASSERT_EQ(rmdir(android_data.c_str()), 0);
   } else {
@@ -230,18 +232,18 @@
   }
 
   if (founddir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory.";
+    ADD_FAILURE() << "Cannot find Android tools directory.";
   }
   return founddir;
 }
 
-std::string CommonRuntimeTest::GetAndroidHostToolsDir() {
+std::string CommonRuntimeTestImpl::GetAndroidHostToolsDir() {
   return GetAndroidToolsDir("prebuilts/gcc/linux-x86/host",
                             "x86_64-linux-glibc2.15",
                             "x86_64-linux");
 }
 
-std::string CommonRuntimeTest::GetAndroidTargetToolsDir(InstructionSet isa) {
+std::string CommonRuntimeTestImpl::GetAndroidTargetToolsDir(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -269,19 +271,21 @@
   return "";
 }
 
-std::string CommonRuntimeTest::GetCoreArtLocation() {
+std::string CommonRuntimeTestImpl::GetCoreArtLocation() {
   return GetCoreFileLocation("art");
 }
 
-std::string CommonRuntimeTest::GetCoreOatLocation() {
+std::string CommonRuntimeTestImpl::GetCoreOatLocation() {
   return GetCoreFileLocation("oat");
 }
 
-std::unique_ptr<const DexFile> CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
+std::unique_ptr<const DexFile> CommonRuntimeTestImpl::LoadExpectSingleDexFile(
+    const char* location) {
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::string error_msg;
   MemMap::Init();
-  if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
+  static constexpr bool kVerifyChecksum = true;
+  if (!DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files)) {
     LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
     UNREACHABLE();
   } else {
@@ -290,7 +294,7 @@
   }
 }
 
-void CommonRuntimeTest::SetUp() {
+void CommonRuntimeTestImpl::SetUp() {
   SetUpAndroidRoot();
   SetUpAndroidData(android_data_);
   dalvik_cache_.append(android_data_.c_str());
@@ -303,7 +307,12 @@
 
 
   RuntimeOptions options;
-  std::string boot_class_path_string = "-Xbootclasspath:" + GetLibCoreDexFileName();
+  std::string boot_class_path_string = "-Xbootclasspath";
+  for (const std::string &core_dex_file_name : GetLibCoreDexFileNames()) {
+    boot_class_path_string += ":";
+    boot_class_path_string += core_dex_file_name;
+  }
+
   options.push_back(std::make_pair(boot_class_path_string, nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
   options.push_back(std::make_pair(min_heap_string, nullptr));
@@ -328,6 +337,19 @@
   class_linker_ = runtime_->GetClassLinker();
   class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
 
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we call
+  // Runtime::Start. Give it away now, then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+
+  // Get the boot class path from the runtime so it can be used in tests.
+  boot_class_path_ = class_linker_->GetBootClassPath();
+  ASSERT_FALSE(boot_class_path_.empty());
+  java_lang_dex_file_ = boot_class_path_[0];
+
+  FinalizeSetup();
+}
+
+void CommonRuntimeTestImpl::FinalizeSetup() {
   // Initialize maps for unstarted runtime. This needs to be here, as running clinits needs this
   // set up.
   if (!unstarted_initialized_) {
@@ -335,14 +357,10 @@
     unstarted_initialized_ = true;
   }
 
-  class_linker_->RunRootClinits();
-  boot_class_path_ = class_linker_->GetBootClassPath();
-  java_lang_dex_file_ = boot_class_path_[0];
-
-
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-  // Runtime::Start, give it away now and then switch to a more managable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker_->RunRootClinits();
+  }
 
   // We're back in native, take the opportunity to initialize well known classes.
   WellKnownClasses::Init(Thread::Current()->GetJniEnv());
@@ -353,14 +371,9 @@
   runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
   // Reduce timing-dependent flakiness in OOME behavior (e.g. StubTest.AllocObject).
   runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U);
-
-  // Get the boot class path from the runtime so it can be used in tests.
-  boot_class_path_ = class_linker_->GetBootClassPath();
-  ASSERT_FALSE(boot_class_path_.empty());
-  java_lang_dex_file_ = boot_class_path_[0];
 }
 
-void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
+void CommonRuntimeTestImpl::ClearDirectory(const char* dirpath) {
   ASSERT_TRUE(dirpath != nullptr);
   DIR* dir = opendir(dirpath);
   ASSERT_TRUE(dir != nullptr);
@@ -387,13 +400,14 @@
   closedir(dir);
 }
 
-void CommonRuntimeTest::TearDown() {
+void CommonRuntimeTestImpl::TearDown() {
   const char* android_data = getenv("ANDROID_DATA");
   ASSERT_TRUE(android_data != nullptr);
   ClearDirectory(dalvik_cache_.c_str());
   int rmdir_cache_result = rmdir(dalvik_cache_.c_str());
   ASSERT_EQ(0, rmdir_cache_result);
   TearDownAndroidData(android_data_, true);
+  dalvik_cache_.clear();
 
   // icu4c has a fixed 10-element array "gCommonICUDataArray".
   // If we run > 10 tests, we fill that array and u_setCommonData fails.
@@ -407,20 +421,29 @@
   Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
 }
 
-std::string CommonRuntimeTest::GetLibCoreDexFileName() {
-  return GetDexFileName("core-libart");
-}
-
-std::string CommonRuntimeTest::GetDexFileName(const std::string& jar_prefix) {
-  if (IsHost()) {
+static std::string GetDexFileName(const std::string& jar_prefix, bool host) {
+  std::string path;
+  if (host) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != nullptr);
-    return StringPrintf("%s/framework/%s-hostdex.jar", host_dir, jar_prefix.c_str());
+    path = host_dir;
+  } else {
+    path = GetAndroidRoot();
   }
-  return StringPrintf("%s/framework/%s.jar", GetAndroidRoot(), jar_prefix.c_str());
+
+  std::string suffix = host
+      ? "-hostdex"                 // The host version.
+      : "-testdex";                // The unstripped target version.
+
+  return StringPrintf("%s/framework/%s%s.jar", path.c_str(), jar_prefix.c_str(), suffix.c_str());
 }
 
-std::string CommonRuntimeTest::GetTestAndroidRoot() {
+std::vector<std::string> CommonRuntimeTestImpl::GetLibCoreDexFileNames() {
+  return std::vector<std::string>({GetDexFileName("core-oj", IsHost()),
+                                   GetDexFileName("core-libart", IsHost())});
+}
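
Tracing GetDexFileName through both branches, the boot dex list resolves to paths of the following shape (actual values depend on the build environment):

    // Host:   $ANDROID_HOST_OUT/framework/core-oj-hostdex.jar
    //         $ANDROID_HOST_OUT/framework/core-libart-hostdex.jar
    // Target: <android root>/framework/core-oj-testdex.jar
    //         <android root>/framework/core-libart-testdex.jar
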
+
+std::string CommonRuntimeTestImpl::GetTestAndroidRoot() {
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != nullptr);
@@ -440,7 +463,7 @@
 #define ART_TARGET_NATIVETEST_DIR_STRING ""
 #endif
 
-std::string CommonRuntimeTest::GetTestDexFileName(const char* name) {
+std::string CommonRuntimeTestImpl::GetTestDexFileName(const char* name) const {
   CHECK(name != nullptr);
   std::string filename;
   if (IsHost()) {
@@ -455,11 +478,14 @@
   return filename;
 }
 
-std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTest::OpenTestDexFiles(const char* name) {
+std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTestImpl::OpenTestDexFiles(
+    const char* name) {
   std::string filename = GetTestDexFileName(name);
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
+  bool success = DexFile::Open(
+      filename.c_str(), filename.c_str(), kVerifyChecksum, &error_msg, &dex_files);
   CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
   for (auto& dex_file : dex_files) {
     CHECK_EQ(PROT_READ, dex_file->GetPermissions());
@@ -468,13 +494,13 @@
   return dex_files;
 }
 
-std::unique_ptr<const DexFile> CommonRuntimeTest::OpenTestDexFile(const char* name) {
+std::unique_ptr<const DexFile> CommonRuntimeTestImpl::OpenTestDexFile(const char* name) {
   std::vector<std::unique_ptr<const DexFile>> vector = OpenTestDexFiles(name);
   EXPECT_EQ(1U, vector.size());
   return std::move(vector[0]);
 }
 
-std::vector<const DexFile*> CommonRuntimeTest::GetDexFiles(jobject jclass_loader) {
+std::vector<const DexFile*> CommonRuntimeTestImpl::GetDexFiles(jobject jclass_loader) {
   std::vector<const DexFile*> ret;
 
   ScopedObjectAccess soa(Thread::Current());
@@ -534,7 +560,7 @@
   return ret;
 }
 
-const DexFile* CommonRuntimeTest::GetFirstDexFile(jobject jclass_loader) {
+const DexFile* CommonRuntimeTestImpl::GetFirstDexFile(jobject jclass_loader) {
   std::vector<const DexFile*> tmp(GetDexFiles(jclass_loader));
   DCHECK(!tmp.empty());
   const DexFile* ret = tmp[0];
@@ -542,7 +568,7 @@
   return ret;
 }
 
-jobject CommonRuntimeTest::LoadDex(const char* dex_name) {
+jobject CommonRuntimeTestImpl::LoadDex(const char* dex_name) {
   std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles(dex_name);
   std::vector<const DexFile*> class_path;
   CHECK_NE(0U, dex_files.size());
@@ -558,7 +584,7 @@
   return class_loader;
 }
 
-std::string CommonRuntimeTest::GetCoreFileLocation(const char* suffix) {
+std::string CommonRuntimeTestImpl::GetCoreFileLocation(const char* suffix) {
   CHECK(suffix != nullptr);
 
   std::string location;
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index a474ae6..2d16a49 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -64,8 +64,10 @@
   std::unique_ptr<File> file_;
 };
 
-class CommonRuntimeTest : public testing::Test {
+class CommonRuntimeTestImpl {
  public:
+  CommonRuntimeTestImpl();
+  virtual ~CommonRuntimeTestImpl();
   static void SetUpAndroidRoot();
 
   // Note: setting up ANDROID_DATA may create a temporary directory. If this is used in a
@@ -74,19 +76,25 @@
 
   static void TearDownAndroidData(const std::string& android_data, bool fail_on_error);
 
-  CommonRuntimeTest();
-  ~CommonRuntimeTest();
-
-  // Gets the path of the libcore dex file.
-  static std::string GetLibCoreDexFileName();
+  // Gets the paths of the libcore dex files.
+  static std::vector<std::string> GetLibCoreDexFileNames();
 
   // Returns the bin directory which contains the host's prebuilt tools.
   static std::string GetAndroidHostToolsDir();
 
-  // Returns bin directory which contains target's prebuild tools.
+  // Returns the bin directory which contains the target's prebuilt tools.
   static std::string GetAndroidTargetToolsDir(InstructionSet isa);
 
  protected:
+  // Allow subclasses such as CommonCompilerTest to add extra options.
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
+
+  // Called before the runtime is created.
+  virtual void PreRuntimeCreate() {}
+
+  // Called after the runtime is created.
+  virtual void PostRuntimeCreate() {}
+
   static bool IsHost() {
     return !kIsTargetBuild;
   }
@@ -99,27 +107,11 @@
 
   std::unique_ptr<const DexFile> LoadExpectSingleDexFile(const char* location);
 
-  virtual void SetUp();
-
-  // Allow subclases such as CommonCompilerTest to add extra options.
-  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
-
   void ClearDirectory(const char* dirpath);
 
-  virtual void TearDown();
-
-  // Called before the runtime is created.
-  virtual void PreRuntimeCreate() {}
-
-  // Called after the runtime is created.
-  virtual void PostRuntimeCreate() {}
-
-  // Gets the path of the specified dex file for host or target.
-  static std::string GetDexFileName(const std::string& jar_prefix);
-
   std::string GetTestAndroidRoot();
 
-  std::string GetTestDexFileName(const char* name);
+  std::string GetTestDexFileName(const char* name) const;
 
   std::vector<std::unique_ptr<const DexFile>> OpenTestDexFiles(const char* name)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -149,12 +141,41 @@
 
   std::unique_ptr<CompilerCallbacks> callbacks_;
 
+  virtual void SetUp();
+
+  virtual void TearDown();
+
+  // Called to finish up runtime creation and fill in test fields. By default runs root
+  // initializers, initializes well-known classes, and creates the heap thread pool.
+  virtual void FinalizeSetup();
+
  private:
   static std::string GetCoreFileLocation(const char* suffix);
 
   std::vector<std::unique_ptr<const DexFile>> loaded_dex_files_;
 };
 
+template <typename TestType>
+class CommonRuntimeTestBase : public TestType, public CommonRuntimeTestImpl {
+ public:
+  CommonRuntimeTestBase() {}
+  virtual ~CommonRuntimeTestBase() {}
+
+ protected:
+  virtual void SetUp() OVERRIDE {
+    CommonRuntimeTestImpl::SetUp();
+  }
+
+  virtual void TearDown() OVERRIDE {
+    CommonRuntimeTestImpl::TearDown();
+  }
+};
+
+using CommonRuntimeTest = CommonRuntimeTestBase<testing::Test>;
+
+template <typename Param>
+using CommonRuntimeTestWithParam = CommonRuntimeTestBase<testing::TestWithParam<Param>>;
+
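
A minimal sketch of how a test might use the two aliases; the fixture and test names here are illustrative only:

    // Plain fixture: a drop-in for the old concrete CommonRuntimeTest class.
    class BootClassPathTest : public CommonRuntimeTest {};

    TEST_F(BootClassPathTest, HasJavaLangDexFile) {
      EXPECT_TRUE(java_lang_dex_file_ != nullptr);  // Filled in by SetUp().
    }

    // Parameterized fixture: previously this required hand-rolling a
    // TestWithParam mixin; the alias now provides it.
    class PerIsaTest : public CommonRuntimeTestWithParam<InstructionSet> {};

    TEST_P(PerIsaTest, IsaIsValid) {
      EXPECT_NE(kNone, GetParam());
    }

    INSTANTIATE_TEST_CASE_P(AllIsas, PerIsaTest, ::testing::Values(kArm, kX86_64));
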
 // Sets a CheckJni abort hook to catch failures. Note that this will cause CheckJNI to carry on
 // rather than aborting, so be careful!
 class CheckJniAbortCatcher {
@@ -174,19 +195,24 @@
   DISALLOW_COPY_AND_ASSIGN(CheckJniAbortCatcher);
 };
 
-// TODO: When read barrier works with the compiler, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER() \
-  if (kUseReadBarrier) { \
-    printf("WARNING: TEST DISABLED FOR READ BARRIER\n"); \
-    return; \
-  }
-
 #define TEST_DISABLED_FOR_MIPS() \
   if (kRuntimeISA == kMips) { \
     printf("WARNING: TEST DISABLED FOR MIPS\n"); \
     return; \
   }
 
+#define TEST_DISABLED_FOR_READ_BARRIER_ON_X86() \
+  if (kUseReadBarrier && kRuntimeISA == kX86) { \
+    printf("WARNING: TEST DISABLED FOR READ BARRIER ON X86\n"); \
+    return; \
+  }
+
+#define TEST_DISABLED_FOR_STRING_COMPRESSION() \
+  if (mirror::kUseStringCompression) { \
+    printf("WARNING: TEST DISABLED FOR STRING COMPRESSION\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index de692d1..99732c6 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -18,6 +18,8 @@
 
 #include <sstream>
 
+#include "ScopedLocalRef.h"
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/logging.h"
@@ -84,6 +86,14 @@
                               PrettyMethod(method).c_str()).c_str());
 }
 
+void ThrowAbstractMethodError(uint32_t method_idx, const DexFile& dex_file) {
+  ThrowException("Ljava/lang/AbstractMethodError;", /* referrer */ nullptr,
+                 StringPrintf("abstract method \"%s\"",
+                              PrettyMethod(method_idx,
+                                           dex_file,
+                                           /* with_signature */ true).c_str()).c_str());
+}
+
 // ArithmeticException
 
 void ThrowArithmeticExceptionDivideByZero() {
@@ -127,13 +137,21 @@
   ThrowException("Ljava/lang/ClassCircularityError;", c, msg.str().c_str());
 }
 
+void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  ThrowException("Ljava/lang/ClassCircularityError;", c, fmt, &args);
+  va_end(args);
+}
+
 // ClassFormatError
 
 void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/ClassFormatError;", referrer, fmt, &args);
-  va_end(args);}
+  va_end(args);
+}
 
 // IllegalAccessError
 
@@ -209,6 +227,22 @@
                  msg.str().c_str());
 }
 
+void ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(ArtMethod* method,
+                                                             mirror::Class* target_class,
+                                                             mirror::Object* this_object,
+                                                             ArtMethod* referrer) {
+  // Referrer is calling an interface method on this_object; however, that interface method
+  // isn't implemented by this_object's class.
+  CHECK(this_object != nullptr);
+  std::ostringstream msg;
+  msg << "Class '" << PrettyDescriptor(this_object->GetClass())
+      << "' does not implement interface '" << PrettyDescriptor(target_class) << "' in call to '"
+      << PrettyMethod(method) << "'";
+  ThrowException("Ljava/lang/IncompatibleClassChangeError;",
+                 referrer != nullptr ? referrer->GetDeclaringClass() : nullptr,
+                 msg.str().c_str());
+}
+
 void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
                                                                 ArtMethod* referrer) {
@@ -242,6 +276,15 @@
   va_end(args);
 }
 
+void ThrowIncompatibleClassChangeErrorForMethodConflict(ArtMethod* method) {
+  DCHECK(method != nullptr);
+  ThrowException("Ljava/lang/IncompatibleClassChangeError;",
+                 /*referrer*/nullptr,
+                 StringPrintf("Conflicting default method implementations %s",
+                              PrettyMethod(method).c_str()).c_str());
+}
+
 // IOException
 
 void ThrowIOException(const char* fmt, ...) {
@@ -359,12 +402,117 @@
                                                dex_file, type);
 }
 
-void ThrowNullPointerExceptionFromDexPC() {
+static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instruction& instr)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (!CanDoImplicitNullCheckOn(addr)) {
+    return false;
+  }
+
+  switch (instr.Opcode()) {
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      // Without inlining, we could just check that the offset is the class offset.
+      // However, when inlining, the compiler can (validly) merge the null check with a field access
+      // on the same object. Note that the stack map at the NPE will reflect the invoke's location,
+      // which is the caller.
+      return true;
+    }
+
+    case Instruction::IGET:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_SHORT:
+    case Instruction::IPUT:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT: {
+      ArtField* field =
+          Runtime::Current()->GetClassLinker()->ResolveField(instr.VRegC_22c(), method, false);
+      return (addr == 0) ||
+          (addr == field->GetOffset().Uint32Value()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT_QUICK:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT_QUICK:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT_QUICK: {
+      return (addr == 0u) ||
+          (addr == instr.VRegC_22c()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    case Instruction::AGET:
+    case Instruction::AGET_WIDE:
+    case Instruction::AGET_OBJECT:
+    case Instruction::AGET_BOOLEAN:
+    case Instruction::AGET_BYTE:
+    case Instruction::AGET_CHAR:
+    case Instruction::AGET_SHORT:
+    case Instruction::APUT:
+    case Instruction::APUT_WIDE:
+    case Instruction::APUT_OBJECT:
+    case Instruction::APUT_BOOLEAN:
+    case Instruction::APUT_BYTE:
+    case Instruction::APUT_CHAR:
+    case Instruction::APUT_SHORT:
+    case Instruction::FILL_ARRAY_DATA:
+    case Instruction::ARRAY_LENGTH: {
+      // The length access should crash. We currently do not do implicit checks on
+      // the array access itself.
+      return (addr == 0u) ||
+          (addr == mirror::Array::LengthOffset().Uint32Value()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    default: {
+      // We have covered all the cases where an NPE could occur.
+      // Note that this must be kept in sync with the compiler, and adding
+      // any new way to do implicit checks in the compiler should also update
+      // this code.
+      return false;
+    }
+  }
+}
+
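
For context: implicit null checks work by letting the faulting load or store actually trap. ART's fault handler reads the faulting address and, only if IsValidImplicitCheck accepts it for the current instruction, raises the NPE through the function below. A standalone sketch of the underlying POSIX mechanism, independent of ART's real fault handler:

    #include <csignal>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Minimal illustration: a real runtime validates si_addr against the
    // expected field/monitor/array-length offsets before throwing.
    static void NullCheckHandler(int, siginfo_t* info, void*) {
      uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
      std::fprintf(stderr, "implicit null check trapped at %#zx\n",
                   static_cast<size_t>(fault_addr));
      std::_Exit(1);  // A runtime would instead unwind and throw the NPE.
    }

    int main() {
      struct sigaction sa = {};
      sa.sa_sigaction = NullCheckHandler;
      sa.sa_flags = SA_SIGINFO;
      sigaction(SIGSEGV, &sa, nullptr);
      volatile int* object = nullptr;
      return *object;  // Faults; the handler observes si_addr near 0.
    }
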
+void ThrowNullPointerExceptionFromDexPC(bool check_address, uintptr_t addr) {
   uint32_t throw_dex_pc;
   ArtMethod* method = Thread::Current()->GetCurrentMethod(&throw_dex_pc);
   const DexFile::CodeItem* code = method->GetCodeItem();
   CHECK_LT(throw_dex_pc, code->insns_size_in_code_units_);
   const Instruction* instr = Instruction::At(&code->insns_[throw_dex_pc]);
+  if (check_address && !IsValidImplicitCheck(addr, method, *instr)) {
+    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+    LOG(FATAL) << "Invalid address for an implicit NullPointerException check: "
+               << "0x" << std::hex << addr << std::dec
+               << ", at "
+               << instr->DumpString(dex_file)
+               << " in "
+               << PrettyMethod(method);
+  }
+
   switch (instr->Opcode()) {
     case Instruction::INVOKE_DIRECT:
       ThrowNullPointerExceptionForMethodAccess(instr->VRegB_35c(), kDirect);
@@ -487,14 +635,24 @@
       ThrowException("Ljava/lang/NullPointerException;", nullptr,
                      "Attempt to get length of null array");
       break;
+    case Instruction::FILL_ARRAY_DATA: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to write to null array");
+      break;
+    }
+    case Instruction::MONITOR_ENTER:
+    case Instruction::MONITOR_EXIT: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to do a synchronize operation on a null object");
+      break;
+    }
     default: {
-      // TODO: We should have covered all the cases where we expect a NPE above, this
-      //       message/logging is so we can improve any cases we've missed in the future.
       const DexFile* dex_file =
           method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-      ThrowException("Ljava/lang/NullPointerException;", nullptr,
-                     StringPrintf("Null pointer exception during instruction '%s'",
-                                  instr->DumpString(dex_file).c_str()).c_str());
+      LOG(FATAL) << "NullPointerException at an unexpected instruction: "
+                 << instr->DumpString(dex_file)
+                 << " in "
+                 << PrettyMethod(method);
       break;
     }
   }
@@ -513,6 +671,111 @@
   va_end(args);
 }
 
+// Stack overflow.
+
+void ThrowStackOverflowError(Thread* self) {
+  if (self->IsHandlingStackOverflow()) {
+    LOG(ERROR) << "Recursive stack overflow.";
+    // We don't fail here because SetStackEndForStackOverflow will print better diagnostics.
+  }
+
+  self->SetStackEndForStackOverflow();  // Allow space on the stack for constructor to execute.
+  JNIEnvExt* env = self->GetJniEnv();
+  std::string msg("stack size ");
+  msg += PrettySize(self->GetStackSize());
+
+  // Avoid running Java code for exception initialization.
+  // TODO: Checks to make this a bit less brittle.
+
+  std::string error_msg;
+
+  // Allocate an uninitialized object.
+  ScopedLocalRef<jobject> exc(env,
+                              env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
+  if (exc.get() != nullptr) {
+    // "Initialize".
+    // StackOverflowError -> VirtualMachineError -> Error -> Throwable -> Object.
+    // Only Throwable has "custom" fields:
+    //   String detailMessage.
+    //   Throwable cause (= this).
+    //   List<Throwable> suppressedExceptions (= Collections.emptyList()).
+    //   Object stackState;
+    //   StackTraceElement[] stackTrace;
+    // Only Throwable has a non-empty constructor:
+    //   this.stackTrace = EmptyArray.STACK_TRACE_ELEMENT;
+    //   fillInStackTrace();
+
+    // detailMessage.
+    // TODO: Use String::FromModifiedUTF...?
+    ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg.c_str()));
+    if (s.get() != nullptr) {
+      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_detailMessage, s.get());
+
+      // cause.
+      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_cause, exc.get());
+
+      // suppressedExceptions.
+      ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
+          WellKnownClasses::java_util_Collections,
+          WellKnownClasses::java_util_Collections_EMPTY_LIST));
+      CHECK(emptylist.get() != nullptr);
+      env->SetObjectField(exc.get(),
+                          WellKnownClasses::java_lang_Throwable_suppressedExceptions,
+                          emptylist.get());
+
+      // stackState is set as a result of fillInStackTrace, which calls
+      // nativeFillInStackTrace.
+      ScopedLocalRef<jobject> stack_state_val(env, nullptr);
+      {
+        ScopedObjectAccessUnchecked soa(env);
+        stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
+      }
+      if (stack_state_val.get() != nullptr) {
+        env->SetObjectField(exc.get(),
+                            WellKnownClasses::java_lang_Throwable_stackState,
+                            stack_state_val.get());
+
+        // stackTrace.
+        ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
+            WellKnownClasses::libcore_util_EmptyArray,
+            WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT));
+        env->SetObjectField(exc.get(),
+                            WellKnownClasses::java_lang_Throwable_stackTrace,
+                            stack_trace_elem.get());
+      } else {
+        error_msg = "Could not create stack trace.";
+      }
+      // Throw the exception.
+      self->SetException(reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get())));
+    } else {
+      // Could not allocate a string object.
+      error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
+    }
+  } else {
+    error_msg = "Could not allocate StackOverflowError object.";
+  }
+
+  if (!error_msg.empty()) {
+    LOG(WARNING) << error_msg;
+    CHECK(self->IsExceptionPending());
+  }
+
+  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
+  self->ResetDefaultStackEnd();  // Return to default stack size.
+
+  // And restore protection if implicit checks are on.
+  if (!explicit_overflow_check) {
+    self->ProtectStack();
+  }
+}
+
+// StringIndexOutOfBoundsException
+
+void ThrowStringIndexOutOfBoundsException(int index, int length) {
+  ThrowException("Ljava/lang/StringIndexOutOfBoundsException;", nullptr,
+                 StringPrintf("length=%d; index=%d", length, index).c_str());
+}
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 2402e6f..cbd338d 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -27,6 +27,7 @@
 }  // namespace mirror
 class ArtField;
 class ArtMethod;
+class DexFile;
 class Signature;
 class StringPiece;
 
@@ -35,6 +36,9 @@
 void ThrowAbstractMethodError(ArtMethod* method)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowAbstractMethodError(uint32_t method_idx, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // ArithmeticException
 
 void ThrowArithmeticExceptionDivideByZero() SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
@@ -54,6 +58,9 @@
 void ThrowClassCircularityError(mirror::Class* c)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // ClassCastException
 
 void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type)
@@ -107,6 +114,12 @@
                                        ArtMethod* method, ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(ArtMethod* method,
+                                                             mirror::Class* target_class,
+                                                             mirror::Object* this_object,
+                                                             ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
                                                                 ArtMethod* referrer)
@@ -120,6 +133,9 @@
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowIncompatibleClassChangeErrorForMethodConflict(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // IOException
 
 void ThrowIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)))
@@ -151,10 +167,10 @@
 
 void ThrowNoSuchFieldError(const StringPiece& scope, mirror::Class* c,
                            const StringPiece& type, const StringPiece& name)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowNoSuchFieldException(mirror::Class* c, const StringPiece& name)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 // NoSuchMethodError
 
@@ -179,7 +195,7 @@
                                               InvokeType type)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowNullPointerExceptionFromDexPC()
+void ThrowNullPointerExceptionFromDexPC(bool check_address = false, uintptr_t addr = 0)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowNullPointerException(const char* msg)
@@ -191,6 +207,15 @@
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+// Stack overflow.
+
+void ThrowStackOverflowError(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
+// StringIndexOutOfBoundsException
+
+void ThrowStringIndexOutOfBoundsException(int index, int length)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index af7b04f..a39d682 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -37,8 +37,8 @@
 
   virtual ~CompilerCallbacks() { }
 
-  virtual bool MethodVerified(verifier::MethodVerifier* verifier)
-  SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void MethodVerified(verifier::MethodVerifier* verifier)
+      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
   virtual void ClassRejected(ClassReference ref) = 0;
 
   // Return true if we should attempt to relocate to a random base address if we have not already
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
new file mode 100644
index 0000000..dc197c1
--- /dev/null
+++ b/runtime/compiler_filter.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler_filter.h"
+
+#include "utils.h"
+
+namespace art {
+
+bool CompilerFilter::IsBytecodeCompilationEnabled(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime:
+    case CompilerFilter::kVerifyProfile:
+    case CompilerFilter::kInterpretOnly: return false;
+
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverythingProfile:
+    case CompilerFilter::kEverything: return true;
+  }
+  UNREACHABLE();
+}
+
+bool CompilerFilter::IsJniCompilationEnabled(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime: return false;
+
+    case CompilerFilter::kVerifyProfile:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverythingProfile:
+    case CompilerFilter::kEverything: return true;
+  }
+  UNREACHABLE();
+}
+
+bool CompilerFilter::IsVerificationEnabled(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime: return false;
+
+    case CompilerFilter::kVerifyProfile:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverythingProfile:
+    case CompilerFilter::kEverything: return true;
+  }
+  UNREACHABLE();
+}
+
+bool CompilerFilter::DependsOnImageChecksum(Filter filter) {
+  // We run dex2dex with verification, so the oat file will depend on the
+  // image checksum if verification is enabled.
+  return IsVerificationEnabled(filter);
+}
+
+bool CompilerFilter::DependsOnProfile(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverything: return false;
+
+    case CompilerFilter::kVerifyProfile:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kEverythingProfile: return true;
+  }
+  UNREACHABLE();
+}
+
+CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverything:
+      return filter;
+
+    case CompilerFilter::kVerifyProfile:
+      return CompilerFilter::kInterpretOnly;
+
+    case CompilerFilter::kSpaceProfile:
+      return CompilerFilter::kSpace;
+
+    case CompilerFilter::kSpeedProfile:
+      return CompilerFilter::kSpeed;
+
+    case CompilerFilter::kEverythingProfile:
+      return CompilerFilter::kEverything;
+  }
+  UNREACHABLE();
+}
+
+
+bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) {
+  return current >= target;
+}
+
+std::string CompilerFilter::NameOfFilter(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone: return "verify-none";
+    case CompilerFilter::kVerifyAtRuntime: return "verify-at-runtime";
+    case CompilerFilter::kVerifyProfile: return "verify-profile";
+    case CompilerFilter::kInterpretOnly: return "interpret-only";
+    case CompilerFilter::kSpaceProfile: return "space-profile";
+    case CompilerFilter::kSpace: return "space";
+    case CompilerFilter::kBalanced: return "balanced";
+    case CompilerFilter::kTime: return "time";
+    case CompilerFilter::kSpeedProfile: return "speed-profile";
+    case CompilerFilter::kSpeed: return "speed";
+    case CompilerFilter::kEverythingProfile: return "everything-profile";
+    case CompilerFilter::kEverything: return "everything";
+  }
+  UNREACHABLE();
+}
+
+bool CompilerFilter::ParseCompilerFilter(const char* option, Filter* filter) {
+  CHECK(filter != nullptr);
+
+  if (strcmp(option, "verify-none") == 0) {
+    *filter = kVerifyNone;
+  } else if (strcmp(option, "interpret-only") == 0) {
+    *filter = kInterpretOnly;
+  } else if (strcmp(option, "verify-profile") == 0) {
+    *filter = kVerifyProfile;
+  } else if (strcmp(option, "verify-at-runtime") == 0) {
+    *filter = kVerifyAtRuntime;
+  } else if (strcmp(option, "space") == 0) {
+    *filter = kSpace;
+  } else if (strcmp(option, "space-profile") == 0) {
+    *filter = kSpaceProfile;
+  } else if (strcmp(option, "balanced") == 0) {
+    *filter = kBalanced;
+  } else if (strcmp(option, "speed") == 0) {
+    *filter = kSpeed;
+  } else if (strcmp(option, "speed-profile") == 0) {
+    *filter = kSpeedProfile;
+  } else if (strcmp(option, "everything") == 0) {
+    *filter = kEverything;
+  } else if (strcmp(option, "everything-profile") == 0) {
+    *filter = kEverythingProfile;
+  } else if (strcmp(option, "time") == 0) {
+    *filter = kTime;
+  } else {
+    return false;
+  }
+  return true;
+}
+
+std::ostream& operator<<(std::ostream& os, const CompilerFilter::Filter& rhs) {
+  return os << CompilerFilter::NameOfFilter(rhs);
+}
+
+}  // namespace art
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
new file mode 100644
index 0000000..37631cc
--- /dev/null
+++ b/runtime/compiler_filter.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_COMPILER_FILTER_H_
+#define ART_RUNTIME_COMPILER_FILTER_H_
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+
+namespace art {
+
+class CompilerFilter FINAL {
+ public:
+  // Note: Order here matters. Later filter choices are considered "as good
+  // as" earlier filter choices.
+  enum Filter {
+    kVerifyNone,          // Skip verification but mark all classes as verified anyway.
+    kVerifyAtRuntime,     // Delay verification to runtime, do not compile anything.
+    kVerifyProfile,       // Verify only the classes in the profile, compile only JNI stubs.
+    kInterpretOnly,       // Verify everything, compile only JNI stubs.
+    kTime,                // Compile methods, but minimize compilation time.
+    kSpaceProfile,        // Maximize space savings based on profile.
+    kSpace,               // Maximize space savings.
+    kBalanced,            // Good performance return on compilation investment.
+    kSpeedProfile,        // Maximize runtime performance based on profile.
+    kSpeed,               // Maximize runtime performance.
+    kEverythingProfile,   // Compile everything capable of being compiled based on profile.
+    kEverything,          // Compile everything capable of being compiled.
+  };
+
+  static const Filter kDefaultCompilerFilter = kSpeed;
+
+  // Returns true if an oat file with this compiler filter contains
+  // compiled executable code for bytecode.
+  static bool IsBytecodeCompilationEnabled(Filter filter);
+
+  // Returns true if an oat file with this compiler filter contains
+  // compiled executable code for JNI methods.
+  static bool IsJniCompilationEnabled(Filter filter);
+
+  // Returns true if this compiler filter requires running verification.
+  static bool IsVerificationEnabled(Filter filter);
+
+  // Returns true if an oat file with this compiler filter depends on the
+  // boot image checksum.
+  static bool DependsOnImageChecksum(Filter filter);
+
+  // Returns true if an oat file with this compiler filter depends on a
+  // profile.
+  static bool DependsOnProfile(Filter filter);
+
+  // Returns a non-profile-guided version of the given filter.
+  static Filter GetNonProfileDependentFilterFrom(Filter filter);
+
+  // Returns true if the 'current' compiler filter is considered at least as
+  // good as the 'target' compilation type.
+  // For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is
+  // not as good as kSpeed.
+  static bool IsAsGoodAs(Filter current, Filter target);
+
+  // Return the flag name of the given filter.
+  // For example: given kVerifyAtRuntime, returns "verify-at-runtime".
+  // The name returned corresponds to the name accepted by
+  // ParseCompilerFilter.
+  static std::string NameOfFilter(Filter filter);
+
+  // Parse the compiler filter from the given name.
+  // Returns true and sets filter to the parsed value if name refers to a
+  // valid filter. Returns false if no filter matches that name.
+  // 'filter' must be non-null.
+  static bool ParseCompilerFilter(const char* name, /*out*/Filter* filter);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CompilerFilter);
+};
+
+std::ostream& operator<<(std::ostream& os, const CompilerFilter::Filter& rhs);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_COMPILER_FILTER_H_
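
A short sketch of the intended call pattern, mirroring the header's own kSpeed/kInterpretOnly example; illustrative only, not actual dex2oat code:

    CompilerFilter::Filter filter;
    if (!CompilerFilter::ParseCompilerFilter("speed-profile", &filter)) {
      LOG(FATAL) << "Unknown compiler filter";
    }
    // The enum is ordered weakest-to-strongest, so IsAsGoodAs is simply >=.
    CHECK(CompilerFilter::IsAsGoodAs(filter, CompilerFilter::kInterpretOnly));
    CHECK(!CompilerFilter::IsAsGoodAs(CompilerFilter::kInterpretOnly, filter));
    CHECK(CompilerFilter::DependsOnProfile(filter));  // speed-profile is profile-guided.
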
diff --git a/runtime/compiler_filter_test.cc b/runtime/compiler_filter_test.cc
new file mode 100644
index 0000000..c603be6
--- /dev/null
+++ b/runtime/compiler_filter_test.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler_filter.h"
+
+#include <gtest/gtest.h>
+
+namespace art {
+
+static void TestCompilerFilterName(CompilerFilter::Filter filter, std::string name) {
+  CompilerFilter::Filter parsed;
+  EXPECT_TRUE(CompilerFilter::ParseCompilerFilter(name.c_str(), &parsed));
+  EXPECT_EQ(filter, parsed);
+
+  EXPECT_EQ(name, CompilerFilter::NameOfFilter(filter));
+}
+
+// Verify that each compiler filter name parses to the expected filter value
+// and that NameOfFilter round-trips back to the same name.
+TEST(CompilerFilterTest, ParseCompilerFilter) {
+  CompilerFilter::Filter filter;
+
+  TestCompilerFilterName(CompilerFilter::kVerifyNone, "verify-none");
+  TestCompilerFilterName(CompilerFilter::kVerifyAtRuntime, "verify-at-runtime");
+  TestCompilerFilterName(CompilerFilter::kVerifyProfile, "verify-profile");
+  TestCompilerFilterName(CompilerFilter::kInterpretOnly, "interpret-only");
+  TestCompilerFilterName(CompilerFilter::kTime, "time");
+  TestCompilerFilterName(CompilerFilter::kSpaceProfile, "space-profile");
+  TestCompilerFilterName(CompilerFilter::kSpace, "space");
+  TestCompilerFilterName(CompilerFilter::kBalanced, "balanced");
+  TestCompilerFilterName(CompilerFilter::kSpeedProfile, "speed-profile");
+  TestCompilerFilterName(CompilerFilter::kSpeed, "speed");
+  TestCompilerFilterName(CompilerFilter::kEverythingProfile, "everything-profile");
+  TestCompilerFilterName(CompilerFilter::kEverything, "everything");
+
+  EXPECT_FALSE(CompilerFilter::ParseCompilerFilter("super-awesome-filter", &filter));
+}
+
+}  // namespace art
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index b17b76e..a5b0689 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -23,11 +23,13 @@
 #include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/allocation_record.h"
 #include "gc/scoped_gc_critical_section.h"
@@ -43,7 +45,6 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "mirror/throwable.h"
-#include "quick/inline_method_analyser.h"
 #include "reflection.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
@@ -52,7 +53,6 @@
 #include "handle_scope-inl.h"
 #include "thread_list.h"
 #include "utf.h"
-#include "verifier/method_verifier-inl.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -69,29 +69,41 @@
   return alloc_record_count;
 }
 
-class Breakpoint {
+// Takes a method and returns a 'canonical' one if the method is default (and therefore potentially
+// copied from some other class). This ensures that the debugger does not get confused as to which
+// method we are in.
+static ArtMethod* GetCanonicalMethod(ArtMethod* m)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(!m->IsDefault())) {
+    return m;
+  } else {
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
+                                                      m->GetDexMethodIndex(),
+                                                      kRuntimePointerSize);
+  }
+}
+
+class Breakpoint : public ValueObject {
  public:
-  Breakpoint(ArtMethod* method, uint32_t dex_pc,
-             DeoptimizationRequest::Kind deoptimization_kind)
-    SHARED_REQUIRES(Locks::mutator_lock_)
-    : method_(nullptr), dex_pc_(dex_pc), deoptimization_kind_(deoptimization_kind) {
+  Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
+    : method_(GetCanonicalMethod(method)),
+      dex_pc_(dex_pc),
+      deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
           deoptimization_kind_ == DeoptimizationRequest::kSelectiveDeoptimization ||
           deoptimization_kind_ == DeoptimizationRequest::kFullDeoptimization);
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    method_ = soa.EncodeMethod(method);
   }
 
   Breakpoint(const Breakpoint& other) SHARED_REQUIRES(Locks::mutator_lock_)
-    : method_(nullptr), dex_pc_(other.dex_pc_),
-      deoptimization_kind_(other.deoptimization_kind_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    method_ = soa.EncodeMethod(other.Method());
-  }
+    : method_(other.method_),
+      dex_pc_(other.dex_pc_),
+      deoptimization_kind_(other.deoptimization_kind_) {}
 
-  ArtMethod* Method() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    return soa.DecodeMethod(method_);
+  // Method() is called from root visiting; do not use ScopedObjectAccess here, or it can cause
+  // the GC to deadlock if another thread calls SuspendAll while the GC is in a runnable state.
+  ArtMethod* Method() const {
+    return method_;
   }
 
   uint32_t DexPc() const {
@@ -102,9 +114,15 @@
     return deoptimization_kind_;
   }
 
+  // Returns true if the method of this breakpoint and the passed-in method should be considered
+  // the same. That is, they are either the same method or they are copied from the same method.
+  bool IsInMethod(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return method_ == GetCanonicalMethod(m);
+  }
+
  private:
   // The location of this breakpoint.
-  jmethodID method_;
+  ArtMethod* method_;
   uint32_t dex_pc_;
 
   // Indicates whether breakpoint needs full deoptimization or selective deoptimization.
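To make the canonicalization concrete: two Breakpoint objects built from two
copies of the same default method now compare as being "in" the same method. A
hedged sketch, where copy_a and copy_b are hypothetical ArtMethod* copies of
one interface default method:

    Breakpoint bp(copy_a, /* dex_pc */ 0, DeoptimizationRequest::kNothing);
    // Both copies canonicalize to the declaring interface's method, so:
    CHECK(bp.IsInMethod(copy_b));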
@@ -212,11 +230,11 @@
     Dbg::PostException(exception_object);
   }
 
-  // We only care about how many backward branches were executed in the Jit.
-  void BackwardBranch(Thread* /*thread*/, ArtMethod* method, int32_t dex_pc_offset)
+  // We only care about branches in the Jit.
+  void Branch(Thread* /*thread*/, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected backward branch event in debugger " << PrettyMethod(method)
-               << " " << dex_pc_offset;
+    LOG(ERROR) << "Unexpected branch event in debugger " << PrettyMethod(method)
+               << " " << dex_pc << ", " << dex_pc_offset;
   }
 
   // We only care about invokes in the Jit.
@@ -309,12 +327,12 @@
   return dex_pcs_.find(dex_pc) == dex_pcs_.end();
 }
 
-static bool IsBreakpoint(const ArtMethod* m, uint32_t dex_pc)
+static bool IsBreakpoint(ArtMethod* m, uint32_t dex_pc)
     REQUIRES(!Locks::breakpoint_lock_)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
     }
@@ -552,6 +570,29 @@
   return !Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly();
 }
 
+// Used to patch boot image method entry points to the interpreter bridge.
+class UpdateEntryPointsClassVisitor : public ClassVisitor {
+ public:
+  explicit UpdateEntryPointsClassVisitor(instrumentation::Instrumentation* instrumentation)
+      : instrumentation_(instrumentation) {}
+
+  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+    auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+    for (auto& m : klass->GetMethods(pointer_size)) {
+      const void* code = m.GetEntryPointFromQuickCompiledCode();
+      if (Runtime::Current()->GetHeap()->IsInBootImageOatFile(code) &&
+          !m.IsNative() &&
+          !m.IsProxyMethod()) {
+        instrumentation_->UpdateMethodsCodeFromDebugger(&m, GetQuickToInterpreterBridge());
+      }
+    }
+    return true;
+  }
+
+ private:
+  instrumentation::Instrumentation* const instrumentation_;
+};
+
 void Dbg::GoActive() {
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
@@ -580,6 +621,16 @@
   }
 
   Runtime* runtime = Runtime::Current();
+  // Since boot image code may be AOT compiled as not debuggable, we need to patch
+  // the entry points of boot image methods to the interpreter bridge.
+  // However, the performance cost of this is non-negligible during native debugging due to the
+  // forced JIT, so we keep the AOT code in that case in exchange for limited native debugging.
+  if (!runtime->GetInstrumentation()->IsForcedInterpretOnly() && !runtime->IsNativeDebuggable()) {
+    ScopedObjectAccess soa(self);
+    UpdateEntryPointsClassVisitor visitor(runtime->GetInstrumentation());
+    runtime->GetClassLinker()->VisitClasses(&visitor);
+  }
+
   ScopedSuspendAll ssa(__FUNCTION__);
   if (RequiresDeoptimization()) {
     runtime->GetInstrumentation()->EnableDeoptimization();
@@ -594,8 +645,7 @@
 
   LOG(INFO) << "Debugger is no longer active";
 
-  // Suspend all threads and exclusively acquire the mutator lock. Set the state of the thread
-  // to kRunnable to avoid scoped object access transitions. Remove the debugger as a listener
+  // Suspend all threads and exclusively acquire the mutator lock. Remove the debugger as a listener
   // and clear the object registry.
   Runtime* runtime = Runtime::Current();
   Thread* self = Thread::Current();
@@ -605,7 +655,6 @@
                                     gc::kGcCauseInstrumentation,
                                     gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
-    ThreadState old_state = self->SetStateUnsafe(kRunnable);
     // Debugger may not be active at this point.
     if (IsDebuggerActive()) {
       {
@@ -626,7 +675,6 @@
       }
       gDebuggerActive = false;
     }
-    CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   }
 
   {
@@ -708,11 +756,11 @@
 
 JDWP::JdwpError Dbg::GetClassLoader(JDWP::RefTypeId id, JDWP::ExpandBuf* pReply) {
   JDWP::JdwpError error;
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(id, &error);
-  if (o == nullptr) {
-    return JDWP::ERR_INVALID_OBJECT;
+  mirror::Class* c = DecodeClass(id, &error);
+  if (c == nullptr) {
+    return error;
   }
-  expandBufAddObjectId(pReply, gRegistry->Add(o->GetClass()->GetClassLoader()));
+  expandBufAddObjectId(pReply, gRegistry->Add(c->GetClassLoader()));
   return JDWP::ERR_NONE;
 }
 
@@ -965,7 +1013,7 @@
  public:
   explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes) : classes_(classes) {}
 
-  bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!c->IsPrimitive()) {
       classes_->push_back(Dbg::GetObjectRegistry()->AddRefType(c));
     }
@@ -1234,7 +1282,14 @@
     return error;
   }
   Thread* self = Thread::Current();
-  mirror::Object* new_object = c->AllocObject(self);
+  mirror::Object* new_object;
+  if (c->IsStringClass()) {
+    // Special case for java.lang.String.
+    gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+    new_object = mirror::String::AllocEmptyString<true>(self, allocator_type);
+  } else {
+    new_object = c->AllocObject(self);
+  }
   if (new_object == nullptr) {
     DCHECK(self->IsExceptionPending());
     self->ClearException();
@@ -1277,9 +1332,9 @@
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
-static JDWP::MethodId ToMethodId(const ArtMethod* m)
+static JDWP::MethodId ToMethodId(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
+  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(GetCanonicalMethod(m)));
 }
 
 static ArtField* FromFieldId(JDWP::FieldId fid)
@@ -1351,7 +1406,7 @@
   if (m == nullptr) {
     return "null";
   }
-  return m->GetInterfaceMethodIfProxy(sizeof(void*))->GetName();
+  return m->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName();
 }
 
 std::string Dbg::GetFieldName(JDWP::FieldId field_id) {
@@ -1398,6 +1453,15 @@
   }
 }
 
+static size_t GetMethodNumArgRegistersIncludingThis(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t num_registers = ArtMethod::NumArgRegisters(method->GetShorty());
+  if (!method->IsStatic()) {
+    ++num_registers;
+  }
+  return num_registers;
+}
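+// Worked example (illustrative): for an instance method with shorty "VJI",
+// NumArgRegisters counts the arguments only (J needs 2 vregs, I needs 1),
+// giving 3, and the implicit 'this' reference brings the total to 4.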
+
 /*
  * Circularly shifts registers so that arguments come last. Reverts
  * slots to dex style argument placement.
@@ -1409,7 +1473,7 @@
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
     LOG(WARNING) << "Trying to demangle slot for method without code " << PrettyMethod(m);
-    uint16_t vreg_count = ArtMethod::NumArgRegisters(m->GetShorty());
+    uint16_t vreg_count = GetMethodNumArgRegistersIncludingThis(m);
     if (slot < vreg_count) {
       *error = JDWP::ERR_NONE;
       return slot;
@@ -1465,25 +1529,20 @@
     return error;
   }
 
-  size_t direct_method_count = c->NumDirectMethods();
-  size_t virtual_method_count = c->NumVirtualMethods();
-
-  expandBufAdd4BE(pReply, direct_method_count + virtual_method_count);
+  expandBufAdd4BE(pReply, c->NumMethods());
 
   auto* cl = Runtime::Current()->GetClassLinker();
   auto ptr_size = cl->GetImagePointerSize();
-  for (size_t i = 0; i < direct_method_count + virtual_method_count; ++i) {
-    ArtMethod* m = i < direct_method_count ?
-        c->GetDirectMethod(i, ptr_size) : c->GetVirtualMethod(i - direct_method_count, ptr_size);
-    expandBufAddMethodId(pReply, ToMethodId(m));
-    expandBufAddUtf8String(pReply, m->GetInterfaceMethodIfProxy(sizeof(void*))->GetName());
-    expandBufAddUtf8String(pReply,
-                           m->GetInterfaceMethodIfProxy(sizeof(void*))->GetSignature().ToString());
+  for (ArtMethod& m : c->GetMethods(ptr_size)) {
+    expandBufAddMethodId(pReply, ToMethodId(&m));
+    expandBufAddUtf8String(pReply, m.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName());
+    expandBufAddUtf8String(
+        pReply, m.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetSignature().ToString());
     if (with_generic) {
       const char* generic_signature = "";
       expandBufAddUtf8String(pReply, generic_signature);
     }
-    expandBufAdd4BE(pReply, MangleAccessFlags(m->GetAccessFlags()));
+    expandBufAdd4BE(pReply, MangleAccessFlags(m.GetAccessFlags()));
   }
   return JDWP::ERR_NONE;
 }
@@ -1510,10 +1569,10 @@
     int numItems;
     JDWP::ExpandBuf* pReply;
 
-    static bool Callback(void* context, uint32_t address, uint32_t line_number) {
+    static bool Callback(void* context, const DexFile::PositionInfo& entry) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
-      expandBufAdd8BE(pContext->pReply, address);
-      expandBufAdd4BE(pContext->pReply, line_number);
+      expandBufAdd8BE(pContext->pReply, entry.address_);
+      expandBufAdd4BE(pContext->pReply, entry.line_);
       pContext->numItems++;
       return false;
     }
@@ -1543,8 +1602,7 @@
   context.pReply = pReply;
 
   if (code_item != nullptr) {
-    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                     DebugCallbackContext::Callback, nullptr, &context);
+    m->GetDexFile()->DecodeDebugPositionInfo(code_item, DebugCallbackContext::Callback, &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + numLinesOffset, context.numItems);
@@ -1558,25 +1616,26 @@
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress,
-                         const char* name, const char* descriptor, const char* signature)
+    static void Callback(void* context, const DexFile::LocalInfo& entry)
         SHARED_REQUIRES(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
+      uint16_t slot = entry.reg_;
       VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d",
-                                 pContext->variable_count, startAddress, endAddress - startAddress,
-                                 name, descriptor, signature, slot,
+                                 pContext->variable_count, entry.start_address_,
+                                 entry.end_address_ - entry.start_address_,
+                                 entry.name_, entry.descriptor_, entry.signature_, slot,
                                  MangleSlot(slot, pContext->method));
 
       slot = MangleSlot(slot, pContext->method);
 
-      expandBufAdd8BE(pContext->pReply, startAddress);
-      expandBufAddUtf8String(pContext->pReply, name);
-      expandBufAddUtf8String(pContext->pReply, descriptor);
+      expandBufAdd8BE(pContext->pReply, entry.start_address_);
+      expandBufAddUtf8String(pContext->pReply, entry.name_);
+      expandBufAddUtf8String(pContext->pReply, entry.descriptor_);
       if (pContext->with_generic) {
-        expandBufAddUtf8String(pContext->pReply, signature);
+        expandBufAddUtf8String(pContext->pReply, entry.signature_);
       }
-      expandBufAdd4BE(pContext->pReply, endAddress - startAddress);
+      expandBufAdd4BE(pContext->pReply, entry.end_address_ - entry.start_address_);
       expandBufAdd4BE(pContext->pReply, slot);
 
       ++pContext->variable_count;
@@ -1586,8 +1645,7 @@
 
   // arg_count considers doubles and longs to take 2 units.
   // variable_count considers everything to take 1 unit.
-  std::string shorty(m->GetShorty());
-  expandBufAdd4BE(pReply, ArtMethod::NumArgRegisters(shorty));
+  expandBufAdd4BE(pReply, GetMethodNumArgRegistersIncludingThis(m));
 
   // We don't know the total number of variables yet, so leave a blank and update it later.
   size_t variable_count_offset = expandBufGetLength(pReply);
@@ -1601,8 +1659,8 @@
 
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item != nullptr) {
-    m->GetDexFile()->DecodeDebugInfo(
-        code_item, m->IsStatic(), m->GetDexMethodIndex(), nullptr, DebugCallbackContext::Callback,
+    m->GetDexFile()->DecodeDebugLocalInfo(
+        code_item, m->IsStatic(), m->GetDexMethodIndex(), DebugCallbackContext::Callback,
         &context);
   }
 
@@ -2012,29 +2070,28 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(thread_group != nullptr);
 
-  // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
-  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
-  mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
-  {
-    // The "groups" field is declared as a java.util.List: check it really is
-    // an instance of java.util.ArrayList.
-    CHECK(groups_array_list != nullptr);
-    mirror::Class* java_util_ArrayList_class =
-        soa.Decode<mirror::Class*>(WellKnownClasses::java_util_ArrayList);
-    CHECK(groups_array_list->InstanceOf(java_util_ArrayList_class));
+  // Get the int "ngroups" count of this thread group...
+  ArtField* ngroups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
+  CHECK(ngroups_field != nullptr);
+  const int32_t size = ngroups_field->GetInt(thread_group);
+  if (size == 0) {
+    return;
   }
 
-  // Get the array and size out of the ArrayList<ThreadGroup>...
-  ArtField* array_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_array);
-  ArtField* size_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_size);
-  mirror::ObjectArray<mirror::Object>* groups_array =
-      array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
-  const int32_t size = size_field->GetInt(groups_array_list);
+  // Get the ThreadGroup[] "groups" out of this thread group...
+  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  mirror::Object* groups_array = groups_field->GetObject(thread_group);
+
+  CHECK(groups_array != nullptr);
+  CHECK(groups_array->IsObjectArray());
+
+  mirror::ObjectArray<mirror::Object>* groups_array_as_array =
+      groups_array->AsObjectArray<mirror::Object>();
 
   // Copy the first 'size' elements out of the array into the result.
   ObjectRegistry* registry = Dbg::GetObjectRegistry();
   for (int32_t i = 0; i < size; ++i) {
-    child_thread_group_ids->push_back(registry->Add(groups_array->Get(i)));
+    child_thread_group_ids->push_back(registry->Add(groups_array_as_array->Get(i)));
   }
 }
 
@@ -2310,6 +2367,10 @@
 }
 
 void Dbg::SuspendVM() {
+  // Avoid a deadlock between the GC and the debugger, where the GC could be suspended
+  // mid-collection. b/25800335.
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseDebugger,
+                                  gc::kCollectorTypeDebugger);
   Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
 }
 
@@ -2679,26 +2740,26 @@
     case JDWP::JT_BOOLEAN:
     case JDWP::JT_BYTE:
       CHECK_EQ(width, 1U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_SHORT:
     case JDWP::JT_CHAR:
       CHECK_EQ(width, 2U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_INT:
       CHECK_EQ(width, 4U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_FLOAT:
       CHECK_EQ(width, 4U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kFloatVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kFloatVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
@@ -2716,7 +2777,7 @@
         VLOG(jdwp) << tag << " object " << o << " is an invalid object";
         return JDWP::ERR_INVALID_OBJECT;
       }
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
                                  kReferenceVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, reinterpret_cast<uintptr_t>(o));
       }
@@ -2724,14 +2785,14 @@
     }
     case JDWP::JT_DOUBLE: {
       CHECK_EQ(width, 8U);
-      if (!visitor.SetVRegPairFromDebugger(m, vreg, value, kDoubleLoVReg, kDoubleHiVReg)) {
+      if (!visitor.SetVRegPair(m, vreg, value, kDoubleLoVReg, kDoubleHiVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, value);
       }
       break;
     }
     case JDWP::JT_LONG: {
       CHECK_EQ(width, 8U);
-      if (!visitor.SetVRegPairFromDebugger(m, vreg, value, kLongLoVReg, kLongHiVReg)) {
+      if (!visitor.SetVRegPair(m, vreg, value, kLongLoVReg, kLongHiVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, value);
       }
       break;
@@ -2758,7 +2819,7 @@
   if (m == nullptr) {
     memset(location, 0, sizeof(*location));
   } else {
-    location->method = m;
+    location->method = GetCanonicalMethod(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
   }
 }
@@ -3185,31 +3246,10 @@
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
 }
 
-static bool IsMethodPossiblyInlined(Thread* self, ArtMethod* m)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
-  if (code_item == nullptr) {
-    // TODO We should not be asked to watch location in a native or abstract method so the code item
-    // should never be null. We could just check we never encounter this case.
-    return false;
-  }
-  // Note: method verifier may cause thread suspension.
-  self->AssertThreadSuspensionIsAllowable();
-  StackHandleScope<2> hs(self);
-  mirror::Class* declaring_class = m->GetDeclaringClass();
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-  verifier::MethodVerifier verifier(self, dex_cache->GetDexFile(), dex_cache, class_loader,
-                                    &m->GetClassDef(), code_item, m->GetDexMethodIndex(), m,
-                                    m->GetAccessFlags(), false, true, false, true);
-  // Note: we don't need to verify the method.
-  return InlineMethodAnalyser::AnalyseMethodCode(&verifier, nullptr);
-}
-
 static const Breakpoint* FindFirstBreakpointForMethod(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       return &breakpoint;
     }
   }
@@ -3226,7 +3266,7 @@
                                            DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
     }
   }
@@ -3268,30 +3308,22 @@
   }
 
   if (first_breakpoint == nullptr) {
-    // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
-    // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
-    // Therefore we must not hold any lock when we call it.
-    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    // There is no breakpoint on this method yet: we need to deoptimize. If this method is default,
+    // we deoptimize everything; otherwise we deoptimize only this method. We fully deoptimize for
+    // default methods because we do not know everywhere they are copied, so selective
+    // deoptimization could miss some of the copies.
+    // TODO Deoptimizing on default methods might not be necessary in all cases.
+    bool need_full_deoptimization = m->IsDefault();
     if (need_full_deoptimization) {
-      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+      VLOG(jdwp) << "Need full deoptimization because of copying of method "
                  << PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
       // We don't need to deoptimize if the method has not been compiled.
-      ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-      const bool is_compiled = class_linker->GetOatMethodQuickCodeFor(m) != nullptr;
+      const bool is_compiled = m->HasAnyCompiledCode();
       if (is_compiled) {
-        // If the method may be called through its direct code pointer (without loading
-        // its updated entrypoint), we need full deoptimization to not miss the breakpoint.
-        if (class_linker->MayBeCalledWithDirectCodePointer(m)) {
-          VLOG(jdwp) << "Need full deoptimization because of possible direct code call "
-                     << "into image for compiled method " << PrettyMethod(m);
-          return DeoptimizationRequest::kFullDeoptimization;
-        } else {
-          VLOG(jdwp) << "Need selective deoptimization for compiled method " << PrettyMethod(m);
-          return DeoptimizationRequest::kSelectiveDeoptimization;
-        }
+        VLOG(jdwp) << "Need selective deoptimization for compiled method " << PrettyMethod(m);
+        return DeoptimizationRequest::kSelectiveDeoptimization;
       } else {
         // Method is not compiled: we don't need to deoptimize.
         VLOG(jdwp) << "No need for deoptimization for non-compiled method " << PrettyMethod(m);
@@ -3354,7 +3386,7 @@
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
   DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
       deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
       DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
@@ -3687,19 +3719,19 @@
           code_item_(code_item), last_pc_valid(false), last_pc(0) {
     }
 
-    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number_cb) {
+    static bool Callback(void* raw_context, const DexFile::PositionInfo& entry) {
       DebugCallbackContext* context = reinterpret_cast<DebugCallbackContext*>(raw_context);
-      if (static_cast<int32_t>(line_number_cb) == context->line_number_) {
+      if (static_cast<int32_t>(entry.line_) == context->line_number_) {
         if (!context->last_pc_valid) {
           // Everything from this address until the next line change is ours.
-          context->last_pc = address;
+          context->last_pc = entry.address_;
           context->last_pc_valid = true;
         }
         // Otherwise, if we're already in a valid range for this line,
         // just keep going (shouldn't really happen)...
       } else if (context->last_pc_valid) {  // and the line number is new
         // Add everything from the last entry up until here to the set
-        for (uint32_t dex_pc = context->last_pc; dex_pc < address; ++dex_pc) {
+        for (uint32_t dex_pc = context->last_pc; dex_pc < entry.address_; ++dex_pc) {
           context->single_step_control_->AddDexPc(dex_pc);
         }
         context->last_pc_valid = false;
@@ -3740,8 +3772,7 @@
   if (m != nullptr && !m->IsNative()) {
     const DexFile::CodeItem* const code_item = m->GetCodeItem();
     DebugCallbackContext context(single_step_control, line_number, code_item);
-    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                     DebugCallbackContext::Callback, nullptr, &context);
+    m->GetDexFile()->DecodeDebugPositionInfo(code_item, DebugCallbackContext::Callback, &context);
   }
 
   // Activate single-step in the thread.
@@ -3911,7 +3942,7 @@
           mirror::Class* parameter_type =
               m->GetClassFromTypeIndex(types->GetTypeItem(i).type_idx_,
                                        true /* resolve */,
-                                       sizeof(void*));
+                                       kRuntimePointerSize);
           mirror::Object* argument = gRegistry->Get<mirror::Object*>(arg_values[i], &error);
           if (error != JDWP::ERR_NONE) {
             return JDWP::ERR_INVALID_OBJECT;
@@ -4002,7 +4033,7 @@
 
   // Translate the method through the vtable, unless the debugger wants to suppress it.
   ArtMethod* m = pReq->method;
-  size_t image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver.Read() != nullptr) {
     ArtMethod* actual_method =
         pReq->klass.Read()->FindVirtualMethodForVirtualOrInterface(m, image_pointer_size);
@@ -4027,7 +4058,7 @@
   // Prepare JDWP ids for the reply.
   JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty());
   const bool is_object_result = (result_tag == JDWP::JT_OBJECT);
-  StackHandleScope<2> hs(soa.Self());
+  StackHandleScope<3> hs(soa.Self());
   Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr);
   Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException());
   soa.Self()->ClearException();
@@ -4064,19 +4095,30 @@
   if (is_constructor) {
     // If we invoked a constructor (which actually returns void), return the receiver,
     // unless we threw, in which case we return null.
-    result_tag = JDWP::JT_OBJECT;
+    DCHECK_EQ(JDWP::JT_VOID, result_tag);
     if (exceptionObjectId == 0) {
-      // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
-      // object registry.
-      result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+      if (m->GetDeclaringClass()->IsStringClass()) {
+        // For string constructors, the new string is remapped to the receiver (stored in ref).
+        Handle<mirror::Object> decoded_ref = hs.NewHandle(soa.Self()->DecodeJObject(ref.get()));
+        result_value = gRegistry->Add(decoded_ref);
+        result_tag = TagFromObject(soa, decoded_ref.Get());
+      } else {
+        // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
+        // object registry.
+        result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+        result_tag = TagFromObject(soa, pReq->receiver.Read());
+      }
     } else {
       result_value = 0;
+      result_tag = JDWP::JT_OBJECT;
     }
   }
 
   // Suspend other threads if the invoke is not single-threaded.
   if ((pReq->options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
     ScopedThreadSuspension sts(soa.Self(), kWaitingForDebuggerSuspension);
+    // Avoid a deadlock between the GC and the debugger, where the GC could be suspended
+    // mid-collection. b/25800335.
+    gc::ScopedGCCriticalSection gcs(soa.Self(), gc::kGcCauseDebugger, gc::kCollectorTypeDebugger);
     VLOG(jdwp) << "      Suspending all threads";
     Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
   }
@@ -4291,10 +4333,16 @@
     Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
     size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0;
     const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr;
+    bool is_compressed = (name.Get() != nullptr) ? name->IsCompressed() : false;
 
     std::vector<uint8_t> bytes;
     JDWP::Append4BE(bytes, t->GetThreadId());
-    JDWP::AppendUtf16BE(bytes, chars, char_count);
+    if (is_compressed) {
+      const uint8_t* chars_compressed = name->GetValueCompressed();
+      JDWP::AppendUtf16CompressedBE(bytes, chars_compressed, char_count);
+    } else {
+      JDWP::AppendUtf16BE(bytes, chars, char_count);
+    }
     CHECK_EQ(bytes.size(), char_count*2 + sizeof(uint32_t)*2);
     Dbg::DdmSendChunk(type, bytes);
   }
@@ -4725,12 +4773,7 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context(what == HPSG_WHAT_MERGED_OBJECTS, native);
   if (native) {
-#if defined(__ANDROID__) && defined(USE_DLMALLOC)
-    dlmalloc_inspect_all(HeapChunkContext::HeapChunkNativeCallback, &context);
-    HeapChunkContext::HeapChunkNativeCallback(nullptr, nullptr, 0, &context);  // Indicate end of a space.
-#else
-    UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc";
-#endif
+    UNIMPLEMENTED(WARNING) << "Native heap inspection is not supported";
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     for (const auto& space : heap->GetContinuousSpaces()) {
@@ -4799,7 +4842,7 @@
   LOG(INFO) << "Tracked allocations, (count=" << count << ")";
   for (auto it = records->RBegin(), end = records->REnd();
       count > 0 && it != end; count--, it++) {
-    const gc::AllocRecord* record = it->second;
+    const gc::AllocRecord* record = &it->second;
 
     LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount())
               << PrettyClass(record->GetClass());
@@ -4938,7 +4981,7 @@
     uint16_t count = capped_count;
     for (auto it = records->RBegin(), end = records->REnd();
          count > 0 && it != end; count--, it++) {
-      const gc::AllocRecord* record = it->second;
+      const gc::AllocRecord* record = &it->second;
       std::string temp;
       class_names.Add(record->GetClassDescriptor(&temp));
       for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) {
@@ -4989,7 +5032,7 @@
       // (2b) thread id
       // (2b) allocated object's class name index
       // (1b) stack depth
-      const gc::AllocRecord* record = it->second;
+      const gc::AllocRecord* record = &it->second;
       size_t stack_depth = record->GetDepth();
       size_t allocated_object_class_name_index =
           class_names.IndexOf(record->GetClassDescriptor(&temp));
@@ -5046,7 +5089,7 @@
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   BufferedRootVisitor<128> root_visitor(visitor, RootInfo(kRootVMInternal));
   for (Breakpoint& breakpoint : gBreakpoints) {
-    breakpoint.Method()->VisitRoots(root_visitor, sizeof(void*));
+    breakpoint.Method()->VisitRoots(root_visitor, kRuntimePointerSize);
   }
 }
 
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
new file mode 100644
index 0000000..d717ec0
--- /dev/null
+++ b/runtime/dex2oat_environment_test.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX2OAT_ENVIRONMENT_TEST_H_
+#define ART_RUNTIME_DEX2OAT_ENVIRONMENT_TEST_H_
+
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "common_runtime_test.h"
+#include "compiler_callbacks.h"
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "oat_file_assistant.h"
+#include "os.h"
+#include "runtime.h"
+#include "utils.h"
+
+namespace art {
+
+// Test class that provides some helpers to set a test up for compilation using dex2oat.
+class Dex2oatEnvironmentTest : public CommonRuntimeTest {
+ public:
+  virtual void SetUp() OVERRIDE {
+    CommonRuntimeTest::SetUp();
+
+    // Create a scratch directory to work from.
+    scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest";
+    ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700));
+
+    // Create a subdirectory in scratch for odex files.
+    odex_oat_dir_ = scratch_dir_ + "/oat";
+    ASSERT_EQ(0, mkdir(odex_oat_dir_.c_str(), 0700));
+
+    odex_dir_ = odex_oat_dir_ + "/" + std::string(GetInstructionSetString(kRuntimeISA));
+    ASSERT_EQ(0, mkdir(odex_dir_.c_str(), 0700));
+
+    // Verify the environment is as we expect.
+    uint32_t checksum;
+    std::string error_msg;
+    ASSERT_TRUE(OS::FileExists(GetSystemImageFile().c_str()))
+      << "Expected pre-compiled boot image to be at: " << GetSystemImageFile();
+    ASSERT_TRUE(OS::FileExists(GetDexSrc1().c_str()))
+      << "Expected dex file to be at: " << GetDexSrc1();
+    ASSERT_TRUE(OS::FileExists(GetStrippedDexSrc1().c_str()))
+      << "Expected stripped dex file to be at: " << GetStrippedDexSrc1();
+    ASSERT_FALSE(DexFile::GetChecksum(GetStrippedDexSrc1().c_str(), &checksum, &error_msg))
+      << "Expected stripped dex file to be stripped: " << GetStrippedDexSrc1();
+    ASSERT_TRUE(OS::FileExists(GetDexSrc2().c_str()))
+      << "Expected dex file to be at: " << GetDexSrc2();
+
+    // GetMultiDexSrc2 should have the same primary dex checksum as
+    // GetMultiDexSrc1, but a different secondary dex checksum.
+    static constexpr bool kVerifyChecksum = true;
+    std::vector<std::unique_ptr<const DexFile>> multi1;
+    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc1().c_str(),
+          GetMultiDexSrc1().c_str(), kVerifyChecksum, &error_msg, &multi1)) << error_msg;
+    ASSERT_GT(multi1.size(), 1u);
+
+    std::vector<std::unique_ptr<const DexFile>> multi2;
+    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc2().c_str(),
+          GetMultiDexSrc2().c_str(), kVerifyChecksum, &error_msg, &multi2)) << error_msg;
+    ASSERT_GT(multi2.size(), 1u);
+
+    ASSERT_EQ(multi1[0]->GetLocationChecksum(), multi2[0]->GetLocationChecksum());
+    ASSERT_NE(multi1[1]->GetLocationChecksum(), multi2[1]->GetLocationChecksum());
+  }
+
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
+    // options->push_back(std::make_pair("-verbose:oat", nullptr));
+
+    // Set up the image location.
+    options->push_back(std::make_pair("-Ximage:" + GetImageLocation(),
+          nullptr));
+    // Make sure compilercallbacks are not set so that relocation will be
+    // enabled.
+    callbacks_.reset();
+  }
+
+  virtual void TearDown() OVERRIDE {
+    ClearDirectory(odex_dir_.c_str());
+    ASSERT_EQ(0, rmdir(odex_dir_.c_str()));
+
+    ClearDirectory(odex_oat_dir_.c_str());
+    ASSERT_EQ(0, rmdir(odex_oat_dir_.c_str()));
+
+    ClearDirectory(scratch_dir_.c_str());
+    ASSERT_EQ(0, rmdir(scratch_dir_.c_str()));
+
+    CommonRuntimeTest::TearDown();
+  }
+
+  static void Copy(const std::string& src, const std::string& dst) {
+    std::ifstream src_stream(src, std::ios::binary);
+    std::ofstream dst_stream(dst, std::ios::binary);
+
+    dst_stream << src_stream.rdbuf();
+  }
+
+  // Returns the directory where the pre-compiled core.art can be found.
+  // TODO: We should factor this out into common test code somewhere rather
+  // than re-hardcoding it here (this was originally copied from the elf
+  // writer test).
+  std::string GetImageDirectory() const {
+    if (IsHost()) {
+      const char* host_dir = getenv("ANDROID_HOST_OUT");
+      CHECK(host_dir != nullptr);
+      return std::string(host_dir) + "/framework";
+    } else {
+      return std::string("/data/art-test");
+    }
+  }
+
+  std::string GetImageLocation() const {
+    return GetImageDirectory() + "/core.art";
+  }
+
+  std::string GetSystemImageFile() const {
+    return GetImageDirectory() + "/" + GetInstructionSetString(kRuntimeISA)
+      + "/core.art";
+  }
+
+  bool GetCachedImageFile(/*out*/std::string* image, std::string* error_msg) const {
+    std::string cache;
+    bool have_android_data;
+    bool dalvik_cache_exists;
+    bool is_global_cache;
+    GetDalvikCache(GetInstructionSetString(kRuntimeISA),
+                   true,
+                   &cache,
+                   &have_android_data,
+                   &dalvik_cache_exists,
+                   &is_global_cache);
+    if (!dalvik_cache_exists) {
+      *error_msg = "Failed to create dalvik cache";
+      return false;
+    }
+    return GetDalvikCacheFilename(GetImageLocation().c_str(), cache.c_str(), image, error_msg);
+  }
+
+  std::string GetDexSrc1() const {
+    return GetTestDexFileName("Main");
+  }
+
+  // Returns the path to a dex file equivalent to GetDexSrc1, but with the dex
+  // file stripped.
+  std::string GetStrippedDexSrc1() const {
+    return GetTestDexFileName("MainStripped");
+  }
+
+  std::string GetMultiDexSrc1() const {
+    return GetTestDexFileName("MultiDex");
+  }
+
+  // Returns the path to a multidex file equivalent to GetMultiDexSrc1, but
+  // with the contents of the secondary dex file changed.
+  std::string GetMultiDexSrc2() const {
+    return GetTestDexFileName("MultiDexModifiedSecondary");
+  }
+
+  std::string GetDexSrc2() const {
+    return GetTestDexFileName("Nested");
+  }
+
+  // Scratch directory, for dex and odex files (oat files will go in the
+  // dalvik cache).
+  const std::string& GetScratchDir() const {
+    return scratch_dir_;
+  }
+
+  // Odex directory is the subdirectory in the scratch directory where odex
+  // files should be located.
+  const std::string& GetOdexDir() const {
+    return odex_dir_;
+  }
+
+ private:
+  std::string scratch_dir_;
+  std::string odex_oat_dir_;
+  std::string odex_dir_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX2OAT_ENVIRONMENT_TEST_H_
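A hedged sketch of how a test might build on this fixture; the test name and
body are hypothetical:

    class MyDex2oatTest : public Dex2oatEnvironmentTest {};

    TEST_F(MyDex2oatTest, CopyDexIntoScratch) {
      std::string dex_copy = GetScratchDir() + "/Copy.jar";
      Copy(GetDexSrc1(), dex_copy);
      EXPECT_TRUE(OS::FileExists(dex_copy.c_str()));
    }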
diff --git a/runtime/dex_cache_resolved_classes.h b/runtime/dex_cache_resolved_classes.h
new file mode 100644
index 0000000..0febbed
--- /dev/null
+++ b/runtime/dex_cache_resolved_classes.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_CACHE_RESOLVED_CLASSES_H_
+#define ART_RUNTIME_DEX_CACHE_RESOLVED_CLASSES_H_
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+namespace art {
+
+// Data structure for passing around the set of resolved classes belonging to a dex cache /
+// dex file.
+class DexCacheResolvedClasses {
+ public:
+  DexCacheResolvedClasses(const std::string& dex_location,
+                          const std::string& base_location,
+                          uint32_t location_checksum)
+      : dex_location_(dex_location),
+        base_location_(base_location),
+        location_checksum_(location_checksum) {}
+
+  // Only compare the key elements, ignore the resolved classes.
+  int Compare(const DexCacheResolvedClasses& other) const {
+    if (location_checksum_ != other.location_checksum_) {
+      return static_cast<int>(location_checksum_ - other.location_checksum_);
+    }
+    // Don't need to compare base_location_ since dex_location_ has more info.
+    return dex_location_.compare(other.dex_location_);
+  }
+
+  template <class InputIt>
+  void AddClasses(InputIt begin, InputIt end) const {
+    classes_.insert(begin, end);
+  }
+
+  const std::string& GetDexLocation() const {
+    return dex_location_;
+  }
+
+  const std::string& GetBaseLocation() const {
+    return base_location_;
+  }
+
+  uint32_t GetLocationChecksum() const {
+    return location_checksum_;
+  }
+
+  const std::unordered_set<uint16_t>& GetClasses() const {
+    return classes_;
+  }
+
+ private:
+  const std::string dex_location_;
+  const std::string base_location_;
+  const uint32_t location_checksum_;
+  // Set of resolved class def indexes.
+  mutable std::unordered_set<uint16_t> classes_;
+};
+
+inline bool operator<(const DexCacheResolvedClasses& a, const DexCacheResolvedClasses& b) {
+  return a.Compare(b) < 0;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_CACHE_RESOLVED_CLASSES_H_
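A minimal usage sketch (the locations, checksum, and class def indexes are made
up; assumes <set> and <iterator> are available). operator< above makes the type
usable as a std::set key, ordered by checksum and then dex location:

    std::set<DexCacheResolvedClasses> resolved;
    DexCacheResolvedClasses classes("/data/app/base.apk",
                                    "/data/app/base.apk",
                                    0x1234abcd);
    const uint16_t class_defs[] = {0, 3, 7};
    classes.AddClasses(std::begin(class_defs), std::end(class_defs));
    resolved.insert(classes);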
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 4e6c3ca..108a5af 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -38,10 +38,88 @@
   return reinterpret_cast<const char*>(ptr);
 }
 
+inline const char* DexFile::GetStringData(const StringId& string_id) const {
+  uint32_t ignored;
+  return GetStringDataAndUtf16Length(string_id, &ignored);
+}
+
+inline const char* DexFile::StringDataAndUtf16LengthByIdx(uint32_t idx,
+                                                          uint32_t* utf16_length) const {
+  if (idx == kDexNoIndex) {
+    *utf16_length = 0;
+    return nullptr;
+  }
+  const StringId& string_id = GetStringId(idx);
+  return GetStringDataAndUtf16Length(string_id, utf16_length);
+}
+
+inline const char* DexFile::StringDataByIdx(uint32_t idx) const {
+  uint32_t unicode_length;
+  return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
+  const TypeId& type_id = GetTypeId(idx);
+  return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx) const {
+  const TypeId& type_id = GetTypeId(idx);
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetTypeDescriptor(const TypeId& type_id) const {
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetFieldTypeDescriptor(const FieldId& field_id) const {
+  const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
+  return GetTypeDescriptor(type_id);
+}
+
+inline const char* DexFile::GetFieldName(const FieldId& field_id) const {
+  return StringDataByIdx(field_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
+  const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
+  return GetTypeDescriptor(type_id);
+}
+
 inline const Signature DexFile::GetMethodSignature(const MethodId& method_id) const {
   return Signature(this, GetProtoId(method_id.proto_idx_));
 }
 
+inline const char* DexFile::GetMethodName(const MethodId& method_id) const {
+  return StringDataByIdx(method_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(uint32_t idx) const {
+  return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id) const {
+  return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
+  // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
+  return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
+}
+
+inline const char* DexFile::GetClassDescriptor(const ClassDef& class_def) const {
+  return StringByTypeIdx(class_def.class_idx_);
+}
+
+inline const char* DexFile::GetReturnTypeDescriptor(const ProtoId& proto_id) const {
+  return StringByTypeIdx(proto_id.return_type_idx_);
+}
+
+inline const char* DexFile::GetShorty(uint32_t proto_idx) const {
+  const ProtoId& proto_id = GetProtoId(proto_idx);
+  return StringDataByIdx(proto_id.shorty_idx_);
+}
+
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
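The accessors above chain together; a sketch of going from a method index to
its name and shorty (dex_file and method_idx are hypothetical):

    const DexFile::MethodId& method_id = dex_file->GetMethodId(method_idx);
    const char* name = dex_file->GetMethodName(method_id);      // e.g. "toString"
    const char* shorty = dex_file->GetMethodShorty(method_id);  // e.g. "L"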
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index ae62e2b..90c678c 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,14 +29,19 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
+#include "base/file_magic.h"
 #include "base/hash_map.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
+#include "base/systrace.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_file_verifier.h"
 #include "globals.h"
+#include "handle_scope-inl.h"
 #include "leb128.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
@@ -44,42 +49,24 @@
 #include "os.h"
 #include "reflection.h"
 #include "safe_map.h"
-#include "handle_scope-inl.h"
 #include "thread.h"
+#include "type_lookup_table.h"
 #include "utf-inl.h"
 #include "utils.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "ScopedFd.h"
-#pragma GCC diagnostic pop
-
 namespace art {
 
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
-const uint8_t DexFile::kDexMagicVersion[] = { '0', '3', '5', '\0' };
-
-static int OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
-  CHECK(magic != nullptr);
-  ScopedFd fd(open(filename, O_RDONLY, 0));
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
-    return -1;
-  }
-  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
-  if (n != sizeof(*magic)) {
-    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
-    return -1;
-  }
-  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
-    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
-                              strerror(errno));
-    return -1;
-  }
-  return fd.release();
-}
+const uint8_t DexFile::kDexMagicVersions[DexFile::kNumDexVersions][DexFile::kDexVersionLen] = {
+  {'0', '3', '5', '\0'},
+  // Dex version 036 skipped because of an old dalvik bug on some versions of android where dex
+  // files with that version number would erroneously be accepted and run.
+  {'0', '3', '7', '\0'},
+  // Dex version 038: Android "O" and beyond.
+  {'0', '3', '8', '\0'}
+};
 
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != nullptr);
@@ -97,14 +84,14 @@
     DCHECK_EQ(zip_entry_name[-1], kMultiDexSeparator);
   }
 
-  ScopedFd fd(OpenAndReadMagic(file_part, &magic, error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(file_part, &magic, error_msg);
+  if (fd.Fd() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
     std::unique_ptr<ZipArchive> zip_archive(
-        ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
+        ZipArchive::OpenFromFd(fd.Release(), filename, error_msg));
     if (zip_archive.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open zip archive '%s' (error msg: %s)", file_part,
                                 error_msg->c_str());
@@ -121,7 +108,7 @@
   }
   if (IsDexMagic(magic)) {
     std::unique_ptr<const DexFile> dex_file(
-        DexFile::OpenFile(fd.release(), filename, false, error_msg));
+        DexFile::OpenFile(fd.Release(), filename, false, false, error_msg));
     if (dex_file.get() == nullptr) {
       return false;
     }
@@ -132,20 +119,27 @@
   return false;
 }
 
-bool DexFile::Open(const char* filename, const char* location, std::string* error_msg,
+bool DexFile::Open(const char* filename,
+                   const char* location,
+                   bool verify_checksum,
+                   std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  ScopedTrace trace(std::string("Open dex file ") + location);
   DCHECK(dex_files != nullptr) << "DexFile::Open: out-param is nullptr";
   uint32_t magic;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, error_msg);
+  if (fd.Fd() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd.release(), location, error_msg, dex_files);
+    return DexFile::OpenZip(fd.Release(), location, verify_checksum, error_msg, dex_files);
   }
   if (IsDexMagic(magic)) {
-    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), location, true,
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.Release(),
+                                                              location,
+                                                              /* verify */ true,
+                                                              verify_checksum,
                                                               error_msg));
     if (dex_file.get() != nullptr) {
       dex_files->push_back(std::move(dex_file));
@@ -171,12 +165,12 @@
 bool DexFile::MaybeDex(const char* filename) {
   uint32_t magic;
   std::string error_msg;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, &error_msg);
+  if (fd.Fd() == -1) {
     return false;
   }
   if (IsZipMagic(magic)) {
-    return ContainsClassesDex(fd.release(), filename);
+    return ContainsClassesDex(fd.Release(), filename);
   } else if (IsDexMagic(magic)) {
     return true;
   }
@@ -213,12 +207,43 @@
   }
 }
 
-std::unique_ptr<const DexFile> DexFile::OpenFile(int fd, const char* location, bool verify,
+std::unique_ptr<const DexFile> DexFile::Open(const uint8_t* base, size_t size,
+                                             const std::string& location,
+                                             uint32_t location_checksum,
+                                             const OatDexFile* oat_dex_file,
+                                             bool verify,
+                                             bool verify_checksum,
+                                             std::string* error_msg) {
+  ScopedTrace trace(std::string("Open dex file from RAM ") + location);
+  std::unique_ptr<const DexFile> dex_file = OpenMemory(base,
+                                                       size,
+                                                       location,
+                                                       location_checksum,
+                                                       nullptr,
+                                                       oat_dex_file,
+                                                       error_msg);
+  if (verify && !DexFileVerifier::Verify(dex_file.get(),
+                                         dex_file->Begin(),
+                                         dex_file->Size(),
+                                         location.c_str(),
+                                         verify_checksum,
+                                         error_msg)) {
+    return nullptr;
+  }
+
+  return dex_file;
+}
+
+std::unique_ptr<const DexFile> DexFile::OpenFile(int fd,
+                                                 const char* location,
+                                                 bool verify,
+                                                 bool verify_checksum,
                                                  std::string* error_msg) {
+  ScopedTrace trace(std::string("Open dex file ") + location);
   CHECK(location != nullptr);
   std::unique_ptr<MemMap> map;
   {
-    ScopedFd delayed_close(fd);
+    File delayed_close(fd, /* check_usage */ false);
     struct stat sbuf;
     memset(&sbuf, 0, sizeof(sbuf));
     if (fstat(fd, &sbuf) == -1) {
@@ -230,7 +255,14 @@
       return nullptr;
     }
     size_t length = sbuf.st_size;
-    map.reset(MemMap::MapFile(length, PROT_READ, MAP_PRIVATE, fd, 0, location, error_msg));
+    map.reset(MemMap::MapFile(length,
+                              PROT_READ,
+                              MAP_PRIVATE,
+                              fd,
+                              0,
+                              /*low_4gb*/false,
+                              location,
+                              error_msg));
     if (map.get() == nullptr) {
       DCHECK(!error_msg->empty());
       return nullptr;
@@ -254,7 +286,9 @@
   }
 
   if (verify && !DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
-                                         location, error_msg)) {
+                                         location,
+                                         verify_checksum,
+                                         error_msg)) {
     return nullptr;
   }
 
@@ -263,15 +297,19 @@
 
 const char* DexFile::kClassesDex = "classes.dex";
 
-bool DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg,
+bool DexFile::OpenZip(int fd,
+                      const std::string& location,
+                      bool verify_checksum,
+                      std::string* error_msg,
                       std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  ScopedTrace trace("Dex file open Zip " + location);
   DCHECK(dex_files != nullptr) << "DexFile::OpenZip: out-param is nullptr";
   std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
   if (zip_archive.get() == nullptr) {
     DCHECK(!error_msg->empty());
     return false;
   }
-  return DexFile::OpenFromZip(*zip_archive, location, error_msg, dex_files);
+  return DexFile::OpenFromZip(*zip_archive, location, verify_checksum, error_msg, dex_files);
 }
 
 std::unique_ptr<const DexFile> DexFile::OpenMemory(const std::string& location,
@@ -287,15 +325,24 @@
                     error_msg);
 }
 
-std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive, const char* entry_name,
-                                             const std::string& location, std::string* error_msg,
+std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive,
+                                             const char* entry_name,
+                                             const std::string& location,
+                                             bool verify_checksum,
+                                             std::string* error_msg,
                                              ZipOpenErrorCode* error_code) {
+  ScopedTrace trace("Dex file open from Zip Archive " + location);
   CHECK(!location.empty());
   std::unique_ptr<ZipEntry> zip_entry(zip_archive.Find(entry_name, error_msg));
   if (zip_entry.get() == nullptr) {
     *error_code = ZipOpenErrorCode::kEntryNotFound;
     return nullptr;
   }
+  if (zip_entry->GetUncompressedLength() == 0) {
+    *error_msg = StringPrintf("Dex file '%s' has zero length", location.c_str());
+    *error_code = ZipOpenErrorCode::kDexFileError;
+    return nullptr;
+  }
   std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), entry_name, error_msg));
   if (map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(),
@@ -318,7 +365,9 @@
   }
   CHECK(dex_file->IsReadOnly()) << location;
   if (!DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
-                               location.c_str(), error_msg)) {
+                               location.c_str(),
+                               verify_checksum,
+                               error_msg)) {
     *error_code = ZipOpenErrorCode::kVerifyError;
     return nullptr;
   }
@@ -332,13 +381,16 @@
 // seems an excessive number.
 static constexpr size_t kWarnOnManyDexFilesThreshold = 100;
 
-bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+bool DexFile::OpenFromZip(const ZipArchive& zip_archive,
+                          const std::string& location,
+                          bool verify_checksum,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
+  ScopedTrace trace("Dex file open from Zip " + location);
   DCHECK(dex_files != nullptr) << "DexFile::OpenFromZip: out-param is nullptr";
   ZipOpenErrorCode error_code;
-  std::unique_ptr<const DexFile> dex_file(Open(zip_archive, kClassesDex, location, error_msg,
-                                               &error_code));
+  std::unique_ptr<const DexFile> dex_file(
+      Open(zip_archive, kClassesDex, location, verify_checksum, error_msg, &error_code));
   if (dex_file.get() == nullptr) {
     return false;
   } else {
@@ -353,8 +405,8 @@
     for (size_t i = 1; ; ++i) {
       std::string name = GetMultiDexClassesDexName(i);
       std::string fake_location = GetMultiDexLocation(i, location.c_str());
-      std::unique_ptr<const DexFile> next_dex_file(Open(zip_archive, name.c_str(), fake_location,
-                                                        error_msg, &error_code));
+      std::unique_ptr<const DexFile> next_dex_file(
+          Open(zip_archive, name.c_str(), fake_location, verify_checksum, error_msg, &error_code));
       if (next_dex_file.get() == nullptr) {
         if (error_code != ZipOpenErrorCode::kEntryNotFound) {
           LOG(WARNING) << error_msg;
@@ -388,6 +440,8 @@
                                                    MemMap* mem_map,
                                                    const OatDexFile* oat_dex_file,
                                                    std::string* error_msg) {
+  DCHECK(base != nullptr);
+  DCHECK_NE(size, 0U);
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
   std::unique_ptr<DexFile> dex_file(
       new DexFile(base, size, location, location_checksum, mem_map, oat_dex_file));
@@ -414,11 +468,19 @@
       method_ids_(reinterpret_cast<const MethodId*>(base + header_->method_ids_off_)),
       proto_ids_(reinterpret_cast<const ProtoId*>(base + header_->proto_ids_off_)),
       class_defs_(reinterpret_cast<const ClassDef*>(base + header_->class_defs_off_)),
-      find_class_def_misses_(0),
-      class_def_index_(nullptr),
       oat_dex_file_(oat_dex_file) {
   CHECK(begin_ != nullptr) << GetLocation();
   CHECK_GT(size_, 0U) << GetLocation();
+  const uint8_t* lookup_data = (oat_dex_file != nullptr)
+      ? oat_dex_file->GetLookupTableData()
+      : nullptr;
+  if (lookup_data != nullptr) {
+    if (lookup_data + TypeLookupTable::RawDataLength(*this) > oat_dex_file->GetOatFile()->End()) {
+      LOG(WARNING) << "found truncated lookup table in " << GetLocation();
+    } else {
+      lookup_table_.reset(TypeLookupTable::Open(lookup_data, *this));
+    }
+  }
 }
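
The constructor above only trusts oat-embedded lookup-table data after
checking that the table fits inside the containing oat file. An overflow-safe
restatement of that bounds check, with hypothetical names:

    #include <cstddef>
    #include <cstdint>

    // Accept embedded data only if [data, data + length) lies inside
    // [begin, end); comparing the remaining length instead of a raw pointer
    // sum avoids pointer-arithmetic overflow.
    bool RangeInsideFile(const uint8_t* data, size_t length,
                         const uint8_t* begin, const uint8_t* end) {
      return data >= begin && data <= end &&
             length <= static_cast<size_t>(end - data);
    }
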
 
 DexFile::~DexFile() {
@@ -426,8 +488,6 @@
   // that's only called after DetachCurrentThread, which means there's no JNIEnv. We could
   // re-attach, but cleaning up these global references is not obviously useful. It's not as if
   // the global reference table is otherwise empty!
-  // Remove the index if one were created.
-  delete class_def_index_.LoadRelaxed();
 }
 
 bool DexFile::Init(std::string* error_msg) {
@@ -467,61 +527,41 @@
 
 bool DexFile::IsVersionValid(const uint8_t* magic) {
   const uint8_t* version = &magic[sizeof(kDexMagic)];
-  return (memcmp(version, kDexMagicVersion, sizeof(kDexMagicVersion)) == 0);
+  for (uint32_t i = 0; i < kNumDexVersions; i++) {
+    if (memcmp(version, kDexMagicVersions[i], kDexVersionLen) == 0) {
+      return true;
+    }
+  }
+  return false;
 }
 
-uint32_t DexFile::GetVersion() const {
-  const char* version = reinterpret_cast<const char*>(&GetHeader().magic_[sizeof(kDexMagic)]);
+uint32_t DexFile::Header::GetVersion() const {
+  const char* version = reinterpret_cast<const char*>(&magic_[sizeof(kDexMagic)]);
   return atoi(version);
 }
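
For reference, the magic is kDexMagic ("dex\n") followed by a NUL-terminated
three-digit version, which is why atoi over those bytes yields the version
number. A standalone illustration; the version string is one example, and the
authoritative set lives in kDexMagicVersions:

    #include <cassert>
    #include <cstdlib>
    #include <cstring>

    int main() {
      // Example magic for version 037 (the default-methods version).
      const char magic[8] = {'d', 'e', 'x', '\n', '0', '3', '7', '\0'};
      const char* version = &magic[4];
      assert(strlen(version) == 3);
      assert(atoi(version) == 37);
      return 0;
    }
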
 
 const DexFile::ClassDef* DexFile::FindClassDef(const char* descriptor, size_t hash) const {
   DCHECK_EQ(ComputeModifiedUtf8Hash(descriptor), hash);
-  // If we have an index lookup the descriptor via that as its constant time to search.
-  Index* index = class_def_index_.LoadSequentiallyConsistent();
-  if (index != nullptr) {
-    auto it = index->FindWithHash(descriptor, hash);
-    return (it == index->end()) ? nullptr : it->second;
+  if (LIKELY(lookup_table_ != nullptr)) {
+    const uint32_t class_def_idx = lookup_table_->Lookup(descriptor, hash);
+    return (class_def_idx != DexFile::kDexNoIndex) ? &GetClassDef(class_def_idx) : nullptr;
   }
-  // Fast path for rate no class defs case.
-  uint32_t num_class_defs = NumClassDefs();
+
+  // Fast path for the rare case where there are no class defs.
+  const uint32_t num_class_defs = NumClassDefs();
   if (num_class_defs == 0) {
     return nullptr;
   }
-  // Search for class def with 2 binary searches and then a linear search.
-  const StringId* string_id = FindStringId(descriptor);
-  if (string_id != nullptr) {
-    const TypeId* type_id = FindTypeId(GetIndexForStringId(*string_id));
-    if (type_id != nullptr) {
-      uint16_t type_idx = GetIndexForTypeId(*type_id);
-      for (size_t i = 0; i < num_class_defs; ++i) {
-        const ClassDef& class_def = GetClassDef(i);
-        if (class_def.class_idx_ == type_idx) {
-          return &class_def;
-        }
+  const TypeId* type_id = FindTypeId(descriptor);
+  if (type_id != nullptr) {
+    uint16_t type_idx = GetIndexForTypeId(*type_id);
+    for (size_t i = 0; i < num_class_defs; ++i) {
+      const ClassDef& class_def = GetClassDef(i);
+      if (class_def.class_idx_ == type_idx) {
+        return &class_def;
       }
     }
   }
-  // A miss. If we've had kMaxFailedDexClassDefLookups misses then build an index to speed things
-  // up. This isn't done eagerly at construction as construction is not performed in multi-threaded
-  // sections of tools like dex2oat. If we're lazy we hopefully increase the chance of balancing
-  // out which thread builds the index.
-  const uint32_t kMaxFailedDexClassDefLookups = 100;
-  uint32_t old_misses = find_class_def_misses_.FetchAndAddSequentiallyConsistent(1);
-  if (old_misses == kMaxFailedDexClassDefLookups) {
-    // Are we the ones moving the miss count past the max? Sanity check the index doesn't exist.
-    CHECK(class_def_index_.LoadSequentiallyConsistent() == nullptr);
-    // Build the index.
-    index = new Index();
-    for (uint32_t i = 0; i < num_class_defs;  ++i) {
-      const ClassDef& class_def = GetClassDef(i);
-      const char* class_descriptor = GetClassDescriptor(class_def);
-      index->Insert(std::make_pair(class_descriptor, &class_def));
-    }
-    // Sanity check the index still doesn't exist, only 1 thread should build it.
-    CHECK(class_def_index_.LoadSequentiallyConsistent() == nullptr);
-    class_def_index_.StoreSequentiallyConsistent(index);
-  }
   return nullptr;
 }
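
FindClassDef now prefers the constant-time lookup table and only falls back to
a type-id binary search plus a linear scan of the class defs. A hedged usage
sketch; the descriptor is a placeholder and the hash must be computed with
ComputeModifiedUtf8Hash:

    const char* descriptor = "Ljava/lang/Object;";
    const DexFile::ClassDef* class_def =
        dex_file->FindClassDef(descriptor, ComputeModifiedUtf8Hash(descriptor));
    if (class_def != nullptr) {
      // class_def->class_idx_ is the TypeId index for the descriptor.
    }
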
 
@@ -537,8 +577,8 @@
 }
 
 const DexFile::FieldId* DexFile::FindFieldId(const DexFile::TypeId& declaring_klass,
-                                              const DexFile::StringId& name,
-                                              const DexFile::TypeId& type) const {
+                                             const DexFile::StringId& name,
+                                             const DexFile::TypeId& type) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
   const uint16_t class_idx = GetIndexForTypeId(declaring_klass);
   const uint32_t name_idx = GetIndexForStringId(name);
@@ -625,6 +665,26 @@
   return nullptr;
 }
 
+const DexFile::TypeId* DexFile::FindTypeId(const char* string) const {
+  int32_t lo = 0;
+  int32_t hi = NumTypeIds() - 1;
+  while (hi >= lo) {
+    int32_t mid = (hi + lo) / 2;
+    const TypeId& type_id = GetTypeId(mid);
+    const DexFile::StringId& str_id = GetStringId(type_id.descriptor_idx_);
+    const char* str = GetStringData(str_id);
+    int compare = CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(string, str);
+    if (compare > 0) {
+      lo = mid + 1;
+    } else if (compare < 0) {
+      hi = mid - 1;
+    } else {
+      return &type_id;
+    }
+  }
+  return nullptr;
+}
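
The new overload binary-searches the type-id table directly, which the dex
format keeps sorted by descriptor; callers such as CreateTypeList below no
longer need the intermediate FindStringId step. A hedged usage sketch:

    const DexFile::TypeId* type_id = dex_file->FindTypeId("Ljava/util/List;");
    if (type_id != nullptr) {
      uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
      // type_idx can now be matched against ClassDef::class_idx_.
    }
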
+
 const DexFile::StringId* DexFile::FindStringId(const uint16_t* string, size_t length) const {
   int32_t lo = 0;
   int32_t hi = NumStringIds() - 1;
@@ -697,6 +757,10 @@
   return nullptr;
 }
 
+void DexFile::CreateTypeLookupTable(uint8_t* storage) const {
+  lookup_table_.reset(TypeLookupTable::Create(*this, storage));
+}
+
 // Given a signature place the type ids into the given vector
 bool DexFile::CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
                              std::vector<uint16_t>* param_type_idxs) const {
@@ -732,11 +796,7 @@
     }
     // TODO: avoid creating a std::string just to get a 0-terminated char array
     std::string descriptor(signature.data() + start_offset, offset - start_offset);
-    const DexFile::StringId* string_id = FindStringId(descriptor.c_str());
-    if (string_id == nullptr) {
-      return false;
-    }
-    const DexFile::TypeId* type_id = FindTypeId(GetIndexForStringId(*string_id));
+    const DexFile::TypeId* type_id = FindTypeId(descriptor.c_str());
     if (type_id == nullptr) {
       return false;
     }
@@ -777,8 +837,7 @@
 
   // A method with no line number info should return -1
   LineNumFromPcContext context(rel_pc, -1);
-  DecodeDebugInfo(code_item, method->IsStatic(), method->GetDexMethodIndex(), LineNumForPcCb,
-                  nullptr, &context);
+  DecodeDebugPositionInfo(code_item, LineNumForPcCb, &context);
   return context.line_num_;
 }
 
@@ -815,45 +874,48 @@
   }
 }
 
-void DexFile::DecodeDebugInfo0(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                               DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                               void* context, const uint8_t* stream, LocalInfo* local_in_reg)
-    const {
-  uint32_t line = DecodeUnsignedLeb128(&stream);
-  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
-  uint16_t arg_reg = code_item->registers_size_ - code_item->ins_size_;
-  uint32_t address = 0;
-  bool need_locals = (local_cb != nullptr);
+bool DexFile::DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
+                                   DexDebugNewLocalCb local_cb, void* context) const {
+  DCHECK(local_cb != nullptr);
+  if (code_item == nullptr) {
+    return false;
+  }
+  const uint8_t* stream = GetDebugInfoStream(code_item);
+  if (stream == nullptr) {
+    return false;
+  }
+  std::vector<LocalInfo> local_in_reg(code_item->registers_size_);
 
+  uint16_t arg_reg = code_item->registers_size_ - code_item->ins_size_;
   if (!is_static) {
-    if (need_locals) {
-      const char* descriptor = GetMethodDeclaringClassDescriptor(GetMethodId(method_idx));
-      local_in_reg[arg_reg].name_ = "this";
-      local_in_reg[arg_reg].descriptor_ = descriptor;
-      local_in_reg[arg_reg].signature_ = nullptr;
-      local_in_reg[arg_reg].start_address_ = 0;
-      local_in_reg[arg_reg].is_live_ = true;
-    }
+    const char* descriptor = GetMethodDeclaringClassDescriptor(GetMethodId(method_idx));
+    local_in_reg[arg_reg].name_ = "this";
+    local_in_reg[arg_reg].descriptor_ = descriptor;
+    local_in_reg[arg_reg].signature_ = nullptr;
+    local_in_reg[arg_reg].start_address_ = 0;
+    local_in_reg[arg_reg].reg_ = arg_reg;
+    local_in_reg[arg_reg].is_live_ = true;
     arg_reg++;
   }
 
   DexFileParameterIterator it(*this, GetMethodPrototype(GetMethodId(method_idx)));
-  for (uint32_t i = 0; i < parameters_size && it.HasNext(); ++i, it.Next()) {
+  DecodeUnsignedLeb128(&stream);  // Line.
+  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+  uint32_t i;
+  for (i = 0; i < parameters_size && it.HasNext(); ++i, it.Next()) {
     if (arg_reg >= code_item->registers_size_) {
       LOG(ERROR) << "invalid stream - arg reg >= reg size (" << arg_reg
                  << " >= " << code_item->registers_size_ << ") in " << GetLocation();
-      return;
+      return false;
     }
-    uint32_t id = DecodeUnsignedLeb128P1(&stream);
+    uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
     const char* descriptor = it.GetDescriptor();
-    if (need_locals && id != kDexNoIndex) {
-      const char* name = StringDataByIdx(id);
-      local_in_reg[arg_reg].name_ = name;
-      local_in_reg[arg_reg].descriptor_ = descriptor;
-      local_in_reg[arg_reg].signature_ = nullptr;
-      local_in_reg[arg_reg].start_address_ = address;
-      local_in_reg[arg_reg].is_live_ = true;
-    }
+    local_in_reg[arg_reg].name_ = StringDataByIdx(name_idx);
+    local_in_reg[arg_reg].descriptor_ = descriptor;
+    local_in_reg[arg_reg].signature_ = nullptr;
+    local_in_reg[arg_reg].start_address_ = 0;
+    local_in_reg[arg_reg].reg_ = arg_reg;
+    local_in_reg[arg_reg].is_live_ = true;
     switch (*descriptor) {
       case 'D':
       case 'J':
@@ -864,152 +926,188 @@
         break;
     }
   }
-
-  if (it.HasNext()) {
+  if (i != parameters_size || it.HasNext()) {
     LOG(ERROR) << "invalid stream - problem with parameter iterator in " << GetLocation()
                << " for method " << PrettyMethod(method_idx, *this);
-    return;
+    return false;
   }
 
+  uint32_t address = 0;
   for (;;)  {
     uint8_t opcode = *stream++;
-    uint16_t reg;
-    uint32_t name_idx;
-    uint32_t descriptor_idx;
-    uint32_t signature_idx = 0;
-
     switch (opcode) {
       case DBG_END_SEQUENCE:
-        return;
-
+        // Emit all locals that are still live at the end of the method.
+        for (uint16_t reg = 0; reg < code_item->registers_size_; reg++) {
+          if (local_in_reg[reg].is_live_) {
+            local_in_reg[reg].end_address_ = code_item->insns_size_in_code_units_;
+            local_cb(context, local_in_reg[reg]);
+          }
+        }
+        return true;
       case DBG_ADVANCE_PC:
         address += DecodeUnsignedLeb128(&stream);
         break;
-
       case DBG_ADVANCE_LINE:
-        line += DecodeSignedLeb128(&stream);
+        DecodeSignedLeb128(&stream);  // Line.
         break;
-
       case DBG_START_LOCAL:
-      case DBG_START_LOCAL_EXTENDED:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
+      case DBG_START_LOCAL_EXTENDED: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
                      << code_item->registers_size_ << ") in " << GetLocation();
-          return;
+          return false;
         }
 
-        name_idx = DecodeUnsignedLeb128P1(&stream);
-        descriptor_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t descriptor_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t signature_idx = kDexNoIndex;
         if (opcode == DBG_START_LOCAL_EXTENDED) {
           signature_idx = DecodeUnsignedLeb128P1(&stream);
         }
 
         // Emit what was previously there, if anything
-        if (need_locals) {
-          InvokeLocalCbIfLive(context, reg, address, local_in_reg, local_cb);
+        if (local_in_reg[reg].is_live_) {
+          local_in_reg[reg].end_address_ = address;
+          local_cb(context, local_in_reg[reg]);
+        }
 
-          local_in_reg[reg].name_ = StringDataByIdx(name_idx);
-          local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
-          local_in_reg[reg].signature_ =
-              (opcode == DBG_START_LOCAL_EXTENDED) ? StringDataByIdx(signature_idx)
-                                                   : nullptr;
+        local_in_reg[reg].name_ = StringDataByIdx(name_idx);
+        local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
+        local_in_reg[reg].signature_ = StringDataByIdx(signature_idx);
+        local_in_reg[reg].start_address_ = address;
+        local_in_reg[reg].reg_ = reg;
+        local_in_reg[reg].is_live_ = true;
+        break;
+      }
+      case DBG_END_LOCAL: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
+                     << code_item->registers_size_ << ") in " << GetLocation();
+          return false;
+        }
+        if (!local_in_reg[reg].is_live_) {
+          LOG(ERROR) << "invalid stream - end without start in " << GetLocation();
+          return false;
+        }
+        local_in_reg[reg].end_address_ = address;
+        local_cb(context, local_in_reg[reg]);
+        local_in_reg[reg].is_live_ = false;
+        break;
+      }
+      case DBG_RESTART_LOCAL: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
+                     << code_item->registers_size_ << ") in " << GetLocation();
+          return false;
+        }
+        // If the register is live, the "restart" is superfluous,
+        // and we don't want to mess with the existing start address.
+        if (!local_in_reg[reg].is_live_) {
           local_in_reg[reg].start_address_ = address;
           local_in_reg[reg].is_live_ = true;
         }
         break;
-
-      case DBG_END_LOCAL:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
-                     << code_item->registers_size_ << ") in " << GetLocation();
-          return;
-        }
-
-        if (need_locals) {
-          InvokeLocalCbIfLive(context, reg, address, local_in_reg, local_cb);
-          local_in_reg[reg].is_live_ = false;
-        }
-        break;
-
-      case DBG_RESTART_LOCAL:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
-                     << code_item->registers_size_ << ") in " << GetLocation();
-          return;
-        }
-
-        if (need_locals) {
-          if (local_in_reg[reg].name_ == nullptr || local_in_reg[reg].descriptor_ == nullptr) {
-            LOG(ERROR) << "invalid stream - no name or descriptor in " << GetLocation();
-            return;
-          }
-
-          // If the register is live, the "restart" is superfluous,
-          // and we don't want to mess with the existing start address.
-          if (!local_in_reg[reg].is_live_) {
-            local_in_reg[reg].start_address_ = address;
-            local_in_reg[reg].is_live_ = true;
-          }
-        }
-        break;
-
+      }
       case DBG_SET_PROLOGUE_END:
       case DBG_SET_EPILOGUE_BEGIN:
-      case DBG_SET_FILE:
         break;
+      case DBG_SET_FILE:
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        break;
+      default:
+        address += (opcode - DBG_FIRST_SPECIAL) / DBG_LINE_RANGE;
+        break;
+    }
+  }
+}
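
A hedged example of driving DecodeDebugLocalInfo through the new callback
shape (DexDebugNewLocalCb now receives a LocalInfo struct); the collector type
is hypothetical:

    #include <vector>

    struct LocalCollector {
      std::vector<DexFile::LocalInfo> locals;
    };

    void CollectLocalCb(void* context, const DexFile::LocalInfo& entry) {
      reinterpret_cast<LocalCollector*>(context)->locals.push_back(entry);
    }

    // ...
    LocalCollector collector;
    if (!dex_file->DecodeDebugLocalInfo(code_item, is_static, method_idx,
                                        CollectLocalCb, &collector)) {
      // No debug info stream, or it failed to decode.
    }
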
 
+bool DexFile::DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
+                                      void* context) const {
+  DCHECK(position_cb != nullptr);
+  if (code_item == nullptr) {
+    return false;
+  }
+  const uint8_t* stream = GetDebugInfoStream(code_item);
+  if (stream == nullptr) {
+    return false;
+  }
+
+  PositionInfo entry = PositionInfo();
+  entry.line_ = DecodeUnsignedLeb128(&stream);
+  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+  for (uint32_t i = 0; i < parameters_size; ++i) {
+    DecodeUnsignedLeb128P1(&stream);  // Parameter name.
+  }
+
+  for (;;)  {
+    uint8_t opcode = *stream++;
+    switch (opcode) {
+      case DBG_END_SEQUENCE:
+        return true;  // end of stream.
+      case DBG_ADVANCE_PC:
+        entry.address_ += DecodeUnsignedLeb128(&stream);
+        break;
+      case DBG_ADVANCE_LINE:
+        entry.line_ += DecodeSignedLeb128(&stream);
+        break;
+      case DBG_START_LOCAL:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        DecodeUnsignedLeb128P1(&stream);  // descriptor.
+        break;
+      case DBG_START_LOCAL_EXTENDED:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        DecodeUnsignedLeb128P1(&stream);  // descriptor.
+        DecodeUnsignedLeb128P1(&stream);  // signature.
+        break;
+      case DBG_END_LOCAL:
+      case DBG_RESTART_LOCAL:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        break;
+      case DBG_SET_PROLOGUE_END:
+        entry.prologue_end_ = true;
+        break;
+      case DBG_SET_EPILOGUE_BEGIN:
+        entry.epilogue_begin_ = true;
+        break;
+      case DBG_SET_FILE: {
+        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
+        entry.source_file_ = StringDataByIdx(name_idx);
+        break;
+      }
       default: {
         int adjopcode = opcode - DBG_FIRST_SPECIAL;
-
-        address += adjopcode / DBG_LINE_RANGE;
-        line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
-
-        if (position_cb != nullptr) {
-          if (position_cb(context, address, line)) {
-            // early exit
-            return;
-          }
+        entry.address_ += adjopcode / DBG_LINE_RANGE;
+        entry.line_ += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
+        if (position_cb(context, entry)) {
+          return true;  // early exit.
         }
+        entry.prologue_end_ = false;
+        entry.epilogue_begin_ = false;
         break;
       }
     }
   }
 }
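
A hedged example of the position decoder: building a pc-to-line table via the
new PositionInfo-based callback. The table type is hypothetical:

    #include <vector>

    struct PcLine {
      uint32_t pc;
      uint32_t line;
    };

    bool RecordPositionCb(void* context, const DexFile::PositionInfo& entry) {
      auto* table = reinterpret_cast<std::vector<PcLine>*>(context);
      table->push_back(PcLine{entry.address_, entry.line_});
      return false;  // Returning true would stop the decoder early.
    }

    // ...
    std::vector<PcLine> table;
    dex_file->DecodeDebugPositionInfo(code_item, RecordPositionCb, &table);
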
 
-void DexFile::DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                              DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                              void* context) const {
-  DCHECK(code_item != nullptr);
-  const uint8_t* stream = GetDebugInfoStream(code_item);
-  std::unique_ptr<LocalInfo[]> local_in_reg(local_cb != nullptr ?
-                                      new LocalInfo[code_item->registers_size_] :
-                                      nullptr);
-  if (stream != nullptr) {
-    DecodeDebugInfo0(code_item, is_static, method_idx, position_cb, local_cb, context, stream,
-                     &local_in_reg[0]);
-  }
-  for (int reg = 0; reg < code_item->registers_size_; reg++) {
-    InvokeLocalCbIfLive(context, reg, code_item->insns_size_in_code_units_, &local_in_reg[0],
-                        local_cb);
-  }
-}
-
-bool DexFile::LineNumForPcCb(void* raw_context, uint32_t address, uint32_t line_num) {
+bool DexFile::LineNumForPcCb(void* raw_context, const PositionInfo& entry) {
   LineNumFromPcContext* context = reinterpret_cast<LineNumFromPcContext*>(raw_context);
 
   // We know that this callback will be called in
   // ascending address order, so keep going until we find
   // a match or we've just gone past it.
-  if (address > context->address_) {
+  if (entry.address_ > context->address_) {
     // The line number from the previous positions callback
     // will be the final result.
     return true;
   } else {
-    context->line_num_ = line_num;
-    return address == context->address_;
+    context->line_num_ = entry.line_;
+    return entry.address_ == context->address_;
   }
 }
 
@@ -1094,6 +1192,18 @@
   return val;
 }
 
+// Checks that visibility is as expected. Includes special behavior for M (SDK
+// version 23) and earlier, accepting build visibility as well when runtime
+// visibility is expected.
+static bool IsVisibilityCompatible(uint32_t actual, uint32_t expected) {
+  if (expected == DexFile::kDexVisibilityRuntime) {
+    int32_t sdk_version = Runtime::Current()->GetTargetSdkVersion();
+    if (sdk_version > 0 && sdk_version <= 23) {
+      return actual == DexFile::kDexVisibilityRuntime || actual == DexFile::kDexVisibilityBuild;
+    }
+  }
+  return actual == expected;
+}
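
The rule above, restated as a standalone predicate for clarity; the visibility
constants are illustrative stand-ins for kDexVisibilityBuild and
kDexVisibilityRuntime:

    #include <cstdint>

    constexpr uint32_t kVisBuild = 0x00;    // Illustrative values.
    constexpr uint32_t kVisRuntime = 0x01;

    bool IsCompatible(uint32_t actual, uint32_t expected, int32_t sdk_version) {
      if (expected == kVisRuntime && sdk_version > 0 && sdk_version <= 23) {
        // Apps targeting M or earlier also see build-visible annotations.
        return actual == kVisRuntime || actual == kVisBuild;
      }
      return actual == expected;
    }
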
+
 const DexFile::AnnotationSetItem* DexFile::FindAnnotationSetForField(ArtField* field) const {
   mirror::Class* klass = field->GetDeclaringClass();
   const AnnotationsDirectoryItem* annotations_dir = GetAnnotationsDirectory(*klass->GetClassDef());
@@ -1228,7 +1338,7 @@
   AnnotationValue annotation_value;
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::Class> h_klass(hs.NewHandle(klass));
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   Handle<mirror::Class> return_type(hs.NewHandle(
       method->GetReturnType(true /* resolve */, pointer_size)));
   if (!ProcessAnnotationValue(h_klass, &annotation, &annotation_value, return_type, kAllObjects)) {
@@ -1282,7 +1392,20 @@
   return ProcessAnnotationSetRefList(method_class, set_ref_list, size);
 }
 
-bool DexFile::IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class)
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForMethod(ArtMethod* method)
+    const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
+  return GetSignatureValue(method_class, annotation_set);
+}
+
+bool DexFile::IsMethodAnnotationPresent(ArtMethod* method,
+                                        Handle<mirror::Class> annotation_class,
+                                        uint32_t visibility /* = kDexVisibilityRuntime */)
     const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
   if (annotation_set == nullptr) {
@@ -1290,8 +1413,10 @@
   }
   StackHandleScope<1> hs(Thread::Current());
   Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
-      method_class, annotation_set, kDexVisibilityRuntime, annotation_class);
+  const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(method_class,
+                                                                             annotation_set,
+                                                                             visibility,
+                                                                             annotation_class);
   return annotation_item != nullptr;
 }
 
@@ -1355,8 +1480,11 @@
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Object* obj = GetAnnotationValue(
-      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationType);
+  mirror::Object* obj = GetAnnotationValue(klass,
+                                           annotation_item,
+                                           "value",
+                                           ScopedNullHandle<mirror::Class>(),
+                                           kDexAnnotationType);
   if (obj == nullptr) {
     return nullptr;
   }
@@ -1382,8 +1510,11 @@
     return nullptr;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllRaw)) {
     return nullptr;
   }
   if (annotation_value.type_ != kDexAnnotationMethod) {
@@ -1411,7 +1542,7 @@
     return nullptr;
   }
   return GetAnnotationValue(
-      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationMethod);
+      klass, annotation_item, "value", ScopedNullHandle<mirror::Class>(), kDexAnnotationMethod);
 }
 
 bool DexFile::GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) const {
@@ -1429,8 +1560,11 @@
     return false;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllObjects)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllObjects)) {
     return false;
   }
   if (annotation_value.type_ != kDexAnnotationNull &&
@@ -1456,8 +1590,11 @@
     return false;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllRaw)) {
     return false;
   }
   if (annotation_value.type_ != kDexAnnotationInt) {
@@ -1467,6 +1604,15 @@
   return true;
 }
 
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForClass(
+    Handle<mirror::Class> klass) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  return GetSignatureValue(klass, annotation_set);
+}
+
 bool DexFile::IsClassAnnotationPresent(Handle<mirror::Class> klass,
                                        Handle<mirror::Class> annotation_class) const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
@@ -1488,12 +1634,12 @@
   Handle<mirror::String> string_name(
       hs.NewHandle(mirror::String::AllocFromModifiedUtf8(self, name)));
 
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   ArtMethod* annotation_method =
-      annotation_class->FindDeclaredVirtualMethodByName(name, sizeof(void*));
+      annotation_class->FindDeclaredVirtualMethodByName(name, pointer_size);
   if (annotation_method == nullptr) {
     return nullptr;
   }
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   Handle<mirror::Class> method_return(hs.NewHandle(
       annotation_method->GetReturnType(true /* resolve */, pointer_size)));
 
@@ -1506,8 +1652,16 @@
   mirror::Class* annotation_member_class =
       WellKnownClasses::ToClass(WellKnownClasses::libcore_reflect_AnnotationMember);
   Handle<mirror::Object> new_member(hs.NewHandle(annotation_member_class->AllocObject(self)));
-  Handle<mirror::Method> method_object(
-      hs.NewHandle(mirror::Method::CreateFromArtMethod(self, annotation_method)));
+  mirror::Method* method_obj_ptr;
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  if (pointer_size == PointerSize::k64) {
+    method_obj_ptr = mirror::Method::CreateFromArtMethod<PointerSize::k64, false>(
+        self, annotation_method);
+  } else {
+    method_obj_ptr = mirror::Method::CreateFromArtMethod<PointerSize::k32, false>(
+        self, annotation_method);
+  }
+  Handle<mirror::Method> method_object(hs.NewHandle(method_obj_ptr));
 
   if (new_member.Get() == nullptr || string_name.Get() == nullptr ||
       method_object.Get() == nullptr || method_return.Get() == nullptr) {
@@ -1539,7 +1693,7 @@
     Handle<mirror::Class> annotation_class) const {
   for (uint32_t i = 0; i < annotation_set->size_; ++i) {
     const AnnotationItem* annotation_item = GetAnnotationItem(annotation_set, i);
-    if (annotation_item->visibility_ != visibility) {
+    if (!IsVisibilityCompatible(annotation_item->visibility_, visibility)) {
       continue;
     }
     const uint8_t* annotation = annotation_item->annotation_;
@@ -1657,6 +1811,8 @@
   uint32_t dest_index = 0;
   for (uint32_t i = 0; i < size; ++i) {
     const AnnotationItem* annotation_item = GetAnnotationItem(annotation_set, i);
+    // Note that we do not use IsVisibilityCompatible here because the older,
+    // exact-match behavior was already correct for this case.
     if (annotation_item->visibility_ != visibility) {
       continue;
     }
@@ -1813,16 +1969,31 @@
         StackHandleScope<2> hs(self);
         Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
-        ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
+        ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+        ArtMethod* method = class_linker->ResolveMethodWithoutInvokeType(
             klass->GetDexFile(), index, dex_cache, class_loader);
         if (method == nullptr) {
           return false;
         }
+        PointerSize pointer_size = class_linker->GetImagePointerSize();
         set_object = true;
+        DCHECK(!Runtime::Current()->IsActiveTransaction());
         if (method->IsConstructor()) {
-          element_object = mirror::Constructor::CreateFromArtMethod(self, method);
+          if (pointer_size == PointerSize::k64) {
+            element_object = mirror::Constructor::CreateFromArtMethod<PointerSize::k64,
+                                                                      false>(self, method);
+          } else {
+            element_object = mirror::Constructor::CreateFromArtMethod<PointerSize::k32,
+                                                                      false>(self, method);
+          }
         } else {
-          element_object = mirror::Method::CreateFromArtMethod(self, method);
+          if (pointer_size == PointerSize::k64) {
+            element_object = mirror::Method::CreateFromArtMethod<PointerSize::k64,
+                                                                 false>(self, method);
+          } else {
+            element_object = mirror::Method::CreateFromArtMethod<PointerSize::k32,
+                                                                 false>(self, method);
+          }
         }
         if (element_object == nullptr) {
           return false;
@@ -1844,7 +2015,12 @@
           return false;
         }
         set_object = true;
-        element_object = mirror::Field::CreateFromArtField(self, field, true);
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        if (pointer_size == PointerSize::k64) {
+          element_object = mirror::Field::CreateFromArtField<PointerSize::k64>(self, field, true);
+        } else {
+          element_object = mirror::Field::CreateFromArtField<PointerSize::k32>(self, field, true);
+        }
         if (element_object == nullptr) {
           return false;
         }
@@ -1861,10 +2037,10 @@
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
             klass->GetDexFile(), index, dex_cache, class_loader, true);
-        Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
         if (enum_field == nullptr) {
           return false;
         } else {
+          Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
           Runtime::Current()->GetClassLinker()->EnsureInitialized(self, field_class, true, true);
           element_object = enum_field->GetObject(field_class.Get());
           set_object = true;
@@ -2045,7 +2221,7 @@
   const AnnotationItem* result = nullptr;
   for (uint32_t i = 0; i < annotation_set->size_; ++i) {
     const AnnotationItem* annotation_item = GetAnnotationItem(annotation_set, i);
-    if (annotation_item->visibility_ != visibility) {
+    if (!IsVisibilityCompatible(annotation_item->visibility_, visibility)) {
       continue;
     }
     const uint8_t* annotation = annotation_item->annotation_;
@@ -2218,13 +2394,48 @@
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
-    const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
-    Handle<mirror::ClassLoader>* class_loader, ClassLinker* linker,
+    const DexFile& dex_file,
     const DexFile::ClassDef& class_def)
-    : dex_file_(dex_file), dex_cache_(dex_cache), class_loader_(class_loader), linker_(linker),
-      array_size_(), pos_(-1), type_(kByte) {
-  DCHECK(dex_cache != nullptr);
-  DCHECK(class_loader != nullptr);
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      nullptr,
+                                      nullptr,
+                                      nullptr,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def)
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      dex_cache, class_loader,
+                                      linker,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def,
+    size_t pos,
+    ValueType type)
+    : dex_file_(dex_file),
+      dex_cache_(dex_cache),
+      class_loader_(class_loader),
+      linker_(linker),
+      array_size_(),
+      pos_(pos),
+      type_(type) {
   ptr_ = dex_file.GetEncodedStaticFieldValuesArray(class_def);
   if (ptr_ == nullptr) {
     array_size_ = 0;
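
The two public constructors above now forward to a single private constructor
(C++11 delegating constructors), so the member-initializer list exists in one
place. A minimal standalone illustration of the pattern:

    #include <cstddef>

    class Iterator {
     public:
      Iterator() : Iterator(/* pos */ 0, /* type */ 0) {}  // Delegates.

     private:
      Iterator(size_t pos, int type) : pos_(pos), type_(type) {}

      size_t pos_;
      int type_;
    };
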
@@ -2298,6 +2509,8 @@
 
 template<bool kTransactionActive>
 void EncodedStaticFieldValueIterator::ReadValueToField(ArtField* field) const {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
   switch (type_) {
     case kBoolean: field->SetBoolean<kTransactionActive>(field->GetDeclaringClass(), jval_.z);
         break;
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 47e5c12..59339ef 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -51,13 +51,22 @@
 class Signature;
 template<class T> class Handle;
 class StringPiece;
+class TypeLookupTable;
 class ZipArchive;
 
 // TODO: move all of the macro functionality into the DexCache class.
 class DexFile {
  public:
+  // First Dex format version supporting default methods.
+  static const uint32_t kDefaultMethodsVersion = 37;
+  // First Dex format version enforcing class definition ordering rules.
+  static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
+
   static const uint8_t kDexMagic[];
-  static const uint8_t kDexMagicVersion[];
+  static constexpr size_t kNumDexVersions = 3;
+  static constexpr size_t kDexVersionLen = 4;
+  static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen];
+
   static constexpr size_t kSha1DigestSize = 20;
   static constexpr uint32_t kDexEndianConstant = 0x12345678;
 
@@ -70,7 +79,7 @@
   // The value of an invalid index.
   static const uint16_t kDexNoIndex16 = 0xFFFF;
 
-  // The separator charactor in MultiDex locations.
+  // The separator character in MultiDex locations.
   static constexpr char kMultiDexSeparator = ':';
 
   // A string version of the previous. This is a define so that we can merge string literals in the
@@ -103,6 +112,9 @@
     uint32_t data_size_;  // unused
     uint32_t data_off_;  // unused
 
+    // Decode the dex magic version
+    uint32_t GetVersion() const;
+
    private:
     DISALLOW_COPY_AND_ASSIGN(Header);
   };
@@ -404,7 +416,10 @@
   static bool GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg);
 
   // Opens .dex files found in the container, guessing the container format based on file extension.
-  static bool Open(const char* filename, const char* location, std::string* error_msg,
+  static bool Open(const char* filename,
+                   const char* location,
+                   bool verify_checksum,
+                   std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   // Checks whether the given file has the dex magic, or is a zip file with a classes.dex entry.
@@ -416,12 +431,14 @@
                                              const std::string& location,
                                              uint32_t location_checksum,
                                              const OatDexFile* oat_dex_file,
-                                             std::string* error_msg) {
-    return OpenMemory(base, size, location, location_checksum, nullptr, oat_dex_file, error_msg);
-  }
+                                             bool verify,
+                                             bool verify_checksum,
+                                             std::string* error_msg);
 
   // Open all classesXXX.dex files from a zip archive.
-  static bool OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+  static bool OpenFromZip(const ZipArchive& zip_archive,
+                          const std::string& location,
+                          bool verify_checksum,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
@@ -476,7 +493,9 @@
   }
 
   // Decode the dex magic version
-  uint32_t GetVersion() const;
+  uint32_t GetVersion() const {
+    return GetHeader().GetVersion();
+  }
 
   // Returns true if the byte string points to the magic value.
   static bool IsMagicValid(const uint8_t* magic);
@@ -509,29 +528,18 @@
   // as the string length of the string data.
   const char* GetStringDataAndUtf16Length(const StringId& string_id, uint32_t* utf16_length) const;
 
-  const char* GetStringData(const StringId& string_id) const {
-    uint32_t ignored;
-    return GetStringDataAndUtf16Length(string_id, &ignored);
-  }
+  const char* GetStringData(const StringId& string_id) const;
 
   // Index version of GetStringDataAndUtf16Length.
-  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const {
-    if (idx == kDexNoIndex) {
-      *utf16_length = 0;
-      return nullptr;
-    }
-    const StringId& string_id = GetStringId(idx);
-    return GetStringDataAndUtf16Length(string_id, utf16_length);
-  }
+  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const;
 
-  const char* StringDataByIdx(uint32_t idx) const {
-    uint32_t unicode_length;
-    return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
-  }
+  const char* StringDataByIdx(uint32_t idx) const;
 
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
 
+  const TypeId* FindTypeId(const char* string) const;
+
   // Looks up a string id for a given utf16 string.
   const StringId* FindStringId(const uint16_t* string, size_t length) const;
 
@@ -556,20 +564,12 @@
   }
 
   // Get the descriptor string associated with a given type index.
-  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
-  }
+  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const;
 
-  const char* StringByTypeIdx(uint32_t idx) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* StringByTypeIdx(uint32_t idx) const;
 
   // Returns the type descriptor string of a type id.
-  const char* GetTypeDescriptor(const TypeId& type_id) const {
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* GetTypeDescriptor(const TypeId& type_id) const;
 
   // Looks up a type for the given string index
   const TypeId* FindTypeId(uint32_t string_idx) const;
@@ -604,15 +604,10 @@
   }
 
   // Returns the class descriptor string of a field id.
-  const char* GetFieldTypeDescriptor(const FieldId& field_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetFieldTypeDescriptor(const FieldId& field_id) const;
 
   // Returns the name of a field id.
-  const char* GetFieldName(const FieldId& field_id) const {
-    return StringDataByIdx(field_id.name_idx_);
-  }
+  const char* GetFieldName(const FieldId& field_id) const;
 
   // Returns the number of method identifiers in the .dex file.
   size_t NumMethodIds() const {
@@ -638,10 +633,7 @@
                                const DexFile::ProtoId& signature) const;
 
   // Returns the declaring class descriptor string of a method id.
-  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const;
 
   // Returns the prototype of a method id.
   const ProtoId& GetMethodPrototype(const MethodId& method_id) const {
@@ -652,23 +644,15 @@
   const Signature GetMethodSignature(const MethodId& method_id) const;
 
   // Returns the name of a method id.
-  const char* GetMethodName(const MethodId& method_id) const {
-    return StringDataByIdx(method_id.name_idx_);
-  }
+  const char* GetMethodName(const MethodId& method_id) const;
 
   // Returns the shorty of a method by its index.
-  const char* GetMethodShorty(uint32_t idx) const {
-    return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
-  }
+  const char* GetMethodShorty(uint32_t idx) const;
 
   // Returns the shorty of a method id.
-  const char* GetMethodShorty(const MethodId& method_id) const {
-    return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
-  }
-  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
-    // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
-    return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
-  }
+  const char* GetMethodShorty(const MethodId& method_id) const;
+  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const;
+
   // Returns the number of class definitions in the .dex file.
   uint32_t NumClassDefs() const {
     DCHECK(header_ != nullptr) << GetLocation();
@@ -688,9 +672,7 @@
   }
 
   // Returns the class descriptor string of a class definition.
-  const char* GetClassDescriptor(const ClassDef& class_def) const {
-    return StringByTypeIdx(class_def.class_idx_);
-  }
+  const char* GetClassDescriptor(const ClassDef& class_def) const;
 
   // Looks up a class definition by its class descriptor. Hash must be
   // ComputeModifiedUtf8Hash(descriptor).
@@ -719,6 +701,7 @@
 
   //
   const CodeItem* GetCodeItem(const uint32_t code_off) const {
+    DCHECK_LT(code_off, size_) << "Code item offset larger than the maximum allowed offset";
     if (code_off == 0) {
       return nullptr;  // native or abstract method
     } else {
@@ -727,9 +710,7 @@
     }
   }
 
-  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const {
-    return StringByTypeIdx(proto_id.return_type_idx_);
-  }
+  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const;
 
   // Returns the number of prototype identifiers in the .dex file.
   size_t NumProtoIds() const {
@@ -766,10 +747,7 @@
   const Signature CreateSignature(const StringPiece& signature) const;
 
   // Returns the short form method descriptor for the given prototype.
-  const char* GetShorty(uint32_t proto_idx) const {
-    const ProtoId& proto_id = GetProtoId(proto_idx);
-    return StringDataByIdx(proto_id.shorty_idx_);
-  }
+  const char* GetShorty(uint32_t proto_idx) const;
 
   const TypeList* GetProtoParameters(const ProtoId& proto_id) const {
     if (proto_id.parameters_off_ == 0) {
@@ -815,20 +793,50 @@
     }
   }
 
+  struct PositionInfo {
+    PositionInfo()
+        : address_(0),
+          line_(0),
+          source_file_(nullptr),
+          prologue_end_(false),
+          epilogue_begin_(false) {
+    }
+
+    uint32_t address_;  // In 16-bit code units.
+    uint32_t line_;  // Source code line number starting at 1.
+    const char* source_file_;  // nullptr if the file from ClassDef still applies.
+    bool prologue_end_;
+    bool epilogue_begin_;
+  };
+
   // Callback for "new position table entry".
   // Returning true causes the decoder to stop early.
-  typedef bool (*DexDebugNewPositionCb)(void* context, uint32_t address, uint32_t line_num);
+  typedef bool (*DexDebugNewPositionCb)(void* context, const PositionInfo& entry);
 
-  // Callback for "new locals table entry". "signature" is an empty string
-  // if no signature is available for an entry.
-  typedef void (*DexDebugNewLocalCb)(void* context, uint16_t reg,
-                                     uint32_t start_address,
-                                     uint32_t end_address,
-                                     const char* name,
-                                     const char* descriptor,
-                                     const char* signature);
+  struct LocalInfo {
+    LocalInfo()
+        : name_(nullptr),
+          descriptor_(nullptr),
+          signature_(nullptr),
+          start_address_(0),
+          end_address_(0),
+          reg_(0),
+          is_live_(false) {
+    }
 
-  static bool LineNumForPcCb(void* context, uint32_t address, uint32_t line_num);
+    const char* name_;  // E.g., list.  It can be nullptr if unknown.
+    const char* descriptor_;  // E.g., Ljava/util/LinkedList;
+    const char* signature_;  // E.g., java.util.LinkedList<java.lang.Integer>
+    uint32_t start_address_;  // PC location where the local is first defined.
+    uint32_t end_address_;  // PC location where the local is no longer defined.
+    uint16_t reg_;  // Dex register that holds the value.
+    bool is_live_;  // Is the local defined and live.
+  };
+
+  // Callback for "new locals table entry".
+  typedef void (*DexDebugNewLocalCb)(void* context, const LocalInfo& entry);
+
+  static bool LineNumForPcCb(void* context, const PositionInfo& entry);
 
   const AnnotationsDirectoryItem* GetAnnotationsDirectory(const ClassDef& class_def) const {
     if (class_def.annotations_off_ == 0) {
@@ -950,7 +958,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetParameterAnnotations(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class) const
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForMethod(ArtMethod* method) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool IsMethodAnnotationPresent(ArtMethod* method,
+                                 Handle<mirror::Class> annotation_class,
+                                 uint32_t visibility = kDexVisibilityRuntime) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   const AnnotationSetItem* FindAnnotationSetForClass(Handle<mirror::Class> klass) const
@@ -972,6 +984,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass)
+      const SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class)
       const SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1040,21 +1054,6 @@
     DBG_LINE_RANGE           = 15,
   };
 
-  struct LocalInfo {
-    LocalInfo()
-        : name_(nullptr), descriptor_(nullptr), signature_(nullptr), start_address_(0),
-          is_live_(false) {}
-
-    const char* name_;  // E.g., list
-    const char* descriptor_;  // E.g., Ljava/util/LinkedList;
-    const char* signature_;  // E.g., java.util.LinkedList<java.lang.Integer>
-    uint16_t start_address_;  // PC location where the local is first defined.
-    bool is_live_;  // Is the local defined and live.
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(LocalInfo);
-  };
-
   struct LineNumFromPcContext {
     LineNumFromPcContext(uint32_t address, uint32_t line_num)
         : address_(address), line_num_(line_num) {}
@@ -1064,15 +1063,6 @@
     DISALLOW_COPY_AND_ASSIGN(LineNumFromPcContext);
   };
 
-  void InvokeLocalCbIfLive(void* context, int reg, uint32_t end_address,
-                           LocalInfo* local_in_reg, DexDebugNewLocalCb local_cb) const {
-    if (local_cb != nullptr && local_in_reg[reg].is_live_) {
-      local_cb(context, reg, local_in_reg[reg].start_address_, end_address,
-          local_in_reg[reg].name_, local_in_reg[reg].descriptor_,
-          local_in_reg[reg].signature_ != nullptr ? local_in_reg[reg].signature_ : "");
-    }
-  }
-
   // Determine the source file line number based on the program counter.
   // "pc" is an offset, in 16-bit units, from the start of the method's code.
   //
@@ -1084,9 +1074,13 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                       DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                       void* context) const;
+  // Returns false if there is no debugging information or if it cannot be decoded.
+  bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
+                            DexDebugNewLocalCb local_cb, void* context) const;
+
+  // Returns false if there is no debugging information or if it cannot be decoded.
+  bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
+                               void* context) const;
 
   const char* GetSourceFile(const ClassDef& class_def) const {
     if (class_def.source_file_idx_ == 0xffffffff) {
@@ -1139,13 +1133,25 @@
     return oat_dex_file_;
   }
 
+  TypeLookupTable* GetTypeLookupTable() const {
+    return lookup_table_.get();
+  }
+
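+  // Creates the type lookup table. If |storage| is non-null, the table is built in
+  // caller-provided memory; otherwise it allocates its own backing store.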
+  void CreateTypeLookupTable(uint8_t* storage = nullptr) const;
+
  private:
   // Opens a .dex file
-  static std::unique_ptr<const DexFile> OpenFile(int fd, const char* location,
-                                                 bool verify, std::string* error_msg);
+  static std::unique_ptr<const DexFile> OpenFile(int fd,
+                                                 const char* location,
+                                                 bool verify,
+                                                 bool verify_checksum,
+                                                 std::string* error_msg);
 
   // Opens dex files from within a .jar, .zip, or .apk file
-  static bool OpenZip(int fd, const std::string& location, std::string* error_msg,
+  static bool OpenZip(int fd,
+                      const std::string& location,
+                      bool verify_checksum,
+                      std::string* error_msg,
                       std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   enum class ZipOpenErrorCode {  // private
@@ -1159,8 +1165,11 @@
 
   // Opens .dex file from the entry_name in a zip archive. error_code is undefined on a
   // successful (non-null) return.
-  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive, const char* entry_name,
-                                             const std::string& location, std::string* error_msg,
+  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive,
+                                             const char* entry_name,
+                                             const std::string& location,
+                                             bool verify_checksum,
+                                             std::string* error_msg,
                                              ZipOpenErrorCode* error_code);
 
   // Opens a .dex file at the given address backed by a MemMap
@@ -1190,10 +1199,6 @@
   // Returns true if the header magic and version numbers are of the expected values.
   bool CheckMagicAndVersion(std::string* error_msg) const;
 
-  void DecodeDebugInfo0(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-      DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-      void* context, const uint8_t* stream, LocalInfo* local_in_reg) const;
-
   // Check whether a location denotes a multidex dex file. This is a very simple check: returns
   // whether the string contains the separator character.
   static bool IsMultiDexLocation(const char* location);
@@ -1237,46 +1242,14 @@
   // Points to the base of the class definition list.
   const ClassDef* const class_defs_;
 
-  // Number of misses finding a class def from a descriptor.
-  mutable Atomic<uint32_t> find_class_def_misses_;
-
-  struct UTF16EmptyFn {
-    void MakeEmpty(std::pair<const char*, const ClassDef*>& pair) const {
-      pair.first = nullptr;
-      pair.second = nullptr;
-    }
-    bool IsEmpty(const std::pair<const char*, const ClassDef*>& pair) const {
-      if (pair.first == nullptr) {
-        DCHECK(pair.second == nullptr);
-        return true;
-      }
-      return false;
-    }
-  };
-  struct UTF16HashCmp {
-    // Hash function.
-    size_t operator()(const char* key) const {
-      return ComputeModifiedUtf8Hash(key);
-    }
-    // std::equal function.
-    bool operator()(const char* a, const char* b) const {
-      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(a, b) == 0;
-    }
-  };
-  using Index = HashMap<const char*,
-                        const ClassDef*,
-                        UTF16EmptyFn,
-                        UTF16HashCmp,
-                        UTF16HashCmp,
-                        std::allocator<std::pair<const char*, const ClassDef*>>>;
-  mutable Atomic<Index*> class_def_index_;
-
   // If this dex file was loaded from an oat file, oat_dex_file_ contains a
   // pointer to the OatDexFile it was loaded from. Otherwise oat_dex_file_ is
   // null.
   const OatDexFile* oat_dex_file_;
+  mutable std::unique_ptr<TypeLookupTable> lookup_table_;
 
   friend class DexFileVerifierTest;
+  ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for constructor
 };
 
 struct DexFileReference {
@@ -1298,6 +1271,7 @@
     }
   }
   bool HasNext() const { return pos_ < size_; }
+  size_t Size() const { return size_; }
   void Next() { ++pos_; }
   uint16_t GetTypeIdx() {
     return type_list_->GetTypeItem(pos_).type_idx_;
@@ -1533,9 +1507,17 @@
 
 class EncodedStaticFieldValueIterator {
  public:
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
+  // A constructor for static tools. ReadValueToField() must not be called
+  // on an object created this way.
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  const DexFile::ClassDef& class_def);
+
+  // A constructor meant to be called from runtime code.
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
                                   Handle<mirror::ClassLoader>* class_loader,
-                                  ClassLinker* linker, const DexFile::ClassDef& class_def)
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
@@ -1564,7 +1546,18 @@
     kBoolean = 0x1f
   };
 
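+  // Accessors for the current encoded value and its type.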
+  ValueType GetValueType() const { return type_; }
+  const jvalue& GetJavaValue() const { return jval_; }
+
  private:
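+  // Common implementation delegated to by the public constructors above.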
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
+                                  Handle<mirror::ClassLoader>* class_loader,
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def,
+                                  size_t pos,
+                                  ValueType type);
+
   static constexpr uint8_t kEncodedValueTypeMask = 0x1f;  // 0b11111
   static constexpr uint8_t kEncodedValueArgShift = 5;
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 90b35a3..2704d8a 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -133,8 +133,46 @@
   "AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA"
   "AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA==";
 
-static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
-                                                        const char* location) {
+// kRawDex38 and kRawDex39 are dexed versions of the following Java source:
+//
+// public class Main {
+//     public static void main(String[] foo) {
+//     }
+// }
+//
+// The dex file was manually edited to set its dex format version to 38
+// or 39, respectively.
+static const char kRawDex38[] =
+  "ZGV4CjAzOAC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDex39[] =
+  "ZGV4CjAzOQC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
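+// A zip archive containing a single zero-length classes.dex entry.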
+static const char kRawDexZeroLength[] =
+  "UEsDBAoAAAAAAOhxAkkAAAAAAAAAAAAAAAALABwAY2xhc3Nlcy5kZXhVVAkAA2QNoVdnDaFXdXgL"
+  "AAEE5AMBAASIEwAAUEsBAh4DCgAAAAAA6HECSQAAAAAAAAAAAAAAAAsAGAAAAAAAAAAAAKCBAAAA"
+  "AGNsYXNzZXMuZGV4VVQFAANkDaFXdXgLAAEE5AMBAASIEwAAUEsFBgAAAAABAAEAUQAAAEUAAAAA"
+  "AA==";
+
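+// Decodes the given base64 data and writes it to a file at |location|.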
+static void DecodeAndWriteDexFile(const char* base64, const char* location) {
   // decode base64
   CHECK(base64 != nullptr);
   size_t length;
@@ -150,13 +188,18 @@
   if (file->FlushCloseOrErase() != 0) {
     PLOG(FATAL) << "Could not flush and close test file.";
   }
-  file.reset();
+}
+
+static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
+                                                        const char* location) {
+  DecodeAndWriteDexFile(base64, location);
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> tmp;
-  bool success = DexFile::Open(location, location, &error_msg, &tmp);
+  bool success = DexFile::Open(location, location, kVerifyChecksum, &error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
   std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
@@ -196,6 +239,39 @@
   EXPECT_EQ(header.checksum_, raw->GetLocationChecksum());
 }
 
+TEST_F(DexFileTest, Version38Accepted) {
+  ScratchFile tmp;
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex38, tmp.GetFilename().c_str()));
+  ASSERT_TRUE(raw.get() != nullptr);
+
+  const DexFile::Header& header = raw->GetHeader();
+  EXPECT_EQ(38u, header.GetVersion());
+}
+
+TEST_F(DexFileTest, Version39Rejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDex39, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
+TEST_F(DexFileTest, ZeroLengthDexRejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDexZeroLength, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
 TEST_F(DexFileTest, GetLocationChecksum) {
   ScopedObjectAccess soa(Thread::Current());
   std::unique_ptr<const DexFile> raw(OpenTestDexFile("Main"));
@@ -206,7 +282,7 @@
   uint32_t checksum;
   ScopedObjectAccess soa(Thread::Current());
   std::string error_msg;
-  EXPECT_TRUE(DexFile::GetChecksum(GetLibCoreDexFileName().c_str(), &checksum, &error_msg))
+  EXPECT_TRUE(DexFile::GetChecksum(GetLibCoreDexFileNames()[0].c_str(), &checksum, &error_msg))
       << error_msg;
   EXPECT_EQ(java_lang_dex_file_->GetLocationChecksum(), checksum);
 }
@@ -297,6 +373,7 @@
     ASSERT_TRUE(type_str_id != nullptr);
     uint32_t type_str_idx = java_lang_dex_file_->GetIndexForStringId(*type_str_id);
     const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId(type_str_idx);
+    ASSERT_EQ(type_id, java_lang_dex_file_->FindTypeId(type_str));
     ASSERT_TRUE(type_id != nullptr);
     EXPECT_EQ(java_lang_dex_file_->GetIndexForTypeId(*type_id), i);
   }
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index a5f9d09..5132efc 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -101,36 +101,41 @@
 }
 
 // Helper macro to load string and return false on error.
-#define LOAD_STRING(var, idx, error)                  \
-  const char* var = CheckLoadStringByIdx(idx, error); \
-  if (UNLIKELY(var == nullptr)) {                     \
-    return false;                                     \
+#define LOAD_STRING(var, idx, error)                    \
+  const char* (var) = CheckLoadStringByIdx(idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                     \
+    return false;                                       \
   }
 
 // Helper macro to load string by type idx and return false on error.
-#define LOAD_STRING_BY_TYPE(var, type_idx, error)              \
-  const char* var = CheckLoadStringByTypeIdx(type_idx, error); \
-  if (UNLIKELY(var == nullptr)) {                              \
-    return false;                                              \
+#define LOAD_STRING_BY_TYPE(var, type_idx, error)                \
+  const char* (var) = CheckLoadStringByTypeIdx(type_idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                              \
+    return false;                                                \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_METHOD(var, idx, error_string, error_stmt)                 \
-  const DexFile::MethodId* var  = CheckLoadMethodId(idx, error_string); \
-  if (UNLIKELY(var == nullptr)) {                                       \
-    error_stmt;                                                         \
+#define LOAD_METHOD(var, idx, error_string, error_stmt)                   \
+  const DexFile::MethodId* (var)  = CheckLoadMethodId(idx, error_string); \
+  if (UNLIKELY((var) == nullptr)) {                                       \
+    error_stmt;                                                           \
   }
 
 // Helper macro to load field id. Return last parameter on error.
-#define LOAD_FIELD(var, idx, fmt, error_stmt)               \
-  const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \
-  if (UNLIKELY(var == nullptr)) {                           \
-    error_stmt;                                             \
+#define LOAD_FIELD(var, idx, fmt, error_stmt)                 \
+  const DexFile::FieldId* (var) = CheckLoadFieldId(idx, fmt); \
+  if (UNLIKELY((var) == nullptr)) {                           \
+    error_stmt;                                               \
   }
 
-bool DexFileVerifier::Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
-                             const char* location, std::string* error_msg) {
-  std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
+bool DexFileVerifier::Verify(const DexFile* dex_file,
+                             const uint8_t* begin,
+                             size_t size,
+                             const char* location,
+                             bool verify_checksum,
+                             std::string* error_msg) {
+  std::unique_ptr<DexFileVerifier> verifier(
+      new DexFileVerifier(dex_file, begin, size, location, verify_checksum));
   if (!verifier->Verify()) {
     *error_msg = verifier->FailureReason();
     return false;
@@ -230,7 +235,10 @@
   return true;
 }
 
-bool DexFileVerifier::CheckValidOffsetAndSize(uint32_t offset, uint32_t size, const char* label) {
+bool DexFileVerifier::CheckValidOffsetAndSize(uint32_t offset,
+                                              uint32_t size,
+                                              size_t alignment,
+                                              const char* label) {
   if (size == 0) {
     if (offset != 0) {
       ErrorStringPrintf("Offset(%d) should be zero when size is zero for %s.", offset, label);
@@ -241,6 +249,18 @@
     ErrorStringPrintf("Offset(%d) should be within file size(%zu) for %s.", offset, size_, label);
     return false;
   }
+  if (alignment != 0 && !IsAlignedParam(offset, alignment)) {
+    ErrorStringPrintf("Offset(%d) should be aligned by %zu for %s.", offset, alignment, label);
+    return false;
+  }
+  return true;
+}
+
+bool DexFileVerifier::CheckSizeLimit(uint32_t size, uint32_t limit, const char* label) {
+  if (size > limit) {
+    ErrorStringPrintf("Size(%u) should not exceed limit(%u) for %s.", size, limit, label);
+    return false;
+  }
   return true;
 }
 
@@ -258,8 +278,13 @@
   const uint8_t* non_sum_ptr = reinterpret_cast<const uint8_t*>(header_) + non_sum;
   adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
   if (adler_checksum != header_->checksum_) {
-    ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
-    return false;
+    if (verify_checksum_) {
+      ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
+      return false;
+    } else {
+      LOG(WARNING) << StringPrintf(
+          "Ignoring bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
+    }
   }
 
   // Check the contents of the header.
@@ -275,16 +300,45 @@
 
   // Check that all offsets are inside the file.
   bool result =
-      CheckValidOffsetAndSize(header_->link_off_, header_->link_size_, "link") &&
-      CheckValidOffsetAndSize(header_->map_off_, header_->map_off_, "map") &&
-      CheckValidOffsetAndSize(header_->string_ids_off_, header_->string_ids_size_, "string-ids") &&
-      CheckValidOffsetAndSize(header_->type_ids_off_, header_->type_ids_size_, "type-ids") &&
-      CheckValidOffsetAndSize(header_->proto_ids_off_, header_->proto_ids_size_, "proto-ids") &&
-      CheckValidOffsetAndSize(header_->field_ids_off_, header_->field_ids_size_, "field-ids") &&
-      CheckValidOffsetAndSize(header_->method_ids_off_, header_->method_ids_size_, "method-ids") &&
-      CheckValidOffsetAndSize(header_->class_defs_off_, header_->class_defs_size_, "class-defs") &&
-      CheckValidOffsetAndSize(header_->data_off_, header_->data_size_, "data");
-
+      CheckValidOffsetAndSize(header_->link_off_,
+                              header_->link_size_,
+                              0 /* unaligned */,
+                              "link") &&
+      CheckValidOffsetAndSize(header_->map_off_,
+                              header_->map_off_,
+                              4,
+                              "map") &&
+      CheckValidOffsetAndSize(header_->string_ids_off_,
+                              header_->string_ids_size_,
+                              4,
+                              "string-ids") &&
+      CheckValidOffsetAndSize(header_->type_ids_off_,
+                              header_->type_ids_size_,
+                              4,
+                              "type-ids") &&
+      CheckSizeLimit(header_->type_ids_size_, DexFile::kDexNoIndex16, "type-ids") &&
+      CheckValidOffsetAndSize(header_->proto_ids_off_,
+                              header_->proto_ids_size_,
+                              4,
+                              "proto-ids") &&
+      CheckSizeLimit(header_->proto_ids_size_, DexFile::kDexNoIndex16, "proto-ids") &&
+      CheckValidOffsetAndSize(header_->field_ids_off_,
+                              header_->field_ids_size_,
+                              4,
+                              "field-ids") &&
+      CheckValidOffsetAndSize(header_->method_ids_off_,
+                              header_->method_ids_size_,
+                              4,
+                              "method-ids") &&
+      CheckValidOffsetAndSize(header_->class_defs_off_,
+                              header_->class_defs_size_,
+                              4,
+                              "class-defs") &&
+      CheckValidOffsetAndSize(header_->data_off_,
+                              header_->data_size_,
+                              0,  // Unaligned; the spec does not require alignment here, even
+                                  // though the size is supposed to be a multiple of 4.
+                              "data");
   return result;
 }
 
@@ -478,7 +532,7 @@
 
   // Check field access flags.
   std::string error_msg;
-  if (!CheckFieldAccessFlags(access_flags, class_access_flags, &error_msg)) {
+  if (!CheckFieldAccessFlags(idx, access_flags, class_access_flags, &error_msg)) {
     ErrorStringPrintf("%s", error_msg.c_str());
     return false;
   }
@@ -1416,7 +1470,12 @@
     }
 
     if (IsDataSectionType(type)) {
-      offset_to_type_map_.Put(aligned_offset, type);
+      if (aligned_offset == 0u) {
+        ErrorStringPrintf("Item %d offset is 0", i);
+        return false;
+      }
+      DCHECK(offset_to_type_map_.Find(aligned_offset) == offset_to_type_map_.end());
+      offset_to_type_map_.Insert(std::pair<uint32_t, uint16_t>(aligned_offset, type));
     }
 
     aligned_offset = ptr_ - begin_;
@@ -1589,7 +1648,8 @@
 }
 
 bool DexFileVerifier::CheckOffsetToTypeMap(size_t offset, uint16_t type) {
-  auto it = offset_to_type_map_.find(offset);
+  DCHECK_NE(offset, 0u);
+  auto it = offset_to_type_map_.Find(offset);
   if (UNLIKELY(it == offset_to_type_map_.end())) {
     ErrorStringPrintf("No data map entry found @ %zx; expected %x", offset, type);
     return false;
@@ -1746,13 +1806,8 @@
       while (curr_it.HasNext() && prev_it.HasNext()) {
         uint16_t prev_idx = prev_it.GetTypeIdx();
         uint16_t curr_idx = curr_it.GetTypeIdx();
-        if (prev_idx == DexFile::kDexNoIndex16) {
-          break;
-        }
-        if (UNLIKELY(curr_idx == DexFile::kDexNoIndex16)) {
-          ErrorStringPrintf("Out-of-order proto_id arguments");
-          return false;
-        }
+        DCHECK_NE(prev_idx, DexFile::kDexNoIndex16);
+        DCHECK_NE(curr_idx, DexFile::kDexNoIndex16);
 
         if (prev_idx < curr_idx) {
           break;
@@ -1764,6 +1819,12 @@
         prev_it.Next();
         curr_it.Next();
       }
+      if (!curr_it.HasNext()) {
+        // Either a duplicate ProtoId or a ProtoId with a shorter argument list follows
+        // a ProtoId with a longer one. Both cases are forbidden by the specification.
+        ErrorStringPrintf("Out-of-order proto_id arguments");
+        return false;
+      }
     }
   }
 
@@ -1905,6 +1966,31 @@
   }
 
   if (item->superclass_idx_ != DexFile::kDexNoIndex16) {
+    if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
+      // Check that a class does not inherit from itself directly (by having
+      // the same type idx as its super class).
+      if (UNLIKELY(item->superclass_idx_ == item->class_idx_)) {
+        ErrorStringPrintf("Class with same type idx as its superclass: '%d'", item->class_idx_);
+        return false;
+      }
+
+      // Check that a class is defined after its super class (if the
+      // latter is defined in the same Dex file).
+      const DexFile::ClassDef* superclass_def = dex_file_->FindClassDef(item->superclass_idx_);
+      if (superclass_def != nullptr) {
+        // The superclass is defined in this Dex file.
+        if (superclass_def > item) {
+          // ClassDef item for super class appearing after the class' ClassDef item.
+          ErrorStringPrintf("Invalid class definition ordering:"
+                            " class with type idx: '%d' defined before"
+                            " superclass with type idx: '%d'",
+                            item->class_idx_,
+                            item->superclass_idx_);
+          return false;
+        }
+      }
+    }
+
     LOAD_STRING_BY_TYPE(superclass_descriptor, item->superclass_idx_,
                         "inter_class_def_item superclass_idx")
     if (UNLIKELY(!IsValidDescriptor(superclass_descriptor) || superclass_descriptor[0] != 'L')) {
@@ -1913,12 +1999,39 @@
     }
   }
 
+  // Check interfaces.
   const DexFile::TypeList* interfaces = dex_file_->GetInterfacesList(*item);
   if (interfaces != nullptr) {
     uint32_t size = interfaces->Size();
-
-    // Ensure that all interfaces refer to classes (not arrays or primitives).
     for (uint32_t i = 0; i < size; i++) {
+      if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
+        // Check that a class does not implement itself directly (by having the
+        // same type idx as one of its immediate implemented interfaces).
+        if (UNLIKELY(interfaces->GetTypeItem(i).type_idx_ == item->class_idx_)) {
+          ErrorStringPrintf("Class with same type idx as implemented interface: '%d'",
+                            item->class_idx_);
+          return false;
+        }
+
+        // Check that a class is defined after the interfaces it implements
+        // (if they are defined in the same Dex file).
+        const DexFile::ClassDef* interface_def =
+            dex_file_->FindClassDef(interfaces->GetTypeItem(i).type_idx_);
+        if (interface_def != nullptr) {
+          // The interface is defined in this Dex file.
+          if (interface_def > item) {
+            // ClassDef item for interface appearing after the class' ClassDef item.
+            ErrorStringPrintf("Invalid class definition ordering:"
+                              " class with type idx: '%d' defined before"
+                              " implemented interface with type idx: '%d'",
+                              item->class_idx_,
+                              interfaces->GetTypeItem(i).type_idx_);
+            return false;
+          }
+        }
+      }
+
+      // Ensure that the interface refers to a class (not an array nor a primitive type).
       LOAD_STRING_BY_TYPE(inf_descriptor, interfaces->GetTypeItem(i).type_idx_,
                           "inter_class_def_item interface type_idx")
       if (UNLIKELY(!IsValidDescriptor(inf_descriptor) || inf_descriptor[0] != 'L')) {
@@ -1959,6 +2072,11 @@
 
   // Check that references in annotations_directory_item are to right class.
   if (item->annotations_off_ != 0) {
+    // annotations_off_ is supposed to be aligned by 4.
+    if (!IsAlignedParam(item->annotations_off_, 4)) {
+      ErrorStringPrintf("Invalid annotations_off_, not aligned by 4");
+      return false;
+    }
     const uint8_t* data = begin_ + item->annotations_off_;
     bool success;
     uint16_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data, &success);
@@ -2306,12 +2424,89 @@
   return count <= 1;
 }
 
-bool DexFileVerifier::CheckFieldAccessFlags(uint32_t field_access_flags,
+// Helper functions to retrieve names from the dex file. We do not want to rely on DexFile
+// functionality, as we're still verifying the dex file. `begin` and `header` correspond to the
+// underscored variants in the DexFileVerifier.
+
+static std::string GetStringOrError(const uint8_t* const begin,
+                                    const DexFile::Header* const header,
+                                    uint32_t string_idx) {
+  // The `string_idx` is not guaranteed to be valid yet.
+  if (header->string_ids_size_ <= string_idx) {
+    return "(error)";
+  }
+
+  const DexFile::StringId* string_id =
+      reinterpret_cast<const DexFile::StringId*>(begin + header->string_ids_off_) + string_idx;
+
+  // Assume that the data is OK at this point. String data has already been checked.
+
+  const uint8_t* ptr = begin + string_id->string_data_off_;
+  DecodeUnsignedLeb128(&ptr);
+  return reinterpret_cast<const char*>(ptr);
+}
+
+static std::string GetClassOrError(const uint8_t* const begin,
+                                   const DexFile::Header* const header,
+                                   uint32_t class_idx) {
+  // The `class_idx` is either `FieldId::class_idx_` or `MethodId::class_idx_` and
+  // it has already been checked in `DexFileVerifier::CheckClassDataItemField()`
+  // or `DexFileVerifier::CheckClassDataItemMethod()`, respectively, to match
+  // a valid defining class.
+  CHECK_LT(class_idx, header->type_ids_size_);
+
+  const DexFile::TypeId* type_id =
+      reinterpret_cast<const DexFile::TypeId*>(begin + header->type_ids_off_) + class_idx;
+
+  // Assume that the data is OK at this point. Type id offsets have already been checked.
+
+  return GetStringOrError(begin, header, type_id->descriptor_idx_);
+}
+
+static std::string GetFieldDescriptionOrError(const uint8_t* const begin,
+                                              const DexFile::Header* const header,
+                                              uint32_t idx) {
+  // The `idx` has already been checked in `DexFileVerifier::CheckClassDataItemField()`.
+  CHECK_LT(idx, header->field_ids_size_);
+
+  const DexFile::FieldId* field_id =
+      reinterpret_cast<const DexFile::FieldId*>(begin + header->field_ids_off_) + idx;
+
+  // Assume that the data is OK at this point. Field id offsets have already been checked.
+
+  std::string class_name = GetClassOrError(begin, header, field_id->class_idx_);
+  std::string field_name = GetStringOrError(begin, header, field_id->name_idx_);
+
+  return class_name + "." + field_name;
+}
+
+static std::string GetMethodDescriptionOrError(const uint8_t* const begin,
+                                               const DexFile::Header* const header,
+                                               uint32_t idx) {
+  // The `idx` has already been checked in `DexFileVerifier::CheckClassDataItemMethod()`.
+  CHECK_LT(idx, header->method_ids_size_);
+
+  const DexFile::MethodId* method_id =
+      reinterpret_cast<const DexFile::MethodId*>(begin + header->method_ids_off_) + idx;
+
+  // Assume that the data is OK at this point. Method id offsets have already been checked.
+
+  std::string class_name = GetClassOrError(begin, header, method_id->class_idx_);
+  std::string method_name = GetStringOrError(begin, header, method_id->name_idx_);
+
+  return class_name + "." + method_name;
+}
+
+bool DexFileVerifier::CheckFieldAccessFlags(uint32_t idx,
+                                            uint32_t field_access_flags,
                                             uint32_t class_access_flags,
                                             std::string* error_msg) {
   // Generally sort out >16-bit flags.
   if ((field_access_flags & ~kAccJavaFlagsMask) != 0) {
-    *error_msg = StringPrintf("Bad class_data_item field access_flags %x", field_access_flags);
+    *error_msg = StringPrintf("Bad field access_flags for %s: %x(%s)",
+                              GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
+                              field_access_flags,
+                              PrettyJavaAccessFlags(field_access_flags).c_str());
     return false;
   }
 
@@ -2328,8 +2523,10 @@
 
   // Fields may have only one of public/protected/final.
   if (!CheckAtMostOneOfPublicProtectedPrivate(field_access_flags)) {
-    *error_msg = StringPrintf("Field may have only one of public/protected/private, %x",
-                              field_access_flags);
+    *error_msg = StringPrintf("Field may have only one of public/protected/private, %s: %x(%s)",
+                              GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
+                              field_access_flags,
+                              PrettyJavaAccessFlags(field_access_flags).c_str());
     return false;
   }
 
@@ -2338,15 +2535,32 @@
     // Interface fields must be public final static.
     constexpr uint32_t kPublicFinalStatic = kAccPublic | kAccFinal | kAccStatic;
     if ((field_access_flags & kPublicFinalStatic) != kPublicFinalStatic) {
-      *error_msg = StringPrintf("Interface field is not public final static: %x",
-                                field_access_flags);
-      return false;
+      *error_msg = StringPrintf("Interface field is not public final static, %s: %x(%s)",
+                                GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
+                                field_access_flags,
+                                PrettyJavaAccessFlags(field_access_flags).c_str());
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                     << *error_msg;
+      }
     }
     // Interface fields may be synthetic, but may not have other flags.
     constexpr uint32_t kDisallowed = ~(kPublicFinalStatic | kAccSynthetic);
     if ((field_access_flags & kFieldAccessFlags & kDisallowed) != 0) {
-      *error_msg = StringPrintf("Interface field has disallowed flag: %x", field_access_flags);
-      return false;
+      *error_msg = StringPrintf("Interface field has disallowed flag, %s: %x(%s)",
+                                GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
+                                field_access_flags,
+                                PrettyJavaAccessFlags(field_access_flags).c_str());
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                     << *error_msg;
+      }
     }
     return true;
   }
@@ -2354,7 +2568,8 @@
   // Volatile fields may not be final.
   constexpr uint32_t kVolatileFinal = kAccVolatile | kAccFinal;
   if ((field_access_flags & kVolatileFinal) == kVolatileFinal) {
-    *error_msg = "Fields may not be volatile and final";
+    *error_msg = StringPrintf("Fields may not be volatile and final: %s",
+                              GetFieldDescriptionOrError(begin_, header_, idx).c_str());
     return false;
   }
 
@@ -2404,7 +2619,9 @@
   constexpr uint32_t kAllMethodFlags =
       kAccJavaFlagsMask | kAccConstructor | kAccDeclaredSynchronized;
   if ((method_access_flags & ~kAllMethodFlags) != 0) {
-    *error_msg = StringPrintf("Bad class_data_item method access_flags %x", method_access_flags);
+    *error_msg = StringPrintf("Bad method access_flags for %s: %x",
+                              GetMethodDescriptionOrError(begin_, header_, method_index).c_str(),
+                              method_access_flags);
     return false;
   }
 
@@ -2424,7 +2641,8 @@
 
   // Methods may have only one of public/protected/final.
   if (!CheckAtMostOneOfPublicProtectedPrivate(method_access_flags)) {
-    *error_msg = StringPrintf("Method may have only one of public/protected/private, %x",
+    *error_msg = StringPrintf("Method may have only one of public/protected/private, %s: %x",
+                              GetMethodDescriptionOrError(begin_, header_, method_index).c_str(),
                               method_access_flags);
     return false;
   }
@@ -2450,8 +2668,10 @@
   // Only methods named "<clinit>" or "<init>" may be marked constructor. Note: we cannot enforce
   // the reverse for backwards compatibility reasons.
   if (((method_access_flags & kAccConstructor) != 0) && !is_constructor) {
-    *error_msg = StringPrintf("Method %" PRIu32 " is marked constructor, but doesn't match name",
-                              method_index);
+    *error_msg =
+        StringPrintf("Method %" PRIu32 "(%s) is marked constructor, but doesn't match name",
+                     method_index,
+                     GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
     return false;
   }
   // Check that the static constructor (= static initializer) is named "<clinit>" and that the
@@ -2459,17 +2679,25 @@
   if (is_constructor) {
     bool is_static = (method_access_flags & kAccStatic) != 0;
     if (is_static ^ is_clinit_by_name) {
-      *error_msg = StringPrintf("Constructor %" PRIu32 " is not flagged correctly wrt/ static.",
-                                method_index);
-      return false;
+      *error_msg = StringPrintf("Constructor %" PRIu32 "(%s) is not flagged correctly wrt/ static.",
+                                method_index,
+                                GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                     << *error_msg;
+      }
     }
   }
   // Check that static and private methods, as well as constructors, are in the direct methods list,
   // and other methods in the virtual methods list.
   bool is_direct = (method_access_flags & (kAccStatic | kAccPrivate)) != 0 || is_constructor;
   if (is_direct != expect_direct) {
-    *error_msg = StringPrintf("Direct/virtual method %" PRIu32 " not in expected list %d",
+    *error_msg = StringPrintf("Direct/virtual method %" PRIu32 "(%s) not in expected list %d",
                               method_index,
+                              GetMethodDescriptionOrError(begin_, header_, method_index).c_str(),
                               expect_direct);
     return false;
   }
@@ -2478,44 +2706,83 @@
   // From here on out it is easier to mask out the bits we're supposed to ignore.
   method_access_flags &= kMethodAccessFlags;
 
+  // Interfaces are special.
+  if ((class_access_flags & kAccInterface) != 0) {
+    // Non-static interface methods must be public or private.
+    uint32_t desired_flags = (kAccPublic | kAccStatic);
+    if (dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+      desired_flags |= kAccPrivate;
+    }
+    if ((method_access_flags & desired_flags) == 0) {
+      *error_msg = StringPrintf("Interface virtual method %" PRIu32 "(%s) is not public",
+          method_index,
+          GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                      << *error_msg;
+      }
+    }
+  }
+
   // If there aren't any instructions, make sure that's expected.
   if (!has_code) {
     // Only native or abstract methods may not have code.
     if ((method_access_flags & (kAccNative | kAccAbstract)) == 0) {
-      *error_msg = StringPrintf("Method %" PRIu32 " has no code, but is not marked native or "
+      *error_msg = StringPrintf("Method %" PRIu32 "(%s) has no code, but is not marked native or "
                                 "abstract",
-                                method_index);
+                                method_index,
+                                GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
       return false;
     }
     // Constructors must always have code.
     if (is_constructor) {
-      *error_msg = StringPrintf("Constructor %u must not be abstract or native", method_index);
-      return false;
+      *error_msg = StringPrintf("Constructor %u(%s) must not be abstract or native",
+                                method_index,
+                                GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                      << *error_msg;
+      }
     }
     if ((method_access_flags & kAccAbstract) != 0) {
       // Abstract methods are not allowed to have the following flags.
       constexpr uint32_t kForbidden =
           kAccPrivate | kAccStatic | kAccFinal | kAccNative | kAccStrict | kAccSynchronized;
       if ((method_access_flags & kForbidden) != 0) {
-        *error_msg = StringPrintf("Abstract method %" PRIu32 " has disallowed access flags %x",
-                                  method_index,
-                                  method_access_flags);
+        *error_msg = StringPrintf("Abstract method %" PRIu32 "(%s) has disallowed access flags %x",
+            method_index,
+            GetMethodDescriptionOrError(begin_, header_, method_index).c_str(),
+            method_access_flags);
         return false;
       }
-      // Abstract methods must be in an abstract class or interface.
+      // Abstract methods should be in an abstract class or interface.
       if ((class_access_flags & (kAccInterface | kAccAbstract)) == 0) {
-        *error_msg = StringPrintf("Method %" PRIu32 " is abstract, but the declaring class "
-                                  "is neither abstract nor an interface", method_index);
-        return false;
+        LOG(WARNING) << "Method " << GetMethodDescriptionOrError(begin_, header_, method_index)
+                     << " is abstract, but the declaring class is neither abstract nor an "
+                     << "interface in dex file "
+                     << dex_file_->GetLocation();
       }
     }
     // Interfaces are special.
     if ((class_access_flags & kAccInterface) != 0) {
-      // Interface methods must be public and abstract.
+      // Interface methods without code must be abstract.
       if ((method_access_flags & (kAccPublic | kAccAbstract)) != (kAccPublic | kAccAbstract)) {
-        *error_msg = StringPrintf("Interface method %" PRIu32 " is not public and abstract",
-                                  method_index);
-        return false;
+        *error_msg = StringPrintf("Interface method %" PRIu32 "(%s) is not public and abstract",
+            method_index,
+            GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
+        if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+          return false;
+        } else {
+          // Allow in older versions, but warn.
+          LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                       << *error_msg;
+        }
       }
       // At this point, we know the method is public and abstract. This means that all the checks
       // for invalid combinations above apply. In addition, interface methods must not be
@@ -2526,22 +2793,9 @@
 
   // When there's code, the method must not be native or abstract.
   if ((method_access_flags & (kAccNative | kAccAbstract)) != 0) {
-    *error_msg = StringPrintf("Method %" PRIu32 " has code, but is marked native or abstract",
-                              method_index);
-    return false;
-  }
-
-  // Only the static initializer may have code in an interface.
-  // TODO We should have some way determine whether to allow this experimental flag without the
-  // runtime being started.
-  // We assume experimental flags are enabled when running without a runtime to enable tools like
-  // dexdump to handle dex files with these features.
-  if (((class_access_flags & kAccInterface) != 0)
-      && !is_clinit_by_name
-      && Runtime::Current() != nullptr
-      && !Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
-    *error_msg = StringPrintf("Non-clinit interface method %" PRIu32 " should not have code",
-                              method_index);
+    *error_msg = StringPrintf("Method %" PRIu32 "(%s) has code, but is marked native or abstract",
+                              method_index,
+                              GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
     return false;
   }
 
@@ -2550,8 +2804,9 @@
     static constexpr uint32_t kInitAllowed =
         kAccPrivate | kAccProtected | kAccPublic | kAccStrict | kAccVarargs | kAccSynthetic;
     if ((method_access_flags & ~kInitAllowed) != 0) {
-      *error_msg = StringPrintf("Constructor %" PRIu32 " flagged inappropriately %x",
+      *error_msg = StringPrintf("Constructor %" PRIu32 "(%s) flagged inappropriately %x",
                                 method_index,
+                                GetMethodDescriptionOrError(begin_, header_, method_index).c_str(),
                                 method_access_flags);
       return false;
     }
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 4f15357..133e432 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -26,17 +26,31 @@
 
 class DexFileVerifier {
  public:
-  static bool Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
-                     const char* location, std::string* error_msg);
+  static bool Verify(const DexFile* dex_file,
+                     const uint8_t* begin,
+                     size_t size,
+                     const char* location,
+                     bool verify_checksum,
+                     std::string* error_msg);
 
   const std::string& FailureReason() const {
     return failure_reason_;
   }
 
  private:
-  DexFileVerifier(const DexFile* dex_file, const uint8_t* begin, size_t size, const char* location)
-      : dex_file_(dex_file), begin_(begin), size_(size), location_(location),
-        header_(&dex_file->GetHeader()), ptr_(nullptr), previous_item_(nullptr)  {
+  DexFileVerifier(const DexFile* dex_file,
+                  const uint8_t* begin,
+                  size_t size,
+                  const char* location,
+                  bool verify_checksum)
+      : dex_file_(dex_file),
+        begin_(begin),
+        size_(size),
+        location_(location),
+        verify_checksum_(verify_checksum),
+        header_(&dex_file->GetHeader()),
+        ptr_(nullptr),
+        previous_item_(nullptr)  {
   }
 
   bool Verify();
@@ -48,7 +62,9 @@
   bool CheckList(size_t element_size, const char* label, const uint8_t* *ptr);
   // Checks whether the offset is zero (when size is zero) or that the offset falls within the area
   // claimed by the file.
-  bool CheckValidOffsetAndSize(uint32_t offset, uint32_t size, const char* label);
+  bool CheckValidOffsetAndSize(uint32_t offset, uint32_t size, size_t alignment, const char* label);
+  // Checks whether the size is at most the given limit.
+  bool CheckSizeLimit(uint32_t size, uint32_t limit, const char* label);
   bool CheckIndex(uint32_t field, uint32_t limit, const char* label);
 
   bool CheckHeader();
@@ -157,9 +173,10 @@
 
   // Check validity of the given access flags, interpreted for a field in the context of a class
   // with the given second access flags.
-  static bool CheckFieldAccessFlags(uint32_t field_access_flags,
-                                    uint32_t class_access_flags,
-                                    std::string* error_msg);
+  bool CheckFieldAccessFlags(uint32_t idx,
+                             uint32_t field_access_flags,
+                             uint32_t class_access_flags,
+                             std::string* error_msg);
   // Check validity of the given method and access flags, in the context of a class with the given
   // second access flags.
   bool CheckMethodAccessFlags(uint32_t method_index,
@@ -173,9 +190,38 @@
   const uint8_t* const begin_;
   const size_t size_;
   const char* const location_;
+  const bool verify_checksum_;
   const DexFile::Header* const header_;
 
-  AllocationTrackingSafeMap<uint32_t, uint16_t, kAllocatorTagDexFileVerifier> offset_to_type_map_;
+  struct OffsetTypeMapEmptyFn {
+    // Make a hash map slot empty by setting its offset to 0. Offset 0 is a valid dex file
+    // offset (it points at the dex file header), but we only store data section items in the
+    // map, and those all come after the header.
+    void MakeEmpty(std::pair<uint32_t, uint16_t>& pair) const {
+      pair.first = 0u;
+    }
+    // Check if a hash map slot is empty.
+    bool IsEmpty(const std::pair<uint32_t, uint16_t>& pair) const {
+      return pair.first == 0;
+    }
+  };
+  struct OffsetTypeMapHashCompareFn {
+    // Hash function for offset.
+    size_t operator()(const uint32_t key) const {
+      return key;
+    }
+    // std::equal function for offset.
+    bool operator()(const uint32_t a, const uint32_t b) const {
+      return a == b;
+    }
+  };
+  // Map from offset to dex file type, HashMap for performance reasons.
+  AllocationTrackingHashMap<uint32_t,
+                            uint16_t,
+                            OffsetTypeMapEmptyFn,
+                            kAllocatorTagDexFileVerifier,
+                            OffsetTypeMapHashCompareFn,
+                            OffsetTypeMapHashCompareFn> offset_to_type_map_;
   const uint8_t* ptr_;
   const void* previous_item_;
 
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 272249c..71c0ad9 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -57,7 +57,14 @@
   255, 255, 255, 255
 };
 
-static inline uint8_t* DecodeBase64(const char* src, size_t* dst_size) {
+// Make the Dex file version 37 by patching the header magic ("035" -> "037").
+static void MakeDexVersion37(DexFile* dex_file) {
+  size_t offset = OFFSETOF_MEMBER(DexFile::Header, magic_) + 6;
+  CHECK_EQ(*(dex_file->Begin() + offset), '5');
+  *(const_cast<uint8_t*>(dex_file->Begin()) + offset) = '7';
+}
+
+static inline std::unique_ptr<uint8_t[]> DecodeBase64(const char* src, size_t* dst_size) {
   std::vector<uint8_t> tmp;
   uint32_t t = 0, y = 0;
   int g = 3;
@@ -100,7 +107,7 @@
     *dst_size = 0;
   }
   std::copy(tmp.begin(), tmp.end(), dst.get());
-  return dst.release();
+  return dst;
 }
 
 static void FixUpChecksum(uint8_t* dex_file) {
@@ -113,33 +120,31 @@
   header->checksum_ = adler_checksum;
 }
 
-// Custom deleter. Necessary to clean up the memory we use (to be able to mutate).
-struct DexFileDeleter {
-  void operator()(DexFile* in) {
-    if (in != nullptr) {
-      delete[] in->Begin();
-      delete in;
-    }
-  }
-};
-
-using DexFileUniquePtr = std::unique_ptr<DexFile, DexFileDeleter>;
-
 class DexFileVerifierTest : public CommonRuntimeTest {
  protected:
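+  // Constructs a DexFile directly over the given bytes, bypassing DexFile::Open and the
+  // verifier, so tests can mutate the data before verifying it explicitly.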
+  DexFile* GetDexFile(const uint8_t* dex_bytes, size_t length) {
+    return new DexFile(dex_bytes, length, "tmp", 0, nullptr, nullptr);
+  }
+
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
                           std::function<void(DexFile*)> f,
                           const char* expected_error) {
-    DexFileUniquePtr dex_file(WrapAsDexFile(dex_file_base64_content));
+    size_t length;
+    std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(dex_file_base64_content, &length);
+    CHECK(dex_bytes != nullptr);
+    // Note: `dex_file` will be destroyed before `dex_bytes`.
+    std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
     f(dex_file.get());
     FixUpChecksum(const_cast<uint8_t*>(dex_file->Begin()));
 
+    static constexpr bool kVerifyChecksum = true;
     std::string error_msg;
     bool success = DexFileVerifier::Verify(dex_file.get(),
                                            dex_file->Begin(),
                                            dex_file->Size(),
                                            location,
+                                           kVerifyChecksum,
                                            &error_msg);
     if (expected_error == nullptr) {
       EXPECT_TRUE(success) << error_msg;
@@ -150,15 +155,6 @@
       }
     }
   }
-
- private:
-  static DexFile* WrapAsDexFile(const char* dex_file_content_in_base_64) {
-    // Decode base64.
-    size_t length;
-    uint8_t* dex_bytes = DecodeBase64(dex_file_content_in_base_64, &length);
-    CHECK(dex_bytes != nullptr);
-    return new DexFile(dex_bytes, length, "tmp", 0, nullptr, nullptr);
-  }
 };
 
 static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
@@ -184,7 +180,7 @@
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
   std::vector<std::unique_ptr<const DexFile>> tmp;
-  bool success = DexFile::Open(location, location, error_msg, &tmp);
+  bool success = DexFile::Open(location, location, true, error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
   std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
@@ -193,6 +189,12 @@
   return dex_file;
 }
 
+// To generate a base64 encoded Dex file (such as kGoodTestDex, below)
+// from Smali files, use:
+//
+//   smali -o classes.dex class1.smali [class2.smali ...]
+//   base64 classes.dex > classes.dex.base64
+
 // For reference.
 static const char kGoodTestDex[] =
     "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN"
@@ -290,7 +292,9 @@
 // Find the method data for the first method with the given name (from class 0). Note: the pointer
 // is to the access flags, so that the caller doesn't have to handle the leb128-encoded method-index
 // delta.
-static const uint8_t* FindMethodData(const DexFile* dex_file, const char* name) {
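+// If |method_idx| is non-null, it is set to the index of the matched method.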
+static const uint8_t* FindMethodData(const DexFile* dex_file,
+                                     const char* name,
+                                     /*out*/ uint32_t* method_idx = nullptr) {
   const DexFile::ClassDef& class_def = dex_file->GetClassDef(0);
   const uint8_t* class_data = dex_file->GetClassData(class_def);
 
@@ -316,6 +320,9 @@
     const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
     const char* str = dex_file->GetStringData(string_id);
     if (strcmp(name, str) == 0) {
+      if (method_idx != nullptr) {
+        *method_idx = method_index;
+      }
       DecodeUnsignedLeb128(&trailing);
       return trailing;
     }
@@ -449,6 +456,7 @@
         kMethodFlagsTestDex,
         "method_flags_constructor_native_nocode",
         [&](DexFile* dex_file) {
+          MakeDexVersion37(dex_file);
           ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
           ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
 
@@ -461,6 +469,7 @@
         kMethodFlagsTestDex,
         "method_flags_constructor_abstract_nocode",
         [&](DexFile* dex_file) {
+          MakeDexVersion37(dex_file);
           ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
           ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
 
@@ -521,13 +530,14 @@
       kMethodFlagsTestDex,
       "init_not_allowed_flags",
       [&](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
         ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
 
         ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccPublic);
         OrMaskToMethodFlags(dex_file, "<init>", kAccStatic);
       },
-      "Constructor 1 is not flagged correctly wrt/ static");
+      "Constructor 1(LMethodFlags;.<init>) is not flagged correctly wrt/ static");
   static constexpr uint32_t kInitNotAllowed[] = {
       kAccFinal,
       kAccSynchronized,
@@ -544,7 +554,7 @@
           ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccPublic);
           OrMaskToMethodFlags(dex_file, "<init>", kInitNotAllowed[i]);
         },
-        "Constructor 1 flagged inappropriately");
+        "Constructor 1(LMethodFlags;.<init>) flagged inappropriately");
   }
 }
 
@@ -683,34 +693,25 @@
   }
 }
 
+TEST_F(DexFileVerifierTest, B28552165) {
+  // Regression test for bad error string retrieval in different situations.
+  // Using invalid access flags to trigger the error.
+  VerifyModification(
+      kMethodFlagsTestDex,
+      "b28552165",
+      [](DexFile* dex_file) {
+        OrMaskToMethodFlags(dex_file, "foo", kAccPublic | kAccProtected);
+        uint32_t method_idx;
+        FindMethodData(dex_file, "foo", &method_idx);
+        auto* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(method_idx));
+        method_id->name_idx_ = dex_file->NumStringIds();
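+        // Pointing name_idx_ past the end of the string_ids table makes the
+        // method name unresolvable, so the message falls back to "(error)".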
+      },
+      "Method may have only one of public/protected/private, LMethodFlags;.(error)");
+}
+
 // Set of dex files for interface method tests. Since method names are not as
 // easy to mutate, it's simpler to split the bad cases into separate dex files.
 
-// Interface with an instance constructor.
-//
-// .class public interface LInterfaceMethodFlags;
-// .super Ljava/lang/Object;
-//
-// .method public static constructor <clinit>()V
-// .registers 1
-//     return-void
-// .end method
-//
-// .method public constructor <init>()V
-// .registers 1
-//     return-void
-// .end method
-static const char kMethodFlagsInterfaceWithInit[] =
-    "ZGV4CjAzNQDRNt+hZ6X3I+xe66iVlCW7h9I38HmN4SvUAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAF"
-    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAAAIAQAAzAAAAMwA"
-    "AADWAAAA3gAAAPYAAAAKAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAQAA"
-    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAADoBAAAAAAAACDxjbGluaXQ+AAY8aW5pdD4AFkxJbnRl"
-    "cmZhY2VNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAAAAAAAAAAAQAAAAAAAAAA"
-    "AAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAIAAImABJQCAYGABKgCAAALAAAAAAAAAAEA"
-    "AAAAAAAAAQAAAAUAAABwAAAAAgAAAAMAAACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAA"
-    "AAEAAACsAAAAAiAAAAUAAADMAAAAAxAAAAEAAAAQAQAAASAAAAIAAAAUAQAAACAAAAEAAAA6AQAA"
-    "ABAAAAEAAABMAQAA";
-
 // Standard interface. Use declared-synchronized again for 3B encoding.
 //
 // .class public interface LInterfaceMethodFlags;
@@ -751,13 +752,6 @@
 }
 
 TEST_F(DexFileVerifierTest, MethodAccessFlagsInterfaces) {
-  // Reject interface with <init>.
-  VerifyModification(
-      kMethodFlagsInterfaceWithInit,
-      "method_flags_interface_with_init",
-      [](DexFile* dex_file ATTRIBUTE_UNUSED) {},
-      "Non-clinit interface method 1 should not have code");
-
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_ok",
@@ -765,6 +759,14 @@
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
       },
       nullptr);
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_ok37",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
 
   VerifyModification(
       kMethodFlagsInterface,
@@ -774,7 +776,18 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
       },
-      "Interface method 1 is not public and abstract");
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+      },
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
+
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_non_abstract",
@@ -783,7 +796,7 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccAbstract);
       },
-      "Method 1 has no code, but is not marked native or abstract");
+      "Method 1(LInterfaceMethodFlags;.foo) has no code, but is not marked native or abstract");
 
   VerifyModification(
       kMethodFlagsInterface,
@@ -793,7 +806,7 @@
 
         OrMaskToMethodFlags(dex_file, "foo", kAccStatic);
       },
-      "Direct/virtual method 1 not in expected list 0");
+      "Direct/virtual method 1(LInterfaceMethodFlags;.foo) not in expected list 0");
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_private",
@@ -803,7 +816,7 @@
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToMethodFlags(dex_file, "foo", kAccPrivate);
       },
-      "Direct/virtual method 1 not in expected list 0");
+      "Direct/virtual method 1(LInterfaceMethodFlags;.foo) not in expected list 0");
 
   VerifyModification(
       kMethodFlagsInterface,
@@ -813,7 +826,18 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
       },
-      "Interface method 1 is not public and abstract");
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+      },
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
+
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_protected",
@@ -823,7 +847,18 @@
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToMethodFlags(dex_file, "foo", kAccProtected);
       },
-      "Interface method 1 is not public and abstract");
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_protected",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToMethodFlags(dex_file, "foo", kAccProtected);
+      },
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
 
   constexpr uint32_t kAllMethodFlags =
       kAccPublic |
@@ -863,7 +898,7 @@
           }
           OrMaskToMethodFlags(dex_file, "foo", mask);
         },
-        "Abstract method 1 has disallowed access flags");
+        "Abstract method 1(LInterfaceMethodFlags;.foo) has disallowed access flags");
   }
 }
 
@@ -1102,6 +1137,14 @@
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
       },
       nullptr);
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
 
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
@@ -1111,7 +1154,18 @@
 
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+      },
       "Interface field is not public final static");
+
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
       "field_flags_interface_non_final",
@@ -1120,7 +1174,18 @@
 
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccFinal);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_non_final",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccFinal);
+      },
       "Interface field is not public final static");
+
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
       "field_flags_interface_protected",
@@ -1130,7 +1195,19 @@
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToFieldFlags(dex_file, "foo", kAccProtected);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_protected",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToFieldFlags(dex_file, "foo", kAccProtected);
+      },
       "Interface field is not public final static");
+
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
       "field_flags_interface_private",
@@ -1140,6 +1217,17 @@
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToFieldFlags(dex_file, "foo", kAccPrivate);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_private",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToFieldFlags(dex_file, "foo", kAccPrivate);
+      },
       "Interface field is not public final static");
 
   VerifyModification(
@@ -1184,6 +1272,21 @@
           }
           OrMaskToFieldFlags(dex_file, "foo", mask);
         },
+        nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+    VerifyModification(
+        kFieldFlagsInterfaceTestDex,
+        "field_flags_interface_disallowed",
+        [&](DexFile* dex_file) {
+          MakeDexVersion37(dex_file);
+          ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+          uint32_t mask = ApplyMaskShifted(kInterfaceDisallowed, i);
+          if ((mask & kAccProtected) != 0) {
+            mask &= ~kAccProtected;
+            ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+          }
+          OrMaskToFieldFlags(dex_file, "foo", mask);
+        },
         "Interface field has disallowed flag");
   }
 }
@@ -1212,6 +1315,14 @@
       [](DexFile* dex_file) {
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceBadTestDex,
+      "field_flags_interface_non_static",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
       "Interface field is not public final static");
 }
 
@@ -1285,4 +1396,351 @@
       "DBG_START_LOCAL type_idx");
 }
 
+TEST_F(DexFileVerifierTest, SectionAlignment) {
+  {
+    // The input dex file should be good before modification. Any file is fine, as long as it
+    // uses all sections.
+    ScratchFile tmp;
+    std::string error_msg;
+    std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kGoodTestDex,
+                                                         tmp.GetFilename().c_str(),
+                                                         &error_msg));
+    ASSERT_TRUE(raw.get() != nullptr) << error_msg;
+  }
+
+  // Modify all section offsets to be unaligned.
+  constexpr size_t kSections = 7;
+  for (size_t i = 0; i < kSections; ++i) {
+    VerifyModification(
+        kGoodTestDex,
+        "section_align",
+        [&](DexFile* dex_file) {
+          DexFile::Header* header = const_cast<DexFile::Header*>(
+              reinterpret_cast<const DexFile::Header*>(dex_file->Begin()));
+          uint32_t* off_ptr;
+          switch (i) {
+            case 0:
+              off_ptr = &header->map_off_;
+              break;
+            case 1:
+              off_ptr = &header->string_ids_off_;
+              break;
+            case 2:
+              off_ptr = &header->type_ids_off_;
+              break;
+            case 3:
+              off_ptr = &header->proto_ids_off_;
+              break;
+            case 4:
+              off_ptr = &header->field_ids_off_;
+              break;
+            case 5:
+              off_ptr = &header->method_ids_off_;
+              break;
+            case 6:
+              off_ptr = &header->class_defs_off_;
+              break;
+
+            static_assert(kSections == 7, "kSections is wrong");
+            default:
+              LOG(FATAL) << "Unexpected section";
+              UNREACHABLE();
+          }
+          ASSERT_TRUE(off_ptr != nullptr);
+          ASSERT_NE(*off_ptr, 0U) << i;  // Should already contain a value (in use).
+          (*off_ptr)++;                  // Add one, which should misalign it (all the sections
+                                         // above are aligned by 4).
+        },
+        "should be aligned by 4 for");
+  }
+}
+
+// Generated from
+//
+// .class LOverloading;
+//
+// .super Ljava/lang/Object;
+//
+// .method public static foo()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method public static foo(I)V
+// .registers 1
+//     return-void
+// .end method
+static const char kProtoOrderingTestDex[] =
+    "ZGV4CjAzNQA1L+ABE6voQ9Lr4Ci//efB53oGnDr5PinsAQAAcAAAAHhWNBIAAAAAAAAAAFgBAAAG"
+    "AAAAcAAAAAQAAACIAAAAAgAAAJgAAAAAAAAAAAAAAAIAAACwAAAAAQAAAMAAAAAMAQAA4AAAAOAA"
+    "AADjAAAA8gAAAAYBAAAJAQAADQEAAAAAAAABAAAAAgAAAAMAAAADAAAAAwAAAAAAAAAEAAAAAwAA"
+    "ABQBAAABAAAABQAAAAEAAQAFAAAAAQAAAAAAAAACAAAAAAAAAP////8AAAAASgEAAAAAAAABSQAN"
+    "TE92ZXJsb2FkaW5nOwASTGphdmEvbGFuZy9PYmplY3Q7AAFWAAJWSQADZm9vAAAAAQAAAAAAAAAA"
+    "AAAAAAAAAAEAAAAAAAAAAAAAAAEAAAAOAAAAAQABAAAAAAAAAAAAAQAAAA4AAAACAAAJpAIBCbgC"
+    "AAAMAAAAAAAAAAEAAAAAAAAAAQAAAAYAAABwAAAAAgAAAAQAAACIAAAAAwAAAAIAAACYAAAABQAA"
+    "AAIAAACwAAAABgAAAAEAAADAAAAAAiAAAAYAAADgAAAAARAAAAEAAAAUAQAAAxAAAAIAAAAcAQAA"
+    "ASAAAAIAAAAkAQAAACAAAAEAAABKAQAAABAAAAEAAABYAQAA";
+
+TEST_F(DexFileVerifierTest, ProtoOrdering) {
+  {
+    // The input dex file should be good before modification.
+    ScratchFile tmp;
+    std::string error_msg;
+    std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kProtoOrderingTestDex,
+                                                         tmp.GetFilename().c_str(),
+                                                         &error_msg));
+    ASSERT_TRUE(raw.get() != nullptr) << error_msg;
+  }
+
+  // Modify the order of the ProtoIds for two overloads of "foo" with the
+  // same return type, where one has a longer parameter list than the other.
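+  // (The dex specification requires proto_ids to be sorted by return type and
+  // then by argument list, with no duplicate entries; that ordering is what
+  // the verifier checks below.)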
+  for (size_t i = 0; i != 2; ++i) {
+    VerifyModification(
+        kProtoOrderingTestDex,
+        "proto_ordering",
+        [i](DexFile* dex_file) {
+          uint32_t method_idx;
+          const uint8_t* data = FindMethodData(dex_file, "foo", &method_idx);
+          CHECK(data != nullptr);
+          // There should be 2 methods called "foo".
+          CHECK_LT(method_idx + 1u, dex_file->NumMethodIds());
+          CHECK_EQ(dex_file->GetMethodId(method_idx).name_idx_,
+                   dex_file->GetMethodId(method_idx + 1).name_idx_);
+          CHECK_EQ(dex_file->GetMethodId(method_idx).proto_idx_ + 1u,
+                   dex_file->GetMethodId(method_idx + 1).proto_idx_);
+          // Their return types should be the same.
+          uint32_t proto1_idx = dex_file->GetMethodId(method_idx).proto_idx_;
+          const DexFile::ProtoId& proto1 = dex_file->GetProtoId(proto1_idx);
+          const DexFile::ProtoId& proto2 = dex_file->GetProtoId(proto1_idx + 1u);
+          CHECK_EQ(proto1.return_type_idx_, proto2.return_type_idx_);
+          // And the first should not have any parameters while the second should have some.
+          CHECK(!DexFileParameterIterator(*dex_file, proto1).HasNext());
+          CHECK(DexFileParameterIterator(*dex_file, proto2).HasNext());
+          if (i == 0) {
+            // Swap the proto parameters and shorties to break the ordering.
+            std::swap(const_cast<uint32_t&>(proto1.parameters_off_),
+                      const_cast<uint32_t&>(proto2.parameters_off_));
+            std::swap(const_cast<uint32_t&>(proto1.shorty_idx_),
+                      const_cast<uint32_t&>(proto2.shorty_idx_));
+          } else {
+            // Copy the proto parameters and shorty to create duplicate proto id.
+            const_cast<uint32_t&>(proto1.parameters_off_) = proto2.parameters_off_;
+            const_cast<uint32_t&>(proto1.shorty_idx_) = proto2.shorty_idx_;
+          }
+        },
+        "Out-of-order proto_id arguments");
+  }
+}
+
+// To generate a base64 encoded Dex file version 037 from Smali files, use:
+//
+//   smali --api-level 24 -o classes.dex class1.smali [class2.smali ...]
+//   base64 classes.dex >classes.dex.base64
+
+// Dex file version 037 generated from:
+//
+//   .class public LB28685551;
+//   .super LB28685551;
+
+static const char kClassExtendsItselfTestDex[] =
+    "ZGV4CjAzNwDeGbgRg1kb6swszpcTWrrOAALB++F4OPT0AAAAcAAAAHhWNBIAAAAAAAAAAKgAAAAB"
+    "AAAAcAAAAAEAAAB0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAHgAAABcAAAAmAAAAJgA"
+    "AAAAAAAAAAAAAAEAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAALTEIyODY4NTU1MTsAAAAABgAA"
+    "AAAAAAABAAAAAAAAAAEAAAABAAAAcAAAAAIAAAABAAAAdAAAAAYAAAABAAAAeAAAAAIgAAABAAAA"
+    "mAAAAAAQAAABAAAAqAAAAA==";
+
+TEST_F(DexFileVerifierTest, ClassExtendsItself) {
+  VerifyModification(
+      kClassExtendsItselfTestDex,
+      "class_extends_itself",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Class with same type idx as its superclass: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public LFoo;
+//   .super LBar;
+//
+// and:
+//
+//    .class public LBar;
+//    .super LFoo;
+
+static const char kClassesExtendOneAnotherTestDex[] =
+    "ZGV4CjAzNwBXHSrwpDMwRBkg+L+JeQCuFNRLhQ86duEcAQAAcAAAAHhWNBIAAAAAAAAAANAAAAAC"
+    "AAAAcAAAAAIAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAIAAAABcAAAAwAAAAMAA"
+    "AADHAAAAAAAAAAEAAAABAAAAAQAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAAAAAABAAAAAQAA"
+    "AAAAAAD/////AAAAAAAAAAAAAAAABUxCYXI7AAVMRm9vOwAAAAYAAAAAAAAAAQAAAAAAAAABAAAA"
+    "AgAAAHAAAAACAAAAAgAAAHgAAAAGAAAAAgAAAIAAAAACIAAAAgAAAMAAAAAAEAAAAQAAANAAAAA=";
+
+TEST_F(DexFileVerifierTest, ClassesExtendOneAnother) {
+  VerifyModification(
+      kClassesExtendOneAnotherTestDex,
+      "classes_extend_one_another",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " superclass with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public LAll;
+//   .super LYour;
+//
+// and:
+//
+//   .class public LYour;
+//   .super LBase;
+//
+// and:
+//
+//   .class public LBase;
+//   .super LAll;
+
+static const char kCircularClassInheritanceTestDex[] =
+    "ZGV4CjAzNwBMJxgP0SJz6oLXnKfl+J7lSEORLRwF5LNMAQAAcAAAAHhWNBIAAAAAAAAAAAABAAAD"
+    "AAAAcAAAAAMAAAB8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAIgAAABkAAAA6AAAAOgA"
+    "AADvAAAA9wAAAAAAAAABAAAAAgAAAAEAAAABAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAgAA"
+    "AAEAAAABAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAAAAAQAAAAIAAAAAAAAA/////wAAAAAAAAAA"
+    "AAAAAAVMQWxsOwAGTEJhc2U7AAZMWW91cjsAAAYAAAAAAAAAAQAAAAAAAAABAAAAAwAAAHAAAAAC"
+    "AAAAAwAAAHwAAAAGAAAAAwAAAIgAAAACIAAAAwAAAOgAAAAAEAAAAQAAAAABAAA=";
+
+TEST_F(DexFileVerifierTest, CircularClassInheritance) {
+  VerifyModification(
+      kCircularClassInheritanceTestDex,
+      "circular_class_inheritance",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " superclass with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LInterfaceImplementsItself;
+//   .super Ljava/lang/Object;
+//   .implements LInterfaceImplementsItself;
+
+static const char kInterfaceImplementsItselfTestDex[] =
+    "ZGV4CjAzNwCKKrjatp8XbXl5S/bEVJnqaBhjZkQY4440AQAAcAAAAHhWNBIAAAAAAAAAANwAAAAC"
+    "AAAAcAAAAAIAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAIAAAACUAAAAoAAAAKAA"
+    "AAC9AAAAAAAAAAEAAAAAAAAAAQYAAAEAAADUAAAA/////wAAAAAAAAAAAAAAABtMSW50ZXJmYWNl"
+    "SW1wbGVtZW50c0l0c2VsZjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAAAAABAAAAAAAAAAcAAAAAAAAA"
+    "AQAAAAAAAAABAAAAAgAAAHAAAAACAAAAAgAAAHgAAAAGAAAAAQAAAIAAAAACIAAAAgAAAKAAAAAB"
+    "EAAAAQAAANQAAAAAEAAAAQAAANwAAAA=";
+
+TEST_F(DexFileVerifierTest, InterfaceImplementsItself) {
+  VerifyModification(
+      kInterfaceImplementsItselfTestDex,
+      "interface_implements_itself",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Class with same type idx as implemented interface: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LPing;
+//   .super Ljava/lang/Object;
+//   .implements LPong;
+//
+// and:
+//
+//   .class public abstract interface LPong;
+//   .super Ljava/lang/Object;
+//   .implements LPing;
+
+static const char kInterfacesImplementOneAnotherTestDex[] =
+    "ZGV4CjAzNwD0Kk9sxlYdg3Dy1Cff0gQCuJAQfEP6ohZUAQAAcAAAAHhWNBIAAAAAAAAAAPwAAAAD"
+    "AAAAcAAAAAMAAAB8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAIgAAACMAAAAyAAAAMgA"
+    "AADQAAAA2AAAAAAAAAABAAAAAgAAAAEAAAABBgAAAgAAAOwAAAD/////AAAAAAAAAAAAAAAAAAAA"
+    "AAEGAAACAAAA9AAAAP////8AAAAAAAAAAAAAAAAGTFBpbmc7AAZMUG9uZzsAEkxqYXZhL2xhbmcv"
+    "T2JqZWN0OwABAAAAAAAAAAEAAAABAAAABwAAAAAAAAABAAAAAAAAAAEAAAADAAAAcAAAAAIAAAAD"
+    "AAAAfAAAAAYAAAACAAAAiAAAAAIgAAADAAAAyAAAAAEQAAACAAAA7AAAAAAQAAABAAAA/AAAAA==";
+
+TEST_F(DexFileVerifierTest, InterfacesImplementOneAnother) {
+  VerifyModification(
+      kInterfacesImplementOneAnotherTestDex,
+      "interfaces_implement_one_another",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " implemented interface with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LA;
+//   .super Ljava/lang/Object;
+//   .implements LB;
+//
+// and:
+//
+//   .class public abstract interface LB;
+//   .super Ljava/lang/Object;
+//   .implements LC;
+//
+// and:
+//
+//   .class public abstract interface LC;
+//   .super Ljava/lang/Object;
+//   .implements LA;
+
+static const char kCircularInterfaceImplementationTestDex[] =
+    "ZGV4CjAzNwCzKmD5Fol6XAU6ichYHcUTIP7Z7MdTcEmEAQAAcAAAAHhWNBIAAAAAAAAAACwBAAAE"
+    "AAAAcAAAAAQAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAJAAAACUAAAA8AAAAPAA"
+    "AAD1AAAA+gAAAP8AAAAAAAAAAQAAAAIAAAADAAAAAgAAAAEGAAADAAAAHAEAAP////8AAAAAAAAA"
+    "AAAAAAABAAAAAQYAAAMAAAAUAQAA/////wAAAAAAAAAAAAAAAAAAAAABBgAAAwAAACQBAAD/////"
+    "AAAAAAAAAAAAAAAAA0xBOwADTEI7AANMQzsAEkxqYXZhL2xhbmcvT2JqZWN0OwAAAQAAAAIAAAAB"
+    "AAAAAAAAAAEAAAABAAAABwAAAAAAAAABAAAAAAAAAAEAAAAEAAAAcAAAAAIAAAAEAAAAgAAAAAYA"
+    "AAADAAAAkAAAAAIgAAAEAAAA8AAAAAEQAAADAAAAFAEAAAAQAAABAAAALAEAAA==";
+
+TEST_F(DexFileVerifierTest, CircularInterfaceImplementation) {
+  VerifyModification(
+      kCircularInterfaceImplementationTestDex,
+      "circular_interface_implementation",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '2' defined before"
+      " implemented interface with type idx: '0'");
+}
+
+TEST_F(DexFileVerifierTest, Checksum) {
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(kGoodTestDex, &length);
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+
+  // Good checksum: all pass.
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good checksum, no verify",
+                                      /*verify_checksum*/ false,
+                                      &error_msg));
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good checksum, verify",
+                                      /*verify_checksum*/ true,
+                                      &error_msg));
+
+  // Bad checksum: passes with verify_checksum off, fails with it on.
+  DexFile::Header* header = reinterpret_cast<DexFile::Header*>(
+      const_cast<uint8_t*>(dex_file->Begin()));
+  header->checksum_ = 0;
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "bad checksum, no verify",
+                                      /*verify_checksum*/ false,
+                                      &error_msg));
+  EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(),
+                                       dex_file->Begin(),
+                                       dex_file->Size(),
+                                       "bad checksum, verify",
+                                       /*verify_checksum*/ true,
+                                       &error_msg));
+  EXPECT_NE(error_msg.find("Bad checksum"), std::string::npos) << error_msg;
+}
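+// Note: the checksum checked above is an Adler-32 over everything after the
+// header's magic and checksum fields. A minimal sketch of recomputing it with
+// zlib (assuming <zlib.h> is available; not part of this change):
+//
+//   const uint32_t non_sum = sizeof(header->magic_) + sizeof(header->checksum_);
+//   const uint32_t actual = adler32(adler32(0L, Z_NULL, 0),
+//                                   dex_file->Begin() + non_sum,
+//                                   dex_file->Size() - non_sum);
+//   EXPECT_EQ(header->checksum_, actual);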
+
 }  // namespace art
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index e160a10..f6ed1f0 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -49,6 +49,8 @@
     case k32x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     case k51l: return true;
     default: return false;
   }
@@ -79,6 +81,8 @@
     case k32x: return VRegA_32x();
     case k35c: return VRegA_35c();
     case k3rc: return VRegA_3rc();
+    case k45cc: return VRegA_45cc();
+    case k4rcc: return VRegA_4rcc();
     case k51l: return VRegA_51l();
     default:
       LOG(FATAL) << "Tried to access vA of instruction " << Name() << " which has no A operand.";
@@ -206,6 +210,16 @@
   return InstAA(inst_data);
 }
 
+inline uint4_t Instruction::VRegA_45cc(uint16_t inst_data) const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return InstB(inst_data);  // This is labeled A in the spec.
+}
+
+inline uint8_t Instruction::VRegA_4rcc(uint16_t inst_data) const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return InstAA(inst_data);
+}
+
 //------------------------------------------------------------------------------
 // VRegB
 //------------------------------------------------------------------------------
@@ -223,13 +237,14 @@
     case k22t: return true;
     case k22x: return true;
     case k23x: return true;
-    case k25x: return true;
     case k31c: return true;
     case k31i: return true;
     case k31t: return true;
     case k32x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     case k51l: return true;
     default: return false;
   }
@@ -253,13 +268,14 @@
     case k22t: return VRegB_22t();
     case k22x: return VRegB_22x();
     case k23x: return VRegB_23x();
-    case k25x: return VRegB_25x();
     case k31c: return VRegB_31c();
     case k31i: return VRegB_31i();
     case k31t: return VRegB_31t();
     case k32x: return VRegB_32x();
     case k35c: return VRegB_35c();
     case k3rc: return VRegB_3rc();
+    case k45cc: return VRegB_45cc();
+    case k4rcc: return VRegB_4rcc();
     case k51l: return VRegB_51l();
     default:
       LOG(FATAL) << "Tried to access vB of instruction " << Name() << " which has no B operand.";
@@ -331,12 +347,6 @@
   return static_cast<uint8_t>(Fetch16(1) & 0xff);
 }
 
-// Number of additional registers in this instruction. # of var arg registers = this value + 1.
-inline uint4_t Instruction::VRegB_25x() const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-  return InstB(Fetch16(0));
-}
-
 inline uint32_t Instruction::VRegB_31c() const {
   DCHECK_EQ(FormatOf(Opcode()), k31c);
   return Fetch32(1);
@@ -367,6 +377,16 @@
   return Fetch16(1);
 }
 
+inline uint16_t Instruction::VRegB_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return Fetch16(1);
+}
+
+inline uint16_t Instruction::VRegB_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(1);
+}
+
 inline uint64_t Instruction::VRegB_51l() const {
   DCHECK_EQ(FormatOf(Opcode()), k51l);
   uint64_t vB_wide = Fetch32(1) | ((uint64_t) Fetch32(3) << 32);
@@ -383,9 +403,10 @@
     case k22s: return true;
     case k22t: return true;
     case k23x: return true;
-    case k25x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     default: return false;
   }
 }
@@ -397,9 +418,10 @@
     case k22s: return VRegC_22s();
     case k22t: return VRegC_22t();
     case k23x: return VRegC_23x();
-    case k25x: return VRegC_25x();
     case k35c: return VRegC_35c();
     case k3rc: return VRegC_3rc();
+    case k45cc: return VRegC_45cc();
+    case k4rcc: return VRegC_4rcc();
     default:
       LOG(FATAL) << "Tried to access vC of instruction " << Name() << " which has no C operand.";
       exit(EXIT_FAILURE);
@@ -431,11 +453,6 @@
   return static_cast<uint8_t>(Fetch16(1) >> 8);
 }
 
-inline uint4_t Instruction::VRegC_25x() const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-  return static_cast<uint4_t>(Fetch16(1) & 0xf);
-}
-
 inline uint4_t Instruction::VRegC_35c() const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
   return static_cast<uint4_t>(Fetch16(2) & 0x0f);
@@ -446,81 +463,53 @@
   return Fetch16(2);
 }
 
-inline bool Instruction::HasVarArgs35c() const {
-  return FormatOf(Opcode()) == k35c;
+inline uint4_t Instruction::VRegC_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return static_cast<uint4_t>(Fetch16(2) & 0x0f);
 }
 
-inline bool Instruction::HasVarArgs25x() const {
-  return FormatOf(Opcode()) == k25x;
+inline uint16_t Instruction::VRegC_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(2);
 }
 
-// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2.
-inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-
-  /*
-   * The opcode looks like this:
-   *   op vC, {vD, vE, vF, vG}
-   *
-   *  and vB is the (implicit) register count (0-4) which denotes how far from vD to vG to read.
-   *
-   *  vC is always present, so with "op vC, {}" the register count will be 0 even though vC
-   *  is valid.
-   *
-   *  The exact semantic meanings of vC:vG is up to the instruction using the format.
-   *
-   *  Encoding drawing as a bit stream:
-   *  (Note that each uint16 is little endian, and each register takes up 4 bits)
-   *
-   *       uint16  |||   uint16
-   *   7-0     15-8    7-0   15-8
-   *  |------|-----|||-----|-----|
-   *  |opcode|vB|vG|||vD|vC|vF|vE|
-   *  |------|-----|||-----|-----|
-   */
-  uint16_t reg_list = Fetch16(1);
-  uint4_t count = VRegB_25x();
-  DCHECK_LE(count, 4U) << "Invalid arg count in 25x (" << count << ")";
-
-  /*
-   * TODO(iam): Change instruction encoding to one of:
-   *
-   * - (X) vA = args count, vB = closure register, {vC..vG} = args (25x)
-   * - (Y) vA = args count, vB = method index, {vC..vG} = args (35x)
-   *
-   * (do this in conjunction with adding verifier support for invoke-lambda)
-   */
-
-  /*
-   * Copy the argument registers into the arg[] array, and
-   * also copy the first argument into vC. (The
-   * DecodedInstruction structure doesn't have separate
-   * fields for {vD, vE, vF, vG}, so there's no need to make
-   * copies of those.) Note that all cases fall-through.
-   */
-  switch (count) {
-    case 4:
-      arg[5] = (Fetch16(0) >> 8) & 0x0f;  // vG
-      FALLTHROUGH_INTENDED;
-    case 3:
-      arg[4] = (reg_list >> 12) & 0x0f;  // vF
-      FALLTHROUGH_INTENDED;
-    case 2:
-      arg[3] = (reg_list >> 8) & 0x0f;  // vE
-      FALLTHROUGH_INTENDED;
-    case 1:
-      arg[2] = (reg_list >> 4) & 0x0f;  // vD
-      FALLTHROUGH_INTENDED;
-    default:  // case 0
-      // The required lambda 'this' is actually a pair, but the pair is implicit.
-      arg[0] = VRegC_25x();  // vC
-      arg[1] = arg[0] + 1;   // vC + 1
-      break;
+//------------------------------------------------------------------------------
+// VRegH
+//------------------------------------------------------------------------------
+inline bool Instruction::HasVRegH() const {
+  switch (FormatOf(Opcode())) {
+    case k45cc: return true;
+    case k4rcc: return true;
+    default: return false;
   }
 }
 
+inline int32_t Instruction::VRegH() const {
+  switch (FormatOf(Opcode())) {
+    case k45cc: return VRegH_45cc();
+    case k4rcc: return VRegH_4rcc();
+    default:
+      LOG(FATAL) << "Tried to access vH of instruction " << Name() << " which has no H operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
+inline uint16_t Instruction::VRegH_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return Fetch16(3);
+}
+
+inline uint16_t Instruction::VRegH_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(3);
+}
+
+inline bool Instruction::HasVarArgs() const {
+  return (FormatOf(Opcode()) == k35c) || (FormatOf(Opcode()) == k45cc);
+}
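+// Worked example (hypothetical opcode byte 0xfa, illustrative only): for a
+// k45cc instruction whose four code units are [0x20fa, 0x0001, 0x3210,
+// 0x0002], VRegA_45cc() == 2 (the argument count, from InstB), VRegB_45cc()
+// == 0x0001 (meth@1), VRegC_45cc() == 0 (vC, the low nibble of 0x3210), and
+// VRegH_45cc() == 0x0002 (proto@2).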
+
 inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const {
-  DCHECK_EQ(FormatOf(Opcode()), k35c);
+  DCHECK(HasVarArgs());
 
   /*
    * Note that the fields mentioned in the spec don't appear in
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 5250b0d..c31d236 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -28,7 +28,7 @@
 namespace art {
 
 const char* const Instruction::kInstructionNames[] = {
-#define INSTRUCTION_NAME(o, c, pname, f, r, i, a, v) pname,
+#define INSTRUCTION_NAME(o, c, pname, f, i, a, v) pname,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
 #undef DEX_INSTRUCTION_LIST
@@ -36,7 +36,7 @@
 };
 
 Instruction::Format const Instruction::kInstructionFormats[] = {
-#define INSTRUCTION_FORMAT(o, c, p, format, r, i, a, v) format,
+#define INSTRUCTION_FORMAT(o, c, p, format, i, a, v) format,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_FORMAT)
 #undef DEX_INSTRUCTION_LIST
@@ -44,7 +44,7 @@
 };
 
 Instruction::IndexType const Instruction::kInstructionIndexTypes[] = {
-#define INSTRUCTION_INDEX_TYPE(o, c, p, f, r, index, a, v) index,
+#define INSTRUCTION_INDEX_TYPE(o, c, p, f, index, a, v) index,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_INDEX_TYPE)
 #undef DEX_INSTRUCTION_LIST
@@ -52,7 +52,7 @@
 };
 
 int const Instruction::kInstructionFlags[] = {
-#define INSTRUCTION_FLAGS(o, c, p, f, r, i, flags, v) flags,
+#define INSTRUCTION_FLAGS(o, c, p, f, i, flags, v) flags,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_FLAGS)
 #undef DEX_INSTRUCTION_LIST
@@ -60,7 +60,7 @@
 };
 
 int const Instruction::kInstructionVerifyFlags[] = {
-#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, r, i, a, vflags) vflags,
+#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, i, a, vflags) vflags,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_VERIFY_FLAGS)
 #undef DEX_INSTRUCTION_LIST
@@ -68,12 +68,13 @@
 };
 
 int const Instruction::kInstructionSizeInCodeUnits[] = {
-#define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
-    ((opcode == NOP)                        ? -1 : \
-     ((format >= k10x) && (format <= k10t)) ?  1 : \
-     ((format >= k20t) && (format <= k25x)) ?  2 : \
-     ((format >= k32x) && (format <= k3rc)) ?  3 : \
-      (format == k51l)                      ?  5 : -1),
+#define INSTRUCTION_SIZE(opcode, c, p, format, i, a, v) \
+    (((opcode) == NOP) ? -1 : \
+     (((format) >= k10x) && ((format) <= k10t)) ?  1 : \
+     (((format) >= k20t) && ((format) <= k22c)) ?  2 : \
+     (((format) >= k32x) && ((format) <= k3rc)) ?  3 : \
+     (((format) >= k45cc) && ((format) <= k4rcc)) ? 4 : \
+      ((format) == k51l) ?  5 : -1),
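+// For example, with this table a k45cc instruction reports a size of 4 code
+// units (8 bytes), matching the four-unit "AG op BBBB FEDC HHHH" layout.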
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_SIZE)
 #undef DEX_INSTRUCTION_LIST
@@ -189,8 +190,17 @@
         case CONST_STRING:
           if (file != nullptr) {
             uint32_t string_idx = VRegB_21c();
-            os << StringPrintf("const-string v%d, %s // string@%d", VRegA_21c(),
-                               PrintableString(file->StringDataByIdx(string_idx)).c_str(), string_idx);
+            if (string_idx < file->NumStringIds()) {
+              os << StringPrintf("const-string v%d, %s // string@%d",
+                                 VRegA_21c(),
+                                 PrintableString(file->StringDataByIdx(string_idx)).c_str(),
+                                 string_idx);
+            } else {
+              os << StringPrintf("const-string v%d, <<invalid-string-idx-%d>> // string@%d",
+                                 VRegA_21c(),
+                                 string_idx,
+                                 string_idx);
+            }
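+            // Printing a placeholder for an out-of-range index avoids reading
+            // past the string_ids table when dumping a corrupt dex file.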
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -232,14 +242,6 @@
             break;
           }
           FALLTHROUGH_INTENDED;
-        case CREATE_LAMBDA:
-          if (file != nullptr) {
-            uint32_t method_idx = VRegB_21c();
-            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyMethod(method_idx, *file, true)
-               << " // method@" << method_idx;
-            break;
-          }
-          FALLTHROUGH_INTENDED;
         default:
           os << StringPrintf("%s v%d, thing@%d", opcode, VRegA_21c(), VRegB_21c());
           break;
@@ -320,26 +322,6 @@
       }
       break;
     }
-    case k25x: {
-      if (Opcode() == INVOKE_LAMBDA) {
-        uint32_t arg[kMaxVarArgRegs25x];
-        GetAllArgs25x(arg);
-        const size_t num_extra_var_args = VRegB_25x();
-        DCHECK_LE(num_extra_var_args + 2, arraysize(arg));
-
-        // invoke-lambda vC, {vD, vE, vF, vG}
-        os << opcode << " v" << arg[0] << ", {";
-        for (size_t i = 0; i < num_extra_var_args; ++i) {
-          if (i != 0) {
-            os << ", ";
-          }
-          os << "v" << arg[i+2];  // Don't print the pair of vC registers. Pair is implicit.
-        }
-        os << "}";
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    }
     case k32x:  os << StringPrintf("%s v%d, v%d", opcode, VRegA_32x(), VRegB_32x()); break;
     case k30t:  os << StringPrintf("%s %+d", opcode, VRegA_30t()); break;
     case k31t:  os << StringPrintf("%s v%d, %+d", opcode, VRegA_31t(), VRegB_31t()); break;
@@ -348,9 +330,19 @@
       if (Opcode() == CONST_STRING_JUMBO) {
         uint32_t string_idx = VRegB_31c();
         if (file != nullptr) {
-          os << StringPrintf("%s v%d, %s // string@%d", opcode, VRegA_31c(),
-                             PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                             string_idx);
+          if (string_idx < file->NumStringIds()) {
+            os << StringPrintf("%s v%d, %s // string@%d",
+                               opcode,
+                               VRegA_31c(),
+                               PrintableString(file->StringDataByIdx(string_idx)).c_str(),
+                               string_idx);
+          } else {
+            os << StringPrintf("%s v%d, <<invalid-string-idx-%d>> // string@%d",
+                               opcode,
+                               VRegA_31c(),
+                               string_idx,
+                               string_idx);
+          }
         } else {
           os << StringPrintf("%s v%d, string@%d", opcode, VRegA_31c(), string_idx);
         }
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 2871f76..f437fde 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -80,7 +80,7 @@
   };
 
   enum Code {  // private marker to keep generate-operator-out.py from processing this.
-#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = opcode,
+#define INSTRUCTION_ENUM(opcode, cname, p, f, i, a, v) cname = (opcode),
 #include "dex_instruction_list.h"
     DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
 #undef DEX_INSTRUCTION_LIST
@@ -105,7 +105,6 @@
     k22t,  // op vA, vB, +CCCC
     k22s,  // op vA, vB, #+CCCC
     k22c,  // op vA, vB, thing@CCCC
-    k25x,  // op vC, {vD, vE, vF, vG} (B: count)
     k32x,  // op vAAAA, vBBBB
     k30t,  // op +AAAAAAAA
     k31t,  // op vAA, +BBBBBBBB
@@ -113,6 +112,15 @@
     k31c,  // op vAA, thing@BBBBBBBB
     k35c,  // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG)
     k3rc,  // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
+
+    // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH (A: count)
+    // format: AG op BBBB FEDC HHHH
+    k45cc,
+
+    // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count)
+    // format: AA op BBBB CCCC HHHH
+    k4rcc,
+
     k51l,  // op vAA, #+BBBBBBBBBBBBBBBB
   };
 
@@ -180,11 +188,9 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
-    kVerifyRegCString         = 0x800000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
-  static constexpr uint32_t kMaxVarArgRegs25x = 6;  // lambdas are 2 registers.
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -220,7 +226,7 @@
 
   // Returns a pointer to the instruction after this 2xx instruction in the stream.
   const Instruction* Next_2xx() const {
-    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k25x);
+    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k22c);
     return RelativeAt(2);
   }
 
@@ -230,6 +236,12 @@
     return RelativeAt(3);
   }
 
+  // Returns a pointer to the instruction after this 4xx instruction in the stream.
+  const Instruction* Next_4xx() const {
+    DCHECK(FormatOf(Opcode()) >= k45cc && FormatOf(Opcode()) <= k4rcc);
+    return RelativeAt(4);
+  }
+
   // Returns a pointer to the instruction after this 51l instruction in the stream.
   const Instruction* Next_51l() const {
     DCHECK(FormatOf(Opcode()) == k51l);
@@ -316,6 +328,12 @@
   uint8_t VRegA_51l() const {
     return VRegA_51l(Fetch16(0));
   }
+  uint4_t VRegA_45cc() const {
+    return VRegA_45cc(Fetch16(0));
+  }
+  uint8_t VRegA_4rcc() const {
+    return VRegA_4rcc(Fetch16(0));
+  }
 
   // The following methods return the vA operand for various instruction formats. The "inst_data"
   // parameter holds the first 16 bits of instruction which the returned value is decoded from.
@@ -340,6 +358,8 @@
   uint4_t VRegA_35c(uint16_t inst_data) const;
   uint8_t VRegA_3rc(uint16_t inst_data) const;
   uint8_t VRegA_51l(uint16_t inst_data) const;
+  uint4_t VRegA_45cc(uint16_t inst_data) const;
+  uint8_t VRegA_4rcc(uint16_t inst_data) const;
 
   // VRegB
   bool HasVRegB() const;
@@ -370,7 +390,6 @@
   }
   uint16_t VRegB_22x() const;
   uint8_t VRegB_23x() const;
-  uint4_t VRegB_25x() const;
   uint32_t VRegB_31c() const;
   int32_t VRegB_31i() const;
   int32_t VRegB_31t() const;
@@ -378,6 +397,8 @@
   uint16_t VRegB_35c() const;
   uint16_t VRegB_3rc() const;
   uint64_t VRegB_51l() const;  // vB_wide
+  uint16_t VRegB_45cc() const;
+  uint16_t VRegB_4rcc() const;
 
   // The following methods return the vB operand for all instruction formats where it is encoded in
   // the first 16 bits of instruction. The "inst_data" parameter holds these 16 bits. The returned
@@ -397,20 +418,24 @@
   int16_t VRegC_22s() const;
   int16_t VRegC_22t() const;
   uint8_t VRegC_23x() const;
-  uint4_t VRegC_25x() const;
   uint4_t VRegC_35c() const;
   uint16_t VRegC_3rc() const;
+  uint4_t VRegC_45cc() const;
+  uint16_t VRegC_4rcc() const;
+
+  // VRegH
+  bool HasVRegH() const;
+  int32_t VRegH() const;
+  uint16_t VRegH_45cc() const;
+  uint16_t VRegH_4rcc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  bool HasVarArgs35c() const;
-  bool HasVarArgs25x() const;
-
-  // TODO(iam): Make this name more consistent with GetAllArgs25x by including the opcode format.
+  bool HasVarArgs() const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
-  void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -538,7 +563,7 @@
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
   int GetVerifyExtraFlags() const {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 9d7e0c4..e974932 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -17,264 +17,265 @@
 #ifndef ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 #define ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 
+// V(opcode, instruction_code, name, format, index, flags, verifier_flags);
 #define DEX_INSTRUCTION_LIST(V) \
-  V(0x00, NOP, "nop", k10x, false, kIndexNone, kContinue, kVerifyNone) \
-  V(0x01, MOVE, "move", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x02, MOVE_FROM16, "move/from16", k22x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x03, MOVE_16, "move/16", k32x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x04, MOVE_WIDE, "move-wide", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x07, MOVE_OBJECT, "move-object", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x0A, MOVE_RESULT, "move-result", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, true, kIndexNone, kContinue, kVerifyRegAWide) \
-  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0E, RETURN_VOID, "return-void", k10x, false, kIndexNone, kReturn, kVerifyNone) \
-  V(0x0F, RETURN, "return", k11x, false, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x10, RETURN_WIDE, "return-wide", k11x, false, kIndexNone, kReturn, kVerifyRegAWide) \
-  V(0x11, RETURN_OBJECT, "return-object", k11x, false, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x12, CONST_4, "const/4", k11n, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x13, CONST_16, "const/16", k21s, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x14, CONST, "const", k31i, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x15, CONST_HIGH16, "const/high16", k21h, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x18, CONST_WIDE, "const-wide", k51l, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x1A, CONST_STRING, "const-string", k21c, true, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, true, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1C, CONST_CLASS, "const-class", k21c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1F, CHECK_CAST, "check-cast", k21c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x20, INSTANCE_OF, "instance-of", k22c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0x21, ARRAY_LENGTH, "array-length", k12x, true, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0x22, NEW_INSTANCE, "new-instance", k21c, true, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
-  V(0x23, NEW_ARRAY, "new-array", k22c, true, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
-  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, false, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
-  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, false, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
-  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
-  V(0x27, THROW, "throw", k11x, false, kIndexNone, kThrow, kVerifyRegA) \
-  V(0x28, GOTO, "goto", k10t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x29, GOTO_16, "goto/16", k20t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2A, GOTO_32, "goto/32", k30t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, false, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, false, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x31, CMP_LONG, "cmp-long", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x32, IF_EQ, "if-eq", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x33, IF_NE, "if-ne", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x34, IF_LT, "if-lt", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x35, IF_GE, "if-ge", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x36, IF_GT, "if-gt", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x37, IF_LE, "if-le", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x38, IF_EQZ, "if-eqz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x39, IF_NEZ, "if-nez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3A, IF_LTZ, "if-ltz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3B, IF_GEZ, "if-gez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3C, IF_GTZ, "if-gtz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3D, IF_LEZ, "if-lez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3E, UNUSED_3E, "unused-3e", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x3F, UNUSED_3F, "unused-3f", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x40, UNUSED_40, "unused-40", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x41, UNUSED_41, "unused-41", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x42, UNUSED_42, "unused-42", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x43, UNUSED_43, "unused-43", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x44, AGET, "aget", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x45, AGET_WIDE, "aget-wide", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x46, AGET_OBJECT, "aget-object", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x48, AGET_BYTE, "aget-byte", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x49, AGET_CHAR, "aget-char", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4A, AGET_SHORT, "aget-short", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4B, APUT, "aput", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4C, APUT_WIDE, "aput-wide", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x4D, APUT_OBJECT, "aput-object", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4F, APUT_BYTE, "aput-byte", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x50, APUT_CHAR, "aput-char", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x51, APUT_SHORT, "aput-short", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x52, IGET, "iget", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x53, IGET_WIDE, "iget-wide", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x54, IGET_OBJECT, "iget-object", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x56, IGET_BYTE, "iget-byte", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x57, IGET_CHAR, "iget-char", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x58, IGET_SHORT, "iget-short", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x59, IPUT, "iput", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5A, IPUT_WIDE, "iput-wide", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x5B, IPUT_OBJECT, "iput-object", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5D, IPUT_BYTE, "iput-byte", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5E, IPUT_CHAR, "iput-char", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5F, IPUT_SHORT, "iput-short", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x60, SGET, "sget", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x61, SGET_WIDE, "sget-wide", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x62, SGET_OBJECT, "sget-object", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x64, SGET_BYTE, "sget-byte", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x65, SGET_CHAR, "sget-char", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x66, SGET_SHORT, "sget-short", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x67, SPUT, "sput", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x69, SPUT_OBJECT, "sput-object", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x71, INVOKE_STATIC, "invoke-static", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, false, kIndexNone, kReturn, kVerifyNone) \
-  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x79, UNUSED_79, "unused-79", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x7A, UNUSED_7A, "unused-7a", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x7B, NEG_INT, "neg-int", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7C, NOT_INT, "not-int", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7D, NEG_LONG, "neg-long", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7E, NOT_LONG, "not-long", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7F, NEG_FLOAT, "neg-float", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x80, NEG_DOUBLE, "neg-double", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x81, INT_TO_LONG, "int-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x84, LONG_TO_INT, "long-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x90, ADD_INT, "add-int", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x91, SUB_INT, "sub-int", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x92, MUL_INT, "mul-int", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x93, DIV_INT, "div-int", k23x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x94, REM_INT, "rem-int", k23x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x95, AND_INT, "and-int", k23x, true, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x96, OR_INT, "or-int", k23x, true, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x97, XOR_INT, "xor-int", k23x, true, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x98, SHL_INT, "shl-int", k23x, true, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x99, SHR_INT, "shr-int", k23x, true, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9A, USHR_INT, "ushr-int", k23x, true, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9B, ADD_LONG, "add-long", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9C, SUB_LONG, "sub-long", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9D, MUL_LONG, "mul-long", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9E, DIV_LONG, "div-long", k23x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9F, REM_LONG, "rem-long", k23x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA0, AND_LONG, "and-long", k23x, true, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA1, OR_LONG, "or-long", k23x, true, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA2, XOR_LONG, "xor-long", k23x, true, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA3, SHL_LONG, "shl-long", k23x, true, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA4, SHR_LONG, "shr-long", k23x, true, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA5, USHR_LONG, "ushr-long", k23x, true, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA6, ADD_FLOAT, "add-float", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA7, SUB_FLOAT, "sub-float", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA8, MUL_FLOAT, "mul-float", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA9, DIV_FLOAT, "div-float", k23x, true, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAA, REM_FLOAT, "rem-float", k23x, true, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAB, ADD_DOUBLE, "add-double", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAC, SUB_DOUBLE, "sub-double", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAD, MUL_DOUBLE, "mul-double", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAE, DIV_DOUBLE, "div-double", k23x, true, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAF, REM_DOUBLE, "rem-double", k23x, true, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, true, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
-  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, true, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
-  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, true, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
-  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, true, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
-  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, true, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
-  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, true, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
-  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, true, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, true, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, true, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, true, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, true, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, true, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, true, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, true, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, true, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, true, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, true, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD1, RSUB_INT, "rsub-int", k22s, true, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, true, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, true, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, true, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, true, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, true, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, true, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, true, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, true, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, true, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, true, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, true, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, true, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, true, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, true, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
-  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
-  V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \
-  /* TODO(iam): get rid of the unused 'false' column */ \
-  V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
-  V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \
-  V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \
-  V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFB, UNUSED_FB, "unused-fb", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFC, UNUSED_FC, "unused-fc", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFD, UNUSED_FD, "unused-fd", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFE, UNUSED_FE, "unused-fe", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFF, UNUSED_FF, "unused-ff", k10x, false, kIndexUnknown, 0, kVerifyError)
+  V(0x00, NOP, "nop", k10x, kIndexNone, kContinue, kVerifyNone) \
+  V(0x01, MOVE, "move", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x02, MOVE_FROM16, "move/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x03, MOVE_16, "move/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x04, MOVE_WIDE, "move-wide", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x07, MOVE_OBJECT, "move-object", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x0A, MOVE_RESULT, "move-result", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, kIndexNone, kContinue, kVerifyRegAWide) \
+  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0E, RETURN_VOID, "return-void", k10x, kIndexNone, kReturn, kVerifyNone) \
+  V(0x0F, RETURN, "return", k11x, kIndexNone, kReturn, kVerifyRegA) \
+  V(0x10, RETURN_WIDE, "return-wide", k11x, kIndexNone, kReturn, kVerifyRegAWide) \
+  V(0x11, RETURN_OBJECT, "return-object", k11x, kIndexNone, kReturn, kVerifyRegA) \
+  V(0x12, CONST_4, "const/4", k11n, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x13, CONST_16, "const/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x14, CONST, "const", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x15, CONST_HIGH16, "const/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x18, CONST_WIDE, "const-wide", k51l, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x1A, CONST_STRING, "const-string", k21c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
+  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
+  V(0x1C, CONST_CLASS, "const-class", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
+  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1F, CHECK_CAST, "check-cast", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
+  V(0x20, INSTANCE_OF, "instance-of", k22c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
+  V(0x21, ARRAY_LENGTH, "array-length", k12x, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
+  V(0x22, NEW_INSTANCE, "new-instance", k21c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
+  V(0x23, NEW_ARRAY, "new-array", k22c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
+  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
+  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
+  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
+  V(0x27, THROW, "throw", k11x, kIndexNone, kThrow, kVerifyRegA) \
+  V(0x28, GOTO, "goto", k10t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x29, GOTO_16, "goto/16", k20t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x2A, GOTO_32, "goto/32", k30t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x31, CMP_LONG, "cmp-long", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x32, IF_EQ, "if-eq", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x33, IF_NE, "if-ne", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x34, IF_LT, "if-lt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x35, IF_GE, "if-ge", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x36, IF_GT, "if-gt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x37, IF_LE, "if-le", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x38, IF_EQZ, "if-eqz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x39, IF_NEZ, "if-nez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3A, IF_LTZ, "if-ltz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3B, IF_GEZ, "if-gez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3C, IF_GTZ, "if-gtz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3D, IF_LEZ, "if-lez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3E, UNUSED_3E, "unused-3e", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x3F, UNUSED_3F, "unused-3f", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x40, UNUSED_40, "unused-40", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x41, UNUSED_41, "unused-41", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x42, UNUSED_42, "unused-42", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x43, UNUSED_43, "unused-43", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x44, AGET, "aget", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x45, AGET_WIDE, "aget-wide", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x46, AGET_OBJECT, "aget-object", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x48, AGET_BYTE, "aget-byte", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x49, AGET_CHAR, "aget-char", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4A, AGET_SHORT, "aget-short", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4B, APUT, "aput", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4C, APUT_WIDE, "aput-wide", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x4D, APUT_OBJECT, "aput-object", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4F, APUT_BYTE, "aput-byte", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x50, APUT_CHAR, "aput-char", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x51, APUT_SHORT, "aput-short", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x52, IGET, "iget", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x53, IGET_WIDE, "iget-wide", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x54, IGET_OBJECT, "iget-object", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x56, IGET_BYTE, "iget-byte", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x57, IGET_CHAR, "iget-char", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x58, IGET_SHORT, "iget-short", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x59, IPUT, "iput", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5A, IPUT_WIDE, "iput-wide", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x5B, IPUT_OBJECT, "iput-object", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5D, IPUT_BYTE, "iput-byte", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5E, IPUT_CHAR, "iput-char", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5F, IPUT_SHORT, "iput-short", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x60, SGET, "sget", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x61, SGET_WIDE, "sget-wide", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x62, SGET_OBJECT, "sget-object", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x64, SGET_BYTE, "sget-byte", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x65, SGET_CHAR, "sget-char", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x66, SGET_SHORT, "sget-short", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x67, SPUT, "sput", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x68, SPUT_WIDE, "sput-wide", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x69, SPUT_OBJECT, "sput-object", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6B, SPUT_BYTE, "sput-byte", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6C, SPUT_CHAR, "sput-char", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6D, SPUT_SHORT, "sput-short", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x71, INVOKE_STATIC, "invoke-static", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
+  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, kIndexNone, kReturn, kVerifyNone) \
+  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
+  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x79, UNUSED_79, "unused-79", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x7A, UNUSED_7A, "unused-7a", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x7B, NEG_INT, "neg-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x7C, NOT_INT, "not-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x7D, NEG_LONG, "neg-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7E, NOT_LONG, "not-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7F, NEG_FLOAT, "neg-float", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x80, NEG_DOUBLE, "neg-double", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x81, INT_TO_LONG, "int-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x84, LONG_TO_INT, "long-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x90, ADD_INT, "add-int", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x91, SUB_INT, "sub-int", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x92, MUL_INT, "mul-int", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x93, DIV_INT, "div-int", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x94, REM_INT, "rem-int", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x95, AND_INT, "and-int", k23x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x96, OR_INT, "or-int", k23x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x97, XOR_INT, "xor-int", k23x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x98, SHL_INT, "shl-int", k23x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x99, SHR_INT, "shr-int", k23x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9A, USHR_INT, "ushr-int", k23x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9B, ADD_LONG, "add-long", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9C, SUB_LONG, "sub-long", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9D, MUL_LONG, "mul-long", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9E, DIV_LONG, "div-long", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9F, REM_LONG, "rem-long", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA0, AND_LONG, "and-long", k23x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA1, OR_LONG, "or-long", k23x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA2, XOR_LONG, "xor-long", k23x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA3, SHL_LONG, "shl-long", k23x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA4, SHR_LONG, "shr-long", k23x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA5, USHR_LONG, "ushr-long", k23x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA6, ADD_FLOAT, "add-float", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA7, SUB_FLOAT, "sub-float", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA8, MUL_FLOAT, "mul-float", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA9, DIV_FLOAT, "div-float", k23x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAA, REM_FLOAT, "rem-float", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAB, ADD_DOUBLE, "add-double", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAC, SUB_DOUBLE, "sub-double", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAD, MUL_DOUBLE, "mul-double", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAE, DIV_DOUBLE, "div-double", k23x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAF, REM_DOUBLE, "rem-double", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
+  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
+  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
+  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
+  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
+  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
+  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD1, RSUB_INT, "rsub-int", k22s, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
+  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF3, UNUSED_F3, "unused-f3", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF4, UNUSED_F4, "unused-f4", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF5, UNUSED_F5, "unused-f5", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF6, UNUSED_F6, "unused-f6", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
+  /* TODO(narayan): The following two entries are placeholders. */ \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexUnknown, 0, kVerifyError) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexUnknown, 0, kVerifyError) \
+  V(0xFC, UNUSED_FC, "unused-fc", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFD, UNUSED_FD, "unused-fd", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, kVerifyError)
 
 #define DEX_INSTRUCTION_FORMAT_LIST(V) \
   V(k10x) \
@@ -293,7 +294,6 @@
   V(k22t) \
   V(k22s) \
   V(k22c) \
-  V(k25x) \
   V(k32x) \
   V(k30t) \
   V(k31t) \
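
DEX_INSTRUCTION_LIST is an X-macro: each consumer defines its own V and expands
the list through it. After this change a V row carries seven columns -- opcode,
enum name, printable name, format, index type, flags, and verify flags -- since
the old boolean "has result" column between the format and the index type is
gone; that is why the INSTRUCTION_CASE and INSTRUCTION_VISITOR macros further
down drop from eight parameters to seven. A minimal sketch of one common
expansion, generating the opcode enum (the macro name here is illustrative):

  enum Code {
  #define INSTRUCTION_ENUM(opcode, cname, p, f, i, a, v) cname = (opcode),
    DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
  #undef INSTRUCTION_ENUM
  };
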
diff --git a/runtime/dex_instruction_test.cc b/runtime/dex_instruction_test.cc
index 671ac0e..95e4181 100644
--- a/runtime/dex_instruction_test.cc
+++ b/runtime/dex_instruction_test.cc
@@ -28,4 +28,107 @@
   EXPECT_EQ(Instruction::kVerifyNone, Instruction::VerifyFlagsOf(nop));
 }
 
+static void Build45cc(uint8_t num_args, uint16_t method_idx, uint16_t proto_idx,
+                      uint16_t arg_regs, uint16_t* out) {
+  // A = num argument registers
+  // B = method_idx
+  // C - G = argument registers
+  // H = proto_idx
+  //
+  // op = 0xFA
+  //
+  // format:
+  // AG op BBBB FEDC HHHH
+  out[0] = 0;
+  out[0] |= (num_args << 12);
+  out[0] |= 0x00FA;
+
+  out[1] = method_idx;
+  out[2] = arg_regs;
+  out[3] = proto_idx;
+}
+
+static void Build4rcc(uint16_t num_args, uint16_t method_idx, uint16_t proto_idx,
+                      uint16_t arg_regs_start, uint16_t* out) {
+  // A = num argument registers
+  // B = method_idx
+  // C = first argument register
+  // H = proto_idx
+  //
+  // op = 0xFB
+  //
+  // format:
+  // AA op BBBB CCCC HHHH
+  out[0] = 0;
+  out[0] |= (num_args << 8);
+  out[0] |= 0x00FB;
+
+  out[1] = method_idx;
+  out[2] = arg_regs_start;
+  out[3] = proto_idx;
+}
+
+TEST(Instruction, PropertiesOf45cc) {
+  uint16_t instruction[4];
+  Build45cc(4u /* num_args */, 16u /* method_idx */, 32u /* proto_idx */,
+            0xcafe /* arg_regs */, instruction);
+
+  const Instruction* ins = Instruction::At(instruction);
+  ASSERT_EQ(4u, ins->SizeInCodeUnits());
+
+  ASSERT_TRUE(ins->HasVRegA());
+  ASSERT_EQ(4, ins->VRegA());
+  ASSERT_EQ(4u, ins->VRegA_45cc());
+  ASSERT_EQ(4u, ins->VRegA_45cc(instruction[0]));
+
+  ASSERT_TRUE(ins->HasVRegB());
+  ASSERT_EQ(16, ins->VRegB());
+  ASSERT_EQ(16u, ins->VRegB_45cc());
+
+  ASSERT_TRUE(ins->HasVRegC());
+  ASSERT_EQ(0xe, ins->VRegC());
+  ASSERT_EQ(0xe, ins->VRegC_45cc());
+
+  ASSERT_TRUE(ins->HasVRegH());
+  ASSERT_EQ(32, ins->VRegH());
+  ASSERT_EQ(32, ins->VRegH_45cc());
+
+  ASSERT_TRUE(ins->HasVarArgs());
+
+  uint32_t arg_regs[Instruction::kMaxVarArgRegs];
+  ins->GetVarArgs(arg_regs);
+  ASSERT_EQ(0xeu, arg_regs[0]);
+  ASSERT_EQ(0xfu, arg_regs[1]);
+  ASSERT_EQ(0xau, arg_regs[2]);
+  ASSERT_EQ(0xcu, arg_regs[3]);
+}
+
+TEST(Instruction, PropertiesOf4rcc) {
+  uint16_t instruction[4];
+  Build4rcc(4u /* num_args */, 16u /* method_idx */, 32u /* proto_idx */,
+            0xcafe /* arg_regs_start */, instruction);
+
+  const Instruction* ins = Instruction::At(instruction);
+  ASSERT_EQ(4u, ins->SizeInCodeUnits());
+
+  ASSERT_TRUE(ins->HasVRegA());
+  ASSERT_EQ(4, ins->VRegA());
+  ASSERT_EQ(4u, ins->VRegA_4rcc());
+  ASSERT_EQ(4u, ins->VRegA_4rcc(instruction[0]));
+
+  ASSERT_TRUE(ins->HasVRegB());
+  ASSERT_EQ(16, ins->VRegB());
+  ASSERT_EQ(16u, ins->VRegB_4rcc());
+
+  ASSERT_TRUE(ins->HasVRegC());
+  ASSERT_EQ(0xcafe, ins->VRegC());
+  ASSERT_EQ(0xcafe, ins->VRegC_4rcc());
+
+  ASSERT_TRUE(ins->HasVRegH());
+  ASSERT_EQ(32, ins->VRegH());
+  ASSERT_EQ(32, ins->VRegH_4rcc());
+
+  ASSERT_FALSE(ins->HasVarArgs());
+}
+
 }  // namespace art
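(The assertions in PropertiesOf45cc follow directly from the 45cc layout: the third code unit packs vC-vF one nibble each, lowest nibble first, so arg_regs = 0xcafe decodes to vC=0xe, vD=0xf, vE=0xa, vF=0xc. A standalone sketch of that decoding, independent of the Instruction class — DecodeArgRegs45cc is illustrative only:

#include <cstdint>
#include <cassert>

// Pull the four argument registers out of the F|E|D|C code unit of a 45cc
// instruction, lowest nibble (vC) first.
static void DecodeArgRegs45cc(uint16_t fedc, uint32_t out[4]) {
  for (int i = 0; i < 4; ++i) {
    out[i] = (fedc >> (4 * i)) & 0xF;
  }
}

int main() {
  uint32_t regs[4];
  DecodeArgRegs45cc(0xcafe, regs);
  assert(regs[0] == 0xe);  // vC
  assert(regs[1] == 0xf);  // vD
  assert(regs[2] == 0xa);  // vE
  assert(regs[3] == 0xc);  // vF
  return 0;
}
)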
diff --git a/runtime/dex_instruction_utils.h b/runtime/dex_instruction_utils.h
index 1ae2b1b..2849cd8 100644
--- a/runtime/dex_instruction_utils.h
+++ b/runtime/dex_instruction_utils.h
@@ -49,6 +49,16 @@
 
 // NOTE: The following functions disregard quickened instructions.
 
+// By "direct" const we mean to exclude const-string and const-class
+// which load data from somewhere else, i.e. indirectly.
+constexpr bool IsInstructionDirectConst(Instruction::Code opcode) {
+  return Instruction::CONST_4 <= opcode && opcode <= Instruction::CONST_WIDE_HIGH16;
+}
+
+constexpr bool IsInstructionConstWide(Instruction::Code opcode) {
+  return Instruction::CONST_WIDE_16 <= opcode && opcode <= Instruction::CONST_WIDE_HIGH16;
+}
+
 constexpr bool IsInstructionReturn(Instruction::Code opcode) {
   return Instruction::RETURN_VOID <= opcode && opcode <= Instruction::RETURN_OBJECT;
 }
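(These predicates work only because the opcode constants are contiguous in the enum, so one range check covers the whole family. A minimal usage sketch, assuming only the Instruction helpers already used elsewhere in this change; CountDirectConsts is hypothetical:

// Count the "direct" const instructions in a code item.
static size_t CountDirectConsts(const uint16_t* insns, size_t size_in_code_units) {
  size_t count = 0;
  for (size_t i = 0; i < size_in_code_units; ) {
    const Instruction* inst = Instruction::At(&insns[i]);
    if (IsInstructionDirectConst(inst->Opcode())) {
      ++count;
    }
    i += inst->SizeInCodeUnits();  // advance by this instruction's width
  }
  return count;
}
)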
diff --git a/runtime/dex_instruction_visitor.h b/runtime/dex_instruction_visitor.h
index 795b95b..42af6a9 100644
--- a/runtime/dex_instruction_visitor.h
+++ b/runtime/dex_instruction_visitor.h
@@ -32,7 +32,7 @@
     while (i < size_in_code_units) {
       const Instruction* inst = Instruction::At(&code[i]);
       switch (inst->Opcode()) {
-#define INSTRUCTION_CASE(o, cname, p, f, r, i, a, v)  \
+#define INSTRUCTION_CASE(o, cname, p, f, i, a, v)  \
         case Instruction::cname: {                    \
           derived->Do_ ## cname(inst);                \
           break;                                      \
@@ -50,7 +50,7 @@
 
  private:
   // Specific handlers for each instruction.
-#define INSTRUCTION_VISITOR(o, cname, p, f, r, i, a, v)    \
+#define INSTRUCTION_VISITOR(o, cname, p, f, i, a, v)    \
   void Do_ ## cname(const Instruction* inst) {             \
     T* derived = static_cast<T*>(this);                    \
     derived->Do_Default(inst);                             \
diff --git a/runtime/elf.h b/runtime/elf.h
index d1efc92..63b18c5 100644
--- a/runtime/elf.h
+++ b/runtime/elf.h
@@ -1284,6 +1284,7 @@
 
   SHT_MIPS_REGINFO        = 0x70000006, // Register usage information
   SHT_MIPS_OPTIONS        = 0x7000000d, // General options
+  SHT_MIPS_ABIFLAGS       = 0x7000002a, // ABI flags section
 
   SHT_HIPROC        = 0x7fffffff, // Highest processor arch-specific type.
   SHT_LOUSER        = 0x80000000, // Lowest type reserved for applications.
@@ -1606,7 +1607,8 @@
   // MIPS program header types.
   PT_MIPS_REGINFO  = 0x70000000,  // Register usage information.
   PT_MIPS_RTPROC   = 0x70000001,  // Runtime procedure table.
-  PT_MIPS_OPTIONS  = 0x70000002   // Options segment.
+  PT_MIPS_OPTIONS  = 0x70000002,  // Options segment.
+  PT_MIPS_ABIFLAGS = 0x70000003   // ABI flags segment.
 };
 
 // Segment flag bits.
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 723ee74..096f003 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -32,84 +32,6 @@
 
 namespace art {
 
-// -------------------------------------------------------------------
-// Binary GDB JIT Interface as described in
-//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
-extern "C" {
-  typedef enum {
-    JIT_NOACTION = 0,
-    JIT_REGISTER_FN,
-    JIT_UNREGISTER_FN
-  } JITAction;
-
-  struct JITCodeEntry {
-    JITCodeEntry* next_;
-    JITCodeEntry* prev_;
-    const uint8_t *symfile_addr_;
-    uint64_t symfile_size_;
-  };
-
-  struct JITDescriptor {
-    uint32_t version_;
-    uint32_t action_flag_;
-    JITCodeEntry* relevant_entry_;
-    JITCodeEntry* first_entry_;
-  };
-
-  // GDB will place breakpoint into this function.
-  // To prevent GCC from inlining or removing it we place noinline attribute
-  // and inline assembler statement inside.
-  void __attribute__((noinline)) __jit_debug_register_code();
-  void __attribute__((noinline)) __jit_debug_register_code() {
-    __asm__("");
-  }
-
-  // GDB will inspect contents of this descriptor.
-  // Static initialization is necessary to prevent GDB from seeing
-  // uninitialized descriptor.
-  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
-}
-
-
-static JITCodeEntry* CreateCodeEntry(const uint8_t *symfile_addr,
-                                     uintptr_t symfile_size) {
-  JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr;
-  entry->symfile_size_ = symfile_size;
-  entry->prev_ = nullptr;
-
-  // TODO: Do we need a lock here?
-  entry->next_ = __jit_debug_descriptor.first_entry_;
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry;
-  }
-  __jit_debug_descriptor.first_entry_ = entry;
-  __jit_debug_descriptor.relevant_entry_ = entry;
-
-  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
-  __jit_debug_register_code();
-  return entry;
-}
-
-
-static void UnregisterCodeEntry(JITCodeEntry* entry) {
-  // TODO: Do we need a lock here?
-  if (entry->prev_ != nullptr) {
-    entry->prev_->next_ = entry->next_;
-  } else {
-    __jit_debug_descriptor.first_entry_ = entry->next_;
-  }
-
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry->prev_;
-  }
-
-  __jit_debug_descriptor.relevant_entry_ = entry;
-  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  __jit_debug_register_code();
-  delete entry;
-}
-
 template <typename ElfTypes>
 ElfFileImpl<ElfTypes>::ElfFileImpl(File* file, bool writable,
                                    bool program_header_only,
@@ -130,16 +52,17 @@
     hash_section_start_(nullptr),
     symtab_symbol_table_(nullptr),
     dynsym_symbol_table_(nullptr),
-    jit_elf_image_(nullptr),
-    jit_gdb_entry_(nullptr),
     requested_base_(requested_base) {
   CHECK(file != nullptr);
 }
 
 template <typename ElfTypes>
-ElfFileImpl<ElfTypes>* ElfFileImpl<ElfTypes>::Open(
-    File* file, bool writable, bool program_header_only,
-    std::string* error_msg, uint8_t* requested_base) {
+ElfFileImpl<ElfTypes>* ElfFileImpl<ElfTypes>::Open(File* file,
+                                                   bool writable,
+                                                   bool program_header_only,
+                                                   bool low_4gb,
+                                                   std::string* error_msg,
+                                                   uint8_t* requested_base) {
   std::unique_ptr<ElfFileImpl<ElfTypes>> elf_file(new ElfFileImpl<ElfTypes>
       (file, writable, program_header_only, requested_base));
   int prot;
@@ -151,26 +74,29 @@
     prot = PROT_READ;
     flags = MAP_PRIVATE;
   }
-  if (!elf_file->Setup(prot, flags, error_msg)) {
+  if (!elf_file->Setup(prot, flags, low_4gb, error_msg)) {
     return nullptr;
   }
   return elf_file.release();
 }
 
 template <typename ElfTypes>
-ElfFileImpl<ElfTypes>* ElfFileImpl<ElfTypes>::Open(
-    File* file, int prot, int flags, std::string* error_msg) {
+ElfFileImpl<ElfTypes>* ElfFileImpl<ElfTypes>::Open(File* file,
+                                                   int prot,
+                                                   int flags,
+                                                   bool low_4gb,
+                                                   std::string* error_msg) {
   std::unique_ptr<ElfFileImpl<ElfTypes>> elf_file(new ElfFileImpl<ElfTypes>
       (file, (prot & PROT_WRITE) == PROT_WRITE, /*program_header_only*/false,
       /*requested_base*/nullptr));
-  if (!elf_file->Setup(prot, flags, error_msg)) {
+  if (!elf_file->Setup(prot, flags, low_4gb, error_msg)) {
     return nullptr;
   }
   return elf_file.release();
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::Setup(int prot, int flags, std::string* error_msg) {
+bool ElfFileImpl<ElfTypes>::Setup(int prot, int flags, bool low_4gb, std::string* error_msg) {
   int64_t temp_file_length = file_->GetLength();
   if (temp_file_length < 0) {
     errno = -temp_file_length;
@@ -189,8 +115,14 @@
   if (program_header_only_) {
     // first just map ELF header to get program header size information
     size_t elf_header_size = sizeof(Elf_Ehdr);
-    if (!SetMap(MemMap::MapFile(elf_header_size, prot, flags, file_->Fd(), 0,
-                                file_->GetPath().c_str(), error_msg),
+    if (!SetMap(MemMap::MapFile(elf_header_size,
+                                prot,
+                                flags,
+                                file_->Fd(),
+                                0,
+                                low_4gb,
+                                file_->GetPath().c_str(),
+                                error_msg),
                 error_msg)) {
       return false;
     }
@@ -202,16 +134,28 @@
                                 sizeof(Elf_Ehdr), file_->GetPath().c_str());
       return false;
     }
-    if (!SetMap(MemMap::MapFile(program_header_size, prot, flags, file_->Fd(), 0,
-                                file_->GetPath().c_str(), error_msg),
+    if (!SetMap(MemMap::MapFile(program_header_size,
+                                prot,
+                                flags,
+                                file_->Fd(),
+                                0,
+                                low_4gb,
+                                file_->GetPath().c_str(),
+                                error_msg),
                 error_msg)) {
       *error_msg = StringPrintf("Failed to map ELF program headers: %s", error_msg->c_str());
       return false;
     }
   } else {
     // otherwise map entire file
-    if (!SetMap(MemMap::MapFile(file_->GetLength(), prot, flags, file_->Fd(), 0,
-                                file_->GetPath().c_str(), error_msg),
+    if (!SetMap(MemMap::MapFile(file_->GetLength(),
+                                prot,
+                                flags,
+                                file_->Fd(),
+                                0,
+                                low_4gb,
+                                file_->GetPath().c_str(),
+                                error_msg),
                 error_msg)) {
       *error_msg = StringPrintf("Failed to map ELF file: %s", error_msg->c_str());
       return false;
@@ -332,10 +276,6 @@
   STLDeleteElements(&segments_);
   delete symtab_symbol_table_;
   delete dynsym_symbol_table_;
-  delete jit_elf_image_;
-  if (jit_gdb_entry_) {
-    UnregisterCodeEntry(jit_gdb_entry_);
-  }
 }
 
 template <typename ElfTypes>
@@ -1124,7 +1064,7 @@
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::Load(bool executable, std::string* error_msg) {
+bool ElfFileImpl<ElfTypes>::Load(bool executable, bool low_4gb, std::string* error_msg) {
   CHECK(program_header_only_) << file_->GetPath();
 
   if (executable) {
@@ -1190,7 +1130,10 @@
       }
       std::unique_ptr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
                                                            reserve_base_override,
-                                                           loaded_size, PROT_NONE, false, false,
+                                                           loaded_size,
+                                                           PROT_NONE,
+                                                           low_4gb,
+                                                           false,
                                                            error_msg));
       if (reserve.get() == nullptr) {
         *error_msg = StringPrintf("Failed to allocate %s: %s",
@@ -1258,9 +1201,12 @@
       std::unique_ptr<MemMap> segment(
           MemMap::MapFileAtAddress(p_vaddr,
                                    program_header->p_filesz,
-                                   prot, flags, file_->Fd(),
+                                   prot,
+                                   flags,
+                                   file_->Fd(),
                                    program_header->p_offset,
-                                   true,  // implies MAP_FIXED
+                                   /*low_4gb*/false,
+                                   /*reuse*/true,  // implies MAP_FIXED
                                    file_->GetPath().c_str(),
                                    error_msg));
       if (segment.get() == nullptr) {
@@ -1356,11 +1302,6 @@
     return false;
   }
 
-  // Use GDB JIT support to do stack backtrace, etc.
-  if (executable) {
-    GdbJITSupport();
-  }
-
   return true;
 }
 
@@ -1451,50 +1392,6 @@
 }
 
 template <typename ElfTypes>
-void ElfFileImpl<ElfTypes>::GdbJITSupport() {
-  // We only get here if we only are mapping the program header.
-  DCHECK(program_header_only_);
-
-  // Well, we need the whole file to do this.
-  std::string error_msg;
-  // Make it MAP_PRIVATE so we can just give it to gdb if all the necessary
-  // sections are there.
-  std::unique_ptr<ElfFileImpl<ElfTypes>> all_ptr(
-      Open(const_cast<File*>(file_), PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
-  if (all_ptr.get() == nullptr) {
-    return;
-  }
-  ElfFileImpl<ElfTypes>& all = *all_ptr;
-
-  // We need the eh_frame for gdb but debug info might be present without it.
-  const Elf_Shdr* eh_frame = all.FindSectionByName(".eh_frame");
-  if (eh_frame == nullptr) {
-    return;
-  }
-
-  // Do we have interesting sections?
-  // We need to add in a strtab and symtab to the image.
-  // all is MAP_PRIVATE so it can be written to freely.
-  // We also already have strtab and symtab so we are fine there.
-  Elf_Ehdr& elf_hdr = all.GetHeader();
-  elf_hdr.e_entry = 0;
-  elf_hdr.e_phoff = 0;
-  elf_hdr.e_phnum = 0;
-  elf_hdr.e_phentsize = 0;
-  elf_hdr.e_type = ET_EXEC;
-
-  // Since base_address_ is 0 if we are actually loaded at a known address (i.e. this is boot.oat)
-  // and the actual address stuff starts at in regular files this is good.
-  if (!all.FixupDebugSections(reinterpret_cast<intptr_t>(base_address_))) {
-    LOG(ERROR) << "Failed to load GDB data";
-    return;
-  }
-
-  jit_gdb_entry_ = CreateCodeEntry(all.Begin(), all.Size());
-  gdb_file_mapping_.reset(all_ptr.release());
-}
-
-template <typename ElfTypes>
 bool ElfFileImpl<ElfTypes>::Strip(std::string* error_msg) {
   // ELF files produced by MCLinker look roughly like this
   //
@@ -1768,28 +1665,46 @@
   CHECK_NE(elf32_.get() == nullptr, elf64_.get() == nullptr);
 }
 
-ElfFile* ElfFile::Open(File* file, bool writable, bool program_header_only, std::string* error_msg,
+ElfFile* ElfFile::Open(File* file,
+                       bool writable,
+                       bool program_header_only,
+                       bool low_4gb,
+                       std::string* error_msg,
                        uint8_t* requested_base) {
   if (file->GetLength() < EI_NIDENT) {
     *error_msg = StringPrintf("File %s is too short to be a valid ELF file",
                               file->GetPath().c_str());
     return nullptr;
   }
-  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT, PROT_READ, MAP_PRIVATE, file->Fd(), 0,
-                                              file->GetPath().c_str(), error_msg));
+  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT,
+                                              PROT_READ,
+                                              MAP_PRIVATE,
+                                              file->Fd(),
+                                              0,
+                                              low_4gb,
+                                              file->GetPath().c_str(),
+                                              error_msg));
   if (map == nullptr || map->Size() != EI_NIDENT) {
     return nullptr;
   }
   uint8_t* header = map->Begin();
   if (header[EI_CLASS] == ELFCLASS64) {
-    ElfFileImpl64* elf_file_impl = ElfFileImpl64::Open(file, writable, program_header_only,
-                                                       error_msg, requested_base);
+    ElfFileImpl64* elf_file_impl = ElfFileImpl64::Open(file,
+                                                       writable,
+                                                       program_header_only,
+                                                       low_4gb,
+                                                       error_msg,
+                                                       requested_base);
     if (elf_file_impl == nullptr) {
       return nullptr;
     }
     return new ElfFile(elf_file_impl);
   } else if (header[EI_CLASS] == ELFCLASS32) {
-    ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file, writable, program_header_only,
-                                                       error_msg, requested_base);
+    ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file,
+                                                       writable,
+                                                       program_header_only,
+                                                       low_4gb,
+                                                       error_msg,
+                                                       requested_base);
     if (elf_file_impl == nullptr) {
       return nullptr;
     }
@@ -1804,25 +1719,41 @@
 }
 
 ElfFile* ElfFile::Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg) {
+  // low_4gb support not required for this path.
+  constexpr bool low_4gb = false;
   if (file->GetLength() < EI_NIDENT) {
     *error_msg = StringPrintf("File %s is too short to be a valid ELF file",
                               file->GetPath().c_str());
     return nullptr;
   }
-  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT, PROT_READ, MAP_PRIVATE, file->Fd(), 0,
-                                              file->GetPath().c_str(), error_msg));
+  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT,
+                                              PROT_READ,
+                                              MAP_PRIVATE,
+                                              file->Fd(),
+                                              0,
+                                              low_4gb,
+                                              file->GetPath().c_str(),
+                                              error_msg));
   if (map == nullptr || map->Size() != EI_NIDENT) {
     return nullptr;
   }
   uint8_t* header = map->Begin();
   if (header[EI_CLASS] == ELFCLASS64) {
-    ElfFileImpl64* elf_file_impl = ElfFileImpl64::Open(file, mmap_prot, mmap_flags, error_msg);
+    ElfFileImpl64* elf_file_impl = ElfFileImpl64::Open(file,
+                                                       mmap_prot,
+                                                       mmap_flags,
+                                                       low_4gb,
+                                                       error_msg);
     if (elf_file_impl == nullptr) {
       return nullptr;
     }
     return new ElfFile(elf_file_impl);
   } else if (header[EI_CLASS] == ELFCLASS32) {
-    ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file, mmap_prot, mmap_flags, error_msg);
+    ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file,
+                                                       mmap_prot,
+                                                       mmap_flags,
+                                                       low_4gb,
+                                                       error_msg);
     if (elf_file_impl == nullptr) {
       return nullptr;
     }
@@ -1844,8 +1775,8 @@
     return elf32_->func(__VA_ARGS__); \
   }
 
-bool ElfFile::Load(bool executable, std::string* error_msg) {
-  DELEGATE_TO_IMPL(Load, executable, error_msg);
+bool ElfFile::Load(bool executable, bool low_4gb, std::string* error_msg) {
+  DELEGATE_TO_IMPL(Load, executable, low_4gb, error_msg);
 }
 
 const uint8_t* ElfFile::FindDynamicSymbolAddress(const std::string& symbol_name) const {
@@ -1899,6 +1830,14 @@
   }
 }
 
+bool ElfFile::HasSection(const std::string& name) const {
+  if (elf64_.get() != nullptr) {
+    return elf64_->FindSectionByName(name) != nullptr;
+  } else {
+    return elf32_->FindSectionByName(name) != nullptr;
+  }
+}
+
 uint64_t ElfFile::FindSymbolAddress(unsigned section_type,
                                     const std::string& symbol_name,
                                     bool build_map) {
@@ -1910,7 +1849,7 @@
 }
 
 bool ElfFile::Strip(File* file, std::string* error_msg) {
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, /*low_4gb*/false, error_msg));
   if (elf_file.get() == nullptr) {
     return false;
   }
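(Taken together, these changes thread a single low_4gb flag from the public ElfFile::Open entry points through ElfFileImpl::Setup and Load down to every MemMap::MapFile call, so a caller can request 32-bit-addressable mappings once at the top. A hedged caller sketch using only the signatures introduced in this patch — file is assumed to be a valid File*, and error handling is abbreviated:

std::string error_msg;
std::unique_ptr<ElfFile> elf(ElfFile::Open(file,
                                           /*writable*/false,
                                           /*program_header_only*/true,
                                           /*low_4gb*/true,
                                           &error_msg));
if (elf == nullptr || !elf->Load(/*executable*/false, /*low_4gb*/true, &error_msg)) {
  LOG(ERROR) << "Failed to load ELF file: " << error_msg;
}
)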
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index 1188c97..c3616f7 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -38,15 +38,22 @@
 // ELFObjectFile.
 class ElfFile {
  public:
-  static ElfFile* Open(File* file, bool writable, bool program_header_only, std::string* error_msg,
+  static ElfFile* Open(File* file,
+                       bool writable,
+                       bool program_header_only,
+                       bool low_4gb,
+                       std::string* error_msg,
                        uint8_t* requested_base = nullptr);  // TODO: move arg to before error_msg.
   // Open with specific mmap flags. Always maps the whole file, not just the
   // program header sections.
-  static ElfFile* Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg);
+  static ElfFile* Open(File* file,
+                       int mmap_prot,
+                       int mmap_flags,
+                       std::string* error_msg);
   ~ElfFile();
 
   // Load segments into memory based on PT_LOAD program headers
-  bool Load(bool executable, std::string* error_msg);
+  bool Load(bool executable, bool low_4gb, std::string* error_msg);
 
   const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name) const;
 
@@ -62,6 +69,8 @@
 
   bool GetSectionOffsetAndSize(const char* section_name, uint64_t* offset, uint64_t* size) const;
 
+  bool HasSection(const std::string& name) const;
+
   uint64_t FindSymbolAddress(unsigned section_type,
                              const std::string& symbol_name,
                              bool build_map);
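(HasSection is a small convenience over FindSectionByName that works for both ELF32 and ELF64 files without the caller templating on ElfTypes. A minimal usage sketch — the .debug_info probe is illustrative, not from this patch:

if (elf->HasSection(".debug_info")) {
  // The file carries DWARF debug information.
}
)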
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
index 0f466bd..1cdbedc 100644
--- a/runtime/elf_file_impl.h
+++ b/runtime/elf_file_impl.h
@@ -48,9 +48,17 @@
   using Elf_Phdr = typename ElfTypes::Phdr;
   using Elf_Dyn = typename ElfTypes::Dyn;
 
-  static ElfFileImpl* Open(File* file, bool writable, bool program_header_only,
-                           std::string* error_msg, uint8_t* requested_base = nullptr);
-  static ElfFileImpl* Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg);
+  static ElfFileImpl* Open(File* file,
+                           bool writable,
+                           bool program_header_only,
+                           bool low_4gb,
+                           std::string* error_msg,
+                           uint8_t* requested_base = nullptr);
+  static ElfFileImpl* Open(File* file,
+                           int mmap_prot,
+                           int mmap_flags,
+                           bool low_4gb,
+                           std::string* error_msg);
   ~ElfFileImpl();
 
   const File& GetFile() const {
@@ -111,7 +119,7 @@
 
   // Load segments into memory based on PT_LOAD program headers.
   // executable is true at run time, false at compile time.
-  bool Load(bool executable, std::string* error_msg);
+  bool Load(bool executable, bool low_4gb, std::string* error_msg);
 
   bool Fixup(Elf_Addr base_address);
   bool FixupDynamic(Elf_Addr base_address);
@@ -129,7 +137,7 @@
  private:
   ElfFileImpl(File* file, bool writable, bool program_header_only, uint8_t* requested_base);
 
-  bool Setup(int prot, int flags, std::string* error_msg);
+  bool Setup(int prot, int flags, bool low_4gb, std::string* error_msg);
 
   bool SetMap(MemMap* map, std::string* error_msg);
 
@@ -213,12 +221,6 @@
   SymbolTable* symtab_symbol_table_;
   SymbolTable* dynsym_symbol_table_;
 
-  // Support for GDB JIT
-  uint8_t* jit_elf_image_;
-  JITCodeEntry* jit_gdb_entry_;
-  std::unique_ptr<ElfFileImpl<ElfTypes>> gdb_file_mapping_;
-  void GdbJITSupport();
-
   // Override the 'base' p_vaddr in the first LOAD segment with this value (if non-null).
   uint8_t* requested_base_;
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 21e4e44..08fec91 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,8 @@
 
 #include "entrypoint_utils.h"
 
-#include "art_method.h"
+#include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
@@ -41,35 +42,82 @@
 
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
                                     const InlineInfo& inline_info,
+                                    const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint32_t method_index = inline_info.GetMethodIndexAtDepth(inlining_depth);
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // This method is used by artQuickResolutionTrampoline before it sets up the
+  // passed parameters in a GC-friendly way, so we must never be suspended while
+  // executing it.
+  ScopedAssertNoThreadSuspension sants(Thread::Current(), __FUNCTION__);
+
+  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
   InvokeType invoke_type = static_cast<InvokeType>(
-        inline_info.GetInvokeTypeAtDepth(inlining_depth));
-  ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*));
-  if (!caller->IsRuntimeMethod()) {
-    return caller;
+        inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth));
+  ArtMethod* inlined_method = outer_method->GetDexCacheResolvedMethod(method_index,
+                                                                      kRuntimePointerSize);
+  if (!inlined_method->IsRuntimeMethod()) {
+    return inlined_method;
   }
 
-  // The method in the dex cache can be the runtime method responsible for invoking
+  // The method in the dex cache is the runtime method responsible for invoking
   // the stub that will then update the dex cache. Therefore, we need to do the
   // resolution ourselves.
 
-  // We first find the class loader of our caller. If it is the outer method, we can directly
-  // use its class loader. Otherwise, we also need to resolve our caller.
-  StackHandleScope<2> hs(Thread::Current());
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache()));
-  if (inlining_depth == 0) {
-    class_loader.Assign(outer_method->GetClassLoader());
+  // We first find the dex cache of our caller. If it is the outer method, we can directly
+  // use its dex cache. Otherwise, we also need to resolve our caller.
+  ArtMethod* caller = outer_method;
+  if (inlining_depth != 0) {
+    caller = GetResolvedMethod(outer_method,
+                               inline_info,
+                               encoding,
+                               inlining_depth - 1);
+  }
+  DCHECK_EQ(caller->GetDexCache(), outer_method->GetDexCache())
+      << "Compiler only supports inlining calls within the same dex cache";
+  const DexFile* dex_file = outer_method->GetDexFile();
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(method_index);
+
+  if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) {
+    // "charAt" special case. It is the only non-leaf method we inline across dex files.
+    if (kIsDebugBuild) {
+      const char* name = dex_file->StringDataByIdx(method_id.name_idx_);
+      DCHECK_EQ(std::string(name), "charAt");
+      DCHECK_EQ(std::string(dex_file->GetMethodShorty(method_id)), "CI")
+          << std::string(dex_file->GetMethodShorty(method_id));
+      DCHECK_EQ(std::string(dex_file->StringByTypeIdx(method_id.class_idx_)), "Ljava/lang/String;")
+          << std::string(dex_file->StringByTypeIdx(method_id.class_idx_));
+    }
+    mirror::Class* cls =
+        Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangString);
+    // Update the dex cache for future lookups.
+    caller->GetDexCache()->SetResolvedType(method_id.class_idx_, cls);
+    inlined_method = cls->FindVirtualMethod("charAt", "(I)C", kRuntimePointerSize);
   } else {
-    caller = GetResolvedMethod(outer_method, inline_info, inlining_depth - 1);
-    class_loader.Assign(caller->GetClassLoader());
+    mirror::Class* klass = caller->GetDexCache()->GetResolvedType(method_id.class_idx_);
+    DCHECK_EQ(klass->GetDexCache(), caller->GetDexCache())
+        << "Compiler only supports inlining calls within the same dex cache";
+    switch (invoke_type) {
+      case kDirect:
+      case kStatic:
+        inlined_method =
+            klass->FindDirectMethod(klass->GetDexCache(), method_index, kRuntimePointerSize);
+        break;
+      case kSuper:
+      case kVirtual:
+        inlined_method =
+            klass->FindVirtualMethod(klass->GetDexCache(), method_index, kRuntimePointerSize);
+        break;
+      default:
+        LOG(FATAL) << "Unimplemented inlined invocation type: " << invoke_type;
+        UNREACHABLE();
+    }
   }
 
-  return class_linker->ResolveMethod(
-      *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type);
+  // Update the dex cache for future lookups. Note that for static methods this is safe
+  // even while the class is being initialized, as the ArtMethod's entrypoint at this
+  // point is still the resolution trampoline.
+  outer_method->SetDexCacheResolvedMethod(method_index, inlined_method, kRuntimePointerSize);
+  return inlined_method;
 }
 
 inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
@@ -84,7 +132,7 @@
                                        ArtMethod* method,
                                        Thread* self, bool* slow_path) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size);
   if (UNLIKELY(klass == nullptr)) {
     klass = class_linker->ResolveType(type_idx, method);
@@ -102,6 +150,11 @@
       *slow_path = true;
       return nullptr;  // Failure
     }
+    if (UNLIKELY(klass->IsClassClass())) {
+      ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", PrettyDescriptor(klass).c_str());
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
@@ -173,7 +226,10 @@
     if (klass == nullptr) {
       return nullptr;
     }
-    return klass->Alloc<kInstrumented>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
+    // CheckObjectAlloc can cause thread suspension which means we may now be instrumented.
+    return klass->Alloc</*kInstrumented*/true>(
+        self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
   }
   DCHECK(klass != nullptr);
   return klass->Alloc<kInstrumented>(self, allocator_type);
@@ -193,10 +249,12 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    // Pass in false since the object can not be finalizable.
-    return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
+    // Pass in false since the object cannot be finalizable.
+    // CheckClassInitializedForObjectAlloc can cause thread suspension which means we may now be
+    // instrumented.
+    return klass->Alloc</*kInstrumented*/true, false>(self, heap->GetCurrentAllocator());
   }
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -207,7 +265,7 @@
                                                       Thread* self,
                                                       gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -224,7 +282,7 @@
     return nullptr;  // Failure
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = class_linker->ResolveType(type_idx, method);
@@ -265,9 +323,12 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                               klass->GetComponentSizeShift(),
-                                               heap->GetCurrentAllocator());
+    // CheckArrayAlloc can cause thread suspension which means we may now be instrumented.
+    return mirror::Array::Alloc</*kInstrumented*/true>(self,
+                                                       klass,
+                                                       component_count,
+                                                       klass->GetComponentSizeShift(),
+                                                       heap->GetCurrentAllocator());
   }
   return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
                                              klass->GetComponentSizeShift(), allocator_type);
@@ -299,8 +360,10 @@
 }
 
 template<FindFieldType type, bool access_check>
-inline ArtField* FindFieldFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                           Thread* self, size_t expected_size) {
+inline ArtField* FindFieldFromCode(uint32_t field_idx,
+                                   ArtMethod* referrer,
+                                   Thread* self,
+                                   size_t expected_size) REQUIRES(!Roles::uninterruptible_) {
   bool is_primitive;
   bool is_set;
   bool is_static;
@@ -316,7 +379,31 @@
     default:                     is_primitive = true;  is_set = true;  is_static = true;  break;
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ArtField* resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
+
+  ArtField* resolved_field;
+  if (access_check) {
+    // Slow path: According to JLS 13.4.8, a linkage error may occur if the compile-time
+    // qualifying type of a field differs from the resolved run-time qualifying type in
+    // its static-ness.
+    //
+    // In particular, don't assume the dex instruction already correctly knows whether the
+    // real field is static or not; the resolution must be performed without assuming either.
+    ArtMethod* method = referrer->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+
+    StackHandleScope<2> hs(self);
+    Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(method->GetDexCache()));
+    Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(method->GetClassLoader()));
+
+    resolved_field = class_linker->ResolveFieldJLS(*method->GetDexFile(),
+                                                   field_idx,
+                                                   h_dex_cache,
+                                                   h_class_loader);
+  } else {
+    // Fast path: The verifier would already have called ResolveFieldJLS, and we would not
+    // be executing here if there were a static/non-static mismatch.
+    resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
+  }
+
   if (UNLIKELY(resolved_field == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
@@ -401,22 +488,21 @@
     mirror::Object* null_this = nullptr;
     HandleWrapper<mirror::Object> h_this(
         hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
-    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
+    constexpr ClassLinker::ResolveMode resolve_mode =
+        access_check ? ClassLinker::kForceICCECheck
+                     : ClassLinker::kNoICCECheckForCache;
+    resolved_method = class_linker->ResolveMethod<resolve_mode>(self, method_idx, referrer, type);
   }
+  // Resolution and access check.
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    // Maintain interpreter-like semantics where NullPointerException is thrown
-    // after potential NoSuchMethodError from class linker.
-    ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-    return nullptr;  // Failure.
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
     bool can_access_resolved_method =
-        referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
-                                                         method_idx);
+        referrer->GetDeclaringClass()->CheckResolvedMethodAccess<type>(methods_class,
+                                                                       resolved_method,
+                                                                       method_idx);
     if (UNLIKELY(!can_access_resolved_method)) {
       DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
       return nullptr;  // Failure.
@@ -428,6 +514,22 @@
       return nullptr;  // Failure.
     }
   }
+  // Next, null pointer check.
+  if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an uninitialized reference. If it is a null constant, it must have been
+      // optimized out by the compiler. Do not throw NullPointerException.
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
+  }
   switch (type) {
     case kStatic:
     case kDirect:
@@ -447,29 +549,69 @@
       return klass->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
     }
     case kSuper: {
-      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
-      uint16_t vtable_index = resolved_method->GetMethodIndex();
-      if (access_check) {
-        // Check existence of super class.
-        if (super_class == nullptr || !super_class->HasVTable() ||
-            vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) {
-          // Behavior to agree with that of the verifier.
+      // TODO This lookup is quite slow.
+      // NB This is actually quite tricky to do any other way. We cannot use GetDeclaringClass since
+      //    that will actually not be what we want in some cases where there are miranda methods or
+      //    defaults. What we actually need is a GetContainingClass that says which class's
+      //    virtuals this method comes from.
+      mirror::Class* referring_class = referrer->GetDeclaringClass();
+      uint16_t method_type_idx = referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
+      mirror::Class* method_reference_class = class_linker->ResolveType(method_type_idx, referrer);
+      if (UNLIKELY(method_reference_class == nullptr)) {
+        // Bad type idx.
+        CHECK(self->IsExceptionPending());
+        return nullptr;
+      } else if (!method_reference_class->IsInterface()) {
+        // It is not an interface. If the referring class is in the class hierarchy of the
+        // class referenced in the bytecode, we use the referring class's super class.
+        // Otherwise, we throw a NoSuchMethodError.
+        mirror::Class* super_class = nullptr;
+        if (method_reference_class->IsAssignableFrom(referring_class)) {
+          super_class = referring_class->GetSuperClass();
+        }
+        uint16_t vtable_index = resolved_method->GetMethodIndex();
+        if (access_check) {
+          // Check existence of super class.
+          if (super_class == nullptr ||
+              !super_class->HasVTable() ||
+              vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) {
+            // Behavior to agree with that of the verifier.
+            ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
+                                   resolved_method->GetName(), resolved_method->GetSignature());
+            return nullptr;  // Failure.
+          }
+        }
+        DCHECK(super_class != nullptr);
+        DCHECK(super_class->HasVTable());
+        return super_class->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
+      } else {
+        // It is an interface.
+        if (access_check) {
+          if (!method_reference_class->IsAssignableFrom((*this_object)->GetClass())) {
+            ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(resolved_method,
+                                                                    method_reference_class,
+                                                                    *this_object,
+                                                                    referrer);
+            return nullptr;  // Failure.
+          }
+        }
+        // TODO We can do better than this for a (compiled) fastpath.
+        ArtMethod* result = method_reference_class->FindVirtualMethodForInterfaceSuper(
+            resolved_method, class_linker->GetImagePointerSize());
+        // Throw a NoSuchMethodError if the lookup returned nullptr.
+        if (result == nullptr) {
           ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
                                  resolved_method->GetName(), resolved_method->GetSignature());
-          return nullptr;  // Failure.
         }
-      } else {
-        // Super class must exist.
-        DCHECK(super_class != nullptr);
+        return result;
       }
-      DCHECK(super_class->HasVTable());
-      return super_class->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-      ArtMethod* imt_method = (*this_object)->GetClass()->GetEmbeddedImTableEntry(
-          imt_index, class_linker->GetImagePointerSize());
-      if (!imt_method->IsImtConflictMethod() && !imt_method->IsImtUnimplementedMethod()) {
+      uint32_t imt_index = resolved_method->GetImtIndex();
+      PointerSize pointer_size = class_linker->GetImagePointerSize();
+      ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
+          Get(imt_index, pointer_size);
+      if (!imt_method->IsRuntimeMethod()) {
         if (kIsDebugBuild) {
           mirror::Class* klass = (*this_object)->GetClass();
           ArtMethod* method = klass->FindVirtualMethodForInterface(
@@ -520,7 +662,8 @@
 inline ArtField* FindFieldFast(uint32_t field_idx, ArtMethod* referrer, FindFieldType type,
                                size_t expected_size) {
   ArtField* resolved_field =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx, sizeof(void*));
+      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx,
+                                                                     kRuntimePointerSize);
   if (UNLIKELY(resolved_field == nullptr)) {
     return nullptr;
   }
@@ -573,8 +716,9 @@
   if (UNLIKELY(this_object == nullptr && type != kStatic)) {
     return nullptr;
   }
+  mirror::Class* referring_class = referrer->GetDeclaringClass();
   ArtMethod* resolved_method =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedMethod(method_idx, sizeof(void*));
+      referring_class->GetDexCache()->GetResolvedMethod(method_idx, kRuntimePointerSize);
   if (UNLIKELY(resolved_method == nullptr)) {
     return nullptr;
   }
@@ -585,7 +729,6 @@
       return nullptr;
     }
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
     if (UNLIKELY(!referring_class->CanAccess(methods_class) ||
                  !referring_class->CanAccessMember(methods_class,
                                                    resolved_method->GetAccessFlags()))) {
@@ -594,16 +737,39 @@
     }
   }
   if (type == kInterface) {  // Most common form of slow path dispatch.
-    return this_object->GetClass()->FindVirtualMethodForInterface(resolved_method, sizeof(void*));
+    return this_object->GetClass()->FindVirtualMethodForInterface(resolved_method,
+                                                                  kRuntimePointerSize);
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+    // TODO This lookup is rather slow.
+    uint16_t method_type_idx = referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
+    mirror::Class* method_reference_class =
+        referring_class->GetDexCache()->GetResolvedType(method_type_idx);
+    if (method_reference_class == nullptr) {
+      // Need to do full type resolution...
+      return nullptr;
+    } else if (!method_reference_class->IsInterface()) {
+      // It is not an interface. If the referring class is in the class hierarchy of the
+      // class referenced in the bytecode, we use the referring class's super class.
+      // Otherwise, we cannot resolve the method.
+      if (!method_reference_class->IsAssignableFrom(referring_class)) {
+        return nullptr;
+      }
+      mirror::Class* super_class = referring_class->GetSuperClass();
+      if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+        // The super class does not have the method.
+        return nullptr;
+      }
+      return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), kRuntimePointerSize);
+    } else {
+      return method_reference_class->FindVirtualMethodForInterfaceSuper(
+          resolved_method, kRuntimePointerSize);
+    }
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+        resolved_method->GetMethodIndex(), kRuntimePointerSize);
   }
 }
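(The recursion in GetResolvedMethod above resolves the inline stack outside-in: the method inlined at depth d can only be looked up once its caller at depth d - 1 is resolved, bottoming out at the outer, depth-0 method. A toy model of that traversal — Frame and ResolveAtDepth are illustrative stand-ins for InlineInfo and the real lookup:

#include <vector>

struct Frame { int method_index; };  // toy stand-in for one InlineInfo depth

// Mirror GetResolvedMethod's shape: resolve the caller first, then use its
// context (in the real code, its dex cache) to resolve the current depth.
static int ResolveAtDepth(const std::vector<Frame>& inline_stack, int depth) {
  if (depth != 0) {
    (void)ResolveAtDepth(inline_stack, depth - 1);  // caller at depth - 1
  }
  return inline_stack[depth].method_index;
}
)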
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index e57569e..fd1c02f 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/mutex.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -33,7 +34,6 @@
 #include "oat_quick_method_header.h"
 #include "reflection.h"
 #include "scoped_thread_state_change.h"
-#include "ScopedLocalRef.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -49,7 +49,7 @@
     return nullptr;  // Failure
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   mirror::Class* klass = referrer->GetDexCacheResolvedType<false>(type_idx, pointer_size);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = class_linker->ResolveType(type_idx, referrer);
@@ -120,109 +120,13 @@
                                     heap->GetCurrentAllocator());
 }
 
-void ThrowStackOverflowError(Thread* self) {
-  if (self->IsHandlingStackOverflow()) {
-    LOG(ERROR) << "Recursive stack overflow.";
-    // We don't fail here because SetStackEndForStackOverflow will print better diagnostics.
-  }
-
-  self->SetStackEndForStackOverflow();  // Allow space on the stack for constructor to execute.
-  JNIEnvExt* env = self->GetJniEnv();
-  std::string msg("stack size ");
-  msg += PrettySize(self->GetStackSize());
-
-  // Avoid running Java code for exception initialization.
-  // TODO: Checks to make this a bit less brittle.
-
-  std::string error_msg;
-
-  // Allocate an uninitialized object.
-  ScopedLocalRef<jobject> exc(env,
-                              env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
-  if (exc.get() != nullptr) {
-    // "Initialize".
-    // StackOverflowError -> VirtualMachineError -> Error -> Throwable -> Object.
-    // Only Throwable has "custom" fields:
-    //   String detailMessage.
-    //   Throwable cause (= this).
-    //   List<Throwable> suppressedExceptions (= Collections.emptyList()).
-    //   Object stackState;
-    //   StackTraceElement[] stackTrace;
-    // Only Throwable has a non-empty constructor:
-    //   this.stackTrace = EmptyArray.STACK_TRACE_ELEMENT;
-    //   fillInStackTrace();
-
-    // detailMessage.
-    // TODO: Use String::FromModifiedUTF...?
-    ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg.c_str()));
-    if (s.get() != nullptr) {
-      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_detailMessage, s.get());
-
-      // cause.
-      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_cause, exc.get());
-
-      // suppressedExceptions.
-      ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
-          WellKnownClasses::java_util_Collections,
-          WellKnownClasses::java_util_Collections_EMPTY_LIST));
-      CHECK(emptylist.get() != nullptr);
-      env->SetObjectField(exc.get(),
-                          WellKnownClasses::java_lang_Throwable_suppressedExceptions,
-                          emptylist.get());
-
-      // stackState is set as result of fillInStackTrace. fillInStackTrace calls
-      // nativeFillInStackTrace.
-      ScopedLocalRef<jobject> stack_state_val(env, nullptr);
-      {
-        ScopedObjectAccessUnchecked soa(env);
-        stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
-      }
-      if (stack_state_val.get() != nullptr) {
-        env->SetObjectField(exc.get(),
-                            WellKnownClasses::java_lang_Throwable_stackState,
-                            stack_state_val.get());
-
-        // stackTrace.
-        ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
-            WellKnownClasses::libcore_util_EmptyArray,
-            WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT));
-        env->SetObjectField(exc.get(),
-                            WellKnownClasses::java_lang_Throwable_stackTrace,
-                            stack_trace_elem.get());
-      } else {
-        error_msg = "Could not create stack trace.";
-      }
-      // Throw the exception.
-      self->SetException(reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get())));
-    } else {
-      // Could not allocate a string object.
-      error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
-    }
-  } else {
-    error_msg = "Could not allocate StackOverflowError object.";
-  }
-
-  if (!error_msg.empty()) {
-    LOG(WARNING) << error_msg;
-    CHECK(self->IsExceptionPending());
-  }
-
-  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
-  self->ResetDefaultStackEnd();  // Return to default stack size.
-
-  // And restore protection if implicit checks are on.
-  if (!explicit_overflow_check) {
-    self->ProtectStack();
-  }
-}
-
 void CheckReferenceResult(mirror::Object* o, Thread* self) {
   if (o == nullptr) {
     return;
   }
   // Make sure that the result is an instance of the type this method was expected to return.
   mirror::Class* return_type = self->GetCurrentMethod(nullptr)->GetReturnType(true /* resolve */,
-                                                                              sizeof(void*));
+                                                                              kRuntimePointerSize);
 
   if (!o->InstanceOf(return_type)) {
     Runtime::Current()->GetJavaVM()->JniAbortF(nullptr,
@@ -285,7 +189,7 @@
       StackHandleScope<1> hs(soa.Self());
       auto h_interface_method(hs.NewHandle(soa.Decode<mirror::Method*>(interface_method_jobj)));
       // This can cause thread suspension.
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       mirror::Class* result_type =
           h_interface_method->GetArtMethod()->GetReturnType(true /* resolve */, pointer_size);
       mirror::Object* result_ref = soa.Decode<mirror::Object*>(result);
@@ -305,12 +209,14 @@
       mirror::Class* proxy_class = rcvr->GetClass();
       mirror::Method* interface_method = soa.Decode<mirror::Method*>(interface_method_jobj);
       ArtMethod* proxy_method = rcvr->GetClass()->FindVirtualMethodForInterface(
-          interface_method->GetArtMethod(), sizeof(void*));
-      auto* virtual_methods = proxy_class->GetVirtualMethodsPtr();
+          interface_method->GetArtMethod(), kRuntimePointerSize);
+      auto virtual_methods = proxy_class->GetVirtualMethodsSlice(kRuntimePointerSize);
       size_t num_virtuals = proxy_class->NumVirtualMethods();
-      size_t method_size = ArtMethod::Size(sizeof(void*));
+      size_t method_size = ArtMethod::Size(kRuntimePointerSize);
+      // Rely on the fact that the methods are contiguous to determine the index of the method in
+      // the slice.
       int throws_index = (reinterpret_cast<uintptr_t>(proxy_method) -
-          reinterpret_cast<uintptr_t>(virtual_methods)) / method_size;
+          reinterpret_cast<uintptr_t>(&virtual_methods.At(0))) / method_size;
       CHECK_LT(throws_index, static_cast<int>(num_virtuals));
       mirror::ObjectArray<mirror::Class>* declared_exceptions =
           proxy_class->GetThrows()->Get(throws_index);
@@ -364,36 +270,37 @@
       (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
   ArtMethod* outer_method = *caller_sp;
   ArtMethod* caller = outer_method;
-
-  if (outer_method != nullptr) {
-    const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
-    if (current_code->IsOptimized()) {
-      if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
-        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-        DCHECK(stack_map.IsValid());
-        if (stack_map.HasInlineInfo(encoding)) {
-          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-        }
-      } else {
-        // We're instrumenting, just use the StackVisitor which knows how to
-        // handle instrumented frames.
-        NthCallerVisitor visitor(Thread::Current(), 1, true);
-        visitor.WalkStack();
-        caller = visitor.caller;
+  if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
+    if (outer_method != nullptr) {
+      const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
+      DCHECK(current_code != nullptr);
+      DCHECK(current_code->IsOptimized());
+      uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+      CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
+      StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+      DCHECK(stack_map.IsValid());
+      if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
+        InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+        caller = GetResolvedMethod(outer_method,
+                                   inline_info,
+                                   encoding.inline_info_encoding,
+                                   inline_info.GetDepth(encoding.inline_info_encoding) - 1);
       }
     }
-  }
-
-  if (kIsDebugBuild && do_caller_check) {
-    // Note that do_caller_check is optional, as this method can be called by
-    // stubs, and tests without a proper call stack.
+    if (kIsDebugBuild && do_caller_check) {
+      // Note that do_caller_check is optional, as this method can be called by
+      // stubs and by tests without a proper call stack.
+      NthCallerVisitor visitor(Thread::Current(), 1, true);
+      visitor.WalkStack();
+      CHECK_EQ(caller, visitor.caller);
+    }
+  } else {
+    // We're instrumenting, just use the StackVisitor which knows how to
+    // handle instrumented frames.
     NthCallerVisitor visitor(Thread::Current(), 1, true);
     visitor.WalkStack();
-    CHECK_EQ(caller, visitor.caller);
+    caller = visitor.caller;
   }
 
   return caller;
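[Illustration: the GetDepth(...) - 1 indexing above follows the usual inline-stack convention: frames are recorded outermost-first, so the method directly containing the call site is the last entry. A toy model, not ART's CodeInfo API.]

#include <cassert>
#include <vector>

struct InlineFrame { int method_index; };  // stand-in for InlineInfo data

// Inline stacks are recorded outermost-first; the innermost (actual) caller
// is the deepest frame, i.e. index depth - 1.
static InlineFrame InnermostFrame(const std::vector<InlineFrame>& stack) {
  assert(!stack.empty());
  return stack[stack.size() - 1];
}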
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 0469ee6..a28376f 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -158,8 +158,6 @@
     uint32_t type_idx, ArtMethod* referrer, Thread* self, bool can_run_clinit, bool verify_access)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void ThrowStackOverflowError(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
-
 inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, uint32_t string_idx)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index 331de91..a81a7e7 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
@@ -86,7 +87,7 @@
 }
 
 // Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
-static constexpr size_t GetConstExprPointerSize(InstructionSet isa) {
+static constexpr PointerSize GetConstExprPointerSize(InstructionSet isa) {
   // constexpr must be a return statement.
   return (isa == kArm || isa == kThumb2) ? kArmPointerSize :
          isa == kArm64 ? kArm64PointerSize :
@@ -94,14 +95,14 @@
          isa == kMips64 ? kMips64PointerSize :
          isa == kX86 ? kX86PointerSize :
          isa == kX86_64 ? kX86_64PointerSize :
-         isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", 0) :
-         (LOG(FATAL) << "Unknown instruction set" << isa, 0);
+         isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", PointerSize::k32) :
+         (LOG(FATAL) << "Unknown instruction set" << isa, PointerSize::k32);
 }
 
 // Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
 static constexpr size_t GetCalleeSaveReturnPcOffset(InstructionSet isa,
                                                     Runtime::CalleeSaveType type) {
-  return GetCalleeSaveFrameSize(isa, type) - GetConstExprPointerSize(isa);
+  return GetCalleeSaveFrameSize(isa, type) - static_cast<size_t>(GetConstExprPointerSize(isa));
 }
 
 }  // namespace art
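[Illustration: the single-return shape above is forced by C++11 constexpr rules, and the comma operator lets the error arms keep the expression's type while still failing loudly. A standalone sketch of the idiom, with abort() standing in for LOG(FATAL).]

#include <cstdlib>

enum class PtrSize { k32 = 4, k64 = 8 };

constexpr PtrSize PtrSizeOf(int bits) {
  // C++11: one return statement; unknown inputs hit the comma-expression arm,
  // which aborts at runtime and is ill-formed in a constant expression.
  return bits == 32 ? PtrSize::k32 :
         bits == 64 ? PtrSize::k64 :
         (std::abort(), PtrSize::k32);
}

static_assert(PtrSizeOf(64) == PtrSize::k64, "sanity");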
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 4e4f851..4686a51 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "callee_save_frame.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/class-inl.h"
@@ -32,8 +33,8 @@
     uint32_t type_idx, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
-    mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, sizeof(void*)); \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
+    mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, kRuntimePointerSize); \
     if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -59,7 +60,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     if (LIKELY(klass->IsInitialized())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -85,7 +86,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     size_t byte_count = klass->GetObjectSize(); \
     byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
     mirror::Object* obj; \
@@ -136,7 +137,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, false, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, false, allocator_type); \
@@ -146,7 +147,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, true, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
@@ -170,7 +171,7 @@
   return mirror::String::AllocFromCharArray<instrumented_bool>(self, char_count, handle_array, \
                                                                offset, allocator_type); \
 } \
-extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( \
+extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( /* NOLINT */ \
     mirror::String* string, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   StackHandleScope<1> hs(self); \
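[Illustration: the parentheses added around the macro parameters above are standard macro hygiene; without them, a compound argument rebinds against the surrounding operators. Toy macros, not the ART ones.]

#define NOT_BAD(x)  (!x)     // NOT_BAD(a || b)  -> (!a || b)   wrong binding
#define NOT_GOOD(x) (!(x))   // NOT_GOOD(a || b) -> (!(a || b)) as intended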
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 968ac53..8db69a3 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -20,7 +20,7 @@
 namespace art {
 
 // Assignable test for code, won't throw.  Null and equality tests already performed
-extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(klass != nullptr);
   DCHECK(ref_class != nullptr);
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index fbf028d..86fb881 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -50,16 +50,16 @@
 extern "C" int art_quick_set64_static(uint32_t, int64_t);
 extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
 extern "C" int art_quick_set_obj_static(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_instance(uint32_t, void*);
-extern "C" uint8_t art_quick_get_boolean_instance(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_static(uint32_t);
-extern "C" uint8_t art_quick_get_boolean_static(uint32_t);
-extern "C" int16_t art_quick_get_short_instance(uint32_t, void*);
-extern "C" uint16_t art_quick_get_char_instance(uint32_t, void*);
-extern "C" int16_t art_quick_get_short_static(uint32_t);
-extern "C" uint16_t art_quick_get_char_static(uint32_t);
-extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
-extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" ssize_t art_quick_get_byte_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_boolean_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_byte_static(uint32_t);
+extern "C" size_t art_quick_get_boolean_static(uint32_t);
+extern "C" ssize_t art_quick_get_short_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_char_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_short_static(uint32_t);
+extern "C" size_t art_quick_get_char_static(uint32_t);
+extern "C" ssize_t art_quick_get32_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get32_static(uint32_t);
 extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
 extern "C" int64_t art_quick_get64_static(uint32_t);
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
@@ -77,6 +77,10 @@
 extern "C" void art_quick_lock_object(art::mirror::Object*);
 extern "C" void art_quick_unlock_object(art::mirror::Object*);
 
+// Lock entrypoints that do not inline any behavior (e.g., thin-locks).
+extern "C" void art_quick_lock_object_no_inline(art::mirror::Object*);
+extern "C" void art_quick_unlock_object_no_inline(art::mirror::Object*);
+
 // Math entrypoints.
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
@@ -93,7 +97,7 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
+extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -116,6 +120,8 @@
 extern "C" void art_quick_throw_div_zero();
 extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
 extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal(uintptr_t address);
 extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_string_bounds(int32_t index, int32_t limit);
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_EXTERNS_H_
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
new file mode 100644
index 0000000..2a206c2
--- /dev/null
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
+#define ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
+
+#include "base/logging.h"
+#include "entrypoints/jni/jni_entrypoints.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
+#include "quick_alloc_entrypoints.h"
+#include "quick_default_externs.h"
+#include "quick_entrypoints.h"
+
+namespace art {
+
+void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
+  // JNI
+  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
+
+  // Alloc
+  ResetQuickAllocEntryPoints(qpoints);
+
+  // DexCache
+  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
+  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
+  qpoints->pInitializeType = art_quick_initialize_type;
+  qpoints->pResolveString = art_quick_resolve_string;
+
+  // Field
+  qpoints->pSet8Instance = art_quick_set8_instance;
+  qpoints->pSet8Static = art_quick_set8_static;
+  qpoints->pSet16Instance = art_quick_set16_instance;
+  qpoints->pSet16Static = art_quick_set16_static;
+  qpoints->pSet32Instance = art_quick_set32_instance;
+  qpoints->pSet32Static = art_quick_set32_static;
+  qpoints->pSet64Instance = art_quick_set64_instance;
+  qpoints->pSet64Static = art_quick_set64_static;
+  qpoints->pSetObjInstance = art_quick_set_obj_instance;
+  qpoints->pSetObjStatic = art_quick_set_obj_static;
+  qpoints->pGetByteInstance = art_quick_get_byte_instance;
+  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
+  qpoints->pGetShortInstance = art_quick_get_short_instance;
+  qpoints->pGetCharInstance = art_quick_get_char_instance;
+  qpoints->pGet32Instance = art_quick_get32_instance;
+  qpoints->pGet64Instance = art_quick_get64_instance;
+  qpoints->pGetObjInstance = art_quick_get_obj_instance;
+  qpoints->pGetByteStatic = art_quick_get_byte_static;
+  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
+  qpoints->pGetShortStatic = art_quick_get_short_static;
+  qpoints->pGetCharStatic = art_quick_get_char_static;
+  qpoints->pGet32Static = art_quick_get32_static;
+  qpoints->pGet64Static = art_quick_get64_static;
+  qpoints->pGetObjStatic = art_quick_get_obj_static;
+
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
+  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
+
+  // JNI
+  qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodFastStart = JniMethodFastStart;
+  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  qpoints->pJniMethodEnd = JniMethodEnd;
+  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+  qpoints->pJniMethodFastEnd = JniMethodFastEnd;
+  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
+
+  // Locks
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    qpoints->pLockObject = art_quick_lock_object_no_inline;
+    qpoints->pUnlockObject = art_quick_unlock_object_no_inline;
+  } else {
+    qpoints->pLockObject = art_quick_lock_object;
+    qpoints->pUnlockObject = art_quick_unlock_object;
+  }
+
+  // Invocation
+  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
+  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
+  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
+  qpoints->pInvokeDirectTrampolineWithAccessCheck =
+      art_quick_invoke_direct_trampoline_with_access_check;
+  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
+      art_quick_invoke_interface_trampoline_with_access_check;
+  qpoints->pInvokeStaticTrampolineWithAccessCheck =
+      art_quick_invoke_static_trampoline_with_access_check;
+  qpoints->pInvokeSuperTrampolineWithAccessCheck =
+      art_quick_invoke_super_trampoline_with_access_check;
+  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
+      art_quick_invoke_virtual_trampoline_with_access_check;
+
+  // Thread
+  qpoints->pTestSuspend = art_quick_test_suspend;
+
+  // Throws
+  qpoints->pDeliverException = art_quick_deliver_exception;
+  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
+  qpoints->pThrowDivZero = art_quick_throw_div_zero;
+  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
+  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
+  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+  qpoints->pThrowStringBounds = art_quick_throw_string_bounds;
+
+  // Deoptimize
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
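[Illustration: this header is meant to be included once per architecture, with each entrypoints_init_<arch>.cc applying the shared defaults first and overriding only what that ISA needs. A shape-only sketch with toy types, not the real QuickEntryPoints.]

struct ToyQPoints { void (*pLockObject)(void*); void (*pMemcpy)(void*); };

static void ToyDefaultInit(ToyQPoints* q) {
  q->pLockObject = nullptr;  // shared defaults would go here
  q->pMemcpy = nullptr;
}

static void ArchMemcpy(void*) {}

void ToyInitEntryPointsForArch(ToyQPoints* q) {
  ToyDefaultInit(q);        // 1. common defaults
  q->pMemcpy = ArchMemcpy;  // 2. arch-specific override
}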
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index dfd9fcd..f35c2fe 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -29,39 +29,51 @@
 
 namespace art {
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-
+NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
-    LOG(INFO) << "Deopting:";
-    self->Dump(LOG(INFO));
+    if (single_frame) {
+      // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+      // specialized visitor that will show whether a method is Quick or Shadow.
+    } else {
+      LOG(INFO) << "Deopting:";
+      self->Dump(LOG(INFO));
+    }
   }
 
   self->AssertHasDeoptimizationContext();
-  self->SetException(Thread::GetDeoptimizationException());
-  self->QuickDeliverException();
+  QuickExceptionHandler exception_handler(self, true);
+  if (single_frame) {
+    exception_handler.DeoptimizeSingleFrame();
+  } else {
+    exception_handler.DeoptimizeStack();
+  }
+  uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
+  if (exception_handler.IsFullFragmentDone()) {
+    exception_handler.DoLongJump(true);
+  } else {
+    exception_handler.DeoptimizePartialFragmentFixup(return_pc);
+    // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+    // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+    // line.
+    exception_handler.DoLongJump(false);
+  }
 }
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  artDeoptimizeImpl(self, false);
+}
+
+// This is called directly from compiled code by an HDeoptimize.
 extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-
-  // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
-  // specialized visitor that will show whether a method is Quick or Shadow.
-
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
-  self->PushDeoptimizationContext(return_value, false, self->GetException());
-
-  QuickExceptionHandler exception_handler(self, true);
-  exception_handler.DeoptimizeSingleFrame();
-  exception_handler.UpdateInstrumentationStack();
-  exception_handler.DeoptimizeSingleFrameArchDependentFixup();
-  // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
-  // be caller-saved. This has the downside that we cannot track incorrect register usage down the
-  // line.
-  exception_handler.DoLongJump(false);
+  self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
+  artDeoptimizeImpl(self, true);
 }
 
 }  // namespace art
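[Illustration: the refactoring above is the familiar pattern of two thin extern "C" entry points over one shared implementation selected by a flag; only the flag and the pushed deoptimization context differ. Schematically:]

static void DeoptImplSketch(bool single_frame) {
  // shared path: pick single-frame vs whole-stack deopt, fix up, long-jump
  (void)single_frame;
}

extern "C" void SketchDeoptimize()                 { DeoptImplSketch(false); }
extern "C" void SketchDeoptimizeFromCompiledCode() { DeoptImplSketch(true); }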
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index b12b118..c045e84 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -31,7 +31,7 @@
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, true, false);
 }
 
@@ -39,7 +39,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, false, false);
 }
 
@@ -48,14 +48,14 @@
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveStringFromCode(caller, string_idx);
 }
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 3d3f7a1..08e0d6e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -31,12 +31,12 @@
 namespace mirror {
 class Array;
 class Class;
+template<class MirrorType> class CompressedReference;
 class Object;
-template<class MirrorType>
-class CompressedReference;
 }  // namespace mirror
 
 class ArtMethod;
+template<class MirrorType> class GcRoot;
 class Thread;
 
 // Pointers to functions that are called by quick compiler generated code via thread-local storage.
@@ -52,10 +52,13 @@
 // JNI entrypoints.
 // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
 extern uint32_t JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern uint32_t JniMethodFastStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
                                      Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
@@ -72,16 +75,31 @@
                            Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 
+
 // Read barrier entrypoints.
-// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to this function directly.
-// For x86 and x86_64, compilers need a wrapper assembly function, to handle mismatch in ABI.
-// This is the read barrier slow path for instance and static fields and reference-type arrays.
-// TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
-// Ideally the slow path should only take one parameter "ref".
-extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj,
+//
+// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to these
+// functions directly.  For x86 and x86-64, compilers need a wrapper
+// assembly function, to handle mismatch in ABI.
+
+// Mark the heap reference `obj`. This entry point is used by read
+// barrier fast path implementations generated by the compiler to mark
+// an object that is referenced by a field of a gray object.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
+// Read barrier entrypoint for heap references.
+// This is the read barrier slow path for instance and static fields
+// and reference type arrays.
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+                                              mirror::Object* obj,
                                               uint32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
 
+// Read barrier entrypoint for GC roots.
+extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_
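[Illustration: the mark entrypoint contract above is that compiled code hands in a just-loaded reference and gets back the possibly forwarded to-space reference, typically in the same register. A toy model of that fast path, assuming a forwarding-pointer scheme; none of these names are ART's.]

struct ToyObj { ToyObj* forwarded; bool gray; };

static ToyObj* ToyMark(ToyObj* ref) {  // models artReadBarrierMark
  return ref->forwarded != nullptr ? ref->forwarded : ref;
}

static ToyObj* ToyReadBarrier(ToyObj* ref) {
  if (ref != nullptr && ref->gray) {  // compiler-inserted slow-path check
    ref = ToyMark(ref);
  }
  return ref;
}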
diff --git a/runtime/entrypoints/quick/quick_entrypoints_enum.h b/runtime/entrypoints/quick/quick_entrypoints_enum.h
index 5a95491..8de1137 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_enum.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_enum.h
@@ -36,7 +36,7 @@
 std::ostream& operator<<(std::ostream& os, const QuickEntrypointEnum& kind);
 
 // Translate a QuickEntrypointEnum value to the corresponding ThreadOffset.
-template <size_t pointer_size>
+template <PointerSize pointer_size>
 static ThreadOffset<pointer_size> GetThreadOffset(QuickEntrypointEnum trampoline) {
   switch (trampoline)
   {  // NOLINT(whitespace/braces)
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 73d8ae7..74c928a 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,7 +33,7 @@
   V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
   V(AllocStringFromString, void*, void*) \
 \
-  V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
+  V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
   V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
 \
   V(InitializeStaticStorage, void*, uint32_t) \
@@ -51,16 +51,16 @@
   V(Set64Static, int, uint32_t, int64_t) \
   V(SetObjInstance, int, uint32_t, void*, void*) \
   V(SetObjStatic, int, uint32_t, void*) \
-  V(GetByteInstance, int8_t, uint32_t, void*) \
-  V(GetBooleanInstance, uint8_t, uint32_t, void*) \
-  V(GetByteStatic, int8_t, uint32_t) \
-  V(GetBooleanStatic, uint8_t, uint32_t) \
-  V(GetShortInstance, int16_t, uint32_t, void*) \
-  V(GetCharInstance, uint16_t, uint32_t, void*) \
-  V(GetShortStatic, int16_t, uint32_t) \
-  V(GetCharStatic, uint16_t, uint32_t) \
-  V(Get32Instance, int32_t, uint32_t, void*) \
-  V(Get32Static, int32_t, uint32_t) \
+  V(GetByteInstance, ssize_t, uint32_t, void*) \
+  V(GetBooleanInstance, size_t, uint32_t, void*) \
+  V(GetByteStatic, ssize_t, uint32_t) \
+  V(GetBooleanStatic, size_t, uint32_t) \
+  V(GetShortInstance, ssize_t, uint32_t, void*) \
+  V(GetCharInstance, size_t, uint32_t, void*) \
+  V(GetShortStatic, ssize_t, uint32_t) \
+  V(GetCharStatic, size_t, uint32_t) \
+  V(Get32Instance, ssize_t, uint32_t, void*) \
+  V(Get32Static, ssize_t, uint32_t) \
   V(Get64Instance, int64_t, uint32_t, void*) \
   V(Get64Static, int64_t, uint32_t) \
   V(GetObjInstance, void*, uint32_t, void*) \
@@ -72,8 +72,10 @@
   V(HandleFillArrayData, void, void*, void*) \
 \
   V(JniMethodStart, uint32_t, Thread*) \
+  V(JniMethodFastStart, uint32_t, Thread*) \
   V(JniMethodStartSynchronized, uint32_t, jobject, Thread*) \
   V(JniMethodEnd, void, uint32_t, Thread*) \
+  V(JniMethodFastEnd, void, uint32_t, Thread*) \
   V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \
   V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
   V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \
@@ -86,6 +88,23 @@
   V(CmpgFloat, int32_t, float, float) \
   V(CmplDouble, int32_t, double, double) \
   V(CmplFloat, int32_t, float, float) \
+  V(Cos, double, double) \
+  V(Sin, double, double) \
+  V(Acos, double, double) \
+  V(Asin, double, double) \
+  V(Atan, double, double) \
+  V(Atan2, double, double, double) \
+  V(Cbrt, double, double) \
+  V(Cosh, double, double) \
+  V(Exp, double, double) \
+  V(Expm1, double, double) \
+  V(Hypot, double, double, double) \
+  V(Log, double, double) \
+  V(Log10, double, double) \
+  V(NextAfter, double, double, double) \
+  V(Sinh, double, double) \
+  V(Tan, double, double) \
+  V(Tanh, double, double) \
   V(Fmod, double, double, double) \
   V(L2d, double, int64_t) \
   V(Fmodf, float, float, float) \
@@ -102,7 +121,7 @@
   V(ShrLong, uint64_t, uint64_t, uint32_t) \
   V(UshrLong, uint64_t, uint64_t, uint32_t) \
 \
-  V(IndexOf, int32_t, void*, uint32_t, uint32_t, uint32_t) \
+  V(IndexOf, int32_t, void*, uint32_t, uint32_t) \
   V(StringCompareTo, int32_t, void*, void*) \
   V(Memcpy, void*, void*, const void*, size_t) \
 \
@@ -123,6 +142,7 @@
   V(ThrowNoSuchMethod, void, int32_t) \
   V(ThrowNullPointer, void, void) \
   V(ThrowStackOverflow, void, void*) \
+  V(ThrowStringBounds, void, int32_t, int32_t) \
   V(Deoptimize, void, void) \
 \
   V(A64Load, int64_t, volatile const int64_t *) \
@@ -146,7 +166,39 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
-  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t)
+  V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg01, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg02, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg03, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg04, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg05, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg06, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg07, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg08, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg09, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg10, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg11, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg12, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg13, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg14, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg15, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg16, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg17, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg18, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg19, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg20, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg21, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg22, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg23, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg24, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg25, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg26, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg27, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg28, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg29, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
+  V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
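[Illustration: the list above is an X-macro. Every consumer re-defines V to stamp out one declaration, struct field, or offset check per entry, which is why each line is just V(name, return type, argument types...). A two-entry toy version:]

#include <cstddef>

#define TOY_ENTRYPOINT_LIST(V) \
  V(LockObject, void, void*)   \
  V(Memcpy, void*, void*, const void*, size_t)

// One consumer: generate a struct of function pointers.
#define TOY_DECLARE_FIELD(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
struct ToyEntryPoints { TOY_ENTRYPOINT_LIST(TOY_DECLARE_FIELD) };
#undef TOY_DECLARE_FIELD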
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 7361d34..1a12bd4 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -14,19 +14,48 @@
  * limitations under the License.
  */
 
+#include <stdint.h>
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "callee_save_frame.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
+#include "gc_root-inl.h"
 #include "mirror/class-inl.h"
-
-#include <stdint.h>
+#include "mirror/object_reference.h"
 
 namespace art {
 
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                           Thread* self)
+inline constexpr bool FindFieldTypeIsRead(FindFieldType type) {
+  return type == InstanceObjectRead ||
+         type == InstancePrimitiveRead ||
+         type == StaticObjectRead ||
+         type == StaticPrimitiveRead;
+}
+
+// Helper function to do a null check after trying to resolve the field. Not for statics, since
+// there is no receiver object. This function may suspend; obj is passed as a double pointer so
+// that the caller's reference can be updated in case the object moves.
+template<FindFieldType type, bool kAccessCheck>
+ALWAYS_INLINE static inline ArtField* FindInstanceField(uint32_t field_idx,
+                                                        ArtMethod* referrer,
+                                                        Thread* self,
+                                                        size_t size,
+                                                        mirror::Object** obj)
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(self);
+  HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(obj));
+  ArtField* field = FindFieldFromCode<type, kAccessCheck>(field_idx, referrer, self, size);
+  if (LIKELY(field != nullptr) && UNLIKELY(h.Get() == nullptr)) {
+    ThrowNullPointerExceptionForFieldAccess(field, /*is_read*/FindFieldTypeIsRead(type));
+    return nullptr;
+  }
+  return field;
+}
+
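[Illustration: the double pointer plus HandleWrapper above is how a possibly-suspending callee keeps the caller's raw reference valid under a moving GC: the slot is registered on entry and rewritten if the object moves. A toy model of the idea; FakeScope is not ART's StackHandleScope.]

struct FakeObj { int payload; };

// Registers a caller-owned slot; a moving collector would call Relocate()
// so the caller sees the object's new address after the callee returns.
class FakeScope {
 public:
  explicit FakeScope(FakeObj** slot) : slot_(slot) {}
  void Relocate(FakeObj* new_addr) { *slot_ = new_addr; }  // GC update
 private:
  FakeObj** slot_;
};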
+extern "C" ssize_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -40,8 +69,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                               Thread* self)
+extern "C" size_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -55,8 +83,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                             Thread* self)
+extern "C" ssize_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -70,9 +97,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" size_t artGetCharStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -86,9 +111,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
-                                           ArtMethod* referrer,
-                                           Thread* self)
+extern "C" size_t artGet32StaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int32_t));
@@ -123,12 +146,16 @@
                                                    Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  StaticObjectRead,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
   }
-  field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, self,
+  field = FindFieldFromCode<StaticObjectRead, true>(field_idx,
+                                                    referrer,
+                                                    self,
                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
@@ -136,149 +163,159 @@
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" ssize_t artGetByteInstanceFromCode(uint32_t field_idx,
+                                              mirror::Object* obj,
+                                              ArtMethod* referrer,
+                                              Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetByte(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int8_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int8_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetByte(obj);
-    }
+    return field->GetByte(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                                 ArtMethod* referrer, Thread* self)
+extern "C" size_t artGetBooleanInstanceFromCode(uint32_t field_idx,
+                                                mirror::Object* obj,
+                                                ArtMethod* referrer,
+                                                Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetBoolean(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int8_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int8_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetBoolean(obj);
-    }
+    return field->GetBoolean(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
-extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               ArtMethod* referrer, Thread* self)
+extern "C" ssize_t artGetShortInstanceFromCode(uint32_t field_idx,
+                                               mirror::Object* obj,
+                                               ArtMethod* referrer,
+                                               Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetShort(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int16_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int16_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetShort(obj);
-    }
+    return field->GetShort(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               ArtMethod* referrer, Thread* self)
+extern "C" size_t artGetCharInstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetChar(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int16_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int16_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetChar(obj);
-    }
+    return field->GetChar(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" size_t artGet32InstanceFromCode(uint32_t field_idx,
+                                           mirror::Object* obj,
+                                           ArtMethod* referrer,
+                                           Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get32(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int32_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int32_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->Get32(obj);
-    }
+    return field->Get32(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int64_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get64(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int64_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int64_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->Get64(obj);
-    }
+    return field->Get64(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
+extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx,
+                                                     mirror::Object* obj,
                                                      ArtMethod* referrer,
                                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  InstanceObjectRead,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetObj(obj);
   }
-  field = FindFieldFromCode<InstanceObjectRead, true>(
-      field_idx, referrer, self, sizeof(mirror::HeapReference<mirror::Object>));
+  field = FindInstanceField<InstanceObjectRead, true>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      sizeof(mirror::HeapReference<mirror::Object>),
+                                                      &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetObj(obj);
-    }
+    return field->GetObj(obj);
   }
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int artSet8StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                     ArtMethod* referrer, Thread* self)
+extern "C" int artSet8StaticFromCode(uint32_t field_idx,
+                                     uint32_t new_value,
+                                     ArtMethod* referrer,
+                                     Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int8_t));
@@ -308,8 +345,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet16StaticFromCode(uint32_t field_idx, uint16_t new_value,
-                                      ArtMethod* referrer, Thread* self)
+extern "C" int artSet16StaticFromCode(uint32_t field_idx,
+                                      uint16_t new_value,
+                                      ArtMethod* referrer,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int16_t));
@@ -339,8 +378,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      ArtMethod* referrer, Thread* self)
+extern "C" int artSet32StaticFromCode(uint32_t field_idx,
+                                      uint32_t new_value,
+                                      ArtMethod* referrer,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int32_t));
@@ -358,8 +399,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64StaticFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                      uint64_t new_value, Thread* self)
+extern "C" int artSet64StaticFromCode(uint32_t field_idx,
+                                      ArtMethod* referrer,
+                                      uint64_t new_value,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
@@ -377,11 +420,15 @@
   return -1;  // failure
 }
 
-extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
-                                       ArtMethod* referrer, Thread* self)
+extern "C" int artSetObjStaticFromCode(uint32_t field_idx,
+                                       mirror::Object* new_value,
+                                       ArtMethod* referrer,
+                                       Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  StaticObjectWrite,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     if (LIKELY(!field->IsPrimitiveType())) {
@@ -390,8 +437,15 @@
       return 0;  // success
     }
   }
-  field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, self,
-                                                     sizeof(mirror::HeapReference<mirror::Object>));
+  {
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&new_value));
+    field = FindFieldFromCode<StaticObjectWrite, true>(
+        field_idx,
+        referrer,
+        self,
+        sizeof(mirror::HeapReference<mirror::Object>));
+  }
   if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(field->GetDeclaringClass(), new_value);
@@ -400,8 +454,11 @@
   return -1;  // failure
 }
 
-extern "C" int artSet8InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
-                                       ArtMethod* referrer, Thread* self)
+extern "C" int artSet8InstanceFromCode(uint32_t field_idx,
+                                       mirror::Object* obj,
+                                       uint8_t new_value,
+                                       ArtMethod* referrer,
+                                       Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
@@ -416,31 +473,29 @@
     }
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int8_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int8_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    // Compiled code can't use transactional mode.
+    if (type == Primitive::kPrimBoolean) {
+      field->SetBoolean<false>(obj, new_value);
     } else {
-      Primitive::Type type = field->GetTypeAsPrimitiveType();
-      // Compiled code can't use transactional mode.
-      if (type == Primitive::kPrimBoolean) {
-        field->SetBoolean<false>(obj, new_value);
-      } else {
-        field->SetByte<false>(obj, new_value);
-      }
-      return 0;  // success
+      field->SetByte<false>(obj, new_value);
     }
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet16InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet16InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint16_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int16_t));
@@ -455,32 +510,30 @@
     }
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int16_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int16_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    // Compiled code can't use transactional mode.
+    if (type == Primitive::kPrimChar) {
+      field->SetChar<false>(obj, new_value);
     } else {
-      Primitive::Type type = field->GetTypeAsPrimitiveType();
-      // Compiled code can't use transactional mode.
-      if (type == Primitive::kPrimChar) {
-        field->SetChar<false>(obj, new_value);
-      } else {
-        DCHECK_EQ(Primitive::kPrimShort, type);
-        field->SetShort<false>(obj, new_value);
-      }
-      return 0;  // success
+      DCHECK_EQ(Primitive::kPrimShort, type);
+      field->SetShort<false>(obj, new_value);
     }
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet32InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint32_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int32_t));
@@ -489,26 +542,24 @@
     field->Set32<false>(obj, new_value);
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int32_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int32_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
-    } else {
-      // Compiled code can't use transactional mode.
-      field->Set32<false>(obj, new_value);
-      return 0;  // success
-    }
+    // Compiled code can't use transactional mode.
+    field->Set32<false>(obj, new_value);
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet64InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint64_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int64_t));
@@ -517,34 +568,45 @@
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                          sizeof(int64_t));
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int64_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
-    } else {
-      // Compiled code can't use transactional mode.
-      field->Set64<false>(obj, new_value);
-      return 0;  // success
-    }
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(obj, new_value);
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
+extern "C" int artSetObjInstanceFromCode(uint32_t field_idx,
+                                         mirror::Object* obj,
                                          mirror::Object* new_value,
-                                         ArtMethod* referrer, Thread* self)
+                                         ArtMethod* referrer,
+                                         Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  InstanceObjectWrite,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(obj, new_value);
     return 0;  // success
   }
-  field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, self,
-                                                       sizeof(mirror::HeapReference<mirror::Object>));
+  {
+    StackHandleScope<2> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+    HandleWrapper<mirror::Object> h_new_value(hs.NewHandleWrapper(&new_value));
+    field = FindFieldFromCode<InstanceObjectWrite, true>(
+        field_idx,
+        referrer,
+        self,
+        sizeof(mirror::HeapReference<mirror::Object>));
+  }
   if (LIKELY(field != nullptr)) {
     if (UNLIKELY(obj == nullptr)) {
       ThrowNullPointerExceptionForFieldAccess(field, false);
@@ -557,16 +619,30 @@
   return -1;  // failure
 }
 
-// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
-// take one parameter "ref", which is given by the fast path.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj) {
+  DCHECK(kEmitCompilerReadBarrier);
+  return ReadBarrier::Mark(obj);
+}
+
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
-                                              mirror::Object* obj, uint32_t offset) {
-  DCHECK(kUseReadBarrier);
+                                              mirror::Object* obj,
+                                              uint32_t offset) {
+  DCHECK(kEmitCompilerReadBarrier);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
   mirror::HeapReference<mirror::Object>* ref_addr =
-      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
-  return ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, true>(obj, MemberOffset(offset),
-                                                                      ref_addr);
+     reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
+  constexpr ReadBarrierOption kReadBarrierOption =
+      kUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+  mirror::Object* result =
+      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption>(obj,
+                                                               MemberOffset(offset),
+                                                               ref_addr);
+  return result;
+}
+
+extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) {
+  DCHECK(kEmitCompilerReadBarrier);
+  return root->Read();
 }
 
 }  // namespace art
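
Editor's note: the field-setter hunks above all apply the same refactoring — the per-call-site StackHandleScope / FindFieldFromCode / null-receiver boilerplate is collapsed into a single FindInstanceField helper. A minimal sketch of that helper, inferred from the call sites rather than copied from ART (the exact template parameters and the FindFieldType enum are assumptions):

template <FindFieldType type, bool access_check>
static inline ArtField* FindInstanceField(uint32_t field_idx,
                                          ArtMethod* referrer,
                                          Thread* self,
                                          size_t size,
                                          mirror::Object** obj)
    SHARED_REQUIRES(Locks::mutator_lock_) {
  StackHandleScope<1> hs(self);
  // Protect *obj across the lookup, which can suspend and move objects.
  HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(obj));
  ArtField* field = FindFieldFromCode<type, access_check>(field_idx, referrer, self, size);
  if (LIKELY(field != nullptr) && UNLIKELY(*obj == nullptr)) {
    // Fold in the null check each caller used to perform itself.
    ThrowNullPointerExceptionForFieldAccess(field, /* is_read */ false);
    return nullptr;
  }
  return field;
}

This is why the rewritten setters above no longer test obj for null after the slow-path lookup.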
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 8e660a2..82d5467 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "callee_save_frame.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "instrumentation.h"
@@ -37,7 +38,7 @@
   if (instrumentation->IsDeoptimized(method)) {
     result = GetQuickToInterpreterBridge();
   } else {
-    result = instrumentation->GetQuickCodeFor(method, sizeof(void*));
+    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
     DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
   }
   bool interpreter_entry = (result == GetQuickToInterpreterBridge());
@@ -55,7 +56,7 @@
   CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
                                      << self->GetException()->Dump();
   // Compute address of return PC and sanity check that it currently holds 0.
-  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 58f256a..c06824c 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -29,6 +29,21 @@
   handle_on_stack->Assign(to_ref);
 }
 
+// Called on entry to fast JNI, push a new local reference table only.
+extern uint32_t JniMethodFastStart(Thread* self) {
+  JNIEnvExt* env = self->GetJniEnv();
+  DCHECK(env != nullptr);
+  uint32_t saved_local_ref_cookie = env->local_ref_cookie;
+  env->local_ref_cookie = env->locals.GetSegmentState();
+
+  if (kIsDebugBuild) {
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+    CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+  }
+
+  return saved_local_ref_cookie;
+}
+
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
 extern uint32_t JniMethodStart(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
@@ -73,11 +88,32 @@
   self->PopHandleScope();
 }
 
+// TODO: These should probably be templatized or macro-ized.
+// Otherwise there's just too much repetitive boilerplate.
+
 extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
   GoToRunnable(self);
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
+extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
+  // Inlined fast version of GoToRunnable(self).
+
+  if (kIsDebugBuild) {
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+    CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+  }
+
+  if (UNLIKELY(self->TestAllFlags())) {
+    // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
+    // is a flag raised.
+    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
+    self->CheckSuspend();
+  }
+
+  PopLocalReferences(saved_local_ref_cookie, self);
+}
+
 extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
                                      Thread* self) {
   GoToRunnable(self);
@@ -85,6 +121,10 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
+// TODO: JniMethodFastEndWithReference
+// (Probably don't need to have a synchronized variant since
+// it already has to do atomic operations)
+
 // Common result handling for EndWithReference.
 static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
                                                              uint32_t saved_local_ref_cookie,
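
Editor's note: the fast-native entry/exit pair added above skips the Runnable-to-native thread-state transition entirely; the thread stays runnable, so the exit path must perform an explicit suspend check. A hedged, C++-only illustration of the ordering contract between the two calls (the real caller is a generated JNI stub, not C++; CallFastNative is an invented name):

static jint CallFastNative(Thread* self,
                           jint (*native_fn)(JNIEnv*, jclass),
                           JNIEnv* env,
                           jclass cls) {
  // Save the local-reference segment state; no thread-state transition.
  uint32_t cookie = JniMethodFastStart(self);
  jint result = native_fn(env, cls);
  // Pops any local references the native code created and runs a suspend
  // check, since we stayed runnable for the whole call.
  JniMethodFastEnd(cookie, self);
  return result;
}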
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 5a82b3a..ea9f7b0 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -16,7 +16,6 @@
 
 #include "callee_save_frame.h"
 #include "common_throws.h"
-#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/object-inl.h"
 #include "thread.h"
 #include "well_known_classes.h"
@@ -30,7 +29,7 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an exception.
+// Called by generated code to throw an exception.
 extern "C" NO_RETURN void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   /*
@@ -49,17 +48,27 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw a NPE exception.
+// Called by generated code to throw an NPE exception.
 extern "C" NO_RETURN void artThrowNullPointerExceptionFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  // We come from an explicit check in the generated code. This path is triggered
+  // only if the object is indeed null.
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ false, 0U);
+  self->QuickDeliverException();
+}
+
+// Installed by a signal handler to throw an NPE exception.
+extern "C" NO_RETURN void artThrowNullPointerExceptionFromSignal(uintptr_t addr, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   self->NoteSignalBeingHandled();
-  ThrowNullPointerExceptionFromDexPC();
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr);
   self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an arithmetic divide by zero exception.
+// Called by generated code to throw an arithmetic divide by zero exception.
 extern "C" NO_RETURN void artThrowDivZeroFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -67,7 +76,7 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an array index out of bounds exception.
+// Called by generated code to throw an array index out of bounds exception.
 extern "C" NO_RETURN void artThrowArrayBoundsFromCode(int index, int length, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -75,6 +84,14 @@
   self->QuickDeliverException();
 }
 
+// Called by generated code to throw a string index out of bounds exception.
+extern "C" NO_RETURN void artThrowStringBoundsFromCode(int index, int length, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ThrowStringIndexOutOfBoundsException(index, length);
+  self->QuickDeliverException();
+}
+
 extern "C" NO_RETURN void artThrowStackOverflowFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
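
Editor's note: the NPE entrypoints are now split by origin. artThrowNullPointerExceptionFromCode serves explicit null checks emitted by the compiler (there is no address to report), while artThrowNullPointerExceptionFromSignal serves implicit checks that trap via SIGSEGV and carries the faulting address. A rough sketch of how a fault handler could feed the second entrypoint (HandleSigsegv, RedirectTo, and kNullFaultLimit are illustrative names, not ART's fault-manager API):

void HandleSigsegv(int sig, siginfo_t* info, void* context) {
  uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
  // Treat small addresses as "null plus field offset" dereferences.
  if (fault_addr < kNullFaultLimit) {
    // Rewrite the interrupted context so execution resumes in the
    // entrypoint, which builds and delivers the NPE with the address.
    RedirectTo(context, artThrowNullPointerExceptionFromSignal, fault_addr);
  }
}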
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5eda6d6..c67379a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "callee_save_frame.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
@@ -23,6 +24,7 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "interpreter/interpreter.h"
+#include "linear_alloc.h"
 #include "method_reference.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -44,7 +46,7 @@
   static constexpr size_t kBytesStackArgLocation = 4;
   // Frame size in bytes of a callee-save frame for RefsAndArgs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize =
-      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
+      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -73,11 +75,11 @@
   static constexpr size_t kNumQuickFprArgs = kArm32QuickCodeUseSoftFloat ? 0 : 16;
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+      arm::ArmCalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm::ArmCalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+      arm::ArmCalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm::ArmCalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
+      arm::ArmCalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -111,11 +113,11 @@
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm64::Arm64CalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
+      arm64::Arm64CalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -305,7 +307,7 @@
 
   static ArtMethod* GetCallingMethod(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    return GetCalleeSaveMethodCaller(sp, Runtime::kRefsAndArgs);
+    return GetCalleeSaveMethodCaller(sp, Runtime::kSaveRefsAndArgs);
   }
 
   static ArtMethod* GetOuterMethod(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -317,7 +319,7 @@
 
   static uint32_t GetCallingDexPc(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
+    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
@@ -326,14 +328,15 @@
 
     if (current_code->IsOptimized()) {
       CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-      StackMapEncoding encoding = code_info.ExtractEncoding();
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(outer_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
-      if (stack_map.HasInlineInfo(encoding)) {
+      if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
         InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-        return inline_info.GetDexPcAtDepth(inline_info.GetDepth() - 1);
+        return inline_info.GetDexPcAtDepth(
+            encoding.inline_info_encoding,
+            inline_info.GetDepth(encoding.inline_info_encoding) - 1);
       } else {
-        return stack_map.GetDexPc(encoding);
+        return stack_map.GetDexPc(encoding.stack_map_encoding);
       }
     } else {
       return current_code->ToDexPc(*caller_sp, outer_pc);
@@ -364,7 +367,7 @@
     // next register is even.
     static_assert(!kQuickDoubleRegAlignedFloatBackFilled || kNumQuickFprArgs % 2 == 0,
                   "Number of Quick FPR arguments not even");
-    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   }
 
   virtual ~QuickArgumentVisitor() {}
@@ -645,27 +648,27 @@
   // frame.
   ScopedQuickEntrypointChecks sqec(self);
 
-  if (method->IsAbstract()) {
-    ThrowAbstractMethodError(method);
+  if (UNLIKELY(!method->IsInvokable())) {
+    method->ThrowInvocationTimeError();
     return 0;
   }
 
   JValue tmp_value;
   ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
-      StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
-  const DexFile::CodeItem* code_item = method->GetCodeItem();
-  DCHECK(code_item != nullptr) << PrettyMethod(method);
+      StackedShadowFrameType::kDeoptimizationShadowFrame, false);
   ManagedStack fragment;
 
   DCHECK(!method->IsNative()) << PrettyMethod(method);
   uint32_t shorty_len = 0;
-  auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  ArtMethod* non_proxy_method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  const DexFile::CodeItem* code_item = non_proxy_method->GetCodeItem();
+  DCHECK(code_item != nullptr) << PrettyMethod(method);
   const char* shorty = non_proxy_method->GetShorty(&shorty_len);
 
   JValue result;
 
   if (deopt_frame != nullptr) {
-    // Coming from single-frame deopt.
+    // Coming from partial-fragment deopt.
 
     if (kIsDebugBuild) {
       // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
@@ -679,13 +682,14 @@
     }
 
     if (VLOG_IS_ON(deopt)) {
-      // Print out the stack to verify that it was a single-frame deopt.
+      // Print out the stack to verify that it was a partial-fragment deopt.
       LOG(INFO) << "Continue-ing from deopt. Stack is:";
       QuickExceptionHandler::DumpFramesWithType(self, true);
     }
 
     mirror::Throwable* pending_exception = nullptr;
-    self->PopDeoptimizationContext(&result, &pending_exception);
+    bool from_code = false;
+    self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -712,7 +716,7 @@
     if (pending_exception != nullptr) {
       self->SetException(pending_exception);
     }
-    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
@@ -751,10 +755,16 @@
 
   // Request a stack deoptimization if needed
   ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
-  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+  uintptr_t caller_pc = QuickArgumentVisitor::GetCallingPc(sp);
+  // If caller_pc is the instrumentation exit stub, the stub will check whether deoptimization
+  // should be done; it knows the real return pc.
+  if (UNLIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()) &&
+               Dbg::IsForcedInterpreterNeededForUpcall(self, caller) &&
+               Runtime::Current()->IsDeoptimizeable(caller_pc))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
-    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+    self->PushDeoptimizationContext(
+        result, shorty[0] == 'L', /* from_code */ false, self->GetException());
 
     // Set special exception to cause deoptimization.
     self->SetException(Thread::GetDeoptimizationException());
@@ -850,7 +860,7 @@
   jobject rcvr_jobj = soa.AddLocalReference<jobject>(receiver);
 
   // Placing arguments into args vector and remove the receiver.
-  ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy(sizeof(void*));
+  ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   CHECK(!non_proxy_method->IsStatic()) << PrettyMethod(proxy_method) << " "
                                        << PrettyMethod(non_proxy_method);
   std::vector<jvalue> args;
@@ -863,12 +873,15 @@
   args.erase(args.begin());
 
   // Convert proxy method into expected interface method.
-  ArtMethod* interface_method = proxy_method->FindOverriddenMethod(sizeof(void*));
+  ArtMethod* interface_method = proxy_method->FindOverriddenMethod(kRuntimePointerSize);
   DCHECK(interface_method != nullptr) << PrettyMethod(proxy_method);
   DCHECK(!interface_method->IsProxyMethod()) << PrettyMethod(interface_method);
   self->EndAssertNoThreadSuspension(old_cause);
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
   jobject interface_method_jobj = soa.AddLocalReference<jobject>(
-      mirror::Method::CreateFromArtMethod(soa.Self(), interface_method));
+      mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(),
+                                                                      interface_method));
 
   // All naked Object*s should now be in jobjects, so its safe to go into the main invoke code
   // that performs allocations.
@@ -1012,22 +1025,41 @@
     HandleWrapper<mirror::Object> h_receiver(
         hs.NewHandleWrapper(virtual_or_interface ? &receiver : &dummy));
     DCHECK_EQ(caller->GetDexFile(), called_method.dex_file);
-    called = linker->ResolveMethod(self, called_method.dex_method_index, caller, invoke_type);
+    called = linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+        self, called_method.dex_method_index, caller, invoke_type);
   }
   const void* code = nullptr;
   if (LIKELY(!self->IsExceptionPending())) {
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
         << PrettyMethod(called) << " " << invoke_type;
-    if (virtual_or_interface) {
-      // Refine called method based on receiver.
-      CHECK(receiver != nullptr) << invoke_type;
-
+    if (virtual_or_interface || invoke_type == kSuper) {
+      // Refine called method based on receiver for kVirtual/kInterface, and
+      // caller for kSuper.
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
-        called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
+        CHECK(receiver != nullptr) << invoke_type;
+        called = receiver->GetClass()->FindVirtualMethodForVirtual(called, kRuntimePointerSize);
+      } else if (invoke_type == kInterface) {
+        CHECK(receiver != nullptr) << invoke_type;
+        called = receiver->GetClass()->FindVirtualMethodForInterface(called, kRuntimePointerSize);
       } else {
-        called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+        DCHECK_EQ(invoke_type, kSuper);
+        CHECK(caller != nullptr) << invoke_type;
+        StackHandleScope<2> hs(self);
+        Handle<mirror::DexCache> dex_cache(
+            hs.NewHandle(caller->GetDeclaringClass()->GetDexCache()));
+        Handle<mirror::ClassLoader> class_loader(
+            hs.NewHandle(caller->GetDeclaringClass()->GetClassLoader()));
+        // TODO: Maybe put this into a mirror::Class function.
+        mirror::Class* ref_class = linker->ResolveReferencedClassOfMethod(
+            called_method.dex_method_index, dex_cache, class_loader);
+        if (ref_class->IsInterface()) {
+          called = ref_class->FindVirtualMethodForInterfaceSuper(called, kRuntimePointerSize);
+        } else {
+          called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
+              called->GetMethodIndex(), kRuntimePointerSize);
+        }
       }
 
       CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
@@ -1040,7 +1072,7 @@
       // FindVirtualMethodFor... This is ok for FindDexMethodIndexInOtherDexFile that only cares
       // about the name and signature.
       uint32_t update_dex_cache_method_index = called->GetDexMethodIndex();
-      if (!called->HasSameDexCacheResolvedMethods(caller, sizeof(void*))) {
+      if (!called->HasSameDexCacheResolvedMethods(caller, kRuntimePointerSize)) {
         // Calling from one dex file to another, need to compute the method index appropriate to
         // the caller's dex file. Since we get here only if the original called was a runtime
         // method, we've got the correct dex_file and a dex_method_idx from above.
@@ -1054,8 +1086,10 @@
       }
       if ((update_dex_cache_method_index != DexFile::kDexNoIndex) &&
           (caller->GetDexCacheResolvedMethod(
-              update_dex_cache_method_index, sizeof(void*)) != called)) {
-        caller->SetDexCacheResolvedMethod(update_dex_cache_method_index, called, sizeof(void*));
+              update_dex_cache_method_index, kRuntimePointerSize) != called)) {
+        caller->SetDexCacheResolvedMethod(update_dex_cache_method_index,
+                                          called,
+                                          kRuntimePointerSize);
       }
     } else if (invoke_type == kStatic) {
       const auto called_dex_method_idx = called->GetDexMethodIndex();
@@ -1065,7 +1099,9 @@
       // b/19175856
       if (called->GetDexFile() == called_method.dex_file &&
           called_method.dex_method_index != called_dex_method_idx) {
-        called->GetDexCache()->SetResolvedMethod(called_dex_method_idx, called, sizeof(void*));
+        called->GetDexCache()->SetResolvedMethod(called_dex_method_idx,
+                                                 called,
+                                                 kRuntimePointerSize);
       }
     }
 
@@ -1599,7 +1635,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* method = **m;
 
-    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
 
     uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
 
@@ -1769,8 +1805,7 @@
 
   void FinalizeHandleScope(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  StackReference<mirror::Object>* GetFirstHandleScopeEntry()
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackReference<mirror::Object>* GetFirstHandleScopeEntry() {
     return handle_scope_->GetHandle(0).GetReference();
   }
 
@@ -2019,7 +2054,7 @@
 static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, Thread* self,
                                      ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
-  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
   ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
   ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == nullptr)) {
@@ -2102,48 +2137,73 @@
   return artInvokeCommon<kVirtual, true>(method_idx, this_object, self, sp);
 }
 
-// Determine target of interface dispatch. This object is known non-null.
-extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t dex_method_idx,
+// Determine target of interface dispatch. This object is known non-null. First argument
+// is there for consistency but should not be used, as some architectures overwrite it
+// in the assembly trampoline.
+extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUTE_UNUSED,
                                                       mirror::Object* this_object,
-                                                      Thread* self, ArtMethod** sp)
+                                                      Thread* self,
+                                                      ArtMethod** sp)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  // The optimizing compiler currently does not inline methods that have an interface
-  // invocation. We use the outer method directly to avoid fetching a stack map, which is
-  // more expensive.
-  ArtMethod* caller_method = QuickArgumentVisitor::GetOuterMethod(sp);
-  DCHECK_EQ(caller_method, QuickArgumentVisitor::GetCallingMethod(sp));
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> cls(hs.NewHandle(this_object->GetClass()));
+
+  ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
+
+  // Fetch the dex_method_idx of the target interface method from the caller.
+  uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
+
+  const DexFile::CodeItem* code_item = caller_method->GetCodeItem();
+  CHECK_LT(dex_pc, code_item->insns_size_in_code_units_);
+  const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+  Instruction::Code instr_code = instr->Opcode();
+  CHECK(instr_code == Instruction::INVOKE_INTERFACE ||
+        instr_code == Instruction::INVOKE_INTERFACE_RANGE)
+      << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
+  uint32_t dex_method_idx;
+  if (instr_code == Instruction::INVOKE_INTERFACE) {
+    dex_method_idx = instr->VRegB_35c();
+  } else {
+    CHECK_EQ(instr_code, Instruction::INVOKE_INTERFACE_RANGE);
+    dex_method_idx = instr->VRegB_3rc();
+  }
+
   ArtMethod* interface_method = caller_method->GetDexCacheResolvedMethod(
-      dex_method_idx, sizeof(void*));
+      dex_method_idx, kRuntimePointerSize);
   DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
-  ArtMethod* method;
+  ArtMethod* method = nullptr;
+  ImTable* imt = cls->GetImt(kRuntimePointerSize);
+
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
-    method = this_object->GetClass()->FindVirtualMethodForInterface(
-        interface_method, sizeof(void*));
+    // If the dex cache already resolved the interface method, check whether we have
+    // a match in the ImtConflictTable.
+    ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), kRuntimePointerSize);
+    if (LIKELY(conflict_method->IsRuntimeMethod())) {
+      ImtConflictTable* current_table = conflict_method->GetImtConflictTable(kRuntimePointerSize);
+      DCHECK(current_table != nullptr);
+      method = current_table->Lookup(interface_method, kRuntimePointerSize);
+    } else {
+      // It seems we aren't really a conflict method!
+      method = cls->FindVirtualMethodForInterface(interface_method, kRuntimePointerSize);
+    }
+    if (method != nullptr) {
+      return GetTwoWordSuccessValue(
+          reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode()),
+          reinterpret_cast<uintptr_t>(method));
+    }
+
+    // No match, use the IfTable.
+    method = cls->FindVirtualMethodForInterface(interface_method, kRuntimePointerSize);
     if (UNLIKELY(method == nullptr)) {
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(
           interface_method, this_object, caller_method);
       return GetTwoWordFailureValue();  // Failure.
     }
   } else {
+    // The dex cache did not resolve the method; look it up in the dex file
+    // of the caller.
     DCHECK_EQ(interface_method, Runtime::Current()->GetResolutionMethod());
-    if (kIsDebugBuild) {
-      uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
-      const DexFile::CodeItem* code = caller_method->GetCodeItem();
-      CHECK_LT(dex_pc, code->insns_size_in_code_units_);
-      const Instruction* instr = Instruction::At(&code->insns_[dex_pc]);
-      Instruction::Code instr_code = instr->Opcode();
-      CHECK(instr_code == Instruction::INVOKE_INTERFACE ||
-            instr_code == Instruction::INVOKE_INTERFACE_RANGE)
-          << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
-      if (instr_code == Instruction::INVOKE_INTERFACE) {
-        CHECK_EQ(dex_method_idx, instr->VRegB_35c());
-      } else {
-        CHECK_EQ(instr_code, Instruction::INVOKE_INTERFACE_RANGE);
-        CHECK_EQ(dex_method_idx, instr->VRegB_3rc());
-      }
-    }
-
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()
         ->GetDexFile();
     uint32_t shorty_len;
@@ -2163,7 +2223,31 @@
       CHECK(self->IsExceptionPending());
       return GetTwoWordFailureValue();  // Failure.
     }
+    interface_method =
+        caller_method->GetDexCacheResolvedMethod(dex_method_idx, kRuntimePointerSize);
+    DCHECK(!interface_method->IsRuntimeMethod());
   }
+
+  // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
+  // We create a new table with the new pair { interface_method, method }.
+  uint32_t imt_index = interface_method->GetImtIndex();
+  ArtMethod* conflict_method = imt->Get(imt_index, kRuntimePointerSize);
+  if (conflict_method->IsRuntimeMethod()) {
+    ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
+        cls.Get(),
+        conflict_method,
+        interface_method,
+        method,
+        /* force_new_conflict_method */ false);
+    if (new_conflict_method != conflict_method) {
+      // Update the IMT if we created a new conflict method. No fence needed here, as the
+      // data is consistent.
+      imt->Set(imt_index,
+               new_conflict_method,
+               kRuntimePointerSize);
+    }
+  }
+
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
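
Editor's note: the interface trampoline above now consults a per-class IMT whose conflicting slots hold a runtime method carrying an ImtConflictTable; on a miss it falls back to the IfTable walk and then records the new { interface_method, implementation } pair via AddMethodToConflictTable. A simplified sketch of the lookup it relies on (the entry layout and null termination are assumptions, not ART's actual encoding):

struct ImtConflictEntry {
  ArtMethod* interface_method;  // key: the resolved interface method
  ArtMethod* implementation;    // value: the concrete target for this class
};

ArtMethod* ConflictTableLookup(const ImtConflictEntry* entries,
                               ArtMethod* interface_method) {
  for (const ImtConflictEntry* e = entries; e->interface_method != nullptr; ++e) {
    if (e->interface_method == interface_method) {
      return e->implementation;
    }
  }
  return nullptr;  // Miss: caller walks the IfTable, then grows the table.
}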
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 4e85913..553c092 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -31,6 +31,13 @@
     options->push_back(std::make_pair("imageinstructionset", "x86_64"));
   }
 
+  // Do not do any of the finalization. We don't want to run any code and we don't need the
+  // heap prepared; finalization would actually be a problem, given that SetUpRuntimeOptions
+  // sets the instruction set to x86_64.
+  void FinalizeSetup() OVERRIDE {
+    ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
+  }
+
   static ArtMethod* CreateCalleeSaveMethod(InstructionSet isa, Runtime::CalleeSaveType type)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* r = Runtime::Current();
@@ -73,10 +80,16 @@
 // This test ensures that kQuickCalleeSaveFrame_RefAndArgs_FrameSize is correct.
 TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
   // We have to use a define here as the callee_save_frame.h functions are constexpr.
-#define CHECK_FRAME_SIZE(isa)                                                                     \
-  CheckFrameSize(isa, Runtime::kRefsAndArgs, GetCalleeSaveFrameSize(isa, Runtime::kRefsAndArgs)); \
-  CheckFrameSize(isa, Runtime::kRefsOnly, GetCalleeSaveFrameSize(isa, Runtime::kRefsOnly));       \
-  CheckFrameSize(isa, Runtime::kSaveAll, GetCalleeSaveFrameSize(isa, Runtime::kSaveAll))
+#define CHECK_FRAME_SIZE(isa)                                                 \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveRefsAndArgs,                                   \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsAndArgs));     \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveRefsOnly,                                      \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsOnly));        \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveAllCalleeSaves,                                \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveAllCalleeSaves))
 
   CHECK_FRAME_SIZE(kArm);
   CHECK_FRAME_SIZE(kArm64);
@@ -101,12 +114,12 @@
   // Ensure that the computation in callee_save_frame.h is correct.
   // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
   // sizeof(void*), which is wrong when the target bitwidth is not the same as the host's.
-  CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsAndArgs));
-  CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly));
-  CheckPCOffset(kRuntimeISA, Runtime::kSaveAll,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAll));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsOnly,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves));
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 78f56ee..004cdc4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -112,19 +112,24 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_functions,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_function,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, jni_entrypoints,
-                        sizeof(void*) * 6);
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
+                        sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, jni_entrypoints,
+                        sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, thread_local_pos, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, rosalloc_runs, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, mterp_current_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, rosalloc_runs, thread_local_alloc_stack_top,
-                        sizeof(void*) * kNumRosAllocThreadLocalSizeBrackets);
+                        sizeof(void*) * kNumRosAllocThreadLocalSizeBracketsInThread);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, sizeof(void*));
@@ -206,11 +211,14 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObjectWithBoundCheck, pAputObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pHandleFillArrayData, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pHandleFillArrayData, pJniMethodStart, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodFastStart,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastStart, pJniMethodStartSynchronized,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodFastEnd, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEnd, pJniMethodEndSynchronized,
+                         sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
@@ -223,7 +231,24 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgDouble, pCmpgFloat, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgFloat, pCmplDouble, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplDouble, pCmplFloat, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplFloat, pFmod, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplFloat, pCos, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCos, pSin, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pSin, pAcos, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAcos, pAsin, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAsin, pAtan, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan, pAtan2, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pCbrt, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCbrt, pCosh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCosh, pExp, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExp, pExpm1, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExpm1, pHypot, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pHypot, pLog, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLog, pLog10, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLog10, pNextAfter, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNextAfter, pSinh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pSinh, pTan, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTan, pTanh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTanh, pFmod, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pFmod, pL2d, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pL2d, pFmodf, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pFmodf, pL2f, sizeof(void*));
@@ -265,7 +290,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowDivZero, pThrowNoSuchMethod, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNoSuchMethod, pThrowNullPointer, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNullPointer, pThrowStackOverflow, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pDeoptimize, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pThrowStringBounds, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStringBounds, pDeoptimize, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeoptimize, pA64Load, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Load, pA64Store, sizeof(void*));
 
@@ -301,9 +327,70 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg00, pReadBarrierMarkReg01,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg01, pReadBarrierMarkReg02,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg02, pReadBarrierMarkReg03,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg03, pReadBarrierMarkReg04,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg04, pReadBarrierMarkReg05,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg05, pReadBarrierMarkReg06,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg06, pReadBarrierMarkReg07,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg07, pReadBarrierMarkReg08,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg08, pReadBarrierMarkReg09,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg09, pReadBarrierMarkReg10,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg10, pReadBarrierMarkReg11,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg11, pReadBarrierMarkReg12,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg12, pReadBarrierMarkReg13,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg13, pReadBarrierMarkReg14,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg14, pReadBarrierMarkReg15,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg15, pReadBarrierMarkReg16,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg16, pReadBarrierMarkReg17,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg17, pReadBarrierMarkReg18,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg18, pReadBarrierMarkReg19,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg19, pReadBarrierMarkReg20,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg20, pReadBarrierMarkReg21,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg21, pReadBarrierMarkReg22,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg22, pReadBarrierMarkReg23,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg23, pReadBarrierMarkReg24,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg24, pReadBarrierMarkReg25,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg25, pReadBarrierMarkReg26,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg26, pReadBarrierMarkReg27,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg27, pReadBarrierMarkReg28,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg28, pReadBarrierMarkReg29,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
+                         sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierSlow)
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow)
             + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
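
Editor's note: these chains of EXPECT_OFFSET_DIFFNP checks pin down the exact layout of QuickEntryPoints — every slot must sit exactly sizeof(void*) past its predecessor, because assembly stubs reach entrypoints by fixed offsets from the Thread pointer. A compile-time sketch of the same idea (EXPECT_OFFSET_DIFFNP itself is a runtime gtest-style macro; this static_assert variant is only an illustration and assumes standard-layout types):

#include <cstddef>

#define CHECK_ADJACENT_SLOTS(type, first, second)                      \
  static_assert(offsetof(type, second) - offsetof(type, first) ==      \
                    sizeof(void*),                                     \
                #first " and " #second " must be adjacent pointer slots")

// Example use, mirroring one of the checks above:
// CHECK_ADJACENT_SLOTS(QuickEntryPoints, pJniMethodStart, pJniMethodFastStart);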
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
deleted file mode 100644
index 4de8a8e..0000000
--- a/runtime/exception_test.cc
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <memory>
-
-#include "class_linker.h"
-#include "common_runtime_test.h"
-#include "dex_file.h"
-#include "dex_file-inl.h"
-#include "gtest/gtest.h"
-#include "leb128.h"
-#include "mirror/class-inl.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/stack_trace_element.h"
-#include "oat_quick_method_header.h"
-#include "runtime.h"
-#include "scoped_thread_state_change.h"
-#include "handle_scope-inl.h"
-#include "thread.h"
-#include "vmap_table.h"
-
-namespace art {
-
-class ExceptionTest : public CommonRuntimeTest {
- protected:
-  virtual void SetUp() {
-    CommonRuntimeTest::SetUp();
-
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<2> hs(soa.Self());
-    Handle<mirror::ClassLoader> class_loader(
-        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle"))));
-    my_klass_ = class_linker_->FindClass(soa.Self(), "LExceptionHandle;", class_loader);
-    ASSERT_TRUE(my_klass_ != nullptr);
-    Handle<mirror::Class> klass(hs.NewHandle(my_klass_));
-    class_linker_->EnsureInitialized(soa.Self(), klass, true, true);
-    my_klass_ = klass.Get();
-
-    dex_ = my_klass_->GetDexCache()->GetDexFile();
-
-    uint32_t code_size = 12;
-    for (size_t i = 0 ; i < code_size; i++) {
-      fake_code_.push_back(0x70 | i);
-    }
-
-    fake_mapping_data_.PushBackUnsigned(4);  // first element is count
-    fake_mapping_data_.PushBackUnsigned(4);  // total (non-length) elements
-    fake_mapping_data_.PushBackUnsigned(2);  // count of pc to dex elements
-                                      // ---  pc to dex table
-    fake_mapping_data_.PushBackUnsigned(3 - 0);  // offset 3
-    fake_mapping_data_.PushBackSigned(3 - 0);    // maps to dex offset 3
-                                      // ---  dex to pc table
-    fake_mapping_data_.PushBackUnsigned(3 - 0);  // offset 3
-    fake_mapping_data_.PushBackSigned(3 - 0);    // maps to dex offset 3
-
-    fake_vmap_table_data_.PushBackUnsigned(0 + VmapTable::kEntryAdjustment);
-
-    fake_gc_map_.push_back(0);  // 0 bytes to encode references and native pc offsets.
-    fake_gc_map_.push_back(0);
-    fake_gc_map_.push_back(0);  // 0 entries.
-    fake_gc_map_.push_back(0);
-
-    const std::vector<uint8_t>& fake_vmap_table_data = fake_vmap_table_data_.GetData();
-    const std::vector<uint8_t>& fake_mapping_data = fake_mapping_data_.GetData();
-    uint32_t vmap_table_offset = sizeof(OatQuickMethodHeader) + fake_vmap_table_data.size();
-    uint32_t mapping_table_offset = vmap_table_offset + fake_mapping_data.size();
-    uint32_t gc_map_offset = mapping_table_offset + fake_gc_map_.size();
-    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
-                                       4 * sizeof(void*), 0u, 0u, code_size);
-    fake_header_code_and_maps_.resize(sizeof(method_header));
-    memcpy(&fake_header_code_and_maps_[0], &method_header, sizeof(method_header));
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
-                                      fake_vmap_table_data.begin(), fake_vmap_table_data.end());
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
-                                      fake_mapping_data.begin(), fake_mapping_data.end());
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
-                                      fake_gc_map_.begin(), fake_gc_map_.end());
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.end(),
-                                      fake_code_.begin(), fake_code_.end());
-
-    // NOTE: Don't align the code (it will not be executed) but check that the Thumb2
-    // adjustment will be a NOP, see ArtMethod::EntryPointToCodePointer().
-    CHECK_ALIGNED(mapping_table_offset, 2);
-    const uint8_t* code_ptr = &fake_header_code_and_maps_[gc_map_offset];
-
-    method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
-    ASSERT_TRUE(method_f_ != nullptr);
-    method_f_->SetEntryPointFromQuickCompiledCode(code_ptr);
-
-    method_g_ = my_klass_->FindVirtualMethod("g", "(I)V", sizeof(void*));
-    ASSERT_TRUE(method_g_ != nullptr);
-    method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
-  }
-
-  const DexFile* dex_;
-
-  std::vector<uint8_t> fake_code_;
-  Leb128EncodingVector<> fake_mapping_data_;
-  Leb128EncodingVector<> fake_vmap_table_data_;
-  std::vector<uint8_t> fake_gc_map_;
-  std::vector<uint8_t> fake_header_code_and_maps_;
-
-  ArtMethod* method_f_;
-  ArtMethod* method_g_;
-
- private:
-  mirror::Class* my_klass_;
-};
-
-TEST_F(ExceptionTest, FindCatchHandler) {
-  ScopedObjectAccess soa(Thread::Current());
-  const DexFile::CodeItem* code_item = dex_->GetCodeItem(method_f_->GetCodeItemOffset());
-
-  ASSERT_TRUE(code_item != nullptr);
-
-  ASSERT_EQ(2u, code_item->tries_size_);
-  ASSERT_NE(0u, code_item->insns_size_in_code_units_);
-
-  const DexFile::TryItem *t0, *t1;
-  t0 = dex_->GetTryItems(*code_item, 0);
-  t1 = dex_->GetTryItems(*code_item, 1);
-  EXPECT_LE(t0->start_addr_, t1->start_addr_);
-  {
-    CatchHandlerIterator iter(*code_item, 4 /* Dex PC in the first try block */);
-    EXPECT_STREQ("Ljava/io/IOException;", dex_->StringByTypeIdx(iter.GetHandlerTypeIndex()));
-    ASSERT_TRUE(iter.HasNext());
-    iter.Next();
-    EXPECT_STREQ("Ljava/lang/Exception;", dex_->StringByTypeIdx(iter.GetHandlerTypeIndex()));
-    ASSERT_TRUE(iter.HasNext());
-    iter.Next();
-    EXPECT_FALSE(iter.HasNext());
-  }
-  {
-    CatchHandlerIterator iter(*code_item, 8 /* Dex PC in the second try block */);
-    EXPECT_STREQ("Ljava/io/IOException;", dex_->StringByTypeIdx(iter.GetHandlerTypeIndex()));
-    ASSERT_TRUE(iter.HasNext());
-    iter.Next();
-    EXPECT_FALSE(iter.HasNext());
-  }
-  {
-    CatchHandlerIterator iter(*code_item, 11 /* Dex PC not in any try block */);
-    EXPECT_FALSE(iter.HasNext());
-  }
-}
-
-TEST_F(ExceptionTest, StackTraceElement) {
-  Thread* thread = Thread::Current();
-  thread->TransitionFromSuspendedToRunnable();
-  bool started = runtime_->Start();
-  CHECK(started);
-  JNIEnv* env = thread->GetJniEnv();
-  ScopedObjectAccess soa(env);
-
-  std::vector<uintptr_t> fake_stack;
-  Runtime* r = Runtime::Current();
-  r->SetInstructionSet(kRuntimeISA);
-  ArtMethod* save_method = r->CreateCalleeSaveMethod();
-  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
-  QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
-
-  ASSERT_EQ(kStackAlignment, 16U);
-  // ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
-
-
-  // Create three fake stack frames with mapping data created in SetUp. We map offset 3 in the
-  // code to dex pc 3.
-  const uint32_t dex_pc = 3;
-
-  // Create the stack frame for the callee save method, expected by the runtime.
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(save_method));
-  for (size_t i = 0; i < frame_info.FrameSizeInBytes() - 2 * sizeof(uintptr_t);
-       i += sizeof(uintptr_t)) {
-    fake_stack.push_back(0);
-  }
-
-  fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
-      method_g_, dex_pc, /* is_catch_handler */ false));  // return pc
-
-  // Create/push fake 16byte stack frame for method g
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
-      method_g_, dex_pc, /* is_catch_handler */ false));  // return pc
-
-  // Create/push fake 16byte stack frame for method f
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(0xEBAD6070);  // return pc
-
-  // Push Method* of null to terminate the trace
-  fake_stack.push_back(0);
-
-  // Push null values which will become null incoming arguments.
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-
-  // Set up thread to appear as if we called out of method_g_ at pc dex 3
-  thread->SetTopOfStack(reinterpret_cast<ArtMethod**>(&fake_stack[0]));
-
-  jobject internal = thread->CreateInternalStackTrace<false>(soa);
-  ASSERT_TRUE(internal != nullptr);
-  jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(soa, internal);
-  ASSERT_TRUE(ste_array != nullptr);
-  auto* trace_array = soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(ste_array);
-
-  ASSERT_TRUE(trace_array != nullptr);
-  ASSERT_TRUE(trace_array->Get(0) != nullptr);
-  EXPECT_STREQ("ExceptionHandle",
-               trace_array->Get(0)->GetDeclaringClass()->ToModifiedUtf8().c_str());
-  EXPECT_STREQ("ExceptionHandle.java",
-               trace_array->Get(0)->GetFileName()->ToModifiedUtf8().c_str());
-  EXPECT_STREQ("g", trace_array->Get(0)->GetMethodName()->ToModifiedUtf8().c_str());
-  EXPECT_EQ(37, trace_array->Get(0)->GetLineNumber());
-
-  ASSERT_TRUE(trace_array->Get(1) != nullptr);
-  EXPECT_STREQ("ExceptionHandle",
-               trace_array->Get(1)->GetDeclaringClass()->ToModifiedUtf8().c_str());
-  EXPECT_STREQ("ExceptionHandle.java",
-               trace_array->Get(1)->GetFileName()->ToModifiedUtf8().c_str());
-  EXPECT_STREQ("f", trace_array->Get(1)->GetMethodName()->ToModifiedUtf8().c_str());
-  EXPECT_EQ(22, trace_array->Get(1)->GetLineNumber());
-
-  thread->SetTopOfStack(nullptr);  // Disarm the assertion that no code is running when we detach.
-}
-
-}  // namespace art
diff --git a/runtime/experimental_flags.h b/runtime/experimental_flags.h
index 2e674e9..7faa2dc 100644
--- a/runtime/experimental_flags.h
+++ b/runtime/experimental_flags.h
@@ -26,8 +26,8 @@
   // The actual flag values.
   enum {
     kNone           = 0x0000,
-    kLambdas        = 0x0001,
-    kDefaultMethods = 0x0002,
+    kAgents         = 0x0001,  // 0b00000001
+    kRuntimePlugins = 0x0002,  // 0b00000010
   };
 
   constexpr ExperimentalFlags() : value_(0x0000) {}
@@ -65,12 +65,12 @@
 
 inline std::ostream& operator<<(std::ostream& stream, const ExperimentalFlags& e) {
   bool started = false;
-  if (e & ExperimentalFlags::kLambdas) {
-    stream << (started ? "|" : "") << "kLambdas";
+  if (e & ExperimentalFlags::kAgents) {
+    stream << (started ? "|" : "") << "kAgents";
     started = true;
   }
-  if (e & ExperimentalFlags::kDefaultMethods) {
-    stream << (started ? "|" : "") << "kDefaultMethods";
+  if (e & ExperimentalFlags::kRuntimePlugins) {
+    stream << (started ? "|" : "") << "kRuntimePlugins";
     started = true;
   }
   if (!started) {
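
For reference, a minimal standalone sketch of the bit-flag streaming pattern used above (illustrative enum values, not the ART `ExperimentalFlags` type):

```cpp
#include <iostream>

// Illustrative stand-in for the flag bits; not the ART definitions.
enum Flags : unsigned { kNone = 0x0, kAgents = 0x1, kRuntimePlugins = 0x2 };

std::ostream& operator<<(std::ostream& stream, Flags f) {
  bool started = false;
  if (f & kAgents) {
    stream << (started ? "|" : "") << "kAgents";
    started = true;
  }
  if (f & kRuntimePlugins) {
    stream << (started ? "|" : "") << "kRuntimePlugins";
    started = true;
  }
  if (!started) {
    stream << "kNone";
  }
  return stream;
}

int main() {
  std::cout << static_cast<Flags>(kAgents | kRuntimePlugins) << "\n";  // kAgents|kRuntimePlugins
}
```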
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 52ccbee..f86921c 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -146,43 +146,17 @@
   }
 }
 
-void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
-  // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
-  //
-  // If malloc calls abort, it will be holding its lock.
-  // If the handler tries to call malloc, it will deadlock.
-  VLOG(signals) << "Handling fault";
-  if (IsInGeneratedCode(info, context, true)) {
-    VLOG(signals) << "in generated code, looking for handler";
-    for (const auto& handler : generated_code_handlers_) {
-      VLOG(signals) << "invoking Action on handler " << handler;
-      if (handler->Action(sig, info, context)) {
-#ifdef TEST_NESTED_SIGNAL
-        // In test mode we want to fall through to stack trace handler
-        // on every signal (in reality this will cause a crash on the first
-        // signal).
-        break;
-#else
-        // We have handled a signal so it's time to return from the
-        // signal handler to the appropriate place.
-        return;
-#endif
-      }
-    }
+bool FaultManager::HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* context) {
+  if (other_handlers_.empty()) {
+    return false;
   }
 
-  // We hit a signal we didn't handle.  This might be something for which
-  // we can give more information about so call all registered handlers to see
-  // if it is.
-
   Thread* self = Thread::Current();
 
-  // If ART is not running, or the thread is not attached to ART pass the
-  // signal on to the next handler in the chain.
-  if (self == nullptr || Runtime::Current() == nullptr || !Runtime::Current()->IsStarted()) {
-    InvokeUserSignalHandler(sig, info, context);
-    return;
-  }
+  DCHECK(self != nullptr);
+  DCHECK(Runtime::Current() != nullptr);
+  DCHECK(Runtime::Current()->IsStarted());
+
   // Now set up the nested signal handler.
 
   // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully.
@@ -231,6 +205,7 @@
       break;
     }
   }
+
   if (success) {
     // Save the current state and call the handlers.  If anything causes a signal
     // our nested signal handler will be invoked and this will longjmp to the saved
@@ -247,7 +222,7 @@
             }
           }
           fault_manager.Init();
-          return;
+          return true;
         }
       }
     } else {
@@ -265,6 +240,40 @@
 
   // Now put the fault manager back in place.
   fault_manager.Init();
+  return false;
+}
+
+void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
+  // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
+  //
+  // If malloc calls abort, it will be holding its lock.
+  // If the handler tries to call malloc, it will deadlock.
+  VLOG(signals) << "Handling fault";
+  if (IsInGeneratedCode(info, context, true)) {
+    VLOG(signals) << "in generated code, looking for handler";
+    for (const auto& handler : generated_code_handlers_) {
+      VLOG(signals) << "invoking Action on handler " << handler;
+      if (handler->Action(sig, info, context)) {
+#ifdef TEST_NESTED_SIGNAL
+        // In test mode we want to fall through to stack trace handler
+        // on every signal (in reality this will cause a crash on the first
+        // signal).
+        break;
+#else
+        // We have handled a signal so it's time to return from the
+        // signal handler to the appropriate place.
+        return;
+#endif
+      }
+    }
+
+    // We hit a signal we didn't handle.  This might be something we can give
+    // more information about, so call all registered handlers to see if it is.
+    if (HandleFaultByOtherHandlers(sig, info, context)) {
+      return;
+    }
+  }
 
   // Set a breakpoint in this function to catch unhandled signals.
   art_sigsegv_fault();
@@ -332,7 +341,7 @@
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
   // TODO: Check linear alloc and image.
-  DCHECK_ALIGNED(ArtMethod::Size(sizeof(void*)), sizeof(void*))
+  DCHECK_ALIGNED(ArtMethod::Size(kRuntimePointerSize), sizeof(void*))
       << "ArtMethod is not pointer aligned";
   if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) {
     VLOG(signals) << "no method";
@@ -344,7 +353,7 @@
   // Check that the class pointer inside the object is not null and is aligned.
   // TODO: Method might be not a heap address, and GetClass could fault.
   // No read barrier because method_obj may not be a real object.
-  mirror::Class* cls = method_obj->GetDeclaringClassNoBarrier();
+  mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   if (cls == nullptr) {
     VLOG(signals) << "not a class";
     return false;
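
The hunks above split `HandleFault` into a fast path for faults in generated code plus a fallback that polls the other registered handlers before giving up. A minimal standalone sketch of that two-tier dispatch, using plain function pointers instead of the ART `FaultHandler` objects and omitting the nested-signal-handler setup the real code performs:

```cpp
#include <signal.h>

#include <vector>

// Illustrative handler signature; the real code dispatches to FaultHandler objects.
using Action = bool (*)(int sig, siginfo_t* info, void* context);

std::vector<Action> generated_code_handlers;
std::vector<Action> other_handlers;

void UnhandledFaultBreakpoint() {}  // stand-in for art_sigsegv_fault()

void HandleFaultSketch(int sig, siginfo_t* info, void* context, bool in_generated_code) {
  if (in_generated_code) {
    // First tier: handlers that can fix up the fault and resume execution.
    for (Action handler : generated_code_handlers) {
      if (handler(sig, info, context)) {
        return;
      }
    }
    // Second tier: handlers that can only add diagnostics for the fault.
    for (Action handler : other_handlers) {
      if (handler(sig, info, context)) {
        return;
      }
    }
  }
  UnhandledFaultBreakpoint();  // Nothing claimed the fault.
}
```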
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 3b03a14..56e0fb7 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -62,6 +62,10 @@
                          NO_THREAD_SAFETY_ANALYSIS;
 
  private:
+  // HandleFaultByOtherHandlers is called only by HandleFault, for faults in generated code.
+  bool HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* context)
+                                  NO_THREAD_SAFETY_ANALYSIS;
+
   std::vector<FaultHandler*> generated_code_handlers_;
   std::vector<FaultHandler*> other_handlers_;
   struct sigaction oldaction_;
@@ -92,6 +96,14 @@
 
   bool Action(int sig, siginfo_t* siginfo, void* context) OVERRIDE;
 
+  static bool IsValidImplicitCheck(siginfo_t* siginfo) {
+    // Our implicit NPE checks always limit the range to a page.
+    // Note that the runtime will do more exhaustive checks (that we cannot
+    // reasonably do in signal processing code) based on the dex instruction
+    // faulting.
+    return CanDoImplicitNullCheckOn(reinterpret_cast<uintptr_t>(siginfo->si_addr));
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(NullPointerHandler);
 };
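
`IsValidImplicitCheck` accepts a fault only when its address could plausibly come from dereferencing null plus a bounded field offset. A standalone sketch of such a predicate; the page-size bound is an assumption for illustration, and the runtime's actual `CanDoImplicitNullCheckOn` may differ:

```cpp
#include <cstdint>

// Assumed 4 KiB page size, for illustration only.
constexpr uintptr_t kAssumedPageSize = 4096;

// A fault address inside the first page can be `null->field` for a small
// field offset; larger addresses cannot come from a simple implicit check.
inline bool CanDoImplicitNullCheckOnSketch(uintptr_t fault_addr) {
  return fault_addr < kAssumedPageSize;
}
```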
diff --git a/runtime/gc/accounting/bitmap.cc b/runtime/gc/accounting/bitmap.cc
index fdded02..380cb8e 100644
--- a/runtime/gc/accounting/bitmap.cc
+++ b/runtime/gc/accounting/bitmap.cc
@@ -18,6 +18,7 @@
 
 #include "base/bit_utils.h"
 #include "card_table.h"
+#include "jit/jit_code_cache.h"
 #include "mem_map.h"
 
 namespace art {
@@ -91,6 +92,7 @@
 }
 
 template class MemoryRangeBitmap<CardTable::kCardSize>;
+template class MemoryRangeBitmap<jit::kJitCodeAlignment>;
 
 }  // namespace accounting
 }  // namespace gc
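
The added line is an explicit template instantiation: the `MemoryRangeBitmap` member definitions live in this .cc file, so each alignment used elsewhere must be instantiated here to link. A minimal standalone example of the idiom (illustrative type and values):

```cpp
// Template defined out of line within a single translation unit.
template <int kAlignment>
struct RangeBitmapSketch {
  int Alignment() const;
};

template <int kAlignment>
int RangeBitmapSketch<kAlignment>::Alignment() const { return kAlignment; }

// Explicit instantiations emit code for exactly these parameters so that
// other translation units can link against them.
template struct RangeBitmapSketch<128>;  // e.g. a card size (illustrative)
template struct RangeBitmapSketch<16>;   // e.g. a JIT code alignment (illustrative)
```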
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 1a7b1a3..121da37 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -17,6 +17,7 @@
 #include "card_table.h"
 
 #include "base/logging.h"
+#include "base/systrace.h"
 #include "card_table-inl.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
@@ -57,6 +58,7 @@
  */
 
 CardTable* CardTable::Create(const uint8_t* heap_begin, size_t heap_capacity) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   /* Set up the card table */
   size_t capacity = heap_capacity / kCardSize;
   /* Allocate an extra 256 bytes to allow fixed low-byte of base */
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 88a6c6c..b6af908 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -115,6 +115,8 @@
 
   // Resets all of the bytes in the card table to clean.
   void ClearCardTable();
+
+  // Clears the range of cards covering [start, end); both start and end must be aligned to
+  // kCardSize.
   void ClearCardRange(uint8_t* start, uint8_t* end);
 
   // Resets all of the bytes in the card table which do not map to the image space.
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 1361f7b..35bcb18 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -175,6 +175,11 @@
   card_table->ModifyCardsAtomic(space_->Begin(), space_->End(), AgeCardVisitor(), visitor);
 }
 
+void ModUnionTableReferenceCache::ClearTable() {
+  cleared_cards_.clear();
+  references_.clear();
+}
+
 class AddToReferenceArrayVisitor {
  public:
   AddToReferenceArrayVisitor(ModUnionTableReferenceCache* mod_union_table,
@@ -210,7 +215,11 @@
     if (mod_union_table_->ShouldAddReference(root->AsMirrorPtr())) {
       *has_target_reference_ = true;
       // TODO: Add MarkCompressedReference callback here.
-      root->Assign(visitor_->MarkObject(root->AsMirrorPtr()));
+      mirror::Object* old_ref = root->AsMirrorPtr();
+      mirror::Object* new_ref = visitor_->MarkObject(old_ref);
+      if (old_ref != new_ref) {
+        root->Assign(new_ref);
+      }
     }
   }
 
@@ -314,6 +323,18 @@
   const std::set<mirror::Object*>& references_;
 };
 
+class EmptyMarkObjectVisitor : public MarkObjectVisitor {
+ public:
+  mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE {return obj;}
+  void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {}
+};
+
+void ModUnionTable::FilterCards() {
+  EmptyMarkObjectVisitor visitor;
+  // Use an empty visitor, since filtering is done automatically by UpdateAndMarkReferences.
+  UpdateAndMarkReferences(&visitor);
+}
+
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
@@ -360,6 +381,31 @@
   }
 }
 
+void ModUnionTableReferenceCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  CardTable* const card_table = heap_->GetCardTable();
+  ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  for (uint8_t* card : cleared_cards_) {
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+  // This may visit the same card twice; TODO: avoid the duplicate visits.
+  for (const auto& pair : references_) {
+    const uint8_t* card = pair.first;
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+}
+
 void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   CardTable* const card_table = heap_->GetCardTable();
   std::vector<mirror::HeapReference<mirror::Object>*> cards_references;
@@ -485,9 +531,15 @@
   card_table->ModifyCardsAtomic(space_->Begin(), space_->End(), AgeCardVisitor(), visitor);
 }
 
+void ModUnionTableCardCache::ClearTable() {
+  card_bitmap_->Bitmap::Clear();
+}
+
 // Mark all references to the alloc space(s).
 void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
-  auto* image_space = heap_->GetImageSpace();
+  // TODO: Needs better support for multi-images? b/26317072
+  space::ImageSpace* image_space =
+      heap_->GetBootImageSpaces().empty() ? nullptr : heap_->GetBootImageSpaces()[0];
   // If we don't have an image space, just pass in space_ as the immune space. Pass in the same
   // space_ instead of image_space to avoid a null check in ModUnionUpdateObjectReferencesVisitor.
   CardBitVisitor bit_visitor(visitor, space_, image_space != nullptr ? image_space : space_,
@@ -496,6 +548,22 @@
       0, RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize, bit_visitor);
 }
 
+void ModUnionTableCardCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  card_bitmap_->VisitSetBits(
+      0,
+      RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize,
+      [this, callback, arg](size_t bit_index) {
+        const uintptr_t start = card_bitmap_->AddrFromBitIndex(bit_index);
+        DCHECK(space_->HasAddress(reinterpret_cast<mirror::Object*>(start)))
+            << start << " " << *space_;
+        space_->GetLiveBitmap()->VisitMarkedRange(start,
+                                                  start + CardTable::kCardSize,
+                                                  [this, callback, arg](mirror::Object* obj) {
+          callback(obj, arg);
+        });
+      });
+}
+
 void ModUnionTableCardCache::Dump(std::ostream& os) {
   os << "ModUnionTable dirty cards: [";
   // TODO: Find cleaner way of doing this.
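
Both `VisitObjects` implementations map a dirty card (or a set bit in the card bitmap) back to the `[start, start + kCardSize)` address range it covers, then walk the live bitmap over that range. A standalone sketch of the card-to-address arithmetic, with an assumed card size rather than the ART `CardTable` constant:

```cpp
#include <cstddef>
#include <cstdint>

constexpr size_t kAssumedCardSize = 128;  // illustrative; see CardTable for the real value

struct CardRange {
  uintptr_t begin;
  uintptr_t end;
};

// Map the card at `card_index` back to the heap address range it covers. The
// visitor then scans the live bitmap over this range and invokes the callback
// on every marked object, as the VisitObjects implementations above do.
inline CardRange AddrRangeFromCardIndex(uintptr_t heap_begin, size_t card_index) {
  const uintptr_t begin = heap_begin + card_index * kAssumedCardSize;
  return {begin, begin + kAssumedCardSize};
}
```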
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index a7a4246..6aa2417 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -63,11 +63,17 @@
   // Set all the cards.
   virtual void SetCards() = 0;
 
+  // Clear all of the table.
+  virtual void ClearTable() = 0;
+
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
   // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
   // spaces which are stored in the mod-union table.
   virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) = 0;
 
+  // Visit all of the objects that may contain references to other spaces.
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) = 0;
+
   // Verification, sanity checks that we don't have clean cards which conflict with our cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
   // SpaceBitmap::VisitMarkedRange and VisitMarkedRange can't know if the callback will modify the
@@ -78,6 +84,9 @@
   // doesn't need to be aligned.
   virtual bool ContainsCardFor(uintptr_t addr) = 0;
 
+  // Filter out cards that don't need to be marked. Automatically done with UpdateAndMarkReferences.
+  void FilterCards();
+
   virtual void Dump(std::ostream& os) = 0;
 
   space::ContinuousSpace* GetSpace() {
@@ -115,6 +124,10 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
   // VisitMarkedRange can't know if the callback will modify the bitmap or not.
   void Verify() OVERRIDE
@@ -130,6 +143,8 @@
 
   virtual void SetCards() OVERRIDE;
 
+  virtual void ClearTable() OVERRIDE;
+
  protected:
   // Cleared card array, used to update the mod-union table.
   ModUnionTable::CardSet cleared_cards_;
@@ -156,6 +171,10 @@
       REQUIRES(Locks::heap_bitmap_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Nothing to verify.
   virtual void Verify() OVERRIDE {}
 
@@ -163,9 +182,10 @@
 
   virtual bool ContainsCardFor(uintptr_t addr) OVERRIDE;
 
-  // Sets all the cards in the mod union table to be marked.
   virtual void SetCards() OVERRIDE;
 
+  virtual void ClearTable() OVERRIDE;
+
  protected:
   // Cleared card bitmap, used to update the mod-union table.
   std::unique_ptr<CardBitmap> card_bitmap_;
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index edab1b0..349d6ff 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -22,6 +22,7 @@
 #include "mirror/array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 
 namespace art {
 namespace gc {
@@ -184,7 +185,11 @@
   std::unique_ptr<space::DlMallocSpace> other_space(space::DlMallocSpace::Create(
       "other space", 128 * KB, 4 * MB, 4 * MB, nullptr, false));
   ASSERT_TRUE(other_space.get() != nullptr);
-  heap->AddSpace(other_space.get());
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    ScopedSuspendAll ssa("Add image space");
+    heap->AddSpace(other_space.get());
+  }
   std::unique_ptr<ModUnionTable> table(ModUnionTableFactory::Create(
       type, space, other_space.get()));
   ASSERT_TRUE(table.get() != nullptr);
@@ -253,6 +258,8 @@
   std::ostringstream oss2;
   table->Dump(oss2);
   // Remove the space we added so it doesn't persist to the next test.
+  ScopedThreadSuspension sts(self, kSuspended);
+  ScopedSuspendAll ssa("Remove space");
   heap->RemoveSpace(other_space.get());
 }
 
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 006d2c7..4cf5b4f 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -46,7 +46,7 @@
       DCHECK(Test(obj));
       return true;
     }
-  } while (!atomic_entry->CompareExchangeWeakSequentiallyConsistent(old_word, old_word | mask));
+  } while (!atomic_entry->CompareExchangeWeakRelaxed(old_word, old_word | mask));
   DCHECK(Test(obj));
   return false;
 }
@@ -167,7 +167,14 @@
   uintptr_t* address = &bitmap_begin_[index];
   uintptr_t old_word = *address;
   if (kSetBit) {
-    *address = old_word | mask;
+    // Check the bit before setting the word in case we are trying to mark a read-only bitmap
+    // such as an image space bitmap. Such a bitmap is mapped read-only and will fault if we
+    // attempt to change any of its words. Since all of its objects are already marked, the
+    // fault never occurs if we check before setting the bit. The check also avoids dirtying
+    // pages that would be dirtied if the bitmap were writable and we stored unconditionally.
+    if ((old_word & mask) == 0) {
+      *address = old_word | mask;
+    }
   } else {
     *address = old_word & ~mask;
   }
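
The new non-atomic path tests the bit before storing: a fully marked, read-only image bitmap is never written (so never faults), and already-marked words on writable bitmaps are not needlessly dirtied. A standalone sketch of the check-before-write idiom:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative word-array bit set; not the ART SpaceBitmap.
inline void SetBitIfClear(uintptr_t* words, size_t word_index, size_t bit) {
  uintptr_t* const address = &words[word_index];
  const uintptr_t mask = static_cast<uintptr_t>(1) << bit;
  const uintptr_t old_word = *address;
  if ((old_word & mask) == 0) {
    // Store only when the bit is actually clear: no write fault on read-only
    // mappings where everything is marked, and no newly dirtied pages.
    *address = old_word | mask;
  }
}
```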
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 369e408..522f236 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -17,10 +17,11 @@
 #include "allocation_record.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "stack.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/properties.h"
 #endif
 
@@ -34,15 +35,11 @@
 
 const char* AllocRecord::GetClassDescriptor(std::string* storage) const {
   // klass_ could contain null only if we implement class unloading.
-  if (UNLIKELY(klass_.IsNull())) {
-    return "null";
-  } else {
-    return klass_.Read()->GetDescriptor(storage);
-  }
+  return klass_.IsNull() ? "null" : klass_.Read()->GetDescriptor(storage);
 }
 
 void AllocRecordObjectMap::SetProperties() {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // Check whether there's a system property overriding the max number of records.
   const char* propertyName = "dalvik.vm.allocTrackerMax";
   char allocMaxString[PROPERTY_VALUE_MAX];
@@ -92,11 +89,11 @@
       max_stack_depth_ = value;
     }
   }
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 AllocRecordObjectMap::~AllocRecordObjectMap() {
-  STLDeleteValues(&entries_);
+  Clear();
 }
 
 void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) {
@@ -105,8 +102,19 @@
   size_t count = recent_record_max_;
   // Only visit the last recent_record_max_ number of allocation records in entries_ and mark the
   // klass_ fields as strong roots.
-  for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
-    buffered_visitor.VisitRootIfNonNull(it->second->GetClassGcRoot());
+  for (auto it = entries_.rbegin(), end = entries_.rend(); it != end; ++it) {
+    AllocRecord& record = it->second;
+    if (count > 0) {
+      buffered_visitor.VisitRootIfNonNull(record.GetClassGcRoot());
+      --count;
+    }
+    // Visit all of the stack frames to make sure no methods in the stack traces get unloaded by
+    // class unloading.
+    for (size_t i = 0, depth = record.GetDepth(); i < depth; ++i) {
+      const AllocRecordStackTraceElement& element = record.StackElement(i);
+      DCHECK(element.GetMethod() != nullptr);
+      element.GetMethod()->VisitRoots(buffered_visitor, kRuntimePointerSize);
+    }
   }
 }
 
@@ -131,25 +139,19 @@
   VLOG(heap) << "Start SweepAllocationRecords()";
   size_t count_deleted = 0, count_moved = 0, count = 0;
   // Only the first (size - recent_record_max_) number of records can be deleted.
-  size_t delete_bound;
-  if (entries_.size() <= recent_record_max_) {
-    delete_bound = 0;
-  } else {
-    delete_bound = entries_.size() - recent_record_max_;
-  }
+  const size_t delete_bound = std::max(entries_.size(), recent_record_max_) - recent_record_max_;
   for (auto it = entries_.begin(), end = entries_.end(); it != end;) {
     ++count;
     // This does not need a read barrier because this is called by GC.
     mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>();
-    AllocRecord* record = it->second;
+    AllocRecord& record = it->second;
     mirror::Object* new_object = old_object == nullptr ? nullptr : visitor->IsMarked(old_object);
     if (new_object == nullptr) {
       if (count > delete_bound) {
         it->first = GcRoot<mirror::Object>(nullptr);
-        SweepClassObject(record, visitor);
+        SweepClassObject(&record, visitor);
         ++it;
       } else {
-        delete record;
         it = entries_.erase(it);
         ++count_deleted;
       }
@@ -158,7 +160,7 @@
         it->first = GcRoot<mirror::Object>(new_object);
         ++count_moved;
       }
-      SweepClassObject(record, visitor);
+      SweepClassObject(&record, visitor);
       ++it;
     }
   }
@@ -182,35 +184,32 @@
   new_record_condition_.Broadcast(Thread::Current());
 }
 
-struct AllocRecordStackVisitor : public StackVisitor {
-  AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max)
+class AllocRecordStackVisitor : public StackVisitor {
+ public:
+  AllocRecordStackVisitor(Thread* thread, size_t max_depth, AllocRecordStackTrace* trace_out)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        trace(trace_in),
-        depth(0),
-        max_depth(max) {}
+        max_depth_(max_depth),
+        trace_(trace_out) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
   // annotalysis.
   bool VisitFrame() OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
-    if (depth >= max_depth) {
+    if (trace_->GetDepth() >= max_depth_) {
       return false;
     }
     ArtMethod* m = GetMethod();
-    if (!m->IsRuntimeMethod()) {
-      trace->SetStackElementAt(depth, m, GetDexPc());
-      ++depth;
+    // m may be null if we have inlined methods of unresolved classes. b/27858645
+    if (m != nullptr && !m->IsRuntimeMethod()) {
+      m = m->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+      trace_->AddStackElement(AllocRecordStackTraceElement(m, GetDexPc()));
     }
     return true;
   }
 
-  ~AllocRecordStackVisitor() {
-    trace->SetDepth(depth);
-  }
-
-  AllocRecordStackTrace* trace;
-  size_t depth;
-  const size_t max_depth;
+ private:
+  const size_t max_depth_;
+  AllocRecordStackTrace* const trace_;
 };
 
 void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) {
@@ -222,7 +221,11 @@
       if (heap->IsAllocTrackingEnabled()) {
         return;  // Already enabled, bail.
       }
-      AllocRecordObjectMap* records = new AllocRecordObjectMap();
+      AllocRecordObjectMap* records = heap->GetAllocationRecords();
+      if (records == nullptr) {
+        records = new AllocRecordObjectMap;
+        heap->SetAllocationRecords(records);
+      }
       CHECK(records != nullptr);
       records->SetProperties();
       std::string self_name;
@@ -230,17 +233,20 @@
       if (self_name == "JDWP") {
         records->alloc_ddm_thread_id_ = self->GetTid();
       }
-      records->scratch_trace_.SetDepth(records->max_stack_depth_);
       size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ +
                   sizeof(AllocRecord) + sizeof(AllocRecordStackTrace);
       LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of "
                 << records->max_stack_depth_ << " frames, taking up to "
                 << PrettySize(sz * records->alloc_record_max_) << ")";
-      heap->SetAllocationRecords(records);
-      heap->SetAllocTrackingEnabled(true);
     }
     Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+    {
+      MutexLock mu(self, *Locks::alloc_tracker_lock_);
+      heap->SetAllocTrackingEnabled(true);
+    }
   } else {
+    // Do the uninstrumentation outside of the critical section to avoid possible lock
+    // violations such as with the runtime shutdown lock.
     {
       MutexLock mu(self, *Locks::alloc_tracker_lock_);
       if (!heap->IsAllocTrackingEnabled()) {
@@ -248,53 +254,67 @@
       }
       heap->SetAllocTrackingEnabled(false);
       LOG(INFO) << "Disabling alloc tracker";
-      heap->SetAllocationRecords(nullptr);
+      AllocRecordObjectMap* records = heap->GetAllocationRecords();
+      records->Clear();
     }
     // If an allocation comes in before we uninstrument, we will safely drop it on the floor.
     Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
   }
 }
 
-void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
+void AllocRecordObjectMap::RecordAllocation(Thread* self,
+                                            mirror::Object** obj,
                                             size_t byte_count) {
+  // Get the stack trace outside of the lock, in case there are allocations during the stack walk.
+  // b/27858645.
+  AllocRecordStackTrace trace;
+  AllocRecordStackVisitor visitor(self, max_stack_depth_, /*out*/ &trace);
+  {
+    StackHandleScope<1> hs(self);
+    auto obj_wrapper = hs.NewHandleWrapper(obj);
+    visitor.WalkStack();
+  }
+
   MutexLock mu(self, *Locks::alloc_tracker_lock_);
-  Heap* heap = Runtime::Current()->GetHeap();
+  Heap* const heap = Runtime::Current()->GetHeap();
   if (!heap->IsAllocTrackingEnabled()) {
     // In the process of shutting down recording, bail.
     return;
   }
 
-  AllocRecordObjectMap* records = heap->GetAllocationRecords();
-  DCHECK(records != nullptr);
-
-  // Do not record for DDM thread
-  if (records->alloc_ddm_thread_id_ == self->GetTid()) {
+  // Do not record for DDM thread.
+  if (alloc_ddm_thread_id_ == self->GetTid()) {
     return;
   }
 
   // Wait for GC's sweeping to complete and allow new records
-  while (UNLIKELY((!kUseReadBarrier && !records->allow_new_record_) ||
+  while (UNLIKELY((!kUseReadBarrier && !allow_new_record_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
-    records->new_record_condition_.WaitHoldingLocks(self);
+    new_record_condition_.WaitHoldingLocks(self);
   }
 
-  DCHECK_LE(records->Size(), records->alloc_record_max_);
-
-  // Get stack trace.
-  // add scope to make "visitor" destroyed promptly, in order to set the scratch_trace_->depth_
-  {
-    AllocRecordStackVisitor visitor(self, &records->scratch_trace_, records->max_stack_depth_);
-    visitor.WalkStack();
+  if (!heap->IsAllocTrackingEnabled()) {
+    // Return if the allocation tracking has been disabled while waiting for system weak access
+    // above.
+    return;
   }
-  records->scratch_trace_.SetTid(self->GetTid());
-  AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_);
 
-  // Fill in the basics.
-  AllocRecord* record = new AllocRecord(byte_count, klass, trace);
+  DCHECK_LE(Size(), alloc_record_max_);
 
-  records->Put(obj, record);
-  DCHECK_LE(records->Size(), records->alloc_record_max_);
+  // Set the tid of the allocating thread.
+  trace.SetTid(self->GetTid());
+
+  // Add the record.
+  Put(*obj, AllocRecord(byte_count, (*obj)->GetClass(), std::move(trace)));
+  DCHECK_LE(Size(), alloc_record_max_);
 }
 
+void AllocRecordObjectMap::Clear() {
+  entries_.clear();
+}
+
+AllocRecordObjectMap::AllocRecordObjectMap()
+    : new_record_condition_("New allocation record condition", *Locks::alloc_tracker_lock_) {}
+
 }  // namespace gc
 }  // namespace art
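
The reworked `RecordAllocation` captures the stack trace before taking `alloc_tracker_lock_` and re-checks the enabled flag after any wait, since tracking can be disabled while the thread is blocked. A standalone sketch of that capture-outside, revalidate-inside pattern using standard primitives (all names here are illustrative):

```cpp
#include <cstdint>
#include <mutex>
#include <utility>
#include <vector>

struct Trace { std::vector<uintptr_t> frames; };

std::mutex tracker_lock;
bool tracking_enabled = false;   // toggled elsewhere under tracker_lock
std::vector<Trace> records;

Trace CaptureStackTrace() {
  return Trace{};  // placeholder; the runtime walks the managed stack here
}

void RecordAllocationSketch() {
  // 1) Expensive, allocation-prone work happens without the lock held.
  Trace trace = CaptureStackTrace();
  std::lock_guard<std::mutex> guard(tracker_lock);
  // 2) Re-check under the lock: tracking may have been disabled meanwhile,
  //    in which case the record is safely dropped on the floor.
  if (!tracking_enabled) {
    return;
  }
  records.push_back(std::move(trace));
}
```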
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index ffdfd31..a2d86cc 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_ALLOCATION_RECORD_H_
 
 #include <list>
+#include <memory>
 
 #include "base/mutex.h"
 #include "object_callbacks.h"
@@ -37,10 +38,13 @@
 
 class AllocRecordStackTraceElement {
  public:
-  AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {}
-
   int32_t ComputeLineNumber() const SHARED_REQUIRES(Locks::mutator_lock_);
 
+  AllocRecordStackTraceElement() = default;
+  AllocRecordStackTraceElement(ArtMethod* method, uint32_t dex_pc)
+      : method_(method),
+        dex_pc_(dex_pc) {}
+
   ArtMethod* GetMethod() const {
     return method_;
   }
@@ -58,32 +62,27 @@
   }
 
   bool operator==(const AllocRecordStackTraceElement& other) const {
-    if (this == &other) return true;
     return method_ == other.method_ && dex_pc_ == other.dex_pc_;
   }
 
  private:
-  ArtMethod* method_;
-  uint32_t dex_pc_;
+  ArtMethod* method_ = nullptr;
+  uint32_t dex_pc_ = 0;
 };
 
 class AllocRecordStackTrace {
  public:
   static constexpr size_t kHashMultiplier = 17;
 
-  explicit AllocRecordStackTrace(size_t max_depth)
-      : tid_(0), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+  AllocRecordStackTrace() = default;
+
+  AllocRecordStackTrace(AllocRecordStackTrace&& r)
+      : tid_(r.tid_),
+        stack_(std::move(r.stack_)) {}
 
   AllocRecordStackTrace(const AllocRecordStackTrace& r)
-      : tid_(r.tid_), depth_(r.depth_), stack_(new AllocRecordStackTraceElement[r.depth_]) {
-    for (size_t i = 0; i < depth_; ++i) {
-      stack_[i] = r.stack_[i];
-    }
-  }
-
-  ~AllocRecordStackTrace() {
-    delete[] stack_;
-  }
+      : tid_(r.tid_),
+        stack_(r.stack_) {}
 
   pid_t GetTid() const {
     return tid_;
@@ -94,37 +93,32 @@
   }
 
   size_t GetDepth() const {
-    return depth_;
-  }
-
-  void SetDepth(size_t depth) {
-    depth_ = depth;
+    return stack_.size();
   }
 
   const AllocRecordStackTraceElement& GetStackElement(size_t index) const {
-    DCHECK_LT(index, depth_);
+    DCHECK_LT(index, GetDepth());
     return stack_[index];
   }
 
+  void AddStackElement(const AllocRecordStackTraceElement& element) {
+    stack_.push_back(element);
+  }
+
   void SetStackElementAt(size_t index, ArtMethod* m, uint32_t dex_pc) {
+    DCHECK_LT(index, stack_.size());
     stack_[index].SetMethod(m);
     stack_[index].SetDexPc(dex_pc);
   }
 
   bool operator==(const AllocRecordStackTrace& other) const {
     if (this == &other) return true;
-    if (tid_ != other.tid_) return false;
-    if (depth_ != other.depth_) return false;
-    for (size_t i = 0; i < depth_; ++i) {
-      if (!(stack_[i] == other.stack_[i])) return false;
-    }
-    return true;
+    return tid_ == other.tid_ && stack_ == other.stack_;
   }
 
  private:
-  pid_t tid_;
-  size_t depth_;
-  AllocRecordStackTraceElement* const stack_;
+  pid_t tid_ = 0;
+  std::vector<AllocRecordStackTraceElement> stack_;
 };
 
 struct HashAllocRecordTypes {
@@ -161,19 +155,15 @@
 class AllocRecord {
  public:
   // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap.
-  AllocRecord(size_t count, mirror::Class* klass, AllocRecordStackTrace* trace)
-      : byte_count_(count), klass_(klass), trace_(trace) {}
-
-  ~AllocRecord() {
-    delete trace_;
-  }
+  AllocRecord(size_t count, mirror::Class* klass, AllocRecordStackTrace&& trace)
+      : byte_count_(count), klass_(klass), trace_(std::move(trace)) {}
 
   size_t GetDepth() const {
-    return trace_->GetDepth();
+    return trace_.GetDepth();
   }
 
   const AllocRecordStackTrace* GetStackTrace() const {
-    return trace_;
+    return &trace_;
   }
 
   size_t ByteCount() const {
@@ -181,7 +171,7 @@
   }
 
   pid_t GetTid() const {
-    return trace_->GetTid();
+    return trace_.GetTid();
   }
 
   mirror::Class* GetClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -196,16 +186,15 @@
   }
 
   const AllocRecordStackTraceElement& StackElement(size_t index) const {
-    return trace_->GetStackElement(index);
+    return trace_.GetStackElement(index);
   }
 
  private:
   const size_t byte_count_;
   // The klass_ could be a strong or weak root for GC
   GcRoot<mirror::Class> klass_;
-  // TODO: Currently trace_ is like a std::unique_ptr,
-  // but in future with deduplication it could be a std::shared_ptr.
-  const AllocRecordStackTrace* const trace_;
+  // TODO: Share between alloc records with identical stack traces.
+  AllocRecordStackTrace trace_;
 };
 
 class AllocRecordObjectMap {
@@ -215,36 +204,29 @@
   // weak roots). The last recent_record_max_ number of pairs in the list are always kept for DDMS's
   // recent allocation tracking, but GcRoot<mirror::Object> pointers in these pairs can become null.
   // Both types of pointers need read barriers, do not directly access them.
-  typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList;
+  using EntryPair = std::pair<GcRoot<mirror::Object>, AllocRecord>;
+  typedef std::list<EntryPair> EntryList;
 
-  // "static" because it is part of double-checked locking. It needs to check a bool first,
-  // in order to make sure the AllocRecordObjectMap object is not null.
-  static void RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
-                               size_t byte_count)
+  // The caller needs to check that tracking is enabled before calling, since we read the
+  // stack trace before checking the enabled boolean.
+  void RecordAllocation(Thread* self,
+                        mirror::Object** obj,
+                        size_t byte_count)
       REQUIRES(!Locks::alloc_tracker_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void SetAllocTrackingEnabled(bool enabled) REQUIRES(!Locks::alloc_tracker_lock_);
 
-  AllocRecordObjectMap() REQUIRES(Locks::alloc_tracker_lock_)
-      : alloc_record_max_(kDefaultNumAllocRecords),
-        recent_record_max_(kDefaultNumRecentRecords),
-        max_stack_depth_(kDefaultAllocStackDepth),
-        scratch_trace_(kMaxSupportedStackDepth),
-        alloc_ddm_thread_id_(0),
-        allow_new_record_(true),
-        new_record_condition_("New allocation record condition", *Locks::alloc_tracker_lock_) {}
-
+  AllocRecordObjectMap() REQUIRES(Locks::alloc_tracker_lock_);
   ~AllocRecordObjectMap();
 
-  void Put(mirror::Object* obj, AllocRecord* record)
+  void Put(mirror::Object* obj, AllocRecord&& record)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::alloc_tracker_lock_) {
     if (entries_.size() == alloc_record_max_) {
-      delete entries_.front().second;
       entries_.pop_front();
     }
-    entries_.emplace_back(GcRoot<mirror::Object>(obj), record);
+    entries_.push_back(EntryPair(GcRoot<mirror::Object>(obj), std::move(record)));
   }
 
   size_t Size() const SHARED_REQUIRES(Locks::alloc_tracker_lock_) {
@@ -306,17 +288,18 @@
     return entries_.rend();
   }
 
+  void Clear() REQUIRES(Locks::alloc_tracker_lock_);
+
  private:
   static constexpr size_t kDefaultNumAllocRecords = 512 * 1024;
   static constexpr size_t kDefaultNumRecentRecords = 64 * 1024 - 1;
   static constexpr size_t kDefaultAllocStackDepth = 16;
   static constexpr size_t kMaxSupportedStackDepth = 128;
-  size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  bool allow_new_record_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_) = kDefaultNumAllocRecords;
+  size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_) = kDefaultNumRecentRecords;
+  size_t max_stack_depth_ = kDefaultAllocStackDepth;
+  pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_) = 0;
+  bool allow_new_record_ GUARDED_BY(Locks::alloc_tracker_lock_) = true;
   ConditionVariable new_record_condition_ GUARDED_BY(Locks::alloc_tracker_lock_);
   // see the comment in typedef of EntryList
   EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
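
With `AllocRecord` now stored by value, `Put` acts as a bounded FIFO: at capacity the oldest entry is popped before the new one is appended, and destruction is automatic. A standalone sketch of the eviction policy (illustrative container, not the ART `EntryList`):

```cpp
#include <cstddef>
#include <list>
#include <utility>

template <typename Key, typename Record>
class BoundedRecordList {
 public:
  explicit BoundedRecordList(size_t max) : max_(max) {}

  void Put(Key key, Record&& record) {
    if (entries_.size() == max_) {
      entries_.pop_front();  // Evict the oldest record; no manual delete needed.
    }
    entries_.emplace_back(std::move(key), std::move(record));
  }

  size_t Size() const { return entries_.size(); }

 private:
  const size_t max_;
  std::list<std::pair<Key, Record>> entries_;
};
```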
diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/dlmalloc.cc
index e747f00..dc4e312 100644
--- a/runtime/gc/allocator/dlmalloc.cc
+++ b/runtime/gc/allocator/dlmalloc.cc
@@ -36,7 +36,7 @@
 #pragma GCC diagnostic ignored "-Wredundant-decls"
 #pragma GCC diagnostic ignored "-Wempty-body"
 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
-#include "../../../bionic/libc/upstream-dlmalloc/malloc.c"
+#include "../../../external/dlmalloc/malloc.c"
 #pragma GCC diagnostic pop
 
 static void* art_heap_morecore(void* m, intptr_t increment) {
diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/dlmalloc.h
index 0558921..c07da5d 100644
--- a/runtime/gc/allocator/dlmalloc.h
+++ b/runtime/gc/allocator/dlmalloc.h
@@ -32,10 +32,10 @@
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wredundant-decls"
-#include "../../bionic/libc/upstream-dlmalloc/malloc.h"
+#include "../../external/dlmalloc/malloc.h"
 #pragma GCC diagnostic pop
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 // Define dlmalloc routines from bionic that cannot be included directly because of redefining
 // symbols from the include above.
 extern "C" void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), void* arg);
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index 2510514..d1c81e3 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -62,11 +62,6 @@
   }
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
   DCHECK_LT(idx, kNumThreadLocalSizeBrackets);
   Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
   if (kIsDebugBuild) {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 9c8e4df..375d869 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -58,10 +58,16 @@
       page_release_mode_(page_release_mode),
       page_release_size_threshold_(page_release_size_threshold),
       is_running_on_memory_tool_(running_on_memory_tool) {
+  DCHECK_ALIGNED(base, kPageSize);
   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
   CHECK_LE(capacity, max_capacity);
   CHECK_ALIGNED(page_release_size_threshold_, kPageSize);
+  // Zero the memory explicitly (don't rely on the mem map being zero-initialized).
+  if (!kMadviseZeroes) {
+    memset(base_, 0, max_capacity);
+  }
+  CHECK_EQ(madvise(base_, max_capacity, MADV_DONTNEED), 0);
   if (!initialized_) {
     Initialize();
   }
@@ -638,11 +644,6 @@
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
   if (LIKELY(slot_addr != nullptr)) {
@@ -662,14 +663,7 @@
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
-
   void* slot_addr;
-
   if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
     // Use a thread-local run.
     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
@@ -881,17 +875,6 @@
   return stream.str();
 }
 
-inline size_t RosAlloc::Run::SlotIndex(Slot* slot) {
-  const uint8_t idx = size_bracket_idx_;
-  const size_t bracket_size = bracketSizes[idx];
-  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
-      - reinterpret_cast<uint8_t*>(FirstSlot());
-  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
-  size_t slot_idx = offset_from_slot_base / bracket_size;
-  DCHECK_LT(slot_idx, numOfSlots[idx]);
-  return slot_idx;
-}
-
 void RosAlloc::Run::FreeSlot(void* ptr) {
   DCHECK(!IsThreadLocal());
   const uint8_t idx = size_bracket_idx_;
@@ -1038,7 +1021,7 @@
 
   // First mark slots to free in the bulk free bit map without locking the
   // size bracket locks. On host, unordered_set is faster than vector + flag.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   std::vector<Run*> runs;
 #else
   std::unordered_set<Run*, hash_run, eq_run> runs;
@@ -1105,7 +1088,7 @@
     DCHECK_EQ(run->magic_num_, kMagicNum);
     // Set the bit in the bulk free bit map.
     freed_bytes += run->AddToBulkFreeList(ptr);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     if (!run->to_be_bulk_freed_) {
       run->to_be_bulk_freed_ = true;
       runs.push_back(run);
@@ -1120,7 +1103,7 @@
   // union the bulk free bit map into the thread-local free bit map
   // (for thread-local runs.)
   for (Run* run : runs) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     DCHECK(run->to_be_bulk_freed_);
     run->to_be_bulk_freed_ = false;
 #endif
@@ -1526,10 +1509,9 @@
   }
 }
 
+// The function below may be called by the mutator itself just before thread termination.
 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
-  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  ReaderMutexLock wmu(self, bulk_free_lock_);
   size_t free_bytes = 0U;
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
@@ -1544,10 +1526,17 @@
       // Count the number of free slots left.
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
+      // The above bracket index lock guards the thread-local free list against a race with
+      // the GC thread unioning the bulk free list into the thread-local free list in BulkFree.
+      // If the run is marked thread-local, the GC thread updates its thread-local free list
+      // in BulkFree, and the latest thread-local free list is merged into the free list
+      // either when this thread-local run becomes full or when the run is revoked here; in
+      // either case the free list ends up updated. If the run is not marked thread-local,
+      // the GC thread merges the bulk free list in the next BulkFree.
+      // Thus there is no need to merge the bulk free list into the free list again here.
       bool dont_care;
       thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
@@ -1641,9 +1630,14 @@
 
 void RosAlloc::Initialize() {
   // bracketSizes.
+  static_assert(kNumRegularSizeBrackets == kNumOfSizeBrackets - 2,
+                "There should be two non-regular brackets");
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    if (i < kNumOfSizeBrackets - 2) {
-      bracketSizes[i] = 16 * (i + 1);
+    if (i < kNumThreadLocalSizeBrackets) {
+      bracketSizes[i] = kThreadLocalBracketQuantumSize * (i + 1);
+    } else if (i < kNumRegularSizeBrackets) {
+      bracketSizes[i] = kBracketQuantumSize * (i - kNumThreadLocalSizeBrackets + 1) +
+          (kThreadLocalBracketQuantumSize * kNumThreadLocalSizeBrackets);
     } else if (i == kNumOfSizeBrackets - 2) {
       bracketSizes[i] = 1 * KB;
     } else {
@@ -1656,20 +1650,17 @@
   }
   // numOfPages.
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    if (i < 4) {
+    if (i < kNumThreadLocalSizeBrackets) {
       numOfPages[i] = 1;
-    } else if (i < 8) {
+    } else if (i < (kNumThreadLocalSizeBrackets + kNumRegularSizeBrackets) / 2) {
       numOfPages[i] = 1;
-    } else if (i < 16) {
-      numOfPages[i] = 4;
-    } else if (i < 32) {
-      numOfPages[i] = 8;
-    } else if (i == 32) {
-      DCHECK_EQ(i, kNumOfSizeBrackets - 2);
-      numOfPages[i] = 16;
+    } else if (i < kNumRegularSizeBrackets) {
+      numOfPages[i] = 1;
+    } else if (i == kNumOfSizeBrackets - 2) {
+      numOfPages[i] = 2;
     } else {
       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
-      numOfPages[i] = 32;
+      numOfPages[i] = 4;
     }
     if (kTraceRosAlloc) {
       LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
@@ -1695,8 +1686,8 @@
       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
           tmp_unaligned_header_size :
           tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
-      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
-      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % bracket_size, 0U);
+      DCHECK_EQ(tmp_header_size % sizeof(uint64_t), 0U);
       if (tmp_slots_size + tmp_header_size <= run_size) {
         // Found the right number of slots, that is, there was enough
         // space for the header (including the bit maps.)
@@ -1705,8 +1696,8 @@
         break;
       }
     }
-    DCHECK_GT(num_of_slots, 0U);
-    DCHECK_GT(header_size, 0U);
+    DCHECK_GT(num_of_slots, 0U) << i;
+    DCHECK_GT(header_size, 0U) << i;
     // Add the padding for the alignment remainder.
     header_size += run_size % bracket_size;
     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
@@ -1717,7 +1708,7 @@
                 << ", headerSizes[" << i << "]=" << headerSizes[i];
     }
   }
-  // Fill the alloc bitmap so nobody can successfully allocate from it.
+  // Set up the dedicated full run so that nobody can successfully allocate from it.
   if (kIsDebugBuild) {
     dedicated_full_run_->magic_num_ = kMagicNum;
   }
@@ -1729,6 +1720,9 @@
 
   // The smallest bracket size must be at least as large as the sizeof(Slot).
   DCHECK_LE(sizeof(Slot), bracketSizes[0]) << "sizeof(Slot) <= the smallest bracket size";
+  // Check the invariants between the max bracket sizes and the number of brackets.
+  DCHECK_EQ(kMaxThreadLocalBracketSize, bracketSizes[kNumThreadLocalSizeBrackets - 1]);
+  DCHECK_EQ(kMaxRegularBracketSize, bracketSizes[kNumRegularSizeBrackets - 1]);
 }
 
 void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
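
The rewritten `bracketSizes` loop splits the small brackets into a finer thread-local quantum region followed by the regular quantum region, ahead of the two large 1 KiB / 2 KiB brackets. A standalone sketch with assumed constants (8-byte and 16-byte quanta, 16 thread-local and 40 regular brackets; the real values live in rosalloc.h):

```cpp
#include <cstddef>
#include <cstdio>

// Assumed values, for illustration only.
constexpr size_t kThreadLocalQuantum = 8;
constexpr size_t kRegularQuantum = 16;
constexpr size_t kNumThreadLocal = 16;
constexpr size_t kNumRegular = 40;
constexpr size_t kNumBrackets = kNumRegular + 2;  // plus the 1 KiB and 2 KiB brackets

int main() {
  for (size_t i = 0; i < kNumBrackets; ++i) {
    size_t bracket_size;
    if (i < kNumThreadLocal) {
      bracket_size = kThreadLocalQuantum * (i + 1);              // 8, 16, ..., 128
    } else if (i < kNumRegular) {
      bracket_size = kRegularQuantum * (i - kNumThreadLocal + 1) +
                     kThreadLocalQuantum * kNumThreadLocal;      // 144, 160, ..., 512
    } else if (i == kNumBrackets - 2) {
      bracket_size = 1024;
    } else {
      bracket_size = 2048;
    }
    std::printf("bracketSizes[%zu]=%zu\n", i, bracket_size);
  }
  return 0;
}
```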
@@ -2108,6 +2102,94 @@
   }
 }
 
+void RosAlloc::DumpStats(std::ostream& os) {
+  Thread* self = Thread::Current();
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
+      << "The mutator locks isn't exclusively locked at " << __PRETTY_FUNCTION__;
+  size_t num_large_objects = 0;
+  size_t num_pages_large_objects = 0;
+  // These arrays are zero initialized.
+  std::unique_ptr<size_t[]> num_runs(new size_t[kNumOfSizeBrackets]());
+  std::unique_ptr<size_t[]> num_pages_runs(new size_t[kNumOfSizeBrackets]());
+  std::unique_ptr<size_t[]> num_slots(new size_t[kNumOfSizeBrackets]());
+  std::unique_ptr<size_t[]> num_used_slots(new size_t[kNumOfSizeBrackets]());
+  std::unique_ptr<size_t[]> num_metadata_bytes(new size_t[kNumOfSizeBrackets]());
+  ReaderMutexLock rmu(self, bulk_free_lock_);
+  MutexLock lock_mu(self, lock_);
+  for (size_t i = 0; i < page_map_size_; ) {
+    uint8_t pm = page_map_[i];
+    switch (pm) {
+      case kPageMapReleased:
+      case kPageMapEmpty:
+        ++i;
+        break;
+      case kPageMapLargeObject: {
+        size_t num_pages = 1;
+        size_t idx = i + 1;
+        while (idx < page_map_size_ && page_map_[idx] == kPageMapLargeObjectPart) {
+          num_pages++;
+          idx++;
+        }
+        num_large_objects++;
+        num_pages_large_objects += num_pages;
+        i += num_pages;
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl
+                   << DumpPageMap();
+        break;
+      case kPageMapRun: {
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        size_t idx = run->size_bracket_idx_;
+        size_t num_pages = numOfPages[idx];
+        num_runs[idx]++;
+        num_pages_runs[idx] += num_pages;
+        num_slots[idx] += numOfSlots[idx];
+        size_t num_free_slots = run->NumberOfFreeSlots();
+        num_used_slots[idx] += numOfSlots[idx] - num_free_slots;
+        num_metadata_bytes[idx] += headerSizes[idx];
+        i += num_pages;
+        break;
+      }
+      case kPageMapRunPart:
+        // Fall-through.
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl
+                   << DumpPageMap();
+        break;
+    }
+  }
+  os << "RosAlloc stats:\n";
+  for (size_t i = 0; i < kNumOfSizeBrackets; ++i) {
+    os << "Bracket " << i << " (" << bracketSizes[i] << "):"
+       << " #runs=" << num_runs[i]
+       << " #pages=" << num_pages_runs[i]
+       << " (" << PrettySize(num_pages_runs[i] * kPageSize) << ")"
+       << " #metadata_bytes=" << PrettySize(num_metadata_bytes[i])
+       << " #slots=" << num_slots[i] << " (" << PrettySize(num_slots[i] * bracketSizes[i]) << ")"
+       << " #used_slots=" << num_used_slots[i]
+       << " (" << PrettySize(num_used_slots[i] * bracketSizes[i]) << ")\n";
+  }
+  os << "Large #allocations=" << num_large_objects
+     << " #pages=" << num_pages_large_objects
+     << " (" << PrettySize(num_pages_large_objects * kPageSize) << ")\n";
+  size_t total_num_pages = 0;
+  size_t total_metadata_bytes = 0;
+  size_t total_allocated_bytes = 0;
+  for (size_t i = 0; i < kNumOfSizeBrackets; ++i) {
+    total_num_pages += num_pages_runs[i];
+    total_metadata_bytes += num_metadata_bytes[i];
+    total_allocated_bytes += num_used_slots[i] * bracketSizes[i];
+  }
+  total_num_pages += num_pages_large_objects;
+  total_allocated_bytes += num_pages_large_objects * kPageSize;
+  os << "Total #total_bytes=" << PrettySize(total_num_pages * kPageSize)
+     << " #metadata_bytes=" << PrettySize(total_metadata_bytes)
+     << " #used_bytes=" << PrettySize(total_allocated_bytes) << "\n";
+  os << "\n";
+}
+
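// Illustrative call site for DumpStats() above (not part of this patch): the
// caller must hold the mutator lock exclusively, and DumpStats() acquires
// bulk_free_lock_ and lock_ itself. A hypothetical caller could capture the
// report into a string first:
//
//   std::ostringstream oss;      // #include <sstream>
//   rosalloc->DumpStats(oss);    // 'rosalloc' is a hypothetical RosAlloc*.
//   LOG(INFO) << oss.str();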
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 3ce3d63..1fa2d1a 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -192,6 +192,7 @@
         Verify();
       }
       DCHECK(slot != nullptr);
+      DCHECK(slot->Next() == nullptr);
       Slot** headp = reinterpret_cast<Slot**>(&head_);
       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
       Slot* old_head = *headp;
@@ -366,7 +367,7 @@
     static size_t fixed_header_size() {
       return sizeof(Run);
     }
-    Slot* FirstSlot() {
+    Slot* FirstSlot() const {
       const uint8_t idx = size_bracket_idx_;
       return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
     }
@@ -473,7 +474,16 @@
       DCHECK_LT(slot_idx, numOfSlots[idx]);
       return reinterpret_cast<Slot*>(ptr);
     }
-    size_t SlotIndex(Slot* slot);
+    size_t SlotIndex(Slot* slot) const {
+      const uint8_t idx = size_bracket_idx_;
+      const size_t bracket_size = bracketSizes[idx];
+      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
+          - reinterpret_cast<uint8_t*>(FirstSlot());
+      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
+      size_t slot_idx = offset_from_slot_base / bracket_size;
+      DCHECK_LT(slot_idx, numOfSlots[idx]);
+      return slot_idx;
+    }
 
     // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
   };
@@ -482,10 +492,8 @@
   static constexpr uint8_t kMagicNum = 42;
   // The magic number for free pages.
   static constexpr uint8_t kMagicNumFree = 43;
-  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
-  static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
-  // The number of smaller size brackets that are the quantum size apart.
-  static constexpr size_t kNumOfQuantumSizeBrackets = 32;
+  // The number of size brackets.
+  static constexpr size_t kNumOfSizeBrackets = 42;
   // The sizes (the slot sizes, in bytes) of the size brackets.
   static size_t bracketSizes[kNumOfSizeBrackets];
   // The numbers of pages that are used for runs for each size bracket.
@@ -506,16 +514,23 @@
   }
   // Returns the index of the size bracket from the bracket size.
   static size_t BracketSizeToIndex(size_t size) {
-    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    DCHECK(8 <= size &&
+           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
+            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
+            size == 1 * KB || size == 2 * KB));
     size_t idx;
     if (UNLIKELY(size == 1 * KB)) {
       idx = kNumOfSizeBrackets - 2;
     } else if (UNLIKELY(size == 2 * KB)) {
       idx = kNumOfSizeBrackets - 1;
+    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
+      idx = size / kThreadLocalBracketQuantumSize - 1;
     } else {
-      DCHECK(size < 1 * KB);
-      DCHECK_EQ(size % 16, static_cast<size_t>(0));
-      idx = size / 16 - 1;
+      DCHECK(size <= kMaxRegularBracketSize);
+      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
+      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
+          + kNumThreadLocalSizeBrackets;
     }
     DCHECK(bracketSizes[idx] == size);
     return idx;
@@ -530,51 +545,64 @@
   // Rounds up the size up the nearest bracket size.
   static size_t RoundToBracketSize(size_t size) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      return RoundUp(size, 16);
-    } else if (512 < size && size <= 1 * KB) {
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      return RoundUp(size, kThreadLocalBracketQuantumSize);
+    } else if (size <= kMaxRegularBracketSize) {
+      return RoundUp(size, kBracketQuantumSize);
+    } else if (UNLIKELY(size <= 1 * KB)) {
       return 1 * KB;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
+      DCHECK_LE(size, 2 * KB);
       return 2 * KB;
     }
   }
   // Returns the size bracket index from the byte size with rounding.
   static size_t SizeToIndex(size_t size) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      return RoundUp(size, 16) / 16 - 1;
-    } else if (512 < size && size <= 1 * KB) {
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
+    } else if (size <= kMaxRegularBracketSize) {
+      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
+          - 1 + kNumThreadLocalSizeBrackets;
+    } else if (size <= 1 * KB) {
       return kNumOfSizeBrackets - 2;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
+      DCHECK_LE(size, 2 * KB);
       return kNumOfSizeBrackets - 1;
     }
   }
   // A combination of SizeToIndex() and RoundToBracketSize().
   static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      size_t bracket_size = RoundUp(size, 16);
-      *bracket_size_out = bracket_size;
-      size_t idx = bracket_size / 16 - 1;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
-    } else if (512 < size && size <= 1 * KB) {
-      size_t bracket_size = 1024;
-      *bracket_size_out = bracket_size;
-      size_t idx = kNumOfSizeBrackets - 2;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
+    size_t idx;
+    size_t bracket_size;
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
+      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
+    } else if (size <= kMaxRegularBracketSize) {
+      bracket_size = RoundUp(size, kBracketQuantumSize);
+      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
+          + kNumThreadLocalSizeBrackets;
+    } else if (size <= 1 * KB) {
+      bracket_size = 1 * KB;
+      idx = kNumOfSizeBrackets - 2;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
-      size_t bracket_size = 2048;
-      *bracket_size_out = bracket_size;
-      size_t idx = kNumOfSizeBrackets - 1;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
+      DCHECK(size <= 2 * KB);
+      bracket_size = 2 * KB;
+      idx = kNumOfSizeBrackets - 1;
     }
+    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
+    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
+    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
+    DCHECK_LE(size, bracket_size) << idx;
+    DCHECK(size > kMaxRegularBracketSize ||
+           (size <= kMaxThreadLocalBracketSize &&
+            bracket_size - size < kThreadLocalBracketQuantumSize) ||
+           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
+    *bracket_size_out = bracket_size;
+    return idx;
   }
+
   // Returns the page map index from an address. Requires that the
   // address is page size aligned.
   size_t ToPageMapIndex(const void* addr) const {
@@ -630,18 +658,37 @@
   // The default value for page_release_size_threshold_.
   static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
 
-  // We use thread-local runs for the size Brackets whose indexes
+  // We use thread-local runs for the size brackets whose indexes
   // are less than this index. We use shared (current) runs for the rest.
-  static const size_t kNumThreadLocalSizeBrackets = 8;
+  // Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumThreadLocalSizeBrackets = 16;
+  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
+                "Mismatch between kNumThreadLocalSizeBrackets and "
+                "kNumRosAllocThreadLocalSizeBracketsInThread");
 
   // The size of the largest bracket we use thread-local runs for.
   // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
   static const size_t kMaxThreadLocalBracketSize = 128;
 
-  // The bracket size increment for the brackets of size <= 512 bytes.
+  // We use regular (8- or 16-byte increment) runs for the size brackets whose indexes are less than
+  // this index.
+  static const size_t kNumRegularSizeBrackets = 40;
+
+  // The size of the largest regular (8- or 16-byte increment) bracket. Non-regular brackets are the
+  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
+  static const size_t kMaxRegularBracketSize = 512;
+
+  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
+  static constexpr size_t kThreadLocalBracketQuantumSize = 8;
+
+  // Equal to Log2(kThreadLocalBracketQuantumSize).
+  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
+
+  // The bracket size increment for the non-thread-local, regular brackets (of size <=
+  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
   static constexpr size_t kBracketQuantumSize = 16;
 
-  // Equal to Log2(kQuantumBracketSizeIncrement).
+  // Equal to Log2(kBracketQuantumSize).
   static constexpr size_t kBracketQuantumSizeShift = 4;
 
  private:
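// A standalone sketch (not part of this patch) of the two-quantum bracket
// arithmetic above, using this header's constants: 16 thread-local brackets at
// an 8-byte quantum (8..128), 24 regular brackets at a 16-byte quantum
// (144..512), then the 1 KB and 2 KB brackets, for 42 brackets in total. The
// names below are local stand-ins for kThreadLocalBracketQuantumSize etc.

#include <cassert>
#include <cstddef>

constexpr size_t kTLQuantum = 8;     // kThreadLocalBracketQuantumSize
constexpr size_t kQuantum = 16;      // kBracketQuantumSize
constexpr size_t kMaxTL = 128;       // kMaxThreadLocalBracketSize
constexpr size_t kMaxRegular = 512;  // kMaxRegularBracketSize
constexpr size_t kNumTL = 16;        // kNumThreadLocalSizeBrackets
constexpr size_t kNumBrackets = 42;  // kNumOfSizeBrackets

inline size_t RoundUpTo(size_t size, size_t quantum) {
  return (size + quantum - 1) / quantum * quantum;
}

// Mirrors SizeToIndexAndBracketSize(): maps an allocation size to its bracket
// index and rounded bracket size.
inline size_t SizeToIndexSketch(size_t size, size_t* bracket_size) {
  if (size <= kMaxTL) {
    *bracket_size = RoundUpTo(size, kTLQuantum);
    return *bracket_size / kTLQuantum - 1;
  } else if (size <= kMaxRegular) {
    *bracket_size = RoundUpTo(size, kQuantum);
    return (*bracket_size - kMaxTL) / kQuantum - 1 + kNumTL;
  } else if (size <= 1024) {
    *bracket_size = 1024;
    return kNumBrackets - 2;
  }
  *bracket_size = 2048;
  return kNumBrackets - 1;
}

int main() {
  size_t bs;
  assert(SizeToIndexSketch(1, &bs) == 0 && bs == 8);       // smallest bracket
  assert(SizeToIndexSketch(128, &bs) == 15 && bs == 128);  // last thread-local
  assert(SizeToIndexSketch(129, &bs) == 16 && bs == 144);  // first regular
  assert(SizeToIndexSketch(512, &bs) == 39 && bs == 512);  // last regular
  assert(SizeToIndexSketch(600, &bs) == 40 && bs == 1024);
  assert(SizeToIndexSketch(2048, &bs) == 41 && bs == 2048);
  return 0;
}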
@@ -881,6 +928,9 @@
   void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
       REQUIRES(!bulk_free_lock_, !lock_);
 
+  void DumpStats(std::ostream& os)
+      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);
+
  private:
   friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);
 
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
new file mode 100644
index 0000000..fb774a4
--- /dev/null
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
+#define ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
+
+#include "concurrent_copying.h"
+
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/space/region_space.h"
+#include "lock_word.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegion(
+    mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
+  // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
+  // to gray even though the object has already been marked through. This happens if a mutator
+  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // object (changes it from white to gray and back to white), and the thread runs and
+  // incorrectly changes it from white to gray. We need to detect such "false gray" cases and
+  // change the objects back to white at the end of marking.
+  if (kUseBakerReadBarrier) {
+    // Test the bitmap first to reduce the chance of false gray cases.
+    if (bitmap->Test(ref)) {
+      return ref;
+    }
+  }
+  // This may or may not succeed, which is ok because the object may already be gray.
+  bool cas_success = false;
+  if (kUseBakerReadBarrier) {
+    cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                   ReadBarrier::GrayPtr());
+  }
+  if (bitmap->AtomicTestAndSet(ref)) {
+    // Already marked.
+    if (kUseBakerReadBarrier &&
+        cas_success &&
+        // The object could be white here if a thread gets preempted after a success at the
+        // above AtomicSetReadBarrierPointer, GC has marked through it, and the thread runs up
+        // to this point.
+        ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      // Register a "false-gray" object to change it from gray to white at the end of marking.
+      PushOntoFalseGrayStack(ref);
+    }
+  } else {
+    // Newly marked.
+    if (kUseBakerReadBarrier) {
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+    }
+    PushOntoMarkStack(ref);
+  }
+  return ref;
+}
+
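// An illustrative interleaving (not from this patch) of the "false gray" race
// that MarkUnevacFromSpaceRegion() above guards against; T is a mutator
// thread, GC is the collector thread:
//
//   T:  enters the read barrier slow path for a white ref
//   T:  is preempted just before AtomicSetReadBarrierPointer(white, gray)
//   GC: marks through ref (white -> gray, scans it, gray -> back to white)
//   T:  resumes; the CAS succeeds and ref is gray again, though already marked
//   T:  bitmap->AtomicTestAndSet(ref) returns true ("already marked"), so ref
//       is pushed onto the false gray stack and whitened at the end of marking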
+template<bool kGrayImmuneObject>
+inline mirror::Object* ConcurrentCopying::MarkImmuneSpace(mirror::Object* ref) {
+  if (kUseBakerReadBarrier) {
+    // The GC-running thread doesn't (need to) gray immune objects except when updating thread roots
+    // in the thread flip on behalf of suspended threads (when gc_grays_immune_objects_ is
+    // true). Also, a mutator doesn't (need to) gray an immune object after GC has updated all
+    // immune space objects (when updated_all_immune_objects_ is true).
+    if (kIsDebugBuild) {
+      if (Thread::Current() == thread_running_gc_) {
+        DCHECK(!kGrayImmuneObject ||
+               updated_all_immune_objects_.LoadRelaxed() ||
+               gc_grays_immune_objects_);
+      } else {
+        DCHECK(kGrayImmuneObject);
+      }
+    }
+    if (!kGrayImmuneObject || updated_all_immune_objects_.LoadRelaxed()) {
+      return ref;
+    }
+    // This may or may not succeed, which is ok because the object may already be gray.
+    bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                    ReadBarrier::GrayPtr());
+    if (success) {
+      MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+      immune_gray_stack_.push_back(ref);
+    }
+  }
+  return ref;
+}
+
+template<bool kGrayImmuneObject>
+inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
+  if (from_ref == nullptr) {
+    return nullptr;
+  }
+  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (UNLIKELY(kUseBakerReadBarrier && !is_active_)) {
+    // In the lock word forward address state, the read barrier bits
+    // in the lock word are part of the stored forwarding address and
+    // invalid. This is usually OK as the from-space copy of objects
+    // aren't accessed by mutators due to the to-space
+    // invariant. However, during the dex2oat image writing relocation
+    // and the zygote compaction, objects can be in the forward
+    // address state (to store the forward/relocation addresses) and
+    // they can still be accessed and the invalid read barrier bits
+    // are consulted. If they look like gray but aren't really, the
+    // read barriers slow path can trigger when it shouldn't. To guard
+    // against this, return here if the CC collector isn't running.
+    return from_ref;
+  }
+  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
+  space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
+  switch (rtype) {
+    case space::RegionSpace::RegionType::kRegionTypeToSpace:
+      // It's already marked.
+      return from_ref;
+    case space::RegionSpace::RegionType::kRegionTypeFromSpace: {
+      mirror::Object* to_ref = GetFwdPtr(from_ref);
+      if (kUseBakerReadBarrier) {
+        DCHECK_NE(to_ref, ReadBarrier::GrayPtr())
+            << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      }
+      if (to_ref == nullptr) {
+        // It isn't marked yet. Mark it by copying it to the to-space.
+        to_ref = Copy(from_ref);
+      }
+      DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
+          << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      return to_ref;
+    }
+    case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
+      return MarkUnevacFromSpaceRegion(from_ref, region_space_bitmap_);
+    }
+    case space::RegionSpace::RegionType::kRegionTypeNone:
+      if (immune_spaces_.ContainsObject(from_ref)) {
+        return MarkImmuneSpace<kGrayImmuneObject>(from_ref);
+      } else {
+        return MarkNonMoving(from_ref);
+      }
+    default:
+      UNREACHABLE();
+  }
+}
+
+inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+  mirror::Object* ret;
+  // TODO: Delete GetMarkBit check when all of the callers properly check the bit. The remaining
+  // caller is array allocation.
+  if (from_ref == nullptr || from_ref->GetMarkBit()) {
+    return from_ref;
+  }
+  // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
+  if (UNLIKELY(mark_from_read_barrier_measurements_)) {
+    ret = MarkFromReadBarrierWithMeasurements(from_ref);
+  } else {
+    ret = Mark(from_ref);
+  }
+  // Only set the mark bit for baker barrier.
+  if (kUseBakerReadBarrier && LIKELY(!rb_mark_bit_stack_full_ && ret->AtomicSetMarkBit(0, 1))) {
+  // If the mark stack is full, we may temporarily go to mark and back to unmarked. Seeing either
+  // value is OK since the only race is doing an unnecessary Mark.
+    if (!rb_mark_bit_stack_->AtomicPushBack(ret)) {
+      // Mark stack is full, set the bit back to zero.
+      CHECK(ret->AtomicSetMarkBit(1, 0));
+      // Set rb_mark_bit_stack_full_; this is racy but OK since AtomicPushBack is thread-safe.
+      rb_mark_bit_stack_full_ = true;
+    }
+  }
+  return ret;
+}
+
+inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
+  DCHECK(region_space_->IsInFromSpace(from_ref));
+  LockWord lw = from_ref->GetLockWord(false);
+  if (lw.GetState() == LockWord::kForwardingAddress) {
+    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
+    DCHECK(fwd_ptr != nullptr);
+    return fwd_ptr;
+  } else {
+    return nullptr;
+  }
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
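// A minimal sketch (not ART's actual LockWord layout) of the forwarding-address
// idiom GetFwdPtr() above relies on: during evacuation, an object's header word
// is overwritten with its to-space address plus a state tag, so readers can
// distinguish a forwarded header from a normal lock word. The 2-bit low tag
// below is a hypothetical encoding chosen for illustration only.

#include <cstdint>

enum HeaderState : uintptr_t { kUnlocked = 0, kThinLocked = 1, kFatLocked = 2, kForwarding = 3 };

struct HeaderWord {
  uintptr_t bits;
  bool IsForwarded() const { return (bits & 3u) == kForwarding; }
  void* ForwardingAddress() const { return reinterpret_cast<void*>(bits & ~uintptr_t{3}); }
  void SetForwardingAddress(void* to_space_copy) {
    // Assumes to_space_copy is at least 4-byte aligned so the tag bits are free.
    bits = reinterpret_cast<uintptr_t>(to_space_copy) | kForwarding;
  }
};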
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d2d12af..42816a0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -17,13 +17,18 @@
 #include "concurrent_copying.h"
 
 #include "art_field-inl.h"
+#include "base/enums.h"
+#include "base/histogram-inl.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
-#include "gc/space/space.h"
+#include "gc/space/space-inl.h"
+#include "image-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -36,24 +41,53 @@
 namespace gc {
 namespace collector {
 
-ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
+static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
+// If kFilterModUnionCards then we attempt to filter cards that don't need to be dirty in the mod
+// union table. Disabled since it does not seem to help the pause much.
+static constexpr bool kFilterModUnionCards = kIsDebugBuild;
+// If kDisallowReadBarrierDuringScan is true then the GC aborts if any read barriers occur during
+// ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers.
+// Only enabled for kIsDebugBuild to avoid performance hit.
+static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild;
+// Slow path mark stack size; increase this if the stack is getting full and it is causing
+// performance problems.
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
+
+ConcurrentCopying::ConcurrentCopying(Heap* heap,
+                                     const std::string& name_prefix,
+                                     bool measure_read_barrier_slow_path)
     : GarbageCollector(heap,
                        name_prefix + (name_prefix.empty() ? "" : " ") +
                        "concurrent copying + mark sweep"),
       region_space_(nullptr), gc_barrier_(new Barrier(0)),
       gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
-                                                     2 * MB, 2 * MB)),
+                                                     kDefaultGcMarkStackSize,
+                                                     kDefaultGcMarkStackSize)),
+      rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack",
+                                                         kReadBarrierMarkStackSize,
+                                                         kReadBarrierMarkStackSize)),
+      rb_mark_bit_stack_full_(false),
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
+      region_space_bitmap_(nullptr),
       heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
+      measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
+      rb_slow_path_ns_(0),
+      rb_slow_path_count_(0),
+      rb_slow_path_count_gc_(0),
+      rb_slow_path_histogram_lock_("Read barrier histogram lock"),
+      rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32),
+      rb_slow_path_count_total_(0),
+      rb_slow_path_count_gc_total_(0),
       rb_table_(heap_->GetReadBarrierTable()),
-      force_evacuate_all_(false) {
+      force_evacuate_all_(false),
+      immune_gray_stack_lock_("concurrent copying immune gray stack lock",
+                              kMarkSweepMarkStackLock) {
   static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
                 "The region space size and the read barrier table region size must match");
-  cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -131,23 +165,14 @@
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : heap_->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
-      const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" :
-          "cc zygote space bitmap";
-      // TODO: try avoiding using bitmaps for image/zygote to save space.
-      accounting::ContinuousSpaceBitmap* bitmap =
-          accounting::ContinuousSpaceBitmap::Create(bitmap_name, space->Begin(), space->Capacity());
-      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
-      cc_bitmaps_.push_back(bitmap);
+      immune_spaces_.AddSpace(space);
     } else if (space == region_space_) {
       accounting::ContinuousSpaceBitmap* bitmap =
           accounting::ContinuousSpaceBitmap::Create("cc region space bitmap",
                                                     space->Begin(), space->Capacity());
-      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
-      cc_bitmaps_.push_back(bitmap);
       region_space_bitmap_ = bitmap;
     }
   }
@@ -161,7 +186,20 @@
               << reinterpret_cast<void*>(region_space_->Limit());
   }
   CheckEmptyMarkStack();
-  immune_region_.Reset();
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    CHECK(false_gray_stack_.empty());
+  }
+
+  rb_mark_bit_stack_full_ = false;
+  mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.StoreRelaxed(0);
+    rb_slow_path_count_.StoreRelaxed(0);
+    rb_slow_path_count_gc_.StoreRelaxed(0);
+  }
+
+  immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
   if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
@@ -171,16 +209,31 @@
   } else {
     force_evacuate_all_ = false;
   }
+  if (kUseBakerReadBarrier) {
+    updated_all_immune_objects_.StoreRelaxed(false);
+    // GC may gray immune objects in the thread flip.
+    gc_grays_immune_objects_ = true;
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+      DCHECK(immune_gray_stack_.empty());
+    }
+  }
   BindBitmaps();
   if (kVerboseMode) {
     LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_;
-    LOG(INFO) << "Immune region: " << immune_region_.Begin() << "-" << immune_region_.End();
+    LOG(INFO) << "Largest immune region: " << immune_spaces_.GetLargestImmuneRegion().Begin()
+              << "-" << immune_spaces_.GetLargestImmuneRegion().End();
+    for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+      LOG(INFO) << "Immune space: " << *space;
+    }
     LOG(INFO) << "GC end of InitializePhase";
   }
+  // Mark all of the zygote large objects without graying them.
+  MarkZygoteLargeObjects();
 }
 
 // Used to switch the thread roots of a thread from from-space refs to to-space refs.
-class ThreadFlipVisitor : public Closure {
+class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor {
  public:
   ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab)
       : concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) {
@@ -207,17 +260,51 @@
       thread->RevokeThreadLocalAllocationStack();
     }
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(concurrent_copying_);
+    // We can use the non-CAS VisitRoots functions below because we only update thread-local GC
+    // roots.
+    thread->VisitRoots(this);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
+  void VisitRoots(mirror::Object*** roots,
+                  size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object** root = roots[i];
+      mirror::Object* ref = *root;
+      if (ref != nullptr) {
+        mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+        if (to_ref != ref) {
+          *root = to_ref;
+        }
+      }
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+                  size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::CompressedReference<mirror::Object>* const root = roots[i];
+      if (!root->IsNull()) {
+        mirror::Object* ref = root->AsMirrorPtr();
+        mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+        if (to_ref != ref) {
+          root->Assign(to_ref);
+        }
+      }
+    }
+  }
+
  private:
   ConcurrentCopying* const concurrent_copying_;
   const bool use_tlab_;
 };
 
 // Called back from Runtime::FlipThreadRoots() during a pause.
-class FlipCallback : public Closure {
+class ConcurrentCopying::FlipCallback : public Closure {
  public:
   explicit FlipCallback(ConcurrentCopying* concurrent_copying)
       : concurrent_copying_(concurrent_copying) {
@@ -239,17 +326,103 @@
     }
     cc->is_marking_ = true;
     cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal);
+    if (kIsDebugBuild) {
+      cc->region_space_->AssertAllRegionLiveBytesZeroOrCleared();
+    }
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
       Runtime::Current()->VisitTransactionRoots(cc);
     }
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      cc->GrayAllDirtyImmuneObjects();
+      if (kIsDebugBuild) {
+        // Check that all non-gray immune objects only reference immune objects.
+        cc->VerifyGrayImmuneObjects();
+      }
+    }
   }
 
  private:
   ConcurrentCopying* const concurrent_copying_;
 };
 
+class ConcurrentCopying::VerifyGrayImmuneObjectsVisitor {
+ public:
+  explicit VerifyGrayImmuneObjectsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+      const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_)
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
+    CheckReference(obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(offset),
+                   obj, offset);
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    CheckReference(ref->GetReferent<kWithoutReadBarrier>(),
+                   ref,
+                   mirror::Reference::ReferentOffset());
+  }
+
+  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CheckReference(root->AsMirrorPtr(), nullptr, MemberOffset(0));
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+
+  void CheckReference(mirror::Object* ref, mirror::Object* holder, MemberOffset offset) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (ref != nullptr) {
+      if (!collector_->immune_spaces_.ContainsObject(ref)) {
+        // Not immune, must be a zygote large object.
+        CHECK(Runtime::Current()->GetHeap()->GetLargeObjectsSpace()->IsZygoteLargeObject(
+            Thread::Current(), ref))
+            << "Non gray object references non immune, non zygote large object "<< ref << " "
+            << PrettyTypeOf(ref) << " in holder " << holder << " " << PrettyTypeOf(holder)
+            << " offset=" << offset.Uint32Value();
+      } else {
+        // Make sure the large object class is immune since we will never scan the large object.
+        CHECK(collector_->immune_spaces_.ContainsObject(
+            ref->GetClass<kVerifyNone, kWithoutReadBarrier>()));
+      }
+    }
+  }
+};
+
+void ConcurrentCopying::VerifyGrayImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    VerifyGrayImmuneObjectsVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  [&visitor](mirror::Object* obj)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      // If an object is not gray, it should only have references to things in the immune spaces.
+      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+        obj->VisitReferences</*kVisitNativeRoots*/true,
+                             kDefaultVerifyFlags,
+                             kWithoutReadBarrier>(visitor, visitor);
+      }
+    });
+  }
+}
+
 // Switch thread roots from from-space refs to to-space refs. Forward/mark the thread roots.
 void ConcurrentCopying::FlipThreadRoots() {
   TimingLogger::ScopedTiming split("FlipThreadRoots", GetTimings());
@@ -262,10 +435,8 @@
   gc_barrier_->Init(self, 0);
   ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
   FlipCallback flip_callback(this);
-  heap_->ThreadFlipBegin(self);  // Sync with JNI critical calls.
   size_t barrier_count = Runtime::Current()->FlipThreadRoots(
       &thread_flip_visitor, &flip_callback, this);
-  heap_->ThreadFlipEnd(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
     gc_barrier_->Increment(self, barrier_count);
@@ -279,6 +450,52 @@
   }
 }
 
+class ConcurrentCopying::GrayImmuneObjectVisitor {
+ public:
+  explicit GrayImmuneObjectVisitor() {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kUseBakerReadBarrier) {
+      if (kIsDebugBuild) {
+        Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+      }
+      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+  }
+
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj);
+  }
+};
+
+void ConcurrentCopying::GrayAllDirtyImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  accounting::CardTable* const card_table = heap->GetCardTable();
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    GrayImmuneObjectVisitor visitor;
+    accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+    // Mark all the objects on dirty cards since these may point to objects in other spaces.
+    // Once these are marked, the GC will eventually clear them later.
+    // Table is non-null for boot image and zygote spaces. It is only null for application image
+    // spaces.
+    if (table != nullptr) {
+      // TODO: Add preclean outside the pause.
+      table->ClearCards();
+      table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
+    } else {
+      // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
+      // pause because app image spaces are all dirty pages anyways.
+      card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor);
+    }
+  }
+  // Since all of the objects that may point to other spaces are marked, we can avoid all the read
+  // barriers in the immune spaces.
+  updated_all_immune_objects_.StoreRelaxed(true);
+}
+
 void ConcurrentCopying::SwapStacks() {
   heap_->SwapStacks();
 }
@@ -288,41 +505,6 @@
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
 }
 
-// Used to visit objects in the immune spaces.
-class ConcurrentCopyingImmuneSpaceObjVisitor {
- public:
-  explicit ConcurrentCopyingImmuneSpaceObjVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->immune_region_.ContainsObject(obj));
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
-    DCHECK(cc_bitmap != nullptr)
-        << "An immune space object must have a bitmap";
-    if (kIsDebugBuild) {
-      DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj))
-          << "Immune space object must be already marked";
-    }
-    // This may or may not succeed, which is ok.
-    if (kUseBakerReadBarrier) {
-      obj->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (cc_bitmap->AtomicTestAndSet(obj)) {
-      // Already marked. Do nothing.
-    } else {
-      // Newly marked. Set the gray bit and push it onto the mark stack.
-      CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      collector_->PushOntoMarkStack(obj);
-    }
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
 class EmptyCheckpoint : public Closure {
  public:
   explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying)
@@ -336,15 +518,48 @@
         << thread->GetState() << " thread " << thread << " self " << self;
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      concurrent_copying_->GetBarrier().Pass(self);
-    }
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
  private:
   ConcurrentCopying* const concurrent_copying_;
 };
 
+// Used to visit objects in the immune spaces.
+inline void ConcurrentCopying::ScanImmuneObject(mirror::Object* obj) {
+  DCHECK(obj != nullptr);
+  DCHECK(immune_spaces_.ContainsObject(obj));
+  // Update the fields without graying it or pushing it onto the mark stack.
+  Scan(obj);
+}
+
+class ConcurrentCopying::ImmuneSpaceScanObjVisitor {
+ public:
+  explicit ImmuneSpaceScanObjVisitor(ConcurrentCopying* cc)
+      : collector_(cc) {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+        collector_->ScanImmuneObject(obj);
+        // Done scanning the object, go back to white.
+        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                        ReadBarrier::WhitePtr());
+        CHECK(success);
+      }
+    } else {
+      collector_->ScanImmuneObject(obj);
+    }
+  }
+
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<ImmuneSpaceScanObjVisitor*>(arg)->operator()(obj);
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+};
+
 // Concurrently mark roots that are guarded by read barriers and process the mark stack.
 void ConcurrentCopying::MarkingPhase() {
   TimingLogger::ScopedTiming split("MarkingPhase", GetTimings());
@@ -352,57 +567,63 @@
     LOG(INFO) << "GC MarkingPhase";
   }
   CHECK(weak_ref_access_enabled_);
+
+  // Scan immune spaces.
+  // Update all the fields in the immune spaces first without graying the objects so that we
+  // minimize dirty pages in the immune spaces. Note mutators can concurrently access and gray some
+  // of the objects.
+  if (kUseBakerReadBarrier) {
+    gc_grays_immune_objects_ = false;
+  }
   {
-    // Mark the image root. The WB-based collectors do not need to
-    // scan the image objects from roots by relying on the card table,
-    // but it's necessary for the RB to-space invariant to hold.
-    TimingLogger::ScopedTiming split1("VisitImageRoots", GetTimings());
-    gc::space::ImageSpace* image = heap_->GetImageSpace();
-    if (image != nullptr) {
-      mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots();
-      mirror::Object* marked_image_root = Mark(image_root);
-      CHECK_EQ(image_root, marked_image_root) << "An image object does not move";
-      if (ReadBarrier::kEnableToSpaceInvariantChecks) {
-        AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root);
+    TimingLogger::ScopedTiming split2("ScanImmuneSpaces", GetTimings());
+    for (auto& space : immune_spaces_.GetSpaces()) {
+      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      ImmuneSpaceScanObjVisitor visitor(this);
+      if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects && table != nullptr) {
+        table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor);
+      } else {
+        live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                      reinterpret_cast<uintptr_t>(space->Limit()),
+                                      visitor);
       }
     }
   }
-  // TODO: Other garbage collectors uses Runtime::VisitConcurrentRoots(), refactor this part
-  // to also use the same function.
-  {
-    TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
-    Runtime::Current()->VisitConstantRoots(this);
+  if (kUseBakerReadBarrier) {
+    // This release fence makes the field updates in the above loop visible before allowing
+    // mutators to access immune objects without graying them first.
+    updated_all_immune_objects_.StoreRelease(true);
+    // Now whiten immune objects concurrently accessed and grayed by mutators. We can't do this in
+    // the above loop because we would incorrectly disable the read barrier by whitening an object
+    // which may point to an unscanned, white object, breaking the to-space invariant.
+    //
+    // Make sure no mutators are in the middle of marking an immune object before whitening immune
+    // objects.
+    IssueEmptyCheckpoint();
+    MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+    if (kVerboseMode) {
+      LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size();
+    }
+    for (mirror::Object* obj : immune_gray_stack_) {
+      DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+    immune_gray_stack_.clear();
   }
+
   {
-    TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
-    Runtime::Current()->GetInternTable()->VisitRoots(this, kVisitRootFlagAllRoots);
-  }
-  {
-    TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
-    Runtime::Current()->GetClassLinker()->VisitRoots(this, kVisitRootFlagAllRoots);
+    TimingLogger::ScopedTiming split2("VisitConcurrentRoots", GetTimings());
+    Runtime::Current()->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
   }
   {
     // TODO: don't visit the transaction roots if it's not active.
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
     Runtime::Current()->VisitNonThreadRoots(this);
   }
-  {
-    TimingLogger::ScopedTiming split6("Dbg::VisitRoots", GetTimings());
-    Dbg::VisitRoots(this);
-  }
-  Runtime::Current()->GetHeap()->VisitAllocationRecords(this);
-
-  // Immune spaces.
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (immune_region_.ContainsSpace(space)) {
-      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      ConcurrentCopyingImmuneSpaceObjVisitor visitor(this);
-      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                    reinterpret_cast<uintptr_t>(space->Limit()),
-                                    visitor);
-    }
-  }
 
   Thread* self = Thread::Current();
   {
@@ -466,6 +687,9 @@
     Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
     // Marking is done. Disable marking.
     DisableMarking();
+    if (kUseBakerReadBarrier) {
+      ProcessFalseGrayStack();
+    }
     CheckEmptyMarkStack();
   }
 
@@ -494,7 +718,7 @@
   Runtime::Current()->BroadcastForNewSystemWeaks();
 }
 
-class DisableMarkingCheckpoint : public Closure {
+class ConcurrentCopying::DisableMarkingCheckpoint : public Closure {
  public:
   explicit DisableMarkingCheckpoint(ConcurrentCopying* concurrent_copying)
       : concurrent_copying_(concurrent_copying) {
@@ -511,9 +735,7 @@
     thread->SetIsGcMarking(false);
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      concurrent_copying_->GetBarrier().Pass(self);
-    }
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
  private:
@@ -557,6 +779,32 @@
   mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff);
 }
 
+void ConcurrentCopying::PushOntoFalseGrayStack(mirror::Object* ref) {
+  CHECK(kUseBakerReadBarrier);
+  DCHECK(ref != nullptr);
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  false_gray_stack_.push_back(ref);
+}
+
+void ConcurrentCopying::ProcessFalseGrayStack() {
+  CHECK(kUseBakerReadBarrier);
+  // Change the objects on the false gray stack from gray to white.
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  for (mirror::Object* obj : false_gray_stack_) {
+    DCHECK(IsMarked(obj));
+    // The object could be white here if a thread got preempted after a success at the
+    // AtomicSetReadBarrierPointer in Mark(), GC started marking through it (but not finished so
+    // still gray), and the thread ran to register it onto the false gray stack.
+    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+  }
+  false_gray_stack_.clear();
+}
+
+
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
   EmptyCheckpoint check_point(this);
@@ -577,17 +825,31 @@
   Locks::mutator_lock_->SharedLock(self);
 }
 
+void ConcurrentCopying::ExpandGcMarkStack() {
+  DCHECK(gc_mark_stack_->IsFull());
+  const size_t new_size = gc_mark_stack_->Capacity() * 2;
+  std::vector<StackReference<mirror::Object>> temp(gc_mark_stack_->Begin(),
+                                                   gc_mark_stack_->End());
+  gc_mark_stack_->Resize(new_size);
+  for (auto& ref : temp) {
+    gc_mark_stack_->PushBack(ref.AsMirrorPtr());
+  }
+  DCHECK(!gc_mark_stack_->IsFull());
+}
+
 void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) {
   CHECK_EQ(is_mark_stack_push_disallowed_.LoadRelaxed(), 0)
       << " " << to_ref << " " << PrettyTypeOf(to_ref);
   Thread* self = Thread::Current();  // TODO: pass self as an argument from call sites?
   CHECK(thread_running_gc_ != nullptr);
   MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
-  if (mark_stack_mode == kMarkStackModeThreadLocal) {
-    if (self == thread_running_gc_) {
+  if (LIKELY(mark_stack_mode == kMarkStackModeThreadLocal)) {
+    if (LIKELY(self == thread_running_gc_)) {
       // If GC-running thread, use the GC mark stack instead of a thread-local mark stack.
       CHECK(self->GetThreadLocalMarkStack() == nullptr);
-      CHECK(!gc_mark_stack_->IsFull());
+      if (UNLIKELY(gc_mark_stack_->IsFull())) {
+        ExpandGcMarkStack();
+      }
       gc_mark_stack_->PushBack(to_ref);
     } else {
       // Otherwise, use a thread-local mark stack.
@@ -621,7 +883,9 @@
   } else if (mark_stack_mode == kMarkStackModeShared) {
     // Access the shared GC mark stack with a lock.
     MutexLock mu(self, mark_stack_lock_);
-    CHECK(!gc_mark_stack_->IsFull());
+    if (UNLIKELY(gc_mark_stack_->IsFull())) {
+      ExpandGcMarkStack();
+    }
     gc_mark_stack_->PushBack(to_ref);
   } else {
     CHECK_EQ(static_cast<uint32_t>(mark_stack_mode),
@@ -633,7 +897,9 @@
         << "Only GC-running thread should access the mark stack "
         << "in the GC exclusive mark stack mode";
     // Access the GC mark stack without a lock.
-    CHECK(!gc_mark_stack_->IsFull());
+    if (UNLIKELY(gc_mark_stack_->IsFull())) {
+      ExpandGcMarkStack();
+    }
     gc_mark_stack_->PushBack(to_ref);
   }
 }
@@ -646,23 +912,11 @@
   return heap_->live_stack_.get();
 }
 
-inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
-  DCHECK(region_space_->IsInFromSpace(from_ref));
-  LockWord lw = from_ref->GetLockWord(false);
-  if (lw.GetState() == LockWord::kForwardingAddress) {
-    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
-    CHECK(fwd_ptr != nullptr);
-    return fwd_ptr;
-  } else {
-    return nullptr;
-  }
-}
-
-// The following visitors are that used to verify that there's no
-// references to the from-space left after marking.
-class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
+// The following visitors are used to verify that there are no references to the from-space left
+// marking.
+class ConcurrentCopying::VerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
-  explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
+  explicit VerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
   void operator()(mirror::Object* ref) const
@@ -673,20 +927,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      if (collector_->RegionSpace()->IsInToSpace(ref)) {
-        CHECK(ref->GetReadBarrierPointer() == nullptr)
-            << "To-space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
-      } else {
-        CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector_->IsOnAllocStack(ref)))
-            << "Non-moving/unevac from space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-black rb_ptr " << ref->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr)."
-            << " Is it in the non-moving space="
-            << (collector_->GetHeap()->GetNonMovingSpace()->HasAddress(ref));
-      }
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+          << "Ref " << ref << " " << PrettyTypeOf(ref)
+          << " has non-white rb_ptr ";
     }
   }
 
@@ -700,16 +943,16 @@
   ConcurrentCopying* const collector_;
 };
 
-class ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor {
+class ConcurrentCopying::VerifyNoFromSpaceRefsFieldVisitor {
  public:
-  explicit ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor(ConcurrentCopying* collector)
+  explicit VerifyNoFromSpaceRefsFieldVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
     mirror::Object* ref =
         obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset);
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector_);
+    VerifyNoFromSpaceRefsVisitor visitor(collector_);
     visitor(ref);
   }
   void operator()(mirror::Class* klass, mirror::Reference* ref) const
@@ -727,7 +970,7 @@
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector_);
+    VerifyNoFromSpaceRefsVisitor visitor(collector_);
     visitor(root->AsMirrorPtr());
   }
 
@@ -735,9 +978,9 @@
   ConcurrentCopying* const collector_;
 };
 
-class ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor {
+class ConcurrentCopying::VerifyNoFromSpaceRefsObjectVisitor {
  public:
-  explicit ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor(ConcurrentCopying* collector)
+  explicit VerifyNoFromSpaceRefsObjectVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
   void operator()(mirror::Object* obj) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -749,21 +992,11 @@
     ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
     space::RegionSpace* region_space = collector->RegionSpace();
     CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
-    ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor visitor(collector);
+    VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      if (collector->RegionSpace()->IsInToSpace(obj)) {
-        CHECK(obj->GetReadBarrierPointer() == nullptr)
-            << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
-      } else {
-        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector->IsOnAllocStack(obj)))
-            << "Non-moving space/unevac from space ref " << obj << " " << PrettyTypeOf(obj)
-            << " has non-black rb_ptr " << obj->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr). Is it in the non-moving space="
-            << (collector->GetHeap()->GetNonMovingSpace()->HasAddress(obj));
-      }
+      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
     }
   }
 
@@ -783,16 +1016,15 @@
       CHECK(!thread->GetIsGcMarking());
     }
   }
-  ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor visitor(this);
+  VerifyNoFromSpaceRefsObjectVisitor visitor(this);
   // Roots.
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
+    VerifyNoFromSpaceRefsVisitor ref_visitor(this);
     Runtime::Current()->VisitRoots(&ref_visitor);
   }
   // The to-space.
-  region_space_->WalkToSpace(ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor::ObjectCallback,
-                             this);
+  region_space_->WalkToSpace(VerifyNoFromSpaceRefsObjectVisitor::ObjectCallback, this);
   // Non-moving spaces.
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -800,7 +1032,7 @@
   }
   // The alloc stack.
   {
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
+    VerifyNoFromSpaceRefsVisitor ref_visitor(this);
     for (auto* it = heap_->allocation_stack_->Begin(), *end = heap_->allocation_stack_->End();
         it < end; ++it) {
       mirror::Object* const obj = it->AsMirrorPtr();
@@ -815,9 +1047,9 @@
 }
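
The visitor classes above all follow one shape: a functor whose operator() is applied to every reference field of every object, so the same checker can be reused for fields, roots, and stack entries. A minimal standalone sketch of that pattern, with hypothetical stand-in types rather than ART's mirror:: classes:

#include <cassert>
#include <vector>

struct Object {
  std::vector<Object*> fields;  // Stand-in for real reference fields.
  bool in_from_space = false;
};

// Per-reference checker, analogous to VerifyNoFromSpaceRefsVisitor.
struct VerifyNoFromSpaceRefs {
  void operator()(const Object* ref) const {
    assert(ref == nullptr || !ref->in_from_space);
  }
};

// Field visitor, analogous to VerifyNoFromSpaceRefsFieldVisitor: applies
// the per-reference checker to every outgoing reference of an object.
struct FieldVisitor {
  VerifyNoFromSpaceRefs check;
  void operator()(const Object& obj) const {
    for (const Object* ref : obj.fields) {
      check(ref);
    }
  }
};

int main() {
  Object a, b;
  a.fields.push_back(&b);
  FieldVisitor visitor;
  visitor(a);  // Passes: b is not in from-space.
}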
 
 // The following visitors are used to assert the to-space invariant.
-class ConcurrentCopyingAssertToSpaceInvariantRefsVisitor {
+class ConcurrentCopying::AssertToSpaceInvariantRefsVisitor {
  public:
-  explicit ConcurrentCopyingAssertToSpaceInvariantRefsVisitor(ConcurrentCopying* collector)
+  explicit AssertToSpaceInvariantRefsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
   void operator()(mirror::Object* ref) const
@@ -833,16 +1065,16 @@
   ConcurrentCopying* const collector_;
 };
 
-class ConcurrentCopyingAssertToSpaceInvariantFieldVisitor {
+class ConcurrentCopying::AssertToSpaceInvariantFieldVisitor {
  public:
-  explicit ConcurrentCopyingAssertToSpaceInvariantFieldVisitor(ConcurrentCopying* collector)
+  explicit AssertToSpaceInvariantFieldVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
     mirror::Object* ref =
         obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset);
-    ConcurrentCopyingAssertToSpaceInvariantRefsVisitor visitor(collector_);
+    AssertToSpaceInvariantRefsVisitor visitor(collector_);
     visitor(ref);
   }
   void operator()(mirror::Class* klass, mirror::Reference* ref ATTRIBUTE_UNUSED) const
@@ -859,7 +1091,7 @@
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    ConcurrentCopyingAssertToSpaceInvariantRefsVisitor visitor(collector_);
+    AssertToSpaceInvariantRefsVisitor visitor(collector_);
     visitor(root->AsMirrorPtr());
   }
 
@@ -867,9 +1099,9 @@
   ConcurrentCopying* const collector_;
 };
 
-class ConcurrentCopyingAssertToSpaceInvariantObjectVisitor {
+class ConcurrentCopying::AssertToSpaceInvariantObjectVisitor {
  public:
-  explicit ConcurrentCopyingAssertToSpaceInvariantObjectVisitor(ConcurrentCopying* collector)
+  explicit AssertToSpaceInvariantObjectVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
   void operator()(mirror::Object* obj) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -882,7 +1114,7 @@
     space::RegionSpace* region_space = collector->RegionSpace();
     CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
     collector->AssertToSpaceInvariant(nullptr, MemberOffset(0), obj);
-    ConcurrentCopyingAssertToSpaceInvariantFieldVisitor visitor(collector);
+    AssertToSpaceInvariantFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
   }
 
@@ -890,7 +1122,7 @@
   ConcurrentCopying* const collector_;
 };
 
-class RevokeThreadLocalMarkStackCheckpoint : public Closure {
+class ConcurrentCopying::RevokeThreadLocalMarkStackCheckpoint : public Closure {
  public:
   RevokeThreadLocalMarkStackCheckpoint(ConcurrentCopying* concurrent_copying,
                                        bool disable_weak_ref_access)
@@ -916,9 +1148,7 @@
     }
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      concurrent_copying_->GetBarrier().Pass(self);
-    }
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
  private:
@@ -1065,7 +1295,7 @@
   return count;
 }
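
The checkpoint in RevokeThreadLocalMarkStackCheckpoint above now passes the barrier unconditionally instead of only for runnable threads. A simplified sketch of the underlying counting-barrier pattern (standard C++ stand-ins; ART's Barrier is its own class with different semantics):

#include <condition_variable>
#include <mutex>
#include <thread>

// Counting barrier: each checkpointed thread calls Pass(), and the
// GC thread blocks in Wait() until the expected number of passes arrive.
class Barrier {
 public:
  explicit Barrier(int count) : count_(count) {}
  void Pass() {
    std::lock_guard<std::mutex> lock(mu_);
    if (--count_ == 0) {
      cv_.notify_all();
    }
  }
  void Wait() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return count_ == 0; });
  }
 private:
  std::mutex mu_;
  std::condition_variable cv_;
  int count_;
};

int main() {
  Barrier barrier(2);
  std::thread t1([&] { barrier.Pass(); });
  std::thread t2([&] { barrier.Pass(); });
  barrier.Wait();  // Returns once both checkpoints have run.
  t1.join();
  t2.join();
}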
 
-void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
+inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
@@ -1074,51 +1304,45 @@
   }
   // Scan ref fields.
   Scan(to_ref);
-  // Mark the gray ref as white or black.
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
         << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
         << " is_marked=" << IsMarked(to_ref);
   }
-  if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
-      to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
-      !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
-    // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
-    // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
-  } else {
-    // We may occasionally leave a Reference black or white in the queue if its referent happens to
-    // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
-    // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
-    // here in this else block.
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+  if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
+                to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
+                !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
+    // Leave this reference gray in the queue so that GetReferent() will trigger a read barrier. We
+    // will change it to white later in ReferenceQueue::DequeuePendingReference().
+    DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr)
+        << "Left unenqueued ref gray " << to_ref;
+  } else {
+    // We may occasionally leave a reference white in the queue if its referent happens to be
+    // concurrently marked after the Scan() call above has enqueued the Reference, in which case the
+    // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
+    // else block.
     if (kUseBakerReadBarrier) {
-      if (region_space_->IsInToSpace(to_ref)) {
-        // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::WhitePtr());
-        CHECK(success) << "Must succeed as we won the race.";
-        CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-      } else {
-        // If non-moving space/unevac from space, change from gray
-        // to black. We can't change gray to white because it's not
-        // safe to use CAS if two threads change values in opposite
-        // directions (A->B and B->A). So, we change it to black to
-        // indicate non-moving objects that have been marked
-        // through. Note we'd need to change from black to white
-        // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::BlackPtr());
-        CHECK(success) << "Must succeed as we won the race.";
-        CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
-      }
+      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+          ReadBarrier::GrayPtr(),
+          ReadBarrier::WhitePtr());
+      DCHECK(success) << "Must succeed as we won the race.";
     }
+  }
 #else
-    DCHECK(!kUseBakerReadBarrier);
+  DCHECK(!kUseBakerReadBarrier);
 #endif
+
+  if (region_space_->IsInUnevacFromSpace(to_ref)) {
+    // Add to the live bytes of the unevacuated from-space. Note that this code is always run by
+    // the GC-running thread (no synchronization required).
+    DCHECK(region_space_bitmap_->Test(to_ref));
+    // Disable the read barrier in SizeOf for performance, which is safe.
+    size_t obj_size = to_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
+    region_space_->AddLiveBytes(to_ref, alloc_size);
   }
   if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
-    ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
+    AssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
   }
 }
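
The gray-to-white transition above is a release CAS on the object's read-barrier state. A minimal sketch of that transition using std::atomic in place of ART's lock-word bits (names hypothetical):

#include <atomic>
#include <cassert>

enum Color : int { kWhite = 0, kGray = 1 };

// Simplified stand-in for the per-object read-barrier state.
struct Obj {
  std::atomic<int> rb_state{kGray};
};

// Gray -> white with release semantics, mirroring the
// AtomicSetReadBarrierPointer</*kCasRelease*/true> call above: the release
// ordering makes the field updates performed while the object was gray
// visible to any thread that later observes the white state.
bool SetWhite(Obj* o) {
  int expected = kGray;
  return o->rb_state.compare_exchange_strong(
      expected, kWhite, std::memory_order_release, std::memory_order_relaxed);
}

int main() {
  Obj o;
  assert(SetWhite(&o));   // Wins the race: gray -> white.
  assert(!SetWhite(&o));  // Already white; the CAS fails.
}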
@@ -1213,7 +1437,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
-      if (space == region_space_ || immune_region_.ContainsSpace(space)) {
+      if (space == region_space_ || immune_spaces_.ContainsSpace(space)) {
         continue;
       }
       TimingLogger::ScopedTiming split2(
@@ -1224,69 +1448,31 @@
   SweepLargeObjects(swap_bitmaps);
 }
 
+void ConcurrentCopying::MarkZygoteLargeObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  Thread* const self = Thread::Current();
+  WriterMutexLock rmu(self, *Locks::heap_bitmap_lock_);
+  space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
+  // Pick the current live bitmap (this is the mark bitmap if the bitmaps have been swapped).
+  accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
+  accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
+  // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
+  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(los->Begin()),
+                                reinterpret_cast<uintptr_t>(los->End()),
+                                [mark_bitmap, los, self](mirror::Object* obj)
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (los->IsZygoteLargeObject(self, obj)) {
+      mark_bitmap->Set(obj);
+    }
+  });
+}
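
MarkZygoteLargeObjects walks the live bitmap with a lambda and copies the zygote entries into the mark bitmap so that the sweep below leaves them alone. A toy version of that visit-marked-range pattern, with std::bitset standing in for the real bitmaps (hypothetical types):

#include <bitset>
#include <cstddef>
#include <functional>

constexpr std::size_t kSlots = 64;

// Invoke the visitor on every set bit, like VisitMarkedRange above.
void VisitMarked(const std::bitset<kSlots>& live,
                 const std::function<void(std::size_t)>& visitor) {
  for (std::size_t i = 0; i < kSlots; ++i) {
    if (live.test(i)) {
      visitor(i);
    }
  }
}

int main() {
  std::bitset<kSlots> live, mark, is_zygote;
  live.set(3);
  live.set(7);
  is_zygote.set(7);
  VisitMarked(live, [&](std::size_t i) {
    if (is_zygote.test(i)) {
      mark.set(i);  // Explicitly mark zygote objects so they aren't swept.
    }
  });
  return (mark.test(7) && !mark.test(3)) ? 0 : 1;
}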
+
 void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) {
   TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings());
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
-class ConcurrentCopyingClearBlackPtrsVisitor {
- public:
-  explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Clear the black ptrs in non-moving objects back to white.
-void ConcurrentCopying::ClearBlackPtrs() {
-  CHECK(kUseBakerReadBarrier);
-  TimingLogger::ScopedTiming split("ClearBlackPtrs", GetTimings());
-  ConcurrentCopyingClearBlackPtrsVisitor visitor(this);
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (space == region_space_) {
-      continue;
-    }
-    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    if (kVerboseMode) {
-      LOG(INFO) << "ClearBlackPtrs: " << *space << " bitmap: " << *mark_bitmap;
-    }
-    mark_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
-  }
-  space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
-  large_object_space->GetMarkBitmap()->VisitMarkedRange(
-      reinterpret_cast<uintptr_t>(large_object_space->Begin()),
-      reinterpret_cast<uintptr_t>(large_object_space->End()),
-      visitor);
-  // Objects on the allocation stack?
-  if (ReadBarrier::kEnableReadBarrierInvariantChecks || kIsDebugBuild) {
-    size_t count = GetAllocationStack()->Size();
-    auto* it = GetAllocationStack()->Begin();
-    auto* end = GetAllocationStack()->End();
-    for (size_t i = 0; i < count; ++i, ++it) {
-      CHECK_LT(it, end);
-      mirror::Object* obj = it->AsMirrorPtr();
-      if (obj != nullptr) {
-        // Must have been cleared above.
-        CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-      }
-    }
-  }
-}
-
 void ConcurrentCopying::ReclaimPhase() {
   TimingLogger::ScopedTiming split("ReclaimPhase", GetTimings());
   if (kVerboseMode) {
@@ -1305,6 +1491,9 @@
     IssueEmptyCheckpoint();
     // Disable the check.
     is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0);
+    if (kUseBakerReadBarrier) {
+      updated_all_immune_objects_.StoreSequentiallyConsistent(false);
+    }
     CheckEmptyMarkStack();
   }
 
@@ -1317,7 +1506,9 @@
     uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
     uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
     uint64_t to_bytes = bytes_moved_.LoadSequentiallyConsistent();
+    cumulative_bytes_moved_.FetchAndAddRelaxed(to_bytes);
     uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
+    cumulative_objects_moved_.FetchAndAddRelaxed(to_objects);
     if (kEnableFromSpaceAccountingCheck) {
       CHECK_EQ(from_space_num_objects_at_first_pause_, from_objects + unevac_from_objects);
       CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
@@ -1344,31 +1535,19 @@
   }
 
   {
-    TimingLogger::ScopedTiming split3("ComputeUnevacFromSpaceLiveRatio", GetTimings());
-    ComputeUnevacFromSpaceLiveRatio();
-  }
-
-  {
     TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
     region_space_->ClearFromSpace();
   }
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (kUseBakerReadBarrier) {
-      ClearBlackPtrs();
-    }
     Sweep(false);
     SwapBitmaps();
     heap_->UnBindBitmaps();
 
-    // Remove bitmaps for the immune spaces.
-    while (!cc_bitmaps_.empty()) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap = cc_bitmaps_.back();
-      cc_heap_bitmap_->RemoveContinuousSpaceBitmap(cc_bitmap);
-      delete cc_bitmap;
-      cc_bitmaps_.pop_back();
-    }
+    // Delete the region bitmap.
+    DCHECK(region_space_bitmap_ != nullptr);
+    delete region_space_bitmap_;
     region_space_bitmap_ = nullptr;
   }
 
@@ -1379,39 +1558,6 @@
   }
 }
 
-class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor {
- public:
-  explicit ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-  void operator()(mirror::Object* ref) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(ref != nullptr);
-    DCHECK(collector_->region_space_bitmap_->Test(ref)) << ref;
-    DCHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref;
-    if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
-      // Clear the black ptr.
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
-    }
-    size_t obj_size = ref->SizeOf();
-    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
-    collector_->region_space_->AddLiveBytes(ref, alloc_size);
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Compute how much live objects are left in regions.
-void ConcurrentCopying::ComputeUnevacFromSpaceLiveRatio() {
-  region_space_->AssertAllRegionLiveBytesZeroOrCleared();
-  ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor visitor(this);
-  region_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(region_space_->Begin()),
-                                         reinterpret_cast<uintptr_t>(region_space_->Limit()),
-                                         visitor);
-}
-
 // Assert the to-space invariant.
 void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
                                                mirror::Object* ref) {
@@ -1482,7 +1628,7 @@
         ArtMethod* method = gc_root_source->GetArtMethod();
         LOG(INTERNAL_FATAL) << "gc root in method " << method << " " << PrettyMethod(method);
         RootPrinter root_printer;
-        method->VisitRoots(root_printer, sizeof(void*));
+        method->VisitRoots(root_printer, kRuntimePointerSize);
       }
       ref->GetLockWord(false).Dump(LOG(INTERNAL_FATAL));
       region_space_->DumpNonFreeRegions(LOG(INTERNAL_FATAL));
@@ -1515,19 +1661,10 @@
     }
   } else {
     // In a non-moving space.
-    if (immune_region_.ContainsObject(obj)) {
-      LOG(INFO) << "holder is in the image or the zygote space.";
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
-      CHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap.";
-      if (cc_bitmap->Test(obj)) {
-        LOG(INFO) << "holder is marked in the bit map.";
-      } else {
-        LOG(INFO) << "holder is NOT marked in the bit map.";
-      }
+    if (immune_spaces_.ContainsObject(obj)) {
+      LOG(INFO) << "holder is in an immune image or the zygote space.";
     } else {
-      LOG(INFO) << "holder is in a non-moving (or main) space.";
+      LOG(INFO) << "holder is in a non-immune, non-moving (or main) space.";
       accounting::ContinuousSpaceBitmap* mark_bitmap =
           heap_mark_bitmap_->GetContinuousSpaceBitmap(obj);
       accounting::LargeObjectBitmap* los_bitmap =
@@ -1555,18 +1692,18 @@
 void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj,
                                                                mirror::Object* ref) {
   // In a non-moving spaces. Check that the ref is marked.
-  if (immune_region_.ContainsObject(ref)) {
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
-    CHECK(cc_bitmap != nullptr)
-        << "An immune space ref must have a bitmap. " << ref;
+  if (immune_spaces_.ContainsObject(ref)) {
     if (kUseBakerReadBarrier) {
-      CHECK(cc_bitmap->Test(ref))
+      // Immune object may not be gray if called from the GC.
+      if (Thread::Current() == thread_running_gc_ && !gc_grays_immune_objects_) {
+        return;
+      }
+      bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
+      CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
           << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
-          << obj->GetReadBarrierPointer() << " ref=" << ref;
-    } else {
-      CHECK(cc_bitmap->Test(ref))
-          << "Unmarked immune space ref. obj=" << obj << " ref=" << ref;
+          << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr)
+          << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer()
+          << " updated_all_immune_objects=" << updated_all_immune_objects;
     }
   } else {
     accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1589,9 +1726,9 @@
 }
 
 // Used to scan ref fields of an object.
-class ConcurrentCopyingRefFieldsVisitor {
+class ConcurrentCopying::RefFieldsVisitor {
  public:
-  explicit ConcurrentCopyingRefFieldsVisitor(ConcurrentCopying* collector)
+  explicit RefFieldsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
@@ -1607,6 +1744,7 @@
   }
 
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!root->IsNull()) {
       VisitRoot(root);
@@ -1614,8 +1752,9 @@
   }
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    collector_->MarkRoot(root);
+    collector_->MarkRoot</*kGrayImmuneObject*/false>(root);
   }
 
  private:
@@ -1623,20 +1762,28 @@
 };
 
 // Scan ref fields of an object.
-void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+  if (kDisallowReadBarrierDuringScan) {
+    // Avoid all read barriers while visiting references, to help performance.
+    Thread::Current()->ModifyDebugDisallowReadBarrier(1);
+  }
   DCHECK(!region_space_->IsInFromSpace(to_ref));
-  ConcurrentCopyingRefFieldsVisitor visitor(this);
-  to_ref->VisitReferences(visitor, visitor);
+  DCHECK_EQ(Thread::Current(), thread_running_gc_);
+  RefFieldsVisitor visitor(this);
+  // Disable the read barrier for performance reasons.
+  to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+      visitor, visitor);
+  if (kDisallowReadBarrierDuringScan) {
+    Thread::Current()->ModifyDebugDisallowReadBarrier(-1);
+  }
 }
 
 // Process a field.
 inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) {
+  DCHECK_EQ(Thread::Current(), thread_running_gc_);
   mirror::Object* ref = obj->GetFieldObject<
       mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
-  if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-    return;
-  }
-  mirror::Object* to_ref = Mark(ref);
+  mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false>(ref);
   if (to_ref == ref) {
     return;
   }
@@ -1649,19 +1796,16 @@
       // It was updated by the mutator.
       break;
     }
-  } while (!obj->CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier<
+  } while (!obj->CasFieldWeakRelaxedObjectWithoutWriteBarrier<
       false, false, kVerifyNone>(offset, expected_ref, new_ref));
 }
 
 // Process some roots.
-void ConcurrentCopying::VisitRoots(
+inline void ConcurrentCopying::VisitRoots(
     mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
     mirror::Object** root = roots[i];
     mirror::Object* ref = *root;
-    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-      continue;
-    }
     mirror::Object* to_ref = Mark(ref);
     if (to_ref == ref) {
       continue;
@@ -1674,17 +1818,15 @@
         // It was updated by the mutator.
         break;
       }
-    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    } while (!addr->CompareExchangeWeakRelaxed(expected_ref, new_ref));
   }
 }
 
-void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
+template<bool kGrayImmuneObject>
+inline void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
   DCHECK(!root->IsNull());
   mirror::Object* const ref = root->AsMirrorPtr();
-  if (region_space_->IsInToSpace(ref)) {
-    return;
-  }
-  mirror::Object* to_ref = Mark(ref);
+  mirror::Object* to_ref = Mark<kGrayImmuneObject>(ref);
   if (to_ref != ref) {
     auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
     auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
@@ -1695,49 +1837,85 @@
         // It was updated by the mutator.
         break;
       }
-    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    } while (!addr->CompareExchangeWeakRelaxed(expected_ref, new_ref));
   }
 }
 
-void ConcurrentCopying::VisitRoots(
+inline void ConcurrentCopying::VisitRoots(
     mirror::CompressedReference<mirror::Object>** roots, size_t count,
     const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
     mirror::CompressedReference<mirror::Object>* const root = roots[i];
     if (!root->IsNull()) {
-      MarkRoot(root);
+      // kGrayImmuneObject is true because this is used for the thread flip.
+      MarkRoot</*kGrayImmuneObject*/true>(root);
     }
   }
 }
 
+// Temporarily set gc_grays_immune_objects_ to true in a scope if the current thread is the
+// GC-running thread.
+class ConcurrentCopying::ScopedGcGraysImmuneObjects {
+ public:
+  explicit ScopedGcGraysImmuneObjects(ConcurrentCopying* collector)
+      : collector_(collector), enabled_(false) {
+    if (kUseBakerReadBarrier &&
+        collector_->thread_running_gc_ == Thread::Current() &&
+        !collector_->gc_grays_immune_objects_) {
+      collector_->gc_grays_immune_objects_ = true;
+      enabled_ = true;
+    }
+  }
+
+  ~ScopedGcGraysImmuneObjects() {
+    if (kUseBakerReadBarrier &&
+        collector_->thread_running_gc_ == Thread::Current() &&
+        enabled_) {
+      DCHECK(collector_->gc_grays_immune_objects_);
+      collector_->gc_grays_immune_objects_ = false;
+    }
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+  bool enabled_;
+};
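
ScopedGcGraysImmuneObjects is a classic RAII scoped-flag guard: it sets the flag only if it was clear, remembers whether it did, and restores it on scope exit. A generic standalone sketch of the same pattern (a simplification, not ART code):

// Set a flag for the duration of a scope, restoring it on exit. Only the
// guard that actually set the flag clears it, so nesting is safe.
class ScopedFlag {
 public:
  explicit ScopedFlag(bool* flag) : flag_(flag), enabled_(!*flag) {
    if (enabled_) {
      *flag_ = true;
    }
  }
  ~ScopedFlag() {
    if (enabled_) {
      *flag_ = false;
    }
  }
 private:
  bool* const flag_;
  bool enabled_;  // True only if this guard set the flag.
};

int main() {
  bool gc_grays_immune_objects = false;
  {
    ScopedFlag guard(&gc_grays_immune_objects);
    // gc_grays_immune_objects is true inside this scope.
  }
  return gc_grays_immune_objects ? 1 : 0;  // Restored to false.
}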
+
 // Fill the given memory block with a dummy object. Used to fill in a copy of an object that was
 // lost in a race.
 void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size) {
+  // The GC doesn't gray immune objects while scanning immune objects. But we need to trigger the
+  // read barriers here because we need the updated reference to the int array class, etc.
+  // Temporarily set gc_grays_immune_objects_ to true so that we won't cause a DCHECK failure in
+  // MarkImmuneSpace().
+  ScopedGcGraysImmuneObjects scoped_gc_gray_immune_objects(this);
   CHECK_ALIGNED(byte_size, kObjectAlignment);
   memset(dummy_obj, 0, byte_size);
-  mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
+  // Avoid going through the read barrier since kDisallowReadBarrierDuringScan may be enabled.
+  // Mark explicitly to make sure we get an object in the to-space.
+  mirror::Class* int_array_class = down_cast<mirror::Class*>(
+      Mark(mirror::IntArray::GetArrayClass<kWithoutReadBarrier>()));
   CHECK(int_array_class != nullptr);
   AssertToSpaceInvariant(nullptr, MemberOffset(0), int_array_class);
-  size_t component_size = int_array_class->GetComponentSize();
+  size_t component_size = int_array_class->GetComponentSize<kWithoutReadBarrier>();
   CHECK_EQ(component_size, sizeof(int32_t));
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
     // An int array is too big. Use java.lang.Object.
     mirror::Class* java_lang_Object = WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object);
     AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object);
-    CHECK_EQ(byte_size, java_lang_Object->GetObjectSize());
+    CHECK_EQ(byte_size, (java_lang_Object->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
     dummy_obj->SetClass(java_lang_Object);
-    CHECK_EQ(byte_size, dummy_obj->SizeOf());
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()));
   } else {
     // Use an int array.
     dummy_obj->SetClass(int_array_class);
-    CHECK(dummy_obj->IsArrayInstance());
+    CHECK((dummy_obj->IsArrayInstance<kVerifyNone, kWithoutReadBarrier>()));
     int32_t length = (byte_size - data_offset) / component_size;
-    dummy_obj->AsArray()->SetLength(length);
-    CHECK_EQ(dummy_obj->AsArray()->GetLength(), length)
+    mirror::Array* dummy_arr = dummy_obj->AsArray<kVerifyNone, kWithoutReadBarrier>();
+    dummy_arr->SetLength(length);
+    CHECK_EQ(dummy_arr->GetLength(), length)
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
-    CHECK_EQ(byte_size, dummy_obj->SizeOf())
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()))
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
   }
@@ -1749,14 +1927,16 @@
   CHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
   Thread* self = Thread::Current();
   size_t min_object_size = RoundUp(sizeof(mirror::Object), space::RegionSpace::kAlignment);
-  MutexLock mu(self, skipped_blocks_lock_);
-  auto it = skipped_blocks_map_.lower_bound(alloc_size);
-  if (it == skipped_blocks_map_.end()) {
-    // Not found.
-    return nullptr;
-  }
+  size_t byte_size;
+  uint8_t* addr;
   {
-    size_t byte_size = it->first;
+    MutexLock mu(self, skipped_blocks_lock_);
+    auto it = skipped_blocks_map_.lower_bound(alloc_size);
+    if (it == skipped_blocks_map_.end()) {
+      // Not found.
+      return nullptr;
+    }
+    byte_size = it->first;
     CHECK_GE(byte_size, alloc_size);
     if (byte_size > alloc_size && byte_size - alloc_size < min_object_size) {
       // If remainder would be too small for a dummy object, retry with a larger request size.
@@ -1769,27 +1949,33 @@
       CHECK_GE(it->first - alloc_size, min_object_size)
           << "byte_size=" << byte_size << " it->first=" << it->first << " alloc_size=" << alloc_size;
     }
+    // Found a block.
+    CHECK(it != skipped_blocks_map_.end());
+    byte_size = it->first;
+    addr = it->second;
+    CHECK_GE(byte_size, alloc_size);
+    CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr)));
+    CHECK_ALIGNED(byte_size, space::RegionSpace::kAlignment);
+    if (kVerboseMode) {
+      LOG(INFO) << "Reusing skipped bytes : " << reinterpret_cast<void*>(addr) << ", " << byte_size;
+    }
+    skipped_blocks_map_.erase(it);
   }
-  // Found a block.
-  CHECK(it != skipped_blocks_map_.end());
-  size_t byte_size = it->first;
-  uint8_t* addr = it->second;
-  CHECK_GE(byte_size, alloc_size);
-  CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr)));
-  CHECK_ALIGNED(byte_size, space::RegionSpace::kAlignment);
-  if (kVerboseMode) {
-    LOG(INFO) << "Reusing skipped bytes : " << reinterpret_cast<void*>(addr) << ", " << byte_size;
-  }
-  skipped_blocks_map_.erase(it);
   memset(addr, 0, byte_size);
   if (byte_size > alloc_size) {
     // Return the remainder to the map.
     CHECK_ALIGNED(byte_size - alloc_size, space::RegionSpace::kAlignment);
     CHECK_GE(byte_size - alloc_size, min_object_size);
+    // FillWithDummyObject may mark an object, so avoid holding skipped_blocks_lock_ to prevent a
+    // lock order violation and a possible deadlock. The deadlock case is recursive:
+    // FillWithDummyObject -> IntArray::GetArrayClass -> Mark -> Copy -> AllocateInSkippedBlock.
     FillWithDummyObject(reinterpret_cast<mirror::Object*>(addr + alloc_size),
                         byte_size - alloc_size);
     CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr + alloc_size)));
-    skipped_blocks_map_.insert(std::make_pair(byte_size - alloc_size, addr + alloc_size));
+    {
+      MutexLock mu(self, skipped_blocks_lock_);
+      skipped_blocks_map_.insert(std::make_pair(byte_size - alloc_size, addr + alloc_size));
+    }
   }
   return reinterpret_cast<mirror::Object*>(addr);
 }
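
AllocateInSkippedBlock is a best-fit lookup: lower_bound on a size-keyed multimap returns the smallest block that is at least alloc_size bytes, and any remainder is reinserted. A simplified standalone sketch (the real code additionally fills the remainder with a dummy object outside the lock, per the deadlock comment above):

#include <cstddef>
#include <cstdint>
#include <map>
#include <mutex>

std::mutex skipped_blocks_lock;
std::multimap<std::size_t, uint8_t*> skipped_blocks_map;  // size -> address.

uint8_t* AllocateInSkippedBlock(std::size_t alloc_size) {
  std::lock_guard<std::mutex> lock(skipped_blocks_lock);
  // Smallest block with size >= alloc_size.
  auto it = skipped_blocks_map.lower_bound(alloc_size);
  if (it == skipped_blocks_map.end()) {
    return nullptr;  // No block is large enough.
  }
  uint8_t* addr = it->second;
  const std::size_t byte_size = it->first;
  skipped_blocks_map.erase(it);
  if (byte_size > alloc_size) {
    // Return the remainder to the map for later reuse.
    skipped_blocks_map.emplace(byte_size - alloc_size, addr + alloc_size);
  }
  return addr;
}

int main() {
  static uint8_t block[128];
  skipped_blocks_map.emplace(sizeof(block), block);
  return (AllocateInSkippedBlock(64) == block) ? 0 : 1;
}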
@@ -1947,22 +2133,9 @@
     }
   } else {
     // from_ref is in a non-moving space.
-    if (immune_region_.ContainsObject(from_ref)) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      DCHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap";
-      if (kIsDebugBuild) {
-        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
-            << "Immune space object must be already marked";
-      }
-      if (cc_bitmap->Test(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-      } else {
-        // Newly marked.
-        to_ref = nullptr;
-      }
+    if (immune_spaces_.ContainsObject(from_ref)) {
+      // An immune object is alive.
+      to_ref = from_ref;
     } else {
       // Non-immune non-moving space. Use the mark bitmap.
       accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1998,153 +2171,87 @@
   return alloc_stack->Contains(ref);
 }
 
-mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
-  if (from_ref == nullptr) {
-    return nullptr;
-  }
-  DCHECK(from_ref != nullptr);
-  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
-  if (kUseBakerReadBarrier && !is_active_) {
-    // In the lock word forward address state, the read barrier bits
-    // in the lock word are part of the stored forwarding address and
-    // invalid. This is usually OK as the from-space copy of objects
-    // aren't accessed by mutators due to the to-space
-    // invariant. However, during the dex2oat image writing relocation
-    // and the zygote compaction, objects can be in the forward
-    // address state (to store the forward/relocation addresses) and
-    // they can still be accessed and the invalid read barrier bits
-    // are consulted. If they look like gray but aren't really, the
-    // read barriers slow path can trigger when it shouldn't. To guard
-    // against this, return here if the CC collector isn't running.
-    return from_ref;
-  }
-  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
-  space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
-  if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) {
-    // It's already marked.
-    return from_ref;
-  }
-  mirror::Object* to_ref;
-  if (rtype == space::RegionSpace::RegionType::kRegionTypeFromSpace) {
-    to_ref = GetFwdPtr(from_ref);
+mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
+  // ref is in a non-moving space (from_ref == to_ref).
+  DCHECK(!region_space_->HasAddress(ref)) << ref;
+  DCHECK(!immune_spaces_.ContainsObject(ref));
+  // Use the mark bitmap.
+  accounting::ContinuousSpaceBitmap* mark_bitmap =
+      heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
+  accounting::LargeObjectBitmap* los_bitmap =
+      heap_mark_bitmap_->GetLargeObjectBitmap(ref);
+  CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+  bool is_los = mark_bitmap == nullptr;
+  if (!is_los && mark_bitmap->Test(ref)) {
+    // Already marked.
     if (kUseBakerReadBarrier) {
-      DCHECK(to_ref != ReadBarrier::GrayPtr()) << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
     }
-    if (to_ref == nullptr) {
-      // It isn't marked yet. Mark it by copying it to the to-space.
-      to_ref = Copy(from_ref);
-    }
-    DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
-        << "from_ref=" << from_ref << " to_ref=" << to_ref;
-  } else if (rtype == space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace) {
-    // This may or may not succeed, which is ok.
+  } else if (is_los && los_bitmap->Test(ref)) {
+    // Already marked in LOS.
     if (kUseBakerReadBarrier) {
-      from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
-      // Already marked.
-      to_ref = from_ref;
-    } else {
-      // Newly marked.
-      to_ref = from_ref;
-      if (kUseBakerReadBarrier) {
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      }
-      PushOntoMarkStack(to_ref);
+      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
     }
   } else {
-    // from_ref is in a non-moving space.
-    DCHECK(!region_space_->HasAddress(from_ref)) << from_ref;
-    if (immune_region_.ContainsObject(from_ref)) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      DCHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap";
-      if (kIsDebugBuild) {
-        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
-            << "Immune space object must be already marked";
-      }
-      // This may or may not succeed, which is ok.
-      if (kUseBakerReadBarrier) {
-        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-      }
-      if (cc_bitmap->AtomicTestAndSet(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
+    // Not marked.
+    if (IsOnAllocStack(ref)) {
+      // If it's on the allocation stack, it's considered marked. Keep it white.
+      // Objects on the allocation stack need not be marked.
+      if (!is_los) {
+        DCHECK(!mark_bitmap->Test(ref));
       } else {
-        // Newly marked.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-        }
-        PushOntoMarkStack(to_ref);
+        DCHECK(!los_bitmap->Test(ref));
+      }
+      if (kUseBakerReadBarrier) {
+        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
       }
     } else {
-      // Use the mark bitmap.
-      accounting::ContinuousSpaceBitmap* mark_bitmap =
-          heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      accounting::LargeObjectBitmap* los_bitmap =
-          heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
-      CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
-      bool is_los = mark_bitmap == nullptr;
-      if (!is_los && mark_bitmap->Test(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+      // For the baker-style RB, we need to handle 'false-gray' cases. See the
+      // kRegionTypeUnevacFromSpace-case comment in Mark().
+      if (kUseBakerReadBarrier) {
+        // Test the bitmap first to reduce the chance of false gray cases.
+        if ((!is_los && mark_bitmap->Test(ref)) ||
+            (is_los && los_bitmap->Test(ref))) {
+          return ref;
         }
-      } else if (is_los && los_bitmap->Test(from_ref)) {
+      }
+      // Not marked or on the allocation stack. Try to mark it.
+      // This may or may not succeed, which is ok.
+      bool cas_success = false;
+      if (kUseBakerReadBarrier) {
+        cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                       ReadBarrier::GrayPtr());
+      }
+      if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
+        // Already marked.
+        if (kUseBakerReadBarrier && cas_success &&
+            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+          PushOntoFalseGrayStack(ref);
+        }
+      } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
         // Already marked in LOS.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        if (kUseBakerReadBarrier && cas_success &&
+            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+          PushOntoFalseGrayStack(ref);
         }
       } else {
-        // Not marked.
-        if (IsOnAllocStack(from_ref)) {
-          // If it's on the allocation stack, it's considered marked. Keep it white.
-          to_ref = from_ref;
-          // Objects on the allocation stack need not be marked.
-          if (!is_los) {
-            DCHECK(!mark_bitmap->Test(to_ref));
-          } else {
-            DCHECK(!los_bitmap->Test(to_ref));
-          }
-          if (kUseBakerReadBarrier) {
-            DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-          }
-        } else {
-          // Not marked or on the allocation stack. Try to mark it.
-          // This may or may not succeed, which is ok.
-          if (kUseBakerReadBarrier) {
-            from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-          }
-          if (!is_los && mark_bitmap->AtomicTestAndSet(from_ref)) {
-            // Already marked.
-            to_ref = from_ref;
-          } else if (is_los && los_bitmap->AtomicTestAndSet(from_ref)) {
-            // Already marked in LOS.
-            to_ref = from_ref;
-          } else {
-            // Newly marked.
-            to_ref = from_ref;
-            if (kUseBakerReadBarrier) {
-              DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-            }
-            PushOntoMarkStack(to_ref);
-          }
+        // Newly marked.
+        if (kUseBakerReadBarrier) {
+          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
         }
+        PushOntoMarkStack(ref);
       }
     }
   }
-  return to_ref;
+  return ref;
 }
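
MarkNonMoving claims each object exactly once through AtomicTestAndSet on the mark bitmap, and only the winner pushes the object onto the mark stack. A sketch of that word-packed test-and-set with std::atomic (hypothetical bitmap layout):

#include <atomic>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kBitsPerWord = 64;
std::atomic<uint64_t> bitmap_words[16] = {};

// fetch_or returns the prior word, so the caller learns atomically
// whether the bit was already set (already marked) or newly set.
bool AtomicTestAndSet(std::size_t index) {
  const uint64_t mask = uint64_t{1} << (index % kBitsPerWord);
  const uint64_t old_word = bitmap_words[index / kBitsPerWord].fetch_or(
      mask, std::memory_order_relaxed);
  return (old_word & mask) != 0;
}

int main() {
  bool already = AtomicTestAndSet(5);  // false: newly marked -> push it.
  bool again = AtomicTestAndSet(5);    // true: someone marked it first.
  return (!already && again) ? 0 : 1;
}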
 
 void ConcurrentCopying::FinishPhase() {
+  Thread* const self = Thread::Current();
   {
-    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    MutexLock mu(self, mark_stack_lock_);
     CHECK_EQ(pooled_mark_stacks_.size(), kMarkStackPoolSize);
   }
   region_space_ = nullptr;
@@ -2152,8 +2259,41 @@
     MutexLock mu(Thread::Current(), skipped_blocks_lock_);
     skipped_blocks_map_.clear();
   }
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-  heap_->ClearMarkedObjects();
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    {
+      WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      heap_->ClearMarkedObjects();
+    }
+    if (kUseBakerReadBarrier && kFilterModUnionCards) {
+      TimingLogger::ScopedTiming split("FilterModUnionCards", GetTimings());
+      ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      gc::Heap* const heap = Runtime::Current()->GetHeap();
+      for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+        DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+        accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+        // Filter out cards that don't need to be set.
+        if (table != nullptr) {
+          table->FilterCards();
+        }
+      }
+    }
+    if (kUseBakerReadBarrier) {
+      TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
+      DCHECK(rb_mark_bit_stack_.get() != nullptr);
+      const auto* limit = rb_mark_bit_stack_->End();
+      for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
+        CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
+      }
+      rb_mark_bit_stack_->Reset();
+    }
+  }
+  if (measure_read_barrier_slow_path_) {
+    MutexLock mu(self, rb_slow_path_histogram_lock_);
+    rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed());
+    rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed();
+    rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed();
+  }
 }
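
The new EmptyRBMarkBitStack step drains the stack of objects whose mark bits were set through the read barrier and clears each bit in one pass. A minimal sketch with stand-in types (not ART's lock-word mark bit):

#include <atomic>
#include <vector>

struct Obj {
  std::atomic<int> mark_bit{0};
  // CAS the mark bit, like AtomicSetMarkBit(1, 0) above.
  bool AtomicSetMarkBit(int expected, int desired) {
    return mark_bit.compare_exchange_strong(expected, desired);
  }
};

int main() {
  std::vector<Obj*> rb_mark_bit_stack;
  Obj a;
  a.AtomicSetMarkBit(0, 1);         // Marked via the read barrier.
  rb_mark_bit_stack.push_back(&a);  // Recorded for later cleanup.
  for (Obj* obj : rb_mark_bit_stack) {
    if (!obj->AtomicSetMarkBit(1, 0)) {  // The bit must still be set.
      return 1;
    }
  }
  rb_mark_bit_stack.clear();
  return (a.mark_bit.load() == 0) ? 0 : 1;
}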
 
 bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) {
@@ -2191,6 +2331,39 @@
   region_space_->RevokeAllThreadLocalBuffers();
 }
 
+mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
+  if (Thread::Current() != thread_running_gc_) {
+    rb_slow_path_count_.FetchAndAddRelaxed(1u);
+  } else {
+    rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+  }
+  ScopedTrace tr(__FUNCTION__);
+  const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
+  mirror::Object* ret = Mark(from_ref);
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+  }
+  return ret;
+}
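
MarkFromReadBarrierWithMeasurements brackets the slow path with relaxed counters and a nanosecond timer; relaxed ordering is enough because the totals are only aggregated later, under a lock, in FinishPhase. A sketch using std::chrono in place of NanoTime() (names hypothetical):

#include <atomic>
#include <chrono>
#include <cstdint>

std::atomic<uint64_t> rb_slow_path_ns{0};
std::atomic<uint64_t> rb_slow_path_count{0};

// Run the slow path, counting invocations and accumulating elapsed time.
template <typename Fn>
auto MeasureSlowPath(Fn&& fn) {
  rb_slow_path_count.fetch_add(1, std::memory_order_relaxed);
  const auto start = std::chrono::steady_clock::now();
  auto result = fn();
  const auto elapsed = std::chrono::steady_clock::now() - start;
  rb_slow_path_ns.fetch_add(
      static_cast<uint64_t>(
          std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count()),
      std::memory_order_relaxed);
  return result;
}

int main() {
  int marked = MeasureSlowPath([] { return 42; });  // Stand-in for Mark().
  return (marked == 42) ? 0 : 1;
}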
+
+void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
+  GarbageCollector::DumpPerformanceInfo(os);
+  MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
+  if (rb_slow_path_time_histogram_.SampleSize() > 0) {
+    Histogram<uint64_t>::CumulativeData cumulative_data;
+    rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data);
+    rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+  }
+  if (rb_slow_path_count_total_ > 0) {
+    os << "Slow path count " << rb_slow_path_count_total_ << "\n";
+  }
+  if (rb_slow_path_count_gc_total_ > 0) {
+    os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
+  }
+  os << "Cumulative bytes moved " << cumulative_bytes_moved_.LoadRelaxed() << "\n";
+  os << "Cumulative objects moved " << cumulative_objects_moved_.LoadRelaxed() << "\n";
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 8efad73..5b0e2d6 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -19,7 +19,7 @@
 
 #include "barrier.h"
 #include "garbage_collector.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -51,23 +51,34 @@
 
 class ConcurrentCopying : public GarbageCollector {
  public:
-  // TODO: disable thse flags for production use.
   // Enable the no-from-space-refs verification at the pause.
-  static constexpr bool kEnableNoFromSpaceRefsVerification = true;
+  static constexpr bool kEnableNoFromSpaceRefsVerification = kIsDebugBuild;
   // Enable the from-space bytes/objects check.
-  static constexpr bool kEnableFromSpaceAccountingCheck = true;
+  static constexpr bool kEnableFromSpaceAccountingCheck = kIsDebugBuild;
   // Enable verbose mode.
-  static constexpr bool kVerboseMode = true;
+  static constexpr bool kVerboseMode = false;
+  // If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
+  // pages.
+  static constexpr bool kGrayDirtyImmuneObjects = true;
 
-  ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+  ConcurrentCopying(Heap* heap,
+                    const std::string& name_prefix = "",
+                    bool measure_read_barrier_slow_path = false);
   ~ConcurrentCopying();
 
-  virtual void RunPhases() OVERRIDE REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
-  void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  virtual void RunPhases() OVERRIDE
+      REQUIRES(!immune_gray_stack_lock_,
+               !mark_stack_lock_,
+               !rb_slow_path_histogram_lock_,
+               !skipped_blocks_lock_);
+  void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
   void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
-  void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  void FinishPhase() REQUIRES(!mark_stack_lock_,
+                              !rb_slow_path_histogram_lock_,
+                              !skipped_blocks_lock_);
 
   void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::heap_bitmap_lock_);
@@ -93,8 +104,13 @@
     DCHECK(ref != nullptr);
     return IsMarked(ref) == ref;
   }
-  mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  template<bool kGrayImmuneObject = true>
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   bool IsMarking() const {
     return is_marking_;
   }
@@ -114,20 +130,23 @@
   void PushOntoMarkStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   mirror::Object* Copy(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!skipped_blocks_lock_, !mark_stack_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void Scan(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   void Process(mirror::Object* obj, MemberOffset offset)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  template<bool kGrayImmuneObject>
   void MarkRoot(mirror::CompressedReference<mirror::Object>* root)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
                           const RootInfo& info)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void VerifyNoFromSpaceReferences() REQUIRES(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
@@ -136,6 +155,12 @@
   bool ProcessMarkStackOnce() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  void GrayAllDirtyImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void VerifyGrayImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
   size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access)
@@ -147,9 +172,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void ProcessReferences(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
   virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) OVERRIDE
@@ -160,12 +187,14 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
   void SweepLargeObjects(bool swap_bitmaps)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
-  void ClearBlackPtrs()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+  void MarkZygoteLargeObjects()
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* AllocateInSkippedBlock(size_t alloc_size)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void CheckEmptyMarkStack() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void IssueEmptyCheckpoint() SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsOnAllocStack(mirror::Object* ref) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -182,10 +211,33 @@
   void ReenableWeakRefAccess(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   void DisableMarking() SHARED_REQUIRES(Locks::mutator_lock_);
   void IssueDisableMarkingCheckpoint() SHARED_REQUIRES(Locks::mutator_lock_);
+  void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegion(mirror::Object* from_ref,
+      accounting::SpaceBitmap<kObjectAlignment>* bitmap)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  template<bool kGrayImmuneObject>
+  ALWAYS_INLINE mirror::Object* MarkImmuneSpace(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!immune_gray_stack_lock_);
+  void PushOntoFalseGrayStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void ProcessFalseGrayStack() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void ScanImmuneObject(mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
   std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+  std::unique_ptr<accounting::ObjectStack> rb_mark_bit_stack_;
+  bool rb_mark_bit_stack_full_;
+  std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
   Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<accounting::ObjectStack*> revoked_mark_stacks_
       GUARDED_BY(mark_stack_lock_);
@@ -197,9 +249,7 @@
   bool is_marking_;                       // True while marking is ongoing.
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
-  ImmuneRegion immune_region_;
-  std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_;
-  std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_;
+  ImmuneSpaces immune_spaces_;
   accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
   // A cache of Heap::GetMarkBitmap().
   accounting::HeapBitmap* heap_mark_bitmap_;
@@ -222,6 +272,8 @@
   // How many objects and bytes we moved. Used for accounting.
   Atomic<size_t> bytes_moved_;
   Atomic<size_t> objects_moved_;
+  Atomic<uint64_t> cumulative_bytes_moved_;
+  Atomic<uint64_t> cumulative_objects_moved_;
 
   // The skipped blocks are memory blocks/chunks that were copies of
   // objects that were unused due to lost races (cas failures) at
@@ -231,19 +283,45 @@
   Atomic<size_t> to_space_bytes_skipped_;
   Atomic<size_t> to_space_objects_skipped_;
 
+  // If measure_read_barrier_slow_path_ is true, we measure how long is spent in
+  // MarkFromReadBarrier and log the results.
+  bool measure_read_barrier_slow_path_;
+  // mark_from_read_barrier_measurements_ is true if systrace is enabled or
+  // measure_read_barrier_slow_path_ is true.
+  bool mark_from_read_barrier_measurements_;
+  Atomic<uint64_t> rb_slow_path_ns_;
+  Atomic<uint64_t> rb_slow_path_count_;
+  Atomic<uint64_t> rb_slow_path_count_gc_;
+  mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+
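
The fields above suggest the usual split between a cheap hot path and a locked flush: per-call counters are bumped with relaxed atomics inside the read barrier slow path, while the histogram, which needs its own lock, is only touched when a collection finishes. A sketch of that pattern under those assumptions, with stand-in names rather than the actual ART code:

#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <vector>

std::atomic<uint64_t> slow_path_ns{0};
std::atomic<uint64_t> slow_path_count{0};
std::mutex histogram_lock;
std::vector<uint64_t> histogram;  // Stand-in for Histogram<uint64_t>.

// Hot path: relaxed atomics only, so the read barrier stays cheap.
template <typename F>
void TimedSlowPath(F&& mark) {
  auto start = std::chrono::steady_clock::now();
  mark();
  auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::steady_clock::now() - start).count();
  slow_path_ns.fetch_add(static_cast<uint64_t>(ns), std::memory_order_relaxed);
  slow_path_count.fetch_add(1, std::memory_order_relaxed);
}

// GC end: fold the counters into the histogram under its own lock.
void FlushAtGcEnd() {
  std::lock_guard<std::mutex> lock(histogram_lock);
  histogram.push_back(slow_path_ns.exchange(0, std::memory_order_relaxed));
}

int main() {
  TimedSlowPath([] { /* pretend to mark an object */ });
  FlushAtGcEnd();
  std::printf("slow path calls: %llu\n",
              static_cast<unsigned long long>(slow_path_count.load()));
}
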
   accounting::ReadBarrierTable* rb_table_;
   bool force_evacuate_all_;  // True if all regions are evacuated.
+  Atomic<bool> updated_all_immune_objects_;
+  bool gc_grays_immune_objects_;
+  Mutex immune_gray_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::vector<mirror::Object*> immune_gray_stack_ GUARDED_BY(immune_gray_stack_lock_);
 
-  friend class ConcurrentCopyingRefFieldsVisitor;
-  friend class ConcurrentCopyingImmuneSpaceObjVisitor;
-  friend class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor;
-  friend class ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor;
-  friend class ConcurrentCopyingClearBlackPtrsVisitor;
-  friend class ConcurrentCopyingLostCopyVisitor;
-  friend class ThreadFlipVisitor;
-  friend class FlipCallback;
-  friend class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor;
-  friend class RevokeThreadLocalMarkStackCheckpoint;
+  class AssertToSpaceInvariantFieldVisitor;
+  class AssertToSpaceInvariantObjectVisitor;
+  class AssertToSpaceInvariantRefsVisitor;
+  class ClearBlackPtrsVisitor;
+  class ComputeUnevacFromSpaceLiveRatioVisitor;
+  class DisableMarkingCheckpoint;
+  class FlipCallback;
+  class GrayImmuneObjectVisitor;
+  class ImmuneSpaceScanObjVisitor;
+  class LostCopyVisitor;
+  class RefFieldsVisitor;
+  class RevokeThreadLocalMarkStackCheckpoint;
+  class ScopedGcGraysImmuneObjects;
+  class ThreadFlipVisitor;
+  class VerifyGrayImmuneObjectsVisitor;
+  class VerifyNoFromSpaceRefsFieldVisitor;
+  class VerifyNoFromSpaceRefsObjectVisitor;
+  class VerifyNoFromSpaceRefsVisitor;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying);
 };
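
A pattern that recurs throughout this change is replacing long friend lists with private nested class declarations: a nested class already has access to the private members of its enclosing class, so no friendship is needed. A minimal illustration (hypothetical names, not the ART classes):

class Collector {
 public:
  void Mark();
 private:
  class Visitor;   // Private nested class replaces a friend declaration.
  int mark_count_ = 0;
};

class Collector::Visitor {
 public:
  explicit Visitor(Collector* c) : c_(c) {}
  void Run() { ++c_->mark_count_; }  // Nested class sees private members.
 private:
  Collector* const c_;
};

void Collector::Mark() {
  Visitor v(this);
  v.Run();
}

int main() {
  Collector c;
  c.Mark();
}
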
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index afd0a30..18c4adf 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -18,13 +18,11 @@
 
 #include "garbage_collector.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/trace.h"
-
 #include "base/dumpable.h"
 #include "base/histogram-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/space/large_object_space.h"
@@ -81,7 +79,7 @@
 }
 
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
-  ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()).c_str());
+  ScopedTrace trace(StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()));
   Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   Iteration* current_iteration = GetCurrentIteration();
@@ -107,7 +105,6 @@
     MutexLock mu(self, pause_histogram_lock_);
     pause_histogram_.AdjustAndAddValue(pause_time);
   }
-  ATRACE_END();
 }
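
Replacing the paired ATRACE_BEGIN/ATRACE_END calls with a ScopedTrace object moves the end event into a destructor, so it fires on every exit path, including early returns. A minimal sketch of such an RAII guard, with printf standing in for the real tracing backend (the actual type in base/systrace.h differs):

#include <cstdio>
#include <string>
#include <utility>

class ScopedTraceSketch {
 public:
  explicit ScopedTraceSketch(std::string name) : name_(std::move(name)) {
    std::printf("trace begin: %s\n", name_.c_str());
  }
  // Runs on every exit path, so the matching "end" event cannot be
  // forgotten the way a manual ATRACE_END() can.
  ~ScopedTraceSketch() { std::printf("trace end: %s\n", name_.c_str()); }
  ScopedTraceSketch(const ScopedTraceSketch&) = delete;
  ScopedTraceSketch& operator=(const ScopedTraceSketch&) = delete;

 private:
  std::string name_;
};

int main() {
  ScopedTraceSketch trace("Example GC");
  // ... collection work; an early return would still emit the end event.
}
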
 
 void GarbageCollector::SwapBitmaps() {
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 954c80e..e0b71a7 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -153,7 +153,9 @@
   void ResetCumulativeStatistics() REQUIRES(!pause_histogram_lock_);
   // Swap the live and mark bitmaps of spaces that are active for the collector. For partial GC,
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
-  void SwapBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
+  void SwapBitmaps()
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   uint64_t GetTotalPausedTimeNs() REQUIRES(!pause_histogram_lock_);
   int64_t GetTotalFreedBytes() const {
     return total_freed_bytes_;
@@ -179,7 +181,7 @@
   void RecordFree(const ObjectBytePair& freed);
   // Record a free of large objects.
   void RecordFreeLOS(const ObjectBytePair& freed);
-  void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
+  virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
 
   // Helper functions for querying if objects are marked. These are used for processing references,
   // and will be used for reading system weaks while the GC is running.
diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc
index 3e1c944..8a04c17 100644
--- a/runtime/gc/collector/immune_region.cc
+++ b/runtime/gc/collector/immune_region.cc
@@ -32,39 +32,6 @@
   SetEnd(nullptr);
 }
 
-bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) {
-  // Bind live to mark bitmap if necessary.
-  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    CHECK(space->IsContinuousMemMapAllocSpace());
-    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
-  }
-  mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin());
-  mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit());
-  if (IsEmpty()) {
-    SetBegin(space_begin);
-    SetEnd(space_limit);
-  } else {
-    if (space_limit <= begin_) {  // Space is before the immune region.
-      SetBegin(space_begin);
-    } else if (space_begin >= end_) {  // Space is after the immune region.
-      SetEnd(space_limit);
-    } else {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool ImmuneRegion::ContainsSpace(const space::ContinuousSpace* space) const {
-  bool contains =
-      begin_ <= reinterpret_cast<mirror::Object*>(space->Begin()) &&
-      end_ >= reinterpret_cast<mirror::Object*>(space->Limit());
-  if (kIsDebugBuild && contains) {
-    // A bump pointer space should not be in the immune region.
-    DCHECK(space->GetType() != space::kSpaceTypeBumpPointerSpace);
-  }
-  return contains;
-}
 
 }  // namespace collector
 }  // namespace gc
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 3ead501..c9ac435 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -39,35 +39,38 @@
 class ImmuneRegion {
  public:
   ImmuneRegion();
+
   void Reset();
-  bool AddContinuousSpace(space::ContinuousSpace* space)
-      REQUIRES(Locks::heap_bitmap_lock_);
-  bool ContainsSpace(const space::ContinuousSpace* space) const;
+
   // Returns true if an object is inside of the immune region (assumed to be marked).
-  bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE {
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
     // Note: Relies on integer underflow behavior.
     return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_;
   }
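
The single comparison above works because subtracting begin_ from an address below it wraps around to a very large unsigned value, so (obj - begin) < size performs both bounds checks in one branch. A standalone demonstration:

#include <cassert>
#include <cstdint>

// One-branch range check relying on unsigned wraparound, as noted above.
bool InRange(uintptr_t addr, uintptr_t begin, uintptr_t size) {
  return addr - begin < size;  // addr < begin wraps to a huge value.
}

int main() {
  assert(InRange(0x1010, 0x1000, 0x100));   // Inside the region.
  assert(!InRange(0x0FFF, 0x1000, 0x100));  // Below begin: wraps, fails.
  assert(!InRange(0x1100, 0x1000, 0x100));  // At end: fails.
}
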
+
   void SetBegin(mirror::Object* begin) {
     begin_ = begin;
     UpdateSize();
   }
+
   void SetEnd(mirror::Object* end) {
     end_ = end;
     UpdateSize();
   }
 
-  mirror::Object* Begin() {
+  mirror::Object* Begin() const {
     return begin_;
   }
-  mirror::Object* End() {
+
+  mirror::Object* End() const {
     return end_;
   }
 
- private:
-  bool IsEmpty() const {
-    return size_ == 0;
+  size_t Size() const {
+    return size_;
   }
+
+ private:
   void UpdateSize() {
     size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
   }
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
new file mode 100644
index 0000000..1e5f283
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "immune_spaces.h"
+
+#include <vector>
+#include <tuple>
+
+#include "gc/space/space-inl.h"
+#include "mirror/object.h"
+#include "oat_file.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+void ImmuneSpaces::Reset() {
+  spaces_.clear();
+  largest_immune_region_.Reset();
+}
+
+void ImmuneSpaces::CreateLargestImmuneRegion() {
+  uintptr_t best_begin = 0u;
+  uintptr_t best_end = 0u;
+  uintptr_t best_heap_size = 0u;
+  uintptr_t cur_begin = 0u;
+  uintptr_t cur_end = 0u;
+  uintptr_t cur_heap_size = 0u;
+  using Interval = std::tuple</*start*/uintptr_t, /*end*/uintptr_t, /*is_heap*/bool>;
+  std::vector<Interval> intervals;
+  for (space::ContinuousSpace* space : GetSpaces()) {
+    uintptr_t space_begin = reinterpret_cast<uintptr_t>(space->Begin());
+    uintptr_t space_end = reinterpret_cast<uintptr_t>(space->Limit());
+    if (space->IsImageSpace()) {
+      // For the boot image, the boot oat file is always directly after. For app images it may
+      // not be, if the app image was mapped at a random address.
+      space::ImageSpace* image_space = space->AsImageSpace();
+      // Update the end to include the other non-heap sections.
+      space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize);
+      // For the app image case, GetOatFileBegin is where the oat file was mapped during image
+      // creation; the actual oat file could be somewhere else.
+      const OatFile* const image_oat_file = image_space->GetOatFile();
+      if (image_oat_file != nullptr) {
+        intervals.push_back(Interval(reinterpret_cast<uintptr_t>(image_oat_file->Begin()),
+                                     reinterpret_cast<uintptr_t>(image_oat_file->End()),
+                                     /*is_heap*/false));
+      }
+    }
+    intervals.push_back(Interval(space_begin, space_end, /*is_heap*/true));
+  }
+  std::sort(intervals.begin(), intervals.end());
+  // Intervals are sorted by begin. If a new interval begins at the end of the current region,
+  // we append it; otherwise we restart the current interval. To avoid starting an interval on
+  // an oat file, oat files that do not extend an existing interval are ignored.
+  // Whenever the total number of heap bytes in the current interval exceeds the current best,
+  // the current interval becomes the new best.
+  for (const Interval& interval : intervals) {
+    const uintptr_t begin = std::get<0>(interval);
+    const uintptr_t end = std::get<1>(interval);
+    const bool is_heap = std::get<2>(interval);
+    VLOG(collector) << "Interval " << reinterpret_cast<const void*>(begin) << "-"
+                    << reinterpret_cast<const void*>(end) << " is_heap=" << is_heap;
+    DCHECK_GE(end, begin);
+    DCHECK_GE(begin, cur_end);
+    // The new interval is not at the end of the current one; start a new interval if it is a
+    // heap interval. Otherwise continue, since we never start a new region with a non-heap
+    // interval.
+    if (begin != cur_end) {
+      if (!is_heap) {
+        continue;
+      }
+      // Not extending, reset the region.
+      cur_begin = begin;
+      cur_heap_size = 0;
+    }
+    cur_end = end;
+    if (is_heap) {
+      // Only update if the total number of heap bytes is greater than the current best.
+      // We don't want to count the oat file bytes since they contain no Java objects.
+      cur_heap_size += end - begin;
+      if (cur_heap_size > best_heap_size) {
+        best_begin = cur_begin;
+        best_end = cur_end;
+        best_heap_size = cur_heap_size;
+      }
+    }
+  }
+  largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin));
+  largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end));
+  VLOG(collector) << "Immune region " << largest_immune_region_.Begin() << "-"
+                  << largest_immune_region_.End();
+}
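
The coalescing rule above can be exercised in isolation: heap intervals may start or extend a region, oat intervals may only extend one, and the best region is the one holding the most heap bytes. A simplified standalone model, assuming non-overlapping intervals (a sketch, not the ART implementation):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <utility>
#include <vector>

// begin, end, is_heap -- mirrors the Interval tuple above.
using Interval = std::tuple<uintptr_t, uintptr_t, bool>;

std::pair<uintptr_t, uintptr_t> LargestRegion(std::vector<Interval> v) {
  std::sort(v.begin(), v.end());
  uintptr_t best_begin = 0, best_end = 0, best_heap = 0;
  uintptr_t cur_begin = 0, cur_end = 0, cur_heap = 0;
  for (const auto& [begin, end, is_heap] : v) {
    if (begin != cur_end) {      // Not contiguous with the current region.
      if (!is_heap) continue;    // Never start a region on an oat file.
      cur_begin = begin;
      cur_heap = 0;
    }
    cur_end = end;
    if (is_heap) {
      cur_heap += end - begin;   // Only heap bytes count toward "best".
      if (cur_heap > best_heap) {
        best_begin = cur_begin;
        best_end = cur_end;
        best_heap = cur_heap;
      }
    }
  }
  return {best_begin, best_end};
}

int main() {
  // [image 0x1000-0x2000][oat 0x2000-0x3000][space 0x3000-0x4000]
  auto r = LargestRegion({{0x1000, 0x2000, true},
                          {0x2000, 0x3000, false},
                          {0x3000, 0x4000, true}});
  std::printf("region 0x%lx-0x%lx\n",
              static_cast<unsigned long>(r.first),
              static_cast<unsigned long>(r.second));  // 0x1000-0x4000
}
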
+
+void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) {
+  DCHECK(spaces_.find(space) == spaces_.end()) << *space;
+  // Bind live to mark bitmap if necessary.
+  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
+  }
+  spaces_.insert(space);
+  CreateLargestImmuneRegion();
+}
+
+bool ImmuneSpaces::CompareByBegin::operator()(space::ContinuousSpace* a, space::ContinuousSpace* b)
+    const {
+  return a->Begin() < b->Begin();
+}
+
+bool ImmuneSpaces::ContainsSpace(space::ContinuousSpace* space) const {
+  return spaces_.find(space) != spaces_.end();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/immune_spaces.h b/runtime/gc/collector/immune_spaces.h
new file mode 100644
index 0000000..72cb60d
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+#define ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "gc/space/space.h"
+#include "immune_region.h"
+
+#include <set>
+
+namespace art {
+namespace gc {
+namespace space {
+class ContinuousSpace;
+}  // namespace space
+
+namespace collector {
+
+// ImmuneSpaces is a set of spaces which are not going to have any objects become marked during the
+// GC.
+class ImmuneSpaces {
+  class CompareByBegin {
+   public:
+    bool operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) const;
+  };
+
+ public:
+  ImmuneSpaces() {}
+  void Reset();
+
+  // Add a continuous space to the immune spaces set.
+  void AddSpace(space::ContinuousSpace* space) REQUIRES(Locks::heap_bitmap_lock_);
+
+  // Returns true if an object is inside the immune region (assumed to be marked). Only returns
+  // true for the largest immune region; the object can still be inside some other immune space.
+  ALWAYS_INLINE bool IsInImmuneRegion(const mirror::Object* obj) const {
+    return largest_immune_region_.ContainsObject(obj);
+  }
+
+  // Returns true if the space is contained.
+  bool ContainsSpace(space::ContinuousSpace* space) const;
+
+  // Returns the set of immune spaces.
+  const std::set<space::ContinuousSpace*, CompareByBegin>& GetSpaces() {
+    return spaces_;
+  }
+
+  // Returns the largest immune region.
+  const ImmuneRegion& GetLargestImmuneRegion() const {
+    return largest_immune_region_;
+  }
+
+  // Returns true if the object is contained by any of the immune spaces.
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
+    if (largest_immune_region_.ContainsObject(obj)) {
+      return true;
+    }
+    for (space::ContinuousSpace* space : spaces_) {
+      if (space->HasAddress(obj)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private:
+  // Set up the immune region to cover the largest continuous set of immune spaces. The immune
+  // region exists only for the fast-path lookup.
+  void CreateLargestImmuneRegion();
+
+  std::set<space::ContinuousSpace*, CompareByBegin> spaces_;
+  ImmuneRegion largest_immune_region_;
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
new file mode 100644
index 0000000..cf93ec6
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+#include "gc/collector/immune_spaces.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space-inl.h"
+#include "oat_file.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+namespace gc {
+namespace collector {
+
+class DummyOatFile : public OatFile {
+ public:
+  DummyOatFile(uint8_t* begin, uint8_t* end) : OatFile("Location", /*is_executable*/ false) {
+    begin_ = begin;
+    end_ = end;
+  }
+};
+
+class DummyImageSpace : public space::ImageSpace {
+ public:
+  DummyImageSpace(MemMap* map,
+                  accounting::ContinuousSpaceBitmap* live_bitmap,
+                  std::unique_ptr<DummyOatFile>&& oat_file,
+                  std::unique_ptr<MemMap>&& oat_map)
+      : ImageSpace("DummyImageSpace",
+                   /*image_location*/"",
+                   map,
+                   live_bitmap,
+                   map->End()),
+        oat_map_(std::move(oat_map)) {
+    oat_file_ = std::move(oat_file);
+    oat_file_non_owned_ = oat_file_.get();
+  }
+
+ private:
+  std::unique_ptr<MemMap> oat_map_;
+};
+
+class ImmuneSpacesTest : public CommonRuntimeTest {
+  static constexpr size_t kMaxBitmaps = 10;
+
+ public:
+  ImmuneSpacesTest() {}
+
+  void ReserveBitmaps() {
+    // Create a bunch of dummy bitmaps since these are required to create image spaces. The bitmaps
+    // do not need to cover the image spaces though.
+    for (size_t i = 0; i < kMaxBitmaps; ++i) {
+      std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
+          accounting::ContinuousSpaceBitmap::Create("bitmap",
+                                                    reinterpret_cast<uint8_t*>(kPageSize),
+                                                    kPageSize));
+      CHECK(bitmap != nullptr);
+      live_bitmaps_.push_back(std::move(bitmap));
+    }
+  }
+
+  // Create an image space, the oat file is optional.
+  DummyImageSpace* CreateImageSpace(uint8_t* image_begin,
+                                    size_t image_size,
+                                    uint8_t* oat_begin,
+                                    size_t oat_size) {
+    std::string error_str;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace",
+                                                     image_begin,
+                                                     image_size,
+                                                     PROT_READ | PROT_WRITE,
+                                                     /*low_4gb*/true,
+                                                     /*reuse*/false,
+                                                     &error_str));
+    if (map == nullptr) {
+      LOG(ERROR) << error_str;
+      return nullptr;
+    }
+    CHECK(!live_bitmaps_.empty());
+    std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap(std::move(live_bitmaps_.back()));
+    live_bitmaps_.pop_back();
+    std::unique_ptr<MemMap> oat_map(MemMap::MapAnonymous("OatMap",
+                                                         oat_begin,
+                                                         oat_size,
+                                                         PROT_READ | PROT_WRITE,
+                                                         /*low_4gb*/true,
+                                                         /*reuse*/false,
+                                                         &error_str));
+    if (oat_map == nullptr) {
+      LOG(ERROR) << error_str;
+      return nullptr;
+    }
+    std::unique_ptr<DummyOatFile> oat_file(new DummyOatFile(oat_map->Begin(), oat_map->End()));
+    // Create image header.
+    ImageSection sections[ImageHeader::kSectionCount];
+    new (map->Begin()) ImageHeader(
+        /*image_begin*/PointerToLowMemUInt32(map->Begin()),
+        /*image_size*/map->Size(),
+        sections,
+        /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1,
+        /*oat_checksum*/0u,
+        // The oat file data in the header is always right after the image space.
+        /*oat_file_begin*/PointerToLowMemUInt32(oat_begin),
+        /*oat_data_begin*/PointerToLowMemUInt32(oat_begin),
+        /*oat_data_end*/PointerToLowMemUInt32(oat_begin + oat_size),
+        /*oat_file_end*/PointerToLowMemUInt32(oat_begin + oat_size),
+        /*boot_image_begin*/0u,
+        /*boot_image_size*/0u,
+        /*boot_oat_begin*/0u,
+        /*boot_oat_size*/0u,
+        /*pointer_size*/sizeof(void*),
+        /*compile_pic*/false,
+        /*is_pic*/false,
+        ImageHeader::kStorageModeUncompressed,
+        /*storage_size*/0u);
+    return new DummyImageSpace(map.release(),
+                               live_bitmap.release(),
+                               std::move(oat_file),
+                               std::move(oat_map));
+  }
+
+  // Does not reserve the memory; the caller must ensure that no other thread maps at the
+  // returned address.
+  static uint8_t* GetContinuousMemoryRegion(size_t size) {
+    std::string error_str;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("reserve",
+                                                     nullptr,
+                                                     size,
+                                                     PROT_READ | PROT_WRITE,
+                                                     /*low_4gb*/true,
+                                                     /*reuse*/false,
+                                                     &error_str));
+    if (map == nullptr) {
+      LOG(ERROR) << "Failed to allocate memory region " << error_str;
+      return nullptr;
+    }
+    return map->Begin();
+  }
+
+ private:
+  // Bitmap pool of pre-allocated dummy bitmaps. We pre-allocate them since we don't want them
+  // to be randomly placed where we want to put an image space.
+  std::vector<std::unique_ptr<accounting::ContinuousSpaceBitmap>> live_bitmaps_;
+};
+
+class DummySpace : public space::ContinuousSpace {
+ public:
+  DummySpace(uint8_t* begin, uint8_t* end)
+      : ContinuousSpace("DummySpace",
+                        space::kGcRetentionPolicyNeverCollect,
+                        begin,
+                        end,
+                        /*limit*/end) {}
+
+  space::SpaceType GetType() const OVERRIDE {
+    return space::kSpaceTypeMallocSpace;
+  }
+
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
+    return nullptr;
+  }
+
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
+    return nullptr;
+  }
+};
+
+TEST_F(ImmuneSpacesTest, AppendBasic) {
+  ImmuneSpaces spaces;
+  uint8_t* const base = reinterpret_cast<uint8_t*>(0x1000);
+  DummySpace a(base, base + 45 * KB);
+  DummySpace b(a.Limit(), a.Limit() + 813 * KB);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(&a);
+    spaces.AddSpace(&b);
+  }
+  EXPECT_TRUE(spaces.ContainsSpace(&a));
+  EXPECT_TRUE(spaces.ContainsSpace(&b));
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), a.Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit());
+}
+
+// Tests [image][oat][space] producing a single large immune region.
+TEST_F(ImmuneSpacesTest, AppendAfterImage) {
+  ReserveBitmaps();
+  ImmuneSpaces spaces;
+  constexpr size_t kImageSize = 123 * kPageSize;
+  constexpr size_t kImageOatSize = 321 * kPageSize;
+  constexpr size_t kOtherSpaceSize = 100 * kPageSize;
+
+  uint8_t* memory = GetContinuousMemoryRegion(kImageSize + kImageOatSize + kOtherSpaceSize);
+
+  std::unique_ptr<DummyImageSpace> image_space(CreateImageSpace(memory,
+                                                                kImageSize,
+                                                                memory + kImageSize,
+                                                                kImageOatSize));
+  ASSERT_TRUE(image_space != nullptr);
+  const ImageHeader& image_header = image_space->GetImageHeader();
+  DummySpace space(image_header.GetOatFileEnd(), image_header.GetOatFileEnd() + kOtherSpaceSize);
+
+  EXPECT_EQ(image_header.GetImageSize(), kImageSize);
+  EXPECT_EQ(static_cast<size_t>(image_header.GetOatFileEnd() - image_header.GetOatFileBegin()),
+            kImageOatSize);
+  EXPECT_EQ(image_space->GetOatFile()->Size(), kImageOatSize);
+  // Check that we do not include the oat if there is no space after.
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(image_space.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()),
+            image_space->Limit());
+  // Add another space and ensure it gets appended.
+  EXPECT_NE(image_space->Limit(), space.Begin());
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(&space);
+  }
+  EXPECT_TRUE(spaces.ContainsSpace(image_space.get()));
+  EXPECT_TRUE(spaces.ContainsSpace(&space));
+  // CreateLargestImmuneRegion should have coalesced the two spaces, since the oat file after
+  // the image fills the gap between them.
+  // Check that we have a continuous region.
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit());
+}
+
+// Test [image1][image2][image1 oat][image2 oat][image3] producing a single large immune region.
+TEST_F(ImmuneSpacesTest, MultiImage) {
+  ReserveBitmaps();
+  // Image 2 needs to be smaller, or else it may be chosen for the immune region.
+  constexpr size_t kImage1Size = kPageSize * 17;
+  constexpr size_t kImage2Size = kPageSize * 13;
+  constexpr size_t kImage3Size = kPageSize * 3;
+  constexpr size_t kImage1OatSize = kPageSize * 5;
+  constexpr size_t kImage2OatSize = kPageSize * 8;
+  constexpr size_t kImage3OatSize = kPageSize;
+  constexpr size_t kImageBytes = kImage1Size + kImage2Size + kImage3Size;
+  constexpr size_t kMemorySize = kImageBytes + kImage1OatSize + kImage2OatSize + kImage3OatSize;
+  uint8_t* memory = GetContinuousMemoryRegion(kMemorySize);
+  uint8_t* space1_begin = memory;
+  memory += kImage1Size;
+  uint8_t* space2_begin = memory;
+  memory += kImage2Size;
+  uint8_t* space1_oat_begin = memory;
+  memory += kImage1OatSize;
+  uint8_t* space2_oat_begin = memory;
+  memory += kImage2OatSize;
+  uint8_t* space3_begin = memory;
+
+  std::unique_ptr<DummyImageSpace> space1(CreateImageSpace(space1_begin,
+                                                           kImage1Size,
+                                                           space1_oat_begin,
+                                                           kImage1OatSize));
+  ASSERT_TRUE(space1 != nullptr);
+
+  std::unique_ptr<DummyImageSpace> space2(CreateImageSpace(space2_begin,
+                                                           kImage2Size,
+                                                           space2_oat_begin,
+                                                           kImage2OatSize));
+  ASSERT_TRUE(space2 != nullptr);
+
+  // Finally put a 3rd image space.
+  std::unique_ptr<DummyImageSpace> space3(CreateImageSpace(space3_begin,
+                                                           kImage3Size,
+                                                           space3_begin + kImage3Size,
+                                                           kImage3OatSize));
+  ASSERT_TRUE(space3 != nullptr);
+
+  // Check that we do not include the oat if there is no space after.
+  ImmuneSpaces spaces;
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    LOG(INFO) << "Adding space1 " << reinterpret_cast<const void*>(space1->Begin());
+    spaces.AddSpace(space1.get());
+    LOG(INFO) << "Adding space2 " << reinterpret_cast<const void*>(space2->Begin());
+    spaces.AddSpace(space2.get());
+  }
+  // There are no more heap bytes, the immune region should only be the first 2 image spaces and
+  // should exclude the image oat files.
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            space1->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()),
+            space2->Limit());
+
+  // Add another space after the oat files, now it should contain the entire memory region.
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    LOG(INFO) << "Adding space3 " << reinterpret_cast<const void*>(space3->Begin());
+    spaces.AddSpace(space3.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            space1->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()),
+            space3->Limit());
+
+  // Add a smaller non-adjacent space and ensure it does not become part of the immune region.
+  // Image size is kImageBytes - kPageSize.
+  // Oat size is kPageSize.
+  // Guard pages to ensure it is not adjacent to an existing immune region.
+  // Layout:  [guard page][image][oat][guard page]
+  constexpr size_t kGuardSize = kPageSize;
+  constexpr size_t kImage4Size = kImageBytes - kPageSize;
+  constexpr size_t kImage4OatSize = kPageSize;
+  uint8_t* memory2 = GetContinuousMemoryRegion(kImage4Size + kImage4OatSize + kGuardSize * 2);
+  std::unique_ptr<DummyImageSpace> space4(CreateImageSpace(memory2 + kGuardSize,
+                                                           kImage4Size,
+                                                           memory2 + kGuardSize + kImage4Size,
+                                                           kImage4OatSize));
+  ASSERT_TRUE(space4 != nullptr);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    LOG(INFO) << "Adding space4 " << reinterpret_cast<const void*>(space4->Begin());
+    spaces.AddSpace(space4.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            space1->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()),
+            space3->Limit());
+
+  // Add a larger non-adjacent space and ensure it becomes the new largest immune region.
+  // Image size is kImageBytes + kPageSize.
+  // Oat size is kPageSize.
+  // Guard pages to ensure it is not adjacent to an existing immune region.
+  // Layout:  [guard page][image][oat][guard page]
+  constexpr size_t kImage5Size = kImageBytes + kPageSize;
+  constexpr size_t kImage5OatSize = kPageSize;
+  uint8_t* memory3 = GetContinuousMemoryRegion(kImage5Size + kImage5OatSize + kGuardSize * 2);
+  std::unique_ptr<DummyImageSpace> space5(CreateImageSpace(memory3 + kGuardSize,
+                                                           kImage5Size,
+                                                           memory3 + kGuardSize + kImage5Size,
+                                                           kImage5OatSize));
+  ASSERT_TRUE(space5 != nullptr);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    LOG(INFO) << "Adding space5 " << reinterpret_cast<const void*>(space5->Begin());
+    spaces.AddSpace(space5.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), space5->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space5->Limit());
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index f561764..43482eb 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -45,15 +45,16 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
 
 MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix)
     : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "mark compact"),
-      space_(nullptr), collector_name_(name_), updating_references_(false) {
-}
+      space_(nullptr),
+      collector_name_(name_),
+      updating_references_(false) {}
 
 void MarkCompact::RunPhases() {
   Thread* self = Thread::Current();
@@ -85,37 +86,27 @@
   ++live_objects_in_space_;
 }
 
-class CalculateObjectForwardingAddressVisitor {
- public:
-  explicit CalculateObjectForwardingAddressVisitor(MarkCompact* collector)
-      : collector_(collector) {}
-  void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_,
-                                                                      Locks::heap_bitmap_lock_) {
-    DCHECK_ALIGNED(obj, space::BumpPointerSpace::kAlignment);
-    DCHECK(collector_->IsMarked(obj) != nullptr);
-    collector_->ForwardObject(obj);
-  }
-
- private:
-  MarkCompact* const collector_;
-};
 
 void MarkCompact::CalculateObjectForwardingAddresses() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // The bump pointer in the space where the next forwarding address will be.
   bump_pointer_ = reinterpret_cast<uint8_t*>(space_->Begin());
   // Visit all the marked objects in the bitmap.
-  CalculateObjectForwardingAddressVisitor visitor(this);
   objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
                                                reinterpret_cast<uintptr_t>(space_->End()),
-                                               visitor);
+                                               [this](mirror::Object* obj)
+      REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    DCHECK_ALIGNED(obj, space::BumpPointerSpace::kAlignment);
+    DCHECK(IsMarked(obj) != nullptr);
+    ForwardObject(obj);
+  });
 }
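
The named visitor functor is replaced here by a lambda carrying the same thread-safety annotations; this works because VisitMarkedRange is templated on its visitor type, so any callable fits. A generic sketch of that shape, using a hypothetical minimal "bitmap":

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// A visit function templated on the visitor type accepts functors and
// lambdas alike, which is what lets the named visitor classes in this diff
// collapse into lambdas at the call site.
template <typename Visitor>
void VisitMarkedRange(const std::vector<uintptr_t>& marked,
                      uintptr_t begin, uintptr_t end, Visitor&& visit) {
  for (uintptr_t addr : marked) {
    if (addr >= begin && addr < end) {
      visit(addr);
    }
  }
}

int main() {
  std::vector<uintptr_t> marked = {0x1000, 0x2000, 0x3000};
  // State captured by reference replaces the visitor's member pointer.
  std::size_t visited = 0;
  VisitMarkedRange(marked, 0x1000, 0x2800, [&](uintptr_t addr) {
    ++visited;
    std::printf("visit 0x%lx\n", static_cast<unsigned long>(addr));
  });
  std::printf("visited %zu objects\n", visited);
}
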
 
 void MarkCompact::InitializePhase() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_;
   // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap.
   ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -129,17 +120,6 @@
       false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this);
 }
 
-class BitmapSetSlowPathVisitor {
- public:
-  void operator()(const mirror::Object* obj) const {
-    // Marking a large object; make sure it's aligned as a sanity check.
-    if (!IsAligned<kPageSize>(obj)) {
-      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
-      LOG(FATAL) << obj;
-    }
-  }
-};
-
 inline mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
   if (obj == nullptr) {
     return nullptr;
@@ -148,15 +128,22 @@
     // Verify all the objects have the correct forward pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (!immune_region_.ContainsObject(obj)) {
+  if (!immune_spaces_.IsInImmuneRegion(obj)) {
     if (objects_before_forwarding_->HasAddress(obj)) {
       if (!objects_before_forwarding_->Set(obj)) {
         MarkStackPush(obj);  // This object was not previously marked.
       }
     } else {
       DCHECK(!space_->HasAddress(obj));
-      BitmapSetSlowPathVisitor visitor;
-      if (!mark_bitmap_->Set(obj, visitor)) {
+      auto slow_path = [this](const mirror::Object* ref)
+          SHARED_REQUIRES(Locks::mutator_lock_) {
+        // Marking a large object; make sure it's aligned as a sanity check.
+        if (!IsAligned<kPageSize>(ref)) {
+          Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
+          LOG(FATAL) << ref;
+        }
+      };
+      if (!mark_bitmap_->Set(obj, slow_path)) {
         // This object was not previously marked.
         MarkStackPush(obj);
       }
@@ -180,7 +167,7 @@
   t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), false, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause-the-world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
@@ -218,7 +205,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
-    if (immune_region_.ContainsSpace(space)) {
+    if (immune_spaces_.ContainsSpace(space)) {
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       if (table != nullptr) {
         // TODO: Improve naming.
@@ -296,10 +283,9 @@
   }
 }
 
-class UpdateRootVisitor : public RootVisitor {
+class MarkCompact::UpdateRootVisitor : public RootVisitor {
  public:
-  explicit UpdateRootVisitor(MarkCompact* collector) : collector_(collector) {
-  }
+  explicit UpdateRootVisitor(MarkCompact* collector) : collector_(collector) {}
 
   void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES(Locks::mutator_lock_)
@@ -332,10 +318,10 @@
   MarkCompact* const collector_;
 };
 
-class UpdateObjectReferencesVisitor {
+class MarkCompact::UpdateObjectReferencesVisitor {
  public:
-  explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {
-  }
+  explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {}
+
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::heap_bitmap_lock_)
           REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
     collector_->UpdateObjectReferences(obj);
@@ -423,10 +409,9 @@
   }
 }
 
-class UpdateReferenceVisitor {
+class MarkCompact::UpdateReferenceVisitor {
  public:
-  explicit UpdateReferenceVisitor(MarkCompact* collector) : collector_(collector) {
-  }
+  explicit UpdateReferenceVisitor(MarkCompact* collector) : collector_(collector) {}
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const
       ALWAYS_INLINE REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
@@ -475,7 +460,7 @@
 }
 
 mirror::Object* MarkCompact::IsMarked(mirror::Object* object) {
-  if (immune_region_.ContainsObject(object)) {
+  if (immune_spaces_.IsInImmuneRegion(object)) {
     return object;
   }
   if (updating_references_) {
@@ -498,22 +483,9 @@
 }
 
 bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
-  return space != space_ && !immune_region_.ContainsSpace(space);
+  return space != space_ && !immune_spaces_.ContainsSpace(space);
 }
 
-class MoveObjectVisitor {
- public:
-  explicit MoveObjectVisitor(MarkCompact* collector) : collector_(collector) {
-  }
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::heap_bitmap_lock_)
-          REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
-      collector_->MoveObject(obj, obj->SizeOf());
-  }
-
- private:
-  MarkCompact* const collector_;
-};
-
 void MarkCompact::MoveObject(mirror::Object* obj, size_t len) {
   // Look at the forwarding address stored in the lock word to know where to copy.
   DCHECK(space_->HasAddress(obj)) << obj;
@@ -534,10 +506,13 @@
 void MarkCompact::MoveObjects() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Move the objects in the before forwarding bitmap.
-  MoveObjectVisitor visitor(this);
   objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
                                                reinterpret_cast<uintptr_t>(space_->End()),
-                                               visitor);
+                                               [this](mirror::Object* obj)
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
+    MoveObject(obj, obj->SizeOf());
+  });
   CHECK(lock_words_to_restore_.empty());
 }
 
@@ -572,10 +547,9 @@
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
-class MarkCompactMarkObjectVisitor {
+class MarkCompact::MarkObjectVisitor {
  public:
-  explicit MarkCompactMarkObjectVisitor(MarkCompact* collector) : collector_(collector) {
-  }
+  explicit MarkObjectVisitor(MarkCompact* collector) : collector_(collector) {}
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const ALWAYS_INLINE
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
@@ -608,7 +582,7 @@
 
 // Visit all of the references of an object and update.
 void MarkCompact::ScanObject(mirror::Object* obj) {
-  MarkCompactMarkObjectVisitor visitor(this);
+  MarkObjectVisitor visitor(this);
   obj->VisitReferences(visitor, visitor);
 }
 
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 8d91939..16abfb7 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -26,7 +26,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "lock_word.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -106,7 +106,7 @@
       REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
-  void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
+  void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
@@ -194,8 +194,8 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune region is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked.
+  ImmuneSpaces immune_spaces_;
 
   // Bump pointer space which we are collecting.
   space::BumpPointerSpace* space_;
@@ -222,13 +222,10 @@
   bool updating_references_;
 
  private:
-  friend class BitmapSetSlowPathVisitor;
-  friend class CalculateObjectForwardingAddressVisitor;
-  friend class MarkCompactMarkObjectVisitor;
-  friend class MoveObjectVisitor;
-  friend class UpdateObjectReferencesVisitor;
-  friend class UpdateReferenceVisitor;
-  friend class UpdateRootVisitor;
+  class MarkObjectVisitor;
+  class UpdateObjectReferencesVisitor;
+  class UpdateReferenceVisitor;
+  class UpdateRootVisitor;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact);
 };
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 77a288b..3904160 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -22,13 +22,12 @@
 #include <climits>
 #include <vector>
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/trace.h"
-
 #include "base/bounded_fifo.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex-inl.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "gc/accounting/card_table-inl.h"
@@ -86,7 +85,7 @@
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
@@ -115,7 +114,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   no_reference_class_count_.StoreRelaxed(0);
   normal_count_.StoreRelaxed(0);
   class_count_.StoreRelaxed(0);
@@ -268,16 +267,41 @@
   PreCleanCards();
 }
 
+class MarkSweep::ScanObjectVisitor {
+ public:
+  explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
+      : mark_sweep_(mark_sweep) {}
+
+  void operator()(mirror::Object* obj) const
+      ALWAYS_INLINE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kCheckLocks) {
+      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
+    }
+    mark_sweep_->ScanObject(obj);
+  }
+
+ private:
+  MarkSweep* const mark_sweep_;
+};
+
 void MarkSweep::UpdateAndMarkModUnion() {
-  for (const auto& space : heap_->GetContinuousSpaces()) {
-    if (immune_region_.ContainsSpace(space)) {
-      const char* name = space->IsZygoteSpace()
-          ? "UpdateAndMarkZygoteModUnionTable"
-          : "UpdateAndMarkImageModUnionTable";
-      TimingLogger::ScopedTiming t(name, GetTimings());
-      accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
-      CHECK(mod_union_table != nullptr);
+  for (const auto& space : immune_spaces_.GetSpaces()) {
+    const char* name = space->IsZygoteSpace()
+        ? "UpdateAndMarkZygoteModUnionTable"
+        : "UpdateAndMarkImageModUnionTable";
+    DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
+    TimingLogger::ScopedTiming t(name, GetTimings());
+    accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
+    if (mod_union_table != nullptr) {
       mod_union_table->UpdateAndMarkReferences(this);
+    } else {
+      // No mod-union table; scan all of the live bits. This can only occur for app images.
+      space->GetLiveBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                               reinterpret_cast<uintptr_t>(space->End()),
+                                               ScanObjectVisitor(this));
     }
   }
 }
@@ -370,12 +394,14 @@
   return IsMarked(ref->AsMirrorPtr());
 }
 
-class MarkSweepMarkObjectSlowPath {
+class MarkSweep::MarkObjectSlowPath {
  public:
-  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep,
-                                       mirror::Object* holder = nullptr,
-                                       MemberOffset offset = MemberOffset(0))
-      : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) {}
+  explicit MarkObjectSlowPath(MarkSweep* mark_sweep,
+                              mirror::Object* holder = nullptr,
+                              MemberOffset offset = MemberOffset(0))
+      : mark_sweep_(mark_sweep),
+        holder_(holder),
+        offset_(offset) {}
 
   void operator()(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     if (kProfileLargeObjects) {
@@ -405,13 +431,12 @@
                             << " first_ref_field_offset="
                             << (holder_->IsClass()
                                 ? holder_->AsClass()->GetFirstReferenceStaticFieldOffset(
-                                    sizeof(void*))
+                                    kRuntimePointerSize)
                                 : holder_->GetClass()->GetFirstReferenceInstanceFieldOffset())
                             << " num_of_ref_fields="
                             << (holder_->IsClass()
                                 ? holder_->AsClass()->NumReferenceStaticFields()
-                                : holder_->GetClass()->NumReferenceInstanceFields())
-                            << "\n";
+                                : holder_->GetClass()->NumReferenceInstanceFields());
         // Print the memory content of the holder.
         for (size_t i = 0; i < holder_size / sizeof(uint32_t); ++i) {
           uint32_t* p = reinterpret_cast<uint32_t*>(holder_);
@@ -421,27 +446,8 @@
       }
       PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
       MemMap::DumpMaps(LOG(INTERNAL_FATAL), true);
-      {
-        LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root";
-        Thread* self = Thread::Current();
-        if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-          mark_sweep_->VerifyRoots();
-        } else {
-          const bool heap_bitmap_exclusive_locked =
-              Locks::heap_bitmap_lock_->IsExclusiveHeld(self);
-          if (heap_bitmap_exclusive_locked) {
-            Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
-          }
-          {
-            ScopedThreadSuspension(self, kSuspended);
-            ScopedSuspendAll ssa(__FUNCTION__);
-            mark_sweep_->VerifyRoots();
-          }
-          if (heap_bitmap_exclusive_locked) {
-            Locks::heap_bitmap_lock_->ExclusiveLock(self);
-          }
-        }
-      }
+      LOG(INTERNAL_FATAL) << "Attempting see if it's a bad thread root";
+      mark_sweep_->VerifySuspendedThreadRoots();
       LOG(FATAL) << "Can't mark invalid object";
     }
   }
@@ -460,7 +466,7 @@
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
+  if (immune_spaces_.IsInImmuneRegion(obj)) {
     if (kCountMarkedObjects) {
       ++mark_immune_count_;
     }
@@ -476,7 +482,7 @@
     if (kCountMarkedObjects) {
       ++mark_slowpath_count_;
     }
-    MarkSweepMarkObjectSlowPath visitor(this, holder, offset);
+    MarkObjectSlowPath visitor(this, holder, offset);
     // TODO: We already know that the object is not in the current_space_bitmap_ but MarkBitmap::Set
     // will check again.
     if (!mark_bitmap_->Set(obj, visitor)) {
@@ -501,7 +507,7 @@
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
+  if (immune_spaces_.IsInImmuneRegion(obj)) {
     DCHECK(IsMarked(obj) != nullptr);
     return false;
   }
@@ -511,7 +517,7 @@
   if (LIKELY(object_bitmap->HasAddress(obj))) {
     return !object_bitmap->AtomicTestAndSet(obj);
   }
-  MarkSweepMarkObjectSlowPath visitor(this);
+  MarkObjectSlowPath visitor(this);
   return !mark_bitmap_->AtomicTestAndSet(obj, visitor);
 }
 
@@ -530,7 +536,7 @@
   }
 }
 
-class VerifyRootMarkedVisitor : public SingleRootVisitor {
+class MarkSweep::VerifyRootMarkedVisitor : public SingleRootVisitor {
  public:
   explicit VerifyRootMarkedVisitor(MarkSweep* collector) : collector_(collector) { }
 
@@ -559,7 +565,7 @@
   }
 }
 
-class VerifyRootVisitor : public SingleRootVisitor {
+class MarkSweep::VerifyRootVisitor : public SingleRootVisitor {
  public:
   void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
@@ -574,9 +580,9 @@
   }
 };
 
-void MarkSweep::VerifyRoots() {
+void MarkSweep::VerifySuspendedThreadRoots() {
   VerifyRootVisitor visitor;
-  Runtime::Current()->GetThreadList()->VisitRoots(&visitor);
+  Runtime::Current()->GetThreadList()->VisitRootsForSuspendedThreads(&visitor);
 }
 
 void MarkSweep::MarkRoots(Thread* self) {
@@ -606,27 +612,7 @@
       this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving));
 }
 
-class ScanObjectVisitor {
- public:
-  explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
-      : mark_sweep_(mark_sweep) {}
-
-  void operator()(mirror::Object* obj) const
-      ALWAYS_INLINE
-      REQUIRES(Locks::heap_bitmap_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (kCheckLocks) {
-      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
-    }
-    mark_sweep_->ScanObject(obj);
-  }
-
- private:
-  MarkSweep* const mark_sweep_;
-};
-
-class DelayReferenceReferentVisitor {
+class MarkSweep::DelayReferenceReferentVisitor {
  public:
   explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {}
 
@@ -641,7 +627,7 @@
 };
 
 template <bool kUseFinger = false>
-class MarkStackTask : public Task {
+class MarkSweep::MarkStackTask : public Task {
  public:
   MarkStackTask(ThreadPool* thread_pool,
                 MarkSweep* mark_sweep,
@@ -799,7 +785,7 @@
   }
 };
 
-class CardScanTask : public MarkStackTask<false> {
+class MarkSweep::CardScanTask : public MarkStackTask<false> {
  public:
   CardScanTask(ThreadPool* thread_pool,
                MarkSweep* mark_sweep,
@@ -842,7 +828,9 @@
 };
 
 size_t MarkSweep::GetThreadCount(bool paused) const {
-  if (heap_->GetThreadPool() == nullptr || !heap_->CareAboutPauseTimes()) {
+  // Use fewer threads if we are in a background (non-jank-perceptible) state, since we want to
+  // leave more CPU time for the foreground apps.
+  if (heap_->GetThreadPool() == nullptr || !Runtime::Current()->InJankPerceptibleProcessState()) {
     return 1;
   }
   return (paused ? heap_->GetParallelGCThreadCount() : heap_->GetConcGCThreadCount()) + 1;
@@ -962,7 +950,7 @@
   }
 }
 
-class RecursiveMarkTask : public MarkStackTask<false> {
+class MarkSweep::RecursiveMarkTask : public MarkStackTask<false> {
  public:
   RecursiveMarkTask(ThreadPool* thread_pool,
                     MarkSweep* mark_sweep,
@@ -1075,7 +1063,7 @@
   Runtime::Current()->SweepSystemWeaks(this);
 }
 
-class VerifySystemWeakVisitor : public IsMarkedVisitor {
+class MarkSweep::VerifySystemWeakVisitor : public IsMarkedVisitor {
  public:
   explicit VerifySystemWeakVisitor(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {}
 
@@ -1104,7 +1092,7 @@
   Runtime::Current()->SweepSystemWeaks(&visitor);
 }
 
-class CheckpointMarkThreadRoots : public Closure, public RootVisitor {
+class MarkSweep::CheckpointMarkThreadRoots : public Closure, public RootVisitor {
  public:
   CheckpointMarkThreadRoots(MarkSweep* mark_sweep,
                             bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
@@ -1132,23 +1120,19 @@
   }
 
   virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
-    ATRACE_BEGIN("Marking thread roots");
+    ScopedTrace trace("Marking thread roots");
     // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* const self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
     thread->VisitRoots(this);
-    ATRACE_END();
     if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
-      ATRACE_BEGIN("RevokeRosAllocThreadLocalBuffers");
+      ScopedTrace trace2("RevokeRosAllocThreadLocalBuffers");
       mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread);
-      ATRACE_END();
     }
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      mark_sweep_->GetBarrier().Pass(self);
-    }
+    mark_sweep_->GetBarrier().Pass(self);
   }
 
  private:
@@ -1195,7 +1179,8 @@
   std::vector<space::ContinuousSpace*> sweep_spaces;
   space::ContinuousSpace* non_moving_space = nullptr;
   for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
-    if (space->IsAllocSpace() && !immune_region_.ContainsSpace(space) &&
+    if (space->IsAllocSpace() &&
+        !immune_spaces_.ContainsSpace(space) &&
         space->GetLiveBitmap() != nullptr) {
       if (space == heap_->GetNonMovingSpace()) {
         non_moving_space = space;
@@ -1424,7 +1409,7 @@
 }
 
 inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) {
-  if (immune_region_.ContainsObject(object)) {
+  if (immune_spaces_.IsInImmuneRegion(object)) {
     return object;
   }
   if (current_space_bitmap_->HasAddress(object)) {
@@ -1463,7 +1448,9 @@
   }
   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
   mark_stack_->Reset();
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, *Locks::mutator_lock_);
+  WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
   heap_->ClearMarkedObjects();
 }
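
The FinishPhase change above now takes the mutator lock shared before taking the heap bitmap lock exclusively. A sketch of that two-level RAII ordering, using std::shared_mutex as a stand-in for the runtime's annotated lock types:

#include <mutex>
#include <shared_mutex>

std::shared_mutex mutator_lock;      // stand-in for Locks::mutator_lock_
std::shared_mutex heap_bitmap_lock;  // stand-in for Locks::heap_bitmap_lock_

void ClearMarkedObjectsExample() {
  // Acquire the broader lock shared first, then the narrower lock
  // exclusively, mirroring ReaderMutexLock + WriterMutexLock above; both
  // release in reverse order when the scope ends.
  std::shared_lock<std::shared_mutex> mu(mutator_lock);
  std::unique_lock<std::shared_mutex> mu2(heap_bitmap_lock);
  // ... heap_->ClearMarkedObjects() would run under both locks here.
}
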
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 8f7df78..9747031 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -26,7 +26,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
@@ -85,7 +85,7 @@
   void Init();
 
   // Find the default mark bitmap.
-  void FindDefaultSpaceBitmap();
+  void FindDefaultSpaceBitmap() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Marks all objects in the root set at the start of a garbage collection.
   void MarkRoots(Thread* self)
@@ -231,7 +231,7 @@
  protected:
   // Returns object if the object is marked in the heap bitmap, otherwise null.
   virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_);
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void MarkObjectNonNull(mirror::Object* obj,
                          mirror::Object* holder = nullptr,
@@ -250,8 +250,8 @@
 
   // Verify the roots of the heap and print out information related to any invalid roots.
  // Called in MarkObject, so we may not hold the mutator lock.
-  void VerifyRoots()
-      NO_THREAD_SAFETY_ANALYSIS;
+  void VerifySuspendedThreadRoots()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
   void ExpandMarkStack()
@@ -314,8 +314,9 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune range is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked. Immune spaces that aren't in the
+  // immune region are handled by the normal marking logic.
+  ImmuneSpaces immune_spaces_;
 
   // Parallel finger.
   AtomicInteger atomic_finger_;
@@ -352,17 +353,17 @@
   std::unique_ptr<MemMap> sweep_array_free_buffer_mem_map_;
 
  private:
-  friend class CardScanTask;
-  friend class CheckBitmapVisitor;
-  friend class CheckReferenceVisitor;
-  friend class CheckpointMarkThreadRoots;
-  friend class Heap;
-  friend class FifoMarkStackChunk;
-  friend class MarkObjectVisitor;
-  template<bool kUseFinger> friend class MarkStackTask;
-  friend class MarkSweepMarkObjectSlowPath;
-  friend class VerifyRootMarkedVisitor;
-  friend class VerifyRootVisitor;
+  class CardScanTask;
+  class CheckpointMarkThreadRoots;
+  class DelayReferenceReferentVisitor;
+  template<bool kUseFinger> class MarkStackTask;
+  class MarkObjectSlowPath;
+  class RecursiveMarkTask;
+  class ScanObjectParallelVisitor;
+  class ScanObjectVisitor;
+  class VerifyRootMarkedVisitor;
+  class VerifyRootVisitor;
+  class VerifySystemWeakVisitor;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(MarkSweep);
 };
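
The header change above swaps a pile of friend declarations for forward declarations of private nested classes, which is why the .cc definitions earlier read MarkSweep::CardScanTask and so on. The pattern in isolation, with Collector and ScanTask as illustrative names:

// Before: helpers were namespace-scope classes befriended by the collector.
// After: they are forward-declared as private members, so they get access to
// the enclosing class without widening its friend surface.
class Collector {
 public:
  void Run();

 private:
  class ScanTask;  // defined in the .cc file, sees private members
  template <bool kUseFinger> class MarkStackTask;

  int private_state_ = 0;
};

// In the .cc file, the nested helper is defined with its qualified name,
// matching the MarkSweep::CardScanTask definitions above.
class Collector::ScanTask {
 public:
  explicit ScanTask(Collector* c) : collector_(c) {}
  void Process() { ++collector_->private_state_; }  // allowed: nested class

 private:
  Collector* const collector_;
};

void Collector::Run() { ScanTask(this).Process(); }
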
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 15f782a..9847794 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -39,7 +39,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace());
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 06d20f5..78fb2d2 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -26,21 +26,6 @@
 namespace gc {
 namespace collector {
 
-class BitmapSetSlowPathVisitor {
- public:
-  explicit BitmapSetSlowPathVisitor(SemiSpace* semi_space) : semi_space_(semi_space) {
-  }
-
-  void operator()(const mirror::Object* obj) const {
-    CHECK(!semi_space_->to_space_->HasAddress(obj)) << "Marking " << obj << " in to_space_";
-    // Marking a large object, make sure its aligned as a sanity check.
-    CHECK_ALIGNED(obj, kPageSize);
-  }
-
- private:
-  SemiSpace* const semi_space_;
-};
-
 inline mirror::Object* SemiSpace::GetForwardingAddressInFromSpace(mirror::Object* obj) const {
   DCHECK(from_space_->HasAddress(obj));
   LockWord lock_word = obj->GetLockWord(false);
@@ -74,9 +59,14 @@
       MarkStackPush(forward_address);
     }
     obj_ptr->Assign(forward_address);
-  } else if (!collect_from_space_only_ && !immune_region_.ContainsObject(obj)) {
-    BitmapSetSlowPathVisitor visitor(this);
-    if (!mark_bitmap_->Set(obj, visitor)) {
+  } else if (!collect_from_space_only_ && !immune_spaces_.IsInImmuneRegion(obj)) {
+    DCHECK(!to_space_->HasAddress(obj)) << "Tried to mark " << obj << " in to-space";
+    auto slow_path = [this](const mirror::Object* ref) {
+      CHECK(!to_space_->HasAddress(ref)) << "Marking " << ref << " in to_space_";
+      // Marking a large object, make sure it is aligned as a sanity check.
+      CHECK_ALIGNED(ref, kPageSize);
+    };
+    if (!mark_bitmap_->Set(obj, slow_path)) {
       // This object was not previously marked.
       MarkStackPush(obj);
     }
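
The hunk above folds BitmapSetSlowPathVisitor into a lambda passed straight to Set(), with captures replacing the visitor's constructor-injected state. A loose, compilable sketch of the pattern; FakeBitmap is hypothetical and, unlike the real heap bitmap (which only runs the slow path for objects outside the continuous-space bitmaps), it runs the slow path on every call:

#include <cassert>
#include <cstdio>
#include <unordered_set>

// Hypothetical bitmap: Set() runs the slow path, then returns whether the
// object was already marked, matching the `if (!mark_bitmap_->Set(...))`
// shape above.
class FakeBitmap {
 public:
  template <typename SlowPath>
  bool Set(const void* obj, const SlowPath& slow_path) {
    slow_path(obj);                      // sanity checks before marking
    return !marked_.insert(obj).second;  // true if it was already marked
  }
 private:
  std::unordered_set<const void*> marked_;
};

int main() {
  FakeBitmap bitmap;
  int object = 0;
  auto slow_path = [&](const void* ref) {
    // Stand-in for the CHECKs above: alignment / not-in-to-space assertions.
    assert(ref != nullptr);
  };
  if (!bitmap.Set(&object, slow_path)) {
    std::puts("newly marked; would push on the mark stack");
  }
  return 0;
}
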
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 7f57f30..7a4c025 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -66,8 +66,9 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     } else if (space->GetLiveBitmap() != nullptr) {
+      // TODO: We can probably also add this space to the immune region.
       if (space == to_space_ || collect_from_space_only_) {
         if (collect_from_space_only_) {
           // Bind the bitmaps of the main free list space and the non-moving space we are doing a
@@ -144,7 +145,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   is_large_object_space_immune_ = false;
   saved_bytes_ = 0;
   bytes_moved_ = 0;
@@ -226,7 +227,7 @@
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
  // paused GC. This saves memory but only works for pause-the-world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
@@ -281,22 +282,11 @@
   }
 }
 
-class SemiSpaceScanObjectVisitor {
- public:
-  explicit SemiSpaceScanObjectVisitor(SemiSpace* ss) : semi_space_(ss) {}
-  void operator()(Object* obj) const REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    semi_space_->ScanObject(obj);
-  }
- private:
-  SemiSpace* const semi_space_;
-};
-
 // Used to verify that there's no references to the from-space.
-class SemiSpaceVerifyNoFromSpaceReferencesVisitor {
+class SemiSpace::VerifyNoFromSpaceReferencesVisitor {
  public:
-  explicit SemiSpaceVerifyNoFromSpaceReferencesVisitor(space::ContinuousMemMapAllocSpace* from_space) :
-      from_space_(from_space) {}
+  explicit VerifyNoFromSpaceReferencesVisitor(space::ContinuousMemMapAllocSpace* from_space)
+      : from_space_(from_space) {}
 
   void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
       SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
@@ -330,23 +320,10 @@
 
 void SemiSpace::VerifyNoFromSpaceReferences(Object* obj) {
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
-  SemiSpaceVerifyNoFromSpaceReferencesVisitor visitor(from_space_);
+  VerifyNoFromSpaceReferencesVisitor visitor(from_space_);
   obj->VisitReferences(visitor, VoidFunctor());
 }
 
-class SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor {
- public:
-  explicit SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor(SemiSpace* ss) : semi_space_(ss) {}
-  void operator()(Object* obj) const
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    DCHECK(obj != nullptr);
-    semi_space_->VerifyNoFromSpaceReferences(obj);
-  }
-
- private:
-  SemiSpace* const semi_space_;
-};
-
 void SemiSpace::MarkReachableObjects() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   {
@@ -366,38 +343,49 @@
                                    GetTimings());
       table->UpdateAndMarkReferences(this);
       DCHECK(GetHeap()->FindRememberedSetFromSpace(space) == nullptr);
-    } else if (collect_from_space_only_ && space->GetLiveBitmap() != nullptr) {
-      // If the space has no mod union table (the non-moving space and main spaces when the bump
-      // pointer space only collection is enabled,) then we need to scan its live bitmap or dirty
-      // cards as roots (including the objects on the live stack which have just marked in the live
-      // bitmap above in MarkAllocStackAsLive().)
-      DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
-          << "Space " << space->GetName() << " "
-          << "generational_=" << generational_ << " "
-          << "collect_from_space_only_=" << collect_from_space_only_;
+    } else if ((space->IsImageSpace() || collect_from_space_only_) &&
+               space->GetLiveBitmap() != nullptr) {
+      // If the space has no mod union table (the non-moving space, app image spaces, main spaces
+      // when the bump pointer space only collection is enabled,) then we need to scan its live
+      // bitmap or dirty cards as roots (including the objects on the live stack which have just
+      // been marked in the live bitmap above in MarkAllocStackAsLive().)
       accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space);
-      CHECK_EQ(rem_set != nullptr, kUseRememberedSet);
+      if (!space->IsImageSpace()) {
+        DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
+            << "Space " << space->GetName() << " "
+            << "generational_=" << generational_ << " "
+            << "collect_from_space_only_=" << collect_from_space_only_;
+        // App images currently do not have remembered sets.
+        DCHECK_EQ(kUseRememberedSet, rem_set != nullptr);
+      } else {
+        DCHECK(rem_set == nullptr);
+      }
       if (rem_set != nullptr) {
         TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
         rem_set->UpdateAndMarkReferences(from_space_, this);
-        if (kIsDebugBuild) {
-          // Verify that there are no from-space references that
-          // remain in the space, that is, the remembered set (and the
-          // card table) didn't miss any from-space references in the
-          // space.
-          accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-          SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
-          live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                        reinterpret_cast<uintptr_t>(space->End()),
-                                        visitor);
-        }
       } else {
         TimingLogger::ScopedTiming t2("VisitLiveBits", GetTimings());
         accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-        SemiSpaceScanObjectVisitor visitor(this);
         live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                       reinterpret_cast<uintptr_t>(space->End()),
-                                      visitor);
+                                      [this](mirror::Object* obj)
+           REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+          ScanObject(obj);
+        });
+      }
+      if (kIsDebugBuild) {
+        // Verify that there are no from-space references that
+        // remain in the space, that is, the remembered set (and the
+        // card table) didn't miss any from-space references in the
+        // space.
+        accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+        live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                      reinterpret_cast<uintptr_t>(space->End()),
+                                      [this](Object* obj)
+            SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
+          DCHECK(obj != nullptr);
+          VerifyNoFromSpaceReferences(obj);
+        });
       }
     }
   }
@@ -417,10 +405,12 @@
     // classes (primitive array classes) that could move though they
     // don't contain any other references.
     accounting::LargeObjectBitmap* large_live_bitmap = los->GetLiveBitmap();
-    SemiSpaceScanObjectVisitor visitor(this);
     large_live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(los->Begin()),
                                         reinterpret_cast<uintptr_t>(los->End()),
-                                        visitor);
+                                        [this](mirror::Object* obj)
+        REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+      ScanObject(obj);
+    });
   }
   // Recursively process the mark stack.
   ProcessMarkStack();
@@ -612,7 +602,7 @@
 
 mirror::Object* SemiSpace::MarkObject(mirror::Object* root) {
   auto ref = StackReference<mirror::Object>::FromMirrorPtr(root);
-  MarkObject(&ref);
+  MarkObjectIfNotInToSpace(&ref);
   return ref.AsMirrorPtr();
 }
 
@@ -690,10 +680,9 @@
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
-class SemiSpaceMarkObjectVisitor {
+class SemiSpace::MarkObjectVisitor {
  public:
-  explicit SemiSpaceMarkObjectVisitor(SemiSpace* collector) : collector_(collector) {
-  }
+  explicit MarkObjectVisitor(SemiSpace* collector) : collector_(collector) {}
 
   void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const ALWAYS_INLINE
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
@@ -732,7 +721,7 @@
 // Visit all of the references of an object and update.
 void SemiSpace::ScanObject(Object* obj) {
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
-  SemiSpaceMarkObjectVisitor visitor(this);
+  MarkObjectVisitor visitor(this);
   obj->VisitReferences(visitor, visitor);
 }
 
@@ -767,7 +756,8 @@
   if (from_space_->HasAddress(obj)) {
     // Returns either the forwarding address or null.
     return GetForwardingAddressInFromSpace(obj);
-  } else if (collect_from_space_only_ || immune_region_.ContainsObject(obj) ||
+  } else if (collect_from_space_only_ ||
+             immune_spaces_.IsInImmuneRegion(obj) ||
              to_space_->HasAddress(obj)) {
     return obj;  // Already forwarded, must be marked.
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b9246ca..694e536 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -25,7 +25,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "mirror/object_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -135,7 +135,9 @@
       REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
-  virtual void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
+  virtual void Sweep(bool swap_bitmaps)
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_);
@@ -201,8 +203,8 @@
   // object.
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune region is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked.
+  ImmuneSpaces immune_spaces_;
 
   // If true, the large object space is immune.
   bool is_large_object_space_immune_;
@@ -270,7 +272,9 @@
   bool swap_semi_spaces_;
 
  private:
-  friend class BitmapSetSlowPathVisitor;
+  class BitmapSetSlowPathVisitor;
+  class MarkObjectVisitor;
+  class VerifyNoFromSpaceReferencesVisitor;
   DISALLOW_IMPLICIT_CONSTRUCTORS(SemiSpace);
 };
 
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 416510d..7899a7c 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,7 +34,7 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
-  // Mark compact colector.
+  // Mark compact collector.
   kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
@@ -42,9 +42,17 @@
   kCollectorTypeCC,
   // Instrumentation critical section fake collector.
   kCollectorTypeInstrumentation,
+  // Fake collector for adding or removing application image spaces.
+  kCollectorTypeAddRemoveAppImageSpace,
+  // Fake collector used to implement exclusion between GC and debugger.
+  kCollectorTypeDebugger,
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
+  // Class linker fake collector.
+  kCollectorTypeClassLinker,
+  // JIT Code cache fake collector.
+  kCollectorTypeJitCodeCache,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 84243df..1d377a4 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -34,6 +34,10 @@
     case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
     case kGcCauseTrim: return "HeapTrim";
     case kGcCauseInstrumentation: return "Instrumentation";
+    case kGcCauseAddRemoveAppImageSpace: return "AddRemoveAppImageSpace";
+    case kGcCauseDebugger: return "Debugger";
+    case kGcCauseClassLinker: return "ClassLinker";
+    case kGcCauseJitCodeCache: return "JitCodeCache";
     default:
       LOG(FATAL) << "Unreachable";
       UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 34c7766..4348a41 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -41,8 +41,16 @@
   kGcCauseTrim,
   // Not a real GC cause, used to implement exclusion between GC and instrumentation.
   kGcCauseInstrumentation,
+  // Not a real GC cause, used to add or remove app image spaces.
+  kGcCauseAddRemoveAppImageSpace,
+  // Not a real GC cause, used to implement exclusion between GC and debugger.
+  kGcCauseDebugger,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
+  // Class linker cause, used to guard filling art methods with special values.
+  kGcCauseClassLinker,
+  // Not a real GC cause, used to implement exclusion between code cache metadata and GC.
+  kGcCauseJitCodeCache,
 };
 
 const char* PrettyCause(GcCause cause);
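
Each cause added above needs both an enumerator and a string case in PrettyCause, or the default branch aborts at runtime. A trimmed sketch of that pairing, with FakeGcCause as an illustrative stand-in:

#include <cstdio>
#include <cstdlib>

// Trimmed-down mirror of the GcCause / PrettyCause pairing: every fake cause
// added to the enum needs a matching case below.
enum FakeGcCause {
  kCauseDebugger,
  kCauseClassLinker,
  kCauseJitCodeCache,
};

const char* PrettyFakeCause(FakeGcCause cause) {
  switch (cause) {
    case kCauseDebugger: return "Debugger";
    case kCauseClassLinker: return "ClassLinker";
    case kCauseJitCodeCache: return "JitCodeCache";
  }
  // Mirrors the LOG(FATAL) + UNREACHABLE() default above for bad values.
  std::abort();
}

int main() {
  std::printf("%s\n", PrettyFakeCause(kCauseJitCodeCache));
  return 0;
}
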
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index d1ab587..b0ca18e 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -49,6 +49,7 @@
     // done in the runnable state where suspension is expected.
     CHECK_EQ(self->GetState(), kRunnable);
     self->AssertThreadSuspensionIsAllowable();
+    self->AssertNoPendingException();
   }
  // Need to check that we aren't the large object allocator since the large object allocation
  // code path calls this function. If we didn't check, we would have an infinite loop.
@@ -109,16 +110,25 @@
     obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
                                               &usable_size, &bytes_tl_bulk_allocated);
     if (UNLIKELY(obj == nullptr)) {
-      bool is_current_allocator = allocator == GetCurrentAllocator();
-      obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size,
+      // AllocateInternalWithGc can cause thread suspension; if someone instruments the
+      // entrypoints or changes the allocator at a suspend point here, we need to retry the
+      // allocation.
+      obj = AllocateInternalWithGc(self,
+                                   allocator,
+                                   kInstrumented,
+                                   byte_count,
+                                   &bytes_allocated,
+                                   &usable_size,
                                    &bytes_tl_bulk_allocated, &klass);
       if (obj == nullptr) {
-        bool after_is_current_allocator = allocator == GetCurrentAllocator();
-        // If there is a pending exception, fail the allocation right away since the next one
-        // could cause OOM and abort the runtime.
-        if (!self->IsExceptionPending() && is_current_allocator && !after_is_current_allocator) {
-          // If the allocator changed, we need to restart the allocation.
-          return AllocObject<kInstrumented>(self, klass, byte_count, pre_fence_visitor);
+        // The only way that we can get a null return if there is no pending exception is if the
+        // allocator or instrumentation changed.
+        if (!self->IsExceptionPending()) {
+          // AllocObject will pick up the new allocator type, and kInstrumented = true is the
+          // safe default.
+          return AllocObject</*kInstrumented*/true>(self,
+                                                    klass,
+                                                    byte_count,
+                                                    pre_fence_visitor);
         }
         return nullptr;
       }
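
The control flow above boils down to: a null result with no pending exception can only mean the allocator or the instrumented entrypoints changed at a suspend point, so restart with kInstrumented = true. A sketch of just that decision, with hypothetical function-pointer stand-ins for the slow path and the restarted allocation:

// Hypothetical allocation outcome used to illustrate the retry rule above.
struct AllocResult {
  void* obj;
  bool pending_exception;
};

// If the slow path produced no object and no exception, the allocator or
// instrumentation must have changed under us, so the caller restarts with
// the conservatively instrumented entrypoint.
void* AllocateWithRetry(AllocResult (*slow_path)(),
                        void* (*retry_instrumented)()) {
  AllocResult result = slow_path();
  if (result.obj == nullptr) {
    if (!result.pending_exception) {
      return retry_instrumented();  // corresponds to AllocObject<true>(...)
    }
    return nullptr;                 // OOM: exception already pending
  }
  return result.obj;
}
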
@@ -145,9 +155,9 @@
       WriteBarrierField(obj, mirror::Object::ClassOffset(), klass);
     }
     pre_fence_visitor(obj, usable_size);
+    QuasiAtomic::ThreadFenceForConstructor();
     new_num_bytes_allocated = static_cast<size_t>(
-        num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_tl_bulk_allocated))
-        + bytes_tl_bulk_allocated;
+        num_bytes_allocated_.FetchAndAddRelaxed(bytes_tl_bulk_allocated)) + bytes_tl_bulk_allocated;
   }
   if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
     CHECK_LE(obj->SizeOf(), usable_size);
@@ -165,17 +175,19 @@
   } else {
     DCHECK(!Runtime::Current()->HasStatsEnabled());
   }
-  if (AllocatorHasAllocationStack(allocator)) {
-    PushOnAllocationStack(self, &obj);
-  }
   if (kInstrumented) {
     if (IsAllocTrackingEnabled()) {
-      // Use obj->GetClass() instead of klass, because PushOnAllocationStack() could move klass
-      AllocRecordObjectMap::RecordAllocation(self, obj, obj->GetClass(), bytes_allocated);
+      // allocation_records_ is not null since it never becomes null after allocation tracking is
+      // enabled.
+      DCHECK(allocation_records_ != nullptr);
+      allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
     }
   } else {
     DCHECK(!IsAllocTrackingEnabled());
   }
+  if (AllocatorHasAllocationStack(allocator)) {
+    PushOnAllocationStack(self, &obj);
+  }
   if (kInstrumented) {
     if (gc_stress_mode_) {
       CheckGcStressMode(self, &obj);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 657fcb5..638c1d8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -16,9 +16,6 @@
 
 #include "heap.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include <cutils/trace.h>
-
 #include <limits>
 #include <memory>
 #include <unwind.h>  // For GC verification.
@@ -26,9 +23,11 @@
 
 #include "art_field-inl.h"
 #include "base/allocator.h"
+#include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/histogram-inl.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "common_throws.h"
 #include "cutils/sched_policy.h"
@@ -60,6 +59,8 @@
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -113,6 +114,21 @@
 // timeout on how long we wait for finalizers to run. b/21544853
 static constexpr uint64_t kNativeAllocationFinalizeTimeout = MsToNs(250u);
 
+// For deterministic compilation, we need the heap to be at a well-known address.
+static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
+// Dump the rosalloc stats on SIGQUIT.
+static constexpr bool kDumpRosAllocStatsOnSigQuit = false;
+
+static constexpr size_t kNativeAllocationHistogramBuckets = 16;
+
+// Extra amount added to the heap growth multiplier. Used to adjust the GC ergonomics for the
+// read barrier config.
+static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
+
+static inline bool CareAboutPauseTimes() {
+  return Runtime::Current()->InJankPerceptibleProcessState();
+}
+
 Heap::Heap(size_t initial_size,
            size_t growth_limit,
            size_t min_free,
@@ -141,6 +157,7 @@
            bool verify_pre_sweeping_rosalloc,
            bool verify_post_gc_rosalloc,
            bool gc_stress_mode,
+           bool measure_gc_performance,
            bool use_homogeneous_space_compaction_for_oom,
            uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
@@ -171,13 +188,16 @@
       max_allowed_footprint_(initial_size),
       native_footprint_gc_watermark_(initial_size),
       native_need_to_run_finalization_(false),
-      // Initially assume we perceive jank in case the process state is never updated.
-      process_state_(kProcessStateJankPerceptible),
       concurrent_start_bytes_(std::numeric_limits<size_t>::max()),
       total_bytes_freed_ever_(0),
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
       native_bytes_allocated_(0),
+      native_histogram_lock_("Native allocation lock"),
+      native_allocation_histogram_("Native allocation sizes",
+                                   1U,
+                                   kNativeAllocationHistogramBuckets),
+      native_free_histogram_("Native free sizes", 1U, kNativeAllocationHistogramBuckets),
       num_bytes_freed_revoke_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
@@ -205,10 +225,14 @@
       min_free_(min_free),
       max_free_(max_free),
       target_utilization_(target_utilization),
-      foreground_heap_growth_multiplier_(foreground_heap_growth_multiplier),
+      foreground_heap_growth_multiplier_(
+          foreground_heap_growth_multiplier + kExtraHeapGrowthMultiplier),
       total_wait_time_(0),
       verify_object_mode_(kVerifyObjectModeDisabled),
       disable_moving_gc_count_(0),
+      semi_space_collector_(nullptr),
+      mark_compact_collector_(nullptr),
+      concurrent_copying_collector_(nullptr),
       is_running_on_memory_tool_(Runtime::Current()->IsRunningOnMemoryTool()),
       use_tlab_(use_tlab),
       main_space_backup_(nullptr),
@@ -236,6 +260,8 @@
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
+  ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
@@ -258,22 +284,66 @@
     CHECK_GE(300 * MB, non_moving_space_capacity);
     requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity;
   }
+
+  // Load image space(s).
   if (!image_file_name.empty()) {
-    ATRACE_BEGIN("ImageSpace::Create");
-    std::string error_msg;
-    auto* image_space = space::ImageSpace::Create(image_file_name.c_str(), image_instruction_set,
-                                                  &error_msg);
-    ATRACE_END();
-    if (image_space != nullptr) {
-      AddSpace(image_space);
-      // Oat files referenced by image files immediately follow them in memory, ensure alloc space
-      // isn't going to get in the middle
-      uint8_t* oat_file_end_addr = image_space->GetImageHeader().GetOatFileEnd();
-      CHECK_GT(oat_file_end_addr, image_space->End());
-      requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
-    } else {
-      LOG(ERROR) << "Could not create image space with image file '" << image_file_name << "'. "
-                   << "Attempting to fall back to imageless running. Error was: " << error_msg;
+    // For code reuse, handle this like a work queue.
+    std::vector<std::string> image_file_names;
+    image_file_names.push_back(image_file_name);
+    // The loaded spaces. Secondary images may fail to load, in which case we need to remove
+    // already added spaces.
+    std::vector<space::Space*> added_image_spaces;
+    uint8_t* const original_requested_alloc_space_begin = requested_alloc_space_begin;
+    for (size_t index = 0; index < image_file_names.size(); ++index) {
+      std::string& image_name = image_file_names[index];
+      std::string error_msg;
+      std::unique_ptr<space::ImageSpace> boot_image_space_uptr = space::ImageSpace::CreateBootImage(
+          image_name.c_str(),
+          image_instruction_set,
+          index > 0,
+          &error_msg);
+      if (boot_image_space_uptr != nullptr) {
+        space::ImageSpace* boot_image_space = boot_image_space_uptr.release();
+        AddSpace(boot_image_space);
+        added_image_spaces.push_back(boot_image_space);
+        // Oat files referenced by image files immediately follow them in memory; ensure the
+        // alloc space isn't going to end up in the middle.
+        uint8_t* oat_file_end_addr = boot_image_space->GetImageHeader().GetOatFileEnd();
+        CHECK_GT(oat_file_end_addr, boot_image_space->End());
+        requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
+        boot_image_spaces_.push_back(boot_image_space);
+
+        if (index == 0) {
+          // If this was the first space, check whether there are more images to load.
+          const OatFile* boot_oat_file = boot_image_space->GetOatFile();
+          if (boot_oat_file == nullptr) {
+            continue;
+          }
+
+          const OatHeader& boot_oat_header = boot_oat_file->GetOatHeader();
+          const char* boot_classpath =
+              boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPathKey);
+          if (boot_classpath == nullptr) {
+            continue;
+          }
+
+          space::ImageSpace::CreateMultiImageLocations(image_file_name,
+                                                       boot_classpath,
+                                                       &image_file_names);
+        }
+      } else {
+        LOG(ERROR) << "Could not create image space with image file '" << image_file_name << "'. "
+            << "Attempting to fall back to imageless running. Error was: " << error_msg
+            << "\nAttempted image: " << image_name;
+        // Remove already loaded spaces.
+        for (space::Space* loaded_space : added_image_spaces) {
+          RemoveSpace(loaded_space);
+          delete loaded_space;
+        }
+        boot_image_spaces_.clear();
+        requested_alloc_space_begin = original_requested_alloc_space_begin;
+        break;
+      }
     }
   }
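
The image-loading loop above is a grow-while-iterating work queue with all-or-nothing rollback. A compilable sketch of the same shape; FakeSpace, TryLoad, and FollowOnImages are hypothetical stand-ins for space::ImageSpace, CreateBootImage, and CreateMultiImageLocations:

#include <memory>
#include <string>
#include <vector>

// Hypothetical space type standing in for space::ImageSpace.
struct FakeSpace {
  std::string name;
  // Follow-on image locations discovered from the first image, if any.
  std::vector<std::string> FollowOnImages() const { return {}; }
};

std::unique_ptr<FakeSpace> TryLoad(const std::string& name) {
  return std::make_unique<FakeSpace>(FakeSpace{name});  // always succeeds here
}

// Work-queue loading with all-or-nothing semantics: one failure unwinds
// every space added so far, matching the rollback branch in the hunk above.
std::vector<std::unique_ptr<FakeSpace>> LoadImages(std::string first) {
  std::vector<std::string> names{std::move(first)};
  std::vector<std::unique_ptr<FakeSpace>> loaded;
  for (size_t i = 0; i < names.size(); ++i) {
    std::unique_ptr<FakeSpace> space = TryLoad(names[i]);
    if (space == nullptr) {
      loaded.clear();  // drop already-loaded spaces and fall back
      break;
    }
    if (i == 0) {
      for (std::string& extra : space->FollowOnImages()) {
        names.push_back(std::move(extra));  // grow the queue while iterating
      }
    }
    loaded.push_back(std::move(space));
  }
  return loaded;
}
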
   /*
@@ -304,19 +374,28 @@
   bool separate_non_moving_space = is_zygote ||
       support_homogeneous_space_compaction || IsMovingGc(foreground_collector_type_) ||
       IsMovingGc(background_collector_type_);
-  if (foreground_collector_type == kCollectorTypeGSS) {
+  if (foreground_collector_type_ == kCollectorTypeGSS) {
     separate_non_moving_space = false;
   }
   std::unique_ptr<MemMap> main_mem_map_1;
   std::unique_ptr<MemMap> main_mem_map_2;
+
+  // Gross hack to make dex2oat deterministic.
+  if (foreground_collector_type_ == kCollectorTypeMS &&
+      requested_alloc_space_begin == nullptr &&
+      Runtime::Current()->IsAotCompiler()) {
+    // Currently only enabled for MS collector since that is what the deterministic dex2oat uses.
+    // b/26849108
+    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(kAllocSpaceBeginForDeterministicAoT);
+  }
   uint8_t* request_begin = requested_alloc_space_begin;
   if (request_begin != nullptr && separate_non_moving_space) {
     request_begin += non_moving_space_capacity;
   }
   std::string error_str;
   std::unique_ptr<MemMap> non_moving_space_mem_map;
-  ATRACE_BEGIN("Create heap maps");
   if (separate_non_moving_space) {
+    ScopedTrace trace2("Create separate non moving space");
     // If we are the zygote, the non moving space becomes the zygote space when we run
     // PreZygoteFork the first time. In this case, call the map "zygote space" since we can't
     // rename the mem map later.
@@ -333,12 +412,16 @@
   }
   // Attempt to create 2 mem maps at or after the requested begin.
   if (foreground_collector_type_ != kCollectorTypeCC) {
-    if (separate_non_moving_space) {
-      main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0], request_begin,
-                                                        capacity_, &error_str));
+    ScopedTrace trace2("Create main mem map");
+    if (separate_non_moving_space || !is_zygote) {
+      main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0],
+                                                        request_begin,
+                                                        capacity_,
+                                                        &error_str));
     } else {
-      // If no separate non-moving space, the main space must come
-      // right after the image space to avoid a gap.
+      // If no separate non-moving space and we are the zygote, the main space must come right
+      // after the image space to avoid a gap. This is required since we want the zygote space to
+      // be adjacent to the image space.
       main_mem_map_1.reset(MemMap::MapAnonymous(kMemMapSpaceName[0], request_begin, capacity_,
                                                 PROT_READ | PROT_WRITE, true, false,
                                                 &error_str));
@@ -348,14 +431,15 @@
   if (support_homogeneous_space_compaction ||
       background_collector_type_ == kCollectorTypeSS ||
       foreground_collector_type_ == kCollectorTypeSS) {
+    ScopedTrace trace2("Create main mem map 2");
     main_mem_map_2.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[1], main_mem_map_1->End(),
                                                       capacity_, &error_str));
     CHECK(main_mem_map_2.get() != nullptr) << error_str;
   }
-  ATRACE_END();
-  ATRACE_BEGIN("Create spaces");
+
   // Create the non moving space first so that bitmaps don't take up the address range.
   if (separate_non_moving_space) {
+    ScopedTrace trace2("Add non moving space");
     // Non moving space is always dlmalloc since we currently don't have support for multiple
     // active rosalloc spaces.
     const size_t size = non_moving_space_mem_map->Size();
@@ -369,7 +453,7 @@
   }
   // Create other spaces based on whether or not we have a moving GC.
   if (foreground_collector_type_ == kCollectorTypeCC) {
-    region_space_ = space::RegionSpace::Create("Region space", capacity_ * 2, request_begin);
+    region_space_ = space::RegionSpace::Create(
+        "main space (region space)", capacity_ * 2, request_begin);
     AddSpace(region_space_);
   } else if (IsMovingGc(foreground_collector_type_) &&
       foreground_collector_type_ != kCollectorTypeGSS) {
@@ -443,23 +527,30 @@
   if (main_space_backup_.get() != nullptr) {
     RemoveSpace(main_space_backup_.get());
   }
-  ATRACE_END();
   // Allocate the card table.
-  ATRACE_BEGIN("Create card table");
-  card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
+  // We currently don't support dynamically resizing the card table.
+  // Since we don't know where in the low_4gb the app image will be located, make the card table
+  // cover the whole low_4gb. TODO: Extend the card table in AddSpace.
+  UNUSED(heap_capacity);
+  // Start at 4 KB; we can be sure there are no spaces mapped this low since the address range is
+  // reserved by the kernel.
+  static constexpr size_t kMinHeapAddress = 4 * KB;
+  card_table_.reset(accounting::CardTable::Create(reinterpret_cast<uint8_t*>(kMinHeapAddress),
+                                                  4 * GB - kMinHeapAddress));
   CHECK(card_table_.get() != nullptr) << "Failed to create card table";
-  ATRACE_END();
   if (foreground_collector_type_ == kCollectorTypeCC && kUseTableLookupReadBarrier) {
     rb_table_.reset(new accounting::ReadBarrierTable());
     DCHECK(rb_table_->IsAllCleared());
   }
-  if (GetImageSpace() != nullptr) {
+  if (HasBootImageSpace()) {
     // Don't add the image mod union table if we are running without an image, this can crash if
     // we use the CardCache implementation.
-    accounting::ModUnionTable* mod_union_table = new accounting::ModUnionTableToZygoteAllocspace(
-        "Image mod-union table", this, GetImageSpace());
-    CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
-    AddModUnionTable(mod_union_table);
+    for (space::ImageSpace* image_space : GetBootImageSpaces()) {
+      accounting::ModUnionTable* mod_union_table = new accounting::ModUnionTableToZygoteAllocspace(
+          "Image mod-union table", this, image_space);
+      CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
+      AddModUnionTable(mod_union_table);
+    }
   }
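
Covering [4 KB, 4 GB) with a single card table is cheaper than it sounds. Assuming the usual 128-byte cards (one card-table byte per 128 heap bytes; the actual constant lives in accounting::CardTable), the arithmetic works out to roughly 32 MB:

#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint64_t kKB = 1024;
  constexpr uint64_t kGB = 1024ull * 1024 * 1024;
  constexpr uint64_t kMinHeapAddress = 4 * kKB;  // matches the hunk above
  constexpr uint64_t covered = 4 * kGB - kMinHeapAddress;
  constexpr uint64_t kCardSize = 128;            // assumption, see lead-in
  std::printf("covered: %llu bytes\n",
              static_cast<unsigned long long>(covered));
  std::printf("card table: ~%llu bytes (~%.0f MB)\n",
              static_cast<unsigned long long>(covered / kCardSize),
              covered / kCardSize / (1024.0 * 1024.0));
  return 0;
}
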
   if (collector::SemiSpace::kUseRememberedSet && non_moving_space_ != main_space_) {
     accounting::RememberedSet* non_moving_space_rem_set =
@@ -514,7 +605,9 @@
       garbage_collectors_.push_back(semi_space_collector_);
     }
     if (MayUseCollector(kCollectorTypeCC)) {
-      concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
+      concurrent_copying_collector_ = new collector::ConcurrentCopying(this,
+                                                                       "",
+                                                                       measure_gc_performance);
       garbage_collectors_.push_back(concurrent_copying_collector_);
     }
     if (MayUseCollector(kCollectorTypeMC)) {
@@ -522,13 +615,19 @@
       garbage_collectors_.push_back(mark_compact_collector_);
     }
   }
-  if (GetImageSpace() != nullptr && non_moving_space_ != nullptr &&
+  if (!GetBootImageSpaces().empty() && non_moving_space_ != nullptr &&
       (is_zygote || separate_non_moving_space || foreground_collector_type_ == kCollectorTypeGSS)) {
     // Check that there's no gap between the image space and the non moving space so that the
     // immune region won't break (eg. due to a large object allocated in the gap). This is only
     // required when we're the zygote or using GSS.
-    bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(),
-                                      non_moving_space_->GetMemMap());
+    // Space with smallest Begin().
+    space::ImageSpace* first_space = nullptr;
+    for (space::ImageSpace* space : boot_image_spaces_) {
+      if (first_space == nullptr || space->Begin() < first_space->Begin()) {
+        first_space = space;
+      }
+    }
+    bool no_gap = MemMap::CheckNoGaps(first_space->GetMemMap(), non_moving_space_->GetMemMap());
     if (!no_gap) {
       PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
       MemMap::DumpMaps(LOG(ERROR), true);
@@ -739,6 +838,7 @@
   if (!Runtime::Current()->IsAotCompiler()) {
     return false;
   }
+  ScopedObjectAccess soa(Thread::Current());
   for (const auto& space : continuous_spaces_) {
     if (space->IsImageSpace() || space->IsZygoteSpace()) {
       return false;
@@ -747,15 +847,6 @@
   return true;
 }
 
-bool Heap::HasImageSpace() const {
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      return true;
-    }
-  }
-  return false;
-}
-
 void Heap::IncrementDisableMovingGC(Thread* self) {
   // Need to do this holding the lock to prevent races where the GC is about to run / running when
   // we attempt to disable it.
@@ -776,13 +867,24 @@
 void Heap::IncrementDisableThreadFlip(Thread* self) {
   // Supposed to be called by mutators. If thread_flip_running_ is true, block. Otherwise, go ahead.
   CHECK(kUseReadBarrier);
+  bool is_nested = self->GetDisableThreadFlipCount() > 0;
+  self->IncrementDisableThreadFlipCount();
+  if (is_nested) {
+    // If this is a nested JNI critical section enter, we don't need to wait or increment the global
+    // counter. The global counter is incremented only once for a thread for the outermost enter.
+    return;
+  }
   ScopedThreadStateChange tsc(self, kWaitingForGcThreadFlip);
   MutexLock mu(self, *thread_flip_lock_);
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
-  while (thread_flip_running_) {
-    has_waited = true;
-    thread_flip_cond_->Wait(self);
+  if (thread_flip_running_) {
+    TimingLogger::ScopedTiming split("IncrementDisableThreadFlip",
+                                     GetCurrentGcIteration()->GetTimings());
+    while (thread_flip_running_) {
+      has_waited = true;
+      thread_flip_cond_->Wait(self);
+    }
   }
   ++disable_thread_flip_count_;
   if (has_waited) {
@@ -798,10 +900,20 @@
   // Supposed to be called by mutators. Decrement disable_thread_flip_count_ and potentially wake up
   // the GC waiting before doing a thread flip.
   CHECK(kUseReadBarrier);
+  self->DecrementDisableThreadFlipCount();
+  bool is_outermost = self->GetDisableThreadFlipCount() == 0;
+  if (!is_outermost) {
+    // If this is not an outermost JNI critical exit, we don't need to decrement the global counter.
+    // The global counter is decremented only once for a thread for the outermost exit.
+    return;
+  }
   MutexLock mu(self, *thread_flip_lock_);
   CHECK_GT(disable_thread_flip_count_, 0U);
   --disable_thread_flip_count_;
-  thread_flip_cond_->Broadcast(self);
+  if (disable_thread_flip_count_ == 0) {
+    // Potentially notify the GC thread blocking to begin a thread flip.
+    thread_flip_cond_->Broadcast(self);
+  }
 }
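
The enter/exit pair above keeps a per-thread nesting depth so that only the outermost JNI critical section touches the global counter and the condition variable. A sketch of that bookkeeping with locking elided (the real code holds thread_flip_lock_ around the global count):

#include <cassert>

// Per-thread nesting depth; the global count is only touched at the
// outermost enter/exit, mirroring Thread::GetDisableThreadFlipCount().
thread_local int tls_disable_depth = 0;
int global_disable_count = 0;  // guarded by thread_flip_lock_ in the real code

void EnterJniCritical() {
  const bool is_nested = tls_disable_depth > 0;
  ++tls_disable_depth;
  if (is_nested) {
    return;  // inner enters never wait and never bump the global count
  }
  // ... wait here while a thread flip is running, then:
  ++global_disable_count;
}

void ExitJniCritical() {
  assert(tls_disable_depth > 0);
  --tls_disable_depth;
  if (tls_disable_depth > 0) {
    return;  // only the outermost exit decrements the global count
  }
  if (--global_disable_count == 0) {
    // ... broadcast so a waiting thread flip can begin.
  }
}
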
 
 void Heap::ThreadFlipBegin(Thread* self) {
@@ -813,7 +925,8 @@
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
   CHECK(!thread_flip_running_);
-  // Set this to true before waiting so that a new mutator entering a JNI critical won't starve GC.
+  // Set this to true before waiting so that frequent JNI critical enter/exits won't starve
+  // GC. This is like the writer preference of a reader-writer lock.
   thread_flip_running_ = true;
   while (disable_thread_flip_count_ > 0) {
     has_waited = true;
@@ -835,20 +948,22 @@
   MutexLock mu(self, *thread_flip_lock_);
   CHECK(thread_flip_running_);
   thread_flip_running_ = false;
+  // Potentially notify mutator threads blocking to enter a JNI critical section.
   thread_flip_cond_->Broadcast(self);
 }
 
-void Heap::UpdateProcessState(ProcessState process_state) {
-  if (process_state_ != process_state) {
-    process_state_ = process_state;
+void Heap::UpdateProcessState(ProcessState old_process_state, ProcessState new_process_state) {
+  if (old_process_state != new_process_state) {
+    const bool jank_perceptible = new_process_state == kProcessStateJankPerceptible;
     for (size_t i = 1; i <= kCollectorTransitionStressIterations; ++i) {
       // Start at index 1 to avoid "is always false" warning.
       // Have iteration 1 always transition the collector.
-      TransitionCollector((((i & 1) == 1) == (process_state_ == kProcessStateJankPerceptible))
-                          ? foreground_collector_type_ : background_collector_type_);
+      TransitionCollector((((i & 1) == 1) == jank_perceptible)
+          ? foreground_collector_type_
+          : background_collector_type_);
       usleep(kCollectorTransitionStressWait);
     }
-    if (process_state_ == kProcessStateJankPerceptible) {
+    if (jank_perceptible) {
       // Transition back to foreground right away to prevent jank.
       RequestCollectorTransition(foreground_collector_type_, 0);
     } else {
@@ -1090,6 +1205,24 @@
     }
   }
 
+  if (kDumpRosAllocStatsOnSigQuit && rosalloc_space_ != nullptr) {
+    rosalloc_space_->DumpStats(os);
+  }
+
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    if (native_allocation_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native allocation ";
+      native_allocation_histogram_.DumpBins(os);
+      os << " bucket size " << native_allocation_histogram_.BucketWidth() << "\n";
+    }
+    if (native_free_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native free ";
+      native_free_histogram_.DumpBins(os);
+      os << " bucket size " << native_free_histogram_.BucketWidth() << "\n";
+    }
+  }
+
   BaseMutex::DumpAll(os);
 }
 
@@ -1163,6 +1296,7 @@
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
+  delete thread_flip_lock_;
   delete pending_task_lock_;
   delete backtrace_lock_;
   if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) {
@@ -1207,16 +1341,14 @@
   return FindDiscontinuousSpaceFromObject(obj, fail_ok);
 }
 
-space::ImageSpace* Heap::GetImageSpace() const {
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      return space->AsImageSpace();
-    }
-  }
-  return nullptr;
-}
-
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
+  // If we're in a stack overflow, do not create a new exception. It would require running the
+  // constructor, which will of course still be in a stack overflow.
+  if (self->IsHandlingStackOverflow()) {
+    self->SetException(Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    return;
+  }
+
   std::ostringstream oss;
   size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
@@ -1258,22 +1390,21 @@
 }
 
 void Heap::Trim(Thread* self) {
+  Runtime* const runtime = Runtime::Current();
   if (!CareAboutPauseTimes()) {
-    ATRACE_BEGIN("Deflating monitors");
     // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care
     // about pauses.
-    Runtime* runtime = Runtime::Current();
-    {
-      ScopedSuspendAll ssa(__FUNCTION__);
-      uint64_t start_time = NanoTime();
-      size_t count = runtime->GetMonitorList()->DeflateMonitors();
-      VLOG(heap) << "Deflating " << count << " monitors took "
-          << PrettyDuration(NanoTime() - start_time);
-    }
-    ATRACE_END();
+    ScopedTrace trace("Deflating monitors");
+    ScopedSuspendAll ssa(__FUNCTION__);
+    uint64_t start_time = NanoTime();
+    size_t count = runtime->GetMonitorList()->DeflateMonitors();
+    VLOG(heap) << "Deflating " << count << " monitors took "
+        << PrettyDuration(NanoTime() - start_time);
   }
   TrimIndirectReferenceTables(self);
   TrimSpaces(self);
+  // Trim arenas that may have been used by JIT or verifier.
+  runtime->GetArenaPool()->TrimMaps();
 }
 
 class TrimIndirectReferenceTableClosure : public Closure {
@@ -1281,14 +1412,10 @@
   explicit TrimIndirectReferenceTableClosure(Barrier* barrier) : barrier_(barrier) {
   }
   virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
-    ATRACE_BEGIN("Trimming reference table");
     thread->GetJniEnv()->locals.Trim();
-    ATRACE_END();
     // If thread is a running mutator, then act on behalf of the trim thread.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      barrier_->Pass(Thread::Current());
-    }
+    barrier_->Pass(Thread::Current());
   }
 
  private:
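
With the kRunnable check gone, every closure run now calls Pass() exactly once and the trimming thread waits for all of them. A minimal stand-in for that handshake, counting arrivals upward rather than mirroring the exact Barrier::Increment API:

#include <condition_variable>
#include <mutex>

// Minimal stand-in for the Barrier used by the checkpoint: each closure run
// calls Pass() once, unconditionally, and the requesting thread waits in
// WaitFor() until all expected passes have arrived.
class CheckpointBarrier {
 public:
  void Pass() {
    std::lock_guard<std::mutex> lock(mu_);
    ++passed_;
    cv_.notify_all();
  }
  void WaitFor(int expected) {  // plays the role of Barrier::Increment above
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [&] { return passed_ >= expected; });
  }
 private:
  std::mutex mu_;
  std::condition_variable cv_;
  int passed_ = 0;
};
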
@@ -1297,7 +1424,7 @@
 
 void Heap::TrimIndirectReferenceTables(Thread* self) {
   ScopedObjectAccess soa(self);
-  ATRACE_BEGIN(__FUNCTION__);
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   JavaVMExt* vm = soa.Vm();
   // Trim globals indirect reference table.
   vm->TrimGlobals();
@@ -1309,7 +1436,6 @@
   if (barrier_count != 0) {
     barrier.Increment(self, barrier_count);
   }
-  ATRACE_END();
 }
 
 void Heap::StartGC(Thread* self, GcCause cause, CollectorType collector_type) {
@@ -1328,21 +1454,24 @@
     // trimming.
     StartGC(self, kGcCauseTrim, kCollectorTypeHeapTrim);
   }
-  ATRACE_BEGIN(__FUNCTION__);
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   const uint64_t start_ns = NanoTime();
   // Trim the managed spaces.
   uint64_t total_alloc_space_allocated = 0;
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsMallocSpace()) {
-      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
-      if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
-        // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
-        // for a long period of time.
-        managed_reclaimed += malloc_space->Trim();
+  {
+    ScopedObjectAccess soa(self);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsMallocSpace()) {
+        gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+        if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
+          // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
+          // for a long period of time.
+          managed_reclaimed += malloc_space->Trim();
+        }
+        total_alloc_space_size += malloc_space->Size();
       }
-      total_alloc_space_size += malloc_space->Size();
     }
   }
   total_alloc_space_allocated = GetBytesAllocated();
@@ -1360,29 +1489,10 @@
   uint64_t gc_heap_end_ns = NanoTime();
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
-  size_t native_reclaimed = 0;
 
-#ifdef __ANDROID__
-  // Only trim the native heap if we don't care about pauses.
-  if (!CareAboutPauseTimes()) {
-#if defined(USE_DLMALLOC)
-    // Trim the native heap.
-    dlmalloc_trim(0);
-    dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
-#elif defined(USE_JEMALLOC)
-    // Jemalloc does it's own internal trimming.
-#else
-    UNIMPLEMENTED(WARNING) << "Add trimming support";
-#endif
-  }
-#endif  // __ANDROID__
-  uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
-      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
-      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
-      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
-      << "%.";
-  ATRACE_END();
+      << ", advised=" << PrettySize(managed_reclaimed) << ") heap. Managed heap utilization of "
+      << static_cast<int>(100 * managed_utilization) << "%.";
 }
 
 bool Heap::IsValidObjectAddress(const mirror::Object* obj) const {
@@ -1569,6 +1679,9 @@
 }
 
 space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const {
+  if (rosalloc_space_ != nullptr && rosalloc_space_->GetRosAlloc() == rosalloc) {
+    return rosalloc_space_;
+  }
   for (const auto& space : continuous_spaces_) {
     if (space->AsContinuousSpace()->IsRosAllocSpace()) {
       if (space->AsContinuousSpace()->AsRosAllocSpace()->GetRosAlloc() == rosalloc) {
@@ -1579,8 +1692,15 @@
   return nullptr;
 }
 
+static inline bool EntrypointsInstrumented() SHARED_REQUIRES(Locks::mutator_lock_) {
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  return instrumentation != nullptr && instrumentation->AllocEntrypointsInstrumented();
+}
+
 mirror::Object* Heap::AllocateInternalWithGc(Thread* self,
                                              AllocatorType allocator,
+                                             bool instrumented,
                                              size_t alloc_size,
                                              size_t* bytes_allocated,
                                              size_t* usable_size,
@@ -1596,12 +1716,13 @@
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
   collector::GcType last_gc = WaitForGcToComplete(kGcCauseForAlloc, self);
+  // If we were the default allocator but the allocator changed while we were suspended,
+  // abort the allocation.
+  if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+      (!instrumented && EntrypointsInstrumented())) {
+    return nullptr;
+  }
   if (last_gc != collector::kGcTypeNone) {
-    // If we were the default allocator but the allocator changed while we were suspended,
-    // abort the allocation.
-    if (was_default_allocator && allocator != GetCurrentAllocator()) {
-      return nullptr;
-    }
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
     mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
                                                      usable_size, bytes_tl_bulk_allocated);
@@ -1613,7 +1734,8 @@
   collector::GcType tried_type = next_gc_type_;
   const bool gc_ran =
       CollectGarbageInternal(tried_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
-  if (was_default_allocator && allocator != GetCurrentAllocator()) {
+  if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+      (!instrumented && EntrypointsInstrumented())) {
     return nullptr;
   }
   if (gc_ran) {
@@ -1632,7 +1754,8 @@
     // Attempt to run the collector, if we succeed, re-try the allocation.
     const bool plan_gc_ran =
         CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
-    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+    if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+        (!instrumented && EntrypointsInstrumented())) {
       return nullptr;
     }
     if (plan_gc_ran) {
@@ -1661,7 +1784,8 @@
   // We don't need a WaitForGcToComplete here either.
   DCHECK(!gc_plan_.empty());
   CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
-  if (was_default_allocator && allocator != GetCurrentAllocator()) {
+  if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+      (!instrumented && EntrypointsInstrumented())) {
     return nullptr;
   }
   ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size,
@@ -1677,6 +1801,11 @@
             min_interval_homogeneous_space_compaction_by_oom_) {
           last_time_homogeneous_space_compaction_by_oom_ = current_time;
           HomogeneousSpaceCompactResult result = PerformHomogeneousSpaceCompact();
+          // Thread suspension could have occurred.
+          if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+              (!instrumented && EntrypointsInstrumented())) {
+            return nullptr;
+          }
           switch (result) {
             case HomogeneousSpaceCompactResult::kSuccess:
               // If the allocation succeeded, we delayed an oom.
@@ -1717,6 +1846,11 @@
           // If we aren't out of memory then the OOM was probably from the non moving space being
           // full. Attempt to disable compaction and turn the main space into a non moving space.
           DisableMovingGc();
+          // Thread suspension could have occurred.
+          if ((was_default_allocator && allocator != GetCurrentAllocator()) ||
+              (!instrumented && EntrypointsInstrumented())) {
+            return nullptr;
+          }
           // If we are still a moving GC then something must have caused the transition to fail.
           if (IsMovingGc(collector_type_)) {
             MutexLock mu(self, *gc_complete_lock_);
@@ -2124,8 +2258,8 @@
   } else {
     saved_str = " expanded " + PrettySize(-delta_allocated);
   }
-  VLOG(heap) << "Heap transition to " << process_state_ << " took "
-      << PrettyDuration(duration) << saved_str;
+  VLOG(heap) << "Collector transition to " << collector_type << " took "
+             << PrettyDuration(duration) << saved_str;
 }
 
 void Heap::ChangeCollector(CollectorType collector_type) {
@@ -2309,6 +2443,9 @@
     // We still want to GC in case there is some unreachable non moving objects that could cause a
     // suboptimal bin packing when we compact the zygote space.
     CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
+    // Trim the pages at the end of the non moving space. Trim while not holding the zygote lock,
+    // since the trim process may require locking the mutator lock.
+    non_moving_space_->Trim();
   }
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
@@ -2316,11 +2453,9 @@
   if (HasZygoteSpace()) {
     return;
   }
-  Runtime::Current()->GetInternTable()->SwapPostZygoteWithPreZygote();
+  Runtime::Current()->GetInternTable()->AddNewTable();
   Runtime::Current()->GetClassLinker()->MoveClassTableToPreZygote();
   VLOG(heap) << "Starting PreZygoteFork";
-  // Trim the pages at the end of the non moving space.
-  non_moving_space_->Trim();
   // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote
   // there.
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
@@ -2412,14 +2547,38 @@
   AddSpace(zygote_space_);
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   AddSpace(non_moving_space_);
+  if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+    // Treat all of the objects in the zygote as marked to avoid unnecessary dirty pages. This is
+    // safe since we mark all of the objects that may reference non immune objects as gray.
+    zygote_space_->GetLiveBitmap()->VisitMarkedRange(
+        reinterpret_cast<uintptr_t>(zygote_space_->Begin()),
+        reinterpret_cast<uintptr_t>(zygote_space_->Limit()),
+        [](mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+      CHECK(obj->AtomicSetMarkBit(0, 1));
+    });
+  }
+
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
-      new accounting::ModUnionTableCardCache("zygote space mod-union table", this,
-                                             zygote_space_);
+      new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space_);
   CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
-  // Set all the cards in the mod-union table since we don't know which objects contain references
-  // to large objects.
-  mod_union_table->SetCards();
+
+  if (collector_type_ != kCollectorTypeCC) {
+    // Set all the cards in the mod-union table since we don't know which objects contain references
+    // to large objects.
+    mod_union_table->SetCards();
+  } else {
+    // For CC we never collect zygote large objects. This means we do not need to set the cards for
+    // the zygote mod-union table and we can also clear all of the existing image mod-union tables.
+    // The existing mod-union tables are only for image spaces and may only reference zygote and
+    // image objects.
+    for (auto& pair : mod_union_tables_) {
+      CHECK(pair.first->IsImageSpace());
+      CHECK(!pair.first->AsImageSpace()->GetImageHeader().IsAppImage());
+      accounting::ModUnionTable* table = pair.second;
+      table->ClearTable();
+    }
+  }
   AddModUnionTable(mod_union_table);
   large_object_space_->SetAllLargeObjectsAsZygoteObjects(self);
   if (collector::SemiSpace::kUseRememberedSet) {
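
The CC branch above can leave the zygote mod-union table empty because zygote large objects are never collected under CC, while the non-CC branch conservatively dirties every card. A toy sketch of the two strategies, assuming a one-byte-per-card table (accounting::ModUnionTable is far richer; this only illustrates the conservative-versus-empty trade-off):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    class ToyModUnionTable {
     public:
      static constexpr size_t kCardSize = 128;  // bytes covered per card, as in ART's card table
      explicit ToyModUnionTable(size_t space_bytes)
          : cards_((space_bytes + kCardSize - 1) / kCardSize, 0u) {}
      // Conservative: assume every card may hold a reference to a large object.
      void SetCards() { std::fill(cards_.begin(), cards_.end(), uint8_t{1}); }
      // CC: zygote large objects are never collected, so no cards need to be remembered.
      void ClearTable() { std::fill(cards_.begin(), cards_.end(), uint8_t{0}); }
     private:
      std::vector<uint8_t> cards_;
    };
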
@@ -2569,6 +2728,10 @@
     }
     if (collector != mark_compact_collector_ && collector != concurrent_copying_collector_) {
       temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      if (kIsDebugBuild) {
+        // Try to read each page of the memory map in case mprotect didn't work properly, b/19894268.
+        temp_space_->GetMemMap()->TryReadable();
+      }
       CHECK(temp_space_->IsEmpty());
     }
     gc_type = collector::kGcTypeFull;  // TODO: Not hard code this in.
@@ -2585,6 +2748,13 @@
    // permanently disabled. b/17942071
     concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
+
+  // It's time to clear all inline caches, in case some classes can be unloaded.
+  if (((gc_type == collector::kGcTypeFull) || (gc_type == collector::kGcTypePartial)) &&
+      (runtime->GetJit() != nullptr)) {
+    runtime->GetJit()->GetCodeCache()->ClearGcRootsInInlineCaches(self);
+  }
+
   CHECK(collector != nullptr)
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
@@ -3170,7 +3340,13 @@
     } else if (process_alloc_space_cards) {
       TimingLogger::ScopedTiming t2("AllocSpaceClearCards", timings);
       if (clear_alloc_space_cards) {
-        card_table_->ClearCardRange(space->Begin(), space->End());
+        uint8_t* end = space->End();
+        if (space->IsImageSpace()) {
+          // The image space end is the end of the mirror objects; it is not necessarily page- or
+          // card-aligned. Align up so that the check in ClearCardRange does not fail.
+          end = AlignUp(end, accounting::CardTable::kCardSize);
+        }
+        card_table_->ClearCardRange(space->Begin(), end);
       } else {
         // No mod union table for the AllocSpace. Age the cards so that the GC knows that these
         // cards were dirty before the GC started.
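
ClearCardRange checks that its bounds are card-aligned, hence the AlignUp above. For reference, a sketch of the power-of-two round-up it performs (ART's real helper lives in base/bit_utils.h; this stand-in only shows the arithmetic):

    #include <cstdint>

    // Round ptr up to the next multiple of n, where n is a power of two.
    template <typename T>
    inline T* AlignUpSketch(T* ptr, uintptr_t n) {
      uintptr_t x = reinterpret_cast<uintptr_t>(ptr);
      return reinterpret_cast<T*>((x + n - 1) & ~(n - 1));
    }
    // e.g. AlignUpSketch(space_end, 128) yields the next card boundary at or above space_end.
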
@@ -3331,11 +3507,10 @@
       running_collection_is_blocking_ = true;
       VLOG(gc) << "Waiting for a blocking GC " << cause;
     }
-    ATRACE_BEGIN("GC: Wait For Completion");
+    ScopedTrace trace("GC: Wait For Completion");
     // We must wait, change thread state then sleep on gc_complete_cond_;
     gc_complete_cond_->Wait(self);
     last_gc_type = last_gc_type_;
-    ATRACE_END();
   }
   uint64_t wait_time = NanoTime() - wait_start;
   total_wait_time_ += wait_time;
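
Replacing the ATRACE_BEGIN/ATRACE_END pair with a ScopedTrace means the end marker fires on every exit path from the scope. A minimal sketch of the RAII idiom; TraceBegin/TraceEnd are assumed wrappers, not the real systrace API in base/systrace.h:

    #include <string>

    // Assumed hooks; in ART these would forward to atrace.
    inline void TraceBegin(const std::string& /*name*/) {}
    inline void TraceEnd() {}

    class ScopedTraceSketch {
     public:
      explicit ScopedTraceSketch(const std::string& name) { TraceBegin(name); }
      ~ScopedTraceSketch() { TraceEnd(); }  // runs on return, break, or exception
      ScopedTraceSketch(const ScopedTraceSketch&) = delete;
      ScopedTraceSketch& operator=(const ScopedTraceSketch&) = delete;
    };
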
@@ -3495,7 +3670,8 @@
 
 void Heap::ClampGrowthLimit() {
   // Use heap bitmap lock to guard against races with BindLiveToMarkBitmap.
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  ScopedObjectAccess soa(Thread::Current());
+  WriterMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
   capacity_ = growth_limit_;
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
@@ -3511,6 +3687,7 @@
 
 void Heap::ClearGrowthLimit() {
   growth_limit_ = capacity_;
+  ScopedObjectAccess soa(Thread::Current());
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
       gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
@@ -3734,6 +3911,10 @@
 
 void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
   Thread* self = ThreadForEnv(env);
+  {
+    MutexLock mu(self, native_histogram_lock_);
+    native_allocation_histogram_.AddValue(bytes);
+  }
   if (native_need_to_run_finalization_) {
     RunFinalization(env, kNativeAllocationFinalizeTimeout);
     UpdateMaxNativeFootprint();
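
The histogram update above takes its dedicated lock in a tight block so the lock is released before the finalization and footprint logic runs. A sketch with std::mutex standing in for art::Mutex and a plain vector standing in for Histogram<uint64_t>:

    #include <cstdint>
    #include <mutex>
    #include <vector>

    std::mutex g_native_histogram_lock;                 // stand-in for native_histogram_lock_
    std::vector<uint64_t> g_native_allocation_samples;  // stand-in for Histogram<uint64_t>

    void RecordNativeAllocationSketch(size_t bytes) {
      {
        std::lock_guard<std::mutex> mu(g_native_histogram_lock);
        g_native_allocation_samples.push_back(bytes);
      }  // lock released here, before any finalization or footprint work
    }
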
@@ -3778,6 +3959,10 @@
 
 void Heap::RegisterNativeFree(JNIEnv* env, size_t bytes) {
   size_t expected_size;
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    native_free_histogram_.AddValue(bytes);
+  }
   do {
     expected_size = native_bytes_allocated_.LoadRelaxed();
     if (UNLIKELY(bytes > expected_size)) {
@@ -3862,31 +4047,31 @@
 
 void Heap::AllowNewAllocationRecords() const {
   CHECK(!kUseReadBarrier);
-  if (IsAllocTrackingEnabled()) {
-    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
-    if (IsAllocTrackingEnabled()) {
-      GetAllocationRecords()->AllowNewAllocationRecords();
-    }
+  MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+  AllocRecordObjectMap* allocation_records = GetAllocationRecords();
+  if (allocation_records != nullptr) {
+    allocation_records->AllowNewAllocationRecords();
   }
 }
 
 void Heap::DisallowNewAllocationRecords() const {
   CHECK(!kUseReadBarrier);
-  if (IsAllocTrackingEnabled()) {
-    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
-    if (IsAllocTrackingEnabled()) {
-      GetAllocationRecords()->DisallowNewAllocationRecords();
-    }
+  MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+  AllocRecordObjectMap* allocation_records = GetAllocationRecords();
+  if (allocation_records != nullptr) {
+    allocation_records->DisallowNewAllocationRecords();
   }
 }
 
 void Heap::BroadcastForNewAllocationRecords() const {
   CHECK(kUseReadBarrier);
-  if (IsAllocTrackingEnabled()) {
-    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
-    if (IsAllocTrackingEnabled()) {
-      GetAllocationRecords()->BroadcastForNewAllocationRecords();
-    }
+  // Always broadcast without checking IsAllocTrackingEnabled() because IsAllocTrackingEnabled() may
+  // be set to false while some threads are waiting for system weak access in
+  // AllocRecordObjectMap::RecordAllocation() and we may fail to wake them up. b/27467554.
+  MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+  AllocRecordObjectMap* allocation_records = GetAllocationRecords();
+  if (allocation_records != nullptr) {
+    allocation_records->BroadcastForNewAllocationRecords();
   }
 }
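
The rewrite above drops the double-checked IsAllocTrackingEnabled() test: a waiter that observed the flag before it flipped could otherwise sleep through the wakeup (b/27467554). Locking first and null-checking the shared state avoids the lost notification. A condition-variable sketch of the same shape, with made-up names:

    #include <condition_variable>
    #include <memory>
    #include <mutex>

    struct AllocRecordsSketch { std::condition_variable cond; };

    std::mutex g_alloc_tracker_lock;                // stand-in for Locks::alloc_tracker_lock_
    std::unique_ptr<AllocRecordsSketch> g_records;  // may be torn down concurrently

    void BroadcastForNewRecordsSketch() {
      std::lock_guard<std::mutex> mu(g_alloc_tracker_lock);
      if (g_records != nullptr) {
        g_records->cond.notify_all();  // unconditional: never skipped based on a racy flag
      }
    }
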
 
@@ -3965,5 +4150,52 @@
   gc_disabled_for_shutdown_ = true;
 }
 
+bool Heap::ObjectIsInBootImageSpace(mirror::Object* obj) const {
+  for (gc::space::ImageSpace* space : boot_image_spaces_) {
+    if (space->HasAddress(obj)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Heap::IsInBootImageOatFile(const void* p) const {
+  for (gc::space::ImageSpace* space : boot_image_spaces_) {
+    if (space->GetOatFile()->Contains(p)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void Heap::GetBootImagesSize(uint32_t* boot_image_begin,
+                             uint32_t* boot_image_end,
+                             uint32_t* boot_oat_begin,
+                             uint32_t* boot_oat_end) {
+  DCHECK(boot_image_begin != nullptr);
+  DCHECK(boot_image_end != nullptr);
+  DCHECK(boot_oat_begin != nullptr);
+  DCHECK(boot_oat_end != nullptr);
+  *boot_image_begin = 0u;
+  *boot_image_end = 0u;
+  *boot_oat_begin = 0u;
+  *boot_oat_end = 0u;
+  for (gc::space::ImageSpace* space_ : GetBootImageSpaces()) {
+    const uint32_t image_begin = PointerToLowMemUInt32(space_->Begin());
+    const uint32_t image_size = space_->GetImageHeader().GetImageSize();
+    if (*boot_image_begin == 0 || image_begin < *boot_image_begin) {
+      *boot_image_begin = image_begin;
+    }
+    *boot_image_end = std::max(*boot_image_end, image_begin + image_size);
+    const OatFile* boot_oat_file = space_->GetOatFile();
+    const uint32_t oat_begin = PointerToLowMemUInt32(boot_oat_file->Begin());
+    const uint32_t oat_size = boot_oat_file->Size();
+    if (*boot_oat_begin == 0 || oat_begin < *boot_oat_begin) {
+      *boot_oat_begin = oat_begin;
+    }
+    *boot_oat_end = std::max(*boot_oat_end, oat_begin + oat_size);
+  }
+}
+
 }  // namespace gc
 }  // namespace art
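
GetBootImagesSize above reduces several image/oat mappings to one enclosing [begin, end) pair per kind: the minimum begin and the maximum begin + size. The same computation on a made-up Mapping struct, mirroring the zero-begin sentinel used in the original:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Mapping { uint32_t begin; uint32_t size; };  // illustrative stand-in

    void EnclosingRange(const std::vector<Mapping>& maps, uint32_t* begin, uint32_t* end) {
      *begin = 0u;
      *end = 0u;
      for (const Mapping& m : maps) {
        if (*begin == 0u || m.begin < *begin) {
          *begin = m.begin;
        }
        *end = std::max(*end, m.begin + m.size);
      }
    }
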
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index cc48172..be8ed40 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -36,6 +36,7 @@
 #include "globals.h"
 #include "object_callbacks.h"
 #include "offsets.h"
+#include "process_state.h"
 #include "safe_map.h"
 #include "verify_object.h"
 
@@ -89,7 +90,6 @@
   class RegionSpace;
   class RosAllocSpace;
   class Space;
-  class SpaceTest;
   class ZygoteSpace;
 }  // namespace space
 
@@ -117,14 +117,6 @@
 // If true, use thread-local allocation stack.
 static constexpr bool kUseThreadLocalAllocationStack = true;
 
-// The process state passed in from the activity manager, used to determine when to do trimming
-// and compaction.
-enum ProcessState {
-  kProcessStateJankPerceptible = 0,
-  kProcessStateJankImperceptible = 1,
-};
-std::ostream& operator<<(std::ostream& os, const ProcessState& process_state);
-
 class Heap {
  public:
   // If true, measure the total allocation time.
@@ -136,11 +128,12 @@
   static constexpr size_t kDefaultMinFree = kDefaultMaxFree / 4;
   static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5);
   static constexpr size_t kDefaultLongGCLogThreshold = MsToNs(100);
-  static constexpr size_t kDefaultTLABSize = 256 * KB;
+  static constexpr size_t kDefaultTLABSize = 32 * KB;
   static constexpr double kDefaultTargetUtilization = 0.5;
   static constexpr double kDefaultHeapGrowthMultiplier = 2.0;
   // Primitive arrays larger than this size are put in the large object space.
-  static constexpr size_t kDefaultLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kMinLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kDefaultLargeObjectThreshold = kMinLargeObjectThreshold;
   // Whether or not parallel GC is enabled. If not, then we never create the thread pool.
   static constexpr bool kDefaultEnableParallelGC = false;
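
kMinLargeObjectThreshold above is the smallest value the large-object cutoff may take: three pages, i.e. 12 KiB with 4 KiB pages. A sketch of the routing decision it feeds; the predicate below is illustrative only, the real check is in Heap::ShouldAllocLargeObject:

    #include <cstddef>

    constexpr size_t kPageSizeSketch = 4096;                                // assumed page size
    constexpr size_t kMinLargeObjectThresholdSketch = 3 * kPageSizeSketch;  // 12 KiB

    // Only big primitive arrays (no reference fields to scan) bypass the main spaces.
    bool ShouldUseLargeObjectSpaceSketch(size_t byte_count, bool is_primitive_array) {
      return is_primitive_array && byte_count >= kMinLargeObjectThresholdSketch;
    }
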
 
@@ -190,6 +183,7 @@
        bool verify_pre_sweeping_rosalloc,
        bool verify_post_gc_rosalloc,
        bool gc_stress_mode,
+       bool measure_gc_performance,
        bool use_homogeneous_space_compaction,
        uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
@@ -249,9 +243,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RegisterNativeAllocation(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
   void RegisterNativeFree(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
 
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator)
@@ -309,7 +303,10 @@
   void ThreadFlipEnd(Thread* self) REQUIRES(!*thread_flip_lock_);
 
   // Clear all of the mark bits, doesn't clear bitmaps which have the same live bits as mark bits.
-  void ClearMarkedObjects() REQUIRES(Locks::heap_bitmap_lock_);
+  // Mutator lock is required for GetContinuousSpaces.
+  void ClearMarkedObjects()
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references)
@@ -360,8 +357,12 @@
   // due to usage by tests.
   void SetSpaceAsDefault(space::ContinuousSpace* continuous_space)
       REQUIRES(!Locks::heap_bitmap_lock_);
-  void AddSpace(space::Space* space) REQUIRES(!Locks::heap_bitmap_lock_);
-  void RemoveSpace(space::Space* space) REQUIRES(!Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space)
+      REQUIRES(!Locks::heap_bitmap_lock_)
+      REQUIRES(Locks::mutator_lock_);
+  void RemoveSpace(space::Space* space)
+      REQUIRES(!Locks::heap_bitmap_lock_)
+      REQUIRES(Locks::mutator_lock_);
 
   // Set target ideal heap utilization ratio, implements
   // dalvik.system.VMRuntime.setTargetHeapUtilization.
@@ -376,10 +377,16 @@
   collector::GcType WaitForGcToComplete(GcCause cause, Thread* self) REQUIRES(!*gc_complete_lock_);
 
   // Update the heap's process state to a new value, may cause compaction to occur.
-  void UpdateProcessState(ProcessState process_state)
+  void UpdateProcessState(ProcessState old_process_state, ProcessState new_process_state)
       REQUIRES(!*pending_task_lock_, !*gc_complete_lock_);
 
-  const std::vector<space::ContinuousSpace*>& GetContinuousSpaces() const {
+  bool HaveContinuousSpaces() const NO_THREAD_SAFETY_ANALYSIS {
+    // No lock needed since checking whether the vector is empty is thread safe.
+    return !continuous_spaces_.empty();
+  }
+
+  const std::vector<space::ContinuousSpace*>& GetContinuousSpaces() const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     return continuous_spaces_;
   }
 
@@ -519,12 +526,15 @@
   // get the space that corresponds to an object's address. Current implementation searches all
   // spaces in turn. If fail_ok is false then failing to find a space will cause an abort.
   // TODO: consider using faster data structure like binary tree.
-  space::ContinuousSpace* FindContinuousSpaceFromObject(const mirror::Object*, bool fail_ok) const;
+  space::ContinuousSpace* FindContinuousSpaceFromObject(const mirror::Object*, bool fail_ok) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   space::DiscontinuousSpace* FindDiscontinuousSpaceFromObject(const mirror::Object*,
-                                                              bool fail_ok) const;
-  space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const;
+                                                              bool fail_ok) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
 
   // Do a pending collector transition.
   void DoPendingCollectorTransition() REQUIRES(!*gc_complete_lock_);
@@ -578,11 +588,25 @@
       REQUIRES(Locks::heap_bitmap_lock_);
 
   // Unbind any bound bitmaps.
-  void UnBindBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
+  void UnBindBitmaps()
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // DEPRECATED: Should remove in "near" future when support for multiple image spaces is added.
-  // Assumes there is only one image space.
-  space::ImageSpace* GetImageSpace() const;
+  // Returns the boot image spaces. There may be multiple boot image spaces.
+  const std::vector<space::ImageSpace*>& GetBootImageSpaces() const {
+    return boot_image_spaces_;
+  }
+
+  bool ObjectIsInBootImageSpace(mirror::Object* obj) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool IsInBootImageOatFile(const void* p) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void GetBootImagesSize(uint32_t* boot_image_begin,
+                         uint32_t* boot_image_end,
+                         uint32_t* boot_oat_begin,
+                         uint32_t* boot_oat_end);
 
  // Permanently disable moving garbage collection.
   void DisableMovingGc() REQUIRES(!*gc_complete_lock_);
@@ -596,7 +620,8 @@
   }
 
   // Return the corresponding rosalloc space.
-  space::RosAllocSpace* GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const;
+  space::RosAllocSpace* GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
@@ -621,8 +646,8 @@
     }
   }
 
-  std::string DumpSpaces() const WARN_UNUSED;
-  void DumpSpaces(std::ostream& stream) const;
+  void DumpSpaces(std::ostream& stream) const SHARED_REQUIRES(Locks::mutator_lock_);
+  std::string DumpSpaces() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Dump object should only be used by the signal handler.
   void DumpObject(std::ostream& stream, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
@@ -631,14 +656,10 @@
   std::string SafePrettyTypeOf(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
   // GC performance measuring
-  void DumpGcPerformanceInfo(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpGcPerformanceInfo(std::ostream& os)
+      REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
   void ResetGcPerformanceInfo() REQUIRES(!*gc_complete_lock_);
 
-  // Returns true if we currently care about pause times.
-  bool CareAboutPauseTimes() const {
-    return process_state_ == kProcessStateJankPerceptible;
-  }
-
   // Thread pool.
   void CreateThreadPool();
   void DeleteThreadPool();
@@ -660,7 +681,9 @@
   void RemoveRememberedSet(space::Space* space);
 
   bool IsCompilingBoot() const;
-  bool HasImageSpace() const;
+  bool HasBootImageSpace() const {
+    return !boot_image_spaces_.empty();
+  }
 
   ReferenceProcessor* GetReferenceProcessor() {
     return reference_processor_.get();
@@ -833,6 +856,7 @@
   // an initial allocation attempt failed.
   mirror::Object* AllocateInternalWithGc(Thread* self,
                                          AllocatorType allocator,
+                                         bool instrumented,
                                          size_t num_bytes,
                                          size_t* bytes_allocated,
                                          size_t* usable_size,
@@ -952,7 +976,8 @@
   void ProcessCards(TimingLogger* timings,
                     bool use_rem_sets,
                     bool process_alloc_space_cards,
-                    bool clear_alloc_space_cards);
+                    bool clear_alloc_space_cards)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object** obj)
@@ -995,10 +1020,10 @@
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_);
 
   // All-known continuous spaces, where objects lie within fixed bounds.
-  std::vector<space::ContinuousSpace*> continuous_spaces_;
+  std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
   // All-known discontinuous spaces, where objects may be placed throughout virtual memory.
-  std::vector<space::DiscontinuousSpace*> discontinuous_spaces_;
+  std::vector<space::DiscontinuousSpace*> discontinuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
   // All-known alloc spaces, where objects may be or have been allocated.
   std::vector<space::AllocSpace*> alloc_spaces_;
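
GUARDED_BY and the REQUIRES/SHARED_REQUIRES macros being added throughout this header map to Clang's -Wthread-safety attributes, turning lock-discipline violations into compile-time warnings. A compressed sketch using the raw attributes (ART wraps these in macros in base/mutex.h; the names here are stand-ins):

    #include <vector>

    class __attribute__((capability("mutex"))) MutexSketch {
     public:
      void Lock() __attribute__((acquire_capability()));
      void Unlock() __attribute__((release_capability()));
    };

    MutexSketch g_mutator_lock;

    // Any read or write of g_spaces outside code annotated as holding g_mutator_lock
    // is flagged when compiling with clang -Wthread-safety.
    std::vector<int> g_spaces __attribute__((guarded_by(g_mutator_lock)));
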
@@ -1083,6 +1108,8 @@
   // Used to synchronize between JNI critical calls and the thread flip of the CC collector.
   Mutex* thread_flip_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::unique_ptr<ConditionVariable> thread_flip_cond_ GUARDED_BY(thread_flip_lock_);
+  // This counter keeps track of how many threads are currently in a JNI critical section. This is
+  // incremented once per thread even with nested enters.
   size_t disable_thread_flip_count_ GUARDED_BY(thread_flip_lock_);
   bool thread_flip_running_ GUARDED_BY(thread_flip_lock_);
 
@@ -1116,9 +1143,6 @@
   // Whether or not we need to run finalizers in the next native allocation.
   bool native_need_to_run_finalization_;
 
-  // Whether or not we currently care about pause times.
-  ProcessState process_state_;
-
   // When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that
   // it completes ahead of an allocation failing.
   size_t concurrent_start_bytes_;
@@ -1135,6 +1159,11 @@
   // Bytes which are allocated and managed by native code but still need to be accounted for.
   Atomic<size_t> native_bytes_allocated_;
 
+  // Native allocation stats.
+  Mutex native_histogram_lock_;
+  Histogram<uint64_t> native_allocation_histogram_;
+  Histogram<uint64_t> native_free_histogram_;
+
   // Number of bytes freed by thread local buffer revokes. This will
   // cancel out the ahead-of-time bulk counting of bytes allocated in
   // rosalloc thread-local buffers.  It is temporarily accumulated
@@ -1305,8 +1334,7 @@
 
   // Allocation tracking support
   Atomic<bool> alloc_tracking_enabled_;
-  std::unique_ptr<AllocRecordObjectMap> allocation_records_
-      GUARDED_BY(Locks::alloc_tracker_lock_);
+  std::unique_ptr<AllocRecordObjectMap> allocation_records_;
 
   // GC stress related data structures.
   Mutex* backtrace_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -1320,6 +1348,9 @@
   // allocating.
   bool gc_disabled_for_shutdown_ GUARDED_BY(gc_complete_lock_);
 
+  // Boot image spaces.
+  std::vector<space::ImageSpace*> boot_image_spaces_;
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
@@ -1331,7 +1362,6 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
-  friend class space::SpaceTest;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 39ba743..e172f85 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -86,14 +86,14 @@
     // it to the mutator as long as the GC is not preserving references.
     if (LIKELY(collector_ != nullptr)) {
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block. Otherwise, we
+      // by finalizer referents. So we cannot return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
       // in the heap causing corruption since this field would get swept.
       if (collector_->IsMarkedHeapReference(referent_addr)) {
         if (!preserving_references_ ||
-           (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
+           (LIKELY(!reference->IsFinalizerReferenceInstance()) && reference->IsUnprocessed())) {
           return referent_addr->AsMirrorPtr();
         }
       }
@@ -275,13 +275,9 @@
  // GC queues, but since we hold the finalizer_reference_queue_ lock it also prevents this
   // race.
   MutexLock mu2(self, *Locks::reference_queue_finalizer_references_lock_);
-  if (!reference->IsEnqueued()) {
+  if (reference->IsUnprocessed()) {
     CHECK(reference->IsFinalizerReferenceInstance());
-    if (Runtime::Current()->IsActiveTransaction()) {
-      reference->SetPendingNext<true>(reference);
-    } else {
-      reference->SetPendingNext<false>(reference);
-    }
+    reference->SetPendingNext(reference);
     return true;
   }
   return false;
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 56957ba..62625c4 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -32,88 +32,57 @@
 void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) {
   DCHECK(ref != nullptr);
   MutexLock mu(self, *lock_);
-  if (!ref->IsEnqueued()) {
-    EnqueuePendingReference(ref);
+  if (ref->IsUnprocessed()) {
+    EnqueueReference(ref);
   }
 }
 
 void ReferenceQueue::EnqueueReference(mirror::Reference* ref) {
-  CHECK(ref->IsEnqueuable());
-  EnqueuePendingReference(ref);
-}
-
-void ReferenceQueue::EnqueuePendingReference(mirror::Reference* ref) {
   DCHECK(ref != nullptr);
+  CHECK(ref->IsUnprocessed());
   if (IsEmpty()) {
    // 1 element cyclic queue, i.e.: Reference ref = ..; ref.pendingNext = ref;
     list_ = ref;
   } else {
-    mirror::Reference* head = list_->GetPendingNext();
-    if (Runtime::Current()->IsActiveTransaction()) {
-      ref->SetPendingNext<true>(head);
-    } else {
-      ref->SetPendingNext<false>(head);
-    }
+    // The list is owned by the GC, everything that has been inserted must already be at least
+    // gray.
+    mirror::Reference* head = list_->GetPendingNext<kWithoutReadBarrier>();
+    DCHECK(head != nullptr);
+    ref->SetPendingNext(head);
   }
-  if (Runtime::Current()->IsActiveTransaction()) {
-    list_->SetPendingNext<true>(ref);
-  } else {
-    list_->SetPendingNext<false>(ref);
-  }
+  // Add the reference in the middle to preserve the cycle.
+  list_->SetPendingNext(ref);
 }
 
 mirror::Reference* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* head = list_->GetPendingNext();
-  DCHECK(head != nullptr);
-  mirror::Reference* ref;
+  mirror::Reference* ref = list_->GetPendingNext<kWithoutReadBarrier>();
+  DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
-  if (list_ == head) {
-    ref = list_;
+  if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = head->GetPendingNext();
-    if (Runtime::Current()->IsActiveTransaction()) {
-      list_->SetPendingNext<true>(next);
-    } else {
-      list_->SetPendingNext<false>(next);
-    }
-    ref = head;
+    mirror::Reference* next = ref->GetPendingNext<kWithoutReadBarrier>();
+    list_->SetPendingNext(next);
   }
-  if (Runtime::Current()->IsActiveTransaction()) {
-    ref->SetPendingNext<true>(nullptr);
-  } else {
-    ref->SetPendingNext<false>(nullptr);
-  }
+  ref->SetPendingNext(nullptr);
   Heap* heap = Runtime::Current()->GetHeap();
   if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
       heap->ConcurrentCopyingCollector()->IsActive()) {
-    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to black or white.
+    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to white.
     // We check IsActive() above because we don't want to do this when the zygote compaction
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    const bool is_moving = concurrent_copying->RegionSpace()->IsInToSpace(ref);
-    if (ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      if (is_moving) {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
-      } else {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
-      }
+    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
+    if (rb_ptr == ReadBarrier::GrayPtr()) {
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
-      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a black or white Reference in the
-      // queue and find it here, which is OK. Check that the color makes sense depending on whether
-      // the Reference is moving or not and that the referent has been marked.
-      if (is_moving) {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      } else {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      }
+      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
+      // find it here, which is OK.
+      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
       mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
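
The queue that EnqueueReference and DequeuePendingReference maintain is the circular, singly-linked list described in reference_queue.h: list_ is an anchor element, list_->pendingNext is the next reference out, and a lone element points at itself. A self-contained sketch of just the link manipulation, with Node standing in for mirror::Reference:

    struct Node { Node* pending_next = nullptr; };

    class CircularQueueSketch {
     public:
      void Enqueue(Node* n) {
        if (list_ == nullptr) {
          list_ = n;                              // n will be made to point at itself below
        } else {
          n->pending_next = list_->pending_next;  // splice n in front of the current head
        }
        list_->pending_next = n;                  // close (or re-close) the cycle
      }
      Node* Dequeue() {
        Node* head = list_->pending_next;
        if (head == list_) {
          list_ = nullptr;                          // last element removed
        } else {
          list_->pending_next = head->pending_next; // unlink the head
        }
        head->pending_next = nullptr;               // nullptr marks "unprocessed"
        return head;
      }
     private:
      Node* list_ = nullptr;  // anchor: list_->pending_next is the next to dequeue
    };
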
@@ -168,9 +137,7 @@
       } else {
         ref->ClearReferent<false>();
       }
-      if (ref->IsEnqueuable()) {
-        cleared_references->EnqueuePendingReference(ref);
-      }
+      cleared_references->EnqueueReference(ref);
     }
   }
 }
@@ -183,8 +150,6 @@
     if (referent_addr->AsMirrorPtr() != nullptr &&
         !collector->IsMarkedHeapReference(referent_addr)) {
       mirror::Object* forward_address = collector->MarkObject(referent_addr->AsMirrorPtr());
-      // If the referent is non-null the reference must queuable.
-      DCHECK(ref->IsEnqueuable());
       // Move the updated referent to the zombie field.
       if (Runtime::Current()->IsActiveTransaction()) {
         ref->SetZombie<true>(forward_address);
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index aabac97..04d3454 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -44,27 +44,24 @@
 class Heap;
 
 // Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
-// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained in the
-// java.lang.ref.Reference objects.
+// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained as an unordered,
+// circular, and singly-linked list using the pendingNext fields of the java.lang.ref.Reference
+// objects.
 class ReferenceQueue {
  public:
   explicit ReferenceQueue(Mutex* lock);
 
-  // Enqueue a reference if is not already enqueued. Thread safe to call from multiple threads
-  // since it uses a lock to avoid a race between checking for the references presence and adding
-  // it.
+  // Enqueue a reference if it is unprocessed. Thread safe to call from multiple
+  // threads since it uses a lock to avoid a race between checking for the reference's presence and
+  // adding it.
   void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*lock_);
 
-  // Enqueue a reference, unlike EnqueuePendingReference, enqueue reference checks that the
-  // reference IsEnqueueable. Not thread safe, used when mutators are paused to minimize lock
-  // overhead.
+  // Enqueue a reference. The reference must be unprocessed.
+  // Not thread safe, used when mutators are paused to minimize lock overhead.
   void EnqueueReference(mirror::Reference* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Enqueue a reference without checking that it is enqueable.
-  void EnqueuePendingReference(mirror::Reference* ref) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Dequeue the first reference (returns list_).
+  // Dequeue a reference from the queue and return it.
   mirror::Reference* DequeuePendingReference() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to
diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index ab921d9..35bf718 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc
@@ -35,25 +35,28 @@
   ASSERT_EQ(queue.GetLength(), 0U);
   auto ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(ref_class.Get() != nullptr);
   auto ref1(hs.NewHandle(ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref1.Get() != nullptr);
   auto ref2(hs.NewHandle(ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref2.Get() != nullptr);
-  // FIFO ordering.
-  queue.EnqueuePendingReference(ref1.Get());
+  queue.EnqueueReference(ref1.Get());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  queue.EnqueuePendingReference(ref2.Get());
+  queue.EnqueueReference(ref2.Get());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 2U);
-  ASSERT_EQ(queue.DequeuePendingReference(), ref2.Get());
+
+  std::set<mirror::Reference*> refs = {ref1.Get(), ref2.Get()};
+  std::set<mirror::Reference*> dequeued;
+  dequeued.insert(queue.DequeuePendingReference());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  ASSERT_EQ(queue.DequeuePendingReference(), ref1.Get());
+  dequeued.insert(queue.DequeuePendingReference());
   ASSERT_EQ(queue.GetLength(), 0U);
   ASSERT_TRUE(queue.IsEmpty());
+  ASSERT_EQ(refs, dequeued);
 }
 
 TEST_F(ReferenceQueueTest, Dump) {
@@ -65,19 +68,19 @@
   queue.Dump(LOG(INFO));
   auto weak_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(weak_ref_class.Get() != nullptr);
   auto finalizer_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/FinalizerReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(finalizer_ref_class.Get() != nullptr);
   auto ref1(hs.NewHandle(weak_ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref1.Get() != nullptr);
   auto ref2(hs.NewHandle(finalizer_ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref2.Get() != nullptr);
-  queue.EnqueuePendingReference(ref1.Get());
+  queue.EnqueueReference(ref1.Get());
   queue.Dump(LOG(INFO));
-  queue.EnqueuePendingReference(ref2.Get());
+  queue.EnqueueReference(ref2.Get());
   queue.Dump(LOG(INFO));
 }
 
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index e7786a1..b5eb979 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -38,4 +38,3 @@
 
 }  // namespace gc
 }  // namespace art
-
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 77f606d..455d28e 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -20,6 +20,8 @@
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "memory_tool_malloc_space-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -317,11 +319,18 @@
 namespace allocator {
 
 // Implement the dlmalloc morecore callback.
-void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) {
-  Heap* heap = Runtime::Current()->GetHeap();
+void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) SHARED_REQUIRES(Locks::mutator_lock_) {
+  Runtime* runtime = Runtime::Current();
+  Heap* heap = runtime->GetHeap();
   ::art::gc::space::DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace();
   // Support for multiple DlMalloc provided by a slow path.
   if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) {
+    if (LIKELY(runtime->GetJit() != nullptr)) {
+      jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache();
+      if (code_cache->OwnsSpace(mspace)) {
+        return code_cache->MoreCore(mspace, increment);
+      }
+    }
     dlmalloc_space = nullptr;
     for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
       if (space->IsDlMallocSpace()) {
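
dlmalloc funnels every grow/shrink request through one morecore callback, so once the JIT code cache gets its own mspace the callback has to dispatch on ownership, as above. A simplified sketch of that routing; ArenaSketch and its methods are made-up stand-ins, and dlmalloc's real failure sentinel is MFAIL rather than nullptr:

    #include <cstddef>
    #include <cstdint>

    struct ArenaSketch {
      void* mspace;  // the dlmalloc handle this arena owns
      bool OwnsSpace(void* m) const { return m == mspace; }
      // Grow (or shrink) this arena's backing mapping; stubbed out in this sketch.
      void* MoreCore(intptr_t /*increment*/) { return nullptr; }
    };

    void* MoreCoreDispatch(ArenaSketch* arenas, size_t count, void* mspace, intptr_t increment) {
      for (size_t i = 0; i < count; ++i) {
        if (arenas[i].OwnsSpace(mspace)) {
          return arenas[i].MoreCore(increment);
        }
      }
      return nullptr;  // unknown mspace; real dlmalloc expects MFAIL here
    }
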
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
deleted file mode 100644
index 93fe155..0000000
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-#include "dlmalloc_space.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index ce64b10..c87312b 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -16,20 +16,22 @@
 
 #include "image_space.h"
 
-#include <dirent.h>
+#include <lz4.h>
+#include <random>
 #include <sys/statvfs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
-#include <random>
-
 #include "art_method.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
 #include "base/scoped_flock.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
-#include "base/unix_file/fd_file.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "image-inl.h"
+#include "image_space_fs.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_file.h"
@@ -43,11 +45,18 @@
 
 Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
-ImageSpace::ImageSpace(const std::string& image_filename, const char* image_location,
-                       MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap,
+ImageSpace::ImageSpace(const std::string& image_filename,
+                       const char* image_location,
+                       MemMap* mem_map,
+                       accounting::ContinuousSpaceBitmap* live_bitmap,
                        uint8_t* end)
-    : MemMapSpace(image_filename, mem_map, mem_map->Begin(), end, end,
+    : MemMapSpace(image_filename,
+                  mem_map,
+                  mem_map->Begin(),
+                  end,
+                  end,
                   kGcRetentionPolicyNeverCollect),
+      oat_file_non_owned_(nullptr),
       image_location_(image_location) {
   DCHECK(live_bitmap != nullptr);
   live_bitmap_.reset(live_bitmap);
@@ -58,10 +67,7 @@
   CHECK_ALIGNED(max_delta, kPageSize);
   CHECK_LT(min_delta, max_delta);
 
-  std::default_random_engine generator;
-  generator.seed(NanoTime() * getpid());
-  std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta);
-  int32_t r = distribution(generator);
+  int32_t r = GetRandomNumber<int32_t>(min_delta, max_delta);
   if (r % 2 == 0) {
     r = RoundUp(r, kPageSize);
   } else {
@@ -73,106 +79,12 @@
   return r;
 }
 
-// We are relocating or generating the core image. We should get rid of everything. It is all
-// out-of-date. We also don't really care if this fails since it is just a convenience.
-// Adapted from prune_dex_cache(const char* subdir) in frameworks/native/cmds/installd/commands.c
-// Note this should only be used during first boot.
-static void RealPruneDalvikCache(const std::string& cache_dir_path);
-
-static void PruneDalvikCache(InstructionSet isa) {
-  CHECK_NE(isa, kNone);
-  // Prune the base /data/dalvik-cache.
-  RealPruneDalvikCache(GetDalvikCacheOrDie(".", false));
-  // Prune /data/dalvik-cache/<isa>.
-  RealPruneDalvikCache(GetDalvikCacheOrDie(GetInstructionSetString(isa), false));
+static int32_t ChooseRelocationOffsetDelta() {
+  return ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA, ART_BASE_ADDRESS_MAX_DELTA);
 }
 
-static void RealPruneDalvikCache(const std::string& cache_dir_path) {
-  if (!OS::DirectoryExists(cache_dir_path.c_str())) {
-    return;
-  }
-  DIR* cache_dir = opendir(cache_dir_path.c_str());
-  if (cache_dir == nullptr) {
-    PLOG(WARNING) << "Unable to open " << cache_dir_path << " to delete it's contents";
-    return;
-  }
-
-  for (struct dirent* de = readdir(cache_dir); de != nullptr; de = readdir(cache_dir)) {
-    const char* name = de->d_name;
-    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
-      continue;
-    }
-    // We only want to delete regular files and symbolic links.
-    if (de->d_type != DT_REG && de->d_type != DT_LNK) {
-      if (de->d_type != DT_DIR) {
-        // We do expect some directories (namely the <isa> for pruning the base dalvik-cache).
-        LOG(WARNING) << "Unexpected file type of " << std::hex << de->d_type << " encountered.";
-      }
-      continue;
-    }
-    std::string cache_file(cache_dir_path);
-    cache_file += '/';
-    cache_file += name;
-    if (TEMP_FAILURE_RETRY(unlink(cache_file.c_str())) != 0) {
-      PLOG(ERROR) << "Unable to unlink " << cache_file;
-      continue;
-    }
-  }
-  CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(cache_dir))) << "Unable to close directory.";
-}
-
-// We write out an empty file to the zygote's ISA specific cache dir at the start of
-// every zygote boot and delete it when the boot completes. If we find a file already
-// present, it usually means the boot didn't complete. We wipe the entire dalvik
-// cache if that's the case.
-static void MarkZygoteStart(const InstructionSet isa, const uint32_t max_failed_boots) {
-  const std::string isa_subdir = GetDalvikCacheOrDie(GetInstructionSetString(isa), false);
-  const std::string boot_marker = isa_subdir + "/.booting";
-  const char* file_name = boot_marker.c_str();
-
-  uint32_t num_failed_boots = 0;
-  std::unique_ptr<File> file(OS::OpenFileReadWrite(file_name));
-  if (file.get() == nullptr) {
-    file.reset(OS::CreateEmptyFile(file_name));
-
-    if (file.get() == nullptr) {
-      PLOG(WARNING) << "Failed to create boot marker.";
-      return;
-    }
-  } else {
-    if (!file->ReadFully(&num_failed_boots, sizeof(num_failed_boots))) {
-      PLOG(WARNING) << "Failed to read boot marker.";
-      file->Erase();
-      return;
-    }
-  }
-
-  if (max_failed_boots != 0 && num_failed_boots > max_failed_boots) {
-    LOG(WARNING) << "Incomplete boot detected. Pruning dalvik cache";
-    RealPruneDalvikCache(isa_subdir);
-  }
-
-  ++num_failed_boots;
-  VLOG(startup) << "Number of failed boots on : " << boot_marker << " = " << num_failed_boots;
-
-  if (lseek(file->Fd(), 0, SEEK_SET) == -1) {
-    PLOG(WARNING) << "Failed to write boot marker.";
-    file->Erase();
-    return;
-  }
-
-  if (!file->WriteFully(&num_failed_boots, sizeof(num_failed_boots))) {
-    PLOG(WARNING) << "Failed to write boot marker.";
-    file->Erase();
-    return;
-  }
-
-  if (file->FlushCloseOrErase() != 0) {
-    PLOG(WARNING) << "Failed to flush boot marker.";
-  }
-}
-
-static bool GenerateImage(const std::string& image_filename, InstructionSet image_isa,
+static bool GenerateImage(const std::string& image_filename,
+                          InstructionSet image_isa,
                           std::string* error_msg) {
   const std::string boot_class_path_string(Runtime::Current()->GetBootClassPathString());
   std::vector<std::string> boot_class_path;
@@ -211,8 +123,7 @@
   CHECK_EQ(image_isa, kRuntimeISA)
       << "We should always be generating an image for the current isa.";
 
-  int32_t base_offset = ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA,
-                                                    ART_BASE_ADDRESS_MAX_DELTA);
+  int32_t base_offset = ChooseRelocationOffsetDelta();
   LOG(INFO) << "Using an offset of 0x" << std::hex << base_offset << " from default "
             << "art base address of 0x" << std::hex << ART_BASE_ADDRESS;
   arg_vector.push_back(StringPrintf("--base=0x%x", ART_BASE_ADDRESS + base_offset));
@@ -231,14 +142,17 @@
   return Exec(arg_vector, error_msg);
 }
 
-bool ImageSpace::FindImageFilename(const char* image_location,
-                                   const InstructionSet image_isa,
-                                   std::string* system_filename,
-                                   bool* has_system,
-                                   std::string* cache_filename,
-                                   bool* dalvik_cache_exists,
-                                   bool* has_cache,
-                                   bool* is_global_cache) {
+static bool FindImageFilenameImpl(const char* image_location,
+                                  const InstructionSet image_isa,
+                                  bool* has_system,
+                                  std::string* system_filename,
+                                  bool* dalvik_cache_exists,
+                                  std::string* dalvik_cache,
+                                  bool* is_global_cache,
+                                  bool* has_cache,
+                                  std::string* cache_filename) {
+  DCHECK(dalvik_cache != nullptr);
+
   *has_system = false;
   *has_cache = false;
   // image_location = /system/framework/boot.art
@@ -251,9 +165,12 @@
 
   bool have_android_data = false;
   *dalvik_cache_exists = false;
-  std::string dalvik_cache;
-  GetDalvikCache(GetInstructionSetString(image_isa), true, &dalvik_cache,
-                 &have_android_data, dalvik_cache_exists, is_global_cache);
+  GetDalvikCache(GetInstructionSetString(image_isa),
+                 true,
+                 dalvik_cache,
+                 &have_android_data,
+                 dalvik_cache_exists,
+                 is_global_cache);
 
   if (have_android_data && *dalvik_cache_exists) {
     // Always set output location even if it does not exist,
@@ -262,7 +179,10 @@
     // image_location = /system/framework/boot.art
     // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
     std::string error_msg;
-    if (!GetDalvikCacheFilename(image_location, dalvik_cache.c_str(), cache_filename, &error_msg)) {
+    if (!GetDalvikCacheFilename(image_location,
+                                dalvik_cache->c_str(),
+                                cache_filename,
+                                &error_msg)) {
       LOG(WARNING) << error_msg;
       return *has_system;
     }
@@ -271,6 +191,26 @@
   return *has_system || *has_cache;
 }
 
+bool ImageSpace::FindImageFilename(const char* image_location,
+                                   const InstructionSet image_isa,
+                                   std::string* system_filename,
+                                   bool* has_system,
+                                   std::string* cache_filename,
+                                   bool* dalvik_cache_exists,
+                                   bool* has_cache,
+                                   bool* is_global_cache) {
+  std::string dalvik_cache_unused;
+  return FindImageFilenameImpl(image_location,
+                               image_isa,
+                               has_system,
+                               system_filename,
+                               dalvik_cache_exists,
+                               &dalvik_cache_unused,
+                               is_global_cache,
+                               has_cache,
+                               cache_filename);
+}
+
 static bool ReadSpecificImageHeader(const char* filename, ImageHeader* image_header) {
     std::unique_ptr<File> image_file(OS::OpenFileForReading(filename));
     if (image_file.get() == nullptr) {
@@ -284,8 +224,10 @@
 }
 
 // Relocate the image at image_location to dest_filename and relocate it by a random amount.
-static bool RelocateImage(const char* image_location, const char* dest_filename,
-                               InstructionSet isa, std::string* error_msg) {
+static bool RelocateImage(const char* image_location,
+                          const char* dest_filename,
+                          InstructionSet isa,
+                          std::string* error_msg) {
   // We should clean up so we are more likely to have room for the image.
   if (Runtime::Current()->IsZygote()) {
     LOG(INFO) << "Pruning dalvik-cache since we are relocating an image and will need to recompile";
@@ -300,18 +242,11 @@
   std::string output_image_filename_arg("--output-image-file=");
   output_image_filename_arg += dest_filename;
 
-  std::string input_oat_location_arg("--input-oat-location=");
-  input_oat_location_arg += ImageHeader::GetOatLocationFromImageLocation(image_location);
-
-  std::string output_oat_filename_arg("--output-oat-file=");
-  output_oat_filename_arg += ImageHeader::GetOatLocationFromImageLocation(dest_filename);
-
   std::string instruction_set_arg("--instruction-set=");
   instruction_set_arg += GetInstructionSetString(isa);
 
   std::string base_offset_arg("--base-offset-delta=");
-  StringAppendF(&base_offset_arg, "%d", ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA,
-                                                                    ART_BASE_ADDRESS_MAX_DELTA));
+  StringAppendF(&base_offset_arg, "%d", ChooseRelocationOffsetDelta());
 
   std::vector<std::string> argv;
   argv.push_back(patchoat);
@@ -319,9 +254,6 @@
   argv.push_back(input_image_location_arg);
   argv.push_back(output_image_filename_arg);
 
-  argv.push_back(input_oat_location_arg);
-  argv.push_back(output_oat_filename_arg);
-
   argv.push_back(instruction_set_arg);
   argv.push_back(base_offset_arg);
 
@@ -339,16 +271,6 @@
   return hdr.release();
 }
 
-ImageHeader* ImageSpace::ReadImageHeaderOrDie(const char* image_location,
-                                              const InstructionSet image_isa) {
-  std::string error_msg;
-  ImageHeader* image_header = ReadImageHeader(image_location, image_isa, &error_msg);
-  if (image_header == nullptr) {
-    LOG(FATAL) << error_msg;
-  }
-  return image_header;
-}
-
 ImageHeader* ImageSpace::ReadImageHeader(const char* image_location,
                                          const InstructionSet image_isa,
                                          std::string* error_msg) {
@@ -414,11 +336,31 @@
   return nullptr;
 }
 
-static bool ChecksumsMatch(const char* image_a, const char* image_b) {
+static bool ChecksumsMatch(const char* image_a, const char* image_b, std::string* error_msg) {
+  DCHECK(error_msg != nullptr);
+
   ImageHeader hdr_a;
   ImageHeader hdr_b;
-  return ReadSpecificImageHeader(image_a, &hdr_a) && ReadSpecificImageHeader(image_b, &hdr_b)
-      && hdr_a.GetOatChecksum() == hdr_b.GetOatChecksum();
+
+  if (!ReadSpecificImageHeader(image_a, &hdr_a)) {
+    *error_msg = StringPrintf("Cannot read header of %s", image_a);
+    return false;
+  }
+  if (!ReadSpecificImageHeader(image_b, &hdr_b)) {
+    *error_msg = StringPrintf("Cannot read header of %s", image_b);
+    return false;
+  }
+
+  if (hdr_a.GetOatChecksum() != hdr_b.GetOatChecksum()) {
+    *error_msg = StringPrintf("Checksum mismatch: %u(%s) vs %u(%s)",
+                              hdr_a.GetOatChecksum(),
+                              image_a,
+                              hdr_b.GetOatChecksum(),
+                              image_b);
+    return false;
+  }
+
+  return true;
 }
 
 static bool ImageCreationAllowed(bool is_global_cache, std::string* error_msg) {
@@ -436,6 +378,993 @@
   return false;
 }
 
+void ImageSpace::VerifyImageAllocations() {
+  uint8_t* current = Begin() + RoundUp(sizeof(ImageHeader), kObjectAlignment);
+  while (current < End()) {
+    CHECK_ALIGNED(current, kObjectAlignment);
+    auto* obj = reinterpret_cast<mirror::Object*>(current);
+    CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
+    CHECK(live_bitmap_->Test(obj)) << PrettyTypeOf(obj);
+    if (kUseBakerOrBrooksReadBarrier) {
+      obj->AssertReadBarrierPointer();
+    }
+    current += RoundUp(obj->SizeOf(), kObjectAlignment);
+  }
+}
+
+// Helper class for relocating from one range of memory to another.
+class RelocationRange {
+ public:
+  RelocationRange() = default;
+  RelocationRange(const RelocationRange&) = default;
+  RelocationRange(uintptr_t source, uintptr_t dest, uintptr_t length)
+      : source_(source),
+        dest_(dest),
+        length_(length) {}
+
+  bool InSource(uintptr_t address) const {
+    return address - source_ < length_;
+  }
+
+  bool InDest(uintptr_t address) const {
+    return address - dest_ < length_;
+  }
+
+  // Translate a source address to the destination space.
+  uintptr_t ToDest(uintptr_t address) const {
+    DCHECK(InSource(address));
+    return address + Delta();
+  }
+
+  // Returns the delta from the source to the destination.
+  uintptr_t Delta() const {
+    return dest_ - source_;
+  }
+
+  uintptr_t Source() const {
+    return source_;
+  }
+
+  uintptr_t Dest() const {
+    return dest_;
+  }
+
+  uintptr_t Length() const {
+    return length_;
+  }
+
+ private:
+  const uintptr_t source_;
+  const uintptr_t dest_;
+  const uintptr_t length_;
+};
+
+std::ostream& operator<<(std::ostream& os, const RelocationRange& reloc) {
+  return os << "(" << reinterpret_cast<const void*>(reloc.Source()) << "-"
+            << reinterpret_cast<const void*>(reloc.Source() + reloc.Length()) << ")->("
+            << reinterpret_cast<const void*>(reloc.Dest()) << "-"
+            << reinterpret_cast<const void*>(reloc.Dest() + reloc.Length()) << ")";
+}
+
+// Helper class that encapsulates loading: it can access private ImageSpace members (it is a
+// friend class) without the loading functions having to be declared in the header.
+class ImageSpaceLoader {
+ public:
+  static std::unique_ptr<ImageSpace> Load(const char* image_location,
+                                          const std::string& image_filename,
+                                          bool is_zygote,
+                                          bool is_global_cache,
+                                          bool validate_oat_file,
+                                          std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we must not use the file descriptor associated with
+    // ScopedFlock::GetFile to Init the image file. We want the file
+    // descriptor (and the associated exclusive lock) to be released when
+    // we leave Create.
+    ScopedFlock image_lock;
+    // Should this be an RDWR lock? This is only a defensive measure, as at
+    // this point the image should exist.
+    // However, only the zygote can write into the global dalvik-cache, so
+    // restrict RW locking to zygote processes, or to any process that isn't
+    // using the global /data/dalvik-cache (such processes are assumed to be
+    // allowed to write to their own cache location).
+    const bool rw_lock = is_zygote || !is_global_cache;
+    image_lock.Init(image_filename.c_str(),
+                    rw_lock ? (O_CREAT | O_RDWR) : O_RDONLY /* flags */,
+                    true /* block */,
+                    error_msg);
+    VLOG(startup) << "Using image file " << image_filename.c_str() << " for image location "
+                  << image_location;
+    // If we are in /system we can assume the image is good. We can also
+    // assume this if we are using a relocated image (i.e. image checksum
+    // matches) since this is only different by the offset. We need this to
+    // make sure that host tests continue to work.
+    // Since this is the boot image, pass a null oat file; it is opened later
+    // from the boot image's oat file name.
+    return Init(image_filename.c_str(),
+                image_location,
+                validate_oat_file,
+                /* oat_file */nullptr,
+                error_msg);
+  }
+
+  static std::unique_ptr<ImageSpace> Init(const char* image_filename,
+                                          const char* image_location,
+                                          bool validate_oat_file,
+                                          const OatFile* oat_file,
+                                          std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CHECK(image_filename != nullptr);
+    CHECK(image_location != nullptr);
+
+    TimingLogger logger(__PRETTY_FUNCTION__, true, VLOG_IS_ON(image));
+    VLOG(image) << "ImageSpace::Init entering image_filename=" << image_filename;
+
+    std::unique_ptr<File> file;
+    {
+      TimingLogger::ScopedTiming timing("OpenImageFile", &logger);
+      file.reset(OS::OpenFileForReading(image_filename));
+      if (file == nullptr) {
+        *error_msg = StringPrintf("Failed to open '%s'", image_filename);
+        return nullptr;
+      }
+    }
+    ImageHeader temp_image_header;
+    ImageHeader* image_header = &temp_image_header;
+    {
+      TimingLogger::ScopedTiming timing("ReadImageHeader", &logger);
+      bool success = file->ReadFully(image_header, sizeof(*image_header));
+      if (!success || !image_header->IsValid()) {
+        *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
+        return nullptr;
+      }
+    }
+    // Check that the file is at least as large as the header size + data size.
+    const uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
+    if (image_file_size < sizeof(ImageHeader) + image_header->GetDataSize()) {
+      *error_msg = StringPrintf("Image file truncated: %" PRIu64 " vs. %" PRIu64 ".",
+                                image_file_size,
+                                sizeof(ImageHeader) + image_header->GetDataSize());
+      return nullptr;
+    }
+
+    if (oat_file != nullptr) {
+      // If we have an oat file, check the oat file checksum. The oat file is only non-null for
+      // the app image case. Otherwise, we open the oat file after the image and check the
+      // checksum there.
+      const uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
+      const uint32_t image_oat_checksum = image_header->GetOatChecksum();
+      if (oat_checksum != image_oat_checksum) {
+        *error_msg = StringPrintf("Oat checksum 0x%x does not match the image one 0x%x in image %s",
+                                  oat_checksum,
+                                  image_oat_checksum,
+                                  image_filename);
+        return nullptr;
+      }
+    }
+
+    if (VLOG_IS_ON(startup)) {
+      LOG(INFO) << "Dumping image sections";
+      for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
+        const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
+        auto& section = image_header->GetImageSection(section_idx);
+        LOG(INFO) << section_idx << " start="
+            << reinterpret_cast<void*>(image_header->GetImageBegin() + section.Offset()) << " "
+            << section;
+      }
+    }
+
+    const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
+    // The location we want to map from is the first aligned page after the end of the stored
+    // (possibly compressed) data.
+    const size_t image_bitmap_offset = RoundUp(sizeof(ImageHeader) + image_header->GetDataSize(),
+                                               kPageSize);
+    const size_t end_of_bitmap = image_bitmap_offset + bitmap_section.Size();
+    if (end_of_bitmap != image_file_size) {
+      *error_msg = StringPrintf(
+          "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
+          end_of_bitmap);
+      return nullptr;
+    }
+
+    std::unique_ptr<MemMap> map;
+    // GetImageBegin is the preferred address to map the image. If we manage to map the
+    // image at the image begin, the amount of fixup work required is minimized.
+    map.reset(LoadImageFile(image_filename,
+                            image_location,
+                            *image_header,
+                            image_header->GetImageBegin(),
+                            file->Fd(),
+                            logger,
+                            error_msg));
+    // If the header specifies PIC mode, we can also map at a random low_4gb address since we can
+    // relocate in-place.
+    if (map == nullptr && image_header->IsPic()) {
+      map.reset(LoadImageFile(image_filename,
+                              image_location,
+                              *image_header,
+                              /* address */ nullptr,
+                              file->Fd(),
+                              logger,
+                              error_msg));
+    }
+    // Were we able to load something and continue?
+    if (map == nullptr) {
+      DCHECK(!error_msg->empty());
+      return nullptr;
+    }
+    DCHECK_EQ(0, memcmp(image_header, map->Begin(), sizeof(ImageHeader)));
+
+    std::unique_ptr<MemMap> image_bitmap_map(MemMap::MapFileAtAddress(nullptr,
+                                                                      bitmap_section.Size(),
+                                                                      PROT_READ,
+                                                                      MAP_PRIVATE,
+                                                                      file->Fd(),
+                                                                      image_bitmap_offset,
+                                                                      /*low_4gb*/false,
+                                                                      /*reuse*/false,
+                                                                      image_filename,
+                                                                      error_msg));
+    if (image_bitmap_map == nullptr) {
+      *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
+      return nullptr;
+    }
+    // The map is loaded; from now on use the image header from the mapped file,
+    // since RelocateInPlace may patch it.
+    image_header = reinterpret_cast<ImageHeader*>(map->Begin());
+    const uint32_t bitmap_index = ImageSpace::bitmap_index_.FetchAndAddSequentiallyConsistent(1);
+    std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u",
+                                         image_filename,
+                                         bitmap_index));
+    // Bitmap only needs to cover until the end of the mirror objects section.
+    const ImageSection& image_objects = image_header->GetImageSection(ImageHeader::kSectionObjects);
+    // We only want the mirror objects, not the ArtFields and ArtMethods.
+    uint8_t* const image_end = map->Begin() + image_objects.End();
+    std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap;
+    {
+      TimingLogger::ScopedTiming timing("CreateImageBitmap", &logger);
+      bitmap.reset(
+          accounting::ContinuousSpaceBitmap::CreateFromMemMap(
+              bitmap_name,
+              image_bitmap_map.release(),
+              reinterpret_cast<uint8_t*>(map->Begin()),
+              image_objects.End()));
+      if (bitmap == nullptr) {
+        *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
+        return nullptr;
+      }
+    }
+    {
+      TimingLogger::ScopedTiming timing("RelocateImage", &logger);
+      if (!RelocateInPlace(*image_header,
+                           map->Begin(),
+                           bitmap.get(),
+                           oat_file,
+                           error_msg)) {
+        return nullptr;
+      }
+    }
+    std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename,
+                                                     image_location,
+                                                     map.release(),
+                                                     bitmap.release(),
+                                                     image_end));
+
+    // VerifyImageAllocations() is called later in Runtime::Init(), because
+    // class roots such as ArtMethod::java_lang_reflect_ArtMethod_ and
+    // ArtField::java_lang_reflect_ArtField_, which are used by
+    // Object::SizeOf() (called from VerifyImageAllocations()), are not set
+    // yet at this point.
+    if (oat_file == nullptr) {
+      TimingLogger::ScopedTiming timing("OpenOatFile", &logger);
+      space->oat_file_ = OpenOatFile(*space, image_filename, error_msg);
+      if (space->oat_file_ == nullptr) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+      space->oat_file_non_owned_ = space->oat_file_.get();
+    } else {
+      space->oat_file_non_owned_ = oat_file;
+    }
+
+    if (validate_oat_file) {
+      TimingLogger::ScopedTiming timing("ValidateOatFile", &logger);
+      CHECK(space->oat_file_ != nullptr);
+      if (!ValidateOatFile(*space, *space->oat_file_, error_msg)) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+    }
+
+    Runtime* runtime = Runtime::Current();
+
+    // If oat_file is null, then it is the boot image space. Use oat_file_non_owned_ from the space
+    // to set the runtime methods.
+    CHECK_EQ(oat_file != nullptr, image_header->IsAppImage());
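+    // For an app image, the runtime methods must already match those recorded in the image.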
+    if (image_header->IsAppImage()) {
+      CHECK_EQ(runtime->GetResolutionMethod(),
+               image_header->GetImageMethod(ImageHeader::kResolutionMethod));
+      CHECK_EQ(runtime->GetImtConflictMethod(),
+               image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
+      CHECK_EQ(runtime->GetImtUnimplementedMethod(),
+               image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves),
+               image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly),
+               image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs),
+               image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveEverything),
+               image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod));
+    } else if (!runtime->HasResolutionMethod()) {
+      runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
+      runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
+      runtime->SetImtConflictMethod(image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
+      runtime->SetImtUnimplementedMethod(
+          image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod),
+          Runtime::kSaveAllCalleeSaves);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod), Runtime::kSaveRefsOnly);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod),
+          Runtime::kSaveRefsAndArgs);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod), Runtime::kSaveEverything);
+    }
+
+    VLOG(image) << "ImageSpace::Init exiting " << *space.get();
+    if (VLOG_IS_ON(image)) {
+      logger.Dump(LOG(INFO));
+    }
+    return space;
+  }
+
+ private:
+  static MemMap* LoadImageFile(const char* image_filename,
+                               const char* image_location,
+                               const ImageHeader& image_header,
+                               uint8_t* address,
+                               int fd,
+                               TimingLogger& logger,
+                               std::string* error_msg) {
+    TimingLogger::ScopedTiming timing("MapImageFile", &logger);
+    const ImageHeader::StorageMode storage_mode = image_header.GetStorageMode();
+    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
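+      // Uncompressed images can be mapped directly from the file.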
+      return MemMap::MapFileAtAddress(address,
+                                      image_header.GetImageSize(),
+                                      PROT_READ | PROT_WRITE,
+                                      MAP_PRIVATE,
+                                      fd,
+                                      0,
+                                      /*low_4gb*/true,
+                                      /*reuse*/false,
+                                      image_filename,
+                                      error_msg);
+    }
+
+    if (storage_mode != ImageHeader::kStorageModeLZ4 &&
+        storage_mode != ImageHeader::kStorageModeLZ4HC) {
+      *error_msg = StringPrintf("Invalid storage mode in image header %d",
+                                static_cast<int>(storage_mode));
+      return nullptr;
+    }
+
+    // Reserve output and decompress into it.
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous(image_location,
+                                                     address,
+                                                     image_header.GetImageSize(),
+                                                     PROT_READ | PROT_WRITE,
+                                                     /*low_4gb*/true,
+                                                     /*reuse*/false,
+                                                     error_msg));
+    if (map != nullptr) {
+      const size_t stored_size = image_header.GetDataSize();
+      const size_t decompress_offset = sizeof(ImageHeader);  // Skip the header.
+      std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
+                                                       PROT_READ,
+                                                       MAP_PRIVATE,
+                                                       fd,
+                                                       /*offset*/0,
+                                                       /*low_4gb*/false,
+                                                       image_filename,
+                                                       error_msg));
+      if (temp_map == nullptr) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
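+      // The image header is stored uncompressed; copy it into the output mapping verbatim.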
+      memcpy(map->Begin(), &image_header, sizeof(ImageHeader));
+      const uint64_t start = NanoTime();
+      // LZ4HC and LZ4 share the same internal format; both are handled by LZ4_decompress_safe.
+      TimingLogger::ScopedTiming timing2("LZ4 decompress image", &logger);
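+      // LZ4_decompress_safe returns a negative value on error; stored in the
+      // unsigned decompressed_size below, it becomes huge and fails the size check.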
+      const size_t decompressed_size = LZ4_decompress_safe(
+          reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
+          reinterpret_cast<char*>(map->Begin()) + decompress_offset,
+          stored_size,
+          map->Size() - decompress_offset);
+      VLOG(image) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
+      if (decompressed_size + sizeof(ImageHeader) != image_header.GetImageSize()) {
+        *error_msg = StringPrintf(
+            "Decompressed size does not match expected image size %zu vs %zu",
+            decompressed_size + sizeof(ImageHeader),
+            image_header.GetImageSize());
+        return nullptr;
+      }
+    }
+
+    return map.release();
+  }
+
+  class FixupVisitor : public ValueObject {
+   public:
+    FixupVisitor(const RelocationRange& boot_image,
+                 const RelocationRange& boot_oat,
+                 const RelocationRange& app_image,
+                 const RelocationRange& app_oat)
+        : boot_image_(boot_image),
+          boot_oat_(boot_oat),
+          app_image_(app_image),
+          app_oat_(app_oat) {}
+
+    // Return the relocated address of a heap object.
+    template <typename T>
+    ALWAYS_INLINE T* ForwardObject(T* src) const {
+      const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
+      if (boot_image_.InSource(uint_src)) {
+        return reinterpret_cast<T*>(boot_image_.ToDest(uint_src));
+      }
+      if (app_image_.InSource(uint_src)) {
+        return reinterpret_cast<T*>(app_image_.ToDest(uint_src));
+      }
+      // Since we are fixing up the app image, there should only be pointers to the app image and
+      // boot image.
+      DCHECK(src == nullptr) << reinterpret_cast<const void*>(src);
+      return src;
+    }
+
+    // Return the relocated address of a code pointer (contained in an oat file).
+    ALWAYS_INLINE const void* ForwardCode(const void* src) const {
+      const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
+      if (boot_oat_.InSource(uint_src)) {
+        return reinterpret_cast<const void*>(boot_oat_.ToDest(uint_src));
+      }
+      if (app_oat_.InSource(uint_src)) {
+        return reinterpret_cast<const void*>(app_oat_.ToDest(uint_src));
+      }
+      DCHECK(src == nullptr) << src;
+      return src;
+    }
+
+    // Must be called on pointers that have already been relocated to the destination range.
+    ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
+      return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
+    }
+
+   protected:
+    // Relocation ranges for the boot image/oat and app image/oat.
+    const RelocationRange boot_image_;
+    const RelocationRange boot_oat_;
+    const RelocationRange app_image_;
+    const RelocationRange app_oat_;
+  };
+
+  // Adapter for mirror::Class::FixupNativePointers.
+  class FixupObjectAdapter : public FixupVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
+
+    template <typename T>
+    T* operator()(T* obj) const {
+      return ForwardObject(obj);
+    }
+  };
+
+  class FixupRootVisitor : public FixupVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupRootVisitor(Args... args) : FixupVisitor(args...) {}
+
+    ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      if (!root->IsNull()) {
+        VisitRoot(root);
+      }
+    }
+
+    ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      mirror::Object* ref = root->AsMirrorPtr();
+      mirror::Object* new_ref = ForwardObject(ref);
+      if (ref != new_ref) {
+        root->Assign(new_ref);
+      }
+    }
+  };
+
+  class FixupObjectVisitor : public FixupVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* visited,
+                                const PointerSize pointer_size,
+                                Args... args)
+        : FixupVisitor(args...),
+          pointer_size_(pointer_size),
+          visited_(visited) {}
+
+    // Fix up separately since we also need to fix up method entrypoints.
+    ALWAYS_INLINE void VisitRootIfNonNull(
+        mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
+
+    ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
+        const {}
+
+    ALWAYS_INLINE void operator()(mirror::Object* obj,
+                                  MemberOffset offset,
+                                  bool is_static ATTRIBUTE_UNUSED) const
+        NO_THREAD_SAFETY_ANALYSIS {
+      // There could be overlap between ranges, so we must avoid visiting the same reference twice.
+      // Avoid the class field since we already fixed it up in FixupClassVisitor.
+      if (offset.Uint32Value() != mirror::Object::ClassOffset().Uint32Value()) {
+        // Space is not yet added to the heap, don't do a read barrier.
+        mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
+            offset);
+        // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+        // image.
+        obj->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(offset, ForwardObject(ref));
+      }
+    }
+
+    // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
+    // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
+    template <typename Visitor>
+    void UpdatePointerArrayContents(mirror::PointerArray* array, const Visitor& visitor) const
+        NO_THREAD_SAFETY_ANALYSIS {
+      DCHECK(array != nullptr);
+      DCHECK(visitor.IsInAppImage(array));
+      // The bit for the array contents is different from the bit for the array itself, since we
+      // may have already visited the array as a long / int array while walking the bitmap, without
+      // knowing it was a pointer array.
+      static_assert(kObjectAlignment == 8u, "array bit may be in another object");
+      mirror::Object* const contents_bit = reinterpret_cast<mirror::Object*>(
+          reinterpret_cast<uintptr_t>(array) + kObjectAlignment);
+      // If the bit is not set then the contents have not yet been updated.
+      if (!visited_->Test(contents_bit)) {
+        array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, pointer_size_, visitor);
+        visited_->Set(contents_bit);
+      }
+    }
+
+    // java.lang.ref.Reference visitor.
+    void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
+        SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+      mirror::Object* obj = ref->GetReferent<kWithoutReadBarrier>();
+      ref->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
+          mirror::Reference::ReferentOffset(),
+          ForwardObject(obj));
+    }
+
+    void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+      if (visited_->Test(obj)) {
+        // Already visited.
+        return;
+      }
+      visited_->Set(obj);
+
+      // Handle class specially first since we need it to be updated to properly visit the rest of
+      // the instance fields.
+      {
+        mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+        DCHECK(klass != nullptr) << "Null class in image";
+        // No AsClass since our fields aren't quite fixed up yet.
+        mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
+        if (klass != new_klass) {
+          obj->SetClass<kVerifyNone>(new_klass);
+        }
+        if (new_klass != klass && IsInAppImage(new_klass)) {
+          // Make sure the klass contents are fixed up since we depend on it to walk the fields.
+          operator()(new_klass);
+        }
+      }
+
+      obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
+          *this,
+          *this);
+      // Note that this code relies on no circular dependencies.
+      // We want to use our own class loader and not the one in the image.
+      if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
+        mirror::Class* as_klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
+        FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
+        as_klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(as_klass,
+                                                                        pointer_size_,
+                                                                        visitor);
+        // Deal with the pointer arrays. Use the helper function since multiple classes can reference
+        // the same arrays.
+        mirror::PointerArray* const vtable = as_klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
+        if (vtable != nullptr && IsInAppImage(vtable)) {
+          operator()(vtable);
+          UpdatePointerArrayContents(vtable, visitor);
+        }
+        mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
+        // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
+        // contents.
+        if (iftable != nullptr && IsInAppImage(iftable)) {
+          operator()(iftable);
+          for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
+            if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
+              mirror::PointerArray* methods =
+                  iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
+              if (visitor.IsInAppImage(methods)) {
+                operator()(methods);
+                DCHECK(methods != nullptr);
+                UpdatePointerArrayContents(methods, visitor);
+              }
+            }
+          }
+        }
+      }
+    }
+
+   private:
+    const PointerSize pointer_size_;
+    gc::accounting::ContinuousSpaceBitmap* const visited_;
+  };
+
+  class ForwardObjectAdapter {
+   public:
+    ALWAYS_INLINE explicit ForwardObjectAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
+
+    template <typename T>
+    ALWAYS_INLINE T* operator()(T* src) const {
+      return visitor_->ForwardObject(src);
+    }
+
+   private:
+    const FixupVisitor* const visitor_;
+  };
+
+  class ForwardCodeAdapter {
+   public:
+    ALWAYS_INLINE explicit ForwardCodeAdapter(const FixupVisitor* visitor)
+        : visitor_(visitor) {}
+
+    template <typename T>
+    ALWAYS_INLINE T* operator()(T* src) const {
+      return visitor_->ForwardCode(src);
+    }
+
+   private:
+    const FixupVisitor* const visitor_;
+  };
+
+  class FixupArtMethodVisitor : public FixupVisitor, public ArtMethodVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupArtMethodVisitor(bool fixup_heap_objects, PointerSize pointer_size, Args... args)
+        : FixupVisitor(args...),
+          fixup_heap_objects_(fixup_heap_objects),
+          pointer_size_(pointer_size) {}
+
+    virtual void Visit(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
+      // TODO: Separate visitor for runtime vs normal methods.
+      if (UNLIKELY(method->IsRuntimeMethod())) {
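+        // Runtime methods only need their IMT conflict table and quick entry point forwarded.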
+        ImtConflictTable* table = method->GetImtConflictTable(pointer_size_);
+        if (table != nullptr) {
+          ImtConflictTable* new_table = ForwardObject(table);
+          if (table != new_table) {
+            method->SetImtConflictTable(new_table, pointer_size_);
+          }
+        }
+        const void* old_code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+        const void* new_code = ForwardCode(old_code);
+        if (old_code != new_code) {
+          method->SetEntryPointFromQuickCompiledCodePtrSize(new_code, pointer_size_);
+        }
+      } else {
+        if (fixup_heap_objects_) {
+          method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this), pointer_size_);
+        }
+        method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this), pointer_size_);
+      }
+    }
+
+   private:
+    const bool fixup_heap_objects_;
+    const PointerSize pointer_size_;
+  };
+
+  class FixupArtFieldVisitor : public FixupVisitor, public ArtFieldVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupArtFieldVisitor(Args... args) : FixupVisitor(args...) {}
+
+    virtual void Visit(ArtField* field) NO_THREAD_SAFETY_ANALYSIS {
+      field->UpdateObjects(ForwardObjectAdapter(this));
+    }
+  };
+
+  // Relocate an image space mapped at target_base which may previously have been at a different
+  // base address. "In place" means a single ImageSpace is modified in place rather than relocated
+  // from one ImageSpace to another, so only one image space is needed, not one each for source
+  // and destination.
+  static bool RelocateInPlace(ImageHeader& image_header,
+                              uint8_t* target_base,
+                              accounting::ContinuousSpaceBitmap* bitmap,
+                              const OatFile* app_oat_file,
+                              std::string* error_msg) {
+    DCHECK(error_msg != nullptr);
+    if (!image_header.IsPic()) {
+      if (image_header.GetImageBegin() == target_base) {
+        return true;
+      }
+      *error_msg = StringPrintf("Cannot relocate non-pic image for oat file %s",
+                                (app_oat_file != nullptr) ? app_oat_file->GetLocation().c_str() : "");
+      return false;
+    }
+    // Set up sections.
+    uint32_t boot_image_begin = 0;
+    uint32_t boot_image_end = 0;
+    uint32_t boot_oat_begin = 0;
+    uint32_t boot_oat_end = 0;
+    const PointerSize pointer_size = image_header.GetPointerSize();
+    gc::Heap* const heap = Runtime::Current()->GetHeap();
+    heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
+    if (boot_image_begin == boot_image_end) {
+      *error_msg = "Can not relocate app image without boot image space";
+      return false;
+    }
+    if (boot_oat_begin == boot_oat_end) {
+      *error_msg = "Can not relocate app image without boot oat file";
+      return false;
+    }
+    const uint32_t boot_image_size = boot_image_end - boot_image_begin;
+    const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin;
+    const uint32_t image_header_boot_image_size = image_header.GetBootImageSize();
+    const uint32_t image_header_boot_oat_size = image_header.GetBootOatSize();
+    if (boot_image_size != image_header_boot_image_size) {
+      *error_msg = StringPrintf("Boot image size %" PRIu64 " does not match expected size %"
+                                    PRIu64,
+                                static_cast<uint64_t>(boot_image_size),
+                                static_cast<uint64_t>(image_header_boot_image_size));
+      return false;
+    }
+    if (boot_oat_size != image_header_boot_oat_size) {
+      *error_msg = StringPrintf("Boot oat size %" PRIu64 " does not match expected size %"
+                                    PRIu64,
+                                static_cast<uint64_t>(boot_oat_size),
+                                static_cast<uint64_t>(image_header_boot_oat_size));
+      return false;
+    }
+    TimingLogger logger(__FUNCTION__, true, false);
+    RelocationRange boot_image(image_header.GetBootImageBegin(),
+                               boot_image_begin,
+                               boot_image_size);
+    RelocationRange boot_oat(image_header.GetBootOatBegin(),
+                             boot_oat_begin,
+                             boot_oat_size);
+    RelocationRange app_image(reinterpret_cast<uintptr_t>(image_header.GetImageBegin()),
+                              reinterpret_cast<uintptr_t>(target_base),
+                              image_header.GetImageSize());
+    // Use the oat data section since this is where the OatFile::Begin is.
+    RelocationRange app_oat(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
+                            // Not necessarily in low 4GB.
+                            reinterpret_cast<uintptr_t>(app_oat_file->Begin()),
+                            image_header.GetOatDataEnd() - image_header.GetOatDataBegin());
+    VLOG(image) << "App image " << app_image;
+    VLOG(image) << "App oat " << app_oat;
+    VLOG(image) << "Boot image " << boot_image;
+    VLOG(image) << "Boot oat " << boot_oat;
+    // True if we need to fix up any heap pointers; otherwise only code pointers need fixing.
+    const bool fixup_image = boot_image.Delta() != 0 || app_image.Delta() != 0;
+    const bool fixup_code = boot_oat.Delta() != 0 || app_oat.Delta() != 0;
+    if (!fixup_image && !fixup_code) {
+      // Nothing to fix up.
+      return true;
+    }
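+    // Disallow read barriers (debug check) while pointers are rewritten in place.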
+    ScopedDebugDisallowReadBarriers sddrb(Thread::Current());
+    // Need to update the image to be at the target base.
+    const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
+    uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
+    uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
+    FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
+    if (fixup_image) {
+      // Two pass approach, fix up all classes first, then fix up non class-objects.
+      // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
+      std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
+          gc::accounting::ContinuousSpaceBitmap::Create("Relocate bitmap",
+                                                        target_base,
+                                                        image_header.GetImageSize()));
+      FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
+                                              pointer_size,
+                                              boot_image,
+                                              boot_oat,
+                                              app_image,
+                                              app_oat);
+      TimingLogger::ScopedTiming timing("Fixup classes", &logger);
+      // Fixing up objects may read fields in the boot image; take the mutator lock here for
+      // safety, though it's probably not required.
+      ScopedObjectAccess soa(Thread::Current());
+      timing.NewTiming("Fixup objects");
+      bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
+      // Fixup image roots.
+      CHECK(app_image.InSource(reinterpret_cast<uintptr_t>(
+          image_header.GetImageRoots<kWithoutReadBarrier>())));
+      image_header.RelocateImageObjects(app_image.Delta());
+      CHECK_EQ(image_header.GetImageBegin(), target_base);
+      // Fix up the dex caches' native pointer arrays (strings, types, methods, fields).
+      auto* dex_caches = image_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches)->
+          AsObjectArray<mirror::DexCache, kVerifyNone, kWithoutReadBarrier>();
+      for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
+        mirror::DexCache* dex_cache = dex_caches->Get<kVerifyNone, kWithoutReadBarrier>(i);
+        // Fix up dex cache pointers.
+        mirror::StringDexCacheType* strings = dex_cache->GetStrings();
+        if (strings != nullptr) {
+          mirror::StringDexCacheType* new_strings = fixup_adapter.ForwardObject(strings);
+          if (strings != new_strings) {
+            dex_cache->SetStrings(new_strings);
+          }
+          dex_cache->FixupStrings<kWithoutReadBarrier>(new_strings, fixup_adapter);
+        }
+        GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
+        if (types != nullptr) {
+          GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
+          if (types != new_types) {
+            dex_cache->SetResolvedTypes(new_types);
+          }
+          dex_cache->FixupResolvedTypes<kWithoutReadBarrier>(new_types, fixup_adapter);
+        }
+        ArtMethod** methods = dex_cache->GetResolvedMethods();
+        if (methods != nullptr) {
+          ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
+          if (methods != new_methods) {
+            dex_cache->SetResolvedMethods(new_methods);
+          }
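+          // Forward each resolved method entry; the array itself was forwarded above.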
+          for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
+            ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, pointer_size);
+            ArtMethod* copy = fixup_adapter.ForwardObject(orig);
+            if (orig != copy) {
+              mirror::DexCache::SetElementPtrSize(new_methods, j, copy, pointer_size);
+            }
+          }
+        }
+        ArtField** fields = dex_cache->GetResolvedFields();
+        if (fields != nullptr) {
+          ArtField** new_fields = fixup_adapter.ForwardObject(fields);
+          if (fields != new_fields) {
+            dex_cache->SetResolvedFields(new_fields);
+          }
+          for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
+            ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, pointer_size);
+            ArtField* copy = fixup_adapter.ForwardObject(orig);
+            if (orig != copy) {
+              mirror::DexCache::SetElementPtrSize(new_fields, j, copy, pointer_size);
+            }
+          }
+        }
+      }
+    }
+    {
+      // Only touches objects in the app image, no need for mutator lock.
+      TimingLogger::ScopedTiming timing("Fixup methods", &logger);
+      FixupArtMethodVisitor method_visitor(fixup_image,
+                                           pointer_size,
+                                           boot_image,
+                                           boot_oat,
+                                           app_image,
+                                           app_oat);
+      image_header.VisitPackedArtMethods(&method_visitor, target_base, pointer_size);
+    }
+    if (fixup_image) {
+      {
+        // Only touches objects in the app image, no need for mutator lock.
+        TimingLogger::ScopedTiming timing("Fixup fields", &logger);
+        FixupArtFieldVisitor field_visitor(boot_image, boot_oat, app_image, app_oat);
+        image_header.VisitPackedArtFields(&field_visitor, target_base);
+      }
+      {
+        TimingLogger::ScopedTiming timing("Fixup imt", &logger);
+        image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
+      }
+      {
+        TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
+        image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
+      }
+      // In the app image case, the image methods are actually in the boot image.
+      image_header.RelocateImageMethods(boot_image.Delta());
+      const auto& class_table_section = image_header.GetImageSection(ImageHeader::kSectionClassTable);
+      if (class_table_section.Size() > 0u) {
+        // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+        // This also relies on VisitRoots not doing any verification that could fail after we
+        // update the roots to the image addresses.
+        ScopedObjectAccess soa(Thread::Current());
+        WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+        ClassTable temp_table;
+        temp_table.ReadFromMemory(target_base + class_table_section.Offset());
+        FixupRootVisitor root_visitor(boot_image, boot_oat, app_image, app_oat);
+        temp_table.VisitRoots(root_visitor);
+      }
+    }
+    if (VLOG_IS_ON(image)) {
+      logger.Dump(LOG(INFO));
+    }
+    return true;
+  }
+
+  static std::unique_ptr<OatFile> OpenOatFile(const ImageSpace& image,
+                                              const char* image_path,
+                                              std::string* error_msg) {
+    const ImageHeader& image_header = image.GetImageHeader();
+    std::string oat_filename = ImageHeader::GetOatLocationFromImageLocation(image_path);
+
+    CHECK(image_header.GetOatDataBegin() != nullptr);
+
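+    // Open the oat file at the oat data / oat file addresses recorded in the image header.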
+    std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_filename,
+                                                    oat_filename,
+                                                    image_header.GetOatDataBegin(),
+                                                    image_header.GetOatFileBegin(),
+                                                    !Runtime::Current()->IsAotCompiler(),
+                                                    /*low_4gb*/false,
+                                                    nullptr,
+                                                    error_msg));
+    if (oat_file == nullptr) {
+      *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
+                                oat_filename.c_str(),
+                                image.GetName(),
+                                error_msg->c_str());
+      return nullptr;
+    }
+    uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
+    uint32_t image_oat_checksum = image_header.GetOatChecksum();
+    if (oat_checksum != image_oat_checksum) {
+      *error_msg = StringPrintf("Failed to match oat file checksum 0x%x to expected oat checksum 0x%x"
+                                " in image %s",
+                                oat_checksum,
+                                image_oat_checksum,
+                                image.GetName());
+      return nullptr;
+    }
+    int32_t image_patch_delta = image_header.GetPatchDelta();
+    int32_t oat_patch_delta = oat_file->GetOatHeader().GetImagePatchDelta();
+    if (oat_patch_delta != image_patch_delta && !image_header.CompilePic()) {
+      // We should have already relocated by this point. Bail out.
+      *error_msg = StringPrintf("Failed to match oat file patch delta %d to expected patch delta %d "
+                                "in image %s",
+                                oat_patch_delta,
+                                image_patch_delta,
+                                image.GetName());
+      return nullptr;
+    }
+
+    return oat_file;
+  }
+
+  static bool ValidateOatFile(const ImageSpace& space,
+                              const OatFile& oat_file,
+                              std::string* error_msg) {
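+    // Verify that each dex file referenced by the oat file still matches its on-disk checksum.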
+    for (const OatFile::OatDexFile* oat_dex_file : oat_file.GetOatDexFiles()) {
+      const std::string& dex_file_location = oat_dex_file->GetDexFileLocation();
+      uint32_t dex_file_location_checksum;
+      if (!DexFile::GetChecksum(dex_file_location.c_str(), &dex_file_location_checksum, error_msg)) {
+        *error_msg = StringPrintf("Failed to get checksum of dex file '%s' referenced by image %s: "
+                                  "%s",
+                                  dex_file_location.c_str(),
+                                  space.GetName(),
+                                  error_msg->c_str());
+        return false;
+      }
+      if (dex_file_location_checksum != oat_dex_file->GetDexFileLocationChecksum()) {
+        *error_msg = StringPrintf("ValidateOatFile found checksum mismatch between oat file '%s' and "
+                                  "dex file '%s' (0x%x != 0x%x)",
+                                  oat_file.GetLocation().c_str(),
+                                  dex_file_location.c_str(),
+                                  oat_dex_file->GetDexFileLocationChecksum(),
+                                  dex_file_location_checksum);
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
 static constexpr uint64_t kLowSpaceValue = 50 * MB;
 static constexpr uint64_t kTmpFsSentinelValue = 384 * MB;
 
@@ -461,8 +1390,8 @@
   // for time-to-UI.
   if (fs_overall_size > kTmpFsSentinelValue) {
     if (fs_free_size < kLowSpaceValue) {
-      *error_msg = StringPrintf("Low-memory situation: only %4.2f megabytes available after image"
-                                " generation, need at least %" PRIu64 ".",
+      *error_msg = StringPrintf("Low-memory situation: only %4.2f megabytes available, need at "
+                                "least %" PRIu64 ".",
                                 static_cast<double>(fs_free_size) / MB,
                                 kLowSpaceValue / MB);
       return false;
@@ -471,375 +1400,206 @@
   return true;
 }
 
-ImageSpace* ImageSpace::Create(const char* image_location,
-                               const InstructionSet image_isa,
-                               std::string* error_msg) {
+std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_location,
+                                                        const InstructionSet image_isa,
+                                                        bool secondary_image,
+                                                        std::string* error_msg) {
+  ScopedTrace trace(__FUNCTION__);
+
+  // Step 0: Extra zygote work.
+
+  // Step 0.a: If we're the zygote, mark the boot attempt.
+  const bool is_zygote = Runtime::Current()->IsZygote();
+  if (is_zygote && !secondary_image) {
+    MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
+  }
+
+  // Step 0.b: If we're the zygote, check for free space, and prune the cache preemptively,
+  //           if necessary. While the runtime may be fine (it is pretty tolerant to
+  //           out-of-disk-space situations), other parts of the platform are not.
+  //
+  //           The advantage of doing this proactively is that the later steps are simplified,
+  //           i.e., we do not need to implement retry logic.
   std::string system_filename;
   bool has_system = false;
   std::string cache_filename;
   bool has_cache = false;
   bool dalvik_cache_exists = false;
   bool is_global_cache = true;
-  const bool found_image = FindImageFilename(image_location, image_isa, &system_filename,
-                                             &has_system, &cache_filename, &dalvik_cache_exists,
-                                             &has_cache, &is_global_cache);
+  std::string dalvik_cache;
+  bool found_image = FindImageFilenameImpl(image_location,
+                                           image_isa,
+                                           &has_system,
+                                           &system_filename,
+                                           &dalvik_cache_exists,
+                                           &dalvik_cache,
+                                           &is_global_cache,
+                                           &has_cache,
+                                           &cache_filename);
 
-  if (Runtime::Current()->IsZygote()) {
-    MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
+  if (is_zygote && dalvik_cache_exists) {
+    DCHECK(!dalvik_cache.empty());
+    std::string local_error_msg;
+    if (!CheckSpace(dalvik_cache, &local_error_msg)) {
+      LOG(WARNING) << local_error_msg << " Preemptively pruning the dalvik cache.";
+      PruneDalvikCache(image_isa);
+
+      // Re-evaluate the image.
+      found_image = FindImageFilenameImpl(image_location,
+                                          image_isa,
+                                          &has_system,
+                                          &system_filename,
+                                          &dalvik_cache_exists,
+                                          &dalvik_cache,
+                                          &is_global_cache,
+                                          &has_cache,
+                                          &cache_filename);
+    }
   }
 
-  ImageSpace* space;
+  // Collect all the errors.
+  std::vector<std::string> error_msgs;
+
+  // Step 1: Check if we have an existing and relocated image.
+
+  // Step 1.a: We have images in both /system and the cache. Then they must match.
+  if (found_image && has_system && has_cache) {
+    std::string local_error_msg;
+    // Check that the two files match.
+    if (ChecksumsMatch(system_filename.c_str(), cache_filename.c_str(), &local_error_msg)) {
+      std::unique_ptr<ImageSpace> relocated_space =
+          ImageSpaceLoader::Load(image_location,
+                                 cache_filename,
+                                 is_zygote,
+                                 is_global_cache,
+                                 /* validate_oat_file */ false,
+                                 &local_error_msg);
+      if (relocated_space != nullptr) {
+        return relocated_space;
+      }
+    }
+    error_msgs.push_back(local_error_msg);
+  }
+
+  // Step 1.b: Only have a cache file.
+  if (found_image && !has_system && has_cache) {
+    std::string local_error_msg;
+    std::unique_ptr<ImageSpace> cache_space =
+        ImageSpaceLoader::Load(image_location,
+                               cache_filename,
+                               is_zygote,
+                               is_global_cache,
+                               /* validate_oat_file */ true,
+                               &local_error_msg);
+    if (cache_space != nullptr) {
+      return cache_space;
+    }
+    error_msgs.push_back(local_error_msg);
+  }
+
+  // Step 2: We have an existing image in /system.
+
+  // Step 2.a: We are not required to relocate it. Then we can use it directly.
   bool relocate = Runtime::Current()->ShouldRelocate();
-  bool can_compile = Runtime::Current()->IsImageDex2OatEnabled();
-  if (found_image) {
-    const std::string* image_filename;
-    bool is_system = false;
-    bool relocated_version_used = false;
-    if (relocate) {
-      if (!dalvik_cache_exists) {
-        *error_msg = StringPrintf("Requiring relocation for image '%s' at '%s' but we do not have "
-                                  "any dalvik_cache to find/place it in.",
-                                  image_location, system_filename.c_str());
-        return nullptr;
-      }
-      if (has_system) {
-        if (has_cache && ChecksumsMatch(system_filename.c_str(), cache_filename.c_str())) {
-          // We already have a relocated version
-          image_filename = &cache_filename;
-          relocated_version_used = true;
-        } else {
-          // We cannot have a relocated version, Relocate the system one and use it.
 
-          std::string reason;
-          bool success;
+  if (found_image && has_system && !relocate) {
+    std::string local_error_msg;
+    std::unique_ptr<ImageSpace> system_space =
+        ImageSpaceLoader::Load(image_location,
+                               system_filename,
+                               is_zygote,
+                               is_global_cache,
+                               /* validate_oat_file */ false,
+                               &local_error_msg);
+    if (system_space != nullptr) {
+      return system_space;
+    }
+    error_msgs.push_back(local_error_msg);
+  }
 
-          // Check whether we are allowed to relocate.
-          if (!can_compile) {
-            reason = "Image dex2oat disabled by -Xnoimage-dex2oat.";
-            success = false;
-          } else if (!ImageCreationAllowed(is_global_cache, &reason)) {
-            // Whether we can write to the cache.
-            success = false;
-          } else {
-            // Try to relocate.
-            success = RelocateImage(image_location, cache_filename.c_str(), image_isa, &reason);
-          }
-
-          if (success) {
-            relocated_version_used = true;
-            image_filename = &cache_filename;
-          } else {
-            *error_msg = StringPrintf("Unable to relocate image '%s' from '%s' to '%s': %s",
-                                      image_location, system_filename.c_str(),
-                                      cache_filename.c_str(), reason.c_str());
-            // We failed to create files, remove any possibly garbage output.
-            // Since ImageCreationAllowed was true above, we are the zygote
-            // and therefore the only process expected to generate these for
-            // the device.
-            PruneDalvikCache(image_isa);
-            return nullptr;
-          }
+  // Step 2.b: We require a relocated image. Then we must patch it. This step fails if this is a
+  //           secondary image.
+  if (found_image && has_system && relocate) {
+    std::string local_error_msg;
+    if (!Runtime::Current()->IsImageDex2OatEnabled()) {
+      local_error_msg = "Patching disabled.";
+    } else if (secondary_image) {
+      local_error_msg = "Cannot patch a secondary image.";
+    } else if (ImageCreationAllowed(is_global_cache, &local_error_msg)) {
+      bool patch_success =
+          RelocateImage(image_location, cache_filename.c_str(), image_isa, &local_error_msg);
+      if (patch_success) {
+        std::unique_ptr<ImageSpace> patched_space =
+            ImageSpaceLoader::Load(image_location,
+                                   cache_filename,
+                                   is_zygote,
+                                   is_global_cache,
+                                   /* validate_oat_file */ false,
+                                   &local_error_msg);
+        if (patched_space != nullptr) {
+          return patched_space;
         }
-      } else {
-        CHECK(has_cache);
-        // We can just use cache's since it should be fine. This might or might not be relocated.
-        image_filename = &cache_filename;
       }
-    } else {
-      if (has_system && has_cache) {
-        // Check they have the same cksum. If they do use the cache. Otherwise system.
-        if (ChecksumsMatch(system_filename.c_str(), cache_filename.c_str())) {
-          image_filename = &cache_filename;
-          relocated_version_used = true;
-        } else {
-          image_filename = &system_filename;
-          is_system = true;
+    }
+    error_msgs.push_back(StringPrintf("Cannot relocate image %s to %s: %s",
+                                      image_location,
+                                      cache_filename.c_str(),
+                                      local_error_msg.c_str()));
+  }
+
+  // Step 3: We do not have an existing image in /system, so generate an image into the dalvik
+  //         cache. This step fails if this is a secondary image.
+  if (!has_system) {
+    std::string local_error_msg;
+    if (!Runtime::Current()->IsImageDex2OatEnabled()) {
+      local_error_msg = "Image compilation disabled.";
+    } else if (secondary_image) {
+      local_error_msg = "Cannot compile a secondary image.";
+    } else if (ImageCreationAllowed(is_global_cache, &local_error_msg)) {
+      bool compilation_success = GenerateImage(cache_filename, image_isa, &local_error_msg);
+      if (compilation_success) {
+        std::unique_ptr<ImageSpace> compiled_space =
+            ImageSpaceLoader::Load(image_location,
+                                   cache_filename,
+                                   is_zygote,
+                                   is_global_cache,
+                                   /* validate_oat_file */ false,
+                                   &local_error_msg);
+        if (compiled_space != nullptr) {
+          return compiled_space;
         }
-      } else if (has_system) {
-        image_filename = &system_filename;
-        is_system = true;
-      } else {
-        CHECK(has_cache);
-        image_filename = &cache_filename;
       }
     }
-    {
-      // Note that we must not use the file descriptor associated with
-      // ScopedFlock::GetFile to Init the image file. We want the file
-      // descriptor (and the associated exclusive lock) to be released when
-      // we leave Create.
-      ScopedFlock image_lock;
-      image_lock.Init(image_filename->c_str(), error_msg);
-      VLOG(startup) << "Using image file " << image_filename->c_str() << " for image location "
-                    << image_location;
-      // If we are in /system we can assume the image is good. We can also
-      // assume this if we are using a relocated image (i.e. image checksum
-      // matches) since this is only different by the offset. We need this to
-      // make sure that host tests continue to work.
-      space = ImageSpace::Init(image_filename->c_str(), image_location,
-                               !(is_system || relocated_version_used), error_msg);
-    }
-    if (space != nullptr) {
-      return space;
-    }
-
-    if (relocated_version_used) {
-      // Something is wrong with the relocated copy (even though checksums match). Cleanup.
-      // This can happen if the .oat is corrupt, since the above only checks the .art checksums.
-      // TODO: Check the oat file validity earlier.
-      *error_msg = StringPrintf("Attempted to use relocated version of %s at %s generated from %s "
-                                "but image failed to load: %s",
-                                image_location, cache_filename.c_str(), system_filename.c_str(),
-                                error_msg->c_str());
-      PruneDalvikCache(image_isa);
-      return nullptr;
-    } else if (is_system) {
-      // If the /system file exists, it should be up-to-date, don't try to generate it.
-      *error_msg = StringPrintf("Failed to load /system image '%s': %s",
-                                image_filename->c_str(), error_msg->c_str());
-      return nullptr;
-    } else {
-      // Otherwise, log a warning and fall through to GenerateImage.
-      LOG(WARNING) << *error_msg;
-    }
+    error_msgs.push_back(StringPrintf("Cannot compile image to %s: %s",
+                                      cache_filename.c_str(),
+                                      local_error_msg.c_str()));
   }
 
-  if (!can_compile) {
-    *error_msg = "Not attempting to compile image because -Xnoimage-dex2oat";
-    return nullptr;
-  } else if (!dalvik_cache_exists) {
-    *error_msg = StringPrintf("No place to put generated image.");
-    return nullptr;
-  } else if (!ImageCreationAllowed(is_global_cache, error_msg)) {
-    return nullptr;
-  } else if (!GenerateImage(cache_filename, image_isa, error_msg)) {
-    *error_msg = StringPrintf("Failed to generate image '%s': %s",
-                              cache_filename.c_str(), error_msg->c_str());
-    // We failed to create files, remove any possibly garbage output.
-    // Since ImageCreationAllowed was true above, we are the zygote
-    // and therefore the only process expected to generate these for
-    // the device.
-    PruneDalvikCache(image_isa);
-    return nullptr;
-  } else {
-    // Check whether there is enough space left over after we have generated the image.
-    if (!CheckSpace(cache_filename, error_msg)) {
-      // No. Delete the generated image and try to run out of the dex files.
-      PruneDalvikCache(image_isa);
-      return nullptr;
-    }
+  // We failed. Prune the cache to free up space, create a compound error message, and return
+  // no image.
+  PruneDalvikCache(image_isa);
 
-    // Note that we must not use the file descriptor associated with
-    // ScopedFlock::GetFile to Init the image file. We want the file
-    // descriptor (and the associated exclusive lock) to be released when
-    // we leave Create.
-    ScopedFlock image_lock;
-    image_lock.Init(cache_filename.c_str(), error_msg);
-    space = ImageSpace::Init(cache_filename.c_str(), image_location, true, error_msg);
-    if (space == nullptr) {
-      *error_msg = StringPrintf("Failed to load generated image '%s': %s",
-                                cache_filename.c_str(), error_msg->c_str());
+  std::ostringstream oss;
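+  // Join the per-step failure messages into one compound message, one message per line.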
+  bool first = true;
+  for (const auto& msg : error_msgs) {
+    if (!first) {
+      oss << "\n    ";
     }
-    return space;
+    oss << msg;
+    first = false;
   }
+  *error_msg = oss.str();
+
+  return nullptr;
 }
 
-void ImageSpace::VerifyImageAllocations() {
-  uint8_t* current = Begin() + RoundUp(sizeof(ImageHeader), kObjectAlignment);
-  while (current < End()) {
-    CHECK_ALIGNED(current, kObjectAlignment);
-    auto* obj = reinterpret_cast<mirror::Object*>(current);
-    CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-    CHECK(live_bitmap_->Test(obj)) << PrettyTypeOf(obj);
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
-    }
-    current += RoundUp(obj->SizeOf(), kObjectAlignment);
-  }
-}
-
-ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_location,
-                             bool validate_oat_file, std::string* error_msg) {
-  CHECK(image_filename != nullptr);
-  CHECK(image_location != nullptr);
-
-  uint64_t start_time = 0;
-  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    start_time = NanoTime();
-    LOG(INFO) << "ImageSpace::Init entering image_filename=" << image_filename;
-  }
-
-  std::unique_ptr<File> file(OS::OpenFileForReading(image_filename));
-  if (file.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to open '%s'", image_filename);
-    return nullptr;
-  }
-  ImageHeader image_header;
-  bool success = file->ReadFully(&image_header, sizeof(image_header));
-  if (!success || !image_header.IsValid()) {
-    *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
-    return nullptr;
-  }
-  // Check that the file is large enough.
-  uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
-  if (image_header.GetImageSize() > image_file_size) {
-    *error_msg = StringPrintf("Image file too small for image heap: %" PRIu64 " vs. %zu.",
-                              image_file_size, image_header.GetImageSize());
-    return nullptr;
-  }
-
-  if (kIsDebugBuild) {
-    LOG(INFO) << "Dumping image sections";
-    for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
-      const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
-      auto& section = image_header.GetImageSection(section_idx);
-      LOG(INFO) << section_idx << " start="
-          << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) << " "
-          << section;
-    }
-  }
-
-  const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
-  auto end_of_bitmap = static_cast<size_t>(bitmap_section.End());
-  if (end_of_bitmap != image_file_size) {
-    *error_msg = StringPrintf(
-        "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
-        end_of_bitmap);
-    return nullptr;
-  }
-
-  // Note: The image header is part of the image due to mmap page alignment required of offset.
-  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(
-      image_header.GetImageBegin(), image_header.GetImageSize(),
-      PROT_READ | PROT_WRITE, MAP_PRIVATE, file->Fd(), 0, false, image_filename, error_msg));
-  if (map.get() == nullptr) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
-  }
-  CHECK_EQ(image_header.GetImageBegin(), map->Begin());
-  DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
-
-  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(
-      nullptr, bitmap_section.Size(), PROT_READ, MAP_PRIVATE, file->Fd(),
-      bitmap_section.Offset(), false, image_filename, error_msg));
-  if (image_map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
-    return nullptr;
-  }
-  uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
-  std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename,
-                                       bitmap_index));
-  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
-      accounting::ContinuousSpaceBitmap::CreateFromMemMap(
-          bitmap_name, image_map.release(), reinterpret_cast<uint8_t*>(map->Begin()),
-          accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size())));
-  if (bitmap.get() == nullptr) {
-    *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
-    return nullptr;
-  }
-
-  // We only want the mirror object, not the ArtFields and ArtMethods.
-  uint8_t* const image_end =
-      map->Begin() + image_header.GetImageSection(ImageHeader::kSectionObjects).End();
-  std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename, image_location,
-                                                   map.release(), bitmap.release(), image_end));
-
-  // VerifyImageAllocations() will be called later in Runtime::Init()
-  // as some class roots like ArtMethod::java_lang_reflect_ArtMethod_
-  // and ArtField::java_lang_reflect_ArtField_, which are used from
-  // Object::SizeOf() which VerifyImageAllocations() calls, are not
-  // set yet at this point.
-
-  space->oat_file_.reset(space->OpenOatFile(image_filename, error_msg));
-  if (space->oat_file_.get() == nullptr) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
-  }
-  space->oat_file_non_owned_ = space->oat_file_.get();
-
-  if (validate_oat_file && !space->ValidateOatFile(error_msg)) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
-  }
-
-  Runtime* runtime = Runtime::Current();
-  runtime->SetInstructionSet(space->oat_file_->GetOatHeader().GetInstructionSet());
-
-  runtime->SetResolutionMethod(image_header.GetImageMethod(ImageHeader::kResolutionMethod));
-  runtime->SetImtConflictMethod(image_header.GetImageMethod(ImageHeader::kImtConflictMethod));
-  runtime->SetImtUnimplementedMethod(
-      image_header.GetImageMethod(ImageHeader::kImtUnimplementedMethod));
-  runtime->SetCalleeSaveMethod(
-      image_header.GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
-  runtime->SetCalleeSaveMethod(
-      image_header.GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
-  runtime->SetCalleeSaveMethod(
-      image_header.GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
-
-  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    LOG(INFO) << "ImageSpace::Init exiting (" << PrettyDuration(NanoTime() - start_time)
-             << ") " << *space.get();
-  }
-  return space.release();
-}
-
-OatFile* ImageSpace::OpenOatFile(const char* image_path, std::string* error_msg) const {
-  const ImageHeader& image_header = GetImageHeader();
-  std::string oat_filename = ImageHeader::GetOatLocationFromImageLocation(image_path);
-
-  CHECK(image_header.GetOatDataBegin() != nullptr);
-
-  OatFile* oat_file = OatFile::Open(oat_filename,
-                                    oat_filename,
-                                    image_header.GetOatDataBegin(),
-                                    image_header.GetOatFileBegin(),
-                                    !Runtime::Current()->IsAotCompiler(),
-                                    nullptr,
-                                    error_msg);
-  if (oat_file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
-                              oat_filename.c_str(), GetName(), error_msg->c_str());
-    return nullptr;
-  }
-  uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
-  uint32_t image_oat_checksum = image_header.GetOatChecksum();
-  if (oat_checksum != image_oat_checksum) {
-    *error_msg = StringPrintf("Failed to match oat file checksum 0x%x to expected oat checksum 0x%x"
-                              " in image %s", oat_checksum, image_oat_checksum, GetName());
-    return nullptr;
-  }
-  int32_t image_patch_delta = image_header.GetPatchDelta();
-  int32_t oat_patch_delta = oat_file->GetOatHeader().GetImagePatchDelta();
-  if (oat_patch_delta != image_patch_delta && !image_header.CompilePic()) {
-    // We should have already relocated by this point. Bail out.
-    *error_msg = StringPrintf("Failed to match oat file patch delta %d to expected patch delta %d "
-                              "in image %s", oat_patch_delta, image_patch_delta, GetName());
-    return nullptr;
-  }
-
-  return oat_file;
-}
-
-bool ImageSpace::ValidateOatFile(std::string* error_msg) const {
-  CHECK(oat_file_.get() != nullptr);
-  for (const OatFile::OatDexFile* oat_dex_file : oat_file_->GetOatDexFiles()) {
-    const std::string& dex_file_location = oat_dex_file->GetDexFileLocation();
-    uint32_t dex_file_location_checksum;
-    if (!DexFile::GetChecksum(dex_file_location.c_str(), &dex_file_location_checksum, error_msg)) {
-      *error_msg = StringPrintf("Failed to get checksum of dex file '%s' referenced by image %s: "
-                                "%s", dex_file_location.c_str(), GetName(), error_msg->c_str());
-      return false;
-    }
-    if (dex_file_location_checksum != oat_dex_file->GetDexFileLocationChecksum()) {
-      *error_msg = StringPrintf("ValidateOatFile found checksum mismatch between oat file '%s' and "
-                                "dex file '%s' (0x%x != 0x%x)",
-                                oat_file_->GetLocation().c_str(), dex_file_location.c_str(),
-                                oat_dex_file->GetDexFileLocationChecksum(),
-                                dex_file_location_checksum);
-      return false;
-    }
-  }
-  return true;
+std::unique_ptr<ImageSpace> ImageSpace::CreateFromAppImage(const char* image,
+                                                           const OatFile* oat_file,
+                                                           std::string* error_msg) {
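+  // For app images the file name doubles as the image location, and the caller passes in an
+  // already-opened oat file, so no further oat file validation happens here.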
+  return ImageSpaceLoader::Init(image,
+                                image,
+                                /* validate_oat_file */ false,
+                                oat_file,
+                                /* out */ error_msg);
 }
 
 const OatFile* ImageSpace::GetOatFile() const {
@@ -859,6 +1619,59 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
+void ImageSpace::CreateMultiImageLocations(const std::string& input_image_file_name,
+                                           const std::string& boot_classpath,
+                                           std::vector<std::string>* image_file_names) {
+  DCHECK(image_file_names != nullptr);
+
+  std::vector<std::string> images;
+  Split(boot_classpath, ':', &images);
+
+  // Add the remaining images to the list. We may have to adjust their locations:
+  //
+  // For example, image_file_name is /a/b/c/d/e.art
+  //              images[0] is          f/c/d/e.art
+  // ----------------------------------------------
+  //              images[1] is          g/h/i/j.art  -> /a/b/h/i/j.art
+  const std::string& first_image = images[0];
+  // Length of common suffix.
+  size_t common = 0;
+  while (common < input_image_file_name.size() &&
+         common < first_image.size() &&
+         *(input_image_file_name.end() - common - 1) == *(first_image.end() - common - 1)) {
+    ++common;
+  }
+  // We want to replace the prefix of each boot class path entry with the prefix of the input
+  // image. This handles the case where the image file name contains @ separators.
+  // Example: image_file_name is oats/system@framework@boot.art
+  //          images[0] is .../arm/boot.art
+  // means that the image name prefix will be oats/system@framework@
+  // so that the other images are openable.
+  const size_t old_prefix_length = first_image.size() - common;
+  const std::string new_prefix = input_image_file_name.substr(
+      0,
+      input_image_file_name.size() - common);
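+  // For the example above: common == 10 ("/c/d/e.art"), old_prefix_length == 1 ("f"), and
+  // new_prefix == "/a/b".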
+
+  // Apply pattern to images[1] .. images[n].
+  for (size_t i = 1; i < images.size(); ++i) {
+    const std::string& image = images[i];
+    CHECK_GT(image.length(), old_prefix_length);
+    std::string suffix = image.substr(old_prefix_length);
+    image_file_names->push_back(new_prefix + suffix);
+  }
+}
+
+void ImageSpace::DumpSections(std::ostream& os) const {
+  const uint8_t* base = Begin();
+  const ImageHeader& header = GetImageHeader();
+  for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
+    auto section_type = static_cast<ImageHeader::ImageSections>(i);
+    const ImageSection& section = header.GetImageSection(section_type);
+    os << section_type << " " << reinterpret_cast<const void*>(base + section.Offset())
+       << "-" << reinterpret_cast<const void*>(base + section.End()) << "\n";
+  }
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 9920742..534232d 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -35,7 +35,7 @@
     return kSpaceTypeImageSpace;
   }
 
-  // Create a Space from an image file for a specified instruction
+  // Create a boot image space from an image file for a specified instruction
   // set. Cannot be used for future allocation or collected.
   //
   // Create also opens the OatFile associated with the image file so
@@ -43,13 +43,17 @@
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image, InstructionSet image_isa, std::string* error_msg)
+  static std::unique_ptr<ImageSpace> CreateBootImage(const char* image,
+                                                     InstructionSet image_isa,
+                                                     bool secondary_image,
+                                                     std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Reads the image header from the specified image location for the
-  // instruction set image_isa or dies trying.
-  static ImageHeader* ReadImageHeaderOrDie(const char* image_location,
-                                           InstructionSet image_isa);
+  // Try to open an existing app image space.
+  static std::unique_ptr<ImageSpace> CreateFromAppImage(const char* image,
+                                                        const OatFile* oat_file,
+                                                        std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Reads the image header from the specified image location for the
   // instruction set image_isa. Returns null on failure, with
@@ -119,32 +123,52 @@
                                 bool* has_data,
                                 bool *is_global_cache);
 
- private:
-  // Tries to initialize an ImageSpace from the given image path,
-  // returning null on error.
+  // Use the input image filename to adapt the names in the given boot classpath to establish
+  // complete locations for secondary images.
+  static void CreateMultiImageLocations(const std::string& input_image_file_name,
+                                        const std::string& boot_classpath,
+                                        std::vector<std::string>* image_filenames);
+
+  // Return the end of the image, which includes non-heap objects such as ArtMethods and
+  // ArtFields.
+  uint8_t* GetImageEnd() const {
+    return Begin() + GetImageHeader().GetImageSize();
+  }
+
+  // Return the start of the associated oat file.
+  uint8_t* GetOatFileBegin() const {
+    return GetImageHeader().GetOatFileBegin();
+  }
+
+  // Return the end of the associated oat file.
+  uint8_t* GetOatFileEnd() const {
+    return GetImageHeader().GetOatFileEnd();
+  }
+
+  void DumpSections(std::ostream& os) const;
+
+ protected:
+  // Tries to initialize an ImageSpace from the given image path, returning null on error.
   //
-  // If validate_oat_file is false (for /system), do not verify that
-  // image's OatFile is up-to-date relative to its DexFile
-  // inputs. Otherwise (for /data), validate the inputs and generate
-  // the OatFile in /data/dalvik-cache if necessary.
-  static ImageSpace* Init(const char* image_filename, const char* image_location,
-                          bool validate_oat_file, std::string* error_msg)
+  // If validate_oat_file is false (for /system), do not verify that image's OatFile is up-to-date
+  // relative to its DexFile inputs. Otherwise (for /data), validate the inputs and generate the
+  // OatFile in /data/dalvik-cache if necessary. If the oat_file is null, it uses the oat file from
+  // the image.
+  static std::unique_ptr<ImageSpace> Init(const char* image_filename,
+                                          const char* image_location,
+                                          bool validate_oat_file,
+                                          const OatFile* oat_file,
+                                          std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  OatFile* OpenOatFile(const char* image, std::string* error_msg) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool ValidateOatFile(std::string* error_msg) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  friend class Space;
-
   static Atomic<uint32_t> bitmap_index_;
 
   std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
-  ImageSpace(const std::string& name, const char* image_location,
-             MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap, uint8_t* end);
+  ImageSpace(const std::string& name,
+             const char* image_location,
+             MemMap* mem_map,
+             accounting::ContinuousSpaceBitmap* live_bitmap,
+             uint8_t* end);
 
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
@@ -157,6 +181,10 @@
 
   const std::string image_location_;
 
+  friend class ImageSpaceLoader;
+  friend class Space;
+
+ private:
   DISALLOW_COPY_AND_ASSIGN(ImageSpace);
 };
 
diff --git a/runtime/gc/space/image_space_fs.h b/runtime/gc/space/image_space_fs.h
new file mode 100644
index 0000000..fa941c0
--- /dev/null
+++ b/runtime/gc/space/image_space_fs.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_IMAGE_SPACE_FS_H_
+#define ART_RUNTIME_GC_SPACE_IMAGE_SPACE_FS_H_
+
+#include <dirent.h>
+#include <dlfcn.h>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+#include "globals.h"
+#include "os.h"
+#include "runtime.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+// This file contains helper code for ImageSpace. It has most of the file-system
+// related code, including handling A/B OTA.
+
+namespace impl {
+
+// Delete the (regular file and symlink) contents of the given directory. If the recurse flag
+// is true, also descend into sub-directories and remove them once emptied.
+static void DeleteDirectoryContents(const std::string& dir, bool recurse) {
+  if (!OS::DirectoryExists(dir.c_str())) {
+    return;
+  }
+  DIR* c_dir = opendir(dir.c_str());
+  if (c_dir == nullptr) {
+    PLOG(WARNING) << "Unable to open " << dir << " to delete it's contents";
+    return;
+  }
+
+  for (struct dirent* de = readdir(c_dir); de != nullptr; de = readdir(c_dir)) {
+    const char* name = de->d_name;
+    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
+      continue;
+    }
+    // We only want to delete regular files and symbolic links.
+    std::string file = StringPrintf("%s/%s", dir.c_str(), name);
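+    // Note: filesystems without d_type support report DT_UNKNOWN; such entries hit the
+    // warning branch below instead of being deleted.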
+    if (de->d_type != DT_REG && de->d_type != DT_LNK) {
+      if (de->d_type == DT_DIR) {
+        if (recurse) {
+          DeleteDirectoryContents(file, recurse);
+          // Try to rmdir the directory.
+          if (rmdir(file.c_str()) != 0) {
+            PLOG(ERROR) << "Unable to rmdir " << file;
+          }
+        }
+      } else {
+        LOG(WARNING) << "Unexpected file type of " << std::hex << de->d_type << " encountered.";
+      }
+    } else {
+      // Try to unlink the file.
+      if (unlink(file.c_str()) != 0) {
+        PLOG(ERROR) << "Unable to unlink " << file;
+      }
+    }
+  }
+  CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
+}
+
+}  // namespace impl
+
+
+// We are relocating or generating the core image. We should get rid of everything. It is all
+// out-of-date. We also don't really care if this fails since it is just a convenience.
+// Adapted from prune_dex_cache(const char* subdir) in frameworks/native/cmds/installd/commands.c
+// Note this should only be used during first boot.
+static void PruneDalvikCache(InstructionSet isa) {
+  CHECK_NE(isa, kNone);
+  // Prune the base /data/dalvik-cache.
+  // Note: GetDalvikCache may return the empty string if the directory doesn't
+  // exist. It is safe to pass "" to DeleteDirectoryContents, so this is okay.
+  impl::DeleteDirectoryContents(GetDalvikCache("."), false);
+  // Prune /data/dalvik-cache/<isa>.
+  impl::DeleteDirectoryContents(GetDalvikCache(GetInstructionSetString(isa)), false);
+
+  // Be defensive. There should be a runtime created here, but this may be called in a test.
+  if (Runtime::Current() != nullptr) {
+    Runtime::Current()->SetPrunedDalvikCache(true);
+  }
+}
+
+// We write out an empty file to the zygote's ISA specific cache dir at the start of
+// every zygote boot and delete it when the boot completes. If we find a file already
+// present, it usually means the boot didn't complete. We wipe the ISA's dalvik cache
+// directory if that has happened too many times in a row.
+static void MarkZygoteStart(const InstructionSet isa, const uint32_t max_failed_boots) {
+  const std::string isa_subdir = GetDalvikCache(GetInstructionSetString(isa));
+  CHECK(!isa_subdir.empty()) << "Dalvik cache not found";
+  const std::string boot_marker = isa_subdir + "/.booting";
+  const char* file_name = boot_marker.c_str();
+
+  uint32_t num_failed_boots = 0;
+  std::unique_ptr<File> file(OS::OpenFileReadWrite(file_name));
+  if (file.get() == nullptr) {
+    file.reset(OS::CreateEmptyFile(file_name));
+
+    if (file.get() == nullptr) {
+      int saved_errno = errno;
+      PLOG(WARNING) << "Failed to create boot marker.";
+      if (saved_errno != ENOSPC) {
+        return;
+      }
+
+      LOG(WARNING) << "Pruning dalvik cache because of low-memory situation.";
+      impl::DeleteDirectoryContents(isa_subdir, false);
+
+      // Try once more. The marker cannot exist after pruning, so it has to be created.
+      file.reset(OS::CreateEmptyFile(file_name));
+      if (file == nullptr) {
+        PLOG(WARNING) << "Failed to create boot marker.";
+        return;
+      }
+    }
+  } else {
+    if (!file->ReadFully(&num_failed_boots, sizeof(num_failed_boots))) {
+      PLOG(WARNING) << "Failed to read boot marker.";
+      file->Erase();
+      return;
+    }
+  }
+
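+  // A max_failed_boots value of zero disables the wiping altogether.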
+  if (max_failed_boots != 0 && num_failed_boots > max_failed_boots) {
+    LOG(WARNING) << "Incomplete boot detected. Pruning dalvik cache";
+    impl::DeleteDirectoryContents(isa_subdir, false);
+  }
+
+  ++num_failed_boots;
+  VLOG(startup) << "Number of failed boots on : " << boot_marker << " = " << num_failed_boots;
+
+  if (lseek(file->Fd(), 0, SEEK_SET) == -1) {
+    PLOG(WARNING) << "Failed to write boot marker.";
+    file->Erase();
+    return;
+  }
+
+  if (!file->WriteFully(&num_failed_boots, sizeof(num_failed_boots))) {
+    PLOG(WARNING) << "Failed to write boot marker.";
+    file->Erase();
+    return;
+  }
+
+  if (file->FlushCloseOrErase() != 0) {
+    PLOG(WARNING) << "Failed to flush boot marker.";
+  }
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_IMAGE_SPACE_FS_H_
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 2798b21..010f677 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -27,6 +27,7 @@
 #include "base/stl_util.h"
 #include "image.h"
 #include "os.h"
+#include "scoped_thread_state_change.h"
 #include "space-inl.h"
 #include "thread-inl.h"
 
@@ -190,6 +191,7 @@
   MutexLock mu(self, lock_);
   auto it = large_objects_.find(ptr);
   if (UNLIKELY(it == large_objects_.end())) {
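+    // DumpSpaces reads managed objects and therefore needs the mutator lock; take it here,
+    // since we are about to abort with a fatal log anyway.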
+    ScopedObjectAccess soa(self);
     Runtime::Current()->GetHeap()->DumpSpaces(LOG(INTERNAL_FATAL));
     LOG(FATAL) << "Attempted to free large object " << ptr << " which was not live";
   }
@@ -521,7 +523,7 @@
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(GetAddressForAllocationInfo(new_info));
-  // We always put our object at the start of the free block, there can not be another free block
+  // We always put our object at the start of the free block; there cannot be another free block
   // before it.
   if (kIsDebugBuild) {
     mprotect(obj, allocation_size, PROT_READ | PROT_WRITE);
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 05b484a..ad38724 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -22,7 +22,7 @@
 namespace gc {
 namespace space {
 
-class LargeObjectSpaceTest : public SpaceTest {
+class LargeObjectSpaceTest : public SpaceTest<CommonRuntimeTest> {
  public:
   void LargeObjectTest();
 
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 4e56c4a..c6b2870 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -39,7 +39,7 @@
     int rc = call args; \
     if (UNLIKELY(rc != 0)) { \
       errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
+      PLOG(FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
diff --git a/runtime/gc/space/memory_tool_malloc_space-inl.h b/runtime/gc/space/memory_tool_malloc_space-inl.h
index ea8b8aa..6cb2465 100644
--- a/runtime/gc/space/memory_tool_malloc_space-inl.h
+++ b/runtime/gc/space/memory_tool_malloc_space-inl.h
@@ -240,9 +240,9 @@
                     kAdjustForRedzoneInAllocSize,
                     kUseObjSizeForUsable>::MemoryToolMallocSpace(
     MemMap* mem_map, size_t initial_size, Params... params) : S(mem_map, initial_size, params...) {
-  MEMORY_TOOL_MAKE_DEFINED(mem_map->Begin(), initial_size);
-  MEMORY_TOOL_MAKE_UNDEFINED(mem_map->Begin() + initial_size,
-                     mem_map->Size() - initial_size);
+  // We don't want to change the valgrind state of the mem map here, as the allocator is already
+  // initialized at this point and changing it may interfere with what the allocator does
+  // internally. Note that the tail beyond the initial size is mprotected.
 }
 
 template <typename S,
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 9a2d0c6..2d71294 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -126,15 +126,20 @@
   } else {
     bool is_live_percent_valid = live_bytes_ != static_cast<size_t>(-1);
     if (is_live_percent_valid) {
-      uint live_percent = GetLivePercent();
+      DCHECK(IsInToSpace());
+      DCHECK(!IsLargeTail());
+      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
+      DCHECK_LE(live_bytes_, BytesAllocated());
+      const size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
+      DCHECK_LE(live_bytes_, bytes_allocated);
       if (IsAllocated()) {
         // Side note: live_percent == 0 does not necessarily mean
         // there are no live objects, due to rounding (there may be a
         // few).
-        result = live_percent < kEvaculateLivePercentThreshold;
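+        // Cross-multiplying is equivalent to the old GetLivePercent() < threshold check
+        // (the threshold is integral) but avoids the integer division.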
+        result = live_bytes_ * 100U < kEvaculateLivePercentThreshold * bytes_allocated;
       } else {
         DCHECK(IsLarge());
-        result = live_percent == 0U;
+        result = live_bytes_ == 0U;
       }
     } else {
       result = false;
@@ -216,17 +221,6 @@
   evac_region_ = nullptr;
 }
 
-void RegionSpace::AssertAllRegionLiveBytesZeroOrCleared() {
-  if (kIsDebugBuild) {
-    MutexLock mu(Thread::Current(), region_lock_);
-    for (size_t i = 0; i < num_regions_; ++i) {
-      Region* r = &regions_[i];
-      size_t live_bytes = r->LiveBytes();
-      CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
-    }
-  }
-}
-
 void RegionSpace::LogFragmentationAllocFailure(std::ostream& os,
                                                size_t /* failed_alloc_bytes */) {
   size_t max_contiguous_allocation = 0;
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 14e8005..823aa38 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -215,7 +215,16 @@
     reg->AddLiveBytes(alloc_size);
   }
 
-  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_);
+  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_) {
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), region_lock_);
+      for (size_t i = 0; i < num_regions_; ++i) {
+        Region* r = &regions_[i];
+        size_t live_bytes = r->LiveBytes();
+        CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
+      }
+    }
+  }
 
   void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
   bool AllocNewTlab(Thread* self) REQUIRES(!region_lock_);
@@ -386,18 +395,6 @@
       return live_bytes_;
     }
 
-    uint GetLivePercent() const {
-      DCHECK(IsInToSpace());
-      DCHECK(!IsLargeTail());
-      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
-      DCHECK_LE(live_bytes_, BytesAllocated());
-      size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
-      DCHECK_GE(bytes_allocated, 0U);
-      uint result = (live_bytes_ * 100U) / bytes_allocated;
-      DCHECK_LE(result, 100U);
-      return result;
-    }
-
     size_t BytesAllocated() const {
       if (IsLarge()) {
         DCHECK_LT(begin_ + kRegionSize, top_);
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 49126d2..b016095 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -17,9 +17,6 @@
 
 #include "rosalloc_space-inl.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/trace.h"
-
 #include "base/time_utils.h"
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
@@ -247,7 +244,10 @@
 size_t RosAllocSpace::Trim() {
   VLOG(heap) << "RosAllocSpace::Trim() ";
   {
-    MutexLock mu(Thread::Current(), lock_);
+    Thread* const self = Thread::Current();
+    // SOA required for RosAlloc::Trim() -> ArtRosAllocMoreCore() -> Heap::GetRosAllocSpace.
+    ScopedObjectAccess soa(self);
+    MutexLock mu(self, lock_);
     // Trim to release memory at the end of the space.
     rosalloc_->Trim();
   }
@@ -368,12 +368,18 @@
   SetFootprintLimit(footprint_limit);
 }
 
+void RosAllocSpace::DumpStats(std::ostream& os) {
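+  // Suspend all mutators so rosalloc's internal accounting is stable while it is dumped.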
+  ScopedSuspendAll ssa(__FUNCTION__);
+  rosalloc_->DumpStats(os);
+}
+
 }  // namespace space
 
 namespace allocator {
 
 // Callback from rosalloc when it needs to increase the footprint.
-void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment) {
+void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   Heap* heap = Runtime::Current()->GetHeap();
   art::gc::space::RosAllocSpace* rosalloc_space = heap->GetRosAllocSpace(rosalloc);
   DCHECK(rosalloc_space != nullptr);
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index bc14738..b175fbf 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -144,6 +144,8 @@
     rosalloc_->LogFragmentationAllocFailure(os, failed_alloc_bytes);
   }
 
+  void DumpStats(std::ostream& os);
+
  protected:
   RosAllocSpace(MemMap* mem_map, size_t initial_size, const std::string& name,
                 allocator::RosAlloc* rosalloc, uint8_t* begin, uint8_t* end, uint8_t* limit,
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
deleted file mode 100644
index 0c5be03..0000000
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/space_create_test.cc b/runtime/gc/space/space_create_test.cc
new file mode 100644
index 0000000..170f927
--- /dev/null
+++ b/runtime/gc/space/space_create_test.cc
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "space_test.h"
+
+#include "dlmalloc_space.h"
+#include "rosalloc_space.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+enum MallocSpaceType {
+  kMallocSpaceDlMalloc,
+  kMallocSpaceRosAlloc,
+};
+
+class SpaceCreateTest : public SpaceTest<CommonRuntimeTestWithParam<MallocSpaceType>> {
+ public:
+  MallocSpace* CreateSpace(const std::string& name,
+                           size_t initial_size,
+                           size_t growth_limit,
+                           size_t capacity,
+                           uint8_t* requested_begin) {
+    const MallocSpaceType type = GetParam();
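+    // The parameter, set by the INSTANTIATE_TEST_CASE_P calls at the bottom of this file,
+    // selects which allocator implementation backs the space.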
+    if (type == kMallocSpaceDlMalloc) {
+      return DlMallocSpace::Create(name,
+                                   initial_size,
+                                   growth_limit,
+                                   capacity,
+                                   requested_begin,
+                                   false);
+    }
+    DCHECK_EQ(static_cast<uint32_t>(type), static_cast<uint32_t>(kMallocSpaceRosAlloc));
+    return RosAllocSpace::Create(name,
+                                 initial_size,
+                                 growth_limit,
+                                 capacity,
+                                 requested_begin,
+                                 Runtime::Current()->GetHeap()->IsLowMemoryMode(),
+                                 false);
+  }
+};
+
+TEST_P(SpaceCreateTest, InitTestBody) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  {
+    // Init < max == growth
+    std::unique_ptr<Space> space(CreateSpace("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init == max == growth
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init > max == growth
+    space.reset(CreateSpace("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Growth == init < max
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Growth < init < max
+    space.reset(CreateSpace("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Init < growth < max
+    space.reset(CreateSpace("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init < max < growth
+    space.reset(CreateSpace("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+  }
+}
+
+// TODO: This test is not very good, we should improve it.
+// The test should do more allocations before the creation of the ZygoteSpace, and then do
+// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
+// the GC works with the ZygoteSpace.
+TEST_P(SpaceCreateTest, ZygoteSpaceTestBody) {
+  size_t dummy;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make the space findable to the heap; it will also be deleted when the runtime is cleaned up.
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+
+  // Make sure that the zygote space isn't directly at the start of the space.
+  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    ScopedSuspendAll ssa("Add image space");
+    heap->RemoveSpace(old_space);
+  }
+  heap->RevokeAllThreadLocalBuffers();
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space, false);
+
+  // Make the space findable to the heap; it will also be deleted when the runtime is cleaned up.
+  AddSpace(space, false);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  ptr1.Assign(Alloc(space,
+                    self,
+                    1 * MB,
+                    &ptr1_bytes_allocated,
+                    &ptr1_usable_size,
+                    &ptr1_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  ptr3.Assign(AllocWithGrowth(space,
+                              self,
+                              2 * MB,
+                              &ptr3_bytes_allocated,
+                              &ptr3_usable_size,
+                              &ptr3_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(2U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+
+  // Final clean up.
+  free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeTestBody) {
+  size_t dummy = 0;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Make the space findable to the heap; it will also be deleted when the runtime is cleaned up.
+  AddSpace(space);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeListTestBody) {
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make the space findable to the heap; it will also be deleted when the runtime is cleaned up.
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the max allowed footprint.
+  mirror::Object* lots_of_objects[1024];
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = Alloc(space,
+                               self,
+                               size_of_zero_length_byte_array,
+                               &allocation_size,
+                               &usable_size,
+                               &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+
+  // Succeeds, fits by adjusting the max allowed footprint.
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    lots_of_objects[i] = AllocWithGrowth(space,
+                                         self,
+                                         1024,
+                                         &allocation_size,
+                                         &usable_size,
+                                         &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+}
+
+INSTANTIATE_TEST_CASE_P(CreateRosAllocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceRosAlloc));
+INSTANTIATE_TEST_CASE_P(CreateDlMallocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceDlMalloc));
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 4d2db11..20ef44a 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -27,25 +27,28 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change.h"
+#include "thread_list.h"
 #include "zygote_space.h"
 
 namespace art {
 namespace gc {
 namespace space {
 
-class SpaceTest : public CommonRuntimeTest {
+template <class Super>
+class SpaceTest : public Super {
  public:
-  jobject byte_array_class_;
-
-  SpaceTest() : byte_array_class_(nullptr) {
-  }
+  jobject byte_array_class_ = nullptr;
 
   void AddSpace(ContinuousSpace* space, bool revoke = true) {
     Heap* heap = Runtime::Current()->GetHeap();
     if (revoke) {
       heap->RevokeAllThreadLocalBuffers();
     }
-    heap->AddSpace(space);
+    {
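+      // Suspend all threads: the heap's space list must not change while mutators or the
+      // GC may be iterating over it.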
+      ScopedThreadStateChange sts(Thread::Current(), kSuspended);
+      ScopedSuspendAll ssa("Add space");
+      heap->AddSpace(space);
+    }
     heap->SetSpaceAsDefault(space);
   }
 
@@ -62,13 +65,19 @@
     return reinterpret_cast<mirror::Class*>(self->DecodeJObject(byte_array_class_));
   }
 
-  mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                        size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* Alloc(space::MallocSpace* alloc_space,
+                        Thread* self,
+                        size_t bytes,
+                        size_t* bytes_allocated,
+                        size_t* usable_size,
                         size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
-    mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size,
+    mirror::Object* obj = alloc_space->Alloc(self,
+                                             bytes,
+                                             bytes_allocated,
+                                             usable_size,
                                              bytes_tl_bulk_allocated);
     if (obj != nullptr) {
       InstallClass(obj, byte_array_class.Get(), bytes);
@@ -76,8 +85,11 @@
     return obj;
   }
 
-  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                                  size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space,
+                                  Thread* self,
+                                  size_t bytes,
+                                  size_t* bytes_allocated,
+                                  size_t* usable_size,
                                   size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
@@ -117,10 +129,6 @@
 
   typedef MallocSpace* (*CreateSpaceFn)(const std::string& name, size_t initial_size, size_t growth_limit,
                                         size_t capacity, uint8_t* requested_begin);
-  void InitTestBody(CreateSpaceFn create_space);
-  void ZygoteSpaceTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeListTestBody(CreateSpaceFn create_space);
 
   void SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
                                            int round, size_t growth_limit);
@@ -132,278 +140,11 @@
   return *seed;
 }
 
-void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
-  // This will lead to error messages in the log.
-  ScopedLogSeverity sls(LogSeverity::FATAL);
-
-  {
-    // Init < max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init == max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init > max == growth
-    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Growth == init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Growth < init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Init < growth < max
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init < max < growth
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-}
-
-// TODO: This test is not very good, we should improve it.
-// The test should do more allocations before the creation of the ZygoteSpace, and then do
-// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
-// the GC works with the ZygoteSpace.
-void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) {
-  size_t dummy;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-
-  // Make sure that the zygote space isn't directly at the start of the space.
-  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  space::Space* old_space = space;
-  heap->RemoveSpace(old_space);
-  heap->RevokeAllThreadLocalBuffers();
-  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
-                                                              heap->IsLowMemoryMode(),
-                                                              &space);
-  delete old_space;
-  // Add the zygote space.
-  AddSpace(zygote_space, false);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space, false);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                    &ptr1_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                              &ptr3_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(2U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-
-  // Final clean up.
-  free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) {
-  size_t dummy = 0;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the max allowed footprint.
-  mirror::Object* lots_of_objects[1024];
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
-    lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size,
-                               &usable_size, &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-
-  // Succeeds, fits by adjusting the max allowed footprint.
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size,
-                                         &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-}
-
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
-                                                    int round, size_t growth_limit) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space,
+                                                           intptr_t object_size,
+                                                           int round,
+                                                           size_t growth_limit) {
   if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) ||
       ((object_size < 0 && -object_size >= static_cast<intptr_t>(growth_limit)))) {
     // No allocation can succeed
@@ -576,7 +317,9 @@
   EXPECT_LE(space->Size(), growth_limit);
 }
 
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size,
+                                                             CreateSpaceFn create_space) {
   if (object_size < SizeOfZeroLengthByteArray()) {
     // Too small for the object layout/model.
     return;
@@ -614,25 +357,8 @@
     SizeFootPrintGrowthLimitAndTrimDriver(-size, spaceFn); \
   }
 
-#define TEST_SPACE_CREATE_FN_BASE(spaceName, spaceFn) \
-  class spaceName##BaseTest : public SpaceTest { \
-  }; \
-  \
-  TEST_F(spaceName##BaseTest, Init) { \
-    InitTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, ZygoteSpace) { \
-    ZygoteSpaceTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFree) { \
-    AllocAndFreeTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFreeList) { \
-    AllocAndFreeListTestBody(spaceFn); \
-  }
-
 #define TEST_SPACE_CREATE_FN_STATIC(spaceName, spaceFn) \
-  class spaceName##StaticTest : public SpaceTest { \
+  class spaceName##StaticTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(12B, spaceName, spaceFn, 12) \
@@ -648,7 +374,7 @@
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(8MB, spaceName, spaceFn, 8 * MB)
 
 #define TEST_SPACE_CREATE_FN_RANDOM(spaceName, spaceFn) \
-  class spaceName##RandomTest : public SpaceTest { \
+  class spaceName##RandomTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimRandom(16B, spaceName, spaceFn, 16) \
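
The refactoring above boils down to one pattern: SpaceTest no longer fixes its base
class, so the same allocation helpers can sit on either a plain runtime fixture or a
value-parameterized one. A compressed sketch (CommonRuntimeTestWithParam is assumed to
be the param-enabled counterpart of CommonRuntimeTest):

    template <class Super>
    class SpaceTest : public Super {
      // Shared helpers: AddSpace(), Alloc(), AllocWithGrowth(), ...
    };

    // Static tests stack the helpers on the plain fixture:
    class MySpaceStaticTest : public SpaceTest<CommonRuntimeTest> {};

    // Parameterized tests stack them on a gtest-param base instead:
    class SpaceCreateTest
        : public SpaceTest<CommonRuntimeTestWithParam<MallocSpaceType>> {};
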
diff --git a/runtime/gc_map.h b/runtime/gc_map.h
deleted file mode 100644
index b4ccdd6..0000000
--- a/runtime/gc_map.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_GC_MAP_H_
-#define ART_RUNTIME_GC_MAP_H_
-
-#include <stdint.h>
-
-#include "base/logging.h"
-#include "base/macros.h"
-
-namespace art {
-
-// Lightweight wrapper for native PC offset to reference bit maps.
-class NativePcOffsetToReferenceMap {
- public:
-  explicit NativePcOffsetToReferenceMap(const uint8_t* data) : data_(data) {
-    CHECK(data_ != nullptr);
-  }
-
-  // The number of entries in the table.
-  size_t NumEntries() const {
-    return data_[2] | (data_[3] << 8);
-  }
-
-  // Return address of bitmap encoding what are live references.
-  const uint8_t* GetBitMap(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    return &Table()[entry_offset + NativeOffsetWidth()];
-  }
-
-  // Get the native PC encoded in the table at the given index.
-  uintptr_t GetNativePcOffset(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    uintptr_t result = 0;
-    for (size_t i = 0; i < NativeOffsetWidth(); ++i) {
-      result |= Table()[entry_offset + i] << (i * 8);
-    }
-    return result;
-  }
-
-  // Does the given offset have an entry?
-  bool HasEntry(uintptr_t native_pc_offset) {
-    for (size_t i = 0; i < NumEntries(); ++i) {
-      if (GetNativePcOffset(i) == native_pc_offset) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  // Finds the bitmap associated with the native pc offset.
-  const uint8_t* FindBitMap(uintptr_t native_pc_offset) {
-    size_t num_entries = NumEntries();
-    size_t index = Hash(native_pc_offset) % num_entries;
-    size_t misses = 0;
-    while (GetNativePcOffset(index) != native_pc_offset) {
-      index = (index + 1) % num_entries;
-      misses++;
-      DCHECK_LT(misses, num_entries) << "Failed to find offset: " << native_pc_offset;
-    }
-    return GetBitMap(index);
-  }
-
-  static uint32_t Hash(uint32_t native_offset) {
-    uint32_t hash = native_offset;
-    hash ^= (hash >> 20) ^ (hash >> 12);
-    hash ^= (hash >> 7) ^ (hash >> 4);
-    return hash;
-  }
-
-  // The number of bytes used to encode registers.
-  size_t RegWidth() const {
-    return (static_cast<size_t>(data_[0]) | (static_cast<size_t>(data_[1]) << 8)) >> 3;
-  }
-
- private:
-  // Skip the size information at the beginning of data.
-  const uint8_t* Table() const {
-    return data_ + 4;
-  }
-
-  // Number of bytes used to encode a native offset.
-  size_t NativeOffsetWidth() const {
-    return data_[0] & 7;
-  }
-
-  // The width of an entry in the table.
-  size_t EntryWidth() const {
-    return NativeOffsetWidth() + RegWidth();
-  }
-
-  const uint8_t* const data_;  // The header and table data
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_GC_MAP_H_
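
For reference, the deleted NativePcOffsetToReferenceMap packed three fields into its
first four header bytes; a worked decode with invented values:

    // data_ = { 0x0A, 0x00, 0x10, 0x00, ... }
    // NativeOffsetWidth() = data_[0] & 7                    = 2 bytes per native PC offset
    // RegWidth()          = (data_[0] | data_[1] << 8) >> 3 = 1 byte of reference bits
    // NumEntries()        = data_[2] | data_[3] << 8        = 16 entries
    // EntryWidth()        = 2 + 1                           = 3 bytes; Table() starts at data_ + 4
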
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 477e67b..0304d0d 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -195,10 +195,11 @@
     return root_.IsNull();
   }
 
-  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE GcRoot() {}
+  explicit ALWAYS_INLINE GcRoot(MirrorType* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // Root visitors take pointers to root_ and place the min CompressedReference** arrays. We use a
+  // Root visitors take pointers to root_ and place them in CompressedReference** arrays. We use a
   // CompressedReference<mirror::Object> here since it violates strict aliasing requirements to
   // cast CompressedReference<MirrorType>* to CompressedReference<mirror::Object>*.
   mutable mirror::CompressedReference<mirror::Object> root_;
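
The constructor split above makes raw-pointer construction explicit; what changes at
call sites (obj is any mirror::Object* held under the mutator lock):

    GcRoot<mirror::Object> a;           // Still fine: default-constructs a null root.
    GcRoot<mirror::Object> b(obj);      // Still fine: explicit construction.
    // GcRoot<mirror::Object> c = obj;  // No longer compiles: conversion is explicit.
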
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
new file mode 100644
index 0000000..40b71c4
--- /dev/null
+++ b/runtime/generated/asm_support_gen.h
@@ -0,0 +1,141 @@
+
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+#define ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
+// This file has been auto-generated by cpp-define-generator; do not edit directly.
+
+#define STACK_REFERENCE_SIZE 0x4
+DEFINE_CHECK_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::StackReference<art::mirror::Object>))))
+#define COMPRESSED_REFERENCE_SIZE 0x4
+DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::mirror::CompressedReference<art::mirror::Object>))))
+#define COMPRESSED_REFERENCE_SIZE_SHIFT 0x2
+DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE_SHIFT), (static_cast<size_t>(art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))))
+#define RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveAllCalleeSaves))))
+#define RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET 0x8
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsOnly))))
+#define RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET 0x10
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsAndArgs))))
+#define RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 0x18
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveEverything))))
+#define THREAD_FLAGS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value())))
+#define THREAD_ID_OFFSET 12
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_ID_OFFSET), (static_cast<int32_t>(art::Thread:: ThinLockIdOffset<art::kRuntimePointerSize>().Int32Value())))
+#define THREAD_IS_GC_MARKING_OFFSET 52
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_IS_GC_MARKING_OFFSET), (static_cast<int32_t>(art::Thread:: IsGcMarkingOffset<art::kRuntimePointerSize>().Int32Value())))
+#define THREAD_CARD_TABLE_OFFSET 128
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CARD_TABLE_OFFSET), (static_cast<int32_t>(art::Thread:: CardTableOffset<art::kRuntimePointerSize>().Int32Value())))
+#define CODEITEM_INSNS_OFFSET 16
+DEFINE_CHECK_EQ(static_cast<int32_t>(CODEITEM_INSNS_OFFSET), (static_cast<int32_t>(__builtin_offsetof(art::DexFile::CodeItem, insns_))))
+#define MIRROR_OBJECT_CLASS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(MIRROR_OBJECT_CLASS_OFFSET), (static_cast<int32_t>(art::mirror::Object:: ClassOffset().Int32Value())))
+#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
+DEFINE_CHECK_EQ(static_cast<int32_t>(MIRROR_OBJECT_LOCK_WORD_OFFSET), (static_cast<int32_t>(art::mirror::Object:: MonitorOffset().Int32Value())))
+#define MIRROR_CLASS_STATUS_INITIALIZED 0xa
+DEFINE_CHECK_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED), (static_cast<uint32_t>((art::mirror::Class::kStatusInitialized))))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), (static_cast<uint32_t>((art::kAccClassIsFinalizable))))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 0x1f
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT), (static_cast<uint32_t>((art::MostSignificantBit(art::kAccClassIsFinalizable)))))
+#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_METHODS_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedMethodsOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_64 24
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_METHODS_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedMethodsOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_32 24
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_TYPES_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedTypesOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_64 32
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_TYPES_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedTypesOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_JNI_OFFSET_32 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromJniOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_JNI_OFFSET_64 40
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromJniOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_QUICK_CODE_OFFSET_32 32
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_QUICK_CODE_OFFSET_64 48
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_DECLARING_CLASS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DECLARING_CLASS_OFFSET), (static_cast<int32_t>(art::ArtMethod:: DeclaringClassOffset().Int32Value())))
+#define DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET 40
+DEFINE_CHECK_EQ(static_cast<int32_t>(DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET), (static_cast<int32_t>(art::mirror::Class:: DexCacheStringsOffset().Int32Value())))
+#define STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT 3
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), (static_cast<int32_t>(art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))))
+#define STRING_DEX_CACHE_SIZE_MINUS_ONE 1023
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_SIZE_MINUS_ONE), (static_cast<int32_t>(art::mirror::DexCache::kDexCacheStringCacheSize - 1)))
+#define STRING_DEX_CACHE_HASH_BITS 10
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))))
+#define MIN_LARGE_OBJECT_THRESHOLD 0x3000
+DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
+#define LOCK_WORD_STATE_SHIFT 30
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kStateShift)))
+#define LOCK_WORD_STATE_MASK 0xc0000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted)))
+#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)))
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x10000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted)))
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xefffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
+#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShiftedToggled)))
+#define LOCK_WORD_GC_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_GC_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kGCStateShift)))
+#define LOCK_WORD_MARK_BIT_SHIFT 29
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift)))
+#define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted)))
+#define OBJECT_ALIGNMENT_MASK 0x7
+DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
+#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
+DEFINE_CHECK_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED), (static_cast<uint32_t>(~static_cast<uint32_t>(art::kObjectAlignment - 1))))
+#define OBJECT_ALIGNMENT_MASK_TOGGLED64 0xfffffffffffffff8
+DEFINE_CHECK_EQ(static_cast<uint64_t>(OBJECT_ALIGNMENT_MASK_TOGGLED64), (static_cast<uint64_t>(~static_cast<uint64_t>(art::kObjectAlignment - 1))))
+#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 7
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK), (static_cast<int32_t>((static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff8
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32), (static_cast<uint32_t>((~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff8
+DEFINE_CHECK_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64), (static_cast<uint64_t>((~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListOffset()))))
+#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))))
+#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))))
+#define ROSALLOC_SLOT_NEXT_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_SLOT_NEXT_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunSlotNextOffset()))))
+#define THREAD_SUSPEND_REQUEST 1
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_REQUEST), (static_cast<int32_t>((art::kSuspendRequest))))
+#define THREAD_CHECKPOINT_REQUEST 2
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kCheckpointRequest))))
+#define JIT_CHECK_OSR (-1)
+DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
+#define JIT_HOTNESS_DISABLE (-2)
+DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_HOTNESS_DISABLE), (static_cast<int16_t>((art::jit::kJitHotnessDisabled))))
+
+#endif  // ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
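
The generated header deliberately leaves DEFINE_CHECK_EQ to the includer. A plausible
sketch of the C++ side (the actual definition lives elsewhere in the tree; this
static_assert form is an assumption):

    // C++ translation units: turn every entry into a compile-time check.
    #define DEFINE_CHECK_EQ(expected, actual) \
      static_assert((expected) == (actual), "asm constant out of sync with runtime");
    #include "generated/asm_support_gen.h"

    // Assembly files would instead define DEFINE_CHECK_EQ(...) to nothing,
    // leaving only the #define'd constants visible.
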
diff --git a/runtime/globals.h b/runtime/globals.h
index 987a94e..9045d40 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -40,8 +40,15 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
 
+// Returns whether the given memory offset can be used for generating
+// an implicit null check.
+static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {
+  return offset < kPageSize;
+}
+
 // Required object alignment
-static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignmentShift = 3;
+static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
 
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
@@ -51,11 +58,31 @@
 static constexpr bool kIsDebugBuild = true;
 #endif
 
-// Whether or not this is a target (vs host) build. Useful in conditionals where ART_TARGET isn't.
+// ART_TARGET - Defined for target builds of ART.
+// ART_TARGET_LINUX - Defined for target Linux builds of ART.
+// ART_TARGET_ANDROID - Defined for target Android builds of ART.
+// Note: Either ART_TARGET_LINUX or ART_TARGET_ANDROID needs to be set when ART_TARGET is set.
+// Note: When ART_TARGET_LINUX is defined, mem_map.h will not use Ashmem for memory mappings
+// (Ashmem is usually only available on Android kernels).
 #if defined(ART_TARGET)
+// Useful in conditionals where ART_TARGET isn't.
 static constexpr bool kIsTargetBuild = true;
+#if defined(ART_TARGET_LINUX)
+static constexpr bool kIsTargetLinux = true;
+#elif defined(ART_TARGET_ANDROID)
+static constexpr bool kIsTargetLinux = false;
+#else
+#error "Either ART_TARGET_LINUX or ART_TARGET_ANDROID needs to be defined for target builds."
+#endif
 #else
 static constexpr bool kIsTargetBuild = false;
+#if defined(ART_TARGET_LINUX)
+#error "ART_TARGET_LINUX defined for host build."
+#elif defined(ART_TARGET_ANDROID)
+#error "ART_TARGET_ANDROID defined for host build."
+#else
+static constexpr bool kIsTargetLinux = false;
+#endif
 #endif
 
 // Garbage collector constants.
@@ -87,8 +114,18 @@
 #endif
 
 static constexpr bool kUseBakerOrBrooksReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier;
-static constexpr bool kUseReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier ||
-    kUseTableLookupReadBarrier;
+static constexpr bool kUseReadBarrier =
+    kUseBakerReadBarrier || kUseBrooksReadBarrier || kUseTableLookupReadBarrier;
+
+// Debugging flag that forces the generation of read barriers, but
+// does not trigger the use of the concurrent copying GC.
+//
+// TODO: Remove this flag when the read barriers compiler
+// instrumentation is completed.
+static constexpr bool kForceReadBarrier = false;
+// TODO: Likewise, remove this flag when kForceReadBarrier is removed
+// and replace it with kUseReadBarrier.
+static constexpr bool kEmitCompilerReadBarrier = kForceReadBarrier || kUseReadBarrier;
 
 // If true, references within the heap are poisoned (negated).
 #ifdef USE_HEAP_POISONING
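
Expressing kObjectAlignment through a shift keeps the alignment, its mask, and the
round-up all derived from one constant; a self-contained illustration matching the
8-byte alignment above:

    static constexpr size_t kObjectAlignmentShift = 3;
    static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;   // 8
    static constexpr size_t kObjectAlignmentMask = kObjectAlignment - 1u;     // 0x7

    constexpr size_t RoundUpToObjectAlignment(size_t bytes) {
      return (bytes + kObjectAlignmentMask) & ~kObjectAlignmentMask;
    }
    static_assert(RoundUpToObjectAlignment(13) == 16, "13 rounds up to 16");
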
diff --git a/runtime/handle.h b/runtime/handle.h
index f939ec5..a415373 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -64,19 +64,17 @@
 
   ALWAYS_INLINE jobject ToJObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
-      // Special case so that we work with NullHandles.
+      // Special case so that we work with null handles.
       return nullptr;
     }
     return reinterpret_cast<jobject>(reference_);
   }
 
-  ALWAYS_INLINE StackReference<mirror::Object>* GetReference()
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  ALWAYS_INLINE StackReference<mirror::Object>* GetReference() {
     return reference_;
   }
 
-  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
-      SHARED_REQUIRES(Locks::mutator_lock_) {
+  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const {
     return reference_;
   }
 
@@ -147,12 +145,12 @@
   template<size_t kNumReferences> friend class StackHandleScope;
 };
 
-// A special case of Handle that only holds references to null.
+// A special case of Handle that only holds references to null. Invalid once it goes out of
+// scope. Example: Handle<T> h = ScopedNullHandle<T>() will leave h undefined.
 template<class T>
-class NullHandle : public Handle<T> {
+class ScopedNullHandle : public Handle<T> {
  public:
-  NullHandle() : Handle<T>(&null_ref_) {
-  }
+  ScopedNullHandle() : Handle<T>(&null_ref_) {}
 
  private:
   StackReference<mirror::Object> null_ref_;
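
The renamed ScopedNullHandle is warning about lifetime, not nullness: the null
StackReference lives inside the scoped object itself. A sketch of the safe and unsafe
shapes (UseHandle is a hypothetical callee taking a Handle<mirror::Object>):

    void Ok() {
      ScopedNullHandle<mirror::Object> null_handle;
      UseHandle(null_handle);  // Fine: the scoped object outlives the use.
    }

    void Dangling() {
      Handle<mirror::Object> h = ScopedNullHandle<mirror::Object>();
      // h now refers into a destroyed temporary; any later use is undefined.
    }
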
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index ca206ef..2e1b8ed 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -57,9 +57,9 @@
   return header_size + data_size;
 }
 
-inline size_t HandleScope::SizeOf(size_t pointer_size, uint32_t num_references) {
+inline size_t HandleScope::SizeOf(PointerSize pointer_size, uint32_t num_references) {
   // Assume that the layout is packed.
-  size_t header_size = pointer_size + sizeof(number_of_references_);
+  size_t header_size = ReferencesOffset(pointer_size);
   size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
   return header_size + data_size;
 }
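
Concretely, the rewritten SizeOf reuses the layout already encoded in ReferencesOffset.
A worked example assuming 64-bit pointers, a 4-byte number_of_references_, and 4-byte
stack references (sizes asserted elsewhere in this change):

    // header = link pointer (8) + number_of_references_ (4)            = 12 bytes
    // data   = num_references * sizeof(StackReference<mirror::Object>)
    // SizeOf(PointerSize::k64, 3) = 12 + 3 * 4                         = 24 bytes
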
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index e617348..67d7054 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -19,6 +19,7 @@
 
 #include <stack>
 
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "handle.h"
@@ -52,7 +53,7 @@
   static size_t SizeOf(uint32_t num_references);
 
   // Returns the size of a HandleScope containing num_references handles.
-  static size_t SizeOf(size_t pointer_size, uint32_t num_references);
+  static size_t SizeOf(PointerSize pointer_size, uint32_t num_references);
 
   // Link to previous HandleScope or null.
   HandleScope* GetLink() const {
@@ -62,8 +63,7 @@
   ALWAYS_INLINE mirror::Object* GetReference(size_t i) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE Handle<mirror::Object> GetHandle(size_t i)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE Handle<mirror::Object> GetHandle(size_t i);
 
   ALWAYS_INLINE MutableHandle<mirror::Object> GetMutableHandle(size_t i)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -74,18 +74,18 @@
   ALWAYS_INLINE bool Contains(StackReference<mirror::Object>* handle_scope_entry) const;
 
   // Offset of link within HandleScope, used by generated code.
-  static size_t LinkOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
+  static constexpr size_t LinkOffset(PointerSize pointer_size ATTRIBUTE_UNUSED) {
     return 0;
   }
 
   // Offset of length within handle scope, used by generated code.
-  static size_t NumberOfReferencesOffset(size_t pointer_size) {
-    return pointer_size;
+  static constexpr size_t NumberOfReferencesOffset(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size);
   }
 
   // Offset of link within handle scope, used by generated code.
-  static size_t ReferencesOffset(size_t pointer_size) {
-    return pointer_size + sizeof(number_of_references_);
+  static constexpr size_t ReferencesOffset(PointerSize pointer_size) {
+    return NumberOfReferencesOffset(pointer_size) + sizeof(number_of_references_);
   }
 
   // Placement new creation.
@@ -97,7 +97,7 @@
  protected:
   // Return backing storage used for references.
   ALWAYS_INLINE StackReference<mirror::Object>* GetReferences() const {
-    uintptr_t address = reinterpret_cast<uintptr_t>(this) + ReferencesOffset(sizeof(void*));
+    uintptr_t address = reinterpret_cast<uintptr_t>(this) + ReferencesOffset(kRuntimePointerSize);
     return reinterpret_cast<StackReference<mirror::Object>*>(address);
   }
 
diff --git a/runtime/handle_scope_test.cc b/runtime/handle_scope_test.cc
index dc99987..58f3800 100644
--- a/runtime/handle_scope_test.cc
+++ b/runtime/handle_scope_test.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "base/enums.h"
 #include "gtest/gtest.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change.h"
@@ -48,13 +49,13 @@
 
   {
     uintptr_t* link_ptr = reinterpret_cast<uintptr_t*>(table_base_ptr +
-        HandleScope::LinkOffset(sizeof(void*)));
+        HandleScope::LinkOffset(kRuntimePointerSize));
     EXPECT_EQ(*link_ptr, static_cast<size_t>(0x5678));
   }
 
   {
     uint32_t* num_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
-        HandleScope::NumberOfReferencesOffset(sizeof(void*)));
+        HandleScope::NumberOfReferencesOffset(kRuntimePointerSize));
     EXPECT_EQ(*num_ptr, static_cast<size_t>(0x9ABC));
   }
 
@@ -64,7 +65,7 @@
     EXPECT_EQ(sizeof(StackReference<mirror::Object>), sizeof(uint32_t));
 
     uint32_t* ref_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
-        HandleScope::ReferencesOffset(sizeof(void*)));
+        HandleScope::ReferencesOffset(kRuntimePointerSize));
     EXPECT_EQ(*ref_ptr, static_cast<uint32_t>(0x1234));
   }
 }
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index dfc1f5f..4005f05 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -223,6 +223,12 @@
     HandleU1List(values, count);
     length_ += count;
   }
+  void AddU1AsU2List(const uint8_t* values, size_t count) {
+    HandleU1AsU2List(values, count);
+    // Chars from a compressed (8-bit) String are added as 16-bit blocks (count rounded up to even).
+    int ceil_count_to_even = count + ((count & 1) ? 1 : 0);
+    length_ += ceil_count_to_even * sizeof(uint8_t);
+  }
   void AddU2List(const uint16_t* values, size_t count) {
     HandleU2List(values, count);
     length_ += count * sizeof(uint16_t);
@@ -268,6 +274,9 @@
   virtual void HandleU1List(const uint8_t* values ATTRIBUTE_UNUSED,
                             size_t count ATTRIBUTE_UNUSED) {
   }
+  virtual void HandleU1AsU2List(const uint8_t* values ATTRIBUTE_UNUSED,
+                                size_t count ATTRIBUTE_UNUSED) {
+  }
   virtual void HandleU2List(const uint16_t* values ATTRIBUTE_UNUSED,
                             size_t count ATTRIBUTE_UNUSED) {
   }
@@ -308,6 +317,19 @@
     buffer_.insert(buffer_.end(), values, values + count);
   }
 
+  void HandleU1AsU2List(const uint8_t* values, size_t count) OVERRIDE {
+    DCHECK_EQ(length_, buffer_.size());
+    // All 8-bits are grouped in 2 to make 16-bit block like Java Char
+    if (count & 1) {
+      buffer_.push_back(0);
+    }
+    for (size_t i = 0; i < count; ++i) {
+      uint8_t value = *values;
+      buffer_.push_back(value);
+      values++;
+    }
+  }
+
   void HandleU2List(const uint16_t* values, size_t count) OVERRIDE {
     DCHECK_EQ(length_, buffer_.size());
     for (size_t i = 0; i < count; ++i) {
@@ -419,18 +441,13 @@
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
         fd_(fd),
-        direct_to_ddms_(direct_to_ddms),
-        start_ns_(NanoTime()),
-        output_(nullptr),
-        current_heap_(HPROF_HEAP_DEFAULT),
-        objects_in_segment_(0),
-        next_string_id_(0x400000),
-        next_class_serial_number_(1) {
+        direct_to_ddms_(direct_to_ddms) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
   void Dump()
-    REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
+    REQUIRES(Locks::mutator_lock_)
+    REQUIRES(!Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
     {
       MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
       if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) {
@@ -462,10 +479,11 @@
     }
 
     if (okay) {
-      uint64_t duration = NanoTime() - start_ns_;
-      LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(RoundUp(overall_size, 1024))
-          << ") in " << PrettyDuration(duration);
+      const uint64_t duration = NanoTime() - start_ns_;
+      LOG(INFO) << "hprof: heap dump completed (" << PrettySize(RoundUp(overall_size, KB))
+                << ") in " << PrettyDuration(duration)
+                << " objects " << total_objects_
+                << " objects with stack traces " << total_objects_with_stack_trace_;
     }
   }
 
@@ -509,6 +527,7 @@
     // Walk the roots and the heap.
     output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
 
+    simple_roots_.clear();
     runtime->VisitRoots(this);
     runtime->VisitImageRoots(this);
     runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, this);
@@ -832,7 +851,7 @@
         continue;
       }
       ++count;
-      const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace();
+      const gc::AllocRecordStackTrace* trace = it->second.GetStackTrace();
 
       // Copy the pair into a real hash map to speed up look up.
       auto records_result = allocation_records_.emplace(obj, trace);
@@ -855,7 +874,7 @@
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
-    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
+    total_objects_with_stack_trace_ = count;
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
@@ -865,16 +884,19 @@
   int fd_;
   bool direct_to_ddms_;
 
-  uint64_t start_ns_;
+  uint64_t start_ns_ = NanoTime();
 
-  EndianOutput* output_;
+  EndianOutput* output_ = nullptr;
 
-  HprofHeapId current_heap_;  // Which heap we're currently dumping.
-  size_t objects_in_segment_;
+  HprofHeapId current_heap_ = HPROF_HEAP_DEFAULT;  // Which heap we're currently dumping.
+  size_t objects_in_segment_ = 0;
 
-  HprofStringId next_string_id_;
+  size_t total_objects_ = 0u;
+  size_t total_objects_with_stack_trace_ = 0u;
+
+  HprofStringId next_string_id_ = 0x400000;
   SafeMap<std::string, HprofStringId> strings_;
-  HprofClassSerialNumber next_class_serial_number_;
+  HprofClassSerialNumber next_class_serial_number_ = 1;
   SafeMap<mirror::Class*, HprofClassSerialNumber> classes_;
 
   std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber,
@@ -885,6 +907,14 @@
                      gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>> frames_;
   std::unordered_map<const mirror::Object*, const gc::AllocRecordStackTrace*> allocation_records_;
 
+  // Set used to keep track of what simple root records we have already
+  // emitted, to avoid emitting duplicate entries. The simple root records are
+  // those that contain no other information than the root type and the object
+  // id. A pair of root type and object id is packed into a uint64_t, with
+  // the root type in the upper 32 bits and the object id in the lower 32
+  // bits.
+  std::unordered_set<uint64_t> simple_roots_;
+
   friend class GcRootVisitor;
   DISALLOW_COPY_AND_ASSIGN(Hprof);
 };
@@ -963,10 +993,14 @@
     case HPROF_ROOT_MONITOR_USED:
     case HPROF_ROOT_INTERNED_STRING:
     case HPROF_ROOT_DEBUGGER:
-    case HPROF_ROOT_VM_INTERNAL:
-      __ AddU1(heap_tag);
-      __ AddObjectId(obj);
+    case HPROF_ROOT_VM_INTERNAL: {
+      uint64_t key = (static_cast<uint64_t>(heap_tag) << 32) | PointerToLowMemUInt32(obj);
+      if (simple_roots_.insert(key).second) {
+        __ AddU1(heap_tag);
+        __ AddObjectId(obj);
+      }
       break;
+    }
 
       // ID: object ID
       // ID: JNI global ref ID
@@ -1064,6 +1098,8 @@
     return;
   }
 
+  ++total_objects_;
+
   GcRootVisitor visitor(this);
   obj->VisitReferences(visitor, VoidFunctor());
 
@@ -1340,7 +1376,11 @@
         string_value = reinterpret_cast<mirror::Object*>(
             reinterpret_cast<uintptr_t>(s) + kObjectAlignment);
       } else {
-        string_value = reinterpret_cast<mirror::Object*>(s->GetValue());
+        if (s->IsCompressed()) {
+          string_value = reinterpret_cast<mirror::Object*>(s->GetValueCompressed());
+        } else {
+          string_value = reinterpret_cast<mirror::Object*>(s->GetValue());
+        }
       }
       __ AddObjectId(string_value);
     }
@@ -1355,12 +1395,18 @@
   CHECK_EQ(obj->IsString(), string_value != nullptr);
   if (string_value != nullptr) {
     mirror::String* s = obj->AsString();
+    // A compressed (8-bit) string occupies ceil(length / 2) 16-bit blocks.
+    int length_in_16_bit = (s->IsCompressed()) ? ((s->GetLength() + 1) / 2) : s->GetLength();
     __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
     __ AddObjectId(string_value);
     __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
-    __ AddU4(s->GetLength());
+    __ AddU4(length_in_16_bit);
     __ AddU1(hprof_basic_char);
-    __ AddU2List(s->GetValue(), s->GetLength());
+    if (s->IsCompressed()) {
+      __ AddU1AsU2List(s->GetValueCompressed(), s->GetLength());
+    } else {
+      __ AddU2List(s->GetValue(), s->GetLength());
+    }
   }
 }
 
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
new file mode 100644
index 0000000..28620db
--- /dev/null
+++ b/runtime/image-inl.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_IMAGE_INL_H_
+#define ART_RUNTIME_IMAGE_INL_H_
+
+#include "image.h"
+
+#include "art_method.h"
+#include "imtable.h"
+
+namespace art {
+
+template <ReadBarrierOption kReadBarrierOption>
+inline mirror::Object* ImageHeader::GetImageRoot(ImageRoot image_root) const {
+  mirror::ObjectArray<mirror::Object>* image_roots = GetImageRoots<kReadBarrierOption>();
+  return image_roots->Get<kVerifyNone, kReadBarrierOption>(static_cast<int32_t>(image_root));
+}
+
+template <ReadBarrierOption kReadBarrierOption>
+inline mirror::ObjectArray<mirror::Object>* ImageHeader::GetImageRoots() const {
+  // Need a read barrier as it's not visited during root scan.
+  // Pass in the address of the local variable to the read barrier
+  // rather than image_roots_ because it won't move (asserted below)
+  // and it's a const member.
+  mirror::ObjectArray<mirror::Object>* image_roots =
+      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
+  mirror::ObjectArray<mirror::Object>* result =
+      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kReadBarrierOption>(
+          &image_roots);
+  DCHECK_EQ(image_roots, result);
+  return image_roots;
+}
+
+template <typename Visitor>
+inline void ImageHeader::VisitPackedImTables(const Visitor& visitor,
+                                             uint8_t* base,
+                                             PointerSize pointer_size) const {
+  const ImageSection& section = GetImageSection(kSectionImTables);
+  for (size_t pos = 0; pos < section.Size();) {
+    ImTable* imt = reinterpret_cast<ImTable*>(base + section.Offset() + pos);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      ArtMethod* orig = imt->Get(i, pointer_size);
+      ArtMethod* updated = visitor(orig);
+      if (updated != orig) {
+        imt->Set(i, updated, pointer_size);
+      }
+    }
+    pos += ImTable::SizeInBytes(pointer_size);
+  }
+}
+
+template <typename Visitor>
+inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
+                                                      uint8_t* base,
+                                                      PointerSize pointer_size) const {
+  const ImageSection& section = GetImageSection(kSectionIMTConflictTables);
+  for (size_t pos = 0; pos < section.Size(); ) {
+    auto* table = reinterpret_cast<ImtConflictTable*>(base + section.Offset() + pos);
+    table->Visit([&visitor](const std::pair<ArtMethod*, ArtMethod*>& methods) {
+      return std::make_pair(visitor(methods.first), visitor(methods.second));
+    }, pointer_size);
+    pos += table->ComputeSize(pointer_size);
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_IMAGE_INL_H_
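
A sketch of how the packed-IMT visitors are meant to be driven: the visitor maps each
ArtMethod* to its (possibly relocated) counterpart, and entries are rewritten only when
the mapping changed. header, image_base, and delta are illustrative:

    auto relocate = [delta](ArtMethod* m) -> ArtMethod* {
      if (m == nullptr) {
        return nullptr;
      }
      return reinterpret_cast<ArtMethod*>(reinterpret_cast<uintptr_t>(m) + delta);
    };
    header.VisitPackedImTables(relocate, image_base, PointerSize::k64);
    header.VisitPackedImtConflictTables(relocate, image_base, PointerSize::k64);
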
diff --git a/runtime/image.cc b/runtime/image.cc
index 192371f..6888183 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '0', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -35,8 +35,15 @@
                          uint32_t oat_data_begin,
                          uint32_t oat_data_end,
                          uint32_t oat_file_end,
+                         uint32_t boot_image_begin,
+                         uint32_t boot_image_size,
+                         uint32_t boot_oat_begin,
+                         uint32_t boot_oat_size,
                          uint32_t pointer_size,
-                         bool compile_pic)
+                         bool compile_pic,
+                         bool is_pic,
+                         StorageMode storage_mode,
+                         size_t data_size)
   : image_begin_(image_begin),
     image_size_(image_size),
     oat_checksum_(oat_checksum),
@@ -44,14 +51,20 @@
     oat_data_begin_(oat_data_begin),
     oat_data_end_(oat_data_end),
     oat_file_end_(oat_file_end),
+    boot_image_begin_(boot_image_begin),
+    boot_image_size_(boot_image_size),
+    boot_oat_begin_(boot_oat_begin),
+    boot_oat_size_(boot_oat_size),
     patch_delta_(0),
     image_roots_(image_roots),
     pointer_size_(pointer_size),
-    compile_pic_(compile_pic) {
+    compile_pic_(compile_pic),
+    is_pic_(is_pic),
+    storage_mode_(storage_mode),
+    data_size_(data_size) {
   CHECK_EQ(image_begin, RoundUp(image_begin, kPageSize));
   CHECK_EQ(oat_file_begin, RoundUp(oat_file_begin, kPageSize));
   CHECK_EQ(oat_data_begin, RoundUp(oat_data_begin, kPageSize));
-  CHECK_LT(image_begin, image_roots);
   CHECK_LT(image_roots, oat_file_begin);
   CHECK_LE(oat_file_begin, oat_data_begin);
   CHECK_LT(oat_data_begin, oat_data_end);
@@ -64,13 +77,21 @@
 
 void ImageHeader::RelocateImage(off_t delta) {
   CHECK_ALIGNED(delta, kPageSize) << " patch delta must be page aligned";
-  image_begin_ += delta;
   oat_file_begin_ += delta;
   oat_data_begin_ += delta;
   oat_data_end_ += delta;
   oat_file_end_ += delta;
-  image_roots_ += delta;
   patch_delta_ += delta;
+  RelocateImageObjects(delta);
+  RelocateImageMethods(delta);
+}
+
+void ImageHeader::RelocateImageObjects(off_t delta) {
+  image_begin_ += delta;
+  image_roots_ += delta;
+}
+
+void ImageHeader::RelocateImageMethods(off_t delta) {
   for (size_t i = 0; i < kImageMethodsCount; ++i) {
     image_methods_[i] += delta;
   }
@@ -96,9 +117,6 @@
   if (oat_file_begin_ >= oat_data_begin_) {
     return false;
   }
-  if (image_roots_ <= image_begin_ || oat_file_begin_ <= image_roots_) {
-    return false;
-  }
   if (!IsAligned<kPageSize>(patch_delta_)) {
     return false;
   }
@@ -110,24 +128,6 @@
   return reinterpret_cast<const char*>(magic_);
 }
 
-mirror::Object* ImageHeader::GetImageRoot(ImageRoot image_root) const {
-  return GetImageRoots()->Get(image_root);
-}
-
-mirror::ObjectArray<mirror::Object>* ImageHeader::GetImageRoots() const {
-  // Need a read barrier as it's not visited during root scan.
-  // Pass in the address of the local variable to the read barrier
-  // rather than image_roots_ because it won't move (asserted below)
-  // and it's a const member.
-  mirror::ObjectArray<mirror::Object>* image_roots =
-      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
-  mirror::ObjectArray<mirror::Object>* result =
-      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kWithReadBarrier, true>(
-          &image_roots);
-  DCHECK_EQ(image_roots, result);
-  return result;
-}
-
 ArtMethod* ImageHeader::GetImageMethod(ImageMethod index) const {
   CHECK_LT(static_cast<size_t>(index), kImageMethodsCount);
   return reinterpret_cast<ArtMethod*>(image_methods_[index]);
@@ -147,27 +147,35 @@
   return os << "size=" << section.Size() << " range=" << section.Offset() << "-" << section.End();
 }
 
-void ImageSection::VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const {
-  for (size_t pos = 0; pos < Size(); ) {
-    auto* array = reinterpret_cast<LengthPrefixedArray<ArtField>*>(base + Offset() + pos);
-    for (size_t i = 0; i < array->Length(); ++i) {
+void ImageHeader::VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const {
+  const ImageSection& fields = GetFieldsSection();
+  for (size_t pos = 0; pos < fields.Size(); ) {
+    auto* array = reinterpret_cast<LengthPrefixedArray<ArtField>*>(base + fields.Offset() + pos);
+    for (size_t i = 0; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, sizeof(ArtField)));
     }
-    pos += array->ComputeSize(array->Length());
+    pos += array->ComputeSize(array->size());
   }
 }
 
-void ImageSection::VisitPackedArtMethods(ArtMethodVisitor* visitor,
-                                         uint8_t* base,
-                                         size_t pointer_size) const {
+void ImageHeader::VisitPackedArtMethods(ArtMethodVisitor* visitor,
+                                        uint8_t* base,
+                                        PointerSize pointer_size) const {
   const size_t method_alignment = ArtMethod::Alignment(pointer_size);
   const size_t method_size = ArtMethod::Size(pointer_size);
-  for (size_t pos = 0; pos < Size(); ) {
-    auto* array = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(base + Offset() + pos);
-    for (size_t i = 0; i < array->Length(); ++i) {
+  const ImageSection& methods = GetMethodsSection();
+  for (size_t pos = 0; pos < methods.Size(); ) {
+    auto* array = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(base + methods.Offset() + pos);
+    for (size_t i = 0; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, method_size, method_alignment));
     }
-    pos += array->ComputeSize(array->Length(), method_size, method_alignment);
+    pos += array->ComputeSize(array->size(), method_size, method_alignment);
+  }
+  const ImageSection& runtime_methods = GetRuntimeMethodsSection();
+  for (size_t pos = 0; pos < runtime_methods.Size(); ) {
+    auto* method = reinterpret_cast<ArtMethod*>(base + runtime_methods.Offset() + pos);
+    visitor->Visit(method);
+    pos += method_size;
   }
 }
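
The visitor methods above walk a section as a run of length-prefixed arrays: read a chunk's element count, visit each element, then advance by the chunk's computed size. Below is a minimal standalone sketch of that traversal pattern, using a simplified stand-in for LengthPrefixedArray (the real ART type also accounts for element alignment, which is omitted here):

#include <cstddef>
#include <cstdint>

// Simplified stand-in for ART's LengthPrefixedArray<T>: a 32-bit length
// followed immediately by `length` packed elements. Alignment handling is
// deliberately omitted for brevity.
template <typename T>
struct LengthPrefixedArray {
  uint32_t length_;
  size_t size() const { return length_; }
  T* At(size_t i) {
    uint8_t* data = reinterpret_cast<uint8_t*>(this) + sizeof(uint32_t);
    return reinterpret_cast<T*>(data) + i;
  }
  static size_t ComputeSize(size_t n) {
    return sizeof(uint32_t) + n * sizeof(T);
  }
};

// Same loop shape as VisitPackedArtFields/VisitPackedArtMethods above:
// chunks are laid out back to back until the section size is exhausted.
template <typename T, typename Visitor>
void WalkPackedSection(uint8_t* base, uint32_t offset, uint32_t size, Visitor&& visit) {
  for (size_t pos = 0; pos < size; ) {
    auto* array = reinterpret_cast<LengthPrefixedArray<T>*>(base + offset + pos);
    for (size_t i = 0; i < array->size(); ++i) {
      visit(array->At(i));
    }
    pos += LengthPrefixedArray<T>::ComputeSize(array->size());
  }
}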
 
diff --git a/runtime/image.h b/runtime/image.h
index 20e4159..9ff18d6 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -19,6 +19,7 @@
 
 #include <string.h>
 
+#include "base/enums.h"
 #include "globals.h"
 #include "mirror/object.h"
 
@@ -64,12 +65,6 @@
     return offset - offset_ < size_;
   }
 
-  // Visit ArtMethods in the section starting at base.
-  void VisitPackedArtMethods(ArtMethodVisitor* visitor, uint8_t* base, size_t pointer_size) const;
-
-  // Visit ArtMethods in the section starting at base.
-  void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
-
  private:
   uint32_t offset_;
   uint32_t size_;
@@ -78,13 +73,36 @@
 // header of image files written by ImageWriter, read and validated by Space.
 class PACKED(4) ImageHeader {
  public:
+  enum StorageMode : uint32_t {
+    kStorageModeUncompressed,
+    kStorageModeLZ4,
+    kStorageModeLZ4HC,
+    kStorageModeCount,  // Number of elements in enum.
+  };
+  static constexpr StorageMode kDefaultStorageMode = kStorageModeUncompressed;
+
   ImageHeader()
-      : image_begin_(0U), image_size_(0U), oat_checksum_(0U), oat_file_begin_(0U),
-        oat_data_begin_(0U), oat_data_end_(0U), oat_file_end_(0U), patch_delta_(0),
-        image_roots_(0U), pointer_size_(0U), compile_pic_(0) {}
+      : image_begin_(0U),
+        image_size_(0U),
+        oat_checksum_(0U),
+        oat_file_begin_(0U),
+        oat_data_begin_(0U),
+        oat_data_end_(0U),
+        oat_file_end_(0U),
+        boot_image_begin_(0U),
+        boot_image_size_(0U),
+        boot_oat_begin_(0U),
+        boot_oat_size_(0U),
+        patch_delta_(0),
+        image_roots_(0U),
+        pointer_size_(0U),
+        compile_pic_(0),
+        is_pic_(0),
+        storage_mode_(kDefaultStorageMode),
+        data_size_(0) {}
 
   ImageHeader(uint32_t image_begin,
-              uint32_t image_size_,
+              uint32_t image_size,
               ImageSection* sections,
               uint32_t image_roots,
               uint32_t oat_checksum,
@@ -92,8 +110,15 @@
               uint32_t oat_data_begin,
               uint32_t oat_data_end,
               uint32_t oat_file_end,
+              uint32_t boot_image_begin,
+              uint32_t boot_image_size,
+              uint32_t boot_oat_begin,
+              uint32_t boot_oat_size,
               uint32_t pointer_size,
-              bool compile_pic_);
+              bool compile_pic,
+              bool is_pic,
+              StorageMode storage_mode,
+              size_t data_size);
 
   bool IsValid() const;
   const char* GetMagic() const;
@@ -114,6 +139,8 @@
     oat_checksum_ = oat_checksum;
   }
 
+  // The location where the oat file was expected to be when the image was created. The actual
+  // oat file may be at a different location for application images.
   uint8_t* GetOatFileBegin() const {
     return reinterpret_cast<uint8_t*>(oat_file_begin_);
   }
@@ -130,7 +157,11 @@
     return reinterpret_cast<uint8_t*>(oat_file_end_);
   }
 
-  uint32_t GetPointerSize() const {
+  PointerSize GetPointerSize() const {
+    return ConvertToPointerSize(pointer_size_);
+  }
+
+  uint32_t GetPointerSizeUnchecked() const {
     return pointer_size_;
   }
 
@@ -152,9 +183,10 @@
     kResolutionMethod,
     kImtConflictMethod,
     kImtUnimplementedMethod,
-    kCalleeSaveMethod,
-    kRefsOnlySaveMethod,
-    kRefsAndArgsSaveMethod,
+    kSaveAllCalleeSavesMethod,
+    kSaveRefsOnlyMethod,
+    kSaveRefsAndArgsMethod,
+    kSaveEverythingMethod,
     kImageMethodsCount,  // Number of elements in enum.
   };
 
@@ -168,8 +200,12 @@
     kSectionObjects,
     kSectionArtFields,
     kSectionArtMethods,
+    kSectionRuntimeMethods,
+    kSectionImTables,
+    kSectionIMTConflictTables,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
+    kSectionClassTable,
     kSectionImageBitmap,
     kSectionCount,  // Number of elements in enum.
   };
@@ -178,21 +214,89 @@
   void SetImageMethod(ImageMethod index, ArtMethod* method);
 
   const ImageSection& GetImageSection(ImageSections index) const;
+
   const ImageSection& GetMethodsSection() const {
     return GetImageSection(kSectionArtMethods);
   }
 
+  const ImageSection& GetRuntimeMethodsSection() const {
+    return GetImageSection(kSectionRuntimeMethods);
+  }
+
+  const ImageSection& GetFieldsSection() const {
+    return GetImageSection(ImageHeader::kSectionArtFields);
+  }
+
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetImageRoot(ImageRoot image_root) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::ObjectArray<mirror::Object>* GetImageRoots() const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RelocateImage(off_t delta);
+  void RelocateImageMethods(off_t delta);
+  void RelocateImageObjects(off_t delta);
 
   bool CompilePic() const {
     return compile_pic_ != 0;
   }
 
+  bool IsPic() const {
+    return is_pic_ != 0;
+  }
+
+  uint32_t GetBootImageBegin() const {
+    return boot_image_begin_;
+  }
+
+  uint32_t GetBootImageSize() const {
+    return boot_image_size_;
+  }
+
+  uint32_t GetBootOatBegin() const {
+    return boot_oat_begin_;
+  }
+
+  uint32_t GetBootOatSize() const {
+    return boot_oat_size_;
+  }
+
+  StorageMode GetStorageMode() const {
+    return storage_mode_;
+  }
+
+  uint64_t GetDataSize() const {
+    return data_size_;
+  }
+
+  bool IsAppImage() const {
+    // App images currently require a boot image; if the size is non-zero, then this is an app
+    // image header.
+    return boot_image_size_ != 0u;
+  }
+
+  // Visit ArtMethods in the section starting at base. Includes runtime methods.
+  // TODO: Delete base parameter if it is always equal to GetImageBegin.
+  void VisitPackedArtMethods(ArtMethodVisitor* visitor,
+                             uint8_t* base,
+                             PointerSize pointer_size) const;
+
+  // Visit ArtFields in the section starting at base.
+  // TODO: Delete base parameter if it is always equal to GetImageBegin.
+  void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
+
+  template <typename Visitor>
+  void VisitPackedImTables(const Visitor& visitor,
+                           uint8_t* base,
+                           PointerSize pointer_size) const;
+
+  template <typename Visitor>
+  void VisitPackedImtConflictTables(const Visitor& visitor,
+                                    uint8_t* base,
+                                    PointerSize pointer_size) const;
+
  private:
   static const uint8_t kImageMagic[4];
   static const uint8_t kImageVersion[4];
@@ -222,6 +326,16 @@
   // .so files. Used for positioning any following alloc spaces.
   uint32_t oat_file_end_;
 
+  // Boot image begin and size (app image headers only).
+  uint32_t boot_image_begin_;
+  uint32_t boot_image_size_;
+
+  // Boot oat begin and size (app image headers only).
+  uint32_t boot_oat_begin_;
+  uint32_t boot_oat_size_;
+
+  // TODO: We should probably insert a boot image checksum for app images.
+
   // The total delta that this image has been patched.
   int32_t patch_delta_;
 
@@ -234,12 +348,24 @@
   // Boolean (0 or 1) to denote if the image was compiled with --compile-pic option
   const uint32_t compile_pic_;
 
-  // Image sections
+  // Boolean (0 or 1) to denote if the image can be mapped at a random address; this only refers
+  // to the .art file. Currently, app oat files do not depend on their app image: there are no
+  // pointers from the app oat code to the app image.
+  const uint32_t is_pic_;
+
+  // Image section sizes/offsets correspond to the uncompressed form.
   ImageSection sections_[kSectionCount];
 
-  // Image methods.
+  // Image methods; for app images, these may be inside the boot image.
   uint64_t image_methods_[kImageMethodsCount];
 
+  // Storage mode for the image; the image may be compressed.
+  StorageMode storage_mode_;
+
+  // Size of the image data, excluding the bitmap and the header. For compressed images, this
+  // is the compressed size in the file.
+  uint32_t data_size_;
+
   friend class ImageWriter;
 };
 
@@ -247,6 +373,7 @@
 std::ostream& operator<<(std::ostream& os, const ImageHeader::ImageRoot& policy);
 std::ostream& operator<<(std::ostream& os, const ImageHeader::ImageSections& section);
 std::ostream& operator<<(std::ostream& os, const ImageSection& section);
+std::ostream& operator<<(std::ostream& os, const ImageHeader::StorageMode& mode);
 
 }  // namespace art
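
The new header fields make the image self-describing for a loader: storage_mode_ says whether the file's payload needs decompression, and data_size_ gives the on-disk payload size. A hedged sketch of how a consumer might branch on these accessors follows; DecompressLZ4 is an assumed helper, not an ART API (LZ4HC differs from LZ4 only on the compression side, so both compressed modes inflate the same way):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Assumed helper for this sketch: inflates an LZ4 block into `out`.
bool DecompressLZ4(const uint8_t* in, size_t in_size, uint8_t* out, size_t out_capacity);

bool LoadImageData(const art::ImageHeader& header, const uint8_t* file_data,
                   uint8_t* out, size_t out_capacity) {
  switch (header.GetStorageMode()) {
    case art::ImageHeader::kStorageModeUncompressed:
      // File bytes are already in their in-memory form.
      if (header.GetDataSize() > out_capacity) {
        return false;
      }
      std::memcpy(out, file_data, header.GetDataSize());
      return true;
    case art::ImageHeader::kStorageModeLZ4:
    case art::ImageHeader::kStorageModeLZ4HC:
      // GetDataSize() is the compressed size; the section sizes/offsets in
      // the header describe the uncompressed form.
      return DecompressLZ4(file_data, header.GetDataSize(), out, out_capacity);
    default:
      return false;  // kStorageModeCount or a corrupt header.
  }
}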
 
diff --git a/runtime/imtable.h b/runtime/imtable.h
new file mode 100644
index 0000000..2416621
--- /dev/null
+++ b/runtime/imtable.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_IMTABLE_H_
+#define ART_RUNTIME_IMTABLE_H_
+
+#ifndef IMT_SIZE
+#error IMT_SIZE not defined
+#endif
+
+namespace art {
+
+class ArtMethod;
+
+class ImTable {
+ public:
+  // Interface method table size. Increasing this value reduces the chance of two interface methods
+  // colliding in the interface method table but increases the size of classes that implement
+  // (non-marker) interfaces.
+  static constexpr size_t kSize = IMT_SIZE;
+
+  ArtMethod* Get(size_t index, PointerSize pointer_size) {
+    DCHECK_LT(index, kSize);
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    if (pointer_size == PointerSize::k32) {
+      uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
+      return reinterpret_cast<ArtMethod*>(value);
+    } else {
+      uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
+      return reinterpret_cast<ArtMethod*>(value);
+    }
+  }
+
+  void Set(size_t index, ArtMethod* method, PointerSize pointer_size) {
+    DCHECK_LT(index, kSize);
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    if (pointer_size == PointerSize::k32) {
+      uintptr_t value = reinterpret_cast<uintptr_t>(method);
+      DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we don't lose any non-zero bits.
+      *reinterpret_cast<uint32_t*>(ptr) = static_cast<uint32_t>(value);
+    } else {
+      *reinterpret_cast<uint64_t*>(ptr) = reinterpret_cast<uint64_t>(method);
+    }
+  }
+
+  static size_t OffsetOfElement(size_t index, PointerSize pointer_size) {
+    return index * static_cast<size_t>(pointer_size);
+  }
+
+  void Populate(ArtMethod** data, PointerSize pointer_size) {
+    for (size_t i = 0; i < kSize; ++i) {
+      Set(i, data[i], pointer_size);
+    }
+  }
+
+  constexpr static size_t SizeInBytes(PointerSize pointer_size) {
+    return kSize * static_cast<size_t>(pointer_size);
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_IMTABLE_H_
+
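
ImTable is a view over raw slots rather than a conventional object: kSize entries of pointer_size bytes each, which lets a 64-bit host build tables for a 32-bit target image. A usage sketch under that assumption (in ART the slot index is derived from an interface method's hash modulo kSize, which is why a larger kSize trades space for fewer collisions):

#include <cstdint>
#include <vector>

// Sketch only: back an ImTable with plain heap storage and fill it from an
// array of ImTable::kSize resolved methods.
void BuildImTable(art::ArtMethod** methods /* ImTable::kSize entries */,
                  art::PointerSize pointer_size) {
  std::vector<uint8_t> storage(art::ImTable::SizeInBytes(pointer_size));
  art::ImTable* table = reinterpret_cast<art::ImTable*>(storage.data());
  table->Populate(methods, pointer_size);
  // Get()/Set() narrow or widen through uint32_t/uint64_t depending on the
  // target pointer size, so the host and target widths need not match.
  art::ArtMethod* first = table->Get(0, pointer_size);
  (void)first;
}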
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index a5b63b4..8e49492 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -16,6 +16,7 @@
 
 #include "indirect_reference_table-inl.h"
 
+#include "base/systrace.h"
 #include "jni_internal.h"
 #include "nth_caller_visitor.h"
 #include "reference_table.h"
@@ -261,6 +262,7 @@
 }
 
 void IndirectReferenceTable::Trim() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   const size_t top_index = Capacity();
   auto* release_start = AlignUp(reinterpret_cast<uint8_t*>(&table_[top_index]), kPageSize);
   uint8_t* release_end = table_mem_map_->End();
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index d13526b..2d0ae63 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -344,8 +344,11 @@
     segment_state_.all = new_state;
   }
 
-  static Offset SegmentStateOffset() {
-    return Offset(OFFSETOF_MEMBER(IndirectReferenceTable, segment_state_));
+  static Offset SegmentStateOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
+    // Note: Currently segment_state_ is at offset 0. We're testing the expected value in
+    //       jni_internal_test to make sure it stays correct. It is not OFFSETOF_MEMBER, as that
+    //       is not pointer-size-safe.
+    return Offset(0);
   }
 
   // Release pages past the end of the table that may have previously held references.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index ed64d7e..4a86e36 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -55,7 +55,7 @@
   explicit InstallStubsClassVisitor(Instrumentation* instrumentation)
       : instrumentation_(instrumentation) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
     instrumentation_->InstallStubsForClass(klass);
     return true;  // We visit all classes.
   }
@@ -66,17 +66,25 @@
 
 
 Instrumentation::Instrumentation()
-    : instrumentation_stubs_installed_(false), entry_exit_stubs_installed_(false),
+    : instrumentation_stubs_installed_(false),
+      entry_exit_stubs_installed_(false),
       interpreter_stubs_installed_(false),
-      interpret_only_(false), forced_interpret_only_(false),
-      have_method_entry_listeners_(false), have_method_exit_listeners_(false),
-      have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
-      have_field_read_listeners_(false), have_field_write_listeners_(false),
-      have_exception_caught_listeners_(false), have_backward_branch_listeners_(false),
-      deoptimized_methods_lock_("deoptimized methods lock"),
+      interpret_only_(false),
+      forced_interpret_only_(false),
+      have_method_entry_listeners_(false),
+      have_method_exit_listeners_(false),
+      have_method_unwind_listeners_(false),
+      have_dex_pc_listeners_(false),
+      have_field_read_listeners_(false),
+      have_field_write_listeners_(false),
+      have_exception_caught_listeners_(false),
+      have_branch_listeners_(false),
+      have_invoke_virtual_or_interface_listeners_(false),
+      deoptimized_methods_lock_("deoptimized methods lock", kDeoptimizedMethodsLock),
       deoptimization_enabled_(false),
       interpreter_handler_table_(kMainHandlerTable),
-      quick_alloc_entry_points_instrumentation_counter_(0) {
+      quick_alloc_entry_points_instrumentation_counter_(0),
+      alloc_entrypoints_instrumented_(false) {
 }
 
 void Instrumentation::InstallStubsForClass(mirror::Class* klass) {
@@ -86,32 +94,27 @@
     // We need the class to be resolved to install/uninstall stubs. Otherwise its methods
     // could not be initialized or linked with regard to class inheritance.
   } else {
-    for (size_t i = 0, e = klass->NumDirectMethods(); i < e; i++) {
-      InstallStubsForMethod(klass->GetDirectMethod(i, sizeof(void*)));
-    }
-    for (size_t i = 0, e = klass->NumVirtualMethods(); i < e; i++) {
-      InstallStubsForMethod(klass->GetVirtualMethod(i, sizeof(void*)));
+    for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
+      InstallStubsForMethod(&method);
     }
   }
 }
 
 static void UpdateEntrypoints(ArtMethod* method, const void* quick_code)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  Runtime* const runtime = Runtime::Current();
-  jit::Jit* jit = runtime->GetJit();
-  if (jit != nullptr) {
-    const void* old_code_ptr = method->GetEntryPointFromQuickCompiledCode();
-    jit::JitCodeCache* code_cache = jit->GetCodeCache();
-    if (code_cache->ContainsCodePtr(old_code_ptr)) {
-      // Save the old compiled code since we need it to implement ClassLinker::GetQuickOatCodeFor.
-      code_cache->SaveCompiledCode(method, old_code_ptr);
-    }
-  }
   method->SetEntryPointFromQuickCompiledCode(quick_code);
 }
 
+bool Instrumentation::NeedDebugVersionForBootImageCode(ArtMethod* method, const void* code) const
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return Dbg::IsDebuggerActive() &&
+         Runtime::Current()->GetHeap()->IsInBootImageOatFile(code) &&
+         !method->IsNative() &&
+         !method->IsProxyMethod();
+}
+
 void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
-  if (method->IsAbstract() || method->IsProxyMethod()) {
+  if (!method->IsInvokable() || method->IsProxyMethod()) {
     // Do not change stubs for these methods.
     return;
   }
@@ -130,6 +133,9 @@
       new_quick_code = GetQuickToInterpreterBridge();
     } else if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
       new_quick_code = class_linker->GetQuickOatCodeFor(method);
+      if (NeedDebugVersionForBootImageCode(method, new_quick_code)) {
+        new_quick_code = GetQuickToInterpreterBridge();
+      }
     } else {
       new_quick_code = GetQuickResolutionStub();
     }
@@ -142,10 +148,13 @@
       // class, all its static methods code will be set to the instrumentation entry point.
       // For more details, see ClassLinker::FixupStaticTrampolines.
       if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
-        if (entry_exit_stubs_installed_) {
+        new_quick_code = class_linker->GetQuickOatCodeFor(method);
+        if (NeedDebugVersionForBootImageCode(method, new_quick_code)) {
+          // Oat code should not be used. Don't install the instrumentation stub;
+          // use the interpreter for instrumentation.
+          new_quick_code = GetQuickToInterpreterBridge();
+        } else if (entry_exit_stubs_installed_) {
           new_quick_code = GetQuickInstrumentationEntryPoint();
-        } else {
-          new_quick_code = class_linker->GetQuickOatCodeFor(method);
         }
       } else {
         new_quick_code = GetQuickResolutionStub();
@@ -307,7 +316,9 @@
 
 // Removes the instrumentation exit pc as the return PC for every quick frame.
 static void InstrumentationRestoreStack(Thread* thread, void* arg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+    REQUIRES(Locks::mutator_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+
   struct RestoreStackVisitor FINAL : public StackVisitor {
     RestoreStackVisitor(Thread* thread_in, uintptr_t instrumentation_exit_pc,
                         Instrumentation* instrumentation)
@@ -345,7 +356,7 @@
             LOG(INFO) << "  Removing exit stub in " << DescribeLocation();
           }
           if (instrumentation_frame.interpreter_entry_) {
-            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
           } else {
             CHECK(m == instrumentation_frame.method_) << PrettyMethod(m);
           }
@@ -397,146 +408,151 @@
   return (events & expected) != 0;
 }
 
+static void PotentiallyAddListenerTo(Instrumentation::InstrumentationEvent event,
+                                     uint32_t events,
+                                     std::list<InstrumentationListener*>& list,
+                                     InstrumentationListener* listener,
+                                     bool* has_listener)
+    REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  if (!HasEvent(event, events)) {
+    return;
+  }
+  // If there is a free slot in the list, we insert the listener in that slot.
+  // Otherwise we add it to the end of the list.
+  auto it = std::find(list.begin(), list.end(), nullptr);
+  if (it != list.end()) {
+    *it = listener;
+  } else {
+    list.push_back(listener);
+  }
+  *has_listener = true;
+}
+
 void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  if (HasEvent(kMethodEntered, events)) {
-    method_entry_listeners_.push_back(listener);
-    have_method_entry_listeners_ = true;
-  }
-  if (HasEvent(kMethodExited, events)) {
-    method_exit_listeners_.push_back(listener);
-    have_method_exit_listeners_ = true;
-  }
-  if (HasEvent(kMethodUnwind, events)) {
-    method_unwind_listeners_.push_back(listener);
-    have_method_unwind_listeners_ = true;
-  }
-  if (HasEvent(kBackwardBranch, events)) {
-    backward_branch_listeners_.push_back(listener);
-    have_backward_branch_listeners_ = true;
-  }
-  if (HasEvent(kInvokeVirtualOrInterface, events)) {
-    invoke_virtual_or_interface_listeners_.push_back(listener);
-    have_invoke_virtual_or_interface_listeners_ = true;
-  }
-  if (HasEvent(kDexPcMoved, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_dex_pc_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    dex_pc_listeners_.reset(modified);
-    have_dex_pc_listeners_ = true;
-  }
-  if (HasEvent(kFieldRead, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_field_read_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*field_read_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    field_read_listeners_.reset(modified);
-    have_field_read_listeners_ = true;
-  }
-  if (HasEvent(kFieldWritten, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_field_write_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*field_write_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    field_write_listeners_.reset(modified);
-    have_field_write_listeners_ = true;
-  }
-  if (HasEvent(kExceptionCaught, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_exception_caught_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    exception_caught_listeners_.reset(modified);
-    have_exception_caught_listeners_ = true;
-  }
+  PotentiallyAddListenerTo(kMethodEntered,
+                           events,
+                           method_entry_listeners_,
+                           listener,
+                           &have_method_entry_listeners_);
+  PotentiallyAddListenerTo(kMethodExited,
+                           events,
+                           method_exit_listeners_,
+                           listener,
+                           &have_method_exit_listeners_);
+  PotentiallyAddListenerTo(kMethodUnwind,
+                           events,
+                           method_unwind_listeners_,
+                           listener,
+                           &have_method_unwind_listeners_);
+  PotentiallyAddListenerTo(kBranch,
+                           events,
+                           branch_listeners_,
+                           listener,
+                           &have_branch_listeners_);
+  PotentiallyAddListenerTo(kInvokeVirtualOrInterface,
+                           events,
+                           invoke_virtual_or_interface_listeners_,
+                           listener,
+                           &have_invoke_virtual_or_interface_listeners_);
+  PotentiallyAddListenerTo(kDexPcMoved,
+                           events,
+                           dex_pc_listeners_,
+                           listener,
+                           &have_dex_pc_listeners_);
+  PotentiallyAddListenerTo(kFieldRead,
+                           events,
+                           field_read_listeners_,
+                           listener,
+                           &have_field_read_listeners_);
+  PotentiallyAddListenerTo(kFieldWritten,
+                           events,
+                           field_write_listeners_,
+                           listener,
+                           &have_field_write_listeners_);
+  PotentiallyAddListenerTo(kExceptionCaught,
+                           events,
+                           exception_caught_listeners_,
+                           listener,
+                           &have_exception_caught_listeners_);
   UpdateInterpreterHandlerTable();
 }
 
+static void PotentiallyRemoveListenerFrom(Instrumentation::InstrumentationEvent event,
+                                          uint32_t events,
+                                          std::list<InstrumentationListener*>& list,
+                                          InstrumentationListener* listener,
+                                          bool* has_listener)
+    REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  if (!HasEvent(event, events)) {
+    return;
+  }
+  auto it = std::find(list.begin(), list.end(), listener);
+  if (it != list.end()) {
+    // Just update the entry, do not remove from the list. Removing entries in the list
+    // is unsafe when mutators are iterating over it.
+    *it = nullptr;
+  }
+
+  // Check if the list contains any non-null listener, and update 'has_listener'.
+  for (InstrumentationListener* l : list) {
+    if (l != nullptr) {
+      *has_listener = true;
+      return;
+    }
+  }
+  *has_listener = false;
+}
+
 void Instrumentation::RemoveListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-
-  if (HasEvent(kMethodEntered, events) && have_method_entry_listeners_) {
-    method_entry_listeners_.remove(listener);
-    have_method_entry_listeners_ = !method_entry_listeners_.empty();
-  }
-  if (HasEvent(kMethodExited, events) && have_method_exit_listeners_) {
-    method_exit_listeners_.remove(listener);
-    have_method_exit_listeners_ = !method_exit_listeners_.empty();
-  }
-  if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) {
-    method_unwind_listeners_.remove(listener);
-    have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
-  }
-  if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) {
-    backward_branch_listeners_.remove(listener);
-    have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
-  }
-  if (HasEvent(kInvokeVirtualOrInterface, events) && have_invoke_virtual_or_interface_listeners_) {
-    invoke_virtual_or_interface_listeners_.remove(listener);
-    have_invoke_virtual_or_interface_listeners_ = !invoke_virtual_or_interface_listeners_.empty();
-  }
-  if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
-    modified->remove(listener);
-    have_dex_pc_listeners_ = !modified->empty();
-    if (have_dex_pc_listeners_) {
-      dex_pc_listeners_.reset(modified);
-    } else {
-      dex_pc_listeners_.reset();
-      delete modified;
-    }
-  }
-  if (HasEvent(kFieldRead, events) && have_field_read_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*field_read_listeners_.get());
-    modified->remove(listener);
-    have_field_read_listeners_ = !modified->empty();
-    if (have_field_read_listeners_) {
-      field_read_listeners_.reset(modified);
-    } else {
-      field_read_listeners_.reset();
-      delete modified;
-    }
-  }
-  if (HasEvent(kFieldWritten, events) && have_field_write_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*field_write_listeners_.get());
-    modified->remove(listener);
-    have_field_write_listeners_ = !modified->empty();
-    if (have_field_write_listeners_) {
-      field_write_listeners_.reset(modified);
-    } else {
-      field_write_listeners_.reset();
-      delete modified;
-    }
-  }
-  if (HasEvent(kExceptionCaught, events) && have_exception_caught_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
-    modified->remove(listener);
-    have_exception_caught_listeners_ = !modified->empty();
-    if (have_exception_caught_listeners_) {
-      exception_caught_listeners_.reset(modified);
-    } else {
-      exception_caught_listeners_.reset();
-      delete modified;
-    }
-  }
+  PotentiallyRemoveListenerFrom(kMethodEntered,
+                                events,
+                                method_entry_listeners_,
+                                listener,
+                                &have_method_entry_listeners_);
+  PotentiallyRemoveListenerFrom(kMethodExited,
+                                events,
+                                method_exit_listeners_,
+                                listener,
+                                &have_method_exit_listeners_);
+  PotentiallyRemoveListenerFrom(kMethodUnwind,
+                                events,
+                                method_unwind_listeners_,
+                                listener,
+                                &have_method_unwind_listeners_);
+  PotentiallyRemoveListenerFrom(kBranch,
+                                events,
+                                branch_listeners_,
+                                listener,
+                                &have_branch_listeners_);
+  PotentiallyRemoveListenerFrom(kInvokeVirtualOrInterface,
+                                events,
+                                invoke_virtual_or_interface_listeners_,
+                                listener,
+                                &have_invoke_virtual_or_interface_listeners_);
+  PotentiallyRemoveListenerFrom(kDexPcMoved,
+                                events,
+                                dex_pc_listeners_,
+                                listener,
+                                &have_dex_pc_listeners_);
+  PotentiallyRemoveListenerFrom(kFieldRead,
+                                events,
+                                field_read_listeners_,
+                                listener,
+                                &have_field_read_listeners_);
+  PotentiallyRemoveListenerFrom(kFieldWritten,
+                                events,
+                                field_write_listeners_,
+                                listener,
+                                &have_field_write_listeners_);
+  PotentiallyRemoveListenerFrom(kExceptionCaught,
+                                events,
+                                exception_caught_listeners_,
+                                listener,
+                                &have_exception_caught_listeners_);
   UpdateInterpreterHandlerTable();
 }
 
@@ -604,9 +620,11 @@
       empty = IsDeoptimizedMethodsEmpty();  // Avoid lock violation.
     }
     if (empty) {
-      instrumentation_stubs_installed_ = false;
       MutexLock mu(self, *Locks::thread_list_lock_);
       Runtime::Current()->GetThreadList()->ForEach(InstrumentationRestoreStack, this);
+      // Only do this after restoring, as walking the stack when restoring will see
+      // the instrumentation exit pc.
+      instrumentation_stubs_installed_ = false;
     }
   }
 }
@@ -625,10 +643,12 @@
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     SetQuickAllocEntryPointsInstrumented(instrumented);
     ResetQuickAllocEntryPoints();
+    alloc_entrypoints_instrumented_ = instrumented;
   } else {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     SetQuickAllocEntryPointsInstrumented(instrumented);
     ResetQuickAllocEntryPoints();
+    alloc_entrypoints_instrumented_ = instrumented;
   }
 }
 
@@ -667,8 +687,7 @@
   }
 }
 
-void Instrumentation::UpdateMethodsCode(ArtMethod* method, const void* quick_code) {
-  DCHECK(method->GetDeclaringClass()->IsResolved());
+void Instrumentation::UpdateMethodsCodeImpl(ArtMethod* method, const void* quick_code) {
   const void* new_quick_code;
   if (LIKELY(!instrumentation_stubs_installed_)) {
     new_quick_code = quick_code;
@@ -690,6 +709,18 @@
   UpdateEntrypoints(method, new_quick_code);
 }
 
+void Instrumentation::UpdateMethodsCode(ArtMethod* method, const void* quick_code) {
+  DCHECK(method->GetDeclaringClass()->IsResolved());
+  UpdateMethodsCodeImpl(method, quick_code);
+}
+
+void Instrumentation::UpdateMethodsCodeFromDebugger(ArtMethod* method, const void* quick_code) {
+  // When the debugger attaches, we may update the entry points of all methods of a class
+  // to the interpreter bridge. A method's declaring class might not be in the resolved
+  // state yet in that case.
+  UpdateMethodsCodeImpl(method, quick_code);
+}
+
 bool Instrumentation::AddDeoptimizedMethod(ArtMethod* method) {
   if (IsDeoptimizedMethod(method)) {
     // Already in the map. Return.
@@ -728,7 +759,7 @@
 void Instrumentation::Deoptimize(ArtMethod* method) {
   CHECK(!method->IsNative());
   CHECK(!method->IsProxyMethod());
-  CHECK(!method->IsAbstract());
+  CHECK(method->IsInvokable());
 
   Thread* self = Thread::Current();
   {
@@ -751,7 +782,7 @@
 void Instrumentation::Undeoptimize(ArtMethod* method) {
   CHECK(!method->IsNative());
   CHECK(!method->IsProxyMethod());
-  CHECK(!method->IsAbstract());
+  CHECK(method->IsInvokable());
 
   Thread* self = Thread::Current();
   bool empty;
@@ -772,6 +803,9 @@
       UpdateEntrypoints(method, GetQuickResolutionStub());
     } else {
       const void* quick_code = class_linker->GetQuickOatCodeFor(method);
+      if (NeedDebugVersionForBootImageCode(method, quick_code)) {
+        quick_code = GetQuickToInterpreterBridge();
+      }
       UpdateEntrypoints(method, quick_code);
     }
 
@@ -852,7 +886,7 @@
   ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing);
 }
 
-const void* Instrumentation::GetQuickCodeFor(ArtMethod* method, size_t pointer_size) const {
+const void* Instrumentation::GetQuickCodeFor(ArtMethod* method, PointerSize pointer_size) const {
   Runtime* runtime = Runtime::Current();
   if (LIKELY(!instrumentation_stubs_installed_)) {
     const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
@@ -871,28 +905,24 @@
 void Instrumentation::MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
                                            ArtMethod* method,
                                            uint32_t dex_pc) const {
-  auto it = method_entry_listeners_.begin();
-  bool is_end = (it == method_entry_listeners_.end());
-  // Implemented this way to prevent problems caused by modification of the list while iterating.
-  while (!is_end) {
-    InstrumentationListener* cur = *it;
-    ++it;
-    is_end = (it == method_entry_listeners_.end());
-    cur->MethodEntered(thread, this_object, method, dex_pc);
+  if (HasMethodEntryListeners()) {
+    for (InstrumentationListener* listener : method_entry_listeners_) {
+      if (listener != nullptr) {
+        listener->MethodEntered(thread, this_object, method, dex_pc);
+      }
+    }
   }
 }
 
 void Instrumentation::MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
                                           ArtMethod* method,
                                           uint32_t dex_pc, const JValue& return_value) const {
-  auto it = method_exit_listeners_.begin();
-  bool is_end = (it == method_exit_listeners_.end());
-  // Implemented this way to prevent problems caused by modification of the list while iterating.
-  while (!is_end) {
-    InstrumentationListener* cur = *it;
-    ++it;
-    is_end = (it == method_exit_listeners_.end());
-    cur->MethodExited(thread, this_object, method, dex_pc, return_value);
+  if (HasMethodExitListeners()) {
+    for (InstrumentationListener* listener : method_exit_listeners_) {
+      if (listener != nullptr) {
+        listener->MethodExited(thread, this_object, method, dex_pc, return_value);
+      }
+    }
   }
 }
 
@@ -901,7 +931,9 @@
                                         uint32_t dex_pc) const {
   if (HasMethodUnwindListeners()) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
-      listener->MethodUnwind(thread, this_object, method, dex_pc);
+      if (listener != nullptr) {
+        listener->MethodUnwind(thread, this_object, method, dex_pc);
+      }
     }
   }
 }
@@ -909,16 +941,21 @@
 void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                                           ArtMethod* method,
                                           uint32_t dex_pc) const {
-  std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_);
-  for (InstrumentationListener* listener : *original.get()) {
-    listener->DexPcMoved(thread, this_object, method, dex_pc);
+  for (InstrumentationListener* listener : dex_pc_listeners_) {
+    if (listener != nullptr) {
+      listener->DexPcMoved(thread, this_object, method, dex_pc);
+    }
   }
 }
 
-void Instrumentation::BackwardBranchImpl(Thread* thread, ArtMethod* method,
-                                         int32_t offset) const {
-  for (InstrumentationListener* listener : backward_branch_listeners_) {
-    listener->BackwardBranch(thread, method, offset);
+void Instrumentation::BranchImpl(Thread* thread,
+                                 ArtMethod* method,
+                                 uint32_t dex_pc,
+                                 int32_t offset) const {
+  for (InstrumentationListener* listener : branch_listeners_) {
+    if (listener != nullptr) {
+      listener->Branch(thread, method, dex_pc, offset);
+    }
   }
 }
 
@@ -927,26 +964,33 @@
                                                    ArtMethod* caller,
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
+  // We cannot have thread suspension since that would cause the this_object parameter to
+  // potentially become a dangling pointer. An alternative could be to put it in a handle instead.
+  ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
-    listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+    if (listener != nullptr) {
+      listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+    }
   }
 }
 
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field) const {
-  std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
-  for (InstrumentationListener* listener : *original.get()) {
-    listener->FieldRead(thread, this_object, method, dex_pc, field);
+  for (InstrumentationListener* listener : field_read_listeners_) {
+    if (listener != nullptr) {
+      listener->FieldRead(thread, this_object, method, dex_pc, field);
+    }
   }
 }
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field, const JValue& field_value) const {
-  std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
-  for (InstrumentationListener* listener : *original.get()) {
-    listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+  for (InstrumentationListener* listener : field_write_listeners_) {
+    if (listener != nullptr) {
+      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+    }
   }
 }
 
@@ -955,9 +999,10 @@
   if (HasExceptionCaughtListeners()) {
     DCHECK_EQ(thread->GetException(), exception_object);
     thread->ClearException();
-    std::shared_ptr<std::list<InstrumentationListener*>> original(exception_caught_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->ExceptionCaught(thread, exception_object);
+    for (InstrumentationListener* listener : exception_caught_listeners_) {
+      if (listener != nullptr) {
+        listener->ExceptionCaught(thread, exception_object);
+      }
     }
     thread->SetException(exception_object);
   }
@@ -1018,7 +1063,7 @@
 
   ArtMethod* method = instrumentation_frame.method_;
   uint32_t length;
-  const size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
   JValue return_value;
   if (return_shorty == 'V') {
@@ -1043,14 +1088,16 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
-  if (deoptimize) {
+  if (deoptimize && Runtime::Current()->IsDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
                                 PrettyMethod(visitor.caller).c_str(),
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+    self->PushDeoptimizationContext(return_value,
+                                    return_shorty == 'L',
+                                    false /* from_code */,
                                     nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
@@ -1063,7 +1110,7 @@
   }
 }
 
-void Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
+uintptr_t Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1087,6 +1134,7 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  return instrumentation_frame.return_pc_;
 }
 
 std::string InstrumentationStackFrame::Dump() const {
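
PotentiallyAddListenerTo and PotentiallyRemoveListenerFrom above implement a slot-reuse discipline: removal nulls out the matching entry instead of erasing it, so iterators held by concurrently iterating mutators stay valid, and insertion fills a null slot before growing the list. A standalone sketch of just that pattern (Listener stands in for InstrumentationListener; the mutator_lock_ discipline is elided):

#include <algorithm>
#include <list>

struct Listener {};

void AddListener(std::list<Listener*>& list, Listener* listener, bool* has_listener) {
  auto it = std::find(list.begin(), list.end(), nullptr);
  if (it != list.end()) {
    *it = listener;            // Reuse a previously freed slot.
  } else {
    list.push_back(listener);  // std::list nodes never move, so growth is safe.
  }
  *has_listener = true;
}

void RemoveListener(std::list<Listener*>& list, Listener* listener, bool* has_listener) {
  auto it = std::find(list.begin(), list.end(), listener);
  if (it != list.end()) {
    *it = nullptr;  // Never erase: erasing would invalidate a held iterator.
  }
  // The short-cut flag is recomputed from the surviving entries.
  *has_listener = std::any_of(list.begin(), list.end(),
                              [](Listener* entry) { return entry != nullptr; });
}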
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 8dd2357..757be8e 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -22,6 +22,7 @@
 #include <unordered_set>
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
@@ -94,8 +95,11 @@
   virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
-  // Call-back for when we get a backward branch.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  // Call-back for when we execute a branch.
+  virtual void Branch(Thread* thread,
+                      ArtMethod* method,
+                      uint32_t dex_pc,
+                      int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
   // Call-back for when we get an invokevirtual or an invokeinterface.
@@ -104,6 +108,7 @@
                                         ArtMethod* caller,
                                         uint32_t dex_pc,
                                         ArtMethod* callee)
+      REQUIRES(Roles::uninterruptible_)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 };
 
@@ -121,7 +126,7 @@
     kFieldRead = 0x10,
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
-    kBackwardBranch = 0x80,
+    kBranch = 0x80,
     kInvokeVirtualOrInterface = 0x100,
   };
 
@@ -223,10 +228,14 @@
   void UpdateMethodsCode(ArtMethod* method, const void* quick_code)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
+  // Update the code of a method respecting any installed stubs from debugger.
+  void UpdateMethodsCodeFromDebugger(ArtMethod* method, const void* quick_code)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
+
   // Get the quick code for the given method. More efficient than asking the class linker as it
   // will short-cut to GetCode if instrumentation and static method resolution stubs aren't
   // installed.
-  const void* GetQuickCodeFor(ArtMethod* method, size_t pointer_size) const
+  const void* GetQuickCodeFor(ArtMethod* method, PointerSize pointer_size) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ForceInterpretOnly() {
@@ -243,6 +252,11 @@
     return forced_interpret_only_;
   }
 
+  // Code is in the boot image oat file, which isn't compiled as debuggable.
+  // We need the debug version (interpreter or JITed) in that case.
+  bool NeedDebugVersionForBootImageCode(ArtMethod* method, const void* code) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool AreExitStubsInstalled() const {
     return instrumentation_stubs_installed_;
   }
@@ -275,8 +289,8 @@
     return have_exception_caught_listeners_;
   }
 
-  bool HasBackwardBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return have_backward_branch_listeners_;
+  bool HasBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_branch_listeners_;
   }
 
   bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -286,7 +300,16 @@
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
-        have_exception_caught_listeners_ || have_method_unwind_listeners_;
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_ || have_invoke_virtual_or_interface_listeners_;
+  }
+
+  // Is any instrumentation *other* than what is needed for JIT profiling active?
+  bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
+        have_field_read_listeners_ || have_field_write_listeners_ ||
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_;
   }
 
   // Inform listeners that a method has been entered. A dex PC is provided as we may install
@@ -323,11 +346,11 @@
     }
   }
 
-  // Inform listeners that a backward branch has been taken (only supported by the interpreter).
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t offset) const
+  // Inform listeners that a branch has been taken (only supported by the interpreter).
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (UNLIKELY(HasBackwardBranchListeners())) {
-      BackwardBranchImpl(thread, method, offset);
+    if (UNLIKELY(HasBranchListeners())) {
+      BranchImpl(thread, method, dex_pc, offset);
     }
   }
 
@@ -380,7 +403,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generates an unwind event.
-  void PopMethodForUnwind(Thread* self, bool is_deoptimization) const
+  // Returns the return pc for the instrumentation frame that's popped.
+  uintptr_t PopMethodForUnwind(Thread* self, bool is_deoptimization) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Call back for configure stubs.
@@ -402,6 +426,12 @@
                                size_t inlined_frames_before_frame)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Does not hold the lock; used to check whether the state changed from not instrumented to
+  // instrumented during a GC suspend point.
+  bool AllocEntrypointsInstrumented() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return alloc_entrypoints_instrumented_;
+  }
+
  private:
   InstrumentationLevel GetCurrentInstrumentationLevel() const;
 
@@ -417,6 +447,13 @@
                !Locks::classlinker_classes_lock_);
 
   void UpdateInterpreterHandlerTable() REQUIRES(Locks::mutator_lock_) {
+    /*
+     * TUNING: Dalvik's mterp stashes the actual current handler table base in a
+     * TLS field. For ARM, this enables all suspend, debug & tracing checks to be
+     * collapsed into a single conditionally-executed load instruction.
+     * Move to Dalvik-style handler-table management for both the goto interpreter
+     * and mterp.
+     */
     interpreter_handler_table_ = IsActive() ? kAlternativeHandlerTable : kMainHandlerTable;
   }
 
@@ -434,7 +471,7 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
+  void BranchImpl(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void InvokeVirtualOrInterfaceImpl(Thread* thread,
                                     mirror::Object* this_object,
@@ -462,6 +499,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_, deoptimized_methods_lock_);
   bool IsDeoptimizedMethodsEmpty() const
       SHARED_REQUIRES(Locks::mutator_lock_, deoptimized_methods_lock_);
+  void UpdateMethodsCodeImpl(ArtMethod* method, const void* quick_code)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
+
 
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
   bool instrumentation_stubs_installed_;
@@ -505,8 +545,8 @@
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
-  bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  // Do we have any branch listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -517,20 +557,25 @@
   InstrumentationLevelTable requested_instrumentation_levels_ GUARDED_BY(Locks::mutator_lock_);
 
   // The event listeners, written to with the mutator_lock_ exclusively held.
+  // Mutators must be able to iterate over these lists concurrently, that is, with listeners being
+  // added or removed while iterating. The modifying thread holds the exclusive lock, so other
+  // threads cannot iterate (i.e. read the data of the list) at the same time, but they do keep
+  // iterators that need to remain valid. This is the reason these listeners are std::list and not,
+  // for example, std::vector: the existing storage for a std::list does not move.
+  // Note that mutators cannot make a copy of these lists before iterating, as the instrumentation
+  // listeners can also be deleted concurrently.
+  // As a result, these lists are never trimmed. That's acceptable given the low number of
+  // listeners we have.
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
-  std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
       GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> dex_pc_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> field_read_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> field_write_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> exception_caught_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_read_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_write_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // The set of methods being deoptimized (by the debugger) which must be executed with interpreter
   // only.
@@ -543,8 +588,12 @@
   InterpreterHandlerTable interpreter_handler_table_ GUARDED_BY(Locks::mutator_lock_);
 
   // Greater than 0 if quick alloc entry points instrumented.
-  size_t quick_alloc_entry_points_instrumentation_counter_
-      GUARDED_BY(Locks::instrument_entrypoints_lock_);
+  size_t quick_alloc_entry_points_instrumentation_counter_;
+
+  // alloc_entrypoints_instrumented_ is only updated with all the threads suspended; this is done
+  // to prevent races with the GC, which relies on thread suspension to only see
+  // alloc_entrypoints_instrumented_ change during suspend points.
+  bool alloc_entrypoints_instrumented_;
 
   friend class InstrumentationTest;  // For GetCurrentInstrumentationLevel and ConfigureStubs.
 
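The std::list comment in the instrumentation.h hunk above leans on a standard-library guarantee worth making concrete: inserting into a std::list never moves existing nodes or invalidates iterators, while inserting into a std::vector may reallocate and invalidate everything. A minimal standalone sketch (plain C++, none of the ART types):

    #include <cassert>
    #include <list>
    #include <vector>

    int main() {
      std::list<int> l = {1, 2, 3};
      std::list<int>::iterator it = l.begin();  // Points at the node holding 1.
      for (int i = 0; i < 1000; ++i) {
        l.push_back(i);  // List nodes never move; `it` stays valid.
      }
      assert(*it == 1);

      std::vector<int> v = {1, 2, 3};
      int* p = &v[0];
      v.reserve(v.capacity() + 1);  // Forces reallocation; `p` now dangles.
      (void)p;  // Dereferencing `p` here would be undefined behavior.
      return 0;
    }
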
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index e4688a2..684c471 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -16,6 +16,7 @@
 
 #include "instrumentation.h"
 
+#include "base/enums.h"
 #include "common_runtime_test.h"
 #include "common_throws.h"
 #include "class_linker-inl.h"
@@ -37,7 +38,7 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_exception_caught_event(false), received_branch_event(false),
       received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
@@ -100,11 +101,12 @@
     received_exception_caught_event = true;
   }
 
-  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
-                      ArtMethod* method ATTRIBUTE_UNUSED,
-                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+  void Branch(Thread* thread ATTRIBUTE_UNUSED,
+              ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    received_backward_branch_event = true;
+    received_branch_event = true;
   }
 
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
@@ -124,7 +126,7 @@
     received_field_read_event = false;
     received_field_written_event = false;
     received_exception_caught_event = false;
-    received_backward_branch_event = false;
+    received_branch_event = false;
     received_invoke_virtual_or_interface_event = false;
   }
 
@@ -135,7 +137,7 @@
   bool received_field_read_event;
   bool received_field_written_event;
   bool received_exception_caught_event;
-  bool received_backward_branch_event;
+  bool received_branch_event;
   bool received_invoke_virtual_or_interface_event;
 
  private:
@@ -305,8 +307,8 @@
         return instr->HasFieldWriteListeners();
       case instrumentation::Instrumentation::kExceptionCaught:
         return instr->HasExceptionCaughtListeners();
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kBranch:
+        return instr->HasBranchListeners();
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
@@ -349,8 +351,8 @@
         self->ClearException();
         break;
       }
-      case instrumentation::Instrumentation::kBackwardBranch:
-        instr->BackwardBranch(self, method, dex_pc);
+      case instrumentation::Instrumentation::kBranch:
+        instr->Branch(self, method, dex_pc, -1);
         break;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
@@ -378,8 +380,8 @@
         return listener.received_field_written_event;
       case instrumentation::Instrumentation::kExceptionCaught:
         return listener.received_exception_caught_event;
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kBranch:
+        return listener.received_branch_event;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return listener.received_invoke_virtual_or_interface_event;
       default:
@@ -441,8 +443,8 @@
   TestEvent(instrumentation::Instrumentation::kExceptionCaught);
 }
 
-TEST_F(InstrumentationTest, BackwardBranchEvent) {
-  TestEvent(instrumentation::Instrumentation::kBackwardBranch);
+TEST_F(InstrumentationTest, BranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBranch);
 }
 
 TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
@@ -460,7 +462,7 @@
   mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
   ASSERT_TRUE(klass != nullptr);
   ArtMethod* method_to_deoptimize = klass->FindDeclaredDirectMethod("instanceMethod", "()V",
-                                                                    sizeof(void*));
+                                                                    kRuntimePointerSize);
   ASSERT_TRUE(method_to_deoptimize != nullptr);
 
   EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
@@ -507,7 +509,7 @@
   mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
   ASSERT_TRUE(klass != nullptr);
   ArtMethod* method_to_deoptimize = klass->FindDeclaredDirectMethod("instanceMethod", "()V",
-                                                                    sizeof(void*));
+                                                                    kRuntimePointerSize);
   ASSERT_TRUE(method_to_deoptimize != nullptr);
 
   EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
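The kBackwardBranch-to-kBranch rename in this test file widens the event to all branches and adds the branch pc alongside the signed offset. A listener that only wants the old backward-branch behavior (e.g. for loop hotness) can filter on the sign of dex_pc_offset. A hedged sketch with stand-in types (BranchEvent is illustrative, not an ART type):

    #include <cassert>
    #include <cstdint>

    struct BranchEvent {
      uint32_t dex_pc;        // Pc of the branch instruction itself.
      int32_t dex_pc_offset;  // Signed distance to the branch target.
    };

    // A negative offset means the branch targets an earlier pc, i.e. a loop edge.
    static bool IsBackwardBranch(const BranchEvent& e) { return e.dex_pc_offset < 0; }

    int main() {
      assert(IsBackwardBranch({20, -5}));   // Jumps back to pc 15.
      assert(!IsBackwardBranch({20, +5}));  // Forward jump to pc 25.
      return 0;
    }
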
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index f4658d5..1940d67 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -22,6 +22,7 @@
 #include "gc/collector/garbage_collector.h"
 #include "gc/space/image_space.h"
 #include "gc/weak_root_state.h"
+#include "image-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
@@ -32,7 +33,8 @@
 namespace art {
 
 InternTable::InternTable()
-    : image_added_to_intern_table_(false), log_new_roots_(false),
+    : images_added_to_intern_table_(false),
+      log_new_roots_(false),
       weak_intern_condition_("New intern condition", *Locks::intern_table_lock_),
       weak_root_state_(gc::kWeakRootStateNormal) {
 }
@@ -85,18 +87,39 @@
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::LookupStrong(mirror::String* s) {
-  return strong_interns_.Find(s);
+mirror::String* InternTable::LookupWeak(Thread* self, mirror::String* s) {
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  return LookupWeakLocked(s);
 }
 
-mirror::String* InternTable::LookupWeak(mirror::String* s) {
+mirror::String* InternTable::LookupStrong(Thread* self, mirror::String* s) {
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  return LookupStrongLocked(s);
+}
+
+mirror::String* InternTable::LookupStrong(Thread* self,
+                                          uint32_t utf16_length,
+                                          const char* utf8_data) {
+  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
+  Utf8String string(utf16_length,
+                    utf8_data,
+                    ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  return strong_interns_.Find(string);
+}
+
+mirror::String* InternTable::LookupWeakLocked(mirror::String* s) {
   return weak_interns_.Find(s);
 }
 
-void InternTable::SwapPostZygoteWithPreZygote() {
+mirror::String* InternTable::LookupStrongLocked(mirror::String* s) {
+  return strong_interns_.Find(s);
+}
+
+void InternTable::AddNewTable() {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  weak_interns_.SwapPostZygoteWithPreZygote();
-  strong_interns_.SwapPostZygoteWithPreZygote();
+  weak_interns_.AddNewTable();
+  strong_interns_.AddNewTable();
 }
 
 mirror::String* InternTable::InsertStrong(mirror::String* s) {
@@ -150,15 +173,14 @@
   RemoveWeak(s);
 }
 
-void InternTable::AddImageStringsToTable(gc::space::ImageSpace* image_space) {
-  CHECK(image_space != nullptr);
+void InternTable::AddImagesStringsToTable(const std::vector<gc::space::ImageSpace*>& image_spaces) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  if (!image_added_to_intern_table_) {
+  for (gc::space::ImageSpace* image_space : image_spaces) {
     const ImageHeader* const header = &image_space->GetImageHeader();
     // Check if we have the interned strings section.
     const ImageSection& section = header->GetImageSection(ImageHeader::kSectionInternedStrings);
     if (section.Size() > 0) {
-      ReadFromMemoryLocked(image_space->Begin() + section.Offset());
+      AddTableFromMemoryLocked(image_space->Begin() + section.Offset());
     } else {
       // TODO: Delete this logic?
       mirror::Object* root = header->GetImageRoot(ImageHeader::kDexCaches);
@@ -169,7 +191,7 @@
         for (size_t j = 0; j < num_strings; ++j) {
           mirror::String* image_string = dex_cache->GetResolvedString(j);
           if (image_string != nullptr) {
-            mirror::String* found = LookupStrong(image_string);
+            mirror::String* found = LookupStrongLocked(image_string);
             if (found == nullptr) {
               InsertStrong(image_string);
             } else {
@@ -179,32 +201,33 @@
         }
       }
     }
-    image_added_to_intern_table_ = true;
   }
+  images_added_to_intern_table_ = true;
 }
 
 mirror::String* InternTable::LookupStringFromImage(mirror::String* s) {
-  if (image_added_to_intern_table_) {
-    return nullptr;
-  }
-  gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetImageSpace();
-  if (image == nullptr) {
+  DCHECK(!images_added_to_intern_table_);
+  const std::vector<gc::space::ImageSpace*>& image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  if (image_spaces.empty()) {
     return nullptr;  // No image present.
   }
-  mirror::Object* root = image->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
   const std::string utf8 = s->ToModifiedUtf8();
-  for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
-    const DexFile* dex_file = dex_cache->GetDexFile();
-    // Binary search the dex file for the string index.
-    const DexFile::StringId* string_id = dex_file->FindStringId(utf8.c_str());
-    if (string_id != nullptr) {
-      uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
-      // GetResolvedString() contains a RB.
-      mirror::String* image_string = dex_cache->GetResolvedString(string_idx);
-      if (image_string != nullptr) {
-        return image_string;
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+    mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
+    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      // Binary search the dex file for the string index.
+      const DexFile::StringId* string_id = dex_file->FindStringId(utf8.c_str());
+      if (string_id != nullptr) {
+        uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
+        // GetResolvedString() contains a RB.
+        mirror::String* image_string = dex_cache->GetResolvedString(string_idx);
+        if (image_string != nullptr) {
+          return image_string;
+        }
       }
     }
   }
@@ -249,7 +272,7 @@
       }
     }
     // Check the strong table for a match.
-    mirror::String* strong = LookupStrong(s);
+    mirror::String* strong = LookupStrongLocked(s);
     if (strong != nullptr) {
       return strong;
     }
@@ -271,7 +294,7 @@
     CHECK(self->GetWeakRefAccessEnabled());
   }
   // There is no match in the strong table, check the weak table.
-  mirror::String* weak = LookupWeak(s);
+  mirror::String* weak = LookupWeakLocked(s);
   if (weak != nullptr) {
     if (is_strong) {
       // A match was found in the weak table. Promote to the strong table.
@@ -281,9 +304,11 @@
     return weak;
   }
   // Check the image for a match.
-  mirror::String* image = LookupStringFromImage(s);
-  if (image != nullptr) {
-    return is_strong ? InsertStrong(image) : InsertWeak(image);
+  if (!images_added_to_intern_table_) {
+    mirror::String* const image_string = LookupStringFromImage(s);
+    if (image_string != nullptr) {
+      return is_strong ? InsertStrong(image_string) : InsertWeak(image_string);
+    }
   }
   // No match in the strong table or the weak table. Insert into the strong / weak table.
   return is_strong ? InsertStrong(s) : InsertWeak(s);
@@ -314,8 +339,7 @@
 }
 
 bool InternTable::ContainsWeak(mirror::String* s) {
-  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return LookupWeak(s) == s;
+  return LookupWeak(Thread::Current(), s) == s;
 }
 
 void InternTable::SweepInternTableWeaks(IsMarkedVisitor* visitor) {
@@ -323,27 +347,18 @@
   weak_interns_.SweepWeaks(visitor);
 }
 
-void InternTable::AddImageInternTable(gc::space::ImageSpace* image_space) {
-  const ImageSection& intern_section = image_space->GetImageHeader().GetImageSection(
-      ImageHeader::kSectionInternedStrings);
-  // Read the string tables from the image.
-  const uint8_t* ptr = image_space->Begin() + intern_section.Offset();
-  const size_t offset = ReadFromMemory(ptr);
-  CHECK_LE(offset, intern_section.Size());
-}
-
-size_t InternTable::ReadFromMemory(const uint8_t* ptr) {
+size_t InternTable::AddTableFromMemory(const uint8_t* ptr) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return ReadFromMemoryLocked(ptr);
+  return AddTableFromMemoryLocked(ptr);
 }
 
-size_t InternTable::ReadFromMemoryLocked(const uint8_t* ptr) {
-  return strong_interns_.ReadIntoPreZygoteTable(ptr);
+size_t InternTable::AddTableFromMemoryLocked(const uint8_t* ptr) {
+  return strong_interns_.AddTableFromMemory(ptr);
 }
 
 size_t InternTable::WriteToMemory(uint8_t* ptr) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return strong_interns_.WriteFromPostZygoteTable(ptr);
+  return strong_interns_.WriteToMemory(ptr);
 }
 
 std::size_t InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& root) const {
@@ -361,71 +376,131 @@
   return a.Read()->Equals(b.Read());
 }
 
-size_t InternTable::Table::ReadIntoPreZygoteTable(const uint8_t* ptr) {
-  CHECK_EQ(pre_zygote_table_.Size(), 0u);
+bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a,
+                                               const Utf8String& b) const {
+  if (kIsDebugBuild) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
+  mirror::String* a_string = a.Read();
+  uint32_t a_length = static_cast<uint32_t>(a_string->GetLength());
+  if (a_length != b.GetUtf16Length()) {
+    return false;
+  }
+  if (a_string->IsCompressed()) {
+    size_t b_byte_count = strlen(b.GetUtf8Data());
+    size_t b_utf8_length = CountModifiedUtf8Chars(b.GetUtf8Data(), b_byte_count);
+    // The modified-UTF-8 single-byte character range is 0x01 .. 0x7f.
+    // String compression only applies to regular ASCII, which has the same exact range,
+    // not to extended ASCII, which goes up to 0xff.
+    const bool is_b_regular_ascii = (b_byte_count == b_utf8_length);
+    if (is_b_regular_ascii) {
+      return memcmp(b.GetUtf8Data(),
+                    a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0;
+    } else {
+      return false;
+    }
+  } else {
+    const uint16_t* a_value = a_string->GetValue();
+    return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0;
+  }
+}
+
+size_t InternTable::Table::AddTableFromMemory(const uint8_t* ptr) {
   size_t read_count = 0;
-  pre_zygote_table_ = UnorderedSet(ptr, false /* make copy */, &read_count);
+  UnorderedSet set(ptr, /*make copy*/false, &read_count);
+  if (set.Empty()) {
+    // Avoid inserting empty sets.
+    return read_count;
+  }
+  // TODO: Disable this for app images if app images have intern tables.
+  static constexpr bool kCheckDuplicates = true;
+  if (kCheckDuplicates) {
+    for (GcRoot<mirror::String>& string : set) {
+      CHECK(Find(string.Read()) == nullptr) << "Already found " << string.Read()->ToModifiedUtf8();
+    }
+  }
+  // Insert at the front since we add new interns into the back.
+  tables_.insert(tables_.begin(), std::move(set));
   return read_count;
 }
 
-size_t InternTable::Table::WriteFromPostZygoteTable(uint8_t* ptr) {
-  return post_zygote_table_.WriteToMemory(ptr);
+size_t InternTable::Table::WriteToMemory(uint8_t* ptr) {
+  if (tables_.empty()) {
+    return 0;
+  }
+  UnorderedSet* table_to_write;
+  UnorderedSet combined;
+  if (tables_.size() > 1) {
+    table_to_write = &combined;
+    for (UnorderedSet& table : tables_) {
+      for (GcRoot<mirror::String>& string : table) {
+        combined.Insert(string);
+      }
+    }
+  } else {
+    table_to_write = &tables_.back();
+  }
+  return table_to_write->WriteToMemory(ptr);
 }
 
 void InternTable::Table::Remove(mirror::String* s) {
-  auto it = post_zygote_table_.Find(GcRoot<mirror::String>(s));
-  if (it != post_zygote_table_.end()) {
-    post_zygote_table_.Erase(it);
-  } else {
-    it = pre_zygote_table_.Find(GcRoot<mirror::String>(s));
-    DCHECK(it != pre_zygote_table_.end());
-    pre_zygote_table_.Erase(it);
+  for (UnorderedSet& table : tables_) {
+    auto it = table.Find(GcRoot<mirror::String>(s));
+    if (it != table.end()) {
+      table.Erase(it);
+      return;
+    }
   }
+  LOG(FATAL) << "Attempting to remove non-interned string " << s->ToModifiedUtf8();
 }
 
 mirror::String* InternTable::Table::Find(mirror::String* s) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
-  auto it = pre_zygote_table_.Find(GcRoot<mirror::String>(s));
-  if (it != pre_zygote_table_.end()) {
-    return it->Read();
-  }
-  it = post_zygote_table_.Find(GcRoot<mirror::String>(s));
-  if (it != post_zygote_table_.end()) {
-    return it->Read();
+  for (UnorderedSet& table : tables_) {
+    auto it = table.Find(GcRoot<mirror::String>(s));
+    if (it != table.end()) {
+      return it->Read();
+    }
   }
   return nullptr;
 }
 
-void InternTable::Table::SwapPostZygoteWithPreZygote() {
-  if (pre_zygote_table_.Empty()) {
-    std::swap(pre_zygote_table_, post_zygote_table_);
-    VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table";
-  } else {
-    // This case happens if read the intern table from the image.
-    VLOG(heap) << "Not swapping due to non-empty pre_zygote_table_";
+mirror::String* InternTable::Table::Find(const Utf8String& string) {
+  Locks::intern_table_lock_->AssertHeld(Thread::Current());
+  for (UnorderedSet& table : tables_) {
+    auto it = table.Find(string);
+    if (it != table.end()) {
+      return it->Read();
+    }
   }
+  return nullptr;
+}
+
+void InternTable::Table::AddNewTable() {
+  tables_.push_back(UnorderedSet());
 }
 
 void InternTable::Table::Insert(mirror::String* s) {
-  // Always insert the post zygote table, this gets swapped when we create the zygote to be the
-  // pre zygote table.
-  post_zygote_table_.Insert(GcRoot<mirror::String>(s));
+  // Always insert into the last table; the image tables come before it and we avoid
+  // inserting into them to prevent dirty pages.
+  DCHECK(!tables_.empty());
+  tables_.back().Insert(GcRoot<mirror::String>(s));
 }
 
 void InternTable::Table::VisitRoots(RootVisitor* visitor) {
   BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
       visitor, RootInfo(kRootInternedString));
-  for (auto& intern : pre_zygote_table_) {
-    buffered_visitor.VisitRoot(intern);
-  }
-  for (auto& intern : post_zygote_table_) {
-    buffered_visitor.VisitRoot(intern);
+  for (UnorderedSet& table : tables_) {
+    for (auto& intern : table) {
+      buffered_visitor.VisitRoot(intern);
+    }
   }
 }
 
 void InternTable::Table::SweepWeaks(IsMarkedVisitor* visitor) {
-  SweepWeaks(&pre_zygote_table_, visitor);
-  SweepWeaks(&post_zygote_table_, visitor);
+  for (UnorderedSet& table : tables_) {
+    SweepWeaks(&table, visitor);
+  }
 }
 
 void InternTable::Table::SweepWeaks(UnorderedSet* set, IsMarkedVisitor* visitor) {
@@ -443,7 +518,12 @@
 }
 
 size_t InternTable::Table::Size() const {
-  return pre_zygote_table_.Size() + post_zygote_table_.Size();
+  return std::accumulate(tables_.begin(),
+                         tables_.end(),
+                         0U,
+                         [](size_t sum, const UnorderedSet& set) {
+                           return sum + set.Size();
+                         });
 }
 
 void InternTable::ChangeWeakRootState(gc::WeakRootState new_state) {
@@ -461,10 +541,10 @@
 
 InternTable::Table::Table() {
   Runtime* const runtime = Runtime::Current();
-  pre_zygote_table_.SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
-                                  runtime->GetHashTableMaxLoadFactor());
-  post_zygote_table_.SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
-                                   runtime->GetHashTableMaxLoadFactor());
+  // Initial table.
+  tables_.push_back(UnorderedSet());
+  tables_.back().SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
+                               runtime->GetHashTableMaxLoadFactor());
 }
 
 }  // namespace art
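The tables_ rewrite above replaces the fixed pre-zygote/post-zygote pair with an ordered collection: frozen tables at the front, one writable table at the back, and lookups that scan all of them. A simplified standalone model of that layout (std::string and std::unordered_set stand in for GcRoot<mirror::String> and the ART hash set; this is a sketch of the scheme, not the real implementation):

    #include <cassert>
    #include <string>
    #include <unordered_set>
    #include <vector>

    class MiniInternTable {
     public:
      MiniInternTable() { tables_.emplace_back(); }  // Initial writable table.

      // Freeze the current back table and start a new writable one (cf. AddNewTable).
      void AddNewTable() { tables_.emplace_back(); }

      // Lookups scan every table, oldest first (cf. Table::Find).
      const std::string* Find(const std::string& s) const {
        for (const auto& table : tables_) {
          auto it = table.find(s);
          if (it != table.end()) {
            return &*it;
          }
        }
        return nullptr;
      }

      // Inserts only ever touch the back table, keeping frozen tables clean.
      const std::string* Insert(const std::string& s) {
        return &*tables_.back().insert(s).first;
      }

     private:
      std::vector<std::unordered_set<std::string>> tables_;
    };

    int main() {
      MiniInternTable interns;
      const std::string* foo = interns.Insert("foo");
      interns.AddNewTable();               // E.g. at zygote creation.
      interns.Insert("bar");               // Lands in the new back table.
      assert(interns.Find("foo") == foo);  // Still reachable via the frozen table.
      assert(interns.Find("baz") == nullptr);
      return 0;
    }
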
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 3a4e8d8..f845de5 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -61,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
-  // can not be running while we are doing image writing. Maybe be called while while holding a
+  // cannot be running while we are doing image writing. May be called while holding a
   // lock since there will not be thread suspension.
   mirror::String* InternStrongImageString(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -84,10 +84,25 @@
   bool ContainsWeak(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::intern_table_lock_);
 
+  // Lookup a strong intern, returns null if not found.
+  mirror::String* LookupStrong(Thread* self, mirror::String* s)
+      REQUIRES(!Locks::intern_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::String* LookupStrong(Thread* self, uint32_t utf16_length, const char* utf8_data)
+      REQUIRES(!Locks::intern_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Lookup a weak intern, returns null if not found.
+  mirror::String* LookupWeak(Thread* self, mirror::String* s)
+      REQUIRES(!Locks::intern_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Total number of interned strings.
   size_t Size() const REQUIRES(!Locks::intern_table_lock_);
+
   // Total number of strongly live interned strings.
   size_t StrongSize() const REQUIRES(!Locks::intern_table_lock_);
+
   // Total number of weakly live interned strings.
   size_t WeakSize() const REQUIRES(!Locks::intern_table_lock_);
 
@@ -98,22 +113,20 @@
 
   void BroadcastForNewInterns() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Adds all of the resolved image strings from the image space into the intern table. The
-  // advantage of doing this is preventing expensive DexFile::FindStringId calls.
-  void AddImageStringsToTable(gc::space::ImageSpace* image_space)
+  // Adds all of the resolved image strings from the image spaces into the intern table. The
+  // advantage of doing this is preventing expensive DexFile::FindStringId calls. Sets
+  // images_added_to_intern_table_ to true.
+  void AddImagesStringsToTable(const std::vector<gc::space::ImageSpace*>& image_spaces)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::intern_table_lock_);
 
-  // Copy the post zygote tables to pre zygote to save memory by preventing dirty pages.
-  void SwapPostZygoteWithPreZygote()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::intern_table_lock_);
-
-  // Add an intern table which was serialized to the image.
-  void AddImageInternTable(gc::space::ImageSpace* image_space)
+  // Add a new intern table to insert into; the previous intern tables are still there but are
+  // no longer inserted into and ideally stay unmodified. This is done to prevent dirty pages.
+  void AddNewTable()
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::intern_table_lock_);
 
   // Read the intern table from memory. The elements aren't copied, the intern hash set data will
   // point to somewhere within ptr. Only reads the strong interns.
-  size_t ReadFromMemory(const uint8_t* ptr) REQUIRES(!Locks::intern_table_lock_)
+  size_t AddTableFromMemory(const uint8_t* ptr) REQUIRES(!Locks::intern_table_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Write the post zygote intern table to a pointer. Only writes the strong interns since it is
@@ -126,11 +139,32 @@
       REQUIRES(!Locks::intern_table_lock_);
 
  private:
+  // Modified UTF-8-encoded string treated as UTF-16.
+  class Utf8String {
+   public:
+    Utf8String(uint32_t utf16_length, const char* utf8_data, int32_t hash)
+        : hash_(hash), utf16_length_(utf16_length), utf8_data_(utf8_data) { }
+
+    int32_t GetHash() const { return hash_; }
+    uint32_t GetUtf16Length() const { return utf16_length_; }
+    const char* GetUtf8Data() const { return utf8_data_; }
+
+   private:
+    int32_t hash_;
+    uint32_t utf16_length_;
+    const char* utf8_data_;
+  };
+
   class StringHashEquals {
    public:
     std::size_t operator()(const GcRoot<mirror::String>& root) const NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::String>& a, const GcRoot<mirror::String>& b) const
         NO_THREAD_SAFETY_ANALYSIS;
+
+    // Utf8String can be used for lookup.
+    std::size_t operator()(const Utf8String& key) const { return key.GetHash(); }
+    bool operator()(const GcRoot<mirror::String>& a, const Utf8String& b) const
+        NO_THREAD_SAFETY_ANALYSIS;
   };
   class GcRootEmptyFn {
    public:
@@ -149,6 +183,8 @@
     Table();
     mirror::String* Find(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
+    mirror::String* Find(const Utf8String& string) SHARED_REQUIRES(Locks::mutator_lock_)
+        REQUIRES(Locks::intern_table_lock_);
     void Insert(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
     void Remove(mirror::String* s)
@@ -157,15 +193,17 @@
         SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
     void SweepWeaks(IsMarkedVisitor* visitor)
         SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-    void SwapPostZygoteWithPreZygote() REQUIRES(Locks::intern_table_lock_);
+    // Add a new intern table that will only be inserted into from now on.
+    void AddNewTable() REQUIRES(Locks::intern_table_lock_);
     size_t Size() const REQUIRES(Locks::intern_table_lock_);
-    // Read pre zygote table is called from ReadFromMemory which happens during runtime creation
-    // when we load the image intern table. Returns how many bytes were read.
-    size_t ReadIntoPreZygoteTable(const uint8_t* ptr)
+    // Read and add an intern table from ptr.
+    // Tables read are inserted at the front of the table array. Checks for conflicts with the
+    // existing tables (see kCheckDuplicates). Returns how many bytes were read.
+    size_t AddTableFromMemory(const uint8_t* ptr)
         REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
-    // The image writer calls WritePostZygoteTable through WriteToMemory, it writes the interns in
-    // the post zygote table. Returns how many bytes were written.
-    size_t WriteFromPostZygoteTable(uint8_t* ptr)
+    // Write the intern tables to ptr; if there are multiple tables, they are combined into a
+    // single one. Returns how many bytes were written.
+    size_t WriteToMemory(uint8_t* ptr)
         REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
    private:
@@ -175,12 +213,9 @@
     void SweepWeaks(UnorderedSet* set, IsMarkedVisitor* visitor)
         SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
-    // We call SwapPostZygoteWithPreZygote when we create the zygote to reduce private dirty pages
-    // caused by modifying the zygote intern table hash table. The pre zygote table are the
-    // interned strings which were interned before we created the zygote space. Post zygote is self
-    // explanatory.
-    UnorderedSet pre_zygote_table_;
-    UnorderedSet post_zygote_table_;
+    // We call AddNewTable when we create the zygote to reduce private dirty pages caused by
+    // modifying the zygote intern table. The back of the table array is modified as strings
+    // are interned.
+    std::vector<UnorderedSet> tables_;
   };
 
   // Insert if non null, otherwise return null. Must be called holding the mutator lock.
@@ -189,9 +224,9 @@
   mirror::String* Insert(mirror::String* s, bool is_strong, bool holding_locks)
       REQUIRES(!Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::String* LookupStrong(mirror::String* s)
+  mirror::String* LookupStrongLocked(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* LookupWeak(mirror::String* s)
+  mirror::String* LookupWeakLocked(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
   mirror::String* InsertStrong(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
@@ -214,7 +249,7 @@
   void RemoveWeakFromTransaction(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
-  size_t ReadFromMemoryLocked(const uint8_t* ptr)
+  size_t AddTableFromMemoryLocked(const uint8_t* ptr)
       REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Change the weak root state. May broadcast to waiters.
@@ -225,7 +260,7 @@
   void WaitUntilAccessible(Thread* self)
       REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
+  bool images_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable weak_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   // Since this contains (strong) roots, they need a read barrier to
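The Utf8String key added above lets a lookup run directly against raw modified-UTF-8 input, avoiding the allocation of a mirror::String just to probe the table. The essential move is comparing UTF-8 bytes against stored UTF-16 code units in place. A hedged, ASCII-only sketch (CompareAsciiToUtf16 is an illustrative stand-in for ART's CompareModifiedUtf8ToUtf16AsCodePointValues):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Compare a NUL-terminated ASCII key against UTF-16 data of known length.
    static bool CompareAsciiToUtf16(const char* utf8, const uint16_t* utf16, size_t utf16_len) {
      for (size_t i = 0; i < utf16_len; ++i) {
        if (utf8[i] == '\0' || static_cast<uint16_t>(utf8[i]) != utf16[i]) {
          return false;
        }
      }
      return utf8[utf16_len] == '\0';  // The key must not be longer either.
    }

    int main() {
      std::vector<uint16_t> stored = {'f', 'o', 'o'};  // UTF-16 payload of an interned string.
      assert(CompareAsciiToUtf16("foo", stored.data(), stored.size()));
      assert(!CompareAsciiToUtf16("fool", stored.data(), stored.size()));
      assert(!CompareAsciiToUtf16("fo", stored.data(), stored.size()));
      return 0;
    }
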
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index b60b32d..fe78bf2 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -35,12 +35,14 @@
   Handle<mirror::String> foo_3(
       hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
   Handle<mirror::String> bar(hs.NewHandle(intern_table.InternStrong(3, "bar")));
+  ASSERT_TRUE(foo_1.Get() != nullptr);
+  ASSERT_TRUE(foo_2.Get() != nullptr);
+  ASSERT_TRUE(foo_3.Get() != nullptr);
+  ASSERT_TRUE(bar.Get() != nullptr);
+  EXPECT_EQ(foo_1.Get(), foo_2.Get());
   EXPECT_TRUE(foo_1->Equals("foo"));
   EXPECT_TRUE(foo_2->Equals("foo"));
   EXPECT_TRUE(foo_3->Equals("foo"));
-  EXPECT_TRUE(foo_1.Get() != nullptr);
-  EXPECT_TRUE(foo_2.Get() != nullptr);
-  EXPECT_EQ(foo_1.Get(), foo_2.Get());
   EXPECT_NE(foo_1.Get(), bar.Get());
   EXPECT_NE(foo_2.Get(), bar.Get());
   EXPECT_NE(foo_3.Get(), bar.Get());
@@ -175,4 +177,39 @@
   }
 }
 
+TEST_F(InternTableTest, LookupStrong) {
+  ScopedObjectAccess soa(Thread::Current());
+  InternTable intern_table;
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::String> foo(hs.NewHandle(intern_table.InternStrong(3, "foo")));
+  Handle<mirror::String> bar(hs.NewHandle(intern_table.InternStrong(3, "bar")));
+  Handle<mirror::String> foobar(hs.NewHandle(intern_table.InternStrong(6, "foobar")));
+  ASSERT_TRUE(foo.Get() != nullptr);
+  ASSERT_TRUE(bar.Get() != nullptr);
+  ASSERT_TRUE(foobar.Get() != nullptr);
+  ASSERT_TRUE(foo->Equals("foo"));
+  ASSERT_TRUE(bar->Equals("bar"));
+  ASSERT_TRUE(foobar->Equals("foobar"));
+  ASSERT_NE(foo.Get(), bar.Get());
+  ASSERT_NE(foo.Get(), foobar.Get());
+  ASSERT_NE(bar.Get(), foobar.Get());
+  mirror::String* lookup_foo = intern_table.LookupStrong(soa.Self(), 3, "foo");
+  EXPECT_EQ(lookup_foo, foo.Get());
+  mirror::String* lookup_bar = intern_table.LookupStrong(soa.Self(), 3, "bar");
+  EXPECT_EQ(lookup_bar, bar.Get());
+  mirror::String* lookup_foobar = intern_table.LookupStrong(soa.Self(), 6, "foobar");
+  EXPECT_EQ(lookup_foobar, foobar.Get());
+  mirror::String* lookup_foox = intern_table.LookupStrong(soa.Self(), 4, "foox");
+  EXPECT_TRUE(lookup_foox == nullptr);
+  mirror::String* lookup_fooba = intern_table.LookupStrong(soa.Self(), 5, "fooba");
+  EXPECT_TRUE(lookup_fooba == nullptr);
+  mirror::String* lookup_foobaR = intern_table.LookupStrong(soa.Self(), 6, "foobaR");
+  EXPECT_TRUE(lookup_foobaR == nullptr);
+  // Try a hash conflict.
+  ASSERT_EQ(ComputeUtf16HashFromModifiedUtf8("foobar", 6),
+            ComputeUtf16HashFromModifiedUtf8("foobbS", 6));
+  mirror::String* lookup_foobbS = intern_table.LookupStrong(soa.Self(), 6, "foobbS");
+  EXPECT_TRUE(lookup_foobbS == nullptr);
+}
+
 }  // namespace art
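The hash-conflict assertion in the LookupStrong test above is not arbitrary: assuming ComputeUtf16HashFromModifiedUtf8 reduces to the Java-style 31-based polynomial hash for ASCII input, "foobar" and "foobbS" genuinely collide, because the per-character differences cancel (31 * 'a' + 'r' == 31 * 'b' + 'S' == 3121). A small sketch verifying the collision:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Java-style polynomial string hash (assumed equivalent for ASCII input).
    static uint32_t Utf16HashOfAscii(const char* s) {
      uint32_t hash = 0;
      for (size_t i = 0; s[i] != '\0'; ++i) {
        hash = hash * 31 + static_cast<uint32_t>(s[i]);
      }
      return hash;
    }

    int main() {
      assert(Utf16HashOfAscii("foobar") == Utf16HashOfAscii("foobbS"));  // Same hash...
      assert(strcmp("foobar", "foobbS") != 0);                           // ...different strings.
      return 0;
    }
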
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 7c0594a..101c9a1 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -18,12 +18,19 @@
 
 #include <limits>
 
+#include "common_throws.h"
 #include "interpreter_common.h"
+#include "interpreter_goto_table_impl.h"
+#include "interpreter_mterp_impl.h"
+#include "interpreter_switch_impl.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
 #include "unstarted_runtime.h"
+#include "mterp/mterp.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 
 namespace art {
 namespace interpreter {
@@ -223,57 +230,106 @@
 }
 
 enum InterpreterImplKind {
-  kSwitchImpl,            // Switch-based interpreter implementation.
-  kComputedGotoImplKind   // Computed-goto-based interpreter implementation.
+  kSwitchImplKind,        // Switch-based interpreter implementation.
+  kComputedGotoImplKind,  // Computed-goto-based interpreter implementation.
+  kMterpImplKind          // Assembly interpreter
 };
 static std::ostream& operator<<(std::ostream& os, const InterpreterImplKind& rhs) {
-  os << ((rhs == kSwitchImpl) ? "Switch-based interpreter" : "Computed-goto-based interpreter");
+  os << ((rhs == kSwitchImplKind)
+              ? "Switch-based interpreter"
+              : (rhs == kComputedGotoImplKind)
+                  ? "Computed-goto-based interpreter"
+                  : "Asm interpreter");
   return os;
 }
 
-#if !defined(__clang__)
-static constexpr InterpreterImplKind kInterpreterImplKind = kComputedGotoImplKind;
-#else
-// Clang 3.4 fails to build the goto interpreter implementation.
-static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImpl;
-template<bool do_access_check, bool transaction_active>
-JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
-  LOG(FATAL) << "UNREACHABLE";
-  UNREACHABLE();
-}
-// Explicit definitions of ExecuteGotoImpl.
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                    ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, true>(Thread* self,  const DexFile::CodeItem* code_item,
-                                   ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                    ShadowFrame& shadow_frame, JValue result_register);
-#endif
+static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 
-static JValue Execute(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame,
-                      JValue result_register)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
-                             ShadowFrame& shadow_frame, JValue result_register) {
+static inline JValue Execute(
+    Thread* self,
+    const DexFile::CodeItem* code_item,
+    ShadowFrame& shadow_frame,
+    JValue result_register,
+    bool stay_in_interpreter = false) SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
+  if (LIKELY(shadow_frame.GetDexPC() == 0)) {  // Entering the method, but not via deoptimization.
+    if (kIsDebugBuild) {
+      self->AssertNoPendingException();
+    }
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    ArtMethod* method = shadow_frame.GetMethod();
+
+    if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
+      instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                        method, 0);
+    }
+
+    if (!stay_in_interpreter) {
+      jit::Jit* jit = Runtime::Current()->GetJit();
+      if (jit != nullptr) {
+        jit->MethodEntered(self, shadow_frame.GetMethod());
+        if (jit->CanInvokeCompiledCode(method)) {
+          JValue result;
+
+          // Pop the shadow frame before calling into compiled code.
+          self->PopShadowFrame();
+          ArtInterpreterToCompiledCodeBridge(self, nullptr, code_item, &shadow_frame, &result);
+          // Push the shadow frame back as the caller will expect it.
+          self->PushShadowFrame(&shadow_frame);
+
+          return result;
+        }
+      }
+    }
+  }
+
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
+  // Lock counting is a special version of accessibility checks, and for simplicity and
+  // reduction of template parameters, we gate it behind access-checks mode.
+  ArtMethod* method = shadow_frame.GetMethod();
+  DCHECK(!method->SkipAccessChecks() || !method->MustCountLocks());
+
   bool transaction_active = Runtime::Current()->IsActiveTransaction();
-  if (LIKELY(shadow_frame.GetMethod()->IsPreverified())) {
+  if (LIKELY(method->SkipAccessChecks())) {
     // Enter the "without access check" interpreter.
-    if (kInterpreterImplKind == kSwitchImpl) {
+    if (kInterpreterImplKind == kMterpImplKind) {
       if (transaction_active) {
-        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register);
+        // No Mterp variant - just use the switch interpreter.
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
+                                              false);
+      } else if (UNLIKELY(!Runtime::Current()->IsStarted())) {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       } else {
-        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register);
+        while (true) {
+          // Mterp does not support all instrumentation/debugging.
+          if (MterpShouldSwitchInterpreters() != 0) {
+            return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                                   false);
+          }
+          bool returned = ExecuteMterpImpl(self, code_item, &shadow_frame, &result_register);
+          if (returned) {
+            return result_register;
+          } else {
+            // Mterp didn't like that instruction.  Single-step it with the reference interpreter.
+            result_register = ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame,
+                                                               result_register, true);
+            if (shadow_frame.GetDexPC() == DexFile::kDexNoIndex) {
+              // Single-stepped a return or an exception not handled locally.  Return to caller.
+              return result_register;
+            }
+          }
+        }
+      }
+    } else if (kInterpreterImplKind == kSwitchImplKind) {
+      if (transaction_active) {
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
+                                              false);
+      } else {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
@@ -285,11 +341,22 @@
     }
   } else {
     // Enter the "with access check" interpreter.
-    if (kInterpreterImplKind == kSwitchImpl) {
+    if (kInterpreterImplKind == kMterpImplKind) {
+      // No access check variants for Mterp.  Just use the switch version.
       if (transaction_active) {
-        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register,
+                                             false);
       } else {
-        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register,
+                                              false);
+      }
+    } else if (kInterpreterImplKind == kSwitchImplKind) {
+      if (transaction_active) {
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register,
+                                             false);
+      } else {
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register,
+                                              false);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
@@ -303,7 +370,8 @@
 }
 
 void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receiver,
-                                uint32_t* args, JValue* result) {
+                                uint32_t* args, JValue* result,
+                                bool stay_in_interpreter) {
   DCHECK_EQ(self, Thread::Current());
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
@@ -318,9 +386,9 @@
   if (code_item != nullptr) {
     num_regs =  code_item->registers_size_;
     num_ins = code_item->ins_size_;
-  } else if (method->IsAbstract()) {
+  } else if (!method->IsInvokable()) {
     self->EndAssertNoThreadSuspension(old_cause);
-    ThrowAbstractMethodError(method);
+    method->ThrowInvocationTimeError();
     return;
   } else {
     DCHECK(method->IsNative());
@@ -378,7 +446,7 @@
     }
   }
   if (LIKELY(!method->IsNative())) {
-    JValue r = Execute(self, code_item, *shadow_frame, JValue());
+    JValue r = Execute(self, code_item, *shadow_frame, JValue(), stay_in_interpreter);
     if (result != nullptr) {
       *result = r;
     }
@@ -397,7 +465,40 @@
   self->PopShadowFrame();
 }
 
-void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JValue* ret_val)
+static bool IsStringInit(const Instruction* instr, ArtMethod* caller)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (instr->Opcode() == Instruction::INVOKE_DIRECT ||
+      instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) {
+    // Instead of calling ResolveMethod(), which has a suspend point and can trigger
+    // GC, look up the callee method symbolically.
+    uint16_t callee_method_idx = (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+        instr->VRegB_3rc() : instr->VRegB_35c();
+    const DexFile* dex_file = caller->GetDexFile();
+    const DexFile::MethodId& method_id = dex_file->GetMethodId(callee_method_idx);
+    const char* class_name = dex_file->StringByTypeIdx(method_id.class_idx_);
+    const char* method_name = dex_file->GetMethodName(method_id);
+    // Compare method's class name and method name against string init.
+    // It's ok since it's not allowed to create your own java/lang/String.
+    // TODO: verify that assumption.
+    if ((strcmp(class_name, "Ljava/lang/String;") == 0) &&
+        (strcmp(method_name, "<init>") == 0)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static int16_t GetReceiverRegisterForStringInit(const Instruction* instr) {
+  DCHECK(instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE ||
+         instr->Opcode() == Instruction::INVOKE_DIRECT);
+  return (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+      instr->VRegC_3rc() : instr->VRegC_35c();
+}
+
+void EnterInterpreterFromDeoptimize(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    bool from_code,
+                                    JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
@@ -405,10 +506,14 @@
   // Are we executing the first shadow frame?
   bool first = true;
   while (shadow_frame != nullptr) {
+    // We do not want to recover lock state for lock counting when deoptimizing. Currently,
+    // the compiler should not have compiled a method that failed structured-locking checks.
+    DCHECK(!shadow_frame->GetMethod()->MustCountLocks());
+
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
-    uint32_t new_dex_pc;
+    uint32_t new_dex_pc = dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
       // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
       // the instrumentation. To prevent from reporting it a second time, we simply pass a
@@ -419,11 +524,48 @@
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
                                   // or DexFile::kDexNoIndex if there is none.
-    } else {
-      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      // For an invoke, use the dex pc of the next instruction.
+    } else if (!from_code) {
+      // For the debugger and full deoptimization stack, we must go past the invoke
+      // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
-      new_dex_pc = dex_pc + (instr->IsInvoke() ? instr->SizeInCodeUnits() : 0);
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      if (instr->IsInvoke()) {
+        if (IsStringInit(instr, shadow_frame->GetMethod())) {
+          uint16_t this_obj_vreg = GetReceiverRegisterForStringInit(instr);
+          // Move the StringFactory.newStringFromChars() result into the register representing
+          // "this object" when invoking the string constructor in the original dex instruction.
+          // Also move the result into all aliases.
+          DCHECK(value.GetL()->IsString());
+          SetStringInitValueToAllAliases(shadow_frame, this_obj_vreg, value);
+          // Calling string constructor in the original dex code doesn't generate a result value.
+          value.SetJ(0);
+        }
+        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+      } else if (instr->Opcode() == Instruction::NEW_INSTANCE) {
+        // It's possible to deoptimize at a NEW_INSTANCE dex instruction that allocates a
+        // java.lang.String, which is turned into a call into StringFactory.newEmptyString();
+        // Move the StringFactory.newEmptyString() result into the destination register.
+        DCHECK(value.GetL()->IsString());
+        shadow_frame->SetVRegReference(instr->VRegA_21c(), value.GetL());
+        // new-instance doesn't generate a result value.
+        value.SetJ(0);
+        // Skip the dex instruction since we essentially come back from an invocation.
+        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+        if (kIsDebugBuild) {
+          ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+          // This is a suspend point. But it's ok since value has been set into shadow_frame.
+          mirror::Class* klass = class_linker->ResolveType(
+              instr->VRegB_21c(), shadow_frame->GetMethod());
+          DCHECK(klass->IsStringClass());
+        }
+      } else {
+        CHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
+                     << " at dex_pc " << dex_pc
+                     << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
+      }
+    } else {
+      // Nothing to do, the dex_pc is the one at which the code requested
+      // the deoptimization.
     }
     if (new_dex_pc != DexFile::kDexNoIndex) {
       shadow_frame->SetDexPC(new_dex_pc);
@@ -432,6 +574,8 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    // Following deoptimizations of shadow frames must pass the invoke instruction.
+    from_code = false;
     first = false;
   }
   ret_val->SetJ(value.GetJ());
@@ -446,6 +590,10 @@
     return JValue();
   }
 
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->NotifyCompiledCodeToInterpreterTransition(self, shadow_frame->GetMethod());
+  }
   return Execute(self, code_item, *shadow_frame, JValue());
 }
 
@@ -490,5 +638,13 @@
   self->PopShadowFrame();
 }
 
+void CheckInterpreterAsmConstants() {
+  CheckMterpAsmConstants();
+}
+
+void InitInterpreterTls(Thread* self) {
+  InitMterpTls(self);
+}
+
 }  // namespace interpreter
 }  // namespace art
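The Execute loop added above for kMterpImplKind follows a simple fallback protocol: run mterp until it either completes the method or bails on an instruction it cannot handle; on a bailout, the reference switch interpreter single-steps past the offending instruction and the loop hands control back to mterp. A stubbed control-flow sketch (every function here is a stand-in, not an ART entry point):

    #include <cassert>

    // Stub: false means no debugger/instrumentation forces the switch interpreter.
    static bool MterpShouldSwitchInterpreters() { return false; }

    // Stub: pretend mterp bails twice on unsupported instructions, then finishes.
    static bool ExecuteMterpImpl(int* result) {
      static int bails = 2;
      if (bails-- > 0) {
        return false;  // Bail: the caller must single-step this instruction.
      }
      *result = 42;
      return true;  // Ran to completion.
    }

    // Stub: the reference interpreter executes exactly one instruction.
    static int ExecuteSwitchImplSingleStep(int result, bool* method_returned) {
      *method_returned = false;  // Pretend the stepped instruction did not return.
      return result;
    }

    int main() {
      int result = 0;
      while (true) {
        if (MterpShouldSwitchInterpreters()) {
          break;  // Would run the whole method in the switch interpreter.
        }
        if (ExecuteMterpImpl(&result)) {
          break;  // Mterp finished the method.
        }
        bool returned;
        result = ExecuteSwitchImplSingleStep(result, &returned);
        if (returned) {
          break;  // Single-stepped a return or an unhandled exception.
        }
      }
      assert(result == 42);
      return 0;
    }
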
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index b21ea84..bf4bcff 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -33,11 +33,15 @@
 namespace interpreter {
 
 // Called by ArtMethod::Invoke, shadow frames arguments are taken from the args array.
+// The optional stay_in_interpreter parameter (false by default) can be used by clients to
+// explicitly force interpretation in the remaining path that implements method invocation.
 extern void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method,
-                                       mirror::Object* receiver, uint32_t* args, JValue* result)
+                                       mirror::Object* receiver, uint32_t* args, JValue* result,
+                                       bool stay_in_interpreter = false)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame,
+// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
+extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
                                            JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -49,6 +53,11 @@
                                        ShadowFrame* shadow_frame, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+// One-time sanity check.
+void CheckInterpreterAsmConstants();
+
+void InitInterpreterTls(Thread* self);
+
 }  // namespace interpreter
 
 }  // namespace art
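The from_code flag threaded through EnterInterpreterFromDeoptimize above changes where a deoptimized frame resumes: a deoptimization explicitly requested by compiled code resumes at the requesting dex pc, while the debugger/full-deoptimization path resumes after the invoke, since the invoke already executed. A hedged sketch of just that pc arithmetic (simplified to plain integers, ignoring the exception path):

    #include <cassert>
    #include <cstdint>

    static uint32_t ResumeDexPc(uint32_t dex_pc, uint32_t insn_code_units, bool from_code) {
      // from_code: re-enter at the pc that requested deoptimization.
      // Otherwise: the invoke already ran, so skip past it.
      return from_code ? dex_pc : dex_pc + insn_code_units;
    }

    int main() {
      // An invoke-direct (format 35c) is 3 code units wide.
      assert(ResumeDexPc(10, 3, /*from_code=*/true) == 10);
      assert(ResumeDexPc(10, 3, /*from_code=*/false) == 13);
      return 0;
    }
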
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 18fb0d8..ac146b3 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -18,8 +18,10 @@
 
 #include <cmath>
 
+#include "base/enums.h"
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "jit/jit.h"
 #include "mirror/array-inl.h"
 #include "stack.h"
 #include "unstarted_runtime.h"
@@ -28,9 +30,6 @@
 namespace art {
 namespace interpreter {
 
-// All lambda closures have to be a consecutive pair of virtual registers.
-static constexpr size_t kLambdaVirtualRegisterWidth = 2;
-
 void ThrowNullPointerExceptionFromInterpreter() {
   ThrowNullPointerExceptionFromDexPC();
 }
@@ -501,27 +500,11 @@
                                 uint32_t (&arg)[kVarArgMax],
                                 uint32_t vregC) ALWAYS_INLINE;
 
-SHARED_REQUIRES(Locks::mutator_lock_)
-static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) ALWAYS_INLINE;
-
-static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) {
-  ArtMethod* target = new_shadow_frame->GetMethod();
-  if (UNLIKELY(target->IsNative() || target->IsProxyMethod())) {
-    return false;
-  }
-  Runtime* runtime = Runtime::Current();
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  return runtime->GetInstrumentation()->IsForcedInterpretOnly() ||
-        // Doing this check avoids doing compiled/interpreter transitions.
-        class_linker->IsQuickToInterpreterBridge(target->GetEntryPointFromQuickCompiledCode()) ||
-        // Force the use of interpreter when it is required by the debugger.
-        Dbg::IsForcedInterpreterNeededForCalling(self, target);
-}
-
-static void ArtInterpreterToCompiledCodeBridge(Thread* self,
-                                               const DexFile::CodeItem* code_item,
-                                               ShadowFrame* shadow_frame,
-                                               JValue* result)
+void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                        ArtMethod* caller,
+                                        const DexFile::CodeItem* code_item,
+                                        ShadowFrame* shadow_frame,
+                                        JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
@@ -546,9 +529,37 @@
   uint16_t arg_offset = (code_item == nullptr)
                             ? 0
                             : code_item->registers_size_ - code_item->ins_size_;
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && caller != nullptr) {
+    jit->NotifyInterpreterToCompiledCodeTransition(self, caller);
+  }
   method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
                  (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                 result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
+                 result, method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty());
+}
+
+void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
+                                    uint16_t this_obj_vreg,
+                                    JValue result)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Object* existing = shadow_frame->GetVRegReference(this_obj_vreg);
+  if (existing == nullptr) {
+    // If it's null, we come from compiled code that was deoptimized. Nothing to do,
+    // as the compiler verified there was no alias.
+    // Set the new string result of the StringFactory.
+    shadow_frame->SetVRegReference(this_obj_vreg, result.GetL());
+    return;
+  }
+  // Set the string init result into all aliases.
+  for (uint32_t i = 0, e = shadow_frame->NumberOfVRegs(); i < e; ++i) {
+    if (shadow_frame->GetVRegReference(i) == existing) {
+      DCHECK_EQ(shadow_frame->GetVRegReference(i),
+                reinterpret_cast<mirror::Object*>(shadow_frame->GetVReg(i)));
+      shadow_frame->SetVRegReference(i, result.GetL());
+      DCHECK_EQ(shadow_frame->GetVRegReference(i),
+                reinterpret_cast<mirror::Object*>(shadow_frame->GetVReg(i)));
+    }
+  }
 }
 
 template <bool is_range,
@@ -592,6 +603,10 @@
   //
   // (at this point the ArtMethod has already been replaced,
   // so we just need to fix-up the arguments)
+  //
+  // Note that FindMethodFromCode in entrypoint_utils-inl.h was also special-cased
+  // to handle the compiler optimization of replacing `this` with null without
+  // throwing NullPointerException.
   uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
   if (UNLIKELY(string_init)) {
     DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
@@ -633,17 +648,26 @@
         self, new_shadow_frame, StackedShadowFrameType::kShadowFrameUnderConstruction);
     self->EndAssertNoThreadSuspension(old_cause);
 
+    // The ArtMethod is needed to check the call site's type information against the
+    // callee's. Type information is retrieved from the DexFile/DexCache of the
+    // declared method.
+    //
+    // As a special case for proxy methods, which are not dex-backed, we have to
+    // retrieve type information from the interface method the proxy implements
+    // instead (which is dex-backed, since proxies themselves are never interfaces).
+    ArtMethod* method =
+        new_shadow_frame->GetMethod()->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+
     // We need to do runtime check on reference assignment. We need to load the shorty
     // to get the exact type of each reference argument.
-    const DexFile::TypeList* params = new_shadow_frame->GetMethod()->GetParameterTypeList();
+    const DexFile::TypeList* params = method->GetParameterTypeList();
     uint32_t shorty_len = 0;
-    const char* shorty = new_shadow_frame->GetMethod()->GetShorty(&shorty_len);
+    const char* shorty = method->GetShorty(&shorty_len);
 
     // Handle receiver apart since it's not part of the shorty.
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
 
-    if (!new_shadow_frame->GetMethod()->IsStatic()) {
+    if (!method->IsStatic()) {
       size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
@@ -661,9 +685,9 @@
         case 'L': {
           Object* o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != nullptr) {
-            size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+            PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
             Class* arg_type =
-                new_shadow_frame->GetMethod()->GetClassFromTypeIndex(
+                method->GetClassFromTypeIndex(
                     params->GetTypeItem(shorty_pos).type_idx_, true /* resolve */, pointer_size);
             if (arg_type == nullptr) {
               CHECK(self->IsExceptionPending());
@@ -705,7 +729,6 @@
 
     // Fast path: no extra checks.
     if (is_range) {
-      // TODO: Implement the range version of invoke-lambda
       uint16_t first_src_reg = vregC;
 
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
@@ -724,86 +747,27 @@
 
   // Do the call now.
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    if (NeedsInterpreter(self, new_shadow_frame)) {
+    ArtMethod* target = new_shadow_frame->GetMethod();
+    if (ClassLinker::ShouldUseInterpreterEntrypoint(
+        target,
+        target->GetEntryPointFromQuickCompiledCode())) {
       ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
     } else {
-      ArtInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
+      ArtInterpreterToCompiledCodeBridge(
+          self, shadow_frame.GetMethod(), code_item, new_shadow_frame, result);
     }
   } else {
     UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
 
   if (string_init && !self->IsExceptionPending()) {
-    // Set the new string result of the StringFactory.
-    shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
-    // Overwrite all potential copies of the original result of the new-instance of string with the
-    // new result of the StringFactory. Use the verifier to find this set of registers.
-    ArtMethod* method = shadow_frame.GetMethod();
-    MethodReference method_ref = method->ToMethodReference();
-    SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr = nullptr;
-    MethodRefToStringInitRegMap& method_to_string_init_map = Runtime::Current()->GetStringInitMap();
-    {
-      MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.find(method_ref);
-      if (it != method_to_string_init_map.end()) {
-        string_init_map_ptr = &it->second;
-      }
-    }
-    if (string_init_map_ptr == nullptr) {
-      SafeMap<uint32_t, std::set<uint32_t>> string_init_map =
-          verifier::MethodVerifier::FindStringInitMap(method);
-      MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.lower_bound(method_ref);
-      if (it == method_to_string_init_map.end() ||
-          method_to_string_init_map.key_comp()(method_ref, it->first)) {
-        it = method_to_string_init_map.PutBefore(it, method_ref, std::move(string_init_map));
-      }
-      string_init_map_ptr = &it->second;
-    }
-    if (string_init_map_ptr->size() != 0) {
-      uint32_t dex_pc = shadow_frame.GetDexPC();
-      auto map_it = string_init_map_ptr->find(dex_pc);
-      if (map_it != string_init_map_ptr->end()) {
-        const std::set<uint32_t>& reg_set = map_it->second;
-        for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
-          shadow_frame.SetVRegReference(*set_it, result->GetL());
-        }
-      }
-    }
+    SetStringInitValueToAllAliases(&shadow_frame, string_init_vreg_this, *result);
   }
 
   return !self->IsExceptionPending();
 }
 
 template<bool is_range, bool do_assignability_check>
-bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data ATTRIBUTE_UNUSED, JValue* result) {
-  const uint4_t num_additional_registers = inst->VRegB_25x();
-  // Argument word count.
-  const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth;
-  // The lambda closure register is always present and is not encoded in the count.
-  // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs.
-
-  // TODO: find a cleaner way to separate non-range and range information without duplicating
-  //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs25x];  // only used in invoke-XXX.
-  uint32_t vregC = 0;   // only used in invoke-XXX-range.
-  if (is_range) {
-    vregC = inst->VRegC_3rc();
-  } else {
-    // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path
-    inst->GetAllArgs25x(arg);
-  }
-
-  // TODO: if there's an assignability check, throw instead?
-  DCHECK(called_method->IsStatic());
-
-  return DoCallCommon<is_range, do_assignability_check>(
-      called_method, self, shadow_frame,
-      result, number_of_inputs, arg, vregC);
-}
-
-template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
   // Argument word count.
@@ -951,20 +915,6 @@
 EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
 #undef EXPLICIT_DO_CALL_TEMPLATE_DECL
 
-// Explicit DoLambdaCall template function declarations.
-#define EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check)               \
-  template SHARED_REQUIRES(Locks::mutator_lock_)                                                \
-  bool DoLambdaCall<_is_range, _do_assignability_check>(ArtMethod* method, Thread* self,        \
-                                                        ShadowFrame& shadow_frame,              \
-                                                        const Instruction* inst,                \
-                                                        uint16_t inst_data,                     \
-                                                        JValue* result)
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, false);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, true);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, false);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, true);
-#undef EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL
-
 // Explicit DoFilledNewArray template function declarations.
 #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active)       \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                                  \
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 8c495fc..7b38473 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -23,9 +23,11 @@
 
 #include <iostream>
 #include <sstream>
+#include <atomic>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "class_linker-inl.h"
@@ -34,13 +36,9 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/box_table.h"
-#include "lambda/closure.h"
-#include "lambda/closure_builder-inl.h"
-#include "lambda/leaking_allocator.h"
-#include "lambda/shorty_field_type.h"
+#include "jit/jit.h"
 #include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -67,16 +65,6 @@
 namespace art {
 namespace interpreter {
 
-// External references to both interpreter implementations.
-
-template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                                ShadowFrame& shadow_frame, JValue result_register);
-
-template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,
-                              ShadowFrame& shadow_frame, JValue result_register);
-
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -89,7 +77,9 @@
   StackHandleScope<1> hs(self);
   Handle<Object> h_ref(hs.NewHandle(ref));
   h_ref->MonitorEnter(self);
-  frame->GetLockCountData().AddMonitor<kMonitorCounting>(self, h_ref.Get());
+  if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
+    frame->GetLockCountData().AddMonitor(self, h_ref.Get());
+  }
 }
 
 template <bool kMonitorCounting>
@@ -101,7 +91,19 @@
   StackHandleScope<1> hs(self);
   Handle<Object> h_ref(hs.NewHandle(ref));
   h_ref->MonitorExit(self);
-  frame->GetLockCountData().RemoveMonitorOrThrow<kMonitorCounting>(self, h_ref.Get());
+  if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
+    frame->GetLockCountData().RemoveMonitorOrThrow(self, h_ref.Get());
+  }
+}
+
+template <bool kMonitorCounting>
+static inline bool DoMonitorCheckOnExit(Thread* self, ShadowFrame* frame)
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_) {
+  if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
+    return frame->GetLockCountData().CheckAllMonitorsReleasedOrThrow(self);
+  }
+  return true;
 }
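
Lock bookkeeping is now gated twice: kMonitorCounting is a compile-time switch and MustCountLocks() a per-method one, so the common path does no counting at all. A rough stand-in for the bookkeeping itself, assuming a LockCountData-like structure (illustrative only, not ART's implementation):

#include <vector>

// Illustrative stand-in for ART's LockCountData: remembers which monitors a
// frame entered and checks structured locking when the frame exits.
class LockCounter {
 public:
  void AddMonitor(void* obj) { held_.push_back(obj); }

  // Returns false if `obj` was never locked by this frame.
  bool RemoveMonitor(void* obj) {
    for (std::vector<void*>::iterator it = held_.begin(); it != held_.end(); ++it) {
      if (*it == obj) {
        held_.erase(it);
        return true;
      }
    }
    return false;
  }

  // True iff every monitor entered in this frame was also exited.
  bool AllReleased() const { return held_.empty(); }

 private:
  std::vector<void*> held_;
};
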
 
 void AbortTransactionF(Thread* self, const char* fmt, ...)
@@ -121,485 +123,7 @@
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
-// Invokes the given lambda closure. This is part of the invocation support and is used by
-// DoInvokeLambda functions.
-// Returns true on success, otherwise throws an exception and returns false.
-template<bool is_range, bool do_assignability_check>
-bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data, JValue* result);
-
-// Validates that the art method corresponding to a lambda method target
-// is semantically valid:
-//
-// Must be ACC_STATIC and ACC_LAMBDA. Must be a concrete managed implementation
-// (i.e. not native, not proxy, not abstract, ...).
-//
-// If the validation fails, return false and raise an exception.
-static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  bool success = false;
-
-  if (UNLIKELY(called_method == nullptr)) {
-    // The shadow frame should already be pushed, so we don't need to update it.
-  } else if (UNLIKELY(called_method->IsAbstract())) {
-    ThrowAbstractMethodError(called_method);
-    // TODO(iam): Also handle the case when the method is non-static, what error do we throw?
-    // TODO(iam): Also make sure that ACC_LAMBDA is set.
-  } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) {
-    // Method could be native, proxy method, etc. Lambda targets have to be concrete impls,
-    // so don't allow this.
-  } else {
-    success = true;
-  }
-
-  return success;
-}
-
-// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong.
-static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame,
-                                               const lambda::Closure* lambda_closure,
-                                               uint32_t vreg) {
-  // Split the closure pointer into lo and hi 32 bits so we can encode them into 2 virtual registers.
-  uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(lambda_closure));
-  uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(lambda_closure)
-                                                    >> BitSizeOf<uint32_t>());
-  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-
-  DCHECK_NE(closure_lo | closure_hi, 0u);
-
-  shadow_frame.SetVReg(vreg, closure_lo);
-  shadow_frame.SetVReg(vreg + 1, closure_hi);
-}
-
-// Handles create-lambda instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-//
-// The closure must be allocated big enough to hold the data, and should not be
-// pre-initialized. It is initialized with the actual captured variables as a side-effect,
-// although this should be unimportant to the caller since this function also handles storing it to
-// the ShadowFrame.
-//
-// As a work-in-progress implementation, this shoves the ArtMethod object corresponding
-// to the target dex method index into the target register vA and vA + 1.
-template<bool do_access_check>
-static inline bool DoCreateLambda(Thread* self,
-                                  const Instruction* inst,
-                                  /*inout*/ShadowFrame& shadow_frame,
-                                  /*inout*/lambda::ClosureBuilder* closure_builder,
-                                  /*inout*/lambda::Closure* uninitialized_closure) {
-  DCHECK(closure_builder != nullptr);
-  DCHECK(uninitialized_closure != nullptr);
-  DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure));
-
-  /*
-   * create-lambda is opcode 0x21c
-   * - vA is the target register where the closure will be stored into
-   *   (also stores into vA + 1)
-   * - vB is the method index which will be the target for a later invoke-lambda
-   */
-  const uint32_t method_idx = inst->VRegB_21c();
-  mirror::Object* receiver = nullptr;  // Always static. (see 'kStatic')
-  ArtMethod* sf_method = shadow_frame.GetMethod();
-  ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
-      method_idx, &receiver, sf_method, self);
-
-  uint32_t vreg_dest_closure = inst->VRegA_21c();
-
-  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
-    CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(vreg_dest_closure, 0u);
-    shadow_frame.SetVReg(vreg_dest_closure + 1, 0u);
-    return false;
-  }
-
-  lambda::ArtLambdaMethod* initialized_lambda_method;
-  // Initialize the ArtLambdaMethod with the right data.
-  {
-    lambda::ArtLambdaMethod* uninitialized_lambda_method =
-        reinterpret_cast<lambda::ArtLambdaMethod*>(
-            lambda::LeakingAllocator::AllocateMemory(self, sizeof(lambda::ArtLambdaMethod)));
-
-    std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes();
-    std::string captured_variables_long_type_desc;
-
-    // Synthesize a long type descriptor from the short one.
-    for (char shorty : captured_variables_shorty) {
-      lambda::ShortyFieldType shorty_field_type(shorty);
-      if (shorty_field_type.IsObject()) {
-        // Not the true type, but good enough until we implement verifier support.
-        captured_variables_long_type_desc += "Ljava/lang/Object;";
-        UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable";
-      } else if (shorty_field_type.IsLambda()) {
-        // Not the true type, but good enough until we implement verifier support.
-        captured_variables_long_type_desc += "Ljava/lang/Runnable;";
-        UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable";
-      } else {
-        // The primitive types have the same length shorty or not, so this is always correct.
-        DCHECK(shorty_field_type.IsPrimitive());
-        captured_variables_long_type_desc += shorty_field_type;
-      }
-    }
-
-    // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later.
-    // TODO: Strings need to come from the DexFile, so they won't need their own allocations.
-    char* captured_variables_type_desc = lambda::LeakingAllocator::MakeFlexibleInstance<char>(
-        self,
-        captured_variables_long_type_desc.size() + 1);
-    strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str());
-    char* captured_variables_shorty_copy = lambda::LeakingAllocator::MakeFlexibleInstance<char>(
-        self,
-        captured_variables_shorty.size() + 1);
-    strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str());
-
-    new (uninitialized_lambda_method) lambda::ArtLambdaMethod(called_method,
-                                                              captured_variables_type_desc,
-                                                              captured_variables_shorty_copy,
-                                                              true);  // innate lambda
-    initialized_lambda_method = uninitialized_lambda_method;
-  }
-
-  // Write all the closure captured variables and the closure header into the closure.
-  lambda::Closure* initialized_closure;
-  {
-    initialized_closure =
-        closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method);
-  }
-
-  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, initialized_closure, vreg_dest_closure);
-  return true;
-}
-
-// Reads out the 'Closure*' stored inside of vreg and vreg+1
-//
-// Validates that the art method points to a valid lambda function, otherwise throws
-// an exception and returns null.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
-                                                                 uint32_t vreg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  // Lambda closures take up a consecutive pair of 2 virtual registers.
-  // On 32-bit the high bits are always 0.
-  uint32_t vc_value_lo = shadow_frame.GetVReg(vreg);
-  uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1);
-
-  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
-                           | vc_value_lo;
-
-  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-  lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr);
-  DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure));
-
-  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    return nullptr;
-  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) {
-    // Sanity check against data corruption.
-    return nullptr;
-  }
-
-  return lambda_closure;
-}
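
Both WriteLambdaClosureIntoVRegs and this reader rely on splitting a native pointer across two adjacent 32-bit vregs, widening through uint64_t so the shift stays well-defined on 32-bit hosts. A self-contained round-trip sketch of that encoding:

#include <cstdint>

// Round-trip a pointer through two 32-bit halves, the way the interpreter
// stores a closure in vreg and vreg+1. Widening through uint64_t keeps the
// shift well-defined on 32-bit hosts, where uintptr_t is only 32 bits.
static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "pointer must fit in 64 bits");

inline void SplitPointer(const void* ptr, uint32_t* lo, uint32_t* hi) {
  uint64_t bits = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
  *lo = static_cast<uint32_t>(bits);
  *hi = static_cast<uint32_t>(bits >> 32);
}

inline const void* JoinPointer(uint32_t lo, uint32_t hi) {
  uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
  return reinterpret_cast<const void*>(static_cast<uintptr_t>(bits));
}
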
-
-// Forward declaration for lock annotations. See below for documentation.
-template <bool do_access_check>
-static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
-                                                               uint32_t string_idx)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-// Find the c-string data corresponding to a dex file's string index.
-// Otherwise, returns null if not found and throws a VerifyError.
-//
-// Note that with do_access_check=false, we never return null because the verifier
-// must guard against invalid string indices.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template <bool do_access_check>
-static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
-                                                               uint32_t string_idx) {
-  ArtMethod* method = shadow_frame.GetMethod();
-  const DexFile* dex_file = method->GetDexFile();
-
-  mirror::Class* declaring_class = method->GetDeclaringClass();
-  if (!do_access_check) {
-    // MethodVerifier refuses methods with string_idx out of bounds.
-    DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  } else {
-    // Access checks enabled: perform the string index bounds check ourselves.
-    if (string_idx >= dex_file->GetHeader().string_ids_size_) {
-      ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds",
-                       string_idx);
-      return nullptr;
-    }
-  }
-
-  const char* type_string = dex_file->StringDataByIdx(string_idx);
-
-  if (UNLIKELY(type_string == nullptr)) {
-    CHECK_EQ(false, do_access_check)
-        << " verifier should've caught invalid string index " << string_idx;
-    CHECK_EQ(true, do_access_check)
-        << " string idx size check should've caught invalid string index " << string_idx;
-  }
-
-  return type_string;
-}
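
The do_access_check template parameter used here (and throughout the interpreter) compiles the bounds check in or out rather than testing a runtime flag. A minimal sketch of that flag-as-template-parameter pattern, with a hypothetical lookup table:

#include <cstdint>
#include <string>
#include <vector>

// When kDoCheck is false the condition constant-folds away and the caller
// must guarantee idx is valid (as the verifier does in the unchecked path).
template <bool kDoCheck>
const char* LookupOrNull(const std::vector<std::string>& table, uint32_t idx) {
  if (kDoCheck && idx >= table.size()) {
    return nullptr;  // checked path: caller raises the error
  }
  return table[idx].c_str();
}
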
-
-// Handles capture-variable instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template<bool do_access_check>
-static inline bool DoCaptureVariable(Thread* self,
-                                     const Instruction* inst,
-                                     /*inout*/ShadowFrame& shadow_frame,
-                                     /*inout*/lambda::ClosureBuilder* closure_builder) {
-  DCHECK(closure_builder != nullptr);
-  using lambda::ShortyFieldType;
-  /*
-   * capture-variable is opcode 0xf6, fmt 0x21c
-   * - vA is the source register of the variable that will be captured
-   * - vB is the string ID of the variable's type that will be captured
-   */
-  const uint32_t source_vreg = inst->VRegA_21c();
-  const uint32_t string_idx = inst->VRegB_21c();
-  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
-
-  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
-                                                                                  string_idx);
-  if (UNLIKELY(type_string == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  char type_first_letter = type_string[0];
-  ShortyFieldType shorty_type;
-  if (do_access_check &&
-      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "capture-variable vB must be a valid type");
-    return false;
-  } else {
-    // Already verified that the type is valid.
-    shorty_type = ShortyFieldType(type_first_letter);
-  }
-
-  const size_t captured_variable_count = closure_builder->GetCaptureCount();
-
-  // Note: types are specified explicitly so that the closure is packed tightly.
-  switch (shorty_type) {
-    case ShortyFieldType::kBoolean: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kByte: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kChar: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kShort: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kInt: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kDouble: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kFloat: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kLambda: {
-      UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda";
-      // TODO: Capturing lambdas recursively will be done at a later time.
-      UNREACHABLE();
-    }
-    case ShortyFieldType::kLong: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kObject: {
-      closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg));
-      UNIMPLEMENTED(FATAL) << " capture-variable with type kObject";
-      // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC.
-      UNREACHABLE();
-    }
-
-    default:
-      LOG(FATAL) << "Invalid shorty type value " << shorty_type;
-      UNREACHABLE();
-  }
-
-  DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount());
-
-  return true;
-}
-
-// Handles liberate-variable instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template<bool do_access_check>
-static inline bool DoLiberateVariable(Thread* self,
-                                     const Instruction* inst,
-                                     size_t captured_variable_index,
-                                     /*inout*/ShadowFrame& shadow_frame) {
-  using lambda::ShortyFieldType;
-  /*
-   * liberate-variable is opcode 0xf7, fmt 0x22c
-   * - vA is the destination register
-   * - vB is the register with the lambda closure in it
-   * - vC is the string ID which needs to be a valid field type descriptor
-   */
-
-  const uint32_t dest_vreg = inst->VRegA_22c();
-  const uint32_t closure_vreg = inst->VRegB_22c();
-  const uint32_t string_idx = inst->VRegC_22c();
-  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
-
-
-  // Synthesize a long type descriptor from a shorty type descriptor list.
-  // TODO: Fix the dex encoding to contain the long and short type descriptors.
-  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
-                                                                                  string_idx);
-  if (UNLIKELY(do_access_check && type_string == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(dest_vreg, 0);
-    return false;
-  }
-
-  char type_first_letter = type_string[0];
-  ShortyFieldType shorty_type;
-  if (do_access_check &&
-      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "liberate-variable vC must be a valid type");
-    shadow_frame.SetVReg(dest_vreg, 0);
-    return false;
-  } else {
-    // Already verified that the type is valid.
-    shorty_type = ShortyFieldType(type_first_letter);
-  }
-
-  // Check for closure being null *after* the type check.
-  // This way we can access the type info in case we fail later, to know how many vregs to clear.
-  const lambda::Closure* lambda_closure =
-      ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-
-    // Clear the destination vreg(s) to be safe.
-    shadow_frame.SetVReg(dest_vreg, 0);
-    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-      shadow_frame.SetVReg(dest_vreg + 1, 0);
-    }
-    return false;
-  }
-
-  if (do_access_check &&
-      UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) {
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "liberate-variable captured variable index %zu out of bounds",
-                     lambda_closure->GetNumberOfCapturedVariables());
-    // Clear the destination vreg(s) to be safe.
-    shadow_frame.SetVReg(dest_vreg, 0);
-    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-      shadow_frame.SetVReg(dest_vreg + 1, 0);
-    }
-    return false;
-  }
-
-  // Verify that the runtime type of the captured-variable matches the requested dex type.
-  if (do_access_check) {
-    ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index);
-    if (actual_type != shorty_type) {
-      ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "cannot liberate-variable of runtime type '%c' to dex type '%c'",
-                     static_cast<char>(actual_type),
-                     static_cast<char>(shorty_type));
-
-      shadow_frame.SetVReg(dest_vreg, 0);
-      if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-        shadow_frame.SetVReg(dest_vreg + 1, 0);
-      }
-      return false;
-    }
-
-    if (actual_type.IsLambda() || actual_type.IsObject()) {
-      UNIMPLEMENTED(FATAL) << "liberate-variable type checks needs to "
-                           << "parse full type descriptor for objects and lambdas";
-    }
-  }
-
-  // Unpack the captured variable from the closure into the correct type, then save it to the vreg.
-  if (shorty_type.IsPrimitiveNarrow()) {
-    uint32_t primitive_narrow_value =
-        lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index);
-    shadow_frame.SetVReg(dest_vreg, primitive_narrow_value);
-  } else if (shorty_type.IsPrimitiveWide()) {
-      uint64_t primitive_wide_value =
-          lambda_closure->GetCapturedPrimitiveWide(captured_variable_index);
-      shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value));
-  } else if (shorty_type.IsObject()) {
-    mirror::Object* unpacked_object =
-        lambda_closure->GetCapturedObject(captured_variable_index);
-    shadow_frame.SetVRegReference(dest_vreg, unpacked_object);
-
-    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet";
-  } else if (shorty_type.IsLambda()) {
-    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet";
-  } else {
-    LOG(FATAL) << "unreachable";
-    UNREACHABLE();
-  }
-
-  return true;
-}
-
-template<bool do_access_check>
-static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                                  uint16_t inst_data, JValue* result) {
-  /*
-   * invoke-lambda is opcode 0x25
-   *
-   * - vC is the closure register (both vC and vC + 1 will be used to store the closure).
-   * - vB is the number of additional registers up to |{vD,vE,vF,vG}| (4)
-   * - the rest of the registers are always var-args
-   *
-   * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
-   */
-  uint32_t vreg_closure = inst->VRegC_25x();
-  const lambda::Closure* lambda_closure =
-      ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    result->SetJ(0);
-    return false;
-  }
-
-  ArtMethod* const called_method = lambda_closure->GetTargetMethod();
-  // Invoke a non-range lambda
-  return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
-                                              result);
-}
-
-// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]).
+// Handles invoke-XXX/range instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
 static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
@@ -615,11 +139,20 @@
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else if (UNLIKELY(called_method->IsAbstract())) {
-    ThrowAbstractMethodError(called_method);
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
     result->SetJ(0);
     return false;
   } else {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      if (type == kVirtual || type == kInterface) {
+        jit->InvokeVirtualOrInterface(
+            self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
+    }
+    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (type == kVirtual || type == kInterface) {
       instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
       if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
@@ -647,19 +180,26 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
+  CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
-      vtable_idx, sizeof(void*));
+      vtable_idx, kRuntimePointerSize);
   if (UNLIKELY(called_method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else if (UNLIKELY(called_method->IsAbstract())) {
-    ThrowAbstractMethodError(called_method);
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
     result->SetJ(0);
     return false;
   } else {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      jit->InvokeVirtualOrInterface(
+          self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
+      jit->AddSamples(self, shadow_frame.GetMethod(), 1, /*with_backedges*/false);
+    }
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
       instrumentation->InvokeVirtualOrInterface(
           self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
@@ -712,15 +252,20 @@
   ArtMethod* method = shadow_frame.GetMethod();
   mirror::Class* declaring_class = method->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  mirror::String* s = declaring_class->GetDexCacheStrings()[string_idx].Read();
-  if (UNLIKELY(s == nullptr)) {
+  DCHECK_LT(string_idx % mirror::DexCache::kDexCacheStringCacheSize,
+            declaring_class->GetDexFile().NumStringIds());
+  mirror::String* string_ptr =
+      mirror::StringDexCachePair::LookupString(declaring_class->GetDexCacheStrings(),
+                                               string_idx,
+                                               mirror::DexCache::kDexCacheStringCacheSize).Read();
+  if (UNLIKELY(string_ptr == nullptr)) {
     StackHandleScope<1> hs(self);
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-    s = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), string_idx,
-                                                            dex_cache);
+    string_ptr = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(),
+                                                                     string_idx,
+                                                                     dex_cache);
   }
-  return s;
+  return string_ptr;
 }
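
The rewritten lookup goes through a fixed-size, direct-mapped dex cache: the slot is string_idx modulo kDexCacheStringCacheSize, and each slot records which index it currently holds, so a colliding entry reads as a miss and falls back to ResolveString. A sketch of that cache shape, with stand-in types and names (not mirror::DexCache's actual layout):

#include <cstdint>

// Direct-mapped cache in the spirit of the DexCache string cache: the slot is
// idx % kSize, and each slot remembers which index it holds so a colliding
// lookup misses instead of returning the wrong entry.
template <typename T, uint32_t kSize>
class DirectMappedCache {
 public:
  DirectMappedCache() {
    for (uint32_t i = 0; i < kSize; ++i) {
      slots_[i].index = UINT32_MAX;
      slots_[i].value = nullptr;
    }
  }

  T* Lookup(uint32_t idx) const {
    const Slot& slot = slots_[idx % kSize];
    return (slot.index == idx) ? slot.value : nullptr;  // collision -> miss
  }

  void Store(uint32_t idx, T* value) {
    Slot& slot = slots_[idx % kSize];  // may evict a colliding entry
    slot.index = idx;
    slot.value = value;
  }

 private:
  struct Slot {
    uint32_t index;
    T* value;
  };
  Slot slots_[kSize];
};
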
 
 // Handles div-int, div-int/2addr, div-int/li16 and div-int/lit8 instructions.
@@ -864,74 +409,6 @@
   return 3;
 }
 
-template <bool _do_check>
-static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                               uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) {
-  /*
-   * box-lambda vA, vB /// opcode 0xf8, format 22x
-   * - vA is the target register where the Object representation of the closure will be stored into
-   * - vB is a closure (made by create-lambda)
-   *   (also reads vB + 1)
-   */
-  uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
-  uint32_t vreg_source_closure = inst->VRegB_22x();
-
-  lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
-                                                                      vreg_source_closure);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  mirror::Object* closure_as_object =
-      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure);
-
-  // Failed to box the lambda, an exception was raised.
-  if (UNLIKELY(closure_as_object == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  shadow_frame.SetVRegReference(vreg_target_object, closure_as_object);
-  return true;
-}
-
-template <bool _do_check> SHARED_REQUIRES(Locks::mutator_lock_)
-static inline bool DoUnboxLambda(Thread* self,
-                                 ShadowFrame& shadow_frame,
-                                 const Instruction* inst,
-                                 uint16_t inst_data) {
-  /*
-   * unbox-lambda vA, vB, [type id] /// opcode 0xf9, format 22c
-   * - vA is the target register where the closure will be written into
-   *   (also writes vA + 1)
-   * - vB is the Object representation of the closure (made by box-lambda)
-   */
-  uint32_t vreg_target_closure = inst->VRegA_22c(inst_data);
-  uint32_t vreg_source_object = inst->VRegB_22c();
-
-  // Raise NullPointerException if object is null
-  mirror::Object* boxed_closure_object = shadow_frame.GetVRegReference(vreg_source_object);
-  if (UNLIKELY(boxed_closure_object == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    return false;
-  }
-
-  lambda::Closure* unboxed_closure = nullptr;
-  // Raise an exception if unboxing fails.
-  if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
-                                                            /*out*/&unboxed_closure)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  DCHECK(unboxed_closure != nullptr);
-  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, unboxed_closure, vreg_target_closure);
-  return true;
-}
-
 uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame,
     uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
         SHARED_REQUIRES(Locks::mutator_lock_);
@@ -940,11 +417,13 @@
   __attribute__((cold))
   SHARED_REQUIRES(Locks::mutator_lock_);
 
+// Set to true to invoke TraceExecution before each bytecode is executed.
+constexpr bool kTraceExecutionEnabled = false;
+
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  constexpr bool kTracing = false;
-  if (kTracing) {
+  if (kTraceExecutionEnabled) {
 #define TRACE_LOG std::cerr
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
@@ -956,7 +435,7 @@
       oss << StringPrintf(" vreg%u=0x%08X", i, raw_value);
       if (ref_value != nullptr) {
         if (ref_value->GetClass()->IsStringClass() &&
-            ref_value->AsString()->GetValue() != nullptr) {
+            !ref_value->AsString()->IsValueNull()) {
           oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\"";
         } else {
           oss << "/" << PrettyTypeOf(ref_value);
@@ -972,6 +451,18 @@
   return branch_offset <= 0;
 }
 
+void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                        ArtMethod* caller,
+                                        const DexFile::CodeItem* code_item,
+                                        ShadowFrame* shadow_frame,
+                                        JValue* result);
+
+// Set the string value created from StringFactory.newStringFromXXX() into all aliases of
+// the result of StringFactory.newEmptyString().
+void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
+                                    uint16_t this_obj_vreg,
+                                    JValue result);
+
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                     \
@@ -1004,72 +495,6 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
-// Explicitly instantiate all DoCreateLambda functions.
-#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                                 \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                                    \
-bool DoCreateLambda<_do_check>(Thread* self,                                                      \
-                               const Instruction* inst,                                           \
-                               /*inout*/ShadowFrame& shadow_frame,                                \
-                               /*inout*/lambda::ClosureBuilder* closure_builder,                  \
-                               /*inout*/lambda::Closure* uninitialized_closure);
-
-EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
-EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
-#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
-
-// Explicitly instantiate all DoInvokeLambda functions.
-#define EXPLICIT_DO_INVOKE_LAMBDA_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                 \
-bool DoInvokeLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                               uint16_t inst_data, JValue* result);
-
-EXPLICIT_DO_INVOKE_LAMBDA_DECL(false);  // invoke-lambda
-EXPLICIT_DO_INVOKE_LAMBDA_DECL(true);   // invoke-lambda
-#undef EXPLICIT_DO_INVOKE_LAMBDA_DECL
-
-// Explicitly instantiate all DoBoxLambda functions.
-#define EXPLICIT_DO_BOX_LAMBDA_DECL(_do_check)                                                \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoBoxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                            uint16_t inst_data);
-
-EXPLICIT_DO_BOX_LAMBDA_DECL(false);  // box-lambda
-EXPLICIT_DO_BOX_LAMBDA_DECL(true);   // box-lambda
-#undef EXPLICIT_DO_BOX_LAMBDA_DECL
-
-// Explicitly instantiate all DoUnBoxLambda functions.
-#define EXPLICIT_DO_UNBOX_LAMBDA_DECL(_do_check)                                                \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                            \
-bool DoUnboxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                              uint16_t inst_data);
-
-EXPLICIT_DO_UNBOX_LAMBDA_DECL(false);  // unbox-lambda
-EXPLICIT_DO_UNBOX_LAMBDA_DECL(true);   // unbox-lambda
-#undef EXPLICIT_DO_BOX_LAMBDA_DECL
-
-// Explicitly instantiate all DoCaptureVariable functions.
-#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoCaptureVariable<_do_check>(Thread* self,                                         \
-                                  const Instruction* inst,                              \
-                                  ShadowFrame& shadow_frame,                            \
-                                  lambda::ClosureBuilder* closure_builder);
-
-EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false);  // capture-variable
-EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true);   // capture-variable
-#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
-
-// Explicitly instantiate all DoLiberateVariable functions.
-#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check)                                   \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoLiberateVariable<_do_check>(Thread* self,                                        \
-                                   const Instruction* inst,                             \
-                                   size_t captured_variable_index,                      \
-                                   ShadowFrame& shadow_frame);                          \
-
-EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false);  // liberate-variable
-EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true);   // liberate-variable
-#undef EXPLICIT_DO_LIBERATE_LAMBDA_DECL
 }  // namespace interpreter
 }  // namespace art
 
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9766299..37dd63b 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -14,20 +14,29 @@
  * limitations under the License.
  */
 
+#include "interpreter_goto_table_impl.h"
+
+// Common includes
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "stack.h"
+#include "thread.h"
+
+// Clang compiles the GOTO interpreter very slowly, so we skip it under Clang. The includes
+// below are implementation details needed only when the GOTO interpreter is compiled.
 #if !defined(__clang__)
-// Clang 3.4 fails to build the goto interpreter implementation.
-
-
-#include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
-
-#include <memory>  // std::unique_ptr
+#endif
 
 namespace art {
 namespace interpreter {
 
+#if !defined(__clang__)
+
 // In the following macros, we expect the following local variables exist:
 // - "self": the current Thread*.
 // - "inst" : the current Instruction*.
@@ -63,10 +72,22 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
-  do { \
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+#define BRANCH_INSTRUMENTATION(offset)                                                          \
+  do {                                                                                          \
+    if (UNLIKELY(instrumentation->HasBranchListeners())) {                                      \
+      instrumentation->Branch(self, method, dex_pc, offset);                                    \
+    }                                                                                           \
+    JValue result;                                                                              \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {           \
+      return result;                                                                            \
+    }                                                                                           \
+  } while (false)
+
+#define HOTNESS_UPDATE()                                                                       \
+  do {                                                                                         \
+    if (jit != nullptr) {                                                                      \
+      jit->AddSamples(self, method, 1, /*with_backedges*/ true);                               \
+    }                                                                                          \
   } while (false)
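
BRANCH_INSTRUMENTATION now fires on every branch, notifying listeners and giving the JIT a chance at on-stack replacement, while HOTNESS_UPDATE samples only backward branches so loop iterations drive method hotness. A standalone sketch of the backedge-counting side, with an assumed threshold and hypothetical names:

#include <cstdint>

// Sample only backward branches (the same test as IsBackwardBranch) and
// report when a loop is hot enough for a JIT to consider OSR.
class HotnessCounter {
 public:
  explicit HotnessCounter(uint32_t threshold) : threshold_(threshold), samples_(0) {}

  // Returns true once enough backward branches have been seen.
  bool AddSample(int32_t branch_offset) {
    if (branch_offset <= 0) {
      ++samples_;
    }
    return samples_ >= threshold_;
  }

 private:
  const uint32_t threshold_;
  uint32_t samples_;
};
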
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -80,19 +101,8 @@
 #define HANDLE_INSTRUCTION_START(opcode) op_##opcode:  // NOLINT(whitespace/labels)
 #define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK()
 
-// Use with instructions labeled with kExperimental flag:
-#define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode)                                             \
-  HANDLE_INSTRUCTION_START(opcode);                                                               \
-  DCHECK(inst->IsExperimental());                                                                 \
-  if (Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas)) {
-#define HANDLE_EXPERIMENTAL_INSTRUCTION_END()                                                     \
-  } else {                                                                                        \
-      UnexpectedOpcode(inst, shadow_frame);                                                       \
-  } HANDLE_INSTRUCTION_END();
-
 #define HANDLE_MONITOR_CHECKS()                                                                   \
-  if (!shadow_frame.GetLockCountData().                                                           \
-          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+  if (!DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame)) {                       \
     HANDLE_PENDING_EXCEPTION();                                                                   \
   }
 
@@ -151,14 +161,14 @@
   static const void* const handlersTable[instrumentation::kNumHandlerTables][kNumPackedOpcodes] = {
     {
     // Main handler table.
-#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&op_##code,
+#define INSTRUCTION_HANDLER(o, code, n, f, i, a, v) &&op_##code,
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
 #undef DEX_INSTRUCTION_LIST
 #undef INSTRUCTION_HANDLER
     }, {
     // Alternative handler table.
-#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&alt_op_##code,
+#define INSTRUCTION_HANDLER(o, code, n, f, i, a, v) &&alt_op_##code,
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
 #undef DEX_INSTRUCTION_LIST
@@ -178,19 +188,9 @@
   uint16_t inst_data;
   const void* const* currentHandlersTable;
   UPDATE_HANDLER_TABLE();
-  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
-    if (kIsDebugBuild) {
-      self->AssertNoPendingException();
-    }
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-    if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
-      instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                        shadow_frame.GetMethod(), 0);
-    }
-  }
-
-  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
-  size_t lambda_captured_variable_index = 0;
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
 
   // Jump to first instruction.
   ADVANCE(0);
@@ -282,7 +282,6 @@
     JValue result;
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -297,7 +296,6 @@
     JValue result;
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -313,7 +311,6 @@
     result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -328,7 +325,6 @@
     result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -364,7 +360,6 @@
       }
     }
     result.SetL(obj_result);
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -546,8 +541,7 @@
     if (LIKELY(c != nullptr)) {
       if (UNLIKELY(c->IsStringClass())) {
         gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-        mirror::SetStringCountVisitor visitor(0);
-        obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+        obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
       } else {
         obj = AllocObjectFromCode<do_access_check, true>(
             inst->VRegB_21c(), shadow_frame.GetMethod(), self,
@@ -633,8 +627,9 @@
 
   HANDLE_INSTRUCTION_START(GOTO) {
     int8_t offset = inst->VRegA_10t(inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -646,8 +641,9 @@
 
   HANDLE_INSTRUCTION_START(GOTO_16) {
     int16_t offset = inst->VRegA_20t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -659,8 +655,9 @@
 
   HANDLE_INSTRUCTION_START(GOTO_32) {
     int32_t offset = inst->VRegA_30t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -672,8 +669,9 @@
 
   HANDLE_INSTRUCTION_START(PACKED_SWITCH) {
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -685,8 +683,9 @@
 
   HANDLE_INSTRUCTION_START(SPARSE_SWITCH) {
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -788,8 +787,9 @@
   HANDLE_INSTRUCTION_START(IF_EQ) {
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -797,6 +797,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -806,8 +807,9 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -815,6 +817,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -824,8 +827,9 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -833,6 +837,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -842,8 +847,9 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -851,6 +857,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -860,8 +867,9 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
     shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -869,6 +877,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -878,8 +887,9 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -887,6 +897,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -895,8 +906,9 @@
   HANDLE_INSTRUCTION_START(IF_EQZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -904,6 +916,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -912,8 +925,9 @@
   HANDLE_INSTRUCTION_START(IF_NEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -921,6 +935,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -929,8 +944,9 @@
   HANDLE_INSTRUCTION_START(IF_LTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -938,6 +954,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -946,8 +963,9 @@
   HANDLE_INSTRUCTION_START(IF_GEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -955,6 +973,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -963,8 +982,9 @@
   HANDLE_INSTRUCTION_START(IF_GTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -972,6 +992,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -980,8 +1001,9 @@
   HANDLE_INSTRUCTION_START(IF_LEZ)  {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -989,6 +1011,7 @@
       }
       ADVANCE(offset);
     } else {
+      BRANCH_INSTRUMENTATION(2);
       ADVANCE(2);
     }
   }
@@ -1640,14 +1663,6 @@
   }
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(INVOKE_LAMBDA) {
-    bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
-                                                   &result_register);
-    UPDATE_HANDLER_TABLE();
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(NEG_INT)
     shadow_frame.SetVReg(
         inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
@@ -2429,62 +2444,6 @@
     ADVANCE(2);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
-    if (lambda_closure_builder == nullptr) {
-      // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
-      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-    }
-
-    // TODO: these allocations should not leak, and the lambda method should not be local.
-    lambda::Closure* lambda_closure =
-        reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
-    bool success = DoCreateLambda<do_access_check>(self,
-                                                   inst,
-                                                   /*inout*/shadow_frame,
-                                                   /*inout*/lambda_closure_builder.get(),
-                                                   /*inout*/lambda_closure);
-    lambda_closure_builder.reset(nullptr);  // reset state of variables captured
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(BOX_LAMBDA) {
-    bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(UNBOX_LAMBDA) {
-    bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) {
-    if (lambda_closure_builder == nullptr) {
-      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-    }
-
-    bool success = DoCaptureVariable<do_access_check>(self,
-                                                      inst,
-                                                      /*inout*/shadow_frame,
-                                                      /*inout*/lambda_closure_builder.get());
-
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) {
-    bool success = DoLiberateVariable<do_access_check>(self,
-                                                           inst,
-                                                           lambda_captured_variable_index,
-                                                           /*inout*/shadow_frame);
-    // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
-    lambda_captured_variable_index++;
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2517,10 +2476,34 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_INSTRUCTION_START(UNUSED_F3)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_F4)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_INSTRUCTION_START(UNUSED_F5)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F6)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F7)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F8)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F9)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_FA)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2551,12 +2534,11 @@
       self->CheckSuspend();
       UPDATE_HANDLER_TABLE();
     }
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
       // Structured locking is to be enforced for abnormal termination, too.
-      shadow_frame.GetLockCountData().CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);
+      DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);
       return JValue(); /* Handled in caller. */
     } else {
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc);
@@ -2570,10 +2552,8 @@
 // Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The
 // compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to
 // a constant condition that would remove the "if" statement so the test is free.
-#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, i, a, v)                        \
   alt_op_##code: {                                                                            \
-    Runtime* const runtime = Runtime::Current();                                              \
-    const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
     if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
       Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
       instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
@@ -2588,20 +2568,40 @@
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteGotoImpl.
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
                                      ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
 JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
                                    ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
 JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
 
+#else
+
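+// Stubs used when the computed-goto implementation is not compiled in;
+// they should never actually be invoked.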
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
+  LOG(FATAL) << "UNREACHABLE";
+  UNREACHABLE();
+}
+// Explicit definitions of ExecuteGotoImpl.
+template<>
+JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
+                                    ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
+                                     ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
+                                   ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
+                                    ShadowFrame& shadow_frame, JValue result_register);
+#endif
+
 }  // namespace interpreter
 }  // namespace art
-
-#endif
diff --git a/runtime/interpreter/interpreter_goto_table_impl.h b/runtime/interpreter/interpreter_goto_table_impl.h
new file mode 100644
index 0000000..bb9be88
--- /dev/null
+++ b/runtime/interpreter/interpreter_goto_table_impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteGotoImpl(Thread* self,
+                       const DexFile::CodeItem* code_item,
+                       ShadowFrame& shadow_frame,
+                       JValue result_register) SHARED_REQUIRES(Locks::mutator_lock_);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
diff --git a/runtime/interpreter/interpreter_mterp_impl.h b/runtime/interpreter/interpreter_mterp_impl.h
new file mode 100644
index 0000000..322df4e
--- /dev/null
+++ b/runtime/interpreter/interpreter_mterp_impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+// Mterp does not support transactions or access checks, thus no templated versions.
+extern "C" bool ExecuteMterpImpl(Thread* self,
+                                 const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame,
+                                 JValue* result_register) SHARED_REQUIRES(Locks::mutator_lock_);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bf95a0e..fd37737 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
-#include "base/stl_util.h"  // MakeUnique
+#include "interpreter_switch_impl.h"
+
+#include "base/enums.h"
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
-#include <memory>  // std::unique_ptr
-
 namespace art {
 namespace interpreter {
 
@@ -33,8 +34,11 @@
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
-      shadow_frame.GetLockCountData().                                                          \
-          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
+      DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);                        \
+      if (interpret_one_instruction) {                                                          \
+        /* Signal mterp to return to caller */                                                  \
+        shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                            \
+      }                                                                                         \
       return JValue(); /* Handled in caller. */                                                 \
     } else {                                                                                    \
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
@@ -52,8 +56,7 @@
   } while (false)
 
 #define HANDLE_MONITOR_CHECKS()                                                                   \
-  if (!shadow_frame.GetLockCountData().                                                           \
-          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+  if (!DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame)) {                       \
     HANDLE_PENDING_EXCEPTION();                                                                   \
   }
 
@@ -66,14 +69,32 @@
     }                                                                                           \
   } while (false)
 
-static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
-  DCHECK(inst->IsExperimental());
-  return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
-}
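+// Notify any branch listeners and give the JIT a chance to perform
+// on-stack replacement; relies on `instrumentation`, `method`, `self`,
+// `dex_pc`, `shadow_frame` and `interpret_one_instruction` being in scope.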
+#define BRANCH_INSTRUMENTATION(offset)                                                         \
+  do {                                                                                         \
+    if (UNLIKELY(instrumentation->HasBranchListeners())) {                                     \
+      instrumentation->Branch(self, method, dex_pc, offset);                                   \
+    }                                                                                          \
+    JValue result;                                                                             \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
+      if (interpret_one_instruction) {                                                         \
+        /* OSR has completed execution of the method.  Signal mterp to return to caller */     \
+        shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                           \
+      }                                                                                        \
+      return result;                                                                           \
+    }                                                                                          \
+  } while (false)
+
+#define HOTNESS_UPDATE()                                                                       \
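+// Credit one interpreter sample toward this method's JIT hotness count;
+// relies on `jit`, `self` and `method` being in scope.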
+  do {                                                                                         \
+    if (jit != nullptr) {                                                                      \
+      jit->AddSamples(self, method, 1, /*with_backedges*/ true);                               \
+    }                                                                                          \
+  } while (false)
 
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                         ShadowFrame& shadow_frame, JValue result_register) {
+                         ShadowFrame& shadow_frame, JValue result_register,
+                         bool interpret_one_instruction) {
   constexpr bool do_assignability_check = do_access_check;
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
     LOG(FATAL) << "Invalid shadow frame for interpreter use";
@@ -83,24 +104,13 @@
 
   uint32_t dex_pc = shadow_frame.GetDexPC();
   const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
-  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
-    if (kIsDebugBuild) {
-        self->AssertNoPendingException();
-    }
-    if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
-      instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                        shadow_frame.GetMethod(), 0);
-    }
-  }
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
 
-  // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
-  // to keep this live for the scope of the entire function call.
-  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
-  size_t lambda_captured_variable_index = 0;
-  while (true) {
+  do {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
     TraceExecution(shadow_frame, inst, dex_pc);
@@ -198,6 +208,10 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_VOID: {
@@ -211,6 +225,10 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN: {
@@ -225,6 +243,10 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_WIDE: {
@@ -238,6 +260,10 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_OBJECT: {
@@ -248,7 +274,7 @@
         const size_t ref_idx = inst->VRegA_11x(inst_data);
         Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != nullptr) {
-          size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+          PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
           Class* return_type = shadow_frame.GetMethod()->GetReturnType(true /* resolve */,
                                                                        pointer_size);
           // Re-load since it might have moved.
@@ -273,6 +299,10 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::CONST_4: {
@@ -449,8 +479,7 @@
         if (LIKELY(c != nullptr)) {
           if (UNLIKELY(c->IsStringClass())) {
             gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-            mirror::SetStringCountVisitor visitor(0);
-            obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+            obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
           } else {
             obj = AllocObjectFromCode<do_access_check, true>(
               inst->VRegB_21c(), shadow_frame.GetMethod(), self,
@@ -541,7 +570,9 @@
       case Instruction::GOTO: {
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -550,7 +581,9 @@
       case Instruction::GOTO_16: {
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -559,7 +592,9 @@
       case Instruction::GOTO_32: {
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -568,7 +603,9 @@
       case Instruction::PACKED_SWITCH: {
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -577,7 +614,9 @@
       case Instruction::SPARSE_SWITCH: {
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -680,11 +719,14 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) ==
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -694,11 +736,14 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -708,11 +753,14 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -722,11 +770,14 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -736,11 +787,14 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -750,11 +804,14 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -763,11 +820,14 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -776,11 +836,14 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -789,11 +852,14 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -802,11 +868,14 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -815,11 +884,14 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -828,11 +900,14 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
         } else {
+          BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
         }
         break;
@@ -2247,123 +2322,38 @@
                              (inst->VRegC_22b() & 0x1f));
         inst = inst->Next_2xx();
         break;
-      case Instruction::INVOKE_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
-                                                       &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::CAPTURE_VARIABLE: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        if (lambda_closure_builder == nullptr) {
-          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-        }
-
-        PREAMBLE();
-        bool success = DoCaptureVariable<do_access_check>(self,
-                                                          inst,
-                                                          /*inout*/shadow_frame,
-                                                          /*inout*/lambda_closure_builder.get());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::CREATE_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-
-        if (lambda_closure_builder == nullptr) {
-          // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
-          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-        }
-
-        // TODO: these allocations should not leak, and the lambda method should not be local.
-        lambda::Closure* lambda_closure =
-            reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
-        bool success = DoCreateLambda<do_access_check>(self,
-                                                       inst,
-                                                       /*inout*/shadow_frame,
-                                                       /*inout*/lambda_closure_builder.get(),
-                                                       /*inout*/lambda_closure);
-        lambda_closure_builder.reset(nullptr);  // reset state of variables captured
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::LIBERATE_VARIABLE: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoLiberateVariable<do_access_check>(self,
-                                                           inst,
-                                                           lambda_captured_variable_index,
-                                                           /*inout*/shadow_frame);
-        // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
-        lambda_captured_variable_index++;
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::UNUSED_F4: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        CHECK(false);  // TODO(iam): Implement opcodes for lambdas
-        break;
-      }
-      case Instruction::BOX_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::UNBOX_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-      case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
+      case Instruction::UNUSED_FC ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
       case Instruction::UNUSED_7A:
+      case Instruction::INVOKE_POLYMORPHIC:
+      case Instruction::INVOKE_POLYMORPHIC_RANGE:
         UnexpectedOpcode(inst, shadow_frame);
     }
-  }
+  } while (!interpret_one_instruction);
+  // Record where we stopped.
+  shadow_frame.SetDexPC(inst->GetDexPc(insns));
+  return result_register;
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteSwitchImpl.
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                      ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+                                      ShadowFrame& shadow_frame, JValue result_register,
+                                      bool interpret_one_instruction);
+template HOT_ATTR
 JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                       ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+                                       ShadowFrame& shadow_frame, JValue result_register,
+                                       bool interpret_one_instruction);
+template
 JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+                                     ShadowFrame& shadow_frame, JValue result_register,
+                                     bool interpret_one_instruction);
+template
 JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                      ShadowFrame& shadow_frame, JValue result_register);
+                                      ShadowFrame& shadow_frame, JValue result_register,
+                                      bool interpret_one_instruction);
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_switch_impl.h b/runtime/interpreter/interpreter_switch_impl.h
new file mode 100644
index 0000000..90ec908
--- /dev/null
+++ b/runtime/interpreter/interpreter_switch_impl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteSwitchImpl(Thread* self,
+                         const DexFile::CodeItem* code_item,
+                         ShadowFrame& shadow_frame,
+                         JValue result_register,
+                         bool interpret_one_instruction) SHARED_REQUIRES(Locks::mutator_lock_);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
diff --git a/runtime/interpreter/mterp/Makefile_mterp b/runtime/interpreter/mterp/Makefile_mterp
new file mode 100644
index 0000000..f0c30ad
--- /dev/null
+++ b/runtime/interpreter/mterp/Makefile_mterp
@@ -0,0 +1,49 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Makefile for the Art fast interpreter.  This is not currently
+# integrated into the build system.
+#
+
+SHELL := /bin/sh
+
+# Build system has TARGET_ARCH=arm, but we can support the exact architecture
+# if it is worthwhile.
+#
+# To generate sources:
+# for arch in arm arm64 x86 x86_64 mips mips64
+# do
+#   TARGET_ARCH_EXT=$arch make -f Makefile_mterp
+# done
+#
+
+OUTPUT_DIR := out
+
+# Accumulate all possible dependencies for the generated files in a very
+# conservative fashion.  If it's not one of the generated files in "out",
+# assume it's a dependency.
+SOURCE_DEPS := \
+	$(shell find . -path ./$(OUTPUT_DIR) -prune -o -type f -print) \
+
+# Source files generated by the script.  There's always one C and one
+# assembly file, though in practice one or the other could be empty.
+GEN_SOURCES := \
+	$(OUTPUT_DIR)/interp_asm_$(TARGET_ARCH_EXT).S
+
+target: $(GEN_SOURCES)
+
+$(GEN_SOURCES): $(SOURCE_DEPS)
+	@mkdir -p out
+	./gen_mterp.py $(TARGET_ARCH_EXT) $(OUTPUT_DIR)
diff --git a/runtime/interpreter/mterp/README.txt b/runtime/interpreter/mterp/README.txt
new file mode 100644
index 0000000..19e02be
--- /dev/null
+++ b/runtime/interpreter/mterp/README.txt
@@ -0,0 +1,197 @@
+ART "mterp" README
+
+NOTE: Find rebuilding instructions at the bottom of this file.
+
+
+==== Overview ====
+
+Every configuration has a "config-*" file that controls how the sources
+are generated.  The sources are written into the "out" directory, where
+they are picked up by the Android build system.
+
+The best way to become familiar with the interpreter is to look at the
+generated files in the "out" directory.
+
+
+==== Config file format ====
+
+The config files are parsed from top to bottom.  Each line in the file
+may be blank, hold a comment (line starts with '#'), or be a command.
+
+The commands are:
+
+  handler-style <computed-goto|jump-table>
+
+    Specify which style of interpreter to generate.  In computed-goto,
+    each handler is allocated a fixed region, allowing transitions to
+    be done via table-start-address + (opcode * handler-size). With
+    jump-table style, handlers may be of any length, and the generated
+    table is an array of pointers to the handlers.  This command is required,
+    and must be the first command in the config file.
+
+  handler-size <bytes>
+
+    Specify the size of the fixed region, in bytes.  On most platforms
+    this will need to be a power of 2.  For jump-table implementations,
+    this command is ignored.
+
+  import <filename>
+
+    The specified file is included immediately, in its entirety.  No
+    substitutions are performed.  ".cpp" and ".h" files are copied to the
+    C output; ".S" files are copied to the asm output.
+
+  asm-alt-stub <filename>
+
+    When present, this command will cause the generation of an alternate
+    set of entry points (for computed-goto interpreters) or an alternate
+    jump table (for jump-table interpreters).
+
+  fallback-stub <filename>
+
+    Specifies a file to be used for the special FALLBACK tag on the "op"
+    command below.  Intended to be used to transfer control to an alternate
+    interpreter to single-step a not-yet-implemented opcode.  Note: should
+    note be used on RETURN-class instructions.
+
+  op-start <directory>
+
+    Indicates the start of the opcode list.  Must precede any "op"
+    commands.  The specified directory is the default location to pull
+    instruction files from.
+
+  op <opcode> <directory>|FALLBACK
+
+    Can only appear after "op-start" and before "op-end".  Overrides the
+    default source file location of the specified opcode.  The opcode
+    definition will come from the specified file, e.g. "op OP_NOP arm"
+    will load from "arm/OP_NOP.S".  A substitution dictionary will be
+    applied (see below).  If the special "FALLBACK" token is used instead of
+    a directory name, the source file specified in fallback-stub will instead
+    be used for this opcode.
+
+  alt <opcode> <directory>
+
+    Can only appear after "op-start" and before "op-end".  Similar to the
+    "op" command above, but denotes a source file to override the entry
+    in the alternate handler table.  The opcode definition will come from
+    the specified file, e.g. "alt OP_NOP arm" will load from
+    "arm/ALT_OP_NOP.S".  A substitution dictionary will be applied
+    (see below).
+
+  op-end
+
+    Indicates the end of the opcode list.  All kNumPackedOpcodes
+    opcodes are emitted when this is seen, followed by any code that
+    didn't fit inside the fixed-size instruction handler space.
+
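+Putting these commands together, a minimal config file for a computed-goto
+target might look like the following sketch (the file and opcode names here
+are illustrative; the real config-* files are the authoritative examples):
+
+    handler-style computed-goto
+    handler-size 128
+    import arm/header.S
+    fallback-stub arm/fallback.S
+    op-start arm
+    op OP_CONST_STRING_JUMBO FALLBACK
+    op-end
+    import arm/footer.S
+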
+The order of "op" and "alt" directives is not significant; the generation
+tool will extract ordering info from the VM sources.
+
+Typically the "op-start" directive names the directory holding the form
+in which most opcodes currently exist.
+
+==== Instruction file format ====
+
+The assembly instruction files are simply fragments of assembly sources.
+The starting label will be provided by the generation tool, as will
+declarations for the segment type and alignment.  The expected target
+assembler is GNU "as", but others should work (possibly after fiddling
+with some of the pseudo-ops emitted by the generation tool).
+
+A substitution dictionary is applied to all opcode fragments as they are
+appended to the output.  Substitutions can look like "$value" or "${value}".
+
+The dictionary always includes:
+
+  $opcode - opcode name, e.g. "OP_NOP"
+  $opnum - opcode number, e.g. 0 for OP_NOP
+  $handler_size_bytes - max size of an instruction handler, in bytes
+  $handler_size_bits - max size of an instruction handler, log 2
+
+Both C and assembly sources will be passed through the C pre-processor,
+so you can take advantage of C-style comments and preprocessor directives
+like "#define".
+
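+For example, a fragment containing the following (a hypothetical
+illustration):
+
+    /* ${opcode}: handler number $opnum */
+    .balign $handler_size_bytes
+
+would be emitted for the first opcode as:
+
+    /* OP_NOP: handler number 0 */
+    .balign 128
+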
+Some generator operations are available.
+
+  %include "filename" [subst-dict]
+
+    Includes the file, which should look like "arm/OP_NOP.S".  You can
+    specify values for the substitution dictionary, using standard Python
+    syntax.  For example, this:
+      %include "arm/unop.S" {"result":"r1"}
+    would insert "arm/unop.S" at the current file position, replacing
+    occurrences of "$result" with "r1".
+
+  %default <subst-dict>
+
+    Specify default substitution dictionary values, using standard Python
+    syntax.  Useful if you want to have a "base" version and variants.
+
+  %break
+
+    Identifies the split between the main portion of the instruction
+    handler (which must fit in "handler-size" bytes) and the "sister"
+    code, which is appended to the end of the instruction handler block.
+    In jump table implementations, %break is ignored.
+
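+For example, if "arm/unop.S" were to begin with the line (hypothetical):
+
+    %default {"result":"r0"}
+
+then including it with no substitution dictionary would expand $result to
+r0, while the {"result":"r1"} dictionary in the earlier %include example
+overrides that default.
+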
+The generation tool does *not* print a warning if your instructions
+exceed "handler-size", but the VM will abort on startup if it detects an
+oversized handler.  On architectures with fixed-width instructions this
+is easy to work with; on others you will need to count bytes.
+
+
+==== Using C constants from assembly sources ====
+
+The file "art/runtime/asm_support.h" has some definitions for constant
+values, structure sizes, and struct member offsets.  The format is fairly
+restricted, as simple macros are used to massage it for use with both C
+(where it is verified) and assembly (where the definitions are used).
+
+If a constant in the file falls out of sync, the VM will log an error
+message and abort during startup.
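+
+As a rough sketch of the pattern (the value shown is hypothetical), an
+entry is a plain macro that assembly sources can consume directly:
+
+    #define THREAD_CURRENT_IBASE_OFFSET 120
+
+paired on the C++ side with a build-time or startup check, conceptually:
+
+    static_assert(THREAD_CURRENT_IBASE_OFFSET ==
+                  OFFSETOF_MEMBER(Thread, tlsPtr_.mterp_current_ibase), "");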
+
+
+==== Development tips ====
+
+If you need to debug the initial piece of an opcode handler, and your
+debug code expands it beyond the handler size limit, you can insert a
+generic header at the top:
+
+    b       ${opcode}_start
+%break
+${opcode}_start:
+
+If you already have a %break, it's okay to leave it in place -- the second
+%break is ignored.
+
+
+==== Rebuilding ====
+
+If you change any of the source file fragments, you need to rebuild the
+combined source files in the "out" directory.  Make sure the files in
+"out" are editable, then:
+
+    $ cd mterp
+    $ ./rebuild.sh
+
+The ultimate goal is to have the build system generate the necessary
+output files without requiring this separate step, but we're not yet
+ready to require Python in the build.
+
+==== Interpreter Control ====
+
+The mterp fast interpreter achieves much of its performance advantage
+over the C++ interpreter through its efficient mechanism of
+transitioning from one Dalvik bytecode to the next.  Mterp for ARM targets
+uses a computed-goto mechanism, in which the handler entrypoints are
+located at the base of the handler table + (opcode * 128).
+
+In normal operation, the dedicated register rIBASE (r8 for ARM, edx for
+x86) holds the address of mainHandlerTable.  If we need to switch to a
+mode that requires inter-instruction checking, rIBASE is changed
+to altHandlerTable.  Note that this change is not immediate.  What is actually
+changed is the value of curHandlerTable - which is part of the interpBreak
+structure.  Rather than explicitly check for changes, each thread will
+blindly refresh rIBASE at backward branches, exception throws and returns.
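+
+As a minimal sketch of this dispatch pattern (C++ using the GCC/Clang
+labels-as-values extension; the names and toy bytecode are hypothetical,
+and the real handlers are assembly stubs spaced 128 bytes apart):
+
+    #include <cstdint>
+
+    static void** current_table;        // analogue of curHandlerTable
+
+    int Interpret(const uint8_t* pc) {
+      static void* main_table[] = { &&op_nop, &&op_halt };
+      current_table = main_table;       // an alt table would add checks
+      void** ibase = current_table;     // analogue of rIBASE
+      goto *ibase[*pc];                 // initial dispatch
+    op_nop:
+      ++pc;
+      ibase = current_table;            // "blind refresh" at a branch
+      goto *ibase[*pc];
+    op_halt:
+      return 0;
+    }
+
+    int main() {
+      const uint8_t code[] = { 0, 0, 1 };  // nop, nop, halt
+      return Interpret(code);
+    }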
diff --git a/runtime/interpreter/mterp/arm/alt_stub.S b/runtime/interpreter/mterp/arm/alt_stub.S
new file mode 100644
index 0000000..9db5bf7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/alt_stub.S
@@ -0,0 +1,12 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (${opnum} * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
new file mode 100644
index 0000000..8fad42f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -0,0 +1,19 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    b${condition} MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/binop.S b/runtime/interpreter/mterp/arm/binop.S
new file mode 100644
index 0000000..eeb72ef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binop.S
@@ -0,0 +1,35 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
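+     *
+     * Example (see op_add_int.S later in this change):
+     *     %include "arm/binop.S" {"instr":"add     r0, r0, r1"}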
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm/binop2addr.S b/runtime/interpreter/mterp/arm/binop2addr.S
new file mode 100644
index 0000000..d09a43a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binop2addr.S
@@ -0,0 +1,32 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopLit16.S b/runtime/interpreter/mterp/arm/binopLit16.S
new file mode 100644
index 0000000..065394e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopLit16.S
@@ -0,0 +1,29 @@
+%default {"result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
new file mode 100644
index 0000000..7c9c631
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -0,0 +1,35 @@
+%default {"extract":"asr     r1, r3, #8", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
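+     *
+     * Example with "extract" empty and the shift folded into "instr"
+     * (see op_add_int_lit8.S later in this change):
+     *     %include "arm/binopLit8.S" {"extract":"", "instr":"add     r0, r0, r3, asr #8"}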
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    $extract                            @ optional; typically r1<- ssssssCC (sign extended)
+    .if $chkzero
+    @cmp     r1, #0                     @ cmp elided: "extract" must set flags (e.g. "asrs") when chkzero=1
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopWide.S b/runtime/interpreter/mterp/arm/binopWide.S
new file mode 100644
index 0000000..4d88001
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopWide.S
@@ -0,0 +1,38 @@
+%default {"preinstr":"", "result0":"r0", "result1":"r1", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
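+     *
+     * Example using "preinstr" for a two-instruction op (see op_add_long.S
+     * later in this change):
+     *     %include "arm/binopWide.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}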
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if $chkzero
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopWide2addr.S b/runtime/interpreter/mterp/arm/binopWide2addr.S
new file mode 100644
index 0000000..bb16335
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopWide2addr.S
@@ -0,0 +1,34 @@
+%default {"preinstr":"", "result0":"r0", "result1":"r1", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if $chkzero
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {$result0,$result1}     @ vA/vA+1<- $result0/$result1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
new file mode 100644
index 0000000..a6b131d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align  2
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  r0  Thread* self
+ *  r1  code_item
+ *  r2  ShadowFrame
+ *  r3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .fnstart
+    .save {r3-r10,fp,lr}
+    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
+
+    /* Remember the return register */
+    str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     r1, [r2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     rSELF, r0
+    ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs.
+    VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
+    ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
+    add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
+    add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* Set up for backwards branches & osr profiling */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          @ load rINST from rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* NOTE: no fallthrough */
diff --git a/runtime/interpreter/mterp/arm/fallback.S b/runtime/interpreter/mterp/arm/fallback.S
new file mode 100644
index 0000000..44e7e12
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
diff --git a/runtime/interpreter/mterp/arm/fbinop.S b/runtime/interpreter/mterp/arm/fbinop.S
new file mode 100644
index 0000000..594ee03
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinop.S
@@ -0,0 +1,23 @@
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
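+     *
+     * Example (see op_add_float.S later in this change):
+     *     %include "arm/fbinop.S" {"instr":"fadds   s2, s0, s1"}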
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $instr                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinop2addr.S b/runtime/interpreter/mterp/arm/fbinop2addr.S
new file mode 100644
index 0000000..53c87a0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinop2addr.S
@@ -0,0 +1,19 @@
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+    $instr                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinopWide.S b/runtime/interpreter/mterp/arm/fbinopWide.S
new file mode 100644
index 0000000..ca13bfb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinopWide.S
@@ -0,0 +1,23 @@
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $instr                              @ d2<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinopWide2addr.S b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
new file mode 100644
index 0000000..9766e2c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
@@ -0,0 +1,21 @@
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+    $instr                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
new file mode 100644
index 0000000..62e573a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -0,0 +1,299 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    ldr  r2, [rSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     r0, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    cmp     r0, #0                                  @ Exception pending?
+    beq     MterpFallback                           @ If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ *
+ */
+MterpException:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    @ (self, shadow_frame)
+    cmp     r0, #0
+    beq     MterpExceptionReturn                    @ no local catch, back to caller.
+    ldr     r0, [rFP, #OFF_FP_CODE_ITEM]
+    ldr     r1, [rFP, #OFF_FP_DEX_PC]
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     rPC, r0, #CODEITEM_INSNS_OFFSET
+    add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     rINST, #0
+MterpCommonTakenBranch:
+    bgt     .L_forward_branch           @ don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values, and we should not see 0 here,
+ * so the flags from the comparison with JIT_CHECK_OSR (-1) below tell us all
+ * we need: "gt" means the hotness countdown is positive and still active.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmp     rPROFILE, #JIT_CHECK_OSR
+    beq     .L_osr_check
+    subgts  rPROFILE, #1
+    beq     .L_add_batch                @ counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    REFRESH_IBASE
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bne     .L_suspend_request_pending
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    REFRESH_IBASE                       @ might have changed during suspend
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_no_count_backwards:
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    bne     .L_resume_backward_branch
+.L_osr_check:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
+    beq     .L_check_osr_forward
+.L_resume_forward_branch:
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_check_osr_forward:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    mov     r2, rSELF
+    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
+    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     r0, #0                                  @ signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed a set of registers (including LR) on the stack in ExecuteMterpImpl.
+ * Here we pop them back off, loading the saved LR value into PC to return to
+ * our caller.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     r0, #1                                  @ signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, #1                                  @ signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     rPROFILE, #0
+    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
+
+MterpProfileActive:
+    mov     rINST, r0                               @ stash return value
+    /* Report cached hotness counts */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rSELF
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
+    mov     r0, rINST                               @ restore return value
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
+
+    .fnend
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/arm/funop.S b/runtime/interpreter/mterp/arm/funop.S
new file mode 100644
index 0000000..1b8bb8b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funop.S
@@ -0,0 +1,17 @@
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/funopNarrower.S b/runtime/interpreter/mterp/arm/funopNarrower.S
new file mode 100644
index 0000000..b9f758b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funopNarrower.S
@@ -0,0 +1,17 @@
+    /*
+     * Generic 64-bit-to-32-bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/funopWider.S b/runtime/interpreter/mterp/arm/funopWider.S
new file mode 100644
index 0000000..854cdc9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funopWider.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit-to-64-bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ d0<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
new file mode 100644
index 0000000..039bcbe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.  If VFP
+is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+s0-s15 (d0-d7, q0-q3) do not need to be.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+Mterp and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rSELF     self (Thread) pointer
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+  r10 rPROFILE  branch profiling countdown
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC      r4
+#define rFP      r5
+#define rSELF    r6
+#define rINST    r7
+#define rIBASE   r8
+#define rPROFILE r10
+#define rREFS    r11
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+.macro EXPORT_DEX_PC tmp
+    ldr  \tmp, [rFP, #OFF_FP_CODE_ITEM]
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    add  \tmp, #CODEITEM_INSNS_OFFSET
+    sub  \tmp, rPC, \tmp
+    asr  \tmp, #1
+    str  \tmp, [rFP, #OFF_FP_DEX_PC]
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    ldrh    rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+  add  rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg.  Updates
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ *
+ * We want to write "ldrh rINST, [rPC, _reg, lsl #1]!", but some of the
+ * bits that hold the shift distance are used for the half/byte/sign flags.
+ * In some cases we can pre-double _reg for free, so we require a byte offset
+ * here.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    ldrh    rINST, [rPC, \reg]!
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [rPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Because this only jumps within the
+ * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
+ */
+.macro GOTO_OPCODE reg
+    add     pc, rIBASE, \reg, lsl #${handler_size_bits}
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     pc, \base, \reg, lsl #${handler_size_bits}
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    mov     \reg, #0
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_SHADOW reg, vreg
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+
+/*
+ * Clear the corresponding shadow regs for a vreg pair
+ */
+.macro CLEAR_SHADOW_PAIR vreg, tmp1, tmp2
+    mov     \tmp1, #0
+    add     \tmp2, \vreg, #1
+    SET_VREG_SHADOW \tmp1, \vreg
+    SET_VREG_SHADOW \tmp1, \tmp2
+.endm
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
diff --git a/runtime/interpreter/mterp/arm/invoke.S b/runtime/interpreter/mterp/arm/invoke.S
new file mode 100644
index 0000000..e47dd1b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/invoke.S
@@ -0,0 +1,22 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
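+     *
+     * Each invoke opcode supplies its C++ helper via "helper", along these
+     * lines (helper name illustrative; the op_invoke_* files are not shown
+     * here):
+     *     %include "arm/invoke.S" { "helper":"MterpInvokeVirtual" }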
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      $helper
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
diff --git a/runtime/interpreter/mterp/arm/op_add_double.S b/runtime/interpreter/mterp/arm/op_add_double.S
new file mode 100644
index 0000000..9332bf2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_double_2addr.S b/runtime/interpreter/mterp/arm/op_add_double_2addr.S
new file mode 100644
index 0000000..3242c53
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_float.S b/runtime/interpreter/mterp/arm/op_add_float.S
new file mode 100644
index 0000000..afb7967
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_float_2addr.S b/runtime/interpreter/mterp/arm/op_add_float_2addr.S
new file mode 100644
index 0000000..0067b6a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int.S b/runtime/interpreter/mterp/arm/op_add_int.S
new file mode 100644
index 0000000..1dcae7e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_2addr.S b/runtime/interpreter/mterp/arm/op_add_int_2addr.S
new file mode 100644
index 0000000..9ea98f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit16.S b/runtime/interpreter/mterp/arm/op_add_int_lit16.S
new file mode 100644
index 0000000..5763ab8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit8.S b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
new file mode 100644
index 0000000..035510d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"", "instr":"add     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_add_long.S b/runtime/interpreter/mterp/arm/op_add_long.S
new file mode 100644
index 0000000..093223e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_add_long_2addr.S b/runtime/interpreter/mterp/arm/op_add_long_2addr.S
new file mode 100644
index 0000000..c11e0af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_aget.S b/runtime/interpreter/mterp/arm/op_aget.S
new file mode 100644
index 0000000..11f7079
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget.S
@@ -0,0 +1,29 @@
+%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
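+     *
+     * Example for a byte array (see op_aget_byte.S later in this change):
+     *     %include "arm/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }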
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #$shift     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $load   r2, [r0, #$data_offset]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aget_boolean.S b/runtime/interpreter/mterp/arm/op_aget_boolean.S
new file mode 100644
index 0000000..8f678dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_byte.S b/runtime/interpreter/mterp/arm/op_aget_byte.S
new file mode 100644
index 0000000..a304650
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_char.S b/runtime/interpreter/mterp/arm/op_aget_char.S
new file mode 100644
index 0000000..4908306
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_char.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_object.S b/runtime/interpreter/mterp/arm/op_aget_object.S
new file mode 100644
index 0000000..4e0aab5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_object.S
@@ -0,0 +1,21 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     @ (array, index)
+    ldr      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp      r1, #0
+    bne      MterpException
+    SET_VREG_OBJECT r0, r9
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aget_short.S b/runtime/interpreter/mterp/arm/op_aget_short.S
new file mode 100644
index 0000000..b71e659
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_short.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrsh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_wide.S b/runtime/interpreter/mterp/arm/op_aget_wide.S
new file mode 100644
index 0000000..853a7a4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_wide.S
@@ -0,0 +1,25 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use LDRD.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_and_int.S b/runtime/interpreter/mterp/arm/op_and_int.S
new file mode 100644
index 0000000..7c16d37
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_2addr.S b/runtime/interpreter/mterp/arm/op_and_int_2addr.S
new file mode 100644
index 0000000..0fbab02
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit16.S b/runtime/interpreter/mterp/arm/op_and_int_lit16.S
new file mode 100644
index 0000000..541e9b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit8.S b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
new file mode 100644
index 0000000..af746b5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"", "instr":"and     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_and_long.S b/runtime/interpreter/mterp/arm/op_and_long.S
new file mode 100644
index 0000000..4ad5158
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"and     r0, r0, r2", "instr":"and     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_and_long_2addr.S b/runtime/interpreter/mterp/arm/op_and_long_2addr.S
new file mode 100644
index 0000000..e23ea44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"and     r0, r0, r2", "instr":"and     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_aput.S b/runtime/interpreter/mterp/arm/op_aput.S
new file mode 100644
index 0000000..a511fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput.S
@@ -0,0 +1,29 @@
+%default { "store":"str", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
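+     *
+     * Example for a char array (see op_aput_char.S later in this change):
+     *     %include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }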
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #$shift     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    $store  r2, [r0, #$data_offset]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_boolean.S b/runtime/interpreter/mterp/arm/op_aput_boolean.S
new file mode 100644
index 0000000..e86663f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_byte.S b/runtime/interpreter/mterp/arm/op_aput_byte.S
new file mode 100644
index 0000000..83694b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_char.S b/runtime/interpreter/mterp/arm/op_aput_char.S
new file mode 100644
index 0000000..3551cac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_char.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_object.S b/runtime/interpreter/mterp/arm/op_aput_object.S
new file mode 100644
index 0000000..c539916
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_object.S
@@ -0,0 +1,14 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpAputObject
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_short.S b/runtime/interpreter/mterp/arm/op_aput_short.S
new file mode 100644
index 0000000..0a0590e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_short.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_wide.S b/runtime/interpreter/mterp/arm/op_aput_wide.S
new file mode 100644
index 0000000..0057507
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_wide.S
@@ -0,0 +1,24 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ vBB[vCC]<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_array_length.S b/runtime/interpreter/mterp/arm/op_array_length.S
new file mode 100644
index 0000000..43b1682
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_array_length.S
@@ -0,0 +1,13 @@
+    /*
+     * Return the length of an array.
+     */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    GET_VREG r0, r1                     @ r0<- vB (object ref)
+    cmp     r0, #0                      @ is object null?
+    beq     common_errNullObject        @ yup, fail
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- array length
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r3, r2                     @ vB<- length
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_check_cast.S b/runtime/interpreter/mterp/arm/op_check_cast.S
new file mode 100644
index 0000000..24eba45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_check_cast.S
@@ -0,0 +1,17 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- BBBB
+    mov      r1, rINST, lsr #8          @ r1<- AA
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
+    ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
+    mov      r3, rSELF                  @ r3<- self
+    bl       MterpCheckCast             @ (index, &obj, method, self)
+    PREFETCH_INST 2
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
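
Check-cast only throws; it never changes the register (the helper receives a pointer to the vreg rather than its value). A nonzero return from MterpCheckCast signals a pending exception. A hedged C-level sketch of the semantics:

    /* Hedged sketch of check-cast semantics; is_instance_of stands in for the
     * runtime type check, which may resolve the class and can itself throw. */
    extern int is_instance_of(void *obj, void *klass);   /* hypothetical stand-in */
    static int check_cast(void *obj, void *resolved_class) {
        if (obj != NULL && !is_instance_of(obj, resolved_class))
            return 1;   /* pending ClassCastException -> MterpPossibleException */
        return 0;       /* null always passes check-cast */
    }
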
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
new file mode 100644
index 0000000..6626ff0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -0,0 +1,23 @@
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    cmp     r0, r2
+    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
+    mov     ip, #0
+    mvnlt   ip, #0                      @ -1
+    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
+    orrne   ip, #1
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG ip, r9                     @ vAA<- ip
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
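
The cmp/sbcs pair performs a full 64-bit signed comparison without materializing the difference: the low-word cmp sets the carry, the subtract-with-carry on the high words then leaves N and V positioned for a signed less-than test, and equality needs the low words re-checked once the high words compare equal. A hedged C sketch of what the handler computes:

    #include <stdint.h>
    /* Hedged sketch: cmp-long stores -1, 0, or 1 into vAA. */
    static int32_t cmp_long(int64_t vbb, int64_t vcc) {
        if (vbb < vcc)  return -1;   /* cmp + sbcs establishes LT */
        if (vbb == vcc) return 0;    /* cmpeq re-checks the low words */
        return 1;
    }
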
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_double.S b/runtime/interpreter/mterp/arm/op_cmpg_double.S
new file mode 100644
index 0000000..602a4b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpg_double.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;    // unordered (NaN)
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_float.S b/runtime/interpreter/mterp/arm/op_cmpg_float.S
new file mode 100644
index 0000000..965091f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpg_float.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;    // unordered (NaN)
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    vcmpe.f32 s0, s1                    @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_double.S b/runtime/interpreter/mterp/arm/op_cmpl_double.S
new file mode 100644
index 0000000..8a5e509
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpl_double.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;   // unordered (NaN)
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_float.S b/runtime/interpreter/mterp/arm/op_cmpl_float.S
new file mode 100644
index 0000000..9df0c2c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpl_float.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;   // unordered (NaN)
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    vcmpe.f32  s0, s1                   @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
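
The four floating-point compare handlers above differ only in where NaN lands: the cmpg forms default the result to 1 and the cmpl forms default it to -1, then overwrite it for the ordered outcomes. A hedged C sketch of both:

    /* Hedged sketch: cmpg vs. cmpl agree on ordered inputs and differ only
     * for NaN (unordered) operands. */
    static int cmpg(double x, double y) {   /* NaN -> +1 */
        if (x == y) return 0;
        if (x <  y) return -1;
        return 1;                           /* x > y, or unordered */
    }
    static int cmpl(double x, double y) {   /* NaN -> -1 */
        if (x == y) return 0;
        if (x >  y) return 1;
        return -1;                          /* x < y, or unordered */
    }
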
diff --git a/runtime/interpreter/mterp/arm/op_const.S b/runtime/interpreter/mterp/arm/op_const.S
new file mode 100644
index 0000000..39890a0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const.S
@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_16.S b/runtime/interpreter/mterp/arm/op_const_16.S
new file mode 100644
index 0000000..a30cf3a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_16.S
@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_4.S b/runtime/interpreter/mterp/arm/op_const_4.S
new file mode 100644
index 0000000..c97b0e9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_4.S
@@ -0,0 +1,7 @@
+    /* const/4 vA, #+B */
+    sbfx    r1, rINST, #12, #4          @ r1<- sssssssB (sign-extended)
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    SET_VREG r1, r0                     @ fp[A]<- r1
+    GOTO_OPCODE ip                      @ execute next instruction
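
sbfx/ubfx pull both operands straight out of the 16-bit code unit: the literal B sits in bits 15..12 and is sign-extended, the destination A in bits 11..8. A hedged C sketch of the decode:

    #include <stdint.h>
    /* Hedged sketch of the const/4 decode from one 16-bit code unit. */
    static void const4(uint16_t inst, int32_t *vreg) {
        int32_t  b = (int32_t)(int16_t)inst >> 12;  /* sbfx: sssssssB */
        uint32_t a = (inst >> 8) & 0xfu;            /* ubfx: A */
        vreg[a] = b;                                /* SET_VREG */
    }
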
diff --git a/runtime/interpreter/mterp/arm/op_const_class.S b/runtime/interpreter/mterp/arm/op_const_class.S
new file mode 100644
index 0000000..0b111f4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_class.S
@@ -0,0 +1,13 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstClass             @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cmp     r0, #0
+    bne     MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_high16.S b/runtime/interpreter/mterp/arm/op_const_high16.S
new file mode 100644
index 0000000..536276d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_high16.S
@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, r0, lsl #16             @ r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_string.S b/runtime/interpreter/mterp/arm/op_const_string.S
new file mode 100644
index 0000000..4b8302a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_string.S
@@ -0,0 +1,13 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
new file mode 100644
index 0000000..1255c07
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
@@ -0,0 +1,15 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     @ advance rPC
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 3                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide.S b/runtime/interpreter/mterp/arm/op_const_wide.S
new file mode 100644
index 0000000..8310a4c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide.S
@@ -0,0 +1,14 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (low middle)
+    FETCH r2, 3                         @ r2<- hhhh (high middle)
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb (low word)
+    FETCH r3, 4                         @ r3<- HHHH (high)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
+    CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
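
const-wide is the widest format here: five code units, four of which carry the 64-bit literal low half first. A hedged C sketch, with code_unit() standing in for the FETCH macro (read the 16-bit unit at rPC + n):

    #include <stdint.h>
    /* Hedged sketch of const-wide literal assembly; code_unit() is a
     * hypothetical stand-in for FETCH. */
    static uint64_t const_wide_literal(uint16_t (*code_unit)(int n)) {
        uint32_t lo = code_unit(1) | ((uint32_t)code_unit(2) << 16);  /* BBBBbbbb */
        uint32_t hi = code_unit(3) | ((uint32_t)code_unit(4) << 16);  /* HHHHhhhh */
        return ((uint64_t)hi << 32) | lo;           /* stmia into vAA/vAA+1 */
    }
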
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_16.S b/runtime/interpreter/mterp/arm/op_const_wide_16.S
new file mode 100644
index 0000000..28abb51
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_16.S
@@ -0,0 +1,10 @@
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_32.S b/runtime/interpreter/mterp/arm/op_const_wide_32.S
new file mode 100644
index 0000000..c10bb04
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_32.S
@@ -0,0 +1,12 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH r0, 1                         @ r0<- 0000bbbb (low)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_S r2, 2                       @ r2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_high16.S b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
new file mode 100644
index 0000000..d7e38ec
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
@@ -0,0 +1,11 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH r1, 1                         @ r1<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, #0                      @ r0<- 00000000
+    mov     r1, r1, lsl #16             @ r1<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_div_double.S b/runtime/interpreter/mterp/arm/op_div_double.S
new file mode 100644
index 0000000..5147550
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_double_2addr.S b/runtime/interpreter/mterp/arm/op_div_double_2addr.S
new file mode 100644
index 0000000..b812f17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_float.S b/runtime/interpreter/mterp/arm/op_div_float.S
new file mode 100644
index 0000000..0f24d11
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_float_2addr.S b/runtime/interpreter/mterp/arm/op_div_float_2addr.S
new file mode 100644
index 0000000..a1dbf01
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_int.S b/runtime/interpreter/mterp/arm/op_div_int.S
new file mode 100644
index 0000000..251064b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int.S
@@ -0,0 +1,30 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl    __aeabi_idiv                  @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
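
The zero check always runs before the divide; only the divide itself varies with __ARM_ARCH_EXT_IDIV__. A hedged C sketch of the semantics (note that Dalvik defines INT_MIN / -1 to wrap to INT_MIN, which both sdiv and the EABI helper produce, whereas plain C leaves that case undefined):

    #include <stdint.h>
    /* Hedged sketch of div-int; a zero divisor means the handler branches to
     * common_errDivideByZero instead of writing a result. */
    static int div_int(int32_t vbb, int32_t vcc, int32_t *out) {
        if (vcc == 0)
            return -1;                      /* common_errDivideByZero */
        if (vbb == INT32_MIN && vcc == -1)
            *out = INT32_MIN;               /* wraps, as sdiv and the helper do */
        else
            *out = vbb / vcc;               /* sdiv or __aeabi_idiv */
        return 0;
    }
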
diff --git a/runtime/interpreter/mterp/arm/op_div_int_2addr.S b/runtime/interpreter/mterp/arm/op_div_int_2addr.S
new file mode 100644
index 0000000..9be4cd8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_2addr.S
@@ -0,0 +1,29 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
diff --git a/runtime/interpreter/mterp/arm/op_div_int_lit16.S b/runtime/interpreter/mterp/arm/op_div_int_lit16.S
new file mode 100644
index 0000000..d9bc7d6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_lit16.S
@@ -0,0 +1,28 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_div_int_lit8.S b/runtime/interpreter/mterp/arm/op_div_int_lit8.S
new file mode 100644
index 0000000..5d2dbd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_lit8.S
@@ -0,0 +1,29 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign-extended); sets Z
+    @ no explicit cmp r1, #0 needed: movs already set the flags
+    beq     common_errDivideByZero      @ is second operand zero?
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl   __aeabi_idiv                   @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_div_long.S b/runtime/interpreter/mterp/arm/op_div_long.S
new file mode 100644
index 0000000..0f21a84
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"instr":"bl      __aeabi_ldivmod", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_long_2addr.S b/runtime/interpreter/mterp/arm/op_div_long_2addr.S
new file mode 100644
index 0000000..e172b29
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"instr":"bl      __aeabi_ldivmod", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_float.S b/runtime/interpreter/mterp/arm/op_double_to_float.S
new file mode 100644
index 0000000..98fdfbc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_float.S
@@ -0,0 +1 @@
+%include "arm/funopNarrower.S" {"instr":"vcvt.f32.f64  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_int.S b/runtime/interpreter/mterp/arm/op_double_to_int.S
new file mode 100644
index 0000000..aa035de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_int.S
@@ -0,0 +1 @@
+%include "arm/funopNarrower.S" {"instr":"ftosizd  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_long.S b/runtime/interpreter/mterp/arm/op_double_to_long.S
new file mode 100644
index 0000000..b100810
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_long.S
@@ -0,0 +1,52 @@
+@include "arm/unopWide.S" {"instr":"bl      __aeabi_d2lz"}
+%include "arm/unopWide.S" {"instr":"bl      d2l_doconv"}
+
+%break
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+d2l_doconv:
+    stmfd   sp!, {r4, r5, lr}           @ save regs
+    mov     r3, #0x43000000             @ maxlong, as a double (high word)
+    add     r3, #0x00e00000             @  0x43e00000
+    mov     r2, #0                      @ maxlong, as a double (low word)
+    sub     sp, sp, #4                  @ align for EABI
+    mov     r4, r0                      @ save a copy of r0
+    mov     r5, r1                      @  and r1
+    bl      __aeabi_dcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r3, #0xc3000000             @ minlong, as a double (high word)
+    add     r3, #0x00e00000             @  0xc3e00000
+    mov     r2, #0                      @ minlong, as a double (low word)
+    bl      __aeabi_dcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r2, r4                      @ compare against self
+    mov     r3, r5
+    bl      __aeabi_dcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    beq     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    bl      __aeabi_d2lz                @ convert double to long
+
+1:
+    add     sp, sp, #4
+    ldmfd   sp!, {r4, r5, pc}
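
d2l_doconv exists because __aeabi_d2lz alone does not give Java semantics for out-of-range and NaN inputs. The magic constants 0x43e00000 and 0xc3e00000 are the high words of 2^63 and -2^63 as doubles, i.e. (double)Long.MAX_VALUE after rounding and (double)Long.MIN_VALUE exactly. A hedged C sketch:

    #include <stdint.h>
    /* Hedged sketch of d2l_doconv: clamp to the long range, map NaN to 0,
     * then fall through to the plain truncating conversion. */
    static int64_t d2l(double d) {
        if (d >= 9223372036854775808.0)  return INT64_MAX;  /* >= 2^63 */
        if (d <= -9223372036854775808.0) return INT64_MIN;  /* <= -2^63 */
        if (d != d)                      return 0;          /* NaN */
        return (int64_t)d;                                  /* __aeabi_d2lz */
    }
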
diff --git a/runtime/interpreter/mterp/arm/op_fill_array_data.S b/runtime/interpreter/mterp/arm/op_fill_array_data.S
new file mode 100644
index 0000000..e1ca85c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_fill_array_data.S
@@ -0,0 +1,14 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r1, r0, r1, lsl #16         @ r1<- BBBBbbbb
+    GET_VREG r0, r3                     @ r0<- vAA (array object)
+    add     r1, rPC, r1, lsl #1         @ r1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          @ (obj, payload)
+    cmp     r0, #0                      @ 0 means an exception is thrown
+    beq     MterpPossibleException      @ exception?
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_filled_new_array.S b/runtime/interpreter/mterp/arm/op_filled_new_array.S
new file mode 100644
index 0000000..1075f0c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_filled_new_array.S
@@ -0,0 +1,19 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      $helper
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_filled_new_array_range.S b/runtime/interpreter/mterp/arm/op_filled_new_array_range.S
new file mode 100644
index 0000000..16567af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "arm/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/arm/op_float_to_double.S b/runtime/interpreter/mterp/arm/op_float_to_double.S
new file mode 100644
index 0000000..b1e12bd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_double.S
@@ -0,0 +1 @@
+%include "arm/funopWider.S" {"instr":"vcvt.f64.f32  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_int.S b/runtime/interpreter/mterp/arm/op_float_to_int.S
new file mode 100644
index 0000000..aab8716
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_int.S
@@ -0,0 +1 @@
+%include "arm/funop.S" {"instr":"ftosizs s1, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_long.S b/runtime/interpreter/mterp/arm/op_float_to_long.S
new file mode 100644
index 0000000..5c8680f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_long.S
@@ -0,0 +1,39 @@
+@include "arm/unopWider.S" {"instr":"bl      __aeabi_f2lz"}
+%include "arm/unopWider.S" {"instr":"bl      f2l_doconv"}
+
+%break
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+f2l_doconv:
+    stmfd   sp!, {r4, lr}
+    mov     r1, #0x5f000000             @ (float)maxlong
+    mov     r4, r0
+    bl      __aeabi_fcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    popne   {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, #0xdf000000             @ (float)minlong
+    bl      __aeabi_fcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    popne   {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r4
+    bl      __aeabi_fcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    popeq   {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    bl      __aeabi_f2lz                @ convert float to long
+    ldmfd   sp!, {r4, pc}
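
Same idea as d2l_doconv, in single precision: 0x5f000000 is 2^63 as a float (sign 0, exponent field 190, mantissa 0) and 0xdf000000 is -2^63; the in-range conversion is still delegated to __aeabi_f2lz. A hedged snippet checking the two clamp constants:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    /* Hedged sketch verifying the f2l clamp constants. */
    int main(void) {
        uint32_t maxbits = 0x5f000000u, minbits = 0xdf000000u;
        float fmax, fmin;
        memcpy(&fmax, &maxbits, sizeof fmax);
        memcpy(&fmin, &minbits, sizeof fmin);
        printf("%g %g\n", fmax, fmin);   /* 9.22337e+18 -9.22337e+18 */
        return 0;
    }
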
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
new file mode 100644
index 0000000..aa42dfd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -0,0 +1,9 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
new file mode 100644
index 0000000..12a6bc0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -0,0 +1,9 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
new file mode 100644
index 0000000..7325a1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -0,0 +1,16 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  The orrs
+     * below sets the condition flags on the assembled code-unit offset,
+     * which the flag-using MterpCommonTakenBranch entry point relies on.
+     */
+    /* goto/32 +AAAAAAAA */
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r3, 2                         @ r3<- AAAA (hi)
+    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
+    b       MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S
new file mode 100644
index 0000000..b8b6a6e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_eq.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S
new file mode 100644
index 0000000..7012f61
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_eqz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S
new file mode 100644
index 0000000..eb29e63
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ge.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S
new file mode 100644
index 0000000..d9da374
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S
new file mode 100644
index 0000000..a35eab8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gt.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S
new file mode 100644
index 0000000..4ef4d8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gtz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S
new file mode 100644
index 0000000..c7c31bc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_le.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S
new file mode 100644
index 0000000..9fbf6c9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_lez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S
new file mode 100644
index 0000000..9469fbb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_lt.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S
new file mode 100644
index 0000000..a4fc1b8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ltz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S
new file mode 100644
index 0000000..c945331
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ne.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S
new file mode 100644
index 0000000..2d81fda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_nez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_iget.S b/runtime/interpreter/mterp/arm/op_iget.S
new file mode 100644
index 0000000..c7f777b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       $helper
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if $is_object
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
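
All six non-quick iget flavors share this body and differ only in the helper the template substitutes. The argument order matches the registers set up above (field index, object, referring method, current thread), and failure is detected by checking the thread's pending-exception slot rather than the return value, since 0 is a legal field value. A hedged C sketch of the flow, with assumed signatures:

    #include <stdint.h>
    /* Hedged sketch of the slow-path iget; the helper corresponds to the
     * template's "helper" key (e.g. artGet32InstanceFromCode). Its signature
     * and the exception-slot field name are assumptions for illustration. */
    typedef struct Thread { void *exception; } Thread;
    static int iget_slow(uint32_t (*helper)(uint32_t, void *, void *, Thread *),
                         uint32_t field_idx, void *obj, void *referrer,
                         Thread *self, int32_t *vreg, unsigned a) {
        uint32_t val = helper(field_idx, obj, referrer, self);
        if (self->exception != NULL)        /* THREAD_EXCEPTION_OFFSET check */
            return -1;                      /* branch to MterpPossibleException */
        vreg[a] = (int32_t)val;             /* SET_VREG / SET_VREG_OBJECT */
        return 0;
    }
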
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean.S b/runtime/interpreter/mterp/arm/op_iget_boolean.S
new file mode 100644
index 0000000..628f40a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S b/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S
new file mode 100644
index 0000000..0ae4843
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrb" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte.S b/runtime/interpreter/mterp/arm/op_iget_byte.S
new file mode 100644
index 0000000..c4e08e2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte_quick.S b/runtime/interpreter/mterp/arm/op_iget_byte_quick.S
new file mode 100644
index 0000000..e1b3083
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrsb" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char.S b/runtime/interpreter/mterp/arm/op_iget_char.S
new file mode 100644
index 0000000..5e8da66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_char.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char_quick.S b/runtime/interpreter/mterp/arm/op_iget_char_quick.S
new file mode 100644
index 0000000..b44d8f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrh" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object.S b/runtime/interpreter/mterp/arm/op_iget_object.S
new file mode 100644
index 0000000..1cf2e3c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_object.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object_quick.S b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
new file mode 100644
index 0000000..16cb118
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
@@ -0,0 +1,16 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- object we're operating on
+    bl      artIGetObjectFromMterp      @ (obj, offset)
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    PREFETCH_INST 2
+    cmp     r3, #0
+    bne     MterpPossibleException      @ bail out
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_quick.S b/runtime/interpreter/mterp/arm/op_iget_quick.S
new file mode 100644
index 0000000..0eaf364
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_quick.S
@@ -0,0 +1,14 @@
+%default { "load":"ldr" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    $load   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
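
The quick variants are the quickened form of iget, used once the field's offset is known: CCCC is no longer a field index but a raw byte offset into the object, so the handler reduces to a null check plus one load (with $load selecting the width and signedness). A hedged C sketch:

    #include <stdint.h>
    /* Hedged sketch of iget-quick; a NULL receiver means the handler branches
     * to common_errNullObject instead of loading. */
    static int iget_quick(const char *obj, uint32_t offset, int32_t *out) {
        if (obj == NULL)
            return -1;                              /* common_errNullObject */
        *out = *(const int32_t *)(obj + offset);    /* $load r0, [r3, r1] */
        return 0;
    }
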
diff --git a/runtime/interpreter/mterp/arm/op_iget_short.S b/runtime/interpreter/mterp/arm/op_iget_short.S
new file mode 100644
index 0000000..460f045
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_short.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_short_quick.S b/runtime/interpreter/mterp/arm/op_iget_short_quick.S
new file mode 100644
index 0000000..1831b99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrsh" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide.S b/runtime/interpreter/mterp/arm/op_iget_wide.S
new file mode 100644
index 0000000..e287d51
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_wide.S
@@ -0,0 +1,23 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet64InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpException                @ bail out
+    CLEAR_SHADOW_PAIR r2, ip, lr           @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r3, r2              @ r3<- &fp[A]
+    stmia    r3, {r0-r1}                   @ fp[A]<- r0/r1
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
new file mode 100644
index 0000000..5a7177d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
@@ -0,0 +1,14 @@
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH ip, 1                         @ ip<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrd    r0, [r3, ip]                @ r0/r1<- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
+    CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_instance_of.S b/runtime/interpreter/mterp/arm/op_instance_of.S
new file mode 100644
index 0000000..019929e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_instance_of.S
@@ -0,0 +1,23 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    FETCH     r0, 1                     @ r0<- CCCC
+    mov       r1, rINST, lsr #12        @ r1<- B
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
+    ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
+    mov       r3, rSELF                 @ r3<- self
+    bl        MterpInstanceOf           @ (index, &obj, method, self)
+    ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      r9, rINST, #8, #4         @ r9<- A
+    PREFETCH_INST 2
+    cmp       r1, #0                    @ exception pending?
+    bne       MterpException
+    ADVANCE 2                           @ advance rPC
+    SET_VREG r0, r9                     @ vA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_int_to_byte.S b/runtime/interpreter/mterp/arm/op_int_to_byte.S
new file mode 100644
index 0000000..059d5c2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"sxtb    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_char.S b/runtime/interpreter/mterp/arm/op_int_to_char.S
new file mode 100644
index 0000000..83a0c19
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_char.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"uxth    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_double.S b/runtime/interpreter/mterp/arm/op_int_to_double.S
new file mode 100644
index 0000000..810c2e4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_double.S
@@ -0,0 +1 @@
+%include "arm/funopWider.S" {"instr":"fsitod  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_float.S b/runtime/interpreter/mterp/arm/op_int_to_float.S
new file mode 100644
index 0000000..f41654c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_float.S
@@ -0,0 +1 @@
+%include "arm/funop.S" {"instr":"fsitos  s1, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_long.S b/runtime/interpreter/mterp/arm/op_int_to_long.S
new file mode 100644
index 0000000..b5aed8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_long.S
@@ -0,0 +1 @@
+%include "arm/unopWider.S" {"instr":"mov     r1, r0, asr #31"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_short.S b/runtime/interpreter/mterp/arm/op_int_to_short.S
new file mode 100644
index 0000000..717bd96
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_short.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"sxth    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_invoke_direct.S b/runtime/interpreter/mterp/arm/op_invoke_direct.S
new file mode 100644
index 0000000..1edf221
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_direct_range.S b/runtime/interpreter/mterp/arm/op_invoke_direct_range.S
new file mode 100644
index 0000000..3097b8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_interface.S b/runtime/interpreter/mterp/arm/op_invoke_interface.S
new file mode 100644
index 0000000..f6d565b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_interface_range.S b/runtime/interpreter/mterp/arm/op_invoke_interface_range.S
new file mode 100644
index 0000000..c8443b0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_static.S b/runtime/interpreter/mterp/arm/op_invoke_static.S
new file mode 100644
index 0000000..c3cefcf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/arm/op_invoke_static_range.S b/runtime/interpreter/mterp/arm/op_invoke_static_range.S
new file mode 100644
index 0000000..dd60d7b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_super.S b/runtime/interpreter/mterp/arm/op_invoke_super.S
new file mode 100644
index 0000000..92ef2a4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_super_range.S b/runtime/interpreter/mterp/arm/op_invoke_super_range.S
new file mode 100644
index 0000000..9e4fb1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual.S b/runtime/interpreter/mterp/arm/op_invoke_virtual.S
new file mode 100644
index 0000000..5b893ff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..020e8b8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S
new file mode 100644
index 0000000..2b42a78
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..42f2ded
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/arm/op_iput.S b/runtime/interpreter/mterp/arm/op_iput.S
new file mode 100644
index 0000000..d224cd8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput.S
@@ -0,0 +1,22 @@
+%default { "is_object":"0", "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       $handler
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_boolean.S b/runtime/interpreter/mterp/arm/op_iput_boolean.S
new file mode 100644
index 0000000..c9e8589
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S b/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S
new file mode 100644
index 0000000..f0a2777
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_byte.S b/runtime/interpreter/mterp/arm/op_iput_byte.S
new file mode 100644
index 0000000..c9e8589
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_byte_quick.S b/runtime/interpreter/mterp/arm/op_iput_byte_quick.S
new file mode 100644
index 0000000..f0a2777
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_char.S b/runtime/interpreter/mterp/arm/op_iput_char.S
new file mode 100644
index 0000000..5046f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_char.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_char_quick.S b/runtime/interpreter/mterp/arm/op_iput_char_quick.S
new file mode 100644
index 0000000..5212fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_object.S b/runtime/interpreter/mterp/arm/op_iput_object.S
new file mode 100644
index 0000000..d942e84
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_object.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpIputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_object_quick.S b/runtime/interpreter/mterp/arm/op_iput_object_quick.S
new file mode 100644
index 0000000..876b3da
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_object_quick.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpIputObjectQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_quick.S b/runtime/interpreter/mterp/arm/op_iput_quick.S
new file mode 100644
index 0000000..98c8150
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_quick.S
@@ -0,0 +1,14 @@
+%default { "store":"str" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $store     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
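The quick form above boils the put down to a null check plus one store at a
byte offset resolved at quickening time. A minimal C model of that fast path
(illustrative names and layout, not ART's actual object model):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of iput-quick: the field's byte offset is already known,
     * so the operation is a null check plus a single store. */
    static int iput_quick32(uint8_t* obj, uint32_t byte_offset, uint32_t value) {
        if (obj == NULL) return -1;               /* common_errNullObject path */
        *(uint32_t*)(obj + byte_offset) = value;  /* str r0, [r3, r1] */
        return 0;
    }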
diff --git a/runtime/interpreter/mterp/arm/op_iput_short.S b/runtime/interpreter/mterp/arm/op_iput_short.S
new file mode 100644
index 0000000..5046f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_short.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_short_quick.S b/runtime/interpreter/mterp/arm/op_iput_short_quick.S
new file mode 100644
index 0000000..5212fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide.S b/runtime/interpreter/mterp/arm/op_iput_wide.S
new file mode 100644
index 0000000..3dda187
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_wide.S
@@ -0,0 +1,16 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
new file mode 100644
index 0000000..88e6ea1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
@@ -0,0 +1,13 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r3, 1                         @ r3<- field byte offset
+    GET_VREG r2, r2                     @ r2<- fp[B], the object pointer
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    cmp     r2, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
+    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strd    r0, [r2, r3]                @ obj.field<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_long_to_double.S b/runtime/interpreter/mterp/arm/op_long_to_double.S
new file mode 100644
index 0000000..cac12d4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_double.S
@@ -0,0 +1,27 @@
+%default {}
+    /*
+     * Specialized 64-bit floating-point operation.
+     *
+     * Note: The result will be returned in d2.
+     *
+     * For: long-to-double
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    vldr    d0, [r3]                    @ d0<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    vcvt.f64.s32    d1, s1              @ d1<- (double)(vAAh)
+    vcvt.f64.u32    d2, s0              @ d2<- (double)(vAAl)
+    vldr            d3, constval$opcode
+    vmla.f64        d2, d1, d3          @ d2<- vAAh*2^32 + vAAl
+
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    vstr.64 d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+    /* literal pool helper */
+constval${opcode}:
+    .8byte          0x41f0000000000000
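The literal 0x41f0000000000000 above is the IEEE-754 bit pattern of 2^32, so
the vmla computes hi*2^32 + lo. The same conversion in C (a sketch; names are
illustrative):

    #include <stdint.h>

    /* long-to-double via two 32-bit conversions: the signed high word and
     * the unsigned low word are converted separately, then combined as
     * hi * 2^32 + lo. hi * 2^32 is exact, so no extra rounding occurs. */
    static double long_to_double(int64_t v) {
        double hi = (double)(int32_t)(v >> 32);   /* vcvt.f64.s32 */
        double lo = (double)(uint32_t)v;          /* vcvt.f64.u32 */
        return hi * 4294967296.0 + lo;            /* vmla.f64 with 2^32 */
    }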
diff --git a/runtime/interpreter/mterp/arm/op_long_to_float.S b/runtime/interpreter/mterp/arm/op_long_to_float.S
new file mode 100644
index 0000000..efa5a66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_float.S
@@ -0,0 +1 @@
+%include "arm/unopNarrower.S" {"instr":"bl      __aeabi_l2f"}
diff --git a/runtime/interpreter/mterp/arm/op_long_to_int.S b/runtime/interpreter/mterp/arm/op_long_to_int.S
new file mode 100644
index 0000000..3e91f23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm/op_move.S"
diff --git a/runtime/interpreter/mterp/arm/op_monitor_enter.S b/runtime/interpreter/mterp/arm/op_monitor_enter.S
new file mode 100644
index 0000000..3c34f75
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_monitor_enter.S
@@ -0,0 +1,14 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                      @ r0<- vAA (object)
+    mov      r1, rSELF                   @ r1<- self
+    bl       artLockObjectFromCode
+    cmp      r0, #0
+    bne      MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   @ extract opcode from rINST
+    GOTO_OPCODE ip                       @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_monitor_exit.S b/runtime/interpreter/mterp/arm/op_monitor_exit.S
new file mode 100644
index 0000000..fc7cef5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_monitor_exit.S
@@ -0,0 +1,18 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8          @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA (object)
+    mov      r1, rSELF                  @ r0<- self
+    bl       artUnlockObjectFromCode    @ r0<- success for unlock(self, obj)
+    cmp     r0, #0                      @ failed?
+    bne     MterpException
+    FETCH_ADVANCE_INST 1                @ before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move.S b/runtime/interpreter/mterp/arm/op_move.S
new file mode 100644
index 0000000..dfecc24
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_16.S b/runtime/interpreter/mterp/arm/op_move_16.S
new file mode 100644
index 0000000..78138a2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_exception.S b/runtime/interpreter/mterp/arm/op_move_exception.S
new file mode 100644
index 0000000..0242e26
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_exception.S
@@ -0,0 +1,9 @@
+    /* move-exception vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     r1, #0                      @ r1<- 0
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    SET_VREG_OBJECT r3, r2              @ fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str     r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ clear exception
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_from16.S b/runtime/interpreter/mterp/arm/op_move_from16.S
new file mode 100644
index 0000000..3e79417
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_object.S b/runtime/interpreter/mterp/arm/op_move_object.S
new file mode 100644
index 0000000..16de57b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object.S
@@ -0,0 +1 @@
+%include "arm/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_object_16.S b/runtime/interpreter/mterp/arm/op_move_object_16.S
new file mode 100644
index 0000000..2534300
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object_16.S
@@ -0,0 +1 @@
+%include "arm/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_object_from16.S b/runtime/interpreter/mterp/arm/op_move_object_from16.S
new file mode 100644
index 0000000..9e0cf02
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "arm/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_result.S b/runtime/interpreter/mterp/arm/op_move_result.S
new file mode 100644
index 0000000..f2586a0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_result_object.S b/runtime/interpreter/mterp/arm/op_move_result_object.S
new file mode 100644
index 0000000..643296a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result_object.S
@@ -0,0 +1 @@
+%include "arm/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_result_wide.S b/runtime/interpreter/mterp/arm/op_move_result_wide.S
new file mode 100644
index 0000000..87929ea
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result_wide.S
@@ -0,0 +1,10 @@
+    /* move-result-wide vAA */
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide.S b/runtime/interpreter/mterp/arm/op_move_wide.S
new file mode 100644
index 0000000..ff353ea
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide.S
@@ -0,0 +1,12 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_16.S b/runtime/interpreter/mterp/arm/op_move_wide_16.S
new file mode 100644
index 0000000..9812b66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide_16.S
@@ -0,0 +1,12 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 2                         @ r3<- BBBB
+    FETCH r2, 1                         @ r2<- AAAA
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR lr, r2           @ r2<- &fp[AAAA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
+    stmia   lr, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_from16.S b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
new file mode 100644
index 0000000..d2cc60c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
@@ -0,0 +1,12 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 1                         @ r3<- BBBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_mul_double.S b/runtime/interpreter/mterp/arm/op_mul_double.S
new file mode 100644
index 0000000..530e85a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_double_2addr.S b/runtime/interpreter/mterp/arm/op_mul_double_2addr.S
new file mode 100644
index 0000000..da1abc6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_float.S b/runtime/interpreter/mterp/arm/op_mul_float.S
new file mode 100644
index 0000000..6a72e6f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_float_2addr.S b/runtime/interpreter/mterp/arm/op_mul_float_2addr.S
new file mode 100644
index 0000000..edb5101
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int.S b/runtime/interpreter/mterp/arm/op_mul_int.S
new file mode 100644
index 0000000..d6151d4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binop.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_2addr.S b/runtime/interpreter/mterp/arm/op_mul_int_2addr.S
new file mode 100644
index 0000000..66a797d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_2addr.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binop2addr.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_lit16.S b/runtime/interpreter/mterp/arm/op_mul_int_lit16.S
new file mode 100644
index 0000000..4e40c43
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_lit16.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binopLit16.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_lit8.S b/runtime/interpreter/mterp/arm/op_mul_int_lit8.S
new file mode 100644
index 0000000..dbafae9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_lit8.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binopLit8.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
new file mode 100644
index 0000000..a13c803
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -0,0 +1,36 @@
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    add     r2, r2, lr                  @ r2<- (ZxW + YxX) + high(ZxX)
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    GOTO_OPCODE ip                      @ jump to next instruction
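The same partial-product scheme, written out in C (identifier names are
illustrative):

    #include <stdint.h>

    /* Low 64 bits of a 64x64 multiply from 32-bit halves, as in the
     * comment above: low = low(Z*X); high = high(Z*X) + Z*W + Y*X. */
    static uint64_t mul_long(uint64_t wx, uint64_t yz) {
        uint32_t x = (uint32_t)wx, w = (uint32_t)(wx >> 32);
        uint32_t z = (uint32_t)yz, y = (uint32_t)(yz >> 32);
        uint64_t zx = (uint64_t)z * x;                        /* umull */
        uint32_t hi = (uint32_t)(zx >> 32) + z * w + y * x;   /* mul/mla/add */
        return ((uint64_t)hi << 32) | (uint32_t)zx;
    }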
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
new file mode 100644
index 0000000..4c1f058
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -0,0 +1,24 @@
+    /*
+     * Signed 64-bit integer multiply, "/2addr" version.
+     *
+     * See op_mul_long for an explanation.
+     *
+     * We get a little tight on registers, so to avoid looking up &fp[A]
+     * again we stuff it into rINST.
+     */
+    /* mul-long/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r2, r2, lr                  @ r2<- (ZxW + YxX) + high(ZxX)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_neg_double.S b/runtime/interpreter/mterp/arm/op_neg_double.S
new file mode 100644
index 0000000..33e609c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_double.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"instr":"add     r1, r1, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_float.S b/runtime/interpreter/mterp/arm/op_neg_float.S
new file mode 100644
index 0000000..993583f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_float.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"add     r0, r0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_int.S b/runtime/interpreter/mterp/arm/op_neg_int.S
new file mode 100644
index 0000000..ec0b253
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_int.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"rsb     r0, r0, #0"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_long.S b/runtime/interpreter/mterp/arm/op_neg_long.S
new file mode 100644
index 0000000..dab2eb4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_long.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"preinstr":"rsbs    r0, r0, #0", "instr":"rsc     r1, r1, #0"}
diff --git a/runtime/interpreter/mterp/arm/op_new_array.S b/runtime/interpreter/mterp/arm/op_new_array.S
new file mode 100644
index 0000000..8bb792c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_new_array.S
@@ -0,0 +1,19 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_new_instance.S b/runtime/interpreter/mterp/arm/op_new_instance.S
new file mode 100644
index 0000000..95d4be8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_new_instance.S
@@ -0,0 +1,14 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rSELF
+    mov     r2, rINST
+    bl      MterpNewInstance           @ (shadow_frame, self, inst_data)
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2               @ advance rPC, load rINST
+    GET_INST_OPCODE ip                 @ extract opcode from rINST
+    GOTO_OPCODE ip                     @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_nop.S b/runtime/interpreter/mterp/arm/op_nop.S
new file mode 100644
index 0000000..af0f88f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                @ advance to next instr, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    GOTO_OPCODE ip                      @ execute it
diff --git a/runtime/interpreter/mterp/arm/op_not_int.S b/runtime/interpreter/mterp/arm/op_not_int.S
new file mode 100644
index 0000000..816485a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_not_int.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"mvn     r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_not_long.S b/runtime/interpreter/mterp/arm/op_not_long.S
new file mode 100644
index 0000000..49a5905
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_not_long.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"preinstr":"mvn     r0, r0", "instr":"mvn     r1, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int.S b/runtime/interpreter/mterp/arm/op_or_int.S
new file mode 100644
index 0000000..b046e8d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_2addr.S b/runtime/interpreter/mterp/arm/op_or_int_2addr.S
new file mode 100644
index 0000000..493c59f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit16.S b/runtime/interpreter/mterp/arm/op_or_int_lit16.S
new file mode 100644
index 0000000..0a01db8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit8.S b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
new file mode 100644
index 0000000..9882bfc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"", "instr":"orr     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_or_long.S b/runtime/interpreter/mterp/arm/op_or_long.S
new file mode 100644
index 0000000..048c45c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"orr     r0, r0, r2", "instr":"orr     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_or_long_2addr.S b/runtime/interpreter/mterp/arm/op_or_long_2addr.S
new file mode 100644
index 0000000..9395346
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"orr     r0, r0, r2", "instr":"orr     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
new file mode 100644
index 0000000..412c58f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -0,0 +1,20 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      $func                       @ r0<- code-unit branch offset
+    movs    rINST, r0
+    b       MterpCommonTakenBranch
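What the helper does with the payload, sketched in C (layout per the Dalvik
bytecode format; this is not MterpDoPackedSwitch's actual prototype):

    #include <stdint.h>
    #include <string.h>

    /* Packed-switch payload: ident 0x0100, size, first_key, then `size`
     * 32-bit targets. Returns the branch offset in code units, or 3 (the
     * width of the switch instruction itself) when the key misses. */
    static int32_t packed_switch(const uint16_t* payload, int32_t test) {
        uint16_t size = payload[1];
        int32_t first_key, target;
        memcpy(&first_key, payload + 2, sizeof(first_key));
        uint32_t index = (uint32_t)(test - first_key);
        if (index >= size) return 3;               /* fall through */
        memcpy(&target, payload + 4 + 2 * index, sizeof(target));
        return target;
    }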
diff --git a/runtime/interpreter/mterp/arm/op_rem_double.S b/runtime/interpreter/mterp/arm/op_rem_double.S
new file mode 100644
index 0000000..b539221
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_double.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a double remainder function, but libm does */
+%include "arm/binopWide.S" {"instr":"bl      fmod"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_double_2addr.S b/runtime/interpreter/mterp/arm/op_rem_double_2addr.S
new file mode 100644
index 0000000..372ef1d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_double_2addr.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a double remainder function, but libm does */
+%include "arm/binopWide2addr.S" {"instr":"bl      fmod"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_float.S b/runtime/interpreter/mterp/arm/op_rem_float.S
new file mode 100644
index 0000000..7bd10de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_float.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm/binop.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm/op_rem_float_2addr.S
new file mode 100644
index 0000000..93c5fae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_float_2addr.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm/binop2addr.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_int.S b/runtime/interpreter/mterp/arm/op_rem_int.S
new file mode 100644
index 0000000..ff62573
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int.S
@@ -0,0 +1,33 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls  r1, r1, r2, r0                 @ r1<- op, r0-r2 changed
+#else
+    bl   __aeabi_idivmod                @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
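The sdiv/mls pair computes the remainder as vBB - (vBB / vCC) * vCC; in C
(sketch only):

    #include <stdint.h>

    /* Signed remainder the way the hardware path computes it. Note that
     * vBB == INT32_MIN with vCC == -1 is undefined in C, while Dalvik
     * defines the result as 0 (which the sdiv+mls sequence produces). */
    static int32_t rem_int(int32_t vBB, int32_t vCC) {
        int32_t q = vBB / vCC;      /* sdiv r2, r0, r1 */
        return vBB - q * vCC;       /* mls  r1, r1, r2, r0 */
    }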
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_2addr.S b/runtime/interpreter/mterp/arm/op_rem_int_2addr.S
new file mode 100644
index 0000000..ba5751a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_2addr.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_lit16.S b/runtime/interpreter/mterp/arm/op_rem_int_lit16.S
new file mode 100644
index 0000000..4edb187
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_lit16.S
@@ -0,0 +1,31 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl     __aeabi_idivmod              @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_lit8.S b/runtime/interpreter/mterp/arm/op_rem_int_lit8.S
new file mode 100644
index 0000000..3888361
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_lit8.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ flags already set by the movs above
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl       __aeabi_idivmod            @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_rem_long.S b/runtime/interpreter/mterp/arm/op_rem_long.S
new file mode 100644
index 0000000..b2b1c24
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_long.S
@@ -0,0 +1,2 @@
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+%include "arm/binopWide.S" {"instr":"bl      __aeabi_ldivmod", "result0":"r2", "result1":"r3", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_long_2addr.S b/runtime/interpreter/mterp/arm/op_rem_long_2addr.S
new file mode 100644
index 0000000..f87d493
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_long_2addr.S
@@ -0,0 +1,2 @@
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+%include "arm/binopWide2addr.S" {"instr":"bl      __aeabi_ldivmod", "result0":"r2", "result1":"r3", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_return.S b/runtime/interpreter/mterp/arm/op_return.S
new file mode 100644
index 0000000..1888373
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return.S
@@ -0,0 +1,16 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_object.S b/runtime/interpreter/mterp/arm/op_return_object.S
new file mode 100644
index 0000000..c490730
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_object.S
@@ -0,0 +1 @@
+%include "arm/op_return.S"
diff --git a/runtime/interpreter/mterp/arm/op_return_void.S b/runtime/interpreter/mterp/arm/op_return_void.S
new file mode 100644
index 0000000..cbea2bf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_void.S
@@ -0,0 +1,9 @@
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
new file mode 100644
index 0000000..2dde7ae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
@@ -0,0 +1,7 @@
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
new file mode 100644
index 0000000..ceae878
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -0,0 +1,14 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
+    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int.S b/runtime/interpreter/mterp/arm/op_rsub_int.S
new file mode 100644
index 0000000..1508dd4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "arm/binopLit16.S" {"instr":"rsb     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
new file mode 100644
index 0000000..dc953dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"", "instr":"rsb     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_sget.S b/runtime/interpreter/mterp/arm/op_sget.S
new file mode 100644
index 0000000..2b81f50
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget.S
@@ -0,0 +1,27 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern $helper
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    $helper
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+.if $is_object
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
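The control flow above, modeled in C (prototypes inferred from the register
setup; not ART's declared signatures):

    #include <stdint.h>

    typedef struct Thread Thread;
    /* r0 = field index, r1 = referring method, r2 = self (inferred). */
    extern uint32_t artGet32StaticFromCode(uint32_t field_idx, void* referrer,
                                           Thread* self);
    /* Stand-in for the THREAD_EXCEPTION_OFFSET load. */
    extern void* pending_exception(Thread* self);

    static int sget32(uint32_t field_idx, void* referrer, Thread* self,
                      uint32_t* vAA) {
        uint32_t value = artGet32StaticFromCode(field_idx, referrer, self);
        if (pending_exception(self) != NULL) return -1;  /* MterpException */
        *vAA = value;                                    /* SET_VREG */
        return 0;
    }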
diff --git a/runtime/interpreter/mterp/arm/op_sget_boolean.S b/runtime/interpreter/mterp/arm/op_sget_boolean.S
new file mode 100644
index 0000000..ebfb44c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_byte.S b/runtime/interpreter/mterp/arm/op_sget_byte.S
new file mode 100644
index 0000000..d76862e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_char.S b/runtime/interpreter/mterp/arm/op_sget_char.S
new file mode 100644
index 0000000..b7fcfc2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_char.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_object.S b/runtime/interpreter/mterp/arm/op_sget_object.S
new file mode 100644
index 0000000..8e7d075
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_object.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_short.S b/runtime/interpreter/mterp/arm/op_sget_short.S
new file mode 100644
index 0000000..3e80f0d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_short.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
new file mode 100644
index 0000000..4f2f89d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -0,0 +1,22 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field@BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet64StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r9, rINST, lsr #8             @ r9<- AA
+    VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
+    cmp   r3, #0                        @ Fail to resolve?
+    bne   MterpException                @ bail out
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r9, r2, ip        @ Zero out the shadow regs
+    stmia lr, {r0-r1}                   @ vAA/vAA+1<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shl_int.S b/runtime/interpreter/mterp/arm/op_shl_int.S
new file mode 100644
index 0000000..7e4c768
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm/op_shl_int_2addr.S
new file mode 100644
index 0000000..4286577
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
new file mode 100644
index 0000000..60a1498
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
new file mode 100644
index 0000000..82ec6ed
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
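The same shift, expressed with 32-bit operations in C. The asm leans on ARM
register-controlled shifts producing 0 for distances of 32 or more (with
movpl patching in the >= 32 result); C leaves those shifts undefined, so the
sketch branches instead:

    #include <stdint.h>

    static uint64_t shl_long(uint64_t v, uint32_t n) {
        uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
        uint32_t rlo, rhi;
        n &= 63;                              /* Dalvik masks the distance */
        if (n == 0)      { rlo = lo;      rhi = hi; }
        else if (n < 32) { rlo = lo << n; rhi = (hi << n) | (lo >> (32 - n)); }
        else             { rlo = 0;       rhi = lo << (n - 32); }
        return ((uint64_t)rhi << 32) | rlo;
    }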
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
new file mode 100644
index 0000000..f361a7d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_int.S b/runtime/interpreter/mterp/arm/op_shr_int.S
new file mode 100644
index 0000000..6317605
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm/op_shr_int_2addr.S
new file mode 100644
index 0000000..cc8632f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
new file mode 100644
index 0000000..c2f6cb0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
new file mode 100644
index 0000000..a0afe5b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
new file mode 100644
index 0000000..976110e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sparse_switch.S b/runtime/interpreter/mterp/arm/op_sparse_switch.S
new file mode 100644
index 0000000..9f7a42b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "arm/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/arm/op_sput.S b/runtime/interpreter/mterp/arm/op_sput.S
new file mode 100644
index 0000000..7e0c1a6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput.S
@@ -0,0 +1,20 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      $helper
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
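
Each width-specific sput below substitutes only the "helper" runtime entry
point into this wrapper. In C terms the handler reduces to roughly the
following; the argument order is inferred from the r0-r3 setup above, so treat
the prototype as a sketch rather than the declared ART signature:

    #include <stdint.h>

    /* Assumed shape of the helper, per the register assignments above:
     * r0 = field index, r1 = new value, r2 = referrer method, r3 = self.
     * Returns 0 on success, non-zero with an exception pending. */
    extern int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
                                      void* referrer, void* self);

    static int sput_sketch(uint32_t field_idx, uint32_t vAA,
                           void* method, void* self) {
        if (artSet32StaticFromCode(field_idx, vAA, method, self) != 0) {
            return -1;  /* pending exception: unwind via MterpException */
        }
        return 0;       /* success: ADVANCE 2 and dispatch next opcode */
    }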
diff --git a/runtime/interpreter/mterp/arm/op_sput_boolean.S b/runtime/interpreter/mterp/arm/op_sput_boolean.S
new file mode 100644
index 0000000..e3bbf2b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_byte.S b/runtime/interpreter/mterp/arm/op_sput_byte.S
new file mode 100644
index 0000000..e3bbf2b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_char.S b/runtime/interpreter/mterp/arm/op_sput_char.S
new file mode 100644
index 0000000..d8d65cb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_char.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_object.S b/runtime/interpreter/mterp/arm/op_sput_object.S
new file mode 100644
index 0000000..6d3a9a7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_object.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpSputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sput_short.S b/runtime/interpreter/mterp/arm/op_sput_short.S
new file mode 100644
index 0000000..d8d65cb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_short.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
new file mode 100644
index 0000000..8d8ed8c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -0,0 +1,19 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    ldr     r1, [rFP, #OFF_FP_METHOD]
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    VREG_INDEX_TO_ADDR r2, r2
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sub_double.S b/runtime/interpreter/mterp/arm/op_sub_double.S
new file mode 100644
index 0000000..69bcc67
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_double_2addr.S b/runtime/interpreter/mterp/arm/op_sub_double_2addr.S
new file mode 100644
index 0000000..2ea59fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_float.S b/runtime/interpreter/mterp/arm/op_sub_float.S
new file mode 100644
index 0000000..3f17a0d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_float_2addr.S b/runtime/interpreter/mterp/arm/op_sub_float_2addr.S
new file mode 100644
index 0000000..2f4aac4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_int.S b/runtime/interpreter/mterp/arm/op_sub_int.S
new file mode 100644
index 0000000..efb9e10
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"sub     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_int_2addr.S b/runtime/interpreter/mterp/arm/op_sub_int_2addr.S
new file mode 100644
index 0000000..4d3036b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"sub     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_long.S b/runtime/interpreter/mterp/arm/op_sub_long.S
new file mode 100644
index 0000000..6f1eb6e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"subs    r0, r0, r2", "instr":"sbc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_long_2addr.S b/runtime/interpreter/mterp/arm/op_sub_long_2addr.S
new file mode 100644
index 0000000..8e9da05
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"subs    r0, r0, r2", "instr":"sbc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_throw.S b/runtime/interpreter/mterp/arm/op_throw.S
new file mode 100644
index 0000000..be49ada
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_throw.S
@@ -0,0 +1,11 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r1, r2                      @ r1<- vAA (exception object)
+    cmp      r1, #0                      @ null object?
+    beq      common_errNullObject        @ yes, throw an NPE instead
+    str      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ thread->exception<- obj
+    b        MterpException
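
The throw handler has only two outcomes: a null vAA is converted into a
NullPointerException via common_errNullObject, otherwise the object becomes
the thread's pending exception and control transfers to the common exception
path. A C sketch, with the Thread layout reduced to the one field the handler
touches (the real slot is reached through THREAD_EXCEPTION_OFFSET, not this
illustrative struct):

    struct Object;
    struct Thread { struct Object* exception; };  /* illustrative layout */

    static void throw_sketch(struct Thread* self, struct Object* vAA) {
        if (vAA == 0) {
            /* common_errNullObject: throw an NPE instead */
            return;
        }
        self->exception = vAA;  /* thread->exception <- obj */
        /* fall into MterpException handling */
    }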
diff --git a/runtime/interpreter/mterp/arm/op_unused_3e.S b/runtime/interpreter/mterp/arm/op_unused_3e.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_3e.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_3f.S b/runtime/interpreter/mterp/arm/op_unused_3f.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_3f.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_40.S b/runtime/interpreter/mterp/arm/op_unused_40.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_40.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_41.S b/runtime/interpreter/mterp/arm/op_unused_41.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_41.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_42.S b/runtime/interpreter/mterp/arm/op_unused_42.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_42.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_43.S b/runtime/interpreter/mterp/arm/op_unused_43.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_43.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_73.S b/runtime/interpreter/mterp/arm/op_unused_73.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_73.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_79.S b/runtime/interpreter/mterp/arm/op_unused_79.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_79.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_7a.S b/runtime/interpreter/mterp/arm/op_unused_7a.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_7a.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f3.S b/runtime/interpreter/mterp/arm/op_unused_f3.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f3.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f4.S b/runtime/interpreter/mterp/arm/op_unused_f4.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f4.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f5.S b/runtime/interpreter/mterp/arm/op_unused_f5.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f5.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f6.S b/runtime/interpreter/mterp/arm/op_unused_f6.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f6.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f7.S b/runtime/interpreter/mterp/arm/op_unused_f7.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f7.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f8.S b/runtime/interpreter/mterp/arm/op_unused_f8.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f8.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f9.S b/runtime/interpreter/mterp/arm/op_unused_f9.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f9.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fa.S b/runtime/interpreter/mterp/arm/op_unused_fa.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fa.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fb.S b/runtime/interpreter/mterp/arm/op_unused_fb.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fb.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fc.S b/runtime/interpreter/mterp/arm/op_unused_fc.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fc.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fd.S b/runtime/interpreter/mterp/arm/op_unused_fd.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fd.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fe.S b/runtime/interpreter/mterp/arm/op_unused_fe.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fe.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_ff.S b/runtime/interpreter/mterp/arm/op_unused_ff.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_ff.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int.S b/runtime/interpreter/mterp/arm/op_ushr_int.S
new file mode 100644
index 0000000..a74361b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S
new file mode 100644
index 0000000..f2d1d13
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
new file mode 100644
index 0000000..5554eb0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
new file mode 100644
index 0000000..c817bc9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
new file mode 100644
index 0000000..2735f87
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_xor_int.S b/runtime/interpreter/mterp/arm/op_xor_int.S
new file mode 100644
index 0000000..fd7a4b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_2addr.S b/runtime/interpreter/mterp/arm/op_xor_int_2addr.S
new file mode 100644
index 0000000..196a665
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit16.S b/runtime/interpreter/mterp/arm/op_xor_int_lit16.S
new file mode 100644
index 0000000..39f2a47
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
new file mode 100644
index 0000000..97d0b9e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"extract":"", "instr":"eor     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_long.S b/runtime/interpreter/mterp/arm/op_xor_long.S
new file mode 100644
index 0000000..4f830d0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"eor     r0, r0, r2", "instr":"eor     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_long_2addr.S b/runtime/interpreter/mterp/arm/op_xor_long_2addr.S
new file mode 100644
index 0000000..5b5ed88
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"eor     r0, r0, r2", "instr":"eor     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/unop.S b/runtime/interpreter/mterp/arm/unop.S
new file mode 100644
index 0000000..56518b5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unop.S
@@ -0,0 +1,20 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    $preinstr                           @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopNarrower.S b/runtime/interpreter/mterp/arm/unopNarrower.S
new file mode 100644
index 0000000..2d0453a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopNarrower.S
@@ -0,0 +1,23 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0/r1", where
+     * "result" is a 32-bit quantity in r0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     *
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for op_move.)
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopWide.S b/runtime/interpreter/mterp/arm/unopWide.S
new file mode 100644
index 0000000..cd5defd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopWide.S
@@ -0,0 +1,22 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopWider.S b/runtime/interpreter/mterp/arm/unopWider.S
new file mode 100644
index 0000000..9d50489
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopWider.S
@@ -0,0 +1,21 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    $preinstr                           @ optional op; may set condition codes
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/arm/unused.S b/runtime/interpreter/mterp/arm/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
new file mode 100644
index 0000000..5db8b6c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -0,0 +1,17 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    b${condition} MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/alt_stub.S b/runtime/interpreter/mterp/arm64/alt_stub.S
new file mode 100644
index 0000000..3a463fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/alt_stub.S
@@ -0,0 +1,12 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (${opnum} * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
new file mode 100644
index 0000000..8dd4fed
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -0,0 +1,19 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.${condition} MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/binop.S b/runtime/interpreter/mterp/arm64/binop.S
new file mode 100644
index 0000000..b629b0b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binop.S
@@ -0,0 +1,33 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if $chkzero
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
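
As on arm, the one-line opcode files splice their operation into this skeleton
through %include substitutions; an integer divide, for instance, would set
"chkzero" to 1 so the cbz guard is assembled in. The template's control flow,
rendered as a hedged C sketch (names illustrative):

    #include <stdint.h>
    #include <stdlib.h>

    /* C shape of the binop template: fetch vBB/vCC, optionally guard a
     * zero divisor, apply the op, and store the result into vAA. */
    static int32_t binop_sketch(int32_t vBB, int32_t vCC, int chkzero,
                                int32_t (*op)(int32_t, int32_t)) {
        if (chkzero && vCC == 0) {
            abort();  /* stands in for common_errDivideByZero */
        }
        return op(vBB, vCC);  /* the $instr slot */
    }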
diff --git a/runtime/interpreter/mterp/arm64/binop2addr.S b/runtime/interpreter/mterp/arm64/binop2addr.S
new file mode 100644
index 0000000..a480a7d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binop2addr.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if $chkzero
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopLit16.S b/runtime/interpreter/mterp/arm64/binopLit16.S
new file mode 100644
index 0000000..4f9d205
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopLit16.S
@@ -0,0 +1,28 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if $chkzero
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
new file mode 100644
index 0000000..dfa3169
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -0,0 +1,34 @@
+%default {"extract": "asr     w1, w3, #8", "preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    $extract                            // optional; typically w1<- ssssssCC (sign extended)
+    .if $chkzero
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
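
The "extract" hook exists because the literal and the register index share one
fetched code unit: FETCH_S pulls the sign-extended ssssCCBB word, whose low
byte selects vBB and whose high byte is the signed constant. A C sketch of
that decode (parameter names illustrative):

    #include <stdint.h>

    /* Decode of a binop/lit8 operand unit: low byte selects vBB, high byte
     * is the signed literal CC (the default "asr w1, w3, #8"). */
    static void decode_lit8(int16_t ssssCCBB, uint32_t* bb, int32_t* cc) {
        *bb = (uint16_t)ssssCCBB & 0xffu;  /* and w2, w3, #255 */
        *cc = ssssCCBB >> 8;               /* arithmetic shift keeps the sign */
    }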
diff --git a/runtime/interpreter/mterp/arm64/binopWide.S b/runtime/interpreter/mterp/arm64/binopWide.S
new file mode 100644
index 0000000..9de24f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopWide.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "instr":"add x0, x1, x2", "result":"x0", "r1":"x1", "r2":"x2", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC ($r2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE $r2, w2               // $r2<- vCC
+    GET_VREG_WIDE $r1, w1               // $r1<- vBB
+    .if $chkzero
+    cbz     $r2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr
+    $instr                              // $result<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE $result, w4           // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopWide2addr.S b/runtime/interpreter/mterp/arm64/binopWide2addr.S
new file mode 100644
index 0000000..d9927a2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopWide2addr.S
@@ -0,0 +1,29 @@
+%default {"preinstr":"", "instr":"add x0, x0, x1", "r0":"x0", "r1":"x1", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB ($r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE $r1, w1               // x1<- vB
+    GET_VREG_WIDE $r0, w2               // x0<- vA
+    .if $chkzero
+    cbz     $r1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $preinstr
+    $instr                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE $r0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
new file mode 100644
index 0000000..9fbbbd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    .text
+
+/*
+ * Interpreter entry point.
+ * On entry:
+ *  x0  Thread* self
+ *  x1  code_item
+ *  x2  ShadowFrame
+ *  x3  JValue* result_register
+ *
+ */
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    stp     xPROFILE, x27, [sp, #-80]!
+    stp     xIBASE, xREFS, [sp, #16]
+    stp     xSELF, xINST, [sp, #32]
+    stp     xPC, xFP, [sp, #48]
+    stp     fp, lr, [sp, #64]
+    add     fp, sp, #64
+
+    /* Remember the return register */
+    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     xSELF, x0
+    ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.
+    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
+    ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
+    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
+    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* Set up for backwards branches & osr profiling */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          // load wINST from rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* NOTE: no fallthrough */
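
The prologue builds an 80-byte frame of five register pairs and points fp at
the saved fp/lr pair; the epilogues in the footer restore the same pairs in
reverse order. As a C layout sketch (field names illustrative; xPROFILE and
friends are register aliases, not memory names):

    #include <stdint.h>

    /* Frame written by the stp sequence above, offsets from the
     * post-decrement sp; "add fp, sp, #64" points fp at fp_save. */
    struct mterp_frame {
        uint64_t xprofile, x27;    /* sp +  0 */
        uint64_t xibase,   xrefs;  /* sp + 16 */
        uint64_t xself,    xinst;  /* sp + 32 */
        uint64_t xpc,      xfp;    /* sp + 48 */
        uint64_t fp_save,  lr;     /* sp + 64 */
    };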
diff --git a/runtime/interpreter/mterp/arm64/fallback.S b/runtime/interpreter/mterp/arm64/fallback.S
new file mode 100644
index 0000000..44e7e12
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
diff --git a/runtime/interpreter/mterp/arm64/fbinop.S b/runtime/interpreter/mterp/arm64/fbinop.S
new file mode 100644
index 0000000..926d078
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fbinop.S
@@ -0,0 +1,19 @@
+%default {}
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    $instr                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
new file mode 100644
index 0000000..04236ad
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S
@@ -0,0 +1,17 @@
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    $instr                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S
new file mode 100644
index 0000000..cad6318
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fcmp.S
@@ -0,0 +1,19 @@
+%default {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG$wide $r1, w2
+    GET_VREG$wide $r2, w3
+    fcmp $r1, $r2
+    cset w0, ne
+    cneg w0, w0, $cond
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
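
The cset/cneg pair is a branch-free way to materialize the Dalvik compare
result: cset leaves 0 for equal and 1 otherwise, and cneg flips the 1 to -1
when the chosen condition holds, which also supplies the cmpl/cmpg NaN bias.
Equivalent C, with the NaN bias passed explicitly rather than encoded in the
condition (a sketch, not the generated code):

    /* Returns -1/0/1 like the fcmp template; nan_result stands in for
     * the cmpl (-1) or cmpg (+1) bias selected by "cond". */
    static int fcmp_sketch(float a, float b, int nan_result) {
        if (a != a || b != b) return nan_result;  /* unordered */
        if (a == b) return 0;                     /* cset w0, ne -> 0 */
        return (a < b) ? -1 : 1;                  /* cneg w0, w0, cond */
    }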
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
new file mode 100644
index 0000000..7628ed3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -0,0 +1,313 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/*
+ * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in w7.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sxtw x2, wINST
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    str     x0, [x2]
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
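
The wPROFILE bookkeeping above is easier to follow as pseudocode.  The
following is a minimal C model of the countdown/batch logic, not ART's
implementation; `ReportBatch` and `hotness_countdown` are illustrative
stand-ins for MterpAddHotnessBatch and wPROFILE.

    #include <stdint.h>

    extern int16_t ReportBatch(void);    /* hypothetical: models MterpAddHotnessBatch */

    static int16_t hotness_countdown;    /* mirrors wPROFILE */

    void OnTakenBranch(void) {
        if (hotness_countdown > 0 && --hotness_countdown == 0) {
            /* Report the accumulated counts in one batch; the return value is
             * the new countdown: positive to keep counting, negative when
             * profiling is disabled or an OSR check is pending. */
            hotness_countdown = ReportBatch();
        }
    }
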
diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
new file mode 100644
index 0000000..aed830b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S
@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $tgtreg, w4                // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
new file mode 100644
index 0000000..6fddfea
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S
@@ -0,0 +1,16 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: double-to-float, double-to-int
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $tgtreg, w4                // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
new file mode 100644
index 0000000..409e26b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopWide.S
@@ -0,0 +1,16 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $tgtreg, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
new file mode 100644
index 0000000..4c91ebc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopWider.S
@@ -0,0 +1,16 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $tgtreg, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
new file mode 100644
index 0000000..c791eb5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl()-style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat xFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via xFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM64 Runtime register usage conventions.
+
+  r0     : w0 is 32-bit return register and x0 is 64-bit.
+  r0-r7  : Argument registers.
+  r8-r15 : Caller save registers (used as temporary registers).
+  r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
+           the linker, by the trampolines and other stubs (the backend uses
+           these as temporary registers).
+  r18    : Caller save register (used as temporary register).
+  r19    : Pointer to thread-local storage.
+  r20-r29: Callee save registers.
+  r30    : (lr) is reserved (the link register).
+  rsp    : (sp) is reserved (the stack pointer).
+  rzr    : (zr) is reserved (the zero register).
+
+  Floating-point registers
+  v0-v31
+
+  v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
+           This is analogous to the C/C++ (hard-float) calling convention.
+  v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
+           Also used as temporary and codegen scratch registers.
+
+  v0-v7 and v16-v31 : trashed across C calls.
+  v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
+
+  v16-v31: Used as codegen temp/scratch.
+  v8-v15 : Can be used for promotion.
+
+  Must maintain 16-byte stack alignment.
+
+Mterp notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  x20  xPC       interpreted program counter, used for fetching instructions
+  x21  xFP       interpreted frame pointer, used for accessing locals and args
+  x22  xSELF     self (Thread) pointer
+  x23  xINST     first 16-bit code unit of current instruction
+  x24  xIBASE    interpreted instruction base pointer, used for computed goto
+  x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x26  wPROFILE  jit profile hotness countdown
+  x16  ip        scratch reg
+  x17  ip2       scratch reg (used by macros)
+
+Macros are provided for common operations.  They MUST NOT alter unspecified registers or condition
+codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/* During bringup, we'll use the shadow frame model instead of xFP */
+/* single-purpose registers, given names for clarity */
+#define xPC      x20
+#define xFP      x21
+#define xSELF    x22
+#define xINST    x23
+#define wINST    w23
+#define xIBASE   x24
+#define xREFS    x25
+#define wPROFILE w26
+#define xPROFILE x26
+#define ip       x16
+#define ip2      x17
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+/*
+ * Fetch the next instruction from xPC into wINST.  Does not advance xPC.
+ */
+.macro FETCH_INST
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances xPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to xPC and xINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update xPC.  Used to load
+ * xINST ahead of possible exception point.  Be sure to manually advance xPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]
+.endm
+
+/* Advance xPC by some number of code units. */
+.macro ADVANCE count
+  add  xPC, xPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg, advancing xPC
+ * to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    add     xPC, xPC, \reg, sxtw
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance xPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [xPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [xPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [xPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, xINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Clobbers _reg.
+ */
+
+.macro GOTO_OPCODE reg
+    add     \reg, xIBASE, \reg, lsl #${handler_size_bits}
+    br      \reg
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     \reg, \base, \reg, lsl #${handler_size_bits}
+    br      \reg
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [xFP, \vreg, uxtw #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     wzr, [xREFS, \vreg, uxtw #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     \reg, [xREFS, \vreg, uxtw #2]
+.endm
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.
+ * TUNING: can we do better here?
+ */
+.macro GET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    ldr     \reg, [ip2]
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    str     \reg, [ip2]
+    add     ip2, xREFS, \vreg, lsl #2
+    str     xzr, [ip2]
+.endm
+
+/*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
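
SET_VREG and SET_VREG_OBJECT encode the shadow frame's double bookkeeping for
references mentioned at the top of the file.  A minimal C sketch of that
invariant (array sizes and names are illustrative, not ART's layout):

    #include <stdint.h>

    uint32_t vregs[16];   /* primitive view, base held in xFP  */
    uint32_t refs[16];    /* reference view, base held in xREFS */

    void SetVreg(unsigned i, uint32_t value) {
        vregs[i] = value;
        refs[i]  = 0;        /* clear the ref slot: value is not an object */
    }

    void SetVregObject(unsigned i, uint32_t ref) {
        vregs[i] = ref;
        refs[i]  = ref;      /* double-store so the GC can find the reference */
    }
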
diff --git a/runtime/interpreter/mterp/arm64/invoke.S b/runtime/interpreter/mterp/arm64/invoke.S
new file mode 100644
index 0000000..7a32df7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/invoke.S
@@ -0,0 +1,20 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, meth@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      $helper
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
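
The wrapper above hands (self, shadow_frame, dex_pc, inst) to the chosen
helper and treats a zero return as a pending exception.  A hedged C sketch of
the assumed calling contract (the real helper signatures live on ART's C++
side and may differ):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct Thread Thread;
    typedef struct ShadowFrame ShadowFrame;

    /* Assumed helper shape: returns false if an exception is pending. */
    typedef bool (*InvokeHelper)(Thread* self, ShadowFrame* frame,
                                 uint16_t* dex_pc, uint16_t inst_data);

    bool DispatchInvoke(InvokeHelper helper, Thread* self, ShadowFrame* frame,
                        uint16_t* dex_pc, uint16_t inst_data) {
        if (!helper(self, frame, dex_pc, inst_data)) {
            return false;  /* corresponds to the cbz w0, MterpException path */
        }
        return true;       /* fall through: advance 3 code units and dispatch */
    }
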
diff --git a/runtime/interpreter/mterp/arm64/op_add_double.S b/runtime/interpreter/mterp/arm64/op_add_double.S
new file mode 100644
index 0000000..8509f70
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fadd d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_double_2addr.S b/runtime/interpreter/mterp/arm64/op_add_double_2addr.S
new file mode 100644
index 0000000..61fd58f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fadd     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_float.S b/runtime/interpreter/mterp/arm64/op_add_float.S
new file mode 100644
index 0000000..7d09fef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fadd   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_float_2addr.S b/runtime/interpreter/mterp/arm64/op_add_float_2addr.S
new file mode 100644
index 0000000..7b378e2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fadd   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int.S b/runtime/interpreter/mterp/arm64/op_add_int.S
new file mode 100644
index 0000000..6eadb54
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_2addr.S b/runtime/interpreter/mterp/arm64/op_add_int_2addr.S
new file mode 100644
index 0000000..d35bc8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit16.S b/runtime/interpreter/mterp/arm64/op_add_int_lit16.S
new file mode 100644
index 0000000..4930ad7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
new file mode 100644
index 0000000..2dfb8b9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"", "instr":"add     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_long.S b/runtime/interpreter/mterp/arm64/op_add_long.S
new file mode 100644
index 0000000..bc334aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"add x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_long_2addr.S b/runtime/interpreter/mterp/arm64/op_add_long_2addr.S
new file mode 100644
index 0000000..5e5dbce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"add     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_aget.S b/runtime/interpreter/mterp/arm64/op_aget.S
new file mode 100644
index 0000000..662c9cc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget.S
@@ -0,0 +1,28 @@
+%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #$shift    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $load   w2, [x0, #$data_offset]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
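
The cmp/bcs pair above is the single-compare bounds check: treating both
operands as unsigned rejects negative indices and out-of-range indices in one
test.  In C terms:

    #include <stdint.h>

    /* One unsigned compare covers both index < 0 and index >= length. */
    int InBounds(int32_t index, int32_t length) {
        return (uint32_t)index < (uint32_t)length;
    }
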
diff --git a/runtime/interpreter/mterp/arm64/op_aget_boolean.S b/runtime/interpreter/mterp/arm64/op_aget_boolean.S
new file mode 100644
index 0000000..6ab6cc1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_byte.S b/runtime/interpreter/mterp/arm64/op_aget_byte.S
new file mode 100644
index 0000000..c7f5b23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_char.S b/runtime/interpreter/mterp/arm64/op_aget_char.S
new file mode 100644
index 0000000..9fddf17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_char.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_object.S b/runtime/interpreter/mterp/arm64/op_aget_object.S
new file mode 100644
index 0000000..1bbe3e8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_object.S
@@ -0,0 +1,20 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    FETCH_B w3, 1, 1                    // w3<- CC
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     // (array, index)
+    ldr      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr      w2, wINST, #8               // w2<- AA
+    PREFETCH_INST 2
+    cbnz     w1, MterpException
+    SET_VREG_OBJECT w0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aget_short.S b/runtime/interpreter/mterp/arm64/op_aget_short.S
new file mode 100644
index 0000000..39554de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_short.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrsh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_wide.S b/runtime/interpreter/mterp/arm64/op_aget_wide.S
new file mode 100644
index 0000000..6f990ba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null array object
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    ldr     x2, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // x2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x2, w4
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_and_int.S b/runtime/interpreter/mterp/arm64/op_and_int.S
new file mode 100644
index 0000000..31f3f73
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_2addr.S b/runtime/interpreter/mterp/arm64/op_and_int_2addr.S
new file mode 100644
index 0000000..e59632c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit16.S b/runtime/interpreter/mterp/arm64/op_and_int_lit16.S
new file mode 100644
index 0000000..6540f81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
new file mode 100644
index 0000000..495b5cd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"", "instr":"and     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_long.S b/runtime/interpreter/mterp/arm64/op_and_long.S
new file mode 100644
index 0000000..ede047d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"and x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_long_2addr.S b/runtime/interpreter/mterp/arm64/op_and_long_2addr.S
new file mode 100644
index 0000000..d62ccef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"and     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_aput.S b/runtime/interpreter/mterp/arm64/op_aput.S
new file mode 100644
index 0000000..175b483
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput.S
@@ -0,0 +1,28 @@
+%default { "store":"str", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #$shift     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    $store  w2, [x0, #$data_offset]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aput_boolean.S b/runtime/interpreter/mterp/arm64/op_aput_boolean.S
new file mode 100644
index 0000000..5e7a86f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_byte.S b/runtime/interpreter/mterp/arm64/op_aput_byte.S
new file mode 100644
index 0000000..d659ebc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_char.S b/runtime/interpreter/mterp/arm64/op_aput_char.S
new file mode 100644
index 0000000..7547c80
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_char.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_object.S b/runtime/interpreter/mterp/arm64/op_aput_object.S
new file mode 100644
index 0000000..0146fdc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_object.S
@@ -0,0 +1,13 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpAputObject
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aput_short.S b/runtime/interpreter/mterp/arm64/op_aput_short.S
new file mode 100644
index 0000000..8631e28
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_short.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_wide.S b/runtime/interpreter/mterp/arm64/op_aput_wide.S
new file mode 100644
index 0000000..e1cf9c1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    GET_VREG_WIDE x1, w4
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_array_length.S b/runtime/interpreter/mterp/arm64/op_array_length.S
new file mode 100644
index 0000000..0cce917
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_array_length.S
@@ -0,0 +1,12 @@
+    /*
+     * Return the length of an array.
+     */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w0, w1                     // w0<- vB (object ref)
+    cbz     w0, common_errNullObject    // bail if null object
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- array length
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w3, w2                     // vB<- length
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_check_cast.S b/runtime/interpreter/mterp/arm64/op_check_cast.S
new file mode 100644
index 0000000..cb9f606
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_check_cast.S
@@ -0,0 +1,16 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- BBBB
+    lsr      w1, wINST, #8              // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr      x2, [xFP, #OFF_FP_METHOD]  // w2<- method
+    mov      x3, xSELF                  // w3<- self
+    bl       MterpCheckCast             // (index, &obj, method, self)
+    PREFETCH_INST 2
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S
new file mode 100644
index 0000000..c4ad984
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S
@@ -0,0 +1,13 @@
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE x1, w2
+    GET_VREG_WIDE x2, w3
+    cmp     x1, x2
+    cset    w0, ne
+    cneg    w0, w0, lt
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG w0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
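
The cset/cneg sequence computes the three-way result (-1, 0, 1) without
branching.  A C model:

    #include <stdint.h>

    int32_t CmpLong(int64_t a, int64_t b) {
        int32_t r = (a != b);      /* cset w0, ne : 0 if equal, else 1 */
        return (a < b) ? -r : r;   /* cneg w0, w0, lt : negate when a < b */
    }
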
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
new file mode 100644
index 0000000..30cb7eb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
new file mode 100644
index 0000000..ba23f43
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
new file mode 100644
index 0000000..c739685
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
new file mode 100644
index 0000000..32a9319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_const.S b/runtime/interpreter/mterp/arm64/op_const.S
new file mode 100644
index 0000000..031ede1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const.S
@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S
new file mode 100644
index 0000000..f0e8192
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_16.S
@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S
new file mode 100644
index 0000000..9a36115
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_4.S
@@ -0,0 +1,7 @@
+    /* const/4 vA, #+B */
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // ip<- opcode from xINST
+    SET_VREG w1, w0                     // fp[A]<- w1
+    GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_class.S b/runtime/interpreter/mterp/arm64/op_const_class.S
new file mode 100644
index 0000000..971cfa0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_class.S
@@ -0,0 +1,12 @@
+    /* const/class vAA, Class//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstClass             // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cbnz    w0, MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S
new file mode 100644
index 0000000..3a9edff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_high16.S
@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH   w0, 1                       // w0<- 0000BBBB (zero-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    lsl     w0, w0, #16                 // w0<- BBBB0000
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_string.S b/runtime/interpreter/mterp/arm64/op_const_string.S
new file mode 100644
index 0000000..896f1e7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_string.S
@@ -0,0 +1,12 @@
+    /* const/string vAA, String//BBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load rINST
+    cbnz    w0, MterpPossibleException  // let reference interpreter deal with it.
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S
new file mode 100644
index 0000000..e1a7339
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String//BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w2, 2                         // w2<- BBBB (high)
+    lsr     w1, wINST, #8               // w1<- AA
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     // load rINST
+    cbnz    w0, MterpPossibleException      // let reference interpreter deal with it.
+    ADVANCE 3                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide.S b/runtime/interpreter/mterp/arm64/op_const_wide.S
new file mode 100644
index 0000000..8f57dda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide.S
@@ -0,0 +1,13 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (low middle)
+    FETCH w2, 3                         // w2<- hhhh (high middle)
+    FETCH w3, 4                         // w3<- HHHH (high)
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 5                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w1, lsl #16         // w0<-         BBBBbbbb
+    orr     x0, x0, x2, lsl #32         // w0<-     hhhhBBBBbbbb
+    orr     x0, x0, x3, lsl #48         // w0<- HHHHhhhhBBBBbbbb
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
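
The orr/lsl chain assembles the 64-bit literal from four 16-bit code units,
low halfword first.  Equivalently, in C:

    #include <stdint.h>

    uint64_t AssembleWide(uint16_t bbbb, uint16_t BBBB,
                          uint16_t hhhh, uint16_t HHHH) {
        uint64_t v = bbbb;                    /* bits  0-15 */
        v |= (uint64_t)BBBB << 16;            /* bits 16-31 */
        v |= (uint64_t)hhhh << 32;            /* bits 32-47 */
        v |= (uint64_t)HHHH << 48;            /* bits 48-63 */
        return v;
    }
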
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
new file mode 100644
index 0000000..553d481
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
@@ -0,0 +1,7 @@
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S x0, 1                       // x0<- ssssssssssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
new file mode 100644
index 0000000..9dc4fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
@@ -0,0 +1,9 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (low)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_S x2, 2                       // x2<- ssssssssssssBBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     x0, x0, x2, lsl #16         // x0<- ssssssssBBBBbbbb
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_high16.S b/runtime/interpreter/mterp/arm64/op_const_wide_high16.S
new file mode 100644
index 0000000..94ab987
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_high16.S
@@ -0,0 +1,8 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH w0, 1                         // w0<- 0000BBBB (zero-extended)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    lsl     x0, x0, #48
+    SET_VREG_WIDE x0, w1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_div_double.S b/runtime/interpreter/mterp/arm64/op_div_double.S
new file mode 100644
index 0000000..1f7dad0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fdiv d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_double_2addr.S b/runtime/interpreter/mterp/arm64/op_div_double_2addr.S
new file mode 100644
index 0000000..414a175
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fdiv     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_float.S b/runtime/interpreter/mterp/arm64/op_div_float.S
new file mode 100644
index 0000000..f24a26c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fdiv   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_float_2addr.S b/runtime/interpreter/mterp/arm64/op_div_float_2addr.S
new file mode 100644
index 0000000..2888049
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fdiv   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int.S b/runtime/interpreter/mterp/arm64/op_div_int.S
new file mode 100644
index 0000000..88371c0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int_2addr.S b/runtime/interpreter/mterp/arm64/op_div_int_2addr.S
new file mode 100644
index 0000000..5f5a80f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int_lit16.S b/runtime/interpreter/mterp/arm64/op_div_int_lit16.S
new file mode 100644
index 0000000..dc7a484
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"sdiv w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int_lit8.S b/runtime/interpreter/mterp/arm64/op_div_int_lit8.S
new file mode 100644
index 0000000..c06521c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_long.S b/runtime/interpreter/mterp/arm64/op_div_long.S
new file mode 100644
index 0000000..820ae3d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"sdiv x0, x1, x2", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_long_2addr.S b/runtime/interpreter/mterp/arm64/op_div_long_2addr.S
new file mode 100644
index 0000000..da7eabd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"sdiv     x0, x0, x1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_double_to_float.S b/runtime/interpreter/mterp/arm64/op_double_to_float.S
new file mode 100644
index 0000000..c1555fd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_float.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"fcvt s0, d0", "srcreg":"d0", "tgtreg":"s0"}
diff --git a/runtime/interpreter/mterp/arm64/op_double_to_int.S b/runtime/interpreter/mterp/arm64/op_double_to_int.S
new file mode 100644
index 0000000..7244bac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_int.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"fcvtzs w0, d0", "srcreg":"d0", "tgtreg":"w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_double_to_long.S b/runtime/interpreter/mterp/arm64/op_double_to_long.S
new file mode 100644
index 0000000..741160b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_long.S
@@ -0,0 +1 @@
+%include "arm64/funopWide.S" {"instr":"fcvtzs x0, d0", "srcreg":"d0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
new file mode 100644
index 0000000..86fa6db
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
@@ -0,0 +1,13 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     x1, x0, x1, lsl #16         // x1<- ssssssssBBBBbbbb
+    GET_VREG w0, w3                     // w0<- vAA (array object)
+    add     x1, xPC, x1, lsl #1         // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          // (obj, payload)
+    cbz     w0, MterpPossibleException      // exception?
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
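
The payload offset is a signed 32-bit count of 16-bit code units relative to
the opcode, hence the lsl #1 to convert to bytes.  A C sketch (operand names
follow the comments above):

    #include <stdint.h>

    /* pc points at the fill-array-data opcode; bbbb/BBBB are its operand units. */
    uint16_t* PayloadAddress(uint16_t* pc, uint16_t bbbb, uint16_t BBBB) {
        int32_t offset = (int32_t)(bbbb | ((uint32_t)BBBB << 16));  /* code units */
        return pc + offset;   /* uint16_t* arithmetic scales by 2 bytes */
    }
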
diff --git a/runtime/interpreter/mterp/arm64/op_filled_new_array.S b/runtime/interpreter/mterp/arm64/op_filled_new_array.S
new file mode 100644
index 0000000..806a1b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_filled_new_array.S
@@ -0,0 +1,18 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern $helper
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xSELF
+    bl      $helper
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S b/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S
new file mode 100644
index 0000000..3c9a419
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "arm64/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_float_to_double.S b/runtime/interpreter/mterp/arm64/op_float_to_double.S
new file mode 100644
index 0000000..892feca
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_double.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"fcvt  d0, s0", "srcreg":"s0", "tgtreg":"d0"}
diff --git a/runtime/interpreter/mterp/arm64/op_float_to_int.S b/runtime/interpreter/mterp/arm64/op_float_to_int.S
new file mode 100644
index 0000000..c849d81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_int.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrow.S" {"instr":"fcvtzs w0, s0", "srcreg":"s0", "tgtreg":"w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_float_to_long.S b/runtime/interpreter/mterp/arm64/op_float_to_long.S
new file mode 100644
index 0000000..c3de16f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_long.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"fcvtzs x0, s0", "srcreg":"s0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
new file mode 100644
index 0000000..6381e94
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -0,0 +1,9 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
new file mode 100644
index 0000000..fb9a80a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -0,0 +1,9 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
new file mode 100644
index 0000000..b13cb41
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -0,0 +1,16 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * the "backward branch" test must be "<=0" instead of "<0".  The
+     * common taken-branch code performs that check once the signed
+     * 32-bit code-unit offset has been assembled into wINST.
+     */
+    /* goto/32 +AAAAAAAA */
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+    b       MterpCommonTakenBranchNoFlags
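
Since goto/32 may target itself, a zero offset must take the backward-branch
path so suspend and OSR checks still run.  Sketched in C:

    #include <stdint.h>

    int IsBackwardBranch32(int32_t code_unit_offset) {
        return code_unit_offset <= 0;   /* "<= 0", not "< 0": goto/32 can target itself */
    }
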
diff --git a/runtime/interpreter/mterp/arm64/op_if_eq.S b/runtime/interpreter/mterp/arm64/op_if_eq.S
new file mode 100644
index 0000000..aa4a0f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_eq.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S
new file mode 100644
index 0000000..47c1dee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbz     w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ge.S b/runtime/interpreter/mterp/arm64/op_if_ge.S
new file mode 100644
index 0000000..d6ec761
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ge.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S
new file mode 100644
index 0000000..087e094
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gez.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbz     w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gt.S b/runtime/interpreter/mterp/arm64/op_if_gt.S
new file mode 100644
index 0000000..7db8e9d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gt.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S
new file mode 100644
index 0000000..476b265
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "branch":"b.gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_le.S b/runtime/interpreter/mterp/arm64/op_if_le.S
new file mode 100644
index 0000000..ca3a83f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_le.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S
new file mode 100644
index 0000000..2717a60
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_lez.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "branch":"b.le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lt.S b/runtime/interpreter/mterp/arm64/op_if_lt.S
new file mode 100644
index 0000000..56450a1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_lt.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S
new file mode 100644
index 0000000..86089c1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbnz    w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ne.S b/runtime/interpreter/mterp/arm64/op_if_ne.S
new file mode 100644
index 0000000..14d9e13
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ne.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S
new file mode 100644
index 0000000..efacc88
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_nez.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbnz    w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
new file mode 100644
index 0000000..88533bd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -0,0 +1,26 @@
+%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       $helper
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    $extend
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if $is_object
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
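
The slow path above reduces to: call the field getter with (field index, object, referrer, self), then treat a non-null pending exception on the thread as failure before writing the vreg. A toy C model of that contract; the struct layouts and the `_model` helper are invented for illustration:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct Thread { const char *exception; };  /* stand-in for art::Thread */
    struct Object { int32_t some_field; };

    /* Invented stand-in for artGet32InstanceFromCode: returns the field
       value, or sets self->exception on failure (e.g. a null receiver). */
    static int32_t get32_instance_model(struct Object *obj, size_t offset,
                                        struct Thread *self) {
        if (obj == NULL) {
            self->exception = "java.lang.NullPointerException";
            return 0;
        }
        return *(const int32_t *)((const char *)obj + offset);
    }

    int main(void) {
        struct Thread self = { NULL };
        struct Object o = { 42 };
        int32_t fp[16] = { 0 };                       /* vreg file */
        int32_t v = get32_instance_model(&o, offsetof(struct Object, some_field),
                                         &self);
        if (self.exception != NULL) {                 /* cbnz x3, ...      */
            puts("-> MterpPossibleException");
            return 1;
        }
        fp[2] = v;                                    /* SET_VREG w0, w2   */
        printf("vA = %d\n", fp[2]);
        return 0;
    }
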
diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean.S b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
new file mode 100644
index 0000000..36a9b6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode", "extend":"uxtb w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S b/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S
new file mode 100644
index 0000000..2ceccb9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte.S b/runtime/interpreter/mterp/arm64/op_iget_byte.S
new file mode 100644
index 0000000..fd3f164
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetByteInstanceFromCode", "extend":"sxtb w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S b/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S
new file mode 100644
index 0000000..6e97b72
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrsb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_char.S b/runtime/interpreter/mterp/arm64/op_iget_char.S
new file mode 100644
index 0000000..ea23275
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_char.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetCharInstanceFromCode", "extend":"uxth w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_char_quick.S b/runtime/interpreter/mterp/arm64/op_iget_char_quick.S
new file mode 100644
index 0000000..325dd1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_object.S b/runtime/interpreter/mterp/arm64/op_iget_object.S
new file mode 100644
index 0000000..03be78d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_object.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_object_quick.S b/runtime/interpreter/mterp/arm64/op_iget_object_quick.S
new file mode 100644
index 0000000..e9a797d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_object_quick.S
@@ -0,0 +1,15 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- object we're operating on
+    bl      artIGetObjectFromMterp      // (obj, offset)
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    PREFETCH_INST 2
+    cbnz    w3, MterpPossibleException      // bail out
+    SET_VREG_OBJECT w0, w2              // fp[A]<- w0
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
new file mode 100644
index 0000000..699b2c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S
@@ -0,0 +1,14 @@
+%default { "load":"ldr", "extend":"" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    $load   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $extend
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
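
In the quickened form the resolved byte offset is already inlined in the code stream, so the handler is just a null check plus one load; the $load parameter picks the width and extension (ldrb zero-extends a boolean, ldrsb sign-extends a byte, and so on). A small C analogue — the field layout here is illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        struct { uint8_t flag; int8_t delta; } obj = { 0xFFu, -1 };
        const char *base = (const char *)&obj;

        uint32_t w0 = *(const uint8_t *)(base + 0);          /* ldrb  */
        printf("boolean-quick: 0x%08X\n", (unsigned)w0);     /* 0x000000FF */
        w0 = (uint32_t)(int32_t)*(const int8_t *)(base + 1); /* ldrsb */
        printf("byte-quick:    0x%08X\n", (unsigned)w0);     /* 0xFFFFFFFF */
        return 0;
    }
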
diff --git a/runtime/interpreter/mterp/arm64/op_iget_short.S b/runtime/interpreter/mterp/arm64/op_iget_short.S
new file mode 100644
index 0000000..c347542
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_short.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetShortInstanceFromCode", "extend":"sxth w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_short_quick.S b/runtime/interpreter/mterp/arm64/op_iget_short_quick.S
new file mode 100644
index 0000000..8367070
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrsh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide.S b/runtime/interpreter/mterp/arm64/op_iget_wide.S
new file mode 100644
index 0000000..9718390
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide.S
@@ -0,0 +1,20 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGet64InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     w3, MterpException            // bail out
+    SET_VREG_WIDE x0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
new file mode 100644
index 0000000..e9388e4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -0,0 +1,11 @@
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w4, 1                         // w4<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldr     x0, [x3, x4]                // x0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
new file mode 100644
index 0000000..a56705a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S
@@ -0,0 +1,22 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    EXPORT_PC
+    FETCH     w0, 1                     // w0<- CCCC
+    lsr       w1, wINST, #12            // w1<- B
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr       x2, [xFP, #OFF_FP_METHOD] // w2<- method
+    mov       x3, xSELF                 // w3<- self
+    bl        MterpInstanceOf           // (index, &obj, method, self)
+    ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      w2, wINST, #8, #4         // w2<- A
+    PREFETCH_INST 2
+    cbnz      x1, MterpException
+    ADVANCE 2                           // advance rPC
+    SET_VREG w0, w2                     // vA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_byte.S b/runtime/interpreter/mterp/arm64/op_int_to_byte.S
new file mode 100644
index 0000000..43f8148
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sxtb    w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_char.S b/runtime/interpreter/mterp/arm64/op_int_to_char.S
new file mode 100644
index 0000000..f092170
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_char.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"uxth    w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_double.S b/runtime/interpreter/mterp/arm64/op_int_to_double.S
new file mode 100644
index 0000000..3dee75a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_double.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"scvtf d0, w0", "srcreg":"w0", "tgtreg":"d0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_float.S b/runtime/interpreter/mterp/arm64/op_int_to_float.S
new file mode 100644
index 0000000..3ebbdc7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_float.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrow.S" {"instr":"scvtf s0, w0", "srcreg":"w0", "tgtreg":"s0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
new file mode 100644
index 0000000..45e3112
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -0,0 +1,8 @@
+    /* int-to-long vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_short.S b/runtime/interpreter/mterp/arm64/op_int_to_short.S
new file mode 100644
index 0000000..87fb804
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_short.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sxth    w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_direct.S b/runtime/interpreter/mterp/arm64/op_invoke_direct.S
new file mode 100644
index 0000000..c117232
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S b/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S
new file mode 100644
index 0000000..efc54c7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_interface.S b/runtime/interpreter/mterp/arm64/op_invoke_interface.S
new file mode 100644
index 0000000..12dfa59
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S b/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S
new file mode 100644
index 0000000..61caaf4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_static.S b/runtime/interpreter/mterp/arm64/op_invoke_static.S
new file mode 100644
index 0000000..634eda2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_static.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeStatic" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_static_range.S b/runtime/interpreter/mterp/arm64/op_invoke_static_range.S
new file mode 100644
index 0000000..32cdcdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_super.S b/runtime/interpreter/mterp/arm64/op_invoke_super.S
new file mode 100644
index 0000000..def2c55
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_super_range.S b/runtime/interpreter/mterp/arm64/op_invoke_super_range.S
new file mode 100644
index 0000000..27fb859
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual.S
new file mode 100644
index 0000000..66d0502
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..4300c34
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S
new file mode 100644
index 0000000..b43955c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..90c7b65
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput.S b/runtime/interpreter/mterp/arm64/op_iput.S
new file mode 100644
index 0000000..a8c0e61
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput.S
@@ -0,0 +1,21 @@
+%default { "is_object":"0", "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern $handler
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       $handler
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_boolean.S b/runtime/interpreter/mterp/arm64/op_iput_boolean.S
new file mode 100644
index 0000000..bbf5319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S b/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S
new file mode 100644
index 0000000..25c61d7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_byte.S b/runtime/interpreter/mterp/arm64/op_iput_byte.S
new file mode 100644
index 0000000..bbf5319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S b/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S
new file mode 100644
index 0000000..25c61d7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_char.S b/runtime/interpreter/mterp/arm64/op_iput_char.S
new file mode 100644
index 0000000..150d879
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_char.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_char_quick.S b/runtime/interpreter/mterp/arm64/op_iput_char_quick.S
new file mode 100644
index 0000000..c6ef46a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_object.S b/runtime/interpreter/mterp/arm64/op_iput_object.S
new file mode 100644
index 0000000..37a649b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_object.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpIputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_object_quick.S b/runtime/interpreter/mterp/arm64/op_iput_object_quick.S
new file mode 100644
index 0000000..6fbf2b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_object_quick.S
@@ -0,0 +1,9 @@
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpIputObjectQuick
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S
new file mode 100644
index 0000000..e95da76
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S
@@ -0,0 +1,13 @@
+%default { "store":"str" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $store     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_short.S b/runtime/interpreter/mterp/arm64/op_iput_short.S
new file mode 100644
index 0000000..150d879
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_short.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_short_quick.S b/runtime/interpreter/mterp/arm64/op_iput_short_quick.S
new file mode 100644
index 0000000..c6ef46a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide.S b/runtime/interpreter/mterp/arm64/op_iput_wide.S
new file mode 100644
index 0000000..e1ab127
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide.S
@@ -0,0 +1,15 @@
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    VREG_INDEX_TO_ADDR x2, x2           // w2<- &fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
new file mode 100644
index 0000000..6cec363
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -0,0 +1,11 @@
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w3, 1                         // w3<- field byte offset
+    GET_VREG w2, w2                     // w2<- fp[B], the object pointer
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    cbz     w2, common_errNullObject    // object was null
+    GET_VREG_WIDE x0, w0                // x0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    str     x0, [x2, x3]                // obj.field<- x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_double.S b/runtime/interpreter/mterp/arm64/op_long_to_double.S
new file mode 100644
index 0000000..a3f59c2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_double.S
@@ -0,0 +1 @@
+%include "arm64/funopWide.S" {"instr":"scvtf d0, x0", "srcreg":"x0", "tgtreg":"d0"}
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_float.S b/runtime/interpreter/mterp/arm64/op_long_to_float.S
new file mode 100644
index 0000000..e9c9145
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_float.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"scvtf s0, x0", "srcreg":"x0", "tgtreg":"s0"}
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S
new file mode 100644
index 0000000..73f58d8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm64/op_move.S"
diff --git a/runtime/interpreter/mterp/arm64/op_monitor_enter.S b/runtime/interpreter/mterp/arm64/op_monitor_enter.S
new file mode 100644
index 0000000..6fbd9ae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_monitor_enter.S
@@ -0,0 +1,13 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                      // w0<- vAA (object)
+    mov      x1, xSELF                   // w1<- self
+    bl       artLockObjectFromCode
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   // extract opcode from rINST
+    GOTO_OPCODE ip                       // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_monitor_exit.S b/runtime/interpreter/mterp/arm64/op_monitor_exit.S
new file mode 100644
index 0000000..26e2d8d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_monitor_exit.S
@@ -0,0 +1,17 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8              // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA (object)
+    mov      x1, xSELF                  // x1<- self
+    bl       artUnlockObjectFromCode    // w0<- success for unlock(self, obj)
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1                // before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move.S b/runtime/interpreter/mterp/arm64/op_move.S
new file mode 100644
index 0000000..195b7eb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_16.S b/runtime/interpreter/mterp/arm64/op_move_16.S
new file mode 100644
index 0000000..5146e3d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load xINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_exception.S b/runtime/interpreter/mterp/arm64/op_move_exception.S
new file mode 100644
index 0000000..b29298f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_exception.S
@@ -0,0 +1,9 @@
+    /* move-exception vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     x1, #0                      // w1<- 0
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    SET_VREG_OBJECT w3, w2              // fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str     x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // clear exception
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_from16.S b/runtime/interpreter/mterp/arm64/op_move_from16.S
new file mode 100644
index 0000000..78f344d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // w1<- BBBB
+    lsr     w0, wINST, #8               // w0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_object.S b/runtime/interpreter/mterp/arm64/op_move_object.S
new file mode 100644
index 0000000..a5adc59
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object.S
@@ -0,0 +1 @@
+%include "arm64/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_object_16.S b/runtime/interpreter/mterp/arm64/op_move_object_16.S
new file mode 100644
index 0000000..ef86c45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object_16.S
@@ -0,0 +1 @@
+%include "arm64/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_object_from16.S b/runtime/interpreter/mterp/arm64/op_move_object_from16.S
new file mode 100644
index 0000000..0c73b3b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "arm64/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_result.S b/runtime/interpreter/mterp/arm64/op_move_result.S
new file mode 100644
index 0000000..06fe962
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JType.
+    ldr     w0, [x0]                    // w0<- result.i
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_result_object.S b/runtime/interpreter/mterp/arm64/op_move_result_object.S
new file mode 100644
index 0000000..da2bbee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result_object.S
@@ -0,0 +1 @@
+%include "arm64/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_result_wide.S b/runtime/interpreter/mterp/arm64/op_move_result_wide.S
new file mode 100644
index 0000000..f90a33f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result_wide.S
@@ -0,0 +1,9 @@
+    /* for: move-result-wide */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JType.
+    ldr     x0, [x0]                    // x0<- result.j
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w2                // fp[AA]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_wide.S b/runtime/interpreter/mterp/arm64/op_move_wide.S
new file mode 100644
index 0000000..538f079
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide.S
@@ -0,0 +1,9 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE  x3, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE  x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_wide_16.S b/runtime/interpreter/mterp/arm64/op_move_wide_16.S
new file mode 100644
index 0000000..c79cdc50
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide_16.S
@@ -0,0 +1,9 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 2                         // w3<- BBBB
+    FETCH w2, 1                         // w2<- AAAA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    SET_VREG_WIDE x3, w2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_wide_from16.S b/runtime/interpreter/mterp/arm64/op_move_wide_from16.S
new file mode 100644
index 0000000..70dbe99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide_from16.S
@@ -0,0 +1,9 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 1                         // w3<- BBBB
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_mul_double.S b/runtime/interpreter/mterp/arm64/op_mul_double.S
new file mode 100644
index 0000000..8d35b81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fmul d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S
new file mode 100644
index 0000000..526cb3b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fmul     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_float.S b/runtime/interpreter/mterp/arm64/op_mul_float.S
new file mode 100644
index 0000000..eea7733
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fmul   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S
new file mode 100644
index 0000000..c1f2376
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fmul   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int.S b/runtime/interpreter/mterp/arm64/op_mul_int.S
new file mode 100644
index 0000000..d14cae1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int.S
@@ -0,0 +1,2 @@
+/* operand order kept from the 32-bit port, where "mul r0, r0, r1" was unpredictable; on arm64 either order works */
+%include "arm64/binop.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S
new file mode 100644
index 0000000..f079118
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S
@@ -0,0 +1,2 @@
+/* operand order kept from the 32-bit port, where "mul r0, r0, r1" was unpredictable; on arm64 either order works */
+%include "arm64/binop2addr.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S b/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S
new file mode 100644
index 0000000..a378559
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S
@@ -0,0 +1,2 @@
+/* operand order kept from the 32-bit port, where "mul r0, r0, r1" was unpredictable; on arm64 either order works */
+%include "arm64/binopLit16.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S b/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S
new file mode 100644
index 0000000..b3d4014
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S
@@ -0,0 +1,2 @@
+/* operand order kept from the 32-bit port, where "mul r0, r0, r1" was unpredictable; on arm64 either order works */
+%include "arm64/binopLit8.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_long.S b/runtime/interpreter/mterp/arm64/op_mul_long.S
new file mode 100644
index 0000000..bc0dcbd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"mul x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S
new file mode 100644
index 0000000..fa1cdf8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"mul     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S
new file mode 100644
index 0000000..d77859d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_double.S
@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"eor     x0, x0, #0x8000000000000000"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S
new file mode 100644
index 0000000..6652aec
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_float.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"eor     w0, w0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_int.S b/runtime/interpreter/mterp/arm64/op_neg_int.S
new file mode 100644
index 0000000..59c14a9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_int.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sub     w0, wzr, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_long.S b/runtime/interpreter/mterp/arm64/op_neg_long.S
new file mode 100644
index 0000000..0c71ea7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_long.S
@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"sub x0, xzr, x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_new_array.S b/runtime/interpreter/mterp/arm64/op_new_array.S
new file mode 100644
index 0000000..886120a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_new_array.S
@@ -0,0 +1,18 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpNewArray
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_new_instance.S b/runtime/interpreter/mterp/arm64/op_new_instance.S
new file mode 100644
index 0000000..c171ac5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_new_instance.S
@@ -0,0 +1,13 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xSELF
+    mov     w2, wINST
+    bl      MterpNewInstance           // (shadow_frame, self, inst_data)
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2               // advance rPC, load rINST
+    GET_INST_OPCODE ip                 // extract opcode from rINST
+    GOTO_OPCODE ip                     // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_nop.S b/runtime/interpreter/mterp/arm64/op_nop.S
new file mode 100644
index 0000000..80c2d45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                // advance to next instr, load rINST
+    GET_INST_OPCODE ip                  // ip<- opcode from rINST
+    GOTO_OPCODE ip                      // execute it
diff --git a/runtime/interpreter/mterp/arm64/op_not_int.S b/runtime/interpreter/mterp/arm64/op_not_int.S
new file mode 100644
index 0000000..55d7750
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_not_int.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"mvn     w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_not_long.S b/runtime/interpreter/mterp/arm64/op_not_long.S
new file mode 100644
index 0000000..e5ebdd6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_not_long.S
@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"mvn     x0, x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int.S b/runtime/interpreter/mterp/arm64/op_or_int.S
new file mode 100644
index 0000000..648c1e6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_2addr.S b/runtime/interpreter/mterp/arm64/op_or_int_2addr.S
new file mode 100644
index 0000000..abdf599
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit16.S b/runtime/interpreter/mterp/arm64/op_or_int_lit16.S
new file mode 100644
index 0000000..db7f4ff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
new file mode 100644
index 0000000..7cb26b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"", "instr":"orr     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_long.S b/runtime/interpreter/mterp/arm64/op_or_long.S
new file mode 100644
index 0000000..dd137ce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"orr x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_long_2addr.S b/runtime/interpreter/mterp/arm64/op_or_long_2addr.S
new file mode 100644
index 0000000..f785230
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"orr     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
new file mode 100644
index 0000000..408e030
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -0,0 +1,20 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
+    bl      $func                       // w0<- code-unit branch offset
+    sxtw    xINST, w0
+    b       MterpCommonTakenBranchNoFlags
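
The interesting part above is reassembling the signed 32-bit payload offset from two 16-bit code units and scaling it: the switch table lives at PC + offset*2 because Dalvik code units are 16 bits wide. A small C model of that arithmetic (the code-unit values are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint16_t lo = 0xDEF0;           /* FETCH   w0, 1: low half        */
        int16_t  hi = (int16_t)0xFFFF;  /* FETCH_S x1, 2: sign-carrying   */
        int64_t offset = (int64_t)lo | ((int64_t)hi << 16);
        /* orr x0, x0, x1, lsl #16 -> ssssssssBBBBbbbb */
        printf("offset       = %lld code units\n", (long long)offset);
        printf("table lives  = PC + %lld bytes\n", (long long)(offset * 2));
        return 0;                        /* add x0, xPC, x0, lsl #1 */
    }
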
diff --git a/runtime/interpreter/mterp/arm64/op_rem_double.S b/runtime/interpreter/mterp/arm64/op_rem_double.S
new file mode 100644
index 0000000..c631ddb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_double.S
@@ -0,0 +1,13 @@
+    /* rem vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d1, w2                // d1<- vCC
+    GET_VREG_WIDE d0, w1                // d0<- vBB
+    bl  fmod
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S
new file mode 100644
index 0000000..9868f41
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S
@@ -0,0 +1,12 @@
+    /* rem vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1                // d1<- vB
+    GET_VREG_WIDE d0, w2                // d0<- vA
+    bl fmod
+    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float.S b/runtime/interpreter/mterp/arm64/op_rem_float.S
new file mode 100644
index 0000000..73f7060
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_float.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm64/fbinop.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
new file mode 100644
index 0000000..95f81c5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
@@ -0,0 +1,11 @@
+    /* rem vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    bl  fmodf
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s0, w9
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int.S b/runtime/interpreter/mterp/arm64/op_rem_int.S
new file mode 100644
index 0000000..dd9dfda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"sdiv     w2, w0, w1", "instr":"msub w0, w2, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S
new file mode 100644
index 0000000..57fc4971
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"sdiv     w2, w0, w1", "instr":"msub w0, w2, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S b/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S
new file mode 100644
index 0000000..b51a739
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"preinstr":"sdiv w3, w0, w1", "instr":"msub w0, w3, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S b/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S
new file mode 100644
index 0000000..03ea324
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"sdiv w3, w0, w1", "instr":"msub w0, w3, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_long.S b/runtime/interpreter/mterp/arm64/op_rem_long.S
new file mode 100644
index 0000000..f133f86
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"preinstr":"sdiv x3, x1, x2","instr":"msub x0, x3, x2, x1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S
new file mode 100644
index 0000000..b45e2a9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"preinstr":"sdiv x3, x0, x1", "instr":"msub x0, x3, x1, x0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_return.S b/runtime/interpreter/mterp/arm64/op_return.S
new file mode 100644
index 0000000..28630ee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return.S
@@ -0,0 +1,19 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
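
This and the other return handlers below follow one pattern: run the constructor fence, then poll the thread's flag word with an ands against the suspend/checkpoint mask and detour through MterpSuspendCheck before leaving the frame if any bit is set. The shape of that test in C (the flag bit values here are assumptions, not the real THREAD_* constants):

    #include <stdint.h>
    #include <stdio.h>

    #define THREAD_SUSPEND_REQUEST    (1u << 0)  /* illustrative values */
    #define THREAD_CHECKPOINT_REQUEST (1u << 1)

    static void mterp_return(uint32_t thread_flags) {
        /* ands w7, w7, #(SUSPEND | CHECKPOINT); b.ne .L..._check */
        if (thread_flags & (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST))
            puts("-> MterpSuspendCheck, then return");
        else
            puts("-> return directly");
    }

    int main(void) {
        mterp_return(0);
        mterp_return(THREAD_CHECKPOINT_REQUEST);
        return 0;
    }
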
diff --git a/runtime/interpreter/mterp/arm64/op_return_object.S b/runtime/interpreter/mterp/arm64/op_return_object.S
new file mode 100644
index 0000000..b6cb532
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_object.S
@@ -0,0 +1 @@
+%include "arm64/op_return.S"
diff --git a/runtime/interpreter/mterp/arm64/op_return_void.S b/runtime/interpreter/mterp/arm64/op_return_void.S
new file mode 100644
index 0000000..3a5aa56
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_void.S
@@ -0,0 +1,12 @@
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    mov     x0, #0
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
new file mode 100644
index 0000000..1e06953
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
@@ -0,0 +1,10 @@
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    mov     x0, #0
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_return_wide.S b/runtime/interpreter/mterp/arm64/op_return_wide.S
new file mode 100644
index 0000000..c6e1d9d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_wide.S
@@ -0,0 +1,18 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x0, w2                // x0<- vAA
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_rsub_int.S b/runtime/interpreter/mterp/arm64/op_rsub_int.S
new file mode 100644
index 0000000..3bf45fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "arm64/binopLit16.S" {"instr":"sub     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S
new file mode 100644
index 0000000..7a3572b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"sub     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget.S b/runtime/interpreter/mterp/arm64/op_sget.S
new file mode 100644
index 0000000..6352ce0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget.S
@@ -0,0 +1,27 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern $helper
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    $helper
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    $extend
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if $is_object
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
diff --git a/runtime/interpreter/mterp/arm64/op_sget_boolean.S b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
new file mode 100644
index 0000000..c40dbdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"uxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_byte.S b/runtime/interpreter/mterp/arm64/op_sget_byte.S
new file mode 100644
index 0000000..6cf69a3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"sxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_char.S b/runtime/interpreter/mterp/arm64/op_sget_char.S
new file mode 100644
index 0000000..8924a34
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_char.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"uxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_object.S b/runtime/interpreter/mterp/arm64/op_sget_object.S
new file mode 100644
index 0000000..620b0ba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_object.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_short.S b/runtime/interpreter/mterp/arm64/op_sget_short.S
new file mode 100644
index 0000000..19dbba6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_short.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"sxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_wide.S b/runtime/interpreter/mterp/arm64/op_sget_wide.S
new file mode 100644
index 0000000..287f66d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_wide.S
@@ -0,0 +1,19 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     * for: sget-wide
+     */
+    /* sget-wide vAA, field//BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGet64StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w4, wINST, #8                 // w4<- AA
+    cbnz  x3, MterpException            // bail out
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
new file mode 100644
index 0000000..3062a3f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
new file mode 100644
index 0000000..9a7e09f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
new file mode 100644
index 0000000..9c19b55
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_long.S b/runtime/interpreter/mterp/arm64/op_shl_long.S
new file mode 100644
index 0000000..bbf9600
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_long.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"lsl"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S
new file mode 100644
index 0000000..a5c4013
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"lsl"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
new file mode 100644
index 0000000..493b740
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
new file mode 100644
index 0000000..6efe8ee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
new file mode 100644
index 0000000..c7b61df
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_long.S b/runtime/interpreter/mterp/arm64/op_shr_long.S
new file mode 100644
index 0000000..4d33235
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_long.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"asr"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S
new file mode 100644
index 0000000..0a4a386
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"asr"}
diff --git a/runtime/interpreter/mterp/arm64/op_sparse_switch.S b/runtime/interpreter/mterp/arm64/op_sparse_switch.S
new file mode 100644
index 0000000..5a8d748
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "arm64/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/arm64/op_sput.S b/runtime/interpreter/mterp/arm64/op_sput.S
new file mode 100644
index 0000000..75f27ab
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput.S
@@ -0,0 +1,19 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      $helper
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
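The PREFETCH_INST 2 / ADVANCE 2 split around the helper call is the handlers' exception-safety idiom: the next opcode word is fetched before the call, but rPC is only committed once the helper reports success, so a pending exception still unwinds with the sput's own PC. A rough model of that control flow, with hypothetical names (this is not the real dispatch loop):

```python
class Frame:
    def __init__(self, code, pc=0):
        self.code, self.pc = code, pc
        self.pending_exception = None

def sput_sketch(frame: Frame, helper) -> int:
    """Prefetch the next opcode, call the runtime helper, and advance the
    PC only on success so a throw is attributed to the sput itself."""
    next_inst = frame.code[frame.pc + 2]        # PREFETCH_INST 2
    if helper(frame) != 0:                      # helpers return 0 on success
        frame.pending_exception = "field error" # MterpException path
        return frame.pc                         # PC still at the sput
    frame.pc += 2                               # ADVANCE 2: past exception point
    return next_inst                            # GOTO_OPCODE

frame = Frame(code=[0x67, 0x00, 0x0E])          # sput ...; return-void
assert sput_sketch(frame, lambda f: 0) == 0x0E and frame.pc == 2
```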
diff --git a/runtime/interpreter/mterp/arm64/op_sput_boolean.S b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
new file mode 100644
index 0000000..11c55e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_byte.S b/runtime/interpreter/mterp/arm64/op_sput_byte.S
new file mode 100644
index 0000000..11c55e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_char.S b/runtime/interpreter/mterp/arm64/op_sput_char.S
new file mode 100644
index 0000000..b4dd5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_char.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_object.S b/runtime/interpreter/mterp/arm64/op_sput_object.S
new file mode 100644
index 0000000..c176da2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_object.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xINST
+    mov     x3, xSELF
+    bl      MterpSputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_sput_short.S b/runtime/interpreter/mterp/arm64/op_sput_short.S
new file mode 100644
index 0000000..b4dd5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_short.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
new file mode 100644
index 0000000..a79b1a6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S
@@ -0,0 +1,18 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     * for: sput-wide
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    ldr     x1, [xFP, #OFF_FP_METHOD]
+    lsr     w2, wINST, #8               // w2<- AA
+    VREG_INDEX_TO_ADDR x2, w2
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cbnz    w0, MterpException          // 0 on success, -1 on failure
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_sub_double.S b/runtime/interpreter/mterp/arm64/op_sub_double.S
new file mode 100644
index 0000000..e8e3401
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fsub d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S
new file mode 100644
index 0000000..ddab55e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fsub     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_float.S b/runtime/interpreter/mterp/arm64/op_sub_float.S
new file mode 100644
index 0000000..227b15f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fsub   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S
new file mode 100644
index 0000000..19ac8d5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fsub   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_int.S b/runtime/interpreter/mterp/arm64/op_sub_int.S
new file mode 100644
index 0000000..0e7ce0e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"sub     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S
new file mode 100644
index 0000000..d2c1bd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"sub     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_long.S b/runtime/interpreter/mterp/arm64/op_sub_long.S
new file mode 100644
index 0000000..263c70d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"sub x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S
new file mode 100644
index 0000000..5be3772
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"sub     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_throw.S b/runtime/interpreter/mterp/arm64/op_throw.S
new file mode 100644
index 0000000..9a951af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_throw.S
@@ -0,0 +1,10 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w1, w2                      // w1<- vAA (exception object)
+    cbz      w1, common_errNullObject
+    str      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // thread->exception<- obj
+    b        MterpException
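op_throw itself only validates and records: a null vAA is routed to the shared null-pointer stub, otherwise the object lands in the thread's exception slot and the common exception path takes over; either way control leaves the fast dispatch loop. Sketched with illustrative names:

```python
class Thread:
    exception = None

def op_throw(thread: Thread, vregs, aa: int) -> str:
    """Store vAA in the thread's exception slot; a null vAA becomes an NPE."""
    obj = vregs[aa]                             # GET_VREG: the exception object
    if obj is None:                             # cbz w1, common_errNullObject
        return "common_errNullObject"
    thread.exception = obj                      # thread->exception <- obj
    return "MterpException"

t = Thread()
assert op_throw(t, [None, "MyError"], 1) == "MterpException"
assert t.exception == "MyError"
```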
diff --git a/runtime/interpreter/mterp/arm64/op_unused_3e.S b/runtime/interpreter/mterp/arm64/op_unused_3e.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_3e.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_3f.S b/runtime/interpreter/mterp/arm64/op_unused_3f.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_3f.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_40.S b/runtime/interpreter/mterp/arm64/op_unused_40.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_40.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_41.S b/runtime/interpreter/mterp/arm64/op_unused_41.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_41.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_42.S b/runtime/interpreter/mterp/arm64/op_unused_42.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_42.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_43.S b/runtime/interpreter/mterp/arm64/op_unused_43.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_43.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_73.S b/runtime/interpreter/mterp/arm64/op_unused_73.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_73.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_79.S b/runtime/interpreter/mterp/arm64/op_unused_79.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_79.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_7a.S b/runtime/interpreter/mterp/arm64/op_unused_7a.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_7a.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f3.S b/runtime/interpreter/mterp/arm64/op_unused_f3.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f4.S b/runtime/interpreter/mterp/arm64/op_unused_f4.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f4.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f5.S b/runtime/interpreter/mterp/arm64/op_unused_f5.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f6.S b/runtime/interpreter/mterp/arm64/op_unused_f6.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f7.S b/runtime/interpreter/mterp/arm64/op_unused_f7.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f8.S b/runtime/interpreter/mterp/arm64/op_unused_f8.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f9.S b/runtime/interpreter/mterp/arm64/op_unused_f9.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fa.S b/runtime/interpreter/mterp/arm64/op_unused_fa.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fa.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fb.S b/runtime/interpreter/mterp/arm64/op_unused_fb.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fb.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fc.S b/runtime/interpreter/mterp/arm64/op_unused_fc.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fc.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fd.S b/runtime/interpreter/mterp/arm64/op_unused_fd.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fd.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fe.S b/runtime/interpreter/mterp/arm64/op_unused_fe.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fe.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_ff.S b/runtime/interpreter/mterp/arm64/op_unused_ff.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_ff.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
new file mode 100644
index 0000000..005452b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
new file mode 100644
index 0000000..1cb8cb7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
new file mode 100644
index 0000000..555ed4e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_long.S b/runtime/interpreter/mterp/arm64/op_ushr_long.S
new file mode 100644
index 0000000..e13c86a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_long.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"lsr"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S
new file mode 100644
index 0000000..67ec91e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"lsr"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int.S b/runtime/interpreter/mterp/arm64/op_xor_int.S
new file mode 100644
index 0000000..7483663
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S b/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S
new file mode 100644
index 0000000..2f9a2c7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S
new file mode 100644
index 0000000..6b72c56
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
new file mode 100644
index 0000000..1d3d93e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"extract":"", "instr":"eor     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_long.S b/runtime/interpreter/mterp/arm64/op_xor_long.S
new file mode 100644
index 0000000..3880d5d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"eor x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S b/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S
new file mode 100644
index 0000000..3690552
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"eor     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
new file mode 100644
index 0000000..dcb2fb7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S
@@ -0,0 +1,19 @@
+%default {"opcode":"shl"}
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w3, wINST, #8               // w3<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $opcode  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
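Only the low six bits of the shift count matter here because ARM64's variable shifts already reduce the count modulo the register width (64), so no explicit masking instruction is needed. Equivalent Python arithmetic:

```python
M64 = (1 << 64) - 1

def shl_long(x: int, count: int) -> int:
    """lsl x0, x1, x2: 64-bit shift; hardware uses count mod 64."""
    return (x << (count & 63)) & M64

def ushr_long(x: int, count: int) -> int:
    """lsr: logical (zero-filling) right shift."""
    return (x & M64) >> (count & 63)

def shr_long(x: int, count: int) -> int:
    """asr: arithmetic right shift, replicating the sign bit."""
    x &= M64
    if x >> 63:                     # negative in two's complement
        x -= 1 << 64
    return (x >> (count & 63)) & M64

assert shl_long(1, 65) == 2         # a count of 65 behaves like 1
assert shr_long(1 << 63, 4) == 0xF800_0000_0000_0000
```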
diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
new file mode 100644
index 0000000..b860dfd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
@@ -0,0 +1,15 @@
+%default {"opcode":"lsl"}
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $opcode x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2                // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S
new file mode 100644
index 0000000..e681968
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unop.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM64 instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $instr                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S
new file mode 100644
index 0000000..6ee4f92
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unopWide.S
@@ -0,0 +1,17 @@
+%default {"instr":"sub x0, xzr, x0"}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $instr
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
diff --git a/runtime/interpreter/mterp/arm64/unused.S b/runtime/interpreter/mterp/arm64/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unused.S
@@ -0,0 +1,4 @@
+    /*
+     * Bail to reference interpreter to throw.
+     */
+    b       MterpFallback
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
new file mode 100644
index 0000000..510a3c1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -0,0 +1,20 @@
+%default { "compare":"1" }
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "branch"
+     * fragment that supplies the conditional branch for the taken path.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if ${compare}
+    cmp     w2, #0                      // compare (vAA, 0)
+    .endif
+    ${branch} MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
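zcmp fetches the branch offset with FETCH_S directly into wINST: it is a signed count of 16-bit code units relative to the branch instruction, handed to MterpCommonTakenBranchNoFlags on the taken path (which also performs the suspend-check and OSR bookkeeping); the not-taken path still tests wPROFILE against JIT_CHECK_OSR so a pending OSR request is honored either way. The PC arithmetic, modeled:

```python
def branch_target(pc: int, u16: int) -> int:
    """if-eqz vAA, +BBBB: the offset is a signed number of 16-bit code
    units, relative to the address of the branch opcode itself."""
    off = u16 - 0x10000 if u16 & 0x8000 else u16   # FETCH_S sign-extension
    return pc + off * 2                            # code units are 2 bytes

assert branch_target(0x100, 0x0003) == 0x106       # forward 3 code units
assert branch_target(0x100, 0xFFFE) == 0x0FC       # backward 2 code units
```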
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
new file mode 100644
index 0000000..b6caf11
--- /dev/null
+++ b/runtime/interpreter/mterp/config_arm
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARMv7-A targets.
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub arm/alt_stub.S
+
+# file header and basic definitions
+import arm/header.S
+
+# arch-specific entry point to interpreter
+import arm/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub arm/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start arm
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    # op op_unused_f3 FALLBACK
+    # op op_unused_f4 FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import arm/footer.S
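These config files are a small line-oriented DSL consumed by the mterp generator: scalar directives (handler-style, handler-size), file imports, the fallback stub, and an op-start/op-end block listing per-opcode overrides; with every `op` line commented out, as here, all 256 handlers come from the default `arm` directory. A rough parser for just the directives used above (a sketch of the format, not the actual generator):

```python
def parse_config(text: str) -> dict:
    """Parse the handful of mterp config directives used above."""
    cfg = {"imports": [], "ops": {}, "alt_ops": False}
    in_ops = False
    for raw in text.splitlines():
        line = raw.split("#", 1)[0].strip()    # comments run to end of line
        if not line:
            continue
        tok = line.split()
        if tok[0] == "op-start":
            in_ops, cfg["default_dir"] = True, tok[1]
        elif tok[0] == "op-end":
            in_ops = False
        elif in_ops and tok[0] == "op":
            cfg["ops"][tok[1]] = tok[2]        # e.g. FALLBACK or a directory
        elif tok[0] == "import":
            cfg["imports"].append(tok[1])
        elif tok[0] == "alt-ops":
            cfg["alt_ops"] = True
        else:
            cfg[tok[0]] = " ".join(tok[1:])    # handler-style, handler-size, ...
    return cfg

cfg = parse_config("handler-size 128\nop-start arm\nop-end\nimport arm/footer.S\n")
assert cfg["handler-size"] == "128" and cfg["default_dir"] == "arm"
```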
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
new file mode 100644
index 0000000..c5e06c7
--- /dev/null
+++ b/runtime/interpreter/mterp/config_arm64
@@ -0,0 +1,303 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARM64 targets.
+#
+
+handler-style computed-goto
+handler-size 128
+
+# file header and basic definitions
+import arm64/header.S
+
+# arch-specific entry point to interpreter
+import arm64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub arm64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start arm64
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    # op op_unused_f3 FALLBACK
+    # op op_unused_f4 FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm; we emit the footer before alternate
+# entry stubs, so that TBZ/TBNZ from ops can reach targets in footer
+import arm64/footer.S
+
+# source for alternate entry stub
+asm-alt-stub arm64/alt_stub.S
+
+# emit alternate entry stubs
+alt-ops
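The ordering comment above is load-bearing: TBZ/TBNZ encode a 14-bit instruction offset, giving a reach of only +/-32KB, and the computed-goto table is exactly 256 opcodes times the declared handler-size of 128 bytes, i.e. 32KB end to end. Emitting the footer immediately after the ops, and only then the alternate-entry stubs, keeps footer targets within range of test-bit branches even from the earliest handlers. The arithmetic:

```python
NUM_OPCODES = 256            # Dalvik opcode space
HANDLER_SIZE = 128           # from `handler-size 128` above
TBZ_REACH = 32 * 1024        # tbz/tbnz: 14-bit signed offset * 4 bytes

table_bytes = NUM_OPCODES * HANDLER_SIZE
assert table_bytes == TBZ_REACH          # the table alone spans TBZ's reach
# So a tbz in opcode 0x00's handler can just reach a footer placed right
# after the table, but not one pushed beyond the alternate-entry stubs too.
```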
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
new file mode 100644
index 0000000..515cb0b
--- /dev/null
+++ b/runtime/interpreter/mterp/config_mips
@@ -0,0 +1,298 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for MIPS_32 targets.
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub mips/alt_stub.S
+
+# file header and basic definitions
+import mips/header.S
+
+# arch-specific entry point to interpreter
+import mips/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub mips/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start mips
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    # op op_unused_f3 FALLBACK
+    # op op_unused_f4 FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import mips/footer.S
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
new file mode 100644
index 0000000..aafd248
--- /dev/null
+++ b/runtime/interpreter/mterp/config_mips64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for MIPS_64
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub mips64/alt_stub.S
+
+# file header and basic definitions
+import mips64/header.S
+
+# arch-specific entry point to interpreter
+import mips64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub mips64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start mips64
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    # op op_unused_f3 FALLBACK
+    # op op_unused_f4 FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import mips64/footer.S
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
new file mode 100644
index 0000000..64d8ee8
--- /dev/null
+++ b/runtime/interpreter/mterp/config_x86
@@ -0,0 +1,302 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for X86
+#
+
+handler-style computed-goto
+handler-size 128
+
+function-type-format FUNCTION_TYPE(%s)
+function-size-format SIZE(%s,%s)
+global-name-format SYMBOL(%s)
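+# (gen_mterp.py substitutes each emitted symbol into these formats, so that,
+# e.g., the start label is emitted as SYMBOL(artMterpAsmInstructionStart).)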
+
+# source for alternate entry stub
+asm-alt-stub x86/alt_stub.S
+
+# file header and basic definitions
+import x86/header.S
+
+# arch-specific entry point to interpreter
+import x86/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub x86/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start x86
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    # op op_unused_f3 FALLBACK
+    # op op_unused_f4 FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import x86/footer.S
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
new file mode 100644
index 0000000..7c357db
--- /dev/null
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -0,0 +1,302 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for X86_64
+#
+
+handler-style computed-goto
+handler-size 128
+
+function-type-format FUNCTION_TYPE(%s)
+function-size-format SIZE(%s,%s)
+global-name-format SYMBOL(%s)
+
+# source for alternate entry stub
+asm-alt-stub x86_64/alt_stub.S
+
+# file header and basic definitions
+import x86_64/header.S
+
+# arch-specific entry point to interpreter
+import x86_64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub x86_64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start x86_64
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    # op op_unused_f3 FALLBACK
+    # op op_unused_f4 FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import x86_64/footer.S
diff --git a/runtime/interpreter/mterp/gen_mterp.py b/runtime/interpreter/mterp/gen_mterp.py
new file mode 100755
index 0000000..5839b5f
--- /dev/null
+++ b/runtime/interpreter/mterp/gen_mterp.py
@@ -0,0 +1,630 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Using instructions from an architecture-specific config file, generate the
+# assembly source file for the mterp interpreter.
+#
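+#
+# A config file is a sequence of one-word directives; a minimal example
+# (illustrative, cf. the config_* files added in this change):
+#
+#     handler-style computed-goto
+#     handler-size 128
+#     import mips64/header.S
+#     fallback-stub mips64/fallback.S
+#     op-start mips64
+#         op op_nop FALLBACK
+#     op-end
+#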
+
+import sys, string, re, time
+from string import Template
+
+interp_defs_file = "../../dex_instruction_list.h" # need opcode list
+kNumPackedOpcodes = 256
+
+splitops = False
+verbose = False
+handler_size_bits = -1000
+handler_size_bytes = -1000
+in_op_start = 0             # 0=not started, 1=started, 2=ended
+in_alt_op_start = 0         # 0=not started, 1=started, 2=ended
+default_op_dir = None
+default_alt_stub = None
+opcode_locations = {}
+alt_opcode_locations = {}
+asm_stub_text = []
+fallback_stub_text = []
+label_prefix = ".L"         # use ".L" to hide labels from gdb
+alt_label_prefix = ".L_ALT" # use ".L" to hide labels from gdb
+style = None                # interpreter style
+generate_alt_table = False
+function_type_format = ".type   %s, %%function"
+function_size_format = ".size   %s, .-%s"
+global_name_format = "%s"
+
+# Exception class.
+class DataParseError(SyntaxError):
+    "Failure when parsing data file"
+
+#
+# Set any omnipresent substitution values.
+#
+def getGlobalSubDict():
+    return { "handler_size_bits":handler_size_bits,
+             "handler_size_bytes":handler_size_bytes }
+
+#
+# Parse arch config file --
+# Set interpreter style.
+#
+def setHandlerStyle(tokens):
+    global style
+    if len(tokens) != 2:
+        raise DataParseError("handler-style requires one argument")
+    style = tokens[1]
+    if style != "computed-goto":
+        raise DataParseError("handler-style (%s) invalid" % style)
+
+#
+# Parse arch config file --
+# Set handler_size_bytes to the value of tokens[1], and handler_size_bits to
+# log2(handler_size_bytes).  Throws an exception if "bytes" is zero or not
+# a power of two.
+#
+def setHandlerSize(tokens):
+    global handler_size_bits, handler_size_bytes
+    if style != "computed-goto":
+        print "Warning: handler-size valid only for computed-goto interpreters"
+    if len(tokens) != 2:
+        raise DataParseError("handler-size requires one argument")
+    if handler_size_bits != -1000:
+        raise DataParseError("handler-size may only be set once")
+
+    # compute log2(n), and make sure n is 0 or a power of 2
+    handler_size_bytes = bytes = int(tokens[1])
+    bits = -1
+    while bytes > 0:
+        bytes //= 2     # halve with truncating division
+        bits += 1
+
+    if handler_size_bytes == 0 or handler_size_bytes != (1 << bits):
+        raise DataParseError("handler-size (%d) must be power of 2" \
+                % orig_bytes)
+    handler_size_bits = bits
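+
+# For example, "handler-size 128" (as used in the config files above) sets
+# handler_size_bytes = 128 and handler_size_bits = 7, since 2**7 == 128; a
+# value such as 96 would be rejected because it is not a power of two.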
+
+#
+# Parse arch config file --
+# Copy a file into the asm output file.
+#
+def importFile(tokens):
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    source = tokens[1]
+    if source.endswith(".S"):
+        appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None)
+    else:
+        raise DataParseError("don't know how to import %s (expecting .cpp/.S)"
+                % source)
+
+#
+# Parse arch config file --
+# Load the asm stub source for later emission.
+#
+def setAsmStub(tokens):
+    global asm_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        asm_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load asm-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Load the fallback stub source for later emission.
+#
+def setFallbackStub(tokens):
+    global fallback_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        fallback_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load fallback-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Record location of default alt stub
+#
+def setAsmAltStub(tokens):
+    global default_alt_stub, generate_alt_table
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    default_alt_stub = tokens[1]
+    generate_alt_table = True
+
+#
+# Change the default function type format
+#
+def setFunctionTypeFormat(tokens):
+    global function_type_format
+    function_type_format = tokens[1]
+
+#
+# Change the default function size format
+#
+def setFunctionSizeFormat(tokens):
+    global function_size_format
+    function_size_format = tokens[1]
+
+#
+# Change the global name format
+#
+def setGlobalNameFormat(tokens):
+    global global_name_format
+    global_name_format = tokens[1]
+
+#
+# Parse arch config file --
+# Start of opcode list.
+#
+def opStart(tokens):
+    global in_op_start
+    global default_op_dir
+    if len(tokens) != 2:
+        raise DataParseError("opStart takes a directory name argument")
+    if in_op_start != 0:
+        raise DataParseError("opStart can only be specified once")
+    default_op_dir = tokens[1]
+    in_op_start = 1
+
+#
+# Parse arch config file --
+# Set location of a single alt opcode's source file.
+#
+def altEntry(tokens):
+    global generate_alt_table
+    if len(tokens) != 3:
+        raise DataParseError("alt requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("alt statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    if alt_opcode_locations.has_key(tokens[1]):
+        print "Note: alt overrides earlier %s (%s -> %s)" \
+                % (tokens[1], alt_opcode_locations[tokens[1]], tokens[2])
+    alt_opcode_locations[tokens[1]] = tokens[2]
+    generate_alt_table = True
+
+#
+# Parse arch config file --
+# Set location of a single opcode's source file.
+#
+def opEntry(tokens):
+    #global opcode_locations
+    if len(tokens) != 3:
+        raise DataParseError("op requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("op statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    if opcode_locations.has_key(tokens[1]):
+        print "Note: op overrides earlier %s (%s -> %s)" \
+                % (tokens[1], opcode_locations[tokens[1]], tokens[2])
+    opcode_locations[tokens[1]] = tokens[2]
+
+#
+# Parse arch config file --
+# End of opcode list; emit instruction blocks.
+#
+def opEnd(tokens):
+    global in_op_start
+    if len(tokens) != 1:
+        raise DataParseError("opEnd takes no arguments")
+    if in_op_start != 1:
+        raise DataParseError("opEnd must follow opStart, and only appear once")
+    in_op_start = 2
+
+    loadAndEmitOpcodes()
+    if splitops == False:
+        if generate_alt_table:
+            loadAndEmitAltOpcodes()
+
+def genaltop(tokens):
+    if in_op_start != 2:
+        raise DataParseError("alt-ops can be specified only after op-end")
+    if len(tokens) != 1:
+        raise DataParseError("alt-ops takes no arguments")
+    if generate_alt_table:
+        loadAndEmitAltOpcodes()
+
+#
+# Extract an ordered list of instructions from the VM sources.  We use the
+# "goto table" definition macro, which has exactly kNumPackedOpcodes
+# entries.
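+#
+# For example, a list entry of the form (illustrative)
+#     V(0x01, MOVE, "move", k12x, ...)
+# matches the pattern below and contributes "op_move" to the returned list.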
+#
+def getOpcodeList():
+    opcodes = []
+    opcode_fp = open(interp_defs_file)
+    opcode_re = re.compile(r"^\s*V\((....), (\w+),.*", re.DOTALL)
+    for line in opcode_fp:
+        match = opcode_re.match(line)
+        if not match:
+            continue
+        opcodes.append("op_" + match.group(2).lower())
+    opcode_fp.close()
+
+    if len(opcodes) != kNumPackedOpcodes:
+        print "ERROR: found %d opcodes in Interp.h (expected %d)" \
+                % (len(opcodes), kNumPackedOpcodes)
+        raise SyntaxError, "bad opcode count"
+    return opcodes
+
+def emitAlign():
+    if style == "computed-goto":
+        asm_fp.write("    .balign %d\n" % handler_size_bytes)
+
+#
+# Load and emit opcodes for all kNumPackedOpcodes instructions.
+#
+def loadAndEmitOpcodes():
+    sister_list = []
+    assert len(opcodes) == kNumPackedOpcodes
+    need_dummy_start = False
+    start_label = global_name_format % "artMterpAsmInstructionStart"
+    end_label = global_name_format % "artMterpAsmInstructionEnd"
+
+    # point MterpAsmInstructionStart at the first handler or stub
+    asm_fp.write("\n    .global %s\n" % start_label)
+    asm_fp.write("    " + (function_type_format % start_label) + "\n");
+    asm_fp.write("%s = " % start_label + label_prefix + "_op_nop\n")
+    asm_fp.write("    .text\n\n")
+
+    for i in xrange(kNumPackedOpcodes):
+        op = opcodes[i]
+
+        if opcode_locations.has_key(op):
+            location = opcode_locations[op]
+        else:
+            location = default_op_dir
+
+        if location == "FALLBACK":
+            emitFallback(i)
+        else:
+            loadAndEmitAsm(location, i, sister_list)
+
+    # For a 100% C implementation, there are no asm handlers or stubs.  We
+    # need to have the MterpAsmInstructionStart label point at op_nop, and it's
+    # too annoying to try to slide it in after the alignment pseudo-op, so
+    # we take the low road and just emit a dummy op_nop here.
+    if need_dummy_start:
+        emitAlign()
+        asm_fp.write(label_prefix + "_op_nop:   /* dummy */\n")
+
+    emitAlign()
+    asm_fp.write("    " + (function_size_format % (start_label, start_label)) + "\n")
+    asm_fp.write("    .global %s\n" % end_label)
+    asm_fp.write("%s:\n" % end_label)
+
+    if style == "computed-goto":
+        start_sister_label = global_name_format % "artMterpAsmSisterStart"
+        end_sister_label = global_name_format % "artMterpAsmSisterEnd"
+        emitSectionComment("Sister implementations", asm_fp)
+        asm_fp.write("    .global %s\n" % start_sister_label)
+        asm_fp.write("    " + (function_type_format % start_sister_label) + "\n");
+        asm_fp.write("    .text\n")
+        asm_fp.write("    .balign 4\n")
+        asm_fp.write("%s:\n" % start_sister_label)
+        asm_fp.writelines(sister_list)
+        asm_fp.write("\n    " + (function_size_format % (start_sister_label, start_sister_label)) + "\n")
+        asm_fp.write("    .global %s\n" % end_sister_label)
+        asm_fp.write("%s:\n\n" % end_sister_label)
+
+#
+# Load an alternate entry stub
+#
+def loadAndEmitAltStub(source, opindex):
+    op = opcodes[opindex]
+    if verbose:
+        print " alt emit %s --> stub" % source
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+
+    emitAsmHeader(asm_fp, dict, alt_label_prefix)
+    appendSourceFile(source, dict, asm_fp, None)
+
+#
+# Load and emit alternate opcodes for all kNumPackedOpcodes instructions.
+#
+def loadAndEmitAltOpcodes():
+    assert len(opcodes) == kNumPackedOpcodes
+    start_label = global_name_format % "artMterpAsmAltInstructionStart"
+    end_label = global_name_format % "artMterpAsmAltInstructionEnd"
+
+    # point MterpAsmInstructionStart at the first handler or stub
+    asm_fp.write("\n    .global %s\n" % start_label)
+    asm_fp.write("    " + (function_type_format % start_label) + "\n");
+    asm_fp.write("    .text\n\n")
+    asm_fp.write("%s = " % start_label + label_prefix + "_ALT_op_nop\n")
+
+    for i in xrange(kNumPackedOpcodes):
+        op = opcodes[i]
+        if alt_opcode_locations.has_key(op):
+            source = "%s/alt_%s.S" % (alt_opcode_locations[op], op)
+        else:
+            source = default_alt_stub
+        loadAndEmitAltStub(source, i)
+
+    emitAlign()
+    asm_fp.write("    " + (function_size_format % (start_label, start_label)) + "\n")
+    asm_fp.write("    .global %s\n" % end_label)
+    asm_fp.write("%s:\n" % end_label)
+
+#
+# Load an assembly fragment and emit it.
+#
+def loadAndEmitAsm(location, opindex, sister_list):
+    op = opcodes[opindex]
+    source = "%s/%s.S" % (location, op)
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    if verbose:
+        print " emit %s --> asm" % source
+
+    emitAsmHeader(asm_fp, dict, label_prefix)
+    appendSourceFile(source, dict, asm_fp, sister_list)
+
+#
+# Emit fallback fragment
+#
+def emitFallback(opindex):
+    op = opcodes[opindex]
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    emitAsmHeader(asm_fp, dict, label_prefix)
+    for line in fallback_stub_text:
+        asm_fp.write(line)
+    asm_fp.write("\n")
+
+#
+# Output the alignment directive and label for an assembly piece.
+#
+def emitAsmHeader(outfp, dict, prefix):
+    outfp.write("/* ------------------------------ */\n")
+    # The alignment directive ensures that the handler occupies
+    # at least the correct amount of space.  We don't try to deal
+    # with overflow here.
+    emitAlign()
+    # Emit a label so that gdb will say the right thing.  We prepend an
+    # underscore so the symbol name doesn't clash with the Opcode enum.
+    outfp.write(prefix + "_%(opcode)s: /* 0x%(opnum)02x */\n" % dict)
+
+#
+# Output a generic instruction stub that updates the "glue" struct and
+# calls the C implementation.
+#
+def emitAsmStub(outfp, dict):
+    emitAsmHeader(outfp, dict, label_prefix)
+    for line in asm_stub_text:
+        templ = Template(line)
+        outfp.write(templ.substitute(dict))
+
+#
+# Append the file specified by "source" to the open "outfp".  Each line will
+# be template-replaced using the substitution dictionary "dict".
+#
+# If a line starts with "%" it is taken as a directive.  A "%include" line
+# contains a filename and, optionally, a Python-style dictionary declaration
+# with substitution strings.  (This is implemented with recursion.)
+#
+# If "sister_list" is provided, and we find a "%break" line, all subsequent
+# lines from the file will be appended to sister_list instead of copied to
+# the output.
+#
+# This may modify "dict".
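+#
+# For example, an opcode fragment might consist of the single line
+#     %include "mips/binop.S" {"instr":"addu a0, a0, a1"}
+# which appends mips/binop.S with $instr replaced by the addu instruction,
+# after merging in the %default values declared at the top of that file.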
+#
+def appendSourceFile(source, dict, outfp, sister_list):
+    outfp.write("/* File: %s */\n" % source)
+    infp = open(source, "r")
+    in_sister = False
+    for line in infp:
+        if line.startswith("%include"):
+            # Parse the "include" line
+            tokens = line.strip().split(' ', 2)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%include in %s" % source)
+
+            alt_source = tokens[1].strip("\"")
+            if alt_source == source:
+                raise DataParseError("self-referential %%include in %s"
+                        % source)
+
+            new_dict = dict.copy()
+            if len(tokens) == 3:
+                new_dict.update(eval(tokens[2]))
+            #print " including src=%s dict=%s" % (alt_source, new_dict)
+            appendSourceFile(alt_source, new_dict, outfp, sister_list)
+            continue
+
+        elif line.startswith("%default"):
+            # copy keywords into dictionary
+            tokens = line.strip().split(' ', 1)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%default in %s" % source)
+            defaultValues = eval(tokens[1])
+            for entry in defaultValues:
+                dict.setdefault(entry, defaultValues[entry])
+            continue
+
+        elif line.startswith("%break") and sister_list != None:
+            # allow more than one %break, ignoring all following the first
+            if style == "computed-goto" and not in_sister:
+                in_sister = True
+                sister_list.append("\n/* continuation for %(opcode)s */\n"%dict)
+            continue
+
+        # perform keyword substitution if a dictionary was provided
+        if dict is not None:
+            templ = Template(line)
+            try:
+                subline = templ.substitute(dict)
+            except KeyError, err:
+                raise DataParseError("keyword substitution failed in %s: %s"
+                        % (source, str(err)))
+            except:
+                print "ERROR: substitution failed: " + line
+                raise
+        else:
+            subline = line
+
+        # write output to appropriate file
+        if in_sister:
+            sister_list.append(subline)
+        else:
+            outfp.write(subline)
+    outfp.write("\n")
+    infp.close()
+
+#
+# Emit a C-style section header comment.
+#
+def emitSectionComment(str, fp):
+    equals = "========================================" \
+             "==================================="
+
+    fp.write("\n/*\n * %s\n *  %s\n * %s\n */\n" %
+        (equals, str, equals))
+
+
+#
+# ===========================================================================
+# "main" code
+#
+
+#
+# Check args.
+#
+if len(sys.argv) != 3:
+    print "Usage: %s target-arch output-dir" % sys.argv[0]
+    sys.exit(2)
+
+target_arch = sys.argv[1]
+output_dir = sys.argv[2]
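+
+# For example, running "./gen_mterp.py x86 out" from this directory reads
+# config_x86 and writes the generated interpreter to out/mterp_x86.S
+# (the output directory must already exist).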
+
+#
+# Extract opcode list.
+#
+opcodes = getOpcodeList()
+#for op in opcodes:
+#    print "  %s" % op
+
+#
+# Open config file.
+#
+try:
+    config_fp = open("config_%s" % target_arch)
+except:
+    print "Unable to open config file 'config_%s'" % target_arch
+    sys.exit(1)
+
+#
+# Open and prepare output files.
+#
+try:
+    asm_fp = open("%s/mterp_%s.S" % (output_dir, target_arch), "w")
+except:
+    print "Unable to open output files"
+    print "Make sure directory '%s' exists and existing files are writable" \
+            % output_dir
+    # Ideally we'd remove the files to avoid confusing "make", but if they
+    # failed to open we probably won't be able to remove them either.
+    sys.exit(1)
+
+print "Generating %s" % (asm_fp.name)
+
+file_header = """/*
+ * This file was generated automatically by gen_mterp.py for '%s'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+""" % (target_arch)
+
+asm_fp.write(file_header)
+
+#
+# Process the config file.
+#
+failed = False
+try:
+    for line in config_fp:
+        line = line.strip()         # remove CRLF, leading spaces
+        tokens = line.split(' ')    # tokenize
+        #print "%d: %s" % (len(tokens), tokens)
+        if len(tokens[0]) == 0:
+            #print "  blank"
+            pass
+        elif tokens[0][0] == '#':
+            #print "  comment"
+            pass
+        else:
+            if tokens[0] == "handler-size":
+                setHandlerSize(tokens)
+            elif tokens[0] == "import":
+                importFile(tokens)
+            elif tokens[0] == "asm-stub":
+                setAsmStub(tokens)
+            elif tokens[0] == "asm-alt-stub":
+                setAsmAltStub(tokens)
+            elif tokens[0] == "op-start":
+                opStart(tokens)
+            elif tokens[0] == "op-end":
+                opEnd(tokens)
+            elif tokens[0] == "alt":
+                altEntry(tokens)
+            elif tokens[0] == "op":
+                opEntry(tokens)
+            elif tokens[0] == "handler-style":
+                setHandlerStyle(tokens)
+            elif tokens[0] == "alt-ops":
+                genaltop(tokens)
+            elif tokens[0] == "split-ops":
+                splitops = True
+            elif tokens[0] == "fallback-stub":
+               setFallbackStub(tokens)
+            elif tokens[0] == "function-type-format":
+               setFunctionTypeFormat(tokens)
+            elif tokens[0] == "function-size-format":
+               setFunctionSizeFormat(tokens)
+            elif tokens[0] == "global-name-format":
+               setGlobalNameFormat(tokens)
+            else:
+                raise DataParseError, "unrecognized command '%s'" % tokens[0]
+            if style is None:
+                print "tokens[0] = %s" % tokens[0]
+                raise DataParseError, "handler-style must be first command"
+except DataParseError, err:
+    print "Failed: " + str(err)
+    # TODO: remove output files so "make" doesn't get confused
+    failed = True
+    asm_fp.close()
+    asm_fp = None
+
+config_fp.close()
+
+#
+# Done!
+#
+if asm_fp:
+    asm_fp.close()
+
+sys.exit(failed)
diff --git a/runtime/interpreter/mterp/mips/alt_stub.S b/runtime/interpreter/mterp/mips/alt_stub.S
new file mode 100644
index 0000000..de13313
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/alt_stub.S
@@ -0,0 +1,13 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (${opnum} * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
diff --git a/runtime/interpreter/mterp/mips/bincmp.S b/runtime/interpreter/mterp/mips/bincmp.S
new file mode 100644
index 0000000..68df5c3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/bincmp.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    b${condition} a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
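+
+/*
+ * Example use (illustrative): an if-eq fragment would contain only
+ *     %include "mips/bincmp.S" { "condition":"eq" }
+ * so that the b${condition} line above assembles as beq.
+ */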
diff --git a/runtime/interpreter/mterp/mips/binop.S b/runtime/interpreter/mterp/mips/binop.S
new file mode 100644
index 0000000..66627e2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/binop.S
@@ -0,0 +1,33 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if $chkzero
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    $preinstr                              #  optional op
+    $instr                                 #  $result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/mips/binop2addr.S b/runtime/interpreter/mterp/mips/binop2addr.S
new file mode 100644
index 0000000..548cbcb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/binop2addr.S
@@ -0,0 +1,29 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if $chkzero
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    $preinstr                              #  optional op
+    $instr                                 #  $result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopLit16.S b/runtime/interpreter/mterp/mips/binopLit16.S
new file mode 100644
index 0000000..fc0c9ff
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/binopLit16.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
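+    /*
+     * Illustrative sketch: rsub-int (reverse subtract) reuses this template
+     * by swapping the operand order in "instr", e.g.
+     *   %include "mips/binopLit16.S" {"instr":"subu a0, a1, a0"}
+     * which computes vA <- CCCC - vB.
+     */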
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if $chkzero
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    $preinstr                              #  optional op
+    $instr                                 #  $result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopLit8.S b/runtime/interpreter/mterp/mips/binopLit8.S
new file mode 100644
index 0000000..a591408
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/binopLit8.S
@@ -0,0 +1,31 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if $chkzero
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    $preinstr                              #  optional op
+    $instr                                 #  $result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide.S b/runtime/interpreter/mterp/mips/binopWide.S
new file mode 100644
index 0000000..608525b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/binopWide.S
@@ -0,0 +1,35 @@
+%default {"preinstr":"", "result0":"a0", "result1":"a1", "chkzero":"0", "arg0":"a0", "arg1":"a1", "arg2":"a2", "arg3":"a3"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
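+    /*
+     * Illustrative sketch: a 64-bit divide would route through a runtime
+     * helper, e.g. something like
+     *   %include "mips/binopWide.S" {"result0":"v0", "result1":"v1", "instr":"JAL(__divdi3)", "chkzero":"1"}
+     * with the quotient coming back in v0/v1 (the helper name is an assumption).
+     */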
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64($arg0, $arg1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64($arg2, $arg3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if $chkzero
+    or        t0, $arg2, $arg3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    $preinstr                              #  optional op
+    $instr                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vAA/vAA+1 <- $result0/$result1
+    /* 14-17 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide2addr.S b/runtime/interpreter/mterp/mips/binopWide2addr.S
new file mode 100644
index 0000000..cc92149
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/binopWide2addr.S
@@ -0,0 +1,33 @@
+%default {"preinstr":"", "result0":"a0", "result1":"a1", "chkzero":"0", "arg0":"a0", "arg1":"a1", "arg2":"a2", "arg3":"a3"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a2-a3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vA/vA+1
+    .if $chkzero
+    or        t0, $arg2, $arg3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    $preinstr                              #  optional op
+    $instr                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64($result0, $result1, rOBJ)   #  vA/vA+1 <- $result0/$result1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
diff --git a/runtime/interpreter/mterp/mips/entry.S b/runtime/interpreter/mterp/mips/entry.S
new file mode 100644
index 0000000..c806a67
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/entry.S
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align 2
+    .global ExecuteMterpImpl
+    .ent    ExecuteMterpImpl
+    .frame sp, STACK_SIZE, ra
+/*
+ * On entry:
+ *  a0  Thread* self
+ *  a1  code_item
+ *  a2  ShadowFrame
+ *  a3  JValue* result_register
+ *
+ */
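+/*
+ * Rough C-level signature this entry point implements (a sketch; see the
+ * interpreter sources for the authoritative declaration):
+ *   extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+ *                                    ShadowFrame* shadow_frame, JValue* result_register);
+ */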
+
+ExecuteMterpImpl:
+    .set noreorder
+    .cpload t9
+    .set reorder
+/* Save to the stack. Frame size = STACK_SIZE */
+    STACK_STORE_FULL()
+/* This directive makes sure every subsequent jal restores gp from a known stack offset */
+    .cprestore STACK_OFFSET_GP
+
+    /* Remember the return register */
+    sw      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
+
+    /* Remember the code_item */
+    sw      a1, SHADOWFRAME_CODE_ITEM_OFFSET(a2)
+
+    /* set up "named" registers */
+    move    rSELF, a0
+    lw      a0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
+    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to vregs.
+    EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
+    lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
+    addu    rPC, a1, CODEITEM_INSNS_OFFSET        # Point to base of insns[]
+    EAS1(rPC, rPC, a0)                            # Create direct pointer to 1st dex opcode
+
+    EXPORT_PC()
+
+    /* Starting ibase */
+    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
+    /* Set up for backwards branches & osr profiling */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpSetUpHotnessCountdown)        # (method, shadow_frame)
+    move    rPROFILE, v0                   # Starting hotness countdown to rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST()                           # load rINST from rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+    /* NOTE: no fallthrough */
diff --git a/runtime/interpreter/mterp/mips/fallback.S b/runtime/interpreter/mterp/mips/fallback.S
new file mode 100644
index 0000000..82cbc63
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/fallback.S
@@ -0,0 +1,2 @@
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
diff --git a/runtime/interpreter/mterp/mips/fbinop.S b/runtime/interpreter/mterp/mips/fbinop.S
new file mode 100644
index 0000000..d0d39ae
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/fbinop.S
@@ -0,0 +1,19 @@
+    /*
+     * Generic 32-bit binary float operation.
+     *
+     * For: add-fp, sub-fp, mul-fp, div-fp, rem-fp
+     */
+
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG_F(fa1, a3)                    #  fa1 <- vCC
+    GET_VREG_F(fa0, a2)                    #  fa0 <- vBB
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    $instr                                 #  fv0 <- result
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/fbinop2addr.S b/runtime/interpreter/mterp/mips/fbinop2addr.S
new file mode 100644
index 0000000..ccb67b1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/fbinop2addr.S
@@ -0,0 +1,19 @@
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
+     * div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, rOBJ)
+    GET_VREG_F(fa1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    $instr
+    SET_VREG_F(fv0, rOBJ)                  #  vA <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/fbinopWide.S b/runtime/interpreter/mterp/mips/fbinopWide.S
new file mode 100644
index 0000000..3be9325
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/fbinopWide.S
@@ -0,0 +1,28 @@
+    /*
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "result = fa0 op fa1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64_F(fa0, fa0f, a2)
+    LOAD64_F(fa1, fa1f, t1)
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    $instr
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    b         .L${opcode}_finish
+%break
+
+.L${opcode}_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/fbinopWide2addr.S b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
new file mode 100644
index 0000000..8541f11
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
@@ -0,0 +1,21 @@
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be an MIPS instruction or a function call.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *  div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64_F(fa0, fa0f, t0)
+    LOAD64_F(fa1, fa1f, a1)
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $instr
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/footer.S b/runtime/interpreter/mterp/mips/footer.S
new file mode 100644
index 0000000..1363751
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/footer.S
@@ -0,0 +1,289 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align 2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogDivideByZeroException)
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogArrayIndexException)
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogNegativeArraySizeException)
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogNoSuchMethodException)
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogNullObjectException)
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogExceptionThrownException)
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    lw    a2, THREAD_FLAGS_OFFSET(rSELF)
+    JAL(MterpLogSuspendFallback)
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    lw      a0, THREAD_EXCEPTION_OFFSET(rSELF)
+    beqz    a0, MterpFallback          # If no pending exception, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpHandleException)                    # (self, shadow_frame)
+    beqz    v0, MterpExceptionReturn             # no local catch, back to caller.
+    lw      a0, OFF_FP_CODE_ITEM(rFP)
+    lw      a1, OFF_FP_DEX_PC(rFP)
+    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+    addu    rPC, a0, CODEITEM_INSNS_OFFSET
+    sll     a1, a1, 1
+    addu    rPC, rPC, a1                         # generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC()
+    FETCH_INST()
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ */
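+/*
+ * Rough pseudocode of the dispatch below (sketch only):
+ *   if (offset > 0)                  goto forward_branch;
+ *   if (rPROFILE == JIT_CHECK_OSR)   goto osr_check;
+ *   if (rPROFILE <  JIT_CHECK_OSR)   goto resume_backward_branch;  // profiling disabled
+ *   if (--rPROFILE == 0)             goto add_batch;               // report hotness
+ *   goto resume_backward_branch;
+ */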
+MterpCommonTakenBranchNoFlags:
+    bgtz    rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    li      t0, JIT_CHECK_OSR
+    beq     rPROFILE, t0, .L_osr_check
+    blt     rPROFILE, t0, .L_resume_backward_branch
+    subu    rPROFILE, 1
+    beqz    rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE()
+    addu    a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnez    ra, .L_suspend_request_pending
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC()
+    move    a0, rSELF
+    JAL(MterpSuspendCheck)              # (self)
+    bnez    v0, MterpFallback
+    REFRESH_IBASE()                     # might have changed during suspend
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_no_count_backwards:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bne     rPROFILE, t0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beq     rPROFILE, t0, .L_check_osr_forward
+.L_resume_forward_branch:
+    add     a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    lw      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST(2)
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    JAL(MterpLogOSR)
+#endif
+    li      v0, 1                       # Signal normal return
+    b       MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogFallback)
+#endif
+MterpCommonFallback:
+    move    v0, zero                    # signal retry with reference interpreter.
+    b       MterpDone
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * the stack pointer and return address.  Here we restore the registers
+ * from the stack and return through ra.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    li      v0, 1                       # signal return to caller.
+    b       MterpDone
+MterpReturn:
+    lw      a2, OFF_FP_RESULT_REGISTER(rFP)
+    sw      v0, 0(a2)
+    sw      v1, 4(a2)
+    li      v0, 1                       # signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if <= 0, nothing to report; otherwise fall through
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
+/* Restore from the stack and return. Frame size = STACK_SIZE */
+    STACK_LOAD_FULL()
+    jalr    zero, ra
+
+    .end ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/mips/funop.S b/runtime/interpreter/mterp/mips/funop.S
new file mode 100644
index 0000000..bfb9346
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/funop.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $instr
+
+.L${opcode}_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GOTO_OPCODE(t1)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/funopWide.S b/runtime/interpreter/mterp/mips/funopWide.S
new file mode 100644
index 0000000..3d4cf22
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/funopWide.S
@@ -0,0 +1,22 @@
+%default {"preinstr":"", "ld_arg":"LOAD64_F(fa0, fa0f, a3)", "st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    $ld_arg
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $preinstr                              #  optional op
+    $instr                                 #  a0/a1 <- op, a2-a3 changed
+
+.L${opcode}_set_vreg:
+    $st_result                             #  vA/vA+1 <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/funopWider.S b/runtime/interpreter/mterp/mips/funopWider.S
new file mode 100644
index 0000000..efb85f3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/funopWider.S
@@ -0,0 +1,19 @@
+%default {"st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0", where
+     * "result" is a 64-bit quantity in a0/a1.
+     *
+     * For: int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $instr
+
+.L${opcode}_set_vreg:
+    $st_result                             #  vA/vA+1 <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
new file mode 100644
index 0000000..a3a6744
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -0,0 +1,492 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate the assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+#include "asm_support.h"
+
+#if (__mips==32) && (__mips_isa_rev>=2)
+#define MIPS32REVGE2    /* mips32r2 and greater */
+#if (__mips==32) && (__mips_isa_rev>=5)
+#define FPU64           /* 64 bit FPU */
+#if (__mips==32) && (__mips_isa_rev>=6)
+#define MIPS32REVGE6    /* mips32r6 and greater */
+#endif
+#endif
+#endif
+
+/* MIPS definitions and declarations
+
+   reg  nick      purpose
+   s0   rPC       interpreted program counter, used for fetching instructions
+   s1   rFP       interpreted frame pointer, used for accessing locals and args
+   s2   rSELF     self (Thread) pointer
+   s3   rIBASE    interpreted instruction base pointer, used for computed goto
+   s4   rINST     first 16-bit code unit of current instruction
+   s5   rOBJ      object pointer
+   s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+   s7   rTEMP     used as temp storage that can survive a function call
+   s8   rPROFILE  branch profiling countdown
+
+*/
+
+/* single-purpose registers, given names for clarity */
+#define rPC s0
+#define rFP s1
+#define rSELF s2
+#define rIBASE s3
+#define rINST s4
+#define rOBJ s5
+#define rREFS s6
+#define rTEMP s7
+#define rPROFILE s8
+
+#define rARG0 a0
+#define rARG1 a1
+#define rARG2 a2
+#define rARG3 a3
+#define rRESULT0 v0
+#define rRESULT1 v1
+
+/* GP register definitions */
+#define zero    $$0      /* always zero */
+#define AT      $$at     /* assembler temp */
+#define v0      $$2      /* return value */
+#define v1      $$3
+#define a0      $$4      /* argument registers */
+#define a1      $$5
+#define a2      $$6
+#define a3      $$7
+#define t0      $$8      /* temp registers (not saved across subroutine calls) */
+#define t1      $$9
+#define t2      $$10
+#define t3      $$11
+#define t4      $$12
+#define t5      $$13
+#define t6      $$14
+#define t7      $$15
+#define ta0     $$12     /* alias */
+#define ta1     $$13
+#define ta2     $$14
+#define ta3     $$15
+#define s0      $$16     /* saved across subroutine calls (callee saved) */
+#define s1      $$17
+#define s2      $$18
+#define s3      $$19
+#define s4      $$20
+#define s5      $$21
+#define s6      $$22
+#define s7      $$23
+#define t8      $$24     /* two more temp registers */
+#define t9      $$25
+#define k0      $$26     /* kernel temporary */
+#define k1      $$27
+#define gp      $$28     /* global pointer */
+#define sp      $$29     /* stack pointer */
+#define s8      $$30     /* one more callee saved */
+#define ra      $$31     /* return address */
+
+/* FP register definitions */
+#define fv0    $$f0
+#define fv0f   $$f1
+#define fv1    $$f2
+#define fv1f   $$f3
+#define fa0    $$f12
+#define fa0f   $$f13
+#define fa1    $$f14
+#define fa1f   $$f15
+#define ft0    $$f4
+#define ft0f   $$f5
+#define ft1    $$f6
+#define ft1f   $$f7
+#define ft2    $$f8
+#define ft2f   $$f9
+#define ft3    $$f10
+#define ft3f   $$f11
+#define ft4    $$f16
+#define ft4f   $$f17
+#define ft5    $$f18
+#define ft5f   $$f19
+#define fs0    $$f20
+#define fs0f   $$f21
+#define fs1    $$f22
+#define fs1f   $$f23
+#define fs2    $$f24
+#define fs2f   $$f25
+#define fs3    $$f26
+#define fs3f   $$f27
+#define fs4    $$f28
+#define fs4f   $$f29
+#define fs5    $$f30
+#define fs5f   $$f31
+
+#ifndef MIPS32REVGE6
+#define fcc0   $$fcc0
+#define fcc1   $$fcc1
+#endif
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
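+/*
+ * Example (sketch): OFF_FP_METHOD expands to
+ *   SHADOWFRAME_METHOD_OFFSET - SHADOWFRAME_VREGS_OFFSET
+ * which is a negative constant, so "lw a0, OFF_FP_METHOD(rFP)" reads the
+ * method_ field that sits just below the vreg array rFP points at.
+ */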
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
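+/*
+ * Sketch of the conversion GetDexPC performs (not code from this file):
+ *   dex_pc = (dex_pc_ptr - code_item->insns_) / sizeof(uint16_t)
+ * i.e. the inverse of the pointer arithmetic done in EXPORT_DEX_PC below.
+ */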
+#define EXPORT_PC() \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
+
+#define EXPORT_DEX_PC(tmp) \
+    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
+    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
+    addu tmp, CODEITEM_INSNS_OFFSET \
+    subu tmp, rPC, tmp \
+    sra  tmp, tmp, 1 \
+    sw   tmp, OFF_FP_DEX_PC(rFP)
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+#define FETCH_INST() lhu rINST, (rPC)
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC().)
+ */
+#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+    addu      rPC, rPC, ((_count) * 2)
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
+    lhu       _dreg, ((_count)*2)(_sreg) ;            \
+    addu      _sreg, _sreg, (_count)*2
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+#define PREFETCH_INST(_count) lhu rINST, ((_count)*2)(rPC)
+
+/* Advance rPC by some number of code units. */
+#define ADVANCE(_count) addu rPC, rPC, ((_count) * 2)
+
+/*
+ * Fetch the next instruction from an offset specified by rd.  Updates
+ * rPC to point to the next instruction.  "rd" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ */
+#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+    lhu       rINST, (rPC)
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+#define FETCH(rd, _count) lhu rd, ((_count) * 2)(rPC)
+#define FETCH_S(rd, _count) lh rd, ((_count) * 2)(rPC)
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+#define FETCH_B(rd, _count, _byte) lbu rd, ((_count) * 2 + _byte)(rPC)
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+#define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+
+/*
+ * Begin executing the opcode in rd.  Handlers live in a table based at rIBASE,
+ * each padded to a fixed 2^handler_size_bits byte slot, so the target address
+ * is simply rIBASE + (opcode << handler_size_bits).
+ */
+#define GOTO_OPCODE(rd) sll rd, rd, ${handler_size_bits}; \
+    addu      rd, rIBASE, rd; \
+    jalr      zero, rd
+
+#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, ${handler_size_bits}; \
+    addu      rd, _base, rd; \
+    jalr      zero, rd
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.  The setters also write the
+ * matching slot in the reference array (rREFS): zero for primitive values, the
+ * same value for objects (SET_VREG_OBJECT), so the GC never sees a stale reference.
+ */
+#define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
+
+#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
+    .set noat; l.s rd, (AT); .set at
+
+#define SET_VREG(rd, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+
+#define SET_VREG64(rlo, rhi, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+
+#ifdef FPU64
+#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    s.s       rlo, 0(t8)
+#else
+#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#endif
+
+#define SET_VREG_OBJECT(rd, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+
+/* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
+#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
+    sll       dst, dst, ${handler_size_bits}; \
+    addu      dst, rIBASE, dst; \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+
+/* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
+    sll       dst, dst, ${handler_size_bits}; \
+    addu      dst, rIBASE, dst; \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+
+#define SET_VREG_F(rd, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+
+#define GET_OPA(rd) srl rd, rINST, 8
+#ifdef MIPS32REVGE2
+#define GET_OPA4(rd) ext rd, rINST, 8, 4
+#else
+#define GET_OPA4(rd) GET_OPA(rd); and rd, 0xf
+#endif
+#define GET_OPB(rd) srl rd, rINST, 12
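+/*
+ * Sketch of the first 16-bit code unit these macros decode:
+ *   bits 15..12 = B, bits 11..8 = A, bits 7..0 = opcode (vA/vB forms);
+ * for vAA forms the whole byte in bits 15..8 is AA.  GET_OPA therefore
+ * yields an unmasked "A+" that GET_OPA4 narrows to the low 4 bits.
+ */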
+
+/*
+ * Form an Effective Address rd = rbase + roff<<n;
+ * Uses reg AT
+ */
+#define EASN(rd, rbase, roff, rshift) .set noat; \
+    sll       AT, roff, rshift; \
+    addu      rd, rbase, AT; \
+    .set at
+
+#define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
+#define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
+#define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
+#define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
+
+/*
+ * Form an Effective Shift Right rd = rbase + roff>>n;
+ * Uses reg AT
+ */
+#define ESRN(rd, rbase, roff, rshift) .set noat; \
+    srl       AT, roff, rshift; \
+    addu      rd, rbase, AT; \
+    .set at
+
+#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
+    .set noat; lw rd, 0(AT); .set at
+
+#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
+    .set noat; sw rd, 0(AT); .set at
+
+#define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
+#define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
+
+#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+    sw        rhi, (off+4)(rbase)
+#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+    lw        rhi, (off+4)(rbase)
+
+#define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
+#define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
+
+#ifdef FPU64
+#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    sw        AT, (off+4)(rbase); \
+    .set at
+#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+    .set noat; \
+    lw        AT, (off+4)(rbase); \
+    mthc1     AT, rlo; \
+    .set at
+#else
+#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+    s.s       rhi, (off+4)(rbase)
+#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+    l.s       rhi, (off+4)(rbase)
+#endif
+
+#define STORE64_F(rlo, rhi, rbase) STORE64_off_F(rlo, rhi, rbase, 0)
+#define LOAD64_F(rlo, rhi, rbase) LOAD64_off_F(rlo, rhi, rbase, 0)
+
+
+#define LOAD_base_offMirrorArray_length(rd, rbase) LOAD_RB_OFF(rd, rbase, MIRROR_ARRAY_LENGTH_OFFSET)
+
+#define STACK_STORE(rd, off) sw rd, off(sp)
+#define STACK_LOAD(rd, off) lw rd, off(sp)
+#define CREATE_STACK(n) subu sp, sp, n
+#define DELETE_STACK(n) addu sp, sp, n
+
+#define LOAD_ADDR(dest, addr) la dest, addr
+#define LOAD_IMM(dest, imm) li dest, imm
+#define MOVE_REG(dest, src) move dest, src
+#define STACK_SIZE 128
+
+#define STACK_OFFSET_ARG04 16
+#define STACK_OFFSET_ARG05 20
+#define STACK_OFFSET_ARG06 24
+#define STACK_OFFSET_ARG07 28
+#define STACK_OFFSET_GP    84
+
+#define JAL(n) jal n
+#define BAL(n) bal n
+
+/*
+ * FP register usage restrictions:
+ * 1) We don't use the callee save FP registers so we don't have to save them.
+ * 2) We don't use the odd FP registers so we can share code with mips32r6.
+ */
+#define STACK_STORE_FULL() CREATE_STACK(STACK_SIZE); \
+    STACK_STORE(ra, 124); \
+    STACK_STORE(s8, 120); \
+    STACK_STORE(s0, 116); \
+    STACK_STORE(s1, 112); \
+    STACK_STORE(s2, 108); \
+    STACK_STORE(s3, 104); \
+    STACK_STORE(s4, 100); \
+    STACK_STORE(s5, 96); \
+    STACK_STORE(s6, 92); \
+    STACK_STORE(s7, 88);
+
+#define STACK_LOAD_FULL() STACK_LOAD(gp, STACK_OFFSET_GP); \
+    STACK_LOAD(s7, 88); \
+    STACK_LOAD(s6, 92); \
+    STACK_LOAD(s5, 96); \
+    STACK_LOAD(s4, 100); \
+    STACK_LOAD(s3, 104); \
+    STACK_LOAD(s2, 108); \
+    STACK_LOAD(s1, 112); \
+    STACK_LOAD(s0, 116); \
+    STACK_LOAD(s8, 120); \
+    STACK_LOAD(ra, 124); \
+    DELETE_STACK(STACK_SIZE)
+
+#define REFRESH_IBASE() \
+    lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
diff --git a/runtime/interpreter/mterp/mips/invoke.S b/runtime/interpreter/mterp/mips/invoke.S
new file mode 100644
index 0000000..bcd3a57
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/invoke.S
@@ -0,0 +1,19 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern $helper
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL($helper)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
diff --git a/runtime/interpreter/mterp/mips/op_add_double.S b/runtime/interpreter/mterp/mips/op_add_double.S
new file mode 100644
index 0000000..12ef0cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_double.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide.S" {"instr":"add.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_double_2addr.S b/runtime/interpreter/mterp/mips/op_add_double_2addr.S
new file mode 100644
index 0000000..c57add5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide2addr.S" {"instr":"add.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_float.S b/runtime/interpreter/mterp/mips/op_add_float.S
new file mode 100644
index 0000000..6a46cf0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_float.S
@@ -0,0 +1 @@
+%include "mips/fbinop.S" {"instr":"add.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_float_2addr.S b/runtime/interpreter/mterp/mips/op_add_float_2addr.S
new file mode 100644
index 0000000..6ab5cc1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinop2addr.S" {"instr":"add.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_int.S b/runtime/interpreter/mterp/mips/op_add_int.S
new file mode 100644
index 0000000..53a0cb1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_int_2addr.S b/runtime/interpreter/mterp/mips/op_add_int_2addr.S
new file mode 100644
index 0000000..ddd9214
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_int_lit16.S b/runtime/interpreter/mterp/mips/op_add_int_lit16.S
new file mode 100644
index 0000000..05535c1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "mips/binopLit16.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_int_lit8.S b/runtime/interpreter/mterp/mips/op_add_int_lit8.S
new file mode 100644
index 0000000..fd021b3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_add_long.S b/runtime/interpreter/mterp/mips/op_add_long.S
new file mode 100644
index 0000000..faacc6a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_long.S
@@ -0,0 +1,9 @@
+/*
+ *  The compiler generates the following sequence for
+ *  [v1 v0] =  [a1 a0] + [a3 a2];
+ *    addu v0,a2,a0       #  low word
+ *    addu a1,a3,a1       #  high word, without the carry
+ *    sltu v1,v0,a2       #  carry out: set if the low-word sum wrapped below an input
+ *    addu v1,v1,a1       #  fold the carry into the high word
+ */
+%include "mips/binopWide.S" { "result0":"v0", "result1":"v1", "preinstr":"addu v0, a2, a0", "instr":"addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1" }
diff --git a/runtime/interpreter/mterp/mips/op_add_long_2addr.S b/runtime/interpreter/mterp/mips/op_add_long_2addr.S
new file mode 100644
index 0000000..bf827c1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_add_long_2addr.S
@@ -0,0 +1,4 @@
+/*
+ * See op_add_long.S for details
+ */
+%include "mips/binopWide2addr.S" { "result0":"v0", "result1":"v1", "preinstr":"addu v0, a2, a0", "instr":"addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1" }
diff --git a/runtime/interpreter/mterp/mips/op_aget.S b/runtime/interpreter/mterp/mips/op_aget.S
new file mode 100644
index 0000000..8aa8992
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget.S
@@ -0,0 +1,32 @@
+%default { "load":"lw", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if $shift
+    EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    # a1 >= a3; compare unsigned index
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    $load a2, $data_offset(a0)             #  a2 <- vBB[vCC]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a2, rOBJ, t0)            #  vAA <- a2
diff --git a/runtime/interpreter/mterp/mips/op_aget_boolean.S b/runtime/interpreter/mterp/mips/op_aget_boolean.S
new file mode 100644
index 0000000..59f7f82
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "mips/op_aget.S" { "load":"lbu", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aget_byte.S b/runtime/interpreter/mterp/mips/op_aget_byte.S
new file mode 100644
index 0000000..11038fa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget_byte.S
@@ -0,0 +1 @@
+%include "mips/op_aget.S" { "load":"lb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aget_char.S b/runtime/interpreter/mterp/mips/op_aget_char.S
new file mode 100644
index 0000000..96f2ab6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget_char.S
@@ -0,0 +1 @@
+%include "mips/op_aget.S" { "load":"lhu", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aget_object.S b/runtime/interpreter/mterp/mips/op_aget_object.S
new file mode 100644
index 0000000..e3ab9d8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget_object.S
@@ -0,0 +1,20 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    EXPORT_PC()
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    JAL(artAGetObjectFromMterp)            #  v0 <- GetObj(array, index)
+    lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    PREFETCH_INST(2)                       #  load rINST
+    bnez a1, MterpException
+    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_aget_short.S b/runtime/interpreter/mterp/mips/op_aget_short.S
new file mode 100644
index 0000000..cd7f7bf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget_short.S
@@ -0,0 +1 @@
+%include "mips/op_aget.S" { "load":"lh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aget_wide.S b/runtime/interpreter/mterp/mips/op_aget_wide.S
new file mode 100644
index 0000000..08822f5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aget_wide.S
@@ -0,0 +1,22 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    EAS3(a0, a0, a1)                       #  a0 <- arrayObj + index*width
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    LOAD64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(a2, a3, rOBJ, t0)      #  vAA/vAA+1 <- a2/a3
diff --git a/runtime/interpreter/mterp/mips/op_and_int.S b/runtime/interpreter/mterp/mips/op_and_int.S
new file mode 100644
index 0000000..98fe4af
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_and_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_and_int_2addr.S b/runtime/interpreter/mterp/mips/op_and_int_2addr.S
new file mode 100644
index 0000000..7f90ed4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_and_int_lit16.S b/runtime/interpreter/mterp/mips/op_and_int_lit16.S
new file mode 100644
index 0000000..e46f23b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "mips/binopLit16.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_and_int_lit8.S b/runtime/interpreter/mterp/mips/op_and_int_lit8.S
new file mode 100644
index 0000000..3332883
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_and_long.S b/runtime/interpreter/mterp/mips/op_and_long.S
new file mode 100644
index 0000000..a98a6df
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_and_long.S
@@ -0,0 +1 @@
+%include "mips/binopWide.S" {"preinstr":"and a0, a0, a2", "instr":"and a1, a1, a3"}
diff --git a/runtime/interpreter/mterp/mips/op_and_long_2addr.S b/runtime/interpreter/mterp/mips/op_and_long_2addr.S
new file mode 100644
index 0000000..350c044
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "mips/binopWide2addr.S" {"preinstr":"and a0, a0, a2", "instr":"and a1, a1, a3"}
diff --git a/runtime/interpreter/mterp/mips/op_aput.S b/runtime/interpreter/mterp/mips/op_aput.S
new file mode 100644
index 0000000..53d6ae0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput.S
@@ -0,0 +1,30 @@
+%default { "store":"sw", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if $shift
+    EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, rOBJ)                     #  a2 <- vAA
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    $store a2, $data_offset(a0)            #  vBB[vCC] <- a2
+    GOTO_OPCODE(t0)                        #  jump to next instruction
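For reference, a rough C sketch of the fast path this aput template implements; the helper name, return codes, and array layout parameters below are illustrative only (the real handler operates on raw Dalvik registers and jumps directly to the next opcode handler):

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch: length field at len_offset, element data at data_offset;
     * store width and shift are template parameters ($store/$shift). */
    static int aput32(uint8_t *array, uint32_t index, uint32_t value,
                      size_t len_offset, size_t data_offset, int shift) {
        if (array == NULL)
            return -1;                               /* common_errNullObject */
        uint32_t length = *(uint32_t *)(array + len_offset);
        if (index >= length)                         /* unsigned, like bgeu */
            return -2;                               /* common_errArrayIndex */
        *(uint32_t *)(array + data_offset + ((size_t)index << shift)) = value;
        return 0;
    }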
diff --git a/runtime/interpreter/mterp/mips/op_aput_boolean.S b/runtime/interpreter/mterp/mips/op_aput_boolean.S
new file mode 100644
index 0000000..9cae5ef
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "mips/op_aput.S" { "store":"sb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aput_byte.S b/runtime/interpreter/mterp/mips/op_aput_byte.S
new file mode 100644
index 0000000..3bbd12c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput_byte.S
@@ -0,0 +1 @@
+%include "mips/op_aput.S" { "store":"sb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aput_char.S b/runtime/interpreter/mterp/mips/op_aput_char.S
new file mode 100644
index 0000000..ae69717
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput_char.S
@@ -0,0 +1 @@
+%include "mips/op_aput.S" { "store":"sh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aput_object.S b/runtime/interpreter/mterp/mips/op_aput_object.S
new file mode 100644
index 0000000..55b13b1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput_object.S
@@ -0,0 +1,14 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    JAL(MterpAputObject)
+    beqz   v0, MterpPossibleException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_aput_short.S b/runtime/interpreter/mterp/mips/op_aput_short.S
new file mode 100644
index 0000000..9586259
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput_short.S
@@ -0,0 +1 @@
+%include "mips/op_aput.S" { "store":"sh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips/op_aput_wide.S b/runtime/interpreter/mterp/mips/op_aput_wide.S
new file mode 100644
index 0000000..ef99261
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_aput_wide.S
@@ -0,0 +1,25 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use a single 64-bit store (STORE64).
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(t0)                            #  t0 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    EAS3(a0, a0, a1)                       #  a0 <- arrayObj + index*width
+    EAS2(rOBJ, rFP, t0)                    #  rOBJ <- &fp[AA]
+    # compare unsigned index, length
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  vBB[vCC] <- a2/a3
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_array_length.S b/runtime/interpreter/mterp/mips/op_array_length.S
new file mode 100644
index 0000000..2b4a86f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_array_length.S
@@ -0,0 +1,12 @@
+    /*
+     * Return the length of an array.
+     */
+    GET_OPB(a1)                            #  a1 <- B
+    GET_OPA4(a2)                           #  a2 <- A+
+    GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
+    # is object null?
+    beqz      a0, common_errNullObject     #  yup, fail
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- array length
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a3, a2, t0)              #  vA <- length
diff --git a/runtime/interpreter/mterp/mips/op_check_cast.S b/runtime/interpreter/mterp/mips/op_check_cast.S
new file mode 100644
index 0000000..9a6cefa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_check_cast.S
@@ -0,0 +1,16 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    # check-cast vAA, class                /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           #  a0 <- BBBB
+    GET_OPA(a1)                            #  a1 <- AA
+    EAS2(a1, rFP, a1)                      #  a1 <- &object
+    lw     a2, OFF_FP_METHOD(rFP)          #  a2 <- method
+    move   a3, rSELF                       #  a3 <- self
+    JAL(MterpCheckCast)                    #  v0 <- CheckCast(index, &obj, method, self)
+    PREFETCH_INST(2)
+    bnez   v0, MterpPossibleException
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_cmp_long.S b/runtime/interpreter/mterp/mips/op_cmp_long.S
new file mode 100644
index 0000000..44806c3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_cmp_long.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two 64-bit values
+     *    x = y     return  0
+     *    x < y     return -1
+     *    x > y     return  1
+     *
+     * This improves on the ARM code via the following observation:
+     *    slt   t0, x.hi, y.hi          #  (x.hi < y.hi) ? 1:0
+     *    sgt   t1, x.hi, y.hi          #  (x.hi > y.hi) ? 1:0
+     *    subu  v0, t1, t0              #  v0 <- -1, 1, or 0 for [ <, >, = ]
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[CC]
+    LOAD64(a0, a1, a2)                     #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, a3)                     #  a2/a3 <- vCC/vCC+1
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    slt       t0, a1, a3                   #  compare hi
+    sgt       t1, a1, a3
+    subu      v0, t1, t0                   #  v0 <- (-1, 1, 0)
+    bnez      v0, .L${opcode}_finish
+    # at this point x.hi==y.hi
+    sltu      t0, a0, a2                   #  compare lo
+    sgtu      t1, a0, a2
+    subu      v0, t1, t0                   #  v0 <- (-1, 1, 0) for [< > =]
+
+.L${opcode}_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(v0, rOBJ, t0)            #  vAA <- v0
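The slt/sgt subtraction trick above is equivalent to the following C, shown only as a sketch of the semantics:

    #include <stdint.h>

    /* Three-way compare: signed on the high words, then unsigned on the
     * low words once the high words are equal (slt/sgt, then sltu/sgtu). */
    static int32_t cmp_long(int64_t x, int64_t y) {
        int32_t xh = (int32_t)(x >> 32), yh = (int32_t)(y >> 32);
        int32_t v = (xh > yh) - (xh < yh);       /* sgt - slt */
        if (v != 0)
            return v;
        uint32_t xl = (uint32_t)x, yl = (uint32_t)y;
        return (xl > yl) - (xl < yl);            /* sgtu - sltu */
    }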
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_double.S b/runtime/interpreter/mterp/mips/op_cmpg_double.S
new file mode 100644
index 0000000..e7965a7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "mips/op_cmpl_double.S" { "naninst":"li rTEMP, 1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_float.S b/runtime/interpreter/mterp/mips/op_cmpg_float.S
new file mode 100644
index 0000000..53519a6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "mips/op_cmpl_float.S" { "naninst":"li rTEMP, 1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_double.S b/runtime/interpreter/mterp/mips/op_cmpl_double.S
new file mode 100644
index 0000000..5a47fd7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_cmpl_double.S
@@ -0,0 +1,54 @@
+%default { "naninst":"li rTEMP, -1" }
+    /*
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register (rTEMP) based on the comparison results.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * See op_cmpl_float for more details.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
+    srl       t0, a0, 8                    #  t0 <- CC
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
+    EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
+    LOAD64_F(ft0, ft0f, rOBJ)
+    LOAD64_F(ft1, ft1f, t0)
+#ifdef MIPS32REVGE6
+    cmp.ult.d ft2, ft0, ft1
+    li        rTEMP, -1
+    bc1nez    ft2, .L${opcode}_finish
+    cmp.ult.d ft2, ft1, ft0
+    li        rTEMP, 1
+    bc1nez    ft2, .L${opcode}_finish
+    cmp.eq.d  ft2, ft0, ft1
+    li        rTEMP, 0
+    bc1nez    ft2, .L${opcode}_finish
+    b         .L${opcode}_nan
+#else
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, .L${opcode}_finish
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, .L${opcode}_finish
+    c.eq.d    fcc0, ft0, ft1
+    li        rTEMP, 0
+    bc1t      fcc0, .L${opcode}_finish
+    b         .L${opcode}_nan
+#endif
+%break
+
+.L${opcode}_nan:
+    $naninst
+
+.L${opcode}_finish:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_float.S b/runtime/interpreter/mterp/mips/op_cmpl_float.S
new file mode 100644
index 0000000..cfd87ee
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_cmpl_float.S
@@ -0,0 +1,61 @@
+%default { "naninst":"li rTEMP, -1" }
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register rTEMP based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * The operation we're implementing is:
+     *   if (x == y)
+     *     return 0;
+     *   else if (x < y)
+     *     return -1;
+     *   else if (x > y)
+     *     return 1;
+     *   else
+     *     return {-1 or 1};  // one or both operands were NaN
+     *
+     * for: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+
+    /* "clasic" form */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    GET_VREG_F(ft0, a2)
+    GET_VREG_F(ft1, a3)
+#ifdef MIPS32REVGE6
+    cmp.ult.s ft2, ft0, ft1               # Is ft0 < ft1
+    li        rTEMP, -1
+    bc1nez    ft2, .L${opcode}_finish
+    cmp.ult.s ft2, ft1, ft0
+    li        rTEMP, 1
+    bc1nez    ft2, .L${opcode}_finish
+    cmp.eq.s  ft2, ft0, ft1
+    li        rTEMP, 0
+    bc1nez    ft2, .L${opcode}_finish
+    b         .L${opcode}_nan
+#else
+    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    li        rTEMP, -1
+    bc1t      fcc0, .L${opcode}_finish
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, .L${opcode}_finish
+    c.eq.s    fcc0, ft0, ft1
+    li        rTEMP, 0
+    bc1t      fcc0, .L${opcode}_finish
+    b         .L${opcode}_nan
+#endif
+%break
+
+.L${opcode}_nan:
+    $naninst
+
+.L${opcode}_finish:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
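The only difference between cmpl and cmpg is the value substituted for "naninst", i.e. what is returned when all of the ordered compares fail. A C sketch of the shared logic (illustrative only):

    /* naninst == -1 gives cmpl-float; naninst == +1 gives cmpg-float. */
    static int cmp_float(float x, float y, int naninst) {
        if (x < y)  return -1;     /* c.olt ft0, ft1 */
        if (y < x)  return 1;      /* c.olt ft1, ft0 */
        if (x == y) return 0;      /* c.eq  ft0, ft1 */
        return naninst;            /* at least one operand is NaN */
    }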
diff --git a/runtime/interpreter/mterp/mips/op_const.S b/runtime/interpreter/mterp/mips/op_const.S
new file mode 100644
index 0000000..c505761
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const.S
@@ -0,0 +1,9 @@
+    # const vAA,                           /* +BBBBbbbb */
+    GET_OPA(a3)                            #  a3 <- AA
+    FETCH(a0, 1)                           #  a0 <- bbbb (low)
+    FETCH(a1, 2)                           #  a1 <- BBBB (high)
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    sll       a1, a1, 16
+    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
diff --git a/runtime/interpreter/mterp/mips/op_const_16.S b/runtime/interpreter/mterp/mips/op_const_16.S
new file mode 100644
index 0000000..5e47633
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_16.S
@@ -0,0 +1,6 @@
+    # const/16 vAA,                        /* +BBBB */
+    FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
diff --git a/runtime/interpreter/mterp/mips/op_const_4.S b/runtime/interpreter/mterp/mips/op_const_4.S
new file mode 100644
index 0000000..8b662f9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_4.S
@@ -0,0 +1,8 @@
+    # const/4 vA,                          /* +B */
+    sll       a1, rINST, 16                #  a1 <- Bxxx0000
+    GET_OPA(a0)                            #  a0 <- A+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    sra       a1, a1, 28                   #  a1 <- sssssssB (sign-extended)
+    and       a0, a0, 15
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a1, a0, t0)              #  fp[A] <- a1
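The sll/sra pair is the usual shift trick for sign-extending the 4-bit literal B out of bits 15..12 of the instruction word. In C terms, a sketch, with inst holding the 16-bit code unit in a 32-bit register:

    #include <stdint.h>

    static int32_t const4_literal(uint32_t inst) {
        /* sll a1, rINST, 16; sra a1, a1, 28 */
        return (int32_t)(inst << 16) >> 28;
    }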
diff --git a/runtime/interpreter/mterp/mips/op_const_class.S b/runtime/interpreter/mterp/mips/op_const_class.S
new file mode 100644
index 0000000..7202b11
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_class.S
@@ -0,0 +1,12 @@
+    # const/class vAA, Class               /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                        # a0 <- BBBB
+    GET_OPA(a1)                         # a1 <- AA
+    addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
+    move   a3, rSELF
+    JAL(MterpConstClass)
+    PREFETCH_INST(2)                    # load rINST
+    bnez   v0, MterpPossibleException
+    ADVANCE(2)                          # advance rPC
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_const_high16.S b/runtime/interpreter/mterp/mips/op_const_high16.S
new file mode 100644
index 0000000..36c1c35
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_high16.S
@@ -0,0 +1,7 @@
+    # const/high16 vAA,                    /* +BBBB0000 */
+    FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       a0, a0, 16                   #  a0 <- BBBB0000
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
diff --git a/runtime/interpreter/mterp/mips/op_const_string.S b/runtime/interpreter/mterp/mips/op_const_string.S
new file mode 100644
index 0000000..d8eeb46
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_string.S
@@ -0,0 +1,12 @@
+    # const/string vAA, String             /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                        # a0 <- BBBB
+    GET_OPA(a1)                         # a1 <- AA
+    addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
+    move   a3, rSELF
+    JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST(2)                    # load rINST
+    bnez   v0, MterpPossibleException
+    ADVANCE(2)                          # advance rPC
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
new file mode 100644
index 0000000..d732ca1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
@@ -0,0 +1,15 @@
+    # const/string vAA, String          /* BBBBBBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                        # a0 <- bbbb (low)
+    FETCH(a2, 2)                        # a2 <- BBBB (high)
+    GET_OPA(a1)                         # a1 <- AA
+    sll    a2, a2, 16
+    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
+    move   a3, rSELF
+    JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST(3)                    # load rINST
+    bnez   v0, MterpPossibleException
+    ADVANCE(3)                          # advance rPC
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_const_wide.S b/runtime/interpreter/mterp/mips/op_const_wide.S
new file mode 100644
index 0000000..01d0f87
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_wide.S
@@ -0,0 +1,14 @@
+    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    FETCH(a0, 1)                           #  a0 <- bbbb (low)
+    FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
+    FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
+    sll       a1, 16                       #  a1 <- BBBB0000
+    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    FETCH(a3, 4)                           #  a3 <- HHHH (high)
+    GET_OPA(t1)                            #  t1 <- AA
+    sll       a3, 16
+    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
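A sketch of how the 64-bit literal is assembled from the four 16-bit code units bbbb, BBBB, hhhh, and HHHH (names as in the comments above):

    #include <stdint.h>

    static uint64_t const_wide(uint16_t bbbb, uint16_t BBBB,
                               uint16_t hhhh, uint16_t HHHH) {
        uint32_t lo = ((uint32_t)BBBB << 16) | bbbb;   /* a0: low word  */
        uint32_t hi = ((uint32_t)HHHH << 16) | hhhh;   /* a1: high word */
        return ((uint64_t)hi << 32) | lo;
    }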
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_16.S b/runtime/interpreter/mterp/mips/op_const_wide_16.S
new file mode 100644
index 0000000..583d9ef
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_wide_16.S
@@ -0,0 +1,8 @@
+    # const-wide/16 vAA,                   /* +BBBB */
+    FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    sra       a1, a0, 31                   #  a1 <- ssssssss
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_32.S b/runtime/interpreter/mterp/mips/op_const_wide_32.S
new file mode 100644
index 0000000..3eb4574
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_wide_32.S
@@ -0,0 +1,11 @@
+    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
+    GET_OPA(a3)                            #  a3 <- AA
+    FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    sll       a2, a2, 16
+    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    sra       a1, a0, 31                   #  a1 <- ssssssss
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_high16.S b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
new file mode 100644
index 0000000..88382c6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
@@ -0,0 +1,9 @@
+    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    li        a0, 0                        #  a0 <- 00000000
+    sll       a1, 16                       #  a1 <- BBBB0000
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_div_double.S b/runtime/interpreter/mterp/mips/op_div_double.S
new file mode 100644
index 0000000..84e4c4e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_double.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide.S" {"instr":"div.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_div_double_2addr.S b/runtime/interpreter/mterp/mips/op_div_double_2addr.S
new file mode 100644
index 0000000..65b92e3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide2addr.S" {"instr":"div.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_div_float.S b/runtime/interpreter/mterp/mips/op_div_float.S
new file mode 100644
index 0000000..44b8d47
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_float.S
@@ -0,0 +1 @@
+%include "mips/fbinop.S" {"instr":"div.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_div_float_2addr.S b/runtime/interpreter/mterp/mips/op_div_float_2addr.S
new file mode 100644
index 0000000..e5fff92
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinop2addr.S" {"instr":"div.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_div_int.S b/runtime/interpreter/mterp/mips/op_div_int.S
new file mode 100644
index 0000000..5d28c84
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_int.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binop.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binop.S" {"preinstr":"div zero, a0, a1", "instr":"mflo a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_div_int_2addr.S b/runtime/interpreter/mterp/mips/op_div_int_2addr.S
new file mode 100644
index 0000000..6c079e0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_int_2addr.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binop2addr.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binop2addr.S" {"preinstr":"div zero, a0, a1", "instr":"mflo a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_div_int_lit16.S b/runtime/interpreter/mterp/mips/op_div_int_lit16.S
new file mode 100644
index 0000000..ee7452c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_int_lit16.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binopLit16.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binopLit16.S" {"preinstr":"div zero, a0, a1", "instr":"mflo a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_div_int_lit8.S b/runtime/interpreter/mterp/mips/op_div_int_lit8.S
new file mode 100644
index 0000000..d2964b8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_int_lit8.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binopLit8.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binopLit8.S" {"preinstr":"div zero, a0, a1", "instr":"mflo a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_div_long.S b/runtime/interpreter/mterp/mips/op_div_long.S
new file mode 100644
index 0000000..2097866
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_long.S
@@ -0,0 +1 @@
+%include "mips/binopWide.S" {"result0":"v0", "result1":"v1", "instr":"JAL(__divdi3)", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_div_long_2addr.S b/runtime/interpreter/mterp/mips/op_div_long_2addr.S
new file mode 100644
index 0000000..c279305
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "mips/binopWide2addr.S" {"result0":"v0", "result1":"v1", "instr":"JAL(__divdi3)", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_double_to_float.S b/runtime/interpreter/mterp/mips/op_double_to_float.S
new file mode 100644
index 0000000..1d32c2e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_double_to_float.S
@@ -0,0 +1 @@
+%include "mips/unopNarrower.S" {"instr":"cvt.s.d fv0, fa0"}
diff --git a/runtime/interpreter/mterp/mips/op_double_to_int.S b/runtime/interpreter/mterp/mips/op_double_to_int.S
new file mode 100644
index 0000000..30a0a73
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_double_to_int.S
@@ -0,0 +1,58 @@
+%include "mips/unopNarrower.S" {"instr":"b d2i_doconv"}
+/*
+ * Convert the double in a0/a1 to an int in a0.
+ *
+ * We have to clip values to int min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer.  The EABI convert function doesn't do this for us.
+ */
+%break
+
+d2i_doconv:
+#ifdef MIPS32REVGE6
+    la        t0, .LDOUBLE_TO_INT_max
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa1, fa0
+    l.s       fv0, .LDOUBLE_TO_INT_maxret
+    bc1nez    ft2, .L${opcode}_set_vreg_f
+
+    la        t0, .LDOUBLE_TO_INT_min
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa0, fa1
+    l.s       fv0, .LDOUBLE_TO_INT_minret
+    bc1nez    ft2, .L${opcode}_set_vreg_f
+
+    mov.d     fa1, fa0
+    cmp.un.d  ft2, fa0, fa1
+    li.s      fv0, 0
+    bc1nez    ft2, .L${opcode}_set_vreg_f
+#else
+    la        t0, .LDOUBLE_TO_INT_max
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa1, fa0
+    l.s       fv0, .LDOUBLE_TO_INT_maxret
+    bc1t      .L${opcode}_set_vreg_f
+
+    la        t0, .LDOUBLE_TO_INT_min
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa0, fa1
+    l.s       fv0, .LDOUBLE_TO_INT_minret
+    bc1t      .L${opcode}_set_vreg_f
+
+    mov.d     fa1, fa0
+    c.un.d    fcc0, fa0, fa1
+    li.s      fv0, 0
+    bc1t      .L${opcode}_set_vreg_f
+#endif
+
+    trunc.w.d  fv0, fa0
+    b         .L${opcode}_set_vreg_f
+
+.LDOUBLE_TO_INT_max:
+    .dword 0x41dfffffffc00000              #  maxint, as a double
+.LDOUBLE_TO_INT_min:
+    .dword 0xc1e0000000000000              #  minint, as a double
+.LDOUBLE_TO_INT_maxret:
+    .word 0x7fffffff
+.LDOUBLE_TO_INT_minret:
+    .word 0x80000000
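The clamping described in the comment above matches Java's d2i semantics. As a C sketch (the .dword constants are maxint and minint represented as doubles):

    #include <stdint.h>

    static int32_t d2i(double d) {
        if (d >= 2147483647.0)  return INT32_MAX;   /* c.ole.d max, d */
        if (d <= -2147483648.0) return INT32_MIN;   /* c.ole.d d, min */
        if (d != d)             return 0;           /* c.un.d: NaN     */
        return (int32_t)d;                          /* trunc.w.d       */
    }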
diff --git a/runtime/interpreter/mterp/mips/op_double_to_long.S b/runtime/interpreter/mterp/mips/op_double_to_long.S
new file mode 100644
index 0000000..4f9e367
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_double_to_long.S
@@ -0,0 +1,56 @@
+%include "mips/funopWide.S" {"instr":"b d2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
+%break
+
+d2l_doconv:
+#ifdef MIPS32REVGE6
+    la        t0, .LDOUBLE_TO_LONG_max
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa1, fa0
+    la        t0, .LDOUBLE_TO_LONG_ret_max
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1nez    ft2, .L${opcode}_set_vreg
+
+    la        t0, .LDOUBLE_TO_LONG_min
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa0, fa1
+    la        t0, .LDOUBLE_TO_LONG_ret_min
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1nez    ft2, .L${opcode}_set_vreg
+
+    mov.d     fa1, fa0
+    cmp.un.d  ft2, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1nez    ft2, .L${opcode}_set_vreg
+#else
+    la        t0, .LDOUBLE_TO_LONG_max
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa1, fa0
+    la        t0, .LDOUBLE_TO_LONG_ret_max
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1t      .L${opcode}_set_vreg
+
+    la        t0, .LDOUBLE_TO_LONG_min
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa0, fa1
+    la        t0, .LDOUBLE_TO_LONG_ret_min
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1t      .L${opcode}_set_vreg
+
+    mov.d     fa1, fa0
+    c.un.d    fcc0, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1t      .L${opcode}_set_vreg
+#endif
+    JAL(__fixdfdi)
+    b         .L${opcode}_set_vreg
+
+.LDOUBLE_TO_LONG_max:
+    .dword 0x43e0000000000000              #  maxlong, as a double
+.LDOUBLE_TO_LONG_min:
+    .dword 0xc3e0000000000000              #  minlong, as a double
+.LDOUBLE_TO_LONG_ret_max:
+    .dword 0x7fffffffffffffff
+.LDOUBLE_TO_LONG_ret_min:
+    .dword 0x8000000000000000
diff --git a/runtime/interpreter/mterp/mips/op_fill_array_data.S b/runtime/interpreter/mterp/mips/op_fill_array_data.S
new file mode 100644
index 0000000..8605746
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_fill_array_data.S
@@ -0,0 +1,14 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
+    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       a1, a1, 16                   #  a1 <- BBBB0000
+    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
+    EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
+    JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
+    beqz      v0,  MterpPossibleException  #  has exception
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_filled_new_array.S b/runtime/interpreter/mterp/mips/op_filled_new_array.S
new file mode 100644
index 0000000..3f62fae
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_filled_new_array.S
@@ -0,0 +1,18 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    .extern $helper
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
+    move   a1, rPC
+    move   a2, rSELF
+    JAL($helper)                           #  v0 <- helper(shadow_frame, pc, self)
+    beqz      v0,  MterpPossibleException  #  has exception
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_filled_new_array_range.S b/runtime/interpreter/mterp/mips/op_filled_new_array_range.S
new file mode 100644
index 0000000..f8dcb0e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "mips/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/mips/op_float_to_double.S b/runtime/interpreter/mterp/mips/op_float_to_double.S
new file mode 100644
index 0000000..1315255
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_float_to_double.S
@@ -0,0 +1 @@
+%include "mips/funopWider.S" {"instr":"cvt.d.s fv0, fa0"}
diff --git a/runtime/interpreter/mterp/mips/op_float_to_int.S b/runtime/interpreter/mterp/mips/op_float_to_int.S
new file mode 100644
index 0000000..e032869
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_float_to_int.S
@@ -0,0 +1,50 @@
+%include "mips/funop.S" {"instr":"b f2i_doconv"}
+%break
+
+/*
+ * Not an entry point, as it is used only once.
+ */
+f2i_doconv:
+#ifdef MIPS32REVGE6
+    l.s       fa1, .LFLOAT_TO_INT_max
+    cmp.ule.s ft2, fa1, fa0
+    l.s       fv0, .LFLOAT_TO_INT_ret_max
+    bc1nez    ft2, .L${opcode}_set_vreg_f
+
+    l.s       fa1, .LFLOAT_TO_INT_min
+    cmp.ule.s ft2, fa0, fa1
+    l.s       fv0, .LFLOAT_TO_INT_ret_min
+    bc1nez    ft2, .L${opcode}_set_vreg_f
+
+    mov.s     fa1, fa0
+    cmp.un.s  ft2, fa0, fa1
+    li.s      fv0, 0
+    bc1nez    ft2, .L${opcode}_set_vreg_f
+#else
+    l.s       fa1, .LFLOAT_TO_INT_max
+    c.ole.s   fcc0, fa1, fa0
+    l.s       fv0, .LFLOAT_TO_INT_ret_max
+    bc1t      .L${opcode}_set_vreg_f
+
+    l.s       fa1, .LFLOAT_TO_INT_min
+    c.ole.s   fcc0, fa0, fa1
+    l.s       fv0, .LFLOAT_TO_INT_ret_min
+    bc1t      .L${opcode}_set_vreg_f
+
+    mov.s     fa1, fa0
+    c.un.s    fcc0, fa0, fa1
+    li.s      fv0, 0
+    bc1t      .L${opcode}_set_vreg_f
+#endif
+
+    trunc.w.s  fv0, fa0
+    b         .L${opcode}_set_vreg_f
+
+.LFLOAT_TO_INT_max:
+    .word 0x4f000000
+.LFLOAT_TO_INT_min:
+    .word 0xcf000000
+.LFLOAT_TO_INT_ret_max:
+    .word 0x7fffffff
+.LFLOAT_TO_INT_ret_min:
+    .word 0x80000000
diff --git a/runtime/interpreter/mterp/mips/op_float_to_long.S b/runtime/interpreter/mterp/mips/op_float_to_long.S
new file mode 100644
index 0000000..77b2c46
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_float_to_long.S
@@ -0,0 +1,51 @@
+%include "mips/funopWider.S" {"instr":"b f2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
+%break
+
+f2l_doconv:
+#ifdef MIPS32REVGE6
+    l.s       fa1, .LLONG_TO_max
+    cmp.ule.s ft2, fa1, fa0
+    li        rRESULT0, ~0
+    li        rRESULT1, ~0x80000000
+    bc1nez    ft2, .L${opcode}_set_vreg
+
+    l.s       fa1, .LLONG_TO_min
+    cmp.ule.s ft2, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0x80000000
+    bc1nez    ft2, .L${opcode}_set_vreg
+
+    mov.s     fa1, fa0
+    cmp.un.s  ft2, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1nez    ft2, .L${opcode}_set_vreg
+#else
+    l.s       fa1, .LLONG_TO_max
+    c.ole.s   fcc0, fa1, fa0
+    li        rRESULT0, ~0
+    li        rRESULT1, ~0x80000000
+    bc1t      .L${opcode}_set_vreg
+
+    l.s       fa1, .LLONG_TO_min
+    c.ole.s   fcc0, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0x80000000
+    bc1t      .L${opcode}_set_vreg
+
+    mov.s     fa1, fa0
+    c.un.s    fcc0, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1t      .L${opcode}_set_vreg
+#endif
+
+    JAL(__fixsfdi)
+
+    b         .L${opcode}_set_vreg
+
+.LLONG_TO_max:
+    .word 0x5f000000
+
+.LLONG_TO_min:
+    .word 0xdf000000
diff --git a/runtime/interpreter/mterp/mips/op_goto.S b/runtime/interpreter/mterp/mips/op_goto.S
new file mode 100644
index 0000000..57182a5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_goto.S
@@ -0,0 +1,10 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    sll       a0, rINST, 16                #  a0 <- AAxx0000
+    sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
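A sketch of the offset computation: AA sits in bits 15..8 of the instruction word, is sign-extended, and counts 16-bit code units, so the byte displacement is twice the extracted value:

    #include <stdint.h>

    static int32_t goto_byte_offset(uint32_t inst) {
        /* sll a0, rINST, 16; sra rINST, a0, 24 -> ssssssAA */
        int32_t code_units = (int32_t)(inst << 16) >> 24;
        return code_units * 2;   /* each Dalvik code unit is 2 bytes */
    }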
diff --git a/runtime/interpreter/mterp/mips/op_goto_16.S b/runtime/interpreter/mterp/mips/op_goto_16.S
new file mode 100644
index 0000000..06c96cd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_goto_16.S
@@ -0,0 +1,9 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_goto_32.S b/runtime/interpreter/mterp/mips/op_goto_32.S
new file mode 100644
index 0000000..67f52e9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_goto_32.S
@@ -0,0 +1,15 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".
+     */
+    /* goto/32 +AAAAAAAA */
+    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(a1, 2)                           #  a1 <- AAAA (hi)
+    sll       a1, a1, 16
+    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_if_eq.S b/runtime/interpreter/mterp/mips/op_if_eq.S
new file mode 100644
index 0000000..d6f9987
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_eq.S
@@ -0,0 +1 @@
+%include "mips/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips/op_if_eqz.S b/runtime/interpreter/mterp/mips/op_if_eqz.S
new file mode 100644
index 0000000..c52b76a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_eqz.S
@@ -0,0 +1 @@
+%include "mips/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ge.S b/runtime/interpreter/mterp/mips/op_if_ge.S
new file mode 100644
index 0000000..bd06ff5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_ge.S
@@ -0,0 +1 @@
+%include "mips/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gez.S b/runtime/interpreter/mterp/mips/op_if_gez.S
new file mode 100644
index 0000000..549231a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_gez.S
@@ -0,0 +1 @@
+%include "mips/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gt.S b/runtime/interpreter/mterp/mips/op_if_gt.S
new file mode 100644
index 0000000..0be3091
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_gt.S
@@ -0,0 +1 @@
+%include "mips/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gtz.S b/runtime/interpreter/mterp/mips/op_if_gtz.S
new file mode 100644
index 0000000..5c7bcc4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_gtz.S
@@ -0,0 +1 @@
+%include "mips/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_le.S b/runtime/interpreter/mterp/mips/op_if_le.S
new file mode 100644
index 0000000..c35c1a2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_le.S
@@ -0,0 +1 @@
+%include "mips/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips/op_if_lez.S b/runtime/interpreter/mterp/mips/op_if_lez.S
new file mode 100644
index 0000000..3dc6543
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_lez.S
@@ -0,0 +1 @@
+%include "mips/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips/op_if_lt.S b/runtime/interpreter/mterp/mips/op_if_lt.S
new file mode 100644
index 0000000..3f3386c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_lt.S
@@ -0,0 +1 @@
+%include "mips/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ltz.S b/runtime/interpreter/mterp/mips/op_if_ltz.S
new file mode 100644
index 0000000..e6d6ed6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_ltz.S
@@ -0,0 +1 @@
+%include "mips/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ne.S b/runtime/interpreter/mterp/mips/op_if_ne.S
new file mode 100644
index 0000000..3d7bf35
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_ne.S
@@ -0,0 +1 @@
+%include "mips/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips/op_if_nez.S b/runtime/interpreter/mterp/mips/op_if_nez.S
new file mode 100644
index 0000000..d121eae
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_if_nez.S
@@ -0,0 +1 @@
+%include "mips/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips/op_iget.S b/runtime/interpreter/mterp/mips/op_iget.S
new file mode 100644
index 0000000..86d44fa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget.S
@@ -0,0 +1,25 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL($helper)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if $is_object
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iget_boolean.S b/runtime/interpreter/mterp/mips/op_iget_boolean.S
new file mode 100644
index 0000000..e03364e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "mips/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_boolean_quick.S b/runtime/interpreter/mterp/mips/op_iget_boolean_quick.S
new file mode 100644
index 0000000..f3032b3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iget_quick.S" { "load":"lbu" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_byte.S b/runtime/interpreter/mterp/mips/op_iget_byte.S
new file mode 100644
index 0000000..dc87cfe
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_byte.S
@@ -0,0 +1 @@
+%include "mips/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_byte_quick.S b/runtime/interpreter/mterp/mips/op_iget_byte_quick.S
new file mode 100644
index 0000000..d93f844
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iget_quick.S" { "load":"lb" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_char.S b/runtime/interpreter/mterp/mips/op_iget_char.S
new file mode 100644
index 0000000..55f8a93
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_char.S
@@ -0,0 +1 @@
+%include "mips/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_char_quick.S b/runtime/interpreter/mterp/mips/op_iget_char_quick.S
new file mode 100644
index 0000000..6f6d608
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iget_quick.S" { "load":"lhu" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_object.S b/runtime/interpreter/mterp/mips/op_iget_object.S
new file mode 100644
index 0000000..11d93a4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_object.S
@@ -0,0 +1 @@
+%include "mips/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_object_quick.S b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
new file mode 100644
index 0000000..31d94b9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
@@ -0,0 +1,15 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    EXPORT_PC()
+    GET_VREG(a0, a2)                       #  a0 <- object we're operating on
+    JAL(artIGetObjectFromMterp)            #  v0 <- GetObj(obj, offset)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           #  a2<- A+
+    PREFETCH_INST(2)                       #  load rINST
+    bnez a3, MterpPossibleException        #  bail out
+    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iget_quick.S b/runtime/interpreter/mterp/mips/op_iget_quick.S
new file mode 100644
index 0000000..fbafa5b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_quick.S
@@ -0,0 +1,14 @@
+%default { "load":"lw" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1
+    $load     a0, 0(t0)                    #  a0 <- obj.field (8/16/32 bits)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[A] <- a0
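The quick variants skip field resolution entirely: the instruction has already been rewritten (quickened) with the raw byte offset, so the handler reduces to a null check plus a load. A C sketch with illustrative names (not ART APIs):

    #include <stdint.h>

    /* offset comes pre-resolved from the instruction stream (CCCC). */
    static int iget_quick(const uint8_t *obj, uint32_t offset, int32_t *out) {
        if (obj == NULL)
            return -1;                              /* common_errNullObject */
        *out = *(const int32_t *)(obj + offset);    /* $load a0, 0(t0) */
        return 0;
    }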
diff --git a/runtime/interpreter/mterp/mips/op_iget_short.S b/runtime/interpreter/mterp/mips/op_iget_short.S
new file mode 100644
index 0000000..9086246
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_short.S
@@ -0,0 +1 @@
+%include "mips/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_short_quick.S b/runtime/interpreter/mterp/mips/op_iget_short_quick.S
new file mode 100644
index 0000000..899a0fe
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iget_quick.S" { "load":"lh" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide.S b/runtime/interpreter/mterp/mips/op_iget_wide.S
new file mode 100644
index 0000000..8fe3089
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_wide.S
@@ -0,0 +1,20 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field byte offset
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGet64InstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez a3, MterpException                # bail out
+    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
new file mode 100644
index 0000000..4d2f291
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
@@ -0,0 +1,13 @@
+    # iget-wide-quick vA, vB, offset       /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1                   #  t0 <- a3 + a1
+    LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_instance_of.S b/runtime/interpreter/mterp/mips/op_instance_of.S
new file mode 100644
index 0000000..d2679bd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_instance_of.S
@@ -0,0 +1,21 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    # instance-of vA, vB, class            /* CCCC */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- CCCC
+    GET_OPB(a1)                            # a1 <- B
+    EAS2(a1, rFP, a1)                      # a1 <- &object
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    GET_OPA4(rOBJ)                         # rOBJ <- A+
+    JAL(MterpInstanceOf)                   # v0 <- Mterp(index, &obj, method, self)
+    lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    PREFETCH_INST(2)                       # load rINST
+    bnez a1, MterpException
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    SET_VREG_GOTO(v0, rOBJ, t0)            # vA <- v0
diff --git a/runtime/interpreter/mterp/mips/op_int_to_byte.S b/runtime/interpreter/mterp/mips/op_int_to_byte.S
new file mode 100644
index 0000000..77314c62
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "mips/unop.S" {"preinstr":"sll a0, a0, 24", "instr":"sra a0, a0, 24"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_char.S b/runtime/interpreter/mterp/mips/op_int_to_char.S
new file mode 100644
index 0000000..1b74a6e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_int_to_char.S
@@ -0,0 +1 @@
+%include "mips/unop.S" {"preinstr":"", "instr":"and a0, 0xffff"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_double.S b/runtime/interpreter/mterp/mips/op_int_to_double.S
new file mode 100644
index 0000000..89484ce
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_int_to_double.S
@@ -0,0 +1 @@
+%include "mips/funopWider.S" {"instr":"cvt.d.w fv0, fa0"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_float.S b/runtime/interpreter/mterp/mips/op_int_to_float.S
new file mode 100644
index 0000000..d6f4b36
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_int_to_float.S
@@ -0,0 +1 @@
+%include "mips/funop.S" {"instr":"cvt.s.w fv0, fa0"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_long.S b/runtime/interpreter/mterp/mips/op_int_to_long.S
new file mode 100644
index 0000000..9907463
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_int_to_long.S
@@ -0,0 +1 @@
+%include "mips/unopWider.S" {"instr":"sra a1, a0, 31"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_short.S b/runtime/interpreter/mterp/mips/op_int_to_short.S
new file mode 100644
index 0000000..5649c2a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_int_to_short.S
@@ -0,0 +1 @@
+%include "mips/unop.S" {"preinstr":"sll a0, 16", "instr":"sra a0, 16"}
diff --git a/runtime/interpreter/mterp/mips/op_invoke_direct.S b/runtime/interpreter/mterp/mips/op_invoke_direct.S
new file mode 100644
index 0000000..1ef198a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_direct_range.S b/runtime/interpreter/mterp/mips/op_invoke_direct_range.S
new file mode 100644
index 0000000..af7477f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_interface.S b/runtime/interpreter/mterp/mips/op_invoke_interface.S
new file mode 100644
index 0000000..80a485a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_interface.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeInterface" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_interface_range.S b/runtime/interpreter/mterp/mips/op_invoke_interface_range.S
new file mode 100644
index 0000000..8d725dc
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_static.S b/runtime/interpreter/mterp/mips/op_invoke_static.S
new file mode 100644
index 0000000..46253cb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_static.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeStatic" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_static_range.S b/runtime/interpreter/mterp/mips/op_invoke_static_range.S
new file mode 100644
index 0000000..96abafe
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_super.S b/runtime/interpreter/mterp/mips/op_invoke_super.S
new file mode 100644
index 0000000..473951b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_super.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeSuper" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_super_range.S b/runtime/interpreter/mterp/mips/op_invoke_super_range.S
new file mode 100644
index 0000000..963ff27
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_virtual.S b/runtime/interpreter/mterp/mips/op_invoke_virtual.S
new file mode 100644
index 0000000..ea51e98
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_virtual.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeVirtual" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/mips/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..0c00091
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_virtual_range.S b/runtime/interpreter/mterp/mips/op_invoke_virtual_range.S
new file mode 100644
index 0000000..82201e7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/mips/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/mips/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..c783675
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "mips/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/mips/op_iput.S b/runtime/interpreter/mterp/mips/op_iput.S
new file mode 100644
index 0000000..732a9a4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput.S
@@ -0,0 +1,21 @@
+%default { "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    # op vA, vB, field                     /* CCCC */
+    .extern $handler
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    GET_VREG(a2, a2)                       # a2 <- fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL($handler)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_boolean.S b/runtime/interpreter/mterp/mips/op_iput_boolean.S
new file mode 100644
index 0000000..da28c97
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "mips/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_boolean_quick.S b/runtime/interpreter/mterp/mips/op_iput_boolean_quick.S
new file mode 100644
index 0000000..7d5caf6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iput_quick.S" { "store":"sb" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_byte.S b/runtime/interpreter/mterp/mips/op_iput_byte.S
new file mode 100644
index 0000000..da28c97
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_byte.S
@@ -0,0 +1 @@
+%include "mips/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_byte_quick.S b/runtime/interpreter/mterp/mips/op_iput_byte_quick.S
new file mode 100644
index 0000000..7d5caf6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iput_quick.S" { "store":"sb" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_char.S b/runtime/interpreter/mterp/mips/op_iput_char.S
new file mode 100644
index 0000000..389b0bf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_char.S
@@ -0,0 +1 @@
+%include "mips/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_char_quick.S b/runtime/interpreter/mterp/mips/op_iput_char_quick.S
new file mode 100644
index 0000000..4bc84eb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iput_quick.S" { "store":"sh" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_object.S b/runtime/interpreter/mterp/mips/op_iput_object.S
new file mode 100644
index 0000000..6b856e7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_object.S
@@ -0,0 +1,16 @@
+    /*
+     * 32-bit instance field put.
+     *
+     * for: iput-object, iput-object-volatile
+     */
+    # op vA, vB, field                     /* CCCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    move   a3, rSELF
+    JAL(MterpIputObject)
+    beqz   v0, MterpException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_object_quick.S b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
new file mode 100644
index 0000000..c3f1526
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
@@ -0,0 +1,11 @@
+    /* For: iput-object-quick */
+    # op vA, vB, offset                 /* CCCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    JAL(MterpIputObjectQuick)
+    beqz   v0, MterpException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_quick.S b/runtime/interpreter/mterp/mips/op_iput_quick.S
new file mode 100644
index 0000000..0829666
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_quick.S
@@ -0,0 +1,14 @@
+%default { "store":"sw" }
+    /* For: iput-quick, iput-object-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    beqz      a3, common_errNullObject     #  object was null
+    GET_VREG(a0, a2)                       #  a0 <- fp[A]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      t0, a3, a1
+    $store    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_short.S b/runtime/interpreter/mterp/mips/op_iput_short.S
new file mode 100644
index 0000000..389b0bf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_short.S
@@ -0,0 +1 @@
+%include "mips/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_short_quick.S b/runtime/interpreter/mterp/mips/op_iput_short_quick.S
new file mode 100644
index 0000000..4bc84eb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "mips/op_iput_quick.S" { "store":"sh" }
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide.S b/runtime/interpreter/mterp/mips/op_iput_wide.S
new file mode 100644
index 0000000..6d23f8c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_wide.S
@@ -0,0 +1,15 @@
+    # iput-wide vA, vB, field              /* CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    EAS2(a2, rFP, a2)                      # a2 <- &fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet64InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
new file mode 100644
index 0000000..9fdb847
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
@@ -0,0 +1,14 @@
+    # iput-wide-quick vA, vB, offset       /* CCCC */
+    GET_OPA4(a0)                           #  a0 <- A(+)
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
+    # check object for null
+    beqz      a2, common_errNullObject     #  object was null
+    EAS2(a3, rFP, a0)                      #  a3 <- &fp[A]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[A]
+    FETCH(a3, 1)                           #  a3 <- field byte offset
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      a2, a2, a3                   #  a2 <- &obj.field
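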
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_long_to_double.S b/runtime/interpreter/mterp/mips/op_long_to_double.S
new file mode 100644
index 0000000..b83aaf4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_long_to_double.S
@@ -0,0 +1 @@
+%include "mips/funopWide.S" {"instr":"JAL(__floatdidf)", "ld_arg":"LOAD64(rARG0, rARG1, a3)"}
diff --git a/runtime/interpreter/mterp/mips/op_long_to_float.S b/runtime/interpreter/mterp/mips/op_long_to_float.S
new file mode 100644
index 0000000..27faba5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_long_to_float.S
@@ -0,0 +1 @@
+%include "mips/unopNarrower.S" {"instr":"JAL(__floatdisf)", "load":"LOAD64(rARG0, rARG1, a3)"}
diff --git a/runtime/interpreter/mterp/mips/op_long_to_int.S b/runtime/interpreter/mterp/mips/op_long_to_int.S
new file mode 100644
index 0000000..949c180
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "mips/op_move.S"
diff --git a/runtime/interpreter/mterp/mips/op_monitor_enter.S b/runtime/interpreter/mterp/mips/op_monitor_enter.S
new file mode 100644
index 0000000..20d9029
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_monitor_enter.S
@@ -0,0 +1,13 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC()
+    GET_OPA(a2)                            # a2 <- AA
+    GET_VREG(a0, a2)                       # a0 <- vAA (object)
+    move   a1, rSELF                       # a1 <- self
+    JAL(artLockObjectFromCode)             # v0 <- artLockObject(obj, self)
+    bnez v0, MterpException
+    FETCH_ADVANCE_INST(1)                  # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_monitor_exit.S b/runtime/interpreter/mterp/mips/op_monitor_exit.S
new file mode 100644
index 0000000..1eadff9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_monitor_exit.S
@@ -0,0 +1,17 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC()
+    GET_OPA(a2)                            # a2 <- AA
+    GET_VREG(a0, a2)                       # a0 <- vAA (object)
+    move   a1, rSELF                       # a1 <- self
+    JAL(artUnlockObjectFromCode)           # v0 <- artUnlockObject(obj, self)
+    bnez v0, MterpException
+    FETCH_ADVANCE_INST(1)                  # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move.S b/runtime/interpreter/mterp/mips/op_move.S
new file mode 100644
index 0000000..76588ba
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    GET_OPB(a1)                            #  a1 <- B from 15:12
+    GET_OPA4(a0)                           #  a0 <- A from 11:8
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[B]
+    GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
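
Editor's note: for readers new to the 12x instruction format referenced above ("B from 15:12", "A from 11:8"), a small Python sketch of the decode that GET_INST_OPCODE/GET_OPA4/GET_OPB perform on one 16-bit code unit (function name is illustrative):

    def decode_12x(code_unit):
        opcode = code_unit & 0xFF        # bits 7:0  -- GET_INST_OPCODE
        a = (code_unit >> 8) & 0xF       # bits 11:8 -- GET_OPA4
        b = (code_unit >> 12) & 0xF      # bits 15:12 -- GET_OPB
        return opcode, a, b

    assert decode_12x(0x3201) == (0x01, 2, 3)   # move v2, v3
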
diff --git a/runtime/interpreter/mterp/mips/op_move_16.S b/runtime/interpreter/mterp/mips/op_move_16.S
new file mode 100644
index 0000000..f7de6c2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH(a1, 2)                           #  a1 <- BBBB
+    FETCH(a0, 1)                           #  a0 <- AAAA
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_exception.S b/runtime/interpreter/mterp/mips/op_move_exception.S
new file mode 100644
index 0000000..f04a035
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_exception.S
@@ -0,0 +1,8 @@
+    /* move-exception vAA */
+    GET_OPA(a2)                                 #  a2 <- AA
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
+    FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
+    GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
+    GOTO_OPCODE(t0)                             #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_from16.S b/runtime/interpreter/mterp/mips/op_move_from16.S
new file mode 100644
index 0000000..b8be741
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH(a1, 1)                           #  a1 <- BBBB
+    GET_OPA(a0)                            #  a0 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_object.S b/runtime/interpreter/mterp/mips/op_move_object.S
new file mode 100644
index 0000000..9420ff3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_object.S
@@ -0,0 +1 @@
+%include "mips/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_move_object_16.S b/runtime/interpreter/mterp/mips/op_move_object_16.S
new file mode 100644
index 0000000..d6454c2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_object_16.S
@@ -0,0 +1 @@
+%include "mips/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_move_object_from16.S b/runtime/interpreter/mterp/mips/op_move_object_from16.S
new file mode 100644
index 0000000..db0aca1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "mips/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_move_result.S b/runtime/interpreter/mterp/mips/op_move_result.S
new file mode 100644
index 0000000..315c68e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    GET_OPA(a2)                            #  a2 <- AA
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    lw    a0, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
+    lw    a0, 0(a0)                        #  a0 <- result.i
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    .else
+    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result_object.S b/runtime/interpreter/mterp/mips/op_move_result_object.S
new file mode 100644
index 0000000..fcbffee
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_result_object.S
@@ -0,0 +1 @@
+%include "mips/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_move_result_wide.S b/runtime/interpreter/mterp/mips/op_move_result_wide.S
new file mode 100644
index 0000000..940c1ff
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_result_wide.S
@@ -0,0 +1,8 @@
+    /* move-result-wide vAA */
+    GET_OPA(a2)                            #  a2 <- AA
+    lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_wide.S b/runtime/interpreter/mterp/mips/op_move_wide.S
new file mode 100644
index 0000000..dd224c3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_wide.S
@@ -0,0 +1,10 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6, v7" or "move v7, v6" */
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_16.S b/runtime/interpreter/mterp/mips/op_move_wide_16.S
new file mode 100644
index 0000000..d8761eb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_wide_16.S
@@ -0,0 +1,10 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6, v7" or "move v7, v6" */
+    FETCH(a3, 2)                           #  a3 <- BBBB
+    FETCH(a2, 1)                           #  a2 <- AAAA
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_from16.S b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
new file mode 100644
index 0000000..2103fa1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
@@ -0,0 +1,10 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6, v7" or "move v7, v6" */
+    FETCH(a3, 1)                           #  a3 <- BBBB
+    GET_OPA(a2)                            #  a2 <- AA
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_mul_double.S b/runtime/interpreter/mterp/mips/op_mul_double.S
new file mode 100644
index 0000000..44a473b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_double.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide.S" {"instr":"mul.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_double_2addr.S b/runtime/interpreter/mterp/mips/op_mul_double_2addr.S
new file mode 100644
index 0000000..4e5c230
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide2addr.S" {"instr":"mul.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_float.S b/runtime/interpreter/mterp/mips/op_mul_float.S
new file mode 100644
index 0000000..abc9390
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_float.S
@@ -0,0 +1 @@
+%include "mips/fbinop.S" {"instr":"mul.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_float_2addr.S b/runtime/interpreter/mterp/mips/op_mul_float_2addr.S
new file mode 100644
index 0000000..2469109
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinop2addr.S" {"instr":"mul.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_int.S b/runtime/interpreter/mterp/mips/op_mul_int.S
new file mode 100644
index 0000000..266823c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_int_2addr.S b/runtime/interpreter/mterp/mips/op_mul_int_2addr.S
new file mode 100644
index 0000000..b7dc5d3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_int_lit16.S b/runtime/interpreter/mterp/mips/op_mul_int_lit16.S
new file mode 100644
index 0000000..fb4c8ec
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_int_lit16.S
@@ -0,0 +1 @@
+%include "mips/binopLit16.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_int_lit8.S b/runtime/interpreter/mterp/mips/op_mul_int_lit8.S
new file mode 100644
index 0000000..6d2e7de
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_mul_long.S b/runtime/interpreter/mterp/mips/op_mul_long.S
new file mode 100644
index 0000000..803bbec
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_long.S
@@ -0,0 +1,43 @@
+    /*
+     * Signed 64-bit integer multiply.
+     *         a1   a0
+     *   x     a3   a2
+     *   -------------
+     *       a2a1 a2a0
+     *       a3a0
+     *  a3a1 (<= unused)
+     *  ---------------
+     *         v1   v0
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       t0, a0, 255                  #  t0 <- BB
+    srl       t1, a0, 8                    #  t1 <- CC
+    EAS2(t0, rFP, t0)                      #  t0 <- &fp[BB]
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vBB/vBB+1
+
+    EAS2(t1, rFP, t1)                      #  t1 <- &fp[CC]
+    LOAD64(a2, a3, t1)                     #  a2/a3 <- vCC/vCC+1
+
+    mul       v1, a3, a0                   #  v1= a3a0
+#ifdef MIPS32REVGE6
+    mulu      v0, a2, a0                   #  v0= a2a0
+    muhu      t1, a2, a0
+#else
+    multu     a2, a0
+    mfhi      t1
+    mflo      v0                           #  v0= a2a0
+#endif
+    mul       t0, a2, a1                   #  t0= a2a1
+    addu      v1, v1, t1                   #  v1+= hi(a2a0)
+    addu      v1, v1, t0                   #  v1 += a2a1
+
+    GET_OPA(a0)                            #  a0 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    b         .L${opcode}_finish
+%break
+
+.L${opcode}_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
+    GOTO_OPCODE(t0)                        #  jump to next instruction
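
Editor's note: the partial-product diagram in op_mul_long.S maps onto 32-bit limbs as in this hedged Python sketch (not project code); the a3a1 term can never reach the low 64 bits, which is why the handler drops it:

    M = 0xFFFFFFFF

    def mul_long(x, y):
        x_lo, x_hi = x & M, (x >> 32) & M          # a0, a1
        y_lo, y_hi = y & M, (y >> 32) & M          # a2, a3
        lo = (x_lo * y_lo) & M                     # v0 = lo(a2a0)
        hi = (y_hi * x_lo + x_hi * y_lo + ((x_lo * y_lo) >> 32)) & M
        return (hi << 32) | lo                     # v1 = a3a0 + a2a1 + hi(a2a0)

    assert mul_long(0x123456789A, 0xFEDCBA98) == (0x123456789A * 0xFEDCBA98) % 2**64
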
diff --git a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
new file mode 100644
index 0000000..6950b71
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
@@ -0,0 +1,31 @@
+    /*
+     * See op_mul_long.S for more details
+     */
+    /* mul-long/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a0, a1, t0)                     #  vA.low / high
+
+    GET_OPB(t1)                            #  t1 <- B
+    EAS2(t1, rFP, t1)                      #  t1 <- &fp[B]
+    LOAD64(a2, a3, t1)                     #  vB.low / high
+
+    mul       v1, a3, a0                   #  v1= a3a0
+#ifdef MIPS32REVGE6
+    mulu      v0, a2, a0                   #  v0= a2a0
+    muhu      t1, a2, a0
+#else
+    multu     a2, a0
+    mfhi      t1
+    mflo      v0                           #  v0= a2a0
+#endif
+    mul       t2, a2, a1                   #  t2= a2a1
+    addu      v1, v1, t1                   #  v1= a3a0 + hi(a2a0)
+    addu      v1, v1, t2                   #  v1= v1 + a2a1;
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    # vA <- v0 (low)
+    SET_VREG64(v0, v1, rOBJ)               #  vA+1 <- v1 (high)
+    GOTO_OPCODE(t1)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_neg_double.S b/runtime/interpreter/mterp/mips/op_neg_double.S
new file mode 100644
index 0000000..89cc918
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_neg_double.S
@@ -0,0 +1 @@
+%include "mips/unopWide.S" {"instr":"addu a1, a1, 0x80000000"}
diff --git a/runtime/interpreter/mterp/mips/op_neg_float.S b/runtime/interpreter/mterp/mips/op_neg_float.S
new file mode 100644
index 0000000..e702755
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_neg_float.S
@@ -0,0 +1 @@
+%include "mips/unop.S" {"instr":"addu a0, a0, 0x80000000"}
diff --git a/runtime/interpreter/mterp/mips/op_neg_int.S b/runtime/interpreter/mterp/mips/op_neg_int.S
new file mode 100644
index 0000000..4461731
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_neg_int.S
@@ -0,0 +1 @@
+%include "mips/unop.S" {"instr":"negu a0, a0"}
diff --git a/runtime/interpreter/mterp/mips/op_neg_long.S b/runtime/interpreter/mterp/mips/op_neg_long.S
new file mode 100644
index 0000000..71e60f5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_neg_long.S
@@ -0,0 +1 @@
+%include "mips/unopWide.S" {"result0":"v0", "result1":"v1", "preinstr":"negu v0, a0", "instr":"negu v1, a1; sltu a0, zero, v0; subu v1, v1, a0"}
diff --git a/runtime/interpreter/mterp/mips/op_new_array.S b/runtime/interpreter/mterp/mips/op_new_array.S
new file mode 100644
index 0000000..4a6512d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_new_array.S
@@ -0,0 +1,18 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    move   a3, rSELF
+    JAL(MterpNewArray)
+    beqz   v0, MterpPossibleException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_new_instance.S b/runtime/interpreter/mterp/mips/op_new_instance.S
new file mode 100644
index 0000000..51a09b2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_new_instance.S
@@ -0,0 +1,13 @@
+    /*
+     * Create a new instance of a class.
+     */
+    # new-instance vAA, class              /* BBBB */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rSELF
+    move   a2, rINST
+    JAL(MterpNewInstance)
+    beqz   v0, MterpPossibleException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_nop.S b/runtime/interpreter/mterp/mips/op_nop.S
new file mode 100644
index 0000000..3565631
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_not_int.S b/runtime/interpreter/mterp/mips/op_not_int.S
new file mode 100644
index 0000000..55d8cc1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_not_int.S
@@ -0,0 +1 @@
+%include "mips/unop.S" {"instr":"not a0, a0"}
diff --git a/runtime/interpreter/mterp/mips/op_not_long.S b/runtime/interpreter/mterp/mips/op_not_long.S
new file mode 100644
index 0000000..9e7c95b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_not_long.S
@@ -0,0 +1 @@
+%include "mips/unopWide.S" {"preinstr":"not a0, a0", "instr":"not a1, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_or_int.S b/runtime/interpreter/mterp/mips/op_or_int.S
new file mode 100644
index 0000000..c7ce760
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_or_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_or_int_2addr.S b/runtime/interpreter/mterp/mips/op_or_int_2addr.S
new file mode 100644
index 0000000..192d611
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_or_int_lit16.S b/runtime/interpreter/mterp/mips/op_or_int_lit16.S
new file mode 100644
index 0000000..f4ef75f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "mips/binopLit16.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_or_int_lit8.S b/runtime/interpreter/mterp/mips/op_or_int_lit8.S
new file mode 100644
index 0000000..f6212e2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_or_long.S b/runtime/interpreter/mterp/mips/op_or_long.S
new file mode 100644
index 0000000..0f94486
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_or_long.S
@@ -0,0 +1 @@
+%include "mips/binopWide.S" {"preinstr":"or a0, a0, a2", "instr":"or a1, a1, a3"}
diff --git a/runtime/interpreter/mterp/mips/op_or_long_2addr.S b/runtime/interpreter/mterp/mips/op_or_long_2addr.S
new file mode 100644
index 0000000..43c3d05
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "mips/binopWide2addr.S" {"preinstr":"or a0, a0, a2", "instr":"or a1, a1, a3"}
diff --git a/runtime/interpreter/mterp/mips/op_packed_switch.S b/runtime/interpreter/mterp/mips/op_packed_switch.S
new file mode 100644
index 0000000..ffa4f47
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_packed_switch.S
@@ -0,0 +1,21 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
+    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       t0, a1, 16
+    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    GET_VREG(a1, a3)                       #  a1 <- vAA
+    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
+    JAL($func)                             #  v0 <- code-unit branch offset
+    move      rINST, v0
+    b         MterpCommonTakenBranchNoFlags
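
Editor's note: the handler only assembles the signed payload offset BBBBbbbb (scaled to bytes by EAS1) and delegates the table walk to the helper. The lookup semantics, as a hedged Python sketch assuming the helper's miss behavior of falling through the 3-code-unit switch instruction:

    def do_packed_switch(first_key, targets, test_val):
        index = test_val - first_key
        if 0 <= index < len(targets):
            return targets[index]   # branch offset in code units
        return 3                    # no match: step over the switch op itself
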
diff --git a/runtime/interpreter/mterp/mips/op_rem_double.S b/runtime/interpreter/mterp/mips/op_rem_double.S
new file mode 100644
index 0000000..a6890a8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_double.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide.S" {"instr":"JAL(fmod)"}
diff --git a/runtime/interpreter/mterp/mips/op_rem_double_2addr.S b/runtime/interpreter/mterp/mips/op_rem_double_2addr.S
new file mode 100644
index 0000000..a24e160
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_double_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide2addr.S" {"instr":"JAL(fmod)"}
diff --git a/runtime/interpreter/mterp/mips/op_rem_float.S b/runtime/interpreter/mterp/mips/op_rem_float.S
new file mode 100644
index 0000000..ac3d50c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_float.S
@@ -0,0 +1 @@
+%include "mips/fbinop.S" {"instr":"JAL(fmodf)"}
diff --git a/runtime/interpreter/mterp/mips/op_rem_float_2addr.S b/runtime/interpreter/mterp/mips/op_rem_float_2addr.S
new file mode 100644
index 0000000..7f0a932
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_float_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinop2addr.S" {"instr":"JAL(fmodf)"}
diff --git a/runtime/interpreter/mterp/mips/op_rem_int.S b/runtime/interpreter/mterp/mips/op_rem_int.S
new file mode 100644
index 0000000..c2a334a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_int.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binop.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binop.S" {"preinstr":"div zero, a0, a1", "instr":"mfhi a0", "chkzero":"1"}
+#endif
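
Editor's note: both variants above yield Dalvik's truncated remainder (sign follows the dividend), which R6 `mod` and pre-R6 `div`/`mfhi` give directly; `chkzero` routes a zero divisor to the divide-by-zero error path. Python's floored `%` differs, so an equivalent sketch needs the truncation made explicit:

    def dalvik_rem_int(a, b):
        if b == 0:
            raise ZeroDivisionError("interpreter raises the arithmetic-error path")
        r = abs(a) % abs(b)
        return -r if a < 0 else r

    assert dalvik_rem_int(-7, 2) == -1   # Python's -7 % 2 would give 1
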
diff --git a/runtime/interpreter/mterp/mips/op_rem_int_2addr.S b/runtime/interpreter/mterp/mips/op_rem_int_2addr.S
new file mode 100644
index 0000000..46c353f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_int_2addr.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binop2addr.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binop2addr.S" {"preinstr":"div zero, a0, a1", "instr":"mfhi a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_rem_int_lit16.S b/runtime/interpreter/mterp/mips/op_rem_int_lit16.S
new file mode 100644
index 0000000..2894ad3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_int_lit16.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binopLit16.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binopLit16.S" {"preinstr":"div zero, a0, a1", "instr":"mfhi a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_rem_int_lit8.S b/runtime/interpreter/mterp/mips/op_rem_int_lit8.S
new file mode 100644
index 0000000..582248b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_int_lit8.S
@@ -0,0 +1,5 @@
+#ifdef MIPS32REVGE6
+%include "mips/binopLit8.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
+#else
+%include "mips/binopLit8.S" {"preinstr":"div zero, a0, a1", "instr":"mfhi a0", "chkzero":"1"}
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_rem_long.S b/runtime/interpreter/mterp/mips/op_rem_long.S
new file mode 100644
index 0000000..e3eb19b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_long.S
@@ -0,0 +1 @@
+%include "mips/binopWide.S" { "result0":"v0", "result1":"v1", "instr":"JAL(__moddi3)", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_rem_long_2addr.S b/runtime/interpreter/mterp/mips/op_rem_long_2addr.S
new file mode 100644
index 0000000..8fc9fdb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "mips/binopWide2addr.S" { "result0":"v0", "result1":"v1", "instr":"JAL(__moddi3)", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips/op_return.S b/runtime/interpreter/mterp/mips/op_return.S
new file mode 100644
index 0000000..894ae18
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_return.S
@@ -0,0 +1,18 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    GET_OPA(a2)                            #  a2 <- AA
+    GET_VREG(v0, a2)                       #  v0 <- vAA
+    move      v1, zero
+    b         MterpReturn
diff --git a/runtime/interpreter/mterp/mips/op_return_object.S b/runtime/interpreter/mterp/mips/op_return_object.S
new file mode 100644
index 0000000..7350e00
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_return_object.S
@@ -0,0 +1 @@
+%include "mips/op_return.S"
diff --git a/runtime/interpreter/mterp/mips/op_return_void.S b/runtime/interpreter/mterp/mips/op_return_void.S
new file mode 100644
index 0000000..35c1326
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_return_void.S
@@ -0,0 +1,11 @@
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    move      v0, zero
+    move      v1, zero
+    b         MterpReturn
diff --git a/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S b/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
new file mode 100644
index 0000000..56968b5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
@@ -0,0 +1,9 @@
+    lw     ra, THREAD_FLAGS_OFFSET(rSELF)
+    move   a0, rSELF
+    and    ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz   ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    move   v0, zero
+    move   v1, zero
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/mips/op_return_wide.S b/runtime/interpreter/mterp/mips/op_return_wide.S
new file mode 100644
index 0000000..91d62bf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_return_wide.S
@@ -0,0 +1,16 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    GET_OPA(a2)                            #  a2 <- AA
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[AA]
+    LOAD64(v0, v1, a2)                     #  v0/v1 <- vAA/vAA+1
+    b         MterpReturn
diff --git a/runtime/interpreter/mterp/mips/op_rsub_int.S b/runtime/interpreter/mterp/mips/op_rsub_int.S
new file mode 100644
index 0000000..f7e61bb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "mips/binopLit16.S" {"instr":"subu a0, a1, a0"}
diff --git a/runtime/interpreter/mterp/mips/op_rsub_int_lit8.S b/runtime/interpreter/mterp/mips/op_rsub_int_lit8.S
new file mode 100644
index 0000000..3968a5e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"subu a0, a1, a0"}
diff --git a/runtime/interpreter/mterp/mips/op_sget.S b/runtime/interpreter/mterp/mips/op_sget.S
new file mode 100644
index 0000000..3efcfbb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget.S
@@ -0,0 +1,25 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern $helper
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL($helper)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if $is_object
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_sget_boolean.S b/runtime/interpreter/mterp/mips/op_sget_boolean.S
new file mode 100644
index 0000000..45a5a70
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "mips/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_byte.S b/runtime/interpreter/mterp/mips/op_sget_byte.S
new file mode 100644
index 0000000..319122c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget_byte.S
@@ -0,0 +1 @@
+%include "mips/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_char.S b/runtime/interpreter/mterp/mips/op_sget_char.S
new file mode 100644
index 0000000..7103847
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget_char.S
@@ -0,0 +1 @@
+%include "mips/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_object.S b/runtime/interpreter/mterp/mips/op_sget_object.S
new file mode 100644
index 0000000..b205f51
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget_object.S
@@ -0,0 +1 @@
+%include "mips/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_short.S b/runtime/interpreter/mterp/mips/op_sget_short.S
new file mode 100644
index 0000000..3301823
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget_short.S
@@ -0,0 +1 @@
+%include "mips/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_wide.S b/runtime/interpreter/mterp/mips/op_sget_wide.S
new file mode 100644
index 0000000..7aee386
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sget_wide.S
@@ -0,0 +1,17 @@
+    /*
+     * 64-bit SGET handler.
+     */
+    # sget-wide vAA, field                 /* BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGet64StaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    bnez  a3, MterpException
+    GET_OPA(a1)                            # a1 <- AA
+    FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
+    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_shl_int.S b/runtime/interpreter/mterp/mips/op_shl_int.S
new file mode 100644
index 0000000..15cbe94
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shl_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"sll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_shl_int_2addr.S b/runtime/interpreter/mterp/mips/op_shl_int_2addr.S
new file mode 100644
index 0000000..ef9bd65
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"sll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_shl_int_lit8.S b/runtime/interpreter/mterp/mips/op_shl_int_lit8.S
new file mode 100644
index 0000000..d2afb53
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"sll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_shl_long.S b/runtime/interpreter/mterp/mips/op_shl_long.S
new file mode 100644
index 0000000..0121669
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shl_long.S
@@ -0,0 +1,31 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(t2)                            #  t2 <- AA
+    and       a3, a0, 255                  #  a3 <- BB
+    srl       a0, a0, 8                    #  a0 <- CC
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BB]
+    GET_VREG(a2, a0)                       #  a2 <- vCC
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vBB/vBB+1
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v1, a2, 0x20                   #  v1 <- shift & 0x20
+    sll     v0, a0, a2                     #  rlo<- alo << (shift&31)
+    bnez    v1, .L${opcode}_finish
+    not     v1, a2                         #  rhi<- 31-shift  (shift is 5b)
+    srl     a0, 1
+    srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
+    sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
+    or      v1, a0                         #  rhi<- rhi | alo
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
+%break
+
+.L${opcode}_finish:
+    SET_VREG64_GOTO(zero, v0, t2, t0)      #  vAA/vAA+1 <- rlo/rhi
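
Editor's note: the two-step `srl a0, 1; srl a0, v1` exists because a single shift by (32 - s) is out of range when s == 0; splitting it as >>1 then >>(31 - s) keeps every shift amount in 0..31. The whole handler, as a hedged Python sketch over 32-bit halves:

    M = 0xFFFFFFFF

    def shl_long(lo, hi, shift):
        s = shift & 63                     # Dalvik masks the distance to 6 bits
        if s & 0x20:                       # 32..63: low word vanishes
            return 0, (lo << (s & 31)) & M
        r_lo = (lo << s) & M
        r_hi = ((hi << s) | (lo >> 1 >> (31 - s))) & M
        return r_lo, r_hi

    assert shl_long(0x80000001, 0, 4) == (0x00000010, 0x8)
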
diff --git a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
new file mode 100644
index 0000000..8ce6058
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a2, a3)                       #  a2 <- vB
+    EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v1, a2, 0x20                   #  v1 <- shift & 0x20
+    sll     v0, a0, a2                     #  rlo<- alo << (shift&31)
+    bnez    v1, .L${opcode}_finish
+    not     v1, a2                         #  rhi<- 31-shift  (shift is 5b)
+    srl     a0, 1
+    srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
+    sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
+    or      v1, a0                         #  rhi<- rhi | alo
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
+%break
+
+.L${opcode}_finish:
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_shr_int.S b/runtime/interpreter/mterp/mips/op_shr_int.S
new file mode 100644
index 0000000..6110839
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shr_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"sra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_shr_int_2addr.S b/runtime/interpreter/mterp/mips/op_shr_int_2addr.S
new file mode 100644
index 0000000..e00ff5b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"sra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_shr_int_lit8.S b/runtime/interpreter/mterp/mips/op_shr_int_lit8.S
new file mode 100644
index 0000000..d058f58
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"sra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_shr_long.S b/runtime/interpreter/mterp/mips/op_shr_long.S
new file mode 100644
index 0000000..4c42758
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shr_long.S
@@ -0,0 +1,31 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(t3)                            #  t3 <- AA
+    and       a3, a0, 255                  #  a3 <- BB
+    srl       a0, a0, 8                    #  a0 <- CC
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BB]
+    GET_VREG(a2, a0)                       #  a2 <- vCC
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vBB/vBB+1
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v0, a2, 0x20                   #  shift & 0x20
+    sra     v1, a1, a2                     #  rhi<- ahi >> (shift&31)
+    bnez    v0, .L${opcode}_finish
+    srl     v0, a0, a2                     #  rlo<- alo >> (shift&31)
+    not     a0, a2                         #  alo<- 31-shift (shift is 5b)
+    sll     a1, 1
+    sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
+    or      v0, a1                         #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- v0/v1
+%break
+
+.L${opcode}_finish:
+    sra     a3, a1, 31                     #  a3<- sign(ah)
+    SET_VREG64_GOTO(v1, a3, t3, t0)        #  vAA/vAA+1 <- rlo/rhi
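
Editor's note: same structure as shl-long, but the high word needs an arithmetic shift, and the >=32 case replicates the sign into the new high word (the `sra a3, a1, 31` in the finish label). A hedged Python sketch, modeling `sra` on values stored unsigned:

    M = 0xFFFFFFFF

    def sra32(x, s):                        # arithmetic >> on a uint32-stored value
        return ((x - (1 << 32) if x & 0x80000000 else x) >> s) & M

    def shr_long(lo, hi, shift):
        s = shift & 63
        if s & 0x20:                        # 32..63: result high word is pure sign
            return sra32(hi, s & 31), sra32(hi, 31)
        r_lo = ((lo >> s) | (hi << 1 << (31 - s))) & M
        return r_lo, sra32(hi, s)

    assert shr_long(0, 0x80000000, 33) == (0xC0000000, 0xFFFFFFFF)   # -2**63 >> 33
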
diff --git a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
new file mode 100644
index 0000000..3adc085
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    GET_OPA4(t2)                           #  t2 <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a2, a3)                       #  a2 <- vB
+    EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v0, a2, 0x20                   #  shift & 0x20
+    sra     v1, a1, a2                     #  rhi<- ahi >> (shift&31)
+    bnez    v0, .L${opcode}_finish
+    srl     v0, a0, a2                     #  rlo<- alo >> (shift&31)
+    not     a0, a2                         #  alo<- 31-shift (shift is 5b)
+    sll     a1, 1
+    sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
+    or      v0, a1                         #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
+%break
+
+.L${opcode}_finish:
+    sra     a3, a1, 31                     #  a3<- sign(ah)
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_sparse_switch.S b/runtime/interpreter/mterp/mips/op_sparse_switch.S
new file mode 100644
index 0000000..670f464
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "mips/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/mips/op_sput.S b/runtime/interpreter/mterp/mips/op_sput.S
new file mode 100644
index 0000000..ee313b9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput.S
@@ -0,0 +1,19 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    # op vAA, field                        /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    GET_OPA(a3)                            # a3 <- AA
+    GET_VREG(a1, a3)                       # a1 <- fp[AA], the new value
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL($helper)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_sput_boolean.S b/runtime/interpreter/mterp/mips/op_sput_boolean.S
new file mode 100644
index 0000000..7909ef5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "mips/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_byte.S b/runtime/interpreter/mterp/mips/op_sput_byte.S
new file mode 100644
index 0000000..7909ef5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput_byte.S
@@ -0,0 +1 @@
+%include "mips/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_char.S b/runtime/interpreter/mterp/mips/op_sput_char.S
new file mode 100644
index 0000000..188195c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput_char.S
@@ -0,0 +1 @@
+%include "mips/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_object.S b/runtime/interpreter/mterp/mips/op_sput_object.S
new file mode 100644
index 0000000..4f9034e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput_object.S
@@ -0,0 +1,16 @@
+    /*
+     * General 32-bit SPUT handler.
+     *
+     * for: sput-object
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    move   a3, rSELF
+    JAL(MterpSputObject)
+    beqz   v0, MterpException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_sput_short.S b/runtime/interpreter/mterp/mips/op_sput_short.S
new file mode 100644
index 0000000..188195c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput_short.S
@@ -0,0 +1 @@
+%include "mips/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_wide.S b/runtime/interpreter/mterp/mips/op_sput_wide.S
new file mode 100644
index 0000000..1e11466
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sput_wide.S
@@ -0,0 +1,17 @@
+    /*
+     * 64-bit SPUT handler.
+     */
+    # sput-wide vAA, field                 /* BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    GET_OPA(a2)                            # a2 <- AA
+    EAS2(a2, rFP, a2)                      # a2 <- &fp[AA]
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet64IndirectStaticFromMterp)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_sub_double.S b/runtime/interpreter/mterp/mips/op_sub_double.S
new file mode 100644
index 0000000..9473218
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_double.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide.S" {"instr":"sub.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_sub_double_2addr.S b/runtime/interpreter/mterp/mips/op_sub_double_2addr.S
new file mode 100644
index 0000000..7ce7c74
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinopWide2addr.S" {"instr":"sub.d fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_sub_float.S b/runtime/interpreter/mterp/mips/op_sub_float.S
new file mode 100644
index 0000000..04650d9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_float.S
@@ -0,0 +1 @@
+%include "mips/fbinop.S" {"instr":"sub.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_sub_float_2addr.S b/runtime/interpreter/mterp/mips/op_sub_float_2addr.S
new file mode 100644
index 0000000..dfe935c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "mips/fbinop2addr.S" {"instr":"sub.s fv0, fa0, fa1"}
diff --git a/runtime/interpreter/mterp/mips/op_sub_int.S b/runtime/interpreter/mterp/mips/op_sub_int.S
new file mode 100644
index 0000000..43da1b6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"subu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_sub_int_2addr.S b/runtime/interpreter/mterp/mips/op_sub_int_2addr.S
new file mode 100644
index 0000000..cf34aa6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"subu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_sub_long.S b/runtime/interpreter/mterp/mips/op_sub_long.S
new file mode 100644
index 0000000..0f58e8e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_long.S
@@ -0,0 +1,8 @@
+/*
+ * For little endian the code sequence looks as follows:
+ *    subu    v0,a0,a2
+ *    subu    v1,a1,a3
+ *    sltu    a0,a0,v0
+ *    subu    v1,v1,a0
+ */
+%include "mips/binopWide.S" { "result0":"v0", "result1":"v1", "preinstr":"subu v0, a0, a2", "instr":"subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0" }
diff --git a/runtime/interpreter/mterp/mips/op_sub_long_2addr.S b/runtime/interpreter/mterp/mips/op_sub_long_2addr.S
new file mode 100644
index 0000000..aa256c2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_sub_long_2addr.S
@@ -0,0 +1,4 @@
+/*
+ * See op_sub_long.S for more details.
+ */
+%include "mips/binopWide2addr.S" { "result0":"v0", "result1":"v1", "preinstr":"subu v0, a0, a2", "instr":"subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0" }
diff --git a/runtime/interpreter/mterp/mips/op_throw.S b/runtime/interpreter/mterp/mips/op_throw.S
new file mode 100644
index 0000000..adc8b04
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_throw.S
@@ -0,0 +1,11 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC()                              #  exception handler can throw
+    GET_OPA(a2)                              #  a2 <- AA
+    GET_VREG(a1, a2)                         #  a1 <- vAA (exception object)
+    # null object?
+    beqz  a1, common_errNullObject           #  yes, throw an NPE instead
+    sw    a1, THREAD_EXCEPTION_OFFSET(rSELF) #  thread->exception <- obj
+    b         MterpException
diff --git a/runtime/interpreter/mterp/mips/op_unused_3e.S b/runtime/interpreter/mterp/mips/op_unused_3e.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_3e.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_3f.S b/runtime/interpreter/mterp/mips/op_unused_3f.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_3f.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_40.S b/runtime/interpreter/mterp/mips/op_unused_40.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_40.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_41.S b/runtime/interpreter/mterp/mips/op_unused_41.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_41.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_42.S b/runtime/interpreter/mterp/mips/op_unused_42.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_42.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_43.S b/runtime/interpreter/mterp/mips/op_unused_43.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_43.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_73.S b/runtime/interpreter/mterp/mips/op_unused_73.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_73.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_79.S b/runtime/interpreter/mterp/mips/op_unused_79.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_79.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_7a.S b/runtime/interpreter/mterp/mips/op_unused_7a.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_7a.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f3.S b/runtime/interpreter/mterp/mips/op_unused_f3.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f3.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f4.S b/runtime/interpreter/mterp/mips/op_unused_f4.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f4.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f5.S b/runtime/interpreter/mterp/mips/op_unused_f5.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f5.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f6.S b/runtime/interpreter/mterp/mips/op_unused_f6.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f6.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f7.S b/runtime/interpreter/mterp/mips/op_unused_f7.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f7.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f8.S b/runtime/interpreter/mterp/mips/op_unused_f8.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f8.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_f9.S b/runtime/interpreter/mterp/mips/op_unused_f9.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_f9.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_fa.S b/runtime/interpreter/mterp/mips/op_unused_fa.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_fa.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_fb.S b/runtime/interpreter/mterp/mips/op_unused_fb.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_fb.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_fc.S b/runtime/interpreter/mterp/mips/op_unused_fc.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_fc.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_fd.S b/runtime/interpreter/mterp/mips/op_unused_fd.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_fd.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_fe.S b/runtime/interpreter/mterp/mips/op_unused_fe.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_fe.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_ff.S b/runtime/interpreter/mterp/mips/op_unused_ff.S
new file mode 100644
index 0000000..99ef3cf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_unused_ff.S
@@ -0,0 +1 @@
+%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_ushr_int.S b/runtime/interpreter/mterp/mips/op_ushr_int.S
new file mode 100644
index 0000000..b95472b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_ushr_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"srl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_ushr_int_2addr.S b/runtime/interpreter/mterp/mips/op_ushr_int_2addr.S
new file mode 100644
index 0000000..fc17778
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"srl a0, a0, a1 "}
diff --git a/runtime/interpreter/mterp/mips/op_ushr_int_lit8.S b/runtime/interpreter/mterp/mips/op_ushr_int_lit8.S
new file mode 100644
index 0000000..c82cfba
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"srl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_ushr_long.S b/runtime/interpreter/mterp/mips/op_ushr_long.S
new file mode 100644
index 0000000..2e227a9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_ushr_long.S
@@ -0,0 +1,31 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a3, a0, 255                  #  a3 <- BB
+    srl       a0, a0, 8                    #  a0 <- CC
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BB]
+    GET_VREG(a2, a0)                       #  a2 <- vCC
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vBB/vBB+1
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi      v0, a2, 0x20                 #  shift & 0x20
+    srl       v1, a1, a2                   #  rhi<- ahi >> (shift&31)
+    bnez      v0, .L${opcode}_finish
+    srl       v0, a0, a2                   #  rlo<- alo >> (shift&31)
+    not       a0, a2                       #  alo<- 31-n  (shift is 5b)
+    sll       a1, 1
+    sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
+    or        v0, a1                       #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- v0/v1
+%break
+
+.L${opcode}_finish:
+    SET_VREG64_GOTO(v1, zero, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
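In C, the two-path shift above (shift < 32 vs. shift >= 32) amounts to the sketch below; the explicit `s != 0` guard stands in for the MIPS two-step shift, which avoids an undefined shift by 32 (illustrative only):

    #include <stdint.h>

    static uint64_t ushr_long(uint32_t lo, uint32_t hi, uint32_t shift) {
        uint32_t s = shift & 63;             /* Dalvik masks to 6 bits */
        if (s & 0x20) {                      /* andi v0, a2, 0x20 path */
            return hi >> (s & 31);           /* high result word is zero */
        }
        uint32_t rlo = lo >> s;
        uint32_t rhi = hi >> s;
        if (s != 0) {
            rlo |= hi << (32 - s);           /* sll a1, 1; sll a1, a0; or */
        }
        return ((uint64_t)rhi << 32) | rlo;
    }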
diff --git a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
new file mode 100644
index 0000000..ccf1f7e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    GET_OPA4(t3)                           #  t3 <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a2, a3)                       #  a2 <- vB
+    EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi      v0, a2, 0x20                 #  shift & 0x20
+    srl       v1, a1, a2                   #  rhi<- ahi >> (shift&31)
+    bnez      v0, .L${opcode}_finish
+    srl       v0, a0, a2                   #  rlo<- alo >> (shift&31)
+    not       a0, a2                       #  alo<- 31-n  (shift is 5b)
+    sll       a1, 1
+    sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
+    or        v0, a1                       #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
+%break
+
+.L${opcode}_finish:
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_xor_int.S b/runtime/interpreter/mterp/mips/op_xor_int.S
new file mode 100644
index 0000000..6c23f1f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_xor_int.S
@@ -0,0 +1 @@
+%include "mips/binop.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_xor_int_2addr.S b/runtime/interpreter/mterp/mips/op_xor_int_2addr.S
new file mode 100644
index 0000000..5ee1667
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "mips/binop2addr.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_xor_int_lit16.S b/runtime/interpreter/mterp/mips/op_xor_int_lit16.S
new file mode 100644
index 0000000..2af37a6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "mips/binopLit16.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_xor_int_lit8.S b/runtime/interpreter/mterp/mips/op_xor_int_lit8.S
new file mode 100644
index 0000000..944ed69
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "mips/binopLit8.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips/op_xor_long.S b/runtime/interpreter/mterp/mips/op_xor_long.S
new file mode 100644
index 0000000..93f8f70
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_xor_long.S
@@ -0,0 +1 @@
+%include "mips/binopWide.S" {"preinstr":"xor a0, a0, a2", "instr":"xor a1, a1, a3"}
diff --git a/runtime/interpreter/mterp/mips/op_xor_long_2addr.S b/runtime/interpreter/mterp/mips/op_xor_long_2addr.S
new file mode 100644
index 0000000..49f3fa4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "mips/binopWide2addr.S" {"preinstr":"xor a0, a0, a2", "instr":"xor a1, a1, a3"}
diff --git a/runtime/interpreter/mterp/mips/unop.S b/runtime/interpreter/mterp/mips/unop.S
new file mode 100644
index 0000000..52a8f0a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/unop.S
@@ -0,0 +1,19 @@
+%default {"preinstr":"", "result0":"a0"}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $preinstr                              #  optional op
+    $instr                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO($result0, t0, t1)        #  vAA <- result0
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/mips/unopNarrower.S b/runtime/interpreter/mterp/mips/unopNarrower.S
new file mode 100644
index 0000000..9c38bad
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/unopNarrower.S
@@ -0,0 +1,24 @@
+%default {"load":"LOAD64_F(fa0, fa0f, a3)"}
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0/a1", where
+     * "result" is a 32-bit quantity in a0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     * If hard floating point support is available, use fa0 as the parameter,
+     * except for the long-to-float opcode.
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for OP_MOVE.)
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    $load
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $instr
+
+.L${opcode}_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/unopWide.S b/runtime/interpreter/mterp/mips/unopWide.S
new file mode 100644
index 0000000..fd25dff
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/unopWide.S
@@ -0,0 +1,20 @@
+%default {"preinstr":"", "result0":"a0", "result1":"a1"}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $preinstr                              #  optional op
+    $instr                                 #  a0/a1 <- op, a2-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64($result0, $result1, rOBJ)   #  vA/vA+1 <- result0/result1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/unopWider.S b/runtime/interpreter/mterp/mips/unopWider.S
new file mode 100644
index 0000000..1c18837
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/unopWider.S
@@ -0,0 +1,19 @@
+%default {"preinstr":"", "result0":"a0", "result1":"a1"}
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0", where
+     * "result" is a 64-bit quantity in a0/a1.
+     *
+     * For: int-to-long
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    $preinstr                              #  optional op
+    $instr                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64($result0, $result1, rOBJ)   #  vA/vA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 10-11 instructions */
diff --git a/runtime/interpreter/mterp/mips/unused.S b/runtime/interpreter/mterp/mips/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
diff --git a/runtime/interpreter/mterp/mips/zcmp.S b/runtime/interpreter/mterp/mips/zcmp.S
new file mode 100644
index 0000000..8d3a198
--- /dev/null
+++ b/runtime/interpreter/mterp/mips/zcmp.S
@@ -0,0 +1,16 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    b${condition} a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
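As a C sketch, the one-operand compare-and-branch template reduces to the following decision; constants and label names follow the assembly, control flow is simplified:

    #include <stdint.h>

    /* Returns the pc advance, in 16-bit code units, for an if-eqz. */
    static int32_t if_eqz_step(int32_t vaa, int16_t branch_offset,
                               int32_t profile, int32_t jit_check_osr) {
        if (vaa == 0) {
            return branch_offset;  /* taken: MterpCommonTakenBranchNoFlags */
        }
        if (profile == jit_check_osr) {
            /* not taken: .L_check_not_taken_osr probes for an OSR entry */
        }
        return 2;                  /* fall through: FETCH_ADVANCE_INST(2) */
    }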
diff --git a/runtime/interpreter/mterp/mips64/alt_stub.S b/runtime/interpreter/mterp/mips64/alt_stub.S
new file mode 100644
index 0000000..12fa84d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/alt_stub.S
@@ -0,0 +1,14 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (${opnum} * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
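The primary-handler address pre-loaded into ra is simply base plus opcode times a fixed stride. A C model (names illustrative):

    #include <stdint.h>

    typedef void (*Handler)(void);

    /* dla ra, artMterpAsmInstructionStart; daddu ra, ra, opnum * 128 */
    static Handler primary_handler(uintptr_t handler_base, unsigned opnum) {
        return (Handler)(handler_base + opnum * 128u);  /* 128-byte stride */
    }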
diff --git a/runtime/interpreter/mterp/mips64/bincmp.S b/runtime/interpreter/mterp/mips64/bincmp.S
new file mode 100644
index 0000000..07b1210
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/bincmp.S
@@ -0,0 +1,20 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    b${condition}c a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/binop.S b/runtime/interpreter/mterp/mips64/binop.S
new file mode 100644
index 0000000..fab48b7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/binop.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if $chkzero
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    $preinstr                           # optional op
+    $instr                              # $result <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG $result, a4                # vAA <- $result
+    GOTO_OPCODE v0                      # jump to next instruction
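In C, one expansion of this template behaves like the sketch below; `op` stands in for the substituted "instr" line, and the error path is elided (illustrative, not part of the patch):

    #include <stdint.h>

    static void binop_step(const uint8_t* pc, int32_t* vregs, int chkzero,
                           int32_t (*op)(int32_t, int32_t)) {
        uint32_t aa = pc[1];                    /* srl a4, rINST, 8 */
        uint32_t bb = pc[2];                    /* lbu a2, 2(rPC) */
        uint32_t cc = pc[3];                    /* lbu a3, 3(rPC) */
        if (chkzero && vregs[cc] == 0) {
            return;                             /* common_errDivideByZero */
        }
        vregs[aa] = op(vregs[bb], vregs[cc]);   /* SET_VREG $result, a4 */
        /* FETCH_ADVANCE_INST 2 / GOTO_OPCODE then dispatch the next op */
    }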
diff --git a/runtime/interpreter/mterp/mips64/binop2addr.S b/runtime/interpreter/mterp/mips64/binop2addr.S
new file mode 100644
index 0000000..1ae73f5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/binop2addr.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if $chkzero
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    $preinstr                           # optional op
+    $instr                              # $result <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG $result, a2                # vA <- $result
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/binopLit16.S b/runtime/interpreter/mterp/mips64/binopLit16.S
new file mode 100644
index 0000000..9257758
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/binopLit16.S
@@ -0,0 +1,28 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if $chkzero
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    $preinstr                           # optional op
+    $instr                              # $result <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG $result, a2                # vA <- $result
+    GOTO_OPCODE v0                      # jump to next instruction
+
diff --git a/runtime/interpreter/mterp/mips64/binopLit8.S b/runtime/interpreter/mterp/mips64/binopLit8.S
new file mode 100644
index 0000000..f4a0bba
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/binopLit8.S
@@ -0,0 +1,29 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if $chkzero
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    $preinstr                           # optional op
+    $instr                              # $result <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG $result, a2                # vAA <- $result
+    GOTO_OPCODE v0                      # jump to next instruction
+
diff --git a/runtime/interpreter/mterp/mips64/binopWide.S b/runtime/interpreter/mterp/mips64/binopWide.S
new file mode 100644
index 0000000..732f0d6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/binopWide.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if $chkzero
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    $preinstr                           # optional op
+    $instr                              # $result <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE $result, a4           # vAA <- $result
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/binopWide2addr.S b/runtime/interpreter/mterp/mips64/binopWide2addr.S
new file mode 100644
index 0000000..45d8d82
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/binopWide2addr.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"a0", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if $chkzero
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    $preinstr                           # optional op
+    $instr                              # $result <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE $result, a2           # vA <- $result
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/entry.S b/runtime/interpreter/mterp/mips64/entry.S
new file mode 100644
index 0000000..cc48d45
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/entry.S
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Interpreter entry point.
+ */
+
+    .set    reorder
+
+    .text
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+/*
+ * On entry:
+ *  a0  Thread* self
+ *  a1  code_item
+ *  a2  ShadowFrame
+ *  a3  JValue* result_register
+ *
+ */
+ExecuteMterpImpl:
+    .cfi_startproc
+    .cpsetup t9, t8, ExecuteMterpImpl
+
+    .cfi_def_cfa sp, 0
+    daddu   sp, sp, -STACK_SIZE
+    .cfi_adjust_cfa_offset STACK_SIZE
+
+    sd      t8, STACK_OFFSET_GP(sp)
+    .cfi_rel_offset 28, STACK_OFFSET_GP
+    sd      ra, STACK_OFFSET_RA(sp)
+    .cfi_rel_offset 31, STACK_OFFSET_RA
+
+    sd      s0, STACK_OFFSET_S0(sp)
+    .cfi_rel_offset 16, STACK_OFFSET_S0
+    sd      s1, STACK_OFFSET_S1(sp)
+    .cfi_rel_offset 17, STACK_OFFSET_S1
+    sd      s2, STACK_OFFSET_S2(sp)
+    .cfi_rel_offset 18, STACK_OFFSET_S2
+    sd      s3, STACK_OFFSET_S3(sp)
+    .cfi_rel_offset 19, STACK_OFFSET_S3
+    sd      s4, STACK_OFFSET_S4(sp)
+    .cfi_rel_offset 20, STACK_OFFSET_S4
+    sd      s5, STACK_OFFSET_S5(sp)
+    .cfi_rel_offset 21, STACK_OFFSET_S5
+    sd      s6, STACK_OFFSET_S6(sp)
+    .cfi_rel_offset 22, STACK_OFFSET_S6
+
+    /* Remember the return register */
+    sd      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
+
+    /* Remember the code_item */
+    sd      a1, SHADOWFRAME_CODE_ITEM_OFFSET(a2)
+
+    /* set up "named" registers */
+    move    rSELF, a0
+    daddu   rFP, a2, SHADOWFRAME_VREGS_OFFSET
+    lw      v0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
+    dlsa    rREFS, v0, rFP, 2
+    daddu   rPC, a1, CODEITEM_INSNS_OFFSET
+    lw      v0, SHADOWFRAME_DEX_PC_OFFSET(a2)
+    dlsa    rPC, v0, rPC, 1
+    EXPORT_PC
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* Set up for backwards branches & osr profiling */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpSetUpHotnessCountdown
+    move    rPROFILE, v0                # Starting hotness countdown to rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+    /* NOTE: no fallthrough */
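The "named register" setup above is a few pointer computations over the shadow frame. A C sketch under a simplified stand-in struct (field names only loosely mirror asm_support.h):

    #include <stdint.h>

    struct Frame {                 /* illustrative subset of ShadowFrame */
        uint32_t num_vregs;
        uint32_t dex_pc;
        uint32_t vregs[];          /* vregs, then the reference slots */
    };

    static void name_registers(struct Frame* f, const uint16_t* insns) {
        uint32_t* fp   = f->vregs;                  /* rFP */
        uint32_t* refs = fp + f->num_vregs;         /* dlsa rREFS, v0, rFP, 2 */
        const uint16_t* pc = insns + f->dex_pc;     /* dlsa rPC, v0, rPC, 1 */
        (void)fp; (void)refs; (void)pc;
    }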
diff --git a/runtime/interpreter/mterp/mips64/fallback.S b/runtime/interpreter/mterp/mips64/fallback.S
new file mode 100644
index 0000000..560b994
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fallback.S
@@ -0,0 +1,2 @@
+/* Transfer stub to alternate interpreter */
+    b       MterpFallback
diff --git a/runtime/interpreter/mterp/mips64/fbinop.S b/runtime/interpreter/mterp/mips64/fbinop.S
new file mode 100644
index 0000000..f19dd1c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fbinop.S
@@ -0,0 +1,18 @@
+%default {}
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    $instr                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/fbinop2addr.S b/runtime/interpreter/mterp/mips64/fbinop2addr.S
new file mode 100644
index 0000000..2e2cd7e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fbinop2addr.S
@@ -0,0 +1,17 @@
+%default {}
+    /*
+     * Generic 32-bit "/2addr" floating-point operation.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f0, a2               # f0 <- vA
+    GET_VREG_FLOAT f1, a3               # f1 <- vB
+    $instr                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/fbinopWide.S b/runtime/interpreter/mterp/mips64/fbinopWide.S
new file mode 100644
index 0000000..8915c94
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fbinopWide.S
@@ -0,0 +1,18 @@
+%default {}
+    /*
+     * Generic 64-bit floating-point operation.
+     *
+     * For: add-double, sub-double, mul-double, div-double.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    $instr                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/fbinopWide2addr.S b/runtime/interpreter/mterp/mips64/fbinopWide2addr.S
new file mode 100644
index 0000000..a3f4eaa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fbinopWide2addr.S
@@ -0,0 +1,17 @@
+%default {}
+    /*
+     * Generic 64-bit "/2addr" floating-point operation.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr, div-double/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f0, a2              # f0 <- vA
+    GET_VREG_DOUBLE f1, a3              # f1 <- vB
+    $instr                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/fcmp.S b/runtime/interpreter/mterp/mips64/fcmp.S
new file mode 100644
index 0000000..2e1a3e4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fcmp.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * For: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    cmp.eq.s f2, f0, f1
+    li      a0, 0
+    bc1nez  f2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.s f2, f0, f1
+    li      a0, -1
+    bc1nez  f2, 1f                      # done if vBB < vCC (ordered)
+    li      a0, 1                       # vBB > vCC or unordered
+    .else
+    cmp.lt.s f2, f1, f0
+    li      a0, 1
+    bc1nez  f2, 1f                      # done if vBB > vCC (ordered)
+    li      a0, -1                      # vBB < vCC or unordered
+    .endif
+1:
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
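The gt_bias branch above only matters when a NaN makes the comparison unordered; cmpl and cmpg are otherwise identical. A C model:

    /* cmpl-float / cmpg-float result: 0, -1, or 1. */
    static int cmp_float(float x, float y, int gt_bias) {
        if (x == y) return 0;        /* cmp.eq.s: ordered equal */
        if (x < y)  return -1;
        if (x > y)  return 1;
        return gt_bias ? 1 : -1;     /* unordered: cmpg -> 1, cmpl -> -1 */
    }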
diff --git a/runtime/interpreter/mterp/mips64/fcmpWide.S b/runtime/interpreter/mterp/mips64/fcmpWide.S
new file mode 100644
index 0000000..2a3a341
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fcmpWide.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    cmp.eq.d f2, f0, f1
+    li      a0, 0
+    bc1nez  f2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.d f2, f0, f1
+    li      a0, -1
+    bc1nez  f2, 1f                      # done if vBB < vCC (ordered)
+    li      a0, 1                       # vBB > vCC or unordered
+    .else
+    cmp.lt.d f2, f1, f0
+    li      a0, 1
+    bc1nez  f2, 1f                      # done if vBB > vCC (ordered)
+    li      a0, -1                      # vBB < vCC or unordered
+    .endif
+1:
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/fcvtFooter.S b/runtime/interpreter/mterp/mips64/fcvtFooter.S
new file mode 100644
index 0000000..06e9507
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fcvtFooter.S
@@ -0,0 +1,18 @@
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be %included after a %break in other
+     * files; those files carry a copy of its contents instead.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG$suffix $valreg, a1
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/fcvtHeader.S b/runtime/interpreter/mterp/mips64/fcvtHeader.S
new file mode 100644
index 0000000..8742e42
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/fcvtHeader.S
@@ -0,0 +1,15 @@
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG$suffix $valreg, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
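Taken together with fcvtFooter.S, a conversion opcode expands to roughly this C shape (the separate int/double register banks are a simplification for illustration):

    #include <stdint.h>

    static void int_to_double_step(uint16_t inst, const int32_t* vregs,
                                   double* dregs) {
        unsigned a = (inst >> 8) & 0xfu;  /* ext a1, rINST, 8, 4 */
        unsigned b = inst >> 12;          /* srl a2, rINST, 12 */
        dregs[a] = (double)vregs[b];      /* opcode body converts, footer stores */
    }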
diff --git a/runtime/interpreter/mterp/mips64/footer.S b/runtime/interpreter/mterp/mips64/footer.S
new file mode 100644
index 0000000..9994169
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/footer.S
@@ -0,0 +1,282 @@
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+
+    .extern MterpLogDivideByZeroException
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogDivideByZeroException
+#endif
+    b       MterpCommonFallback
+
+    .extern MterpLogArrayIndexException
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogArrayIndexException
+#endif
+    b       MterpCommonFallback
+
+    .extern MterpLogNullObjectException
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogNullObjectException
+#endif
+    b       MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ld      a0, THREAD_EXCEPTION_OFFSET(rSELF)
+    beqzc   a0, MterpFallback                       # If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+    .extern MterpHandleException
+    .extern MterpShouldSwitchInterpreters
+MterpException:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpHandleException                    # (self, shadow_frame)
+    beqzc   v0, MterpExceptionReturn                # no local catch, back to caller.
+    ld      a0, OFF_FP_CODE_ITEM(rFP)
+    lwu     a1, OFF_FP_DEX_PC(rFP)
+    REFRESH_IBASE
+    daddu   rPC, a0, CODEITEM_INSNS_OFFSET
+    dlsa    rPC, a1, rPC, 1                         # generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 64 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    bgtzc   rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+    li      v0, JIT_CHECK_OSR
+    beqc    rPROFILE, v0, .L_osr_check
+    bltc    rPROFILE, v0, .L_resume_backward_branch
+    dsubu   rPROFILE, 1
+    beqzc   rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnezc   ra, .L_suspend_request_pending
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    move    a0, rSELF
+    jal     MterpSuspendCheck           # (self)
+    bnezc   v0, MterpFallback
+    REFRESH_IBASE                       # might have changed during suspend
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_no_count_backwards:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bnec    rPROFILE, v0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal MterpMaybeDoOnStackReplacement  # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beqc    rPROFILE, v0, .L_check_osr_forward
+.L_resume_forward_branch:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    ld      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST                               # rINST contains offset
+    jal     MterpLogOSR
+#endif
+    li      v0, 1                                   # Signal normal return
+    b       MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+    .extern MterpLogFallback
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogFallback
+#endif
+MterpCommonFallback:
+    li      v0, 0                                   # signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and RA.  Here we restore SP, restore the registers, and then restore
+ * RA to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    li      v0, 1                                   # signal return to caller.
+    b       MterpDone
+/*
+ * Returned value is expected in a0 and if it's not 64-bit, the 32 most
+ * significant bits of a0 must be 0.
+ */
+MterpReturn:
+    ld      a2, OFF_FP_RESULT_REGISTER(rFP)
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    sd      a0, 0(a2)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, check2
+    jal     MterpSuspendCheck                       # (self)
+check2:
+    li      v0, 1                                   # signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if <= 0, nothing to report; else flush cached counts.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
+    ld      s6, STACK_OFFSET_S6(sp)
+    .cfi_restore 22
+    ld      s5, STACK_OFFSET_S5(sp)
+    .cfi_restore 21
+    ld      s4, STACK_OFFSET_S4(sp)
+    .cfi_restore 20
+    ld      s3, STACK_OFFSET_S3(sp)
+    .cfi_restore 19
+    ld      s2, STACK_OFFSET_S2(sp)
+    .cfi_restore 18
+    ld      s1, STACK_OFFSET_S1(sp)
+    .cfi_restore 17
+    ld      s0, STACK_OFFSET_S0(sp)
+    .cfi_restore 16
+
+    ld      ra, STACK_OFFSET_RA(sp)
+    .cfi_restore 31
+
+    ld      t8, STACK_OFFSET_GP(sp)
+    .cpreturn
+    .cfi_restore 28
+
+    .set    noreorder
+    jr      ra
+    daddu   sp, sp, STACK_SIZE
+    .cfi_adjust_cfa_offset -STACK_SIZE
+
+    .cfi_endproc
+    .set    reorder
+    .size ExecuteMterpImpl, .-ExecuteMterpImpl
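The hotness/OSR handling in MterpCommonTakenBranchNoFlags boils down to the C sketch below; per the comparisons above, JIT_CHECK_OSR is assumed to be a negative marker, with smaller values meaning profiling is disabled:

    /* Branch-profiling decision; returns nonzero if an OSR probe is due. */
    static int taken_branch(int offset, int* profile, int jit_check_osr) {
        if (offset > 0) {                      /* .L_forward_branch */
            return *profile == jit_check_osr;  /* .L_check_osr_forward */
        }
        if (*profile == jit_check_osr) return 1;   /* .L_osr_check */
        if (*profile < jit_check_osr)  return 0;   /* profiling disabled */
        if (--*profile == 0) {
            /* .L_add_batch: flush cached counts via MterpAddHotnessBatch */
        }
        return 0;  /* resume, honoring any pending suspend request */
    }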
diff --git a/runtime/interpreter/mterp/mips64/header.S b/runtime/interpreter/mterp/mips64/header.S
new file mode 100644
index 0000000..b67df20
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/header.S
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <machine/regdef.h>
+
+/* TODO: add the missing file and use its FP register definitions. */
+/* #include <machine/fpregdef.h> */
+/* FP register definitions */
+#define f0  $$f0
+#define f1  $$f1
+#define f2  $$f2
+#define f3  $$f3
+#define f12 $$f12
+#define f13 $$f13
+
+/*
+ * It looks like the GNU assembler currently does not support the blec and bgtc
+ * idioms, which should translate into bgec and bltc respectively with swapped
+ * left and right register operands.
+ * TODO: remove these macros when the assembler is fixed.
+ */
+.macro blec lreg, rreg, target
+    bgec    \rreg, \lreg, \target
+.endm
+.macro bgtc lreg, rreg, target
+    bltc    \rreg, \lreg, \target
+.endm
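+/*
+ * Illustrative expansion (explanatory only): with these macros, a branch
+ * such as
+ *     blec    a0, a1, 1f          # branch if a0 <= a1
+ * assembles as
+ *     bgec    a1, a0, 1f          # branch if a1 >= a0, the same condition
+ * so only the bgec/bltc forms the assembler accepts are emitted.
+ */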
+
+/*
+Mterp and MIPS64 notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  s0  rPC       interpreted program counter, used for fetching instructions
+  s1  rFP       interpreted frame pointer, used for accessing locals and args
+  s2  rSELF     self (Thread) pointer
+  s3  rINST     first 16-bit code unit of current instruction
+  s4  rIBASE    interpreted instruction base pointer, used for computed goto
+  s5  rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  s6  rPROFILE  jit profile hotness countdown
+*/
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC      s0
+#define rFP      s1
+#define rSELF    s2
+#define rINST    s3
+#define rIBASE   s4
+#define rREFS    s5
+#define rPROFILE s6
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
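+/*
+ * Usage sketch (explanatory only): rFP points at the vregs array, so shadow
+ * frame fields live below it and the OFF_FP offsets are negative, e.g.
+ *     ld      a0, OFF_FP_METHOD(rFP)           # a0 <- shadow_frame->method
+ *     daddu   a1, rFP, OFF_FP_SHADOWFRAME      # a1 <- &shadow_frame
+ */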
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    sd      rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
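+/*
+ * Typical use (sketch; the helper name below is hypothetical): EXPORT_PC is
+ * issued before any call that can throw, so the exception machinery can
+ * recover the current dex pc:
+ *     EXPORT_PC
+ *     jal     MterpSomeThrowingHelper          # hypothetical C++ helper
+ *     beqzc   v0, MterpException               # false return => exception
+ */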
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+    ld      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    lhu     rINST, 0(rPC)
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+    daddu   rPC, rPC, (\count) * 2
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg and advance rPC
+ * to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    daddu   rPC, rPC, \reg
+    FETCH_INST
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ADVANCE \count
+    FETCH_INST
+.endm
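+/*
+ * Dispatch sketch (explanatory only): a two-code-unit instruction that
+ * cannot throw typically ends with
+ *     FETCH_ADVANCE_INST 2                # rPC += 4, rINST <- next unit
+ *     GET_INST_OPCODE v0                  # v0 <- opcode byte
+ *     GOTO_OPCODE v0                      # jump to its handler
+ */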
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    lhu     rINST, ((\count) * 2)(rPC)
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, 255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.
+ */
+.macro GOTO_OPCODE reg
+    .set noat
+    sll     AT, \reg, 7
+    daddu   AT, rIBASE, AT
+    jic     AT, 0
+    .set at
+.endm
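+/*
+ * Note (explanatory only): the shift count of 7 is the handler stride; each
+ * opcode handler is padded to 2^7 = 128 bytes, so opcode N dispatches to
+ * rIBASE + N*128, e.g. opcode 0x1a jumps to rIBASE + 0xd00.
+ */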
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ * Note, GET_VREG does sign extension to 64 bits while
+ * GET_VREG_U does zero extension to 64 bits.
+ * One is useful for arithmetic while the other is
+ * useful for storing the result value as 64-bit.
+ */
+.macro GET_VREG reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lw      \reg, 0(AT)
+    .set at
+.endm
+.macro GET_VREG_U reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lwu     \reg, 0(AT)
+    .set at
+.endm
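+/*
+ * Example (explanatory only): for a vreg holding 0x80000000,
+ *     GET_VREG   a0, a2           # a0 <- 0xFFFFFFFF80000000 (lw sign-extends)
+ *     GET_VREG_U a0, a2           # a0 <- 0x0000000080000000 (lwu zero-extends)
+ */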
+.macro GET_VREG_FLOAT reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lwc1    \reg, 0(AT)
+    .set at
+.endm
+.macro SET_VREG reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    sw      \reg, 0(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    .set at
+.endm
+.macro SET_VREG_OBJECT reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    sw      \reg, 0(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      \reg, 0(AT)
+    .set at
+.endm
+.macro SET_VREG_FLOAT reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    swc1    \reg, 0(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    .set at
+.endm
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.
+ * Avoid unaligned memory accesses.
+ * Note, SET_VREG_WIDE clobbers the register containing the value being stored.
+ * Note, SET_VREG_DOUBLE clobbers the register containing the Dalvik register number.
+ */
+.macro GET_VREG_WIDE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lw      \reg, 0(AT)
+    lw      AT, 4(AT)
+    dinsu   \reg, AT, 32, 32
+    .set at
+.endm
+.macro GET_VREG_DOUBLE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lwc1    \reg, 0(AT)
+    lw      AT, 4(AT)
+    mthc1   AT, \reg
+    .set at
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    sw      \reg, 0(AT)
+    drotr32 \reg, \reg, 0
+    sw      \reg, 4(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    sw      zero, 4(AT)
+    .set at
+.endm
+.macro SET_VREG_DOUBLE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    sw      zero, 4(AT)
+    dlsa    AT, \vreg, rFP, 2
+    swc1    \reg, 0(AT)
+    mfhc1   \vreg, \reg
+    sw      \vreg, 4(AT)
+    .set at
+.endm
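+/*
+ * Usage caveat (explanatory only): SET_VREG_WIDE leaves the source register
+ * rotated by drotr32, so reload the value if it is needed again:
+ *     GET_VREG_WIDE a0, a2
+ *     SET_VREG_WIDE a0, a3        # a0 no longer holds the original value
+ */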
+
+/*
+ * On-stack offsets for spilling/unspilling callee-saved registers
+ * and the frame size.
+ */
+#define STACK_OFFSET_RA 0
+#define STACK_OFFSET_GP 8
+#define STACK_OFFSET_S0 16
+#define STACK_OFFSET_S1 24
+#define STACK_OFFSET_S2 32
+#define STACK_OFFSET_S3 40
+#define STACK_OFFSET_S4 48
+#define STACK_OFFSET_S5 56
+#define STACK_OFFSET_S6 64
+#define STACK_SIZE      80    /* needs 16 byte alignment */
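+/* 9 slots of 8 bytes (ra, gp, s0-s6) occupy 72 bytes; STACK_SIZE rounds
+   up to 80 for 16-byte stack alignment, leaving 8 bytes of padding. */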
+
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN             0x80000000
+#define INT_MIN_AS_FLOAT    0xCF000000
+#define INT_MIN_AS_DOUBLE   0xC1E0000000000000
+#define LONG_MIN            0x8000000000000000
+#define LONG_MIN_AS_FLOAT   0xDF000000
+#define LONG_MIN_AS_DOUBLE  0xC3E0000000000000
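+/*
+ * Note (explanatory only): these are raw IEEE-754 bit patterns, e.g.
+ * INT_MIN_AS_FLOAT  = 0xCF000000 encodes (float)-2^31,
+ * LONG_MIN_AS_FLOAT = 0xDF000000 encodes (float)-2^63;
+ * the conversion handlers compare against them to detect out-of-range
+ * inputs before trunc.w/trunc.l.
+ */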
diff --git a/runtime/interpreter/mterp/mips64/invoke.S b/runtime/interpreter/mterp/mips64/invoke.S
new file mode 100644
index 0000000..be647b6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/invoke.S
@@ -0,0 +1,20 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     $helper
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
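+    /*
+     * Instantiation sketch (explanatory only): an opcode file is expected to
+     * pull this in as, e.g.,
+     *     %include "mips64/invoke.S" { "helper":"MterpInvokeVirtual" }
+     * substituting the helper placeholder with the C++ entry point, which
+     * receives (self, shadow_frame, dex_pc_ptr, inst_data).
+     */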
diff --git a/runtime/interpreter/mterp/mips64/op_add_double.S b/runtime/interpreter/mterp/mips64/op_add_double.S
new file mode 100644
index 0000000..1520e32
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_double.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide.S" {"instr":"add.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_double_2addr.S b/runtime/interpreter/mterp/mips64/op_add_double_2addr.S
new file mode 100644
index 0000000..c14382e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide2addr.S" {"instr":"add.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_float.S b/runtime/interpreter/mterp/mips64/op_add_float.S
new file mode 100644
index 0000000..c6ed558
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_float.S
@@ -0,0 +1 @@
+%include "mips64/fbinop.S" {"instr":"add.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_float_2addr.S b/runtime/interpreter/mterp/mips64/op_add_float_2addr.S
new file mode 100644
index 0000000..4c20547
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinop2addr.S" {"instr":"add.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_int.S b/runtime/interpreter/mterp/mips64/op_add_int.S
new file mode 100644
index 0000000..6e569de
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_int_2addr.S b/runtime/interpreter/mterp/mips64/op_add_int_2addr.S
new file mode 100644
index 0000000..2a84124
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_int_lit16.S b/runtime/interpreter/mterp/mips64/op_add_int_lit16.S
new file mode 100644
index 0000000..94b053b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_int_lit8.S b/runtime/interpreter/mterp/mips64/op_add_int_lit8.S
new file mode 100644
index 0000000..3b6d734
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"addu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_long.S b/runtime/interpreter/mterp/mips64/op_add_long.S
new file mode 100644
index 0000000..c8d702f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"daddu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_add_long_2addr.S b/runtime/interpreter/mterp/mips64/op_add_long_2addr.S
new file mode 100644
index 0000000..928ff54
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"daddu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_aget.S b/runtime/interpreter/mterp/mips64/op_aget.S
new file mode 100644
index 0000000..0472a06
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget.S
@@ -0,0 +1,29 @@
+%default { "load":"lw", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if $shift
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, $shift          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    $load   a2, $data_offset(a0)        # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a2, a4                     # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
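+    /*
+     * Semantics sketch (explanatory only, C-like):
+     *     if (vBB == NULL) goto common_errNullObject;
+     *     if ((uint32_t)vCC >= vBB->length) goto common_errArrayIndex;
+     *     vAA = vBB->data[vCC];
+     * dlsa scales the index by the element width and adds the array pointer;
+     * the data offset is folded into the final load.
+     */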
diff --git a/runtime/interpreter/mterp/mips64/op_aget_boolean.S b/runtime/interpreter/mterp/mips64/op_aget_boolean.S
new file mode 100644
index 0000000..d5be01b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "mips64/op_aget.S" { "load":"lbu", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aget_byte.S b/runtime/interpreter/mterp/mips64/op_aget_byte.S
new file mode 100644
index 0000000..084de8d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget_byte.S
@@ -0,0 +1 @@
+%include "mips64/op_aget.S" { "load":"lb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aget_char.S b/runtime/interpreter/mterp/mips64/op_aget_char.S
new file mode 100644
index 0000000..6c99ed5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget_char.S
@@ -0,0 +1 @@
+%include "mips64/op_aget.S" { "load":"lhu", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aget_object.S b/runtime/interpreter/mterp/mips64/op_aget_object.S
new file mode 100644
index 0000000..6374a05
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget_object.S
@@ -0,0 +1,21 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    .extern artAGetObjectFromMterp
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    EXPORT_PC
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    jal     artAGetObjectFromMterp      # (array, index)
+    ld      a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a4, rINST, 8                # a4 <- AA
+    PREFETCH_INST 2
+    bnez    a1, MterpException
+    SET_VREG_OBJECT v0, a4              # vAA <- v0
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_aget_short.S b/runtime/interpreter/mterp/mips64/op_aget_short.S
new file mode 100644
index 0000000..0158b0a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget_short.S
@@ -0,0 +1 @@
+%include "mips64/op_aget.S" { "load":"lh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aget_wide.S b/runtime/interpreter/mterp/mips64/op_aget_wide.S
new file mode 100644
index 0000000..0945aca
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     */
+    /* aget-wide vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    dlsa    a0, a1, a0, 3               # a0 <- arrayObj + index*width
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lw      a2, MIRROR_WIDE_ARRAY_DATA_OFFSET(a0)
+    lw      a3, (MIRROR_WIDE_ARRAY_DATA_OFFSET+4)(a0)
+    dinsu   a2, a3, 32, 32              # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a2, a4                # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_and_int.S b/runtime/interpreter/mterp/mips64/op_and_int.S
new file mode 100644
index 0000000..f0792a8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_and_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_and_int_2addr.S b/runtime/interpreter/mterp/mips64/op_and_int_2addr.S
new file mode 100644
index 0000000..08dc615
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_and_int_lit16.S b/runtime/interpreter/mterp/mips64/op_and_int_lit16.S
new file mode 100644
index 0000000..65d28ad
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_and_int_lit8.S b/runtime/interpreter/mterp/mips64/op_and_int_lit8.S
new file mode 100644
index 0000000..ab84bb7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_and_long.S b/runtime/interpreter/mterp/mips64/op_and_long.S
new file mode 100644
index 0000000..e383ba0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_and_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_and_long_2addr.S b/runtime/interpreter/mterp/mips64/op_and_long_2addr.S
new file mode 100644
index 0000000..f863bb9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"and a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_aput.S b/runtime/interpreter/mterp/mips64/op_aput.S
new file mode 100644
index 0000000..9bfda97
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput.S
@@ -0,0 +1,29 @@
+%default { "store":"sw", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if $shift
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, $shift          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a2, a4                     # a2 <- vAA
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    $store  a2, $data_offset(a0)        # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_aput_boolean.S b/runtime/interpreter/mterp/mips64/op_aput_boolean.S
new file mode 100644
index 0000000..6707a1f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "mips64/op_aput.S" { "store":"sb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aput_byte.S b/runtime/interpreter/mterp/mips64/op_aput_byte.S
new file mode 100644
index 0000000..7b9ce48
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput_byte.S
@@ -0,0 +1 @@
+%include "mips64/op_aput.S" { "store":"sb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aput_char.S b/runtime/interpreter/mterp/mips64/op_aput_char.S
new file mode 100644
index 0000000..82bc8f7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput_char.S
@@ -0,0 +1 @@
+%include "mips64/op_aput.S" { "store":"sh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aput_object.S b/runtime/interpreter/mterp/mips64/op_aput_object.S
new file mode 100644
index 0000000..b132456
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput_object.S
@@ -0,0 +1,14 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    .extern MterpAputObject
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    jal     MterpAputObject
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_aput_short.S b/runtime/interpreter/mterp/mips64/op_aput_short.S
new file mode 100644
index 0000000..a7af294
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput_short.S
@@ -0,0 +1 @@
+%include "mips64/op_aput.S" { "store":"sh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/mips64/op_aput_wide.S b/runtime/interpreter/mterp/mips64/op_aput_wide.S
new file mode 100644
index 0000000..a1d7a3b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_aput_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    dlsa    a0, a1, a0, 3               # a0 <- arrayObj + index*width
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    GET_VREG_WIDE a2, a4                # a2 <- vAA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sw      a2, MIRROR_WIDE_ARRAY_DATA_OFFSET(a0)
+    dsrl32  a2, a2, 0
+    sw      a2, (MIRROR_WIDE_ARRAY_DATA_OFFSET+4)(a0)  # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
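+    /*
+     * Note (explanatory only): the 64-bit value is stored as two aligned
+     * 32-bit words; dsrl32 shifts the high word down, e.g. for
+     * a2 = 0x1122334455667788: sw stores 0x55667788, dsrl32 leaves
+     * 0x11223344, and the second sw stores it at offset +4.
+     */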
diff --git a/runtime/interpreter/mterp/mips64/op_array_length.S b/runtime/interpreter/mterp/mips64/op_array_length.S
new file mode 100644
index 0000000..2d9e172
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_array_length.S
@@ -0,0 +1,12 @@
+    /*
+     * Return the length of an array.
+     */
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a0, a1                   # a0 <- vB (object ref)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a0, common_errNullObject    # yup, fail
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- array length
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a3, a2                     # vB <- length
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_check_cast.S b/runtime/interpreter/mterp/mips64/op_check_cast.S
new file mode 100644
index 0000000..472595d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_check_cast.S
@@ -0,0 +1,17 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
+    .extern MterpCheckCast
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- BBBB
+    srl     a1, rINST, 8                # a1 <- AA
+    dlsa    a1, a1, rFP, 2              # a1 <- &object
+    ld      a2, OFF_FP_METHOD(rFP)      # a2 <- method
+    move    a3, rSELF                   # a3 <- self
+    jal     MterpCheckCast              # (index, &obj, method, self)
+    PREFETCH_INST 2
+    bnez    v0, MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_cmp_long.S b/runtime/interpreter/mterp/mips64/op_cmp_long.S
new file mode 100644
index 0000000..6e9376c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_cmp_long.S
@@ -0,0 +1,13 @@
+    /* cmp-long vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    slt     a2, a0, a1
+    slt     a0, a1, a0
+    subu    a0, a0, a2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- result
+    GOTO_OPCODE v0                      # jump to next instruction
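+    /*
+     * Note (explanatory only): the slt/slt/subu sequence computes the signum
+     * without branching:
+     *     result = (vCC < vBB) - (vBB < vCC)    // -1, 0, or +1
+     */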
diff --git a/runtime/interpreter/mterp/mips64/op_cmpg_double.S b/runtime/interpreter/mterp/mips64/op_cmpg_double.S
new file mode 100644
index 0000000..a8e2ef9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "mips64/fcmpWide.S" {"gt_bias":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_cmpg_float.S b/runtime/interpreter/mterp/mips64/op_cmpg_float.S
new file mode 100644
index 0000000..0c93eac
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "mips64/fcmp.S" {"gt_bias":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_cmpl_double.S b/runtime/interpreter/mterp/mips64/op_cmpl_double.S
new file mode 100644
index 0000000..9111b06
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "mips64/fcmpWide.S" {"gt_bias":"0"}
diff --git a/runtime/interpreter/mterp/mips64/op_cmpl_float.S b/runtime/interpreter/mterp/mips64/op_cmpl_float.S
new file mode 100644
index 0000000..b047451
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "mips64/fcmp.S" {"gt_bias":"0"}
diff --git a/runtime/interpreter/mterp/mips64/op_const.S b/runtime/interpreter/mterp/mips64/op_const.S
new file mode 100644
index 0000000..4b0d69b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const.S
@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (high)
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    ins     a0, a1, 16, 16              # a0 = BBBBbbbb
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- +BBBBbbbb
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_16.S b/runtime/interpreter/mterp/mips64/op_const_16.S
new file mode 100644
index 0000000..51e68a7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_16.S
@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- sign-extended BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- +BBBB
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_4.S b/runtime/interpreter/mterp/mips64/op_const_4.S
new file mode 100644
index 0000000..0a58bff
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_4.S
@@ -0,0 +1,8 @@
+    /* const/4 vA, #+B */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    seh     a0, rINST                   # sign extend B in rINST
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    sra     a0, a0, 12                  # shift B into its final position
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- +B
+    GOTO_OPCODE v0                      # jump to next instruction
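+    /*
+     * Worked example (explanatory only): for "const/4 v3, #-2" the code unit
+     * is 0xE312; seh sign-extends the low halfword (0xE312 -> 0xFFFFE312) and
+     * sra #12 leaves the sign-extended nibble B: 0xFFFFFFFE = -2.
+     */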
diff --git a/runtime/interpreter/mterp/mips64/op_const_class.S b/runtime/interpreter/mterp/mips64/op_const_class.S
new file mode 100644
index 0000000..adf79df
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_class.S
@@ -0,0 +1,13 @@
+    /* const/class vAA, Class//BBBB */
+    .extern MterpConstClass
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- BBBB
+    srl     a1, rINST, 8                # a1 <- AA
+    daddu   a2, rFP, OFF_FP_SHADOWFRAME
+    move    a3, rSELF
+    jal     MterpConstClass             # (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     # load rINST
+    bnez    v0, MterpPossibleException  # let reference interpreter deal with it.
+    ADVANCE 2                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_high16.S b/runtime/interpreter/mterp/mips64/op_const_high16.S
new file mode 100644
index 0000000..43effb6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_high16.S
@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    sll     a0, a0, 16                  # a0 <- BBBB0000
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- +BBBB0000
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_string.S b/runtime/interpreter/mterp/mips64/op_const_string.S
new file mode 100644
index 0000000..4684c11
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_string.S
@@ -0,0 +1,13 @@
+    /* const/string vAA, String//BBBB */
+    .extern MterpConstString
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- BBBB
+    srl     a1, rINST, 8                # a1 <- AA
+    daddu   a2, rFP, OFF_FP_SHADOWFRAME
+    move    a3, rSELF
+    jal     MterpConstString            # (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     # load rINST
+    bnez    v0, MterpPossibleException  # let reference interpreter deal with it.
+    ADVANCE 2                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_string_jumbo.S b/runtime/interpreter/mterp/mips64/op_const_string_jumbo.S
new file mode 100644
index 0000000..47f2101
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_string_jumbo.S
@@ -0,0 +1,15 @@
+    /* const/string vAA, String//BBBBBBBB */
+    .extern MterpConstString
+    EXPORT_PC
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a4, 4(rPC)                  # a4 <- BBBB (high)
+    srl     a1, rINST, 8                # a1 <- AA
+    ins     a0, a4, 16, 16              # a0 <- BBBBbbbb
+    daddu   a2, rFP, OFF_FP_SHADOWFRAME
+    move    a3, rSELF
+    jal     MterpConstString            # (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     # load rINST
+    bnez    v0, MterpPossibleException  # let reference interpreter deal with it.
+    ADVANCE 3                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_wide.S b/runtime/interpreter/mterp/mips64/op_const_wide.S
new file mode 100644
index 0000000..f7eaf7c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_wide.S
@@ -0,0 +1,13 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    srl     a4, rINST, 8                # a4 <- AA
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (low middle)
+    lh      a2, 6(rPC)                  # a2 <- hhhh (high middle)
+    lh      a3, 8(rPC)                  # a3 <- HHHH (high)
+    FETCH_ADVANCE_INST 5                # advance rPC, load rINST
+    ins     a0, a1, 16, 16              # a0 = BBBBbbbb
+    ins     a2, a3, 16, 16              # a2 = HHHHhhhh
+    dinsu   a0, a2, 32, 32              # a0 = HHHHhhhhBBBBbbbb
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4                # vAA <- +HHHHhhhhBBBBbbbb
+    GOTO_OPCODE v0                      # jump to next instruction
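+    /*
+     * Note (explanatory only): the four sign-extended halfwords are merged
+     * with bitfield inserts, e.g. for the literal 0x0123456789ABCDEF:
+     *     a0 = 0xCDEF, a1 = 0x89AB, a2 = 0x4567, a3 = 0x0123
+     *     ins   a0, a1, 16, 16    # a0 = 0x89ABCDEF
+     *     ins   a2, a3, 16, 16    # a2 = 0x01234567
+     *     dinsu a0, a2, 32, 32    # a0 = 0x0123456789ABCDEF
+     */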
diff --git a/runtime/interpreter/mterp/mips64/op_const_wide_16.S b/runtime/interpreter/mterp/mips64/op_const_wide_16.S
new file mode 100644
index 0000000..3a70937
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_wide_16.S
@@ -0,0 +1,7 @@
+    /* const-wide/16 vAA, #+BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- sign-extended BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- +BBBB
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_wide_32.S b/runtime/interpreter/mterp/mips64/op_const_wide_32.S
new file mode 100644
index 0000000..867197c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_wide_32.S
@@ -0,0 +1,9 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (high)
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    ins     a0, a1, 16, 16              # a0 = BBBBbbbb
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- +BBBBbbbb
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_const_wide_high16.S b/runtime/interpreter/mterp/mips64/op_const_wide_high16.S
new file mode 100644
index 0000000..d741631
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_const_wide_high16.S
@@ -0,0 +1,8 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    dsll32  a0, a0, 16                  # a0 <- BBBB000000000000
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- +BBBB000000000000
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_div_double.S b/runtime/interpreter/mterp/mips64/op_div_double.S
new file mode 100644
index 0000000..44998f0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_double.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide.S" {"instr":"div.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_double_2addr.S b/runtime/interpreter/mterp/mips64/op_div_double_2addr.S
new file mode 100644
index 0000000..396af79
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide2addr.S" {"instr":"div.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_float.S b/runtime/interpreter/mterp/mips64/op_div_float.S
new file mode 100644
index 0000000..7b09d52
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_float.S
@@ -0,0 +1 @@
+%include "mips64/fbinop.S" {"instr":"div.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_float_2addr.S b/runtime/interpreter/mterp/mips64/op_div_float_2addr.S
new file mode 100644
index 0000000..e74fdda
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinop2addr.S" {"instr":"div.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_int.S b/runtime/interpreter/mterp/mips64/op_div_int.S
new file mode 100644
index 0000000..fb04acb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_int_2addr.S b/runtime/interpreter/mterp/mips64/op_div_int_2addr.S
new file mode 100644
index 0000000..db29b84
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_int_lit16.S b/runtime/interpreter/mterp/mips64/op_div_int_lit16.S
new file mode 100644
index 0000000..e903dde
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_int_lit8.S b/runtime/interpreter/mterp/mips64/op_div_int_lit8.S
new file mode 100644
index 0000000..0559605
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"div a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_long.S b/runtime/interpreter/mterp/mips64/op_div_long.S
new file mode 100644
index 0000000..01fc2b2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"ddiv a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_div_long_2addr.S b/runtime/interpreter/mterp/mips64/op_div_long_2addr.S
new file mode 100644
index 0000000..9627ab8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"ddiv a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_double_to_float.S b/runtime/interpreter/mterp/mips64/op_double_to_float.S
new file mode 100644
index 0000000..2b2acee
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_double_to_float.S
@@ -0,0 +1,8 @@
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+%include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
+    cvt.s.d f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_FLOAT", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_double_to_int.S b/runtime/interpreter/mterp/mips64/op_double_to_int.S
new file mode 100644
index 0000000..aa2cbca
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_double_to_int.S
@@ -0,0 +1,23 @@
+%include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    dli     t0, INT_MIN_AS_DOUBLE
+    dmtc1   t0, f1
+    cmp.le.d f1, f1, f0
+    bc1nez  f1, .L${opcode}_trunc
+    cmp.eq.d f1, f0, f0
+    li      t0, INT_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .L${opcode}_done
+%break
+.L${opcode}_trunc:
+    trunc.w.d f0, f0
+    mfc1    t0, f0
+.L${opcode}_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
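+    /*
+     * Semantics sketch (explanatory only, C-like) of the pre-NAN2008 path:
+     *     if (x >= (double)INT_MIN) result = trunc(x);   // trunc.w.d also
+     *                                                    // saturates high
+     *     else result = isnan(x) ? 0 : INT_MIN;          // low overflow/NaN
+     */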
diff --git a/runtime/interpreter/mterp/mips64/op_double_to_long.S b/runtime/interpreter/mterp/mips64/op_double_to_long.S
new file mode 100644
index 0000000..777cfeb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_double_to_long.S
@@ -0,0 +1,23 @@
+%include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    dli     t0, LONG_MIN_AS_DOUBLE
+    dmtc1   t0, f1
+    cmp.le.d f1, f1, f0
+    bc1nez  f1, .L${opcode}_trunc
+    cmp.eq.d f1, f0, f0
+    dli     t0, LONG_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .L${opcode}_done
+%break
+.L${opcode}_trunc:
+    trunc.l.d f0, f0
+    dmfc1   t0, f0
+.L${opcode}_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_fill_array_data.S b/runtime/interpreter/mterp/mips64/op_fill_array_data.S
new file mode 100644
index 0000000..c90f0b9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_fill_array_data.S
@@ -0,0 +1,14 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    .extern MterpFillArrayData
+    EXPORT_PC
+    lh      a1, 2(rPC)                  # a1 <- bbbb (lo)
+    lh      a0, 4(rPC)                  # a0 <- BBBB (hi)
+    srl     a3, rINST, 8                # a3 <- AA
+    ins     a1, a0, 16, 16              # a1 <- BBBBbbbb
+    GET_VREG_U a0, a3                   # a0 <- vAA (array object)
+    dlsa    a1, a1, rPC, 1              # a1 <- PC + BBBBbbbb*2 (array data off.)
+    jal     MterpFillArrayData          # (obj, payload)
+    beqzc   v0, MterpPossibleException  # exception?
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_filled_new_array.S b/runtime/interpreter/mterp/mips64/op_filled_new_array.S
new file mode 100644
index 0000000..35f55c2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_filled_new_array.S
@@ -0,0 +1,18 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern $helper
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rSELF
+    jal     $helper
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_filled_new_array_range.S b/runtime/interpreter/mterp/mips64/op_filled_new_array_range.S
new file mode 100644
index 0000000..a4e18f6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "mips64/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_float_to_double.S b/runtime/interpreter/mterp/mips64/op_float_to_double.S
new file mode 100644
index 0000000..6accfee
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_float_to_double.S
@@ -0,0 +1,8 @@
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+%include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
+    cvt.d.s f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_DOUBLE", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_float_to_int.S b/runtime/interpreter/mterp/mips64/op_float_to_int.S
new file mode 100644
index 0000000..d957540
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_float_to_int.S
@@ -0,0 +1,23 @@
+%include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li      t0, INT_MIN_AS_FLOAT
+    mtc1    t0, f1
+    cmp.le.s f1, f1, f0
+    bc1nez  f1, .L${opcode}_trunc
+    cmp.eq.s f1, f0, f0
+    li      t0, INT_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .L${opcode}_done
+%break
+.L${opcode}_trunc:
+    trunc.w.s f0, f0
+    mfc1    t0, f0
+.L${opcode}_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_float_to_long.S b/runtime/interpreter/mterp/mips64/op_float_to_long.S
new file mode 100644
index 0000000..5d036c8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_float_to_long.S
@@ -0,0 +1,23 @@
+%include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li      t0, LONG_MIN_AS_FLOAT
+    mtc1    t0, f1
+    cmp.le.s f1, f1, f0
+    bc1nez  f1, .L${opcode}_trunc
+    cmp.eq.s f1, f0, f0
+    dli     t0, LONG_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .L${opcode}_done
+%break
+.L${opcode}_trunc:
+    trunc.l.s f0, f0
+    dmfc1   t0, f0
+.L${opcode}_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_goto.S b/runtime/interpreter/mterp/mips64/op_goto.S
new file mode 100644
index 0000000..68fc83d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_goto.S
@@ -0,0 +1,10 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    srl     rINST, rINST, 8
+    seb     rINST, rINST                # rINST <- offset (sign-extended AA)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_goto_16.S b/runtime/interpreter/mterp/mips64/op_goto_16.S
new file mode 100644
index 0000000..ae56066
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_goto_16.S
@@ -0,0 +1,9 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_goto_32.S b/runtime/interpreter/mterp/mips64/op_goto_32.S
new file mode 100644
index 0000000..498b6d6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_goto_32.S
@@ -0,0 +1,14 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".
+     */
+    /* goto/32 +AAAAAAAA */
+    lh      rINST, 2(rPC)               # rINST <- aaaa (low)
+    lh      a1, 4(rPC)                  # a1 <- AAAA (high)
+    ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_if_eq.S b/runtime/interpreter/mterp/mips64/op_if_eq.S
new file mode 100644
index 0000000..aa35cad
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_eq.S
@@ -0,0 +1 @@
+%include "mips64/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_eqz.S b/runtime/interpreter/mterp/mips64/op_if_eqz.S
new file mode 100644
index 0000000..0fe3418
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_eqz.S
@@ -0,0 +1 @@
+%include "mips64/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_ge.S b/runtime/interpreter/mterp/mips64/op_if_ge.S
new file mode 100644
index 0000000..59fdcc5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_ge.S
@@ -0,0 +1 @@
+%include "mips64/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_gez.S b/runtime/interpreter/mterp/mips64/op_if_gez.S
new file mode 100644
index 0000000..57f1f66
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_gez.S
@@ -0,0 +1 @@
+%include "mips64/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_gt.S b/runtime/interpreter/mterp/mips64/op_if_gt.S
new file mode 100644
index 0000000..26cc119
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_gt.S
@@ -0,0 +1 @@
+%include "mips64/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_gtz.S b/runtime/interpreter/mterp/mips64/op_if_gtz.S
new file mode 100644
index 0000000..69fcacb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_gtz.S
@@ -0,0 +1 @@
+%include "mips64/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_le.S b/runtime/interpreter/mterp/mips64/op_if_le.S
new file mode 100644
index 0000000..a7fce17
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_le.S
@@ -0,0 +1 @@
+%include "mips64/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_lez.S b/runtime/interpreter/mterp/mips64/op_if_lez.S
new file mode 100644
index 0000000..f3edcc6
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_lez.S
@@ -0,0 +1 @@
+%include "mips64/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_lt.S b/runtime/interpreter/mterp/mips64/op_if_lt.S
new file mode 100644
index 0000000..a975a31
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_lt.S
@@ -0,0 +1 @@
+%include "mips64/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_ltz.S b/runtime/interpreter/mterp/mips64/op_if_ltz.S
new file mode 100644
index 0000000..c1d730d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_ltz.S
@@ -0,0 +1 @@
+%include "mips64/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_ne.S b/runtime/interpreter/mterp/mips64/op_if_ne.S
new file mode 100644
index 0000000..f143ee9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_ne.S
@@ -0,0 +1 @@
+%include "mips64/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips64/op_if_nez.S b/runtime/interpreter/mterp/mips64/op_if_nez.S
new file mode 100644
index 0000000..1856b96
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_if_nez.S
@@ -0,0 +1 @@
+%include "mips64/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget.S b/runtime/interpreter/mterp/mips64/op_iget.S
new file mode 100644
index 0000000..ade4b31
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    .extern $helper
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      $helper
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if $is_object
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_iget_boolean.S b/runtime/interpreter/mterp/mips64/op_iget_boolean.S
new file mode 100644
index 0000000..cb2c8be
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "mips64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_boolean_quick.S b/runtime/interpreter/mterp/mips64/op_iget_boolean_quick.S
new file mode 100644
index 0000000..979dc70
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iget_quick.S" { "load":"lbu" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_byte.S b/runtime/interpreter/mterp/mips64/op_iget_byte.S
new file mode 100644
index 0000000..099d8d0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_byte.S
@@ -0,0 +1 @@
+%include "mips64/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_byte_quick.S b/runtime/interpreter/mterp/mips64/op_iget_byte_quick.S
new file mode 100644
index 0000000..cb35556
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iget_quick.S" { "load":"lb" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_char.S b/runtime/interpreter/mterp/mips64/op_iget_char.S
new file mode 100644
index 0000000..927b7af
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_char.S
@@ -0,0 +1 @@
+%include "mips64/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_char_quick.S b/runtime/interpreter/mterp/mips64/op_iget_char_quick.S
new file mode 100644
index 0000000..6034567
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iget_quick.S" { "load":"lhu" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_object.S b/runtime/interpreter/mterp/mips64/op_iget_object.S
new file mode 100644
index 0000000..c658556
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_object.S
@@ -0,0 +1 @@
+%include "mips64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_object_quick.S b/runtime/interpreter/mterp/mips64/op_iget_object_quick.S
new file mode 100644
index 0000000..171d543
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_object_quick.S
@@ -0,0 +1,16 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    .extern artIGetObjectFromMterp
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    EXPORT_PC
+    GET_VREG_U a0, a2                   # a0 <- object we're operating on
+    jal     artIGetObjectFromMterp      # (obj, offset)
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    PREFETCH_INST 2
+    bnez    a3, MterpPossibleException  # bail out
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    ADVANCE 2                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_iget_quick.S b/runtime/interpreter/mterp/mips64/op_iget_quick.S
new file mode 100644
index 0000000..fee6ab7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_quick.S
@@ -0,0 +1,14 @@
+%default { "load":"lw" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a4, rINST, 8, 4             # a4 <- A
+    daddu   a1, a1, a3
+    beqz    a3, common_errNullObject    # object was null
+    $load   a0, 0(a1)                   # a0 <- obj.field
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG a0, a4                     # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
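
For orientation: iget-quick is the post-verification rewrite of iget in which the dex field reference has already been replaced by the field's raw byte offset, so the handler above needs no resolution and no helper call. A minimal C++ sketch of that fast path, under a hypothetical object layout rather than ART's real mirror::Object:

    #include <cstdint>
    #include <cstring>

    // byte_offset is what the rewritten instruction carries (the lhu from
    // 2(rPC) above); the load mirrors `$load a0, 0(a1)`.
    int32_t IGetQuick(const uint8_t* obj, uint32_t byte_offset) {
      int32_t value;
      std::memcpy(&value, obj + byte_offset, sizeof(value));
      return value;
    }

The null check is omitted here; the real handler branches to common_errNullObject with beqz before loading.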
diff --git a/runtime/interpreter/mterp/mips64/op_iget_short.S b/runtime/interpreter/mterp/mips64/op_iget_short.S
new file mode 100644
index 0000000..28b5093
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_short.S
@@ -0,0 +1 @@
+%include "mips64/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_short_quick.S b/runtime/interpreter/mterp/mips64/op_iget_short_quick.S
new file mode 100644
index 0000000..6e152db
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iget_quick.S" { "load":"lh" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_wide.S b/runtime/interpreter/mterp/mips64/op_iget_wide.S
new file mode 100644
index 0000000..85cf670
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    .extern artGet64InstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGet64InstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    SET_VREG_WIDE v0, a2                # fp[A] <- v0
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_iget_wide_quick.S b/runtime/interpreter/mterp/mips64/op_iget_wide_quick.S
new file mode 100644
index 0000000..2adc6ad
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iget_wide_quick.S
@@ -0,0 +1,14 @@
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a4, 2(rPC)                  # a4 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    daddu   a4, a3, a4                  # create direct pointer
+    lw      a0, 0(a4)
+    lw      a1, 4(a4)
+    dinsu   a0, a1, 32, 32
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG_WIDE a0, a2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
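
The pair of lw instructions plus dinsu stands in for a single 8-byte load, presumably because a wide field is only guaranteed 4-byte alignment at this point. Sketched in C++ (assuming little-endian, which holds for MIPS64 Android):

    #include <cstdint>
    #include <cstring>

    int64_t LoadWideSplit(const uint8_t* field_addr) {
      uint32_t lo, hi;
      std::memcpy(&lo, field_addr, sizeof(lo));      // lw a0, 0(a4)
      std::memcpy(&hi, field_addr + 4, sizeof(hi));  // lw a1, 4(a4)
      // dinsu a0, a1, 32, 32: place hi in bits 32..63 above lo.
      return static_cast<int64_t>((static_cast<uint64_t>(hi) << 32) | lo);
    }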
diff --git a/runtime/interpreter/mterp/mips64/op_instance_of.S b/runtime/interpreter/mterp/mips64/op_instance_of.S
new file mode 100644
index 0000000..39a5dc7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_instance_of.S
@@ -0,0 +1,23 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * The most common situation is a non-null object being checked
+     * against an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    .extern MterpInstanceOf
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    dlsa    a1, a1, rFP, 2              # a1 <- &object
+    ld      a2, OFF_FP_METHOD(rFP)      # a2 <- method
+    move    a3, rSELF                   # a3 <- self
+    jal     MterpInstanceOf             # (index, &obj, method, self)
+    ld      a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    PREFETCH_INST 2
+    bnez    a1, MterpException
+    ADVANCE 2                           # advance rPC
+    SET_VREG v0, a2                     # vA <- v0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
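
As a reminder of what MterpInstanceOf decides, here is the superclass-chain part of the check in schematic C++. The real implementation also handles interfaces and arrays, and a null reference simply yields 0:

    struct ClassSketch {
      const ClassSketch* super;  // nullptr at the root of the hierarchy
    };

    // vA <- 1 if the object's class is `target` or a subclass of it, else 0.
    int InstanceOfSketch(const ClassSketch* obj_class,
                         const ClassSketch* target) {
      for (const ClassSketch* k = obj_class; k != nullptr; k = k->super) {
        if (k == target) return 1;
      }
      return 0;
    }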
diff --git a/runtime/interpreter/mterp/mips64/op_int_to_byte.S b/runtime/interpreter/mterp/mips64/op_int_to_byte.S
new file mode 100644
index 0000000..1993e07
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "mips64/unop.S" {"instr":"seb     a0, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_int_to_char.S b/runtime/interpreter/mterp/mips64/op_int_to_char.S
new file mode 100644
index 0000000..8f03acd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_int_to_char.S
@@ -0,0 +1 @@
+%include "mips64/unop.S" {"instr":"and     a0, a0, 0xffff"}
diff --git a/runtime/interpreter/mterp/mips64/op_int_to_double.S b/runtime/interpreter/mterp/mips64/op_int_to_double.S
new file mode 100644
index 0000000..6df71be
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_int_to_double.S
@@ -0,0 +1,8 @@
+    /*
+     * Conversions to and from floating-point are performed in a floating-point
+     * register. We therefore load the input into, and store the output from,
+     * a floating-point register irrespective of the operand type.
+     */
+%include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
+    cvt.d.w f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_DOUBLE", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_int_to_float.S b/runtime/interpreter/mterp/mips64/op_int_to_float.S
new file mode 100644
index 0000000..77e9eba
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_int_to_float.S
@@ -0,0 +1,8 @@
+    /*
+     * Conversions to and from floating-point are performed in a floating-point
+     * register. We therefore load the input into, and store the output from,
+     * a floating-point register irrespective of the operand type.
+     */
+%include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
+    cvt.s.w f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_FLOAT", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_int_to_long.S b/runtime/interpreter/mterp/mips64/op_int_to_long.S
new file mode 100644
index 0000000..7b9ad86
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_int_to_long.S
@@ -0,0 +1,8 @@
+    /* int-to-long vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB (sign-extended to 64 bits)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- vB
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_int_to_short.S b/runtime/interpreter/mterp/mips64/op_int_to_short.S
new file mode 100644
index 0000000..4a3f234
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_int_to_short.S
@@ -0,0 +1 @@
+%include "mips64/unop.S" {"instr":"seh     a0, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_direct.S b/runtime/interpreter/mterp/mips64/op_invoke_direct.S
new file mode 100644
index 0000000..5047118
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_direct_range.S b/runtime/interpreter/mterp/mips64/op_invoke_direct_range.S
new file mode 100644
index 0000000..5c9b95f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_interface.S b/runtime/interpreter/mterp/mips64/op_invoke_interface.S
new file mode 100644
index 0000000..ed148ad
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
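
What distinguishes MterpInvokeInterface from the other invoke helpers is the receiver-side lookup: the interface method must be located on the receiver's class via its interface tables. A much-simplified C++ picture (ART actually dispatches through an IMT with conflict tables, which this ignores):

    #include <cstddef>

    struct MethodSketch { void (*code)(); };

    struct IfTableRow {
      const void*         iface;    // interface this row implements
      const MethodSketch* methods;  // its resolved method table
    };

    struct ClassSketch {
      const IfTableRow* iftable;
      size_t            rows;
    };

    const MethodSketch* FindInterfaceMethod(const ClassSketch* klass,
                                            const void* iface, size_t idx) {
      for (size_t i = 0; i < klass->rows; ++i) {
        if (klass->iftable[i].iface == iface) {
          return &klass->iftable[i].methods[idx];
        }
      }
      return nullptr;  // ART would throw IncompatibleClassChangeError
    }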
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_interface_range.S b/runtime/interpreter/mterp/mips64/op_invoke_interface_range.S
new file mode 100644
index 0000000..91c231e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_static.S b/runtime/interpreter/mterp/mips64/op_invoke_static.S
new file mode 100644
index 0000000..44f5cb7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_static.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeStatic" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_static_range.S b/runtime/interpreter/mterp/mips64/op_invoke_static_range.S
new file mode 100644
index 0000000..289e5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_super.S b/runtime/interpreter/mterp/mips64/op_invoke_super.S
new file mode 100644
index 0000000..b13fffe
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_super_range.S b/runtime/interpreter/mterp/mips64/op_invoke_super_range.S
new file mode 100644
index 0000000..350b975
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_virtual.S b/runtime/interpreter/mterp/mips64/op_invoke_virtual.S
new file mode 100644
index 0000000..0d26cda
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/mips64/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..f39562c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_virtual_range.S b/runtime/interpreter/mterp/mips64/op_invoke_virtual_range.S
new file mode 100644
index 0000000..0bb43f8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/mips64/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..c448851
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "mips64/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput.S b/runtime/interpreter/mterp/mips64/op_iput.S
new file mode 100644
index 0000000..a906a0f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput.S
@@ -0,0 +1,21 @@
+%default { "helper":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern $helper
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG a2, a2                     # a2 <- fp[A]
+    ld      a3, OFF_FP_METHOD(rFP)      # a3 <- referrer
+    PREFETCH_INST 2
+    jal     $helper
+    bnez    v0, MterpPossibleException  # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
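
The error handling above relies on a return-value convention: the helper performs the (resolving) store and returns 0 on success or non-zero after raising an exception, which is what `bnez v0, MterpPossibleException` tests. A schematic of that contract with a hypothetical signature (the real artSet32InstanceFromMterp takes a field index and referrer and resolves first):

    #include <cstdint>
    #include <cstring>

    extern "C" int Set32InstanceSketch(uint8_t* obj, uint32_t field_offset,
                                       int32_t value) {
      if (obj == nullptr) {
        return 1;  // pending NullPointerException -> bnez v0 path taken
      }
      std::memcpy(obj + field_offset, &value, sizeof(value));
      return 0;    // falls through to ADVANCE 2
    }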
diff --git a/runtime/interpreter/mterp/mips64/op_iput_boolean.S b/runtime/interpreter/mterp/mips64/op_iput_boolean.S
new file mode 100644
index 0000000..3034fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "mips64/op_iput.S" { "helper":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_boolean_quick.S b/runtime/interpreter/mterp/mips64/op_iput_boolean_quick.S
new file mode 100644
index 0000000..df99948
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iput_quick.S" { "store":"sb" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_byte.S b/runtime/interpreter/mterp/mips64/op_iput_byte.S
new file mode 100644
index 0000000..3034fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_byte.S
@@ -0,0 +1 @@
+%include "mips64/op_iput.S" { "helper":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_byte_quick.S b/runtime/interpreter/mterp/mips64/op_iput_byte_quick.S
new file mode 100644
index 0000000..df99948
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iput_quick.S" { "store":"sb" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_char.S b/runtime/interpreter/mterp/mips64/op_iput_char.S
new file mode 100644
index 0000000..4c2fa28
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_char.S
@@ -0,0 +1 @@
+%include "mips64/op_iput.S" { "helper":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_char_quick.S b/runtime/interpreter/mterp/mips64/op_iput_char_quick.S
new file mode 100644
index 0000000..a6286b7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iput_quick.S" { "store":"sh" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_object.S b/runtime/interpreter/mterp/mips64/op_iput_object.S
new file mode 100644
index 0000000..9a42f54
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_object.S
@@ -0,0 +1,11 @@
+    .extern MterpIputObject
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    move    a3, rSELF
+    jal     MterpIputObject
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
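
iput-object goes through a C++ helper rather than a raw store chiefly because reference writes must be announced to the garbage collector. A schematic card-marking barrier, with made-up table size and card granularity (ART's actual barrier differs in detail):

    #include <cstdint>

    static uint8_t gCardTable[1u << 20];  // size chosen only for the sketch
    constexpr uintptr_t kCardShift = 10;  // one card per 1 KiB, illustrative

    void StoreRefWithBarrier(void** field_addr, void* new_ref) {
      *field_addr = new_ref;
      uintptr_t card = reinterpret_cast<uintptr_t>(field_addr) >> kCardShift;
      gCardTable[card % sizeof(gCardTable)] = 1;  // dirty card for the GC
    }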
diff --git a/runtime/interpreter/mterp/mips64/op_iput_object_quick.S b/runtime/interpreter/mterp/mips64/op_iput_object_quick.S
new file mode 100644
index 0000000..658ef42
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_object_quick.S
@@ -0,0 +1,10 @@
+    .extern MterpIputObjectQuick
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    jal     MterpIputObjectQuick
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_iput_quick.S b/runtime/interpreter/mterp/mips64/op_iput_quick.S
new file mode 100644
index 0000000..b95adfc
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_quick.S
@@ -0,0 +1,14 @@
+%default { "store":"sw" }
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    GET_VREG a0, a2                     # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a1, a3
+    $store  a0, 0(a1)                   # obj.field <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_iput_short.S b/runtime/interpreter/mterp/mips64/op_iput_short.S
new file mode 100644
index 0000000..4c2fa28
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_short.S
@@ -0,0 +1 @@
+%include "mips64/op_iput.S" { "helper":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_short_quick.S b/runtime/interpreter/mterp/mips64/op_iput_short_quick.S
new file mode 100644
index 0000000..a6286b7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "mips64/op_iput_quick.S" { "store":"sh" }
diff --git a/runtime/interpreter/mterp/mips64/op_iput_wide.S b/runtime/interpreter/mterp/mips64/op_iput_wide.S
new file mode 100644
index 0000000..9b790f8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_wide.S
@@ -0,0 +1,15 @@
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext      a2, rINST, 8, 4            # a2 <- A
+    dlsa     a2, a2, rFP, 2             # a2 <- &fp[A]
+    ld       a3, OFF_FP_METHOD(rFP)     # a3 <- referrer
+    PREFETCH_INST 2
+    jal      artSet64InstanceFromMterp
+    bnez     v0, MterpPossibleException # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_iput_wide_quick.S b/runtime/interpreter/mterp/mips64/op_iput_wide_quick.S
new file mode 100644
index 0000000..95a8ad8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_iput_wide_quick.S
@@ -0,0 +1,14 @@
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a3, 2(rPC)                  # a3 <- field byte offset
+    GET_VREG_U a2, a2                   # a2 <- fp[B], the object pointer
+    ext     a0, rINST, 8, 4             # a0 <- A
+    beqz    a2, common_errNullObject    # object was null
+    GET_VREG_WIDE a0, a0                # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a2, a3                  # create a direct pointer
+    sw      a0, 0(a1)
+    dsrl32  a0, a0, 0
+    sw      a0, 4(a1)
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_long_to_double.S b/runtime/interpreter/mterp/mips64/op_long_to_double.S
new file mode 100644
index 0000000..8503e76
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_long_to_double.S
@@ -0,0 +1,8 @@
+    /*
+     * Conversions to and from floating-point are performed in a floating-point
+     * register. We therefore load the input into, and store the output from,
+     * a floating-point register irrespective of the operand type.
+     */
+%include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
+    cvt.d.l f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_DOUBLE", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_long_to_float.S b/runtime/interpreter/mterp/mips64/op_long_to_float.S
new file mode 100644
index 0000000..31f5c0e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_long_to_float.S
@@ -0,0 +1,8 @@
+    /*
+     * Conversions to and from floating-point are performed in a floating-point
+     * register. We therefore load the input into, and store the output from,
+     * a floating-point register irrespective of the operand type.
+     */
+%include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
+    cvt.s.l f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_FLOAT", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_long_to_int.S b/runtime/interpreter/mterp/mips64/op_long_to_int.S
new file mode 100644
index 0000000..4ef4b51
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "mips64/op_move.S"
diff --git a/runtime/interpreter/mterp/mips64/op_monitor_enter.S b/runtime/interpreter/mterp/mips64/op_monitor_enter.S
new file mode 100644
index 0000000..36ae503
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_monitor_enter.S
@@ -0,0 +1,14 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    .extern artLockObjectFromCode
+    EXPORT_PC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA (object)
+    move    a1, rSELF                   # a1 <- self
+    jal     artLockObjectFromCode
+    bnezc   v0, MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_monitor_exit.S b/runtime/interpreter/mterp/mips64/op_monitor_exit.S
new file mode 100644
index 0000000..9945952
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_monitor_exit.S
@@ -0,0 +1,18 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    .extern artUnlockObjectFromCode
+    EXPORT_PC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA (object)
+    move    a1, rSELF                   # a1 <- self
+    jal     artUnlockObjectFromCode     # v0 <- success for unlock(self, obj)
+    bnezc   v0, MterpException
+    FETCH_ADVANCE_INST 1                # before throw: advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
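
Between them, monitor-enter and monitor-exit implement Java's reentrant monitor semantics; as C++ they behave like the sketch below. ART really uses thin locks that inflate to full monitors, not an OS mutex per object, and the non-zero return tested by bnezc corresponds to e.g. IllegalMonitorStateException on an unbalanced exit, which unlock() cannot model:

    #include <mutex>

    struct MonitorSketch {
      std::recursive_mutex mu;  // Java monitors are reentrant
    };

    void MonitorEnter(MonitorSketch* m) { m->mu.lock(); }    // artLockObjectFromCode
    void MonitorExit(MonitorSketch* m)  { m->mu.unlock(); }  // artUnlockObjectFromCode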
diff --git a/runtime/interpreter/mterp/mips64/op_move.S b/runtime/interpreter/mterp/mips64/op_move.S
new file mode 100644
index 0000000..c79f6cd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT a0, a2              # vA <- vB
+    .else
+    SET_VREG a0, a2                     # vA <- vB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
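
The is_object switch matters because the interpreter's shadow frame keeps a parallel reference slot for each vreg so the GC can find and update pointers held in registers: SET_VREG clears that slot, SET_VREG_OBJECT fills it. Schematically (slot count and the pointer narrowing are sketch-only):

    #include <cstdint>

    struct ShadowFrameSketch {
      uint32_t vregs[16];
      void*    refs[16];  // GC-visible mirror of reference-holding vregs

      void SetVReg(int i, uint32_t v) {
        vregs[i] = v;
        refs[i]  = nullptr;  // slot no longer holds a reference
      }
      void SetVRegObject(int i, void* obj) {
        vregs[i] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(obj));
        refs[i]  = obj;      // keep the GC's view in sync
      }
    };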
diff --git a/runtime/interpreter/mterp/mips64/op_move_16.S b/runtime/interpreter/mterp/mips64/op_move_16.S
new file mode 100644
index 0000000..9d5c4dc
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    lhu     a3, 4(rPC)                  # a3 <- BBBB
+    lhu     a2, 2(rPC)                  # a2 <- AAAA
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vBBBB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT a0, a2              # vAAAA <- vBBBB
+    .else
+    SET_VREG a0, a2                     # vAAAA <- vBBBB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_move_exception.S b/runtime/interpreter/mterp/mips64/op_move_exception.S
new file mode 100644
index 0000000..d226718
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_exception.S
@@ -0,0 +1,8 @@
+    /* move-exception vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    ld      a0, THREAD_EXCEPTION_OFFSET(rSELF)  # load exception obj
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    SET_VREG_OBJECT a0, a2              # vAA <- exception obj
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sd      zero, THREAD_EXCEPTION_OFFSET(rSELF)  # clear exception
+    GOTO_OPCODE v0                      # jump to next instruction
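
The handler transcribes a three-step sequence on the thread object; in C++ (member name hypothetical):

    struct ThreadSketch { void* exception_; };

    void* MoveException(ThreadSketch* self) {
      void* e = self->exception_;  // ld a0, THREAD_EXCEPTION_OFFSET(rSELF)
      self->exception_ = nullptr;  // sd zero, THREAD_EXCEPTION_OFFSET(rSELF)
      return e;                    // stored to vAA via SET_VREG_OBJECT
    }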
diff --git a/runtime/interpreter/mterp/mips64/op_move_from16.S b/runtime/interpreter/mterp/mips64/op_move_from16.S
new file mode 100644
index 0000000..6d6bde0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    lhu     a3, 2(rPC)                  # a3 <- BBBB
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vBBBB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT a0, a2              # vAA <- vBBBB
+    .else
+    SET_VREG a0, a2                     # vAA <- vBBBB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_move_object.S b/runtime/interpreter/mterp/mips64/op_move_object.S
new file mode 100644
index 0000000..47e0272
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_object.S
@@ -0,0 +1 @@
+%include "mips64/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_move_object_16.S b/runtime/interpreter/mterp/mips64/op_move_object_16.S
new file mode 100644
index 0000000..a777dcd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_object_16.S
@@ -0,0 +1 @@
+%include "mips64/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_move_object_from16.S b/runtime/interpreter/mterp/mips64/op_move_object_from16.S
new file mode 100644
index 0000000..ab55ebd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "mips64/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_move_result.S b/runtime/interpreter/mterp/mips64/op_move_result.S
new file mode 100644
index 0000000..1ec28cb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    ld      a0, OFF_FP_RESULT_REGISTER(rFP)  # get pointer to result JType
+    lw      a0, 0(a0)                   # a0 <- result.i
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT a0, a2              # vAA <- result
+    .else
+    SET_VREG a0, a2                     # vAA <- result
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
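
OFF_FP_RESULT_REGISTER points at the frame's result value, a union in the runtime; the lw above reads its 32-bit member, while op_move_result_wide below reads the 64-bit one. Modeled loosely on ART's JValue:

    #include <cstdint>

    union JValueSketch {
      int32_t i;  // move-result          (lw a0, 0(a0))
      int64_t j;  // move-result-wide     (ld a0, 0(a0))
      void*   l;  // move-result-object
    };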
diff --git a/runtime/interpreter/mterp/mips64/op_move_result_object.S b/runtime/interpreter/mterp/mips64/op_move_result_object.S
new file mode 100644
index 0000000..e76bc22
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_result_object.S
@@ -0,0 +1 @@
+%include "mips64/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_move_result_wide.S b/runtime/interpreter/mterp/mips64/op_move_result_wide.S
new file mode 100644
index 0000000..3ba0d72
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_result_wide.S
@@ -0,0 +1,9 @@
+    /* for: move-result-wide */
+    /* op vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    ld      a0, OFF_FP_RESULT_REGISTER(rFP)  # get pointer to result JType
+    ld      a0, 0(a0)                   # a0 <- result.j
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- result
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_move_wide.S b/runtime/interpreter/mterp/mips64/op_move_wide.S
new file mode 100644
index 0000000..ea23f87
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_wide.S
@@ -0,0 +1,9 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG_WIDE a0, a3                # a0 <- vB
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- vB
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_move_wide_16.S b/runtime/interpreter/mterp/mips64/op_move_wide_16.S
new file mode 100644
index 0000000..8ec6068
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_wide_16.S
@@ -0,0 +1,9 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lhu     a3, 4(rPC)                  # a3 <- BBBB
+    lhu     a2, 2(rPC)                  # a2 <- AAAA
+    GET_VREG_WIDE a0, a3                # a0 <- vBBBB
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAAAA <- vBBBB
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_move_wide_from16.S b/runtime/interpreter/mterp/mips64/op_move_wide_from16.S
new file mode 100644
index 0000000..11d5603
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_move_wide_from16.S
@@ -0,0 +1,9 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lhu     a3, 2(rPC)                  # a3 <- BBBB
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_WIDE a0, a3                # a0 <- vBBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- vBBBB
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_mul_double.S b/runtime/interpreter/mterp/mips64/op_mul_double.S
new file mode 100644
index 0000000..e7e17f7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_double.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide.S" {"instr":"mul.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_double_2addr.S b/runtime/interpreter/mterp/mips64/op_mul_double_2addr.S
new file mode 100644
index 0000000..f404d46
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide2addr.S" {"instr":"mul.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_float.S b/runtime/interpreter/mterp/mips64/op_mul_float.S
new file mode 100644
index 0000000..9a695fc
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_float.S
@@ -0,0 +1 @@
+%include "mips64/fbinop.S" {"instr":"mul.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_float_2addr.S b/runtime/interpreter/mterp/mips64/op_mul_float_2addr.S
new file mode 100644
index 0000000..a134a34
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinop2addr.S" {"instr":"mul.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_int.S b/runtime/interpreter/mterp/mips64/op_mul_int.S
new file mode 100644
index 0000000..e1b90ff
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_int_2addr.S b/runtime/interpreter/mterp/mips64/op_mul_int_2addr.S
new file mode 100644
index 0000000..c0c4063
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_int_lit16.S b/runtime/interpreter/mterp/mips64/op_mul_int_lit16.S
new file mode 100644
index 0000000..bb4fff8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_int_lit8.S b/runtime/interpreter/mterp/mips64/op_mul_int_lit8.S
new file mode 100644
index 0000000..da11ea9
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"mul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_long.S b/runtime/interpreter/mterp/mips64/op_mul_long.S
new file mode 100644
index 0000000..ec32850
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"dmul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_mul_long_2addr.S b/runtime/interpreter/mterp/mips64/op_mul_long_2addr.S
new file mode 100644
index 0000000..eb50cda
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_mul_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"dmul a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_neg_double.S b/runtime/interpreter/mterp/mips64/op_neg_double.S
new file mode 100644
index 0000000..a135d61
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_neg_double.S
@@ -0,0 +1,3 @@
+%include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
+    neg.d   f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_DOUBLE", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_neg_float.S b/runtime/interpreter/mterp/mips64/op_neg_float.S
new file mode 100644
index 0000000..78019f0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_neg_float.S
@@ -0,0 +1,3 @@
+%include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
+    neg.s   f0, f0
+%include "mips64/fcvtFooter.S" { "suffix":"_FLOAT", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_neg_int.S b/runtime/interpreter/mterp/mips64/op_neg_int.S
new file mode 100644
index 0000000..31538c0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_neg_int.S
@@ -0,0 +1 @@
+%include "mips64/unop.S" {"instr":"subu    a0, zero, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_neg_long.S b/runtime/interpreter/mterp/mips64/op_neg_long.S
new file mode 100644
index 0000000..bc80d06
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_neg_long.S
@@ -0,0 +1 @@
+%include "mips64/unopWide.S" {"instr":"dsubu   a0, zero, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_new_array.S b/runtime/interpreter/mterp/mips64/op_new_array.S
new file mode 100644
index 0000000..d78b4ac
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_new_array.S
@@ -0,0 +1,19 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    .extern MterpNewArray
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    move    a3, rSELF
+    jal     MterpNewArray
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_new_instance.S b/runtime/interpreter/mterp/mips64/op_new_instance.S
new file mode 100644
index 0000000..cc5e13e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_new_instance.S
@@ -0,0 +1,14 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    .extern MterpNewInstance
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rSELF
+    move    a2, rINST
+    jal     MterpNewInstance            # (shadow_frame, self, inst_data)
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_nop.S b/runtime/interpreter/mterp/mips64/op_nop.S
new file mode 100644
index 0000000..cc803a7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_not_int.S b/runtime/interpreter/mterp/mips64/op_not_int.S
new file mode 100644
index 0000000..5954095
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_not_int.S
@@ -0,0 +1 @@
+%include "mips64/unop.S" {"instr":"nor     a0, zero, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_not_long.S b/runtime/interpreter/mterp/mips64/op_not_long.S
new file mode 100644
index 0000000..c8f5da7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_not_long.S
@@ -0,0 +1 @@
+%include "mips64/unopWide.S" {"instr":"nor     a0, zero, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_or_int.S b/runtime/interpreter/mterp/mips64/op_or_int.S
new file mode 100644
index 0000000..0102355
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_or_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_or_int_2addr.S b/runtime/interpreter/mterp/mips64/op_or_int_2addr.S
new file mode 100644
index 0000000..eed8900
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_or_int_lit16.S b/runtime/interpreter/mterp/mips64/op_or_int_lit16.S
new file mode 100644
index 0000000..16a0f3e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_or_int_lit8.S b/runtime/interpreter/mterp/mips64/op_or_int_lit8.S
new file mode 100644
index 0000000..dbbf790
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_or_long.S b/runtime/interpreter/mterp/mips64/op_or_long.S
new file mode 100644
index 0000000..e6f8639
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_or_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_or_long_2addr.S b/runtime/interpreter/mterp/mips64/op_or_long_2addr.S
new file mode 100644
index 0000000..ad5e6c8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"or a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_packed_switch.S b/runtime/interpreter/mterp/mips64/op_packed_switch.S
new file mode 100644
index 0000000..27ce580
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_packed_switch.S
@@ -0,0 +1,22 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBBBBBB */
+    .extern $func
+    .extern MterpProfileBranch
+    lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
+    srl     a3, rINST, 8                # a3 <- AA
+    ins     a0, a1, 16, 16              # a0 <- BBBBbbbb
+    GET_VREG a1, a3                     # a1 <- vAA
+    dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
+    jal     $func                       # v0 <- code-unit branch offset
+    move    rINST, v0
+    b       MterpCommonTakenBranchNoFlags
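
What the two helpers compute, reconstructed from the dex switch-payload layouts in schematic C++ (hypothetical signatures; the real helpers take a pointer to the raw payload and return the branch offset in 16-bit code units, with 3 -- the width of the switch instruction itself -- meaning fall through):

    #include <cstdint>

    // packed-switch payload: first_key plus `size` targets for consecutive keys.
    int32_t DoPackedSwitchSketch(const int32_t* targets, int32_t first_key,
                                 uint16_t size, int32_t test) {
      int64_t idx = static_cast<int64_t>(test) - first_key;
      return (idx >= 0 && idx < size) ? targets[idx] : 3;
    }

    // sparse-switch payload: sorted keys[] with a parallel targets[] array.
    int32_t DoSparseSwitchSketch(const int32_t* keys, const int32_t* targets,
                                 uint16_t size, int32_t test) {
      int32_t lo = 0, hi = static_cast<int32_t>(size) - 1;
      while (lo <= hi) {
        int32_t mid = lo + (hi - lo) / 2;
        if (keys[mid] < test)      lo = mid + 1;
        else if (keys[mid] > test) hi = mid - 1;
        else return targets[mid];
      }
      return 3;  // no match: fall through
    }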
diff --git a/runtime/interpreter/mterp/mips64/op_rem_double.S b/runtime/interpreter/mterp/mips64/op_rem_double.S
new file mode 100644
index 0000000..ba61cfd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_double.S
@@ -0,0 +1,12 @@
+    /* rem-double vAA, vBB, vCC */
+    .extern fmod
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f12, a2             # f12 <- vBB
+    GET_VREG_DOUBLE f13, a3             # f13 <- vCC
+    jal     fmod                        # f0 <- f12 op f13
+    srl     a4, rINST, 8                # a4 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
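
Floating-point rem has no MIPS instruction, unlike the other arithmetic ops in this family, so the handler makes an out-of-line call into libm. The operation is exactly C's fmod, whose result takes the sign of the dividend:

    #include <cmath>

    double RemDouble(double vBB, double vCC) {
      return std::fmod(vBB, vCC);  // what `jal fmod` computes into f0
    }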
diff --git a/runtime/interpreter/mterp/mips64/op_rem_double_2addr.S b/runtime/interpreter/mterp/mips64/op_rem_double_2addr.S
new file mode 100644
index 0000000..c649f0d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_double_2addr.S
@@ -0,0 +1,12 @@
+    /* rem-double/2addr vA, vB */
+    .extern fmod
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f12, a2             # f12 <- vA
+    GET_VREG_DOUBLE f13, a3             # f13 <- vB
+    jal     fmod                        # f0 <- f12 op f13
+    ext     a2, rINST, 8, 4             # a2 <- A
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_rem_float.S b/runtime/interpreter/mterp/mips64/op_rem_float.S
new file mode 100644
index 0000000..3967b0b
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_float.S
@@ -0,0 +1,12 @@
+    /* rem-float vAA, vBB, vCC */
+    .extern fmodf
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f12, a2              # f12 <- vBB
+    GET_VREG_FLOAT f13, a3              # f13 <- vCC
+    jal     fmodf                       # f0 <- f12 op f13
+    srl     a4, rINST, 8                # a4 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_rem_float_2addr.S b/runtime/interpreter/mterp/mips64/op_rem_float_2addr.S
new file mode 100644
index 0000000..3fed41e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_float_2addr.S
@@ -0,0 +1,12 @@
+    /* rem-float/2addr vA, vB */
+    .extern fmodf
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f12, a2              # f12 <- vA
+    GET_VREG_FLOAT f13, a3              # f13 <- vB
+    jal     fmodf                       # f0 <- f12 op f13
+    ext     a2, rINST, 8, 4             # a2 <- A
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_rem_int.S b/runtime/interpreter/mterp/mips64/op_rem_int.S
new file mode 100644
index 0000000..c05e9c4
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_rem_int_2addr.S b/runtime/interpreter/mterp/mips64/op_rem_int_2addr.S
new file mode 100644
index 0000000..a4e162d
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_rem_int_lit16.S b/runtime/interpreter/mterp/mips64/op_rem_int_lit16.S
new file mode 100644
index 0000000..3284f14
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_rem_int_lit8.S b/runtime/interpreter/mterp/mips64/op_rem_int_lit8.S
new file mode 100644
index 0000000..1e6a584
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"mod a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_rem_long.S b/runtime/interpreter/mterp/mips64/op_rem_long.S
new file mode 100644
index 0000000..32b2d19
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"dmod a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_rem_long_2addr.S b/runtime/interpreter/mterp/mips64/op_rem_long_2addr.S
new file mode 100644
index 0000000..ad658e1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"dmod a0, a0, a1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/mips64/op_return.S b/runtime/interpreter/mterp/mips64/op_return.S
new file mode 100644
index 0000000..ec986b8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_return.S
@@ -0,0 +1,18 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA
+    b       MterpReturn
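
All the return handlers run this suspend-point check before leaving the frame, so a thread asked to stop does so at a well-defined point. In C++ terms (flag bit values illustrative; the indirect call is what `jal MterpSuspendCheck` does):

    #include <cstdint>

    constexpr uint32_t kSuspendRequest    = 1u << 0;  // illustrative values
    constexpr uint32_t kCheckpointRequest = 1u << 1;

    void CheckSuspendBeforeReturn(uint32_t thread_flags, void (*suspend)()) {
      if ((thread_flags & (kSuspendRequest | kCheckpointRequest)) != 0) {
        suspend();  // jal MterpSuspendCheck
      }
    }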
diff --git a/runtime/interpreter/mterp/mips64/op_return_object.S b/runtime/interpreter/mterp/mips64/op_return_object.S
new file mode 100644
index 0000000..67f1871
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_return_object.S
@@ -0,0 +1 @@
+%include "mips64/op_return.S"
diff --git a/runtime/interpreter/mterp/mips64/op_return_void.S b/runtime/interpreter/mterp/mips64/op_return_void.S
new file mode 100644
index 0000000..05253ae
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_return_void.S
@@ -0,0 +1,11 @@
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    li      a0, 0
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
new file mode 100644
index 0000000..f67e811
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
@@ -0,0 +1,9 @@
+    .extern MterpSuspendCheck
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    li      a0, 0
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/mips64/op_return_wide.S b/runtime/interpreter/mterp/mips64/op_return_wide.S
new file mode 100644
index 0000000..544e027
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_return_wide.S
@@ -0,0 +1,17 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_WIDE a0, a2                # a0 <- vAA
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/mips64/op_rsub_int.S b/runtime/interpreter/mterp/mips64/op_rsub_int.S
new file mode 100644
index 0000000..fa31a0a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rsub_int.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"subu a0, a1, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_rsub_int_lit8.S b/runtime/interpreter/mterp/mips64/op_rsub_int_lit8.S
new file mode 100644
index 0000000..c31ff32
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"subu a0, a1, a0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget.S b/runtime/interpreter/mterp/mips64/op_sget.S
new file mode 100644
index 0000000..bd2cfe3
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern $helper
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     $helper
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+    $extend
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if $is_object
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0
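
The per-type `extend` hook exists because the narrow getters return their result in the low bits of v0 and the vreg must hold a properly widened 32-bit value. Written out in C++, mirroring the one-line variants that follow:

    #include <cstdint>

    int32_t ExtendBoolean(int32_t v) { return v & 0xff; }                 // and v0, v0, 0xff
    int32_t ExtendByte(int32_t v)    { return static_cast<int8_t>(v); }   // seb v0, v0
    int32_t ExtendChar(int32_t v)    { return v & 0xffff; }               // and v0, v0, 0xffff
    int32_t ExtendShort(int32_t v)   { return static_cast<int16_t>(v); }  // seh v0, v0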
diff --git a/runtime/interpreter/mterp/mips64/op_sget_boolean.S b/runtime/interpreter/mterp/mips64/op_sget_boolean.S
new file mode 100644
index 0000000..e7b1844
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "mips64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"and v0, v0, 0xff"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_byte.S b/runtime/interpreter/mterp/mips64/op_sget_byte.S
new file mode 100644
index 0000000..52a2e4a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget_byte.S
@@ -0,0 +1 @@
+%include "mips64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"seb v0, v0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_char.S b/runtime/interpreter/mterp/mips64/op_sget_char.S
new file mode 100644
index 0000000..873d82a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget_char.S
@@ -0,0 +1 @@
+%include "mips64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"and v0, v0, 0xffff"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_object.S b/runtime/interpreter/mterp/mips64/op_sget_object.S
new file mode 100644
index 0000000..3108417
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget_object.S
@@ -0,0 +1 @@
+%include "mips64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_short.S b/runtime/interpreter/mterp/mips64/op_sget_short.S
new file mode 100644
index 0000000..fed4e76
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget_short.S
@@ -0,0 +1 @@
+%include "mips64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"seh v0, v0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_wide.S b/runtime/interpreter/mterp/mips64/op_sget_wide.S
new file mode 100644
index 0000000..77124d1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sget_wide.S
@@ -0,0 +1,18 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     * for: sget-wide
+     */
+    /* sget-wide vAA, field//BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGet64StaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a4, rINST, 8                # a4 <- AA
+    bnez    a3, MterpException          # bail out
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG_WIDE v0, a4
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_shl_int.S b/runtime/interpreter/mterp/mips64/op_shl_int.S
new file mode 100644
index 0000000..784481f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shl_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"sll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shl_int_2addr.S b/runtime/interpreter/mterp/mips64/op_shl_int_2addr.S
new file mode 100644
index 0000000..a6c8a78
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"sll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shl_int_lit8.S b/runtime/interpreter/mterp/mips64/op_shl_int_lit8.S
new file mode 100644
index 0000000..36ef207
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"sll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shl_long.S b/runtime/interpreter/mterp/mips64/op_shl_long.S
new file mode 100644
index 0000000..225a2cb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shl_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"dsll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shl_long_2addr.S b/runtime/interpreter/mterp/mips64/op_shl_long_2addr.S
new file mode 100644
index 0000000..c04d882
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shl_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"dsll a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shr_int.S b/runtime/interpreter/mterp/mips64/op_shr_int.S
new file mode 100644
index 0000000..eded037
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shr_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"sra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shr_int_2addr.S b/runtime/interpreter/mterp/mips64/op_shr_int_2addr.S
new file mode 100644
index 0000000..5b4d96f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"sra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shr_int_lit8.S b/runtime/interpreter/mterp/mips64/op_shr_int_lit8.S
new file mode 100644
index 0000000..175eb86
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"sra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shr_long.S b/runtime/interpreter/mterp/mips64/op_shr_long.S
new file mode 100644
index 0000000..0db38c8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shr_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"dsra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_shr_long_2addr.S b/runtime/interpreter/mterp/mips64/op_shr_long_2addr.S
new file mode 100644
index 0000000..48131ad
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_shr_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"dsra a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sparse_switch.S b/runtime/interpreter/mterp/mips64/op_sparse_switch.S
new file mode 100644
index 0000000..b065aaa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "mips64/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/mips64/op_sput.S b/runtime/interpreter/mterp/mips64/op_sput.S
new file mode 100644
index 0000000..142f18f
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput.S
@@ -0,0 +1,20 @@
+%default { "helper":"artSet32StaticFromCode" }
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    .extern $helper
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    srl     a3, rINST, 8                # a3 <- AA
+    GET_VREG a1, a3                     # a1 <- fp[AA]
+    ld      a2, OFF_FP_METHOD(rFP)
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     $helper
+    bnezc   v0, MterpException          # 0 on success
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_sput_boolean.S b/runtime/interpreter/mterp/mips64/op_sput_boolean.S
new file mode 100644
index 0000000..f5b8dbf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "mips64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_byte.S b/runtime/interpreter/mterp/mips64/op_sput_byte.S
new file mode 100644
index 0000000..f5b8dbf
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput_byte.S
@@ -0,0 +1 @@
+%include "mips64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_char.S b/runtime/interpreter/mterp/mips64/op_sput_char.S
new file mode 100644
index 0000000..c4d195c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput_char.S
@@ -0,0 +1 @@
+%include "mips64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_object.S b/runtime/interpreter/mterp/mips64/op_sput_object.S
new file mode 100644
index 0000000..ef4c685
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput_object.S
@@ -0,0 +1,11 @@
+    .extern MterpSputObject
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    move    a3, rSELF
+    jal     MterpSputObject
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_sput_short.S b/runtime/interpreter/mterp/mips64/op_sput_short.S
new file mode 100644
index 0000000..c4d195c
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput_short.S
@@ -0,0 +1 @@
+%include "mips64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_wide.S b/runtime/interpreter/mterp/mips64/op_sput_wide.S
new file mode 100644
index 0000000..828ddc1
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sput_wide.S
@@ -0,0 +1,17 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    srl     a2, rINST, 8                # a2 <- AA
+    dlsa    a2, a2, rFP, 2
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet64IndirectStaticFromMterp
+    bnezc   v0, MterpException          # 0 on success, -1 on failure
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_sub_double.S b/runtime/interpreter/mterp/mips64/op_sub_double.S
new file mode 100644
index 0000000..40a6c89
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_double.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide.S" {"instr":"sub.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_double_2addr.S b/runtime/interpreter/mterp/mips64/op_sub_double_2addr.S
new file mode 100644
index 0000000..984737e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinopWide2addr.S" {"instr":"sub.d f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_float.S b/runtime/interpreter/mterp/mips64/op_sub_float.S
new file mode 100644
index 0000000..9010592
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_float.S
@@ -0,0 +1 @@
+%include "mips64/fbinop.S" {"instr":"sub.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_float_2addr.S b/runtime/interpreter/mterp/mips64/op_sub_float_2addr.S
new file mode 100644
index 0000000..e7d4ffe
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "mips64/fbinop2addr.S" {"instr":"sub.s f0, f0, f1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_int.S b/runtime/interpreter/mterp/mips64/op_sub_int.S
new file mode 100644
index 0000000..609ea05
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"subu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_int_2addr.S b/runtime/interpreter/mterp/mips64/op_sub_int_2addr.S
new file mode 100644
index 0000000..ba2f1e8
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"subu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_long.S b/runtime/interpreter/mterp/mips64/op_sub_long.S
new file mode 100644
index 0000000..09a6afd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"dsubu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_sub_long_2addr.S b/runtime/interpreter/mterp/mips64/op_sub_long_2addr.S
new file mode 100644
index 0000000..b9ec82a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"dsubu a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_throw.S b/runtime/interpreter/mterp/mips64/op_throw.S
new file mode 100644
index 0000000..6418d57
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_throw.S
@@ -0,0 +1,10 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA (exception object)
+    beqzc   a0, common_errNullObject
+    sd      a0, THREAD_EXCEPTION_OFFSET(rSELF)  # thread->exception <- obj
+    b       MterpException
diff --git a/runtime/interpreter/mterp/mips64/op_unused_3e.S b/runtime/interpreter/mterp/mips64/op_unused_3e.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_3e.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_3f.S b/runtime/interpreter/mterp/mips64/op_unused_3f.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_3f.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_40.S b/runtime/interpreter/mterp/mips64/op_unused_40.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_40.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_41.S b/runtime/interpreter/mterp/mips64/op_unused_41.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_41.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_42.S b/runtime/interpreter/mterp/mips64/op_unused_42.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_42.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_43.S b/runtime/interpreter/mterp/mips64/op_unused_43.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_43.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_79.S b/runtime/interpreter/mterp/mips64/op_unused_79.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_79.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_7a.S b/runtime/interpreter/mterp/mips64/op_unused_7a.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_7a.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f3.S b/runtime/interpreter/mterp/mips64/op_unused_f3.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f4.S b/runtime/interpreter/mterp/mips64/op_unused_f4.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f4.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f5.S b/runtime/interpreter/mterp/mips64/op_unused_f5.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f6.S b/runtime/interpreter/mterp/mips64/op_unused_f6.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f7.S b/runtime/interpreter/mterp/mips64/op_unused_f7.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f8.S b/runtime/interpreter/mterp/mips64/op_unused_f8.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f9.S b/runtime/interpreter/mterp/mips64/op_unused_f9.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fa.S b/runtime/interpreter/mterp/mips64/op_unused_fa.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_fa.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fb.S b/runtime/interpreter/mterp/mips64/op_unused_fb.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_fb.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fc.S b/runtime/interpreter/mterp/mips64/op_unused_fc.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_fc.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fd.S b/runtime/interpreter/mterp/mips64/op_unused_fd.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_fd.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fe.S b/runtime/interpreter/mterp/mips64/op_unused_fe.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_fe.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_ff.S b/runtime/interpreter/mterp/mips64/op_unused_ff.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_ff.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_ushr_int.S b/runtime/interpreter/mterp/mips64/op_ushr_int.S
new file mode 100644
index 0000000..37c90cb
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_ushr_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"srl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/mips64/op_ushr_int_2addr.S
new file mode 100644
index 0000000..d6bf413
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"srl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/mips64/op_ushr_int_lit8.S
new file mode 100644
index 0000000..2a2d843
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"srl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_ushr_long.S b/runtime/interpreter/mterp/mips64/op_ushr_long.S
new file mode 100644
index 0000000..e724405
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_ushr_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"dsrl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_ushr_long_2addr.S b/runtime/interpreter/mterp/mips64/op_ushr_long_2addr.S
new file mode 100644
index 0000000..d2cf135
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_ushr_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"dsrl a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_xor_int.S b/runtime/interpreter/mterp/mips64/op_xor_int.S
new file mode 100644
index 0000000..ee25ebc
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_xor_int.S
@@ -0,0 +1 @@
+%include "mips64/binop.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_xor_int_2addr.S b/runtime/interpreter/mterp/mips64/op_xor_int_2addr.S
new file mode 100644
index 0000000..0f04967
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binop2addr.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_xor_int_lit16.S b/runtime/interpreter/mterp/mips64/op_xor_int_lit16.S
new file mode 100644
index 0000000..ecb21ae
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "mips64/binopLit16.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_xor_int_lit8.S b/runtime/interpreter/mterp/mips64/op_xor_int_lit8.S
new file mode 100644
index 0000000..115ae99
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "mips64/binopLit8.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_xor_long.S b/runtime/interpreter/mterp/mips64/op_xor_long.S
new file mode 100644
index 0000000..7ebabc2
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_xor_long.S
@@ -0,0 +1 @@
+%include "mips64/binopWide.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/op_xor_long_2addr.S b/runtime/interpreter/mterp/mips64/op_xor_long_2addr.S
new file mode 100644
index 0000000..0f1919a
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "mips64/binopWide2addr.S" {"instr":"xor a0, a0, a1"}
diff --git a/runtime/interpreter/mterp/mips64/unop.S b/runtime/interpreter/mterp/mips64/unop.S
new file mode 100644
index 0000000..e3f7ea0
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/unop.S
@@ -0,0 +1,18 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      not-int, neg-int
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+    $preinstr                           # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    $instr                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
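As an illustration of how gen-mterp.py consumes this template, a unary handler is a one-line stub in the same style as the other handler files in this change. A hypothetical neg-int handler, for example (the instruction fragment is assumed for the example; each real handler file supplies its own "instr" line):

    %include "mips64/unop.S" {"instr":"subu a0, zero, a0"}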
diff --git a/runtime/interpreter/mterp/mips64/unopWide.S b/runtime/interpreter/mterp/mips64/unopWide.S
new file mode 100644
index 0000000..c0dd1aa
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/unopWide.S
@@ -0,0 +1,17 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * For: not-long, neg-long
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a3                # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+    $preinstr                           # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    $instr                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/unused.S b/runtime/interpreter/mterp/mips64/unused.S
new file mode 100644
index 0000000..30d38bd
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail out to the reference interpreter to throw.
+ */
+    b       MterpFallback
diff --git a/runtime/interpreter/mterp/mips64/zcmp.S b/runtime/interpreter/mterp/mips64/zcmp.S
new file mode 100644
index 0000000..75db49e
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/zcmp.S
@@ -0,0 +1,17 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    b${condition}zc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
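Per the comment above, an if-lez handler would instantiate this template with the "le" condition fragment, again as a one-line stub (shown here as an assumed illustration rather than a file from this hunk):

    %include "mips64/zcmp.S" { "condition":"le" }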
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
new file mode 100644
index 0000000..20a0753
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -0,0 +1,789 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Mterp entry point and support functions.
+ */
+#include "interpreter/interpreter_common.h"
+#include "entrypoints/entrypoint_utils-inl.h"
+#include "mterp.h"
+#include "debugger.h"
+
+namespace art {
+namespace interpreter {
+/*
+ * Verify some constants used by the mterp interpreter.
+ */
+void CheckMterpAsmConstants() {
+  /*
+   * If we're using computed goto instruction transitions, make sure
+   * none of the handlers overflows the 128-byte limit.  This won't tell
+   * us which one did, but if any one is too big the total size will
+   * overflow.
+   */
+  const int width = 128;
+  int interp_size = (uintptr_t) artMterpAsmInstructionEnd -
+                    (uintptr_t) artMterpAsmInstructionStart;
+  if ((interp_size == 0) || (interp_size != (art::kNumPackedOpcodes * width))) {
+      LOG(art::FATAL) << "ERROR: unexpected asm interp size " << interp_size
+                      << " (did an instruction handler exceed " << width << " bytes?)";
+  }
+}
+
+void InitMterpTls(Thread* self) {
+  self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
+  self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
+  self->SetMterpCurrentIBase((kTraceExecutionEnabled || kTestExportPC) ?
+                             artMterpAsmAltInstructionStart :
+                             artMterpAsmInstructionStart);
+}
+
+/*
+ * Find the matching case.  Returns the offset to the handler instructions.
+ *
+ * Returns 3 if we don't find a match (it's the size of the sparse-switch
+ * instruction).
+ */
+extern "C" ssize_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
+  const int kInstrLen = 3;
+  uint16_t size;
+  const int32_t* keys;
+  const int32_t* entries;
+
+  /*
+   * Sparse switch data format:
+   *  ushort ident = 0x0200   magic value
+   *  ushort size             number of entries in the table; > 0
+   *  int keys[size]          keys, sorted low-to-high; 32-bit aligned
+   *  int targets[size]       branch targets, relative to switch opcode
+   *
+   * Total size is (2+size*4) 16-bit code units.
+   */
+
+  uint16_t signature = *switchData++;
+  DCHECK_EQ(signature, static_cast<uint16_t>(art::Instruction::kSparseSwitchSignature));
+
+  size = *switchData++;
+
+  /* The keys are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  keys = reinterpret_cast<const int32_t*>(switchData);
+
+  /* The entries are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  entries = keys + size;
+
+  /*
+   * Binary-search through the array of keys, which are guaranteed to
+   * be sorted low-to-high.
+   */
+  int lo = 0;
+  int hi = size - 1;
+  while (lo <= hi) {
+    int mid = (lo + hi) >> 1;
+
+    int32_t foundVal = keys[mid];
+    if (testVal < foundVal) {
+      hi = mid - 1;
+    } else if (testVal > foundVal) {
+      lo = mid + 1;
+    } else {
+      return entries[mid];
+    }
+  }
+  return kInstrLen;
+}
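To make the payload layout concrete, here is a standalone C++ sketch (not part of the patch) that builds a two-entry sparse-switch table and resolves it the same way the function above does. The key and target values are invented for the example, and the encoding assumes a little-endian host, since each 32-bit word is stored as two 16-bit code units:

    #include <cstdint>
    #include <cstdio>

    // Mirrors MterpDoSparseSwitch: binary-search the sorted keys, return the
    // matching branch target, or 3 (the switch instruction's own size) on miss.
    int32_t DoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
      switchData++;                          // skip ident (0x0200)
      uint16_t size = *switchData++;
      const int32_t* keys = reinterpret_cast<const int32_t*>(switchData);
      const int32_t* entries = keys + size;  // targets follow the keys
      int lo = 0, hi = size - 1;
      while (lo <= hi) {
        int mid = (lo + hi) >> 1;
        if (testVal < keys[mid]) {
          hi = mid - 1;
        } else if (testVal > keys[mid]) {
          lo = mid + 1;
        } else {
          return entries[mid];
        }
      }
      return 3;
    }

    int main() {
      alignas(4) const uint16_t payload[] = {
          0x0200, 2,      // ident, size
          10, 0, 100, 0,  // keys: 10, 100 (little-endian halves)
          5, 0, 9, 0,     // targets: +5, +9
      };
      std::printf("%d %d\n",
                  DoSparseSwitch(payload, 100),  // matches key 100 -> 9
                  DoSparseSwitch(payload, 42));  // no match -> 3
      return 0;
    }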
+
+extern "C" ssize_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
+  const int kInstrLen = 3;
+
+  /*
+   * Packed switch data format:
+   *  ushort ident = 0x0100   magic value
+   *  ushort size             number of entries in the table
+   *  int first_key           first (and lowest) switch case value
+   *  int targets[size]       branch targets, relative to switch opcode
+   *
+   * Total size is (4+size*2) 16-bit code units.
+   */
+  uint16_t signature = *switchData++;
+  DCHECK_EQ(signature, static_cast<uint16_t>(art::Instruction::kPackedSwitchSignature));
+
+  uint16_t size = *switchData++;
+
+  int32_t firstKey = *switchData++;
+  firstKey |= (*switchData++) << 16;
+
+  int index = testVal - firstKey;
+  if (index < 0 || index >= size) {
+    return kInstrLen;
+  }
+
+  /*
+   * The entries are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  const int32_t* entries = reinterpret_cast<const int32_t*>(switchData);
+  return entries[index];
+}
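By contrast, the packed switch handled above needs no search: the lookup is one subtraction plus a bounds check. A minimal C++ sketch with invented values:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int32_t firstKey = 7;
      const int32_t targets[] = {4, 6, 8};  // branch offsets for keys 7, 8, 9
      const int32_t size = 3;
      for (int32_t testVal : {8, 100}) {
        int32_t index = testVal - firstKey;
        // Out-of-range values fall through past the 3-unit instruction.
        int32_t offset = (index < 0 || index >= size) ? 3 : targets[index];
        std::printf("testVal=%d -> offset %d\n", testVal, offset);  // 6, then 3
      }
      return 0;
    }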
+
+extern "C" size_t MterpShouldSwitchInterpreters()
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  return instrumentation->NonJitProfilingActive() || Dbg::IsDebuggerActive();
+}
+
+
+extern "C" size_t MterpInvokeVirtual(Thread* self,
+                                     ShadowFrame* shadow_frame,
+                                     uint16_t* dex_pc_ptr,
+                                     uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kVirtual, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeSuper(Thread* self,
+                                   ShadowFrame* shadow_frame,
+                                   uint16_t* dex_pc_ptr,
+                                   uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kSuper, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeInterface(Thread* self,
+                                       ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,
+                                       uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kInterface, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeDirect(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    uint16_t* dex_pc_ptr,
+                                    uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kDirect, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeStatic(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    uint16_t* dex_pc_ptr,
+                                    uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kStatic, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeVirtualRange(Thread* self,
+                                          ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,
+                                          uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kVirtual, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeSuperRange(Thread* self,
+                                        ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,
+                                        uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kSuper, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeInterfaceRange(Thread* self,
+                                            ShadowFrame* shadow_frame,
+                                            uint16_t* dex_pc_ptr,
+                                            uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kInterface, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeDirectRange(Thread* self,
+                                         ShadowFrame* shadow_frame,
+                                         uint16_t* dex_pc_ptr,
+                                         uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kDirect, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeStaticRange(Thread* self,
+                                         ShadowFrame* shadow_frame,
+                                         uint16_t* dex_pc_ptr,
+                                         uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kStatic, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeVirtualQuick(Thread* self,
+                                          ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,
+                                          uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvokeVirtualQuick<false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" size_t MterpInvokeVirtualQuickRange(Thread* self,
+                                               ShadowFrame* shadow_frame,
+                                               uint16_t* dex_pc_ptr,
+                                               uint16_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvokeVirtualQuick<true>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" void MterpThreadFenceForConstructor() {
+  QuasiAtomic::ThreadFenceForConstructor();
+}
+
+extern "C" size_t MterpConstString(uint32_t index,
+                                   uint32_t tgt_vreg,
+                                   ShadowFrame* shadow_frame,
+                                   Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  String* s = ResolveString(self, *shadow_frame, index);
+  if (UNLIKELY(s == nullptr)) {
+    return true;
+  }
+  shadow_frame->SetVRegReference(tgt_vreg, s);
+  return false;
+}
+
+extern "C" size_t MterpConstClass(uint32_t index,
+                                  uint32_t tgt_vreg,
+                                  ShadowFrame* shadow_frame,
+                                  Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return true;
+  }
+  shadow_frame->SetVRegReference(tgt_vreg, c);
+  return false;
+}
+
+extern "C" size_t MterpCheckCast(uint32_t index,
+                                 StackReference<mirror::Object>* vreg_addr,
+                                 art::ArtMethod* method,
+                                 Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return true;
+  }
+  // Must load obj from vreg following ResolveVerifyAndClinit due to moving gc.
+  Object* obj = vreg_addr->AsMirrorPtr();
+  if (UNLIKELY(obj != nullptr && !obj->InstanceOf(c))) {
+    ThrowClassCastException(c, obj->GetClass());
+    return true;
+  }
+  return false;
+}
+
+extern "C" size_t MterpInstanceOf(uint32_t index,
+                                  StackReference<mirror::Object>* vreg_addr,
+                                  art::ArtMethod* method,
+                                  Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return false;  // Caller will check for pending exception.  Return value unimportant.
+  }
+  // Must load obj from vreg following ResolveVerifyAndClinit due to moving gc.
+  Object* obj = vreg_addr->AsMirrorPtr();
+  return (obj != nullptr) && obj->InstanceOf(c);
+}
+
+extern "C" size_t MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return FillArrayData(obj, payload);
+}
+
+extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  Object* obj = nullptr;
+  Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame->GetMethod(),
+                                    self, false, false);
+  if (LIKELY(c != nullptr)) {
+    if (UNLIKELY(c->IsStringClass())) {
+      gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+      obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
+    } else {
+      obj = AllocObjectFromCode<false, true>(
+        inst->VRegB_21c(), shadow_frame->GetMethod(), self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
+    }
+  }
+  if (UNLIKELY(obj == nullptr)) {
+    return false;
+  }
+  obj->GetClass()->AssertInitializedOrInitializingInThread(self);
+  shadow_frame->SetVRegReference(inst->VRegA_21c(inst_data), obj);
+  return true;
+}
+
+extern "C" size_t MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFieldPut<StaticObjectWrite, Primitive::kPrimNot, false, false>
+      (self, *shadow_frame, inst, inst_data);
+}
+
+extern "C" size_t MterpIputObject(ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,
+                                  uint32_t inst_data,
+                                  Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false>
+      (self, *shadow_frame, inst, inst_data);
+}
+
+extern "C" size_t MterpIputObjectQuick(ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,
+                                       uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data);
+}
+
+extern "C" size_t MterpAputObject(ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,
+                                  uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
+  if (UNLIKELY(a == nullptr)) {
+    return false;
+  }
+  int32_t index = shadow_frame->GetVReg(inst->VRegC_23x());
+  Object* val = shadow_frame->GetVRegReference(inst->VRegA_23x(inst_data));
+  ObjectArray<Object>* array = a->AsObjectArray<Object>();
+  if (array->CheckIsValidIndex(index) && array->CheckAssignable(val)) {
+    array->SetWithoutChecks<false>(index, val);
+    return true;
+  }
+  return false;
+}
+
+extern "C" size_t MterpFilledNewArray(ShadowFrame* shadow_frame,
+                                      uint16_t* dex_pc_ptr,
+                                      Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self,
+                                               shadow_frame->GetResultRegister());
+}
+
+extern "C" size_t MterpFilledNewArrayRange(ShadowFrame* shadow_frame,
+                                           uint16_t* dex_pc_ptr,
+                                           Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self,
+                                              shadow_frame->GetResultRegister());
+}
+
+extern "C" size_t MterpNewArray(ShadowFrame* shadow_frame,
+                                uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
+  Object* obj = AllocArrayFromCode<false, true>(
+      inst->VRegC_22c(), length, shadow_frame->GetMethod(), self,
+      Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  if (UNLIKELY(obj == nullptr)) {
+      return false;
+  }
+  shadow_frame->SetVRegReference(inst->VRegA_22c(inst_data), obj);
+  return true;
+}
+
+extern "C" size_t MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(self->IsExceptionPending());
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  uint32_t found_dex_pc = FindNextInstructionFollowingException(self, *shadow_frame,
+                                                                shadow_frame->GetDexPC(),
+                                                                instrumentation);
+  if (found_dex_pc == DexFile::kDexNoIndex) {
+    return false;
+  }
+  // OK - we can deal with it.  Update and continue.
+  shadow_frame->SetDexPC(found_dex_pc);
+  return true;
+}
+
+extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  uint16_t inst_data = inst->Fetch16(0);
+  if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
+    self->AssertPendingException();
+  } else {
+    self->AssertNoPendingException();
+  }
+  if (kTraceExecutionEnabled) {
+    uint32_t dex_pc = dex_pc_ptr - shadow_frame->GetCodeItem()->insns_;
+    TraceExecution(*shadow_frame, inst, dex_pc);
+  }
+  if (kTestExportPC) {
+    // Save invalid dex pc to force segfault if improperly used.
+    shadow_frame->SetDexPCPtr(reinterpret_cast<uint16_t*>(kExportPCPoison));
+  }
+}
+
+extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "DivideByZero: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "ArrayIndex: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NegativeArraySize: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NoSuchMethod: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "ExceptionThrown: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NullObject: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "Fallback: " << inst->Opcode(inst_data) << ", Suspend Pending?: "
+            << self->IsExceptionPending();
+}
+
+extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "OSR: " << inst->Opcode(inst_data) << ", offset = " << offset;
+}
+
+extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  if (flags & kCheckpointRequest) {
+    LOG(INFO) << "Checkpoint fallback: " << inst->Opcode(inst_data);
+  } else if (flags & kSuspendRequest) {
+    LOG(INFO) << "Suspend fallback: " << inst->Opcode(inst_data);
+  }
+}
+
+extern "C" size_t MterpSuspendCheck(Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  self->AllowThreadSuspension();
+  return MterpShouldSwitchInterpreters();
+}
+
+extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
+                                                   ArtMethod* referrer,
+                                                   uint64_t* new_value,
+                                                   Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
+  if (LIKELY(field != nullptr)) {
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(field->GetDeclaringClass(), *new_value);
+    return 0;  // success
+  }
+  field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t));
+  if (LIKELY(field != nullptr)) {
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(field->GetDeclaringClass(), *new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
+                                            mirror::Object* obj,
+                                            uint8_t new_value,
+                                            ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimBoolean) {
+      field->SetBoolean<false>(obj, new_value);
+    } else {
+      DCHECK_EQ(Primitive::kPrimByte, type);
+      field->SetByte<false>(obj, new_value);
+    }
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" ssize_t artSet16InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint16_t new_value,
+                                             ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int16_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimChar) {
+      field->SetChar<false>(obj, new_value);
+    } else {
+      DCHECK_EQ(Primitive::kPrimShort, type);
+      field->SetShort<false>(obj, new_value);
+    }
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" ssize_t artSet32InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint32_t new_value,
+                                             ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int32_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->Set32<false>(obj, new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" ssize_t artSet64InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint64_t* new_value,
+                                             ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int64_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->Set64<false>(obj, *new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" ssize_t artSetObjInstanceFromMterp(uint32_t field_idx,
+                                              mirror::Object* obj,
+                                              mirror::Object* new_value,
+                                              ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+                                          sizeof(mirror::HeapReference<mirror::Object>));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->SetObj<false>(obj, new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (UNLIKELY(arr == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  }
+  ObjectArray<Object>* array = arr->AsObjectArray<Object>();
+  if (LIKELY(array->CheckIsValidIndex(index))) {
+    return array->GetWithoutChecks(index);
+  } else {
+    return nullptr;
+  }
+}
+
+extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (UNLIKELY(obj == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  }
+  return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
+}
+
+/*
+ * Create a hotness_countdown based on the current method hotness_count and profiling
+ * mode.  In short, determine how many hotness events we hit before reporting back
+ * to the full instrumentation via MterpAddHotnessBatch.  Called once on entry to the method,
+ * and regenerated following batch updates.
+ */
+extern "C" ssize_t MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint16_t hotness_count = method->GetCounter();
+  int32_t countdown_value = jit::kJitHotnessDisabled;
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    int32_t warm_threshold = jit->WarmMethodThreshold();
+    int32_t hot_threshold = jit->HotMethodThreshold();
+    int32_t osr_threshold = jit->OSRMethodThreshold();
+    if (hotness_count < warm_threshold) {
+      countdown_value = warm_threshold - hotness_count;
+    } else if (hotness_count < hot_threshold) {
+      countdown_value = hot_threshold - hotness_count;
+    } else if (hotness_count < osr_threshold) {
+      countdown_value = osr_threshold - hotness_count;
+    } else {
+      countdown_value = jit::kJitCheckForOSR;
+    }
+    if (jit::Jit::ShouldUsePriorityThreadWeight()) {
+      int32_t priority_thread_weight = jit->PriorityThreadWeight();
+      countdown_value = std::min(countdown_value, countdown_value / priority_thread_weight);
+    }
+  }
+  /*
+   * The actual hotness threshold may exceed the range of our int16_t countdown value.  This is
+   * not a problem, though.  We can just break it down into smaller chunks.
+   */
+  countdown_value = std::min(countdown_value,
+                             static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
+  shadow_frame->SetCachedHotnessCountdown(countdown_value);
+  shadow_frame->SetHotnessCountdown(countdown_value);
+  return countdown_value;
+}
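A worked example may help. With hypothetical thresholds of 500 (warm), 1000 (hot), and 2000 (OSR), standing in for the values the real code reads from the JIT options, a method entering with a hotness count of 750 gets a countdown of 250; that is, mterp reports back after 250 more hotness events. A standalone C++ sketch of the same arithmetic (the priority-thread-weight scaling is omitted):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    int main() {
      // Hypothetical thresholds; the real ones come from the JIT options.
      const int32_t warm = 500, hot = 1000, osr = 2000;
      const uint16_t hotness_count = 750;  // method is already warm
      int32_t countdown;
      if (hotness_count < warm) {
        countdown = warm - hotness_count;
      } else if (hotness_count < hot) {
        countdown = hot - hotness_count;   // taken: 1000 - 750 = 250
      } else if (hotness_count < osr) {
        countdown = osr - hotness_count;
      } else {
        countdown = -1;                    // stand-in for jit::kJitCheckForOSR
      }
      // Clamp to the int16_t range used by the shadow frame's countdown fields.
      countdown = std::min(countdown,
                           static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
      std::printf("countdown = %d\n", countdown);  // prints: countdown = 250
      return 0;
    }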
+
+/*
+ * Report a batch of hotness events to the instrumentation and then return the new
+ * countdown value to the next time we should report.
+ */
+extern "C" ssize_t MterpAddHotnessBatch(ArtMethod* method,
+                                        ShadowFrame* shadow_frame,
+                                        Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    int16_t count = shadow_frame->GetCachedHotnessCountdown() - shadow_frame->GetHotnessCountdown();
+    jit->AddSamples(self, method, count, /*with_backedges*/ true);
+  }
+  return MterpSetUpHotnessCountdown(method, shadow_frame);
+}
+
+// TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
+extern "C" size_t MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if ((jit != nullptr) && (offset <= 0)) {
+    jit->AddSamples(self, method, 1, /*with_backedges*/ true);
+  }
+  int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame);
+  if (countdown_value == jit::kJitCheckForOSR) {
+    return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+  } else {
+    return false;
+  }
+}
+
+extern "C" size_t MterpMaybeDoOnStackReplacement(Thread* self,
+                                                 ShadowFrame* shadow_frame,
+                                                 int32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (offset <= 0) {
+    // Keep updating hotness in case a compilation request was dropped.  Eventually it will retry.
+    jit->AddSamples(self, method, 1, /*with_backedges*/ true);
+  }
+  // Assumes caller has already determined that an OSR check is appropriate.
+  return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+}
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
new file mode 100644
index 0000000..45ab98b
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
+#define ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
+
+/*
+ * Mterp assembly handler bases
+ */
+extern "C" void* artMterpAsmInstructionStart[];
+extern "C" void* artMterpAsmInstructionEnd[];
+extern "C" void* artMterpAsmAltInstructionStart[];
+extern "C" void* artMterpAsmAltInstructionEnd[];
+
+namespace art {
+namespace interpreter {
+
+void InitMterpTls(Thread* self);
+void CheckMterpAsmConstants();
+
+// The return type should be 'bool' but our assembly stubs expect 'bool'
+// to be zero-extended to the whole register and that's broken on x86-64
+// as a 'bool' is returned in 'al' and the rest of 'rax' is garbage.
+// TODO: Fix mterp and stubs and revert this workaround. http://b/30232671
+extern "C" size_t MterpShouldSwitchInterpreters();
+
+// Poison value for TestExportPC.  If we segfault with this value, it means that a mterp
+// handler for a recent opcode failed to export the Dalvik PC prior to a possible exit from
+// the mterp environment.
+constexpr uintptr_t kExportPCPoison = 0xdead00ff;
+// Set true to enable poison testing of ExportPC.  Uses Alt interpreter.
+constexpr bool kTestExportPC = false;
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
diff --git a/runtime/interpreter/mterp/mterp_stub.cc b/runtime/interpreter/mterp/mterp_stub.cc
new file mode 100644
index 0000000..7e7337e
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp_stub.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "../interpreter_common.h"
+
+/*
+ * Stub definitions for targets without mterp implementations.
+ */
+
+namespace art {
+namespace interpreter {
+/*
+ * Call this during initialization to verify that the values in asm-constants.h
+ * are still correct.
+ */
+void CheckMterpAsmConstants() {
+  // Dummy version for targets where mterp is not implemented.
+}
+
+void InitMterpTls(Thread* self) {
+  self->SetMterpDefaultIBase(nullptr);
+  self->SetMterpCurrentIBase(nullptr);
+  self->SetMterpAltIBase(nullptr);
+}
+
+/*
+ * The platform-specific implementation must provide this.
+ */
+extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame, JValue* result_register)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self); UNUSED(shadow_frame); UNUSED(code_item); UNUSED(result_register);
+  UNIMPLEMENTED(art::FATAL);
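+  // Unreachable: UNIMPLEMENTED(art::FATAL) aborts; the return merely
+  // satisfies the compiler.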
+  return false;
+}
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
new file mode 100644
index 0000000..c33df6d
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -0,0 +1,12141 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'arm'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: arm/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body
+  (doesn't handle invoke; allows higher-level code to create the frame &
+  shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame
+  (and excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.  If VFP
+is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+s0-s15 (d0-d7, q0-q3) do not need to be.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+Mterp and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rSELF     self (Thread) pointer
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+  r10 rPROFILE  branch profiling countdown
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
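+
+/*
+ * All of the fixed registers above sit in the callee-save range r4-r11, so
+ * calling out to a C helper (e.g. "bl MterpSuspendCheck") clobbers at most
+ * r0-r3, ip and lr and leaves the interpreter state intact.
+ */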
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC      r4
+#define rFP      r5
+#define rSELF    r6
+#define rINST    r7
+#define rIBASE   r8
+#define rPROFILE r10
+#define rREFS    r11
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
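+
+/*
+ * Example: "ldr r0, [rFP, #OFF_FP_METHOD]" fetches the ArtMethod* with a
+ * negative immediate, since the method_ field lies below the vreg array
+ * that rFP points at.
+ */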
+
+/*
+ * "export" the PC to the dex_pc field in the shadow frame, for the benefit
+ * of future exception objects.  Must be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+.macro EXPORT_DEX_PC tmp
+    ldr  \tmp, [rFP, #OFF_FP_CODE_ITEM]
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    add  \tmp, #CODEITEM_INSNS_OFFSET
+    sub  \tmp, rPC, \tmp
+    asr  \tmp, #1
+    str  \tmp, [rFP, #OFF_FP_DEX_PC]
+.endm
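+
+/*
+ * EXPORT_DEX_PC thus stores dex_pc = (rPC - &code_item->insns_[0]) / 2; for
+ * example, a rPC six bytes into insns[] yields a dex_pc of 3 code units.
+ */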
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    ldrh    rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+    add  rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg.  Updates
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ *
+ * We want to write "ldrh rINST, [rPC, _reg, lsl #1]!", but some of the
+ * bits that hold the shift distance are used for the half/byte/sign flags.
+ * In some cases we can pre-double _reg for free, so we require a byte offset
+ * here.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    ldrh    rINST, [rPC, \reg]!
+.endm
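+
+/*
+ * Typical use (sketch): a taken branch pre-doubles the signed code-unit
+ * offset, e.g. "movs r1, rINST, lsl #1", then issues
+ * "FETCH_ADVANCE_INST_RB r1".
+ */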
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [rPC, #((\count)*2+(\byte))]
+.endm
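+
+/*
+ * For example, "FETCH_B r0, 1, 1" loads the byte at rPC+3: the high byte of
+ * the next code unit, since code units are stored little-endian.
+ */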
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Because this only jumps within the
+ * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
+ */
+.macro GOTO_OPCODE reg
+    add     pc, rIBASE, \reg, lsl #7
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     pc, \base, \reg, lsl #7
+.endm
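+
+/*
+ * The "lsl #7" encodes the handler stride: every handler is padded to 128
+ * bytes (see the ".balign 128" before each one below), so opcode 0x01
+ * dispatches to rIBASE + 0x80, opcode 0x02 to rIBASE + 0x100, and so on.
+ */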
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    mov     \reg, #0
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_SHADOW reg, vreg
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+
+/*
+ * Clear the corresponding shadow regs for a vreg pair
+ */
+.macro CLEAR_SHADOW_PAIR vreg, tmp1, tmp2
+    mov     \tmp1, #0
+    add     \tmp2, \vreg, #1
+    SET_VREG_SHADOW \tmp1, \vreg
+    SET_VREG_SHADOW \tmp1, \tmp2
+.endm
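+
+/*
+ * Example: after "move-wide v6, v8" the destination pair holds primitive
+ * data, so CLEAR_SHADOW_PAIR zeroes refs[6] and refs[7]; a stale pointer
+ * left in the shadow copy could otherwise be mistaken for a live reference.
+ */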
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
+
+/* File: arm/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align  2
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  r0  Thread* self
+ *  r1  code_item
+ *  r2  ShadowFrame
+ *  r3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .fnstart
+    .save {r3-r10,fp,lr}
+    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs (r3 just for 64-bit alignment)
+
+    /* Remember the return register */
+    str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     r1, [r2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     rSELF, r0
+    ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs.
+    VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
+    ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
+    add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
+    add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* Set up for backwards branches & osr profiling */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          @ load rINST from rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* NOTE: no fallthrough */
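+
+/*
+ * Frame layout at this point (low to high addresses), for n vregs:
+ *
+ *     rFP   -> vreg[0] ... vreg[n-1]
+ *     rREFS -> ref[0]  ... ref[n-1]     (shadow copies of references)
+ *
+ * rREFS = rFP + n*4, as computed by VREG_INDEX_TO_ADDR above.
+ */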
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: arm/op_nop.S */
+    FETCH_ADVANCE_INST 1                @ advance to next instr, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    GOTO_OPCODE ip                      @ execute it
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: arm/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: arm/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: arm/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: arm/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 1                         @ r3<- BBBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: arm/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 2                         @ r3<- BBBB
+    FETCH r2, 1                         @ r2<- AAAA
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR lr, r2           @ r2<- &fp[AAAA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
+    stmia   lr, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: arm/op_move_object.S */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: arm/op_move_object_from16.S */
+/* File: arm/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: arm/op_move_object_16.S */
+/* File: arm/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: arm/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: arm/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: arm/op_move_result_object.S */
+/* File: arm/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: arm/op_move_exception.S */
+    /* move-exception vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     r1, #0                      @ r1<- 0
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    SET_VREG_OBJECT r3, r2              @ fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str     r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ clear exception
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: arm/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r0, #0
+    mov     r1, #0
+    b      MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: arm/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: arm/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
+    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: arm/op_return_object.S */
+/* File: arm/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: arm/op_const_4.S */
+    /* const/4 vA, #+B */
+    sbfx    r1, rINST, #12, #4          @ r1<- sssssssB (sign-extended)
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    SET_VREG r1, r0                     @ fp[A]<- r1
+    GOTO_OPCODE ip                      @ execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: arm/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: arm/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: arm/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, r0, lsl #16             @ r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: arm/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: arm/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH r0, 1                         @ r0<- 0000bbbb (low)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_S r2, 2                       @ r2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: arm/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (low middle)
+    FETCH r2, 3                         @ r2<- hhhh (high middle)
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb (low word)
+    FETCH r3, 4                         @ r3<- HHHH (high)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
+    CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: arm/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH r1, 1                         @ r1<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, #0                      @ r0<- 00000000
+    mov     r1, r1, lsl #16             @ r1<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: arm/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: arm/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 3                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: arm/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstClass             @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cmp     r0, #0
+    bne     MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: arm/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                      @ r0<- vAA (object)
+    mov      r1, rSELF                   @ r1<- self
+    bl       artLockObjectFromCode
+    cmp      r0, #0
+    bne      MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   @ extract opcode from rINST
+    GOTO_OPCODE ip                       @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: arm/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8          @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA (object)
+    mov      r1, rSELF                  @ r1<- self
+    bl       artUnlockObjectFromCode    @ r0<- success for unlock(self, obj)
+    cmp     r0, #0                      @ failed?
+    bne     MterpException
+    FETCH_ADVANCE_INST 1                @ before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: arm/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- BBBB
+    mov      r1, rINST, lsr #8          @ r1<- AA
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
+    ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
+    mov      r3, rSELF                  @ r3<- self
+    bl       MterpCheckCast             @ (index, &obj, method, self)
+    PREFETCH_INST 2
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: arm/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    FETCH     r0, 1                     @ r0<- CCCC
+    mov       r1, rINST, lsr #12        @ r1<- B
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
+    ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
+    mov       r3, rSELF                 @ r3<- self
+    bl        MterpInstanceOf           @ (index, &obj, method, self)
+    ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      r9, rINST, #8, #4         @ r9<- A
+    PREFETCH_INST 2
+    cmp       r1, #0                    @ exception pending?
+    bne       MterpException
+    ADVANCE 2                           @ advance rPC
+    SET_VREG r0, r9                     @ vA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: arm/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    GET_VREG r0, r1                     @ r0<- vB (object ref)
+    cmp     r0, #0                      @ is object null?
+    beq     common_errNullObject        @ yup, fail
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- array length
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r3, r2                     @ vA<- length
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: arm/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rSELF
+    mov     r2, rINST
+    bl      MterpNewInstance           @ (shadow_frame, self, inst_data)
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2               @ advance rPC, load rINST
+    GET_INST_OPCODE ip                 @ extract opcode from rINST
+    GOTO_OPCODE ip                     @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: arm/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: arm/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      MterpFilledNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: arm/op_filled_new_array_range.S */
+/* File: arm/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      MterpFilledNewArrayRange
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: arm/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r1, r0, r1, lsl #16         @ r1<- BBBBbbbb
+    GET_VREG r0, r3                     @ r0<- vAA (array object)
+    add     r1, rPC, r1, lsl #1         @ r1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          @ (obj, payload)
+    cmp     r0, #0                      @ 0 means an exception is thrown
+    beq     MterpPossibleException      @ exception?
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: arm/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r1, r2                      @ r1<- vAA (exception object)
+    cmp      r1, #0                      @ null object?
+    beq      common_errNullObject        @ yes, throw an NPE instead
+    str      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ thread->exception<- obj
+    b        MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: arm/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: arm/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: arm/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  The
+     * "orrs" below assembles the 32-bit offset and sets the condition
+     * flags needed for that test.
+     */
+    /* goto/32 +AAAAAAAA */
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r3, 2                         @ r3<- AAAA (hi)
+    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
+    b       MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: arm/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    movs    rINST, r0
+    b       MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: arm/op_sparse_switch.S */
+/* File: arm/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    movs    rINST, r0
+    b       MterpCommonTakenBranch
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: arm/op_cmpl_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    vcmpe.f32  s0, s1                   @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
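+
+/*
+ * Note: an unordered (NaN) compare sets neither "gt" nor "eq", so the -1
+ * default loaded above survives -- the cmpl NaN semantics.  The cmpg
+ * variants instead default to +1.
+ */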
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: arm/op_cmpg_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    vcmpe.f32 s0, s1                    @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: arm/op_cmpl_double.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: arm/op_cmpg_double.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: arm/op_cmp_long.S */
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    cmp     r0, r2
+    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
+    mov     ip, #0
+    mvnlt   ip, #0                      @ -1
+    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
+    orrne   ip, #1
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG ip, r9                     @ vAA<- ip
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
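+
+/*
+ * Why this works: "cmp r0, r2" followed by "sbcs ip, r1, r3" leaves N and V
+ * as if a full 64-bit subtraction had run, so the "lt" test is valid.  Z,
+ * however, only covers the high-word subtract, so the conditional
+ * "cmpeq r0, r2" re-checks the low words before "orrne" turns any
+ * inequality into +1 (OR-ing 1 into -1 leaves -1 intact).
+ */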
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: arm/op_if_eq.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    beq MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: arm/op_if_ne.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    bne MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: arm/op_if_lt.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    blt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: arm/op_if_ge.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    bge MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: arm/op_if_gt.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    bgt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: arm/op_if_le.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r0, r0                     @ r0<- vA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, r3                      @ compare (vA, vB)
+    ble MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: arm/op_if_eqz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    beq MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
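+    /*
+     * Illustrative note: zcmp is the one-operand specialization of bincmp
+     * above.  It compares vAA against an immediate zero instead of a second
+     * vreg, so only one GET_VREG is needed; the branch/fall-through logic is
+     * otherwise identical.
+     */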
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: arm/op_if_nez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    bne MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: arm/op_if_ltz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    blt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: arm/op_if_gez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    bge MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: arm/op_if_gtz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    bgt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: arm/op_if_lez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r0, r0                     @ r0<- vAA
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    cmp     r0, #0                      @ compare (vAA, 0)
+    ble MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: arm/op_unused_3e.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: arm/op_unused_3f.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: arm/op_unused_40.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: arm/op_unused_41.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: arm/op_unused_42.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: arm/op_unused_43.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #2     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldr   r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
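+    /*
+     * Illustrative sketch (not part of the generated handler): the null check
+     * plus the single unsigned compare above implement, roughly:
+     *
+     *     if (array == NULL) goto common_errNullObject;
+     *     if ((uint32_t)index >= (uint32_t)array->length) goto common_errArrayIndex;
+     *     vAA = array->data[index];   // ldr/ldrb/ldrh/... per element width
+     *
+     * Because the compare is unsigned (bcs), negative indices wrap to large
+     * values and are rejected by the same test.
+     */
+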
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: arm/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use LDRD.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: arm/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     @ (array, index)
+    ldr      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp      r1, #0
+    bne      MterpException
+    SET_VREG_OBJECT r0, r9
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      @ jump to next instruction
+
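+    /*
+     * Note: unlike the primitive aget variants, reference loads go through
+     * the runtime helper artAGetObjectFromMterp, presumably so the null and
+     * bounds checks plus any read-barrier handling for heap references stay
+     * in one place; the handler only tests the thread's pending-exception
+     * slot afterwards.
+     */
+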
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: arm/op_aget_boolean.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrb   r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: arm/op_aget_byte.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrsb   r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: arm/op_aget_char.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrh   r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: arm/op_aget_short.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrsh   r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r2, r9                     @ vAA<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #2     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str  r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
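+    /*
+     * Illustrative note: aput mirrors aget above (same null check and
+     * unsigned bounds test), but vAA is fetched only after both checks pass
+     * and is then stored with a width-appropriate str/strb/strh.
+     */
+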
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: arm/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ vBB[vCC]<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: arm/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpAputObject
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
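+    /*
+     * Note: storing a reference needs more than a bounds check: the runtime
+     * must also verify assignability (or throw ArrayStoreException) and
+     * record the write for the garbage collector, so the whole operation is
+     * delegated to MterpAputObject; a zero return value routes to
+     * MterpPossibleException.
+     */
+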
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: arm/op_aput_boolean.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strb  r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: arm/op_aput_byte.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strb  r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: arm/op_aput_char.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strh  r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: arm/op_aput_short.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strh  r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet32InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
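+    /*
+     * Illustrative note on the call above: AAPCS arguments r0..r3 are
+     * marshalled as (field_idx, object, referrer, self) before calling
+     * artGet32InstanceFromCode.  The .if 0/.else block is a template
+     * parameter: the object variant below is generated with .if 1, so its
+     * result is stored via SET_VREG_OBJECT to keep the shadow frame's
+     * reference tracking consistent.
+     */
+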
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: arm/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet64InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpException                @ bail out
+    CLEAR_SHADOW_PAIR r2, ip, lr           @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r3, r2              @ r3<- &fp[A]
+    stmia    r3, {r0-r1}                   @ fp[A]<- r0/r1
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: arm/op_iget_object.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetObjInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 1
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: arm/op_iget_boolean.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetBooleanInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: arm/op_iget_byte.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetByteInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: arm/op_iget_char.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetCharInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: arm/op_iget_short.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetShortInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet32InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
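+    /*
+     * Note: the iput family dispatches to width-specialized helpers, as the
+     * variants below show: artSet8InstanceFromMterp for boolean/byte,
+     * artSet16InstanceFromMterp for char/short, and the 32/64-bit forms for
+     * iput and iput-wide.  A nonzero return value means a pending exception.
+     */
+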
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: arm/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: arm/op_iput_object.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpIputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: arm/op_iput_boolean.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: arm/op_iput_byte.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: arm/op_iput_char.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: arm/op_iput_short.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet32StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
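+    /*
+     * Illustrative sketch (not part of the generated handler): static gets
+     * take no receiver, so only (field_idx, referrer, self) are passed, and
+     * failure is detected by reading the thread's pending-exception slot:
+     *
+     *     int32_t value = artGet32StaticFromCode(field_idx, referrer, self);
+     *     if (self->exception != NULL) goto MterpException;
+     *     vAA = value;
+     */
+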
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: arm/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     */
+    /* sget-wide vAA, field@BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet64StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r9, rINST, lsr #8             @ r9<- AA
+    VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    CLEAR_SHADOW_PAIR r9, r2, ip        @ Zero out the shadow regs
+    stmia lr, {r0-r1}                   @ vAA/vAA+1<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: arm/op_sget_object.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetObjStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 1
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: arm/op_sget_boolean.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetBooleanStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: arm/op_sget_byte.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetByteStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: arm/op_sget_char.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetCharStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: arm/op_sget_short.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetShortStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet32StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
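+    /*
+     * Note: sput is the mirror image of sget above, but the set helpers
+     * return a status word directly (0 on success), so the handler branches
+     * on r0 instead of reloading the thread's exception slot.
+     */
+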
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: arm/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    ldr     r1, [rFP, #OFF_FP_METHOD]
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    VREG_INDEX_TO_ADDR r2, r2
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: arm/op_sput_object.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpSputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: arm/op_sput_boolean.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: arm/op_sput_byte.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: arm/op_sput_char.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: arm/op_sput_short.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: arm/op_invoke_virtual.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtual
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback               @ bail to reference interpreter
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: arm/op_invoke_super.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeSuper
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: arm/op_invoke_direct.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeDirect
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: arm/op_invoke_static.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeStatic
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: arm/op_invoke_interface.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeInterface
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: arm/op_return_void_no_barrier.S */
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
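+                                        @ (suspend check taken only when a
+                                        @ suspend/checkpoint flag is set)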
+    blne    MterpSuspendCheck                       @ (self)
+    mov    r0, #0                       @ return value, low half
+    mov    r1, #0                       @ return value, high half
+    b      MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: arm/op_invoke_virtual_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: arm/op_invoke_super_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeSuperRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: arm/op_invoke_direct_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeDirectRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: arm/op_invoke_static_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeStaticRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: arm/op_invoke_interface_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeInterfaceRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: arm/op_unused_79.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: arm/op_unused_7a.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: arm/op_neg_int.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    rsb     r0, r0, #0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: arm/op_not_int.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mvn     r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: arm/op_neg_long.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    rsbs    r0, r0, #0                           @ optional op; may set condition codes
+    rsc     r1, r1, #0                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: arm/op_not_long.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mvn     r0, r0                           @ optional op; may set condition codes
+    mvn     r1, r1                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: arm/op_neg_float.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r0, r0, #0x80000000                              @ r0<- op, r0-r3 changed
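+                                        @ (adding 0x80000000 flips the IEEE
+                                        @ sign bit; any carry out is dropped)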
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: arm/op_neg_double.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    add     r1, r1, #0x80000000                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: arm/op_int_to_long.S */
+/* File: arm/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+                               @ optional op; may set condition codes
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r0, asr #31                              @ r0<- op, r0-r3 changed
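+                                        @ (asr #31 replicates the sign bit to
+                                        @ build the 64-bit high word)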
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: arm/op_int_to_float.S */
+/* File: arm/funop.S */
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fsitos  s1, s0                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: arm/op_int_to_double.S */
+/* File: arm/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fsitod  d0, s0                              @ d0<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: arm/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
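+                                        @ (the .if 0 above selects the plain
+                                        @ SET_VREG path; long-to-int never
+                                        @ stores a reference)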
+    GOTO_OPCODE ip                      @ execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: arm/op_long_to_float.S */
+/* File: arm/unopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0/r1", where
+     * "result" is a 32-bit quantity in r0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     *
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for op_move.)
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_l2f                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: arm/op_long_to_double.S */
+    /*
+     * Specialised 64-bit floating point operation.
+     *
+     * Note: The result will be returned in d2.
+     *
+     * For: long-to-double
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    vldr    d0, [r3]                    @ d0<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    vcvt.f64.s32    d1, s1              @ d1<- (double)(vBh)
+    vcvt.f64.u32    d2, s0              @ d2<- (double)(vBl)
+    vldr            d3, constvalop_long_to_double  @ d3<- 2^32 as a double
+    vmla.f64        d2, d1, d3          @ d2<- vBh*2^32 + vBl
+
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    vstr.64 d2, [r9]                    @ vA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+    /* literal pool helper */
+constvalop_long_to_double:
+    .8byte          0x41f0000000000000
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: arm/op_float_to_int.S */
+/* File: arm/funop.S */
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ftosizs s1, s0                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: arm/op_float_to_long.S */
+@include "arm/unopWider.S" {"instr":"bl      __aeabi_f2lz"}
+/* File: arm/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+                               @ optional op; may set condition codes
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    bl      f2l_doconv                              @ r0<- op, r0-r3 changed
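+                                        @ (f2l_doconv: local helper, used in
+                                        @ place of __aeabi_f2lz to get Java's
+                                        @ NaN/overflow results)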
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: arm/op_float_to_double.S */
+/* File: arm/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    vcvt.f64.f32  d0, s0                              @ d0<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: arm/op_double_to_int.S */
+/* File: arm/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ftosizd  s0, d0                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: arm/op_double_to_long.S */
+@include "arm/unopWide.S" {"instr":"bl      __aeabi_d2lz"}
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      d2l_doconv                              @ r0/r1<- op, r2-r3 changed
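+                                        @ (d2l_doconv: as with f2l_doconv,
+                                        @ replaces __aeabi_d2lz for Java
+                                        @ semantics)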
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: arm/op_double_to_float.S */
+/* File: arm/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    vcvt.f32.f64  s0, d0                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: arm/op_int_to_byte.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    sxtb    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: arm/op_int_to_char.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    uxth    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: arm/op_int_to_short.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    sxth    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: arm/op_add_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: arm/op_sub_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    sub     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: arm/op_mul_int.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: arm/op_div_int.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl    __aeabi_idiv                  @ r0<- op, r0-r3 changed
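+                                        @ (AEABI helper: numerator in r0,
+                                        @ denominator in r1, quotient -> r0)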
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: arm/op_rem_int.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls  r1, r1, r2, r0                 @ r1<- op, r0-r2 changed
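+                                        @ (r1<- r0 - r2*r1, i.e. dividend -
+                                        @ quotient*divisor = remainder)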
+#else
+    bl   __aeabi_idivmod                @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: arm/op_and_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: arm/op_or_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: arm/op_xor_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: arm/op_shl_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
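+                                        @ (shift count masked to 5 bits, per
+                                        @ Dalvik shl-int semantics)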
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: arm/op_shr_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: arm/op_ushr_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: arm/op_add_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    adds    r0, r0, r2                           @ optional op; may set condition codes
+    adc     r1, r1, r3                              @ result<- op, r0-r3 changed
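+                                        @ (adds/adc carry the low-word
+                                        @ overflow into the high word)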
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: arm/op_sub_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    subs    r0, r0, r2                           @ optional op; may set condition codes
+    sbc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: arm/op_mul_long.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
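+    /*
+     * Minimal C sketch of the decomposition above (X/W/Y/Z as in the
+     * diagram; illustrative only, not ART code):
+     *
+     *   uint64_t mul64(uint64_t a, uint64_t b) {
+     *       uint32_t X = (uint32_t)a, W = (uint32_t)(a >> 32);
+     *       uint32_t Z = (uint32_t)b, Y = (uint32_t)(b >> 32);
+     *       uint64_t ZX = (uint64_t)Z * X;                    // umull
+     *       uint32_t hi = Z*W + Y*X + (uint32_t)(ZX >> 32);   // mul/mla/add
+     *       return ((uint64_t)hi << 32) | (uint32_t)ZX;
+     *   }
+     */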
+    /* mul-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    add     r2, r2, lr                  @ r2<- (YxX + ZxW) + high(ZxX)
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: arm/op_div_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
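+    /* div-long: chkzero is 1 below; the __aeabi_ldivmod quotient (r0/r1) is stored. */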
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: arm/op_rem_long.S */
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
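+/* So rem-long stores the remainder pair r2/r3 below, i.e. vAA <- vBB % vCC. */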
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2,r3}     @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: arm/op_and_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r0, r0, r2                           @ optional op; may set condition codes
+    and     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: arm/op_or_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    orr     r0, r0, r2                           @ optional op; may set condition codes
+    orr     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: arm/op_xor_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    eor     r0, r0, r2                           @ optional op; may set condition codes
+    eor     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: arm/op_shl_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
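+    /*
+     * Sketch of the branch-free composition below (C-flavored pseudocode;
+     * note ARM register-specified shifts of 32 or more yield 0, unlike
+     * C, where they are undefined):
+     *
+     *   s = vCC & 63;
+     *   hi = (hi << s) | (lo >> (32 - s));   // merge bits leaving lo
+     *   if (s >= 32) hi = lo << (s - 32);    // the movpl fix-up
+     *   lo = lo << s;
+     */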
+    /* shl-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: arm/op_shr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
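+    /* Same composition as op_shl_long, mirrored; asr replicates the sign bit. */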
+    /* shr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: arm/op_ushr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
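+    /* As op_shr_long but with lsr, which zero-fills: the Java >>> operator. */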
+    /* ushr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: arm/op_add_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
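+    /*
+     * Sketch of the net effect (illustrative): both single-precision
+     * operands are loaded straight into s-registers, so add-float below
+     * is just vAA = vBB + vCC computed by one fadds, with no libcall.
+     */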
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fadds   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: arm/op_sub_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fsubs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: arm/op_mul_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fmuls   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: arm/op_div_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fdivs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: arm/op_rem_float.S */
+/* EABI doesn't define a float remainder function, but libm does */
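+/* i.e. the body below boils down to vAA = fmodf(vBB, vCC) (C99 libm). */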
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
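+    /*
+     * On the (INT_MIN / -1) note above: Java defines
+     * Integer.MIN_VALUE / -1 == Integer.MIN_VALUE, and the ARM helpers
+     * return exactly that without trapping, so no extra guard is needed.
+     */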
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      fmodf                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: arm/op_add_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    faddd   d2, d0, d1                              @ d2<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: arm/op_sub_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fsubd   d2, d0, d1                              @ d2<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: arm/op_mul_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fmuld   d2, d0, d1                              @ d2<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: arm/op_div_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fdivd   d2, d0, d1                              @ d2<- op
+    CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: arm/op_rem_double.S */
+/* EABI doesn't define a double remainder function, but libm does */
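+/* i.e. the body below boils down to vAA = fmod(vBB, vCC) on the wide pairs. */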
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     rINST, rINST, lsr #8        @ rINST<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      fmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: arm/op_add_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: arm/op_sub_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    sub     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: arm/op_mul_int_2addr.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: arm/op_div_int_2addr.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/2addr
+     *
+     */
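+    /*
+     * In effect (sketch): after the explicit vB == 0 check, this computes
+     * vA = vA / vB, truncating toward zero, via sdiv when available and
+     * __aeabi_idiv otherwise.
+     */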
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: arm/op_rem_int_2addr.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/2addr
+     *
+     */
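+    /*
+     * The sdiv/mls pair uses the identity rem = a - (a / b) * b:
+     *   r2 = r0 / r1;        // sdiv
+     *   r1 = r0 - r1 * r2;   // mls r1, r1, r2, r0
+     */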
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: arm/op_and_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: arm/op_or_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: arm/op_xor_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: arm/op_shl_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: arm/op_shr_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: arm/op_ushr_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: arm/op_add_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
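+    /* For add-long/2addr the adds/adc pair below computes vA += vB (64-bit). */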
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    adds    r0, r0, r2                           @ optional op; may set condition codes
+    adc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: arm/op_sub_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    subs    r0, r0, r2                           @ optional op; may set condition codes
+    sbc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: arm/op_mul_long_2addr.S */
+    /*
+     * Signed 64-bit integer multiply, "/2addr" version.
+     *
+     * See op_mul_long for an explanation.
+     *
+     * We get a little tight on registers, so to avoid looking up &fp[A]
+     * again we stuff it into rINST.
+     */
+    /* mul-long/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r2, r2, lr                  @ r2<- (YxX + ZxW) + high(ZxX)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: arm/op_div_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: arm/op_rem_long_2addr.S */
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2,r3}     @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
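+
+    /* Identical to div-long/2addr above except that the remainder half of
+     * the __aeabi_ldivmod result (r2/r3) is stored instead of the quotient
+     * (r0/r1). */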
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: arm/op_and_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r0, r0, r2                           @ optional op; may set condition codes
+    and     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: arm/op_or_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    orr     r0, r0, r2                           @ optional op; may set condition codes
+    orr     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: arm/op_xor_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    eor     r0, r0, r2                           @ optional op; may set condition codes
+    eor     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: arm/op_shl_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
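+
+    /*
+     * Illustrative C sketch of the 64-bit shift assembled from 32-bit ops
+     * (our names; shr/ushr below are analogous, using asr/lsr on the high
+     * word):
+     *
+     *   n  = vB & 63;
+     *   hi = (hi << n) | (lo >> (32 - n));  // bits crossing the word split
+     *   if (n >= 32) hi = lo << (n - 32);   // movpl: whole words shift over
+     *   lo = lo << n;
+     *
+     * ARM register-specified LSL/LSR shifts of 32 or more yield 0, so the
+     * (32 - n) term contributes nothing when n == 0.
+     */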
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: arm/op_shr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm/op_ushr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: arm/op_add_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+    fadds   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
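+
+    /* flds/fadds/fsts are pre-UAL VFP mnemonics (UAL: vldr/vadd.f32/vstr).
+     * The operands are addressed via &vA/&vB, so the values move directly
+     * between the vreg array and s0/s1 without passing through core
+     * registers. */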
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: arm/op_sub_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+    fsubs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: arm/op_mul_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+    fmuls   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: arm/op_div_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+    fdivs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: arm/op_rem_float_2addr.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmodf                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
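+
+    /* Illustrative C equivalent (our names): vA = fmodf(vA, vB).  C's fmodf
+     * computes x - trunc(x/y)*y, the same truncated-division remainder Java
+     * specifies for rem-float, so the libm result can be stored as-is. */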
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: arm/op_add_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+    faddd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: arm/op_sub_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+    fsubd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: arm/op_mul_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+    fmuld   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: arm/op_div_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+    fdivd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: arm/op_rem_double_2addr.S */
+/* EABI doesn't define a double remainder function, but libm does */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    rINST, rINST, #8, #4        @ rINST<- A
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      fmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: arm/op_add_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
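+
+    /*
+     * Decode sketch for the /lit16 format (our names): the opcode unit packs
+     * op|A|B and the following code unit is the literal:
+     *
+     *   int32_t  cccc = (int16_t)insns[pc + 1];  // FETCH_S, sign-extended
+     *   uint32_t a    = (inst >> 8) & 0xf;       // ubfx
+     *   uint32_t b    = inst >> 12;              // lsr #12
+     *   vregs[a] = vregs[b] + cccc;              // this handler's "add"
+     */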
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: arm/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: arm/op_mul_int_lit16.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: arm/op_div_int_lit16.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
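+
+    /* With __ARM_ARCH_EXT_IDIV__ the divide is a single sdiv; otherwise the
+     * EABI helper __aeabi_idiv is called and returns the quotient in r0.
+     * Both truncate toward zero, matching Java int division once the
+     * zero-divisor case has been branched out above. */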
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: arm/op_rem_int_lit16.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl     __aeabi_idivmod              @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
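+
+    /* The sdiv/mls pair computes r1 = r0 - (r0 / r1) * r1, i.e. the
+     * remainder, without a helper call; the fallback gets the same value
+     * from __aeabi_idivmod, which returns the remainder in r1. */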
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: arm/op_and_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: arm/op_or_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: arm/op_xor_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: arm/op_add_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+                                @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    add     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
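+
+    /*
+     * Decode sketch for the /lit8 format (our names): a single extra code
+     * unit carries both the source register and the signed literal:
+     *
+     *   int32_t  ccbb = (int16_t)insns[pc + 1];  // FETCH_S: ssssCCBB
+     *   uint32_t aa   = inst >> 8;               // destination vreg
+     *   uint32_t bb   = ccbb & 0xff;             // source vreg
+     *   int32_t  cc   = ccbb >> 8;               // literal (asr #8)
+     *   vregs[aa] = vregs[bb] + cc;              // this handler's "add"
+     *
+     * Here the extraction of CC is folded into the add itself via the
+     * "r3, asr #8" shifted operand.
+     */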
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm/op_rsub_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+                                @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    rsb     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: arm/op_mul_int_lit8.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    asr     r1, r3, #8                            @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: arm/op_div_int_lit8.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl   __aeabi_idiv                   @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: arm/op_rem_int_lit8.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl       __aeabi_idivmod            @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: arm/op_and_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+                                @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: arm/op_or_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+                                @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    orr     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: arm/op_xor_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+                                @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    eor     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: arm/op_shl_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
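+
+    /* The shift-by-literal forms override the "extract" hook: ubfx pulls CC
+     * out of bits 8-12 of r3, isolating the literal and masking the shift
+     * count to 0-31 in one instruction, as Java int shifts require. */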
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: arm/op_shr_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm/op_ushr_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldr     r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
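+
+    /*
+     * Illustrative C sketch of a quickened field get (our names): the field
+     * was resolved during quickening, so CCCC is already a raw byte offset:
+     *
+     *   obj = vregs[b];
+     *   if (obj == NULL) throw NullPointerException;     // cmp/beq above
+     *   vregs[a] = *(uint32_t *)((uint8_t *)obj + cccc); // ldr r0, [r3, r1]
+     */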
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: arm/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH ip, 1                         @ ip<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrd    r0, [r3, ip]                @ r0/r1<- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
+    CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
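+    /*
+     * Editor's note (not generated output): `ldrd r0, [r3, ip]`
+     * implicitly pairs r0/r1 for the 64-bit load, and CLEAR_SHADOW_PAIR
+     * zeroes the two shadow (reference) slots for vA/vA+1 so the GC
+     * never sees a stale object pointer where a wide value now lives.
+     */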
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: arm/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- object we're operating on
+    bl      artIGetObjectFromMterp      @ (obj, offset)
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    PREFETCH_INST 2
+    cmp     r3, #0
+    bne     MterpPossibleException      @ bail out
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
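+    /*
+     * Editor's note (not generated output): object loads call the
+     * runtime helper artIGetObjectFromMterp rather than doing a raw
+     * `ldr`, so read barriers are honored when the collector needs
+     * them, and a pending exception is checked before the store.
+     */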
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    str     r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: arm/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r3, 1                         @ r3<- field byte offset
+    GET_VREG r2, r2                     @ r2<- fp[B], the object pointer
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    cmp     r2, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
+    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strd    r0, [r2, r3]                @ obj.field<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
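+    /*
+     * Editor's note (not generated output): `ldmia r0, {r0-r1}` pulls
+     * the 64-bit value from fp[A]/fp[A+1], and `strd` stores it as an
+     * aligned pair; wide fields are 8-byte aligned, which strd relies
+     * on here (compare the "64 bits, aligned" note in iget-wide-quick).
+     */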
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: arm/op_iput_object_quick.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpIputObjectQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
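+    /*
+     * Editor's note (not generated output): reference stores cannot be
+     * a raw `str`; MterpIputObjectQuick performs the store together
+     * with the GC write barrier, and a zero result signals a pending
+     * exception.
+     */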
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm/op_invoke_virtual_quick.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
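+    /*
+     * Editor's note (not generated output): after any invoke the
+     * handler re-checks MterpShouldSwitchInterpreters, since the callee
+     * may have enabled instrumentation that requires leaving mterp for
+     * the reference interpreter via MterpFallback.
+     */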
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm/op_invoke_virtual_range_quick.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualQuickRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: arm/op_iput_boolean_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strb    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: arm/op_iput_byte_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strb    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: arm/op_iput_char_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strh    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: arm/op_iput_short_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strh    r0, [r3, r1]                @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: arm/op_iget_boolean_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrb    r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: arm/op_iget_byte_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrsb   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: arm/op_iget_char_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrh    r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: arm/op_iget_short_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrsh   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f3: /* 0xf3 */
+/* File: arm/op_unused_f3.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: arm/op_unused_f4.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f5: /* 0xf5 */
+/* File: arm/op_unused_f5.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f6: /* 0xf6 */
+/* File: arm/op_unused_f6.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f7: /* 0xf7 */
+/* File: arm/op_unused_f7.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f8: /* 0xf8 */
+/* File: arm/op_unused_f8.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f9: /* 0xf9 */
+/* File: arm/op_unused_f9.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: arm/op_unused_fa.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: arm/op_unused_fb.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: arm/op_unused_fc.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: arm/op_unused_fd.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: arm/op_unused_fe.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: arm/op_unused_ff.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+/* continuation for op_float_to_long */
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer.  The EABI convert function isn't doing this for us.
+ */
+f2l_doconv:
+    stmfd   sp!, {r4, lr}
+    mov     r1, #0x5f000000             @ (float)maxlong
+    mov     r4, r0
+    bl      __aeabi_fcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    popne   {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, #0xdf000000             @ (float)minlong
+    bl      __aeabi_fcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    popne   {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r4
+    bl      __aeabi_fcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    popeq   {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    bl      __aeabi_f2lz                @ convert float to long
+    ldmfd   sp!, {r4, pc}
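+/*
+ * Editor's note (not generated output): 0x5f000000 is 2^63 as an IEEE
+ * float (the nearest representable value to maxlong), and 0xdf000000 is
+ * -2^63, which is minlong exactly; args >= 2^63 clamp to
+ * 0x7fffffffffffffff, args <= -2^63 clamp to 0x8000000000000000, and a
+ * NaN (arg != arg) yields zero.
+ */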
+
+/* continuation for op_double_to_long */
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer.  The EABI convert function isn't doing this for us.
+ */
+d2l_doconv:
+    stmfd   sp!, {r4, r5, lr}           @ save regs
+    mov     r3, #0x43000000             @ maxlong, as a double (high word)
+    add     r3, #0x00e00000             @  0x43e00000
+    mov     r2, #0                      @ maxlong, as a double (low word)
+    sub     sp, sp, #4                  @ align for EABI
+    mov     r4, r0                      @ save a copy of r0
+    mov     r5, r1                      @  and r1
+    bl      __aeabi_dcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r3, #0xc3000000             @ minlong, as a double (high word)
+    add     r3, #0x00e00000             @  0xc3e00000
+    mov     r2, #0                      @ minlong, as a double (low word)
+    bl      __aeabi_dcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r2, r4                      @ compare against self
+    mov     r3, r5
+    bl      __aeabi_dcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    beq     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    bl      __aeabi_d2lz                @ convert double to long
+
+1:
+    add     sp, sp, #4
+    ldmfd   sp!, {r4, r5, pc}
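+/*
+ * Editor's note (not generated output): the word pair 0x43e00000 /
+ * 0x00000000 is 2^63 as an IEEE double and 0xc3e00000 / 0x00000000 is
+ * -2^63, so the clamping mirrors f2l_doconv above, with the 64-bit
+ * operands passed in r0/r1 and r2/r3 per the EABI.
+ */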
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (0 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
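+    /*
+     * Editor's note (not generated output): each ALT stub sits at the
+     * same 128-byte stride as the primary table, and `adrl` preloads lr
+     * with the matching primary handler so MterpCheckBefore can resume
+     * normal dispatch simply by returning through lr.
+     */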
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (1 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (2 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (3 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (4 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (5 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (6 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (7 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (8 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (9 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (10 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (11 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (12 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (13 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (14 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (15 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (16 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (17 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (18 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (19 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (20 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (21 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (22 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (23 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (24 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (25 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (26 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (27 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (28 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (29 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (30 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (31 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (32 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (33 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (34 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (35 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (36 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (37 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (38 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (39 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (40 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (41 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (42 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (43 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (44 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (45 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (46 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (47 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (48 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (49 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (50 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (51 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (52 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (53 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (54 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (55 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (56 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (57 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (58 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (59 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (60 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (61 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (62 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (63 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (64 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (65 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (66 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (67 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (68 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (69 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (70 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (71 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (72 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (73 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (74 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (75 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (76 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (77 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (78 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (79 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (80 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (81 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (82 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (83 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (84 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (85 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (86 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (87 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (88 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (89 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (90 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (91 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (92 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (93 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (94 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (95 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (96 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (97 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (98 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (99 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (100 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (101 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (102 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (103 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (104 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (105 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (106 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (107 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (108 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (109 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (110 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (111 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (112 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (113 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (114 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (115 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (116 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (117 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (118 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (119 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (120 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (121 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (122 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (123 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (124 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (125 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (126 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (127 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (128 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (129 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (130 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (131 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (132 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (133 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (134 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (135 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (136 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (137 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (138 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (139 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (140 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (141 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (142 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (143 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (144 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (145 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (146 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (147 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (148 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (149 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (150 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (151 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (152 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (153 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (154 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (155 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (156 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (157 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (158 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (159 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (160 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (161 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (162 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (163 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (164 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (165 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (166 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (167 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (168 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (169 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (170 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (171 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (172 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (173 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (174 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (175 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (176 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (177 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (178 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (179 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (180 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (181 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (182 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (183 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (184 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (185 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (186 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (187 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (188 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (189 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (190 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (191 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (192 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (193 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (194 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (195 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (196 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (197 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (198 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (199 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (200 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (201 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (202 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (203 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (204 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (205 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (206 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (207 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (208 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (209 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (210 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (211 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (212 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (213 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (214 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (215 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (216 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (217 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (218 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (219 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (220 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (221 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (222 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (223 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (224 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (225 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (226 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (227 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (228 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (229 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (230 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (231 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (232 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (233 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (234 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (235 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (236 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (237 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (238 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (239 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (240 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (241 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (242 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (243 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (244 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (245 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (246 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (247 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (248 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (249 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (250 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (251 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (252 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (253 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (254 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (255 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
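+
+/*
+ * A minimal C sketch of the table layout (illustrative only: HandlerFor and
+ * OpcodeHandler are made-up names, not runtime symbols).  Both the primary
+ * and ALT tables are 256 entries padded to 128 bytes each, so a handler
+ * address is pure arithmetic on the table base:
+ *
+ *   #include <stdint.h>
+ *   typedef void (*OpcodeHandler)(void);
+ *   // hypothetical helper; mirrors "artMterpAsmInstructionStart + (opcode * 128)"
+ *   static inline OpcodeHandler HandlerFor(uintptr_t table_base, uint8_t opcode) {
+ *       return (OpcodeHandler)(table_base + (uintptr_t)opcode * 128u);
+ *   }
+ *   // e.g. HandlerFor(alt_base, 0xff) lands on .L_ALT_op_unused_ff above.
+ */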
+/* File: arm/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    ldr  r2, [rSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
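+
+/*
+ * Every common_err* stub above follows the same shape; in C-like pseudocode
+ * (MterpLogXxxException is a placeholder for the matching logger, e.g.
+ * MterpLogDivideByZeroException):
+ *
+ *   EXPORT_PC();                               // record the throwing dex pc
+ *   #if MTERP_LOGGING
+ *   MterpLogXxxException(self, shadow_frame);  // placeholder name
+ *   #endif
+ *   goto MterpCommonFallback;                  // retry in reference interpreter
+ */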
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     r0, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    cmp     r0, #0                                  @ Exception pending?
+    beq     MterpFallback                           @ If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    @ (self, shadow_frame)
+    cmp     r0, #0
+    beq     MterpExceptionReturn                    @ no local catch, back to caller.
+    ldr     r0, [rFP, #OFF_FP_CODE_ITEM]
+    ldr     r1, [rFP, #OFF_FP_DEX_PC]
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     rPC, r0, #CODEITEM_INSNS_OFFSET
+    add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
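+
+/*
+ * The dex_pc_ptr recomputation above, as a C sketch (plain pointer
+ * arithmetic, assuming the offsets from asm_support.h; not runtime source):
+ *
+ *   const uint16_t* insns =
+ *       (const uint16_t*)((const char*)code_item + CODEITEM_INSNS_OFFSET);
+ *   const uint16_t* dex_pc_ptr = insns + dex_pc;  // lsl #1 == * sizeof(uint16_t)
+ */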
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     rINST, #0
+MterpCommonTakenBranch:
+    bgt     .L_forward_branch           @ don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmp     rPROFILE, #JIT_CHECK_OSR
+    beq     .L_osr_check
+    subgts  rPROFILE, #1
+    beq     .L_add_batch                @ counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    REFRESH_IBASE
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bne     .L_suspend_request_pending
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    REFRESH_IBASE                       @ might have changed during suspend
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_no_count_backwards:
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    bne     .L_resume_backward_branch
+.L_osr_check:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
+    beq     .L_check_osr_forward
+.L_resume_forward_branch:
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_check_osr_forward:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    mov     r2, rSELF
+    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
+    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
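+
+/*
+ * The branch policy above, condensed into illustrative pseudocode
+ * (maybe_osr and report_batch stand in for MterpMaybeDoOnStackReplacement
+ * and MterpAddHotnessBatch; this is a sketch, not the exact control flow):
+ *
+ *   if (offset > 0) {                                  // forward branch
+ *       if (profile == JIT_CHECK_OSR) maybe_osr(offset);
+ *   } else {                                           // backward branch
+ *       if (profile == JIT_CHECK_OSR) maybe_osr(offset);
+ *       else if (--profile == 0) profile = report_batch();
+ *       if (suspend_or_checkpoint_requested()) suspend_check();
+ *   }
+ *   dispatch_next();
+ */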
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     r0, #0                                  @ signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     r0, #1                                  @ signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, #1                                  @ signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     rPROFILE, #0
+    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
+
+MterpProfileActive:
+    mov     rINST, r0                               @ stash return value
+    /* Report cached hotness counts */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rSELF
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
+    mov     r0, rINST                               @ restore return value
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
+
+    .fnend
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
new file mode 100644
index 0000000..c7303b9
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -0,0 +1,11625 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'arm64'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: arm64/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat xFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_, via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references, via xFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM64 Runtime register usage conventions.
+
+  r0     : w0 is 32-bit return register and x0 is 64-bit.
+  r0-r7  : Argument registers.
+  r8-r15 : Caller save registers (used as temporary registers).
+  r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
+           the linker, by the trampolines and other stubs (the backend uses
+           these as temporary registers).
+  r18    : Caller save register (used as temporary register).
+  r19    : Pointer to thread-local storage.
+  r20-r29: Callee save registers.
+  r30    : (lr) is reserved (the link register).
+  rsp    : (sp) is reserved (the stack pointer).
+  rzr    : (zr) is reserved (the zero register).
+
+  Floating-point registers
+  v0-v31
+
+  v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
+           This is analogous to the C/C++ (hard-float) calling convention.
+  v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
+           Also used as temporary and codegen scratch registers.
+
+  v0-v7 and v16-v31 : trashed across C calls.
+  v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
+
+  v16-v31: Used as codegen temp/scratch.
+  v8-v15 : Can be used for promotion.
+
+  Must maintain 16-byte stack alignment.
+
+Mterp notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  x20  xPC       interpreted program counter, used for fetching instructions
+  x21  xFP       interpreted frame pointer, used for accessing locals and args
+  x22  xSELF     self (Thread) pointer
+  x23  xINST     first 16-bit code unit of current instruction
+  x24  xIBASE    interpreted instruction base pointer, used for computed goto
+  x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x26  wPROFILE  jit profile hotness countdown
+  x16  ip        scratch reg
+  x17  ip2       scratch reg (used by macros)
+
+Macros are provided for common operations.  They MUST NOT alter unspecified registers or condition
+codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/* During bringup, we'll use the shadow frame model instead of xFP */
+/* single-purpose registers, given names for clarity */
+#define xPC      x20
+#define xFP      x21
+#define xSELF    x22
+#define xINST    x23
+#define wINST    w23
+#define xIBASE   x24
+#define xREFS    x25
+#define wPROFILE w26
+#define xPROFILE x26
+#define ip       x16
+#define ip2      x17
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
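+
+/*
+ * As an illustrative C fragment (LoadDexPc is a made-up helper, not runtime
+ * source): with xFP parked at &vregs_[0], a ShadowFrame field at byte offset
+ * F is reached through the negative offset OFF_FP(F):
+ *
+ *   // fp points at the vregs base, exactly like xFP above
+ *   uint32_t LoadDexPc(const uint32_t* fp) {
+ *       return *(const uint32_t*)((const char*)fp + OFF_FP_DEX_PC);
+ *   }
+ */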
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For effiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+.endm
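+
+/*
+ * Converting the exported direct pointer back to an instruction offset is
+ * plain pointer subtraction against the insns[] base; a sketch (DexPcFromPtr
+ * is a made-up name, the real conversion lives in GetDexPC):
+ *
+ *   uint32_t DexPcFromPtr(const uint16_t* dex_pc_ptr, const uint16_t* insns) {
+ *       return (uint32_t)(dex_pc_ptr - insns);  // offset in 16-bit code units
+ *   }
+ */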
+
+/*
+ * Fetch the next instruction from xPC into wINST.  Does not advance xPC.
+ */
+.macro FETCH_INST
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances xPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to xPC and xINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update xPC.  Used to load
+ * xINST ahead of possible exception point.  Be sure to manually advance xPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]
+.endm
+
+/* Advance xPC by some number of code units. */
+.macro ADVANCE count
+  add  xPC, xPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg, advancing
+ * xPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    add     xPC, xPC, \reg, sxtw
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance xPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [xPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [xPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [xPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, xINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Clobbers _reg.
+ */
+
+.macro GOTO_OPCODE reg
+    add     \reg, xIBASE, \reg, lsl #7
+    br      \reg
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     \reg, \base, \reg, lsl #7
+    br      \reg
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [xFP, \vreg, uxtw #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     wzr, [xREFS, \vreg, uxtw #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     \reg, [xREFS, \vreg, uxtw #2]
+.endm
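+
+/*
+ * In pseudocode, the vregs and their reference shadow copies are parallel
+ * arrays (illustrative only):
+ *
+ *   fp[v] = value;  refs[v] = 0;     // SET_VREG: clear the reference slot
+ *   fp[v] = obj;    refs[v] = obj;   // SET_VREG_OBJECT: mirror the reference
+ */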
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.
+ * TUNING: can we do better here?
+ */
+.macro GET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    ldr     \reg, [ip2]
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    str     \reg, [ip2]
+    add     ip2, xREFS, \vreg, lsl #2
+    str     xzr, [ip2]
+.endm
+
+/*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
+
+/* File: arm64/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    .text
+
+/*
+ * Interpreter entry point.
+ * On entry:
+ *  x0  Thread* self
+ *  x1  code_item
+ *  x2  ShadowFrame
+ *  x3  JValue* result_register
+ */
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    stp     xPROFILE, x27, [sp, #-80]!
+    stp     xIBASE, xREFS, [sp, #16]
+    stp     xSELF, xINST, [sp, #32]
+    stp     xPC, xFP, [sp, #48]
+    stp     fp, lr, [sp, #64]
+    add     fp, sp, #64
+
+    /* Remember the return register */
+    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     xSELF, x0
+    ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.
+    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
+    ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
+    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
+    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* Set up for backwards branches & osr profiling */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          // load wINST from rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* NOTE: no fallthrough */
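+
+/*
+ * Layout of the 80-byte frame built above, low address to high:
+ *   [sp+ 0] x26/xPROFILE   [sp+ 8] x27
+ *   [sp+16] x24/xIBASE     [sp+24] x25/xREFS
+ *   [sp+32] x22/xSELF      [sp+40] x23/xINST
+ *   [sp+48] x20/xPC        [sp+56] x21/xFP
+ *   [sp+64] fp             [sp+72] lr
+ */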
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: arm64/op_nop.S */
+    FETCH_ADVANCE_INST 1                // advance to next instr, load rINST
+    GET_INST_OPCODE ip                  // ip<- opcode from rINST
+    GOTO_OPCODE ip                      // execute it
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: arm64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // r1<- BBBB
+    lsr     w0, wINST, #8               // r0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- r2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: arm64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load xINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: arm64/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE  x3, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE  x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: arm64/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 1                         // w3<- BBBB
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: arm64/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 2                         // w3<- BBBB
+    FETCH w2, 1                         // w2<- AAAA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    SET_VREG_WIDE x3, w2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: arm64/op_move_object.S */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 1
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: arm64/op_move_object_from16.S */
+/* File: arm64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // r1<- BBBB
+    lsr     w0, wINST, #8               // r0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- r2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: arm64/op_move_object_16.S */
+/* File: arm64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load xINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    .if 1
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: arm64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JValue.
+    ldr     w0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- r0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: arm64/op_move_result_wide.S */
+    /* for: move-result-wide */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JValue.
+    ldr     x0, [x0]                    // x0 <- result.j.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, x2                // fp[AA]<- r0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: arm64/op_move_result_object.S */
+/* File: arm64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JValue.
+    ldr     w0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- r0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: arm64/op_move_exception.S */
+    /* move-exception vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     x1, #0                      // w1<- 0
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    SET_VREG_OBJECT w3, w2              // fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str     x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // clear exception
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: arm64/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_void_check
+.Lop_return_void_return:
+    mov     x0, #0
+    b       MterpReturn
+.Lop_return_void_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_void_return
+
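+/*
+ * Every return opcode below repeats the shape above: test the
+ * THREAD_SUSPEND_REQUEST/THREAD_CHECKPOINT_REQUEST bits, detour through
+ * MterpSuspendCheck(self) when either is set, then load the result
+ * (a hard-wired zero for return-void) and branch to MterpReturn.
+ */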
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: arm64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_check
+.Lop_return_return:
+    lsr     w2, wINST, #8               // r2<- AA
+    GET_VREG w0, w2                     // r0<- vAA
+    b       MterpReturn
+.Lop_return_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: arm64/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_wide_check
+.Lop_return_wide_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x0, w2                // x0<- vAA
+    b       MterpReturn
+.Lop_return_wide_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_wide_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: arm64/op_return_object.S */
+/* File: arm64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_object_check
+.Lop_return_object_return:
+    lsr     w2, wINST, #8               // r2<- AA
+    GET_VREG w0, w2                     // r0<- vAA
+    b       MterpReturn
+.Lop_return_object_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_object_return
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: arm64/op_const_4.S */
+    /* const/4 vA, #+B */
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // ip<- opcode from xINST
+    SET_VREG w1, w0                     // fp[A]<- w1
+    GOTO_OPCODE ip                      // execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: arm64/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: arm64/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: arm64/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
+    lsr     w3, wINST, #8               // r3<- AA
+    lsl     w0, w0, #16                 // r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    SET_VREG w0, w3                     // vAA<- r0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: arm64/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S x0, 1                       // x0<- ssssssssssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: arm64/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (low)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_S x2, 2                       // x2<- ssssssssssssBBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     x0, x0, x2, lsl #16         // x0<- ssssssssBBBBbbbb
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: arm64/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (low middle)
+    FETCH w2, 3                         // w2<- hhhh (high middle)
+    FETCH w3, 4                         // w3<- HHHH (high)
+    lsr     w4, wINST, #8               // r4<- AA
+    FETCH_ADVANCE_INST 5                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w1, lsl #16         // w0<-         BBBBbbbb
+    orr     x0, x0, x2, lsl #32         // w0<-     hhhhBBBBbbbb
+    orr     x0, x0, x3, lsl #48         // w0<- HHHHhhhhBBBBbbbb
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+
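+/*
+ * Worked example: for const-wide vAA, #0x1122334455667788 the four code
+ * units fetched above are bbbb=0x7788, BBBB=0x5566, hhhh=0x3344 and
+ * HHHH=0x1122; the three orr-with-shift steps reassemble them into
+ * x0 = 0x1122334455667788 before the wide store.
+ */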
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: arm64/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH w0, 1                         // w0<- 0000BBBB (zero-extended)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    lsl     x0, x0, #48
+    SET_VREG_WIDE x0, w1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: arm64/op_const_string.S */
+    /* const/string vAA, String//BBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load rINST
+    cbnz    w0, MterpPossibleException  // let reference interpreter deal with it.
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/op_const_string_jumbo.S */
+    /* const/string vAA, String//BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w2, 2                         // w2<- BBBB (high)
+    lsr     w1, wINST, #8               // w1<- AA
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     // load rINST
+    cbnz    w0, MterpPossibleException      // let reference interpreter deal with it.
+    ADVANCE 3                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: arm64/op_const_class.S */
+    /* const/class vAA, Class//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstClass             // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cbnz    w0, MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: arm64/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                      // w0<- vAA (object)
+    mov      x1, xSELF                   // w1<- self
+    bl       artLockObjectFromCode
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   // extract opcode from rINST
+    GOTO_OPCODE ip                       // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: arm64/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8              // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA (object)
+    mov      x1, xSELF                  // x1<- self
+    bl       artUnlockObjectFromCode    // w0<- success for unlock(self, obj)
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1                // before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: arm64/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- BBBB
+    lsr      w1, wINST, #8              // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr      x2, [xFP, #OFF_FP_METHOD]  // w2<- method
+    mov      x3, xSELF                  // w3<- self
+    bl       MterpCheckCast             // (index, &obj, method, self)
+    PREFETCH_INST 2
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: arm64/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    EXPORT_PC
+    FETCH     w0, 1                     // w0<- CCCC
+    lsr       w1, wINST, #12            // w1<- B
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr       x2, [xFP, #OFF_FP_METHOD] // w2<- method
+    mov       x3, xSELF                 // w3<- self
+    bl        MterpInstanceOf           // (index, &obj, method, self)
+    ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      w2, wINST, #8, #4         // w2<- A
+    PREFETCH_INST 2
+    cbnz      x1, MterpException
+    ADVANCE 2                           // advance rPC
+    SET_VREG w0, w2                     // vA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: arm64/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w0, w1                     // w0<- vB (object ref)
+    cbz     w0, common_errNullObject    // yup, fail
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- array length
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w3, w2                     // vA<- length
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: arm64/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xSELF
+    mov     w2, wINST
+    bl      MterpNewInstance           // (shadow_frame, self, inst_data)
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2               // advance rPC, load rINST
+    GET_INST_OPCODE ip                 // extract opcode from rINST
+    GOTO_OPCODE ip                     // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: arm64/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpNewArray
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: arm64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xSELF
+    bl      MterpFilledNewArray
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/op_filled_new_array_range.S */
+/* File: arm64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xSELF
+    bl      MterpFilledNewArrayRange
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: arm64/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     x1, x0, x1, lsl #16         // x1<- ssssssssBBBBbbbb
+    GET_VREG w0, w3                     // w0<- vAA (array object)
+    add     x1, xPC, x1, lsl #1         // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          // (obj, payload)
+    cbz     w0, MterpPossibleException      // exception?
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: arm64/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // r2<- AA
+    GET_VREG w1, w2                      // r1<- vAA (exception object)
+    cbz      w1, common_errNullObject
+    str      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // thread->exception<- obj
+    b        MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: arm64/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: arm64/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: arm64/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: arm64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
+    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    sxtw    xINST, w0
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: arm64/op_sparse_switch.S */
+/* File: arm64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
+    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    sxtw    xINST, w0
+    b       MterpCommonTakenBranchNoFlags
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: arm64/op_cmpl_float.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG s1, w2
+    GET_VREG s2, w3
+    fcmp s1, s2
+    cset w0, ne
+    cneg w0, w0, lt
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
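+/*
+ * How the cset/cneg pair encodes {-1, 0, 1}: after fcmp, "cset w0, ne"
+ * yields 0 for equal operands and 1 otherwise; "cneg w0, w0, lt" flips
+ * the 1 to -1 when the result is less-than or unordered (lt tests
+ * N != V, and an unordered fcmp sets V).  The cmpg variants below use
+ * "cc" instead, which is false for unordered operands, so a NaN
+ * comparison yields +1 there and -1 here, as the Dalvik spec requires.
+ */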
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: arm64/op_cmpg_float.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG s1, w2
+    GET_VREG s2, w3
+    fcmp s1, s2
+    cset w0, ne
+    cneg w0, w0, cc
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: arm64/op_cmpl_double.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE d1, w2
+    GET_VREG_WIDE d2, w3
+    fcmp d1, d2
+    cset w0, ne
+    cneg w0, w0, lt
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: arm64/op_cmpg_double.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE d1, w2
+    GET_VREG_WIDE d2, w3
+    fcmp d1, d2
+    cset w0, ne
+    cneg w0, w0, cc
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: arm64/op_cmp_long.S */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE x1, w2
+    GET_VREG_WIDE x2, w3
+    cmp     x1, x2
+    cset    w0, ne
+    cneg    w0, w0, lt
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG w0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: arm64/op_if_eq.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.eq MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
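+/*
+ * Branch profiling note (inferred from the entry code, which seeds
+ * wPROFILE via MterpSetUpHotnessCountdown): taken branches funnel
+ * through MterpCommonTakenBranchNoFlags, where the hotness countdown is
+ * maintained; a wPROFILE equal to the JIT_CHECK_OSR sentinel makes even
+ * the not-taken path check for an on-stack-replacement transition.
+ */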
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: arm64/op_if_ne.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.ne MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: arm64/op_if_lt.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.lt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: arm64/op_if_ge.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.ge MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: arm64/op_if_gt.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.gt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: arm64/op_if_le.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    b.le MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: arm64/op_if_eqz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if 0
+    cmp     w2, #0                      // compare (vA, 0)
+    .endif
+    cbz     w2, MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: arm64/op_if_nez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if 0
+    cmp     w2, #0                      // compare (vA, 0)
+    .endif
+    cbnz    w2, MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: arm64/op_if_ltz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if 0
+    cmp     w2, #0                      // compare (vA, 0)
+    .endif
+    tbnz    w2, #31, MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: arm64/op_if_gez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if 0
+    cmp     w2, #0                      // compare (vA, 0)
+    .endif
+    tbz     w2, #31, MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: arm64/op_if_gtz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if 1
+    cmp     w2, #0                      // compare (vA, 0)
+    .endif
+    b.gt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: arm64/op_if_lez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    .if 1
+    cmp     w2, #0                      // compare (vA, 0)
+    .endif
+    b.le MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: arm64/op_unused_3e.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: arm64/op_unused_3f.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: arm64/op_unused_40.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: arm64/op_unused_41.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: arm64/op_unused_42.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: arm64/op_unused_43.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #2    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldr   w2, [x0, #MIRROR_INT_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
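+/*
+ * Bounds-check detail: "cmp w1, w3" + "bcs" is an unsigned compare, so
+ * a negative index (a huge unsigned value) also takes the
+ * common_errArrayIndex exit; one compare covers both range checks.  The
+ * address add may safely precede the check because the load only
+ * happens after the branch falls through.
+ */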
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: arm64/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject        // yes, bail
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    ldr     x2, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // x2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x2, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: arm64/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    FETCH_B w3, 1, 1                    // w3<- CC
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     // (array, index)
+    ldr      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr      w2, wINST, #8               // w2<- AA
+    PREFETCH_INST 2
+    cbnz     w1, MterpException
+    SET_VREG_OBJECT w0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: arm64/op_aget_boolean.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrb   w2, [x0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: arm64/op_aget_byte.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrsb   w2, [x0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: arm64/op_aget_char.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrh   w2, [x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: arm64/op_aget_short.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrsh   w2, [x0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #2     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str  w2, [x0, #MIRROR_INT_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
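+    /*
+     * Note: the value (vAA) is read only after the null and bounds checks
+     * pass, and the store scales the index by 4 (lsl #2) since aput moves
+     * 32-bit values; the variants below differ only in the shift amount,
+     * store instruction, and data offset.
+     */
+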
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: arm64/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    GET_VREG_WIDE x1, w4
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]
+    GOTO_OPCODE ip                      // jump to next instruction
+
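+    /*
+     * Note: GET_VREG_WIDE reads the 64-bit register pair vAA/vAA+1 into x1,
+     * and the index is scaled by 8 (lsl #3) to match the element width.
+     */
+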
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: arm64/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpAputObject
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
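+    /*
+     * Note: unlike the primitive aput variants, object stores are handled
+     * entirely by the MterpAputObject runtime helper, since they need an
+     * assignability (ArrayStoreException) check and a GC write barrier.
+     */
+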
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: arm64/op_aput_boolean.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #0     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strb  w2, [x0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: arm64/op_aput_byte.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #0     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strb  w2, [x0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: arm64/op_aput_char.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #1     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strh  w2, [x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: arm64/op_aput_short.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #1     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strh  w2, [x0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGet32InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
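+    /*
+     * Worked encoding example (for reference): "iget v1, v2, field@000a"
+     * is a 22c-format instruction, two 16-bit code units:
+     *   unit 0 = B|A|op = 0x2152, unit 1 = CCCC = 0x000a
+     * so lsr #12 extracts B (v2, the object), ubfx #8, #4 extracts A
+     * (v1, the destination), and FETCH w0, 1 picks up the field index.
+     */
+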
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: arm64/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGet64InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpException            // bail out
+    SET_VREG_WIDE x0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: arm64/op_iget_object.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetObjInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 1
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: arm64/op_iget_boolean.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetBooleanInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxtb w0, w0
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: arm64/op_iget_byte.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetByteInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxtb w0, w0
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: arm64/op_iget_char.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetCharInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxth w0, w0
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: arm64/op_iget_short.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetShortInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxth w0, w0
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet32InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
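+    /*
+     * Note: artSet32InstanceFromMterp takes (field_idx, obj, new_value,
+     * referrer), which is exactly the w0/w1/w2/x3 setup above; a non-zero
+     * result signals a pending exception.
+     */
+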
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: arm64/op_iput_wide.S */
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    VREG_INDEX_TO_ADDR x2, x2           // w2<- &fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: arm64/op_iput_object.S */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpIputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: arm64/op_iput_boolean.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: arm64/op_iput_byte.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: arm64/op_iput_char.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: arm64/op_iput_short.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGet32StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
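+    /*
+     * Worked encoding example (for reference): "sget v3, field@0008" is a
+     * 21c-format instruction, two 16-bit code units:
+     *   unit 0 = AA|op = 0x0360, unit 1 = BBBB = 0x0008
+     * so AA (v3) comes from wINST bits 15:8 and FETCH w0, 1 yields the
+     * field index.
+     */
+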
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: arm64/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field//BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGet64StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w4, wINST, #8                 // w4<- AA
+    cbnz  x3, MterpException            // bail out
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: arm64/op_sget_object.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetObjStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 1
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: arm64/op_sget_boolean.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetBooleanStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    uxtb w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: arm64/op_sget_byte.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetByteStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    sxtb w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: arm64/op_sget_char.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetCharStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    uxth w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: arm64/op_sget_short.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetShortStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    sxth w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet32StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
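+    /*
+     * Note: artSet32StaticFromCode takes (field_idx, new_value, referrer,
+     * self), matching the w0/w1/x2/x3 setup above; PREFETCH_INST runs
+     * before the call so rPC only advances once we know no exception is
+     * pending.
+     */
+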
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: arm64/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    ldr     x1, [xFP, #OFF_FP_METHOD]
+    lsr     w2, wINST, #8               // w2<- AA
+    VREG_INDEX_TO_ADDR x2, w2
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cbnz    w0, MterpException          // 0 on success, -1 on failure
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: arm64/op_sput_object.S */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xINST
+    mov     x3, xSELF
+    bl      MterpSputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: arm64/op_sput_boolean.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: arm64/op_sput_byte.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: arm64/op_sput_char.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: arm64/op_sput_short.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: arm64/op_invoke_virtual.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeVirtual
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
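+    /*
+     * Worked encoding example (for reference): "invoke-virtual {v4, v5},
+     * meth@0003" is a 35c-format instruction, three 16-bit code units:
+     *   unit 0 = A|G|op = 0x206e, unit 1 = BBBB = 0x0003,
+     *   unit 2 = F|E|D|C = 0x0054
+     * The helper decodes the operands itself from (self, shadow_frame,
+     * pc, inst_data), which is why FETCH_ADVANCE_INST skips 3 units here.
+     */
+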
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: arm64/op_invoke_super.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeSuper
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: arm64/op_invoke_direct.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeDirect
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: arm64/op_invoke_static.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeStatic
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: arm64/op_invoke_interface.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeInterface
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/op_return_void_no_barrier.S */
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_void_no_barrier_check
+.Lop_return_void_no_barrier_return:
+    mov     x0, #0
+    b       MterpReturn
+.Lop_return_void_no_barrier_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_void_no_barrier_return
+
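+    /*
+     * Note: the ands above tests the suspend-request and checkpoint bits in
+     * one step; only when either is set do we detour through
+     * MterpSuspendCheck before returning.
+     */
+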
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/op_invoke_virtual_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeVirtualRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: arm64/op_invoke_super_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeSuperRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/op_invoke_direct_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeDirectRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: arm64/op_invoke_static_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeStaticRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/op_invoke_interface_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeInterfaceRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: arm64/op_unused_79.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: arm64/op_unused_7a.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: arm64/op_neg_int.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sub     w0, wzr, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
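+    /*
+     * Worked encoding example (for reference): "neg-int v1, v2" is a
+     * 12x-format instruction, a single 16-bit code unit:
+     *   B|A|op = 0x217b
+     * so lsr #12 extracts B (v2, the source) and ubfx #8, #4 extracts A
+     * (v1, the destination); FETCH_ADVANCE_INST 1 matches the one-unit
+     * length.
+     */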
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: arm64/op_not_int.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    mvn     w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: arm64/op_neg_long.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    sub x0, xzr, x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: arm64/op_not_long.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    mvn     x0, x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: arm64/op_neg_float.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    eor     w0, w0, #0x80000000                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
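+    /*
+     * Note: negation is done as an integer eor that flips the IEEE-754
+     * sign bit (0x80000000), which matches neg-float semantics (including
+     * for NaN) without moving the value into an FP register.
+     */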
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: arm64/op_neg_double.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    eor     x0, x0, #0x8000000000000000
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: arm64/op_int_to_long.S */
+    /* int-to-long vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: arm64/op_int_to_float.S */
+/* File: arm64/funopNarrow.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op w0".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG w0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    scvtf s0, w0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
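+    /*
+     * Note: scvtf converts the signed 32-bit integer using the default
+     * round-to-nearest mode, which is the rounding Java requires for
+     * int-to-float.
+     */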
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: arm64/op_int_to_double.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op w0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG w0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    scvtf d0, w0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: arm64/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // w1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // w0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[A]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- w2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
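+    /*
+     * Rough C sketch: vreg(A) = (int32_t) vreg_wide(B); -- only the low
+     * word is read, so the shared 32-bit move template suffices.
+     */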
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: arm64/op_long_to_float.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op x0".
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     */
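+    /*
+     * This expansion is long-to-float; rough C sketch (hypothetical
+     * vreg_f()/vreg_wide() accessors):
+     *
+     *     vreg_f(A) = (float)(int64_t) vreg_wide(B);   // scvtf s0, x0
+     */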
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    scvtf s0, x0                        // s0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                     // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: arm64/op_long_to_double.S */
+/* File: arm64/funopWide.S */
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op x0".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    scvtf d0, x0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: arm64/op_float_to_int.S */
+/* File: arm64/funopNarrow.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "w0 = op s0".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG s0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    fcvtzs w0, s0                       // w0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: arm64/op_float_to_long.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "x0 = op s0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG s0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    fcvtzs x0, s0                       // x0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: arm64/op_float_to_double.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG s0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    fcvt  d0, s0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: arm64/op_double_to_int.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "w0 = op d0".
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE d0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    fcvtzs w0, d0                       // w0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: arm64/op_double_to_long.S */
+/* File: arm64/funopWide.S */
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "x0 = op d0".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE d0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    fcvtzs x0, d0                       // x0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: arm64/op_double_to_float.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE d0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    fcvt s0, d0                         // s0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                     // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: arm64/op_int_to_byte.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
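+    /*
+     * This expansion is int-to-byte; rough C sketch (hypothetical
+     * vreg() accessor):
+     *
+     *     vreg(A) = (int32_t)(int8_t) vreg(B);     // sxtb
+     */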
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sxtb    w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: arm64/op_int_to_char.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    uxth    w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: arm64/op_int_to_short.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sxth    w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: arm64/op_add_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
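+    /*
+     * Rough C sketch of the shared binop decode (vreg() is a
+     * hypothetical register-file accessor; code[] stands for the
+     * 16-bit Dalvik code units at rPC):
+     *
+     *     uint16_t ccbb = code[1];                 // FETCH w0, 1
+     *     uint32_t bb = ccbb & 0xff, cc = ccbb >> 8;
+     *     vreg(AA) = vreg(bb) + vreg(cc);          // "instr" slot; add here
+     */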
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: arm64/op_sub_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: arm64/op_mul_int.S */
+/* Operand order inherited from the ARM32 template, where "mul r0, r0, r1" was illegal; on ARM64 either order assembles. */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: arm64/op_div_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 1
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
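+    /*
+     * With chkzero=1 the zero test runs before the divide; rough C
+     * sketch (hypothetical vreg() accessor):
+     *
+     *     if (vreg(CC) == 0) goto common_errDivideByZero;
+     *     vreg(AA) = (int32_t) vreg(BB) / (int32_t) vreg(CC);  // sdiv
+     *
+     * AArch64 sdiv yields INT_MIN for INT_MIN / -1, matching Java
+     * semantics, so no extra overflow check is needed.
+     */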
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: arm64/op_rem_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 1
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv     w2, w0, w1                           // optional op; may set condition codes
+    msub w0, w2, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
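+    /*
+     * AArch64 has no integer remainder instruction, so the sdiv/msub
+     * pair computes it; rough C sketch:
+     *
+     *     q = a / b;         // sdiv w2, w0, w1
+     *     r = a - q * b;     // msub w0, w2, w1, w0
+     */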
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: arm64/op_and_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: arm64/op_or_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: arm64/op_xor_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: arm64/op_shl_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: arm64/op_shr_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    asr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: arm64/op_ushr_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: arm64/op_add_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
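+    /*
+     * Rough C sketch (hypothetical vreg_wide() accessor), with add as
+     * the "instr" slot for add-long:
+     *
+     *     vreg_wide(AA) = vreg_wide(BB) + vreg_wide(CC);
+     */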
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add x0, x1, x2                      // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: arm64/op_sub_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub x0, x1, x2                      // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: arm64/op_mul_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul x0, x1, x2                      // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: arm64/op_div_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 1
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv x0, x1, x2                     // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: arm64/op_rem_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 1
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv x3, x1, x2                     // optional op; may set condition codes
+    msub x0, x3, x2, x1                 // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: arm64/op_and_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and x0, x1, x2                      // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: arm64/op_or_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr x0, x1, x2                      // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: arm64/op_xor_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (x2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2                // x2<- vCC
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor x0, x1, x2                      // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: arm64/op_shl_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
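+    /*
+     * AArch64 variable shifts use only the low 6 bits of the count
+     * register, which matches Dalvik's long-shift semantics; rough C
+     * sketch (hypothetical accessors):
+     *
+     *     vreg_wide(AA) = vreg_wide(BB) << (vreg(CC) & 63);   // shl-long
+     */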
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    lsl  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: arm64/op_shr_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    asr  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: arm64/op_ushr_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    lsr  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: arm64/op_add_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
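+    /*
+     * Rough C sketch (hypothetical vreg_f() accessor), with fadd as
+     * the op for add-float:
+     *
+     *     vreg_f(AA) = vreg_f(BB) + vreg_f(CC);
+     */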
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fadd   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: arm64/op_sub_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fsub   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: arm64/op_mul_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fmul   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: arm64/op_div_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fdiv   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: arm64/op_rem_float.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    bl      fmodf                       // s0<- fmodf(s0, s1)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: arm64/op_add_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = d1 op d2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than d0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (d2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2                // d2<- vCC
+    GET_VREG_WIDE d1, w1                // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    fadd d0, d1, d2                     // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: arm64/op_sub_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = d1 op d2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than d0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (d2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2                // d2<- vCC
+    GET_VREG_WIDE d1, w1                // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    fsub d0, d1, d2                     // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: arm64/op_mul_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = d1 op d2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than d0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (d2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2                // d2<- vCC
+    GET_VREG_WIDE d1, w1                // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    fmul d0, d1, d2                     // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: arm64/op_div_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = d1 op d2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than d0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (d2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2                // d2<- vCC
+    GET_VREG_WIDE d1, w1                // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    fdiv d0, d1, d2                     // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: arm64/op_rem_double.S */
+    /* rem vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d1, w2                // d1<- vCC
+    GET_VREG_WIDE d0, w1                // d0<- vBB
+    bl  fmod                            // d0<- fmod(d0, d1)
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
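+    /*
+     * Editor's note: there is no AArch64 instruction for floating-point
+     * remainder, so this handler calls libm's fmod.  Per the AAPCS64 the
+     * operands go in d0/d1 and the result comes back in d0; w4 (AA) is
+     * only extracted after the call because x0-x17 are caller-saved.
+     * In C:
+     *
+     *     double vAA = fmod(vBB, vCC);    /* #include <math.h> */
+     */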
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/op_add_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
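+    /*
+     * Editor's note: in the "/2addr" encoding the 16-bit instruction word
+     * packs the opcode in bits 0-7, A in bits 8-11 and B in bits 12-15,
+     * which is what the ubfx/lsr pair above extracts.  The first source
+     * register doubles as the destination:
+     *
+     *     vA = vA + vB;    // add w0, w0, w1
+     */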
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/op_sub_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/op_mul_int_2addr.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/op_div_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
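+    /*
+     * Editor's note: Dalvik requires an ArithmeticException on integer
+     * division by zero, hence the explicit cbz (chkzero == 1) -- AArch64
+     * sdiv itself never traps.  Conveniently, sdiv also defines
+     * INT_MIN / -1 as INT_MIN, matching Java's wrapping overflow, so no
+     * second special case is needed:
+     *
+     *     if (vB == 0) goto throw_div_by_zero;
+     *     vA = vA / vB;    // sdiv w0, w0, w1
+     */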
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/op_rem_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sdiv     w2, w0, w1                 // w2<- quotient (feeds the msub below)
+    msub w0, w2, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
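+    /*
+     * Editor's note: AArch64 has no integer remainder instruction, so the
+     * handler uses the identity rem = a - (a / b) * b: sdiv produces the
+     * truncated quotient in w2 and msub folds the multiply-subtract into
+     * one instruction.  Truncated division is exactly Java's remainder
+     * rule.  In C:
+     *
+     *     vA = vA - (vA / vB) * vB;    // sdiv w2, w0, w1; msub w0, w2, w1, w0
+     */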
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/op_and_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/op_or_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/op_xor_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/op_shl_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/op_shr_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    asr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/op_ushr_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: arm64/op_add_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    add     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
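+    /*
+     * Editor's note: unlike the 32-bit binop2addr template, this wide
+     * variant forbids a function call in the "instr" slot: w2 (the A
+     * register index, still needed by SET_VREG_WIDE after the op) is a
+     * caller-saved register under the AAPCS64 and would be clobbered
+     * across a call.  The semantics here are simply:
+     *
+     *     long vA = vA + vB;    // add x0, x0, x1
+     */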
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/op_sub_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    sub     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/op_mul_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    mul     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: arm64/op_div_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 1
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    sdiv     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/op_rem_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 1
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sdiv x3, x0, x1                     // x3<- quotient (feeds the msub below)
+    msub x0, x3, x1, x0                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
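+    /*
+     * Editor's note: the same sdiv/msub remainder idiom as the 32-bit
+     * case, widened to x registers (x3 holds the quotient).  Both
+     * div-long/2addr above and this handler set chkzero, since Dalvik
+     * mandates the ArithmeticException for longs as well:
+     *
+     *     long vA = vA - (vA / vB) * vB;    // sdiv x3, x0, x1; msub x0, x3, x1, x0
+     */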
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/op_and_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    and     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/op_or_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    orr     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/op_xor_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    eor     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/op_shl_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    lsl x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
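+    /*
+     * Editor's note: no explicit masking of the shift count is needed
+     * because the AArch64 register-shift instructions (lsl/asr/lsr by
+     * register) use the amount modulo the register width -- the low 6
+     * bits for x registers -- which is exactly Java's rule for long
+     * shifts:
+     *
+     *     long vA = vA << (vB & 63);    // lsl x0, x0, x1
+     */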
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/op_shr_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    asr x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/op_ushr_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    lsr x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/op_add_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fadd   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
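+    /*
+     * Editor's note: the float "/2addr" template works directly in the
+     * single-precision s registers (GET_VREG/SET_VREG here move 32 bits
+     * between a vreg and s0-s2), and no zero check is ever needed for
+     * IEEE-754 arithmetic.  In C:
+     *
+     *     float vA = vA + vB;    // fadd s2, s0, s1
+     */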
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/op_sub_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fsub   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/op_mul_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fmul   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/op_div_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fdiv   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: arm64/op_rem_float_2addr.S */
+    /* rem vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    bl  fmodf
+    ubfx    w9, wINST, #8, #4           // w9<- A (need to reload - killed across call)
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s0, w9
+    GOTO_OPCODE ip                      // jump to next instruction
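+    /*
+     * Editor's note: as with rem-double, the remainder is delegated to
+     * libm (fmodf takes s0/s1 and returns in s0).  w9 is re-derived from
+     * wINST after the call because x9 is caller-saved.  In C:
+     *
+     *     float vA = fmodf(vA, vB);    /* #include <math.h> */
+     */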
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: arm64/op_add_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fadd     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/op_sub_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fsub     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/op_mul_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fmul     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: arm64/op_div_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fdiv     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/op_rem_double_2addr.S */
+    /* rem vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1                // d1<- vB
+    GET_VREG_WIDE d0, w2                // d0<- vA
+    bl fmod
+    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/op_add_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
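+    /*
+     * Editor's note: the "lit16" forms embed a signed 16-bit literal in
+     * the second code unit; FETCH_S sign-extends it into w1 before the
+     * op.  In C:
+     *
+     *     vA = vB + (int16_t)CCCC;    // add w0, w0, w1
+     */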
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: arm64/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sub     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
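+    /*
+     * Editor's note: rsub-int is "reverse subtract" -- the literal is the
+     * minuend -- which is why the operand order of the sub is swapped
+     * relative to the other lit16 ops:
+     *
+     *     vA = (int16_t)CCCC - vB;    // sub w0, w1, w0
+     */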
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/op_mul_int_lit16.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/op_div_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sdiv w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/op_rem_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv w3, w0, w1                     // w3<- quotient (feeds the msub below)
+    msub w0, w3, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/op_and_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/op_or_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/op_xor_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/op_add_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+                                // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
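+
+    /*
+     * For reference, a C sketch of the lit8 decode above (illustrative
+     * only; `insns` and `vregs` are assumed names, not ART symbols):
+     *
+     *     int32_t  ccbb = (int16_t)insns[1];     // ssssCCBB, sign-extended
+     *     uint32_t aa   = insns[0] >> 8;         // vAA, destination
+     *     uint32_t bb   = ccbb & 0xff;           // vBB, source
+     *     int32_t  lit  = ccbb >> 8;             // ssssssCC (arithmetic shift)
+     *     vregs[aa] = vregs[bb] + (uint32_t)lit; // add-int/lit8
+     */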
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/op_rsub_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: arm64/op_mul_int_lit8.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: arm64/op_div_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv    w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/op_rem_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv    w3, w0, w1                  // optional op; may set condition codes
+    msub    w0, w3, w1, w0              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
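+
+    /*
+     * The sdiv/msub pair above computes the remainder via the identity
+     * rem = w0 - (w0 / w1) * w1; e.g. w0 = 7, w1 = 3 gives 7 - 2*3 = 1.
+     */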
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: arm64/op_and_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+                                // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: arm64/op_or_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+                                // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/op_xor_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+                                // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/op_shl_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    ubfx    w1, w3, #8, #5                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
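+
+    /*
+     * Note: `ubfx w1, w3, #8, #5` extracts CC and masks it to five bits in
+     * one step, matching Dalvik's rule that 32-bit shifts use only the low
+     * five bits of the shift count, so no separate `and w1, w1, #31` is
+     * needed.
+     */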
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/op_shr_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    ubfx    w1, w3, #8, #5                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    asr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/op_ushr_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    ubfx    w1, w3, #8, #5                            // optional; typically w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldr     w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
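+
+    /*
+     * For reference, the quick field get above is roughly (sketch; names
+     * are illustrative, not ART symbols):
+     *
+     *     uint8_t* obj = (uint8_t*)(uintptr_t)vregs[b];
+     *     if (obj == NULL) goto throw_null_object;
+     *     vregs[a] = *(uint32_t*)(obj + field_byte_offset);
+     */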
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w4, 1                         // w4<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldr     x0, [x3, x4]                // x0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- object we're operating on
+    bl      artIGetObjectFromMterp      // (obj, offset)
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    PREFETCH_INST 2
+    cbnz    w3, MterpPossibleException      // bail out
+    SET_VREG_OBJECT w0, w2              // fp[A]<- w0
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    str     w0, [x3, x1]                // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w3, 1                         // w3<- field byte offset
+    GET_VREG w2, w2                     // w2<- fp[B], the object pointer
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    cbz     w2, common_errNullObject    // object was null
+    GET_VREG_WIDE x0, w0                // x0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    str     x0, [x2, x3]                // obj.field<- x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/op_iput_object_quick.S */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpIputObjectQuick
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/op_invoke_virtual_quick.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeVirtualQuick
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
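+
+    /*
+     * The helper called above receives (self, shadow_frame, dex_pc_ptr,
+     * inst) in x0-x3 and reports success in w0, i.e. a signature roughly of
+     * the form (sketched from the register setup, not the C++ declaration):
+     *
+     *     bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* sf,
+     *                                  uint16_t* dex_pc_ptr, uint32_t inst);
+     */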
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/op_invoke_virtual_range_quick.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    mov     x3, xINST
+    bl      MterpInvokeVirtualQuickRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/op_iput_boolean_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strb    w0, [x3, x1]                // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: arm64/op_iput_byte_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strb    w0, [x3, x1]                // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: arm64/op_iput_char_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strh    w0, [x3, x1]                // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: arm64/op_iput_short_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strh    w0, [x3, x1]                // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/op_iget_boolean_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldrb    w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/op_iget_byte_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldrsb   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/op_iget_char_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldrh    w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/op_iget_short_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject    // object was null
+    ldrsh   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f3: /* 0xf3 */
+/* File: arm64/op_unused_f3.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: arm64/op_unused_f4.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f5: /* 0xf5 */
+/* File: arm64/op_unused_f5.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f6: /* 0xf6 */
+/* File: arm64/op_unused_f6.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f7: /* 0xf7 */
+/* File: arm64/op_unused_f7.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f8: /* 0xf8 */
+/* File: arm64/op_unused_f8.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f9: /* 0xf9 */
+/* File: arm64/op_unused_f9.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: arm64/op_unused_fa.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: arm64/op_unused_fb.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: arm64/op_unused_fc.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: arm64/op_unused_fd.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: arm64/op_unused_fe.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: arm64/op_unused_ff.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+/* File: arm64/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
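+
+/*
+ * The catch-block resume address above is xPC = code_item +
+ * CODEITEM_INSNS_OFFSET + dex_pc * 2; the `lsl #1` scales the dex pc by the
+ * 16-bit Dalvik code unit size, i.e. in C (sketch):
+ *
+ *     uint16_t* pc = (uint16_t*)((uint8_t*)code_item + CODEITEM_INSNS_OFFSET)
+ *                    + dex_pc;
+ */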
+/*
+ * Common handling for branches, with support for JIT profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
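+
+/*
+ * In outline, the taken-branch handling above is (sketch):
+ *
+ *     if (offset > 0) {                      // forward: no hotness count
+ *         if (wPROFILE == JIT_CHECK_OSR) maybe_do_osr(offset);
+ *     } else if (wPROFILE < 0) {             // countdown disabled/special
+ *         if (wPROFILE == JIT_CHECK_OSR) maybe_do_osr(offset);
+ *     } else if (--wPROFILE == 0) {
+ *         wPROFILE = report_hotness_batch(); // then retry the OSR check
+ *     }
+ *     advance_pc(offset);                    // backward: check suspend too
+ */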
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch
+ * offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/*
+ * Check for a pending suspend request.  Assumes wINST is already loaded and
+ * xPC has been advanced; we still need to extract the opcode and branch to
+ * it.  Thread flags arrive in w7.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sxtw x2, wINST
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    str     x0, [x2]
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
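+/*
+ * Register-pair frame restored above, matching the 80-byte frame pushed in
+ * the ExecuteMterpImpl prologue (offsets from the ldp sequence):
+ *
+ *   [sp, #0]  xPROFILE, x27    [sp, #16] xIBASE, xREFS
+ *   [sp, #32] xSELF,    xINST  [sp, #48] xPC,    xFP
+ *   [sp, #64] fp,       lr
+ */
+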
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (0 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
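+
+    /*
+     * In effect (sketch): lr = &artMterpAsmInstructionStart[opcode * 128];
+     * then tail-call MterpCheckBefore(self, shadow_frame, dex_pc_ptr), which
+     * resumes at the primary handler via lr once any requests are handled.
+     */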
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (1 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (2 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (3 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (4 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (5 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (6 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (7 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (8 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (9 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (10 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (11 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (12 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (13 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (14 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (15 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
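+/*
+ * C-side counterpart assumed by these stubs.  The signature below is
+ * inferred from the x0-x2 argument setup and the
+ * "(self, shadow_frame, dex_pc_ptr)" comments; it is shown here only for
+ * orientation, not as the authoritative declaration:
+ *
+ *   extern "C" void MterpCheckBefore(Thread* self,
+ *                                    ShadowFrame* shadow_frame,
+ *                                    uint16_t* dex_pc_ptr);
+ */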
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (16 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (17 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (18 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (19 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (20 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (21 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (22 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (23 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (24 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (25 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (26 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (27 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (28 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (29 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (30 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (31 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (32 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (33 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (34 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (35 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (36 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (37 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (38 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (39 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (40 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (41 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (42 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (43 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (44 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
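+/*
+ * The ldr of xIBASE from THREAD_CURRENT_IBASE_OFFSET at the top of each
+ * stub re-reads the interpreter base from the thread.  The intent appears
+ * to be that once the thread's current ibase is switched back to the
+ * primary table, subsequent dispatches flow straight to the primary
+ * handlers with no extra per-instruction check in the fast path.
+ */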
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (45 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (46 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (47 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (48 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (49 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (50 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (51 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (52 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (53 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (54 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (55 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (56 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (57 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (58 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (59 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (60 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (61 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (62 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (63 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (64 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (65 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (66 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (67 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (68 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (69 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (70 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (71 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (72 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (73 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (74 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (75 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (76 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (77 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (78 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (79 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (80 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (81 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (82 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (83 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (84 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (85 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (86 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (87 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (88 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (89 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (90 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (91 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (92 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (93 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (94 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (95 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (96 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (97 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (98 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (99 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (100 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (101 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (102 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (103 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (104 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (105 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (106 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (107 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (108 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (109 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (110 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (111 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (112 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (113 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (114 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (115 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (116 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (117 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (118 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (119 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (120 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (121 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (122 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
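+/*
+ * Note that unused opcodes (such as 0x79 and 0x7a above) still get full
+ * ALT stubs.  The stubs are emitted back to back at 128-byte alignment,
+ * presumably so the ALT table can be indexed with the same opcode * 128
+ * stride as the primary handler table; leaving a slot out would shift
+ * every later stub into the wrong position.
+ */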
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (123 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (124 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (125 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (126 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (127 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (128 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (129 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (130 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (131 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (132 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (133 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (134 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (135 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (136 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (137 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (138 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (139 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (140 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (141 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (142 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (143 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (144 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (145 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (146 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (147 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (148 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (149 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (150 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (151 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (152 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (153 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (154 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (155 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (156 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (157 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (158 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (159 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (160 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (161 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (162 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (163 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (164 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (165 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (166 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (167 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (168 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (169 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (170 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (171 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (172 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (173 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (174 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (175 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (176 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (177 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (178 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (179 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (180 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (181 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (182 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (183 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (184 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (185 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (186 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (187 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (188 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (189 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (190 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (191 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (192 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (193 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (194 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (195 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (196 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (197 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (198 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (199 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (200 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (201 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (202 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (203 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (204 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (205 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (206 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (207 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (208 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (209 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (210 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (211 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (212 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (213 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (214 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (215 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (216 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (217 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (218 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (219 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (220 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (221 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (222 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (223 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (224 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (225 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (226 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (227 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (228 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (229 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (230 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (231 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (232 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (233 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (234 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (235 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (236 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (237 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (238 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (239 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (240 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (241 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (242 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (243 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (244 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (245 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (246 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (247 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (248 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (249 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (250 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (251 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (252 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (253 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (254 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (255 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
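
The ALT-stub table that ends here contains one 128-byte stub per opcode, and every stub does the same thing: run the pre-instruction hook, then fall through to the unmodified primary handler. As a hedged, self-contained C++ sketch of that control flow (the table, handler names, and `CheckBefore` are illustrative stand-ins, not ART's actual C++ API; in the assembly the "call" into the primary handler is achieved by pointing `lr` at `artMterpAsmInstructionStart + opcode * 128` and tail-calling `MterpCheckBefore`):

```cpp
#include <cstdint>
#include <cstdio>

using Handler = void (*)(uint16_t inst);

// Two stand-in primary handlers; the assembly has one per opcode, each
// padded to 128 bytes so its address is ibase + opcode * 128. The table
// is truncated to the two opcodes exercised below.
void HandleNop(uint16_t)  { std::puts("nop"); }
void HandleMove(uint16_t) { std::puts("move"); }

Handler primary_table[] = {HandleNop, HandleMove};

// Plays the role of MterpCheckBefore(self, shadow_frame, dex_pc_ptr):
// service any instrumentation requests before the instruction runs.
void CheckBefore(uint16_t inst) {
  std::printf("before opcode 0x%02x\n", inst & 0xFF);
}

// One ALT stub per opcode: hook first, then "tail call" into the real
// handler -- exactly the control flow the stubs above set up via lr.
void AltStub(uint16_t inst) {
  CheckBefore(inst);
  primary_table[inst & 0xFF](inst);
}

int main() {
  AltStub(0x0000);  // nop, through the instrumented path
  AltStub(0x0001);  // move, through the instrumented path
}
```
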
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
new file mode 100644
index 0000000..fef7dc6
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -0,0 +1,12940 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'mips'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: mips/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate the assembly code by implementing an ExecuteXXXImpl() style body
+  (doesn't handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+#include "asm_support.h"
+
+#if (__mips==32) && (__mips_isa_rev>=2)
+#define MIPS32REVGE2    /* mips32r2 and greater */
+#if (__mips==32) && (__mips_isa_rev>=5)
+#define FPU64           /* 64 bit FPU */
+#if (__mips==32) && (__mips_isa_rev>=6)
+#define MIPS32REVGE6    /* mips32r6 and greater */
+#endif
+#endif
+#endif
+
+/* MIPS definitions and declarations
+
+   reg  nick      purpose
+   s0   rPC       interpreted program counter, used for fetching instructions
+   s1   rFP       interpreted frame pointer, used for accessing locals and args
+   s2   rSELF     self (Thread) pointer
+   s3   rIBASE    interpreted instruction base pointer, used for computed goto
+   s4   rINST     first 16-bit code unit of current instruction
+   s5   rOBJ      object pointer
+   s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+   s7   rTEMP     used as temp storage that can survive a function call
+   s8   rPROFILE  branch profiling countdown
+
+*/
+
+/* single-purpose registers, given names for clarity */
+#define rPC s0
+#define rFP s1
+#define rSELF s2
+#define rIBASE s3
+#define rINST s4
+#define rOBJ s5
+#define rREFS s6
+#define rTEMP s7
+#define rPROFILE s8
+
+#define rARG0 a0
+#define rARG1 a1
+#define rARG2 a2
+#define rARG3 a3
+#define rRESULT0 v0
+#define rRESULT1 v1
+
+/* GP register definitions */
+#define zero    $0      /* always zero */
+#define AT      $at     /* assembler temp */
+#define v0      $2      /* return value */
+#define v1      $3
+#define a0      $4      /* argument registers */
+#define a1      $5
+#define a2      $6
+#define a3      $7
+#define t0      $8      /* temp registers (not saved across subroutine calls) */
+#define t1      $9
+#define t2      $10
+#define t3      $11
+#define t4      $12
+#define t5      $13
+#define t6      $14
+#define t7      $15
+#define ta0     $12     /* alias */
+#define ta1     $13
+#define ta2     $14
+#define ta3     $15
+#define s0      $16     /* saved across subroutine calls (callee saved) */
+#define s1      $17
+#define s2      $18
+#define s3      $19
+#define s4      $20
+#define s5      $21
+#define s6      $22
+#define s7      $23
+#define t8      $24     /* two more temp registers */
+#define t9      $25
+#define k0      $26     /* kernel temporary */
+#define k1      $27
+#define gp      $28     /* global pointer */
+#define sp      $29     /* stack pointer */
+#define s8      $30     /* one more callee saved */
+#define ra      $31     /* return address */
+
+/* FP register definitions */
+#define fv0    $f0
+#define fv0f   $f1
+#define fv1    $f2
+#define fv1f   $f3
+#define fa0    $f12
+#define fa0f   $f13
+#define fa1    $f14
+#define fa1f   $f15
+#define ft0    $f4
+#define ft0f   $f5
+#define ft1    $f6
+#define ft1f   $f7
+#define ft2    $f8
+#define ft2f   $f9
+#define ft3    $f10
+#define ft3f   $f11
+#define ft4    $f16
+#define ft4f   $f17
+#define ft5    $f18
+#define ft5f   $f19
+#define fs0    $f20
+#define fs0f   $f21
+#define fs1    $f22
+#define fs1f   $f23
+#define fs2    $f24
+#define fs2f   $f25
+#define fs3    $f26
+#define fs3f   $f27
+#define fs4    $f28
+#define fs4f   $f29
+#define fs5    $f30
+#define fs5f   $f31
+
+#ifndef MIPS32REVGE6
+#define fcc0   $fcc0
+#define fcc1   $fcc1
+#endif
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
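A hedged illustration of the OFF_FP trick just defined (the layout below is invented for the sketch; ART generates the real offsets into asm_support.h): rFP points at the vregs array *inside* the shadow frame, so every other field is reached by first subtracting the vregs offset.

```cpp
#include <cstdint>
#include <cstddef>
#include <cassert>

// Invented layout standing in for ART's ShadowFrame.
struct ShadowFrame {
  uint32_t number_of_vregs;   // SHADOWFRAME_NUMBER_OF_VREGS_OFFSET
  uint16_t* dex_pc_ptr;       // SHADOWFRAME_DEX_PC_PTR_OFFSET
  uint32_t vregs[4];          // SHADOWFRAME_VREGS_OFFSET: rFP points here
};

// OFF_FP(field) = field_offset - vregs_offset, applied from fp.
uint32_t NumVRegsFromFP(const uint32_t* fp) {
  const auto* base = reinterpret_cast<const uint8_t*>(fp) -
                     offsetof(ShadowFrame, vregs);
  return reinterpret_cast<const ShadowFrame*>(base)->number_of_vregs;
}

int main() {
  ShadowFrame sf{4, nullptr, {0, 1, 2, 3}};
  assert(NumVRegsFromFP(sf.vregs) == 4);  // lw via OFF_FP_NUMBER_OF_VREGS
}
```
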
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+#define EXPORT_PC() \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
+
+#define EXPORT_DEX_PC(tmp) \
+    lw   tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw   rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu tmp, CODEITEM_INSNS_OFFSET; \
+    subu tmp, rPC, tmp; \
+    sra  tmp, tmp, 1; \
+    sw   tmp, OFF_FP_DEX_PC(rFP)
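
The pointer-to-offset conversion EXPORT_DEX_PC performs can be written out in C++ as below. The CodeItem layout is an illustrative stand-in, but the arithmetic is exactly what the macro computes: byte distance from the start of insns[], halved because dex pcs count 16-bit code units (hence the `sra tmp, tmp, 1`).

```cpp
#include <cstdint>
#include <cassert>

struct CodeItem {       // stand-in; CODEITEM_INSNS_OFFSET locates insns_
  uint32_t header;
  uint16_t insns_[8];
};

uint32_t DexPcFromPointer(const CodeItem* item, const uint16_t* pc) {
  auto byte_diff = reinterpret_cast<const uint8_t*>(pc) -
                   reinterpret_cast<const uint8_t*>(item->insns_);
  return static_cast<uint32_t>(byte_diff >> 1);  // 16-bit code units
}

int main() {
  CodeItem item{};
  assert(DexPcFromPointer(&item, &item.insns_[3]) == 3);
}
```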
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+#define FETCH_INST() lhu rINST, (rPC)
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC().)
+ */
+#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+    addu      rPC, rPC, ((_count) * 2)
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
+    lhu       _dreg, ((_count)*2)(_sreg) ;            \
+    addu      _sreg, _sreg, (_count)*2
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+#define PREFETCH_INST(_count) lhu rINST, ((_count)*2)(rPC)
+
+/* Advance rPC by some number of code units. */
+#define ADVANCE(_count) addu rPC, rPC, ((_count) * 2)
+
+/*
+ * Fetch the next instruction from an offset specified by rd.  Updates
+ * rPC to point to the next instruction.  "rd" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ */
+#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+    lhu       rINST, (rPC)
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+#define FETCH(rd, _count) lhu rd, ((_count) * 2)(rPC)
+#define FETCH_S(rd, _count) lh rd, ((_count) * 2)(rPC)
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+#define FETCH_B(rd, _count, _byte) lbu rd, ((_count) * 2 + _byte)(rPC)
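
Rendered as C++ under the assumption that pc addresses 16-bit code units, the FETCH family above behaves as follows (`lhu` is the unsigned halfword load, `lh` its signed counterpart):

```cpp
#include <cstdint>
#include <cassert>

uint16_t Fetch(const uint16_t* pc, int count) { return pc[count]; }  // FETCH
int16_t FetchS(const uint16_t* pc, int count) {                      // FETCH_S
  return static_cast<int16_t>(pc[count]);
}

uint16_t FetchAdvanceInst(const uint16_t*& pc, int count) {  // FETCH_ADVANCE_INST
  uint16_t inst = pc[count];  // lhu rINST, ((_count)*2)(rPC)
  pc += count;                // addu rPC, rPC, (_count)*2
  return inst;
}

int main() {
  const uint16_t code[] = {0x0012, 0xFFFF, 0x0028};
  const uint16_t* pc = code;
  assert(Fetch(pc, 2) == 0x0028);
  assert(FetchS(pc, 1) == -1);               // sign-extended
  assert(FetchAdvanceInst(pc, 1) == 0xFFFF);
  assert(pc == code + 1);                    // rPC advanced one code unit
}
```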
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+#define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+
+/*
+ * Begin executing the opcode in rd.
+ */
+#define GOTO_OPCODE(rd) sll rd, rd, 7; \
+    addu      rd, rIBASE, rd; \
+    jalr      zero, rd
+
+#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, 7; \
+    addu      rd, _base, rd; \
+    jalr      zero, rd
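
The shift by 7 in GOTO_OPCODE is just a multiply by the 128-byte handler stride enforced by the `.balign 128` directives throughout this file. A small, self-contained check of that arithmetic (the base address is made up):

```cpp
#include <cstdint>
#include <cassert>

// handler = rIBASE + (opcode << 7), because every handler occupies
// exactly 128 bytes in the table.
uintptr_t HandlerAddress(uintptr_t ibase, uint8_t opcode) {
  return ibase + (static_cast<uintptr_t>(opcode) << 7);
}

int main() {
  assert(HandlerAddress(0x10000, 0x00) == 0x10000);
  assert(HandlerAddress(0x10000, 0x01) == 0x10080);  // 128 bytes later
  assert(HandlerAddress(0x10000, 0xFF) == 0x10000 + 255 * 128);
}
```
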
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
+
+#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
+    .set noat; l.s rd, (AT); .set at
+
+#define SET_VREG(rd, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+
+#define SET_VREG64(rlo, rhi, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+
+#ifdef FPU64
+#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    s.s       rlo, 0(t8)
+#else
+#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#endif
+
+#define SET_VREG_OBJECT(rd, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
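
A hedged sketch of the "double store" these SET_VREG* macros implement (array sizes and layout invented for illustration): every write updates the vreg slot, while the parallel reference slot gets either zero (primitives) or the same value (objects), so the GC can scan the reference array alone.

```cpp
#include <cstdint>
#include <cassert>

struct Frame {
  uint32_t vregs[8];  // rFP points at vregs[0]
  uint32_t refs[8];   // rREFS = rFP + number_of_vregs * 4
};

void SetVReg(Frame& f, unsigned i, uint32_t value) {  // SET_VREG
  f.vregs[i] = value;
  f.refs[i] = 0;                                      // not a reference
}

void SetVRegObject(Frame& f, unsigned i, uint32_t ref) {  // SET_VREG_OBJECT
  f.vregs[i] = ref;
  f.refs[i] = ref;                                        // GC-visible copy
}

int main() {
  Frame f{};
  SetVReg(f, 0, 42);
  SetVRegObject(f, 1, 0xC0FFEE);
  assert(f.refs[0] == 0 && f.refs[1] == 0xC0FFEE);
}
```
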
+
+/* Combination of the SET_VREG and GOTO_OPCODE macros to save 1 instruction */
+#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
+    sll       dst, dst, 7; \
+    addu      dst, rIBASE, dst; \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+
+/* Combination of the SET_VREG64 and GOTO_OPCODE macros to save 1 instruction */
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
+    sll       dst, dst, 7; \
+    addu      dst, rIBASE, dst; \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+
+#define SET_VREG_F(rd, rix) .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+
+#define GET_OPA(rd) srl rd, rINST, 8
+#ifdef MIPS32REVGE2
+#define GET_OPA4(rd) ext rd, rINST, 8, 4
+#else
+#define GET_OPA4(rd) GET_OPA(rd); and rd, 0xf
+#endif
+#define GET_OPB(rd) srl rd, rINST, 12
+
+/*
+ * Form an Effective Address rd = rbase + roff<<n;
+ * Uses reg AT
+ */
+#define EASN(rd, rbase, roff, rshift) .set noat; \
+    sll       AT, roff, rshift; \
+    addu      rd, rbase, AT; \
+    .set at
+
+#define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
+#define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
+#define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
+#define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
+
+/*
+ * Form an Effective Shift Right rd = rbase + roff>>n;
+ * Uses reg AT
+ */
+#define ESRN(rd, rbase, roff, rshift) .set noat; \
+    srl       AT, roff, rshift; \
+    addu      rd, rbase, AT; \
+    .set at
+
+#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
+    .set noat; lw rd, 0(AT); .set at
+
+#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
+    .set noat; sw rd, 0(AT); .set at
+
+#define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
+#define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
+
+#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+    sw        rhi, (off+4)(rbase)
+#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+    lw        rhi, (off+4)(rbase)
+
+#define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
+#define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
+
+#ifdef FPU64
+#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    sw        AT, (off+4)(rbase); \
+    .set at
+#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+    .set noat; \
+    lw        AT, (off+4)(rbase); \
+    mthc1     AT, rlo; \
+    .set at
+#else
+#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+    s.s       rhi, (off+4)(rbase)
+#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+    l.s       rhi, (off+4)(rbase)
+#endif
+
+#define STORE64_F(rlo, rhi, rbase) STORE64_off_F(rlo, rhi, rbase, 0)
+#define LOAD64_F(rlo, rhi, rbase) LOAD64_off_F(rlo, rhi, rbase, 0)
+
+
+#define LOAD_base_offMirrorArray_length(rd, rbase) LOAD_RB_OFF(rd, rbase, MIRROR_ARRAY_LENGTH_OFFSET)
+
+#define STACK_STORE(rd, off) sw rd, off(sp)
+#define STACK_LOAD(rd, off) lw rd, off(sp)
+#define CREATE_STACK(n) subu sp, sp, n
+#define DELETE_STACK(n) addu sp, sp, n
+
+#define LOAD_ADDR(dest, addr) la dest, addr
+#define LOAD_IMM(dest, imm) li dest, imm
+#define MOVE_REG(dest, src) move dest, src
+#define STACK_SIZE 128
+
+#define STACK_OFFSET_ARG04 16
+#define STACK_OFFSET_ARG05 20
+#define STACK_OFFSET_ARG06 24
+#define STACK_OFFSET_ARG07 28
+#define STACK_OFFSET_GP    84
+
+#define JAL(n) jal n
+#define BAL(n) bal n
+
+/*
+ * FP register usage restrictions:
+ * 1) We don't use the callee-save FP registers, so we don't have to save them.
+ * 2) We don't use the odd FP registers, so we can share code with mips32r6.
+ */
+#define STACK_STORE_FULL() CREATE_STACK(STACK_SIZE); \
+    STACK_STORE(ra, 124); \
+    STACK_STORE(s8, 120); \
+    STACK_STORE(s0, 116); \
+    STACK_STORE(s1, 112); \
+    STACK_STORE(s2, 108); \
+    STACK_STORE(s3, 104); \
+    STACK_STORE(s4, 100); \
+    STACK_STORE(s5, 96); \
+    STACK_STORE(s6, 92); \
+    STACK_STORE(s7, 88);
+
+#define STACK_LOAD_FULL() STACK_LOAD(gp, STACK_OFFSET_GP); \
+    STACK_LOAD(s7, 88); \
+    STACK_LOAD(s6, 92); \
+    STACK_LOAD(s5, 96); \
+    STACK_LOAD(s4, 100); \
+    STACK_LOAD(s3, 104); \
+    STACK_LOAD(s2, 108); \
+    STACK_LOAD(s1, 112); \
+    STACK_LOAD(s0, 116); \
+    STACK_LOAD(s8, 120); \
+    STACK_LOAD(ra, 124); \
+    DELETE_STACK(STACK_SIZE)
+
+#define REFRESH_IBASE() \
+    lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
+/* File: mips/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align 2
+    .global ExecuteMterpImpl
+    .ent    ExecuteMterpImpl
+    .frame sp, STACK_SIZE, ra
+/*
+ * On entry:
+ *  a0  Thread* self
+ *  a1  code_item
+ *  a2  ShadowFrame
+ *  a3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .set noreorder
+    .cpload t9
+    .set reorder
+/* Save to the stack. Frame size = STACK_SIZE */
+    STACK_STORE_FULL()
+/* This directive makes sure all subsequent jal calls restore gp from a known stack offset */
+    .cprestore STACK_OFFSET_GP
+
+    /* Remember the return register */
+    sw      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
+
+    /* Remember the code_item */
+    sw      a1, SHADOWFRAME_CODE_ITEM_OFFSET(a2)
+
+    /* set up "named" registers */
+    move    rSELF, a0
+    lw      a0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
+    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to vregs.
+    EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
+    lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
+    addu    rPC, a1, CODEITEM_INSNS_OFFSET        # Point to base of insns[]
+    EAS1(rPC, rPC, a0)                            # Create direct pointer to 1st dex opcode
+
+    EXPORT_PC()
+
+    /* Starting ibase */
+    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
+    /* Set up for backwards branches & osr profiling */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpSetUpHotnessCountdown)        # (method, shadow_frame)
+    move    rPROFILE, v0                   # Starting hotness countdown to rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST()                           # load rINST from rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+    /* NOTE: no fallthrough */
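
The a0-a3 register map above pins down this entry point's C calling convention. A declaration-only C++ sketch of how the runtime side would see it; the forward declarations and the `bool` return are assumptions here, but the argument order follows the comment at the top of ExecuteMterpImpl:

```cpp
#include <cstdint>

namespace art {
class Thread;       // a0
struct CodeItem;    // a1 (stand-in for the real code-item type)
class ShadowFrame;  // a2
union JValue;       // a3

// Implemented by the assembly above; returns via v0/v1 at MterpReturn.
extern "C" bool ExecuteMterpImpl(Thread* self,
                                 const CodeItem* code_item,
                                 ShadowFrame* shadow_frame,
                                 JValue* result_register);
}  // namespace art
```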
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: mips/op_nop.S */
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: mips/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    GET_OPB(a1)                            #  a1 <- B from 15:12
+    GET_OPA4(a0)                           #  a0 <- A from 11:8
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[B]
+    GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: mips/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH(a1, 1)                           #  a1 <- BBBB
+    GET_OPA(a0)                            #  a0 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: mips/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH(a1, 2)                           #  a1 <- BBBB
+    FETCH(a0, 1)                           #  a0 <- AAAA
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: mips/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6, v7" or "move v7, v6" */
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: mips/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6, v7" or "move v7, v6" */
+    FETCH(a3, 1)                           #  a3 <- BBBB
+    GET_OPA(a2)                            #  a2 <- AA
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: mips/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6, v7" or "move v7, v6" */
+    FETCH(a3, 2)                           #  a3 <- BBBB
+    FETCH(a2, 1)                           #  a2 <- AAAA
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: mips/op_move_object.S */
+/* File: mips/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    GET_OPB(a1)                            #  a1 <- B from 15:12
+    GET_OPA4(a0)                           #  a0 <- A from 11:8
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[B]
+    GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
+    .if 1
+    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: mips/op_move_object_from16.S */
+/* File: mips/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH(a1, 1)                           #  a1 <- BBBB
+    GET_OPA(a0)                            #  a0 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: mips/op_move_object_16.S */
+/* File: mips/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH(a1, 2)                           #  a1 <- BBBB
+    FETCH(a0, 1)                           #  a0 <- AAAA
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: mips/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    GET_OPA(a2)                            #  a2 <- AA
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    lw    a0, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
+    lw    a0, 0(a0)                        #  a0 <- result.i
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    .else
+    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: mips/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    GET_OPA(a2)                            #  a2 <- AA
+    lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: mips/op_move_result_object.S */
+/* File: mips/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    GET_OPA(a2)                            #  a2 <- AA
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    lw    a0, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
+    lw    a0, 0(a0)                        #  a0 <- result.i
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    .else
+    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: mips/op_move_exception.S */
+    /* move-exception vAA */
+    GET_OPA(a2)                                 #  a2 <- AA
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
+    FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
+    GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
+    GOTO_OPCODE(t0)                             #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: mips/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    move      v0, zero
+    move      v1, zero
+    b         MterpReturn
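
The flag test in this and the other return handlers reads the thread's flag word and takes the slow path only when a suspend or checkpoint is requested. In C++ terms (bit positions invented; a stub body is added so the sketch stands alone):

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint32_t kSuspendRequest    = 1u << 0;  // assumed bit
constexpr uint32_t kCheckpointRequest = 1u << 1;  // assumed bit

void MterpSuspendCheck() {  // JAL(MterpSuspendCheck) target, stubbed here
  std::puts("suspend check taken");
}

void CheckSuspendBeforeReturn(uint32_t thread_flags) {
  // and ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST); beqz ra, 1f
  if ((thread_flags & (kSuspendRequest | kCheckpointRequest)) != 0) {
    MterpSuspendCheck();
  }
}

int main() {
  CheckSuspendBeforeReturn(0);                // fast path: falls through
  CheckSuspendBeforeReturn(kSuspendRequest);  // slow path
}
```
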
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: mips/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    GET_OPA(a2)                            #  a2 <- AA
+    GET_VREG(v0, a2)                       #  v0 <- vAA
+    move      v1, zero
+    b         MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: mips/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    GET_OPA(a2)                            #  a2 <- AA
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[AA]
+    LOAD64(v0, v1, a2)                     #  v0/v1 <- vAA/vAA+1
+    b         MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: mips/op_return_object.S */
+/* File: mips/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    JAL(MterpThreadFenceForConstructor)
+    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
+    move      a0, rSELF
+    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz      ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    GET_OPA(a2)                            #  a2 <- AA
+    GET_VREG(v0, a2)                       #  v0 <- vAA
+    move      v1, zero
+    b         MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: mips/op_const_4.S */
+    # const/4 vA,                          /* +B */
+    sll       a1, rINST, 16                #  a1 <- Bxxx0000
+    GET_OPA(a0)                            #  a0 <- A+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    sra       a1, a1, 28                   #  a1 <- sssssssB (sign-extended)
+    and       a0, a0, 15
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a1, a0, t0)              #  fp[A] <- a1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: mips/op_const_16.S */
+    # const/16 vAA,                        /* +BBBB */
+    FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: mips/op_const.S */
+    # const vAA,                           /* +BBBBbbbb */
+    GET_OPA(a3)                            #  a3 <- AA
+    FETCH(a0, 1)                           #  a0 <- bbbb (low)
+    FETCH(a1, 2)                           #  a1 <- BBBB (high)
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    sll       a1, a1, 16
+    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: mips/op_const_high16.S */
+    # const/high16 vAA,                    /* +BBBB0000 */
+    FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       a0, a0, 16                   #  a0 <- BBBB0000
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: mips/op_const_wide_16.S */
+    # const-wide/16 vAA,                   /* +BBBB */
+    FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    sra       a1, a0, 31                   #  a1 <- ssssssss
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: mips/op_const_wide_32.S */
+    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
+    GET_OPA(a3)                            #  a3 <- AA
+    FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    sll       a2, a2, 16
+    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    sra       a1, a0, 31                   #  a1 <- ssssssss
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: mips/op_const_wide.S */
+    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    FETCH(a0, 1)                           #  a0 <- bbbb (low)
+    FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
+    FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
+    sll       a1, 16                       #  a1 <- BBBB0000
+    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    FETCH(a3, 4)                           #  a3 <- HHHH (high)
+    GET_OPA(t1)                            #  t1 <- AA
+    sll       a3, 16
+    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
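
The four FETCHes and shift/or pairs in op_const_wide assemble the 64-bit literal as follows; this is a direct C++ transcription of the handler's arithmetic:

```cpp
#include <cstdint>
#include <cassert>

uint64_t BuildConstWide(uint16_t bbbb, uint16_t BBBB,
                        uint16_t hhhh, uint16_t HHHH) {
  uint32_t lo = (static_cast<uint32_t>(BBBB) << 16) | bbbb;  // a0: low word
  uint32_t hi = (static_cast<uint32_t>(HHHH) << 16) | hhhh;  // a1: high word
  return (static_cast<uint64_t>(hi) << 32) | lo;             // vAA/vAA+1
}

int main() {
  assert(BuildConstWide(0x4444, 0x3333, 0x2222, 0x1111) ==
         0x1111222233334444ULL);
}
```
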
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: mips/op_const_wide_high16.S */
+    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
+    GET_OPA(a3)                            #  a3 <- AA
+    li        a0, 0                        #  a0 <- 00000000
+    sll       a1, 16                       #  a1 <- BBBB0000
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: mips/op_const_string.S */
+    # const/string vAA, String             /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                        # a0 <- BBBB
+    GET_OPA(a1)                         # a1 <- AA
+    addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
+    move   a3, rSELF
+    JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST(2)                    # load rINST
+    bnez   v0, MterpPossibleException
+    ADVANCE(2)                          # advance rPC
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: mips/op_const_string_jumbo.S */
+    # const/string vAA, String          /* BBBBBBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                        # a0 <- bbbb (low)
+    FETCH(a2, 2)                        # a2 <- BBBB (high)
+    GET_OPA(a1)                         # a1 <- AA
+    sll    a2, a2, 16
+    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
+    move   a3, rSELF
+    JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST(3)                    # load rINST
+    bnez   v0, MterpPossibleException
+    ADVANCE(3)                          # advance rPC
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: mips/op_const_class.S */
+    # const/class vAA, Class               /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                        # a0 <- BBBB
+    GET_OPA(a1)                         # a1 <- AA
+    addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
+    move   a3, rSELF
+    JAL(MterpConstClass)
+    PREFETCH_INST(2)                    # load rINST
+    bnez   v0, MterpPossibleException
+    ADVANCE(2)                          # advance rPC
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: mips/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC()
+    GET_OPA(a2)                            # a2 <- AA
+    GET_VREG(a0, a2)                       # a0 <- vAA (object)
+    move   a1, rSELF                       # a1 <- self
+    JAL(artLockObjectFromCode)             # v0 <- artLockObject(obj, self)
+    bnez v0, MterpException
+    FETCH_ADVANCE_INST(1)                  # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: mips/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC()
+    GET_OPA(a2)                            # a2 <- AA
+    GET_VREG(a0, a2)                       # a0 <- vAA (object)
+    move   a1, rSELF                       # a1 <- self
+    JAL(artUnlockObjectFromCode)           # v0 <- artUnlockObject(obj, self)
+    bnez v0, MterpException
+    FETCH_ADVANCE_INST(1)                  # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: mips/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    # check-cast vAA, class                /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           #  a0 <- BBBB
+    GET_OPA(a1)                            #  a1 <- AA
+    EAS2(a1, rFP, a1)                      #  a1 <- &object
+    lw     a2, OFF_FP_METHOD(rFP)          #  a2 <- method
+    move   a3, rSELF                       #  a3 <- self
+    JAL(MterpCheckCast)                    #  v0 <- CheckCast(index, &obj, method, self)
+    PREFETCH_INST(2)
+    bnez   v0, MterpPossibleException
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: mips/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * The most common situation is a non-null object being compared against
+     * an already-resolved class.
+     */
+    # instance-of vA, vB, class            /* CCCC */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- CCCC
+    GET_OPB(a1)                            # a1 <- B
+    EAS2(a1, rFP, a1)                      # a1 <- &object
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    GET_OPA4(rOBJ)                         # rOBJ <- A+
+    JAL(MterpInstanceOf)                   # v0 <- Mterp(index, &obj, method, self)
+    lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    PREFETCH_INST(2)                       # load rINST
+    bnez a1, MterpException
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    SET_VREG_GOTO(v0, rOBJ, t0)            # vA <- v0
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: mips/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    GET_OPB(a1)                            #  a1 <- B
+    GET_OPA4(a2)                           #  a2 <- A+
+    GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
+    # is object null?
+    beqz      a0, common_errNullObject     #  yup, fail
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- array length
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a3, a2, t0)              #  vA <- length
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: mips/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    # new-instance vAA, class              /* BBBB */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rSELF
+    move   a2, rINST
+    JAL(MterpNewInstance)
+    beqz   v0, MterpPossibleException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: mips/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    move   a3, rSELF
+    JAL(MterpNewArray)
+    beqz   v0, MterpPossibleException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: mips/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
+    move   a1, rPC
+    move   a2, rSELF
+    JAL(MterpFilledNewArray)                           #  v0 <- helper(shadow_frame, pc, self)
+    beqz      v0,  MterpPossibleException  #  has exception
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: mips/op_filled_new_array_range.S */
+/* File: mips/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
+    move   a1, rPC
+    move   a2, rSELF
+    JAL(MterpFilledNewArrayRange)                           #  v0 <- helper(shadow_frame, pc, self)
+    beqz      v0,  MterpPossibleException  #  has exception
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: mips/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
+    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
+    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
+    EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
+    JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
+    beqz      v0,  MterpPossibleException  #  has exception
+    FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: mips/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC()                              #  exception handler can throw
+    GET_OPA(a2)                              #  a2 <- AA
+    GET_VREG(a1, a2)                         #  a1 <- vAA (exception object)
+    # null object?
+    beqz  a1, common_errNullObject           #  yes, throw an NPE instead
+    sw    a1, THREAD_EXCEPTION_OFFSET(rSELF) #  thread->exception <- obj
+    b         MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: mips/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    sll       a0, rINST, 16                #  a0 <- AAxx0000
+    sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
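
The two shifts in op_goto extract and sign-extend the 8-bit offset AA from bits 15:8 of the instruction word. A self-checking C++ transcription (two's-complement representation assumed, as on the target):

```cpp
#include <cstdint>
#include <cassert>

int32_t GotoOffset(uint16_t inst) {
  uint32_t shifted = static_cast<uint32_t>(inst) << 16;  // sll a0, rINST, 16
  return static_cast<int32_t>(shifted) >> 24;            // sra rINST, a0, 24
}

int main() {
  assert(GotoOffset(0x7F28) == 127);   // AA = 0x7F, opcode 0x28 (goto)
  assert(GotoOffset(0x8028) == -128);  // AA = 0x80, sign-extended
}
```
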
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: mips/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: mips/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".
+     */
+    /* goto/32 +AAAAAAAA */
+    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(a1, 2)                           #  a1 <- AAAA (hi)
+    sll       a1, a1, 16
+    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    b         MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: mips/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
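+    /*
+     * Payload layouts handed to the helper (per the Dex format; for
+     * reference only):
+     *   packed-switch-payload: ushort ident = 0x0100; ushort size;
+     *                          int first_key; int targets[size];
+     *   sparse-switch-payload: ushort ident = 0x0200; ushort size;
+     *                          int keys[size]; int targets[size];
+     */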
+    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
+    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       t0, a1, 16
+    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    GET_VREG(a1, a3)                       #  a1 <- vAA
+    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
+    JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
+    move      rINST, v0
+    b         MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: mips/op_sparse_switch.S */
+/* File: mips/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
+    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    GET_OPA(a3)                            #  a3 <- AA
+    sll       t0, a1, 16
+    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    GET_VREG(a1, a3)                       #  a1 <- vAA
+    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
+    JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
+    move      rINST, v0
+    b         MterpCommonTakenBranchNoFlags
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: mips/op_cmpl_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register rTEMP based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * The operation we're implementing is:
+     *   if (x == y)
+     *     return 0;
+     *   else if (x < y)
+     *     return -1;
+     *   else if (x > y)
+     *     return 1;
+     *   else
+     *     return {-1 or 1};  // one or both operands were NaN
+     *
+     * for: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
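+    /*
+     * Note: the l/g suffix encodes the NaN answer. cmpl yields -1 on
+     * NaN and cmpg yields +1, which is how the compiler arranges for
+     * either branch direction of a float comparison to treat NaN as
+     * "false".
+     */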
+
+    /* "clasic" form */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8
+    GET_VREG_F(ft0, a2)
+    GET_VREG_F(ft1, a3)
+#ifdef MIPS32REVGE6
+    cmp.ult.s ft2, ft0, ft1               # ft0 < ft1 or unordered; NaN yields -1, cmpl's NaN result
+    li        rTEMP, -1
+    bc1nez    ft2, .Lop_cmpl_float_finish
+    cmp.ult.s ft2, ft1, ft0
+    li        rTEMP, 1
+    bc1nez    ft2, .Lop_cmpl_float_finish
+    cmp.eq.s  ft2, ft0, ft1
+    li        rTEMP, 0
+    bc1nez    ft2, .Lop_cmpl_float_finish
+    b         .Lop_cmpl_float_nan
+#else
+    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    li        rTEMP, -1
+    bc1t      fcc0, .Lop_cmpl_float_finish
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, .Lop_cmpl_float_finish
+    c.eq.s    fcc0, ft0, ft1
+    li        rTEMP, 0
+    bc1t      fcc0, .Lop_cmpl_float_finish
+    b         .Lop_cmpl_float_nan
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: mips/op_cmpg_float.S */
+/* File: mips/op_cmpl_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register rTEMP based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * The operation we're implementing is:
+     *   if (x == y)
+     *     return 0;
+     *   else if (x < y)
+     *     return -1;
+     *   else if (x > y)
+     *     return 1;
+     *   else
+     *     return {-1 or 1};  // one or both operands were NaN
+     *
+     * for: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+
+    /* "clasic" form */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8
+    GET_VREG_F(ft0, a2)
+    GET_VREG_F(ft1, a3)
+#ifdef MIPS32REVGE6
+    cmp.lt.s  ft2, ft0, ft1               # ordered ft0 < ft1; false on NaN, so NaN falls through to the nan label
+    li        rTEMP, -1
+    bc1nez    ft2, .Lop_cmpg_float_finish
+    cmp.lt.s  ft2, ft1, ft0               # ordered ft1 < ft0
+    li        rTEMP, 1
+    bc1nez    ft2, .Lop_cmpg_float_finish
+    cmp.eq.s  ft2, ft0, ft1
+    li        rTEMP, 0
+    bc1nez    ft2, .Lop_cmpg_float_finish
+    b         .Lop_cmpg_float_nan
+#else
+    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    li        rTEMP, -1
+    bc1t      fcc0, .Lop_cmpg_float_finish
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, .Lop_cmpg_float_finish
+    c.eq.s    fcc0, ft0, ft1
+    li        rTEMP, 0
+    bc1t      fcc0, .Lop_cmpg_float_finish
+    b         .Lop_cmpg_float_nan
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: mips/op_cmpl_double.S */
+    /*
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register (rTEMP) based on the comparison results.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * See op_cmpl_float for more details.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       rOBJ, a0, 255                #  s5 <- BB
+    srl       t0, a0, 8                    #  t0 <- CC
+    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
+    LOAD64_F(ft0, ft0f, rOBJ)
+    LOAD64_F(ft1, ft1f, t0)
+#ifdef MIPS32REVGE6
+    cmp.ult.d ft2, ft0, ft1               # ft0 < ft1 or unordered; NaN yields -1, cmpl's NaN result
+    li        rTEMP, -1
+    bc1nez    ft2, .Lop_cmpl_double_finish
+    cmp.ult.d ft2, ft1, ft0
+    li        rTEMP, 1
+    bc1nez    ft2, .Lop_cmpl_double_finish
+    cmp.eq.d  ft2, ft0, ft1
+    li        rTEMP, 0
+    bc1nez    ft2, .Lop_cmpl_double_finish
+    b         .Lop_cmpl_double_nan
+#else
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, .Lop_cmpl_double_finish
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, .Lop_cmpl_double_finish
+    c.eq.d    fcc0, ft0, ft1
+    li        rTEMP, 0
+    bc1t      fcc0, .Lop_cmpl_double_finish
+    b         .Lop_cmpl_double_nan
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: mips/op_cmpg_double.S */
+/* File: mips/op_cmpl_double.S */
+    /*
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register (rTEMP) based on the comparison results.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * See op_cmpl_float for more details.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       rOBJ, a0, 255                #  s5 <- BB
+    srl       t0, a0, 8                    #  t0 <- CC
+    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
+    LOAD64_F(ft0, ft0f, rOBJ)
+    LOAD64_F(ft1, ft1f, t0)
+#ifdef MIPS32REVGE6
+    cmp.lt.d  ft2, ft0, ft1               # ordered ft0 < ft1; false on NaN, so NaN falls through to the nan label
+    li        rTEMP, -1
+    bc1nez    ft2, .Lop_cmpg_double_finish
+    cmp.lt.d  ft2, ft1, ft0               # ordered ft1 < ft0
+    li        rTEMP, 1
+    bc1nez    ft2, .Lop_cmpg_double_finish
+    cmp.eq.d  ft2, ft0, ft1
+    li        rTEMP, 0
+    bc1nez    ft2, .Lop_cmpg_double_finish
+    b         .Lop_cmpg_double_nan
+#else
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, .Lop_cmpg_double_finish
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, .Lop_cmpg_double_finish
+    c.eq.d    fcc0, ft0, ft1
+    li        rTEMP, 0
+    bc1t      fcc0, .Lop_cmpg_double_finish
+    b         .Lop_cmpg_double_nan
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: mips/op_cmp_long.S */
+    /*
+     * Compare two 64-bit values
+     *    x = y     return  0
+     *    x < y     return -1
+     *    x > y     return  1
+     *
+     * I think I can improve on the ARM code by the following observation
+     *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
+     *    sgt   t1,  x.hi, y.hi;        # (x.hi > y.hi) ? 1:0
+     *    subu  v0, t1, t0              # v0 = -1, 1, or 0 for [ <, >, = ]
+     */
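+    /*
+     * Equivalent C sketch (illustration only):
+     *   int r = (x.hi < y.hi) ? -1 : (x.hi > y.hi) ? 1 : 0;   // signed
+     *   if (r == 0)
+     *     r = (x.lo < y.lo) ? -1 : (x.lo > y.lo) ? 1 : 0;     // unsigned
+     * matching the slt/sgt on the hi words and sltu/sgtu on the lo words
+     * below.
+     */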
+    /* cmp-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[CC]
+    LOAD64(a0, a1, a2)                     #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, a3)                     #  a2/a3 <- vCC/vCC+1
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    slt       t0, a1, a3                   #  compare hi
+    sgt       t1, a1, a3
+    subu      v0, t1, t0                   #  v0 <- (-1, 1, 0)
+    bnez      v0, .Lop_cmp_long_finish
+    # at this point x.hi==y.hi
+    sltu      t0, a0, a2                   #  compare lo
+    sgtu      t1, a0, a2
+    subu      v0, t1, t0                   #  v0 <- (-1, 1, 0) for [< > =]
+
+.Lop_cmp_long_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(v0, rOBJ, t0)            #  vAA <- v0
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: mips/op_if_eq.S */
+/* File: mips/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
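+    /*
+     * Illustration (not from the original source): Java "if (a == b)"
+     * compiles to "if-eq vA, vB, +CCCC". A taken branch funnels through
+     * MterpCommonTakenBranchNoFlags for the suspend/profiling checks;
+     * an untaken one simply advances rPC by 2 code units.
+     */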
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    beq a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: mips/op_if_ne.S */
+/* File: mips/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    bne a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: mips/op_if_lt.S */
+/* File: mips/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    blt a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: mips/op_if_ge.S */
+/* File: mips/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    bge a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: mips/op_if_gt.S */
+/* File: mips/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    bgt a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: mips/op_if_le.S */
+/* File: mips/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    GET_OPA4(a0)                           #  a0 <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a3, a1)                       #  a3 <- vB
+    GET_VREG(a0, a0)                       #  a0 <- vA
+    FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
+    ble a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: mips/op_if_eqz.S */
+/* File: mips/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
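+    /*
+     * Illustration: "if (x == 0)" or "if (ref == null)" compiles to
+     * "if-eqz vAA, +BBBB"; the hardware zero register supplies the
+     * constant operand, so only one vreg is fetched.
+     */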
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    beq a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: mips/op_if_nez.S */
+/* File: mips/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    bne a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: mips/op_if_ltz.S */
+/* File: mips/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    blt a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: mips/op_if_gez.S */
+/* File: mips/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    bge a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: mips/op_if_gtz.S */
+/* File: mips/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    bgt a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: mips/op_if_lez.S */
+/* File: mips/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    GET_OPA(a0)                            #  a0 <- AA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
+    FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
+    ble a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: mips/op_unused_3e.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: mips/op_unused_3f.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: mips/op_unused_40.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: mips/op_unused_41.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: mips/op_unused_42.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: mips/op_unused_43.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: mips/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
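+    /*
+     * Equivalent C sketch (illustration only):
+     *   if (array == NULL) goto common_errNullObject;
+     *   if ((u4)index >= (u4)array->length) goto common_errArrayIndex;
+     *   vAA = array->data[index];
+     * The unsigned compare folds the negative-index case into the
+     * bounds check.
+     */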
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 2
+    EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    # a1 >= a3; compare unsigned index
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    lw a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)             #  a2 <- vBB[vCC]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a2, rOBJ, t0)            #  vAA <- a2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: mips/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    EAS3(a0, a0, a1)                       #  a0 <- arrayObj + index*width
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    LOAD64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(a2, a3, rOBJ, t0)      #  vAA/vAA+1 <- a2/a3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: mips/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    EXPORT_PC()
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    JAL(artAGetObjectFromMterp)            #  v0 <- GetObj(array, index)
+    lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    PREFETCH_INST(2)                       #  load rINST
+    bnez a1, MterpException
+    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: mips/op_aget_boolean.S */
+/* File: mips/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 0
+    EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    # a1 >= a3; compare unsigned index
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    lbu a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)             #  a2 <- vBB[vCC]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a2, rOBJ, t0)            #  vAA <- a2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: mips/op_aget_byte.S */
+/* File: mips/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 0
+    EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    # a1 >= a3; compare unsigned index
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    lb a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)             #  a2 <- vBB[vCC]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a2, rOBJ, t0)            #  vAA <- a2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: mips/op_aget_char.S */
+/* File: mips/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 1
+    EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    # a1 >= a3; compare unsigned index
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    lhu a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)             #  a2 <- vBB[vCC]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a2, rOBJ, t0)            #  vAA <- a2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: mips/op_aget_short.S */
+/* File: mips/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 1
+    EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    # a1 >= a3; compare unsigned index
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    lh a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)             #  a2 <- vBB[vCC]
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a2, rOBJ, t0)            #  vAA <- a2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: mips/op_aput.S */
+
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
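+    /*
+     * Mirror of the aget sketch above: the same null and unsigned
+     * bounds checks, then vBB[vCC] <- vAA via a plain store (no write
+     * barrier is needed for non-object elements; aput-object goes
+     * through MterpAputObject instead).
+     */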
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 2
+    EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, rOBJ)                     #  a2 <- vAA
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    sw a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: mips/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use a
+     * single 64-bit store (STORE64).
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(t0)                            #  t0 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    EAS3(a0, a0, a1)                       #  a0 <- arrayObj + index*width
+    EAS2(rOBJ, rFP, t0)                    #  rOBJ <- &fp[AA]
+    # compare unsigned index, length
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: mips/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     *
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    JAL(MterpAputObject)
+    beqz   v0, MterpPossibleException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: mips/op_aput_boolean.S */
+/* File: mips/op_aput.S */
+
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 0
+    EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, rOBJ)                     #  a2 <- vAA
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    sb a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: mips/op_aput_byte.S */
+/* File: mips/op_aput.S */
+
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 0
+    EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, rOBJ)                     #  a2 <- vAA
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    sb a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: mips/op_aput_char.S */
+/* File: mips/op_aput.S */
+
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 1
+    EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, rOBJ)                     #  a2 <- vAA
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    sh a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: mips/op_aput_short.S */
+/* File: mips/op_aput.S */
+
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B(a2, 1, 0)                      #  a2 <- BB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    FETCH_B(a3, 1, 1)                      #  a3 <- CC
+    GET_VREG(a0, a2)                       #  a0 <- vBB (array object)
+    GET_VREG(a1, a3)                       #  a1 <- vCC (requested index)
+    # null array object?
+    beqz      a0, common_errNullObject     #  yes, bail
+    LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
+    .if 1
+    EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
+    .else
+    addu      a0, a0, a1
+    .endif
+    bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_VREG(a2, rOBJ)                     #  a2 <- vAA
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    sh a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: mips/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
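+    /*
+     * Call contract, as a sketch: artGet32InstanceFromCode(field_idx,
+     * obj, referrer, self) returns the value in v0 and reports failure
+     * by setting self->exception, which the lw/bnez pair below checks.
+     */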
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGet32InstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: mips/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field byte offset
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGet64InstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez a3, MterpException                # bail out
+    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: mips/op_iget_object.S */
+/* File: mips/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGetObjInstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if 1
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: mips/op_iget_boolean.S */
+/* File: mips/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGetBooleanInstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: mips/op_iget_byte.S */
+/* File: mips/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGetByteInstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: mips/op_iget_char.S */
+/* File: mips/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGetCharInstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: mips/op_iget_short.S */
+/* File: mips/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
+    move  a3, rSELF                        # a3 <- self
+    JAL(artGetShortInstanceFromCode)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           # a2<- A+
+    PREFETCH_INST(2)                       # load rINST
+    bnez  a3, MterpPossibleException        # bail out
+    .if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
+    .else
+    SET_VREG(v0, a2)                       # fp[A] <- v0
+    .endif
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: mips/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    # op vA, vB, field                     /* CCCC */
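+    /*
+     * Sketch of the helper contract: artSet32InstanceFromMterp(field_idx,
+     * obj, new_value, referrer) returns non-zero on failure, routing to
+     * MterpPossibleException below.
+     */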
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    GET_VREG(a2, a2)                       # a2 <- fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet32InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: mips/op_iput_wide.S */
+    # iput-wide vA, vB, field              /* CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    EAS2(a2, rFP, a2)                      # a2 <- &fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet64InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: mips/op_iput_object.S */
+    /*
+     * Instance field put of an object reference.
+     *
+     * for: iput-object, iput-object-volatile
+     */
+    # op vA, vB, field                     /* CCCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    move   a3, rSELF
+    JAL(MterpIputObject)
+    beqz   v0, MterpException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: mips/op_iput_boolean.S */
+/* File: mips/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    # op vA, vB, field                     /* CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    GET_VREG(a2, a2)                       # a2 <- fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet8InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: mips/op_iput_byte.S */
+/* File: mips/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    # op vA, vB, field                     /* CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    GET_VREG(a2, a2)                       # a2 <- fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet8InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: mips/op_iput_char.S */
+/* File: mips/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    # op vA, vB, field                     /* CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    GET_VREG(a2, a2)                       # a2 <- fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet16InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: mips/op_iput_short.S */
+/* File: mips/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    # op vA, vB, field                     /* CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    GET_OPB(a1)                            # a1 <- B
+    GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
+    GET_OPA4(a2)                           # a2 <- A+
+    GET_VREG(a2, a2)                       # a2 <- fp[A]
+    lw    a3, OFF_FP_METHOD(rFP)           # a3 <- referrer
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet16InstanceFromMterp)
+    bnez  v0, MterpPossibleException       # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: mips/op_sget.S */
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern artGet32StaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGet32StaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
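+/*
+ * The ".if 0 / .else / .endif" pair above is the expansion of the sget
+ * template's is_object flag: the same mips/op_sget.S source is stamped
+ * out once per variant, and only op_sget_object (0x62 below, where the
+ * guard reads ".if 1") selects the SET_VREG_OBJECT arm so references
+ * get the extra bookkeeping a plain SET_VREG would skip.
+ */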
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: mips/op_sget_wide.S */
+    /*
+     * 64-bit SGET handler.
+     */
+    # sget-wide vAA, field                 /* BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGet64StaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    bnez  a3, MterpException
+    GET_OPA(a1)                            # a1 <- AA
+    FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
+    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: mips/op_sget_object.S */
+/* File: mips/op_sget.S */
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern artGetObjStaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGetObjStaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if 1
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: mips/op_sget_boolean.S */
+/* File: mips/op_sget.S */
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGetBooleanStaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: mips/op_sget_byte.S */
+/* File: mips/op_sget.S */
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern artGetByteStaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGetByteStaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: mips/op_sget_char.S */
+/* File: mips/op_sget.S */
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern artGetCharStaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGetCharStaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: mips/op_sget_short.S */
+/* File: mips/op_sget.S */
+    /*
+     * General SGET handler.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    # op vAA, field                        /* BBBB */
+    .extern artGetShortStaticFromCode
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    move  a2, rSELF                        # a2 <- self
+    JAL(artGetShortStaticFromCode)
+    lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA(a2)                            # a2 <- AA
+    PREFETCH_INST(2)
+    bnez  a3, MterpException               # bail out
+.if 0
+    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
+.else
+    SET_VREG(v0, a2)                       # fp[AA] <- v0
+.endif
+    ADVANCE(2)
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: mips/op_sput.S */
+    /*
+     * General SPUT handler.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    # op vAA, field                        /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    GET_OPA(a3)                            # a3 <- AA
+    GET_VREG(a1, a3)                       # a1 <- fp[AA], the new value
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet32StaticFromCode)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
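+/*
+ * Static puts call the quick entrypoints directly.  A hedged C sketch
+ * of the 32-bit one (illustrative names):
+ *
+ *   extern "C" int artSet32StaticFromCode(uint32_t field_idx,
+ *                                         uint32_t new_value,
+ *                                         ArtMethod* referrer,
+ *                                         Thread* self);
+ *
+ * As with the instance puts, a non-zero return means an exception is
+ * pending.
+ */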
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: mips/op_sput_wide.S */
+    /*
+     * 64-bit SPUT handler.
+     */
+    # sput-wide vAA, field                 /* BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref CCCC
+    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
+    GET_OPA(a2)                            # a2 <- AA
+    EAS2(a2, rFP, a2)                      # a2 <- &fp[AA]
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet64IndirectStaticFromMterp)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: mips/op_sput_object.S */
+    /*
+     * General 32-bit SPUT handler.
+     *
+     * for: sput-object
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    move   a3, rSELF
+    JAL(MterpSputObject)
+    beqz   v0, MterpException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: mips/op_sput_boolean.S */
+/* File: mips/op_sput.S */
+    /*
+     * General SPUT handler.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    # op vAA, field                        /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    GET_OPA(a3)                            # a3 <- AA
+    GET_VREG(a1, a3)                       # a1 <- fp[AA], the new value
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet8StaticFromCode)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: mips/op_sput_byte.S */
+/* File: mips/op_sput.S */
+    /*
+     * General SPUT handler.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    # op vAA, field                        /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    GET_OPA(a3)                            # a3 <- AA
+    GET_VREG(a1, a3)                       # a1 <- fp[AA], the new value
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet8StaticFromCode)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: mips/op_sput_char.S */
+/* File: mips/op_sput.S */
+    /*
+     * General SPUT handler.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    # op vAA, field                        /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    GET_OPA(a3)                            # a3 <- AA
+    GET_VREG(a1, a3)                       # a1 <- fp[AA], the new value
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet16StaticFromCode)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: mips/op_sput_short.S */
+/* File: mips/op_sput.S */
+    /*
+     * General SPUT handler.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    # op vAA, field                        /* BBBB */
+    EXPORT_PC()
+    FETCH(a0, 1)                           # a0 <- field ref BBBB
+    GET_OPA(a3)                            # a3 <- AA
+    GET_VREG(a1, a3)                       # a1 <- fp[AA], the new value
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
+    move  a3, rSELF                        # a3 <- self
+    PREFETCH_INST(2)                       # load rINST
+    JAL(artSet16StaticFromCode)
+    bnez  v0, MterpException               # bail out
+    ADVANCE(2)                             # advance rPC
+    GET_INST_OPCODE(t0)                    # extract opcode from rINST
+    GOTO_OPCODE(t0)                        # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: mips/op_invoke_virtual.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeVirtual)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
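+/*
+ * Every invoke flavor shares this wrapper; resolution and the call
+ * itself happen in C++.  Hedged sketch (names illustrative):
+ *
+ *   extern "C" bool MterpInvokeVirtual(Thread* self,
+ *                                      ShadowFrame* shadow_frame,
+ *                                      uint16_t* dex_pc_ptr,
+ *                                      uint32_t inst_data);
+ *
+ * On success the handler advances rPC by 3 code units (the width of an
+ * invoke) and asks MterpShouldSwitchInterpreters whether to stay in
+ * mterp or take MterpFallback.
+ */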
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: mips/op_invoke_super.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeSuper)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: mips/op_invoke_direct.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeDirect)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: mips/op_invoke_static.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeStatic)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: mips/op_invoke_interface.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeInterface)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: mips/op_return_void_no_barrier.S */
+    lw     ra, THREAD_FLAGS_OFFSET(rSELF)
+    move   a0, rSELF
+    and    ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqz   ra, 1f
+    JAL(MterpSuspendCheck)                 # (self)
+1:
+    move   v0, zero
+    move   v1, zero
+    b      MterpReturn
+
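+/*
+ * The return path above polls the thread's flag word before leaving
+ * the frame; conceptually (a hedged sketch, constant names
+ * illustrative):
+ *
+ *   if (self->flags & (kSuspendRequest | kCheckpointRequest))
+ *     MterpSuspendCheck(self);
+ *   return 0;   // v0/v1 form the 64-bit result pair, zero for void
+ */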
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: mips/op_invoke_virtual_range.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeVirtualRange)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: mips/op_invoke_super_range.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeSuperRange)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: mips/op_invoke_direct_range.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeDirectRange)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: mips/op_invoke_static_range.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeStaticRange)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: mips/op_invoke_interface_range.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeInterfaceRange)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: mips/op_unused_79.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: mips/op_unused_7a.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: mips/op_neg_int.S */
+/* File: mips/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    negu a0, a0                            #  a0 <- -vB
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, t0, t1)              #  vA <- result0
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: mips/op_not_int.S */
+/* File: mips/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    not a0, a0                             #  a0 <- ~vB
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, t0, t1)              #  vA <- result0
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: mips/op_neg_long.S */
+/* File: mips/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be MIPS instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double,
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    negu v0, a0                            #  optional op: v0 <- -a0 (low word)
+    negu v1, a1; sltu a0, zero, v0; subu v1, v1, a0    #  v1 <- -a1 minus the borrow out of the low word
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, rOBJ)               #  vA/vA+1 <- v0/v1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
+
+
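+/*
+ * The two-instruction pair above is a 64-bit negate built from 32-bit
+ * halves; as a sketch:
+ *
+ *   lo' = -lo;
+ *   hi' = -hi - (lo' != 0);   // sltu materializes the borrow
+ */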
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: mips/op_not_long.S */
+/* File: mips/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be MIPS instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double,
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    not a0, a0                             #  optional op: invert low word
+    not a1, a1                             #  invert high word
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, rOBJ)               #  vA/vA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: mips/op_neg_float.S */
+/* File: mips/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    addu a0, a0, 0x80000000                #  a0 <- vB with the sign bit flipped
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, t0, t1)              #  vA <- result0
+    /* 9-10 instructions */
+
+
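+/*
+ * Float negation stays in the integer pipeline: adding 0x80000000 to
+ * the raw bits toggles only bit 31 (the IEEE-754 sign bit), since any
+ * carry falls off the top.  Equivalent sketch: bits ^= 0x80000000u;
+ */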
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: mips/op_neg_double.S */
+/* File: mips/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be MIPS instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double,
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    addu a1, a1, 0x80000000                #  flip the sign bit of the high word
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, rOBJ)               #  vA/vA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: mips/op_int_to_long.S */
+/* File: mips/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0", where
+     * "result" is a 64-bit quantity in a0/a1.
+     *
+     * For: int-to-long
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    sra a1, a0, 31                         #  a1 <- sign extension of a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, rOBJ)               #  vA/vA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 10-11 instructions */
+
+
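+/*
+ * The widening above is a plain arithmetic shift: "sra a1, a0, 31"
+ * replicates the sign bit, i.e. hi = (int32_t)lo >> 31, so a0/a1
+ * holds the 64-bit value of the 32-bit input.
+ */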
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: mips/op_int_to_float.S */
+/* File: mips/funop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.w fv0, fa0
+
+.Lop_int_to_float_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GOTO_OPCODE(t1)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: mips/op_int_to_double.S */
+/* File: mips/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0", where
+     * "result" is a 64-bit quantity in a0/a1.
+     *
+     * For: int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.w fv0, fa0
+
+.Lop_int_to_double_set_vreg:
+    SET_VREG64_F(fv0, fv0f, rOBJ)          #  vA/vA+1 <- fv0/fv0f
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: mips/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: mips/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    GET_OPB(a1)                            #  a1 <- B from 15:12
+    GET_OPA4(a0)                           #  a0 <- A from 11:8
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_VREG(a2, a1)                       #  a2 <- fp[B]
+    GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    .else
+    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    .endif
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: mips/op_long_to_float.S */
+/* File: mips/unopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0/a1", where
+     * "result" is a 32-bit quantity in a0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     * If hard floating point support is available, use fa0 as the parameter,
+     * except for long-to-float opcode.
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for OP_MOVE.)
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(rARG0, rARG1, a3)               #  a0/a1 <- vB/vB+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdisf)                       #  fv0 <- (float) a0/a1
+
+.Lop_long_to_float_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
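+/*
+ * long-to-float has no single MIPS32 instruction, so the handler calls
+ * the libgcc/compiler-rt soft conversion (long-to-double below uses
+ * __floatdidf the same way):
+ *
+ *   float __floatdisf(long long a);   // result lands in $f0 (fv0)
+ */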
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: mips/op_long_to_double.S */
+/* File: mips/funopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64(rARG0, rARG1, a3)               #  a0/a1 <- vB/vB+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    JAL(__floatdidf)                       #  fv0 <- (double) a0/a1
+
+.Lop_long_to_double_set_vreg:
+    SET_VREG64_F(fv0, fv0f, rOBJ)          #  vA/vA+1 <- fv0/fv0f
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: mips/op_float_to_int.S */
+/* File: mips/funop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    b f2i_doconv                           #  out-of-line clamp + convert
+
+.Lop_float_to_int_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GOTO_OPCODE(t1)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: mips/op_float_to_long.S */
+/* File: mips/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0", where
+     * "result" is a 64-bit quantity in a0/a1.
+     *
+     * For: int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    b f2l_doconv                           #  out-of-line clamp + convert
+
+.Lop_float_to_long_set_vreg:
+    SET_VREG64(rRESULT0, rRESULT1, rOBJ)   #  vA/vA+1 <- rRESULT0/rRESULT1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: mips/op_float_to_double.S */
+/* File: mips/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0", where
+     * "result" is a 64-bit quantity in a0/a1.
+     *
+     * For: int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.s fv0, fa0
+
+.Lop_float_to_double_set_vreg:
+    SET_VREG64_F(fv0, fv0f, rOBJ)          #  vA/vA+1 <- fv0/fv0f
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: mips/op_double_to_int.S */
+/* File: mips/unopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0/a1", where
+     * "result" is a 32-bit quantity in a0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     * If hard floating point support is available, use fa0 as the parameter,
+     * except for long-to-float opcode.
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for OP_MOVE.)
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    b d2i_doconv                           #  out-of-line clamp + convert
+
+.Lop_double_to_int_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/*
+ * Convert the double in a0/a1 to an int in a0.
+ *
+ * We have to clip values to int min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+
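+/*
+ * A hedged sketch of the clamp the out-of-line d2i_doconv path applies
+ * (per the conversion rules the comment above refers to):
+ *
+ *   if (d != d)            return 0;           // NaN
+ *   if (d >= 2147483648.0) return INT32_MAX;   // 0x7fffffff
+ *   if (d < -2147483648.0) return INT32_MIN;   // 0x80000000
+ *   return (int32_t)d;
+ */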
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: mips/op_double_to_long.S */
+/* File: mips/funopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0/a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    b d2l_doconv                           #  out-of-line clamp + convert
+
+.Lop_double_to_long_set_vreg:
+    SET_VREG64(rRESULT0, rRESULT1, rOBJ)   #  vA/vA+1 <- rRESULT0/rRESULT1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: mips/op_double_to_float.S */
+/* File: mips/unopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op a0/a1", where
+     * "result" is a 32-bit quantity in a0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     * If hard floating point support is available, use fa0 as the parameter,
+     * except for long-to-float opcode.
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for OP_MOVE.)
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.d fv0, fa0
+
+.Lop_double_to_float_set_vreg_f:
+    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: mips/op_int_to_byte.S */
+/* File: mips/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    sll a0, a0, 24                         #  optional op: move byte to the top
+    sra a0, a0, 24                         #  a0 <- sign-extended low byte
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, t0, t1)              #  vA <- result0
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: mips/op_int_to_char.S */
+/* File: mips/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    and a0, 0xffff                         #  a0 <- zero-extended low 16 bits
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, t0, t1)              #  vA <- result0
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: mips/op_int_to_short.S */
+/* File: mips/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op a0".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(t0)                           #  t0 <- A+
+    GET_VREG(a0, a3)                       #  a0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    sll a0, 16                             #  optional op: move halfword to the top
+    sra a0, 16                             #  a0 <- sign-extended low 16 bits
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, t0, t1)              #  vA <- result0
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: mips/op_add_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    addu a0, a0, a1                        #  a0 <- vBB + vCC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)            #  vAA <- a0
+    /* 11-14 instructions */
+
+
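+/*
+ * Operand decode for this 23x format: the second code unit packs both
+ * sources as CC|BB, so
+ *
+ *   BB = unit & 0xff;   // "and a2, a0, 255"
+ *   CC = unit >> 8;     // "srl a3, a0, 8"
+ *
+ * e.g. add-int v3, v5, v9 fetches the unit 0x0905 and reads vBB = v5,
+ * vCC = v9.
+ */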
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: mips/op_sub_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    subu a0, a0, a1                        #  a0 <- vBB - vCC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)            #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: mips/op_mul_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    mul a0, a0, a1                         #  a0 <- vBB * vCC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)            #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: mips/op_div_int.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                           #  optional op (none)
+    div a0, a0, a1                         #  a0 <- vBB / vCC (R6 three-operand divide)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)            #  vAA <- a0
+    /* 11-14 instructions */
+
+#else
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    div zero, a0, a1                       #  optional op: HI/LO <- vBB / vCC
+    mflo a0                                #  a0 <- quotient
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)            #  vAA <- a0
+    /* 11-14 instructions */
+
+#endif
+
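+/*
+ * The two arms above differ only in the divide idiom: MIPS32 R6 has a
+ * three-operand div that writes the quotient directly, while pre-R6
+ * cores divide into HI/LO and read the quotient back with mflo (the
+ * rem-int handler below reads mfhi instead).  The ".if 1" guard is the
+ * binop template's chkzero flag expanded, giving the mandatory
+ * divide-by-zero throw; INT_MIN / -1 needs no extra check because the
+ * hardware result already matches the required INT_MIN.
+ */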
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: mips/op_rem_int.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+#else
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    div zero, a0, a1                              #  optional op
+    mfhi a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+#endif
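+/*
+ * rem-int is the same template with "mod" (R6) or "div"+"mfhi" (pre-R6)
+ * in the instruction slot.  MIPS division truncates toward zero, so the
+ * remainder takes the sign of the dividend, matching Dalvik semantics:
+ * conceptually, -7 % 2 == -1 and 7 % -2 == 1.
+ */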
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: mips/op_and_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: mips/op_or_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: mips/op_xor_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: mips/op_shl_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: mips/op_shr_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: mips/op_ushr_int.S */
+/* File: mips/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG(a1, a3)                       #  a1 <- vCC
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+                                  #  optional op
+    srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: mips/op_add_long.S */
+/*
+ *  The compiler generates the following sequence for
+ *  [v1 v0] =  [a1 a0] + [a3 a2];
+ *    addu v0,a2,a0
+ *    addu a1,a3,a1
+ *    sltu v1,v0,a2
+ *    addu v1,v1,a1
+ */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    addu v0, a2, a0                              #  optional op
+    addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
+    /* 14-17 instructions */
+
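+/*
+ * The carry trick above relies on 32-bit addu wrapping: after
+ * v0 = a0 + a2, the low word overflowed iff v0 < a2 (unsigned), which is
+ * exactly what sltu captures.  Conceptually:
+ *
+ *     uint32_t lo = alo + blo;
+ *     uint32_t carry = (lo < blo);          // sltu v1, v0, a2
+ *     uint32_t hi = ahi + bhi + carry;
+ */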
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: mips/op_sub_long.S */
+/*
+ * For little endian the code sequence looks as follows:
+ *    subu    v0,a0,a2
+ *    subu    v1,a1,a3
+ *    sltu    a0,a0,v0
+ *    subu    v1,v1,a0
+ */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    subu v0, a0, a2                              #  optional op
+    subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
+    /* 14-17 instructions */
+
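+/*
+ * Borrow propagation mirrors the add-long carry: after v0 = a0 - a2, a
+ * borrow occurred iff the wrapped result is (unsigned) greater than the
+ * original low word.  Conceptually:
+ *
+ *     uint32_t lo = alo - blo;
+ *     uint32_t borrow = (alo < lo);         // sltu a0, a0, v0
+ *     uint32_t hi = ahi - bhi - borrow;
+ */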
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: mips/op_mul_long.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *         a1   a0
+     *   x     a3   a2
+     *   -------------
+     *       a2a1 a2a0
+     *       a3a0
+     *  a3a1 (<= unused)
+     *  ---------------
+     *         v1   v0
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    and       t0, a0, 255                  #  a2 <- BB
+    srl       t1, a0, 8                    #  a3 <- CC
+    EAS2(t0, rFP, t0)                      #  t0 <- &fp[BB]
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vBB/vBB+1
+
+    EAS2(t1, rFP, t1)                      #  t1 <- &fp[CC]
+    LOAD64(a2, a3, t1)                     #  a2/a3 <- vCC/vCC+1
+
+    mul       v1, a3, a0                   #  v1= a3a0
+#ifdef MIPS32REVGE6
+    mulu      v0, a2, a0                   #  v0= a2a0
+    muhu      t1, a2, a0
+#else
+    multu     a2, a0
+    mfhi      t1
+    mflo      v0                           #  v0= a2a0
+#endif
+    mul       t0, a2, a1                   #  t0= a2a1
+    addu      v1, v1, t1                   #  v1+= hi(a2a0)
+    addu      v1, v1, t0                   #  v1= a3a0 + a2a1;
+
+    GET_OPA(a0)                            #  a0 <- AA
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    b         .Lop_mul_long_finish
+
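+/*
+ * This is the schoolbook 32x32->64 multiply from the diagram above: only
+ * the low 64 bits of the full product are kept, so a3*a1 never needs to
+ * be computed.  Conceptually:
+ *
+ *     uint64_t p  = (uint64_t)alo * blo;    // multu (or mulu+muhu on R6)
+ *     uint32_t v0 = (uint32_t)p;
+ *     uint32_t v1 = (uint32_t)(p >> 32) + ahi * blo + alo * bhi;
+ */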
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: mips/op_div_long.S */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 1
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    JAL(__divdi3)                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
+    /* 14-17 instructions */
+
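+/*
+ * MIPS32 has no 64-bit divide instruction, so div-long (and rem-long
+ * below) call into the compiler runtime: __divdi3/__moddi3 take the
+ * dividend in a0/a1 and the divisor in a2/a3 per the o32 ABI and return
+ * the 64-bit result in v0/v1.  That is why the divisor's zero check
+ * (the "or" of a2 and a3) must run before the call clobbers them.
+ */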
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: mips/op_rem_long.S */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 1
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    JAL(__moddi3)                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: mips/op_and_long.S */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    and a0, a0, a2                              #  optional op
+    and a1, a1, a3                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: mips/op_or_long.S */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    or a0, a0, a2                              #  optional op
+    or a1, a1, a3                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: mips/op_xor_long.S */
+/* File: mips/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a2-a3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64(a0, a1, a2)               #  a0/a1 <- vBB/vBB+1
+    LOAD64(a2, a3, t1)               #  a2/a3 <- vCC/vCC+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    xor a0, a0, a2                              #  optional op
+    xor a1, a1, a3                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: mips/op_shl_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(t2)                            #  t2 <- AA
+    and       a3, a0, 255                  #  a3 <- BB
+    srl       a0, a0, 8                    #  a0 <- CC
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BB]
+    GET_VREG(a2, a0)                       #  a2 <- vCC
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vBB/vBB+1
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v1, a2, 0x20                   #  v1 <- shift & 0x20
+    sll     v0, a0, a2                     #  rlo<- alo << (shift&31)
+    bnez    v1, .Lop_shl_long_finish
+    not     v1, a2                         #  rhi<- 31-shift  (shift is 5b)
+    srl     a0, 1
+    srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
+    sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
+    or      v1, a0                         #  rhi<- rhi | alo
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
+
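+/*
+ * MIPS shifts honor only the low 5 bits of the shift amount, so
+ * "alo >> (32 - shift)" cannot be formed with a single shift when shift
+ * may be 0.  Instead, "not" yields ~shift, whose low 5 bits equal
+ * 31 - (shift & 31), and the move is split into two shifts:
+ *
+ *     hi = (ahi << shift) | ((alo >> 1) >> (31 - shift));
+ *
+ * which equals alo >> (32 - shift) for shift in 1..31 and safely
+ * contributes 0 when shift == 0.  Shift amounts of 32..63 take the
+ * branch to .Lop_shl_long_finish.  shr-long and ushr-long below use the
+ * same decomposition in the other direction.
+ */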
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: mips/op_shr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(t3)                            #  t3 <- AA
+    and       a3, a0, 255                  #  a3 <- BB
+    srl       a0, a0, 8                    #  a0 <- CC
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BB]
+    GET_VREG(a2, a0)                       #  a2 <- vCC
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vBB/vBB+1
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v0, a2, 0x20                   #  shift & 0x20
+    sra     v1, a1, a2                     #  rhi<- ahi >> (shift&31)
+    bnez    v0, .Lop_shr_long_finish
+    srl     v0, a0, a2                     #  rlo<- alo >> (shift&31)
+    not     a0, a2                         #  alo<- 31-shift (shift is 5b)
+    sll     a1, 1
+    sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
+    or      v0, a1                         #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- v0/v1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: mips/op_ushr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a3, a0, 255                  #  a3 <- BB
+    srl       a0, a0, 8                    #  a0 <- CC
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[BB]
+    GET_VREG(a2, a0)                       #  a2 <- vCC
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vBB/vBB+1
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi      v0, a2, 0x20                 #  shift & 0x20
+    srl       v1, a1, a2                   #  rhi<- ahi >> (shift&31)
+    bnez      v0, .Lop_ushr_long_finish
+    srl       v0, a0, a2                   #  rlo<- alo >> (shift&31)
+    not       a0, a2                       #  alo<- 31-n  (shift is 5b)
+    sll       a1, 1
+    sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
+    or        v0, a1                       #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- v0/v1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: mips/op_add_float.S */
+/* File: mips/fbinop.S */
+    /*
+     * Generic 32-bit binary float operation.
+     *
+     * For: add-fp, sub-fp, mul-fp, div-fp, rem-fp
+     */
+
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG_F(fa1, a3)                    #  a1 <- vCC
+    GET_VREG_F(fa0, a2)                    #  a0 <- vBB
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    add.s fv0, fa0, fa1                                 #  f0 = result
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
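+/*
+ * The float templates need no divide-by-zero check: IEEE-754 division by
+ * zero is well defined, so div-float below simply produces +/-Inf (or
+ * NaN for 0.0f/0.0f), which is exactly what Dalvik specifies.
+ */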
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: mips/op_sub_float.S */
+/* File: mips/fbinop.S */
+    /*
+     * Generic 32-bit binary float operation.
+     *
+     * For: add-fp, sub-fp, mul-fp, div-fp, rem-fp
+     */
+
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG_F(fa1, a3)                    #  a1 <- vCC
+    GET_VREG_F(fa0, a2)                    #  a0 <- vBB
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    sub.s fv0, fa0, fa1                                 #  f0 = result
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: mips/op_mul_float.S */
+/* File: mips/fbinop.S */
+    /*
+     * Generic 32-bit binary float operation.
+     *
+     * For: add-fp, sub-fp, mul-fp, div-fp, rem-fp
+     */
+
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG_F(fa1, a3)                    #  a1 <- vCC
+    GET_VREG_F(fa0, a2)                    #  a0 <- vBB
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    mul.s fv0, fa0, fa1                                 #  f0 = result
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: mips/op_div_float.S */
+/* File: mips/fbinop.S */
+    /*
+     * Generic 32-bit binary float operation.
+     *
+     * For: add-fp, sub-fp, mul-fp, div-fp, rem-fp
+     */
+
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG_F(fa1, a3)                    #  a1 <- vCC
+    GET_VREG_F(fa0, a2)                    #  a0 <- vBB
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    div.s fv0, fa0, fa1                                 #  f0 = result
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: mips/op_rem_float.S */
+/* File: mips/fbinop.S */
+    /*
+     * Generic 32-bit binary float operation.
+     *
+     * For: add-fp, sub-fp, mul-fp, div-fp, rem-fp
+     */
+
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    srl       a3, a0, 8                    #  a3 <- CC
+    and       a2, a0, 255                  #  a2 <- BB
+    GET_VREG_F(fa1, a3)                    #  a1 <- vCC
+    GET_VREG_F(fa0, a2)                    #  a0 <- vBB
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    JAL(fmodf)                                 #  f0 = result
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
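+/*
+ * rem-float has no FPU instruction, so the template calls the C
+ * library's fmodf here (rem-double below calls fmod).  Both return a
+ * remainder with the sign of the dividend, matching Dalvik's
+ * rem-float/rem-double semantics.
+ */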
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: mips/op_add_double.S */
+/* File: mips/fbinopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64_F(fa0, fa0f, a2)
+    LOAD64_F(fa1, fa1f, t1)
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    add.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    b         .Lop_add_double_finish
+
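+/*
+ * LOAD64_F/SET_VREG64_F move each double as a register pair (fa0 plus
+ * its partner fa0f), so the same template can work whether the FPU runs
+ * in 32-bit or 64-bit register mode; dispatch then continues at the
+ * .Lop_*_double_finish stub targeted by the branch.
+ */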
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: mips/op_sub_double.S */
+/* File: mips/fbinopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64_F(fa0, fa0f, a2)
+    LOAD64_F(fa1, fa1f, t1)
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    sub.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    b         .Lop_sub_double_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: mips/op_mul_double.S */
+/* File: mips/fbinopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64_F(fa0, fa0f, a2)
+    LOAD64_F(fa1, fa1f, t1)
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    mul.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    b         .Lop_mul_double_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: mips/op_div_double.S */
+/* File: mips/fbinopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64_F(fa0, fa0f, a2)
+    LOAD64_F(fa1, fa1f, t1)
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    div.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    b         .Lop_div_double_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: mips/op_rem_double.S */
+/* File: mips/fbinopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * for: add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH(a0, 1)                           #  a0 <- CCBB
+    GET_OPA(rOBJ)                          #  s5 <- AA
+    and       a2, a0, 255                  #  a2 <- BB
+    srl       a3, a0, 8                    #  a3 <- CC
+    EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
+    EAS2(t1, rFP, a3)                      #  t1 <- &fp[CC]
+    LOAD64_F(fa0, fa0f, a2)
+    LOAD64_F(fa1, fa1f, t1)
+
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    JAL(fmod)
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    b         .Lop_rem_double_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: mips/op_add_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
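+/*
+ * The "/2addr" forms pack both registers into the opcode unit itself
+ * (vA in the low nibble of the high byte, vB in the high nibble), so
+ * there is no CCBB unit to fetch and the PC advances by only one unit:
+ *
+ *     add-int/2addr vA, vB    =>    vA = vA + vB;
+ *
+ * hence FETCH_ADVANCE_INST(1) here versus FETCH_ADVANCE_INST(2) in the
+ * three-register binop template.
+ */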
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: mips/op_sub_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: mips/op_mul_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: mips/op_div_int_2addr.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#else
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    div zero, a0, a1                              #  optional op
+    mflo a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: mips/op_rem_int_2addr.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#else
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    div zero, a0, a1                              #  optional op
+    mfhi a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: mips/op_and_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: mips/op_or_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: mips/op_xor_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: mips/op_shl_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: mips/op_shr_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: mips/op_ushr_int_2addr.S */
+/* File: mips/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a0, rOBJ)                     #  a0 <- vA
+    GET_VREG(a1, a3)                       #  a1 <- vB
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    srl a0, a0, a1                                  #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
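+
+    /*
+     * Illustrative note (not generated by the template): the binop/2addr
+     * handlers above all compute vA = vA op vB, and the shift variants rely
+     * on MIPS sll/sra/srl using only the low 5 bits of the shift amount.
+     * A minimal C model of this handler (helper name is ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static uint32_t ushr_int_2addr(uint32_t vA, uint32_t vB) {
+     *         return vA >> (vB & 31);   // srl masks the shift to 5 bits
+     *     }
+     */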
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: mips/op_add_long_2addr.S */
+/*
+ * See op_add_long.S for details
+ */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    addu v0, a2, a0                              #  optional op
+    addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
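+
+    /*
+     * Illustrative note (not generated by the template): add-long builds a
+     * 64-bit add from 32-bit halves; the sltu above recovers the carry out
+     * of the low-word addition.  A minimal C model (helper name is ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static void add64(uint32_t alo, uint32_t ahi,
+     *                       uint32_t blo, uint32_t bhi,
+     *                       uint32_t *rlo, uint32_t *rhi) {
+     *         uint32_t lo = alo + blo;
+     *         uint32_t carry = lo < blo;   // sltu v1, v0, a2
+     *         *rlo = lo;
+     *         *rhi = ahi + bhi + carry;
+     *     }
+     */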
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: mips/op_sub_long_2addr.S */
+/*
+ * See op_sub_long.S for more details
+ */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    subu v0, a0, a2                              #  optional op
+    subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: mips/op_mul_long_2addr.S */
+    /*
+     * See op_mul_long.S for more details
+     */
+    /* mul-long/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a0, a1, t0)                     #  vAA.low / high
+
+    GET_OPB(t1)                            #  t1 <- B
+    EAS2(t1, rFP, t1)                      #  t1 <- &fp[B]
+    LOAD64(a2, a3, t1)                     #  vBB.low / high
+
+    mul       v1, a3, a0                   #  v1= a3a0
+#ifdef MIPS32REVGE6
+    mulu      v0, a2, a0                   #  v0= a2a0
+    muhu      t1, a2, a0
+#else
+    multu     a2, a0
+    mfhi      t1
+    mflo      v0                           #  v0= a2a0
+#endif
+    mul       t2, a2, a1                   #  t2= a2a1
+    addu      v1, v1, t1                   #  v1= a3a0 + hi(a2a0)
+    addu      v1, v1, t2                   #  v1= v1 + a2a1;
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    # vAA <- v0 (low)
+    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
+    GOTO_OPCODE(t1)                        #  jump to next instruction
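+
+    /*
+     * Illustrative note (not generated by the template): only the low
+     * 64 bits of the 64x64 product are kept, so the cross terms a3*a0 and
+     * a2*a1 contribute to the high word only.  A minimal C model (helper
+     * name is ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static void mul64(uint32_t alo, uint32_t ahi,
+     *                       uint32_t blo, uint32_t bhi,
+     *                       uint32_t *rlo, uint32_t *rhi) {
+     *         uint64_t p = (uint64_t)alo * blo;   // multu a2, a0
+     *         *rlo = (uint32_t)p;
+     *         *rhi = (uint32_t)(p >> 32) + ahi * blo + alo * bhi;
+     *     }
+     */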
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: mips/op_div_long_2addr.S */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 1
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    JAL(__divdi3)                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: mips/op_rem_long_2addr.S */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 1
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    JAL(__moddi3)                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
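+
+    /*
+     * Illustrative note (not generated by the template): __divdi3 and
+     * __moddi3 are the compiler-runtime (libgcc) helpers for signed 64-bit
+     * division and remainder; the handlers set chkzero to 1 so the zero
+     * divisor is rejected before the call.  A minimal C model (helper name
+     * is ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static int div_rem64(int64_t a, int64_t b,
+     *                          int64_t *q, int64_t *r) {
+     *         if (b == 0)
+     *             return -1;   // interpreter throws ArithmeticException
+     *         *q = a / b;      // __divdi3
+     *         *r = a % b;      // __moddi3
+     *         return 0;
+     *     }
+     */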
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: mips/op_and_long_2addr.S */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    and a0, a0, a2                              #  optional op
+    and a1, a1, a3                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: mips/op_or_long_2addr.S */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    or a0, a0, a2                              #  optional op
+    or a1, a1, a3                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: mips/op_xor_long_2addr.S */
+/* File: mips/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    .if 0
+    or        t0, a2, a3             #  second arg (a2-a3) is zero?
+    beqz      t0, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    xor a0, a0, a2                              #  optional op
+    xor a1, a1, a3                                 #  result <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: mips/op_shl_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a2, a3)                       #  a2 <- vB
+    EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v1, a2, 0x20                   #  shift & 0x20
+    sll     v0, a0, a2                     #  rlo<- alo << (shift&31)
+    bnez    v1, .Lop_shl_long_2addr_finish
+    not     v1, a2                         #  rhi<- 31-shift  (shift is 5b)
+    srl     a0, 1
+    srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
+    sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
+    or      v1, a0                         #  rhi<- rhi | alo
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- v0/v1
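+
+    /*
+     * Illustrative note (not generated by the template): the 64-bit shift
+     * is composed from 32-bit shifts; distances of 32..63 take the
+     * .Lop_shl_long_2addr_finish path (not in this hunk), and the paired
+     * srl-by-1 / srl-by-(31-s) above achieves a genuine 32-s shift, which
+     * a single srl cannot when s == 0.  A minimal C model (helper name is
+     * ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static void shl64(uint32_t alo, uint32_t ahi, uint32_t n,
+     *                       uint32_t *rlo, uint32_t *rhi) {
+     *         uint32_t s = n & 31;
+     *         if (n & 0x20) {                  // 32..63: low feeds high
+     *             *rhi = alo << s;
+     *             *rlo = 0;
+     *         } else {
+     *             *rlo = alo << s;
+     *             *rhi = (ahi << s) | (s ? alo >> (32 - s) : 0);
+     *         }
+     *     }
+     */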
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: mips/op_shr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    GET_OPA4(t2)                           #  t2 <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a2, a3)                       #  a2 <- vB
+    EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi    v0, a2, 0x20                   #  shift & 0x20
+    sra     v1, a1, a2                     #  rhi<- ahi >> (shift&31)
+    bnez    v0, .Lop_shr_long_2addr_finish
+    srl     v0, a0, a2                     #  rlo<- alo >> (shift&31)
+    not     a0, a2                         #  alo<- 31-shift (shift is 5b)
+    sll     a1, 1
+    sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
+    or      v0, a1                         #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: mips/op_ushr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    GET_OPA4(t3)                           #  t3 <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG(a2, a3)                       #  a2 <- vB
+    EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+
+    andi      v0, a2, 0x20                 #  shift & 0x20
+    srl       v1, a1, a2                   #  rhi<- ahi >> (shift&31)
+    bnez      v0, .Lop_ushr_long_2addr_finish
+    srl       v0, a0, a2                   #  rlo<- alo >> (shift&31)
+    not       a0, a2                       #  alo<- 31-n  (shift is 5b)
+    sll       a1, 1
+    sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
+    or        v0, a1                       #  rlo<- rlo | ahi
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- v0/v1
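+
+    /*
+     * Illustrative note (not generated by the template): shr-long and
+     * ushr-long differ only in the high-word shift (sra keeps the sign
+     * bit, srl shifts in zeroes); shifts of 32..63 take the *_finish path
+     * (not in this hunk).  A minimal C model of ushr (helper name is
+     * ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static void ushr64(uint32_t alo, uint32_t ahi, uint32_t n,
+     *                        uint32_t *rlo, uint32_t *rhi) {
+     *         uint32_t s = n & 31;
+     *         if (n & 0x20) {                  // 32..63: high feeds low
+     *             *rlo = ahi >> s;
+     *             *rhi = 0;
+     *         } else {
+     *             *rhi = ahi >> s;             // (int32_t)ahi >> s for shr
+     *             *rlo = (alo >> s) | (s ? ahi << (32 - s) : 0);
+     *         }
+     *     }
+     */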
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: mips/op_add_float_2addr.S */
+/* File: mips/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
+     * div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, rOBJ)
+    GET_VREG_F(fa1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    add.s fv0, fa0, fa1
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: mips/op_sub_float_2addr.S */
+/* File: mips/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
+     * div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, rOBJ)
+    GET_VREG_F(fa1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    sub.s fv0, fa0, fa1
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: mips/op_mul_float_2addr.S */
+/* File: mips/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
+     * div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, rOBJ)
+    GET_VREG_F(fa1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    mul.s fv0, fa0, fa1
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: mips/op_div_float_2addr.S */
+/* File: mips/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
+     * div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, rOBJ)
+    GET_VREG_F(fa1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    div.s fv0, fa0, fa1
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: mips/op_rem_float_2addr.S */
+/* File: mips/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
+     * div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, rOBJ)
+    GET_VREG_F(fa1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+    JAL(fmodf)
+    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
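+
+    /*
+     * Illustrative note (not generated by the template): rem-float is the
+     * one float/2addr op with no single MIPS FPU instruction, so it calls
+     * the C library's fmodf (rem-double uses fmod below); like Java's %,
+     * the result takes the sign of the dividend.  A minimal C model
+     * (helper name is ours):
+     *
+     *     #include <math.h>
+     *
+     *     static float rem_float(float vA, float vB) {
+     *         return fmodf(vA, vB);
+     *     }
+     */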
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: mips/op_add_double_2addr.S */
+/* File: mips/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *  div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64_F(fa0, fa0f, t0)
+    LOAD64_F(fa1, fa1f, a1)
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    add.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: mips/op_sub_double_2addr.S */
+/* File: mips/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *  div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64_F(fa0, fa0f, t0)
+    LOAD64_F(fa1, fa1f, a1)
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    sub.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: mips/op_mul_double_2addr.S */
+/* File: mips/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *  div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64_F(fa0, fa0f, t0)
+    LOAD64_F(fa1, fa1f, a1)
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    mul.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: mips/op_div_double_2addr.S */
+/* File: mips/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *  div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64_F(fa0, fa0f, t0)
+    LOAD64_F(fa1, fa1f, a1)
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    div.d fv0, fa0, fa1
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: mips/op_rem_double_2addr.S */
+/* File: mips/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * This could be a MIPS instruction or a function call.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *  div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a1)                            #  a1 <- B
+    EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
+    EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
+    LOAD64_F(fa0, fa0f, t0)
+    LOAD64_F(fa1, fa1f, a1)
+
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(fmod)
+    SET_VREG64_F(fv0, fv0f, rOBJ)
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: mips/op_add_int_lit16.S */
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 0
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
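+
+    /*
+     * Illustrative note (not generated by the template): FETCH_S
+     * sign-extends the 16-bit literal CCCC, so add-int/lit16 computes
+     * vA = vB + (int16_t)CCCC.  A minimal C model (helper name is ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static int32_t add_int_lit16(int32_t vB, uint16_t cccc) {
+     *         return vB + (int16_t)cccc;   // ssssCCCC
+     *     }
+     */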
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: mips/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 0
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: mips/op_mul_int_lit16.S */
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 0
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: mips/op_div_int_lit16.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 1
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#else
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 1
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    div zero, a0, a1                              #  optional op
+    mflo a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#endif
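+
+/*
+ * Illustrative note (not generated by the template): MIPS32 R6 has a
+ * three-operand div that writes the quotient directly; pre-R6 divides into
+ * HI/LO, so mflo fetches the quotient (and mfhi the remainder, used by rem
+ * below).  A minimal C model of div-int/lit16 (helper name is ours):
+ *
+ *     #include <stdint.h>
+ *
+ *     static int32_t div_int_lit16(int32_t vB, int16_t lit) {
+ *         // lit == 0 is caught by chkzero before the divide; Dalvik
+ *         // defines INT32_MIN / -1 == INT32_MIN (plain C leaves that
+ *         // case undefined).
+ *         return vB / lit;
+ *     }
+ */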
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: mips/op_rem_int_lit16.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 1
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#else
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 1
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    div zero, a0, a1                              #  optional op
+    mfhi a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: mips/op_and_int_lit16.S */
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 0
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: mips/op_or_int_lit16.S */
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 0
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: mips/op_xor_int_lit16.S */
+/* File: mips/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    # binop/lit16 vA, vB,                  /* +CCCC */
+    FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
+    GET_OPB(a2)                            #  a2 <- B
+    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_VREG(a0, a2)                       #  a0 <- vB
+    and       rOBJ, rOBJ, 15
+    .if 0
+    # cmp a1, 0; is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: mips/op_add_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
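+
+    /*
+     * Illustrative note (not generated by the template): one code unit
+     * holds ssssCCBB, so BB selects the source register and CC is a
+     * sign-extended 8-bit literal (the sra above).  A minimal C model
+     * (helper name is ours):
+     *
+     *     #include <stdint.h>
+     *
+     *     static int32_t add_int_lit8(int32_t vBB, uint16_t ccbb) {
+     *         int8_t cc = (int8_t)(ccbb >> 8);   // sra a1, a3, 8
+     *         return vBB + cc;
+     *     }
+     */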
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: mips/op_rsub_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: mips/op_mul_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: mips/op_div_int_lit8.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+#else
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    div zero, a0, a1                              #  optional op
+    mflo a0                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: mips/op_rem_int_lit8.S */
+#ifdef MIPS32REVGE6
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+#else
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 1
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+    div zero, a0, a1                       #  divide: quotient -> LO, remainder -> HI
+    mfhi a0                                #  a0 <- remainder; a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: mips/op_and_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: mips/op_or_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: mips/op_xor_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: mips/op_shl_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: mips/op_shr_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: mips/op_ushr_int_lit8.S */
+/* File: mips/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be an MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    # binop/lit8 vAA, vBB,                 /* +CC */
+    FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
+    and       a2, a3, 255                  #  a2 <- BB
+    GET_VREG(a0, a2)                       #  a0 <- vBB
+    sra       a1, a3, 8                    #  a1 <- ssssssCC (sign extended)
+    .if 0
+    # is second operand zero?
+    beqz      a1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+
+                                  #  optional op
+    srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
+    /* 10-12 instructions */
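+
+/*
+ * Note on the three shift handlers above: with a register shift amount,
+ * "sll/sra/srl rd, rt, rs" assembles to the variable forms
+ * sllv/srav/srlv, which use only the low five bits of rs.  That implicit
+ * "& 0x1f" is exactly the masking the Dalvik shift instructions require,
+ * so the +CC literal needs no explicit mask: a shift count of 33 behaves
+ * as a shift by 1.
+ */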
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: mips/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1
+    lw     a0, 0(t0)                    #  a0 <- obj.field (32 bits)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[A] <- a0
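+
+/*
+ * Worked decoding example for the quick field handlers: for
+ * "iget-quick v2, v3, 0x0010" the first 16-bit code unit is 0x32e3
+ * (opcode 0xe3 in the low byte, A=2 in bits 8-11, B=3 in bits 12-15),
+ * so GET_OPB yields 3, GET_OPA4 yields 2, and FETCH(a1, 1) reads the
+ * field byte offset 0x0010 from the second code unit.
+ */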
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: mips/op_iget_wide_quick.S */
+    # iget-wide-quick vA, vB, offset       /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1                   #  t0 <- a3 + a1
+    LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: mips/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    EXPORT_PC()
+    GET_VREG(a0, a2)                       #  a0 <- object we're operating on
+    JAL(artIGetObjectFromMterp)            #  v0 <- GetObj(obj, offset)
+    lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    GET_OPA4(a2)                           #  a2<- A+
+    PREFETCH_INST(2)                       #  load rINST
+    bnez a3, MterpPossibleException        #  bail out
+    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
+    ADVANCE(2)                             #  advance rPC
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
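+
+/*
+ * Unlike the primitive iget-*-quick handlers, the object variant calls
+ * the C++ helper artIGetObjectFromMterp instead of doing a raw load:
+ * reference reads may need runtime cooperation (e.g. read barriers), and
+ * the helper can throw, hence the EXPORT_PC() beforehand and the
+ * pending-exception check on THREAD_EXCEPTION_OFFSET(rSELF) afterwards.
+ */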
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: mips/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    beqz      a3, common_errNullObject     #  object was null
+    GET_VREG(a0, a2)                       #  a0 <- fp[A]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      t0, a3, a1
+    sw    a0, 0(t0)                    #  obj.field (32 bits) <- a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: mips/op_iput_wide_quick.S */
+    # iput-wide-quick vA, vB, offset       /* CCCC */
+    GET_OPA4(a0)                           #  a0 <- A(+)
+    GET_OPB(a1)                            #  a1 <- B
+    GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
+    # check object for null
+    beqz      a2, common_errNullObject     #  object was null
+    EAS2(a3, rFP, a0)                      #  a3 <- &fp[A]
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[A]
+    FETCH(a3, 1)                           #  a3 <- field byte offset
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      a2, a2, a3                   #  a2 <- object + field byte offset
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
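+
+/*
+ * Here EAS2 forms the source address &fp[A] by scaling the vreg index by
+ * 4 (each vreg slot is 32 bits) and adding it to rFP; LOAD64/STORE64
+ * then move the 64-bit value through the a0/a1 register pair, with the
+ * addu computing the field address for the store.
+ */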
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: mips/op_iput_object_quick.S */
+    /* For: iput-object-quick */
+    # op vA, vB, offset                 /* CCCC */
+    EXPORT_PC()
+    addu   a0, rFP, OFF_FP_SHADOWFRAME
+    move   a1, rPC
+    move   a2, rINST
+    JAL(MterpIputObjectQuick)
+    beqz   v0, MterpException
+    FETCH_ADVANCE_INST(2)               # advance rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
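+
+/*
+ * Object stores take the slow path: MterpIputObjectQuick runs in C++ so
+ * the runtime can perform the reference write (including any GC write
+ * barrier/card marking) and report failure in v0, whereas the primitive
+ * iput-*-quick handlers below store inline with a plain sw/sh/sb.
+ */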
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: mips/op_invoke_virtual_quick.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeVirtualQuick)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: mips/op_invoke_virtual_range_quick.S */
+/* File: mips/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
+    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC()
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    JAL(MterpInvokeVirtualQuickRange)
+    beqz    v0, MterpException
+    FETCH_ADVANCE_INST(3)
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
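+
+/*
+ * Both quick invoke forms above are thin wrappers: the C++ helper does
+ * the actual dispatch (a3 passes the instruction word in rINST), v0
+ * reports success, FETCH_ADVANCE_INST(3) skips the three 16-bit code
+ * units of an invoke, and the MterpShouldSwitchInterpreters check lets
+ * the runtime fall back to the reference interpreter when required.
+ */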
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: mips/op_iput_boolean_quick.S */
+/* File: mips/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    beqz      a3, common_errNullObject     #  object was null
+    GET_VREG(a0, a2)                       #  a0 <- fp[A]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      t0, a3, a1
+    sb    a0, 0(t0)                    #  obj.field (8 bits) <- a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: mips/op_iput_byte_quick.S */
+/* File: mips/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    beqz      a3, common_errNullObject     #  object was null
+    GET_VREG(a0, a2)                       #  a0 <- fp[A]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      t0, a3, a1
+    sb    a0, 0(t0)                    #  obj.field (8 bits) <- a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: mips/op_iput_char_quick.S */
+/* File: mips/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    beqz      a3, common_errNullObject     #  object was null
+    GET_VREG(a0, a2)                       #  a0 <- fp[A]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      t0, a3, a1
+    sh    a0, 0(t0)                    #  obj.field (16 bits) <- a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: mips/op_iput_short_quick.S */
+/* File: mips/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    beqz      a3, common_errNullObject     #  object was null
+    GET_VREG(a0, a2)                       #  a0 <- fp[A]
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    addu      t0, a3, a1
+    sh    a0, 0(t0)                    #  obj.field (16 bits) <- a0
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: mips/op_iget_boolean_quick.S */
+/* File: mips/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1
+    lbu     a0, 0(t0)                    #  a0 <- obj.field (8 bits, zero-extended)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[A] <- a0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: mips/op_iget_byte_quick.S */
+/* File: mips/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1
+    lb     a0, 0(t0)                    #  a0 <- obj.field (8 bits, sign-extended)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[A] <- a0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: mips/op_iget_char_quick.S */
+/* File: mips/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1
+    lhu     a0, 0(t0)                    #  a0 <- obj.field (16 bits, zero-extended)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[A] <- a0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: mips/op_iget_short_quick.S */
+/* File: mips/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    # op vA, vB, offset                    /* CCCC */
+    GET_OPB(a2)                            #  a2 <- B
+    GET_VREG(a3, a2)                       #  a3 <- object we're operating on
+    FETCH(a1, 1)                           #  a1 <- field byte offset
+    GET_OPA4(a2)                           #  a2 <- A(+)
+    # check object for null
+    beqz      a3, common_errNullObject     #  object was null
+    addu      t0, a3, a1
+    lh     a0, 0(t0)                    #  a0 <- obj.field (16 bits, sign-extended)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[A] <- a0
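+
+/*
+ * Load-width summary for the iget-*-quick family above: lw for 32-bit
+ * fields, lbu (zero-extend) for boolean, lb (sign-extend) for byte,
+ * lhu (zero-extend) for char, and lh (sign-extend) for short, mirroring
+ * how Java widens each field type to a 32-bit vreg value.
+ */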
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f3: /* 0xf3 */
+/* File: mips/op_unused_f3.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: mips/op_unused_f4.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f5: /* 0xf5 */
+/* File: mips/op_unused_f5.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f6: /* 0xf6 */
+/* File: mips/op_unused_f6.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f7: /* 0xf7 */
+/* File: mips/op_unused_f7.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f8: /* 0xf8 */
+/* File: mips/op_unused_f8.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f9: /* 0xf9 */
+/* File: mips/op_unused_f9.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: mips/op_unused_fa.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: mips/op_unused_fb.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: mips/op_unused_fc.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: mips/op_unused_fd.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: mips/op_unused_fe.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: mips/op_unused_ff.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+/* continuation for op_cmpl_float */
+
+.Lop_cmpl_float_nan:
+    li rTEMP, -1
+
+.Lop_cmpl_float_finish:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
+
+/* continuation for op_cmpg_float */
+
+.Lop_cmpg_float_nan:
+    li rTEMP, 1
+
+.Lop_cmpg_float_finish:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
+
+/* continuation for op_cmpl_double */
+
+.Lop_cmpl_double_nan:
+    li rTEMP, -1
+
+.Lop_cmpl_double_finish:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
+
+/* continuation for op_cmpg_double */
+
+.Lop_cmpg_double_nan:
+    li rTEMP, 1
+
+.Lop_cmpg_double_finish:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
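+
+/*
+ * The four continuations above differ only in the value stored for an
+ * unordered comparison: cmpl-* biases NaN to -1 and cmpg-* to +1, per
+ * the Dalvik definition, so every ordered comparison involving a NaN
+ * operand comes out false.
+ */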
+
+/* continuation for op_float_to_int */
+
+/*
+ * Not an entry point, as it is used only once.
+ */
+f2i_doconv:
+#ifdef MIPS32REVGE6
+    l.s       fa1, .LFLOAT_TO_INT_max
+    cmp.ule.s ft2, fa1, fa0
+    l.s       fv0, .LFLOAT_TO_INT_ret_max
+    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
+
+    l.s       fa1, .LFLOAT_TO_INT_min
+    cmp.ule.s ft2, fa0, fa1
+    l.s       fv0, .LFLOAT_TO_INT_ret_min
+    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
+
+    mov.s     fa1, fa0
+    cmp.un.s  ft2, fa0, fa1
+    li.s      fv0, 0
+    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
+#else
+    l.s       fa1, .LFLOAT_TO_INT_max
+    c.ole.s   fcc0, fa1, fa0
+    l.s       fv0, .LFLOAT_TO_INT_ret_max
+    bc1t      .Lop_float_to_int_set_vreg_f
+
+    l.s       fa1, .LFLOAT_TO_INT_min
+    c.ole.s   fcc0, fa0, fa1
+    l.s       fv0, .LFLOAT_TO_INT_ret_min
+    bc1t      .Lop_float_to_int_set_vreg_f
+
+    mov.s     fa1, fa0
+    c.un.s    fcc0, fa0, fa1
+    li.s      fv0, 0
+    bc1t      .Lop_float_to_int_set_vreg_f
+#endif
+
+    trunc.w.s  fv0, fa0
+    b         .Lop_float_to_int_set_vreg_f
+
+.LFLOAT_TO_INT_max:
+    .word 0x4f000000                       #  2^31, as a float
+.LFLOAT_TO_INT_min:
+    .word 0xcf000000                       #  -2^31, as a float
+.LFLOAT_TO_INT_ret_max:
+    .word 0x7fffffff                       #  maxint
+.LFLOAT_TO_INT_ret_min:
+    .word 0x80000000                       #  minint
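+
+/*
+ * f2i_doconv implements the Java narrowing rules that a bare trunc.w.s
+ * would not: inputs >= 2^31 saturate to maxint, inputs <= -2^31 to
+ * minint, and NaN (detected by the unordered compare of fa0 with
+ * itself) becomes 0; only in-range values reach trunc.w.s.
+ */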
+
+/* continuation for op_float_to_long */
+
+f2l_doconv:
+#ifdef MIPS32REVGE6
+    l.s       fa1, .LLONG_TO_max
+    cmp.ule.s ft2, fa1, fa0
+    li        rRESULT0, ~0
+    li        rRESULT1, ~0x80000000
+    bc1nez    ft2, .Lop_float_to_long_set_vreg
+
+    l.s       fa1, .LLONG_TO_min
+    cmp.ule.s ft2, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0x80000000
+    bc1nez    ft2, .Lop_float_to_long_set_vreg
+
+    mov.s     fa1, fa0
+    cmp.un.s  ft2, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1nez    ft2, .Lop_float_to_long_set_vreg
+#else
+    l.s       fa1, .LLONG_TO_max
+    c.ole.s   fcc0, fa1, fa0
+    li        rRESULT0, ~0
+    li        rRESULT1, ~0x80000000
+    bc1t      .Lop_float_to_long_set_vreg
+
+    l.s       fa1, .LLONG_TO_min
+    c.ole.s   fcc0, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0x80000000
+    bc1t      .Lop_float_to_long_set_vreg
+
+    mov.s     fa1, fa0
+    c.un.s    fcc0, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1t      .Lop_float_to_long_set_vreg
+#endif
+
+    JAL(__fixsfdi)
+
+    b         .Lop_float_to_long_set_vreg
+
+.LLONG_TO_max:
+    .word 0x5f000000                       #  2^63, as a float
+
+.LLONG_TO_min:
+    .word 0xdf000000                       #  -2^63, as a float
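+
+/*
+ * Same saturation pattern for float-to-long: values >= 2^63 return
+ * maxlong (built as ~0 / ~0x80000000 in the rRESULT0/rRESULT1 pair),
+ * values <= -2^63 return minlong, NaN returns 0, and in-range values
+ * are converted by the libgcc helper __fixsfdi.
+ */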
+
+/* continuation for op_double_to_int */
+
+d2i_doconv:
+#ifdef MIPS32REVGE6
+    la        t0, .LDOUBLE_TO_INT_max
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa1, fa0
+    l.s       fv0, .LDOUBLE_TO_INT_maxret
+    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
+
+    la        t0, .LDOUBLE_TO_INT_min
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa0, fa1
+    l.s       fv0, .LDOUBLE_TO_INT_minret
+    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
+
+    mov.d     fa1, fa0
+    cmp.un.d  ft2, fa0, fa1
+    li.s      fv0, 0
+    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
+#else
+    la        t0, .LDOUBLE_TO_INT_max
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa1, fa0
+    l.s       fv0, .LDOUBLE_TO_INT_maxret
+    bc1t      .Lop_double_to_int_set_vreg_f
+
+    la        t0, .LDOUBLE_TO_INT_min
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa0, fa1
+    l.s       fv0, .LDOUBLE_TO_INT_minret
+    bc1t      .Lop_double_to_int_set_vreg_f
+
+    mov.d     fa1, fa0
+    c.un.d    fcc0, fa0, fa1
+    li.s      fv0, 0
+    bc1t      .Lop_double_to_int_set_vreg_f
+#endif
+
+    trunc.w.d  fv0, fa0
+    b         .Lop_double_to_int_set_vreg_f
+
+.LDOUBLE_TO_INT_max:
+    .dword 0x41dfffffffc00000              #  maxint (2^31 - 1), as a double
+.LDOUBLE_TO_INT_min:
+    .dword 0xc1e0000000000000              #  minint (-2^31), as a double
+.LDOUBLE_TO_INT_maxret:
+    .word 0x7fffffff                       #  maxint
+.LDOUBLE_TO_INT_minret:
+    .word 0x80000000                       #  minint
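+
+/*
+ * For double-to-int the upper bound can be maxint itself, since 2^31 - 1
+ * is exactly representable as a double (unlike in the float case, where
+ * 2^31 stands in for it).  Values at or above the bound saturate to
+ * 0x7fffffff, values at or below -2^31 to 0x80000000, and NaN becomes 0.
+ */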
+
+/* continuation for op_double_to_long */
+
+d2l_doconv:
+#ifdef MIPS32REVGE6
+    la        t0, .LDOUBLE_TO_LONG_max
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa1, fa0
+    la        t0, .LDOUBLE_TO_LONG_ret_max
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1nez    ft2, .Lop_double_to_long_set_vreg
+
+    la        t0, .LDOUBLE_TO_LONG_min
+    LOAD64_F(fa1, fa1f, t0)
+    cmp.ule.d ft2, fa0, fa1
+    la        t0, .LDOUBLE_TO_LONG_ret_min
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1nez    ft2, .Lop_double_to_long_set_vreg
+
+    mov.d     fa1, fa0
+    cmp.un.d  ft2, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1nez    ft2, .Lop_double_to_long_set_vreg
+#else
+    la        t0, .LDOUBLE_TO_LONG_max
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa1, fa0
+    la        t0, .LDOUBLE_TO_LONG_ret_max
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1t      .Lop_double_to_long_set_vreg
+
+    la        t0, .LDOUBLE_TO_LONG_min
+    LOAD64_F(fa1, fa1f, t0)
+    c.ole.d   fcc0, fa0, fa1
+    la        t0, .LDOUBLE_TO_LONG_ret_min
+    LOAD64(rRESULT0, rRESULT1, t0)
+    bc1t      .Lop_double_to_long_set_vreg
+
+    mov.d     fa1, fa0
+    c.un.d    fcc0, fa0, fa1
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1t      .Lop_double_to_long_set_vreg
+#endif
+    JAL(__fixdfdi)
+    b         .Lop_double_to_long_set_vreg
+
+.LDOUBLE_TO_LONG_max:
+    .dword 0x43e0000000000000              #  2^63, as a double
+.LDOUBLE_TO_LONG_min:
+    .dword 0xc3e0000000000000              #  minlong (-2^63), as a double
+.LDOUBLE_TO_LONG_ret_max:
+    .dword 0x7fffffffffffffff              #  maxlong
+.LDOUBLE_TO_LONG_ret_min:
+    .dword 0x8000000000000000              #  minlong
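+
+/*
+ * double-to-long mirrors the float path: 2^63 (one past maxlong) is the
+ * saturation bound, the 64-bit maxlong/minlong results are loaded from
+ * the .dword constants above, NaN yields 0, and in-range values go
+ * through the libgcc helper __fixdfdi.
+ */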
+
+/* continuation for op_mul_long */
+
+.Lop_mul_long_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* continuation for op_shl_long */
+
+.Lop_shl_long_finish:
+    SET_VREG64_GOTO(zero, v0, t2, t0)      #  vAA/vAA+1 <- rlo/rhi
+
+/* continuation for op_shr_long */
+
+.Lop_shr_long_finish:
+    sra     a3, a1, 31                     #  a3<- sign(ah)
+    SET_VREG64_GOTO(v1, a3, t3, t0)        #  vAA/vAA+1 <- rlo/rhi
+
+/* continuation for op_ushr_long */
+
+.Lop_ushr_long_finish:
+    SET_VREG64_GOTO(v1, zero, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+
+/* continuation for op_add_double */
+
+.Lop_add_double_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* continuation for op_sub_double */
+
+.Lop_sub_double_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* continuation for op_mul_double */
+
+.Lop_mul_double_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* continuation for op_div_double */
+
+.Lop_div_double_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* continuation for op_rem_double */
+
+.Lop_rem_double_finish:
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
+
+/* continuation for op_shl_long_2addr */
+
+.Lop_shl_long_2addr_finish:
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+
+/* continuation for op_shr_long_2addr */
+
+.Lop_shr_long_2addr_finish:
+    sra     a3, a1, 31                     #  a3<- sign(ah)
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+
+/* continuation for op_ushr_long_2addr */
+
+.Lop_ushr_long_2addr_finish:
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (0 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
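+
+/*
+ * This and all of the following .L_ALT_* stubs share the same shape:
+ * each occupies the same 128-byte slot index as its primary handler,
+ * preloads ra with that handler's address (artMterpAsmInstructionStart
+ * + opcode * 128), refreshes rIBASE, and jumps to MterpCheckBefore via
+ * "jalr zero, t9" (linking to $zero, i.e. a plain jr t9), so the check
+ * routine's return through ra lands in the real handler.
+ */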
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (1 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (2 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (3 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (4 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (5 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (6 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (7 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (8 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (9 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (10 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (11 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (12 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (13 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (14 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (15 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (16 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (17 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (18 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (19 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (20 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (21 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (22 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (23 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (24 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (25 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (26 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (27 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (28 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (29 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (30 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (31 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (32 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (33 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (34 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (35 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (36 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (37 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (38 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (39 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (40 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (41 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (42 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (43 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (44 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (45 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (46 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (47 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (48 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (49 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (50 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (51 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (52 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (53 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (54 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (55 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (56 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (57 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (58 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (59 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (60 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (61 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (62 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (63 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (64 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (65 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (66 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (67 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (68 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (69 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (70 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (71 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (72 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (73 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (74 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (75 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (76 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (77 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (78 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (79 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (80 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (81 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (82 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (83 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (84 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (85 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (86 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (87 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (88 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (89 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (90 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (91 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (92 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (93 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (94 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (95 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (96 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (97 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (98 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (99 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (100 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (101 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (102 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (103 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (104 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (105 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (106 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (107 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (108 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (109 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (110 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
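The `lw rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)` in every stub re-reads the dispatch base from the thread rather than trusting the cached register. The plausible reason — an assumption here, not something the stubs spell out — is that the runtime can retarget a thread between the fast handler table and this alt table, e.g. to turn instrumentation on or off, and reloading at each inter-instruction transfer picks the switch up immediately. A hedged C sketch of that switching, with invented names (`fast_handlers`, `alt_handlers`, `set_tracing`):

    /* Illustrative only; the field and table names are made up. */
    struct thread {
        const void *const *current_ibase;  /* rIBASE is cached from this field */
    };

    extern const void *const fast_handlers[256];  /* plain handlers */
    extern const void *const alt_handlers[256];   /* stubs like the ones in this file */

    /* Flipping the pointer is all it takes: the next stub's reload of
     * rIBASE routes every subsequent instruction through the other table. */
    static void set_tracing(struct thread *self, int enabled) {
        self->current_ibase = enabled ? alt_handlers : fast_handlers;
    }
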
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (111 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (112 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (113 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (114 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (115 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (116 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (117 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (118 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (119 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (120 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (121 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (122 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (123 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (124 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (125 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (126 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (127 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (128 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
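The arithmetic right above is a convenient sanity check on the layout: `neg-double` is opcode 0x80 = 128, and its primary handler sits at `artMterpAsmInstructionStart + (128 * 128)`. Together with the `.balign 128` before every label, this says each handler occupies a fixed 128-byte slot, so any handler address is simply base plus opcode times stride. In C (the `kHandlerWidth` constant name is invented; the 128-byte stride is exactly what the stubs encode):

    #include <stdint.h>

    enum { kHandlerWidth = 128 };  /* mirrors .balign 128 and the (op * 128) offsets */

    /* Hypothetical symbol standing in for artMterpAsmInstructionStart. */
    extern const uint8_t handler_base[];

    static const void *primary_handler(uint8_t opcode) {
        /* e.g. opcode 0x80 (neg-double) -> handler_base + 128 * 128 */
        return handler_base + (uintptr_t)opcode * kHandlerWidth;
    }
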
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (129 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (130 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (131 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (132 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (133 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (134 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (135 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (136 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (137 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (138 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (139 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (140 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (141 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (142 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (143 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (144 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (145 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (146 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (147 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (148 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (149 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (150 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (151 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (152 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (153 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (154 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (155 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (156 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (157 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (158 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (159 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (160 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (161 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (162 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (163 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (164 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (165 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.    Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (166 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (167 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (168 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (169 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (170 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (171 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (172 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (173 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (174 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (175 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (176 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (177 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (178 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (179 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (180 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (181 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (182 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (183 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (184 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (185 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (186 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (187 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (188 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (189 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (190 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (191 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (192 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (193 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (194 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (195 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (196 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (197 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (198 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (199 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (200 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (201 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (202 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (203 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (204 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (205 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (206 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (207 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (208 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (209 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (210 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (211 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (212 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (213 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (214 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (215 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (216 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (217 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (218 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (219 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (220 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (221 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (222 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (223 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (224 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (225 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (226 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (227 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (228 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (229 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (230 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (231 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (232 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (233 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (234 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (235 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (236 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (237 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (238 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (239 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (240 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (241 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (242 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (243 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (244 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (245 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (246 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (247 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (248 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (249 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (250 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (251 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (252 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (253 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (254 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: mips/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    la     ra, artMterpAsmInstructionStart + (255 * 128)   # Addr of primary handler
+    lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
+    move   a0, rSELF                    # arg0
+    addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
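All 256 alternate stubs above are identical except for the primary-handler offset baked into each `la ra, artMterpAsmInstructionStart + (N * 128)`. They exist so the interpreter can toggle per-instruction checking just by repointing its instruction base between the primary and alternate tables. A minimal C++ sketch of that idea, with hypothetical names standing in for the asm labels:

    #include <cstdint>

    using Handler = const void*;         // hypothetical: address of a 128-byte-aligned stub
    extern Handler primary_table[256];   // stands in for artMterpAsmInstructionStart
    extern Handler alt_table[256];       // stands in for artMterpAsmAltInstructionStart

    // The dispatch loop computes ibase + opcode * 128, so flipping this one
    // pointer routes every instruction through the alt stub, which tail-calls
    // MterpCheckBefore and only then enters the real handler.
    void SetCheckMode(Handler** ibase, bool checking) {
      *ibase = checking ? alt_table : primary_table;
    }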
+/* File: mips/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align 2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogDivideByZeroException)
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogArrayIndexException)
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogNegativeArraySizeException)
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogNoSuchMethodException)
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogNullObjectException)
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogExceptionThrownException)
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    lw    a2, THREAD_FLAGS_OFFSET(rSELF)
+    JAL(MterpLogSuspendFallback)
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    lw      a0, THREAD_EXCEPTION_OFFSET(rSELF)
+    beqz    a0, MterpFallback          # If no pending exception, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpHandleException)                    # (self, shadow_frame)
+    beqz    v0, MterpExceptionReturn             # no local catch, back to caller.
+    lw      a0, OFF_FP_CODE_ITEM(rFP)
+    lw      a1, OFF_FP_DEX_PC(rFP)
+    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+    addu    rPC, a0, CODEITEM_INSNS_OFFSET
+    sll     a1, a1, 1
+    addu    rPC, rPC, a1                         # generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    JAL(MterpShouldSwitchInterpreters)
+    bnez    v0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC()
+    FETCH_INST()
+    GET_INST_OPCODE(t0)
+    GOTO_OPCODE(t0)
+    /* NOTE: no fallthrough */
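The catch-block resume above rebuilds rPC from the code item and the dex pc left in the shadow frame by MterpHandleException. In C++ terms (a sketch; parameter names are mine, the offsets come from asm_support.h):

    #include <cstdint>

    // addu rPC, a0, CODEITEM_INSNS_OFFSET ; sll a1, a1, 1 ; addu rPC, rPC, a1
    const uint16_t* NewDexPcPtr(const uint8_t* code_item, uint32_t insns_offset,
                                uint32_t dex_pc) {
      return reinterpret_cast<const uint16_t*>(code_item + insns_offset) + dex_pc;
    }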
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ */
+MterpCommonTakenBranchNoFlags:
+    bgtz    rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    li      t0, JIT_CHECK_OSR
+    beq     rPROFILE, t0, .L_osr_check
+    blt     rPROFILE, t0, .L_resume_backward_branch
+    subu    rPROFILE, 1
+    beqz    rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE()
+    addu    a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnez    ra, .L_suspend_request_pending
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC()
+    move    a0, rSELF
+    JAL(MterpSuspendCheck)              # (self)
+    bnez    v0, MterpFallback
+    REFRESH_IBASE()                     # might have changed during suspend
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_no_count_backwards:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bne     rPROFILE, t0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beq     rPROFILE, t0, .L_check_osr_forward
+.L_resume_forward_branch:
+    add     a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    lw      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
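Pulling the control flow of MterpCommonTakenBranchNoFlags together, here is a rough C++ rendering; the helper names are mine, and kJitCheckOsr is the -1 sentinel asserted above:

    #include <cstdint>

    constexpr int16_t kJitCheckOsr = -1;   // JIT_CHECK_OSR
    void MaybeDoOsr(int32_t offset);       // wraps MterpMaybeDoOnStackReplacement
    void ResumeForward(int32_t offset);    // .L_resume_forward_branch
    void ResumeBackward(int32_t offset);   // .L_resume_backward_branch (suspend check)
    int16_t ReportHotnessBatch();          // .L_add_batch

    void OnTakenBranch(int32_t offset, int16_t& profile) {
      if (offset > 0) {                    // forward branches are never counted
        if (profile == kJitCheckOsr) MaybeDoOsr(offset);
        return ResumeForward(offset);
      }
      if (profile == kJitCheckOsr) {       // .L_osr_check
        MaybeDoOsr(offset);
      } else if (profile > kJitCheckOsr && --profile == 0) {
        profile = ReportHotnessBatch();    // counted down to zero: report
        if (profile == kJitCheckOsr) MaybeDoOsr(offset);  // .L_no_count_backwards
      }                                    // profile < -1: profiling disabled
      ResumeBackward(offset);
    }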
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST(2)
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    JAL(MterpLogOSR)
+#endif
+    li      v0, 1                       # Signal normal return
+    b       MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC()
+#if MTERP_LOGGING
+    move  a0, rSELF
+    addu  a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpLogFallback)
+#endif
+MterpCommonFallback:
+    move    v0, zero                    # signal retry with reference interpreter.
+    b       MterpDone
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and RA.  Here we restore SP, restore the registers, and then return
+ * through RA.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    li      v0, 1                       # signal return to caller.
+    b       MterpDone
+MterpReturn:
+    lw      a2, OFF_FP_RESULT_REGISTER(rFP)
+    sw      v0, 0(a2)
+    sw      v1, 4(a2)
+    li      v0, 1                       # signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # nothing to report if rPROFILE <= 0; otherwise fall through.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
+/* Restore from the stack and return. Frame size = STACK_SIZE */
+    STACK_LOAD_FULL()
+    jalr    zero, ra
+
+    .end ExecuteMterpImpl
+
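The MterpProfileActive block writes the in-register countdown back into the shadow frame and lets MterpAddHotnessBatch fold the batch into the method's real counter, returning a fresh countdown for rPROFILE. Sketched with hypothetical types (only the call shape is taken from the asm comments):

    #include <cstdint>

    struct FrameSketch { int16_t hotness_countdown; };   // hypothetical layout
    int16_t MterpAddHotnessBatchSketch(void* method, FrameSketch* sf, void* self);

    int16_t FlushHotness(void* method, FrameSketch* sf, void* self, int16_t countdown) {
      sf->hotness_countdown = countdown;                   // sh rPROFILE, ...(a1)
      return MterpAddHotnessBatchSketch(method, sf, self); // new countdown -> rPROFILE
    }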
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
new file mode 100644
index 0000000..a061f1e
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -0,0 +1,12364 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'mips64'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: mips64/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <machine/regdef.h>
+
+/* TODO: add the missing file and use its FP register definitions. */
+/* #include <machine/fpregdef.h> */
+/* FP register definitions */
+#define f0  $f0
+#define f1  $f1
+#define f2  $f2
+#define f3  $f3
+#define f12 $f12
+#define f13 $f13
+
+/*
+ * It looks like the GNU assembler currently does not support the blec and bgtc
+ * idioms, which should translate into bgec and bltc respectively with swapped
+ * left and right register operands.
+ * TODO: remove these macros when the assembler is fixed.
+ */
+.macro blec lreg, rreg, target
+    bgec    \rreg, \lreg, \target
+.endm
+.macro bgtc lreg, rreg, target
+    bltc    \rreg, \lreg, \target
+.endm
+
+/*
+Mterp and MIPS64 notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  s0  rPC       interpreted program counter, used for fetching instructions
+  s1  rFP       interpreted frame pointer, used for accessing locals and args
+  s2  rSELF     self (Thread) pointer
+  s3  rINST     first 16-bit code unit of current instruction
+  s4  rIBASE    interpreted instruction base pointer, used for computed goto
+  s5  rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  s6  rPROFILE  jit profile hotness countdown
+*/
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC      s0
+#define rFP      s1
+#define rSELF    s2
+#define rINST    s3
+#define rIBASE   s4
+#define rREFS    s5
+#define rPROFILE s6
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    sd      rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
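What gets exported is a raw pointer into the mapped code units, not an offset, so recovering a conventional dex pc is simple pointer arithmetic. A sketch (names are mine):

    #include <cstdint>

    uint32_t DexPcFromPtr(const uint16_t* dex_pc_ptr, const uint16_t* insns_base) {
      return static_cast<uint32_t>(dex_pc_ptr - insns_base);  // in 16-bit code units
    }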
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+    ld      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    lhu     rINST, 0(rPC)
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+    daddu   rPC, rPC, (\count) * 2
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by \reg, advancing rPC
+ * to point to the next instruction.  \reg must specify the distance in bytes,
+ * *not* 16-bit code units, and may be a signed value.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    daddu   rPC, rPC, \reg
+    FETCH_INST
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ADVANCE \count
+    FETCH_INST
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of a possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    lhu     rINST, ((\count) * 2)(rPC)
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, 255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.
+ */
+.macro GOTO_OPCODE reg
+    .set noat
+    sll     AT, \reg, 7
+    daddu   AT, rIBASE, AT
+    jic     AT, 0
+    .set at
+.endm
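GOTO_OPCODE is the computed goto at the heart of the interpreter: every handler below is `.balign 128`, so shifting the opcode left by 7 yields its byte offset in the handler table. The equivalent arithmetic in C++:

    #include <cstdint>

    uintptr_t HandlerAddress(uintptr_t ibase, uint8_t opcode) {
      return ibase + (static_cast<uintptr_t>(opcode) << 7);  // sll AT, \reg, 7 ; daddu ; jic
    }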
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ * Note, GET_VREG does sign extension to 64 bits while
+ * GET_VREG_U does zero extension to 64 bits.
+ * One is useful for arithmetic while the other is
+ * useful for storing the result value as 64-bit.
+ */
+.macro GET_VREG reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lw      \reg, 0(AT)
+    .set at
+.endm
+.macro GET_VREG_U reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lwu     \reg, 0(AT)
+    .set at
+.endm
+.macro GET_VREG_FLOAT reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lwc1    \reg, 0(AT)
+    .set at
+.endm
+.macro SET_VREG reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    sw      \reg, 0(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    .set at
+.endm
+.macro SET_VREG_OBJECT reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    sw      \reg, 0(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      \reg, 0(AT)
+    .set at
+.endm
+.macro SET_VREG_FLOAT reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    swc1    \reg, 0(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    .set at
+.endm
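The SET_VREG* macros maintain two parallel 32-bit arrays in the shadow frame: raw vreg values at rFP and a reference mirror at rREFS that the GC scans. A non-object store must clear the mirror slot; an object store duplicates into it. A C++ sketch of the invariant (layout assumed, not quoted from the sources):

    #include <cstdint>

    void SetVReg(uint32_t* vregs, uint32_t* refs, uint32_t v, uint32_t value,
                 bool is_object) {
      vregs[v] = value;                   // sw reg, 0(AT), AT = rFP + 4*v
      refs[v]  = is_object ? value : 0u;  // SET_VREG_OBJECT mirrors, SET_VREG clears
    }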
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.
+ * Avoid unaligned memory accesses.
+ * Note, SET_VREG_WIDE clobbers the register containing the value being stored.
+ * Note, SET_VREG_DOUBLE clobbers the register containing the Dalvik register number.
+ */
+.macro GET_VREG_WIDE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lw      \reg, 0(AT)
+    lw      AT, 4(AT)
+    dinsu   \reg, AT, 32, 32
+    .set at
+.endm
+.macro GET_VREG_DOUBLE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    lwc1    \reg, 0(AT)
+    lw      AT, 4(AT)
+    mthc1   AT, \reg
+    .set at
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rFP, 2
+    sw      \reg, 0(AT)
+    drotr32 \reg, \reg, 0
+    sw      \reg, 4(AT)
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    sw      zero, 4(AT)
+    .set at
+.endm
+.macro SET_VREG_DOUBLE reg, vreg
+    .set noat
+    dlsa    AT, \vreg, rREFS, 2
+    sw      zero, 0(AT)
+    sw      zero, 4(AT)
+    dlsa    AT, \vreg, rFP, 2
+    swc1    \reg, 0(AT)
+    mfhc1   \vreg, \reg
+    sw      \vreg, 4(AT)
+    .set at
+.endm
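The wide macros deliberately split every 64-bit value into two 32-bit accesses: vreg slots are only 4-byte aligned, so a single ld/sd could take an alignment fault. The store half of SET_VREG_WIDE, sketched in C++:

    #include <cstdint>

    void SetVRegWide(uint32_t* vregs, uint32_t v, uint64_t value) {
      vregs[v]     = static_cast<uint32_t>(value);        // sw low half, 0(AT)
      vregs[v + 1] = static_cast<uint32_t>(value >> 32);  // sw high half, 4(AT)
    }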
+
+/*
+ * On-stack offsets for spilling/unspilling callee-saved registers
+ * and the frame size.
+ */
+#define STACK_OFFSET_RA 0
+#define STACK_OFFSET_GP 8
+#define STACK_OFFSET_S0 16
+#define STACK_OFFSET_S1 24
+#define STACK_OFFSET_S2 32
+#define STACK_OFFSET_S3 40
+#define STACK_OFFSET_S4 48
+#define STACK_OFFSET_S5 56
+#define STACK_OFFSET_S6 64
+#define STACK_SIZE      80    /* needs 16 byte alignment */
+
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN             0x80000000
+#define INT_MIN_AS_FLOAT    0xCF000000
+#define INT_MIN_AS_DOUBLE   0xC1E0000000000000
+#define LONG_MIN            0x8000000000000000
+#define LONG_MIN_AS_FLOAT   0xDF000000
+#define LONG_MIN_AS_DOUBLE  0xC3E0000000000000
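These are raw IEEE-754 bit patterns used by the conversion handlers to detect out-of-range inputs. As a sanity check on the first one: (float)INT_MIN is sign 1, exponent 31 + 127 = 158 = 0x9E, zero mantissa, which packs to 0xCF000000. A sketch, not from the sources:

    #include <cstdint>
    #include <cstring>

    bool CheckIntMinAsFloat() {
      float f = -2147483648.0f;  // (float)INT_MIN, exactly representable
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      return bits == 0xCF000000u;
    }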
+
+/* File: mips64/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Interpreter entry point.
+ */
+
+    .set    reorder
+
+    .text
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+/*
+ * On entry:
+ *  a0  Thread* self
+ *  a1  code_item
+ *  a2  ShadowFrame
+ *  a3  JValue* result_register
+ *
+ */
+ExecuteMterpImpl:
+    .cfi_startproc
+    .cpsetup t9, t8, ExecuteMterpImpl
+
+    .cfi_def_cfa sp, 0
+    daddu   sp, sp, -STACK_SIZE
+    .cfi_adjust_cfa_offset STACK_SIZE
+
+    sd      t8, STACK_OFFSET_GP(sp)
+    .cfi_rel_offset 28, STACK_OFFSET_GP
+    sd      ra, STACK_OFFSET_RA(sp)
+    .cfi_rel_offset 31, STACK_OFFSET_RA
+
+    sd      s0, STACK_OFFSET_S0(sp)
+    .cfi_rel_offset 16, STACK_OFFSET_S0
+    sd      s1, STACK_OFFSET_S1(sp)
+    .cfi_rel_offset 17, STACK_OFFSET_S1
+    sd      s2, STACK_OFFSET_S2(sp)
+    .cfi_rel_offset 18, STACK_OFFSET_S2
+    sd      s3, STACK_OFFSET_S3(sp)
+    .cfi_rel_offset 19, STACK_OFFSET_S3
+    sd      s4, STACK_OFFSET_S4(sp)
+    .cfi_rel_offset 20, STACK_OFFSET_S4
+    sd      s5, STACK_OFFSET_S5(sp)
+    .cfi_rel_offset 21, STACK_OFFSET_S5
+    sd      s6, STACK_OFFSET_S6(sp)
+    .cfi_rel_offset 22, STACK_OFFSET_S6
+
+    /* Remember the return register */
+    sd      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
+
+    /* Remember the code_item */
+    sd      a1, SHADOWFRAME_CODE_ITEM_OFFSET(a2)
+
+    /* set up "named" registers */
+    move    rSELF, a0
+    daddu   rFP, a2, SHADOWFRAME_VREGS_OFFSET
+    lw      v0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
+    dlsa    rREFS, v0, rFP, 2
+    daddu   rPC, a1, CODEITEM_INSNS_OFFSET
+    lw      v0, SHADOWFRAME_DEX_PC_OFFSET(a2)
+    dlsa    rPC, v0, rPC, 1
+    EXPORT_PC
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* Set up for backwards branches & osr profiling */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpSetUpHotnessCountdown
+    move    rPROFILE, v0                # Starting hotness countdown to rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+    /* NOTE: no fallthrough */
+
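To summarize the "named" register setup: rFP points at the first vreg, rREFS at the reference mirror immediately after the vregs, and rPC at the current 16-bit code unit. A C++ sketch with a hypothetical frame layout (the real offsets come from asm_support.h):

    #include <cstdint>

    struct ShadowFrameSketch {        // hypothetical stand-in for the real layout
      uint32_t number_of_vregs;
      uint32_t dex_pc;
      uint32_t vregs[1];              // vregs, then the reference mirror
    };

    void SetUpNamedRegs(ShadowFrameSketch* sf, const uint16_t* insns,
                        uint32_t*& fp, uint32_t*& refs, const uint16_t*& pc) {
      fp   = sf->vregs;                 // daddu rFP, a2, SHADOWFRAME_VREGS_OFFSET
      refs = fp + sf->number_of_vregs;  // dlsa  rREFS, v0, rFP, 2
      pc   = insns + sf->dex_pc;        // dlsa  rPC, v0, rPC, 1
    }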
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: mips64/op_nop.S */
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: mips64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT a0, a2              # vA <- vB
+    .else
+    SET_VREG a0, a2                     # vA <- vB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: mips64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    lhu     a3, 2(rPC)                  # a3 <- BBBB
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vBBBB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT a0, a2              # vAA <- vBBBB
+    .else
+    SET_VREG a0, a2                     # vAA <- vBBBB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: mips64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    lhu     a3, 4(rPC)                  # a3 <- BBBB
+    lhu     a2, 2(rPC)                  # a2 <- AAAA
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vBBBB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT a0, a2              # vAAAA <- vBBBB
+    .else
+    SET_VREG a0, a2                     # vAAAA <- vBBBB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: mips64/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG_WIDE a0, a3                # a0 <- vB
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- vB
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: mips64/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lhu     a3, 2(rPC)                  # a3 <- BBBB
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_WIDE a0, a3                # a0 <- vBBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- vBBBB
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: mips64/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lhu     a3, 4(rPC)                  # a3 <- BBBB
+    lhu     a2, 2(rPC)                  # a2 <- AAAA
+    GET_VREG_WIDE a0, a3                # a0 <- vBBBB
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAAAA <- vBBBB
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: mips64/op_move_object.S */
+/* File: mips64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT a0, a2              # vA <- vB
+    .else
+    SET_VREG a0, a2                     # vA <- vB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: mips64/op_move_object_from16.S */
+/* File: mips64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    lhu     a3, 2(rPC)                  # a3 <- BBBB
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vBBBB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT a0, a2              # vAA <- vBBBB
+    .else
+    SET_VREG a0, a2                     # vAA <- vBBBB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: mips64/op_move_object_16.S */
+/* File: mips64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    lhu     a3, 4(rPC)                  # a3 <- BBBB
+    lhu     a2, 2(rPC)                  # a2 <- AAAA
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vBBBB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT a0, a2              # vAAAA <- vBBBB
+    .else
+    SET_VREG a0, a2                     # vAAAA <- vBBBB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: mips64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    ld      a0, OFF_FP_RESULT_REGISTER(rFP)  # get pointer to result JType
+    lw      a0, 0(a0)                   # a0 <- result.i
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT a0, a2              # vAA <- result
+    .else
+    SET_VREG a0, a2                     # vAA <- result
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: mips64/op_move_result_wide.S */
+    /* for: move-result-wide */
+    /* op vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    ld      a0, OFF_FP_RESULT_REGISTER(rFP)  # get pointer to result JType
+    ld      a0, 0(a0)                   # a0 <- result.j
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- result
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: mips64/op_move_result_object.S */
+/* File: mips64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    ld      a0, OFF_FP_RESULT_REGISTER(rFP)  # get pointer to result JType
+    lw      a0, 0(a0)                   # a0 <- result.i
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT a0, a2              # vAA <- result
+    .else
+    SET_VREG a0, a2                     # vAA <- result
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: mips64/op_move_exception.S */
+    /* move-exception vAA */
+    srl     a2, rINST, 8                # a2 <- AA
+    ld      a0, THREAD_EXCEPTION_OFFSET(rSELF)  # load exception obj
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    SET_VREG_OBJECT a0, a2              # vAA <- exception obj
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sd      zero, THREAD_EXCEPTION_OFFSET(rSELF)  # clear exception
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: mips64/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    li      a0, 0
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: mips64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: mips64/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_WIDE a0, a2                # a0 <- vAA
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: mips64/op_return_object.S */
+/* File: mips64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    .extern MterpSuspendCheck
+    jal     MterpThreadFenceForConstructor
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA
+    b       MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: mips64/op_const_4.S */
+    /* const/4 vA, #+B */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    seh     a0, rINST                   # sign extend B in rINST
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    sra     a0, a0, 12                  # shift B into its final position
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- +B
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: mips64/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- sign-extended BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- +BBBB
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: mips64/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (high)
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    ins     a0, a1, 16, 16              # a0 = BBBBbbbb
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- +BBBBbbbb
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: mips64/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    sll     a0, a0, 16                  # a0 <- BBBB0000
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- +BBBB0000
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: mips64/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- sign-extended BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- +BBBB
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: mips64/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (high)
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    ins     a0, a1, 16, 16              # a0 = BBBBbbbb
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- +BBBBbbbb
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: mips64/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    srl     a4, rINST, 8                # a4 <- AA
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (low middle)
+    lh      a2, 6(rPC)                  # a2 <- hhhh (high middle)
+    lh      a3, 8(rPC)                  # a3 <- HHHH (high)
+    FETCH_ADVANCE_INST 5                # advance rPC, load rINST
+    ins     a0, a1, 16, 16              # a0 = BBBBbbbb
+    ins     a2, a3, 16, 16              # a2 = HHHHhhhh
+    dinsu   a0, a2, 32, 32              # a0 = HHHHhhhhBBBBbbbb
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4                # vAA <- +HHHHhhhhBBBBbbbb
+    GOTO_OPCODE v0                      # jump to next instruction
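The four 16-bit fetches plus the ins/dinsu pair compose the 64-bit literal in a0. The same composition in C++ (a sketch):

    #include <cstdint>

    uint64_t BuildWideImm(uint16_t bbbb, uint16_t BBBB, uint16_t hhhh, uint16_t HHHH) {
      uint32_t lo = bbbb | (static_cast<uint32_t>(BBBB) << 16);  // ins a0, a1, 16, 16
      uint32_t hi = hhhh | (static_cast<uint32_t>(HHHH) << 16);  // ins a2, a3, 16, 16
      return lo | (static_cast<uint64_t>(hi) << 32);             // dinsu a0, a2, 32, 32
    }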
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: mips64/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      a0, 2(rPC)                  # a0 <- BBBB
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    dsll32  a0, a0, 16                  # a0 <- BBBB000000000000
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vAA <- +BBBB000000000000
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: mips64/op_const_string.S */
+    /* const/string vAA, String//BBBB */
+    .extern MterpConstString
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- BBBB
+    srl     a1, rINST, 8                # a1 <- AA
+    daddu   a2, rFP, OFF_FP_SHADOWFRAME
+    move    a3, rSELF
+    jal     MterpConstString            # (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     # load rINST
+    bnez    v0, MterpPossibleException  # let reference interpreter deal with it.
+    ADVANCE 2                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: mips64/op_const_string_jumbo.S */
+    /* const/string vAA, String//BBBBBBBB */
+    .extern MterpConstString
+    EXPORT_PC
+    lh      a0, 2(rPC)                  # a0 <- bbbb (low)
+    lh      a4, 4(rPC)                  # a4 <- BBBB (high)
+    srl     a1, rINST, 8                # a1 <- AA
+    ins     a0, a4, 16, 16              # a0 <- BBBBbbbb
+    daddu   a2, rFP, OFF_FP_SHADOWFRAME
+    move    a3, rSELF
+    jal     MterpConstString            # (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     # load rINST
+    bnez    v0, MterpPossibleException  # let reference interpreter deal with it.
+    ADVANCE 3                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: mips64/op_const_class.S */
+    /* const/class vAA, Class//BBBB */
+    .extern MterpConstClass
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- BBBB
+    srl     a1, rINST, 8                # a1 <- AA
+    daddu   a2, rFP, OFF_FP_SHADOWFRAME
+    move    a3, rSELF
+    jal     MterpConstClass             # (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     # load rINST
+    bnez    v0, MterpPossibleException  # let reference interpreter deal with it.
+    ADVANCE 2                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: mips64/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    .extern artLockObjectFromCode
+    EXPORT_PC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA (object)
+    move    a1, rSELF                   # a1 <- self
+    jal     artLockObjectFromCode
+    bnezc   v0, MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: mips64/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    .extern artUnlockObjectFromCode
+    EXPORT_PC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA (object)
+    move    a1, rSELF                   # a1 <- self
+    jal     artUnlockObjectFromCode     # v0 <- success for unlock(obj, self)
+    bnezc   v0, MterpException
+    FETCH_ADVANCE_INST 1                # before throw: advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: mips64/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
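+    /*
+     * Note: vregs are 32-bit slots off rFP, so "dlsa a1, a1, rFP, 2"
+     * computes rFP + AA*4; e.g. for AA = 7 it passes the address of
+     * v7 to MterpCheckCast.
+     */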
+    .extern MterpCheckCast
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- BBBB
+    srl     a1, rINST, 8                # a1 <- AA
+    dlsa    a1, a1, rFP, 2              # a1 <- &object
+    ld      a2, OFF_FP_METHOD(rFP)      # a2 <- method
+    move    a3, rSELF                   # a3 <- self
+    jal     MterpCheckCast              # (index, &obj, method, self)
+    PREFETCH_INST 2
+    bnez    v0, MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: mips64/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
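+    /*
+     * Per the Dalvik spec, vA receives 1 when the reference in vB is
+     * an instance of the resolved class and 0 otherwise; a null
+     * reference yields 0 rather than an exception.
+     */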
+    .extern MterpInstanceOf
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    dlsa    a1, a1, rFP, 2              # a1 <- &object
+    ld      a2, OFF_FP_METHOD(rFP)      # a2 <- method
+    move    a3, rSELF                   # a3 <- self
+    jal     MterpInstanceOf             # (index, &obj, method, self)
+    ld      a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    PREFETCH_INST 2
+    bnez    a1, MterpException
+    ADVANCE 2                           # advance rPC
+    SET_VREG v0, a2                     # vA <- v0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: mips64/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a0, a1                   # a0 <- vB (object ref)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a0, common_errNullObject    # yup, fail
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- array length
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a3, a2                     # vA <- length
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: mips64/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    .extern MterpNewInstance
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rSELF
+    move    a2, rINST
+    jal     MterpNewInstance            # (shadow_frame, self, inst_data)
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: mips64/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    .extern MterpNewArray
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    move    a3, rSELF
+    jal     MterpNewArray               # (shadow_frame, pc, inst_data, self)
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: mips64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rSELF
+    jal     MterpFilledNewArray         # (shadow_frame, pc, self)
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: mips64/op_filled_new_array_range.S */
+/* File: mips64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rSELF
+    jal     MterpFilledNewArrayRange    # (shadow_frame, pc, self)
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: mips64/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
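+    /*
+     * The payload at PC + BBBBBBBB*2 follows the dex fill-array-data
+     * format: ushort ident (0x0300), ushort element_width, uint size,
+     * then size*element_width bytes of raw data.
+     */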
+    .extern MterpFillArrayData
+    EXPORT_PC
+    lh      a1, 2(rPC)                  # a1 <- bbbb (lo)
+    lh      a0, 4(rPC)                  # a0 <- BBBB (hi)
+    srl     a3, rINST, 8                # a3 <- AA
+    ins     a1, a0, 16, 16              # a1 <- BBBBbbbb
+    GET_VREG_U a0, a3                   # a0 <- vAA (array object)
+    dlsa    a1, a1, rPC, 1              # a1 <- PC + BBBBbbbb*2 (array data off.)
+    jal     MterpFillArrayData          # (obj, payload)
+    beqzc   v0, MterpPossibleException  # exception?
+    FETCH_ADVANCE_INST 3                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: mips64/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vAA (exception object)
+    beqzc   a0, common_errNullObject
+    sd      a0, THREAD_EXCEPTION_OFFSET(rSELF)  # thread->exception <- obj
+    b       MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: mips64/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
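+    /*
+     * Illustrative example: AA = 0xf6 sign-extends to -10 code units,
+     * i.e. a branch of -20 bytes relative to this instruction.
+     */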
+    srl     rINST, rINST, 8
+    seb     rINST, rINST                # rINST <- offset (sign-extended AA)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: mips64/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: mips64/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".
+     */
+    /* goto/32 +AAAAAAAA */
+    lh      rINST, 2(rPC)               # rINST <- aaaa (low)
+    lh      a1, 4(rPC)                  # a1 <- AAAA (high)
+    ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: mips64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBBBBBB */
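+    /*
+     * The helper parses a packed-switch payload at PC + BBBBbbbb*2,
+     * which per the dex format is: ushort ident (0x0100), ushort size,
+     * int first_key, then int targets[size] (code-unit offsets).
+     */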
+    .extern MterpDoPackedSwitch
+    .extern MterpProfileBranch
+    lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
+    srl     a3, rINST, 8                # a3 <- AA
+    ins     a0, a1, 16, 16              # a0 <- BBBBbbbb
+    GET_VREG a1, a3                     # a1 <- vAA
+    dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
+    jal     MterpDoPackedSwitch         # v0 <- code-unit branch offset
+    move    rINST, v0
+    b       MterpCommonTakenBranchNoFlags
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: mips64/op_sparse_switch.S */
+/* File: mips64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBBBBBB */
+    .extern MterpDoSparseSwitch
+    .extern MterpProfileBranch
+    lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
+    lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
+    srl     a3, rINST, 8                # a3 <- AA
+    ins     a0, a1, 16, 16              # a0 <- BBBBbbbb
+    GET_VREG a1, a3                     # a1 <- vAA
+    dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
+    jal     MterpDoSparseSwitch         # v0 <- code-unit branch offset
+    move    rINST, v0
+    b       MterpCommonTakenBranchNoFlags
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: mips64/op_cmpl_float.S */
+/* File: mips64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * For: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
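+    /*
+     * NaN handling is what distinguishes the two ops: per the Dalvik
+     * spec, cmpl-float yields -1 on an unordered compare while
+     * cmpg-float yields +1; the generated .if/.else below selects
+     * the variant.
+     */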
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    cmp.eq.s f2, f0, f1
+    li      a0, 0
+    bc1nez  f2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.s f2, f0, f1
+    li      a0, -1
+    bc1nez  f2, 1f                      # done if vBB < vCC (ordered)
+    li      a0, 1                       # vBB > vCC or unordered
+    .else
+    cmp.lt.s f2, f1, f0
+    li      a0, 1
+    bc1nez  f2, 1f                      # done if vBB > vCC (ordered)
+    li      a0, -1                      # vBB < vCC or unordered
+    .endif
+1:
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: mips64/op_cmpg_float.S */
+/* File: mips64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * For: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    cmp.eq.s f2, f0, f1
+    li      a0, 0
+    bc1nez  f2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.s f2, f0, f1
+    li      a0, -1
+    bc1nez  f2, 1f                      # done if vBB < vCC (ordered)
+    li      a0, 1                       # vBB > vCC or unordered
+    .else
+    cmp.lt.s f2, f1, f0
+    li      a0, 1
+    bc1nez  f2, 1f                      # done if vBB > vCC (ordered)
+    li      a0, -1                      # vBB < vCC or unordered
+    .endif
+1:
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: mips64/op_cmpl_double.S */
+/* File: mips64/fcmpWide.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    cmp.eq.d f2, f0, f1
+    li      a0, 0
+    bc1nez  f2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.d f2, f0, f1
+    li      a0, -1
+    bc1nez  f2, 1f                      # done if vBB < vCC (ordered)
+    li      a0, 1                       # vBB > vCC or unordered
+    .else
+    cmp.lt.d f2, f1, f0
+    li      a0, 1
+    bc1nez  f2, 1f                      # done if vBB > vCC (ordered)
+    li      a0, -1                      # vBB < vCC or unordered
+    .endif
+1:
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: mips64/op_cmpg_double.S */
+/* File: mips64/fcmpWide.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    cmp.eq.d f2, f0, f1
+    li      a0, 0
+    bc1nez  f2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.d f2, f0, f1
+    li      a0, -1
+    bc1nez  f2, 1f                      # done if vBB < vCC (ordered)
+    li      a0, 1                       # vBB > vCC or unordered
+    .else
+    cmp.lt.d f2, f1, f0
+    li      a0, 1
+    bc1nez  f2, 1f                      # done if vBB > vCC (ordered)
+    li      a0, -1                      # vBB < vCC or unordered
+    .endif
+1:
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: mips64/op_cmp_long.S */
+    /* cmp-long vAA, vBB, vCC */
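+    /*
+     * The slt/slt/subu sequence below computes
+     * (vBB > vCC) - (vBB < vCC), i.e. -1, 0, or +1, avoiding the
+     * overflow a direct 64-bit subtraction could incur.
+     */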
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    slt     a2, a0, a1
+    slt     a0, a1, a0
+    subu    a0, a0, a2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                     # vAA <- result
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: mips64/op_if_eq.S */
+/* File: mips64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
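+    /*
+     * Encoding example (illustrative): code unit 0x2132 is
+     * "if-eq v1, v2, +CCCC": opcode 0x32 in bits 0-7, A = 1 in bits
+     * 8-11, B = 2 in bits 12-15; the offset occupies the next code
+     * unit.
+     */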
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    beqc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: mips64/op_if_ne.S */
+/* File: mips64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    bnec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: mips64/op_if_lt.S */
+/* File: mips64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    bltc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: mips64/op_if_ge.S */
+/* File: mips64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    bgec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: mips64/op_if_gt.S */
+/* File: mips64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    bgtc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: mips64/op_if_le.S */
+/* File: mips64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-le" you would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+    .extern MterpProfileBranch
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    blec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: mips64/op_if_eqz.S */
+/* File: mips64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
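+    /*
+     * rPROFILE holds the branch-profiling countdown; comparing it
+     * against JIT_CHECK_OSR (assumed here to be the sentinel that
+     * flags a pending on-stack replacement) routes even a not-taken
+     * branch through .L_check_not_taken_osr.
+     */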
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    beqzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: mips64/op_if_nez.S */
+/* File: mips64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    bnezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: mips64/op_if_ltz.S */
+/* File: mips64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    bltzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: mips64/op_if_gez.S */
+/* File: mips64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    bgezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: mips64/op_if_gtz.S */
+/* File: mips64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    bgtzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: mips64/op_if_lez.S */
+/* File: mips64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for
+     * "if-lez" you would use "le".
+     *
+     * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+    srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
+    GET_VREG a0, a2                     # a0 <- vAA
+    blezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: mips64/op_unused_3e.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: mips64/op_unused_3f.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: mips64/op_unused_40.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: mips64/op_unused_41.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: mips64/op_unused_42.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: mips64/op_unused_43.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: mips64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
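+    /*
+     * The handler is generated from a template parameterized by the
+     * element shift: ".if 2" below is nonzero, so the dlsa path
+     * assembles and scales the index by 4 (e.g. index 3 -> byte
+     * offset 12); the daddu path is only used by byte-sized variants.
+     */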
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 2
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 2          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lw   a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)        # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a2, a4                     # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: mips64/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     */
+    /* aget-wide vAA, vBB, vCC */
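+    /*
+     * The 64-bit element is assembled from two 32-bit loads: e.g.
+     * with low word 0x9abcdef0 and high word 0x12345678, dinsu
+     * produces 0x123456789abcdef0 in a2 before the wide vreg store.
+     */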
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    dlsa    a0, a1, a0, 3               # a0 <- arrayObj + index*width
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lw      a2, MIRROR_WIDE_ARRAY_DATA_OFFSET(a0)
+    lw      a3, (MIRROR_WIDE_ARRAY_DATA_OFFSET+4)(a0)
+    dinsu   a2, a3, 32, 32              # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a2, a4                # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: mips64/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    .extern artAGetObjectFromMterp
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    EXPORT_PC
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    jal     artAGetObjectFromMterp      # (array, index)
+    ld      a1, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a4, rINST, 8                # a4 <- AA
+    PREFETCH_INST 2
+    bnez    a1, MterpException
+    SET_VREG_OBJECT v0, a4              # vAA <- v0
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: mips64/op_aget_boolean.S */
+/* File: mips64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 0
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 0          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lbu   a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)        # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a2, a4                     # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: mips64/op_aget_byte.S */
+/* File: mips64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 0
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 0          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lb   a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)        # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a2, a4                     # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: mips64/op_aget_char.S */
+/* File: mips64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 1
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 1          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lhu   a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)        # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a2, a4                     # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: mips64/op_aget_short.S */
+/* File: mips64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 1
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 1          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    lh   a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)        # a2 <- vBB[vCC]
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a2, a4                     # vAA <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: mips64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 2
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 2          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a2, a4                     # a2 <- vAA
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sw  a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)        # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: mips64/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     */
+    /* aput-wide vAA, vBB, vCC */
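+    /*
+     * The 64-bit value is stored as two 32-bit halves: sw writes the
+     * low word, dsrl32 shifts the high word down by 32, and the
+     * second sw writes it at offset +4.
+     */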
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    dlsa    a0, a1, a0, 3               # a0 <- arrayObj + index*width
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    GET_VREG_WIDE a2, a4                # a2 <- vAA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sw      a2, MIRROR_WIDE_ARRAY_DATA_OFFSET(a0)
+    dsrl32  a2, a2, 0
+    sw      a2, (MIRROR_WIDE_ARRAY_DATA_OFFSET+4)(a0)  # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: mips64/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    .extern MterpAputObject
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    jal     MterpAputObject             # (shadow_frame, pc, inst_data)
+    beqzc   v0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: mips64/op_aput_boolean.S */
+/* File: mips64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 0
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 0          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a2, a4                     # a2 <- vAA
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sb  a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)        # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: mips64/op_aput_byte.S */
+/* File: mips64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 0
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 0          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a2, a4                     # a2 <- vAA
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sb  a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)        # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: mips64/op_aput_char.S */
+/* File: mips64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 1
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 1          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a2, a4                     # a2 <- vAA
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sh  a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)        # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: mips64/op_aput_short.S */
+/* File: mips64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    srl     a4, rINST, 8                # a4 <- AA
+    GET_VREG_U a0, a2                   # a0 <- vBB (array object)
+    GET_VREG a1, a3                     # a1 <- vCC (requested index)
+    beqz    a0, common_errNullObject    # bail if null array object
+    lw      a3, MIRROR_ARRAY_LENGTH_OFFSET(a0)  # a3 <- arrayObj->length
+    .if 1
+    # [d]lsa does not support shift count of 0.
+    dlsa    a0, a1, a0, 1          # a0 <- arrayObj + index*width
+    .else
+    daddu   a0, a1, a0                  # a0 <- arrayObj + index*width
+    .endif
+    bgeu    a1, a3, common_errArrayIndex  # unsigned compare: index >= length, bail
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_VREG a2, a4                     # a2 <- vAA
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    sh  a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)        # vBB[vCC] <- a2
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: mips64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
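+    /*
+     * Another template expansion: ".if 0" below selects plain
+     * SET_VREG for primitive results, while the iget-object variant
+     * uses SET_VREG_OBJECT so the shadow frame's reference slots stay
+     * consistent for the garbage collector.
+     */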
+    .extern artGet32InstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGet32InstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: mips64/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    .extern artGet64InstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGet64InstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    SET_VREG_WIDE v0, a2                # fp[A] <- v0
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: mips64/op_iget_object.S */
+/* File: mips64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    .extern artGetObjInstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGetObjInstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if 1
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: mips64/op_iget_boolean.S */
+/* File: mips64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    .extern artGetBooleanInstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGetBooleanInstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: mips64/op_iget_byte.S */
+/* File: mips64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    .extern artGetByteInstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGetByteInstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: mips64/op_iget_char.S */
+/* File: mips64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    .extern artGetCharInstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGetCharInstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: mips64/op_iget_short.S */
+/* File: mips64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    .extern artGetShortInstanceFromCode
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
+    move     a3, rSELF                  # a3 <- self
+    jal      artGetShortInstanceFromCode
+    ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext      a2, rINST, 8, 4            # a2 <- A
+    PREFETCH_INST 2
+    bnez     a3, MterpPossibleException # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[A] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: mips64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG a2, a2                     # a2 <- fp[A]
+    ld      a3, OFF_FP_METHOD(rFP)      # a3 <- referrer
+    PREFETCH_INST 2
+    jal     artSet32InstanceFromMterp
+    bnez    v0, MterpPossibleException  # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: mips64/op_iput_wide.S */
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
+    srl      a1, rINST, 12              # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext      a2, rINST, 8, 4            # a2 <- A
+    dlsa     a2, a2, rFP, 2             # a2 <- &fp[A]
+    ld       a3, OFF_FP_METHOD(rFP)     # a3 <- referrer
+    PREFETCH_INST 2
+    jal      artSet64InstanceFromMterp
+    bnez     v0, MterpPossibleException # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
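+
+/*
+ * Note: "dlsa a2, a2, rFP, 2" computes a2 <- rFP + (A << 2), i.e. &fp[A].
+ * Dalvik vregs are 4 bytes wide, so a 64-bit value spans a vreg pair and is
+ * handed to the helper by pointer rather than by value.
+ */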
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: mips64/op_iput_object.S */
+    .extern MterpIputObject
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    move    a3, rSELF
+    jal     MterpIputObject
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
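+
+/*
+ * Note: unlike the primitive iput variants, iput-object is routed through
+ * the C++ helper MterpIputObject instead of a raw field store; writing a
+ * reference requires the runtime's extra bookkeeping (GC write barrier /
+ * card marking), which the helper performs.
+ */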
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: mips64/op_iput_boolean.S */
+/* File: mips64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG a2, a2                     # a2 <- fp[A]
+    ld      a3, OFF_FP_METHOD(rFP)      # a3 <- referrer
+    PREFETCH_INST 2
+    jal     artSet8InstanceFromMterp
+    bnez    v0, MterpPossibleException  # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: mips64/op_iput_byte.S */
+/* File: mips64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG a2, a2                     # a2 <- fp[A]
+    ld      a3, OFF_FP_METHOD(rFP)      # a3 <- referrer
+    PREFETCH_INST 2
+    jal     artSet8InstanceFromMterp
+    bnez    v0, MterpPossibleException  # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: mips64/op_iput_char.S */
+/* File: mips64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG a2, a2                     # a2 <- fp[A]
+    ld      a3, OFF_FP_METHOD(rFP)      # a3 <- referrer
+    PREFETCH_INST 2
+    jal     artSet16InstanceFromMterp
+    bnez    v0, MterpPossibleException  # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: mips64/op_iput_short.S */
+/* File: mips64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref CCCC
+    srl     a1, rINST, 12               # a1 <- B
+    GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    GET_VREG a2, a2                     # a2 <- fp[A]
+    ld      a3, OFF_FP_METHOD(rFP)      # a3 <- referrer
+    PREFETCH_INST 2
+    jal     artSet16InstanceFromMterp
+    bnez    v0, MterpPossibleException  # bail out
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: mips64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGet32StaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
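+
+/*
+ * Note on the exception pattern: the helper returns the fetched value in v0
+ * and records any pending exception on the thread, so the handler reloads
+ * rSELF->exception into a3 and bails out if it is non-null.  PREFETCH_INST
+ * runs before the branch so the common no-exception path falls through with
+ * the next instruction already fetched.
+ */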
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: mips64/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field//BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGet64StaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a4, rINST, 8                # a4 <- AA
+    bnez    a3, MterpException          # bail out
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG_WIDE v0, a4
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: mips64/op_sget_object.S */
+/* File: mips64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGetObjStaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if 1
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: mips64/op_sget_boolean.S */
+/* File: mips64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGetBooleanStaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+    and     v0, v0, 0xff                # v0 <- zero-extended boolean
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
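+
+/*
+ * Note: the narrow sget variants differ from sget only in the helper called
+ * and in how the 32-bit result is normalized: boolean and char are unsigned,
+ * so they are zero-extended ("and 0xff" / "and 0xffff"), while byte and
+ * short are signed and sign-extended ("seb" / "seh").
+ */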
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: mips64/op_sget_byte.S */
+/* File: mips64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGetByteStaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+    seb     v0, v0                      # v0 <- sign-extended byte
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: mips64/op_sget_char.S */
+/* File: mips64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGetCharStaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+    and     v0, v0, 0xffff              # v0 <- zero-extended char
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: mips64/op_sget_short.S */
+/* File: mips64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     artGetShortStaticFromCode
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    srl     a2, rINST, 8                # a2 <- AA
+    seh     v0, v0                      # v0 <- sign-extended short
+    PREFETCH_INST 2
+    bnez    a3, MterpException          # bail out
+    .if 0
+    SET_VREG_OBJECT v0, a2              # fp[AA] <- v0
+    .else
+    SET_VREG v0, a2                     # fp[AA] <- v0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: mips64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artSet32StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    srl     a3, rINST, 8                # a3 <- AA
+    GET_VREG a1, a3                     # a1 <- fp[AA]
+    ld      a2, OFF_FP_METHOD(rFP)
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet32StaticFromCode
+    bnezc   v0, MterpException          # 0 on success
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
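+
+/*
+ * Note: "bnezc" is a MIPS R6 compact branch (no delay slot).  The helper
+ * returns 0 on success and non-zero when an exception is pending, so
+ * ADVANCE 2 only runs once the store is known to have succeeded.
+ */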
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: mips64/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    ld      a1, OFF_FP_METHOD(rFP)
+    srl     a2, rINST, 8                # a2 <- AA
+    dlsa    a2, a2, rFP, 2              # a2 <- &fp[AA]
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet64IndirectStaticFromMterp
+    bnezc   v0, MterpException          # 0 on success, -1 on failure
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: mips64/op_sput_object.S */
+    .extern MterpSputObject
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    move    a3, rSELF
+    jal     MterpSputObject
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: mips64/op_sput_boolean.S */
+/* File: mips64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    srl     a3, rINST, 8                # a3 <- AA
+    GET_VREG a1, a3                     # a1 <- fp[AA]
+    ld      a2, OFF_FP_METHOD(rFP)
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet8StaticFromCode
+    bnezc   v0, MterpException          # 0 on success
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: mips64/op_sput_byte.S */
+/* File: mips64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    srl     a3, rINST, 8                # a3 <- AA
+    GET_VREG a1, a3                     # a1 <- fp[AA]
+    ld      a2, OFF_FP_METHOD(rFP)
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet8StaticFromCode
+    bnezc   v0, MterpException          # 0 on success
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: mips64/op_sput_char.S */
+/* File: mips64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    srl     a3, rINST, 8                # a3 <- AA
+    GET_VREG a1, a3                     # a1 <- fp[AA]
+    ld      a2, OFF_FP_METHOD(rFP)
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet16StaticFromCode
+    bnezc   v0, MterpException          # 0 on success
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: mips64/op_sput_short.S */
+/* File: mips64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
+    srl     a3, rINST, 8                # a3 <- AA
+    GET_VREG a1, a3                     # a1 <- fp[AA]
+    ld      a2, OFF_FP_METHOD(rFP)
+    move    a3, rSELF
+    PREFETCH_INST 2                     # Get next inst, but don't advance rPC
+    jal     artSet16StaticFromCode
+    bnezc   v0, MterpException          # 0 on success
+    ADVANCE 2                           # Past exception point - now advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: mips64/op_invoke_virtual.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeVirtual
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
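+
+/*
+ * Note on the invoke handlers: each hands the entire call off to a C++
+ * helper (MterpInvokeVirtual and friends), which resolves the target method
+ * and performs the call; v0 == 0 signals a pending exception.  The handlers
+ * use FETCH_ADVANCE_INST 3 because invoke instructions occupy three 16-bit
+ * code units, and MterpShouldSwitchInterpreters then decides whether to stay
+ * in mterp or fall back to the reference interpreter via MterpFallback.
+ */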
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: mips64/op_invoke_super.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeSuper
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: mips64/op_invoke_direct.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeDirect
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: mips64/op_invoke_static.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeStatic
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: mips64/op_invoke_interface.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeInterface
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: mips64/op_return_void_no_barrier.S */
+    .extern MterpSuspendCheck
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, 1f
+    jal     MterpSuspendCheck           # (self)
+1:
+    li      a0, 0
+    b       MterpReturn
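+
+/*
+ * Note: method return is a safepoint, so this handler tests the thread's
+ * suspend/checkpoint flags and calls MterpSuspendCheck before leaving.
+ * The "li a0, 0" materializes the (unused) return value that MterpReturn
+ * expects for the void case.
+ */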
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: mips64/op_invoke_virtual_range.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeVirtualRange
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: mips64/op_invoke_super_range.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeSuperRange
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: mips64/op_invoke_direct_range.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeDirectRange
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: mips64/op_invoke_static_range.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeStaticRange
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: mips64/op_invoke_interface_range.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeInterfaceRange
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: mips64/op_unused_79.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: mips64/op_unused_7a.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: mips64/op_neg_int.S */
+/* File: mips64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      not-int, neg-int
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                                        # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    subu    a0, zero, a0                # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
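+
+/*
+ * Note on the unop template: the "# optional op" slot is where a prefix
+ * instruction would be substituted; most unary ops fill in only the single
+ * result instruction.  "subu a0, zero, a0" negates the 32-bit value, and
+ * like all 32-bit MIPS64 arithmetic it leaves the result sign-extended to
+ * 64 bits.
+ */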
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: mips64/op_not_int.S */
+/* File: mips64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      not-int, neg-int
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                                        # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    nor     a0, zero, a0                # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
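+
+/*
+ * Note: MIPS has no dedicated bitwise-NOT instruction; "nor a0, zero, a0"
+ * computes ~(0 | a0) = ~a0, the idiomatic single-instruction NOT.
+ */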
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: mips64/op_neg_long.S */
+/* File: mips64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * For: not-long, neg-long
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a3                # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                                        # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    dsubu   a0, zero, a0                # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: mips64/op_not_long.S */
+/* File: mips64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * For: not-long, neg-long
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a3                # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                                        # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    nor     a0, zero, a0                # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: mips64/op_neg_float.S */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_FLOAT f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    neg.s   f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: mips64/op_neg_double.S */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_DOUBLE f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    neg.d   f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: mips64/op_int_to_long.S */
+    /* int-to-long vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB (sign-extended to 64 bits)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2                # vA <- vB
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: mips64/op_int_to_float.S */
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_FLOAT f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    cvt.s.w f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: mips64/op_int_to_double.S */
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_FLOAT f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    cvt.d.w f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: mips64/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: mips64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_VREG a0, a3                     # a0 <- vB
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT a0, a2              # vA <- vB
+    .else
+    SET_VREG a0, a2                     # vA <- vB
+    .endif
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: mips64/op_long_to_float.S */
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_DOUBLE f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    cvt.s.l f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
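+
+/*
+ * Note: for the long-to-float/double conversions the 64-bit integer source
+ * is loaded with GET_VREG_DOUBLE; the value only needs to land in a 64-bit
+ * FPU register, and "cvt.s.l"/"cvt.d.l" then treat its bits as a long while
+ * converting.
+ */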
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: mips64/op_long_to_double.S */
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_DOUBLE f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    cvt.d.l f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: mips64/op_float_to_int.S */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_FLOAT f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li      t0, INT_MIN_AS_FLOAT
+    mtc1    t0, f1
+    cmp.le.s f1, f1, f0
+    bc1nez  f1, .Lop_float_to_int_trunc
+    cmp.eq.s f1, f0, f0
+    li      t0, INT_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .Lop_float_to_int_done
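+
+/*
+ * Note on the NaN handling above (pre-NAN2008 behavior): when the input is
+ * >= INT_MIN_AS_FLOAT, plain truncation at .Lop_float_to_int_trunc is safe.
+ * Otherwise the result is built branchlessly: "cmp.eq.s f1, f0, f0" yields
+ * an all-ones mask for any non-NaN value and zero for NaN, so "INT_MIN &
+ * mask" produces INT_MIN for negative overflow and 0 for NaN, as Java
+ * requires.  The .Lop_float_to_int_trunc/_done labels are defined in the
+ * sister-handler section later in the file (outside this hunk).
+ */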
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: mips64/op_float_to_long.S */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_FLOAT f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li      t0, LONG_MIN_AS_FLOAT
+    mtc1    t0, f1
+    cmp.le.s f1, f1, f0
+    bc1nez  f1, .Lop_float_to_long_trunc
+    cmp.eq.s f1, f0, f0
+    dli     t0, LONG_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .Lop_float_to_long_done
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: mips64/op_float_to_double.S */
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_FLOAT f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    cvt.d.s f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: mips64/op_double_to_int.S */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_DOUBLE f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    dli     t0, INT_MIN_AS_DOUBLE
+    dmtc1   t0, f1
+    cmp.le.d f1, f1, f0
+    bc1nez  f1, .Lop_double_to_int_trunc
+    cmp.eq.d f1, f0, f0
+    li      t0, INT_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .Lop_double_to_int_done
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: mips64/op_double_to_long.S */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_DOUBLE f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    dli     t0, LONG_MIN_AS_DOUBLE
+    dmtc1   t0, f1
+    cmp.le.d f1, f1, f0
+    bc1nez  f1, .Lop_double_to_long_trunc
+    cmp.eq.d f1, f0, f0
+    dli     t0, LONG_MIN
+    mfc1    t1, f1
+    and     t0, t0, t1
+    b       .Lop_double_to_long_done
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: mips64/op_double_to_float.S */
+    /*
+     * Conversion from or to floating-point happens in a floating-point register.
+     * Therefore we load the input and store the output into or from a
+     * floating-point register irrespective of the type.
+     */
+/* File: mips64/fcvtHeader.S */
+    /*
+     * Loads a specified register from vB. Used primarily for conversions
+     * from or to a floating-point type.
+     *
+     * Sets up a1 = A and a2 = B. a1 is later used by fcvtFooter.S to
+     * store the result in vA and jump to the next instruction.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     */
+    ext     a1, rINST, 8, 4             # a1 <- A
+    srl     a2, rINST, 12               # a2 <- B
+    GET_VREG_DOUBLE f0, a2
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+
+    cvt.s.d f0, f0
+/* File: mips64/fcvtFooter.S */
+    /*
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
+     */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: mips64/op_int_to_byte.S */
+/* File: mips64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      not-int, neg-int
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                               # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    seb     a0, a0                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
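+    /*
+     * Illustrative sketch only (vregs is a hypothetical int32_t view of the
+     * Dalvik register file): the three narrowing conversions built from
+     * this template reduce to
+     *
+     *     vregs[A] = (int8_t)vregs[B];    // int-to-byte  (seb)
+     *     vregs[A] = (uint16_t)vregs[B];  // int-to-char  (and 0xffff)
+     *     vregs[A] = (int16_t)vregs[B];   // int-to-short (seh)
+     *
+     * i.e. sign-extension for byte and short, zero-extension for char.
+     */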
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: mips64/op_int_to_char.S */
+/* File: mips64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      not-int, neg-int
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                               # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    and     a0, a0, 0xffff                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: mips64/op_int_to_short.S */
+/* File: mips64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "a0 = op a0".
+     *
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      not-int, neg-int
+     */
+    /* unop vA, vB */
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    ext     a2, rINST, 8, 4             # a2 <- A
+                               # optional op
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    seh     a0, a0                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: mips64/op_add_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    addu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
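+    /*
+     * Illustrative sketch only: with the same hypothetical int32_t vregs
+     * view, the handler above decodes AA from rINST bits 8-15 and BB/CC
+     * from the two bytes of the second code unit, then computes
+     *
+     *     vregs[AA] = vregs[BB] + vregs[CC];  // addu wraps mod 2^32
+     *
+     * addu never traps on signed overflow, which matches Dalvik's add-int
+     * semantics, so no extra checks are needed.
+     */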
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: mips64/op_sub_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    subu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: mips64/op_mul_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    mul a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: mips64/op_div_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    div a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
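+    /*
+     * Illustrative sketch only: div-int instantiates the template with
+     * chkzero=1, so the handler throws before dividing:
+     *
+     *     if (vregs[CC] == 0) goto common_errDivideByZero;
+     *     vregs[AA] = vregs[BB] / vregs[CC];
+     *
+     * As the template comment notes, INT_MIN / -1 needs no explicit check:
+     * the div instruction does not trap and produces INT_MIN, which is the
+     * result Dalvik requires.
+     */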
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: mips64/op_rem_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    mod a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: mips64/op_and_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    and a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: mips64/op_or_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    or a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: mips64/op_xor_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    xor a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: mips64/op_shl_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    sll a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
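+    /*
+     * Illustrative note: sll (like sra and srl below) takes its shift
+     * distance from the low five bits of a1, so the Dalvik rule that
+     * shl-int masks the distance with 0x1f holds without an explicit
+     * "and a1, a1, 31".
+     */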
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: mips64/op_shr_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    sra a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: mips64/op_ushr_int.S */
+/* File: mips64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG a0, a2                     # a0 <- vBB
+    GET_VREG a1, a3                     # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    srl a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a4                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: mips64/op_add_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    daddu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
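+    /*
+     * Illustrative sketch only: the wide template differs from binop.S in
+     * using GET_VREG_WIDE/SET_VREG_WIDE (64-bit values spanning a Dalvik
+     * vreg pair) and doubleword ALU ops.  With a hypothetical int64_t view
+     * wregs of the register file, add-long is
+     *
+     *     wregs[AA] = wregs[BB] + wregs[CC];  // daddu wraps mod 2^64
+     */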
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: mips64/op_sub_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    dsubu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: mips64/op_mul_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    dmul a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: mips64/op_div_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    ddiv a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: mips64/op_rem_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    dmod a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: mips64/op_and_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    and a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: mips64/op_or_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    or a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: mips64/op_xor_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    xor a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: mips64/op_shl_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    dsll a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
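+    /*
+     * Illustrative note: as in the 32-bit case, dsll (and dsra/dsrl below)
+     * uses only the low six bits of a1 as the shift distance, matching
+     * Dalvik's "distance & 0x3f" rule for the long shifts.
+     */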
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: mips64/op_shr_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    dsra a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: mips64/op_ushr_long.S */
+/* File: mips64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_WIDE a0, a2                # a0 <- vBB
+    GET_VREG_WIDE a1, a3                # a1 <- vCC
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    dsrl a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a4           # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: mips64/op_add_float.S */
+/* File: mips64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    add.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
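+    /*
+     * Illustrative note: the float templates need no zero check and no
+     * result fix-up; add.s/sub.s/mul.s/div.s follow IEEE 754 single
+     * precision (div-float by zero gives +/-infinity, NaNs propagate),
+     * which is precisely what Dalvik specifies, so f0 can be stored as-is.
+     */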
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: mips64/op_sub_float.S */
+/* File: mips64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    sub.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: mips64/op_mul_float.S */
+/* File: mips64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    mul.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: mips64/op_div_float.S */
+/* File: mips64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f0, a2               # f0 <- vBB
+    GET_VREG_FLOAT f1, a3               # f1 <- vCC
+    div.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: mips64/op_rem_float.S */
+    /* rem-float vAA, vBB, vCC */
+    .extern fmodf
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_FLOAT f12, a2              # f12 <- vBB
+    GET_VREG_FLOAT f13, a3              # f13 <- vCC
+    jal     fmodf                       # f0 <- f12 op f13
+    srl     a4, rINST, 8                # a4 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a4               # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
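+    /*
+     * Illustrative note: rem-float has no single MIPS instruction, so the
+     * handler calls out to libm's fmodf with vBB/vCC in f12/f13 (the first
+     * two FP argument registers of the MIPS64 ABI) and receives the result
+     * in f0.  The "srl a4, rINST, 8" after the jal sits in the branch delay
+     * slot, so AA is decoded while the call is made.
+     */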
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: mips64/op_add_double.S */
+/* File: mips64/fbinopWide.S */
+    /*
+     * Generic 64-bit floating-point operation.
+     *
+     * For: add-double, sub-double, mul-double, div-double.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    add.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: mips64/op_sub_double.S */
+/* File: mips64/fbinopWide.S */
+    /*
+     * Generic 64-bit floating-point operation.
+     *
+     * For: add-double, sub-double, mul-double, div-double.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    sub.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: mips64/op_mul_double.S */
+/* File: mips64/fbinopWide.S */
+    /*
+     * Generic 64-bit floating-point operation.
+     *
+     * For: add-double, sub-double, mul-double, div-double.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    mul.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: mips64/op_div_double.S */
+/* File: mips64/fbinopWide.S */
+    /*
+     * Generic 64-bit floating-point operation.
+     *
+     * For: add-double, sub-double, mul-double, div-double.
+     * form: <op> f0, f0, f1
+     */
+    /* binop vAA, vBB, vCC */
+    srl     a4, rINST, 8                # a4 <- AA
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f0, a2              # f0 <- vBB
+    GET_VREG_DOUBLE f1, a3              # f1 <- vCC
+    div.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: mips64/op_rem_double.S */
+    /* rem-double vAA, vBB, vCC */
+    .extern fmod
+    lbu     a2, 2(rPC)                  # a2 <- BB
+    lbu     a3, 3(rPC)                  # a3 <- CC
+    GET_VREG_DOUBLE f12, a2             # f12 <- vBB
+    GET_VREG_DOUBLE f13, a3             # f13 <- vCC
+    jal     fmod                        # f0 <- f12 op f13
+    srl     a4, rINST, 8                # a4 <- AA
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a4              # vAA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: mips64/op_add_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    addu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
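+    /*
+     * Illustrative sketch only: the /2addr form packs both operands into
+     * the first code unit (A in bits 8-11, B in bits 12-15) and updates vA
+     * in place:
+     *
+     *     vregs[A] = vregs[A] + vregs[B];
+     *
+     * hence FETCH_ADVANCE_INST steps by one code unit rather than two.
+     */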
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: mips64/op_sub_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    subu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: mips64/op_mul_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    mul a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: mips64/op_div_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    div a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: mips64/op_rem_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    mod a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: mips64/op_and_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    and a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: mips64/op_or_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    or a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: mips64/op_xor_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    xor a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: mips64/op_shl_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    sll a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
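+
+    /*
+     * Editor's note (not mterp output): the shift handlers need no masking
+     * of the count because "sll" with a register operand only consumes the
+     * low 5 bits of a1, which matches Dalvik's shl-int semantics; the
+     * 64-bit shifts below rely on "dsll" using the low 6 bits the same way.
+     */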
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: mips64/op_shr_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    sra a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: mips64/op_ushr_int_2addr.S */
+/* File: mips64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a2                     # a0 <- vA
+    GET_VREG a1, a3                     # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    srl a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: mips64/op_add_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    daddu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
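+
+    /*
+     * Editor's note (not mterp output): the wide template differs from the
+     * 32-bit one only in GET_VREG_WIDE/SET_VREG_WIDE, which move a full
+     * 64-bit value between the vreg pair and a0/a1, so a single "daddu"
+     * implements the whole long addition on mips64.
+     */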
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: mips64/op_sub_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    dsubu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: mips64/op_mul_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    dmul a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: mips64/op_div_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    ddiv a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
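+
+    /*
+     * Editor's note (not mterp output): chkzero=1 keeps the ".if 1" branch,
+     * diverting a zero divisor to common_errDivideByZero before "ddiv"
+     * executes.  Per the template comment above, LONG_MIN / -1 needs no
+     * extra guard because the CPU already yields the expected result.
+     */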
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: mips64/op_rem_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    dmod a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: mips64/op_and_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    and a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: mips64/op_or_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    or a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: mips64/op_xor_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    xor a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: mips64/op_shl_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    dsll a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: mips64/op_shr_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    dsra a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: mips64/op_ushr_long_2addr.S */
+/* File: mips64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (a1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (LONG_MIN / -1) here, because the CPU handles it
+     * correctly.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_WIDE a0, a2                # a0 <- vA
+    GET_VREG_WIDE a1, a3                # a1 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+                               # optional op
+    dsrl a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE a0, a2           # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: mips64/op_add_float_2addr.S */
+/* File: mips64/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" floating-point operation.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f0, a2               # f0 <- vA
+    GET_VREG_FLOAT f1, a3               # f1 <- vB
+    add.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
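+
+    /*
+     * Editor's note (not mterp output): the float template carries no
+     * "chkzero" parameter at all; IEEE-754 arithmetic does not trap here,
+     * so div-float by zero simply leaves an infinity or NaN in f0.
+     */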
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: mips64/op_sub_float_2addr.S */
+/* File: mips64/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" floating-point operation.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f0, a2               # f0 <- vA
+    GET_VREG_FLOAT f1, a3               # f1 <- vB
+    sub.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: mips64/op_mul_float_2addr.S */
+/* File: mips64/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" floating-point operation.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f0, a2               # f0 <- vA
+    GET_VREG_FLOAT f1, a3               # f1 <- vB
+    mul.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: mips64/op_div_float_2addr.S */
+/* File: mips64/fbinop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" floating-point operation.
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f0, a2               # f0 <- vA
+    GET_VREG_FLOAT f1, a3               # f1 <- vB
+    div.s f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: mips64/op_rem_float_2addr.S */
+    /* rem-float/2addr vA, vB */
+    .extern fmodf
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_FLOAT f12, a2              # f12 <- vA
+    GET_VREG_FLOAT f13, a3              # f13 <- vB
+    jal     fmodf                       # f0 <- f12 op f13
+    ext     a2, rINST, 8, 4             # a2 <- A
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a2               # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
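+
+    /*
+     * Editor's note (not mterp output): rem-float has no single FPU
+     * instruction, so the handler calls libm's fmodf with the operands in
+     * the f12/f13 argument registers and the result returned in f0.  The
+     * second "ext a2" re-derives A because a2 is caller-saved and must be
+     * assumed clobbered across the call.
+     */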
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: mips64/op_add_double_2addr.S */
+/* File: mips64/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" floating-point operation.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr, div-double/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f0, a2              # f0 <- vA
+    GET_VREG_DOUBLE f1, a3              # f1 <- vB
+    add.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: mips64/op_sub_double_2addr.S */
+/* File: mips64/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" floating-point operation.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr, div-double/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f0, a2              # f0 <- vA
+    GET_VREG_DOUBLE f1, a3              # f1 <- vB
+    sub.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: mips64/op_mul_double_2addr.S */
+/* File: mips64/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" floating-point operation.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr, div-double/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f0, a2              # f0 <- vA
+    GET_VREG_DOUBLE f1, a3              # f1 <- vB
+    mul.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: mips64/op_div_double_2addr.S */
+/* File: mips64/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" floating-point operation.
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr, div-double/2addr.
+     * form: <op> f0, f0, f1
+     */
+    /* binop/2addr vA, vB */
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f0, a2              # f0 <- vA
+    GET_VREG_DOUBLE f1, a3              # f1 <- vB
+    div.d f0, f0, f1                              # f0 <- f0 op f1
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: mips64/op_rem_double_2addr.S */
+    /* rem-double/2addr vA, vB */
+    .extern fmod
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG_DOUBLE f12, a2             # f12 <- vA
+    GET_VREG_DOUBLE f13, a3             # f13 <- vB
+    jal     fmod                        # f0 <- f12 op f13
+    ext     a2, rINST, 8, 4             # a2 <- A
+    FETCH_ADVANCE_INST 1                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a2              # vA <- f0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: mips64/op_add_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    addu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
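+
+    /*
+     * Illustration (editor's note, not mterp output): in the 22s format the
+     * literal occupies the whole second code unit, so "lh a1, 2(rPC)"
+     * sign-extends it directly.  For example, add-int/lit16 v1, v0, #-2
+     * would encode as the units 0x01d0 0xfffe, giving A=1, B=0, CCCC=-2.
+     */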
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: mips64/op_rsub_int.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    subu a0, a1, a0                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
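+
+    /*
+     * Editor's note (not mterp output): rsub-int is the one lit16 op with
+     * reversed operands; "subu a0, a1, a0" computes CCCC - vB rather than
+     * vB - CCCC, which is what "reverse subtract" means.
+     */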
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: mips64/op_mul_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    mul a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: mips64/op_div_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    div a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
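+
+    /*
+     * Editor's note (not mterp output): for the literal forms the divisor
+     * is the constant CCCC itself, so the outcome of the chkzero branch is
+     * fixed for any given instruction; it only fires when an explicit
+     * divide-by-zero constant survived into the dex file.
+     */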
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: mips64/op_rem_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    mod a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: mips64/op_and_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    and a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: mips64/op_or_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    or a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: mips64/op_xor_int_lit16.S */
+/* File: mips64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CCCC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    lh      a1, 2(rPC)                  # a1 <- sign-extended CCCC
+    ext     a2, rINST, 8, 4             # a2 <- A
+    ext     a3, rINST, 12, 4            # a3 <- B
+    GET_VREG a0, a3                     # a0 <- vB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    xor a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: mips64/op_add_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    addu a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
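+
+    /*
+     * Illustration (editor's note, not mterp output): 22b packs AA into the
+     * high byte of the first code unit (hence "srl a2, rINST, 8") and
+     * BB/CC into the second, read byte-wise so that only the literal CC is
+     * sign-extended ("lb" vs. "lbu").  E.g. add-int/lit8 v7, v2, #-5 would
+     * encode as the units 0x07d8 0xfb02.
+     */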
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: mips64/op_rsub_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    subu a0, a1, a0                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: mips64/op_mul_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    mul a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: mips64/op_div_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    div a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: mips64/op_rem_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 1
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    mod a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: mips64/op_and_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                               # optional op
+    and a0, a0, a1                              # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: mips64/op_or_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                                        # optional op
+    or      a0, a0, a1                  # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: mips64/op_xor_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                                        # optional op
+    xor     a0, a0, a1                  # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: mips64/op_shl_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                                        # optional op
+    sll     a0, a0, a1                  # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: mips64/op_shr_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                                        # optional op
+    sra     a0, a0, a1                  # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: mips64/op_ushr_int_lit8.S */
+/* File: mips64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = a0 op a1".
+     * This could be a MIPS instruction or a function call.  (If the result
+     * comes back in a register other than a0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * CC (a1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    lbu     a3, 2(rPC)                  # a3 <- BB
+    lb      a1, 3(rPC)                  # a1 <- sign-extended CC
+    srl     a2, rINST, 8                # a2 <- AA
+    GET_VREG a0, a3                     # a0 <- vBB
+    .if 0
+    beqz    a1, common_errDivideByZero  # is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+                                        # optional op
+    srl     a0, a0, a1                  # a0 <- op, a0-a3 changed
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG a0, a2                     # vAA <- a0
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: mips64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a4, rINST, 8, 4             # a4 <- A
+    daddu   a1, a1, a3
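+    /* Forming the field address before the null check is safe: nothing */
+    /* is dereferenced until after the beqz below.                       */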
+    beqz    a3, common_errNullObject    # object was null
+    lw      a0, 0(a1)                   # a0 <- obj.field
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG a0, a4                     # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: mips64/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a4, 2(rPC)                  # a4 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    daddu   a4, a3, a4                  # create direct pointer
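+    /* The 64-bit field is read as two 32-bit halves and merged with dinsu */
+    /* below, presumably so a merely 4-byte-aligned field cannot fault.    */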
+    lw      a0, 0(a4)                   # a0 <- low word of field
+    lw      a1, 4(a4)                   # a1 <- high word of field
+    dinsu   a0, a1, 32, 32              # merge halves: a0 <- a1:a0
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG_WIDE a0, a2                # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: mips64/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    .extern artIGetObjectFromMterp
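+    /* Object reads go through a C helper, which may raise; hence the */
+    /* EXPORT_PC here and the pending-exception check below.          */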
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    EXPORT_PC
+    GET_VREG_U a0, a2                   # a0 <- object we're operating on
+    jal     artIGetObjectFromMterp      # (obj, offset)
+    ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
+    ext     a2, rINST, 8, 4             # a2 <- A
+    PREFETCH_INST 2
+    bnez    a3, MterpPossibleException  # bail out
+    SET_VREG_OBJECT v0, a2              # fp[A] <- v0
+    ADVANCE 2                           # advance rPC
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: mips64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    GET_VREG a0, a2                     # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a1, a3
+    sw      a0, 0(a1)                   # obj.field <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: mips64/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a3, 2(rPC)                  # a3 <- field byte offset
+    GET_VREG_U a2, a2                   # a2 <- fp[B], the object pointer
+    ext     a0, rINST, 8, 4             # a0 <- A
+    beqz    a2, common_errNullObject    # object was null
+    GET_VREG_WIDE a0, a0                # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a2, a3                  # create a direct pointer
+    sw      a0, 0(a1)                   # store low word of fp[A]
+    dsrl32  a0, a0, 0                   # a0 <- high word
+    sw      a0, 4(a1)                   # store high word
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: mips64/op_iput_object_quick.S */
+    .extern MterpIputObjectQuick
+    EXPORT_PC
+    daddu   a0, rFP, OFF_FP_SHADOWFRAME
+    move    a1, rPC
+    move    a2, rINST
+    jal     MterpIputObjectQuick
+    beqzc   v0, MterpException
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: mips64/op_invoke_virtual_quick.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeVirtualQuick
+    beqzc   v0, MterpException          # bail if the invoke raised
+    FETCH_ADVANCE_INST 3                # invoke is 3 code units wide
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback           # punt to the reference interpreter
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: mips64/op_invoke_virtual_range_quick.S */
+/* File: mips64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    .extern MterpShouldSwitchInterpreters
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    move    a3, rINST
+    jal     MterpInvokeVirtualQuickRange
+    beqzc   v0, MterpException          # bail if the invoke raised
+    FETCH_ADVANCE_INST 3                # invoke is 3 code units wide
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback           # punt to the reference interpreter
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: mips64/op_iput_boolean_quick.S */
+/* File: mips64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    GET_VREG a0, a2                     # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a1, a3
+    sb      a0, 0(a1)                   # obj.field <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: mips64/op_iput_byte_quick.S */
+/* File: mips64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    GET_VREG a0, a2                     # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a1, a3
+    sb      a0, 0(a1)                   # obj.field <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: mips64/op_iput_char_quick.S */
+/* File: mips64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    GET_VREG a0, a2                     # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a1, a3
+    sh      a0, 0(a1)                   # obj.field <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: mips64/op_iput_short_quick.S */
+/* File: mips64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- fp[B], the object pointer
+    ext     a2, rINST, 8, 4             # a2 <- A
+    beqz    a3, common_errNullObject    # object was null
+    GET_VREG a0, a2                     # a0 <- fp[A]
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    daddu   a1, a1, a3
+    sh      a0, 0(a1)                   # obj.field <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: mips64/op_iget_boolean_quick.S */
+/* File: mips64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a4, rINST, 8, 4             # a4 <- A
+    daddu   a1, a1, a3
+    beqz    a3, common_errNullObject    # object was null
+    lbu     a0, 0(a1)                   # a0 <- obj.field
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG a0, a4                     # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: mips64/op_iget_byte_quick.S */
+/* File: mips64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a4, rINST, 8, 4             # a4 <- A
+    daddu   a1, a1, a3
+    beqz    a3, common_errNullObject    # object was null
+    lb      a0, 0(a1)                   # a0 <- obj.field
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG a0, a4                     # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: mips64/op_iget_char_quick.S */
+/* File: mips64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a4, rINST, 8, 4             # a4 <- A
+    daddu   a1, a1, a3
+    beqz    a3, common_errNullObject    # object was null
+    lhu     a0, 0(a1)                   # a0 <- obj.field
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG a0, a4                     # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: mips64/op_iget_short_quick.S */
+/* File: mips64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    srl     a2, rINST, 12               # a2 <- B
+    lhu     a1, 2(rPC)                  # a1 <- field byte offset
+    GET_VREG_U a3, a2                   # a3 <- object we're operating on
+    ext     a4, rINST, 8, 4             # a4 <- A
+    daddu   a1, a1, a3
+    beqz    a3, common_errNullObject    # object was null
+    lh      a0, 0(a1)                   # a0 <- obj.field
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    SET_VREG a0, a4                     # fp[A] <- a0
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f3: /* 0xf3 */
+/* File: mips64/op_unused_f3.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: mips64/op_unused_f4.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f5: /* 0xf5 */
+/* File: mips64/op_unused_f5.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f6: /* 0xf6 */
+/* File: mips64/op_unused_f6.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f7: /* 0xf7 */
+/* File: mips64/op_unused_f7.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f8: /* 0xf8 */
+/* File: mips64/op_unused_f8.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f9: /* 0xf9 */
+/* File: mips64/op_unused_f9.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: mips64/op_unused_fa.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: mips64/op_unused_fb.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: mips64/op_unused_fc.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: mips64/op_unused_fd.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: mips64/op_unused_fe.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: mips64/op_unused_ff.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    b       MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
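+/*
+ * artMterpAsmInstructionStart/End bracket the 256 opcode handlers, each
+ * padded to 128 bytes, so handler N lives at Start + N * 128; the alt
+ * stubs further below rely on exactly this layout.
+ */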
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
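+
+/*
+ * "Sister" code holds out-of-line continuations for handlers whose code
+ * does not fit in their fixed 128-byte primary slot.
+ */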
+
+/* continuation for op_float_to_int */
+.Lop_float_to_int_trunc:
+    trunc.w.s f0, f0                    # f0 <- (int32)f0, round toward zero
+    mfc1    t0, f0                      # t0 <- f0
+.Lop_float_to_int_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* continuation for op_float_to_long */
+.Lop_float_to_long_trunc:
+    trunc.l.s f0, f0                    # f0 <- (int64)f0, round toward zero
+    dmfc1   t0, f0                      # t0 <- f0
+.Lop_float_to_long_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* continuation for op_double_to_int */
+.Lop_double_to_int_trunc:
+    trunc.w.d f0, f0                    # f0 <- (int32)f0, round toward zero
+    mfc1    t0, f0                      # t0 <- f0
+.Lop_double_to_int_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+/* continuation for op_double_to_long */
+.Lop_double_to_long_trunc:
+    trunc.l.d f0, f0                    # f0 <- (int64)f0, round toward zero
+    dmfc1   t0, f0                      # t0 <- f0
+.Lop_double_to_long_done:
+    /* Can't include fcvtFooter.S after break */
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_WIDE t0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
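+
+/*
+ * Each alt stub below preloads ra with the address of its primary handler
+ * (artMterpAsmInstructionStart + opcode * 128) before tail-calling
+ * MterpCheckBefore, so the C code's return lands directly in the real
+ * handler.
+ */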
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (0 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (1 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (2 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (3 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (4 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (5 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (6 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (7 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (8 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (9 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (10 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (11 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (12 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (13 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (14 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (15 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (16 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (17 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (18 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (19 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (20 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (21 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (22 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (23 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (24 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (25 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (26 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (27 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (28 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (29 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (30 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (31 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (32 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (33 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (34 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (35 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (36 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (37 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (38 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (39 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (40 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (41 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (42 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (43 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (44 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (45 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (46 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (47 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (48 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (49 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (50 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (51 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (52 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (53 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (54 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (55 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (56 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (57 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (58 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (59 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (60 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (61 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (62 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (63 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (64 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (65 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (66 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (67 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (68 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (69 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (70 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (71 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (72 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (73 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (74 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (75 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (76 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (77 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (78 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (79 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (80 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (81 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (82 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (83 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (84 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (85 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (86 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (87 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (88 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (89 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (90 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (91 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (92 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (93 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (94 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (95 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (96 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (97 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (98 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (99 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (100 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (101 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (102 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (103 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (104 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (105 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (106 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (107 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (108 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (109 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (110 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (111 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (112 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (113 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (114 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (115 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (116 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (117 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (118 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (119 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (120 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (121 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (122 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (123 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (124 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (125 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (126 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (127 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (128 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (129 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (130 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (131 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (132 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (133 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (134 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (135 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (136 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (137 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (138 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (139 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (140 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (141 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (142 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (143 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (144 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (145 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (146 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (147 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (148 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (149 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (150 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (151 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (152 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (153 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (154 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (155 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (156 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (157 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (158 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (159 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (160 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (161 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (162 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (163 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (164 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (165 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (166 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (167 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (168 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (169 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (170 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (171 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (172 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (173 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (174 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (175 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (176 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (177 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (178 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (179 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (180 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (181 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (182 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (183 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (184 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (185 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (186 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (187 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (188 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (189 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (190 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (191 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (192 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (193 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (194 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (195 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (196 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (197 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (198 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (199 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (200 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (201 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (202 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (203 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (204 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (205 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (206 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (207 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (208 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (209 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (210 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (211 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (212 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (213 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (214 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (215 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (216 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (217 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (218 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (219 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (220 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (221 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (222 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (223 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (224 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (225 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (226 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (227 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (228 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (229 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (230 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (231 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (232 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (233 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (234 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (235 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (236 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (237 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (238 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (239 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (240 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (241 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (242 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (243 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (244 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (245 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (246 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (247 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (248 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (249 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (250 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (251 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (252 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (253 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (254 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: mips64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    dla     ra, artMterpAsmInstructionStart
+    dla     t9, MterpCheckBefore
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
+    daddu   ra, ra, (255 * 128)            # Addr of primary handler.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
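+/*
+ * Layout sketch (illustrative, not part of the generated code): every
+ * 128-byte stub above sits at a fixed stride from the table base, mirroring
+ * the primary table, so both handler addresses follow from simple
+ * arithmetic, e.g. in C:
+ *
+ *   uint8_t* primary = artMterpAsmInstructionStart    + opcode * 128;
+ *   uint8_t* alt     = artMterpAsmAltInstructionStart + opcode * 128;
+ *
+ * This is what each stub's "daddu ra, ra, (opcode * 128)" computes before
+ * tail-calling MterpCheckBefore, which later "returns" into the primary
+ * handler via ra.
+ */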
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
+/* File: mips64/footer.S */
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+
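+/*
+ * The three common_errXXX entry points below share one pattern; roughly, as
+ * an illustrative C sketch (MterpLogXXXException stands for the per-error
+ * logger that each label calls):
+ *
+ *   EXPORT_PC();
+ *   if (MTERP_LOGGING) MterpLogXXXException(self, shadow_frame);
+ *   goto MterpCommonFallback;   // bail to the reference interpreter
+ */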
+    .extern MterpLogDivideByZeroException
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogDivideByZeroException
+#endif
+    b       MterpCommonFallback
+
+    .extern MterpLogArrayIndexException
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogArrayIndexException
+#endif
+    b       MterpCommonFallback
+
+    .extern MterpLogNullObjectException
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogNullObjectException
+#endif
+    b       MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ld      a0, THREAD_EXCEPTION_OFFSET(rSELF)
+    beqzc   a0, MterpFallback                       # If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+    .extern MterpHandleException
+    .extern MterpShouldSwitchInterpreters
+MterpException:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpHandleException                    # (self, shadow_frame)
+    beqzc   v0, MterpExceptionReturn                # no local catch, back to caller.
+    ld      a0, OFF_FP_CODE_ITEM(rFP)
+    lwu     a1, OFF_FP_DEX_PC(rFP)
+    REFRESH_IBASE
+    daddu   rPC, a0, CODEITEM_INSNS_OFFSET
+    dlsa    rPC, a1, rPC, 1                         # generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE v0
+    GOTO_OPCODE v0
+    /* NOTE: no fallthrough */
+
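+/*
+ * An illustrative C sketch of the exception path above (the helper names are
+ * the .extern routines it calls; return_to_caller and resume_at_catch_block
+ * are shorthand for the surrounding exit and FETCH/GOTO sequences):
+ *
+ *   if (!MterpHandleException(self, shadow_frame))
+ *     return_to_caller();                  // no local catch
+ *   // catch found: rebuild the dex pc pointer (insns are 16-bit units)
+ *   rPC = code_item + CODEITEM_INSNS_OFFSET + 2 * dex_pc;
+ *   if (MterpShouldSwitchInterpreters())
+ *     goto MterpFallback;
+ *   resume_at_catch_block();
+ */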
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 64 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    bgtzc   rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+    li      v0, JIT_CHECK_OSR
+    beqc    rPROFILE, v0, .L_osr_check
+    bltc    rPROFILE, v0, .L_resume_backward_branch
+    dsubu   rPROFILE, 1
+    beqzc   rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnezc   ra, .L_suspend_request_pending
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    move    a0, rSELF
+    jal     MterpSuspendCheck           # (self)
+    bnezc   v0, MterpFallback
+    REFRESH_IBASE                       # might have changed during suspend
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_no_count_backwards:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bnec    rPROFILE, v0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal MterpMaybeDoOnStackReplacement  # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beqc    rPROFILE, v0, .L_check_osr_forward
+.L_resume_forward_branch:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    ld      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
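+/*
+ * The branch handling above reduces to roughly this decision tree (an
+ * illustrative C sketch; JIT_CHECK_OSR is the -1 sentinel compared against,
+ * and maybe_do_osr / report_hotness_batch are shorthand for the
+ * MterpMaybeDoOnStackReplacement and MterpAddHotnessBatch calls):
+ *
+ *   if (offset > 0) {                           // forward branch: not counted
+ *     if (rPROFILE == JIT_CHECK_OSR) maybe_do_osr();
+ *   } else {                                    // taken backward branch
+ *     if (rPROFILE == JIT_CHECK_OSR)            maybe_do_osr();
+ *     else if (rPROFILE > 0 && --rPROFILE == 0) report_hotness_batch();
+ *     // rPROFILE < JIT_CHECK_OSR: profiling disabled, just resume
+ *     if (suspend_or_checkpoint_requested()) MterpSuspendCheck(self);
+ *   }
+ */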
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST                               # rINST contains offset
+    jal     MterpLogOSR
+#endif
+    li      v0, 1                                   # Signal normal return
+    b       MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+    .extern MterpLogFallback
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpLogFallback
+#endif
+MterpCommonFallback:
+    li      v0, 0                                   # signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and RA.  Here we restore SP, restore the registers, and then restore
+ * RA to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    li      v0, 1                                   # signal return to caller.
+    b       MterpDone
+/*
+ * The returned value is expected in a0; if it is not 64-bit, the 32 most
+ * significant bits of a0 must be 0.
+ */
+MterpReturn:
+    ld      a2, OFF_FP_RESULT_REGISTER(rFP)
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    sd      a0, 0(a2)
+    move    a0, rSELF
+    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    beqzc   ra, check2
+    jal     MterpSuspendCheck                       # (self)
+check2:
+    li      v0, 1                                   # signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if <= 0, nothing to report; fall through if > 0.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
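+/*
+ * Illustrative C sketch of the exit-path profiling above (the field name is
+ * shorthand for the slot at SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET):
+ *
+ *   if (rPROFILE > 0) {                       // unreported hotness counts
+ *     shadow_frame->hotness_countdown = rPROFILE;
+ *     rPROFILE = MterpAddHotnessBatch(method, shadow_frame, self);
+ *   }
+ *   return v0;                                // 0 => retry with reference interpreter
+ */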
+.L_pop_and_return:
+    ld      s6, STACK_OFFSET_S6(sp)
+    .cfi_restore 22
+    ld      s5, STACK_OFFSET_S5(sp)
+    .cfi_restore 21
+    ld      s4, STACK_OFFSET_S4(sp)
+    .cfi_restore 20
+    ld      s3, STACK_OFFSET_S3(sp)
+    .cfi_restore 19
+    ld      s2, STACK_OFFSET_S2(sp)
+    .cfi_restore 18
+    ld      s1, STACK_OFFSET_S1(sp)
+    .cfi_restore 17
+    ld      s0, STACK_OFFSET_S0(sp)
+    .cfi_restore 16
+
+    ld      ra, STACK_OFFSET_RA(sp)
+    .cfi_restore 31
+
+    ld      t8, STACK_OFFSET_GP(sp)
+    .cpreturn
+    .cfi_restore 28
+
+    .set    noreorder
+    jr      ra
+    daddu   sp, sp, STACK_SIZE
+    .cfi_adjust_cfa_offset -STACK_SIZE
+
+    .cfi_endproc
+    .set    reorder
+    .size ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
new file mode 100644
index 0000000..29ee248
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -0,0 +1,12846 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'x86'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: x86/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke, allows higher-level code to create frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters are passed on the stack, pushed right-to-left.  On entry to the
+target, the first parameter is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %esp,%ebp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp).
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
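+
+/*
+ * Worked example of the arg addressing rule above (illustrative only): with
+ * the traditional prologue in place, the saved %ebp and the return address
+ * sit below the arguments, so arg0 lives at ((0 + 2) * 4)(%ebp) = 8(%ebp)
+ * and arg1 at ((1 + 2) * 4)(%ebp) = 12(%ebp).
+ */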
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/*
+ * Handle Mac compiler specifics.
+ */
+#if defined(__APPLE__)
+    #define MACRO_LITERAL(value) $(value)
+    #define FUNCTION_TYPE(name)
+    #define SIZE(start,end)
+    // Mac OS' symbols have an _ prefix.
+    #define SYMBOL(name) _ ## name
+#else
+    #define MACRO_LITERAL(value) $value
+    #define FUNCTION_TYPE(name) .type name, @function
+    #define SIZE(start,end) .size start, .-end
+    #define SYMBOL(name) name
+#endif
+
+.macro PUSH _reg
+    pushl \_reg
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset \_reg, 0
+.endm
+
+.macro POP _reg
+    popl \_reg
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore \_reg
+.endm
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
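+
+/*
+ * Minimal C sketch of the OFF_FP convention (illustrative, field names are
+ * hypothetical; not part of the generated interpreter):
+ *
+ *   uint32_t* rFP = shadow_frame->vregs_;           // rFP points at vregs
+ *   ArtMethod** m = (ArtMethod**)((char*)rFP + OFF_FP_METHOD);
+ *   // == (char*)shadow_frame + SHADOWFRAME_METHOD_OFFSET, because
+ *   // OFF_FP(a) == a - SHADOWFRAME_VREGS_OFFSET.
+ */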
+
+/* Frame size must be 16-byte aligned.
+ * Remember: 4 bytes for the return address + 4 * 4 bytes for spills.
+ */
+#define FRAME_SIZE     28
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16 + 16)
+#define IN_ARG2        (FRAME_SIZE + 16 + 12)
+#define IN_ARG1        (FRAME_SIZE + 16 +  8)
+#define IN_ARG0        (FRAME_SIZE + 16 +  4)
+/* Spill offsets relative to %esp */
+#define LOCAL0         (FRAME_SIZE -  4)
+#define LOCAL1         (FRAME_SIZE -  8)
+#define LOCAL2         (FRAME_SIZE - 12)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
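+
+/*
+ * Worked example of the layout above (illustrative): the prologue pushes four
+ * callee-save registers (16 bytes) below the 4-byte return address and then
+ * allocates FRAME_SIZE (28) more, so 28 + 16 + 4 = 48 bytes separate %esp from
+ * the first incoming argument; hence IN_ARG0 = 48, and the total stays
+ * 16-byte aligned.
+ */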
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+#define rPROFILE OFF_FP_COUNTDOWN_OFFSET(rFP)
+
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * Refresh handler table.
+ * rIBASE lives in a caller-save register, so we must restore it after each call.
+ * It is also clobbered as a result of some 64-bit operations (like imul), and
+ * should be restored in such cases as well.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF is already loaded then we can use it from known reg.
+ */
+.macro RESTORE_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST does not contain the opcode number.
+ * However, some utilities require the full instruction word, so this
+ * macro restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    MACRO_LITERAL(\_opnum), rINSTbl
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    MACRO_LITERAL(7), %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
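+
+/*
+ * C sketch of the dispatch above (illustrative): handlers are 128-byte
+ * aligned (.balign 128), so the handler address is just base + opcode * 128.
+ *
+ *   uint8_t opcode   = inst & 0xff;         // movzx  rINSTbl, %eax
+ *   uint8_t operands = (inst >> 8) & 0xff;  // movzbl rINSTbh, rINST
+ *   handler = ibase + (opcode << 7);        // shll $7, addl rIBASE
+ *   goto *handler;                          // jmp *%eax
+ */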
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
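+
+/*
+ * C model of the vreg/ref double-store above (illustrative): values and
+ * references live in parallel arrays so the GC only scans slots that hold
+ * objects.
+ *
+ *   vregs[v] = value; refs[v] = 0;      // SET_VREG: clear the ref slot
+ *   vregs[v] = obj;   refs[v] = obj;    // SET_VREG_OBJECT: double-store
+ */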
+
+/* File: x86/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global SYMBOL(ExecuteMterpImpl)
+    FUNCTION_TYPE(ExecuteMterpImpl)
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
+
+SYMBOL(ExecuteMterpImpl):
+    .cfi_startproc
+    .cfi_def_cfa esp, 4
+
+    /* Spill callee save regs */
+    PUSH    %ebp
+    PUSH    %edi
+    PUSH    %esi
+    PUSH    %ebx
+
+    /* Allocate frame */
+    subl    $FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Load ShadowFrame pointer */
+    movl    IN_ARG2(%esp), %edx
+
+    /* Remember the return register */
+    movl    IN_ARG3(%esp), %eax
+    movl    %eax, SHADOWFRAME_RESULT_REGISTER_OFFSET(%edx)
+
+    /* Remember the code_item */
+    movl    IN_ARG1(%esp), %ecx
+    movl    %ecx, SHADOWFRAME_CODE_ITEM_OFFSET(%edx)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(%edx), %eax
+    leal    SHADOWFRAME_VREGS_OFFSET(%edx), rFP
+    leal    (rFP, %eax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
+    lea     CODEITEM_INSNS_OFFSET(%ecx), rPC
+    lea     (rPC, %eax, 2), rPC
+    EXPORT_PC
+
+    /* Set up for backwards branches & osr profiling */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+
+    .global SYMBOL(artMterpAsmInstructionStart)
+    FUNCTION_TYPE(SYMBOL(artMterpAsmInstructionStart))
+SYMBOL(artMterpAsmInstructionStart) = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: x86/op_nop.S */
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST, rINST
+    .if 0
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: x86/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST, rINST                   # rINST <- fp[BBBB]
+    .if 0
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: x86/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST, %ecx
+    .if 0
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: x86/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_FP_VREG %xmm0, rINST           # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0, %ecx            # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: x86/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    movzbl  rINSTbl, %eax                   # eax <- AA
+    GET_WIDE_FP_VREG %xmm0, %ecx            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0, %eax            # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: x86/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  4(rPC), %ecx                    # ecx<- BBBB
+    movzwl  2(rPC), %eax                    # eax<- AAAA
+    GET_WIDE_FP_VREG %xmm0, %ecx            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0, %eax            # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: x86/op_move_object.S */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST, rINST
+    .if 1
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: x86/op_move_object_from16.S */
+/* File: x86/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST, rINST                   # rINST <- fp[BBBB]
+    .if 1
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: x86/op_move_object_16.S */
+/* File: x86/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST, %ecx
+    .if 1
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: x86/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JValue.
+    movl    (%eax), %eax                    # eax <- result.i.
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[AA] <- result
+    .else
+    SET_VREG %eax, rINST                    # fp[AA] <- result
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: x86/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JValue.
+    movl    4(%eax), %ecx                   # Get high
+    movl    (%eax), %eax                    # Get low
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    SET_VREG_HIGH %ecx, rINST               # v[AA+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: x86/op_move_result_object.S */
+/* File: x86/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JValue.
+    movl    (%eax), %eax                    # eax <- result.i.
+    .if 1
+    SET_VREG_OBJECT %eax, rINST             # fp[AA] <- result
+    .else
+    SET_VREG %eax, rINST                    # fp[AA] <- result
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: x86/op_move_exception.S */
+    /* move-exception vAA */
+    movl    rSELF, %ecx
+    movl    THREAD_EXCEPTION_OFFSET(%ecx), %eax
+    SET_VREG_OBJECT %eax, rINST             # fp[AA] <- exception object
+    movl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: x86/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: x86/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINST                    # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: x86/op_return_wide.S */
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINST                    # eax <- v[AA+0]
+    GET_VREG_HIGH %ecx, rINST               # ecx <- v[AA+1]
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: x86/op_return_object.S */
+/* File: x86/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINST                    # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: x86/op_const_4.S */
+    /* const/4 vA, #+B */
+    movsx   rINSTbl, %eax                   # eax <- ssssssBx
+    movl    $0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $4, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
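+
+/*
+ * C sketch of the const/4 decode above (illustrative): the high byte of the
+ * 16-bit code unit packs B (signed literal, high nibble) over A (low nibble).
+ *
+ *   int A = (inst >> 8) & 0xf;         // andl $0xf
+ *   int B = ((int16_t)inst) >> 12;     // movsx + sarl $4 on the BA byte
+ *   vregs[A] = B;
+ */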
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: x86/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx, rINST                    # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: x86/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax, rINST                    # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: x86/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax, rINST                    # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: x86/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    movswl  2(rPC), %eax                    # eax <- ssssBBBB
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE, rINST             # store msw
+    SET_VREG %eax, rINST                    # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: x86/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- BBBBbbbb
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssBBBBbbbb
+    SET_VREG_HIGH rIBASE, rINST             # store msw
+    SET_VREG %eax, rINST                    # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: x86/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- lsw
+    movzbl  rINSTbl, %ecx                   # ecx <- AA
+    movl    6(rPC), rINST                   # rINST <- msw
+    SET_VREG %eax, %ecx
+    SET_VREG_HIGH  rINST, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: x86/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    xorl    %eax, %eax
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: x86/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: x86/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %eax                    # eax <- BBBBBBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: x86/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: x86/op_monitor_enter.S */
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG %ecx, rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    SYMBOL(artLockObjectFromCode)   # (object, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: x86/op_monitor_exit.S */
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG %ecx, rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    SYMBOL(artUnlockObjectFromCode) # (object, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: x86/op_check_cast.S */
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    leal    VREG_ADDRESS(rINST), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: x86/op_instance_of.S */
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- CCCC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $4, %eax                       # eax <- B
+    leal    VREG_ADDRESS(%eax), %ecx        # Get object address
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    andb    $0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: x86/op_array_length.S */
+/*
+ * Return the length of an array.
+ */
+    mov     rINST, %eax                     # eax <- BA
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %ecx, rINST                    # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%ecx), rINST
+    SET_VREG rINST, %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: x86/op_new_instance.S */
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    REFRESH_INST 34
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpNewInstance)
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: x86/op_new_array.S */
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 35
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpNewArray)
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: x86/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    SYMBOL(MterpFilledNewArray)
+    REFRESH_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: x86/op_filled_new_array_range.S */
+/* File: x86/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    SYMBOL(MterpFilledNewArrayRange)
+    REFRESH_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: x86/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    GET_VREG %eax, rINST                    # eax <- vAA (array object)
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpFillArrayData)      # (obj, payload)
+    REFRESH_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: x86/op_throw.S */
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax, rINST                    # eax<- vAA (exception object)
+    testl   %eax, %eax
+    jz      common_errNullObject
+    movl    rSELF,%ecx
+    movl    %eax, THREAD_EXCEPTION_OFFSET(%ecx)
+    jmp     MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: x86/op_goto.S */
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbl  rINSTbl, rINST                  # rINST <- ssssssAA
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: x86/op_goto_16.S */
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswl  2(rPC), rINST                   # rINST <- ssssAAAA
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: x86/op_goto_32.S */
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Unlike most opcodes, this one is allowed to branch to itself, so
+ * our "backward branch" test must be "<= 0" instead of "< 0".
+ */
+    /* goto/32 +AAAAAAAA */
+    movl    2(rPC), rINST                   # rINST <- AAAAAAAA
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: x86/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax, rINST                    # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    SYMBOL(MterpDoPackedSwitch)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    movl    %eax, rINST
+    jmp     MterpCommonTakenBranch
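+
+/*
+ * Sketch of the payload the helper walks (illustrative, following the Dalvik
+ * packed-switch-payload format; struct name is hypothetical):
+ *
+ *   struct PackedSwitchPayload {
+ *     uint16_t ident;      // 0x0100
+ *     uint16_t size;       // number of targets
+ *     int32_t  first_key;
+ *     int32_t  targets[];  // relative branch offsets, in code units
+ *   };
+ *   // in range: return targets[vAA - first_key]; else fall through (3 units)
+ */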
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: x86/op_sparse_switch.S */
+/* File: x86/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax, rINST                    # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    SYMBOL(MterpDoSparseSwitch)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    movl    %eax, rINST
+    jmp     MterpCommonTakenBranch
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: x86/op_cmpl_float.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movss VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpl_float_nan_is_neg
+    je      .Lop_cmpl_float_finish
+    jb      .Lop_cmpl_float_less
+.Lop_cmpl_float_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpl_float_finish
+.Lop_cmpl_float_nan_is_neg:
+.Lop_cmpl_float_less:
+    decl    %eax
+.Lop_cmpl_float_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
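+
+/*
+ * Flag note for the ucomiss sequence above (illustrative): an unordered
+ * compare (NaN) sets PF, hence the jp; ZF means equal (je) and CF means the
+ * first operand is less (jb).  cmpl-float maps NaN to -1, so its NaN path
+ * shares the decrement; cmpg-float (next handler) maps NaN to +1 instead.
+ */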
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: x86/op_cmpg_float.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movss VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpg_float_nan_is_pos
+    je      .Lop_cmpg_float_finish
+    jb      .Lop_cmpg_float_less
+.Lop_cmpg_float_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpg_float_finish
+.Lop_cmpg_float_nan_is_neg:
+.Lop_cmpg_float_less:
+    decl    %eax
+.Lop_cmpg_float_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: x86/op_cmpl_double.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movsd VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpl_double_nan_is_neg
+    je      .Lop_cmpl_double_finish
+    jb      .Lop_cmpl_double_less
+.Lop_cmpl_double_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpl_double_finish
+.Lop_cmpl_double_nan_is_neg:
+.Lop_cmpl_double_less:
+    decl    %eax
+.Lop_cmpl_double_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: x86/op_cmpg_double.S */
+/* File: x86/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movsd VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%ecx), %xmm0
+    jp      .Lop_cmpg_double_nan_is_pos
+    je      .Lop_cmpg_double_finish
+    jb      .Lop_cmpg_double_less
+.Lop_cmpg_double_nan_is_pos:
+    incl    %eax
+    jmp     .Lop_cmpg_double_finish
+.Lop_cmpg_double_nan_is_neg:
+.Lop_cmpg_double_less:
+    decl    %eax
+.Lop_cmpg_double_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: x86/op_cmp_long.S */
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1], BB is clobbered
+    cmpl    VREG_HIGH_ADDRESS(%ecx), %eax
+    jl      .Lop_cmp_long_smaller
+    jg      .Lop_cmp_long_bigger
+    movzbl  2(rPC), %eax                    # eax <- BB, restore BB
+    GET_VREG %eax, %eax                     # eax <- v[BB]
+    sub     VREG_ADDRESS(%ecx), %eax
+    ja      .Lop_cmp_long_bigger
+    jb      .Lop_cmp_long_smaller
+.Lop_cmp_long_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_cmp_long_bigger:
+    movl    $1, %eax
+    jmp     .Lop_cmp_long_finish
+
+.Lop_cmp_long_smaller:
+    movl    $-1, %eax
+    jmp     .Lop_cmp_long_finish
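+
+/*
+ * C sketch of the 64-bit compare above (illustrative): signed high words
+ * decide first; equal high words defer to an unsigned low-word compare.
+ *
+ *   if (bb.hi != cc.hi) return bb.hi < cc.hi ? -1 : 1;  // jl / jg (signed)
+ *   if (bb.lo != cc.lo) return bb.lo < cc.lo ? -1 : 1;  // jb / ja (unsigned)
+ *   return 0;                                           // subtraction was 0
+ */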
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: x86/op_if_eq.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    jne   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
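+
+/*
+ * C sketch of the bincmp pattern above (illustrative): the reverse test
+ * skips the branch; a taken branch routes through MterpCommonTakenBranch,
+ * and the not-taken path checks for a pending OSR request.
+ *
+ *   if (!(vA OP vB)) {                       // reverse compare, e.g. jne
+ *     if (rPROFILE == JIT_CHECK_OSR) check_osr_not_taken();
+ *     pc += 2;                               // next instruction
+ *   } else {
+ *     take_branch((int16_t)code_units[1]);   // movswl 2(rPC)
+ *   }
+ */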
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: x86/op_if_ne.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    je   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: x86/op_if_lt.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    jge   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: x86/op_if_ge.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    jl   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: x86/op_if_gt.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    jle   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: x86/op_if_le.S */
+/* File: x86/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    jg   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: x86/op_if_eqz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    jne   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: x86/op_if_nez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    je   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: x86/op_if_ltz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    jge   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: x86/op_if_gez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    jl   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: x86/op_if_gtz.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    jle   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: x86/op_if_lez.S */
+/* File: x86/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    jg   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: x86/op_unused_3e.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: x86/op_unused_3f.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: x86/op_unused_40.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: x86/op_unused_41.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: x86/op_unused_42.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: x86/op_unused_43.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movl   MIRROR_INT_ARRAY_DATA_OFFSET(%eax,%ecx,4), %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
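+
+/*
+ * Note on the checks above (illustrative): one unsigned compare covers both
+ * out-of-range cases, because a negative index reinterpreted as unsigned is
+ * larger than any valid array length.
+ *
+ *   if (array == NULL) goto throw_null;                       // testl / je
+ *   if ((uint32_t)index >= array->length) goto throw_bounds;  // cmpl / jae
+ *   value = array->data[index];
+ */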
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: x86/op_aget_wide.S */
+/*
+ * Array get, 64 bits.  vAA <- vBB[vCC].
+ */
+    /* aget-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    movq    (%eax), %xmm0                   # xmm0 <- vBB[vCC]
+    SET_WIDE_FP_VREG %xmm0, rINST           # vAA <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: x86/op_aget_object.S */
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: x86/op_aget_boolean.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movzbl   MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: x86/op_aget_byte.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movsbl   MIRROR_BYTE_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: x86/op_aget_char.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movzwl   MIRROR_CHAR_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: x86/op_aget_short.S */
+/* File: x86/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    movswl   MIRROR_SHORT_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_INT_ARRAY_DATA_OFFSET(%eax,%ecx,4), %eax
+    GET_VREG rINST, rINST
+    movl  rINST, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: x86/op_aput_wide.S */
+/*
+ * Array put, 64 bits.  vBB[vCC] <- vAA.
+ *
+ */
+    /* aput-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    GET_WIDE_FP_VREG %xmm0, rINST           # xmm0 <- vAA
+    movq    %xmm0, (%eax)                   # vBB[vCC] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: x86/op_aput_object.S */
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 77
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAputObject)         # (shadow_frame, dex_pc_ptr, inst_data)
+    RESTORE_IBASE
+    testb   %al, %al
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
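+/*
+ * Editorial note: aput-object is handled entirely in C++ because the
+ * store needs an array-store compatibility check and a GC write
+ * barrier, neither of which the inline primitive aput paths above
+ * perform.
+ */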
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: x86/op_aput_boolean.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    GET_VREG rINST, rINST
+    movb  rINSTbl, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: x86/op_aput_byte.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_BYTE_ARRAY_DATA_OFFSET(%eax,%ecx,1), %eax
+    GET_VREG rINST, rINST
+    movb  rINSTbl, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: x86/op_aput_char.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_CHAR_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    GET_VREG rINST, rINST
+    movw  rINSTw, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: x86/op_aput_short.S */
+/* File: x86/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_SHORT_ARRAY_DATA_OFFSET(%eax,%ecx,2), %eax
+    GET_VREG rINST, rINST
+    movw  rINSTw, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGet32InstanceFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
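+/*
+ * Editorial note on the encoding (format 22c): byte 1 of the
+ * instruction packs two register nibbles as B|A, so "sarl $4" extracts
+ * B (the object register) and the later "andb $0xf" extracts A (the
+ * destination register).
+ */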
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: x86/op_iget_wide.S */
+/*
+ * 64-bit instance field get.
+ *
+ * for: iget-wide
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGet64InstanceFromCode)
+    mov     rSELF, %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_VREG %eax, rINST
+    SET_VREG_HIGH %edx, rINST
+    RESTORE_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: x86/op_iget_object.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGetObjInstanceFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: x86/op_iget_boolean.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGetBooleanInstanceFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: x86/op_iget_byte.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGetByteInstanceFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: x86/op_iget_char.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGetCharInstanceFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: x86/op_iget_short.S */
+/* File: x86/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGetShortInstanceFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet32InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
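+/*
+ * Editorial note: the artSet* helpers report failure through their
+ * return value (hence testb/jnz to MterpPossibleException), whereas the
+ * artGet* handlers above instead poll the thread's pending-exception
+ * slot after the call.
+ */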
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: x86/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl,%ecx                    # ecx <- BA
+    sarl    $4,%ecx                        # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf,rINSTbl                   # rINST <- A
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet64InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: x86/op_iput_object.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 91
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpIputObject)
+    testb   %al, %al
+    jz      MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: x86/op_iput_boolean.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet8InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: x86/op_iput_byte.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet8InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: x86/op_iput_char.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet16InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: x86/op_iput_short.S */
+/* File: x86/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet16InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGet32StaticFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
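+/*
+ * Editorial note: the ".if 0/.else" pair is the expanded template's
+ * is-object flag.  Compare op_sget_object below, where the same
+ * template is emitted with ".if 1" so that SET_VREG_OBJECT is assembled
+ * instead of SET_VREG.
+ */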
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: x86/op_sget_wide.S */
+/*
+ * SGET_WIDE handler wrapper.
+ *
+ */
+    /* sget-wide vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGet64StaticFromCode)
+    movl    rSELF, %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG %eax, rINST                    # fp[A] <- low part
+    SET_VREG_HIGH %edx, rINST               # fp[A+1] <- high part
+    RESTORE_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: x86/op_sget_object.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGetObjStaticFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 1
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: x86/op_sget_boolean.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGetBooleanStaticFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: x86/op_sget_byte.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGetByteStaticFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: x86/op_sget_char.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGetCharStaticFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: x86/op_sget_short.S */
+/* File: x86/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGetShortStaticFromCode)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet32StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST, rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet32StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: x86/op_sput_wide.S */
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
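+/*
+ * Editorial note: unlike the 32-bit sput above, the wide variant passes
+ * the address of the vreg pair (&fp[AA]) so the helper can read the
+ * full 64-bit value through the pointer.
+ */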
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: x86/op_sput_object.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 105
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpSputObject)
+    testb   %al, %al
+    jz      MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: x86/op_sput_boolean.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST, rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet8StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: x86/op_sput_byte.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST, rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet8StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: x86/op_sput_char.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST, rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet16StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: x86/op_sput_short.S */
+/* File: x86/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST, rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet16StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: x86/op_invoke_virtual.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 110
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeVirtual)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
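+/*
+ * Editorial note: invoke instructions are three code units long, hence
+ * ADVANCE_PC 3.  After every invoke, MterpShouldSwitchInterpreters
+ * gives the runtime a chance to bail to the reference interpreter
+ * (MterpFallback), e.g. when instrumentation or debugging demands it.
+ */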
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: x86/op_invoke_super.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 111
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeSuper)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: x86/op_invoke_direct.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 112
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeDirect)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: x86/op_invoke_static.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 113
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeStatic)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: x86/op_invoke_interface.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 114
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeInterface)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: x86/op_return_void_no_barrier.S */
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
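+/*
+ * Editorial note: method return is a suspension point, so the handler
+ * polls the suspend/checkpoint flags before leaving.  The xorl pair
+ * zeroes eax:ecx, the register pair in which MterpReturn appears to
+ * expect the (here void) result.
+ */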
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: x86/op_invoke_virtual_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 116
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeVirtualRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: x86/op_invoke_super_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 117
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeSuperRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: x86/op_invoke_direct_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 118
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeDirectRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: x86/op_invoke_static_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 119
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeStaticRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: x86/op_invoke_interface_range.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 120
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeInterfaceRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: x86/op_unused_79.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: x86/op_unused_7a.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: x86/op_neg_int.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    negl    %eax
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: x86/op_not_int.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    notl %eax
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: x86/op_neg_long.S */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, %ecx                     # eax <- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx <- v[B+1]
+    negl    %eax
+    adcl    $0, %ecx
+    negl    %ecx
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    SET_VREG_HIGH %ecx, rINST               # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
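+/*
+ * Editorial note: the negl/adcl/negl sequence is a two-word negation of
+ * ecx:eax.  negl %eax sets CF when the low word is non-zero, adcl folds
+ * that borrow into the high word, and the final negl negates it, giving
+ * -(v[B+1]:v[B+0]) overall.
+ */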
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: x86/op_not_long.S */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, %ecx                     # eax <- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx <- v[B+1]
+    notl    %eax
+    notl    %ecx
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    SET_VREG_HIGH %ecx, rINST               # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: x86/op_neg_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    fchs
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: x86/op_neg_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    fchs
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: x86/op_int_to_long.S */
+    /* int to long vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- +A
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movl    rIBASE, %ecx                    # cltd trashes rIBASE/edx
+    cltd                                    # rIBASE:eax <- ssssssssBBBBBBBB
+    SET_VREG_HIGH rIBASE, rINST             # v[A+1] <- rIBASE
+    SET_VREG %eax, rINST                    # v[A+0] <- %eax
+    movl    %ecx, rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: x86/op_int_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: x86/op_int_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildl   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: x86/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: x86/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG rINST, rINST
+    .if 0
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: x86/op_long_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildll   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: x86/op_long_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fildll   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: x86/op_float_to_int.S */
+/* File: x86/cvtfp_int.S */
+/* On fp-to-int conversions, Java requires that a result greater than
+ * maxint be clamped to maxint, that a result less than minint be
+ * clamped to minint, and that a NaN produce zero.  Further, the
+ * rounding mode is truncation.  This model differs from what the x86
+ * FPU delivers by default, so we have to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 0
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 0
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 0
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_float_to_int_special_case # fix up result
+
+.Lop_float_to_int_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 0
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_float_to_int_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_float_to_int_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 0
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_int_finish
+.Lop_float_to_int_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 0
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_int_finish
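+/*
+ * Editorial note on the fix-up: fistpl stores the x87 "integer
+ * indefinite" value 0x80000000 for NaN and out-of-range inputs.  The
+ * earlier ftst result is still in the FPU status word, so fnstsw/sahf
+ * recovers it: PF set means the input was a NaN (result 0); otherwise
+ * CF holds the sign, and "adcl $-1" turns 0x80000000 into 0x7fffffff
+ * for positive overflow while leaving minint for negative inputs.
+ */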
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: x86/op_float_to_long.S */
+/* File: x86/cvtfp_int.S */
+/* On fp-to-int conversions, Java requires that a result greater than
+ * maxint be clamped to maxint, that a result less than minint be
+ * clamped to minint, and that a NaN produce zero.  Further, the
+ * rounding mode is truncation.  This model differs from what the x86
+ * FPU delivers by default, so we have to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 0
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 1
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 1
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_float_to_long_special_case # fix up result
+
+.Lop_float_to_long_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 1
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_float_to_long_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_float_to_long_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 1
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_long_finish
+.Lop_float_to_long_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 1
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_float_to_long_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: x86/op_float_to_double.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstpl  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 1
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: x86/op_double_to_int.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 1
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 0
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 0
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_double_to_int_special_case # fix up result
+
+.Lop_double_to_int_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 0
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_double_to_int_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_double_to_int_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 0
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_int_finish
+.Lop_double_to_int_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 0
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_int_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: x86/op_double_to_long.S */
+/* File: x86/cvtfp_int.S */
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    .if 1
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $0xf, %cl                      # ecx <- A
+    .if 1
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if 1
+    movl    $0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .Lop_double_to_long_special_case # fix up result
+
+.Lop_double_to_long_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if 1
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_double_to_long_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .Lop_double_to_long_isNaN
+    adcl    $-1, VREG_ADDRESS(%ecx)
+    .if 1
+    adcl    $-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_long_finish
+.Lop_double_to_long_isNaN:
+    movl    $0, VREG_ADDRESS(%ecx)
+    .if 1
+    movl    $0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .Lop_double_to_long_finish
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: x86/op_double_to_float.S */
+/* File: x86/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $0xf, %cl                      # ecx <- A
+
+    fstps  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if 0
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: x86/op_int_to_byte.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
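+/*
+ * For int-to-byte the "instr" is movsbl; in C terms (a sketch, not the
+ * template itself): v[A] = (int32_t)(int8_t)v[B], i.e. sign-extend the
+ * low byte of vB.
+ */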
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movsbl  %al, %eax
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: x86/op_int_to_char.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movzwl  %ax,%eax
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: x86/op_int_to_short.S */
+/* File: x86/unop.S */
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf,%cl                       # ecx <- A
+    movswl %ax, %eax
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: x86/op_add_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
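+/*
+ * For add-int the expansion is, in C terms (a sketch):
+ * v[AA] = v[BB] + v[CC], with the addl below supplying the "op".
+ */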
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    addl    (rFP,%ecx,4), %eax                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: x86/op_sub_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    subl    (rFP,%ecx,4), %eax              # eax <- vBB - vCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: x86/op_mul_int.S */
+    /*
+     * 32-bit binary multiplication.
+     */
+    /* mul vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: x86/op_div_int.S */
+/* File: x86/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
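+/*
+ * A minimal C sketch of the required behavior (names hypothetical):
+ *
+ *   int32_t java_div(int32_t num, int32_t den) {
+ *       // den == 0 throws; handled via common_errDivideByZero below.
+ *       if (num == INT32_MIN && den == -1)
+ *           return INT32_MIN;       // raw idivl would fault (#DE) here
+ *       return num / den;           // truncating division
+ *   }
+ */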
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    testl   $0xFFFFFF00, %edx              # if both operands fit in
+                                            #   8 bits and are positive
+    jz      .Lop_div_int_8                   # Do 8-bit divide
+    testl   $0xFFFF0000, %edx              # if both operands fit in
+                                            #   16 bits and are positive
+    jz      .Lop_div_int_16                  # Do 16-bit divide
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_32
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_32
+    movl    $0x80000000, %eax
+    jmp     .Lop_div_int_finish
+.Lop_div_int_32:
+    cltd
+    idivl   %ecx
+    jmp     .Lop_div_int_finish
+.Lop_div_int_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if 0
+    movl    %eax, %edx
+    shr     $8, %edx
+    .else
+    andl    $0x000000FF, %eax
+    .endif
+    jmp     .Lop_div_int_finish
+.Lop_div_int_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.Lop_div_int_finish:
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: x86/op_rem_int.S */
+/* File: x86/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    testl   $0xFFFFFF00, %edx              # if both operands fit in
+                                            #   8 bits and are positive
+    jz      .Lop_rem_int_8                   # Do 8-bit divide
+    testl   $0xFFFF0000, %edx              # if both operands fit in
+                                            #   16 bits and are positive
+    jz      .Lop_rem_int_16                  # Do 16-bit divide
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_32
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_32
+    movl    $0, rIBASE
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_32:
+    cltd
+    idivl   %ecx
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if 1
+    movl    %eax, %edx
+    shr     $8, %edx
+    .else
+    andl    $0x000000FF, %eax
+    .endif
+    jmp     .Lop_rem_int_finish
+.Lop_rem_int_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.Lop_rem_int_finish:
+    SET_VREG rIBASE, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: x86/op_and_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    andl    (rFP,%ecx,4), %eax              # eax <- vBB & vCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: x86/op_or_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    orl     (rFP,%ecx,4), %eax              # eax <- vBB | vCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: x86/op_xor_int.S */
+/* File: x86/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    xorl    (rFP,%ecx,4), %eax              # eax <- vBB ^ vCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: x86/op_shl_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded into
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    sall    %cl, %eax                       # eax <- vBB << (vCC & 31)
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: x86/op_shr_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded into
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    sarl    %cl, %eax                       # eax <- vBB >> (vCC & 31), arithmetic
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: x86/op_ushr_int.S */
+/* File: x86/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded into
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    shrl    %cl, %eax                       # eax <- vBB >> (vCC & 31), logical
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: x86/op_add_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
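+/*
+ * Hedged C sketch of the add-long instantiation (v64 is a hypothetical
+ * 64-bit view of the register file): the addl/adcl pair below forms one
+ * 64-bit add, the adcl consuming the carry from the low words:
+ *
+ *   v64[AA] = v64[BB] + v64[CC];
+ */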
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)            # save rIBASE
+    GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
+    addl    (rFP,%ecx,4), rIBASE                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    adcl    4(rFP,%ecx,4), %eax                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE            # restore rIBASE
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: x86/op_sub_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)            # save rIBASE
+    GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
+    subl    (rFP,%ecx,4), rIBASE            # rIBASE <- vBB.lo - vCC.lo
+    sbbl    4(rFP,%ecx,4), %eax             # eax <- vBB.hi - vCC.hi - CF
+    SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE            # restore rIBASE
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: x86/op_mul_long.S */
+/*
+ * Signed 64-bit integer multiply.
+ *
+ * We could definitely use more free registers for
+ * this code.  We spill rINSTw (ebx),
+ * giving us eax, ebx, ecx and edx as computational
+ * temps.  On top of that, we'll spill edi (rFP)
+ * for use as the vB pointer and esi (rPC) for use
+ * as the vC pointer.  Yuck.
+ *
+ */
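+/*
+ * The decomposition, as a hedged C sketch (names follow the comments
+ * below; this is not the emitted code):
+ *
+ *   // B = Bmsw:Blsw, C = Cmsw:Clsw (uint32_t halves)
+ *   uint64_t lo = (uint64_t)Blsw * Clsw;         // mull
+ *   uint32_t hi = Bmsw * Clsw + Cmsw * Blsw      // two imulls
+ *               + (uint32_t)(lo >> 32);          // leal folds in the carry word
+ *   // 64-bit result = hi : (uint32_t)lo
+ */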
+    /* mul-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[B]
+    leal    (rFP,%ecx,4), rFP               # rFP <- &v[C]
+    movl    4(%esi), %ecx                   # ecx <- Bmsw
+    imull   (rFP), %ecx                     # ecx <- (Bmsw*Clsw)
+    movl    4(rFP), %eax                    # eax <- Cmsw
+    imull   (%esi), %eax                    # eax <- (Cmsw*Blsw)
+    addl    %eax, %ecx                      # ecx <- (Bmsw*Clsw)+(Cmsw*Blsw)
+    movl    (rFP), %eax                     # eax <- Clsw
+    mull    (%esi)                          # eax <- (Clsw*Alsw)
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL1(%esp), rFP               # restore FP
+    leal    (%ecx,rIBASE), rIBASE           # full result now in rIBASE:%eax
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    SET_VREG %eax, rINST                    # v[AA] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: x86/op_div_long.S */
+/* art_quick_* methods have the "quick" ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
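+/* Reading the loads below: the numerator vBB ends up in eax (lo) /
+ * ecx (hi) and the denominator vCC in edx (lo) / ebx (hi) before the
+ * call. */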
+    /* div vAA, vBB, vCC */
+    .extern art_quick_ldiv
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx, %eax
+    GET_VREG_HIGH %ebx, %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx, %eax
+    GET_VREG %eax, %eax
+    call    SYMBOL(art_quick_ldiv)
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE, rINST
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: x86/op_rem_long.S */
+/* File: x86/op_div_long.S */
+/* art_quick_* methods have the "quick" ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* rem vAA, vBB, vCC */
+    .extern art_quick_lmod
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx, %eax
+    GET_VREG_HIGH %ebx, %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx, %eax
+    GET_VREG %eax, %eax
+    call    SYMBOL(art_quick_lmod)
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE, rINST
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: x86/op_and_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)            # save rIBASE
+    GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
+    andl    (rFP,%ecx,4), rIBASE            # rIBASE <- vBB.lo & vCC.lo
+    andl    4(rFP,%ecx,4), %eax             # eax <- vBB.hi & vCC.hi
+    SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE            # restore rIBASE
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: x86/op_or_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)            # save rIBASE
+    GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
+    orl     (rFP,%ecx,4), rIBASE            # rIBASE <- vBB.lo | vCC.lo
+    orl     4(rFP,%ecx,4), %eax             # eax <- vBB.hi | vCC.hi
+    SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE            # restore rIBASE
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: x86/op_xor_long.S */
+/* File: x86/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)            # save rIBASE
+    GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
+    xorl    (rFP,%ecx,4), rIBASE            # rIBASE <- vBB.lo ^ vCC.lo
+    xorl    4(rFP,%ecx,4), %eax             # eax <- vBB.hi ^ vCC.hi
+    SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE            # restore rIBASE
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: x86/op_shl_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
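+/*
+ * Equivalent hedged C sketch (lo/hi are the 32-bit halves of vBB, n the
+ * shift count):
+ *
+ *   uint32_t n = vCC & 63;                               // Dalvik mask
+ *   if (n & 32)      { hi = lo << (n & 31); lo = 0; }    // counts 32..63
+ *   else if (n != 0) { hi = (hi << n) | (lo >> (32 - n)); lo <<= n; }
+ */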
+    /* shl-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, %eax              # rIBASE <- v[BB+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    GET_VREG %eax, %eax                     # eax <- v[BB+0]
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: x86/op_shr_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, %eax              # rIBASE<- v[BB+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    GET_VREG %eax, %eax                     # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: x86/op_ushr_long.S */
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* ushr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, %eax              # rIBASE <- v[BB+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    GET_VREG %eax, %eax                     # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: x86/op_add_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    addss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: x86/op_sub_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    subss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: x86/op_mul_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    mulss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: x86/op_div_float.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movss   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    divss VREG_ADDRESS(%eax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: x86/op_rem_float.S */
+    /* rem_float vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    flds    VREG_ADDRESS(%ecx)              # vCC to fp stack (becomes %st1)
+    flds    VREG_ADDRESS(%eax)              # vBB to fp stack (%st0)
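+    /* fprem yields only a partial remainder; C2 (visible as PF after
+     * fstsw/sahf) stays set until the reduction is complete, so loop. */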
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINST)             # %st to vAA
+    CLEAR_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: x86/op_add_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    addsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: x86/op_sub_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    subsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: x86/op_mul_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    mulsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: x86/op_div_double.S */
+/* File: x86/sseBinop.S */
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movsd   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    divsd VREG_ADDRESS(%eax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: x86/op_rem_double.S */
+    /* rem_double vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    fldl    VREG_ADDRESS(%ecx)              # %st1 <- fp[vCC]
+    fldl    VREG_ADDRESS(%eax)              # %st0 <- fp[vBB]
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINST)             # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: x86/op_add_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
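+/*
+ * In C terms (a sketch of the add-int/2addr case): v[A] += v[B]; the
+ * destination doubles as the first source, hence "/2addr".
+ */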
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    addl    %eax, (rFP,%ecx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: x86/op_sub_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    subl    %eax, (rFP,%ecx,4)              # vA <- vA - vB
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: x86/op_mul_int_2addr.S */
+    /* mul vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    movl    rIBASE, rINST
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    movl    rINST, rIBASE
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: x86/op_div_int_2addr.S */
+/* File: x86/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_2addr_continue_div2addr
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_2addr_continue_div2addr
+    movl    $0x80000000, %eax
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_div_int_2addr_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: x86/op_rem_int_2addr.S */
+/* File: x86/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_2addr_continue_div2addr
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_2addr_continue_div2addr
+    movl    $0, rIBASE
+    SET_VREG rIBASE, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.Lop_rem_int_2addr_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: x86/op_and_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    andl    %eax, (rFP,%ecx,4)              # vA <- vA & vB
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: x86/op_or_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    orl     %eax, (rFP,%ecx,4)              # vA <- vA | vB
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: x86/op_xor_int_2addr.S */
+/* File: x86/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $0xf, %cl                      # ecx <- A
+    xorl    %eax, (rFP,%ecx,4)              # vA <- vA ^ vB
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: x86/op_shl_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    sall    %cl, %eax                       # eax <- vA << (vB & 31)
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: x86/op_shr_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    sarl    %cl, %eax                       # eax <- vA >> (vB & 31), arithmetic
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: x86/op_ushr_int_2addr.S */
+/* File: x86/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    shrl    %cl, %eax                       # eax <- vA >> (vB & 31), logical
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: x86/op_add_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %eax, %ecx                     # eax<- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx<- v[B+1]
+    andb    $0xF, rINSTbl                  # rINST<- A
+    addl    %eax, (rFP,rINST,4)                                 # ex: addl   %eax,(rFP,rINST,4)
+    adcl    %ecx, 4(rFP,rINST,4)                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: x86/op_sub_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %eax, %ecx                     # eax<- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx<- v[B+1]
+    andb    $0xF, rINSTbl                  # rINST<- A
+    subl    %eax, (rFP,rINST,4)             # v[A+0] <- v[A+0] - v[B+0]
+    sbbl    %ecx, 4(rFP,rINST,4)            # v[A+1] <- v[A+1] - v[B+1] - CF
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: x86/op_mul_long_2addr.S */
+/*
+ * Signed 64-bit integer multiply, 2-addr version
+ *
+ * We could definitely use more free registers for
+ * this code.  We must spill %edx (rIBASE) because it
+ * is used by imul.  We'll also spill rINST (ebx),
+ * giving us eax, ebx, ecx and rIBASE as computational
+ * temps.  On top of that, we'll spill rPC (esi)
+ * for use as the vA pointer and rFP (edi) for use
+ * as the vB pointer.  Yuck.
+ */
+    /* mul-long/2addr vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    CLEAR_WIDE_REF %eax                     # clear refs in advance
+    sarl    $4, rINST                      # rINST <- B
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[A]
+    leal    (rFP,rINST,4), rFP              # rFP <- &v[B]
+    movl    4(%esi), %ecx                   # ecx <- Amsw
+    imull   (rFP), %ecx                     # ecx <- (Amsw*Blsw)
+    movl    4(rFP), %eax                    # eax <- Bmsw
+    imull   (%esi), %eax                    # eax <- (Bmsw*Alsw)
+    addl    %eax, %ecx                      # ecx <- (Amsw*Blsw)+(Bmsw*Alsw)
+    movl    (rFP), %eax                     # eax <- Blsw
+    mull    (%esi)                          # eax <- (Blsw*Alsw)
+    leal    (%ecx,rIBASE), rIBASE           # full result now in %edx:%eax
+    movl    rIBASE, 4(%esi)                 # v[A+1] <- rIBASE
+    movl    %eax, (%esi)                    # v[A] <- %eax
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    mov     LOCAL1(%esp), rFP               # restore FP
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: x86/op_div_long_2addr.S */
+/* art_quick_* methods have the "quick" ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* div/2addr vA, vB */
+    .extern   art_quick_ldiv
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx, %eax
+    GET_VREG_HIGH %ebx, %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax, %ecx
+    GET_VREG_HIGH %ecx, %ecx
+    call    SYMBOL(art_quick_ldiv)
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE, rINST
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: x86/op_rem_long_2addr.S */
+/* File: x86/op_div_long_2addr.S */
+/* art_quick_* methods have the "quick" ABI,
+ *   so use eax, ecx, edx, ebx for args
+ */
+    /* rem/2addr vA, vB */
+    .extern   art_quick_lmod
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx, %eax
+    GET_VREG_HIGH %ebx, %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax, %ecx
+    GET_VREG_HIGH %ecx, %ecx
+    call    SYMBOL(art_quick_lmod)
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE, rINST
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: x86/op_and_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %eax, %ecx                     # eax<- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx<- v[B+1]
+    andb    $0xF, rINSTbl                  # rINST<- A
+    andl    %eax, (rFP,rINST,4)             # v[A+0] <- v[A+0] & v[B+0]
+    andl    %ecx, 4(rFP,rINST,4)            # v[A+1] <- v[A+1] & v[B+1]
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: x86/op_or_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %eax, %ecx                     # eax<- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx<- v[B+1]
+    andb    $0xF, rINSTbl                  # rINST<- A
+    orl     %eax, (rFP,rINST,4)             # v[A+0] <- v[A+0] | v[B+0]
+    orl     %ecx, 4(rFP,rINST,4)            # v[A+1] <- v[A+1] | v[B+1]
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: x86/op_xor_long_2addr.S */
+/* File: x86/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG %eax, %ecx                     # eax<- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx<- v[B+1]
+    andb    $0xF, rINSTbl                  # rINST<- A
+    xorl    %eax, (rFP,rINST,4)             # v[A+0] <- v[A+0] ^ v[B+0]
+    xorl    %ecx, 4(rFP,rINST,4)            # v[A+1] <- v[A+1] ^ v[B+1]
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: x86/op_shl_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shl-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets A */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- v[AA+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, rINST             # rIBASE <- v[AA+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vBB
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[A+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
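+
+/*
+ * Editorial sketch (not generator output): sall/shldl only honour the low
+ * five bits of %cl, so the explicit `testb $32` branch patches in the
+ * 32..63 cases.  The intended Dex semantics, in C:
+ *
+ *   uint64_t shl_long(uint64_t value, uint32_t distance) {
+ *       return value << (distance & 63);  // count masked to six bits
+ *   }
+ */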
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: x86/op_shr_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets A */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- v[A+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, rINST             # rIBASE <- v[A+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vB (shift count)
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[A+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86/op_ushr_long_2addr.S */
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* ushr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets A */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- v[A+0]
+    sarl    $4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, rINST             # rIBASE <- v[A+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vB (shift count)
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[A+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: x86/op_add_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
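+
+/*
+ * Editorial sketch (not generator output): each sseBinop2Addr expansion
+ * above is the scalar-SSE analogue of
+ *
+ *   fp[A] = fp[A] OP fp[B];   // addss/subss/mulss/divss via %xmm0
+ *   ref[B] = 0;               // zero vB's slot in the parallel reference
+ *                             // array, since it now holds a primitive
+ *
+ * where ref mirrors fp in the shadow frame.
+ */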
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: x86/op_sub_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: x86/op_mul_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: x86/op_div_float_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divss VREG_ADDRESS(rINST), %xmm0
+    movss %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: x86/op_rem_float_2addr.S */
+    /* rem_float/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem                                   # partial remainder; may need repeating
+    fstsw   %ax                             # copy FPU status word to ax
+    sahf                                    # C2 lands in PF
+    jp      1b                              # loop while reduction incomplete (C2 set)
+    fstp    %st(1)                          # pop the divisor
+    fstps   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: x86/op_add_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: x86/op_sub_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: x86/op_mul_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: x86/op_div_double_2addr.S */
+/* File: x86/sseBinop2Addr.S */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divsd VREG_ADDRESS(rINST), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%ecx)      # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: x86/op_rem_double_2addr.S */
+    /* rem_double/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem                                   # partial remainder; may need repeating
+    fstsw   %ax                             # copy FPU status word to ax
+    sahf                                    # C2 lands in PF
+    jp      1b                              # loop while reduction incomplete (C2 set)
+    fstp    %st(1)                          # pop the divisor
+    fstpl   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_WIDE_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: x86/op_add_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    addl    %ecx, %eax                      # eax <- vB + ssssCCCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
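+
+/*
+ * Editorial sketch (not generator output) of the 22s ("lit16") decode the
+ * template above performs, with insns the stream of 16-bit code units:
+ *
+ *   uint32_t A = (insns[0] >> 8) & 0xf;   // andb $0xf, rINSTbl
+ *   uint32_t B = insns[0] >> 12;          // sarl $4
+ *   int32_t  C = (int16_t)insns[1];       // movswl 2(rPC), sign-extended
+ *   fp[A] = fp[B] + C;                    // add-int/lit16
+ */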
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: x86/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    subl    %eax, %ecx                      # ecx <- ssssCCCC - vB
+    SET_VREG %ecx, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: x86/op_mul_int_lit16.S */
+    /* mul/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: x86/op_div_int_lit16.S */
+/* File: x86/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_lit16_continue_div
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_lit16_continue_div
+    movl    $0x80000000, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_div_int_lit16_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
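+
+/*
+ * Editorial sketch (not generator output): the guard above is needed
+ * because x86 idivl faults (#DE) on INT32_MIN / -1, whereas Dex defines
+ * both results:
+ *
+ *   // divisor already checked against zero
+ *   if (a == INT32_MIN && b == -1)
+ *       return INT32_MIN;   // div-int; rem-int returns 0 instead
+ *   return a / b;           // idivl: quotient in eax, remainder in edx
+ */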
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: x86/op_rem_int_lit16.S */
+/* File: x86/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_lit16_continue_div
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_lit16_continue_div
+    movl    $0, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_rem_int_lit16_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: x86/op_and_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    andl    %ecx, %eax                      # eax <- vB & ssssCCCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: x86/op_or_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    orl     %ecx, %eax                      # eax <- vB | ssssCCCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: x86/op_xor_int_lit16.S */
+/* File: x86/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    xorl    %ecx, %eax                      # eax <- vB ^ ssssCCCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: x86/op_add_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    addl    %ecx, %eax                      # eax <- vBB + ssssssCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
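+
+/*
+ * Editorial sketch (not generator output) of the 22b ("lit8") decode used
+ * above; unlike lit16, the source vreg and the literal share the second
+ * code unit:
+ *
+ *   uint32_t AA = insns[0] >> 8;            // dest, already in rINST
+ *   uint32_t BB = insns[1] & 0xff;          // movzbl 2(rPC)
+ *   int32_t  CC = (int8_t)(insns[1] >> 8);  // movsbl 3(rPC), sign-extended
+ *   fp[AA] = fp[BB] + CC;                   // add-int/lit8
+ */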
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86/op_rsub_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    subl    %eax, %ecx                      # ecx <- ssssssCC - vBB
+    SET_VREG %ecx, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: x86/op_mul_int_lit8.S */
+    /* mul/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movl    rIBASE, %ecx
+    GET_VREG  %eax, %eax                    # eax <- vBB
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: x86/op_div_int_lit8.S */
+/* File: x86/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax, %eax                    # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $0x80000000, %eax
+    jne     .Lop_div_int_lit8_continue_div
+    cmpl    $-1, %ecx
+    jne     .Lop_div_int_lit8_continue_div
+    movl    $0x80000000, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_div_int_lit8_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: x86/op_rem_int_lit8.S */
+/* File: x86/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax, %eax                    # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $0x80000000, %eax
+    jne     .Lop_rem_int_lit8_continue_div
+    cmpl    $-1, %ecx
+    jne     .Lop_rem_int_lit8_continue_div
+    movl    $0, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.Lop_rem_int_lit8_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG rIBASE, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: x86/op_and_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    andl    %ecx, %eax                      # eax <- vBB & ssssssCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: x86/op_or_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    orl     %ecx, %eax                      # eax <- vBB | ssssssCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: x86/op_xor_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    xorl    %ecx, %eax                      # eax <- vBB ^ ssssssCC
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: x86/op_shl_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    sall    %cl, %eax                       # eax <- vBB << (CC & 31)
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: x86/op_shr_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    sarl    %cl, %eax                       # eax <- vBB >> (CC & 31), arithmetic
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86/op_ushr_int_lit8.S */
+/* File: x86/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    shrl    %cl, %eax                       # eax <- vBB >> (CC & 31), logical
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
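+
+/*
+ * Editorial sketch (not generator output): a quickened iget has the
+ * resolved field's byte offset baked into the instruction, so the handler
+ * reduces to a null check plus a raw load:
+ *
+ *   if (obj == NULL) goto common_errNullObject;
+ *   fp[A] = *(int32_t *)((char *)obj + offset);  // movl (%ecx,%eax,1), %eax
+ */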
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: x86/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movq    (%ecx,%eax,1), %xmm0
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_WIDE_FP_VREG %xmm0, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: x86/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    %eax, OUT_ARG1(%esp)
+    EXPORT_PC
+    call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
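+
+/*
+ * Editorial note (not generator output): object reads call into C++ so the
+ * GC can apply read barriers; the handler then polls the thread-local
+ * exception slot, roughly:
+ *
+ *   obj = artIGetObjectFromMterp(obj, offset);
+ *   if (self->exception != NULL)   // cmpl $0, THREAD_EXCEPTION_OFFSET(%ecx)
+ *       goto MterpException;
+ *   set_vreg_object(A, obj);       // SET_VREG_OBJECT: updates ref[] too
+ */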
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINST                   # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    rINST, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: x86/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbl    rINSTbl, %ecx                 # ecx<- BA
+    sarl      $4, %ecx                     # ecx<- B
+    GET_VREG  %ecx, %ecx                    # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwl    2(rPC), %eax                  # eax<- field byte offset
+    leal      (%ecx,%eax,1), %ecx           # ecx<- Address of 64-bit target
+    andb      $0xf, rINSTbl                # rINST<- A
+    GET_WIDE_FP_VREG %xmm0, rINST           # xmm0<- fp[A]/fp[A+1]
+    movq      %xmm0, (%ecx)                 # obj.field<- r0/r1
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: x86/op_iput_object_quick.S */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST 232
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpIputObjectQuick)
+    testb   %al, %al
+    jz      MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86/op_invoke_virtual_quick.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 233
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeVirtualQuick)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
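+
+/*
+ * Editorial sketch (not generator output) of the generic invoke wrapper's
+ * control flow:
+ *
+ *   if (!MterpInvokeVirtualQuick(self, shadow_frame, pc, inst))
+ *       goto MterpException;           // callee threw
+ *   pc += 3;                           // invoke is three code units wide
+ *   if (MterpShouldSwitchInterpreters())
+ *       goto MterpFallback;            // hand off to the reference interpreter
+ *   dispatch(*pc);                     // FETCH_INST / GOTO_NEXT
+ */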
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86/op_invoke_virtual_range_quick.S */
+/* File: x86/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST 234
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInvokeVirtualQuickRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: x86/op_iput_boolean_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINST                   # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movb    rINSTbl, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: x86/op_iput_byte_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINST                   # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movb    rINSTbl, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: x86/op_iput_char_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINST                   # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movw    rINSTw, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: x86/op_iput_short_quick.S */
+/* File: x86/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINST                   # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movw    rINSTw, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: x86/op_iget_boolean_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movsbl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: x86/op_iget_byte_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movsbl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: x86/op_iget_char_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movzwl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: x86/op_iget_short_quick.S */
+/* File: x86/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movswl (%ecx,%eax,1), %eax
+    andb    $0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f3: /* 0xf3 */
+/* File: x86/op_unused_f3.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: x86/op_unused_f4.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f5: /* 0xf5 */
+/* File: x86/op_unused_f5.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f6: /* 0xf6 */
+/* File: x86/op_unused_f6.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f7: /* 0xf7 */
+/* File: x86/op_unused_f7.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f8: /* 0xf8 */
+/* File: x86/op_unused_f8.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f9: /* 0xf9 */
+/* File: x86/op_unused_f9.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: x86/op_unused_fa.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: x86/op_unused_fb.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: x86/op_unused_fc.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: x86/op_unused_fd.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: x86/op_unused_fe.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: x86/op_unused_ff.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+    .balign 128
+    SIZE(SYMBOL(artMterpAsmInstructionStart),SYMBOL(artMterpAsmInstructionStart))
+    .global SYMBOL(artMterpAsmInstructionEnd)
+SYMBOL(artMterpAsmInstructionEnd):
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global SYMBOL(artMterpAsmSisterStart)
+    FUNCTION_TYPE(SYMBOL(artMterpAsmSisterStart))
+    .text
+    .balign 4
+SYMBOL(artMterpAsmSisterStart):
+
+    SIZE(SYMBOL(artMterpAsmSisterStart),SYMBOL(artMterpAsmSisterStart))
+    .global SYMBOL(artMterpAsmSisterEnd)
+SYMBOL(artMterpAsmSisterEnd):
+
+
+    .global SYMBOL(artMterpAsmAltInstructionStart)
+    FUNCTION_TYPE(SYMBOL(artMterpAsmAltInstructionStart))
+    .text
+
+SYMBOL(artMterpAsmAltInstructionStart) = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(0*128)
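+
+/*
+ * Editorial note (not generator output): every handler is padded to 128
+ * bytes by `.balign 128`, so the real handler for opcode N sits at
+ * artMterpAsmInstructionStart + N*128; each alt stub's closing
+ * `jmp .L_op_nop+(N*128)` computes exactly that address.
+ */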
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(1*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(2*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(3*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(4*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(5*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(6*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(7*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(8*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(9*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(10*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(11*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(12*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(13*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(14*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(15*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(16*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(17*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(18*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(19*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(20*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(21*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(22*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(23*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(24*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(25*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(26*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(27*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(28*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(29*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(30*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(31*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(32*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(33*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(34*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(35*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(36*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(37*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(38*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(39*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(40*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(41*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(42*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(43*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(44*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(45*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(46*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(47*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(48*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(49*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(50*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(51*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(52*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(53*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(54*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(55*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(56*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(57*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(58*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(59*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(60*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(61*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(62*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(63*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(64*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(65*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(66*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(67*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(68*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(69*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(70*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(71*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(72*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(73*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(74*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(75*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(76*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(77*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(78*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(79*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(80*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(81*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(82*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(83*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(84*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(85*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(86*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(87*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(88*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(89*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(90*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(91*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(92*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(93*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(94*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(95*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(96*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(97*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(98*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(99*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(100*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(101*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(102*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(103*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(104*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(105*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(106*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(107*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(108*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(109*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(110*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(111*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(112*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(113*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(114*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(115*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(116*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(117*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(118*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(119*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(120*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(121*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(122*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(123*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(124*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(125*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(126*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(127*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(128*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(129*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(130*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(131*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(132*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(133*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(134*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(135*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(136*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(137*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(138*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(139*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(140*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(141*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(142*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(143*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(144*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(145*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(146*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(147*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(148*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(149*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(150*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(151*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(152*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(153*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(154*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(155*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(156*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(157*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(158*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(159*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(160*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(161*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(162*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(163*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(164*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(165*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(166*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(167*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(168*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(169*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(170*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(171*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(172*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(173*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(174*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(175*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(176*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(177*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(178*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(179*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(180*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(181*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(182*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(183*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(184*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(185*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(186*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(187*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(188*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(189*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(190*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(191*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(192*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(193*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(194*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(195*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(196*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(197*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(198*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(199*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(200*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(201*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(202*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(203*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(204*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(205*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(206*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(207*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(208*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(209*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(210*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(211*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(212*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(213*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(214*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(215*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(216*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(217*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(218*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(219*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(220*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(221*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(222*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(223*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(224*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(225*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(226*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(227*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(228*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(229*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(230*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(231*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(232*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(233*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(234*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(235*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(236*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(237*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(238*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(239*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(240*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(241*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(242*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(243*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(244*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(245*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(246*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(247*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(248*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(249*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(250*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(251*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(252*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(253*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(254*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: x86/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(255*128)
+
+    .balign 128
+    SIZE(SYMBOL(artMterpAsmAltInstructionStart),SYMBOL(artMterpAsmAltInstructionStart))
+    .global SYMBOL(artMterpAsmAltInstructionEnd)
+SYMBOL(artMterpAsmAltInstructionEnd):
+/* File: x86/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogDivideByZeroException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogArrayIndexException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogNegativeArraySizeException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogNoSuchMethodException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogNullObjectException)
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogExceptionThrownException)
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    THREAD_FLAGS_OFFSET(%eax), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpLogSuspendFallback)
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movl    rSELF, %eax
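+    /* testl with an all-ones mask sets ZF iff the thread's pending-exception field is null */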
+    testl   $-1, THREAD_EXCEPTION_OFFSET(%eax)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpHandleException)
+    testb   %al, %al
+    jz      MterpExceptionReturn
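+    /* recompute rPC = &code_item->insns_[dex_pc]; dex code units are 2 bytes wide */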
+    movl    OFF_FP_CODE_ITEM(rFP), %eax
+    movl    OFF_FP_DEX_PC(rFP), %ecx
+    lea     CODEITEM_INSNS_OFFSET(%eax), rPC
+    lea     (rPC, %ecx, 2), rPC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* Do we need to switch interpreters? */
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    /* resume execution at catch block */
+    REFRESH_IBASE
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We only subtract 1 from positive values, and we should never see 0 here,
+ * so the comparison against -1 is safe.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
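+    /* rPROFILE == JIT_CHECK_OSR (-1) marks OSR-check mode; positive values count down hotness */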
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decw    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movl    rSELF, %eax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
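+    /* advance rPC by the branch offset (16-bit code units); leal leaves the testl flags intact */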
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    REFRESH_IBASE
+    GOTO_NEXT
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)            # rSELF in eax
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
+    GOTO_NEXT
+
+.L_no_count_backwards:
+    cmpw    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpw    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    $2, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpLogOSR)
+#endif
+    movl    $1, %eax
+    jmp     MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogFallback)
+#endif
+MterpCommonFallback:
+    xor     %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movl    OFF_FP_RESULT_REGISTER(rFP), %edx
+    movl    %eax, (%edx)
+    movl    %ecx, 4(%edx)
+    movl    $1, %eax
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmpw    $0, rPROFILE
+    jle     MRestoreFrame                   # <= 0: nothing to report; > 0: fall through and report counts
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
+    /* pop up frame */
+MRestoreFrame:
+    addl    $FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+
+    /* Restore callee-save registers */
+    POP     %ebx
+    POP     %esi
+    POP     %edi
+    POP     %ebp
+    ret
+    .cfi_endproc
+    SIZE(ExecuteMterpImpl,ExecuteMterpImpl)
+
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
new file mode 100644
index 0000000..bc1abcc
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -0,0 +1,12071 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'x86_64'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: x86_64/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86_64 ABI general notes:
+
+Caller save set:
+   rax, rdx, rcx, rsi, rdi, r8-r11, st(0)-st(7)
+Callee save set:
+   rbx, rbp, r12-r15
+Return regs:
+   32-bit in eax
+   64-bit in rax
+   fp on xmm0
+
+The first 8 fp parameters come in xmm0-xmm7.
+The first 6 non-fp parameters come in rdi, rsi, rdx, rcx, r8, r9.
+Other parameters are passed on the stack, pushed right-to-left.  On entry to the
+target, the first stack param is at 8(%rsp).  A traditional entry sequence
+(sketched below for illustration only) is:
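+
+    functEntry:
+        push    %rbp                 # save old frame pointer
+        movq    %rsp, %rbp           # establish new frame pointer
+        subq    $FrameSize, %rsp     # allocate storage for spills, locals & outs
+                                     # (FrameSize is a stand-in name, not a
+                                     #  symbol defined in this file)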
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be rsp relative.
+*/
+
+/*
+Mterp and x86_64 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPROFILE rbp   countdown register for jit profiling
+  rPC      r12   interpreted program counter, used for fetching instructions
+  rFP      r13   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   r14   base of instruction handler table
+  rREFS    r15   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/*
+ * Handle Mac compiler specifics.
+ */
+#if defined(__APPLE__)
+    #define MACRO_LITERAL(value) $(value)
+    #define FUNCTION_TYPE(name)
+    #define SIZE(start,end)
+    // Mac OS' symbols have an _ prefix.
+    #define SYMBOL(name) _ ## name
+#else
+    #define MACRO_LITERAL(value) $value
+    #define FUNCTION_TYPE(name) .type name, @function
+    #define SIZE(start,end) .size start, .-end
+    #define SYMBOL(name) name
+#endif
+
+.macro PUSH _reg
+    pushq \_reg
+    .cfi_adjust_cfa_offset 8
+    .cfi_rel_offset \_reg, 0
+.endm
+
+.macro POP _reg
+    popq \_reg
+    .cfi_adjust_cfa_offset -8
+    .cfi_restore \_reg
+.endm
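+
+/*
+ * PUSH/POP wrap pushq/popq while keeping the CFI in sync: each push grows the
+ * CFA by 8 bytes and records where the register was spilled, so unwinders can
+ * recover callee-saves anywhere inside ExecuteMterpImpl.
+ */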
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
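+
+/*
+ * Worked example: the vregs array sits SHADOWFRAME_VREGS_OFFSET bytes into the
+ * ShadowFrame, so with rFP at the vregs base the frame itself starts at
+ * OFF_FP_SHADOWFRAME(rFP) and, e.g., the current method is loaded with:
+ *
+ *     movq    OFF_FP_METHOD(rFP), %rax    # rax <- shadow_frame->method_
+ */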
+
+/* The frame must keep the stack 16-byte aligned: the return address (8 bytes)
+ * plus 6 * 8 bytes of spills leave it 8 bytes off, so allocate 8 more.
+ */
+#define FRAME_SIZE     8
+
+/* Incoming argument registers and frame layout for ExecuteMterpImpl */
+#define IN_ARG3        %rcx
+#define IN_ARG2        %rdx
+#define IN_ARG1        %rsi
+#define IN_ARG0        %rdi
+/* Spill offsets relative to %rsp */
+#define SELF_SPILL     (FRAME_SIZE -  8)
+/* Out Args  */
+#define OUT_ARG3       %rcx
+#define OUT_ARG2       %rdx
+#define OUT_ARG1       %rsi
+#define OUT_ARG0       %rdi
+#define OUT_32_ARG3    %ecx
+#define OUT_32_ARG2    %edx
+#define OUT_32_ARG1    %esi
+#define OUT_32_ARG0    %edi
+#define OUT_FP_ARG1    %xmm1
+#define OUT_FP_ARG0    %xmm0
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    SELF_SPILL(%rsp)
+#define rPC      %r12
+#define rFP      %r13
+#define rINST    %ebx
+#define rINSTq   %rbx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %r14
+#define rREFS    %r15
+#define rPROFILE %ebp
+
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to the dex_pc field in the shadow frame, for the benefit of
+ * future exception objects.  Must be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of the dex pc as a direct pointer to
+ * the mapped dex byte codes.  However, the rest of the runtime expects the dex
+ * pc to be an instruction offset into the code_items_[] array.  For efficiency,
+ * we "export" the current dex pc as a direct pointer using the EXPORT_PC macro,
+ * and rely on GetDexPC to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movq    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
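+
+/*
+ * Handlers that can throw (e.g. const-string or check-cast below) issue
+ * EXPORT_PC before calling out of the interpreter, so an exception unwinds
+ * with an accurate dex pc.
+ */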
+
+/*
+ * Refresh the handler table base.
+ * The ibase lives in a register that calls may clobber, so we must restore it
+ * after each call.  It is also used as a result register by some 64-bit
+ * operations (like imul), so we should restore it in those cases as well.
+ */
+.macro REFRESH_IBASE
+    movq    rSELF, rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST no longer contains the opcode number.
+ * However, some utilities require the full instruction word, so this macro
+ * restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $\_opnum, rINSTbl
+.endm
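+
+/*
+ * Worked example: op_new_instance (opcode 0x22) below issues REFRESH_INST 34;
+ * it copies the operand byte from rINSTbl into rINSTbh and writes 34 (0x22)
+ * into rINSTbl, reconstructing the instruction's first code unit in rINSTw.
+ */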
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwq  (rPC), rINSTq
+.endm
+
+/*
+ * Remove the opcode from rINST, compute the address of the handler, and jump
+ * to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    MACRO_LITERAL(7), %eax
+    addq    rIBASE, %rax
+    jmp     *%rax
+.endm
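+
+/*
+ * Worked example: handlers are spaced 128 bytes apart (.balign 128 below), so
+ * for opcode 0x01 GOTO_NEXT computes rIBASE + (0x01 << 7) and jumps to
+ * .L_op_move, leaving the zero-extended operand byte in rINST for the handler.
+ */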
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leaq    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
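+
+/*
+ * For example, a two-code-unit instruction ends with
+ * ADVANCE_PC_FETCH_AND_GOTO_NEXT 2, advancing rPC by 4 bytes before
+ * dispatching the next opcode.
+ */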
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value. */
+.macro GET_WIDE_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value. reg is clobbered. */
+.macro SET_WIDE_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    xorq    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
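+
+/*
+ * The invariant these macros maintain: vregs (rFP) and references (rREFS) are
+ * parallel arrays.  A slot written with a non-reference value must have its
+ * rREFS entry zeroed so the GC ignores it; SET_VREG_OBJECT instead mirrors the
+ * value into both arrays, the "double-storing" scheme noted in the header
+ * above.
+ */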
+
+/* File: x86_64/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global SYMBOL(ExecuteMterpImpl)
+    FUNCTION_TYPE(ExecuteMterpImpl)
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
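+
+/*
+ * Illustrative C-level view of the entry point (argument types approximate):
+ *
+ *   bool ExecuteMterpImpl(Thread* self, CodeItem* code_item,
+ *                         ShadowFrame* shadow_frame, JValue* result_register);
+ */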
+
+SYMBOL(ExecuteMterpImpl):
+    .cfi_startproc
+    .cfi_def_cfa rsp, 8
+
+    /* Spill callee save regs */
+    PUSH %rbx
+    PUSH %rbp
+    PUSH %r12
+    PUSH %r13
+    PUSH %r14
+    PUSH %r15
+
+    /* Allocate frame */
+    subq    $FRAME_SIZE, %rsp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Remember the return register */
+    movq    IN_ARG3, SHADOWFRAME_RESULT_REGISTER_OFFSET(IN_ARG2)
+
+    /* Remember the code_item */
+    movq    IN_ARG1, SHADOWFRAME_CODE_ITEM_OFFSET(IN_ARG2)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(IN_ARG2), %eax
+    leaq    SHADOWFRAME_VREGS_OFFSET(IN_ARG2), rFP
+    leaq    (rFP, %rax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(IN_ARG2), %eax
+    leaq    CODEITEM_INSNS_OFFSET(IN_ARG1), rPC
+    leaq    (rPC, %rax, 2), rPC
+    EXPORT_PC
+
+    /* Starting ibase */
+    movq    IN_ARG0, rSELF
+    REFRESH_IBASE
+
+    /* Set up for backwards branches & osr profiling */
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+    movswl  %ax, rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+
+    .global SYMBOL(artMterpAsmInstructionStart)
+    FUNCTION_TYPE(SYMBOL(artMterpAsmInstructionStart))
+SYMBOL(artMterpAsmInstructionStart) = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: x86_64/op_nop.S */
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: x86_64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movl    rINST, %eax                     # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG %edx, rINSTq
+    .if 0
+    SET_VREG_OBJECT %edx, %rax              # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, %rax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: x86_64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzwq  2(rPC), %rax                    # eax <- BBBB
+    GET_VREG %edx, %rax                     # edx <- fp[BBBB]
+    .if 0
+    SET_VREG_OBJECT %edx, rINSTq            # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, rINSTq                   # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: x86_64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwq  4(rPC), %rcx                    # ecx <- BBBB
+    movzwq  2(rPC), %rax                    # eax <- AAAA
+    GET_VREG %edx, %rcx
+    .if 0
+    SET_VREG_OBJECT %edx, %rax              # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, %rax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: x86_64/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rdx, rINSTq              # rdx <- v[B]
+    SET_WIDE_VREG %rdx, %rcx                # v[A] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: x86_64/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    GET_WIDE_VREG %rdx, %rcx                # rdx <- v[B]
+    SET_WIDE_VREG %rdx, rINSTq              # v[A] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: x86_64/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwq  4(rPC), %rcx                    # ecx<- BBBB
+    movzwq  2(rPC), %rax                    # eax<- AAAA
+    GET_WIDE_VREG %rdx, %rcx                # rdx <- v[B]
+    SET_WIDE_VREG %rdx, %rax                # v[A] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: x86_64/op_move_object.S */
+/* File: x86_64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movl    rINST, %eax                     # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG %edx, rINSTq
+    .if 1
+    SET_VREG_OBJECT %edx, %rax              # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, %rax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: x86_64/op_move_object_from16.S */
+/* File: x86_64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzwq  2(rPC), %rax                    # eax <- BBBB
+    GET_VREG %edx, %rax                     # edx <- fp[BBBB]
+    .if 1
+    SET_VREG_OBJECT %edx, rINSTq            # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, rINSTq                   # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: x86_64/op_move_object_16.S */
+/* File: x86_64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwq  4(rPC), %rcx                    # ecx <- BBBB
+    movzwq  2(rPC), %rax                    # eax <- AAAA
+    GET_VREG %edx, %rcx
+    .if 1
+    SET_VREG_OBJECT %edx, %rax              # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, %rax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: x86_64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rax    # get pointer to result JType.
+    movl    (%rax), %eax                    # r0 <- result.i.
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- fp[B]
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: x86_64/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rax    # get pointer to result JType.
+    movq    (%rax), %rdx                         # Get wide
+    SET_WIDE_VREG %rdx, rINSTq                   # v[AA] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: x86_64/op_move_result_object.S */
+/* File: x86_64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rax    # get pointer to result JType.
+    movl    (%rax), %eax                    # r0 <- result.i.
+    .if 1
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- fp[B]
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: x86_64/op_move_exception.S */
+    /* move-exception vAA */
+    movq    rSELF, %rcx
+    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
+    SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
+    movl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: x86_64/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorq    %rax, %rax
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: x86_64/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINSTq                   # eax <- vAA
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: x86_64/op_return_wide.S */
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_WIDE_VREG %rax, rINSTq              # rax <- v[AA]
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: x86_64/op_return_object.S */
+/* File: x86_64/op_return.S */
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINSTq                   # eax <- vAA
+    jmp     MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: x86_64/op_const_4.S */
+    /* const/4 vA, #+B */
+    movsbl  rINSTbl, %eax                   # eax <- ssssssBx
+    movl    $0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $4, %eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: x86_64/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx, rINSTq                   # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: x86_64/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax, rINSTq                   # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: x86_64/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax, rINSTq                   # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: x86_64/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    movswq  2(rPC), %rax                    # rax <- ssssBBBB
+    SET_WIDE_VREG %rax, rINSTq              # store
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: x86_64/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movslq   2(rPC), %rax                   # eax <- ssssssssBBBBbbbb
+    SET_WIDE_VREG %rax, rINSTq              # store
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: x86_64/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movq    2(rPC), %rax                    # rax <- HHHHhhhhBBBBbbbb
+    SET_WIDE_VREG %rax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: x86_64/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwq  2(rPC), %rax                    # rax <- 0000BBBB
+    salq    $48, %rax                      # rax <- BBBB000000000000
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: x86_64/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # OUT_ARG0 <- BBBB
+    movq    rINSTq, OUT_ARG1
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: x86_64/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- BBBB
+    movq    rINSTq, OUT_ARG1
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: x86_64/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # OUT_ARG0 <- BBBB
+    movq    rINSTq, OUT_ARG1
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: x86_64/op_monitor_enter.S */
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG OUT_32_ARG0, rINSTq
+    movq    rSELF, OUT_ARG1
+    call    SYMBOL(artLockObjectFromCode)   # (object, self)
+    testq   %rax, %rax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: x86_64/op_monitor_exit.S */
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG OUT_32_ARG0, rINSTq
+    movq    rSELF, OUT_ARG1
+    call    SYMBOL(artUnlockObjectFromCode) # (object, self)
+    testq   %rax, %rax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: x86_64/op_check_cast.S */
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # OUT_ARG0 <- BBBB
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG1
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: x86_64/op_instance_of.S */
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- CCCC
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $4, %eax                       # eax <- B
+    leaq    VREG_ADDRESS(%rax), OUT_ARG1    # Get object address
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
+    movsbl  %al, %eax
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    andb    $0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: x86_64/op_array_length.S */
+/*
+ * Return the length of an array.
+ */
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $4, rINST                      # rINST <- B
+    GET_VREG %ecx, rINSTq                   # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%rcx), rINST
+    SET_VREG rINST, %rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: x86_64/op_new_instance.S */
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rSELF, OUT_ARG1
+    REFRESH_INST 34
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpNewInstance)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: x86_64/op_new_array.S */
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST 35
+    movq    rINSTq, OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpNewArray)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: x86_64/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpFilledNewArray)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: x86_64/op_filled_new_array_range.S */
+/* File: x86_64/op_filled_new_array.S */
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpFilledNewArrayRange)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: x86_64/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movslq  2(rPC), %rcx                    # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
+    GET_VREG OUT_32_ARG0, rINSTq            # OUT_ARG0 <- vAA (array object)
+    call    SYMBOL(MterpFillArrayData)      # (obj, payload)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: x86_64/op_throw.S */
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
+    testb   %al, %al
+    jz      common_errNullObject
+    movq    rSELF, %rcx
+    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
+    jmp     MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: x86_64/op_goto.S */
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: x86_64/op_goto_16.S */
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: x86_64/op_goto_32.S */
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Because the common taken-branch path needs the sign flag to reflect the
+ * offset, we testq the offset before jumping.
+ */
+    /* goto/32 +AAAAAAAA */
+    movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: x86_64/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movslq  2(rPC), OUT_ARG0                # OUT_ARG0 <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # OUT_ARG0 <- PC + ssssssssBBBBbbbb*2
+    GET_VREG OUT_32_ARG1, rINSTq            # OUT_32_ARG1 <- vAA
+    call    SYMBOL(MterpDoPackedSwitch)
+    testl   %eax, %eax
+    movslq  %eax, rINSTq
+    jmp     MterpCommonTakenBranch
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: x86_64/op_sparse_switch.S */
+/* File: x86_64/op_packed_switch.S */
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movslq  2(rPC), OUT_ARG0                # OUT_ARG0 <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # OUT_ARG0 <- PC + ssssssssBBBBbbbb*2
+    GET_VREG OUT_32_ARG1, rINSTq            # OUT_32_ARG1 <- vAA
+    call    SYMBOL(MterpDoSparseSwitch)
+    testl   %eax, %eax
+    movslq  %eax, rINSTq
+    jmp     MterpCommonTakenBranch
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: x86_64/op_cmpl_float.S */
+/* File: x86_64/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # ecx<- CC
+    movzbq  2(rPC), %rax                    # eax<- BB
+    movss VREG_ADDRESS(%rax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%rcx), %xmm0
+    jp      .Lop_cmpl_float_nan_is_neg
+    je      .Lop_cmpl_float_finish
+    jb      .Lop_cmpl_float_less
+.Lop_cmpl_float_nan_is_pos:
+    addb    $1, %al
+    jmp     .Lop_cmpl_float_finish
+.Lop_cmpl_float_nan_is_neg:
+.Lop_cmpl_float_less:
+    movl    $-1, %eax
+.Lop_cmpl_float_finish:
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
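+
+/* Note: this cmpl variant sends NaN (parity set) to nan_is_neg and yields -1;
+ * the cmpg variants below route the jp to nan_is_pos and yield +1 instead. */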
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: x86_64/op_cmpg_float.S */
+/* File: x86_64/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # ecx<- CC
+    movzbq  2(rPC), %rax                    # eax<- BB
+    movss VREG_ADDRESS(%rax), %xmm0
+    xor     %eax, %eax
+    ucomiss VREG_ADDRESS(%rcx), %xmm0
+    jp      .Lop_cmpg_float_nan_is_pos
+    je      .Lop_cmpg_float_finish
+    jb      .Lop_cmpg_float_less
+.Lop_cmpg_float_nan_is_pos:
+    addb    $1, %al
+    jmp     .Lop_cmpg_float_finish
+.Lop_cmpg_float_nan_is_neg:
+.Lop_cmpg_float_less:
+    movl    $-1, %eax
+.Lop_cmpg_float_finish:
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: x86_64/op_cmpl_double.S */
+/* File: x86_64/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # ecx<- CC
+    movzbq  2(rPC), %rax                    # eax<- BB
+    movsd VREG_ADDRESS(%rax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%rcx), %xmm0
+    jp      .Lop_cmpl_double_nan_is_neg
+    je      .Lop_cmpl_double_finish
+    jb      .Lop_cmpl_double_less
+.Lop_cmpl_double_nan_is_pos:
+    addb    $1, %al
+    jmp     .Lop_cmpl_double_finish
+.Lop_cmpl_double_nan_is_neg:
+.Lop_cmpl_double_less:
+    movl    $-1, %eax
+.Lop_cmpl_double_finish:
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: x86_64/op_cmpg_double.S */
+/* File: x86_64/fpcmp.S */
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # ecx<- CC
+    movzbq  2(rPC), %rax                    # eax<- BB
+    movsd VREG_ADDRESS(%rax), %xmm0
+    xor     %eax, %eax
+    ucomisd VREG_ADDRESS(%rcx), %xmm0
+    jp      .Lop_cmpg_double_nan_is_pos
+    je      .Lop_cmpg_double_finish
+    jb      .Lop_cmpg_double_less
+.Lop_cmpg_double_nan_is_pos:
+    addb    $1, %al
+    jmp     .Lop_cmpg_double_finish
+.Lop_cmpg_double_nan_is_neg:
+.Lop_cmpg_double_less:
+    movl    $-1, %eax
+.Lop_cmpg_double_finish:
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: x86_64/op_cmp_long.S */
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbq  2(rPC), %rdx                    # edx <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rdx, %rdx                # rdx <- v[BB]
+    xorl    %eax, %eax
+    xorl    %edi, %edi
+    addb    $1, %al
+    movl    $-1, %esi
+    cmpq    VREG_ADDRESS(%rcx), %rdx
+    cmovl   %esi, %edi
+    cmovg   %eax, %edi
+    SET_VREG %edi, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
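+
+/* Branchless idiom above: edi starts at 0 with esi = -1 and eax = 1; the cmov
+ * pair overwrites edi with -1 on less or +1 on greater, leaving 0 on equal. */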
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: x86_64/op_if_eq.S */
+/* File: x86_64/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    jne   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: x86_64/op_if_ne.S */
+/* File: x86_64/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    je   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: x86_64/op_if_lt.S */
+/* File: x86_64/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    jge   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: x86_64/op_if_ge.S */
+/* File: x86_64/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    jl   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: x86_64/op_if_gt.S */
+/* File: x86_64/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    jle   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: x86_64/op_if_le.S */
+/* File: x86_64/bincmp.S */
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    jg   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: x86_64/op_if_eqz.S */
+/* File: x86_64/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    jne   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: x86_64/op_if_nez.S */
+/* File: x86_64/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    je   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: x86_64/op_if_ltz.S */
+/* File: x86_64/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    jge   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: x86_64/op_if_gez.S */
+/* File: x86_64/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    jl   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: x86_64/op_if_gtz.S */
+/* File: x86_64/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    jle   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: x86_64/op_if_lez.S */
+/* File: x86_64/zcmp.S */
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    jg   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: x86_64/op_unused_3e.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: x86_64/op_unused_3f.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: x86_64/op_unused_40.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: x86_64/op_unused_41.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: x86_64/op_unused_42.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: x86_64/op_unused_43.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: x86_64/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    movq    MIRROR_INT_ARRAY_DATA_OFFSET(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    movl   MIRROR_INT_ARRAY_DATA_OFFSET(%rax,%rcx,4), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: x86_64/op_aget_wide.S */
+/* File: x86_64/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 1
+    movq    MIRROR_WIDE_ARRAY_DATA_OFFSET(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    movq   MIRROR_WIDE_ARRAY_DATA_OFFSET(%rax,%rcx,8), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: x86_64/op_aget_object.S */
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG OUT_32_ARG0, %rax              # eax <- vBB (array object)
+    GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
+    EXPORT_PC
+    call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: x86_64/op_aget_boolean.S */
+/* File: x86_64/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    movq    MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    movzbl   MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%rax,%rcx,1), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: x86_64/op_aget_byte.S */
+/* File: x86_64/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    movq    MIRROR_BYTE_ARRAY_DATA_OFFSET(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    movsbl   MIRROR_BYTE_ARRAY_DATA_OFFSET(%rax,%rcx,1), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: x86_64/op_aget_char.S */
+/* File: x86_64/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    movq    MIRROR_CHAR_ARRAY_DATA_OFFSET(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    movzwl   MIRROR_CHAR_ARRAY_DATA_OFFSET(%rax,%rcx,2), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: x86_64/op_aget_short.S */
+/* File: x86_64/op_aget.S */
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    movq    MIRROR_SHORT_ARRAY_DATA_OFFSET(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    movswl   MIRROR_SHORT_ARRAY_DATA_OFFSET(%rax,%rcx,2), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: x86_64/op_aput.S */
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    movl    rINST, MIRROR_INT_ARRAY_DATA_OFFSET(%rax,%rcx,4)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
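+/*
+ * The put variants repeat the same null and bounds checks, then store
+ * vAA at the width the instantiation selects (movb/movw/movl/movq).
+ * Illustrative C, with the same caveats as the get sketch above:
+ *
+ *   if (a == NULL) goto throw_null_pointer;
+ *   if (i >= (uint32_t) a->length) goto throw_index;
+ *   a->data[i] = (elem_t) vregs[AA];   // elem_t is 8/16/32/64-bit per op
+ */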
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: x86_64/op_aput_wide.S */
+/* File: x86_64/op_aput.S */
+/*
+ * Array put, 32 bits or less (64 bits for the wide variant).  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 1
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    movq    rINSTq, MIRROR_WIDE_ARRAY_DATA_OFFSET(%rax,%rcx,8)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: x86_64/op_aput_object.S */
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST 77
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpAputObject)         # (shadow_frame, dex_pc_ptr, inst_data)
+    testb   %al, %al
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
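+/*
+ * aput-object cannot use the inline path above: storing a reference
+ * needs an assignability check against the array's component type plus
+ * a GC write barrier, so it goes through the MterpAputObject helper.
+ * Roughly (illustrative C; names are stand-ins):
+ *
+ *   if (val != NULL && !is_assignable(type_of(val), component_type(a)))
+ *       goto throw_array_store;
+ *   a->refs[i] = val;
+ *   write_barrier(a);
+ */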
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: x86_64/op_aput_boolean.S */
+/* File: x86_64/op_aput.S */
+/*
+ * Array put, 32 bits or less (64 bits for the wide variant).  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    movb    rINSTbl, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(%rax,%rcx,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: x86_64/op_aput_byte.S */
+/* File: x86_64/op_aput.S */
+/*
+ * Array put, 32 bits or less (64 bits for the wide variant).  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    movb    rINSTbl, MIRROR_BYTE_ARRAY_DATA_OFFSET(%rax,%rcx,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: x86_64/op_aput_char.S */
+/* File: x86_64/op_aput.S */
+/*
+ * Array put, 32 bits or less (64 bits for the wide variant).  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    movw    rINSTw, MIRROR_CHAR_ARRAY_DATA_OFFSET(%rax,%rcx,2)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: x86_64/op_aput_short.S */
+/* File: x86_64/op_aput.S */
+/*
+ * Array put, 32 bits or less (64 bits for the wide variant).  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if 0
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    movw    rINSTw, MIRROR_SHORT_ARRAY_DATA_OFFSET(%rax,%rcx,2)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGet32InstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
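+/*
+ * Instance field ops use the 22c format: the operand byte packs two
+ * register nibbles (A low, B high) and the next code unit holds the
+ * field index.  Decoding sketch (illustrative C; ip is the byte PC and
+ * read16 is a stand-in):
+ *
+ *   uint8_t  ba = ip[1];               // BA byte, in rINST at entry
+ *   uint8_t  a  = ba & 0xf;            // destination vA   (andb $0xf)
+ *   uint8_t  b  = ba >> 4;             // object register vB (sarl $4)
+ *   uint16_t field_idx = read16(ip + 2);   // CCCC
+ */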
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: x86_64/op_iget_wide.S */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGet64InstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 1
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: x86_64/op_iget_object.S */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGetObjInstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: x86_64/op_iget_boolean.S */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGetBooleanInstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: x86_64/op_iget_byte.S */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGetByteInstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: x86_64/op_iget_char.S */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGetCharInstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: x86_64/op_iget_short.S */
+/* File: x86_64/op_iget.S */
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(artGetShortInstanceFromCode)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <-value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <-value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <-value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: x86_64/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-boolean, iput-byte, iput-char, iput-short (iput-object has its own handler)
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # field ref <- 0000CCCC
+    movzbq  rINSTbl, %rcx                   # rcx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG OUT_32_ARG2, rINSTq            # fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet32InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
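+/*
+ * The 32-bit-or-narrower puts pass the new value itself as the third
+ * argument; iput-wide below instead passes &fp[A] so the helper can
+ * read the full 64-bit vreg pair.  Approximate contract of the call
+ * above, inferred from the argument setup (a sketch, not the
+ * authoritative prototype):
+ *
+ *   // non-zero return means an exception may be pending
+ *   int artSet32InstanceFromMterp(uint32_t field_idx, Object *obj,
+ *                                 uint32_t new_value, ArtMethod *referrer);
+ */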
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: x86_64/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref CCCC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $0xf, rINSTbl                  # rINST <- A
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet64InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: x86_64/op_iput_object.S */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST 91
+    movl    rINST, OUT_32_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpIputObject)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: x86_64/op_iput_boolean.S */
+/* File: x86_64/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-boolean, iput-byte, iput-char, iput-short (iput-object has its own handler)
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # field ref <- 0000CCCC
+    movzbq  rINSTbl, %rcx                   # rcx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG OUT_32_ARG2, rINSTq            # fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet8InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: x86_64/op_iput_byte.S */
+/* File: x86_64/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-boolean, iput-byte, iput-char, iput-short (iput-object has its own handler)
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # field ref <- 0000CCCC
+    movzbq  rINSTbl, %rcx                   # rcx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG OUT_32_ARG2, rINSTq            # fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet8InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: x86_64/op_iput_char.S */
+/* File: x86_64/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-boolean, iput-byte, iput-char, iput-short (iput-object has its own handler)
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # field ref <- 0000CCCC
+    movzbq  rINSTbl, %rcx                   # rcx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG OUT_32_ARG2, rINSTq            # fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet16InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: x86_64/op_iput_short.S */
+/* File: x86_64/op_iput.S */
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-boolean, iput-byte, iput-char, iput-short (iput-object has its own handler)
+ */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # field ref <- 0000CCCC
+    movzbq  rINSTbl, %rcx                   # rcx<- BA
+    sarl    $4, %ecx                       # ecx<- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $0xf, rINSTbl                  # rINST<- A
+    GET_VREG OUT_32_ARG2, rINSTq            # fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet16InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGet32StaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
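+/*
+ * The nested .if arms are resolved when this file is assembled: each
+ * op_sget_* instantiation hard-codes its is-object/is-wide flags, so
+ * exactly one of SET_VREG_OBJECT / SET_WIDE_VREG / SET_VREG survives.
+ * Equivalent compile-time dispatch in C (illustrative):
+ *
+ *   #if IS_OBJECT
+ *       set_vreg_object(fp, AA, (Object *) result);
+ *   #elif IS_WIDE
+ *       set_vreg_wide(fp, AA, (uint64_t) result);
+ *   #else
+ *       set_vreg(fp, AA, (uint32_t) result);
+ *   #endif
+ */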
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: x86_64/op_sget_wide.S */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGet64StaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 1
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: x86_64/op_sget_object.S */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGetObjStaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 1
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: x86_64/op_sget_boolean.S */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGetBooleanStaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: x86_64/op_sget_byte.S */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGetByteStaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: x86_64/op_sget_char.S */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGetCharStaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: x86_64/op_sget_short.S */
+/* File: x86_64/op_sget.S */
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL(artGetShortStaticFromCode)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if 0
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: x86_64/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet32StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet32StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
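+/*
+ * Static puts hand the field index BBBB and the vAA value straight to
+ * the runtime, which resolves the field and initializes the declaring
+ * class if needed; a non-zero return means an exception is pending.
+ * Hedged sketch of the call made above:
+ *
+ *   if (artSet32StaticFromCode(field_idx, vregs[AA], referrer, self))
+ *       goto MterpException;
+ */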
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: x86_64/op_sput_wide.S */
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[AA]
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
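+/*
+ * As with iput-wide, the 64-bit static put passes a pointer to the
+ * vreg pair rather than the value itself (hedged sketch of the call
+ * above; wide_addr stands for &fp[AA]):
+ *
+ *   if (artSet64IndirectStaticFromMterp(field_idx, referrer, wide_addr,
+ *                                       self))
+ *       goto MterpException;
+ */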
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: x86_64/op_sput_object.S */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST 105
+    movq    rINSTq, OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpSputObject)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: x86_64/op_sput_boolean.S */
+/* File: x86_64/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet8StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: x86_64/op_sput_byte.S */
+/* File: x86_64/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet8StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet8StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: x86_64/op_sput_char.S */
+/* File: x86_64/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet16StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: x86_64/op_sput_short.S */
+/* File: x86_64/op_sput.S */
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern artSet16StaticFromCode
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet16StaticFromCode)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: x86_64/op_invoke_virtual.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 110
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeVirtual)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
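+/*
+ * Every invoke flavor shares this wrapper shape: argument copying,
+ * method resolution and frame setup happen in the MterpInvoke* C++
+ * helper; the assembly only marshals (self, shadow_frame, pc, inst).
+ * Control-flow sketch (illustrative):
+ *
+ *   if (!MterpInvokeVirtual(self, shadow_frame, pc, inst))
+ *       goto MterpException;
+ *   pc += 3;                            // every invoke is 3 code units
+ *   if (MterpShouldSwitchInterpreters())
+ *       goto MterpFallback;             // e.g. debugging now required
+ *   dispatch(pc);                       // FETCH_INST / GOTO_NEXT
+ */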
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: x86_64/op_invoke_super.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 111
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeSuper)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: x86_64/op_invoke_direct.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 112
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeDirect)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: x86_64/op_invoke_static.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 113
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeStatic)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: x86_64/op_invoke_interface.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 114
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeInterface)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: x86_64/op_return_void_no_barrier.S */
+    movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorq    %rax, %rax
+    jmp     MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: x86_64/op_invoke_virtual_range.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 116
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeVirtualRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: x86_64/op_invoke_super_range.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 117
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeSuperRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: x86_64/op_invoke_direct_range.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 118
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeDirectRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: x86_64/op_invoke_static_range.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 119
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeStaticRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: x86_64/op_invoke_interface_range.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 120
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeInterfaceRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: x86_64/op_unused_79.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: x86_64/op_unused_7a.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: x86_64/op_neg_int.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    negl    %eax
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
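+/*
+ * Each unop instantiation differs from this skeleton only in its one
+ * instruction line and in which .if arms are live: neg-int is the
+ * template with "negl %eax" and the wide arms disabled; neg-long below
+ * swaps in "negq %rax" with them enabled.  Net effect (illustrative C):
+ *
+ *   set_vreg(A, -get_vreg(B));   // neg-int; not-int uses ~ instead
+ */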
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: x86_64/op_not_int.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    notl    %eax
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: x86_64/op_neg_long.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    negq    %rax
+    .if 1
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: x86_64/op_not_long.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    notq    %rax
+    .if 1
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: x86_64/op_neg_float.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    xorl    $0x80000000, %eax
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: x86_64/op_neg_double.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+    movq    $0x8000000000000000, %rsi
+    xorq    %rsi, %rax
+    .if 1
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: x86_64/op_int_to_long.S */
+    /* int to long vA, vB */
+    movzbq  rINSTbl, %rax                   # rax <- BA
+    sarl    $4, %eax                       # eax <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movslq  VREG_ADDRESS(%rax), %rax
+    SET_WIDE_VREG %rax, rINSTq              # v[A] <- %rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: x86_64/op_int_to_float.S */
+/* File: x86_64/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    cvtsi2ssl    VREG_ADDRESS(rINSTq), %xmm0
+    .if 0
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
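+/*
+ * The fpcvt template is a single SSE convert bracketed by a vreg load
+ * and store; the .if arm picks a 64-bit (movsd) or 32-bit (movss)
+ * result store.  int-to-float above is simply (illustrative C; bits_of
+ * is a stand-in):
+ *
+ *   float f = (float) (int32_t) vregs[B];   // cvtsi2ssl
+ *   vregs[A] = bits_of(f);                  // movss + CLEAR_REF
+ */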
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: x86_64/op_int_to_double.S */
+/* File: x86_64/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    cvtsi2sdl    VREG_ADDRESS(rINSTq), %xmm0
+    .if 1
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: x86_64/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: x86_64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movl    rINST, %eax                     # eax <- BA
+    andb    $0xf, %al                      # eax <- A
+    shrl    $4, rINST                      # rINST <- B
+    GET_VREG %edx, rINSTq
+    .if 0
+    SET_VREG_OBJECT %edx, %rax              # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, %rax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: x86_64/op_long_to_float.S */
+/* File: x86_64/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    cvtsi2ssq    VREG_ADDRESS(rINSTq), %xmm0
+    .if 0
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: x86_64/op_long_to_double.S */
+/* File: x86_64/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    cvtsi2sdq    VREG_ADDRESS(rINSTq), %xmm0
+    .if 1
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: x86_64/op_float_to_int.S */
+/* File: x86_64/cvtfp_int.S */
+/* On FP-to-int/long conversions, Java requires that a result above the
+ * target type's maximum be clamped to that maximum, a result below the
+ * minimum be clamped to the minimum, and a NaN convert to zero.  The
+ * rounding mode is truncation (round toward zero).
+ */
+    /* float/double to int/long vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    movss   VREG_ADDRESS(rINSTq), %xmm0
+    movl  $0x7fffffff, %eax
+    cvtsi2ssl %eax, %xmm1
+    comiss    %xmm1, %xmm0
+    jae     1f
+    jp      2f
+    cvttss2sil  %xmm0, %eax
+    jmp     1f
+2:
+    xorl    %eax, %eax
+1:
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
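+/*
+ * The comiss against the preloaded maximum implements the Java
+ * saturation rules: a bare cvttss2sil returns the "integer indefinite"
+ * value 0x80000000 for NaN and all out-of-range inputs.  Equivalent C
+ * for the float-to-int case (illustrative):
+ *
+ *   int32_t res;
+ *   if (isnan(f))                    res = 0;            // jp taken
+ *   else if (f >= (float) INT32_MAX) res = INT32_MAX;    // jae: keep eax
+ *   else                             res = (int32_t) f;  // truncation
+ *   // negative overflow needs no extra test: cvttss2sil already
+ *   // yields 0x80000000 == INT32_MIN for that whole range
+ */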
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: x86_64/op_float_to_long.S */
+/* File: x86_64/cvtfp_int.S */
+/* On FP-to-int/long conversions, Java requires that a result above the
+ * target type's maximum be clamped to that maximum, a result below the
+ * minimum be clamped to the minimum, and a NaN convert to zero.  The
+ * rounding mode is truncation (round toward zero).
+ */
+    /* float/double to int/long vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    movss   VREG_ADDRESS(rINSTq), %xmm0
+    movq  $0x7fffffffffffffff, %rax
+    cvtsi2ssq %rax, %xmm1
+    comiss    %xmm1, %xmm0
+    jae     1f
+    jp      2f
+    cvttss2siq  %xmm0, %rax
+    jmp     1f
+2:
+    xorq    %rax, %rax
+1:
+    .if 1
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: x86_64/op_float_to_double.S */
+/* File: x86_64/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    cvtss2sd    VREG_ADDRESS(rINSTq), %xmm0
+    .if 1
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: x86_64/op_double_to_int.S */
+/* File: x86_64/cvtfp_int.S */
+/* On FP-to-int/long conversions, Java requires that a result above the
+ * target type's maximum be clamped to that maximum, a result below the
+ * minimum be clamped to the minimum, and a NaN convert to zero.  The
+ * rounding mode is truncation (round toward zero).
+ */
+    /* float/double to int/long vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    movsd   VREG_ADDRESS(rINSTq), %xmm0
+    movl  $0x7fffffff, %eax
+    cvtsi2sdl %eax, %xmm1
+    comisd    %xmm1, %xmm0
+    jae     1f
+    jp      2f
+    cvttsd2sil  %xmm0, %eax
+    jmp     1f
+2:
+    xorl    %eax, %eax
+1:
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: x86_64/op_double_to_long.S */
+/* File: x86_64/cvtfp_int.S */
+/* On FP-to-int/long conversions, Java requires that a result above the
+ * target type's maximum be clamped to that maximum, a result below the
+ * minimum be clamped to the minimum, and a NaN convert to zero.  The
+ * rounding mode is truncation (round toward zero).
+ */
+    /* float/double to int/long vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    movsd   VREG_ADDRESS(rINSTq), %xmm0
+    movq  $0x7fffffffffffffff, %rax
+    cvtsi2sdq %rax, %xmm1
+    comisd    %xmm1, %xmm0
+    jae     1f
+    jp      2f
+    cvttsd2siq  %xmm0, %rax
+    jmp     1f
+2:
+    xorq    %rax, %rax
+1:
+    .if 1
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: x86_64/op_double_to_float.S */
+/* File: x86_64/fpcvt.S */
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    cvtsd2ss    VREG_ADDRESS(rINSTq), %xmm0
+    .if 0
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: x86_64/op_int_to_byte.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    movsbl  %al, %eax
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: x86_64/op_int_to_char.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    movzwl  %ax, %eax
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: x86_64/op_int_to_short.S */
+/* File: x86_64/unop.S */
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4,rINST                       # rINST <- B
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $0xf,%cl                       # ecx <- A
+
+    movswl  %ax, %eax
+    .if 0
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: x86_64/op_add_int.S */
+/* File: x86_64/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    addl    (rFP,%rcx,4), %eax              # eax <- vBB + vCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: x86_64/op_sub_int.S */
+/* File: x86_64/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    subl    (rFP,%rcx,4), %eax                                  # ex: addl    (rFP,%rcx,4),%eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: x86_64/op_mul_int.S */
+/* File: x86_64/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    imull   (rFP,%rcx,4), %eax                                  # ex: addl    (rFP,%rcx,4),%eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: x86_64/op_div_int.S */
+/* File: x86_64/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    .if 0
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vCC
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    GET_VREG %ecx, %rcx                  # ecx <- vCC
+    .endif
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl  $-1, %ecx
+    je      2f
+    cdq                                    # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq           # vAA <- rax
+    .else
+    SET_VREG %eax, rINSTq                # vAA <- eax
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 0
+    xorl %eax, %eax
+    .else
+    negl %eax
+    .endif
+    jmp     1b
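+
+/*
+ * Note on the -1 special case above: idivl faults on INT_MIN / -1 (the
+ * quotient 2^31 is unrepresentable), so the handler never reaches idivl
+ * with a -1 divisor.  Branch 2: instead produces the Dalvik-specified
+ * results directly: negl for div (x / -1 == -x, with INT_MIN wrapping
+ * back to INT_MIN) and xorl for rem (x % -1 == 0); the .if/.else arms
+ * select between the two when the template is expanded.
+ */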
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: x86_64/op_rem_int.S */
+/* File: x86_64/bindiv.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    .if 0
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vCC
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    GET_VREG %ecx, %rcx                  # ecx <- vCC
+    .endif
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl  $-1, %ecx
+    je      2f
+    cdq                                    # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    .if 0
+    SET_WIDE_VREG %rdx, rINSTq           # vAA <- rdx
+    .else
+    SET_VREG %edx, rINSTq                # vAA <- edx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 1
+    xorl %edx, %edx
+    .else
+    negl %edx
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: x86_64/op_and_int.S */
+/* File: x86_64/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    andl    (rFP,%rcx,4), %eax                                  # ex: addl    (rFP,%rcx,4),%eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: x86_64/op_or_int.S */
+/* File: x86_64/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    orl     (rFP,%rcx,4), %eax                                  # ex: addl    (rFP,%rcx,4),%eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: x86_64/op_xor_int.S */
+/* File: x86_64/binop.S */
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    xorl    (rFP,%rcx,4), %eax                                  # ex: addl    (rFP,%rcx,4),%eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: x86_64/op_shl_int.S */
+/* File: x86_64/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded
+ * into registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if 0
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    sall    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    sall    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
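+
+/*
+ * Note: the shift handlers rely on x86 masking the count in %cl to
+ * 5 bits for 32-bit operands (6 bits for 64-bit), which matches the
+ * Dalvik shift semantics.  In C-like terms for shl-int:
+ *
+ *     vreg[AA] = vreg[BB] << (vreg[CC] & 31);
+ */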
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: x86_64/op_shr_int.S */
+/* File: x86_64/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded
+ * into registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if 0
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    sarl    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    sarl    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: x86_64/op_ushr_int.S */
+/* File: x86_64/binop1.S */
+/*
+ * Generic 32-bit binary operation in which both operands are loaded
+ * into registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if 0
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    shrl    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    shrl    %cl, %eax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: x86_64/op_add_long.S */
+/* File: x86_64/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    addq    (rFP,%rcx,4), %rax                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
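+
+/*
+ * Note: vregs are 4 bytes wide, so a long occupies the pair (vN, vN+1)
+ * and its byte offset is still index*4, hence the scale-4 addressing
+ * (rFP,%rcx,4) even for 64-bit operands.  In C-like terms for add-long:
+ *
+ *     wide_vreg[AA] = wide_vreg[BB] + wide_vreg[CC];   // 64-bit add
+ */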
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: x86_64/op_sub_long.S */
+/* File: x86_64/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    subq    (rFP,%rcx,4), %rax                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: x86_64/op_mul_long.S */
+/* File: x86_64/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    imulq   (rFP,%rcx,4), %rax                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: x86_64/op_div_long.S */
+/* File: x86_64/bindiv.S */
+/*
+ * 64-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    .if 1
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vCC
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    GET_VREG %ecx, %rcx                  # ecx <- vCC
+    .endif
+    testq   %rcx, %rcx
+    jz      common_errDivideByZero
+    cmpq  $-1, %rcx
+    je      2f
+    cqo                                    # rdx:rax <- sign-extended rax
+    idivq   %rcx
+1:
+    .if 1
+    SET_WIDE_VREG %rax, rINSTq           # vAA <- rax
+    .else
+    SET_VREG %eax, rINSTq                # vAA <- eax
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 0
+    xorq %rax, %rax
+    .else
+    negq %rax
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: x86_64/op_rem_long.S */
+/* File: x86_64/bindiv.S */
+/*
+ * 64-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    .if 1
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vCC
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    GET_VREG %ecx, %rcx                  # ecx <- vCC
+    .endif
+    testq   %rcx, %rcx
+    jz      common_errDivideByZero
+    cmpq  $-1, %rcx
+    je      2f
+    cqo                                    # rdx:rax <- sign-extended rax
+    idivq   %rcx
+1:
+    .if 1
+    SET_WIDE_VREG %rdx, rINSTq           # vAA <- rdx
+    .else
+    SET_VREG %edx, rINSTq                # vAA <- edx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 1
+    xorq %rdx, %rdx
+    .else
+    negq %rdx
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: x86_64/op_and_long.S */
+/* File: x86_64/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    andq    (rFP,%rcx,4), %rax                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: x86_64/op_or_long.S */
+/* File: x86_64/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    orq     (rFP,%rcx,4), %rax                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: x86_64/op_xor_long.S */
+/* File: x86_64/binopWide.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    xorq    (rFP,%rcx,4), %rax                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: x86_64/op_shl_long.S */
+/* File: x86_64/binop1.S */
+/*
+ * Generic 64-bit binary operation in which both operands are loaded
+ * into registers (op0 in rax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if 1
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    salq    %cl, %rax                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    salq    %cl, %rax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: x86_64/op_shr_long.S */
+/* File: x86_64/binop1.S */
+/*
+ * Generic 64-bit binary operation in which both operands are loaded
+ * into registers (op0 in rax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if 1
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    sarq    %cl, %rax                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    sarq    %cl, %rax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: x86_64/op_ushr_long.S */
+/* File: x86_64/binop1.S */
+/*
+ * Generic 64-bit binary operation in which both operands are loaded
+ * into registers (op0 in rax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if 1
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    shrq    %cl, %rax                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    shrq    %cl, %rax                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: x86_64/op_add_float.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movss   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    addss VREG_ADDRESS(%rax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
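+
+/*
+ * Note: the interpreter keeps a parallel "reference" copy of each vreg
+ * for the GC.  After a float result is stored, the pxor/movss pair
+ * zeroes the destination's slot in that shadow array (VREG_REF_ADDRESS)
+ * so stale bits are never mistaken for an object reference.
+ */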
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: x86_64/op_sub_float.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movss   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    subss VREG_ADDRESS(%rax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: x86_64/op_mul_float.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movss   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    mulss VREG_ADDRESS(%rax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: x86_64/op_div_float.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movss   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    divss VREG_ADDRESS(%rax), %xmm0
+    movss   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: x86_64/op_rem_float.S */
+    /* rem_float vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    movzbq  2(rPC), %rax                    # rax <- BB
+    flds    VREG_ADDRESS(%rcx)              # vCC (divisor) to fp stack
+    flds    VREG_ADDRESS(%rax)              # vBB (dividend) to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINSTq)            # %st to vAA
+    CLEAR_REF rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
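+
+/*
+ * Note on the fprem loop above: fprem produces only a partial remainder
+ * and sets the FPU C2 flag while the reduction is incomplete.  The
+ * fstsw/sahf pair copies the status word into EFLAGS, where C2 lands in
+ * PF, so "jp 1b" retries until fprem is done; fstp %st(1) then pops the
+ * divisor, leaving the remainder in %st(0).
+ */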
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: x86_64/op_add_double.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movsd   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    addsd VREG_ADDRESS(%rax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: x86_64/op_sub_double.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movsd   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    subsd VREG_ADDRESS(%rax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: x86_64/op_mul_double.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movsd   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    mulsd VREG_ADDRESS(%rax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: x86_64/op_div_double.S */
+/* File: x86_64/sseBinop.S */
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movsd   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    divsd VREG_ADDRESS(%rax), %xmm0
+    movsd   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: x86_64/op_rem_double.S */
+    /* rem_double vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    movzbq  2(rPC), %rax                    # rax <- BB
+    fldl    VREG_ADDRESS(%rcx)              # st(1) <- fp[vCC] (divisor)
+    fldl    VREG_ADDRESS(%rax)              # st(0) <- fp[vBB] (dividend)
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINSTq)            # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: x86_64/op_add_int_2addr.S */
+/* File: x86_64/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    addl    %eax, (rFP,%rcx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
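+
+/*
+ * Illustrative sketch (not generated output): the /2addr form updates
+ * vA in place, so the "instr" slot uses a memory destination.  For
+ * add-int/2addr, in C-like terms:
+ *
+ *     vreg[A] = (int32_t)(vreg[A] + vreg[B]);
+ */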
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: x86_64/op_sub_int_2addr.S */
+/* File: x86_64/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    subl    %eax, (rFP,%rcx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: x86_64/op_mul_int_2addr.S */
+    /* mul vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    imull   (rFP,rINSTq,4), %eax
+    SET_VREG %eax, %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
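+
+/*
+ * Note: unlike the binop2addr.S handlers above, mul-int/2addr cannot
+ * use a memory-destination instruction (two-operand imul only writes a
+ * register), so it loads vA into %eax, multiplies, and stores back with
+ * SET_VREG.
+ */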
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: x86_64/op_div_int_2addr.S */
+/* File: x86_64/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # rcx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    GET_VREG %ecx, %rcx                  # ecx <- vB
+    .endif
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl  $-1, %ecx
+    je      2f
+    cdq                                    # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    .if 0
+    SET_WIDE_VREG %rax, rINSTq           # vA <- result
+    .else
+    SET_VREG %eax, rINSTq                # vA <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+2:
+    .if 0
+    xorl %eax, %eax
+    .else
+    negl %eax
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: x86_64/op_rem_int_2addr.S */
+/* File: x86_64/bindiv2addr.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # rcx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    GET_VREG %ecx, %rcx                  # ecx <- vB
+    .endif
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl  $-1, %ecx
+    je      2f
+    cdq                                    # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    .if 0
+    SET_WIDE_VREG %rdx, rINSTq           # vA <- result
+    .else
+    SET_VREG %edx, rINSTq                # vA <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+2:
+    .if 1
+    xorl %edx, %edx
+    .else
+    negl %edx
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: x86_64/op_and_int_2addr.S */
+/* File: x86_64/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    andl    %eax, (rFP,%rcx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: x86_64/op_or_int_2addr.S */
+/* File: x86_64/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    orl     %eax, (rFP,%rcx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: x86_64/op_xor_int_2addr.S */
+/* File: x86_64/binop2addr.S */
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    xorl    %eax, (rFP,%rcx,4)                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: x86_64/op_shl_int_2addr.S */
+/* File: x86_64/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    sall    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    sall    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
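+
+/*
+ * Note: variable x86 shifts take their count only in %cl, which is why
+ * the shift/2addr template fetches vB into %ecx before decoding A.  In
+ * C-like terms for shl-int/2addr:
+ *
+ *     vreg[A] = vreg[A] << (vreg[B] & 31);
+ */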
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: x86_64/op_shr_int_2addr.S */
+/* File: x86_64/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    sarl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    sarl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: x86_64/op_ushr_int_2addr.S */
+/* File: x86_64/shop2addr.S */
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 0
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    shrl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    shrl    %cl, %eax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: x86_64/op_add_long_2addr.S */
+/* File: x86_64/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    addq    %rax, (rFP,%rcx,4)                                  # for ex: addq   %rax,(rFP,%rcx,4)
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
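+
+/*
+ * Illustrative sketch (not generated output): the wide /2addr form
+ * operates on the 64-bit vA pair in place, and CLEAR_WIDE_REF clears
+ * the shadow reference slots for that pair.  For add-long/2addr, in
+ * C-like terms:
+ *
+ *     wide_vreg[A] = wide_vreg[A] + wide_vreg[B];
+ */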
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: x86_64/op_sub_long_2addr.S */
+/* File: x86_64/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    subq    %rax, (rFP,%rcx,4)                                  # for ex: addq   %rax,(rFP,%rcx,4)
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: x86_64/op_mul_long_2addr.S */
+    /* mul vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, %rcx                # rax <- vA
+    imulq   (rFP,rINSTq,4), %rax
+    SET_WIDE_VREG %rax, %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: x86_64/op_div_long_2addr.S */
+/* File: x86_64/bindiv2addr.S */
+/*
+ * 64-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # rcx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    GET_VREG %ecx, %rcx                  # ecx <- vB
+    .endif
+    testq   %rcx, %rcx
+    jz      common_errDivideByZero
+    cmpq  $-1, %rcx
+    je      2f
+    cqo                                    # rdx:rax <- sign-extended rax
+    idivq   %rcx
+1:
+    .if 1
+    SET_WIDE_VREG %rax, rINSTq           # vA <- result
+    .else
+    SET_VREG %eax, rINSTq                # vA <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+2:
+    .if 0
+    xorq %rax, %rax
+    .else
+    negq %rax
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: x86_64/op_rem_long_2addr.S */
+/* File: x86_64/bindiv2addr.S */
+/*
+ * 64-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # rcx <- B
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    GET_WIDE_VREG %rcx, %rcx             # rcx <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    GET_VREG %ecx, %rcx                  # ecx <- vB
+    .endif
+    testq   %rcx, %rcx
+    jz      common_errDivideByZero
+    cmpq  $-1, %rcx
+    je      2f
+    cqo                                    # rdx:rax <- sign-extended rax
+    idivq   %rcx
+1:
+    .if 1
+    SET_WIDE_VREG %rdx, rINSTq           # vA <- result
+    .else
+    SET_VREG %edx, rINSTq                # vA <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+2:
+    .if 1
+    xorq %rdx, %rdx
+    .else
+    negq %rdx
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: x86_64/op_and_long_2addr.S */
+/* File: x86_64/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    andq    %rax, (rFP,%rcx,4)                                  # for ex: addq   %rax,(rFP,%rcx,4)
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: x86_64/op_or_long_2addr.S */
+/* File: x86_64/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    orq     %rax, (rFP,%rcx,4)                                  # for ex: addq   %rax,(rFP,%rcx,4)
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: x86_64/op_xor_long_2addr.S */
+/* File: x86_64/binopWide2addr.S */
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    andb    $0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    xorq    %rax, (rFP,%rcx,4)                                  # for ex: addq   %rax,(rFP,%rcx,4)
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: x86_64/op_shl_long_2addr.S */
+/* File: x86_64/shop2addr.S */
+/*
+ * Generic 64-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    salq    %cl, %rax                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    salq    %cl, %rax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: x86_64/op_shr_long_2addr.S */
+/* File: x86_64/shop2addr.S */
+/*
+ * Generic 64-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    sarq    %cl, %rax                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    sarq    %cl, %rax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86_64/op_ushr_long_2addr.S */
+/* File: x86_64/shop2addr.S */
+/*
+ * Generic 64-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    .if 1
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vA
+    shrq    %cl, %rax                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    shrq    %cl, %rax                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: x86_64/op_add_float_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addss VREG_ADDRESS(rINSTq), %xmm0
+    movss %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
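+
+/*
+ * Note: in the /2addr SSE handlers the result is stored to vA (indexed
+ * by %rcx), so it is vA's shadow reference slot that the pxor/movss
+ * pair must clear, matching the non-2addr SSE handlers, which clear the
+ * slot of the register they wrote.
+ */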
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: x86_64/op_sub_float_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subss VREG_ADDRESS(rINSTq), %xmm0
+    movss %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: x86_64/op_mul_float_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulss VREG_ADDRESS(rINSTq), %xmm0
+    movss %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: x86_64/op_div_float_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movss VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divss VREG_ADDRESS(rINSTq), %xmm0
+    movss %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movss %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: x86_64/op_rem_float_2addr.S */
+    /* rem_float/2addr vA, vB */
+    movzbq  rINSTbl, %rcx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINSTq)            # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%rcx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%rcx)              # %st to vA
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: x86_64/op_add_double_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    addsd VREG_ADDRESS(rINSTq), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: x86_64/op_sub_double_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    subsd VREG_ADDRESS(rINSTq), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: x86_64/op_mul_double_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    mulsd VREG_ADDRESS(rINSTq), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: x86_64/op_div_double_2addr.S */
+/* File: x86_64/sseBinop2Addr.S */
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $0xf, %ecx                     # ecx <- A
+    movsd VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $4, rINST                      # rINST<- B
+    divsd VREG_ADDRESS(rINSTq), %xmm0
+    movsd %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movsd %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: x86_64/op_rem_double_2addr.S */
+    /* rem_double/2addr vA, vB */
+    movzbq  rINSTbl, %rcx                   # ecx <- A+
+    sarl    $4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINSTq)            # vB to fp stack
+    andb    $0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%rcx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%rcx)              # %st to vA
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: x86_64/op_add_int_lit16.S */
+/* File: x86_64/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    addl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
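+
+/*
+ * Illustrative sketch (not generated output): movswl sign-extends the
+ * 16-bit literal, so for add-int/lit16 the handler computes, in C-like
+ * terms:
+ *
+ *     vreg[A] = (int32_t)(vreg[B] + (int16_t) CCCC);
+ */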
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: x86_64/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: x86_64/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    subl    %eax, %ecx                                  # for example: addl %ecx, %eax
+    SET_VREG %ecx, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
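+
+/*
+ * Note: rsub reverses the operands (the literal is the minuend), which
+ * is why the instr slot here is "subl %eax, %ecx" with the result kept
+ * in %ecx.  In C-like terms:
+ *
+ *     vreg[A] = (int32_t)((int16_t) CCCC - vreg[B]);
+ */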
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: x86_64/op_mul_int_lit16.S */
+/* File: x86_64/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    imull   %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: x86_64/op_div_int_lit16.S */
+/* File: x86_64/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl    $-1, %ecx
+    je      2f
+    cdq                                     # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    SET_VREG %eax, rINSTq                # vA <- result
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 0
+    xorl    %eax, %eax
+    .else
+    negl    %eax
+    .endif
+    jmp     1b
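+
+/*
+ * Note: the lit16 div/rem handlers keep the same -1 guard as bindiv.S;
+ * when the literal is -1, idivl is skipped and branch 2: yields -vB for
+ * div or 0 for rem, which also avoids the divide fault on vB == INT_MIN.
+ */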
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: x86_64/op_rem_int_lit16.S */
+/* File: x86_64/bindivLit16.S */
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl    $-1, %ecx
+    je      2f
+    cdq                                     # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    SET_VREG %edx, rINSTq                # vA <- result
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 1
+    xorl    %edx, %edx
+    .else
+    negl    %edx
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: x86_64/op_and_int_lit16.S */
+/* File: x86_64/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andl    %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: x86_64/op_or_int_lit16.S */
+/* File: x86_64/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    orl     %ecx, %eax                                  # for example: addl %ecx, %eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: x86_64/op_xor_int_lit16.S */
+/* File: x86_64/binopLit16.S */
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    xorl    %ecx, %eax                      # eax <- vB ^ ssssCCCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: x86_64/op_add_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    addl    %ecx, %eax                      # eax <- vBB + ssssssCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
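+/*
+ * A hedged sketch of the 22b-format decode above (C-like, names
+ * illustrative; `p` points at the current instruction's bytes):
+ *
+ *   uint32_t aa = p[1];                 // destination vAA (in rINST)
+ *   uint32_t bb = p[2];                 // source register index
+ *   int32_t  cc = (int8_t)p[3];         // sign-extended literal
+ *   vregs[aa] = vregs[bb] + cc;         // this instance: add-int/lit8
+ */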
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86_64/op_rsub_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    subl    %eax, %ecx                      # ecx <- ssssssCC - vBB (reverse subtract)
+    SET_VREG %ecx, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: x86_64/op_mul_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    imull   %ecx, %eax                      # eax <- vBB * ssssssCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: x86_64/op_div_int_lit8.S */
+/* File: x86_64/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax, %rax                    # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    je      2f
+    cdq                                     # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    SET_VREG %eax, rINSTq                # vAA <- quotient
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 0
+    xorl    %eax, %eax
+    .else
+    negl    %eax
+    .endif
+    jmp     1b
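+/*
+ * A hedged note on the `.if 0 ... .else negl %eax` arm above: for a
+ * divisor of -1 the quotient is just the negated dividend, in C-like
+ * terms (illustrative):
+ *
+ *   int32_t q = (cc == -1) ? -vBB : vBB / cc;
+ *
+ * On two's complement, negl leaves INT_MIN unchanged
+ * (-INT32_MIN == INT32_MIN), which matches Java's defined result for
+ * Integer.MIN_VALUE / -1 while avoiding the #DE fault idivl raises in
+ * that case.
+ */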
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: x86_64/op_rem_int_lit8.S */
+/* File: x86_64/bindivLit8.S */
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax, %rax                    # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $-1, %ecx
+    je      2f
+    cdq                                     # edx:eax <- sign-extended eax
+    idivl   %ecx
+1:
+    SET_VREG %edx, rINSTq                # vAA <- remainder
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if 1
+    xorl    %edx, %edx
+    .else
+    negl    %edx
+    .endif
+    jmp     1b
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: x86_64/op_and_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    andl    %ecx, %eax                      # eax <- vBB & ssssssCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: x86_64/op_or_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    orl     %ecx, %eax                      # eax <- vBB | ssssssCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: x86_64/op_xor_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    xorl    %ecx, %eax                      # eax <- vBB ^ ssssssCC
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: x86_64/op_shl_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    sall    %cl, %eax                       # eax <- vBB << (CC & 31)
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: x86_64/op_shr_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    sarl    %cl, %eax                       # eax <- vBB >> (CC & 31), arithmetic
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86_64/op_ushr_int_lit8.S */
+/* File: x86_64/binopLit8.S */
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    shrl    %cl, %eax                       # eax <- vBB >> (CC & 31), logical
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: x86_64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf,rINSTbl                   # rINST <- A
+    .if 0
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    movl (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
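+/*
+ * A hedged C sketch of the quickened get above (names illustrative;
+ * the byte offset was pre-resolved by the quickening pass):
+ *
+ *   uint8_t* obj = (uint8_t*)vregs[b];
+ *   if (obj == NULL) goto common_errNullObject;
+ *   vregs[a] = *(int32_t*)(obj + offset);   // offset@CCCC
+ */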
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: x86_64/op_iget_wide_quick.S */
+/* File: x86_64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf,rINSTbl                   # rINST <- A
+    .if 1
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    movswl (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: x86_64/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    .extern artIGetObjectFromMterp
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG0, %rcx              # vB (object we're operating on)
+    movzwl  2(rPC), OUT_32_ARG1             # OUT_32_ARG1 <- field byte offset
+    EXPORT_PC
+    callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $0xf, rINSTbl                  # rINST <- A
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
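+/*
+ * A hedged sketch of the object variant above (C-like; the helper
+ * artIGetObjectFromMterp is the real entry point, everything else is
+ * illustrative). The reference load goes through the runtime, which
+ * handles the null check and any read barrier, then the thread-local
+ * exception slot is tested:
+ *
+ *   ref = artIGetObjectFromMterp(obj, offset);
+ *   if (self->exception != NULL) goto MterpException;
+ *   set_vreg_object(a, ref);   // also updates the reference slot
+ */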
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: x86_64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINSTq                  # rINST <- v[A]
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    movl    rINST, (%rcx,%rax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: x86_64/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbq    rINSTbl, %rcx                 # rcx<- BA
+    sarl      $4, %ecx                     # ecx<- B
+    GET_VREG  %ecx, %rcx                    # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwq    2(rPC), %rax                  # rax<- field byte offset
+    leaq      (%rcx,%rax,1), %rcx           # rcx<- address of 64-bit target field
+    andb      $0xf, rINSTbl                # rINST<- A
+    GET_WIDE_VREG %rax, rINSTq              # rax<- fp[A]/fp[A+1]
+    movq      %rax, (%rcx)                  # obj.field<- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
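+/*
+ * A hedged C sketch of the wide store above (names illustrative):
+ *
+ *   uint8_t* obj = (uint8_t*)vregs[b];
+ *   if (obj == NULL) goto common_errNullObject;
+ *   *(int64_t*)(obj + offset) = wide_vreg(a);   // fp[A]/fp[A+1]
+ */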
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: x86_64/op_iput_object_quick.S */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST 232
+    movl    rINST, OUT_32_ARG2
+    call    SYMBOL(MterpIputObjectQuick)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86_64/op_invoke_virtual_quick.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 233
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeVirtualQuick)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
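+/*
+ * A hedged sketch of the wrapper's control flow (C-like; the two
+ * helper names are the real entry points, everything else is
+ * illustrative):
+ *
+ *   if (!MterpInvokeVirtualQuick(self, shadow_frame, pc, inst))
+ *       goto MterpException;             // the call raised
+ *   pc += 3;                             // 3-code-unit instruction
+ *   if (MterpShouldSwitchInterpreters())
+ *       goto MterpFallback;              // e.g. instrumentation active
+ *   dispatch(pc);                        // FETCH_INST / GOTO_NEXT
+ */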
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86_64/op_invoke_virtual_range_quick.S */
+/* File: x86_64/invoke.S */
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST 234
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL(MterpInvokeVirtualQuickRange)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: x86_64/op_iput_boolean_quick.S */
+/* File: x86_64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINSTq                  # rINST <- v[A]
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    movb    rINSTbl, (%rcx,%rax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: x86_64/op_iput_byte_quick.S */
+/* File: x86_64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINSTq                  # rINST <- v[A]
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    movb    rINSTbl, (%rcx,%rax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: x86_64/op_iput_char_quick.S */
+/* File: x86_64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINSTq                  # rINST <- v[A]
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    movw    rINSTw, (%rcx,%rax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: x86_64/op_iput_short_quick.S */
+/* File: x86_64/op_iput_quick.S */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINSTq                  # rINST <- v[A]
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    movw    rINSTw, (%rcx,%rax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: x86_64/op_iget_boolean_quick.S */
+/* File: x86_64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf,rINSTbl                   # rINST <- A
+    .if 0
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    movsbl (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: x86_64/op_iget_byte_quick.S */
+/* File: x86_64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf,rINSTbl                   # rINST <- A
+    .if 0
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    movsbl (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: x86_64/op_iget_char_quick.S */
+/* File: x86_64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf,rINSTbl                   # rINST <- A
+    .if 0
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    movzwl (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: x86_64/op_iget_short_quick.S */
+/* File: x86_64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $0xf,rINSTbl                   # rINST <- A
+    .if 0
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    movswl (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f3: /* 0xf3 */
+/* File: x86_64/op_unused_f3.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: x86_64/op_unused_f4.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f5: /* 0xf5 */
+/* File: x86_64/op_unused_f5.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f6: /* 0xf6 */
+/* File: x86_64/op_unused_f6.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f7: /* 0xf7 */
+/* File: x86_64/op_unused_f7.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f8: /* 0xf8 */
+/* File: x86_64/op_unused_f8.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f9: /* 0xf9 */
+/* File: x86_64/op_unused_f9.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: x86_64/op_unused_fa.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: x86_64/op_unused_fb.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: x86_64/op_unused_fc.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: x86_64/op_unused_fd.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: x86_64/op_unused_fe.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: x86_64/op_unused_ff.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
+
+
+    .balign 128
+    SIZE(SYMBOL(artMterpAsmInstructionStart),SYMBOL(artMterpAsmInstructionStart))
+    .global SYMBOL(artMterpAsmInstructionEnd)
+SYMBOL(artMterpAsmInstructionEnd):
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global SYMBOL(artMterpAsmSisterStart)
+    FUNCTION_TYPE(SYMBOL(artMterpAsmSisterStart))
+    .text
+    .balign 4
+SYMBOL(artMterpAsmSisterStart):
+
+    SIZE(SYMBOL(artMterpAsmSisterStart),SYMBOL(artMterpAsmSisterStart))
+    .global SYMBOL(artMterpAsmSisterEnd)
+SYMBOL(artMterpAsmSisterEnd):
+
+
+    .global SYMBOL(artMterpAsmAltInstructionStart)
+    FUNCTION_TYPE(SYMBOL(artMterpAsmAltInstructionStart))
+    .text
+
+SYMBOL(artMterpAsmAltInstructionStart) = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(0*128)
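+/*
+ * A hedged note on the computed jump above: every handler is padded to
+ * 128 bytes (.balign 128), so the real handler for opcode N sits at a
+ * fixed displacement from the table base. In C-like terms
+ * (illustrative):
+ *
+ *   target = handler_table_base + opcode * 128;   // base is .L_op_nop
+ */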
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(1*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(2*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(3*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(4*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(5*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(6*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(7*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(8*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(9*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(10*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(11*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(12*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(13*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(14*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(15*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(16*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(17*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(18*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(19*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(20*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(21*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(22*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(23*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(24*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(25*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(26*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(27*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(28*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(29*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(30*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(31*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(32*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(33*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(34*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(35*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(36*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(37*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(38*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(39*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(40*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(41*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(42*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(43*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(44*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(45*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(46*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(47*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(48*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(49*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(50*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(51*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(52*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(53*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(54*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(55*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(56*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(57*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(58*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(59*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(60*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(61*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(62*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(63*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(64*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(65*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(66*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(67*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(68*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(69*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(70*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(71*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(72*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(73*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(74*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(75*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(76*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(77*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(78*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(79*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(80*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(81*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(82*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(83*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(84*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(85*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(86*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(87*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(88*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(89*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(90*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(91*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(92*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(93*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(94*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(95*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(96*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(97*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(98*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(99*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(100*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(101*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(102*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(103*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(104*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(105*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(106*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(107*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(108*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(109*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(110*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(111*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(112*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(113*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(114*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(115*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(116*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(117*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(118*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(119*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(120*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(121*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(122*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(123*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(124*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(125*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(126*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(127*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(128*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(129*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(130*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(131*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(132*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(133*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(134*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(135*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(136*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(137*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(138*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(139*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(140*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(141*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(142*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(143*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(144*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(145*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(146*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(147*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(148*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(149*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(150*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(151*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(152*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(153*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(154*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(155*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(156*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(157*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(158*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(159*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(160*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(161*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(162*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(163*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(164*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(165*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(166*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(167*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(168*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(169*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(170*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(171*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(172*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(173*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(174*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(175*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(176*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(177*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(178*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(179*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(180*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(181*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(182*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(183*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(184*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(185*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(186*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(187*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(188*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(189*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(190*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(191*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(192*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(193*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(194*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(195*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(196*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(197*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(198*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(199*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(200*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(201*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(202*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(203*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(204*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(205*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(206*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(207*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(208*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(209*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(210*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(211*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(212*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(213*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(214*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(215*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(216*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(217*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(218*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(219*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(220*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(221*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(222*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(223*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(224*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(225*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(226*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(227*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(228*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(229*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(230*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(231*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(232*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(233*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(234*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(235*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(236*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(237*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(238*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(239*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(240*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(241*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(242*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(243*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(244*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(245*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(246*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(247*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(248*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(249*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(250*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(251*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(252*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(253*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(254*128)
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: x86_64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(255*128)
+
+    .balign 128
+    SIZE(SYMBOL(artMterpAsmAltInstructionStart),SYMBOL(artMterpAsmAltInstructionStart))
+    .global SYMBOL(artMterpAsmAltInstructionEnd)
+SYMBOL(artMterpAsmAltInstructionEnd):
+/* File: x86_64/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogDivideByZeroException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogArrayIndexException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogNegativeArraySizeException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogNoSuchMethodException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogNullObjectException)
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogExceptionThrownException)
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
+    call    SYMBOL(MterpLogSuspendFallback)
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpHandleException)
+    testb   %al, %al
+    jz      MterpExceptionReturn
+    movq    OFF_FP_CODE_ITEM(rFP), %rax
+    mov     OFF_FP_DEX_PC(rFP), %ecx
+    leaq    CODEITEM_INSNS_OFFSET(%rax), rPC
+    leaq    (rPC, %rcx, 2), rPC
+    movq    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* Do we need to switch interpreters? */
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    /* resume execution at catch block */
+    REFRESH_IBASE
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
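+/*
+ * Rough C sketch of the decision flow described above (illustrative only;
+ * the real work is done by the assembly below):
+ *
+ *   if (offset > 0) {                            // taken forward branch
+ *       if (rPROFILE == JIT_CHECK_OSR) MterpMaybeDoOnStackReplacement(...);
+ *   } else {                                     // taken backward branch
+ *       if (rPROFILE == JIT_CHECK_OSR) MterpMaybeDoOnStackReplacement(...);
+ *       else if (--rPROFILE == 0)      MterpAddHotnessBatch(...);
+ *       if (suspend_or_checkpoint_requested) MterpSuspendCheck(self);
+ *   }
+ */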
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decl    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movq    rSELF, %rax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+    REFRESH_IBASE
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    GOTO_NEXT
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
+    GOTO_NEXT
+
+.L_no_count_backwards:
+    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movswl  %ax, rPROFILE
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    $2, OUT_32_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    rINST, OUT_32_ARG2
+    call    SYMBOL(MterpLogOSR)
+#endif
+    movl    $1, %eax
+    jmp     MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogFallback)
+#endif
+MterpCommonFallback:
+    xorl    %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rdx
+    movq    %rax, (%rdx)
+    movl    $1, %eax
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
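+/*
+ * Equivalent C sketch of the check below (illustrative; the countdown field
+ * name is approximate):
+ *
+ *   if (rPROFILE > 0) {                          // unreported hotness counts
+ *       shadow_frame->hotness_countdown_ = rPROFILE;
+ *       MterpAddHotnessBatch(method, shadow_frame, self);
+ *   }
+ */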
+    testl   rPROFILE, rPROFILE
+    jle     MRestoreFrame                   # if rPROFILE <= 0, no counts to report
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
+    /* pop up frame */
+MRestoreFrame:
+    addq    $FRAME_SIZE, %rsp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+
+    /* Restore callee save register */
+    POP %r15
+    POP %r14
+    POP %r13
+    POP %r12
+    POP %rbp
+    POP %rbx
+    ret
+    .cfi_endproc
+    SIZE(ExecuteMterpImpl,ExecuteMterpImpl)
+
diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
new file mode 100755
index 0000000..ca3dcd9
--- /dev/null
+++ b/runtime/interpreter/mterp/rebuild.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Rebuild for all known targets.  Necessary until the stuff in "out" gets
+# generated as part of the build.
+#
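+# Usage (assumed to be run from this mterp directory): ./rebuild.sh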
+set -e
+
+for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
diff --git a/runtime/interpreter/mterp/x86/alt_stub.S b/runtime/interpreter/mterp/x86/alt_stub.S
new file mode 100644
index 0000000..a5b39b8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/alt_stub.S
@@ -0,0 +1,19 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    REFRESH_IBASE
+    jmp     .L_op_nop+(${opnum}*${handler_size_bytes})
diff --git a/runtime/interpreter/mterp/x86/bincmp.S b/runtime/interpreter/mterp/x86/bincmp.S
new file mode 100644
index 0000000..ee32278
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bincmp.S
@@ -0,0 +1,21 @@
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
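+/*
+ * Worked example (illustrative): for "if-le vA, vB" this template is
+ * generated with revcmp="gt", so the resulting "jg 1f" below skips the
+ * branch exactly when vA > vB, i.e. when the if-le condition fails.
+ */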
+    /* if-cmp vA, vB, +CCCC */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %ecx                     # eax <- vA
+    sarl    $$4, rINST                      # rINST <- B
+    cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    j${revcmp}   1f
+    movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/bindiv.S b/runtime/interpreter/mterp/x86/bindiv.S
new file mode 100644
index 0000000..e87ba45
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindiv.S
@@ -0,0 +1,48 @@
+%default {"result":"","special":"","rem":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
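+/*
+ * Worked example (illustrative): Java requires -2147483648 / -1 to yield
+ * -2147483648 and -2147483648 % -1 to yield 0, while x86 idivl would trap
+ * on that operand pair; hence the explicit $special result below.
+ */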
+    /* div/rem vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    mov     rIBASE, LOCAL0(%esp)
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    movl    %eax, %edx
+    orl     %ecx, %edx
+    testl   $$0xFFFFFF00, %edx              # If both operands fit in
+                                            #   unsigned 8 bits
+    jz      .L${opcode}_8                   # Do 8-bit divide
+    testl   $$0xFFFF0000, %edx              # If both operands fit in
+                                            #   unsigned 16 bits
+    jz      .L${opcode}_16                  # Do 16-bit divide
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_32
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_32
+    movl    $special, $result
+    jmp     .L${opcode}_finish
+.L${opcode}_32:
+    cltd
+    idivl   %ecx
+    jmp     .L${opcode}_finish
+.L${opcode}_8:
+    div     %cl                             # 8-bit divide otherwise.
+                                            # Remainder in %ah, quotient in %al
+    .if $rem
+    movl    %eax, %edx
+    shr     $$8, %edx
+    .else
+    andl    $$0x000000FF, %eax
+    .endif
+    jmp     .L${opcode}_finish
+.L${opcode}_16:
+    xorl    %edx, %edx                      # Clear %edx before divide
+    div     %cx
+.L${opcode}_finish:
+    SET_VREG $result, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/bindiv2addr.S b/runtime/interpreter/mterp/x86/bindiv2addr.S
new file mode 100644
index 0000000..e620996
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindiv2addr.S
@@ -0,0 +1,29 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    mov     rIBASE, LOCAL0(%esp)
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div2addr
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div2addr
+    movl    $special, $result
+    SET_VREG $result, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.L${opcode}_continue_div2addr:
+    cltd
+    idivl   %ecx
+    SET_VREG $result, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/bindivLit16.S b/runtime/interpreter/mterp/x86/bindivLit16.S
new file mode 100644
index 0000000..be094ae
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindivLit16.S
@@ -0,0 +1,29 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit binary div/rem operation.  Handles special case of op0=minint and
+ * op1=-1.
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div
+    movl    $special, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG $result, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/bindivLit8.S b/runtime/interpreter/mterp/x86/bindivLit8.S
new file mode 100644
index 0000000..fddb545
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/bindivLit8.S
@@ -0,0 +1,26 @@
+%default {"result":"","special":""}
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0=minint & op1=-1
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax, %eax                    # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$0x80000000, %eax
+    jne     .L${opcode}_continue_div
+    cmpl    $$-1, %ecx
+    jne     .L${opcode}_continue_div
+    movl    $special, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_continue_div:
+    mov     rIBASE, LOCAL0(%esp)
+    cltd
+    idivl   %ecx
+    SET_VREG $result, rINST
+    mov     LOCAL0(%esp), rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop.S b/runtime/interpreter/mterp/x86/binop.S
new file mode 100644
index 0000000..d895235
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop.S
@@ -0,0 +1,17 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%ecx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    $instr                                  # ex: addl    (rFP,%ecx,4),%eax
+    SET_VREG $result, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop1.S b/runtime/interpreter/mterp/x86/binop1.S
new file mode 100644
index 0000000..5049bb3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop1.S
@@ -0,0 +1,13 @@
+%default {"result":"%eax","tmp":"%ecx"}
+/*
+ * Generic 32-bit binary operation in which both operands loaded to
+ * registers (op0 in eax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC),%eax                     # eax <- BB
+    movzbl  3(rPC),%ecx                     # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    $instr                                  # ex: addl    %ecx,%eax
+    SET_VREG $result, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binop2addr.S b/runtime/interpreter/mterp/x86/binop2addr.S
new file mode 100644
index 0000000..f126234
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binop2addr.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    $instr                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/binopLit16.S b/runtime/interpreter/mterp/x86/binopLit16.S
new file mode 100644
index 0000000..2fd59de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopLit16.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    $instr                                  # for example: addl %ecx, %eax
+    SET_VREG $result, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopLit8.S b/runtime/interpreter/mterp/x86/binopLit8.S
new file mode 100644
index 0000000..67cead2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopLit8.S
@@ -0,0 +1,18 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    $instr                                  # ex: addl %ecx,%eax
+    SET_VREG $result, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopWide.S b/runtime/interpreter/mterp/x86/binopWide.S
new file mode 100644
index 0000000..da1293d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopWide.S
@@ -0,0 +1,15 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)            # save rIBASE
+    GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
+    $instr1                                 # ex: addl   (rFP,%ecx,4),rIBASE
+    $instr2                                 # ex: adcl   4(rFP,%ecx,4),%eax
+    SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE            # restore rIBASE
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/binopWide2addr.S b/runtime/interpreter/mterp/x86/binopWide2addr.S
new file mode 100644
index 0000000..da816f4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/binopWide2addr.S
@@ -0,0 +1,13 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $$4, %ecx                       # ecx<- B
+    GET_VREG %eax, %ecx                     # eax<- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx<- v[B+1]
+    andb    $$0xF, rINSTbl                  # rINST<- A
+    $instr1                                 # ex: addl   %eax,(rFP,rINST,4)
+    $instr2                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/cvtfp_int.S b/runtime/interpreter/mterp/x86/cvtfp_int.S
new file mode 100644
index 0000000..a8bad63
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/cvtfp_int.S
@@ -0,0 +1,61 @@
+%default {"srcdouble":"1","tgtlong":"1"}
+/* On fp to int conversions, Java requires that
+ * if the result > maxint, it should be clamped to maxint.  If it is less
+ * than minint, it should be clamped to minint.  If it is a nan, the result
+ * should be zero.  Further, the rounding mode is to truncate.  This model
+ * differs from what is delivered normally via the x86 fpu, so we have
+ * to play some games.
+ */
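+/*
+ * Minimal C sketch of the required semantics (illustrative only):
+ *
+ *   int32_t fp_to_int(double x) {
+ *       if (x != x) return 0;                        // NaN -> 0
+ *       if (x >= (double)INT32_MAX) return INT32_MAX;
+ *       if (x <= (double)INT32_MIN) return INT32_MIN;
+ *       return (int32_t)x;                           // truncate toward zero
+ *   }
+ */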
+    /* float/double to int/long vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    .if $srcdouble
+    fldl    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .else
+    flds    VREG_ADDRESS(rINST)             # %st0 <- vB
+    .endif
+    ftst
+    fnstcw  LOCAL0(%esp)                    # remember original rounding mode
+    movzwl  LOCAL0(%esp), %eax
+    movb    $$0xc, %ah
+    movw    %ax, LOCAL0+2(%esp)
+    fldcw   LOCAL0+2(%esp)                  # set "to zero" rounding mode
+    andb    $$0xf, %cl                      # ecx <- A
+    .if $tgtlong
+    fistpll VREG_ADDRESS(%ecx)              # convert and store
+    .else
+    fistpl  VREG_ADDRESS(%ecx)              # convert and store
+    .endif
+    fldcw   LOCAL0(%esp)                    # restore previous rounding mode
+    .if $tgtlong
+    movl    $$0x80000000, %eax
+    xorl    VREG_HIGH_ADDRESS(%ecx), %eax
+    orl     VREG_ADDRESS(%ecx), %eax
+    .else
+    cmpl    $$0x80000000, VREG_ADDRESS(%ecx)
+    .endif
+    je      .L${opcode}_special_case # fix up result
+
+.L${opcode}_finish:
+    xor     %eax, %eax
+    mov     %eax, VREG_REF_ADDRESS(%ecx)
+    .if $tgtlong
+    mov     %eax, VREG_REF_HIGH_ADDRESS(%ecx)
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+.L${opcode}_special_case:
+    fnstsw  %ax
+    sahf
+    jp      .L${opcode}_isNaN
+    adcl    $$-1, VREG_ADDRESS(%ecx)
+    .if $tgtlong
+    adcl    $$-1, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .L${opcode}_finish
+.L${opcode}_isNaN:
+    movl    $$0, VREG_ADDRESS(%ecx)
+    .if $tgtlong
+    movl    $$0, VREG_HIGH_ADDRESS(%ecx)
+    .endif
+    jmp     .L${opcode}_finish
diff --git a/runtime/interpreter/mterp/x86/entry.S b/runtime/interpreter/mterp/x86/entry.S
new file mode 100644
index 0000000..384dd9a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/entry.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global SYMBOL(ExecuteMterpImpl)
+    FUNCTION_TYPE(ExecuteMterpImpl)
+
+/*
+ * On entry:
+ *  0  Thread* self
+ *  1  code_item
+ *  2  ShadowFrame
+ *  3  JValue* result_register
+ *
+ */
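+/*
+ * Roughly the C-visible signature (a sketch; see the runtime's own
+ * declaration for the authoritative one):
+ *
+ *   extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+ *                                    ShadowFrame* shadow_frame, JValue* result_register);
+ */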
+
+SYMBOL(ExecuteMterpImpl):
+    .cfi_startproc
+    .cfi_def_cfa esp, 4
+
+    /* Spill callee save regs */
+    PUSH    %ebp
+    PUSH    %edi
+    PUSH    %esi
+    PUSH    %ebx
+
+    /* Allocate frame */
+    subl    $$FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Load ShadowFrame pointer */
+    movl    IN_ARG2(%esp), %edx
+
+    /* Remember the return register */
+    movl    IN_ARG3(%esp), %eax
+    movl    %eax, SHADOWFRAME_RESULT_REGISTER_OFFSET(%edx)
+
+    /* Remember the code_item */
+    movl    IN_ARG1(%esp), %ecx
+    movl    %ecx, SHADOWFRAME_CODE_ITEM_OFFSET(%edx)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(%edx), %eax
+    leal    SHADOWFRAME_VREGS_OFFSET(%edx), rFP
+    leal    (rFP, %eax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
+    lea     CODEITEM_INSNS_OFFSET(%ecx), rPC
+    lea     (rPC, %eax, 2), rPC
+    EXPORT_PC
+
+    /* Set up for backwards branches & osr profiling */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+
+    /* Starting ibase */
+    REFRESH_IBASE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
diff --git a/runtime/interpreter/mterp/x86/fallback.S b/runtime/interpreter/mterp/x86/fallback.S
new file mode 100644
index 0000000..8d61166
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
new file mode 100644
index 0000000..e8c8ca8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -0,0 +1,326 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogDivideByZeroException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogArrayIndexException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogNegativeArraySizeException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogNoSuchMethodException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogNullObjectException)
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogExceptionThrownException)
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    THREAD_FLAGS_OFFSET(%eax), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpLogSuspendFallback)
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movl    rSELF, %eax
+    testl   $$-1, THREAD_EXCEPTION_OFFSET(%eax)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ */
+MterpException:
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpHandleException)
+    testb   %al, %al
+    jz      MterpExceptionReturn
+    movl    OFF_FP_CODE_ITEM(rFP), %eax
+    movl    OFF_FP_DEX_PC(rFP), %ecx
+    lea     CODEITEM_INSNS_OFFSET(%eax), rPC
+    lea     (rPC, %ecx, 2), rPC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* Do we need to switch interpreters? */
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    /* resume execution at catch block */
+    REFRESH_IBASE
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpw    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decw    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    REFRESH_IBASE
+    GOTO_NEXT
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)            # rSELF in eax
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
+    GOTO_NEXT
+
+.L_no_count_backwards:
+    cmpw    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpw    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    $$2, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpLogOSR)
+#endif
+    movl    $$1, %eax
+    jmp     MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    lea     OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpLogFallback)
+#endif
+MterpCommonFallback:
+    xor     %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $$1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movl    OFF_FP_RESULT_REGISTER(rFP), %edx
+    movl    %eax, (%edx)
+    movl    %ecx, 4(%edx)
+    mov     $$1, %eax
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmpw    $$0, rPROFILE
+    jle     MRestoreFrame                   # if rPROFILE <= 0, no counts to report
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
+    /* pop up frame */
+MRestoreFrame:
+    addl    $$FRAME_SIZE, %esp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+
+    /* Restore callee save register */
+    POP     %ebx
+    POP     %esi
+    POP     %edi
+    POP     %ebp
+    ret
+    .cfi_endproc
+    SIZE(ExecuteMterpImpl,ExecuteMterpImpl)
diff --git a/runtime/interpreter/mterp/x86/fpcmp.S b/runtime/interpreter/mterp/x86/fpcmp.S
new file mode 100644
index 0000000..5f9eef9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fpcmp.S
@@ -0,0 +1,35 @@
+%default {"suff":"d","nanval":"pos"}
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx<- CC
+    movzbl  2(rPC), %eax                    # eax<- BB
+    movs${suff} VREG_ADDRESS(%eax), %xmm0
+    xor     %eax, %eax
+    ucomis${suff} VREG_ADDRESS(%ecx), %xmm0
+    jp      .L${opcode}_nan_is_${nanval}
+    je      .L${opcode}_finish
+    jb      .L${opcode}_less
+.L${opcode}_nan_is_pos:
+    incl    %eax
+    jmp     .L${opcode}_finish
+.L${opcode}_nan_is_neg:
+.L${opcode}_less:
+    decl    %eax
+.L${opcode}_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/fpcvt.S b/runtime/interpreter/mterp/x86/fpcvt.S
new file mode 100644
index 0000000..7808285
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/fpcvt.S
@@ -0,0 +1,17 @@
+%default {"instr":"","load":"","store":"","wide":"0"}
+/*
+ * Generic 32-bit FP conversion operation.
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    $load   VREG_ADDRESS(rINST)             # %st0 <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    $instr
+    $store  VREG_ADDRESS(%ecx)              # vA <- %st0
+    .if $wide
+    CLEAR_WIDE_REF %ecx
+    .else
+    CLEAR_REF %ecx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
new file mode 100644
index 0000000..3a2dcb7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl()-style body
+  (doesn't handle invoke; allows higher-level code to create the frame & shadow
+  frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame
+  (and excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86 ABI general notes:
+
+Caller save set:
+   eax, edx, ecx, st(0)-st(7)
+Callee save set:
+   ebx, esi, edi, ebp
+Return regs:
+   32-bit in eax
+   64-bit in edx:eax (low-order 32 in eax)
+   fp on top of fp stack st(0)
+
+Parameters passed on stack, pushed right-to-left.  On entry to target, first
+parm is at 4(%esp).  Traditional entry code is:
+
+functEntry:
+    push    %ebp             # save old frame pointer
+    mov     %esp,%ebp        # establish new frame pointer
+    sub     FrameSize,%esp   # Allocate storage for spill, locals & outs
+
+Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
+
+Stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be esp relative.
+*/
+
+/*
+Mterp and x86 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPC      esi   interpreted program counter, used for fetching instructions
+  rFP      edi   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   edx   base of instruction handler table
+  rREFS    ebp   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/*
+ * Handle Mac compiler specifics.
+ */
+#if defined(__APPLE__)
+    #define MACRO_LITERAL(value) $$(value)
+    #define FUNCTION_TYPE(name)
+    #define SIZE(start,end)
+    // Mac OS' symbols have an _ prefix.
+    #define SYMBOL(name) _ ## name
+#else
+    #define MACRO_LITERAL(value) $$value
+    #define FUNCTION_TYPE(name) .type name, @function
+    #define SIZE(start,end) .size start, .-end
+    #define SYMBOL(name) name
+#endif
+
+.macro PUSH _reg
+    pushl \_reg
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset \_reg, 0
+.endm
+
+.macro POP _reg
+    popl \_reg
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore \_reg
+.endm
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
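+
+/*
+ * For example, OFF_FP_DEX_PC expands to
+ * (SHADOWFRAME_DEX_PC_OFFSET - SHADOWFRAME_VREGS_OFFSET), a negative
+ * offset applied to rFP to reach the dex_pc_ field of the shadow frame.
+ */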
+
+/* The frame size must keep the stack 16-byte aligned.
+ * Remember to account for the 4 bytes of return address + 4 * 4 for spills.
+ */
+#define FRAME_SIZE     28
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        (FRAME_SIZE + 16 + 16)
+#define IN_ARG2        (FRAME_SIZE + 16 + 12)
+#define IN_ARG1        (FRAME_SIZE + 16 +  8)
+#define IN_ARG0        (FRAME_SIZE + 16 +  4)
+/* Spill offsets relative to %esp */
+#define LOCAL0         (FRAME_SIZE -  4)
+#define LOCAL1         (FRAME_SIZE -  8)
+#define LOCAL2         (FRAME_SIZE - 12)
+/* Out Arg offsets, relative to %esp */
+#define OUT_ARG3       ( 12)
+#define OUT_ARG2       (  8)
+#define OUT_ARG1       (  4)
+#define OUT_ARG0       (  0)  /* <- ExecuteMterpImpl esp + 0 */
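+
+/*
+ * Note: the extra 16 + 4 in the IN_ARG offsets presumably covers the four
+ * callee-save register pushes and the return address that sit between this
+ * frame and the incoming arguments.
+ */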
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    IN_ARG0(%esp)
+#define rPC      %esi
+#define rFP      %edi
+#define rINST    %ebx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %edx
+#define rREFS    %ebp
+#define rPROFILE OFF_FP_COUNTDOWN_OFFSET(rFP)
+
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movl    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * Refresh handler table.
+ * rIBASE lives in a caller-save register, so we must restore it after each call.
+ * It is also clobbered as the result of some 64-bit operations (like imul), so
+ * we should restore it in those cases as well.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
+ * If rSELF is already loaded, we can refresh rIBASE from that known register.
+ */
+.macro RESTORE_IBASE_FROM_SELF _reg
+    movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
+.endm
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST does not contain the opcode number.
+ * However some utilities require the full value, so this macro
+ * restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    MACRO_LITERAL(\_opnum), rINSTbl
+.endm
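+
+/*
+ * For example, after GOTO_NEXT rINSTbl holds the operand byte; REFRESH_INST
+ * moves it into rINSTbh and rewrites the opcode into rINSTbl, rebuilding the
+ * original 16-bit instruction word in rINSTw.
+ */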
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwl  (rPC), rINST
+.endm
+
+/*
+ * Remove opcode from rINST, compute the address of handler and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl, %eax
+    movzbl  rINSTbh, rINST
+    shll    MACRO_LITERAL(${handler_size_bits}), %eax
+    addl    rIBASE, %eax
+    jmp     *%eax
+.endm
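+
+/*
+ * For example, assuming the usual 128-byte handler stride
+ * (handler_size_bits == 7), opcode 0x90 dispatches to rIBASE + 0x90 * 128.
+ */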
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leal    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4)
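+
+/* For example, VREG_ADDRESS(%ecx) expands to (rFP,%ecx,4), i.e. rFP + vreg * 4. */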
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value to xmm. */
+.macro GET_WIDE_FP_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value from xmm. xmm is clobbered. */
+.macro SET_WIDE_FP_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    pxor    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
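+/*
+ * Object stores write the reference into both the vreg array (rFP) and the
+ * parallel reference array (rREFS) so the GC can locate the reference.
+ */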
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
diff --git a/runtime/interpreter/mterp/x86/invoke.S b/runtime/interpreter/mterp/x86/invoke.S
new file mode 100644
index 0000000..c23053b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/invoke.S
@@ -0,0 +1,25 @@
+%default { "helper":"UndefinedInvokeHandler" }
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rPC, OUT_ARG2(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG3(%esp)
+    call    SYMBOL($helper)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
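+    # The invoke may have changed runtime state (e.g. by enabling
+    # instrumentation); check whether we should leave mterp.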
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    RESTORE_IBASE
+    FETCH_INST
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_add_double.S b/runtime/interpreter/mterp/x86/op_add_double.S
new file mode 100644
index 0000000..de2708f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_add_double_2addr.S b/runtime/interpreter/mterp/x86/op_add_double_2addr.S
new file mode 100644
index 0000000..538c9ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_add_float.S b/runtime/interpreter/mterp/x86/op_add_float.S
new file mode 100644
index 0000000..80b1736
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_add_float_2addr.S b/runtime/interpreter/mterp/x86/op_add_float_2addr.S
new file mode 100644
index 0000000..6649253
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int.S b/runtime/interpreter/mterp/x86/op_add_int.S
new file mode 100644
index 0000000..f71a56b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"addl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_2addr.S b/runtime/interpreter/mterp/x86/op_add_int_2addr.S
new file mode 100644
index 0000000..5d43b65
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"addl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_lit16.S b/runtime/interpreter/mterp/x86/op_add_int_lit16.S
new file mode 100644
index 0000000..4f34d17
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_int_lit8.S b/runtime/interpreter/mterp/x86/op_add_int_lit8.S
new file mode 100644
index 0000000..3f14744
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_long.S b/runtime/interpreter/mterp/x86/op_add_long.S
new file mode 100644
index 0000000..dce0c26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"addl    (rFP,%ecx,4), rIBASE", "instr2":"adcl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_add_long_2addr.S b/runtime/interpreter/mterp/x86/op_add_long_2addr.S
new file mode 100644
index 0000000..7847640
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"addl    %eax, (rFP,rINST,4)","instr2":"adcl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_aget.S b/runtime/interpreter/mterp/x86/op_aget.S
new file mode 100644
index 0000000..338386f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget.S
@@ -0,0 +1,19 @@
+%default { "load":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+/*
+ * Array get, 32 bits or less.  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+ *
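+ * For example, aget-byte instantiates this template with load=movsbl,
+ * shift=1, and data_offset=MIRROR_BYTE_ARRAY_DATA_OFFSET (see
+ * op_aget_byte.S).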
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    $load   $data_offset(%eax,%ecx,$shift), %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aget_boolean.S b/runtime/interpreter/mterp/x86/op_aget_boolean.S
new file mode 100644
index 0000000..d910c94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movzbl", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_byte.S b/runtime/interpreter/mterp/x86/op_aget_byte.S
new file mode 100644
index 0000000..aba9ffc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movsbl", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_char.S b/runtime/interpreter/mterp/x86/op_aget_char.S
new file mode 100644
index 0000000..748e410
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_char.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movzwl", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_object.S b/runtime/interpreter/mterp/x86/op_aget_object.S
new file mode 100644
index 0000000..35ec053
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_object.S
@@ -0,0 +1,20 @@
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aget_short.S b/runtime/interpreter/mterp/x86/op_aget_short.S
new file mode 100644
index 0000000..6eaf5d9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_short.S
@@ -0,0 +1 @@
+%include "x86/op_aget.S" { "load":"movswl", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aget_wide.S b/runtime/interpreter/mterp/x86/op_aget_wide.S
new file mode 100644
index 0000000..92c612a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aget_wide.S
@@ -0,0 +1,16 @@
+/*
+ * Array get, 64 bits.  vAA <- vBB[vCC].
+ */
+    /* aget-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    movq    (%eax), %xmm0                   # xmm0 <- vBB[vCC]
+    SET_WIDE_FP_VREG %xmm0, rINST           # vAA <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_and_int.S b/runtime/interpreter/mterp/x86/op_and_int.S
new file mode 100644
index 0000000..6272c4e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"andl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_2addr.S b/runtime/interpreter/mterp/x86/op_and_int_2addr.S
new file mode 100644
index 0000000..95df873
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"andl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_lit16.S b/runtime/interpreter/mterp/x86/op_and_int_lit16.S
new file mode 100644
index 0000000..b062064
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_int_lit8.S b/runtime/interpreter/mterp/x86/op_and_int_lit8.S
new file mode 100644
index 0000000..99915df
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_long.S b/runtime/interpreter/mterp/x86/op_and_long.S
new file mode 100644
index 0000000..f8514ea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"andl    (rFP,%ecx,4), rIBASE", "instr2":"andl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_and_long_2addr.S b/runtime/interpreter/mterp/x86/op_and_long_2addr.S
new file mode 100644
index 0000000..37249b8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"andl    %eax, (rFP,rINST,4)","instr2":"andl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_aput.S b/runtime/interpreter/mterp/x86/op_aput.S
new file mode 100644
index 0000000..9d8c52d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput.S
@@ -0,0 +1,20 @@
+%default { "reg":"rINST", "store":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+/*
+ * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+ *
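+ * For example, aput-char instantiates this template with reg=rINSTw,
+ * store=movw, shift=2, and data_offset=MIRROR_CHAR_ARRAY_DATA_OFFSET (see
+ * op_aput_char.S).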
+ */
+    /* op vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    $data_offset(%eax,%ecx,$shift), %eax
+    GET_VREG rINST, rINST
+    $store  $reg, (%eax)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aput_boolean.S b/runtime/interpreter/mterp/x86/op_aput_boolean.S
new file mode 100644
index 0000000..e7fdd53
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_byte.S b/runtime/interpreter/mterp/x86/op_aput_byte.S
new file mode 100644
index 0000000..491d03c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_char.S b/runtime/interpreter/mterp/x86/op_aput_char.S
new file mode 100644
index 0000000..ca42cf0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_char.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_object.S b/runtime/interpreter/mterp/x86/op_aput_object.S
new file mode 100644
index 0000000..980b26a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_object.S
@@ -0,0 +1,15 @@
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAputObject)         # (shadow_frame, pc, inst_data)
+    RESTORE_IBASE
+    testb   %al, %al
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_aput_short.S b/runtime/interpreter/mterp/x86/op_aput_short.S
new file mode 100644
index 0000000..5e63482
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_short.S
@@ -0,0 +1 @@
+%include "x86/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86/op_aput_wide.S b/runtime/interpreter/mterp/x86/op_aput_wide.S
new file mode 100644
index 0000000..43ef64a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_aput_wide.S
@@ -0,0 +1,17 @@
+/*
+ * Array put, 64 bits.  vBB[vCC] <- vAA.
+ *
+ */
+    /* aput-wide vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %ecx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    leal    MIRROR_WIDE_ARRAY_DATA_OFFSET(%eax,%ecx,8), %eax
+    GET_WIDE_FP_VREG %xmm0, rINST           # xmm0 <- vAA
+    movq    %xmm0, (%eax)                   # vBB[vCC] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_array_length.S b/runtime/interpreter/mterp/x86/op_array_length.S
new file mode 100644
index 0000000..60ed80b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_array_length.S
@@ -0,0 +1,12 @@
+/*
+ * Return the length of an array.
+ */
+    mov     rINST, %eax                     # eax <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %ecx, rINST                    # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $$0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%ecx), rINST
+    SET_VREG rINST, %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_check_cast.S b/runtime/interpreter/mterp/x86/op_check_cast.S
new file mode 100644
index 0000000..d090aa3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_check_cast.S
@@ -0,0 +1,18 @@
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    leal    VREG_ADDRESS(rINST), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_cmp_long.S b/runtime/interpreter/mterp/x86/op_cmp_long.S
new file mode 100644
index 0000000..1f729b0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmp_long.S
@@ -0,0 +1,27 @@
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1], BB is clobbered
+    cmpl    VREG_HIGH_ADDRESS(%ecx), %eax
+    jl      .L${opcode}_smaller
+    jg      .L${opcode}_bigger
+    movzbl  2(rPC), %eax                    # eax <- BB, restore BB
+    GET_VREG %eax, %eax                     # eax <- v[BB]
+    sub     VREG_ADDRESS(%ecx), %eax
+    ja      .L${opcode}_bigger
+    jb      .L${opcode}_smaller
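+    # Neither branch taken: the low words were equal, and the sub above left
+    # eax == 0, which falls through as the comparison result.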
+.L${opcode}_finish:
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+.L${opcode}_bigger:
+    movl    $$1, %eax
+    jmp     .L${opcode}_finish
+
+.L${opcode}_smaller:
+    movl    $$-1, %eax
+    jmp     .L${opcode}_finish
diff --git a/runtime/interpreter/mterp/x86/op_cmpg_double.S b/runtime/interpreter/mterp/x86/op_cmpg_double.S
new file mode 100644
index 0000000..a73ba55
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"d","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpg_float.S b/runtime/interpreter/mterp/x86/op_cmpg_float.S
new file mode 100644
index 0000000..648051b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"s","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpl_double.S b/runtime/interpreter/mterp/x86/op_cmpl_double.S
new file mode 100644
index 0000000..058163e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"d","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86/op_cmpl_float.S b/runtime/interpreter/mterp/x86/op_cmpl_float.S
new file mode 100644
index 0000000..302f078
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "x86/fpcmp.S" {"suff":"s","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86/op_const.S b/runtime/interpreter/mterp/x86/op_const.S
new file mode 100644
index 0000000..544d63b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const.S
@@ -0,0 +1,4 @@
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax, rINST                    # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_16.S b/runtime/interpreter/mterp/x86/op_const_16.S
new file mode 100644
index 0000000..97cd5fa
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_16.S
@@ -0,0 +1,4 @@
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx, rINST                    # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_4.S b/runtime/interpreter/mterp/x86/op_const_4.S
new file mode 100644
index 0000000..a60ba96
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_4.S
@@ -0,0 +1,7 @@
+    /* const/4 vA, #+B */
+    movsx   rINSTbl, %eax                   # eax <- ssssssBx
+    movl    $$0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $$4, %eax
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_const_class.S b/runtime/interpreter/mterp/x86/op_const_class.S
new file mode 100644
index 0000000..60be789
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_class.S
@@ -0,0 +1,14 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_high16.S b/runtime/interpreter/mterp/x86/op_const_high16.S
new file mode 100644
index 0000000..576967a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_high16.S
@@ -0,0 +1,5 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax, rINST                    # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string.S b/runtime/interpreter/mterp/x86/op_const_string.S
new file mode 100644
index 0000000..ff93b23
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_string.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
new file mode 100644
index 0000000..e7f952a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %eax                    # eax <- BBBBBBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, OUT_ARG1(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_wide.S b/runtime/interpreter/mterp/x86/op_const_wide.S
new file mode 100644
index 0000000..3750728
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide.S
@@ -0,0 +1,7 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- lsw
+    movzbl  rINSTbl, %ecx                   # ecx <- AA
+    movl    6(rPC), rINST                   # rINST <- msw
+    SET_VREG %eax, %ecx
+    SET_VREG_HIGH  rINST, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_16.S b/runtime/interpreter/mterp/x86/op_const_wide_16.S
new file mode 100644
index 0000000..1331c32
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_16.S
@@ -0,0 +1,8 @@
+    /* const-wide/16 vAA, #+BBBB */
+    movswl  2(rPC), %eax                    # eax <- ssssBBBB
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssssssBBBB
+    SET_VREG_HIGH rIBASE, rINST             # store msw
+    SET_VREG %eax, rINST                    # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_32.S b/runtime/interpreter/mterp/x86/op_const_wide_32.S
new file mode 100644
index 0000000..ed7d62b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_32.S
@@ -0,0 +1,8 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # eax <- BBBBbbbb
+    movl    rIBASE, %ecx                    # preserve rIBASE (cltd trashes it)
+    cltd                                    # rIBASE:eax <- ssssssssBBBBbbbb
+    SET_VREG_HIGH rIBASE, rINST             # store msw
+    SET_VREG %eax, rINST                    # store lsw
+    movl    %ecx, rIBASE                    # restore rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_const_wide_high16.S b/runtime/interpreter/mterp/x86/op_const_wide_high16.S
new file mode 100644
index 0000000..11b9310
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_const_wide_high16.S
@@ -0,0 +1,7 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
+    xorl    %eax, %eax
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_div_double.S b/runtime/interpreter/mterp/x86/op_div_double.S
new file mode 100644
index 0000000..575716d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_div_double_2addr.S b/runtime/interpreter/mterp/x86/op_div_double_2addr.S
new file mode 100644
index 0000000..8229a31
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_div_float.S b/runtime/interpreter/mterp/x86/op_div_float.S
new file mode 100644
index 0000000..250f1dc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_div_float_2addr.S b/runtime/interpreter/mterp/x86/op_div_float_2addr.S
new file mode 100644
index 0000000..c30d148
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int.S b/runtime/interpreter/mterp/x86/op_div_int.S
new file mode 100644
index 0000000..5fc8fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int.S
@@ -0,0 +1 @@
+%include "x86/bindiv.S" {"result":"%eax","special":"$0x80000000","rem":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_2addr.S b/runtime/interpreter/mterp/x86/op_div_int_2addr.S
new file mode 100644
index 0000000..04cf1ba
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/bindiv2addr.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_lit16.S b/runtime/interpreter/mterp/x86/op_div_int_lit16.S
new file mode 100644
index 0000000..dd396bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/bindivLit16.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_int_lit8.S b/runtime/interpreter/mterp/x86/op_div_int_lit8.S
new file mode 100644
index 0000000..3cbd9d0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/bindivLit8.S" {"result":"%eax","special":"$0x80000000"}
diff --git a/runtime/interpreter/mterp/x86/op_div_long.S b/runtime/interpreter/mterp/x86/op_div_long.S
new file mode 100644
index 0000000..e56a035
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_long.S
@@ -0,0 +1,23 @@
+%default {"routine":"art_quick_ldiv"}
+/* art_quick_* methods use the quick ABI,
+ *   so pass args in eax, ecx, edx, ebx.
+ */
+    /* div vAA, vBB, vCC */
+    .extern $routine
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movzbl  3(rPC), %eax                    # eax <- CC
+    GET_VREG %ecx, %eax
+    GET_VREG_HIGH %ebx, %eax
+    movl    %ecx, %edx
+    orl     %ebx, %ecx
+    jz      common_errDivideByZero
+    movzbl  2(rPC), %eax                    # eax <- BB
+    GET_VREG_HIGH %ecx, %eax
+    GET_VREG %eax, %eax
+    call    SYMBOL($routine)
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE, rINST
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_div_long_2addr.S b/runtime/interpreter/mterp/x86/op_div_long_2addr.S
new file mode 100644
index 0000000..159cc44
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_div_long_2addr.S
@@ -0,0 +1,25 @@
+%default {"routine":"art_quick_ldiv"}
+/* art_quick_* methods use the quick ABI,
+ *   so pass args in eax, ecx, edx, ebx.
+ */
+    /* div/2addr vA, vB */
+    .extern   $routine
+    mov     rIBASE, LOCAL0(%esp)            # save rIBASE/%edx
+    movzbl  rINSTbl, %eax
+    shrl    $$4, %eax                       # eax <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    mov     rINST, LOCAL1(%esp)             # save rINST/%ebx
+    movl    %ebx, %ecx
+    GET_VREG %edx, %eax
+    GET_VREG_HIGH %ebx, %eax
+    movl    %edx, %eax
+    orl     %ebx, %eax
+    jz      common_errDivideByZero
+    GET_VREG %eax, %ecx
+    GET_VREG_HIGH %ecx, %ecx
+    call    SYMBOL($routine)
+    mov     LOCAL1(%esp), rINST             # restore rINST/%ebx
+    SET_VREG_HIGH rIBASE, rINST
+    SET_VREG %eax, rINST
+    mov     LOCAL0(%esp), rIBASE            # restore rIBASE/%edx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_double_to_float.S b/runtime/interpreter/mterp/x86/op_double_to_float.S
new file mode 100644
index 0000000..5135d60
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fldl","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_double_to_int.S b/runtime/interpreter/mterp/x86/op_double_to_int.S
new file mode 100644
index 0000000..9c4e11c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_int.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"1","tgtlong":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_double_to_long.S b/runtime/interpreter/mterp/x86/op_double_to_long.S
new file mode 100644
index 0000000..fe0eee2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_double_to_long.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"1","tgtlong":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_fill_array_data.S b/runtime/interpreter/mterp/x86/op_fill_array_data.S
new file mode 100644
index 0000000..5855284
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_fill_array_data.S
@@ -0,0 +1,12 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    GET_VREG %eax, rINST                    # eax <- vAA (array object)
+    movl    %eax, OUT_ARG0(%esp)
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpFillArrayData)      # (obj, payload)
+    REFRESH_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array.S b/runtime/interpreter/mterp/x86/op_filled_new_array.S
new file mode 100644
index 0000000..35b2fe8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array.S
@@ -0,0 +1,20 @@
+%default { "helper":"MterpFilledNewArray" }
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)
+    call    SYMBOL($helper)
+    REFRESH_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array_range.S b/runtime/interpreter/mterp/x86/op_filled_new_array_range.S
new file mode 100644
index 0000000..841059e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "x86/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/x86/op_float_to_double.S b/runtime/interpreter/mterp/x86/op_float_to_double.S
new file mode 100644
index 0000000..12a3e14
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"flds","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_float_to_int.S b/runtime/interpreter/mterp/x86/op_float_to_int.S
new file mode 100644
index 0000000..ac57388
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_int.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"0","tgtlong":"0"}
diff --git a/runtime/interpreter/mterp/x86/op_float_to_long.S b/runtime/interpreter/mterp/x86/op_float_to_long.S
new file mode 100644
index 0000000..be1d982
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_float_to_long.S
@@ -0,0 +1 @@
+%include "x86/cvtfp_int.S" {"srcdouble":"0","tgtlong":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_goto.S b/runtime/interpreter/mterp/x86/op_goto.S
new file mode 100644
index 0000000..1827d68
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto.S
@@ -0,0 +1,10 @@
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbl  rINSTbl, rINST                  # rINST <- ssssssAA
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/op_goto_16.S b/runtime/interpreter/mterp/x86/op_goto_16.S
new file mode 100644
index 0000000..ea5ea90
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto_16.S
@@ -0,0 +1,10 @@
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswl  2(rPC), rINST                   # rINST <- ssssAAAA
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/op_goto_32.S b/runtime/interpreter/mterp/x86/op_goto_32.S
new file mode 100644
index 0000000..4becaf3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_goto_32.S
@@ -0,0 +1,15 @@
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Unlike most opcodes, this one is allowed to branch to itself, so
+ * our "backward branch" test must be "<=0" instead of "<0".  Because
+ * we need the V bit set, we'll use an adds to convert from Dalvik
+ * offset to byte offset.
+ */
+    /* goto/32 +AAAAAAAA */
+    movl    2(rPC), rINST                   # rINST <- AAAAAAAA
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/op_if_eq.S b/runtime/interpreter/mterp/x86/op_if_eq.S
new file mode 100644
index 0000000..5413d98
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_eq.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86/op_if_eqz.S b/runtime/interpreter/mterp/x86/op_if_eqz.S
new file mode 100644
index 0000000..53dc99e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_eqz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ge.S b/runtime/interpreter/mterp/x86/op_if_ge.S
new file mode 100644
index 0000000..c2ba3c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ge.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gez.S b/runtime/interpreter/mterp/x86/op_if_gez.S
new file mode 100644
index 0000000..cd2c772
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gt.S b/runtime/interpreter/mterp/x86/op_if_gt.S
new file mode 100644
index 0000000..9fe84bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gt.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86/op_if_gtz.S b/runtime/interpreter/mterp/x86/op_if_gtz.S
new file mode 100644
index 0000000..b454ffd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_gtz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86/op_if_le.S b/runtime/interpreter/mterp/x86/op_if_le.S
new file mode 100644
index 0000000..93571a7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_le.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86/op_if_lez.S b/runtime/interpreter/mterp/x86/op_if_lez.S
new file mode 100644
index 0000000..779c77f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_lez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86/op_if_lt.S b/runtime/interpreter/mterp/x86/op_if_lt.S
new file mode 100644
index 0000000..1fb1521
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_lt.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ltz.S b/runtime/interpreter/mterp/x86/op_if_ltz.S
new file mode 100644
index 0000000..155c356
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ltz.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86/op_if_ne.S b/runtime/interpreter/mterp/x86/op_if_ne.S
new file mode 100644
index 0000000..7e1b065
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_ne.S
@@ -0,0 +1 @@
+%include "x86/bincmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86/op_if_nez.S b/runtime/interpreter/mterp/x86/op_if_nez.S
new file mode 100644
index 0000000..8951f5b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_if_nez.S
@@ -0,0 +1 @@
+%include "x86/zcmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
new file mode 100644
index 0000000..e3304ba
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -0,0 +1,29 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
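+ *
+ * For example, iget-byte instantiates this template with
+ * helper=artGetByteInstanceFromCode (see op_iget_byte.S).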
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL($helper)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    .if $is_object
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <-value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <-value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean.S b/runtime/interpreter/mterp/x86/op_iget_boolean.S
new file mode 100644
index 0000000..9ddad04
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S b/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S
new file mode 100644
index 0000000..02b0c16
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte.S b/runtime/interpreter/mterp/x86/op_iget_byte.S
new file mode 100644
index 0000000..8250788
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte_quick.S b/runtime/interpreter/mterp/x86/op_iget_byte_quick.S
new file mode 100644
index 0000000..02b0c16
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char.S b/runtime/interpreter/mterp/x86/op_iget_char.S
new file mode 100644
index 0000000..e9d2156
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_char.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char_quick.S b/runtime/interpreter/mterp/x86/op_iget_char_quick.S
new file mode 100644
index 0000000..a5d9712
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movzwl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object.S b/runtime/interpreter/mterp/x86/op_iget_object.S
new file mode 100644
index 0000000..3abeefc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_object.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object_quick.S b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
new file mode 100644
index 0000000..b1551a0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
@@ -0,0 +1,17 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    %eax, OUT_ARG1(%esp)
+    EXPORT_PC
+    call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_quick.S b/runtime/interpreter/mterp/x86/op_iget_quick.S
new file mode 100644
index 0000000..1b7440f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_quick.S
@@ -0,0 +1,13 @@
+%default { "load":"movl"}
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    ${load} (%ecx,%eax,1), %eax
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_short.S b/runtime/interpreter/mterp/x86/op_iget_short.S
new file mode 100644
index 0000000..c8fad89
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_short.S
@@ -0,0 +1 @@
+%include "x86/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_short_quick.S b/runtime/interpreter/mterp/x86/op_iget_short_quick.S
new file mode 100644
index 0000000..2c3aeb6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iget_quick.S" { "load":"movswl" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
new file mode 100644
index 0000000..a5d7e69
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -0,0 +1,25 @@
+/*
+ * 64-bit instance field get.
+ *
+ * for: iget-wide
+ */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    mov     rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artGet64InstanceFromCode)
+    mov     rSELF, %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_VREG %eax, rINST
+    SET_VREG_HIGH %edx, rINST
+    RESTORE_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide_quick.S b/runtime/interpreter/mterp/x86/op_iget_wide_quick.S
new file mode 100644
index 0000000..7ce74cc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iget_wide_quick.S
@@ -0,0 +1,11 @@
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    movq    (%ecx,%eax,1), %xmm0
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_WIDE_FP_VREG %xmm0, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_instance_of.S b/runtime/interpreter/mterp/x86/op_instance_of.S
new file mode 100644
index 0000000..e6fe5b2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_instance_of.S
@@ -0,0 +1,26 @@
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * Most common situation is a non-null object, being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- BBBB
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    leal    VREG_ADDRESS(%eax), %ecx        # Get object address
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    OFF_FP_METHOD(rFP),%eax
+    movl    %eax, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    andb    $$0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_int_to_byte.S b/runtime/interpreter/mterp/x86/op_int_to_byte.S
new file mode 100644
index 0000000..b4e8d22
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movsbl  %al, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_char.S b/runtime/interpreter/mterp/x86/op_int_to_char.S
new file mode 100644
index 0000000..4608971
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_char.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movzwl  %ax,%eax"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_double.S b/runtime/interpreter/mterp/x86/op_int_to_double.S
new file mode 100644
index 0000000..3e9921e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildl","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_float.S b/runtime/interpreter/mterp/x86/op_int_to_float.S
new file mode 100644
index 0000000..849540d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildl","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_int_to_long.S b/runtime/interpreter/mterp/x86/op_int_to_long.S
new file mode 100644
index 0000000..6f9ea26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_long.S
@@ -0,0 +1,12 @@
+    /* int to long vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    movl    rIBASE, %ecx                    # cltd trashes rIBASE/edx
+    cltd                                    # rINST:eax<- sssssssBBBBBBBB
+    SET_VREG_HIGH rIBASE, rINST             # v[A+1] <- rIBASE
+    SET_VREG %eax, rINST                    # v[A+0] <- %eax
+    movl    %ecx, rIBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
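The cltd above is the whole conversion: it sign-extends eax into edx, which is why rIBASE (edx) has to be parked in ecx around it. Equivalent C, with the register-pair split spelled out:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        int32_t vB = -2;
        int64_t wide = (int64_t)vB;            /* cltd: edx <- eax >> 31 */
        uint32_t lo = (uint32_t)wide;          /* SET_VREG      -> v[A]   */
        uint32_t hi = (uint32_t)(wide >> 32);  /* SET_VREG_HIGH -> v[A+1] */
        printf("lo=%08x hi=%08x\n", lo, hi);   /* lo=fffffffe hi=ffffffff */
        return 0;
    }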
diff --git a/runtime/interpreter/mterp/x86/op_int_to_short.S b/runtime/interpreter/mterp/x86/op_int_to_short.S
new file mode 100644
index 0000000..90d0ae6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_int_to_short.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"movswl %ax, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_invoke_direct.S b/runtime/interpreter/mterp/x86/op_invoke_direct.S
new file mode 100644
index 0000000..76fb9a6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_direct_range.S b/runtime/interpreter/mterp/x86/op_invoke_direct_range.S
new file mode 100644
index 0000000..a6ab604
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_interface.S b/runtime/interpreter/mterp/x86/op_invoke_interface.S
new file mode 100644
index 0000000..91c24f5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeInterface" }
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_interface_range.S b/runtime/interpreter/mterp/x86/op_invoke_interface_range.S
new file mode 100644
index 0000000..e478beb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_static.S b/runtime/interpreter/mterp/x86/op_invoke_static.S
new file mode 100644
index 0000000..b4c1236
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/x86/op_invoke_static_range.S b/runtime/interpreter/mterp/x86/op_invoke_static_range.S
new file mode 100644
index 0000000..3dc8a26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_super.S b/runtime/interpreter/mterp/x86/op_invoke_super.S
new file mode 100644
index 0000000..be20edd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeSuper" }
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_super_range.S b/runtime/interpreter/mterp/x86/op_invoke_super_range.S
new file mode 100644
index 0000000..f36bf72
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual.S b/runtime/interpreter/mterp/x86/op_invoke_virtual.S
new file mode 100644
index 0000000..7e9c456
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtual" }
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..2dc9ab6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S
new file mode 100644
index 0000000..d1d20d2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..21bfc55
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "x86/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/x86/op_iput.S b/runtime/interpreter/mterp/x86/op_iput.S
new file mode 100644
index 0000000..c847e2d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput.S
@@ -0,0 +1,25 @@
+%default { "handler":"artSet32InstanceFromMterp" }
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax<- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl, %ecx                   # ecx<- BA
+    sarl    $$4, %ecx                       # ecx<- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $$0xf, rINSTbl                  # rINST<- A
+    GET_VREG %eax, rINST
+    movl    %eax, OUT_ARG2(%esp)            # fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL($handler)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
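The interesting part of the template above is the calling convention: four OUT_ARG slots and a boolean result, with any non-zero return routed to MterpPossibleException. A compilable C sketch of that control flow follows; fake_set32 merely stands in for artSet32InstanceFromMterp, and its body (a store at offset 0) is invented for the toy:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for artSet32InstanceFromMterp: non-zero means failure. */
    static bool fake_set32(uint32_t field_ref, void *obj,
                           uint32_t value, void *referrer) {
        (void)field_ref; (void)referrer;
        if (obj == NULL) return true;          /* -> exception path */
        *(uint32_t *)obj = value;              /* toy: field at offset 0 */
        return false;                          /* al == 0: fall through */
    }

    int main(void) {
        uint32_t field = 0;
        if (fake_set32(42, &field, 7, NULL))   /* testb %al, %al; jnz */
            puts("MterpPossibleException");
        printf("field=%u\n", field);           /* field=7 */
        return 0;
    }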
diff --git a/runtime/interpreter/mterp/x86/op_iput_boolean.S b/runtime/interpreter/mterp/x86/op_iput_boolean.S
new file mode 100644
index 0000000..11cab88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S b/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S
new file mode 100644
index 0000000..93865de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_byte.S b/runtime/interpreter/mterp/x86/op_iput_byte.S
new file mode 100644
index 0000000..11cab88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_byte_quick.S b/runtime/interpreter/mterp/x86/op_iput_byte_quick.S
new file mode 100644
index 0000000..93865de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_char.S b/runtime/interpreter/mterp/x86/op_iput_char.S
new file mode 100644
index 0000000..abbf2bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_char.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_char_quick.S b/runtime/interpreter/mterp/x86/op_iput_char_quick.S
new file mode 100644
index 0000000..4ec8029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_object.S b/runtime/interpreter/mterp/x86/op_iput_object.S
new file mode 100644
index 0000000..e013697
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_object.S
@@ -0,0 +1,13 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG3(%esp)
+    call    SYMBOL(MterpIputObject)
+    testb   %al, %al
+    jz      MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object_quick.S b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
new file mode 100644
index 0000000..cb77929
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpIputObjectQuick)
+    testb   %al, %al
+    jz      MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_quick.S b/runtime/interpreter/mterp/x86/op_iput_quick.S
new file mode 100644
index 0000000..b67cee0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_quick.S
@@ -0,0 +1,13 @@
+%default { "reg":"rINST", "store":"movl" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINST                   # rINST <- v[A]
+    movzwl  2(rPC), %eax                    # eax <- field byte offset
+    ${store}    ${reg}, (%ecx,%eax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
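Quick variants skip the resolution helper entirely: the quickening pass has already rewritten CCCC as a raw byte offset into the object, so the put is just a null check plus one store, with the width chosen by the store/reg template parameters. In C, under those assumptions:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* iput-quick for a 32-bit field; offset is pre-resolved. */
    static int iput32_quick(void *obj, uint16_t offset, uint32_t value) {
        if (obj == NULL) return -1;            /* common_errNullObject */
        memcpy((char *)obj + offset, &value, sizeof value);
        return 0;
    }

    int main(void) {
        unsigned char object[16] = {0};
        iput32_quick(object, 8, 0xdeadbeefu);
        uint32_t back;
        memcpy(&back, object + 8, sizeof back);
        printf("%08x\n", back);                /* deadbeef */
        return 0;
    }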
diff --git a/runtime/interpreter/mterp/x86/op_iput_short.S b/runtime/interpreter/mterp/x86/op_iput_short.S
new file mode 100644
index 0000000..abbf2bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_short.S
@@ -0,0 +1 @@
+%include "x86/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_short_quick.S b/runtime/interpreter/mterp/x86/op_iput_short_quick.S
new file mode 100644
index 0000000..4ec8029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "x86/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide.S b/runtime/interpreter/mterp/x86/op_iput_wide.S
new file mode 100644
index 0000000..122eecf
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_wide.S
@@ -0,0 +1,19 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax                    # eax <- 0000CCCC
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movzbl  rINSTbl,%ecx                    # ecx <- BA
+    sarl    $$4,%ecx                        # ecx <- B
+    GET_VREG %ecx, %ecx
+    movl    %ecx, OUT_ARG1(%esp)            # the object pointer
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[A]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG3(%esp)            # referrer
+    call    SYMBOL(artSet64InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide_quick.S b/runtime/interpreter/mterp/x86/op_iput_wide_quick.S
new file mode 100644
index 0000000..17de6f8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_iput_wide_quick.S
@@ -0,0 +1,12 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbl    rINSTbl, %ecx                 # ecx<- BA
+    sarl      $$4, %ecx                     # ecx<- B
+    GET_VREG  %ecx, %ecx                    # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwl    2(rPC), %eax                  # eax<- field byte offset
+    leal      (%ecx,%eax,1), %ecx           # ecx<- Address of 64-bit target
+    andb      $$0xf, rINSTbl                # rINST<- A
+    GET_WIDE_FP_VREG %xmm0, rINST           # xmm0<- fp[A]/fp[A+1]
+    movq      %xmm0, (%ecx)                 # obj.field <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_long_to_double.S b/runtime/interpreter/mterp/x86/op_long_to_double.S
new file mode 100644
index 0000000..2c7f905
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildll","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_long_to_float.S b/runtime/interpreter/mterp/x86/op_long_to_float.S
new file mode 100644
index 0000000..e500e39
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"load":"fildll","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_long_to_int.S b/runtime/interpreter/mterp/x86/op_long_to_int.S
new file mode 100644
index 0000000..1c39b96
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "x86/op_move.S"
diff --git a/runtime/interpreter/mterp/x86/op_monitor_enter.S b/runtime/interpreter/mterp/x86/op_monitor_enter.S
new file mode 100644
index 0000000..b35c684
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_monitor_enter.S
@@ -0,0 +1,14 @@
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG %ecx, rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    SYMBOL(artLockObjectFromCode)   # (object, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_monitor_exit.S b/runtime/interpreter/mterp/x86/op_monitor_exit.S
new file mode 100644
index 0000000..2d17d5e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_monitor_exit.S
@@ -0,0 +1,18 @@
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG %ecx, rINST
+    movl    %ecx, OUT_ARG0(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG1(%esp)
+    call    SYMBOL(artUnlockObjectFromCode) # (object, self)
+    RESTORE_IBASE
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move.S b/runtime/interpreter/mterp/x86/op_move.S
new file mode 100644
index 0000000..ea173b9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move.S
@@ -0,0 +1,13 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    shrl    $$4, rINST                      # rINST <- B
+    GET_VREG rINST, rINST
+    .if $is_object
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
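Every vA, vB handler in this series decodes the same packed byte: rINSTbl holds BA, with the destination A in the low nibble and the source B in the high nibble. The decode in C:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint8_t ba = 0x73;                     /* encodes vA=3, vB=7 */
        unsigned a = ba & 0xf;                 /* andb $0xf, %al */
        unsigned b = ba >> 4;                  /* shrl $4, rINST */
        printf("A=v%u B=v%u\n", a, b);         /* A=v3 B=v7 */
        return 0;
    }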
diff --git a/runtime/interpreter/mterp/x86/op_move_16.S b/runtime/interpreter/mterp/x86/op_move_16.S
new file mode 100644
index 0000000..454deb5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwl  4(rPC), %ecx                    # ecx <- BBBB
+    movzwl  2(rPC), %eax                    # eax <- AAAA
+    GET_VREG rINST, %ecx
+    .if $is_object
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_move_exception.S b/runtime/interpreter/mterp/x86/op_move_exception.S
new file mode 100644
index 0000000..d8dc74f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_exception.S
@@ -0,0 +1,6 @@
+    /* move-exception vAA */
+    movl    rSELF, %ecx
+    movl    THREAD_EXCEPTION_OFFSET(%ecx), %eax
+    SET_VREG_OBJECT %eax, rINST             # fp[AA] <- exception object
+    movl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_from16.S b/runtime/interpreter/mterp/x86/op_move_from16.S
new file mode 100644
index 0000000..e869855
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_from16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzx   rINSTbl, %eax                   # eax <- AA
+    movw    2(rPC), rINSTw                  # rINSTw <- BBBB
+    GET_VREG rINST, rINST                   # rINST <- fp[BBBB]
+    .if $is_object
+    SET_VREG_OBJECT rINST, %eax             # fp[A] <- fp[B]
+    .else
+    SET_VREG rINST, %eax                    # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_move_object.S b/runtime/interpreter/mterp/x86/op_move_object.S
new file mode 100644
index 0000000..a6a7c90
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object.S
@@ -0,0 +1 @@
+%include "x86/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_object_16.S b/runtime/interpreter/mterp/x86/op_move_object_16.S
new file mode 100644
index 0000000..e0c8527
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object_16.S
@@ -0,0 +1 @@
+%include "x86/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_object_from16.S b/runtime/interpreter/mterp/x86/op_move_object_from16.S
new file mode 100644
index 0000000..e623820
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "x86/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_result.S b/runtime/interpreter/mterp/x86/op_move_result.S
new file mode 100644
index 0000000..f6f2129
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result.S
@@ -0,0 +1,11 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    (%eax), %eax                    # eax <- result.i
+    .if $is_object
+    SET_VREG_OBJECT %eax, rINST             # fp[AA] <- result
+    .else
+    SET_VREG %eax, rINST                    # fp[AA] <- result
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_result_object.S b/runtime/interpreter/mterp/x86/op_move_result_object.S
new file mode 100644
index 0000000..cbf5e1d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result_object.S
@@ -0,0 +1 @@
+%include "x86/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_move_result_wide.S b/runtime/interpreter/mterp/x86/op_move_result_wide.S
new file mode 100644
index 0000000..7818cce
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_result_wide.S
@@ -0,0 +1,7 @@
+    /* move-result-wide vAA */
+    movl    OFF_FP_RESULT_REGISTER(rFP), %eax    # get pointer to result JType.
+    movl    4(%eax), %ecx                   # Get high
+    movl    (%eax), %eax                    # Get low
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    SET_VREG_HIGH %ecx, rINST               # v[AA+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_wide.S b/runtime/interpreter/mterp/x86/op_move_wide.S
new file mode 100644
index 0000000..79ce7b7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide.S
@@ -0,0 +1,8 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_WIDE_FP_VREG %xmm0, rINST           # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0, %ecx            # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_move_wide_16.S b/runtime/interpreter/mterp/x86/op_move_wide_16.S
new file mode 100644
index 0000000..a6b8596
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide_16.S
@@ -0,0 +1,7 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  4(rPC), %ecx                    # ecx<- BBBB
+    movzwl  2(rPC), %eax                    # eax<- AAAA
+    GET_WIDE_FP_VREG %xmm0, %ecx            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0, %eax            # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_move_wide_from16.S b/runtime/interpreter/mterp/x86/op_move_wide_from16.S
new file mode 100644
index 0000000..ec344de
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_move_wide_from16.S
@@ -0,0 +1,7 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    movzbl  rINSTbl, %eax                   # eax <- AA
+    GET_WIDE_FP_VREG %xmm0, %ecx            # xmm0 <- v[B]
+    SET_WIDE_FP_VREG %xmm0, %eax            # v[A] <- xmm0
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_double.S b/runtime/interpreter/mterp/x86/op_mul_double.S
new file mode 100644
index 0000000..7cef4c0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_double_2addr.S b/runtime/interpreter/mterp/x86/op_mul_double_2addr.S
new file mode 100644
index 0000000..bb722b6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_float.S b/runtime/interpreter/mterp/x86/op_mul_float.S
new file mode 100644
index 0000000..1156230
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_float_2addr.S b/runtime/interpreter/mterp/x86/op_mul_float_2addr.S
new file mode 100644
index 0000000..e9316df
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_mul_int.S b/runtime/interpreter/mterp/x86/op_mul_int.S
new file mode 100644
index 0000000..77f4659
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int.S
@@ -0,0 +1,12 @@
+    /*
+     * 32-bit binary multiplication.
+     */
+    /* mul vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    GET_VREG %eax, %eax                     # eax <- vBB
+    mov     rIBASE, LOCAL0(%esp)
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    mov     LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
new file mode 100644
index 0000000..da699ae
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
@@ -0,0 +1,10 @@
+    /* mul vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $$0xf, %cl                      # ecx <- A
+    movl    rIBASE, rINST
+    imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
+    movl    rINST, rIBASE
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
new file mode 100644
index 0000000..056f491
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
@@ -0,0 +1,12 @@
+    /* mul/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movzbl  rINSTbl, %eax                   # eax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax, %eax                     # eax <- vB
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
new file mode 100644
index 0000000..59b3844
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
@@ -0,0 +1,9 @@
+    /* mul/lit8 vAA, vBB, #+CC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movl    rIBASE, %ecx
+    GET_VREG  %eax, %eax                    # eax <- vBB
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
+    SET_VREG %eax, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_long.S b/runtime/interpreter/mterp/x86/op_mul_long.S
new file mode 100644
index 0000000..f35ca13
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_long.S
@@ -0,0 +1,33 @@
+/*
+ * Signed 64-bit integer multiply.
+ *
+ * We could definitely use more free registers for
+ * this code.  We spill rIBASE (edx) because mull
+ * trashes it, and spill rPC (esi) and rFP (edi) for
+ * use as the vB and vC pointers, leaving eax, ecx
+ * and edx as computational temps; rINST (ebx) stays
+ * live to hold AA for the final stores.  Yuck.
+ *
+ */
+    /* mul-long vAA, vBB, vCC */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[B]
+    leal    (rFP,%ecx,4), rFP               # rFP <- &v[C]
+    movl    4(%esi), %ecx                   # ecx <- Bmsw
+    imull   (rFP), %ecx                     # ecx <- (Bmsw*Clsw)
+    movl    4(rFP), %eax                    # eax <- Cmsw
+    imull   (%esi), %eax                    # eax <- (Cmsw*Blsw)
+    addl    %eax, %ecx                      # ecx <- (Bmsw*Clsw)+(Cmsw*Blsw)
+    movl    (rFP), %eax                     # eax <- Clsw
+    mull    (%esi)                          # eax <- (Clsw*Blsw)
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL1(%esp), rFP               # restore FP
+    leal    (%ecx,rIBASE), rIBASE           # full result now in rIBASE:%eax
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
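The arithmetic spelled out register-by-register above is the schoolbook decomposition of a 64x64 multiply truncated to 64 bits: the two cross products only affect the high word, and mull supplies the low product together with its carry-out in edx. A self-contained C check of the identity:

    #include <stdint.h>
    #include <stdio.h>

    /* B*C mod 2^64 = Blsw*Clsw + ((Bmsw*Clsw + Cmsw*Blsw) << 32) */
    static uint64_t mul_long(uint64_t b, uint64_t c) {
        uint32_t blo = (uint32_t)b, bhi = (uint32_t)(b >> 32);
        uint32_t clo = (uint32_t)c, chi = (uint32_t)(c >> 32);
        uint32_t cross = bhi * clo + chi * blo;         /* two imulls + addl */
        uint64_t low   = (uint64_t)blo * clo;           /* mull: edx:eax */
        uint32_t hi    = (uint32_t)(low >> 32) + cross; /* leal (%ecx,rIBASE) */
        return ((uint64_t)hi << 32) | (uint32_t)low;
    }

    int main(void) {
        uint64_t b = 0x123456789abcdef0ull, c = 0xfedcba9876543210ull;
        printf("%d\n", mul_long(b, c) == b * c);        /* 1 */
        return 0;
    }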
diff --git a/runtime/interpreter/mterp/x86/op_mul_long_2addr.S b/runtime/interpreter/mterp/x86/op_mul_long_2addr.S
new file mode 100644
index 0000000..565a57c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_mul_long_2addr.S
@@ -0,0 +1,35 @@
+/*
+ * Signed 64-bit integer multiply, 2-addr version.
+ *
+ * We could definitely use more free registers for
+ * this code.  We must spill rIBASE (edx) because it
+ * is trashed by mull.  rINST (ebx) is clobbered as
+ * well, giving us eax, ebx, ecx and edx as
+ * computational temps.  On top of that, we spill
+ * rPC (esi) for use as the vA pointer and rFP (edi)
+ * for use as the vB pointer.  Yuck.
+ */
+    /* mul-long/2addr vA, vB */
+    movzbl  rINSTbl, %eax                   # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    CLEAR_WIDE_REF %eax                     # clear refs in advance
+    sarl    $$4, rINST                      # rINST <- B
+    mov     rPC, LOCAL0(%esp)               # save Interpreter PC
+    mov     rFP, LOCAL1(%esp)               # save FP
+    mov     rIBASE, LOCAL2(%esp)            # save rIBASE
+    leal    (rFP,%eax,4), %esi              # esi <- &v[A]
+    leal    (rFP,rINST,4), rFP              # rFP <- &v[B]
+    movl    4(%esi), %ecx                   # ecx <- Amsw
+    imull   (rFP), %ecx                     # ecx <- (Amsw*Blsw)
+    movl    4(rFP), %eax                    # eax <- Bmsw
+    imull   (%esi), %eax                    # eax <- (Bmsw*Alsw)
+    addl    %eax, %ecx                      # ecx <- (Amsw*Blsw)+(Bmsw*Alsw)
+    movl    (rFP), %eax                     # eax <- Blsw
+    mull    (%esi)                          # eax <- (Blsw*Alsw)
+    leal    (%ecx,rIBASE), rIBASE           # full result now in %edx:%eax
+    movl    rIBASE, 4(%esi)                 # v[A+1] <- rIBASE
+    movl    %eax, (%esi)                    # v[A] <- %eax
+    mov     LOCAL0(%esp), rPC               # restore Interpreter PC
+    mov     LOCAL2(%esp), rIBASE            # restore IBASE
+    mov     LOCAL1(%esp), rFP               # restore FP
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_neg_double.S b/runtime/interpreter/mterp/x86/op_neg_double.S
new file mode 100644
index 0000000..fac4322
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_double.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"instr":"fchs","load":"fldl","store":"fstpl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_float.S b/runtime/interpreter/mterp/x86/op_neg_float.S
new file mode 100644
index 0000000..30f071b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_float.S
@@ -0,0 +1 @@
+%include "x86/fpcvt.S" {"instr":"fchs","load":"flds","store":"fstps"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_int.S b/runtime/interpreter/mterp/x86/op_neg_int.S
new file mode 100644
index 0000000..67d4d18
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_int.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"negl    %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_neg_long.S b/runtime/interpreter/mterp/x86/op_neg_long.S
new file mode 100644
index 0000000..30da247
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_neg_long.S
@@ -0,0 +1,13 @@
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, %ecx                     # eax <- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx <- v[B+1]
+    negl    %eax
+    adcl    $$0, %ecx
+    negl    %ecx
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    SET_VREG_HIGH %ecx, rINST               # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
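The negl/adcl/negl sequence above relies on negl setting the carry flag exactly when its operand was non-zero, which is the borrow that two's-complement negation of the low word owes the high word. The identity, checked in C:

    #include <stdint.h>
    #include <stdio.h>

    /* -(hi:lo) == (-(hi + (lo != 0)) : -lo), all arithmetic mod 2^32. */
    static uint64_t neg_long(uint32_t lo, uint32_t hi) {
        uint32_t carry = (lo != 0);            /* CF out of negl %eax */
        uint32_t nlo = 0u - lo;                /* negl %eax */
        uint32_t nhi = 0u - (hi + carry);      /* adcl $0, %ecx; negl %ecx */
        return ((uint64_t)nhi << 32) | nlo;
    }

    int main(void) {
        int64_t v = -123456789012345ll;
        uint64_t got = neg_long((uint32_t)v, (uint32_t)((uint64_t)v >> 32));
        printf("%d\n", got == (uint64_t)(-v)); /* 1 */
        return 0;
    }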
diff --git a/runtime/interpreter/mterp/x86/op_new_array.S b/runtime/interpreter/mterp/x86/op_new_array.S
new file mode 100644
index 0000000..16226e9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_new_array.S
@@ -0,0 +1,21 @@
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpNewArray)
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_instance.S b/runtime/interpreter/mterp/x86/op_new_instance.S
new file mode 100644
index 0000000..f976acc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_new_instance.S
@@ -0,0 +1,16 @@
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpNewInstance)
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_nop.S b/runtime/interpreter/mterp/x86/op_nop.S
new file mode 100644
index 0000000..4cb68e3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_nop.S
@@ -0,0 +1 @@
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_not_int.S b/runtime/interpreter/mterp/x86/op_not_int.S
new file mode 100644
index 0000000..335ab09
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_not_int.S
@@ -0,0 +1 @@
+%include "x86/unop.S" {"instr":"notl %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_not_long.S b/runtime/interpreter/mterp/x86/op_not_long.S
new file mode 100644
index 0000000..8f706e1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_not_long.S
@@ -0,0 +1,11 @@
+    /* unop vA, vB */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, %ecx                     # eax <- v[B+0]
+    GET_VREG_HIGH %ecx, %ecx                # ecx <- v[B+1]
+    notl    %eax
+    notl    %ecx
+    SET_VREG %eax, rINST                    # v[A+0] <- eax
+    SET_VREG_HIGH %ecx, rINST               # v[A+1] <- ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_or_int.S b/runtime/interpreter/mterp/x86/op_or_int.S
new file mode 100644
index 0000000..ebe2ec2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"orl     (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_2addr.S b/runtime/interpreter/mterp/x86/op_or_int_2addr.S
new file mode 100644
index 0000000..36c17db
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"orl     %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_lit16.S b/runtime/interpreter/mterp/x86/op_or_int_lit16.S
new file mode 100644
index 0000000..0a88ff59
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_int_lit8.S b/runtime/interpreter/mterp/x86/op_or_int_lit8.S
new file mode 100644
index 0000000..0670b67
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_long.S b/runtime/interpreter/mterp/x86/op_or_long.S
new file mode 100644
index 0000000..09ca539
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"orl     (rFP,%ecx,4), rIBASE", "instr2":"orl     4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_or_long_2addr.S b/runtime/interpreter/mterp/x86/op_or_long_2addr.S
new file mode 100644
index 0000000..2062e81
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"orl     %eax, (rFP,rINST,4)","instr2":"orl     %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_packed_switch.S b/runtime/interpreter/mterp/x86/op_packed_switch.S
new file mode 100644
index 0000000..fcb7509
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_packed_switch.S
@@ -0,0 +1,21 @@
+%default { "func":"MterpDoPackedSwitch" }
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
+    GET_VREG %eax, rINST                    # eax <- vAA
+    leal    (rPC,%ecx,2), %ecx              # ecx <- PC + BBBBbbbb*2
+    movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
+    movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
+    call    SYMBOL($func)
+    REFRESH_IBASE
+    testl   %eax, %eax
+    movl    %eax, rINST
+    jmp     MterpCommonTakenBranch
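BBBBbbbb above is a signed offset in 16-bit code units from the opcode to the switch payload; the helper searches the payload and returns a relative branch, with a miss falling through past the 3-unit instruction. A sketch of the packed lookup follows; the payload layout (ushort ident 0x0100, ushort size, int first_key, int targets[size]) is taken from the Dalvik bytecode format, and the miss value of 3 is an assumption mirroring the instruction width:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int32_t read_s32(const uint16_t *p) {   /* unaligned-safe */
        int32_t v; memcpy(&v, p, sizeof v); return v;
    }

    static int32_t do_packed_switch(const uint16_t *payload, int32_t val) {
        uint16_t size = payload[1];
        int32_t first_key = read_s32(payload + 2);
        uint32_t idx = (uint32_t)(val - first_key);
        if (idx >= size) return 3;              /* width of the 31t insn */
        return read_s32(payload + 4 + 2 * idx); /* targets[idx] */
    }

    int main(void) {
        uint16_t payload[10] = { 0x0100, 2 };   /* ident, size */
        int32_t first_key = 10, t0 = 100, t1 = 200;
        memcpy(payload + 2, &first_key, 4);
        memcpy(payload + 4, &t0, 4);
        memcpy(payload + 6, &t1, 4);
        printf("%d %d %d\n", do_packed_switch(payload, 10),  /* 100 */
               do_packed_switch(payload, 11),                /* 200 */
               do_packed_switch(payload, 99));               /* 3 */
        return 0;
    }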
diff --git a/runtime/interpreter/mterp/x86/op_rem_double.S b/runtime/interpreter/mterp/x86/op_rem_double.S
new file mode 100644
index 0000000..4b52a06
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_double.S
@@ -0,0 +1,14 @@
+    /* rem_double vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    fldl    VREG_ADDRESS(%ecx)              # %st1 <- fp[vCC] (divisor)
+    fldl    VREG_ADDRESS(%eax)              # %st0 <- fp[vBB] (dividend)
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINST)             # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
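fprem only computes a partial remainder; it sets the FPU's C2 flag while the reduction is incomplete, and fstsw/sahf move that flag into PF so the jp retries until it clears. The converged result has fmod semantics (truncated quotient, sign of the dividend), as distinct from IEEE remainder():

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double vBB = -7.5, vCC = 2.0;          /* dividend, divisor */
        printf("%g %g\n", fmod(vBB, vCC),      /* -1.5: what fprem yields */
               remainder(vBB, vCC));           /*  0.5: fprem1 semantics  */
        return 0;
    }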
diff --git a/runtime/interpreter/mterp/x86/op_rem_double_2addr.S b/runtime/interpreter/mterp/x86/op_rem_double_2addr.S
new file mode 100644
index 0000000..5a0e669
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_double_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_double/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_WIDE_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_rem_float.S b/runtime/interpreter/mterp/x86/op_rem_float.S
new file mode 100644
index 0000000..05e0bf1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_float.S
@@ -0,0 +1,14 @@
+    /* rem_float vAA, vBB, vCC */
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movzbl  2(rPC), %eax                    # eax <- BB
+    flds    VREG_ADDRESS(%ecx)              # vCC (divisor) to fp stack
+    flds    VREG_ADDRESS(%eax)              # vBB (dividend) to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINST)             # %st to vAA
+    CLEAR_REF rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_rem_float_2addr.S b/runtime/interpreter/mterp/x86/op_rem_float_2addr.S
new file mode 100644
index 0000000..29f84e6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_float_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_float/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINST)             # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%ecx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%ecx)              # %st to vA
+    CLEAR_REF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_rem_int.S b/runtime/interpreter/mterp/x86/op_rem_int.S
new file mode 100644
index 0000000..d25b93c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int.S
@@ -0,0 +1 @@
+%include "x86/bindiv.S" {"result":"rIBASE","special":"$0","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_2addr.S b/runtime/interpreter/mterp/x86/op_rem_int_2addr.S
new file mode 100644
index 0000000..c788e0e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/bindiv2addr.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_lit16.S b/runtime/interpreter/mterp/x86/op_rem_int_lit16.S
new file mode 100644
index 0000000..3df9d39
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/bindivLit16.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_int_lit8.S b/runtime/interpreter/mterp/x86/op_rem_int_lit8.S
new file mode 100644
index 0000000..56e19c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/bindivLit8.S" {"result":"rIBASE","special":"$0"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_long.S b/runtime/interpreter/mterp/x86/op_rem_long.S
new file mode 100644
index 0000000..0ffe1f6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_long.S
@@ -0,0 +1 @@
+%include "x86/op_div_long.S" {"routine":"art_quick_lmod"}
diff --git a/runtime/interpreter/mterp/x86/op_rem_long_2addr.S b/runtime/interpreter/mterp/x86/op_rem_long_2addr.S
new file mode 100644
index 0000000..4b97735
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/op_div_long_2addr.S" {"routine":"art_quick_lmod"}
diff --git a/runtime/interpreter/mterp/x86/op_return.S b/runtime/interpreter/mterp/x86/op_return.S
new file mode 100644
index 0000000..8e3cfad
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return.S
@@ -0,0 +1,17 @@
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINST                    # eax <- vAA
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
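All four return handlers share the same pre-return suspend check: one testl covers both request bits, and only then is the slow-path call made. The shape in C, with the bit values invented for the toy (the real ones come from the generated asm_support constants):

    #include <stdint.h>
    #include <stdio.h>

    enum { SUSPEND_REQUEST = 1u << 0, CHECKPOINT_REQUEST = 1u << 1 };

    struct Thread { uint32_t flags; };         /* THREAD_FLAGS_OFFSET */

    static void suspend_check(struct Thread *self) {
        if (self->flags & (SUSPEND_REQUEST | CHECKPOINT_REQUEST))
            puts("MterpSuspendCheck(self)");   /* slow-path call */
    }

    int main(void) {
        struct Thread t = { CHECKPOINT_REQUEST };
        suspend_check(&t);                     /* takes the slow path */
        t.flags = 0;
        suspend_check(&t);                     /* jz 1f: skips it */
        return 0;
    }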
diff --git a/runtime/interpreter/mterp/x86/op_return_object.S b/runtime/interpreter/mterp/x86/op_return_object.S
new file mode 100644
index 0000000..12c84b3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_object.S
@@ -0,0 +1 @@
+%include "x86/op_return.S"
diff --git a/runtime/interpreter/mterp/x86/op_return_void.S b/runtime/interpreter/mterp/x86/op_return_void.S
new file mode 100644
index 0000000..a14a4f6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_void.S
@@ -0,0 +1,11 @@
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
new file mode 100644
index 0000000..1d0e933
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
@@ -0,0 +1,9 @@
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorl    %eax, %eax
+    xorl    %ecx, %ecx
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_return_wide.S b/runtime/interpreter/mterp/x86/op_return_wide.S
new file mode 100644
index 0000000..7d1850a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_return_wide.S
@@ -0,0 +1,15 @@
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movl    rSELF, %eax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINST                    # eax <- v[AA+0]
+    GET_VREG_HIGH %ecx, rINST               # ecx <- v[AA+1]
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86/op_rsub_int.S b/runtime/interpreter/mterp/x86/op_rsub_int.S
new file mode 100644
index 0000000..d6449c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "x86/binopLit16.S" {"instr":"subl    %eax, %ecx","result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S b/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S
new file mode 100644
index 0000000..15d0e35
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"subl    %eax, %ecx" , "result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
new file mode 100644
index 0000000..0e9a3d8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL($helper)
+    movl    rSELF, %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    .if $is_object
+    SET_VREG_OBJECT %eax, rINST             # fp[A] <- value
+    .else
+    SET_VREG %eax, rINST                    # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
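Unlike the iput/sput helpers, the sget helpers return the fetched value itself, so the only failure signal is a pending exception on the thread, hence the cmpl against THREAD_EXCEPTION_OFFSET after the call. The control flow in C; fake_get32_static and the Thread layout are stand-ins, not the real API:

    #include <stdint.h>
    #include <stdio.h>

    struct Thread { void *exception; };        /* THREAD_EXCEPTION_OFFSET */

    static int32_t fake_get32_static(uint32_t field_ref, void *referrer,
                                     struct Thread *self) {
        (void)field_ref; (void)referrer; (void)self;
        return 1234;                           /* resolved static value */
    }

    int main(void) {
        struct Thread self = { NULL };
        int32_t v = fake_get32_static(7, NULL, &self);
        if (self.exception != NULL) {          /* cmpl $0, ...; jnz */
            puts("MterpException");
            return 1;
        }
        printf("vAA <- %d\n", v);              /* SET_VREG %eax, rINST */
        return 0;
    }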
diff --git a/runtime/interpreter/mterp/x86/op_sget_boolean.S b/runtime/interpreter/mterp/x86/op_sget_boolean.S
new file mode 100644
index 0000000..f058dd8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_byte.S b/runtime/interpreter/mterp/x86/op_sget_byte.S
new file mode 100644
index 0000000..c952f40
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_byte.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_char.S b/runtime/interpreter/mterp/x86/op_sget_char.S
new file mode 100644
index 0000000..d7bd410
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_char.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_object.S b/runtime/interpreter/mterp/x86/op_sget_object.S
new file mode 100644
index 0000000..1c95f9a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_object.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_short.S b/runtime/interpreter/mterp/x86/op_sget_short.S
new file mode 100644
index 0000000..6475306
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_short.S
@@ -0,0 +1 @@
+%include "x86/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
new file mode 100644
index 0000000..2b60303
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -0,0 +1,21 @@
+/*
+ * SGET_WIDE handler wrapper.
+ *
+ */
+    /* sget-wide vAA, field@BBBB */
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG2(%esp)            # self
+    call    SYMBOL(artGet64StaticFromCode)
+    movl    rSELF, %ecx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
+    jnz     MterpException
+    SET_VREG %eax, rINST                    # fp[A]<- low part
+    SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
+    RESTORE_IBASE_FROM_SELF %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_shl_int.S b/runtime/interpreter/mterp/x86/op_shl_int.S
new file mode 100644
index 0000000..6a41d1c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_int_2addr.S b/runtime/interpreter/mterp/x86/op_shl_int_2addr.S
new file mode 100644
index 0000000..72abb8e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_int_lit8.S b/runtime/interpreter/mterp/x86/op_shl_int_lit8.S
new file mode 100644
index 0000000..b8d6069
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shl_long.S b/runtime/interpreter/mterp/x86/op_shl_long.S
new file mode 100644
index 0000000..aa58a93
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shl-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, %eax              # rIBASE <- v[BB+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    GET_VREG %eax, %eax                     # eax <- v[BB+0]
+    shldl   %eax,rIBASE
+    sall    %cl, %eax
+    testb   $$32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- %eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
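Because the hardware masks %cl to five bits, shldl/sall alone are only correct for counts 0..31; the testb $32 fixup moves the low word up and zeroes it for counts 32..63, and Dalvik's 6-bit mask bounds the count before that. The same split in C, checked against a native 64-bit shift:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t shl_long(uint32_t lo, uint32_t hi, unsigned count) {
        count &= 63;                           /* Dalvik: low 6 bits */
        unsigned c = count & 31;               /* what %cl delivers  */
        uint32_t nhi = (c == 0) ? hi
                     : (hi << c) | (lo >> (32 - c));   /* shldl */
        uint32_t nlo = lo << c;                /* sall %cl, %eax */
        if (count & 32) { nhi = nlo; nlo = 0; }/* testb $32 fixup */
        return ((uint64_t)nhi << 32) | nlo;
    }

    int main(void) {
        uint64_t v = 0x0000000180000001ull;
        for (unsigned s = 0; s < 64; s += 31)  /* s = 0, 31, 62 */
            printf("%d\n", shl_long((uint32_t)v, (uint32_t)(v >> 32), s)
                           == (uint64_t)(v << s));     /* 1 1 1 */
        return 0;
    }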
diff --git a/runtime/interpreter/mterp/x86/op_shl_long_2addr.S b/runtime/interpreter/mterp/x86/op_shl_long_2addr.S
new file mode 100644
index 0000000..6bbf49c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shl_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shl-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, rINST             # rIBASE <- v[AA+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vB (shift count)
+    shldl   %eax, rIBASE
+    sall    %cl, %eax
+    testb   $$32, %cl
+    je      2f
+    movl    %eax, rIBASE
+    xorl    %eax, %eax
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_shr_int.S b/runtime/interpreter/mterp/x86/op_shr_int.S
new file mode 100644
index 0000000..687b2c3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_int_2addr.S b/runtime/interpreter/mterp/x86/op_shr_int_2addr.S
new file mode 100644
index 0000000..533b0e9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_int_lit8.S b/runtime/interpreter/mterp/x86/op_shr_int_lit8.S
new file mode 100644
index 0000000..ebd1bea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_shr_long.S b/runtime/interpreter/mterp/x86/op_shr_long.S
new file mode 100644
index 0000000..68aa0ee
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, %eax              # rIBASE<- v[BB+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    GET_VREG %eax, %eax                     # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $$31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_shr_long_2addr.S b/runtime/interpreter/mterp/x86/op_shr_long_2addr.S
new file mode 100644
index 0000000..148bd1b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_shr_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* shr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, rINST             # rIBASE <- v[AA+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vB (shift count)
+    shrdl   rIBASE, %eax
+    sarl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    sarl    $$31, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_sparse_switch.S b/runtime/interpreter/mterp/x86/op_sparse_switch.S
new file mode 100644
index 0000000..fdaec47
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "x86/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
new file mode 100644
index 0000000..0b5de09
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -0,0 +1,22 @@
+%default { "helper":"artSet32StaticFromCode"}
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    GET_VREG rINST, rINST
+    movl    rINST, OUT_ARG1(%esp)           # fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL($helper)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_boolean.S b/runtime/interpreter/mterp/x86/op_sput_boolean.S
new file mode 100644
index 0000000..63601bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_byte.S b/runtime/interpreter/mterp/x86/op_sput_byte.S
new file mode 100644
index 0000000..63601bd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_byte.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_char.S b/runtime/interpreter/mterp/x86/op_sput_char.S
new file mode 100644
index 0000000..1749f7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_char.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_object.S b/runtime/interpreter/mterp/x86/op_sput_object.S
new file mode 100644
index 0000000..0db5177
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_object.S
@@ -0,0 +1,13 @@
+    EXPORT_PC
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    movl    rPC, OUT_ARG1(%esp)
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_ARG2(%esp)
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)
+    call    SYMBOL(MterpSputObject)
+    testb   %al, %al
+    jz      MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_short.S b/runtime/interpreter/mterp/x86/op_sput_short.S
new file mode 100644
index 0000000..1749f7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_short.S
@@ -0,0 +1 @@
+%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
new file mode 100644
index 0000000..19cff0d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -0,0 +1,20 @@
+/*
+ * SPUT_WIDE handler wrapper.
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwl  2(rPC), %eax
+    movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)            # referrer
+    leal    VREG_ADDRESS(rINST), %eax
+    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    rSELF, %ecx
+    movl    %ecx, OUT_ARG3(%esp)            # self
+    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    testb   %al, %al
+    jnz     MterpException
+    RESTORE_IBASE
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
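From the argument setup above, the helper's shape is roughly the following —
an assumed sketch only; the real declaration lives in the runtime's entrypoint
headers, not in this patch:

    #include <stdint.h>

    /* Assumed sketch: a nonzero return means an exception is pending
     * (the handler's "jnz MterpException" path). */
    extern int artSet64IndirectStaticFromMterp(
        uint32_t field_idx,        /* OUT_ARG0: field ref BBBB */
        void* referrer,            /* OUT_ARG1: referring method */
        const uint64_t* new_value, /* OUT_ARG2: &fp[AA] */
        void* self);               /* OUT_ARG3: Thread* */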
diff --git a/runtime/interpreter/mterp/x86/op_sub_double.S b/runtime/interpreter/mterp/x86/op_sub_double.S
new file mode 100644
index 0000000..e83afeb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_double.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_double_2addr.S b/runtime/interpreter/mterp/x86/op_sub_double_2addr.S
new file mode 100644
index 0000000..af9a2ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_float.S b/runtime/interpreter/mterp/x86/op_sub_float.S
new file mode 100644
index 0000000..423d834
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_float.S
@@ -0,0 +1 @@
+%include "x86/sseBinop.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_float_2addr.S b/runtime/interpreter/mterp/x86/op_sub_float_2addr.S
new file mode 100644
index 0000000..18de000
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "x86/sseBinop2Addr.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_int.S b/runtime/interpreter/mterp/x86/op_sub_int.S
new file mode 100644
index 0000000..7fe03fb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"subl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_int_2addr.S b/runtime/interpreter/mterp/x86/op_sub_int_2addr.S
new file mode 100644
index 0000000..cc9bf60
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"subl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_long.S b/runtime/interpreter/mterp/x86/op_sub_long.S
new file mode 100644
index 0000000..014591e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"subl    (rFP,%ecx,4), rIBASE", "instr2":"sbbl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_sub_long_2addr.S b/runtime/interpreter/mterp/x86/op_sub_long_2addr.S
new file mode 100644
index 0000000..7498029
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"subl    %eax, (rFP,rINST,4)","instr2":"sbbl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_throw.S b/runtime/interpreter/mterp/x86/op_throw.S
new file mode 100644
index 0000000..a6e6b1e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_throw.S
@@ -0,0 +1,11 @@
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax, rINST                    # eax<- vAA (exception object)
+    testl   %eax, %eax
+    jz      common_errNullObject
+    movl    rSELF,%ecx
+    movl    %eax, THREAD_EXCEPTION_OFFSET(%ecx)
+    jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86/op_unused_3e.S b/runtime/interpreter/mterp/x86/op_unused_3e.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_3e.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_3f.S b/runtime/interpreter/mterp/x86/op_unused_3f.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_3f.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_40.S b/runtime/interpreter/mterp/x86/op_unused_40.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_40.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_41.S b/runtime/interpreter/mterp/x86/op_unused_41.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_41.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_42.S b/runtime/interpreter/mterp/x86/op_unused_42.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_42.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_43.S b/runtime/interpreter/mterp/x86/op_unused_43.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_43.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_79.S b/runtime/interpreter/mterp/x86/op_unused_79.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_79.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_7a.S b/runtime/interpreter/mterp/x86/op_unused_7a.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_7a.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f3.S b/runtime/interpreter/mterp/x86/op_unused_f3.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f3.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f4.S b/runtime/interpreter/mterp/x86/op_unused_f4.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f4.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f5.S b/runtime/interpreter/mterp/x86/op_unused_f5.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f5.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f6.S b/runtime/interpreter/mterp/x86/op_unused_f6.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f6.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f7.S b/runtime/interpreter/mterp/x86/op_unused_f7.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f7.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f8.S b/runtime/interpreter/mterp/x86/op_unused_f8.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f8.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f9.S b/runtime/interpreter/mterp/x86/op_unused_f9.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f9.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fa.S b/runtime/interpreter/mterp/x86/op_unused_fa.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fa.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fb.S b/runtime/interpreter/mterp/x86/op_unused_fb.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fb.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fc.S b/runtime/interpreter/mterp/x86/op_unused_fc.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fc.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fd.S b/runtime/interpreter/mterp/x86/op_unused_fd.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fd.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fe.S b/runtime/interpreter/mterp/x86/op_unused_fe.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_fe.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_ff.S b/runtime/interpreter/mterp/x86/op_unused_ff.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_ff.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int.S b/runtime/interpreter/mterp/x86/op_ushr_int.S
new file mode 100644
index 0000000..dfe25ff
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int.S
@@ -0,0 +1 @@
+%include "x86/binop1.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S b/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S
new file mode 100644
index 0000000..c14bc98
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/shop2addr.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S b/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S
new file mode 100644
index 0000000..e129f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_ushr_long.S b/runtime/interpreter/mterp/x86/op_ushr_long.S
new file mode 100644
index 0000000..9527c9c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_long.S
@@ -0,0 +1,29 @@
+/*
+ * Long integer shift.  This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.  x86 shifts automatically mask off
+ * the low 5 bits of %cl, so we have to handle the 64 > shiftcount > 31
+ * case specially.
+ */
+    /* shr-long vAA, vBB, vCC */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  2(rPC), %eax                    # eax <- BB
+    movzbl  3(rPC), %ecx                    # ecx <- CC
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, %eax              # rIBASE <- v[BB+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vCC
+    GET_VREG %eax, %eax                     # eax <- v[BB+0]
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S b/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S
new file mode 100644
index 0000000..72fcc36
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_ushr_long_2addr.S
@@ -0,0 +1,26 @@
+/*
+ * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+    /* ushr-long/2addr vA, vB */
+    /* ecx gets shift count */
+    /* Need to spill rIBASE */
+    /* rINSTw gets AA */
+    movzbl  rINSTbl, %ecx                   # ecx <- BA
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- v[AA+0]
+    sarl    $$4, %ecx                       # ecx <- B
+    movl    rIBASE, LOCAL0(%esp)
+    GET_VREG_HIGH rIBASE, rINST             # rIBASE <- v[AA+1]
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    shrdl   rIBASE, %eax
+    shrl    %cl, rIBASE
+    testb   $$32, %cl
+    je      2f
+    movl    rIBASE, %eax
+    xorl    rIBASE, rIBASE
+2:
+    SET_VREG_HIGH rIBASE, rINST             # v[AA+1] <- rIBASE
+    movl    LOCAL0(%esp), rIBASE
+    SET_VREG %eax, rINST                    # v[AA+0] <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_xor_int.S b/runtime/interpreter/mterp/x86/op_xor_int.S
new file mode 100644
index 0000000..35aca6a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int.S
@@ -0,0 +1 @@
+%include "x86/binop.S" {"instr":"xorl    (rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_2addr.S b/runtime/interpreter/mterp/x86/op_xor_int_2addr.S
new file mode 100644
index 0000000..d7b70e2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "x86/binop2addr.S" {"instr":"xorl    %eax, (rFP,%ecx,4)"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_lit16.S b/runtime/interpreter/mterp/x86/op_xor_int_lit16.S
new file mode 100644
index 0000000..115f0a0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "x86/binopLit16.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_int_lit8.S b/runtime/interpreter/mterp/x86/op_xor_int_lit8.S
new file mode 100644
index 0000000..243971c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "x86/binopLit8.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_long.S b/runtime/interpreter/mterp/x86/op_xor_long.S
new file mode 100644
index 0000000..0d3c0f5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_long.S
@@ -0,0 +1 @@
+%include "x86/binopWide.S" {"instr1":"xorl    (rFP,%ecx,4), rIBASE", "instr2":"xorl    4(rFP,%ecx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86/op_xor_long_2addr.S b/runtime/interpreter/mterp/x86/op_xor_long_2addr.S
new file mode 100644
index 0000000..b5000e4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "x86/binopWide2addr.S" {"instr1":"xorl    %eax, (rFP,rINST,4)","instr2":"xorl    %ecx, 4(rFP,rINST,4)"}
diff --git a/runtime/interpreter/mterp/x86/shop2addr.S b/runtime/interpreter/mterp/x86/shop2addr.S
new file mode 100644
index 0000000..96c9954
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/shop2addr.S
@@ -0,0 +1,13 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movzx   rINSTbl, %ecx                   # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %ecx                     # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG %eax, rINST                    # eax <- vA
+    $instr                                  # ex: sarl %cl, %eax
+    SET_VREG $result, rINST
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
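For instance, op_shr_int_2addr.S instantiates this template with
"sarl %cl, %eax"; a C model of the resulting handler, illustrative only:

    #include <stdint.h>

    /* shr-int/2addr: x86's sarl already masks the count in %cl to 5
     * bits, which matches Dalvik's semantics for 32-bit shifts. */
    static int32_t shr_int_2addr(int32_t vA, int32_t vB) {
        return vA >> (vB & 31);
    }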
diff --git a/runtime/interpreter/mterp/x86/sseBinop.S b/runtime/interpreter/mterp/x86/sseBinop.S
new file mode 100644
index 0000000..63a1e21
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/sseBinop.S
@@ -0,0 +1,9 @@
+%default {"instr":"","suff":""}
+    movzbl  2(rPC), %ecx                    # ecx <- BB
+    movzbl  3(rPC), %eax                    # eax <- CC
+    movs${suff}   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    ${instr}${suff} VREG_ADDRESS(%eax), %xmm0
+    movs${suff}   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff}   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
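The "instr"/"suff" parameters splice directly into the mnemonics, so
op_sub_double.S expands to movsd/subsd/movsd and the float files to
movss/subss/movss. A C model of the expanded sub-double handler —
illustrative only; fp and refs stand for the shadow frame's value and
reference arrays:

    #include <stdint.h>
    #include <string.h>

    static void sub_double(uint32_t* fp, uint32_t* refs,
                           uint8_t aa, uint8_t bb, uint8_t cc) {
        double lhs, rhs;
        memcpy(&lhs, &fp[bb], sizeof(double));
        memcpy(&rhs, &fp[cc], sizeof(double));
        lhs -= rhs;
        memcpy(&fp[aa], &lhs, sizeof(double));
        refs[aa] = 0;      /* the pxor + movs to VREG_REF_ADDRESS: */
        refs[aa + 1] = 0;  /* movsd of zero clears both 4-byte ref slots */
    }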
diff --git a/runtime/interpreter/mterp/x86/sseBinop2Addr.S b/runtime/interpreter/mterp/x86/sseBinop2Addr.S
new file mode 100644
index 0000000..d157e67
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/sseBinop2Addr.S
@@ -0,0 +1,10 @@
+%default {"instr":"","suff":""}
+    movzx   rINSTbl, %ecx                   # ecx <- A+
+    andl    $$0xf, %ecx                     # ecx <- A
+    movs${suff} VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    sarl    $$4, rINST                      # rINST<- B
+    ${instr}${suff} VREG_ADDRESS(rINST), %xmm0
+    movs${suff} %xmm0, VREG_ADDRESS(%ecx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff} %xmm0, VREG_REF_ADDRESS(%ecx)  # clear ref of vA, the destination
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/unop.S b/runtime/interpreter/mterp/x86/unop.S
new file mode 100644
index 0000000..db09fc0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/unop.S
@@ -0,0 +1,13 @@
+%default {"instr":""}
+/*
+ * Generic 32-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movzbl  rINSTbl,%ecx                    # ecx <- A+
+    sarl    $$4,rINST                       # rINST <- B
+    GET_VREG %eax, rINST                    # eax <- vB
+    andb    $$0xf,%cl                       # ecx <- A
+    $instr
+    SET_VREG %eax, %ecx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/unused.S b/runtime/interpreter/mterp/x86/unused.S
new file mode 100644
index 0000000..c95ef94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
diff --git a/runtime/interpreter/mterp/x86/zcmp.S b/runtime/interpreter/mterp/x86/zcmp.S
new file mode 100644
index 0000000..c116159
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/zcmp.S
@@ -0,0 +1,17 @@
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $$0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    j${revcmp}   1f
+    movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
+1:
+    cmpw    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
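The "revcmp" parameter names the *reverse* condition so the taken branch
becomes the fall-through; e.g. if-lez instantiates revcmp as "gt", yielding
"jg 1f". A C rendering of that instance, illustrative only:

    #include <stdint.h>

    static int32_t if_lez_next_pc(int32_t vAA, int32_t pc,
                                  int16_t displacement) {
        if (vAA > 0) {             /* j${revcmp} 1f, revcmp = "gt" */
            return pc + 2;         /* not taken: 2 code units wide */
        }
        return pc + displacement;  /* taken: profiled branch path */
    }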
diff --git a/runtime/interpreter/mterp/x86_64/alt_stub.S b/runtime/interpreter/mterp/x86_64/alt_stub.S
new file mode 100644
index 0000000..24cd1a8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/alt_stub.S
@@ -0,0 +1,17 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Unlike the Arm handler, we can't do this as a tail call
+ * because rIBASE is caller save and we need to reload it.
+ *
+ * Note that unlike in the Arm implementation, we should never arrive
+ * here with a zero breakFlag because we always refresh rIBASE on
+ * return.
+ */
+    .extern MterpCheckBefore
+    REFRESH_IBASE
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
+    jmp     .L_op_nop+(${opnum}*${handler_size_bytes})
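The final jump works because every handler occupies a fixed power-of-two slot
after .L_op_nop; the address arithmetic, as a C sketch (illustrative only):

    #include <stdint.h>

    static uintptr_t alt_stub_target(uintptr_t op_nop_base, unsigned opnum,
                                     uintptr_t handler_size_bytes) {
        /* .L_op_nop + (opnum * handler_size_bytes) */
        return op_nop_base + opnum * handler_size_bytes;
    }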
diff --git a/runtime/interpreter/mterp/x86_64/bincmp.S b/runtime/interpreter/mterp/x86_64/bincmp.S
new file mode 100644
index 0000000..6601483
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/bincmp.S
@@ -0,0 +1,21 @@
+/*
+ * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* comparison to perform, e.g.
+ * for "if-le" you would use "gt".
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
+    /* if-cmp vA, vB, +CCCC */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # rcx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    j${revcmp}   1f
+    movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/bindiv.S b/runtime/interpreter/mterp/x86_64/bindiv.S
new file mode 100644
index 0000000..e10d1dc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/bindiv.S
@@ -0,0 +1,34 @@
+%default {"result":"","second":"","wide":"","suffix":"","rem":"0","ext":"cdq"}
+/*
+ * Binary div/rem operation (32-bit or wide).  Handles the special case of a
+ * -1 divisor, which would otherwise trap in idiv on minint / -1.
+ */
+    /* div/rem vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    .if $wide
+    GET_WIDE_VREG %rax, %rax                # eax <- vBB
+    GET_WIDE_VREG $second, %rcx             # ecx <- vCC
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    GET_VREG $second, %rcx                  # ecx <- vCC
+    .endif
+    test${suffix}   $second, $second
+    jz      common_errDivideByZero
+    cmp${suffix}  $$-1, $second
+    je      2f
+    $ext                                    # sign-extend rax into rdx:rax
+    idiv${suffix}   $second
+1:
+    .if $wide
+    SET_WIDE_VREG $result, rINSTq           # vAA <- result
+    .else
+    SET_VREG $result, rINSTq                # vAA <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if $rem
+    xor${suffix} $result, $result
+    .else
+    neg${suffix} $result
+    .endif
+    jmp     1b
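The cmp/je pair above exists because idiv faults on minint / -1, while Java
defines that quotient as minint and the remainder as 0. A C model of the
semantics, illustrative only and shown for the 32-bit case (the wide case is
analogous); negation is done on unsigned to sidestep C's signed-overflow
rules:

    #include <stdint.h>

    static int32_t java_div_or_rem(int32_t dividend, int32_t divisor,
                                   int want_rem) {
        /* divisor == 0 throws (the common_errDivideByZero path) */
        if (divisor == -1) {             /* the "je 2f" fast-out */
            return want_rem ? 0 : (int32_t)(0u - (uint32_t)dividend);
        }
        return want_rem ? dividend % divisor : dividend / divisor;
    }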
diff --git a/runtime/interpreter/mterp/x86_64/bindiv2addr.S b/runtime/interpreter/mterp/x86_64/bindiv2addr.S
new file mode 100644
index 0000000..8b9bc95
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/bindiv2addr.S
@@ -0,0 +1,35 @@
+%default {"result":"","second":"","wide":"","suffix":"","rem":"0","ext":"cdq"}
+/*
+ * Binary div/rem operation (32-bit or wide).  Handles the special case of a
+ * -1 divisor, which would otherwise trap in idiv on minint / -1.
+ */
+    /* div/rem/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $$4, %ecx                       # rcx <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    .if $wide
+    GET_WIDE_VREG %rax, rINSTq              # eax <- vA
+    GET_WIDE_VREG $second, %rcx             # ecx <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vA
+    GET_VREG $second, %rcx                  # ecx <- vB
+    .endif
+    test${suffix}   $second, $second
+    jz      common_errDivideByZero
+    cmp${suffix}  $$-1, $second
+    je      2f
+    $ext                                    # sign-extend rax into rdx:rax
+    idiv${suffix}   $second
+1:
+    .if $wide
+    SET_WIDE_VREG $result, rINSTq           # vA <- result
+    .else
+    SET_VREG $result, rINSTq                # vA <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+2:
+    .if $rem
+    xor${suffix} $result, $result
+    .else
+    neg${suffix} $result
+    .endif
+    jmp     1b
diff --git a/runtime/interpreter/mterp/x86_64/bindivLit16.S b/runtime/interpreter/mterp/x86_64/bindivLit16.S
new file mode 100644
index 0000000..80dbce2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/bindivLit16.S
@@ -0,0 +1,27 @@
+%default {"result":"","rem":"0"}
+/*
+ * 32-bit binary div/rem operation.  Handles the special case of a -1
+ * divisor (minint / -1 would trap in idivl).
+ */
+    /* div/rem/lit16 vA, vB, #+CCCC */
+    /* Need A in rINST, ssssCCCC in ecx, vB in eax */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    testl   %ecx, %ecx
+    jz      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    je      2f
+    cdq                                     # sign-extend eax into edx:eax
+    idivl   %ecx
+1:
+    SET_VREG $result, rINSTq                # vA <- result
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if $rem
+    xorl    $result, $result
+    .else
+    negl    $result
+    .endif
+    jmp     1b
diff --git a/runtime/interpreter/mterp/x86_64/bindivLit8.S b/runtime/interpreter/mterp/x86_64/bindivLit8.S
new file mode 100644
index 0000000..ab535f3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/bindivLit8.S
@@ -0,0 +1,25 @@
+%default {"result":"","rem":"0"}
+/*
+ * 32-bit div/rem "lit8" binary operation.  Handles special case of
+ * op0 = minint and op1 = -1.
+ */
+    /* div/rem/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    GET_VREG  %eax, %rax                    # eax <- vBB
+    testl   %ecx, %ecx
+    je      common_errDivideByZero
+    cmpl    $$-1, %ecx
+    je      2f
+    cdq                                     # sign-extend eax into edx:eax
+    idivl   %ecx
+1:
+    SET_VREG $result, rINSTq                # vAA <- result
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+    .if $rem
+    xorl    $result, $result
+    .else
+    negl    $result
+    .endif
+    jmp     1b
diff --git a/runtime/interpreter/mterp/x86_64/binop.S b/runtime/interpreter/mterp/x86_64/binop.S
new file mode 100644
index 0000000..962dd61
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binop.S
@@ -0,0 +1,17 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit binary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = eax op (rFP,%rcx,4)".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int, sub-int, and-int, or-int,
+ *      xor-int, shl-int, shr-int, ushr-int
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    $instr                                  # ex: addl    (rFP,%rcx,4),%eax
+    SET_VREG $result, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/binop1.S b/runtime/interpreter/mterp/x86_64/binop1.S
new file mode 100644
index 0000000..bdd5732
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binop1.S
@@ -0,0 +1,19 @@
+%default {"wide":"0"}
+/*
+ * Generic 32-bit (or, with "wide", 64-bit) binary operation in which both
+ * operands are loaded into registers (op0 in eax/rax, op1 in ecx).
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %ecx, %rcx                     # ecx <- vCC
+    .if $wide
+    GET_WIDE_VREG %rax, %rax                # rax <- vBB
+    $instr                                  # ex: addl    %ecx,%eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, %rax                     # eax <- vBB
+    $instr                                  # ex: addl    %ecx,%eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/binop2addr.S b/runtime/interpreter/mterp/x86_64/binop2addr.S
new file mode 100644
index 0000000..4448a81
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binop2addr.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "vA = vA op vB".
+ * This could be an instruction or a function call.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    $instr                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/binopLit16.S b/runtime/interpreter/mterp/x86_64/binopLit16.S
new file mode 100644
index 0000000..de43b53
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binopLit16.S
@@ -0,0 +1,19 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit16, rsub-int,
+ *      and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+    /* binop/lit16 vA, vB, #+CCCC */
+    movl    rINST, %eax                     # rax <- 000000BA
+    sarl    $$4, %eax                       # eax <- B
+    GET_VREG %eax, %rax                     # eax <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    $instr                                  # for example: addl %ecx, %eax
+    SET_VREG $result, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/binopLit8.S b/runtime/interpreter/mterp/x86_64/binopLit8.S
new file mode 100644
index 0000000..995002b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binopLit8.S
@@ -0,0 +1,18 @@
+%default {"result":"%eax"}
+/*
+ * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+ * that specifies an instruction that performs "result = eax op ecx".
+ * This could be an x86 instruction or a function call.  (If the result
+ * comes back in a register other than eax, you can override "result".)
+ *
+ * For: add-int/lit8, rsub-int/lit8
+ *      and-int/lit8, or-int/lit8, xor-int/lit8,
+ *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
+    /* binop/lit8 vAA, vBB, #+CC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movsbl  3(rPC), %ecx                    # rcx <- ssssssCC
+    GET_VREG %eax, %rax                     # eax <- vBB
+    $instr                                  # ex: addl %ecx,%eax
+    SET_VREG $result, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/binopWide.S b/runtime/interpreter/mterp/x86_64/binopWide.S
new file mode 100644
index 0000000..f92f18e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binopWide.S
@@ -0,0 +1,10 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
+    $instr                                  # ex: addq   (rFP,%rcx,4),%rax
+    SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/binopWide2addr.S b/runtime/interpreter/mterp/x86_64/binopWide2addr.S
new file mode 100644
index 0000000..d9e6cfb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/binopWide2addr.S
@@ -0,0 +1,11 @@
+/*
+ * Generic 64-bit binary operation.
+ */
+    /* binop/2addr vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    $instr                                  # for ex: addq   %rax,(rFP,%rcx,4)
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/cvtfp_int.S b/runtime/interpreter/mterp/x86_64/cvtfp_int.S
new file mode 100644
index 0000000..1472bd2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/cvtfp_int.S
@@ -0,0 +1,27 @@
+%default {"fp_suffix":"","i_suffix":"","max_const":"","result_reg":"","wide":""}
+/*
+ * On fp-to-int conversions, Java requires that a result greater than maxint
+ * be clamped to maxint, a result less than minint be clamped to minint, and
+ * a NaN produce zero.  Further, the rounding mode is truncation.
+ */
+    /* float/double to int/long vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    movs${fp_suffix}   VREG_ADDRESS(rINSTq), %xmm0
+    mov${i_suffix}  ${max_const}, ${result_reg}
+    cvtsi2s${fp_suffix}${i_suffix} ${result_reg}, %xmm1
+    comis${fp_suffix}    %xmm1, %xmm0
+    jae     1f
+    jp      2f
+    cvtts${fp_suffix}2si${i_suffix}  %xmm0, ${result_reg}
+    jmp     1f
+2:
+    xor${i_suffix}    ${result_reg}, ${result_reg}
+1:
+    .if $wide
+    SET_WIDE_VREG ${result_reg}, %rcx
+    .else
+    SET_VREG ${result_reg}, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
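A C model of the required d2i semantics — illustrative only; note the handler
gets minint clamping for free because cvttsd2si yields 0x80000000 for
out-of-range inputs:

    #include <math.h>
    #include <stdint.h>

    static int32_t java_d2i(double value) {
        if (isnan(value)) {
            return 0;                    /* the "jp 2f" path */
        }
        if (value >= (double)INT32_MAX) {
            return INT32_MAX;            /* "jae 1f" keeps max_const */
        }
        if (value <= (double)INT32_MIN) {
            return INT32_MIN;            /* cvttsd2si's indefinite value */
        }
        return (int32_t)value;           /* truncation toward zero */
    }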
diff --git a/runtime/interpreter/mterp/x86_64/entry.S b/runtime/interpreter/mterp/x86_64/entry.S
new file mode 100644
index 0000000..d992956
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/entry.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .global SYMBOL(ExecuteMterpImpl)
+    FUNCTION_TYPE(ExecuteMterpImpl)
+
+/*
+ * On entry:
+ *  IN_ARG0 (rdi)  Thread* self
+ *  IN_ARG1 (rsi)  code_item
+ *  IN_ARG2 (rdx)  ShadowFrame
+ *  IN_ARG3 (rcx)  JValue* result_register
+ */
+
+SYMBOL(ExecuteMterpImpl):
+    .cfi_startproc
+    .cfi_def_cfa rsp, 8
+
+    /* Spill callee save regs */
+    PUSH %rbx
+    PUSH %rbp
+    PUSH %r12
+    PUSH %r13
+    PUSH %r14
+    PUSH %r15
+
+    /* Allocate frame */
+    subq    $$FRAME_SIZE, %rsp
+    .cfi_adjust_cfa_offset FRAME_SIZE
+
+    /* Remember the return register */
+    movq    IN_ARG3, SHADOWFRAME_RESULT_REGISTER_OFFSET(IN_ARG2)
+
+    /* Remember the code_item */
+    movq    IN_ARG1, SHADOWFRAME_CODE_ITEM_OFFSET(IN_ARG2)
+
+    /* set up "named" registers */
+    movl    SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(IN_ARG2), %eax
+    leaq    SHADOWFRAME_VREGS_OFFSET(IN_ARG2), rFP
+    leaq    (rFP, %rax, 4), rREFS
+    movl    SHADOWFRAME_DEX_PC_OFFSET(IN_ARG2), %eax
+    leaq    CODEITEM_INSNS_OFFSET(IN_ARG1), rPC
+    leaq    (rPC, %rax, 2), rPC
+    EXPORT_PC
+
+    /* Starting ibase */
+    movq    IN_ARG0, rSELF
+    REFRESH_IBASE
+
+    /* Set up for backwards branches & osr profiling */
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+    movswl  %ax, rPROFILE
+
+    /* start executing the instruction at rPC */
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
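The named-register setup amounts to this pointer arithmetic, shown as a C
sketch (illustrative only; parameter names are ours, not the runtime's):

    #include <stdint.h>

    static void set_up_named_regs(uint32_t* vregs, uint32_t num_vregs,
                                  const uint16_t* insns, uint32_t dex_pc,
                                  uint32_t** rfp, uint32_t** rrefs,
                                  const uint16_t** rpc) {
        *rfp = vregs;                /* leaq SHADOWFRAME_VREGS_OFFSET(...) */
        *rrefs = vregs + num_vregs;  /* leaq (rFP, %rax, 4), rREFS */
        *rpc = insns + dex_pc;       /* leaq (rPC, %rax, 2), rPC */
    }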
diff --git a/runtime/interpreter/mterp/x86_64/fallback.S b/runtime/interpreter/mterp/x86_64/fallback.S
new file mode 100644
index 0000000..8d61166
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    jmp     MterpFallback
+
diff --git a/runtime/interpreter/mterp/x86_64/footer.S b/runtime/interpreter/mterp/x86_64/footer.S
new file mode 100644
index 0000000..f78f163
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/footer.S
@@ -0,0 +1,298 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogDivideByZeroException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogArrayIndexException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogNegativeArraySizeException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogNoSuchMethodException)
+#endif
+    jmp     MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogNullObjectException)
+#endif
+    jmp     MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogExceptionThrownException)
+#endif
+    jmp     MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
+    call    SYMBOL(MterpLogSuspendFallback)
+#endif
+    jmp     MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jz      MterpFallback
+    /* intentional fallthrough - handle pending exception. */
+
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ *
+ */
+MterpException:
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpHandleException)
+    testb   %al, %al
+    jz      MterpExceptionReturn
+    movq    OFF_FP_CODE_ITEM(rFP), %rax
+    mov     OFF_FP_DEX_PC(rFP), %ecx
+    leaq    CODEITEM_INSNS_OFFSET(%rax), rPC
+    leaq    (rPC, %rcx, 2), rPC
+    movq    rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* Do we need to switch interpreters? */
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    /* resume execution at catch block */
+    REFRESH_IBASE
+    FETCH_INST
+    GOTO_NEXT
+    /* NOTE: no fallthrough */
+
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decl    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movq    rSELF, %rax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+    REFRESH_IBASE
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    GOTO_NEXT
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
+    GOTO_NEXT
+
+.L_no_count_backwards:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movswl  %ax, rPROFILE
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    $$2, OUT_32_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    rINST, OUT_32_ARG2
+    call    SYMBOL(MterpLogOSR)
+#endif
+    movl    $$1, %eax
+    jmp     MterpDone
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpLogFallback)
+#endif
+MterpCommonFallback:
+    xorl    %eax, %eax
+    jmp     MterpDone
+
+/*
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    movl    $$1, %eax
+    jmp     MterpDone
+MterpReturn:
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rdx
+    movq    %rax, (%rdx)
+    movl    $$1, %eax
+MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    testl   rPROFILE, rPROFILE
+    jle     MRestoreFrame                   # if rPROFILE <= 0, nothing to report
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
+    /* pop up frame */
+MRestoreFrame:
+    addq    $$FRAME_SIZE, %rsp
+    .cfi_adjust_cfa_offset -FRAME_SIZE
+
+    /* Restore callee save register */
+    POP %r15
+    POP %r14
+    POP %r13
+    POP %r12
+    POP %rbp
+    POP %rbx
+    ret
+    .cfi_endproc
+    SIZE(ExecuteMterpImpl,ExecuteMterpImpl)
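Condensed, the taken-backward-branch policy implemented in
MterpCommonTakenBranch above is (C pseudostructure, illustrative only):

    /* rPROFILE policy for a taken backward branch, per the code above. */
    static void on_taken_backward_branch(int* profile) {
        if (*profile == -1) {            /* JIT_CHECK_OSR: maybe do OSR */
            /* MterpMaybeDoOnStackReplacement, then resume */
            return;
        }
        if (--*profile == 0) {           /* countdown hit zero: report */
            /* MterpAddHotnessBatch returns the refreshed countdown,
             * which is moved back into rPROFILE (.L_add_batch) */
        }
        /* finally, honor any pending suspend/checkpoint request */
    }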
diff --git a/runtime/interpreter/mterp/x86_64/fpcmp.S b/runtime/interpreter/mterp/x86_64/fpcmp.S
new file mode 100644
index 0000000..806bc2b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/fpcmp.S
@@ -0,0 +1,35 @@
+%default {"suff":"d","nanval":"pos"}
+/*
+ * Compare two floating-point values.  Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ *     if (x == y) {
+ *         return 0;
+ *     } else if (x < y) {
+ *         return -1;
+ *     } else if (x > y) {
+ *         return 1;
+ *     } else {
+ *         return nanval ? 1 : -1;
+ *     }
+ * }
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # ecx<- CC
+    movzbq  2(rPC), %rax                    # eax<- BB
+    movs${suff} VREG_ADDRESS(%rax), %xmm0
+    xor     %eax, %eax
+    ucomis${suff} VREG_ADDRESS(%rcx), %xmm0
+    jp      .L${opcode}_nan_is_${nanval}
+    je      .L${opcode}_finish
+    jb      .L${opcode}_less
+.L${opcode}_nan_is_pos:
+    addb    $$1, %al
+    jmp     .L${opcode}_finish
+.L${opcode}_nan_is_neg:
+.L${opcode}_less:
+    movl    $$-1, %eax
+.L${opcode}_finish:
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/fpcvt.S b/runtime/interpreter/mterp/x86_64/fpcvt.S
new file mode 100644
index 0000000..657869e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/fpcvt.S
@@ -0,0 +1,17 @@
+%default {"source_suffix":"","dest_suffix":"","wide":""}
+/*
+ * Generic floating-point conversion operation (32- or 64-bit result).
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    cvts${source_suffix}2s${dest_suffix}    VREG_ADDRESS(rINSTq), %xmm0
+    .if $wide
+    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_WIDE_REF %rcx
+    .else
+    movss   %xmm0, VREG_ADDRESS(%rcx)
+    CLEAR_REF %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/header.S b/runtime/interpreter/mterp/x86_64/header.S
new file mode 100644
index 0000000..7699fc4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/header.S
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body
+  (doesn't handle invoke; allows higher-level code to create the frame & shadow
+  frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame
+  (and excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+x86_64 ABI general notes:
+
+Caller save set:
+   rax, rdx, rcx, rsi, rdi, r8-r11, st(0)-st(7)
+Callee save set:
+   rbx, rbp, r12-r15
+Return regs:
+   32-bit in eax
+   64-bit in rax
+   fp on xmm0
+
+First 8 fp parameters are passed in xmm0-xmm7.
+First 6 non-fp parameters are passed in rdi, rsi, rdx, rcx, r8, r9.
+Other parameters are passed on the stack, pushed right-to-left.  On entry to
+the target, the first stack param is at 8(%rsp).
+
+The stack must be 16-byte aligned to support SSE in native code.
+
+If we're not doing variable stack allocation (alloca), the frame pointer can be
+eliminated and all arg references adjusted to be rsp-relative.
+*/
+
+/*
+Mterp and x86_64 notes:
+
+Some key interpreter variables will be assigned to registers.
+
+  nick     reg   purpose
+  rPROFILE rbp   countdown register for jit profiling
+  rPC      r12   interpreted program counter, used for fetching instructions
+  rFP      r13   interpreted frame pointer, used for accessing locals and args
+  rINSTw   bx    first 16-bit code unit of current instruction
+  rINSTbl  bl    opcode portion of instruction word
+  rINSTbh  bh    high byte of inst word, usually contains src/tgt reg names
+  rIBASE   r14   base of instruction handler table
+  rREFS    r15   base of object references in shadow frame.
+
+Notes:
+   o High order 16 bits of ebx must be zero on entry to handler
+   o rPC, rFP, rINSTw/rINSTbl valid on handler entry and exit
+   o eax and ecx are scratch, rINSTw/ebx sometimes scratch
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/*
+ * Handle Mac compiler specifics.
+ */
+#if defined(__APPLE__)
+    #define MACRO_LITERAL(value) $$(value)
+    #define FUNCTION_TYPE(name)
+    #define SIZE(start,end)
+    // Mac OS' symbols have an _ prefix.
+    #define SYMBOL(name) _ ## name
+#else
+    #define MACRO_LITERAL(value) $$value
+    #define FUNCTION_TYPE(name) .type name, @function
+    #define SIZE(start,end) .size start, .-end
+    #define SYMBOL(name) name
+#endif
+
+.macro PUSH _reg
+    pushq \_reg
+    .cfi_adjust_cfa_offset 8
+    .cfi_rel_offset \_reg, 0
+.endm
+
+.macro POP _reg
+    popq \_reg
+    .cfi_adjust_cfa_offset -8
+    .cfi_restore \_reg
+.endm
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/* The frame must keep the stack 16-byte aligned.  The call pushed an 8-byte
+ * return address and we spill 6 * 8 bytes of callee saves, so 8 more bytes
+ * bring the total to 64.
+ */
+#define FRAME_SIZE     8
+
+/* Frame diagram while executing ExecuteMterpImpl, high to low addresses */
+#define IN_ARG3        %rcx
+#define IN_ARG2        %rdx
+#define IN_ARG1        %rsi
+#define IN_ARG0        %rdi
+/* Spill offsets relative to %rsp */
+#define SELF_SPILL     (FRAME_SIZE -  8)
+/* Out Args  */
+#define OUT_ARG3       %rcx
+#define OUT_ARG2       %rdx
+#define OUT_ARG1       %rsi
+#define OUT_ARG0       %rdi
+#define OUT_32_ARG3    %ecx
+#define OUT_32_ARG2    %edx
+#define OUT_32_ARG1    %esi
+#define OUT_32_ARG0    %edi
+#define OUT_FP_ARG1    %xmm1
+#define OUT_FP_ARG0    %xmm0
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rSELF    SELF_SPILL(%rsp)
+#define rPC      %r12
+#define rFP      %r13
+#define rINST    %ebx
+#define rINSTq   %rbx
+#define rINSTw   %bx
+#define rINSTbh  %bh
+#define rINSTbl  %bl
+#define rIBASE   %r14
+#define rREFS    %r15
+#define rPROFILE %ebp
+
+#define MTERP_LOGGING 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    movq    rPC, OFF_FP_DEX_PC_PTR(rFP)
+.endm
+
+/*
+ * Refresh handler table.
+ * rIBASE uses a caller-save register, so it must be restored after each call.
+ * It is also clobbered by some 64-bit operations (such as imul), so it should
+ * be restored in those cases as well.
+ */
+.macro REFRESH_IBASE
+    movq    rSELF, rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
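+
+/*
+ * Typical call-site pattern (helper name hypothetical): a native call is
+ * assumed to invalidate rIBASE, so refresh it before dispatching:
+ *     call    SYMBOL(MterpSomeHelper)
+ *     REFRESH_IBASE
+ *     FETCH_INST
+ *     GOTO_NEXT
+ */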
+
+/*
+ * Refresh rINST.
+ * On entry to a handler, rINST does not contain the opcode number.
+ * However, some utilities require the full instruction word, so this
+ * macro restores the opcode number.
+ */
+.macro REFRESH_INST _opnum
+    movb    rINSTbl, rINSTbh
+    movb    $$\_opnum, rINSTbl
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINSTw.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    movzwq  (rPC), rINSTq
+.endm
+
+/*
+ * Remove the opcode from rINST, compute the address of the handler, and jump to it.
+ */
+.macro GOTO_NEXT
+    movzx   rINSTbl,%eax
+    movzbl  rINSTbh,rINST
+    shll    MACRO_LITERAL(${handler_size_bits}), %eax
+    addq    rIBASE, %rax
+    jmp     *%rax
+.endm
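+
+/*
+ * Dispatch arithmetic sketch: each handler is assumed padded to a fixed
+ * 2^handler_size_bits stride, so with 128-byte handlers opcode 0x1a
+ * dispatches to rIBASE + (0x1a << 7) = rIBASE + 0xd00.
+ */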
+
+/*
+ * Advance rPC by instruction count.
+ */
+.macro ADVANCE_PC _count
+    leaq    2*\_count(rPC), rPC
+.endm
+
+/*
+ * Advance rPC by instruction count, fetch instruction and jump to handler.
+ */
+.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count
+    ADVANCE_PC \_count
+    FETCH_INST
+    GOTO_NEXT
+.endm
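+
+/*
+ * Expansion sketch for a two-code-unit instruction:
+ *     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+ * is equivalent to
+ *     leaq    2*2(rPC), rPC               # skip two 16-bit code units
+ *     movzwq  (rPC), rINSTq               # load the next instruction word
+ *     GOTO_NEXT                           # dispatch on its opcode
+ */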
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+#define VREG_ADDRESS(_vreg) (rFP,_vreg,4)
+#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4)
+
+.macro GET_VREG _reg _vreg
+    movl    (rFP,\_vreg,4), \_reg
+.endm
+
+/* Read wide value. */
+.macro GET_WIDE_VREG _reg _vreg
+    movq    (rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), (rREFS,\_vreg,4)
+.endm
+
+/* Write wide value. reg is clobbered. */
+.macro SET_WIDE_VREG _reg _vreg
+    movq    \_reg, (rFP,\_vreg,4)
+    xorq    \_reg, \_reg
+    movq    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro SET_VREG_OBJECT _reg _vreg
+    movl    \_reg, (rFP,\_vreg,4)
+    movl    \_reg, (rREFS,\_vreg,4)
+.endm
+
+.macro GET_VREG_HIGH _reg _vreg
+    movl    4(rFP,\_vreg,4), \_reg
+.endm
+
+.macro SET_VREG_HIGH _reg _vreg
+    movl    \_reg, 4(rFP,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+.endm
+
+.macro CLEAR_WIDE_REF _vreg
+    movl    MACRO_LITERAL(0),  (rREFS,\_vreg,4)
+    movl    MACRO_LITERAL(0), 4(rREFS,\_vreg,4)
+.endm
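+
+/*
+ * Invariant sketch for the accessors above: rFP and rREFS are parallel
+ * 4-byte-slot tables, so wide values span two adjacent slots (hence movq
+ * with a 4-byte scale) and every primitive write clears the matching
+ * rREFS slot(s), letting the GC scan references without decoding code.
+ * E.g. SET_VREG %eax, %rcx expands to
+ *     movl    %eax, (rFP,%rcx,4)                 # value slot
+ *     movl    MACRO_LITERAL(0), (rREFS,%rcx,4)   # not a reference
+ */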
diff --git a/runtime/interpreter/mterp/x86_64/invoke.S b/runtime/interpreter/mterp/x86_64/invoke.S
new file mode 100644
index 0000000..f7e6155
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/invoke.S
@@ -0,0 +1,22 @@
+%default { "helper":"UndefinedInvokeHandler" }
+/*
+ * Generic invoke handler wrapper.
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rPC, OUT_ARG2
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_32_ARG3
+    call    SYMBOL($helper)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC 3
+    call    SYMBOL(MterpShouldSwitchInterpreters)
+    testb   %al, %al
+    jnz     MterpFallback
+    FETCH_INST
+    GOTO_NEXT
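+
+/*
+ * Layout note (sketch): the 3-code-unit invoke formats keep the opcode
+ * and count/arg nibbles in unit 0, the method index BBBB in unit 1, and
+ * the argument registers in unit 2, which is why rPC advances by 3 once
+ * $helper (assumed to return a bool in %al) succeeds.
+ */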
diff --git a/runtime/interpreter/mterp/x86_64/op_add_double.S b/runtime/interpreter/mterp/x86_64/op_add_double.S
new file mode 100644
index 0000000..cb462cb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_double.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_double_2addr.S b/runtime/interpreter/mterp/x86_64/op_add_double_2addr.S
new file mode 100644
index 0000000..063bde3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"adds","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_float.S b/runtime/interpreter/mterp/x86_64/op_add_float.S
new file mode 100644
index 0000000..7753bf8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_float.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_float_2addr.S b/runtime/interpreter/mterp/x86_64/op_add_float_2addr.S
new file mode 100644
index 0000000..6c8005b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"adds","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_int.S b/runtime/interpreter/mterp/x86_64/op_add_int.S
new file mode 100644
index 0000000..e316be7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop.S" {"instr":"addl    (rFP,%rcx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_add_int_2addr.S
new file mode 100644
index 0000000..2ff8293
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binop2addr.S" {"instr":"addl    %eax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_add_int_lit16.S
new file mode 100644
index 0000000..bfeb7ca
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit16.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_add_int_lit8.S
new file mode 100644
index 0000000..8954844
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"addl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_long.S b/runtime/interpreter/mterp/x86_64/op_add_long.S
new file mode 100644
index 0000000..89131ff
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_long.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide.S" {"instr":"addq    (rFP,%rcx,4), %rax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_add_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_add_long_2addr.S
new file mode 100644
index 0000000..fed98bc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide2addr.S" {"instr":"addq    %rax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_aget.S b/runtime/interpreter/mterp/x86_64/op_aget.S
new file mode 100644
index 0000000..58d4948
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget.S
@@ -0,0 +1,24 @@
+%default { "load":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET", "wide":"0" }
+/*
+ * Array get, 32 bits or less (64 bits when "wide" is set).  vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # eax <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if $wide
+    movq    $data_offset(%rax,%rcx,8), %rax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    $load   $data_offset(%rax,%rcx,$shift), %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
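+
+/*
+ * Instantiation sketch: aget-char includes this template with
+ * load=movzwl, shift=2, data_offset=MIRROR_CHAR_ARRAY_DATA_OFFSET,
+ * so the element load becomes
+ *     movzwl  MIRROR_CHAR_ARRAY_DATA_OFFSET(%rax,%rcx,2), %eax
+ */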
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_boolean.S b/runtime/interpreter/mterp/x86_64/op_aget_boolean.S
new file mode 100644
index 0000000..cf7bdb5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "x86_64/op_aget.S" { "load":"movzbl", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_byte.S b/runtime/interpreter/mterp/x86_64/op_aget_byte.S
new file mode 100644
index 0000000..1cbb569
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget_byte.S
@@ -0,0 +1 @@
+%include "x86_64/op_aget.S" { "load":"movsbl", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_char.S b/runtime/interpreter/mterp/x86_64/op_aget_char.S
new file mode 100644
index 0000000..45c9085
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget_char.S
@@ -0,0 +1 @@
+%include "x86_64/op_aget.S" { "load":"movzwl", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_object.S b/runtime/interpreter/mterp/x86_64/op_aget_object.S
new file mode 100644
index 0000000..5f77a97
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget_object.S
@@ -0,0 +1,17 @@
+/*
+ * Array object get.  vAA <- vBB[vCC].
+ *
+ * for: aget-object
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG OUT_32_ARG0, %rax              # eax <- vBB (array object)
+    GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
+    EXPORT_PC
+    call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    SET_VREG_OBJECT %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_short.S b/runtime/interpreter/mterp/x86_64/op_aget_short.S
new file mode 100644
index 0000000..82c4a1d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget_short.S
@@ -0,0 +1 @@
+%include "x86_64/op_aget.S" { "load":"movswl", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_wide.S b/runtime/interpreter/mterp/x86_64/op_aget_wide.S
new file mode 100644
index 0000000..4f2771b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aget_wide.S
@@ -0,0 +1 @@
+%include "x86_64/op_aget.S" { "load":"movq", "shift":"8", "data_offset":"MIRROR_WIDE_ARRAY_DATA_OFFSET", "wide":"1" }
diff --git a/runtime/interpreter/mterp/x86_64/op_and_int.S b/runtime/interpreter/mterp/x86_64/op_and_int.S
new file mode 100644
index 0000000..4469889
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_and_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop.S" {"instr":"andl    (rFP,%rcx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_and_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_and_int_2addr.S
new file mode 100644
index 0000000..16315bb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binop2addr.S" {"instr":"andl    %eax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_and_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_and_int_lit16.S
new file mode 100644
index 0000000..63e851b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit16.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_and_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_and_int_lit8.S
new file mode 100644
index 0000000..da7a20f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"andl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_and_long.S b/runtime/interpreter/mterp/x86_64/op_and_long.S
new file mode 100644
index 0000000..ce1dd26
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_and_long.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide.S" {"instr":"andq    (rFP,%rcx,4), %rax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_and_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_and_long_2addr.S
new file mode 100644
index 0000000..d17ab8d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide2addr.S" {"instr":"andq    %rax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_aput.S b/runtime/interpreter/mterp/x86_64/op_aput.S
new file mode 100644
index 0000000..11500ad
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput.S
@@ -0,0 +1,23 @@
+%default { "reg":"rINST", "store":"movl", "shift":"4", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET", "wide":"0" }
+/*
+ * Array put, 32 bits or less (64 bits when "wide" is set).  vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide
+ *
+ */
+    /* op vAA, vBB, vCC */
+    movzbq  2(rPC), %rax                    # rax <- BB
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    GET_VREG %eax, %rax                     # eax <- vBB (array object)
+    GET_VREG %ecx, %rcx                     # ecx <- vCC (requested index)
+    testl   %eax, %eax                      # null array object?
+    je      common_errNullObject            # bail if so
+    cmpl    MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ecx
+    jae     common_errArrayIndex            # index >= length, bail.
+    .if $wide
+    GET_WIDE_VREG rINSTq, rINSTq
+    .else
+    GET_VREG rINST, rINSTq
+    .endif
+    $store    $reg, $data_offset(%rax,%rcx,$shift)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_aput_boolean.S b/runtime/interpreter/mterp/x86_64/op_aput_boolean.S
new file mode 100644
index 0000000..7d77a86
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "x86_64/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aput_byte.S b/runtime/interpreter/mterp/x86_64/op_aput_byte.S
new file mode 100644
index 0000000..7a1723e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput_byte.S
@@ -0,0 +1 @@
+%include "x86_64/op_aput.S" { "reg":"rINSTbl", "store":"movb", "shift":"1", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aput_char.S b/runtime/interpreter/mterp/x86_64/op_aput_char.S
new file mode 100644
index 0000000..f8f50a3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput_char.S
@@ -0,0 +1 @@
+%include "x86_64/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aput_object.S b/runtime/interpreter/mterp/x86_64/op_aput_object.S
new file mode 100644
index 0000000..b1bae0f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput_object.S
@@ -0,0 +1,13 @@
+/*
+ * Store an object into an array.  vBB[vCC] <- vAA.
+ */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST ${opnum}
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpAputObject)         # (shadow_frame, pc, inst)
+    testb   %al, %al
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_aput_short.S b/runtime/interpreter/mterp/x86_64/op_aput_short.S
new file mode 100644
index 0000000..481fd68
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput_short.S
@@ -0,0 +1 @@
+%include "x86_64/op_aput.S" { "reg":"rINSTw", "store":"movw", "shift":"2", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/x86_64/op_aput_wide.S b/runtime/interpreter/mterp/x86_64/op_aput_wide.S
new file mode 100644
index 0000000..5bbd39b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_aput_wide.S
@@ -0,0 +1 @@
+%include "x86_64/op_aput.S" { "reg":"rINSTq", "store":"movq", "shift":"8", "data_offset":"MIRROR_WIDE_ARRAY_DATA_OFFSET", "wide":"1" }
diff --git a/runtime/interpreter/mterp/x86_64/op_array_length.S b/runtime/interpreter/mterp/x86_64/op_array_length.S
new file mode 100644
index 0000000..e80d665
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_array_length.S
@@ -0,0 +1,12 @@
+/*
+ * Return the length of an array.
+ */
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    GET_VREG %ecx, rINSTq                   # ecx <- vB (object ref)
+    testl   %ecx, %ecx                      # is null?
+    je      common_errNullObject
+    andb    $$0xf, %al                      # eax <- A
+    movl    MIRROR_ARRAY_LENGTH_OFFSET(%rcx), rINST
+    SET_VREG rINST, %rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_check_cast.S b/runtime/interpreter/mterp/x86_64/op_check_cast.S
new file mode 100644
index 0000000..f8fa7b2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_check_cast.S
@@ -0,0 +1,13 @@
+/*
+ * Check to see if a cast from one class to another is allowed.
+ */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # OUT_ARG0 <- BBBB
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG1
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_cmp_long.S b/runtime/interpreter/mterp/x86_64/op_cmp_long.S
new file mode 100644
index 0000000..23ca3e5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_cmp_long.S
@@ -0,0 +1,17 @@
+/*
+ * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
+    /* cmp-long vAA, vBB, vCC */
+    movzbq  2(rPC), %rdx                    # edx <- BB
+    movzbq  3(rPC), %rcx                    # ecx <- CC
+    GET_WIDE_VREG %rdx, %rdx                # rdx <- v[BB]
+    xorl    %eax, %eax
+    xorl    %edi, %edi
+    addb    $$1, %al
+    movl    $$-1, %esi
+    cmpq    VREG_ADDRESS(%rcx), %rdx
+    cmovl   %esi, %edi
+    cmovg   %eax, %edi
+    SET_VREG %edi, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
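+
+/*
+ * Worked example (hypothetical values): vBB = 5, vCC = 9.  %edi starts
+ * at 0, %eax is nudged to +1, %esi holds -1; cmpq computes 5 - 9 and
+ * sets "less", so cmovl copies %esi and vAA receives -1.  At most one
+ * cmov fires, keeping the handler branchless.
+ */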
diff --git a/runtime/interpreter/mterp/x86_64/op_cmpg_double.S b/runtime/interpreter/mterp/x86_64/op_cmpg_double.S
new file mode 100644
index 0000000..7c0aa1b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "x86_64/fpcmp.S" {"suff":"d","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86_64/op_cmpg_float.S b/runtime/interpreter/mterp/x86_64/op_cmpg_float.S
new file mode 100644
index 0000000..14e8472
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "x86_64/fpcmp.S" {"suff":"s","nanval":"pos"}
diff --git a/runtime/interpreter/mterp/x86_64/op_cmpl_double.S b/runtime/interpreter/mterp/x86_64/op_cmpl_double.S
new file mode 100644
index 0000000..1d4c424
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "x86_64/fpcmp.S" {"suff":"d","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86_64/op_cmpl_float.S b/runtime/interpreter/mterp/x86_64/op_cmpl_float.S
new file mode 100644
index 0000000..97a12a6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "x86_64/fpcmp.S" {"suff":"s","nanval":"neg"}
diff --git a/runtime/interpreter/mterp/x86_64/op_const.S b/runtime/interpreter/mterp/x86_64/op_const.S
new file mode 100644
index 0000000..3cfafdb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const.S
@@ -0,0 +1,4 @@
+    /* const vAA, #+BBBBbbbb */
+    movl    2(rPC), %eax                    # grab all 32 bits at once
+    SET_VREG %eax, rINSTq                   # vAA<- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_const_16.S b/runtime/interpreter/mterp/x86_64/op_const_16.S
new file mode 100644
index 0000000..1a139c6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_16.S
@@ -0,0 +1,4 @@
+    /* const/16 vAA, #+BBBB */
+    movswl  2(rPC), %ecx                    # ecx <- ssssBBBB
+    SET_VREG %ecx, rINSTq                   # vAA <- ssssBBBB
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_const_4.S b/runtime/interpreter/mterp/x86_64/op_const_4.S
new file mode 100644
index 0000000..23c4816
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_4.S
@@ -0,0 +1,7 @@
+    /* const/4 vA, #+B */
+    movsbl  rINSTbl, %eax                   # eax <- ssssssBx
+    movl    $$0xf, rINST
+    andl    %eax, rINST                     # rINST <- A
+    sarl    $$4, %eax
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
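+
+/*
+ * Decode sketch (hypothetical encoding): for const/4 v2, #-3 the BA byte
+ * is 0xd2, so
+ *     movsbl  ->  %eax  = 0xffffffd2
+ *     & 0xf   ->  rINST = 2    (destination v2)
+ *     sar 4   ->  %eax  = -3   (sign-extended literal)
+ */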
diff --git a/runtime/interpreter/mterp/x86_64/op_const_class.S b/runtime/interpreter/mterp/x86_64/op_const_class.S
new file mode 100644
index 0000000..494920a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_class.S
@@ -0,0 +1,10 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # OUT_ARG0 <- BBBB
+    movq    rINSTq, OUT_ARG1
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_const_high16.S b/runtime/interpreter/mterp/x86_64/op_const_high16.S
new file mode 100644
index 0000000..64e633c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_high16.S
@@ -0,0 +1,5 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    movzwl  2(rPC), %eax                    # eax <- 0000BBBB
+    sall    $$16, %eax                      # eax <- BBBB0000
+    SET_VREG %eax, rINSTq                   # vAA <- eax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_const_string.S b/runtime/interpreter/mterp/x86_64/op_const_string.S
new file mode 100644
index 0000000..7c199ec
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_string.S
@@ -0,0 +1,10 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # OUT_ARG0 <- BBBB
+    movq    rINSTq, OUT_ARG1
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86_64/op_const_string_jumbo.S
new file mode 100644
index 0000000..ae03d20
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_string_jumbo.S
@@ -0,0 +1,10 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    movl    2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- BBBBBBBB
+    movq    rINSTq, OUT_ARG1
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_const_wide.S b/runtime/interpreter/mterp/x86_64/op_const_wide.S
new file mode 100644
index 0000000..5615177
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_wide.S
@@ -0,0 +1,4 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    movq    2(rPC), %rax                    # rax <- HHHHhhhhBBBBbbbb
+    SET_WIDE_VREG %rax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 5
diff --git a/runtime/interpreter/mterp/x86_64/op_const_wide_16.S b/runtime/interpreter/mterp/x86_64/op_const_wide_16.S
new file mode 100644
index 0000000..593b624
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_wide_16.S
@@ -0,0 +1,4 @@
+    /* const-wide/16 vAA, #+BBBB */
+    movswq  2(rPC), %rax                    # rax <- ssssBBBB
+    SET_WIDE_VREG %rax, rINSTq              # store
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_const_wide_32.S b/runtime/interpreter/mterp/x86_64/op_const_wide_32.S
new file mode 100644
index 0000000..5ef3636
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_wide_32.S
@@ -0,0 +1,4 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    movslq   2(rPC), %rax                   # rax <- ssssssssBBBBbbbb
+    SET_WIDE_VREG %rax, rINSTq              # store
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_const_wide_high16.S b/runtime/interpreter/mterp/x86_64/op_const_wide_high16.S
new file mode 100644
index 0000000..b86b4e5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_const_wide_high16.S
@@ -0,0 +1,5 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    movzwq  2(rPC), %rax                    # rax <- 0000BBBB
+    salq    $$48, %rax                      # rax <- BBBB000000000000
+    SET_WIDE_VREG %rax, rINSTq              # vAA <- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_div_double.S b/runtime/interpreter/mterp/x86_64/op_div_double.S
new file mode 100644
index 0000000..45c700c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_double.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_double_2addr.S b/runtime/interpreter/mterp/x86_64/op_div_double_2addr.S
new file mode 100644
index 0000000..83f270e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"divs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_float.S b/runtime/interpreter/mterp/x86_64/op_div_float.S
new file mode 100644
index 0000000..aa90b24
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_float.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_float_2addr.S b/runtime/interpreter/mterp/x86_64/op_div_float_2addr.S
new file mode 100644
index 0000000..f0f8f1a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"divs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_int.S b/runtime/interpreter/mterp/x86_64/op_div_int.S
new file mode 100644
index 0000000..bba5a17
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_int.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv.S" {"result":"%eax","second":"%ecx","wide":"0","suffix":"l"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_div_int_2addr.S
new file mode 100644
index 0000000..fa4255d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv2addr.S" {"result":"%eax","second":"%ecx","wide":"0","suffix":"l"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_div_int_lit16.S
new file mode 100644
index 0000000..3fa1e09
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/bindivLit16.S" {"result":"%eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_div_int_lit8.S
new file mode 100644
index 0000000..859883e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/bindivLit8.S" {"result":"%eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_long.S b/runtime/interpreter/mterp/x86_64/op_div_long.S
new file mode 100644
index 0000000..a061a88
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_long.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv.S" {"result":"%rax","second":"%rcx","wide":"1","suffix":"q","ext":"cqo"}
diff --git a/runtime/interpreter/mterp/x86_64/op_div_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_div_long_2addr.S
new file mode 100644
index 0000000..8886e68
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv2addr.S" {"result":"%rax","second":"%rcx","wide":"1","suffix":"q","ext":"cqo"}
diff --git a/runtime/interpreter/mterp/x86_64/op_double_to_float.S b/runtime/interpreter/mterp/x86_64/op_double_to_float.S
new file mode 100644
index 0000000..cea1482
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_double_to_float.S
@@ -0,0 +1 @@
+%include "x86_64/fpcvt.S" {"source_suffix":"d","dest_suffix":"s","wide":"0"}
diff --git a/runtime/interpreter/mterp/x86_64/op_double_to_int.S b/runtime/interpreter/mterp/x86_64/op_double_to_int.S
new file mode 100644
index 0000000..a9965ed
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_double_to_int.S
@@ -0,0 +1 @@
+%include "x86_64/cvtfp_int.S" {"fp_suffix":"d","i_suffix":"l","max_const":"$0x7fffffff","result_reg":"%eax","wide":"0"}
diff --git a/runtime/interpreter/mterp/x86_64/op_double_to_long.S b/runtime/interpreter/mterp/x86_64/op_double_to_long.S
new file mode 100644
index 0000000..179e6a1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_double_to_long.S
@@ -0,0 +1 @@
+%include "x86_64/cvtfp_int.S" {"fp_suffix":"d","i_suffix":"q","max_const":"$0x7fffffffffffffff","result_reg":"%rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
new file mode 100644
index 0000000..7ea36a6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
@@ -0,0 +1,9 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    movslq  2(rPC), %rcx                    # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
+    GET_VREG OUT_32_ARG0, rINSTq            # OUT_ARG0 <- vAA (array object)
+    call    SYMBOL(MterpFillArrayData)      # (obj, payload)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_filled_new_array.S b/runtime/interpreter/mterp/x86_64/op_filled_new_array.S
new file mode 100644
index 0000000..a7f7ddc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_filled_new_array.S
@@ -0,0 +1,17 @@
+%default { "helper":"MterpFilledNewArray" }
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL($helper)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_filled_new_array_range.S b/runtime/interpreter/mterp/x86_64/op_filled_new_array_range.S
new file mode 100644
index 0000000..4ca79a3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "x86_64/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_float_to_double.S b/runtime/interpreter/mterp/x86_64/op_float_to_double.S
new file mode 100644
index 0000000..7855205
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_float_to_double.S
@@ -0,0 +1 @@
+%include "x86_64/fpcvt.S" {"source_suffix":"s","dest_suffix":"d","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_float_to_int.S b/runtime/interpreter/mterp/x86_64/op_float_to_int.S
new file mode 100644
index 0000000..cb90555
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_float_to_int.S
@@ -0,0 +1 @@
+%include "x86_64/cvtfp_int.S" {"fp_suffix":"s","i_suffix":"l","max_const":"$0x7fffffff","result_reg":"%eax","wide":"0"}
diff --git a/runtime/interpreter/mterp/x86_64/op_float_to_long.S b/runtime/interpreter/mterp/x86_64/op_float_to_long.S
new file mode 100644
index 0000000..96bb4ee
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_float_to_long.S
@@ -0,0 +1 @@
+%include "x86_64/cvtfp_int.S" {"fp_suffix":"s","i_suffix":"q","max_const":"$0x7fffffffffffffff","result_reg":"%rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_goto.S b/runtime/interpreter/mterp/x86_64/op_goto.S
new file mode 100644
index 0000000..9749901
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_goto.S
@@ -0,0 +1,10 @@
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto +AA */
+    movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
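+
+/*
+ * Sketch: movsbq sign-extends AA, so e.g. AA = 0xfe yields rINSTq = -2
+ * code units (-4 bytes once doubled).  The testq presumably sets SF so
+ * MterpCommonTakenBranch can treat backward branches differently from
+ * forward ones (suspend check / hotness countdown).
+ */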
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_16.S b/runtime/interpreter/mterp/x86_64/op_goto_16.S
new file mode 100644
index 0000000..77688e0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_goto_16.S
@@ -0,0 +1,10 @@
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+    /* goto/16 +AAAA */
+    movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_32.S b/runtime/interpreter/mterp/x86_64/op_goto_32.S
new file mode 100644
index 0000000..29d777b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_goto_32.S
@@ -0,0 +1,13 @@
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Because the common taken-branch handler needs the sign flag set, we
+ * test the offset before jumping to it.
+ */
+    /* goto/32 +AAAAAAAA */
+    movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_if_eq.S b/runtime/interpreter/mterp/x86_64/op_if_eq.S
new file mode 100644
index 0000000..d56ce72
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_eq.S
@@ -0,0 +1 @@
+%include "x86_64/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_eqz.S b/runtime/interpreter/mterp/x86_64/op_if_eqz.S
new file mode 100644
index 0000000..a0fc444
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_eqz.S
@@ -0,0 +1 @@
+%include "x86_64/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_ge.S b/runtime/interpreter/mterp/x86_64/op_if_ge.S
new file mode 100644
index 0000000..a7832ef
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_ge.S
@@ -0,0 +1 @@
+%include "x86_64/bincmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_gez.S b/runtime/interpreter/mterp/x86_64/op_if_gez.S
new file mode 100644
index 0000000..f9af5db
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_gez.S
@@ -0,0 +1 @@
+%include "x86_64/zcmp.S" { "revcmp":"l" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_gt.S b/runtime/interpreter/mterp/x86_64/op_if_gt.S
new file mode 100644
index 0000000..70f2b9e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_gt.S
@@ -0,0 +1 @@
+%include "x86_64/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_gtz.S b/runtime/interpreter/mterp/x86_64/op_if_gtz.S
new file mode 100644
index 0000000..2fb0d50
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_gtz.S
@@ -0,0 +1 @@
+%include "x86_64/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_le.S b/runtime/interpreter/mterp/x86_64/op_if_le.S
new file mode 100644
index 0000000..321962a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_le.S
@@ -0,0 +1 @@
+%include "x86_64/bincmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_lez.S b/runtime/interpreter/mterp/x86_64/op_if_lez.S
new file mode 100644
index 0000000..d3dc334
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_lez.S
@@ -0,0 +1 @@
+%include "x86_64/zcmp.S" { "revcmp":"g" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_lt.S b/runtime/interpreter/mterp/x86_64/op_if_lt.S
new file mode 100644
index 0000000..f028005
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_lt.S
@@ -0,0 +1 @@
+%include "x86_64/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_ltz.S b/runtime/interpreter/mterp/x86_64/op_if_ltz.S
new file mode 100644
index 0000000..383d73a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_ltz.S
@@ -0,0 +1 @@
+%include "x86_64/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_ne.S b/runtime/interpreter/mterp/x86_64/op_if_ne.S
new file mode 100644
index 0000000..ac6e063
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_ne.S
@@ -0,0 +1 @@
+%include "x86_64/bincmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86_64/op_if_nez.S b/runtime/interpreter/mterp/x86_64/op_if_nez.S
new file mode 100644
index 0000000..c96e4f3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_if_nez.S
@@ -0,0 +1 @@
+%include "x86_64/zcmp.S" { "revcmp":"e" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget.S b/runtime/interpreter/mterp/x86_64/op_iget.S
new file mode 100644
index 0000000..df43efe
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget.S
@@ -0,0 +1,28 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode", "wide":"0"}
+/*
+ * General instance field get.
+ *
+ * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short, iget-wide
+ */
+    EXPORT_PC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- field ref CCCC
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL($helper)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    .if $is_object
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if $wide
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
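+
+/*
+ * Instantiation sketch: iget-wide includes this template with
+ * helper=artGet64InstanceFromCode and wide=1, so the tail (exception
+ * check elided) reduces to
+ *     call    SYMBOL(artGet64InstanceFromCode)   # 64-bit result in %rax
+ *     andb    $$0xf, rINSTbl                     # rINST <- A
+ *     SET_WIDE_VREG %rax, rINSTq                 # fp[A] <- value
+ */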
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_boolean.S b/runtime/interpreter/mterp/x86_64/op_iget_boolean.S
new file mode 100644
index 0000000..6ac5523
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_boolean_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_boolean_quick.S
new file mode 100644
index 0000000..07139c7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_byte.S b/runtime/interpreter/mterp/x86_64/op_iget_byte.S
new file mode 100644
index 0000000..6a861b1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_byte.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_byte_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_byte_quick.S
new file mode 100644
index 0000000..07139c7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget_quick.S" { "load":"movsbl" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_char.S b/runtime/interpreter/mterp/x86_64/op_iget_char.S
new file mode 100644
index 0000000..021a0f1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_char.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_char_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_char_quick.S
new file mode 100644
index 0000000..8cb3be3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget_quick.S" { "load":"movzwl" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_object.S b/runtime/interpreter/mterp/x86_64/op_iget_object.S
new file mode 100644
index 0000000..d92bc9c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_object.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
new file mode 100644
index 0000000..176c954
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
@@ -0,0 +1,15 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    .extern artIGetObjectFromMterp
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG0, %rcx              # vB (object we're operating on)
+    movzwl  2(rPC), OUT_32_ARG1             # OUT_32_ARG1 <- field byte offset
+    EXPORT_PC
+    callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException                  # bail out
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_quick.S
new file mode 100644
index 0000000..bfb7530
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_quick.S
@@ -0,0 +1,18 @@
+%default { "load":"movl", "wide":"0"}
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */
+    /* op vA, vB, offset@CCCC */
+    movl    rINST, %ecx                     # rcx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    movzwq  2(rPC), %rax                    # eax <- field byte offset
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $$0xf,rINSTbl                   # rINST <- A
+    .if $wide
+    movq (%rcx,%rax,1), %rax
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    ${load} (%rcx,%rax,1), %eax
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
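+
+/*
+ * Quickening sketch: CCCC is assumed to already be the raw byte offset
+ * of the field within the object (rewritten from the field index at
+ * quickening time), so for a hypothetical offset 0x10 the access is just
+ *     movl    0x10(%rcx), %eax            # null check done above
+ *     SET_VREG %eax, rINSTq               # fp[A] <- value
+ */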
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_short.S b/runtime/interpreter/mterp/x86_64/op_iget_short.S
new file mode 100644
index 0000000..f158bea
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_short.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_short_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_short_quick.S
new file mode 100644
index 0000000..56ca858
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget_quick.S" { "load":"movswl" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_wide.S b/runtime/interpreter/mterp/x86_64/op_iget_wide.S
new file mode 100644
index 0000000..74bb9ff
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_wide.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget.S" { "helper":"artGet64InstanceFromCode", "wide":"1" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_wide_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_wide_quick.S
new file mode 100644
index 0000000..169d625
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iget_wide_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iget_quick.S" { "load":"movswl", "wide":"1" }
diff --git a/runtime/interpreter/mterp/x86_64/op_instance_of.S b/runtime/interpreter/mterp/x86_64/op_instance_of.S
new file mode 100644
index 0000000..4819833
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_instance_of.S
@@ -0,0 +1,22 @@
+/*
+ * Check to see if an object reference is an instance of a class.
+ *
+ * The most common situation is a non-null object being compared against
+ * an already-resolved class.
+ */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # OUT_32_ARG0 <- CCCC
+    movl    rINST, %eax                     # eax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    leaq    VREG_ADDRESS(%rax), OUT_ARG1    # Get object address
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
+    movsbl  %al, %eax
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    andb    $$0xf, rINSTbl                  # rINSTbl <- A
+    SET_VREG %eax, rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_int_to_byte.S b/runtime/interpreter/mterp/x86_64/op_int_to_byte.S
new file mode 100644
index 0000000..f4e578f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"movsbl  %al, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_int_to_char.S b/runtime/interpreter/mterp/x86_64/op_int_to_char.S
new file mode 100644
index 0000000..c1bf17f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_int_to_char.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"movzwl  %ax,%eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_int_to_double.S b/runtime/interpreter/mterp/x86_64/op_int_to_double.S
new file mode 100644
index 0000000..27ebf42
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_int_to_double.S
@@ -0,0 +1 @@
+%include "x86_64/fpcvt.S" {"source_suffix":"i","dest_suffix":"dl","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_int_to_float.S b/runtime/interpreter/mterp/x86_64/op_int_to_float.S
new file mode 100644
index 0000000..5a98d44
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_int_to_float.S
@@ -0,0 +1 @@
+%include "x86_64/fpcvt.S" {"source_suffix":"i","dest_suffix":"sl","wide":"0"}
diff --git a/runtime/interpreter/mterp/x86_64/op_int_to_long.S b/runtime/interpreter/mterp/x86_64/op_int_to_long.S
new file mode 100644
index 0000000..9281137
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_int_to_long.S
@@ -0,0 +1,8 @@
+    /* int to long vA, vB */
+    movzbq  rINSTbl, %rax                   # rax <- BA
+    sarl    $$4, %eax                       # eax <- B
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    movslq  VREG_ADDRESS(%rax), %rax
+    SET_WIDE_VREG %rax, rINSTq              # v[A] <- %rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
diff --git a/runtime/interpreter/mterp/x86_64/op_int_to_short.S b/runtime/interpreter/mterp/x86_64/op_int_to_short.S
new file mode 100644
index 0000000..6ae6b50
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_int_to_short.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"movswl %ax, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_direct.S b/runtime/interpreter/mterp/x86_64/op_invoke_direct.S
new file mode 100644
index 0000000..9628589
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_direct_range.S b/runtime/interpreter/mterp/x86_64/op_invoke_direct_range.S
new file mode 100644
index 0000000..09ac881
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_interface.S b/runtime/interpreter/mterp/x86_64/op_invoke_interface.S
new file mode 100644
index 0000000..76d9cd4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeInterface" }
+/*
+ * Handle an interface method call.
+ *
+ * for: invoke-interface, invoke-interface/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_interface_range.S b/runtime/interpreter/mterp/x86_64/op_invoke_interface_range.S
new file mode 100644
index 0000000..785b43c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_static.S b/runtime/interpreter/mterp/x86_64/op_invoke_static.S
new file mode 100644
index 0000000..dd8027d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_static_range.S b/runtime/interpreter/mterp/x86_64/op_invoke_static_range.S
new file mode 100644
index 0000000..ee26074
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_super.S b/runtime/interpreter/mterp/x86_64/op_invoke_super.S
new file mode 100644
index 0000000..d07f8d5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeSuper" }
+/*
+ * Handle a "super" method call.
+ *
+ * for: invoke-super, invoke-super/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_super_range.S b/runtime/interpreter/mterp/x86_64/op_invoke_super_range.S
new file mode 100644
index 0000000..7245cfd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_virtual.S b/runtime/interpreter/mterp/x86_64/op_invoke_virtual.S
new file mode 100644
index 0000000..19c708b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeVirtual" }
+/*
+ * Handle a virtual method call.
+ *
+ * for: invoke-virtual, invoke-virtual/range
+ */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/x86_64/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..313bd05
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_virtual_range.S b/runtime/interpreter/mterp/x86_64/op_invoke_virtual_range.S
new file mode 100644
index 0000000..424ad32
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/x86_64/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..556f718
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "x86_64/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput.S b/runtime/interpreter/mterp/x86_64/op_iput.S
new file mode 100644
index 0000000..6b7cb1c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput.S
@@ -0,0 +1,20 @@
+%default { "handler":"artSet32InstanceFromMterp"}
+/*
+ * General 32-bit instance field put.
+ *
+ * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+ */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    movzwl  2(rPC), OUT_32_ARG0             # field ref <- 0000CCCC
+    movzbq  rINSTbl, %rcx                   # rcx<- BA
+    sarl    $$4, %ecx                       # ecx<- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $$0xf, rINSTbl                  # rINST<- A
+    GET_VREG OUT_32_ARG2, rINSTq            # fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL($handler)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_boolean.S b/runtime/interpreter/mterp/x86_64/op_iput_boolean.S
new file mode 100644
index 0000000..cb4b1cd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_boolean_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_boolean_quick.S
new file mode 100644
index 0000000..6bd060e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_byte.S b/runtime/interpreter/mterp/x86_64/op_iput_byte.S
new file mode 100644
index 0000000..cb4b1cd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_byte.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_byte_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_byte_quick.S
new file mode 100644
index 0000000..6bd060e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput_quick.S" { "reg":"rINSTbl", "store":"movb" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_char.S b/runtime/interpreter/mterp/x86_64/op_iput_char.S
new file mode 100644
index 0000000..b4e147c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_char.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_char_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_char_quick.S
new file mode 100644
index 0000000..3da96d5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_object.S b/runtime/interpreter/mterp/x86_64/op_iput_object.S
new file mode 100644
index 0000000..828712d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_object.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_32_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpIputObject)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_object_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_object_quick.S
new file mode 100644
index 0000000..b5b128a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_object_quick.S
@@ -0,0 +1,9 @@
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST ${opnum}
+    movl    rINST, OUT_32_ARG2
+    call    SYMBOL(MterpIputObjectQuick)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_quick.S
new file mode 100644
index 0000000..ecaf98e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_quick.S
@@ -0,0 +1,13 @@
+%default { "reg":"rINST", "store":"movl" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # vB (object we're operating on)
+    testl   %ecx, %ecx                      # is object null?
+    je      common_errNullObject
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    GET_VREG rINST, rINSTq                  # rINST <- v[A]
+    movzwq  2(rPC), %rax                    # rax <- field byte offset
+    ${store}    ${reg}, (%rcx,%rax,1)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
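
The quickened iput above trades the field-resolution call for a direct store: by the time this handler runs, the instruction has been rewritten so that CCCC is the raw byte offset of the field inside the object. A minimal C++ sketch of the same store, with hypothetical names (ART's real path goes through the shadow frame and compressed references):

    #include <cstdint>
    #include <cstring>

    // Sketch of a quickened instance-field store: null-check the object,
    // then write `value` at the byte offset baked into the instruction.
    template <typename T>
    bool IPutQuick(void* obj, uint16_t byte_offset, T value) {
      if (obj == nullptr) {
        return false;  // the handler branches to common_errNullObject
      }
      std::memcpy(static_cast<uint8_t*>(obj) + byte_offset, &value, sizeof(T));
      return true;
    }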
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_short.S b/runtime/interpreter/mterp/x86_64/op_iput_short.S
new file mode 100644
index 0000000..b4e147c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_short.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_short_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_short_quick.S
new file mode 100644
index 0000000..3da96d5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "x86_64/op_iput_quick.S" { "reg":"rINSTw", "store":"movw" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_wide.S b/runtime/interpreter/mterp/x86_64/op_iput_wide.S
new file mode 100644
index 0000000..e59717b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_wide.S
@@ -0,0 +1,14 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref CCCC
+    movzbq  rINSTbl, %rcx                   # rcx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG OUT_32_ARG1, %rcx              # the object pointer
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[A]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG3    # referrer
+    call    SYMBOL(artSet64InstanceFromMterp)
+    testb   %al, %al
+    jnz     MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_iput_wide_quick.S b/runtime/interpreter/mterp/x86_64/op_iput_wide_quick.S
new file mode 100644
index 0000000..473189d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_iput_wide_quick.S
@@ -0,0 +1,12 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    movzbq    rINSTbl, %rcx                 # rcx<- BA
+    sarl      $$4, %ecx                     # ecx<- B
+    GET_VREG  %ecx, %rcx                    # vB (object we're operating on)
+    testl     %ecx, %ecx                    # is object null?
+    je        common_errNullObject
+    movzwq    2(rPC), %rax                  # rax<- field byte offset
+    leaq      (%rcx,%rax,1), %rcx           # rcx<- address of 64-bit target
+    andb      $$0xf, rINSTbl                # rINST<- A
+    GET_WIDE_VREG %rax, rINSTq              # rax<- v[A]
+    movq      %rax, (%rcx)                  # obj.field<- rax
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_long_to_double.S b/runtime/interpreter/mterp/x86_64/op_long_to_double.S
new file mode 100644
index 0000000..7cdae32
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_long_to_double.S
@@ -0,0 +1 @@
+%include "x86_64/fpcvt.S" {"source_suffix":"i","dest_suffix":"dq","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_long_to_float.S b/runtime/interpreter/mterp/x86_64/op_long_to_float.S
new file mode 100644
index 0000000..7553348
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_long_to_float.S
@@ -0,0 +1 @@
+%include "x86_64/fpcvt.S" {"source_suffix":"i","dest_suffix":"sq","wide":"0"}
diff --git a/runtime/interpreter/mterp/x86_64/op_long_to_int.S b/runtime/interpreter/mterp/x86_64/op_long_to_int.S
new file mode 100644
index 0000000..7b50c8e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "x86_64/op_move.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_monitor_enter.S b/runtime/interpreter/mterp/x86_64/op_monitor_enter.S
new file mode 100644
index 0000000..411091f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_monitor_enter.S
@@ -0,0 +1,11 @@
+/*
+ * Synchronize on an object.
+ */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    GET_VREG OUT_32_ARG0, rINSTq
+    movq    rSELF, OUT_ARG1
+    call    SYMBOL(artLockObjectFromCode)   # (object, self)
+    testq   %rax, %rax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_monitor_exit.S b/runtime/interpreter/mterp/x86_64/op_monitor_exit.S
new file mode 100644
index 0000000..72d9a23
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_monitor_exit.S
@@ -0,0 +1,15 @@
+/*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction.  See the Dalvik
+ * instruction spec.
+ */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    GET_VREG OUT_32_ARG0, rINSTq
+    movq    rSELF, OUT_ARG1
+    call    SYMBOL(artUnlockObjectFromCode) # (object, self)
+    testq   %rax, %rax
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_move.S b/runtime/interpreter/mterp/x86_64/op_move.S
new file mode 100644
index 0000000..ccaac2c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move.S
@@ -0,0 +1,13 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    movl    rINST, %eax                     # eax <- BA
+    andb    $$0xf, %al                      # eax <- A
+    shrl    $$4, rINST                      # rINST <- B
+    GET_VREG %edx, rINSTq
+    .if $is_object
+    SET_VREG_OBJECT %edx, %rax              # fp[A] <- fp[B]
+    .else
+    SET_VREG %edx, %rax                     # fp[A] <- fp[B]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_move_16.S b/runtime/interpreter/mterp/x86_64/op_move_16.S
new file mode 100644
index 0000000..6a813eb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_16.S
@@ -0,0 +1,12 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    movzwq  4(rPC), %rcx                    # ecx <- BBBB
+    movzwq  2(rPC), %rax                    # eax <- AAAA
+    GET_VREG %edx, %rcx
+    .if $is_object
+    SET_VREG_OBJECT %edx, %rax              # fp[AAAA] <- fp[BBBB]
+    .else
+    SET_VREG %edx, %rax                     # fp[AAAA] <- fp[BBBB]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_move_exception.S b/runtime/interpreter/mterp/x86_64/op_move_exception.S
new file mode 100644
index 0000000..33db878
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_exception.S
@@ -0,0 +1,6 @@
+    /* move-exception vAA */
+    movq    rSELF, %rcx
+    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
+    SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
+    movl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_move_from16.S b/runtime/interpreter/mterp/x86_64/op_move_from16.S
new file mode 100644
index 0000000..150e9c2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_from16.S
@@ -0,0 +1,11 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    movzwq  2(rPC), %rax                    # eax <- BBBB
+    GET_VREG %edx, %rax                     # edx <- fp[BBBB]
+    .if $is_object
+    SET_VREG_OBJECT %edx, rINSTq            # fp[AA] <- fp[BBBB]
+    .else
+    SET_VREG %edx, rINSTq                   # fp[AA] <- fp[BBBB]
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_move_object.S b/runtime/interpreter/mterp/x86_64/op_move_object.S
new file mode 100644
index 0000000..0d86649
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_object.S
@@ -0,0 +1 @@
+%include "x86_64/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_move_object_16.S b/runtime/interpreter/mterp/x86_64/op_move_object_16.S
new file mode 100644
index 0000000..32541ff
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_object_16.S
@@ -0,0 +1 @@
+%include "x86_64/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_move_object_from16.S b/runtime/interpreter/mterp/x86_64/op_move_object_from16.S
new file mode 100644
index 0000000..983e4ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "x86_64/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_move_result.S b/runtime/interpreter/mterp/x86_64/op_move_result.S
new file mode 100644
index 0000000..8268344
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_result.S
@@ -0,0 +1,11 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rax    # get pointer to result JType.
+    movl    (%rax), %eax                    # eax <- result.i
+    .if $is_object
+    SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- result
+    .else
+    SET_VREG %eax, rINSTq                   # fp[AA] <- result
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_move_result_object.S b/runtime/interpreter/mterp/x86_64/op_move_result_object.S
new file mode 100644
index 0000000..c5aac17
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_result_object.S
@@ -0,0 +1 @@
+%include "x86_64/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_move_result_wide.S b/runtime/interpreter/mterp/x86_64/op_move_result_wide.S
new file mode 100644
index 0000000..03de783
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_result_wide.S
@@ -0,0 +1,5 @@
+    /* move-result-wide vAA */
+    movq    OFF_FP_RESULT_REGISTER(rFP), %rax    # get pointer to result JType.
+    movq    (%rax), %rdx                         # Get wide
+    SET_WIDE_VREG %rdx, rINSTq                   # v[AA] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_move_wide.S b/runtime/interpreter/mterp/x86_64/op_move_wide.S
new file mode 100644
index 0000000..508f8cc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_wide.S
@@ -0,0 +1,8 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rdx, rINSTq              # rdx <- v[B]
+    SET_WIDE_VREG %rdx, %rcx                # v[A] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_move_wide_16.S b/runtime/interpreter/mterp/x86_64/op_move_wide_16.S
new file mode 100644
index 0000000..ce371a9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_wide_16.S
@@ -0,0 +1,7 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwq  4(rPC), %rcx                    # ecx<- BBBB
+    movzwq  2(rPC), %rax                    # eax<- AAAA
+    GET_WIDE_VREG %rdx, %rcx                # rdx <- v[B]
+    SET_WIDE_VREG %rdx, %rax                # v[A] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86_64/op_move_wide_from16.S b/runtime/interpreter/mterp/x86_64/op_move_wide_from16.S
new file mode 100644
index 0000000..0d6971a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_move_wide_from16.S
@@ -0,0 +1,6 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    movzwl  2(rPC), %ecx                    # ecx <- BBBB
+    GET_WIDE_VREG %rdx, %rcx                # rdx <- v[B]
+    SET_WIDE_VREG %rdx, rINSTq              # v[A] <- rdx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_double.S b/runtime/interpreter/mterp/x86_64/op_mul_double.S
new file mode 100644
index 0000000..1f4bcb3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_double.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_double_2addr.S b/runtime/interpreter/mterp/x86_64/op_mul_double_2addr.S
new file mode 100644
index 0000000..9850a28
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"muls","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_float.S b/runtime/interpreter/mterp/x86_64/op_mul_float.S
new file mode 100644
index 0000000..85960e9
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_float.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_float_2addr.S b/runtime/interpreter/mterp/x86_64/op_mul_float_2addr.S
new file mode 100644
index 0000000..6d36b6a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"muls","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_int.S b/runtime/interpreter/mterp/x86_64/op_mul_int.S
new file mode 100644
index 0000000..5f3923a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop.S" {"instr":"imull   (rFP,%rcx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_mul_int_2addr.S
new file mode 100644
index 0000000..0b5af8a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_int_2addr.S
@@ -0,0 +1,8 @@
+    /* mul vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_VREG %eax, %rcx                     # eax <- vA
+    imull   (rFP,rINSTq,4), %eax
+    SET_VREG %eax, %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_mul_int_lit16.S
new file mode 100644
index 0000000..a4cfdbc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit16.S" {"instr":"imull   %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_mul_int_lit8.S
new file mode 100644
index 0000000..89e9acb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"imull   %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_long.S b/runtime/interpreter/mterp/x86_64/op_mul_long.S
new file mode 100644
index 0000000..2b85370
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_long.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide.S" {"instr":"imulq   (rFP,%rcx,4), %rax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_mul_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_mul_long_2addr.S
new file mode 100644
index 0000000..167128b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_mul_long_2addr.S
@@ -0,0 +1,8 @@
+    /* mul vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    andb    $$0xf, %cl                      # ecx <- A
+    GET_WIDE_VREG %rax, %rcx                # rax <- vA
+    imulq   (rFP,rINSTq,4), %rax
+    SET_WIDE_VREG %rax, %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_neg_double.S b/runtime/interpreter/mterp/x86_64/op_neg_double.S
new file mode 100644
index 0000000..2c14b09
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_neg_double.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"preinstr":"    movq    $0x8000000000000000, %rsi", "instr":"    xorq    %rsi, %rax", "wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_neg_float.S b/runtime/interpreter/mterp/x86_64/op_neg_float.S
new file mode 100644
index 0000000..148b21e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_neg_float.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"    xorl    $0x80000000, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_neg_int.S b/runtime/interpreter/mterp/x86_64/op_neg_int.S
new file mode 100644
index 0000000..f90a937
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_neg_int.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"    negl    %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_neg_long.S b/runtime/interpreter/mterp/x86_64/op_neg_long.S
new file mode 100644
index 0000000..18fc3cc
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_neg_long.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"    negq    %rax", "wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_new_array.S b/runtime/interpreter/mterp/x86_64/op_new_array.S
new file mode 100644
index 0000000..9831a0b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_new_array.S
@@ -0,0 +1,18 @@
+/*
+ * Allocate an array of objects, specified with the array class
+ * and a count.
+ *
+ * The verifier guarantees that this is an array class, so we don't
+ * check for it here.
+ */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST ${opnum}
+    movq    rINSTq, OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpNewArray)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_new_instance.S b/runtime/interpreter/mterp/x86_64/op_new_instance.S
new file mode 100644
index 0000000..fc8c8cd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_new_instance.S
@@ -0,0 +1,13 @@
+/*
+ * Create a new instance of a class.
+ */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rSELF, OUT_ARG1
+    REFRESH_INST ${opnum}
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpNewInstance)
+    testb   %al, %al                        # 0 means an exception is thrown
+    jz      MterpPossibleException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_nop.S b/runtime/interpreter/mterp/x86_64/op_nop.S
new file mode 100644
index 0000000..4cb68e3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_nop.S
@@ -0,0 +1 @@
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_not_int.S b/runtime/interpreter/mterp/x86_64/op_not_int.S
new file mode 100644
index 0000000..463d080
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_not_int.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"    notl    %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_not_long.S b/runtime/interpreter/mterp/x86_64/op_not_long.S
new file mode 100644
index 0000000..c97bb9e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_not_long.S
@@ -0,0 +1 @@
+%include "x86_64/unop.S" {"instr":"    notq    %rax", "wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_or_int.S b/runtime/interpreter/mterp/x86_64/op_or_int.S
new file mode 100644
index 0000000..730310f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_or_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop.S" {"instr":"orl     (rFP,%rcx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_or_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_or_int_2addr.S
new file mode 100644
index 0000000..f722e4d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binop2addr.S" {"instr":"orl     %eax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_or_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_or_int_lit16.S
new file mode 100644
index 0000000..fee86c7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit16.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_or_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_or_int_lit8.S
new file mode 100644
index 0000000..81104c7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"orl     %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_or_long.S b/runtime/interpreter/mterp/x86_64/op_or_long.S
new file mode 100644
index 0000000..6c70a20
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_or_long.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide.S" {"instr":"orq     (rFP,%rcx,4), %rax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_or_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_or_long_2addr.S
new file mode 100644
index 0000000..546da1d
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide2addr.S" {"instr":"orq     %rax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
new file mode 100644
index 0000000..148552f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -0,0 +1,18 @@
+%default { "func":"MterpDoPackedSwitch" }
+/*
+ * Handle a packed-switch or sparse-switch instruction.  In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+    /* op vAA, +BBBB */
+    movslq  2(rPC), OUT_ARG0                # OUT_ARG0 <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # OUT_ARG0 <- PC + ssssssssBBBBbbbb*2
+    GET_VREG OUT_32_ARG1, rINSTq            # OUT_32_ARG1 <- vAA
+    call    SYMBOL($func)
+    testl   %eax, %eax
+    movslq  %eax, rINSTq
+    jmp     MterpCommonTakenBranch
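
The handler only computes the payload address (rPC plus the signed 32-bit offset, in 16-bit code units) and the test value; the table lookup happens in the helper. Below is a sketch of the packed case following the documented Dex packed-switch-payload layout (ushort ident 0x0100, ushort size, int first_key, then `size` relative targets); the real MterpDoPackedSwitch adds debug checks:

    #include <cstdint>
    #include <cstring>

    // Returns the relative branch target in 16-bit code units, or 3 (the
    // width of the switch instruction) when the key misses the table.
    int32_t DoPackedSwitchSketch(const uint16_t* payload, int32_t test_val) {
      const uint16_t size = payload[1];        // payload[0] is ident 0x0100
      int32_t first_key;
      std::memcpy(&first_key, payload + 2, sizeof(first_key));
      const uint32_t index = static_cast<uint32_t>(test_val - first_key);
      if (index >= size) {
        return 3;                              // fall through
      }
      int32_t target;
      std::memcpy(&target, payload + 4 + 2 * index, sizeof(target));
      return target;
    }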
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_double.S b/runtime/interpreter/mterp/x86_64/op_rem_double.S
new file mode 100644
index 0000000..00aed78
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_double.S
@@ -0,0 +1,14 @@
+    /* rem_double vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    movzbq  2(rPC), %rax                    # rax <- BB
+    fldl    VREG_ADDRESS(%rcx)              # %st1 <- fp[vCC] (divisor)
+    fldl    VREG_ADDRESS(%rax)              # %st0 <- fp[vBB] (dividend)
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(rINSTq)            # fp[vAA] <- %st
+    CLEAR_WIDE_REF rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
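
fprem produces only a partial remainder; the fstsw/sahf/jp loop re-runs it until the x87 C2 flag (which sahf maps onto PF) clears and the remainder is final. The result keeps the dividend's sign, i.e. C fmod semantics rather than IEEE remainder(). As a reference point:

    #include <cmath>

    // rem-double semantics: remainder with the sign of the dividend,
    // matching the fprem loop above (not std::remainder, which rounds
    // the quotient to nearest).
    double RemDouble(double dividend, double divisor) {
      return std::fmod(dividend, divisor);
    }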
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_double_2addr.S b/runtime/interpreter/mterp/x86_64/op_rem_double_2addr.S
new file mode 100644
index 0000000..9768266
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_double_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_double/2addr vA, vB */
+    movzbq  rINSTbl, %rcx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    fldl    VREG_ADDRESS(rINSTq)            # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    fldl    VREG_ADDRESS(%rcx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstpl   VREG_ADDRESS(%rcx)              # %st to vA
+    CLEAR_WIDE_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_float.S b/runtime/interpreter/mterp/x86_64/op_rem_float.S
new file mode 100644
index 0000000..5af28ac
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_float.S
@@ -0,0 +1,14 @@
+    /* rem_float vAA, vBB, vCC */
+    movzbq  3(rPC), %rcx                    # rcx <- CC
+    movzbq  2(rPC), %rax                    # rax <- BB
+    flds    VREG_ADDRESS(%rcx)              # vCC (divisor) to fp stack
+    flds    VREG_ADDRESS(%rax)              # vBB (dividend) to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(rINSTq)            # %st to vAA
+    CLEAR_REF rINSTq
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_float_2addr.S b/runtime/interpreter/mterp/x86_64/op_rem_float_2addr.S
new file mode 100644
index 0000000..e9282a8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_float_2addr.S
@@ -0,0 +1,15 @@
+    /* rem_float/2addr vA, vB */
+    movzbq  rINSTbl, %rcx                   # ecx <- A+
+    sarl    $$4, rINST                      # rINST <- B
+    flds    VREG_ADDRESS(rINSTq)            # vB to fp stack
+    andb    $$0xf, %cl                      # ecx <- A
+    flds    VREG_ADDRESS(%rcx)              # vA to fp stack
+1:
+    fprem
+    fstsw   %ax
+    sahf
+    jp      1b
+    fstp    %st(1)
+    fstps   VREG_ADDRESS(%rcx)              # %st to vA
+    CLEAR_REF %rcx
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_int.S b/runtime/interpreter/mterp/x86_64/op_rem_int.S
new file mode 100644
index 0000000..fd77d7c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_int.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv.S" {"result":"%edx","second":"%ecx","wide":"0","suffix":"l","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_rem_int_2addr.S
new file mode 100644
index 0000000..25ffbf7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv2addr.S" {"result":"%edx","second":"%ecx","wide":"0","suffix":"l","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_rem_int_lit16.S
new file mode 100644
index 0000000..21cc370
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/bindivLit16.S" {"result":"%edx","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_rem_int_lit8.S
new file mode 100644
index 0000000..2eb0150
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/bindivLit8.S" {"result":"%edx","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_long.S b/runtime/interpreter/mterp/x86_64/op_rem_long.S
new file mode 100644
index 0000000..efa7215
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_long.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv.S" {"result":"%rdx","second":"%rcx","wide":"1","suffix":"q","ext":"cqo","rem":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_rem_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_rem_long_2addr.S
new file mode 100644
index 0000000..ce0dd86
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/bindiv2addr.S" {"result":"%rdx","second":"%rcx","wide":"1","suffix":"q","rem":"1","ext":"cqo"}
diff --git a/runtime/interpreter/mterp/x86_64/op_return.S b/runtime/interpreter/mterp/x86_64/op_return.S
new file mode 100644
index 0000000..07e0e53
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_return.S
@@ -0,0 +1,15 @@
+/*
+ * Return a 32-bit value.
+ *
+ * for: return, return-object
+ */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_VREG %eax, rINSTq                   # eax <- vAA
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86_64/op_return_object.S b/runtime/interpreter/mterp/x86_64/op_return_object.S
new file mode 100644
index 0000000..1ae69a5
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_return_object.S
@@ -0,0 +1 @@
+%include "x86_64/op_return.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void.S b/runtime/interpreter/mterp/x86_64/op_return_void.S
new file mode 100644
index 0000000..6a12df3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_return_void.S
@@ -0,0 +1,9 @@
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorq    %rax, %rax
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
new file mode 100644
index 0000000..822b2e8
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
@@ -0,0 +1,7 @@
+    movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    xorq    %rax, %rax
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86_64/op_return_wide.S b/runtime/interpreter/mterp/x86_64/op_return_wide.S
new file mode 100644
index 0000000..288eb96
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_return_wide.S
@@ -0,0 +1,13 @@
+/*
+ * Return a 64-bit value.
+ */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    call    SYMBOL(MterpThreadFenceForConstructor)
+    movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
+    call    SYMBOL(MterpSuspendCheck)
+1:
+    GET_WIDE_VREG %rax, rINSTq              # rax <- v[AA]
+    jmp     MterpReturn
diff --git a/runtime/interpreter/mterp/x86_64/op_rsub_int.S b/runtime/interpreter/mterp/x86_64/op_rsub_int.S
new file mode 100644
index 0000000..2dd2002
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "x86_64/binopLit16.S" {"instr":"subl    %eax, %ecx","result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86_64/op_rsub_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_rsub_int_lit8.S
new file mode 100644
index 0000000..64d0d8a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"subl    %eax, %ecx" , "result":"%ecx"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget.S b/runtime/interpreter/mterp/x86_64/op_sget.S
new file mode 100644
index 0000000..d39e6c4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode", "wide":"0" }
+/*
+ * General SGET handler wrapper.
+ *
+ * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    movq    rSELF, OUT_ARG2                 # self
+    call    SYMBOL($helper)
+    movq    rSELF, %rcx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    jnz     MterpException
+    .if $is_object
+    SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
+    .else
+    .if $wide
+    SET_WIDE_VREG %rax, rINSTq              # fp[A] <- value
+    .else
+    SET_VREG %eax, rINSTq                   # fp[A] <- value
+    .endif
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_boolean.S b/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
new file mode 100644
index 0000000..7d358da
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "x86_64/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_byte.S b/runtime/interpreter/mterp/x86_64/op_sget_byte.S
new file mode 100644
index 0000000..79d9ff4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget_byte.S
@@ -0,0 +1 @@
+%include "x86_64/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_char.S b/runtime/interpreter/mterp/x86_64/op_sget_char.S
new file mode 100644
index 0000000..4488610
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget_char.S
@@ -0,0 +1 @@
+%include "x86_64/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_object.S b/runtime/interpreter/mterp/x86_64/op_sget_object.S
new file mode 100644
index 0000000..09b627e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget_object.S
@@ -0,0 +1 @@
+%include "x86_64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_short.S b/runtime/interpreter/mterp/x86_64/op_sget_short.S
new file mode 100644
index 0000000..47ac238
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget_short.S
@@ -0,0 +1 @@
+%include "x86_64/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_wide.S b/runtime/interpreter/mterp/x86_64/op_sget_wide.S
new file mode 100644
index 0000000..aa22343
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sget_wide.S
@@ -0,0 +1 @@
+%include "x86_64/op_sget.S" {"helper":"artGet64StaticFromCode", "wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shl_int.S b/runtime/interpreter/mterp/x86_64/op_shl_int.S
new file mode 100644
index 0000000..fa1edb7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shl_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop1.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shl_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_shl_int_2addr.S
new file mode 100644
index 0000000..dd96279
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/shop2addr.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shl_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_shl_int_lit8.S
new file mode 100644
index 0000000..39b23ae
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"sall    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shl_long.S b/runtime/interpreter/mterp/x86_64/op_shl_long.S
new file mode 100644
index 0000000..fdc7cb6
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shl_long.S
@@ -0,0 +1 @@
+%include "x86_64/binop1.S" {"instr":"salq    %cl, %rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shl_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_shl_long_2addr.S
new file mode 100644
index 0000000..546633f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shl_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/shop2addr.S" {"instr":"salq    %cl, %rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shr_int.S b/runtime/interpreter/mterp/x86_64/op_shr_int.S
new file mode 100644
index 0000000..fc289f4
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shr_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop1.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shr_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_shr_int_2addr.S
new file mode 100644
index 0000000..0e5bca7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/shop2addr.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shr_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_shr_int_lit8.S
new file mode 100644
index 0000000..3cc9307
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"sarl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shr_long.S b/runtime/interpreter/mterp/x86_64/op_shr_long.S
new file mode 100644
index 0000000..25028d3
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shr_long.S
@@ -0,0 +1 @@
+%include "x86_64/binop1.S" {"instr":"sarq    %cl, %rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_shr_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_shr_long_2addr.S
new file mode 100644
index 0000000..3738413
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_shr_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/shop2addr.S" {"instr":"sarq    %cl, %rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sparse_switch.S b/runtime/interpreter/mterp/x86_64/op_sparse_switch.S
new file mode 100644
index 0000000..0eaa514
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "x86_64/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/x86_64/op_sput.S b/runtime/interpreter/mterp/x86_64/op_sput.S
new file mode 100644
index 0000000..e92b032
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput.S
@@ -0,0 +1,17 @@
+%default { "helper":"artSet32StaticFromCode"}
+/*
+ * General SPUT handler wrapper.
+ *
+ * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+ */
+    /* op vAA, field@BBBB */
+    .extern $helper
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL($helper)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_boolean.S b/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
new file mode 100644
index 0000000..8718915
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "x86_64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_byte.S b/runtime/interpreter/mterp/x86_64/op_sput_byte.S
new file mode 100644
index 0000000..8718915
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput_byte.S
@@ -0,0 +1 @@
+%include "x86_64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_char.S b/runtime/interpreter/mterp/x86_64/op_sput_char.S
new file mode 100644
index 0000000..2fe9d14
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput_char.S
@@ -0,0 +1 @@
+%include "x86_64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_object.S b/runtime/interpreter/mterp/x86_64/op_sput_object.S
new file mode 100644
index 0000000..eb5a376
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput_object.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG0
+    movq    rPC, OUT_ARG1
+    REFRESH_INST ${opnum}
+    movq    rINSTq, OUT_ARG2
+    movq    rSELF, OUT_ARG3
+    call    SYMBOL(MterpSputObject)
+    testb   %al, %al
+    jz      MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_short.S b/runtime/interpreter/mterp/x86_64/op_sput_short.S
new file mode 100644
index 0000000..2fe9d14
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput_short.S
@@ -0,0 +1 @@
+%include "x86_64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_wide.S b/runtime/interpreter/mterp/x86_64/op_sput_wide.S
new file mode 100644
index 0000000..c4bc269
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sput_wide.S
@@ -0,0 +1,15 @@
+/*
+ * SPUT_WIDE handler wrapper.
+ *
+ */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    movzwq  2(rPC), OUT_ARG0                # field ref BBBB
+    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[AA]
+    movq    rSELF, OUT_ARG3                 # self
+    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    testb   %al, %al
+    jnz     MterpException
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_double.S b/runtime/interpreter/mterp/x86_64/op_sub_double.S
new file mode 100644
index 0000000..952667e
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_double.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_double_2addr.S b/runtime/interpreter/mterp/x86_64/op_sub_double_2addr.S
new file mode 100644
index 0000000..0bd5dbb
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"subs","suff":"d"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_float.S b/runtime/interpreter/mterp/x86_64/op_sub_float.S
new file mode 100644
index 0000000..ea0ae14
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_float.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_float_2addr.S b/runtime/interpreter/mterp/x86_64/op_sub_float_2addr.S
new file mode 100644
index 0000000..9dd1780
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/sseBinop2Addr.S" {"instr":"subs","suff":"s"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_int.S b/runtime/interpreter/mterp/x86_64/op_sub_int.S
new file mode 100644
index 0000000..560394f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop.S" {"instr":"subl    (rFP,%rcx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_sub_int_2addr.S
new file mode 100644
index 0000000..6f50f78
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binop2addr.S" {"instr":"subl    %eax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_long.S b/runtime/interpreter/mterp/x86_64/op_sub_long.S
new file mode 100644
index 0000000..7fa54e7
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_long.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide.S" {"instr":"subq    (rFP,%rcx,4), %rax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sub_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_sub_long_2addr.S
new file mode 100644
index 0000000..c18be10
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide2addr.S" {"instr":"subq    %rax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_throw.S b/runtime/interpreter/mterp/x86_64/op_throw.S
new file mode 100644
index 0000000..8095c25
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_throw.S
@@ -0,0 +1,11 @@
+/*
+ * Throw an exception object in the current thread.
+ */
+    /* throw vAA */
+    EXPORT_PC
+    GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
+    testl   %eax, %eax                      # null check must test the full 32-bit reference
+    jz      common_errNullObject
+    movq    rSELF, %rcx
+    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
+    jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_3e.S b/runtime/interpreter/mterp/x86_64/op_unused_3e.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_3e.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_3f.S b/runtime/interpreter/mterp/x86_64/op_unused_3f.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_3f.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_40.S b/runtime/interpreter/mterp/x86_64/op_unused_40.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_40.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_41.S b/runtime/interpreter/mterp/x86_64/op_unused_41.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_41.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_42.S b/runtime/interpreter/mterp/x86_64/op_unused_42.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_42.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_43.S b/runtime/interpreter/mterp/x86_64/op_unused_43.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_43.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_79.S b/runtime/interpreter/mterp/x86_64/op_unused_79.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_79.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_7a.S b/runtime/interpreter/mterp/x86_64/op_unused_7a.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_7a.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f3.S b/runtime/interpreter/mterp/x86_64/op_unused_f3.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f4.S b/runtime/interpreter/mterp/x86_64/op_unused_f4.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f4.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f5.S b/runtime/interpreter/mterp/x86_64/op_unused_f5.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f6.S b/runtime/interpreter/mterp/x86_64/op_unused_f6.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f7.S b/runtime/interpreter/mterp/x86_64/op_unused_f7.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f8.S b/runtime/interpreter/mterp/x86_64/op_unused_f8.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f9.S b/runtime/interpreter/mterp/x86_64/op_unused_f9.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fa.S b/runtime/interpreter/mterp/x86_64/op_unused_fa.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_fa.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fb.S b/runtime/interpreter/mterp/x86_64/op_unused_fb.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_fb.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fc.S b/runtime/interpreter/mterp/x86_64/op_unused_fc.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_fc.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fd.S b/runtime/interpreter/mterp/x86_64/op_unused_fd.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_fd.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fe.S b/runtime/interpreter/mterp/x86_64/op_unused_fe.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_fe.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_ff.S b/runtime/interpreter/mterp/x86_64/op_unused_ff.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_ff.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_ushr_int.S b/runtime/interpreter/mterp/x86_64/op_ushr_int.S
new file mode 100644
index 0000000..dd91086
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_ushr_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop1.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_ushr_int_2addr.S
new file mode 100644
index 0000000..d38aedd
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/shop2addr.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_ushr_int_lit8.S
new file mode 100644
index 0000000..f7ff8ab
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"shrl    %cl, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_ushr_long.S b/runtime/interpreter/mterp/x86_64/op_ushr_long.S
new file mode 100644
index 0000000..7c6daca
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_ushr_long.S
@@ -0,0 +1 @@
+%include "x86_64/binop1.S" {"instr":"shrq    %cl, %rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_ushr_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_ushr_long_2addr.S
new file mode 100644
index 0000000..cd6a22c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_ushr_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/shop2addr.S" {"instr":"shrq    %cl, %rax","wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_xor_int.S b/runtime/interpreter/mterp/x86_64/op_xor_int.S
new file mode 100644
index 0000000..b295d74
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_xor_int.S
@@ -0,0 +1 @@
+%include "x86_64/binop.S" {"instr":"xorl    (rFP,%rcx,4), %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_xor_int_2addr.S b/runtime/interpreter/mterp/x86_64/op_xor_int_2addr.S
new file mode 100644
index 0000000..879bfc0
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binop2addr.S" {"instr":"xorl    %eax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/op_xor_int_lit16.S b/runtime/interpreter/mterp/x86_64/op_xor_int_lit16.S
new file mode 100644
index 0000000..5d375a1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit16.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_xor_int_lit8.S b/runtime/interpreter/mterp/x86_64/op_xor_int_lit8.S
new file mode 100644
index 0000000..54cce9c
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "x86_64/binopLit8.S" {"instr":"xorl    %ecx, %eax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_xor_long.S b/runtime/interpreter/mterp/x86_64/op_xor_long.S
new file mode 100644
index 0000000..52b44e2
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_xor_long.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide.S" {"instr":"xorq    (rFP,%rcx,4), %rax"}
diff --git a/runtime/interpreter/mterp/x86_64/op_xor_long_2addr.S b/runtime/interpreter/mterp/x86_64/op_xor_long_2addr.S
new file mode 100644
index 0000000..d75c4ba
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "x86_64/binopWide2addr.S" {"instr":"xorq    %rax, (rFP,%rcx,4)"}
diff --git a/runtime/interpreter/mterp/x86_64/shop2addr.S b/runtime/interpreter/mterp/x86_64/shop2addr.S
new file mode 100644
index 0000000..6b06d00
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/shop2addr.S
@@ -0,0 +1,19 @@
+%default {"wide":"0"}
+/*
+ * Generic 32/64-bit "shift/2addr" operation.
+ */
+    /* shift/2addr vA, vB */
+    movl    rINST, %ecx                     # ecx <- BA
+    sarl    $$4, %ecx                       # ecx <- B
+    GET_VREG %ecx, %rcx                     # ecx <- vB
+    andb    $$0xf, rINSTbl                  # rINST <- A
+    .if $wide
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vAA
+    $instr                                  # ex: sarl %cl, %eax
+    SET_WIDE_VREG %rax, rINSTq
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vAA
+    $instr                                  # ex: sarl %cl, %eax
+    SET_VREG %eax, rINSTq
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/sseBinop.S b/runtime/interpreter/mterp/x86_64/sseBinop.S
new file mode 100644
index 0000000..09d3364
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/sseBinop.S
@@ -0,0 +1,9 @@
+%default {"instr":"","suff":""}
+    movzbq  2(rPC), %rcx                    # ecx <- BB
+    movzbq  3(rPC), %rax                    # eax <- CC
+    movs${suff}   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    ${instr}${suff} VREG_ADDRESS(%rax), %xmm0
+    movs${suff}   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff}   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
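
The trailing pxor/movs pair is the subtle part of this template: the interpreter's shadow frame keeps a reference slot parallel to every vreg, and a primitive result written straight into the value slot must also null that ref slot, or a stale object pointer could survive for the GC to scan. A structural sketch with hypothetical field names, not ART's actual ShadowFrame:

    #include <cstdint>
    #include <cstring>

    // Illustrates why the handlers clear VREG_REF_ADDRESS after an FP store.
    struct ShadowFrameSketch {
      uint32_t vregs[16];
      uint32_t vreg_refs[16];  // parallel slots scanned by the GC

      void SetFloatVReg(int i, float f) {
        std::memcpy(&vregs[i], &f, sizeof(f));
        vreg_refs[i] = 0;  // mirrors "movss %xmm0, VREG_REF_ADDRESS(...)"
      }
    };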
diff --git a/runtime/interpreter/mterp/x86_64/sseBinop2Addr.S b/runtime/interpreter/mterp/x86_64/sseBinop2Addr.S
new file mode 100644
index 0000000..084166b
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/sseBinop2Addr.S
@@ -0,0 +1,10 @@
+%default {"instr":"","suff":""}
+    movl    rINST, %ecx                     # ecx <- A+
+    andl    $$0xf, %ecx                     # ecx <- A
+    movs${suff} VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    sarl    $$4, rINST                      # rINST<- B
+    ${instr}${suff} VREG_ADDRESS(rINSTq), %xmm0
+    movs${suff} %xmm0, VREG_ADDRESS(%rcx)   # vA <- %xmm0
+    pxor    %xmm0, %xmm0
+    movs${suff} %xmm0, VREG_REF_ADDRESS(%rcx)    # clear ref of the destination vA
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/unop.S b/runtime/interpreter/mterp/x86_64/unop.S
new file mode 100644
index 0000000..1777123
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/unop.S
@@ -0,0 +1,22 @@
+%default {"preinstr":"", "instr":"", "wide":"0"}
+/*
+ * Generic 32/64-bit unary operation.  Provide an "instr" line that
+ * specifies an instruction that performs "result = op eax".
+ */
+    /* unop vA, vB */
+    movl    rINST, %ecx                     # rcx <- A+
+    sarl    $$4,rINST                       # rINST <- B
+    .if ${wide}
+    GET_WIDE_VREG %rax, rINSTq              # rax <- vB
+    .else
+    GET_VREG %eax, rINSTq                   # eax <- vB
+    .endif
+    andb    $$0xf,%cl                       # ecx <- A
+$preinstr
+$instr
+    .if ${wide}
+    SET_WIDE_VREG %rax, %rcx
+    .else
+    SET_VREG %eax, %rcx
+    .endif
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/unused.S b/runtime/interpreter/mterp/x86_64/unused.S
new file mode 100644
index 0000000..c95ef94
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail to reference interpreter to throw.
+ */
+    jmp     MterpFallback
diff --git a/runtime/interpreter/mterp/x86_64/zcmp.S b/runtime/interpreter/mterp/x86_64/zcmp.S
new file mode 100644
index 0000000..fb8ae6a
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/zcmp.S
@@ -0,0 +1,17 @@
+/*
+ * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+ * fragment that specifies the *reverse* x86 condition code to use, e.g.
+ * for "if-le" you would use "g" (assembled as "jg").
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+    /* if-cmp vAA, +BBBB */
+    cmpl    $$0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    j${revcmp}   1f
+    movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
+1:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
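
Testing the reverse condition lets the taken branch fall straight through to the displacement fetch, while the not-taken path does only the OSR check before advancing. For if-lez (revcmp "g") the control flow is equivalent to this sketch:

    #include <cstdint>

    // Branch offset selection for if-lez: the taken path yields the signed
    // displacement, the not-taken path skips the 2-code-unit instruction.
    int32_t IfLezOffset(int32_t vreg, int16_t displacement) {
      if (vreg > 0) {        // the reverse comparison, "jg 1f"
        return 2;            // not taken
      }
      return displacement;   // taken: handed to MterpCommonTakenBranch
    }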
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 92b6e4f..a0e0e62 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -16,26 +16,37 @@
 
 #include "unstarted_runtime.h"
 
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+
 #include <cmath>
+#include <limits>
+#include <locale>
 #include <unordered_map>
 
 #include "ScopedLocalRef.h"
 
 #include "art_method-inl.h"
+#include "base/casts.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "entrypoints/entrypoint_utils-inl.h"
+#include "gc/reference_processor.h"
 #include "handle_scope-inl.h"
 #include "interpreter/interpreter_common.h"
 #include "mirror/array-inl.h"
 #include "mirror/class.h"
 #include "mirror/field-inl.h"
+#include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "nth_caller_visitor.h"
+#include "reflection.h"
 #include "thread.h"
 #include "transaction.h"
 #include "well_known_classes.h"
@@ -64,6 +75,43 @@
   }
 }
 
+// Restricted support for character case conversion: only ASCII is supported, where the
+// mapping is trivial. Abort the transaction otherwise.
+static void CharacterLowerUpper(Thread* self,
+                                ShadowFrame* shadow_frame,
+                                JValue* result,
+                                size_t arg_offset,
+                                bool to_lower_case) SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t int_value = static_cast<uint32_t>(shadow_frame->GetVReg(arg_offset));
+
+  // Only ASCII (7-bit).
+  if (!isascii(int_value)) {
+    AbortTransactionOrFail(self,
+                           "Only support ASCII characters for toLowerCase/toUpperCase: %u",
+                           int_value);
+    return;
+  }
+
+  std::locale c_locale("C");
+  char char_value = static_cast<char>(int_value);
+
+  if (to_lower_case) {
+    result->SetI(std::tolower(char_value, c_locale));
+  } else {
+    result->SetI(std::toupper(char_value, c_locale));
+  }
+}
+
+void UnstartedRuntime::UnstartedCharacterToLowerCase(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  CharacterLowerUpper(self, shadow_frame, result, arg_offset, true);
+}
+
+void UnstartedRuntime::UnstartedCharacterToUpperCase(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  CharacterLowerUpper(self, shadow_frame, result, arg_offset, false);
+}
+
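A self-contained sketch of the ASCII-only mapping performed above (the helper name is
illustrative; the real code aborts the transaction for non-ASCII input instead of passing
it through):

    #include <cassert>
    #include <cctype>
    #include <cstdint>

    // ASCII-only case mapping; non-ASCII input is returned unchanged here,
    // where the unstarted runtime would abort the transaction instead.
    static int32_t AsciiToLower(uint32_t c) {
      if (c > 0x7f) {
        return static_cast<int32_t>(c);
      }
      return std::tolower(static_cast<int>(c));  // "C" locale behavior
    }

    int main() {
      assert(AsciiToLower('A') == 'a');
      assert(AsciiToLower(0xC4) == 0xC4);  // U+00C4 is left to the caller
      return 0;
    }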
 // Helper function to deal with class loading in an unstarted runtime.
 static void UnstartedRuntimeFindClass(Thread* self, Handle<mirror::String> className,
                                       Handle<mirror::ClassLoader> class_loader, JValue* result,
@@ -128,8 +176,13 @@
   }
   StackHandleScope<1> hs(self);
   Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
-  UnstartedRuntimeFindClass(self, h_class_name, NullHandle<mirror::ClassLoader>(), result,
-                            "Class.forName", true, false);
+  UnstartedRuntimeFindClass(self,
+                            h_class_name,
+                            ScopedNullHandle<mirror::ClassLoader>(),
+                            result,
+                            "Class.forName",
+                            true,
+                            false);
   CheckExceptionGenerateClassNotFound(self);
 }
 
@@ -249,11 +302,272 @@
                            PrettyDescriptor(klass).c_str());
     return;
   }
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Field::CreateFromArtField<true>(self, found, true));
+  Runtime* runtime = Runtime::Current();
+  PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+  mirror::Field* field;
+  if (runtime->IsActiveTransaction()) {
+    if (pointer_size == PointerSize::k64) {
+      field = mirror::Field::CreateFromArtField<PointerSize::k64, true>(
+          self, found, true);
+    } else {
+      field = mirror::Field::CreateFromArtField<PointerSize::k32, true>(
+          self, found, true);
+    }
   } else {
-    result->SetL(mirror::Field::CreateFromArtField<false>(self, found, true));
+    if (pointer_size == PointerSize::k64) {
+      field = mirror::Field::CreateFromArtField<PointerSize::k64, false>(
+          self, found, true);
+    } else {
+      field = mirror::Field::CreateFromArtField<PointerSize::k32, false>(
+          self, found, true);
+    }
   }
+  result->SetL(field);
+}
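The nested if above is a recurring pattern in this change: non-type template parameters
must be compile-time constants, so the runtime pointer size and transaction flag are
dispatched by hand to one of four instantiations. A self-contained sketch of the pattern
(all names below are illustrative):

    #include <cstdio>

    enum class PointerSize { k32, k64 };

    template <PointerSize kSize, bool kTransactional>
    void DoWork() {
      std::printf("size=%d transactional=%d\n",
                  kSize == PointerSize::k64 ? 64 : 32, kTransactional);
    }

    // Turn the two runtime values into the four possible instantiations.
    void Dispatch(PointerSize size, bool transactional) {
      if (transactional) {
        if (size == PointerSize::k64) DoWork<PointerSize::k64, true>();
        else                          DoWork<PointerSize::k32, true>();
      } else {
        if (size == PointerSize::k64) DoWork<PointerSize::k64, false>();
        else                          DoWork<PointerSize::k32, false>();
      }
    }

    int main() { Dispatch(PointerSize::k64, true); }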
+
+// This is required for Enum(Set) code, as that uses reflection to inspect enum classes.
+void UnstartedRuntime::UnstartedClassGetDeclaredMethod(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  // Special managed code cut-out to allow method lookup in an unstarted runtime.
+  mirror::Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
+  if (klass == nullptr) {
+    ThrowNullPointerExceptionForMethodAccess(shadow_frame->GetMethod(), InvokeType::kVirtual);
+    return;
+  }
+  mirror::String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
+  mirror::ObjectArray<mirror::Class>* args =
+      shadow_frame->GetVRegReference(arg_offset + 2)->AsObjectArray<mirror::Class>();
+  Runtime* runtime = Runtime::Current();
+  bool transaction = runtime->IsActiveTransaction();
+  PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+  mirror::Method* method;
+  if (transaction) {
+    if (pointer_size == PointerSize::k64) {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
+          self, klass, name, args);
+    } else {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k32, true>(
+          self, klass, name, args);
+    }
+  } else {
+    if (pointer_size == PointerSize::k64) {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, false>(
+          self, klass, name, args);
+    } else {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k32, false>(
+          self, klass, name, args);
+    }
+  }
+  result->SetL(method);
+}
+
+// Special managed code cut-out to allow constructor lookup in an unstarted runtime.
+void UnstartedRuntime::UnstartedClassGetDeclaredConstructor(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  mirror::Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
+  if (klass == nullptr) {
+    ThrowNullPointerExceptionForMethodAccess(shadow_frame->GetMethod(), InvokeType::kVirtual);
+    return;
+  }
+  mirror::ObjectArray<mirror::Class>* args =
+      shadow_frame->GetVRegReference(arg_offset + 1)->AsObjectArray<mirror::Class>();
+  Runtime* runtime = Runtime::Current();
+  bool transaction = runtime->IsActiveTransaction();
+  PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+  mirror::Constructor* constructor;
+  if (transaction) {
+    if (pointer_size == PointerSize::k64) {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k64,
+                                                                  true>(self, klass, args);
+    } else {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k32,
+                                                                  true>(self, klass, args);
+    }
+  } else {
+    if (pointer_size == PointerSize::k64) {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k64,
+                                                                  false>(self, klass, args);
+    } else {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k32,
+                                                                  false>(self, klass, args);
+    }
+  }
+  result->SetL(constructor);
+}
+
+void UnstartedRuntime::UnstartedClassGetEnclosingClass(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsClass()));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    result->SetL(nullptr);
+    return;
+  }
+  result->SetL(klass->GetDexFile().GetEnclosingClass(klass));
+}
+
+void UnstartedRuntime::UnstartedClassGetInnerClassFlags(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(
+      reinterpret_cast<mirror::Class*>(shadow_frame->GetVRegReference(arg_offset))));
+  const int32_t default_value = shadow_frame->GetVReg(arg_offset + 1);
+  result->SetI(mirror::Class::GetInnerClassFlags(klass, default_value));
+}
+
+static std::unique_ptr<MemMap> FindAndExtractEntry(const std::string& jar_file,
+                                                   const char* entry_name,
+                                                   size_t* size,
+                                                   std::string* error_msg) {
+  CHECK(size != nullptr);
+
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(jar_file.c_str(), error_msg));
+  if (zip_archive == nullptr) {
+    return nullptr;
+  }
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(entry_name, error_msg));
+  if (zip_entry == nullptr) {
+    return nullptr;
+  }
+  std::unique_ptr<MemMap> tmp_map(
+      zip_entry->ExtractToMemMap(jar_file.c_str(), entry_name, error_msg));
+  if (tmp_map == nullptr) {
+    return nullptr;
+  }
+
+  // Success; report the uncompressed size to the caller.
+  *size = zip_entry->GetUncompressedLength();
+  return tmp_map;
+}
+
+static void GetResourceAsStream(Thread* self,
+                                ShadowFrame* shadow_frame,
+                                JValue* result,
+                                size_t arg_offset) SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* resource_obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (resource_obj == nullptr) {
+    AbortTransactionOrFail(self, "null name for getResourceAsStream");
+    return;
+  }
+  CHECK(resource_obj->IsString());
+  mirror::String* resource_name = resource_obj->AsString();
+
+  std::string resource_name_str = resource_name->ToModifiedUtf8();
+  if (resource_name_str.empty() || resource_name_str == "/") {
+    AbortTransactionOrFail(self,
+                           "Unsupported name %s for getResourceAsStream",
+                           resource_name_str.c_str());
+    return;
+  }
+  const char* resource_cstr = resource_name_str.c_str();
+  if (resource_cstr[0] == '/') {
+    resource_cstr++;
+  }
+
+  Runtime* runtime = Runtime::Current();
+
+  std::vector<std::string> split;
+  Split(runtime->GetBootClassPathString(), ':', &split);
+  if (split.empty()) {
+    AbortTransactionOrFail(self,
+                           "Boot classpath not set or split error: %s",
+                           runtime->GetBootClassPathString().c_str());
+    return;
+  }
+
+  std::unique_ptr<MemMap> mem_map;
+  size_t map_size;
+  std::string last_error_msg;  // Only store the last message (we could concatenate).
+
+  for (const std::string& jar_file : split) {
+    mem_map = FindAndExtractEntry(jar_file, resource_cstr, &map_size, &last_error_msg);
+    if (mem_map != nullptr) {
+      break;
+    }
+  }
+
+  if (mem_map == nullptr) {
+    // Didn't find it. There's a good chance this will be the same at runtime, but still
+    // conservatively abort the transaction here.
+    AbortTransactionOrFail(self,
+                           "Could not find resource %s. Last error was %s.",
+                           resource_name_str.c_str(),
+                           last_error_msg.c_str());
+    return;
+  }
+
+  StackHandleScope<3> hs(self);
+
+  // Create byte array for content.
+  Handle<mirror::ByteArray> h_array(hs.NewHandle(mirror::ByteArray::Alloc(self, map_size)));
+  if (h_array.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not find/create byte array class");
+    return;
+  }
+  // Copy in content.
+  memcpy(h_array->GetData(), mem_map->Begin(), map_size);
+  // Free the mapping eagerly; its content now lives in the byte array.
+  mem_map.reset();
+
+  // Create a ByteArrayInputStream.
+  Handle<mirror::Class> h_class(hs.NewHandle(
+      runtime->GetClassLinker()->FindClass(self,
+                                           "Ljava/io/ByteArrayInputStream;",
+                                           ScopedNullHandle<mirror::ClassLoader>())));
+  if (h_class.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not find ByteArrayInputStream class");
+    return;
+  }
+  if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
+    AbortTransactionOrFail(self, "Could not initialize ByteArrayInputStream class");
+    return;
+  }
+
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_class->AllocObject(self)));
+  if (h_obj.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not allocate ByteArrayInputStream object");
+    return;
+  }
+
+  auto* cl = Runtime::Current()->GetClassLinker();
+  ArtMethod* constructor = h_class->FindDeclaredDirectMethod(
+      "<init>", "([B)V", cl->GetImagePointerSize());
+  if (constructor == nullptr) {
+    AbortTransactionOrFail(self, "Could not find ByteArrayInputStream constructor");
+    return;
+  }
+
+  uint32_t args[1];
+  args[0] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_array.Get()));
+  EnterInterpreterFromInvoke(self, constructor, h_obj.Get(), args, nullptr);
+
+  if (self->IsExceptionPending()) {
+    AbortTransactionOrFail(self, "Could not run ByteArrayInputStream constructor");
+    return;
+  }
+
+  result->SetL(h_obj.Get());
+}
+
+void UnstartedRuntime::UnstartedClassLoaderGetResourceAsStream(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  {
+    mirror::Object* this_obj = shadow_frame->GetVRegReference(arg_offset);
+    CHECK(this_obj != nullptr);
+    CHECK(this_obj->IsClassLoader());
+
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> this_classloader_class(hs.NewHandle(this_obj->GetClass()));
+
+    if (self->DecodeJObject(WellKnownClasses::java_lang_BootClassLoader) !=
+            this_classloader_class.Get()) {
+      AbortTransactionOrFail(self,
+                            "Unsupported classloader type %s for getResourceAsStream",
+                            PrettyClass(this_classloader_class.Get()).c_str());
+      return;
+    }
+  }
+
+  GetResourceAsStream(self, shadow_frame, result, arg_offset);
 }
 
 void UnstartedRuntime::UnstartedVmClassLoaderFindLoadedClass(
@@ -317,28 +631,35 @@
   jint src_pos = shadow_frame->GetVReg(arg_offset + 1);
   jint dst_pos = shadow_frame->GetVReg(arg_offset + 3);
   jint length = shadow_frame->GetVReg(arg_offset + 4);
-  mirror::Array* src_array = shadow_frame->GetVRegReference(arg_offset)->AsArray();
-  mirror::Array* dst_array = shadow_frame->GetVRegReference(arg_offset + 2)->AsArray();
 
-  // Null checking.
-  if (src_array == nullptr) {
+  mirror::Object* src_obj = shadow_frame->GetVRegReference(arg_offset);
+  mirror::Object* dst_obj = shadow_frame->GetVRegReference(arg_offset + 2);
+  // Null checking. For simplicity, abort transaction.
+  if (src_obj == nullptr) {
     AbortTransactionOrFail(self, "src is null in arraycopy.");
     return;
   }
-  if (dst_array == nullptr) {
+  if (dst_obj == nullptr) {
     AbortTransactionOrFail(self, "dst is null in arraycopy.");
     return;
   }
+  // Test for arrayness. Throw ArrayStoreException.
+  if (!src_obj->IsArrayInstance() || !dst_obj->IsArrayInstance()) {
+    self->ThrowNewException("Ljava/lang/ArrayStoreException;", "src or dst is not an array");
+    return;
+  }
 
-  // Bounds checking.
+  mirror::Array* src_array = src_obj->AsArray();
+  mirror::Array* dst_array = dst_obj->AsArray();
+
+  // Bounds checking. Throw IndexOutOfBoundsException.
   if (UNLIKELY(src_pos < 0) || UNLIKELY(dst_pos < 0) || UNLIKELY(length < 0) ||
       UNLIKELY(src_pos > src_array->GetLength() - length) ||
       UNLIKELY(dst_pos > dst_array->GetLength() - length)) {
-    self->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
+    self->ThrowNewExceptionF("Ljava/lang/IndexOutOfBoundsException;",
                              "src.length=%d srcPos=%d dst.length=%d dstPos=%d length=%d",
                              src_array->GetLength(), src_pos, dst_array->GetLength(), dst_pos,
                              length);
-    AbortTransactionOrFail(self, "Index out of bounds.");
     return;
   }
 
@@ -357,19 +678,11 @@
       return;
     }
 
-    // For simplicity only do this if the component types are the same. Otherwise we have to copy
-    // even more code from the object-array functions.
-    if (src_type != trg_type) {
-      AbortTransactionOrFail(self, "Types not the same in arraycopy: %s vs %s",
-                             PrettyDescriptor(src_array->GetClass()->GetComponentType()).c_str(),
-                             PrettyDescriptor(dst_array->GetClass()->GetComponentType()).c_str());
-      return;
-    }
-
     mirror::ObjectArray<mirror::Object>* src = src_array->AsObjectArray<mirror::Object>();
     mirror::ObjectArray<mirror::Object>* dst = dst_array->AsObjectArray<mirror::Object>();
     if (src == dst) {
       // Can overlap, but not have type mismatches.
+      // We cannot use ObjectArray::MemMove here, as it doesn't support transactions.
       const bool copy_forward = (dst_pos < src_pos) || (dst_pos - src_pos >= length);
       if (copy_forward) {
         for (int32_t i = 0; i < length; ++i) {
@@ -381,11 +694,19 @@
         }
       }
     } else {
-      // Can't overlap. Would need type checks, but we abort above.
-      for (int32_t i = 0; i < length; ++i) {
-        dst->Set(dst_pos + i, src->Get(src_pos + i));
+      // We're being lazy here. Optimally this could be a memcpy (if component types are
+      // assignable), but the ObjectArray implementation doesn't support transactions. The
+      // checking version, however, does.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        dst->AssignableCheckingMemcpy<true>(
+            dst_pos, src, src_pos, length, true /* throw_exception */);
+      } else {
+        dst->AssignableCheckingMemcpy<false>(
+            dst_pos, src, src_pos, length, true /* throw_exception */);
       }
     }
+  } else if (src_type->IsPrimitiveByte()) {
+    PrimitiveArrayCopy<uint8_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else if (src_type->IsPrimitiveChar()) {
     PrimitiveArrayCopy<uint16_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else if (src_type->IsPrimitiveInt()) {
@@ -396,6 +717,12 @@
   }
 }
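The copy_forward test above is the standard overlap rule: copying forward is safe unless the
destination window starts strictly inside the source window. A standalone illustration
(container and names are illustrative):

    #include <cassert>
    #include <vector>

    // Element-wise copy that is safe for overlapping ranges within one array:
    // copy forward unless dst starts inside the source span.
    void ArrayCopy(std::vector<int>& a, int src_pos, int dst_pos, int length) {
      const bool copy_forward = (dst_pos < src_pos) || (dst_pos - src_pos >= length);
      if (copy_forward) {
        for (int i = 0; i < length; ++i) a[dst_pos + i] = a[src_pos + i];
      } else {
        for (int i = length - 1; i >= 0; --i) a[dst_pos + i] = a[src_pos + i];
      }
    }

    int main() {
      std::vector<int> v{1, 2, 3, 4, 5};
      ArrayCopy(v, /*src_pos=*/0, /*dst_pos=*/1, /*length=*/4);  // overlapping shift
      assert((v == std::vector<int>{1, 1, 2, 3, 4}));
      return 0;
    }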
 
+void UnstartedRuntime::UnstartedSystemArraycopyByte(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  // Just forward.
+  UnstartedRuntime::UnstartedSystemArraycopy(self, shadow_frame, result, arg_offset);
+}
+
 void UnstartedRuntime::UnstartedSystemArraycopyChar(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   // Just forward.
@@ -408,46 +735,137 @@
   UnstartedRuntime::UnstartedSystemArraycopy(self, shadow_frame, result, arg_offset);
 }
 
+void UnstartedRuntime::UnstartedSystemGetSecurityManager(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame ATTRIBUTE_UNUSED,
+    JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
+  result->SetL(nullptr);
+}
+
+static constexpr const char* kAndroidHardcodedSystemPropertiesFieldName = "STATIC_PROPERTIES";
+
+static void GetSystemProperty(Thread* self,
+                              ShadowFrame* shadow_frame,
+                              JValue* result,
+                              size_t arg_offset,
+                              bool is_default_version)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackHandleScope<4> hs(self);
+  Handle<mirror::String> h_key(
+      hs.NewHandle(reinterpret_cast<mirror::String*>(shadow_frame->GetVRegReference(arg_offset))));
+  if (h_key.Get() == nullptr) {
+    AbortTransactionOrFail(self, "getProperty key was null");
+    return;
+  }
+
+  // This is overall inefficient, but reflecting the values here is not great, either. So
+  // for simplicity, and with the assumption that the number of getProperty calls is not
+  // too great, just iterate each time.
+
+  // Get the storage class.
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Handle<mirror::Class> h_props_class(hs.NewHandle(
+      class_linker->FindClass(self,
+                              "Ljava/lang/AndroidHardcodedSystemProperties;",
+                              ScopedNullHandle<mirror::ClassLoader>())));
+  if (h_props_class.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not find AndroidHardcodedSystemProperties");
+    return;
+  }
+  if (!class_linker->EnsureInitialized(self, h_props_class, true, true)) {
+    AbortTransactionOrFail(self, "Could not initialize AndroidHardcodedSystemProperties");
+    return;
+  }
+
+  // Get the storage array.
+  ArtField* static_properties =
+      h_props_class->FindDeclaredStaticField(kAndroidHardcodedSystemPropertiesFieldName,
+                                             "[[Ljava/lang/String;");
+  if (static_properties == nullptr) {
+    AbortTransactionOrFail(self,
+                           "Could not find %s field",
+                           kAndroidHardcodedSystemPropertiesFieldName);
+    return;
+  }
+  Handle<mirror::ObjectArray<mirror::ObjectArray<mirror::String>>> h_2string_array(
+      hs.NewHandle(reinterpret_cast<mirror::ObjectArray<mirror::ObjectArray<mirror::String>>*>(
+          static_properties->GetObject(h_props_class.Get()))));
+  if (h_2string_array.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Field %s is null", kAndroidHardcodedSystemPropertiesFieldName);
+    return;
+  }
+
+  // Iterate over it.
+  const int32_t prop_count = h_2string_array->GetLength();
+  // Use the third handle as mutable.
+  MutableHandle<mirror::ObjectArray<mirror::String>> h_string_array(
+      hs.NewHandle<mirror::ObjectArray<mirror::String>>(nullptr));
+  for (int32_t i = 0; i < prop_count; ++i) {
+    h_string_array.Assign(h_2string_array->Get(i));
+    if (h_string_array.Get() == nullptr ||
+        h_string_array->GetLength() != 2 ||
+        h_string_array->Get(0) == nullptr) {
+      AbortTransactionOrFail(self,
+                             "Unexpected content of %s",
+                             kAndroidHardcodedSystemPropertiesFieldName);
+      return;
+    }
+    if (h_key->Equals(h_string_array->Get(0))) {
+      // Found a value.
+      if (h_string_array->Get(1) == nullptr && is_default_version) {
+        // Null is being delegated to the default map, and then resolved to the given default value.
+        // As there's no default map, return the given value.
+        result->SetL(shadow_frame->GetVRegReference(arg_offset + 1));
+      } else {
+        result->SetL(h_string_array->Get(1));
+      }
+      return;
+    }
+  }
+
+  // Key is not supported.
+  AbortTransactionOrFail(self, "getProperty key %s not supported", h_key->ToModifiedUtf8().c_str());
+}
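The loop above scans a String[][] of {key, value} rows. A plain-C++ analogue of the same
lookup (table contents here are purely illustrative, not the real STATIC_PROPERTIES data):

    #include <cstdio>
    #include <cstring>

    // Illustrative stand-in for AndroidHardcodedSystemProperties.STATIC_PROPERTIES:
    // each row is {key, value}; a null value delegates to the caller's default.
    static const char* kStaticProperties[][2] = {
        {"file.separator", "/"},
        {"line.separator", "\n"},
    };

    const char* GetProperty(const char* key, const char* default_value) {
      for (const auto& row : kStaticProperties) {
        if (std::strcmp(row[0], key) == 0) {
          // The runtime substitutes the default only in the two-argument getProperty.
          return row[1] != nullptr ? row[1] : default_value;
        }
      }
      return nullptr;  // unsupported key: the runtime aborts the transaction here
    }

    int main() {
      std::printf("%s\n", GetProperty("file.separator", ""));
      return 0;
    }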
+
+void UnstartedRuntime::UnstartedSystemGetProperty(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  GetSystemProperty(self, shadow_frame, result, arg_offset, false);
+}
+
+void UnstartedRuntime::UnstartedSystemGetPropertyWithDefault(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  GetSystemProperty(self, shadow_frame, result, arg_offset, true);
+}
+
 void UnstartedRuntime::UnstartedThreadLocalGet(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
   std::string caller(PrettyMethod(shadow_frame->GetLink()->GetMethod()));
   bool ok = false;
-  if (caller == "java.lang.String java.lang.IntegralToString.convertInt"
-                "(java.lang.AbstractStringBuilder, int)") {
+  if (caller == "void java.lang.FloatingDecimal.developLongDigits(int, long, long)" ||
+      caller == "java.lang.String java.lang.FloatingDecimal.toJavaFormatString()") {
     // Allocate non-threadlocal buffer.
-    result->SetL(mirror::CharArray::Alloc(self, 11));
+    result->SetL(mirror::CharArray::Alloc(self, 26));
     ok = true;
-  } else if (caller == "java.lang.RealToString java.lang.RealToString.getInstance()") {
-    // Note: RealToString is implemented and used in a different fashion than IntegralToString.
-    // Conversion is done over an actual object of RealToString (the conversion method is an
-    // instance method). This means it is not as clear whether it is correct to return a new
-    // object each time. The caller needs to be inspected by hand to see whether it (incorrectly)
-    // stores the object for later use.
-    // See also b/19548084 for a possible rewrite and bringing it in line with IntegralToString.
-    if (shadow_frame->GetLink()->GetLink() != nullptr) {
-      std::string caller2(PrettyMethod(shadow_frame->GetLink()->GetLink()->GetMethod()));
-      if (caller2 == "java.lang.String java.lang.Double.toString(double)") {
-        // Allocate new object.
-        StackHandleScope<2> hs(self);
-        Handle<mirror::Class> h_real_to_string_class(hs.NewHandle(
-            shadow_frame->GetLink()->GetMethod()->GetDeclaringClass()));
-        Handle<mirror::Object> h_real_to_string_obj(hs.NewHandle(
-            h_real_to_string_class->AllocObject(self)));
-        if (h_real_to_string_obj.Get() != nullptr) {
-          auto* cl = Runtime::Current()->GetClassLinker();
-          ArtMethod* init_method = h_real_to_string_class->FindDirectMethod(
-              "<init>", "()V", cl->GetImagePointerSize());
-          if (init_method == nullptr) {
-            h_real_to_string_class->DumpClass(LOG(FATAL), mirror::Class::kDumpClassFullDetail);
-          } else {
-            JValue invoke_result;
-            EnterInterpreterFromInvoke(self, init_method, h_real_to_string_obj.Get(), nullptr,
-                                       nullptr);
-            if (!self->IsExceptionPending()) {
-              result->SetL(h_real_to_string_obj.Get());
-              ok = true;
-            }
-          }
+  } else if (caller ==
+             "java.lang.FloatingDecimal java.lang.FloatingDecimal.getThreadLocalInstance()") {
+    // Allocate new object.
+    StackHandleScope<2> hs(self);
+    Handle<mirror::Class> h_real_to_string_class(hs.NewHandle(
+        shadow_frame->GetLink()->GetMethod()->GetDeclaringClass()));
+    Handle<mirror::Object> h_real_to_string_obj(hs.NewHandle(
+        h_real_to_string_class->AllocObject(self)));
+    if (h_real_to_string_obj.Get() != nullptr) {
+      auto* cl = Runtime::Current()->GetClassLinker();
+      ArtMethod* init_method = h_real_to_string_class->FindDirectMethod(
+          "<init>", "()V", cl->GetImagePointerSize());
+      if (init_method == nullptr) {
+        h_real_to_string_class->DumpClass(LOG(FATAL), mirror::Class::kDumpClassFullDetail);
+      } else {
+        JValue invoke_result;
+        EnterInterpreterFromInvoke(self, init_method, h_real_to_string_obj.Get(), nullptr,
+                                   nullptr);
+        if (!self->IsExceptionPending()) {
+          result->SetL(h_real_to_string_obj.Get());
+          ok = true;
         }
       }
     }
@@ -460,17 +878,28 @@
 
 void UnstartedRuntime::UnstartedMathCeil(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
-  double in = shadow_frame->GetVRegDouble(arg_offset);
-  double out;
-  // Special cases:
-  // 1) NaN, infinity, +0, -0 -> out := in. All are guaranteed by cmath.
-  // -1 < in < 0 -> out := -0.
-  if (-1.0 < in && in < 0) {
-    out = -0.0;
-  } else {
-    out = ceil(in);
-  }
-  result->SetD(out);
+  result->SetD(ceil(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathFloor(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(floor(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathSin(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(sin(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathCos(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(cos(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathPow(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
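+  // Note: doubles are wide and occupy two vregs, so the second argument is at arg_offset + 2.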
+  result->SetD(pow(shadow_frame->GetVRegDouble(arg_offset),
+                   shadow_frame->GetVRegDouble(arg_offset + 2)));
 }
 
 void UnstartedRuntime::UnstartedObjectHashCode(
@@ -649,98 +1078,6 @@
   UnstartedMemoryPeekArray(Primitive::kPrimByte, self, shadow_frame, arg_offset);
 }
 
-// This allows reading security.properties in an unstarted runtime and initialize Security.
-void UnstartedRuntime::UnstartedSecurityGetSecurityPropertiesReader(
-    Thread* self, ShadowFrame* shadow_frame ATTRIBUTE_UNUSED, JValue* result,
-    size_t arg_offset ATTRIBUTE_UNUSED) {
-  Runtime* runtime = Runtime::Current();
-  const std::vector<const DexFile*>& path = runtime->GetClassLinker()->GetBootClassPath();
-  std::string canonical(DexFile::GetDexCanonicalLocation(path[0]->GetLocation().c_str()));
-  mirror::String* string_data;
-
-  // Use a block to enclose the I/O and MemMap code so buffers are released early.
-  {
-    std::string error_msg;
-    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(canonical.c_str(), &error_msg));
-    if (zip_archive.get() == nullptr) {
-      AbortTransactionOrFail(self, "Could not open zip file %s: %s", canonical.c_str(),
-                             error_msg.c_str());
-      return;
-    }
-    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("java/security/security.properties",
-                                                          &error_msg));
-    if (zip_entry.get() == nullptr) {
-      AbortTransactionOrFail(self, "Could not find security.properties file in %s: %s",
-                             canonical.c_str(), error_msg.c_str());
-      return;
-    }
-    std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(canonical.c_str(),
-                                                           "java/security/security.properties",
-                                                           &error_msg));
-    if (map.get() == nullptr) {
-      AbortTransactionOrFail(self, "Could not unzip security.properties file in %s: %s",
-                             canonical.c_str(), error_msg.c_str());
-      return;
-    }
-
-    uint32_t length = zip_entry->GetUncompressedLength();
-    std::unique_ptr<char[]> tmp(new char[length + 1]);
-    memcpy(tmp.get(), map->Begin(), length);
-    tmp.get()[length] = 0;  // null terminator
-
-    string_data = mirror::String::AllocFromModifiedUtf8(self, tmp.get());
-  }
-
-  if (string_data == nullptr) {
-    AbortTransactionOrFail(self, "Could not create string from file content of %s",
-                           canonical.c_str());
-    return;
-  }
-
-  // Create a StringReader.
-  StackHandleScope<3> hs(self);
-  Handle<mirror::String> h_string(hs.NewHandle(string_data));
-
-  Handle<mirror::Class> h_class(hs.NewHandle(
-      runtime->GetClassLinker()->FindClass(self,
-                                           "Ljava/io/StringReader;",
-                                           NullHandle<mirror::ClassLoader>())));
-  if (h_class.Get() == nullptr) {
-    AbortTransactionOrFail(self, "Could not find StringReader class");
-    return;
-  }
-
-  if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-    AbortTransactionOrFail(self, "Could not initialize StringReader class");
-    return;
-  }
-
-  Handle<mirror::Object> h_obj(hs.NewHandle(h_class->AllocObject(self)));
-  if (h_obj.Get() == nullptr) {
-    AbortTransactionOrFail(self, "Could not allocate StringReader object");
-    return;
-  }
-
-  auto* cl = Runtime::Current()->GetClassLinker();
-  ArtMethod* constructor = h_class->FindDeclaredDirectMethod(
-      "<init>", "(Ljava/lang/String;)V", cl->GetImagePointerSize());
-  if (constructor == nullptr) {
-    AbortTransactionOrFail(self, "Could not find StringReader constructor");
-    return;
-  }
-
-  uint32_t args[1];
-  args[0] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_string.Get()));
-  EnterInterpreterFromInvoke(self, constructor, h_obj.Get(), args, nullptr);
-
-  if (self->IsExceptionPending()) {
-    AbortTransactionOrFail(self, "Could not run StringReader constructor");
-    return;
-  }
-
-  result->SetL(h_obj.Get());
-}
-
 // This allows reading the new style of String objects during compilation.
 void UnstartedRuntime::UnstartedStringGetCharsNoCheck(
     Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset) {
@@ -845,6 +1182,290 @@
   result->SetL(string->ToCharArray(self));
 }
 
+// This allows statically initializing ConcurrentHashMap and SynchronousQueue.
+void UnstartedRuntime::UnstartedReferenceGetReferent(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  mirror::Reference* const ref = down_cast<mirror::Reference*>(
+      shadow_frame->GetVRegReference(arg_offset));
+  if (ref == nullptr) {
+    AbortTransactionOrFail(self, "Reference.getReferent() with null object");
+    return;
+  }
+  mirror::Object* const referent =
+      Runtime::Current()->GetHeap()->GetReferenceProcessor()->GetReferent(self, ref);
+  result->SetL(referent);
+}
+
+// This allows statically initializing ConcurrentHashMap and SynchronousQueue. We use a somewhat
+// conservative upper bound. We restrict the callers to SynchronousQueue and ConcurrentHashMap,
+// where we can predict the behavior (somewhat).
+// Note: this is required (instead of lazy initialization) as these classes are used in the static
+//       initialization of other classes, so will *use* the value.
+void UnstartedRuntime::UnstartedRuntimeAvailableProcessors(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
+  std::string caller(PrettyMethod(shadow_frame->GetLink()->GetMethod()));
+  if (caller == "void java.util.concurrent.SynchronousQueue.<clinit>()") {
+    // SynchronousQueue really only separates between single- and multiprocessor case. Return
+    // 8 as a conservative upper approximation.
+    result->SetI(8);
+  } else if (caller == "void java.util.concurrent.ConcurrentHashMap.<clinit>()") {
+    // ConcurrentHashMap uses it for striding. 8 still seems an OK general value, as it's likely
+    // a good upper bound.
+    // TODO: Consider resetting in the zygote?
+    result->SetI(8);
+  } else {
+    // Not supported.
+    AbortTransactionOrFail(self, "Accessing availableProcessors not allowed");
+  }
+}
+
+// This allows accessing ConcurrentHashMap/SynchronousQueue.
+
+void UnstartedRuntime::UnstartedUnsafeCompareAndSwapLong(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  int64_t expectedValue = shadow_frame->GetVRegLong(arg_offset + 4);
+  int64_t newValue = shadow_frame->GetVRegLong(arg_offset + 6);
+
+  // Must use non-transactional mode.
+  if (kUseReadBarrier) {
+    // Need to make sure the reference stored in the field is a to-space one before attempting the
+    // CAS or the CAS could fail incorrectly.
+    mirror::HeapReference<mirror::Object>* field_addr =
+        reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
+            reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
+    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
+        obj,
+        MemberOffset(offset),
+        field_addr);
+  }
+  bool success;
+  // Check whether we're in a transaction, call accordingly.
+  if (Runtime::Current()->IsActiveTransaction()) {
+    success = obj->CasFieldStrongSequentiallyConsistent64<true>(MemberOffset(offset),
+                                                                expectedValue,
+                                                                newValue);
+  } else {
+    success = obj->CasFieldStrongSequentiallyConsistent64<false>(MemberOffset(offset),
+                                                                 expectedValue,
+                                                                 newValue);
+  }
+  result->SetZ(success ? 1 : 0);
+}
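Minus the read-barrier and transaction plumbing, the operation above is a sequentially
consistent strong compare-and-swap; std::atomic expresses the same contract (minimal
sketch only):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::atomic<int64_t> field{42};

      // Succeeds only when the current value equals the expected value.
      int64_t expected = 42;
      assert(field.compare_exchange_strong(expected, 100));
      assert(field.load() == 100);

      // On failure, std::atomic additionally writes the observed value back
      // into 'expected'; the Unsafe entry point just reports false.
      expected = 42;
      assert(!field.compare_exchange_strong(expected, 7));
      assert(expected == 100);
      return 0;
    }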
+
+void UnstartedRuntime::UnstartedUnsafeCompareAndSwapObject(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  mirror::Object* expected_value = shadow_frame->GetVRegReference(arg_offset + 4);
+  mirror::Object* newValue = shadow_frame->GetVRegReference(arg_offset + 5);
+
+  // Must use non-transactional mode.
+  if (kUseReadBarrier) {
+    // Need to make sure the reference stored in the field is a to-space one before attempting the
+    // CAS or the CAS could fail incorrectly.
+    mirror::HeapReference<mirror::Object>* field_addr =
+        reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
+            reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
+    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
+        obj,
+        MemberOffset(offset),
+        field_addr);
+  }
+  bool success;
+  // Check whether we're in a transaction, call accordingly.
+  if (Runtime::Current()->IsActiveTransaction()) {
+    success = obj->CasFieldStrongSequentiallyConsistentObject<true>(MemberOffset(offset),
+                                                                    expected_value,
+                                                                    newValue);
+  } else {
+    success = obj->CasFieldStrongSequentiallyConsistentObject<false>(MemberOffset(offset),
+                                                                     expected_value,
+                                                                     newValue);
+  }
+  result->SetZ(success ? 1 : 0);
+}
+
+void UnstartedRuntime::UnstartedUnsafeGetObjectVolatile(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  mirror::Object* value = obj->GetFieldObjectVolatile<mirror::Object>(MemberOffset(offset));
+  result->SetL(value);
+}
+
+void UnstartedRuntime::UnstartedUnsafePutObjectVolatile(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  mirror::Object* value = shadow_frame->GetVRegReference(arg_offset + 4);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    obj->SetFieldObjectVolatile<true>(MemberOffset(offset), value);
+  } else {
+    obj->SetFieldObjectVolatile<false>(MemberOffset(offset), value);
+  }
+}
+
+void UnstartedRuntime::UnstartedUnsafePutOrderedObject(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  mirror::Object* newValue = shadow_frame->GetVRegReference(arg_offset + 4);
+  QuasiAtomic::ThreadFenceRelease();
+  if (Runtime::Current()->IsActiveTransaction()) {
+    obj->SetFieldObject<true>(MemberOffset(offset), newValue);
+  } else {
+    obj->SetFieldObject<false>(MemberOffset(offset), newValue);
+  }
+}
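The release fence followed by a plain store above is the classic putOrderedObject/lazySet
shape; in standard C++ the same ordering can be written as a fence plus a relaxed store
(minimal sketch, roughly equivalent to a release store):

    #include <atomic>

    std::atomic<int> g_field{0};

    // Roughly g_field.store(value, std::memory_order_release): earlier writes
    // become visible to a reader that acquires this store.
    void PutOrdered(int value) {
      std::atomic_thread_fence(std::memory_order_release);
      g_field.store(value, std::memory_order_relaxed);
    }

    int main() {
      PutOrdered(1);
      return 0;
    }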
+
+// A cutout for Integer.parseInt(String). Note: this code is conservative and will bail instead
+// of correctly handling the corner cases.
+void UnstartedRuntime::UnstartedIntegerParseInt(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot parse null string, retry at runtime.");
+    return;
+  }
+
+  std::string string_value = obj->AsString()->ToModifiedUtf8();
+  if (string_value.empty()) {
+    AbortTransactionOrFail(self, "Cannot parse empty string, retry at runtime.");
+    return;
+  }
+
+  const char* c_str = string_value.c_str();
+  char* end;
+  // Reset errno first: strtol only ever sets it, so a stale ERANGE from an earlier call
+  // would otherwise make the range check below fail spuriously. (errno is guaranteed to
+  // expand to a modifiable lvalue, so the assignment is always legal.)
+  errno = 0;
+  int64_t l = strtol(c_str, &end, 10);
+
+  if ((errno == ERANGE && l == LONG_MAX) || l > std::numeric_limits<int32_t>::max() ||
+      (errno == ERANGE && l == LONG_MIN) || l < std::numeric_limits<int32_t>::min()) {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+  if (l == 0) {
+    // Check whether the string wasn't exactly zero.
+    if (string_value != "0") {
+      AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+      return;
+    }
+  } else if (*end != '\0') {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+
+  result->SetI(static_cast<int32_t>(l));
+}
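The strtol contract used above, in a standalone form (reset errno, then check both ERANGE
and the end pointer; names are illustrative):

    #include <cassert>
    #include <cerrno>
    #include <cstdlib>

    // Returns true and stores the value only for fully consumed, in-range input.
    bool ParseDecimal(const char* s, long* out) {
      char* end;
      errno = 0;  // strtol only ever sets errno, it never clears it
      long v = std::strtol(s, &end, 10);
      if (errno == ERANGE) return false;           // overflow or underflow
      if (end == s || *end != '\0') return false;  // empty input or trailing junk
      *out = v;
      return true;
    }

    int main() {
      long v;
      assert(ParseDecimal("123", &v) && v == 123);
      assert(!ParseDecimal("123abc", &v));
      assert(!ParseDecimal("99999999999999999999", &v));
      return 0;
    }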
+
+// A cutout for Long.parseLong.
+//
+// Note: for now use code equivalent to Integer.parseInt, as strtol cannot cover the full
+//       64-bit range on platforms where long is only 32 bits.
+void UnstartedRuntime::UnstartedLongParseLong(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot parse null string, retry at runtime.");
+    return;
+  }
+
+  std::string string_value = obj->AsString()->ToModifiedUtf8();
+  if (string_value.empty()) {
+    AbortTransactionOrFail(self, "Cannot parse empty string, retry at runtime.");
+    return;
+  }
+
+  const char* c_str = string_value.c_str();
+  char* end;
+  // As above: reset errno so that a stale ERANGE cannot make the range check fail spuriously.
+  errno = 0;
+  int64_t l = strtol(c_str, &end, 10);
+
+  // Note: comparing against int32_t min/max is intentional here.
+  if ((errno == ERANGE && l == LONG_MAX) || l > std::numeric_limits<int32_t>::max() ||
+      (errno == ERANGE && l == LONG_MIN) || l < std::numeric_limits<int32_t>::min()) {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+  if (l == 0) {
+    // Check whether the string wasn't exactly zero.
+    if (string_value != "0") {
+      AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+      return;
+    }
+  } else if (*end != '\0') {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+
+  result->SetJ(l);
+}
+
+void UnstartedRuntime::UnstartedMethodInvoke(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JNIEnvExt* env = self->GetJniEnv();
+  ScopedObjectAccessUnchecked soa(self);
+
+  mirror::Object* java_method_obj = shadow_frame->GetVRegReference(arg_offset);
+  ScopedLocalRef<jobject> java_method(env,
+      java_method_obj == nullptr ? nullptr : env->AddLocalReference<jobject>(java_method_obj));
+
+  mirror::Object* java_receiver_obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  ScopedLocalRef<jobject> java_receiver(env,
+      java_receiver_obj == nullptr ? nullptr : env->AddLocalReference<jobject>(java_receiver_obj));
+
+  mirror::Object* java_args_obj = shadow_frame->GetVRegReference(arg_offset + 2);
+  ScopedLocalRef<jobject> java_args(env,
+      java_args_obj == nullptr ? nullptr : env->AddLocalReference<jobject>(java_args_obj));
+
+  ScopedLocalRef<jobject> result_jobj(env,
+      InvokeMethod(soa, java_method.get(), java_receiver.get(), java_args.get()));
+
+  result->SetL(self->DecodeJObject(result_jobj.get()));
+
+  // Conservatively flag all exceptions as transaction aborts. This way we don't need to unwrap
+  // InvocationTargetExceptions.
+  if (self->IsExceptionPending()) {
+    AbortTransactionOrFail(self, "Failed Method.invoke");
+  }
+}
+
 void UnstartedRuntime::UnstartedJNIVMRuntimeNewUnpaddedArray(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
     uint32_t* args, JValue* result) {
@@ -891,6 +1512,17 @@
   result->SetD(exp(value.GetD()));
 }
 
+void UnstartedRuntime::UnstartedJNIAtomicLongVMSupportsCS8(
+    Thread* self ATTRIBUTE_UNUSED,
+    ArtMethod* method ATTRIBUTE_UNUSED,
+    mirror::Object* receiver ATTRIBUTE_UNUSED,
+    uint32_t* args ATTRIBUTE_UNUSED,
+    JValue* result) {
+  result->SetZ(QuasiAtomic::LongAtomicsUseMutexes(Runtime::Current()->GetInstructionSet())
+                   ? 0
+                   : 1);
+}
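VMSupportsCS8 answers whether the target has a native 64-bit compare-and-swap rather than a
mutex-based fallback; standard C++ exposes the analogous query (sketch):

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    int main() {
      std::atomic<int64_t> x{0};
      // True when 64-bit atomics compile to native instructions instead of locks,
      // which is what AtomicLong.VMSupportsCS8() reports on the Java side.
      std::printf("64-bit CAS lock-free: %d\n", x.is_lock_free() ? 1 : 0);
      return 0;
    }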
+
 void UnstartedRuntime::UnstartedJNIClassGetNameNative(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver,
     uint32_t* args ATTRIBUTE_UNUSED, JValue* result) {
@@ -898,6 +1530,13 @@
   result->SetL(mirror::Class::ComputeName(hs.NewHandle(receiver->AsClass())));
 }
 
+void UnstartedRuntime::UnstartedJNIDoubleLongBitsToDouble(
+    Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
+    mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args, JValue* result) {
+  uint64_t long_input = args[0] | (static_cast<uint64_t>(args[1]) << 32);
+  result->SetD(bit_cast<double>(long_input));
+}
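The helper above reassembles a 64-bit pattern from the two 32-bit argument slots (low word
first) and reinterprets it as a double. A self-contained equivalent using memcpy as the
portable bit_cast:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    double LongBitsToDouble(uint32_t lo, uint32_t hi) {
      uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
      double d;
      static_assert(sizeof(d) == sizeof(bits), "double must be 64 bits");
      std::memcpy(&d, &bits, sizeof(d));  // reinterpret without aliasing UB
      return d;
    }

    int main() {
      // 0x3FF0000000000000 is the IEEE-754 encoding of 1.0.
      assert(LongBitsToDouble(0x00000000u, 0x3FF00000u) == 1.0);
      return 0;
    }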
+
 void UnstartedRuntime::UnstartedJNIFloatFloatToRawIntBits(
     Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
     mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args, JValue* result) {
@@ -1017,6 +1656,19 @@
   result->SetZ(success ? JNI_TRUE : JNI_FALSE);
 }
 
+void UnstartedRuntime::UnstartedJNIUnsafeGetIntVolatile(
+    Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
+    uint32_t* args, JValue* result) {
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(args[0]);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+
+  jlong offset = (static_cast<uint64_t>(args[2]) << 32) | args[1];
+  result->SetI(obj->GetField32Volatile(MemberOffset(offset)));
+}
+
 void UnstartedRuntime::UnstartedJNIUnsafePutObject(
     Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
     mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args, JValue* result ATTRIBUTE_UNUSED) {
@@ -1096,7 +1748,13 @@
   if (iter != invoke_handlers_.end()) {
     // Clear out the result in case it's not zeroed out.
     result->SetL(0);
+
+    // Push the shadow frame. This is so the failing method can be seen in abort dumps.
+    self->PushShadowFrame(shadow_frame);
+
     (*iter->second)(self, shadow_frame, result, arg_offset);
+
+    self->PopShadowFrame();
   } else {
     // Not special, continue with regular interpreter execution.
     ArtInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index 047e906..b8553b5 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -19,18 +19,33 @@
 
 // Methods that intercept available libcore implementations.
 #define UNSTARTED_RUNTIME_DIRECT_LIST(V)    \
+  V(CharacterToLowerCase, "int java.lang.Character.toLowerCase(int)") \
+  V(CharacterToUpperCase, "int java.lang.Character.toUpperCase(int)") \
   V(ClassForName, "java.lang.Class java.lang.Class.forName(java.lang.String)") \
   V(ClassForNameLong, "java.lang.Class java.lang.Class.forName(java.lang.String, boolean, java.lang.ClassLoader)") \
   V(ClassClassForName, "java.lang.Class java.lang.Class.classForName(java.lang.String, boolean, java.lang.ClassLoader)") \
   V(ClassNewInstance, "java.lang.Object java.lang.Class.newInstance()") \
   V(ClassGetDeclaredField, "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") \
+  V(ClassGetDeclaredMethod, "java.lang.reflect.Method java.lang.Class.getDeclaredMethodInternal(java.lang.String, java.lang.Class[])") \
+  V(ClassGetDeclaredConstructor, "java.lang.reflect.Constructor java.lang.Class.getDeclaredConstructorInternal(java.lang.Class[])") \
+  V(ClassGetEnclosingClass, "java.lang.Class java.lang.Class.getEnclosingClass()") \
+  V(ClassGetInnerClassFlags, "int java.lang.Class.getInnerClassFlags(int)") \
+  V(ClassLoaderGetResourceAsStream, "java.io.InputStream java.lang.ClassLoader.getResourceAsStream(java.lang.String)") \
   V(VmClassLoaderFindLoadedClass, "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") \
   V(VoidLookupType, "java.lang.Class java.lang.Void.lookupType()") \
   V(SystemArraycopy, "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)") \
+  V(SystemArraycopyByte, "void java.lang.System.arraycopy(byte[], int, byte[], int, int)") \
   V(SystemArraycopyChar, "void java.lang.System.arraycopy(char[], int, char[], int, int)") \
   V(SystemArraycopyInt, "void java.lang.System.arraycopy(int[], int, int[], int, int)") \
+  V(SystemGetSecurityManager, "java.lang.SecurityManager java.lang.System.getSecurityManager()") \
+  V(SystemGetProperty, "java.lang.String java.lang.System.getProperty(java.lang.String)") \
+  V(SystemGetPropertyWithDefault, "java.lang.String java.lang.System.getProperty(java.lang.String, java.lang.String)") \
   V(ThreadLocalGet, "java.lang.Object java.lang.ThreadLocal.get()") \
   V(MathCeil, "double java.lang.Math.ceil(double)") \
+  V(MathFloor, "double java.lang.Math.floor(double)") \
+  V(MathSin, "double java.lang.Math.sin(double)") \
+  V(MathCos, "double java.lang.Math.cos(double)") \
+  V(MathPow, "double java.lang.Math.pow(double, double)") \
   V(ObjectHashCode, "int java.lang.Object.hashCode()") \
   V(DoubleDoubleToRawLongBits, "long java.lang.Double.doubleToRawLongBits(double)") \
   V(DexCacheGetDexNative, "com.android.dex.Dex java.lang.DexCache.getDexNative()") \
@@ -39,14 +54,23 @@
   V(MemoryPeekInt, "int libcore.io.Memory.peekIntNative(long)") \
   V(MemoryPeekLong, "long libcore.io.Memory.peekLongNative(long)") \
   V(MemoryPeekByteArray, "void libcore.io.Memory.peekByteArray(long, byte[], int, int)") \
-  V(SecurityGetSecurityPropertiesReader, "java.io.Reader java.security.Security.getSecurityPropertiesReader()") \
+  V(MethodInvoke, "java.lang.Object java.lang.reflect.Method.invoke(java.lang.Object, java.lang.Object[])") \
+  V(ReferenceGetReferent, "java.lang.Object java.lang.ref.Reference.getReferent()") \
+  V(RuntimeAvailableProcessors, "int java.lang.Runtime.availableProcessors()") \
   V(StringGetCharsNoCheck, "void java.lang.String.getCharsNoCheck(int, int, char[], int)") \
   V(StringCharAt, "char java.lang.String.charAt(int)") \
   V(StringSetCharAt, "void java.lang.String.setCharAt(int, char)") \
   V(StringFactoryNewStringFromChars, "java.lang.String java.lang.StringFactory.newStringFromChars(int, int, char[])") \
   V(StringFactoryNewStringFromString, "java.lang.String java.lang.StringFactory.newStringFromString(java.lang.String)") \
   V(StringFastSubstring, "java.lang.String java.lang.String.fastSubstring(int, int)") \
-  V(StringToCharArray, "char[] java.lang.String.toCharArray()")
+  V(StringToCharArray, "char[] java.lang.String.toCharArray()") \
+  V(UnsafeCompareAndSwapLong, "boolean sun.misc.Unsafe.compareAndSwapLong(java.lang.Object, long, long, long)") \
+  V(UnsafeCompareAndSwapObject, "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") \
+  V(UnsafeGetObjectVolatile, "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") \
+  V(UnsafePutObjectVolatile, "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") \
+  V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") \
+  V(IntegerParseInt, "int java.lang.Integer.parseInt(java.lang.String)") \
+  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)")
 
 // Methods that are native.
 #define UNSTARTED_RUNTIME_JNI_LIST(V)           \
@@ -55,7 +79,9 @@
   V(VMStackGetStackClass2, "java.lang.Class dalvik.system.VMStack.getStackClass2()") \
   V(MathLog, "double java.lang.Math.log(double)") \
   V(MathExp, "double java.lang.Math.exp(double)") \
+  V(AtomicLongVMSupportsCS8, "boolean java.util.concurrent.atomic.AtomicLong.VMSupportsCS8()") \
   V(ClassGetNameNative, "java.lang.String java.lang.Class.getNameNative()") \
+  V(DoubleLongBitsToDouble, "double java.lang.Double.longBitsToDouble(long)") \
   V(FloatFloatToRawIntBits, "int java.lang.Float.floatToRawIntBits(float)") \
   V(FloatIntBitsToFloat, "float java.lang.Float.intBitsToFloat(int)") \
   V(ObjectInternalClone, "java.lang.Object java.lang.Object.internalClone()") \
@@ -69,6 +95,7 @@
   V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)") \
   V(ByteOrderIsLittleEndian, "boolean java.nio.ByteOrder.isLittleEndian()") \
   V(UnsafeCompareAndSwapInt, "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") \
+  V(UnsafeGetIntVolatile, "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") \
   V(UnsafePutObject, "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") \
   V(UnsafeGetArrayBaseOffsetForComponentType, "int sun.misc.Unsafe.getArrayBaseOffsetForComponentType(java.lang.Class)") \
   V(UnsafeGetArrayIndexScaleForComponentType, "int sun.misc.Unsafe.getArrayIndexScaleForComponentType(java.lang.Class)")
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index a1ae2aa..c324600 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -16,6 +16,12 @@
 
 #include "unstarted_runtime.h"
 
+#include <limits>
+#include <locale>
+
+#include "base/casts.h"
+#include "base/enums.h"
+#include "base/memory_tool.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_instruction.h"
@@ -27,6 +33,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
+#include "transaction.h"
 
 namespace art {
 namespace interpreter {
@@ -66,6 +73,129 @@
 #undef UNSTARTED_RUNTIME_DIRECT_LIST
 #undef UNSTARTED_RUNTIME_JNI_LIST
 #undef UNSTARTED_JNI
+
+  // Helpers for ArrayCopy.
+  //
+  // Note: data is passed via StackHandleScopes, as raw pointers could be moved by the GC.
+  //       Hardcode a size of three everywhere; that is enough to test all cases.
+
+  static mirror::ObjectArray<mirror::Object>* CreateObjectArray(
+      Thread* self,
+      mirror::Class* component_type,
+      const StackHandleScope<3>& data)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_type = runtime->GetClassLinker()->FindArrayClass(self, &component_type);
+    CHECK(array_type != nullptr);
+    mirror::ObjectArray<mirror::Object>* result =
+        mirror::ObjectArray<mirror::Object>::Alloc(self, array_type, 3);
+    CHECK(result != nullptr);
+    for (size_t i = 0; i < 3; ++i) {
+      result->Set(static_cast<int32_t>(i), data.GetReference(i));
+      CHECK(!self->IsExceptionPending());
+    }
+    return result;
+  }
+
+  static void CheckObjectArray(mirror::ObjectArray<mirror::Object>* array,
+                               const StackHandleScope<3>& data)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CHECK_EQ(array->GetLength(), 3);
+    CHECK_EQ(data.NumberOfReferences(), 3U);
+    for (size_t i = 0; i < 3; ++i) {
+      EXPECT_EQ(data.GetReference(i), array->Get(static_cast<int32_t>(i))) << i;
+    }
+  }
+
+  void RunArrayCopy(Thread* self,
+                    ShadowFrame* tmp,
+                    bool expect_exception,
+                    mirror::ObjectArray<mirror::Object>* src,
+                    int32_t src_pos,
+                    mirror::ObjectArray<mirror::Object>* dst,
+                    int32_t dst_pos,
+                    int32_t length)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    JValue result;
+    tmp->SetVRegReference(0, src);
+    tmp->SetVReg(1, src_pos);
+    tmp->SetVRegReference(2, dst);
+    tmp->SetVReg(3, dst_pos);
+    tmp->SetVReg(4, length);
+    UnstartedSystemArraycopy(self, tmp, &result, 0);
+    bool exception_pending = self->IsExceptionPending();
+    EXPECT_EQ(exception_pending, expect_exception);
+    if (exception_pending) {
+      self->ClearException();
+    }
+  }
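For readers tracing the vreg assignments above: they mirror the parameter order of System.arraycopy. As a comment-only sketch of the mapping (no new test code is implied):

    // void java.lang.System.arraycopy(Object src,  // vreg 0
    //                                 int srcPos,  // vreg 1
    //                                 Object dst,  // vreg 2
    //                                 int dstPos,  // vreg 3
    //                                 int length)  // vreg 4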
+
+  void RunArrayCopy(Thread* self,
+                    ShadowFrame* tmp,
+                    bool expect_exception,
+                    mirror::Class* src_component_class,
+                    mirror::Class* dst_component_class,
+                    const StackHandleScope<3>& src_data,
+                    int32_t src_pos,
+                    const StackHandleScope<3>& dst_data,
+                    int32_t dst_pos,
+                    int32_t length,
+                    const StackHandleScope<3>& expected_result)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    StackHandleScope<3> hs_misc(self);
+    Handle<mirror::Class> dst_component_handle(hs_misc.NewHandle(dst_component_class));
+
+    Handle<mirror::ObjectArray<mirror::Object>> src_handle(
+        hs_misc.NewHandle(CreateObjectArray(self, src_component_class, src_data)));
+
+    Handle<mirror::ObjectArray<mirror::Object>> dst_handle(
+        hs_misc.NewHandle(CreateObjectArray(self, dst_component_handle.Get(), dst_data)));
+
+    RunArrayCopy(self,
+                 tmp,
+                 expect_exception,
+                 src_handle.Get(),
+                 src_pos,
+                 dst_handle.Get(),
+                 dst_pos,
+                 length);
+    CheckObjectArray(dst_handle.Get(), expected_result);
+  }
+
+  void TestCeilFloor(bool ceil,
+                     Thread* self,
+                     ShadowFrame* tmp,
+                     double const test_pairs[][2],
+                     size_t num_pairs)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < num_pairs; ++i) {
+      tmp->SetVRegDouble(0, test_pairs[i][0]);
+
+      JValue result;
+      if (ceil) {
+        UnstartedMathCeil(self, tmp, &result, 0);
+      } else {
+        UnstartedMathFloor(self, tmp, &result, 0);
+      }
+
+      ASSERT_FALSE(self->IsExceptionPending());
+
+      // We want precise results.
+      int64_t result_int64t = bit_cast<int64_t, double>(result.GetD());
+      int64_t expect_int64t = bit_cast<int64_t, double>(test_pairs[i][1]);
+      EXPECT_EQ(expect_int64t, result_int64t) << result.GetD() << " vs " << test_pairs[i][1];
+    }
+  }
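The bit_cast comparison above is what makes these checks exact: comparing raw IEEE-754 bit patterns distinguishes -0.0 from 0.0 and treats a NaN as equal to an expected NaN with the same payload, neither of which plain double equality does. A minimal stand-alone sketch of the same idea (memcpy-based, standing in for ART's bit_cast):

    #include <cstdint>
    #include <cstring>

    // Reinterprets a double's bits as an integer without violating aliasing rules.
    static uint64_t DoubleBits(double value) {
      uint64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      return bits;
    }

    // Bit-exact comparison: here -0.0 != 0.0, and NaN == NaN for identical payloads.
    static bool BitEqual(double a, double b) {
      return DoubleBits(a) == DoubleBits(b);
    }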
+
+  // Prepare for aborts. Aborts assume that the exception class is already resolved, as the
+  // class-loading code doesn't work under transactions.
+  void PrepareForAborts() SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::Object* result = Runtime::Current()->GetClassLinker()->FindClass(
+        Thread::Current(),
+        Transaction::kAbortExceptionSignature,
+        ScopedNullHandle<mirror::ClassLoader>());
+    CHECK(result != nullptr);
+  }
 };
 
 TEST_F(UnstartedRuntimeTest, MemoryPeekByte) {
@@ -254,7 +384,7 @@
   ScopedObjectAccess soa(self);
   mirror::Class* klass = mirror::String::GetJavaLangString();
   ArtMethod* method = klass->FindDeclaredDirectMethod("<init>", "(Ljava/lang/String;)V",
-                                                      sizeof(void*));
+                                                      kRuntimePointerSize);
 
   // create instruction data for invoke-direct {v0, v1} of method with fake index
   uint16_t inst_data[3] = { 0x2070, 0x0000, 0x0010 };
@@ -271,11 +401,488 @@
   interpreter::DoCall<false, false>(method, self, *shadow_frame, inst, inst_data[0], &result);
   mirror::String* string_result = reinterpret_cast<mirror::String*>(result.GetL());
   EXPECT_EQ(string_arg->GetLength(), string_result->GetLength());
-  EXPECT_EQ(memcmp(string_arg->GetValue(), string_result->GetValue(),
-                   string_arg->GetLength() * sizeof(uint16_t)), 0);
+
+  if (string_arg->IsCompressed() && string_result->IsCompressed()) {
+    EXPECT_EQ(memcmp(string_arg->GetValueCompressed(), string_result->GetValueCompressed(),
+                     string_arg->GetLength() * sizeof(uint8_t)), 0);
+  } else if (!string_arg->IsCompressed() && !string_result->IsCompressed()) {
+    EXPECT_EQ(memcmp(string_arg->GetValue(), string_result->GetValue(),
+                     string_arg->GetLength() * sizeof(uint16_t)), 0);
+  } else {
+    bool equal = true;
+    for (int i = 0; i < string_arg->GetLength(); ++i) {
+      if (string_arg->CharAt(i) != string_result->CharAt(i)) {
+        equal = false;
+        break;
+      }
+    }
+    EXPECT_TRUE(equal);
+  }
 
   ShadowFrame::DeleteDeoptimizedFrame(shadow_frame);
 }
 
+// Tests the exception cases that must be detected before the destination is modified.
+// (Does not yet check the object vs. primitive case.)
+TEST_F(UnstartedRuntimeTest, SystemArrayCopyObjectArrayTestExceptions) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  JValue result;
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Note: none of these tests is GC-safe. We assume no GC runs here, given the few objects we
+  //       allocate.
+  StackHandleScope<2> hs_misc(self);
+  Handle<mirror::Class> object_class(
+      hs_misc.NewHandle(mirror::Class::GetJavaLangClass()->GetSuperClass()));
+
+  StackHandleScope<3> hs_data(self);
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+  Handle<mirror::ObjectArray<mirror::Object>> array(
+      hs_misc.NewHandle(CreateObjectArray(self, object_class.Get(), hs_data)));
+
+  RunArrayCopy(self, tmp, true, array.Get(), -1, array.Get(), 0, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), -1, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 0, -1);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 0, 4);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 1, 3);
+  RunArrayCopy(self, tmp, true, array.Get(), 1, array.Get(), 0, 3);
+
+  mirror::ObjectArray<mirror::Object>* class_as_array =
+      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(object_class.Get());
+  RunArrayCopy(self, tmp, true, class_as_array, 0, array.Get(), 0, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, class_as_array, 0, 0);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
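The six in-bounds RunArrayCopy calls above each fail one precondition: negative srcPos, negative dstPos, negative length, and ranges overrunning the source or destination (both arrays have length 3); the final two calls pass a non-array object to exercise the type check. A generic sketch of the range checks being exercised (the arraycopy contract, not ART's implementation):

    #include <cstdint>

    // Every one of these conditions must hold before any element of dst is written.
    static bool ArrayCopyRangeOk(int32_t src_len, int32_t src_pos,
                                 int32_t dst_len, int32_t dst_pos, int32_t length) {
      return src_pos >= 0 && dst_pos >= 0 && length >= 0 &&
             length <= src_len - src_pos &&
             length <= dst_len - dst_pos;
    }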
+
+TEST_F(UnstartedRuntimeTest, SystemArrayCopyObjectArrayTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  JValue result;
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  StackHandleScope<1> hs_object(self);
+  Handle<mirror::Class> object_class(
+      hs_object.NewHandle(mirror::Class::GetJavaLangClass()->GetSuperClass()));
+
+  // Simple test:
+  // [1,2,3]{1 @ 2} into [4,5,6] = [4,5,2]
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_dst.GetReference(0));
+    hs_expected.NewHandle(hs_dst.GetReference(1));
+    hs_expected.NewHandle(hs_src.GetReference(1));
+
+    RunArrayCopy(self,
+                 tmp,
+                 false,
+                 object_class.Get(),
+                 object_class.Get(),
+                 hs_src,
+                 1,
+                 hs_dst,
+                 2,
+                 1,
+                 hs_expected);
+  }
+
+  // Simple test:
+  // [1,2,3]{1 @ 1} into [4,5,6] = [4,2,6]  (with dst String[])
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_dst.GetReference(0));
+    hs_expected.NewHandle(hs_src.GetReference(1));
+    hs_expected.NewHandle(hs_dst.GetReference(2));
+
+    RunArrayCopy(self,
+                 tmp,
+                 false,
+                 object_class.Get(),
+                 mirror::String::GetJavaLangString(),
+                 hs_src,
+                 1,
+                 hs_dst,
+                 1,
+                 1,
+                 hs_expected);
+  }
+
+  // Simple test:
+  // [1,*,3] into [4,5,6] = [1,5,6] + exception. Here '*' is the String class object (not a
+  // String instance), so the store check fails at index 1, after element 0 has been copied.
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::GetJavaLangString());
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_src.GetReference(0));
+    hs_expected.NewHandle(hs_dst.GetReference(1));
+    hs_expected.NewHandle(hs_dst.GetReference(2));
+
+    RunArrayCopy(self,
+                 tmp,
+                 true,
+                 object_class.Get(),
+                 mirror::String::GetJavaLangString(),
+                 hs_src,
+                 0,
+                 hs_dst,
+                 0,
+                 3,
+                 hs_expected);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
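The third case pins down the partial-copy behaviour: with a String[] destination, elements are store-checked one at a time, so the copy writes src[0], throws on the incompatible src[1], and leaves dst as [1,5,6]. A generic sketch of that contract (raw pointers stand in for references; not ART code):

    #include <cstdint>
    #include <functional>

    // Copies until the first element failing the store check. Returns false in that
    // case, leaving dst[dst_pos .. dst_pos + i) already updated, matching the
    // System.arraycopy contract; the caller would throw ArrayStoreException.
    static bool CopyWithStoreCheck(void* const* src, int32_t src_pos,
                                   void** dst, int32_t dst_pos, int32_t length,
                                   const std::function<bool(void*)>& assignable) {
      for (int32_t i = 0; i < length; ++i) {
        void* element = src[src_pos + i];
        if (element != nullptr && !assignable(element)) {
          return false;
        }
        dst[dst_pos + i] = element;
      }
      return true;
    }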
+
+TEST_F(UnstartedRuntimeTest, IntegerParseIntTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test string. Every suffix of it must parse as a valid int, i.e. lie between INT_MIN and
+  // INT_MAX.
+  constexpr const char* test_string = "-2147483646";
+  constexpr int32_t test_values[] = {
+                6,
+               46,
+              646,
+             3646,
+            83646,
+           483646,
+          7483646,
+         47483646,
+        147483646,
+       2147483646,
+      -2147483646
+  };
+
+  static_assert(arraysize(test_values) == 11U, "test_values");
+  CHECK_EQ(strlen(test_string), 11U);
+
+  for (size_t i = 0; i <= 10; ++i) {
+    const char* test_value = &test_string[10 - i];
+
+    StackHandleScope<1> hs_str(self);
+    Handle<mirror::String> h_str(
+        hs_str.NewHandle(mirror::String::AllocFromModifiedUtf8(self, test_value)));
+    ASSERT_NE(h_str.Get(), nullptr);
+    ASSERT_FALSE(self->IsExceptionPending());
+
+    tmp->SetVRegReference(0, h_str.Get());
+
+    JValue result;
+    UnstartedIntegerParseInt(self, tmp, &result, 0);
+
+    ASSERT_FALSE(self->IsExceptionPending());
+    EXPECT_EQ(result.GetI(), test_values[i]);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+// Currently the same test data as IntegerParseIntTest, since the values also fit in a long.
+TEST_F(UnstartedRuntimeTest, LongParseLongTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test string. Every suffix of it must parse as a valid long, i.e. lie between LONG_MIN and
+  // LONG_MAX.
+  constexpr const char* test_string = "-2147483646";
+  constexpr int64_t test_values[] = {
+                6,
+               46,
+              646,
+             3646,
+            83646,
+           483646,
+          7483646,
+         47483646,
+        147483646,
+       2147483646,
+      -2147483646
+  };
+
+  static_assert(arraysize(test_values) == 11U, "test_values");
+  CHECK_EQ(strlen(test_string), 11U);
+
+  for (size_t i = 0; i <= 10; ++i) {
+    const char* test_value = &test_string[10 - i];
+
+    StackHandleScope<1> hs_str(self);
+    Handle<mirror::String> h_str(
+        hs_str.NewHandle(mirror::String::AllocFromModifiedUtf8(self, test_value)));
+    ASSERT_NE(h_str.Get(), nullptr);
+    ASSERT_FALSE(self->IsExceptionPending());
+
+    tmp->SetVRegReference(0, h_str.Get());
+
+    JValue result;
+    UnstartedLongParseLong(self, tmp, &result, 0);
+
+    ASSERT_FALSE(self->IsExceptionPending());
+    EXPECT_EQ(result.GetJ(), test_values[i]);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
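Both parse tests build their inputs the same way: &test_string[10 - i] walks backwards through "-2147483646", producing the suffixes "6", "46", ..., up to the full signed string, and test_values[i] holds each suffix's decimal value. A quick stand-alone sanity check of that table (plain C library, not part of the test):

    #include <cassert>
    #include <cstddef>
    #include <cstdlib>

    static void CheckSuffixTable() {
      const char* test_string = "-2147483646";
      const long expected[] = { 6, 46, 646, 3646, 83646, 483646, 7483646,
                                47483646, 147483646, 2147483646L, -2147483646L };
      for (size_t i = 0; i <= 10; ++i) {
        const char* suffix = &test_string[10 - i];  // "6", "46", ..., "-2147483646"
        assert(std::strtol(suffix, nullptr, 10) == expected[i]);
      }
    }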
+
+TEST_F(UnstartedRuntimeTest, Ceil) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  constexpr double nan = std::numeric_limits<double>::quiet_NaN();
+  constexpr double inf = std::numeric_limits<double>::infinity();
+  constexpr double ld1 = static_cast<double>((UINT64_C(1) << 53) - 1);
+  constexpr double ld2 = static_cast<double>(UINT64_C(1) << 55);
+  constexpr double test_pairs[][2] = {
+      { -0.0, -0.0 },
+      {  0.0,  0.0 },
+      { -0.5, -0.0 },
+      { -1.0, -1.0 },
+      {  0.5,  1.0 },
+      {  1.0,  1.0 },
+      {  nan,  nan },
+      {  inf,  inf },
+      { -inf, -inf },
+      {  ld1,  ld1 },
+      {  ld2,  ld2 }
+  };
+
+  TestCeilFloor(true /* ceil */, self, tmp, test_pairs, arraysize(test_pairs));
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, Floor) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  constexpr double nan = std::numeric_limits<double>::quiet_NaN();
+  constexpr double inf = std::numeric_limits<double>::infinity();
+  constexpr double ld1 = static_cast<double>((UINT64_C(1) << 53) - 1);
+  constexpr double ld2 = static_cast<double>(UINT64_C(1) << 55);
+  constexpr double test_pairs[][2] = {
+      { -0.0, -0.0 },
+      {  0.0,  0.0 },
+      { -0.5, -1.0 },
+      { -1.0, -1.0 },
+      {  0.5,  0.0 },
+      {  1.0,  1.0 },
+      {  nan,  nan },
+      {  inf,  inf },
+      { -inf, -inf },
+      {  ld1,  ld1 },
+      {  ld2,  ld2 }
+  };
+
+  TestCeilFloor(false /* floor */, self, tmp, test_pairs, arraysize(test_pairs));
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, ToLowerUpper) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  std::locale c_locale("C");
+
+  // Check ASCII.
+  for (uint32_t i = 0; i < 128; ++i) {
+    bool c_upper = std::isupper(static_cast<char>(i), c_locale);
+    bool c_lower = std::islower(static_cast<char>(i), c_locale);
+    EXPECT_FALSE(c_upper && c_lower) << i;
+
+    // Check toLowerCase.
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      UnstartedCharacterToLowerCase(self, tmp, &result, 0);
+      ASSERT_FALSE(self->IsExceptionPending());
+      uint32_t lower_result = static_cast<uint32_t>(result.GetI());
+      if (c_lower) {
+        EXPECT_EQ(i, lower_result);
+      } else if (c_upper) {
+        EXPECT_EQ(static_cast<uint32_t>(std::tolower(static_cast<char>(i), c_locale)),
+                  lower_result);
+      } else {
+        EXPECT_EQ(i, lower_result);
+      }
+    }
+
+    // Check toUpperCase.
+    {
+      JValue result2;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      UnstartedCharacterToUpperCase(self, tmp, &result2, 0);
+      ASSERT_FALSE(self->IsExceptionPending());
+      uint32_t upper_result = static_cast<uint32_t>(result2.GetI());
+      if (c_upper) {
+        EXPECT_EQ(i, upper_result);
+      } else if (c_lower) {
+        EXPECT_EQ(static_cast<uint32_t>(std::toupper(static_cast<char>(i), c_locale)),
+                  upper_result);
+      } else {
+        EXPECT_EQ(i, upper_result);
+      }
+    }
+  }
+
+  // Check that other inputs abort the transaction. We can't test them all.
+
+  PrepareForAborts();
+
+  for (uint32_t i = 128; i < 256; ++i) {
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToLowerCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToUpperCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+  }
+  for (uint64_t i = 256; i <= std::numeric_limits<uint32_t>::max(); i <<= 1) {
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToLowerCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToUpperCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
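The abort checks above repeat the same enter-transaction / call / expect-abort sequence four times. A possible local helper, sketched only to show the pattern (not part of the patch; assumes the unstarted entrypoints can be taken as plain function pointers here):

    // Hypothetical helper; fn would be UnstartedCharacterToLowerCase or ToUpperCase.
    static void ExpectTransactionAbort(Thread* self, ShadowFrame* tmp, int32_t value,
                                       void (*fn)(Thread*, ShadowFrame*, JValue*, size_t))
        SHARED_REQUIRES(Locks::mutator_lock_) {
      JValue result;
      tmp->SetVReg(0, value);
      Transaction transaction;
      Runtime::Current()->EnterTransactionMode(&transaction);
      fn(self, tmp, &result, 0);
      Runtime::Current()->ExitTransactionMode();
      ASSERT_TRUE(self->IsExceptionPending());
      ASSERT_TRUE(transaction.IsAborted());
      self->ClearException();  // Clean up for the next iteration.
    }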
+
+TEST_F(UnstartedRuntimeTest, Sin) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test an important value, PI/6. That's the one we see in practice.
+  constexpr uint64_t lvalue = UINT64_C(0x3fe0c152382d7365);
+  tmp->SetVRegLong(0, static_cast<int64_t>(lvalue));
+
+  JValue result;
+  UnstartedMathSin(self, tmp, &result, 0);
+
+  const uint64_t lresult = static_cast<uint64_t>(result.GetJ());
+  EXPECT_EQ(UINT64_C(0x3fdfffffffffffff), lresult);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
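The hexadecimal constants in these trig tests are raw IEEE-754 bit patterns; pinning exact bits keeps the tests independent of libm rounding differences across hosts. Decoding the shared input, as a stand-alone sketch:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t bits = UINT64_C(0x3fe0c152382d7365);
      double value;
      std::memcpy(&value, &bits, sizeof(value));
      std::printf("%.17g\n", value);  // Approximately 0.5235987755982988, i.e. pi/6.
      return 0;
    }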
+
+TEST_F(UnstartedRuntimeTest, Cos) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test an important value, PI/6. That's the one we see in practice.
+  constexpr uint64_t lvalue = UINT64_C(0x3fe0c152382d7365);
+  tmp->SetVRegLong(0, static_cast<int64_t>(lvalue));
+
+  JValue result;
+  UnstartedMathCos(self, tmp, &result, 0);
+
+  const uint64_t lresult = static_cast<uint64_t>(result.GetJ());
+  EXPECT_EQ(UINT64_C(0x3febb67ae8584cab), lresult);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, Pow) {
+  // Valgrind appears to compute this imprecisely, so skip the test under Valgrind.
+  if (RUNNING_ON_MEMORY_TOOL != 0 && kMemoryToolIsValgrind) {
+    return;
+  }
+
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test an important pair.
+  constexpr uint64_t lvalue1 = UINT64_C(0x4079000000000000);
+  constexpr uint64_t lvalue2 = UINT64_C(0xbfe6db6dc0000000);
+
+  tmp->SetVRegLong(0, static_cast<int64_t>(lvalue1));
+  tmp->SetVRegLong(2, static_cast<int64_t>(lvalue2));
+
+  JValue result;
+  UnstartedMathPow(self, tmp, &result, 0);
+
+  const uint64_t lresult = static_cast<uint64_t>(result.GetJ());
+  EXPECT_EQ(UINT64_C(0x3f8c5c51326aa7ee), lresult);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index b5e28e9..2401bec 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -16,14 +16,13 @@
 
 #include "jni_internal.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include <cutils/trace.h>
 #include <dlfcn.h>
 
 #include "art_method.h"
 #include "base/dumpable.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "check_jni.h"
 #include "dex_file-inl.h"
 #include "fault_handler.h"
@@ -31,6 +30,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
+#include "nativeloader/native_loader.h"
 #include "java_vm_ext.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
@@ -48,7 +48,7 @@
 static const size_t kWeakGlobalsInitial = 16;  // Arbitrary.
 static const size_t kWeakGlobalsMax = 51200;  // Arbitrary sanity check. (Must fit in 16 bits.)
 
-static bool IsBadJniVersion(int version) {
+bool JavaVMExt::IsBadJniVersion(int version) {
   // We don't support JNI_VERSION_1_1. These are the only other valid versions.
   return version != JNI_VERSION_1_2 && version != JNI_VERSION_1_4 && version != JNI_VERSION_1_6;
 }
@@ -56,15 +56,17 @@
 class SharedLibrary {
  public:
   SharedLibrary(JNIEnv* env, Thread* self, const std::string& path, void* handle,
-                jobject class_loader)
+                jobject class_loader, void* class_loader_allocator)
       : path_(path),
         handle_(handle),
         needs_native_bridge_(false),
         class_loader_(env->NewWeakGlobalRef(class_loader)),
+        class_loader_allocator_(class_loader_allocator),
         jni_on_load_lock_("JNI_OnLoad lock"),
         jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
         jni_on_load_thread_id_(self->GetThreadId()),
         jni_on_load_result_(kPending) {
+    CHECK(class_loader_allocator_ != nullptr);
   }
 
   ~SharedLibrary() {
@@ -72,12 +74,20 @@
     if (self != nullptr) {
       self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_);
     }
+
+    if (!needs_native_bridge_) {
+      android::CloseNativeLibrary(handle_);
+    }
   }
 
   jweak GetClassLoader() const {
     return class_loader_;
   }
 
+  const void* GetClassLoaderAllocator() const {
+    return class_loader_allocator_;
+  }
+
   const std::string& GetPath() const {
     return path_;
   }
@@ -169,6 +179,9 @@
   // The ClassLoader this library is associated with, a weak global JNI reference that is
   // created/deleted with the scope of the library.
   const jweak class_loader_;
+  // Used for equality checks on class loaders, letting us avoid decoding the weak root; the
+  // read barriers triggered by decoding would interfere with class unloading.
+  const void* class_loader_allocator_;
 
   // Guards remaining items.
   Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -224,11 +237,15 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     std::string jni_short_name(JniShortName(m));
     std::string jni_long_name(JniLongName(m));
-    const mirror::ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
+    mirror::ClassLoader* const declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
     ScopedObjectAccessUnchecked soa(Thread::Current());
+    void* const declaring_class_loader_allocator =
+        Runtime::Current()->GetClassLinker()->GetAllocatorForClassLoader(declaring_class_loader);
+    CHECK(declaring_class_loader_allocator != nullptr);
     for (const auto& lib : libraries_) {
       SharedLibrary* const library = lib.second;
-      if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) != declaring_class_loader) {
+      // Use the allocator address for class loader equality to avoid unnecessary weak root decode.
+      if (library->GetClassLoaderAllocator() != declaring_class_loader_allocator) {
         // We only search libraries loaded by the appropriate ClassLoader.
         continue;
       }
@@ -258,8 +275,7 @@
       REQUIRES(!Locks::jni_libraries_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
-    std::vector<JNI_OnUnloadFn> unload_functions;
+    std::vector<SharedLibrary*> unload_libraries;
     {
       MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
       for (auto it = libraries_.begin(); it != libraries_.end(); ) {
@@ -269,16 +285,8 @@
         // If class_loader is a null jobject then it is the boot class loader. We should not unload
         // the native libraries of the boot class loader.
         if (class_loader != nullptr &&
-            soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) {
-          void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
-          if (sym == nullptr) {
-            VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
-          } else {
-            VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]";
-            JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
-            unload_functions.push_back(jni_on_unload);
-          }
-          delete library;
+            soa.Self()->IsJWeakCleared(class_loader)) {
+          unload_libraries.push_back(library);
           it = libraries_.erase(it);
         } else {
           ++it;
@@ -286,9 +294,17 @@
       }
     }
     // Do this without holding the jni libraries lock to prevent possible deadlocks.
-    for (JNI_OnUnloadFn fn : unload_functions) {
-      VLOG(jni) << "Calling JNI_OnUnload";
-      (*fn)(soa.Vm(), nullptr);
+    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
+    for (auto library : unload_libraries) {
+      void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
+      if (sym == nullptr) {
+        VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
+      } else {
+        VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]: Calling...";
+        JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
+        jni_on_unload(soa.Vm(), nullptr);
+      }
+      delete library;
     }
   }
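The restructuring above is the standard two-phase shape for callbacks-under-lock: collect the doomed libraries while holding jni_libraries_lock_, then run JNI_OnUnload (and delete) with the lock released, so an unload hook that re-enters the runtime cannot deadlock. The same shape in generic form (a sketch, not ART code):

    #include <functional>
    #include <mutex>
    #include <vector>

    std::mutex registry_lock;
    std::vector<std::function<void()>> pending;  // Guarded by registry_lock.

    void DrainPending() {
      std::vector<std::function<void()>> to_run;
      {
        std::lock_guard<std::mutex> guard(registry_lock);
        to_run.swap(pending);  // Snapshot under the lock.
      }
      for (auto& callback : to_run) {
        callback();  // Lock not held: a callback may safely re-enter DrainPending.
      }
    }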
 
@@ -305,6 +321,7 @@
     }
     JavaVMExt* raw_vm = reinterpret_cast<JavaVMExt*>(vm);
     delete raw_vm->GetRuntime();
+    android::ResetNativeLoader();
     return JNI_OK;
   }
 
@@ -327,13 +344,6 @@
   }
 
   static jint GetEnv(JavaVM* vm, void** env, jint version) {
-    // GetEnv always returns a JNIEnv* for the most current supported JNI version,
-    // and unlike other calls that take a JNI version doesn't care if you supply
-    // JNI_VERSION_1_1, which we don't otherwise support.
-    if (IsBadJniVersion(version) && version != JNI_VERSION_1_1) {
-      LOG(ERROR) << "Bad JNI version passed to GetEnv: " << version;
-      return JNI_EVERSION;
-    }
     if (vm == nullptr || env == nullptr) {
       return JNI_ERR;
     }
@@ -342,8 +352,8 @@
       *env = nullptr;
       return JNI_EDETACHED;
     }
-    *env = thread->GetJniEnv();
-    return JNI_OK;
+    JavaVMExt* raw_vm = reinterpret_cast<JavaVMExt*>(vm);
+    return raw_vm->HandleGetEnv(env, version);
   }
 
  private:
@@ -371,7 +381,7 @@
     const char* thread_name = nullptr;
     jobject thread_group = nullptr;
     if (args != nullptr) {
-      if (IsBadJniVersion(args->version)) {
+      if (JavaVMExt::IsBadJniVersion(args->version)) {
         LOG(ERROR) << "Bad JNI version passed to "
                    << (as_daemon ? "AttachCurrentThreadAsDaemon" : "AttachCurrentThread") << ": "
                    << args->version;
@@ -419,7 +429,8 @@
       weak_globals_lock_("JNI weak global reference table lock", kJniWeakGlobalsLock),
       weak_globals_(kWeakGlobalsInitial, kWeakGlobalsMax, kWeakGlobal),
       allow_accessing_weak_globals_(true),
-      weak_globals_add_condition_("weak globals add condition", weak_globals_lock_) {
+      weak_globals_add_condition_("weak globals add condition", weak_globals_lock_),
+      env_hooks_() {
   functions = unchecked_functions_;
   SetCheckJniEnabled(runtime_options.Exists(RuntimeArgumentMap::CheckJni));
 }
@@ -427,6 +438,26 @@
 JavaVMExt::~JavaVMExt() {
 }
 
+jint JavaVMExt::HandleGetEnv(/*out*/void** env, jint version) {
+  for (GetEnvHook hook : env_hooks_) {
+    jint res = hook(this, env, version);
+    if (res == JNI_OK) {
+      return JNI_OK;
+    } else if (res != JNI_EVERSION) {
+      LOG(ERROR) << "Error returned from a plugin GetEnv handler! " << res;
+      return res;
+    }
+  }
+  LOG(ERROR) << "Bad JNI version passed to GetEnv: " << version;
+  return JNI_EVERSION;
+}
+
+// Add a hook to handle getting environments from the GetEnv call.
+void JavaVMExt::AddEnvironmentHook(GetEnvHook hook) {
+  CHECK(hook != nullptr) << "environment hooks shouldn't be null!";
+  env_hooks_.push_back(hook);
+}
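HandleGetEnv gives plugins first refusal on GetEnv: a hook claims a version by returning JNI_OK, declines with JNI_EVERSION so the next hook can try, or returns any other error to abort the lookup. A hypothetical hook following that protocol (kMyPluginEnvVersion and CreateMyPluginEnv are illustrative names, not real ART or plugin APIs):

    // Hypothetical plugin-provided hook, registered via AddEnvironmentHook().
    static jint MyPluginGetEnvHook(JavaVMExt* vm, /*out*/void** new_env, jint version) {
      if (version != kMyPluginEnvVersion) {
        return JNI_EVERSION;  // Not ours; HandleGetEnv moves on to the next hook.
      }
      *new_env = CreateMyPluginEnv(vm);  // Hypothetical factory.
      return JNI_OK;
    }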
+
 void JavaVMExt::JniAbort(const char* jni_function_name, const char* msg) {
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
@@ -667,6 +698,19 @@
   return weak_globals_.SynchronizedGet(ref);
 }
 
+bool JavaVMExt::IsWeakGlobalCleared(Thread* self, IndirectRef ref) {
+  DCHECK_EQ(GetIndirectRefKind(ref), kWeakGlobal);
+  MutexLock mu(self, weak_globals_lock_);
+  while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    weak_globals_add_condition_.WaitHoldingLocks(self);
+  }
+  // When merely checking whether a weak ref has been cleared, avoid triggering the read
+  // barrier of a decode (DecodeWeakGlobal) so that we do not accidentally mark the object
+  // alive. Since the cleared sentinel is a non-moving object, we can compare the ref to it
+  // without a read barrier and decide whether it has been cleared.
+  return Runtime::Current()->IsClearedJniWeakGlobal(weak_globals_.Get<kWithoutReadBarrier>(ref));
+}
+
 void JavaVMExt::UpdateWeakGlobal(Thread* self, IndirectRef ref, mirror::Object* result) {
   MutexLock mu(self, weak_globals_lock_);
   weak_globals_.Update(ref, result);
@@ -688,7 +732,10 @@
   libraries_.get()->UnloadNativeLibraries();
 }
 
-bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
+bool JavaVMExt::LoadNativeLibrary(JNIEnv* env,
+                                  const std::string& path,
+                                  jobject class_loader,
+                                  jstring library_path,
                                   std::string* error_msg) {
   error_msg->clear();
 
@@ -703,8 +750,25 @@
     MutexLock mu(self, *Locks::jni_libraries_lock_);
     library = libraries_->Get(path);
   }
+  void* class_loader_allocator = nullptr;
+  {
+    ScopedObjectAccess soa(env);
+    // As the incoming class loader is reachable/alive during the call of this function,
+    // it's okay to decode it without worrying about unexpectedly marking it alive.
+    mirror::ClassLoader* loader = soa.Decode<mirror::ClassLoader*>(class_loader);
+
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    if (class_linker->IsBootClassLoader(soa, loader)) {
+      loader = nullptr;
+      class_loader = nullptr;
+    }
+
+    class_loader_allocator = class_linker->GetAllocatorForClassLoader(loader);
+    CHECK(class_loader_allocator != nullptr);
+  }
   if (library != nullptr) {
-    if (env->IsSameObject(library->GetClassLoader(), class_loader) == JNI_FALSE) {
+    // Use the allocator pointers for class loader equality to avoid unnecessary weak root decode.
+    if (library->GetClassLoaderAllocator() != class_loader_allocator) {
       // The library will be associated with class_loader. The JNI
       // spec says we can't load the same library into more than one
       // class loader.
@@ -737,7 +801,12 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_NOW);
+  void* handle = android::OpenNativeLibrary(env,
+                                            runtime_->GetTargetSdkVersion(),
+                                            path_str,
+                                            class_loader,
+                                            library_path);
+
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
@@ -765,7 +834,7 @@
   {
     // Create SharedLibrary ahead of taking the libraries lock to maintain lock ordering.
     std::unique_ptr<SharedLibrary> new_library(
-        new SharedLibrary(env, self, path, handle, class_loader));
+        new SharedLibrary(env, self, path, handle, class_loader, class_loader_allocator));
     MutexLock mu(self, *Locks::jni_libraries_lock_);
     library = libraries_->Get(path);
     if (library == nullptr) {  // We won race to get libraries_lock.
@@ -811,7 +880,7 @@
 
     if (version == JNI_ERR) {
       StringAppendF(error_msg, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str());
-    } else if (IsBadJniVersion(version)) {
+    } else if (JavaVMExt::IsBadJniVersion(version)) {
       StringAppendF(error_msg, "Bad JNI version returned from JNI_OnLoad in \"%s\": %d",
                     path.c_str(), version);
       // It's unwise to call dlclose() here, but we can mark it
@@ -882,11 +951,10 @@
 // JNI Invocation interface.
 
 extern "C" jint JNI_CreateJavaVM(JavaVM** p_vm, JNIEnv** p_env, void* vm_args) {
-  ATRACE_BEGIN(__FUNCTION__);
+  ScopedTrace trace(__FUNCTION__);
   const JavaVMInitArgs* args = static_cast<JavaVMInitArgs*>(vm_args);
-  if (IsBadJniVersion(args->version)) {
+  if (JavaVMExt::IsBadJniVersion(args->version)) {
     LOG(ERROR) << "Bad JNI version passed to CreateJavaVM: " << args->version;
-    ATRACE_END();
     return JNI_EVERSION;
   }
   RuntimeOptions options;
@@ -896,21 +964,24 @@
   }
   bool ignore_unrecognized = args->ignoreUnrecognized;
   if (!Runtime::Create(options, ignore_unrecognized)) {
-    ATRACE_END();
     return JNI_ERR;
   }
+
+  // Initialize native loader. This step makes sure we have
+  // everything set up before we start using JNI.
+  android::InitializeNativeLoader();
+
   Runtime* runtime = Runtime::Current();
   bool started = runtime->Start();
   if (!started) {
     delete Thread::Current()->GetJniEnv();
     delete runtime->GetJavaVM();
     LOG(WARNING) << "CreateJavaVM failed";
-    ATRACE_END();
     return JNI_ERR;
   }
+
   *p_env = Thread::Current()->GetJniEnv();
   *p_vm = runtime->GetJavaVM();
-  ATRACE_END();
   return JNI_OK;
 }
 
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index c1fbdc0..ed9d3ab 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -36,6 +36,10 @@
 class Runtime;
 struct RuntimeArgumentMap;
 
+class JavaVMExt;
+// Hook definition for runtime plugins.
+using GetEnvHook = jint (*)(JavaVMExt* vm, /*out*/void** new_env, jint version);
+
 class JavaVMExt : public JavaVM {
  public:
   JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options);
@@ -82,10 +86,13 @@
   /**
    * Loads the given shared library. 'path' is an absolute pathname.
    *
-   * Returns 'true' on success. On failure, sets 'detail' to a
+   * Returns 'true' on success. On failure, sets 'error_msg' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
+  bool LoadNativeLibrary(JNIEnv* env,
+                         const std::string& path,
+                         jobject class_loader,
+                         jstring library_path,
                          std::string* error_msg);
 
   // Unload native libraries with cleared class loaders.
@@ -149,6 +156,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!weak_globals_lock_);
 
+  // Checks whether the weak global ref has been cleared by the GC, without decoding it
+  // (and thus without triggering a read barrier).
+  bool IsWeakGlobalCleared(Thread* self, IndirectRef ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!weak_globals_lock_);
+
   Mutex& WeakGlobalsLock() RETURN_CAPABILITY(weak_globals_lock_) {
     return weak_globals_lock_;
   }
@@ -163,6 +175,12 @@
   void TrimGlobals() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!globals_lock_);
 
+  jint HandleGetEnv(/*out*/void** env, jint version);
+
+  void AddEnvironmentHook(GetEnvHook hook);
+
+  static bool IsBadJniVersion(int version);
+
  private:
   // Return true if self can currently access weak globals.
   bool MayAccessWeakGlobalsUnlocked(Thread* self) const SHARED_REQUIRES(Locks::mutator_lock_);
@@ -207,6 +225,9 @@
   Atomic<bool> allow_accessing_weak_globals_;
   ConditionVariable weak_globals_add_condition_ GUARDED_BY(weak_globals_lock_);
 
+  // TODO: Maybe move this to Runtime.
+  std::vector<GetEnvHook> env_hooks_;
+
   DISALLOW_COPY_AND_ASSIGN(JavaVMExt);
 };
 
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index 51952c4..e9d6d07 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -24,7 +24,7 @@
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/sockets.h"
 #endif
 
@@ -224,7 +224,7 @@
        */
       int  ret = connect(control_sock_, &control_addr_.controlAddrPlain, control_addr_len_);
       if (!ret) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
         if (!socket_peer_is_trusted(control_sock_)) {
           if (shutdown(control_sock_, SHUT_RDWR)) {
             PLOG(ERROR) << "trouble shutting down socket";
diff --git a/runtime/jdwp/jdwp_bits.h b/runtime/jdwp/jdwp_bits.h
index f9cf9ca..33b98f3 100644
--- a/runtime/jdwp/jdwp_bits.h
+++ b/runtime/jdwp/jdwp_bits.h
@@ -59,13 +59,22 @@
   bytes.push_back(static_cast<uint8_t>(value));
 }
 
-static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars, size_t char_count) {
+static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars,
+                                 size_t char_count) {
   Append4BE(bytes, char_count);
   for (size_t i = 0; i < char_count; ++i) {
     Append2BE(bytes, chars[i]);
   }
 }
 
+static inline void AppendUtf16CompressedBE(std::vector<uint8_t>& bytes,
+                                           const uint8_t* chars, size_t char_count) {
+  Append4BE(bytes, char_count);
+  for (size_t i = 0; i < char_count; ++i) {
+    Append2BE(bytes, static_cast<uint16_t>(chars[i]));
+  }
+}
+
 // @deprecated
 static inline void Set1(uint8_t* buf, uint8_t val) {
   *buf = val;
diff --git a/runtime/jdwp/jdwp_expand_buf.cc b/runtime/jdwp/jdwp_expand_buf.cc
index e492d7e..961dd36 100644
--- a/runtime/jdwp/jdwp_expand_buf.cc
+++ b/runtime/jdwp/jdwp_expand_buf.cc
@@ -164,7 +164,7 @@
  * have stored null bytes in a multi-byte encoding).
  */
 void expandBufAddUtf8String(ExpandBuf* pBuf, const char* s) {
-  int strLen = strlen(s);
+  int strLen = (s != nullptr ? strlen(s) : 0);
   ensureSpace(pBuf, sizeof(uint32_t) + strLen);
   SetUtf8String(pBuf->storage + pBuf->curLen, s, strLen);
   pBuf->curLen += sizeof(uint32_t) + strLen;
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index df6936b..6278ef0 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -690,6 +690,19 @@
 }
 
 /*
+ * Invoke a static method on an interface.
+ */
+static JdwpError IT_InvokeMethod(JdwpState* state, Request* request,
+                                 ExpandBuf* pReply ATTRIBUTE_UNUSED)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  RefTypeId class_id = request->ReadRefTypeId();
+  ObjectId thread_id = request->ReadThreadId();
+  MethodId method_id = request->ReadMethodId();
+
+  return RequestInvoke(state, request, thread_id, 0, class_id, method_id, false);
+}
+
+/*
  * Return line number information for the method, if present.
  */
 static JdwpError M_LineTable(JdwpState*, Request* request, ExpandBuf* pReply)
@@ -745,6 +758,15 @@
   return ERR_NONE;
 }
 
+// Default implementation for IDEs relying on this command.
+static JdwpError M_IsObsolete(JdwpState*, Request* request, ExpandBuf* reply)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  request->ReadRefTypeId();  // unused reference type ID
+  request->ReadMethodId();   // unused method ID
+  expandBufAdd1(reply, false);  // a method is never obsolete.
+  return ERR_NONE;
+}
+
 /*
  * Given an object reference, return the runtime type of the object
  * (class or array).
@@ -1472,12 +1494,13 @@
   { 4,    1,  AT_newInstance,   "ArrayType.NewInstance" },
 
   /* InterfaceType command set (5) */
+  { 5,    1, IT_InvokeMethod,  "InterfaceType.InvokeMethod" },
 
   /* Method command set (6) */
   { 6,    1,  M_LineTable,                "Method.LineTable" },
   { 6,    2,  M_VariableTable,            "Method.VariableTable" },
   { 6,    3,  M_Bytecodes,                "Method.Bytecodes" },
-  { 6,    4,  nullptr,                    "Method.IsObsolete" },
+  { 6,    4,  M_IsObsolete,               "Method.IsObsolete" },
   { 6,    5,  M_VariableTableWithGeneric, "Method.VariableTableWithGeneric" },
 
   /* Field command set (8) */
@@ -1570,6 +1593,8 @@
     return command == kJDWPClassTypeInvokeMethodCmd || command == kJDWPClassTypeNewInstanceCmd;
   } else if (command_set == kJDWPObjectReferenceCmdSet) {
     return command == kJDWPObjectReferenceInvokeCmd;
+  } else if (command_set == kJDWPInterfaceTypeCmdSet) {
+    return command == kJDWPInterfaceTypeInvokeMethodCmd;
   } else {
     return false;
   }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 668d5dc..dbf04fe 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -251,7 +251,7 @@
     case kJdwpTransportSocket:
       InitSocketTransport(state.get(), options);
       break;
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     case kJdwpTransportAndroidAdb:
       InitAdbTransport(state.get(), options);
       break;
diff --git a/runtime/jdwp/jdwp_priv.h b/runtime/jdwp/jdwp_priv.h
index 29314f6..4e1bda8 100644
--- a/runtime/jdwp/jdwp_priv.h
+++ b/runtime/jdwp/jdwp_priv.h
@@ -45,6 +45,8 @@
 static constexpr uint8_t kJDWPClassTypeCmdSet = 3U;
 static constexpr uint8_t kJDWPClassTypeInvokeMethodCmd = 3U;
 static constexpr uint8_t kJDWPClassTypeNewInstanceCmd = 4U;
+static constexpr uint8_t kJDWPInterfaceTypeCmdSet = 5U;
+static constexpr uint8_t kJDWPInterfaceTypeInvokeMethodCmd = 1U;
 static constexpr uint8_t kJDWPObjectReferenceCmdSet = 9U;
 static constexpr uint8_t kJDWPObjectReferenceInvokeCmd = 6U;
 
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 4fb6df1..2507fe9 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -30,13 +30,13 @@
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
-#define kBasePort           8000
-#define kMaxPort            8040
-
 namespace art {
 
 namespace JDWP {
 
+static constexpr uint16_t kBasePort = 8000;
+static constexpr uint16_t kMaxPort = 8040;
+
 /*
  * JDWP network state.
  *
@@ -275,11 +275,33 @@
    * Start by resolving the host name.
    */
 #if defined(__linux__)
+  // Initial size of the work buffer used in gethostbyname_r.
+  //
+  // The call to gethostbyname_r below requires a user-allocated buffer,
+  // the size of which depends on the system. The initial implementation
+  // used to use a 128-byte buffer, but that was not enough on some
+  // systems (maybe because of IPv6), causing failures in JDWP host
+  // testing; thus it was increased to 256.
+  //
+  // However, we should not use a fixed size: gethostbyname_r's
+  // documentation states that if the work buffer is too small (i.e. if
+  // gethostbyname_r returns `ERANGE`), then the function should be
+  // called again with a bigger buffer. Which we do now, starting with
+  // an initial 256-byte buffer, and doubling it until gethostbyname_r
+  // accepts this size.
+  static constexpr size_t kInitialAuxBufSize = 256;
+
+  std::vector<char> auxBuf(kInitialAuxBufSize);
   hostent he;
-  char auxBuf[128];
   int error;
-  int cc = gethostbyname_r(options->host.c_str(), &he, auxBuf, sizeof(auxBuf), &pEntry, &error);
-  if (cc != 0) {
+  int cc;
+  while ((cc = gethostbyname_r(
+             options->host.c_str(), &he, auxBuf.data(), auxBuf.size(), &pEntry, &error))
+         == ERANGE) {
+    // The work buffer `auxBuf` is too small; enlarge it.
+    auxBuf.resize(auxBuf.size() * 2);
+  }
+  if (cc != 0 || pEntry == nullptr) {
     LOG(WARNING) << "gethostbyname_r('" << options->host << "') failed: " << hstrerror(error);
     return false;
   }
@@ -298,7 +320,8 @@
 
   addr.addrInet.sin_port = htons(options->port);
 
-  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+            << ntohs(addr.addrInet.sin_port);
 
   /*
    * Create a socket.
@@ -313,13 +336,15 @@
    * Try to connect.
    */
   if (connect(clientSock, &addr.addrPlain, sizeof(addr)) != 0) {
-    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+                << ntohs(addr.addrInet.sin_port);
     close(clientSock);
     clientSock = -1;
     return false;
   }
 
-  LOG(INFO) << "Connection established to " << options->host << " (" << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
+  LOG(INFO) << "Connection established to " << options->host << " ("
+            << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
   SetAwaitingHandshake(true);
   input_count_ = 0;
 
@@ -438,7 +463,8 @@
         }
       }
       if (clientSock >= 0 && FD_ISSET(clientSock, &readfds)) {
-        readCount = read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
+        readCount =
+            read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
         if (readCount < 0) {
           /* read failed */
           if (errno != EINTR) {
@@ -479,7 +505,8 @@
     errno = 0;
     int cc = TEMP_FAILURE_RETRY(write(clientSock, input_buffer_, kMagicHandshakeLen));
     if (cc != kMagicHandshakeLen) {
-      PLOG(ERROR) << "Failed writing handshake bytes (" << cc << " of " << kMagicHandshakeLen << ")";
+      PLOG(ERROR) << "Failed writing handshake bytes ("
+                  << cc << " of " << kMagicHandshakeLen << ")";
       goto fail;
     }
 
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
new file mode 100644
index 0000000..7cdd7c5
--- /dev/null
+++ b/runtime/jit/debugger_interface.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debugger_interface.h"
+
+#include "base/logging.h"
+#include "base/mutex.h"
+#include "thread-inl.h"
+#include "thread.h"
+
+#include <unordered_map>
+
+namespace art {
+
+// -------------------------------------------------------------------
+// Binary GDB JIT Interface as described in
+//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
+// -------------------------------------------------------------------
+extern "C" {
+  typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+  } JITAction;
+
+  struct JITCodeEntry {
+    JITCodeEntry* next_;
+    JITCodeEntry* prev_;
+    const uint8_t *symfile_addr_;
+    uint64_t symfile_size_;
+  };
+
+  struct JITDescriptor {
+    uint32_t version_;
+    uint32_t action_flag_;
+    JITCodeEntry* relevant_entry_;
+    JITCodeEntry* first_entry_;
+  };
+
+  // GDB will place a breakpoint in this function.
+  // To prevent GCC from inlining or removing it, we mark it noinline and place an
+  // inline assembler statement inside.
+  void __attribute__((noinline)) __jit_debug_register_code();
+  void __attribute__((noinline)) __jit_debug_register_code() {
+    __asm__("");
+  }
+
+  // Call __jit_debug_register_code indirectly via global variable.
+  // This gives the debugger an easy way to inject custom code to handle the events.
+  void (*__jit_debug_register_code_ptr)() = __jit_debug_register_code;
+
+  // GDB will inspect the contents of this descriptor.
+  // Static initialization is necessary to prevent GDB from seeing an
+  // uninitialized descriptor.
+  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
+}
+
+static Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
+
+static JITCodeEntry* CreateJITCodeEntryInternal(std::vector<uint8_t> symfile)
+    REQUIRES(g_jit_debug_mutex) {
+  DCHECK_NE(symfile.size(), 0u);
+
+  // Make a copy of the buffer; this also shrinks it to a tightly-sized allocation.
+  uint8_t* symfile_copy = new uint8_t[symfile.size()];
+  CHECK(symfile_copy != nullptr);
+  memcpy(symfile_copy, symfile.data(), symfile.size());
+
+  JITCodeEntry* entry = new JITCodeEntry;
+  CHECK(entry != nullptr);
+  entry->symfile_addr_ = symfile_copy;
+  entry->symfile_size_ = symfile.size();
+  entry->prev_ = nullptr;
+
+  entry->next_ = __jit_debug_descriptor.first_entry_;
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry;
+  }
+  __jit_debug_descriptor.first_entry_ = entry;
+  __jit_debug_descriptor.relevant_entry_ = entry;
+
+  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
+  (*__jit_debug_register_code_ptr)();
+  return entry;
+}
+
+static void DeleteJITCodeEntryInternal(JITCodeEntry* entry) REQUIRES(g_jit_debug_mutex) {
+  if (entry->prev_ != nullptr) {
+    entry->prev_->next_ = entry->next_;
+  } else {
+    __jit_debug_descriptor.first_entry_ = entry->next_;
+  }
+
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry->prev_;
+  }
+
+  __jit_debug_descriptor.relevant_entry_ = entry;
+  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
+  (*__jit_debug_register_code_ptr)();
+  delete[] entry->symfile_addr_;
+  delete entry;
+}
+
+JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  return CreateJITCodeEntryInternal(std::move(symfile));
+}
+
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  DeleteJITCodeEntryInternal(entry);
+}
+
+// Mapping from address to entry.  It takes ownership of the entries
+// so that the user of the JIT interface does not have to store them.
+static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries;
+
+void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  DCHECK_NE(address, 0u);
+  DCHECK(g_jit_code_entries.find(address) == g_jit_code_entries.end());
+  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile));
+  g_jit_code_entries.emplace(address, entry);
+}
+
+bool DeleteJITCodeEntryForAddress(uintptr_t address) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  const auto& it = g_jit_code_entries.find(address);
+  if (it == g_jit_code_entries.end()) {
+    return false;
+  }
+  DeleteJITCodeEntryInternal(it->second);
+  g_jit_code_entries.erase(it);
+  return true;
+}
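Taken together, the address-keyed functions are meant to be driven from the JIT's code lifecycle. A usage sketch (BuildMiniDebugElf, code_ptr, and code_size are hypothetical; only the Create/Delete calls come from this file):

    // After emitting native code for a method:
    uintptr_t code_address = reinterpret_cast<uintptr_t>(code_ptr);
    std::vector<uint8_t> symfile = BuildMiniDebugElf(code_ptr, code_size);
    CreateJITCodeEntryForAddress(code_address, std::move(symfile));

    // Later, when the code cache garbage-collects that method:
    bool removed = DeleteJITCodeEntryForAddress(code_address);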
+
+}  // namespace art
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
new file mode 100644
index 0000000..d9bf331
--- /dev/null
+++ b/runtime/jit/debugger_interface.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+#define ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+
+#include <inttypes.h>
+#include <memory>
+#include <vector>
+
+namespace art {
+
+extern "C" {
+  struct JITCodeEntry;
+}
+
+// Notify native debugger about new JITed code by passing in-memory ELF.
+// It takes ownership of the in-memory ELF file.
+JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile);
+
+// Notify native debugger that JITed code has been removed.
+// It also releases the associated in-memory ELF file.
+void DeleteJITCodeEntry(JITCodeEntry* entry);
+
+// Notify native debugger about new JITed code by passing in-memory ELF.
+// The address is used only to uniquely identify the entry.
+// It takes ownership of the in-memory ELF file.
+void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile);
+
+// Notify native debugger that JITed code has been removed.
+// Returns false if entry for the given address was not found.
+bool DeleteJITCodeEntryForAddress(uintptr_t address);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 0607493..cff2354 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -19,68 +19,172 @@
 #include <dlfcn.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
-#include "jit_instrumentation.h"
+#include "oat_file_manager.h"
+#include "oat_quick_method_header.h"
+#include "offline_profiling_info.h"
+#include "profile_saver.h"
 #include "runtime.h"
 #include "runtime_options.h"
+#include "stack_map.h"
 #include "thread_list.h"
 #include "utils.h"
 
 namespace art {
 namespace jit {
 
+static constexpr bool kEnableOnStackReplacement = true;
+// The priority at which JIT threads are scheduled; 9 is the lowest foreground priority on device.
+static constexpr int kJitPoolThreadPthreadPriority = 9;
+
+// JIT compiler
+void* Jit::jit_library_handle_ = nullptr;
+void* Jit::jit_compiler_handle_ = nullptr;
+void* (*Jit::jit_load_)(bool*) = nullptr;
+void (*Jit::jit_unload_)(void*) = nullptr;
+bool (*Jit::jit_compile_method_)(void*, ArtMethod*, Thread*, bool) = nullptr;
+void (*Jit::jit_types_loaded_)(void*, mirror::Class**, size_t count) = nullptr;
+bool Jit::generate_debug_info_ = false;
+
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
-  jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
-  jit_options->code_cache_capacity_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
-  jit_options->compile_threshold_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
-  jit_options->warmup_threshold_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
+  jit_options->use_jit_compilation_ = options.GetOrDefault(RuntimeArgumentMap::UseJitCompilation);
+
+  jit_options->code_cache_initial_capacity_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
+  jit_options->code_cache_max_capacity_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
+  jit_options->profile_saver_options_ =
+      options.GetOrDefault(RuntimeArgumentMap::ProfileSaverOpts);
+
+  jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+  if (jit_options->compile_threshold_ > std::numeric_limits<uint16_t>::max()) {
+    LOG(FATAL) << "Method compilation threshold is above its internal limit.";
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITWarmupThreshold)) {
+    jit_options->warmup_threshold_ = *options.Get(RuntimeArgumentMap::JITWarmupThreshold);
+    if (jit_options->warmup_threshold_ > std::numeric_limits<uint16_t>::max()) {
+      LOG(FATAL) << "Method warmup threshold is above its internal limit.";
+    }
+  } else {
+    jit_options->warmup_threshold_ = jit_options->compile_threshold_ / 2;
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITOsrThreshold)) {
+    jit_options->osr_threshold_ = *options.Get(RuntimeArgumentMap::JITOsrThreshold);
+    if (jit_options->osr_threshold_ > std::numeric_limits<uint16_t>::max()) {
+      LOG(FATAL) << "Method on stack replacement threshold is above its internal limit.";
+    }
+  } else {
+    jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
+    if (jit_options->osr_threshold_ > std::numeric_limits<uint16_t>::max()) {
+      jit_options->osr_threshold_ = std::numeric_limits<uint16_t>::max();
+    }
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITPriorityThreadWeight)) {
+    jit_options->priority_thread_weight_ =
+        *options.Get(RuntimeArgumentMap::JITPriorityThreadWeight);
+    if (jit_options->priority_thread_weight_ > jit_options->warmup_threshold_) {
+      LOG(FATAL) << "Priority thread weight is above the warmup threshold.";
+    } else if (jit_options->priority_thread_weight_ == 0) {
+      LOG(FATAL) << "Priority thread weight cannot be 0.";
+    }
+  } else {
+    jit_options->priority_thread_weight_ = std::max(
+        jit_options->warmup_threshold_ / Jit::kDefaultPriorityThreadWeightRatio,
+        static_cast<size_t>(1));
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITInvokeTransitionWeight)) {
+    jit_options->invoke_transition_weight_ =
+        *options.Get(RuntimeArgumentMap::JITInvokeTransitionWeight);
+    if (jit_options->invoke_transition_weight_ > jit_options->warmup_threshold_) {
+      LOG(FATAL) << "Invoke transition weight is above the warmup threshold.";
+    } else if (jit_options->invoke_transition_weight_ == 0) {
+      LOG(FATAL) << "Invoke transition weight cannot be 0.";
+    }
+  } else {
+    jit_options->invoke_transition_weight_ = std::max(
+        jit_options->warmup_threshold_ / Jit::kDefaultInvokeTransitionWeightRatio,
+        static_cast<size_t>(1));
+  }
+
   return jit_options;
 }
 
+bool Jit::ShouldUsePriorityThreadWeight() {
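+  // Weight samples more heavily only when the process state is jank perceptible and the
+  // current thread is JIT-sensitive.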
+  return Runtime::Current()->InJankPerceptibleProcessState()
+      && Thread::Current()->IsJitSensitiveThread();
+}
+
 void Jit::DumpInfo(std::ostream& os) {
-  os << "Code cache size=" << PrettySize(code_cache_->CodeCacheSize())
-     << " data cache size=" << PrettySize(code_cache_->DataCacheSize())
-     << " num methods=" << code_cache_->NumMethods()
-     << "\n";
+  code_cache_->Dump(os);
   cumulative_timings_.Dump(os);
+  MutexLock mu(Thread::Current(), lock_);
+  memory_use_.PrintMemoryUse(os);
+}
+
+void Jit::DumpForSigQuit(std::ostream& os) {
+  DumpInfo(os);
+  ProfileSaver::DumpInstanceInfo(os);
 }
 
 void Jit::AddTimingLogger(const TimingLogger& logger) {
   cumulative_timings_.AddLogger(logger);
 }
 
-Jit::Jit()
-    : jit_library_handle_(nullptr), jit_compiler_handle_(nullptr), jit_load_(nullptr),
-      jit_compile_method_(nullptr), dump_info_on_shutdown_(false),
-      cumulative_timings_("JIT timings") {
-}
+Jit::Jit() : dump_info_on_shutdown_(false),
+             cumulative_timings_("JIT timings"),
+             memory_use_("Memory used for compilation", 16),
+             lock_("JIT memory use lock"),
+             use_jit_compilation_(true) {}
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
+  DCHECK(options->UseJitCompilation() || options->GetProfileSaverOptions().IsEnabled());
   std::unique_ptr<Jit> jit(new Jit);
   jit->dump_info_on_shutdown_ = options->DumpJitInfoOnShutdown();
-  if (!jit->LoadCompiler(error_msg)) {
+  if (jit_compiler_handle_ == nullptr && !LoadCompiler(error_msg)) {
     return nullptr;
   }
-  jit->code_cache_.reset(JitCodeCache::Create(options->GetCodeCacheCapacity(), error_msg));
+  jit->code_cache_.reset(JitCodeCache::Create(
+      options->GetCodeCacheInitialCapacity(),
+      options->GetCodeCacheMaxCapacity(),
+      jit->generate_debug_info_,
+      error_msg));
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
-  LOG(INFO) << "JIT created with code_cache_capacity="
-      << PrettySize(options->GetCodeCacheCapacity())
-      << " compile_threshold=" << options->GetCompileThreshold();
+  jit->use_jit_compilation_ = options->UseJitCompilation();
+  jit->profile_saver_options_ = options->GetProfileSaverOptions();
+  VLOG(jit) << "JIT created with initial_capacity="
+      << PrettySize(options->GetCodeCacheInitialCapacity())
+      << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
+      << ", compile_threshold=" << options->GetCompileThreshold()
+      << ", profile_saver_options=" << options->GetProfileSaverOptions();
+
+  jit->hot_method_threshold_ = options->GetCompileThreshold();
+  jit->warm_method_threshold_ = options->GetWarmupThreshold();
+  jit->osr_method_threshold_ = options->GetOsrThreshold();
+  jit->priority_thread_weight_ = options->GetPriorityThreadWeight();
+  jit->invoke_transition_weight_ = options->GetInvokeTransitionWeight();
+
+  jit->CreateThreadPool();
+
+  // Notify native debugger about the classes already loaded before the creation of the jit.
+  jit->DumpTypeInfoForLoadedTypes(Runtime::Current()->GetClassLinker());
   return jit.release();
 }
 
-bool Jit::LoadCompiler(std::string* error_msg) {
+bool Jit::LoadCompilerLibrary(std::string* error_msg) {
   jit_library_handle_ = dlopen(
       kIsDebugBuild ? "libartd-compiler.so" : "libart-compiler.so", RTLD_NOW);
   if (jit_library_handle_ == nullptr) {
@@ -89,8 +193,7 @@
     *error_msg = oss.str();
     return false;
   }
-  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**)>(
-      dlsym(jit_library_handle_, "jit_load"));
+  jit_load_ = reinterpret_cast<void* (*)(bool*)>(dlsym(jit_library_handle_, "jit_load"));
   if (jit_load_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't find jit_load entry point";
@@ -103,77 +206,500 @@
     *error_msg = "JIT couldn't find jit_unload entry point";
     return false;
   }
-  jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*)>(
+  jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*, bool)>(
       dlsym(jit_library_handle_, "jit_compile_method"));
   if (jit_compile_method_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't find jit_compile_method entry point";
     return false;
   }
-  CompilerCallbacks* callbacks = nullptr;
+  jit_types_loaded_ = reinterpret_cast<void (*)(void*, mirror::Class**, size_t)>(
+      dlsym(jit_library_handle_, "jit_types_loaded"));
+  if (jit_types_loaded_ == nullptr) {
+    dlclose(jit_library_handle_);
+    *error_msg = "JIT couldn't find jit_types_loaded entry point";
+    return false;
+  }
+  return true;
+}
+
+bool Jit::LoadCompiler(std::string* error_msg) {
+  if (jit_library_handle_ == nullptr && !LoadCompilerLibrary(error_msg)) {
+    return false;
+  }
+  bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
-  jit_compiler_handle_ = (jit_load_)(&callbacks);
+  jit_compiler_handle_ = (jit_load_)(&will_generate_debug_symbols);
   if (jit_compiler_handle_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't load compiler";
     return false;
   }
-  if (callbacks == nullptr) {
-    dlclose(jit_library_handle_);
-    *error_msg = "JIT compiler callbacks were not set";
-    jit_compiler_handle_ = nullptr;
-    return false;
-  }
-  compiler_callbacks_ = callbacks;
+  generate_debug_info_ = will_generate_debug_symbols;
   return true;
 }
 
-bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
+bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
+  DCHECK(Runtime::Current()->UseJitCompilation());
   DCHECK(!method->IsRuntimeMethod());
+
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
   }
-  return jit_compile_method_(jit_compiler_handle_, method, self);
+
+  // Don't compile the method if we are supposed to be deoptimized.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to deoptimization";
+    return false;
+  }
+
+  // If we get a request to compile a proxy method, we pass the actual Java method
+  // of that proxy method, as the compiler does not expect a proxy method.
+  ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr)) {
+    return false;
+  }
+
+  VLOG(jit) << "Compiling method "
+            << PrettyMethod(method_to_compile)
+            << " osr=" << std::boolalpha << osr;
+  bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
+  code_cache_->DoneCompiling(method_to_compile, self, osr);
+  if (!success) {
+    VLOG(jit) << "Failed to compile method "
+              << PrettyMethod(method_to_compile)
+              << " osr=" << std::boolalpha << osr;
+  }
+  return success;
 }
 
 void Jit::CreateThreadPool() {
-  CHECK(instrumentation_cache_.get() != nullptr);
-  instrumentation_cache_->CreateThreadPool();
+  // There is a DCHECK in the 'AddSamples' method to ensure the thread pool
+  // is not null when we instrument.
+  thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
+  thread_pool_->SetPthreadPriority(kJitPoolThreadPthreadPriority);
+  thread_pool_->StartWorkers(Thread::Current());
 }
 
 void Jit::DeleteThreadPool() {
-  if (instrumentation_cache_.get() != nullptr) {
-    instrumentation_cache_->DeleteThreadPool();
+  Thread* self = Thread::Current();
+  DCHECK(Runtime::Current()->IsShuttingDown(self));
+  if (thread_pool_ != nullptr) {
+    ThreadPool* cache = nullptr;
+    {
+      ScopedSuspendAll ssa(__FUNCTION__);
+      // Clear thread_pool_ field while the threads are suspended.
+      // A mutator in the 'AddSamples' method will check against it.
+      cache = thread_pool_.release();
+    }
+    cache->StopWorkers(self);
+    cache->RemoveAllTasks(self);
+    // We could just suspend all threads, but we know those threads
+    // will finish in a short period, so it's not worth adding suspend logic
+    // here. Besides, this is only done at shutdown.
+    cache->Wait(self, false, false);
+    delete cache;
   }
 }
 
+void Jit::StartProfileSaver(const std::string& filename,
+                            const std::vector<std::string>& code_paths,
+                            const std::string& foreign_dex_profile_path,
+                            const std::string& app_dir) {
+  if (profile_saver_options_.IsEnabled()) {
+    ProfileSaver::Start(profile_saver_options_,
+                        filename,
+                        code_cache_.get(),
+                        code_paths,
+                        foreign_dex_profile_path,
+                        app_dir);
+  }
+}
+
+void Jit::StopProfileSaver() {
+  if (profile_saver_options_.IsEnabled() && ProfileSaver::IsStarted()) {
+    ProfileSaver::Stop(dump_info_on_shutdown_);
+  }
+}
+
+bool Jit::JitAtFirstUse() {
+  return HotMethodThreshold() == 0;
+}
+
+bool Jit::CanInvokeCompiledCode(ArtMethod* method) {
+  return code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode());
+}
+
 Jit::~Jit() {
+  DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted());
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
   }
   DeleteThreadPool();
   if (jit_compiler_handle_ != nullptr) {
     jit_unload_(jit_compiler_handle_);
+    jit_compiler_handle_ = nullptr;
   }
   if (jit_library_handle_ != nullptr) {
     dlclose(jit_library_handle_);
+    jit_library_handle_ = nullptr;
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
-  CHECK_GT(compile_threshold, 0U);
-  ScopedSuspendAll ssa(__FUNCTION__);
-  // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
-  // something.
-  instrumentation_cache_.reset(
-      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
-  Runtime::Current()->GetInstrumentation()->AddListener(
-      new jit::JitInstrumentationListener(instrumentation_cache_.get()),
-      instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
-      instrumentation::Instrumentation::kInvokeVirtualOrInterface);
+void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
+  if (!Runtime::Current()->UseJitCompilation()) {
+    // No need to notify if we only use the JIT to save profiles.
+    return;
+  }
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit->generate_debug_info_) {
+    DCHECK(jit->jit_types_loaded_ != nullptr);
+    jit->jit_types_loaded_(jit->jit_compiler_handle_, &type, 1);
+  }
+}
+
+void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
+  struct CollectClasses : public ClassVisitor {
+    bool operator()(mirror::Class* klass) OVERRIDE {
+      classes_.push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*> classes_;
+  };
+
+  if (generate_debug_info_) {
+    ScopedObjectAccess so(Thread::Current());
+
+    CollectClasses visitor;
+    linker->VisitClasses(&visitor);
+    jit_types_loaded_(jit_compiler_handle_, visitor.classes_.data(), visitor.classes_.size());
+  }
+}
+
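+// Assembly stub that copies the prepared frame to the native stack, jumps into the OSR
+// compiled code at `native_pc`, and stores the method's return value in `result`.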
+extern "C" void art_quick_osr_stub(void** stack,
+                                   uint32_t stack_size_in_bytes,
+                                   const uint8_t* native_pc,
+                                   JValue* result,
+                                   const char* shorty,
+                                   Thread* self);
+
+bool Jit::MaybeDoOnStackReplacement(Thread* thread,
+                                    ArtMethod* method,
+                                    uint32_t dex_pc,
+                                    int32_t dex_pc_offset,
+                                    JValue* result) {
+  if (!kEnableOnStackReplacement) {
+    return false;
+  }
+
+  Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return false;
+  }
+
+  if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
+    // Don't attempt to do an OSR if we are close to the stack limit. Since
+    // the interpreter frames are still on stack, OSR has the potential
+    // to stack overflow even for a simple loop.
+    // b/27094810.
+    return false;
+  }
+
+  // Get the actual Java method if this method is from a proxy class. The compiler
+  // and the JIT code cache do not expect methods from proxy classes.
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+
+  // Cheap check whether the method has already been compiled; if so, that's an
+  // indicator that we should OSR into it.
+  if (!jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+
+  // Fetch some data before looking up an OSR method. We don't want thread
+  // suspension once we hold an OSR method, as the JIT code cache could delete the OSR
+  // method while we are being suspended.
+  const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
+  const char* shorty = method->GetShorty();
+  std::string method_name(VLOG_IS_ON(jit) ? PrettyMethod(method) : "");
+  void** memory = nullptr;
+  size_t frame_size = 0;
+  ShadowFrame* shadow_frame = nullptr;
+  const uint8_t* native_pc = nullptr;
+
+  {
+    ScopedAssertNoThreadSuspension sts(thread, "Holding OSR method");
+    const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method);
+    if (osr_method == nullptr) {
+      // No OSR method yet; just return to the interpreter.
+      return false;
+    }
+
+    CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
+
+    // Find stack map starting at the target dex_pc.
+    StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
+    if (!stack_map.IsValid()) {
+      // There is no OSR stack map for this dex pc offset. Just return to the interpreter in the
+      // hope that the next branch has one.
+      return false;
+    }
+
+    // Before allowing the jump, make sure the debugger is not active to avoid jumping from
+    // interpreter to OSR while e.g. single stepping. Note that we could selectively disable
+    // OSR when single stepping, but that's currently hard to know at this point.
+    if (Dbg::IsDebuggerActive()) {
+      return false;
+    }
+
+    // We found a stack map, now fill the frame with dex register values from the interpreter's
+    // shadow frame.
+    DexRegisterMap vreg_map =
+        code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+    frame_size = osr_method->GetFrameSizeInBytes();
+
+    // Allocate memory to hold the shadow frame values. The OSR stub will copy that
+    // memory to the stack.
+    // Note that we could pass the shadow frame to the stub, and let it copy the values there,
+    // but that is engineering complexity not worth the effort for something like OSR.
+    memory = reinterpret_cast<void**>(malloc(frame_size));
+    CHECK(memory != nullptr);
+    memset(memory, 0, frame_size);
+
+    // Art ABI: ArtMethod is at the bottom of the stack.
+    memory[0] = method;
+
+    shadow_frame = thread->PopShadowFrame();
+    if (!vreg_map.IsValid()) {
+      // If we don't have a dex register map, then there are no live dex registers at
+      // this dex pc.
+    } else {
+      for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+        DexRegisterLocation::Kind location =
+            vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+        if (location == DexRegisterLocation::Kind::kNone) {
+          // Dex register is dead or uninitialized.
+          continue;
+        }
+
+        if (location == DexRegisterLocation::Kind::kConstant) {
+          // We skip constants because the compiled code knows how to handle them.
+          continue;
+        }
+
+        DCHECK_EQ(location, DexRegisterLocation::Kind::kInStack);
+
+        int32_t vreg_value = shadow_frame->GetVReg(vreg);
+        int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
+                                                             number_of_vregs,
+                                                             code_info,
+                                                             encoding);
+        DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size));
+        DCHECK_GT(slot_offset, 0);
+        (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value;
+      }
+    }
+
+    native_pc = stack_map.GetNativePcOffset(encoding.stack_map_encoding) +
+        osr_method->GetEntryPoint();
+    VLOG(jit) << "Jumping to "
+              << method_name
+              << "@"
+              << std::hex << reinterpret_cast<uintptr_t>(native_pc);
+  }
+
+  {
+    ManagedStack fragment;
+    thread->PushManagedStackFragment(&fragment);
+    (*art_quick_osr_stub)(memory,
+                          frame_size,
+                          native_pc,
+                          result,
+                          shorty,
+                          thread);
+
+    if (UNLIKELY(thread->GetException() == Thread::GetDeoptimizationException())) {
+      thread->DeoptimizeWithDeoptimizationException(result);
+    }
+    thread->PopManagedStackFragment(fragment);
+  }
+  free(memory);
+  thread->PushShadowFrame(shadow_frame);
+  VLOG(jit) << "Done running OSR code for " << method_name;
+  return true;
+}
+
+void Jit::AddMemoryUsage(ArtMethod* method, size_t bytes) {
+  if (bytes > 4 * MB) {
+    LOG(INFO) << "Compiler allocated "
+              << PrettySize(bytes)
+              << " to compile "
+              << PrettyMethod(method);
+  }
+  MutexLock mu(Thread::Current(), lock_);
+  memory_use_.AddValue(bytes);
+}
+
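+// Thread pool task that either compiles a method (normally or for OSR) or allocates its
+// ProfilingInfo. It holds a global reference to the method's declaring class so the
+// class cannot be unloaded while the task is pending.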
+class JitCompileTask FINAL : public Task {
+ public:
+  enum TaskKind {
+    kAllocateProfile,
+    kCompile,
+    kCompileOsr
+  };
+
+  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
+    ScopedObjectAccess soa(Thread::Current());
+    // Add a global ref to the class to prevent class unloading until compilation is done.
+    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
+    CHECK(klass_ != nullptr);
+  }
+
+  ~JitCompileTask() {
+    ScopedObjectAccess soa(Thread::Current());
+    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
+  }
+
+  void Run(Thread* self) OVERRIDE {
+    ScopedObjectAccess soa(self);
+    if (kind_ == kCompile) {
+      Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false);
+    } else if (kind_ == kCompileOsr) {
+      Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true);
+    } else {
+      DCHECK(kind_ == kAllocateProfile);
+      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+      }
+    }
+    ProfileSaver::NotifyJitActivity();
+  }
+
+  void Finalize() OVERRIDE {
+    delete this;
+  }
+
+ private:
+  ArtMethod* const method_;
+  const TaskKind kind_;
+  jobject klass_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
+};
+
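+// Bump `method`'s hotness counter by `count` samples (weighted for priority threads) and
+// move the method through the warm -> hot -> OSR states, at most one state per call:
+// warm allocates a ProfilingInfo, hot schedules a compile task, and the OSR threshold
+// schedules an OSR compile task.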
+void Jit::AddSamples(Thread* self, ArtMethod* method, uint16_t count, bool with_backedges) {
+  if (thread_pool_ == nullptr) {
+    // Should only see this when shutting down.
+    DCHECK(Runtime::Current()->IsShuttingDown(self));
+    return;
+  }
+
+  if (method->IsClassInitializer() || method->IsNative() || !method->IsCompilable()) {
+    // We do not want to compile such methods.
+    return;
+  }
+  DCHECK(thread_pool_ != nullptr);
+  DCHECK_GT(warm_method_threshold_, 0);
+  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
+  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
+  DCHECK_GE(priority_thread_weight_, 1);
+  DCHECK_LE(priority_thread_weight_, hot_method_threshold_);
+
+  int32_t starting_count = method->GetCounter();
+  if (Jit::ShouldUsePriorityThreadWeight()) {
+    count *= priority_thread_weight_;
+  }
+  int32_t new_count = starting_count + count;  // int32_t to avoid wrap-around.
+  if (starting_count < warm_method_threshold_) {
+    if ((new_count >= warm_method_threshold_) &&
+        (method->GetProfilingInfo(kRuntimePointerSize) == nullptr)) {
+      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+      if (success) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method);
+      }
+
+      if (thread_pool_ == nullptr) {
+        // Calling ProfilingInfo::Create might put us in a suspended state, which could
+        // lead to the thread pool being deleted when we are shutting down.
+        DCHECK(Runtime::Current()->IsShuttingDown(self));
+        return;
+      }
+
+      if (!success) {
+        // Allocation failed. Instead of doing the collection on the Java thread, we push
+        // an allocation task to a compiler thread, which will do the collection.
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+      }
+    }
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, hot_method_threshold_ - 1);
+  } else if (use_jit_compilation_) {
+    if (starting_count < hot_method_threshold_) {
+      if ((new_count >= hot_method_threshold_) &&
+          !code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+        DCHECK(thread_pool_ != nullptr);
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
+      }
+      // Avoid jumping more than one state at a time.
+      new_count = std::min(new_count, osr_method_threshold_ - 1);
+    } else if (starting_count < osr_method_threshold_) {
+      if (!with_backedges) {
+        // If the samples don't contain any back edge, we don't increment the hotness.
+        return;
+      }
+      if ((new_count >= osr_method_threshold_) && !code_cache_->IsOsrCompiled(method)) {
+        DCHECK(thread_pool_ != nullptr);
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+      }
+    }
+  }
+  // Update hotness counter
+  method->SetCounter(new_count);
+}
+
+void Jit::MethodEntered(Thread* thread, ArtMethod* method) {
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(runtime->UseJitCompilation() && runtime->GetJit()->JitAtFirstUse())) {
+    // The compiler requires a ProfilingInfo object.
+    ProfilingInfo::Create(thread, method, /* retry_allocation */ true);
+    JitCompileTask compile_task(method, JitCompileTask::kCompile);
+    compile_task.Run(thread);
+    return;
+  }
+
+  ProfilingInfo* profiling_info = method->GetProfilingInfo(kRuntimePointerSize);
+  // Update the entrypoint if the ProfilingInfo has a saved one. The interpreter will call it
+  // instead of interpreting the method.
+  if ((profiling_info != nullptr) && (profiling_info->GetSavedEntryPoint() != nullptr)) {
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, profiling_info->GetSavedEntryPoint());
+  } else {
+    AddSamples(thread, method, 1, /* with_backedges */ false);
+  }
+}
+
+void Jit::InvokeVirtualOrInterface(Thread* thread,
+                                   mirror::Object* this_object,
+                                   ArtMethod* caller,
+                                   uint32_t dex_pc,
+                                   ArtMethod* callee ATTRIBUTE_UNUSED) {
+  ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
+  DCHECK(this_object != nullptr);
+  ProfilingInfo* info = caller->GetProfilingInfo(kRuntimePointerSize);
+  if (info != nullptr) {
+    info->AddInvokeInfo(dex_pc, this_object->GetClass());
+  }
+}
+
+void Jit::WaitForCompilationToFinish(Thread* self) {
+  if (thread_pool_ != nullptr) {
+    thread_pool_->Wait(self, false, false);
+  }
 }
 
 }  // namespace jit
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e73ba82..2aa6f3d 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -17,78 +17,187 @@
 #ifndef ART_RUNTIME_JIT_JIT_H_
 #define ART_RUNTIME_JIT_JIT_H_
 
-#include <unordered_map>
-
-#include "atomic.h"
+#include "base/arena_allocator.h"
+#include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
-#include "gc_root.h"
-#include "jni.h"
 #include "object_callbacks.h"
+#include "offline_profiling_info.h"
+#include "jit/profile_saver_options.h"
 #include "thread_pool.h"
 
 namespace art {
 
 class ArtMethod;
-class CompilerCallbacks;
 struct RuntimeArgumentMap;
 
 namespace jit {
 
 class JitCodeCache;
-class JitInstrumentationCache;
 class JitOptions;
 
+static constexpr int16_t kJitCheckForOSR = -1;
+static constexpr int16_t kJitHotnessDisabled = -2;
+
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
-  static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 10000;
+  static constexpr size_t kDefaultPriorityThreadWeightRatio = 1000;
+  static constexpr size_t kDefaultInvokeTransitionWeightRatio = 500;
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
-  bool CompileMethod(ArtMethod* method, Thread* self)
+  bool CompileMethod(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
   void CreateThreadPool();
-  CompilerCallbacks* GetCompilerCallbacks() {
-    return compiler_callbacks_;
-  }
+
   const JitCodeCache* GetCodeCache() const {
     return code_cache_.get();
   }
+
   JitCodeCache* GetCodeCache() {
     return code_cache_.get();
   }
+
   void DeleteThreadPool();
   // Dump interesting info: #methods compiled, code vs data size, compile / verify cumulative
   // loggers.
-  void DumpInfo(std::ostream& os);
+  void DumpInfo(std::ostream& os) REQUIRES(!lock_);
   // Add a timing logger to cumulative_timings_.
   void AddTimingLogger(const TimingLogger& logger);
-  JitInstrumentationCache* GetInstrumentationCache() const {
-    return instrumentation_cache_.get();
+
+  void AddMemoryUsage(ArtMethod* method, size_t bytes)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  size_t OSRMethodThreshold() const {
+    return osr_method_threshold_;
   }
 
+  size_t HotMethodThreshold() const {
+    return hot_method_threshold_;
+  }
+
+  size_t WarmMethodThreshold() const {
+    return warm_method_threshold_;
+  }
+
+  uint16_t PriorityThreadWeight() const {
+    return priority_thread_weight_;
+  }
+
+  // Returns false if we only need to save profile information and not compile methods.
+  bool UseJitCompilation() const {
+    return use_jit_compilation_;
+  }
+
+  bool GetSaveProfilingInfo() const {
+    return profile_saver_options_.IsEnabled();
+  }
+
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
+
+  // Profiling methods.
+  void MethodEntered(Thread* thread, ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples, bool with_backedges)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void NotifyInterpreterToCompiledCodeTransition(Thread* self, ArtMethod* caller)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    AddSamples(self, caller, invoke_transition_weight_, false);
+  }
+
+  void NotifyCompiledCodeToInterpreterTransition(Thread* self, ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    AddSamples(self, callee, invoke_transition_weight_, false);
+  }
+
+  // Starts the profile saver if the config options allow profile recording.
+  // The profile will be stored in the specified `filename` and will contain
+  // information collected from the given `code_paths` (a set of dex locations).
+  // The `foreign_dex_profile_path` is the path where the saver will put the
+  // profile markers for loaded dex files which are not owned by the application.
+  // The `app_dir` is the application directory and is used to decide which
+  // dex files belong to the application.
+  void StartProfileSaver(const std::string& filename,
+                         const std::vector<std::string>& code_paths,
+                         const std::string& foreign_dex_profile_path,
+                         const std::string& app_dir);
+  void StopProfileSaver();
+
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_);
+
+  static void NewTypeLoadedIfUsingJit(mirror::Class* type)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // If debug info generation is turned on, write the type information for types already
+  // loaded by the given class linker to the JIT debug interface.
+  void DumpTypeInfoForLoadedTypes(ClassLinker* linker);
+
+  // Return whether we should JIT-compile methods as soon as they are first invoked.
+  bool JitAtFirstUse();
+
+  // Return whether we can invoke JIT code for `method`.
+  bool CanInvokeCompiledCode(ArtMethod* method);
+
+  // Return whether the runtime should use a priority thread weight when sampling.
+  static bool ShouldUsePriorityThreadWeight();
+
+  // If an OSR compiled version is available for `method`,
+  // and `dex_pc + dex_pc_offset` is an entry point of that compiled
+  // version, this method will jump to the compiled code, let it run,
+  // and return true afterwards. Return false otherwise.
+  static bool MaybeDoOnStackReplacement(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc,
+                                        int32_t dex_pc_offset,
+                                        JValue* result)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  static bool LoadCompilerLibrary(std::string* error_msg);
+
  private:
   Jit();
-  bool LoadCompiler(std::string* error_msg);
+
+  static bool LoadCompiler(std::string* error_msg);
 
   // JIT compiler
-  void* jit_library_handle_;
-  void* jit_compiler_handle_;
-  void* (*jit_load_)(CompilerCallbacks**);
-  void (*jit_unload_)(void*);
-  bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
+  static void* jit_library_handle_;
+  static void* jit_compiler_handle_;
+  static void* (*jit_load_)(bool*);
+  static void (*jit_unload_)(void*);
+  static bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
+  static void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
 
   // Performance monitoring.
   bool dump_info_on_shutdown_;
   CumulativeLogger cumulative_timings_;
+  Histogram<uint64_t> memory_use_ GUARDED_BY(lock_);
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
-  std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
-  CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
+
+  bool use_jit_compilation_;
+  ProfileSaverOptions profile_saver_options_;
+  static bool generate_debug_info_;
+  uint16_t hot_method_threshold_;
+  uint16_t warm_method_threshold_;
+  uint16_t osr_method_threshold_;
+  uint16_t priority_thread_weight_;
+  uint16_t invoke_transition_weight_;
+  std::unique_ptr<ThreadPool> thread_pool_;
 
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
@@ -102,28 +211,62 @@
   size_t GetWarmupThreshold() const {
     return warmup_threshold_;
   }
-  size_t GetCodeCacheCapacity() const {
-    return code_cache_capacity_;
+  size_t GetOsrThreshold() const {
+    return osr_threshold_;
+  }
+  uint16_t GetPriorityThreadWeight() const {
+    return priority_thread_weight_;
+  }
+  size_t GetInvokeTransitionWeight() const {
+    return invoke_transition_weight_;
+  }
+  size_t GetCodeCacheInitialCapacity() const {
+    return code_cache_initial_capacity_;
+  }
+  size_t GetCodeCacheMaxCapacity() const {
+    return code_cache_max_capacity_;
   }
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
-  bool UseJIT() const {
-    return use_jit_;
+  const ProfileSaverOptions& GetProfileSaverOptions() const {
+    return profile_saver_options_;
   }
-  void SetUseJIT(bool b) {
-    use_jit_ = b;
+  bool GetSaveProfilingInfo() const {
+    return profile_saver_options_.IsEnabled();
+  }
+  bool UseJitCompilation() const {
+    return use_jit_compilation_;
+  }
+  void SetUseJitCompilation(bool b) {
+    use_jit_compilation_ = b;
+  }
+  void SetSaveProfilingInfo(bool save_profiling_info) {
+    profile_saver_options_.SetEnabled(save_profiling_info);
+  }
+  void SetJitAtFirstUse() {
+    use_jit_compilation_ = true;
+    compile_threshold_ = 0;
   }
 
  private:
-  bool use_jit_;
-  size_t code_cache_capacity_;
+  bool use_jit_compilation_;
+  size_t code_cache_initial_capacity_;
+  size_t code_cache_max_capacity_;
   size_t compile_threshold_;
   size_t warmup_threshold_;
+  size_t osr_threshold_;
+  uint16_t priority_thread_weight_;
+  size_t invoke_transition_weight_;
   bool dump_info_on_shutdown_;
+  ProfileSaverOptions profile_saver_options_;
 
-  JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
-      dump_info_on_shutdown_(false) { }
+  JitOptions()
+      : use_jit_compilation_(false),
+        code_cache_initial_capacity_(0),
+        code_cache_max_capacity_(0),
+        compile_threshold_(0),
+        dump_info_on_shutdown_(false) {}
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 4187358..b1079dd 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,8 +19,21 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
+#include "base/stl_util.h"
+#include "base/systrace.h"
+#include "base/time_utils.h"
+#include "debugger_interface.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
+#include "gc/accounting/bitmap-inl.h"
+#include "gc/scoped_gc_critical_section.h"
+#include "jit/jit.h"
+#include "jit/profiling_info.h"
+#include "linear_alloc.h"
 #include "mem_map.h"
 #include "oat_file-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread_list.h"
 
 namespace art {
 namespace jit {
@@ -29,6 +42,9 @@
 static constexpr int kProtData = PROT_READ | PROT_WRITE;
 static constexpr int kProtCode = PROT_READ | PROT_EXEC;
 
+static constexpr size_t kCodeSizeLogThreshold = 50 * KB;
+static constexpr size_t kStackMapSizeLogThreshold = 50 * KB;
+
 #define CHECKED_MPROTECT(memory, size, prot)                \
   do {                                                      \
     int rc = mprotect(memory, size, prot);                  \
@@ -38,80 +54,139 @@
     }                                                       \
   } while (false)                                           \
 
-JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) {
-  CHECK_GT(capacity, 0U);
-  CHECK_LT(capacity, kMaxCapacity);
-  std::string error_str;
-  // Map name specific for android_os_Debug.cpp accounting.
-  MemMap* data_map = MemMap::MapAnonymous(
-    "data-code-cache", nullptr, capacity, kProtAll, false, false, &error_str);
-  if (data_map == nullptr) {
-    std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
-    *error_msg = oss.str();
-    return nullptr;
-  }
+JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
+                                   size_t max_capacity,
+                                   bool generate_debug_info,
+                                   std::string* error_msg) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  CHECK_GE(max_capacity, initial_capacity);
 
-  // Data cache is 1 / 4 of the map.
-  // TODO: Make this variable?
-  size_t data_size = RoundUp(data_map->Size() / 4, kPageSize);
-  size_t code_size = data_map->Size() - data_size;
-  uint8_t* divider = data_map->Begin() + data_size;
+  // Generating debug information is mostly for using the 'perf' tool, which does
+  // not work with ashmem.
+  bool use_ashmem = !generate_debug_info;
+  // With 'perf', we want a 1-1 mapping between an address and a method.
+  bool garbage_collect_code = !generate_debug_info;
 
   // We need to have 32 bit offsets from method headers in code cache which point to things
   // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
-  MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str);
-  if (code_map == nullptr) {
+  // Ensure we're below 1 GB to be safe.
+  if (max_capacity > 1 * GB) {
     std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
+    oss << "Maxium code cache capacity is limited to 1 GB, "
+        << PrettySize(max_capacity) << " is too big";
     *error_msg = oss.str();
     return nullptr;
   }
-  DCHECK_EQ(code_map->Size(), code_size);
+
+  std::string error_str;
+  // Map name specific for android_os_Debug.cpp accounting.
+  MemMap* data_map = MemMap::MapAnonymous(
+      "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem);
+  if (data_map == nullptr) {
+    std::ostringstream oss;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
+    *error_msg = oss.str();
+    return nullptr;
+  }
+
+  // Round both capacities down to a multiple of two pages so that the code and data
+  // halves each stay page aligned; pages are the unit mspaces operate on.
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+
+  // Data cache is 1 / 2 of the map.
+  // TODO: Make this variable?
+  size_t data_size = max_capacity / 2;
+  size_t code_size = max_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, max_capacity);
+  uint8_t* divider = data_map->Begin() + data_size;
+
+  MemMap* code_map =
+      data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem);
+  if (code_map == nullptr) {
+    std::ostringstream oss;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
+    *error_msg = oss.str();
+    return nullptr;
+  }
   DCHECK_EQ(code_map->Begin(), divider);
-  return new JitCodeCache(code_map, data_map);
+  data_size = initial_capacity / 2;
+  code_size = initial_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, initial_capacity);
+  return new JitCodeCache(
+      code_map, data_map, code_size, data_size, max_capacity, garbage_collect_code);
 }
 
-JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map)
+JitCodeCache::JitCodeCache(MemMap* code_map,
+                           MemMap* data_map,
+                           size_t initial_code_capacity,
+                           size_t initial_data_capacity,
+                           size_t max_capacity,
+                           bool garbage_collect_code)
     : lock_("Jit code cache", kJitCodeCacheLock),
+      lock_cond_("Jit code cache variable", lock_),
+      collection_in_progress_(false),
       code_map_(code_map),
       data_map_(data_map),
-      num_methods_(0) {
+      max_capacity_(max_capacity),
+      current_capacity_(initial_code_capacity + initial_data_capacity),
+      code_end_(initial_code_capacity),
+      data_end_(initial_data_capacity),
+      last_collection_increased_code_cache_(false),
+      last_update_time_ns_(0),
+      garbage_collect_code_(garbage_collect_code),
+      used_memory_for_data_(0),
+      used_memory_for_code_(0),
+      number_of_compilations_(0),
+      number_of_osr_compilations_(0),
+      number_of_deoptimizations_(0),
+      number_of_collections_(0),
+      histogram_stack_map_memory_use_("Memory used for stack maps", 16),
+      histogram_code_memory_use_("Memory used for compiled code", 16),
+      histogram_profiling_info_memory_use_("Memory used for profiling info", 16) {
 
-  VLOG(jit) << "Created jit code cache: data size="
-            << PrettySize(data_map_->Size())
-            << ", code size="
-            << PrettySize(code_map_->Size());
-
-  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_map_->Size(), false /*locked*/);
-  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_map_->Size(), false /*locked*/);
+  DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
+  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
+  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
   if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
     PLOG(FATAL) << "create_mspace_with_base failed";
   }
 
-  // Prevent morecore requests from the mspace.
-  mspace_set_footprint_limit(code_mspace_, code_map_->Size());
-  mspace_set_footprint_limit(data_mspace_, data_map_->Size());
+  SetFootprintLimit(current_capacity_);
 
   CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData);
+
+  VLOG(jit) << "Created jit code cache: initial data size="
+            << PrettySize(initial_data_capacity)
+            << ", initial code size="
+            << PrettySize(initial_code_capacity);
 }
 
-bool JitCodeCache::ContainsMethod(ArtMethod* method) const {
-  return ContainsCodePtr(method->GetEntryPointFromQuickCompiledCode());
-}
-
-bool JitCodeCache::ContainsCodePtr(const void* ptr) const {
+bool JitCodeCache::ContainsPc(const void* ptr) const {
   return code_map_->Begin() <= ptr && ptr < code_map_->End();
 }
 
-class ScopedCodeCacheWrite {
+bool JitCodeCache::ContainsMethod(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto& it : method_code_map_) {
+    if (it.second == method) {
+      return true;
+    }
+  }
+  return false;
+}
+
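+// RAII helper that mprotects the whole code region read-write-execute for its scope and
+// restores the read-execute-only protection on destruction.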
+class ScopedCodeCacheWrite : ScopedTrace {
  public:
-  explicit ScopedCodeCacheWrite(MemMap* code_map) : code_map_(code_map) {
+  explicit ScopedCodeCacheWrite(MemMap* code_map)
+      : ScopedTrace("ScopedCodeCacheWrite"),
+        code_map_(code_map) {
+    ScopedTrace trace("mprotect all");
     CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtAll);
   }
   ~ScopedCodeCacheWrite() {
+    ScopedTrace trace("mprotect code");
     CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   }
  private:
@@ -121,14 +196,125 @@
 };
 
 uint8_t* JitCodeCache::CommitCode(Thread* self,
-                                  const uint8_t* mapping_table,
+                                  ArtMethod* method,
                                   const uint8_t* vmap_table,
-                                  const uint8_t* gc_map,
                                   size_t frame_size_in_bytes,
                                   size_t core_spill_mask,
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
-                                  size_t code_size) {
+                                  size_t code_size,
+                                  bool osr) {
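+  // Try to commit once; if the cache is full, garbage collect it and retry a single time.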
+  uint8_t* result = CommitCodeInternal(self,
+                                       method,
+                                       vmap_table,
+                                       frame_size_in_bytes,
+                                       core_spill_mask,
+                                       fp_spill_mask,
+                                       code,
+                                       code_size,
+                                       osr);
+  if (result == nullptr) {
+    // Retry.
+    GarbageCollectCache(self);
+    result = CommitCodeInternal(self,
+                                method,
+                                vmap_table,
+                                frame_size_in_bytes,
+                                core_spill_mask,
+                                fp_spill_mask,
+                                code,
+                                code_size,
+                                osr);
+  }
+  return result;
+}
+
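+// Block until any in-progress code cache collection has finished. Returns true if the
+// caller actually had to wait.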
+bool JitCodeCache::WaitForPotentialCollectionToComplete(Thread* self) {
+  bool in_collection = false;
+  while (collection_in_progress_) {
+    in_collection = true;
+    lock_cond_.Wait(self);
+  }
+  return in_collection;
+}
+
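+// A compiled method's allocation starts with its OatQuickMethodHeader; step back over
+// the alignment-rounded header to recover the allocation address from a code pointer.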
+static uintptr_t FromCodeToAllocation(const void* code) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  return reinterpret_cast<uintptr_t>(code) - RoundUp(sizeof(OatQuickMethodHeader), alignment);
+}
+
+void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
+  uintptr_t allocation = FromCodeToAllocation(code_ptr);
+  const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+  // Notify native debugger that we are about to remove the code.
+  // It does nothing if we are not using native debugger.
+  DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
+
+  // Use the offset directly to bypass the sanity check that the method was
+  // compiled with the optimizing compiler.
+  // TODO(ngeoffray): Clean up.
+  if (method_header->vmap_table_offset_ != 0) {
+    const uint8_t* data = method_header->code_ - method_header->vmap_table_offset_;
+    FreeData(const_cast<uint8_t*>(data));
+  }
+  FreeCode(reinterpret_cast<uint8_t*>(allocation));
+}
+
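+// Remove all JIT artifacts (compiled code, OSR entries and profiling info) belonging to
+// methods allocated in `alloc`.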
+void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  MutexLock mu(self, lock_);
+  // We do not check if a code cache GC is in progress, as this method is called
+  // with the classlinker_classes_lock_ held, and suspending ourselves could
+  // lead to a deadlock.
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->second)) {
+        FreeCode(it->first, it->second);
+        it = method_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+    if (alloc.ContainsUnsafe(it->first)) {
+      // Note that the code has already been removed in the loop above.
+      it = osr_code_map_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+    ProfilingInfo* info = *it;
+    if (alloc.ContainsUnsafe(info->GetMethod())) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+      FreeData(reinterpret_cast<uint8_t*>(info));
+      it = profiling_infos_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+void JitCodeCache::ClearGcRootsInInlineCaches(Thread* self) {
+  MutexLock mu(self, lock_);
+  for (ProfilingInfo* info : profiling_infos_) {
+    if (!info->IsInUseByCompiler()) {
+      info->ClearGcRootsInInlineCaches();
+    }
+  }
+}
+
+uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
+                                          ArtMethod* method,
+                                          const uint8_t* vmap_table,
+                                          size_t frame_size_in_bytes,
+                                          size_t core_spill_mask,
+                                          size_t fp_spill_mask,
+                                          const uint8_t* code,
+                                          size_t code_size,
+                                          bool osr) {
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -136,89 +322,723 @@
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
-
-  MutexLock mu(self, lock_);
+  uint8_t* memory = nullptr;
   {
-    ScopedCodeCacheWrite scc(code_map_.get());
-    uint8_t* result = reinterpret_cast<uint8_t*>(
-        mspace_memalign(code_mspace_, alignment, total_size));
-    if (result == nullptr) {
-      return nullptr;
-    }
-    code_ptr = result + header_size;
-    DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      memory = AllocateCode(total_size);
+      if (memory == nullptr) {
+        return nullptr;
+      }
+      code_ptr = memory + header_size;
 
-    std::copy(code, code + code_size, code_ptr);
-    method_header = reinterpret_cast<OatQuickMethodHeader*>(code_ptr) - 1;
-    new (method_header) OatQuickMethodHeader(
-        (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
-        (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
-        (gc_map == nullptr) ? 0 : code_ptr - gc_map,
-        frame_size_in_bytes,
-        core_spill_mask,
-        fp_spill_mask,
-        code_size);
+      std::copy(code, code + code_size, code_ptr);
+      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      new (method_header) OatQuickMethodHeader(
+          (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
+          frame_size_in_bytes,
+          core_spill_mask,
+          fp_spill_mask,
+          code_size);
+    }
+
+    FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
+                          reinterpret_cast<char*>(code_ptr + code_size));
+    number_of_compilations_++;
+  }
+  // We need to update the entry point in the runnable state for the instrumentation.
+  {
+    MutexLock mu(self, lock_);
+    method_code_map_.Put(code_ptr, method);
+    if (osr) {
+      number_of_osr_compilations_++;
+      osr_code_map_.Put(method, code_ptr);
+    } else {
+      Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+          method, method_header->GetEntryPoint());
+    }
+    if (collection_in_progress_) {
+      // We need to update the live bitmap if there is a GC to ensure it sees this new
+      // code.
+      GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
+    }
+    last_update_time_ns_.StoreRelease(NanoTime());
+    VLOG(jit)
+        << "JIT added (osr=" << std::boolalpha << osr << std::noboolalpha << ") "
+        << PrettyMethod(method) << "@" << method
+        << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
+        << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
+    histogram_code_memory_use_.AddValue(code_size);
+    if (code_size > kCodeSizeLogThreshold) {
+      LOG(INFO) << "JIT allocated "
+                << PrettySize(code_size)
+                << " for compiled code of "
+                << PrettyMethod(method);
+    }
   }
 
-  __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
-                          reinterpret_cast<char*>(code_ptr + code_size));
-
-  ++num_methods_;  // TODO: This is hacky but works since each method has exactly one code region.
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return CodeCacheSizeLocked();
+}
+
+size_t JitCodeCache::CodeCacheSizeLocked() {
+  return used_memory_for_code_;
 }
 
 size_t JitCodeCache::DataCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return DataCacheSizeLocked();
 }
 
-uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
-  size = RoundUp(size, sizeof(void*));
+size_t JitCodeCache::DataCacheSizeLocked() {
+  return used_memory_for_data_;
+}
+
+void JitCodeCache::ClearData(Thread* self, void* data) {
   MutexLock mu(self, lock_);
-  return reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+  FreeData(reinterpret_cast<uint8_t*>(data));
 }
 
-uint8_t* JitCodeCache::AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end) {
-  uint8_t* result = ReserveData(self, end - begin);
-  if (result == nullptr) {
-    return nullptr;  // Out of space in the data cache.
+uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size, ArtMethod* method) {
+  size = RoundUp(size, sizeof(void*));
+  uint8_t* result = nullptr;
+
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    result = AllocateData(size);
   }
-  std::copy(begin, end, result);
+
+  if (result == nullptr) {
+    // Retry.
+    GarbageCollectCache(self);
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    result = AllocateData(size);
+  }
+
+  MutexLock mu(self, lock_);
+  histogram_stack_map_memory_use_.AddValue(size);
+  if (size > kStackMapSizeLogThreshold) {
+    LOG(INFO) << "JIT allocated "
+              << PrettySize(size)
+              << " for stack maps of "
+              << PrettyMethod(method);
+  }
   return result;
 }
 
-const void* JitCodeCache::GetCodeFor(ArtMethod* method) {
-  const void* code = method->GetEntryPointFromQuickCompiledCode();
-  if (ContainsCodePtr(code)) {
-    return code;
+class MarkCodeVisitor FINAL : public StackVisitor {
+ public:
+  MarkCodeVisitor(Thread* thread_in, JitCodeCache* code_cache_in)
+      : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+        code_cache_(code_cache_in),
+        bitmap_(code_cache_->GetLiveBitmap()) {}
+
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+    if (method_header == nullptr) {
+      return true;
+    }
+    const void* code = method_header->GetCode();
+    if (code_cache_->ContainsPc(code)) {
+      // Use the atomic set version, as multiple threads are executing this code.
+      bitmap_->AtomicTestAndSet(FromCodeToAllocation(code));
+    }
+    return true;
   }
-  MutexLock mu(Thread::Current(), lock_);
-  auto it = method_code_map_.find(method);
-  if (it != method_code_map_.end()) {
-    return it->second;
+
+ private:
+  JitCodeCache* const code_cache_;
+  CodeCacheBitmap* const bitmap_;
+};
+
+class MarkCodeClosure FINAL : public Closure {
+ public:
+  MarkCodeClosure(JitCodeCache* code_cache, Barrier* barrier)
+      : code_cache_(code_cache), barrier_(barrier) {}
+
+  void Run(Thread* thread) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ScopedTrace trace(__PRETTY_FUNCTION__);
+    DCHECK(thread == Thread::Current() || thread->IsSuspended());
+    MarkCodeVisitor visitor(thread, code_cache_);
+    visitor.WalkStack();
+    if (kIsDebugBuild) {
+      // The stack walking code queries the side instrumentation stack if it
+      // sees an instrumentation exit pc, so the JIT code of methods in that stack
+      // must have been seen. We sanity check this below.
+      for (const instrumentation::InstrumentationStackFrame& frame
+              : *thread->GetInstrumentationStack()) {
+        // The 'method_' in InstrumentationStackFrame is the one that has return_pc_ in
+        // its stack frame; it is not the method owning return_pc_. We just pass null to
+        // LookupMethodHeader, which only checks the method in debug builds.
+        OatQuickMethodHeader* method_header =
+            code_cache_->LookupMethodHeader(frame.return_pc_, nullptr);
+        if (method_header != nullptr) {
+          const void* code = method_header->GetCode();
+          CHECK(code_cache_->GetLiveBitmap()->Test(FromCodeToAllocation(code)));
+        }
+      }
+    }
+    barrier_->Pass(Thread::Current());
   }
-  return nullptr;
+
+ private:
+  JitCodeCache* const code_cache_;
+  Barrier* const barrier_;
+};
+
+void JitCodeCache::NotifyCollectionDone(Thread* self) {
+  collection_in_progress_ = false;
+  lock_cond_.Broadcast(self);
 }
 
-void JitCodeCache::SaveCompiledCode(ArtMethod* method, const void* old_code_ptr) {
-  DCHECK_EQ(method->GetEntryPointFromQuickCompiledCode(), old_code_ptr);
-  DCHECK(ContainsCodePtr(old_code_ptr)) << PrettyMethod(method) << " old_code_ptr="
-      << old_code_ptr;
-  MutexLock mu(Thread::Current(), lock_);
-  auto it = method_code_map_.find(method);
-  if (it != method_code_map_.end()) {
+void JitCodeCache::SetFootprintLimit(size_t new_footprint) {
+  size_t per_space_footprint = new_footprint / 2;
+  DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
+  DCHECK_EQ(per_space_footprint * 2, new_footprint);
+  mspace_set_footprint_limit(data_mspace_, per_space_footprint);
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    mspace_set_footprint_limit(code_mspace_, per_space_footprint);
+  }
+}
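+
+// For example, SetFootprintLimit(2 * MB) caps the data and code mspaces at 1MB each.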
+
+bool JitCodeCache::IncreaseCodeCacheCapacity() {
+  if (current_capacity_ == max_capacity_) {
+    return false;
+  }
+
+  // Double the capacity if we're below 1MB, or increase it by 1MB if
+  // we're above.
+  if (current_capacity_ < 1 * MB) {
+    current_capacity_ *= 2;
+  } else {
+    current_capacity_ += 1 * MB;
+  }
+  if (current_capacity_ > max_capacity_) {
+    current_capacity_ = max_capacity_;
+  }
+
+  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+    LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
+  }
+
+  SetFootprintLimit(current_capacity_);
+
+  return true;
+}
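+
+// An illustrative growth sequence, assuming current_capacity_ starts at the
+// release-build kInitialCapacity (64KB) with a 64MB max_capacity_: successive
+// calls yield 128KB, 256KB, 512KB, 1MB, then 2MB, 3MB, ... until the cap.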
+
+void JitCodeCache::MarkCompiledCodeOnThreadStacks(Thread* self) {
+  Barrier barrier(0);
+  size_t threads_running_checkpoint = 0;
+  MarkCodeClosure closure(this, &barrier);
+  threads_running_checkpoint = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
+  // Now that we have run our checkpoint, move to a suspended state and wait
+  // for other threads to run the checkpoint.
+  ScopedThreadSuspension sts(self, kSuspended);
+  if (threads_running_checkpoint != 0) {
+    barrier.Increment(self, threads_running_checkpoint);
+  }
+}
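+
+// Note on the checkpoint protocol above: RunCheckpoint returns the number of
+// threads that will run the closure; each MarkCodeClosure::Run ends with
+// barrier_->Pass, and Barrier::Increment blocks until that many passes have
+// occurred, so every running thread's stack is marked before we return.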
+
+bool JitCodeCache::ShouldDoFullCollection() {
+  if (current_capacity_ == max_capacity_) {
+    // Always do a full collection when the code cache is full.
+    return true;
+  } else if (current_capacity_ < kReservedCapacity) {
+    // Always do partial collection when the code cache size is below the reserved
+    // capacity.
+    return false;
+  } else if (last_collection_increased_code_cache_) {
+    // This time do a full collection.
+    return true;
+  } else {
+    // This time do a partial collection.
+    return false;
+  }
+}
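+
+// Summarizing the policy above: at max capacity -> full collection; below
+// kReservedCapacity -> partial; otherwise full only when the last collection
+// increased the code cache.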
+
+void JitCodeCache::GarbageCollectCache(Thread* self) {
+  ScopedTrace trace(__FUNCTION__);
+  if (!garbage_collect_code_) {
+    MutexLock mu(self, lock_);
+    IncreaseCodeCacheCapacity();
     return;
   }
-  method_code_map_.Put(method, old_code_ptr);
+
+  // Wait for an existing collection, or let everyone know we are starting one.
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    if (WaitForPotentialCollectionToComplete(self)) {
+      return;
+    } else {
+      number_of_collections_++;
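+      // Note: the bitmap only needs to cover the code half of the cache;
+      // current_capacity_ / 2 matches the footprint limit set for
+      // code_mspace_ in SetFootprintLimit.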
+      live_bitmap_.reset(CodeCacheBitmap::Create(
+          "code-cache-bitmap",
+          reinterpret_cast<uintptr_t>(code_map_->Begin()),
+          reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2)));
+      collection_in_progress_ = true;
+    }
+  }
+
+  TimingLogger logger("JIT code cache timing logger", true, VLOG_IS_ON(jit));
+  {
+    TimingLogger::ScopedTiming st("Code cache collection", &logger);
+
+    bool do_full_collection = false;
+    {
+      MutexLock mu(self, lock_);
+      do_full_collection = ShouldDoFullCollection();
+    }
+
+    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+      LOG(INFO) << "Do "
+                << (do_full_collection ? "full" : "partial")
+                << " code cache collection, code="
+                << PrettySize(CodeCacheSize())
+                << ", data=" << PrettySize(DataCacheSize());
+    }
+
+    DoCollection(self, /* collect_profiling_info */ do_full_collection);
+
+    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+      LOG(INFO) << "After code cache collection, code="
+                << PrettySize(CodeCacheSize())
+                << ", data=" << PrettySize(DataCacheSize());
+    }
+
+    {
+      MutexLock mu(self, lock_);
+
+      // Increase the code cache only when we do partial collections.
+      // TODO: base this strategy on how full the code cache is?
+      if (do_full_collection) {
+        last_collection_increased_code_cache_ = false;
+      } else {
+        last_collection_increased_code_cache_ = true;
+        IncreaseCodeCacheCapacity();
+      }
+
+      bool next_collection_will_be_full = ShouldDoFullCollection();
+
+      // Start polling the liveness of compiled code to prepare for the next full collection.
+      if (next_collection_will_be_full) {
+        // Save the entry point of methods we have compiled, and update the entry
+        // point of those methods to the interpreter. If the method is invoked, the
+        // interpreter will update its entry point to the compiled code and call it.
+        for (ProfilingInfo* info : profiling_infos_) {
+          const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
+          if (ContainsPc(entry_point)) {
+            info->SetSavedEntryPoint(entry_point);
+            Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+                info->GetMethod(), GetQuickToInterpreterBridge());
+          }
+        }
+
+        DCHECK(CheckLiveCompiledCodeHasProfilingInfo());
+      }
+      live_bitmap_.reset(nullptr);
+      NotifyCollectionDone(self);
+    }
+  }
+  Runtime::Current()->GetJit()->AddTimingLogger(logger);
+}
+
+void JitCodeCache::RemoveUnmarkedCode(Thread* self) {
+  ScopedTrace trace(__FUNCTION__);
+  MutexLock mu(self, lock_);
+  ScopedCodeCacheWrite scc(code_map_.get());
+  // Iterate over all compiled code and remove entries that are not marked.
+  for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+    const void* code_ptr = it->first;
+    ArtMethod* method = it->second;
+    uintptr_t allocation = FromCodeToAllocation(code_ptr);
+    if (GetLiveBitmap()->Test(allocation)) {
+      ++it;
+    } else {
+      FreeCode(code_ptr, method);
+      it = method_code_map_.erase(it);
+    }
+  }
+}
+
+void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) {
+  ScopedTrace trace(__FUNCTION__);
+  {
+    MutexLock mu(self, lock_);
+    if (collect_profiling_info) {
+      // Clear the profiling info of methods that do not have compiled code as entrypoint.
+      // Also remove the saved entry point from the ProfilingInfo objects.
+      for (ProfilingInfo* info : profiling_infos_) {
+        const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
+        if (!ContainsPc(ptr) && !info->IsInUseByCompiler()) {
+          info->GetMethod()->SetProfilingInfo(nullptr);
+        }
+
+        if (info->GetSavedEntryPoint() != nullptr) {
+          info->SetSavedEntryPoint(nullptr);
+          // We are going to move this method back to interpreter. Clear the counter now to
+          // give it a chance to be hot again.
+          info->GetMethod()->ClearCounter();
+        }
+      }
+    } else if (kIsDebugBuild) {
+      // Sanity check that the profiling infos do not have a dangling entry point.
+      for (ProfilingInfo* info : profiling_infos_) {
+        DCHECK(info->GetSavedEntryPoint() == nullptr);
+      }
+    }
+
+    // Mark compiled code that is the entry point of an ArtMethod. Compiled code that is
+    // not an entry point is either:
+    // - OSR compiled code, which will be removed if not on a thread call stack.
+    // - discarded compiled code, which will be removed if not on a thread call stack.
+    for (const auto& it : method_code_map_) {
+      ArtMethod* method = it.second;
+      const void* code_ptr = it.first;
+      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
+        GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
+      }
+    }
+
+    // Empty the OSR method map, as OSR compiled code will be deleted (except for
+    // code on thread stacks).
+    osr_code_map_.clear();
+  }
+
+  // Run a checkpoint on all threads to mark the JIT compiled code they are running.
+  MarkCompiledCodeOnThreadStacks(self);
+
+  // At this point, mutator threads are still running, and entrypoints of methods can
+  // change. We do know they cannot change to a code cache entry that is not marked,
+  // therefore we can safely remove those entries.
+  RemoveUnmarkedCode(self);
+
+  if (collect_profiling_info) {
+    ScopedThreadSuspension sts(self, kSuspended);
+    gc::ScopedGCCriticalSection gcs(
+        self, gc::kGcCauseJitCodeCache, gc::kCollectorTypeJitCodeCache);
+    MutexLock mu(self, lock_);
+    // Free all profiling infos of methods not compiled nor being compiled.
+    auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
+      [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS {
+        const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
+        // We have previously cleared the ProfilingInfo pointer in the ArtMethod in the hope
+        // that the compiled code would not get revived. As mutator threads run concurrently,
+        // they may have revived the compiled code, and now we are in the situation where
+        // a method has compiled code but no ProfilingInfo.
+        // We make sure compiled methods have a ProfilingInfo object. It is needed for
+        // code cache collection.
+        if (ContainsPc(ptr) &&
+            info->GetMethod()->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
+          // We clear the inline caches, as the classes in them might be stale.
+          info->ClearGcRootsInInlineCaches();
+          // Do a fence to make sure the clearing is seen before attaching to the method.
+          QuasiAtomic::ThreadFenceRelease();
+          info->GetMethod()->SetProfilingInfo(info);
+        } else if (info->GetMethod()->GetProfilingInfo(kRuntimePointerSize) != info) {
+          // No need for this ProfilingInfo object anymore.
+          FreeData(reinterpret_cast<uint8_t*>(info));
+          return true;
+        }
+        return false;
+      });
+    profiling_infos_.erase(profiling_kept_end, profiling_infos_.end());
+    DCHECK(CheckLiveCompiledCodeHasProfilingInfo());
+  }
+}
+
+bool JitCodeCache::CheckLiveCompiledCodeHasProfilingInfo() {
+  ScopedTrace trace(__FUNCTION__);
+  // Check that methods we have compiled do have a ProfilingInfo object. We would
+  // have memory leaks of compiled code otherwise.
+  for (const auto& it : method_code_map_) {
+    ArtMethod* method = it.second;
+    if (method->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
+      const void* code_ptr = it.first;
+      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
+        // If the code is not dead, then we have a problem. Note that this can even
+        // happen just after a collection, as mutator threads are running in parallel
+        // and could deoptimize an existing compiled code.
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+OatQuickMethodHeader* JitCodeCache::LookupMethodHeader(uintptr_t pc, ArtMethod* method) {
+  static_assert(kRuntimeISA != kThumb2, "kThumb2 cannot be a runtime ISA");
+  if (kRuntimeISA == kArm) {
+    // On Thumb-2, the pc is offset by one.
+    --pc;
+  }
+  if (!ContainsPc(reinterpret_cast<const void*>(pc))) {
+    return nullptr;
+  }
+
+  MutexLock mu(Thread::Current(), lock_);
+  if (method_code_map_.empty()) {
+    return nullptr;
+  }
+  auto it = method_code_map_.lower_bound(reinterpret_cast<const void*>(pc));
+  if (it == method_code_map_.begin()) {
+    // `pc` precedes the first entry in the map; decrementing begin() would be UB.
+    return nullptr;
+  }
+  --it;
+
+  const void* code_ptr = it->first;
+  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+  if (!method_header->Contains(pc)) {
+    return nullptr;
+  }
+  if (kIsDebugBuild && method != nullptr) {
+    DCHECK_EQ(it->second, method)
+        << PrettyMethod(method) << " " << PrettyMethod(it->second) << " " << std::hex << pc;
+  }
+  return method_header;
+}
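+
+// Illustration of the lookup above: if the map holds code pointers
+// {0x1000, 0x2000} and pc == 0x1abc, lower_bound returns the 0x2000 entry and
+// the decrement lands on 0x1000, whose OatQuickMethodHeader must then contain pc.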
+
+OatQuickMethodHeader* JitCodeCache::LookupOsrMethodHeader(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  auto it = osr_code_map_.find(method);
+  if (it == osr_code_map_.end()) {
+    return nullptr;
+  }
+  return OatQuickMethodHeader::FromCodePointer(it->second);
+}
+
+ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
+                                              ArtMethod* method,
+                                              const std::vector<uint32_t>& entries,
+                                              bool retry_allocation)
+    // No thread safety analysis as we are using TryLock/Unlock explicitly.
+    NO_THREAD_SAFETY_ANALYSIS {
+  ProfilingInfo* info = nullptr;
+  if (!retry_allocation) {
+    // If we are allocating for the interpreter, just try to lock, to avoid
+    // lock contention with the JIT.
+    if (lock_.ExclusiveTryLock(self)) {
+      info = AddProfilingInfoInternal(self, method, entries);
+      lock_.ExclusiveUnlock(self);
+    }
+  } else {
+    {
+      MutexLock mu(self, lock_);
+      info = AddProfilingInfoInternal(self, method, entries);
+    }
+
+    if (info == nullptr) {
+      GarbageCollectCache(self);
+      MutexLock mu(self, lock_);
+      info = AddProfilingInfoInternal(self, method, entries);
+    }
+  }
+  return info;
+}
+
+ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self ATTRIBUTE_UNUSED,
+                                                      ArtMethod* method,
+                                                      const std::vector<uint32_t>& entries) {
+  size_t profile_info_size = RoundUp(
+      sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
+      sizeof(void*));
+
+  // Check whether some other thread has concurrently created it.
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+  if (info != nullptr) {
+    return info;
+  }
+
+  uint8_t* data = AllocateData(profile_info_size);
+  if (data == nullptr) {
+    return nullptr;
+  }
+  info = new (data) ProfilingInfo(method, entries);
+
+  // Make sure other threads see the data in the profiling info object before the
+  // store in the ArtMethod's ProfilingInfo pointer.
+  QuasiAtomic::ThreadFenceRelease();
+
+  method->SetProfilingInfo(info);
+  profiling_infos_.push_back(info);
+  histogram_profiling_info_memory_use_.AddValue(profile_info_size);
+  return info;
+}
+
+// NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock
+// is already held.
+void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS {
+  if (code_mspace_ == mspace) {
+    size_t result = code_end_;
+    code_end_ += increment;
+    return reinterpret_cast<void*>(result + code_map_->Begin());
+  } else {
+    DCHECK_EQ(data_mspace_, mspace);
+    size_t result = data_end_;
+    data_end_ += increment;
+    return reinterpret_cast<void*>(result + data_map_->Begin());
+  }
+}
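+
+// MoreCore is the sbrk-style callback the mspaces use to grow: it returns the
+// current end of the backing map and bumps that end by `increment`. E.g. with
+// code_end_ == 64KB and increment == 4KB, dlmalloc receives
+// code_map_->Begin() + 64KB and code_end_ becomes 68KB.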
+
+void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_locations,
+                                      std::vector<MethodReference>& methods) {
+  ScopedTrace trace(__FUNCTION__);
+  MutexLock mu(Thread::Current(), lock_);
+  for (const ProfilingInfo* info : profiling_infos_) {
+    ArtMethod* method = info->GetMethod();
+    const DexFile* dex_file = method->GetDexFile();
+    if (ContainsElement(dex_base_locations, dex_file->GetBaseLocation())) {
+      methods.emplace_back(dex_file, method->GetDexMethodIndex());
+    }
+  }
+}
+
+uint64_t JitCodeCache::GetLastUpdateTimeNs() const {
+  return last_update_time_ns_.LoadAcquire();
+}
+
+bool JitCodeCache::IsOsrCompiled(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  return osr_code_map_.find(method) != osr_code_map_.end();
+}
+
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) {
+  if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+
+  MutexLock mu(self, lock_);
+  if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) {
+    return false;
+  }
+
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+  if (info == nullptr) {
+    VLOG(jit) << PrettyMethod(method) << " needs a ProfilingInfo to be compiled";
+    // Because the counter is not atomic, there are some rare cases where we may not
+    // hit the threshold for creating the ProfilingInfo. Reset the counter now to
+    // "correct" this.
+    method->ClearCounter();
+    return false;
+  }
+
+  if (info->IsMethodBeingCompiled(osr)) {
+    return false;
+  }
+
+  info->SetIsMethodBeingCompiled(true, osr);
+  return true;
+}
+
+ProfilingInfo* JitCodeCache::NotifyCompilerUse(ArtMethod* method, Thread* self) {
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+  if (info != nullptr) {
+    info->IncrementInlineUse();
+  }
+  return info;
+}
+
+void JitCodeCache::DoneCompilerUse(ArtMethod* method, Thread* self) {
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+  DCHECK(info != nullptr);
+  info->DecrementInlineUse();
+}
+
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED, bool osr) {
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+  DCHECK(info->IsMethodBeingCompiled(osr));
+  info->SetIsMethodBeingCompiled(false, osr);
+}
+
+size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
+  MutexLock mu(Thread::Current(), lock_);
+  return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
+}
+
+void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,
+                                             const OatQuickMethodHeader* header) {
+  ProfilingInfo* profiling_info = method->GetProfilingInfo(kRuntimePointerSize);
+  if ((profiling_info != nullptr) &&
+      (profiling_info->GetSavedEntryPoint() == header->GetEntryPoint())) {
+    // Prevent future uses of the compiled code.
+    profiling_info->SetSavedEntryPoint(nullptr);
+  }
+
+  if (method->GetEntryPointFromQuickCompiledCode() == header->GetEntryPoint()) {
+    // The entry point is the one to invalidate, so we just update it to the
+    // interpreter entry point and clear the counter to get the method JITted
+    // again.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, GetQuickToInterpreterBridge());
+    method->ClearCounter();
+  } else {
+    MutexLock mu(Thread::Current(), lock_);
+    auto it = osr_code_map_.find(method);
+    if (it != osr_code_map_.end() && OatQuickMethodHeader::FromCodePointer(it->second) == header) {
+      // Remove the OSR method, to avoid using it again.
+      osr_code_map_.erase(it);
+    }
+  }
+  MutexLock mu(Thread::Current(), lock_);
+  number_of_deoptimizations_++;
+}
+
+uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  uint8_t* result = reinterpret_cast<uint8_t*>(
+      mspace_memalign(code_mspace_, alignment, code_size));
+  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+  // Ensure the header ends up at expected instruction alignment.
+  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
+  used_memory_for_code_ += mspace_usable_size(result);
+  return result;
+}
+
+void JitCodeCache::FreeCode(uint8_t* code) {
+  used_memory_for_code_ -= mspace_usable_size(code);
+  mspace_free(code_mspace_, code);
+}
+
+uint8_t* JitCodeCache::AllocateData(size_t data_size) {
+  void* result = mspace_malloc(data_mspace_, data_size);
+  used_memory_for_data_ += mspace_usable_size(result);
+  return reinterpret_cast<uint8_t*>(result);
+}
+
+void JitCodeCache::FreeData(uint8_t* data) {
+  used_memory_for_data_ -= mspace_usable_size(data);
+  mspace_free(data_mspace_, data);
+}
+
+void JitCodeCache::Dump(std::ostream& os) {
+  MutexLock mu(Thread::Current(), lock_);
+  os << "Current JIT code cache size: " << PrettySize(used_memory_for_code_) << "\n"
+     << "Current JIT data cache size: " << PrettySize(used_memory_for_data_) << "\n"
+     << "Current JIT capacity: " << PrettySize(current_capacity_) << "\n"
+     << "Current number of JIT code cache entries: " << method_code_map_.size() << "\n"
+     << "Total number of JIT compilations: " << number_of_compilations_ << "\n"
+     << "Total number of JIT compilations for on stack replacement: "
+        << number_of_osr_compilations_ << "\n"
+     << "Total number of deoptimizations: " << number_of_deoptimizations_ << "\n"
+     << "Total number of JIT code cache collections: " << number_of_collections_ << std::endl;
+  histogram_stack_map_memory_use_.PrintMemoryUse(os);
+  histogram_code_memory_use_.PrintMemoryUse(os);
+  histogram_profiling_info_memory_use_.PrintMemoryUse(os);
 }
 
 }  // namespace jit
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index fa90c18..1938221 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -20,11 +20,13 @@
 #include "instrumentation.h"
 
 #include "atomic.h"
+#include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "gc/allocator/dlmalloc.h"
+#include "gc/accounting/bitmap.h"
 #include "gc_root.h"
 #include "jni.h"
+#include "method_reference.h"
 #include "oat_file.h"
 #include "object_callbacks.h"
 #include "safe_map.h"
@@ -33,90 +35,307 @@
 namespace art {
 
 class ArtMethod;
-class CompiledMethod;
-class CompilerCallbacks;
+class LinearAlloc;
+class ProfilingInfo;
 
 namespace jit {
 
 class JitInstrumentationCache;
 
+// Alignment in bytes that will suit all architectures.
+static constexpr int kJitCodeAlignment = 16;
+using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAlignment>;
+
 class JitCodeCache {
  public:
-  static constexpr size_t kMaxCapacity = 1 * GB;
-  static constexpr size_t kDefaultCapacity = 2 * MB;
+  static constexpr size_t kMaxCapacity = 64 * MB;
+  // Set the default to a very low amount for debug builds, to stress the code
+  // cache collection.
+  static constexpr size_t kInitialCapacity = kIsDebugBuild ? 8 * KB : 64 * KB;
+
+  // By default, do not GC until reaching 256KB.
+  static constexpr size_t kReservedCapacity = kInitialCapacity * 4;
 
   // Create the code cache with a code + data capacity equal to "capacity"; the error
   // message is passed in the out arg error_msg.
-  static JitCodeCache* Create(size_t capacity, std::string* error_msg);
+  static JitCodeCache* Create(size_t initial_capacity,
+                              size_t max_capacity,
+                              bool generate_debug_info,
+                              std::string* error_msg);
 
-  size_t NumMethods() const {
-    return num_methods_;
-  }
-
+  // Number of bytes allocated in the code cache.
   size_t CodeCacheSize() REQUIRES(!lock_);
 
+  // Number of bytes allocated in the data cache.
   size_t DataCacheSize() REQUIRES(!lock_);
 
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  // Notify the code cache that the compiler wants to use the
+  // profiling info of `method` to drive optimizations,
+  // and therefore ensure the returned profiling info object is not
+  // collected.
+  ProfilingInfo* NotifyCompilerUse(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompiling(ArtMethod* method, Thread* self, bool osr)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompilerUse(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
-                      const uint8_t* mapping_table,
+                      ArtMethod* method,
                       const uint8_t* vmap_table,
-                      const uint8_t* gc_map,
                       size_t frame_size_in_bytes,
                       size_t core_spill_mask,
                       size_t fp_spill_mask,
                       const uint8_t* code,
-                      size_t code_size)
+                      size_t code_size,
+                      bool osr)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
-  // Return true if the code cache contains the code pointer which si the entrypoint of the method.
-  bool ContainsMethod(ArtMethod* method) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  // Return true if the code cache contains this pc.
+  bool ContainsPc(const void* pc) const;
 
-  // Return true if the code cache contains a code ptr.
-  bool ContainsCodePtr(const void* ptr) const;
+  // Return true if the code cache contains this method.
+  bool ContainsMethod(ArtMethod* method) REQUIRES(!lock_);
 
   // Reserve a region of data of size at least "size". Returns null if there is no more room.
-  uint8_t* ReserveData(Thread* self, size_t size) REQUIRES(!lock_);
-
-  // Add a data array of size (end - begin) with the associated contents, returns null if there
-  // is no more room.
-  uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
+  uint8_t* ReserveData(Thread* self, size_t size, ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
-  // Get code for a method, returns null if it is not in the jit cache.
-  const void* GetCodeFor(ArtMethod* method)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+  // Clear data from the data portion of the code cache.
+  void ClearData(Thread* self, void* data)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
 
-  // Save the compiled code for a method so that GetCodeFor(method) will return old_code_ptr if the
-  // entrypoint isn't within the cache.
-  void SaveCompiledCode(ArtMethod* method, const void* old_code_ptr)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+  CodeCacheBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+
+  // Return whether we should do a full collection given the current state of the cache.
+  bool ShouldDoFullCollection()
+      REQUIRES(lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Perform a collection on the code cache.
+  void GarbageCollectCache(Thread* self)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Given the 'pc', try to find the JIT compiled code associated with it.
+  // Return null if 'pc' is not in the code cache. 'method' is passed for a
+  // sanity check.
+  OatQuickMethodHeader* LookupMethodHeader(uintptr_t pc, ArtMethod* method)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  OatQuickMethodHeader* LookupOsrMethodHeader(ArtMethod* method)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Remove all methods in our cache that were allocated by 'alloc'.
+  void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void ClearGcRootsInInlineCaches(Thread* self) REQUIRES(!lock_);
+
+  // Create a 'ProfilingInfo' for 'method'. If 'retry_allocation' is true,
+  // we will collect and retry if the first allocation is unsuccessful.
+  ProfilingInfo* AddProfilingInfo(Thread* self,
+                                  ArtMethod* method,
+                                  const std::vector<uint32_t>& entries,
+                                  bool retry_allocation)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
+    return mspace == code_mspace_ || mspace == data_mspace_;
+  }
+
+  void* MoreCore(const void* mspace, intptr_t increment);
+
+  // Adds to `methods` all profiled methods which are part of any of the given dex locations.
+  void GetProfiledMethods(const std::set<std::string>& dex_base_locations,
+                          std::vector<MethodReference>& methods)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  uint64_t GetLastUpdateTimeNs() const;
+
+  size_t GetCurrentCapacity() REQUIRES(!lock_) {
+    MutexLock lock(Thread::Current(), lock_);
+    return current_capacity_;
+  }
+
+  size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_);
+
+  void InvalidateCompiledCodeFor(ArtMethod* method, const OatQuickMethodHeader* code)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void Dump(std::ostream& os) REQUIRES(!lock_);
+
+  bool IsOsrCompiled(ArtMethod* method) REQUIRES(!lock_);
 
  private:
-  // Takes ownership of code_mem_map.
-  JitCodeCache(MemMap* code_map, MemMap* data_map);
+  // Take ownership of maps.
+  JitCodeCache(MemMap* code_map,
+               MemMap* data_map,
+               size_t initial_code_capacity,
+               size_t initial_data_capacity,
+               size_t max_capacity,
+               bool garbage_collect_code);
 
-  // Lock which guards.
+  // Internal version of 'CommitCode' that does not retry if the
+  // allocation fails. Return null on failure.
+  uint8_t* CommitCodeInternal(Thread* self,
+                              ArtMethod* method,
+                              const uint8_t* vmap_table,
+                              size_t frame_size_in_bytes,
+                              size_t core_spill_mask,
+                              size_t fp_spill_mask,
+                              const uint8_t* code,
+                              size_t code_size,
+                              bool osr)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ProfilingInfo* AddProfilingInfoInternal(Thread* self,
+                                          ArtMethod* method,
+                                          const std::vector<uint32_t>& entries)
+      REQUIRES(lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // If a collection is in progress, wait for it to finish. Return
+  // whether the thread actually waited.
+  bool WaitForPotentialCollectionToComplete(Thread* self)
+      REQUIRES(lock_) REQUIRES(!Locks::mutator_lock_);
+
+  // Free the mspace allocations taken by 'method'.
+  void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
+
+  // Number of bytes allocated in the code cache.
+  size_t CodeCacheSizeLocked() REQUIRES(lock_);
+
+  // Number of bytes allocated in the data cache.
+  size_t DataCacheSizeLocked() REQUIRES(lock_);
+
+  // Notify all waiting threads that a collection is done.
+  void NotifyCollectionDone(Thread* self) REQUIRES(lock_);
+
+  // Try to increase the current capacity of the code cache. Return whether we
+  // succeeded at doing so.
+  bool IncreaseCodeCacheCapacity() REQUIRES(lock_);
+
+  // Set the footprint limit of the code cache.
+  void SetFootprintLimit(size_t new_footprint) REQUIRES(lock_);
+
+  void DoCollection(Thread* self, bool collect_profiling_info)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void RemoveUnmarkedCode(Thread* self)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void MarkCompiledCodeOnThreadStacks(Thread* self)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool CheckLiveCompiledCodeHasProfilingInfo()
+      REQUIRES(lock_);
+
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
+  uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
+  void FreeData(uint8_t* data) REQUIRES(lock_);
+  uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
+
+  // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
+  // Condition to wait on during collection.
+  ConditionVariable lock_cond_ GUARDED_BY(lock_);
+  // Whether there is a code cache collection in progress.
+  bool collection_in_progress_ GUARDED_BY(lock_);
   // Mem map which holds code.
   std::unique_ptr<MemMap> code_map_;
   // Mem map which holds data (stack maps and profiling info).
   std::unique_ptr<MemMap> data_map_;
   // The opaque mspace for allocating code.
-  void* code_mspace_;
+  void* code_mspace_ GUARDED_BY(lock_);
   // The opaque mspace for allocating data.
-  void* data_mspace_;
-  // Number of compiled methods.
-  size_t num_methods_;
-  // This map holds code for methods if they were deoptimized by the instrumentation stubs. This is
-  // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks.
-  SafeMap<ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_);
+  void* data_mspace_ GUARDED_BY(lock_);
+  // Bitmap for collecting code and data.
+  std::unique_ptr<CodeCacheBitmap> live_bitmap_;
+  // Holds compiled code associated with its ArtMethod.
+  SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+  // Holds OSR compiled code associated with the ArtMethod.
+  SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_);
+  // ProfilingInfo objects we have allocated.
+  std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
+
+  // The maximum capacity in bytes this code cache can go to.
+  size_t max_capacity_ GUARDED_BY(lock_);
+
+  // The current capacity in bytes of the code cache.
+  size_t current_capacity_ GUARDED_BY(lock_);
+
+  // The current footprint in bytes of the code portion of the code cache.
+  size_t code_end_ GUARDED_BY(lock_);
+
+  // The current footprint in bytes of the data portion of the code cache.
+  size_t data_end_ GUARDED_BY(lock_);
+
+  // Whether the last collection round increased the code cache.
+  bool last_collection_increased_code_cache_ GUARDED_BY(lock_);
+
+  // Last time the code cache was updated.
+  // It is atomic to avoid locking when reading it.
+  Atomic<uint64_t> last_update_time_ns_;
+
+  // Whether we can do garbage collection.
+  const bool garbage_collect_code_;
+
+  // The size in bytes of used memory for the data portion of the code cache.
+  size_t used_memory_for_data_ GUARDED_BY(lock_);
+
+  // The size in bytes of used memory for the code portion of the code cache.
+  size_t used_memory_for_code_ GUARDED_BY(lock_);
+
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t number_of_compilations_ GUARDED_BY(lock_);
+
+  // Number of compilations for on-stack-replacement done throughout the lifetime of the JIT.
+  size_t number_of_osr_compilations_ GUARDED_BY(lock_);
+
+  // Number of deoptimizations done throughout the lifetime of the JIT.
+  size_t number_of_deoptimizations_ GUARDED_BY(lock_);
+
+  // Number of code cache collections done throughout the lifetime of the JIT.
+  size_t number_of_collections_ GUARDED_BY(lock_);
+
+  // Histogram for keeping track of stack map size statistics.
+  Histogram<uint64_t> histogram_stack_map_memory_use_ GUARDED_BY(lock_);
+
+  // Histogram for keeping track of code size statistics.
+  Histogram<uint64_t> histogram_code_memory_use_ GUARDED_BY(lock_);
+
+  // Histogram for keeping track of profiling info statistics.
+  Histogram<uint64_t> histogram_profiling_info_memory_use_ GUARDED_BY(lock_);
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
-
 }  // namespace jit
 }  // namespace art
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
deleted file mode 100644
index 9b9c5d2..0000000
--- a/runtime/jit/jit_instrumentation.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "jit_instrumentation.h"
-
-#include "art_method-inl.h"
-#include "jit.h"
-#include "jit_code_cache.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-namespace jit {
-
-class JitCompileTask FINAL : public Task {
- public:
-  explicit JitCompileTask(ArtMethod* method) : method_(method) {
-    ScopedObjectAccess soa(Thread::Current());
-    // Add a global ref to the class to prevent class unloading until compilation is done.
-    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
-    CHECK(klass_ != nullptr);
-  }
-
-  ~JitCompileTask() {
-    ScopedObjectAccess soa(Thread::Current());
-    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
-  }
-
-  void Run(Thread* self) OVERRIDE {
-    ScopedObjectAccess soa(self);
-    VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
-    }
-  }
-
-  void Finalize() OVERRIDE {
-    delete this;
-  }
-
- private:
-  ArtMethod* const method_;
-  jobject klass_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
-};
-
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
-                                                 size_t warm_method_threshold)
-    : hot_method_threshold_(hot_method_threshold),
-      warm_method_threshold_(warm_method_threshold) {
-}
-
-void JitInstrumentationCache::CreateThreadPool() {
-  thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
-}
-
-void JitInstrumentationCache::DeleteThreadPool() {
-  DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
-  thread_pool_.reset();
-}
-
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
-  ScopedObjectAccessUnchecked soa(self);
-  // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
-  // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() || method->IsNative() ||
-      Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
-    return;
-  }
-  if (thread_pool_.get() == nullptr) {
-    DCHECK(Runtime::Current()->IsShuttingDown(self));
-    return;
-  }
-  uint16_t sample_count = method->IncrementCounter();
-  if (sample_count == warm_method_threshold_) {
-    ProfilingInfo* info = method->CreateProfilingInfo();
-    if (info != nullptr) {
-      VLOG(jit) << "Start profiling " << PrettyMethod(method);
-    }
-  }
-  if (sample_count == hot_method_threshold_) {
-    thread_pool_->AddTask(self, new JitCompileTask(
-        method->GetInterfaceMethodIfProxy(sizeof(void*))));
-    thread_pool_->StartWorkers(self);
-  }
-}
-
-JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
-    : instrumentation_cache_(cache) {
-  CHECK(instrumentation_cache_ != nullptr);
-}
-
-void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
-                                                          mirror::Object* this_object,
-                                                          ArtMethod* caller,
-                                                          uint32_t dex_pc,
-                                                          ArtMethod* callee ATTRIBUTE_UNUSED) {
-  DCHECK(this_object != nullptr);
-  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
-  if (info != nullptr) {
-    // Since the instrumentation is marked from the declaring class we need to mark the card so
-    // that mod-union tables and card rescanning know about the update.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
-    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
-  }
-}
-
-void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
-  thread_pool_->Wait(self, false, false);
-}
-
-}  // namespace jit
-}  // namespace art
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
deleted file mode 100644
index 9eb464b..0000000
--- a/runtime/jit/jit_instrumentation.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
-#define ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
-
-#include <unordered_map>
-
-#include "instrumentation.h"
-
-#include "atomic.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "gc_root.h"
-#include "jni.h"
-#include "object_callbacks.h"
-#include "thread_pool.h"
-
-namespace art {
-namespace mirror {
-  class Class;
-  class Object;
-  class Throwable;
-}  // namespace mirror
-class ArtField;
-class ArtMethod;
-union JValue;
-class Thread;
-
-namespace jit {
-
-// Keeps track of which methods are hot.
-class JitInstrumentationCache {
- public:
-  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateThreadPool();
-  void DeleteThreadPool();
-  // Wait until there is no more pending compilation tasks.
-  void WaitForCompilationToFinish(Thread* self);
-
- private:
-  size_t hot_method_threshold_;
-  size_t warm_method_threshold_;
-  std::unique_ptr<ThreadPool> thread_pool_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
-};
-
-class JitInstrumentationListener : public instrumentation::InstrumentationListener {
- public:
-  explicit JitInstrumentationListener(JitInstrumentationCache* cache);
-
-  void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
-                     ArtMethod* method, uint32_t /*dex_pc*/)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
-  void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                    const JValue& /*return_value*/)
-      OVERRIDE { }
-  void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
-  void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                 ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                 ArtField* /*field*/) OVERRIDE { }
-  void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                    ArtField* /*field*/, const JValue& /*field_value*/)
-      OVERRIDE { }
-  void ExceptionCaught(Thread* /*thread*/,
-                       mirror::Throwable* /*exception_object*/) OVERRIDE { }
-
-  void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
-                  ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
-
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    CHECK_LE(dex_pc_offset, 0);
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
-
-  void InvokeVirtualOrInterface(Thread* thread,
-                                mirror::Object* this_object,
-                                ArtMethod* caller,
-                                uint32_t dex_pc,
-                                ArtMethod* callee)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  JitInstrumentationCache* const instrumentation_cache_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener);
-};
-
-}  // namespace jit
-}  // namespace art
-
-#endif  // ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
new file mode 100644
index 0000000..aa606a2
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.cc
@@ -0,0 +1,715 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offline_profiling_info.h"
+
+#include "errno.h"
+#include <limits.h>
+#include <vector>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+
+#include "art_method-inl.h"
+#include "base/mutex.h"
+#include "base/scoped_flock.h"
+#include "base/stl_util.h"
+#include "base/systrace.h"
+#include "base/unix_file/fd_file.h"
+#include "jit/profiling_info.h"
+#include "os.h"
+#include "safe_map.h"
+
+namespace art {
+
+const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '1', '\0' };
+
+static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
+
+// Debug flag to ignore checksums when testing if a method or a class is present in the profile.
+// Used to facilitate testing profile guided compilation across a large number of apps
+// using the same test profile.
+static constexpr bool kDebugIgnoreChecksum = false;
+
+// Transform the actual dex location into relative paths.
+// Note: this is OK because we don't store profiles of different apps into the same file.
+// Apps with split apks don't cause trouble because each split has a different name and will not
+// collide with other entries.
+std::string ProfileCompilationInfo::GetProfileDexFileKey(const std::string& dex_location) {
+  DCHECK(!dex_location.empty());
+  size_t last_sep_index = dex_location.find_last_of('/');
+  if (last_sep_index == std::string::npos) {
+    return dex_location;
+  } else {
+    DCHECK(last_sep_index < dex_location.size());
+    return dex_location.substr(last_sep_index + 1);
+  }
+}
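+
+// For example, a (hypothetical) location "/data/app/com.example-1/base.apk"
+// maps to the key "base.apk".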
+
+bool ProfileCompilationInfo::AddMethodsAndClasses(
+    const std::vector<MethodReference>& methods,
+    const std::set<DexCacheResolvedClasses>& resolved_classes) {
+  for (const MethodReference& method : methods) {
+    if (!AddMethodIndex(GetProfileDexFileKey(method.dex_file->GetLocation()),
+                        method.dex_file->GetLocationChecksum(),
+                        method.dex_method_index)) {
+      return false;
+    }
+  }
+  for (const DexCacheResolvedClasses& dex_cache : resolved_classes) {
+    if (!AddResolvedClasses(dex_cache)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool ProfileCompilationInfo::MergeAndSave(const std::string& filename,
+                                          uint64_t* bytes_written,
+                                          bool force) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  ScopedFlock flock;
+  std::string error;
+  if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) {
+    LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error;
+    return false;
+  }
+
+  int fd = flock.GetFile()->Fd();
+
+  // Load the file but keep a copy around to be able to infer if the content has changed.
+  ProfileCompilationInfo fileInfo;
+  ProfileLoadSatus status = fileInfo.LoadInternal(fd, &error);
+  if (status == kProfileLoadSuccess) {
+    // Merge the content of file into the current object.
+    if (MergeWith(fileInfo)) {
+      // If after the merge we have the same data as what is in the file, there's no
+      // point in actually doing the write. The file will be exactly the same as before.
+      if (Equals(fileInfo)) {
+        if (bytes_written != nullptr) {
+          *bytes_written = 0;
+        }
+        return true;
+      }
+    } else {
+      LOG(WARNING) << "Could not merge previous profile data from file " << filename;
+      if (!force) {
+        return false;
+      }
+    }
+  } else if (force &&
+             ((status == kProfileLoadVersionMismatch) || (status == kProfileLoadBadData))) {
+    // Log a warning but don't return false. We will clear the profile anyway.
+    LOG(WARNING) << "Clearing bad or obsolete profile data from file "
+                 << filename << ": " << error;
+  } else {
+    LOG(WARNING) << "Could not load profile data from file " << filename << ": " << error;
+    return false;
+  }
+
+  // We need to clear the data because we don't support appending to the profiles yet.
+  if (!flock.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear profile file: " << filename;
+    return false;
+  }
+
+  // This doesn't need locking because we are trying to lock the file for exclusive
+  // access and fail immediately if we can't.
+  bool result = Save(fd);
+  if (result) {
+    VLOG(profiler) << "Successfully saved profile info to " << filename
+        << " Size: " << GetFileSizeBytes(filename);
+    if (bytes_written != nullptr) {
+      *bytes_written = GetFileSizeBytes(filename);
+    }
+  } else {
+    VLOG(profiler) << "Failed to save profile info to " << filename;
+  }
+  return result;
+}
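+
+// Taken together, the flow above: lock the profile file, load and merge its
+// current contents, skip the write when nothing changed, and otherwise clear
+// and rewrite the file, since appending to profiles is not supported yet.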
+
+// Returns true if all the bytes were successfully written to the file descriptor.
+static bool WriteBuffer(int fd, const uint8_t* buffer, size_t byte_count) {
+  while (byte_count > 0) {
+    int bytes_written = TEMP_FAILURE_RETRY(write(fd, buffer, byte_count));
+    if (bytes_written == -1) {
+      return false;
+    }
+    byte_count -= bytes_written;  // Reduce the number of remaining bytes.
+    buffer += bytes_written;  // Move the buffer forward.
+  }
+  return true;
+}
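+
+// Note: TEMP_FAILURE_RETRY above restarts write(2) when it fails with EINTR;
+// the surrounding loop additionally handles short writes by advancing the
+// buffer and reducing the remaining byte count.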
+
+// Add the string bytes to the buffer.
+static void AddStringToBuffer(std::vector<uint8_t>* buffer, const std::string& value) {
+  buffer->insert(buffer->end(), value.begin(), value.end());
+}
+
+// Insert each byte, from low to high, into the buffer.
+template <typename T>
+static void AddUintToBuffer(std::vector<uint8_t>* buffer, T value) {
+  for (size_t i = 0; i < sizeof(T); i++) {
+    buffer->push_back((value >> (i * kBitsPerByte)) & 0xff);
+  }
+}
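+
+// For example, AddUintToBuffer<uint32_t>(&buffer, 0x11223344) appends the
+// bytes 0x44, 0x33, 0x22, 0x11, i.e. values are serialized little-endian.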
+
+static constexpr size_t kLineHeaderSize =
+    3 * sizeof(uint16_t) +  // method_set.size + class_set.size + dex_location.size
+    sizeof(uint32_t);       // checksum
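+
+// A sketch of the arithmetic: kLineHeaderSize is 3 * 2 + 4 = 10 bytes, so a
+// hypothetical line for "base.apk" with 2 methods and 1 class occupies
+// 10 + 8 (location) + 2 * 2 (method ids) + 1 * 2 (class ids) = 24 bytes.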
+
+/**
+ * Serialization format:
+ *    magic,version,number_of_lines
+ *    dex_location1,number_of_methods1,number_of_classes1,dex_location_checksum1, \
+ *        method_id11,method_id12...,class_id1,class_id2...
+ *    dex_location2,number_of_methods2,number_of_classes2,dex_location_checksum2, \
+ *        method_id21,method_id22...,class_id1,class_id2...
+ *    .....
+ **/
+bool ProfileCompilationInfo::Save(int fd) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  DCHECK_GE(fd, 0);
+
+  // Cache at most 5KB before writing.
+  static constexpr size_t kMaxSizeToKeepBeforeWriting = 5 * KB;
+  // Use a vector wrapper to avoid keeping track of offsets when we add elements.
+  std::vector<uint8_t> buffer;
+  if (!WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic)) ||
+      !WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion))) {
+    return false;
+  }
+  AddUintToBuffer(&buffer, static_cast<uint16_t>(info_.size()));
+
+  for (const auto& it : info_) {
+    if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
+      if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
+        return false;
+      }
+      buffer.clear();
+    }
+    const std::string& dex_location = it.first;
+    const DexFileData& dex_data = it.second;
+    if (dex_data.method_set.empty() && dex_data.class_set.empty()) {
+      continue;
+    }
+
+    if (dex_location.size() >= kMaxDexFileKeyLength) {
+      LOG(WARNING) << "DexFileKey exceeds allocated limit";
+      return false;
+    }
+
+    // Make sure that the buffer has enough capacity to avoid repeated resizings
+    // while we add data.
+    size_t required_capacity = buffer.size() +
+        kLineHeaderSize +
+        dex_location.size() +
+        sizeof(uint16_t) * (dex_data.class_set.size() + dex_data.method_set.size());
+
+    buffer.reserve(required_capacity);
+
+    DCHECK_LE(dex_location.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.method_set.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_location.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.method_set.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
+    AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
+
+    AddStringToBuffer(&buffer, dex_location);
+
+    for (auto method_it : dex_data.method_set) {
+      AddUintToBuffer(&buffer, method_it);
+    }
+    for (auto class_id : dex_data.class_set) {
+      AddUintToBuffer(&buffer, class_id);
+    }
+    DCHECK_EQ(required_capacity, buffer.size())
+        << "Failed to add the expected number of bytes in the buffer";
+  }
+
+  return WriteBuffer(fd, buffer.data(), buffer.size());
+}
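+
+// For illustration only (derived from the format above, not part of the code):
+// after the magic and version, a profile with a single dex file "base.apk"
+// (checksum 0x11223344) containing methods {1, 2} and class {5} is laid out as
+// the following little-endian bytes:
+//   01 00                     - number_of_lines = 1
+//   08 00                     - dex_location_size = 8
+//   02 00                     - method_set_size = 2
+//   01 00                     - class_set_size = 1
+//   44 33 22 11               - checksum = 0x11223344
+//   62 61 73 65 2e 61 70 6b   - "base.apk"
+//   01 00 02 00               - method ids {1, 2}
+//   05 00                     - class id {5}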
+
+ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
+    const std::string& dex_location,
+    uint32_t checksum) {
+  auto info_it = info_.find(dex_location);
+  if (info_it == info_.end()) {
+    info_it = info_.Put(dex_location, DexFileData(checksum));
+  }
+  if (info_it->second.checksum != checksum) {
+    LOG(WARNING) << "Checksum mismatch for dex " << dex_location;
+    return nullptr;
+  }
+  return &info_it->second;
+}
+
+bool ProfileCompilationInfo::AddResolvedClasses(const DexCacheResolvedClasses& classes) {
+  const std::string dex_location = GetProfileDexFileKey(classes.GetDexLocation());
+  const uint32_t checksum = classes.GetLocationChecksum();
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+  if (data == nullptr) {
+    return false;
+  }
+  data->class_set.insert(classes.GetClasses().begin(), classes.GetClasses().end());
+  return true;
+}
+
+bool ProfileCompilationInfo::AddMethodIndex(const std::string& dex_location,
+                                            uint32_t checksum,
+                                            uint16_t method_idx) {
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+  if (data == nullptr) {
+    return false;
+  }
+  data->method_set.insert(method_idx);
+  return true;
+}
+
+bool ProfileCompilationInfo::AddClassIndex(const std::string& dex_location,
+                                           uint32_t checksum,
+                                           uint16_t class_idx) {
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+  if (data == nullptr) {
+    return false;
+  }
+  data->class_set.insert(class_idx);
+  return true;
+}
+
+bool ProfileCompilationInfo::ProcessLine(SafeBuffer& line_buffer,
+                                         uint16_t method_set_size,
+                                         uint16_t class_set_size,
+                                         uint32_t checksum,
+                                         const std::string& dex_location) {
+  for (uint16_t i = 0; i < method_set_size; i++) {
+    uint16_t method_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
+    if (!AddMethodIndex(dex_location, checksum, method_idx)) {
+      return false;
+    }
+  }
+
+  for (uint16_t i = 0; i < class_set_size; i++) {
+    uint16_t class_def_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
+    if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Tests for EOF by trying to read 1 byte from the descriptor.
+// Returns:
+//   0 if the descriptor is at EOF,
+//  -1 if there was an IO error,
+//   1 if the descriptor has more content to read.
+static int testEOF(int fd) {
+  uint8_t buffer[1];
+  return TEMP_FAILURE_RETRY(read(fd, buffer, 1));
+}
+
+// Reads a uint value previously written with AddUintToBuffer.
+template <typename T>
+T ProfileCompilationInfo::SafeBuffer::ReadUintAndAdvance() {
+  static_assert(std::is_unsigned<T>::value, "Type is not unsigned");
+  CHECK_LE(ptr_current_ + sizeof(T), ptr_end_);
+  T value = 0;
+  for (size_t i = 0; i < sizeof(T); i++) {
+    value += static_cast<T>(ptr_current_[i]) << (i * kBitsPerByte);
+  }
+  ptr_current_ += sizeof(T);
+  return value;
+}
+
+bool ProfileCompilationInfo::SafeBuffer::CompareAndAdvance(const uint8_t* data, size_t data_size) {
+  if (ptr_current_ + data_size > ptr_end_) {
+    return false;
+  }
+  if (memcmp(ptr_current_, data, data_size) == 0) {
+    ptr_current_ += data_size;
+    return true;
+  }
+  return false;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
+      int fd,
+      const std::string& source,
+      /*out*/std::string* error) {
+  size_t byte_count = ptr_end_ - ptr_current_;
+  uint8_t* buffer = ptr_current_;
+  while (byte_count > 0) {
+    int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
+    if (bytes_read == 0) {
+      *error += "Profile EOF reached prematurely for " + source;
+      return kProfileLoadBadData;
+    } else if (bytes_read < 0) {
+      *error += "Profile IO error for " + source + strerror(errno);
+      return kProfileLoadIOError;
+    }
+    byte_count -= bytes_read;
+    buffer += bytes_read;
+  }
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileHeader(
+      int fd,
+      /*out*/uint16_t* number_of_lines,
+      /*out*/std::string* error) {
+  // Read magic and version
+  const size_t kMagicVersionSize =
+    sizeof(kProfileMagic) +
+    sizeof(kProfileVersion) +
+    sizeof(uint16_t);  // number of lines
+
+  SafeBuffer safe_buffer(kMagicVersionSize);
+
+  ProfileLoadSatus status = safe_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  if (!safe_buffer.CompareAndAdvance(kProfileMagic, sizeof(kProfileMagic))) {
+    *error = "Profile missing magic";
+    return kProfileLoadVersionMismatch;
+  }
+  if (!safe_buffer.CompareAndAdvance(kProfileVersion, sizeof(kProfileVersion))) {
+    *error = "Profile version mismatch";
+    return kProfileLoadVersionMismatch;
+  }
+  *number_of_lines = safe_buffer.ReadUintAndAdvance<uint16_t>();
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLineHeader(
+      int fd,
+      /*out*/ProfileLineHeader* line_header,
+      /*out*/std::string* error) {
+  SafeBuffer header_buffer(kLineHeaderSize);
+  ProfileLoadSatus status = header_buffer.FillFromFd(fd, "ReadProfileLineHeader", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  uint16_t dex_location_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->method_set_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->class_set_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->checksum = header_buffer.ReadUintAndAdvance<uint32_t>();
+
+  if (dex_location_size == 0 || dex_location_size > kMaxDexFileKeyLength) {
+    *error = "DexFileKey has an invalid size: " +
+        std::to_string(static_cast<uint32_t>(dex_location_size));
+    return kProfileLoadBadData;
+  }
+
+  SafeBuffer location_buffer(dex_location_size);
+  status = location_buffer.FillFromFd(fd, "ReadProfileHeaderDexLocation", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+  line_header->dex_location.assign(
+      reinterpret_cast<char*>(location_buffer.Get()), dex_location_size);
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLine(
+      int fd,
+      const ProfileLineHeader& line_header,
+      /*out*/std::string* error) {
+  // Make sure that we don't try to read everything into memory at once (in case
+  // the profile is full). Split the reads into chunks of at most 10KB
+  // (5120 entries of 2 bytes each).
+  static constexpr uint16_t kMaxNumberOfEntriesToRead = 5120;
+  uint16_t methods_left_to_read = line_header.method_set_size;
+  uint16_t classes_left_to_read = line_header.class_set_size;
+
+  while ((methods_left_to_read > 0) || (classes_left_to_read > 0)) {
+    uint16_t methods_to_read = std::min(kMaxNumberOfEntriesToRead, methods_left_to_read);
+    uint16_t max_classes_to_read = kMaxNumberOfEntriesToRead - methods_to_read;
+    uint16_t classes_to_read = std::min(max_classes_to_read, classes_left_to_read);
+
+    size_t line_size = sizeof(uint16_t) * (methods_to_read + classes_to_read);
+    SafeBuffer line_buffer(line_size);
+
+    ProfileLoadSatus status = line_buffer.FillFromFd(fd, "ReadProfileLine", error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+    if (!ProcessLine(line_buffer,
+                     methods_to_read,
+                     classes_to_read,
+                     line_header.checksum,
+                     line_header.dex_location)) {
+      *error = "Error when reading profile file line";
+      return kProfileLoadBadData;
+    }
+    methods_left_to_read -= methods_to_read;
+    classes_left_to_read -= classes_to_read;
+  }
+  return kProfileLoadSuccess;
+}
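+
+// For illustration: a line with 6000 methods and 1000 classes is read in two
+// chunks of at most 5120 entries each: first 5120 method ids, then the
+// remaining 880 method ids together with the 1000 class ids.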
+
+bool ProfileCompilationInfo::Load(int fd) {
+  std::string error;
+  ProfileLoadSatus status = LoadInternal(fd, &error);
+
+  if (status == kProfileLoadSuccess) {
+    return true;
+  } else {
+    PLOG(WARNING) << "Error when reading profile " << error;
+    return false;
+  }
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::LoadInternal(
+      int fd, std::string* error) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  DCHECK_GE(fd, 0);
+
+  struct stat stat_buffer;
+  if (fstat(fd, &stat_buffer) != 0) {
+    return kProfileLoadIOError;
+  }
+  // We allow empty profile files.
+  // Profiles may be created by ActivityManager or installd before we manage to
+  // process them in the runtime or profman.
+  if (stat_buffer.st_size == 0) {
+    return kProfileLoadSuccess;
+  }
+  // Read profile header: magic + version + number_of_lines.
+  uint16_t number_of_lines;
+  ProfileLoadSatus status = ReadProfileHeader(fd, &number_of_lines, error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  while (number_of_lines > 0) {
+    ProfileLineHeader line_header;
+    // First, read the line header to get the amount of data we need to read.
+    status = ReadProfileLineHeader(fd, &line_header, error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+
+    // Now read the actual profile line.
+    status = ReadProfileLine(fd, line_header, error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+    number_of_lines--;
+  }
+
+  // Check that we read everything and that profiles don't contain junk data.
+  int result = testEOF(fd);
+  if (result == 0) {
+    return kProfileLoadSuccess;
+  } else if (result < 0) {
+    return kProfileLoadIOError;
+  } else {
+    *error = "Unexpected content in the profile file";
+    return kProfileLoadBadData;
+  }
+}
+
+bool ProfileCompilationInfo::MergeWith(const ProfileCompilationInfo& other) {
+  // First verify that all checksums match. This will avoid adding garbage to
+  // the current profile info.
+  // Note that the number of elements should be very small, so this should not
+  // be a performance issue.
+  for (const auto& other_it : other.info_) {
+    auto info_it = info_.find(other_it.first);
+    if ((info_it != info_.end()) && (info_it->second.checksum != other_it.second.checksum)) {
+      LOG(WARNING) << "Checksum mismatch for dex " << other_it.first;
+      return false;
+    }
+  }
+  // All checksums match. Import the data.
+  for (const auto& other_it : other.info_) {
+    const std::string& other_dex_location = other_it.first;
+    const DexFileData& other_dex_data = other_it.second;
+    auto info_it = info_.find(other_dex_location);
+    if (info_it == info_.end()) {
+      info_it = info_.Put(other_dex_location, DexFileData(other_dex_data.checksum));
+    }
+    info_it->second.method_set.insert(other_dex_data.method_set.begin(),
+                                      other_dex_data.method_set.end());
+    info_it->second.class_set.insert(other_dex_data.class_set.begin(),
+                                     other_dex_data.class_set.end());
+  }
+  return true;
+}
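+
+// For illustration: merging a profile containing {dex1 -> methods {1}} into one
+// containing {dex1 -> methods {2}} (same checksum) yields {dex1 -> methods {1, 2}};
+// a checksum mismatch on any shared dex location rejects the whole merge and
+// leaves the current object untouched.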
+
+static bool ChecksumMatch(const DexFile& dex_file, uint32_t checksum) {
+  return kDebugIgnoreChecksum || dex_file.GetLocationChecksum() == checksum;
+}
+
+bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
+  auto info_it = info_.find(GetProfileDexFileKey(method_ref.dex_file->GetLocation()));
+  if (info_it != info_.end()) {
+    if (!ChecksumMatch(*method_ref.dex_file, info_it->second.checksum)) {
+      return false;
+    }
+    const std::set<uint16_t>& methods = info_it->second.method_set;
+    return methods.find(method_ref.dex_method_index) != methods.end();
+  }
+  return false;
+}
+
+bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const {
+  auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
+  if (info_it != info_.end()) {
+    if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
+      return false;
+    }
+    const std::set<uint16_t>& classes = info_it->second.class_set;
+    return classes.find(class_def_idx) != classes.end();
+  }
+  return false;
+}
+
+uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
+  uint32_t total = 0;
+  for (const auto& it : info_) {
+    total += it.second.method_set.size();
+  }
+  return total;
+}
+
+uint32_t ProfileCompilationInfo::GetNumberOfResolvedClasses() const {
+  uint32_t total = 0;
+  for (const auto& it : info_) {
+    total += it.second.class_set.size();
+  }
+  return total;
+}
+
+std::string ProfileCompilationInfo::DumpInfo(const std::vector<const DexFile*>* dex_files,
+                                             bool print_full_dex_location) const {
+  std::ostringstream os;
+  if (info_.empty()) {
+    return "ProfileInfo: empty";
+  }
+
+  os << "ProfileInfo:";
+
+  const std::string kFirstDexFileKeySubstitute = ":classes.dex";
+  for (const auto& it : info_) {
+    os << "\n";
+    const std::string& location = it.first;
+    const DexFileData& dex_data = it.second;
+    if (print_full_dex_location) {
+      os << location;
+    } else {
+      // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
+      os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
+    }
+    const DexFile* dex_file = nullptr;
+    if (dex_files != nullptr) {
+      for (size_t i = 0; i < dex_files->size(); i++) {
+        if (location == (*dex_files)[i]->GetLocation()) {
+          dex_file = (*dex_files)[i];
+        }
+      }
+    }
+    os << "\n\tmethods: ";
+    for (const auto method_it : dex_data.method_set) {
+      if (dex_file != nullptr) {
+        os << "\n\t\t" << PrettyMethod(method_it, *dex_file, true);
+      } else {
+        os << method_it << ",";
+      }
+    }
+    os << "\n\tclasses: ";
+    for (const auto class_it : dex_data.class_set) {
+      if (dex_file != nullptr) {
+        os << "\n\t\t" << dex_file->GetClassDescriptor(dex_file->GetClassDef(class_it));
+      } else {
+        os << class_it << ",";
+      }
+    }
+  }
+  return os.str();
+}
+
+bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
+  return info_.Equals(other.info_);
+}
+
+std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses() const {
+  std::set<DexCacheResolvedClasses> ret;
+  for (auto&& pair : info_) {
+    const std::string& profile_key = pair.first;
+    const DexFileData& data = pair.second;
+    // TODO: Is it OK to use the same location for both base and dex location here?
+    DexCacheResolvedClasses classes(profile_key, profile_key, data.checksum);
+    classes.AddClasses(data.class_set.begin(), data.class_set.end());
+    ret.insert(classes);
+  }
+  return ret;
+}
+
+void ProfileCompilationInfo::ClearResolvedClasses() {
+  for (auto& pair : info_) {
+    pair.second.class_set.clear();
+  }
+}
+
+// Naive implementation to generate a random profile file suitable for testing.
+bool ProfileCompilationInfo::GenerateTestProfile(int fd,
+                                                 uint16_t number_of_dex_files,
+                                                 uint16_t method_ratio,
+                                                 uint16_t class_ratio) {
+  const std::string base_dex_location = "base.apk";
+  ProfileCompilationInfo info;
+  // The limits are defined by the dex specification.
+  uint16_t max_method = std::numeric_limits<uint16_t>::max();
+  uint16_t max_classes = std::numeric_limits<uint16_t>::max();
+  uint16_t number_of_methods = max_method * method_ratio / 100;
+  uint16_t number_of_classes = max_classes * class_ratio / 100;
+
+  srand(MicroTime());
+
+  // Make sure we generate more samples with a low index value.
+  // This makes it more likely to hit valid method/class indices in small apps.
+  const uint16_t kFavorFirstN = 10000;
+  const uint16_t kFavorSplit = 2;
+
+  for (uint16_t i = 0; i < number_of_dex_files; i++) {
+    std::string dex_location = DexFile::GetMultiDexLocation(i, base_dex_location.c_str());
+    std::string profile_key = GetProfileDexFileKey(dex_location);
+
+    for (uint16_t m = 0; m < number_of_methods; m++) {
+      uint16_t method_idx = rand() % max_method;
+      if (m < (number_of_methods / kFavorSplit)) {
+        method_idx %= kFavorFirstN;
+      }
+      info.AddMethodIndex(profile_key, 0, method_idx);
+    }
+
+    for (uint16_t c = 0; c < number_of_classes; c++) {
+      uint16_t class_idx = rand() % max_classes;
+      if (c < (number_of_classes / kFavorSplit)) {
+        class_idx %= kFavorFirstN;
+      }
+      info.AddClassIndex(profile_key, 0, class_idx);
+    }
+  }
+  return info.Save(fd);
+}
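+
+// For illustration, a hypothetical caller (e.g. a test or profman) might do:
+//   ScratchFile profile;
+//   bool ok = ProfileCompilationInfo::GenerateTestProfile(
+//       profile.GetFd(), /*number_of_dex_files*/ 2, /*method_ratio*/ 10, /*class_ratio*/ 5);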
+
+}  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
new file mode 100644
index 0000000..0b26f9b
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+
+#include <set>
+#include <vector>
+
+#include "atomic.h"
+#include "dex_cache_resolved_classes.h"
+#include "dex_file.h"
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+// TODO: rename file.
+/**
+ * Profile information in a format suitable for querying by the compiler when
+ * performing profile-guided compilation.
+ * It is a serialization-friendly format based on information collected by the
+ * interpreter (ProfileInfo).
+ * Currently it stores only the hot compiled methods.
+ */
+class ProfileCompilationInfo {
+ public:
+  static const uint8_t kProfileMagic[];
+  static const uint8_t kProfileVersion[];
+
+  // Add the given methods and classes to the current profile object.
+  bool AddMethodsAndClasses(const std::vector<MethodReference>& methods,
+                            const std::set<DexCacheResolvedClasses>& resolved_classes);
+  // Loads profile information from the given file descriptor.
+  bool Load(int fd);
+  // Merge the data from another ProfileCompilationInfo into the current object.
+  bool MergeWith(const ProfileCompilationInfo& info);
+  // Saves the profile data to the given file descriptor.
+  bool Save(int fd);
+  // Loads and merges profile information from the given file into the current
+  // object and tries to save it back to disk.
+  // If `force` is true then the save will go through even if the given file
+  // has bad data or its version does not match. In these cases the existing
+  // profile content is ignored.
+  bool MergeAndSave(const std::string& filename, uint64_t* bytes_written, bool force);
+
+  // Returns the number of methods that were profiled.
+  uint32_t GetNumberOfMethods() const;
+  // Returns the number of resolved classes that were profiled.
+  uint32_t GetNumberOfResolvedClasses() const;
+
+  // Returns true if the method reference is present in the profiling info.
+  bool ContainsMethod(const MethodReference& method_ref) const;
+
+  // Returns true if the class is present in the profiling info.
+  bool ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const;
+
+  // Dumps all the loaded profile info into a string and returns it.
+  // If dex_files is not null then the method indices will be resolved to their
+  // names.
+  // This is intended for testing and debugging.
+  std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
+                       bool print_full_dex_location = true) const;
+
+  bool Equals(const ProfileCompilationInfo& other);
+
+  static std::string GetProfileDexFileKey(const std::string& dex_location);
+
+  // Returns the class descriptors for all of the classes in the profiles' class sets.
+  // Note that the dex location is actually the profile key; the caller needs to call
+  // back into the profile info code to map it back to the real dex location.
+  std::set<DexCacheResolvedClasses> GetResolvedClasses() const;
+
+  // Clears the resolved classes from the current object.
+  void ClearResolvedClasses();
+
+  static bool GenerateTestProfile(int fd,
+                                  uint16_t number_of_dex_files,
+                                  uint16_t method_ratio,
+                                  uint16_t class_ratio);
+
+ private:
+  enum ProfileLoadSatus {
+    kProfileLoadIOError,
+    kProfileLoadVersionMismatch,
+    kProfileLoadBadData,
+    kProfileLoadSuccess
+  };
+
+  struct DexFileData {
+    explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
+    uint32_t checksum;
+    std::set<uint16_t> method_set;
+    std::set<uint16_t> class_set;
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_set == other.method_set;
+    }
+  };
+
+  using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
+
+  DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
+  bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
+  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, uint16_t class_idx);
+  bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
+
+  // Parsing functionality.
+
+  struct ProfileLineHeader {
+    std::string dex_location;
+    uint16_t method_set_size;
+    uint16_t class_set_size;
+    uint32_t checksum;
+  };
+
+  // A helper structure to make sure we don't read past our buffers in the loops.
+  struct SafeBuffer {
+   public:
+    explicit SafeBuffer(size_t size) : storage_(new uint8_t[size]) {
+      ptr_current_ = storage_.get();
+      ptr_end_ = ptr_current_ + size;
+    }
+
+    // Fills the buffer, from the current position up to its end, with bytes
+    // read from the given file descriptor.
+    ProfileLoadSatus FillFromFd(int fd,
+                                const std::string& source,
+                                /*out*/std::string* error);
+
+    // Reads a uint value (stored little-endian, least significant byte first)
+    // and advances the current pointer by the number of bytes read.
+    template <typename T> T ReadUintAndAdvance();
+
+    // Compares the given data with the content at the current pointer. If the
+    // contents are equal it advances the current pointer by data_size.
+    bool CompareAndAdvance(const uint8_t* data, size_t data_size);
+
+    // Get the underlying raw buffer.
+    uint8_t* Get() { return storage_.get(); }
+
+   private:
+    std::unique_ptr<uint8_t[]> storage_;
+    uint8_t* ptr_current_;
+    uint8_t* ptr_end_;
+  };
+
+  ProfileLoadSatus LoadInternal(int fd, std::string* error);
+
+  ProfileLoadSatus ReadProfileHeader(int fd,
+                                     /*out*/uint16_t* number_of_lines,
+                                     /*out*/std::string* error);
+
+  ProfileLoadSatus ReadProfileLineHeader(int fd,
+                                         /*out*/ProfileLineHeader* line_header,
+                                         /*out*/std::string* error);
+  ProfileLoadSatus ReadProfileLine(int fd,
+                                   const ProfileLineHeader& line_header,
+                                   /*out*/std::string* error);
+
+  bool ProcessLine(SafeBuffer& line_buffer,
+                   uint16_t method_set_size,
+                   uint16_t class_set_size,
+                   uint32_t checksum,
+                   const std::string& dex_location);
+
+  friend class ProfileCompilationInfoTest;
+  friend class CompilerDriverProfileTest;
+  friend class ProfileAssistantTest;
+
+  DexFileToProfileInfoMap info_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
new file mode 100644
index 0000000..c8f4d94
--- /dev/null
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "method_reference.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ protected:
+  std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
+                                            const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    std::vector<ArtMethod*> methods;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      methods.push_back(&m);
+    }
+    return methods;
+  }
+
+  bool AddMethod(const std::string& dex_location,
+                 uint32_t checksum,
+                 uint16_t method_index,
+                 ProfileCompilationInfo* info) {
+    return info->AddMethodIndex(dex_location, checksum, method_index);
+  }
+
+  bool AddClass(const std::string& dex_location,
+                uint32_t checksum,
+                uint16_t class_index,
+                ProfileCompilationInfo* info) {
+    return info->AddClassIndex(dex_location, checksum, class_index);
+  }
+
+  uint32_t GetFd(const ScratchFile& file) {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+
+  bool SaveProfilingInfo(
+      const std::string& filename,
+      const std::vector<ArtMethod*>& methods,
+      const std::set<DexCacheResolvedClasses>& resolved_classes) {
+    ProfileCompilationInfo info;
+    std::vector<MethodReference> method_refs;
+    ScopedObjectAccess soa(Thread::Current());
+    for (ArtMethod* method : methods) {
+      method_refs.emplace_back(method->GetDexFile(), method->GetDexMethodIndex());
+    }
+    if (!info.AddMethodsAndClasses(method_refs, resolved_classes)) {
+      return false;
+    }
+    return info.MergeAndSave(filename, nullptr, false);
+  }
+
+  // The arrays are declared without a size in the header, so we cannot take
+  // sizeof here; hardcode the values instead. They should not change anyway.
+  static constexpr int kProfileMagicSize = 4;
+  static constexpr int kProfileVersionSize = 4;
+};
+
+TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
+  ScratchFile profile;
+
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Save virtual methods from Main.
+  std::set<DexCacheResolvedClasses> resolved_classes;
+  std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
+  ASSERT_TRUE(SaveProfilingInfo(profile.GetFilename(), main_methods, resolved_classes));
+
+  // Check that what we saved is in the profile.
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(info1.Load(GetFd(profile)));
+  ASSERT_EQ(info1.GetNumberOfMethods(), main_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info1.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+
+  // Save virtual methods from Second.
+  std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
+  ASSERT_TRUE(SaveProfilingInfo(profile.GetFilename(), second_methods, resolved_classes));
+
+  // Check that what we saved is in the profile (methods from Main and Second).
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(info2.Load(GetFd(profile)));
+  ASSERT_EQ(info2.GetNumberOfMethods(), main_methods.size() + second_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+    for (ArtMethod* m : second_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveFd) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  // Save more methods.
+  for (uint16_t i = 0; i < 100; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back everything we saved.
+  ProfileCompilationInfo loaded_info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info2.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info2.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, AddMethodsAndClassesFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  // Trying to add info for an existing file but with a different checksum.
+  ASSERT_FALSE(AddMethod("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  // Use the same file, change the checksum.
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+
+  ASSERT_FALSE(info1.MergeWith(info2));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveMaxMethods) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save the maximum number of methods
+  for (uint16_t i = 0; i < std::numeric_limits<uint16_t>::max(); i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  // Save the maximum number of classes
+  for (uint16_t i = 0; i < std::numeric_limits<uint16_t>::max(); i++) {
+    ASSERT_TRUE(AddClass("dex_location1", /* checksum */ 1, /* class_idx */ i, &saved_info));
+    ASSERT_TRUE(AddClass("dex_location2", /* checksum */ 2, /* class_idx */ i, &saved_info));
+  }
+
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveEmpty) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadEmpty) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo empty_info;
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(empty_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, BadMagic) {
+  ScratchFile profile;
+  uint8_t buffer[] = { 1, 2, 3, 4 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(buffer, sizeof(buffer)));
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, BadVersion) {
+  ScratchFile profile;
+
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  uint8_t version[] = { 'v', 'e', 'r', 's', 'i', 'o', 'n' };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(version, sizeof(version)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, Incomplete) {
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, TooLongDexLocation) {
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+
+  // dex_location_size, methods_size, classes_size, checksum.
+  // Dex location size is too big and should be rejected.
+  uint8_t line[] = { 255, 255, 0, 1, 0, 1, 0, 0, 0, 0 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line, sizeof(line)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, UnexpectedContent) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+
+  uint8_t random_data[] = { 1, 2, 3};
+  ASSERT_TRUE(profile.GetFile()->WriteFully(random_data, sizeof(random_data)));
+
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we fail because of unexpected data at the end of the file.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+}  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
new file mode 100644
index 0000000..b35c958
--- /dev/null
+++ b/runtime/jit/profile_saver.cc
@@ -0,0 +1,670 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profile_saver.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "art_method-inl.h"
+#include "base/enums.h"
+#include "base/systrace.h"
+#include "base/time_utils.h"
+#include "compiler_filter.h"
+#include "oat_file_manager.h"
+#include "scoped_thread_state_change.h"
+
+
+namespace art {
+
+ProfileSaver* ProfileSaver::instance_ = nullptr;
+pthread_t ProfileSaver::profiler_pthread_ = 0U;
+
+ProfileSaver::ProfileSaver(const ProfileSaverOptions& options,
+                           const std::string& output_filename,
+                           jit::JitCodeCache* jit_code_cache,
+                           const std::vector<std::string>& code_paths,
+                           const std::string& foreign_dex_profile_path,
+                           const std::string& app_data_dir)
+    : jit_code_cache_(jit_code_cache),
+      foreign_dex_profile_path_(foreign_dex_profile_path),
+      shutting_down_(false),
+      last_save_number_of_methods_(0),
+      last_save_number_of_classes_(0),
+      last_time_ns_saver_woke_up_(0),
+      jit_activity_notifications_(0),
+      wait_lock_("ProfileSaver wait lock"),
+      period_condition_("ProfileSaver period condition", wait_lock_),
+      total_bytes_written_(0),
+      total_number_of_writes_(0),
+      total_number_of_code_cache_queries_(0),
+      total_number_of_skipped_writes_(0),
+      total_number_of_failed_writes_(0),
+      total_ms_of_sleep_(0),
+      total_ns_of_work_(0),
+      total_number_of_foreign_dex_marks_(0),
+      max_number_of_profile_entries_cached_(0),
+      total_number_of_hot_spikes_(0),
+      total_number_of_wake_ups_(0),
+      options_(options) {
+  DCHECK(options_.IsEnabled());
+  AddTrackedLocations(output_filename, app_data_dir, code_paths);
+  if (!app_data_dir.empty()) {
+    // The application directory is used to determine which dex files are owned by
+    // the app. Since it could be a symlink (e.g. /data/data instead of /data/user/0),
+    // and we don't have control over how the dex files are actually loaded (symlink
+    // or canonical path), store its canonical form to be sure we use the same base
+    // when comparing.
+    UniqueCPtr<const char[]> app_data_dir_real_path(realpath(app_data_dir.c_str(), nullptr));
+    if (app_data_dir_real_path != nullptr) {
+      app_data_dirs_.emplace(app_data_dir_real_path.get());
+    } else {
+      LOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir
+          << ". The app dir will not be used to determine which dex files belong to the app";
+    }
+  }
+}
+
+void ProfileSaver::Run() {
+  Thread* self = Thread::Current();
+
+  // Fetch the resolved classes for the app images after sleeping for
+  // options_.GetSaveResolvedClassesDelayMs().
+  // TODO(calin) This only considers the case of the primary profile file.
+  // Anything that gets loaded in the same VM will not have its resolved
+  // classes saved (unless it started before the initial saving was done).
+  {
+    MutexLock mu(self, wait_lock_);
+    const uint64_t end_time = NanoTime() + MsToNs(options_.GetSaveResolvedClassesDelayMs());
+    while (true) {
+      const uint64_t current_time = NanoTime();
+      if (current_time >= end_time) {
+        break;
+      }
+      period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
+    }
+    total_ms_of_sleep_ += options_.GetSaveResolvedClassesDelayMs();
+  }
+  FetchAndCacheResolvedClassesAndMethods();
+
+  // Loop for the profiled methods.
+  while (!ShuttingDown(self)) {
+    uint64_t sleep_start = NanoTime();
+    {
+      uint64_t sleep_time = 0;
+      {
+        MutexLock mu(self, wait_lock_);
+        period_condition_.Wait(self);
+        sleep_time = NanoTime() - sleep_start;
+      }
+      // Check if the thread was woken up for shutdown.
+      if (ShuttingDown(self)) {
+        break;
+      }
+      total_number_of_wake_ups_++;
+      // We might have been woken up by a huge number of notifications to guarantee saving.
+      // If we didn't meet the minimum saving period go back to sleep (only if missed by
+      // a reasonable margin).
+      uint64_t min_save_period_ns = MsToNs(options_.GetMinSavePeriodMs());
+      while (min_save_period_ns * 0.9 > sleep_time) {
+        {
+          MutexLock mu(self, wait_lock_);
+          period_condition_.TimedWait(self, NsToMs(min_save_period_ns - sleep_time), 0);
+          sleep_time = NanoTime() - sleep_start;
+        }
+        // Check if the thread was woken up for shutdown.
+        if (ShuttingDown(self)) {
+          break;
+        }
+        total_number_of_wake_ups_++;
+      }
+    }
+    total_ms_of_sleep_ += NsToMs(NanoTime() - sleep_start);
+
+    if (ShuttingDown(self)) {
+      break;
+    }
+
+    uint16_t new_methods = 0;
+    uint64_t start_work = NanoTime();
+    bool profile_saved_to_disk = ProcessProfilingInfo(&new_methods);
+    // Update the notification counter based on the result. Note that there might be
+    // contention on this, but we don't need it to be 100% precise.
+    if (!profile_saved_to_disk) {
+      // If we didn't save to disk it may be because we didn't have enough new methods.
+      // Set the jit activity notifications to new_methods so we can wake up earlier if needed.
+      jit_activity_notifications_ = new_methods;
+    }
+    total_ns_of_work_ += NanoTime() - start_work;
+  }
+}
+
+void ProfileSaver::NotifyJitActivity() {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ == nullptr || instance_->shutting_down_) {
+    return;
+  }
+  instance_->NotifyJitActivityInternal();
+}
+
+void ProfileSaver::WakeUpSaver() {
+  jit_activity_notifications_ = 0;
+  last_time_ns_saver_woke_up_ = NanoTime();
+  period_condition_.Signal(Thread::Current());
+}
+
+void ProfileSaver::NotifyJitActivityInternal() {
+  // Unlikely to overflow, but if it happens we would have woken up the saver
+  // long before that.
+  jit_activity_notifications_++;
+  // Note that we are not as precise as we could be here but we don't want to wake the saver
+  // every time we see a hot method.
+  if (jit_activity_notifications_ > options_.GetMinNotificationBeforeWake()) {
+    MutexLock wait_mutex(Thread::Current(), wait_lock_);
+    if ((NanoTime() - last_time_ns_saver_woke_up_) > MsToNs(options_.GetMinSavePeriodMs())) {
+      WakeUpSaver();
+    } else if (jit_activity_notifications_ > options_.GetMaxNotificationBeforeWake()) {
+      // Make sure to wake up the saver if we see a spike in the number of notifications.
+      // This is a precaution to avoid losing a big number of methods in case
+      // this is a spike with no jit after.
+      total_number_of_hot_spikes_++;
+      WakeUpSaver();
+    }
+  }
+}
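+
+// For illustration, with hypothetical option values MinNotificationBeforeWake = 10,
+// MaxNotificationBeforeWake = 100 and MinSavePeriodMs = 2000: the first 10
+// notifications are ignored; after that the saver is woken only if at least 2s
+// passed since its last wake-up, unless the count spikes past 100, which forces
+// a wake-up regardless of the elapsed time.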
+
+ProfileCompilationInfo* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
+  auto info_it = profile_cache_.find(filename);
+  if (info_it == profile_cache_.end()) {
+    info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+  }
+  return &info_it->second;
+}
+
+// Get resolved methods that have a ProfilingInfo or at least
+// `startup_method_samples` samples.
+// Excludes native methods and classes in the boot image.
+class GetMethodsVisitor : public ClassVisitor {
+ public:
+  GetMethodsVisitor(std::vector<MethodReference>* methods, uint32_t startup_method_samples)
+    : methods_(methods),
+      startup_method_samples_(startup_method_samples) {}
+
+  virtual bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      return true;
+    }
+    for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
+      if (!method.IsNative()) {
+        if (method.GetCounter() >= startup_method_samples_ ||
+            method.GetProfilingInfo(kRuntimePointerSize) != nullptr) {
+          // Have samples, add to profile.
+          const DexFile* dex_file =
+              method.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetDexFile();
+          methods_->push_back(MethodReference(dex_file, method.GetDexMethodIndex()));
+        }
+      }
+    }
+    return true;
+  }
+
+ private:
+  std::vector<MethodReference>* const methods_;
+  uint32_t startup_method_samples_;
+};
+
+void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  std::set<DexCacheResolvedClasses> resolved_classes =
+      class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
+
+  std::vector<MethodReference> methods;
+  {
+    ScopedTrace trace2("Get hot methods");
+    GetMethodsVisitor visitor(&methods, options_.GetStartupMethodSamples());
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker->VisitClasses(&visitor);
+    VLOG(profiler) << "Methods with samples greater than "
+                   << options_.GetStartupMethodSamples() << " = " << methods.size();
+  }
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  uint64_t total_number_of_profile_entries_cached = 0;
+
+  for (const auto& it : tracked_dex_base_locations_) {
+    std::set<DexCacheResolvedClasses> resolved_classes_for_location;
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+    std::vector<MethodReference> methods_for_location;
+    for (const MethodReference& ref : methods) {
+      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
+        methods_for_location.push_back(ref);
+      }
+    }
+    for (const DexCacheResolvedClasses& classes : resolved_classes) {
+      if (locations.find(classes.GetBaseLocation()) != locations.end()) {
+        VLOG(profiler) << "Added " << classes.GetClasses().size() << " classes for location "
+                       << classes.GetBaseLocation() << " (" << classes.GetDexLocation() << ")";
+        resolved_classes_for_location.insert(classes);
+      } else {
+        VLOG(profiler) << "Location not found " << classes.GetBaseLocation()
+                       << " (" << classes.GetDexLocation() << ")";
+      }
+    }
+    ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
+    info->AddMethodsAndClasses(methods_for_location, resolved_classes_for_location);
+    total_number_of_profile_entries_cached += resolved_classes_for_location.size();
+  }
+  max_number_of_profile_entries_cached_ = std::max(
+      max_number_of_profile_entries_cached_,
+      total_number_of_profile_entries_cached);
+}
+
+bool ProfileSaver::ProcessProfilingInfo(uint16_t* new_methods) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  SafeMap<std::string, std::set<std::string>> tracked_locations;
+  {
+    // Make a copy so that we don't hold the lock while doing I/O.
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    tracked_locations = tracked_dex_base_locations_;
+  }
+
+  bool profile_file_saved = false;
+  uint64_t total_number_of_profile_entries_cached = 0;
+  *new_methods = 0;
+
+  for (const auto& it : tracked_locations) {
+    if (ShuttingDown(Thread::Current())) {
+      return true;
+    }
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+    std::vector<MethodReference> methods;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      jit_code_cache_->GetProfiledMethods(locations, methods);
+      total_number_of_code_cache_queries_++;
+    }
+
+    ProfileCompilationInfo* cached_info = GetCachedProfiledInfo(filename);
+    cached_info->AddMethodsAndClasses(methods, std::set<DexCacheResolvedClasses>());
+    int64_t delta_number_of_methods =
+        cached_info->GetNumberOfMethods() -
+        static_cast<int64_t>(last_save_number_of_methods_);
+    int64_t delta_number_of_classes =
+        cached_info->GetNumberOfResolvedClasses() -
+        static_cast<int64_t>(last_save_number_of_classes_);
+
+    if (delta_number_of_methods < options_.GetMinMethodsToSave() &&
+        delta_number_of_classes < options_.GetMinClassesToSave()) {
+      VLOG(profiler) << "Not enough information to save to: " << filename
+          << " Number of methods: " << delta_number_of_methods
+          << " Number of classes: " << delta_number_of_classes;
+      total_number_of_skipped_writes_++;
+      continue;
+    }
+    *new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods), *new_methods);
+    uint64_t bytes_written;
+    // Force the save. In case the profile data is corrupted or the profile
+    // has the wrong version, this will "fix" the file to the correct format.
+    if (cached_info->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
+      last_save_number_of_methods_ = cached_info->GetNumberOfMethods();
+      last_save_number_of_classes_ = cached_info->GetNumberOfResolvedClasses();
+      // Clear resolved classes. No need to store them around as
+      // they don't change after the first write.
+      cached_info->ClearResolvedClasses();
+      if (bytes_written > 0) {
+        total_number_of_writes_++;
+        total_bytes_written_ += bytes_written;
+        profile_file_saved = true;
+      } else {
+        // At this point we could still have avoided the write.
+        // We load and merge the data from the file lazily at its first ever
+        // save attempt. So, whatever we are trying to save could already be
+        // in the file.
+        total_number_of_skipped_writes_++;
+      }
+    } else {
+      LOG(WARNING) << "Could not save profiling info to " << filename;
+      total_number_of_failed_writes_++;
+    }
+    total_number_of_profile_entries_cached +=
+        cached_info->GetNumberOfMethods() +
+        cached_info->GetNumberOfResolvedClasses();
+  }
+  max_number_of_profile_entries_cached_ = std::max(
+      max_number_of_profile_entries_cached_,
+      total_number_of_profile_entries_cached);
+  return profile_file_saved;
+}
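+
+// For illustration, with hypothetical options MinMethodsToSave = 10 and
+// MinClassesToSave = 10: if the cached info grew from 20 to 25 methods (and
+// gained no classes) since the last save, both deltas are below the thresholds
+// and the write is skipped; once the method delta reaches 10, MergeAndSave runs.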
+
+void* ProfileSaver::RunProfileSaverThread(void* arg) {
+  Runtime* runtime = Runtime::Current();
+
+  bool attached = runtime->AttachCurrentThread("Profile Saver",
+                                               /*as_daemon*/true,
+                                               runtime->GetSystemThreadGroup(),
+                                               /*create_peer*/true);
+  if (!attached) {
+    CHECK(runtime->IsShuttingDown(Thread::Current()));
+    return nullptr;
+  }
+
+  ProfileSaver* profile_saver = reinterpret_cast<ProfileSaver*>(arg);
+  profile_saver->Run();
+
+  runtime->DetachCurrentThread();
+  VLOG(profiler) << "Profile saver shutdown";
+  return nullptr;
+}
+
+static bool ShouldProfileLocation(const std::string& location) {
+  OatFileManager& oat_manager = Runtime::Current()->GetOatFileManager();
+  const OatFile* oat_file = oat_manager.FindOpenedOatFileFromDexLocation(location);
+  if (oat_file == nullptr) {
+    // This can happen if we fall back to running code directly from the APK.
+    // Profile it with the hope that the background dexopt will get us back into
+    // a good state.
+    VLOG(profiler) << "Asked to profile a location without an oat file: " << location;
+    return true;
+  }
+  CompilerFilter::Filter filter = oat_file->GetCompilerFilter();
+  if ((filter == CompilerFilter::kSpeed) || (filter == CompilerFilter::kEverything)) {
+    VLOG(profiler)
+        << "Skip profiling oat file because it's already speed|everything compiled: "
+        << location << " oat location: " << oat_file->GetLocation();
+    return false;
+  }
+  return true;
+}
+
+void ProfileSaver::Start(const ProfileSaverOptions& options,
+                         const std::string& output_filename,
+                         jit::JitCodeCache* jit_code_cache,
+                         const std::vector<std::string>& code_paths,
+                         const std::string& foreign_dex_profile_path,
+                         const std::string& app_data_dir) {
+  DCHECK(options.IsEnabled());
+  DCHECK(Runtime::Current()->GetJit() != nullptr);
+  DCHECK(!output_filename.empty());
+  DCHECK(jit_code_cache != nullptr);
+
+  std::vector<std::string> code_paths_to_profile;
+
+  for (const std::string& location : code_paths) {
+    if (ShouldProfileLocation(location)) {
+      code_paths_to_profile.push_back(location);
+    }
+  }
+  if (code_paths_to_profile.empty()) {
+    VLOG(profiler) << "No code paths should be profiled.";
+    return;
+  }
+
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ != nullptr) {
+    // If we already have an instance, make sure it uses the same jit_code_cache.
+    // This may be called multiple times via Runtime::registerAppInfo (e.g. for
+    // apps which share the same runtime).
+    DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
+    // Add the code_paths to the tracked locations.
+    instance_->AddTrackedLocations(output_filename, app_data_dir, code_paths_to_profile);
+    return;
+  }
+
+  VLOG(profiler) << "Starting profile saver using output file: " << output_filename
+      << ". Tracking: " << Join(code_paths_to_profile, ':');
+
+  instance_ = new ProfileSaver(options,
+                               output_filename,
+                               jit_code_cache,
+                               code_paths_to_profile,
+                               foreign_dex_profile_path,
+                               app_data_dir);
+
+  // Create a new thread which does the saving.
+  CHECK_PTHREAD_CALL(
+      pthread_create,
+      (&profiler_pthread_, nullptr, &RunProfileSaverThread, reinterpret_cast<void*>(instance_)),
+      "Profile saver thread");
+}
+
+void ProfileSaver::Stop(bool dump_info) {
+  ProfileSaver* profile_saver = nullptr;
+  pthread_t profiler_pthread = 0U;
+
+  {
+    MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
+    VLOG(profiler) << "Stopping profile saver thread";
+    profile_saver = instance_;
+    profiler_pthread = profiler_pthread_;
+    if (instance_ == nullptr) {
+      DCHECK(false) << "Tried to stop a profile saver which was not started";
+      return;
+    }
+    if (instance_->shutting_down_) {
+      DCHECK(false) << "Tried to stop the profile saver twice";
+      return;
+    }
+    instance_->shutting_down_ = true;
+    if (dump_info) {
+      instance_->DumpInfo(LOG(INFO));
+    }
+  }
+
+  {
+    // Wake up the saver thread if it is sleeping to allow for a clean exit.
+    MutexLock wait_mutex(Thread::Current(), profile_saver->wait_lock_);
+    profile_saver->period_condition_.Signal(Thread::Current());
+  }
+
+  // Wait for the saver thread to stop.
+  CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profile saver thread shutdown");
+
+  {
+    MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
+    instance_ = nullptr;
+    profiler_pthread_ = 0U;
+  }
+  delete profile_saver;
+}
+
+bool ProfileSaver::ShuttingDown(Thread* self) {
+  MutexLock mu(self, *Locks::profiler_lock_);
+  return shutting_down_;
+}
+
+bool ProfileSaver::IsStarted() {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  return instance_ != nullptr;
+}
+
+void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
+                                       const std::string& app_data_dir,
+                                       const std::vector<std::string>& code_paths) {
+  auto it = tracked_dex_base_locations_.find(output_filename);
+  if (it == tracked_dex_base_locations_.end()) {
+    tracked_dex_base_locations_.Put(output_filename,
+                                    std::set<std::string>(code_paths.begin(), code_paths.end()));
+    app_data_dirs_.insert(app_data_dir);
+  } else {
+    it->second.insert(code_paths.begin(), code_paths.end());
+  }
+}
+
+void ProfileSaver::NotifyDexUse(const std::string& dex_location) {
+  if (!ShouldProfileLocation(dex_location)) {
+    return;
+  }
+  std::set<std::string> app_code_paths;
+  std::string foreign_dex_profile_path;
+  std::set<std::string> app_data_dirs;
+  {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    if (instance_ == nullptr) {
+      return;
+    }
+    // Make a copy so that we don't hold the lock while doing I/O.
+    for (const auto& it : instance_->tracked_dex_base_locations_) {
+      app_code_paths.insert(it.second.begin(), it.second.end());
+    }
+    foreign_dex_profile_path = instance_->foreign_dex_profile_path_;
+    app_data_dirs.insert(instance_->app_data_dirs_.begin(), instance_->app_data_dirs_.end());
+  }
+
+  bool mark_created = MaybeRecordDexUseInternal(dex_location,
+                                                app_code_paths,
+                                                foreign_dex_profile_path,
+                                                app_data_dirs);
+  if (mark_created) {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    if (instance_ != nullptr) {
+      instance_->total_number_of_foreign_dex_marks_++;
+    }
+  }
+}
+
+bool ProfileSaver::MaybeRecordDexUseInternal(
+      const std::string& dex_location,
+      const std::set<std::string>& app_code_paths,
+      const std::string& foreign_dex_profile_path,
+      const std::set<std::string>& app_data_dirs) {
+  if (dex_location.empty()) {
+    LOG(WARNING) << "Asked to record foreign dex use with an empty dex location.";
+    return false;
+  }
+  if (foreign_dex_profile_path.empty()) {
+    LOG(WARNING) << "Asked to record foreign dex use without a valid profile path ";
+    return false;
+  }
+
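+  // realpath(3) called with a null buffer returns a malloc()-ed string, which
+  // UniqueCPtr frees automatically.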
+  UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr));
+  if (dex_location_real_path == nullptr) {
+    PLOG(WARNING) << "Could not get realpath for " << dex_location;
+  }
+  std::string dex_location_real_path_str((dex_location_real_path == nullptr)
+    ? dex_location.c_str()
+    : dex_location_real_path.get());
+
+  if (app_data_dirs.find(dex_location_real_path_str) != app_data_dirs.end()) {
+    // The dex location is under the application folder. Nothing to record.
+    return false;
+  }
+
+  if (app_code_paths.find(dex_location) != app_code_paths.end()) {
+    // The dex location belongs to the application code paths. Nothing to record.
+    return false;
+  }
+  // Do another round of checks with the real paths.
+  // Note that we could cache all the real locations in the saver (since realpath
+  // is an expensive operation). However we expect that app_code_paths is small
+  // (usually 1 element) and NotifyDexUse is called just a few times in the app
+  // lifetime, so we accept the extra work to save some bytes of memory.
+  for (const auto& app_code_location : app_code_paths) {
+    UniqueCPtr<const char[]> real_app_code_location(realpath(app_code_location.c_str(), nullptr));
+    if (real_app_code_location == nullptr) {
+      PLOG(WARNING) << "Could not get realpath for " << app_code_location;
+    }
+    std::string real_app_code_location_str((real_app_code_location == nullptr)
+        ? app_code_location.c_str()
+        : real_app_code_location.get());
+    if (real_app_code_location_str == dex_location_real_path_str) {
+      // The dex location belongs to the application code paths. Nothing to record.
+      return false;
+    }
+  }
+
+  // For foreign dex files we record a flag on disk. PackageManager will (potentially) take this
+  // into account when deciding how to optimize the loaded dex file.
+  // The expected flag name is the canonical path of the apk where '/' is substituted to '@'.
+  // (it needs to be kept in sync with
+  // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java)
+  std::replace(dex_location_real_path_str.begin(), dex_location_real_path_str.end(), '/', '@');
+  std::string flag_path = foreign_dex_profile_path + "/" + dex_location_real_path_str;
+  // We use O_RDONLY as the access mode because we must supply some access
+  // mode, and there is no access mode that means 'create but do not read' the
+  // file. We will not actually read from the file.
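+  // O_EXCL makes the open fail with EEXIST if the mark already exists, and
+  // O_NOFOLLOW refuses to follow a symlink planted at the flag path.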
+  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(),
+        O_CREAT | O_RDONLY | O_EXCL | O_CLOEXEC | O_NOFOLLOW, 0));
+  if (fd != -1) {
+    if (close(fd) != 0) {
+      PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
+    }
+    return true;
+  } else {
+    // EEXIST means another app already created the mark, and EACCES can happen
+    // because SELinux may not allow the read access implied by the open call.
+    // Neither case is a real failure.
+    if (errno != EEXIST && errno != EACCES) {
+      PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
+      return false;
+    }
+    return true;
+  }
+}
+
+void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ != nullptr) {
+    instance_->DumpInfo(os);
+  }
+}
+
+void ProfileSaver::DumpInfo(std::ostream& os) {
+  os << "ProfileSaver total_bytes_written=" << total_bytes_written_ << '\n'
+     << "ProfileSaver total_number_of_writes=" << total_number_of_writes_ << '\n'
+     << "ProfileSaver total_number_of_code_cache_queries="
+     << total_number_of_code_cache_queries_ << '\n'
+     << "ProfileSaver total_number_of_skipped_writes=" << total_number_of_skipped_writes_ << '\n'
+     << "ProfileSaver total_number_of_failed_writes=" << total_number_of_failed_writes_ << '\n'
+     << "ProfileSaver total_ms_of_sleep=" << total_ms_of_sleep_ << '\n'
+     << "ProfileSaver total_ms_of_work=" << NsToMs(total_ns_of_work_) << '\n'
+     << "ProfileSaver total_number_of_foreign_dex_marks="
+     << total_number_of_foreign_dex_marks_ << '\n'
+     << "ProfileSaver max_number_profile_entries_cached="
+     << max_number_of_profile_entries_cached_ << '\n'
+     << "ProfileSaver total_number_of_hot_spikes=" << total_number_of_hot_spikes_ << '\n'
+     << "ProfileSaver total_number_of_wake_ups=" << total_number_of_wake_ups_ << '\n';
+}
+
+
+void ProfileSaver::ForceProcessProfiles() {
+  ProfileSaver* saver = nullptr;
+  {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    saver = instance_;
+  }
+  // TODO(calin): this is not actually thread safe as the instance_ may have been deleted,
+  // but we only use this in testing when we know this won't happen.
+  // Refactor the way we handle the instance so that we don't end up in this situation.
+  if (saver != nullptr) {
+    uint16_t new_methods;
+    saver->ProcessProfilingInfo(&new_methods);
+  }
+}
+
+bool ProfileSaver::HasSeenMethod(const std::string& profile,
+                                 const DexFile* dex_file,
+                                 uint16_t method_idx) {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ != nullptr) {
+    ProfileCompilationInfo* info = instance_->GetCachedProfiledInfo(profile);
+    if (info != nullptr) {
+      return info->ContainsMethod(MethodReference(dex_file, method_idx));
+    }
+  }
+  return false;
+}
+
+}   // namespace art
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
new file mode 100644
index 0000000..59e2c94
--- /dev/null
+++ b/runtime/jit/profile_saver.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_SAVER_H_
+#define ART_RUNTIME_JIT_PROFILE_SAVER_H_
+
+#include "base/mutex.h"
+#include "jit_code_cache.h"
+#include "offline_profiling_info.h"
+#include "profile_saver_options.h"
+#include "safe_map.h"
+
+namespace art {
+
+class ProfileSaver {
+ public:
+  // Starts the profile saver thread if not already started.
+  // If the saver is already running, it adds (output_filename, code_paths) to its tracked locations.
+  static void Start(const ProfileSaverOptions& options,
+                    const std::string& output_filename,
+                    jit::JitCodeCache* jit_code_cache,
+                    const std::vector<std::string>& code_paths,
+                    const std::string& foreign_dex_profile_path,
+                    const std::string& app_data_dir)
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_);
+
+  // Stops the profile saver thread.
+  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
+  static void Stop(bool dump_info)
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // Returns true if the profile saver is started.
+  static bool IsStarted() REQUIRES(!Locks::profiler_lock_);
+
+  static void NotifyDexUse(const std::string& dex_location);
+
+  // If the profile saver is running, dumps statistics to `os`. Otherwise it does nothing.
+  static void DumpInstanceInfo(std::ostream& os);
+
+  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
+  static void NotifyJitActivity()
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // Just for testing purposes.
+  static void ForceProcessProfiles();
+  static bool HasSeenMethod(const std::string& profile,
+                            const DexFile* dex_file,
+                            uint16_t method_idx);
+
+ private:
+  ProfileSaver(const ProfileSaverOptions& options,
+               const std::string& output_filename,
+               jit::JitCodeCache* jit_code_cache,
+               const std::vector<std::string>& code_paths,
+               const std::string& foreign_dex_profile_path,
+               const std::string& app_data_dir);
+
+  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
+  static void* RunProfileSaverThread(void* arg)
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // The run loop for the saver.
+  void Run() REQUIRES(!Locks::profiler_lock_, !wait_lock_);
+  // Processes the existing profiling info from the jit code cache and returns
+  // true if it needed to be saved to disk.
+  bool ProcessProfilingInfo(uint16_t* new_methods)
+    REQUIRES(!Locks::profiler_lock_)
+    REQUIRES(!Locks::mutator_lock_);
+
+  void NotifyJitActivityInternal() REQUIRES(!wait_lock_);
+  void WakeUpSaver() REQUIRES(wait_lock_);
+
+  // Returns true if the saver is shutting down (ProfileSaver::Stop() has been called).
+  bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
+
+  void AddTrackedLocations(const std::string& output_filename,
+                           const std::string& app_data_dir,
+                           const std::vector<std::string>& code_paths)
+      REQUIRES(Locks::profiler_lock_);
+
+  // Retrieves the cached profile compilation info for the given profile file.
+  // If no entry exists, a new empty one will be created, added to the cache and
+  // then returned.
+  ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
+  // Fetches the current resolved classes and methods from the ClassLinker and stores them in the
+  // profile_cache_ for later save.
+  void FetchAndCacheResolvedClassesAndMethods();
+
+  static bool MaybeRecordDexUseInternal(
+      const std::string& dex_location,
+      const std::set<std::string>& tracked_locations,
+      const std::string& foreign_dex_profile_path,
+      const std::set<std::string>& app_data_dirs);
+
+  void DumpInfo(std::ostream& os);
+
+  // The only instance of the saver.
+  static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
+  // Profile saver thread.
+  static pthread_t profiler_pthread_ GUARDED_BY(Locks::profiler_lock_);
+
+  jit::JitCodeCache* jit_code_cache_;
+
+  // Collection of code paths that the profile saver tracks.
+  // It maps profile locations to code paths (dex base locations).
+  SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
+      GUARDED_BY(Locks::profiler_lock_);
+  // The directory where we should store the foreign dex use marks.
+  std::string foreign_dex_profile_path_;
+
+  // A list of application directories, used to infer if a loaded dex belongs
+  // to the application or not. Multiple application data directories are possible when
+  // different apps share the same runtime.
+  std::set<std::string> app_data_dirs_ GUARDED_BY(Locks::profiler_lock_);
+
+  bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
+  uint32_t last_save_number_of_methods_;
+  uint32_t last_save_number_of_classes_;
+  uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_);
+  uint32_t jit_activity_notifications_;
+
+  // A local cache for the profile information. Maps each tracked file to its
+  // profile information. The cache is usually very small, topping out at a few
+  // hundred entries in the ProfileCompilationInfo objects.
+  // It helps avoid unnecessary writes to disk.
+  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
+
+  // Save period condition support.
+  Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
+
+  uint64_t total_bytes_written_;
+  uint64_t total_number_of_writes_;
+  uint64_t total_number_of_code_cache_queries_;
+  uint64_t total_number_of_skipped_writes_;
+  uint64_t total_number_of_failed_writes_;
+  uint64_t total_ms_of_sleep_;
+  uint64_t total_ns_of_work_;
+  uint64_t total_number_of_foreign_dex_marks_;
+  // TODO(calin): replace with an actual size.
+  uint64_t max_number_of_profile_entries_cached_;
+  uint64_t total_number_of_hot_spikes_;
+  uint64_t total_number_of_wake_ups_;
+
+  const ProfileSaverOptions options_;
+  DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_SAVER_H_
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
new file mode 100644
index 0000000..a6385d7
--- /dev/null
+++ b/runtime/jit/profile_saver_options.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
+#define ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
+
+#include <string>
+#include <ostream>
+
+namespace art {
+
+struct ProfileSaverOptions {
+ public:
+  static constexpr uint32_t kMinSavePeriodMs = 20 * 1000;  // 20 seconds
+  static constexpr uint32_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+  // Minimum number of JIT samples during launch to include a method into the profile.
+  static constexpr uint32_t kStartupMethodSamples = 1;
+  static constexpr uint32_t kMinMethodsToSave = 10;
+  static constexpr uint32_t kMinClassesToSave = 10;
+  static constexpr uint32_t kMinNotificationBeforeWake = 10;
+  static constexpr uint32_t kMaxNotificationBeforeWake = 50;
+
+  ProfileSaverOptions() :
+    enabled_(false),
+    min_save_period_ms_(kMinSavePeriodMs),
+    save_resolved_classes_delay_ms_(kSaveResolvedClassesDelayMs),
+    startup_method_samples_(kStartupMethodSamples),
+    min_methods_to_save_(kMinMethodsToSave),
+    min_classes_to_save_(kMinClassesToSave),
+    min_notification_before_wake_(kMinNotificationBeforeWake),
+    max_notification_before_wake_(kMaxNotificationBeforeWake) {}
+
+  ProfileSaverOptions(
+      bool enabled,
+      uint32_t min_save_period_ms,
+      uint32_t save_resolved_classes_delay_ms,
+      uint32_t startup_method_samples,
+      uint32_t min_methods_to_save,
+      uint32_t min_classes_to_save,
+      uint32_t min_notification_before_wake,
+      uint32_t max_notification_before_wake):
+    enabled_(enabled),
+    min_save_period_ms_(min_save_period_ms),
+    save_resolved_classes_delay_ms_(save_resolved_classes_delay_ms),
+    startup_method_samples_(startup_method_samples),
+    min_methods_to_save_(min_methods_to_save),
+    min_classes_to_save_(min_classes_to_save),
+    min_notification_before_wake_(min_notification_before_wake),
+    max_notification_before_wake_(max_notification_before_wake) {}
+
+  bool IsEnabled() const {
+    return enabled_;
+  }
+  void SetEnabled(bool enabled) {
+    enabled_ = enabled;
+  }
+
+  uint32_t GetMinSavePeriodMs() const {
+    return min_save_period_ms_;
+  }
+  uint32_t GetSaveResolvedClassesDelayMs() const {
+    return save_resolved_classes_delay_ms_;
+  }
+  uint32_t GetStartupMethodSamples() const {
+    return startup_method_samples_;
+  }
+  uint32_t GetMinMethodsToSave() const {
+    return min_methods_to_save_;
+  }
+  uint32_t GetMinClassesToSave() const {
+    return min_classes_to_save_;
+  }
+  uint32_t GetMinNotificationBeforeWake() const {
+    return min_notification_before_wake_;
+  }
+  uint32_t GetMaxNotificationBeforeWake() const {
+    return max_notification_before_wake_;
+  }
+
+  friend std::ostream& operator<<(std::ostream& os, const ProfileSaverOptions& pso) {
+    os << "enabled_" << pso.enabled_
+        << ", min_save_period_ms_" << pso.min_save_period_ms_
+        << ", save_resolved_classes_delay_ms_" << pso.save_resolved_classes_delay_ms_
+        << ", startup_method_samples_" << pso.startup_method_samples_
+        << ", min_methods_to_save_" << pso.min_methods_to_save_
+        << ", min_classes_to_save_" << pso.min_classes_to_save_
+        << ", min_notification_before_wake_" << pso.min_notification_before_wake_
+        << ", max_notification_before_wake_" << pso.max_notification_before_wake_;
+    return os;
+  }
+
+  bool enabled_;
+  uint32_t min_save_period_ms_;
+  uint32_t save_resolved_classes_delay_ms_;
+  uint32_t startup_method_samples_;
+  uint32_t min_methods_to_save_;
+  uint32_t min_classes_to_save_;
+  uint32_t min_notification_before_wake_;
+  uint32_t max_notification_before_wake_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 0c039f2..216df2f 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,18 +25,36 @@
 
 namespace art {
 
-ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method),
+        is_method_being_compiled_(false),
+        is_osr_method_being_compiled_(false),
+        current_inline_uses_(0),
+        saved_entry_point_(nullptr) {
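+  // cache_ is a flexible array member; the JIT code cache allocates this object
+  // with enough trailing space for entries.size() InlineCache slots.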
+  memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    cache_[i].dex_pc_ = entries[i];
+  }
+  if (method->IsCopied()) {
+    // GetHoldingClassOfCopiedMethod is expensive, but creating a profiling info for a copied method
+    // appears to happen very rarely in practice.
+    holding_class_ = GcRoot<mirror::Class>(
+        Runtime::Current()->GetClassLinker()->GetHoldingClassOfCopiedMethod(method));
+  } else {
+    holding_class_ = GcRoot<mirror::Class>(method->GetDeclaringClass());
+  }
+  DCHECK(!holding_class_.IsNull());
+}
+
+bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
-  const uint16_t* code_ptr = nullptr;
-  const uint16_t* code_end = nullptr;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    DCHECK(!method->IsNative());
-    const DexFile::CodeItem& code_item = *method->GetCodeItem();
-    code_ptr = code_item.insns_;
-    code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
-  }
+  DCHECK(!method->IsNative());
+
+  const DexFile::CodeItem& code_item = *method->GetCodeItem();
+  const uint16_t* code_ptr = code_item.insns_;
+  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
 
   uint32_t dex_pc = 0;
   std::vector<uint32_t> entries;
@@ -59,39 +77,31 @@
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
-  // object, it will never be filled.
-  if (entries.empty()) {
-    return nullptr;
-  }
+  // We always create a `ProfilingInfo` object, even if there is no instruction we are
+  // interested in. The JIT code cache internally uses it.
 
   // Allocate the `ProfilingInfo` object in the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
-  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
-  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
-
-  if (data == nullptr) {
-    VLOG(jit) << "Cannot allocate profiling info anymore";
-    return nullptr;
-  }
-
-  return new (data) ProfilingInfo(entries);
+  return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+InlineCache* ProfilingInfo::GetInlineCache(uint32_t dex_pc) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-    if (cache_[i].dex_pc == dex_pc) {
+    if (cache_[i].dex_pc_ == dex_pc) {
       cache = &cache_[i];
       break;
     }
   }
-  DCHECK(cache != nullptr);
+  return cache;
+}
 
-  ScopedObjectAccess soa(self);
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = GetInlineCache(dex_pc);
+  CHECK(cache != nullptr) << PrettyMethod(method_) << "@" << dex_pc;
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+    mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
       // Receiver type is already in the cache, nothing else to do.
       return;
@@ -106,12 +116,20 @@
         --i;
       } else {
         // We successfully set `cls`, just return.
+        // Since the instrumentation is marked from the declaring class we need to mark the card so
+        // that mod-union tables and card rescanning know about the update.
+        // Note that the declaring class is not necessarily the holding class if the method is
+        // copied. We need the card mark to be in the holding class since that is from where we
+        // will visit the profiling info.
+        if (!holding_class_.IsNull()) {
+          Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(holding_class_.Read());
+        }
         return;
       }
     }
   }
-  // Unsuccessfull - cache is full, making it megamorphic.
-  DCHECK(cache->IsMegamorphic());
+  // Unsuccessful - cache is full, making it megamorphic. We do not DCHECK it though,
+  // as the garbage collector might clear the entries concurrently.
 }
 
 }  // namespace art
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 73ca41a..a890fbb 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -25,25 +25,87 @@
 namespace art {
 
 class ArtMethod;
+class ProfilingInfo;
+
+namespace jit {
+class JitCodeCache;
+}
 
 namespace mirror {
 class Class;
 }
 
+// Structure to store the classes seen at runtime for a specific instruction.
+// Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+class InlineCache {
+ public:
+  bool IsMonomorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 2);
+    return !classes_[0].IsNull() && classes_[1].IsNull();
+  }
+
+  bool IsMegamorphic() const {
+    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+      if (classes_[i].IsNull()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we cannot ensure the inline cache is actually monomorphic
+    // at this point, as other threads may have updated it.
+    DCHECK(!classes_[0].IsNull());
+    return classes_[0].Read();
+  }
+
+  bool IsUninitialized() const {
+    return classes_[0].IsNull();
+  }
+
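+  // Polymorphic: at least two receiver types have been seen, but the cache is
+  // not yet full (a full cache is megamorphic).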
+  bool IsPolymorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 3);
+    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+  }
+
+  mirror::Class* GetTypeAt(size_t i) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return classes_[i].Read();
+  }
+
+  static constexpr uint16_t kIndividualCacheSize = 5;
+
+ private:
+  uint32_t dex_pc_;
+  GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+
+  friend class ProfilingInfo;
+
+  DISALLOW_COPY_AND_ASSIGN(InlineCache);
+};
+
 /**
  * Profiling info for a method, created and filled by the interpreter once the
  * method is warm, and used by the compiler to drive optimizations.
  */
 class ProfilingInfo {
  public:
-  static ProfilingInfo* Create(ArtMethod* method);
+  // Create a ProfilingInfo for 'method'. Return whether it succeeded, or whether
+  // it is not needed because the method has no virtual/interface invocations.
+  static bool Create(Thread* self, ArtMethod* method, bool retry_allocation)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add information from an executed INVOKE instruction to the profile.
-  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+  void AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls)
+      // Method should not be interruptible, as it manipulates the ProfilingInfo
+      // which can be concurrently collected.
+      REQUIRES(Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
   void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    visitor.VisitRootIfNonNull(holding_class_.AddressWithoutBarrier());
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       InlineCache* cache = &cache_[i];
       for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
@@ -52,52 +114,89 @@
     }
   }
 
- private:
-  // Structure to store the classes seen at runtime for a specific instruction.
-  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
-  struct InlineCache {
-    bool IsMonomorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 2);
-      return !classes_[0].IsNull() && classes_[1].IsNull();
-    }
+  ArtMethod* GetMethod() const {
+    return method_;
+  }
 
-    bool IsMegamorphic() const {
-      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-        if (classes_[i].IsNull()) {
-          return false;
-        }
-      }
-      return true;
-    }
+  InlineCache* GetInlineCache(uint32_t dex_pc);
 
-    bool IsUnitialized() const {
-      return classes_[0].IsNull();
-    }
+  bool IsMethodBeingCompiled(bool osr) const {
+    return osr
+        ? is_osr_method_being_compiled_
+        : is_method_being_compiled_;
+  }
 
-    bool IsPolymorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 3);
-      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-    }
-
-    static constexpr uint16_t kIndividualCacheSize = 5;
-    uint32_t dex_pc;
-    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
-  };
-
-  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()) {
-    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
-    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc = entries[i];
+  void SetIsMethodBeingCompiled(bool value, bool osr) {
+    if (osr) {
+      is_osr_method_being_compiled_ = value;
+    } else {
+      is_method_being_compiled_ = value;
     }
   }
 
+  void SetSavedEntryPoint(const void* entry_point) {
+    saved_entry_point_ = entry_point;
+  }
+
+  const void* GetSavedEntryPoint() const {
+    return saved_entry_point_;
+  }
+
+  void ClearGcRootsInInlineCaches() {
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      InlineCache* cache = &cache_[i];
+      memset(&cache->classes_[0],
+             0,
+             InlineCache::kIndividualCacheSize * sizeof(GcRoot<mirror::Class>));
+    }
+  }
+
+  void IncrementInlineUse() {
+    DCHECK_NE(current_inline_uses_, std::numeric_limits<uint16_t>::max());
+    current_inline_uses_++;
+  }
+
+  void DecrementInlineUse() {
+    DCHECK_GT(current_inline_uses_, 0);
+    current_inline_uses_--;
+  }
+
+  bool IsInUseByCompiler() const {
+    return IsMethodBeingCompiled(/*osr*/ true) || IsMethodBeingCompiled(/*osr*/ false) ||
+        (current_inline_uses_ > 0);
+  }
+
+ private:
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries);
+
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
 
+  // Method this profiling info is for.
+  ArtMethod* const method_;
+
+  // Holding class for the method in case method is a copied method.
+  GcRoot<mirror::Class> holding_class_;
+
+  // Whether the ArtMethod is currently being compiled. This flag
+  // is implicitly guarded by the JIT code cache lock.
+  // TODO: Make the JIT code cache lock global.
+  bool is_method_being_compiled_;
+  bool is_osr_method_being_compiled_;
+
+  // When the compiler inlines the method associated to this ProfilingInfo,
+  // it updates this counter so that the GC does not try to clear the inline caches.
+  uint16_t current_inline_uses_;
+
+  // Entry point of the corresponding ArtMethod, while the JIT code cache
+  // is poking for the liveness of compiled code.
+  const void* saved_entry_point_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
+  friend class jit::JitCodeCache;
+
   DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
 };
 
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index dab1040..40efc89 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -45,6 +45,20 @@
   return in->locals.IsValid();
 }
 
+jint JNIEnvExt::GetEnvHandler(JavaVMExt* vm, /*out*/void** env, jint version) {
+  UNUSED(vm);
+  // GetEnv always returns a JNIEnv* for the most current supported JNI version,
+  // and unlike other calls that take a JNI version doesn't care if you supply
+  // JNI_VERSION_1_1, which we don't otherwise support.
+  if (JavaVMExt::IsBadJniVersion(version) && version != JNI_VERSION_1_1) {
+    return JNI_EVERSION;
+  }
+  Thread* thread = Thread::Current();
+  CHECK(thread != nullptr);
+  *env = thread->GetJniEnv();
+  return JNI_OK;
+}
+
 JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
   std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in));
   if (CheckLocalsValid(ret.get())) {
@@ -59,6 +73,7 @@
       local_ref_cookie(IRT_FIRST_SEGMENT),
       locals(kLocalsInitial, kLocalsMax, kLocal, false),
       check_jni(false),
+      runtime_deleted(false),
       critical(0),
       monitors("monitors", kMonitorsInitial, kMonitorsMax) {
   functions = unchecked_functions = GetJniNativeInterface();
@@ -67,6 +82,11 @@
   }
 }
 
+void JNIEnvExt::SetFunctionsToRuntimeShutdownFunctions() {
+  functions = GetRuntimeShutdownNativeInterface();
+  runtime_deleted = true;
+}
+
 JNIEnvExt::~JNIEnvExt() {
 }
 
@@ -105,9 +125,32 @@
   stacked_local_ref_cookies.pop_back();
 }
 
-Offset JNIEnvExt::SegmentStateOffset() {
-  return Offset(OFFSETOF_MEMBER(JNIEnvExt, locals) +
-                IndirectReferenceTable::SegmentStateOffset().Int32Value());
+// Note: the offset code is brittle, as we can't use OFFSETOF_MEMBER or offsetof easily. Thus, there
+//       are tests in jni_internal_test to match the results against the actual values.
+
+// This is encoding the knowledge of the structure and layout of JNIEnv fields.
+static size_t JNIEnvSize(size_t pointer_size) {
+  // A single pointer.
+  return pointer_size;
+}
+
+Offset JNIEnvExt::SegmentStateOffset(size_t pointer_size) {
+  size_t locals_offset = JNIEnvSize(pointer_size) +
+                         2 * pointer_size +          // Thread* self + JavaVMExt* vm.
+                         4 +                         // local_ref_cookie.
+                         (pointer_size - 4);         // Padding.
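+  // For example, with pointer_size == 8 this yields 8 + 16 + 4 + 4 = 32 bytes
+  // before the locals table.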
+  size_t irt_segment_state_offset =
+      IndirectReferenceTable::SegmentStateOffset(pointer_size).Int32Value();
+  return Offset(locals_offset + irt_segment_state_offset);
+}
+
+Offset JNIEnvExt::LocalRefCookieOffset(size_t pointer_size) {
+  return Offset(JNIEnvSize(pointer_size) +
+                2 * pointer_size);          // Thread* self + JavaVMExt* vm
+}
+
+Offset JNIEnvExt::SelfOffset(size_t pointer_size) {
+  return Offset(JNIEnvSize(pointer_size));
 }
 
 // Use some defining part of the caller's frame as the identifying mark for the JNI segment.
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 3828ff0..ac287d4 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -50,15 +50,11 @@
   T AddLocalReference(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static Offset SegmentStateOffset();
+  static Offset SegmentStateOffset(size_t pointer_size);
+  static Offset LocalRefCookieOffset(size_t pointer_size);
+  static Offset SelfOffset(size_t pointer_size);
 
-  static Offset LocalRefCookieOffset() {
-    return Offset(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie));
-  }
-
-  static Offset SelfOffset() {
-    return Offset(OFFSETOF_MEMBER(JNIEnvExt, self));
-  }
+  static jint GetEnvHandler(JavaVMExt* vm, /*out*/void** out, jint version);
 
   jobject NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -80,6 +76,9 @@
   // Frequently-accessed fields cached from JavaVM.
   bool check_jni;
 
+  // If we are a JNI env for a daemon thread with a deleted runtime.
+  bool runtime_deleted;
+
   // How many nested "critical" JNI calls are we in?
   int critical;
 
@@ -101,6 +100,9 @@
   // Check that no monitors are held that have been acquired in this JNI "segment."
   void CheckNoHeldMonitors() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Set the functions to the runtime shutdown functions.
+  void SetFunctionsToRuntimeShutdownFunctions();
+
  private:
   // The constructor should not be called directly. It may leave the object in an erroneous state,
   // and the result needs to be checked.
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 234a733..7bcadd8 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -27,6 +27,7 @@
 #include "art_method-inl.h"
 #include "atomic.h"
 #include "base/allocator.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -301,13 +302,13 @@
     CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, return_val)
 
 #define CHECK_NON_NULL_ARGUMENT_FN_NAME(name, value, return_val) \
-  if (UNLIKELY(value == nullptr)) { \
+  if (UNLIKELY((value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(name, #value " == null"); \
     return return_val; \
   }
 
 #define CHECK_NON_NULL_MEMCPY_ARGUMENT(length, value) \
-  if (UNLIKELY(length != 0 && value == nullptr)) { \
+  if (UNLIKELY((length) != 0 && (value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(__FUNCTION__, #value " == null"); \
     return; \
   }
@@ -316,12 +317,7 @@
 static ArtMethod* FindMethod(mirror::Class* c, const StringPiece& name, const StringPiece& sig)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  for (auto& method : c->GetDirectMethods(pointer_size)) {
-    if (kNative == method.IsNative() && name == method.GetName() && method.GetSignature() == sig) {
-      return &method;
-    }
-  }
-  for (auto& method : c->GetVirtualMethods(pointer_size)) {
+  for (auto& method : c->GetMethods(pointer_size)) {
     if (kNative == method.IsNative() && name == method.GetName() && method.GetSignature() == sig) {
       return &method;
     }
@@ -380,10 +376,12 @@
     ScopedObjectAccess soa(env);
     ArtMethod* m = soa.DecodeMethod(mid);
     mirror::AbstractMethod* method;
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+    DCHECK(!Runtime::Current()->IsActiveTransaction());
     if (m->IsConstructor()) {
-      method = mirror::Constructor::CreateFromArtMethod(soa.Self(), m);
+      method = mirror::Constructor::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), m);
     } else {
-      method = mirror::Method::CreateFromArtMethod(soa.Self(), m);
+      method = mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), m);
     }
     return soa.AddLocalReference<jobject>(method);
   }
@@ -392,7 +390,8 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     ArtField* f = soa.DecodeField(fid);
-    return soa.AddLocalReference<jobject>(mirror::Field::CreateFromArtField(soa.Self(), f, true));
+    return soa.AddLocalReference<jobject>(
+        mirror::Field::CreateFromArtField<kRuntimePointerSize>(soa.Self(), f, true));
   }
 
   static jclass GetObjectClass(JNIEnv* env, jobject java_object) {
@@ -593,9 +592,8 @@
     }
     if (c->IsStringClass()) {
       gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-      mirror::SetStringCountVisitor visitor(0);
-      return soa.AddLocalReference<jobject>(mirror::String::Alloc<true>(soa.Self(), 0,
-                                                                        allocator_type, visitor));
+      return soa.AddLocalReference<jobject>(mirror::String::AllocEmptyString<true>(soa.Self(),
+                                                                              allocator_type));
     }
     return soa.AddLocalReference<jobject>(c->AllocObject(soa.Self()));
   }
@@ -1670,12 +1668,18 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
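+    // The bound is written as `length > s->GetLength() - start` so that large
+    // arguments cannot overflow the way `start + length` could.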
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      const jchar* chars = s->GetValue();
-      memcpy(buf, chars + start, length * sizeof(jchar));
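+      // Compressed strings store 8-bit characters that must be widened one at a
+      // time; uncompressed strings can be block-copied as jchars.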
+      if (s->IsCompressed()) {
+        for (int i = 0; i < length; ++i) {
+          buf[i] = static_cast<jchar>(s->CharAt(start+i));
+        }
+      } else {
+        const jchar* chars = static_cast<jchar*>(s->GetValue());
+        memcpy(buf, chars + start, length * sizeof(jchar));
+      }
     }
   }
 
@@ -1684,12 +1688,19 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      const jchar* chars = s->GetValue();
-      ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
+      if (s->IsCompressed()) {
+        for (int i = 0; i < length; ++i) {
+          buf[i] = s->CharAt(start+i);
+        }
+      } else {
+        const jchar* chars = s->GetValue();
+        size_t bytes = CountUtf8Bytes(chars + start, length);
+        ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length);
+      }
     }
   }
 
@@ -1698,9 +1709,16 @@
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap->IsMovableObject(s)) {
+    if (heap->IsMovableObject(s) || s->IsCompressed()) {
       jchar* chars = new jchar[s->GetLength()];
-      memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength());
+      if (s->IsCompressed()) {
+        int32_t length = s->GetLength();
+        for (int i = 0; i < length; ++i) {
+          chars[i] = s->CharAt(i);
+        }
+      } else {
+        memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength());
+      }
       if (is_copy != nullptr) {
         *is_copy = JNI_TRUE;
       }
@@ -1716,7 +1734,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (chars != s->GetValue()) {
+    if (s->IsCompressed() || chars != s->GetValue()) {
       delete[] chars;
     }
   }
@@ -1737,15 +1755,27 @@
         heap->IncrementDisableThreadFlip(soa.Self());
       }
     }
-    if (is_copy != nullptr) {
-      *is_copy = JNI_FALSE;
+    if (s->IsCompressed()) {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_TRUE;
+      }
+      int32_t length = s->GetLength();
+      jchar* chars = new jchar[length];
+      for (int i = 0; i < length; ++i) {
+        chars[i] = s->CharAt(i);
+      }
+      return chars;
+    } else {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_FALSE;
+      }
+      return static_cast<jchar*>(s->GetValue());
     }
-    return static_cast<jchar*>(s->GetValue());
   }
 
   static void ReleaseStringCritical(JNIEnv* env,
                                     jstring java_string,
-                                    const jchar* chars ATTRIBUTE_UNUSED) {
+                                    const jchar* chars) {
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -1757,6 +1787,9 @@
         heap->DecrementDisableThreadFlip(soa.Self());
       }
     }
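+    // GetStringCritical returns a fresh copy for compressed strings; otherwise
+    // the pointer aliases the String's value array and must not be deleted.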
+    if (s->IsCompressed() || s->GetValue() != chars) {
+      delete[] chars;
+    }
   }
 
   static const char* GetStringUTFChars(JNIEnv* env, jstring java_string, jboolean* is_copy) {
@@ -1771,8 +1804,14 @@
     size_t byte_count = s->GetUtfLength();
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
-    const uint16_t* chars = s->GetValue();
-    ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
+    if (s->IsCompressed()) {
+      for (size_t i = 0; i < byte_count; ++i) {
+        bytes[i] = s->CharAt(i);
+      }
+    } else {
+      const uint16_t* chars = s->GetValue();
+      ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength());
+    }
     bytes[byte_count] = '\0';
     return bytes;
   }
@@ -2205,6 +2244,7 @@
 
       VLOG(jni) << "[Registering JNI native method " << PrettyMethod(m) << "]";
 
+      is_fast = is_fast || m->IsFastNative();  // Merge with @FastNative state.
       m->RegisterNative(fnPtr, is_fast);
     }
     return JNI_OK;
@@ -2219,13 +2259,7 @@
 
     size_t unregistered_count = 0;
     auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : c->GetDirectMethods(pointer_size)) {
-      if (m.IsNative()) {
-        m.UnregisterNative();
-        unregistered_count++;
-      }
-    }
-    for (auto& m : c->GetVirtualMethods(pointer_size)) {
+    for (auto& m : c->GetMethods(pointer_size)) {
       if (m.IsNative()) {
         m.UnregisterNative();
         unregistered_count++;
@@ -2429,8 +2463,6 @@
     gc::Heap* heap = Runtime::Current()->GetHeap();
     bool is_copy = array_data != elements;
     size_t bytes = array->GetLength() * component_size;
-    VLOG(heap) << "Release primitive array " << soa.Env() << " array_data " << array_data
-               << " elements " << elements;
     if (is_copy) {
       // Sanity check: If elements is not the same as the java array's data, it better not be a
       // heap address. TODO: This might be slow to check, may be worth keeping track of which
@@ -2473,7 +2505,7 @@
                                                               "GetPrimitiveArrayRegion",
                                                               "get region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "src");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2493,7 +2525,7 @@
                                                               "SetPrimitiveArrayRegion",
                                                               "set region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "dst");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2744,6 +2776,246 @@
   return &gJniNativeInterface;
 }
 
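+// Stub JNI function table installed once the runtime has been deleted: every
+// callable slot simply parks the calling daemon thread forever instead of
+// touching freed runtime state.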
+void (*gJniSleepForeverStub[])() = {
+  nullptr,  // reserved0.
+  nullptr,  // reserved1.
+  nullptr,  // reserved2.
+  nullptr,  // reserved3.
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+};
+
+const JNINativeInterface* GetRuntimeShutdownNativeInterface() {
+  return reinterpret_cast<JNINativeInterface*>(&gJniSleepForeverStub);
+}
+
 void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
                            jint method_count) {
   ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 48b10f5..b829934 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -24,12 +24,20 @@
 #define NATIVE_METHOD(className, functionName, signature) \
   { #functionName, signature, reinterpret_cast<void*>(className ## _ ## functionName) }
 #endif
+
+// TODO: Can we do a better job of supporting overloading ?
+#ifndef OVERLOADED_NATIVE_METHOD
+#define OVERLOADED_NATIVE_METHOD(className, functionName, signature, identifier) \
+    { #functionName, signature, reinterpret_cast<void*>(className ## _ ## identifier) }
+#endif
+
 #define REGISTER_NATIVE_METHODS(jni_class_name) \
   RegisterNativeMethods(env, jni_class_name, gMethods, arraysize(gMethods))
 
 namespace art {
 
 const JNINativeInterface* GetJniNativeInterface();
+const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
 // Similar to RegisterNatives except its passed a descriptor for a class name and failures are
 // fatal.
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 41b368e..6495474 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -18,7 +18,9 @@
 
 #include "art_method-inl.h"
 #include "common_compiler_test.h"
+#include "indirect_reference_table.h"
 #include "java_vm_ext.h"
+#include "jni_env_ext.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -878,8 +880,15 @@
   ASSERT_NE(fid2, nullptr);
   // Make sure we can actually use it.
   jstring s = env_->NewStringUTF("poop");
-  ASSERT_EQ(4, env_->GetIntField(s, fid2));
-
+  if (mirror::kUseStringCompression) {
+    // Negative because s is compressed (first bit is 1)
+    ASSERT_EQ(-2147483644, env_->GetIntField(s, fid2));
+    // Create incompressible string
+    jstring s_16 = env_->NewStringUTF("\u0444\u0444");
+    ASSERT_EQ(2, env_->GetIntField(s_16, fid2));
+  } else {
+    ASSERT_EQ(4, env_->GetIntField(s, fid2));
+  }
   // Bad arguments.
   GetFromReflectedField_ToReflectedFieldBadArgumentTest(false);
   GetFromReflectedField_ToReflectedFieldBadArgumentTest(true);
@@ -1077,6 +1086,12 @@
   env_->set_region_fn(a, size - 1, size, nullptr); \
   ExpectException(aioobe_); \
   \
+  /* Regression test against integer overflow in range check. */ \
+  env_->get_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  \
   /* It's okay for the buffer to be null as long as the length is 0. */ \
   env_->get_region_fn(a, 2, 0, nullptr); \
   /* Even if the offset is invalid... */ \
@@ -1507,6 +1522,9 @@
   ExpectException(sioobe_);
   env_->GetStringRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   jchar chars[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringRegion(s, 1, 2, &chars[1]);
@@ -1529,6 +1547,9 @@
   ExpectException(sioobe_);
   env_->GetStringUTFRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringUTFRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   char bytes[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringUTFRegion(s, 1, 2, &bytes[1]);
@@ -1618,13 +1639,28 @@
 
   jboolean is_copy = JNI_TRUE;
   chars = env_->GetStringCritical(s, &is_copy);
-  EXPECT_EQ(JNI_FALSE, is_copy);
+  if (mirror::kUseStringCompression) {
+    // is_copy has to be JNI_TRUE because "hello" is all-ASCII
+    EXPECT_EQ(JNI_TRUE, is_copy);
+  } else {
+    EXPECT_EQ(JNI_FALSE, is_copy);
+  }
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
   EXPECT_EQ(expected[3], chars[3]);
   EXPECT_EQ(expected[4], chars[4]);
   env_->ReleaseStringCritical(s, chars);
+
+  if (mirror::kUseStringCompression) {
+    // is_copy has to be JNI_FALSE because "\xed\xa0\x81\xed\xb0\x80" is incompressible
+    jboolean is_copy_16 = JNI_TRUE;
+    jstring s_16 = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80");
+    chars = env_->GetStringCritical(s_16, &is_copy_16);
+    EXPECT_EQ(JNI_FALSE, is_copy_16);
+    EXPECT_EQ(2, env_->GetStringLength(s_16));
+    EXPECT_EQ(4, env_->GetStringUTFLength(s_16));
+    env_->ReleaseStringCritical(s_16, chars);
+  }
 }
 
 TEST_F(JniInternalTest, GetObjectArrayElement_SetObjectArrayElement) {
@@ -2077,8 +2113,7 @@
   MakeExecutable(nullptr, "java.lang.Class");
   MakeExecutable(nullptr, "java.lang.Object");
   MakeExecutable(nullptr, "java.nio.DirectByteBuffer");
-  MakeExecutable(nullptr, "java.nio.MemoryBlock");
-  MakeExecutable(nullptr, "java.nio.MemoryBlock$UnmanagedBlock");
+  MakeExecutable(nullptr, "java.nio.Bits");
   MakeExecutable(nullptr, "java.nio.MappedByteBuffer");
   MakeExecutable(nullptr, "java.nio.ByteBuffer");
   MakeExecutable(nullptr, "java.nio.Buffer");
@@ -2198,4 +2233,92 @@
   check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
 }
 
+static bool IsLocked(JNIEnv* env, jobject jobj) {
+  ScopedObjectAccess soa(env);
+  LockWord lock_word = soa.Decode<mirror::Object*>(jobj)->GetLockWord(true);
+  switch (lock_word.GetState()) {
+    case LockWord::kHashCode:
+    case LockWord::kUnlocked:
+      return false;
+    case LockWord::kThinLocked:
+      return true;
+    case LockWord::kFatLocked:
+      return lock_word.FatLockMonitor()->IsLocked();
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      UNREACHABLE();
+    }
+  }
+}
+
+TEST_F(JniInternalTest, DetachThreadUnlockJNIMonitors) {
+  // We need to lock an object, detach, reattach, and check the locks.
+  //
+  // As re-attaching will create a different thread, we need to use a global
+  // ref to keep the object around.
+
+  // Create an object to torture.
+  jobject global_ref;
+  {
+    jclass object_class = env_->FindClass("java/lang/Object");
+    ASSERT_NE(object_class, nullptr);
+    jobject object = env_->AllocObject(object_class);
+    ASSERT_NE(object, nullptr);
+    global_ref = env_->NewGlobalRef(object);
+  }
+
+  // Lock it.
+  env_->MonitorEnter(global_ref);
+  ASSERT_TRUE(IsLocked(env_, global_ref));
+
+  // Detach and re-attach.
+  jint detach_result = vm_->DetachCurrentThread();
+  ASSERT_EQ(detach_result, JNI_OK);
+  jint attach_result = vm_->AttachCurrentThread(&env_, nullptr);
+  ASSERT_EQ(attach_result, JNI_OK);
+
+  // Look at the global ref, check whether it's still locked.
+  ASSERT_FALSE(IsLocked(env_, global_ref));
+
+  // Delete the global ref.
+  env_->DeleteGlobalRef(global_ref);
+}
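The behavior this test pins down comes from the JNI specification: DetachCurrentThread must release any monitors the detaching thread still holds via MonitorEnter. A conceptual sketch of the detach-time bookkeeping (the types here are illustrative, not ART's Thread or Monitor):

#include <vector>

struct Monitor {
  void Unlock() {}  // stand-in for releasing the underlying lock
};

struct ThreadState {
  // Monitors acquired through JNI MonitorEnter and not yet exited.
  std::vector<Monitor*> held_jni_monitors;

  void OnDetach() {
    // Force-release every JNI-held monitor so a later AttachCurrentThread
    // observes the objects as unlocked, as asserted above.
    for (Monitor* monitor : held_jni_monitors) {
      monitor->Unlock();
    }
    held_jni_monitors.clear();
  }
};
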
+
+// Test the offset computation of IndirectReferenceTable offsets. b/26071368.
+TEST_F(JniInternalTest, IndirectReferenceTableOffsets) {
+  // The segment_state_ field is private and we want to avoid a friend declaration, so we
+  // check the offset by writing to the field's memory directly.
+  // The parameters don't really matter here.
+  IndirectReferenceTable irt(5, 5, IndirectRefKind::kGlobal, true);
+  uint32_t old_state = irt.GetSegmentState();
+
+  // Write some new state directly. We invert parts of old_state to ensure a new value.
+  uint32_t new_state = old_state ^ 0x07705005;
+  ASSERT_NE(old_state, new_state);
+
+  uint8_t* base = reinterpret_cast<uint8_t*>(&irt);
+  int32_t segment_state_offset =
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
+  *reinterpret_cast<uint32_t*>(base + segment_state_offset) = new_state;
+
+  // Read and compare.
+  EXPECT_EQ(new_state, irt.GetSegmentState());
+}
+
+// Test the offset computation of JNIEnvExt offsets. b/26071368.
+TEST_F(JniInternalTest, JNIEnvExtOffsets) {
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie),
+            JNIEnvExt::LocalRefCookieOffset(sizeof(void*)).Uint32Value());
+
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, self), JNIEnvExt::SelfOffset(sizeof(void*)).Uint32Value());
+
+  // segment_state_ is private in the IndirectReferenceTable, so this test is weaker than
+  // we would like it to be.
+  uint32_t segment_state_now =
+      OFFSETOF_MEMBER(JNIEnvExt, locals) +
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Uint32Value();
+  uint32_t segment_state_computed = JNIEnvExt::SegmentStateOffset(sizeof(void*)).Uint32Value();
+  EXPECT_EQ(segment_state_now, segment_state_computed);
+}
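Both offset tests follow the same pattern: an offset that assembly or JIT-compiled code hard-codes is pinned against the compiler's actual layout. A self-contained sketch with standard offsetof and hypothetical stand-in types (the nested member designator is conditionally supported in C++, but accepted by common compilers):

#include <cstddef>
#include <cstdint>

struct Inner { uint32_t segment_state; };
struct Outer { void* self; Inner locals; };

// The offset of a nested field is the offset of the enclosing member plus
// the field's offset within it -- exactly the composition checked above.
static_assert(offsetof(Outer, locals) + offsetof(Inner, segment_state) ==
                  offsetof(Outer, locals.segment_state),
              "composed offsets must agree with the compiler's layout");
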
+
 }  // namespace art
diff --git a/runtime/lambda/art_lambda_method.cc b/runtime/lambda/art_lambda_method.cc
deleted file mode 100644
index 6f9f8bb..0000000
--- a/runtime/lambda/art_lambda_method.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/art_lambda_method.h"
-
-#include "base/logging.h"
-#include "lambda/shorty_field_type.h"
-
-namespace art {
-namespace lambda {
-
-ArtLambdaMethod::ArtLambdaMethod(ArtMethod* target_method,
-                                 const char* captured_variables_type_descriptor,
-                                 const char* captured_variables_shorty,
-                                 bool innate_lambda)
-    : method_(target_method),
-      captured_variables_type_descriptor_(captured_variables_type_descriptor),
-      captured_variables_shorty_(captured_variables_shorty),
-      innate_lambda_(innate_lambda) {
-  DCHECK(target_method != nullptr);
-  DCHECK(captured_variables_type_descriptor != nullptr);
-  DCHECK(captured_variables_shorty != nullptr);
-
-  // Calculate the static closure size from the captured variables.
-  size_t size = sizeof(ArtLambdaMethod*);  // Initial size is just this method.
-  bool static_size = true;
-  const char* shorty = captured_variables_shorty_;
-  while (shorty != nullptr && *shorty != '\0') {
-    // Each captured variable also appends to the size.
-    ShortyFieldType shorty_field{*shorty};  // NOLINT [readability/braces] [4]
-    size += shorty_field.GetStaticSize();
-    static_size &= shorty_field.IsStaticSize();
-    ++shorty;
-  }
-  closure_size_ = size;
-
-  // We determine whether or not the size is dynamic by checking for nested lambdas.
-  //
-  // This is conservative, since in theory an optimization could determine the size
-  // of the nested lambdas recursively. In practice it's probably better to flatten out
-  // nested lambdas and inline all their code if they are known statically.
-  dynamic_size_ = !static_size;
-
-  if (kIsDebugBuild) {
-    // Double-check that the number of captured variables matches in both strings.
-    size_t shorty_count = strlen(captured_variables_shorty);
-
-    size_t long_count = 0;
-    const char* long_type = captured_variables_type_descriptor;
-    ShortyFieldType out;
-    while ((long_type = ShortyFieldType::ParseFromFieldTypeDescriptor(long_type, &out))
-           != nullptr) {
-      ++long_count;
-    }
-
-    DCHECK_EQ(shorty_count, long_count)
-        << "number of captured variables in long type '" << captured_variables_type_descriptor
-        << "' (" << long_count << ")" << " did not match short type '"
-        << captured_variables_shorty << "' (" << shorty_count << ")";
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
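The constructor deleted above computed the closure size by summing a per-character size over the shorty string, starting from the method-pointer header, and flagged the size as dynamic when a nested lambda was present. A standalone sketch of that computation (the size table is illustrative, not ShortyFieldType's):

#include <cstddef>

struct ClosureSizeInfo {
  size_t static_size;
  bool is_static;  // false when a nested lambda makes the size runtime-only
};

ClosureSizeInfo ComputeClosureSize(const char* shorty) {
  ClosureSizeInfo info{sizeof(void*), true};  // header: the lambda method pointer
  for (; *shorty != '\0'; ++shorty) {
    switch (*shorty) {
      case 'Z': case 'B':           info.static_size += 1; break;
      case 'C': case 'S':           info.static_size += 2; break;
      case 'I': case 'F': case 'L': info.static_size += 4; break;  // compressed reference
      case 'J': case 'D':           info.static_size += 8; break;
      case '\\':  // nested lambda: only a static lower bound is known
        info.static_size += sizeof(void*);
        info.is_static = false;
        break;
      default: break;
    }
  }
  return info;
}
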
diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h
deleted file mode 100644
index ea13eb7..0000000
--- a/runtime/lambda/art_lambda_method.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
-#define ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
-
-#include "base/macros.h"
-#include "art_method.h"
-
-#include <stdint.h>
-
-namespace art {
-namespace lambda {
-
-class ArtLambdaMethod {
- public:
-  // Construct an art lambda method.
-  // The target method is the one invoked by invoke-lambda.
-  // The type descriptor describes the types of variables captured, e.g. "ZFLObject;\FI;[Z"
-  // The shorty drops the object name and treats arrays as objects, e.g. "ZFL\L"
-  // Innate lambda means that the lambda was originally created via invoke-lambda.
-  // -- Non-innate lambdas (learned lambdas) come from a regular class that was boxed to lambda.
-  // (Ownership of strings is retained by the caller and the lifetime should exceed this class).
-  ArtLambdaMethod(ArtMethod* target_method,
-                  const char* captured_variables_type_descriptor,
-                  const char* captured_variables_shorty,
-                  bool innate_lambda = true);
-
-  // Get the target method for this lambda that would be used by the invoke-lambda dex instruction.
-  ArtMethod* GetArtMethod() const {
-    return method_;
-  }
-
-  // Get the compile-time size of lambda closures for this method in bytes.
-  // This is circular (that is, it includes the size of the ArtLambdaMethod pointer).
-  // One should also check if the size is dynamic since nested lambdas have a runtime size.
-  size_t GetStaticClosureSize() const {
-    return closure_size_;
-  }
-
-  // Get the type descriptor for the list of captured variables.
-  // e.g. "ZFLObject;\FI;[Z" means a captured int, float, class Object, lambda FI, array of ints
-  const char* GetCapturedVariablesTypeDescriptor() const {
-    return captured_variables_type_descriptor_;
-  }
-
-  // Get the shorty 'field' type descriptor list of captured variables.
-  // This follows the same rules as a string of ShortyFieldType in the dex specification.
-  // Every captured variable is represented by exactly one character.
-  // - Objects become 'L'.
-  // - Arrays become 'L'.
-  // - Lambdas become '\'.
-  const char* GetCapturedVariablesShortyTypeDescriptor() const {
-    return captured_variables_shorty_;
-  }
-
-  // Will the size of this lambda change at runtime?
-  // Only returns true if there is a nested lambda that we can't determine statically the size of.
-  bool IsDynamicSize() const {
-    return dynamic_size_;
-  }
-
-  // Will the size of this lambda always be constant at runtime?
-  // This generally means there's no nested lambdas, or we were able to successfully determine
-  // their size statically at compile time.
-  bool IsStaticSize() const {
-    return !IsDynamicSize();
-  }
-  // Is this a lambda that was originally created via invoke-lambda?
-  // -- Non-innate lambdas (learned lambdas) come from a regular class that was boxed to lambda.
-  bool IsInnateLambda() const {
-    return innate_lambda_;
-  }
-
-  // How many variables were captured?
-  // (Each nested lambda counts as 1 captured var regardless of how many captures it itself has).
-  size_t GetNumberOfCapturedVariables() const {
-    return strlen(captured_variables_shorty_);
-  }
-
- private:
-  // TODO: ArtMethod, or at least the entry points should be inlined into this struct
-  // to avoid an extra indirect load when doing invokes.
-  // Target method that invoke-lambda will jump to.
-  ArtMethod* method_;
-  // How big the closure is (in bytes). Only includes the constant size.
-  size_t closure_size_;
-  // The type descriptor for the captured variables, e.g. "IS" for [int, short]
-  const char* captured_variables_type_descriptor_;
-  // The shorty type descriptor for captured vars, (e.g. using 'L' instead of 'LObject;')
-  const char* captured_variables_shorty_;
-  // Whether or not the size is dynamic. If it is, copiers need to read the Closure size at runtime.
-  bool dynamic_size_;
-  // True if this lambda was originally made with create-lambda,
-  // false if it came from a class instance (through new-instance and then unbox-lambda).
-  bool innate_lambda_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArtLambdaMethod);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
deleted file mode 100644
index 8eef10b..0000000
--- a/runtime/lambda/box_table.cc
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "lambda/box_table.h"
-
-#include "base/mutex.h"
-#include "common_throws.h"
-#include "gc_root-inl.h"
-#include "lambda/closure.h"
-#include "lambda/leaking_allocator.h"
-#include "mirror/method.h"
-#include "mirror/object-inl.h"
-#include "thread.h"
-
-#include <vector>
-
-namespace art {
-namespace lambda {
-// Temporarily represent the lambda Closure as its raw bytes in an array.
-// TODO: Generate a proxy class for the closure when boxing the first time.
-using BoxedClosurePointerType = mirror::ByteArray*;
-
-static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-  return mirror::ByteArray::GetArrayClass();
-}
-
-namespace {
-  // Convenience functions for allocating/deleting box table copies of the closures.
-  struct ClosureAllocator {
-    // Deletes a Closure that was allocated through ::Allocate.
-    static void Delete(Closure* ptr) {
-      delete[] reinterpret_cast<char*>(ptr);
-    }
-
-    // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap.
-    static Closure* Allocate(size_t size) {
-      DCHECK_GE(size, sizeof(Closure));
-
-      // TODO: Maybe point to the interior of the boxed closure object after we add proxy support?
-      Closure* closure = reinterpret_cast<Closure*>(new char[size]);
-      DCHECK_ALIGNED(closure, alignof(Closure));
-      return closure;
-    }
-  };
-}  // namespace
-
-BoxTable::BoxTable()
-  : allow_new_weaks_(true),
-    new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {}
-
-BoxTable::~BoxTable() {
-  // Free all the copies of our closures.
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ++map_iterator) {
-    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-
-    Closure* closure = key_value_pair.first;
-
-    // Remove from the map first, so that it doesn't try to access a dangling pointer.
-    map_iterator = map_.Erase(map_iterator);
-
-    // Safe to delete, no dangling pointers.
-    ClosureAllocator::Delete(closure);
-  }
-}
-
-mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
-  Thread* self = Thread::Current();
-
-  {
-    // TODO: Switch to ReaderMutexLock if ConditionVariable ever supports RW Mutexes
-    /*Reader*/MutexLock mu(self, *Locks::lambda_table_lock_);
-    BlockUntilWeaksAllowed();
-
-    // Attempt to look up this object; it's possible it was already boxed previously.
-    // If this is the case we *must* return the same object as before to maintain
-    // referential equality.
-    //
-    // In managed code:
-    //   Functional f = () -> 5;  // vF = create-lambda
-    //   Object a = f;            // vA = box-lambda vA
-    //   Object b = f;            // vB = box-lambda vB
-    //   assert(a == f)
-    ValueType value = FindBoxedLambda(closure);
-    if (!value.IsNull()) {
-      return value.Read();
-    }
-
-    // Otherwise we need to box the closure ourselves and insert it into the hash map.
-  }
-
-  // Release the lambda table lock here, so that thread suspension is allowed.
-
-  // Convert the Closure into a managed byte[] which will serve
-  // as the temporary 'boxed' version of the lambda. This is good enough
-  // to check all the basic object identities that a boxed lambda must retain.
-  // It's also good enough to contain all the captured primitive variables.
-
-  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
-  // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  BoxedClosurePointerType closure_as_array_object =
-      mirror::ByteArray::Alloc(self, closure->GetSize());
-
-  // There are no thread suspension points after this, so we don't need to put it into a handle.
-
-  if (UNLIKELY(closure_as_array_object == nullptr)) {
-    // Most likely an OOM has occurred.
-    CHECK(self->IsExceptionPending());
-    return nullptr;
-  }
-
-  // Write the raw closure data into the byte[].
-  closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t),  // component size
-                                                      0),               // index
-                  closure_as_array_object->GetLength());
-
-  // The method has been successfully boxed into an object, now insert it into the hash map.
-  {
-    MutexLock mu(self, *Locks::lambda_table_lock_);
-    BlockUntilWeaksAllowed();
-
-    // Look up the object again; it's possible another thread already boxed it while
-    // we were allocating the object.
-    ValueType value = FindBoxedLambda(closure);
-    if (UNLIKELY(!value.IsNull())) {
-      // Let the GC clean up closure_as_array_object at a later time.
-      return value.Read();
-    }
-
-    // Otherwise we need to insert it into the hash map in this thread.
-
-    // Make a copy for the box table to keep, in case the closure gets collected from the stack.
-    // TODO: GC may need to sweep for roots in the box table's copy of the closure.
-    Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize());
-    closure->CopyTo(closure_table_copy, closure->GetSize());
-
-    // The closure_table_copy needs to be deleted by us manually when we erase it from the map.
-
-    // Actually insert into the table.
-    map_.Insert({closure_table_copy, ValueType(closure_as_array_object)});
-  }
-
-  return closure_as_array_object;
-}
-
-bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
-  DCHECK(object != nullptr);
-  *out_closure = nullptr;
-
-  Thread* self = Thread::Current();
-
-  // Note that we do not need to access lambda_table_lock_ here
-  // since we don't need to look at the map.
-
-  mirror::Object* boxed_closure_object = object;
-
-  // Raise ClassCastException if object is not instanceof byte[]
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) {
-    ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass());
-    return false;
-  }
-
-  // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a byte[].
-
-  // If we got this far, the inputs are valid.
-  // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it.
-  BoxedClosurePointerType boxed_closure_as_array =
-      down_cast<BoxedClosurePointerType>(boxed_closure_object);
-
-  const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData();
-
-  // Allocate a copy that can "escape" and copy the closure data into that.
-  Closure* unboxed_closure =
-      LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength());
-  // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix.
-  memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength());
-
-  DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength()));
-
-  *out_closure = unboxed_closure;
-  return true;
-}
-
-BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const {
-  auto map_iterator = map_.Find(closure);
-  if (map_iterator != map_.end()) {
-    const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-    const ValueType& value = key_value_pair.second;
-
-    DCHECK(!value.IsNull());  // Never store null boxes.
-    return value;
-  }
-
-  return ValueType(nullptr);
-}
-
-void BoxTable::BlockUntilWeaksAllowed() {
-  Thread* self = Thread::Current();
-  while (UNLIKELY((!kUseReadBarrier && !allow_new_weaks_) ||
-                  (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
-    new_weaks_condition_.WaitHoldingLocks(self);  // wait while holding mutator lock
-  }
-}
-
-void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) {
-  DCHECK(visitor != nullptr);
-
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  /*
-   * Visit every weak root in our lambda box table.
-   * Remove unmarked objects, update marked objects to new address.
-   */
-  std::vector<ClosureType> remove_list;
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-
-    const ValueType& old_value = key_value_pair.second;
-
-    // This does not need a read barrier because this is called by GC.
-    mirror::Object* old_value_raw = old_value.Read<kWithoutReadBarrier>();
-    mirror::Object* new_value = visitor->IsMarked(old_value_raw);
-
-    if (new_value == nullptr) {
-      // The object has been swept away.
-      const ClosureType& closure = key_value_pair.first;
-
-      // Delete the entry from the map.
-      map_iterator = map_.Erase(map_iterator);
-
-      // Clean up the memory by deleting the closure.
-      ClosureAllocator::Delete(closure);
-
-    } else {
-      // The object has been moved.
-      // Update the map.
-      key_value_pair.second = ValueType(new_value);
-      ++map_iterator;
-    }
-  }
-
-  // Occasionally shrink the map to avoid growing very large.
-  if (map_.CalculateLoadFactor() < kMinimumLoadFactor) {
-    map_.ShrinkToMaximumLoad();
-  }
-}
-
-void BoxTable::DisallowNewWeakBoxedLambdas() {
-  CHECK(!kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  allow_new_weaks_ = false;
-}
-
-void BoxTable::AllowNewWeakBoxedLambdas() {
-  CHECK(!kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  allow_new_weaks_ = true;
-  new_weaks_condition_.Broadcast(self);
-}
-
-void BoxTable::BroadcastForNewWeakBoxedLambdas() {
-  CHECK(kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-  new_weaks_condition_.Broadcast(self);
-}
-
-void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  item.first = nullptr;
-
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  item.second = ValueType();  // Also clear the GC root.
-}
-
-bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  return item.first == nullptr;
-}
-
-bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
-                                    const UnorderedMapKeyType& rhs) const {
-  // Nothing needs this right now, but leave this assertion for later when
-  // we need to look at the references inside of the closure.
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-
-  return lhs->ReferenceEquals(rhs);
-}
-
-size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const {
-  const lambda::Closure* closure = key;
-  DCHECK_ALIGNED(closure, alignof(lambda::Closure));
-
-  // Need to hold mutator_lock_ before calling into Closure::GetHashCode.
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  return closure->GetHashCode();
-}
-
-}  // namespace lambda
-}  // namespace art
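BoxLambda above is worth calling out as a pattern: look up under the lock, drop the lock to allocate (allocation can suspend the thread), then re-acquire and re-check before inserting, because another thread may have boxed the same closure in the meantime. A generic sketch of that shape, with std::mutex standing in for lambda_table_lock_ and Key/Value as placeholders:

#include <map>
#include <mutex>

template <typename Key, typename Value>
class CheckAllocRecheckCache {
 public:
  Value* GetOrCreate(const Key& key) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      auto it = map_.find(key);
      if (it != map_.end()) {
        return it->second;  // fast path: already present
      }
    }
    Value* fresh = new Value();  // slow path: allocate outside the lock
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = map_.find(key);
    if (it != map_.end()) {
      delete fresh;  // lost the race; return the winner's value
      return it->second;
    }
    map_.emplace(key, fresh);
    return fresh;
  }

 private:
  std::mutex mutex_;
  std::map<Key, Value*> map_;
};
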
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
deleted file mode 100644
index adb7332..0000000
--- a/runtime/lambda/box_table.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_BOX_TABLE_H_
-#define ART_RUNTIME_LAMBDA_BOX_TABLE_H_
-
-#include "base/allocator.h"
-#include "base/hash_map.h"
-#include "gc_root.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "object_callbacks.h"
-
-#include <stdint.h>
-
-namespace art {
-
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-struct Closure;  // forward declaration
-
-/*
- * Store a table of boxed lambdas. This is required to maintain object referential equality
- * when a lambda is re-boxed.
- *
- * Conceptually, we store a mapping of Closures -> Weak Reference<Boxed Lambda Object>.
- * When too many objects get GCd, we shrink the underlying table to use less space.
- */
-class BoxTable FINAL {
- public:
-  using ClosureType = art::lambda::Closure*;
-
-  // Boxes a closure into an object. Returns null and throws an exception on failure.
-  mirror::Object* BoxLambda(const ClosureType& closure)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
-
-  // Unboxes an object back into the lambda. Returns false and throws an exception on failure.
-  bool UnboxLambda(mirror::Object* object, ClosureType* out_closure)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Sweep weak references to lambda boxes. Update the addresses if the objects have been
-  // moved, and delete them from the table if the objects have been cleaned up.
-  void SweepWeakBoxedLambdas(IsMarkedVisitor* visitor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Temporarily block anyone from touching the map.
-  void DisallowNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Unblock any readers who have been queued waiting to touch the map.
-  void AllowNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Unblock any readers who have been queued waiting to touch the map.
-  void BroadcastForNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  BoxTable();
-  ~BoxTable();
-
- private:
-  // Explanation:
-  // - After all threads are suspended (exclusive mutator lock),
-  //   the concurrent-copying GC can move objects from the "from" space to the "to" space.
-  // If an object is moved at that time and *before* SweepSystemWeaks is called, then
-  // we don't know if the move has happened yet.
-  // Successive reads will then (incorrectly) look at the objects in the "from" space,
-  // which is a problem since the objects have been already forwarded and mutations
-  // would not be visible in the right space.
-  // Instead, use a GcRoot here which will be automatically updated by the GC.
-  //
-  // Also, any reads should be protected by a read barrier to always give us the "to" space address.
-  using ValueType = GcRoot<mirror::Object>;
-
-  // Attempt to look up the lambda in the map, or return null if it's not there yet.
-  ValueType FindBoxedLambda(const ClosureType& closure) const
-      SHARED_REQUIRES(Locks::lambda_table_lock_);
-
-  // If the GC has come in and temporarily disallowed touching weaks, block until it is allowed.
-  void BlockUntilWeaksAllowed()
-      SHARED_REQUIRES(Locks::lambda_table_lock_);
-
-  // The key is a raw Closure pointer owned by the box table; it is deleted manually when erased.
-  using UnorderedMapKeyType = ClosureType;
-
-  // EmptyFn implementation for art::HashMap
-  struct EmptyFn {
-    void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-
-    bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const;
-  };
-
-  // HashFn implementation for art::HashMap
-  struct HashFn {
-    size_t operator()(const UnorderedMapKeyType& key) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-  };
-
-  // EqualsFn implementation for art::HashMap
-  struct EqualsFn {
-    bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-  };
-
-  using UnorderedMap = art::HashMap<UnorderedMapKeyType,
-                                    ValueType,
-                                    EmptyFn,
-                                    HashFn,
-                                    EqualsFn,
-                                    TrackingAllocator<std::pair<ClosureType, ValueType>,
-                                                      kAllocatorTagLambdaBoxTable>>;
-
-  UnorderedMap map_                                          GUARDED_BY(Locks::lambda_table_lock_);
-  bool allow_new_weaks_                                      GUARDED_BY(Locks::lambda_table_lock_);
-  ConditionVariable new_weaks_condition_                     GUARDED_BY(Locks::lambda_table_lock_);
-
-  // Shrink the map when we get below this load factor.
-  // (This is an arbitrary value that should be large enough to prevent aggressive map erases
-  // from shrinking the table too often.)
-  static constexpr double kMinimumLoadFactor = UnorderedMap::kDefaultMinLoadFactor / 2;
-
-  DISALLOW_COPY_AND_ASSIGN(BoxTable);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_BOX_TABLE_H_
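The EmptyFn/HashFn/EqualsFn triple above is art::HashMap's analogue of the Hash and KeyEqual parameters of std::unordered_map, plus an empty-slot marker that an open-addressing table needs. For orientation, a sketch of the two standard-library counterparts over a hypothetical closure key (FakeClosure is a stand-in, not the runtime's type):

#include <cstddef>
#include <unordered_map>

struct FakeClosure { size_t hash_code; };

struct ClosureHash {
  size_t operator()(const FakeClosure* closure) const { return closure->hash_code; }
};

struct ClosureEqual {
  // Stand-in for Closure::ReferenceEquals in the real table.
  bool operator()(const FakeClosure* lhs, const FakeClosure* rhs) const {
    return lhs->hash_code == rhs->hash_code;
  }
};

using BoxedLambdaMap = std::unordered_map<FakeClosure*, void*, ClosureHash, ClosureEqual>;
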
diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc
deleted file mode 100644
index 179e4ee..0000000
--- a/runtime/lambda/closure.cc
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/closure.h"
-
-#include "base/logging.h"
-#include "lambda/art_lambda_method.h"
-#include "runtime/mirror/object_reference.h"
-
-static constexpr const bool kClosureSupportsReferences = false;
-static constexpr const bool kClosureSupportsGarbageCollection = false;
-
-namespace art {
-namespace lambda {
-
-template <typename T>
-// TODO: can I return T __attribute__((__aligned__(1)))* here instead?
-const uint8_t* Closure::GetUnsafeAtOffset(size_t offset) const {
-  // Do not DCHECK here with existing helpers since most of them will call into this function.
-  return reinterpret_cast<const uint8_t*>(captured_) + offset;
-}
-
-size_t Closure::GetCapturedVariableSize(ShortyFieldType variable_type, size_t offset) const {
-  switch (variable_type) {
-    case ShortyFieldType::kLambda:
-    {
-      return GetClosureSize(GetUnsafeAtOffset<Closure>(offset));
-    }
-    default:
-      DCHECK(variable_type.IsStaticSize());
-      return variable_type.GetStaticSize();
-  }
-}
-
-// Templatize the flags to give the compiler a fighting chance to eliminate
-// any unnecessary code through different uses of this function.
-template <Closure::VariableInfo::Flags flags>
-inline Closure::VariableInfo Closure::ParseTypeDescriptor(const char* type_descriptor,
-                                                          size_t upto_index) const {
-  DCHECK(type_descriptor != nullptr);
-
-  VariableInfo result;
-
-  ShortyFieldType last_type;
-  size_t offset = (flags & VariableInfo::kOffset) ? GetStartingOffset() : 0;
-  size_t prev_offset = 0;
-  size_t count = 0;
-
-  while ((type_descriptor =
-      ShortyFieldType::ParseFromFieldTypeDescriptor(type_descriptor, &last_type)) != nullptr) {
-    count++;
-
-    if (flags & VariableInfo::kOffset) {
-      // Accumulate the sizes of all preceding captured variables into the current offset.
-      offset += prev_offset;
-      prev_offset = GetCapturedVariableSize(last_type, offset);
-    }
-
-    if (count > upto_index) {
-      break;
-    }
-  }
-
-  if (flags & VariableInfo::kVariableType) {
-    result.variable_type_ = last_type;
-  }
-
-  if (flags & VariableInfo::kIndex) {
-    result.index_ = count;
-  }
-
-  if (flags & VariableInfo::kCount) {
-    result.count_ = count;
-  }
-
-  if (flags & VariableInfo::kOffset) {
-    result.offset_ = offset;
-  }
-
-  // TODO: We should probably store the result of this in the ArtLambdaMethod,
-  // to avoid re-computing the data every single time for static closures.
-  return result;
-}
-
-size_t Closure::GetCapturedVariablesSize() const {
-  const size_t captured_variable_offset = offsetof(Closure, captured_);
-  DCHECK_GE(GetSize(), captured_variable_offset);  // Prevent underflows.
-  return GetSize() - captured_variable_offset;
-}
-
-size_t Closure::GetSize() const {
-  const size_t static_closure_size = lambda_info_->GetStaticClosureSize();
-  if (LIKELY(lambda_info_->IsStaticSize())) {
-    return static_closure_size;
-  }
-
-  DCHECK_GE(static_closure_size, sizeof(captured_[0].dynamic_.size_));
-  const size_t dynamic_closure_size = captured_[0].dynamic_.size_;
-  // The dynamic size better be at least as big as the static size.
-  DCHECK_GE(dynamic_closure_size, static_closure_size);
-
-  return dynamic_closure_size;
-}
-
-void Closure::CopyTo(void* target, size_t target_size) const {
-  DCHECK_GE(target_size, GetSize());
-
-  // TODO: using memcpy is unsafe with read barriers, fix this once we add reference support
-  static_assert(kClosureSupportsReferences == false,
-                "Do not use memcpy with readbarrier references");
-  memcpy(target, this, GetSize());
-}
-
-ArtMethod* Closure::GetTargetMethod() const {
-  return const_cast<ArtMethod*>(lambda_info_->GetArtMethod());
-}
-
-uint32_t Closure::GetHashCode() const {
-  // Start with a non-zero constant, a prime number.
-  uint32_t result = 17;
-
-  // Include the hash with the ArtMethod.
-  {
-    uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod());
-    result = 31 * result + Low32Bits(method);
-    if (sizeof(method) == sizeof(uint64_t)) {
-      result = 31 * result + High32Bits(method);
-    }
-  }
-
-  // Include a hash for each captured variable.
-  for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) {
-    // TODO: not safe for GC-able values since the address can move and the hash code would change.
-    uint8_t captured_variable_raw_value;
-    CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value);  // NOLINT: [whitespace/comma] [3]
-
-    result = 31 * result + captured_variable_raw_value;
-  }
-
-  // TODO: Fix above loop to work for objects and lambdas.
-  static_assert(kClosureSupportsGarbageCollection == false,
-               "Need to update above loop to read the hash code from the "
-                "objects and lambdas recursively");
-
-  return result;
-}
-
-bool Closure::ReferenceEquals(const Closure* other) const {
-  DCHECK(other != nullptr);
-
-  // TODO: Need rework to use read barriers once closures have references inside of them that can
-  // move. Until then, it's safe to just compare the data inside of it directly.
-  static_assert(kClosureSupportsReferences == false,
-                "Unsafe to use memcmp in read barrier collector");
-
-  if (GetSize() != other->GetSize()) {
-    return false;
-  }
-
-  return memcmp(this, other, GetSize()) == 0;
-}
-
-size_t Closure::GetNumberOfCapturedVariables() const {
-  // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK.
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kCount>(GetCapturedVariablesTypeDescriptor(),
-                                                VariableInfo::kUpToIndexMax);
-  size_t count = variable_info.count_;
-  // Assuming each variable was 1 byte, the size should always be greater than or equal to the count.
-  DCHECK_LE(count, GetCapturedVariablesSize());
-  return count;
-}
-
-const char* Closure::GetCapturedVariablesTypeDescriptor() const {
-  return lambda_info_->GetCapturedVariablesTypeDescriptor();
-}
-
-ShortyFieldType Closure::GetCapturedShortyType(size_t index) const {
-  DCHECK_LT(index, GetNumberOfCapturedVariables());
-
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kVariableType>(GetCapturedVariablesTypeDescriptor(),
-                                                       index);
-
-  return variable_info.variable_type_;
-}
-
-uint32_t Closure::GetCapturedPrimitiveNarrow(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsPrimitiveNarrow());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  uint32_t result = 0;
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveNarrowType<decltype(result)>(),
-                "result must be a primitive narrow type");
-  switch (variable_type) {
-    case ShortyFieldType::kBoolean:
-      CopyUnsafeAtOffset<bool>(offset, &result);
-      break;
-    case ShortyFieldType::kByte:
-      CopyUnsafeAtOffset<uint8_t>(offset, &result);
-      break;
-    case ShortyFieldType::kChar:
-      CopyUnsafeAtOffset<uint16_t>(offset, &result);
-      break;
-    case ShortyFieldType::kShort:
-      CopyUnsafeAtOffset<int16_t>(offset, &result);
-      break;
-    case ShortyFieldType::kInt:
-      CopyUnsafeAtOffset<int32_t>(offset, &result);
-      break;
-    case ShortyFieldType::kFloat:
-      // XX: Maybe there should just be a GetCapturedPrimitive<T> to avoid this shuffle?
-      // The interpreter's invoke seems to only special case references and wides,
-      // everything else is treated as a generic 32-bit pattern.
-      CopyUnsafeAtOffset<float>(offset, &result);
-      break;
-    default:
-      LOG(FATAL)
-          << "expected a valid narrow primitive shorty type but got "
-          << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-uint64_t Closure::GetCapturedPrimitiveWide(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsPrimitiveWide());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  uint64_t result = 0;
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveWideType<decltype(result)>(),
-                "result must be a primitive wide type");
-  switch (variable_type) {
-    case ShortyFieldType::kLong:
-      CopyUnsafeAtOffset<int64_t>(offset, &result);
-      break;
-    case ShortyFieldType::kDouble:
-      CopyUnsafeAtOffset<double>(offset, &result);
-      break;
-    default:
-      LOG(FATAL)
-          << "expected a valid primitive wide shorty type but got "
-          << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-mirror::Object* Closure::GetCapturedObject(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsObject());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  mirror::Object* result = nullptr;
-  static_assert(ShortyFieldTypeTraits::IsObjectType<decltype(result)>(),
-                "result must be an object type");
-  switch (variable_type) {
-    case ShortyFieldType::kObject:
-      // TODO: This seems unsafe. This may need to use gcroots.
-      static_assert(kClosureSupportsGarbageCollection == false,
-                    "May need GcRoots and definitely need mutator locks");
-      {
-        mirror::CompressedReference<mirror::Object> compressed_result;
-        CopyUnsafeAtOffset<uint32_t>(offset, &compressed_result);
-        result = compressed_result.AsMirrorPtr();
-      }
-      break;
-    default:
-      CHECK(false)
-          << "expected a valid shorty type but got " << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-size_t Closure::GetCapturedClosureSize(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsLambda());
-  size_t offset = GetCapturedVariableOffset(index);
-
-  auto* captured_ptr = reinterpret_cast<const uint8_t*>(&captured_);
-  size_t closure_size = GetClosureSize(captured_ptr + offset);
-
-  return closure_size;
-}
-
-void Closure::CopyCapturedClosure(size_t index, void* destination, size_t destination_room) const {
-  DCHECK(GetCapturedShortyType(index).IsLambda());
-  size_t offset = GetCapturedVariableOffset(index);
-
-  auto* captured_ptr = reinterpret_cast<const uint8_t*>(&captured_);
-  size_t closure_size = GetClosureSize(captured_ptr + offset);
-
-  static_assert(ShortyFieldTypeTraits::IsLambdaType<Closure*>(),
-                "result must be a lambda type");
-
-  CopyUnsafeAtOffset<Closure>(offset, destination, closure_size, destination_room);
-}
-
-size_t Closure::GetCapturedVariableOffset(size_t index) const {
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kOffset>(GetCapturedVariablesTypeDescriptor(),
-                                                 index);
-
-  size_t offset = variable_info.offset_;
-
-  return offset;
-}
-
-void Closure::GetCapturedVariableTypeAndOffset(size_t index,
-                                               ShortyFieldType* out_type,
-                                               size_t* out_offset) const {
-  DCHECK(out_type != nullptr);
-  DCHECK(out_offset != nullptr);
-
-  static constexpr const VariableInfo::Flags kVariableTypeAndOffset =
-      static_cast<VariableInfo::Flags>(VariableInfo::kVariableType | VariableInfo::kOffset);
-  VariableInfo variable_info =
-      ParseTypeDescriptor<kVariableTypeAndOffset>(GetCapturedVariablesTypeDescriptor(),
-                                                  index);
-
-  ShortyFieldType variable_type = variable_info.variable_type_;
-  size_t offset = variable_info.offset_;
-
-  *out_type = variable_type;
-  *out_offset = offset;
-}
-
-template <typename T>
-void Closure::CopyUnsafeAtOffset(size_t offset,
-                                 void* destination,
-                                 size_t src_size,
-                                 size_t destination_room) const {
-  DCHECK_GE(destination_room, src_size);
-  const uint8_t* data_ptr = GetUnsafeAtOffset<T>(offset);
-  memcpy(destination, data_ptr, sizeof(T));
-}
-
-// TODO: This is kind of ugly. I would prefer an unaligned_ptr<Closure> here.
-// Unfortunately C++ doesn't let you lower the alignment: alignas(1) Closure* is not legal.
-size_t Closure::GetClosureSize(const uint8_t* closure) {
-  DCHECK(closure != nullptr);
-
-  static_assert(!std::is_base_of<mirror::Object, Closure>::value,
-                "It might be unsafe to call memcpy on a managed object");
-
-  // Safe as long as it's not a mirror Object.
-  // TODO: Should probably wrap this in like MemCpyNative or some such which statically asserts
-  // we aren't trying to copy mirror::Object data around.
-  ArtLambdaMethod* closure_info;
-  memcpy(&closure_info, closure + offsetof(Closure, lambda_info_), sizeof(closure_info));
-
-  if (LIKELY(closure_info->IsStaticSize())) {
-    return closure_info->GetStaticClosureSize();
-  }
-
-  // The size is dynamic, so we need to read it from captured_variables_ portion.
-  size_t dynamic_size;
-  memcpy(&dynamic_size,
-         closure + offsetof(Closure, captured_[0].dynamic_.size_),
-         sizeof(dynamic_size));
-  static_assert(sizeof(dynamic_size) == sizeof(captured_[0].dynamic_.size_),
-                "Dynamic size type must match the structural type of the size");
-
-  DCHECK_GE(dynamic_size, closure_info->GetStaticClosureSize());
-  return dynamic_size;
-}
-
-size_t Closure::GetStartingOffset() const {
-  static constexpr const size_t captured_offset = offsetof(Closure, captured_);
-  if (LIKELY(lambda_info_->IsStaticSize())) {
-    return offsetof(Closure, captured_[0].static_variables_) - captured_offset;
-  } else {
-    return offsetof(Closure, captured_[0].dynamic_.variables_) - captured_offset;
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
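The memcpy-based loads in GetClosureSize above are the portable idiom for reading through a possibly unaligned pointer: dereferencing such a pointer directly is undefined behavior, while copying sizeof(T) bytes is always well-defined and compiles to a single unaligned load where the hardware allows it. A minimal sketch:

#include <cstdint>
#include <cstring>

template <typename T>
T LoadUnaligned(const uint8_t* address) {
  T value;
  std::memcpy(&value, address, sizeof(T));  // well-defined for any alignment
  return value;
}
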
diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h
deleted file mode 100644
index 31ff194..0000000
--- a/runtime/lambda/closure.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_H_
-
-#include "base/macros.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
-#include "lambda/shorty_field_type.h"
-
-#include <stdint.h>
-
-namespace art {
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-class ArtLambdaMethod;  // forward declaration
-class ClosureBuilder;   // forward declaration
-
-// Inline representation of a lambda closure.
-// Contains the target method and the set of packed captured variables as a copy.
-//
-// The closure itself is logically immutable, although in practice any object references
-// it (recursively) contains can be moved and updated by the GC.
-struct PACKED(sizeof(ArtLambdaMethod*)) Closure {
-  // Get the size of the Closure in bytes.
-  // This is necessary in order to allocate a large enough area to copy the Closure into.
-  // Do *not* copy the closure with memcpy, since references also need to get moved.
-  size_t GetSize() const;
-
-  // Copy this closure into the target, whose memory size is specified by target_size.
-  // Any object references are fixed up during the copy (if there was a read barrier).
-  // The target_size must be at least as large as GetSize().
-  void CopyTo(void* target, size_t target_size) const;
-
-  // Get the target method, i.e. the method that will be dispatched into with invoke-lambda.
-  ArtMethod* GetTargetMethod() const;
-
-  // Calculates the hash code. Value is recomputed each time.
-  uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Is this the same closure as other? e.g. same target method, same variables captured.
-  //
-  // Determines whether the two Closures are interchangeable instances.
-  // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that
-  // means that they are interchangeable values (usually for the purpose of boxing/unboxing).
-  bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // How many variables were captured?
-  size_t GetNumberOfCapturedVariables() const;
-
-  // Returns a type descriptor string that represents each captured variable.
-  // e.g. "Ljava/lang/Object;ZB" would mean a capture tuple of (Object, boolean, byte)
-  const char* GetCapturedVariablesTypeDescriptor() const;
-
-  // Returns the short type for the captured variable at index.
-  // Index must be less than the number of captured variables.
-  ShortyFieldType GetCapturedShortyType(size_t index) const;
-
-  // Returns the 32-bit representation of a non-wide primitive at the captured variable index.
-  // Smaller types are zero extended.
-  // Index must be less than the number of captured variables.
-  uint32_t GetCapturedPrimitiveNarrow(size_t index) const;
-  // Returns the 64-bit representation of a wide primitive at the captured variable index.
-  // Smaller types are zero extended.
-  // Index must be less than the number of captured variables.
-  uint64_t GetCapturedPrimitiveWide(size_t index) const;
-  // Returns the object reference at the captured variable index.
-  // The type at the index *must* be an object reference or a CHECK failure will occur.
-  // Index must be less than the number of captured variables.
-  mirror::Object* GetCapturedObject(size_t index) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Gets the size of a nested capture closure in bytes, at the captured variable index.
-  // The type at the index *must* be a lambda closure or a CHECK failure will occur.
-  size_t GetCapturedClosureSize(size_t index) const;
-
-  // Copies a nested lambda closure at the captured variable index.
-  // The destination must have enough room for the closure (see GetCapturedClosureSize).
-  void CopyCapturedClosure(size_t index, void* destination, size_t destination_room) const;
-
- private:
-  // Read out any non-lambda value as a copy.
-  template <typename T>
-  T GetCapturedVariable(size_t index) const;
-
-  // Reconstruct the closure's captured variable info at runtime.
-  struct VariableInfo {
-    size_t index_;
-    ShortyFieldType variable_type_;
-    size_t offset_;
-    size_t count_;
-
-    enum Flags {
-      kIndex = 0x1,
-      kVariableType = 0x2,
-      kOffset = 0x4,
-      kCount = 0x8,
-    };
-
-    // Traverse to the end of the type descriptor list instead of stopping at some particular index.
-    static constexpr size_t kUpToIndexMax = static_cast<size_t>(-1);
-  };
-
-  // Parse a type descriptor, stopping at index "upto_index".
-  // Returns only the information requested in flags. All other fields are indeterminate.
-  template <VariableInfo::Flags flags>
-  inline VariableInfo ALWAYS_INLINE ParseTypeDescriptor(const char* type_descriptor,
-                                                        size_t upto_index) const;
-
-  // Convenience function to call ParseTypeDescriptor with just the type and offset.
-  void GetCapturedVariableTypeAndOffset(size_t index,
-                                        ShortyFieldType* out_type,
-                                        size_t* out_offset) const;
-
-  // How many bytes do the captured variables take up? Runtime sizeof(captured_variables).
-  size_t GetCapturedVariablesSize() const;
-  // Get the size in bytes of the variable_type which is potentially stored at offset.
-  size_t GetCapturedVariableSize(ShortyFieldType variable_type, size_t offset) const;
-  // Get the starting offset (in bytes) for the 0th captured variable.
-  // All offsets are relative to 'captured_'.
-  size_t GetStartingOffset() const;
-  // Get the offset for this index.
-  // All offsets are relative to 'captured_'.
-  size_t GetCapturedVariableOffset(size_t index) const;
-
-  // Cast the data at '(char*)captured_[offset]' into T, returning its address.
-  // This value should not be de-referenced directly since it is unaligned.
-  template <typename T>
-  inline const uint8_t* GetUnsafeAtOffset(size_t offset) const;
-
-  // Copy the data at the offset into the destination. DCHECKs that
-  // the destination_room is large enough (in bytes) to fit the data.
-  template <typename T>
-  inline void CopyUnsafeAtOffset(size_t offset,
-                                 void* destination,
-                                 size_t src_size = sizeof(T),
-                                 size_t destination_room = sizeof(T)) const;
-
-  // Get the closure size from an unaligned (i.e. interior) closure pointer.
-  static size_t GetClosureSize(const uint8_t* closure);
-
-  ///////////////////////////////////////////////////////////////////////////////////
-
-  // Compile-time known lambda information such as the type descriptor and size.
-  ArtLambdaMethod* lambda_info_;
-
-  // A contiguous list of captured variables, and possibly the closure size.
-  // The runtime size can always be determined through GetSize().
-  union {
-    // Read from here if the closure size is static (ArtLambdaMethod::IsStatic)
-    uint8_t static_variables_[0];
-    struct {
-      // Read from here if the closure size is dynamic (ArtLambdaMethod::IsDynamic)
-      size_t size_;  // The lambda_info_ and the size_ itself are also included as part of the size.
-      uint8_t variables_[0];
-    } dynamic_;
-  } captured_[0];
-  // captured_ will always consist of one array element at runtime.
-  // Set to [0] so that 'size_' is not counted in sizeof(Closure).
-
-  friend class ClosureBuilder;
-  friend class ClosureTest;
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_H_
diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h
deleted file mode 100644
index 3cec21f..0000000
--- a/runtime/lambda/closure_builder-inl.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
-
-#include "lambda/closure_builder.h"
-#include <string.h>
-
-namespace art {
-namespace lambda {
-
-template <typename T, ClosureBuilder::ShortyTypeEnum kShortyType>
-void ClosureBuilder::CaptureVariablePrimitive(T value) {
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveType<T>(), "T must be a primitive type");
-  const size_t type_size = ShortyFieldType(kShortyType).GetStaticSize();
-  DCHECK_EQ(type_size, sizeof(T));
-
-  // Copy the data while retaining the bit pattern. Strict-aliasing safe.
-  ShortyFieldTypeTraits::MaxType value_storage = 0;
-  memcpy(&value_storage, &value, sizeof(T));
-
-  values_.push_back(value_storage);
-  size_ += sizeof(T);
-
-  shorty_types_ += kShortyType;
-}
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
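The memcpy in CaptureVariablePrimitive above is the strict-aliasing-safe way to park any primitive's exact bit pattern in a common wide slot: the bits are copied, never numerically converted (a float is not rounded to an integer). A standalone sketch of the same move:

#include <cstdint>
#include <cstring>

uint64_t ToStorageBits(float value) {
  uint64_t storage = 0;  // wide slot shared by every primitive type
  std::memcpy(&storage, &value, sizeof(value));  // bit copy, no conversion
  return storage;
}
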
diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc
deleted file mode 100644
index 739e965..0000000
--- a/runtime/lambda/closure_builder.cc
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "lambda/closure_builder.h"
-
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/closure.h"
-#include "lambda/shorty_field_type.h"
-#include "runtime/mirror/object_reference.h"
-
-#include <stdint.h>
-#include <vector>
-
-namespace art {
-namespace lambda {
-
-/*
- * GC support TODOs:
- * (Although there's some code for storing objects, it is UNIMPLEMENTED(FATAL) because it is
- * incomplete).
- *
- * 1) GC needs to be able to traverse the Closure and visit any references.
- *    It might be possible to get away with global roots in the short term.
- *
- * 2) Add Brooks read barrier support. We can store the black/gray/white bits
- *    in the lower 2 bits of the lambda art method pointer. Whenever a closure is copied
- *    [to the stack] we'd need to add a cold path to turn it black.
- *    (since there are only 3 colors, we can use the 4th value to indicate no-refs).
- *    e.g. 0x0 = gray, 0x1 = white, 0x2 = black, 0x3 = no-nested-references
- *    - Alternatively the GC can mark reference-less closures as always-black,
- *      although it would need extra work to check for references.
- */
-
-void ClosureBuilder::CaptureVariableObject(mirror::Object* object) {
-  auto compressed_reference = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(object);
-  ShortyFieldTypeTraits::MaxType storage = 0;
-
-  static_assert(sizeof(storage) >= sizeof(compressed_reference),
-                "not enough room to store a compressed reference");
-  memcpy(&storage, &compressed_reference, sizeof(compressed_reference));
-
-  values_.push_back(storage);
-  size_ += kObjectReferenceSize;
-
-  static_assert(kObjectReferenceSize == sizeof(compressed_reference), "reference size mismatch");
-
-  // TODO: needs more work to support concurrent GC
-  if (kIsDebugBuild) {
-    if (kUseReadBarrier) {
-      UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier";
-    }
-  }
-
-  shorty_types_ += ShortyFieldType::kObject;
-}
-
-void ClosureBuilder::CaptureVariableLambda(Closure* closure) {
-  DCHECK(closure != nullptr);  // null closures not allowed, target method must be null instead.
-  values_.push_back(reinterpret_cast<ShortyFieldTypeTraits::MaxType>(closure));
-
-  if (LIKELY(is_dynamic_size_ == false)) {
-    // Write in the extra bytes to store the dynamic size the first time.
-    is_dynamic_size_ = true;
-    size_ += sizeof(Closure::captured_[0].dynamic_.size_);
-  }
-
-  // A closure may be sized dynamically, so always query it for the true size.
-  size_ += closure->GetSize();
-
-  shorty_types_ += ShortyFieldType::kLambda;
-}
-
-size_t ClosureBuilder::GetSize() const {
-  return size_;
-}
-
-size_t ClosureBuilder::GetCaptureCount() const {
-  DCHECK_EQ(values_.size(), shorty_types_.size());
-  return values_.size();
-}
-
-const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const {
-  DCHECK_EQ(values_.size(), shorty_types_.size());
-  return shorty_types_;
-}
-
-Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const {
-  DCHECK(memory != nullptr);
-  DCHECK(target_method != nullptr);
-  DCHECK_EQ(is_dynamic_size_, target_method->IsDynamicSize());
-
-  CHECK_EQ(target_method->GetNumberOfCapturedVariables(), values_.size())
-    << "number of variables captured at runtime does not match "
-    << "number of variables captured at compile time";
-
-  Closure* closure = new (memory) Closure;
-  closure->lambda_info_ = target_method;
-
-  static_assert(offsetof(Closure, captured_) == kInitialSize, "wrong initial size");
-
-  size_t written_size;
-  if (UNLIKELY(is_dynamic_size_)) {
-    // The closure size must be set dynamically (i.e. nested lambdas).
-    closure->captured_[0].dynamic_.size_ = GetSize();
-    size_t header_size = offsetof(Closure, captured_[0].dynamic_.variables_);
-    DCHECK_LE(header_size, GetSize());
-    size_t variables_size = GetSize() - header_size;
-    written_size =
-        WriteValues(target_method,
-                    closure->captured_[0].dynamic_.variables_,
-                    header_size,
-                    variables_size);
-  } else {
-    // The closure size is known statically (i.e. no nested lambdas).
-    DCHECK_EQ(GetSize(), target_method->GetStaticClosureSize());
-    size_t header_size = offsetof(Closure, captured_[0].static_variables_);
-    DCHECK_LE(header_size, GetSize());
-    size_t variables_size = GetSize() - header_size;
-    written_size =
-        WriteValues(target_method,
-                    closure->captured_[0].static_variables_,
-                    header_size,
-                    variables_size);
-  }
-
-  DCHECK_EQ(written_size, closure->GetSize());
-
-  return closure;
-}
-
-size_t ClosureBuilder::WriteValues(ArtLambdaMethod* target_method,
-                                   uint8_t variables[],
-                                   size_t header_size,
-                                   size_t variables_size) const {
-  size_t total_size = header_size;
-  const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor();
-  DCHECK_STREQ(shorty_types, shorty_types_.c_str());
-
-  size_t variables_offset = 0;
-  size_t remaining_size = variables_size;
-
-  const size_t shorty_count = target_method->GetNumberOfCapturedVariables();
-  DCHECK_EQ(shorty_count, GetCaptureCount());
-
-  for (size_t i = 0; i < shorty_count; ++i) {
-    ShortyFieldType shorty{shorty_types[i]};  // NOLINT [readability/braces] [4]
-
-    size_t var_size;
-    if (LIKELY(shorty.IsStaticSize())) {
-      // TODO: needs more work to support concurrent GC, e.g. read barriers
-      if (kUseReadBarrier) {
-        if (UNLIKELY(shorty.IsObject())) {
-          UNIMPLEMENTED(FATAL) << "can't yet safely write objects with read barrier";
-        }
-      } else {
-        if (UNLIKELY(shorty.IsObject())) {
-          UNIMPLEMENTED(FATAL) << "writing objects not yet supported, no GC support";
-        }
-      }
-
-      var_size = shorty.GetStaticSize();
-      DCHECK_LE(var_size, sizeof(values_[i]));
-
-      // Safe even for objects (non-read barrier case) if we never suspend
-      // while the ClosureBuilder is live.
-      // FIXME: Need to add GC support for references in a closure.
-      memcpy(&variables[variables_offset], &values_[i], var_size);
-    } else {
-      DCHECK(shorty.IsLambda())
-          << " don't support writing dynamically sized types other than lambda";
-
-      ShortyFieldTypeTraits::MaxType closure_raw = values_[i];
-      Closure* nested_closure = reinterpret_cast<Closure*>(closure_raw);
-
-      DCHECK(nested_closure != nullptr);
-      nested_closure->CopyTo(&variables[variables_offset], remaining_size);
-
-      var_size = nested_closure->GetSize();
-    }
-
-    total_size += var_size;
-    DCHECK_GE(remaining_size, var_size);
-    remaining_size -= var_size;
-
-    variables_offset += var_size;
-  }
-
-  DCHECK_EQ('\0', shorty_types[shorty_count]);
-  DCHECK_EQ(variables_offset, variables_size);
-
-  return total_size;
-}
-
-
-}  // namespace lambda
-}  // namespace art
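
CreateInPlace above splits the total size into a header (just the method pointer in the static case; method pointer plus an inline size field in the dynamic case) and the captured-variable payload. A minimal sketch of that arithmetic, with a hypothetical Layout struct standing in for the real offsetof targets:

    #include <cstddef>
    #include <cstdio>

    struct Layout {
      void* lambda_info;
      size_t dynamic_size;  // Present only in the dynamic (nested-lambda) case.
    };

    int main() {
      const size_t static_header = sizeof(void*);
      const size_t dynamic_header = offsetof(Layout, dynamic_size) + sizeof(size_t);
      const size_t payload = 12;  // e.g. one int32 and one double captured by value.
      std::printf("static total:  %zu\n", static_header + payload);
      std::printf("dynamic total: %zu\n", dynamic_header + payload);
      return 0;
    }
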
diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h
deleted file mode 100644
index 23eb484..0000000
--- a/runtime/lambda/closure_builder.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
-
-#include "base/macros.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
-#include "base/value_object.h"
-#include "lambda/shorty_field_type.h"
-
-#include <stdint.h>
-#include <vector>
-
-namespace art {
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-class ArtLambdaMethod;  // forward declaration
-
-// Build a closure by capturing variables one at a time.
-// When all variables have been marked captured, the closure can be created in-place into
-// a target memory address.
-//
-// The mutator lock must be held for the duration of the lifetime of this object,
-// since it needs to temporarily store heap references into an internal list.
-class ClosureBuilder {
- public:
-  using ShortyTypeEnum = decltype(ShortyFieldType::kByte);
-
-  // Mark this primitive value to be captured as the specified type.
-  template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value>
-  void CaptureVariablePrimitive(T value);
-
-  // Mark this object reference to be captured.
-  void CaptureVariableObject(mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Mark this lambda closure to be captured.
-  void CaptureVariableLambda(Closure* closure);
-
-  // Get the size (in bytes) of the closure.
-  // This size is used to allocate memory large enough to write the closure into.
-  // Call 'CreateInPlace' to actually write the closure out.
-  size_t GetSize() const;
-
-  // Returns how many variables have been captured so far.
-  size_t GetCaptureCount() const;
-
-  // Get the list of captured variables' shorty field types.
-  const std::string& GetCapturedVariableShortyTypes() const;
-
-  // Creates a closure in-place and writes out the data into 'memory'.
-  // Memory must be at least 'GetSize' bytes large.
-  // All previously marked data to be captured is now written out.
-  Closure* CreateInPlace(void* memory, ArtLambdaMethod* target_method) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Locks need to be held for entire lifetime of ClosureBuilder.
-  ClosureBuilder() SHARED_REQUIRES(Locks::mutator_lock_)
-  {}
-
-  // Locks need to be held for entire lifetime of ClosureBuilder.
-  ~ClosureBuilder() SHARED_REQUIRES(Locks::mutator_lock_)
-  {}
-
- private:
-  // Initial size a closure starts out before any variables are written.
-  // Header size only.
-  static constexpr size_t kInitialSize = sizeof(ArtLambdaMethod*);
-
-  // Write a Closure's variables field from the captured variables.
-  // variables_size is specified in bytes and covers only the room for the variables themselves.
-  // Returns the calculated actual size of the closure.
-  size_t WriteValues(ArtLambdaMethod* target_method,
-                     uint8_t variables[],
-                     size_t header_size,
-                     size_t variables_size) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  size_t size_ = kInitialSize;
-  bool is_dynamic_size_ = false;
-  std::vector<ShortyFieldTypeTraits::MaxType> values_;
-  std::string shorty_types_;
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
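
Callers of this interface capture values one at a time, size a buffer with GetSize(), and then materialize with CreateInPlace. A toy builder illustrating the same capture-then-materialize protocol under simplifying assumptions (primitives only; no objects, nested lambdas, or lock assertions, and a little-endian byte copy like the original):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    class ToyBuilder {
     public:
      template <typename T>
      void CapturePrimitive(T value) {
        uint64_t slot = 0;
        std::memcpy(&slot, &value, sizeof(T));  // Preserve the bit pattern.
        values_.push_back(slot);
        sizes_.push_back(sizeof(T));
        size_ += sizeof(T);
      }

      size_t GetSize() const { return size_; }

      // Writes header + variables into caller-provided memory of GetSize() bytes.
      void CreateInPlace(void* memory, void* lambda_info) const {
        uint8_t* out = static_cast<uint8_t*>(memory);
        std::memcpy(out, &lambda_info, sizeof(lambda_info));  // Header first.
        size_t offset = sizeof(lambda_info);
        for (size_t i = 0; i < values_.size(); ++i) {
          std::memcpy(out + offset, &values_[i], sizes_[i]);
          offset += sizes_[i];
        }
      }

     private:
      size_t size_ = sizeof(void*);  // Header-only initial size, as above.
      std::vector<uint64_t> values_;
      std::vector<size_t> sizes_;
    };

    int main() {
      ToyBuilder builder;
      builder.CapturePrimitive<int32_t>(42);
      builder.CapturePrimitive(3.14);
      std::vector<uint8_t> buffer(builder.GetSize());
      builder.CreateInPlace(buffer.data(), /*lambda_info=*/nullptr);
      return 0;
    }
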
diff --git a/runtime/lambda/closure_test.cc b/runtime/lambda/closure_test.cc
deleted file mode 100644
index 7c1bd0d..0000000
--- a/runtime/lambda/closure_test.cc
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/closure.h"
-#include "lambda/closure_builder.h"
-#include "lambda/closure_builder-inl.h"
-#include "utils.h"
-
-#include <numeric>
-#include <stdint.h>
-#include <type_traits>
-#include "gtest/gtest.h"
-
-// Turn this on for some extra printfs to help with debugging, since some code is optimized out.
-static constexpr const bool kDebuggingClosureTest = true;
-
-namespace std {
-  using Closure = art::lambda::Closure;
-
-  // Specialize std::default_delete so it knows how to properly delete closures
-  // through the way we allocate them in this test.
-  //
-  // This is test-only because we don't want the rest of Art to do this.
-  template <>
-  struct default_delete<Closure> {
-    void operator()(Closure* closure) const {
-      delete[] reinterpret_cast<char*>(closure);
-    }
-  };
-}  // namespace std
-
-namespace art {
-
-// Fake lock acquisition to please clang lock checker.
-// This doesn't actually acquire any locks because we don't need multiple threads in this gtest.
-struct SCOPED_CAPABILITY ScopedFakeLock {
-  explicit ScopedFakeLock(MutatorMutex& mu) ACQUIRE(mu)
-      : mu_(mu) {
-  }
-
-  ~ScopedFakeLock() RELEASE()
-  {}
-
-  MutatorMutex& mu_;
-};
-
-namespace lambda {
-
-class ClosureTest : public ::testing::Test {
- public:
-  ClosureTest() = default;
-  ~ClosureTest() = default;
-
- protected:
-  static void SetUpTestCase() {
-  }
-
-  virtual void SetUp() {
-    // Create a dummy method here.
-    // It's "OK" because the Closure never needs to look inside the ArtMethod
-    // (it just needs to be non-null).
-    uintptr_t ignore = 0xbadbad;
-    fake_method_ = reinterpret_cast<ArtMethod*>(ignore);
-  }
-
-  static ::testing::AssertionResult IsResultSuccessful(bool result) {
-    if (result) {
-      return ::testing::AssertionSuccess();
-    } else {
-      return ::testing::AssertionFailure();
-    }
-  }
-
-  // Create a closure that captures the static variables from 'args' by-value.
-  // The lambda method's captured variables' types must match the ones in 'args'.
-  // -- This creates the closure directly in-memory by using memcpy.
-  template <typename ... Args>
-  static std::unique_ptr<Closure> CreateClosureStaticVariables(ArtLambdaMethod* lambda_method,
-                                                               Args&& ... args) {
-    constexpr size_t header_size = sizeof(ArtLambdaMethod*);
-    const size_t static_size = GetArgsSize(args ...) + header_size;
-    EXPECT_GE(static_size, sizeof(Closure));
-
-    // Can't just 'new' the Closure since we don't know the size up front.
-    char* closure_as_char_array = new char[static_size];
-    Closure* closure_ptr = new (closure_as_char_array) Closure;
-
-    // Set up the data
-    closure_ptr->lambda_info_ = lambda_method;
-    CopyArgs(closure_ptr->captured_[0].static_variables_, args ...);
-
-    // Make sure the entire thing is deleted once the unique_ptr goes out of scope.
-    return std::unique_ptr<Closure>(closure_ptr);  // NOLINT [whitespace/braces] [5]
-  }
-
-  // Copy variadic arguments into the destination array with memcpy.
-  template <typename T, typename ... Args>
-  static void CopyArgs(uint8_t destination[], T&& arg, Args&& ... args) {
-    memcpy(destination, &arg, sizeof(arg));
-    CopyArgs(destination + sizeof(arg), args ...);
-  }
-
-  // Base case: Done.
-  static void CopyArgs(uint8_t destination[]) {
-    UNUSED(destination);
-  }
-
-  // Create a closure that captures the static variables from 'args' by-value.
-  // The lambda method's captured variables' types must match the ones in 'args'.
-  // -- This uses ClosureBuilder interface to set up the closure indirectly.
-  template <typename ... Args>
-  static std::unique_ptr<Closure> CreateClosureStaticVariablesFromBuilder(
-      ArtLambdaMethod* lambda_method,
-      Args&& ... args) {
-    // Acquire a fake lock since closure_builder needs it.
-    ScopedFakeLock fake_lock(*Locks::mutator_lock_);
-
-    ClosureBuilder closure_builder;
-    CaptureVariableFromArgsList(/*out*/closure_builder, args ...);
-
-    EXPECT_EQ(sizeof...(args), closure_builder.GetCaptureCount());
-
-    constexpr size_t header_size = sizeof(ArtLambdaMethod*);
-    const size_t static_size = GetArgsSize(args ...) + header_size;
-    EXPECT_GE(static_size, sizeof(Closure));
-
-    // For static variables, no nested closure, so size must match exactly.
-    EXPECT_EQ(static_size, closure_builder.GetSize());
-
-    // Can't just 'new' the Closure since we don't know the size up front.
-    char* closure_as_char_array = new char[static_size];
-    Closure* closure_ptr = new (closure_as_char_array) Closure;
-
-    // The closure builder packs the captured variables into a Closure.
-    closure_builder.CreateInPlace(closure_ptr, lambda_method);
-
-    // Make sure the entire thing is deleted once the unique_ptr goes out of scope.
-    return std::unique_ptr<Closure>(closure_ptr);  // NOLINT [whitespace/braces] [5]
-  }
-
-  // Call the correct ClosureBuilder::CaptureVariableXYZ function based on the type of args.
-  // Invokes for each arg in args.
-  template <typename ... Args>
-  static void CaptureVariableFromArgsList(/*out*/ClosureBuilder& closure_builder, Args ... args) {
-    int ignore[] = {
-        (CaptureVariableFromArgs(/*out*/closure_builder, args),0)...  // NOLINT [whitespace/comma] [3]
-    };
-    UNUSED(ignore);
-  }
-
-  // ClosureBuilder::CaptureVariablePrimitive for types that are primitive only.
-  template <typename T>
-  typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveType<T>()>::type
-  static CaptureVariableFromArgs(/*out*/ClosureBuilder& closure_builder, T value) {
-    static_assert(ShortyFieldTypeTraits::IsPrimitiveType<T>(), "T must be a shorty primitive");
-    closure_builder.CaptureVariablePrimitive<T, ShortyFieldTypeSelectEnum<T>::value>(value);
-  }
-
-  // ClosureBuilder::CaptureVariableObject for types that are objects only.
-  template <typename T>
-  typename std::enable_if<ShortyFieldTypeTraits::IsObjectType<T>()>::type
-  static CaptureVariableFromArgs(/*out*/ClosureBuilder& closure_builder, const T* object) {
-    ScopedFakeLock fake_lock(*Locks::mutator_lock_);
-    closure_builder.CaptureVariableObject(object);
-  }
-
-  // Sum of sizeof(Args...).
-  template <typename T, typename ... Args>
-  static constexpr size_t GetArgsSize(T&& arg, Args&& ... args) {
-    return sizeof(arg) + GetArgsSize(args ...);
-  }
-
-  // Base case: Done.
-  static constexpr size_t GetArgsSize() {
-    return 0;
-  }
-
-  // Take "U" and memcpy it into a "T". T starts out as (T)0.
-  template <typename T, typename U>
-  static T ExpandingBitCast(const U& val) {
-    static_assert(sizeof(T) >= sizeof(U), "U too large");
-    T new_val = static_cast<T>(0);
-    memcpy(&new_val, &val, sizeof(U));
-    return new_val;
-  }
-
-  // Templatized extraction from closures by checking their type with enable_if.
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveNarrowType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, T value) {
-    EXPECT_EQ(ExpandingBitCast<uint32_t>(value), closure->GetCapturedPrimitiveNarrow(index))
-        << " with index " << index;
-  }
-
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveWideType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, T value) {
-    EXPECT_EQ(ExpandingBitCast<uint64_t>(value), closure->GetCapturedPrimitiveWide(index))
-        << " with index " << index;
-  }
-
-  // Templatized SFINAE for Objects so we can get better error messages.
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsObjectType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, const T* object) {
-    EXPECT_EQ(object, closure->GetCapturedObject(index))
-        << " with index " << index;
-  }
-
-  template <typename ... Args>
-  void TestPrimitive(const char *descriptor, Args ... args) {
-    const char* shorty = descriptor;
-
-    SCOPED_TRACE(descriptor);
-
-    ASSERT_EQ(strlen(shorty), sizeof...(args))
-        << "test error: descriptor must have same # of types as the # of captured variables";
-
-    // Important: This fake lambda method needs to out-live any Closures we create with it.
-    ArtLambdaMethod lambda_method{fake_method_,                    // NOLINT [whitespace/braces] [5]
-                                  descriptor,                      // NOLINT [whitespace/blank_line] [2]
-                                  shorty,
-                                 };
-
-    std::unique_ptr<Closure> closure_a;
-    std::unique_ptr<Closure> closure_b;
-
-    // Test the closure twice when it's constructed in different ways.
-    {
-      // Create the closure in a "raw" manner, that is directly with memcpy
-      // since we know the underlying data format.
-      // This simulates how the compiler would lay out the data directly.
-      SCOPED_TRACE("raw closure");
-      std::unique_ptr<Closure> closure_raw = CreateClosureStaticVariables(&lambda_method, args ...);
-
-      if (kDebuggingClosureTest) {
-        std::cerr << "closure raw address: " << closure_raw.get() << std::endl;
-      }
-      TestPrimitiveWithClosure(closure_raw.get(), descriptor, shorty, args ...);
-      closure_a = std::move(closure_raw);
-    }
-
-    {
-      // Create the closure with the ClosureBuilder, which is done indirectly.
-      // This simulates how the interpreter would create the closure dynamically at runtime.
-      SCOPED_TRACE("closure from builder");
-      std::unique_ptr<Closure> closure_built =
-          CreateClosureStaticVariablesFromBuilder(&lambda_method, args ...);
-      if (kDebuggingClosureTest) {
-        std::cerr << "closure built address: " << closure_built.get() << std::endl;
-      }
-      TestPrimitiveWithClosure(closure_built.get(), descriptor, shorty, args ...);
-      closure_b = std::move(closure_built);
-    }
-
-    // The closures should be identical memory-wise as well.
-    EXPECT_EQ(closure_a->GetSize(), closure_b->GetSize());
-    EXPECT_TRUE(memcmp(closure_a.get(),
-                       closure_b.get(),
-                       std::min(closure_a->GetSize(), closure_b->GetSize())) == 0);
-  }
-
-  template <typename ... Args>
-  static void TestPrimitiveWithClosure(Closure* closure,
-                                       const char* descriptor,
-                                       const char* shorty,
-                                       Args ... args) {
-    EXPECT_EQ(sizeof(ArtLambdaMethod*) + GetArgsSize(args...), closure->GetSize());
-    EXPECT_EQ(sizeof...(args), closure->GetNumberOfCapturedVariables());
-    EXPECT_STREQ(descriptor, closure->GetCapturedVariablesTypeDescriptor());
-    TestPrimitiveExpects(closure, shorty, /*index*/0, args ...);
-  }
-
-  // Call EXPECT_EQ for each argument in the closure's #GetCapturedX.
-  template <typename T, typename ... Args>
-  static void TestPrimitiveExpects(
-      const Closure* closure, const char* shorty, size_t index, T arg, Args ... args) {
-    ASSERT_EQ(ShortyFieldType(shorty[index]).GetStaticSize(), sizeof(T))
-        << "Test error: Type mismatch at index " << index;
-    ExpectCapturedVariable(closure, index, arg);
-    EXPECT_EQ(ShortyFieldType(shorty[index]), closure->GetCapturedShortyType(index));
-    TestPrimitiveExpects(closure, shorty, index + 1, args ...);
-  }
-
-  // Base case for EXPECT_EQ.
-  static void TestPrimitiveExpects(const Closure* closure, const char* shorty, size_t index) {
-    UNUSED(closure, shorty, index);
-  }
-
-  ArtMethod* fake_method_;
-};
-
-TEST_F(ClosureTest, TestTrivial) {
-  ArtLambdaMethod lambda_method{fake_method_,                    // NOLINT [whitespace/braces] [5]
-                                "",  // No captured variables    // NOLINT [whitespace/blank_line] [2]
-                                "",  // No captured variables
-                               };
-
-  std::unique_ptr<Closure> closure = CreateClosureStaticVariables(&lambda_method);
-
-  EXPECT_EQ(sizeof(ArtLambdaMethod*), closure->GetSize());
-  EXPECT_EQ(0u, closure->GetNumberOfCapturedVariables());
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveSingle) {
-  TestPrimitive("Z", true);
-  TestPrimitive("B", int8_t(0xde));
-  TestPrimitive("C", uint16_t(0xbeef));
-  TestPrimitive("S", int16_t(0xdead));
-  TestPrimitive("I", int32_t(0xdeadbeef));
-  TestPrimitive("F", 0.123f);
-  TestPrimitive("J", int64_t(0xdeadbeef00c0ffee));
-  TestPrimitive("D", 123.456);
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveMany) {
-  TestPrimitive("ZZ", true, false);
-  TestPrimitive("ZZZ", true, false, true);
-  TestPrimitive("BBBB", int8_t(0xde), int8_t(0xa0), int8_t(0xff), int8_t(0xcc));
-  TestPrimitive("CC", uint16_t(0xbeef), uint16_t(0xdead));
-  TestPrimitive("SSSS", int16_t(0xdead), int16_t(0xc0ff), int16_t(0xf000), int16_t(0xbaba));
-  TestPrimitive("III", int32_t(0xdeadbeef), int32_t(0xc0ffee), int32_t(0xbeefdead));
-  TestPrimitive("FF", 0.123f, 555.666f);
-  TestPrimitive("JJJ", int64_t(0xdeadbeef00c0ffee), int64_t(0x123), int64_t(0xc0ffee));
-  TestPrimitive("DD", 123.456, 777.888);
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveMixed) {
-  TestPrimitive("ZZBBCCSSIIFFJJDD",
-                true, false,
-                int8_t(0xde), int8_t(0xa0),
-                uint16_t(0xbeef), uint16_t(0xdead),
-                int16_t(0xdead), int16_t(0xc0ff),
-                int32_t(0xdeadbeef), int32_t(0xc0ffee),
-                0.123f, 555.666f,
-                int64_t(0xdeadbeef00c0ffee), int64_t(0x123),
-                123.456, 777.888);
-}  // TEST_F
-
-}  // namespace lambda
-}  // namespace art
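
CaptureVariableFromArgsList above uses the pre-C++17 pack-expansion-into-an-array idiom to call a function once per variadic argument. Distilled into a standalone sketch (note the leading 0, which also keeps the array non-empty when the pack is empty, a corner case the original form would not compile for):

    #include <iostream>

    template <typename T>
    void Handle(const T& value) {
      std::cout << value << ' ';
    }

    template <typename... Args>
    void HandleAll(const Args&... args) {
      // Expands to {0, (Handle(a1), 0), (Handle(a2), 0), ...}; the comma
      // operator calls Handle and yields 0 for each element.
      int ignore[] = {0, (Handle(args), 0)...};
      (void)ignore;
    }

    int main() {
      HandleAll(1, 2.5, "three");  // Prints: 1 2.5 three
      std::cout << '\n';
      return 0;
    }
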
diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc
deleted file mode 100644
index 4910732..0000000
--- a/runtime/lambda/leaking_allocator.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/leaking_allocator.h"
-#include "linear_alloc.h"
-#include "runtime.h"
-
-namespace art {
-namespace lambda {
-
-void* LeakingAllocator::AllocateMemory(Thread* self, size_t byte_size) {
-  // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data.
-  return Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size);
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h
deleted file mode 100644
index c3222d0..0000000
--- a/runtime/lambda/leaking_allocator.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
-#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
-
-#include <utility>  // std::forward
-
-namespace art {
-class Thread;  // forward declaration
-
-namespace lambda {
-
-// Temporary class to centralize all the leaking allocations.
-// Allocations made through this class are never freed; it is a placeholder
-// indicating that the calling code needs to be rewritten to properly:
-//
-// (a) Have a lifetime scoped to some other entity.
-// (b) Not be allocated over and over again if it was already allocated once (immutable data).
-//
-// TODO: do all of the above a/b for each callsite, and delete this class.
-class LeakingAllocator {
- public:
-  // Allocate byte_size bytes worth of memory. Never freed.
-  static void* AllocateMemory(Thread* self, size_t byte_size);
-
-  // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed.
-  template <typename T, typename... Args>
-  static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) {
-    return new (AllocateMemory(self, byte_size)) T(std::forward<Args>(args)...);
-  }
-
-  // Make a new instance of T in-place at newly allocated memory. Never freed.
-  template <typename T, typename... Args>
-  static T* MakeInstance(Thread* self, Args&&... args) {
-    return new (AllocateMemory(self, sizeof(T))) T(std::forward<Args>(args)...);
-  }
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
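
MakeFlexibleInstance above pairs an oversized allocation with placement new so a fixed-size header can precede variable-length payload bytes. A minimal sketch of the pattern, with malloc standing in for the runtime's LinearAlloc and, like the original, never freeing anything:

    #include <cstddef>
    #include <cstdlib>
    #include <new>
    #include <utility>

    // Toy allocator: allocate byte_size bytes, construct a T at the front.
    template <typename T, typename... Args>
    T* MakeFlexibleInstance(size_t byte_size, Args&&... args) {
      void* memory = std::malloc(byte_size);  // Never freed, like the original.
      return new (memory) T(std::forward<Args>(args)...);
    }

    struct Header {
      explicit Header(int n) : count(n) {}
      int count;
      // The bytes after the header hold the variable-length payload.
    };

    int main() {
      // Room for the header plus 16 payload bytes.
      Header* h = MakeFlexibleInstance<Header>(sizeof(Header) + 16, /*n=*/4);
      return h->count == 4 ? 0 : 1;
    }
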
diff --git a/runtime/lambda/shorty_field_type.h b/runtime/lambda/shorty_field_type.h
deleted file mode 100644
index 46ddaa9..0000000
--- a/runtime/lambda/shorty_field_type.h
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
-#define ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "globals.h"
-#include "runtime/primitive.h"
-
-#include <ostream>
-
-namespace art {
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-
-struct Closure;  // forward declaration
-
-// TODO: Refactor together with primitive.h
-
-// The short form of a field type descriptor. Corresponds to ShortyFieldType in the dex specification.
-// Only types usable by a field (and locals) are allowed (i.e. no void type).
-// Note that arrays and objects are treated both as 'L'.
-//
-// This is effectively a 'char' enum-like zero-cost type-safe wrapper with extra helper functions.
-struct ShortyFieldType : ValueObject {
-  // Use as if this was an enum class, e.g. 'ShortyFieldType::kBoolean'.
-  enum : char {
-    // Primitives (Narrow):
-    kBoolean = 'Z',
-    kByte = 'B',
-    kChar = 'C',
-    kShort = 'S',
-    kInt = 'I',
-    kFloat = 'F',
-    // Primitives (Wide):
-    kLong = 'J',
-    kDouble = 'D',
-    // Managed types:
-    kObject = 'L',  // This can also be an array (which is otherwise '[' in a non-shorty).
-    kLambda = '\\',
-  };  // NOTE: This is an anonymous enum so we can get exhaustive switch checking from the compiler.
-
-  // Implicitly construct from the enum above. Value must be one of the enum list members above.
-  // Always safe to use, does not do any DCHECKs.
-  inline constexpr ShortyFieldType(decltype(kByte) c) : value_(c) {
-  }
-
-  // Default constructor. The initial value is undefined. Initialize before calling methods.
-  // This is very unsafe, but it exists as a convenience for creating not-yet-initialized values.
-  explicit ShortyFieldType() : value_(StaticCastValue(0)) {
-  }
-
-  // Explicitly construct from a char. Value must be one of the enum list members above.
-  // Conversion is potentially unsafe, so DCHECKing is performed.
-  explicit inline ShortyFieldType(char c) : value_(StaticCastValue(c)) {
-    if (kIsDebugBuild) {
-      // Verify at debug-time that our conversion is safe.
-      ShortyFieldType ignored;
-      DCHECK(MaybeCreate(c, &ignored)) << "unknown shorty field type '" << c << "'";
-    }
-  }
-
-  // Attempts to parse the character in 'shorty_field_type' into its strongly typed version.
-  // Returns false if the character was out of range of the grammar.
-  static bool MaybeCreate(char shorty_field_type, ShortyFieldType* out) {
-    DCHECK(out != nullptr);
-    switch (shorty_field_type) {
-      case kBoolean:
-      case kByte:
-      case kChar:
-      case kShort:
-      case kInt:
-      case kFloat:
-      case kLong:
-      case kDouble:
-      case kObject:
-      case kLambda:
-        *out = ShortyFieldType(static_cast<decltype(kByte)>(shorty_field_type));
-        return true;
-      default:
-        break;
-    }
-
-    return false;
-  }
-
-  // Convert the first type in a field type descriptor string into a shorty.
-  // Arrays are converted into objects.
-  // Does not work for 'void' types (as they are illegal in a field type descriptor).
-  static ShortyFieldType CreateFromFieldTypeDescriptor(const char* field_type_descriptor) {
-    DCHECK(field_type_descriptor != nullptr);
-    char c = *field_type_descriptor;
-    if (UNLIKELY(c == kArray)) {  // Arrays are treated as object references.
-      c = kObject;
-    }
-    return ShortyFieldType{c};  // NOLINT [readability/braces] [4]
-  }
-
-  // Parse the first type in the field type descriptor string into a shorty.
-  // See CreateFromFieldTypeDescriptor for more details.
-  //
-  // Returns a pointer into the middle of the field_type_descriptor that
-  // points at the next shorty type, or null if there are no more types.
-  //
-  // DCHECKs that each of the nested types is a valid shorty field type. This
-  // means the type descriptor must be already valid.
-  static const char* ParseFromFieldTypeDescriptor(const char* field_type_descriptor,
-                                                  ShortyFieldType* out_type) {
-    DCHECK(field_type_descriptor != nullptr);
-
-    if (UNLIKELY(field_type_descriptor[0] == '\0')) {
-      // Handle empty strings by immediately returning null.
-      return nullptr;
-    }
-
-    // All non-empty strings must be a valid list of field type descriptors, otherwise
-    // the DCHECKs will kick in and the program will crash.
-    const char shorter_type = *field_type_descriptor;
-
-    ShortyFieldType safe_type;
-    bool type_set = MaybeCreate(shorter_type, &safe_type);
-
-    // Lambda that keeps skipping characters until it sees ';'.
-    // Stops one character -after- the ';'.
-    auto skip_until_semicolon = [&field_type_descriptor]() {
-      while (*field_type_descriptor != ';' && *field_type_descriptor != '\0') {
-        ++field_type_descriptor;
-      }
-      DCHECK_NE(*field_type_descriptor, '\0')
-          << " type descriptor terminated too early: " << field_type_descriptor;
-      ++field_type_descriptor;  // Skip the ';'
-    };
-
-    ++field_type_descriptor;
-    switch (shorter_type) {
-      case kObject:
-        skip_until_semicolon();
-
-        DCHECK(type_set);
-        DCHECK(safe_type == kObject);
-        break;
-      case kArray:
-        // Strip out all of the leading [[[[[s; we don't care if it's a multi-dimensional array.
-        while (*field_type_descriptor == '[' && *field_type_descriptor != '\0') {
-          ++field_type_descriptor;
-        }
-        DCHECK_NE(*field_type_descriptor, '\0')
-            << " type descriptor terminated too early: " << field_type_descriptor;
-        // Either a primitive, object, or closure left. No more arrays.
-        {
-          // Now skip all the characters that form the array's interior-most element type
-          // (which itself is guaranteed not to be an array).
-          ShortyFieldType array_interior_type;
-          type_set = MaybeCreate(*field_type_descriptor, &array_interior_type);
-          DCHECK(type_set) << " invalid remaining type descriptor " << field_type_descriptor;
-
-          // Handle array-of-objects case like [[[[[LObject; and array-of-closures like [[[[[\Foo;
-          if (*field_type_descriptor == kObject || *field_type_descriptor == kLambda) {
-            skip_until_semicolon();
-          } else {
-            // Handle primitives which are exactly one character we can skip.
-            DCHECK(array_interior_type.IsPrimitive());
-            ++field_type_descriptor;
-          }
-        }
-
-        safe_type = kObject;
-        type_set = true;
-        break;
-      case kLambda:
-        skip_until_semicolon();
-
-        DCHECK(safe_type == kLambda);
-        DCHECK(type_set);
-        break;
-      default:
-        DCHECK_NE(kVoid, shorter_type) << "cannot make a ShortyFieldType from a void type";
-        break;
-    }
-
-    DCHECK(type_set) << "invalid shorty type descriptor " << shorter_type;
-
-    *out_type = safe_type;
-    return type_set ? field_type_descriptor : nullptr;
-  }
-
-  // Explicitly convert to a char.
-  inline explicit operator char() const {
-    return value_;
-  }
-
-  // Is this a primitive?
-  inline bool IsPrimitive() const {
-    return IsPrimitiveNarrow() || IsPrimitiveWide();
-  }
-
-  // Is this a narrow primitive (i.e. can fit into 1 virtual register)?
-  inline bool IsPrimitiveNarrow() const {
-    switch (value_) {
-      case kBoolean:
-      case kByte:
-      case kChar:
-      case kShort:
-      case kInt:
-      case kFloat:
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  // Is this a wide primitive (i.e. needs exactly 2 virtual registers)?
-  inline bool IsPrimitiveWide() const {
-    switch (value_) {
-      case kLong:
-      case kDouble:
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  // Is this an object reference (which can also be an array)?
-  inline bool IsObject() const {
-    return value_ == kObject;
-  }
-
-  // Is this a lambda?
-  inline bool IsLambda() const {
-    return value_ == kLambda;
-  }
-
-  // Is the size of this (to store inline as a field) always known at compile-time?
-  inline bool IsStaticSize() const {
-    return !IsLambda();
-  }
-
-  // Get the compile-time size (to be able to store it inline as a field or on stack).
-  // Dynamically-sized values such as lambdas return the guaranteed lower bound.
-  inline size_t GetStaticSize() const {
-    switch (value_) {
-      case kBoolean:
-        return sizeof(bool);
-      case kByte:
-        return sizeof(uint8_t);
-      case kChar:
-        return sizeof(uint16_t);  // Chars are unsigned.
-      case kShort:
-        return sizeof(int16_t);
-      case kInt:
-        return sizeof(int32_t);
-      case kLong:
-        return sizeof(int64_t);
-      case kFloat:
-        return sizeof(float);
-      case kDouble:
-        return sizeof(double);
-      case kObject:
-        return kObjectReferenceSize;
-      case kLambda:
-        return sizeof(void*);  // Large enough to store an ArtLambdaMethod pointer.
-      default:
-        DCHECK(false) << "unknown shorty field type '" << static_cast<char>(value_) << "'";
-        UNREACHABLE();
-    }
-  }
-
-  // Implicitly convert to the anonymous nested inner type. Used for exhaustive switch detection.
-  inline operator decltype(kByte)() const {
-    return value_;
-  }
-
-  // Returns a read-only static string representing the enum name, useful for printing/debug only.
-  inline const char* ToString() const {
-    switch (value_) {
-      case kBoolean:
-        return "kBoolean";
-      case kByte:
-        return "kByte";
-      case kChar:
-        return "kChar";
-      case kShort:
-        return "kShort";
-      case kInt:
-        return "kInt";
-      case kLong:
-        return "kLong";
-      case kFloat:
-        return "kFloat";
-      case kDouble:
-        return "kDouble";
-      case kObject:
-        return "kObject";
-      case kLambda:
-        return "kLambda";
-      default:
-        // Undefined behavior if we get this far. Pray the compiler gods are merciful.
-        return "<undefined>";
-    }
-  }
-
- private:
-  static constexpr const char kArray = '[';
-  static constexpr const char kVoid  = 'V';
-
-  // Helper to statically cast anything into our nested anonymous enum type.
-  template <typename T>
-  inline static decltype(kByte) StaticCastValue(const T& anything) {
-    return static_cast<decltype(value_)>(anything);
-  }
-
-  // The only field in this struct.
-  decltype(kByte) value_;
-};
-
-
-// Print to an output stream.
-inline std::ostream& operator<<(std::ostream& ostream, ShortyFieldType shorty) {
-  return ostream << shorty.ToString();
-}
-
-static_assert(sizeof(ShortyFieldType) == sizeof(char),
-              "ShortyFieldType must be lightweight just like a char");
-
-// Compile-time trait information regarding the ShortyFieldType.
-// Used by static_asserts to verify that the templates are correctly used at compile-time.
-//
-// For example,
-//     ShortyFieldTypeTraits::IsPrimitiveNarrowType<int64_t>() == true
-//     ShortyFieldTypeTraits::IsObjectType<mirror::Object*>() == true
-struct ShortyFieldTypeTraits {
-  // A type guaranteed to be large enough to hold any of the shorty field types.
-  using MaxType = uint64_t;
-
-  // Type traits: Returns true if 'T' is a valid type that can be represented by a shorty field type.
-  template <typename T>
-  static inline constexpr bool IsType() {
-    return IsPrimitiveType<T>() || IsObjectType<T>() || IsLambdaType<T>();
-  }
-
-  // Returns true if 'T' is a primitive type (i.e. a built-in without nested references).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveType() {
-    return IsPrimitiveNarrowType<T>() || IsPrimitiveWideType<T>();
-  }
-
-  // Returns true if 'T' is a primitive type that is narrow (i.e. can be stored into 1 vreg).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveNarrowType() {
-    return IsPrimitiveNarrowTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is a primitive type that is wide (i.e. needs 2 vregs for storage).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveWideType() {
-    return IsPrimitiveWideTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is an object (i.e. it is a managed GC reference).
-  // Note: This is equivalent to std::is_base_of<mirror::Object*, T>::value
-  template <typename T>
-  static inline constexpr bool IsObjectType() {
-    return IsObjectTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is a lambda (i.e. it is a closure with unknown static data).
-  template <typename T>
-  static inline constexpr bool IsLambdaType() {
-    return IsLambdaTypeImpl(static_cast<T* const>(nullptr));
-  }
-
- private:
-#define IS_VALID_TYPE_SPECIALIZATION(type, name) \
-  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { \
-    return true; \
-  } \
-  \
-  static_assert(sizeof(MaxType) >= sizeof(type), "MaxType too small")
-
-  IS_VALID_TYPE_SPECIALIZATION(bool, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(int8_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint8_t, PrimitiveNarrow);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(int16_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint16_t, PrimitiveNarrow);  // Chars are unsigned.
-  IS_VALID_TYPE_SPECIALIZATION(int32_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint32_t, PrimitiveNarrow);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(float, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(int64_t, PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION(uint64_t, PrimitiveWide);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(double, PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION(mirror::Object*, Object);
-  IS_VALID_TYPE_SPECIALIZATION(Closure*, Lambda);
-#undef IS_VALID_TYPE_SPECIALIZATION
-
-#define IS_VALID_TYPE_SPECIALIZATION_IMPL(name) \
-  template <typename T> \
-  static inline constexpr bool Is ## name ## TypeImpl(T* const = 0) { \
-    return false; \
-  }
-
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(Object);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(Lambda);
-
-#undef IS_VALID_TYPE_SPECIALIZATION_IMPL
-};
-
-// Maps the ShortyFieldType enum to its C++ type equivalent via the nested "type" typedef.
-// For example:
-//     ShortyFieldTypeSelectType<ShortyFieldType::kBoolean>::type => bool
-//     ShortyFieldTypeSelectType<ShortyFieldType::kLong>::type => int64_t
-//
-// Invalid enums will not have the type defined.
-template <decltype(ShortyFieldType::kByte) Shorty>
-struct ShortyFieldTypeSelectType {
-};
-
-// Maps the C++ type to its ShortyFieldType enum equivalent via the "value" constexpr.
-// For example:
-//     ShortyFieldTypeSelectEnum<bool>::value => ShortyFieldType::kBoolean
-//     ShortyFieldTypeSelectEnum<int64_t>::value => ShortyFieldType::kLong
-//
-// Signed-ness must match for a valid select, e.g. uint64_t will not map to kLong, but int64_t will.
-// Invalid types will not have the value defined (see e.g. ShortyFieldTypeTraits::IsType<T>())
-template <typename T>
-struct ShortyFieldTypeSelectEnum {
-};
-
-#define SHORTY_FIELD_TYPE_SELECT_IMPL(cpp_type, enum_element)      \
-template <> \
-struct ShortyFieldTypeSelectType<ShortyFieldType::enum_element> { \
-  using type = cpp_type; \
-}; \
-\
-template <> \
-struct ShortyFieldTypeSelectEnum<cpp_type> { \
-  static constexpr const auto value = ShortyFieldType::enum_element; \
-};
-
-SHORTY_FIELD_TYPE_SELECT_IMPL(bool, kBoolean);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int8_t, kByte);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int16_t, kShort);
-SHORTY_FIELD_TYPE_SELECT_IMPL(uint16_t, kChar);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int32_t, kInt);
-SHORTY_FIELD_TYPE_SELECT_IMPL(float, kFloat);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int64_t, kLong);
-SHORTY_FIELD_TYPE_SELECT_IMPL(double, kDouble);
-SHORTY_FIELD_TYPE_SELECT_IMPL(mirror::Object*, kObject);
-SHORTY_FIELD_TYPE_SELECT_IMPL(Closure*, kLambda);
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
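
The descriptor-walking rules in ParseFromFieldTypeDescriptor above reduce to: primitives consume one character, 'L'/'\' types run through the ';', and any '[' prefix collapses the entry to an object. A standalone distillation (NextFieldType is a hypothetical name and skips the DCHECK-style validation):

    #include <cassert>

    // Walks one field type: primitives take one character, 'L'/'\' types run
    // through the ';', and any '[' prefix collapses the whole entry to 'L'.
    const char* NextFieldType(const char* desc, char* out) {
      if (*desc == '\0') return nullptr;
      const char* p = desc;
      const bool is_array = (*p == '[');
      while (*p == '[') ++p;                 // Multi-dimensional arrays collapse too.
      const char c = *p++;
      if (c == 'L' || c == '\\') {           // Object or lambda reference type.
        while (*p != ';') ++p;
        ++p;                                 // Consume the ';'.
      }
      *out = is_array ? 'L' : c;             // Arrays are treated as objects.
      return p;
    }

    int main() {
      char type = 0;
      const char* cursor = "[[LObject;I\\Closure;";
      cursor = NextFieldType(cursor, &type); assert(type == 'L');
      cursor = NextFieldType(cursor, &type); assert(type == 'I');
      cursor = NextFieldType(cursor, &type); assert(type == '\\');
      assert(NextFieldType(cursor, &type) == nullptr);
      return 0;
    }
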
diff --git a/runtime/lambda/shorty_field_type_test.cc b/runtime/lambda/shorty_field_type_test.cc
deleted file mode 100644
index 32bade9..0000000
--- a/runtime/lambda/shorty_field_type_test.cc
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/shorty_field_type.h"
-#include "mirror/object_reference.h"
-
-#include "utils.h"
-#include <numeric>
-#include <stdint.h>
-#include "gtest/gtest.h"
-
-#define EXPECT_NULL(expected) EXPECT_EQ(reinterpret_cast<const void*>(expected), \
-                                        reinterpret_cast<const void*>(nullptr))
-
-namespace art {
-namespace lambda {
-
-class ShortyFieldTypeTest : public ::testing::Test {
- public:
-  ShortyFieldTypeTest() = default;
-  ~ShortyFieldTypeTest() = default;
-
- protected:
-  static void SetUpTestCase() {
-  }
-
-  virtual void SetUp() {
-  }
-
-  static ::testing::AssertionResult IsResultSuccessful(bool result) {
-    if (result) {
-      return ::testing::AssertionSuccess();
-    } else {
-      return ::testing::AssertionFailure();
-    }
-  }
-
-  template <typename T>
-  static std::string ListToString(const T& list) {
-    std::stringstream stream;
-
-    stream << "[";
-    for (auto&& val : list) {
-      stream << val << ", ";
-    }
-    stream << "]";
-
-    return stream.str();
-  }
-
-  // Compare two vector-like types for equality.
-  template <typename T>
-  static ::testing::AssertionResult AreListsEqual(const T& expected, const T& actual) {
-    bool success = true;
-    std::stringstream stream;
-
-    if (expected.size() != actual.size()) {
-      success = false;
-      stream << "Expected list size: " << expected.size()
-             << ", but got list size: " << actual.size();
-      stream << std::endl;
-    }
-
-    for (size_t j = 0; j < std::min(expected.size(), actual.size()); ++j) {
-      if (expected[j] != actual[j]) {
-        success = false;
-        stream << "Expected element '" << j << "' to be '" << expected[j] << "', but got actual: '"
-               << actual[j] << "'.";
-        stream << std::endl;
-      }
-    }
-
-    if (success) {
-      return ::testing::AssertionSuccess();
-    }
-
-    stream << "Expected list was: " << ListToString(expected)
-           << ", actual list was: " << ListToString(actual);
-
-    return ::testing::AssertionFailure() << stream.str();
-  }
-
-  static std::vector<ShortyFieldType> ParseLongTypeDescriptorsToList(const char* type_descriptor) {
-    std::vector<ShortyFieldType> lst;
-
-    ShortyFieldType shorty;
-
-    const char* parsed = type_descriptor;
-    while ((parsed = ShortyFieldType::ParseFromFieldTypeDescriptor(parsed, &shorty)) != nullptr) {
-      lst.push_back(shorty);
-    }
-
-    return lst;
-  }
-
- protected:
-  // Shorthands for the ShortyFieldType constants.
-  // The letters are the same as JNI letters, with kS_ being a lambda since \ is not available.
-  static constexpr ShortyFieldType kSZ = ShortyFieldType::kBoolean;
-  static constexpr ShortyFieldType kSB = ShortyFieldType::kByte;
-  static constexpr ShortyFieldType kSC = ShortyFieldType::kChar;
-  static constexpr ShortyFieldType kSS = ShortyFieldType::kShort;
-  static constexpr ShortyFieldType kSI = ShortyFieldType::kInt;
-  static constexpr ShortyFieldType kSF = ShortyFieldType::kFloat;
-  static constexpr ShortyFieldType kSJ = ShortyFieldType::kLong;
-  static constexpr ShortyFieldType kSD = ShortyFieldType::kDouble;
-  static constexpr ShortyFieldType kSL = ShortyFieldType::kObject;
-  static constexpr ShortyFieldType kS_ = ShortyFieldType::kLambda;
-};
-
-TEST_F(ShortyFieldTypeTest, TestMaybeCreate) {
-  ShortyFieldType shorty;
-
-  std::vector<char> shorties = {'Z', 'B', 'C', 'S', 'I', 'F', 'J', 'D', 'L', '\\'};
-
-  // All valid 'shorty' characters are created successfully.
-  for (const char c : shorties) {
-    EXPECT_TRUE(ShortyFieldType::MaybeCreate(c, &shorty)) << c;
-    EXPECT_EQ(c, static_cast<char>(shorty));
-  }
-
-  // All other characters can never be created.
-  for (unsigned char c = 0; c < std::numeric_limits<unsigned char>::max(); ++c) {
-    // Skip the valid characters.
-    if (std::find(shorties.begin(), shorties.end(), c) != shorties.end()) { continue; }
-    // All invalid characters should fail.
-    EXPECT_FALSE(ShortyFieldType::MaybeCreate(static_cast<char>(c), &shorty)) << c;
-  }
-}  // TEST_F
-
-TEST_F(ShortyFieldTypeTest, TestCreateFromFieldTypeDescriptor) {
-  // Sample input.
-  std::vector<const char*> lengthies = {
-      "Z", "B", "C", "S", "I", "F", "J", "D", "LObject;", "\\Closure;",
-      "[Z", "[[B", "[[LObject;"
-  };
-
-  // Expected output.
-  std::vector<ShortyFieldType> expected = {
-      ShortyFieldType::kBoolean,
-      ShortyFieldType::kByte,
-      ShortyFieldType::kChar,
-      ShortyFieldType::kShort,
-      ShortyFieldType::kInt,
-      ShortyFieldType::kFloat,
-      ShortyFieldType::kLong,
-      ShortyFieldType::kDouble,
-      ShortyFieldType::kObject,
-      ShortyFieldType::kLambda,
-      // Arrays are always treated as objects.
-      ShortyFieldType::kObject,
-      ShortyFieldType::kObject,
-      ShortyFieldType::kObject,
-  };
-
-  // All valid lengthy types are correctly turned into the expected shorty type.
-  for (size_t i = 0; i < lengthies.size(); ++i) {
-    EXPECT_EQ(expected[i], ShortyFieldType::CreateFromFieldTypeDescriptor(lengthies[i]));
-  }
-}  // TEST_F
-
-TEST_F(ShortyFieldTypeTest, TestParseFromFieldTypeDescriptor) {
-  // Sample input.
-  std::vector<const char*> lengthies = {
-      // Empty list
-      "",
-      // Primitives
-      "Z", "B", "C", "S", "I", "F", "J", "D",
-      // Non-primitives
-      "LObject;", "\\Closure;",
-      // Arrays. The biggest PITA.
-      "[Z", "[[B", "[[LObject;", "[[[[\\Closure;",
-      // Multiple things at once:
-      "ZBCSIFJD",
-      "LObject;LObject;SSI",
-      "[[ZDDZ",
-      "[[LObject;[[Z[F\\Closure;LObject;",
-  };
-
-  // Expected output.
-  std::vector<std::vector<ShortyFieldType>> expected = {
-      // Empty list
-      {},
-      // Primitives
-      {kSZ}, {kSB}, {kSC}, {kSS}, {kSI}, {kSF}, {kSJ}, {kSD},
-      // Non-primitives.
-      { ShortyFieldType::kObject }, { ShortyFieldType::kLambda },
-      // Arrays are always treated as objects.
-      { kSL }, { kSL }, { kSL }, { kSL },
-      // Multiple things at once:
-      { kSZ, kSB, kSC, kSS, kSI, kSF, kSJ, kSD },
-      { kSL, kSL, kSS, kSS, kSI },
-      { kSL, kSD, kSD, kSZ },
-      { kSL, kSL, kSL, kS_, kSL },
-  };
-
-  // Sanity check that the expected/actual lists are the same size when adding new entries.
-  ASSERT_EQ(expected.size(), lengthies.size());
-
-  // All valid lengthy types are correctly turned into the expected shorty type.
-  for (size_t i = 0; i < expected.size(); ++i) {
-    const std::vector<ShortyFieldType>& expected_list = expected[i];
-    std::vector<ShortyFieldType> actual_list = ParseLongTypeDescriptorsToList(lengthies[i]);
-    EXPECT_TRUE(AreListsEqual(expected_list, actual_list));
-  }
-}  // TEST_F
-
-// Helper class to probe a shorty's characteristics by minimizing copy-and-paste tests.
-template <typename T, decltype(ShortyFieldType::kByte) kShortyEnum>
-struct ShortyTypeCharacteristics {
-  bool is_primitive_ = false;
-  bool is_primitive_narrow_ = false;
-  bool is_primitive_wide_ = false;
-  bool is_object_ = false;
-  bool is_lambda_ = false;
-  size_t size_ = sizeof(T);
-  bool is_dynamic_sized_ = false;
-
-  void CheckExpects() {
-    ShortyFieldType shorty = kShortyEnum;
-
-    // Test the main non-parsing-related ShortyFieldType characteristics.
-    EXPECT_EQ(is_primitive_, shorty.IsPrimitive());
-    EXPECT_EQ(is_primitive_narrow_, shorty.IsPrimitiveNarrow());
-    EXPECT_EQ(is_primitive_wide_, shorty.IsPrimitiveWide());
-    EXPECT_EQ(is_object_, shorty.IsObject());
-    EXPECT_EQ(is_lambda_, shorty.IsLambda());
-    EXPECT_EQ(size_, shorty.GetStaticSize());
-    EXPECT_EQ(is_dynamic_sized_, !shorty.IsStaticSize());
-
-    // Test compile-time ShortyFieldTypeTraits.
-    EXPECT_TRUE(ShortyFieldTypeTraits::IsType<T>());
-    EXPECT_EQ(is_primitive_, ShortyFieldTypeTraits::IsPrimitiveType<T>());
-    EXPECT_EQ(is_primitive_narrow_, ShortyFieldTypeTraits::IsPrimitiveNarrowType<T>());
-    EXPECT_EQ(is_primitive_wide_, ShortyFieldTypeTraits::IsPrimitiveWideType<T>());
-    EXPECT_EQ(is_object_, ShortyFieldTypeTraits::IsObjectType<T>());
-    EXPECT_EQ(is_lambda_, ShortyFieldTypeTraits::IsLambdaType<T>());
-
-    // Test compile-time ShortyFieldType selectors
-    static_assert(std::is_same<T, typename ShortyFieldTypeSelectType<kShortyEnum>::type>::value,
-                  "ShortyFieldType Enum->Type incorrect mapping");
-    auto kActualEnum = ShortyFieldTypeSelectEnum<T>::value;  // Do not ODR-use, avoid linker error.
-    EXPECT_EQ(kShortyEnum, kActualEnum);
-  }
-};
-
-TEST_F(ShortyFieldTypeTest, TestCharacteristicsAndTraits) {
-  // Boolean test
-  {
-    SCOPED_TRACE("boolean");
-    ShortyTypeCharacteristics<bool, ShortyFieldType::kBoolean> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Byte test
-  {
-    SCOPED_TRACE("byte");
-    ShortyTypeCharacteristics<int8_t, ShortyFieldType::kByte> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Char test
-  {
-    SCOPED_TRACE("char");
-    ShortyTypeCharacteristics<uint16_t, ShortyFieldType::kChar> chars;  // Char is unsigned.
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Short test
-  {
-    SCOPED_TRACE("short");
-    ShortyTypeCharacteristics<int16_t, ShortyFieldType::kShort> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Int test
-  {
-    SCOPED_TRACE("int");
-    ShortyTypeCharacteristics<int32_t, ShortyFieldType::kInt> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Long test
-  {
-    SCOPED_TRACE("long");
-    ShortyTypeCharacteristics<int64_t, ShortyFieldType::kLong> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_wide_ = true;
-    chars.CheckExpects();
-  }
-
-  // Float test
-  {
-    SCOPED_TRACE("float");
-    ShortyTypeCharacteristics<float, ShortyFieldType::kFloat> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Double test
-  {
-    SCOPED_TRACE("double");
-    ShortyTypeCharacteristics<double, ShortyFieldType::kDouble> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_wide_ = true;
-    chars.CheckExpects();
-  }
-
-  // Object test
-  {
-    SCOPED_TRACE("object");
-    ShortyTypeCharacteristics<mirror::Object*, ShortyFieldType::kObject> chars;
-    chars.is_object_ = true;
-    chars.size_ = kObjectReferenceSize;
-    chars.CheckExpects();
-    EXPECT_EQ(kObjectReferenceSize, sizeof(mirror::CompressedReference<mirror::Object>));
-  }
-
-  // Lambda test
-  {
-    SCOPED_TRACE("lambda");
-    ShortyTypeCharacteristics<Closure*, ShortyFieldType::kLambda> chars;
-    chars.is_lambda_ = true;
-    chars.is_dynamic_sized_ = true;
-    chars.CheckExpects();
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
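The deleted test above leans on a compile-time enum-to-type selector. A minimal stand-alone sketch of that pattern, with illustrative types rather than ART's actual ShortyFieldType machinery:

    #include <cstdint>
    #include <type_traits>

    enum class Shorty : char { kBoolean = 'Z', kInt = 'I', kLong = 'J' };

    template <Shorty kEnum> struct SelectType;               // Enum -> type.
    template <> struct SelectType<Shorty::kBoolean> { using type = bool; };
    template <> struct SelectType<Shorty::kInt>     { using type = int32_t; };
    template <> struct SelectType<Shorty::kLong>    { using type = int64_t; };

    template <typename T> struct SelectEnum;                 // Type -> enum.
    template <> struct SelectEnum<bool>    { static constexpr Shorty value = Shorty::kBoolean; };
    template <> struct SelectEnum<int32_t> { static constexpr Shorty value = Shorty::kInt; };
    template <> struct SelectEnum<int64_t> { static constexpr Shorty value = Shorty::kLong; };

    // Round-trip consistency, analogous to the static_assert in CheckExpects().
    static_assert(std::is_same<SelectType<SelectEnum<int64_t>::value>::type, int64_t>::value,
                  "Enum->Type mapping must invert Type->Enum");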
diff --git a/runtime/length_prefixed_array.h b/runtime/length_prefixed_array.h
deleted file mode 100644
index 0ff6d7a..0000000
--- a/runtime/length_prefixed_array.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
-#define ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
-
-#include <stddef.h>  // for offsetof()
-
-#include "stride_iterator.h"
-#include "base/bit_utils.h"
-#include "base/casts.h"
-#include "base/iteration_range.h"
-
-namespace art {
-
-template<typename T>
-class LengthPrefixedArray {
- public:
-  explicit LengthPrefixedArray(size_t length)
-      : length_(dchecked_integral_cast<uint32_t>(length)) {}
-
-  T& At(size_t index, size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
-    DCHECK_LT(index, length_);
-    return AtUnchecked(index, element_size, alignment);
-  }
-
-  StrideIterator<T> Begin(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
-    return StrideIterator<T>(&AtUnchecked(0, element_size, alignment), element_size);
-  }
-
-  StrideIterator<T> End(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
-    return StrideIterator<T>(&AtUnchecked(length_, element_size, alignment), element_size);
-  }
-
-  static size_t OffsetOfElement(size_t index,
-                                size_t element_size = sizeof(T),
-                                size_t alignment = alignof(T)) {
-    DCHECK_ALIGNED_PARAM(element_size, alignment);
-    return RoundUp(offsetof(LengthPrefixedArray<T>, data), alignment) + index * element_size;
-  }
-
-  static size_t ComputeSize(size_t num_elements,
-                            size_t element_size = sizeof(T),
-                            size_t alignment = alignof(T)) {
-    size_t result = OffsetOfElement(num_elements, element_size, alignment);
-    DCHECK_ALIGNED_PARAM(result, alignment);
-    return result;
-  }
-
-  uint64_t Length() const {
-    return length_;
-  }
-
-  // Updates the length but does not reallocate storage.
-  void SetLength(size_t length) {
-    length_ = dchecked_integral_cast<uint32_t>(length);
-  }
-
- private:
-  T& AtUnchecked(size_t index, size_t element_size, size_t alignment) {
-    return *reinterpret_cast<T*>(
-        reinterpret_cast<uintptr_t>(this) + OffsetOfElement(index, element_size, alignment));
-  }
-
-  uint32_t length_;
-  uint8_t data[0];
-};
-
-// Returns an empty iteration range if the array is null.
-template<typename T>
-IterationRange<StrideIterator<T>> MakeIterationRangeFromLengthPrefixedArray(
-    LengthPrefixedArray<T>* arr, size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
-  return arr != nullptr ?
-      MakeIterationRange(arr->Begin(element_size, alignment), arr->End(element_size, alignment)) :
-      MakeEmptyIterationRange(StrideIterator<T>(nullptr, 0));
-}
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
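The header deleted above packs a 32-bit length directly in front of variably sized elements. The offset arithmetic is worth seeing in isolation; the following is an illustrative sketch of the same computation (assuming power-of-two alignments), not the ART header itself:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static size_t RoundUp(size_t x, size_t n) {  // n must be a power of two.
      return (x + n - 1) & ~(n - 1);
    }

    struct Header { uint32_t length; };  // Stand-in for the length_ field.

    // Byte offset of element `index`, mirroring OffsetOfElement(): pad the
    // header up to the element alignment, then stride by the element size.
    static size_t OffsetOfElement(size_t index, size_t element_size, size_t alignment) {
      return RoundUp(sizeof(Header), alignment) + index * element_size;
    }

    int main() {
      // With 8-byte elements the 4-byte header pads out to 8 before element 0.
      std::printf("elem 0 at %zu, elem 3 at %zu\n",
                  OffsetOfElement(0, 8, 8), OffsetOfElement(3, 8, 8));  // 8, 32
      return 0;
    }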
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
index 43e81d9..f91b0ed 100644
--- a/runtime/linear_alloc.cc
+++ b/runtime/linear_alloc.cc
@@ -48,4 +48,8 @@
   return allocator_.Contains(ptr);
 }
 
+bool LinearAlloc::ContainsUnsafe(void* ptr) const {
+  return allocator_.Contains(ptr);
+}
+
 }  // namespace art
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index 1b21527..df7f17d 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -47,6 +47,10 @@
   // Return true if the linear alloc contains an address.
   bool Contains(void* ptr) const REQUIRES(!lock_);
 
+  // Unsafe version of 'Contains' only to be used when the allocator is going
+  // to be deleted.
+  bool ContainsUnsafe(void* ptr) const NO_THREAD_SAFETY_ANALYSIS;
+
  private:
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ArenaAllocator allocator_ GUARDED_BY(lock_);
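The unsafe variant added here is a common clang thread-safety idiom: the normal accessor acquires the guarding mutex, while a twin marked NO_THREAD_SAFETY_ANALYSIS skips both the lock and the warning, for the narrow window when the allocator is being torn down. A hedged, stand-alone sketch with illustrative names (not ART's LinearAlloc):

    #include <cstdint>
    #include <mutex>

    #if defined(__clang__)
    #define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
    #else
    #define NO_THREAD_SAFETY_ANALYSIS
    #endif

    class Arena {
     public:
      bool Contains(void* ptr) const {  // Normal path: takes the lock.
        std::lock_guard<std::mutex> guard(lock_);
        return ContainsLocked(ptr);
      }
      // Only valid once no other thread can touch the arena (e.g. just before
      // deletion); skips the lock and the analysis that would flag that.
      bool ContainsUnsafe(void* ptr) const NO_THREAD_SAFETY_ANALYSIS {
        return ContainsLocked(ptr);
      }

     private:
      bool ContainsLocked(void* ptr) const {
        uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
        return p >= reinterpret_cast<uintptr_t>(begin_) &&
               p < reinterpret_cast<uintptr_t>(end_);
      }
      mutable std::mutex lock_;
      void* begin_ = nullptr;
      void* end_ = nullptr;
    };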
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index 341501b..4a2a293 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -43,17 +43,15 @@
 
 inline size_t LockWord::ForwardingAddress() const {
   DCHECK_EQ(GetState(), kForwardingAddress);
-  return value_ << kStateSize;
+  return value_ << kForwardingAddressShift;
 }
 
 inline LockWord::LockWord() : value_(0) {
   DCHECK_EQ(GetState(), kUnlocked);
 }
 
-inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
-    : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
-             (kStateFat << kStateShift)) {
-  DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+inline LockWord::LockWord(Monitor* mon, uint32_t gc_state)
+    : value_(mon->GetMonitorId() | (gc_state << kGCStateShift) | (kStateFat << kStateShift)) {
 #ifndef __LP64__
   DCHECK_ALIGNED(mon, kMonitorIdAlignment);
 #endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 5d0d204..538b6eb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -35,27 +35,27 @@
  * the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding
 * address. When the lock word is in the "thin" state, its bits are formatted as follows:
  *
- *  |33|22|222222221111|1111110000000000|
- *  |10|98|765432109876|5432109876543210|
- *  |00|rb| lock count |thread id owner |
+ *  |33|2|2|222222221111|1111110000000000|
+ *  |10|9|8|765432109876|5432109876543210|
+ *  |00|m|r| lock count |thread id owner |
  *
  * When the lock word is in the "fat" state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |01|rb| MonitorId                  |
+ *  |33|2|2|2222222211111111110000000000|
+ *  |10|9|8|7654321098765432109876543210|
+ *  |01|m|r| MonitorId                  |
  *
 * When the lock word is in the hash state, its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |10|rb| HashCode                   |
+ *  |33|2|2|2222222211111111110000000000|
+ *  |10|9|8|7654321098765432109876543210|
+ *  |10|m|r| HashCode                   |
  *
- * When the lock word is in fowarding address state and its bits are formatted as follows:
+ * When the lock word is in the forwarding address state, its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |11| ForwardingAddress             |
+ *  |33|2|22222222211111111110000000000|
+ *  |10|9|87654321098765432109876543210|
+ *  |11|0| ForwardingAddress           |
  *
 * The "r" bit stores the read barrier state and the "m" bit stores the mark bit.
  */
@@ -64,11 +64,13 @@
   enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
-    kReadBarrierStateSize = 2,
+    kReadBarrierStateSize = 1,
+    kMarkBitStateSize = 1,
     // Number of bits to encode the thin lock owner.
     kThinLockOwnerSize = 16,
     // Remaining bits are the recursive lock count.
-    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize,
+    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
+        kMarkBitStateSize,
     // Thin lock bits. Owner in lowest bits.
 
     kThinLockOwnerShift = 0,
@@ -81,25 +83,43 @@
     kThinLockCountOne = 1 << kThinLockCountShift,  // == 65536 (0x10000)
 
     // State in the highest bits.
-    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift,
+    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
+        kMarkBitStateSize,
     kStateMask = (1 << kStateSize) - 1,
     kStateMaskShifted = kStateMask << kStateShift,
     kStateThinOrUnlocked = 0,
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+
+    // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
     kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
     kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
     kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
+    // Mark bit.
+    kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
+    kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
+    kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
+    kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,
+
+    // GC state is mark bit and read barrier state.
+    kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
+    kGCStateShift = kReadBarrierStateShift,
+    kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
+    kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,
+
     // When the state is kHashCode, the non-state bits hold the hashcode.
     // Note Object.hashCode() has the hash code layout hardcoded.
     kHashShift = 0,
-    kHashSize = 32 - kStateSize - kReadBarrierStateSize,
+    kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
     kHashMask = (1 << kHashSize) - 1,
     kMaxHash = kHashMask,
 
+    // Forwarding address shift.
+    kForwardingAddressShift = kObjectAlignmentShift,
+
     kMonitorIdShift = kHashShift,
     kMonitorIdSize = kHashSize,
     kMonitorIdMask = kHashMask,
@@ -108,31 +128,31 @@
     kMaxMonitorId = kMaxHash
   };
 
-  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
+  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t gc_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
-    return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
-                    (rb_state << kReadBarrierStateShift) |
+    // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
+    return LockWord((thread_id << kThinLockOwnerShift) |
+                    (count << kThinLockCountShift) |
+                    (gc_state << kGCStateShift) |
                     (kStateThinOrUnlocked << kStateShift));
   }
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
   }
 
-  static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
+  static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
     return LockWord((hash_code << kHashShift) |
-                    (rb_state << kReadBarrierStateShift) |
+                    (gc_state << kGCStateShift) |
                     (kStateHash << kStateShift));
   }
 
-  static LockWord FromDefault(uint32_t rb_state) {
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
-    return LockWord(rb_state << kReadBarrierStateShift);
+  static LockWord FromDefault(uint32_t gc_state) {
+    return LockWord(gc_state << kGCStateShift);
   }
 
   static bool IsDefault(LockWord lw) {
@@ -154,7 +174,7 @@
   LockState GetState() const {
     CheckReadBarrierState();
     if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
-        (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
+        (kUseReadBarrier && UNLIKELY((value_ & kGCStateMaskShiftedToggled) == 0))) {
       return kUnlocked;
     } else {
       uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -176,6 +196,10 @@
     return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
   }
 
+  uint32_t GCState() const {
+    return (value_ & kGCStateMaskShifted) >> kGCStateShift;
+  }
+
   void SetReadBarrierState(uint32_t rb_state) {
     DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
@@ -184,6 +208,19 @@
     value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
   }
 
+
+  uint32_t MarkBitState() const {
+    return (value_ >> kMarkBitStateShift) & kMarkBitStateMask;
+  }
+
+  void SetMarkBitState(uint32_t mark_bit) {
+    DCHECK_EQ(mark_bit & ~kMarkBitStateMask, 0U);
+    DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+    // Clear and or the bits.
+    value_ &= kMarkBitStateMaskShiftedToggled;
+    value_ |= mark_bit << kMarkBitStateShift;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
@@ -197,7 +234,7 @@
   size_t ForwardingAddress() const;
 
   // Construct a lock word for inflation to use a Monitor.
-  LockWord(Monitor* mon, uint32_t rb_state);
+  LockWord(Monitor* mon, uint32_t gc_state);
 
   // Return the hash code stored in the lock word, must be kHashCode state.
   int32_t GetHashCode() const;
@@ -207,7 +244,7 @@
     if (kIncludeReadBarrierState) {
       return lw1.GetValue() == lw2.GetValue();
     }
-    return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState();
+    return lw1.GetValueWithoutGCState() == lw2.GetValueWithoutGCState();
   }
 
   void Dump(std::ostream& os) {
@@ -248,9 +285,9 @@
     return value_;
   }
 
-  uint32_t GetValueWithoutReadBarrierState() const {
+  uint32_t GetValueWithoutGCState() const {
     CheckReadBarrierState();
-    return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift);
+    return value_ & kGCStateMaskShiftedToggled;
   }
 
   // Only Object should be converting LockWords to/from uints.
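The new packing is easier to check with small numbers: two state bits sit at the top of the 32-bit word, then the mark bit ("m") and the read-barrier bit ("r"), which together form the GC state; the forwarding address is shifted by the object alignment rather than the state size to widen its range. A simplified sketch with illustrative shift values, not ART's exact constants:

    #include <cassert>
    #include <cstdint>

    enum : uint32_t {
      kStateShift = 30,                 // Bits 31-30: thin/fat/hash/forwarding.
      kMarkBitShift = 29,               // Bit 29: the "m" in the diagrams above.
      kReadBarrierShift = 28,           // Bit 28: the "r".
      kGCStateShift = kReadBarrierShift,
      kGCStateMaskShifted = 0x3u << kGCStateShift,
      kObjectAlignmentShift = 3,        // Objects are at least 8-byte aligned.
    };

    static uint32_t GCState(uint32_t word) {
      return (word & kGCStateMaskShifted) >> kGCStateShift;
    }

    int main() {
      // A hash-state word with the mark bit set and hash 0x1234.
      uint32_t word = (2u << kStateShift) | (1u << kMarkBitShift) | 0x1234u;
      assert(GCState(word) == 2u);      // mark = 1, rb = 0.

      // Forwarding addresses: shifting by the object alignment (3 bits) instead
      // of the state size (2 bits) loses nothing, since aligned addresses have
      // zero low bits, and it widens the encodable address range.
      uintptr_t target = 0x12345678u;   // Already 8-byte aligned.
      uint32_t encoded = static_cast<uint32_t>(target >> kObjectAlignmentShift);
      assert((static_cast<uintptr_t>(encoded) << kObjectAlignmentShift) == target);
      return 0;
    }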
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 2d3581d..bb07fcb 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -25,23 +25,16 @@
 #include <sstream>
 
 #include "base/stringprintf.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "ScopedFd.h"
-#pragma GCC diagnostic pop
-
+#include "base/unix_file/fd_file.h"
+#include "os.h"
 #include "thread-inl.h"
 #include "utils.h"
 
-#define USE_ASHMEM 1
-
-#ifdef USE_ASHMEM
 #include <cutils/ashmem.h>
+
 #ifndef ANDROID_OS
 #include <sys/resource.h>
 #endif
-#endif
 
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
@@ -154,19 +147,25 @@
   }
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to build process map");
+  if (map == nullptr) {
+    if (error_msg != nullptr) {
+      *error_msg = StringPrintf("Failed to build process map");
+    }
     return false;
   }
+
+  ScopedBacktraceMapIteratorLock lock(map.get());
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     if ((begin >= it->start && begin < it->end)  // start of new within old
         && (end > it->start && end <= it->end)) {  // end of new within old
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
-  *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
-                            "any existing map. See process maps in the log.", begin, end);
+  if (error_msg != nullptr) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
+    *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
+                              "any existing map. See process maps in the log.", begin, end);
+  }
   return false;
 }
 
@@ -179,6 +178,7 @@
     *error_msg = StringPrintf("Failed to build process map");
     return false;
   }
+  ScopedBacktraceMapIteratorLock lock(map.get());
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     if ((begin >= it->start && begin < it->end)      // start of new within old
         || (end > it->start && end < it->end)        // end of new within old
@@ -229,32 +229,36 @@
     PLOG(WARNING) << StringPrintf("munmap(%p, %zd) failed", actual_ptr, byte_count);
   }
 
-  // We call this here so that we can try and generate a full error
-  // message with the overlapping mapping. There's no guarantee that
-  // that there will be an overlap though, since
-  // - The kernel is not *required* to honor expected_ptr unless MAP_FIXED is
-  //   true, even if there is no overlap
-  // - There might have been an overlap at the point of mmap, but the
-  //   overlapping region has since been unmapped.
-  std::string error_detail;
-  CheckNonOverlapping(expected, limit, &error_detail);
-
-  std::ostringstream os;
-  os <<  StringPrintf("Failed to mmap at expected address, mapped at "
-                      "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
-                      actual, expected);
-  if (!error_detail.empty()) {
-    os << " : " << error_detail;
+  if (error_msg != nullptr) {
+    // We call this here so that we can try to generate a full error
+    // message with the overlapping mapping. There's no guarantee that
+    // there will be an overlap though, since
+    // - The kernel is not *required* to honor expected_ptr unless MAP_FIXED is
+    //   true, even if there is no overlap
+    // - There might have been an overlap at the point of mmap, but the
+    //   overlapping region has since been unmapped.
+    std::string error_detail;
+    CheckNonOverlapping(expected, limit, &error_detail);
+    std::ostringstream os;
+    os <<  StringPrintf("Failed to mmap at expected address, mapped at "
+                        "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
+                        actual, expected);
+    if (!error_detail.empty()) {
+      os << " : " << error_detail;
+    }
+    *error_msg = os.str();
   }
-
-  *error_msg = os.str();
   return false;
 }
 
 #if USE_ART_LOW_4G_ALLOCATOR
-static inline void* TryMemMapLow4GB(void* ptr, size_t page_aligned_byte_count, int prot, int flags,
-                                    int fd) {
-  void* actual = mmap(ptr, page_aligned_byte_count, prot, flags, fd, 0);
+static inline void* TryMemMapLow4GB(void* ptr,
+                                    size_t page_aligned_byte_count,
+                                    int prot,
+                                    int flags,
+                                    int fd,
+                                    off_t offset) {
+  void* actual = mmap(ptr, page_aligned_byte_count, prot, flags, fd, offset);
   if (actual != MAP_FAILED) {
     // Since we didn't use MAP_FIXED the kernel may have mapped it somewhere not in the low
     // 4GB. If this is the case, unmap and retry.
@@ -267,8 +271,14 @@
 }
 #endif
 
-MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byte_count, int prot,
-                             bool low_4gb, bool reuse, std::string* error_msg) {
+MemMap* MemMap::MapAnonymous(const char* name,
+                             uint8_t* expected_ptr,
+                             size_t byte_count,
+                             int prot,
+                             bool low_4gb,
+                             bool reuse,
+                             std::string* error_msg,
+                             bool use_ashmem) {
 #ifndef __LP64__
   UNUSED(low_4gb);
 #endif
@@ -287,159 +297,61 @@
     flags |= MAP_FIXED;
   }
 
-  ScopedFd fd(-1);
+  File fd;
 
-#ifdef USE_ASHMEM
-#ifdef __ANDROID__
-  const bool use_ashmem = true;
-#else
-  // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
-  // fail due to ulimit restrictions. If they will then use a regular mmap.
-  struct rlimit rlimit_fsize;
-  CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
-  const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
-      (page_aligned_byte_count < rlimit_fsize.rlim_cur);
-#endif
+  if (use_ashmem) {
+    if (!kIsTargetBuild) {
+      // When not on Android (either host or assuming a Linux target), ashmem is faked using
+      // files in /tmp. Ensure that such files won't fail due to ulimit restrictions; if they
+      // would, fall back to a regular mmap.
+      struct rlimit rlimit_fsize;
+      CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
+      use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
+        (page_aligned_byte_count < rlimit_fsize.rlim_cur);
+    }
+  }
+
   if (use_ashmem) {
     // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
     // prefixed "dalvik-".
     std::string debug_friendly_name("dalvik-");
     debug_friendly_name += name;
-    fd.reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count));
-    if (fd.get() == -1) {
+    fd.Reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count),
+             /* check_usage */ false);
+    if (fd.Fd() == -1) {
       *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno));
       return nullptr;
     }
     flags &= ~MAP_ANONYMOUS;
   }
-#endif
 
   // We need to store and potentially set an error number for pretty printing of errors
   int saved_errno = 0;
 
-#ifdef __LP64__
-  // When requesting low_4g memory and having an expectation, the requested range should fit into
-  // 4GB.
-  if (low_4gb && (
-      // Start out of bounds.
-      (reinterpret_cast<uintptr_t>(expected_ptr) >> 32) != 0 ||
-      // End out of bounds. For simplicity, this will fail for the last page of memory.
-      (reinterpret_cast<uintptr_t>(expected_ptr + page_aligned_byte_count) >> 32) != 0)) {
-    *error_msg = StringPrintf("The requested address space (%p, %p) cannot fit in low_4gb",
-                              expected_ptr, expected_ptr + page_aligned_byte_count);
-    return nullptr;
-  }
-#endif
-
-  // TODO:
-  // A page allocator would be a useful abstraction here, as
-  // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
-  // 2) The linear scheme, even with simple saving of the last known position, is very crude
-#if USE_ART_LOW_4G_ALLOCATOR
-  // MAP_32BIT only available on x86_64.
-  void* actual = MAP_FAILED;
-  if (low_4gb && expected_ptr == nullptr) {
-    bool first_run = true;
-
-    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-    for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) {
-      // Use maps_ as an optimization to skip over large maps.
-      // Find the first map which is address > ptr.
-      auto it = maps_->upper_bound(reinterpret_cast<void*>(ptr));
-      if (it != maps_->begin()) {
-        auto before_it = it;
-        --before_it;
-        // Start at the end of the map before the upper bound.
-        ptr = std::max(ptr, reinterpret_cast<uintptr_t>(before_it->second->BaseEnd()));
-        CHECK_ALIGNED(ptr, kPageSize);
-      }
-      while (it != maps_->end()) {
-        // How much space do we have until the next map?
-        size_t delta = reinterpret_cast<uintptr_t>(it->first) - ptr;
-        // If the space may be sufficient, break out of the loop.
-        if (delta >= page_aligned_byte_count) {
-          break;
-        }
-        // Otherwise, skip to the end of the map.
-        ptr = reinterpret_cast<uintptr_t>(it->second->BaseEnd());
-        CHECK_ALIGNED(ptr, kPageSize);
-        ++it;
-      }
-
-      // Try to see if we get lucky with this address since none of the ART maps overlap.
-      actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags,
-                               fd.get());
-      if (actual != MAP_FAILED) {
-        next_mem_pos_ = reinterpret_cast<uintptr_t>(actual) + page_aligned_byte_count;
-        break;
-      }
-
-      if (4U * GB - ptr < page_aligned_byte_count) {
-        // Not enough memory until 4GB.
-        if (first_run) {
-          // Try another time from the bottom;
-          ptr = LOW_MEM_START - kPageSize;
-          first_run = false;
-          continue;
-        } else {
-          // Second try failed.
-          break;
-        }
-      }
-
-      uintptr_t tail_ptr;
-
-      // Check pages are free.
-      bool safe = true;
-      for (tail_ptr = ptr; tail_ptr < ptr + page_aligned_byte_count; tail_ptr += kPageSize) {
-        if (msync(reinterpret_cast<void*>(tail_ptr), kPageSize, 0) == 0) {
-          safe = false;
-          break;
-        } else {
-          DCHECK_EQ(errno, ENOMEM);
-        }
-      }
-
-      next_mem_pos_ = tail_ptr;  // update early, as we break out when we found and mapped a region
-
-      if (safe == true) {
-        actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags,
-                                 fd.get());
-        if (actual != MAP_FAILED) {
-            break;
-        }
-      } else {
-        // Skip over last page.
-        ptr = tail_ptr;
-      }
-    }
-
-    if (actual == MAP_FAILED) {
-      LOG(ERROR) << "Could not find contiguous low-memory space.";
-      saved_errno = ENOMEM;
-    }
-  } else {
-    actual = mmap(expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), 0);
-    saved_errno = errno;
-  }
-
-#else
-#if defined(__LP64__)
-  if (low_4gb && expected_ptr == nullptr) {
-    flags |= MAP_32BIT;
-  }
-#endif
-
-  void* actual = mmap(expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), 0);
+  void* actual = MapInternal(expected_ptr,
+                             page_aligned_byte_count,
+                             prot,
+                             flags,
+                             fd.Fd(),
+                             0,
+                             low_4gb);
   saved_errno = errno;
-#endif
 
   if (actual == MAP_FAILED) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    if (error_msg != nullptr) {
+      if (kIsDebugBuild || VLOG_IS_ON(oat)) {
+        PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      }
 
-    *error_msg = StringPrintf("Failed anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0): %s. See process "
-                              "maps in the log.", expected_ptr, page_aligned_byte_count, prot,
-                              flags, fd.get(), strerror(saved_errno));
+      *error_msg = StringPrintf("Failed anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0): %s. "
+                                    "See process maps in the log.",
+                                expected_ptr,
+                                page_aligned_byte_count,
+                                prot,
+                                flags,
+                                fd.Fd(),
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
@@ -458,8 +370,15 @@
   return new MemMap(name, addr, byte_count, addr, page_aligned_byte_count, 0, true /* reuse */);
 }
 
-MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr, size_t byte_count, int prot, int flags,
-                                 int fd, off_t start, bool reuse, const char* filename,
+MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr,
+                                 size_t byte_count,
+                                 int prot,
+                                 int flags,
+                                 int fd,
+                                 off_t start,
+                                 bool low_4gb,
+                                 bool reuse,
+                                 const char* filename,
                                  std::string* error_msg) {
   CHECK_NE(0, prot);
   CHECK_NE(0, flags & (MAP_SHARED | MAP_PRIVATE));
@@ -471,7 +390,8 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg))
+        << ((error_msg != nullptr) ? *error_msg : std::string());
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -498,22 +418,27 @@
     page_aligned_byte_count += redzone_size;
   }
 
-  uint8_t* actual = reinterpret_cast<uint8_t*>(mmap(page_aligned_expected,
-                                              page_aligned_byte_count,
-                                              prot,
-                                              flags,
-                                              fd,
-                                              page_aligned_offset));
+  uint8_t* actual = reinterpret_cast<uint8_t*>(MapInternal(page_aligned_expected,
+                                                           page_aligned_byte_count,
+                                                           prot,
+                                                           flags,
+                                                           fd,
+                                                           page_aligned_offset,
+                                                           low_4gb));
   if (actual == MAP_FAILED) {
-    auto saved_errno = errno;
+    if (error_msg != nullptr) {
+      auto saved_errno = errno;
 
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      if (kIsDebugBuild || VLOG_IS_ON(oat)) {
+        PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      }
 
-    *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
-                              ") of file '%s' failed: %s. See process maps in the log.",
-                              page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename,
-                              strerror(saved_errno));
+      *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
+                                ") of file '%s' failed: %s. See process maps in the log.",
+                                page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
+                                static_cast<int64_t>(page_aligned_offset), filename,
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
@@ -591,7 +516,7 @@
 }
 
 MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
-                           std::string* error_msg) {
+                           std::string* error_msg, bool use_ashmem) {
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
@@ -615,23 +540,22 @@
   DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
-#ifdef USE_ASHMEM
-  // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
-  // prefixed "dalvik-".
-  std::string debug_friendly_name("dalvik-");
-  debug_friendly_name += tail_name;
-  ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size));
-  int flags = MAP_PRIVATE | MAP_FIXED;
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
-                              tail_name, strerror(errno));
-    return nullptr;
-  }
-#else
-  ScopedFd fd(-1);
+  int int_fd = -1;
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#endif
-
+  if (use_ashmem) {
+    // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
+    // prefixed "dalvik-".
+    std::string debug_friendly_name("dalvik-");
+    debug_friendly_name += tail_name;
+    int_fd = ashmem_create_region(debug_friendly_name.c_str(), tail_base_size);
+    flags = MAP_PRIVATE | MAP_FIXED;
+    if (int_fd == -1) {
+      *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
+                                tail_name, strerror(errno));
+      return nullptr;
+    }
+  }
+  File fd(int_fd, /* check_usage */ false);
 
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Unmap/map the tail region.
@@ -647,12 +571,12 @@
   // region. Note this isn't perfect as there's no way to prevent
   // other threads from trying to take this memory region here.
   uint8_t* actual = reinterpret_cast<uint8_t*>(mmap(tail_base_begin, tail_base_size, tail_prot,
-                                              flags, fd.get(), 0));
+                                              flags, fd.Fd(), 0));
   if (actual == MAP_FAILED) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
                               "maps in the log.", tail_base_begin, tail_base_size, tail_prot, flags,
-                              fd.get());
+                              fd.Fd());
     return nullptr;
   }
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false);
@@ -670,6 +594,22 @@
   }
 }
 
+bool MemMap::Sync() {
+  bool result;
+  if (redzone_size_ != 0) {
+    // To avoid valgrind errors, temporarily lift the lower-end noaccess protection before passing
+    // it to msync() as it only accepts page-aligned base address, and exclude the higher-end
+    // noaccess protection from the msync range. b/27552451.
+    uint8_t* base_begin = reinterpret_cast<uint8_t*>(base_begin_);
+    MEMORY_TOOL_MAKE_DEFINED(base_begin, begin_ - base_begin);
+    result = msync(BaseBegin(), End() - base_begin, MS_SYNC) == 0;
+    MEMORY_TOOL_MAKE_NOACCESS(base_begin, begin_ - base_begin);
+  } else {
+    result = msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+  }
+  return result;
+}
+
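msync() only accepts a page-aligned base address, which is why Sync() above temporarily widens the range around the redzone. The alignment step on its own, as a hedged sketch (SyncRange is a hypothetical helper, POSIX only):

    #include <cstddef>
    #include <cstdint>
    #include <sys/mman.h>
    #include <unistd.h>

    // Flush [ptr, ptr + len) to the backing store; msync() insists on a
    // page-aligned base, so align down and extend the length accordingly.
    static bool SyncRange(void* ptr, size_t len) {
      size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      uintptr_t begin = reinterpret_cast<uintptr_t>(ptr);
      uintptr_t base = begin & ~(page - 1);
      return msync(reinterpret_cast<void*>(base), len + (begin - base), MS_SYNC) == 0;
    }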
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
@@ -827,6 +767,132 @@
   size_ = new_size;
 }
 
+void* MemMap::MapInternal(void* addr,
+                          size_t length,
+                          int prot,
+                          int flags,
+                          int fd,
+                          off_t offset,
+                          bool low_4gb) {
+#ifdef __LP64__
+  // When requesting low_4g memory and having an expectation, the requested range should fit into
+  // 4GB.
+  if (low_4gb && (
+      // Start out of bounds.
+      (reinterpret_cast<uintptr_t>(addr) >> 32) != 0 ||
+      // End out of bounds. For simplicity, this will fail for the last page of memory.
+      ((reinterpret_cast<uintptr_t>(addr) + length) >> 32) != 0)) {
+    LOG(ERROR) << "The requested address space (" << addr << ", "
+               << reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(addr) + length)
+               << ") cannot fit in low_4gb";
+    return MAP_FAILED;
+  }
+#else
+  UNUSED(low_4gb);
+#endif
+  DCHECK_ALIGNED(length, kPageSize);
+  if (low_4gb) {
+    DCHECK_EQ(flags & MAP_FIXED, 0);
+  }
+  // TODO:
+  // A page allocator would be a useful abstraction here, as
+  // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
+  void* actual = MAP_FAILED;
+#if USE_ART_LOW_4G_ALLOCATOR
+  // MAP_32BIT only available on x86_64.
+  if (low_4gb && addr == nullptr) {
+    bool first_run = true;
+
+    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
+    for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) {
+      // Use maps_ as an optimization to skip over large maps.
+      // Find the first map which is address > ptr.
+      auto it = maps_->upper_bound(reinterpret_cast<void*>(ptr));
+      if (it != maps_->begin()) {
+        auto before_it = it;
+        --before_it;
+        // Start at the end of the map before the upper bound.
+        ptr = std::max(ptr, reinterpret_cast<uintptr_t>(before_it->second->BaseEnd()));
+        CHECK_ALIGNED(ptr, kPageSize);
+      }
+      while (it != maps_->end()) {
+        // How much space do we have until the next map?
+        size_t delta = reinterpret_cast<uintptr_t>(it->first) - ptr;
+        // If the space may be sufficient, break out of the loop.
+        if (delta >= length) {
+          break;
+        }
+        // Otherwise, skip to the end of the map.
+        ptr = reinterpret_cast<uintptr_t>(it->second->BaseEnd());
+        CHECK_ALIGNED(ptr, kPageSize);
+        ++it;
+      }
+
+      // Try to see if we get lucky with this address since none of the ART maps overlap.
+      actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), length, prot, flags, fd, offset);
+      if (actual != MAP_FAILED) {
+        next_mem_pos_ = reinterpret_cast<uintptr_t>(actual) + length;
+        return actual;
+      }
+
+      if (4U * GB - ptr < length) {
+        // Not enough memory until 4GB.
+        if (first_run) {
+          // Try another time from the bottom.
+          ptr = LOW_MEM_START - kPageSize;
+          first_run = false;
+          continue;
+        } else {
+          // Second try failed.
+          break;
+        }
+      }
+
+      uintptr_t tail_ptr;
+
+      // Check pages are free.
+      bool safe = true;
+      for (tail_ptr = ptr; tail_ptr < ptr + length; tail_ptr += kPageSize) {
+        if (msync(reinterpret_cast<void*>(tail_ptr), kPageSize, 0) == 0) {
+          safe = false;
+          break;
+        } else {
+          DCHECK_EQ(errno, ENOMEM);
+        }
+      }
+
+      next_mem_pos_ = tail_ptr;  // Update early, as we break out once we have found and mapped a region.
+
+      if (safe) {
+        actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), length, prot, flags, fd, offset);
+        if (actual != MAP_FAILED) {
+          return actual;
+        }
+      } else {
+        // Skip over last page.
+        ptr = tail_ptr;
+      }
+    }
+
+    if (actual == MAP_FAILED) {
+      LOG(ERROR) << "Could not find contiguous low-memory space.";
+      errno = ENOMEM;
+    }
+  } else {
+    actual = mmap(addr, length, prot, flags, fd, offset);
+  }
+
+#else
+#if defined(__LP64__)
+  if (low_4gb && addr == nullptr) {
+    flags |= MAP_32BIT;
+  }
+#endif
+  actual = mmap(addr, length, prot, flags, fd, offset);
+#endif
+  return actual;
+}
+
 std::ostream& operator<<(std::ostream& os, const MemMap& mem_map) {
   os << StringPrintf("[MemMap: %p-%p prot=0x%x %s]",
                      mem_map.BaseBegin(), mem_map.BaseEnd(), mem_map.GetProtect(),
@@ -834,4 +900,22 @@
   return os;
 }
 
+void MemMap::TryReadable() {
+  if (base_begin_ == nullptr && base_size_ == 0) {
+    return;
+  }
+  CHECK_NE(prot_ & PROT_READ, 0);
+  volatile uint8_t* begin = reinterpret_cast<volatile uint8_t*>(base_begin_);
+  volatile uint8_t* end = begin + base_size_;
+  DCHECK(IsAligned<kPageSize>(begin));
+  DCHECK(IsAligned<kPageSize>(end));
+  // Read the first byte of each page. Use volatile to prevent the compiler from optimizing away the
+  // reads.
+  for (volatile uint8_t* ptr = begin; ptr < end; ptr += kPageSize) {
+    // This read could fault if protection wasn't set correctly.
+    uint8_t value = *ptr;
+    UNUSED(value);
+  }
+}
+
 }  // namespace art
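MapInternal's low-4GB scan decides whether a candidate page is free by probing it with msync(): on Linux, an msync() over an unmapped address fails with ENOMEM. A stand-alone sketch of just that probe (it relies on Linux accepting flags == 0, as the code above does):

    #include <cerrno>
    #include <cstdio>
    #include <sys/mman.h>
    #include <unistd.h>

    // msync() succeeding means something is mapped at `page`; failing with
    // ENOMEM means the page is free and a candidate for mapping.
    static bool PageIsFree(void* page, size_t page_size) {
      return msync(page, page_size, 0) != 0 && errno == ENOMEM;
    }

    int main() {
      size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      void* p = mmap(nullptr, page, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      std::printf("mapped page free? %d\n", PageIsFree(p, page));    // 0
      munmap(p, page);
      std::printf("after munmap free? %d\n", PageIsFree(p, page));   // usually 1
      return 0;
    }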
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 7c11ceb..597f0d4 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -57,12 +57,18 @@
   // "reuse" allows re-mapping an address range from an existing mapping.
   //
   // The word "anonymous" in this context means "not backed by a file". The supplied
-  // 'ashmem_name' will be used -- on systems that support it -- to give the mapping
+  // 'name' will be used -- on systems that support it -- to give the mapping
   // a name.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapAnonymous(const char* ashmem_name, uint8_t* addr, size_t byte_count, int prot,
-                              bool low_4gb, bool reuse, std::string* error_msg);
+  static MemMap* MapAnonymous(const char* name,
+                              uint8_t* addr,
+                              size_t byte_count,
+                              int prot,
+                              bool low_4gb,
+                              bool reuse,
+                              std::string* error_msg,
+                              bool use_ashmem = !kIsTargetLinux);
 
   // Create a placeholder for a region allocated by a direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -74,21 +80,43 @@
   // "start" offset is absolute, not relative.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapFile(size_t byte_count, int prot, int flags, int fd, off_t start,
-                         const char* filename, std::string* error_msg) {
-    return MapFileAtAddress(
-        nullptr, byte_count, prot, flags, fd, start, false, filename, error_msg);
+  static MemMap* MapFile(size_t byte_count,
+                         int prot,
+                         int flags,
+                         int fd,
+                         off_t start,
+                         bool low_4gb,
+                         const char* filename,
+                         std::string* error_msg) {
+    return MapFileAtAddress(nullptr,
+                            byte_count,
+                            prot,
+                            flags,
+                            fd,
+                            start,
+                            /*low_4gb*/low_4gb,
+                            /*reuse*/false,
+                            filename,
+                            error_msg);
   }
 
-  // Map part of a file, taking care of non-page aligned offsets.  The
-  // "start" offset is absolute, not relative. This version allows
-  // requesting a specific address for the base of the
-  // mapping. "reuse" allows us to create a view into an existing
-  // mapping where we do not take ownership of the memory.
+  // Map part of a file, taking care of non-page aligned offsets.  The "start" offset is absolute,
+  // not relative. This version allows requesting a specific address for the base of the mapping.
+  // "reuse" allows us to create a view into an existing mapping where we do not take ownership of
+  // the memory. If error_msg is null then we do not print /proc/maps to the log if
+  // MapFileAtAddress fails. This helps improve performance of the fail case since reading and
+  // printing /proc/maps takes several milliseconds in the worst case.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapFileAtAddress(uint8_t* addr, size_t byte_count, int prot, int flags, int fd,
-                                  off_t start, bool reuse, const char* filename,
+  static MemMap* MapFileAtAddress(uint8_t* addr,
+                                  size_t byte_count,
+                                  int prot,
+                                  int flags,
+                                  int fd,
+                                  off_t start,
+                                  bool low_4gb,
+                                  bool reuse,
+                                  const char* filename,
                                   std::string* error_msg);
 
   // Releases the memory mapping.
@@ -98,6 +126,8 @@
     return name_;
   }
 
+  bool Sync();
+
   bool Protect(int prot);
 
   void MadviseDontNeedAndZero();
@@ -138,8 +168,11 @@
   }
 
   // Unmap the pages at the end and remap them to create another memory map.
-  MemMap* RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
-                     std::string* error_msg);
+  MemMap* RemapAtEnd(uint8_t* new_end,
+                     const char* tail_name,
+                     int tail_prot,
+                     std::string* error_msg,
+                     bool use_ashmem = !kIsTargetLinux);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!Locks::mem_maps_lock_);
@@ -151,9 +184,20 @@
   static void Init() REQUIRES(!Locks::mem_maps_lock_);
   static void Shutdown() REQUIRES(!Locks::mem_maps_lock_);
 
+  // If the map is PROT_READ, try to read each page of the map to check it is in fact readable (not
+  // faulting). This is used to diagnose a bug, b/19894268, where mprotect intermittently
+  // does not seem to work.
+  void TryReadable();
+
  private:
-  MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin, size_t base_size,
-         int prot, bool reuse, size_t redzone_size = 0) REQUIRES(!Locks::mem_maps_lock_);
+  MemMap(const std::string& name,
+         uint8_t* begin,
+         size_t size,
+         void* base_begin,
+         size_t base_size,
+         int prot,
+         bool reuse,
+         size_t redzone_size = 0) REQUIRES(!Locks::mem_maps_lock_);
 
   static void DumpMapsLocked(std::ostream& os, bool terse)
       REQUIRES(Locks::mem_maps_lock_);
@@ -164,6 +208,15 @@
   static bool ContainedWithinExistingMap(uint8_t* ptr, size_t size, std::string* error_msg)
       REQUIRES(!Locks::mem_maps_lock_);
 
+  // Internal version of mmap that supports low 4gb emulation.
+  static void* MapInternal(void* addr,
+                           size_t length,
+                           int prot,
+                           int flags,
+                           int fd,
+                           off_t offset,
+                           bool low_4gb);
+
   const std::string name_;
   uint8_t* const begin_;  // Start of data.
   size_t size_;  // Length of data.
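One consequence of the null-tolerant error_msg above: callers probing many candidate addresses can stay cheap on failure and only pay for a /proc/self/maps dump on a final attempt. A hedged usage sketch against the API declared above (TryAddresses is a hypothetical helper, and only the anonymous-mapping failure path is exercised, as in the MapAnonymousFailNullError test below):

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <vector>
    #include "mem_map.h"  // The header above.

    std::unique_ptr<MemMap> TryAddresses(const std::vector<uint8_t*>& candidates,
                                         size_t byte_count, int prot) {
      for (uint8_t* addr : candidates) {
        // Null error_msg: a miss stays cheap, with no /proc/self/maps dump.
        std::unique_ptr<MemMap> map(MemMap::MapAnonymous(
            "probe", addr, byte_count, prot,
            /*low_4gb*/ false, /*reuse*/ false, /*error_msg*/ nullptr));
        if (map != nullptr) {
          return map;
        }
      }
      return nullptr;  // Caller can retry once with a real error_msg for diagnostics.
    }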
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 13bf5b7..e703b78 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -18,21 +18,36 @@
 
 #include <memory>
 
+#include "common_runtime_test.h"
 #include "base/memory_tool.h"
-
-#include "gtest/gtest.h"
+#include "base/unix_file/fd_file.h"
 
 namespace art {
 
-class MemMapTest : public testing::Test {
+class MemMapTest : public CommonRuntimeTest {
  public:
   static uint8_t* BaseBegin(MemMap* mem_map) {
     return reinterpret_cast<uint8_t*>(mem_map->base_begin_);
   }
+
   static size_t BaseSize(MemMap* mem_map) {
     return mem_map->base_size_;
   }
 
+  static uint8_t* GetValidMapAddress(size_t size, bool low_4gb) {
+    // Find a valid map address and unmap it before returning.
+    std::string error_msg;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("temp",
+                                                     nullptr,
+                                                     size,
+                                                     PROT_READ,
+                                                     low_4gb,
+                                                     false,
+                                                     &error_msg));
+    CHECK(map != nullptr);
+    return map->Begin();
+  }
+
   static void RemapAtEndTest(bool low_4gb) {
     std::string error_msg;
     // Cast the page size to size_t.
@@ -149,6 +164,19 @@
   ASSERT_TRUE(error_msg.empty());
 }
 
+TEST_F(MemMapTest, MapAnonymousFailNullError) {
+  CommonInit();
+  // Test that we don't crash with a null error_str when mapping at an invalid location.
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousInvalid",
+                                                    reinterpret_cast<uint8_t*>(kPageSize),
+                                                    0x20000,
+                                                    PROT_READ | PROT_WRITE,
+                                                    false,
+                                                    false,
+                                                    nullptr));
+  ASSERT_EQ(nullptr, map.get());
+}
+
 #ifdef __LP64__
 TEST_F(MemMapTest, MapAnonymousEmpty32bit) {
   CommonInit();
@@ -164,14 +192,36 @@
   ASSERT_TRUE(error_msg.empty());
   ASSERT_LT(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 1ULL << 32);
 }
+TEST_F(MemMapTest, MapFile32Bit) {
+  CommonInit();
+  std::string error_msg;
+  ScratchFile scratch_file;
+  constexpr size_t kMapSize = kPageSize;
+  std::unique_ptr<uint8_t[]> data(new uint8_t[kMapSize]());
+  ASSERT_TRUE(scratch_file.GetFile()->WriteFully(&data[0], kMapSize));
+  std::unique_ptr<MemMap> map(MemMap::MapFile(/*byte_count*/kMapSize,
+                                              PROT_READ,
+                                              MAP_PRIVATE,
+                                              scratch_file.GetFd(),
+                                              /*start*/0,
+                                              /*low_4gb*/true,
+                                              scratch_file.GetFilename().c_str(),
+                                              &error_msg));
+  ASSERT_TRUE(map != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_EQ(map->Size(), kMapSize);
+  ASSERT_LT(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 1ULL << 32);
+}
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr) {
   CommonInit();
   std::string error_msg;
+  // Find a valid address.
+  uint8_t* valid_address = GetValidMapAddress(kPageSize, /*low_4gb*/false);
   // Map at an address that should work, which should succeed.
   std::unique_ptr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
-                                                    reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS),
+                                                    valid_address,
                                                     kPageSize,
                                                     PROT_READ | PROT_WRITE,
                                                     false,
@@ -179,7 +229,7 @@
                                                     &error_msg));
   ASSERT_TRUE(map0.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
-  ASSERT_TRUE(map0->BaseBegin() == reinterpret_cast<void*>(ART_BASE_ADDRESS));
+  ASSERT_TRUE(map0->BaseBegin() == valid_address);
   // Map at an unspecified address, which should succeed.
   std::unique_ptr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
                                                     nullptr,
@@ -214,21 +264,34 @@
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  // Some MIPS32 hardware (namely the Creator Ci20 development board)
+  // cannot allocate in the 2GB-4GB region.
+  TEST_DISABLED_FOR_MIPS();
+
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
-    uintptr_t start_addr = ART_BASE_ADDRESS + 0x1000000;
+    constexpr size_t size = 0x100000;
+    // Try all addresses starting from 2GB to 4GB.
+    size_t start_addr = 2 * GB;
     std::string error_msg;
-    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
-                                                     reinterpret_cast<uint8_t*>(start_addr),
-                                                     0x21000000,
-                                                     PROT_READ | PROT_WRITE,
-                                                     true,
-                                                     false,
-                                                     &error_msg));
+    std::unique_ptr<MemMap> map;
+    for (; start_addr <= std::numeric_limits<uint32_t>::max() - size; start_addr += size) {
+      map.reset(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
+                                     reinterpret_cast<uint8_t*>(start_addr),
+                                     size,
+                                     PROT_READ | PROT_WRITE,
+                                     /*low_4gb*/true,
+                                     false,
+                                     &error_msg));
+      if (map != nullptr) {
+        break;
+      }
+    }
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(error_msg.empty());
-    ASSERT_EQ(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), start_addr);
+    ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
 }
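GetValidMapAddress above works by mapping anywhere, remembering the address, and letting the map die before re-requesting that address. The same trick with raw mmap, as an illustrative sketch (inherently racy, which is why the tests treat the address as a hint and still check results):

    #include <cassert>
    #include <cstddef>
    #include <sys/mman.h>

    int main() {
      const size_t kSize = 4096;
      void* probe = mmap(nullptr, kSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      assert(probe != MAP_FAILED);
      munmap(probe, kSize);
      // Re-request the now-free address; without MAP_FIXED the kernel may
      // still place us elsewhere, so check the result instead of assuming.
      void* again = mmap(probe, kSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      assert(again != MAP_FAILED);
      munmap(again, kSize);
      return 0;
    }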
 
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 13c69ac..f018c1f 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -138,7 +138,7 @@
   // bit of the stored `value`.  `value` must not be larger than `length`
   // bits.
   void StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) {
-    CHECK_LT(value, 2u << length);
+    CHECK_LE(value, MaxInt<uint32_t>(length));
     for (size_t i = 0; i < length; ++i) {
       bool ith_bit = value & (1 << i);
       StoreBit(bit_offset + i, ith_bit);
@@ -178,9 +178,9 @@
   }
 
   // Is `address` aligned on a machine word?
-  template<typename T> static bool IsWordAligned(const T* address) {
+  template<typename T> static constexpr bool IsWordAligned(const T* address) {
     // Word alignment in bytes.
-    size_t kWordAlignment = GetInstructionSetPointerSize(kRuntimeISA);
+    size_t kWordAlignment = static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA));
     return IsAlignedParam(address, kWordAlignment);
   }
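The StoreBits change above is an off-by-one in disguise: a `length`-bit field holds at most 2^length - 1, but the old CHECK_LT compared against 2^(length+1) (and `2u << 32` is undefined for a full-width field). A small worked check, with MaxInt as a local stand-in for ART's helper of the same name:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static uint32_t MaxInt(size_t bits) {
      return bits == 32 ? UINT32_MAX : (uint32_t{1} << bits) - 1u;
    }

    int main() {
      size_t length = 3;                   // A 3-bit field holds 0..7.
      assert(MaxInt(length) == 7u);
      uint32_t value = 12;                 // Needs 4 bits.
      assert(value < (2u << length));      // Old check: passes, wrongly.
      assert(!(value <= MaxInt(length)));  // New check: correctly rejects.
      return 0;
    }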
 
diff --git a/runtime/mirror/abstract_method.cc b/runtime/mirror/abstract_method.cc
index 91a9870..b4dce58 100644
--- a/runtime/mirror/abstract_method.cc
+++ b/runtime/mirror/abstract_method.cc
@@ -21,25 +21,39 @@
 namespace art {
 namespace mirror {
 
+template <PointerSize kPointerSize, bool kTransactionActive>
 bool AbstractMethod::CreateFromArtMethod(ArtMethod* method) {
-  auto* interface_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-  SetArtMethod(method);
-  SetFieldObject<false>(DeclaringClassOffset(), method->GetDeclaringClass());
-  SetFieldObject<false>(
+  auto* interface_method = method->GetInterfaceMethodIfProxy(kPointerSize);
+  SetArtMethod<kTransactionActive>(method);
+  SetFieldObject<kTransactionActive>(DeclaringClassOffset(), method->GetDeclaringClass());
+  SetFieldObject<kTransactionActive>(
       DeclaringClassOfOverriddenMethodOffset(), interface_method->GetDeclaringClass());
-  SetField32<false>(AccessFlagsOffset(), method->GetAccessFlags());
-  SetField32<false>(DexMethodIndexOffset(), method->GetDexMethodIndex());
+  SetField32<kTransactionActive>(AccessFlagsOffset(), method->GetAccessFlags());
+  SetField32<kTransactionActive>(DexMethodIndexOffset(), method->GetDexMethodIndex());
   return true;
 }
 
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k32, false>(
+    ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k32, true>(
+    ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k64, false>(
+    ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k64, true>(
+    ArtMethod* method);
+
 ArtMethod* AbstractMethod::GetArtMethod() {
   return reinterpret_cast<ArtMethod*>(GetField64(ArtMethodOffset()));
 }
 
+template <bool kTransactionActive>
 void AbstractMethod::SetArtMethod(ArtMethod* method) {
-  SetField64<false>(ArtMethodOffset(), reinterpret_cast<uint64_t>(method));
+  SetField64<kTransactionActive>(ArtMethodOffset(), reinterpret_cast<uint64_t>(method));
 }
 
+template void AbstractMethod::SetArtMethod<false>(ArtMethod* method);
+template void AbstractMethod::SetArtMethod<true>(ArtMethod* method);
+
 mirror::Class* AbstractMethod::GetDeclaringClass() {
   return GetFieldObject<mirror::Class>(DeclaringClassOffset());
 }
diff --git a/runtime/mirror/abstract_method.h b/runtime/mirror/abstract_method.h
index dc084be..cfbe492 100644
--- a/runtime/mirror/abstract_method.h
+++ b/runtime/mirror/abstract_method.h
@@ -34,11 +34,13 @@
 class MANAGED AbstractMethod : public AccessibleObject {
  public:
   // Called from Constructor::CreateFromArtMethod, Method::CreateFromArtMethod.
+  template <PointerSize kPointerSize, bool kTransactionActive>
   bool CreateFromArtMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   ArtMethod* GetArtMethod() SHARED_REQUIRES(Locks::mutator_lock_);
   // Only used by the image writer.
+  template <bool kTransactionActive = false>
   void SetArtMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index ec7d758..014e54b 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -30,7 +30,7 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t Array::ClassSize(size_t pointer_size) {
+inline uint32_t Array::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = Object::kVTableLength;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
@@ -370,27 +370,47 @@
   }
 }
 
-template<typename T>
-inline T PointerArray::GetElementPtrSize(uint32_t idx, size_t ptr_size) {
+template<typename T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline T PointerArray::GetElementPtrSize(uint32_t idx, PointerSize ptr_size) {
   // C style casts here since we sometimes have T be a pointer, or sometimes an integer
   // (for stack traces).
-  if (ptr_size == 8) {
-    return (T)static_cast<uintptr_t>(AsLongArray()->GetWithoutChecks(idx));
+  if (ptr_size == PointerSize::k64) {
+    return (T)static_cast<uintptr_t>(
+        AsLongArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
   }
-  DCHECK_EQ(ptr_size, 4u);
-  return (T)static_cast<uintptr_t>(AsIntArray()->GetWithoutChecks(idx));
+  return (T)static_cast<uintptr_t>(
+      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
+}
+
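+// Integer overload: stores the raw value directly; the pointer overload below forwards here
+// after a reinterpret_cast. With 32-bit entries the value must fit in 32 bits (DCHECKed).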
+template<bool kTransactionActive, bool kUnchecked>
+inline void PointerArray::SetElementPtrSize(uint32_t idx, uint64_t element, PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
+    (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this)) : AsLongArray())->
+        SetWithoutChecks<kTransactionActive>(idx, element);
+  } else {
+    DCHECK_LE(element, static_cast<uint64_t>(0xFFFFFFFFu));
+    (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this)) : AsIntArray())
+        ->SetWithoutChecks<kTransactionActive>(idx, static_cast<uint32_t>(element));
+  }
 }
 
 template<bool kTransactionActive, bool kUnchecked, typename T>
-inline void PointerArray::SetElementPtrSize(uint32_t idx, T element, size_t ptr_size) {
-  if (ptr_size == 8) {
-    (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this)) : AsLongArray())->
-        SetWithoutChecks<kTransactionActive>(idx, (uint64_t)(element));
-  } else {
-    DCHECK_EQ(ptr_size, 4u);
-    DCHECK_LE((uintptr_t)element, 0xFFFFFFFFu);
-    (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this)) : AsIntArray())
-        ->SetWithoutChecks<kTransactionActive>(idx, static_cast<uint32_t>((uintptr_t)element));
+inline void PointerArray::SetElementPtrSize(uint32_t idx, T* element, PointerSize ptr_size) {
+  SetElementPtrSize<kTransactionActive, kUnchecked>(idx,
+                                                    reinterpret_cast<uintptr_t>(element),
+                                                    ptr_size);
+}
+
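+// Writes fixed-up entries into `dest` (which may be a relocated copy of this array) instead of
+// updating in place, and skips the store when the visitor leaves a pointer unchanged.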
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
+inline void PointerArray::Fixup(mirror::PointerArray* dest,
+                                PointerSize pointer_size,
+                                const Visitor& visitor) {
+  for (size_t i = 0, count = GetLength(); i < count; ++i) {
+    void* ptr = GetElementPtrSize<void*, kVerifyFlags, kReadBarrierOption>(i, pointer_size);
+    void* new_ptr = visitor(ptr);
+    if (ptr != new_ptr) {
+      dest->SetElementPtrSize<false, true>(i, new_ptr, pointer_size);
+    }
   }
 }
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index b27a884..ec10a43 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_ARRAY_H_
 #define ART_RUNTIME_MIRROR_ARRAY_H_
 
+#include "base/enums.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
 #include "object.h"
@@ -31,7 +32,7 @@
 class MANAGED Array : public Object {
  public:
   // The size of a java.lang.Class representing an array.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Allocates an array with the given properties, if kFillUsable is true the array will be of at
   // least component_count size, however, if there's usable space at the end of the allocation the
@@ -122,7 +123,7 @@
   T Get(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_);
 
   T GetWithoutChecks(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(CheckIsValidIndex(i));
+    DCHECK(CheckIsValidIndex(i)) << "i=" << i << " length=" << GetLength();
     return GetData()[i];
   }
 
@@ -162,9 +163,10 @@
     array_class_ = GcRoot<Class>(array_class);
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetArrayClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(!array_class_.IsNull());
-    return array_class_.Read();
+    return array_class_.Read<kReadBarrierOption>();
   }
 
   static void ResetArrayClass() {
@@ -183,12 +185,25 @@
 // Either an IntArray or a LongArray.
 class PointerArray : public Array {
  public:
-  template<typename T>
-  T GetElementPtrSize(uint32_t idx, size_t ptr_size)
+  template<typename T,
+           VerifyObjectFlags kVerifyFlags = kVerifyNone,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  T GetElementPtrSize(uint32_t idx, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<bool kTransactionActive = false, bool kUnchecked = false>
+  void SetElementPtrSize(uint32_t idx, uint64_t element, PointerSize ptr_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   template<bool kTransactionActive = false, bool kUnchecked = false, typename T>
-  void SetElementPtrSize(uint32_t idx, T element, size_t ptr_size)
+  void SetElementPtrSize(uint32_t idx, T* element, PointerSize ptr_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Fix up the pointers in the dest array by passing our pointers through the visitor. Only
+  // copies to dest if visitor(source_ptr) != source_ptr.
+  template <VerifyObjectFlags kVerifyFlags = kVerifyNone,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
+  void Fixup(mirror::PointerArray* dest, PointerSize pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 };
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index a528c3b..0f2aac2 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -22,13 +22,13 @@
 #include "art_field-inl.h"
 #include "art_method.h"
 #include "art_method-inl.h"
+#include "base/array_slice.h"
+#include "base/length_prefixed_array.h"
 #include "class_loader.h"
 #include "common_throws.h"
-#include "dex_cache.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "length_prefixed_array.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
@@ -46,11 +46,15 @@
   return GetField32(ObjectSizeOffset());
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Class::GetSuperClass() {
   // Can only get super class for loaded classes (hack for when runtime is
   // initializing)
-  DCHECK(IsLoaded() || IsErroneous() || !Runtime::Current()->IsStarted()) << IsLoaded();
-  return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
+  DCHECK(IsLoaded<kVerifyFlags>() ||
+         IsErroneous<kVerifyFlags>() ||
+         !Runtime::Current()->IsStarted()) << IsLoaded();
+  return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
 }
 
 inline ClassLoader* Class::GetClassLoader() {
@@ -62,89 +66,171 @@
   return GetFieldObject<DexCache, kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_));
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetDirectMethodsPtr() {
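+// Direct, declared-virtual and copied methods now share a single methods_ array, partitioned as
+// [direct | declared virtual | copied]; the two uint16 offsets below mark the boundaries.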
+inline uint32_t Class::GetCopiedMethodsStartOffset() {
+  return GetFieldShort(OFFSET_OF_OBJECT_MEMBER(Class, copied_methods_offset_));
+}
+
+inline uint32_t Class::GetDirectMethodsStartOffset() {
+  return 0;
+}
+
+inline uint32_t Class::GetVirtualMethodsStartOffset() {
+  return GetFieldShort(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_offset_));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetDirectMethodsPtrUnchecked();
+  return GetDirectMethodsSliceUnchecked(pointer_size);
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetDirectMethodsPtrUnchecked() {
-  return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_)));
-}
-
-inline LengthPrefixedArray<ArtMethod>* Class::GetVirtualMethodsPtrUnchecked() {
-  return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_)));
-}
-
-inline void Class::SetDirectMethodsPtr(LengthPrefixedArray<ArtMethod>* new_direct_methods) {
-  DCHECK(GetDirectMethodsPtrUnchecked() == nullptr);
-  SetDirectMethodsPtrUnchecked(new_direct_methods);
-}
-
-inline void Class::SetDirectMethodsPtrUnchecked(
-    LengthPrefixedArray<ArtMethod>* new_direct_methods) {
-  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_),
-                    reinterpret_cast<uint64_t>(new_direct_methods));
-}
-
-inline ArtMethod* Class::GetDirectMethodUnchecked(size_t i, size_t pointer_size) {
-  CheckPointerSize(pointer_size);
-  auto* methods = GetDirectMethodsPtrUnchecked();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
-}
-
-inline ArtMethod* Class::GetDirectMethod(size_t i, size_t pointer_size) {
-  CheckPointerSize(pointer_size);
-  auto* methods = GetDirectMethodsPtr();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSliceUnchecked(PointerSize pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetDirectMethodsStartOffset(),
+                               GetVirtualMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline LengthPrefixedArray<ArtMethod>* Class::GetVirtualMethodsPtr() {
-  DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
-  return GetVirtualMethodsPtrUnchecked();
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSlice(PointerSize pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  return GetDeclaredMethodsSliceUnchecked(pointer_size);
 }
 
-inline void Class::SetVirtualMethodsPtr(LengthPrefixedArray<ArtMethod>* new_virtual_methods) {
-  // TODO: we reassign virtual methods to grow the table for miranda
-  // methods.. they should really just be assigned once.
-  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_),
-                    reinterpret_cast<uint64_t>(new_virtual_methods));
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSliceUnchecked(PointerSize pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetDirectMethodsStartOffset(),
+                               GetCopiedMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSlice(PointerSize pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSliceUnchecked(
+    PointerSize pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetVirtualMethodsStartOffset(),
+                               GetCopiedMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArtMethod* Class::GetVirtualMethod(size_t i, size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSlice(PointerSize pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  return GetVirtualMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSliceUnchecked(PointerSize pointer_size) {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               GetVirtualMethodsStartOffset(),
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSlice(PointerSize pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  return GetCopiedMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSliceUnchecked(PointerSize pointer_size) {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               GetCopiedMethodsStartOffset(),
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+inline LengthPrefixedArray<ArtMethod>* Class::GetMethodsPtr() {
+  return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
+      static_cast<uintptr_t>(GetField64(OFFSET_OF_OBJECT_MEMBER(Class, methods_))));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetMethodsSlice(PointerSize pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               0,
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+inline uint32_t Class::NumMethods() {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return (methods == nullptr) ? 0 : methods->size();
+}
+
+inline ArtMethod* Class::GetDirectMethodUnchecked(size_t i, PointerSize pointer_size) {
+  CheckPointerSize(pointer_size);
+  return &GetDirectMethodsSliceUnchecked(pointer_size).At(i);
+}
+
+inline ArtMethod* Class::GetDirectMethod(size_t i, PointerSize pointer_size) {
+  CheckPointerSize(pointer_size);
+  return &GetDirectMethodsSlice(pointer_size).At(i);
+}
+
+inline void Class::SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
+                                 uint32_t num_direct,
+                                 uint32_t num_virtual) {
+  DCHECK(GetMethodsPtr() == nullptr);
+  SetMethodsPtrUnchecked(new_methods, num_direct, num_virtual);
+}
+
+inline void Class::SetMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_methods,
+                                          uint32_t num_direct,
+                                          uint32_t num_virtual) {
+  DCHECK_LE(num_direct + num_virtual, (new_methods == nullptr) ? 0 : new_methods->size());
+  SetMethodsPtrInternal(new_methods);
+  SetFieldShort<false>(OFFSET_OF_OBJECT_MEMBER(Class, copied_methods_offset_),
+                       dchecked_integral_cast<uint16_t>(num_direct + num_virtual));
+  SetFieldShort<false>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_offset_),
+                       dchecked_integral_cast<uint16_t>(num_direct));
+}
+
+inline void Class::SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods) {
+  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, methods_),
+                    static_cast<uint64_t>(reinterpret_cast<uintptr_t>(new_methods)));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArtMethod* Class::GetVirtualMethod(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>())
       << PrettyClass(this) << " status=" << GetStatus();
   return GetVirtualMethodUnchecked(i, pointer_size);
 }
 
-inline ArtMethod* Class::GetVirtualMethodDuringLinking(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetVirtualMethodDuringLinking(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   DCHECK(IsLoaded() || IsErroneous());
   return GetVirtualMethodUnchecked(i, pointer_size);
 }
 
-inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
-  LengthPrefixedArray<ArtMethod>* methods = GetVirtualMethodsPtrUnchecked();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetVirtualMethodsSliceUnchecked(pointer_size).At(i);
 }
 
+template<VerifyObjectFlags kVerifyFlags,
+         ReadBarrierOption kReadBarrierOption>
 inline PointerArray* Class::GetVTable() {
-  DCHECK(IsResolved() || IsErroneous());
-  return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
+  DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
+  return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
 
 inline PointerArray* Class::GetVTableDuringLinking() {
@@ -156,36 +242,19 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable);
 }
 
-inline MemberOffset Class::EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size) {
-  DCHECK_LT(i, kImtSize);
-  return MemberOffset(
-      EmbeddedImTableOffset(pointer_size).Uint32Value() + i * ImTableEntrySize(pointer_size));
-}
-
-inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
-  DCHECK(ShouldHaveEmbeddedImtAndVTable());
-  return GetFieldPtrWithSize<ArtMethod*>(
-      EmbeddedImTableEntryOffset(i, pointer_size), pointer_size);
-}
-
-inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
-  DCHECK(ShouldHaveEmbeddedImtAndVTable());
-  SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
-}
-
 inline bool Class::HasVTable() {
-  return GetVTable() != nullptr || ShouldHaveEmbeddedImtAndVTable();
+  return GetVTable() != nullptr || ShouldHaveEmbeddedVTable();
 }
 
 inline int32_t Class::GetVTableLength() {
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableLength();
   }
   return GetVTable() != nullptr ? GetVTable()->GetLength() : 0;
 }
 
-inline ArtMethod* Class::GetVTableEntry(uint32_t i, size_t pointer_size) {
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+inline ArtMethod* Class::GetVTableEntry(uint32_t i, PointerSize pointer_size) {
+  if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableEntry(i, pointer_size);
   }
   auto* vtable = GetVTable();
@@ -201,21 +270,29 @@
   SetField32<false>(MemberOffset(EmbeddedVTableLengthOffset()), len);
 }
 
-inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size) {
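+// The interface method table is no longer embedded in the class object; each class instead
+// stores a pointer to an ImTable immediately before the embedded vtable entries.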
+inline ImTable* Class::GetImt(PointerSize pointer_size) {
+  return GetFieldPtrWithSize<ImTable*>(MemberOffset(ImtPtrOffset(pointer_size)), pointer_size);
+}
+
+inline void Class::SetImt(ImTable* imt, PointerSize pointer_size) {
+  return SetFieldPtrWithSize<false>(MemberOffset(ImtPtrOffset(pointer_size)), imt, pointer_size);
+}
+
+inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, PointerSize pointer_size) {
   return MemberOffset(
       EmbeddedVTableOffset(pointer_size).Uint32Value() + i * VTableEntrySize(pointer_size));
 }
 
-inline ArtMethod* Class::GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetEmbeddedVTableEntry(uint32_t i, PointerSize pointer_size) {
   return GetFieldPtrWithSize<ArtMethod*>(EmbeddedVTableEntryOffset(i, pointer_size), pointer_size);
 }
 
 inline void Class::SetEmbeddedVTableEntryUnchecked(
-    uint32_t i, ArtMethod* method, size_t pointer_size) {
+    uint32_t i, ArtMethod* method, PointerSize pointer_size) {
   SetFieldPtrWithSize<false>(EmbeddedVTableEntryOffset(i, pointer_size), method, pointer_size);
 }
 
-inline void Class::SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
+inline void Class::SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, PointerSize pointer_size) {
   auto* vtable = GetVTableDuringLinking();
   CHECK_EQ(method, vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size));
   SetEmbeddedVTableEntryUnchecked(i, method, pointer_size);
@@ -295,8 +372,6 @@
       }
       return false;
     }
-    DCHECK_EQ(this->CanAccessMember(access_to, field->GetAccessFlags()),
-              this->CanAccessMember(dex_access_to, field->GetAccessFlags()));
   }
   if (LIKELY(this->CanAccessMember(access_to, field->GetAccessFlags()))) {
     return true;
@@ -328,8 +403,6 @@
       }
       return false;
     }
-    DCHECK_EQ(this->CanAccessMember(access_to, method->GetAccessFlags()),
-              this->CanAccessMember(dex_access_to, method->GetAccessFlags()));
   }
   if (LIKELY(this->CanAccessMember(access_to, method->GetAccessFlags()))) {
     return true;
@@ -375,10 +448,12 @@
   return false;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method, size_t pointer_size) {
+inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method,
+                                                       PointerSize pointer_size) {
   Class* declaring_class = method->GetDeclaringClass();
   DCHECK(declaring_class != nullptr) << PrettyClass(this);
   DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
+  DCHECK(!method->IsCopied());
   // TODO cache to improve lookup speed
   const int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
@@ -391,7 +466,7 @@
   return nullptr;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method, size_t pointer_size) {
+inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method, PointerSize pointer_size) {
   // Only miranda or default methods may come from interfaces and be used as a virtual.
   DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsDefault() || method->IsMiranda());
   // The argument method may come from a super class.
@@ -399,24 +474,27 @@
   return GetVTableEntry(method->GetMethodIndex(), pointer_size);
 }
 
-inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method, size_t pointer_size) {
+inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method, PointerSize pointer_size) {
   DCHECK(!method->GetDeclaringClass()->IsInterface());
   return GetSuperClass()->GetVTableEntry(method->GetMethodIndex(), pointer_size);
 }
 
 inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method,
-                                                                size_t pointer_size) {
+                                                                PointerSize pointer_size) {
   if (method->IsDirect()) {
     return method;
   }
-  if (method->GetDeclaringClass()->IsInterface() && !method->IsMiranda()) {
+  if (method->GetDeclaringClass()->IsInterface() && !method->IsCopied()) {
     return FindVirtualMethodForInterface(method, pointer_size);
   }
   return FindVirtualMethodForVirtual(method, pointer_size);
 }
 
+template<VerifyObjectFlags kVerifyFlags,
+         ReadBarrierOption kReadBarrierOption>
 inline IfTable* Class::GetIfTable() {
-  return GetFieldObject<IfTable>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_));
+  return GetFieldObject<IfTable, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, iftable_));
 }
 
 inline int32_t Class::GetIfTableCount() {
@@ -432,22 +510,24 @@
 }
 
 inline LengthPrefixedArray<ArtField>* Class::GetIFieldsPtr() {
-  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(IsLoaded() || IsErroneous()) << GetStatus();
   return GetFieldPtr<LengthPrefixedArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
-  Class* super_class = GetSuperClass();
+  Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
-      ? MemberOffset(RoundUp(super_class->GetObjectSize(),
+      ? MemberOffset(RoundUp(super_class->GetObjectSize<kVerifyFlags, kReadBarrierOption>(),
                              sizeof(mirror::HeapReference<mirror::Object>)))
       : ClassOffset();
 }
 
-inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(PointerSize pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -455,10 +535,11 @@
   return MemberOffset(base);
 }
 
-inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size) {
+inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(
+    PointerSize pointer_size) {
   DCHECK(IsLoaded());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
                                            0, 0, 0, 0, 0, pointer_size);
@@ -520,15 +601,6 @@
   }
 }
 
-inline void Class::SetVerifyErrorClass(Class* klass) {
-  CHECK(klass != nullptr) << PrettyClass(this);
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass);
-  } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass);
-  }
-}
-
 template<VerifyObjectFlags kVerifyFlags>
 inline uint32_t Class::GetAccessFlags() {
   // Check class is loaded/retired or this is java.lang.String that has a
@@ -541,6 +613,7 @@
       << " IsErroneous=" <<
           IsErroneous<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>()
       << " IsString=" << (this == String::GetJavaLangString())
+      << " status=" << GetStatus<kVerifyFlags>()
       << " descriptor=" << PrettyDescriptor(this);
   return GetField32<kVerifyFlags>(AccessFlagsOffset());
 }
@@ -562,8 +635,9 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF);
-  DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type));
+  Primitive::Type type = static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask);
+  DCHECK_EQ(static_cast<size_t>(v32 >> kPrimitiveTypeSizeShiftShift),
+            Primitive::ComponentSizeShift(type));
   return type;
 }
 
@@ -572,8 +646,9 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  size_t size_shift = static_cast<Primitive::Type>(v32 >> 16);
-  DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF)));
+  size_t size_shift = static_cast<Primitive::Type>(v32 >> kPrimitiveTypeSizeShiftShift);
+  DCHECK_EQ(size_shift,
+            Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask)));
   return size_shift;
 }
 
@@ -625,22 +700,21 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
-inline uint32_t Class::ComputeClassSize(bool has_embedded_tables,
+inline uint32_t Class::ComputeClassSize(bool has_embedded_vtable,
                                         uint32_t num_vtable_entries,
                                         uint32_t num_8bit_static_fields,
                                         uint32_t num_16bit_static_fields,
                                         uint32_t num_32bit_static_fields,
                                         uint32_t num_64bit_static_fields,
                                         uint32_t num_ref_static_fields,
-                                        size_t pointer_size) {
+                                        PointerSize pointer_size) {
   // Space used by java.lang.Class and its instance fields.
   uint32_t size = sizeof(Class);
   // Space used by embedded tables.
-  if (has_embedded_tables) {
-    const uint32_t embedded_imt_size = kImtSize * ImTableEntrySize(pointer_size);
-    const uint32_t embedded_vtable_size = num_vtable_entries * VTableEntrySize(pointer_size);
-    size = RoundUp(size + sizeof(uint32_t) /* embedded vtable len */, pointer_size) +
-        embedded_imt_size + embedded_vtable_size;
+  if (has_embedded_vtable) {
+    size = RoundUp(size + sizeof(uint32_t) /* embedded vtable len */,
+                   static_cast<size_t>(pointer_size));
+    size += static_cast<size_t>(pointer_size);  // size of pointer to IMT
+    size += num_vtable_entries * VTableEntrySize(pointer_size);
   }
 
   // Space used by reference statics.
@@ -671,9 +745,12 @@
   return size;
 }
 
-template <typename Visitor>
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences(klass, visitor);
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   // Right after a class is allocated, but not yet loaded
   // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
   // and scan it. IsTemp() may call Class::GetAccessFlags() but may
@@ -681,14 +758,17 @@
   // status is kStatusNotReady. To avoid it, rely on IsResolved()
   // only. This is fine because a temp class never goes into the
   // kStatusResolved state.
-  if (IsResolved()) {
+  if (IsResolved<kVerifyFlags>()) {
     // Temp classes don't ever populate imt/vtable or static fields and they are not even
     // allocated with the right size for those. Also, unresolved classes don't have fields
     // linked yet.
-    VisitStaticFieldsReferences(this, visitor);
+    VisitStaticFieldsReferences<kVerifyFlags, kReadBarrierOption>(this, visitor);
   }
-  // Since this class is reachable, we must also visit the associated roots when we scan it.
-  VisitNativeRoots(visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  if (kVisitNativeRoots) {
+    // Since this class is reachable, we must also visit the associated roots when we scan it.
+    VisitNativeRoots<kReadBarrierOption>(
+        visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  }
 }
 
 template<ReadBarrierOption kReadBarrierOption>
@@ -818,48 +898,65 @@
   }
 }
 
-inline void Class::SetDexCacheStrings(GcRoot<String>* new_dex_cache_strings) {
+inline void Class::SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings) {
   SetFieldPtr<false>(DexCacheStringsOffset(), new_dex_cache_strings);
 }
 
-inline GcRoot<String>* Class::GetDexCacheStrings() {
-  return GetFieldPtr<GcRoot<String>*>(DexCacheStringsOffset());
+inline StringDexCacheType* Class::GetDexCacheStrings() {
+  return GetFieldPtr64<StringDexCacheType*>(DexCacheStringsOffset());
 }
 
-template<class Visitor>
-void mirror::Class::VisitNativeRoots(Visitor& visitor, size_t pointer_size) {
+template<ReadBarrierOption kReadBarrierOption, class Visitor>
+void mirror::Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
   for (ArtField& field : GetSFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
     if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
     }
   }
   for (ArtField& field : GetIFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
     if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
     }
   }
-  for (ArtMethod& method : GetDirectMethods(pointer_size)) {
-    method.VisitRoots(visitor, pointer_size);
-  }
-  for (ArtMethod& method : GetVirtualMethods(pointer_size)) {
-    method.VisitRoots(visitor, pointer_size);
+  for (ArtMethod& method : GetMethods(pointer_size)) {
+    method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
   }
 }
 
-inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(size_t pointer_size) {
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
-  return MakeIterationRangeFromLengthPrefixedArray(GetDirectMethodsPtrUnchecked(),
-                                                   ArtMethod::Size(pointer_size),
-                                                   ArtMethod::Alignment(pointer_size));
+  return GetDirectMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
-inline IterationRange<StrideIterator<ArtMethod>> Class::GetVirtualMethods(size_t pointer_size) {
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredMethods(
+      PointerSize pointer_size) {
+  return GetDeclaredMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredVirtualMethods(
+      PointerSize pointer_size) {
+  return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetVirtualMethods(
+    PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
-  return MakeIterationRangeFromLengthPrefixedArray(GetVirtualMethodsPtrUnchecked(),
+  return GetVirtualMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetCopiedMethods(PointerSize pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetCopiedMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetMethods(PointerSize pointer_size) {
+  CheckPointerSize(pointer_size);
+  return MakeIterationRangeFromLengthPrefixedArray(GetMethodsPtr(),
                                                    ArtMethod::Size(pointer_size),
                                                    ArtMethod::Alignment(pointer_size));
 }
@@ -880,22 +977,12 @@
   return MakeIterationRangeFromLengthPrefixedArray(GetSFieldsPtrUnchecked());
 }
 
-inline MemberOffset Class::EmbeddedImTableOffset(size_t pointer_size) {
+inline MemberOffset Class::EmbeddedVTableOffset(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
-  // Round up since we want the embedded imt and vtable to be pointer size aligned in case 64 bits.
-  // Add 32 bits for embedded vtable length.
-  return MemberOffset(
-      RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
+  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + static_cast<size_t>(pointer_size));
 }
 
-inline MemberOffset Class::EmbeddedVTableOffset(size_t pointer_size) {
-  CheckPointerSize(pointer_size);
-  return MemberOffset(EmbeddedImTableOffset(pointer_size).Uint32Value() +
-                      kImtSize * ImTableEntrySize(pointer_size));
-}
-
-inline void Class::CheckPointerSize(size_t pointer_size) {
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+inline void Class::CheckPointerSize(PointerSize pointer_size) {
   DCHECK_EQ(pointer_size, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
 }
 
@@ -927,23 +1014,67 @@
 }
 
 inline uint32_t Class::NumDirectMethods() {
-  LengthPrefixedArray<ArtMethod>* arr = GetDirectMethodsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
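+  // Direct methods occupy [0, virtual_methods_offset_), so the start offset of the virtual
+  // methods doubles as the direct-method count.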
+  return GetVirtualMethodsStartOffset();
+}
+
+inline uint32_t Class::NumDeclaredVirtualMethods() {
+  return GetCopiedMethodsStartOffset() - GetVirtualMethodsStartOffset();
 }
 
 inline uint32_t Class::NumVirtualMethods() {
-  LengthPrefixedArray<ArtMethod>* arr = GetVirtualMethodsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return NumMethods() - GetVirtualMethodsStartOffset();
 }
 
 inline uint32_t Class::NumInstanceFields() {
   LengthPrefixedArray<ArtField>* arr = GetIFieldsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return arr != nullptr ? arr->size() : 0u;
 }
 
 inline uint32_t Class::NumStaticFields() {
   LengthPrefixedArray<ArtField>* arr = GetSFieldsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return arr != nullptr ? arr->size() : 0u;
+}
+
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
+inline void Class::FixupNativePointers(mirror::Class* dest,
+                                       PointerSize pointer_size,
+                                       const Visitor& visitor) {
+  // Update the field arrays.
+  LengthPrefixedArray<ArtField>* const sfields = GetSFieldsPtr();
+  LengthPrefixedArray<ArtField>* const new_sfields = visitor(sfields);
+  if (sfields != new_sfields) {
+    dest->SetSFieldsPtrUnchecked(new_sfields);
+  }
+  LengthPrefixedArray<ArtField>* const ifields = GetIFieldsPtr();
+  LengthPrefixedArray<ArtField>* const new_ifields = visitor(ifields);
+  if (ifields != new_ifields) {
+    dest->SetIFieldsPtrUnchecked(new_ifields);
+  }
+  // Update method array.
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  LengthPrefixedArray<ArtMethod>* new_methods = visitor(methods);
+  if (methods != new_methods) {
+    dest->SetMethodsPtrInternal(new_methods);
+  }
+  // Update dex cache strings.
+  StringDexCacheType* strings = GetDexCacheStrings();
+  StringDexCacheType* new_strings = visitor(strings);
+  if (strings != new_strings) {
+    dest->SetDexCacheStrings(new_strings);
+  }
+  // Fix up embedded tables.
+  if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
+    for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
+      ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
+      ArtMethod* new_method = visitor(method);
+      if (method != new_method) {
+        dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
+      }
+    }
+  }
+  if (!IsTemp() && ShouldHaveImt<kVerifyNone, kReadBarrierOption>()) {
+    dest->SetImt(visitor(GetImt(pointer_size)), pointer_size);
+  }
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 53fedab..f948be7 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -57,6 +57,15 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
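+// Records the verification error object itself, replacing the removed SetVerifyErrorClass(),
+// which stored only the exception's class.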
+inline void Class::SetVerifyError(mirror::Object* error) {
+  CHECK(error != nullptr) << PrettyClass(this);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+  } else {
+    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+  }
+}
+
 void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
   Status old_status = h_this->GetStatus();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -85,36 +94,9 @@
       }
     }
 
-    // Stash current exception.
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
-    CHECK(old_exception.Get() != nullptr);
-    Class* eiie_class;
-    // Do't attempt to use FindClass if we have an OOM error since this can try to do more
-    // allocations and may cause infinite loops.
-    bool throw_eiie = (old_exception.Get() == nullptr);
-    if (!throw_eiie) {
-      std::string temp;
-      const char* old_exception_descriptor = old_exception->GetClass()->GetDescriptor(&temp);
-      throw_eiie = (strcmp(old_exception_descriptor, "Ljava/lang/OutOfMemoryError;") != 0);
-    }
-    if (throw_eiie) {
-      // Clear exception to call FindSystemClass.
-      self->ClearException();
-      eiie_class = Runtime::Current()->GetClassLinker()->FindSystemClass(
-          self, "Ljava/lang/ExceptionInInitializerError;");
-      CHECK(!self->IsExceptionPending());
-      // Only verification errors, not initialization problems, should set a verify error.
-      // This is to ensure that ThrowEarlierClassFailure will throw NoClassDefFoundError in that
-      // case.
-      Class* exception_class = old_exception->GetClass();
-      if (!eiie_class->IsAssignableFrom(exception_class)) {
-        h_this->SetVerifyErrorClass(exception_class);
-      }
-    }
-
-    // Restore exception.
-    self->SetException(old_exception.Get());
+    // Remember the current exception.
+    CHECK(self->GetException() != nullptr);
+    h_this->SetVerifyError(self->GetException());
   }
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
@@ -352,8 +334,9 @@
   }
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature,
-                                      size_t pointer_size) {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name,
+                                      const StringPiece& signature,
+                                      PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature, pointer_size);
   if (method != nullptr) {
@@ -371,8 +354,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature,
-                                      size_t pointer_size) {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name,
+                                      const Signature& signature,
+                                      PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature, pointer_size);
   if (method != nullptr) {
@@ -390,8 +374,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                      size_t pointer_size) {
+ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache,
+                                      uint32_t dex_method_idx,
+                                      PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx, pointer_size);
   if (method != nullptr) {
@@ -410,8 +395,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature,
-                                           size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name,
+                                           const StringPiece& signature,
+                                           PointerSize pointer_size) {
   for (auto& method : GetDirectMethods(pointer_size)) {
     if (name == method.GetName() && method.GetSignature() == signature) {
       return &method;
@@ -420,8 +406,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature,
-                                           size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name,
+                                           const Signature& signature,
+                                           PointerSize pointer_size) {
   for (auto& method : GetDirectMethods(pointer_size)) {
     if (name == method.GetName() && signature == method.GetSignature()) {
       return &method;
@@ -430,8 +417,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                           size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache,
+                                           uint32_t dex_method_idx,
+                                           PointerSize pointer_size) {
   if (GetDexCache() == dex_cache) {
     for (auto& method : GetDirectMethods(pointer_size)) {
       if (method.GetDexMethodIndex() == dex_method_idx) {
@@ -442,8 +430,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature,
-                                   size_t pointer_size) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name,
+                                   const StringPiece& signature,
+                                   PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -453,8 +442,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature,
-                                   size_t pointer_size) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name,
+                                   const Signature& signature,
+                                   PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -465,7 +455,7 @@
 }
 
 ArtMethod* Class::FindDirectMethod(
-    const DexCache* dex_cache, uint32_t dex_method_idx, size_t pointer_size) {
+    const DexCache* dex_cache, uint32_t dex_method_idx, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx, pointer_size);
     if (method != nullptr) {
@@ -475,8 +465,24 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
-                                            size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethodByName(const StringPiece& name,
+                                                 PointerSize pointer_size) {
+  for (auto& method : GetDirectMethods(pointer_size)) {
+    ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
+    if (name == np_method->GetName()) {
+      return &method;
+    }
+  }
+  return nullptr;
+}
+
+// TODO: These should perhaps be renamed FindOwnedVirtualMethod or similar, because they do not
+// only find 'declared' methods and will also return copied methods. This behavior is desired and
+// correct, but the naming can lead to confusion because in the Java language 'declared' excludes
+// the interface methods that these may find.
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
+                                            const StringPiece& signature,
+                                            PointerSize pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName() && np_method->GetSignature() == signature) {
@@ -486,8 +492,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature,
-                                            size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
+                                            const Signature& signature,
+                                            PointerSize pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName() && signature == np_method->GetSignature()) {
@@ -497,13 +504,12 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                            size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache,
+                                            uint32_t dex_method_idx,
+                                            PointerSize pointer_size) {
   if (GetDexCache() == dex_cache) {
-    for (auto& method : GetVirtualMethods(pointer_size)) {
-      // A miranda method may have a different DexCache and is always created by linking,
-      // never *declared* in the class.
-      if (method.GetDexMethodIndex() == dex_method_idx && !method.IsMiranda()) {
+    for (auto& method : GetDeclaredVirtualMethods(pointer_size)) {
+      if (method.GetDexMethodIndex() == dex_method_idx) {
         return &method;
       }
     }
@@ -511,7 +517,8 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethodByName(const StringPiece& name, size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethodByName(const StringPiece& name,
+                                                  PointerSize pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName()) {
@@ -522,7 +529,7 @@
 }
 
 ArtMethod* Class::FindVirtualMethod(
-    const StringPiece& name, const StringPiece& signature, size_t pointer_size) {
+    const StringPiece& name, const StringPiece& signature, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -533,7 +540,7 @@
 }
 
 ArtMethod* Class::FindVirtualMethod(
-    const StringPiece& name, const Signature& signature, size_t pointer_size) {
+    const StringPiece& name, const Signature& signature, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -544,7 +551,7 @@
 }
 
 ArtMethod* Class::FindVirtualMethod(
-    const DexCache* dex_cache, uint32_t dex_method_idx, size_t pointer_size) {
+    const DexCache* dex_cache, uint32_t dex_method_idx, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx, pointer_size);
     if (method != nullptr) {
@@ -554,7 +561,72 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindClassInitializer(size_t pointer_size) {
+ArtMethod* Class::FindVirtualMethodForInterfaceSuper(ArtMethod* method, PointerSize pointer_size) {
+  DCHECK(method->GetDeclaringClass()->IsInterface());
+  DCHECK(IsInterface()) << "Should only be called on an interface class";
+  // Check if we have one defined on this interface first. This includes searching copied ones to
+  // get any conflict methods. Conflict methods are copied into each subtype from the supertype. We
+  // don't do any indirect method checks here.
+  for (ArtMethod& iface_method : GetVirtualMethods(pointer_size)) {
+    if (method->HasSameNameAndSignature(&iface_method)) {
+      return &iface_method;
+    }
+  }
+
+  std::vector<ArtMethod*> abstract_methods;
+  // Search through the IFTable for a working version. We don't need to check for conflicts
+  // because if there was one it would appear in this class's virtual_methods_ above.
+
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  MutableHandle<mirror::IfTable> iftable(hs.NewHandle(GetIfTable()));
+  MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
+  size_t iftable_count = GetIfTableCount();
+  // Find the method. We don't need to check for conflicts because they would have been in the
+  // copied virtuals of this interface. Order matters: traverse in reverse topological order so
+  // the most derived interfaces are visited first.
+  for (size_t k = iftable_count; k != 0;) {
+    k--;
+    DCHECK_LT(k, iftable->Count());
+    iface.Assign(iftable->GetInterface(k));
+    // Iterate through every declared method on this interface. Each declared method's
+    // name/signature is unique, so the order of the inner loop doesn't matter.
+    for (auto& method_iter : iface->GetDeclaredVirtualMethods(pointer_size)) {
+      ArtMethod* current_method = &method_iter;
+      if (current_method->HasSameNameAndSignature(method)) {
+        if (current_method->IsDefault()) {
+          // Handle JLS soft errors, a default method from another superinterface tree can
+          // "override" an abstract method(s) from another superinterface tree(s).  To do this,
+          // ignore any [default] method which are dominated by the abstract methods we've seen so
+          // far. Check if overridden by any in abstract_methods. We do not need to check for
+          // default_conflicts because we would hit those before we get to this loop.
+          bool overridden = false;
+          for (ArtMethod* possible_override : abstract_methods) {
+            DCHECK(possible_override->HasSameNameAndSignature(current_method));
+            if (iface->IsAssignableFrom(possible_override->GetDeclaringClass())) {
+              overridden = true;
+              break;
+            }
+          }
+          if (!overridden) {
+            return current_method;
+          }
+        } else {
+          // Is not default.
+          // This might override another default method. Just stash it for now.
+          abstract_methods.push_back(current_method);
+        }
+      }
+    }
+  }
+  // If we reach here we either never found any declaration of the method (in which case
+  // 'abstract_methods' is empty) or we found no non-overridden default methods (in which case
+  // 'abstract_methods' contains a number of abstract implementations of the method). We choose
+  // one of these arbitrarily.
+  return abstract_methods.empty() ? nullptr : abstract_methods[0];
+}
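
To make the dominance rule above concrete, here is a minimal standalone sketch (toy types, not ART's mirrors; all names are illustrative): a default method is returned only if no more specific superinterface has re-abstracted the same name/signature.

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct Iface {
  std::string name;
  std::vector<const Iface*> supers;
  bool has_default_m;   // declares m() with a default body
  bool has_abstract_m;  // (re-)declares m() abstract

  // True if `other` is this interface or a (transitive) subinterface of it.
  bool IsAssignableFrom(const Iface* other) const {
    if (other == this) return true;
    for (const Iface* s : other->supers) {
      if (IsAssignableFrom(s)) return true;
    }
    return false;
  }
};

// Mirrors the loop above: scan the iftable in reverse topological order,
// stash abstract declarations, and skip any default they dominate.
const Iface* ResolveM(const std::vector<const Iface*>& iftable) {
  std::vector<const Iface*> abstract_methods;
  for (std::size_t k = iftable.size(); k != 0;) {
    const Iface* iface = iftable[--k];
    if (iface->has_default_m) {
      bool overridden = false;
      for (const Iface* a : abstract_methods) {
        if (iface->IsAssignableFrom(a)) {  // a more specific re-abstraction
          overridden = true;
          break;
        }
      }
      if (!overridden) return iface;
    } else if (iface->has_abstract_m) {
      abstract_methods.push_back(iface);
    }
  }
  return abstract_methods.empty() ? nullptr : abstract_methods[0];
}

int main() {
  Iface a{"A", {}, /*default*/ true, /*abstract*/ false};
  Iface b{"B", {&a}, false, true};       // B re-abstracts A.m()
  // The iftable is in topological order: supers before subs.
  assert(ResolveM({&a, &b}) == &b);      // A's default is dominated by B
  Iface c{"C", {}, true, false};         // unrelated default provider
  assert(ResolveM({&a, &b, &c}) == &c);  // C's default wins
}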
+
+ArtMethod* Class::FindClassInitializer(PointerSize pointer_size) {
   for (ArtMethod& method : GetDirectMethods(pointer_size)) {
     if (method.IsClassInitializer()) {
       DCHECK_STREQ(method.GetName(), "<clinit>");
@@ -574,7 +646,7 @@
     return nullptr;
   }
   size_t low = 0;
-  size_t high = fields->Length();
+  size_t high = fields->size();
   ArtField* ret = nullptr;
   while (low < high) {
     size_t mid = (low + high) / 2;
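
The hunk above only switches the bound from `Length()` to `size()`; for reference, a minimal sketch of the same half-open binary search pattern over a sorted array (string keys here are an illustrative stand-in for the real field comparison):

#include <cassert>
#include <cstddef>
#include <cstring>

// Half-open [low, high) binary search, as in the loop above.
const char* Find(const char* key, const char* const* fields, std::size_t size) {
  std::size_t low = 0;
  std::size_t high = size;
  const char* ret = nullptr;
  while (low < high) {
    std::size_t mid = (low + high) / 2;
    int cmp = std::strcmp(key, fields[mid]);
    if (cmp == 0) {
      ret = fields[mid];
      break;
    }
    if (cmp < 0) {
      high = mid;     // search the lower half
    } else {
      low = mid + 1;  // search the upper half
    }
  }
  return ret;
}

int main() {
  const char* const sorted[] = {"alpha", "beta", "gamma"};
  assert(Find("beta", sorted, 3) == sorted[1]);
  assert(Find("delta", sorted, 3) == nullptr);
}
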
@@ -689,21 +761,24 @@
   return nullptr;
 }
 
-ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ArtField* Class::FindStaticField(Thread* self,
+                                 Class* klass,
+                                 const DexCache* dex_cache,
                                  uint32_t dex_field_idx) {
-  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+  for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
     if (f != nullptr) {
       return f;
     }
-    // Wrap k incase it moves during GetDirectInterface.
+    // Though GetDirectInterface() should not cause thread suspension when called
+    // from here, it takes a Handle as an argument, so we need to wrap `k`.
+    ScopedAssertNoThreadSuspension ants(self, __FUNCTION__);
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+    Handle<mirror::Class> h_k(hs.NewHandle(k));
     // Is this field in any of this class' interfaces?
     for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+      mirror::Class* interface = GetDirectInterface(self, h_k, i);
       f = FindStaticField(self, interface, dex_cache, dex_field_idx);
       if (f != nullptr) {
         return f;
@@ -741,16 +816,11 @@
   return nullptr;
 }
 
-void Class::SetPreverifiedFlagOnAllMethods(size_t pointer_size) {
+void Class::SetSkipAccessChecksFlagOnAllMethods(PointerSize pointer_size) {
   DCHECK(IsVerified());
-  for (auto& m : GetDirectMethods(pointer_size)) {
-    if (!m.IsNative() && !m.IsAbstract()) {
-      m.SetPreverified();
-    }
-  }
-  for (auto& m : GetVirtualMethods(pointer_size)) {
-    if (!m.IsNative() && !m.IsAbstract()) {
-      m.SetPreverified();
+  for (auto& m : GetMethods(pointer_size)) {
+    if (!m.IsNative() && m.IsInvokable()) {
+      m.SetSkipAccessChecks();
     }
   }
 }
@@ -820,6 +890,19 @@
   }
 }
 
+mirror::Class* Class::GetCommonSuperClass(Handle<Class> klass) {
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(!klass->IsInterface());
+  DCHECK(!IsInterface());
+  mirror::Class* common_super_class = this;
+  while (!common_super_class->IsAssignableFrom(klass.Get())) {
+    mirror::Class* old_common = common_super_class;
+    common_super_class = old_common->GetSuperClass();
+    DCHECK(common_super_class != nullptr) << PrettyClass(old_common);
+  }
+  return common_super_class;
+}
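
A minimal sketch of the same walk with toy types (not ART mirrors): climb this class's superclass chain until a class assignable from `klass` is reached; a shared root class guarantees termination.

#include <cassert>

struct Klass {
  const Klass* super;

  // True if `src` is this class or a (transitive) subclass of it.
  bool IsAssignableFrom(const Klass* src) const {
    for (const Klass* k = src; k != nullptr; k = k->super) {
      if (k == this) return true;
    }
    return false;
  }

  const Klass* CommonSuperClass(const Klass* other) const {
    const Klass* common = this;
    while (!common->IsAssignableFrom(other)) {
      common = common->super;  // non-null as long as both chains share a root
    }
    return common;
  }
};

int main() {
  Klass object{nullptr};  // the shared root, like java.lang.Object
  Klass a{&object};
  Klass b{&object};
  Klass a1{&a};
  assert(a1.CommonSuperClass(&b) == &object);
  assert(a1.CommonSuperClass(&a) == &a);
  assert(object.CommonSuperClass(&a1) == &object);
}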
+
 const char* Class::GetSourceFile() {
   const DexFile& dex_file = GetDexFile();
   const DexFile::ClassDef* dex_class_def = GetClassDef();
@@ -847,13 +930,7 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize],
-                                         size_t pointer_size) {
-  for (size_t i = 0; i < kImtSize; i++) {
-    auto method = methods[i];
-    DCHECK(method != nullptr);
-    SetEmbeddedImTableEntry(i, method, pointer_size);
-  }
+void Class::PopulateEmbeddedVTable(PointerSize pointer_size) {
   PointerArray* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   const size_t table_length = table->GetLength();
@@ -899,9 +976,12 @@
 // The pre-fence visitor for Class::CopyOf().
 class CopyClassVisitor {
  public:
-  CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig, size_t new_length,
-                   size_t copy_bytes, ArtMethod* const (&imt)[mirror::Class::kImtSize],
-                   size_t pointer_size)
+  CopyClassVisitor(Thread* self,
+                   Handle<mirror::Class>* orig,
+                   size_t new_length,
+                   size_t copy_bytes,
+                   ImTable* imt,
+                   PointerSize pointer_size)
       : self_(self), orig_(orig), new_length_(new_length),
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
   }
@@ -912,7 +992,8 @@
     Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
     mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
     mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
-    h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_, pointer_size_);
+    h_new_class_obj->PopulateEmbeddedVTable(pointer_size_);
+    h_new_class_obj->SetImt(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there is no from space references in the native
     // roots.
@@ -925,13 +1006,12 @@
   Handle<mirror::Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
-  ArtMethod* const (&imt_)[mirror::Class::kImtSize];
-  const size_t pointer_size_;
+  ImTable* imt_;
+  const PointerSize pointer_size_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
-Class* Class::CopyOf(Thread* self, int32_t new_length,
-                     ArtMethod* const (&imt)[mirror::Class::kImtSize], size_t pointer_size) {
+Class* Class::CopyOf(Thread* self, int32_t new_length, ImTable* imt, PointerSize pointer_size) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
@@ -957,14 +1037,14 @@
 
 // TODO: Move this to java_lang_Class.cc?
 ArtMethod* Class::GetDeclaredConstructor(
-    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args) {
-  for (auto& m : GetDirectMethods(sizeof(void*))) {
+    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, PointerSize pointer_size) {
+  for (auto& m : GetDirectMethods(pointer_size)) {
     // Skip <clinit> which is a static constructor, as well as non-constructors.
     if (m.IsStatic() || !m.IsConstructor()) {
       continue;
     }
     // May cause thread suspension and exceptions.
-    if (m.GetInterfaceMethodIfProxy(sizeof(void*))->EqualParameters(args)) {
+    if (m.GetInterfaceMethodIfProxy(kRuntimePointerSize)->EqualParameters(args)) {
       return &m;
     }
     if (UNLIKELY(self->IsExceptionPending())) {
@@ -982,5 +1062,152 @@
   return depth;
 }
 
+uint32_t Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
+  std::string temp;
+  const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
+  return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
+}
+
+template <PointerSize kPointerSize, bool kTransactionActive>
+mirror::Method* Class::GetDeclaredMethodInternal(Thread* self,
+                                                 mirror::Class* klass,
+                                                 mirror::String* name,
+                                                 mirror::ObjectArray<mirror::Class>* args) {
+  // Covariant return types permit the class to define multiple
+  // methods with the same name and parameter types. Prefer to
+  // return a non-synthetic method in such situations. We may
+  // still return a synthetic method to handle situations like
+  // escalated visibility. We never return miranda methods that
+  // were synthesized by the runtime.
+  constexpr uint32_t kSkipModifiers = kAccMiranda | kAccSynthetic;
+  StackHandleScope<3> hs(self);
+  auto h_method_name = hs.NewHandle(name);
+  if (UNLIKELY(h_method_name.Get() == nullptr)) {
+    ThrowNullPointerException("name == null");
+    return nullptr;
+  }
+  auto h_args = hs.NewHandle(args);
+  Handle<mirror::Class> h_klass = hs.NewHandle(klass);
+  ArtMethod* result = nullptr;
+  for (auto& m : h_klass->GetDeclaredVirtualMethods(kPointerSize)) {
+    auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
+    // May cause thread suspension.
+    mirror::String* np_name = np_method->GetNameAsString(self);
+    if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
+      if (UNLIKELY(self->IsExceptionPending())) {
+        return nullptr;
+      }
+      continue;
+    }
+    auto modifiers = m.GetAccessFlags();
+    if ((modifiers & kSkipModifiers) == 0) {
+      return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
+    }
+    if ((modifiers & kAccMiranda) == 0) {
+      result = &m;  // Remember as potential result if it's not a miranda method.
+    }
+  }
+  if (result == nullptr) {
+    for (auto& m : h_klass->GetDirectMethods(kPointerSize)) {
+      auto modifiers = m.GetAccessFlags();
+      if ((modifiers & kAccConstructor) != 0) {
+        continue;
+      }
+      auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
+      // May cause thread suspension.
+      mirror::String* np_name = np_method->GetNameAsString(self);
+      if (np_name == nullptr) {
+        self->AssertPendingException();
+        return nullptr;
+      }
+      if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
+        if (UNLIKELY(self->IsExceptionPending())) {
+          return nullptr;
+        }
+        continue;
+      }
+      if ((modifiers & kSkipModifiers) == 0) {
+        return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
+      }
+      // Direct methods cannot be miranda methods, so this potential result must be synthetic.
+      result = &m;
+    }
+  }
+  return result != nullptr
+      ? mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
+      : nullptr;
+}
+
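The preference order implemented above can be summarized in a small toy sketch (flag values are illustrative, not the real kAcc* constants): return the first candidate carrying none of the skip modifiers, otherwise fall back to a remembered non-miranda (synthetic) candidate.

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative flag values only; not ART's real access-flag constants.
constexpr uint32_t kSynthetic = 1u << 0;
constexpr uint32_t kMiranda = 1u << 1;
constexpr uint32_t kSkipModifiers = kSynthetic | kMiranda;

struct Method {
  uint32_t flags;
};

const Method* PickDeclaredMethod(const std::vector<Method>& candidates) {
  const Method* result = nullptr;
  for (const Method& m : candidates) {
    if ((m.flags & kSkipModifiers) == 0) {
      return &m;  // a plain, non-synthetic declaration wins immediately
    }
    if ((m.flags & kMiranda) == 0) {
      result = &m;  // remember a synthetic candidate as a fallback
    }
  }
  return result;  // may be null if only miranda methods matched
}

int main() {
  std::vector<Method> ms = {{kMiranda}, {kSynthetic}, {0}};
  assert(PickDeclaredMethod(ms) == &ms[2]);  // plain declaration preferred
  std::vector<Method> synth_only = {{kSynthetic}};
  assert(PickDeclaredMethod(synth_only) == &synth_only[0]);
  std::vector<Method> miranda_only = {{kMiranda}};
  assert(PickDeclaredMethod(miranda_only) == nullptr);
}
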
+template
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k32, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k32, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k64, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+
+template <PointerSize kPointerSize, bool kTransactionActive>
+mirror::Constructor* Class::GetDeclaredConstructorInternal(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args) {
+  StackHandleScope<1> hs(self);
+  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), kPointerSize);
+  return result != nullptr
+      ? mirror::Constructor::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
+      : nullptr;
+}
+
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k32, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k32, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k64, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k64, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+
+int32_t Class::GetInnerClassFlags(Handle<Class> h_this, int32_t default_value) {
+  if (h_this->IsProxyClass() || h_this->GetDexCache() == nullptr) {
+    return default_value;
+  }
+  uint32_t flags;
+  if (!h_this->GetDexFile().GetInnerClassFlags(h_this, &flags)) {
+    return default_value;
+  }
+  return flags;
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 8219d69..e2cd649 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -17,11 +17,13 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_H_
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
+#include "base/enums.h"
 #include "base/iteration_range.h"
 #include "dex_file.h"
 #include "class_flags.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
+#include "imtable.h"
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
@@ -33,10 +35,6 @@
 #include "thread.h"
 #include "utils.h"
 
-#ifndef IMT_SIZE
-#error IMT_SIZE not defined
-#endif
-
 namespace art {
 
 class ArtField;
@@ -44,6 +42,7 @@
 struct ClassOffsets;
 template<class T> class Handle;
 template<typename T> class LengthPrefixedArray;
+template<typename T> class ArraySlice;
 class Signature;
 class StringPiece;
 template<size_t kNumReferences> class PACKED(4) StackHandleScope;
@@ -54,6 +53,10 @@
 class Constructor;
 class DexCache;
 class IfTable;
+class Method;
+struct StringDexCachePair;
+
+using StringDexCacheType = std::atomic<mirror::StringDexCachePair>;
 
 // C++ mirror of java.lang.Class
 class MANAGED Class FINAL : public Object {
@@ -64,10 +67,11 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
-  // Interface method table size. Increasing this value reduces the chance of two interface methods
-  // colliding in the interface method table but increases the size of classes that implement
-  // (non-marker) interfaces.
-  static constexpr size_t kImtSize = IMT_SIZE;
+  // Shift the packed primitive type right by kPrimitiveTypeSizeShiftShift to get the component
+  // size shift. This is used for computing array sizes as follows:
+  // array_bytes = header_size + (elements << (primitive_type >> kPrimitiveTypeSizeShiftShift))
+  static constexpr uint32_t kPrimitiveTypeSizeShiftShift = 16;
+  static constexpr uint32_t kPrimitiveTypeMask = (1u << kPrimitiveTypeSizeShiftShift) - 1;
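
A minimal worked sketch of this 16/16 packing (constants renamed locally; the type value and sizes are hypothetical): the low 16 bits hold the Primitive::Type value, the upper 16 bits hold the component size shift, and the array-size formula above falls out directly.

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr uint32_t kShiftShift = 16;                     // kPrimitiveTypeSizeShiftShift
constexpr uint32_t kTypeMask = (1u << kShiftShift) - 1;  // kPrimitiveTypeMask

constexpr uint32_t Pack(uint32_t type, uint32_t size_shift) {
  return type | (size_shift << kShiftShift);
}

int main() {
  // Hypothetical: a primitive type enum value 7 with 8-byte components (shift 3).
  uint32_t v32 = Pack(7, 3);
  assert((v32 & kTypeMask) == 7);     // low 16 bits: the type
  assert((v32 >> kShiftShift) == 3);  // high 16 bits: the size shift
  // array_bytes = header_size + (elements << (primitive_type >> kPrimitiveTypeSizeShiftShift))
  std::size_t header_size = 16;
  std::size_t elements = 10;
  std::size_t array_bytes = header_size + (elements << (v32 >> kShiftShift));
  assert(array_bytes == 16 + 10 * 8);
}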
 
   // Class Status
   //
@@ -286,14 +290,20 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
-  // Returns true if the class can avoid access checks.
-  bool IsPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return (GetAccessFlags() & kAccPreverified) != 0;
+  // Returns whether the verifier has been run on this class at least once.
+  // This does not necessarily mean that access checks are avoidable,
+  // since the class' methods might still need to be run with access checks.
+  bool WasVerificationAttempted() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccVerificationAttempted) != 0;
   }
 
-  void SetPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Mark the class as having gone through a verification attempt.
+  // Independent of whether or not each method is allowed to skip access checks.
+  void SetVerificationAttempted() SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
-    SetAccessFlags(flags | kAccPreverified);
+    if ((flags & kAccVerificationAttempted) == 0) {
+      SetAccessFlags(flags | kAccVerificationAttempted);
+    }
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -343,7 +353,7 @@
   // be replaced with a class with the right size for embedded imt/vtable.
   bool IsTemp() SHARED_REQUIRES(Locks::mutator_lock_) {
     Status s = GetStatus();
-    return s < Status::kStatusResolving && ShouldHaveEmbeddedImtAndVTable();
+    return s < Status::kStatusResolving && ShouldHaveEmbeddedVTable();
   }
 
   String* GetName() SHARED_REQUIRES(Locks::mutator_lock_);  // Returns the cached name.
@@ -370,10 +380,10 @@
 
   void SetPrimitiveType(Primitive::Type new_type) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-    int32_t v32 = static_cast<int32_t>(new_type);
-    DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero";
+    uint32_t v32 = static_cast<uint32_t>(new_type);
+    DCHECK_EQ(v32 & kPrimitiveTypeMask, v32) << "upper 16 bits aren't zero";
     // Store the component size shift in the upper 16 bits.
-    v32 |= Primitive::ComponentSizeShift(new_type) << 16;
+    v32 |= Primitive::ComponentSizeShift(new_type) << kPrimitiveTypeSizeShiftShift;
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }
 
@@ -443,7 +453,6 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-
   bool IsArrayClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -472,7 +481,7 @@
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return 1U << GetComponentSizeShift();
+    return 1U << GetComponentSizeShift<kReadBarrierOption>();
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -488,15 +497,18 @@
     return !IsPrimitive() && !IsInterface() && !IsAbstract() && !IsArrayClass();
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsInstantiable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return (!IsPrimitive() && !IsInterface() && !IsAbstract()) ||
-        (IsAbstract() && IsArrayClass());
+        (IsAbstract() && IsArrayClass<kVerifyFlags, kReadBarrierOption>());
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObjectArrayClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetComponentType<kVerifyFlags>() != nullptr &&
-        !GetComponentType<kVerifyFlags>()->IsPrimitive();
+    mirror::Class* const component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>();
+    return component_type != nullptr && !component_type->IsPrimitive();
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -547,24 +559,24 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Compute how many bytes would be used by a class with the given elements.
-  static uint32_t ComputeClassSize(bool has_embedded_tables,
+  static uint32_t ComputeClassSize(bool has_embedded_vtable,
                                    uint32_t num_vtable_entries,
                                    uint32_t num_8bit_static_fields,
                                    uint32_t num_16bit_static_fields,
                                    uint32_t num_32bit_static_fields,
                                    uint32_t num_64bit_static_fields,
                                    uint32_t num_ref_static_fields,
-                                   size_t pointer_size);
+                                   PointerSize pointer_size);
 
   // The size of java.lang.Class.class.
-  static uint32_t ClassClassSize(size_t pointer_size) {
+  static uint32_t ClassClassSize(PointerSize pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 65;
-    return ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 0, pointer_size);
+    uint32_t vtable_entries = Object::kVTableLength + 70;
+    return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
   // The size of a java.lang.Class representing a primitive such as int.class.
-  static uint32_t PrimitiveClassSize(size_t pointer_size) {
+  static uint32_t PrimitiveClassSize(PointerSize pointer_size) {
     return ComputeClassSize(false, 0, 0, 0, 0, 0, 0, pointer_size);
   }
 
@@ -655,9 +667,15 @@
   // to themselves. Classes for primitive types may not assign to each other.
   ALWAYS_INLINE bool IsAssignableFrom(Class* src) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE Class* GetSuperClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetSuperClass(Class *new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Get the first common superclass. It will never return null.
+  // `This` and `klass` must both be non-interface classes.
+  Class* GetCommonSuperClass(Handle<Class> klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetSuperClass(Class* new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
     Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
     DCHECK(old_super_class == nullptr || old_super_class == new_super_class);
@@ -695,49 +713,112 @@
   // Also updates the dex_cache_strings_ variable from new_dex_cache.
   void SetDexCache(DexCache* new_dex_cache) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(size_t pointer_size)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  LengthPrefixedArray<ArtMethod>* GetDirectMethodsPtr() SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetMethodsPtr()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetDirectMethodsPtr(LengthPrefixedArray<ArtMethod>* new_direct_methods)
+  static MemberOffset MethodsOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Class, methods_));
+  }
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetMethods(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
+                     uint32_t num_direct,
+                     uint32_t num_virtual)
       SHARED_REQUIRES(Locks::mutator_lock_);
   // Used by image writer.
-  void SetDirectMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_direct_methods)
+  void SetMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_methods,
+                              uint32_t num_direct,
+                              uint32_t num_virtual)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArtMethod* GetDirectMethod(size_t i, size_t pointer_size)
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSlice(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArtMethod* GetDirectMethod(size_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Use only when we are allocating or populating the method arrays.
-  ALWAYS_INLINE ArtMethod* GetDirectMethodUnchecked(size_t i, size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetDirectMethodUnchecked(size_t i, PointerSize pointer_size)
         SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE ArtMethod* GetVirtualMethodUnchecked(size_t i, size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetVirtualMethodUnchecked(size_t i, PointerSize pointer_size)
         SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the number of static, private, and constructor methods.
   ALWAYS_INLINE uint32_t NumDirectMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtr()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetVirtualMethods(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void SetVirtualMethodsPtr(LengthPrefixedArray<ArtMethod>* new_virtual_methods)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns the number of non-inherited virtual methods.
-  ALWAYS_INLINE uint32_t NumVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
-
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ArtMethod* GetVirtualMethod(size_t i, size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* GetVirtualMethodDuringLinking(size_t i, size_t pointer_size)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredMethods(
+        PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <PointerSize kPointerSize, bool kTransactionActive>
+  static Method* GetDeclaredMethodInternal(Thread* self,
+                                           mirror::Class* klass,
+                                           mirror::String* name,
+                                           mirror::ObjectArray<mirror::Class>* args)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  template <PointerSize kPointerSize, bool kTransactionActive>
+  static Constructor* GetDeclaredConstructorInternal(Thread* self,
+                                                     mirror::Class* klass,
+                                                     mirror::ObjectArray<mirror::Class>* args)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredVirtualMethods(
+        PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSlice(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetCopiedMethods(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSlice(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetVirtualMethods(
+      PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of non-inherited virtual methods (sum of declared and copied methods).
+  ALWAYS_INLINE uint32_t NumVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of copied virtual methods.
+  ALWAYS_INLINE uint32_t NumCopiedVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of declared virtual methods.
+  ALWAYS_INLINE uint32_t NumDeclaredVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE uint32_t NumMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ArtMethod* GetVirtualMethod(size_t i, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ArtMethod* GetVirtualMethodDuringLinking(size_t i, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE PointerArray* GetVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE PointerArray* GetVTableDuringLinking() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -752,126 +833,146 @@
     return MemberOffset(sizeof(Class));
   }
 
-  bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsInstantiable();
+  static MemberOffset ImtPtrOffset(PointerSize pointer_size) {
+    return MemberOffset(
+        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t),
+                static_cast<size_t>(pointer_size)));
+  }
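
The `RoundUp` here just aligns the IMT pointer to the image pointer size; a tiny sketch of the arithmetic (the offset values are hypothetical):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Round x up to the next multiple of n, where n is a power of two.
constexpr std::size_t RoundUp(std::size_t x, std::size_t n) {
  return (x + n - 1) & ~(n - 1);
}

int main() {
  // Hypothetical layout: the embedded vtable length is a uint32_t at offset 244.
  std::size_t after_length = 244 + sizeof(uint32_t);  // 248
  assert(RoundUp(after_length, 4) == 248);  // 32-bit images: already aligned
  assert(RoundUp(after_length, 8) == 248);  // 64-bit images: 248 is 8-aligned
  assert(RoundUp(249, 8) == 256);           // otherwise bump to the next slot
}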
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool ShouldHaveImt() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>();
+  }
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool ShouldHaveEmbeddedVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static MemberOffset EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size);
-
-  static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
-
-  ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, PointerSize pointer_size);
 
   int32_t GetVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* GetVTableEntry(uint32_t i, size_t pointer_size)
+  ArtMethod* GetVTableEntry(uint32_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   int32_t GetEmbeddedVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetEmbeddedVTableLength(int32_t len) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size)
+  ImTable* GetImt(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetImt(ImTable* imt, PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ArtMethod* GetEmbeddedVTableEntry(uint32_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
+  void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  inline void SetEmbeddedVTableEntryUnchecked(uint32_t i, ArtMethod* method, size_t pointer_size)
+  inline void SetEmbeddedVTableEntryUnchecked(uint32_t i,
+                                              ArtMethod* method,
+                                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize], size_t pointer_size)
+  void PopulateEmbeddedVTable(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
   // specific implementation method for this class.
-  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class' super class, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Given a method from some implementor of this interface, return the specific implementation
+  // method for this class.
+  ArtMethod* FindVirtualMethodForInterfaceSuper(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class, but potentially from a
   // super class or interface, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE;
 
-  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& signature,
-                                 size_t pointer_size)
+                                 PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature,
-                                 size_t pointer_size)
+                                 PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                 size_t pointer_size)
+                                 PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature,
-                                      size_t pointer_size)
+                                      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature,
-                                      size_t pointer_size)
+                                      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                      size_t pointer_size)
+                                      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature,
-                              size_t pointer_size)
+                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature,
-                              size_t pointer_size)
+                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                              size_t pointer_size)
+                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
-                                       size_t pointer_size)
+                                       PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature,
-                                       size_t pointer_size)
+                                       PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                       size_t pointer_size)
+                                       PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name, size_t pointer_size)
+  ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ArtMethod* FindDeclaredDirectMethodByName(const StringPiece& name, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature,
-                               size_t pointer_size)
+                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature,
-                               size_t pointer_size)
+                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                               size_t pointer_size)
+                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* FindClassInitializer(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* FindClassInitializer(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool HasDefaultMethods() SHARED_REQUIRES(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccHasDefaultMethod) != 0;
@@ -883,6 +984,8 @@
 
   ALWAYS_INLINE int32_t GetIfTableCount() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE IfTable* GetIfTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE void SetIfTable(IfTable* new_iftable) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -927,6 +1030,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the offset of the first reference instance field. Other reference instance fields follow.
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   MemberOffset GetFirstReferenceInstanceFieldOffset()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -947,11 +1052,13 @@
   }
 
   // Get the offset of the first reference static field. Other reference static fields follow.
-  MemberOffset GetFirstReferenceStaticFieldOffset(size_t pointer_size)
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  MemberOffset GetFirstReferenceStaticFieldOffset(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the offset of the first reference static field. Other reference static fields follow.
-  MemberOffset GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size)
+  MemberOffset GetFirstReferenceStaticFieldOffsetDuringLinking(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Gets the static fields of the class.
@@ -998,7 +1105,9 @@
 
   // Finds the given static field in this class or superclass, only searches classes that
   // have the same dex cache.
-  static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+  static ArtField* FindStaticField(Thread* self,
+                                   Class* klass,
+                                   const DexCache* dex_cache,
                                    uint32_t dex_field_idx)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1009,15 +1118,15 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   pid_t GetClinitThreadId() SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsIdxLoaded() || IsErroneous());
+    DCHECK(IsIdxLoaded() || IsErroneous()) << PrettyClass(this);
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_));
   }
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* GetVerifyErrorClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  Object* GetVerifyError() SHARED_REQUIRES(Locks::mutator_lock_) {
     // DCHECK(IsErroneous());
-    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_));
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_));
   }
 
   uint16_t GetDexClassDefIndex() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1038,6 +1147,9 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx);
   }
 
+  uint32_t FindTypeIndexInOtherDexFile(const DexFile& dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Class* GetJavaLangClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(HasJavaLangClass());
     return java_lang_Class_.Read();
@@ -1055,12 +1167,12 @@
 
   // Visit native roots visits roots which are keyed off the native pointers such as ArtFields and
   // ArtMethods.
-  template<class Visitor>
-  void VisitNativeRoots(Visitor& visitor, size_t pointer_size)
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, class Visitor>
+  void VisitNativeRoots(Visitor& visitor, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // When class is verified, set the kAccPreverified flag on each method.
-  void SetPreverifiedFlagOnAllMethods(size_t pointer_size)
+  // When class is verified, set the kAccSkipAccessChecks flag on each method.
+  void SetSkipAccessChecksFlagOnAllMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the descriptor of the class. In a few cases a std::string is required, rather than
@@ -1094,8 +1206,8 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length, ArtMethod* const (&imt)[mirror::Class::kImtSize],
-                size_t pointer_size)
+  Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
+                PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // For proxy class only.
@@ -1110,8 +1222,8 @@
   bool GetSlowPathEnabled() SHARED_REQUIRES(Locks::mutator_lock_);
   void SetSlowPath(bool enabled) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  GcRoot<String>* GetDexCacheStrings() SHARED_REQUIRES(Locks::mutator_lock_);
-  void SetDexCacheStrings(GcRoot<String>* new_dex_cache_strings)
+  StringDexCacheType* GetDexCacheStrings() SHARED_REQUIRES(Locks::mutator_lock_);
+  void SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static MemberOffset DexCacheStringsOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_strings_);
@@ -1119,7 +1231,10 @@
 
   // May cause thread suspension due to EqualParameters.
   ArtMethod* GetDeclaredConstructor(
-      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args)
+      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  static int32_t GetInnerClassFlags(Handle<Class> h_this, int32_t default_value)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
@@ -1143,22 +1258,45 @@
     return GetClassLoader() == nullptr;
   }
 
-  static size_t ImTableEntrySize(size_t pointer_size) {
-    return pointer_size;
+  static size_t ImTableEntrySize(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size);
   }
 
-  static size_t VTableEntrySize(size_t pointer_size) {
-    return pointer_size;
+  static size_t VTableEntrySize(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size);
   }
 
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetDirectMethodsPtrUnchecked()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSliceUnchecked(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtrUnchecked()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSliceUnchecked(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSliceUnchecked(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSliceUnchecked(
+      PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSliceUnchecked(PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Fix up all of the native pointers in the class by running them through the visitor. Only sets
+  // the corresponding entry in dest if visitor(obj) != obj to prevent dirty memory. Dest should be
+  // initialized to a copy of *this to prevent issues. Does not visit the ArtMethod and ArtField
+  // roots.
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
+  void FixupNativePointers(mirror::Class* dest, PointerSize pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  void SetVerifyErrorClass(Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetVerifyError(Object* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool throw_on_failure, bool use_referrers_cache>
   bool ResolvedFieldAccessTest(Class* access_to, ArtField* field,
@@ -1183,18 +1321,31 @@
   IterationRange<StrideIterator<ArtField>> GetIFieldsUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // The index in the methods_ array where the first declared virtual method is.
+  ALWAYS_INLINE uint32_t GetVirtualMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The index in the methods_ array where the first direct method is.
+  ALWAYS_INLINE uint32_t GetDirectMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The index in the methods_ array where the first copied method is.
+  ALWAYS_INLINE uint32_t GetCopiedMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Check that the pointer size mathces the one in the class linker.
-  ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
+  // Check that the pointer size matches the one in the class linker.
+  ALWAYS_INLINE static void CheckPointerSize(PointerSize pointer_size);
 
-  static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
-  static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
-
-  template <typename Visitor>
+  static MemberOffset EmbeddedVTableOffset(PointerSize pointer_size);
+  template <bool kVisitNativeRoots,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // 'Class' Object Fields
+  // Order governed by Java field ordering. See art::ClassLinker::LinkFields.
+
   // Defining class loader, or null for the "bootstrap" system loader.
   HeapReference<ClassLoader> class_loader_;
 
@@ -1230,8 +1381,9 @@
   // check for interfaces and return null.
   HeapReference<Class> super_class_;
 
-  // If class verify fails, we must return same error on subsequent tries.
-  HeapReference<Class> verify_error_class_;
+  // If class verification fails, we must return the same error on subsequent tries. We may store
+  // either the class of the error, or an actual instance of Throwable here.
+  HeapReference<Object> verify_error_;
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
@@ -1242,9 +1394,6 @@
   // Short cuts to dex_cache_ member for fast compiled code access.
   uint64_t dex_cache_strings_;
 
-  // static, private, and <init> methods. Pointer to an ArtMethod length-prefixed array.
-  uint64_t direct_methods_;
-
   // instance fields
   //
   // These describe the layout of the contents of an Object.
@@ -1256,13 +1405,26 @@
   // ArtFields.
   uint64_t ifields_;
 
+  // Pointer to an ArtMethod length-prefixed array. Holds all the methods that are logically
+  // defined on this class: all private, static, final, and virtual methods, as well as inherited
+  // default methods and miranda methods.
+  //
+  // The slice methods_ [0, virtual_methods_offset_) are the direct (static, private, init) methods
+  // declared by this class.
+  //
+  // The slice methods_ [virtual_methods_offset_, copied_methods_offset_) are the virtual methods
+  // declared by this class.
+  //
+  // The slice methods_ [copied_methods_offset_, |methods_|) are the methods that are copied from
+  // interfaces such as miranda or default methods. These are copied for resolution purposes as this
+  // class is where they are (logically) declared as far as the virtual dispatch is concerned.
+  //
+  // Note that this field is used by the native debugger as the unique identifier for the type.
+  uint64_t methods_;
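
A small sketch of the partitioning described above (a toy struct, not ART's accessors): the two uint16_t offsets defined further down split the single methods_ array into three contiguous [begin, end) slices.

#include <cassert>
#include <cstdint>
#include <utility>

using Range = std::pair<uint32_t, uint32_t>;  // [begin, end) indices into methods_

struct MethodLayout {
  uint32_t num_methods;
  uint16_t virtual_methods_offset;  // index of the first declared virtual method
  uint16_t copied_methods_offset;   // index of the first copied (miranda/default) method

  Range Direct() const { return {0, virtual_methods_offset}; }
  Range DeclaredVirtual() const { return {virtual_methods_offset, copied_methods_offset}; }
  Range Copied() const { return {copied_methods_offset, num_methods}; }
};

int main() {
  // Hypothetical class: 3 direct, 4 declared virtual, and 2 copied methods.
  MethodLayout layout{9, 3, 7};
  assert(layout.Direct() == Range(0, 3));
  assert(layout.DeclaredVirtual() == Range(3, 7));
  assert(layout.Copied() == Range(7, 9));
}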
+
   // Static fields length-prefixed array.
   uint64_t sfields_;
 
-  // Virtual methods defined in this class; invoked through vtable. Pointer to an ArtMethod
-  // length-prefixed array.
-  uint64_t virtual_methods_;
-
   // Access flags; low 16 bits are defined by VM spec.
   uint32_t access_flags_;
 
@@ -1305,6 +1467,14 @@
   // State of class initialization.
   Status status_;
 
+  // The offset of the first virtual method that is copied from an interface. This includes miranda,
+  // default, and default-conflict methods. Having a hard limit of ((1 << 16) - 1) methods
+  // defined on a single class is well established in Java, so we use only uint16_t's here.
+  uint16_t copied_methods_offset_;
+
+  // The offset of the first declared virtual methods in the methods_ array.
+  uint16_t virtual_methods_offset_;
+
   // TODO: ?
   // initiating class loader list
   // NOTE: for classes with low serialNumber, these are unused, and the
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index e22ddd7..cc910b0 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -25,15 +25,19 @@
 namespace art {
 namespace mirror {
 
-template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+template <bool kVisitClasses,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
 inline void ClassLoader::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   // Visit instance fields first.
-  VisitInstanceFieldsReferences(klass, visitor);
-  // Visit classes loaded after.
-  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  ClassTable* const class_table = GetClassTable();
-  if (class_table != nullptr) {
-    class_table->VisitRoots(visitor);
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+  if (kVisitClasses) {
+    // Visit classes loaded after.
+    ClassTable* const class_table = GetClassTable();
+    if (class_table != nullptr) {
+      class_table->VisitRoots(visitor);
+    }
   }
 }
 
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index c2a65d6..1957e13 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -63,7 +63,10 @@
  private:
   // Visit instance fields of the class loader as well as its associated classes.
   // Null class loader is handled by ClassLinker::VisitClassRoots.
-  template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+  template <bool kVisitClasses,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index f8ccfb1..a3071b7 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -22,27 +22,33 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "mirror/class.h"
 #include "runtime.h"
 
+#include <atomic>
+
 namespace art {
 namespace mirror {
 
-inline uint32_t DexCache::ClassSize(size_t pointer_size) {
+inline uint32_t DexCache::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = Object::kVTableLength + 5;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
 
-inline String* DexCache::GetResolvedString(uint32_t string_idx) {
-  DCHECK_LT(string_idx, NumStrings());
-  return GetStrings()[string_idx].Read();
+inline mirror::String* DexCache::GetResolvedString(uint32_t string_idx) {
+  DCHECK_LT(string_idx, GetDexFile()->NumStringIds());
+  return StringDexCachePair::LookupString(GetStrings(), string_idx, NumStrings()).Read();
 }
 
-inline void DexCache::SetResolvedString(uint32_t string_idx, String* resolved) {
-  DCHECK_LT(string_idx, NumStrings());
+inline void DexCache::SetResolvedString(uint32_t string_idx, mirror::String* resolved) {
+  DCHECK_LT(string_idx % NumStrings(), NumStrings());
   // TODO default transaction support.
-  GetStrings()[string_idx] = GcRoot<String>(resolved);
+  StringDexCachePair idx_ptr;
+  idx_ptr.string_index = string_idx;
+  idx_ptr.string_pointer = GcRoot<String>(resolved);
+  GetStrings()[string_idx % NumStrings()].store(idx_ptr, std::memory_order_relaxed);
   // TODO: Fine-grained marking, so that we don't need to go through all arrays in full.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
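
The modulo indexing above reflects the new StringDexCachePair design: the cache is a fixed-size array of {string index, pointer} pairs, so colliding indices simply evict each other and a lookup verifies the stored index. A minimal single-threaded sketch of that scheme (the slot count and types are hypothetical; the real code uses atomic pairs):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>

struct Slot {
  uint32_t index = UINT32_MAX;  // which string index occupies this slot
  const std::string* value = nullptr;
};

constexpr std::size_t kSlots = 512;  // hypothetical cache size
Slot cache[kSlots];

void Put(uint32_t idx, const std::string* s) {
  cache[idx % kSlots] = Slot{idx, s};
}

const std::string* Get(uint32_t idx) {
  const Slot& slot = cache[idx % kSlots];
  return slot.index == idx ? slot.value : nullptr;  // miss if evicted
}

int main() {
  std::string a = "a", b = "b";
  Put(7, &a);
  assert(Get(7) == &a);
  Put(7 + kSlots, &b);        // maps to the same slot as index 7
  assert(Get(7) == nullptr);  // old entry evicted; detected via the stored index
  assert(Get(7 + kSlots) == &b);
}
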
@@ -55,13 +61,12 @@
 inline void DexCache::SetResolvedType(uint32_t type_idx, Class* resolved) {
   DCHECK_LT(type_idx, NumResolvedTypes());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   // TODO default transaction support.
-  DCHECK(resolved == nullptr || !resolved->IsErroneous());
   GetResolvedTypes()[type_idx] = GcRoot<Class>(resolved);
   // TODO: Fine-grained marking, so that we don't need to go through all arrays in full.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
 
-inline ArtField* DexCache::GetResolvedField(uint32_t field_idx, size_t ptr_size) {
+inline ArtField* DexCache::GetResolvedField(uint32_t field_idx, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   ArtField* field = GetElementPtrSize(GetResolvedFields(), field_idx, ptr_size);
@@ -71,13 +76,13 @@
   return field;
 }
 
-inline void DexCache::SetResolvedField(uint32_t field_idx, ArtField* field, size_t ptr_size) {
+inline void DexCache::SetResolvedField(uint32_t field_idx, ArtField* field, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   SetElementPtrSize(GetResolvedFields(), field_idx, field, ptr_size);
 }
 
-inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx, size_t ptr_size) {
+inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(method_idx, NumResolvedMethods());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   ArtMethod* method = GetElementPtrSize<ArtMethod*>(GetResolvedMethods(), method_idx, ptr_size);
@@ -89,19 +94,20 @@
   return method;
 }
 
-inline void DexCache::SetResolvedMethod(uint32_t method_idx, ArtMethod* method, size_t ptr_size) {
+inline void DexCache::SetResolvedMethod(uint32_t method_idx,
+                                        ArtMethod* method,
+                                        PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(method_idx, NumResolvedMethods());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   SetElementPtrSize(GetResolvedMethods(), method_idx, method, ptr_size);
 }
 
 template <typename PtrType>
-inline PtrType DexCache::GetElementPtrSize(PtrType* ptr_array, size_t idx, size_t ptr_size) {
-  if (ptr_size == 8u) {
+inline PtrType DexCache::GetElementPtrSize(PtrType* ptr_array, size_t idx, PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
     uint64_t element = reinterpret_cast<const uint64_t*>(ptr_array)[idx];
     return reinterpret_cast<PtrType>(dchecked_integral_cast<uintptr_t>(element));
   } else {
-    DCHECK_EQ(ptr_size, 4u);
     uint32_t element = reinterpret_cast<const uint32_t*>(ptr_array)[idx];
     return reinterpret_cast<PtrType>(dchecked_integral_cast<uintptr_t>(element));
   }
@@ -111,29 +117,62 @@
 inline void DexCache::SetElementPtrSize(PtrType* ptr_array,
                                         size_t idx,
                                         PtrType ptr,
-                                        size_t ptr_size) {
-  if (ptr_size == 8u) {
+                                        PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
     reinterpret_cast<uint64_t*>(ptr_array)[idx] =
         dchecked_integral_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
   } else {
-    DCHECK_EQ(ptr_size, 4u);
     reinterpret_cast<uint32_t*>(ptr_array)[idx] =
         dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr));
   }
 }
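
Get/SetElementPtrSize now take the PointerSize enum from base/enums.h rather than a raw size_t, so an out-of-range size can no longer reach the else branch and the old DCHECK_EQ(ptr_size, 4u) becomes unnecessary. A standalone sketch of the same two-way dispatch, with a stand-in enum:

    #include <cstddef>
    #include <cstdint>

    enum class PointerSize : size_t { k32 = 4, k64 = 8 };  // stand-in

    template <typename PtrType>
    PtrType GetElement(const void* array, size_t idx, PointerSize ps) {
      if (ps == PointerSize::k64) {
        uint64_t e = static_cast<const uint64_t*>(array)[idx];
        return reinterpret_cast<PtrType>(static_cast<uintptr_t>(e));
      }
      // Only k32 remains: a scoped enum has exactly the listed values
      // (absent deliberate casts), so no runtime check is needed.
      uint32_t e = static_cast<const uint32_t*>(array)[idx];
      return reinterpret_cast<PtrType>(static_cast<uintptr_t>(e));
    }
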
 
-template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
 inline void DexCache::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   // Visit instance fields first.
-  VisitInstanceFieldsReferences(klass, visitor);
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   // Visit arrays after.
-  GcRoot<mirror::String>* strings = GetStrings();
-  for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) {
-    visitor.VisitRootIfNonNull(strings[i].AddressWithoutBarrier());
+  if (kVisitNativeRoots) {
+    mirror::StringDexCacheType* strings = GetStrings();
+    for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) {
+      StringDexCachePair source = strings[i].load(std::memory_order_relaxed);
+      mirror::String* before = source.string_pointer.Read<kReadBarrierOption>();
+      GcRoot<mirror::String> root(before);
+      visitor.VisitRootIfNonNull(root.AddressWithoutBarrier());
+      if (root.Read() != before) {
+        source.string_pointer = GcRoot<String>(root.Read());
+        strings[i].store(source, std::memory_order_relaxed);
+      }
+    }
+    GcRoot<mirror::Class>* resolved_types = GetResolvedTypes();
+    for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) {
+      visitor.VisitRootIfNonNull(resolved_types[i].AddressWithoutBarrier());
+    }
   }
-  GcRoot<mirror::Class>* resolved_types = GetResolvedTypes();
-  for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) {
-    visitor.VisitRootIfNonNull(resolved_types[i].AddressWithoutBarrier());
+}
+
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
+inline void DexCache::FixupStrings(mirror::StringDexCacheType* dest, const Visitor& visitor) {
+  mirror::StringDexCacheType* src = GetStrings();
+  for (size_t i = 0, count = NumStrings(); i < count; ++i) {
+    StringDexCachePair source = src[i].load(std::memory_order_relaxed);
+    mirror::String* ptr = source.string_pointer.Read<kReadBarrierOption>();
+    mirror::String* new_source = visitor(ptr);
+    source.string_pointer = GcRoot<String>(new_source);
+    dest[i].store(source, std::memory_order_relaxed);
+  }
+}
+
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
+inline void DexCache::FixupResolvedTypes(GcRoot<mirror::Class>* dest, const Visitor& visitor) {
+  GcRoot<mirror::Class>* src = GetResolvedTypes();
+  for (size_t i = 0, count = NumResolvedTypes(); i < count; ++i) {
+    mirror::Class* source = src[i].Read<kReadBarrierOption>();
+    mirror::Class* new_source = visitor(source);
+    dest[i] = GcRoot<mirror::Class>(new_source);
   }
 }
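
The string-visiting loop in VisitReferences above follows a read/visit/write-back idiom: load the pair, hand the root's slot to the visitor (which may redirect it when the GC moves the object), and store the pair back only when the pointer actually changed. A simplified standalone model, with a function pointer standing in for the visitor and `void*` for the managed pointer:

    #include <atomic>
    #include <cstddef>

    struct Entry {
      void* ptr;       // stands in for the GcRoot payload
      unsigned index;
    };

    // `visitor` may relocate the referent and return its new address.
    void VisitEntries(std::atomic<Entry>* entries, size_t count,
                      void* (*visitor)(void*)) {
      for (size_t i = 0; i < count; ++i) {
        Entry e = entries[i].load(std::memory_order_relaxed);
        void* before = e.ptr;
        void* after = visitor(before);
        if (after != before) {
          e.ptr = after;  // write back only on relocation, so unmoved
                          // entries cost no atomic store
          entries[i].store(e, std::memory_order_relaxed);
        }
      }
    }
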
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 349a319..cfcec9c 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -33,7 +33,7 @@
 
 void DexCache::Init(const DexFile* dex_file,
                     String* location,
-                    GcRoot<String>* strings,
+                    StringDexCacheType* strings,
                     uint32_t num_strings,
                     GcRoot<Class>* resolved_types,
                     uint32_t num_resolved_types,
@@ -41,7 +41,7 @@
                     uint32_t num_resolved_methods,
                     ArtField** resolved_fields,
                     uint32_t num_resolved_fields,
-                    size_t pointer_size) {
+                    PointerSize pointer_size) {
   CHECK(dex_file != nullptr);
   CHECK(location != nullptr);
   CHECK_EQ(num_strings != 0u, strings != nullptr);
@@ -50,11 +50,11 @@
   CHECK_EQ(num_resolved_fields != 0u, resolved_fields != nullptr);
 
   SetDexFile(dex_file);
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
-  SetField64<false>(StringsOffset(), reinterpret_cast<uintptr_t>(strings));
-  SetField64<false>(ResolvedTypesOffset(), reinterpret_cast<uintptr_t>(resolved_types));
-  SetField64<false>(ResolvedMethodsOffset(), reinterpret_cast<uintptr_t>(resolved_methods));
-  SetField64<false>(ResolvedFieldsOffset(), reinterpret_cast<uintptr_t>(resolved_fields));
+  SetLocation(location);
+  SetStrings(strings);
+  SetResolvedTypes(resolved_types);
+  SetResolvedMethods(resolved_methods);
+  SetResolvedFields(resolved_fields);
   SetField32<false>(NumStringsOffset(), num_strings);
   SetField32<false>(NumResolvedTypesOffset(), num_resolved_types);
   SetField32<false>(NumResolvedMethodsOffset(), num_resolved_methods);
@@ -67,7 +67,7 @@
   }
 }
 
-void DexCache::Fixup(ArtMethod* trampoline, size_t pointer_size) {
+void DexCache::Fixup(ArtMethod* trampoline, PointerSize pointer_size) {
   // Fixup the resolve methods array to contain trampoline for resolution.
   CHECK(trampoline != nullptr);
   CHECK(trampoline->IsRuntimeMethod());
@@ -79,5 +79,9 @@
   }
 }
 
+void DexCache::SetLocation(mirror::String* location) {
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 3144553..770c45d 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -35,11 +35,61 @@
 
 class String;
 
+struct PACKED(8) StringDexCachePair {
+  GcRoot<String> string_pointer;
+  uint32_t string_index;
+  // The array is initially [ {0,0}, {0,0}, {0,0} ... ].
+  // We maintain the invariant that once a dex cache entry is populated,
+  // the pointer is always non-0.
+  // Any given entry is therefore either:
+  // {non-0, non-0} OR {0,0}
+  //
+  // It is then generally sufficient to check whether the lookup string index
+  // matches the stored string index (for a string index > 0), because when
+  // it matches, the pointer is also non-null.
+  //
+  // The 0th entry is a special case: its value is either {0,0} (the initial
+  // state) or {non-0, 0}, which indicates that a valid string is stored at
+  // that index for a dex string id of 0.
+  //
+  // As an optimization, we want to avoid branching on the string pointer,
+  // since it is always non-null when the string id comparison succeeds
+  // (except for the 0th string id). So we set the initial state of the 0th
+  // entry to {0,1}, which is guaranteed to fail the
+  // "lookup string id == stored string id" comparison.
+  static void Initialize(StringDexCacheType* strings) {
+    DCHECK(StringDexCacheType().is_lock_free());
+    mirror::StringDexCachePair first_elem;
+    first_elem.string_pointer = GcRoot<String>(nullptr);
+    first_elem.string_index = 1;
+    strings[0].store(first_elem, std::memory_order_relaxed);
+  }
+  static GcRoot<String> LookupString(StringDexCacheType* dex_cache,
+                                     uint32_t string_idx,
+                                     uint32_t cache_size) {
+    StringDexCachePair index_string = dex_cache[string_idx % cache_size]
+        .load(std::memory_order_relaxed);
+    if (string_idx != index_string.string_index) return GcRoot<String>(nullptr);
+    DCHECK(!index_string.string_pointer.IsNull());
+    return index_string.string_pointer;
+  }
+};
+using StringDexCacheType = std::atomic<StringDexCachePair>;
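
The {0,1} sentinel deserves emphasis: string id 0 maps to slot 0, and only there can the "stored index == lookup index" comparison succeed against an empty slot, since empty slots hold index 0. Seeding slot 0 with index 1 makes that comparison fail until a real entry for id 0 is stored, so LookupString never needs a separate null check on the pointer. A tiny standalone check of the two slot-0 states (names illustrative):

    #include <cassert>
    #include <cstdint>

    struct Pair {
      const void* ptr;
      uint32_t index;
    };

    // Mirrors LookupString for string id 0: hit only when stored index == 0.
    const void* LookupId0(Pair slot0) {
      return slot0.index == 0 ? slot0.ptr : nullptr;
    }

    int main() {
      Pair sentinel{nullptr, 1};               // initial {0,1} state
      assert(LookupId0(sentinel) == nullptr);  // miss, pointer never touched
      int dummy = 0;
      Pair populated{&dummy, 0};               // {non-0, 0}: real entry for id 0
      assert(LookupId0(populated) == &dummy);  // hit
    }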
+
 // C++ mirror of java.lang.DexCache.
 class MANAGED DexCache FINAL : public Object {
  public:
   // Size of java.lang.DexCache.class.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
+
+  // Size of string dex cache. Needs to be a power of 2 for entrypoint assumptions to hold.
+  static constexpr size_t kDexCacheStringCacheSize = 1024;
+  static_assert(IsPowerOfTwo(kDexCacheStringCacheSize),
+                "String dex cache size is not a power of 2.");
+
+  static constexpr size_t StaticStringSize() {
+    return kDexCacheStringCacheSize;
+  }
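
The power-of-two constraint on kDexCacheStringCacheSize is presumably what lets compiled entrypoints replace the slot computation's modulo with a single AND of size - 1; the two are equal exactly when the size is a power of two:

    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr uint32_t kSize = 1024;  // mirrors kDexCacheStringCacheSize
      static_assert((kSize & (kSize - 1)) == 0, "must be a power of two");
      const uint32_t samples[] = {0u, 1u, 1023u, 1024u, 123456u};
      for (uint32_t idx : samples) {
        assert(idx % kSize == (idx & (kSize - 1)));  // mask == modulo
      }
      return 0;
    }
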
 
   // Size of an instance of java.lang.DexCache not including referenced values.
   static constexpr uint32_t InstanceSize() {
@@ -48,7 +98,7 @@
 
   void Init(const DexFile* dex_file,
             String* location,
-            GcRoot<String>* strings,
+            StringDexCacheType* strings,
             uint32_t num_strings,
             GcRoot<Class>* resolved_types,
             uint32_t num_resolved_types,
@@ -56,9 +106,17 @@
             uint32_t num_resolved_methods,
             ArtField** resolved_fields,
             uint32_t num_resolved_fields,
-            size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+            PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Fixup(ArtMethod* trampoline, size_t pointer_size)
+  void Fixup(ArtMethod* trampoline, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
+  void FixupStrings(StringDexCacheType* dest, const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
+  void FixupResolvedTypes(GcRoot<mirror::Class>* dest, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   String* GetLocation() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -101,46 +159,70 @@
     return OFFSET_OF_OBJECT_MEMBER(DexCache, num_resolved_methods_);
   }
 
-  String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
+  mirror::String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetResolvedString(uint32_t string_idx, String* resolved) ALWAYS_INLINE
+  void SetResolvedString(uint32_t string_idx, mirror::String* resolved) ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   Class* GetResolvedType(uint32_t type_idx) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetResolvedType(uint32_t type_idx, Class* resolved) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArtMethod* GetResolvedMethod(uint32_t method_idx, size_t ptr_size)
+  ALWAYS_INLINE ArtMethod* GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void SetResolvedMethod(uint32_t method_idx, ArtMethod* resolved, size_t ptr_size)
+  ALWAYS_INLINE void SetResolvedMethod(uint32_t method_idx,
+                                       ArtMethod* resolved,
+                                       PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Pointer sized variant, used for patching.
-  ALWAYS_INLINE ArtField* GetResolvedField(uint32_t idx, size_t ptr_size)
+  ALWAYS_INLINE ArtField* GetResolvedField(uint32_t idx, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Pointer sized variant, used for patching.
-  ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, size_t ptr_size)
+  ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  GcRoot<String>* GetStrings() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldPtr<GcRoot<String>*>(StringsOffset());
+  StringDexCacheType* GetStrings() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetFieldPtr64<StringDexCacheType*>(StringsOffset());
+  }
+
+  void SetStrings(StringDexCacheType* strings) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(StringsOffset(), strings);
   }
 
   GcRoot<Class>* GetResolvedTypes() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtr<GcRoot<Class>*>(ResolvedTypesOffset());
   }
 
+  void SetResolvedTypes(GcRoot<Class>* resolved_types)
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(ResolvedTypesOffset(), resolved_types);
+  }
+
   ArtMethod** GetResolvedMethods() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtr<ArtMethod**>(ResolvedMethodsOffset());
   }
 
+  void SetResolvedMethods(ArtMethod** resolved_methods)
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(ResolvedMethodsOffset(), resolved_methods);
+  }
+
   ArtField** GetResolvedFields() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldPtr<ArtField**>(ResolvedFieldsOffset());
   }
 
+  void SetResolvedFields(ArtField** resolved_fields)
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(ResolvedFieldsOffset(), resolved_fields);
+  }
+
   size_t NumStrings() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetField32(NumStringsOffset());
   }
@@ -161,24 +243,28 @@
     return GetFieldPtr<const DexFile*>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_));
   }
 
-  void SetDexFile(const DexFile* dex_file) SHARED_REQUIRES(Locks::mutator_lock_)
-      ALWAYS_INLINE {
-    return SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file);
+  void SetDexFile(const DexFile* dex_file) SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file);
   }
 
+  void SetLocation(mirror::String* location) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // NOTE: Get/SetElementPtrSize() are intended for working with ArtMethod** and ArtField**
   // provided by GetResolvedMethods/Fields() and ArtMethod::GetDexCacheResolvedMethods(),
   // so they need to be public.
 
   template <typename PtrType>
-  static PtrType GetElementPtrSize(PtrType* ptr_array, size_t idx, size_t ptr_size);
+  static PtrType GetElementPtrSize(PtrType* ptr_array, size_t idx, PointerSize ptr_size);
 
   template <typename PtrType>
-  static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, size_t ptr_size);
+  static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, PointerSize ptr_size);
 
  private:
   // Visit instance fields of the dex cache as well as its associated arrays.
-  template <VerifyObjectFlags kVerifyFlags, typename Visitor>
+  template <bool kVisitNativeRoots,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
@@ -188,7 +274,8 @@
   uint64_t resolved_fields_;    // ArtField*, array with num_resolved_fields_ elements.
   uint64_t resolved_methods_;   // ArtMethod*, array with num_resolved_methods_ elements.
   uint64_t resolved_types_;     // GcRoot<Class>*, array with num_resolved_types_ elements.
-  uint64_t strings_;            // GcRoot<String>*, array with num_strings_ elements.
+  uint64_t strings_;            // std::atomic<StringDexCachePair>*,
+                                // array with num_strings_ elements.
   uint32_t num_resolved_fields_;    // Number of elements in the resolved_fields_ array.
   uint32_t num_resolved_methods_;   // Number of elements in the resolved_methods_ array.
   uint32_t num_resolved_types_;     // Number of elements in the resolved_types_ array.
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 48f2ca5..175997c 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -22,6 +22,7 @@
 #include "common_runtime_test.h"
 #include "linear_alloc.h"
 #include "mirror/class_loader-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -40,7 +41,8 @@
                                                 Runtime::Current()->GetLinearAlloc())));
   ASSERT_TRUE(dex_cache.Get() != nullptr);
 
-  EXPECT_EQ(java_lang_dex_file_->NumStringIds(), dex_cache->NumStrings());
+  EXPECT_TRUE(dex_cache->StaticStringSize() == dex_cache->NumStrings()
+      || java_lang_dex_file_->NumStringIds() == dex_cache->NumStrings());
   EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),   dex_cache->NumResolvedTypes());
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods());
   EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 8a0daec..8b0f8ce 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -27,9 +27,8 @@
 
 namespace mirror {
 
-template <bool kTransactionActive>
-inline mirror::Field* Field::CreateFromArtField(Thread* self, ArtField* field,
-                                                bool force_resolve) {
+template <PointerSize kPointerSize, bool kTransactionActive>
+inline mirror::Field* Field::CreateFromArtField(Thread* self, ArtField* field, bool force_resolve) {
   StackHandleScope<2> hs(self);
   // Try to resolve type before allocating since this is a thread suspension point.
   Handle<mirror::Class> type = hs.NewHandle(field->GetType<true>());
@@ -54,10 +53,8 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  const auto pointer_size = kTransactionActive ?
-      Runtime::Current()->GetClassLinker()->GetImagePointerSize() : sizeof(void*);
   auto dex_field_index = field->GetDexFieldIndex();
-  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, pointer_size);
+  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, kPointerSize);
   if (field->GetDeclaringClass()->IsProxyClass()) {
     DCHECK(field->IsStatic());
     DCHECK_LT(dex_field_index, 2U);
@@ -70,7 +67,7 @@
     } else {
       // We rely on the field being resolved so that we can get back to the ArtField
       // (i.e. FromReflectedMethod).
-      field->GetDexCache()->SetResolvedField(dex_field_index, field, pointer_size);
+      field->GetDexCache()->SetResolvedField(dex_field_index, field, kPointerSize);
     }
   }
   ret->SetType<kTransactionActive>(type.Get());
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index ff6847c..65f6b16 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -68,7 +68,7 @@
     }
   }
   mirror::DexCache* const dex_cache = declaring_class->GetDexCache();
-  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), sizeof(void*));
+  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), kRuntimePointerSize);
   CHECK(art_field != nullptr);
   CHECK_EQ(declaring_class, art_field->GetDeclaringClass());
   return art_field;
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index edaddbd..93fd7f1 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_FIELD_H_
 
 #include "accessible_object.h"
+#include "base/enums.h"
 #include "gc_root.h"
 #include "object.h"
 #include "object_callbacks.h"
@@ -92,7 +93,7 @@
   // Slow, try to use only for PrettyField and such.
   ArtField* GetArtField() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive = false>
   static mirror::Field* CreateFromArtField(Thread* self, ArtField* field,
                                            bool force_resolve)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index b21ecdf..d6571f2 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -34,14 +34,20 @@
   ALWAYS_INLINE void SetInterface(int32_t i, Class* interface)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   PointerArray* GetMethodArray(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto* method_array = down_cast<PointerArray*>(Get((i * kMax) + kMethodArray));
+    auto* method_array = down_cast<PointerArray*>(Get<kVerifyFlags, kReadBarrierOption>(
+        (i * kMax) + kMethodArray));
     DCHECK(method_array != nullptr);
     return method_array;
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetMethodArrayCount(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto* method_array = down_cast<PointerArray*>(Get((i * kMax) + kMethodArray));
+    auto* method_array = down_cast<PointerArray*>(
+        Get<kVerifyFlags, kReadBarrierOption>((i * kMax) + kMethodArray));
     return method_array == nullptr ? 0u : method_array->GetLength();
   }
 
diff --git a/runtime/mirror/method.cc b/runtime/mirror/method.cc
index 85c52e9..ef16719 100644
--- a/runtime/mirror/method.cc
+++ b/runtime/mirror/method.cc
@@ -51,15 +51,26 @@
   array_class_ = GcRoot<Class>(nullptr);
 }
 
+template <PointerSize kPointerSize, bool kTransactionActive>
 Method* Method::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(!method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Method*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod(method);
+    static_cast<AbstractMethod*>(ret)->
+        CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
   return ret;
 }
 
+template Method* Method::CreateFromArtMethod<PointerSize::k32, false>(Thread* self,
+                                                                      ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k32, true>(Thread* self,
+                                                                     ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k64, false>(Thread* self,
+                                                                      ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k64, true>(Thread* self,
+                                                                     ArtMethod* method);
+
 void Method::VisitRoots(RootVisitor* visitor) {
   static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
   array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
@@ -92,14 +103,25 @@
   array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+template <PointerSize kPointerSize, bool kTransactionActive>
 Constructor* Constructor::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Constructor*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod(method);
+    static_cast<AbstractMethod*>(ret)->
+        CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
   return ret;
 }
 
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k32, false>(
+    Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k32, true>(
+    Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k64, false>(
+    Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k64, true>(
+    Thread* self, ArtMethod* method);
+
 }  // namespace mirror
 }  // namespace art
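
CreateFromArtMethod is now templated on PointerSize and the transaction flag but still defined out of line, so every combination callers may use is explicitly instantiated at the bottom of the .cc file. The general pattern, reduced to a sketch with hypothetical names:

    // widget.h -- declaration only.
    template <int kSize, bool kFlag>
    int MakeWidget(int seed);

    // widget.cc -- out-of-line definition, followed by explicit
    // instantiation of each supported combination. A combination that
    // is not instantiated here fails at link time, not compile time.
    template <int kSize, bool kFlag>
    int MakeWidget(int seed) {
      return kFlag ? seed * kSize : seed + kSize;
    }

    template int MakeWidget<4, false>(int seed);
    template int MakeWidget<4, true>(int seed);
    template int MakeWidget<8, false>(int seed);
    template int MakeWidget<8, true>(int seed);
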
diff --git a/runtime/mirror/method.h b/runtime/mirror/method.h
index 0c28e4f..be51784 100644
--- a/runtime/mirror/method.h
+++ b/runtime/mirror/method.h
@@ -28,6 +28,7 @@
 // C++ mirror of java.lang.reflect.Method.
 class MANAGED Method : public AbstractMethod {
  public:
+  template <PointerSize kPointerSize, bool kTransactionActive>
   static Method* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -59,6 +60,7 @@
 // C++ mirror of java.lang.reflect.Constructor.
 class MANAGED Constructor: public AbstractMethod {
  public:
+  template <PointerSize kPointerSize, bool kTransactionActive>
   static Constructor* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 90180c5..0495c95 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -40,7 +40,7 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t Object::ClassSize(size_t pointer_size) {
+inline uint32_t Object::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = kVTableLength;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
@@ -95,12 +95,22 @@
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
+inline bool Object::CasLockWordWeakRelease(LockWord old_val, LockWord new_val) {
+  // Force use of non-transactional mode and do not check.
+  return CasFieldWeakRelease32<false, false>(
+      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
+}
+
 inline uint32_t Object::GetLockOwnerThreadId() {
   return Monitor::GetLockOwnerThreadId(this);
 }
 
 inline mirror::Object* Object::MonitorEnter(Thread* self) {
-  return Monitor::MonitorEnter(self, this);
+  return Monitor::MonitorEnter(self, this, /*trylock*/false);
+}
+
+inline mirror::Object* Object::MonitorTryEnter(Thread* self) {
+  return Monitor::MonitorEnter(self, this, /*trylock*/true);
 }
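
MonitorTryEnter simply forwards to MonitorEnter with the new trylock flag; per the header change further down, it returns non-null on success. The calling pattern it enables is the usual try-lock one, shown here against std::mutex purely for shape:

    #include <mutex>

    std::mutex gate;
    int shared_state = 0;

    void UpdateIfUncontended() {
      std::unique_lock<std::mutex> lk(gate, std::try_to_lock);
      if (!lk.owns_lock()) {
        return;  // contended: skip or defer instead of blocking
      }
      ++shared_state;  // critical section
    }
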
 
 inline bool Object::MonitorExit(Thread* self) {
@@ -137,10 +147,20 @@
 #endif
 }
 
+inline uint32_t Object::GetMarkBit() {
+#ifdef USE_READ_BARRIER
+  return GetLockWord(false).MarkBitState();
+#else
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+#endif
+}
+
 inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
   LockWord lw = GetLockWord(false);
   lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
   SetLockWord(lw, false);
@@ -157,11 +177,14 @@
 #endif
 }
 
+template<bool kCasRelease>
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
   DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
   LockWord expected_lw;
   LockWord new_lw;
   do {
@@ -175,7 +198,13 @@
         static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
     new_lw = lw;
     new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
-  } while (!CasLockWordWeakSequentiallyConsistent(expected_lw, new_lw));
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS).
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
 #elif USE_BROOKS_READ_BARRIER
   DCHECK(kUseBrooksReadBarrier);
@@ -199,6 +228,24 @@
 #endif
 }
 
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    new_lw = lw;
+    new_lw.SetMarkBitState(mark_bit);
+    // Since this is only set from the mutator, we can use the non-release CAS.
+  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+  return true;
+}
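
AtomicSetReadBarrierPointer and AtomicSetMarkBit are two instances of the same read-modify-CAS retry loop; only the memory order differs: release when later readers must also observe earlier field stores (the GC's gray-to-black transition described in the comment above), relaxed otherwise. A standalone model over a bare 32-bit word, with the lock-word layout abstracted into a mask:

    #include <atomic>
    #include <cstdint>

    // Flip the bits selected by `mask` from `expected` to `desired`
    // (both already shifted into position), leaving other bits intact.
    // Returns false if another thread changed those bits first.
    bool SetBitState(std::atomic<uint32_t>& word,
                     uint32_t mask,
                     uint32_t expected,
                     uint32_t desired,
                     bool release) {
      uint32_t old_word = word.load(std::memory_order_relaxed);
      while (true) {
        if ((old_word & mask) != expected) {
          return false;  // lost the race
        }
        const uint32_t new_word = (old_word & ~mask) | desired;
        const std::memory_order order =
            release ? std::memory_order_release : std::memory_order_relaxed;
        // Weak CAS may fail spuriously; on failure it reloads old_word
        // and we re-validate the bits before retrying.
        if (word.compare_exchange_weak(old_word, new_word, order,
                                       std::memory_order_relaxed)) {
          return true;
        }
      }
    }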
+
 inline void Object::AssertReadBarrierPointer() const {
   if (kUseBakerReadBarrier) {
     Object* obj = const_cast<Object*>(this);
@@ -242,16 +289,17 @@
   return down_cast<Class*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsObjectArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  return IsArrayInstance<kVerifyFlags>() &&
-      !GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitive();
+  return IsArrayInstance<kVerifyFlags, kReadBarrierOption>() &&
+      !GetClass<kNewFlags, kReadBarrierOption>()->
+          template GetComponentType<kNewFlags, kReadBarrierOption>()->IsPrimitive();
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline ObjectArray<T>* Object::AsObjectArray() {
-  DCHECK(IsObjectArray<kVerifyFlags>());
+  DCHECK((IsObjectArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<ObjectArray<T>*>(this);
 }
 
@@ -261,14 +309,14 @@
       template IsArrayClass<kVerifyFlags, kReadBarrierOption>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsReferenceInstance() {
-  return GetClass<kVerifyFlags>()->IsTypeOfReferenceClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsTypeOfReferenceClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Reference* Object::AsReference() {
-  DCHECK(IsReferenceInstance<kVerifyFlags>());
+  DCHECK((IsReferenceInstance<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Reference*>(this);
 }
 
@@ -328,29 +376,31 @@
   return down_cast<ShortArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsIntArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  auto* component_type = GetClass<kVerifyFlags>()->GetComponentType();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveInt<kNewFlags>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline IntArray* Object::AsIntArray() {
-  DCHECK(IsIntArray<kVerifyFlags>());
+  DCHECK((IsIntArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<IntArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsLongArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  auto* component_type = GetClass<kVerifyFlags>()->GetComponentType();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveLong<kNewFlags>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline LongArray* Object::AsLongArray() {
-  DCHECK(IsLongArray<kVerifyFlags>());
+  DCHECK((IsLongArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<LongArray*>(this);
 }
 
@@ -671,6 +721,24 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelease32(MemberOffset field_offset,
+                                          int32_t old_value, int32_t new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
+
+  return atomic_addr->CompareExchangeWeakRelease(old_value, new_value);
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldStrongSequentiallyConsistent32(MemberOffset field_offset,
                                                            int32_t old_value, int32_t new_value) {
   if (kCheckTransaction) {
@@ -944,7 +1012,66 @@
   return success;
 }
 
-template<bool kIsStatic, typename Visitor>
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelaxedObjectWithoutWriteBarrier(
+    MemberOffset field_offset, Object* old_value, Object* new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref.reference_,
+                                                         new_ref.reference_);
+  return success;
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
+    MemberOffset field_offset, Object* old_value, Object* new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
+                                                           new_ref.reference_);
+  return success;
+}
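
The weak and strong relaxed CAS helpers differ only in which compare-exchange they forward to: the weak form may fail spuriously and so belongs inside retry loops, while the strong form suits one-shot attempts. In plain standard-C++ terms:

    #include <atomic>

    std::atomic<int> slot{0};

    bool OneShot(int old_v, int new_v) {
      // Strong CAS: fails only if the value really differs.
      return slot.compare_exchange_strong(old_v, new_v,
                                          std::memory_order_relaxed);
    }

    bool InLoop(int old_v, int new_v) {
      // Weak CAS: may fail spuriously, so callers retry; usually cheaper
      // on LL/SC architectures such as ARM.
      int expected = old_v;
      while (!slot.compare_exchange_weak(expected, new_v,
                                         std::memory_order_relaxed)) {
        if (expected != old_v) return false;  // genuinely changed
      }
      return true;
    }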
+
+template<bool kIsStatic,
+         VerifyObjectFlags kVerifyFlags,
+         ReadBarrierOption kReadBarrierOption,
+         typename Visitor>
 inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) {
   if (!kIsStatic && (ref_offsets != mirror::Class::kClassWalkSuper)) {
     // Instance fields and not the slow-path.
@@ -960,9 +1087,12 @@
     // There is no reference offset bitmap. In the non-static case, walk up the class
     // inheritance hierarchy and find reference offsets the hard way. In the static case, just
     // consider this class.
-    for (mirror::Class* klass = kIsStatic ? AsClass() : GetClass(); klass != nullptr;
-        klass = kIsStatic ? nullptr : klass->GetSuperClass()) {
-      size_t num_reference_fields =
+    for (mirror::Class* klass = kIsStatic
+            ? AsClass<kVerifyFlags, kReadBarrierOption>()
+            : GetClass<kVerifyFlags, kReadBarrierOption>();
+        klass != nullptr;
+        klass = kIsStatic ? nullptr : klass->GetSuperClass<kVerifyFlags, kReadBarrierOption>()) {
+      const size_t num_reference_fields =
           kIsStatic ? klass->NumReferenceStaticFields() : klass->NumReferenceInstanceFields();
       if (num_reference_fields == 0u) {
         continue;
@@ -970,9 +1100,9 @@
       // Presumably GC can happen when we are cross compiling; it should not cause performance
       // problems to do pointer size logic.
       MemberOffset field_offset = kIsStatic
-          ? klass->GetFirstReferenceStaticFieldOffset(
+          ? klass->GetFirstReferenceStaticFieldOffset<kVerifyFlags, kReadBarrierOption>(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
-          : klass->GetFirstReferenceInstanceFieldOffset();
+          : klass->GetFirstReferenceInstanceFieldOffset<kVerifyFlags, kReadBarrierOption>();
       for (size_t i = 0u; i < num_reference_fields; ++i) {
         // TODO: Do a simpler check?
         if (field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
@@ -985,74 +1115,85 @@
   }
 }
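
On the fast path (not shown in this hunk), ref_offsets is a bitmap of which heap-reference slots of the instance hold references; the hierarchy walk above only runs when that bitmap is unavailable. A sketch of a set-bit walk over such a bitmap, assuming 4-byte compressed references and a made-up base offset:

    #include <cstdint>

    constexpr uint32_t kRefSlotSize = 4;  // compressed heap references
    constexpr uint32_t kFieldsBase = 8;   // hypothetical first-field offset

    template <typename Visitor>
    void VisitBitmap(uint32_t ref_offsets, const Visitor& visit) {
      while (ref_offsets != 0) {
        const uint32_t bit = __builtin_ctz(ref_offsets);  // GCC/Clang builtin
        visit(kFieldsBase + bit * kRefSlotSize);          // byte offset of field
        ref_offsets &= ref_offsets - 1;                   // clear lowest set bit
      }
    }
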
 
-template<typename Visitor>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Object::VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitFieldsReferences<false>(klass->GetReferenceInstanceOffsets<kVerifyNone>(), visitor);
+  VisitFieldsReferences<false, kVerifyFlags, kReadBarrierOption>(
+      klass->GetReferenceInstanceOffsets<kVerifyFlags>(), visitor);
 }
 
-template<typename Visitor>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Object::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
   DCHECK(!klass->IsTemp());
-  klass->VisitFieldsReferences<true>(0, visitor);
+  klass->VisitFieldsReferences<true, kVerifyFlags, kReadBarrierOption>(0, visitor);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsClassLoader() {
-  return GetClass<kVerifyFlags>()->IsClassLoaderClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsClassLoaderClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline mirror::ClassLoader* Object::AsClassLoader() {
-  DCHECK(IsClassLoader<kVerifyFlags>());
+  DCHECK((IsClassLoader<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<mirror::ClassLoader*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsDexCache() {
-  return GetClass<kVerifyFlags>()->IsDexCacheClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsDexCacheClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline mirror::DexCache* Object::AsDexCache() {
-  DCHECK(IsDexCache<kVerifyFlags>());
+  DCHECK((IsDexCache<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<mirror::DexCache*>(this);
 }
 
-template <VerifyObjectFlags kVerifyFlags, typename Visitor, typename JavaLangRefVisitor>
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor,
+          typename JavaLangRefVisitor>
 inline void Object::VisitReferences(const Visitor& visitor,
                                     const JavaLangRefVisitor& ref_visitor) {
-  mirror::Class* klass = GetClass<kVerifyFlags>();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
   visitor(this, ClassOffset(), false);
   const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
   if (LIKELY(class_flags == kClassFlagNormal)) {
-    DCHECK(!klass->IsVariableSize());
-    VisitInstanceFieldsReferences(klass, visitor);
-    DCHECK(!klass->IsClassClass());
+    DCHECK((!klass->IsVariableSize<kVerifyFlags, kReadBarrierOption>()));
+    VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+    DCHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
     DCHECK(!klass->IsStringClass());
     DCHECK(!klass->IsClassLoaderClass());
-    DCHECK(!klass->IsArrayClass());
+    DCHECK((!klass->IsArrayClass<kVerifyFlags, kReadBarrierOption>()));
   } else {
     if ((class_flags & kClassFlagNoReferenceFields) == 0) {
       DCHECK(!klass->IsStringClass());
       if (class_flags == kClassFlagClass) {
-        DCHECK(klass->IsClassClass());
-        AsClass<kVerifyNone>()->VisitReferences(klass, visitor);
+        DCHECK((klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+        mirror::Class* as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
+        as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass,
+                                                                                       visitor);
       } else if (class_flags == kClassFlagObjectArray) {
-        DCHECK(klass->IsObjectArrayClass());
-        AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences(visitor);
+        DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
+        AsObjectArray<mirror::Object, kVerifyNone, kReadBarrierOption>()->VisitReferences(visitor);
       } else if ((class_flags & kClassFlagReference) != 0) {
-        VisitInstanceFieldsReferences(klass, visitor);
-        ref_visitor(klass, AsReference());
+        VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+        ref_visitor(klass, AsReference<kVerifyFlags, kReadBarrierOption>());
       } else if (class_flags == kClassFlagDexCache) {
-        mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags>();
-        dex_cache->VisitReferences<kVerifyFlags>(klass, visitor);
+        mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>();
+        dex_cache->VisitReferences<kVisitNativeRoots,
+                                   kVerifyFlags,
+                                   kReadBarrierOption>(klass, visitor);
       } else {
-        mirror::ClassLoader* const class_loader = AsClassLoader<kVerifyFlags>();
-        class_loader->VisitReferences<kVerifyFlags>(klass, visitor);
+        mirror::ClassLoader* const class_loader = AsClassLoader<kVerifyFlags, kReadBarrierOption>();
+        class_loader->VisitReferences<kVisitNativeRoots,
+                                      kVerifyFlags,
+                                      kReadBarrierOption>(klass, visitor);
       }
     } else if (kIsDebugBuild) {
-      CHECK(!klass->IsClassClass());
-      CHECK(!klass->IsObjectArrayClass());
+      CHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+      CHECK((!klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
       // String still has instance fields for reflection purposes but these don't exist in
       // actual string instances.
       if (!klass->IsStringClass()) {
@@ -1060,7 +1201,7 @@
         mirror::Class* super_class = klass;
         do {
           total_reference_instance_fields += super_class->NumReferenceInstanceFields();
-          super_class = super_class->GetSuperClass();
+          super_class = super_class->GetSuperClass<kVerifyFlags, kReadBarrierOption>();
         } while (super_class != nullptr);
         // The only reference field should be the object's class. This field is handled at the
         // beginning of the function.
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 4d94130..13c536e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -163,8 +163,7 @@
       case LockWord::kUnlocked: {
         // Try to compare and swap in a new hash, if we succeed we will return the hash on the next
         // loop iteration.
-        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(),
-                                                    lw.ReadBarrierState());
+        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState());
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
         if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
           return hash_word.GetHashCode();
@@ -183,7 +182,7 @@
         break;
       }
       case LockWord::kFatLocked: {
-        // Already inflated, return the has stored in the monitor.
+        // Already inflated, return the hash stored in the monitor.
         Monitor* monitor = lw.FatLockMonitor();
         DCHECK(monitor != nullptr);
         return monitor->GetHashCode();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index f75b8ae..5b129bf 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_MIRROR_OBJECT_H_
 #define ART_RUNTIME_MIRROR_OBJECT_H_
 
+#include "base/casts.h"
+#include "base/enums.h"
 #include "globals.h"
 #include "object_reference.h"
 #include "offsets.h"
@@ -73,7 +75,7 @@
   static constexpr size_t kVTableLength = 11;
 
   // The size of the java.lang.Class representing a java.lang.Object.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of java.lang.Object.
   static constexpr uint32_t InstanceSize() {
@@ -91,16 +93,23 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // TODO: Clean this up and change to return int32_t
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
+
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
-  bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
+
+  template<bool kCasRelease = false>
+  ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE uint32_t GetMarkBit() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
@@ -135,8 +144,15 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  bool CasLockWordWeakRelease(LockWord old_val, LockWord new_val)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
+  // Try to enter the monitor; returns non-null if we succeeded.
+  mirror::Object* MonitorTryEnter(Thread* self)
+      EXCLUSIVE_LOCK_FUNCTION()
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* MonitorEnter(Thread* self)
       EXCLUSIVE_LOCK_FUNCTION()
       REQUIRES(!Roles::uninterruptible_)
@@ -157,19 +173,26 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* AsClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<class T,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ObjectArray<T>* AsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ClassLoader* AsClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   DexCache* AsDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -193,14 +216,18 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ShortArray* AsShortSizedArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsIntArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   IntArray* AsIntArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsLongArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   LongArray* AsLongArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -224,9 +251,11 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   Throwable* AsThrowable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsReferenceInstance() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* AsReference() SHARED_REQUIRES(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsWeakReferenceInstance() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -276,7 +305,6 @@
                                                                    Object* old_value,
                                                                    Object* new_value)
       SHARED_REQUIRES(Locks::mutator_lock_);
-
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
@@ -288,6 +316,18 @@
                                                                      Object* old_value,
                                                                      Object* new_value)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                    Object* old_value,
+                                                    Object* new_value)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                      Object* old_value,
+                                                      Object* new_value)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset);
@@ -396,6 +436,12 @@
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakRelease32(MemberOffset field_offset, int32_t old_value,
+                             int32_t new_value) ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongSequentiallyConsistent32(MemberOffset field_offset, int32_t old_value,
                                               int32_t new_value) ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -435,28 +481,37 @@
   void SetFieldPtr(MemberOffset field_offset, T new_value)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     SetFieldPtrWithSize<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-        field_offset, new_value, sizeof(void*));
+        field_offset, new_value, kRuntimePointerSize);
+  }
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
+  void SetFieldPtr64(MemberOffset field_offset, T new_value)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<kTransactionActive, kCheckTransaction, kVerifyFlags>(
+        field_offset, new_value, 8u);
   }
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
-  ALWAYS_INLINE void SetFieldPtrWithSize(MemberOffset field_offset, T new_value,
-                                         size_t pointer_size)
+  ALWAYS_INLINE void SetFieldPtrWithSize(MemberOffset field_offset,
+                                         T new_value,
+                                         PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
-    if (pointer_size == 4) {
+    if (pointer_size == PointerSize::k32) {
       intptr_t ptr  = reinterpret_cast<intptr_t>(new_value);
       DCHECK_EQ(static_cast<int32_t>(ptr), ptr);  // Check that we don't lose any non-zero bits.
       SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags>(
           field_offset, static_cast<int32_t>(ptr));
     } else {
       SetField64<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-          field_offset, static_cast<int64_t>(reinterpret_cast<uintptr_t>(new_value)));
+          field_offset, reinterpret_cast64<int64_t>(new_value));
     }
   }
   // TODO fix thread safety analysis broken by the use of template. This should be
   // SHARED_REQUIRES(Locks::mutator_lock_).
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+  template <bool kVisitNativeRoots = true,
+            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor,
             typename JavaLangRefVisitor = VoidFunctor>
   void VisitReferences(const Visitor& visitor, const JavaLangRefVisitor& ref_visitor)
@@ -474,31 +529,41 @@
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   T GetFieldPtr(MemberOffset field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, sizeof(void*));
+    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, kRuntimePointerSize);
+  }
+  template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
+  T GetFieldPtr64(MemberOffset field_offset)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset,
+                                                             PointerSize::k64);
   }
 
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, size_t pointer_size)
+  ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
-    if (pointer_size == 4) {
+    if (pointer_size == PointerSize::k32) {
       return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
     } else {
       int64_t v = GetField64<kVerifyFlags, kIsVolatile>(field_offset);
-      // Check that we dont lose any non 0 bits.
-      DCHECK_EQ(static_cast<int64_t>(static_cast<uintptr_t>(v)), v);
-      return reinterpret_cast<T>(static_cast<uintptr_t>(v));
+      return reinterpret_cast64<T>(v);
     }
   }
 
   // TODO: Fix this when annotalysis works with visitors.
-  template<bool kIsStatic, typename Visitor>
+  template<bool kIsStatic,
+          VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+          ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+          typename Visitor>
   void VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) HOT_ATTR
       NO_THREAD_SAFETY_ANALYSIS;
-  template<typename Visitor>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+           typename Visitor>
   void VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
       SHARED_REQUIRES(Locks::mutator_lock_);
-  template<typename Visitor>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+           typename Visitor>
   void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
       SHARED_REQUIRES(Locks::mutator_lock_);
 
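The new CasFieldWeakRelaxedObjectWithoutWriteBarrier / CasFieldStrongRelaxedObjectWithoutWriteBarrier pairs distinguish weak from strong compare-and-swap and relaxed from sequentially consistent ordering, the same distinctions std::atomic draws; the WithoutWriteBarrier suffix indicates the caller takes on the GC write barrier itself. A minimal sketch of the CAS semantics only (illustrative names, not ART's field machinery):

    #include <atomic>

    // Weak CAS may fail spuriously even when the value matches, so callers
    // normally retry in a loop; strong CAS fails only on a real mismatch.
    // Relaxed ordering provides atomicity but no happens-before edges.
    bool WeakRelaxedCas(std::atomic<void*>& field, void* expected, void* desired) {
      // compare_exchange_weak rewrites `expected` with the observed value on failure.
      return field.compare_exchange_weak(expected, desired,
                                         std::memory_order_relaxed,
                                         std::memory_order_relaxed);
    }

    bool StrongRelaxedCas(std::atomic<void*>& field, void* expected, void* desired) {
      return field.compare_exchange_strong(expected, desired,
                                           std::memory_order_relaxed,
                                           std::memory_order_relaxed);
    }
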
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 5b73557..c3c5231 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -55,13 +55,13 @@
                Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
-template<class T>
+template<class T> template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T* ObjectArray<T>::Get(int32_t i) {
   if (!CheckIsValidIndex(i)) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return nullptr;
   }
-  return GetFieldObject<T>(OffsetOfElement(i));
+  return GetFieldObject<T, kVerifyFlags, kReadBarrierOption>(OffsetOfElement(i));
 }
 
 template<class T> template<VerifyObjectFlags kVerifyFlags>
@@ -197,6 +197,7 @@
 }
 
 template<class T>
+template<bool kTransactionActive>
 inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src,
                                                      int32_t src_pos, int32_t count,
                                                      bool throw_exception) {
@@ -215,15 +216,15 @@
     o = src->GetWithoutChecks(src_pos + i);
     if (o == nullptr) {
       // Null is always assignable.
-      SetWithoutChecks<false>(dst_pos + i, nullptr);
+      SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
     } else {
       // TODO: use the underlying class reference to avoid uncompression when not necessary.
       Class* o_class = o->GetClass();
       if (LIKELY(lastAssignableElementClass == o_class)) {
-        SetWithoutChecks<false>(dst_pos + i, o);
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
       } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
         lastAssignableElementClass = o_class;
-        SetWithoutChecks<false>(dst_pos + i, o);
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
       } else {
         // Can't put this element into the array, break to perform write-barrier and throw
         // exception.
@@ -270,7 +271,7 @@
 }
 
 template<class T> template<typename Visitor>
-void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
+inline void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
   const size_t length = static_cast<size_t>(GetLength());
   for (size_t i = 0; i < length; ++i) {
     visitor(this, OffsetOfElement(i), false);
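
The double template headers above (template<class T> template<...>) are the required C++ syntax for defining a member template of a class template out of line. A self-contained illustration of the same pattern (hypothetical Holder type):

    template <class T>
    class Holder {
     public:
      template <bool kChecked>
      T Get(int index);

     private:
      T data_[16] = {};
    };

    // One template header per enclosing template, outermost first, matching
    // the shape of ObjectArray<T>::Get's definition above.
    template <class T>
    template <bool kChecked>
    T Holder<T>::Get(int index) {
      if (kChecked && (index < 0 || index >= 16)) {
        return T();  // Real code would report the error instead.
      }
      return data_[index];
    }
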
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index b45cafd..a99d616 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -26,7 +26,7 @@
 class MANAGED ObjectArray: public Array {
  public:
   // The size of Object[].class.
-  static uint32_t ClassSize(size_t pointer_size) {
+  static uint32_t ClassSize(PointerSize pointer_size) {
     return Array::ClassSize(pointer_size);
   }
 
@@ -37,7 +37,9 @@
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  T* Get(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_);
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ALWAYS_INLINE T* Get(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the object can be stored into the array. If not, throws
   // an ArrayStoreException and returns false.
@@ -76,6 +78,7 @@
                         int32_t count) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Copy src into this array with assignability checks.
+  template<bool kTransactionActive>
   void AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
                                 int32_t count, bool throw_exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
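
Threading kTransactionActive through AssignableCheckingMemcpy as a template parameter hoists the transaction check out of the per-element loop: each instantiation bakes the flag in, and a single runtime branch picks the specialization. A hedged sketch of that dispatch pattern (illustrative names):

    template <bool kTransactionActive>
    void CopyElements(int* dst, const int* src, int* rollback_log, int count) {
      for (int i = 0; i < count; ++i) {
        if (kTransactionActive) {
          // Record the old value for rollback; this branch is compiled away
          // entirely in the <false> instantiation.
          rollback_log[i] = dst[i];
        }
        dst[i] = src[i];
      }
    }

    void CopyElementsDispatch(int* dst, const int* src, int* rollback_log,
                              int count, bool transaction_active) {
      // One runtime branch selects a fully specialized loop, mirroring how a
      // Runtime::Current()->IsActiveTransaction() check picks the instantiation.
      if (transaction_active) {
        CopyElements<true>(dst, src, rollback_log, count);
      } else {
        CopyElements<false>(dst, src, rollback_log, count);
      }
    }
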
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index f5a0445..b35a479 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -24,6 +24,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "asm_support.h"
+#include "base/enums.h"
 #include "class-inl.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -61,7 +62,7 @@
     Handle<String> string(
         hs.NewHandle(String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in)));
     ASSERT_EQ(expected_utf16_length, string->GetLength());
-    ASSERT_TRUE(string->GetValue() != nullptr);
+    ASSERT_EQ(string->IsValueNull(), false);
     // strlen is necessary because the 1-character string "\x00\x00" is interpreted as ""
     ASSERT_TRUE(string->Equals(utf8_in) || (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     ASSERT_TRUE(string->Equals(StringPiece(utf8_in)) ||
@@ -78,9 +79,11 @@
   EXPECT_EQ(kObjectReferenceSize, sizeof(HeapReference<Object>));
   EXPECT_EQ(kObjectHeaderSize, sizeof(Object));
   EXPECT_EQ(ART_METHOD_QUICK_CODE_OFFSET_32,
-            ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value());
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(PointerSize::k32).
+                Int32Value());
   EXPECT_EQ(ART_METHOD_QUICK_CODE_OFFSET_64,
-            ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value());
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(PointerSize::k64).
+                Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
@@ -306,11 +309,8 @@
   // pretend we are trying to call 'new char[3]' from String.toCharArray
   ScopedObjectAccess soa(Thread::Current());
   Class* java_util_Arrays = class_linker_->FindSystemClass(soa.Self(), "Ljava/util/Arrays;");
-  ArtMethod* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V", sizeof(void*));
-  const DexFile::StringId* string_id = java_lang_dex_file_->FindStringId("[I");
-  ASSERT_TRUE(string_id != nullptr);
-  const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId(
-      java_lang_dex_file_->GetIndexForStringId(*string_id));
+  ArtMethod* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V", kRuntimePointerSize);
+  const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId("[I");
   ASSERT_TRUE(type_id != nullptr);
   uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
   Object* array = CheckAndAllocArrayFromCodeInstrumented(
@@ -366,17 +366,11 @@
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<ClassLoader*>(class_loader)));
   Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", loader);
-  ArtMethod* clinit = klass->FindClassInitializer(sizeof(void*));
-  const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
-  ASSERT_TRUE(klass_string_id != nullptr);
-  const DexFile::TypeId* klass_type_id = dex_file->FindTypeId(
-      dex_file->GetIndexForStringId(*klass_string_id));
+  ArtMethod* clinit = klass->FindClassInitializer(kRuntimePointerSize);
+  const DexFile::TypeId* klass_type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(klass_type_id != nullptr);
 
-  const DexFile::StringId* type_string_id = dex_file->FindStringId("Ljava/lang/Object;");
-  ASSERT_TRUE(type_string_id != nullptr);
-  const DexFile::TypeId* type_type_id = dex_file->FindTypeId(
-      dex_file->GetIndexForStringId(*type_string_id));
+  const DexFile::TypeId* type_type_id = dex_file->FindTypeId("Ljava/lang/Object;");
   ASSERT_TRUE(type_type_id != nullptr);
 
   const DexFile::StringId* name_str_id = dex_file->FindStringId("s0");
@@ -508,22 +502,22 @@
   Class* klass2 = linker->FindClass(soa.Self(), "LProtoCompare2;", class_loader_2);
   ASSERT_TRUE(klass2 != nullptr);
 
-  ArtMethod* m1_1 = klass1->GetVirtualMethod(0, sizeof(void*));
+  ArtMethod* m1_1 = klass1->GetVirtualMethod(0, kRuntimePointerSize);
   EXPECT_STREQ(m1_1->GetName(), "m1");
-  ArtMethod* m2_1 = klass1->GetVirtualMethod(1, sizeof(void*));
+  ArtMethod* m2_1 = klass1->GetVirtualMethod(1, kRuntimePointerSize);
   EXPECT_STREQ(m2_1->GetName(), "m2");
-  ArtMethod* m3_1 = klass1->GetVirtualMethod(2, sizeof(void*));
+  ArtMethod* m3_1 = klass1->GetVirtualMethod(2, kRuntimePointerSize);
   EXPECT_STREQ(m3_1->GetName(), "m3");
-  ArtMethod* m4_1 = klass1->GetVirtualMethod(3, sizeof(void*));
+  ArtMethod* m4_1 = klass1->GetVirtualMethod(3, kRuntimePointerSize);
   EXPECT_STREQ(m4_1->GetName(), "m4");
 
-  ArtMethod* m1_2 = klass2->GetVirtualMethod(0, sizeof(void*));
+  ArtMethod* m1_2 = klass2->GetVirtualMethod(0, kRuntimePointerSize);
   EXPECT_STREQ(m1_2->GetName(), "m1");
-  ArtMethod* m2_2 = klass2->GetVirtualMethod(1, sizeof(void*));
+  ArtMethod* m2_2 = klass2->GetVirtualMethod(1, kRuntimePointerSize);
   EXPECT_STREQ(m2_2->GetName(), "m2");
-  ArtMethod* m3_2 = klass2->GetVirtualMethod(2, sizeof(void*));
+  ArtMethod* m3_2 = klass2->GetVirtualMethod(2, kRuntimePointerSize);
   EXPECT_STREQ(m3_2->GetName(), "m3");
-  ArtMethod* m4_2 = klass2->GetVirtualMethod(3, sizeof(void*));
+  ArtMethod* m4_2 = klass2->GetVirtualMethod(3, kRuntimePointerSize);
   EXPECT_STREQ(m4_2->GetName(), "m4");
 }
 
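The sizeof(void*)-to-PointerSize migration running through these tests swaps a raw size_t for a scoped enum, so a bare 4 or 8 can no longer be passed where a pointer width is meant. A sketch of the idea, assuming a definition in the spirit of base/enums.h:

    #include <cstddef>

    // Assumed to match the spirit of art's base/enums.h.
    enum class PointerSize : size_t {
      k32 = 4,
      k64 = 8,
    };

    constexpr size_t ToBytes(PointerSize size) {
      return static_cast<size_t>(size);
    }

    size_t FieldOffset(size_t index, PointerSize pointer_size) {
      // FieldOffset(0, 8) no longer compiles; callers must name the width.
      return index * ToBytes(pointer_size);
    }
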
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index 01e99b9..039989b 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -22,19 +22,11 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t Reference::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 5;
+inline uint32_t Reference::ClassSize(PointerSize pointer_size) {
+  uint32_t vtable_entries = Object::kVTableLength + 4;
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
 
-inline bool Reference::IsEnqueuable() {
-  // Not using volatile reads as an optimization since this is only called with all the mutators
-  // suspended.
-  const Object* queue = GetFieldObject<mirror::Object>(QueueOffset());
-  const Object* queue_next = GetFieldObject<mirror::Object>(QueueNextOffset());
-  return queue != nullptr && queue_next == nullptr;
-}
-
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 51ae760..38c6616 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -17,11 +17,13 @@
 #ifndef ART_RUNTIME_MIRROR_REFERENCE_H_
 #define ART_RUNTIME_MIRROR_REFERENCE_H_
 
+#include "base/enums.h"
 #include "class.h"
 #include "gc_root.h"
 #include "object.h"
 #include "object_callbacks.h"
 #include "read_barrier_option.h"
+#include "runtime.h"
 #include "thread.h"
 
 namespace art {
@@ -42,7 +44,7 @@
 class MANAGED Reference : public Object {
  public:
   // Size of java.lang.ref.Reference.class.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of java.lang.ref.Reference.
   static constexpr uint32_t InstanceSize() {
@@ -74,24 +76,36 @@
   void ClearReferent() SHARED_REQUIRES(Locks::mutator_lock_) {
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
-  // Volatile read/write is not necessary since the java pending next is only accessed from
-  // the java threads for cleared references. Once these cleared references have a null referent,
-  // we never end up reading their pending next from the GC again.
+
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObject<Reference>(PendingNextOffset());
-  }
-  template<bool kTransactionActive>
-  void SetPendingNext(Reference* pending_next) SHARED_REQUIRES(Locks::mutator_lock_) {
-    SetFieldObject<kTransactionActive>(PendingNextOffset(), pending_next);
+    return GetFieldObject<Reference, kDefaultVerifyFlags, kReadBarrierOption>(PendingNextOffset());
   }
 
-  bool IsEnqueued() SHARED_REQUIRES(Locks::mutator_lock_) {
-    // Since the references are stored as cyclic lists it means that once enqueued, the pending
-    // next is always non-null.
-    return GetPendingNext() != nullptr;
+  void SetPendingNext(Reference* pending_next)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Runtime::Current()->IsActiveTransaction()) {
+      SetFieldObject<true>(PendingNextOffset(), pending_next);
+    } else {
+      SetFieldObject<false>(PendingNextOffset(), pending_next);
+    }
   }
 
-  bool IsEnqueuable() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Returns true if the reference's pendingNext is null, indicating it is
+  // okay to process this reference.
+  //
+  // If pendingNext is not null, then one of the following cases holds:
+  // 1. The reference has already been enqueued to a java ReferenceQueue. In
+  // this case the referent should not be considered for reference processing
+  // ever again.
+  // 2. The reference is currently part of a list of references that may
+  // shortly be enqueued on a java ReferenceQueue. In this case the reference
+  // should not be processed again until and unless the reference has been
+  // removed from the list after having determined the reference is not ready
+  // to be enqueued on a java ReferenceQueue.
+  bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetPendingNext<kWithoutReadBarrier>() == nullptr;
+  }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetJavaLangRefReference() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -109,9 +123,9 @@
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  HeapReference<Reference> pending_next_;  // Note this is Java volatile:
-  HeapReference<Object> queue_;  // Note this is Java volatile:
-  HeapReference<Reference> queue_next_;  // Note this is Java volatile:
+  HeapReference<Reference> pending_next_;
+  HeapReference<Object> queue_;
+  HeapReference<Reference> queue_next_;
   HeapReference<Object> referent_;  // Note this is Java volatile:
 
   static GcRoot<Class> java_lang_ref_Reference_;
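
SetPendingNext above also shows the mirror image of the template<bool> pattern: a runtime IsActiveTransaction() check selecting a compile-time specialization. The IsUnprocessed() contract itself can be sketched with a toy pending list (hypothetical types, not ART's reference processor):

    // Hypothetical sketch: link a reference into a pending list only when its
    // pendingNext slot is still null, i.e. no queue or pending list owns it.
    struct Ref {
      Ref* pending_next = nullptr;
      bool IsUnprocessed() const { return pending_next == nullptr; }
    };

    void MaybeEnqueue(Ref* ref, Ref** pending_list_head) {
      if (!ref->IsUnprocessed()) {
        return;  // Case 1 or 2 from the comment above: leave it alone.
      }
      // Self-link the first element so that, as in the cyclic lists the old
      // IsEnqueued() comment described, pendingNext is non-null once enqueued.
      ref->pending_next = (*pending_list_head != nullptr) ? *pending_list_head : ref;
      *pending_list_head = ref;
    }
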
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 28a830d..bc39ea8 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -13,13 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #ifndef ART_RUNTIME_MIRROR_STRING_INL_H_
 #define ART_RUNTIME_MIRROR_STRING_INL_H_
 
 #include "array.h"
 #include "base/bit_utils.h"
 #include "class.h"
+#include "common_throws.h"
 #include "gc/heap-inl.h"
 #include "globals.h"
 #include "intern_table.h"
@@ -32,9 +32,9 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 52;
-  return Class::ComputeClassSize(true, vtable_entries, 0, 1, 0, 1, 2, pointer_size);
+inline uint32_t String::ClassSize(PointerSize pointer_size) {
+  uint32_t vtable_entries = Object::kVTableLength + 57;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
 }
 
 // Sets string count in the allocation code path to ensure it is guarded by a CAS.
@@ -48,6 +48,7 @@
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
     String* string = down_cast<String*>(obj);
     string->SetCount(count_);
+    DCHECK(!string->IsCompressed() || kUseStringCompression);
   }
 
  private:
@@ -67,10 +68,19 @@
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
     String* string = down_cast<String*>(obj);
     string->SetCount(count_);
-    uint16_t* value = string->GetValue();
+    DCHECK(!string->IsCompressed() || kUseStringCompression);
+    int32_t length = String::GetLengthFromCount(count_);
     const uint8_t* const src = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_;
-    for (int i = 0; i < count_; i++) {
-      value[i] = high_byte_ + (src[i] & 0xFF);
+    if (string->IsCompressed()) {
+      uint8_t* valueCompressed = string->GetValueCompressed();
+      for (int i = 0; i < length; i++) {
+        valueCompressed[i] = (src[i] & 0xFF);
+      }
+    } else {
+      uint16_t* value = string->GetValue();
+      for (int i = 0; i < length; i++) {
+        value[i] = high_byte_ + (src[i] & 0xFF);
+      }
     }
   }
 
@@ -95,7 +105,16 @@
     String* string = down_cast<String*>(obj);
     string->SetCount(count_);
     const uint16_t* const src = src_array_->GetData() + offset_;
-    memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+    const int32_t length = String::GetLengthFromCount(count_);
+    bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
+    DCHECK(!compressible || kUseStringCompression);
+    if (compressible) {
+      for (int i = 0; i < length; ++i) {
+        string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
+      }
+    } else {
+      memcpy(string->GetValue(), src, length * sizeof(uint16_t));
+    }
   }
 
  private:
@@ -117,8 +136,22 @@
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
     String* string = down_cast<String*>(obj);
     string->SetCount(count_);
-    const uint16_t* const src = src_string_->GetValue() + offset_;
-    memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+    const int32_t length = String::GetLengthFromCount(count_);
+    bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
+    DCHECK(!compressible || kUseStringCompression);
+    if (src_string_->IsCompressed()) {
+      const uint8_t* const src = src_string_->GetValueCompressed() + offset_;
+      memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t));
+    } else {
+      const uint16_t* const src = src_string_->GetValue() + offset_;
+      if (compressible) {
+        for (int i = 0; i < length; ++i) {
+          string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
+        }
+      } else {
+        memcpy(string->GetValue(), src, length * sizeof(uint16_t));
+      }
+    }
   }
 
  private:
@@ -132,19 +165,38 @@
 }
 
 inline uint16_t String::CharAt(int32_t index) {
-  int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
+  int32_t count = GetLength();
   if (UNLIKELY((index < 0) || (index >= count))) {
-    Thread* self = Thread::Current();
-    self->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
-                             "length=%i; index=%i", count, index);
+    ThrowStringIndexOutOfBoundsException(index, count);
     return 0;
   }
-  return GetValue()[index];
+  if (IsCompressed()) {
+    return GetValueCompressed()[index];
+  } else {
+    return GetValue()[index];
+  }
+}
+
+template <typename MemoryType>
+int32_t String::FastIndexOf(MemoryType* chars, int32_t ch, int32_t start) {
+  const MemoryType* p = chars + start;
+  const MemoryType* end = chars + GetLength();
+  while (p < end) {
+    if (*p++ == ch) {
+      return (p - 1) - chars;
+    }
+  }
+  return -1;
 }
 
 template<VerifyObjectFlags kVerifyFlags>
 inline size_t String::SizeOf() {
-  size_t size = sizeof(String) + (sizeof(uint16_t) * GetLength<kVerifyFlags>());
+  size_t size = sizeof(String);
+  if (IsCompressed()) {
+    size += (sizeof(uint8_t) * GetLength<kVerifyFlags>());
+  } else {
+    size += (sizeof(uint16_t) * GetLength<kVerifyFlags>());
+  }
   // String.equals() intrinsics assume zero-padding up to kObjectAlignment,
   // so make sure the zero-padding is actually copied around if GC compaction
   // chooses to copy only SizeOf() bytes.
@@ -153,31 +205,35 @@
 }
 
 template <bool kIsInstrumented, typename PreFenceVisitor>
-inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type,
+inline String* String::Alloc(Thread* self, int32_t utf16_length_with_flag,
+                             gc::AllocatorType allocator_type,
                              const PreFenceVisitor& pre_fence_visitor) {
   constexpr size_t header_size = sizeof(String);
-  static_assert(sizeof(utf16_length) <= sizeof(size_t),
+  const bool compressible = kUseStringCompression &&
+                            String::GetCompressionFlagFromCount(utf16_length_with_flag);
+  const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t);
+  size_t length = String::GetLengthFromCount(utf16_length_with_flag);
+  static_assert(sizeof(length) <= sizeof(size_t),
                 "static_cast<size_t>(utf16_length) must not lose bits.");
-  size_t length = static_cast<size_t>(utf16_length);
-  size_t data_size = sizeof(uint16_t) * length;
+  size_t data_size = block_size * length;
   size_t size = header_size + data_size;
   // String.equals() intrinsics assume zero-padding up to kObjectAlignment,
   // so make sure the allocator clears the padding as well.
   // http://b/23528461
   size_t alloc_size = RoundUp(size, kObjectAlignment);
-  Class* string_class = GetJavaLangString();
 
+  Class* string_class = GetJavaLangString();
   // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
   // Do this by comparing with the maximum length that will _not_ cause an overflow.
-  constexpr size_t overflow_length = (-header_size) / sizeof(uint16_t);  // Unsigned arithmetic.
-  constexpr size_t max_alloc_length = overflow_length - 1u;
+  const size_t overflow_length = (-header_size) / block_size;   // Unsigned arithmetic.
+  const size_t max_alloc_length = overflow_length - 1u;
   static_assert(IsAligned<sizeof(uint16_t)>(kObjectAlignment),
                 "kObjectAlignment must be at least as big as Java char alignment");
-  constexpr size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / sizeof(uint16_t));
+  const size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / block_size);
   if (UNLIKELY(length > max_length)) {
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
                                              PrettyDescriptor(string_class).c_str(),
-                                             utf16_length).c_str());
+                                             static_cast<int>(length)).c_str());
     return nullptr;
   }
 
@@ -188,11 +244,22 @@
 }
 
 template <bool kIsInstrumented>
+inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) {
+  SetStringCountVisitor visitor(0);
+  return Alloc<kIsInstrumented>(self, 0, allocator_type, visitor);
+}
+
+template <bool kIsInstrumented>
 inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length,
                                           Handle<ByteArray> array, int32_t offset,
                                           int32_t high_byte, gc::AllocatorType allocator_type) {
-  SetStringCountAndBytesVisitor visitor(byte_length, array, offset, high_byte << 8);
-  String* string = Alloc<kIsInstrumented>(self, byte_length, allocator_type, visitor);
+  const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
+  const bool compressible = kUseStringCompression &&
+                            String::AllASCII<uint8_t>(src, byte_length) &&
+                            (high_byte == 0);
+  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(byte_length)
+                                                  : byte_length;
+  SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8);
+  String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return string;
 }
 
@@ -202,16 +269,24 @@
                                           gc::AllocatorType allocator_type) {
   // It is a caller error to have a count less than the actual array's size.
   DCHECK_GE(array->GetLength(), count);
-  SetStringCountAndValueVisitorFromCharArray visitor(count, array, offset);
-  String* new_string = Alloc<kIsInstrumented>(self, count, allocator_type, visitor);
+  const bool compressible = kUseStringCompression &&
+                            String::AllASCII<uint16_t>(array->GetData() + offset, count);
+  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count;
+  SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset);
+  String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return new_string;
 }
 
 template <bool kIsInstrumented>
 inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string,
                                        int32_t offset, gc::AllocatorType allocator_type) {
-  SetStringCountAndValueVisitorFromString visitor(string_length, string, offset);
-  String* new_string = Alloc<kIsInstrumented>(self, string_length, allocator_type, visitor);
+  const bool compressible = kUseStringCompression &&
+      ((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset,
+                                                                    string_length));
+  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(string_length)
+                                                  : string_length;
+  SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset);
+  String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return new_string;
 }
 
@@ -220,11 +295,28 @@
   if (UNLIKELY(result == 0)) {
     result = ComputeHashCode();
   }
-  DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0)
-      << ToModifiedUtf8() << " " << result;
+  if (kIsDebugBuild) {
+    if (IsCompressed()) {
+      DCHECK(result != 0 || ComputeUtf16Hash(GetValueCompressed(), GetLength()) == 0)
+          << ToModifiedUtf8() << " " << result;
+    } else {
+      DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0)
+          << ToModifiedUtf8() << " " << result;
+    }
+  }
   return result;
 }
 
+template<typename MemoryType>
+bool String::AllASCII(const MemoryType* const chars, const int length) {
+  for (int i = 0; i < length; ++i) {
+    // Code units at or above 0x80 are not ASCII; they must stay in the
+    // 16-bit representation so modified-UTF-8 conversion stays correct.
+    if (chars[i] >= 0x80) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace mirror
 }  // namespace art
 
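Every allocation path above follows the same shape: decide compressibility up front, fold the decision into the count, and let the pre-fence visitor copy into 8-bit or 16-bit storage. A condensed standalone sketch of that decision step (simplified helpers, not the real allocator):

    #include <cstdint>

    constexpr int32_t kCompressedFlag = INT32_MIN;  // Bit 31 of the count.

    // True when every code unit fits in 8 bits without breaking the
    // modified-UTF-8 round trip, i.e. all chars are below 0x80.
    bool AllAscii(const uint16_t* chars, int32_t length) {
      for (int32_t i = 0; i < length; ++i) {
        if (chars[i] >= 0x80) {
          return false;
        }
      }
      return true;
    }

    // Mirrors the GetFlaggedCount() step in the patch: return the length
    // with the compression bit set when the payload is compressible.
    int32_t DecideCount(const uint16_t* chars, int32_t length) {
      return AllAscii(chars, length) ? (length | kCompressedFlag) : length;
    }
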
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 45610dc..46caa4d 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -41,15 +41,11 @@
   } else if (start > count) {
     start = count;
   }
-  const uint16_t* chars = GetValue();
-  const uint16_t* p = chars + start;
-  const uint16_t* end = chars + count;
-  while (p < end) {
-    if (*p++ == ch) {
-      return (p - 1) - chars;
-    }
+  if (IsCompressed()) {
+    return FastIndexOf<uint8_t>(GetValueCompressed(), ch, start);
+  } else {
+    return FastIndexOf<uint16_t>(GetValue(), ch, start);
   }
-  return -1;
 }
 
 void String::SetClass(Class* java_lang_String) {
@@ -65,64 +61,122 @@
 }
 
 int String::ComputeHashCode() {
-  const int32_t hash_code = ComputeUtf16Hash(GetValue(), GetLength());
+  int32_t hash_code = 0;
+  if (IsCompressed()) {
+    hash_code = ComputeUtf16Hash(GetValueCompressed(), GetLength());
+  } else {
+    hash_code = ComputeUtf16Hash(GetValue(), GetLength());
+  }
   SetHashCode(hash_code);
   return hash_code;
 }
 
 int32_t String::GetUtfLength() {
-  return CountUtf8Bytes(GetValue(), GetLength());
+  if (IsCompressed()) {
+    return GetLength();
+  } else {
+    return CountUtf8Bytes(GetValue(), GetLength());
+  }
 }
 
 void String::SetCharAt(int32_t index, uint16_t c) {
-  DCHECK((index >= 0) && (index < count_));
-  GetValue()[index] = c;
+  DCHECK((index >= 0) && (index < GetLength()));
+  if (IsCompressed()) {
+    // TODO: Handle the case where String is compressed and c is non-ASCII
+    GetValueCompressed()[index] = static_cast<uint8_t>(c);
+  } else {
+    GetValue()[index] = c;
+  }
 }
 
 String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
   int32_t length = string->GetLength();
   int32_t length2 = string2->GetLength();
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  SetStringCountVisitor visitor(length + length2);
-  String* new_string = Alloc<true>(self, length + length2, allocator_type, visitor);
+  const bool compressible = kUseStringCompression &&
+                            (string->IsCompressed() && string2->IsCompressed());
+  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(length + length2)
+                                                  : (length + length2);
+
+  SetStringCountVisitor visitor(length_with_flag);
+  String* new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(new_string == nullptr)) {
     return nullptr;
   }
-  uint16_t* new_value = new_string->GetValue();
-  memcpy(new_value, string->GetValue(), length * sizeof(uint16_t));
-  memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t));
+  if (compressible) {
+    uint8_t* new_value = new_string->GetValueCompressed();
+    memcpy(new_value, string->GetValueCompressed(), length * sizeof(uint8_t));
+    memcpy(new_value + length, string2->GetValueCompressed(), length2 * sizeof(uint8_t));
+  } else {
+    uint16_t* new_value = new_string->GetValue();
+    if (string->IsCompressed()) {
+      for (int i = 0; i < length; ++i) {
+        new_value[i] = string->CharAt(i);
+      }
+    } else {
+      memcpy(new_value, string->GetValue(), length * sizeof(uint16_t));
+    }
+    if (string2->IsCompressed()) {
+      for (int i = 0; i < length2; ++i) {
+        new_value[i + length] = string2->CharAt(i);
+      }
+    } else {
+      memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t));
+    }
+  }
   return new_string;
 }
 
 String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) {
   CHECK(utf16_data_in != nullptr || utf16_length == 0);
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  SetStringCountVisitor visitor(utf16_length);
-  String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
+  const bool compressible = kUseStringCompression &&
+                            String::AllASCII<uint16_t>(utf16_data_in, utf16_length);
+  int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
+                                            : utf16_length;
+  SetStringCountVisitor visitor(length_with_flag);
+  String* string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
-  uint16_t* array = string->GetValue();
-  memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t));
+  if (compressible) {
+    for (int i = 0; i < utf16_length; ++i) {
+      string->GetValueCompressed()[i] = static_cast<uint8_t>(utf16_data_in[i]);
+    }
+  } else {
+    uint16_t* array = string->GetValue();
+    memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t));
+  }
   return string;
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
   DCHECK(utf != nullptr);
-  size_t char_count = CountModifiedUtf8Chars(utf);
-  return AllocFromModifiedUtf8(self, char_count, utf);
+  size_t byte_count = strlen(utf);
+  size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
+  return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
+}
+
+String* String::AllocFromModifiedUtf8(Thread* self,
+                                      int32_t utf16_length,
+                                      const char* utf8_data_in) {
+  return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                      const char* utf8_data_in) {
+                                      const char* utf8_data_in, int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  SetStringCountVisitor visitor(utf16_length);
-  String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
+  const bool compressible = kUseStringCompression && (utf16_length == utf8_length);
+  const int32_t utf16_length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
+                                                        : utf16_length;
+  SetStringCountVisitor visitor(utf16_length_with_flag);
+  String* string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
-  uint16_t* utf16_data_out = string->GetValue();
-  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
+  if (compressible) {
+    memcpy(string->GetValueCompressed(), utf8_data_in, utf16_length * sizeof(uint8_t));
+  } else {
+    uint16_t* utf16_data_out = string->GetValue();
+    ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
+  }
   return string;
 }
 
@@ -214,10 +268,16 @@
 
 // Create a modified UTF-8 encoded std::string from a java/lang/String object.
 std::string String::ToModifiedUtf8() {
-  const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
-  ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
+  if (IsCompressed()) {
+    for (size_t i = 0; i < byte_count; ++i) {
+      result[i] = static_cast<char>(CharAt(i));
+    }
+  } else {
+    const uint16_t* chars = GetValue();
+    ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
+  }
   return result;
 }
 
@@ -237,11 +297,24 @@
   int32_t rhsCount = rhs->GetLength();
   int32_t countDiff = lhsCount - rhsCount;
   int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
-  const uint16_t* lhsChars = lhs->GetValue();
-  const uint16_t* rhsChars = rhs->GetValue();
-  int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
-  if (otherRes != 0) {
-    return otherRes;
+  if (lhs->IsCompressed() && rhs->IsCompressed()) {
+    int32_t comparison = memcmp(lhs->GetValueCompressed(),
+                                rhs->GetValueCompressed(),
+                                minCount * sizeof(uint8_t));
+    if (comparison != 0) {
+      return comparison;
+    }
+  } else if (lhs->IsCompressed() || rhs->IsCompressed()) {
+    for (int32_t i = 0; i < minCount; ++i) {
+      if (lhs->CharAt(i) != rhs->CharAt(i)) {
+        return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i));
+      }
+    }
+  } else {
+    const uint16_t* lhsChars = lhs->GetValue();
+    const uint16_t* rhsChars = rhs->GetValue();
+    int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
+    if (otherRes != 0) {
+      return otherRes;
+    }
   }
   return countDiff;
 }
@@ -254,14 +327,35 @@
   StackHandleScope<1> hs(self);
   Handle<String> string(hs.NewHandle(this));
   CharArray* result = CharArray::Alloc(self, GetLength());
-  memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
+  if (result != nullptr) {
+    if (string->IsCompressed()) {
+      int32_t length = string->GetLength();
+      for (int i = 0; i < length; ++i) {
+        result->GetData()[i] = string->CharAt(i);
+      }
+    } else {
+      memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
+    }
+  } else {
+    self->AssertPendingOOMException();
+  }
   return result;
 }
 
 void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) {
   uint16_t* data = array->GetData() + index;
-  uint16_t* value = GetValue() + start;
-  memcpy(data, value, (end - start) * sizeof(uint16_t));
+  if (IsCompressed()) {
+    for (int i = start; i < end; ++i) {
+      data[i - start] = CharAt(i);
+    }
+  } else {
+    uint16_t* value = GetValue() + start;
+    memcpy(data, value, (end - start) * sizeof(uint16_t));
+  }
+}
+
+bool String::IsValueNull() {
+  return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr);
 }
 
 }  // namespace mirror
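
FastIndexOf is now one templated scan instantiated for uint8_t and uint16_t, the same single-algorithm/two-representations pattern used throughout this file. A standalone sketch of it (illustrative names):

    #include <cstdint>

    template <typename MemoryType>
    int32_t ScanFor(const MemoryType* chars, int32_t length, int32_t ch, int32_t start) {
      const MemoryType* p = chars + start;
      const MemoryType* end = chars + length;
      while (p < end) {
        if (*p++ == ch) {
          return static_cast<int32_t>((p - 1) - chars);
        }
      }
      return -1;
    }

    // The caller branches once on the representation and reuses the same loop:
    int32_t IndexOf(bool compressed, const void* data, int32_t length,
                    int32_t ch, int32_t start) {
      return compressed
          ? ScanFor(static_cast<const uint8_t*>(data), length, ch, start)
          : ScanFor(static_cast<const uint16_t*>(data), length, ch, start);
    }
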
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index fbee2d7..8695fe8 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -27,14 +27,18 @@
 template<class T> class Handle;
 struct StringOffsets;
 class StringPiece;
+class StubTest_ReadBarrierForRoot_Test;
 
 namespace mirror {
 
+// String Compression
+static constexpr bool kUseStringCompression = false;
+
 // C++ mirror of java.lang.String
 class MANAGED String FINAL : public Object {
  public:
   // Size of java.lang.String.class.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of java.lang.String not including its value array.
   static constexpr uint32_t InstanceSize() {
@@ -53,18 +57,28 @@
     return &value_[0];
   }
 
+  uint8_t* GetValueCompressed() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return &value_compressed_[0];
+  }
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   size_t SizeOf() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // The first/uppermost bit is masked out because it is not part of the actual length value.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   int32_t GetLength() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetLengthFromCount(GetCount<kVerifyFlags>());
+  }
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  int32_t GetCount() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(String, count_));
   }
 
   void SetCount(int32_t new_count) SHARED_REQUIRES(Locks::mutator_lock_) {
     // Count is invariant so use non-transactional mode. Also disable check as we may run inside
     // a transaction.
-    DCHECK_LE(0, new_count);
+    DCHECK_LE(0, (new_count & INT32_MAX));
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
   }
 
@@ -81,12 +95,6 @@
 
   String* Intern() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kIsInstrumented, typename PreFenceVisitor>
-  ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length,
-                                     gc::AllocatorType allocator_type,
-                                     const PreFenceVisitor& pre_fence_visitor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
-
   template <bool kIsInstrumented>
   ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length,
                                                   Handle<ByteArray> array, int32_t offset,
@@ -106,6 +114,11 @@
                                                gc::AllocatorType allocator_type)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocEmptyString(Thread* self,
+                                                gc::AllocatorType allocator_type)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   static String* AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -115,6 +128,10 @@
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+                                       const char* utf8_data_in, int32_t utf8_length)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -144,6 +161,10 @@
 
   int32_t FastIndexOf(int32_t ch, int32_t start) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <typename MemoryType>
+  int32_t FastIndexOf(MemoryType* chars, int32_t ch, int32_t start)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   int32_t CompareTo(String* other) SHARED_REQUIRES(Locks::mutator_lock_);
 
   CharArray* ToCharArray(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
@@ -152,6 +173,28 @@
   void GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool IsCompressed() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return kUseStringCompression && GetCompressionFlagFromCount(GetCount());
+  }
+
+  bool IsValueNull() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<typename MemoryType>
+  static bool AllASCII(const MemoryType* const chars, const int length);
+
+  ALWAYS_INLINE static bool GetCompressionFlagFromCount(const int32_t count) {
+    return kUseStringCompression && ((count & (1u << 31)) != 0);
+  }
+
+  ALWAYS_INLINE static int32_t GetLengthFromCount(const int32_t count) {
+    return kUseStringCompression ? (count & INT32_MAX) : count;
+  }
+
+  ALWAYS_INLINE static int32_t GetFlaggedCount(const int32_t count) {
+    return kUseStringCompression ? (count | (1u << 31)) : count;
+  }
+
   static Class* GetJavaLangString() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(!java_lang_String_.IsNull());
     return java_lang_String_.Read();
@@ -169,17 +212,29 @@
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code);
   }
 
+  template <bool kIsInstrumented, typename PreFenceVisitor>
+  ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length_with_flag,
+                                     gc::AllocatorType allocator_type,
+                                     const PreFenceVisitor& pre_fence_visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  // The first bit (uppermost/leftmost) is reserved for the Compressed/Uncompressed flag:
+  // [0] Uncompressed: the string uses 16-bit memory | [1] Compressed: 8-bit memory
   int32_t count_;
 
   uint32_t hash_code_;
 
-  uint16_t value_[0];
+  // Compressing all-ASCII strings into 8-bit memory means exactly one of these fields is used.
+  union {
+    uint16_t value_[0];
+    uint8_t value_compressed_[0];
+  };
 
   static GcRoot<Class> java_lang_String_;
 
   friend struct art::StringOffsets;  // for verifying offset information
-  ART_FRIEND_TEST(ObjectTest, StringLength);  // for SetOffset and SetCount
+  ART_FRIEND_TEST(art::StubTest, ReadBarrierForRoot);  // For java_lang_String_.
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
 };
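
The count_ encoding concentrated in GetCompressionFlagFromCount, GetLengthFromCount and GetFlaggedCount is easy to sanity-check in isolation. A hedged round-trip sketch (standalone helpers mirroring the ones above, with compression unconditionally enabled):

    #include <cassert>
    #include <cstdint>

    constexpr bool kUseCompression = true;

    constexpr bool CompressedFlag(int32_t count) {
      return kUseCompression && ((count & (1u << 31)) != 0);
    }

    constexpr int32_t LengthFromCount(int32_t count) {
      return kUseCompression ? (count & INT32_MAX) : count;
    }

    constexpr int32_t FlaggedCount(int32_t count) {
      return kUseCompression ? static_cast<int32_t>(count | (1u << 31)) : count;
    }

    int main() {
      int32_t flagged = FlaggedCount(42);
      assert(CompressedFlag(flagged));
      assert(LengthFromCount(flagged) == 42);  // Bit 31 never leaks into the length.
      assert(!CompressedFlag(17));             // Plain counts read back unchanged.
      assert(LengthFromCount(17) == 17);
      return 0;
    }
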
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e215994..0bccc8b 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -17,6 +17,7 @@
 #include "throwable.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
@@ -56,9 +57,9 @@
 void Throwable::SetStackState(Object* state) SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(state != nullptr);
   if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
   } else {
-    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
   }
 }
 
@@ -106,7 +107,7 @@
     if (depth == 0) {
       result += "(Throwable with empty stack trace)";
     } else {
-      const size_t ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      const PointerSize ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       for (int32_t i = 0; i < depth; ++i) {
         ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, ptr_size);
         uintptr_t dex_pc = method_trace->GetElementPtrSize<uintptr_t>(i + depth, ptr_size);
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 0f488dc..6aacc8d 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -60,16 +60,16 @@
 
  private:
   Object* GetStackState() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_));
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
   }
   Object* GetStackTrace() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_trace_));
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<Object> backtrace_;  // Note this is Java volatile:
   HeapReference<Throwable> cause_;
   HeapReference<String> detail_message_;
-  HeapReference<Object> stack_state_;  // Note this is Java volatile:
   HeapReference<Object> stack_trace_;
   HeapReference<Object> suppressed_exceptions_;
 
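The backtrace_ field remains Java-volatile, which is why both accessors go through GetFieldObjectVolatile. A Java volatile access is sequentially consistent; a rough C++ analogy using std::atomic's default ordering (not ART's heap-reference machinery):

    #include <atomic>

    struct Backtrace;  // Opaque payload for the sketch.

    std::atomic<Backtrace*> backtrace_field{nullptr};

    void PublishBacktrace(Backtrace* state) {
      backtrace_field.store(state);  // memory_order_seq_cst by default.
    }

    Backtrace* ReadBacktrace() {
      return backtrace_field.load();  // Pairs with the store above.
    }
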
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 116cbe9..fd7a125 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -42,14 +42,31 @@
 
 static constexpr uint32_t kAccJavaFlagsMask = 0xffff;  // bits set from Java sources (low 16)
 
-static constexpr uint32_t kAccConstructor =          0x00010000;  // method (dex only) <(cl)init>
-static constexpr uint32_t kAccDeclaredSynchronized = 0x00020000;  // method (dex only)
-static constexpr uint32_t kAccClassIsProxy =         0x00040000;  // class  (dex only)
-static constexpr uint32_t kAccPreverified =          0x00080000;  // class (runtime),
-                                                                  // method (dex only)
-static constexpr uint32_t kAccFastNative =           0x00080000;  // method (dex only)
-static constexpr uint32_t kAccMiranda =              0x00200000;  // method (dex only)
-static constexpr uint32_t kAccDefault =              0x00400000;  // method (runtime)
+static constexpr uint32_t kAccConstructor =           0x00010000;  // method (dex only) <(cl)init>
+static constexpr uint32_t kAccDeclaredSynchronized =  0x00020000;  // method (dex only)
+static constexpr uint32_t kAccClassIsProxy =          0x00040000;  // class  (dex only)
+// Used by a method to denote that its execution does not need to go through slow path interpreter.
+static constexpr uint32_t kAccSkipAccessChecks =      0x00080000;  // method (dex only)
+// Used by a class to denote that the verifier has attempted to check it at least once.
+static constexpr uint32_t kAccVerificationAttempted = 0x00080000;  // class (runtime)
+static constexpr uint32_t kAccFastNative =            0x00080000;  // method (dex only)
+// This is set by the class linker during LinkInterfaceMethods. It is used by a method to represent
+// that it was copied from its declaring class into another class. All methods marked kAccMiranda
+// and kAccDefaultConflict will have this bit set. Any kAccDefault method contained in the methods_
+// array of a concrete class will also have this bit set.
+static constexpr uint32_t kAccCopied =                0x00100000;  // method (runtime)
+static constexpr uint32_t kAccMiranda =               0x00200000;  // method (dex only)
+static constexpr uint32_t kAccDefault =               0x00400000;  // method (runtime)
+// This is set by the class linker during LinkInterfaceMethods. Prior to that point we do not know
+// if any particular method needs to be a default conflict. Used to figure out at runtime if
+// invoking this method will throw an exception.
+static constexpr uint32_t kAccDefaultConflict =       0x00800000;  // method (runtime)
+
+// Set by the verifier for a method we do not want the compiler to compile.
+static constexpr uint32_t kAccCompileDontBother =     0x01000000;  // method (runtime)
+
+// Set by the verifier for a method that could not be verified to follow structured locking.
+static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
 
 // Special runtime-only flags.
 // Interface and all its super-interfaces with default methods have been recursively initialized.
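
Because kAccSkipAccessChecks, kAccVerificationAttempted and kAccFastNative deliberately share 0x00080000, the bit's meaning depends on whether the flag word belongs to a class or a method, and every test of it must know that context. A minimal sketch of the usual flag idioms (constants copied from above):

    #include <cstdint>

    constexpr uint32_t kAccSkipAccessChecks      = 0x00080000;  // method
    constexpr uint32_t kAccVerificationAttempted = 0x00080000;  // class
    constexpr uint32_t kAccMustCountLocks        = 0x02000000;  // method

    bool MethodSkipsAccessChecks(uint32_t method_access_flags) {
      return (method_access_flags & kAccSkipAccessChecks) != 0;
    }

    uint32_t MarkClassVerificationAttempted(uint32_t class_access_flags) {
      // Same bit, different meaning: valid only because this flag word is a
      // class's, never a method's.
      return class_access_flags | kAccVerificationAttempted;
    }

    uint32_t ClearMustCountLocks(uint32_t method_access_flags) {
      return method_access_flags & ~kAccMustCountLocks;
    }
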
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 81e7e6d..e863ea9 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -16,18 +16,16 @@
 
 #include "monitor.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-
-#include <cutils/trace.h>
 #include <vector>
 
 #include "art_method-inl.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -69,19 +67,10 @@
  * at any given time.
  */
 
-bool (*Monitor::is_sensitive_thread_hook_)() = nullptr;
 uint32_t Monitor::lock_profiling_threshold_ = 0;
 
-bool Monitor::IsSensitiveThread() {
-  if (is_sensitive_thread_hook_ != nullptr) {
-    return (*is_sensitive_thread_hook_)();
-  }
-  return false;
-}
-
-void Monitor::Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)()) {
+void Monitor::Init(uint32_t lock_profiling_threshold) {
   lock_profiling_threshold_ = lock_profiling_threshold;
-  is_sensitive_thread_hook_ = is_sensitive_thread_hook;
 }
 
 Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
@@ -166,7 +155,7 @@
       return false;
     }
   }
-  LockWord fat(this, lw.ReadBarrierState());
+  LockWord fat(this, lw.GCState());
   // Publish the updated lock word, which may race with other threads.
   bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
   // Lock profiling.
@@ -226,20 +215,132 @@
   obj_ = GcRoot<mirror::Object>(object);
 }
 
+// Note: Adapted from CurrentMethodVisitor in thread.cc. We must not resolve here.
+
+struct NthCallerWithDexPcVisitor FINAL : public StackVisitor {
+  explicit NthCallerWithDexPcVisitor(Thread* thread, size_t frame)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_(nullptr),
+        dex_pc_(0),
+        current_frame_number_(0),
+        wanted_frame_number_(frame) {}
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    if (m == nullptr || m->IsRuntimeMethod()) {
+      // Runtime method, upcall, or resolution issue. Skip.
+      return true;
+    }
+
+    // Is this the requested frame?
+    if (current_frame_number_ == wanted_frame_number_) {
+      method_ = m;
+      dex_pc_ = GetDexPc(false /* abort_on_error */);
+      return false;
+    }
+
+    // Look for more.
+    current_frame_number_++;
+    return true;
+  }
+
+  ArtMethod* method_;
+  uint32_t dex_pc_;
+
+ private:
+  size_t current_frame_number_;
+  const size_t wanted_frame_number_;
+};
+
+// This function is inlined so that the VLOG and ATRACE checks are not repeated at all the
+// potential tracing points.
+void Monitor::AtraceMonitorLock(Thread* self, mirror::Object* obj, bool is_wait) {
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging) && ATRACE_ENABLED())) {
+    AtraceMonitorLockImpl(self, obj, is_wait);
+  }
+}
+
+void Monitor::AtraceMonitorLockImpl(Thread* self, mirror::Object* obj, bool is_wait) {
+  // Wait() requires a deeper call stack to be useful. Otherwise you'll see "Waiting at
+  // Object.java". Assume that we'll wait a nontrivial amount, so it's OK to do a longer
+  // stack walk than if !is_wait.
+  NthCallerWithDexPcVisitor visitor(self, is_wait ? 1U : 0U);
+  visitor.WalkStack(false);
+  const char* prefix = is_wait ? "Waiting on " : "Locking ";
+
+  const char* filename;
+  int32_t line_number;
+  TranslateLocation(visitor.method_, visitor.dex_pc_, &filename, &line_number);
+
+  // It would be nice to have a stable "ID" for the object here. However, the only stable thing
+  // would be the identity hashcode. But we cannot use IdentityHashcode here: For one, there are
+  // times when it is unsafe to make that call (see stack dumping for an explanation). More
+  // importantly, we would have to give up on thin-locking when adding systrace locks, as the
+  // identity hashcode is stored in the lockword normally (so can't be used with thin-locks).
+  //
+  // Because of thin-locks we also cannot use the monitor id (as there is no monitor). Monitor ids
+  // also do not have to be stable, as the monitor may be deflated.
+  std::string tmp = StringPrintf("%s%d at %s:%d",
+      prefix,
+      (obj == nullptr ? -1 : static_cast<int32_t>(reinterpret_cast<uintptr_t>(obj))),
+      (filename != nullptr ? filename : "null"),
+      line_number);
+  ATRACE_BEGIN(tmp.c_str());
+}
+
+void Monitor::AtraceMonitorUnlock() {
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    ATRACE_END();
+  }
+}
+
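+// Builds a human-readable description of a contention event, e.g. (hypothetical values):
+//   "monitor contention with owner Worker-1 (12345) at void Foo.bar()(Foo.java:17) waiters=2"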
+std::string Monitor::PrettyContentionInfo(const std::string& owner_name,
+                                          pid_t owner_tid,
+                                          ArtMethod* owners_method,
+                                          uint32_t owners_dex_pc,
+                                          size_t num_waiters) {
+  const char* owners_filename;
+  int32_t owners_line_number = 0;
+  if (owners_method != nullptr) {
+    TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
+  }
+  std::ostringstream oss;
+  oss << "monitor contention with owner " << owner_name << " (" << owner_tid << ")";
+  if (owners_method != nullptr) {
+    oss << " at " << PrettyMethod(owners_method);
+    oss << "(" << owners_filename << ":" << owners_line_number << ")";
+  }
+  oss << " waiters=" << num_waiters;
+  return oss.str();
+}
+
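+// Acquires the monitor if it is unowned, or bumps the recursion count if we already own it,
+// without ever blocking. Returns false if the monitor is held by another thread. The caller
+// must hold monitor_lock_.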
+bool Monitor::TryLockLocked(Thread* self) {
+  if (owner_ == nullptr) {  // Unowned.
+    owner_ = self;
+    CHECK_EQ(lock_count_, 0);
+    // When debugging, save the current monitor holder for future
+    // acquisition failures to use in sampled logging.
+    if (lock_profiling_threshold_ != 0) {
+      locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+    }
+  } else if (owner_ == self) {  // Recursive.
+    lock_count_++;
+  } else {
+    return false;
+  }
+  AtraceMonitorLock(self, GetObject(), false /* is_wait */);
+  return true;
+}
+
+bool Monitor::TryLock(Thread* self) {
+  MutexLock mu(self, monitor_lock_);
+  return TryLockLocked(self);
+}
+
 void Monitor::Lock(Thread* self) {
   MutexLock mu(self, monitor_lock_);
   while (true) {
-    if (owner_ == nullptr) {  // Unowned.
-      owner_ = self;
-      CHECK_EQ(lock_count_, 0);
-      // When debugging, save the current monitor holder for future
-      // acquisition failures to use in sampled logging.
-      if (lock_profiling_threshold_ != 0) {
-        locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
-      }
-      return;
-    } else if (owner_ == self) {  // Recursive.
-      lock_count_++;
+    if (TryLockLocked(self)) {
       return;
     }
     // Contended.
@@ -253,36 +354,86 @@
     monitor_lock_.Unlock(self);  // Let go of locks in order.
     self->SetMonitorEnterObject(GetObject());
     {
+      uint32_t original_owner_thread_id = 0u;
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
-      // Reacquire monitor_lock_ without mutator_lock_ for Wait.
-      MutexLock mu2(self, monitor_lock_);
-      if (owner_ != nullptr) {  // Did the owner_ give the lock up?
-        if (ATRACE_ENABLED()) {
-          std::string name;
-          owner_->GetThreadName(name);
-          ATRACE_BEGIN(("Contended on monitor with owner " + name).c_str());
+      {
+        // Reacquire monitor_lock_ without mutator_lock_ for Wait.
+        MutexLock mu2(self, monitor_lock_);
+        if (owner_ != nullptr) {  // Did the owner_ give the lock up?
+          original_owner_thread_id = owner_->GetThreadId();
+          if (ATRACE_ENABLED()) {
+            std::ostringstream oss;
+            std::string name;
+            owner_->GetThreadName(name);
+            oss << PrettyContentionInfo(name,
+                                        owner_->GetTid(),
+                                        owners_method,
+                                        owners_dex_pc,
+                                        num_waiters);
+            // Add info for contending thread.
+            uint32_t pc;
+            ArtMethod* m = self->GetCurrentMethod(&pc);
+            const char* filename;
+            int32_t line_number;
+            TranslateLocation(m, pc, &filename, &line_number);
+            oss << " blocking from "
+                << PrettyMethod(m) << "(" << (filename != nullptr ? filename : "null") << ":"
+                << line_number << ")";
+            ATRACE_BEGIN(oss.str().c_str());
+          }
+          monitor_contenders_.Wait(self);  // Still contended so wait.
         }
-        monitor_contenders_.Wait(self);  // Still contended so wait.
+      }
+      if (original_owner_thread_id != 0u) {
         // Woken from contention.
         if (log_contention) {
-          uint64_t wait_ms = MilliTime() - wait_start_ms;
-          uint32_t sample_percent;
-          if (wait_ms >= lock_profiling_threshold_) {
-            sample_percent = 100;
-          } else {
-            sample_percent = 100 * wait_ms / lock_profiling_threshold_;
-          }
-          if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
-            const char* owners_filename;
-            int32_t owners_line_number;
-            TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
-            if (wait_ms > kLongWaitMs && owners_method != nullptr) {
-              LOG(WARNING) << "Long monitor contention event with owner method="
-                  << PrettyMethod(owners_method) << " from " << owners_filename << ":"
-                  << owners_line_number << " waiters=" << num_waiters << " for "
-                  << PrettyDuration(MsToNs(wait_ms));
+          uint32_t original_owner_tid = 0;
+          std::string original_owner_name;
+          {
+            MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
+            // Re-find the owner in case the thread got killed.
+            Thread* original_owner = Runtime::Current()->GetThreadList()->FindThreadByThreadId(
+                original_owner_thread_id);
+            // Do not do any work that requires the mutator lock.
+            if (original_owner != nullptr) {
+              original_owner_tid = original_owner->GetTid();
+              original_owner->GetThreadName(original_owner_name);
             }
-            LogContentionEvent(self, wait_ms, sample_percent, owners_filename, owners_line_number);
+          }
+
+          if (original_owner_tid != 0u) {
+            uint64_t wait_ms = MilliTime() - wait_start_ms;
+            uint32_t sample_percent;
+            if (wait_ms >= lock_profiling_threshold_) {
+              sample_percent = 100;
+            } else {
+              sample_percent = 100 * wait_ms / lock_profiling_threshold_;
+            }
+            if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
+              if (wait_ms > kLongWaitMs && owners_method != nullptr) {
+                uint32_t pc;
+                ArtMethod* m = self->GetCurrentMethod(&pc);
+                // TODO: We should maybe check that original_owner is still a live thread.
+                LOG(WARNING) << "Long "
+                    << PrettyContentionInfo(original_owner_name,
+                                            original_owner_tid,
+                                            owners_method,
+                                            owners_dex_pc,
+                                            num_waiters)
+                    << " in " << PrettyMethod(m) << " for " << PrettyDuration(MsToNs(wait_ms));
+              }
+              const char* owners_filename;
+              int32_t owners_line_number;
+              TranslateLocation(owners_method,
+                                owners_dex_pc,
+                                &owners_filename,
+                                &owners_line_number);
+              LogContentionEvent(self,
+                                 wait_ms,
+                                 sample_percent,
+                                 owners_filename,
+                                 owners_line_number);
+            }
           }
         }
         ATRACE_END();
@@ -322,25 +473,34 @@
   return oss.str();
 }
 
-void Monitor::FailedUnlock(mirror::Object* o, Thread* expected_owner, Thread* found_owner,
+void Monitor::FailedUnlock(mirror::Object* o,
+                           uint32_t expected_owner_thread_id,
+                           uint32_t found_owner_thread_id,
                            Monitor* monitor) {
-  Thread* current_owner = nullptr;
+  // Acquire thread list lock so threads won't disappear from under us.
   std::string current_owner_string;
   std::string expected_owner_string;
   std::string found_owner_string;
+  uint32_t current_owner_thread_id = 0u;
   {
-    // TODO: isn't this too late to prevent threads from disappearing?
-    // Acquire thread list lock so threads won't disappear from under us.
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    ThreadList* const thread_list = Runtime::Current()->GetThreadList();
+    Thread* expected_owner = thread_list->FindThreadByThreadId(expected_owner_thread_id);
+    Thread* found_owner = thread_list->FindThreadByThreadId(found_owner_thread_id);
+
     // Re-read owner now that we hold lock.
-    current_owner = (monitor != nullptr) ? monitor->GetOwner() : nullptr;
+    Thread* current_owner = (monitor != nullptr) ? monitor->GetOwner() : nullptr;
+    if (current_owner != nullptr) {
+      current_owner_thread_id = current_owner->GetThreadId();
+    }
     // Get short descriptions of the threads involved.
     current_owner_string = ThreadToString(current_owner);
-    expected_owner_string = ThreadToString(expected_owner);
-    found_owner_string = ThreadToString(found_owner);
+    expected_owner_string = expected_owner != nullptr ? ThreadToString(expected_owner) : "unnamed";
+    found_owner_string = found_owner != nullptr ? ThreadToString(found_owner) : "unnamed";
   }
-  if (current_owner == nullptr) {
-    if (found_owner == nullptr) {
+
+  if (current_owner_thread_id == 0u) {
+    if (found_owner_thread_id == 0u) {
       ThrowIllegalMonitorStateExceptionF("unlock of unowned monitor on object of type '%s'"
                                          " on thread '%s'",
                                          PrettyTypeOf(o).c_str(),
@@ -354,7 +514,7 @@
                                          expected_owner_string.c_str());
     }
   } else {
-    if (found_owner == nullptr) {
+    if (found_owner_thread_id == 0u) {
       // Race: originally there was no owner, there is now
       ThrowIllegalMonitorStateExceptionF("unlock of monitor owned by '%s' on object of type '%s'"
                                          " (originally believed to be unowned) on thread '%s'",
@@ -362,7 +522,7 @@
                                          PrettyTypeOf(o).c_str(),
                                          expected_owner_string.c_str());
     } else {
-      if (found_owner != current_owner) {
+      if (found_owner_thread_id != current_owner_thread_id) {
         // Race: originally found and current owner have changed
         ThrowIllegalMonitorStateExceptionF("unlock of monitor originally owned by '%s' (now"
                                            " owned by '%s') on object of type '%s' on thread '%s'",
@@ -383,27 +543,32 @@
 
 bool Monitor::Unlock(Thread* self) {
   DCHECK(self != nullptr);
-  MutexLock mu(self, monitor_lock_);
-  Thread* owner = owner_;
-  if (owner == self) {
-    // We own the monitor, so nobody else can be in here.
-    if (lock_count_ == 0) {
-      owner_ = nullptr;
-      locking_method_ = nullptr;
-      locking_dex_pc_ = 0;
-      // Wake a contender.
-      monitor_contenders_.Signal(self);
-    } else {
-      --lock_count_;
+  uint32_t owner_thread_id = 0u;
+  {
+    MutexLock mu(self, monitor_lock_);
+    Thread* owner = owner_;
+    if (owner != nullptr) {
+      owner_thread_id = owner->GetThreadId();
     }
-  } else {
-    // We don't own this, so we're not allowed to unlock it.
-    // The JNI spec says that we should throw IllegalMonitorStateException
-    // in this case.
-    FailedUnlock(GetObject(), self, owner, this);
-    return false;
+    if (owner == self) {
+      // We own the monitor, so nobody else can be in here.
+      AtraceMonitorUnlock();
+      if (lock_count_ == 0) {
+        owner_ = nullptr;
+        locking_method_ = nullptr;
+        locking_dex_pc_ = 0;
+        // Wake a contender.
+        monitor_contenders_.Signal(self);
+      } else {
+        --lock_count_;
+      }
+      return true;
+    }
   }
-  return true;
+  // We don't own this, so we're not allowed to unlock it.
+  // The JNI spec says that we should throw IllegalMonitorStateException in this case.
+  FailedUnlock(GetObject(), self->GetThreadId(), owner_thread_id, this);
+  return false;
 }
 
 void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
@@ -454,6 +619,11 @@
   uintptr_t saved_dex_pc = locking_dex_pc_;
   locking_dex_pc_ = 0;
 
+  AtraceMonitorUnlock();  // For the implicit Unlock() just above. This will only end the deepest
+                          // nesting, but that is enough for the visualization, and corresponds to
+                          // the single Lock() we do afterwards.
+  AtraceMonitorLock(self, GetObject(), true /* is_wait */);
+
   bool was_interrupted = false;
   {
     // Update thread state. If the GC wakes up, it'll ignore us, knowing
@@ -485,10 +655,7 @@
         DCHECK(why == kTimedWaiting || why == kSleeping) << why;
         self->GetWaitConditionVariable()->TimedWait(self, ms, ns);
       }
-      if (self->IsInterruptedLocked()) {
-        was_interrupted = true;
-      }
-      self->SetInterruptedLocked(false);
+      was_interrupted = self->IsInterruptedLocked();
     }
   }
 
@@ -502,6 +669,26 @@
     self->SetWaitMonitor(nullptr);
   }
 
+  // Allocate the interrupted exception while not holding the monitor lock, since the allocation
+  // may cause a GC. If the GC were to require acquiring the monitor for enqueuing cleared
+  // references, holding the monitor here would deadlock.
+  if (was_interrupted && interruptShouldThrow) {
+    /*
+     * We were interrupted while waiting, or somebody interrupted an
+     * un-interruptible thread earlier and we're bailing out immediately.
+     *
+     * The doc sayeth: "The interrupted status of the current thread is
+     * cleared when this exception is thrown."
+     */
+    {
+      MutexLock mu(self, *self->GetWaitMutex());
+      self->SetInterruptedLocked(false);
+    }
+    self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
+  }
+
+  AtraceMonitorUnlock();  // End Wait().
+
   // Re-acquire the monitor and lock.
   Lock(self);
   monitor_lock_.Lock(self);
@@ -521,23 +708,6 @@
   RemoveFromWaitSet(self);
 
   monitor_lock_.Unlock(self);
-
-  if (was_interrupted) {
-    /*
-     * We were interrupted while waiting, or somebody interrupted an
-     * un-interruptible thread earlier and we're bailing out immediately.
-     *
-     * The doc sayeth: "The interrupted status of the current thread is
-     * cleared when this exception is thrown."
-     */
-    {
-      MutexLock mu(self, *self->GetWaitMutex());
-      self->SetInterruptedLocked(false);
-    }
-    if (interruptShouldThrow) {
-      self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
-    }
-  }
 }
 
 void Monitor::Notify(Thread* self) {
@@ -604,20 +774,21 @@
         return false;
       }
       // Deflate to a thin lock.
-      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_,
-                                                 lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(),
+                                                 monitor->lock_count_,
+                                                 lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
           << monitor->lock_count_;
     } else if (monitor->HasHashCode()) {
-      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
     } else {
       // No lock and no hash, just put an empty lock word inside the object.
-      LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromDefault(lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
+      VLOG(monitor) << "Deflated " << obj << " to empty lock word";
@@ -693,7 +864,7 @@
   return obj;
 }
 
-mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
+mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj, bool trylock) {
   DCHECK(self != nullptr);
   DCHECK(obj != nullptr);
   self->AssertThreadSuspensionIsAllowable();
@@ -706,8 +877,9 @@
     LockWord lock_word = h_obj->GetLockWord(true);
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
-        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
+        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
         if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
+          AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
           // CasLockWord enforces more than the acquire ordering we need here.
           return h_obj.Get();  // Success!
         }
@@ -719,14 +891,17 @@
           // We own the lock, increase the recursion count.
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
-            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count,
-                                                          lock_word.ReadBarrierState()));
+            LockWord thin_locked(LockWord::FromThinLockId(thread_id,
+                                                          new_count,
+                                                          lock_word.GCState()));
             if (!kUseReadBarrier) {
               h_obj->SetLockWord(thin_locked, true);
+              AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
               return h_obj.Get();  // Success!
             } else {
               // Use CAS to preserve the read barrier state.
               if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
+                AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
                 return h_obj.Get();  // Success!
               }
             }
@@ -736,6 +911,9 @@
             InflateThinLocked(self, h_obj, lock_word, 0);
           }
         } else {
+          if (trylock) {
+            return nullptr;
+          }
           // Contention.
           contention_count++;
           Runtime* runtime = Runtime::Current();
@@ -754,8 +932,12 @@
       }
       case LockWord::kFatLocked: {
         Monitor* mon = lock_word.FatLockMonitor();
-        mon->Lock(self);
-        return h_obj.Get();  // Success!
+        if (trylock) {
+          return mon->TryLock(self) ? h_obj.Get() : nullptr;
+        } else {
+          mon->Lock(self);
+          return h_obj.Get();  // Success!
+        }
       }
       case LockWord::kHashCode:
         // Inflate with the existing hashcode.
@@ -763,7 +945,7 @@
         continue;  // Start from the beginning.
       default: {
         LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
-        return h_obj.Get();
+        UNREACHABLE();
       }
     }
   }
@@ -782,34 +964,33 @@
       case LockWord::kHashCode:
         // Fall-through.
       case LockWord::kUnlocked:
-        FailedUnlock(h_obj.Get(), self, nullptr, nullptr);
+        FailedUnlock(h_obj.Get(), self->GetThreadId(), 0u, nullptr);
         return false;  // Failure.
       case LockWord::kThinLocked: {
         uint32_t thread_id = self->GetThreadId();
         uint32_t owner_thread_id = lock_word.ThinLockOwner();
         if (owner_thread_id != thread_id) {
-          // TODO: there's a race here with the owner dying while we unlock.
-          Thread* owner =
-              Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
-          FailedUnlock(h_obj.Get(), self, owner, nullptr);
+          FailedUnlock(h_obj.Get(), thread_id, owner_thread_id, nullptr);
           return false;  // Failure.
         } else {
           // We own the lock, decrease the recursion count.
           LockWord new_lw = LockWord::Default();
           if (lock_word.ThinLockCount() != 0) {
             uint32_t new_count = lock_word.ThinLockCount() - 1;
-            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState());
+            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
           } else {
-            new_lw = LockWord::FromDefault(lock_word.ReadBarrierState());
+            new_lw = LockWord::FromDefault(lock_word.GCState());
           }
           if (!kUseReadBarrier) {
             DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
             h_obj->SetLockWord(new_lw, true);
+            AtraceMonitorUnlock();
             // Success!
             return true;
           } else {
             // Use CAS to preserve the read barrier state.
             if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, new_lw)) {
+              AtraceMonitorUnlock();
               // Success!
               return true;
             }
@@ -1034,15 +1215,15 @@
   for (uint32_t monitor_dex_pc : monitor_enter_dex_pcs) {
     // The verifier works in terms of the dex pcs of the monitor-enter instructions.
     // We want the registers used by those instructions (so we can read the values out of them).
-    uint16_t monitor_enter_instruction = code_item->insns_[monitor_dex_pc];
+    const Instruction* monitor_enter_instruction =
+        Instruction::At(&code_item->insns_[monitor_dex_pc]);
 
     // Quick sanity check.
-    if ((monitor_enter_instruction & 0xff) != Instruction::MONITOR_ENTER) {
-      LOG(FATAL) << "expected monitor-enter @" << monitor_dex_pc << "; was "
-                 << reinterpret_cast<void*>(monitor_enter_instruction);
-    }
+    CHECK_EQ(monitor_enter_instruction->Opcode(), Instruction::MONITOR_ENTER)
+      << "expected monitor-enter @" << monitor_dex_pc << "; was "
+      << reinterpret_cast<const void*>(monitor_enter_instruction);
 
-    uint16_t monitor_register = ((monitor_enter_instruction >> 8) & 0xff);
+    uint16_t monitor_register = monitor_enter_instruction->VRegA();
     uint32_t value;
     bool success = stack_visitor->GetVReg(m, monitor_register, kReferenceVReg, &value);
     CHECK(success) << "Failed to read v" << monitor_register << " of kind "
@@ -1085,8 +1266,10 @@
   return owner_ != nullptr;
 }
 
-void Monitor::TranslateLocation(ArtMethod* method, uint32_t dex_pc,
-                                const char** source_file, int32_t* line_number) const {
+void Monitor::TranslateLocation(ArtMethod* method,
+                                uint32_t dex_pc,
+                                const char** source_file,
+                                int32_t* line_number) {
   // If method is null, location is unknown
   if (method == nullptr) {
     *source_file = "";
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 707d0f1..1d829e1 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -55,15 +55,14 @@
 
   ~Monitor();
 
-  static bool IsSensitiveThread();
-  static void Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)());
+  static void Init(uint32_t lock_profiling_threshold);
 
   // Return the thread id of the lock owner or 0 when there is no owner.
   static uint32_t GetLockOwnerThreadId(mirror::Object* obj)
       NO_THREAD_SAFETY_ANALYSIS;  // TODO: Reading lock owner without holding lock is racy.
 
   // NO_THREAD_SAFETY_ANALYSIS for mon->Lock.
-  static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj)
+  static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj, bool trylock)
       EXCLUSIVE_LOCK_FUNCTION(obj)
       NO_THREAD_SAFETY_ANALYSIS
       REQUIRES(!Roles::uninterruptible_)
@@ -186,9 +185,21 @@
                           const char* owner_filename, int32_t owner_line_number)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static void FailedUnlock(mirror::Object* obj, Thread* expected_owner, Thread* found_owner,
+  static void FailedUnlock(mirror::Object* obj,
+                           uint32_t expected_owner_thread_id,
+                           uint32_t found_owner_thread_id,
                            Monitor* mon)
-      REQUIRES(!Locks::thread_list_lock_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !monitor_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to lock without blocking, returns true if we acquired the lock.
+  bool TryLock(Thread* self)
+      REQUIRES(!monitor_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  // Variant for already holding the monitor lock.
+  bool TryLockLocked(Thread* self)
+      REQUIRES(monitor_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void Lock(Thread* self)
@@ -209,6 +220,13 @@
       REQUIRES(!monitor_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static std::string PrettyContentionInfo(const std::string& owner_name,
+                                          pid_t owner_tid,
+                                          ArtMethod* owners_method,
+                                          uint32_t owners_dex_pc,
+                                          size_t num_waiters)
+      REQUIRES(!Locks::thread_list_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Wait on a monitor until timeout, interrupt, or notification.  Used for Object.wait() and
   // (somewhat indirectly) Thread.sleep() and Thread.join().
@@ -234,13 +252,24 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
-  void TranslateLocation(ArtMethod* method, uint32_t pc,
-                         const char** source_file, int32_t* line_number) const
+  static void TranslateLocation(ArtMethod* method, uint32_t pc,
+                                const char** source_file,
+                                int32_t* line_number)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   uint32_t GetOwnerThreadId() REQUIRES(!monitor_lock_);
 
-  static bool (*is_sensitive_thread_hook_)();
+  // Support for systrace output of monitor operations.
+  ALWAYS_INLINE static void AtraceMonitorLock(Thread* self,
+                                              mirror::Object* obj,
+                                              bool is_wait)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  static void AtraceMonitorLockImpl(Thread* self,
+                                    mirror::Object* obj,
+                                    bool is_wait)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE static void AtraceMonitorUnlock();
+
   static uint32_t lock_profiling_threshold_;
 
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 82ef2d8..671cb60 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -66,7 +66,7 @@
   cp = EventLogWriteString(cp, procName, len);
 
   // Emit the sensitive thread ("main thread") status, 5 bytes.
-  cp = EventLogWriteInt(cp, Monitor::IsSensitiveThread());
+  cp = EventLogWriteInt(cp, Thread::IsSensitiveThread());
 
   // Emit self thread name string, <= 37 bytes.
   std::string thread_name;
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 2832e32..a47a4b2 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -28,7 +28,11 @@
 }  // namespace mirror
 
 MonitorPool::MonitorPool()
-    : num_chunks_(0), capacity_(0), first_free_(nullptr) {
+    : current_chunk_list_index_(0), num_chunks_(0), current_chunk_list_capacity_(0),
+    first_free_(nullptr) {
+  for (size_t i = 0; i < kMaxChunkLists; ++i) {
+    monitor_chunks_[i] = nullptr;  // Not absolutely required, but ...
+  }
   AllocateChunk();  // Get our first chunk.
 }
 
@@ -37,23 +41,19 @@
 void MonitorPool::AllocateChunk() {
   DCHECK(first_free_ == nullptr);
 
-  // Do we need to resize?
-  if (num_chunks_ == capacity_) {
-    if (capacity_ == 0U) {
-      // Initialization.
-      capacity_ = kInitialChunkStorage;
-      uintptr_t* new_backing = new uintptr_t[capacity_];
-      monitor_chunks_.StoreRelaxed(new_backing);
-    } else {
-      size_t new_capacity = 2 * capacity_;
-      uintptr_t* new_backing = new uintptr_t[new_capacity];
-      uintptr_t* old_backing = monitor_chunks_.LoadRelaxed();
-      memcpy(new_backing, old_backing, sizeof(uintptr_t) * capacity_);
-      monitor_chunks_.StoreRelaxed(new_backing);
-      capacity_ = new_capacity;
-      old_chunk_arrays_.push_back(old_backing);
-      VLOG(monitor) << "Resizing to capacity " << capacity_;
-    }
+  // Do we need to allocate another chunk list?
+  if (num_chunks_ == current_chunk_list_capacity_) {
+    if (current_chunk_list_capacity_ != 0U) {
+      ++current_chunk_list_index_;
+      CHECK_LT(current_chunk_list_index_, kMaxChunkLists) << "Out of space for inflated monitors";
+      VLOG(monitor) << "Expanding to capacity "
+          << 2 * ChunkListCapacity(current_chunk_list_index_) - kInitialChunkStorage;
+    }  // else we're initializing
+    current_chunk_list_capacity_ = ChunkListCapacity(current_chunk_list_index_);
+    uintptr_t* new_list = new uintptr_t[current_chunk_list_capacity_]();
+    DCHECK(monitor_chunks_[current_chunk_list_index_] == nullptr);
+    monitor_chunks_[current_chunk_list_index_] = new_list;
+    num_chunks_ = 0;
   }
 
   // Allocate the chunk.
@@ -64,7 +64,7 @@
   CHECK_EQ(0U, reinterpret_cast<uintptr_t>(chunk) % kMonitorAlignment);
 
   // Add the chunk.
-  *(monitor_chunks_.LoadRelaxed() + num_chunks_) = reinterpret_cast<uintptr_t>(chunk);
+  monitor_chunks_[current_chunk_list_index_][num_chunks_] = reinterpret_cast<uintptr_t>(chunk);
   num_chunks_++;
 
   // Set up the free list
@@ -72,8 +72,8 @@
                                              (kChunkCapacity - 1) * kAlignedMonitorSize);
   last->next_free_ = nullptr;
   // Eagerly compute id.
-  last->monitor_id_ = OffsetToMonitorId((num_chunks_ - 1) * kChunkSize +
-                                        (kChunkCapacity - 1) * kAlignedMonitorSize);
+  last->monitor_id_ = OffsetToMonitorId(current_chunk_list_index_ * (kMaxListSize * kChunkSize)
+      + (num_chunks_ - 1) * kChunkSize + (kChunkCapacity - 1) * kAlignedMonitorSize);
   for (size_t i = 0; i < kChunkCapacity - 1; ++i) {
     Monitor* before = reinterpret_cast<Monitor*>(reinterpret_cast<uintptr_t>(last) -
                                                  kAlignedMonitorSize);
@@ -88,6 +88,23 @@
   first_free_ = last;
 }
 
+void MonitorPool::FreeInternal() {
+  // This runs on shutdown with NO_THREAD_SAFETY_ANALYSIS; we cannot take locks here, and do
+  // not need to.
+  DCHECK_NE(current_chunk_list_capacity_, 0UL);
+  for (size_t i = 0; i <= current_chunk_list_index_; ++i) {
+    DCHECK_NE(monitor_chunks_[i], static_cast<uintptr_t*>(nullptr));
+    for (size_t j = 0; j < ChunkListCapacity(i); ++j) {
+      if (i < current_chunk_list_index_ || j < num_chunks_) {
+        DCHECK_NE(monitor_chunks_[i][j], 0U);
+        allocator_.deallocate(reinterpret_cast<uint8_t*>(monitor_chunks_[i][j]), kChunkSize);
+      } else {
+        DCHECK_EQ(monitor_chunks_[i][j], 0U);
+      }
+    }
+    delete[] monitor_chunks_[i];
+  }
+}
+
 Monitor* MonitorPool::CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj,
                                           int32_t hash_code)
     SHARED_REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 240ca61..99810e0 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -104,6 +104,12 @@
 #endif
   }
 
+  ~MonitorPool() {
+#ifdef __LP64__
+    FreeInternal();
+#endif
+  }
+
  private:
 #ifdef __LP64__
   // When we create a monitor pool, threads have not been initialized, yet, so ignore thread-safety
@@ -112,18 +118,27 @@
 
   void AllocateChunk() REQUIRES(Locks::allocated_monitor_ids_lock_);
 
+  // Release all chunks and metadata. This is done on shutdown, where threads have been destroyed,
+  // so ignore thread-safety analysis.
+  void FreeInternal() NO_THREAD_SAFETY_ANALYSIS;
+
   Monitor* CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
   void ReleaseMonitorsToPool(Thread* self, MonitorList::Monitors* monitors);
 
-  // Note: This is safe as we do not ever move chunks.
+  // Note: This is safe as we do not ever move chunks.  All needed entries in the monitor_chunks_
+  // data structure are read-only once we get here.  Updates happen-before this call because
+  // the lock word was stored with release semantics and we read it with acquire semantics to
+  // retrieve the id.
   Monitor* LookupMonitor(MonitorId mon_id) {
     size_t offset = MonitorIdToOffset(mon_id);
     size_t index = offset / kChunkSize;
+    size_t top_index = index / kMaxListSize;
+    size_t list_index = index % kMaxListSize;
     size_t offset_in_chunk = offset % kChunkSize;
-    uintptr_t base = *(monitor_chunks_.LoadRelaxed()+index);
+    uintptr_t base = monitor_chunks_[top_index][list_index];
     return reinterpret_cast<Monitor*>(base + offset_in_chunk);
   }
 
@@ -132,28 +147,37 @@
     return base_addr <= mon_ptr && (mon_ptr - base_addr < kChunkSize);
   }
 
-  // Note: This is safe as we do not ever move chunks.
   MonitorId ComputeMonitorIdInPool(Monitor* mon, Thread* self) {
     MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
-    for (size_t index = 0; index < num_chunks_; ++index) {
-      uintptr_t chunk_addr = *(monitor_chunks_.LoadRelaxed() + index);
-      if (IsInChunk(chunk_addr, mon)) {
-        return OffsetToMonitorId(
-            reinterpret_cast<uintptr_t>(mon) - chunk_addr + index * kChunkSize);
+    for (size_t i = 0; i <= current_chunk_list_index_; ++i) {
+      for (size_t j = 0; j < ChunkListCapacity(i); ++j) {
+        if (j >= num_chunks_ && i == current_chunk_list_index_) {
+          break;
+        }
+        uintptr_t chunk_addr = monitor_chunks_[i][j];
+        if (IsInChunk(chunk_addr, mon)) {
+          return OffsetToMonitorId(
+              reinterpret_cast<uintptr_t>(mon) - chunk_addr
+              + i * (kMaxListSize * kChunkSize) + j * kChunkSize);
+        }
       }
     }
     LOG(FATAL) << "Did not find chunk that contains monitor.";
     return 0;
   }
 
-  static size_t MonitorIdToOffset(MonitorId id) {
+  static constexpr size_t MonitorIdToOffset(MonitorId id) {
     return id << 3;
   }
 
-  static MonitorId OffsetToMonitorId(size_t offset) {
+  static constexpr MonitorId OffsetToMonitorId(size_t offset) {
     return static_cast<MonitorId>(offset >> 3);
   }
 
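+  // Capacity of the index-th chunk list; each successive list doubles the previous one,
+  // e.g. (illustrative) ChunkListCapacity(0) == kInitialChunkStorage and
+  // ChunkListCapacity(3) == 8 * kInitialChunkStorage.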
+  static constexpr size_t ChunkListCapacity(size_t index) {
+    return kInitialChunkStorage << index;
+  }
+
   // TODO: There are assumptions in the code that monitor addresses are 8B aligned (>>3).
   static constexpr size_t kMonitorAlignment = 8;
   // Size of a monitor, rounded up to a multiple of alignment.
@@ -164,19 +188,47 @@
   // Chunk size that is referenced in the id. We can collapse this to the actually used storage
   // in a chunk, i.e., kChunkCapacity * kAlignedMonitorSize, but this will mean proper divisions.
   static constexpr size_t kChunkSize = kPageSize;
-  // The number of initial chunks storable in monitor_chunks_. The number is large enough to make
-  // resizing unlikely, but small enough to not waste too much memory.
-  static constexpr size_t kInitialChunkStorage = 8U;
+  static_assert(IsPowerOfTwo(kChunkSize), "kChunkSize must be power of 2");
+  // The number of chunks of storage that can be referenced by the initial chunk list.
+  // The total number of usable monitor chunks is typically 255 times this number, so it
+  // should be large enough that we don't run out. We run out of address bits if it's > 512.
+  // Currently we set it a bit smaller, to save half a page per process.  We make it tiny in
+  // debug builds to catch growth errors. This is the only value we really expect to tune.
+  static constexpr size_t kInitialChunkStorage = kIsDebugBuild ? 1U : 256U;
+  static_assert(IsPowerOfTwo(kInitialChunkStorage), "kInitialChunkStorage must be power of 2");
+  // The number of lists, each containing pointers to storage chunks.
+  static constexpr size_t kMaxChunkLists = 8;  // Dictated by 3-bit index. Don't increase above 8.
+  static_assert(IsPowerOfTwo(kMaxChunkLists), "kMaxChunkLists must be power of 2");
+  static constexpr size_t kMaxListSize = kInitialChunkStorage << (kMaxChunkLists - 1);
+  // We lose 3 bits in monitor id due to 3 bit monitor_chunks_ index, and gain it back from
+  // the 3 bit alignment constraint on monitors:
+  static_assert(kMaxListSize * kChunkSize < (1 << LockWord::kMonitorIdSize),
+      "Monitor id bits don't fit");
+  static_assert(IsPowerOfTwo(kMaxListSize), "kMaxListSize must be power of 2");
 
-  // List of memory chunks. Each chunk is kChunkSize.
-  Atomic<uintptr_t*> monitor_chunks_;
-  // Number of chunks stored.
+  // Array of pointers to lists (again arrays) of pointers to chunks containing monitors.
+  // Zeroth entry points to a list (array) of kInitialChunkStorage pointers to chunks.
+  // Each subsequent list is twice as large as the preceding one.
+  // Monitor Ids are interpreted as follows:
+  //     Top 3 bits (of 28): index into monitor_chunks_.
+  //     Next 16 bits: index into the chunk list, i.e. monitor_chunks_[i].
+  //     Last 9 bits: offset within chunk, expressed as multiple of kMonitorAlignment.
+  // If we set kInitialChunkStorage to 512, this would allow us to use roughly 128K chunks of
+  // monitors, which is 0.5GB of monitors.  With this maximum setting, the largest chunk list
+  // contains 64K entries, and we make full use of the available index space. With a
+  // kInitialChunkStorage value of 256, this is proportionately reduced to 0.25GB of monitors.
+  // Updates to monitor_chunks_ are guarded by allocated_monitor_ids_lock_.
+  // No field in this entire data structure is ever updated once a monitor id whose lookup
+  // requires it has been made visible to another thread.  Thus readers never race with
+  // updates, in spite of the fact that they acquire no locks.
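+  //
+  // Illustrative lookup (mirroring LookupMonitor): given a monitor id, offset = id << 3 and
+  // index = offset / kChunkSize; monitor_chunks_[index / kMaxListSize][index % kMaxListSize]
+  // is the chunk's base address, and offset % kChunkSize locates the monitor within the chunk.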
+  uintptr_t* monitor_chunks_[kMaxChunkLists];  // uintptr_t is really a Monitor*.
+  // Highest currently used index in monitor_chunks_. Used for newly allocated chunks.
+  size_t current_chunk_list_index_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+  // Number of chunk pointers stored in monitor_chunks_[current_chunk_list_index_] so far.
   size_t num_chunks_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
-  // Number of chunks storable.
-  size_t capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
-
-  // To avoid race issues when resizing, we keep all the previous arrays.
-  std::vector<uintptr_t*> old_chunk_arrays_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+  // After the initial allocation, this is always equal to
+  // ChunkListCapacity(current_chunk_list_index_).
+  size_t current_chunk_list_capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
 
   typedef TrackingAllocator<uint8_t, kAllocatorTagMonitorPool> Allocator;
   Allocator allocator_;
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index 83e0c0d..48d256c 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -26,6 +26,7 @@
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"  // Strings are easiest to allocate
+#include "object_lock.h"
 #include "scoped_thread_state_change.h"
 #include "thread_pool.h"
 
@@ -374,4 +375,60 @@
                   "Monitor test thread pool 3");
 }
 
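+// Helper task that try-locks an object expected to be held by another thread; used below to
+// exercise the failure path of ObjectTryLock.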
+class TryLockTask : public Task {
+ public:
+  explicit TryLockTask(Handle<mirror::Object> obj) : obj_(obj) {}
+
+  void Run(Thread* self) {
+    ScopedObjectAccess soa(self);
+    // Lock is held by other thread, try lock should fail.
+    ObjectTryLock<mirror::Object> lock(self, obj_);
+    EXPECT_FALSE(lock.Acquired());
+  }
+
+  void Finalize() {
+    delete this;
+  }
+
+ private:
+  Handle<mirror::Object> obj_;
+};
+
+// Test trylock in deadlock scenarios.
+TEST_F(MonitorTest, TestTryLock) {
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  Thread* const self = Thread::Current();
+  ThreadPool thread_pool("the pool", 2);
+  ScopedObjectAccess soa(self);
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Object> obj1(
+      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
+  Handle<mirror::Object> obj2(
+      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
+  {
+    ObjectLock<mirror::Object> lock1(self, obj1);
+    ObjectLock<mirror::Object> lock2(self, obj2);
+    {
+      ObjectTryLock<mirror::Object> trylock(self, obj1);
+      EXPECT_TRUE(trylock.Acquired());
+    }
+    // Test failure case.
+    thread_pool.AddTask(self, new TryLockTask(obj1));
+    thread_pool.StartWorkers(self);
+    ScopedThreadSuspension sts(self, kSuspended);
+    thread_pool.Wait(Thread::Current(), /*do_work*/ false, /*may_hold_locks*/ false);
+  }
+  // Test that the trylock actually locks the object.
+  {
+    ObjectTryLock<mirror::Object> trylock(self, obj1);
+    EXPECT_TRUE(trylock.Acquired());
+    obj1->Notify(self);
+    // Since we hold the lock, there should be no monitor state exception.
+    self->AssertNoPendingException();
+  }
+  thread_pool.StopWorkers(self);
+}
+
 }  // namespace art
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 8b2f4d8..b2349fc 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -16,20 +16,23 @@
 
 #include "dalvik_system_DexFile.h"
 
+#include <sstream>
+
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "class_linker.h"
 #include "common_throws.h"
+#include "compiler_filter.h"
 #include "dex_file-inl.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -149,8 +152,13 @@
   void operator=(const NullableScopedUtfChars&);
 };
 
-static jobject DexFile_openDexFileNative(
-    JNIEnv* env, jclass, jstring javaSourceName, jstring javaOutputName, jint) {
+static jobject DexFile_openDexFileNative(JNIEnv* env,
+                                         jclass,
+                                         jstring javaSourceName,
+                                         jstring javaOutputName,
+                                         jint flags ATTRIBUTE_UNUSED,
+                                         jobject class_loader,
+                                         jobjectArray dex_elements) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == nullptr) {
     return 0;
@@ -159,7 +167,6 @@
   if (env->ExceptionCheck()) {
     return 0;
   }
-
   Runtime* const runtime = Runtime::Current();
   ClassLinker* linker = runtime->GetClassLinker();
   std::vector<std::unique_ptr<const DexFile>> dex_files;
@@ -168,6 +175,8 @@
 
   dex_files = runtime->GetOatFileManager().OpenDexFilesFromOat(sourceName.c_str(),
                                                                outputName.c_str(),
+                                                               class_loader,
+                                                               dex_elements,
                                                                /*out*/ &oat_file,
                                                                /*out*/ &error_msgs);
 
@@ -268,9 +277,7 @@
       StackHandleScope<1> hs(soa.Self());
       Handle<mirror::ClassLoader> class_loader(
           hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
-      class_linker->RegisterDexFile(
-          *dex_file,
-          class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()));
+      class_linker->RegisterDexFile(*dex_file, class_loader.Get());
       mirror::Class* result = class_linker->DefineClass(soa.Self(),
                                                         descriptor.c_str(),
                                                         hash,
@@ -341,15 +348,15 @@
 
 static jint GetDexOptNeeded(JNIEnv* env,
                             const char* filename,
-                            const char* pkgname,
                             const char* instruction_set,
-                            const jboolean defer) {
+                            const char* compiler_filter_name,
+                            bool profile_changed) {
   if ((filename == nullptr) || !OS::FileExists(filename)) {
     LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
     const char* message = (filename == nullptr) ? "<empty file name>" : filename;
     env->ThrowNew(fnfe.get(), message);
-    return OatFileAssistant::kNoDexOptNeeded;
+    return -1;
   }
 
   const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
@@ -357,74 +364,227 @@
     ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
     std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set));
     env->ThrowNew(iae.get(), message.c_str());
-    return 0;
+    return -1;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter_name, &filter)) {
+    ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+    std::string message(StringPrintf("Compiler filter %s is invalid.", compiler_filter_name));
+    env->ThrowNew(iae.get(), message.c_str());
+    return -1;
   }
 
   // TODO: Verify the dex location is well formed, and throw an IOException if
   // not?
 
-  OatFileAssistant oat_file_assistant(filename, target_instruction_set, false, pkgname);
+  OatFileAssistant oat_file_assistant(filename, target_instruction_set, false);
 
   // Always treat elements of the bootclasspath as up-to-date.
   if (oat_file_assistant.IsInBootClassPath()) {
     return OatFileAssistant::kNoDexOptNeeded;
   }
+  return oat_file_assistant.GetDexOptNeeded(filter, profile_changed);
+}
 
-  // TODO: Checking the profile should probably be done in the GetStatus()
-  // function. We have it here because GetStatus() should not be copying
-  // profile files. But who should be copying profile files?
-  if (oat_file_assistant.OdexFileIsOutOfDate()) {
-    // Needs recompile if profile has changed significantly.
-    if (Runtime::Current()->GetProfilerOptions().IsEnabled()) {
-      if (oat_file_assistant.IsProfileChangeSignificant()) {
-        if (!defer) {
-          oat_file_assistant.CopyProfileFile();
-        }
-        return OatFileAssistant::kDex2OatNeeded;
-      } else if (oat_file_assistant.ProfileExists()
-          && !oat_file_assistant.OldProfileExists()) {
-        if (!defer) {
-          oat_file_assistant.CopyProfileFile();
-        }
-      }
-    }
+static jstring DexFile_getDexFileStatus(JNIEnv* env,
+                                        jclass,
+                                        jstring javaFilename,
+                                        jstring javaInstructionSet) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return nullptr;
   }
 
-  return oat_file_assistant.GetDexOptNeeded();
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(
+      instruction_set.c_str());
+  if (target_instruction_set == kNone) {
+    ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+    std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set.c_str()));
+    env->ThrowNew(iae.get(), message.c_str());
+    return nullptr;
+  }
+
+  OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
+                                      false /* load_executable */);
+
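+  // Assemble a status string of the form (hypothetical values):
+  //   "<oat file> [compilation_filter=speed, status=<status>]"
+  // covering the oat and/or odex file, or "invalid[]" when neither exists.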
+  std::ostringstream status;
+  bool oat_file_exists = false;
+  bool odex_file_exists = false;
+  if (oat_file_assistant.OatFileExists()) {
+    oat_file_exists = true;
+    status << *oat_file_assistant.OatFileName() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_file_assistant.OatFileCompilerFilter());
+    status << ", status=" << oat_file_assistant.OatFileStatus();
+  }
+
+  if (oat_file_assistant.OdexFileExists()) {
+    odex_file_exists = true;
+    if (oat_file_exists) {
+      status << "] ";
+    }
+    status << *oat_file_assistant.OdexFileName() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_file_assistant.OdexFileCompilerFilter());
+    status << ", status=" << oat_file_assistant.OdexFileStatus();
+  }
+
+  if (!oat_file_exists && !odex_file_exists) {
+    status << "invalid[";
+  }
+
+  status << "]";
+  return env->NewStringUTF(status.str().c_str());
 }
 
 static jint DexFile_getDexOptNeeded(JNIEnv* env,
                                     jclass,
                                     jstring javaFilename,
-                                    jstring javaPkgname,
                                     jstring javaInstructionSet,
-                                    jboolean defer) {
+                                    jstring javaTargetCompilerFilter,
+                                    jboolean newProfile) {
   ScopedUtfChars filename(env, javaFilename);
   if (env->ExceptionCheck()) {
-    return 0;
+    return -1;
   }
 
-  NullableScopedUtfChars pkgname(env, javaPkgname);
-
   ScopedUtfChars instruction_set(env, javaInstructionSet);
   if (env->ExceptionCheck()) {
-    return 0;
+    return -1;
+  }
+
+  ScopedUtfChars target_compiler_filter(env, javaTargetCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return -1;
   }
 
   return GetDexOptNeeded(env,
                          filename.c_str(),
-                         pkgname.c_str(),
                          instruction_set.c_str(),
-                         defer);
+                         target_compiler_filter.c_str(),
+                         newProfile == JNI_TRUE);
 }
 
-// public API, null pkgname
+// public API
 static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
-  const char* instruction_set = GetInstructionSetString(kRuntimeISA);
+  ScopedUtfChars filename_utf(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  const char* filename = filename_utf.c_str();
+  if ((filename == nullptr) || !OS::FileExists(filename)) {
+    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
+    ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
+    const char* message = (filename == nullptr) ? "<empty file name>" : filename;
+    env->ThrowNew(fnfe.get(), message);
+    return JNI_FALSE;
+  }
+
+  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false);
+  return oat_file_assistant.IsUpToDate() ? JNI_FALSE : JNI_TRUE;
+}
+
+static jboolean DexFile_isValidCompilerFilter(JNIEnv* env,
+                                              jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                              jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  CompilerFilter::Filter filter;
+  return CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)
+      ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean DexFile_isProfileGuidedCompilerFilter(JNIEnv* env,
+                                                      jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                                      jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) {
+    return JNI_FALSE;
+  }
+  return CompilerFilter::DependsOnProfile(filter) ? JNI_TRUE : JNI_FALSE;
+}
+
+static jstring DexFile_getNonProfileGuidedCompilerFilter(JNIEnv* env,
+                                                         jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                                         jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) {
+    return javaCompilerFilter;
+  }
+
+  CompilerFilter::Filter new_filter = CompilerFilter::GetNonProfileDependentFilterFrom(filter);
+
+  // Filter stayed the same, return input.
+  if (filter == new_filter) {
+    return javaCompilerFilter;
+  }
+
+  // Create a new string object and return.
+  std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter);
+  return env->NewStringUTF(new_filter_str.c_str());
+}
+
+static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) {
+  const OatFile* oat_file = nullptr;
+  std::vector<const DexFile*> dex_files;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, /* out */ dex_files, /* out */ oat_file)) {
+    DCHECK(env->ExceptionCheck());
+    return false;
+  }
+  return oat_file != nullptr;
+}
+
+static jstring DexFile_getDexFileOutputPath(JNIEnv* env,
+                                            jclass,
+                                            jstring javaFilename,
+                                            jstring javaInstructionSet) {
   ScopedUtfChars filename(env, javaFilename);
-  jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */,
-                                instruction_set, false /* defer */);
-  return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(
+      instruction_set.c_str());
+  if (target_instruction_set == kNone) {
+    ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+    std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set.c_str()));
+    env->ThrowNew(iae.get(), message.c_str());
+    return nullptr;
+  }
+
+  OatFileAssistant oat_file_assistant(filename.c_str(),
+                                      target_instruction_set,
+                                      false /* load_executable */);
+
+  std::unique_ptr<OatFile> best_oat_file = oat_file_assistant.GetBestOatFile();
+  if (best_oat_file == nullptr) {
+    return nullptr;
+  }
+
+  return env->NewStringUTF(best_oat_file->GetLocation().c_str());
 }
 
 static JNINativeMethod gMethods[] = {
@@ -441,7 +601,22 @@
   NATIVE_METHOD(DexFile, getDexOptNeeded,
                 "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
-                "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"),
+                "(Ljava/lang/String;"
+                "Ljava/lang/String;"
+                "I"
+                "Ljava/lang/ClassLoader;"
+                "[Ldalvik/system/DexPathList$Element;"
+                ")Ljava/lang/Object;"),
+  NATIVE_METHOD(DexFile, isValidCompilerFilter, "(Ljava/lang/String;)Z"),
+  NATIVE_METHOD(DexFile, isProfileGuidedCompilerFilter, "(Ljava/lang/String;)Z"),
+  NATIVE_METHOD(DexFile,
+                getNonProfileGuidedCompilerFilter,
+                "(Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
+  NATIVE_METHOD(DexFile, getDexFileStatus,
+                "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, getDexFileOutputPath,
+                "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 8febb62..8f108fa 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -314,32 +314,33 @@
   size_t largeObjectsSize = 0;
   size_t largeObjectsUsed = 0;
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
-    if (space->IsImageSpace()) {
-      // Currently don't include the image space.
-    } else if (space->IsZygoteSpace()) {
-      gc::space::ZygoteSpace* zygote_space = space->AsZygoteSpace();
-      zygoteSize += zygote_space->Size();
-      zygoteUsed += zygote_space->GetBytesAllocated();
-    } else if (space->IsMallocSpace()) {
-      // This is a malloc space.
-      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
-      allocSize += malloc_space->GetFootprint();
-      allocUsed += malloc_space->GetBytesAllocated();
-    } else if (space->IsBumpPointerSpace()) {
-      ScopedObjectAccess soa(env);
-      gc::space::BumpPointerSpace* bump_pointer_space = space->AsBumpPointerSpace();
-      allocSize += bump_pointer_space->Size();
-      allocUsed += bump_pointer_space->GetBytesAllocated();
+  {
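+    // Take the mutator lock once so that both space walks below run under it.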
+    ScopedObjectAccess soa(env);
+    for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
+      if (space->IsImageSpace()) {
+        // Currently don't include the image space.
+      } else if (space->IsZygoteSpace()) {
+        gc::space::ZygoteSpace* zygote_space = space->AsZygoteSpace();
+        zygoteSize += zygote_space->Size();
+        zygoteUsed += zygote_space->GetBytesAllocated();
+      } else if (space->IsMallocSpace()) {
+        // This is a malloc space.
+        gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+        allocSize += malloc_space->GetFootprint();
+        allocUsed += malloc_space->GetBytesAllocated();
+      } else if (space->IsBumpPointerSpace()) {
+        gc::space::BumpPointerSpace* bump_pointer_space = space->AsBumpPointerSpace();
+        allocSize += bump_pointer_space->Size();
+        allocUsed += bump_pointer_space->GetBytesAllocated();
+      }
+    }
+    for (gc::space::DiscontinuousSpace* space : heap->GetDiscontinuousSpaces()) {
+      if (space->IsLargeObjectSpace()) {
+        largeObjectsSize += space->AsLargeObjectSpace()->GetBytesAllocated();
+        largeObjectsUsed += largeObjectsSize;
+      }
     }
   }
-  for (gc::space::DiscontinuousSpace* space : heap->GetDiscontinuousSpaces()) {
-    if (space->IsLargeObjectSpace()) {
-      largeObjectsSize += space->AsLargeObjectSpace()->GetBytesAllocated();
-      largeObjectsUsed += largeObjectsSize;
-    }
-  }
-
   size_t allocFree = allocSize - allocUsed;
   size_t zygoteFree = zygoteSize - zygoteUsed;
   size_t largeObjectsFree = largeObjectsSize - largeObjectsUsed;
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4c5dc3a..45e49e2 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -16,7 +16,7 @@
 
 #include "dalvik_system_VMRuntime.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 extern "C" void android_set_application_target_sdk_version(uint32_t version);
 #endif
 #include <limits.h>
@@ -29,6 +29,7 @@
 
 #include "art_method-inl.h"
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "debugger.h"
@@ -145,6 +146,10 @@
   return Dbg::IsDebuggerActive();
 }
 
+static jboolean VMRuntime_isNativeDebuggable(JNIEnv*, jobject) {
+  return Runtime::Current()->IsNativeDebuggable();
+}
+
 static jobjectArray VMRuntime_properties(JNIEnv* env, jobject) {
   return toStringArray(env, Runtime::Current()->GetProperties());
 }
@@ -196,7 +201,7 @@
   // Note that targetSdkVersion may be 0, meaning "current".
   Runtime::Current()->SetTargetSdkVersion(target_sdk_version);
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // This part is letting libc/dynamic linker know about current app's
   // target sdk version to enable compatibility workarounds.
   android_set_application_target_sdk_version(static_cast<uint32_t>(target_sdk_version));
@@ -212,6 +217,10 @@
   Runtime::Current()->GetHeap()->RegisterNativeAllocation(env, static_cast<size_t>(bytes));
 }
 
+static void VMRuntime_registerSensitiveThread(JNIEnv*, jobject) {
+  Runtime::Current()->RegisterSensitiveThread();
+}
+
 static void VMRuntime_registerNativeFree(JNIEnv* env, jobject, jint bytes) {
   if (UNLIKELY(bytes < 0)) {
     ScopedObjectAccess soa(env);
@@ -223,8 +232,7 @@
 
 static void VMRuntime_updateProcessState(JNIEnv*, jobject, jint process_state) {
   Runtime* runtime = Runtime::Current();
-  runtime->GetHeap()->UpdateProcessState(static_cast<gc::ProcessState>(process_state));
-  runtime->UpdateProfilerState(process_state);
+  runtime->UpdateProcessState(static_cast<ProcessState>(process_state));
 }
 
 static void VMRuntime_trimHeap(JNIEnv* env, jobject) {
@@ -322,7 +330,7 @@
 static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uint32_t field_idx,
                                          bool is_static)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtField* field = dex_cache->GetResolvedField(field_idx, sizeof(void*));
+  ArtField* field = dex_cache->GetResolvedField(field_idx, kRuntimePointerSize);
   if (field != nullptr) {
     return;
   }
@@ -335,7 +343,7 @@
     return;
   }
   if (is_static) {
-    field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+    field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
   } else {
     field = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
@@ -343,14 +351,14 @@
     return;
   }
   // LOG(INFO) << "VMRuntime.preloadDexCaches resolved field " << PrettyField(field);
-  dex_cache->SetResolvedField(field_idx, field, sizeof(void*));
+  dex_cache->SetResolvedField(field_idx, field, kRuntimePointerSize);
 }
 
 // Based on ClassLinker::ResolveMethod.
 static void PreloadDexCachesResolveMethod(Handle<mirror::DexCache> dex_cache, uint32_t method_idx,
                                           InvokeType invoke_type)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtMethod* method = dex_cache->GetResolvedMethod(method_idx, sizeof(void*));
+  ArtMethod* method = dex_cache->GetResolvedMethod(method_idx, kRuntimePointerSize);
   if (method != nullptr) {
     return;
   }
@@ -363,14 +371,14 @@
   switch (invoke_type) {
     case kDirect:
     case kStatic:
-      method = klass->FindDirectMethod(dex_cache.Get(), method_idx, sizeof(void*));
+      method = klass->FindDirectMethod(dex_cache.Get(), method_idx, kRuntimePointerSize);
       break;
     case kInterface:
-      method = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, sizeof(void*));
+      method = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, kRuntimePointerSize);
       break;
     case kSuper:
     case kVirtual:
-      method = klass->FindVirtualMethod(dex_cache.Get(), method_idx, sizeof(void*));
+      method = klass->FindVirtualMethod(dex_cache.Get(), method_idx, kRuntimePointerSize);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << invoke_type;
@@ -380,7 +388,7 @@
     return;
   }
   // LOG(INFO) << "VMRuntime.preloadDexCaches resolved method " << PrettyMethod(method);
-  dex_cache->SetResolvedMethod(method_idx, method, sizeof(void*));
+  dex_cache->SetResolvedMethod(method_idx, method, kRuntimePointerSize);
 }
 
 struct DexCacheStats {
@@ -455,7 +463,7 @@
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedMethods(); j++) {
-      ArtMethod* method = dex_cache->GetResolvedMethod(j, sizeof(void*));
+      ArtMethod* method = dex_cache->GetResolvedMethod(j, kRuntimePointerSize);
       if (method != nullptr) {
         filled->num_methods++;
       }
@@ -497,8 +505,7 @@
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != nullptr);
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(
-        hs.NewHandle(linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc())));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->RegisterDexFile(*dex_file, nullptr)));
 
     if (kPreloadDexCachesStrings) {
       for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
@@ -562,18 +569,43 @@
 
 /*
  * This is called by the framework when it knows the application directory and
- * process name.  We use this information to start up the sampling profiler for
- * for ART.
+ * process name.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName,
-                                      jstring appDir ATTRIBUTE_UNUSED,
-                                      jstring procName ATTRIBUTE_UNUSED) {
-  const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
-  std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
+static void VMRuntime_registerAppInfo(JNIEnv* env,
+                                      jclass clazz ATTRIBUTE_UNUSED,
+                                      jstring profile_file,
+                                      jstring app_dir,
+                                      jobjectArray code_paths,
+                                      jstring foreign_dex_profile_path) {
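+  // Convert the Java String[] of code paths into a native string vector.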
+  std::vector<std::string> code_paths_vec;
+  int code_paths_length = env->GetArrayLength(code_paths);
+  for (int i = 0; i < code_paths_length; i++) {
+    jstring code_path = reinterpret_cast<jstring>(env->GetObjectArrayElement(code_paths, i));
+    const char* raw_code_path = env->GetStringUTFChars(code_path, nullptr);
+    code_paths_vec.push_back(raw_code_path);
+    env->ReleaseStringUTFChars(code_path, raw_code_path);
+  }
 
-  Runtime::Current()->StartProfiler(profileFile.c_str());
+  const char* raw_profile_file = env->GetStringUTFChars(profile_file, nullptr);
+  std::string profile_file_str(raw_profile_file);
+  env->ReleaseStringUTFChars(profile_file, raw_profile_file);
 
-  env->ReleaseStringUTFChars(pkgName, pkgNameChars);
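+  // The foreign dex profile path may be a null reference; fall back to an
+  // empty string in that case.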
+  std::string foreign_dex_profile_path_str = "";
+  if (foreign_dex_profile_path != nullptr) {
+    const char* raw_foreign_dex_profile_path =
+        env->GetStringUTFChars(foreign_dex_profile_path, nullptr);
+    foreign_dex_profile_path_str.assign(raw_foreign_dex_profile_path);
+    env->ReleaseStringUTFChars(foreign_dex_profile_path, raw_foreign_dex_profile_path);
+  }
+
+  const char* raw_app_dir = env->GetStringUTFChars(app_dir, nullptr);
+  std::string app_dir_str(raw_app_dir);
+  env->ReleaseStringUTFChars(app_dir, raw_app_dir);
+
+  Runtime::Current()->RegisterAppInfo(code_paths_vec,
+                                      profile_file_str,
+                                      foreign_dex_profile_path_str,
+                                      app_dir_str);
 }
 
 static jboolean VMRuntime_isBootClassPathOnDisk(JNIEnv* env, jclass, jstring java_instruction_set) {
@@ -598,6 +630,11 @@
   return env->NewStringUTF(GetInstructionSetString(kRuntimeISA));
 }
 
+static jboolean VMRuntime_didPruneDalvikCache(JNIEnv* env ATTRIBUTE_UNUSED,
+                                              jclass klass ATTRIBUTE_UNUSED) {
+  return Runtime::Current()->GetPrunedDalvikCache() ? JNI_TRUE : JNI_FALSE;
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(VMRuntime, addressOf, "!(Ljava/lang/Object;)J"),
   NATIVE_METHOD(VMRuntime, bootClassPath, "()Ljava/lang/String;"),
@@ -608,12 +645,14 @@
   NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"),
   NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"),
   NATIVE_METHOD(VMRuntime, isDebuggerActive, "!()Z"),
+  NATIVE_METHOD(VMRuntime, isNativeDebuggable, "!()Z"),
   NATIVE_METHOD(VMRuntime, nativeSetTargetHeapUtilization, "(F)V"),
   NATIVE_METHOD(VMRuntime, newNonMovableArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
   NATIVE_METHOD(VMRuntime, newUnpaddedArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
   NATIVE_METHOD(VMRuntime, properties, "()[Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"),
   NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"),
+  NATIVE_METHOD(VMRuntime, registerSensitiveThread, "()V"),
   NATIVE_METHOD(VMRuntime, registerNativeFree, "(I)V"),
   NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"),
   NATIVE_METHOD(VMRuntime, requestHeapTrim, "()V"),
@@ -630,9 +669,10 @@
   NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo,
-                "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
+                "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;Ljava/lang/String;)V"),
   NATIVE_METHOD(VMRuntime, isBootClassPathOnDisk, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(VMRuntime, getCurrentInstructionSet, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, didPruneDalvikCache, "()Z"),
 };
 
 void register_dalvik_system_VMRuntime(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1d06706..fe3cbe7 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,8 +18,6 @@
 
 #include <stdlib.h>
 
-#include <cutils/process_name.h>
-
 #include "arch/instruction_set.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
@@ -46,6 +44,16 @@
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
+
+  // Even if Yama is on, a non-privileged native debugger should
+  // be able to attach to the debuggable app.
+  if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
+    // If Yama is off, prctl(PR_SET_PTRACER) returns EINVAL - don't log in
+    // this case since it's expected behaviour.
+    if (errno != EINVAL) {
+      PLOG(ERROR) << "prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed for pid " << getpid();
+    }
+  }
 #endif
   // We don't want core dumps, though, so set the core dump size to 0.
   rlimit rl;
@@ -64,8 +72,9 @@
     DEBUG_ENABLE_ASSERT             = 1 << 2,
     DEBUG_ENABLE_SAFEMODE           = 1 << 3,
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
-    DEBUG_ENABLE_JIT                = 1 << 5,
-    DEBUG_GENERATE_DEBUG_INFO       = 1 << 6,
+    DEBUG_GENERATE_DEBUG_INFO       = 1 << 5,
+    DEBUG_ALWAYS_JIT                = 1 << 6,
+    DEBUG_NATIVE_DEBUGGABLE         = 1 << 7,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -97,21 +106,10 @@
   if (safe_mode) {
     // Ensure that any (secondary) oat files will be interpreted.
     runtime->AddCompilerOption("--compiler-filter=interpret-only");
+    runtime->SetSafeMode(true);
     debug_flags &= ~DEBUG_ENABLE_SAFEMODE;
   }
 
-  bool use_jit = false;
-  if ((debug_flags & DEBUG_ENABLE_JIT) != 0) {
-    if (safe_mode) {
-      LOG(INFO) << "Not enabling JIT due to safe mode";
-    } else {
-      use_jit = true;
-      LOG(INFO) << "Late-enabling JIT";
-    }
-    debug_flags &= ~DEBUG_ENABLE_JIT;
-  }
-  runtime->GetJITOptions()->SetUseJIT(use_jit);
-
   const bool generate_debug_info = (debug_flags & DEBUG_GENERATE_DEBUG_INFO) != 0;
   if (generate_debug_info) {
     runtime->AddCompilerOption("--generate-debug-info");
@@ -121,6 +119,20 @@
   // This is for backwards compatibility with Dalvik.
   debug_flags &= ~DEBUG_ENABLE_ASSERT;
 
+  if ((debug_flags & DEBUG_ALWAYS_JIT) != 0) {
+    jit::JitOptions* jit_options = runtime->GetJITOptions();
+    CHECK(jit_options != nullptr);
+    jit_options->SetJitAtFirstUse();
+    debug_flags &= ~DEBUG_ALWAYS_JIT;
+  }
+
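+  // Native debugging requires debuggable code with debug info, so force both
+  // compiler options on.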
+  if ((debug_flags & DEBUG_NATIVE_DEBUGGABLE) != 0) {
+    runtime->AddCompilerOption("--debuggable");
+    runtime->AddCompilerOption("--generate-debug-info");
+    runtime->SetNativeDebuggable(true);
+    debug_flags &= ~DEBUG_NATIVE_DEBUGGABLE;
+  }
+
   if (debug_flags != 0) {
     LOG(ERROR) << StringPrintf("Unknown bits set in debug_flags: %#x", debug_flags);
   }
@@ -141,7 +153,11 @@
   return reinterpret_cast<jlong>(ThreadForEnv(env));
 }
 
-static void ZygoteHooks_nativePostForkChild(JNIEnv* env, jclass, jlong token, jint debug_flags,
+static void ZygoteHooks_nativePostForkChild(JNIEnv* env,
+                                            jclass,
+                                            jlong token,
+                                            jint debug_flags,
+                                            jboolean is_system_server,
                                             jstring instruction_set) {
   Thread* thread = reinterpret_cast<Thread*>(token);
   // Our system thread ID, etc, has changed so reset Thread state.
@@ -160,54 +176,67 @@
     // Only restart if it was streaming mode.
     // TODO: Expose buffer size, so we can also do file mode.
     if (output_mode == Trace::TraceOutputMode::kStreaming) {
-      const char* proc_name_cutils = get_process_name();
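+      // cutils' get_process_name() is no longer available; read the name of
+      // the current pthread instead.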
+      static constexpr size_t kMaxProcessNameLength = 100;
+      char name_buf[kMaxProcessNameLength] = {};
+      int rc = pthread_getname_np(pthread_self(), name_buf, kMaxProcessNameLength);
       std::string proc_name;
-      if (proc_name_cutils != nullptr) {
-        proc_name = proc_name_cutils;
+
+      if (rc == 0) {
+        // On success, use the pthread name.
+        proc_name = name_buf;
       }
-      if (proc_name_cutils == nullptr || proc_name == "zygote" || proc_name == "zygote64") {
+
+      if (proc_name.empty() || proc_name == "zygote" || proc_name == "zygote64") {
         // Either no process name, or the name hasn't been changed, yet. Just use pid.
         pid_t pid = getpid();
         proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
       }
 
-      std::string profiles_dir(GetDalvikCache("profiles", false /* create_if_absent */));
-      if (!profiles_dir.empty()) {
-        std::string trace_file = StringPrintf("%s/%s.trace.bin", profiles_dir.c_str(),
-                                              proc_name.c_str());
-        Trace::Start(trace_file.c_str(),
-                     -1,
-                     buffer_size,
-                     0,   // TODO: Expose flags.
-                     output_mode,
-                     trace_mode,
-                     0);  // TODO: Expose interval.
-        if (thread->IsExceptionPending()) {
-          ScopedObjectAccess soa(env);
-          thread->ClearException();
-        }
-      } else {
-        LOG(ERROR) << "Profiles dir is empty?!?!";
+      std::string trace_file = StringPrintf("/data/misc/trace/%s.trace.bin", proc_name.c_str());
+      Trace::Start(trace_file.c_str(),
+                   -1,
+                   buffer_size,
+                   0,   // TODO: Expose flags.
+                   output_mode,
+                   trace_mode,
+                   0);  // TODO: Expose interval.
+      if (thread->IsExceptionPending()) {
+        ScopedObjectAccess soa(env);
+        thread->ClearException();
       }
     }
   }
 
-  if (instruction_set != nullptr) {
+  if (instruction_set != nullptr && !is_system_server) {
     ScopedUtfChars isa_string(env, instruction_set);
     InstructionSet isa = GetInstructionSetFromString(isa_string.c_str());
     Runtime::NativeBridgeAction action = Runtime::NativeBridgeAction::kUnload;
     if (isa != kNone && isa != kRuntimeISA) {
       action = Runtime::NativeBridgeAction::kInitialize;
     }
-    Runtime::Current()->DidForkFromZygote(env, action, isa_string.c_str());
+    Runtime::Current()->InitNonZygoteOrPostFork(
+        env, is_system_server, action, isa_string.c_str());
   } else {
-    Runtime::Current()->DidForkFromZygote(env, Runtime::NativeBridgeAction::kUnload, nullptr);
+    Runtime::Current()->InitNonZygoteOrPostFork(
+        env, is_system_server, Runtime::NativeBridgeAction::kUnload, nullptr);
   }
 }
 
+static void ZygoteHooks_startZygoteNoThreadCreation(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                    jclass klass ATTRIBUTE_UNUSED) {
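+  // Between start and stop, Thread_nativeCreate (java_lang_Thread.cc) throws
+  // an InternalError instead of creating a thread.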
+  Runtime::Current()->SetZygoteNoThreadSection(true);
+}
+
+static void ZygoteHooks_stopZygoteNoThreadCreation(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                   jclass klass ATTRIBUTE_UNUSED) {
+  Runtime::Current()->SetZygoteNoThreadSection(false);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(ZygoteHooks, nativePreFork, "()J"),
-  NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JILjava/lang/String;)V"),
+  NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JIZLjava/lang/String;)V"),
+  NATIVE_METHOD(ZygoteHooks, startZygoteNoThreadCreation, "()V"),
+  NATIVE_METHOD(ZygoteHooks, stopZygoteNoThreadCreation, "()V"),
 };
 
 void register_dalvik_system_ZygoteHooks(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 3a73900..d4e54cf 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -16,7 +16,10 @@
 
 #include "java_lang_Class.h"
 
+#include <iostream>
+
 #include "art_field-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
@@ -134,7 +137,9 @@
   }
   for (ArtField& field : ifields) {
     if (!public_only || field.IsPublic()) {
-      auto* reflect_field = mirror::Field::CreateFromArtField(self, &field, force_resolve);
+      auto* reflect_field = mirror::Field::CreateFromArtField<kRuntimePointerSize>(self,
+                                                                                   &field,
+                                                                                   force_resolve);
       if (reflect_field == nullptr) {
         if (kIsDebugBuild) {
           self->AssertPendingException();
@@ -147,7 +152,9 @@
   }
   for (ArtField& field : sfields) {
     if (!public_only || field.IsPublic()) {
-      auto* reflect_field = mirror::Field::CreateFromArtField(self, &field, force_resolve);
+      auto* reflect_field = mirror::Field::CreateFromArtField<kRuntimePointerSize>(self,
+                                                                                   &field,
+                                                                                   force_resolve);
       if (reflect_field == nullptr) {
         if (kIsDebugBuild) {
           self->AssertPendingException();
@@ -190,13 +197,26 @@
     return nullptr;
   }
   size_t low = 0;
-  size_t high = fields->Length();
-  const uint16_t* const data = name->GetValue();
+  size_t high = fields->size();
+  const bool is_name_compressed = name->IsCompressed();
+  const uint16_t* const data = (is_name_compressed) ? nullptr : name->GetValue();
+  const uint8_t* const data_compressed = (is_name_compressed) ? name->GetValueCompressed()
+                                                              : nullptr;
   const size_t length = name->GetLength();
   while (low < high) {
     auto mid = (low + high) / 2;
     ArtField& field = fields->At(mid);
-    int result = CompareModifiedUtf8ToUtf16AsCodePointValues(field.GetName(), data, length);
+    int result = 0;
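+    // Compressed names are 8-bit and can be compared bytewise; uncompressed
+    // names need the UTF-16 code point comparison.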
+    if (is_name_compressed) {
+      size_t field_length = strlen(field.GetName());
+      size_t min_size = (length < field_length) ? length : field_length;
+      result = memcmp(field.GetName(), data_compressed, min_size);
+      if (result == 0) {
+        result = field_length - length;
+      }
+    } else {
+      result = CompareModifiedUtf8ToUtf16AsCodePointValues(field.GetName(), data, length);
+    }
     // Alternate approach, only a few % faster at the cost of more allocations.
     // int result = field->GetStringName(self, true)->CompareTo(name);
     if (result < 0) {
@@ -220,11 +240,11 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* art_field = FindFieldByName(self, name, c->GetIFieldsPtr());
   if (art_field != nullptr) {
-    return mirror::Field::CreateFromArtField(self, art_field, true);
+    return mirror::Field::CreateFromArtField<kRuntimePointerSize>(self, art_field, true);
   }
   art_field = FindFieldByName(self, name, c->GetSFieldsPtr());
   if (art_field != nullptr) {
-    return mirror::Field::CreateFromArtField(self, art_field, true);
+    return mirror::Field::CreateFromArtField<kRuntimePointerSize>(self, art_field, true);
   }
   return nullptr;
 }
@@ -236,12 +256,13 @@
   DCHECK(name != nullptr);
   DCHECK(self != nullptr);
 
-  StackHandleScope<1> hs(self);
+  StackHandleScope<2> hs(self);
   MutableHandle<mirror::Class> h_clazz(hs.NewHandle(clazz));
+  Handle<mirror::String> h_name(hs.NewHandle(name));
 
   // We search the current class, its direct interfaces then its superclass.
   while (h_clazz.Get() != nullptr) {
-    mirror::Field* result = GetDeclaredField(self, h_clazz.Get(), name);
+    mirror::Field* result = GetDeclaredField(self, h_clazz.Get(), h_name.Get());
     if ((result != nullptr) && (result->GetAccessFlags() & kAccPublic)) {
       return result;
     } else if (UNLIKELY(self->IsExceptionPending())) {
@@ -256,7 +277,7 @@
         self->AssertPendingException();
         return nullptr;
       }
-      result = GetPublicFieldRecursive(self, iface, name);
+      result = GetPublicFieldRecursive(self, iface, h_name.Get());
       if (result != nullptr) {
         DCHECK(result->GetAccessFlags() & kAccPublic);
         return result;
@@ -288,13 +309,6 @@
       GetPublicFieldRecursive(soa.Self(), DecodeClass(soa, javaThis), name_string));
 }
 
-static jobject Class_getDeclaredFieldInternal(JNIEnv* env, jobject javaThis, jstring name) {
-  ScopedFastNativeObjectAccess soa(env);
-  auto* name_string = soa.Decode<mirror::String*>(name);
-  return soa.AddLocalReference<jobject>(
-      GetDeclaredField(soa.Self(), DecodeClass(soa, javaThis), name_string));
-}
-
 static jobject Class_getDeclaredField(JNIEnv* env, jobject javaThis, jstring name) {
   ScopedFastNativeObjectAccess soa(env);
   auto* name_string = soa.Decode<mirror::String*>(name);
@@ -306,6 +320,15 @@
   mirror::Field* result = GetDeclaredField(soa.Self(), klass, name_string);
   if (result == nullptr) {
     std::string name_str = name_string->ToModifiedUtf8();
+    if (name_str == "value" && klass->IsStringClass()) {
+      // We log the error for this specific case, as the user might just swallow the exception.
+      // This helps diagnose crashes when applications rely on the String#value field being
+      // there.
+      // Also print on the error stream to test it through run-test.
+      std::string message("The String#value field is not present on Android versions >= 6.0");
+      LOG(ERROR) << message;
+      std::cerr << message << std::endl;
+    }
     // We may have a pending exception if we failed to resolve.
     if (!soa.Self()->IsExceptionPending()) {
       ThrowNoSuchFieldException(DecodeClass(soa, javaThis), name_str.c_str());
@@ -318,15 +341,14 @@
 static jobject Class_getDeclaredConstructorInternal(
     JNIEnv* env, jobject javaThis, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  auto* klass = DecodeClass(soa, javaThis);
-  auto* params = soa.Decode<mirror::ObjectArray<mirror::Class>*>(args);
-  StackHandleScope<1> hs(soa.Self());
-  auto* declared_constructor = klass->GetDeclaredConstructor(soa.Self(), hs.NewHandle(params));
-  if (declared_constructor != nullptr) {
-    return soa.AddLocalReference<jobject>(
-        mirror::Constructor::CreateFromArtMethod(soa.Self(), declared_constructor));
-  }
-  return nullptr;
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
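+  // The DCHECKs above guarantee that these template arguments are the correct
+  // ones for this runtime.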
+  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal<kRuntimePointerSize,
+                                                                              false>(
+      soa.Self(),
+      DecodeClass(soa, javaThis),
+      soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
+  return soa.AddLocalReference<jobject>(result);
 }
 
 static ALWAYS_INLINE inline bool MethodMatchesConstructor(ArtMethod* m, bool public_only)
@@ -342,7 +364,7 @@
   Handle<mirror::Class> h_klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t constructor_count = 0;
   // Two pass approach for speed.
-  for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) {
+  for (auto& m : h_klass->GetDirectMethods(kRuntimePointerSize)) {
     constructor_count += MethodMatchesConstructor(&m, publicOnly != JNI_FALSE) ? 1u : 0u;
   }
   auto h_constructors = hs.NewHandle(mirror::ObjectArray<mirror::Constructor>::Alloc(
@@ -352,9 +374,12 @@
     return nullptr;
   }
   constructor_count = 0;
-  for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) {
+  for (auto& m : h_klass->GetDirectMethods(kRuntimePointerSize)) {
     if (MethodMatchesConstructor(&m, publicOnly != JNI_FALSE)) {
-      auto* constructor = mirror::Constructor::CreateFromArtMethod(soa.Self(), &m);
+      DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+      DCHECK(!Runtime::Current()->IsActiveTransaction());
+      auto* constructor = mirror::Constructor::CreateFromArtMethod<kRuntimePointerSize, false>(
+          soa.Self(), &m);
       if (UNLIKELY(constructor == nullptr)) {
         soa.Self()->AssertPendingOOMException();
         return nullptr;
@@ -367,70 +392,15 @@
 
 static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis,
                                                jobject name, jobjectArray args) {
-  // Covariant return types permit the class to define multiple
-  // methods with the same name and parameter types. Prefer to
-  // return a non-synthetic method in such situations. We may
-  // still return a synthetic method to handle situations like
-  // escalated visibility. We never return miranda methods that
-  // were synthesized by the runtime.
-  constexpr uint32_t kSkipModifiers = kAccMiranda | kAccSynthetic;
   ScopedFastNativeObjectAccess soa(env);
-  StackHandleScope<3> hs(soa.Self());
-  auto h_method_name = hs.NewHandle(soa.Decode<mirror::String*>(name));
-  if (UNLIKELY(h_method_name.Get() == nullptr)) {
-    ThrowNullPointerException("name == null");
-    return nullptr;
-  }
-  auto h_args = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
-  Handle<mirror::Class> h_klass = hs.NewHandle(DecodeClass(soa, javaThis));
-  ArtMethod* result = nullptr;
-  for (auto& m : h_klass->GetVirtualMethods(sizeof(void*))) {
-    auto* np_method = m.GetInterfaceMethodIfProxy(sizeof(void*));
-    // May cause thread suspension.
-    mirror::String* np_name = np_method->GetNameAsString(soa.Self());
-    if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
-      if (UNLIKELY(soa.Self()->IsExceptionPending())) {
-        return nullptr;
-      }
-      continue;
-    }
-    auto modifiers = m.GetAccessFlags();
-    if ((modifiers & kSkipModifiers) == 0) {
-      return soa.AddLocalReference<jobject>(mirror::Method::CreateFromArtMethod(soa.Self(), &m));
-    }
-    if ((modifiers & kAccMiranda) == 0) {
-      result = &m;  // Remember as potential result if it's not a miranda method.
-    }
-  }
-  if (result == nullptr) {
-    for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) {
-      auto modifiers = m.GetAccessFlags();
-      if ((modifiers & kAccConstructor) != 0) {
-        continue;
-      }
-      auto* np_method = m.GetInterfaceMethodIfProxy(sizeof(void*));
-      // May cause thread suspension.
-      mirror::String* np_name = np_method->GetNameAsString(soa.Self());
-      if (np_name == nullptr) {
-        soa.Self()->AssertPendingException();
-        return nullptr;
-      }
-      if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
-        if (UNLIKELY(soa.Self()->IsExceptionPending())) {
-          return nullptr;
-        }
-        continue;
-      }
-      if ((modifiers & kSkipModifiers) == 0) {
-        return soa.AddLocalReference<jobject>(mirror::Method::CreateFromArtMethod(soa.Self(), &m));
-      }
-      // Direct methods cannot be miranda methods, so this potential result must be synthetic.
-      result = &m;
-    }
-  }
-  return result != nullptr ?
-      soa.AddLocalReference<jobject>(mirror::Method::CreateFromArtMethod(soa.Self(), result)) :
-      nullptr;
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
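+  // The lookup logic (covariant return types, synthetic and miranda methods)
+  // now lives in mirror::Class::GetDeclaredMethodInternal.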
+  mirror::Method* result = mirror::Class::GetDeclaredMethodInternal<kRuntimePointerSize, false>(
+      soa.Self(),
+      DecodeClass(soa, javaThis),
+      soa.Decode<mirror::String*>(name),
+      soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
+  return soa.AddLocalReference<jobject>(result);
 }
 
 static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis,
@@ -439,16 +409,9 @@
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t num_methods = 0;
-  for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(kRuntimePointerSize)) {
     auto modifiers = m.GetAccessFlags();
-    if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
-        (modifiers & kAccMiranda) == 0) {
-      ++num_methods;
-    }
-  }
-  for (auto& m : klass->GetDirectMethods(sizeof(void*))) {
-    auto modifiers = m.GetAccessFlags();
-    // Add non-constructor direct/static methods.
+    // Add non-constructor declared methods.
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
       ++num_methods;
@@ -457,24 +420,14 @@
   auto ret = hs.NewHandle(mirror::ObjectArray<mirror::Method>::Alloc(
       soa.Self(), mirror::Method::ArrayClass(), num_methods));
   num_methods = 0;
-  for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(kRuntimePointerSize)) {
     auto modifiers = m.GetAccessFlags();
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
-        (modifiers & kAccMiranda) == 0) {
-      auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
-      if (method == nullptr) {
-        soa.Self()->AssertPendingException();
-        return nullptr;
-      }
-      ret->SetWithoutChecks<false>(num_methods++, method);
-    }
-  }
-  for (auto& m : klass->GetDirectMethods(sizeof(void*))) {
-    auto modifiers = m.GetAccessFlags();
-    // Add non-constructor direct/static methods.
-    if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
-      auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
+      DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+      DCHECK(!Runtime::Current()->IsActiveTransaction());
+      auto* method =
+          mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), &m);
       if (method == nullptr) {
         soa.Self()->AssertPendingException();
         return nullptr;
@@ -485,14 +438,21 @@
   return soa.AddLocalReference<jobjectArray>(ret.Get());
 }
 
-static jobject Class_getDeclaredAnnotation(JNIEnv* env, jobject javaThis, jclass annotationType) {
+static jobject Class_getDeclaredAnnotation(JNIEnv* env, jobject javaThis, jclass annotationClass) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+
+  // Public contract: throw a NullPointerException if the "annotationClass"
+  // argument is null.
+  if (UNLIKELY(annotationClass == nullptr)) {
+    ThrowNullPointerException("annotationClass");
+    return nullptr;
+  }
+
   if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
-  Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+  Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationClass)));
   return soa.AddLocalReference<jobject>(
       klass->GetDexFile().GetAnnotationForClass(klass, annotation_class));
 }
@@ -587,14 +547,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
-    return defaultValue;
-  }
-  uint32_t flags;
-  if (!klass->GetDexFile().GetInnerClassFlags(klass, &flags)) {
-    return defaultValue;
-  }
-  return flags;
+  return mirror::Class::GetInnerClassFlags(klass, defaultValue);
 }
 
 static jstring Class_getInnerClassName(JNIEnv* env, jobject javaThis) {
@@ -611,6 +564,17 @@
   return soa.AddLocalReference<jstring>(class_name);
 }
 
+static jobjectArray Class_getSignatureAnnotation(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  return soa.AddLocalReference<jobjectArray>(
+      klass->GetDexFile().GetSignatureAnnotationForClass(klass));
+}
+
 static jboolean Class_isAnonymousClass(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
@@ -673,7 +637,9 @@
     }
   }
   auto* constructor = klass->GetDeclaredConstructor(
-      soa.Self(), NullHandle<mirror::ObjectArray<mirror::Class>>());
+      soa.Self(),
+      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>(),
+      kRuntimePointerSize);
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
@@ -683,8 +649,7 @@
   // Invoke the string allocator to return an empty string for the string class.
   if (klass->IsStringClass()) {
     gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-    mirror::SetStringCountVisitor visitor(0);
-    mirror::Object* obj = mirror::String::Alloc<true>(soa.Self(), 0, allocator_type, visitor);
+    mirror::Object* obj = mirror::String::AllocEmptyString<true>(soa.Self(), allocator_type);
     if (UNLIKELY(soa.Self()->IsExceptionPending())) {
       return nullptr;
     } else {
@@ -742,7 +707,6 @@
   NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "!(Z)[Ljava/lang/reflect/Constructor;"),
   NATIVE_METHOD(Class, getDeclaredField, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getPublicFieldRecursive, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getDeclaredFieldInternal, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredFieldsUnchecked, "!(Z)[Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredMethodInternal,
@@ -758,6 +722,7 @@
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
+  NATIVE_METHOD(Class, getSignatureAnnotation, "!()[Ljava/lang/String;"),
   NATIVE_METHOD(Class, isAnonymousClass, "!()Z"),
   NATIVE_METHOD(Class, isDeclaredAnnotationPresent, "!(Ljava/lang/Class;)Z"),
   NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index 994ccb1..f0140a3 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -59,7 +59,7 @@
 static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(string_index), dex_cache->NumStrings());
+  CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
   return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index));
 }
 
@@ -75,7 +75,7 @@
                                        jobject string) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(string_index), dex_cache->NumStrings());
+  CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
   dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String*>(string));
 }
 
diff --git a/runtime/native/java_lang_Object.cc b/runtime/native/java_lang_Object.cc
index 49cacdf..2a36059 100644
--- a/runtime/native/java_lang_Object.cc
+++ b/runtime/native/java_lang_Object.cc
@@ -20,10 +20,6 @@
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access.h"
 
-// TODO: better support for overloading.
-#undef NATIVE_METHOD
-#define NATIVE_METHOD(className, functionName, signature, identifier) \
-    { #functionName, signature, reinterpret_cast<void*>(className ## _ ## identifier) }
 
 namespace art {
 
@@ -58,11 +54,11 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Object, internalClone, "!()Ljava/lang/Object;", internalClone),
-  NATIVE_METHOD(Object, notify, "!()V", notify),
-  NATIVE_METHOD(Object, notifyAll, "!()V", notifyAll),
-  NATIVE_METHOD(Object, wait, "!()V", wait),
-  NATIVE_METHOD(Object, wait, "!(JI)V", waitJI),
+  NATIVE_METHOD(Object, internalClone, "!()Ljava/lang/Object;"),
+  NATIVE_METHOD(Object, notify, "!()V"),
+  NATIVE_METHOD(Object, notifyAll, "!()V"),
+  OVERLOADED_NATIVE_METHOD(Object, wait, "!()V", wait),
+  OVERLOADED_NATIVE_METHOD(Object, wait, "!(JI)V", waitJI),
 };
 
 void register_java_lang_Object(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
deleted file mode 100644
index 856a3e7..0000000
--- a/runtime/native/java_lang_Runtime.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "java_lang_Runtime.h"
-
-#include <dlfcn.h>
-#include <limits.h>
-#include <unistd.h>
-
-#include "base/macros.h"
-#include "gc/heap.h"
-#include "handle_scope-inl.h"
-#include "jni_internal.h"
-#include "mirror/class_loader.h"
-#include "runtime.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedUtfChars.h"
-#include "verify_object-inl.h"
-
-#include <sstream>
-#ifdef __ANDROID__
-// This function is provided by android linker.
-extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
-#endif  // __ANDROID__
-
-namespace art {
-
-static void Runtime_gc(JNIEnv*, jclass) {
-  if (Runtime::Current()->IsExplicitGcDisabled()) {
-      LOG(INFO) << "Explicit GC skipped.";
-      return;
-  }
-  Runtime::Current()->GetHeap()->CollectGarbage(false);
-}
-
-NO_RETURN static void Runtime_nativeExit(JNIEnv*, jclass, jint status) {
-  LOG(INFO) << "System.exit called, status: " << status;
-  Runtime::Current()->CallExitHook(status);
-  exit(status);
-}
-
-static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) {
-#ifdef __ANDROID__
-  if (javaLdLibraryPathJstr != nullptr) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr);
-    if (ldLibraryPath.c_str() != nullptr) {
-      android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
-    }
-  }
-
-#else
-  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
-  UNUSED(javaLdLibraryPathJstr, env);
-#endif
-}
-
-static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPathJstr) {
-  ScopedUtfChars filename(env, javaFilename);
-  if (filename.c_str() == nullptr) {
-    return nullptr;
-  }
-
-  SetLdLibraryPath(env, javaLdLibraryPathJstr);
-
-  std::string error_msg;
-  {
-    JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader, &error_msg);
-    if (success) {
-      return nullptr;
-    }
-  }
-
-  // Don't let a pending exception from JNI_OnLoad cause a CheckJNI issue with NewStringUTF.
-  env->ExceptionClear();
-  return env->NewStringUTF(error_msg.c_str());
-}
-
-static jlong Runtime_maxMemory(JNIEnv*, jclass) {
-  return Runtime::Current()->GetHeap()->GetMaxMemory();
-}
-
-static jlong Runtime_totalMemory(JNIEnv*, jclass) {
-  return Runtime::Current()->GetHeap()->GetTotalMemory();
-}
-
-static jlong Runtime_freeMemory(JNIEnv*, jclass) {
-  return Runtime::Current()->GetHeap()->GetFreeMemory();
-}
-
-static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Runtime, freeMemory, "!()J"),
-  NATIVE_METHOD(Runtime, gc, "()V"),
-  NATIVE_METHOD(Runtime, maxMemory, "!()J"),
-  NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
-  NATIVE_METHOD(Runtime, totalMemory, "!()J"),
-};
-
-void register_java_lang_Runtime(JNIEnv* env) {
-  REGISTER_NATIVE_METHODS("java/lang/Runtime");
-}
-
-}  // namespace art
diff --git a/runtime/native/java_lang_Runtime.h b/runtime/native/java_lang_Runtime.h
deleted file mode 100644
index ceda06b..0000000
--- a/runtime/native/java_lang_Runtime.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_RUNTIME_H_
-#define ART_RUNTIME_NATIVE_JAVA_LANG_RUNTIME_H_
-
-#include <jni.h>
-
-namespace art {
-
-void register_java_lang_Runtime(JNIEnv* env);
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_RUNTIME_H_
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
index 34d6a37..5a219ef 100644
--- a/runtime/native/java_lang_StringFactory.cc
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -50,8 +50,10 @@
   return soa.AddLocalReference<jstring>(result);
 }
 
+// The char array passed as `java_data` must not be a null reference.
 static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
                                                 jint char_count, jcharArray java_data) {
+  DCHECK(java_data != nullptr);
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray*>(java_data)));
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index d9863c5..9e2d68d 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -149,7 +149,9 @@
     dstObjArray->AssignableMemcpy(dstPos, srcObjArray, srcPos, count);
     return;
   }
-  dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
+  // This code is never run under a transaction.
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  dstObjArray->AssignableCheckingMemcpy<false>(dstPos, srcObjArray, srcPos, count, true);
 }
 
 // Template to convert general array to that of its specific primitive type.
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index c75ff78..a742e81 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -47,6 +47,15 @@
 
 static void Thread_nativeCreate(JNIEnv* env, jclass, jobject java_thread, jlong stack_size,
                                 jboolean daemon) {
+  // There are sections in the zygote that forbid thread creation.
+  Runtime* runtime = Runtime::Current();
+  if (runtime->IsZygote() && runtime->IsZygoteNoThreadSection()) {
+    jclass internal_error = env->FindClass("java/lang/InternalError");
+    CHECK(internal_error != nullptr);
+    env->ThrowNew(internal_error, "Cannot create threads in zygote");
+    return;
+  }
+
   Thread::CreateNativeThread(env, java_thread, stack_size, daemon == JNI_TRUE);
 }
 
@@ -89,7 +98,7 @@
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
     case kWaitingForMethodTracingStart:   return kJavaWaiting;
     case kWaitingForVisitObjects:         return kJavaWaiting;
-    case kWaitingWeakGcRootRead:          return kJavaWaiting;
+    case kWaitingWeakGcRootRead:          return kJavaRunnable;
     case kWaitingForGcThreadFlip:         return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index 1515630..6f735aa 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -41,6 +41,23 @@
   if (c != nullptr && c->IsResolved()) {
     return soa.AddLocalReference<jclass>(c);
   }
+  // If class is erroneous, throw the earlier failure, wrapped in certain cases. See b/28787733.
+  if (c != nullptr && c->IsErroneous()) {
+    cl->ThrowEarlierClassFailure(c);
+    Thread* self = soa.Self();
+    mirror::Class* eiie_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_ExceptionInInitializerError)->AsClass();
+    mirror::Class* iae_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_IllegalAccessError)->AsClass();
+    mirror::Class* ncdfe_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_NoClassDefFoundError)->AsClass();
+    mirror::Class* exception = self->GetException()->GetClass();
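+    // Only the well-known initialization failures are wrapped; any other
+    // earlier failure is left pending unchanged.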
+    if (exception == eiie_class || exception == iae_class || exception == ncdfe_class) {
+      self->ThrowNewWrappedException("Ljava/lang/ClassNotFoundException;",
+                                     PrettyDescriptor(c).c_str());
+    }
+    return nullptr;
+  }
   if (loader != nullptr) {
     // Try the common case.
     StackHandleScope<1> hs(soa.Self());
diff --git a/runtime/native/java_lang_reflect_AbstractMethod.cc b/runtime/native/java_lang_reflect_AbstractMethod.cc
new file mode 100644
index 0000000..7e11c11
--- /dev/null
+++ b/runtime/native/java_lang_reflect_AbstractMethod.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_reflect_AbstractMethod.h"
+
+#include "art_method-inl.h"
+#include "jni_internal.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "reflection.h"
+#include "scoped_fast_native_object_access.h"
+#include "well_known_classes.h"
+
+namespace art {
+
+static jobjectArray AbstractMethod_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    // Return an empty array instead of a null pointer.
+    mirror::Class* annotation_array_class =
+        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
+    mirror::ObjectArray<mirror::Object>* empty_array =
+        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  }
+  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
+}
+
+static jobjectArray AbstractMethod_getSignatureAnnotation(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  return soa.AddLocalReference<jobjectArray>(
+      method->GetDexFile()->GetSignatureAnnotationForMethod(method));
+}
+
+static jboolean AbstractMethod_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
+                                                         jclass annotationType) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    return false;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+  return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(AbstractMethod, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
+  NATIVE_METHOD(AbstractMethod, getSignatureAnnotation, "!()[Ljava/lang/String;"),
+  NATIVE_METHOD(AbstractMethod, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
+};
+
+void register_java_lang_reflect_AbstractMethod(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/reflect/AbstractMethod");
+}
+
+}  // namespace art
diff --git a/runtime/native/java_lang_reflect_AbstractMethod.h b/runtime/native/java_lang_reflect_AbstractMethod.h
new file mode 100644
index 0000000..222e5a0
--- /dev/null
+++ b/runtime/native/java_lang_reflect_AbstractMethod.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_java_lang_reflect_AbstractMethod(JNIEnv* env);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 45b9484..dd46233 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -17,6 +17,7 @@
 #include "java_lang_reflect_Constructor.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "jni_internal.h"
@@ -34,20 +35,38 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
-  return soa.AddLocalReference<jobject>(
-      method->GetDexFile()->GetAnnotationForMethod(method, klass));
+  if (method->IsProxyMethod()) {
+    return nullptr;
+  } else {
+    Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+    return soa.AddLocalReference<jobject>(
+        method->GetDexFile()->GetAnnotationForMethod(method, klass));
+  }
 }
 
 static jobjectArray Constructor_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
+  if (method->IsProxyMethod()) {
+    mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+    mirror::Class* class_array_class =
+        Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
+    if (class_array_class == nullptr) {
+      return nullptr;
+    }
+    mirror::ObjectArray<mirror::Class>* empty_array =
+        mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  } else {
+    return soa.AddLocalReference<jobjectArray>(
+        method->GetDexFile()->GetAnnotationsForMethod(method));
+  }
 }
 
 static jobjectArray Constructor_getExceptionTypes(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod)
+      ->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   mirror::ObjectArray<mirror::Class>* result_array =
       method->GetDexFile()->GetExceptionTypesForMethod(method);
   if (result_array == nullptr) {
@@ -69,7 +88,12 @@
 static jobjectArray Constructor_getParameterAnnotationsNative(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetParameterAnnotations(method));
+  if (method->IsProxyMethod()) {
+    return nullptr;
+  } else {
+    return soa.AddLocalReference<jobjectArray>(
+        method->GetDexFile()->GetParameterAnnotations(method));
+  }
 }
 
 static jboolean Constructor_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
@@ -77,6 +101,10 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->IsProxyMethod()) {
+    // Proxies have no annotations.
+    return false;
+  }
   Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
   return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
 }
@@ -86,7 +114,7 @@
  * with an interface, array, or primitive class. If this is coming from
  * native, it is OK to avoid access checks since JNI does not enforce them.
  */
-static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
+static jobject Constructor_newInstance0(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Constructor* m = soa.Decode<mirror::Constructor*>(javaMethod);
   StackHandleScope<1> hs(soa.Self());
@@ -99,7 +127,9 @@
   }
   // Verify that we can access the class.
   if (!m->IsAccessible() && !c->IsPublic()) {
-    auto* caller = GetCallingClass(soa.Self(), 1);
+    // Go 2 frames back; this method is always called from newInstance0, which is called from
+    // Constructor.newInstance(Object... args).
+    auto* caller = GetCallingClass(soa.Self(), 2);
     // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
     // access checks anyway. TODO: Investigate whether this is the correct behavior.
     if (caller != nullptr && !caller->CanAccess(c.Get())) {
@@ -127,7 +157,7 @@
 
   // String constructor is replaced by a StringFactory method in InvokeMethod.
   if (c->IsStringClass()) {
-    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 1);
+    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 2);
   }
 
   mirror::Object* receiver =
@@ -136,11 +166,18 @@
     return nullptr;
   }
   jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
-  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 1);
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 2);
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
 }
 
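+// Used by the deserialization machinery: allocates an instance of |allocClass| while
+// running the no-arg constructor of |ctorClass| (typically the closest non-serializable
+// superclass).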
+static jobject Constructor_newInstanceFromSerialization(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED,
+                                                        jclass ctorClass, jclass allocClass) {
+  jmethodID ctor = env->GetMethodID(ctorClass, "<init>", "()V");
+  DCHECK(ctor != nullptr);
+  return env->NewObject(allocClass, ctor);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Constructor, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
@@ -149,7 +186,8 @@
   NATIVE_METHOD(Constructor, getParameterAnnotationsNative,
                 "!()[[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Constructor, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
-  NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstance0, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstanceFromSerialization,
+                "!(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index caacba6..c3f2a27 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -17,6 +17,7 @@
 #include "java_lang_reflect_Method.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "jni_internal.h"
@@ -41,20 +42,6 @@
       method->GetDexFile()->GetAnnotationForMethod(method, klass));
 }
 
-static jobjectArray Method_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
-  ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
-    // Return an empty array instead of a null pointer.
-    mirror::Class* annotation_array_class =
-        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
-    mirror::ObjectArray<mirror::Object>* empty_array =
-        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
-    return soa.AddLocalReference<jobjectArray>(empty_array);
-  }
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
-}
-
 static jobject Method_getDefaultValue(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
@@ -71,7 +58,7 @@
     mirror::Class* klass = method->GetDeclaringClass();
     int throws_index = -1;
     size_t i = 0;
-    for (const auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+    for (const auto& m : klass->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
       if (&m == method) {
         throws_index = i;
         break;
@@ -116,27 +103,13 @@
   return InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
 }
 
-static jboolean Method_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
-                                                 jclass annotationType) {
-  ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
-    return false;
-  }
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
-  return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
-}
-
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Method, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Method, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Method, getDefaultValue, "!()Ljava/lang/Object;"),
   NATIVE_METHOD(Method, getExceptionTypes, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Method, getParameterAnnotationsNative, "!()[[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Method, invoke, "!(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Method, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
 };
 
 void register_java_lang_reflect_Method(JNIEnv* env) {
diff --git a/runtime/native/java_util_concurrent_atomic_AtomicLong.cc b/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
index 04f0ba0..4d2ea67 100644
--- a/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
+++ b/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
@@ -16,13 +16,14 @@
 
 #include "java_util_concurrent_atomic_AtomicLong.h"
 
+#include "arch/instruction_set.h"
 #include "atomic.h"
 #include "jni_internal.h"
 
 namespace art {
 
 static jboolean AtomicLong_VMSupportsCS8(JNIEnv*, jclass) {
-  return QuasiAtomic::LongAtomicsUseMutexes() ? JNI_FALSE : JNI_TRUE;
+  return QuasiAtomic::LongAtomicsUseMutexes(kRuntimeISA) ? JNI_FALSE : JNI_TRUE;
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc
index 1216824..64d56f6 100644
--- a/runtime/native/libcore_util_CharsetUtils.cc
+++ b/runtime/native/libcore_util_CharsetUtils.cc
@@ -165,10 +165,9 @@
     return nullptr;
   }
 
-  const jchar* src = &(string->GetValue()[offset]);
   jbyte* dst = &bytes[0];
-  for (int i = length - 1; i >= 0; --i) {
-    jchar ch = *src++;
+  for (int i = 0; i < length; ++i) {
+    jchar ch = string->CharAt(offset + i);
     if (ch > maxValidChar) {
       ch = '?';
     }
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 770644c..858849f 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -15,7 +15,7 @@
  */
 
 #include "sun_misc_Unsafe.h"
 
+#include "common_throws.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/array.h"
@@ -23,6 +23,11 @@
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access.h"
 
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <atomic>
+
 namespace art {
 
 static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
@@ -52,6 +57,17 @@
   mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   // JNI must use non transactional mode.
+  if (kUseReadBarrier) {
+    // Need to make sure the reference stored in the field is a to-space one before attempting the
+    // CAS or the CAS could fail incorrectly.
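+    // (|javaExpectedValue| decodes to a to-space pointer, so a stale from-space pointer
+    // left in the field would compare unequal even though it names the same object.)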
+    mirror::HeapReference<mirror::Object>* field_addr =
+        reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
+            reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
+    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
+        obj,
+        MemberOffset(offset),
+        field_addr);
+  }
   bool success = obj->CasFieldStrongSequentiallyConsistentObject<false>(MemberOffset(offset),
                                                                         expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
@@ -185,6 +201,291 @@
   return Primitive::ComponentSize(primitive_type);
 }
 
+static jint Unsafe_addressSize(JNIEnv* env ATTRIBUTE_UNUSED, jobject ob ATTRIBUTE_UNUSED) {
+  return sizeof(void*);
+}
+
+static jint Unsafe_pageSize(JNIEnv* env ATTRIBUTE_UNUSED, jobject ob ATTRIBUTE_UNUSED) {
+  return sysconf(_SC_PAGESIZE);
+}
+
+static jlong Unsafe_allocateMemory(JNIEnv* env, jobject, jlong bytes) {
+  ScopedFastNativeObjectAccess soa(env);
+  // Check that |bytes| is non-negative and fits into size_t.
+  if (bytes < 0 || bytes != (jlong)(size_t) bytes) {
+    ThrowIllegalAccessException("wrong number of bytes");
+    return 0;
+  }
+  void* mem = malloc(bytes);
+  if (mem == nullptr) {
+    soa.Self()->ThrowOutOfMemoryError("native alloc");
+    return 0;
+  }
+  return (uintptr_t) mem;
+}
+
+static void Unsafe_freeMemory(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  free(reinterpret_cast<void*>(static_cast<uintptr_t>(address)));
+}
+
+static void Unsafe_setMemory(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jlong bytes, jbyte value) {
+  memset(reinterpret_cast<void*>(static_cast<uintptr_t>(address)), value, bytes);
+}
+
+static jbyte Unsafe_getByteJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jbyte*>(address);
+}
+
+static void Unsafe_putByteJB(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jbyte value) {
+  *reinterpret_cast<jbyte*>(address) = value;
+}
+
+static jshort Unsafe_getShortJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jshort*>(address);
+}
+
+static void Unsafe_putShortJS(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jshort value) {
+  *reinterpret_cast<jshort*>(address) = value;
+}
+
+static jchar Unsafe_getCharJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jchar*>(address);
+}
+
+static void Unsafe_putCharJC(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jchar value) {
+  *reinterpret_cast<jchar*>(address) = value;
+}
+
+static jint Unsafe_getIntJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jint*>(address);
+}
+
+static void Unsafe_putIntJI(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jint value) {
+  *reinterpret_cast<jint*>(address) = value;
+}
+
+static jlong Unsafe_getLongJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jlong*>(address);
+}
+
+static void Unsafe_putLongJJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jlong value) {
+  *reinterpret_cast<jlong*>(address) = value;
+}
+
+static jfloat Unsafe_getFloatJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jfloat*>(address);
+}
+
+static void Unsafe_putFloatJF(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jfloat value) {
+  *reinterpret_cast<jfloat*>(address) = value;
+}
+
+static jdouble Unsafe_getDoubleJ(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jdouble*>(address);
+}
+
+static void Unsafe_putDoubleJD(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jdouble value) {
+  *reinterpret_cast<jdouble*>(address) = value;
+}
+
+static void Unsafe_copyMemory(JNIEnv* env, jobject unsafe ATTRIBUTE_UNUSED, jlong src,
+                              jlong dst, jlong size) {
+  if (size == 0) {
+    return;
+  }
+  // Check that |size| is non-negative and fits into size_t.
+  if (size < 0 || size != (jlong)(size_t) size) {
+    ScopedFastNativeObjectAccess soa(env);
+    ThrowIllegalAccessException("wrong number of bytes");
+    return;
+  }
+  memcpy(reinterpret_cast<void*>(dst), reinterpret_cast<void*>(src), (size_t)size);
+}
+
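+// Copies |size| bytes of native memory at |srcAddr| into |array|, starting at byte offset
+// |array_offset|, one element of type T at a time.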
+template<typename T>
+static void copyToArray(jlong srcAddr, mirror::PrimitiveArray<T>* array,
+                        size_t array_offset,
+                        size_t size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const T* src = reinterpret_cast<T*>(srcAddr);
+  size_t sz = size / sizeof(T);
+  size_t of = array_offset / sizeof(T);
+  for (size_t i = 0; i < sz; ++i) {
+    array->Set(i + of, *(src + i));
+  }
+}
+
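+// Mirror image of copyToArray: reads elements out of |array| and writes them to native
+// memory at |dstAddr|.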
+template<typename T>
+static void copyFromArray(jlong dstAddr, mirror::PrimitiveArray<T>* array,
+                          size_t array_offset,
+                          size_t size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  T* dst = reinterpret_cast<T*>(dstAddr);
+  size_t sz = size / sizeof(T);
+  size_t of = array_offset / sizeof(T);
+  for (size_t i = 0; i < sz; ++i) {
+    *(dst + i) = array->Get(i + of);
+  }
+}
+
+static void Unsafe_copyMemoryToPrimitiveArray(JNIEnv* env,
+                                              jobject unsafe ATTRIBUTE_UNUSED,
+                                              jlong srcAddr,
+                                              jobject dstObj,
+                                              jlong dstOffset,
+                                              jlong size) {
+  ScopedObjectAccess soa(env);
+  if (size == 0) {
+    return;
+  }
+  // Check that |size| is non-negative and fits into size_t.
+  if (size < 0 || size != (jlong)(size_t) size) {
+    ThrowIllegalAccessException("wrong number of bytes");
+    return;
+  }
+  size_t sz = (size_t)size;
+  size_t dst_offset = (size_t)dstOffset;
+  mirror::Object* dst = soa.Decode<mirror::Object*>(dstObj);
+  mirror::Class* component_type = dst->GetClass()->GetComponentType();
+  if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
+    copyToArray(srcAddr, dst->AsByteSizedArray(), dst_offset, sz);
+  } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
+    copyToArray(srcAddr, dst->AsShortSizedArray(), dst_offset, sz);
+  } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
+    copyToArray(srcAddr, dst->AsIntArray(), dst_offset, sz);
+  } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
+    copyToArray(srcAddr, dst->AsLongArray(), dst_offset, sz);
+  } else {
+    ThrowIllegalAccessException("not a primitive array");
+  }
+}
+
+static void Unsafe_copyMemoryFromPrimitiveArray(JNIEnv* env,
+                                                jobject unsafe ATTRIBUTE_UNUSED,
+                                                jobject srcObj,
+                                                jlong srcOffset,
+                                                jlong dstAddr,
+                                                jlong size) {
+  ScopedObjectAccess soa(env);
+  if (size == 0) {
+    return;
+  }
+  // Check that |size| is non-negative and fits into size_t.
+  if (size < 0 || size != (jlong)(size_t) size) {
+    ThrowIllegalAccessException("wrong number of bytes");
+    return;
+  }
+  size_t sz = (size_t)size;
+  size_t src_offset = (size_t)srcOffset;
+  mirror::Object* src = soa.Decode<mirror::Object*>(srcObj);
+  mirror::Class* component_type = src->GetClass()->GetComponentType();
+  if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
+    copyFromArray(dstAddr, src->AsByteSizedArray(), src_offset, sz);
+  } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
+    copyFromArray(dstAddr, src->AsShortSizedArray(), src_offset, sz);
+  } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
+    copyFromArray(dstAddr, src->AsIntArray(), src_offset, sz);
+  } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
+    copyFromArray(dstAddr, src->AsLongArray(), src_offset, sz);
+  } else {
+    ThrowIllegalAccessException("not a primitive array");
+  }
+}
+
+static jboolean Unsafe_getBoolean(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  return obj->GetFieldBoolean(MemberOffset(offset));
+}
+
+static void Unsafe_putBoolean(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                              jboolean newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  // JNI must use non transactional mode (SetField8 is non-transactional).
+  obj->SetFieldBoolean<false>(MemberOffset(offset), newValue);
+}
+
+static jbyte Unsafe_getByte(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  return obj->GetFieldByte(MemberOffset(offset));
+}
+
+static void Unsafe_putByte(JNIEnv* env, jobject, jobject javaObj, jlong offset, jbyte newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  // JNI must use non transactional mode.
+  obj->SetFieldByte<false>(MemberOffset(offset), newValue);
+}
+
+static jchar Unsafe_getChar(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  return obj->GetFieldChar(MemberOffset(offset));
+}
+
+static void Unsafe_putChar(JNIEnv* env, jobject, jobject javaObj, jlong offset, jchar newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  // JNI must use non transactional mode.
+  obj->SetFieldChar<false>(MemberOffset(offset), newValue);
+}
+
+static jshort Unsafe_getShort(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  return obj->GetFieldShort(MemberOffset(offset));
+}
+
+static void Unsafe_putShort(JNIEnv* env, jobject, jobject javaObj, jlong offset, jshort newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  // JNI must use non transactional mode.
+  obj->SetFieldShort<false>(MemberOffset(offset), newValue);
+}
+
+static jfloat Unsafe_getFloat(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
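+  // Type-pun through a union to reinterpret the raw 32-bit field bits as a float.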
+  union {int32_t val; jfloat converted;} conv;
+  conv.val = obj->GetField32(MemberOffset(offset));
+  return conv.converted;
+}
+
+static void Unsafe_putFloat(JNIEnv* env, jobject, jobject javaObj, jlong offset, jfloat newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int32_t converted; jfloat val;} conv;
+  conv.val = newValue;
+  // JNI must use non transactional mode.
+  obj->SetField32<false>(MemberOffset(offset), conv.converted);
+}
+
+static jdouble Unsafe_getDouble(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int64_t val; jdouble converted;} conv;
+  conv.val = obj->GetField64(MemberOffset(offset));
+  return conv.converted;
+}
+
+static void Unsafe_putDouble(JNIEnv* env, jobject, jobject javaObj, jlong offset, jdouble newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int64_t converted; jdouble val;} conv;
+  conv.val = newValue;
+  // JNI must use non transactional mode.
+  obj->SetField64<false>(MemberOffset(offset), conv.converted);
+}
+
+static void Unsafe_loadFence(JNIEnv*, jobject) {
+  std::atomic_thread_fence(std::memory_order_acquire);
+}
+
+static void Unsafe_storeFence(JNIEnv*, jobject) {
+  std::atomic_thread_fence(std::memory_order_release);
+}
+
+static void Unsafe_fullFence(JNIEnv*, jobject) {
+  std::atomic_thread_fence(std::memory_order_seq_cst);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
   NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
@@ -206,6 +507,49 @@
   NATIVE_METHOD(Unsafe, putOrderedObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
   NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "!(Ljava/lang/Class;)I"),
   NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "!(Ljava/lang/Class;)I"),
+  NATIVE_METHOD(Unsafe, addressSize, "!()I"),
+  NATIVE_METHOD(Unsafe, pageSize, "!()I"),
+  NATIVE_METHOD(Unsafe, allocateMemory, "!(J)J"),
+  NATIVE_METHOD(Unsafe, freeMemory, "!(J)V"),
+  NATIVE_METHOD(Unsafe, setMemory, "!(JJB)V"),
+  NATIVE_METHOD(Unsafe, copyMemory, "!(JJJ)V"),
+  NATIVE_METHOD(Unsafe, copyMemoryToPrimitiveArray, "!(JLjava/lang/Object;JJ)V"),
+  NATIVE_METHOD(Unsafe, copyMemoryFromPrimitiveArray, "!(Ljava/lang/Object;JJJ)V"),
+  NATIVE_METHOD(Unsafe, getBoolean, "!(Ljava/lang/Object;J)Z"),
+  NATIVE_METHOD(Unsafe, getByte, "!(Ljava/lang/Object;J)B"),
+  NATIVE_METHOD(Unsafe, getChar, "!(Ljava/lang/Object;J)C"),
+  NATIVE_METHOD(Unsafe, getShort, "!(Ljava/lang/Object;J)S"),
+  NATIVE_METHOD(Unsafe, getFloat, "!(Ljava/lang/Object;J)F"),
+  NATIVE_METHOD(Unsafe, getDouble, "!(Ljava/lang/Object;J)D"),
+  NATIVE_METHOD(Unsafe, putBoolean, "!(Ljava/lang/Object;JZ)V"),
+  NATIVE_METHOD(Unsafe, putByte, "!(Ljava/lang/Object;JB)V"),
+  NATIVE_METHOD(Unsafe, putChar, "!(Ljava/lang/Object;JC)V"),
+  NATIVE_METHOD(Unsafe, putShort, "!(Ljava/lang/Object;JS)V"),
+  NATIVE_METHOD(Unsafe, putFloat, "!(Ljava/lang/Object;JF)V"),
+  NATIVE_METHOD(Unsafe, putDouble, "!(Ljava/lang/Object;JD)V"),
+
+  // Each of the getFoo methods is overloaded with a version that operates
+  // directly on a native pointer.
+  OVERLOADED_NATIVE_METHOD(Unsafe, getByte, "!(J)B", getByteJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, getChar, "!(J)C", getCharJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, getShort, "!(J)S", getShortJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, getInt, "!(J)I", getIntJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, getLong, "!(J)J", getLongJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, getFloat, "!(J)F", getFloatJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, getDouble, "!(J)D", getDoubleJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putByte, "!(JB)V", putByteJB),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putChar, "!(JC)V", putCharJC),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putShort, "!(JS)V", putShortJS),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putInt, "!(JI)V", putIntJI),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putLong, "!(JJ)V", putLongJJ),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putFloat, "!(JF)V", putFloatJF),
+  OVERLOADED_NATIVE_METHOD(Unsafe, putDouble, "!(JD)V", putDoubleJD),
+
+  // Memory ordering fences.
+  NATIVE_METHOD(Unsafe, loadFence, "!()V"),
+  NATIVE_METHOD(Unsafe, storeFence, "!()V"),
+  NATIVE_METHOD(Unsafe, fullFence, "!()V"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 46cc5aa..155c008 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -21,6 +21,7 @@
 #include "nativebridge/native_bridge.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "dex_file-inl.h"
@@ -45,10 +46,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t native_method_count = 0;
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    native_method_count += m.IsNative() ? 1u : 0u;
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     native_method_count += m.IsNative() ? 1u : 0u;
   }
   return native_method_count;
@@ -63,19 +61,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t count = 0;
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    if (m.IsNative()) {
-      if (count < method_count) {
-        methods[count].name = m.GetName();
-        methods[count].signature = m.GetShorty();
-        methods[count].fnPtr = m.GetEntryPointFromJni();
-        count++;
-      } else {
-        LOG(WARNING) << "Output native method array too small. Skipping " << PrettyMethod(&m);
-      }
-    }
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     if (m.IsNative()) {
       if (count < method_count) {
         methods[count].name = m.GetName();
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
new file mode 100644
index 0000000..c20c8b8
--- /dev/null
+++ b/runtime/native_stack_dump.cc
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "native_stack_dump.h"
+
+#include <ostream>
+
+#include <stdio.h>
+
+#include "art_method.h"
+
+// For DumpNativeStack.
+#include <backtrace/Backtrace.h>
+#include <backtrace/BacktraceMap.h>
+
+#if defined(__linux__)
+
+#include <memory>
+#include <vector>
+
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "arch/instruction_set.h"
+#include "base/memory_tool.h"
+#include "base/mutex.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+#include "oat_quick_method_header.h"
+#include "os.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+#endif
+
+namespace art {
+
+#if defined(__linux__)
+
+static constexpr bool kUseAddr2line = !kIsTargetBuild;
+
+ALWAYS_INLINE
+static inline void WritePrefix(std::ostream& os, const char* prefix, bool odd) {
+  if (prefix != nullptr) {
+    os << prefix;
+  }
+  os << "  ";
+  if (!odd) {
+    os << " ";
+  }
+}
+
+// The state of an open pipe to addr2line. In "server" mode, addr2line takes input on stdin
+// and prints the result to stdout. This struct keeps the state of the open connection.
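+// The exchange is line oriented: each hex address written to addr2line's stdin yields at
+// least two lines on its stdout (the function name, then "file:line"; --inlines may add
+// more pairs).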
+struct Addr2linePipe {
+  Addr2linePipe(int in_fd, int out_fd, const std::string& file_name, pid_t pid)
+      : in(in_fd, false), out(out_fd, false), file(file_name), child_pid(pid), odd(true) {}
+
+  ~Addr2linePipe() {
+    kill(child_pid, SIGKILL);
+  }
+
+  File in;      // The file descriptor that is connected to the output of addr2line.
+  File out;     // The file descriptor that is connected to the input of addr2line.
+
+  const std::string file;     // The file addr2line is working on, so that we know when to close
+                              // and restart.
+  const pid_t child_pid;      // The pid of the child, which we should kill when we're done.
+  bool odd;                   // Print state for indentation of lines.
+};
+
+static std::unique_ptr<Addr2linePipe> Connect(const std::string& name, const char* args[]) {
+  int caller_to_addr2line[2];
+  int addr2line_to_caller[2];
+
+  if (pipe(caller_to_addr2line) == -1) {
+    return nullptr;
+  }
+  if (pipe(addr2line_to_caller) == -1) {
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    return nullptr;
+  }
+
+  pid_t pid = fork();
+  if (pid == -1) {
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    close(addr2line_to_caller[0]);
+    close(addr2line_to_caller[1]);
+    return nullptr;
+  }
+
+  if (pid == 0) {
+    dup2(caller_to_addr2line[0], STDIN_FILENO);
+    dup2(addr2line_to_caller[1], STDOUT_FILENO);
+
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    close(addr2line_to_caller[0]);
+    close(addr2line_to_caller[1]);
+
+    execv(args[0], const_cast<char* const*>(args));
+    exit(1);
+  } else {
+    close(caller_to_addr2line[0]);
+    close(addr2line_to_caller[1]);
+    return std::unique_ptr<Addr2linePipe>(new Addr2linePipe(addr2line_to_caller[0],
+                                                            caller_to_addr2line[1],
+                                                            name,
+                                                            pid));
+  }
+}
+
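+// Copies pending addr2line output to |os| line by line, writing the prefix and toggling
+// the indentation state as lines complete. |expected| only controls how patiently the
+// select() below waits before treating silence as the end of the output.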
+static void Drain(size_t expected,
+                  const char* prefix,
+                  std::unique_ptr<Addr2linePipe>* pipe /* inout */,
+                  std::ostream& os) {
+  DCHECK(pipe != nullptr);
+  DCHECK(pipe->get() != nullptr);
+  int in = pipe->get()->in.Fd();
+  DCHECK_GE(in, 0);
+
+  bool prefix_written = false;
+
+  for (;;) {
+    constexpr uint32_t kWaitTimeExpectedMicros = 500 * 1000;
+    constexpr uint32_t kWaitTimeUnexpectedMicros = 50 * 1000;
+
+    struct timeval tv;
+    tv.tv_sec = 0;
+    tv.tv_usec = expected > 0 ? kWaitTimeExpectedMicros : kWaitTimeUnexpectedMicros;
+
+    fd_set rfds;
+    FD_ZERO(&rfds);
+    FD_SET(in, &rfds);
+
+    int retval = TEMP_FAILURE_RETRY(select(in + 1, &rfds, nullptr, nullptr, &tv));
+
+    if (retval < 0) {
+      // Other side may have crashed or other errors.
+      pipe->reset();
+      return;
+    }
+
+    if (retval == 0) {
+      // Timeout.
+      return;
+    }
+
+    DCHECK_EQ(retval, 1);
+
+    // Relatively small buffer. Should be OK as we're on an alt stack, but just to be sure...
+    constexpr size_t kMaxBuffer = 128;
+    char buffer[kMaxBuffer];
+    memset(buffer, 0, kMaxBuffer);
+    int bytes_read = TEMP_FAILURE_RETRY(read(in, buffer, kMaxBuffer - 1));
+
+    if (bytes_read < 0) {
+      // This should not really happen...
+      pipe->reset();
+      return;
+    }
+
+    char* tmp = buffer;
+    while (*tmp != 0) {
+      if (!prefix_written) {
+        WritePrefix(os, prefix, (*pipe)->odd);
+        prefix_written = true;
+      }
+      char* new_line = strchr(tmp, '\n');
+      if (new_line == nullptr) {
+        os << tmp;
+
+        break;
+      } else {
+        char saved = *(new_line + 1);
+        *(new_line + 1) = 0;
+        os << tmp;
+        *(new_line + 1) = saved;
+
+        tmp = new_line + 1;
+        prefix_written = false;
+        (*pipe)->odd = !(*pipe)->odd;
+
+        if (expected > 0) {
+          expected--;
+        }
+      }
+    }
+  }
+}
+
+static void Addr2line(const std::string& map_src,
+                      uintptr_t offset,
+                      std::ostream& os,
+                      const char* prefix,
+                      std::unique_ptr<Addr2linePipe>* pipe /* inout */) {
+  DCHECK(pipe != nullptr);
+
+  if (map_src == "[vdso]") {
+    // Special-case the vdso; our addr2line setup cannot symbolize it.
+    return;
+  }
+
+  if (*pipe == nullptr || (*pipe)->file != map_src) {
+    if (*pipe != nullptr) {
+      Drain(0, prefix, pipe, os);
+    }
+    pipe->reset();  // Close early.
+
+    const char* args[7] = {
+        "/usr/bin/addr2line",
+        "--functions",
+        "--inlines",
+        "--demangle",
+        "-e",
+        map_src.c_str(),
+        nullptr
+    };
+    *pipe = Connect(map_src, args);
+  }
+
+  Addr2linePipe* pipe_ptr = pipe->get();
+  if (pipe_ptr == nullptr) {
+    // Failed...
+    return;
+  }
+
+  // Send the offset.
+  const std::string hex_offset = StringPrintf("%zx\n", offset);
+
+  if (!pipe_ptr->out.WriteFully(hex_offset.data(), hex_offset.length())) {
+    // Error. :-(
+    pipe->reset();
+    return;
+  }
+
+  // Now drain (expecting two lines).
+  Drain(2U, prefix, pipe, os);
+}
+
+static bool RunCommand(const std::string& cmd) {
+  FILE* stream = popen(cmd.c_str(), "r");
+  if (stream) {
+    pclose(stream);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
+  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
+      method->GetEntryPointFromQuickCompiledCode()));
+  if (code == 0) {
+    return pc == 0;
+  }
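+  // The OatQuickMethodHeader is laid out immediately before the compiled code, so
+  // indexing with [-1] reads this method's header.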
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return code <= pc && pc <= (code + code_size);
+}
+
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* existing_map,
+                     const char* prefix,
+                     ArtMethod* current_method,
+                     void* ucontext_ptr) {
+  // b/18119146
+  if (RUNNING_ON_MEMORY_TOOL != 0) {
+    return;
+  }
+
+  BacktraceMap* map = existing_map;
+  std::unique_ptr<BacktraceMap> tmp_map;
+  if (map == nullptr) {
+    tmp_map.reset(BacktraceMap::Create(getpid()));
+    map = tmp_map.get();
+  }
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
+  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
+    os << prefix << "(backtrace::Unwind failed for thread " << tid
+       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
+    return;
+  } else if (backtrace->NumFrames() == 0) {
+    os << prefix << "(no native stack frames for thread " << tid << ")\n";
+    return;
+  }
+
+  // Check whether we have and should use addr2line.
+  bool use_addr2line;
+  if (kUseAddr2line) {
+    // Try to run it to see whether we have it. Pass "-h" so that it prints usage and
+    // exits instead of assuming a.out and waiting for addresses on stdin.
+    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h");
+  } else {
+    use_addr2line = false;
+  }
+
+  std::unique_ptr<Addr2linePipe> addr2line_state;
+
+  for (Backtrace::const_iterator it = backtrace->begin();
+       it != backtrace->end(); ++it) {
+    // We produce output like this:
+    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
+    // In order for parsing tools to continue to function, the stack dump
+    // format must at least adhere to this format:
+    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
+    // The parsers require a single space before and after pc, and two spaces
+    // after the <RELATIVE_ADDR>. There can be any prefix data before the
+    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
+    os << prefix << StringPrintf("#%02zu pc ", it->num);
+    bool try_addr2line = false;
+    if (!BacktraceMap::IsValid(it->map)) {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
+                                                            : "%08" PRIxPTR "  ???",
+                         it->pc);
+    } else {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
+                                                            : "%08" PRIxPTR "  ",
+                         BacktraceMap::GetRelativePc(it->map, it->pc));
+      os << it->map.name;
+      os << " (";
+      if (!it->func_name.empty()) {
+        os << it->func_name;
+        if (it->func_offset != 0) {
+          os << "+" << it->func_offset;
+        }
+        try_addr2line = true;
+      } else if (current_method != nullptr &&
+          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
+          PcIsWithinQuickCode(current_method, it->pc)) {
+        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
+        os << JniLongName(current_method) << "+"
+           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
+      } else {
+        os << "???";
+      }
+      os << ")";
+    }
+    os << "\n";
+    if (try_addr2line && use_addr2line) {
+      Addr2line(it->map.name, it->pc - it->map.start, os, prefix, &addr2line_state);
+    }
+  }
+
+  if (addr2line_state != nullptr) {
+    Drain(0, prefix, &addr2line_state, os);
+  }
+}
+
+void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
+  if (tid == GetTid()) {
+    // There's no point showing that we're reading our stack out of /proc!
+    return;
+  }
+
+  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
+  std::string kernel_stack;
+  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
+    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
+    return;
+  }
+
+  std::vector<std::string> kernel_stack_frames;
+  Split(kernel_stack, '\n', &kernel_stack_frames);
+  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
+  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
+  kernel_stack_frames.pop_back();
+  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
+    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
+    // into "futex_wait_queue_me+0xcd/0x110".
+    const char* text = kernel_stack_frames[i].c_str();
+    const char* close_bracket = strchr(text, ']');
+    if (close_bracket != nullptr) {
+      text = close_bracket + 2;
+    }
+    os << prefix;
+    if (include_count) {
+      os << StringPrintf("#%02zd ", i);
+    }
+    os << text << "\n";
+  }
+}
+
+#elif defined(__APPLE__)
+
+void DumpNativeStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     BacktraceMap* existing_map ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     ArtMethod* current_method ATTRIBUTE_UNUSED,
+                     void* ucontext_ptr ATTRIBUTE_UNUSED) {
+}
+
+void DumpKernelStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     bool include_count ATTRIBUTE_UNUSED) {
+}
+
+#else
+#error "Unsupported architecture for native stack dumps."
+#endif
+
+}  // namespace art
diff --git a/runtime/native_stack_dump.h b/runtime/native_stack_dump.h
new file mode 100644
index 0000000..d64bc82
--- /dev/null
+++ b/runtime/native_stack_dump.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_STACK_DUMP_H_
+#define ART_RUNTIME_NATIVE_STACK_DUMP_H_
+
+#include <unistd.h>
+
+#include <iosfwd>
+
+#include "base/macros.h"
+
+class BacktraceMap;
+
+namespace art {
+
+class ArtMethod;
+
+// Dumps the native stack for thread 'tid' to 'os'.
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* map = nullptr,
+                     const char* prefix = "",
+                     ArtMethod* current_method = nullptr,
+                     void* ucontext = nullptr)
+    NO_THREAD_SAFETY_ANALYSIS;
+
+// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
+void DumpKernelStack(std::ostream& os,
+                     pid_t tid,
+                     const char* prefix = "",
+                     bool include_count = true);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_STACK_DUMP_H_
diff --git a/runtime/noop_compiler_callbacks.h b/runtime/noop_compiler_callbacks.h
index 1cbf2bb..02081cb 100644
--- a/runtime/noop_compiler_callbacks.h
+++ b/runtime/noop_compiler_callbacks.h
@@ -26,8 +26,7 @@
   NoopCompilerCallbacks() : CompilerCallbacks(CompilerCallbacks::CallbackMode::kCompileApp) {}
   ~NoopCompilerCallbacks() {}
 
-  bool MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {
-    return true;
+  void MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {
   }
 
   void ClassRejected(ClassReference ref ATTRIBUTE_UNUSED) OVERRIDE {}
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index 2295cb4..e9b0d3c 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -46,6 +46,7 @@
       DCHECK(caller == nullptr);
       if (count == n) {
         caller = m;
+        caller_pc = GetCurrentQuickFramePc();
         return false;
       }
       count++;
@@ -57,6 +58,7 @@
   const bool include_runtime_and_upcalls_;
   size_t count;
   ArtMethod* caller;
+  uintptr_t caller_pc;
 };
 
 }  // namespace art
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 40aca0d..aab0e81 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -45,9 +45,7 @@
 
 OatHeader* OatHeader::Create(InstructionSet instruction_set,
                              const InstructionSetFeatures* instruction_set_features,
-                             const std::vector<const DexFile*>* dex_files,
-                             uint32_t image_file_location_oat_checksum,
-                             uint32_t image_file_location_oat_data_begin,
+                             uint32_t dex_file_count,
                              const SafeMap<std::string, std::string>* variable_data) {
   // Estimate size of optional data.
   size_t needed_size = ComputeOatHeaderSize(variable_data);
@@ -58,18 +56,29 @@
   // Create the OatHeader in-place.
   return new (memory) OatHeader(instruction_set,
                                 instruction_set_features,
-                                dex_files,
-                                image_file_location_oat_checksum,
-                                image_file_location_oat_data_begin,
+                                dex_file_count,
                                 variable_data);
 }
 
 OatHeader::OatHeader(InstructionSet instruction_set,
                      const InstructionSetFeatures* instruction_set_features,
-                     const std::vector<const DexFile*>* dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uint32_t image_file_location_oat_data_begin,
-                     const SafeMap<std::string, std::string>* variable_data) {
+                     uint32_t dex_file_count,
+                     const SafeMap<std::string, std::string>* variable_data)
+    : adler32_checksum_(adler32(0L, Z_NULL, 0)),
+      instruction_set_(instruction_set),
+      instruction_set_features_bitmap_(instruction_set_features->AsBitmap()),
+      dex_file_count_(dex_file_count),
+      executable_offset_(0),
+      interpreter_to_interpreter_bridge_offset_(0),
+      interpreter_to_compiled_code_bridge_offset_(0),
+      jni_dlsym_lookup_offset_(0),
+      quick_generic_jni_trampoline_offset_(0),
+      quick_imt_conflict_trampoline_offset_(0),
+      quick_resolution_trampoline_offset_(0),
+      quick_to_interpreter_bridge_offset_(0),
+      image_patch_delta_(0),
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0) {
   // Don't want asserts in header as they would be checked in each file that includes it. But the
   // fields are private, so we check inside a method.
   static_assert(sizeof(magic_) == sizeof(kOatMagic),
@@ -79,46 +88,11 @@
 
   memcpy(magic_, kOatMagic, sizeof(kOatMagic));
   memcpy(version_, kOatVersion, sizeof(kOatVersion));
-  executable_offset_ = 0;
-  image_patch_delta_ = 0;
-
-  adler32_checksum_ = adler32(0L, Z_NULL, 0);
 
   CHECK_NE(instruction_set, kNone);
-  instruction_set_ = instruction_set;
-  UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
-
-  instruction_set_features_bitmap_ = instruction_set_features->AsBitmap();
-  UpdateChecksum(&instruction_set_features_bitmap_, sizeof(instruction_set_features_bitmap_));
-
-  dex_file_count_ = dex_files->size();
-  UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
-
-  image_file_location_oat_checksum_ = image_file_location_oat_checksum;
-  UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
-
-  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
-  image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
-  UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
 
   // Flatten the map. Will also update variable_size_data_size_.
   Flatten(variable_data);
-
-  // Update checksum for variable data size.
-  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
-
-  // Update for data, if existing.
-  if (key_value_store_size_ > 0U) {
-    UpdateChecksum(&key_value_store_, key_value_store_size_);
-  }
-
-  interpreter_to_interpreter_bridge_offset_ = 0;
-  interpreter_to_compiled_code_bridge_offset_ = 0;
-  jni_dlsym_lookup_offset_ = 0;
-  quick_generic_jni_trampoline_offset_ = 0;
-  quick_imt_conflict_trampoline_offset_ = 0;
-  quick_resolution_trampoline_offset_ = 0;
-  quick_to_interpreter_bridge_offset_ = 0;
 }
 
 bool OatHeader::IsValid() const {
@@ -175,10 +149,45 @@
   return adler32_checksum_;
 }
 
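+// Folds all header fields into the checksum in one pass. The field setters no longer
+// update the checksum themselves, so this must be called once the header is finalized.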
+void OatHeader::UpdateChecksumWithHeaderData() {
+  UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
+  UpdateChecksum(&instruction_set_features_bitmap_, sizeof(instruction_set_features_bitmap_));
+  UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
+  UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
+  UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
+
+  // Update checksum for variable data size.
+  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
+
+  // Update for data, if existing.
+  if (key_value_store_size_ > 0U) {
+    UpdateChecksum(&key_value_store_, key_value_store_size_);
+  }
+
+  UpdateChecksum(&executable_offset_, sizeof(executable_offset_));
+  UpdateChecksum(&interpreter_to_interpreter_bridge_offset_,
+                 sizeof(interpreter_to_interpreter_bridge_offset_));
+  UpdateChecksum(&interpreter_to_compiled_code_bridge_offset_,
+                 sizeof(interpreter_to_compiled_code_bridge_offset_));
+  UpdateChecksum(&jni_dlsym_lookup_offset_, sizeof(jni_dlsym_lookup_offset_));
+  UpdateChecksum(&quick_generic_jni_trampoline_offset_,
+                 sizeof(quick_generic_jni_trampoline_offset_));
+  UpdateChecksum(&quick_imt_conflict_trampoline_offset_,
+                 sizeof(quick_imt_conflict_trampoline_offset_));
+  UpdateChecksum(&quick_resolution_trampoline_offset_,
+                 sizeof(quick_resolution_trampoline_offset_));
+  UpdateChecksum(&quick_to_interpreter_bridge_offset_,
+                 sizeof(quick_to_interpreter_bridge_offset_));
+}
+
 void OatHeader::UpdateChecksum(const void* data, size_t length) {
   DCHECK(IsValid());
-  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
-  adler32_checksum_ = adler32(adler32_checksum_, bytes, length);
+  if (data != nullptr) {
+    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
+    adler32_checksum_ = adler32(adler32_checksum_, bytes, length);
+  } else {
+    DCHECK_EQ(0U, length);
+  }
 }
 
 InstructionSet OatHeader::GetInstructionSet() const {
@@ -205,7 +214,6 @@
   DCHECK_EQ(executable_offset_, 0U);
 
   executable_offset_ = executable_offset;
-  UpdateChecksum(&executable_offset_, sizeof(executable_offset));
 }
 
 const void* OatHeader::GetInterpreterToInterpreterBridge() const {
@@ -225,7 +233,6 @@
   DCHECK_EQ(interpreter_to_interpreter_bridge_offset_, 0U) << offset;
 
   interpreter_to_interpreter_bridge_offset_ = offset;
-  UpdateChecksum(&interpreter_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetInterpreterToCompiledCodeBridge() const {
@@ -244,7 +251,6 @@
   DCHECK_EQ(interpreter_to_compiled_code_bridge_offset_, 0U) << offset;
 
   interpreter_to_compiled_code_bridge_offset_ = offset;
-  UpdateChecksum(&interpreter_to_compiled_code_bridge_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetJniDlsymLookup() const {
@@ -263,7 +269,6 @@
   DCHECK_EQ(jni_dlsym_lookup_offset_, 0U) << offset;
 
   jni_dlsym_lookup_offset_ = offset;
-  UpdateChecksum(&jni_dlsym_lookup_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickGenericJniTrampoline() const {
@@ -282,7 +287,6 @@
   DCHECK_EQ(quick_generic_jni_trampoline_offset_, 0U) << offset;
 
   quick_generic_jni_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_generic_jni_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickImtConflictTrampoline() const {
@@ -301,7 +305,6 @@
   DCHECK_EQ(quick_imt_conflict_trampoline_offset_, 0U) << offset;
 
   quick_imt_conflict_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_imt_conflict_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickResolutionTrampoline() const {
@@ -320,7 +323,6 @@
   DCHECK_EQ(quick_resolution_trampoline_offset_, 0U) << offset;
 
   quick_resolution_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_resolution_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickToInterpreterBridge() const {
@@ -339,7 +341,6 @@
   DCHECK_EQ(quick_to_interpreter_bridge_offset_, 0U) << offset;
 
   quick_to_interpreter_bridge_offset_ = offset;
-  UpdateChecksum(&quick_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
 int32_t OatHeader::GetImagePatchDelta() const {
@@ -367,11 +368,22 @@
   return image_file_location_oat_checksum_;
 }
 
+void OatHeader::SetImageFileLocationOatChecksum(uint32_t image_file_location_oat_checksum) {
+  CHECK(IsValid());
+  image_file_location_oat_checksum_ = image_file_location_oat_checksum;
+}
+
 uint32_t OatHeader::GetImageFileLocationOatDataBegin() const {
   CHECK(IsValid());
   return image_file_location_oat_data_begin_;
 }
 
+void OatHeader::SetImageFileLocationOatDataBegin(uint32_t image_file_location_oat_data_begin) {
+  CHECK(IsValid());
+  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
+  image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
+}
+
 uint32_t OatHeader::GetKeyValueStoreSize() const {
   CHECK(IsValid());
   return key_value_store_size_;
@@ -454,13 +466,34 @@
   return IsKeyEnabled(OatHeader::kPicKey);
 }
 
+bool OatHeader::HasPatchInfo() const {
+  return IsKeyEnabled(OatHeader::kHasPatchInfoKey);
+}
+
 bool OatHeader::IsDebuggable() const {
   return IsKeyEnabled(OatHeader::kDebuggableKey);
 }
 
-bool OatHeader::IsKeyEnabled(const char* key) const {
+bool OatHeader::IsNativeDebuggable() const {
+  return IsKeyEnabled(OatHeader::kNativeDebuggableKey);
+}
+
+CompilerFilter::Filter OatHeader::GetCompilerFilter() const {
+  CompilerFilter::Filter filter;
+  const char* key_value = GetStoreValueByKey(kCompilerFilter);
+  CHECK(key_value != nullptr) << "compiler-filter not found in oat header";
+  CHECK(CompilerFilter::ParseCompilerFilter(key_value, &filter))
+      << "Invalid compiler-filter in oat header: " << key_value;
+  return filter;
+}
+
+bool OatHeader::KeyHasValue(const char* key, const char* value, size_t value_size) const {
   const char* key_value = GetStoreValueByKey(key);
-  return (key_value != nullptr && strncmp(key_value, kTrueValue, sizeof(kTrueValue)) == 0);
+  return (key_value != nullptr && strncmp(key_value, value, value_size) == 0);
+}
+
+bool OatHeader::IsKeyEnabled(const char* key) const {
+  return KeyHasValue(key, kTrueValue, sizeof(kTrueValue));
 }
 
 void OatHeader::Flatten(const SafeMap<std::string, std::string>* key_value_store) {
diff --git a/runtime/oat.h b/runtime/oat.h
index 276e7f3..7c84fe9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -21,6 +21,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/macros.h"
+#include "compiler_filter.h"
 #include "dex_file.h"
 #include "safe_map.h"
 
@@ -31,29 +32,33 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '2', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '6', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDex2OatHostKey = "dex2oat-host";
   static constexpr const char* kPicKey = "pic";
+  static constexpr const char* kHasPatchInfoKey = "has-patch-info";
   static constexpr const char* kDebuggableKey = "debuggable";
+  static constexpr const char* kNativeDebuggableKey = "native-debuggable";
+  static constexpr const char* kCompilerFilter = "compiler-filter";
   static constexpr const char* kClassPathKey = "classpath";
+  static constexpr const char* kBootClassPathKey = "bootclasspath";
 
   static constexpr const char kTrueValue[] = "true";
   static constexpr const char kFalseValue[] = "false";
 
   static OatHeader* Create(InstructionSet instruction_set,
                            const InstructionSetFeatures* instruction_set_features,
-                           const std::vector<const DexFile*>* dex_files,
-                           uint32_t image_file_location_oat_checksum,
-                           uint32_t image_file_location_oat_data_begin,
+                           uint32_t dex_file_count,
                            const SafeMap<std::string, std::string>* variable_data);
 
   bool IsValid() const;
   std::string GetValidationErrorMessage() const;
   const char* GetMagic() const;
   uint32_t GetChecksum() const;
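+  // Folds the header fields themselves into the checksum (presumably so the checksum can be
+  // recomputed after fields are updated via the setters below).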
+  void UpdateChecksumWithHeaderData();
   void UpdateChecksum(const void* data, size_t length);
   uint32_t GetDexFileCount() const {
     DCHECK(IsValid());
@@ -92,8 +97,11 @@
 
   InstructionSet GetInstructionSet() const;
   uint32_t GetInstructionSetFeaturesBitmap() const;
+
   uint32_t GetImageFileLocationOatChecksum() const;
+  void SetImageFileLocationOatChecksum(uint32_t image_file_location_oat_checksum);
   uint32_t GetImageFileLocationOatDataBegin() const;
+  void SetImageFileLocationOatDataBegin(uint32_t image_file_location_oat_data_begin);
 
   uint32_t GetKeyValueStoreSize() const;
   const uint8_t* GetKeyValueStore() const;
@@ -102,14 +110,17 @@
 
   size_t GetHeaderSize() const;
   bool IsPic() const;
+  bool HasPatchInfo() const;
   bool IsDebuggable() const;
+  bool IsNativeDebuggable() const;
+  CompilerFilter::Filter GetCompilerFilter() const;
 
  private:
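+  // Returns true if the value stored under |key| equals |value|, compared over |value_size| bytes.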
+  bool KeyHasValue(const char* key, const char* value, size_t value_size) const;
+
   OatHeader(InstructionSet instruction_set,
             const InstructionSetFeatures* instruction_set_features,
-            const std::vector<const DexFile*>* dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uint32_t image_file_location_oat_data_begin,
+            uint32_t dex_file_count,
             const SafeMap<std::string, std::string>* variable_data);
 
   // Returns true if the value of the given key is "true", false otherwise.
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 7b92120..d7d0c4f 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -71,44 +71,6 @@
   return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
 }
 
-inline const uint8_t* OatFile::OatMethod::GetGcMap() const {
-  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].gc_map_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
-inline uint32_t OatFile::OatMethod::GetGcMapOffset() const {
-  const uint8_t* gc_map = GetGcMap();
-  return static_cast<uint32_t>(gc_map != nullptr ? gc_map - begin_ : 0u);
-}
-
-inline uint32_t OatFile::OatMethod::GetGcMapOffsetOffset() const {
-  const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
-  if (method_header == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const uint8_t*>(&method_header->gc_map_offset_) - begin_;
-}
-
-inline uint32_t OatFile::OatMethod::GetMappingTableOffset() const {
-  const uint8_t* mapping_table = GetMappingTable();
-  return static_cast<uint32_t>(mapping_table != nullptr ? mapping_table - begin_ : 0u);
-}
-
-inline uint32_t OatFile::OatMethod::GetMappingTableOffsetOffset() const {
-  const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
-  if (method_header == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const uint8_t*>(&method_header->mapping_table_offset_) - begin_;
-}
-
 inline uint32_t OatFile::OatMethod::GetVmapTableOffset() const {
   const uint8_t* vmap_table = GetVmapTable();
   return static_cast<uint32_t>(vmap_table != nullptr ? vmap_table - begin_ : 0u);
@@ -122,18 +84,6 @@
   return reinterpret_cast<const uint8_t*>(&method_header->vmap_table_offset_) - begin_;
 }
 
-inline const uint8_t* OatFile::OatMethod::GetMappingTable() const {
-  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
   const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index a162a4e..5752fd9 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -28,13 +28,15 @@
 #include <sstream>
 
 // dlopen_ext support from bionic.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "android/dlext.h"
 #endif
 
 #include "art_method-inl.h"
 #include "base/bit_vector.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
 #include "elf_file.h"
 #include "elf_utils.h"
@@ -46,16 +48,16 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
+#include "type_lookup_table.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
-#include "vmap_table.h"
 
 namespace art {
 
-// Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
+// Whether OatFile::Open will try dlopen. Fallback is our own ELF loader.
 static constexpr bool kUseDlopen = true;
 
-// Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
+// Whether OatFile::Open will try dlopen on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
@@ -64,348 +66,152 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
-std::string OatFile::ResolveRelativeEncodedDexLocation(
-      const char* abs_dex_location, const std::string& rel_dex_location) {
-  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
-    // Strip :classes<N>.dex used for secondary multidex files.
-    std::string base = DexFile::GetBaseLocation(rel_dex_location);
-    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+// Note for OatFileBase and descendants:
+//
+// These are used in OatFile::Open to try all our loaders.
+//
+// The process is simple:
+//
+// 1) Allocate an instance through the standard constructor (location, executable).
+// 2) PreLoad() for any steps that must happen before the file is opened.
+// 3) Load() to try to open the file.
+// 4) ComputeFields() to populate the OatFile fields like begin_, using FindDynamicSymbolAddress.
+// 5) PreSetup() for any steps that should be done before the final setup.
+// 6) Setup() to complete the procedure.
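+//
+// A rough sketch of the resulting call sequence (OpenOatFile below is the actual driver;
+// SubType stands for DlOpenOatFile or ElfOatFile defined further down):
+//
+//   std::unique_ptr<OatFileBase> f(new SubType(location, executable));
+//   f->PreLoad();
+//   if (!f->Load(elf_filename, ...) ||
+//       !f->ComputeFields(requested_base, elf_filename, error_msg)) {
+//     return nullptr;
+//   }
+//   f->PreSetup(elf_filename);
+//   return f->Setup(abs_dex_location, error_msg) ? f.release() : nullptr;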
 
-    // Check if the base is a suffix of the provided abs_dex_location.
-    std::string target_suffix = "/" + base;
-    std::string abs_location(abs_dex_location);
-    if (abs_location.size() > target_suffix.size()) {
-      size_t pos = abs_location.size() - target_suffix.size();
-      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
-        return abs_location + multidex_suffix;
-      }
-    }
-  }
-  return rel_dex_location;
-}
+class OatFileBase : public OatFile {
+ public:
+  virtual ~OatFileBase() {}
 
-void OatFile::CheckLocation(const std::string& location) {
-  CHECK(!location.empty());
-}
-
-OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+  template <typename kOatFileBaseSubType>
+  static OatFileBase* OpenOatFile(const std::string& elf_filename,
                                   const std::string& location,
+                                  uint8_t* requested_base,
+                                  uint8_t* oat_file_begin,
+                                  bool writable,
+                                  bool executable,
+                                  bool low_4gb,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
-  oat_file->elf_file_.reset(elf_file);
-  uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
-  CHECK(has_section);
-  oat_file->begin_ = elf_file->Begin() + offset;
-  oat_file->end_ = elf_file->Begin() + size + offset;
-  // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
-}
+                                  std::string* error_msg);
 
-OatFile* OatFile::Open(const std::string& filename,
-                       const std::string& location,
-                       uint8_t* requested_base,
-                       uint8_t* oat_file_begin,
-                       bool executable,
-                       const char* abs_dex_location,
-                       std::string* error_msg) {
-  CHECK(!filename.empty()) << location;
-  CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+ protected:
+  OatFileBase(const std::string& filename, bool executable) : OatFile(filename, executable) {}
 
-  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
-  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
-  //       !executable is a sign that we may want to patch), which may not be allowed for
-  //       various reasons.
-  // dlopen always returns the same library if it is already opened on the host. For this reason
-  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
-  // the same library loaded multiple times at different addresses is required for class unloading
-  // and for having dex caches arrays in the .bss section.
-  Runtime* const runtime = Runtime::Current();
-  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
-  if (kUseDlopen && executable) {
-    bool success = kIsTargetBuild;
-    bool reserved_location = false;
-      // Manager may be null if we are running without a runtime.
-    if (!success && kUseDlopenOnHost && manager != nullptr) {
-      // RegisterOatFileLocation returns false if we are not the first caller to register that
-      // location.
-      reserved_location = manager->RegisterOatFileLocation(location);
-      success = reserved_location;
-    }
-    if (success) {
-      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-      // this will register the oat file with the linker and allows libunwind to find our info.
-      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-      if (reserved_location) {
-        manager->UnRegisterOatFileLocation(location);
-      }
-      if (ret != nullptr) {
-        return ret.release();
-      }
-      if (kPrintDlOpenErrorMessage) {
-        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
-      }
-    }
+  virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                                  std::string* error_msg) const = 0;
+
+  virtual void PreLoad() = 0;
+
+  virtual bool Load(const std::string& elf_filename,
+                    uint8_t* oat_file_begin,
+                    bool writable,
+                    bool executable,
+                    bool low_4gb,
+                    std::string* error_msg) = 0;
+
+  bool ComputeFields(uint8_t* requested_base,
+                     const std::string& file_path,
+                     std::string* error_msg);
+
+  virtual void PreSetup(const std::string& elf_filename) = 0;
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
+
+  // Setters exposed for ElfOatFile.
+
+  void SetBegin(const uint8_t* begin) {
+    begin_ = begin;
   }
 
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  //
-  //
-  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
-  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+  void SetEnd(const uint8_t* end) {
+    end_ = end;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OatFileBase);
+};
+
+template <typename kOatFileBaseSubType>
+OatFileBase* OatFileBase::OpenOatFile(const std::string& elf_filename,
+                                      const std::string& location,
+                                      uint8_t* requested_base,
+                                      uint8_t* oat_file_begin,
+                                      bool writable,
+                                      bool executable,
+                                      bool low_4gb,
+                                      const char* abs_dex_location,
+                                      std::string* error_msg) {
+  std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+
+  ret->PreLoad();
+
+  if (!ret->Load(elf_filename,
+                 oat_file_begin,
+                 writable,
+                 executable,
+                 low_4gb,
+                 error_msg)) {
     return nullptr;
   }
-  ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        abs_dex_location, error_msg));
 
-  // It would be nice to unlink here. But we might have opened the file created by the
-  // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
-  // to allow removal when we know the ELF must be borked.
+  if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
+    return nullptr;
+  }
+
+  ret->PreSetup(elf_filename);
+
+  if (!ret->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
-  if (!success) {
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile* OatFile::OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
-  bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
-  if (!success) {
-    CHECK(!error_msg->empty());
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
-      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
-  CHECK(!location_.empty());
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().RegisterOatFileLocation(location);
-  }
-}
-
-OatFile::~OatFile() {
-  STLDeleteElements(&oat_dex_files_storage_);
-  if (dlopen_handle_ != nullptr) {
-    dlclose(dlopen_handle_);
-  }
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
-  }
-}
-
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
-#ifdef __APPLE__
-  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
-  // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
-  return false;
-#else
-  {
-    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
-    if (absolute_path == nullptr) {
-      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
-      return false;
-    }
-#ifdef __ANDROID__
-    android_dlextinfo extinfo;
-    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
-    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
-#else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
-#endif
-  }
-  if (dlopen_handle_ == nullptr) {
-    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
-    return false;
-  }
-  begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatdata"));
+bool OatFileBase::ComputeFields(uint8_t* requested_base,
+                                const std::string& file_path,
+                                std::string* error_msg) {
+  std::string symbol_error_msg;
+  begin_ = FindDynamicSymbolAddress("oatdata", &symbol_error_msg);
   if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    // The host can fail this check. Do not dump the maps there, to avoid polluting the output.
+    if (kIsTargetBuild && (kIsDebugBuild || VLOG_IS_ON(oat))) {
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    }
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p, %s. See process maps in the log.",
-                              begin_, requested_base, elf_filename.c_str());
+        "oatdata=%p != expected=%p. See process maps in the log.",
+        begin_, requested_base);
     return false;
   }
-  end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatlastword"));
+  end_ = FindDynamicSymbolAddress("oatlastword", &symbol_error_msg);
   if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbss"));
+  bss_begin_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbss", &symbol_error_msg));
   if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
+    // No .bss section.
     bss_end_ = nullptr;
-    dlerror();
   } else {
-    bss_end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbsslastword"));
+    bss_end_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbsslastword", &symbol_error_msg));
     if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                elf_filename.c_str());
+      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'", file_path.c_str());
       return false;
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
   }
 
-  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
-  struct dl_iterate_context {
-    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
-      auto* context = reinterpret_cast<dl_iterate_context*>(data);
-      // See whether this callback corresponds to the file which we have just loaded.
-      bool contains_begin = false;
-      for (int i = 0; i < info->dlpi_phnum; i++) {
-        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                      info->dlpi_phdr[i].p_vaddr);
-          size_t memsz = info->dlpi_phdr[i].p_memsz;
-          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
-            contains_begin = true;
-            break;
-          }
-        }
-      }
-      // Add dummy mmaps for this file.
-      if (contains_begin) {
-        for (int i = 0; i < info->dlpi_phnum; i++) {
-          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                        info->dlpi_phdr[i].p_vaddr);
-            size_t memsz = info->dlpi_phdr[i].p_memsz;
-            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
-            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
-          }
-        }
-        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
-      }
-      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
-    }
-    const uint8_t* const begin_;
-    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { begin_, &dlopen_mmaps_ };
-
-  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
-  }
-
-  return Setup(abs_dex_location, error_msg);
-#endif  // __APPLE__
-}
-
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
-                          const char* abs_dex_location,
-                          std::string* error_msg) {
-  // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
-                                oat_file_begin));
-  if (elf_file_ == nullptr) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  bool loaded = elf_file_->Load(executable, error_msg);
-  if (!loaded) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  begin_ = elf_file_->FindDynamicSymbolAddress("oatdata");
-  if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p. See process maps in the log.",
-                              begin_, requested_base);
-    return false;
-  }
-  end_ = elf_file_->FindDynamicSymbolAddress("oatlastword");
-  if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  // Readjust to be non-inclusive upper bound.
-  end_ += sizeof(uint32_t);
-
-  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
-  if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
-    bss_end_ = nullptr;
-    dlerror();
-  } else {
-    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
-    if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                file->GetPath().c_str());
-      return false;
-    }
-    // Readjust to be non-inclusive upper bound.
-    bss_end_ += sizeof(uint32_t);
-  }
-
-  return Setup(abs_dex_location, error_msg);
+  return true;
 }
 
 // Read an unaligned entry from the OatDexFile data in OatFile and advance the read
@@ -428,7 +234,7 @@
   return true;
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s",
@@ -455,7 +261,7 @@
     return false;
   }
 
-  size_t pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
+  PointerSize pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
   uint8_t* dex_cache_arrays = bss_begin_;
   uint32_t dex_file_count = GetOatHeader().GetDexFileCount();
   oat_dex_files_storage_.reserve(dex_file_count);
@@ -474,16 +280,15 @@
                                 i);
       return false;
     }
-
-    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
-    oat += dex_file_location_size;
-    if (UNLIKELY(oat > End())) {
+    if (UNLIKELY(static_cast<size_t>(End() - oat) < dex_file_location_size)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
                                     "location",
                                 GetLocation().c_str(),
                                 i);
       return false;
     }
+    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
+    oat += dex_file_location_size;
 
     std::string dex_file_location = ResolveRelativeEncodedDexLocation(
         abs_dex_location,
@@ -526,6 +331,17 @@
                                 Size());
       return false;
     }
+    if (UNLIKELY(Size() - dex_file_offset < sizeof(DexFile::Header))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u of %zu but the size of dex file header is %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size(),
+                                sizeof(DexFile::Header));
+      return false;
+    }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
@@ -547,17 +363,77 @@
       return false;
     }
     const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
-    const uint32_t* methods_offsets_pointer = reinterpret_cast<const uint32_t*>(oat);
+    if (Size() - dex_file_offset < header->file_size_) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u and size %u truncated at %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                header->file_size_,
+                                Size());
+      return false;
+    }
 
-    oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
-                                    "method offsets",
+    uint32_t class_offsets_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after class offsets offset",
                                 GetLocation().c_str(),
                                 i,
                                 dex_file_location.c_str());
       return false;
     }
+    if (UNLIKELY(class_offsets_offset > Size()) ||
+        UNLIKELY((Size() - class_offsets_offset) / sizeof(uint32_t) < header->class_defs_size_)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "class offsets, offset %u of %zu, class defs %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset,
+                                Size(),
+                                header->class_defs_size_);
+      return false;
+    }
+    if (UNLIKELY(!IsAligned<alignof(uint32_t)>(class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with unaligned "
+                                    "class offsets, offset %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset);
+      return false;
+    }
+    const uint32_t* class_offsets_pointer =
+        reinterpret_cast<const uint32_t*>(Begin() + class_offsets_offset);
+
+    uint32_t lookup_table_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &lookup_table_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
+                                    "after lookup table offset",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
+      return false;
+    }
+    const uint8_t* lookup_table_data = lookup_table_offset != 0u
+        ? Begin() + lookup_table_offset
+        : nullptr;
+    if (lookup_table_offset != 0u &&
+        (UNLIKELY(lookup_table_offset > Size()) ||
+            UNLIKELY(Size() - lookup_table_offset <
+                     TypeLookupTable::RawDataLength(header->class_defs_size_)))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "type lookup table, offset %u of %zu, class defs %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                lookup_table_offset,
+                                Size(),
+                                header->class_defs_size_);
+      return false;
+    }
 
     uint8_t* current_dex_cache_arrays = nullptr;
     if (dex_cache_arrays != nullptr) {
@@ -586,7 +462,8 @@
                                               canonical_location,
                                               dex_file_checksum,
                                               dex_file_pointer,
-                                              methods_offsets_pointer,
+                                              lookup_table_data,
+                                              class_offsets_pointer,
                                               current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
@@ -610,6 +487,568 @@
   return true;
 }
 
+////////////////////////
+// OatFile via dlopen //
+////////////////////////
+
+class DlOpenOatFile FINAL : public OatFileBase {
+ public:
+  DlOpenOatFile(const std::string& filename, bool executable)
+      : OatFileBase(filename, executable),
+        dlopen_handle_(nullptr),
+        shared_objects_before_(0) {
+  }
+
+  ~DlOpenOatFile() {
+    if (dlopen_handle_ != nullptr) {
+      if (!kIsTargetBuild) {
+        MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
+        host_dlopen_handles_.erase(dlopen_handle_);
+        dlclose(dlopen_handle_);
+      } else {
+        dlclose(dlopen_handle_);
+      }
+    }
+  }
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr =
+        reinterpret_cast<const uint8_t*>(dlsym(dlopen_handle_, symbol_name.c_str()));
+    if (ptr == nullptr) {
+      *error_msg = dlerror();
+    }
+    return ptr;
+  }
+
+  void PreLoad() OVERRIDE;
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,
+            bool writable,
+            bool executable,
+            bool low_4gb,
+            std::string* error_msg) OVERRIDE;
+
+  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
+  void PreSetup(const std::string& elf_filename) OVERRIDE;
+
+ private:
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* oat_file_begin,
+              std::string* error_msg);
+
+  // On the host, if the same library is loaded again with dlopen, the same
+  // handle is returned. This differs from the behavior of dlopen on the
+  // target, where dlopen reloads the library at a different address every
+  // time it is loaded. The runtime relies on the target behavior to ensure
+  // each instance of the loaded library has a unique dex cache. To avoid
+  // problems, we fall back to our own linker when the same library is
+  // opened multiple times on the host. host_dlopen_handles_ is used to
+  // detect that case.
+  // Guarded by host_dlopen_handles_lock_.
+  static std::unordered_set<void*> host_dlopen_handles_;
+
+  // dlopen handle during runtime.
+  void* dlopen_handle_;  // TODO: Unique_ptr with custom deleter.
+
+  // Dummy memory map objects corresponding to the regions mapped by dlopen.
+  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
+
+  // The number of shared objects the linker told us about before loading. Used to
+  // (optimistically) optimize the PreSetup stage (see comment there).
+  size_t shared_objects_before_;
+
+  DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
+};
+
+std::unordered_set<void*> DlOpenOatFile::host_dlopen_handles_;
+
+void DlOpenOatFile::PreLoad() {
+#ifdef __APPLE__
+  UNUSED(shared_objects_before_);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  // Count the entries in dl_iterate_phdr we get at this point in time.
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info ATTRIBUTE_UNUSED,
+                        size_t size ATTRIBUTE_UNUSED,
+                        void *data) {
+      reinterpret_cast<dl_iterate_context*>(data)->count++;
+      return 0;  // Continue iteration.
+    }
+    size_t count = 0;
+  } context;
+
+  dl_iterate_phdr(dl_iterate_context::callback, &context);
+  shared_objects_before_ = context.count;
+#endif
+}
+
+bool DlOpenOatFile::Load(const std::string& elf_filename,
+                         uint8_t* oat_file_begin,
+                         bool writable,
+                         bool executable,
+                         bool low_4gb,
+                         std::string* error_msg) {
+  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
+  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
+  //       !executable is a sign that we may want to patch), which may not be allowed for
+  //       various reasons.
+  if (!kUseDlopen) {
+    *error_msg = "DlOpen is disabled.";
+    return false;
+  }
+  if (low_4gb) {
+    *error_msg = "DlOpen does not support low 4gb loading.";
+    return false;
+  }
+  if (writable) {
+    *error_msg = "DlOpen does not support writable loading.";
+    return false;
+  }
+  if (!executable) {
+    *error_msg = "DlOpen does not support non-executable loading.";
+    return false;
+  }
+
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  if (!kIsTargetBuild) {
+    if (!kUseDlopenOnHost) {
+      *error_msg = "DlOpen disabled for host.";
+      return false;
+    }
+  }
+
+  bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
+  DCHECK(dlopen_handle_ != nullptr || !success);
+
+  return success;
+}
+
+bool DlOpenOatFile::Dlopen(const std::string& elf_filename,
+                           uint8_t* oat_file_begin,
+                           std::string* error_msg) {
+#ifdef __APPLE__
+  // The dl_iterate_phdr function is missing. There is a similar API on OS X,
+  // but let's fall back to the custom loading code for the time being.
+  UNUSED(elf_filename, oat_file_begin);
+  *error_msg = "Dlopen unsupported on Mac.";
+  return false;
+#else
+  {
+    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
+    if (absolute_path == nullptr) {
+      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
+      return false;
+    }
+#ifdef ART_TARGET_ANDROID
+    android_dlextinfo extinfo;
+    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |                  // Force-load, don't reuse handle
+                                                                //   (open oat files multiple
+                                                                //    times).
+                    ANDROID_DLEXT_FORCE_FIXED_VADDR;            // Take a non-zero vaddr as absolute
+                                                                //   (non-pic boot image).
+    if (oat_file_begin != nullptr) {                            //
+      extinfo.flags |= ANDROID_DLEXT_LOAD_AT_FIXED_ADDRESS;     // Use the requested addr if
+      extinfo.reserved_addr = oat_file_begin;                   // vaddr = 0.
+    }                                                           //   (pic boot image).
+    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
+#else
+    UNUSED(oat_file_begin);
+    static_assert(!kIsTargetBuild, "host_dlopen_handles_ will leak handles");
+    MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    if (dlopen_handle_ != nullptr) {
+      if (!host_dlopen_handles_.insert(dlopen_handle_).second) {
+        dlclose(dlopen_handle_);
+        dlopen_handle_ = nullptr;
+        *error_msg = StringPrintf("host dlopen re-opened '%s'", elf_filename.c_str());
+        return false;
+      }
+    }
+#endif  // ART_TARGET_ANDROID
+  }
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
+    return false;
+  }
+  return true;
+#endif
+}
+
+void DlOpenOatFile::PreSetup(const std::string& elf_filename) {
+#ifdef __APPLE__
+  UNUSED(elf_filename);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      context->shared_objects_seen++;
+      if (context->shared_objects_seen < context->shared_objects_before) {
+        // Still iterating over shared objects that were already present before our dlopen; skip.
+        // Note: this is aggressively optimistic. If another thread was unloading a library,
+        //       we may miss out here. However, this does not happen often in practice.
+        return 0;
+      }
+
+      // See whether this callback corresponds to the file which we have just loaded.
+      bool contains_begin = false;
+      for (int i = 0; i < info->dlpi_phnum; i++) {
+        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+              info->dlpi_phdr[i].p_vaddr);
+          size_t memsz = info->dlpi_phdr[i].p_memsz;
+          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
+            contains_begin = true;
+            break;
+          }
+        }
+      }
+      // Add dummy mmaps for this file.
+      if (contains_begin) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+                info->dlpi_phdr[i].p_vaddr);
+            size_t memsz = info->dlpi_phdr[i].p_memsz;
+            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
+            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
+          }
+        }
+        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
+      }
+      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
+    }
+    const uint8_t* const begin_;
+    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
+    const size_t shared_objects_before;
+    size_t shared_objects_seen;
+  };
+  dl_iterate_context context = { Begin(), &dlopen_mmaps_, shared_objects_before_, 0};
+
+  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
+    // Hm, maybe our optimization went wrong. Try again with shared_objects_before == 0
+    // before giving up. This should be rare.
+    VLOG(oat) << "Need a second run in PreSetup, didn't find with shared_objects_before="
+              << shared_objects_before_;
+    dl_iterate_context context0 = { Begin(), &dlopen_mmaps_, 0, 0};
+    if (dl_iterate_phdr(dl_iterate_context::callback, &context0) == 0) {
+      // OK, give up and print an error.
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
+    }
+  }
+#endif
+}
+
+////////////////////////////////////////////////
+// OatFile via our own ElfFile implementation //
+////////////////////////////////////////////////
+
+class ElfOatFile FINAL : public OatFileBase {
+ public:
+  ElfOatFile(const std::string& filename, bool executable) : OatFileBase(filename, executable) {}
+
+  static ElfOatFile* OpenElfFile(File* file,
+                                 const std::string& location,
+                                 uint8_t* requested_base,
+                                 uint8_t* oat_file_begin,  // Override base if not null
+                                 bool writable,
+                                 bool executable,
+                                 bool low_4gb,
+                                 const char* abs_dex_location,
+                                 std::string* error_msg);
+
+  bool InitializeFromElfFile(ElfFile* elf_file,
+                             const char* abs_dex_location,
+                             std::string* error_msg);
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr = elf_file_->FindDynamicSymbolAddress(symbol_name);
+    if (ptr == nullptr) {
+      *error_msg = "(Internal implementation could not find symbol)";
+    }
+    return ptr;
+  }
+
+  void PreLoad() OVERRIDE {
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+            bool writable,
+            bool executable,
+            bool low_4gb,
+            std::string* error_msg) OVERRIDE;
+
+  void PreSetup(const std::string& elf_filename ATTRIBUTE_UNUSED) OVERRIDE {
+  }
+
+ private:
+  bool ElfFileOpen(File* file,
+                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                   bool writable,
+                   bool executable,
+                   bool low_4gb,
+                   std::string* error_msg);
+
+ private:
+  // Backing memory map for oat file during cross compilation.
+  std::unique_ptr<ElfFile> elf_file_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfOatFile);
+};
+
+ElfOatFile* ElfOatFile::OpenElfFile(File* file,
+                                    const std::string& location,
+                                    uint8_t* requested_base,
+                                    uint8_t* oat_file_begin,  // Override base if not null
+                                    bool writable,
+                                    bool executable,
+                                    bool low_4gb,
+                                    const char* abs_dex_location,
+                                    std::string* error_msg) {
+  ScopedTrace trace("Open elf file " + location);
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, executable));
+  bool success = oat_file->ElfFileOpen(file,
+                                       oat_file_begin,
+                                       writable,
+                                       low_4gb,
+                                       executable,
+                                       error_msg);
+  if (!success) {
+    CHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  // Complete the setup.
+  if (!oat_file->ComputeFields(requested_base, file->GetPath(), error_msg)) {
+    return nullptr;
+  }
+
+  if (!oat_file->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
+  return oat_file.release();
+}
+
+bool ElfOatFile::InitializeFromElfFile(ElfFile* elf_file,
+                                       const char* abs_dex_location,
+                                       std::string* error_msg) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  if (IsExecutable()) {
+    *error_msg = "Cannot initialize from elf file in executable mode.";
+    return false;
+  }
+  elf_file_.reset(elf_file);
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  SetBegin(elf_file->Begin() + offset);
+  SetEnd(elf_file->Begin() + size + offset);
+  // Ignore the optional .bss section when opening non-executable.
+  return Setup(abs_dex_location, error_msg);
+}
+
+bool ElfOatFile::Load(const std::string& elf_filename,
+                      uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                      bool writable,
+                      bool executable,
+                      bool low_4gb,
+                      std::string* error_msg) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  if (file == nullptr) {
+    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+    return false;
+  }
+  return ElfOatFile::ElfFileOpen(file.get(),
+                                 oat_file_begin,
+                                 writable,
+                                 executable,
+                                 low_4gb,
+                                 error_msg);
+}
+
+bool ElfOatFile::ElfFileOpen(File* file,
+                             uint8_t* oat_file_begin,
+                             bool writable,
+                             bool executable,
+                             bool low_4gb,
+                             std::string* error_msg) {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  // TODO: rename requested_base to oat_data_begin
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                low_4gb,
+                                error_msg,
+                                oat_file_begin));
+  if (elf_file_ == nullptr) {
+    DCHECK(!error_msg->empty());
+    return false;
+  }
+  bool loaded = elf_file_->Load(executable, low_4gb, error_msg);
+  DCHECK(loaded || !error_msg->empty());
+  return loaded;
+}
+
+//////////////////////////
+// General OatFile code //
+//////////////////////////
+
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
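+  // For example, abs_dex_location = "/data/app/foo.apk" and rel_dex_location =
+  // "foo.apk:classes2.dex" resolve to "/data/app/foo.apk:classes2.dex".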
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
+
+static void CheckLocation(const std::string& location) {
+  CHECK(!location.empty());
+}
+
+OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
+                                  const char* abs_dex_location,
+                                  std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, false /* executable */));
+  return oat_file->InitializeFromElfFile(elf_file, abs_dex_location, error_msg)
+      ? oat_file.release()
+      : nullptr;
+}
+
+OatFile* OatFile::Open(const std::string& filename,
+                       const std::string& location,
+                       uint8_t* requested_base,
+                       uint8_t* oat_file_begin,
+                       bool executable,
+                       bool low_4gb,
+                       const char* abs_dex_location,
+                       std::string* error_msg) {
+  ScopedTrace trace("Open oat file " + location);
+  CHECK(!filename.empty()) << location;
+  CheckLocation(location);
+
+  // Check that the file even exists, fast-fail.
+  if (!OS::FileExists(filename.c_str())) {
+    *error_msg = StringPrintf("File %s does not exist.", filename.c_str());
+    return nullptr;
+  }
+
+  // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
+  // disabled.
+  OatFile* with_dlopen = OatFileBase::OpenOatFile<DlOpenOatFile>(filename,
+                                                                 location,
+                                                                 requested_base,
+                                                                 oat_file_begin,
+                                                                 false /* writable */,
+                                                                 executable,
+                                                                 low_4gb,
+                                                                 abs_dex_location,
+                                                                 error_msg);
+  if (with_dlopen != nullptr) {
+    return with_dlopen;
+  }
+  if (kPrintDlOpenErrorMessage) {
+    LOG(ERROR) << "Failed to dlopen: " << filename << " with error " << *error_msg;
+  }
+  // If we aren't trying to execute, we just use our own ElfFile loader for a couple of reasons:
+  //
+  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+  //
+  // We use our own ELF loader for Quick to deal with legacy apps that
+  // open a generated dex file by name, remove the file, then open
+  // another generated dex file with the same name. http://b/10614658
+  //
+  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+  //
+  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
+  // honors the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
+  OatFile* with_internal = OatFileBase::OpenOatFile<ElfOatFile>(filename,
+                                                                location,
+                                                                requested_base,
+                                                                oat_file_begin,
+                                                                false /* writable */,
+                                                                executable,
+                                                                low_4gb,
+                                                                abs_dex_location,
+                                                                error_msg);
+  return with_internal;
+}
+
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 /*requested_base*/nullptr,
+                                 /*oat_file_begin*/nullptr,
+                                 /*writable*/true,
+                                 /*executable*/false,
+                                 /*low_4gb*/false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 /*requested_base*/nullptr,
+                                 /*oat_file_begin*/nullptr,
+                                 /*writable*/false,
+                                 /*executable*/false,
+                                 /*low_4gb*/false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile::OatFile(const std::string& location, bool is_executable)
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
+  CHECK(!location_.empty());
+}
+
+OatFile::~OatFile() {
+  STLDeleteElements(&oat_dex_files_storage_);
+}
+
 const OatHeader& OatFile::GetOatHeader() const {
   return *reinterpret_cast<const OatHeader*>(Begin());
 }
@@ -634,7 +1073,7 @@
 
 const OatFile::OatDexFile* OatFile::GetOatDexFile(const char* dex_location,
                                                   const uint32_t* dex_location_checksum,
-                                                  bool warn_if_not_found) const {
+                                                  std::string* error_msg) const {
   // NOTE: We assume here that the canonical location for a given dex_location never
   // changes. If it does (i.e. some symlink used by the filename changes) we may return
   // an incorrect OatDexFile. As long as we have a checksum to check, we shall return
@@ -676,32 +1115,29 @@
       secondary_oat_dex_files_.PutBefore(secondary_lb, key_copy, oat_dex_file);
     }
   }
-  if (oat_dex_file != nullptr &&
-      (dex_location_checksum == nullptr ||
-       oat_dex_file->GetDexFileLocationChecksum() == *dex_location_checksum)) {
-    return oat_dex_file;
+
+  if (oat_dex_file == nullptr) {
+    if (error_msg != nullptr) {
+      std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
+      *error_msg = "Failed to find OatDexFile for DexFile " + std::string(dex_location)
+          + " (canonical path " + dex_canonical_location + ") in OatFile " + GetLocation();
+    }
+    return nullptr;
   }
 
-  if (warn_if_not_found) {
-    std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
-    std::string checksum("<unspecified>");
-    if (dex_location_checksum != nullptr) {
-      checksum = StringPrintf("0x%08x", *dex_location_checksum);
+  if (dex_location_checksum != nullptr &&
+      oat_dex_file->GetDexFileLocationChecksum() != *dex_location_checksum) {
+    if (error_msg != nullptr) {
+      std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
+      std::string checksum = StringPrintf("0x%08x", oat_dex_file->GetDexFileLocationChecksum());
+      std::string required_checksum = StringPrintf("0x%08x", *dex_location_checksum);
+      *error_msg = "OatDexFile for DexFile " + std::string(dex_location)
+          + " (canonical path " + dex_canonical_location + ") in OatFile " + GetLocation()
+          + " has checksum " + checksum + " but " + required_checksum + " was required";
     }
-    LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location
-                 << " ( canonical path " << dex_canonical_location << ")"
-                 << " with checksum " << checksum << " in OatFile " << GetLocation();
-    if (kIsDebugBuild) {
-      for (const OatDexFile* odf : oat_dex_files_storage_) {
-        LOG(WARNING) << "OatFile " << GetLocation()
-                     << " contains OatDexFile " << odf->GetDexFileLocation()
-                     << " (canonical path " << odf->GetCanonicalDexFileLocation() << ")"
-                     << " with checksum 0x" << std::hex << odf->GetDexFileLocationChecksum();
-      }
-    }
+    return nullptr;
   }
-
-  return nullptr;
+  return oat_dex_file;
 }
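
With the reworked contract, a failed lookup returns null and, when asked, explains
why; the logging decision moves to the caller. A hedged usage sketch (oat_file,
dex_location and dex_checksum are assumed to be in scope):

    std::string error_msg;
    const OatFile::OatDexFile* oat_dex_file =
        oat_file.GetOatDexFile(dex_location, &dex_checksum, &error_msg);
    if (oat_dex_file == nullptr) {
      LOG(WARNING) << error_msg;  // The caller now owns the reporting policy.
    }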
 
 OatFile::OatDexFile::OatDexFile(const OatFile* oat_file,
@@ -709,6 +1145,7 @@
                                 const std::string& canonical_dex_file_location,
                                 uint32_t dex_file_location_checksum,
                                 const uint8_t* dex_file_pointer,
+                                const uint8_t* lookup_table_data,
                                 const uint32_t* oat_class_offsets_pointer,
                                 uint8_t* dex_cache_arrays)
     : oat_file_(oat_file),
@@ -716,6 +1153,7 @@
       canonical_dex_file_location_(canonical_dex_file_location),
       dex_file_location_checksum_(dex_file_location_checksum),
       dex_file_pointer_(dex_file_pointer),
+      lookup_table_data_(lookup_table_data),
       oat_class_offsets_pointer_(oat_class_offsets_pointer),
       dex_cache_arrays_(dex_cache_arrays) {}
 
@@ -726,8 +1164,17 @@
 }
 
 std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
-  return DexFile::Open(dex_file_pointer_, FileSize(), dex_file_location_,
-                       dex_file_location_checksum_, this, error_msg);
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  static constexpr bool kVerify = false;
+  static constexpr bool kVerifyChecksum = false;
+  return DexFile::Open(dex_file_pointer_,
+                       FileSize(),
+                       dex_file_location_,
+                       dex_file_location_checksum_,
+                       this,
+                       kVerify,
+                       kVerifyChecksum,
+                       error_msg);
 }
 
 uint32_t OatFile::OatDexFile::GetOatClassOffset(uint16_t class_def_index) const {
@@ -861,6 +1308,10 @@
   method->SetEntryPointFromQuickCompiledCode(GetQuickCode());
 }
 
+bool OatFile::HasPatchInfo() const {
+  return GetOatHeader().HasPatchInfo();
+}
+
 bool OatFile::IsPic() const {
   return GetOatHeader().IsPic();
   // TODO: Check against oat_patches. b/18144996
@@ -870,6 +1321,10 @@
   return GetOatHeader().IsDebuggable();
 }
 
+CompilerFilter::Filter OatFile::GetCompilerFilter() const {
+  return GetOatHeader().GetCompilerFilter();
+}
+
 static constexpr char kDexClassPathEncodingSeparator = '*';
 
 std::string OatFile::EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files) {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 6acdf86..f5ab9dc 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,8 +40,17 @@
 class OatHeader;
 class OatDexFile;
 
-class OatFile FINAL {
+namespace gc {
+namespace collector {
+class DummyOatFile;
+}  // namespace collector
+}  // namespace gc
+
+class OatFile {
  public:
+  // Special classpath that skips the shared library check.
+  static constexpr const char* kSpecialSharedLibrary = "&";
+
   typedef art::OatDexFile OatDexFile;
 
   // Opens an oat file contained within the given elf file. This is always opened as
@@ -58,6 +67,7 @@
                        uint8_t* requested_base,
                        uint8_t* oat_file_begin,
                        bool executable,
+                       bool low_4gb,
                        const char* abs_dex_location,
                        std::string* error_msg);
 
@@ -74,22 +84,20 @@
                                const char* abs_dex_location,
                                std::string* error_msg);
 
-  ~OatFile();
+  virtual ~OatFile();
 
   bool IsExecutable() const {
     return is_executable_;
   }
 
+  bool HasPatchInfo() const;
+
   bool IsPic() const;
 
   // Indicates whether the oat file was compiled with full debugging capability.
   bool IsDebuggable() const;
 
-  ElfFile* GetElfFile() const {
-    CHECK_NE(reinterpret_cast<uintptr_t>(elf_file_.get()), reinterpret_cast<uintptr_t>(nullptr))
-        << "Cannot get an elf file from " << GetLocation();
-    return elf_file_.get();
-  }
+  CompilerFilter::Filter GetCompilerFilter() const;
 
   const std::string& GetLocation() const {
     return location_;
@@ -118,18 +126,10 @@
     uint32_t GetCoreSpillMask() const;
     uint32_t GetFpSpillMask() const;
 
-    const uint8_t* GetMappingTable() const;
-    uint32_t GetMappingTableOffset() const;
-    uint32_t GetMappingTableOffsetOffset() const;
-
     const uint8_t* GetVmapTable() const;
     uint32_t GetVmapTableOffset() const;
     uint32_t GetVmapTableOffsetOffset() const;
 
-    const uint8_t* GetGcMap() const;
-    uint32_t GetGcMapOffset() const;
-    uint32_t GetGcMapOffsetOffset() const;
-
     // Create an OatMethod with offsets relative to the given base address
     OatMethod(const uint8_t* base, const uint32_t code_offset)
         : begin_(base), code_offset_(code_offset) {
@@ -213,9 +213,15 @@
 
     friend class art::OatDexFile;
   };
+
+  // Get the OatDexFile for the given dex_location within this oat file.
+  // If dex_location_checksum is non-null, the OatDexFile will only be
+  // returned if it has a matching checksum.
+  // If error_msg is non-null and no OatDexFile is returned, error_msg will
+  // be updated with a description of why no OatDexFile was returned.
   const OatDexFile* GetOatDexFile(const char* dex_location,
                                   const uint32_t* const dex_location_checksum,
-                                  bool exception_if_not_found = true) const
+                                  /*out*/std::string* error_msg = nullptr) const
       REQUIRES(!secondary_lookup_lock_);
 
   const std::vector<const OatDexFile*>& GetOatDexFiles() const {
@@ -226,6 +232,10 @@
     return End() - Begin();
   }
 
+  bool Contains(const void* p) const {
+    return p >= Begin() && p < End();
+  }
+
   size_t BssSize() const {
     return BssEnd() - BssBegin();
   }
@@ -260,35 +270,10 @@
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
                                               std::vector<std::string>* locations);
 
+ protected:
+  OatFile(const std::string& filename, bool executable);
+
  private:
-  static void CheckLocation(const std::string& location);
-
-  static OatFile* OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg);
-
-  static OatFile* OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,  // Override base if not null
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg);
-
-  explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
-                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
-                   const char* abs_dex_location,
-                   std::string* error_msg);
-
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
-
   // The oat file name.
   //
   // The image will embed this to link its associated oat file.
@@ -309,18 +294,6 @@
   // Was this oat_file loaded executable?
   const bool is_executable_;
 
-  // Backing memory map for oat file during when opened by ElfWriter during initial compilation.
-  std::unique_ptr<MemMap> mem_map_;
-
-  // Backing memory map for oat file during cross compilation.
-  std::unique_ptr<ElfFile> elf_file_;
-
-  // dlopen handle during runtime.
-  void* dlopen_handle_;
-
-  // Dummy memory map objects corresponding to the regions mapped by dlopen.
-  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
-
   // Owning storage for the OatDexFile objects.
   std::vector<const OatDexFile*> oat_dex_files_storage_;
 
@@ -353,9 +326,11 @@
   // elements. std::list<> and std::deque<> satisfy this requirement, std::vector<> doesn't.
   mutable std::list<std::string> string_cache_ GUARDED_BY(secondary_lookup_lock_);
 
+  friend class gc::collector::DummyOatFile;  // For modifying begin_ and end_.
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatFile);
 };
 
@@ -400,6 +375,14 @@
     return dex_cache_arrays_;
   }
 
+  const uint8_t* GetLookupTableData() const {
+    return lookup_table_data_;
+  }
+
+  const uint8_t* GetDexFilePointer() const {
+    return dex_file_pointer_;
+  }
+
   ~OatDexFile();
 
  private:
@@ -408,6 +391,7 @@
              const std::string& canonical_dex_file_location,
              uint32_t dex_file_checksum,
              const uint8_t* dex_file_pointer,
+             const uint8_t* lookup_table_data,
              const uint32_t* oat_class_offsets_pointer,
              uint8_t* dex_cache_arrays);
 
@@ -416,10 +400,12 @@
   const std::string canonical_dex_file_location_;
   const uint32_t dex_file_location_checksum_;
   const uint8_t* const dex_file_pointer_;
+  const uint8_t* lookup_table_data_;
   const uint32_t* const oat_class_offsets_pointer_;
   uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
 
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 8d5418d..fe6332d 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -16,56 +16,51 @@
 
 #include "oat_file_assistant.h"
 
-#include <fcntl.h>
-#ifdef __linux__
-#include <sys/sendfile.h>
-#else
-#include <sys/socket.h>
-#endif
-#include <sys/types.h>
 #include <sys/stat.h>
-#include <unistd.h>
-
-#include <set>
-
 #include "base/logging.h"
 #include "base/stringprintf.h"
+#include "compiler_filter.h"
 #include "class_linker.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
 #include "image.h"
 #include "oat.h"
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
-#include "ScopedFd.h"
+#include "scoped_thread_state_change.h"
 #include "utils.h"
 
 namespace art {
 
+std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStatus status) {
+  switch (status) {
+    case OatFileAssistant::kOatOutOfDate:
+      stream << "kOatOutOfDate";
+      break;
+    case OatFileAssistant::kOatUpToDate:
+      stream << "kOatUpToDate";
+      break;
+    case OatFileAssistant::kOatNeedsRelocation:
+      stream << "kOatNeedsRelocation";
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  return stream;
+}
+
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, load_executable, nullptr) { }
+    : OatFileAssistant(dex_location, nullptr, isa, load_executable)
+{ }
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const char* oat_location,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, oat_location, isa, load_executable, nullptr) { }
-
-OatFileAssistant::OatFileAssistant(const char* dex_location,
-                                   const InstructionSet isa,
-                                   bool load_executable,
-                                   const char* package_name)
-    : OatFileAssistant(dex_location, nullptr, isa, load_executable, package_name) { }
-
-OatFileAssistant::OatFileAssistant(const char* dex_location,
-                                   const char* oat_location,
-                                   const InstructionSet isa,
-                                   bool load_executable,
-                                   const char* package_name)
-    : isa_(isa), package_name_(package_name), load_executable_(load_executable) {
+    : isa_(isa), load_executable_(load_executable), odex_(this), oat_(this) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
   dex_location_.assign(dex_location);
 
@@ -75,31 +70,33 @@
     load_executable_ = false;
   }
 
-  // If the user gave a target oat location, save that as the cached oat
-  // location now so we won't try to construct the default location later.
-  if (oat_location != nullptr) {
-    cached_oat_file_name_ = std::string(oat_location);
-    cached_oat_file_name_attempted_ = true;
-    cached_oat_file_name_found_ = true;
+  // Get the odex filename.
+  std::string error_msg;
+  std::string odex_file_name;
+  if (DexLocationToOdexFilename(dex_location_, isa_, &odex_file_name, &error_msg)) {
+    odex_.Reset(odex_file_name);
+  } else {
+    LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
   }
 
-  // If there is no package name given, we will not be able to find any
-  // profiles associated with this dex location. Preemptively mark that to
-  // be the case, rather than trying to find and load the profiles later.
-  // Similarly, if profiling is disabled.
-  if (package_name == nullptr
-      || !Runtime::Current()->GetProfilerOptions().IsEnabled()) {
-    profile_load_attempted_ = true;
-    profile_load_succeeded_ = false;
-    old_profile_load_attempted_ = true;
-    old_profile_load_succeeded_ = false;
+  // Get the oat filename.
+  if (oat_location != nullptr) {
+    oat_.Reset(oat_location);
+  } else {
+    std::string oat_file_name;
+    if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) {
+      oat_.Reset(oat_file_name);
+    } else {
+      LOG(WARNING) << "Failed to determine oat file name for dex location "
+        << dex_location_ << ": " << error_msg;
+    }
   }
 }
 
 OatFileAssistant::~OatFileAssistant() {
   // Clean up the lock file.
   if (flock_.HasFile()) {
-    TEMP_FAILURE_RETRY(unlink(flock_.GetFile()->GetPath().c_str()));
+    unlink(flock_.GetFile()->GetPath().c_str());
   }
 }
 
@@ -124,44 +121,105 @@
   CHECK(error_msg != nullptr);
   CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired";
 
-  if (OatFileName() == nullptr) {
+  const std::string* oat_file_name = oat_.Filename();
+  if (oat_file_name == nullptr) {
     *error_msg = "Failed to determine lock file";
     return false;
   }
-  std::string lock_file_name = *OatFileName() + ".flock";
+  std::string lock_file_name = *oat_file_name + ".flock";
 
   if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
-    TEMP_FAILURE_RETRY(unlink(lock_file_name.c_str()));
+    unlink(lock_file_name.c_str());
     return false;
   }
   return true;
 }
 
-OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() {
-  // TODO: If the profiling code is ever restored, it's worth considering
-  // whether we should check to see if the profile is out of date here.
+OatFileAssistant::DexOptNeeded
+OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target, bool profile_changed) {
+  bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
 
-  if (OatFileIsUpToDate() || OdexFileIsUpToDate()) {
-    return kNoDexOptNeeded;
+  // See if the oat file is in good shape as is.
+  bool oat_okay = oat_.CompilerFilterIsOkay(target, profile_changed);
+  if (oat_okay) {
+    if (compilation_desired) {
+      if (oat_.IsUpToDate()) {
+        return kNoDexOptNeeded;
+      }
+    } else {
+      if (!oat_.IsOutOfDate()) {
+        return kNoDexOptNeeded;
+      }
+    }
   }
 
-  if (OdexFileNeedsRelocation()) {
-    return kPatchOatNeeded;
+  // See if the odex file is in good shape as is.
+  bool odex_okay = odex_.CompilerFilterIsOkay(target, profile_changed);
+  if (odex_okay) {
+    if (compilation_desired) {
+      if (odex_.IsUpToDate()) {
+        return kNoDexOptNeeded;
+      }
+    } else {
+      if (!odex_.IsOutOfDate()) {
+        return kNoDexOptNeeded;
+      }
+    }
   }
 
-  if (OatFileNeedsRelocation()) {
-    return kSelfPatchOatNeeded;
+  // See if we can get an up-to-date file by running patchoat.
+  if (compilation_desired) {
+    if (odex_okay && odex_.NeedsRelocation() && odex_.HasPatchInfo()) {
+      return kPatchOatNeeded;
+    }
+
+    if (oat_okay && oat_.NeedsRelocation() && oat_.HasPatchInfo()) {
+      return kSelfPatchOatNeeded;
+    }
   }
 
+  // We can only run dex2oat if there are original dex files.
   return HasOriginalDexFiles() ? kDex2OatNeeded : kNoDexOptNeeded;
 }
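
A hedged caller-side sketch of the new two-argument query; the enum values come from
this patch, while CompilerFilter::kSpeed is just one plausible target filter and
dex_location is assumed to be in scope:

    OatFileAssistant assistant(dex_location, kRuntimeISA, /*load_executable*/true);
    switch (assistant.GetDexOptNeeded(CompilerFilter::kSpeed, /*profile_changed*/false)) {
      case OatFileAssistant::kNoDexOptNeeded:
        break;  // An acceptable oat or odex file already exists.
      case OatFileAssistant::kDex2OatNeeded:
        // Recompile from the original dex files.
        break;
      case OatFileAssistant::kPatchOatNeeded:
        // Relocate the odex file with patchoat.
        break;
      case OatFileAssistant::kSelfPatchOatNeeded:
        // Relocate the existing oat file in place.
        break;
    }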
 
-bool OatFileAssistant::MakeUpToDate(std::string* error_msg) {
-  switch (GetDexOptNeeded()) {
-    case kNoDexOptNeeded: return true;
+// Figure out the compiler filter option currently specified in the runtime.
+// Returns true on success, false if the compiler filter is invalid, in which
+// case error_msg describes the problem.
+static bool GetRuntimeCompilerFilterOption(CompilerFilter::Filter* filter,
+                                           std::string* error_msg) {
+  CHECK(filter != nullptr);
+  CHECK(error_msg != nullptr);
+
+  *filter = CompilerFilter::kDefaultCompilerFilter;
+  for (StringPiece option : Runtime::Current()->GetCompilerOptions()) {
+    if (option.starts_with("--compiler-filter=")) {
+      const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
+      if (!CompilerFilter::ParseCompilerFilter(compiler_filter_string, filter)) {
+        *error_msg = std::string("Unknown --compiler-filter value: ")
+                   + std::string(compiler_filter_string);
+        return false;
+      }
+    }
+  }
+  return true;
+}
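
Note that the loop does not break after a match, so a later --compiler-filter=
occurrence overrides an earlier one, matching usual command-line semantics. A
stand-alone sketch of the same prefix parse using std::string instead of StringPiece:

    #include <string>

    // Returns true and stores the value if `option` is a --compiler-filter= flag.
    bool ParseCompilerFilterOption(const std::string& option, std::string* value) {
      static const std::string kPrefix = "--compiler-filter=";
      if (option.compare(0, kPrefix.size(), kPrefix) != 0) {
        return false;
      }
      *value = option.substr(kPrefix.size());
      return true;
    }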
+
+bool OatFileAssistant::IsUpToDate() {
+  return OatFileIsUpToDate() || OdexFileIsUpToDate();
+}
+
+OatFileAssistant::ResultOfAttemptToUpdate
+OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) {
+  CompilerFilter::Filter target;
+  if (!GetRuntimeCompilerFilterOption(&target, error_msg)) {
+    return kUpdateNotAttempted;
+  }
+
+  switch (GetDexOptNeeded(target, profile_changed)) {
+    case kNoDexOptNeeded: return kUpdateSucceeded;
     case kDex2OatNeeded: return GenerateOatFile(error_msg);
-    case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
-    case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg);
+    case kPatchOatNeeded: return RelocateOatFile(odex_.Filename(), error_msg);
+    case kSelfPatchOatNeeded: return RelocateOatFile(oat_.Filename(), error_msg);
   }
   UNREACHABLE();
 }
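
MakeUpToDate now reports a tri-state ResultOfAttemptToUpdate instead of a bool, so
callers can distinguish "nothing was tried" from "tried and failed". A hedged sketch
(assistant is assumed to be in scope):

    std::string error_msg;
    switch (assistant.MakeUpToDate(/*profile_changed*/false, &error_msg)) {
      case OatFileAssistant::kUpdateSucceeded:
        break;  // Either nothing was needed or the update worked.
      case OatFileAssistant::kUpdateNotAttempted:  // A precondition failed.
      case OatFileAssistant::kUpdateFailed:        // Attempted, did not succeed.
        LOG(WARNING) << error_msg;
        break;
    }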
@@ -172,46 +230,40 @@
   // 2. Not out-of-date files that are already opened non-executable.
   // 3. Not out-of-date files that we must reopen non-executable.
 
-  if (OatFileIsUpToDate()) {
-    oat_file_released_ = true;
-    return std::move(cached_oat_file_);
+  if (oat_.IsUpToDate()) {
+    return oat_.ReleaseFile();
   }
 
-  if (OdexFileIsUpToDate()) {
-    oat_file_released_ = true;
-    return std::move(cached_odex_file_);
+  if (odex_.IsUpToDate()) {
+    return odex_.ReleaseFile();
   }
 
   VLOG(oat) << "Oat File Assistant: No relocated oat file found,"
     << " attempting to fall back to interpreting oat file instead.";
 
-  if (!OatFileIsOutOfDate() && !OatFileIsExecutable()) {
-    oat_file_released_ = true;
-    return std::move(cached_oat_file_);
+  if (!oat_.IsOutOfDate() && !oat_.IsExecutable()) {
+    return oat_.ReleaseFile();
   }
 
-  if (!OdexFileIsOutOfDate() && !OdexFileIsExecutable()) {
-    oat_file_released_ = true;
-    return std::move(cached_odex_file_);
+  if (!odex_.IsOutOfDate() && !odex_.IsExecutable()) {
+    return odex_.ReleaseFile();
   }
 
-  if (!OatFileIsOutOfDate()) {
+  if (!oat_.IsOutOfDate()) {
     load_executable_ = false;
-    ClearOatFileCache();
-    if (!OatFileIsOutOfDate()) {
-      CHECK(!OatFileIsExecutable());
-      oat_file_released_ = true;
-      return std::move(cached_oat_file_);
+    oat_.Reset();
+    if (!oat_.IsOutOfDate()) {
+      CHECK(!oat_.IsExecutable());
+      return oat_.ReleaseFile();
     }
   }
 
-  if (!OdexFileIsOutOfDate()) {
+  if (!odex_.IsOutOfDate()) {
     load_executable_ = false;
-    ClearOdexFileCache();
-    if (!OdexFileIsOutOfDate()) {
-      CHECK(!OdexFileIsExecutable());
-      oat_file_released_ = true;
-      return std::move(cached_odex_file_);
+    odex_.Reset();
+    if (!odex_.IsOutOfDate()) {
+      CHECK(!odex_.IsExecutable());
+      return odex_.ReleaseFile();
     }
   }
 
@@ -225,10 +277,9 @@
   // Load the primary dex file.
   std::string error_msg;
   const OatFile::OatDexFile* oat_dex_file = oat_file.GetOatDexFile(
-      dex_location, nullptr, false);
+      dex_location, nullptr, &error_msg);
   if (oat_dex_file == nullptr) {
-    LOG(WARNING) << "Attempt to load out-of-date oat file "
-      << oat_file.GetLocation() << " for dex location " << dex_location;
+    LOG(WARNING) << error_msg;
     return std::vector<std::unique_ptr<const DexFile>>();
   }
 
@@ -242,7 +293,7 @@
   // Load secondary multidex files
   for (size_t i = 1; ; i++) {
     std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location);
-    oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
+    oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr);
     if (oat_dex_file == nullptr) {
       // There are no more secondary dex files to load.
       break;
@@ -267,168 +318,95 @@
 }
 
 const std::string* OatFileAssistant::OdexFileName() {
-  if (!cached_odex_file_name_attempted_) {
-    cached_odex_file_name_attempted_ = true;
-
-    std::string error_msg;
-    cached_odex_file_name_found_ = DexFilenameToOdexFilename(
-        dex_location_, isa_, &cached_odex_file_name_, &error_msg);
-    if (!cached_odex_file_name_found_) {
-      // If we can't figure out the odex file, we treat it as if the odex
-      // file was inaccessible.
-      LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
-    }
-  }
-  return cached_odex_file_name_found_ ? &cached_odex_file_name_ : nullptr;
+  return odex_.Filename();
 }
 
 bool OatFileAssistant::OdexFileExists() {
-  return GetOdexFile() != nullptr;
+  return odex_.Exists();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
-  if (OdexFileIsOutOfDate()) {
-    return kOatOutOfDate;
-  }
-  if (OdexFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return odex_.Status();
 }
 
 bool OatFileAssistant::OdexFileIsOutOfDate() {
-  if (!odex_file_is_out_of_date_attempted_) {
-    odex_file_is_out_of_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_out_of_date_ = true;
-    } else {
-      cached_odex_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_out_of_date_;
+  return odex_.IsOutOfDate();
 }
 
 bool OatFileAssistant::OdexFileNeedsRelocation() {
-  return OdexFileStatus() == kOatNeedsRelocation;
+  return odex_.NeedsRelocation();
 }
 
 bool OatFileAssistant::OdexFileIsUpToDate() {
-  if (!odex_file_is_up_to_date_attempted_) {
-    odex_file_is_up_to_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_up_to_date_ = false;
-    } else {
-      cached_odex_file_is_up_to_date_ = GivenOatFileIsUpToDate(*odex_file);
-    }
+  return odex_.IsUpToDate();
+}
+
+CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
+  return odex_.CompilerFilter();
+}
+
+static std::string ArtFileName(const OatFile* oat_file) {
+  const std::string oat_file_location = oat_file->GetLocation();
+  // Replace extension with .art
+  const size_t last_ext = oat_file_location.find_last_of('.');
+  if (last_ext == std::string::npos) {
+    LOG(ERROR) << "No extension in oat file " << oat_file_location;
+    return std::string();
   }
-  return cached_odex_file_is_up_to_date_;
+  return oat_file_location.substr(0, last_ext) + ".art";
 }
 
 const std::string* OatFileAssistant::OatFileName() {
-  if (!cached_oat_file_name_attempted_) {
-    cached_oat_file_name_attempted_ = true;
-
-    // Compute the oat file name from the dex location.
-    // TODO: The oat file assistant should be the definitive place for
-    // determining the oat file name from the dex location, not
-    // GetDalvikCacheFilename.
-    std::string cache_dir = StringPrintf("%s%s",
-        DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
-    std::string error_msg;
-    cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_.c_str(),
-        cache_dir.c_str(), &cached_oat_file_name_, &error_msg);
-    if (!cached_oat_file_name_found_) {
-      // If we can't determine the oat file name, we treat the oat file as
-      // inaccessible.
-      LOG(WARNING) << "Failed to determine oat file name for dex location "
-        << dex_location_ << ": " << error_msg;
-    }
-  }
-  return cached_oat_file_name_found_ ? &cached_oat_file_name_ : nullptr;
+  return oat_.Filename();
 }
 
 bool OatFileAssistant::OatFileExists() {
-  return GetOatFile() != nullptr;
+  return oat_.Exists();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
-  if (OatFileIsOutOfDate()) {
-    return kOatOutOfDate;
-  }
-  if (OatFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return oat_.Status();
 }
 
 bool OatFileAssistant::OatFileIsOutOfDate() {
-  if (!oat_file_is_out_of_date_attempted_) {
-    oat_file_is_out_of_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_out_of_date_ = true;
-    } else {
-      cached_oat_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_out_of_date_;
+  return oat_.IsOutOfDate();
 }
 
 bool OatFileAssistant::OatFileNeedsRelocation() {
-  return OatFileStatus() == kOatNeedsRelocation;
+  return oat_.NeedsRelocation();
 }
 
 bool OatFileAssistant::OatFileIsUpToDate() {
-  if (!oat_file_is_up_to_date_attempted_) {
-    oat_file_is_up_to_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_up_to_date_ = false;
-    } else {
-      cached_oat_file_is_up_to_date_ = GivenOatFileIsUpToDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_up_to_date_;
+  return oat_.IsUpToDate();
+}
+
+CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
+  return oat_.CompilerFilter();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
-  // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
-  // is more work than we need to do. If performance becomes a concern, and
-  // this method is actually called, this should be fixed.
-  if (GivenOatFileIsOutOfDate(file)) {
-    return kOatOutOfDate;
-  }
-  if (GivenOatFileIsUpToDate(file)) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
   // what we provide, which verifies the primary dex checksum for us.
+  std::string error_msg;
   const uint32_t* dex_checksum_pointer = GetRequiredDexChecksum();
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
-      dex_location_.c_str(), dex_checksum_pointer, false);
+      dex_location_.c_str(), dex_checksum_pointer, &error_msg);
   if (oat_dex_file == nullptr) {
-    return true;
+    VLOG(oat) << error_msg;
+    return kOatOutOfDate;
   }
 
   // Verify the dex checksums for any secondary multidex files
   for (size_t i = 1; ; i++) {
-    std::string secondary_dex_location
-      = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
+    std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
     const OatFile::OatDexFile* secondary_oat_dex_file
-      = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
+      = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr);
     if (secondary_oat_dex_file == nullptr) {
       // There are no more secondary dex files to check.
       break;
     }
 
-    std::string error_msg;
     uint32_t expected_secondary_checksum = 0;
     if (DexFile::GetChecksum(secondary_dex_location.c_str(),
           &expected_secondary_checksum, &error_msg)) {
@@ -439,7 +417,7 @@
           << secondary_dex_location
           << ". Expected: " << expected_secondary_checksum
           << ", Actual: " << actual_secondary_checksum;
-        return true;
+        return kOatOutOfDate;
       }
     } else {
       // If we can't get the checksum for the secondary location, we assume
@@ -449,194 +427,105 @@
     }
   }
 
+  CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
+
   // Verify the image checksum
-  const ImageInfo* image_info = GetImageInfo();
-  if (image_info == nullptr) {
-    VLOG(oat) << "No image for oat image checksum to match against.";
-    return true;
+  if (CompilerFilter::DependsOnImageChecksum(current_compiler_filter)) {
+    const ImageInfo* image_info = GetImageInfo();
+    if (image_info == nullptr) {
+      VLOG(oat) << "No image for oat image checksum to match against.";
+
+      if (HasOriginalDexFiles()) {
+        return kOatOutOfDate;
+      }
+
+      // If there is no original dex file to fall back to, grudgingly accept
+      // the oat file. This could technically lead to crashes, but there's no
+      // way we could find a better oat file to use for this dex location,
+      // and it's better than being stuck in a boot loop with no way out.
+      // The problem will hopefully resolve itself the next time the runtime
+      // starts up.
+      LOG(WARNING) << "Dex location " << dex_location_ << " does not seem to include dex file. "
+        << "Allow oat file use. This is potentially dangerous.";
+    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
+        != GetCombinedImageChecksum()) {
+      VLOG(oat) << "Oat image checksum does not match image checksum.";
+      return kOatOutOfDate;
+    }
+  } else {
+    VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter;
   }
 
-  if (file.GetOatHeader().GetImageFileLocationOatChecksum() != image_info->oat_checksum) {
-    VLOG(oat) << "Oat image checksum does not match image checksum.";
-    return true;
-  }
+  if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
+    if (!file.IsPic()) {
+      const ImageInfo* image_info = GetImageInfo();
+      if (image_info == nullptr) {
+        VLOG(oat) << "No image to check oat relocation against.";
+        return kOatNeedsRelocation;
+      }
 
-  // The checksums are all good; the dex file is not out of date.
-  return false;
+      // Verify the oat_data_begin recorded for the image in the oat file matches
+      // the actual oat_data_begin for boot.oat in the image.
+      const OatHeader& oat_header = file.GetOatHeader();
+      uintptr_t oat_data_begin = oat_header.GetImageFileLocationOatDataBegin();
+      if (oat_data_begin != image_info->oat_data_begin) {
+        VLOG(oat) << file.GetLocation() <<
+          ": Oat file image oat_data_begin (" << oat_data_begin << ")"
+          << " does not match actual image oat_data_begin ("
+          << image_info->oat_data_begin << ")";
+        return kOatNeedsRelocation;
+      }
+
+      // Verify the oat_patch_delta recorded for the image in the oat file matches
+      // the actual oat_patch_delta for the image.
+      int32_t oat_patch_delta = oat_header.GetImagePatchDelta();
+      if (oat_patch_delta != image_info->patch_delta) {
+        VLOG(oat) << file.GetLocation() <<
+          ": Oat file image patch delta (" << oat_patch_delta << ")"
+          << " does not match actual image patch delta ("
+          << image_info->patch_delta << ")";
+        return kOatNeedsRelocation;
+      }
+    } else {
+      // Oat files compiled in PIC mode do not require relocation.
+      VLOG(oat) << "Oat relocation test skipped for PIC oat file";
+    }
+  } else {
+    VLOG(oat) << "Oat relocation test skipped for compiler filter " << current_compiler_filter;
+  }
+  return kOatUpToDate;
 }
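
The decision ladder above can be condensed into a pure function, which makes the
precedence of the checks easier to see; this is a hedged paraphrase, not ART code:

    enum class Verdict { kOutOfDate, kNeedsRelocation, kUpToDate };

    // checksums_ok:  primary/secondary dex checksums and, when the compiler
    //                filter depends on it, the image checksum all match.
    // compiled:      the filter enables bytecode compilation.
    // pic:           the oat file was compiled position-independent.
    // image_matches: oat_data_begin and the patch delta agree with the image.
    Verdict Classify(bool checksums_ok, bool compiled, bool pic, bool image_matches) {
      if (!checksums_ok) return Verdict::kOutOfDate;
      if (compiled && !pic && !image_matches) return Verdict::kNeedsRelocation;
      return Verdict::kUpToDate;
    }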
 
-bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) {
-  return GivenOatFileStatus(file) == kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) {
-  if (GivenOatFileIsOutOfDate(file)) {
-    return false;
-  }
-
-  if (file.IsPic()) {
-    return true;
-  }
-
-  const ImageInfo* image_info = GetImageInfo();
-  if (image_info == nullptr) {
-    VLOG(oat) << "No image to check oat relocation against.";
-    return false;
-  }
-
-  // Verify the oat_data_begin recorded for the image in the oat file matches
-  // the actual oat_data_begin for boot.oat in the image.
-  const OatHeader& oat_header = file.GetOatHeader();
-  uintptr_t oat_data_begin = oat_header.GetImageFileLocationOatDataBegin();
-  if (oat_data_begin != image_info->oat_data_begin) {
-    VLOG(oat) << file.GetLocation() <<
-      ": Oat file image oat_data_begin (" << oat_data_begin << ")"
-      << " does not match actual image oat_data_begin ("
-      << image_info->oat_data_begin << ")";
-    return false;
-  }
-
-  // Verify the oat_patch_delta recorded for the image in the oat file matches
-  // the actual oat_patch_delta for the image.
-  int32_t oat_patch_delta = oat_header.GetImagePatchDelta();
-  if (oat_patch_delta != image_info->patch_delta) {
-    VLOG(oat) << file.GetLocation() <<
-      ": Oat file image patch delta (" << oat_patch_delta << ")"
-      << " does not match actual image patch delta ("
-      << image_info->patch_delta << ")";
-    return false;
-  }
-  return true;
-}
-
-bool OatFileAssistant::ProfileExists() {
-  return GetProfile() != nullptr;
-}
-
-bool OatFileAssistant::OldProfileExists() {
-  return GetOldProfile() != nullptr;
-}
-
-// TODO: The IsProfileChangeSignificant implementation was copied from likely
-// bit-rotted code.
-bool OatFileAssistant::IsProfileChangeSignificant() {
-  ProfileFile* profile = GetProfile();
-  if (profile == nullptr) {
-    return false;
-  }
-
-  ProfileFile* old_profile = GetOldProfile();
-  if (old_profile == nullptr) {
-    return false;
-  }
-
-  // TODO: The following code to compare two profile files should live with
-  // the rest of the profiler code, not the oat file assistant code.
-
-  // A change in profile is considered significant if X% (change_thr property)
-  // of the top K% (compile_thr property) samples has changed.
-  const ProfilerOptions& options = Runtime::Current()->GetProfilerOptions();
-  const double top_k_threshold = options.GetTopKThreshold();
-  const double change_threshold = options.GetTopKChangeThreshold();
-  std::set<std::string> top_k, old_top_k;
-  profile->GetTopKSamples(top_k, top_k_threshold);
-  old_profile->GetTopKSamples(old_top_k, top_k_threshold);
-  std::set<std::string> diff;
-  std::set_difference(top_k.begin(), top_k.end(), old_top_k.begin(),
-      old_top_k.end(), std::inserter(diff, diff.end()));
-
-  // TODO: consider using the usedPercentage instead of the plain diff count.
-  double change_percent = 100.0 * static_cast<double>(diff.size())
-                                / static_cast<double>(top_k.size());
-  std::set<std::string>::iterator end = diff.end();
-  for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
-    VLOG(oat) << "Profile new in topK: " << *it;
-  }
-
-  if (change_percent > change_threshold) {
-      VLOG(oat) << "Oat File Assistant: Profile for " << dex_location_
-        << "has changed significantly: (top "
-        << top_k_threshold << "% samples changed in proportion of "
-        << change_percent << "%)";
-      return true;
-  }
-  return false;
-}
-
-// TODO: The CopyProfileFile implementation was copied from likely bit-rotted
-// code.
-void OatFileAssistant::CopyProfileFile() {
-  if (!ProfileExists()) {
-    return;
-  }
-
-  std::string profile_name = ProfileFileName();
-  std::string old_profile_name = OldProfileFileName();
-
-  ScopedFd src(open(old_profile_name.c_str(), O_RDONLY));
-  if (src.get() == -1) {
-    PLOG(WARNING) << "Failed to open profile file " << old_profile_name
-      << ". My uid:gid is " << getuid() << ":" << getgid();
-    return;
-  }
-
-  struct stat stat_src;
-  if (fstat(src.get(), &stat_src) == -1) {
-    PLOG(WARNING) << "Failed to get stats for profile file  " << old_profile_name
-      << ". My uid:gid is " << getuid() << ":" << getgid();
-    return;
-  }
-
-  // Create the copy with rw------- (only accessible by system)
-  ScopedFd dst(open(profile_name.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0600));
-  if (dst.get()  == -1) {
-    PLOG(WARNING) << "Failed to create/write prev profile file " << profile_name
-      << ".  My uid:gid is " << getuid() << ":" << getgid();
-    return;
-  }
-
-#ifdef __linux__
-  if (sendfile(dst.get(), src.get(), nullptr, stat_src.st_size) == -1) {
-#else
-  off_t len;
-  if (sendfile(dst.get(), src.get(), 0, &len, nullptr, 0) == -1) {
-#endif
-    PLOG(WARNING) << "Failed to copy profile file " << old_profile_name
-      << " to " << profile_name << ". My uid:gid is " << getuid()
-      << ":" << getgid();
-  }
-}
-
-bool OatFileAssistant::RelocateOatFile(const std::string* input_file,
-                                       std::string* error_msg) {
+OatFileAssistant::ResultOfAttemptToUpdate
+OatFileAssistant::RelocateOatFile(const std::string* input_file, std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   if (input_file == nullptr) {
     *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the input file name could not be determined.";
-    return false;
+    return kUpdateNotAttempted;
   }
   const std::string& input_file_name = *input_file;
 
-  if (OatFileName() == nullptr) {
+  if (oat_.Filename() == nullptr) {
     *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
-    return false;
+    return kUpdateNotAttempted;
   }
-  const std::string& oat_file_name = *OatFileName();
+  const std::string& oat_file_name = *oat_.Filename();
 
   const ImageInfo* image_info = GetImageInfo();
   Runtime* runtime = Runtime::Current();
   if (image_info == nullptr) {
     *error_msg = "Patching of oat file " + oat_file_name
       + " not attempted because no image location was found.";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   if (!runtime->IsDex2OatEnabled()) {
     *error_msg = "Patching of oat file " + oat_file_name
       + " not attempted because dex2oat is disabled";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   std::vector<std::string> argv;
@@ -650,54 +539,78 @@
   if (!Exec(argv, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
-    return false;
+    unlink(oat_file_name.c_str());
+    return kUpdateFailed;
   }
 
   // Mark that the oat file has changed and we should try to reload.
-  ClearOatFileCache();
-  return true;
+  oat_.Reset();
+  return kUpdateSucceeded;
 }
 
-bool OatFileAssistant::GenerateOatFile(std::string* error_msg) {
+OatFileAssistant::ResultOfAttemptToUpdate
+OatFileAssistant::GenerateOatFile(std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
-  if (OatFileName() == nullptr) {
-    *error_msg = "Generation of oat file for dex location " + dex_location_
-      + " not attempted because the oat file name could not be determined.";
-    return false;
-  }
-  const std::string& oat_file_name = *OatFileName();
-
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsDex2OatEnabled()) {
-    *error_msg = "Generation of oat file " + oat_file_name
-      + " not attempted because dex2oat is disabled";
-    return false;
+    *error_msg = "Generation of oat file for dex location " + dex_location_
+      + " not attempted because dex2oat is disabled.";
+    return kUpdateNotAttempted;
   }
 
-  std::vector<std::string> args;
-  args.push_back("--dex-file=" + dex_location_);
-  args.push_back("--oat-file=" + oat_file_name);
+  if (oat_.Filename() == nullptr) {
+    *error_msg = "Generation of oat file for dex location " + dex_location_
+      + " not attempted because the oat file name could not be determined.";
+    return kUpdateNotAttempted;
+  }
+  const std::string& oat_file_name = *oat_.Filename();
 
   // dex2oat ignores missing dex files and doesn't report an error.
   // Check explicitly here so we can detect the error properly.
   // TODO: Why does dex2oat behave that way?
   if (!OS::FileExists(dex_location_.c_str())) {
     *error_msg = "Dex location " + dex_location_ + " does not exists.";
-    return false;
+    return kUpdateNotAttempted;
   }
 
+  std::unique_ptr<File> oat_file;
+  oat_file.reset(OS::CreateEmptyFile(oat_file_name.c_str()));
+  if (oat_file.get() == nullptr) {
+    *error_msg = "Generation of oat file " + oat_file_name
+      + " not attempted because the oat file could not be created.";
+    return kUpdateNotAttempted;
+  }
+
+  if (fchmod(oat_file->Fd(), 0644) != 0) {
+    *error_msg = "Generation of oat file " + oat_file_name
+      + " not attempted because the oat file could not be made world readable.";
+    oat_file->Erase();
+    return kUpdateNotAttempted;
+  }
+
+  std::vector<std::string> args;
+  args.push_back("--dex-file=" + dex_location_);
+  args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
+  args.push_back("--oat-location=" + oat_file_name);
+
   if (!Dex2Oat(args, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
-    return false;
+    oat_file->Erase();
+    unlink(oat_file_name.c_str());
+    return kUpdateFailed;
+  }
+
+  if (oat_file->FlushCloseOrErase() != 0) {
+    *error_msg = "Unable to close oat file " + oat_file_name;
+    unlink(oat_file_name.c_str());
+    return kUpdateFailed;
   }
 
   // Mark that the oat file has changed and we should try to reload.
-  ClearOatFileCache();
-  return true;
+  oat_.Reset();
+  return kUpdateSucceeded;
 }
 
 bool OatFileAssistant::Dex2Oat(const std::vector<std::string>& args,
@@ -714,7 +627,11 @@
   argv.push_back("--runtime-arg");
   argv.push_back("-classpath");
   argv.push_back("--runtime-arg");
-  argv.push_back(runtime->GetClassPathString());
+  std::string class_path = runtime->GetClassPathString();
+  if (class_path == "") {
+    class_path = OatFile::kSpecialSharedLibrary;
+  }
+  argv.push_back(class_path);
   if (runtime->IsDebuggable()) {
     argv.push_back("--debuggable");
   }
@@ -747,8 +664,10 @@
   return Exec(argv, error_msg);
 }
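
When the runtime classpath is empty, the code above substitutes
OatFile::kSpecialSharedLibrary ("&", declared in oat_file.h earlier in this patch),
so dex2oat receives a sentinel that skips the shared-library classpath check instead
of an empty -classpath value. The forwarded runtime arguments then look roughly like:

    --runtime-arg -classpath --runtime-arg &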
 
-bool OatFileAssistant::DexFilenameToOdexFilename(const std::string& location,
-    InstructionSet isa, std::string* odex_filename, std::string* error_msg) {
+bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location,
+                                                 InstructionSet isa,
+                                                 std::string* odex_filename,
+                                                 std::string* error_msg) {
   CHECK(odex_filename != nullptr);
   CHECK(error_msg != nullptr);
 
@@ -787,47 +706,33 @@
   return true;
 }
 
-std::string OatFileAssistant::DalvikCacheDirectory() {
-  // Note: We don't cache this, because it will only be called once by
-  // OatFileName, and we don't care about the performance of the profiling
-  // code, which isn't used in practice.
+bool OatFileAssistant::DexLocationToOatFilename(const std::string& location,
+                                                InstructionSet isa,
+                                                std::string* oat_filename,
+                                                std::string* error_msg) {
+  CHECK(oat_filename != nullptr);
+  CHECK(error_msg != nullptr);
 
-  // TODO: The work done in GetDalvikCache is overkill for what we need.
-  // Ideally a new API for getting the DalvikCacheDirectory the way we want
-  // (without existence testing, creation, or death) is provided with the rest
-  // of the GetDalvikCache family of functions. Until such an API is in place,
-  // we use GetDalvikCache to avoid duplicating the logic for determining the
-  // dalvik cache directory.
-  std::string result;
-  bool have_android_data;
-  bool dalvik_cache_exists;
-  bool is_global_cache;
-  GetDalvikCache("", false, &result, &have_android_data, &dalvik_cache_exists, &is_global_cache);
-  return result;
-}
-
-std::string OatFileAssistant::ProfileFileName() {
-  if (package_name_ != nullptr) {
-    return DalvikCacheDirectory() + std::string("profiles/") + package_name_;
+  std::string cache_dir = GetDalvikCache(GetInstructionSetString(isa));
+  if (cache_dir.empty()) {
+    *error_msg = "Dalvik cache directory does not exist";
+    return false;
   }
-  return "";
-}
 
-std::string OatFileAssistant::OldProfileFileName() {
-  std::string profile_name = ProfileFileName();
-  if (profile_name.empty()) {
-    return "";
-  }
-  return profile_name + "@old";
+  // TODO: The oat file assistant should be the definitive place for
+  // determining the oat file name from the dex location, not
+  // GetDalvikCacheFilename.
+  return GetDalvikCacheFilename(location.c_str(), cache_dir.c_str(), oat_filename, error_msg);
 }
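
A hedged usage sketch of the new helper; the exact cache name is delegated to
GetDalvikCacheFilename, which flattens the dex path, so the strings below are only
illustrative:

    std::string oat_filename;
    std::string error_msg;
    if (OatFileAssistant::DexLocationToOatFilename(
            "/data/app/example.apk", kArm64, &oat_filename, &error_msg)) {
      // oat_filename now resembles
      // "/data/dalvik-cache/arm64/data@app@example.apk@classes.dex".
    } else {
      LOG(WARNING) << error_msg;
    }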
 
 std::string OatFileAssistant::ImageLocation() {
   Runtime* runtime = Runtime::Current();
-  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
-  if (image_space == nullptr) {
+  const std::vector<gc::space::ImageSpace*>& image_spaces =
+      runtime->GetHeap()->GetBootImageSpaces();
+  if (image_spaces.empty()) {
     return "";
   }
-  return image_space->GetImageLocation();
+  return image_spaces[0]->GetImageLocation();
 }
 
 const uint32_t* OatFileAssistant::GetRequiredDexChecksum() {
@@ -845,10 +750,10 @@
       has_original_dex_files_ = false;
 
       // Get the checksum from the odex if we can.
-      const OatFile* odex_file = GetOdexFile();
+      const OatFile* odex_file = odex_.GetFile();
       if (odex_file != nullptr) {
-        const OatFile::OatDexFile* odex_dex_file = odex_file->GetOatDexFile(
-            dex_location_.c_str(), nullptr, false);
+        const OatFile::OatDexFile* odex_dex_file
+            = odex_file->GetOatDexFile(dex_location_.c_str(), nullptr);
         if (odex_dex_file != nullptr) {
           cached_required_dex_checksum_ = odex_dex_file->GetDexFileLocationChecksum();
           required_dex_checksum_found_ = true;
@@ -859,122 +764,194 @@
   return required_dex_checksum_found_ ? &cached_required_dex_checksum_ : nullptr;
 }
 
-const OatFile* OatFileAssistant::GetOdexFile() {
-  CHECK(!oat_file_released_) << "OdexFile called after oat file released.";
-  if (!odex_file_load_attempted_) {
-    odex_file_load_attempted_ = true;
-    if (OdexFileName() != nullptr) {
-      const std::string& odex_file_name = *OdexFileName();
-      std::string error_msg;
-      cached_odex_file_.reset(OatFile::Open(odex_file_name.c_str(),
-            odex_file_name.c_str(), nullptr, nullptr, load_executable_,
-            dex_location_.c_str(), &error_msg));
-      if (cached_odex_file_.get() == nullptr) {
-        VLOG(oat) << "OatFileAssistant test for existing pre-compiled oat file "
-          << odex_file_name << ": " << error_msg;
-      }
-    }
-  }
-  return cached_odex_file_.get();
-}
-
-bool OatFileAssistant::OdexFileIsExecutable() {
-  const OatFile* odex_file = GetOdexFile();
-  return (odex_file != nullptr && odex_file->IsExecutable());
-}
-
-void OatFileAssistant::ClearOdexFileCache() {
-  odex_file_load_attempted_ = false;
-  cached_odex_file_.reset();
-  odex_file_is_out_of_date_attempted_ = false;
-  odex_file_is_up_to_date_attempted_ = false;
-}
-
-const OatFile* OatFileAssistant::GetOatFile() {
-  CHECK(!oat_file_released_) << "OatFile called after oat file released.";
-  if (!oat_file_load_attempted_) {
-    oat_file_load_attempted_ = true;
-    if (OatFileName() != nullptr) {
-      const std::string& oat_file_name = *OatFileName();
-      std::string error_msg;
-      cached_oat_file_.reset(OatFile::Open(oat_file_name.c_str(),
-            oat_file_name.c_str(), nullptr, nullptr, load_executable_,
-            dex_location_.c_str(), &error_msg));
-      if (cached_oat_file_.get() == nullptr) {
-        VLOG(oat) << "OatFileAssistant test for existing oat file "
-          << oat_file_name << ": " << error_msg;
-      }
-    }
-  }
-  return cached_oat_file_.get();
-}
-
-bool OatFileAssistant::OatFileIsExecutable() {
-  const OatFile* oat_file = GetOatFile();
-  return (oat_file != nullptr && oat_file->IsExecutable());
-}
-
-void OatFileAssistant::ClearOatFileCache() {
-  oat_file_load_attempted_ = false;
-  cached_oat_file_.reset();
-  oat_file_is_out_of_date_attempted_ = false;
-  oat_file_is_up_to_date_attempted_ = false;
-}
-
 const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() {
   if (!image_info_load_attempted_) {
     image_info_load_attempted_ = true;
 
     Runtime* runtime = Runtime::Current();
-    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
-    if (image_space != nullptr) {
-      cached_image_info_.location = image_space->GetImageLocation();
+    std::vector<gc::space::ImageSpace*> image_spaces = runtime->GetHeap()->GetBootImageSpaces();
+    if (!image_spaces.empty()) {
+      cached_image_info_.location = image_spaces[0]->GetImageLocation();
 
       if (isa_ == kRuntimeISA) {
-        const ImageHeader& image_header = image_space->GetImageHeader();
+        const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
         cached_image_info_.oat_checksum = image_header.GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
+        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
+            image_header.GetOatDataBegin());
         cached_image_info_.patch_delta = image_header.GetPatchDelta();
       } else {
+        std::string error_msg;
         std::unique_ptr<ImageHeader> image_header(
-            gc::space::ImageSpace::ReadImageHeaderOrDie(
-                cached_image_info_.location.c_str(), isa_));
+            gc::space::ImageSpace::ReadImageHeader(cached_image_info_.location.c_str(),
+                                                   isa_,
+                                                   &error_msg));
+        CHECK(image_header != nullptr) << error_msg;
         cached_image_info_.oat_checksum = image_header->GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
+        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
+            image_header->GetOatDataBegin());
         cached_image_info_.patch_delta = image_header->GetPatchDelta();
       }
     }
-    image_info_load_succeeded_ = (image_space != nullptr);
+    image_info_load_succeeded_ = (!image_spaces.empty());
+
+    combined_image_checksum_ = CalculateCombinedImageChecksum(isa_);
   }
   return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
 }
 
-ProfileFile* OatFileAssistant::GetProfile() {
-  if (!profile_load_attempted_) {
-    CHECK(package_name_ != nullptr)
-      << "pakage_name_ is nullptr: "
-      << "profile_load_attempted_ should have been true";
-    profile_load_attempted_ = true;
-    std::string profile_name = ProfileFileName();
-    if (!profile_name.empty()) {
-      profile_load_succeeded_ = cached_profile_.LoadFile(profile_name);
+// TODO: Use something better than xor.
+uint32_t OatFileAssistant::CalculateCombinedImageChecksum(InstructionSet isa) {
+  uint32_t checksum = 0;
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  if (isa == kRuntimeISA) {
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      checksum ^= image_space->GetImageHeader().GetOatChecksum();
+    }
+  } else {
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      std::string location = image_space->GetImageLocation();
+      std::string error_msg;
+      std::unique_ptr<ImageHeader> image_header(
+          gc::space::ImageSpace::ReadImageHeader(location.c_str(), isa, &error_msg));
+      CHECK(image_header != nullptr) << error_msg;
+      checksum ^= image_header->GetOatChecksum();
     }
   }
-  return profile_load_succeeded_ ? &cached_profile_ : nullptr;
+  return checksum;
 }
 
-ProfileFile* OatFileAssistant::GetOldProfile() {
-  if (!old_profile_load_attempted_) {
-    CHECK(package_name_ != nullptr)
-      << "pakage_name_ is nullptr: "
-      << "old_profile_load_attempted_ should have been true";
-    old_profile_load_attempted_ = true;
-    std::string old_profile_name = OldProfileFileName();
-    if (!old_profile_name.empty()) {
-      old_profile_load_succeeded_ = cached_old_profile_.LoadFile(old_profile_name);
+uint32_t OatFileAssistant::GetCombinedImageChecksum() {
+  if (!image_info_load_attempted_) {
+    GetImageInfo();
+  }
+  return combined_image_checksum_;
+}
+
+std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
+  DCHECK(oat_file != nullptr);
+  std::string art_file = ArtFileName(oat_file);
+  if (art_file.empty()) {
+    return nullptr;
+  }
+  std::string error_msg;
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<gc::space::ImageSpace> ret =
+      gc::space::ImageSpace::CreateFromAppImage(art_file.c_str(), oat_file, &error_msg);
+  if (ret == nullptr && (VLOG_IS_ON(image) || OS::FileExists(art_file.c_str()))) {
+    LOG(INFO) << "Failed to open app image " << art_file.c_str() << " " << error_msg;
+  }
+  return ret;
+}
+
+OatFileAssistant::OatFileInfo::OatFileInfo(OatFileAssistant* oat_file_assistant)
+  : oat_file_assistant_(oat_file_assistant)
+{}
+
+const std::string* OatFileAssistant::OatFileInfo::Filename() {
+  return filename_provided_ ? &filename_ : nullptr;
+}
+
+bool OatFileAssistant::OatFileInfo::Exists() {
+  return GetFile() != nullptr;
+}
+
+OatFileAssistant::OatStatus OatFileAssistant::OatFileInfo::Status() {
+  if (!status_attempted_) {
+    status_attempted_ = true;
+    const OatFile* file = GetFile();
+    if (file == nullptr) {
+      status_ = kOatOutOfDate;
+    } else {
+      status_ = oat_file_assistant_->GivenOatFileStatus(*file);
+      VLOG(oat) << file->GetLocation() << " is " << status_
+          << " with filter " << file->GetCompilerFilter();
     }
   }
-  return old_profile_load_succeeded_ ? &cached_old_profile_ : nullptr;
+  return status_;
+}
+
+bool OatFileAssistant::OatFileInfo::IsOutOfDate() {
+  return Status() == kOatOutOfDate;
+}
+
+bool OatFileAssistant::OatFileInfo::NeedsRelocation() {
+  return Status() == kOatNeedsRelocation;
+}
+
+bool OatFileAssistant::OatFileInfo::IsUpToDate() {
+  return Status() == kOatUpToDate;
+}
+
+CompilerFilter::Filter OatFileAssistant::OatFileInfo::CompilerFilter() {
+  const OatFile* file = GetFile();
+  CHECK(file != nullptr);
+  return file->GetCompilerFilter();
+}
+
+const OatFile* OatFileAssistant::OatFileInfo::GetFile() {
+  CHECK(!file_released_) << "GetFile called after oat file released.";
+  if (!load_attempted_) {
+    load_attempted_ = true;
+    if (filename_provided_) {
+      std::string error_msg;
+      file_.reset(OatFile::Open(filename_.c_str(),
+                                filename_.c_str(),
+                                nullptr,
+                                nullptr,
+                                oat_file_assistant_->load_executable_,
+                                /*low_4gb*/false,
+                                oat_file_assistant_->dex_location_.c_str(),
+                                &error_msg));
+      if (file_.get() == nullptr) {
+        VLOG(oat) << "OatFileAssistant test for existing oat file "
+          << filename_ << ": " << error_msg;
+      }
+    }
+  }
+  return file_.get();
+}
+
+bool OatFileAssistant::OatFileInfo::CompilerFilterIsOkay(
+    CompilerFilter::Filter target, bool profile_changed) {
+  const OatFile* file = GetFile();
+  if (file == nullptr) {
+    return false;
+  }
+
+  CompilerFilter::Filter current = file->GetCompilerFilter();
+  if (profile_changed && CompilerFilter::DependsOnProfile(current)) {
+    VLOG(oat) << "Compiler filter not okay because Profile changed";
+    return false;
+  }
+  return CompilerFilter::IsAsGoodAs(current, target);
+}
+
+bool OatFileAssistant::OatFileInfo::IsExecutable() {
+  const OatFile* file = GetFile();
+  return (file != nullptr && file->IsExecutable());
+}
+
+bool OatFileAssistant::OatFileInfo::HasPatchInfo() {
+  const OatFile* file = GetFile();
+  return (file != nullptr && file->HasPatchInfo());
+}
+
+void OatFileAssistant::OatFileInfo::Reset() {
+  load_attempted_ = false;
+  file_.reset();
+  status_attempted_ = false;
+}
+
+void OatFileAssistant::OatFileInfo::Reset(const std::string& filename) {
+  filename_provided_ = true;
+  filename_ = filename;
+  Reset();
+}
+
+std::unique_ptr<OatFile> OatFileAssistant::OatFileInfo::ReleaseFile() {
+  file_released_ = true;
+  return std::move(file_);
 }
 
 }  // namespace art
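CompilerFilterIsOkay above delegates the actual comparison to CompilerFilter::IsAsGoodAs. As a rough sketch of that kind of check, assuming for simplicity that the filters form a single total order from least to most compiled (the real CompilerFilter enum has more values, including profile-guided variants, and its comparison is correspondingly more involved):

#include <cassert>

// Hypothetical, simplified filter ordering; the names follow filters used in
// this patch, but this is not the real art::CompilerFilter declaration.
enum class Filter {
  kVerifyNone,
  kVerifyAtRuntime,
  kInterpretOnly,
  kSpeed,
  kEverything,
};

// An oat file compiled with `current` satisfies a request for `target` if
// `current` sits at least as far along the ordering.
bool IsAsGoodAs(Filter current, Filter target) {
  return static_cast<int>(current) >= static_cast<int>(target);
}

int main() {
  assert(IsAsGoodAs(Filter::kSpeed, Filter::kInterpretOnly));   // speed covers interpret-only
  assert(!IsAsGoodAs(Filter::kInterpretOnly, Filter::kSpeed));  // but not the reverse
  return 0;
}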
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index f781532..3f018dc 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -19,17 +19,24 @@
 
 #include <cstdint>
 #include <memory>
+#include <sstream>
 #include <string>
 
 #include "arch/instruction_set.h"
 #include "base/scoped_flock.h"
 #include "base/unix_file/fd_file.h"
+#include "compiler_filter.h"
 #include "oat_file.h"
 #include "os.h"
-#include "profiler.h"
 
 namespace art {
 
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
+
 // Class for assisting with oat file management.
 //
 // This class collects common utilities for determining the status of an oat
@@ -38,9 +45,6 @@
 // The oat file assistant is intended to be used with dex locations not on the
 // boot class path. See the IsInBootClassPath method for a way to check if the
 // dex location is in the boot class path.
-//
-// TODO: All the profiling related code is old and untested. It should either
-// be restored and tested, or removed.
 class OatFileAssistant {
  public:
   enum DexOptNeeded {
@@ -67,8 +71,8 @@
 
   enum OatStatus {
     // kOatOutOfDate - An oat file is said to be out of date if the file does
-    // not exist, or is out of date with respect to the dex file or boot
-    // image.
+    // not exist, is out of date with respect to the dex file or boot image,
+    // or does not meet the target compilation type.
     kOatOutOfDate,
 
     // kOatNeedsRelocation - An oat file is said to need relocation if the
@@ -96,28 +100,19 @@
   // The isa should be either the 32 bit or 64 bit variant for the current
   // device. For example, on an arm device, use arm or arm64. An oat file can
   // be loaded executable only if the ISA matches the current runtime.
-  OatFileAssistant(const char* dex_location, const InstructionSet isa,
+  //
+  // load_executable should be true if the caller intends to try and load
+  // executable code for this dex location.
+  OatFileAssistant(const char* dex_location,
+                   const InstructionSet isa,
                    bool load_executable);
 
   // Constructs an OatFileAssistant, providing an explicit target oat_location
   // to use instead of the standard oat location.
-  OatFileAssistant(const char* dex_location, const char* oat_location,
-                   const InstructionSet isa, bool load_executable);
-
-  // Constructs an OatFileAssistant, providing an additional package_name used
-  // solely for the purpose of locating profile files.
-  //
-  // TODO: Why is the name of the profile file based on the package name and
-  // not the dex location? If there is no technical reason the dex_location
-  // can't be used, we should prefer that instead.
-  OatFileAssistant(const char* dex_location, const InstructionSet isa,
-                   bool load_executable, const char* package_name);
-
-  // Constructs an OatFileAssistant with user specified oat location and a
-  // package name.
-  OatFileAssistant(const char* dex_location, const char* oat_location,
-                   const InstructionSet isa, bool load_executable,
-                   const char* package_name);
+  OatFileAssistant(const char* dex_location,
+                   const char* oat_location,
+                   const InstructionSet isa,
+                   bool load_executable);
 
   ~OatFileAssistant();
 
@@ -144,16 +139,37 @@
   bool Lock(std::string* error_msg);
 
   // Return what action needs to be taken to produce up-to-date code for this
-  // dex location.
-  DexOptNeeded GetDexOptNeeded();
+  // dex location that is at least as good as an oat file generated with the
+  // given compiler filter. profile_changed should be true to indicate the
+  // profile has recently changed for this dex location.
+  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter,
+                               bool profile_changed = false);
+
+  // Returns true if there is up-to-date code for this dex location,
+  // irrespective of the compiler filter of the up-to-date code.
+  bool IsUpToDate();
+
+  // Return code used when attempting to generate updated code.
+  enum ResultOfAttemptToUpdate {
+    kUpdateFailed,        // We tried making the code up to date, but
+                          // encountered an unexpected failure.
+    kUpdateNotAttempted,  // We wanted to update the code, but determined we
+                          // should not make the attempt.
+    kUpdateSucceeded      // We successfully made the code up to date
+                          // (possibly by doing nothing).
+  };
 
   // Attempts to generate or relocate the oat file as needed to make it up to
-  // date.
-  // Returns true on success.
+  // date based on the current runtime and compiler options.
+  // profile_changed should be true to indicate the profile has recently
+  // changed for this dex location.
   //
-  // If there is a failure, the value of error_msg will be set to a string
-  // describing why there was failure. error_msg must not be null.
-  bool MakeUpToDate(std::string* error_msg);
+  // Returns the result of attempting to update the code.
+  //
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate MakeUpToDate(bool profile_changed, std::string* error_msg);
 
   // Returns an oat file that can be used for loading dex files.
   // Returns null if no suitable oat file was found.
@@ -163,6 +179,9 @@
   // the OatFileAssistant object.
   std::unique_ptr<OatFile> GetBestOatFile();
 
+  // Opens and returns an image space associated with the oat file.
+  static std::unique_ptr<gc::space::ImageSpace> OpenImageSpace(const OatFile* oat_file);
+
   // Loads the dex files in the given oat file for the given dex location.
   // The oat file should be up to date for the given dex location.
   // This loads multiple dex files in the case of multidex.
@@ -197,6 +216,9 @@
   bool OdexFileIsOutOfDate();
   bool OdexFileNeedsRelocation();
   bool OdexFileIsUpToDate();
+  // Must only be called if the associated odex file exists, i.e., if
+  // |OdexFileExists() == true|.
+  CompilerFilter::Filter OdexFileCompilerFilter();
 
   // When the dex files is compiled on the target device, the oat file is the
   // result. The oat file will have been relocated to some
@@ -213,55 +235,32 @@
   bool OatFileIsOutOfDate();
   bool OatFileNeedsRelocation();
   bool OatFileIsUpToDate();
+  // Must only be called if the associated oat file exists, i.e., if
+  // |OatFileExists() == true|.
+  CompilerFilter::Filter OatFileCompilerFilter();
 
-  // These methods return the status for a given opened oat file with respect
-  // to the dex location.
-  OatStatus GivenOatFileStatus(const OatFile& file);
-  bool GivenOatFileIsOutOfDate(const OatFile& file);
-  bool GivenOatFileNeedsRelocation(const OatFile& file);
-  bool GivenOatFileIsUpToDate(const OatFile& file);
-
-  // Returns true if there is an accessible profile associated with the dex
+  // Return the status for a given opened oat file with respect to the dex
   // location.
-  // This returns false if profiling is disabled.
-  bool ProfileExists();
-
-  // The old profile is a file containing a previous snapshot of profiling
-  // information associated with the dex file code. This is used to track how
-  // the profiling information has changed over time.
-  //
-  // Returns true if there is an accessible old profile associated with the
-  // dex location.
-  // This returns false if profiling is disabled.
-  bool OldProfileExists();
-
-  // Returns true if there has been a significant change between the old
-  // profile and the current profile.
-  // This returns false if profiling is disabled.
-  bool IsProfileChangeSignificant();
-
-  // Copy the current profile to the old profile location.
-  void CopyProfileFile();
+  OatStatus GivenOatFileStatus(const OatFile& file);
 
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
   // oat file.
-  // Returns true on success.
-  // This will fail if dex2oat is not enabled in the current runtime.
   //
-  // If there is a failure, the value of error_msg will be set to a string
-  // describing why there was failure. error_msg must not be null.
-  bool RelocateOatFile(const std::string* input_file, std::string* error_msg);
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate RelocateOatFile(const std::string* input_file, std::string* error_msg);
 
-  // Generate the oat file from the dex file.
+  // Generate the oat file from the dex file using the current runtime
+  // compiler options.
   // This does not check the current status before attempting to generate the
   // oat file.
-  // Returns true on success.
-  // This will fail if dex2oat is not enabled in the current runtime.
   //
-  // If there is a failure, the value of error_msg will be set to a string
-  // describing why there was failure. error_msg must not be null.
-  bool GenerateOatFile(std::string* error_msg);
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg);
 
   // Executes dex2oat using the current runtime configuration overridden with
   // the given arguments. This does not check to see if dex2oat is enabled in
@@ -278,10 +277,26 @@
   // Constructs the odex file name for the given dex location.
   // Returns true on success, in which case odex_filename is set to the odex
   // file name.
-  // Returns false on error, in which case error_msg describes the error.
+  // Returns false on error, in which case error_msg describes the error and
+  // odex_filename is not changed.
   // Neither odex_filename nor error_msg may be null.
-  static bool DexFilenameToOdexFilename(const std::string& location,
-      InstructionSet isa, std::string* odex_filename, std::string* error_msg);
+  static bool DexLocationToOdexFilename(const std::string& location,
+                                        InstructionSet isa,
+                                        std::string* odex_filename,
+                                        std::string* error_msg);
+
+  // Constructs the oat file name for the given dex location.
+  // Returns true on success, in which case oat_filename is set to the oat
+  // file name.
+  // Returns false on error, in which case error_msg describes the error and
+  // oat_filename is not changed.
+  // Neither oat_filename nor error_msg may be null.
+  static bool DexLocationToOatFilename(const std::string& location,
+                                       InstructionSet isa,
+                                       std::string* oat_filename,
+                                       std::string* error_msg);
+
+  static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
 
  private:
   struct ImageInfo {
@@ -291,21 +306,73 @@
     std::string location;
   };
 
-  // Returns the path to the dalvik cache directory.
-  // Does not check existence of the cache or try to create it.
-  // Includes the trailing slash.
-  // Returns an empty string if we can't get the dalvik cache directory path.
-  std::string DalvikCacheDirectory();
+  class OatFileInfo {
+   public:
+    // Initially the info is for no file in particular. It will treat the
+    // file as out of date until Reset is called with a real filename to
+    // cache information for.
+    explicit OatFileInfo(OatFileAssistant* oat_file_assistant);
 
-  // Constructs the filename for the profile file.
-  // Returns an empty string if we do not have the necessary information to
-  // construct the filename.
-  std::string ProfileFileName();
+    const std::string* Filename();
+    bool Exists();
+    OatStatus Status();
+    bool IsOutOfDate();
+    bool NeedsRelocation();
+    bool IsUpToDate();
+    // Must only be called if the associated file exists, i.e., if
+    // |Exists() == true|.
+    CompilerFilter::Filter CompilerFilter();
 
-  // Constructs the filename for the old profile file.
-  // Returns an empty string if we do not have the necessary information to
-  // construct the filename.
-  std::string OldProfileFileName();
+    // Returns the loaded file.
+    // Loads the file if needed. Returns null if the file failed to load.
+    // The caller shouldn't clean up or free the returned pointer.
+    const OatFile* GetFile();
+
+    // Returns true if the compiler filter used to generate the file is at
+    // least as good as the given target filter. profile_changed should be
+    // true to indicate the profile has recently changed for this dex
+    // location.
+    bool CompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
+
+    // Returns true if the file is opened executable.
+    bool IsExecutable();
+
+    // Returns true if the file has patch info required to run patchoat.
+    bool HasPatchInfo();
+
+    // Clear any cached information about the file that depends on the
+    // contents of the file. This does not reset the provided filename.
+    void Reset();
+
+    // Clear any cached information and switch to getting info about the oat
+    // file with the given filename.
+    void Reset(const std::string& filename);
+
+    // Release the loaded oat file.
+    // Returns null if the oat file hasn't been loaded.
+    //
+    // After this call, no other methods of the OatFileInfo should be
+    // called, because access to the loaded oat file has been taken away from
+    // the OatFileInfo object.
+    std::unique_ptr<OatFile> ReleaseFile();
+
+   private:
+    OatFileAssistant* oat_file_assistant_;
+
+    bool filename_provided_ = false;
+    std::string filename_;
+
+    bool load_attempted_ = false;
+    std::unique_ptr<OatFile> file_;
+
+    bool status_attempted_ = false;
+    OatStatus status_;
+
+    // For debugging only.
+    // If this flag is set, the file has been released to the user and the
+    // OatFileInfo object is in a bad state and should no longer be used.
+    bool file_released_ = false;
+  };
 
   // Returns the current image location.
   // Returns an empty string if the image location could not be retrieved.
@@ -322,47 +389,13 @@
   // found for the dex_location_ dex file.
   const uint32_t* GetRequiredDexChecksum();
 
-  // Returns the loaded odex file.
-  // Loads the file if needed. Returns null if the file failed to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  const OatFile* GetOdexFile();
-
-  // Returns true if the odex file is opened executable.
-  bool OdexFileIsExecutable();
-
-  // Clear any cached information about the odex file that depends on the
-  // contents of the file.
-  void ClearOdexFileCache();
-
-  // Returns the loaded oat file.
-  // Loads the file if needed. Returns null if the file failed to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  const OatFile* GetOatFile();
-
-  // Returns true if the oat file is opened executable.
-  bool OatFileIsExecutable();
-
-  // Clear any cached information about the oat file that depends on the
-  // contents of the file.
-  void ClearOatFileCache();
-
   // Returns the loaded image info.
   // Loads the image info if needed. Returns null if the image info failed
   // to load.
   // The caller shouldn't clean up or free the returned pointer.
   const ImageInfo* GetImageInfo();
 
-  // Returns the loaded profile.
-  // Loads the profile if needed. Returns null if the profile failed
-  // to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  ProfileFile* GetProfile();
-
-  // Returns the loaded old profile.
-  // Loads the old profile if needed. Returns null if the old profile
-  // failed to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  ProfileFile* GetOldProfile();
+  uint32_t GetCombinedImageChecksum();
 
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
@@ -375,12 +408,6 @@
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
 
-  // The package name, used solely to find the profile file.
-  // This may be null in a properly constructed object. In this case,
-  // profile_load_attempted_ and old_profile_load_attempted_ will be true, and
-  // profile_load_succeeded_ and old_profile_load_succeeded_ will be false.
-  const char* package_name_ = nullptr;
-
   // Whether we will attempt to load oat files executable.
   bool load_executable_ = false;
 
@@ -391,45 +418,8 @@
   bool required_dex_checksum_found_;
   bool has_original_dex_files_;
 
-  // Cached value of the odex file name.
-  // This should be accessed only by the OdexFileName() method.
-  bool cached_odex_file_name_attempted_ = false;
-  bool cached_odex_file_name_found_;
-  std::string cached_odex_file_name_;
-
-  // Cached value of the loaded odex file.
-  // Use the GetOdexFile method rather than accessing this directly, unless you
-  // know the odex file isn't out of date.
-  bool odex_file_load_attempted_ = false;
-  std::unique_ptr<OatFile> cached_odex_file_;
-
-  // Cached results for OdexFileIsOutOfDate
-  bool odex_file_is_out_of_date_attempted_ = false;
-  bool cached_odex_file_is_out_of_date_;
-
-  // Cached results for OdexFileIsUpToDate
-  bool odex_file_is_up_to_date_attempted_ = false;
-  bool cached_odex_file_is_up_to_date_;
-
-  // Cached value of the oat file name.
-  // This should be accessed only by the OatFileName() method.
-  bool cached_oat_file_name_attempted_ = false;
-  bool cached_oat_file_name_found_;
-  std::string cached_oat_file_name_;
-
-  // Cached value of the loaded oat file.
-  // Use the GetOatFile method rather than accessing this directly, unless you
-  // know the oat file isn't out of date.
-  bool oat_file_load_attempted_ = false;
-  std::unique_ptr<OatFile> cached_oat_file_;
-
-  // Cached results for OatFileIsOutOfDate
-  bool oat_file_is_out_of_date_attempted_ = false;
-  bool cached_oat_file_is_out_of_date_;
-
-  // Cached results for OatFileIsUpToDate
-  bool oat_file_is_up_to_date_attempted_ = false;
-  bool cached_oat_file_is_up_to_date_;
+  OatFileInfo odex_;
+  OatFileInfo oat_;
 
   // Cached value of the image info.
   // Use the GetImageInfo method rather than accessing these directly.
@@ -438,28 +428,13 @@
   bool image_info_load_attempted_ = false;
   bool image_info_load_succeeded_ = false;
   ImageInfo cached_image_info_;
-
-  // Cached value of the profile file.
-  // Use the GetProfile method rather than accessing these directly.
-  bool profile_load_attempted_ = false;
-  bool profile_load_succeeded_ = false;
-  ProfileFile cached_profile_;
-
-  // Cached value of the profile file.
-  // Use the GetOldProfile method rather than accessing these directly.
-  bool old_profile_load_attempted_ = false;
-  bool old_profile_load_succeeded_ = false;
-  ProfileFile cached_old_profile_;
-
-  // For debugging only.
-  // If this flag is set, the oat or odex file has been released to the user
-  // of the OatFileAssistant object and the OatFileAssistant object is in a
-  // bad state and should no longer be used.
-  bool oat_file_released_ = false;
+  uint32_t combined_image_checksum_ = 0;
 
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
 
+std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStatus status);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_OAT_FILE_ASSISTANT_H_
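Taken together, the reworked header changes the caller's contract: the desired compiler filter is now an explicit argument, and MakeUpToDate reports a three-way result instead of a bool. A hedged usage sketch follows; it is an illustrative call site, not copied from ART, and EnsureSpeedCompiled is a made-up name:

#include <string>

#include "base/logging.h"
#include "compiler_filter.h"
#include "oat_file_assistant.h"

namespace art {

// Illustrative only: ensure "speed"-quality code exists for a dex location,
// reacting to each ResultOfAttemptToUpdate value.
void EnsureSpeedCompiled(const char* dex_location) {
  OatFileAssistant assistant(dex_location, kRuntimeISA, /*load_executable*/ false);

  if (assistant.GetDexOptNeeded(CompilerFilter::kSpeed) ==
      OatFileAssistant::kNoDexOptNeeded) {
    return;  // Existing code is already at least as good as "speed".
  }

  std::string error_msg;
  switch (assistant.MakeUpToDate(/*profile_changed*/ false, &error_msg)) {
    case OatFileAssistant::kUpdateSucceeded:
      break;  // Possibly a no-op; the code is now up to date.
    case OatFileAssistant::kUpdateNotAttempted:
      LOG(WARNING) << "dexopt not attempted: " << error_msg;
      break;
    case OatFileAssistant::kUpdateFailed:
      LOG(ERROR) << "dexopt failed: " << error_msg;
      break;
  }
}

}  // namespace art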
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 2c81edd..05c5a22 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include "oat_file_assistant.h"
-
 #include <algorithm>
 #include <fstream>
 #include <string>
@@ -29,8 +27,10 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
+#include "dex2oat_environment_test.h"
 #include "gc/space/image_space.h"
 #include "mem_map.h"
+#include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
@@ -39,203 +39,52 @@
 
 namespace art {
 
-class OatFileAssistantTest : public CommonRuntimeTest {
+class OatFileAssistantTest : public Dex2oatEnvironmentTest {
  public:
-  virtual void SetUp() {
+  virtual void SetUp() OVERRIDE {
     ReserveImageSpace();
-    CommonRuntimeTest::SetUp();
-
-    // Create a scratch directory to work from.
-    scratch_dir_ = android_data_ + "/OatFileAssistantTest";
-    ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700));
-
-    // Create a subdirectory in scratch for odex files.
-    odex_oat_dir_ = scratch_dir_ + "/oat";
-    ASSERT_EQ(0, mkdir(odex_oat_dir_.c_str(), 0700));
-
-    odex_dir_ = odex_oat_dir_ + "/" + std::string(GetInstructionSetString(kRuntimeISA));
-    ASSERT_EQ(0, mkdir(odex_dir_.c_str(), 0700));
-
-
-    // Verify the environment is as we expect
-    uint32_t checksum;
-    std::string error_msg;
-    ASSERT_TRUE(OS::FileExists(GetImageFile().c_str()))
-      << "Expected pre-compiled boot image to be at: " << GetImageFile();
-    ASSERT_TRUE(OS::FileExists(GetDexSrc1().c_str()))
-      << "Expected dex file to be at: " << GetDexSrc1();
-    ASSERT_TRUE(OS::FileExists(GetStrippedDexSrc1().c_str()))
-      << "Expected stripped dex file to be at: " << GetStrippedDexSrc1();
-    ASSERT_FALSE(DexFile::GetChecksum(GetStrippedDexSrc1().c_str(), &checksum, &error_msg))
-      << "Expected stripped dex file to be stripped: " << GetStrippedDexSrc1();
-    ASSERT_TRUE(OS::FileExists(GetDexSrc2().c_str()))
-      << "Expected dex file to be at: " << GetDexSrc2();
-
-    // GetMultiDexSrc2 should have the same primary dex checksum as
-    // GetMultiDexSrc1, but a different secondary dex checksum.
-    std::vector<std::unique_ptr<const DexFile>> multi1;
-    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc1().c_str(),
-          GetMultiDexSrc1().c_str(), &error_msg, &multi1)) << error_msg;
-    ASSERT_GT(multi1.size(), 1u);
-
-    std::vector<std::unique_ptr<const DexFile>> multi2;
-    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc2().c_str(),
-          GetMultiDexSrc2().c_str(), &error_msg, &multi2)) << error_msg;
-    ASSERT_GT(multi2.size(), 1u);
-
-    ASSERT_EQ(multi1[0]->GetLocationChecksum(), multi2[0]->GetLocationChecksum());
-    ASSERT_NE(multi1[1]->GetLocationChecksum(), multi2[1]->GetLocationChecksum());
+    Dex2oatEnvironmentTest::SetUp();
   }
 
-  virtual void SetUpRuntimeOptions(RuntimeOptions* options) {
-    // options->push_back(std::make_pair("-verbose:oat", nullptr));
+  // Pre-relocate the image to a known non-zero offset, so we don't have to
+  // deal with the runtime randomly choosing a relocation delta of 0 and
+  // messing up the expected results of the tests.
+  bool PreRelocateImage(std::string* error_msg) {
+    std::string image;
+    if (!GetCachedImageFile(&image, error_msg)) {
+      return false;
+    }
 
-    // Set up the image location.
-    options->push_back(std::make_pair("-Ximage:" + GetImageLocation(),
-          nullptr));
-    // Make sure compilercallbacks are not set so that relocation will be
-    // enabled.
-    callbacks_.reset();
+    std::string patchoat = GetAndroidRoot();
+    patchoat += kIsDebugBuild ? "/bin/patchoatd" : "/bin/patchoat";
+
+    std::vector<std::string> argv;
+    argv.push_back(patchoat);
+    argv.push_back("--input-image-location=" + GetImageLocation());
+    argv.push_back("--output-image-file=" + image);
+    argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
+    argv.push_back("--base-offset-delta=0x00008000");
+    return Exec(argv, error_msg);
   }
 
   virtual void PreRuntimeCreate() {
+    std::string error_msg;
+    ASSERT_TRUE(PreRelocateImage(&error_msg)) << error_msg;
+
     UnreserveImageSpace();
   }
 
-  virtual void PostRuntimeCreate() {
+  virtual void PostRuntimeCreate() OVERRIDE {
     ReserveImageSpace();
   }
 
-  virtual void TearDown() {
-    ClearDirectory(odex_dir_.c_str());
-    ASSERT_EQ(0, rmdir(odex_dir_.c_str()));
-
-    ClearDirectory(odex_oat_dir_.c_str());
-    ASSERT_EQ(0, rmdir(odex_oat_dir_.c_str()));
-
-    ClearDirectory(scratch_dir_.c_str());
-    ASSERT_EQ(0, rmdir(scratch_dir_.c_str()));
-
-    CommonRuntimeTest::TearDown();
-  }
-
-  void Copy(std::string src, std::string dst) {
-    std::ifstream  src_stream(src, std::ios::binary);
-    std::ofstream  dst_stream(dst, std::ios::binary);
-
-    dst_stream << src_stream.rdbuf();
-  }
-
-  // Returns the directory where the pre-compiled core.art can be found.
-  // TODO: We should factor out this into common tests somewhere rather than
-  // re-hardcoding it here (This was copied originally from the elf writer
-  // test).
-  std::string GetImageDirectory() {
-    if (IsHost()) {
-      const char* host_dir = getenv("ANDROID_HOST_OUT");
-      CHECK(host_dir != nullptr);
-      return std::string(host_dir) + "/framework";
-    } else {
-      return std::string("/data/art-test");
-    }
-  }
-
-  std::string GetImageLocation() {
-    return GetImageDirectory() + "/core.art";
-  }
-
-  std::string GetImageFile() {
-    return GetImageDirectory() + "/" + GetInstructionSetString(kRuntimeISA)
-      + "/core.art";
-  }
-
-  std::string GetDexSrc1() {
-    return GetTestDexFileName("Main");
-  }
-
-  // Returns the path to a dex file equivalent to GetDexSrc1, but with the dex
-  // file stripped.
-  std::string GetStrippedDexSrc1() {
-    return GetTestDexFileName("MainStripped");
-  }
-
-  std::string GetMultiDexSrc1() {
-    return GetTestDexFileName("MultiDex");
-  }
-
-  // Returns the path to a multidex file equivalent to GetMultiDexSrc2, but
-  // with the contents of the secondary dex file changed.
-  std::string GetMultiDexSrc2() {
-    return GetTestDexFileName("MultiDexModifiedSecondary");
-  }
-
-  std::string GetDexSrc2() {
-    return GetTestDexFileName("Nested");
-  }
-
-  // Scratch directory, for dex and odex files (oat files will go in the
-  // dalvik cache).
-  std::string GetScratchDir() {
-    return scratch_dir_;
-  }
-
-  // Odex directory is the subdirectory in the scratch directory where odex
-  // files should be located.
-  std::string GetOdexDir() {
-    return odex_dir_;
-  }
-
   // Generate a non-PIC odex file for the purposes of test.
   // The generated odex file will be un-relocated.
   void GenerateOdexForTest(const std::string& dex_location,
-                           const std::string& odex_location) {
-    // To generate an un-relocated odex file, we first compile a relocated
-    // version of the file, then manually call patchoat to make it look as if
-    // it is unrelocated.
-    std::string relocated_odex_location = odex_location + ".relocated";
-    std::vector<std::string> args;
-    args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + relocated_odex_location);
-    args.push_back("--include-patch-information");
-
-    // We need to use the quick compiler to generate non-PIC code, because
-    // the optimizing compiler always generates PIC.
-    args.push_back("--compiler-backend=Quick");
-
-    std::string error_msg;
-    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
-
-    // Use patchoat to unrelocate the relocated odex file.
-    Runtime* runtime = Runtime::Current();
-    std::vector<std::string> argv;
-    argv.push_back(runtime->GetPatchoatExecutable());
-    argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
-    argv.push_back("--input-oat-file=" + relocated_odex_location);
-    argv.push_back("--output-oat-file=" + odex_location);
-    argv.push_back("--base-offset-delta=0x00008000");
-    std::string command_line(Join(argv, ' '));
-    ASSERT_TRUE(Exec(argv, &error_msg)) << error_msg;
-
-    // Verify the odex file was generated as expected and really is
-    // unrelocated.
-    std::unique_ptr<OatFile> odex_file(OatFile::Open(
-        odex_location.c_str(), odex_location.c_str(), nullptr, nullptr,
-        false, dex_location.c_str(), &error_msg));
-    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
-
-    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
-    ASSERT_TRUE(image_space != nullptr);
-    const ImageHeader& image_header = image_space->GetImageHeader();
-    const OatHeader& oat_header = odex_file->GetOatHeader();
-    EXPECT_FALSE(odex_file->IsPic());
-    EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
-    EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
-        oat_header.GetImageFileLocationOatDataBegin());
-    EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
-  }
-
-  void GeneratePicOdexForTest(const std::string& dex_location,
-                              const std::string& odex_location) {
+                           const std::string& odex_location,
+                           CompilerFilter::Filter filter,
+                           bool pic = false,
+                           bool with_patch_info = true) {
     // Temporarily redirect the dalvik cache so dex2oat doesn't find the
     // relocated image file.
     std::string android_data_tmp = GetScratchDir() + "AndroidDataTmp";
@@ -243,19 +92,63 @@
     std::vector<std::string> args;
     args.push_back("--dex-file=" + dex_location);
     args.push_back("--oat-file=" + odex_location);
-    args.push_back("--compile-pic");
+    args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
     args.push_back("--runtime-arg");
     args.push_back("-Xnorelocate");
+
+    if (pic) {
+      args.push_back("--compile-pic");
+    }
+
+    if (with_patch_info) {
+      args.push_back("--include-patch-information");
+    }
+
     std::string error_msg;
     ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
     setenv("ANDROID_DATA", android_data_.c_str(), 1);
 
-    // Verify the odex file was generated as expected.
-    std::unique_ptr<OatFile> odex_file(OatFile::Open(
-        odex_location.c_str(), odex_location.c_str(), nullptr, nullptr,
-        false, dex_location.c_str(), &error_msg));
+    // Verify the odex file was generated as expected and really is
+    // unrelocated.
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                     odex_location.c_str(),
+                                                     nullptr,
+                                                     nullptr,
+                                                     false,
+                                                     /*low_4gb*/false,
+                                                     dex_location.c_str(),
+                                                     &error_msg));
     ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
-    EXPECT_TRUE(odex_file->IsPic());
+    EXPECT_EQ(pic, odex_file->IsPic());
+    EXPECT_EQ(with_patch_info, odex_file->HasPatchInfo());
+    EXPECT_EQ(filter, odex_file->GetCompilerFilter());
+
+    if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
+      const std::vector<gc::space::ImageSpace*> image_spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+      ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
+      const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
+      const OatHeader& oat_header = odex_file->GetOatHeader();
+      uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
+      EXPECT_EQ(combined_checksum, oat_header.GetImageFileLocationOatChecksum());
+      EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
+          oat_header.GetImageFileLocationOatDataBegin());
+      EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
+    }
+  }
+
+  void GeneratePicOdexForTest(const std::string& dex_location,
+                              const std::string& odex_location,
+                              CompilerFilter::Filter filter) {
+    GenerateOdexForTest(dex_location, odex_location, filter, true, false);
+  }
+
+  // Generate a non-PIC odex file without patch information for the purposes
+  // of test.  The generated odex file will be un-relocated.
+  void GenerateNoPatchOdexForTest(const std::string& dex_location,
+                                  const std::string& odex_location,
+                                  CompilerFilter::Filter filter) {
+    GenerateOdexForTest(dex_location, odex_location, filter, false, false);
   }
 
  private:
@@ -267,11 +160,10 @@
     MemMap::Init();
 
     // Ensure a chunk of memory is reserved for the image space.
-    uintptr_t reservation_start = ART_BASE_ADDRESS + ART_BASE_ADDRESS_MIN_DELTA;
-    uintptr_t reservation_end = ART_BASE_ADDRESS + ART_BASE_ADDRESS_MAX_DELTA
-        // Include the main space that has to come right after the
-        // image in case of the GSS collector.
-        + 384 * MB;
+    // The reservation_end includes room for the main space that has to come
+    // right after the image in case of the GSS collector.
+    uintptr_t reservation_start = ART_BASE_ADDRESS;
+    uintptr_t reservation_end = ART_BASE_ADDRESS + 384 * MB;
 
     std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
     ASSERT_TRUE(map.get() != nullptr) << "Failed to build process map";
@@ -306,9 +198,6 @@
     image_reservation_.clear();
   }
 
-  std::string scratch_dir_;
-  std::string odex_oat_dir_;
-  std::string odex_dir_;
   std::vector<std::unique_ptr<MemMap>> image_reservation_;
 };
 
@@ -322,11 +211,32 @@
 
 // Generate an oat file for the purposes of test, as opposed to testing
 // generation of oat files.
-static void GenerateOatForTest(const char* dex_location) {
-  OatFileAssistant oat_file_assistant(dex_location, kRuntimeISA, false);
-
+static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
+  // Use an oat file assistant to find the proper oat location.
+  std::string oat_location;
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.GenerateOatFile(&error_msg)) << error_msg;
+  ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename(
+        dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg;
+
+  std::vector<std::string> args;
+  args.push_back("--dex-file=" + std::string(dex_location));
+  args.push_back("--oat-file=" + oat_location);
+  args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
+  args.push_back("--runtime-arg");
+  args.push_back("-Xnorelocate");
+  ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
+
+  // Verify the oat file was generated as expected.
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location.c_str(),
+                                                  oat_location.c_str(),
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  /*low_4gb*/false,
+                                                  dex_location,
+                                                  &error_msg));
+  ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
+  EXPECT_EQ(filter, oat_file->GetCompilerFilter());
 }
 
 // Case: We have a DEX file, but no OAT file for it.
@@ -337,7 +247,14 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -360,12 +277,13 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg));
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -377,11 +295,78 @@
 TEST_F(OatFileAssistantTest, OatUpToDate) {
   std::string dex_location = GetScratchDir() + "/OatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_TRUE(oat_file_assistant.OatFileExists());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
+  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and ODEX file for a different dex location.
+// Expect: The status is kDex2OatNeeded.
+TEST_F(OatFileAssistantTest, OatForDifferentDex) {
+  // Generate an odex file for OatForDifferentDex_A.jar
+  std::string dex_location_a = GetScratchDir() + "/OatForDifferentDex_A.jar";
+  std::string odex_location = GetOdexDir() + "/OatForDifferentDex.odex";
+  Copy(GetDexSrc1(), dex_location_a);
+  GenerateOdexForTest(dex_location_a, odex_location, CompilerFilter::kSpeed);
+
+  // Try to use that odex file for OatForDifferentDex.jar
+  std::string dex_location = GetScratchDir() + "/OatForDifferentDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileExists());
+  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+}
+
+// Case: We have a DEX file and speed-profile OAT file for it.
+// Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
+// kDex2OatNeeded if the profile has changed.
+TEST_F(OatFileAssistantTest, ProfileOatUpToDate) {
+  std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar";
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true));
+
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -399,10 +384,11 @@
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
   std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar";
   Copy(GetMultiDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // Verify we can load both dex files.
@@ -421,14 +407,15 @@
 
   // Compile code for GetMultiDexSrc1.
   Copy(GetMultiDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Now overwrite the dex file with GetMultiDexSrc2 so the secondary checksum
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -447,6 +434,7 @@
   args.push_back("--dex-file=" + dex_location);
   args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar"));
   args.push_back("--oat-file=" + oat_location);
+  args.push_back("--compiler-filter=speed");
 
   std::string error_msg;
   ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
@@ -471,11 +459,14 @@
   // We create a dex, generate an oat for it, then overwrite the dex with a
   // different dex to make the oat out of date.
   Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -495,12 +486,15 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -525,7 +519,7 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Strip the dex file
   Copy(GetStrippedDexSrc1(), dex_location);
@@ -533,7 +527,8 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -546,9 +541,12 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -576,11 +574,11 @@
 
   // Create the oat file from a different dex file so it looks out of date.
   Copy(GetDexSrc2(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Create the odex file
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Strip the dex file.
   Copy(GetStrippedDexSrc1(), dex_location);
@@ -588,7 +586,12 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,  // Can't run dex2oat because dex file is stripped.
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -602,9 +605,14 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,  // Can't run dex2oat because dex file is stripped.
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -636,7 +644,12 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -650,9 +663,12 @@
 
   // Make the oat file up to date. This should have no effect.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -674,12 +690,17 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, oat_location);
+  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
       oat_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -694,9 +715,12 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -717,6 +741,39 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
+// Case: We have a DEX file, no ODEX file and an OAT file that needs
+// relocation but doesn't have patch info.
+// Expect: The status is kDex2OatNeeded, because we can't run patchoat.
+TEST_F(OatFileAssistantTest, NoSelfRelocation) {
+  std::string dex_location = GetScratchDir() + "/NoSelfRelocation.jar";
+  std::string oat_location = GetOdexDir() + "/NoSelfRelocation.oat";
+
+  // Create the dex and odex files
+  Copy(GetDexSrc1(), dex_location);
+  GenerateNoPatchOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      oat_location.c_str(), kRuntimeISA, true);
+
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  // Make the oat file up to date.
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() != nullptr);
+  EXPECT_TRUE(oat_file->IsExecutable());
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+  EXPECT_EQ(1u, dex_files.size());
+}
+
 // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and
 // OAT files both have patch delta of 0.
 // Expect: It shouldn't crash, and status is kPatchOatNeeded.
@@ -727,7 +784,7 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Create the oat file by copying the odex so they are located in the same
   // place in memory.
@@ -737,7 +794,8 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
       oat_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -766,12 +824,43 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GeneratePicOdexForTest(dex_location, odex_location);
+  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileExists());
+  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and a VerifyAtRuntime ODEX file, but no OAT file.
+// Expect: The status is kNoDexOptNeeded, because VerifyAtRuntime contains no code.
+TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) {
+  std::string dex_location = GetScratchDir() + "/DexVerifyAtRuntimeOdexNoOat.jar";
+  std::string odex_location = GetOdexDir() + "/DexVerifyAtRuntimeOdexNoOat.odex";
+
+  // Create the dex and odex files
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime);
+
+  // Verify the status.
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -789,7 +878,26 @@
   std::string dex_location = GetScratchDir() + "/LoadOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
+
+  // Load the oat using an oat file assistant.
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() != nullptr);
+  EXPECT_TRUE(oat_file->IsExecutable());
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+  EXPECT_EQ(1u, dex_files.size());
+}
+
+// Case: We have a DEX file and an up-to-date interpret-only OAT file for it.
+// Expect: We should still load the oat file as executable.
+TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) {
+  std::string dex_location = GetScratchDir() + "/LoadExecInterpretOnlyOatUpToDate.jar";
+
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kInterpretOnly);
 
   // Load the oat using an oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
@@ -808,7 +916,7 @@
   std::string dex_location = GetScratchDir() + "/LoadNoExecOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str());
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
@@ -833,7 +941,9 @@
   OatFileAssistant oat_file_assistant(
       dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -849,6 +959,42 @@
   EXPECT_FALSE(ofm.OatFileExists());
 }
 
+// Case: We have a DEX file but can't write the oat file.
+// Expect: We should fail to make the oat file up to date.
+TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) {
+  std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar";
+
+  // Make the oat location unwritable by inserting some non-existent
+  // intermediate directories.
+  std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat";
+
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
+
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() == nullptr);
+}
+
+// Case: We don't have a DEX file and can't write the oat file.
+// Expect: We should fail to generate the oat file, without crashing.
+TEST_F(OatFileAssistantTest, GenNoDex) {
+  std::string dex_location = GetScratchDir() + "/GenNoDex.jar";
+  std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
+
+  OatFileAssistant oat_file_assistant(
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.GenerateOatFile(&error_msg));
+}
+
 // Turn an absolute path into a path relative to the current working
 // directory.
 static std::string MakePathRelative(std::string target) {
@@ -895,7 +1041,8 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -912,7 +1059,8 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -923,7 +1071,9 @@
 
   // Trying to make it up to date should have no effect.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg));
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -935,7 +1085,8 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -950,8 +1101,7 @@
 class RaceGenerateTask : public Task {
  public:
   explicit RaceGenerateTask(const std::string& dex_location, const std::string& oat_location)
-    : dex_location_(dex_location), oat_location_(oat_location),
-      loaded_oat_file_(nullptr)
+    : dex_location_(dex_location), oat_location_(oat_location), loaded_oat_file_(nullptr)
   {}
 
   void Run(Thread* self ATTRIBUTE_UNUSED) {
@@ -963,6 +1113,8 @@
     dex_files = Runtime::Current()->GetOatFileManager().OpenDexFilesFromOat(
         dex_location_.c_str(),
         oat_location_.c_str(),
+        /*class_loader*/nullptr,
+        /*dex_elements*/nullptr,
         &oat_file,
         &error_msgs);
     CHECK(!dex_files.empty()) << Join(error_msgs, '\n');
@@ -993,7 +1145,7 @@
 
   // We use the lib core dex file, because it's large, and hopefully should
   // take a while to generate.
-  Copy(GetLibCoreDexFileName(), dex_location);
+  Copy(GetLibCoreDexFileNames()[0], dex_location);
 
   const int kNumThreads = 32;
   Thread* self = Thread::Current();
@@ -1025,7 +1177,7 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
@@ -1047,7 +1199,7 @@
 
   // Create the dex and odex files
   Copy(GetMultiDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
@@ -1060,21 +1212,49 @@
   EXPECT_EQ(2u, dex_files.size());
 }
 
-TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) {
+TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) {
+  std::string dex_location = GetScratchDir() + "/RuntimeCompilerFilterOptionUsed.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  Runtime::Current()->AddCompilerOption("--compiler-filter=bogus");
+  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
+}
+
+TEST(OatFileAssistantUtilsTest, DexLocationToOdexFilename) {
   std::string error_msg;
   std::string odex_file;
 
-  EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz.jar", kArm, &odex_file, &error_msg)) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
-  EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz.funnyext", kArm, &odex_file, &error_msg)) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
-  EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
         "nopath.jar", kArm, &odex_file, &error_msg));
-  EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
 }
 
@@ -1115,10 +1295,12 @@
 }
 
 // TODO: More Tests:
+//  * Image checksum change is out of date for kInterpretOnly, but not
+//    kVerifyAtRuntime. But a target of kVerifyAtRuntime still reports a
+//    current kInterpretOnly file as out of date.
 //  * Test class linker falls back to unquickened dex for DexNoOat
 //  * Test class linker falls back to unquickened dex for MultiDexNoOat
 //  * Test using secondary isa
-//  * Test with profiling info?
 //  * Test for status of oat while oat is being generated (how?)
 //  * Test case where 32 and 64 bit boot class paths differ,
 //      and we ask IsInBootClassPath for a class in exactly one of the 32 or
@@ -1127,5 +1309,7 @@
 //    - Dex is stripped, don't have odex.
 //    - Oat file corrupted after status check, before reload unexecutable
 //    because it's unrelocated and no dex2oat
+//  * Test that an unrelocated oat file compiled for a specific target
+//    filter can be relocated to make it up to date.
 
 }  // namespace art
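
The tests above share one pattern: GetDexOptNeeded() now takes the target
CompilerFilter and reports what work, if any, would satisfy that filter. A
minimal sketch (not ART code; the oat_file_assistant setup is assumed from
the tests above) of how a caller might dispatch on the result:

    switch (oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)) {
      case OatFileAssistant::kNoDexOptNeeded:
        break;  // The existing oat/odex file already satisfies kSpeed.
      case OatFileAssistant::kSelfPatchOatNeeded:
      case OatFileAssistant::kPatchOatNeeded:
        // Compiled code exists but is unrelocated; patchoat suffices.
        break;
      case OatFileAssistant::kDex2OatNeeded:
        // No usable code (missing, too weak a filter, or no patch info);
        // a full dex2oat run is required.
        break;
    }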
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 9eee156..2e67ffe 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -22,17 +22,22 @@
 
 #include "base/logging.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
+#include "class_linker.h"
 #include "dex_file-inl.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
+#include "handle_scope-inl.h"
+#include "mirror/class_loader.h"
 #include "oat_file_assistant.h"
+#include "scoped_thread_state_change.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 
 namespace art {
 
-// For b/21333911.
-// Only enabled for debug builds to prevent bit rot. There are too many performance regressions for
-// normal builds.
-static constexpr bool kDuplicateClassesCheck = kIsDebugBuild;
+// If true, then we attempt to load the application image if it exists.
+static constexpr bool kEnableAppImage = true;
 
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
@@ -62,6 +67,20 @@
   compare.release();
 }
 
+const OatFile* OatFileManager::FindOpenedOatFileFromDexLocation(
+    const std::string& dex_base_location) const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
+    const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
+    for (const OatDexFile* oat_dex_file : oat_dex_files) {
+      if (DexFile::GetBaseLocation(oat_dex_file->GetDexFileLocation()) == dex_base_location) {
+        return oat_file.get();
+      }
+    }
+  }
+  return nullptr;
+}
+
 const OatFile* OatFileManager::FindOpenedOatFileFromOatLocation(const std::string& oat_location)
     const {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
@@ -78,20 +97,23 @@
   return nullptr;
 }
 
-const OatFile* OatFileManager::GetBootOatFile() const {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-  if (image_space == nullptr) {
-    return nullptr;
+std::vector<const OatFile*> OatFileManager::GetBootOatFiles() const {
+  std::vector<const OatFile*> oat_files;
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    oat_files.push_back(image_space->GetOatFile());
   }
-  return image_space->GetOatFile();
+  return oat_files;
 }
 
 const OatFile* OatFileManager::GetPrimaryOatFile() const {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
-  const OatFile* boot_oat_file = GetBootOatFile();
-  if (boot_oat_file != nullptr) {
+  std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
+  if (!boot_oat_files.empty()) {
     for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
-      if (oat_file.get() != boot_oat_file) {
+      if (std::find(boot_oat_files.begin(), boot_oat_files.end(), oat_file.get()) ==
+          boot_oat_files.end()) {
         return oat_file.get();
       }
     }
@@ -105,8 +127,13 @@
   oat_files_.clear();
 }
 
-const OatFile* OatFileManager::RegisterImageOatFile(gc::space::ImageSpace* space) {
-  return RegisterOatFile(space->ReleaseOatFile());
+std::vector<const OatFile*> OatFileManager::RegisterImageOatFiles(
+    std::vector<gc::space::ImageSpace*> spaces) {
+  std::vector<const OatFile*> oat_files;
+  for (gc::space::ImageSpace* space : spaces) {
+    oat_files.push_back(RegisterOatFile(space->ReleaseOatFile()));
+  }
+  return oat_files;
 }
 
 class DexFileAndClassPair : ValueObject {
@@ -141,7 +168,7 @@
 
   void Next() {
     ++current_class_index_;
-    cached_descriptor_ = GetClassDescriptor(dex_file_.get(), current_class_index_);
+    cached_descriptor_ = GetClassDescriptor(dex_file_, current_class_index_);
   }
 
   size_t GetCurrentClassIndex() const {
@@ -153,7 +180,7 @@
   }
 
   const DexFile* GetDexFile() const {
-    return dex_file_.get();
+    return dex_file_;
   }
 
  private:
@@ -164,7 +191,7 @@
   }
 
   const char* cached_descriptor_;
-  std::shared_ptr<const DexFile> dex_file_;
+  const DexFile* dex_file_;
   size_t current_class_index_;
   bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
                           // and what was loaded before. Any old duplicates must have been
@@ -174,14 +201,16 @@
 
 static void AddDexFilesFromOat(const OatFile* oat_file,
                                bool already_loaded,
-                               /*out*/std::priority_queue<DexFileAndClassPair>* heap) {
+                               /*out*/std::priority_queue<DexFileAndClassPair>* heap,
+                               std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
   for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
     std::string error;
     std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
     if (dex_file == nullptr) {
       LOG(WARNING) << "Could not create dex file from oat file: " << error;
     } else if (dex_file->NumClassDefs() > 0U) {
-      heap->emplace(dex_file.release(), /*current_class_index*/0U, already_loaded);
+      heap->emplace(dex_file.get(), /*current_class_index*/0U, already_loaded);
+      opened_dex_files->push_back(std::move(dex_file));
     }
   }
 }
@@ -194,51 +223,287 @@
   }
 }
 
+static void IterateOverJavaDexFile(mirror::Object* dex_file,
+                                   ArtField* const cookie_field,
+                                   std::function<bool(const DexFile*)> fn)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (dex_file != nullptr) {
+    mirror::LongArray* long_array = cookie_field->GetObject(dex_file)->AsLongArray();
+    if (long_array == nullptr) {
+      // This should never happen, so log a warning.
+      LOG(WARNING) << "Null DexFile::mCookie";
+      return;
+    }
+    int32_t long_array_size = long_array->GetLength();
+    // Start from 1 to skip the oat file.
+    for (int32_t j = 1; j < long_array_size; ++j) {
+      const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(
+          long_array->GetWithoutChecks(j)));
+      if (!fn(cp_dex_file)) {
+        return;
+      }
+    }
+  }
+}
+
+static void IterateOverPathClassLoader(
+    ScopedObjectAccessAlreadyRunnable& soa,
+    Handle<mirror::ClassLoader> class_loader,
+    MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements,
+    std::function<bool(const DexFile*)> fn) SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Handle this step of the class loader chain.
+  // Treat the class loader as if it were the child PathClassLoader.
+  // The class loader is a PathClassLoader, which inherits from BaseDexClassLoader.
+  // We need to get its DexPathList and loop through it.
+  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  mirror::Object* dex_path_list =
+      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
+      GetObject(class_loader.Get());
+  if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
+    // DexPathList has an array dexElements of Element objects, each of which contains a dex file.
+    mirror::Object* dex_elements_obj =
+        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+        GetObject(dex_path_list);
+    // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
+    // at the mCookie which is a DexFile vector.
+    if (dex_elements_obj != nullptr) {
+      dex_elements.Assign(dex_elements_obj->AsObjectArray<mirror::Object>());
+      for (int32_t i = 0; i < dex_elements->GetLength(); ++i) {
+        mirror::Object* element = dex_elements->GetWithoutChecks(i);
+        if (element == nullptr) {
+          // Should never happen; fall back to Java code to throw an NPE.
+          break;
+        }
+        mirror::Object* dex_file = dex_file_field->GetObject(element);
+        IterateOverJavaDexFile(dex_file, cookie_field, fn);
+      }
+    }
+  }
+}
+
+static bool GetDexFilesFromClassLoader(
+    ScopedObjectAccessAlreadyRunnable& soa,
+    mirror::ClassLoader* class_loader,
+    std::priority_queue<DexFileAndClassPair>* queue) SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (ClassLinker::IsBootClassLoader(soa, class_loader)) {
+    // The boot class loader. We don't load any of these files, as we know we compiled against
+    // them correctly.
+    return true;
+  }
+
+  // Unsupported class-loader?
+  if (class_loader->GetClass() !=
+      soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_system_PathClassLoader)) {
+    VLOG(class_linker) << "Unsupported class-loader " << PrettyClass(class_loader->GetClass());
+    return false;
+  }
+
+  bool recursive_result = GetDexFilesFromClassLoader(soa, class_loader->GetParent(), queue);
+  if (!recursive_result) {
+    // Something wrong up the chain.
+    return false;
+  }
+
+  // Collect all the dex files.
+  auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
+            SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (cp_dex_file->NumClassDefs() > 0) {
+      queue->emplace(cp_dex_file, 0U, true);
+    }
+    return true;  // Continue looking.
+  };
+
+  // Handles for the dex elements array and the class loader.
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements(
+      hs.NewHandle<mirror::ObjectArray<mirror::Object>>(nullptr));
+  Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
+
+  IterateOverPathClassLoader(soa, h_class_loader, dex_elements, GetDexFilesFn);
+
+  return true;
+}
+
+static void GetDexFilesFromDexElementsArray(
+    ScopedObjectAccessAlreadyRunnable& soa,
+    Handle<mirror::ObjectArray<mirror::Object>> dex_elements,
+    std::priority_queue<DexFileAndClassPair>* queue) SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (dex_elements.Get() == nullptr) {
+    // Nothing to do.
+    return;
+  }
+
+  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  const mirror::Class* const element_class = soa.Decode<mirror::Class*>(
+      WellKnownClasses::dalvik_system_DexPathList__Element);
+  const mirror::Class* const dexfile_class = soa.Decode<mirror::Class*>(
+        WellKnownClasses::dalvik_system_DexFile);
+
+  // Collect all the dex files.
+  auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (cp_dex_file != nullptr && cp_dex_file->NumClassDefs() > 0) {
+      queue->emplace(cp_dex_file, 0U, true);
+    }
+    return true;  // Continue looking.
+  };
+
+  for (int32_t i = 0; i < dex_elements->GetLength(); ++i) {
+    mirror::Object* element = dex_elements->GetWithoutChecks(i);
+    if (element == nullptr) {
+      continue;
+    }
+
+    // We support this being dalvik.system.DexPathList$Element and dalvik.system.DexFile.
+
+    mirror::Object* dex_file;
+    if (element->GetClass() == element_class) {
+      dex_file = dex_file_field->GetObject(element);
+    } else if (element->GetClass() == dexfile_class) {
+      dex_file = element;
+    } else {
+      LOG(WARNING) << "Unsupported element in dex_elements: " << PrettyClass(element->GetClass());
+      continue;
+    }
+
+    IterateOverJavaDexFile(dex_file, cookie_field, GetDexFilesFn);
+  }
+}
+
+static bool AreSharedLibrariesOk(const std::string shared_libraries,
+                                 std::priority_queue<DexFileAndClassPair>& queue) {
+  if (shared_libraries.empty()) {
+    if (queue.empty()) {
+      // No shared libraries or oat files, as expected.
+      return true;
+    }
+  } else {
+    if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
+      // If we find the special shared library, skip the shared libraries check.
+      return true;
+    }
+    // The shared libraries string is a series of dex file paths and their checksums, separated by '*'.
+    std::vector<std::string> shared_libraries_split;
+    Split(shared_libraries, '*', &shared_libraries_split);
+
+    size_t index = 0;
+    std::priority_queue<DexFileAndClassPair> temp = queue;
+    while (!temp.empty() && index < shared_libraries_split.size() - 1) {
+      DexFileAndClassPair pair(temp.top());
+      const DexFile* dex_file = pair.GetDexFile();
+      std::string dex_filename(dex_file->GetLocation());
+      uint32_t dex_checksum = dex_file->GetLocationChecksum();
+      if (dex_filename != shared_libraries_split[index] ||
+          dex_checksum != std::stoul(shared_libraries_split[index + 1])) {
+        break;
+      }
+      temp.pop();
+      index += 2;
+    }
+
+    // The check succeeds only if we made it through the entire queue and all the shared libraries.
+    return temp.empty() && index == shared_libraries_split.size();
+  }
+  return false;
+}
+
 // Check for class-def collisions in dex files.
 //
-// This works by maintaining a heap with one class from each dex file, sorted by the class
-// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
+// This first walks the class loader chain, getting all the dex files from the class loader. If
+// the class loader is null or one of the class loaders in the chain is unsupported, we collect
+// dex files from all open non-boot oat files to be safe.
+//
+// We then check whether the shared libraries are in the expected order and the dex files
+// have the expected checksums. If so, we exit early. Otherwise, we do the full collision check.
+//
+// The collision check works by maintaining a heap with one class from each dex file, sorted by the
+// class descriptor. Then a dex-file/class pair is continually removed from the heap and compared
 // against the following top element. If the descriptor is the same, it is now checked whether
 // the two elements agree on whether their dex file was from an already-loaded oat-file or the
 // new oat file. Any disagreement indicates a collision.
 bool OatFileManager::HasCollisions(const OatFile* oat_file,
+                                   jobject class_loader,
+                                   jobjectArray dex_elements,
                                    std::string* error_msg /*out*/) const {
   DCHECK(oat_file != nullptr);
   DCHECK(error_msg != nullptr);
-  if (!kDuplicateClassesCheck) {
-    return false;
+
+  std::priority_queue<DexFileAndClassPair> queue;
+
+  // Try to get dex files from the given class loader. If the class loader is null, or we do
+  // not support one of the class loaders in the chain, conservatively compare against all
+  // (non-boot) oat files.
+  bool class_loader_ok = false;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::ClassLoader> h_class_loader =
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader));
+    Handle<mirror::ObjectArray<mirror::Object>> h_dex_elements =
+        hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Object>*>(dex_elements));
+    if (h_class_loader.Get() != nullptr &&
+        GetDexFilesFromClassLoader(soa, h_class_loader.Get(), &queue)) {
+      class_loader_ok = true;
+
+      // In this case, also take into account the dex_elements array, if given. We don't need to
+      // read it otherwise, as we'll compare against all open oat files anyway.
+      GetDexFilesFromDexElementsArray(soa, h_dex_elements, &queue);
+    } else if (h_class_loader.Get() != nullptr) {
+      VLOG(class_linker) << "Something unsupported with "
+                         << PrettyClass(h_class_loader->GetClass());
+    }
   }
 
   // Dex files are registered late - once a class is actually being loaded. We have to compare
   // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses.
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
 
-  std::priority_queue<DexFileAndClassPair> queue;
+  // Vector that keeps the newly opened dex files alive; this prevents them from leaking.
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
 
-  // Add dex files from already loaded oat files, but skip boot.
-  const OatFile* boot_oat = GetBootOatFile();
-  // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
-  // need to check both against each other since they would have resolved the same way at compile
-  // time.
-  std::unordered_set<std::string> unique_locations;
-  for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
-    DCHECK_NE(loaded_oat_file.get(), oat_file);
-    const std::string& location = loaded_oat_file->GetLocation();
-    if (loaded_oat_file.get() != boot_oat &&
-        location != oat_file->GetLocation() &&
-        unique_locations.find(location) == unique_locations.end()) {
-      unique_locations.insert(location);
-      AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
+  if (!class_loader_ok) {
+    // Add dex files from already loaded oat files, but skip boot.
+
+    // Clean up the queue.
+    while (!queue.empty()) {
+      queue.pop();
+    }
+
+    std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
+    // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
+    // need to check both against each other since they would have resolved the same way at compile
+    // time.
+    std::unordered_set<std::string> unique_locations;
+    for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
+      DCHECK_NE(loaded_oat_file.get(), oat_file);
+      const std::string& location = loaded_oat_file->GetLocation();
+      if (std::find(boot_oat_files.begin(), boot_oat_files.end(), loaded_oat_file.get()) ==
+          boot_oat_files.end() && location != oat_file->GetLocation() &&
+          unique_locations.find(location) == unique_locations.end()) {
+        unique_locations.insert(location);
+        AddDexFilesFromOat(loaded_oat_file.get(),
+                           /*already_loaded*/true,
+                           &queue,
+                           /*out*/&opened_dex_files);
+      }
     }
   }
 
-  if (queue.empty()) {
-    // No other oat files, return early.
+  // Exit early if the shared libraries match. Otherwise, do a full duplicate-class check.
+  const std::string
+      shared_libraries(oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
+  if (AreSharedLibrariesOk(shared_libraries, queue)) {
     return false;
   }
 
+  ScopedTrace st("Collision check");
+
   // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue);
+  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue, &opened_dex_files);
 
   // Now drain the queue.
   while (!queue.empty()) {
@@ -276,19 +541,24 @@
 std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
     const char* dex_location,
     const char* oat_location,
+    jobject class_loader,
+    jobjectArray dex_elements,
     const OatFile** out_oat_file,
     std::vector<std::string>* error_msgs) {
+  ScopedTrace trace(__FUNCTION__);
   CHECK(dex_location != nullptr);
   CHECK(error_msgs != nullptr);
 
   // Verify we aren't holding the mutator lock, which could starve GC if we
   // have to generate or relocate an oat file.
-  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
+  Thread* const self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  Runtime* const runtime = Runtime::Current();
 
   OatFileAssistant oat_file_assistant(dex_location,
                                       oat_location,
                                       kRuntimeISA,
-                                      !Runtime::Current()->IsAotCompiler());
+                                      !runtime->IsAotCompiler());
 
   // Lock the target oat location to avoid races generating and loading the
   // oat file.
@@ -301,17 +571,34 @@
 
   const OatFile* source_oat_file = nullptr;
 
-  // Update the oat file on disk if we can. This may fail, but that's okay.
-  // Best effort is all that matters here.
-  if (!oat_file_assistant.MakeUpToDate(/*out*/&error_msg)) {
-    LOG(WARNING) << error_msg;
+  if (!oat_file_assistant.IsUpToDate()) {
+    // Update the oat file on disk if we can, based on the --compiler-filter
+    // option derived from the current runtime options.
+    // This may fail, but that's okay. Best effort is all that matters here.
+    switch (oat_file_assistant.MakeUpToDate(/*profile_changed*/false, /*out*/ &error_msg)) {
+      case OatFileAssistant::kUpdateFailed:
+        LOG(WARNING) << error_msg;
+        break;
+
+      case OatFileAssistant::kUpdateNotAttempted:
+        // Avoid spamming the logs if we decided not to attempt making the oat
+        // file up to date.
+        VLOG(oat) << error_msg;
+        break;
+
+      case OatFileAssistant::kUpdateSucceeded:
+        // Nothing to do.
+        break;
+    }
   }
 
   // Get the oat file on disk.
   std::unique_ptr<const OatFile> oat_file(oat_file_assistant.GetBestOatFile().release());
+
   if (oat_file != nullptr) {
     // Take the file only if it has no collisions, or we must take it because of preopting.
-    bool accept_oat_file = !HasCollisions(oat_file.get(), /*out*/ &error_msg);
+    bool accept_oat_file =
+        !HasCollisions(oat_file.get(), class_loader, dex_elements, /*out*/ &error_msg);
     if (!accept_oat_file) {
       // Failed the collision check. Print warning.
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
@@ -325,7 +612,7 @@
 
       // However, if the app was part of /system and preopted, there is no original dex file
       // available. In that case grudgingly accept the oat file.
-      if (!DexFile::MaybeDex(dex_location)) {
+      if (!oat_file_assistant.HasOriginalDexFiles()) {
         accept_oat_file = true;
         LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
                      << "Allow oat file use. This is potentially dangerous.";
@@ -343,7 +630,61 @@
 
   // Load the dex files from the oat file.
   if (source_oat_file != nullptr) {
-    dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+    bool added_image_space = false;
+    if (source_oat_file->IsExecutable()) {
+      std::unique_ptr<gc::space::ImageSpace> image_space =
+          kEnableAppImage ? oat_file_assistant.OpenImageSpace(source_oat_file) : nullptr;
+      if (image_space != nullptr) {
+        ScopedObjectAccess soa(self);
+        StackHandleScope<1> hs(self);
+        Handle<mirror::ClassLoader> h_loader(
+            hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader)));
+        // Cannot load an app image without a class loader.
+        if (h_loader.Get() != nullptr) {
+          std::string temp_error_msg;
+          // Adding the image space has a race condition, since other threads could be reading
+          // from the spaces array.
+          {
+            ScopedThreadSuspension sts(self, kSuspended);
+            gc::ScopedGCCriticalSection gcs(self,
+                                            gc::kGcCauseAddRemoveAppImageSpace,
+                                            gc::kCollectorTypeAddRemoveAppImageSpace);
+            ScopedSuspendAll ssa("Add image space");
+            runtime->GetHeap()->AddSpace(image_space.get());
+          }
+          {
+            ScopedTrace trace2(StringPrintf("Adding image space for location %s", dex_location));
+            added_image_space = runtime->GetClassLinker()->AddImageSpace(image_space.get(),
+                                                                         h_loader,
+                                                                         dex_elements,
+                                                                         dex_location,
+                                                                         /*out*/&dex_files,
+                                                                         /*out*/&temp_error_msg);
+          }
+          if (added_image_space) {
+            // Successfully added the image space to the heap; release the map so that it does
+            // not get freed.
+            image_space.release();
+          } else {
+            LOG(INFO) << "Failed to add image file " << temp_error_msg;
+            dex_files.clear();
+            {
+              ScopedThreadSuspension sts(self, kSuspended);
+              gc::ScopedGCCriticalSection gcs(self,
+                                              gc::kGcCauseAddRemoveAppImageSpace,
+                                              gc::kCollectorTypeAddRemoveAppImageSpace);
+              ScopedSuspendAll ssa("Remove image space");
+              runtime->GetHeap()->RemoveSpace(image_space.get());
+            }
+            // Non-fatal, don't update error_msg.
+          }
+        }
+      }
+    }
+    if (!added_image_space) {
+      DCHECK(dex_files.empty());
+      dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+    }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
     }
@@ -354,9 +695,12 @@
   if (dex_files.empty()) {
     if (oat_file_assistant.HasOriginalDexFiles()) {
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-        if (!DexFile::Open(dex_location, dex_location, /*out*/ &error_msg, &dex_files)) {
+        static constexpr bool kVerifyChecksum = true;
+        if (!DexFile::Open(
+            dex_location, dex_location, kVerifyChecksum, /*out*/ &error_msg, &dex_files)) {
           LOG(WARNING) << error_msg;
-          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
+          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location)
+                                + " because: " + error_msg);
         }
       } else {
         error_msgs->push_back("Fallback mode disabled, skipping dex files.");
@@ -366,28 +710,21 @@
           + std::string(dex_location));
     }
   }
+
+  // TODO(calin): Consider optimizing this, knowing that it is useless to record
+  // the use of fully compiled apks.
+  Runtime::Current()->NotifyDexLoaded(dex_location);
   return dex_files;
 }
 
-bool OatFileManager::RegisterOatFileLocation(const std::string& oat_location) {
-  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
-  auto it = oat_file_count_.find(oat_location);
-  if (it != oat_file_count_.end()) {
-    ++it->second;
-    return false;
-  }
-  oat_file_count_.insert(std::pair<std::string, size_t>(oat_location, 1u));
-  return true;
-}
-
-void OatFileManager::UnRegisterOatFileLocation(const std::string& oat_location) {
-  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
-  auto it = oat_file_count_.find(oat_location);
-  if (it != oat_file_count_.end()) {
-    --it->second;
-    if (it->second == 0) {
-      oat_file_count_.erase(it);
+void OatFileManager::DumpForSigQuit(std::ostream& os) {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
+  for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
+    if (ContainsElement(boot_oat_files, oat_file.get())) {
+      continue;
     }
+    os << oat_file->GetLocation() << ": " << oat_file->GetCompilerFilter() << "\n";
   }
 }
 
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index af7efb4..45ac4b7 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -25,6 +25,7 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "jni.h"
 
 namespace art {
 
@@ -58,30 +59,25 @@
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
-  // Attempt to reserve a location, returns false if it is already reserved or already in used by
-  // an oat file.
-  bool RegisterOatFileLocation(const std::string& oat_location)
-      REQUIRES(!Locks::oat_file_count_lock_);
-
-  // Unreserve oat file location, should only be used for error cases since RegisterOatFile will
-  // remove the reserved location.
-  void UnRegisterOatFileLocation(const std::string& oat_location)
-      REQUIRES(!Locks::oat_file_count_lock_);
+  // Find the oat file that contains a dex file with the given dex base location;
+  // returns null if there is none.
+  const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_base_location) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
 
   // Returns true if we have a non pic oat file.
   bool HaveNonPicOatFile() const {
     return have_non_pic_oat_file_;
   }
 
-  // Returns the boot image oat file.
-  const OatFile* GetBootOatFile() const;
+  // Returns the boot image oat files.
+  std::vector<const OatFile*> GetBootOatFiles() const;
 
   // Returns the first non-image oat file in the class path.
   const OatFile* GetPrimaryOatFile() const REQUIRES(!Locks::oat_file_manager_lock_);
 
-  // Return the oat file for an image, registers the oat file. Takes ownership of the imagespace's
-  // underlying oat file.
-  const OatFile* RegisterImageOatFile(gc::space::ImageSpace* space)
+  // Returns the oat files for the images and registers them.
+  // Takes ownership of the image spaces' underlying oat files.
+  std::vector<const OatFile*> RegisterImageOatFiles(std::vector<gc::space::ImageSpace*> spaces)
       REQUIRES(!Locks::oat_file_manager_lock_);
 
   // Finds or creates the oat file holding dex_location. Then loads and returns
@@ -101,22 +97,33 @@
   std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
       const char* dex_location,
       const char* oat_location,
+      jobject class_loader,
+      jobjectArray dex_elements,
       /*out*/ const OatFile** out_oat_file,
       /*out*/ std::vector<std::string>* error_msgs)
       REQUIRES(!Locks::oat_file_manager_lock_, !Locks::mutator_lock_);
 
+  void DumpForSigQuit(std::ostream& os);
+
  private:
-  // Check for duplicate class definitions of the given oat file against all open oat files.
+  // Check that the shared libraries in the given oat file match those in the given class loader
+  // and dex elements. If the class loader is null or we do not support one of the class loaders
+  // in the chain, compare against all non-boot oat files instead. If the shared libraries do not
+  // match, check the given oat file for duplicate class definitions against the other oat files
+  // (those from the class loader and dex elements if possible, or all non-boot oat files otherwise).
   // Return true if there are any class definition collisions in the oat_file.
-  bool HasCollisions(const OatFile* oat_file, /*out*/std::string* error_msg) const
+  bool HasCollisions(const OatFile* oat_file,
+                     jobject class_loader,
+                     jobjectArray dex_elements,
+                     /*out*/ std::string* error_msg) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
   const OatFile* FindOpenedOatFileFromOatLocationLocked(const std::string& oat_location) const
       REQUIRES(Locks::oat_file_manager_lock_);
 
   std::set<std::unique_ptr<const OatFile>> oat_files_ GUARDED_BY(Locks::oat_file_manager_lock_);
-  std::unordered_map<std::string, size_t> oat_file_count_ GUARDED_BY(Locks::oat_file_count_lock_);
   bool have_non_pic_oat_file_;
+
   DISALLOW_COPY_AND_ASSIGN(OatFileManager);
 };
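
DumpForSigQuit(), declared above and defined in the .cc change, emits one
line per non-boot oat file into the SIGQUIT (ANR) report, in the form
"<oat location>: <compiler filter>". A hypothetical report line:

    /data/dalvik-cache/arm64/system@app@Example.apk@classes.dex: speed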
 
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index 9786c05..0ab2bfe 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -17,23 +17,18 @@
 #include "oat_quick_method_header.h"
 
 #include "art_method.h"
-#include "mapping_table.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 
 namespace art {
 
 OatQuickMethodHeader::OatQuickMethodHeader(
-    uint32_t mapping_table_offset,
     uint32_t vmap_table_offset,
-    uint32_t gc_map_offset,
     uint32_t frame_size_in_bytes,
     uint32_t core_spill_mask,
     uint32_t fp_spill_mask,
     uint32_t code_size)
-    : mapping_table_offset_(mapping_table_offset),
-      vmap_table_offset_(vmap_table_offset),
-      gc_map_offset_(gc_map_offset),
+    : vmap_table_offset_(vmap_table_offset),
       frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
       code_size_(code_size) {}
 
@@ -46,34 +41,14 @@
   uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   if (IsOptimized()) {
     CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset, encoding);
     if (stack_map.IsValid()) {
-      return stack_map.GetDexPc(encoding);
+      return stack_map.GetDexPc(encoding.stack_map_encoding);
     }
   } else {
-    MappingTable table(GetMappingTable());
-    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
-    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
-    if (table.TotalSize() == 0) {
-      DCHECK(method->IsNative());
-      return DexFile::kDexNoIndex;
-    }
-
-    // Assume the caller wants a pc-to-dex mapping so check here first.
-    typedef MappingTable::PcToDexIterator It;
-    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
-    // Now check dex-to-pc mappings.
-    typedef MappingTable::DexToPcIterator It2;
-    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
+    DCHECK(method->IsNative());
+    return DexFile::kDexNoIndex;
   }
   if (abort_on_failure) {
     ScopedObjectAccess soa(Thread::Current());
@@ -91,43 +66,22 @@
                                                 bool is_for_catch_handler,
                                                 bool abort_on_failure) const {
   const void* entry_point = GetEntryPoint();
-  if (IsOptimized()) {
-    // Optimized code does not have a mapping table. Search for the dex-to-pc
-    // mapping in stack maps.
-    CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+  DCHECK(!method->IsNative());
+  DCHECK(IsOptimized());
+  // Search for the dex-to-pc mapping in stack maps.
+  CodeInfo code_info = GetOptimizedCodeInfo();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
 
-    // All stack maps are stored in the same CodeItem section, safepoint stack
-    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
-    // the order of iteration.
-    StackMap stack_map =
-        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
-                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
-    if (stack_map.IsValid()) {
-      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
-    }
-  } else {
-    MappingTable table(GetMappingTable());
-    if (table.TotalSize() == 0) {
-      DCHECK_EQ(dex_pc, 0U);
-      return 0;   // Special no mapping/pc == 0 case
-    }
-    // Assume the caller wants a dex-to-pc mapping so check here first.
-    typedef MappingTable::DexToPcIterator It;
-    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
-    // Now check pc-to-dex mappings.
-    typedef MappingTable::PcToDexIterator It2;
-    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
+  // All stack maps are stored in the same CodeItem section, safepoint stack
+  // maps first, then catch stack maps. We use `is_for_catch_handler` to select
+  // the order of iteration.
+  StackMap stack_map =
+      LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
+                                   : code_info.GetStackMapForDexPc(dex_pc, encoding);
+  if (stack_map.IsValid()) {
+    return reinterpret_cast<uintptr_t>(entry_point) +
+           stack_map.GetNativePcOffset(encoding.stack_map_encoding);
   }
-
   if (abort_on_failure) {
     ScopedObjectAccess soa(Thread::Current());
     LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
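
With the mapping-table paths removed, both directions of the pc mapping for
optimized code go through the stack maps in CodeInfo, and anything without
a vmap table must be a native method. A condensed sketch of the forward
lookup, using only names from the diff (header and offset are assumed to be
in scope):

    CodeInfo code_info = header->GetOptimizedCodeInfo();
    CodeInfoEncoding encoding = code_info.ExtractEncoding();
    StackMap stack_map = code_info.GetStackMapForNativePcOffset(offset, encoding);
    uint32_t dex_pc = stack_map.IsValid()
        ? stack_map.GetDexPc(encoding.stack_map_encoding)
        : DexFile::kDexNoIndex;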
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 6eadd87..abddc6d 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -21,6 +21,7 @@
 #include "base/macros.h"
 #include "quick/quick_method_frame_info.h"
 #include "stack_map.h"
+#include "utils.h"
 
 namespace art {
 
@@ -29,9 +30,7 @@
 // OatQuickMethodHeader precedes the raw code chunk generated by the compiler.
 class PACKED(4) OatQuickMethodHeader {
  public:
-  OatQuickMethodHeader(uint32_t mapping_table_offset = 0U,
-                       uint32_t vmap_table_offset = 0U,
-                       uint32_t gc_map_offset = 0U,
+  OatQuickMethodHeader(uint32_t vmap_table_offset = 0U,
                        uint32_t frame_size_in_bytes = 0U,
                        uint32_t core_spill_mask = 0U,
                        uint32_t fp_spill_mask = 0U,
@@ -39,6 +38,19 @@
 
   ~OatQuickMethodHeader();
 
+  static OatQuickMethodHeader* FromCodePointer(const void* code_ptr) {
+    uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
+    uintptr_t header = code - OFFSETOF_MEMBER(OatQuickMethodHeader, code_);
+    DCHECK(IsAlignedParam(code, GetInstructionSetAlignment(kRuntimeISA)) ||
+           IsAlignedParam(header, GetInstructionSetAlignment(kRuntimeISA)))
+        << std::hex << code << " " << std::hex << header;
+    return reinterpret_cast<OatQuickMethodHeader*>(header);
+  }
+
+  static OatQuickMethodHeader* FromEntryPoint(const void* entry_point) {
+    return FromCodePointer(EntryPointToCodePointer(entry_point));
+  }
+
   OatQuickMethodHeader& operator=(const OatQuickMethodHeader&) = default;
 
   uintptr_t NativeQuickPcOffset(const uintptr_t pc) const {
@@ -46,25 +58,25 @@
   }
 
   bool IsOptimized() const {
-    return gc_map_offset_ == 0 && vmap_table_offset_ != 0;
+    return code_size_ != 0 && vmap_table_offset_ != 0;
+  }
+
+  const void* GetOptimizedCodeInfoPtr() const {
+    DCHECK(IsOptimized());
+    const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
+    return data;
   }
 
   CodeInfo GetOptimizedCodeInfo() const {
-    DCHECK(IsOptimized());
-    const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
-    return CodeInfo(data);
+    return CodeInfo(GetOptimizedCodeInfoPtr());
   }
 
   const uint8_t* GetCode() const {
     return code_;
   }
 
-  const uint8_t* GetNativeGcMap() const {
-    return (gc_map_offset_ == 0) ? nullptr : code_ - gc_map_offset_;
-  }
-
-  const uint8_t* GetMappingTable() const {
-    return (mapping_table_offset_ == 0) ? nullptr : code_ - mapping_table_offset_;
+  uint32_t GetCodeSize() const {
+    return code_size_;
   }
 
   const uint8_t* GetVmapTable() const {
@@ -74,6 +86,11 @@
 
   bool Contains(uintptr_t pc) const {
     uintptr_t code_start = reinterpret_cast<uintptr_t>(code_);
+    static_assert(kRuntimeISA != kThumb2, "kThumb2 cannot be a runtime ISA");
+    if (kRuntimeISA == kArm) {
+      // On Thumb-2, the pc is offset by one.
+      code_start++;
+    }
     return code_start <= pc && pc <= (code_start + code_size_);
   }
 
@@ -89,10 +106,10 @@
   }
 
   template <bool kCheckFrameSize = true>
-  uint32_t GetFrameSizeInBytes() {
+  uint32_t GetFrameSizeInBytes() const {
     uint32_t result = frame_info_.FrameSizeInBytes();
     if (kCheckFrameSize) {
-      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
+      DCHECK_ALIGNED(result, kStackAlignment);
     }
     return result;
   }
@@ -108,12 +125,8 @@
 
   uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
 
-  // The offset in bytes from the start of the mapping table to the end of the header.
-  uint32_t mapping_table_offset_;
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
-  // The offset in bytes from the start of the gc map to the end of the header.
-  uint32_t gc_map_offset_;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
   // The code size in bytes.
diff --git a/runtime/object_lock.cc b/runtime/object_lock.cc
index f7accc0..b8754a4 100644
--- a/runtime/object_lock.cc
+++ b/runtime/object_lock.cc
@@ -47,7 +47,22 @@
   obj_->NotifyAll(self_);
 }
 
+template <typename T>
+ObjectTryLock<T>::ObjectTryLock(Thread* self, Handle<T> object) : self_(self), obj_(object) {
+  CHECK(object.Get() != nullptr);
+  acquired_ = obj_->MonitorTryEnter(self_) != nullptr;
+}
+
+template <typename T>
+ObjectTryLock<T>::~ObjectTryLock() {
+  if (acquired_) {
+    obj_->MonitorExit(self_);
+  }
+}
+
 template class ObjectLock<mirror::Class>;
 template class ObjectLock<mirror::Object>;
+template class ObjectTryLock<mirror::Class>;
+template class ObjectTryLock<mirror::Object>;
 
 }  // namespace art
diff --git a/runtime/object_lock.h b/runtime/object_lock.h
index eb7cbd8..7f02b37 100644
--- a/runtime/object_lock.h
+++ b/runtime/object_lock.h
@@ -45,6 +45,25 @@
   DISALLOW_COPY_AND_ASSIGN(ObjectLock);
 };
 
+template <typename T>
+class ObjectTryLock {
+ public:
+  ObjectTryLock(Thread* self, Handle<T> object) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ~ObjectTryLock() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool Acquired() const {
+    return acquired_;
+  }
+
+ private:
+  Thread* const self_;
+  Handle<T> const obj_;
+  bool acquired_;
+
+  DISALLOW_COPY_AND_ASSIGN(ObjectTryLock);
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_OBJECT_LOCK_H_
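
Unlike ObjectLock, ObjectTryLock never blocks: its constructor makes a
single MonitorTryEnter() attempt and records the outcome, so callers must
check Acquired() before touching monitor-protected state. A hedged usage
sketch (self and h_obj are assumed to be a valid Thread* and Handle):

    ObjectTryLock<mirror::Object> try_lock(self, h_obj);
    if (try_lock.Acquired()) {
      // Monitor held; it is released when try_lock goes out of scope.
    } else {
      // Another thread holds the monitor; fall back or retry later.
    }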
diff --git a/runtime/offsets.h b/runtime/offsets.h
index 9d5063f..aaf5c0c 100644
--- a/runtime/offsets.h
+++ b/runtime/offsets.h
@@ -19,6 +19,7 @@
 
 #include <ostream>
 
+#include "base/enums.h"
 #include "globals.h"
 
 namespace art {
@@ -51,12 +52,15 @@
 };
 
 // Offsets relative to the current running thread.
-template<size_t pointer_size>
+template<PointerSize pointer_size>
 class ThreadOffset : public Offset {
  public:
   explicit ThreadOffset(size_t val) : Offset(val) {}
 };
 
+using ThreadOffset32 = ThreadOffset<PointerSize::k32>;
+using ThreadOffset64 = ThreadOffset<PointerSize::k64>;
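+// For example, code generators describe Thread entrypoint offsets as
+// ThreadOffset32 when targeting a 32-bit ISA and ThreadOffset64 on 64-bit.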
+
 // Offsets relative to an object.
 class MemberOffset : public Offset {
  public:
diff --git a/runtime/openjdkjvm/Android.mk b/runtime/openjdkjvm/Android.mk
new file mode 100644
index 0000000..9b7404e
--- /dev/null
+++ b/runtime/openjdkjvm/Android.mk
@@ -0,0 +1,20 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := openjdkjvm-phony
+include $(BUILD_PHONY_PACKAGE)
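+
+# Note: the sources in this directory are built elsewhere (via the runtime
+# makefiles); this phony module appears to exist only so the build system
+# picks up this directory's NOTICE and license files.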
diff --git a/test/530-checker-loops/expected.txt b/runtime/openjdkjvm/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to runtime/openjdkjvm/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
diff --git a/runtime/openjdkjvm/NOTICE b/runtime/openjdkjvm/NOTICE
new file mode 100644
index 0000000..700a206
--- /dev/null
+++ b/runtime/openjdkjvm/NOTICE
@@ -0,0 +1,29 @@
+Copyright (C) 2014 The Android Open Source Project
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This file implements interfaces from the file jvm.h. This implementation
+is licensed under the same terms as the file jvm.h.  The
+copyright and license information for the file jvm.h follows.
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License version 2 only, as
+published by the Free Software Foundation.  Oracle designates this
+particular file as subject to the "Classpath" exception as provided
+by Oracle in the LICENSE file that accompanied this code.
+
+This code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+version 2 for more details (a copy is included in the LICENSE file that
+accompanied this code).
+
+You should have received a copy of the GNU General Public License version
+2 along with this work; if not, write to the Free Software Foundation,
+Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+or visit www.oracle.com if you need additional information or have any
+questions.
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
new file mode 100644
index 0000000..54ec5d3
--- /dev/null
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -0,0 +1,475 @@
+/* Copyright (C) 2014 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvm.h. This implementation
+ * is licensed under the same terms as the file jvm.h.  The
+ * copyright and license information for the file jvm.h follows.
+ *
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * Services that OpenJDK expects the VM to provide.
+ */
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <math.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
+#include "ScopedLocalRef.h"
+#include "ScopedUtfChars.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "common_throws.h"
+#include "gc/heap.h"
+#include "handle_scope-inl.h"
+#include "jni_internal.h"
+#include "mirror/class_loader.h"
+#include "mirror/string-inl.h"
+#include "native/scoped_fast_native_object_access.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "verify_object-inl.h"
+
+#undef LOG_TAG
+#define LOG_TAG "artopenjdk"
+
+using art::WARNING;
+using art::INFO;
+using art::ERROR;
+using art::FATAL;
+
+/* posix open() with extensions; used by e.g. ZipFile */
+JNIEXPORT jint JVM_Open(const char* fname, jint flags, jint mode) {
+    /*
+     * Some code seems to want the special return value JVM_EEXIST if the
+     * file open fails due to O_EXCL.
+     */
+    // Don't use JVM_O_DELETE, it's problematic with FUSE, see b/28901232.
+    if (flags & JVM_O_DELETE) {
+        LOG(FATAL) << "JVM_O_DELETE option is not supported (while opening: '"
+                   << fname << "')";
+    }
+
+    int fd = TEMP_FAILURE_RETRY(open(fname, flags & ~JVM_O_DELETE, mode));
+    if (fd < 0) {
+        int err = errno;
+        if (err == EEXIST) {
+            return JVM_EEXIST;
+        } else {
+            return -1;
+        }
+    }
+
+    return fd;
+}
+
+/* posix close() */
+JNIEXPORT jint JVM_Close(jint fd) {
+    // Don't want TEMP_FAILURE_RETRY here -- file is closed even if EINTR.
+    return close(fd);
+}
+
+/* posix read() */
+JNIEXPORT jint JVM_Read(jint fd, char* buf, jint nbytes) {
+    return TEMP_FAILURE_RETRY(read(fd, buf, nbytes));
+}
+
+/* posix write(); used, among other things, to write messages to stderr */
+JNIEXPORT jint JVM_Write(jint fd, char* buf, jint nbytes) {
+    return TEMP_FAILURE_RETRY(write(fd, buf, nbytes));
+}
+
+/* posix lseek() */
+JNIEXPORT jlong JVM_Lseek(jint fd, jlong offset, jint whence) {
+#if !defined(__APPLE__)
+    // NOTE: Using TEMP_FAILURE_RETRY here is busted for LP32 on glibc - the return
+    // value will be coerced into an int32_t.
+    //
+    // lseek64 isn't specified to return EINTR so it shouldn't be necessary
+    // anyway.
+    return lseek64(fd, offset, whence);
+#else
+    // NOTE: This code is compiled for Mac OS but isn't ever run on that
+    // platform.
+    return lseek(fd, offset, whence);
+#endif
+}
+
+/*
+ * "raw monitors" seem to be expected to behave like non-recursive pthread
+ * mutexes.  They're used by ZipFile.
+ */
+JNIEXPORT void* JVM_RawMonitorCreate(void) {
+    pthread_mutex_t* mutex =
+        reinterpret_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+    CHECK(mutex != nullptr);
+    CHECK_PTHREAD_CALL(pthread_mutex_init, (mutex, nullptr), "JVM_RawMonitorCreate");
+    return mutex;
+}
+
+JNIEXPORT void JVM_RawMonitorDestroy(void* mon) {
+    CHECK_PTHREAD_CALL(pthread_mutex_destroy,
+                       (reinterpret_cast<pthread_mutex_t*>(mon)),
+                       "JVM_RawMonitorDestroy");
+    free(mon);
+}
+
+JNIEXPORT jint JVM_RawMonitorEnter(void* mon) {
+    return pthread_mutex_lock(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT void JVM_RawMonitorExit(void* mon) {
+    CHECK_PTHREAD_CALL(pthread_mutex_unlock,
+                       (reinterpret_cast<pthread_mutex_t*>(mon)),
+                       "JVM_RawMonitorExit");
+}
+
+JNIEXPORT char* JVM_NativePath(char* path) {
+    return path;
+}
+
+JNIEXPORT jint JVM_GetLastErrorString(char* buf, int len) {
+#if defined(__GLIBC__) || defined(__BIONIC__)
+  if (len == 0) {
+    return 0;
+  }
+
+  const int err = errno;
+  char* result = strerror_r(err, buf, len);
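+  // The GNU strerror_r in play on glibc/bionic may return a pointer to a
+  // static string rather than filling buf; copy the message over if so.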
+  if (result != buf) {
+    strncpy(buf, result, len);
+    buf[len - 1] = '\0';
+  }
+
+  return strlen(buf);
+#else
+  UNUSED(buf);
+  UNUSED(len);
+  return -1;
+#endif
+}
+
+JNIEXPORT int jio_fprintf(FILE* fp, const char* fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    int len = jio_vfprintf(fp, fmt, args);
+    va_end(args);
+
+    return len;
+}
+
+JNIEXPORT int jio_vfprintf(FILE* fp, const char* fmt, va_list args) {
+    assert(fp != NULL);
+    return vfprintf(fp, fmt, args);
+}
+
+/* posix fsync() */
+JNIEXPORT jint JVM_Sync(jint fd) {
+    return TEMP_FAILURE_RETRY(fsync(fd));
+}
+
+JNIEXPORT void* JVM_FindLibraryEntry(void* handle, const char* name) {
+    return dlsym(handle, name);
+}
+
+JNIEXPORT jlong JVM_CurrentTimeMillis(JNIEnv* env ATTRIBUTE_UNUSED,
+                                      jclass clazz ATTRIBUTE_UNUSED) {
+    struct timeval tv;
+    gettimeofday(&tv, (struct timezone *) NULL);
+    jlong when = tv.tv_sec * 1000LL + tv.tv_usec / 1000;
+    return when;
+}
+
+JNIEXPORT jint JVM_Socket(jint domain, jint type, jint protocol) {
+    return TEMP_FAILURE_RETRY(socket(domain, type, protocol));
+}
+
+JNIEXPORT jint JVM_InitializeSocketLibrary() {
+  return 0;
+}
+
+int jio_vsnprintf(char *str, size_t count, const char *fmt, va_list args) {
+  if ((intptr_t)count <= 0) return -1;
+  return vsnprintf(str, count, fmt, args);
+}
+
+int jio_snprintf(char *str, size_t count, const char *fmt, ...) {
+  va_list args;
+  int len;
+  va_start(args, fmt);
+  len = jio_vsnprintf(str, count, fmt, args);
+  va_end(args);
+  return len;
+}
+
+JNIEXPORT jint JVM_SetSockOpt(jint fd, int level, int optname,
+    const char* optval, int optlen) {
+  return TEMP_FAILURE_RETRY(setsockopt(fd, level, optname, optval, optlen));
+}
+
+JNIEXPORT jint JVM_SocketShutdown(jint fd, jint howto) {
+  return TEMP_FAILURE_RETRY(shutdown(fd, howto));
+}
+
+JNIEXPORT jint JVM_GetSockOpt(jint fd, int level, int optname, char* optval,
+    int* optlen) {
+  socklen_t len = *optlen;
+  int cc = TEMP_FAILURE_RETRY(getsockopt(fd, level, optname, optval, &len));
+  *optlen = len;
+  return cc;
+}
+
+JNIEXPORT jint JVM_GetSockName(jint fd, struct sockaddr* addr, int* addrlen) {
+  socklen_t len = *addrlen;
+  int cc = TEMP_FAILURE_RETRY(getsockname(fd, addr, &len));
+  *addrlen = len;
+  return cc;
+}
+
+JNIEXPORT jint JVM_SocketAvailable(jint fd, jint* result) {
+  if (TEMP_FAILURE_RETRY(ioctl(fd, FIONREAD, result)) < 0) {
+    return JNI_FALSE;
+  }
+
+  return JNI_TRUE;
+}
+
+JNIEXPORT jint JVM_Send(jint fd, char* buf, jint nBytes, jint flags) {
+  return TEMP_FAILURE_RETRY(send(fd, buf, nBytes, flags));
+}
+
+JNIEXPORT jint JVM_SocketClose(jint fd) {
+  // Don't want TEMP_FAILURE_RETRY here -- file is closed even if EINTR.
+  return close(fd);
+}
+
+JNIEXPORT jint JVM_Listen(jint fd, jint count) {
+  return TEMP_FAILURE_RETRY(listen(fd, count));
+}
+
+JNIEXPORT jint JVM_Connect(jint fd, struct sockaddr* addr, jint addrlen) {
+  return TEMP_FAILURE_RETRY(connect(fd, addr, addrlen));
+}
+
+JNIEXPORT int JVM_GetHostName(char* name, int namelen) {
+  return TEMP_FAILURE_RETRY(gethostname(name, namelen));
+}
+
+JNIEXPORT jstring JVM_InternString(JNIEnv* env, jstring jstr) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::mirror::String* s = soa.Decode<art::mirror::String*>(jstr);
+  art::mirror::String* result = s->Intern();
+  return soa.AddLocalReference<jstring>(result);
+}
+
+JNIEXPORT jlong JVM_FreeMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetFreeMemory();
+}
+
+JNIEXPORT jlong JVM_TotalMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetTotalMemory();
+}
+
+JNIEXPORT jlong JVM_MaxMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetMaxMemory();
+}
+
+JNIEXPORT void JVM_GC(void) {
+  if (art::Runtime::Current()->IsExplicitGcDisabled()) {
+    LOG(INFO) << "Explicit GC skipped.";
+    return;
+  }
+  art::Runtime::Current()->GetHeap()->CollectGarbage(false);
+}
+
+JNIEXPORT __attribute__((noreturn)) void JVM_Exit(jint status) {
+  LOG(INFO) << "System.exit called, status: " << status;
+  art::Runtime::Current()->CallExitHook(status);
+  exit(status);
+}
+
+JNIEXPORT jstring JVM_NativeLoad(JNIEnv* env,
+                                 jstring javaFilename,
+                                 jobject javaLoader,
+                                 jstring javaLibrarySearchPath) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (filename.c_str() == NULL) {
+    return NULL;
+  }
+
+  std::string error_msg;
+  {
+    art::JavaVMExt* vm = art::Runtime::Current()->GetJavaVM();
+    bool success = vm->LoadNativeLibrary(env,
+                                         filename.c_str(),
+                                         javaLoader,
+                                         javaLibrarySearchPath,
+                                         &error_msg);
+    if (success) {
+      return nullptr;
+    }
+  }
+
+  // Don't let a pending exception from JNI_OnLoad cause a CheckJNI issue with NewStringUTF.
+  env->ExceptionClear();
+  return env->NewStringUTF(error_msg.c_str());
+}
+
+JNIEXPORT void JVM_StartThread(JNIEnv* env, jobject jthread, jlong stack_size, jboolean daemon) {
+  art::Thread::CreateNativeThread(env, jthread, stack_size, daemon == JNI_TRUE);
+}
+
+JNIEXPORT void JVM_SetThreadPriority(JNIEnv* env, jobject jthread, jint prio) {
+  art::ScopedObjectAccess soa(env);
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+  art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+  if (thread != NULL) {
+    thread->SetNativePriority(prio);
+  }
+}
+
+JNIEXPORT void JVM_Yield(JNIEnv* env ATTRIBUTE_UNUSED, jclass threadClass ATTRIBUTE_UNUSED) {
+  sched_yield();
+}
+
+JNIEXPORT void JVM_Sleep(JNIEnv* env, jclass threadClass ATTRIBUTE_UNUSED,
+                         jobject java_lock, jlong millis) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::mirror::Object* lock = soa.Decode<art::mirror::Object*>(java_lock);
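+  // Thread.sleep is implemented as a timed monitor wait on the private lock
+  // object that libcore passes in as java_lock.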
+  art::Monitor::Wait(art::Thread::Current(), lock, millis, 0, true, art::kSleeping);
+}
+
+JNIEXPORT jobject JVM_CurrentThread(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  return soa.AddLocalReference<jobject>(soa.Self()->GetPeer());
+}
+
+JNIEXPORT void JVM_Interrupt(JNIEnv* env, jobject jthread) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+  art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+  if (thread != nullptr) {
+    thread->Interrupt(soa.Self());
+  }
+}
+
+JNIEXPORT jboolean JVM_IsInterrupted(JNIEnv* env, jobject jthread, jboolean clearInterrupted) {
+  if (clearInterrupted) {
+    return static_cast<art::JNIEnvExt*>(env)->self->Interrupted() ? JNI_TRUE : JNI_FALSE;
+  } else {
+    art::ScopedFastNativeObjectAccess soa(env);
+    art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+    art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+    return (thread != nullptr) ? thread->IsInterrupted() : JNI_FALSE;
+  }
+}
+
+JNIEXPORT jboolean JVM_HoldsLock(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED, jobject jobj) {
+  art::ScopedObjectAccess soa(env);
+  art::mirror::Object* object = soa.Decode<art::mirror::Object*>(jobj);
+  if (object == NULL) {
+    art::ThrowNullPointerException("object == null");
+    return JNI_FALSE;
+  }
+  return soa.Self()->HoldsLock(object);
+}
+
+JNIEXPORT void JVM_SetNativeThreadName(JNIEnv* env, jobject jthread, jstring java_name) {
+  ScopedUtfChars name(env, java_name);
+  {
+    art::ScopedObjectAccess soa(env);
+    if (soa.Decode<art::mirror::Object*>(jthread) == soa.Self()->GetPeer()) {
+      soa.Self()->SetThreadName(name.c_str());
+      return;
+    }
+  }
+  // Suspend the thread to prevent it from exiting while we set its name. We cannot simply hold
+  // the thread list lock for the duration, as setting the thread name acquires and releases the
+  // mutator lock in the DDMS send code.
+  art::ThreadList* thread_list = art::Runtime::Current()->GetThreadList();
+  bool timed_out;
+  // SuspendThreadByPeer() itself guards against races with other threads trying to suspend
+  // this one.
+  art::Thread* thread = thread_list->SuspendThreadByPeer(jthread, true, false, &timed_out);
+  if (thread != NULL) {
+    {
+      art::ScopedObjectAccess soa(env);
+      thread->SetThreadName(name.c_str());
+    }
+    thread_list->Resume(thread, false);
+  } else if (timed_out) {
+    LOG(ERROR) << "Failed to set thread name to '" << name.c_str() << "': the thread "
+        "did not suspend within a generous timeout.";
+  }
+}
+
+JNIEXPORT jint JVM_IHashCode(JNIEnv* env ATTRIBUTE_UNUSED,
+                             jobject javaObject ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "JVM_IHashCode is not implemented";
+  return 0;
+}
+
+JNIEXPORT jlong JVM_NanoTime(JNIEnv* env ATTRIBUTE_UNUSED, jclass unused ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "JVM_NanoTime is not implemented";
+  return 0L;
+}
+
+JNIEXPORT void JVM_ArrayCopy(JNIEnv* /* env */, jclass /* unused */, jobject /* javaSrc */,
+                             jint /* srcPos */, jobject /* javaDst */, jint /* dstPos */,
+                             jint /* length */) {
+  UNIMPLEMENTED(FATAL) << "JVM_ArrayCopy is not implemented";
+}
+
+JNIEXPORT jint JVM_FindSignal(const char* name ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_FindSignal is not implemented";
+  return 0;
+}
+
+JNIEXPORT void* JVM_RegisterSignal(jint signum ATTRIBUTE_UNUSED, void* handler ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_RegisterSignal is not implemented";
+  return nullptr;
+}
+
+JNIEXPORT jboolean JVM_RaiseSignal(jint signum ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_RaiseSignal is not implemented";
+  return JNI_FALSE;
+}
+
+JNIEXPORT __attribute__((noreturn)) void JVM_Halt(jint code) {
+  exit(code);
+}
+
+JNIEXPORT jboolean JVM_IsNaN(jdouble d) {
+  return isnan(d);
+}
diff --git a/runtime/openjdkjvmti/Android.mk b/runtime/openjdkjvmti/Android.mk
new file mode 100644
index 0000000..1de20e8
--- /dev/null
+++ b/runtime/openjdkjvmti/Android.mk
@@ -0,0 +1,20 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := openjdkjvmti-phony
+include $(BUILD_PHONY_PACKAGE)
diff --git a/test/530-checker-loops/expected.txt b/runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
diff --git a/runtime/openjdkjvmti/NOTICE b/runtime/openjdkjvmti/NOTICE
new file mode 100644
index 0000000..6ec62cd
--- /dev/null
+++ b/runtime/openjdkjvmti/NOTICE
@@ -0,0 +1,29 @@
+Copyright (C) 2016 The Android Open Source Project
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This file implements interfaces from the file jvmti.h. This implementation
+is licensed under the same terms as the file jvmti.h.  The
+copyright and license information for the file jvmti.h follows.
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License version 2 only, as
+published by the Free Software Foundation.  Oracle designates this
+particular file as subject to the "Classpath" exception as provided
+by Oracle in the LICENSE file that accompanied this code.
+
+This code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+version 2 for more details (a copy is included in the LICENSE file that
+accompanied this code).
+
+You should have received a copy of the GNU General Public License version
+2 along with this work; if not, write to the Free Software Foundation,
+Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+or visit www.oracle.com if you need additional information or have any
+questions.
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
new file mode 100644
index 0000000..339c457
--- /dev/null
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -0,0 +1,1120 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+#include "openjdkjvmti/jvmti.h"
+
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jni_env_ext-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread_list.h"
+
+// TODO: Remove this once all of the methods below are annotated; it was added to make the
+// skeleton easier to create.
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+
+namespace openjdkjvmti {
+
+extern const jvmtiInterface_1 gJvmtiInterface;
+
+// A structure that is a jvmtiEnv with additional information for the runtime.
+struct ArtJvmTiEnv : public jvmtiEnv {
+  art::JavaVMExt* art_vm;
+  void* local_data;
+
+  explicit ArtJvmTiEnv(art::JavaVMExt* runtime) : art_vm(runtime), local_data(nullptr) {
+    functions = &gJvmtiInterface;
+  }
+};
+
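+// Functions below recover this wrapper from a plain jvmtiEnv* via
+// reinterpret_cast<ArtJvmTiEnv*>(env); see Set/GetEnvironmentLocalStorage.
+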
+// Macro and constexpr to make error values less annoying to write.
+#define ERR(e) JVMTI_ERROR_ ## e
+static constexpr jvmtiError OK = JVMTI_ERROR_NONE;
+
+// Special error code for unimplemented functions in JVMTI.
+static constexpr jvmtiError ERR(NOT_IMPLEMENTED) = JVMTI_ERROR_NOT_AVAILABLE;
+
+class JvmtiFunctions {
+ private:
+  static bool IsValidEnv(jvmtiEnv* env) {
+    return env != nullptr;
+  }
+
+ public:
+  static jvmtiError Allocate(jvmtiEnv* env, jlong size, unsigned char** mem_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    if (mem_ptr == nullptr) {
+      return ERR(NULL_POINTER);
+    }
+    if (size < 0) {
+      return ERR(ILLEGAL_ARGUMENT);
+    } else if (size == 0) {
+      *mem_ptr = nullptr;
+      return OK;
+    }
+    *mem_ptr = static_cast<unsigned char*>(malloc(size));
+    return (*mem_ptr != nullptr) ? OK : ERR(OUT_OF_MEMORY);
+  }
+
+  static jvmtiError Deallocate(jvmtiEnv* env, unsigned char* mem) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    if (mem != nullptr) {
+      free(mem);
+    }
+    return OK;
+  }
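+
+  // Note: Allocate above returns malloc'd memory, so Deallocate must remain free()-based.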
+
+  static jvmtiError GetThreadState(jvmtiEnv* env, jthread thread, jint* thread_state_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SuspendThreadList(jvmtiEnv* env,
+                                      jint request_count,
+                                      const jthread* request_list,
+                                      jvmtiError* results) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ResumeThread(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ResumeThreadList(jvmtiEnv* env,
+                                     jint request_count,
+                                     const jthread* request_list,
+                                     jvmtiError* results) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError StopThread(jvmtiEnv* env, jthread thread, jobject exception) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadInfo(jvmtiEnv* env, jthread thread, jvmtiThreadInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetOwnedMonitorInfo(jvmtiEnv* env,
+                                        jthread thread,
+                                        jint* owned_monitor_count_ptr,
+                                        jobject** owned_monitors_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetOwnedMonitorStackDepthInfo(jvmtiEnv* env,
+                                                  jthread thread,
+                                                  jint* monitor_info_count_ptr,
+                                                  jvmtiMonitorStackDepthInfo** monitor_info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentContendedMonitor(jvmtiEnv* env,
+                                               jthread thread,
+                                               jobject* monitor_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RunAgentThread(jvmtiEnv* env,
+                                   jthread thread,
+                                   jvmtiStartFunction proc,
+                                   const void* arg,
+                                   jint priority) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetThreadLocalStorage(jvmtiEnv* env, jthread thread, const void* data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadLocalStorage(jvmtiEnv* env, jthread thread, void** data_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTopThreadGroups(jvmtiEnv* env,
+                                       jint* group_count_ptr,
+                                       jthreadGroup** groups_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadGroupInfo(jvmtiEnv* env,
+                                       jthreadGroup group,
+                                       jvmtiThreadGroupInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadGroupChildren(jvmtiEnv* env,
+                                           jthreadGroup group,
+                                           jint* thread_count_ptr,
+                                           jthread** threads_ptr,
+                                           jint* group_count_ptr,
+                                           jthreadGroup** groups_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetStackTrace(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint start_depth,
+                                  jint max_frame_count,
+                                  jvmtiFrameInfo* frame_buffer,
+                                  jint* count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetAllStackTraces(jvmtiEnv* env,
+                                      jint max_frame_count,
+                                      jvmtiStackInfo** stack_info_ptr,
+                                      jint* thread_count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadListStackTraces(jvmtiEnv* env,
+                                             jint thread_count,
+                                             const jthread* thread_list,
+                                             jint max_frame_count,
+                                             jvmtiStackInfo** stack_info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFrameCount(jvmtiEnv* env, jthread thread, jint* count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError PopFrame(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFrameLocation(jvmtiEnv* env,
+                                     jthread thread,
+                                     jint depth,
+                                     jmethodID* method_ptr,
+                                     jlocation* location_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError NotifyFramePop(jvmtiEnv* env, jthread thread, jint depth) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnObject(jvmtiEnv* env, jthread thread, jobject value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnInt(jvmtiEnv* env, jthread thread, jint value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnLong(jvmtiEnv* env, jthread thread, jlong value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnFloat(jvmtiEnv* env, jthread thread, jfloat value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnDouble(jvmtiEnv* env, jthread thread, jdouble value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnVoid(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError FollowReferences(jvmtiEnv* env,
+                                     jint heap_filter,
+                                     jclass klass,
+                                     jobject initial_object,
+                                     const jvmtiHeapCallbacks* callbacks,
+                                     const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateThroughHeap(jvmtiEnv* env,
+                                       jint heap_filter,
+                                       jclass klass,
+                                       const jvmtiHeapCallbacks* callbacks,
+                                       const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTag(jvmtiEnv* env, jobject object, jlong* tag_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetTag(jvmtiEnv* env, jobject object, jlong tag) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectsWithTags(jvmtiEnv* env,
+                                       jint tag_count,
+                                       const jlong* tags,
+                                       jint* count_ptr,
+                                       jobject** object_result_ptr,
+                                       jlong** tag_result_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverObjectsReachableFromObject(
+      jvmtiEnv* env,
+      jobject object,
+      jvmtiObjectReferenceCallback object_reference_callback,
+      const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverReachableObjects(jvmtiEnv* env,
+                                                jvmtiHeapRootCallback heap_root_callback,
+                                                jvmtiStackReferenceCallback stack_ref_callback,
+                                                jvmtiObjectReferenceCallback object_ref_callback,
+                                                const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverHeap(jvmtiEnv* env,
+                                    jvmtiHeapObjectFilter object_filter,
+                                    jvmtiHeapObjectCallback heap_object_callback,
+                                    const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverInstancesOfClass(jvmtiEnv* env,
+                                                jclass klass,
+                                                jvmtiHeapObjectFilter object_filter,
+                                                jvmtiHeapObjectCallback heap_object_callback,
+                                                const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalObject(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jobject* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalInstance(jvmtiEnv* env,
+                                     jthread thread,
+                                     jint depth,
+                                     jobject* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalInt(jvmtiEnv* env,
+                                jthread thread,
+                                jint depth,
+                                jint slot,
+                                jint* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalLong(jvmtiEnv* env,
+                                 jthread thread,
+                                 jint depth,
+                                 jint slot,
+                                 jlong* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalFloat(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint depth,
+                                  jint slot,
+                                  jfloat* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalDouble(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jdouble* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalObject(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jobject value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalInt(jvmtiEnv* env,
+                                jthread thread,
+                                jint depth,
+                                jint slot,
+                                jint value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalLong(jvmtiEnv* env,
+                                 jthread thread,
+                                 jint depth,
+                                 jint slot,
+                                 jlong value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalFloat(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint depth,
+                                  jint slot,
+                                  jfloat value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalDouble(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jdouble value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ClearBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ClearFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ClearFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassLoaderClasses(jvmtiEnv* env,
+                                          jobject initiating_loader,
+                                          jint* class_count_ptr,
+                                          jclass** classes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassSignature(jvmtiEnv* env,
+                                      jclass klass,
+                                      char** signature_ptr,
+                                      char** generic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassStatus(jvmtiEnv* env, jclass klass, jint* status_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSourceFileName(jvmtiEnv* env, jclass klass, char** source_name_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassModifiers(jvmtiEnv* env, jclass klass, jint* modifiers_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassMethods(jvmtiEnv* env,
+                                    jclass klass,
+                                    jint* method_count_ptr,
+                                    jmethodID** methods_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassFields(jvmtiEnv* env,
+                                   jclass klass,
+                                   jint* field_count_ptr,
+                                   jfieldID** fields_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetImplementedInterfaces(jvmtiEnv* env,
+                                             jclass klass,
+                                             jint* interface_count_ptr,
+                                             jclass** interfaces_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassVersionNumbers(jvmtiEnv* env,
+                                           jclass klass,
+                                           jint* minor_version_ptr,
+                                           jint* major_version_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetConstantPool(jvmtiEnv* env,
+                                    jclass klass,
+                                    jint* constant_pool_count_ptr,
+                                    jint* constant_pool_byte_count_ptr,
+                                    unsigned char** constant_pool_bytes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsInterface(jvmtiEnv* env, jclass klass, jboolean* is_interface_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsArrayClass(jvmtiEnv* env,
+                                 jclass klass,
+                                 jboolean* is_array_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsModifiableClass(jvmtiEnv* env,
+                                      jclass klass,
+                                      jboolean* is_modifiable_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassLoader(jvmtiEnv* env, jclass klass, jobject* classloader_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSourceDebugExtension(jvmtiEnv* env,
+                                            jclass klass,
+                                            char** source_debug_extension_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RetransformClasses(jvmtiEnv* env, jint class_count, const jclass* classes) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RedefineClasses(jvmtiEnv* env,
+                                    jint class_count,
+                                    const jvmtiClassDefinition* class_definitions) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectSize(jvmtiEnv* env, jobject object, jlong* size_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectHashCode(jvmtiEnv* env, jobject object, jint* hash_code_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectMonitorUsage(jvmtiEnv* env,
+                                          jobject object,
+                                          jvmtiMonitorUsage* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFieldName(jvmtiEnv* env,
+                                 jclass klass,
+                                 jfieldID field,
+                                 char** name_ptr,
+                                 char** signature_ptr,
+                                 char** generic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFieldDeclaringClass(jvmtiEnv* env,
+                                           jclass klass,
+                                           jfieldID field,
+                                           jclass* declaring_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFieldModifiers(jvmtiEnv* env,
+                                      jclass klass,
+                                      jfieldID field,
+                                      jint* modifiers_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsFieldSynthetic(jvmtiEnv* env,
+                                     jclass klass,
+                                     jfieldID field,
+                                     jboolean* is_synthetic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodName(jvmtiEnv* env,
+                                  jmethodID method,
+                                  char** name_ptr,
+                                  char** signature_ptr,
+                                  char** generic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodDeclaringClass(jvmtiEnv* env,
+                                            jmethodID method,
+                                            jclass* declaring_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodModifiers(jvmtiEnv* env,
+                                       jmethodID method,
+                                       jint* modifiers_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMaxLocals(jvmtiEnv* env,
+                                 jmethodID method,
+                                 jint* max_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetArgumentsSize(jvmtiEnv* env,
+                                     jmethodID method,
+                                     jint* size_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLineNumberTable(jvmtiEnv* env,
+                                       jmethodID method,
+                                       jint* entry_count_ptr,
+                                       jvmtiLineNumberEntry** table_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodLocation(jvmtiEnv* env,
+                                      jmethodID method,
+                                      jlocation* start_location_ptr,
+                                      jlocation* end_location_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalVariableTable(jvmtiEnv* env,
+                                          jmethodID method,
+                                          jint* entry_count_ptr,
+                                          jvmtiLocalVariableEntry** table_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetBytecodes(jvmtiEnv* env,
+                                 jmethodID method,
+                                 jint* bytecode_count_ptr,
+                                 unsigned char** bytecodes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsMethodNative(jvmtiEnv* env, jmethodID method, jboolean* is_native_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsMethodSynthetic(jvmtiEnv* env, jmethodID method, jboolean* is_synthetic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsMethodObsolete(jvmtiEnv* env, jmethodID method, jboolean* is_obsolete_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetNativeMethodPrefix(jvmtiEnv* env, const char* prefix) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetNativeMethodPrefixes(jvmtiEnv* env, jint prefix_count, char** prefixes) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError CreateRawMonitor(jvmtiEnv* env, const char* name, jrawMonitorID* monitor_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError DestroyRawMonitor(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorEnter(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorExit(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorWait(jvmtiEnv* env, jrawMonitorID monitor, jlong millis) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorNotify(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorNotifyAll(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetJNIFunctionTable(jvmtiEnv* env, const jniNativeInterface* function_table) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetJNIFunctionTable(jvmtiEnv* env, jniNativeInterface** function_table) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetEventCallbacks(jvmtiEnv* env,
+                                      const jvmtiEventCallbacks* callbacks,
+                                      jint size_of_callbacks) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetEventNotificationMode(jvmtiEnv* env,
+                                             jvmtiEventMode mode,
+                                             jvmtiEvent event_type,
+                                             jthread event_thread,
+                                             ...) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GenerateEvents(jvmtiEnv* env, jvmtiEvent event_type) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetExtensionFunctions(jvmtiEnv* env,
+                                          jint* extension_count_ptr,
+                                          jvmtiExtensionFunctionInfo** extensions) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetExtensionEvents(jvmtiEnv* env,
+                                       jint* extension_count_ptr,
+                                       jvmtiExtensionEventInfo** extensions) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetExtensionEventCallback(jvmtiEnv* env,
+                                              jint extension_event_index,
+                                              jvmtiExtensionEvent callback) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetPotentialCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError AddCapabilities(jvmtiEnv* env, const jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RelinquishCapabilities(jvmtiEnv* env,
+                                           const jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentThreadCpuTime(jvmtiEnv* env, jlong* nanos_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadCpuTime(jvmtiEnv* env, jthread thread, jlong* nanos_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTime(jvmtiEnv* env, jlong* nanos_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetAvailableProcessors(jvmtiEnv* env, jint* processor_count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError AddToBootstrapClassLoaderSearch(jvmtiEnv* env, const char* segment) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError AddToSystemClassLoaderSearch(jvmtiEnv* env, const char* segment) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSystemProperties(jvmtiEnv* env, jint* count_ptr, char*** property_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSystemProperty(jvmtiEnv* env, const char* property, char** value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetSystemProperty(jvmtiEnv* env, const char* property, const char* value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetPhase(jvmtiEnv* env, jvmtiPhase* phase_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError DisposeEnvironment(jvmtiEnv* env) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    // Delete as the concrete type; jvmtiEnv has no virtual destructor.
+    delete reinterpret_cast<ArtJvmTiEnv*>(env);
+    return OK;
+  }
+
+  static jvmtiError SetEnvironmentLocalStorage(jvmtiEnv* env, const void* data) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    reinterpret_cast<ArtJvmTiEnv*>(env)->local_data = const_cast<void*>(data);
+    return OK;
+  }
+
+  static jvmtiError GetEnvironmentLocalStorage(jvmtiEnv* env, void** data_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    *data_ptr = reinterpret_cast<ArtJvmTiEnv*>(env)->local_data;
+    return OK;
+  }
+
+  static jvmtiError GetVersionNumber(jvmtiEnv* env, jint* version_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    *version_ptr = JVMTI_VERSION;
+    return OK;
+  }
+
+  static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error, char** name_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    if (name_ptr == nullptr) {
+      return ERR(NULL_POINTER);
+    }
+    switch (error) {
+#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \
+          *name_ptr = const_cast<char*>("JVMTI_ERROR_"#e); \
+          return OK; \
+        } while (false)
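+      // For instance, ERROR_CASE(NONE) expands (ignoring the do/while wrapper) to:
+      //   case JVMTI_ERROR_NONE: *name_ptr = const_cast<char*>("JVMTI_ERROR_NONE"); return OK;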
+      ERROR_CASE(NONE);
+      ERROR_CASE(INVALID_THREAD);
+      ERROR_CASE(INVALID_THREAD_GROUP);
+      ERROR_CASE(INVALID_PRIORITY);
+      ERROR_CASE(THREAD_NOT_SUSPENDED);
+      ERROR_CASE(THREAD_NOT_ALIVE);
+      ERROR_CASE(INVALID_OBJECT);
+      ERROR_CASE(INVALID_CLASS);
+      ERROR_CASE(CLASS_NOT_PREPARED);
+      ERROR_CASE(INVALID_METHODID);
+      ERROR_CASE(INVALID_LOCATION);
+      ERROR_CASE(INVALID_FIELDID);
+      ERROR_CASE(NO_MORE_FRAMES);
+      ERROR_CASE(OPAQUE_FRAME);
+      ERROR_CASE(TYPE_MISMATCH);
+      ERROR_CASE(INVALID_SLOT);
+      ERROR_CASE(DUPLICATE);
+      ERROR_CASE(NOT_FOUND);
+      ERROR_CASE(INVALID_MONITOR);
+      ERROR_CASE(NOT_MONITOR_OWNER);
+      ERROR_CASE(INTERRUPT);
+      ERROR_CASE(INVALID_CLASS_FORMAT);
+      ERROR_CASE(CIRCULAR_CLASS_DEFINITION);
+      ERROR_CASE(FAILS_VERIFICATION);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_METHOD_ADDED);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED);
+      ERROR_CASE(INVALID_TYPESTATE);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_METHOD_DELETED);
+      ERROR_CASE(UNSUPPORTED_VERSION);
+      ERROR_CASE(NAMES_DONT_MATCH);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED);
+      ERROR_CASE(UNMODIFIABLE_CLASS);
+      ERROR_CASE(NOT_AVAILABLE);
+      ERROR_CASE(MUST_POSSESS_CAPABILITY);
+      ERROR_CASE(NULL_POINTER);
+      ERROR_CASE(ABSENT_INFORMATION);
+      ERROR_CASE(INVALID_EVENT_TYPE);
+      ERROR_CASE(ILLEGAL_ARGUMENT);
+      ERROR_CASE(NATIVE_METHOD);
+      ERROR_CASE(CLASS_LOADER_UNSUPPORTED);
+      ERROR_CASE(OUT_OF_MEMORY);
+      ERROR_CASE(ACCESS_DENIED);
+      ERROR_CASE(WRONG_PHASE);
+      ERROR_CASE(INTERNAL);
+      ERROR_CASE(UNATTACHED_THREAD);
+      ERROR_CASE(INVALID_ENVIRONMENT);
+#undef ERROR_CASE
+      default: {
+        *name_ptr = const_cast<char*>("JVMTI_ERROR_UNKNOWN");
+        return ERR(ILLEGAL_ARGUMENT);
+      }
+    }
+  }
+
+  static jvmtiError SetVerboseFlag(jvmtiEnv* env, jvmtiVerboseFlag flag, jboolean value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetJLocationFormat(jvmtiEnv* env, jvmtiJlocationFormat* format_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+};
+
+static bool IsJvmtiVersion(jint version) {
+  return version == JVMTI_VERSION_1 ||
+         version == JVMTI_VERSION_1_0 ||
+         version == JVMTI_VERSION_1_1 ||
+         version == JVMTI_VERSION_1_2 ||
+         version == JVMTI_VERSION;
+}
+
+// Creates a new ArtJvmTiEnv for the given JavaVMExt and returns it through the out parameter
+// new_jvmtiEnv.
+static void CreateArtJvmTiEnv(art::JavaVMExt* vm, /*out*/void** new_jvmtiEnv) {
+  struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm);
+  *new_jvmtiEnv = env;
+}
+
+// A hook that the runtime uses to allow plugins to handle GetEnv calls. Returns JNI_OK and
+// places the new environment in 'env' if this library can handle the GetEnv request; otherwise
+// returns JNI_EVERSION and does not modify the 'env' pointer.
+static jint GetEnvHandler(art::JavaVMExt* vm, /*out*/void** env, jint version) {
+  if (IsJvmtiVersion(version)) {
+    CreateArtJvmTiEnv(vm, env);
+    return JNI_OK;
+  } else {
+    printf("version 0x%x is not valid!\n", version);
+    return JNI_EVERSION;
+  }
+}
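+
+// An agent typically reaches this hook through JavaVM::GetEnv, e.g.:
+//   jvmtiEnv* jvmti = nullptr;
+//   vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2);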
+
+// The plugin initialization function. This adds the jvmti environment.
+extern "C" bool ArtPlugin_Initialize() {
+  art::Runtime::Current()->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
+  return true;
+}
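+
+// An illustrative sketch (not part of this file): once the plugin has been
+// initialized, an agent built against jvmti.h reaches GetEnvHandler above via
+// the standard JNI GetEnv call:
+//
+//   jvmtiEnv* jvmti = nullptr;
+//   jint res = vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2);
+//   // res == JNI_OK: 'jvmti' points at a freshly allocated ArtJvmTiEnv.
+//   // res == JNI_EVERSION: the requested version failed IsJvmtiVersion().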
+
+// The actual struct holding all of the entrypoints into the jvmti interface.
+const jvmtiInterface_1 gJvmtiInterface = {
+  nullptr,  // reserved1
+  JvmtiFunctions::SetEventNotificationMode,
+  nullptr,  // reserved3
+  JvmtiFunctions::GetAllThreads,
+  JvmtiFunctions::SuspendThread,
+  JvmtiFunctions::ResumeThread,
+  JvmtiFunctions::StopThread,
+  JvmtiFunctions::InterruptThread,
+  JvmtiFunctions::GetThreadInfo,
+  JvmtiFunctions::GetOwnedMonitorInfo,  // 10
+  JvmtiFunctions::GetCurrentContendedMonitor,
+  JvmtiFunctions::RunAgentThread,
+  JvmtiFunctions::GetTopThreadGroups,
+  JvmtiFunctions::GetThreadGroupInfo,
+  JvmtiFunctions::GetThreadGroupChildren,
+  JvmtiFunctions::GetFrameCount,
+  JvmtiFunctions::GetThreadState,
+  JvmtiFunctions::GetCurrentThread,
+  JvmtiFunctions::GetFrameLocation,
+  JvmtiFunctions::NotifyFramePop,  // 20
+  JvmtiFunctions::GetLocalObject,
+  JvmtiFunctions::GetLocalInt,
+  JvmtiFunctions::GetLocalLong,
+  JvmtiFunctions::GetLocalFloat,
+  JvmtiFunctions::GetLocalDouble,
+  JvmtiFunctions::SetLocalObject,
+  JvmtiFunctions::SetLocalInt,
+  JvmtiFunctions::SetLocalLong,
+  JvmtiFunctions::SetLocalFloat,
+  JvmtiFunctions::SetLocalDouble,  // 30
+  JvmtiFunctions::CreateRawMonitor,
+  JvmtiFunctions::DestroyRawMonitor,
+  JvmtiFunctions::RawMonitorEnter,
+  JvmtiFunctions::RawMonitorExit,
+  JvmtiFunctions::RawMonitorWait,
+  JvmtiFunctions::RawMonitorNotify,
+  JvmtiFunctions::RawMonitorNotifyAll,
+  JvmtiFunctions::SetBreakpoint,
+  JvmtiFunctions::ClearBreakpoint,
+  nullptr,  // reserved40
+  JvmtiFunctions::SetFieldAccessWatch,
+  JvmtiFunctions::ClearFieldAccessWatch,
+  JvmtiFunctions::SetFieldModificationWatch,
+  JvmtiFunctions::ClearFieldModificationWatch,
+  JvmtiFunctions::IsModifiableClass,
+  JvmtiFunctions::Allocate,
+  JvmtiFunctions::Deallocate,
+  JvmtiFunctions::GetClassSignature,
+  JvmtiFunctions::GetClassStatus,
+  JvmtiFunctions::GetSourceFileName,  // 50
+  JvmtiFunctions::GetClassModifiers,
+  JvmtiFunctions::GetClassMethods,
+  JvmtiFunctions::GetClassFields,
+  JvmtiFunctions::GetImplementedInterfaces,
+  JvmtiFunctions::IsInterface,
+  JvmtiFunctions::IsArrayClass,
+  JvmtiFunctions::GetClassLoader,
+  JvmtiFunctions::GetObjectHashCode,
+  JvmtiFunctions::GetObjectMonitorUsage,
+  JvmtiFunctions::GetFieldName,  // 60
+  JvmtiFunctions::GetFieldDeclaringClass,
+  JvmtiFunctions::GetFieldModifiers,
+  JvmtiFunctions::IsFieldSynthetic,
+  JvmtiFunctions::GetMethodName,
+  JvmtiFunctions::GetMethodDeclaringClass,
+  JvmtiFunctions::GetMethodModifiers,
+  nullptr,  // reserved67
+  JvmtiFunctions::GetMaxLocals,
+  JvmtiFunctions::GetArgumentsSize,
+  JvmtiFunctions::GetLineNumberTable,  // 70
+  JvmtiFunctions::GetMethodLocation,
+  JvmtiFunctions::GetLocalVariableTable,
+  JvmtiFunctions::SetNativeMethodPrefix,
+  JvmtiFunctions::SetNativeMethodPrefixes,
+  JvmtiFunctions::GetBytecodes,
+  JvmtiFunctions::IsMethodNative,
+  JvmtiFunctions::IsMethodSynthetic,
+  JvmtiFunctions::GetLoadedClasses,
+  JvmtiFunctions::GetClassLoaderClasses,
+  JvmtiFunctions::PopFrame,  // 80
+  JvmtiFunctions::ForceEarlyReturnObject,
+  JvmtiFunctions::ForceEarlyReturnInt,
+  JvmtiFunctions::ForceEarlyReturnLong,
+  JvmtiFunctions::ForceEarlyReturnFloat,
+  JvmtiFunctions::ForceEarlyReturnDouble,
+  JvmtiFunctions::ForceEarlyReturnVoid,
+  JvmtiFunctions::RedefineClasses,
+  JvmtiFunctions::GetVersionNumber,
+  JvmtiFunctions::GetCapabilities,
+  JvmtiFunctions::GetSourceDebugExtension,  // 90
+  JvmtiFunctions::IsMethodObsolete,
+  JvmtiFunctions::SuspendThreadList,
+  JvmtiFunctions::ResumeThreadList,
+  nullptr,  // reserved94
+  nullptr,  // reserved95
+  nullptr,  // reserved96
+  nullptr,  // reserved97
+  nullptr,  // reserved98
+  nullptr,  // reserved99
+  JvmtiFunctions::GetAllStackTraces,  // 100
+  JvmtiFunctions::GetThreadListStackTraces,
+  JvmtiFunctions::GetThreadLocalStorage,
+  JvmtiFunctions::SetThreadLocalStorage,
+  JvmtiFunctions::GetStackTrace,
+  nullptr,  // reserved105
+  JvmtiFunctions::GetTag,
+  JvmtiFunctions::SetTag,
+  JvmtiFunctions::ForceGarbageCollection,
+  JvmtiFunctions::IterateOverObjectsReachableFromObject,
+  JvmtiFunctions::IterateOverReachableObjects,  // 110
+  JvmtiFunctions::IterateOverHeap,
+  JvmtiFunctions::IterateOverInstancesOfClass,
+  nullptr,  // reserved113
+  JvmtiFunctions::GetObjectsWithTags,
+  JvmtiFunctions::FollowReferences,
+  JvmtiFunctions::IterateThroughHeap,
+  nullptr,  // reserved117
+  nullptr,  // reserved118
+  nullptr,  // reserved119
+  JvmtiFunctions::SetJNIFunctionTable,  // 120
+  JvmtiFunctions::GetJNIFunctionTable,
+  JvmtiFunctions::SetEventCallbacks,
+  JvmtiFunctions::GenerateEvents,
+  JvmtiFunctions::GetExtensionFunctions,
+  JvmtiFunctions::GetExtensionEvents,
+  JvmtiFunctions::SetExtensionEventCallback,
+  JvmtiFunctions::DisposeEnvironment,
+  JvmtiFunctions::GetErrorName,
+  JvmtiFunctions::GetJLocationFormat,
+  JvmtiFunctions::GetSystemProperties,  // 130
+  JvmtiFunctions::GetSystemProperty,
+  JvmtiFunctions::SetSystemProperty,
+  JvmtiFunctions::GetPhase,
+  JvmtiFunctions::GetCurrentThreadCpuTimerInfo,
+  JvmtiFunctions::GetCurrentThreadCpuTime,
+  JvmtiFunctions::GetThreadCpuTimerInfo,
+  JvmtiFunctions::GetThreadCpuTime,
+  JvmtiFunctions::GetTimerInfo,
+  JvmtiFunctions::GetTime,
+  JvmtiFunctions::GetPotentialCapabilities,  // 140
+  nullptr,  // reserved141
+  JvmtiFunctions::AddCapabilities,
+  JvmtiFunctions::RelinquishCapabilities,
+  JvmtiFunctions::GetAvailableProcessors,
+  JvmtiFunctions::GetClassVersionNumbers,
+  JvmtiFunctions::GetConstantPool,
+  JvmtiFunctions::GetEnvironmentLocalStorage,
+  JvmtiFunctions::SetEnvironmentLocalStorage,
+  JvmtiFunctions::AddToBootstrapClassLoaderSearch,
+  JvmtiFunctions::SetVerboseFlag,  // 150
+  JvmtiFunctions::AddToSystemClassLoaderSearch,
+  JvmtiFunctions::RetransformClasses,
+  JvmtiFunctions::GetOwnedMonitorStackDepthInfo,
+  JvmtiFunctions::GetObjectSize,
+  JvmtiFunctions::GetLocalInstance,
+};
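+
+// An illustrative note: agents do not reference gJvmtiInterface directly. In C,
+// a jvmtiEnv is a pointer to this table and calls dispatch through it; in C++,
+// the inline wrappers in jvmti.h forward to the same slots:
+//
+//   jint version;
+//   (*env)->GetVersionNumber(env, &version);  /* C */
+//   env->GetVersionNumber(&version);          // C++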
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/README.md b/runtime/openjdkjvmti/README.md
new file mode 100644
index 0000000..b8bab57
--- /dev/null
+++ b/runtime/openjdkjvmti/README.md
@@ -0,0 +1,7 @@
+openjdkjvmti plugin
+====
+
+This is a partial implementation of the JVMTI v1.2 interface for the Android
+runtime (ART), packaged as a runtime plugin. It allows the use of agents that
+can modify the running state of the program, for example by modifying dex
+files in memory or by performing other operations on the global runtime state.
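+
+Usage sketch
+----
+
+A minimal agent against the `jvmti.h` shipped here might look as follows (the
+error handling and the use of `GetVersionNumber` are purely illustrative):
+
+```c++
+#include "jvmti.h"
+
+// Called by the runtime when the agent library is loaded.
+extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
+  jvmtiEnv* jvmti = nullptr;
+  // The plugin's GetEnv hook services this request; versions it does not
+  // recognize are answered with JNI_EVERSION.
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2) != JNI_OK) {
+    return JNI_ERR;
+  }
+  jint version = 0;
+  jvmti->GetVersionNumber(&version);  // Dispatches through the plugin's function table.
+  return JNI_OK;
+}
+```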
diff --git a/runtime/openjdkjvmti/jvmti.h b/runtime/openjdkjvmti/jvmti.h
new file mode 100644
index 0000000..ee708cb
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti.h
@@ -0,0 +1,2534 @@
+/*
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+    /* AUTOMATICALLY GENERATED FILE - DO NOT EDIT */
+
+
+    /* Include file for the Java(tm) Virtual Machine Tool Interface */
+
+#ifndef _JAVA_JVMTI_H_
+#define _JAVA_JVMTI_H_
+
+#include "jni.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+    JVMTI_VERSION_1   = 0x30010000,
+    JVMTI_VERSION_1_0 = 0x30010000,
+    JVMTI_VERSION_1_1 = 0x30010100,
+    JVMTI_VERSION_1_2 = 0x30010200,
+
+    JVMTI_VERSION = 0x30000000 + (1 * 0x10000) + (2 * 0x100) + 1  /* version: 1.2.1 */
+};
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *vm, char *options, void *reserved);
+
+JNIEXPORT jint JNICALL
+Agent_OnAttach(JavaVM* vm, char* options, void* reserved);
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *vm);
+
+    /* Forward declaration of the environment */
+
+struct _jvmtiEnv;
+
+struct jvmtiInterface_1_;
+
+#ifdef __cplusplus
+typedef _jvmtiEnv jvmtiEnv;
+#else
+typedef const struct jvmtiInterface_1_ *jvmtiEnv;
+#endif /* __cplusplus */
+
+/* Derived Base Types */
+
+typedef jobject jthread;
+typedef jobject jthreadGroup;
+typedef jlong jlocation;
+struct _jrawMonitorID;
+typedef struct _jrawMonitorID *jrawMonitorID;
+typedef struct JNINativeInterface_ jniNativeInterface;
+
+    /* Constants */
+
+
+    /* Thread State Flags */
+
+enum {
+    JVMTI_THREAD_STATE_ALIVE = 0x0001,
+    JVMTI_THREAD_STATE_TERMINATED = 0x0002,
+    JVMTI_THREAD_STATE_RUNNABLE = 0x0004,
+    JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER = 0x0400,
+    JVMTI_THREAD_STATE_WAITING = 0x0080,
+    JVMTI_THREAD_STATE_WAITING_INDEFINITELY = 0x0010,
+    JVMTI_THREAD_STATE_WAITING_WITH_TIMEOUT = 0x0020,
+    JVMTI_THREAD_STATE_SLEEPING = 0x0040,
+    JVMTI_THREAD_STATE_IN_OBJECT_WAIT = 0x0100,
+    JVMTI_THREAD_STATE_PARKED = 0x0200,
+    JVMTI_THREAD_STATE_SUSPENDED = 0x100000,
+    JVMTI_THREAD_STATE_INTERRUPTED = 0x200000,
+    JVMTI_THREAD_STATE_IN_NATIVE = 0x400000,
+    JVMTI_THREAD_STATE_VENDOR_1 = 0x10000000,
+    JVMTI_THREAD_STATE_VENDOR_2 = 0x20000000,
+    JVMTI_THREAD_STATE_VENDOR_3 = 0x40000000
+};
+
+    /* java.lang.Thread.State Conversion Masks */
+
+enum {
+    JVMTI_JAVA_LANG_THREAD_STATE_MASK = JVMTI_THREAD_STATE_TERMINATED | JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_RUNNABLE | JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER | JVMTI_THREAD_STATE_WAITING | JVMTI_THREAD_STATE_WAITING_INDEFINITELY | JVMTI_THREAD_STATE_WAITING_WITH_TIMEOUT,
+    JVMTI_JAVA_LANG_THREAD_STATE_NEW = 0,
+    JVMTI_JAVA_LANG_THREAD_STATE_TERMINATED = JVMTI_THREAD_STATE_TERMINATED,
+    JVMTI_JAVA_LANG_THREAD_STATE_RUNNABLE = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_RUNNABLE,
+    JVMTI_JAVA_LANG_THREAD_STATE_BLOCKED = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER,
+    JVMTI_JAVA_LANG_THREAD_STATE_WAITING = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_WAITING | JVMTI_THREAD_STATE_WAITING_INDEFINITELY,
+    JVMTI_JAVA_LANG_THREAD_STATE_TIMED_WAITING = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_WAITING | JVMTI_THREAD_STATE_WAITING_WITH_TIMEOUT
+};
+
+    /* Thread Priority Constants */
+
+enum {
+    JVMTI_THREAD_MIN_PRIORITY = 1,
+    JVMTI_THREAD_NORM_PRIORITY = 5,
+    JVMTI_THREAD_MAX_PRIORITY = 10
+};
+
+    /* Heap Filter Flags */
+
+enum {
+    JVMTI_HEAP_FILTER_TAGGED = 0x4,
+    JVMTI_HEAP_FILTER_UNTAGGED = 0x8,
+    JVMTI_HEAP_FILTER_CLASS_TAGGED = 0x10,
+    JVMTI_HEAP_FILTER_CLASS_UNTAGGED = 0x20
+};
+
+    /* Heap Visit Control Flags */
+
+enum {
+    JVMTI_VISIT_OBJECTS = 0x100,
+    JVMTI_VISIT_ABORT = 0x8000
+};
+
+    /* Heap Reference Enumeration */
+
+typedef enum {
+    JVMTI_HEAP_REFERENCE_CLASS = 1,
+    JVMTI_HEAP_REFERENCE_FIELD = 2,
+    JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT = 3,
+    JVMTI_HEAP_REFERENCE_CLASS_LOADER = 4,
+    JVMTI_HEAP_REFERENCE_SIGNERS = 5,
+    JVMTI_HEAP_REFERENCE_PROTECTION_DOMAIN = 6,
+    JVMTI_HEAP_REFERENCE_INTERFACE = 7,
+    JVMTI_HEAP_REFERENCE_STATIC_FIELD = 8,
+    JVMTI_HEAP_REFERENCE_CONSTANT_POOL = 9,
+    JVMTI_HEAP_REFERENCE_SUPERCLASS = 10,
+    JVMTI_HEAP_REFERENCE_JNI_GLOBAL = 21,
+    JVMTI_HEAP_REFERENCE_SYSTEM_CLASS = 22,
+    JVMTI_HEAP_REFERENCE_MONITOR = 23,
+    JVMTI_HEAP_REFERENCE_STACK_LOCAL = 24,
+    JVMTI_HEAP_REFERENCE_JNI_LOCAL = 25,
+    JVMTI_HEAP_REFERENCE_THREAD = 26,
+    JVMTI_HEAP_REFERENCE_OTHER = 27
+} jvmtiHeapReferenceKind;
+
+    /* Primitive Type Enumeration */
+
+typedef enum {
+    JVMTI_PRIMITIVE_TYPE_BOOLEAN = 90,
+    JVMTI_PRIMITIVE_TYPE_BYTE = 66,
+    JVMTI_PRIMITIVE_TYPE_CHAR = 67,
+    JVMTI_PRIMITIVE_TYPE_SHORT = 83,
+    JVMTI_PRIMITIVE_TYPE_INT = 73,
+    JVMTI_PRIMITIVE_TYPE_LONG = 74,
+    JVMTI_PRIMITIVE_TYPE_FLOAT = 70,
+    JVMTI_PRIMITIVE_TYPE_DOUBLE = 68
+} jvmtiPrimitiveType;
+
+    /* Heap Object Filter Enumeration */
+
+typedef enum {
+    JVMTI_HEAP_OBJECT_TAGGED = 1,
+    JVMTI_HEAP_OBJECT_UNTAGGED = 2,
+    JVMTI_HEAP_OBJECT_EITHER = 3
+} jvmtiHeapObjectFilter;
+
+    /* Heap Root Kind Enumeration */
+
+typedef enum {
+    JVMTI_HEAP_ROOT_JNI_GLOBAL = 1,
+    JVMTI_HEAP_ROOT_SYSTEM_CLASS = 2,
+    JVMTI_HEAP_ROOT_MONITOR = 3,
+    JVMTI_HEAP_ROOT_STACK_LOCAL = 4,
+    JVMTI_HEAP_ROOT_JNI_LOCAL = 5,
+    JVMTI_HEAP_ROOT_THREAD = 6,
+    JVMTI_HEAP_ROOT_OTHER = 7
+} jvmtiHeapRootKind;
+
+    /* Object Reference Enumeration */
+
+typedef enum {
+    JVMTI_REFERENCE_CLASS = 1,
+    JVMTI_REFERENCE_FIELD = 2,
+    JVMTI_REFERENCE_ARRAY_ELEMENT = 3,
+    JVMTI_REFERENCE_CLASS_LOADER = 4,
+    JVMTI_REFERENCE_SIGNERS = 5,
+    JVMTI_REFERENCE_PROTECTION_DOMAIN = 6,
+    JVMTI_REFERENCE_INTERFACE = 7,
+    JVMTI_REFERENCE_STATIC_FIELD = 8,
+    JVMTI_REFERENCE_CONSTANT_POOL = 9
+} jvmtiObjectReferenceKind;
+
+    /* Iteration Control Enumeration */
+
+typedef enum {
+    JVMTI_ITERATION_CONTINUE = 1,
+    JVMTI_ITERATION_IGNORE = 2,
+    JVMTI_ITERATION_ABORT = 0
+} jvmtiIterationControl;
+
+    /* Class Status Flags */
+
+enum {
+    JVMTI_CLASS_STATUS_VERIFIED = 1,
+    JVMTI_CLASS_STATUS_PREPARED = 2,
+    JVMTI_CLASS_STATUS_INITIALIZED = 4,
+    JVMTI_CLASS_STATUS_ERROR = 8,
+    JVMTI_CLASS_STATUS_ARRAY = 16,
+    JVMTI_CLASS_STATUS_PRIMITIVE = 32
+};
+
+    /* Event Enable/Disable */
+
+typedef enum {
+    JVMTI_ENABLE = 1,
+    JVMTI_DISABLE = 0
+} jvmtiEventMode;
+
+    /* Extension Function/Event Parameter Types */
+
+typedef enum {
+    JVMTI_TYPE_JBYTE = 101,
+    JVMTI_TYPE_JCHAR = 102,
+    JVMTI_TYPE_JSHORT = 103,
+    JVMTI_TYPE_JINT = 104,
+    JVMTI_TYPE_JLONG = 105,
+    JVMTI_TYPE_JFLOAT = 106,
+    JVMTI_TYPE_JDOUBLE = 107,
+    JVMTI_TYPE_JBOOLEAN = 108,
+    JVMTI_TYPE_JOBJECT = 109,
+    JVMTI_TYPE_JTHREAD = 110,
+    JVMTI_TYPE_JCLASS = 111,
+    JVMTI_TYPE_JVALUE = 112,
+    JVMTI_TYPE_JFIELDID = 113,
+    JVMTI_TYPE_JMETHODID = 114,
+    JVMTI_TYPE_CCHAR = 115,
+    JVMTI_TYPE_CVOID = 116,
+    JVMTI_TYPE_JNIENV = 117
+} jvmtiParamTypes;
+
+    /* Extension Function/Event Parameter Kinds */
+
+typedef enum {
+    JVMTI_KIND_IN = 91,
+    JVMTI_KIND_IN_PTR = 92,
+    JVMTI_KIND_IN_BUF = 93,
+    JVMTI_KIND_ALLOC_BUF = 94,
+    JVMTI_KIND_ALLOC_ALLOC_BUF = 95,
+    JVMTI_KIND_OUT = 96,
+    JVMTI_KIND_OUT_BUF = 97
+} jvmtiParamKind;
+
+    /* Timer Kinds */
+
+typedef enum {
+    JVMTI_TIMER_USER_CPU = 30,
+    JVMTI_TIMER_TOTAL_CPU = 31,
+    JVMTI_TIMER_ELAPSED = 32
+} jvmtiTimerKind;
+
+    /* Phases of execution */
+
+typedef enum {
+    JVMTI_PHASE_ONLOAD = 1,
+    JVMTI_PHASE_PRIMORDIAL = 2,
+    JVMTI_PHASE_START = 6,
+    JVMTI_PHASE_LIVE = 4,
+    JVMTI_PHASE_DEAD = 8
+} jvmtiPhase;
+
+    /* Version Interface Types */
+
+enum {
+    JVMTI_VERSION_INTERFACE_JNI = 0x00000000,
+    JVMTI_VERSION_INTERFACE_JVMTI = 0x30000000
+};
+
+    /* Version Masks */
+
+enum {
+    JVMTI_VERSION_MASK_INTERFACE_TYPE = 0x70000000,
+    JVMTI_VERSION_MASK_MAJOR = 0x0FFF0000,
+    JVMTI_VERSION_MASK_MINOR = 0x0000FF00,
+    JVMTI_VERSION_MASK_MICRO = 0x000000FF
+};
+
+    /* Version Shifts */
+
+enum {
+    JVMTI_VERSION_SHIFT_MAJOR = 16,
+    JVMTI_VERSION_SHIFT_MINOR = 8,
+    JVMTI_VERSION_SHIFT_MICRO = 0
+};
+
+    /* Verbose Flag Enumeration */
+
+typedef enum {
+    JVMTI_VERBOSE_OTHER = 0,
+    JVMTI_VERBOSE_GC = 1,
+    JVMTI_VERBOSE_CLASS = 2,
+    JVMTI_VERBOSE_JNI = 4
+} jvmtiVerboseFlag;
+
+    /* JLocation Format Enumeration */
+
+typedef enum {
+    JVMTI_JLOCATION_JVMBCI = 1,
+    JVMTI_JLOCATION_MACHINEPC = 2,
+    JVMTI_JLOCATION_OTHER = 0
+} jvmtiJlocationFormat;
+
+    /* Resource Exhaustion Flags */
+
+enum {
+    JVMTI_RESOURCE_EXHAUSTED_OOM_ERROR = 0x0001,
+    JVMTI_RESOURCE_EXHAUSTED_JAVA_HEAP = 0x0002,
+    JVMTI_RESOURCE_EXHAUSTED_THREADS = 0x0004
+};
+
+    /* Errors */
+
+typedef enum {
+    JVMTI_ERROR_NONE = 0,
+    JVMTI_ERROR_INVALID_THREAD = 10,
+    JVMTI_ERROR_INVALID_THREAD_GROUP = 11,
+    JVMTI_ERROR_INVALID_PRIORITY = 12,
+    JVMTI_ERROR_THREAD_NOT_SUSPENDED = 13,
+    JVMTI_ERROR_THREAD_SUSPENDED = 14,
+    JVMTI_ERROR_THREAD_NOT_ALIVE = 15,
+    JVMTI_ERROR_INVALID_OBJECT = 20,
+    JVMTI_ERROR_INVALID_CLASS = 21,
+    JVMTI_ERROR_CLASS_NOT_PREPARED = 22,
+    JVMTI_ERROR_INVALID_METHODID = 23,
+    JVMTI_ERROR_INVALID_LOCATION = 24,
+    JVMTI_ERROR_INVALID_FIELDID = 25,
+    JVMTI_ERROR_NO_MORE_FRAMES = 31,
+    JVMTI_ERROR_OPAQUE_FRAME = 32,
+    JVMTI_ERROR_TYPE_MISMATCH = 34,
+    JVMTI_ERROR_INVALID_SLOT = 35,
+    JVMTI_ERROR_DUPLICATE = 40,
+    JVMTI_ERROR_NOT_FOUND = 41,
+    JVMTI_ERROR_INVALID_MONITOR = 50,
+    JVMTI_ERROR_NOT_MONITOR_OWNER = 51,
+    JVMTI_ERROR_INTERRUPT = 52,
+    JVMTI_ERROR_INVALID_CLASS_FORMAT = 60,
+    JVMTI_ERROR_CIRCULAR_CLASS_DEFINITION = 61,
+    JVMTI_ERROR_FAILS_VERIFICATION = 62,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_ADDED = 63,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED = 64,
+    JVMTI_ERROR_INVALID_TYPESTATE = 65,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED = 66,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_DELETED = 67,
+    JVMTI_ERROR_UNSUPPORTED_VERSION = 68,
+    JVMTI_ERROR_NAMES_DONT_MATCH = 69,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED = 70,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED = 71,
+    JVMTI_ERROR_UNMODIFIABLE_CLASS = 79,
+    JVMTI_ERROR_NOT_AVAILABLE = 98,
+    JVMTI_ERROR_MUST_POSSESS_CAPABILITY = 99,
+    JVMTI_ERROR_NULL_POINTER = 100,
+    JVMTI_ERROR_ABSENT_INFORMATION = 101,
+    JVMTI_ERROR_INVALID_EVENT_TYPE = 102,
+    JVMTI_ERROR_ILLEGAL_ARGUMENT = 103,
+    JVMTI_ERROR_NATIVE_METHOD = 104,
+    JVMTI_ERROR_CLASS_LOADER_UNSUPPORTED = 106,
+    JVMTI_ERROR_OUT_OF_MEMORY = 110,
+    JVMTI_ERROR_ACCESS_DENIED = 111,
+    JVMTI_ERROR_WRONG_PHASE = 112,
+    JVMTI_ERROR_INTERNAL = 113,
+    JVMTI_ERROR_UNATTACHED_THREAD = 115,
+    JVMTI_ERROR_INVALID_ENVIRONMENT = 116,
+    JVMTI_ERROR_MAX = 116
+} jvmtiError;
+
+    /* Event IDs */
+
+typedef enum {
+    JVMTI_MIN_EVENT_TYPE_VAL = 50,
+    JVMTI_EVENT_VM_INIT = 50,
+    JVMTI_EVENT_VM_DEATH = 51,
+    JVMTI_EVENT_THREAD_START = 52,
+    JVMTI_EVENT_THREAD_END = 53,
+    JVMTI_EVENT_CLASS_FILE_LOAD_HOOK = 54,
+    JVMTI_EVENT_CLASS_LOAD = 55,
+    JVMTI_EVENT_CLASS_PREPARE = 56,
+    JVMTI_EVENT_VM_START = 57,
+    JVMTI_EVENT_EXCEPTION = 58,
+    JVMTI_EVENT_EXCEPTION_CATCH = 59,
+    JVMTI_EVENT_SINGLE_STEP = 60,
+    JVMTI_EVENT_FRAME_POP = 61,
+    JVMTI_EVENT_BREAKPOINT = 62,
+    JVMTI_EVENT_FIELD_ACCESS = 63,
+    JVMTI_EVENT_FIELD_MODIFICATION = 64,
+    JVMTI_EVENT_METHOD_ENTRY = 65,
+    JVMTI_EVENT_METHOD_EXIT = 66,
+    JVMTI_EVENT_NATIVE_METHOD_BIND = 67,
+    JVMTI_EVENT_COMPILED_METHOD_LOAD = 68,
+    JVMTI_EVENT_COMPILED_METHOD_UNLOAD = 69,
+    JVMTI_EVENT_DYNAMIC_CODE_GENERATED = 70,
+    JVMTI_EVENT_DATA_DUMP_REQUEST = 71,
+    JVMTI_EVENT_MONITOR_WAIT = 73,
+    JVMTI_EVENT_MONITOR_WAITED = 74,
+    JVMTI_EVENT_MONITOR_CONTENDED_ENTER = 75,
+    JVMTI_EVENT_MONITOR_CONTENDED_ENTERED = 76,
+    JVMTI_EVENT_RESOURCE_EXHAUSTED = 80,
+    JVMTI_EVENT_GARBAGE_COLLECTION_START = 81,
+    JVMTI_EVENT_GARBAGE_COLLECTION_FINISH = 82,
+    JVMTI_EVENT_OBJECT_FREE = 83,
+    JVMTI_EVENT_VM_OBJECT_ALLOC = 84,
+    JVMTI_MAX_EVENT_TYPE_VAL = 84
+} jvmtiEvent;
+
+
+    /* Pre-Declarations */
+struct _jvmtiThreadInfo;
+typedef struct _jvmtiThreadInfo jvmtiThreadInfo;
+struct _jvmtiMonitorStackDepthInfo;
+typedef struct _jvmtiMonitorStackDepthInfo jvmtiMonitorStackDepthInfo;
+struct _jvmtiThreadGroupInfo;
+typedef struct _jvmtiThreadGroupInfo jvmtiThreadGroupInfo;
+struct _jvmtiFrameInfo;
+typedef struct _jvmtiFrameInfo jvmtiFrameInfo;
+struct _jvmtiStackInfo;
+typedef struct _jvmtiStackInfo jvmtiStackInfo;
+struct _jvmtiHeapReferenceInfoField;
+typedef struct _jvmtiHeapReferenceInfoField jvmtiHeapReferenceInfoField;
+struct _jvmtiHeapReferenceInfoArray;
+typedef struct _jvmtiHeapReferenceInfoArray jvmtiHeapReferenceInfoArray;
+struct _jvmtiHeapReferenceInfoConstantPool;
+typedef struct _jvmtiHeapReferenceInfoConstantPool jvmtiHeapReferenceInfoConstantPool;
+struct _jvmtiHeapReferenceInfoStackLocal;
+typedef struct _jvmtiHeapReferenceInfoStackLocal jvmtiHeapReferenceInfoStackLocal;
+struct _jvmtiHeapReferenceInfoJniLocal;
+typedef struct _jvmtiHeapReferenceInfoJniLocal jvmtiHeapReferenceInfoJniLocal;
+struct _jvmtiHeapReferenceInfoReserved;
+typedef struct _jvmtiHeapReferenceInfoReserved jvmtiHeapReferenceInfoReserved;
+union _jvmtiHeapReferenceInfo;
+typedef union _jvmtiHeapReferenceInfo jvmtiHeapReferenceInfo;
+struct _jvmtiHeapCallbacks;
+typedef struct _jvmtiHeapCallbacks jvmtiHeapCallbacks;
+struct _jvmtiClassDefinition;
+typedef struct _jvmtiClassDefinition jvmtiClassDefinition;
+struct _jvmtiMonitorUsage;
+typedef struct _jvmtiMonitorUsage jvmtiMonitorUsage;
+struct _jvmtiLineNumberEntry;
+typedef struct _jvmtiLineNumberEntry jvmtiLineNumberEntry;
+struct _jvmtiLocalVariableEntry;
+typedef struct _jvmtiLocalVariableEntry jvmtiLocalVariableEntry;
+struct _jvmtiParamInfo;
+typedef struct _jvmtiParamInfo jvmtiParamInfo;
+struct _jvmtiExtensionFunctionInfo;
+typedef struct _jvmtiExtensionFunctionInfo jvmtiExtensionFunctionInfo;
+struct _jvmtiExtensionEventInfo;
+typedef struct _jvmtiExtensionEventInfo jvmtiExtensionEventInfo;
+struct _jvmtiTimerInfo;
+typedef struct _jvmtiTimerInfo jvmtiTimerInfo;
+struct _jvmtiAddrLocationMap;
+typedef struct _jvmtiAddrLocationMap jvmtiAddrLocationMap;
+
+    /* Function Types */
+
+typedef void (JNICALL *jvmtiStartFunction)
+    (jvmtiEnv* jvmti_env, JNIEnv* jni_env, void* arg);
+
+typedef jint (JNICALL *jvmtiHeapIterationCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, jint length, void* user_data);
+
+typedef jint (JNICALL *jvmtiHeapReferenceCallback)
+    (jvmtiHeapReferenceKind reference_kind, const jvmtiHeapReferenceInfo* reference_info, jlong class_tag, jlong referrer_class_tag, jlong size, jlong* tag_ptr, jlong* referrer_tag_ptr, jint length, void* user_data);
+
+typedef jint (JNICALL *jvmtiPrimitiveFieldCallback)
+    (jvmtiHeapReferenceKind kind, const jvmtiHeapReferenceInfo* info, jlong object_class_tag, jlong* object_tag_ptr, jvalue value, jvmtiPrimitiveType value_type, void* user_data);
+
+typedef jint (JNICALL *jvmtiArrayPrimitiveValueCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, jint element_count, jvmtiPrimitiveType element_type, const void* elements, void* user_data);
+
+typedef jint (JNICALL *jvmtiStringPrimitiveValueCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, const jchar* value, jint value_length, void* user_data);
+
+typedef jint (JNICALL *jvmtiReservedCallback)
+    ();
+
+typedef jvmtiIterationControl (JNICALL *jvmtiHeapObjectCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, void* user_data);
+
+typedef jvmtiIterationControl (JNICALL *jvmtiHeapRootCallback)
+    (jvmtiHeapRootKind root_kind, jlong class_tag, jlong size, jlong* tag_ptr, void* user_data);
+
+typedef jvmtiIterationControl (JNICALL *jvmtiStackReferenceCallback)
+    (jvmtiHeapRootKind root_kind, jlong class_tag, jlong size, jlong* tag_ptr, jlong thread_tag, jint depth, jmethodID method, jint slot, void* user_data);
+
+typedef jvmtiIterationControl (JNICALL *jvmtiObjectReferenceCallback)
+    (jvmtiObjectReferenceKind reference_kind, jlong class_tag, jlong size, jlong* tag_ptr, jlong referrer_tag, jint referrer_index, void* user_data);
+
+typedef jvmtiError (JNICALL *jvmtiExtensionFunction)
+    (jvmtiEnv* jvmti_env,  ...);
+
+typedef void (JNICALL *jvmtiExtensionEvent)
+    (jvmtiEnv* jvmti_env,  ...);
+
+
+    /* Structure Types */
+struct _jvmtiThreadInfo {
+    char* name;
+    jint priority;
+    jboolean is_daemon;
+    jthreadGroup thread_group;
+    jobject context_class_loader;
+};
+struct _jvmtiMonitorStackDepthInfo {
+    jobject monitor;
+    jint stack_depth;
+};
+struct _jvmtiThreadGroupInfo {
+    jthreadGroup parent;
+    char* name;
+    jint max_priority;
+    jboolean is_daemon;
+};
+struct _jvmtiFrameInfo {
+    jmethodID method;
+    jlocation location;
+};
+struct _jvmtiStackInfo {
+    jthread thread;
+    jint state;
+    jvmtiFrameInfo* frame_buffer;
+    jint frame_count;
+};
+struct _jvmtiHeapReferenceInfoField {
+    jint index;
+};
+struct _jvmtiHeapReferenceInfoArray {
+    jint index;
+};
+struct _jvmtiHeapReferenceInfoConstantPool {
+    jint index;
+};
+struct _jvmtiHeapReferenceInfoStackLocal {
+    jlong thread_tag;
+    jlong thread_id;
+    jint depth;
+    jmethodID method;
+    jlocation location;
+    jint slot;
+};
+struct _jvmtiHeapReferenceInfoJniLocal {
+    jlong thread_tag;
+    jlong thread_id;
+    jint depth;
+    jmethodID method;
+};
+struct _jvmtiHeapReferenceInfoReserved {
+    jlong reserved1;
+    jlong reserved2;
+    jlong reserved3;
+    jlong reserved4;
+    jlong reserved5;
+    jlong reserved6;
+    jlong reserved7;
+    jlong reserved8;
+};
+union _jvmtiHeapReferenceInfo {
+    jvmtiHeapReferenceInfoField field;
+    jvmtiHeapReferenceInfoArray array;
+    jvmtiHeapReferenceInfoConstantPool constant_pool;
+    jvmtiHeapReferenceInfoStackLocal stack_local;
+    jvmtiHeapReferenceInfoJniLocal jni_local;
+    jvmtiHeapReferenceInfoReserved other;
+};
+struct _jvmtiHeapCallbacks {
+    jvmtiHeapIterationCallback heap_iteration_callback;
+    jvmtiHeapReferenceCallback heap_reference_callback;
+    jvmtiPrimitiveFieldCallback primitive_field_callback;
+    jvmtiArrayPrimitiveValueCallback array_primitive_value_callback;
+    jvmtiStringPrimitiveValueCallback string_primitive_value_callback;
+    jvmtiReservedCallback reserved5;
+    jvmtiReservedCallback reserved6;
+    jvmtiReservedCallback reserved7;
+    jvmtiReservedCallback reserved8;
+    jvmtiReservedCallback reserved9;
+    jvmtiReservedCallback reserved10;
+    jvmtiReservedCallback reserved11;
+    jvmtiReservedCallback reserved12;
+    jvmtiReservedCallback reserved13;
+    jvmtiReservedCallback reserved14;
+    jvmtiReservedCallback reserved15;
+};
+struct _jvmtiClassDefinition {
+    jclass klass;
+    jint class_byte_count;
+    const unsigned char* class_bytes;
+};
+struct _jvmtiMonitorUsage {
+    jthread owner;
+    jint entry_count;
+    jint waiter_count;
+    jthread* waiters;
+    jint notify_waiter_count;
+    jthread* notify_waiters;
+};
+struct _jvmtiLineNumberEntry {
+    jlocation start_location;
+    jint line_number;
+};
+struct _jvmtiLocalVariableEntry {
+    jlocation start_location;
+    jint length;
+    char* name;
+    char* signature;
+    char* generic_signature;
+    jint slot;
+};
+struct _jvmtiParamInfo {
+    char* name;
+    jvmtiParamKind kind;
+    jvmtiParamTypes base_type;
+    jboolean null_ok;
+};
+struct _jvmtiExtensionFunctionInfo {
+    jvmtiExtensionFunction func;
+    char* id;
+    char* short_description;
+    jint param_count;
+    jvmtiParamInfo* params;
+    jint error_count;
+    jvmtiError* errors;
+};
+struct _jvmtiExtensionEventInfo {
+    jint extension_event_index;
+    char* id;
+    char* short_description;
+    jint param_count;
+    jvmtiParamInfo* params;
+};
+struct _jvmtiTimerInfo {
+    jlong max_value;
+    jboolean may_skip_forward;
+    jboolean may_skip_backward;
+    jvmtiTimerKind kind;
+    jlong reserved1;
+    jlong reserved2;
+};
+struct _jvmtiAddrLocationMap {
+    const void* start_address;
+    jlocation location;
+};
+
+typedef struct {
+    unsigned int can_tag_objects : 1;
+    unsigned int can_generate_field_modification_events : 1;
+    unsigned int can_generate_field_access_events : 1;
+    unsigned int can_get_bytecodes : 1;
+    unsigned int can_get_synthetic_attribute : 1;
+    unsigned int can_get_owned_monitor_info : 1;
+    unsigned int can_get_current_contended_monitor : 1;
+    unsigned int can_get_monitor_info : 1;
+    unsigned int can_pop_frame : 1;
+    unsigned int can_redefine_classes : 1;
+    unsigned int can_signal_thread : 1;
+    unsigned int can_get_source_file_name : 1;
+    unsigned int can_get_line_numbers : 1;
+    unsigned int can_get_source_debug_extension : 1;
+    unsigned int can_access_local_variables : 1;
+    unsigned int can_maintain_original_method_order : 1;
+    unsigned int can_generate_single_step_events : 1;
+    unsigned int can_generate_exception_events : 1;
+    unsigned int can_generate_frame_pop_events : 1;
+    unsigned int can_generate_breakpoint_events : 1;
+    unsigned int can_suspend : 1;
+    unsigned int can_redefine_any_class : 1;
+    unsigned int can_get_current_thread_cpu_time : 1;
+    unsigned int can_get_thread_cpu_time : 1;
+    unsigned int can_generate_method_entry_events : 1;
+    unsigned int can_generate_method_exit_events : 1;
+    unsigned int can_generate_all_class_hook_events : 1;
+    unsigned int can_generate_compiled_method_load_events : 1;
+    unsigned int can_generate_monitor_events : 1;
+    unsigned int can_generate_vm_object_alloc_events : 1;
+    unsigned int can_generate_native_method_bind_events : 1;
+    unsigned int can_generate_garbage_collection_events : 1;
+    unsigned int can_generate_object_free_events : 1;
+    unsigned int can_force_early_return : 1;
+    unsigned int can_get_owned_monitor_stack_depth_info : 1;
+    unsigned int can_get_constant_pool : 1;
+    unsigned int can_set_native_method_prefix : 1;
+    unsigned int can_retransform_classes : 1;
+    unsigned int can_retransform_any_class : 1;
+    unsigned int can_generate_resource_exhaustion_heap_events : 1;
+    unsigned int can_generate_resource_exhaustion_threads_events : 1;
+    unsigned int : 7;
+    unsigned int : 16;
+    unsigned int : 16;
+    unsigned int : 16;
+    unsigned int : 16;
+    unsigned int : 16;
+} jvmtiCapabilities;
+
+
+    /* Event Definitions */
+
+typedef void (JNICALL *jvmtiEventReserved)(void);
+
+
+typedef void (JNICALL *jvmtiEventBreakpoint)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location);
+
+typedef void (JNICALL *jvmtiEventClassFileLoadHook)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jclass class_being_redefined,
+     jobject loader,
+     const char* name,
+     jobject protection_domain,
+     jint class_data_len,
+     const unsigned char* class_data,
+     jint* new_class_data_len,
+     unsigned char** new_class_data);
+
+typedef void (JNICALL *jvmtiEventClassLoad)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jclass klass);
+
+typedef void (JNICALL *jvmtiEventClassPrepare)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jclass klass);
+
+typedef void (JNICALL *jvmtiEventCompiledMethodLoad)
+    (jvmtiEnv *jvmti_env,
+     jmethodID method,
+     jint code_size,
+     const void* code_addr,
+     jint map_length,
+     const jvmtiAddrLocationMap* map,
+     const void* compile_info);
+
+typedef void (JNICALL *jvmtiEventCompiledMethodUnload)
+    (jvmtiEnv *jvmti_env,
+     jmethodID method,
+     const void* code_addr);
+
+typedef void (JNICALL *jvmtiEventDataDumpRequest)
+    (jvmtiEnv *jvmti_env);
+
+typedef void (JNICALL *jvmtiEventDynamicCodeGenerated)
+    (jvmtiEnv *jvmti_env,
+     const char* name,
+     const void* address,
+     jint length);
+
+typedef void (JNICALL *jvmtiEventException)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jobject exception,
+     jmethodID catch_method,
+     jlocation catch_location);
+
+typedef void (JNICALL *jvmtiEventExceptionCatch)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jobject exception);
+
+typedef void (JNICALL *jvmtiEventFieldAccess)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jclass field_klass,
+     jobject object,
+     jfieldID field);
+
+typedef void (JNICALL *jvmtiEventFieldModification)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jclass field_klass,
+     jobject object,
+     jfieldID field,
+     char signature_type,
+     jvalue new_value);
+
+typedef void (JNICALL *jvmtiEventFramePop)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jboolean was_popped_by_exception);
+
+typedef void (JNICALL *jvmtiEventGarbageCollectionFinish)
+    (jvmtiEnv *jvmti_env);
+
+typedef void (JNICALL *jvmtiEventGarbageCollectionStart)
+    (jvmtiEnv *jvmti_env);
+
+typedef void (JNICALL *jvmtiEventMethodEntry)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method);
+
+typedef void (JNICALL *jvmtiEventMethodExit)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jboolean was_popped_by_exception,
+     jvalue return_value);
+
+typedef void (JNICALL *jvmtiEventMonitorContendedEnter)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object);
+
+typedef void (JNICALL *jvmtiEventMonitorContendedEntered)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object);
+
+typedef void (JNICALL *jvmtiEventMonitorWait)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object,
+     jlong timeout);
+
+typedef void (JNICALL *jvmtiEventMonitorWaited)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object,
+     jboolean timed_out);
+
+typedef void (JNICALL *jvmtiEventNativeMethodBind)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     void* address,
+     void** new_address_ptr);
+
+typedef void (JNICALL *jvmtiEventObjectFree)
+    (jvmtiEnv *jvmti_env,
+     jlong tag);
+
+typedef void (JNICALL *jvmtiEventResourceExhausted)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jint flags,
+     const void* reserved,
+     const char* description);
+
+typedef void (JNICALL *jvmtiEventSingleStep)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location);
+
+typedef void (JNICALL *jvmtiEventThreadEnd)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread);
+
+typedef void (JNICALL *jvmtiEventThreadStart)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread);
+
+typedef void (JNICALL *jvmtiEventVMDeath)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env);
+
+typedef void (JNICALL *jvmtiEventVMInit)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread);
+
+typedef void (JNICALL *jvmtiEventVMObjectAlloc)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object,
+     jclass object_klass,
+     jlong size);
+
+typedef void (JNICALL *jvmtiEventVMStart)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env);
+
+    /* Event Callback Structure */
+
+typedef struct {
+                              /*   50 : VM Initialization Event */
+    jvmtiEventVMInit VMInit;
+                              /*   51 : VM Death Event */
+    jvmtiEventVMDeath VMDeath;
+                              /*   52 : Thread Start */
+    jvmtiEventThreadStart ThreadStart;
+                              /*   53 : Thread End */
+    jvmtiEventThreadEnd ThreadEnd;
+                              /*   54 : Class File Load Hook */
+    jvmtiEventClassFileLoadHook ClassFileLoadHook;
+                              /*   55 : Class Load */
+    jvmtiEventClassLoad ClassLoad;
+                              /*   56 : Class Prepare */
+    jvmtiEventClassPrepare ClassPrepare;
+                              /*   57 : VM Start Event */
+    jvmtiEventVMStart VMStart;
+                              /*   58 : Exception */
+    jvmtiEventException Exception;
+                              /*   59 : Exception Catch */
+    jvmtiEventExceptionCatch ExceptionCatch;
+                              /*   60 : Single Step */
+    jvmtiEventSingleStep SingleStep;
+                              /*   61 : Frame Pop */
+    jvmtiEventFramePop FramePop;
+                              /*   62 : Breakpoint */
+    jvmtiEventBreakpoint Breakpoint;
+                              /*   63 : Field Access */
+    jvmtiEventFieldAccess FieldAccess;
+                              /*   64 : Field Modification */
+    jvmtiEventFieldModification FieldModification;
+                              /*   65 : Method Entry */
+    jvmtiEventMethodEntry MethodEntry;
+                              /*   66 : Method Exit */
+    jvmtiEventMethodExit MethodExit;
+                              /*   67 : Native Method Bind */
+    jvmtiEventNativeMethodBind NativeMethodBind;
+                              /*   68 : Compiled Method Load */
+    jvmtiEventCompiledMethodLoad CompiledMethodLoad;
+                              /*   69 : Compiled Method Unload */
+    jvmtiEventCompiledMethodUnload CompiledMethodUnload;
+                              /*   70 : Dynamic Code Generated */
+    jvmtiEventDynamicCodeGenerated DynamicCodeGenerated;
+                              /*   71 : Data Dump Request */
+    jvmtiEventDataDumpRequest DataDumpRequest;
+                              /*   72 */
+    jvmtiEventReserved reserved72;
+                              /*   73 : Monitor Wait */
+    jvmtiEventMonitorWait MonitorWait;
+                              /*   74 : Monitor Waited */
+    jvmtiEventMonitorWaited MonitorWaited;
+                              /*   75 : Monitor Contended Enter */
+    jvmtiEventMonitorContendedEnter MonitorContendedEnter;
+                              /*   76 : Monitor Contended Entered */
+    jvmtiEventMonitorContendedEntered MonitorContendedEntered;
+                              /*   77 */
+    jvmtiEventReserved reserved77;
+                              /*   78 */
+    jvmtiEventReserved reserved78;
+                              /*   79 */
+    jvmtiEventReserved reserved79;
+                              /*   80 : Resource Exhausted */
+    jvmtiEventResourceExhausted ResourceExhausted;
+                              /*   81 : Garbage Collection Start */
+    jvmtiEventGarbageCollectionStart GarbageCollectionStart;
+                              /*   82 : Garbage Collection Finish */
+    jvmtiEventGarbageCollectionFinish GarbageCollectionFinish;
+                              /*   83 : Object Free */
+    jvmtiEventObjectFree ObjectFree;
+                              /*   84 : VM Object Allocation */
+    jvmtiEventVMObjectAlloc VMObjectAlloc;
+} jvmtiEventCallbacks;
+
+
+    /* Function Interface */
+
+typedef struct jvmtiInterface_1_ {
+
+  /*   1 :  RESERVED */
+  void *reserved1;
+
+  /*   2 : Set Event Notification Mode */
+  jvmtiError (JNICALL *SetEventNotificationMode) (jvmtiEnv* env,
+    jvmtiEventMode mode,
+    jvmtiEvent event_type,
+    jthread event_thread,
+     ...);
+
+  /*   3 :  RESERVED */
+  void *reserved3;
+
+  /*   4 : Get All Threads */
+  jvmtiError (JNICALL *GetAllThreads) (jvmtiEnv* env,
+    jint* threads_count_ptr,
+    jthread** threads_ptr);
+
+  /*   5 : Suspend Thread */
+  jvmtiError (JNICALL *SuspendThread) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   6 : Resume Thread */
+  jvmtiError (JNICALL *ResumeThread) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   7 : Stop Thread */
+  jvmtiError (JNICALL *StopThread) (jvmtiEnv* env,
+    jthread thread,
+    jobject exception);
+
+  /*   8 : Interrupt Thread */
+  jvmtiError (JNICALL *InterruptThread) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   9 : Get Thread Info */
+  jvmtiError (JNICALL *GetThreadInfo) (jvmtiEnv* env,
+    jthread thread,
+    jvmtiThreadInfo* info_ptr);
+
+  /*   10 : Get Owned Monitor Info */
+  jvmtiError (JNICALL *GetOwnedMonitorInfo) (jvmtiEnv* env,
+    jthread thread,
+    jint* owned_monitor_count_ptr,
+    jobject** owned_monitors_ptr);
+
+  /*   11 : Get Current Contended Monitor */
+  jvmtiError (JNICALL *GetCurrentContendedMonitor) (jvmtiEnv* env,
+    jthread thread,
+    jobject* monitor_ptr);
+
+  /*   12 : Run Agent Thread */
+  jvmtiError (JNICALL *RunAgentThread) (jvmtiEnv* env,
+    jthread thread,
+    jvmtiStartFunction proc,
+    const void* arg,
+    jint priority);
+
+  /*   13 : Get Top Thread Groups */
+  jvmtiError (JNICALL *GetTopThreadGroups) (jvmtiEnv* env,
+    jint* group_count_ptr,
+    jthreadGroup** groups_ptr);
+
+  /*   14 : Get Thread Group Info */
+  jvmtiError (JNICALL *GetThreadGroupInfo) (jvmtiEnv* env,
+    jthreadGroup group,
+    jvmtiThreadGroupInfo* info_ptr);
+
+  /*   15 : Get Thread Group Children */
+  jvmtiError (JNICALL *GetThreadGroupChildren) (jvmtiEnv* env,
+    jthreadGroup group,
+    jint* thread_count_ptr,
+    jthread** threads_ptr,
+    jint* group_count_ptr,
+    jthreadGroup** groups_ptr);
+
+  /*   16 : Get Frame Count */
+  jvmtiError (JNICALL *GetFrameCount) (jvmtiEnv* env,
+    jthread thread,
+    jint* count_ptr);
+
+  /*   17 : Get Thread State */
+  jvmtiError (JNICALL *GetThreadState) (jvmtiEnv* env,
+    jthread thread,
+    jint* thread_state_ptr);
+
+  /*   18 : Get Current Thread */
+  jvmtiError (JNICALL *GetCurrentThread) (jvmtiEnv* env,
+    jthread* thread_ptr);
+
+  /*   19 : Get Frame Location */
+  jvmtiError (JNICALL *GetFrameLocation) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jmethodID* method_ptr,
+    jlocation* location_ptr);
+
+  /*   20 : Notify Frame Pop */
+  jvmtiError (JNICALL *NotifyFramePop) (jvmtiEnv* env,
+    jthread thread,
+    jint depth);
+
+  /*   21 : Get Local Variable - Object */
+  jvmtiError (JNICALL *GetLocalObject) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jobject* value_ptr);
+
+  /*   22 : Get Local Variable - Int */
+  jvmtiError (JNICALL *GetLocalInt) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jint* value_ptr);
+
+  /*   23 : Get Local Variable - Long */
+  jvmtiError (JNICALL *GetLocalLong) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jlong* value_ptr);
+
+  /*   24 : Get Local Variable - Float */
+  jvmtiError (JNICALL *GetLocalFloat) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jfloat* value_ptr);
+
+  /*   25 : Get Local Variable - Double */
+  jvmtiError (JNICALL *GetLocalDouble) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jdouble* value_ptr);
+
+  /*   26 : Set Local Variable - Object */
+  jvmtiError (JNICALL *SetLocalObject) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jobject value);
+
+  /*   27 : Set Local Variable - Int */
+  jvmtiError (JNICALL *SetLocalInt) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jint value);
+
+  /*   28 : Set Local Variable - Long */
+  jvmtiError (JNICALL *SetLocalLong) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jlong value);
+
+  /*   29 : Set Local Variable - Float */
+  jvmtiError (JNICALL *SetLocalFloat) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jfloat value);
+
+  /*   30 : Set Local Variable - Double */
+  jvmtiError (JNICALL *SetLocalDouble) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jdouble value);
+
+  /*   31 : Create Raw Monitor */
+  jvmtiError (JNICALL *CreateRawMonitor) (jvmtiEnv* env,
+    const char* name,
+    jrawMonitorID* monitor_ptr);
+
+  /*   32 : Destroy Raw Monitor */
+  jvmtiError (JNICALL *DestroyRawMonitor) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   33 : Raw Monitor Enter */
+  jvmtiError (JNICALL *RawMonitorEnter) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   34 : Raw Monitor Exit */
+  jvmtiError (JNICALL *RawMonitorExit) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   35 : Raw Monitor Wait */
+  jvmtiError (JNICALL *RawMonitorWait) (jvmtiEnv* env,
+    jrawMonitorID monitor,
+    jlong millis);
+
+  /*   36 : Raw Monitor Notify */
+  jvmtiError (JNICALL *RawMonitorNotify) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   37 : Raw Monitor Notify All */
+  jvmtiError (JNICALL *RawMonitorNotifyAll) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   38 : Set Breakpoint */
+  jvmtiError (JNICALL *SetBreakpoint) (jvmtiEnv* env,
+    jmethodID method,
+    jlocation location);
+
+  /*   39 : Clear Breakpoint */
+  jvmtiError (JNICALL *ClearBreakpoint) (jvmtiEnv* env,
+    jmethodID method,
+    jlocation location);
+
+  /*   40 :  RESERVED */
+  void *reserved40;
+
+  /*   41 : Set Field Access Watch */
+  jvmtiError (JNICALL *SetFieldAccessWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   42 : Clear Field Access Watch */
+  jvmtiError (JNICALL *ClearFieldAccessWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   43 : Set Field Modification Watch */
+  jvmtiError (JNICALL *SetFieldModificationWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   44 : Clear Field Modification Watch */
+  jvmtiError (JNICALL *ClearFieldModificationWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   45 : Is Modifiable Class */
+  jvmtiError (JNICALL *IsModifiableClass) (jvmtiEnv* env,
+    jclass klass,
+    jboolean* is_modifiable_class_ptr);
+
+  /*   46 : Allocate */
+  jvmtiError (JNICALL *Allocate) (jvmtiEnv* env,
+    jlong size,
+    unsigned char** mem_ptr);
+
+  /*   47 : Deallocate */
+  jvmtiError (JNICALL *Deallocate) (jvmtiEnv* env,
+    unsigned char* mem);
+
+  /*   48 : Get Class Signature */
+  jvmtiError (JNICALL *GetClassSignature) (jvmtiEnv* env,
+    jclass klass,
+    char** signature_ptr,
+    char** generic_ptr);
+
+  /*   49 : Get Class Status */
+  jvmtiError (JNICALL *GetClassStatus) (jvmtiEnv* env,
+    jclass klass,
+    jint* status_ptr);
+
+  /*   50 : Get Source File Name */
+  jvmtiError (JNICALL *GetSourceFileName) (jvmtiEnv* env,
+    jclass klass,
+    char** source_name_ptr);
+
+  /*   51 : Get Class Modifiers */
+  jvmtiError (JNICALL *GetClassModifiers) (jvmtiEnv* env,
+    jclass klass,
+    jint* modifiers_ptr);
+
+  /*   52 : Get Class Methods */
+  jvmtiError (JNICALL *GetClassMethods) (jvmtiEnv* env,
+    jclass klass,
+    jint* method_count_ptr,
+    jmethodID** methods_ptr);
+
+  /*   53 : Get Class Fields */
+  jvmtiError (JNICALL *GetClassFields) (jvmtiEnv* env,
+    jclass klass,
+    jint* field_count_ptr,
+    jfieldID** fields_ptr);
+
+  /*   54 : Get Implemented Interfaces */
+  jvmtiError (JNICALL *GetImplementedInterfaces) (jvmtiEnv* env,
+    jclass klass,
+    jint* interface_count_ptr,
+    jclass** interfaces_ptr);
+
+  /*   55 : Is Interface */
+  jvmtiError (JNICALL *IsInterface) (jvmtiEnv* env,
+    jclass klass,
+    jboolean* is_interface_ptr);
+
+  /*   56 : Is Array Class */
+  jvmtiError (JNICALL *IsArrayClass) (jvmtiEnv* env,
+    jclass klass,
+    jboolean* is_array_class_ptr);
+
+  /*   57 : Get Class Loader */
+  jvmtiError (JNICALL *GetClassLoader) (jvmtiEnv* env,
+    jclass klass,
+    jobject* classloader_ptr);
+
+  /*   58 : Get Object Hash Code */
+  jvmtiError (JNICALL *GetObjectHashCode) (jvmtiEnv* env,
+    jobject object,
+    jint* hash_code_ptr);
+
+  /*   59 : Get Object Monitor Usage */
+  jvmtiError (JNICALL *GetObjectMonitorUsage) (jvmtiEnv* env,
+    jobject object,
+    jvmtiMonitorUsage* info_ptr);
+
+  /*   60 : Get Field Name (and Signature) */
+  jvmtiError (JNICALL *GetFieldName) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    char** name_ptr,
+    char** signature_ptr,
+    char** generic_ptr);
+
+  /*   61 : Get Field Declaring Class */
+  jvmtiError (JNICALL *GetFieldDeclaringClass) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    jclass* declaring_class_ptr);
+
+  /*   62 : Get Field Modifiers */
+  jvmtiError (JNICALL *GetFieldModifiers) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    jint* modifiers_ptr);
+
+  /*   63 : Is Field Synthetic */
+  jvmtiError (JNICALL *IsFieldSynthetic) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    jboolean* is_synthetic_ptr);
+
+  /*   64 : Get Method Name (and Signature) */
+  jvmtiError (JNICALL *GetMethodName) (jvmtiEnv* env,
+    jmethodID method,
+    char** name_ptr,
+    char** signature_ptr,
+    char** generic_ptr);
+
+  /*   65 : Get Method Declaring Class */
+  jvmtiError (JNICALL *GetMethodDeclaringClass) (jvmtiEnv* env,
+    jmethodID method,
+    jclass* declaring_class_ptr);
+
+  /*   66 : Get Method Modifiers */
+  jvmtiError (JNICALL *GetMethodModifiers) (jvmtiEnv* env,
+    jmethodID method,
+    jint* modifiers_ptr);
+
+  /*   67 :  RESERVED */
+  void *reserved67;
+
+  /*   68 : Get Max Locals */
+  jvmtiError (JNICALL *GetMaxLocals) (jvmtiEnv* env,
+    jmethodID method,
+    jint* max_ptr);
+
+  /*   69 : Get Arguments Size */
+  jvmtiError (JNICALL *GetArgumentsSize) (jvmtiEnv* env,
+    jmethodID method,
+    jint* size_ptr);
+
+  /*   70 : Get Line Number Table */
+  jvmtiError (JNICALL *GetLineNumberTable) (jvmtiEnv* env,
+    jmethodID method,
+    jint* entry_count_ptr,
+    jvmtiLineNumberEntry** table_ptr);
+
+  /*   71 : Get Method Location */
+  jvmtiError (JNICALL *GetMethodLocation) (jvmtiEnv* env,
+    jmethodID method,
+    jlocation* start_location_ptr,
+    jlocation* end_location_ptr);
+
+  /*   72 : Get Local Variable Table */
+  jvmtiError (JNICALL *GetLocalVariableTable) (jvmtiEnv* env,
+    jmethodID method,
+    jint* entry_count_ptr,
+    jvmtiLocalVariableEntry** table_ptr);
+
+  /*   73 : Set Native Method Prefix */
+  jvmtiError (JNICALL *SetNativeMethodPrefix) (jvmtiEnv* env,
+    const char* prefix);
+
+  /*   74 : Set Native Method Prefixes */
+  jvmtiError (JNICALL *SetNativeMethodPrefixes) (jvmtiEnv* env,
+    jint prefix_count,
+    char** prefixes);
+
+  /*   75 : Get Bytecodes */
+  jvmtiError (JNICALL *GetBytecodes) (jvmtiEnv* env,
+    jmethodID method,
+    jint* bytecode_count_ptr,
+    unsigned char** bytecodes_ptr);
+
+  /*   76 : Is Method Native */
+  jvmtiError (JNICALL *IsMethodNative) (jvmtiEnv* env,
+    jmethodID method,
+    jboolean* is_native_ptr);
+
+  /*   77 : Is Method Synthetic */
+  jvmtiError (JNICALL *IsMethodSynthetic) (jvmtiEnv* env,
+    jmethodID method,
+    jboolean* is_synthetic_ptr);
+
+  /*   78 : Get Loaded Classes */
+  jvmtiError (JNICALL *GetLoadedClasses) (jvmtiEnv* env,
+    jint* class_count_ptr,
+    jclass** classes_ptr);
+
+  /*   79 : Get Classloader Classes */
+  jvmtiError (JNICALL *GetClassLoaderClasses) (jvmtiEnv* env,
+    jobject initiating_loader,
+    jint* class_count_ptr,
+    jclass** classes_ptr);
+
+  /*   80 : Pop Frame */
+  jvmtiError (JNICALL *PopFrame) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   81 : Force Early Return - Object */
+  jvmtiError (JNICALL *ForceEarlyReturnObject) (jvmtiEnv* env,
+    jthread thread,
+    jobject value);
+
+  /*   82 : Force Early Return - Int */
+  jvmtiError (JNICALL *ForceEarlyReturnInt) (jvmtiEnv* env,
+    jthread thread,
+    jint value);
+
+  /*   83 : Force Early Return - Long */
+  jvmtiError (JNICALL *ForceEarlyReturnLong) (jvmtiEnv* env,
+    jthread thread,
+    jlong value);
+
+  /*   84 : Force Early Return - Float */
+  jvmtiError (JNICALL *ForceEarlyReturnFloat) (jvmtiEnv* env,
+    jthread thread,
+    jfloat value);
+
+  /*   85 : Force Early Return - Double */
+  jvmtiError (JNICALL *ForceEarlyReturnDouble) (jvmtiEnv* env,
+    jthread thread,
+    jdouble value);
+
+  /*   86 : Force Early Return - Void */
+  jvmtiError (JNICALL *ForceEarlyReturnVoid) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   87 : Redefine Classes */
+  jvmtiError (JNICALL *RedefineClasses) (jvmtiEnv* env,
+    jint class_count,
+    const jvmtiClassDefinition* class_definitions);
+
+  /*   88 : Get Version Number */
+  jvmtiError (JNICALL *GetVersionNumber) (jvmtiEnv* env,
+    jint* version_ptr);
+
+  /*   89 : Get Capabilities */
+  jvmtiError (JNICALL *GetCapabilities) (jvmtiEnv* env,
+    jvmtiCapabilities* capabilities_ptr);
+
+  /*   90 : Get Source Debug Extension */
+  jvmtiError (JNICALL *GetSourceDebugExtension) (jvmtiEnv* env,
+    jclass klass,
+    char** source_debug_extension_ptr);
+
+  /*   91 : Is Method Obsolete */
+  jvmtiError (JNICALL *IsMethodObsolete) (jvmtiEnv* env,
+    jmethodID method,
+    jboolean* is_obsolete_ptr);
+
+  /*   92 : Suspend Thread List */
+  jvmtiError (JNICALL *SuspendThreadList) (jvmtiEnv* env,
+    jint request_count,
+    const jthread* request_list,
+    jvmtiError* results);
+
+  /*   93 : Resume Thread List */
+  jvmtiError (JNICALL *ResumeThreadList) (jvmtiEnv* env,
+    jint request_count,
+    const jthread* request_list,
+    jvmtiError* results);
+
+  /*   94 :  RESERVED */
+  void *reserved94;
+
+  /*   95 :  RESERVED */
+  void *reserved95;
+
+  /*   96 :  RESERVED */
+  void *reserved96;
+
+  /*   97 :  RESERVED */
+  void *reserved97;
+
+  /*   98 :  RESERVED */
+  void *reserved98;
+
+  /*   99 :  RESERVED */
+  void *reserved99;
+
+  /*   100 : Get All Stack Traces */
+  jvmtiError (JNICALL *GetAllStackTraces) (jvmtiEnv* env,
+    jint max_frame_count,
+    jvmtiStackInfo** stack_info_ptr,
+    jint* thread_count_ptr);
+
+  /*   101 : Get Thread List Stack Traces */
+  jvmtiError (JNICALL *GetThreadListStackTraces) (jvmtiEnv* env,
+    jint thread_count,
+    const jthread* thread_list,
+    jint max_frame_count,
+    jvmtiStackInfo** stack_info_ptr);
+
+  /*   102 : Get Thread Local Storage */
+  jvmtiError (JNICALL *GetThreadLocalStorage) (jvmtiEnv* env,
+    jthread thread,
+    void** data_ptr);
+
+  /*   103 : Set Thread Local Storage */
+  jvmtiError (JNICALL *SetThreadLocalStorage) (jvmtiEnv* env,
+    jthread thread,
+    const void* data);
+
+  /*   104 : Get Stack Trace */
+  jvmtiError (JNICALL *GetStackTrace) (jvmtiEnv* env,
+    jthread thread,
+    jint start_depth,
+    jint max_frame_count,
+    jvmtiFrameInfo* frame_buffer,
+    jint* count_ptr);
+
+  /*   105 :  RESERVED */
+  void *reserved105;
+
+  /*   106 : Get Tag */
+  jvmtiError (JNICALL *GetTag) (jvmtiEnv* env,
+    jobject object,
+    jlong* tag_ptr);
+
+  /*   107 : Set Tag */
+  jvmtiError (JNICALL *SetTag) (jvmtiEnv* env,
+    jobject object,
+    jlong tag);
+
+  /*   108 : Force Garbage Collection */
+  jvmtiError (JNICALL *ForceGarbageCollection) (jvmtiEnv* env);
+
+  /*   109 : Iterate Over Objects Reachable From Object */
+  jvmtiError (JNICALL *IterateOverObjectsReachableFromObject) (jvmtiEnv* env,
+    jobject object,
+    jvmtiObjectReferenceCallback object_reference_callback,
+    const void* user_data);
+
+  /*   110 : Iterate Over Reachable Objects */
+  jvmtiError (JNICALL *IterateOverReachableObjects) (jvmtiEnv* env,
+    jvmtiHeapRootCallback heap_root_callback,
+    jvmtiStackReferenceCallback stack_ref_callback,
+    jvmtiObjectReferenceCallback object_ref_callback,
+    const void* user_data);
+
+  /*   111 : Iterate Over Heap */
+  jvmtiError (JNICALL *IterateOverHeap) (jvmtiEnv* env,
+    jvmtiHeapObjectFilter object_filter,
+    jvmtiHeapObjectCallback heap_object_callback,
+    const void* user_data);
+
+  /*   112 : Iterate Over Instances Of Class */
+  jvmtiError (JNICALL *IterateOverInstancesOfClass) (jvmtiEnv* env,
+    jclass klass,
+    jvmtiHeapObjectFilter object_filter,
+    jvmtiHeapObjectCallback heap_object_callback,
+    const void* user_data);
+
+  /*   113 :  RESERVED */
+  void *reserved113;
+
+  /*   114 : Get Objects With Tags */
+  jvmtiError (JNICALL *GetObjectsWithTags) (jvmtiEnv* env,
+    jint tag_count,
+    const jlong* tags,
+    jint* count_ptr,
+    jobject** object_result_ptr,
+    jlong** tag_result_ptr);
+
+  /*   115 : Follow References */
+  jvmtiError (JNICALL *FollowReferences) (jvmtiEnv* env,
+    jint heap_filter,
+    jclass klass,
+    jobject initial_object,
+    const jvmtiHeapCallbacks* callbacks,
+    const void* user_data);
+
+  /*   116 : Iterate Through Heap */
+  jvmtiError (JNICALL *IterateThroughHeap) (jvmtiEnv* env,
+    jint heap_filter,
+    jclass klass,
+    const jvmtiHeapCallbacks* callbacks,
+    const void* user_data);
+
+  /*   117 :  RESERVED */
+  void *reserved117;
+
+  /*   118 :  RESERVED */
+  void *reserved118;
+
+  /*   119 :  RESERVED */
+  void *reserved119;
+
+  /*   120 : Set JNI Function Table */
+  jvmtiError (JNICALL *SetJNIFunctionTable) (jvmtiEnv* env,
+    const jniNativeInterface* function_table);
+
+  /*   121 : Get JNI Function Table */
+  jvmtiError (JNICALL *GetJNIFunctionTable) (jvmtiEnv* env,
+    jniNativeInterface** function_table);
+
+  /*   122 : Set Event Callbacks */
+  jvmtiError (JNICALL *SetEventCallbacks) (jvmtiEnv* env,
+    const jvmtiEventCallbacks* callbacks,
+    jint size_of_callbacks);
+
+  /*   123 : Generate Events */
+  jvmtiError (JNICALL *GenerateEvents) (jvmtiEnv* env,
+    jvmtiEvent event_type);
+
+  /*   124 : Get Extension Functions */
+  jvmtiError (JNICALL *GetExtensionFunctions) (jvmtiEnv* env,
+    jint* extension_count_ptr,
+    jvmtiExtensionFunctionInfo** extensions);
+
+  /*   125 : Get Extension Events */
+  jvmtiError (JNICALL *GetExtensionEvents) (jvmtiEnv* env,
+    jint* extension_count_ptr,
+    jvmtiExtensionEventInfo** extensions);
+
+  /*   126 : Set Extension Event Callback */
+  jvmtiError (JNICALL *SetExtensionEventCallback) (jvmtiEnv* env,
+    jint extension_event_index,
+    jvmtiExtensionEvent callback);
+
+  /*   127 : Dispose Environment */
+  jvmtiError (JNICALL *DisposeEnvironment) (jvmtiEnv* env);
+
+  /*   128 : Get Error Name */
+  jvmtiError (JNICALL *GetErrorName) (jvmtiEnv* env,
+    jvmtiError error,
+    char** name_ptr);
+
+  /*   129 : Get JLocation Format */
+  jvmtiError (JNICALL *GetJLocationFormat) (jvmtiEnv* env,
+    jvmtiJlocationFormat* format_ptr);
+
+  /*   130 : Get System Properties */
+  jvmtiError (JNICALL *GetSystemProperties) (jvmtiEnv* env,
+    jint* count_ptr,
+    char*** property_ptr);
+
+  /*   131 : Get System Property */
+  jvmtiError (JNICALL *GetSystemProperty) (jvmtiEnv* env,
+    const char* property,
+    char** value_ptr);
+
+  /*   132 : Set System Property */
+  jvmtiError (JNICALL *SetSystemProperty) (jvmtiEnv* env,
+    const char* property,
+    const char* value);
+
+  /*   133 : Get Phase */
+  jvmtiError (JNICALL *GetPhase) (jvmtiEnv* env,
+    jvmtiPhase* phase_ptr);
+
+  /*   134 : Get Current Thread CPU Timer Information */
+  jvmtiError (JNICALL *GetCurrentThreadCpuTimerInfo) (jvmtiEnv* env,
+    jvmtiTimerInfo* info_ptr);
+
+  /*   135 : Get Current Thread CPU Time */
+  jvmtiError (JNICALL *GetCurrentThreadCpuTime) (jvmtiEnv* env,
+    jlong* nanos_ptr);
+
+  /*   136 : Get Thread CPU Timer Information */
+  jvmtiError (JNICALL *GetThreadCpuTimerInfo) (jvmtiEnv* env,
+    jvmtiTimerInfo* info_ptr);
+
+  /*   137 : Get Thread CPU Time */
+  jvmtiError (JNICALL *GetThreadCpuTime) (jvmtiEnv* env,
+    jthread thread,
+    jlong* nanos_ptr);
+
+  /*   138 : Get Timer Information */
+  jvmtiError (JNICALL *GetTimerInfo) (jvmtiEnv* env,
+    jvmtiTimerInfo* info_ptr);
+
+  /*   139 : Get Time */
+  jvmtiError (JNICALL *GetTime) (jvmtiEnv* env,
+    jlong* nanos_ptr);
+
+  /*   140 : Get Potential Capabilities */
+  jvmtiError (JNICALL *GetPotentialCapabilities) (jvmtiEnv* env,
+    jvmtiCapabilities* capabilities_ptr);
+
+  /*   141 :  RESERVED */
+  void *reserved141;
+
+  /*   142 : Add Capabilities */
+  jvmtiError (JNICALL *AddCapabilities) (jvmtiEnv* env,
+    const jvmtiCapabilities* capabilities_ptr);
+
+  /*   143 : Relinquish Capabilities */
+  jvmtiError (JNICALL *RelinquishCapabilities) (jvmtiEnv* env,
+    const jvmtiCapabilities* capabilities_ptr);
+
+  /*   144 : Get Available Processors */
+  jvmtiError (JNICALL *GetAvailableProcessors) (jvmtiEnv* env,
+    jint* processor_count_ptr);
+
+  /*   145 : Get Class Version Numbers */
+  jvmtiError (JNICALL *GetClassVersionNumbers) (jvmtiEnv* env,
+    jclass klass,
+    jint* minor_version_ptr,
+    jint* major_version_ptr);
+
+  /*   146 : Get Constant Pool */
+  jvmtiError (JNICALL *GetConstantPool) (jvmtiEnv* env,
+    jclass klass,
+    jint* constant_pool_count_ptr,
+    jint* constant_pool_byte_count_ptr,
+    unsigned char** constant_pool_bytes_ptr);
+
+  /*   147 : Get Environment Local Storage */
+  jvmtiError (JNICALL *GetEnvironmentLocalStorage) (jvmtiEnv* env,
+    void** data_ptr);
+
+  /*   148 : Set Environment Local Storage */
+  jvmtiError (JNICALL *SetEnvironmentLocalStorage) (jvmtiEnv* env,
+    const void* data);
+
+  /*   149 : Add To Bootstrap Class Loader Search */
+  jvmtiError (JNICALL *AddToBootstrapClassLoaderSearch) (jvmtiEnv* env,
+    const char* segment);
+
+  /*   150 : Set Verbose Flag */
+  jvmtiError (JNICALL *SetVerboseFlag) (jvmtiEnv* env,
+    jvmtiVerboseFlag flag,
+    jboolean value);
+
+  /*   151 : Add To System Class Loader Search */
+  jvmtiError (JNICALL *AddToSystemClassLoaderSearch) (jvmtiEnv* env,
+    const char* segment);
+
+  /*   152 : Retransform Classes */
+  jvmtiError (JNICALL *RetransformClasses) (jvmtiEnv* env,
+    jint class_count,
+    const jclass* classes);
+
+  /*   153 : Get Owned Monitor Stack Depth Info */
+  jvmtiError (JNICALL *GetOwnedMonitorStackDepthInfo) (jvmtiEnv* env,
+    jthread thread,
+    jint* monitor_info_count_ptr,
+    jvmtiMonitorStackDepthInfo** monitor_info_ptr);
+
+  /*   154 : Get Object Size */
+  jvmtiError (JNICALL *GetObjectSize) (jvmtiEnv* env,
+    jobject object,
+    jlong* size_ptr);
+
+  /*   155 : Get Local Instance */
+  jvmtiError (JNICALL *GetLocalInstance) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jobject* value_ptr);
+
+} jvmtiInterface_1;
+
+struct _jvmtiEnv {
+    const struct jvmtiInterface_1_ *functions;
+#ifdef __cplusplus
+
+
+  jvmtiError Allocate(jlong size,
+            unsigned char** mem_ptr) {
+    return functions->Allocate(this, size, mem_ptr);
+  }
+
+  jvmtiError Deallocate(unsigned char* mem) {
+    return functions->Deallocate(this, mem);
+  }
+
+  jvmtiError GetThreadState(jthread thread,
+            jint* thread_state_ptr) {
+    return functions->GetThreadState(this, thread, thread_state_ptr);
+  }
+
+  jvmtiError GetCurrentThread(jthread* thread_ptr) {
+    return functions->GetCurrentThread(this, thread_ptr);
+  }
+
+  jvmtiError GetAllThreads(jint* threads_count_ptr,
+            jthread** threads_ptr) {
+    return functions->GetAllThreads(this, threads_count_ptr, threads_ptr);
+  }
+
+  jvmtiError SuspendThread(jthread thread) {
+    return functions->SuspendThread(this, thread);
+  }
+
+  jvmtiError SuspendThreadList(jint request_count,
+            const jthread* request_list,
+            jvmtiError* results) {
+    return functions->SuspendThreadList(this, request_count, request_list, results);
+  }
+
+  jvmtiError ResumeThread(jthread thread) {
+    return functions->ResumeThread(this, thread);
+  }
+
+  jvmtiError ResumeThreadList(jint request_count,
+            const jthread* request_list,
+            jvmtiError* results) {
+    return functions->ResumeThreadList(this, request_count, request_list, results);
+  }
+
+  jvmtiError StopThread(jthread thread,
+            jobject exception) {
+    return functions->StopThread(this, thread, exception);
+  }
+
+  jvmtiError InterruptThread(jthread thread) {
+    return functions->InterruptThread(this, thread);
+  }
+
+  jvmtiError GetThreadInfo(jthread thread,
+            jvmtiThreadInfo* info_ptr) {
+    return functions->GetThreadInfo(this, thread, info_ptr);
+  }
+
+  jvmtiError GetOwnedMonitorInfo(jthread thread,
+            jint* owned_monitor_count_ptr,
+            jobject** owned_monitors_ptr) {
+    return functions->GetOwnedMonitorInfo(this, thread, owned_monitor_count_ptr, owned_monitors_ptr);
+  }
+
+  jvmtiError GetOwnedMonitorStackDepthInfo(jthread thread,
+            jint* monitor_info_count_ptr,
+            jvmtiMonitorStackDepthInfo** monitor_info_ptr) {
+    return functions->GetOwnedMonitorStackDepthInfo(this, thread, monitor_info_count_ptr, monitor_info_ptr);
+  }
+
+  jvmtiError GetCurrentContendedMonitor(jthread thread,
+            jobject* monitor_ptr) {
+    return functions->GetCurrentContendedMonitor(this, thread, monitor_ptr);
+  }
+
+  jvmtiError RunAgentThread(jthread thread,
+            jvmtiStartFunction proc,
+            const void* arg,
+            jint priority) {
+    return functions->RunAgentThread(this, thread, proc, arg, priority);
+  }
+
+  jvmtiError SetThreadLocalStorage(jthread thread,
+            const void* data) {
+    return functions->SetThreadLocalStorage(this, thread, data);
+  }
+
+  jvmtiError GetThreadLocalStorage(jthread thread,
+            void** data_ptr) {
+    return functions->GetThreadLocalStorage(this, thread, data_ptr);
+  }
+
+  jvmtiError GetTopThreadGroups(jint* group_count_ptr,
+            jthreadGroup** groups_ptr) {
+    return functions->GetTopThreadGroups(this, group_count_ptr, groups_ptr);
+  }
+
+  jvmtiError GetThreadGroupInfo(jthreadGroup group,
+            jvmtiThreadGroupInfo* info_ptr) {
+    return functions->GetThreadGroupInfo(this, group, info_ptr);
+  }
+
+  jvmtiError GetThreadGroupChildren(jthreadGroup group,
+            jint* thread_count_ptr,
+            jthread** threads_ptr,
+            jint* group_count_ptr,
+            jthreadGroup** groups_ptr) {
+    return functions->GetThreadGroupChildren(this, group, thread_count_ptr, threads_ptr, group_count_ptr, groups_ptr);
+  }
+
+  jvmtiError GetStackTrace(jthread thread,
+            jint start_depth,
+            jint max_frame_count,
+            jvmtiFrameInfo* frame_buffer,
+            jint* count_ptr) {
+    return functions->GetStackTrace(this, thread, start_depth, max_frame_count, frame_buffer, count_ptr);
+  }
+
+  jvmtiError GetAllStackTraces(jint max_frame_count,
+            jvmtiStackInfo** stack_info_ptr,
+            jint* thread_count_ptr) {
+    return functions->GetAllStackTraces(this, max_frame_count, stack_info_ptr, thread_count_ptr);
+  }
+
+  jvmtiError GetThreadListStackTraces(jint thread_count,
+            const jthread* thread_list,
+            jint max_frame_count,
+            jvmtiStackInfo** stack_info_ptr) {
+    return functions->GetThreadListStackTraces(this, thread_count, thread_list, max_frame_count, stack_info_ptr);
+  }
+
+  jvmtiError GetFrameCount(jthread thread,
+            jint* count_ptr) {
+    return functions->GetFrameCount(this, thread, count_ptr);
+  }
+
+  jvmtiError PopFrame(jthread thread) {
+    return functions->PopFrame(this, thread);
+  }
+
+  jvmtiError GetFrameLocation(jthread thread,
+            jint depth,
+            jmethodID* method_ptr,
+            jlocation* location_ptr) {
+    return functions->GetFrameLocation(this, thread, depth, method_ptr, location_ptr);
+  }
+
+  jvmtiError NotifyFramePop(jthread thread,
+            jint depth) {
+    return functions->NotifyFramePop(this, thread, depth);
+  }
+
+  jvmtiError ForceEarlyReturnObject(jthread thread,
+            jobject value) {
+    return functions->ForceEarlyReturnObject(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnInt(jthread thread,
+            jint value) {
+    return functions->ForceEarlyReturnInt(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnLong(jthread thread,
+            jlong value) {
+    return functions->ForceEarlyReturnLong(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnFloat(jthread thread,
+            jfloat value) {
+    return functions->ForceEarlyReturnFloat(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnDouble(jthread thread,
+            jdouble value) {
+    return functions->ForceEarlyReturnDouble(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnVoid(jthread thread) {
+    return functions->ForceEarlyReturnVoid(this, thread);
+  }
+
+  jvmtiError FollowReferences(jint heap_filter,
+            jclass klass,
+            jobject initial_object,
+            const jvmtiHeapCallbacks* callbacks,
+            const void* user_data) {
+    return functions->FollowReferences(this, heap_filter, klass, initial_object, callbacks, user_data);
+  }
+
+  jvmtiError IterateThroughHeap(jint heap_filter,
+            jclass klass,
+            const jvmtiHeapCallbacks* callbacks,
+            const void* user_data) {
+    return functions->IterateThroughHeap(this, heap_filter, klass, callbacks, user_data);
+  }
+
+  jvmtiError GetTag(jobject object,
+            jlong* tag_ptr) {
+    return functions->GetTag(this, object, tag_ptr);
+  }
+
+  jvmtiError SetTag(jobject object,
+            jlong tag) {
+    return functions->SetTag(this, object, tag);
+  }
+
+  jvmtiError GetObjectsWithTags(jint tag_count,
+            const jlong* tags,
+            jint* count_ptr,
+            jobject** object_result_ptr,
+            jlong** tag_result_ptr) {
+    return functions->GetObjectsWithTags(this, tag_count, tags, count_ptr, object_result_ptr, tag_result_ptr);
+  }
+
+  jvmtiError ForceGarbageCollection() {
+    return functions->ForceGarbageCollection(this);
+  }
+
+  jvmtiError IterateOverObjectsReachableFromObject(jobject object,
+            jvmtiObjectReferenceCallback object_reference_callback,
+            const void* user_data) {
+    return functions->IterateOverObjectsReachableFromObject(this, object, object_reference_callback, user_data);
+  }
+
+  jvmtiError IterateOverReachableObjects(jvmtiHeapRootCallback heap_root_callback,
+            jvmtiStackReferenceCallback stack_ref_callback,
+            jvmtiObjectReferenceCallback object_ref_callback,
+            const void* user_data) {
+    return functions->IterateOverReachableObjects(this, heap_root_callback, stack_ref_callback, object_ref_callback, user_data);
+  }
+
+  jvmtiError IterateOverHeap(jvmtiHeapObjectFilter object_filter,
+            jvmtiHeapObjectCallback heap_object_callback,
+            const void* user_data) {
+    return functions->IterateOverHeap(this, object_filter, heap_object_callback, user_data);
+  }
+
+  jvmtiError IterateOverInstancesOfClass(jclass klass,
+            jvmtiHeapObjectFilter object_filter,
+            jvmtiHeapObjectCallback heap_object_callback,
+            const void* user_data) {
+    return functions->IterateOverInstancesOfClass(this, klass, object_filter, heap_object_callback, user_data);
+  }
+
+  jvmtiError GetLocalObject(jthread thread,
+            jint depth,
+            jint slot,
+            jobject* value_ptr) {
+    return functions->GetLocalObject(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalInstance(jthread thread,
+            jint depth,
+            jobject* value_ptr) {
+    return functions->GetLocalInstance(this, thread, depth, value_ptr);
+  }
+
+  jvmtiError GetLocalInt(jthread thread,
+            jint depth,
+            jint slot,
+            jint* value_ptr) {
+    return functions->GetLocalInt(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalLong(jthread thread,
+            jint depth,
+            jint slot,
+            jlong* value_ptr) {
+    return functions->GetLocalLong(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalFloat(jthread thread,
+            jint depth,
+            jint slot,
+            jfloat* value_ptr) {
+    return functions->GetLocalFloat(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalDouble(jthread thread,
+            jint depth,
+            jint slot,
+            jdouble* value_ptr) {
+    return functions->GetLocalDouble(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError SetLocalObject(jthread thread,
+            jint depth,
+            jint slot,
+            jobject value) {
+    return functions->SetLocalObject(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalInt(jthread thread,
+            jint depth,
+            jint slot,
+            jint value) {
+    return functions->SetLocalInt(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalLong(jthread thread,
+            jint depth,
+            jint slot,
+            jlong value) {
+    return functions->SetLocalLong(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalFloat(jthread thread,
+            jint depth,
+            jint slot,
+            jfloat value) {
+    return functions->SetLocalFloat(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalDouble(jthread thread,
+            jint depth,
+            jint slot,
+            jdouble value) {
+    return functions->SetLocalDouble(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetBreakpoint(jmethodID method,
+            jlocation location) {
+    return functions->SetBreakpoint(this, method, location);
+  }
+
+  jvmtiError ClearBreakpoint(jmethodID method,
+            jlocation location) {
+    return functions->ClearBreakpoint(this, method, location);
+  }
+
+  jvmtiError SetFieldAccessWatch(jclass klass,
+            jfieldID field) {
+    return functions->SetFieldAccessWatch(this, klass, field);
+  }
+
+  jvmtiError ClearFieldAccessWatch(jclass klass,
+            jfieldID field) {
+    return functions->ClearFieldAccessWatch(this, klass, field);
+  }
+
+  jvmtiError SetFieldModificationWatch(jclass klass,
+            jfieldID field) {
+    return functions->SetFieldModificationWatch(this, klass, field);
+  }
+
+  jvmtiError ClearFieldModificationWatch(jclass klass,
+            jfieldID field) {
+    return functions->ClearFieldModificationWatch(this, klass, field);
+  }
+
+  jvmtiError GetLoadedClasses(jint* class_count_ptr,
+            jclass** classes_ptr) {
+    return functions->GetLoadedClasses(this, class_count_ptr, classes_ptr);
+  }
+
+  jvmtiError GetClassLoaderClasses(jobject initiating_loader,
+            jint* class_count_ptr,
+            jclass** classes_ptr) {
+    return functions->GetClassLoaderClasses(this, initiating_loader, class_count_ptr, classes_ptr);
+  }
+
+  jvmtiError GetClassSignature(jclass klass,
+            char** signature_ptr,
+            char** generic_ptr) {
+    return functions->GetClassSignature(this, klass, signature_ptr, generic_ptr);
+  }
+
+  jvmtiError GetClassStatus(jclass klass,
+            jint* status_ptr) {
+    return functions->GetClassStatus(this, klass, status_ptr);
+  }
+
+  jvmtiError GetSourceFileName(jclass klass,
+            char** source_name_ptr) {
+    return functions->GetSourceFileName(this, klass, source_name_ptr);
+  }
+
+  jvmtiError GetClassModifiers(jclass klass,
+            jint* modifiers_ptr) {
+    return functions->GetClassModifiers(this, klass, modifiers_ptr);
+  }
+
+  jvmtiError GetClassMethods(jclass klass,
+            jint* method_count_ptr,
+            jmethodID** methods_ptr) {
+    return functions->GetClassMethods(this, klass, method_count_ptr, methods_ptr);
+  }
+
+  jvmtiError GetClassFields(jclass klass,
+            jint* field_count_ptr,
+            jfieldID** fields_ptr) {
+    return functions->GetClassFields(this, klass, field_count_ptr, fields_ptr);
+  }
+
+  jvmtiError GetImplementedInterfaces(jclass klass,
+            jint* interface_count_ptr,
+            jclass** interfaces_ptr) {
+    return functions->GetImplementedInterfaces(this, klass, interface_count_ptr, interfaces_ptr);
+  }
+
+  jvmtiError GetClassVersionNumbers(jclass klass,
+            jint* minor_version_ptr,
+            jint* major_version_ptr) {
+    return functions->GetClassVersionNumbers(this, klass, minor_version_ptr, major_version_ptr);
+  }
+
+  jvmtiError GetConstantPool(jclass klass,
+            jint* constant_pool_count_ptr,
+            jint* constant_pool_byte_count_ptr,
+            unsigned char** constant_pool_bytes_ptr) {
+    return functions->GetConstantPool(this, klass, constant_pool_count_ptr, constant_pool_byte_count_ptr, constant_pool_bytes_ptr);
+  }
+
+  jvmtiError IsInterface(jclass klass,
+            jboolean* is_interface_ptr) {
+    return functions->IsInterface(this, klass, is_interface_ptr);
+  }
+
+  jvmtiError IsArrayClass(jclass klass,
+            jboolean* is_array_class_ptr) {
+    return functions->IsArrayClass(this, klass, is_array_class_ptr);
+  }
+
+  jvmtiError IsModifiableClass(jclass klass,
+            jboolean* is_modifiable_class_ptr) {
+    return functions->IsModifiableClass(this, klass, is_modifiable_class_ptr);
+  }
+
+  jvmtiError GetClassLoader(jclass klass,
+            jobject* classloader_ptr) {
+    return functions->GetClassLoader(this, klass, classloader_ptr);
+  }
+
+  jvmtiError GetSourceDebugExtension(jclass klass,
+            char** source_debug_extension_ptr) {
+    return functions->GetSourceDebugExtension(this, klass, source_debug_extension_ptr);
+  }
+
+  jvmtiError RetransformClasses(jint class_count,
+            const jclass* classes) {
+    return functions->RetransformClasses(this, class_count, classes);
+  }
+
+  jvmtiError RedefineClasses(jint class_count,
+            const jvmtiClassDefinition* class_definitions) {
+    return functions->RedefineClasses(this, class_count, class_definitions);
+  }
+
+  jvmtiError GetObjectSize(jobject object,
+            jlong* size_ptr) {
+    return functions->GetObjectSize(this, object, size_ptr);
+  }
+
+  jvmtiError GetObjectHashCode(jobject object,
+            jint* hash_code_ptr) {
+    return functions->GetObjectHashCode(this, object, hash_code_ptr);
+  }
+
+  jvmtiError GetObjectMonitorUsage(jobject object,
+            jvmtiMonitorUsage* info_ptr) {
+    return functions->GetObjectMonitorUsage(this, object, info_ptr);
+  }
+
+  jvmtiError GetFieldName(jclass klass,
+            jfieldID field,
+            char** name_ptr,
+            char** signature_ptr,
+            char** generic_ptr) {
+    return functions->GetFieldName(this, klass, field, name_ptr, signature_ptr, generic_ptr);
+  }
+
+  jvmtiError GetFieldDeclaringClass(jclass klass,
+            jfieldID field,
+            jclass* declaring_class_ptr) {
+    return functions->GetFieldDeclaringClass(this, klass, field, declaring_class_ptr);
+  }
+
+  jvmtiError GetFieldModifiers(jclass klass,
+            jfieldID field,
+            jint* modifiers_ptr) {
+    return functions->GetFieldModifiers(this, klass, field, modifiers_ptr);
+  }
+
+  jvmtiError IsFieldSynthetic(jclass klass,
+            jfieldID field,
+            jboolean* is_synthetic_ptr) {
+    return functions->IsFieldSynthetic(this, klass, field, is_synthetic_ptr);
+  }
+
+  jvmtiError GetMethodName(jmethodID method,
+            char** name_ptr,
+            char** signature_ptr,
+            char** generic_ptr) {
+    return functions->GetMethodName(this, method, name_ptr, signature_ptr, generic_ptr);
+  }
+
+  jvmtiError GetMethodDeclaringClass(jmethodID method,
+            jclass* declaring_class_ptr) {
+    return functions->GetMethodDeclaringClass(this, method, declaring_class_ptr);
+  }
+
+  jvmtiError GetMethodModifiers(jmethodID method,
+            jint* modifiers_ptr) {
+    return functions->GetMethodModifiers(this, method, modifiers_ptr);
+  }
+
+  jvmtiError GetMaxLocals(jmethodID method,
+            jint* max_ptr) {
+    return functions->GetMaxLocals(this, method, max_ptr);
+  }
+
+  jvmtiError GetArgumentsSize(jmethodID method,
+            jint* size_ptr) {
+    return functions->GetArgumentsSize(this, method, size_ptr);
+  }
+
+  jvmtiError GetLineNumberTable(jmethodID method,
+            jint* entry_count_ptr,
+            jvmtiLineNumberEntry** table_ptr) {
+    return functions->GetLineNumberTable(this, method, entry_count_ptr, table_ptr);
+  }
+
+  jvmtiError GetMethodLocation(jmethodID method,
+            jlocation* start_location_ptr,
+            jlocation* end_location_ptr) {
+    return functions->GetMethodLocation(this, method, start_location_ptr, end_location_ptr);
+  }
+
+  jvmtiError GetLocalVariableTable(jmethodID method,
+            jint* entry_count_ptr,
+            jvmtiLocalVariableEntry** table_ptr) {
+    return functions->GetLocalVariableTable(this, method, entry_count_ptr, table_ptr);
+  }
+
+  jvmtiError GetBytecodes(jmethodID method,
+            jint* bytecode_count_ptr,
+            unsigned char** bytecodes_ptr) {
+    return functions->GetBytecodes(this, method, bytecode_count_ptr, bytecodes_ptr);
+  }
+
+  jvmtiError IsMethodNative(jmethodID method,
+            jboolean* is_native_ptr) {
+    return functions->IsMethodNative(this, method, is_native_ptr);
+  }
+
+  jvmtiError IsMethodSynthetic(jmethodID method,
+            jboolean* is_synthetic_ptr) {
+    return functions->IsMethodSynthetic(this, method, is_synthetic_ptr);
+  }
+
+  jvmtiError IsMethodObsolete(jmethodID method,
+            jboolean* is_obsolete_ptr) {
+    return functions->IsMethodObsolete(this, method, is_obsolete_ptr);
+  }
+
+  jvmtiError SetNativeMethodPrefix(const char* prefix) {
+    return functions->SetNativeMethodPrefix(this, prefix);
+  }
+
+  jvmtiError SetNativeMethodPrefixes(jint prefix_count,
+            char** prefixes) {
+    return functions->SetNativeMethodPrefixes(this, prefix_count, prefixes);
+  }
+
+  jvmtiError CreateRawMonitor(const char* name,
+            jrawMonitorID* monitor_ptr) {
+    return functions->CreateRawMonitor(this, name, monitor_ptr);
+  }
+
+  jvmtiError DestroyRawMonitor(jrawMonitorID monitor) {
+    return functions->DestroyRawMonitor(this, monitor);
+  }
+
+  jvmtiError RawMonitorEnter(jrawMonitorID monitor) {
+    return functions->RawMonitorEnter(this, monitor);
+  }
+
+  jvmtiError RawMonitorExit(jrawMonitorID monitor) {
+    return functions->RawMonitorExit(this, monitor);
+  }
+
+  jvmtiError RawMonitorWait(jrawMonitorID monitor,
+            jlong millis) {
+    return functions->RawMonitorWait(this, monitor, millis);
+  }
+
+  jvmtiError RawMonitorNotify(jrawMonitorID monitor) {
+    return functions->RawMonitorNotify(this, monitor);
+  }
+
+  jvmtiError RawMonitorNotifyAll(jrawMonitorID monitor) {
+    return functions->RawMonitorNotifyAll(this, monitor);
+  }
+
+  jvmtiError SetJNIFunctionTable(const jniNativeInterface* function_table) {
+    return functions->SetJNIFunctionTable(this, function_table);
+  }
+
+  jvmtiError GetJNIFunctionTable(jniNativeInterface** function_table) {
+    return functions->GetJNIFunctionTable(this, function_table);
+  }
+
+  jvmtiError SetEventCallbacks(const jvmtiEventCallbacks* callbacks,
+            jint size_of_callbacks) {
+    return functions->SetEventCallbacks(this, callbacks, size_of_callbacks);
+  }
+
+  jvmtiError SetEventNotificationMode(jvmtiEventMode mode,
+            jvmtiEvent event_type,
+            jthread event_thread,
+             ...) {
+    return functions->SetEventNotificationMode(this, mode, event_type, event_thread);
+  }
+
+  jvmtiError GenerateEvents(jvmtiEvent event_type) {
+    return functions->GenerateEvents(this, event_type);
+  }
+
+  jvmtiError GetExtensionFunctions(jint* extension_count_ptr,
+            jvmtiExtensionFunctionInfo** extensions) {
+    return functions->GetExtensionFunctions(this, extension_count_ptr, extensions);
+  }
+
+  jvmtiError GetExtensionEvents(jint* extension_count_ptr,
+            jvmtiExtensionEventInfo** extensions) {
+    return functions->GetExtensionEvents(this, extension_count_ptr, extensions);
+  }
+
+  jvmtiError SetExtensionEventCallback(jint extension_event_index,
+            jvmtiExtensionEvent callback) {
+    return functions->SetExtensionEventCallback(this, extension_event_index, callback);
+  }
+
+  jvmtiError GetPotentialCapabilities(jvmtiCapabilities* capabilities_ptr) {
+    return functions->GetPotentialCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError AddCapabilities(const jvmtiCapabilities* capabilities_ptr) {
+    return functions->AddCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError RelinquishCapabilities(const jvmtiCapabilities* capabilities_ptr) {
+    return functions->RelinquishCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError GetCapabilities(jvmtiCapabilities* capabilities_ptr) {
+    return functions->GetCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiTimerInfo* info_ptr) {
+    return functions->GetCurrentThreadCpuTimerInfo(this, info_ptr);
+  }
+
+  jvmtiError GetCurrentThreadCpuTime(jlong* nanos_ptr) {
+    return functions->GetCurrentThreadCpuTime(this, nanos_ptr);
+  }
+
+  jvmtiError GetThreadCpuTimerInfo(jvmtiTimerInfo* info_ptr) {
+    return functions->GetThreadCpuTimerInfo(this, info_ptr);
+  }
+
+  jvmtiError GetThreadCpuTime(jthread thread,
+            jlong* nanos_ptr) {
+    return functions->GetThreadCpuTime(this, thread, nanos_ptr);
+  }
+
+  jvmtiError GetTimerInfo(jvmtiTimerInfo* info_ptr) {
+    return functions->GetTimerInfo(this, info_ptr);
+  }
+
+  jvmtiError GetTime(jlong* nanos_ptr) {
+    return functions->GetTime(this, nanos_ptr);
+  }
+
+  jvmtiError GetAvailableProcessors(jint* processor_count_ptr) {
+    return functions->GetAvailableProcessors(this, processor_count_ptr);
+  }
+
+  jvmtiError AddToBootstrapClassLoaderSearch(const char* segment) {
+    return functions->AddToBootstrapClassLoaderSearch(this, segment);
+  }
+
+  jvmtiError AddToSystemClassLoaderSearch(const char* segment) {
+    return functions->AddToSystemClassLoaderSearch(this, segment);
+  }
+
+  jvmtiError GetSystemProperties(jint* count_ptr,
+            char*** property_ptr) {
+    return functions->GetSystemProperties(this, count_ptr, property_ptr);
+  }
+
+  jvmtiError GetSystemProperty(const char* property,
+            char** value_ptr) {
+    return functions->GetSystemProperty(this, property, value_ptr);
+  }
+
+  jvmtiError SetSystemProperty(const char* property,
+            const char* value) {
+    return functions->SetSystemProperty(this, property, value);
+  }
+
+  jvmtiError GetPhase(jvmtiPhase* phase_ptr) {
+    return functions->GetPhase(this, phase_ptr);
+  }
+
+  jvmtiError DisposeEnvironment() {
+    return functions->DisposeEnvironment(this);
+  }
+
+  jvmtiError SetEnvironmentLocalStorage(const void* data) {
+    return functions->SetEnvironmentLocalStorage(this, data);
+  }
+
+  jvmtiError GetEnvironmentLocalStorage(void** data_ptr) {
+    return functions->GetEnvironmentLocalStorage(this, data_ptr);
+  }
+
+  jvmtiError GetVersionNumber(jint* version_ptr) {
+    return functions->GetVersionNumber(this, version_ptr);
+  }
+
+  jvmtiError GetErrorName(jvmtiError error,
+            char** name_ptr) {
+    return functions->GetErrorName(this, error, name_ptr);
+  }
+
+  jvmtiError SetVerboseFlag(jvmtiVerboseFlag flag,
+            jboolean value) {
+    return functions->SetVerboseFlag(this, flag, value);
+  }
+
+  jvmtiError GetJLocationFormat(jvmtiJlocationFormat* format_ptr) {
+    return functions->GetJLocationFormat(this, format_ptr);
+  }
+
+#endif /* __cplusplus */
+};
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* !_JAVA_JVMTI_H_ */
+
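For orientation, a minimal agent sketch (illustrative only, not part of this
change): the inline C++ wrappers above simply forward through the `functions`
table, so an agent asks the VM for a jvmtiEnv and calls them directly.
JVMTI_VERSION and the JVMTI_ERROR_* constants are assumed to be defined earlier
in this header; Agent_OnLoad is the standard JVM TI agent entry point.

    // sample_agent.cc -- illustrative sketch, not part of this change.
    #include <cstdio>

    #include "jvmti.h"

    extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
      jvmtiEnv* jvmti = nullptr;
      // GetEnv hands back a jvmtiEnv whose `functions` field points at the
      // jvmtiInterface_1 table defined above.
      if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION) != JNI_OK) {
        return JNI_ERR;
      }
      jint count = 0;
      jclass* classes = nullptr;
      // The C++ wrapper expands to functions->GetLoadedClasses(this, ...).
      if (jvmti->GetLoadedClasses(&count, &classes) == JVMTI_ERROR_NONE) {
        std::printf("%d classes loaded\n", count);
        // Memory handed out by JVM TI must be released through Deallocate.
        jvmti->Deallocate(reinterpret_cast<unsigned char*>(classes));
      }
      return JNI_OK;
    }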
diff --git a/runtime/os.h b/runtime/os.h
index befe2e8..46d89fb 100644
--- a/runtime/os.h
+++ b/runtime/os.h
@@ -39,6 +39,10 @@
   // already exists, it is *not* overwritten, but unlinked, and a new inode will be used.
   static File* CreateEmptyFile(const char* name);
 
+  // Create an empty file with write-only access. This is a *new* file, that is, if the file
+  // already exists, it is *not* overwritten, but unlinked, and a new inode will be used.
+  static File* CreateEmptyFileWriteOnly(const char* name);
+
   // Open a file with the specified open(2) flags.
   static File* OpenFileWithFlags(const char* name, int flags);
 
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 675699d..1db09b4 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -35,18 +35,27 @@
   return OpenFileWithFlags(name, O_RDWR);
 }
 
-File* OS::CreateEmptyFile(const char* name) {
+static File* CreateEmptyFile(const char* name, int extra_flags) {
   // In case the file exists, unlink it so we get a new file. This is necessary as the previous
   // file may be in use and must not be changed.
   unlink(name);
 
-  return OpenFileWithFlags(name, O_RDWR | O_CREAT | O_TRUNC);
+  return OS::OpenFileWithFlags(name, O_CREAT | extra_flags);
+}
+
+File* OS::CreateEmptyFile(const char* name) {
+  return art::CreateEmptyFile(name, O_RDWR | O_TRUNC);
+}
+
+File* OS::CreateEmptyFileWriteOnly(const char* name) {
+  return art::CreateEmptyFile(name, O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
 }
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
   CHECK(name != nullptr);
-  std::unique_ptr<File> file(new File);
-  if (!file->Open(name, flags, 0666)) {
+  bool read_only = ((flags & O_ACCMODE) == O_RDONLY);
+  std::unique_ptr<File> file(new File(name, flags, 0666, !read_only));
+  if (!file->IsOpened()) {
     return nullptr;
   }
   return file.release();
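As a usage note (illustrative, not part of the change): CreateEmptyFileWriteOnly
differs from CreateEmptyFile only in its open(2) flags -- O_WRONLY instead of
O_RDWR, plus O_NOFOLLOW and O_CLOEXEC so the path may not be a symlink and the
descriptor is not inherited across exec(). A minimal sketch, assuming File is
ART's unix_file::FdFile with its usual WriteFully/Erase/FlushCloseOrErase API:

    #include <memory>

    #include "base/unix_file/fd_file.h"  // assumption: declares WriteFully() etc.
    #include "os.h"

    // Dump a buffer into a freshly created, write-only file.
    bool DumpBuffer(const char* path, const void* data, size_t size) {
      std::unique_ptr<art::File> file(art::OS::CreateEmptyFileWriteOnly(path));
      if (file == nullptr) {
        return false;  // unlink/open failed, or `path` was a symlink (O_NOFOLLOW).
      }
      if (!file->WriteFully(data, size)) {
        file->Erase();  // assumption: checked-usage API matching the new File ctor.
        return false;
      }
      return file->FlushCloseOrErase() == 0;
    }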
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index ae16c7f..4f70b04 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -23,6 +23,7 @@
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
+#include "ti/agent.h"
 #include "trace.h"
 #include "utils.h"
 
@@ -41,15 +42,13 @@
                                                     // Runtime::Abort
 }
 
-ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                                     RuntimeArgumentMap* runtime_options) {
+bool ParsedOptions::Parse(const RuntimeOptions& options,
+                          bool ignore_unrecognized,
+                          RuntimeArgumentMap* runtime_options) {
   CHECK(runtime_options != nullptr);
 
-  std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
-  if (parsed->Parse(options, ignore_unrecognized, runtime_options)) {
-    return parsed.release();
-  }
-  return nullptr;
+  ParsedOptions parser;
+  return parser.DoParse(options, ignore_unrecognized, runtime_options);
 }
 
 using RuntimeParser = CmdlineParser<RuntimeArgumentMap, RuntimeArgumentMap::Key>;
@@ -92,6 +91,13 @@
       .Define({"-Xrunjdwp:_", "-agentlib:jdwp=_"})
           .WithType<JDWP::JdwpOptions>()
           .IntoKey(M::JdwpOptions)
+      // TODO Re-enable -agentlib: once I have a good way to transform the values.
+      // .Define("-agentlib:_")
+      //     .WithType<std::vector<ti::Agent>>().AppendValues()
+      //     .IntoKey(M::AgentLib)
+      .Define("-agentpath:_")
+          .WithType<std::vector<ti::Agent>>().AppendValues()
+          .IntoKey(M::AgentPath)
       .Define("-Xms_")
           .WithType<MemoryKiB>()
           .IntoKey(M::MemoryInitialSize)
@@ -148,19 +154,43 @@
       .Define({"-XX:EnableHSpaceCompactForOOM", "-XX:DisableHSpaceCompactForOOM"})
           .WithValues({true, false})
           .IntoKey(M::EnableHSpaceCompactForOOM)
+      .Define("-XX:DumpNativeStackOnSigQuit:_")
+          .WithType<bool>()
+          .WithValueMap({{"false", false}, {"true", true}})
+          .IntoKey(M::DumpNativeStackOnSigQuit)
       .Define("-Xusejit:_")
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
-          .IntoKey(M::UseJIT)
-      .Define("-Xjitcodecachesize:_")
+          .IntoKey(M::UseJitCompilation)
+      .Define("-Xjitinitialsize:_")
           .WithType<MemoryKiB>()
-          .IntoKey(M::JITCodeCacheCapacity)
+          .IntoKey(M::JITCodeCacheInitialCapacity)
+      .Define("-Xjitmaxsize:_")
+          .WithType<MemoryKiB>()
+          .IntoKey(M::JITCodeCacheMaxCapacity)
       .Define("-Xjitthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITCompileThreshold)
       .Define("-Xjitwarmupthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITWarmupThreshold)
+      .Define("-Xjitosrthreshold:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITOsrThreshold)
+      .Define("-Xjitprithreadweight:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITPriorityThreadWeight)
+      .Define("-Xjittransitionweight:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITInvokeTransitionWeight)
+      .Define("-Xjitsaveprofilinginfo")
+          .WithType<ProfileSaverOptions>()
+          .AppendValues()
+          .IntoKey(M::ProfileSaverOpts)
+      .Define("-Xps-_")  // profile saver options -Xps-<key>:<value>
+          .WithType<ProfileSaverOptions>()
+          .AppendValues()
+          .IntoKey(M::ProfileSaverOpts)  // NOTE: Appends into same key as -Xjitsaveprofilinginfo
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
@@ -227,14 +257,6 @@
                          {"wallclock",      TraceClockSource::kWall},
                          {"dualclock",      TraceClockSource::kDual}})
           .IntoKey(M::ProfileClock)
-      .Define("-Xenable-profiler")
-          .WithType<TestProfilerOptions>()
-          .AppendValues()
-          .IntoKey(M::ProfilerOpts)  // NOTE: Appends into same key as -Xprofile-*
-      .Define("-Xprofile-_")  // -Xprofile-<key>:<value>
-          .WithType<TestProfilerOptions>()
-          .AppendValues()
-          .IntoKey(M::ProfilerOpts)  // NOTE: Appends into same key as -Xenable-profiler
       .Define("-Xcompiler:_")
           .WithType<std::string>()
           .IntoKey(M::Compiler)
@@ -273,6 +295,11 @@
           .WithType<ExperimentalFlags>()
           .AppendValues()
           .IntoKey(M::Experimental)
+      .Define("-Xforce-nb-testing")
+          .IntoKey(M::ForceNativeBridge)
+      .Define("-Xplugin:_")
+          .WithType<std::vector<Plugin>>().AppendValues()
+          .IntoKey(M::Plugins)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -305,8 +332,8 @@
     const std::string option(options[i].first);
       // TODO: support -Djava.class.path
     if (option == "bootclasspath") {
-      auto boot_class_path
-          = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
+      auto boot_class_path = static_cast<std::vector<std::unique_ptr<const DexFile>>*>(
+          const_cast<void*>(options[i].second));
 
       if (runtime_options != nullptr) {
         runtime_options->Set(M::BootClassPathDexList, boot_class_path);
@@ -384,6 +411,7 @@
 // Intended for local changes only.
 static void MaybeOverrideVerbosity() {
   //  gLogVerbosity.class_linker = true;  // TODO: don't check this in!
+  //  gLogVerbosity.collector = true;  // TODO: don't check this in!
   //  gLogVerbosity.compiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.deopt = true;  // TODO: don't check this in!
   //  gLogVerbosity.gc = true;  // TODO: don't check this in!
@@ -395,14 +423,16 @@
   //  gLogVerbosity.oat = true;  // TODO: don't check this in!
   //  gLogVerbosity.profiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.signals = true;  // TODO: don't check this in!
+  //  gLogVerbosity.simulator = true;  // TODO: don't check this in!
   //  gLogVerbosity.startup = true;  // TODO: don't check this in!
   //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
   //  gLogVerbosity.threads = true;  // TODO: don't check this in!
   //  gLogVerbosity.verifier = true;  // TODO: don't check this in!
 }
 
-bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized,
-                          RuntimeArgumentMap* runtime_options) {
+bool ParsedOptions::DoParse(const RuntimeOptions& options,
+                            bool ignore_unrecognized,
+                            RuntimeArgumentMap* runtime_options) {
   for (size_t i = 0; i < options.size(); ++i) {
     if (true && options[0].first == "-Xzygote") {
       LOG(INFO) << "option[" << i << "]=" << options[i].first;
@@ -449,6 +479,11 @@
     LOG(INFO) << "setting boot class path to " << *args.Get(M::BootClassPath);
   }
 
+  if (args.GetOrDefault(M::UseJitCompilation) && args.GetOrDefault(M::Interpret)) {
+    Usage("-Xusejit:true and -Xint cannot be specified together");
+    Exit(0);
+  }
+
   // Set a default boot class path if we didn't get an explicit one via command line.
   if (getenv("BOOTCLASSPATH") != nullptr) {
     args.SetIfMissing(M::BootClassPath, std::string(getenv("BOOTCLASSPATH")));
@@ -463,7 +498,7 @@
   args.SetIfMissing(M::ParallelGCThreads, gc::Heap::kDefaultEnableParallelGC ?
       static_cast<unsigned int>(sysconf(_SC_NPROCESSORS_CONF) - 1u) : 0u);
 
-  // -Xverbose:
+  // -verbose:
   {
     LogVerbosity *log_verbosity = args.Get(M::Verbose);
     if (log_verbosity != nullptr) {
@@ -553,21 +588,46 @@
     args.Set(M::Image, image);
   }
 
-  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u) {  // 0 means no growth limit
+  // 0 means no growth limit, and the growth limit should always be <= the heap size
+  if (args.GetOrDefault(M::HeapGrowthLimit) <= 0u ||
+      args.GetOrDefault(M::HeapGrowthLimit) > args.GetOrDefault(M::MemoryMaximumSize)) {
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
-  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kDefaultMethods) {
-    LOG(WARNING) << "Default method support has been enabled. The verifier will be less strict "
-                 << "in some cases. All existing invoke opcodes have an unstable updated "
-                 << "specification and are nearly guaranteed to change over time. Do not attempt "
-                 << "to write shipping code against the invoke opcodes with this flag.";
+  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kRuntimePlugins) {
+    LOG(WARNING) << "Experimental runtime plugin support has been enabled. No guarantees are made "
+                 << "about stability or usage of this plugin support. Use at your own risk. Do "
+                 << "not attempt to write shipping code that relies on the implementation of "
+                 << "runtime plugins.";
+  } else if (!args.GetOrDefault(M::Plugins).empty()) {
+    LOG(WARNING) << "Experimental runtime plugin support has not been enabled. Ignored options: ";
+    for (auto& op : args.GetOrDefault(M::Plugins)) {
+      LOG(WARNING) << "    -plugin:" << op.GetLibrary();
+    }
   }
 
-  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kLambdas) {
-    LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
-                 << "an unstable specification and are nearly guaranteed to change over time. "
-                 << "Do not attempt to write shipping code against these opcodes.";
+  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kAgents) {
+    LOG(WARNING) << "Experimental runtime agent support has been enabled. No guarantees are made "
+                 << "the completeness, accuracy, reliability, or stability of the agent "
+                 << "implementation. Use at your own risk. Do not attempt to write shipping code "
+                 << "that relies on the implementation of any part of this api.";
+  } else if (!args.GetOrDefault(M::AgentLib).empty() || !args.GetOrDefault(M::AgentPath).empty()) {
+    LOG(WARNING) << "agent support has not been enabled. Enable experimental agent "
+                 << " support with '-XExperimental:agent'. Ignored options are:";
+    for (auto op : args.GetOrDefault(M::AgentLib)) {
+      if (op.HasArgs()) {
+        LOG(WARNING) << "    -agentlib:" << op.GetName() << "=" << op.GetArgs();
+      } else {
+        LOG(WARNING) << "    -agentlib:" << op.GetName();
+      }
+    }
+    for (auto op : args.GetOrDefault(M::AgentPath)) {
+      if (op.HasArgs()) {
+        LOG(WARNING) << "    -agentpath:" << op.GetName() << "=" << op.GetArgs();
+      } else {
+        LOG(WARNING) << "    -agentpath:" << op.GetName();
+      }
+    }
   }
 
   *runtime_options = std::move(args);
@@ -614,6 +674,11 @@
   UsageMessage(stream, "  -showversion\n");
   UsageMessage(stream, "  -help\n");
   UsageMessage(stream, "  -agentlib:jdwp=options\n");
+  // TODO add back in once -agentlib actually does something.
+  // UsageMessage(stream, "  -agentlib:library=options (Experimental feature, "
+  //                      "requires -Xexperimental:agent, some features might not be supported)\n");
+  UsageMessage(stream, "  -agentpath:library_path=options (Experimental feature, "
+                       "requires -Xexperimental:agent, some features might not be supported)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following extended options are supported:\n");
@@ -640,7 +705,6 @@
   UsageMessage(stream, "  -XX:ForegroundHeapGrowthMultiplier=doublevalue\n");
   UsageMessage(stream, "  -XX:LowMemoryMode\n");
   UsageMessage(stream, "  -Xprofile:{threadcpuclock,wallclock,dualclock}\n");
-  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitthreshold:integervalue\n");
   UsageMessage(stream, "\n");
 
@@ -665,32 +729,38 @@
   UsageMessage(stream, "  -XX:BackgroundGC=none\n");
   UsageMessage(stream, "  -XX:LargeObjectSpace={disabled,map,freelist}\n");
   UsageMessage(stream, "  -XX:LargeObjectThreshold=N\n");
+  UsageMessage(stream, "  -XX:DumpNativeStackOnSigQuit=booleanvalue\n");
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
-  UsageMessage(stream, "  -Xenable-profiler\n");
-  UsageMessage(stream, "  -Xprofile-filename:filename\n");
-  UsageMessage(stream, "  -Xprofile-period:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-backoff:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-start-immediately\n");
-  UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-type:{method,stack}\n");
-  UsageMessage(stream, "  -Xprofile-max-stack-depth:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-save-period-ms:integervalue\n");
+  UsageMessage(stream, "  -Xps-save-resolved-classes-delay-ms:integervalue\n");
+  UsageMessage(stream, "  -Xps-startup-method-samples:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-methods-to-save:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-classes-to-save:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-notification-before-wake:integervalue\n");
+  UsageMessage(stream, "  -Xps-max-notification-before-wake:integervalue\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Xpatchoat:filename\n");
   UsageMessage(stream, "  -Xusejit:booleanvalue\n");
+  UsageMessage(stream, "  -Xjitinitialsize:N\n");
+  UsageMessage(stream, "  -Xjitmaxsize:N\n");
+  UsageMessage(stream, "  -Xjitwarmupthreshold:integervalue\n");
+  UsageMessage(stream, "  -Xjitosrthreshold:integervalue\n");
+  UsageMessage(stream, "  -Xjitprithreadweight:integervalue\n");
   UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "  -X[no]dex2oat (Whether to invoke dex2oat on the application)\n");
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
-  UsageMessage(stream, "  -Xexperimental:{lambdas,default-methods} "
-                       "(Enable new experimental dalvik opcodes and semantics, off by default)\n");
+  UsageMessage(stream, "  -Xplugin:<library.so> "
+                       "(Load a runtime plugin, requires -Xexperimental:runtime-plugins)\n");
+  UsageMessage(stream, "  -Xexperimental:runtime-plugins"
+                       "(Enable new and experimental agent support)\n");
+  UsageMessage(stream, "  -Xexperimental:agents"
+                       "(Enable new and experimental agent support)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
@@ -718,6 +788,7 @@
   UsageMessage(stream, "  -Xjitblocking\n");
   UsageMessage(stream, "  -Xjitmethod:signature[,signature]* (eg Ljava/lang/String\\;replace)\n");
   UsageMessage(stream, "  -Xjitclass:classname[,classname]*\n");
+  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitoffset:offset[,offset]\n");
   UsageMessage(stream, "  -Xjitconfig:filename\n");
   UsageMessage(stream, "  -Xjitcheckcg\n");
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 529dd5c..1f5beb9 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -26,7 +26,7 @@
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "arch/instruction_set.h"
-#include "profiler_options.h"
+#include "jit/profile_saver_options.h"
 #include "runtime_options.h"
 
 namespace art {
@@ -50,8 +50,9 @@
   static std::unique_ptr<RuntimeParser> MakeParser(bool ignore_unrecognized);
 
   // returns true if parsing succeeds, and stores the resulting options into runtime_options
-  static ParsedOptions* Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                               RuntimeArgumentMap* runtime_options);
+  static bool Parse(const RuntimeOptions& options,
+                    bool ignore_unrecognized,
+                    RuntimeArgumentMap* runtime_options);
 
   bool (*hook_is_sensitive_thread_)();
   jint (*hook_vfprintf_)(FILE* stream, const char* format, va_list ap);
@@ -72,8 +73,9 @@
   void Exit(int status);
   void Abort();
 
-  bool Parse(const RuntimeOptions& options,  bool ignore_unrecognized,
-             RuntimeArgumentMap* runtime_options);
+  bool DoParse(const RuntimeOptions& options,
+               bool ignore_unrecognized,
+               RuntimeArgumentMap* runtime_options);
 };
 
 }  // namespace art
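For illustration (mirroring the updated tests below): the new static Parse()
fills a caller-owned RuntimeArgumentMap and returns success, instead of
allocating a ParsedOptions. A minimal sketch, assuming the usual RuntimeOptions
typedef (a vector of option-string/extra-info pairs):

    #include <utility>

    #include "parsed_options.h"  // assumption: pulls in RuntimeOptions and RuntimeArgumentMap.

    bool BuildRuntimeArgs(art::RuntimeArgumentMap* map) {
      art::RuntimeOptions options;
      options.push_back(std::make_pair("-Xmx64m", nullptr));
      options.push_back(std::make_pair("-Xusejit:true", nullptr));
      // -Xps-* options append into the same ProfileSaverOpts key as
      // -Xjitsaveprofilinginfo (see the parser definitions in this change).
      options.push_back(std::make_pair("-Xps-min-save-period-ms:2000", nullptr));
      return art::ParsedOptions::Parse(options, /*ignore_unrecognized*/ false, map);
    }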
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index a8575de..5b90c6a 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -18,6 +18,8 @@
 
 #include <memory>
 
+#include "arch/instruction_set.h"
+#include "base/stringprintf.h"
 #include "common_runtime_test.h"
 
 namespace art {
@@ -34,18 +36,28 @@
   void* test_abort = reinterpret_cast<void*>(0xb);
   void* test_exit = reinterpret_cast<void*>(0xc);
 
-  std::string lib_core(CommonRuntimeTest::GetLibCoreDexFileName());
-
   std::string boot_class_path;
+  std::string class_path;
   boot_class_path += "-Xbootclasspath:";
-  boot_class_path += lib_core;
+
+  bool first_dex_file = true;
+  for (const std::string& dex_file_name :
+           CommonRuntimeTest::GetLibCoreDexFileNames()) {
+    if (!first_dex_file) {
+      class_path += ":";
+    } else {
+      first_dex_file = false;
+    }
+    class_path += dex_file_name;
+  }
+  boot_class_path += class_path;
 
   RuntimeOptions options;
   options.push_back(std::make_pair(boot_class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-classpath", nullptr));
-  options.push_back(std::make_pair(lib_core.c_str(), nullptr));
+  options.push_back(std::make_pair(class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-cp", nullptr));
-  options.push_back(std::make_pair(lib_core.c_str(), nullptr));
+  options.push_back(std::make_pair(class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-Ximage:boot_image", nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
   options.push_back(std::make_pair("-Xms2048", nullptr));
@@ -60,8 +72,8 @@
   options.push_back(std::make_pair("exit", test_exit));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
-  ASSERT_TRUE(parsed.get() != nullptr);
+  bool parsed = ParsedOptions::Parse(options, false, &map);
+  ASSERT_TRUE(parsed);
   ASSERT_NE(0u, map.Size());
 
   using Opt = RuntimeArgumentMap;
@@ -69,8 +81,8 @@
 #define EXPECT_PARSED_EQ(expected, actual_key) EXPECT_EQ(expected, map.GetOrDefault(actual_key))
 #define EXPECT_PARSED_EXISTS(actual_key) EXPECT_TRUE(map.Exists(actual_key))
 
-  EXPECT_PARSED_EQ(lib_core, Opt::BootClassPath);
-  EXPECT_PARSED_EQ(lib_core, Opt::ClassPath);
+  EXPECT_PARSED_EQ(class_path, Opt::BootClassPath);
+  EXPECT_PARSED_EQ(class_path, Opt::ClassPath);
   EXPECT_PARSED_EQ(std::string("boot_image"), Opt::Image);
   EXPECT_PARSED_EXISTS(Opt::CheckJni);
   EXPECT_PARSED_EQ(2048U, Opt::MemoryInitialSize);
@@ -87,6 +99,8 @@
   EXPECT_FALSE(VLOG_IS_ON(jdwp));
   EXPECT_TRUE(VLOG_IS_ON(jni));
   EXPECT_FALSE(VLOG_IS_ON(monitor));
+  EXPECT_FALSE(VLOG_IS_ON(signals));
+  EXPECT_FALSE(VLOG_IS_ON(simulator));
   EXPECT_FALSE(VLOG_IS_ON(startup));
   EXPECT_FALSE(VLOG_IS_ON(third_party_jni));
   EXPECT_FALSE(VLOG_IS_ON(threads));
@@ -102,8 +116,8 @@
   options.push_back(std::make_pair("-Xgc:MC", nullptr));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
-  ASSERT_TRUE(parsed.get() != nullptr);
+  bool parsed = ParsedOptions::Parse(options, false, &map);
+  ASSERT_TRUE(parsed);
   ASSERT_NE(0u, map.Size());
 
   using Opt = RuntimeArgumentMap;
@@ -111,6 +125,40 @@
   EXPECT_TRUE(map.Exists(Opt::GcOption));
 
   XGcOption xgc = map.GetOrDefault(Opt::GcOption);
-  EXPECT_EQ(gc::kCollectorTypeMC, xgc.collector_type_);}
+  EXPECT_EQ(gc::kCollectorTypeMC, xgc.collector_type_);
+}
+
+TEST_F(ParsedOptionsTest, ParsedOptionsInstructionSet) {
+  using Opt = RuntimeArgumentMap;
+
+  {
+    // Nothing set, should be kRuntimeISA.
+    RuntimeOptions options;
+    RuntimeArgumentMap map;
+    bool parsed = ParsedOptions::Parse(options, false, &map);
+    ASSERT_TRUE(parsed);
+    InstructionSet isa = map.GetOrDefault(Opt::ImageInstructionSet);
+    EXPECT_EQ(kRuntimeISA, isa);
+  }
+
+  const char* isa_strings[] = { "arm", "arm64", "x86", "x86_64", "mips", "mips64" };
+  InstructionSet ISAs[] = { InstructionSet::kArm,
+                            InstructionSet::kArm64,
+                            InstructionSet::kX86,
+                            InstructionSet::kX86_64,
+                            InstructionSet::kMips,
+                            InstructionSet::kMips64 };
+  static_assert(arraysize(isa_strings) == arraysize(ISAs), "Need same amount.");
+
+  for (size_t i = 0; i < arraysize(isa_strings); ++i) {
+    RuntimeOptions options;
+    options.push_back(std::make_pair("imageinstructionset", isa_strings[i]));
+    RuntimeArgumentMap map;
+    bool parsed = ParsedOptions::Parse(options, false, &map);
+    ASSERT_TRUE(parsed);
+    InstructionSet isa = map.GetOrDefault(Opt::ImageInstructionSet);
+    EXPECT_EQ(ISAs[i], isa);
+  }
+}
 
 }  // namespace art
diff --git a/runtime/plugin.cc b/runtime/plugin.cc
new file mode 100644
index 0000000..481b1ca
--- /dev/null
+++ b/runtime/plugin.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "plugin.h"
+
+#include <dlfcn.h>
+#include "base/stringprintf.h"
+#include "base/logging.h"
+
+namespace art {
+
+const char* PLUGIN_INITIALIZATION_FUNCTION_NAME = "ArtPlugin_Initialize";
+const char* PLUGIN_DEINITIALIZATION_FUNCTION_NAME = "ArtPlugin_Deinitialize";
+
+Plugin::Plugin(const Plugin& other) : library_(other.library_), dlopen_handle_(nullptr) {
+  if (other.IsLoaded()) {
+    std::string err;
+    Load(&err);
+  }
+}
+
+bool Plugin::Load(/*out*/std::string* error_msg) {
+  DCHECK(!IsLoaded());
+  void* res = dlopen(library_.c_str(), RTLD_LAZY);
+  if (res == nullptr) {
+    *error_msg = StringPrintf("dlopen failed: %s", dlerror());
+    return false;
+  }
+  // Get the initializer function
+  PluginInitializationFunction init = reinterpret_cast<PluginInitializationFunction>(
+      dlsym(res, PLUGIN_INITIALIZATION_FUNCTION_NAME));
+  if (init != nullptr) {
+    if (!init()) {
+      dlclose(res);
+      *error_msg = "Initialization of plugin failed";
+      return false;
+    }
+  } else {
+    LOG(WARNING) << this << " does not include an initialization function";
+  }
+  dlopen_handle_ = res;
+  return true;
+}
+
+bool Plugin::Unload() {
+  DCHECK(IsLoaded());
+  bool ret = true;
+  void* handle = dlopen_handle_;
+  PluginDeinitializationFunction deinit = reinterpret_cast<PluginDeinitializationFunction>(
+      dlsym(handle, PLUGIN_DEINITIALIZATION_FUNCTION_NAME));
+  if (deinit != nullptr) {
+    if (!deinit()) {
+      LOG(WARNING) << this << " failed deinitialization";
+      ret = false;
+    }
+  } else {
+    LOG(WARNING) << this << " does not include a deinitialization function";
+  }
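+  // Clear the handle before calling dlclose so that this plugin reports
+  // !IsLoaded() even if dlclose fails below.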
+  dlopen_handle_ = nullptr;
+  if (dlclose(handle) != 0) {
+    LOG(ERROR) << this << " failed to dlclose: " << dlerror();
+    ret = false;
+  }
+  return ret;
+}
+
+std::ostream& operator<<(std::ostream &os, const Plugin* m) {
+  return os << *m;
+}
+
+std::ostream& operator<<(std::ostream &os, Plugin const& m) {
+  return os << "Plugin { library=\"" << m.library_ << "\", handle=" << m.dlopen_handle_ << " }";
+}
+
+}  // namespace art
diff --git a/runtime/plugin.h b/runtime/plugin.h
new file mode 100644
index 0000000..18f3977
--- /dev/null
+++ b/runtime/plugin.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_PLUGIN_H_
+#define ART_RUNTIME_PLUGIN_H_
+
+#include <string>
+#include "base/logging.h"
+
+namespace art {
+
+// This function is loaded from the plugin (if present) and called during runtime
+// initialization. By the time it is called, the runtime has been fully initialized, but no
+// other native libraries have been loaded yet. Failure to initialize is considered fatal.
+// TODO: Might want to give the initialization function some arguments.
+using PluginInitializationFunction = bool (*)();
+using PluginDeinitializationFunction = bool (*)();
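+// As an illustration (not part of this header), a plugin shared library would
+// provide these entry points, defined with C linkage so that dlsym can find
+// the unmangled names:
+//
+//   extern "C" bool ArtPlugin_Initialize() { return true; }
+//   extern "C" bool ArtPlugin_Deinitialize() { return true; }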
+
+// A class encapsulating a plugin. There is no stable plugin ABI or API and likely never will be.
+// TODO: Might want to put some locking in this, but at the moment plugins are only loaded at
+// initialization in a single-threaded fashion, so there is not much need.
+class Plugin {
+ public:
+  static Plugin Create(std::string lib) {
+    return Plugin(lib);
+  }
+
+  bool IsLoaded() const {
+    return dlopen_handle_ != nullptr;
+  }
+
+  const std::string& GetLibrary() const {
+    return library_;
+  }
+
+  bool Load(/*out*/std::string* error_msg);
+  bool Unload();
+
+  ~Plugin() {
+    if (IsLoaded() && !Unload()) {
+      LOG(ERROR) << "Error unloading " << this;
+    }
+  }
+
+  Plugin(const Plugin& other);
+
+  // Move constructor, needed so Plugins can be stored in standard containers.
+  Plugin(Plugin&& other)
+      : library_(other.library_),
+        dlopen_handle_(other.dlopen_handle_) {
+    other.dlopen_handle_ = nullptr;
+  }
+
+ private:
+  explicit Plugin(std::string library) : library_(library), dlopen_handle_(nullptr) { }
+
+  std::string library_;
+  void* dlopen_handle_;
+
+  friend std::ostream& operator<<(std::ostream &os, Plugin const& m);
+};
+
+std::ostream& operator<<(std::ostream &os, Plugin const& m);
+std::ostream& operator<<(std::ostream &os, const Plugin* m);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_PLUGIN_H_
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index a7f7bcd..c2b34c8 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class PrebuiltToolsTest : public CommonRuntimeTest {
 };
@@ -34,7 +34,7 @@
     struct stat exec_st;
     std::string exec_path = tools_dir + tool;
     if (stat(exec_path.c_str(), &exec_st) != 0) {
-      ADD_FAILURE() << "Can not find " << tool << " in " << tools_dir;
+      ADD_FAILURE() << "Cannot find " << tool << " in " << tools_dir;
     }
   }
 }
@@ -42,7 +42,7 @@
 TEST_F(PrebuiltToolsTest, CheckHostTools) {
   std::string tools_dir = GetAndroidHostToolsDir();
   if (tools_dir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory for host";
+    ADD_FAILURE() << "Cannot find Android tools directory for host";
   } else {
     CheckToolsExist(tools_dir);
   }
@@ -54,13 +54,13 @@
   for (InstructionSet isa : isas) {
     std::string tools_dir = GetAndroidTargetToolsDir(isa);
     if (tools_dir.empty()) {
-      ADD_FAILURE() << "Can not find Android tools directory for " << isa;
+      ADD_FAILURE() << "Cannot find Android tools directory for " << isa;
     } else {
       CheckToolsExist(tools_dir);
     }
   }
 }
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/runtime/primitive.h b/runtime/primitive.h
index ca42c47..18f45ff 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -46,6 +46,7 @@
     kPrimFloat,
     kPrimDouble,
     kPrimVoid,
+    kPrimLast = kPrimVoid
   };
 
   static Type GetType(char type) {
@@ -165,6 +166,62 @@
     return type == kPrimLong || type == kPrimDouble;
   }
 
+  // Return the general kind of `type`, fusing integer-like types as kPrimInt.
+  static Type PrimitiveKind(Type type) {
+    switch (type) {
+      case kPrimBoolean:
+      case kPrimByte:
+      case kPrimShort:
+      case kPrimChar:
+      case kPrimInt:
+        return kPrimInt;
+      default:
+        return type;
+    }
+  }
+
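+  // For example, kPrimChar is an unsigned 16-bit type, so MinValueOfIntegralType(kPrimChar)
+  // returns 0 and MaxValueOfIntegralType(kPrimChar) returns 65535.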
+  static int64_t MinValueOfIntegralType(Type type) {
+    switch (type) {
+      case kPrimBoolean:
+        return std::numeric_limits<bool>::min();
+      case kPrimByte:
+        return std::numeric_limits<int8_t>::min();
+      case kPrimChar:
+        return std::numeric_limits<uint16_t>::min();
+      case kPrimShort:
+        return std::numeric_limits<int16_t>::min();
+      case kPrimInt:
+        return std::numeric_limits<int32_t>::min();
+      case kPrimLong:
+        return std::numeric_limits<int64_t>::min();
+      default:
+        LOG(FATAL) << "non integral type";
+    }
+    return 0;
+  }
+
+  static int64_t MaxValueOfIntegralType(Type type) {
+    switch (type) {
+      case kPrimBoolean:
+        return std::numeric_limits<bool>::max();
+      case kPrimByte:
+        return std::numeric_limits<int8_t>::max();
+      case kPrimChar:
+        return std::numeric_limits<uint16_t>::max();
+      case kPrimShort:
+        return std::numeric_limits<int16_t>::max();
+      case kPrimInt:
+        return std::numeric_limits<int32_t>::max();
+      case kPrimLong:
+        return std::numeric_limits<int64_t>::max();
+      default:
+        LOG(FATAL) << "non integral type";
+    }
+    return 0;
+  }
+
  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(Primitive);
 };
diff --git a/runtime/process_state.h b/runtime/process_state.h
new file mode 100644
index 0000000..e8797d6
--- /dev/null
+++ b/runtime/process_state.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_PROCESS_STATE_H_
+#define ART_RUNTIME_PROCESS_STATE_H_
+
+#include <iosfwd>
+
+namespace art {
+
+// The process state passed in from the activity manager, used to determine when to do trimming
+// and compaction.
+enum ProcessState {
+  kProcessStateJankPerceptible = 0,
+  kProcessStateJankImperceptible = 1,
+};
+
+std::ostream& operator<<(std::ostream& os, const ProcessState& process_state);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_PROCESS_STATE_H_
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
deleted file mode 100644
index 6a77a9e..0000000
--- a/runtime/profiler.cc
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "profiler.h"
-
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/uio.h>
-
-#include <fstream>
-
-#include "art_method-inl.h"
-#include "base/stl_util.h"
-#include "base/time_utils.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker.h"
-#include "common_throws.h"
-#include "dex_file-inl.h"
-#include "instrumentation.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "os.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedLocalRef.h"
-#include "thread.h"
-#include "thread_list.h"
-#include "utils.h"
-
-#include "entrypoints/quick/quick_entrypoints.h"
-
-namespace art {
-
-BackgroundMethodSamplingProfiler* BackgroundMethodSamplingProfiler::profiler_ = nullptr;
-pthread_t BackgroundMethodSamplingProfiler::profiler_pthread_ = 0U;
-volatile bool BackgroundMethodSamplingProfiler::shutting_down_ = false;
-
-// TODO: this profiler runs regardless of the state of the machine.  Maybe we should use the
-// wakelock or something to modify the run characteristics.  This can be done when we
-// have some performance data after it's been used for a while.
-
-// Walk through the method within depth of max_depth_ on the Java stack
-class BoundedStackVisitor : public StackVisitor {
- public:
-  BoundedStackVisitor(std::vector<std::pair<ArtMethod*, uint32_t>>* stack,
-                      Thread* thread,
-                      uint32_t max_depth)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        stack_(stack),
-        max_depth_(max_depth),
-        depth_(0) {}
-
-  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    if (m->IsRuntimeMethod()) {
-      return true;
-    }
-    uint32_t dex_pc_ = GetDexPc();
-    stack_->push_back(std::make_pair(m, dex_pc_));
-    ++depth_;
-    if (depth_ < max_depth_) {
-      return true;
-    } else {
-      return false;
-    }
-  }
-
- private:
-  std::vector<std::pair<ArtMethod*, uint32_t>>* const stack_;
-  const uint32_t max_depth_;
-  uint32_t depth_;
-
-  DISALLOW_COPY_AND_ASSIGN(BoundedStackVisitor);
-};
-
-// This is called from either a thread list traversal or from a checkpoint.  Regardless
-// of which caller, the mutator lock must be held.
-static void GetSample(Thread* thread, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
-  BackgroundMethodSamplingProfiler* profiler =
-      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  const ProfilerOptions profile_options = profiler->GetProfilerOptions();
-  switch (profile_options.GetProfileType()) {
-    case kProfilerMethod: {
-      ArtMethod* method = thread->GetCurrentMethod(nullptr);
-      if ((false) && method == nullptr) {
-        LOG(INFO) << "No current method available";
-        std::ostringstream os;
-        thread->Dump(os);
-        std::string data(os.str());
-        LOG(INFO) << data;
-      }
-      profiler->RecordMethod(method);
-      break;
-    }
-    case kProfilerBoundedStack: {
-      std::vector<InstructionLocation> stack;
-      uint32_t max_depth = profile_options.GetMaxStackDepth();
-      BoundedStackVisitor bounded_stack_visitor(&stack, thread, max_depth);
-      bounded_stack_visitor.WalkStack();
-      profiler->RecordStack(stack);
-      break;
-    }
-    default:
-      LOG(INFO) << "This profile type is not implemented.";
-  }
-}
-
-// A closure that is called by the thread checkpoint code.
-class SampleCheckpoint FINAL : public Closure {
- public:
-  explicit SampleCheckpoint(BackgroundMethodSamplingProfiler* const profiler) :
-    profiler_(profiler) {}
-
-  void Run(Thread* thread) OVERRIDE {
-    Thread* self = Thread::Current();
-    if (thread == nullptr) {
-      LOG(ERROR) << "Checkpoint with nullptr thread";
-      return;
-    }
-
-    // Grab the mutator lock (shared access).
-    ScopedObjectAccess soa(self);
-
-    // Grab a sample.
-    GetSample(thread, this->profiler_);
-
-    // And finally tell the barrier that we're done.
-    this->profiler_->GetBarrier().Pass(self);
-  }
-
- private:
-  BackgroundMethodSamplingProfiler* const profiler_;
-};
-
-bool BackgroundMethodSamplingProfiler::ShuttingDown(Thread* self) {
-  MutexLock mu(self, *Locks::profiler_lock_);
-  return shutting_down_;
-}
-
-void* BackgroundMethodSamplingProfiler::RunProfilerThread(void* arg) {
-  Runtime* runtime = Runtime::Current();
-  BackgroundMethodSamplingProfiler* profiler =
-      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-
-  // Add a random delay for the first time run so that we don't hammer the CPU
-  // with all profiles running at the same time.
-  const int kRandomDelayMaxSecs = 30;
-  const double kMaxBackoffSecs = 24*60*60;   // Max backoff time.
-
-  srand(MicroTime() * getpid());
-  int startup_delay = rand() % kRandomDelayMaxSecs;   // random delay for startup.
-
-
-  CHECK(runtime->AttachCurrentThread("Profiler", true, runtime->GetSystemThreadGroup(),
-                                      !runtime->IsAotCompiler()));
-
-  Thread* self = Thread::Current();
-
-  double backoff = 1.0;
-  while (true) {
-    if (ShuttingDown(self)) {
-      break;
-    }
-
-    {
-      // wait until we need to run another profile
-      uint64_t delay_secs = profiler->options_.GetPeriodS() * backoff;
-
-      // Add a startup delay to prevent all the profiles running at once.
-      delay_secs += startup_delay;
-
-      // Immediate startup for benchmarking?
-      if (profiler->options_.GetStartImmediately() && startup_delay > 0) {
-        delay_secs = 0;
-      }
-
-      startup_delay = 0;
-
-      VLOG(profiler) << "Delaying profile start for " << delay_secs << " secs";
-      MutexLock mu(self, profiler->wait_lock_);
-      profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
-      // We were either signaled by Stop or timedout, in either case ignore the timed out result.
-
-      // Expand the backoff by its coefficient, but don't go beyond the max.
-      backoff = std::min(backoff * profiler->options_.GetBackoffCoefficient(), kMaxBackoffSecs);
-    }
-
-    if (ShuttingDown(self)) {
-      break;
-    }
-
-
-    uint64_t start_us = MicroTime();
-    uint64_t end_us = start_us + profiler->options_.GetDurationS() * UINT64_C(1000000);
-    uint64_t now_us = start_us;
-
-    VLOG(profiler) << "Starting profiling run now for "
-                   << PrettyDuration((end_us - start_us) * 1000);
-
-    SampleCheckpoint check_point(profiler);
-
-    size_t valid_samples = 0;
-    while (now_us < end_us) {
-      if (ShuttingDown(self)) {
-        break;
-      }
-
-      usleep(profiler->options_.GetIntervalUs());    // Non-interruptible sleep.
-
-      ThreadList* thread_list = runtime->GetThreadList();
-
-      profiler->profiler_barrier_->Init(self, 0);
-      size_t barrier_count = thread_list->RunCheckpointOnRunnableThreads(&check_point);
-
-      // All threads are suspended, nothing to do.
-      if (barrier_count == 0) {
-        now_us = MicroTime();
-        continue;
-      }
-
-      valid_samples += barrier_count;
-
-      ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-
-      // Wait for the barrier to be crossed by all runnable threads.  This wait
-      // is done with a timeout so that we can detect problems with the checkpoint
-      // running code.  We should never see this.
-      const uint32_t kWaitTimeoutMs = 10000;
-
-      // Wait for all threads to pass the barrier.
-      bool timed_out =  profiler->profiler_barrier_->Increment(self, barrier_count, kWaitTimeoutMs);
-
-      // We should never get a timeout.  If we do, it suggests a problem with the checkpoint
-      // code.  Crash the process in this case.
-      CHECK(!timed_out);
-
-      // Update the current time.
-      now_us = MicroTime();
-    }
-
-    if (valid_samples > 0) {
-      // After the profile has been taken, write it out.
-      ScopedObjectAccess soa(self);   // Acquire the mutator lock.
-      uint32_t size = profiler->WriteProfile();
-      VLOG(profiler) << "Profile size: " << size;
-    }
-  }
-
-  LOG(INFO) << "Profiler shutdown";
-  runtime->DetachCurrentThread();
-  return nullptr;
-}
-
-// Write out the profile file if we are generating a profile.
-uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
-  std::string full_name = output_filename_;
-  VLOG(profiler) << "Saving profile to " << full_name;
-
-  int fd = open(full_name.c_str(), O_RDWR);
-  if (fd < 0) {
-    // Open failed.
-    LOG(ERROR) << "Failed to open profile file " << full_name;
-    return 0;
-  }
-
-  // Lock the file for exclusive access.  This will block if another process is using
-  // the file.
-  int err = flock(fd, LOCK_EX);
-  if (err < 0) {
-    LOG(ERROR) << "Failed to lock profile file " << full_name;
-    return 0;
-  }
-
-  // Read the previous profile.
-  profile_table_.ReadPrevious(fd, options_.GetProfileType());
-
-  // Move back to the start of the file.
-  lseek(fd, 0, SEEK_SET);
-
-  // Format the profile output and write to the file.
-  std::ostringstream os;
-  uint32_t num_methods = DumpProfile(os);
-  std::string data(os.str());
-  const char *p = data.c_str();
-  size_t length = data.length();
-  size_t full_length = length;
-  do {
-    int n = ::write(fd, p, length);
-    p += n;
-    length -= n;
-  } while (length > 0);
-
-  // Truncate the file to the new length.
-  if (ftruncate(fd, full_length) == -1) {
-    LOG(ERROR) << "Failed to truncate profile file " << full_name;
-  }
-
-  // Now unlock the file, allowing another process in.
-  err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    LOG(ERROR) << "Failed to unlock profile file " << full_name;
-  }
-
-  // Done, close the file.
-  ::close(fd);
-
-  // Clean the profile for the next time.
-  CleanProfile();
-
-  return num_methods;
-}
-
-bool BackgroundMethodSamplingProfiler::Start(
-    const std::string& output_filename, const ProfilerOptions& options) {
-  if (!options.IsEnabled()) {
-    return false;
-  }
-
-  CHECK(!output_filename.empty());
-
-  Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::profiler_lock_);
-    // Don't start two profiler threads.
-    if (profiler_ != nullptr) {
-      return true;
-    }
-  }
-
-  LOG(INFO) << "Starting profiler using output file: " << output_filename
-            << " and options: " << options;
-  {
-    MutexLock mu(self, *Locks::profiler_lock_);
-    profiler_ = new BackgroundMethodSamplingProfiler(output_filename, options);
-
-    CHECK_PTHREAD_CALL(pthread_create, (&profiler_pthread_, nullptr, &RunProfilerThread,
-        reinterpret_cast<void*>(profiler_)),
-                       "Profiler thread");
-  }
-  return true;
-}
-
-
-
-void BackgroundMethodSamplingProfiler::Stop() {
-  BackgroundMethodSamplingProfiler* profiler = nullptr;
-  pthread_t profiler_pthread = 0U;
-  {
-    MutexLock trace_mu(Thread::Current(), *Locks::profiler_lock_);
-    CHECK(!shutting_down_);
-    profiler = profiler_;
-    shutting_down_ = true;
-    profiler_pthread = profiler_pthread_;
-  }
-
-  // Now wake up the sampler thread if it sleeping.
-  {
-    MutexLock profile_mu(Thread::Current(), profiler->wait_lock_);
-    profiler->period_condition_.Signal(Thread::Current());
-  }
-  // Wait for the sample thread to stop.
-  CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profiler thread shutdown");
-
-  {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    profiler_ = nullptr;
-  }
-  delete profiler;
-}
-
-
-void BackgroundMethodSamplingProfiler::Shutdown() {
-  Stop();
-}
-
-BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(
-  const std::string& output_filename, const ProfilerOptions& options)
-    : output_filename_(output_filename),
-      options_(options),
-      wait_lock_("Profile wait lock"),
-      period_condition_("Profile condition", wait_lock_),
-      profile_table_(wait_lock_),
-      profiler_barrier_(new Barrier(0)) {
-  // Populate the filtered_methods set.
-  // This is empty right now, but to add a method, do this:
-  //
-  // filtered_methods_.insert("void java.lang.Object.wait(long, int)");
-}
-
-// Filter out methods the profiler doesn't want to record.
-// We require mutator lock since some statistics will be updated here.
-bool BackgroundMethodSamplingProfiler::ProcessMethod(ArtMethod* method) {
-  if (method == nullptr) {
-    profile_table_.NullMethod();
-    // Don't record a null method.
-    return false;
-  }
-
-  mirror::Class* cls = method->GetDeclaringClass();
-  if (cls != nullptr) {
-    if (cls->GetClassLoader() == nullptr) {
-      // Don't include things in the boot
-      profile_table_.BootMethod();
-      return false;
-    }
-  }
-
-  bool is_filtered = false;
-
-  if (strcmp(method->GetName(), "<clinit>") == 0) {
-    // always filter out class init
-    is_filtered = true;
-  }
-
-  // Filter out methods by name if there are any.
-  if (!is_filtered && filtered_methods_.size() > 0) {
-    std::string method_full_name = PrettyMethod(method);
-
-    // Don't include specific filtered methods.
-    is_filtered = filtered_methods_.count(method_full_name) != 0;
-  }
-  return !is_filtered;
-}
-
-// A method has been hit, record its invocation in the method map.
-// The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(ArtMethod* method) {
-  // Add to the profile table unless it is filtered out.
-  if (ProcessMethod(method)) {
-    profile_table_.Put(method);
-  }
-}
-
-// Record the current bounded stack into sampling results.
-void BackgroundMethodSamplingProfiler::RecordStack(const std::vector<InstructionLocation>& stack) {
-  if (stack.size() == 0) {
-    return;
-  }
-  // Get the method on top of the stack. We use this method to perform filtering.
-  ArtMethod* method = stack.front().first;
-  if (ProcessMethod(method)) {
-      profile_table_.PutStack(stack);
-  }
-}
-
-// Clean out any recordings for the method traces.
-void BackgroundMethodSamplingProfiler::CleanProfile() {
-  profile_table_.Clear();
-}
-
-uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
-  return profile_table_.Write(os, options_.GetProfileType());
-}
-
-// Profile Table.
-// This holds a mapping of ArtMethod* to a count of how many times a sample
-// hit it at the top of the stack.
-ProfileSampleResults::ProfileSampleResults(Mutex& lock)
-    : lock_(lock),
-      num_samples_(0U),
-      num_null_methods_(0U),
-      num_boot_methods_(0U),
-      previous_num_samples_(0U),
-      previous_num_null_methods_(0U),
-      previous_num_boot_methods_(0U) {
-  for (int i = 0; i < kHashSize; i++) {
-    table[i] = nullptr;
-  }
-  method_context_table = nullptr;
-  stack_trie_root_ = nullptr;
-}
-
-ProfileSampleResults::~ProfileSampleResults() {
-  Clear();
-}
-
-// Add a method to the profile table.  If it's the first time the method
-// has been seen, add it with count=1, otherwise increment the count.
-void ProfileSampleResults::Put(ArtMethod* method) {
-  MutexLock mu(Thread::Current(), lock_);
-  uint32_t index = Hash(method);
-  if (table[index] == nullptr) {
-    table[index] = new Map();
-  }
-  Map::iterator i = table[index]->find(method);
-  if (i == table[index]->end()) {
-    (*table[index])[method] = 1;
-  } else {
-    i->second++;
-  }
-  num_samples_++;
-}
-
-// Add a bounded stack to the profile table. Only the count of the method on
-// top of the frame will be increased.
-void ProfileSampleResults::PutStack(const std::vector<InstructionLocation>& stack) {
-  MutexLock mu(Thread::Current(), lock_);
-  ScopedObjectAccess soa(Thread::Current());
-  if (stack_trie_root_ == nullptr) {
-    // The root of the stack trie is a dummy node so that we don't have to maintain
-    // a collection of tries.
-    stack_trie_root_ = new StackTrieNode();
-  }
-
-  StackTrieNode* current = stack_trie_root_;
-  if (stack.size() == 0) {
-    current->IncreaseCount();
-    return;
-  }
-
-  for (std::vector<InstructionLocation>::const_reverse_iterator iter = stack.rbegin();
-       iter != stack.rend(); ++iter) {
-    InstructionLocation inst_loc = *iter;
-    ArtMethod* method = inst_loc.first;
-    if (method == nullptr) {
-      // skip null method
-      continue;
-    }
-    uint32_t dex_pc = inst_loc.second;
-    uint32_t method_idx = method->GetDexMethodIndex();
-    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-    MethodReference method_ref(dex_file, method_idx);
-    StackTrieNode* child = current->FindChild(method_ref, dex_pc);
-    if (child != nullptr) {
-      current = child;
-    } else {
-      uint32_t method_size = 0;
-      const DexFile::CodeItem* codeitem = method->GetCodeItem();
-      if (codeitem != nullptr) {
-        method_size = codeitem->insns_size_in_code_units_;
-      }
-      StackTrieNode* new_node = new StackTrieNode(method_ref, dex_pc, method_size, current);
-      current->AppendChild(new_node);
-      current = new_node;
-    }
-  }
-
-  if (current != stack_trie_root_ && current->GetCount() == 0) {
-    // Insert into method_context table;
-    if (method_context_table == nullptr) {
-      method_context_table = new MethodContextMap();
-    }
-    MethodReference method = current->GetMethod();
-    MethodContextMap::iterator i = method_context_table->find(method);
-    if (i == method_context_table->end()) {
-      TrieNodeSet* node_set = new TrieNodeSet();
-      node_set->insert(current);
-      (*method_context_table)[method] = node_set;
-    } else {
-      TrieNodeSet* node_set = i->second;
-      node_set->insert(current);
-    }
-  }
-  current->IncreaseCount();
-  num_samples_++;
-}
-
-// Write the profile table to the output stream.  Also merge with the previous profile.
-uint32_t ProfileSampleResults::Write(std::ostream& os, ProfileDataType type) {
-  ScopedObjectAccess soa(Thread::Current());
-  num_samples_ += previous_num_samples_;
-  num_null_methods_ += previous_num_null_methods_;
-  num_boot_methods_ += previous_num_boot_methods_;
-
-  VLOG(profiler) << "Profile: "
-                 << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
-  os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
-  uint32_t num_methods = 0;
-  if (type == kProfilerMethod) {
-    for (int i = 0 ; i < kHashSize; i++) {
-      Map *map = table[i];
-      if (map != nullptr) {
-        for (const auto &meth_iter : *map) {
-          ArtMethod *method = meth_iter.first;
-          std::string method_name = PrettyMethod(method);
-
-          const DexFile::CodeItem* codeitem = method->GetCodeItem();
-          uint32_t method_size = 0;
-          if (codeitem != nullptr) {
-            method_size = codeitem->insns_size_in_code_units_;
-          }
-          uint32_t count = meth_iter.second;
-
-          // Merge this profile entry with one from a previous run (if present).  Also
-          // remove the previous entry.
-          PreviousProfile::iterator pi = previous_.find(method_name);
-          if (pi != previous_.end()) {
-            count += pi->second.count_;
-            previous_.erase(pi);
-          }
-          os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
-          ++num_methods;
-        }
-      }
-    }
-  } else if (type == kProfilerBoundedStack) {
-    if (method_context_table != nullptr) {
-      for (const auto &method_iter : *method_context_table) {
-        MethodReference method = method_iter.first;
-        TrieNodeSet* node_set = method_iter.second;
-        std::string method_name = PrettyMethod(method.dex_method_index, *(method.dex_file));
-        uint32_t method_size = 0;
-        uint32_t total_count = 0;
-        PreviousContextMap new_context_map;
-        for (const auto &trie_node_i : *node_set) {
-          StackTrieNode* node = trie_node_i;
-          method_size = node->GetMethodSize();
-          uint32_t count = node->GetCount();
-          uint32_t dexpc = node->GetDexPC();
-          total_count += count;
-
-          StackTrieNode* current = node->GetParent();
-          // We go backward on the trie to retrieve context and dex_pc until the dummy root.
-          // The format of the context is "method_1@pc_1@method_2@pc_2@..."
-          std::vector<std::string> context_vector;
-          while (current != nullptr && current->GetParent() != nullptr) {
-            context_vector.push_back(StringPrintf("%s@%u",
-                PrettyMethod(current->GetMethod().dex_method_index, *(current->GetMethod().dex_file)).c_str(),
-                current->GetDexPC()));
-            current = current->GetParent();
-          }
-          std::string context_sig = Join(context_vector, '@');
-          new_context_map[std::make_pair(dexpc, context_sig)] = count;
-        }
-
-        PreviousProfile::iterator pi = previous_.find(method_name);
-        if (pi != previous_.end()) {
-          total_count += pi->second.count_;
-          PreviousContextMap* previous_context_map = pi->second.context_map_;
-          if (previous_context_map != nullptr) {
-            for (const auto &context_i : *previous_context_map) {
-              uint32_t count = context_i.second;
-              PreviousContextMap::iterator ci = new_context_map.find(context_i.first);
-              if (ci == new_context_map.end()) {
-                new_context_map[context_i.first] = count;
-              } else {
-                ci->second += count;
-              }
-            }
-          }
-          delete previous_context_map;
-          previous_.erase(pi);
-        }
-        // We write out profile data with dex pc and context information in the following format:
-        // "method/total_count/size/[pc_1:count_1:context_1#pc_2:count_2:context_2#...]".
-        std::vector<std::string> context_count_vector;
-        for (const auto &context_i : new_context_map) {
-          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
-              context_i.second, context_i.first.second.c_str()));
-        }
-        os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
-            method_size, Join(context_count_vector, '#').c_str());
-        ++num_methods;
-      }
-    }
-  }
-
-  // Now we write out the remaining previous methods.
-  for (const auto &pi : previous_) {
-    if (type == kProfilerMethod) {
-      os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-    } else if (type == kProfilerBoundedStack) {
-      os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-      PreviousContextMap* previous_context_map = pi.second.context_map_;
-      if (previous_context_map != nullptr) {
-        std::vector<std::string> context_count_vector;
-        for (const auto &context_i : *previous_context_map) {
-          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
-              context_i.second, context_i.first.second.c_str()));
-        }
-        os << Join(context_count_vector, '#');
-      }
-      os << "]\n";
-    }
-    ++num_methods;
-  }
-  return num_methods;
-}
-
-void ProfileSampleResults::Clear() {
-  num_samples_ = 0;
-  num_null_methods_ = 0;
-  num_boot_methods_ = 0;
-  for (int i = 0; i < kHashSize; i++) {
-    delete table[i];
-    table[i] = nullptr;
-  }
-  if (stack_trie_root_ != nullptr) {
-    stack_trie_root_->DeleteChildren();
-    delete stack_trie_root_;
-    stack_trie_root_ = nullptr;
-    if (method_context_table != nullptr) {
-      delete method_context_table;
-      method_context_table = nullptr;
-    }
-  }
-  for (auto &pi : previous_) {
-    if (pi.second.context_map_ != nullptr) {
-      delete pi.second.context_map_;
-      pi.second.context_map_ = nullptr;
-    }
-  }
-  previous_.clear();
-}
-
-uint32_t ProfileSampleResults::Hash(ArtMethod* method) {
-  return (PointerToLowMemUInt32(method) >> 3) % kHashSize;
-}
-
-// Read a single line into the given string.  Returns true if everything OK, false
-// on EOF or error.
-static bool ReadProfileLine(int fd, std::string& line) {
-  char buf[4];
-  line.clear();
-  while (true) {
-    int n = read(fd, buf, 1);     // TODO: could speed this up but is it worth it?
-    if (n != 1) {
-      return false;
-    }
-    if (buf[0] == '\n') {
-      break;
-    }
-    line += buf[0];
-  }
-  return true;
-}
-
-void ProfileSampleResults::ReadPrevious(int fd, ProfileDataType type) {
-  // Reset counters.
-  previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
-
-  std::string line;
-
-  // The first line contains summary information.
-  if (!ReadProfileLine(fd, line)) {
-    return;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', &summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/nullcount/bootcount
-    return;
-  }
-  previous_num_samples_ = strtoul(summary_info[0].c_str(), nullptr, 10);
-  previous_num_null_methods_ = strtoul(summary_info[1].c_str(), nullptr, 10);
-  previous_num_boot_methods_ = strtoul(summary_info[2].c_str(), nullptr, 10);
-
-  // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
-  while (true) {
-    if (!ReadProfileLine(fd, line)) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', &info);
-    if (info.size() != 3 && info.size() != 4) {
-      // Malformed.
-      break;
-    }
-    std::string methodname = info[0];
-    uint32_t total_count = strtoul(info[1].c_str(), nullptr, 10);
-    uint32_t size = strtoul(info[2].c_str(), nullptr, 10);
-    PreviousContextMap* context_map = nullptr;
-    if (type == kProfilerBoundedStack && info.size() == 4) {
-      context_map = new PreviousContextMap();
-      std::string context_counts_str = info[3].substr(1, info[3].size() - 2);
-      std::vector<std::string> context_count_pairs;
-      Split(context_counts_str, '#', &context_count_pairs);
-      for (uint32_t i = 0; i < context_count_pairs.size(); ++i) {
-        std::vector<std::string> context_count;
-        Split(context_count_pairs[i], ':', &context_count);
-        if (context_count.size() == 2) {
-          // Handles the situtation when the profile file doesn't contain context information.
-          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
-          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
-          (*context_map)[std::make_pair(dexpc, "")] = count;
-        } else {
-          // Handles the situtation when the profile file contains context information.
-          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
-          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
-          std::string context = context_count[2];
-          (*context_map)[std::make_pair(dexpc, context)] = count;
-        }
-      }
-    }
-    previous_[methodname] = PreviousValue(total_count, size, context_map);
-  }
-}
-
-bool ProfileFile::LoadFile(const std::string& fileName) {
-  LOG(VERBOSE) << "reading profile file " << fileName;
-  struct stat st;
-  int err = stat(fileName.c_str(), &st);
-  if (err == -1) {
-    LOG(VERBOSE) << "not found";
-    return false;
-  }
-  if (st.st_size == 0) {
-    return false;  // Empty profiles are invalid.
-  }
-  std::ifstream in(fileName.c_str());
-  if (!in) {
-    LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened";
-    LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid;
-    LOG(VERBOSE) << "me: " << getuid() << ":" << getgid();
-    LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode;
-    LOG(VERBOSE) << "errno: " << errno;
-    return false;
-  }
-  // The first line contains summary information.
-  std::string line;
-  std::getline(in, line);
-  if (in.eof()) {
-    return false;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', &summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be total/null/boot.
-    return false;
-  }
-  // This is the number of hits in all profiled methods (without null or boot methods)
-  uint32_t total_count = strtoul(summary_info[0].c_str(), nullptr, 10);
-
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
-  // Store the info in descending order given by the most used methods.
-  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
-  ProfileSet countSet;
-  while (!in.eof()) {
-    std::getline(in, line);
-    if (in.eof()) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', &info);
-    if (info.size() != 3 && info.size() != 4) {
-      // Malformed.
-      return false;
-    }
-    int count = atoi(info[1].c_str());
-    countSet.insert(std::make_pair(-count, info));
-  }
-
-  uint32_t curTotalCount = 0;
-  ProfileSet::iterator end = countSet.end();
-  const ProfileData* prevData = nullptr;
-  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
-    const std::string& methodname = it->second[0];
-    uint32_t count = -it->first;
-    uint32_t size = strtoul(it->second[2].c_str(), nullptr, 10);
-    double usedPercent = (count * 100.0) / total_count;
-
-    curTotalCount += count;
-    // Methods with the same count should be part of the same top K percentage bucket.
-    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
-      ? prevData->GetTopKUsedPercentage()
-      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
-
-    // Add it to the profile map.
-    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profile_map_[methodname] = curData;
-    prevData = &curData;
-  }
-  return true;
-}
-
-bool ProfileFile::GetProfileData(ProfileFile::ProfileData* data, const std::string& method_name) {
-  ProfileMap::iterator i = profile_map_.find(method_name);
-  if (i == profile_map_.end()) {
-    return false;
-  }
-  *data = i->second;
-  return true;
-}
-
-bool ProfileFile::GetTopKSamples(std::set<std::string>& topKSamples, double topKPercentage) {
-  ProfileMap::iterator end = profile_map_.end();
-  for (ProfileMap::iterator it = profile_map_.begin(); it != end; it++) {
-    if (it->second.GetTopKUsedPercentage() < topKPercentage) {
-      topKSamples.insert(it->first);
-    }
-  }
-  return true;
-}
-
-StackTrieNode* StackTrieNode::FindChild(MethodReference method, uint32_t dex_pc) {
-  if (children_.size() == 0) {
-    return nullptr;
-  }
-  // Create a dummy node for searching.
-  StackTrieNode* node = new StackTrieNode(method, dex_pc, 0, nullptr);
-  std::set<StackTrieNode*, StackTrieNodeComparator>::iterator i = children_.find(node);
-  delete node;
-  return (i == children_.end()) ? nullptr : *i;
-}
-
-void StackTrieNode::DeleteChildren() {
-  for (auto &child : children_) {
-    if (child != nullptr) {
-      child->DeleteChildren();
-      delete child;
-    }
-  }
-}
-
-}  // namespace art
diff --git a/runtime/profiler.h b/runtime/profiler.h
deleted file mode 100644
index bd29f71..0000000
--- a/runtime/profiler.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_PROFILER_H_
-#define ART_RUNTIME_PROFILER_H_
-
-#include <memory>
-#include <ostream>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "barrier.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "globals.h"
-#include "instrumentation.h"
-#include "profiler_options.h"
-#include "os.h"
-#include "safe_map.h"
-#include "method_reference.h"
-
-namespace art {
-
-namespace mirror {
-  class Class;
-}  // namespace mirror
-class ArtMethod;
-class Thread;
-
-typedef std::pair<ArtMethod*, uint32_t> InstructionLocation;
-
-// This class stores the sampled bounded stacks in a trie structure. A path of the trie represents
-// a particular context with the method on top of the stack being a leaf or an internal node of the
-// trie rather than the root.
-class StackTrieNode {
- public:
-  StackTrieNode(MethodReference method, uint32_t dex_pc, uint32_t method_size,
-      StackTrieNode* parent) :
-      parent_(parent), method_(method), dex_pc_(dex_pc),
-      count_(0), method_size_(method_size) {
-  }
-  StackTrieNode() : parent_(nullptr), method_(nullptr, 0),
-      dex_pc_(0), count_(0), method_size_(0) {
-  }
-  StackTrieNode* GetParent() { return parent_; }
-  MethodReference GetMethod() { return method_; }
-  uint32_t GetCount() { return count_; }
-  uint32_t GetDexPC() { return dex_pc_; }
-  uint32_t GetMethodSize() { return method_size_; }
-  void AppendChild(StackTrieNode* child) { children_.insert(child); }
-  StackTrieNode* FindChild(MethodReference method, uint32_t dex_pc);
-  void DeleteChildren();
-  void IncreaseCount() { ++count_; }
-
- private:
-  // Comparator for stack trie node.
-  struct StackTrieNodeComparator {
-    bool operator()(StackTrieNode* node1, StackTrieNode* node2) const {
-      MethodReference mr1 = node1->GetMethod();
-      MethodReference mr2 = node2->GetMethod();
-      if (mr1.dex_file == mr2.dex_file) {
-        if (mr1.dex_method_index == mr2.dex_method_index) {
-          return node1->GetDexPC() < node2->GetDexPC();
-        } else {
-          return mr1.dex_method_index < mr2.dex_method_index;
-        }
-      } else {
-        return mr1.dex_file < mr2.dex_file;
-      }
-    }
-  };
-
-  std::set<StackTrieNode*, StackTrieNodeComparator> children_;
-  StackTrieNode* parent_;
-  MethodReference method_;
-  uint32_t dex_pc_;
-  uint32_t count_;
-  uint32_t method_size_;
-};
-
-//
-// This class holds all the results for all runs of the profiler.  It also
-// counts the number of null methods (where we can't determine the method) and
-// the number of methods in the boot path (where we have already compiled the method).
-//
-// This object is an internal profiler object and uses the same locking as the profiler
-// itself.
-class ProfileSampleResults {
- public:
-  explicit ProfileSampleResults(Mutex& lock);
-  ~ProfileSampleResults();
-
-  void Put(ArtMethod* method) REQUIRES(!lock_);
-  void PutStack(const std::vector<InstructionLocation>& stack_dump) REQUIRES(!lock_);
-  uint32_t Write(std::ostream &os, ProfileDataType type);
-  void ReadPrevious(int fd, ProfileDataType type);
-  void Clear();
-  uint32_t GetNumSamples() { return num_samples_; }
-  void NullMethod() { ++num_null_methods_; }
-  void BootMethod() { ++num_boot_methods_; }
-
- private:
-  uint32_t Hash(ArtMethod* method);
-  static constexpr int kHashSize = 17;
-  Mutex& lock_;                  // Reference to the main profiler lock - we don't need two of them.
-  uint32_t num_samples_;         // Total number of samples taken.
-  uint32_t num_null_methods_;    // Number of samples where can don't know the method.
-  uint32_t num_boot_methods_;    // Number of samples in the boot path.
-
-  typedef std::map<ArtMethod*, uint32_t> Map;  // Map of method vs its count.
-  Map *table[kHashSize];
-
-  typedef std::set<StackTrieNode*> TrieNodeSet;
-  // Map of method hit by profiler vs the set of stack trie nodes for this method.
-  typedef std::map<MethodReference, TrieNodeSet*, MethodReferenceComparator> MethodContextMap;
-  MethodContextMap *method_context_table;
-  StackTrieNode* stack_trie_root_;  // Root of the trie that stores sampled stack information.
-
-  // Map from <pc, context> to counts.
-  typedef std::map<std::pair<uint32_t, std::string>, uint32_t> PreviousContextMap;
-  struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0), context_map_(nullptr) {}
-    PreviousValue(uint32_t count, uint32_t method_size, PreviousContextMap* context_map)
-      : count_(count), method_size_(method_size), context_map_(context_map) {}
-    uint32_t count_;
-    uint32_t method_size_;
-    PreviousContextMap* context_map_;
-  };
-
-  typedef std::map<std::string, PreviousValue> PreviousProfile;
-  PreviousProfile previous_;
-  uint32_t previous_num_samples_;
-  uint32_t previous_num_null_methods_;     // Number of samples where can don't know the method.
-  uint32_t previous_num_boot_methods_;     // Number of samples in the boot path.
-};
-
-//
-// The BackgroundMethodSamplingProfiler runs in a thread.  Most of the time it is sleeping but
-// occasionally wakes up and counts the number of times a method is called.  Each time
-// it ticks, it looks at the current method and records it in the ProfileSampleResults
-// table.
-//
-// The timing is controlled by a number of variables:
-// 1.  Period: the time between sampling runs.
-// 2.  Interval: the time between each sample in a run.
-// 3.  Duration: the duration of a run.
-//
-// So the profiler thread is sleeping for the 'period' time.  It wakes up and runs for the
-// 'duration'.  The run consists of a series of samples, each of which is 'interval' microseconds
-// apart.  At the end of a run, it writes the results table to a file and goes back to sleep.
-
-class BackgroundMethodSamplingProfiler {
- public:
-  // Start a profile thread with the user-supplied arguments.
-  // Returns true if the profile was started or if it was already running. Returns false otherwise.
-  static bool Start(const std::string& output_filename, const ProfilerOptions& options)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
-               !Locks::profiler_lock_);
-
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Stop() REQUIRES(!Locks::profiler_lock_, !wait_lock_, !Locks::profiler_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Shutdown() REQUIRES(!Locks::profiler_lock_) NO_THREAD_SAFETY_ANALYSIS;
-
-  void RecordMethod(ArtMethod *method) SHARED_REQUIRES(Locks::mutator_lock_);
-  void RecordStack(const std::vector<InstructionLocation>& stack)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool ProcessMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
-  const ProfilerOptions& GetProfilerOptions() const { return options_; }
-
-  Barrier& GetBarrier() {
-    return *profiler_barrier_;
-  }
-
- private:
-  explicit BackgroundMethodSamplingProfiler(
-    const std::string& output_filename, const ProfilerOptions& options);
-
-  // The sampling interval in microseconds is passed as an argument.
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void* RunProfilerThread(void* arg) REQUIRES(!Locks::profiler_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
-
-  uint32_t WriteProfile() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void CleanProfile();
-  uint32_t DumpProfile(std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_);
-  static bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
-
-  static BackgroundMethodSamplingProfiler* profiler_ GUARDED_BY(Locks::profiler_lock_);
-
-  // We need to shut the sample thread down at exit.  Setting this to true will do that.
-  static volatile bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-
-  // Sampling thread, non-zero when sampling.
-  static pthread_t profiler_pthread_;
-
-  // Some measure of the number of samples that are significant.
-  static constexpr uint32_t kSignificantSamples = 10;
-
-  // The name of the file where profile data will be written.
-  std::string output_filename_;
-  // The options used to start the profiler.
-  const ProfilerOptions& options_;
-
-
-  // Profile condition support.
-  Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
-
-  ProfileSampleResults profile_table_;
-
-  std::unique_ptr<Barrier> profiler_barrier_;
-
-  // Set of methods to be filtered out.  This will probably be rare because
-  // most of the methods we want to be filtered reside in the boot path and
-  // are automatically filtered.
-  typedef std::set<std::string> FilteredMethods;
-  FilteredMethods filtered_methods_;
-
-  DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
-};
-
-//
-// Contains profile data generated from previous runs of the program and stored
-// in a file.  It is used to determine whether to compile a particular method or not.
-class ProfileFile {
- public:
-  class ProfileData {
-   public:
-    ProfileData() : count_(0), method_size_(0), used_percent_(0), top_k_used_percentage_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-      double used_percent, double top_k_used_percentage) :
-      method_name_(method_name), count_(count), method_size_(method_size),
-      used_percent_(used_percent), top_k_used_percentage_(top_k_used_percentage) {
-      // TODO: currently method_size_ is unused
-      UNUSED(method_size_);
-    }
-
-    double GetUsedPercent() const { return used_percent_; }
-    uint32_t GetCount() const { return count_; }
-    double GetTopKUsedPercentage() const { return top_k_used_percentage_; }
-
-   private:
-    std::string method_name_;       // Method name.
-    uint32_t count_;                // Number of times it has been called.
-    uint32_t method_size_;          // Size of the method on dex instructions.
-    double used_percent_;           // Percentage of how many times this method was called.
-    double top_k_used_percentage_;  // The percentage of the group that comprise K% of the total
-                                    // used methods this methods belongs to.
-  };
-
- public:
-  // Loads profile data from the given file. The new data are merged with any existing data.
-  // Returns true if the file was loaded successfully and false otherwise.
-  bool LoadFile(const std::string& filename);
-
-  // Computes the group that comprise top_k_percentage of the total used methods.
-  bool GetTopKSamples(std::set<std::string>& top_k_methods, double top_k_percentage);
-
-  // If the given method has an entry in the profile table it updates the data
-  // and returns true. Otherwise returns false and leaves the data unchanged.
-  bool GetProfileData(ProfileData* data, const std::string& method_name);
-
- private:
-  // Profile data is stored in a map, indexed by the full method name.
-  typedef std::map<std::string, ProfileData> ProfileMap;
-  ProfileMap profile_map_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_PROFILER_H_
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
deleted file mode 100644
index 1db2f05..0000000
--- a/runtime/profiler_options.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_PROFILER_OPTIONS_H_
-#define ART_RUNTIME_PROFILER_OPTIONS_H_
-
-#include <string>
-#include <ostream>
-
-namespace art {
-
-enum ProfileDataType {
-  kProfilerMethod,          // Method only
-  kProfilerBoundedStack,    // Methods with Dex PC on top of the stack
-};
-std::ostream& operator<<(std::ostream& os, const ProfileDataType& rhs);
-
-class ProfilerOptions {
- public:
-  static constexpr bool kDefaultEnabled = false;
-  static constexpr uint32_t kDefaultPeriodS = 10;
-  static constexpr uint32_t kDefaultDurationS = 20;
-  static constexpr uint32_t kDefaultIntervalUs = 500;
-  static constexpr double kDefaultBackoffCoefficient = 2.0;
-  static constexpr bool kDefaultStartImmediately = false;
-  static constexpr double kDefaultTopKThreshold = 90.0;
-  static constexpr double kDefaultChangeInTopKThreshold = 10.0;
-  static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
-  static constexpr uint32_t kDefaultMaxStackDepth = 3;
-
-  ProfilerOptions() :
-    enabled_(kDefaultEnabled),
-    period_s_(kDefaultPeriodS),
-    duration_s_(kDefaultDurationS),
-    interval_us_(kDefaultIntervalUs),
-    backoff_coefficient_(kDefaultBackoffCoefficient),
-    start_immediately_(kDefaultStartImmediately),
-    top_k_threshold_(kDefaultTopKThreshold),
-    top_k_change_threshold_(kDefaultChangeInTopKThreshold),
-    profile_type_(kDefaultProfileData),
-    max_stack_depth_(kDefaultMaxStackDepth) {}
-
-  ProfilerOptions(bool enabled,
-                 uint32_t period_s,
-                 uint32_t duration_s,
-                 uint32_t interval_us,
-                 double backoff_coefficient,
-                 bool start_immediately,
-                 double top_k_threshold,
-                 double top_k_change_threshold,
-                 ProfileDataType profile_type,
-                 uint32_t max_stack_depth):
-    enabled_(enabled),
-    period_s_(period_s),
-    duration_s_(duration_s),
-    interval_us_(interval_us),
-    backoff_coefficient_(backoff_coefficient),
-    start_immediately_(start_immediately),
-    top_k_threshold_(top_k_threshold),
-    top_k_change_threshold_(top_k_change_threshold),
-    profile_type_(profile_type),
-    max_stack_depth_(max_stack_depth) {}
-
-  bool IsEnabled() const {
-    return enabled_;
-  }
-
-  uint32_t GetPeriodS() const {
-    return period_s_;
-  }
-
-  uint32_t GetDurationS() const {
-    return duration_s_;
-  }
-
-  uint32_t GetIntervalUs() const {
-    return interval_us_;
-  }
-
-  double GetBackoffCoefficient() const {
-    return backoff_coefficient_;
-  }
-
-  bool GetStartImmediately() const {
-    return start_immediately_;
-  }
-
-  double GetTopKThreshold() const {
-    return top_k_threshold_;
-  }
-
-  double GetTopKChangeThreshold() const {
-    return top_k_change_threshold_;
-  }
-
-  ProfileDataType GetProfileType() const {
-    return profile_type_;
-  }
-
-  uint32_t GetMaxStackDepth() const {
-    return max_stack_depth_;
-  }
-
- private:
-  friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
-    os << "enabled=" << po.enabled_
-       << ", period_s=" << po.period_s_
-       << ", duration_s=" << po.duration_s_
-       << ", interval_us=" << po.interval_us_
-       << ", backoff_coefficient=" << po.backoff_coefficient_
-       << ", start_immediately=" << po.start_immediately_
-       << ", top_k_threshold=" << po.top_k_threshold_
-       << ", top_k_change_threshold=" << po.top_k_change_threshold_
-       << ", profile_type=" << po.profile_type_
-       << ", max_stack_depth=" << po.max_stack_depth_;
-    return os;
-  }
-
-  friend class ParsedOptions;
-
-  // Whether or not the applications should be profiled.
-  bool enabled_;
-  // Generate profile every n seconds.
-  uint32_t period_s_;
-  // Run profile for n seconds.
-  uint32_t duration_s_;
-  // Microseconds between samples.
-  uint32_t interval_us_;
-  // Coefficient to exponential backoff.
-  double backoff_coefficient_;
-  // Whether the profile should start upon app startup or be delayed by some random offset.
-  bool start_immediately_;
-  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
-  double top_k_threshold_;
-  // How much the top K% samples needs to change in order for the app to be recompiled.
-  double top_k_change_threshold_;
-  // The type of profile data dumped to the disk.
-  ProfileDataType profile_type_;
-  // The max depth of the stack collected by the profiler
-  uint32_t max_stack_depth_;
-};
-
-}  // namespace art
-
-
-#endif  // ART_RUNTIME_PROFILER_OPTIONS_H_
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index bc9ba37..82e57b4 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -18,6 +18,7 @@
 #include <vector>
 
 #include "art_field-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
 #include "mirror/field-inl.h"
@@ -60,29 +61,31 @@
 
     jsize array_index = 0;
     // Fill the method array
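+    // The test relies on the class linker using the runtime's native pointer size (checked
+    // by the DCHECK below), so the Create* templates are instantiated with kRuntimePointerSize.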
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
     ArtMethod* method = javaLangObject->FindDeclaredVirtualMethod(
-        "equals", "(Ljava/lang/Object;)Z", sizeof(void*));
+        "equals", "(Ljava/lang/Object;)Z", kRuntimePointerSize);
+    CHECK(method != nullptr);
+    DCHECK(!Runtime::Current()->IsActiveTransaction());
+    soa.Env()->SetObjectArrayElement(
+        proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
+            mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), method)));
+    method = javaLangObject->FindDeclaredVirtualMethod("hashCode", "()I", kRuntimePointerSize);
     CHECK(method != nullptr);
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
-    method = javaLangObject->FindDeclaredVirtualMethod("hashCode", "()I", sizeof(void*));
-    CHECK(method != nullptr);
-    soa.Env()->SetObjectArrayElement(
-        proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), method)));
     method = javaLangObject->FindDeclaredVirtualMethod(
-        "toString", "()Ljava/lang/String;", sizeof(void*));
+        "toString", "()Ljava/lang/String;", kRuntimePointerSize);
     CHECK(method != nullptr);
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), method)));
     // Now adds all interfaces virtual methods.
     for (mirror::Class* interface : interfaces) {
-      for (auto& m : interface->GetVirtualMethods(sizeof(void*))) {
+      for (auto& m : interface->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
         soa.Env()->SetObjectArrayElement(
             proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-                mirror::Method::CreateFromArtMethod(soa.Self(), &m)));
+                mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), &m)));
       }
     }
     CHECK_EQ(array_index, methods_count);
@@ -216,24 +219,30 @@
 
   LengthPrefixedArray<ArtField>* static_fields0 = proxyClass0->GetSFieldsPtr();
   ASSERT_TRUE(static_fields0 != nullptr);
-  ASSERT_EQ(2u, static_fields0->Length());
+  ASSERT_EQ(2u, static_fields0->size());
   LengthPrefixedArray<ArtField>* static_fields1 = proxyClass1->GetSFieldsPtr();
   ASSERT_TRUE(static_fields1 != nullptr);
-  ASSERT_EQ(2u, static_fields1->Length());
+  ASSERT_EQ(2u, static_fields1->size());
 
   EXPECT_EQ(static_fields0->At(0).GetDeclaringClass(), proxyClass0.Get());
   EXPECT_EQ(static_fields0->At(1).GetDeclaringClass(), proxyClass0.Get());
   EXPECT_EQ(static_fields1->At(0).GetDeclaringClass(), proxyClass1.Get());
   EXPECT_EQ(static_fields1->At(1).GetDeclaringClass(), proxyClass1.Get());
 
+  ASSERT_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  ASSERT_FALSE(Runtime::Current()->IsActiveTransaction());
   Handle<mirror::Field> field00 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(0), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields0->At(0), true));
   Handle<mirror::Field> field01 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(1), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields0->At(1), true));
   Handle<mirror::Field> field10 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(0), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields1->At(0), true));
   Handle<mirror::Field> field11 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(1), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields1->At(1), true));
   EXPECT_EQ(field00->GetArtField(), &static_fields0->At(0));
   EXPECT_EQ(field01->GetArtField(), &static_fields0->At(1));
   EXPECT_EQ(field10->GetArtField(), &static_fields1->At(0));
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 99e262e..a6e3693 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -18,10 +18,12 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
+#include "dex_instruction_utils.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "verifier/method_verifier-inl.h"
@@ -33,6 +35,366 @@
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+// Helper class for matching a pattern.
+class Matcher {
+ public:
+  // Match function type.
+  typedef bool MatchFn(Matcher* matcher);
+
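+  // Match `code_item` against `pattern`, a fixed-size array of match functions.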
+  template <size_t size>
+  static bool Match(const DexFile::CodeItem* code_item, MatchFn* const (&pattern)[size]);
+
+  // Match and advance.
+
+  static bool Mark(Matcher* matcher);
+
+  template <bool (Matcher::*Fn)()>
+  static bool Required(Matcher* matcher);
+
+  template <bool (Matcher::*Fn)()>
+  static bool Repeated(Matcher* matcher);  // On match, returns to the mark.
+
+  // Match an individual instruction.
+
+  template <Instruction::Code opcode> bool Opcode();
+  bool Const0();
+  bool IPutOnThis();
+
+ private:
+  explicit Matcher(const DexFile::CodeItem* code_item)
+      : code_item_(code_item),
+        instruction_(Instruction::At(code_item->insns_)),
+        pos_(0u),
+        mark_(0u) { }
+
+  static bool DoMatch(const DexFile::CodeItem* code_item, MatchFn* const* pattern, size_t size);
+
+  const DexFile::CodeItem* const code_item_;
+  const Instruction* instruction_;
+  size_t pos_;
+  size_t mark_;
+};
+
+template <size_t size>
+bool Matcher::Match(const DexFile::CodeItem* code_item, MatchFn* const (&pattern)[size]) {
+  return DoMatch(code_item, pattern, size);
+}
+
+bool Matcher::Mark(Matcher* matcher) {
+  matcher->pos_ += 1u;  // Advance to the next match function before marking.
+  matcher->mark_ = matcher->pos_;
+  return true;
+}
+
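+// The current instruction must match `Fn`; consume it and advance to the next match function.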
+template <bool (Matcher::*Fn)()>
+bool Matcher::Required(Matcher* matcher) {
+  if (!(matcher->*Fn)()) {
+    return false;
+  }
+  matcher->pos_ += 1u;
+  matcher->instruction_ = matcher->instruction_->Next();
+  return true;
+}
+
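+// Optionally match `Fn`; on a match, consume the instruction and return to the mark so
+// the repeated group can match again.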
+template <bool (Matcher::*Fn)()>
+bool Matcher::Repeated(Matcher* matcher) {
+  if (!(matcher->*Fn)()) {
+    // Didn't match the optional instruction; try the next match function.
+    matcher->pos_ += 1u;
+    return true;
+  }
+  matcher->pos_ = matcher->mark_;
+  matcher->instruction_ = matcher->instruction_->Next();
+  return true;
+}
+
+template <Instruction::Code opcode>
+bool Matcher::Opcode() {
+  return instruction_->Opcode() == opcode;
+}
+
+// Match const 0.
+bool Matcher::Const0() {
+  return IsInstructionDirectConst(instruction_->Opcode()) &&
+      (instruction_->Opcode() == Instruction::CONST_WIDE ? instruction_->VRegB_51l() == 0
+                                                         : instruction_->VRegB() == 0);
+}
+
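+// Match an IPUT whose object register is "this", i.e. the first input register.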
+bool Matcher::IPutOnThis() {
+  DCHECK_NE(code_item_->ins_size_, 0u);
+  return IsInstructionIPut(instruction_->Opcode()) &&
+      instruction_->VRegB_22c() == code_item_->registers_size_ - code_item_->ins_size_;
+}
+
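+// Run the match functions over the code item; each function advances `pos_` (and,
+// where applicable, the instruction) itself.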
+bool Matcher::DoMatch(const DexFile::CodeItem* code_item, MatchFn* const* pattern, size_t size) {
+  Matcher matcher(code_item);
+  while (matcher.pos_ != size) {
+    if (!pattern[matcher.pos_](&matcher)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Used for a single invoke in a constructor. In that situation, the method verifier makes
+// sure we invoke a constructor in either the same class or the superclass, with at least "this".
+ArtMethod* GetTargetConstructor(ArtMethod* method, const Instruction* invoke_direct)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK_EQ(invoke_direct->Opcode(), Instruction::INVOKE_DIRECT);
+  DCHECK_EQ(invoke_direct->VRegC_35c(),
+            method->GetCodeItem()->registers_size_ - method->GetCodeItem()->ins_size_);
+  uint32_t method_index = invoke_direct->VRegB_35c();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  ArtMethod* target_method =
+      method->GetDexCache()->GetResolvedMethod(method_index, pointer_size);
+  if (kIsDebugBuild && target_method != nullptr) {
+    CHECK(!target_method->IsStatic());
+    CHECK(target_method->IsConstructor());
+    CHECK(target_method->GetDeclaringClass() == method->GetDeclaringClass() ||
+          target_method->GetDeclaringClass() == method->GetDeclaringClass()->GetSuperClass());
+  }
+  return target_method;
+}
+
+// Return the number of forwarded arguments and check that all remaining arguments are zero.
+// If the check fails, return static_cast<size_t>(-1).
+size_t CountForwardedConstructorArguments(const DexFile::CodeItem* code_item,
+                                          const Instruction* invoke_direct,
+                                          uint16_t zero_vreg_mask) {
+  DCHECK_EQ(invoke_direct->Opcode(), Instruction::INVOKE_DIRECT);
+  size_t number_of_args = invoke_direct->VRegA_35c();
+  DCHECK_NE(number_of_args, 0u);
+  uint32_t args[Instruction::kMaxVarArgRegs];
+  invoke_direct->GetVarArgs(args);
+  uint16_t this_vreg = args[0];
+  DCHECK_EQ(this_vreg, code_item->registers_size_ - code_item->ins_size_);  // Checked by verifier.
+  size_t forwarded = 1u;
+  while (forwarded < number_of_args &&
+      args[forwarded] == this_vreg + forwarded &&
+      (zero_vreg_mask & (1u << args[forwarded])) == 0) {
+    ++forwarded;
+  }
+  for (size_t i = forwarded; i != number_of_args; ++i) {
+    if ((zero_vreg_mask & (1u << args[i])) == 0) {
+      return static_cast<size_t>(-1);
+    }
+  }
+  return forwarded;
+}
+
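+// Return a bit mask of the vregs zeroed by `const0`; a wide constant zeroes two vregs.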
+uint16_t GetZeroVRegMask(const Instruction* const0) {
+  DCHECK(IsInstructionDirectConst(const0->Opcode()));
+  DCHECK((const0->Opcode() == Instruction::CONST_WIDE) ? const0->VRegB_51l() == 0u
+                                                       : const0->VRegB() == 0);
+  uint16_t base_mask = IsInstructionConstWide(const0->Opcode()) ? 3u : 1u;
+  return base_mask << const0->VRegA();
+}
+
+// We limit the number of IPUTs storing parameters. There can be any number
+// of IPUTs that store the value 0; they are redundant in a constructor because
+// the object always starts zero-initialized. We also eliminate all but the
+// last store to any field, as the earlier stores are not observable; not even
+// if the field is volatile, since no reference to the object can escape from
+// a constructor with this pattern.
+static constexpr size_t kMaxConstructorIPuts = 3u;
+
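+// A recorded IPUT: the field index and the constructor argument (relative to "this")
+// whose value is stored into it.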
+struct ConstructorIPutData {
+  ConstructorIPutData() : field_index(DexFile::kDexNoIndex16), arg(0u) { }
+
+  uint16_t field_index;
+  uint16_t arg;
+};
+
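+// Record an IPUT in `iputs`. A later store to a field supersedes an earlier one, and
+// stores of the value 0 are not recorded since the object starts zero-initialized.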
+bool RecordConstructorIPut(ArtMethod* method,
+                           const Instruction* new_iput,
+                           uint16_t this_vreg,
+                           uint16_t zero_vreg_mask,
+                           /*inout*/ ConstructorIPutData (&iputs)[kMaxConstructorIPuts])
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(IsInstructionIPut(new_iput->Opcode()));
+  uint32_t field_index = new_iput->VRegC_22c();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  mirror::DexCache* dex_cache = method->GetDexCache();
+  ArtField* field = dex_cache->GetResolvedField(field_index, pointer_size);
+  if (UNLIKELY(field == nullptr)) {
+    return false;
+  }
+  // Remove previous IPUT to the same field, if any. Different field indexes may refer
+  // to the same field, so we need to compare resolved fields from the dex cache.
+  for (size_t old_pos = 0; old_pos != arraysize(iputs); ++old_pos) {
+    if (iputs[old_pos].field_index == DexFile::kDexNoIndex16) {
+      break;
+    }
+    ArtField* f = dex_cache->GetResolvedField(iputs[old_pos].field_index, pointer_size);
+    DCHECK(f != nullptr);
+    if (f == field) {
+      auto back_it = std::copy(iputs + old_pos + 1, iputs + arraysize(iputs), iputs + old_pos);
+      *back_it = ConstructorIPutData();
+      break;
+    }
+  }
+  // If the stored value isn't zero, record the IPUT.
+  if ((zero_vreg_mask & (1u << new_iput->VRegA_22c())) == 0u) {
+    size_t new_pos = 0;
+    while (new_pos != arraysize(iputs) && iputs[new_pos].field_index != DexFile::kDexNoIndex16) {
+      ++new_pos;
+    }
+    if (new_pos == arraysize(iputs)) {
+      return false;  // Exceeded capacity of the output array.
+    }
+    iputs[new_pos].field_index = field_index;
+    iputs[new_pos].arg = new_iput->VRegA_22c() - this_vreg;
+  }
+  return true;
+}
+
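+// Match the constructor pattern and verify the matched instructions, recursing into the
+// called constructor and collecting the IPUTs into `iputs`.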
+bool DoAnalyseConstructor(const DexFile::CodeItem* code_item,
+                          ArtMethod* method,
+                          /*inout*/ ConstructorIPutData (&iputs)[kMaxConstructorIPuts])
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // On entry we should not have any IPUTs yet.
+  DCHECK_EQ(0, std::count_if(
+      iputs,
+      iputs + arraysize(iputs),
+      [](const ConstructorIPutData& iput_data) {
+        return iput_data.field_index != DexFile::kDexNoIndex16;
+      }));
+
+  // Limit the maximum number of code units we're willing to match.
+  static constexpr size_t kMaxCodeUnits = 16u;
+
+  // Limit the number of registers that the constructor may use to 16.
+  // Given that IPUTs must use the low 16 registers and we do not match MOVEs,
+  // this is a reasonable limitation.
+  static constexpr size_t kMaxVRegs = 16u;
+
+  // We try to match a constructor that calls another constructor (in either
+  // the superclass or the same class) with the same parameters, with some
+  // parameters truncated (allowed only for calls to the superclass constructor),
+  // or with extra parameters with value 0 (of any type, including null).
+  // The call may be followed by optional IPUTs on "this" storing either one
+  // of the parameters or 0, and the code must then finish with RETURN_VOID.
+  // The called constructor must be either java.lang.Object.<init>() or it
+  // must itself match the same pattern.
+  static Matcher::MatchFn* const kConstructorPattern[] = {
+      &Matcher::Mark,
+      &Matcher::Repeated<&Matcher::Const0>,
+      &Matcher::Required<&Matcher::Opcode<Instruction::INVOKE_DIRECT>>,
+      &Matcher::Mark,
+      &Matcher::Repeated<&Matcher::Const0>,
+      &Matcher::Repeated<&Matcher::IPutOnThis>,
+      &Matcher::Required<&Matcher::Opcode<Instruction::RETURN_VOID>>,
+  };
+
+  DCHECK(method != nullptr);
+  DCHECK(!method->IsStatic());
+  DCHECK(method->IsConstructor());
+  DCHECK(code_item != nullptr);
+  if (!method->GetDeclaringClass()->IsVerified() ||
+      code_item->insns_size_in_code_units_ > kMaxCodeUnits ||
+      code_item->registers_size_ > kMaxVRegs ||
+      !Matcher::Match(code_item, kConstructorPattern)) {
+    return false;
+  }
+
+  // Verify the invoke, reject a few odd cases, and collect IPUTs.
+  uint16_t this_vreg = code_item->registers_size_ - code_item->ins_size_;
+  uint16_t zero_vreg_mask = 0u;
+  for (const Instruction* instruction = Instruction::At(code_item->insns_);
+      instruction->Opcode() != Instruction::RETURN_VOID;
+      instruction = instruction->Next()) {
+    if (instruction->Opcode() == Instruction::INVOKE_DIRECT) {
+      ArtMethod* target_method = GetTargetConstructor(method, instruction);
+      if (target_method == nullptr) {
+        return false;
+      }
+      // To prevent infinite recursion, allow a call to a constructor in the same
+      // class only if it passes more arguments than the current constructor received.
+      if (target_method->GetDeclaringClass() == method->GetDeclaringClass() &&
+          instruction->VRegA_35c() <= code_item->ins_size_) {
+        return false;
+      }
+      size_t forwarded = CountForwardedConstructorArguments(code_item, instruction, zero_vreg_mask);
+      if (forwarded == static_cast<size_t>(-1)) {
+        return false;
+      }
+      if (target_method->GetDeclaringClass()->IsObjectClass()) {
+        DCHECK_EQ(Instruction::At(target_method->GetCodeItem()->insns_)->Opcode(),
+                  Instruction::RETURN_VOID);
+      } else {
+        const DexFile::CodeItem* target_code_item = target_method->GetCodeItem();
+        if (target_code_item == nullptr) {
+          return false;  // Native constructor?
+        }
+        if (!DoAnalyseConstructor(target_code_item, target_method, iputs)) {
+          return false;
+        }
+        // Prune IPUTs whose argument was not forwarded; those stored the value 0.
+        auto kept_end = std::remove_if(
+            iputs,
+            iputs + arraysize(iputs),
+            [forwarded](const ConstructorIPutData& iput_data) {
+              return iput_data.arg >= forwarded;
+            });
+        std::fill(kept_end, iputs + arraysize(iputs), ConstructorIPutData());
+        // If we have any IPUTs from the call, check that the target method is in the same
+        // dex file (compare DexCache references), otherwise field_indexes would be bogus.
+        if (iputs[0].field_index != DexFile::kDexNoIndex16 &&
+            target_method->GetDexCache() != method->GetDexCache()) {
+          return false;
+        }
+      }
+    } else if (IsInstructionDirectConst(instruction->Opcode())) {
+      zero_vreg_mask |= GetZeroVRegMask(instruction);
+      if ((zero_vreg_mask & (1u << this_vreg)) != 0u) {
+        return false;  // Overwriting `this` is unsupported.
+      }
+    } else {
+      DCHECK(IsInstructionIPut(instruction->Opcode()));
+      DCHECK_EQ(instruction->VRegB_22c(), this_vreg);
+      if (!RecordConstructorIPut(method, instruction, this_vreg, zero_vreg_mask, iputs)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+}  // anonymous namespace
+
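+// Analyse a constructor for inlining; on success, fill `result` with the collected IPUT data.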
+bool AnalyseConstructor(const DexFile::CodeItem* code_item,
+                        ArtMethod* method,
+                        InlineMethod* result)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ConstructorIPutData iputs[kMaxConstructorIPuts];
+  if (!DoAnalyseConstructor(code_item, method, iputs)) {
+    return false;
+  }
+  static_assert(kMaxConstructorIPuts == 3, "Unexpected limit");  // Code below depends on this.
+  DCHECK(iputs[0].field_index != DexFile::kDexNoIndex16 ||
+         iputs[1].field_index == DexFile::kDexNoIndex16);
+  DCHECK(iputs[1].field_index != DexFile::kDexNoIndex16 ||
+         iputs[2].field_index == DexFile::kDexNoIndex16);
+
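+// Copy the collected IPUT data into the InlineConstructorData fields of `result`.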
+#define STORE_IPUT(n)                                                         \
+  do {                                                                        \
+    result->d.constructor_data.iput##n##_field_index = iputs[n].field_index;  \
+    result->d.constructor_data.iput##n##_arg = iputs[n].arg;                  \
+  } while (false)
+
+  STORE_IPUT(0);
+  STORE_IPUT(1);
+  STORE_IPUT(2);
+#undef STORE_IPUT
+
+  result->opcode = kInlineOpConstructor;
+  result->flags = kInlineSpecial;
+  result->d.constructor_data.reserved = 0u;
+  return true;
+}
+
 static_assert(InlineMethodAnalyser::IsInstructionIGet(Instruction::IGET), "iget type");
 static_assert(InlineMethodAnalyser::IsInstructionIGet(Instruction::IGET_WIDE), "iget_wide type");
 static_assert(InlineMethodAnalyser::IsInstructionIGet(Instruction::IGET_OBJECT),
@@ -71,37 +433,71 @@
 // we need to be able to detect possibly inlined method, we pass a null inline method to indicate
 // we don't want to take unresolved methods and fields into account during analysis.
 bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier,
-                                             InlineMethod* method) {
+                                             InlineMethod* result) {
   DCHECK(verifier != nullptr);
-  DCHECK_EQ(Runtime::Current()->IsCompiler(), method != nullptr);
-  if (!Runtime::Current()->UseJit()) {
-    DCHECK_EQ(verifier->CanLoadClasses(), method != nullptr);
+  if (!Runtime::Current()->UseJitCompilation()) {
+    DCHECK_EQ(verifier->CanLoadClasses(), result != nullptr);
   }
+
+  // Note: verifier->GetMethod() may be null.
+  return AnalyseMethodCode(verifier->CodeItem(),
+                           verifier->GetMethodReference(),
+                           (verifier->GetAccessFlags() & kAccStatic) != 0u,
+                           verifier->GetMethod(),
+                           result);
+}
+
+bool InlineMethodAnalyser::AnalyseMethodCode(ArtMethod* method, InlineMethod* result) {
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
+  if (code_item == nullptr) {
+    // Native or abstract.
+    return false;
+  }
+  return AnalyseMethodCode(
+      code_item, method->ToMethodReference(), method->IsStatic(), method, result);
+}
+
+bool InlineMethodAnalyser::AnalyseMethodCode(const DexFile::CodeItem* code_item,
+                                             const MethodReference& method_ref,
+                                             bool is_static,
+                                             ArtMethod* method,
+                                             InlineMethod* result) {
   // We currently support only plain return or 2-instruction methods.
 
-  const DexFile::CodeItem* code_item = verifier->CodeItem();
   DCHECK_NE(code_item->insns_size_in_code_units_, 0u);
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
 
   switch (opcode) {
     case Instruction::RETURN_VOID:
-      if (method != nullptr) {
-        method->opcode = kInlineOpNop;
-        method->flags = kInlineSpecial;
-        method->d.data = 0u;
+      if (result != nullptr) {
+        result->opcode = kInlineOpNop;
+        result->flags = kInlineSpecial;
+        result->d.data = 0u;
       }
       return true;
     case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
     case Instruction::RETURN_WIDE:
-      return AnalyseReturnMethod(code_item, method);
+      return AnalyseReturnMethod(code_item, result);
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16:
     case Instruction::CONST_HIGH16:
       // TODO: Support wide constants (RETURN_WIDE).
-      return AnalyseConstMethod(code_item, method);
+      if (AnalyseConstMethod(code_item, result)) {
+        return true;
+      }
+      FALLTHROUGH_INTENDED;
+    case Instruction::CONST_WIDE:
+    case Instruction::CONST_WIDE_16:
+    case Instruction::CONST_WIDE_32:
+    case Instruction::CONST_WIDE_HIGH16:
+    case Instruction::INVOKE_DIRECT:
+      if (method != nullptr && !method->IsStatic() && method->IsConstructor()) {
+        return AnalyseConstructor(code_item, method, result);
+      }
+      return false;
     case Instruction::IGET:
     case Instruction::IGET_OBJECT:
     case Instruction::IGET_BOOLEAN:
@@ -113,7 +509,7 @@
     // case Instruction::IGET_QUICK:
     // case Instruction::IGET_WIDE_QUICK:
     // case Instruction::IGET_OBJECT_QUICK:
-      return AnalyseIGetMethod(verifier, method);
+      return AnalyseIGetMethod(code_item, method_ref, is_static, method, result);
     case Instruction::IPUT:
     case Instruction::IPUT_OBJECT:
     case Instruction::IPUT_BOOLEAN:
@@ -125,7 +521,7 @@
     // case Instruction::IPUT_QUICK:
     // case Instruction::IPUT_WIDE_QUICK:
     // case Instruction::IPUT_OBJECT_QUICK:
-      return AnalyseIPutMethod(verifier, method);
+      return AnalyseIPutMethod(code_item, method_ref, is_static, method, result);
     default:
       return false;
   }
@@ -195,9 +591,11 @@
   return true;
 }
 
-bool InlineMethodAnalyser::AnalyseIGetMethod(verifier::MethodVerifier* verifier,
+bool InlineMethodAnalyser::AnalyseIGetMethod(const DexFile::CodeItem* code_item,
+                                             const MethodReference& method_ref,
+                                             bool is_static,
+                                             ArtMethod* method,
                                              InlineMethod* result) {
-  const DexFile::CodeItem* code_item = verifier->CodeItem();
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
   DCHECK(IsInstructionIGet(opcode));
@@ -228,10 +626,10 @@
     return false;  // Not returning the value retrieved by IGET?
   }
 
-  if ((verifier->GetAccessFlags() & kAccStatic) != 0u || object_arg != 0u) {
+  if (is_static || object_arg != 0u) {
     // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE).
     // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
-    if (!IsSyntheticAccessor(verifier->GetMethodReference())) {
+    if (!IsSyntheticAccessor(method_ref)) {
       return false;
     }
   }
@@ -244,13 +642,13 @@
 
   if (result != nullptr) {
     InlineIGetIPutData* data = &result->d.ifield_data;
-    if (!ComputeSpecialAccessorInfo(field_idx, false, verifier, data)) {
+    if (!ComputeSpecialAccessorInfo(method, field_idx, false, data)) {
       return false;
     }
     result->opcode = kInlineOpIGet;
     result->flags = kInlineSpecial;
     data->op_variant = IGetVariant(opcode);
-    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0u ? 1u : 0u;
+    data->method_is_static = is_static ? 1u : 0u;
     data->object_arg = object_arg;  // Allow IGET on any register, not just "this".
     data->src_arg = 0u;
     data->return_arg_plus1 = 0u;
@@ -258,9 +656,11 @@
   return true;
 }
 
-bool InlineMethodAnalyser::AnalyseIPutMethod(verifier::MethodVerifier* verifier,
+bool InlineMethodAnalyser::AnalyseIPutMethod(const DexFile::CodeItem* code_item,
+                                             const MethodReference& method_ref,
+                                             bool is_static,
+                                             ArtMethod* method,
                                              InlineMethod* result) {
-  const DexFile::CodeItem* code_item = verifier->CodeItem();
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
   DCHECK(IsInstructionIPut(opcode));
@@ -293,10 +693,10 @@
   uint32_t object_arg = object_reg - arg_start;
   uint32_t src_arg = src_reg - arg_start;
 
-  if ((verifier->GetAccessFlags() & kAccStatic) != 0u || object_arg != 0u) {
+  if (is_static || object_arg != 0u) {
     // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE).
     // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
-    if (!IsSyntheticAccessor(verifier->GetMethodReference())) {
+    if (!IsSyntheticAccessor(method_ref)) {
       return false;
     }
   }
@@ -311,13 +711,13 @@
 
   if (result != nullptr) {
     InlineIGetIPutData* data = &result->d.ifield_data;
-    if (!ComputeSpecialAccessorInfo(field_idx, true, verifier, data)) {
+    if (!ComputeSpecialAccessorInfo(method, field_idx, true, data)) {
       return false;
     }
     result->opcode = kInlineOpIPut;
     result->flags = kInlineSpecial;
     data->op_variant = IPutVariant(opcode);
-    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0u ? 1u : 0u;
+    data->method_is_static = is_static ? 1u : 0u;
     data->object_arg = object_arg;  // Allow IPUT on any register, not just "this".
     data->src_arg = src_arg;
     data->return_arg_plus1 = return_arg_plus1;
@@ -325,15 +725,17 @@
   return true;
 }
 
-bool InlineMethodAnalyser::ComputeSpecialAccessorInfo(uint32_t field_idx, bool is_put,
-                                                      verifier::MethodVerifier* verifier,
+bool InlineMethodAnalyser::ComputeSpecialAccessorInfo(ArtMethod* method,
+                                                      uint32_t field_idx,
+                                                      bool is_put,
                                                       InlineIGetIPutData* result) {
-  mirror::DexCache* dex_cache = verifier->GetDexCache();
-  uint32_t method_idx = verifier->GetMethodReference().dex_method_index;
-  auto* cl = Runtime::Current()->GetClassLinker();
-  ArtMethod* method = dex_cache->GetResolvedMethod(method_idx, cl->GetImagePointerSize());
-  ArtField* field = cl->GetResolvedField(field_idx, dex_cache);
-  if (method == nullptr || field == nullptr || field->IsStatic()) {
+  if (method == nullptr) {
+    return false;
+  }
+  mirror::DexCache* dex_cache = method->GetDexCache();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  ArtField* field = dex_cache->GetResolvedField(field_idx, pointer_size);
+  if (field == nullptr || field->IsStatic()) {
     return false;
   }
   mirror::Class* method_class = method->GetDeclaringClass();
@@ -343,9 +745,12 @@
     return false;
   }
   DCHECK_GE(field->GetOffset().Int32Value(), 0);
+  // Do not interleave function calls with bit field writes to placate valgrind. Bug: 27552451.
+  uint32_t field_offset = field->GetOffset().Uint32Value();
+  bool is_volatile = field->IsVolatile();
   result->field_idx = field_idx;
-  result->field_offset = field->GetOffset().Int32Value();
-  result->is_volatile = field->IsVolatile();
+  result->field_offset = field_offset;
+  result->is_volatile = is_volatile ? 1u : 0u;
   return true;
 }
 
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 837662d..0e12d73 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -37,12 +37,23 @@
 enum InlineMethodOpcode : uint16_t {
   kIntrinsicDoubleCvt,
   kIntrinsicFloatCvt,
+  kIntrinsicFloat2Int,
+  kIntrinsicDouble2Long,
+  kIntrinsicFloatIsInfinite,
+  kIntrinsicDoubleIsInfinite,
+  kIntrinsicFloatIsNaN,
+  kIntrinsicDoubleIsNaN,
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
+  kIntrinsicBitCount,
+  kIntrinsicCompare,
+  kIntrinsicHighestOneBit,
+  kIntrinsicLowestOneBit,
   kIntrinsicNumberOfLeadingZeros,
   kIntrinsicNumberOfTrailingZeros,
   kIntrinsicRotateRight,
   kIntrinsicRotateLeft,
+  kIntrinsicSignum,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
   kIntrinsicAbsFloat,
@@ -51,6 +62,23 @@
   kIntrinsicMinMaxLong,
   kIntrinsicMinMaxFloat,
   kIntrinsicMinMaxDouble,
+  kIntrinsicCos,
+  kIntrinsicSin,
+  kIntrinsicAcos,
+  kIntrinsicAsin,
+  kIntrinsicAtan,
+  kIntrinsicAtan2,
+  kIntrinsicCbrt,
+  kIntrinsicCosh,
+  kIntrinsicExp,
+  kIntrinsicExpm1,
+  kIntrinsicHypot,
+  kIntrinsicLog,
+  kIntrinsicLog10,
+  kIntrinsicNextAfter,
+  kIntrinsicSinh,
+  kIntrinsicTan,
+  kIntrinsicTanh,
   kIntrinsicSqrt,
   kIntrinsicCeil,
   kIntrinsicFloor,
@@ -73,6 +101,17 @@
   kIntrinsicCas,
   kIntrinsicUnsafeGet,
   kIntrinsicUnsafePut,
+
+  // 1.8.
+  kIntrinsicUnsafeGetAndAddInt,
+  kIntrinsicUnsafeGetAndAddLong,
+  kIntrinsicUnsafeGetAndSetInt,
+  kIntrinsicUnsafeGetAndSetLong,
+  kIntrinsicUnsafeGetAndSetObject,
+  kIntrinsicUnsafeLoadFence,
+  kIntrinsicUnsafeStoreFence,
+  kIntrinsicUnsafeFullFence,
+
   kIntrinsicSystemArrayCopyCharArray,
   kIntrinsicSystemArrayCopy,
 
@@ -81,6 +120,7 @@
   kInlineOpNonWideConst,
   kInlineOpIGet,
   kInlineOpIPut,
+  kInlineOpConstructor,
   kInlineStringInit,
 };
 std::ostream& operator<<(std::ostream& os, const InlineMethodOpcode& rhs);
@@ -142,6 +182,19 @@
 static_assert(sizeof(InlineReturnArgData) == sizeof(uint64_t),
               "Invalid size of InlineReturnArgData");
 
+struct InlineConstructorData {
+  // There can be up to 3 IPUTs; unused entries are marked with kDexNoIndex16.
+  uint16_t iput0_field_index;
+  uint16_t iput1_field_index;
+  uint16_t iput2_field_index;
+  uint16_t iput0_arg : 4;
+  uint16_t iput1_arg : 4;
+  uint16_t iput2_arg : 4;
+  uint16_t reserved : 4;
+};
+static_assert(sizeof(InlineConstructorData) == sizeof(uint64_t),
+              "Invalid size of InlineConstructorData");
+
 struct InlineMethod {
   InlineMethodOpcode opcode;
   InlineMethodFlags flags;
@@ -149,6 +202,7 @@
     uint64_t data;
     InlineIGetIPutData ifield_data;
     InlineReturnArgData return_data;
+    InlineConstructorData constructor_data;
   } d;
 };
 
@@ -162,7 +216,9 @@
    * @param method placeholder for the inline method data.
    * @return true if the method is a candidate for inlining, false otherwise.
    */
-  static bool AnalyseMethodCode(verifier::MethodVerifier* verifier, InlineMethod* method)
+  static bool AnalyseMethodCode(verifier::MethodVerifier* verifier, InlineMethod* result)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  static bool AnalyseMethodCode(ArtMethod* method, InlineMethod* result)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static constexpr bool IsInstructionIGet(Instruction::Code opcode) {
@@ -185,17 +241,32 @@
   static bool IsSyntheticAccessor(MethodReference ref);
 
  private:
+  static bool AnalyseMethodCode(const DexFile::CodeItem* code_item,
+                                const MethodReference& method_ref,
+                                bool is_static,
+                                ArtMethod* method,
+                                InlineMethod* result)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   static bool AnalyseReturnMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
   static bool AnalyseConstMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
-  static bool AnalyseIGetMethod(verifier::MethodVerifier* verifier, InlineMethod* result)
+  static bool AnalyseIGetMethod(const DexFile::CodeItem* code_item,
+                                const MethodReference& method_ref,
+                                bool is_static,
+                                ArtMethod* method,
+                                InlineMethod* result)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  static bool AnalyseIPutMethod(verifier::MethodVerifier* verifier, InlineMethod* result)
+  static bool AnalyseIPutMethod(const DexFile::CodeItem* code_item,
+                                const MethodReference& method_ref,
+                                bool is_static,
+                                ArtMethod* method,
+                                InlineMethod* result)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Can we fast path instance field access in a verified accessor?
   // If yes, computes field's offset and volatility and whether the method is static or not.
-  static bool ComputeSpecialAccessorInfo(uint32_t field_idx, bool is_put,
-                                         verifier::MethodVerifier* verifier,
+  static bool ComputeSpecialAccessorInfo(ArtMethod* method,
+                                         uint32_t field_idx,
+                                         bool is_put,
                                          InlineIGetIPutData* result)
       SHARED_REQUIRES(Locks::mutator_lock_);
 };
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 53b4f3a..46d9e7f 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -18,11 +18,14 @@
 
 #include "arch/context.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "handle_scope-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/throwable.h"
@@ -48,7 +51,8 @@
       handler_method_(nullptr),
       handler_dex_pc_(0),
       clear_exception_(false),
-      handler_frame_depth_(kInvalidFrameDepth) {}
+      handler_frame_depth_(kInvalidFrameDepth),
+      full_fragment_done_(false) {}
 
 // Finds catch handler.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
@@ -202,8 +206,7 @@
       return VRegKind::kDoubleHiVReg;
 
     default:
-      LOG(FATAL) << "Unexpected vreg location "
-                 << DexRegisterLocation::PrettyDescriptor(kind);
+      LOG(FATAL) << "Unexpected vreg location " << kind;
       UNREACHABLE();
   }
 }
@@ -219,7 +222,16 @@
 
   const size_t number_of_vregs = handler_method_->GetCodeItem()->registers_size_;
   CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+
+  // Find stack map of the catch block.
+  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
+  DCHECK(catch_stack_map.IsValid());
+  DexRegisterMap catch_vreg_map =
+      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+  if (!catch_vreg_map.IsValid()) {
+    return;
+  }
 
   // Find stack map of the throwing instruction.
   StackMap throw_stack_map =
@@ -227,12 +239,7 @@
   DCHECK(throw_stack_map.IsValid());
   DexRegisterMap throw_vreg_map =
       code_info.GetDexRegisterMapOf(throw_stack_map, encoding, number_of_vregs);
-
-  // Find stack map of the catch block.
-  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
-  DCHECK(catch_stack_map.IsValid());
-  DexRegisterMap catch_vreg_map =
-      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+  DCHECK(throw_vreg_map.IsValid());
 
   // Copy values between them.
   for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
@@ -283,26 +290,48 @@
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
-        single_frame_done_(false) {
+        single_frame_done_(false),
+        single_frame_deopt_method_(nullptr),
+        single_frame_deopt_quick_method_header_(nullptr),
+        callee_method_(nullptr) {
+  }
+
+  ArtMethod* GetSingleFrameDeoptMethod() const {
+    return single_frame_deopt_method_;
+  }
+
+  const OatQuickMethodHeader* GetSingleFrameDeoptQuickMethodHeader() const {
+    return single_frame_deopt_quick_method_header_;
+  }
+
+  void FinishStackWalk() SHARED_REQUIRES(Locks::mutator_lock_) {
+    // This is the upcall, or the next full frame in single-frame deopt, or the
+    // code isn't deoptimizeable. We remember the frame and last pc so that we
+    // may long jump to them.
+    exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
+    exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+    exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
+    if (!stacked_shadow_frame_pushed_) {
+      // In case there is no deoptimized shadow frame for this upcall, we still
+      // need to push a nullptr to the stack since there is always a matching pop after
+      // the long jump.
+      GetThread()->PushStackedShadowFrame(nullptr,
+                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
+      stacked_shadow_frame_pushed_ = true;
+    }
+    if (GetMethod() == nullptr) {
+      exception_handler_->SetFullFragmentDone(true);
+    } else {
+      CHECK(callee_method_ != nullptr) << art::PrettyMethod(GetMethod(), false);
+      exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(callee_method_));
+    }
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
     if (method == nullptr || single_frame_done_) {
-      // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
-      // and last pc so that we may long jump to them.
-      exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
-      exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
-      exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
-      if (!stacked_shadow_frame_pushed_) {
-        // In case there is no deoptimized shadow frame for this upcall, we still
-        // need to push a nullptr to the stack since there is always a matching pop after
-        // the long jump.
-        GetThread()->PushStackedShadowFrame(nullptr,
-                                            StackedShadowFrameType::kDeoptimizationShadowFrame);
-        stacked_shadow_frame_pushed_ = true;
-      }
+      FinishStackWalk();
       return false;  // End stack walk.
     } else if (method->IsRuntimeMethod()) {
       // Ignore callee save method.
@@ -313,7 +342,14 @@
       // the native method.
       // The top method is a runtime method, the native method comes next.
       CHECK_EQ(GetFrameDepth(), 1U);
+      callee_method_ = method;
       return true;
+    } else if (!single_frame_deopt_ &&
+               !Runtime::Current()->IsDeoptimizeable(GetCurrentQuickFramePc())) {
+      // We hit some code that's not deoptimizeable. However, single-frame deoptimization triggered
+      // from compiled code is always allowed, since HDeoptimize always saves the full environment.
+      FinishStackWalk();
+      return false;  // End stack walk.
     } else {
       // Check if a shadow frame already exists for debugger's set-local-value purpose.
       const size_t frame_id = GetFrameId();
@@ -327,11 +363,7 @@
         updated_vregs = GetThread()->GetUpdatedVRegFlags(frame_id);
         DCHECK(updated_vregs != nullptr);
       }
-      if (GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
-      } else {
-        HandleQuickDeoptimization(method, new_frame, updated_vregs);
-      }
+      HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
       if (updated_vregs != nullptr) {
         // Calling Thread::RemoveDebuggerShadowFrameMapping will also delete the updated_vregs
         // array so this must come after we processed the frame.
@@ -345,18 +377,17 @@
         // right before interpreter::EnterInterpreterFromDeoptimize().
         stacked_shadow_frame_pushed_ = true;
         GetThread()->PushStackedShadowFrame(
-            new_frame,
-            single_frame_deopt_
-                ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
-                : StackedShadowFrameType::kDeoptimizationShadowFrame);
+            new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame);
       }
       prev_shadow_frame_ = new_frame;
 
       if (single_frame_deopt_ && !IsInInlinedFrame()) {
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
-        exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
+        single_frame_deopt_method_ = method;
+        single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
+      callee_method_ = method;
       return true;
     }
   }
@@ -369,12 +400,20 @@
     const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
     CodeInfo code_info = method_header->GetOptimizedCodeInfo();
     uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     const size_t number_of_vregs = m->GetCodeItem()->registers_size_;
-    DexRegisterMap vreg_map = code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    uint32_t register_mask = stack_map.GetRegisterMask(encoding);
+    uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
+    DexRegisterMap vreg_map = IsInInlinedFrame()
+        ? code_info.GetDexRegisterMapAtDepth(GetCurrentInliningDepth() - 1,
+                                             code_info.GetInlineInfoOf(stack_map, encoding),
+                                             encoding,
+                                             number_of_vregs)
+        : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+    if (!vreg_map.IsValid()) {
+      return;
+    }
 
     for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
       if (updated_vregs != nullptr && updated_vregs[vreg]) {
@@ -397,7 +436,8 @@
           const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
           value = *reinterpret_cast<const uint32_t*>(addr);
           uint32_t bit = (offset >> 2);
-          if (stack_mask.size_in_bits() > bit && stack_mask.LoadBit(bit)) {
+          if (stack_map.GetNumberOfStackMaskBits(encoding.stack_map_encoding) > bit &&
+              stack_map.GetStackMaskBit(encoding.stack_map_encoding, bit)) {
             is_reference = true;
           }
           break;
@@ -429,12 +469,11 @@
         }
         default: {
           LOG(FATAL)
-              << "Unexpected location kind"
-              << DexRegisterLocation::PrettyDescriptor(
-                    vreg_map.GetLocationInternalKind(vreg,
-                                                     number_of_vregs,
-                                                     code_info,
-                                                     encoding));
+              << "Unexpected location kind "
+              << vreg_map.GetLocationInternalKind(vreg,
+                                                  number_of_vregs,
+                                                  code_info,
+                                                  encoding);
           UNREACHABLE();
         }
       }
@@ -450,141 +489,33 @@
     return static_cast<VRegKind>(kinds.at(reg * 2));
   }
 
-  void HandleQuickDeoptimization(ArtMethod* m,
-                                 ShadowFrame* new_frame,
-                                 const bool* updated_vregs)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
-    uint16_t num_regs = code_item->registers_size_;
-    uint32_t dex_pc = GetDexPc();
-    StackHandleScope<2> hs(GetThread());  // Dex cache and class loader.
-    mirror::Class* declaring_class = m->GetDeclaringClass();
-    Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-    Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    verifier::MethodVerifier verifier(GetThread(), h_dex_cache->GetDexFile(), h_dex_cache,
-                                      h_class_loader, &m->GetClassDef(), code_item,
-                                      m->GetDexMethodIndex(), m, m->GetAccessFlags(), true, true,
-                                      true, true);
-    bool verifier_success = verifier.Verify();
-    CHECK(verifier_success) << PrettyMethod(m);
-    {
-      ScopedStackedShadowFramePusher pusher(GetThread(), new_frame,
-                                            StackedShadowFrameType::kShadowFrameUnderConstruction);
-      const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc));
-
-      // Markers for dead values, used when the verifier knows a Dex register is undefined,
-      // or when the compiler knows the register has not been initialized, or is not used
-      // anymore in the method.
-      static constexpr uint32_t kDeadValue = 0xEBADDE09;
-      static constexpr uint64_t kLongDeadValue = 0xEBADDE09EBADDE09;
-      for (uint16_t reg = 0; reg < num_regs; ++reg) {
-        if (updated_vregs != nullptr && updated_vregs[reg]) {
-          // Keep the value set by debugger.
-          continue;
-        }
-        VRegKind kind = GetVRegKind(reg, kinds);
-        switch (kind) {
-          case kUndefined:
-            new_frame->SetVReg(reg, kDeadValue);
-            break;
-          case kConstant:
-            new_frame->SetVReg(reg, kinds.at((reg * 2) + 1));
-            break;
-          case kReferenceVReg: {
-            uint32_t value = 0;
-            // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier.
-            // We don't want to copy a stale reference into the shadow frame as a reference.
-            // b/20736048
-            if (GetVReg(m, reg, kind, &value) && IsReferenceVReg(m, reg)) {
-              new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value));
-            } else {
-              new_frame->SetVReg(reg, kDeadValue);
-            }
-            break;
-          }
-          case kLongLoVReg:
-            if (GetVRegKind(reg + 1, kinds) == kLongHiVReg) {
-              // Treat it as a "long" register pair.
-              uint64_t value = 0;
-              if (GetVRegPair(m, reg, kLongLoVReg, kLongHiVReg, &value)) {
-                new_frame->SetVRegLong(reg, value);
-              } else {
-                new_frame->SetVRegLong(reg, kLongDeadValue);
-              }
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          case kLongHiVReg:
-            if (GetVRegKind(reg - 1, kinds) == kLongLoVReg) {
-              // Nothing to do: we treated it as a "long" register pair.
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          case kDoubleLoVReg:
-            if (GetVRegKind(reg + 1, kinds) == kDoubleHiVReg) {
-              uint64_t value = 0;
-              if (GetVRegPair(m, reg, kDoubleLoVReg, kDoubleHiVReg, &value)) {
-                // Treat it as a "double" register pair.
-                new_frame->SetVRegLong(reg, value);
-              } else {
-                new_frame->SetVRegLong(reg, kLongDeadValue);
-              }
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          case kDoubleHiVReg:
-            if (GetVRegKind(reg - 1, kinds) == kDoubleLoVReg) {
-              // Nothing to do: we treated it as a "double" register pair.
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          default:
-            uint32_t value = 0;
-            if (GetVReg(m, reg, kind, &value)) {
-              new_frame->SetVReg(reg, value);
-            } else {
-              new_frame->SetVReg(reg, kDeadValue);
-            }
-            break;
-        }
-      }
-    }
-  }
-
   QuickExceptionHandler* const exception_handler_;
   ShadowFrame* prev_shadow_frame_;
   bool stacked_shadow_frame_pushed_;
   const bool single_frame_deopt_;
   bool single_frame_done_;
+  ArtMethod* single_frame_deopt_method_;
+  const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
+  ArtMethod* callee_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
 
+void QuickExceptionHandler::PrepareForLongJumpToInvokeStubOrInterpreterBridge() {
+  if (full_fragment_done_) {
+    // Restore deoptimization exception. When returning from the invoke stub,
+    // ArtMethod::Invoke() will see the special exception to know deoptimization
+    // is needed.
+    self_->SetException(Thread::GetDeoptimizationException());
+  } else {
+    // The PC needs to be that of the quick-to-interpreter bridge.
+    const int32_t offset =
+        GetThreadOffset<kRuntimePointerSize>(kQuickQuickToInterpreterBridge).Int32Value();
+    handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+        reinterpret_cast<uint8_t*>(self_) + offset);
+  }
+}
+
 void QuickExceptionHandler::DeoptimizeStack() {
   DCHECK(is_deoptimization_);
   if (kDebugExceptionDelivery) {
@@ -593,9 +524,7 @@
 
   DeoptimizeStackVisitor visitor(self_, context_, this, false);
   visitor.WalkStack(true);
-
-  // Restore deoptimization exception
-  self_->SetException(Thread::GetDeoptimizationException());
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
 void QuickExceptionHandler::DeoptimizeSingleFrame() {
@@ -609,20 +538,33 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
-  // PC needs to be of the quick-to-interpreter bridge.
-  int32_t offset;
-  #ifdef __LP64__
-      offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
-  #else
-      offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
-  #endif
-  handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(self_) + offset);
+  // Compiled code made an explicit deoptimization.
+  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
+  DCHECK(deopt_method != nullptr);
+  if (Runtime::Current()->UseJitCompilation()) {
+    Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+        deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
+  } else {
+    // Transfer the code to interpreter.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        deopt_method, GetQuickToInterpreterBridge());
+  }
+
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
-void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
-  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
+void QuickExceptionHandler::DeoptimizePartialFragmentFixup(uintptr_t return_pc) {
+  // At this point, the instrumentation stack has been updated. We need to install
+  // the real return pc on the stack, in case the instrumentation stub is stored there,
+  // so that the interpreter bridge code can return to the right place.
+  if (return_pc != 0) {
+    uintptr_t* pc_addr = reinterpret_cast<uintptr_t*>(handler_quick_frame_);
+    CHECK(pc_addr != nullptr);
+    pc_addr--;
+    *reinterpret_cast<uintptr_t*>(pc_addr) = return_pc;
+  }
 
+  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
   if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
     // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
     // change how longjump works.
@@ -672,7 +614,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstrumentationStackVisitor);
 };
 
-void QuickExceptionHandler::UpdateInstrumentationStack() {
+uintptr_t QuickExceptionHandler::UpdateInstrumentationStack() {
+  uintptr_t return_pc = 0;
   if (method_tracing_active_) {
     InstrumentationStackVisitor visitor(self_, handler_frame_depth_);
     visitor.WalkStack(true);
@@ -680,9 +623,10 @@
     size_t instrumentation_frames_to_pop = visitor.GetInstrumentationFramesToPop();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     for (size_t i = 0; i < instrumentation_frames_to_pop; ++i) {
-      instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
+      return_pc = instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
     }
   }
+  return return_pc;
 }
 
 void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
@@ -702,7 +646,7 @@
 // Prints out methods with their type of frame.
 class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor {
  public:
-  DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
+  explicit DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         show_details_(show_details) {}
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index eedf83f..74b7d0d 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -46,15 +46,29 @@
   // Find the catch handler for the given exception.
   void FindCatch(mirror::Throwable* exception) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
-  // shadow frame that will be executed with the interpreter.
+  // Deoptimize the stack to the upcall, or to some code that's not deoptimizeable.
+  // For every compiled frame, we create a "copy" shadow frame that will be executed
+  // with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Deoptimize a single frame. It's directly triggered from compiled code. It
+  // has the following properties:
+  // - It deoptimizes a single frame, which can include multiple inlined frames.
+  // - It doesn't have a return result or a pending exception at the deoptimization point.
+  // - It always deoptimizes, even if IsDeoptimizeable() returns false for the
+  //   code, since HDeoptimize always saves the full environment. So it overrides
+  //   the result of IsDeoptimizeable().
+  // - It can be either full-fragment or partial-fragment deoptimization, depending
+  //   on whether that single frame covers a full or a partial fragment.
   void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
-  void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void DeoptimizePartialFragmentFixup(uintptr_t return_pc)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update the instrumentation stack by removing all methods that will be unwound
   // by the exception being thrown.
-  void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Return the return pc of the last frame that's unwound.
+  uintptr_t UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Set up environment before delivering an exception to optimized code.
   void SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor)
@@ -103,8 +117,16 @@
     handler_frame_depth_ = frame_depth;
   }
 
+  bool IsFullFragmentDone() const {
+    return full_fragment_done_;
+  }
+
+  void SetFullFragmentDone(bool full_fragment_done) {
+    full_fragment_done_ = full_fragment_done;
+  }
+
   // Walk the stack frames of the given thread, printing out non-runtime methods with their types
-  // of frames. Helps to verify that single-frame deopt really only deopted one frame.
+  // of frames. Helps to verify that partial-fragment deopt really works as expected.
   static void DumpFramesWithType(Thread* self, bool details = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -131,6 +153,13 @@
   bool clear_exception_;
   // Frame depth of the catch handler or the upcall.
   size_t handler_frame_depth_;
+  // Does the handler successfully walk the full fragment (not stopped
+  // by some code that's not deoptimizeable)? Even single-frame deoptimization
+  // can set this to true if the fragment contains only one quick frame.
+  bool full_fragment_done_;
+
+  void PrepareForLongJumpToInvokeStubOrInterpreterBridge()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(QuickExceptionHandler);
 };
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 85ac4aa..92efa21 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -19,7 +19,7 @@
 
 #include "read_barrier.h"
 
-#include "gc/collector/concurrent_copying.h"
+#include "gc/collector/concurrent_copying-inl.h"
 #include "gc/heap.h"
 #include "mirror/object_reference.h"
 #include "mirror/reference.h"
@@ -28,91 +28,120 @@
 
 namespace art {
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
-  if (with_read_barrier && kUseBakerReadBarrier) {
-    // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero)
-    // is used to create artificial data dependency from the is_gray
-    // load to the ref field (ptr) load to avoid needing a load-load
-    // barrier between the two.
-    uintptr_t rb_ptr_high_bits;
-    bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
-    ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
-        rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
-    MirrorType* ref = ref_addr->AsMirrorPtr();
-    if (is_gray) {
-      // Slow-path.
-      ref = reinterpret_cast<MirrorType*>(Mark(ref));
-    }
-    if (kEnableReadBarrierInvariantChecks) {
-      CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
-    }
-    AssertToSpaceInvariant(obj, offset, ref);
-    return ref;
-  } else if (with_read_barrier && kUseBrooksReadBarrier) {
-    // To be implemented.
-    return ref_addr->AsMirrorPtr();
-  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    MirrorType* ref = ref_addr->AsMirrorPtr();
-    MirrorType* old_ref = ref;
-    // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
-      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
-      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      if (ref != old_ref) {
-        obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>(
-            offset, old_ref, ref);
+  if (kUseReadBarrier && with_read_barrier) {
+    if (kIsDebugBuild) {
+      Thread* const self = Thread::Current();
+      if (self != nullptr) {
+        CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);
       }
     }
-    AssertToSpaceInvariant(obj, offset, ref);
-    return ref;
+    if (kUseBakerReadBarrier) {
+      // The higher bits of the rb_ptr, rb_ptr_high_bits (which must be zero),
+      // are used to create an artificial data dependency from the is_gray
+      // load to the ref field (ptr) load, to avoid needing a load-load
+      // barrier between the two.
+      uintptr_t rb_ptr_high_bits;
+      bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+      ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
+          rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* old_ref = ref;
+      if (is_gray) {
+        // Slow-path.
+        ref = reinterpret_cast<MirrorType*>(Mark(ref));
+        // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
+        // updates before us, but it's ok.
+        if (kAlwaysUpdateField && ref != old_ref) {
+          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+              offset, old_ref, ref);
+        }
+      }
+      if (kEnableReadBarrierInvariantChecks) {
+        CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
+      }
+      AssertToSpaceInvariant(obj, offset, ref);
+      return ref;
+    } else if (kUseBrooksReadBarrier) {
+      // To be implemented.
+      return ref_addr->AsMirrorPtr();
+    } else if (kUseTableLookupReadBarrier) {
+      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* old_ref = ref;
+      // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
+      gc::Heap* heap = Runtime::Current()->GetHeap();
+      if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
+        ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+        // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+        if (ref != old_ref) {
+          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+              offset, old_ref, ref);
+        }
+      }
+      AssertToSpaceInvariant(obj, offset, ref);
+      return ref;
+    } else {
+      LOG(FATAL) << "Unexpected read barrier type";
+      UNREACHABLE();
+    }
   } else {
     // No read barrier.
     return ref_addr->AsMirrorPtr();
   }
 }
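
The Baker path above orders the is_gray load before the reference load by folding the (known-zero) high bits of the read-barrier word into the field address. The following is a minimal sketch of that address-dependency idiom, with a made-up two-word Object and no atomics, so it shows the shape of the trick rather than a fence-free guarantee the compiler would actually preserve:

    #include <cassert>
    #include <cstdint>

    struct Object {
      uintptr_t rb_state;  // hypothetical read-barrier word
      uintptr_t field;     // hypothetical reference field
    };

    uintptr_t LoadWithDependency(Object* obj) {
      uintptr_t rb = obj->rb_state;              // first load
      uintptr_t high_bits = rb & ~uintptr_t{1};  // all but the gray bit; must be 0
      // OR-ing the known-zero bits into the address makes the second load
      // address-depend on the first, which weakly ordered CPUs must respect.
      uintptr_t* addr = reinterpret_cast<uintptr_t*>(
          high_bits | reinterpret_cast<uintptr_t>(&obj->field));
      uintptr_t ref = *addr;                     // second load
      assert(high_bits == 0);                    // cf. kEnableReadBarrierInvariantChecks
      return ref;
    }

    int main() {
      Object o{0, 42};
      return LoadWithDependency(&o) == 42 ? 0 : 1;
    }
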
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
 inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root,
                                                GcRootSource* gc_root_source) {
   MirrorType* ref = *root;
   const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
-  if (with_read_barrier && kUseBakerReadBarrier) {
-    // TODO: separate the read barrier code from the collector code more.
-    Thread* self = Thread::Current();
-    if (self != nullptr && self->GetIsGcMarking()) {
-      ref = reinterpret_cast<MirrorType*>(Mark(ref));
-    }
-    AssertToSpaceInvariant(gc_root_source, ref);
-    return ref;
-  } else if (with_read_barrier && kUseBrooksReadBarrier) {
-    // To be implemented.
-    return ref;
-  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    Thread* self = Thread::Current();
-    if (self != nullptr &&
-        self->GetIsGcMarking() &&
-        Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
-      MirrorType* old_ref = ref;
-      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
-      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      if (ref != old_ref) {
-        Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-        atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref);
+  if (kUseReadBarrier && with_read_barrier) {
+    if (kIsDebugBuild) {
+      Thread* const self = Thread::Current();
+      if (self != nullptr) {
+        CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);
       }
     }
-    AssertToSpaceInvariant(gc_root_source, ref);
-    return ref;
+    if (kUseBakerReadBarrier) {
+      // TODO: separate the read barrier code from the collector code more.
+      Thread* self = Thread::Current();
+      if (self != nullptr && self->GetIsGcMarking()) {
+        ref = reinterpret_cast<MirrorType*>(Mark(ref));
+      }
+      AssertToSpaceInvariant(gc_root_source, ref);
+      return ref;
+    } else if (kUseBrooksReadBarrier) {
+      // To be implemented.
+      return ref;
+    } else if (kUseTableLookupReadBarrier) {
+      Thread* self = Thread::Current();
+      if (self != nullptr &&
+          self->GetIsGcMarking() &&
+          Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+        MirrorType* old_ref = ref;
+        ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+        // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+        if (ref != old_ref) {
+          Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+          atomic_root->CompareExchangeStrongRelaxed(old_ref, ref);
+        }
+      }
+      AssertToSpaceInvariant(gc_root_source, ref);
+      return ref;
+    } else {
+      LOG(FATAL) << "Unexpected read barrier type";
+      UNREACHABLE();
+    }
   } else {
     return ref;
   }
 }
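
Both the field and root paths now use a relaxed strong CAS for the to-space update, on the grounds that losing the race is harmless: every racer installs the same marked reference. A small standalone illustration with std::atomic (stand-in types, not ART's Atomic wrapper):

    #include <atomic>
    #include <cstdio>

    int main() {
      int from_space = 1;
      int to_space = 2;
      std::atomic<int*> root{&from_space};

      int* old_ref = root.load(std::memory_order_relaxed);
      int* new_ref = &to_space;  // what Mark() would have returned
      if (new_ref != old_ref) {
        // Strong CAS, relaxed ordering: failure only means a concurrent
        // updater already installed the same to-space reference.
        root.compare_exchange_strong(old_ref, new_ref,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
      }
      std::printf("root -> %d\n", *root.load(std::memory_order_relaxed));
    }
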
 
 // TODO: Reduce copy paste
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
 inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<MirrorType>* root,
                                                GcRootSource* gc_root_source) {
   MirrorType* ref = root->AsMirrorPtr();
@@ -140,7 +169,7 @@
       if (new_ref.AsMirrorPtr() != old_ref.AsMirrorPtr()) {
         auto* atomic_root =
             reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
-        atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
+        atomic_root->CompareExchangeStrongRelaxed(old_ref, new_ref);
       }
     }
     AssertToSpaceInvariant(gc_root_source, ref);
@@ -191,7 +220,7 @@
 }
 
 inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) {
-  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj);
+  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
 }
 
 inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index e7ad731..5d32c09 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -37,32 +37,29 @@
 
 class ReadBarrier {
  public:
-  // TODO: disable thse flags for production use.
   // Enable the to-space invariant checks.
-  static constexpr bool kEnableToSpaceInvariantChecks = true;
+  static constexpr bool kEnableToSpaceInvariantChecks = kIsDebugBuild;
   // Enable the read barrier checks.
-  static constexpr bool kEnableReadBarrierInvariantChecks = true;
+  static constexpr bool kEnableReadBarrierInvariantChecks = kIsDebugBuild;
 
-  // It's up to the implementation whether the given field gets
-  // updated whereas the return value must be an updated reference.
+  // It's up to the implementation whether the given field gets updated whereas the return value
+  // must be an updated reference unless kAlwaysUpdateField is true.
   template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
-            bool kMaybeDuringStartup = false>
+            bool kAlwaysUpdateField = false>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // It's up to the implementation whether the given root gets updated
   // whereas the return value must be an updated reference.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
-            bool kMaybeDuringStartup = false>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root,
                                                   GcRootSource* gc_root_source = nullptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // It's up to the implementation whether the given root gets updated
   // whereas the return value must be an updated reference.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
-            bool kMaybeDuringStartup = false>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* BarrierForRoot(mirror::CompressedReference<MirrorType>* root,
                                                   GcRootSource* gc_root_source = nullptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -82,6 +79,7 @@
   static void AssertToSpaceInvariant(GcRootSource* gc_root_source, mirror::Object* ref)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // ALWAYS_INLINE on this caused a performance regression b/26744236.
   static mirror::Object* Mark(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   static mirror::Object* WhitePtr() {
@@ -101,8 +99,9 @@
   // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
   static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
   static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
+  // TODO: black_ptr_ is unused; we should remove it.
   static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
-  static constexpr uintptr_t rb_ptr_mask_ = 0x3;  // The low 2 bits for white|gray|black.
+  static constexpr uintptr_t rb_ptr_mask_ = 0x1;  // The low bit for white|gray.
 };
 
 }  // namespace art
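
The mask shrink from 0x3 to 0x1 works because the color states live in the low bits of an aligned pointer-sized word: with black retired, one bit distinguishes white from gray. A generic low-bit tagging sketch (illustrative constants, not ART's exact read-barrier word layout):

    #include <cstdint>
    #include <cstdio>

    constexpr uintptr_t kWhite = 0x0;  // not marked
    constexpr uintptr_t kGray = 0x1;   // marked, on mark stack
    constexpr uintptr_t kMask = 0x1;   // one bit now suffices

    int main() {
      alignas(8) static int object = 42;  // alignment keeps the low bits free
      uintptr_t tagged = reinterpret_cast<uintptr_t>(&object) | kGray;
      const char* color = (tagged & kMask) == kWhite ? "white" : "gray";
      int* ptr = reinterpret_cast<int*>(tagged & ~kMask);  // strip the tag
      std::printf("color=%s value=%d\n", color, *ptr);
    }
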
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 324bd9f..8a531d9 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -18,10 +18,10 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/abstract_method.h"
@@ -223,7 +223,7 @@
     for (size_t i = 1, args_offset = 0; i < shorty_len_; ++i, ++args_offset) {
       mirror::Object* arg = args->Get(args_offset);
       if (((shorty_[i] == 'L') && (arg != nullptr)) || ((arg == nullptr && shorty_[i] != 'L'))) {
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* dst_class =
             m->GetClassFromTypeIndex(classes->GetTypeItem(args_offset).type_idx_,
                                      true /* resolve */,
@@ -359,7 +359,7 @@
   }
   // TODO: If args contain object references, it may cause problems.
   Thread* const self = Thread::Current();
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   for (uint32_t i = 0; i < num_params; i++) {
     uint16_t type_idx = params->GetTypeItem(i).type_idx_;
     mirror::Class* param_type = m->GetClassFromTypeIndex(type_idx,
@@ -425,7 +425,7 @@
 
 static ArtMethod* FindVirtualMethod(mirror::Object* receiver, ArtMethod* method)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(method, sizeof(void*));
+  return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(method, kRuntimePointerSize);
 }
 
 
@@ -435,7 +435,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
-    CheckMethodArguments(soa.Vm(), method->GetInterfaceMethodIfProxy(sizeof(void*)), args);
+    CheckMethodArguments(soa.Vm(), method->GetInterfaceMethodIfProxy(kRuntimePointerSize), args);
   }
   method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
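
The reflection.cc changes swap raw sizeof(void*) arguments for the PointerSize enum from base/enums.h. A hedged sketch of why that helps: an enum class can't be silently built from an arbitrary size_t, so pointer-size arguments become compiler-checked (the names below are illustrative approximations, not ART's exact definitions):

    #include <cstddef>
    #include <cstdio>

    enum class PointerSize : std::size_t { k32 = 4, k64 = 8 };

    constexpr PointerSize kRuntimePointerSize =
        sizeof(void*) == 8 ? PointerSize::k64 : PointerSize::k32;

    std::size_t MethodSlotOffset(std::size_t index, PointerSize size) {
      return index * static_cast<std::size_t>(size);
    }

    int main() {
      // MethodSlotOffset(3, sizeof(void*)) would no longer compile.
      std::printf("%zu\n", MethodSlotOffset(3, kRuntimePointerSize));
    }
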
@@ -459,7 +459,8 @@
   }
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromVarArgs(soa, receiver, args);
@@ -489,7 +490,8 @@
   }
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromJValues(soa, receiver, args);
@@ -520,7 +522,8 @@
     receiver = nullptr;
   }
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromJValues(soa, receiver, args);
@@ -551,7 +554,8 @@
     receiver = nullptr;
   }
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromVarArgs(soa, receiver, args);
@@ -603,13 +607,13 @@
       }
 
       // Find the actual implementation of the virtual method.
-      m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m, sizeof(void*));
+      m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m, kRuntimePointerSize);
     }
   }
 
   // Get our arrays of arguments and their types, and check they're the same size.
   auto* objects = soa.Decode<mirror::ObjectArray<mirror::Object>*>(javaArgs);
-  auto* np_method = m->GetInterfaceMethodIfProxy(sizeof(void*));
+  auto* np_method = m->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   const DexFile::TypeList* classes = np_method->GetParameterTypeList();
   uint32_t classes_size = (classes == nullptr) ? 0 : classes->Size();
   uint32_t arg_count = (objects != nullptr) ? objects->GetLength() : 0;
@@ -776,8 +780,9 @@
                                                  UnboxingFailureKind(f).c_str(),
                                                  PrettyDescriptor(dst_class).c_str()).c_str());
     } else {
-      ThrowNullPointerException(StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
-                                             PrettyDescriptor(dst_class).c_str()).c_str());
+      ThrowNullPointerException(
+          StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
+                       PrettyDescriptor(dst_class).c_str()).c_str());
     }
     return false;
   }
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index c7c2709..016f3c7 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -21,6 +21,7 @@
 #include "ScopedLocalRef.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "common_compiler_test.h"
 #include "scoped_thread_state_change.h"
 
@@ -107,8 +108,8 @@
                                                 class_loader);
     CHECK(c != nullptr);
 
-    *method = is_static ? c->FindDirectMethod(method_name, method_signature, sizeof(void*))
-                        : c->FindVirtualMethod(method_name, method_signature, sizeof(void*));
+    *method = is_static ? c->FindDirectMethod(method_name, method_signature, kRuntimePointerSize)
+                        : c->FindVirtualMethod(method_name, method_signature, kRuntimePointerSize);
     CHECK(method != nullptr);
 
     if (is_static) {
@@ -506,7 +507,7 @@
 };
 
 TEST_F(ReflectionTest, StaticMainMethod) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
   StackHandleScope<1> hs(soa.Self());
@@ -517,7 +518,9 @@
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LMain;", class_loader);
   ASSERT_TRUE(klass != nullptr);
 
-  ArtMethod* method = klass->FindDirectMethod("main", "([Ljava/lang/String;)V", sizeof(void*));
+  ArtMethod* method = klass->FindDirectMethod("main",
+                                              "([Ljava/lang/String;)V",
+                                              kRuntimePointerSize);
   ASSERT_TRUE(method != nullptr);
 
   // Start runtime.
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index bfa8c54..3245ba0 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -41,13 +41,15 @@
   DCHECK_NE(method, GetImtConflictMethod());
   DCHECK_NE(method, GetResolutionMethod());
   // Don't use GetCalleeSaveMethod(), some tests don't set all callee save methods.
-  if (method == GetCalleeSaveMethodUnchecked(Runtime::kRefsAndArgs)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAll)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll);
+  if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsAndArgs)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAllCalleeSaves)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAllCalleeSaves);
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsOnly)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsOnly);
   } else {
-    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly));
-    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly);
+    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kSaveEverything));
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveEverything);
   }
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 556ba56..ddcfb6d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -23,8 +23,6 @@
 #include <sys/prctl.h>
 #endif
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include <cutils/trace.h>
 #include <signal.h>
 #include <sys/syscall.h>
 #include "base/memory_tool.h"
@@ -57,7 +55,9 @@
 #include "atomic.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
@@ -71,14 +71,13 @@
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope-inl.h"
-#include "image.h"
+#include "image-inl.h"
 #include "instrumentation.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
-#include "lambda/box_table.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -95,7 +94,6 @@
 #include "native/java_lang_Class.h"
 #include "native/java_lang_DexCache.h"
 #include "native/java_lang_Object.h"
-#include "native/java_lang_Runtime.h"
 #include "native/java_lang_String.h"
 #include "native/java_lang_StringFactory.h"
 #include "native/java_lang_System.h"
@@ -104,6 +102,7 @@
 #include "native/java_lang_VMClassLoader.h"
 #include "native/java_lang_ref_FinalizerReference.h"
 #include "native/java_lang_ref_Reference.h"
+#include "native/java_lang_reflect_AbstractMethod.h"
 #include "native/java_lang_reflect_Array.h"
 #include "native/java_lang_reflect_Constructor.h"
 #include "native/java_lang_reflect_Field.h"
@@ -115,11 +114,12 @@
 #include "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 #include "native/sun_misc_Unsafe.h"
 #include "native_bridge_art_interface.h"
+#include "native_stack_dump.h"
 #include "oat_file.h"
 #include "oat_file_manager.h"
 #include "os.h"
 #include "parsed_options.h"
-#include "profiler.h"
+#include "jit/profile_saver.h"
 #include "quick/quick_method_frame_info.h"
 #include "reflection.h"
 #include "runtime_options.h"
@@ -130,6 +130,7 @@
 #include "signal_set.h"
 #include "thread.h"
 #include "thread_list.h"
+#include "ti/agent.h"
 #include "trace.h"
 #include "transaction.h"
 #include "utils.h"
@@ -190,7 +191,6 @@
       abort_(nullptr),
       stats_enabled_(false),
       is_running_on_memory_tool_(RUNNING_ON_MEMORY_TOOL),
-      profiler_started_(false),
       instrumentation_(),
       main_thread_group_(nullptr),
       system_thread_group_(nullptr),
@@ -204,19 +204,30 @@
       implicit_so_checks_(false),
       implicit_suspend_checks_(false),
       no_sig_chain_(false),
+      force_native_bridge_(false),
       is_native_bridge_loaded_(false),
+      is_native_debuggable_(false),
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
       oat_file_manager_(nullptr),
-      is_low_memory_mode_(false) {
+      is_low_memory_mode_(false),
+      safe_mode_(false),
+      dump_native_stack_on_sig_quit_(true),
+      pruned_dalvik_cache_(false),
+      // Initially assume we perceive jank in case the process state is never updated.
+      process_state_(kProcessStateJankPerceptible),
+      zygote_no_threads_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
+  interpreter::CheckInterpreterAsmConstants();
 }
 
 Runtime::~Runtime() {
+  ScopedTrace trace("Runtime shutdown");
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
+
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -234,6 +245,7 @@
   }
 
   {
+    ScopedTrace trace2("Wait for shutdown cond");
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     shutting_down_started_ = true;
     while (threads_being_born_ > 0) {
@@ -244,6 +256,7 @@
   // Shutdown and wait for the daemons.
   CHECK(self != nullptr);
   if (IsFinishedStarting()) {
+    ScopedTrace trace2("Waiting for Daemons");
     self->ClearException();
     self->GetJniEnv()->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                                             WellKnownClasses::java_lang_Daemons_stop);
@@ -256,19 +269,27 @@
     self = nullptr;
   }
 
-  // Shut down background profiler before the runtime exits.
-  if (profiler_started_) {
-    BackgroundMethodSamplingProfiler::Shutdown();
-  }
-
   // Make sure to let the GC complete if it is running.
   heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
-  if (jit_.get() != nullptr) {
+  if (jit_ != nullptr) {
+    ScopedTrace trace2("Delete jit");
     VLOG(jit) << "Deleting jit thread pool";
     // Delete thread pool before the thread list since we don't want to wait forever on the
     // JIT compiler threads.
     jit_->DeleteThreadPool();
+    // Similarly, stop the profile saver thread before deleting the thread list.
+    jit_->StopProfileSaver();
+  }
+
+  // TODO Maybe do some locking.
+  for (auto& agent : agents_) {
+    agent.Unload();
+  }
+
+  // TODO Maybe do some locking.
+  for (auto& plugin : plugins_) {
+    plugin.Unload();
   }
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
@@ -276,11 +297,13 @@
   delete signal_catcher_;
 
   // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
-  delete thread_list_;
-
+  {
+    ScopedTrace trace2("Delete thread list");
+    delete thread_list_;
+  }
   // Delete the JIT after thread list to ensure that there is no remaining threads which could be
   // accessing the instrumentation when we delete it.
-  if (jit_.get() != nullptr) {
+  if (jit_ != nullptr) {
     VLOG(jit) << "Deleting jit";
     jit_.reset(nullptr);
   }
@@ -288,6 +311,7 @@
   // Shutdown the fault manager if it was initialized.
   fault_manager.Shutdown();
 
+  ScopedTrace trace2("Delete state");
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
@@ -303,6 +327,7 @@
   linear_alloc_.reset();
   low_4gb_arena_pool_.reset();
   arena_pool_.reset();
+  jit_arena_pool_.reset();
   MemMap::Shutdown();
 
   // TODO: acquire a static mutex on Runtime to avoid racing.
@@ -320,6 +345,7 @@
     os << "Runtime aborting...\n";
     if (Runtime::Current() == nullptr) {
       os << "(Runtime does not yet exist!)\n";
+      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
       return;
     }
     Thread* self = Thread::Current();
@@ -427,17 +453,27 @@
   GetMonitorList()->SweepMonitorList(visitor);
   GetJavaVM()->SweepJniWeakGlobals(visitor);
   GetHeap()->SweepAllocationRecords(visitor);
-  GetLambdaBoxTable()->SweepWeakBoxedLambdas(visitor);
 }
 
-bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
+bool Runtime::ParseOptions(const RuntimeOptions& raw_options,
+                           bool ignore_unrecognized,
+                           RuntimeArgumentMap* runtime_options) {
+  InitLogging(/* argv */ nullptr);  // Calls Locks::Init() as a side effect.
+  bool parsed = ParsedOptions::Parse(raw_options, ignore_unrecognized, runtime_options);
+  if (!parsed) {
+    LOG(ERROR) << "Failed to parse options";
+    return false;
+  }
+  return true;
+}
+
+bool Runtime::Create(RuntimeArgumentMap&& runtime_options) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != nullptr) {
     return false;
   }
-  InitLogging(nullptr);  // Calls Locks::Init() as a side effect.
   instance_ = new Runtime;
-  if (!instance_->Init(options, ignore_unrecognized)) {
+  if (!instance_->Init(std::move(runtime_options))) {
     // TODO: Currently deleting the instance will abort the runtime on destruction. Now This will
     // leak memory, instead. Fix the destructor. b/19100793.
     // delete instance_;
@@ -447,6 +483,12 @@
   return true;
 }
 
+bool Runtime::Create(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
+  RuntimeArgumentMap runtime_options;
+  return ParseOptions(raw_options, ignore_unrecognized, &runtime_options) &&
+      Create(std::move(runtime_options));
+}
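
Runtime creation is now split into ParseOptions() followed by a move-taking Create(), with the old Create(raw, ignore) overload kept as a thin composition of the two. A stand-in sketch of that two-phase pattern (hypothetical ArgumentMap, not ART's RuntimeArgumentMap):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>

    using ArgumentMap = std::map<std::string, std::string>;

    // Phase 1: parse raw options into a structured map.
    bool ParseOptions(int argc, char** argv, ArgumentMap* out) {
      for (int i = 1; i + 1 < argc; i += 2) {
        (*out)[argv[i]] = argv[i + 1];
      }
      return true;
    }

    // Phase 2: consume the parsed map by move.
    bool Create(ArgumentMap&& options) {
      std::printf("creating runtime with %zu options\n", options.size());
      return true;
    }

    int main(int argc, char** argv) {
      ArgumentMap options;
      // The retained convenience overload is just this composition.
      return ParseOptions(argc, argv, &options) && Create(std::move(options)) ? 0 : 1;
    }
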
+
 static jobject CreateSystemClassLoader(Runtime* runtime) {
   if (runtime->IsAotCompiler() && !runtime->GetCompilerCallbacks()->IsBootImage()) {
     return nullptr;
@@ -512,7 +554,7 @@
 
   // If a debug host build, disable ptrace restriction for debugging and test timeout thread dump.
   // Only 64-bit as prctl() may fail in 32 bit userspace on a 64-bit kernel.
-#if defined(__linux__) && !defined(__ANDROID__) && defined(__x86_64__)
+#if defined(__linux__) && !defined(ART_TARGET_ANDROID) && defined(__x86_64__)
   if (kIsDebugBuild) {
     CHECK_EQ(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), 0);
   }
@@ -525,38 +567,42 @@
 
   started_ = true;
 
-  // Use !IsAotCompiler so that we get test coverage, tests are never the zygote.
-  if (!IsAotCompiler()) {
-    ScopedObjectAccess soa(self);
-    gc::space::ImageSpace* image_space = heap_->GetImageSpace();
-    if (image_space != nullptr) {
-      ATRACE_BEGIN("AddImageStringsToTable");
-      GetInternTable()->AddImageStringsToTable(image_space);
-      ATRACE_END();
-      ATRACE_BEGIN("MoveImageClassesToClassTable");
-      GetClassLinker()->MoveImageClassesToClassTable();
-      ATRACE_END();
+  // Create the JIT if we have to use JIT compilation or to save profiling info.
+  // TODO(calin): We use the JIT class as a proxy for JIT compilation and for
+  // recording profiles. Maybe we should consider changing the name to make it
+  // clearer that it's not only about compiling. b/28295073.
+  if (jit_options_->UseJitCompilation() || jit_options_->GetSaveProfilingInfo()) {
+    std::string error_msg;
+    if (!IsZygote()) {
+      // If we are the zygote then we need to wait until after forking to create the
+      // code cache due to SELinux restrictions on r/w/x memory regions.
+      CreateJit();
+    } else if (jit_options_->UseJitCompilation()) {
+      if (!jit::Jit::LoadCompilerLibrary(&error_msg)) {
+        // Try to load compiler pre zygote to reduce PSS. b/27744947
+        LOG(WARNING) << "Failed to load JIT compiler with error " << error_msg;
+      }
     }
   }
 
-  // If we are the zygote then we need to wait until after forking to create the code cache
-  // due to SELinux restrictions on r/w/x memory regions.
-  if (!IsZygote() && jit_options_->UseJIT()) {
-    CreateJit();
-  }
-
-  if (!IsImageDex2OatEnabled() || !GetHeap()->HasImageSpace()) {
+  if (!IsImageDex2OatEnabled() || !GetHeap()->HasBootImageSpace()) {
     ScopedObjectAccess soa(self);
-    StackHandleScope<1> hs(soa.Self());
-    auto klass(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
-    class_linker_->EnsureInitialized(soa.Self(), klass, true, true);
+    StackHandleScope<2> hs(soa.Self());
+
+    auto class_class(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
+    auto field_class(hs.NewHandle<mirror::Class>(mirror::Field::StaticClass()));
+
+    class_linker_->EnsureInitialized(soa.Self(), class_class, true, true);
+    // Field class is needed for register_java_net_InetAddress in libcore, b/28153851.
+    class_linker_->EnsureInitialized(soa.Self(), field_class, true, true);
   }
 
   // InitNativeMethods needs to be after started_ so that the classes
   // it touches will have methods linked to the oat file if necessary.
-  ATRACE_BEGIN("InitNativeMethods");
-  InitNativeMethods();
-  ATRACE_END();
+  {
+    ScopedTrace trace2("InitNativeMethods");
+    InitNativeMethods();
+  }
 
   // Initialize well known thread group values that may be accessed threads while attaching.
   InitThreadGroups(self);
@@ -573,13 +619,16 @@
     if (is_native_bridge_loaded_) {
       PreInitializeNativeBridge(".");
     }
-    DidForkFromZygote(self->GetJniEnv(), NativeBridgeAction::kInitialize,
-                      GetInstructionSetString(kRuntimeISA));
+    NativeBridgeAction action = force_native_bridge_
+        ? NativeBridgeAction::kInitialize
+        : NativeBridgeAction::kUnload;
+    InitNonZygoteOrPostFork(self->GetJniEnv(),
+                            /* is_system_server */ false,
+                            action,
+                            GetInstructionSetString(kRuntimeISA));
   }
 
-  ATRACE_BEGIN("StartDaemonThreads");
   StartDaemonThreads();
-  ATRACE_END();
 
   {
     ScopedObjectAccess soa(self);
@@ -589,19 +638,6 @@
   VLOG(startup) << "Runtime::Start exiting";
   finished_starting_ = true;
 
-  if (profiler_options_.IsEnabled() && !profile_output_filename_.empty()) {
-    // User has asked for a profile using -Xenable-profiler.
-    // Create the profile file if it doesn't exist.
-    int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
-    if (fd >= 0) {
-      close(fd);
-    } else if (errno != EEXIST) {
-      LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
-      return true;
-    }
-    StartProfiler(profile_output_filename_.c_str());
-  }
-
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
     ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
     Trace::Start(trace_config_->trace_file.c_str(),
@@ -663,7 +699,8 @@
 #endif
 }
 
-void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa) {
+void Runtime::InitNonZygoteOrPostFork(
+    JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa) {
   is_zygote_ = false;
 
   if (is_native_bridge_loaded_) {
@@ -685,8 +722,13 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (jit_.get() == nullptr && jit_options_->UseJIT()) {
-    // Create the JIT if the flag is set and we haven't already create it (happens for run-tests).
+
+  if (!is_system_server &&
+      !safe_mode_ &&
+      (jit_options_->UseJitCompilation() || jit_options_->GetSaveProfilingInfo()) &&
+      jit_.get() == nullptr) {
+    // Note that when running ART standalone (not zygote, nor zygote fork),
+    // the jit may have already been created.
     CreateJit();
   }
 
@@ -714,6 +756,7 @@
 }
 
 void Runtime::StartDaemonThreads() {
+  ScopedTrace trace(__FUNCTION__);
   VLOG(startup) << "Runtime::StartDaemonThreads entering";
 
   Thread* self = Thread::Current();
@@ -732,61 +775,96 @@
   VLOG(startup) << "Runtime::StartDaemonThreads exiting";
 }
 
+// Attempts to open dex files from image(s). Given the image location, try to find the oat
+// file and open it to get the stored dex files. If the image is the first of a multi-image
+// boot classpath, go on and also open the other images.
 static bool OpenDexFilesFromImage(const std::string& image_location,
                                   std::vector<std::unique_ptr<const DexFile>>* dex_files,
                                   size_t* failures) {
   DCHECK(dex_files != nullptr) << "OpenDexFilesFromImage: out-param is nullptr";
-  std::string system_filename;
-  bool has_system = false;
-  std::string cache_filename_unused;
-  bool dalvik_cache_exists_unused;
-  bool has_cache_unused;
-  bool is_global_cache_unused;
-  bool found_image = gc::space::ImageSpace::FindImageFilename(image_location.c_str(),
-                                                              kRuntimeISA,
-                                                              &system_filename,
-                                                              &has_system,
-                                                              &cache_filename_unused,
-                                                              &dalvik_cache_exists_unused,
-                                                              &has_cache_unused,
-                                                              &is_global_cache_unused);
-  *failures = 0;
-  if (!found_image || !has_system) {
-    return false;
-  }
-  std::string error_msg;
-  // We are falling back to non-executable use of the oat file because patching failed, presumably
-  // due to lack of space.
-  std::string oat_filename = ImageHeader::GetOatLocationFromImageLocation(system_filename.c_str());
-  std::string oat_location = ImageHeader::GetOatLocationFromImageLocation(image_location.c_str());
-  std::unique_ptr<File> file(OS::OpenFileForReading(oat_filename.c_str()));
-  if (file.get() == nullptr) {
-    return false;
-  }
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(), false, false, &error_msg));
-  if (elf_file.get() == nullptr) {
-    return false;
-  }
-  std::unique_ptr<const OatFile> oat_file(
-      OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
-  if (oat_file == nullptr) {
-    LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
-    return false;
-  }
 
-  for (const OatFile::OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
-    if (oat_dex_file == nullptr) {
-      *failures += 1;
-      continue;
+  // Use a work-list approach, so that we can easily reuse the opening code.
+  std::vector<std::string> image_locations;
+  image_locations.push_back(image_location);
+
+  for (size_t index = 0; index < image_locations.size(); ++index) {
+    std::string system_filename;
+    bool has_system = false;
+    std::string cache_filename_unused;
+    bool dalvik_cache_exists_unused;
+    bool has_cache_unused;
+    bool is_global_cache_unused;
+    bool found_image = gc::space::ImageSpace::FindImageFilename(image_locations[index].c_str(),
+                                                                kRuntimeISA,
+                                                                &system_filename,
+                                                                &has_system,
+                                                                &cache_filename_unused,
+                                                                &dalvik_cache_exists_unused,
+                                                                &has_cache_unused,
+                                                                &is_global_cache_unused);
+
+    if (!found_image || !has_system) {
+      return false;
     }
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file.get() == nullptr) {
-      *failures += 1;
-    } else {
-      dex_files->push_back(std::move(dex_file));
+
+    // We are falling back to non-executable use of the oat file because patching failed, presumably
+    // due to lack of space.
+    std::string oat_filename =
+        ImageHeader::GetOatLocationFromImageLocation(system_filename.c_str());
+    std::string oat_location =
+        ImageHeader::GetOatLocationFromImageLocation(image_locations[index].c_str());
+    // Note: in the multi-image case, the image location may end in ".jar", and not ".art".
+    //       Handle that here.
+    if (EndsWith(oat_location, ".jar")) {
+      oat_location.replace(oat_location.length() - 3, 3, "oat");
     }
+
+    std::unique_ptr<File> file(OS::OpenFileForReading(oat_filename.c_str()));
+    if (file.get() == nullptr) {
+      return false;
+    }
+    std::string error_msg;
+    std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(),
+                                                    false,
+                                                    false,
+                                                    /*low_4gb*/false,
+                                                    &error_msg));
+    if (elf_file.get() == nullptr) {
+      return false;
+    }
+    std::unique_ptr<const OatFile> oat_file(
+        OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
+    if (oat_file == nullptr) {
+      LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
+      return false;
+    }
+
+    for (const OatFile::OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
+      if (oat_dex_file == nullptr) {
+        *failures += 1;
+        continue;
+      }
+      std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+      if (dex_file.get() == nullptr) {
+        *failures += 1;
+      } else {
+        dex_files->push_back(std::move(dex_file));
+      }
+    }
+
+    if (index == 0) {
+      // First file. See if this is a multi-image environment, and if so, enqueue the other images.
+      const OatHeader& boot_oat_header = oat_file->GetOatHeader();
+      const char* boot_cp = boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPathKey);
+      if (boot_cp != nullptr) {
+        gc::space::ImageSpace::CreateMultiImageLocations(image_locations[0],
+                                                         boot_cp,
+                                                         &image_locations);
+      }
+    }
+
+    Runtime::Current()->GetOatFileManager().RegisterOatFile(std::move(oat_file));
   }
-  Runtime::Current()->GetOatFileManager().RegisterOatFile(std::move(oat_file));
   return true;
 }
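
The rewritten OpenDexFilesFromImage processes image locations as a work list: iterating by index lets the first image enqueue the rest of a multi-image boot classpath mid-loop. A minimal sketch of that pattern with placeholder image names:

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> work;
      work.push_back("boot.art");
      // Index-based loop: size() is re-read each iteration, so items appended
      // below are processed in the same pass.
      for (std::size_t index = 0; index < work.size(); ++index) {
        std::printf("opening %s\n", work[index].c_str());
        if (index == 0) {
          // First image: pretend its oat header listed companion images.
          work.push_back("boot-core.art");
          work.push_back("boot-okhttp.art");
        }
      }
    }
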
 
@@ -804,12 +882,13 @@
   for (size_t i = 0; i < dex_filenames.size(); i++) {
     const char* dex_filename = dex_filenames[i].c_str();
     const char* dex_location = dex_locations[i].c_str();
+    static constexpr bool kVerifyChecksum = true;
     std::string error_msg;
     if (!OS::FileExists(dex_filename)) {
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
+    if (!DexFile::Open(dex_filename, dex_location, kVerifyChecksum, &error_msg, dex_files)) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     }
@@ -820,32 +899,26 @@
 void Runtime::SetSentinel(mirror::Object* sentinel) {
   CHECK(sentinel_.Read() == nullptr);
   CHECK(sentinel != nullptr);
+  CHECK(!heap_->IsMovableObject(sentinel));
   sentinel_ = GcRoot<mirror::Object>(sentinel);
 }
 
-bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
-  ATRACE_BEGIN("Runtime::Init");
+bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
+  RuntimeArgumentMap runtime_options(std::move(runtime_options_in));
+  ScopedTrace trace(__FUNCTION__);
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
 
   MemMap::Init();
 
   using Opt = RuntimeArgumentMap;
-  RuntimeArgumentMap runtime_options;
-  std::unique_ptr<ParsedOptions> parsed_options(
-      ParsedOptions::Create(raw_options, ignore_unrecognized, &runtime_options));
-  if (parsed_options.get() == nullptr) {
-    LOG(ERROR) << "Failed to parse options";
-    ATRACE_END();
-    return false;
-  }
   VLOG(startup) << "Runtime::Init -verbose:startup enabled";
 
   QuasiAtomic::Startup();
 
   oat_file_manager_ = new OatFileManager;
 
-  Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold),
-                runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
+  Thread::SetSensitiveThreadHook(runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
+  Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold));
 
   boot_class_path_string_ = runtime_options.ReleaseOrDefault(Opt::BootClassPath);
   class_path_string_ = runtime_options.ReleaseOrDefault(Opt::ClassPath);
@@ -858,6 +931,7 @@
   is_explicit_gc_disabled_ = runtime_options.Exists(Opt::DisableExplicitGC);
   dex2oat_enabled_ = runtime_options.GetOrDefault(Opt::Dex2Oat);
   image_dex2oat_enabled_ = runtime_options.GetOrDefault(Opt::ImageDex2Oat);
+  dump_native_stack_on_sig_quit_ = runtime_options.GetOrDefault(Opt::DumpNativeStackOnSigQuit);
 
   vfprintf_ = runtime_options.GetOrDefault(Opt::HookVfprintf);
   exit_ = runtime_options.GetOrDefault(Opt::HookExit);
@@ -883,6 +957,7 @@
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
   no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
+  force_native_bridge_ = runtime_options.Exists(Opt::ForceNativeBridge);
 
   Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
 
@@ -896,8 +971,17 @@
   experimental_flags_ = runtime_options.GetOrDefault(Opt::Experimental);
   is_low_memory_mode_ = runtime_options.Exists(Opt::LowMemoryMode);
 
+  if (experimental_flags_ & ExperimentalFlags::kRuntimePlugins) {
+    plugins_ = runtime_options.ReleaseOrDefault(Opt::Plugins);
+  }
+  if (experimental_flags_ & ExperimentalFlags::kAgents) {
+    agents_ = runtime_options.ReleaseOrDefault(Opt::AgentPath);
+    // TODO Add back in -agentlib
+    // for (auto lib : runtime_options.ReleaseOrDefault(Opt::AgentLib)) {
+    //   agents_.push_back(lib);
+    // }
+  }
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
-  ATRACE_BEGIN("CreateHeap");
   heap_ = new gc::Heap(runtime_options.GetOrDefault(Opt::MemoryInitialSize),
                        runtime_options.GetOrDefault(Opt::HeapGrowthLimit),
                        runtime_options.GetOrDefault(Opt::HeapMinFree),
@@ -926,13 +1010,12 @@
                        xgc_option.verify_pre_sweeping_rosalloc_,
                        xgc_option.verify_post_gc_rosalloc_,
                        xgc_option.gcstress_,
+                       xgc_option.measure_,
                        runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM),
                        runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
-  ATRACE_END();
 
-  if (heap_->GetImageSpace() == nullptr && !allow_dex_file_fallback_) {
+  if (!heap_->HasBootImageSpace() && !allow_dex_file_fallback_) {
     LOG(ERROR) << "Dex file fallback disabled, cannot continue without image.";
-    ATRACE_END();
     return false;
   }
 
@@ -948,19 +1031,20 @@
     // this case.
     // If runtime_options doesn't have UseJIT set to true then CreateFromRuntimeArguments returns
     // null and we don't create the jit.
-    jit_options_->SetUseJIT(false);
+    jit_options_->SetUseJitCompilation(false);
+    jit_options_->SetSaveProfilingInfo(false);
   }
 
-  // Allocate a global table of boxed lambda objects <-> closures.
-  lambda_box_table_ = MakeUnique<lambda::BoxTable>();
-
   // Use MemMap arena pool for jit, malloc otherwise. Malloc arenas are faster to allocate but
   // can't be trimmed as easily.
   const bool use_malloc = IsAotCompiler();
-  arena_pool_.reset(new ArenaPool(use_malloc, false));
+  arena_pool_.reset(new ArenaPool(use_malloc, /* low_4gb */ false));
+  jit_arena_pool_.reset(
+      new ArenaPool(/* use_malloc */ false, /* low_4gb */ false, "CompilerMetadata"));
+
   if (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA)) {
     // 4gb, no malloc. Explanation in header.
-    low_4gb_arena_pool_.reset(new ArenaPool(false, true));
+    low_4gb_arena_pool_.reset(new ArenaPool(/* use_malloc */ false, /* low_4gb */ true));
   }
   linear_alloc_.reset(CreateLinearAlloc());
 
@@ -1021,6 +1105,10 @@
 
   java_vm_ = new JavaVMExt(this, runtime_options);
 
+  // Add the JniEnv handler.
+  // TODO Refactor this stuff.
+  java_vm_->AddEnvironmentHook(JNIEnvExt::GetEnvHandler);
+
   Thread::Startup();
 
   // ClassLinker needs an attached thread, but we can't fully attach a thread without creating
@@ -1038,12 +1126,17 @@
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
-  if (GetHeap()->HasImageSpace()) {
-    ATRACE_BEGIN("InitFromImage");
-    class_linker_->InitFromImage();
-    ATRACE_END();
+  if (GetHeap()->HasBootImageSpace()) {
+    std::string error_msg;
+    bool result = class_linker_->InitFromBootImage(&error_msg);
+    if (!result) {
+      LOG(ERROR) << "Could not initialize from image: " << error_msg;
+      return false;
+    }
     if (kIsDebugBuild) {
-      GetHeap()->GetImageSpace()->VerifyImageAllocations();
+      for (auto image_space : GetHeap()->GetBootImageSpaces()) {
+        image_space->VerifyImageAllocations();
+      }
     }
     if (boot_class_path_string_.empty()) {
       // The bootclasspath is not explicitly specified: construct it from the loaded dex files.
@@ -1055,6 +1148,14 @@
       }
       boot_class_path_string_ = Join(dex_locations, ':');
     }
+    {
+      ScopedTrace trace2("AddImageStringsToTable");
+      GetInternTable()->AddImagesStringsToTable(heap_->GetBootImageSpaces());
+    }
+    {
+      ScopedTrace trace2("MoveImageClassesToClassTable");
+      GetClassLinker()->AddBootImageClassesToClassTable();
+    }
   } else {
     std::vector<std::string> dex_filenames;
     Split(boot_class_path_string_, ':', &dex_filenames);
@@ -1068,12 +1169,20 @@
     }
 
     std::vector<std::unique_ptr<const DexFile>> boot_class_path;
-    OpenDexFiles(dex_filenames,
-                 dex_locations,
-                 runtime_options.GetOrDefault(Opt::Image),
-                 &boot_class_path);
+    if (runtime_options.Exists(Opt::BootClassPathDexList)) {
+      boot_class_path.swap(*runtime_options.GetOrDefault(Opt::BootClassPathDexList));
+    } else {
+      OpenDexFiles(dex_filenames,
+                   dex_locations,
+                   runtime_options.GetOrDefault(Opt::Image),
+                   &boot_class_path);
+    }
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
-    class_linker_->InitWithoutImage(std::move(boot_class_path));
+    std::string error_msg;
+    if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
+      LOG(ERROR) << "Could not initialize without image: " << error_msg;
+      return false;
+    }
 
     // TODO: Should we move the following to InitWithoutImage?
     SetInstructionSet(instruction_set_);
@@ -1099,26 +1208,6 @@
         Trace::TraceOutputMode::kFile;
   }
 
-  {
-    auto&& profiler_options = runtime_options.ReleaseOrDefault(Opt::ProfilerOpts);
-    profile_output_filename_ = profiler_options.output_file_name_;
-
-    // TODO: Don't do this, just change ProfilerOptions to include the output file name?
-    ProfilerOptions other_options(
-        profiler_options.enabled_,
-        profiler_options.period_s_,
-        profiler_options.duration_s_,
-        profiler_options.interval_us_,
-        profiler_options.backoff_coefficient_,
-        profiler_options.start_immediately_,
-        profiler_options.top_k_threshold_,
-        profiler_options.top_k_change_threshold_,
-        profiler_options.profile_type_,
-        profiler_options.max_stack_depth_);
-
-    profiler_options_ = other_options;
-  }
-
   // TODO: move this to just be an Trace::Start argument
   Trace::SetDefaultClockSource(runtime_options.GetOrDefault(Opt::ProfileClock));
 
@@ -1136,6 +1225,16 @@
   pre_allocated_NoClassDefFoundError_ = GcRoot<mirror::Throwable>(self->GetException());
   self->ClearException();
 
+  // Runtime initialization is largely done now.
+  // We load plugins first since they can modify the runtime state slightly.
+  // Load all plugins.
+  for (auto& plugin : plugins_) {
+    std::string err;
+    if (!plugin.Load(&err)) {
+      LOG(FATAL) << plugin << " failed to load: " << err;
+    }
+  }
+
   // Look for a native bridge.
   //
   // The intended flow here is, in the case of a running system:
@@ -1168,9 +1267,21 @@
     is_native_bridge_loaded_ = LoadNativeBridge(native_bridge_file_name);
   }
 
-  VLOG(startup) << "Runtime::Init exiting";
+  // Start up agents.
+  // TODO: Maybe we should start a new thread to run these on. Investigate RI behavior more.
+  for (auto& agent : agents_) {
+    // TODO: Check err.
+    int res = 0;
+    std::string err = "";
+    ti::Agent::LoadError result = agent.Load(&res, &err);
+    if (result == ti::Agent::kInitializationError) {
+      LOG(FATAL) << "Unable to initialize agent!";
+    } else if (result != ti::Agent::kNoError) {
+      LOG(ERROR) << "Unable to load an agent: " << err;
+    }
+  }
 
-  ATRACE_END();
+  VLOG(startup) << "Runtime::Init exiting";
 
   return true;
 }
@@ -1186,18 +1297,30 @@
   // First set up JniConstants, which is used by both the runtime's built-in native
   // methods and libcore.
   JniConstants::init(env);
-  WellKnownClasses::Init(env);
 
   // Then set up the native methods provided by the runtime itself.
   RegisterRuntimeNativeMethods(env);
 
-  // Then set up libcore, which is just a regular JNI library with a regular JNI_OnLoad.
-  // Most JNI libraries can just use System.loadLibrary, but libcore can't because it's
-  // the library that implements System.loadLibrary!
+  // Initialize classes used in JNI. The initialization requires runtime native
+  // methods to be loaded first.
+  WellKnownClasses::Init(env);
+
+  // Then set up libjavacore / libopenjdk, which are just regular JNI libraries with
+  // a regular JNI_OnLoad. Most JNI libraries can just use System.loadLibrary, but
+  // libcore can't because it's the library that implements System.loadLibrary!
   {
-    std::string reason;
-    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, &reason)) {
-      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << reason;
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << error_msg;
+    }
+  }
+  {
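+    // Debug builds load the debug variant of the OpenJDK native library.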
+    constexpr const char* kOpenJdkLibrary = kIsDebugBuild
+                                                ? "libopenjdkd.so"
+                                                : "libopenjdk.so";
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, kOpenJdkLibrary, nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"" << kOpenJdkLibrary << "\": " << error_msg;
     }
   }
 
@@ -1207,6 +1330,10 @@
   VLOG(startup) << "Runtime::InitNativeMethods exiting";
 }
 
+void Runtime::ReclaimArenaPoolMemory() {
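+  // Lock the pool and release any reclaimable arena memory back to the system.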
+  arena_pool_->LockReclaimMemory();
+}
+
 void Runtime::InitThreadGroups(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
   ScopedJniEnvLocalRefState env_state(env);
@@ -1247,13 +1374,13 @@
   register_java_lang_DexCache(env);
   register_java_lang_Object(env);
   register_java_lang_ref_FinalizerReference(env);
+  register_java_lang_reflect_AbstractMethod(env);
   register_java_lang_reflect_Array(env);
   register_java_lang_reflect_Constructor(env);
   register_java_lang_reflect_Field(env);
   register_java_lang_reflect_Method(env);
   register_java_lang_reflect_Proxy(env);
   register_java_lang_ref_Reference(env);
-  register_java_lang_Runtime(env);
   register_java_lang_String(env);
   register_java_lang_StringFactory(env);
   register_java_lang_System(env);
@@ -1268,10 +1395,21 @@
 }
 
 void Runtime::DumpForSigQuit(std::ostream& os) {
+  // Dumping for SIGQUIT may cause deadlocks if the debugger is active. b/26118154
+  if (Dbg::IsDebuggerActive()) {
+    LOG(INFO) << "Skipping DumpForSigQuit due to active debugger";
+    return;
+  }
   GetClassLinker()->DumpForSigQuit(os);
   GetInternTable()->DumpForSigQuit(os);
   GetJavaVM()->DumpForSigQuit(os);
   GetHeap()->DumpForSigQuit(os);
+  oat_file_manager_->DumpForSigQuit(os);
+  if (GetJit() != nullptr) {
+    GetJit()->DumpForSigQuit(os);
+  } else {
+    os << "Running non JIT\n";
+  }
   TrackedAllocators::Dump(os);
   os << "\n";
 
@@ -1361,10 +1499,12 @@
 
 bool Runtime::AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group,
                                   bool create_peer) {
+  ScopedTrace trace(__FUNCTION__);
   return Thread::Attach(thread_name, as_daemon, thread_group, create_peer) != nullptr;
 }
 
 void Runtime::DetachCurrentThread() {
+  ScopedTrace trace(__FUNCTION__);
   Thread* self = Thread::Current();
   if (self == nullptr) {
     LOG(FATAL) << "attempting to detach thread that is not attached";
@@ -1414,7 +1554,7 @@
   // Visiting the roots of these ArtMethods is not currently required since all the GcRoots are
   // null.
   BufferedRootVisitor<16> buffered_visitor(visitor, RootInfo(kRootVMInternal));
-  const size_t pointer_size = GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = GetClassLinker()->GetImagePointerSize();
   if (HasResolutionMethod()) {
     resolution_method_->VisitRoots(buffered_visitor, pointer_size);
   }
@@ -1494,15 +1634,19 @@
   }
 }
 
-ArtMethod* Runtime::CreateImtConflictMethod() {
-  auto* method = Runtime::Current()->GetClassLinker()->CreateRuntimeMethod();
+ArtMethod* Runtime::CreateImtConflictMethod(LinearAlloc* linear_alloc) {
+  ClassLinker* const class_linker = GetClassLinker();
+  ArtMethod* method = class_linker->CreateRuntimeMethod(linear_alloc);
   // When compiling, the code pointer will get set later when the image is loaded.
+  const PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
   if (IsAotCompiler()) {
-    size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
     method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   } else {
     method->SetEntryPointFromQuickCompiledCode(GetQuickImtConflictStub());
   }
+  // Create empty conflict table.
+  method->SetImtConflictTable(class_linker->CreateImtConflictTable(/*count*/0u, linear_alloc),
+                              pointer_size);
   return method;
 }
 
@@ -1513,10 +1657,10 @@
 }
 
 ArtMethod* Runtime::CreateResolutionMethod() {
-  auto* method = Runtime::Current()->GetClassLinker()->CreateRuntimeMethod();
+  auto* method = GetClassLinker()->CreateRuntimeMethod(GetLinearAlloc());
   // When compiling, the code pointer will get set later when the image is loaded.
   if (IsAotCompiler()) {
-    size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
+    PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
     method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   } else {
     method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionStub());
@@ -1525,8 +1669,8 @@
 }
 
 ArtMethod* Runtime::CreateCalleeSaveMethod() {
-  auto* method = Runtime::Current()->GetClassLinker()->CreateRuntimeMethod();
-  size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
+  auto* method = GetClassLinker()->CreateRuntimeMethod(GetLinearAlloc());
+  PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
   method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   DCHECK_NE(instruction_set_, kNone);
   DCHECK(method->IsRuntimeMethod());
@@ -1539,7 +1683,6 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNoReadsOrWrites);
   java_vm_->DisallowNewWeakGlobals();
   heap_->DisallowNewAllocationRecords();
-  lambda_box_table_->DisallowNewWeakBoxedLambdas();
 }
 
 void Runtime::AllowNewSystemWeaks() {
@@ -1548,7 +1691,6 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNormal);  // TODO: Do this in the sweeping.
   java_vm_->AllowNewWeakGlobals();
   heap_->AllowNewAllocationRecords();
-  lambda_box_table_->AllowNewWeakBoxedLambdas();
 }
 
 void Runtime::BroadcastForNewSystemWeaks() {
@@ -1559,7 +1701,6 @@
   intern_table_->BroadcastForNewInterns();
   java_vm_->BroadcastForNewWeakGlobals();
   heap_->BroadcastForNewAllocationRecords();
-  lambda_box_table_->BroadcastForNewWeakBoxedLambdas();
 }
 
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
@@ -1605,10 +1746,43 @@
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
-void Runtime::StartProfiler(const char* profile_output_filename) {
-  profile_output_filename_ = profile_output_filename;
-  profiler_started_ =
-      BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_);
+void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
+                              const std::string& profile_output_filename,
+                              const std::string& foreign_dex_profile_path,
+                              const std::string& app_dir) {
+  if (jit_.get() == nullptr) {
+    // We are not JITing. Nothing to do.
+    return;
+  }
+
+  VLOG(profiler) << "Register app with " << profile_output_filename
+      << " " << Join(code_paths, ':');
+
+  if (profile_output_filename.empty()) {
+    LOG(WARNING) << "JIT profile information will not be recorded: profile filename is empty.";
+    return;
+  }
+  if (!FileExists(profile_output_filename)) {
+    LOG(WARNING) << "JIT profile information will not be recorded: profile file does not exits.";
+    return;
+  }
+  if (code_paths.empty()) {
+    LOG(WARNING) << "JIT profile information will not be recorded: code paths is empty.";
+    return;
+  }
+
+  jit_->StartProfileSaver(profile_output_filename,
+                          code_paths,
+                          foreign_dex_profile_path,
+                          app_dir);
+}
+
+void Runtime::NotifyDexLoaded(const std::string& dex_location) {
+  VLOG(profiler) << "Notify dex loaded: " << dex_location;
+  // We know that if the ProfileSaver is started then we can record profile information.
+  if (ProfileSaver::IsStarted()) {
+    ProfileSaver::NotifyDexUse(dex_location);
+  }
 }
 
 // Transaction support.
@@ -1737,7 +1911,7 @@
 
 void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::string>* argv)
     const {
-  if (GetInstrumentation()->InterpretOnly() || UseJit()) {
+  if (GetInstrumentation()->InterpretOnly()) {
     argv->push_back("--compiler-filter=interpret-only");
   }
 
@@ -1754,24 +1928,14 @@
   argv->push_back(feature_string);
 }
 
-void Runtime::UpdateProfilerState(int state) {
-  VLOG(profiler) << "Profiler state updated to " << state;
-}
-
 void Runtime::CreateJit() {
   CHECK(!IsAotCompiler());
-  if (GetInstrumentation()->IsForcedInterpretOnly()) {
-    // Don't create JIT if forced interpret only.
-    return;
+  if (kIsDebugBuild && GetInstrumentation()->IsForcedInterpretOnly()) {
+    DCHECK(!jit_options_->UseJitCompilation());
   }
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
-  if (jit_.get() != nullptr) {
-    compiler_callbacks_ = jit_->GetCompilerCallbacks();
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
-                                     jit_options_->GetWarmupThreshold());
-    jit_->CreateThreadPool();
-  } else {
+  if (jit_.get() == nullptr) {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
   }
 }
@@ -1796,14 +1960,35 @@
   imt_unimplemented_method_ = method;
 }
 
+void Runtime::FixupConflictTables() {
+  // We can only do this after the class linker is created.
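+  // Give both methods an empty conflict table so callers never observe a null table.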
+  const PointerSize pointer_size = GetClassLinker()->GetImagePointerSize();
+  if (imt_unimplemented_method_->GetImtConflictTable(pointer_size) == nullptr) {
+    imt_unimplemented_method_->SetImtConflictTable(
+        ClassLinker::CreateImtConflictTable(/*count*/0u, GetLinearAlloc(), pointer_size),
+        pointer_size);
+  }
+  if (imt_conflict_method_->GetImtConflictTable(pointer_size) == nullptr) {
+    imt_conflict_method_->SetImtConflictTable(
+          ClassLinker::CreateImtConflictTable(/*count*/0u, GetLinearAlloc(), pointer_size),
+          pointer_size);
+  }
+}
+
 bool Runtime::IsVerificationEnabled() const {
-  return verify_ == verifier::VerifyMode::kEnable;
+  return verify_ == verifier::VerifyMode::kEnable ||
+      verify_ == verifier::VerifyMode::kSoftFail;
 }
 
 bool Runtime::IsVerificationSoftFail() const {
   return verify_ == verifier::VerifyMode::kSoftFail;
 }
 
+bool Runtime::IsDeoptimizeable(uintptr_t code) const
+    SHARED_REQUIRES(Locks::mutator_lock_) {
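+  // Code in the boot image oat file is AOT-compiled and does not support deoptimization.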
+  return !heap_->IsInBootImageOatFile(reinterpret_cast<void *>(code));
+}
+
 LinearAlloc* Runtime::CreateLinearAlloc() {
   // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a
   // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold
@@ -1821,4 +2006,19 @@
   return is_low_memory_mode_ ? kLowMemoryMaxLoadFactor : kNormalMaxLoadFactor;
 }
 
+void Runtime::UpdateProcessState(ProcessState process_state) {
+  ProcessState old_process_state = process_state_;
+  process_state_ = process_state;
+  GetHeap()->UpdateProcessState(old_process_state, process_state);
+}
+
+void Runtime::RegisterSensitiveThread() const {
+  Thread::SetJitSensitiveThread();
+}
+
+// Returns true if JIT compilation is enabled. GetJit() will not be null in this case.
+bool Runtime::UseJitCompilation() const {
+  return (jit_ != nullptr) && jit_->UseJitCompilation();
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 7b1fdb2..6da60f2 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -35,7 +35,7 @@
 #include "method_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "profiler_options.h"
+#include "process_state.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
@@ -54,10 +54,6 @@
   class JitOptions;
 }  // namespace jit
 
-namespace lambda {
-  class BoxTable;
-}  // namespace lambda
-
 namespace mirror {
   class ClassLoader;
   class Array;
@@ -67,6 +63,9 @@
   class String;
   class Throwable;
 }  // namespace mirror
+namespace ti {
+  class Agent;
+}  // namespace ti
 namespace verifier {
   class MethodVerifier;
   enum class VerifyMode : int8_t;
@@ -84,6 +83,8 @@
 class MonitorPool;
 class NullPointerHandler;
 class OatFileManager;
+class Plugin;
+struct RuntimeArgumentMap;
 class SignalCatcher;
 class StackOverflowHandler;
 class SuspensionHandler;
@@ -93,8 +94,6 @@
 class Transaction;
 
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
-typedef SafeMap<MethodReference, SafeMap<uint32_t, std::set<uint32_t>>,
-    MethodReferenceComparator> MethodRefToStringInitRegMap;
 
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
 // (no logical reason to do this). You also may not start logging new roots and stop logging new
@@ -112,13 +111,22 @@
 
 class Runtime {
  public:
+  // Parse raw runtime options.
+  static bool ParseOptions(const RuntimeOptions& raw_options,
+                           bool ignore_unrecognized,
+                           RuntimeArgumentMap* runtime_options);
+
   // Creates and initializes a new runtime.
-  static bool Create(const RuntimeOptions& options, bool ignore_unrecognized)
+  static bool Create(RuntimeArgumentMap&& runtime_options)
+      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
+
+  // Creates and initializes a new runtime.
+  static bool Create(const RuntimeOptions& raw_options, bool ignore_unrecognized)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
 
   // IsAotCompiler for compilers that don't have a running runtime. Only dex2oat currently.
   bool IsAotCompiler() const {
-    return !UseJit() && IsCompiler();
+    return !UseJitCompilation() && IsCompiler();
   }
 
   // IsCompiler is any runtime which has a running compiler, either dex2oat or JIT.
@@ -151,6 +159,11 @@
     return compiler_callbacks_;
   }
 
+  void SetCompilerCallbacks(CompilerCallbacks* callbacks) {
+    CHECK(callbacks != nullptr);
+    compiler_callbacks_ = callbacks;
+  }
+
   bool IsZygote() const {
     return is_zygote_;
   }
@@ -178,10 +191,6 @@
     return image_location_;
   }
 
-  const ProfilerOptions& GetProfilerOptions() const {
-    return profiler_options_;
-  }
-
   // Starts a runtime, which may cause threads to be started and code to run.
   bool Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
@@ -351,7 +360,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
-  ArtMethod* GetResolutionMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* GetResolutionMethod();
 
   bool HasResolutionMethod() const {
     return resolution_method_ != nullptr;
@@ -362,24 +371,27 @@
   ArtMethod* CreateResolutionMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime imt conflicts.
-  ArtMethod* GetImtConflictMethod() SHARED_REQUIRES(Locks::mutator_lock_);
-  ArtMethod* GetImtUnimplementedMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* GetImtConflictMethod();
+  ArtMethod* GetImtUnimplementedMethod();
 
   bool HasImtConflictMethod() const {
     return imt_conflict_method_ != nullptr;
   }
 
+  void FixupConflictTables();
   void SetImtConflictMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
   void SetImtUnimplementedMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* CreateImtConflictMethod() SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* CreateImtConflictMethod(LinearAlloc* linear_alloc)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
-    kSaveAll,
-    kRefsOnly,
-    kRefsAndArgs,
-    kLastCalleeSaveType  // Value used for iteration
+    kSaveAllCalleeSaves,  // All callee-save registers.
+    kSaveRefsOnly,        // Only those callee-save registers that can hold references.
+    kSaveRefsAndArgs,     // References (see above) and arguments (usually caller-save registers).
+    kSaveEverything,      // All registers, including both callee-save and caller-save.
+    kLastCalleeSaveType   // Value used for iteration
   };
 
   bool HasCalleeSaveMethod(CalleeSaveType type) const {
@@ -436,13 +448,14 @@
   jit::Jit* GetJit() {
     return jit_.get();
   }
-  bool UseJit() const {
-    return jit_.get() != nullptr;
-  }
+
+  // Returns true if JIT compilation is enabled. GetJit() will not be null in this case.
+  bool UseJitCompilation() const;
 
   void PreZygoteFork();
   bool InitZygote();
-  void DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa);
+  void InitNonZygoteOrPostFork(
+      JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa);
 
   const instrumentation::Instrumentation* GetInstrumentation() const {
     return &instrumentation_;
@@ -452,8 +465,11 @@
     return &instrumentation_;
   }
 
-  void StartProfiler(const char* profile_output_filename);
-  void UpdateProfilerState(int state);
+  void RegisterAppInfo(const std::vector<std::string>& code_paths,
+                       const std::string& profile_output_filename,
+                       const std::string& foreign_dex_profile_path,
+                       const std::string& app_dir);
+  void NotifyDexLoaded(const std::string& dex_location);
 
   // Transaction support.
   bool IsActiveTransaction() const {
@@ -537,19 +553,21 @@
     return (experimental_flags_ & flags) != ExperimentalFlags::kNone;
   }
 
-  lambda::BoxTable* GetLambdaBoxTable() const {
-    return lambda_box_table_.get();
-  }
-
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
   ArenaPool* GetArenaPool() {
     return arena_pool_.get();
   }
+  ArenaPool* GetJitArenaPool() {
+    return jit_arena_pool_.get();
+  }
   const ArenaPool* GetArenaPool() const {
     return arena_pool_.get();
   }
+
+  void ReclaimArenaPoolMemory();
+
   LinearAlloc* GetLinearAlloc() {
     return linear_alloc_.get();
   }
@@ -558,11 +576,15 @@
     return jit_options_.get();
   }
 
-  MethodRefToStringInitRegMap& GetStringInitMap() {
-    return method_ref_string_init_reg_map_;
+  bool IsDebuggable() const;
+
+  bool IsNativeDebuggable() const {
+    return is_native_debuggable_;
   }
 
-  bool IsDebuggable() const;
+  void SetNativeDebuggable(bool value) {
+    is_native_debuggable_ = value;
+  }
 
   // Returns the build fingerprint, if set. Otherwise an empty string is returned.
   std::string GetFingerprint() {
@@ -583,6 +605,43 @@
   double GetHashTableMinLoadFactor() const;
   double GetHashTableMaxLoadFactor() const;
 
+  void SetSafeMode(bool mode) {
+    safe_mode_ = mode;
+  }
+
+  bool GetDumpNativeStackOnSigQuit() const {
+    return dump_native_stack_on_sig_quit_;
+  }
+
+  bool GetPrunedDalvikCache() const {
+    return pruned_dalvik_cache_;
+  }
+
+  void SetPrunedDalvikCache(bool pruned) {
+    pruned_dalvik_cache_ = pruned;
+  }
+
+  void UpdateProcessState(ProcessState process_state);
+
+  // Returns true if we currently care about long mutator pauses.
+  bool InJankPerceptibleProcessState() const {
+    return process_state_ == kProcessStateJankPerceptible;
+  }
+
+  void RegisterSensitiveThread() const;
+
+  void SetZygoteNoThreadSection(bool val) {
+    zygote_no_threads_ = val;
+  }
+
+  bool IsZygoteNoThreadSection() const {
+    return zygote_no_threads_;
+  }
+
+  // Returns whether the given code can be deoptimized. Code may be compiled with
+  // optimizations that make it impossible to deoptimize.
+  bool IsDeoptimizeable(uintptr_t code) const SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -590,7 +649,7 @@
 
   void BlockSignals();
 
-  bool Init(const RuntimeOptions& options, bool ignore_unrecognized)
+  bool Init(RuntimeArgumentMap&& runtime_options)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
   void InitNativeMethods() REQUIRES(!Locks::mutator_lock_);
   void InitThreadGroups(Thread* self);
@@ -599,12 +658,14 @@
   void StartDaemonThreads();
   void StartSignalCatcher();
 
+  void MaybeSaveJitProfilingInfo();
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
   // NOTE: these must match the gc::ProcessState values as they come directly from the framework.
   static constexpr int kProfileForground = 0;
-  static constexpr int kProfileBackgrouud = 1;
+  static constexpr int kProfileBackground = 1;
 
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
   uint64_t callee_save_methods_[kLastCalleeSaveType];
@@ -641,11 +702,15 @@
   std::string class_path_string_;
   std::vector<std::string> properties_;
 
+  std::vector<ti::Agent> agents_;
+  std::vector<Plugin> plugins_;
+
   // The default stack size for managed threads created by the runtime.
   size_t default_stack_size_;
 
   gc::Heap* heap_;
 
+  std::unique_ptr<ArenaPool> jit_arena_pool_;
   std::unique_ptr<ArenaPool> arena_pool_;
   // Special low 4gb pool for compiler linear alloc. We need ArtFields to be in low 4gb if we are
   // compiling using a 32 bit image on a 64 bit compiler in case we resolve things in the image
@@ -674,8 +739,6 @@
   std::unique_ptr<jit::Jit> jit_;
   std::unique_ptr<jit::JitOptions> jit_options_;
 
-  std::unique_ptr<lambda::BoxTable> lambda_box_table_;
-
   // Fault message, printed when we get a SIGSEGV.
   Mutex fault_message_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
@@ -710,10 +773,6 @@
 
   const bool is_running_on_memory_tool_;
 
-  std::string profile_output_filename_;
-  ProfilerOptions profiler_options_;
-  bool profiler_started_;
-
   std::unique_ptr<TraceConfig> trace_config_;
 
   instrumentation::Instrumentation instrumentation_;
@@ -753,6 +812,9 @@
   // building a statically link version of dex2oat.
   bool no_sig_chain_;
 
+  // Force the use of native bridge even if the app ISA matches the runtime ISA.
+  bool force_native_bridge_;
+
   // Whether or not a native bridge has been loaded.
   //
   // The native bridge allows running native code compiled for a foreign ISA. The way it works is,
@@ -764,6 +826,9 @@
   // that there's no native bridge.
   bool is_native_bridge_loaded_;
 
+  // Whether we are running under a native debugger.
+  bool is_native_debuggable_;
+
   // The maximum number of failed boots we allow before pruning the dalvik cache
   // and trying again. This option is only inspected when we're running as a
   // zygote.
@@ -775,8 +840,6 @@
   // Experimental opcodes should not be used by other production code.
   ExperimentalFlags experimental_flags_;
 
-  MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
-
   // Contains the build fingerprint, if given as a parameter.
   std::string fingerprint_;
 
@@ -786,6 +849,21 @@
   // Whether or not we are on a low RAM device.
   bool is_low_memory_mode_;
 
+  // Whether the application should run in safe mode, that is, interpreter only.
+  bool safe_mode_;
+
+  // Whether threads should dump their native stack on SIGQUIT.
+  bool dump_native_stack_on_sig_quit_;
+
+  // Whether the dalvik cache was pruned when initializing the runtime.
+  bool pruned_dalvik_cache_;
+
+  // Whether or not we currently care about pause times.
+  ProcessState process_state_;
+
+  // Whether zygote code is in a section that should not start threads.
+  bool zygote_no_threads_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 122dcb1..60ebabc 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -28,6 +28,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/stringprintf.h"
+#include "native_stack_dump.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
@@ -36,6 +37,7 @@
 
 static constexpr bool kDumpHeapObjectOnSigsevg = false;
 static constexpr bool kUseSigRTTimeout = true;
+static constexpr bool kDumpNativeStackOnTimeout = true;
 
 struct Backtrace {
  public:
@@ -114,6 +116,9 @@
       switch (signal_code) {
         case SEGV_MAPERR: return "SEGV_MAPERR";
         case SEGV_ACCERR: return "SEGV_ACCERR";
+#if defined(SEGV_BNDERR)
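+        // SEGV_BNDERR ("failed address bound checks") is not defined by all kernel headers.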
+        case SEGV_BNDERR: return "SEGV_BNDERR";
+#endif
       }
       break;
     case SIGTRAP:
@@ -347,7 +352,9 @@
   if (runtime != nullptr) {
     if (IsTimeoutSignal(signal_number)) {
       // Special timeout signal. Try to dump all threads.
-      runtime->GetThreadList()->DumpForSigQuit(LOG(INTERNAL_FATAL));
+      // Note: Do not use DumpForSigQuit, as that might disable native unwinding, and the
+      //       native parts are of value here.
+      runtime->GetThreadList()->Dump(LOG(INTERNAL_FATAL), kDumpNativeStackOnTimeout);
     }
     gc::Heap* heap = runtime->GetHeap();
     LOG(INTERNAL_FATAL) << "Fault message: " << runtime->GetFaultMessage();
diff --git a/runtime/runtime_options.cc b/runtime/runtime_options.cc
index c54461e..e75481c 100644
--- a/runtime/runtime_options.cc
+++ b/runtime/runtime_options.cc
@@ -13,8 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "runtime_options.h"
 
+#include <memory>
+
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 7b5bc1a..146afc7 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -66,10 +66,15 @@
 RUNTIME_OPTIONS_KEY (Unit,                LowMemoryMode)
 RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        (kUseTlab || kUseReadBarrier))
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
-RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
+RUNTIME_OPTIONS_KEY (bool,                UseJitCompilation,              false)
+RUNTIME_OPTIONS_KEY (bool,                DumpNativeStackOnSigQuit,       true)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
-RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
-RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity,           jit::JitCodeCache::kDefaultCapacity)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITOsrThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITPriorityThreadWeight)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITInvokeTransitionWeight)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
@@ -80,10 +85,7 @@
 RUNTIME_OPTIONS_KEY (bool,                Relocate,                       kDefaultMustRelocate)
 RUNTIME_OPTIONS_KEY (bool,                Dex2Oat,                        true)
 RUNTIME_OPTIONS_KEY (bool,                ImageDex2Oat,                   true)
-                                                        // kUseReadBarrier currently works with
-                                                        // the interpreter only.
-                                                        // TODO: make it work with the compiler.
-RUNTIME_OPTIONS_KEY (bool,                Interpret,                      kUseReadBarrier) // -Xint
+RUNTIME_OPTIONS_KEY (bool,                Interpret,                      false) // -Xint
                                                         // Disable the compiler for CC (for now).
 RUNTIME_OPTIONS_KEY (XGcOption,           GcOption)  // -Xgc:
 RUNTIME_OPTIONS_KEY (gc::space::LargeObjectSpaceType, \
@@ -93,15 +95,16 @@
 
 RUNTIME_OPTIONS_KEY (Unit,                DisableExplicitGC)
 RUNTIME_OPTIONS_KEY (Unit,                NoSigChain)
+RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
-RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/method-trace-file.bin")
+RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/misc/trace/method-trace-file.bin")
 RUNTIME_OPTIONS_KEY (unsigned int,        MethodTraceFileSize,            10 * MB)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTraceStreaming)
 RUNTIME_OPTIONS_KEY (TraceClockSource,    ProfileClock,                   kDefaultTraceClockSource)  // -Xprofile:
-RUNTIME_OPTIONS_KEY (TestProfilerOptions, ProfilerOpts)  // -Xenable-profiler, -Xprofile-*
+RUNTIME_OPTIONS_KEY (ProfileSaverOptions, ProfileSaverOpts)  // -Xjitsaveprofilinginfo, -Xps-*
 RUNTIME_OPTIONS_KEY (std::string,         Compiler)
 RUNTIME_OPTIONS_KEY (std::vector<std::string>, \
                                           CompilerOptions)  // -Xcompiler-option ...
@@ -114,19 +117,24 @@
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
-RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{, lambdas, default-methods}
+RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{none, agents}
+RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>, Requires -Xexperimental:agents
+RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>, Requires -Xexperimental:agents
+RUNTIME_OPTIONS_KEY (std::vector<Plugin>,            Plugins)  // -Xplugin:<library> Requires -Xexperimental:runtime-plugins
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
-RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
-                                          BootClassPathDexList)  // TODO: make unique_ptr
+RUNTIME_OPTIONS_KEY (std::vector<std::unique_ptr<const DexFile>>*, \
+                                          BootClassPathDexList)
 RUNTIME_OPTIONS_KEY (InstructionSet,      ImageInstructionSet,            kRuntimeISA)
 RUNTIME_OPTIONS_KEY (CompilerCallbacks*,  CompilerCallbacksPtr)  // TODO: make unique_ptr
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
 RUNTIME_OPTIONS_KEY (int32_t (*)(FILE* stream, const char* format, va_list ap), \
                                           HookVfprintf,                   vfprintf)
+// Use _exit instead of exit so that we won't get DCHECK failures in global data
+// destructors. b/28106055.
 RUNTIME_OPTIONS_KEY (void (*)(int32_t status), \
-                                          HookExit,                       exit)
+                                          HookExit,                       _exit)
                                                                           // We don't call abort(3) by default; see
                                                                           // Runtime::Abort.
 RUNTIME_OPTIONS_KEY (void (*)(),          HookAbort,                      nullptr)
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 88ac00a..5fcb86e 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -17,21 +17,20 @@
 #ifndef ART_RUNTIME_RUNTIME_OPTIONS_H_
 #define ART_RUNTIME_RUNTIME_OPTIONS_H_
 
-#include "runtime/base/variant_map.h"
-#include "cmdline/cmdline_types.h"  // TODO: don't need to include this file here
+#include "base/variant_map.h"
+#include "cmdline_types.h"  // TODO: don't need to include this file here
 
 // Map keys
 #include <vector>
 #include <string>
-#include "runtime/base/logging.h"
-#include "cmdline/unit.h"
+#include "base/logging.h"
 #include "jdwp/jdwp.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "profiler_options.h"
 #include "arch/instruction_set.h"
+#include "jit/profile_saver_options.h"
 #include "verifier/verify_mode.h"
 #include <stdio.h>
 #include <stdarg.h>
@@ -42,7 +41,6 @@
 class DexFile;
 struct XGcOption;
 struct BackgroundGcOption;
-struct TestProfilerOptions;
 
 #define DECLARE_KEY(Type, Name) static const Key<Type> Name
 
@@ -74,7 +72,7 @@
     using Key = RuntimeArgumentMapKey<TValue>;
 
     // List of key declarations, shorthand for 'static const Key<T> Name'
-#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> Name;
+#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> (Name);
 #include "runtime_options.def"
   };
 
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 7ac17b6..49f80f3 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -19,6 +19,7 @@
 
 #include <map>
 #include <memory>
+#include <type_traits>
 
 #include "base/allocator.h"
 #include "base/logging.h"
@@ -92,23 +93,23 @@
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
   }
-  iterator Put(const K& k, const V&& v) {
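+  // Note: a const rvalue reference cannot be moved from; take V&& so emplace can move.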
+  iterator Put(const K& k, V&& v) {
     std::pair<iterator, bool> result = map_.emplace(k, std::move(v));
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
   }
 
   // Used to insert a new mapping at a known position for better performance.
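+  // Note: in C++11, emplace_hint takes a const_iterator, so these overloads do too.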
-  iterator PutBefore(iterator pos, const K& k, const V& v) {
+  iterator PutBefore(const_iterator pos, const K& k, const V& v) {
     // Check that we're using the correct position and the key is not in the map.
     DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
-    DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
+    DCHECK(pos == map_.begin() || map_.key_comp()((--const_iterator(pos))->first, k));
     return map_.emplace_hint(pos, k, v);
   }
-  iterator PutBefore(iterator pos, const K& k, const V&& v) {
+  iterator PutBefore(const_iterator pos, const K& k, V&& v) {
     // Check that we're using the correct position and the key is not in the map.
     DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
-    DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
+    DCHECK(pos == map_.begin() || map_.key_comp()((--const_iterator(pos))->first, k));
     return map_.emplace_hint(pos, k, std::move(v));
   }
 
@@ -124,6 +125,18 @@
     return result.first;
   }
 
+  template <typename CreateFn>
+  V GetOrCreate(const K& k, CreateFn create) {
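+    // Returns the value mapped to k if present; otherwise inserts the result of
+    // create(), reusing the lower_bound position as an emplace hint so the map
+    // is searched only once.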
+    static_assert(std::is_same<V, typename std::result_of<CreateFn()>::type>::value,
+                  "Argument `create` should return a value of type V.");
+    auto lb = lower_bound(k);
+    if (lb != end() && !key_comp()(k, lb->first)) {
+      return lb->second;
+    }
+    auto it = PutBefore(lb, k, create());
+    return it->second;
+  }
+
   bool Equals(const Self& rhs) const {
     return map_ == rhs.map_;
   }
@@ -146,7 +159,7 @@
 
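+// Note: std::map stores its elements as std::pair<const Key, T>, so the tracking
+// allocator must be instantiated with exactly that pair type.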
 template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
 class AllocationTrackingSafeMap : public SafeMap<
-    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>> {
+    Key, T, Compare, TrackingAllocator<std::pair<const Key, T>, kTag>> {
 };
 
 }  // namespace art
diff --git a/runtime/signal_set.h b/runtime/signal_set.h
index c272514..6f88852 100644
--- a/runtime/signal_set.h
+++ b/runtime/signal_set.h
@@ -38,8 +38,8 @@
   }
 
   void Block() {
-    if (sigprocmask(SIG_BLOCK, &set_, nullptr) == -1) {
-      PLOG(FATAL) << "sigprocmask failed";
+    if (pthread_sigmask(SIG_BLOCK, &set_, nullptr) != 0) {
+      PLOG(FATAL) << "pthread_sigmask failed";
     }
   }
 
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
new file mode 100644
index 0000000..e39af2d
--- /dev/null
+++ b/runtime/simulator/Android.mk
@@ -0,0 +1,111 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBART_SIMULATOR_SRC_FILES := \
+  code_simulator.cc \
+  code_simulator_arm64.cc
+
+LIBART_SIMULATOR_CFLAGS := \
+  -DVIXL_INCLUDE_SIMULATOR_AARCH64
+
+# $(1): target or host
+# $(2): ndebug or debug
+define build-libart-simulator
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+  ifneq ($(2),ndebug)
+    ifneq ($(2),debug)
+      $$(error expected ndebug or debug for argument 2, received $(2))
+    endif
+  endif
+
+  art_target_or_host := $(1)
+  art_ndebug_or_debug := $(2)
+
+  include $(CLEAR_VARS)
+  ifeq ($$(art_target_or_host),host)
+     LOCAL_IS_HOST_MODULE := true
+  endif
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  ifeq ($$(art_ndebug_or_debug),ndebug)
+    LOCAL_MODULE := libart-simulator
+  else # debug
+    LOCAL_MODULE := libartd-simulator
+  endif
+
+  LOCAL_MODULE_TAGS := optional
+  LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+
+  LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES)
+  LOCAL_CFLAGS := $$(LIBART_SIMULATOR_CFLAGS)
+
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,$(2))
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
+    LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
+    ifeq ($$(art_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
+    endif
+  endif
+
+  LOCAL_SHARED_LIBRARIES += liblog
+  ifeq ($$(art_ndebug_or_debug),debug)
+    LOCAL_SHARED_LIBRARIES += libartd
+  else
+    LOCAL_SHARED_LIBRARIES += libart
+  endif
+
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
+  LOCAL_MULTILIB := both
+
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
+  # For simulator_arm64.
+  ifeq ($$(art_ndebug_or_debug),debug)
+    LOCAL_SHARED_LIBRARIES += libvixld-arm64
+  else
+    LOCAL_SHARED_LIBRARIES += libvixl-arm64
+  endif
+  ifeq ($$(art_target_or_host),target)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+endef
+
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-libart-simulator,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-libart-simulator,host,debug))
+endif
diff --git a/runtime/simulator/code_simulator.cc b/runtime/simulator/code_simulator.cc
new file mode 100644
index 0000000..1a11160
--- /dev/null
+++ b/runtime/simulator/code_simulator.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simulator/code_simulator.h"
+#include "simulator/code_simulator_arm64.h"
+
+namespace art {
+
+CodeSimulator* CodeSimulator::CreateCodeSimulator(InstructionSet target_isa) {
+  switch (target_isa) {
+    case kArm64:
+      return arm64::CodeSimulatorArm64::CreateCodeSimulatorArm64();
+    default:
+      return nullptr;
+  }
+}
+
+CodeSimulator* CreateCodeSimulator(InstructionSet target_isa) {
+  return CodeSimulator::CreateCodeSimulator(target_isa);
+}
+
+}  // namespace art
diff --git a/runtime/simulator/code_simulator.h b/runtime/simulator/code_simulator.h
new file mode 100644
index 0000000..bd48909
--- /dev/null
+++ b/runtime/simulator/code_simulator.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
+#define ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
+
+#include "arch/instruction_set.h"
+
+namespace art {
+
+class CodeSimulator {
+ public:
+  CodeSimulator() {}
+  virtual ~CodeSimulator() {}
+  // Returns a null pointer if a simulator cannot be found for target_isa.
+  static CodeSimulator* CreateCodeSimulator(InstructionSet target_isa);
+
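+  // Starts simulating execution from the first instruction in code_buffer.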
+  virtual void RunFrom(intptr_t code_buffer) = 0;
+
+  // Get return value according to C ABI.
+  virtual bool GetCReturnBool() const = 0;
+  virtual int32_t GetCReturnInt32() const = 0;
+  virtual int64_t GetCReturnInt64() const = 0;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulator);
+};
+
+extern "C" CodeSimulator* CreateCodeSimulator(InstructionSet target_isa);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
diff --git a/runtime/simulator/code_simulator_arm64.cc b/runtime/simulator/code_simulator_arm64.cc
new file mode 100644
index 0000000..897d4f5
--- /dev/null
+++ b/runtime/simulator/code_simulator_arm64.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simulator/code_simulator_arm64.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+// VIXL has not been tested on 32-bit architectures, so the Simulator is not always
+// available. To avoid linker errors on those architectures, we check whether we can
+// simulate at the beginning of the following methods, using the compile-time constant
+// `kCanSimulate`.
+// TODO: when the Simulator is always available, remove these checks.
+
+CodeSimulatorArm64* CodeSimulatorArm64::CreateCodeSimulatorArm64() {
+  if (kCanSimulate) {
+    return new CodeSimulatorArm64();
+  } else {
+    return nullptr;
+  }
+}
+
+CodeSimulatorArm64::CodeSimulatorArm64()
+    : CodeSimulator(), decoder_(nullptr), simulator_(nullptr) {
+  DCHECK(kCanSimulate);
+  decoder_ = new Decoder();
+  simulator_ = new Simulator(decoder_);
+}
+
+CodeSimulatorArm64::~CodeSimulatorArm64() {
+  DCHECK(kCanSimulate);
+  delete simulator_;
+  delete decoder_;
+}
+
+void CodeSimulatorArm64::RunFrom(intptr_t code_buffer) {
+  DCHECK(kCanSimulate);
+  simulator_->RunFrom(reinterpret_cast<const Instruction*>(code_buffer));
+}
+
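+// Under the AArch64 C calling convention (AAPCS64), return values are passed in W0/X0.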
+bool CodeSimulatorArm64::GetCReturnBool() const {
+  DCHECK(kCanSimulate);
+  return simulator_->ReadWRegister(0);
+}
+
+int32_t CodeSimulatorArm64::GetCReturnInt32() const {
+  DCHECK(kCanSimulate);
+  return simulator_->ReadWRegister(0);
+}
+
+int64_t CodeSimulatorArm64::GetCReturnInt64() const {
+  DCHECK(kCanSimulate);
+  return simulator_->ReadXRegister(0);
+}
+
+}  // namespace arm64
+}  // namespace art
diff --git a/runtime/simulator/code_simulator_arm64.h b/runtime/simulator/code_simulator_arm64.h
new file mode 100644
index 0000000..59ea34f
--- /dev/null
+++ b/runtime/simulator/code_simulator_arm64.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
+#define ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
+
+#include "memory"
+#include "simulator/code_simulator.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/simulator-aarch64.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm64 {
+
+class CodeSimulatorArm64 : public CodeSimulator {
+ public:
+  static CodeSimulatorArm64* CreateCodeSimulatorArm64();
+  virtual ~CodeSimulatorArm64();
+
+  void RunFrom(intptr_t code_buffer) OVERRIDE;
+
+  bool GetCReturnBool() const OVERRIDE;
+  int32_t GetCReturnInt32() const OVERRIDE;
+  int64_t GetCReturnInt64() const OVERRIDE;
+
+ private:
+  CodeSimulatorArm64();
+
+  vixl::aarch64::Decoder* decoder_;
+  vixl::aarch64::Simulator* simulator_;
+
+  // TODO: Enable CodeSimulatorArm64 for more host ISAs once Simulator supports them.
+  static constexpr bool kCanSimulate = (kRuntimeISA == kX86_64);
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulatorArm64);
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9359d27..ababf78 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -18,10 +18,10 @@
 
 #include "arch/context.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/hex_dump.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
-#include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "jit/jit.h"
@@ -36,7 +36,6 @@
 #include "thread.h"
 #include "thread_list.h"
 #include "verify_object-inl.h"
-#include "vmap_table.h"
 
 namespace art {
 
@@ -117,7 +116,7 @@
   const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
   uint32_t native_pc_offset = method_header->NativeQuickPcOffset(cur_quick_frame_pc_);
   CodeInfo code_info = method_header->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
   DCHECK(stack_map.IsValid());
   return code_info.GetInlineInfoOf(stack_map, encoding);
@@ -130,7 +129,13 @@
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
       InlineInfo inline_info = GetCurrentInlineInfo();
-      return GetResolvedMethod(*GetCurrentQuickFrame(), inline_info, depth_in_stack_map);
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
+      return GetResolvedMethod(*GetCurrentQuickFrame(),
+                               inline_info,
+                               encoding.inline_info_encoding,
+                               depth_in_stack_map);
     } else {
       return *cur_quick_frame_;
     }
@@ -144,7 +149,10 @@
   } else if (cur_quick_frame_ != nullptr) {
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
-      return GetCurrentInlineInfo().GetDexPcAtDepth(depth_in_stack_map);
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      return GetCurrentInlineInfo().GetDexPcAtDepth(encoding.inline_info_encoding,
+                                                    depth_in_stack_map);
     } else if (cur_oat_quick_method_header_ == nullptr) {
       return DexFile::kDexNoIndex;
     } else {
@@ -160,7 +168,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_);
 
 mirror::Object* StackVisitor::GetThisObject() const {
-  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
     return nullptr;
@@ -200,33 +208,6 @@
   return GetCurrentOatQuickMethodHeader()->NativeQuickPcOffset(cur_quick_frame_pc_);
 }
 
-bool StackVisitor::IsReferenceVReg(ArtMethod* m, uint16_t vreg) {
-  DCHECK_EQ(m, GetMethod());
-  // Process register map (which native and runtime methods don't have)
-  if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
-    return false;
-  }
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  if (method_header->IsOptimized()) {
-    return true;  // TODO: Implement.
-  }
-  const uint8_t* native_gc_map = method_header->GetNativeGcMap();
-  CHECK(native_gc_map != nullptr) << PrettyMethod(m);
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
-  // Can't be null or how would we compile its instructions?
-  DCHECK(code_item != nullptr) << PrettyMethod(m);
-  NativePcOffsetToReferenceMap map(native_gc_map);
-  size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_));
-  const uint8_t* reg_bitmap = nullptr;
-  if (num_regs > 0) {
-    uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-    reg_bitmap = map.FindBitMap(native_pc_offset);
-    DCHECK(reg_bitmap != nullptr);
-  }
-  // Does this register hold a reference?
-  return vreg < num_regs && TestBitmap(vreg, reg_bitmap);
-}
-
 bool StackVisitor::GetVRegFromDebuggerShadowFrame(uint16_t vreg,
                                                   VRegKind kind,
                                                   uint32_t* val) const {
@@ -258,37 +239,16 @@
     if (GetVRegFromDebuggerShadowFrame(vreg, kind, val)) {
       return true;
     }
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return GetVRegFromOptimizedCode(m, vreg, kind, val);
-    } else {
-      return GetVRegFromQuickCode(m, vreg, kind, val);
-    }
+    DCHECK(cur_oat_quick_method_header_->IsOptimized());
+    return GetVRegFromOptimizedCode(m, vreg, kind, val);
   } else {
     DCHECK(cur_shadow_frame_ != nullptr);
-    *val = cur_shadow_frame_->GetVReg(vreg);
-    return true;
-  }
-}
-
-bool StackVisitor::GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
-                                        uint32_t* val) const {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-    return GetRegisterIfAccessible(reg, kind, val);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    *val = *GetVRegAddrFromQuickCode(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-                                     frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+    if (kind == kReferenceVReg) {
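+      // References may be stored separately from plain vreg values; read them via GetVRegReference().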
+      *val = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(
+          cur_shadow_frame_->GetVRegReference(vreg)));
+    } else {
+      *val = cur_shadow_frame_->GetVReg(vreg);
+    }
     return true;
   }
 }
@@ -303,7 +263,7 @@
   DCHECK_LT(vreg, code_item->registers_size_);
   const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
   CodeInfo code_info = method_header->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
 
   uint32_t native_pc_offset = method_header->NativeQuickPcOffset(cur_quick_frame_pc_);
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
@@ -317,6 +277,9 @@
                                            number_of_dex_registers)
       : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
 
+  if (!dex_register_map.IsValid()) {
+    return false;
+  }
   DexRegisterLocation::Kind location_kind =
       dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info, encoding);
   switch (location_kind) {
@@ -344,12 +307,11 @@
       return false;
     default:
       LOG(FATAL)
-          << "Unexpected location kind"
-          << DexRegisterLocation::PrettyDescriptor(
-                dex_register_map.GetLocationInternalKind(vreg,
-                                                         number_of_dex_registers,
-                                                         code_info,
-                                                         encoding));
+          << "Unexpected location kind "
+          << dex_register_map.GetLocationInternalKind(vreg,
+                                                      number_of_dex_registers,
+                                                      code_info,
+                                                      encoding);
       UNREACHABLE();
   }
 }
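
For orientation, the `IsValid()` guard added above turns the optimized-code lookup into a fail-soft sequence when no Dex register map was emitted at this pc. A condensed sketch of the flow, assembled from the calls visible in this hunk (error handling elided):

    CodeInfo code_info = method_header->GetOptimizedCodeInfo();
    CodeInfoEncoding encoding = code_info.ExtractEncoding();
    StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
    DexRegisterMap dex_register_map =
        code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
    if (!dex_register_map.IsValid()) {
      return false;  // Nothing recorded for this pc: report failure instead of crashing.
    }
    DexRegisterLocation::Kind location_kind =
        dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info, encoding);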
@@ -410,11 +372,8 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
-    } else {
-      return GetVRegPairFromQuickCode(m, vreg, kind_lo, kind_hi, val);
-    }
+    DCHECK(cur_oat_quick_method_header_->IsOptimized());
+    return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
   } else {
     DCHECK(cur_shadow_frame_ != nullptr);
     *val = cur_shadow_frame_->GetVRegLong(vreg);
@@ -422,33 +381,6 @@
   }
 }
 
-bool StackVisitor::GetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind_lo,
-                                            VRegKind kind_hi, uint64_t* val) const {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset_lo, vmap_offset_hi;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
-    bool is_float = (kind_lo == kDoubleLoVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
-    uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
-    return GetRegisterPairIfAccessible(reg_lo, reg_hi, kind_lo, val);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *val = *reinterpret_cast<uint64_t*>(addr);
-    return true;
-  }
-}
-
 bool StackVisitor::GetVRegPairFromOptimizedCode(ArtMethod* m, uint16_t vreg,
                                                 VRegKind kind_lo, VRegKind kind_hi,
                                                 uint64_t* val) const {
@@ -481,52 +413,10 @@
   return true;
 }
 
-bool StackVisitor::SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value,
+bool StackVisitor::SetVReg(ArtMethod* m,
+                           uint16_t vreg,
+                           uint32_t new_value,
                            VRegKind kind) {
-  if (cur_quick_frame_ != nullptr) {
-    DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-    DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return false;
-    } else {
-      return SetVRegFromQuickCode(m, vreg, new_value, kind);
-    }
-  } else {
-    cur_shadow_frame_->SetVReg(vreg, new_value);
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                        VRegKind kind) {
-  DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-  DCHECK(m == GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-    return SetRegisterIfAccessible(reg, new_value, kind);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *addr = new_value;
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegFromDebugger(ArtMethod* m,
-                                       uint16_t vreg,
-                                       uint32_t new_value,
-                                       VRegKind kind) {
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item == nullptr) {
     return false;
@@ -551,93 +441,11 @@
   return true;
 }
 
-bool StackVisitor::SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind) {
-  const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-  if (!IsAccessibleRegister(reg, is_float)) {
-    return false;
-  }
-  const bool target64 = Is64BitInstructionSet(kRuntimeISA);
-
-  // Create a new value that can hold both low 32 and high 32 bits, in
-  // case we are running 64 bits.
-  uintptr_t full_new_value = new_value;
-  // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
-  if (target64) {
-    bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
-    bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
-    if (wide_lo || wide_hi) {
-      uintptr_t old_reg_val = GetRegister(reg, is_float);
-      uint64_t new_vreg_portion = static_cast<uint64_t>(new_value);
-      uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val);
-      uint64_t mask = 0xffffffff;
-      if (wide_lo) {
-        mask = mask << 32;
-      } else {
-        new_vreg_portion = new_vreg_portion << 32;
-      }
-      full_new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion);
-    }
-  }
-  SetRegister(reg, full_new_value, is_float);
-  return true;
-}
-
-bool StackVisitor::SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                               VRegKind kind_lo, VRegKind kind_hi) {
-  if (kind_lo == kLongLoVReg) {
-    DCHECK_EQ(kind_hi, kLongHiVReg);
-  } else if (kind_lo == kDoubleLoVReg) {
-    DCHECK_EQ(kind_hi, kDoubleHiVReg);
-  } else {
-    LOG(FATAL) << "Expected long or double: kind_lo=" << kind_lo << ", kind_hi=" << kind_hi;
-  }
-  if (cur_quick_frame_ != nullptr) {
-    DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-    DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return false;
-    } else {
-      return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
-    }
-  } else {
-    DCHECK(cur_shadow_frame_ != nullptr);
-    cur_shadow_frame_->SetVRegLong(vreg, new_value);
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegPairFromQuickCode(
-    ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset_lo, vmap_offset_hi;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
-    bool is_float = (kind_lo == kDoubleLoVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
-    uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
-    return SetRegisterPairIfAccessible(reg_lo, reg_hi, new_value, is_float);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *reinterpret_cast<uint64_t*>(addr) = new_value;
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegPairFromDebugger(ArtMethod* m,
-                                           uint16_t vreg,
-                                           uint64_t new_value,
-                                           VRegKind kind_lo,
-                                           VRegKind kind_hi) {
+bool StackVisitor::SetVRegPair(ArtMethod* m,
+                               uint16_t vreg,
+                               uint64_t new_value,
+                               VRegKind kind_lo,
+                               VRegKind kind_hi) {
   if (kind_lo == kLongLoVReg) {
     DCHECK_EQ(kind_hi, kLongHiVReg);
   } else if (kind_lo == kDoubleLoVReg) {
@@ -666,25 +474,6 @@
   return true;
 }
 
-bool StackVisitor::SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi,
-                                               uint64_t new_value, bool is_float) {
-  if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
-    return false;
-  }
-  uintptr_t new_value_lo = static_cast<uintptr_t>(new_value & 0xFFFFFFFF);
-  uintptr_t new_value_hi = static_cast<uintptr_t>(new_value >> 32);
-  bool target64 = Is64BitInstructionSet(kRuntimeISA);
-  // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
-  if (target64) {
-    DCHECK_EQ(reg_lo, reg_hi);
-    SetRegister(reg_lo, new_value, is_float);
-  } else {
-    SetRegister(reg_lo, new_value_lo, is_float);
-    SetRegister(reg_hi, new_value_hi, is_float);
-  }
-  return true;
-}
-
 bool StackVisitor::IsAccessibleGPR(uint32_t reg) const {
   DCHECK(context_ != nullptr);
   return context_->IsAccessibleGPR(reg);
@@ -702,12 +491,6 @@
   return context_->GetGPR(reg);
 }
 
-void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
-  DCHECK(context_ != nullptr);
-  context_->SetGPR(reg, value);
-}
-
 bool StackVisitor::IsAccessibleFPR(uint32_t reg) const {
   DCHECK(context_ != nullptr);
   return context_->IsAccessibleFPR(reg);
@@ -719,12 +502,6 @@
   return context_->GetFPR(reg);
 }
 
-void StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
-  DCHECK(context_ != nullptr);
-  context_->SetFPR(reg, value);
-}
-
 uintptr_t StackVisitor::GetReturnPc() const {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   DCHECK(sp != nullptr);
@@ -855,20 +632,18 @@
 
  // If we are the JIT, we may have just compiled the method after the
  // IsQuickToInterpreterBridge check.
-  jit::Jit* const jit = Runtime::Current()->GetJit();
-  if (jit != nullptr &&
-      jit->GetCodeCache()->ContainsCodePtr(reinterpret_cast<const void*>(code))) {
+  Runtime* runtime = Runtime::Current();
+  if (runtime->UseJitCompilation() && runtime->GetJit()->GetCodeCache()->ContainsPc(code)) {
     return;
   }
 
-  uint32_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(
-      EntryPointToCodePointer(code))[-1].code_size_;
+  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
   uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
   CHECK(code_start <= pc && pc <= (code_start + code_size))
       << PrettyMethod(method)
       << " pc=" << std::hex << pc
-      << " code=" << code
-      << " size=" << code_size;
+      << " code_start=" << code_start
+      << " code_size=" << code_size;
 }
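
The `FromEntryPoint` call above replaces the old `[-1]` indexing, and both depend on the same layout: the `OatQuickMethodHeader` sits immediately before the compiled code it describes. A sketch of what such a helper boils down to, inferred from the removed code rather than quoted from the real definition (the actual helper presumably also normalizes the entry point, e.g. stripping the Thumb bit, before stepping back over the header):

    const OatQuickMethodHeader* HeaderFromCode(const void* code_ptr) {
      uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
      // The header is laid out directly below the code, so step back one header.
      return reinterpret_cast<const OatQuickMethodHeader*>(code - sizeof(OatQuickMethodHeader));
    }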
 
 void StackVisitor::SanityCheckFrame() const {
@@ -889,7 +664,7 @@
       // Check class linker linear allocs.
       mirror::Class* klass = method->GetDeclaringClass();
       LinearAlloc* const class_linear_alloc = (klass != nullptr)
-          ? ClassLinker::GetAllocatorForClassLoader(klass->GetClassLoader())
+          ? runtime->GetClassLinker()->GetAllocatorForClassLoader(klass->GetClassLoader())
           : linear_alloc;
       if (!class_linear_alloc->Contains(method)) {
         // Check image space.
@@ -898,8 +673,10 @@
           if (space->IsImageSpace()) {
             auto* image_space = space->AsImageSpace();
             const auto& header = image_space->GetImageHeader();
-            const auto* methods = &header.GetMethodsSection();
-            if (methods->Contains(reinterpret_cast<const uint8_t*>(method) - image_space->Begin())) {
+            const ImageSection& methods = header.GetMethodsSection();
+            const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
+            const size_t offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+            if (methods.Contains(offset) || runtime_methods.Contains(offset)) {
               in_image = true;
               break;
             }
@@ -952,7 +729,7 @@
   Runtime* runtime = Runtime::Current();
 
   if (method->IsAbstract()) {
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
   }
 
   // This goes before IsProxyMethod since runtime methods have a null declaring class.
@@ -960,32 +737,26 @@
     return runtime->GetRuntimeMethodFrameInfo(method);
   }
 
-  // For Proxy method we add special handling for the direct method case  (there is only one
-  // direct method - constructor). Direct method is cloned from original
-  // java.lang.reflect.Proxy class together with code and as a result it is executed as usual
-  // quick compiled method without any stubs. So the frame info should be returned as it is a
-  // quick method not a stub. However, if instrumentation stubs are installed, the
-  // instrumentation->GetQuickCodeFor() returns the artQuickProxyInvokeHandler instead of an
-  // oat code pointer, thus we have to add a special case here.
   if (method->IsProxyMethod()) {
-    if (method->IsDirect()) {
-      CHECK(method->IsConstructor());
-      const void* code_pointer =
-          EntryPointToCodePointer(method->GetEntryPointFromQuickCompiledCode());
-      return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
-    } else {
-      return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-    }
+    // There is only one direct method of a proxy class: the constructor. A direct method is
+    // cloned from the original java.lang.reflect.Proxy class and is executed like a regular
+    // quick-compiled method, without any stubs. Therefore the method must have an
+    // OatQuickMethodHeader.
+    DCHECK(!method->IsDirect() && !method->IsConstructor())
+        << "Constructors of proxy classes must have an OatQuickMethodHeader";
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
   }
 
-  ClassLinker* class_linker = runtime->GetClassLinker();
+  // The only remaining case is if the method is native and uses the generic JNI stub.
   DCHECK(method->IsNative());
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method, sizeof(void*));
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method,
+                                                                           kRuntimePointerSize);
   DCHECK(class_linker->IsQuickGenericJniStub(entry_point)) << PrettyMethod(method);
   // Generic JNI frame.
   uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
   size_t scope_size = HandleScope::SizeOf(handle_refs);
-  QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  QuickMethodFrameInfo callee_info =
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
 
   // Callee saves + handle scope + method ref + alignment
   // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
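
To make the generic JNI sizing concrete, a worked example with invented numbers (only the `+ 1` receiver/class slot and the `HandleScope::SizeOf` call come from the code above):

    // A native method such as
    //   static native void foo(Object a, Object b, int c);
    // has two reference arguments; the extra slot covers the receiver
    // (or the jclass for a static method).
    uint32_t handle_refs = 2 + 1;                          // three handles in total
    size_t scope_size = HandleScope::SizeOf(handle_refs);  // bytes reserved for the scope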
@@ -1021,14 +792,14 @@
             && (cur_oat_quick_method_header_ != nullptr)
             && cur_oat_quick_method_header_->IsOptimized()) {
           CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo();
-          StackMapEncoding encoding = code_info.ExtractEncoding();
+          CodeInfoEncoding encoding = code_info.ExtractEncoding();
           uint32_t native_pc_offset =
               cur_oat_quick_method_header_->NativeQuickPcOffset(cur_quick_frame_pc_);
           StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-          if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding)) {
+          if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
             InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
             DCHECK_EQ(current_inlining_depth_, 0u);
-            for (current_inlining_depth_ = inline_info.GetDepth();
+            for (current_inlining_depth_ = inline_info.GetDepth(encoding.inline_info_encoding);
                  current_inlining_depth_ != 0;
                  --current_inlining_depth_) {
               bool should_continue = VisitFrame();
@@ -1063,10 +834,12 @@
             const instrumentation::InstrumentationStackFrame& instrumentation_frame =
                 GetInstrumentationStackFrame(thread_, instrumentation_stack_depth);
             instrumentation_stack_depth++;
-            if (GetMethod() == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+            if (GetMethod() ==
+                Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
              // Skip runtime save-all-callee-saves frames, which are used to deliver exceptions.
             } else if (instrumentation_frame.interpreter_entry_) {
-              ArtMethod* callee = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+              ArtMethod* callee =
+                  Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
               CHECK_EQ(GetMethod(), callee) << "Expected: " << PrettyMethod(callee) << " Found: "
                                             << PrettyMethod(GetMethod());
             } else {
@@ -1140,7 +913,7 @@
 int StackVisitor::GetVRegOffsetFromQuickCode(const DexFile::CodeItem* code_item,
                                              uint32_t core_spills, uint32_t fp_spills,
                                              size_t frame_size, int reg, InstructionSet isa) {
-  size_t pointer_size = InstructionSetPointerSize(isa);
+  PointerSize pointer_size = InstructionSetPointerSize(isa);
   if (kIsDebugBuild) {
     auto* runtime = Runtime::Current();
     if (runtime != nullptr) {
@@ -1163,7 +936,8 @@
      * Special temporaries may have custom locations and the logic above deals with that.
      * However, non-special temporaries are placed relative to the outs.
      */
-    int temps_start = code_item->outs_size_ * sizeof(uint32_t) + pointer_size /* art method */;
+    int temps_start = code_item->outs_size_ * sizeof(uint32_t)
+        + static_cast<size_t>(pointer_size) /* art method */;
     int relative_offset = (reg - (temp_threshold + max_num_special_temps)) * sizeof(uint32_t);
     return temps_start + relative_offset;
   }  else if (reg < num_regs) {
@@ -1171,11 +945,12 @@
     return locals_start + (reg * sizeof(uint32_t));
   } else {
     // Handle ins.
-    return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + pointer_size /* art method */;
+    return frame_size + ((reg - num_regs) * sizeof(uint32_t))
+        + static_cast<size_t>(pointer_size) /* art method */;
   }
 }
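
A worked example of the frame layout this function encodes, with invented numbers (temps sit just above the outs plus the method pointer; ins live past the callee's frame, in the caller):

    // outs_size = 2, 64-bit ISA (pointer size 8), frame_size = 96, num_regs = 10:
    //   temps_start          = 2 * 4 + 8               = 16
    //   offset of in v10     = 96 + (10 - 10) * 4 + 8  = 104  // first in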
 
-void LockCountData::AddMonitorInternal(Thread* self, mirror::Object* obj) {
+void LockCountData::AddMonitor(Thread* self, mirror::Object* obj) {
   if (obj == nullptr) {
     return;
   }
@@ -1192,7 +967,7 @@
   monitors_->push_back(obj);
 }
 
-void LockCountData::RemoveMonitorInternal(Thread* self, const mirror::Object* obj) {
+void LockCountData::RemoveMonitorOrThrow(Thread* self, const mirror::Object* obj) {
   if (obj == nullptr) {
     return;
   }
@@ -1225,7 +1000,7 @@
   obj->MonitorExit(self);
 }
 
-bool LockCountData::CheckAllMonitorsReleasedInternal(Thread* self) {
+bool LockCountData::CheckAllMonitorsReleasedOrThrow(Thread* self) {
   DCHECK(self != nullptr);
   if (monitors_ != nullptr) {
     if (!monitors_->empty()) {
diff --git a/runtime/stack.h b/runtime/stack.h
index 1276b24..cf33ae1 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -80,39 +80,18 @@
  public:
   // Add the given object to the list of monitors, that is, objects that have been locked. This
   // will not throw (but will be skipped if there is an exception pending on entry).
-  template <bool kLockCounting>
-  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(self != nullptr);
-    if (!kLockCounting) {
-      return;
-    }
-    AddMonitorInternal(self, obj);
-  }
+  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Try to remove the given object from the monitor list, indicating an unlock operation.
   // This will throw an IllegalMonitorStateException (clearing any already pending exception) if
   // no lock was recorded for the object.
-  template <bool kLockCounting>
   void RemoveMonitorOrThrow(Thread* self,
-                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(self != nullptr);
-    if (!kLockCounting) {
-      return;
-    }
-    RemoveMonitorInternal(self, obj);
-  }
+                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Check whether all acquired monitors have been released. This will potentially throw an
   // IllegalMonitorStateException, clearing any already pending exception. Returns true if the
   // check shows that everything is OK with respect to lock counting, false otherwise.
-  template <bool kLockCounting>
-  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(self != nullptr);
-    if (!kLockCounting) {
-      return true;
-    }
-    return CheckAllMonitorsReleasedInternal(self);
-  }
+  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <typename T, typename... Args>
   void VisitMonitors(T visitor, Args&&... args) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -125,12 +104,6 @@
   }
 
  private:
-  // Internal implementations.
-  void AddMonitorInternal(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
-  void RemoveMonitorInternal(Thread* self, const mirror::Object* obj)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool CheckAllMonitorsReleasedInternal(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Stores references to the locked-on objects. As noted, this should be visited during thread
   // marking.
   std::unique_ptr<std::vector<mirror::Object*>> monitors_;
@@ -184,11 +157,28 @@
   }
 
   uint32_t GetDexPC() const {
-    return dex_pc_;
+    return (dex_pc_ptr_ == nullptr) ? dex_pc_ : dex_pc_ptr_ - code_item_->insns_;
+  }
+
+  int16_t GetCachedHotnessCountdown() const {
+    return cached_hotness_countdown_;
+  }
+
+  void SetCachedHotnessCountdown(int16_t cached_hotness_countdown) {
+    cached_hotness_countdown_ = cached_hotness_countdown;
+  }
+
+  int16_t GetHotnessCountdown() const {
+    return hotness_countdown_;
+  }
+
+  void SetHotnessCountdown(int16_t hotness_countdown) {
+    hotness_countdown_ = hotness_countdown;
   }
 
   void SetDexPC(uint32_t dex_pc) {
     dex_pc_ = dex_pc;
+    dex_pc_ptr_ = nullptr;
   }
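
The new `dex_pc_ptr_` gives the frame a second representation of the current dex pc: a raw pointer into the code item's `insns_` array (presumably for the assembly interpreter, given the raw-offset accessors added below). `GetDexPC()` above converts it back with pointer arithmetic over 16-bit code units; a tiny sketch with an invented helper name:

    uint32_t DexPcFromPtr(const uint16_t* dex_pc_ptr, const DexFile::CodeItem* item) {
      // Dex pcs index 16-bit code units, so this is plain pointer subtraction;
      // e.g. a pointer 10 bytes past insns_ denotes dex pc 5.
      return static_cast<uint32_t>(dex_pc_ptr - item->insns_);
    }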
 
   ShadowFrame* GetLink() const {
@@ -206,6 +196,24 @@
     return *reinterpret_cast<const int32_t*>(vreg);
   }
 
+  uint32_t* GetVRegAddr(size_t i) {
+    return &vregs_[i];
+  }
+
+  uint32_t* GetShadowRefAddr(size_t i) {
+    DCHECK(HasReferenceArray());
+    DCHECK_LT(i, NumberOfVRegs());
+    return &vregs_[i + NumberOfVRegs()];
+  }
+
+  void SetCodeItem(const DexFile::CodeItem* code_item) {
+    code_item_ = code_item;
+  }
+
+  const DexFile::CodeItem* GetCodeItem() const {
+    return code_item_;
+  }
+
   float GetVRegFloat(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     // NOTE: Strict-aliasing?
@@ -346,6 +354,10 @@
     return lock_count_data_;
   }
 
+  static size_t LockCountDataOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, lock_count_data_);
+  }
+
   static size_t LinkOffset() {
     return OFFSETOF_MEMBER(ShadowFrame, link_);
   }
@@ -366,6 +378,26 @@
     return OFFSETOF_MEMBER(ShadowFrame, vregs_);
   }
 
+  static size_t ResultRegisterOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, result_register_);
+  }
+
+  static size_t DexPCPtrOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, dex_pc_ptr_);
+  }
+
+  static size_t CodeItemOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, code_item_);
+  }
+
+  static size_t CachedHotnessCountdownOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, cached_hotness_countdown_);
+  }
+
+  static size_t HotnessCountdownOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, hotness_countdown_);
+  }
+
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
                                             ShadowFrame* link,
@@ -375,10 +407,23 @@
     return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
   }
 
+  const uint16_t* GetDexPCPtr() {
+    return dex_pc_ptr_;
+  }
+
+  void SetDexPCPtr(uint16_t* dex_pc_ptr) {
+    dex_pc_ptr_ = dex_pc_ptr;
+  }
+
+  JValue* GetResultRegister() {
+    return result_register_;
+  }
+
  private:
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
-      : number_of_vregs_(num_vregs), link_(link), method_(method), dex_pc_(dex_pc) {
+      : link_(link), method_(method), result_register_(nullptr), dex_pc_ptr_(nullptr),
+        code_item_(nullptr), number_of_vregs_(num_vregs), dex_pc_(dex_pc) {
     // TODO(iam): Remove this parameter, it's an artifact of portable removal
     DCHECK(has_reference_array);
     if (has_reference_array) {
@@ -399,12 +444,17 @@
         const_cast<const ShadowFrame*>(this)->References());
   }
 
-  const uint32_t number_of_vregs_;
   // Link to previous shadow frame or null.
   ShadowFrame* link_;
   ArtMethod* method_;
-  uint32_t dex_pc_;
+  JValue* result_register_;
+  const uint16_t* dex_pc_ptr_;
+  const DexFile::CodeItem* code_item_;
   LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
+  const uint32_t number_of_vregs_;
+  uint32_t dex_pc_;
+  int16_t cached_hotness_countdown_;
+  int16_t hotness_countdown_;
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
@@ -590,9 +640,6 @@
   bool GetNextMethodAndDexPc(ArtMethod** next_method, uint32_t* next_dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsReferenceVReg(ArtMethod* m, uint16_t vreg)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   bool GetVReg(ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -600,22 +647,18 @@
                    uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Values will be set in debugger shadow frames. The debugger will make sure deoptimization
+  // is triggered to make the values effective.
   bool SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Values will be set in debugger shadow frames. The debugger will make sure deoptimization
   // is triggered to make the values effective.
-  bool SetVRegFromDebugger(ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                   VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
-  // is triggered to make the values effective.
-  bool SetVRegPairFromDebugger(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                               VRegKind kind_lo, VRegKind kind_hi)
+  bool SetVRegPair(ArtMethod* m,
+                   uint16_t vreg,
+                   uint64_t new_value,
+                   VRegKind kind_lo,
+                   VRegKind kind_hi)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   uintptr_t* GetGPRAddress(uint32_t reg) const;
@@ -691,13 +734,17 @@
 
   static int GetOutVROffset(uint16_t out_num, InstructionSet isa) {
    // According to the stack model, the first out is above the Method reference.
-    return InstructionSetPointerSize(isa) + out_num * sizeof(uint32_t);
+    return static_cast<size_t>(InstructionSetPointerSize(isa)) + out_num * sizeof(uint32_t);
   }
 
   bool IsInInlinedFrame() const {
     return current_inlining_depth_ != 0;
   }
 
+  size_t GetCurrentInliningDepth() const {
+    return current_inlining_depth_;
+  }
+
   uintptr_t GetCurrentQuickFramePc() const {
     return cur_quick_frame_pc_;
   }
@@ -745,28 +792,15 @@
     DCHECK(IsAccessibleRegister(reg, is_float));
     return is_float ? GetFPR(reg) : GetGPR(reg);
   }
-  void SetRegister(uint32_t reg, uintptr_t value, bool is_float) {
-    DCHECK(IsAccessibleRegister(reg, is_float));
-    if (is_float) {
-      SetFPR(reg, value);
-    } else {
-      SetGPR(reg, value);
-    }
-  }
 
   bool IsAccessibleGPR(uint32_t reg) const;
   uintptr_t GetGPR(uint32_t reg) const;
-  void SetGPR(uint32_t reg, uintptr_t value);
 
   bool IsAccessibleFPR(uint32_t reg) const;
   uintptr_t GetFPR(uint32_t reg) const;
-  void SetFPR(uint32_t reg, uintptr_t value);
 
   bool GetVRegFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
-                            uint32_t* val) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                 uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -774,9 +808,6 @@
   bool GetVRegPairFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind_lo, VRegKind kind_hi,
                                           uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool GetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind_lo,
-                                VRegKind kind_hi, uint64_t* val) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetVRegPairFromOptimizedCode(ArtMethod* m, uint16_t vreg,
                                     VRegKind kind_lo, VRegKind kind_hi,
                                     uint64_t* val) const
@@ -785,19 +816,6 @@
                                    uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool SetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                            VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool SetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                                VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi, uint64_t new_value,
-                                   bool is_float)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   void SanityCheckFrame() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   InlineInfo GetCurrentInlineInfo() const SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 5544507..a7e7c21 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -27,99 +27,106 @@
 constexpr uint32_t StackMap::kNoDexRegisterMap;
 constexpr uint32_t StackMap::kNoInlineInfo;
 
+std::ostream& operator<<(std::ostream& stream, const DexRegisterLocation::Kind& kind) {
+  using Kind = DexRegisterLocation::Kind;
+  switch (kind) {
+    case Kind::kNone:
+      return stream << "none";
+    case Kind::kInStack:
+      return stream << "in stack";
+    case Kind::kInRegister:
+      return stream << "in register";
+    case Kind::kInRegisterHigh:
+      return stream << "in register high";
+    case Kind::kInFpuRegister:
+      return stream << "in fpu register";
+    case Kind::kInFpuRegisterHigh:
+      return stream << "in fpu register high";
+    case Kind::kConstant:
+      return stream << "as constant";
+    case Kind::kInStackLargeOffset:
+      return stream << "in stack (large offset)";
+    case Kind::kConstantLargeValue:
+      return stream << "as constant (large value)";
+  }
+  return stream << "Kind<" << static_cast<uint32_t>(kind) << ">";
+}
+
 DexRegisterLocation::Kind DexRegisterMap::GetLocationInternalKind(
     uint16_t dex_register_number,
     uint16_t number_of_dex_registers,
     const CodeInfo& code_info,
-    const StackMapEncoding& enc) const {
+    const CodeInfoEncoding& enc) const {
   DexRegisterLocationCatalog dex_register_location_catalog =
       code_info.GetDexRegisterLocationCatalog(enc);
   size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
       dex_register_number,
       number_of_dex_registers,
-      code_info.GetNumberOfLocationCatalogEntries());
+      code_info.GetNumberOfLocationCatalogEntries(enc));
   return dex_register_location_catalog.GetLocationInternalKind(location_catalog_entry_index);
 }
 
 DexRegisterLocation DexRegisterMap::GetDexRegisterLocation(uint16_t dex_register_number,
                                                            uint16_t number_of_dex_registers,
                                                            const CodeInfo& code_info,
-                                                           const StackMapEncoding& enc) const {
+                                                           const CodeInfoEncoding& enc) const {
   DexRegisterLocationCatalog dex_register_location_catalog =
       code_info.GetDexRegisterLocationCatalog(enc);
   size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
       dex_register_number,
       number_of_dex_registers,
-      code_info.GetNumberOfLocationCatalogEntries());
+      code_info.GetNumberOfLocationCatalogEntries(enc));
   return dex_register_location_catalog.GetDexRegisterLocation(location_catalog_entry_index);
 }
 
-uint32_t StackMap::LoadAt(size_t number_of_bytes, size_t offset, bool check_max) const {
-  if (number_of_bytes == 0u) {
-    DCHECK(!check_max);
-    return 0;
-  } else if (number_of_bytes == 1u) {
-    uint8_t value = region_.LoadUnaligned<uint8_t>(offset);
-    return (check_max && value == 0xFF) ? -1 : value;
-  } else if (number_of_bytes == 2u) {
-    uint16_t value = region_.LoadUnaligned<uint16_t>(offset);
-    return (check_max && value == 0xFFFF) ? -1 : value;
-  } else if (number_of_bytes == 3u) {
-    uint16_t low = region_.LoadUnaligned<uint16_t>(offset);
-    uint16_t high = region_.LoadUnaligned<uint8_t>(offset + sizeof(uint16_t));
-    uint32_t value = (high << 16) + low;
-    return (check_max && value == 0xFFFFFF) ? -1 : value;
-  } else {
-    DCHECK_EQ(number_of_bytes, 4u);
-    return region_.LoadUnaligned<uint32_t>(offset);
-  }
-}
-
-void StackMap::StoreAt(size_t number_of_bytes, size_t offset, uint32_t value) const {
-  if (number_of_bytes == 0u) {
-    DCHECK_EQ(value, 0u);
-  } else if (number_of_bytes == 1u) {
-    region_.StoreUnaligned<uint8_t>(offset, value);
-  } else if (number_of_bytes == 2u) {
-    region_.StoreUnaligned<uint16_t>(offset, value);
-  } else if (number_of_bytes == 3u) {
-    region_.StoreUnaligned<uint16_t>(offset, Low16Bits(value));
-    region_.StoreUnaligned<uint8_t>(offset + sizeof(uint16_t), High16Bits(value));
-  } else {
-    region_.StoreUnaligned<uint32_t>(offset, value);
-    DCHECK_EQ(number_of_bytes, 4u);
-  }
-}
-
 static void DumpRegisterMapping(std::ostream& os,
                                 size_t dex_register_num,
                                 DexRegisterLocation location,
                                 const std::string& prefix = "v",
                                 const std::string& suffix = "") {
   os << prefix << dex_register_num << ": "
-     << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind())
+     << location.GetInternalKind()
      << " (" << location.GetValue() << ")" << suffix << '\n';
 }
 
+void StackMapEncoding::Dump(VariableIndentationOutputStream* vios) const {
+  vios->Stream()
+      << "StackMapEncoding"
+      << " (native_pc_bit_offset=" << static_cast<uint32_t>(kNativePcBitOffset)
+      << ", dex_pc_bit_offset=" << static_cast<uint32_t>(dex_pc_bit_offset_)
+      << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
+      << ", inline_info_bit_offset=" << static_cast<uint32_t>(inline_info_bit_offset_)
+      << ", register_mask_bit_offset=" << static_cast<uint32_t>(register_mask_bit_offset_)
+      << ", stack_mask_bit_offset=" << static_cast<uint32_t>(stack_mask_bit_offset_)
+      << ")\n";
+}
+
+void InlineInfoEncoding::Dump(VariableIndentationOutputStream* vios) const {
+  vios->Stream()
+      << "InlineInfoEncoding"
+      << " (method_index_bit_offset=" << static_cast<uint32_t>(kMethodIndexBitOffset)
+      << ", dex_pc_bit_offset=" << static_cast<uint32_t>(dex_pc_bit_offset_)
+      << ", invoke_type_bit_offset=" << static_cast<uint32_t>(invoke_type_bit_offset_)
+      << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
+      << ", total_bit_size=" << static_cast<uint32_t>(total_bit_size_)
+      << ")\n";
+}
+
 void CodeInfo::Dump(VariableIndentationOutputStream* vios,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     bool dump_stack_maps) const {
-  StackMapEncoding encoding = ExtractEncoding();
-  uint32_t code_info_size = GetOverallSize();
-  size_t number_of_stack_maps = GetNumberOfStackMaps();
+  CodeInfoEncoding encoding = ExtractEncoding();
+  size_t number_of_stack_maps = GetNumberOfStackMaps(encoding);
   vios->Stream()
-      << "Optimized CodeInfo (size=" << code_info_size
-      << ", number_of_dex_registers=" << number_of_dex_registers
+      << "Optimized CodeInfo (number_of_dex_registers=" << number_of_dex_registers
       << ", number_of_stack_maps=" << number_of_stack_maps
-      << ", has_inline_info=" << encoding.HasInlineInfo()
-      << ", number_of_bytes_for_inline_info=" << encoding.NumberOfBytesForInlineInfo()
-      << ", number_of_bytes_for_dex_register_map=" << encoding.NumberOfBytesForDexRegisterMap()
-      << ", number_of_bytes_for_dex_pc=" << encoding.NumberOfBytesForDexPc()
-      << ", number_of_bytes_for_native_pc=" << encoding.NumberOfBytesForNativePc()
-      << ", number_of_bytes_for_register_mask=" << encoding.NumberOfBytesForRegisterMask()
       << ")\n";
   ScopedIndentation indent1(vios);
+  encoding.stack_map_encoding.Dump(vios);
+  if (HasInlineInfo(encoding)) {
+    encoding.inline_info_encoding.Dump(vios);
+  }
   // Display the Dex register location catalog.
   GetDexRegisterLocationCatalog(encoding).Dump(vios, *this);
   // Display stack maps along with (live) Dex register maps.
@@ -140,8 +147,8 @@
 
 void DexRegisterLocationCatalog::Dump(VariableIndentationOutputStream* vios,
                                       const CodeInfo& code_info) {
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
   size_t location_catalog_size_in_bytes = code_info.GetDexRegisterLocationCatalogSize(encoding);
   vios->Stream()
       << "DexRegisterLocationCatalog (number_of_entries=" << number_of_location_catalog_entries
@@ -156,8 +163,8 @@
 void DexRegisterMap::Dump(VariableIndentationOutputStream* vios,
                           const CodeInfo& code_info,
                           uint16_t number_of_dex_registers) const {
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
   // TODO: Display the bit mask of live Dex registers.
   for (size_t j = 0; j < number_of_dex_registers; ++j) {
     if (IsDexRegisterLive(j)) {
@@ -177,32 +184,32 @@
 
 void StackMap::Dump(VariableIndentationOutputStream* vios,
                     const CodeInfo& code_info,
-                    const StackMapEncoding& encoding,
+                    const CodeInfoEncoding& encoding,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     const std::string& header_suffix) const {
+  StackMapEncoding stack_map_encoding = encoding.stack_map_encoding;
   vios->Stream()
       << "StackMap" << header_suffix
       << std::hex
-      << " [native_pc=0x" << code_offset + GetNativePcOffset(encoding) << "]"
-      << " (dex_pc=0x" << GetDexPc(encoding)
-      << ", native_pc_offset=0x" << GetNativePcOffset(encoding)
-      << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(encoding)
-      << ", inline_info_offset=0x" << GetInlineDescriptorOffset(encoding)
-      << ", register_mask=0x" << GetRegisterMask(encoding)
+      << " [native_pc=0x" << code_offset + GetNativePcOffset(stack_map_encoding) << "]"
+      << " (dex_pc=0x" << GetDexPc(stack_map_encoding)
+      << ", native_pc_offset=0x" << GetNativePcOffset(stack_map_encoding)
+      << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(stack_map_encoding)
+      << ", inline_info_offset=0x" << GetInlineDescriptorOffset(stack_map_encoding)
+      << ", register_mask=0x" << GetRegisterMask(stack_map_encoding)
       << std::dec
       << ", stack_mask=0b";
-  MemoryRegion stack_mask = GetStackMask(encoding);
-  for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
-    vios->Stream() << stack_mask.LoadBit(e - i - 1);
+  for (size_t i = 0, e = GetNumberOfStackMaskBits(stack_map_encoding); i < e; ++i) {
+    vios->Stream() << GetStackMaskBit(stack_map_encoding, e - i - 1);
   }
   vios->Stream() << ")\n";
-  if (HasDexRegisterMap(encoding)) {
+  if (HasDexRegisterMap(stack_map_encoding)) {
     DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(
         *this, encoding, number_of_dex_registers);
     dex_register_map.Dump(vios, code_info, number_of_dex_registers);
   }
-  if (HasInlineInfo(encoding)) {
+  if (HasInlineInfo(stack_map_encoding)) {
     InlineInfo inline_info = code_info.GetInlineInfoOf(*this, encoding);
     // We do not know the length of the dex register maps of inlined frames
     // at this level, so we just pass null to `InlineInfo::Dump` to tell
@@ -214,19 +221,23 @@
 void InlineInfo::Dump(VariableIndentationOutputStream* vios,
                       const CodeInfo& code_info,
                       uint16_t number_of_dex_registers[]) const {
-  vios->Stream() << "InlineInfo with depth " << static_cast<uint32_t>(GetDepth()) << "\n";
+  InlineInfoEncoding inline_info_encoding = code_info.ExtractEncoding().inline_info_encoding;
+  vios->Stream() << "InlineInfo with depth "
+                 << static_cast<uint32_t>(GetDepth(inline_info_encoding))
+                 << "\n";
 
-  for (size_t i = 0; i < GetDepth(); ++i) {
+  for (size_t i = 0; i < GetDepth(inline_info_encoding); ++i) {
     vios->Stream()
         << " At depth " << i
         << std::hex
-        << " (dex_pc=0x" << GetDexPcAtDepth(i)
+        << " (dex_pc=0x" << GetDexPcAtDepth(inline_info_encoding, i)
         << std::dec
-        << ", method_index=" << GetMethodIndexAtDepth(i)
-        << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(i))
+        << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, i)
+        << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(inline_info_encoding,
+                                                                            i))
         << ")\n";
-    if (HasDexRegisterMapAtDepth(i) && (number_of_dex_registers != nullptr)) {
-      StackMapEncoding encoding = code_info.ExtractEncoding();
+    if (HasDexRegisterMapAtDepth(inline_info_encoding, i) && (number_of_dex_registers != nullptr)) {
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
       DexRegisterMap dex_register_map =
           code_info.GetDexRegisterMapAtDepth(i, *this, encoding, number_of_dex_registers[i]);
       ScopedIndentation indent1(vios);
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a15a081..dd7e531 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -19,16 +19,12 @@
 
 #include "base/bit_vector.h"
 #include "base/bit_utils.h"
+#include "dex_file.h"
 #include "memory_region.h"
+#include "leb128.h"
 
 namespace art {
 
-#define ELEMENT_BYTE_OFFSET_AFTER(PreviousElement) \
-  k ## PreviousElement ## Offset + sizeof(PreviousElement ## Type)
-
-#define ELEMENT_BIT_OFFSET_AFTER(PreviousElement) \
-  k ## PreviousElement ## BitOffset + PreviousElement ## BitSize
-
 class VariableIndentationOutputStream;
 
 // Size of a frame slot, in bytes.  This constant is a signed value,
@@ -39,13 +35,9 @@
 // Size of Dex virtual registers.
 static constexpr size_t kVRegSize = 4;
 
-// We encode the number of bytes needed for writing a value on 3 bits
-// (i.e. up to 8 values), for values that we know are maximum 32-bit
-// long.
-static constexpr size_t kNumberOfBitForNumberOfBytesForEncoding = 3;
-
 class CodeInfo;
 class StackMapEncoding;
+struct CodeInfoEncoding;
 
 /**
  * Classes in the following file are wrappers around stack map information backed
@@ -110,30 +102,6 @@
       sizeof(Kind) == 1u,
       "art::DexRegisterLocation::Kind has a size different from one byte.");
 
-  static const char* PrettyDescriptor(Kind kind) {
-    switch (kind) {
-      case Kind::kNone:
-        return "none";
-      case Kind::kInStack:
-        return "in stack";
-      case Kind::kInRegister:
-        return "in register";
-      case Kind::kInRegisterHigh:
-        return "in register high";
-      case Kind::kInFpuRegister:
-        return "in fpu register";
-      case Kind::kInFpuRegisterHigh:
-        return "in fpu register high";
-      case Kind::kConstant:
-        return "as constant";
-      case Kind::kInStackLargeOffset:
-        return "in stack (large offset)";
-      case Kind::kConstantLargeValue:
-        return "as constant (large value)";
-    }
-    UNREACHABLE();
-  }
-
   static bool IsShortLocationKind(Kind kind) {
     switch (kind) {
       case Kind::kInStack:
@@ -149,7 +117,7 @@
         return false;
 
       case Kind::kNone:
-        LOG(FATAL) << "Unexpected location kind " << PrettyDescriptor(kind);
+        LOG(FATAL) << "Unexpected location kind";
     }
     UNREACHABLE();
   }
@@ -215,6 +183,8 @@
   friend class DexRegisterLocationHashFn;
 };
 
+std::ostream& operator<<(std::ostream& stream, const DexRegisterLocation::Kind& kind);
+
 /**
  * Store information on unique Dex register locations used in a method.
  * The information is of the form:
@@ -349,7 +319,7 @@
       case DexRegisterLocation::Kind::kConstantLargeValue:
       case DexRegisterLocation::Kind::kInStackLargeOffset:
       case DexRegisterLocation::Kind::kNone:
-        LOG(FATAL) << "Unexpected location kind " << DexRegisterLocation::PrettyDescriptor(kind);
+        LOG(FATAL) << "Unexpected location kind " << kind;
     }
     UNREACHABLE();
   }
@@ -373,7 +343,7 @@
       case DexRegisterLocation::Kind::kConstantLargeValue:
       case DexRegisterLocation::Kind::kInStackLargeOffset:
       case DexRegisterLocation::Kind::kNone:
-        LOG(FATAL) << "Unexpected location kind " << DexRegisterLocation::PrettyDescriptor(kind);
+        LOG(FATAL) << "Unexpected location kind " << kind;
     }
     UNREACHABLE();
   }
@@ -473,12 +443,15 @@
 class DexRegisterMap {
  public:
   explicit DexRegisterMap(MemoryRegion region) : region_(region) {}
+  DexRegisterMap() {}
+
+  bool IsValid() const { return region_.pointer() != nullptr; }
 
   // Get the surface kind of Dex register `dex_register_number`.
   DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_number,
                                             uint16_t number_of_dex_registers,
                                             const CodeInfo& code_info,
-                                            const StackMapEncoding& enc) const {
+                                            const CodeInfoEncoding& enc) const {
     return DexRegisterLocation::ConvertToSurfaceKind(
         GetLocationInternalKind(dex_register_number, number_of_dex_registers, code_info, enc));
   }
@@ -487,18 +460,18 @@
   DexRegisterLocation::Kind GetLocationInternalKind(uint16_t dex_register_number,
                                                     uint16_t number_of_dex_registers,
                                                     const CodeInfo& code_info,
-                                                    const StackMapEncoding& enc) const;
+                                                    const CodeInfoEncoding& enc) const;
 
   // Get the Dex register location `dex_register_number`.
   DexRegisterLocation GetDexRegisterLocation(uint16_t dex_register_number,
                                              uint16_t number_of_dex_registers,
                                              const CodeInfo& code_info,
-                                             const StackMapEncoding& enc) const;
+                                             const CodeInfoEncoding& enc) const;
 
   int32_t GetStackOffsetInBytes(uint16_t dex_register_number,
                                 uint16_t number_of_dex_registers,
                                 const CodeInfo& code_info,
-                                const StackMapEncoding& enc) const {
+                                const CodeInfoEncoding& enc) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info, enc);
     DCHECK(location.GetKind() == DexRegisterLocation::Kind::kInStack);
@@ -509,25 +482,24 @@
   int32_t GetConstant(uint16_t dex_register_number,
                       uint16_t number_of_dex_registers,
                       const CodeInfo& code_info,
-                      const StackMapEncoding& enc) const {
+                      const CodeInfoEncoding& enc) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info, enc);
-    DCHECK(location.GetKind() == DexRegisterLocation::Kind::kConstant)
-        << DexRegisterLocation::PrettyDescriptor(location.GetKind());
+    DCHECK_EQ(location.GetKind(), DexRegisterLocation::Kind::kConstant);
     return location.GetValue();
   }
 
   int32_t GetMachineRegister(uint16_t dex_register_number,
                              uint16_t number_of_dex_registers,
                              const CodeInfo& code_info,
-                             const StackMapEncoding& enc) const {
+                             const CodeInfoEncoding& enc) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info, enc);
     DCHECK(location.GetInternalKind() == DexRegisterLocation::Kind::kInRegister ||
            location.GetInternalKind() == DexRegisterLocation::Kind::kInRegisterHigh ||
            location.GetInternalKind() == DexRegisterLocation::Kind::kInFpuRegister ||
            location.GetInternalKind() == DexRegisterLocation::Kind::kInFpuRegisterHigh)
-        << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind());
+        << location.GetInternalKind();
     return location.GetValue();
   }
 
@@ -677,109 +649,131 @@
   friend class StackMapStream;
 };
 
+// Represents the bit range of a bit-packed integer field.
+// We reuse the idea from ULEB128p1 to support encoding of -1 (aka 0xFFFFFFFF):
+// if min_value is set to -1, we implicitly subtract one from any loaded value
+// and add one to any stored value. This generalizes to arbitrary negative values;
+// in other words, min_value acts as a base and the stored value is added to it.
+struct FieldEncoding {
+  FieldEncoding(size_t start_offset, size_t end_offset, int32_t min_value = 0)
+      : start_offset_(start_offset), end_offset_(end_offset), min_value_(min_value) {
+    DCHECK_LE(start_offset_, end_offset_);
+    DCHECK_LE(BitSize(), 32u);
+  }
+
+  ALWAYS_INLINE size_t BitSize() const { return end_offset_ - start_offset_; }
+
+  ALWAYS_INLINE int32_t Load(const MemoryRegion& region) const {
+    DCHECK_LE(end_offset_, region.size_in_bits());
+    const size_t bit_count = BitSize();
+    if (bit_count == 0) {
+      // Do not touch any memory if the range is empty.
+      return min_value_;
+    }
+    uint8_t* address = region.start() + start_offset_ / kBitsPerByte;
+    const uint32_t shift = start_offset_ & (kBitsPerByte - 1);
+    // Load the value (reading only the strictly needed bytes).
+    const uint32_t load_bit_count = shift + bit_count;
+    uint32_t value = *address++ >> shift;
+    if (load_bit_count > 8) {
+      value |= static_cast<uint32_t>(*address++) << (8 - shift);
+      if (load_bit_count > 16) {
+        value |= static_cast<uint32_t>(*address++) << (16 - shift);
+        if (load_bit_count > 24) {
+          value |= static_cast<uint32_t>(*address++) << (24 - shift);
+          if (load_bit_count > 32) {
+            value |= static_cast<uint32_t>(*address++) << (32 - shift);
+          }
+        }
+      }
+    }
+    // Clear unwanted most significant bits.
+    uint32_t clear_bit_count = 32 - bit_count;
+    value = (value << clear_bit_count) >> clear_bit_count;
+    return value + min_value_;
+  }
+
+  ALWAYS_INLINE void Store(MemoryRegion region, int32_t value) const {
+    region.StoreBits(start_offset_, value - min_value_, BitSize());
+    DCHECK_EQ(Load(region), value);
+  }
+
+ private:
+  size_t start_offset_;
+  size_t end_offset_;
+  int32_t min_value_;
+};
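
A small usage sketch for `FieldEncoding`'s `min_value` base (buffer, field position and values invented): a 2-bit field with `min_value == -1` represents the range [-1, 2], with -1 stored as raw 0.

    uint8_t buffer[4] = {};
    MemoryRegion region(buffer, sizeof(buffer));
    FieldEncoding enc(/* start_offset */ 3, /* end_offset */ 5, /* min_value */ -1);
    enc.Store(region, -1);             // stored as raw 0b00
    enc.Store(region, 2);              // stored as raw 0b11, i.e. 2 - (-1)
    int32_t value = enc.Load(region);  // raw 0b11 + (-1) == 2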
+
 class StackMapEncoding {
  public:
   StackMapEncoding() {}
 
-  StackMapEncoding(size_t stack_mask_size,
-                   size_t bytes_for_inline_info,
-                   size_t bytes_for_dex_register_map,
-                   size_t bytes_for_dex_pc,
-                   size_t bytes_for_native_pc,
-                   size_t bytes_for_register_mask)
-      : bytes_for_stack_mask_(stack_mask_size),
-        bytes_for_inline_info_(bytes_for_inline_info),
-        bytes_for_dex_register_map_(bytes_for_dex_register_map),
-        bytes_for_dex_pc_(bytes_for_dex_pc),
-        bytes_for_native_pc_(bytes_for_native_pc),
-        bytes_for_register_mask_(bytes_for_register_mask) {}
+  // Set the stack map bit layout based on the given sizes.
+  // Returns the size of one stack map, in bytes.
+  size_t SetFromSizes(size_t native_pc_max,
+                      size_t dex_pc_max,
+                      size_t dex_register_map_size,
+                      size_t inline_info_size,
+                      size_t register_mask_max,
+                      size_t stack_mask_bit_size) {
+    size_t bit_offset = 0;
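+    // Fields are bit-packed back-to-back from bit 0, in this order:
+    // [native_pc | dex_pc | dex_register_map | inline_info | register_mask | stack_mask].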
+    DCHECK_EQ(kNativePcBitOffset, bit_offset);
+    bit_offset += MinimumBitsToStore(native_pc_max);
 
-  static StackMapEncoding CreateFromSizes(size_t stack_mask_size,
-                                          size_t inline_info_size,
-                                          size_t dex_register_map_size,
-                                          size_t dex_pc_max,
-                                          size_t native_pc_max,
-                                          size_t register_mask_max) {
-    return StackMapEncoding(
-        stack_mask_size,
-        // + 1 to also encode kNoInlineInfo: if an inline info offset
-        // is at 0xFF, we want to overflow to a larger encoding, because it will
-        // conflict with kNoInlineInfo.
-        // The offset is relative to the dex register map. TODO: Change this.
-        inline_info_size == 0
-          ? 0
-          : EncodingSizeInBytes(dex_register_map_size + inline_info_size + 1),
-        // + 1 to also encode kNoDexRegisterMap: if a dex register map offset
-        // is at 0xFF, we want to overflow to a larger encoding, because it will
-        // conflict with kNoDexRegisterMap.
-        EncodingSizeInBytes(dex_register_map_size + 1),
-        EncodingSizeInBytes(dex_pc_max),
-        EncodingSizeInBytes(native_pc_max),
-        EncodingSizeInBytes(register_mask_max));
+    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+
+    // We also need +1 for kNoDexRegisterMap, but since the size is strictly
+    // greater than any offset we might try to encode, we already implicitly have it.
+    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += MinimumBitsToStore(dex_register_map_size);
+
+    // We also need +1 for kNoInlineInfo, but since the inline_info_size is strictly
+    // greater than the offset we might try to encode, we already implicitly have it.
+    // If inline_info_size is zero, we can encode only kNoInlineInfo (in zero bits).
+    inline_info_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    if (inline_info_size != 0) {
+      bit_offset += MinimumBitsToStore(dex_register_map_size + inline_info_size);
+    }
+
+    register_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += MinimumBitsToStore(register_mask_max);
+
+    stack_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += stack_mask_bit_size;
+
+    return RoundUp(bit_offset, kBitsPerByte) / kBitsPerByte;
   }
 
-  // Get the size of one stack map of this CodeInfo object, in bytes.
-  // All stack maps of a CodeInfo have the same size.
-  size_t ComputeStackMapSize() const {
-    return bytes_for_register_mask_
-         + bytes_for_stack_mask_
-         + bytes_for_inline_info_
-         + bytes_for_dex_register_map_
-         + bytes_for_dex_pc_
-         + bytes_for_native_pc_;
+  ALWAYS_INLINE FieldEncoding GetNativePcEncoding() const {
+    return FieldEncoding(kNativePcBitOffset, dex_pc_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
+    return FieldEncoding(dex_pc_bit_offset_, dex_register_map_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexRegisterMapEncoding() const {
+    return FieldEncoding(dex_register_map_bit_offset_, inline_info_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetInlineInfoEncoding() const {
+    return FieldEncoding(inline_info_bit_offset_, register_mask_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetRegisterMaskEncoding() const {
+    return FieldEncoding(register_mask_bit_offset_, stack_mask_bit_offset_);
+  }
+  ALWAYS_INLINE size_t GetStackMaskBitOffset() const {
+    // The end offset is not encoded; it is implicitly the end of the stack map entry.
+    return stack_mask_bit_offset_;
   }
 
-  bool HasInlineInfo() const { return bytes_for_inline_info_ > 0; }
-
-  size_t NumberOfBytesForStackMask() const { return bytes_for_stack_mask_; }
-  size_t NumberOfBytesForInlineInfo() const { return bytes_for_inline_info_; }
-  size_t NumberOfBytesForDexRegisterMap() const { return bytes_for_dex_register_map_; }
-  size_t NumberOfBytesForDexPc() const { return bytes_for_dex_pc_; }
-  size_t NumberOfBytesForNativePc() const { return bytes_for_native_pc_; }
-  size_t NumberOfBytesForRegisterMask() const { return bytes_for_register_mask_; }
-
-  size_t ComputeStackMapRegisterMaskOffset() const {
-    return kRegisterMaskOffset;
-  }
-
-  size_t ComputeStackMapStackMaskOffset() const {
-    return ComputeStackMapRegisterMaskOffset() + bytes_for_register_mask_;
-  }
-
-  size_t ComputeStackMapDexPcOffset() const {
-    return ComputeStackMapStackMaskOffset() + bytes_for_stack_mask_;
-  }
-
-  size_t ComputeStackMapNativePcOffset() const {
-    return ComputeStackMapDexPcOffset() + bytes_for_dex_pc_;
-  }
-
-  size_t ComputeStackMapDexRegisterMapOffset() const {
-    return ComputeStackMapNativePcOffset() + bytes_for_native_pc_;
-  }
-
-  size_t ComputeStackMapInlineInfoOffset() const {
-    return ComputeStackMapDexRegisterMapOffset() + bytes_for_dex_register_map_;
-  }
+  void Dump(VariableIndentationOutputStream* vios) const;
 
  private:
-  static size_t EncodingSizeInBytes(size_t max_element) {
-    DCHECK(IsUint<32>(max_element));
-    return (max_element == 0) ? 0
-         : IsUint<8>(max_element) ? 1
-         : IsUint<16>(max_element) ? 2
-         : IsUint<24>(max_element) ? 3
-         : 4;
-  }
-
-  static constexpr int kRegisterMaskOffset = 0;
-
-  size_t bytes_for_stack_mask_;
-  size_t bytes_for_inline_info_;
-  size_t bytes_for_dex_register_map_;
-  size_t bytes_for_dex_pc_;
-  size_t bytes_for_native_pc_;
-  size_t bytes_for_register_mask_;
+  static constexpr size_t kNativePcBitOffset = 0;
+  uint8_t dex_pc_bit_offset_;
+  uint8_t dex_register_map_bit_offset_;
+  uint8_t inline_info_bit_offset_;
+  uint8_t register_mask_bit_offset_;
+  uint8_t stack_mask_bit_offset_;
 };
 
 /**
@@ -792,7 +786,7 @@
  *
  * The information is of the form:
  *
- *   [dex_pc, native_pc_offset, dex_register_map_offset, inlining_info_offset, register_mask,
+ *   [native_pc_offset, dex_pc, dex_register_map_offset, inlining_info_offset, register_mask,
  *   stack_mask].
  */
 class StackMap {
@@ -800,89 +794,75 @@
   StackMap() {}
   explicit StackMap(MemoryRegion region) : region_(region) {}
 
-  bool IsValid() const { return region_.pointer() != nullptr; }
+  ALWAYS_INLINE bool IsValid() const { return region_.pointer() != nullptr; }
 
-  uint32_t GetDexPc(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForDexPc(), encoding.ComputeStackMapDexPcOffset());
+  ALWAYS_INLINE uint32_t GetDexPc(const StackMapEncoding& encoding) const {
+    return encoding.GetDexPcEncoding().Load(region_);
   }
 
-  void SetDexPc(const StackMapEncoding& encoding, uint32_t dex_pc) {
-    StoreAt(encoding.NumberOfBytesForDexPc(), encoding.ComputeStackMapDexPcOffset(), dex_pc);
+  ALWAYS_INLINE void SetDexPc(const StackMapEncoding& encoding, uint32_t dex_pc) {
+    encoding.GetDexPcEncoding().Store(region_, dex_pc);
   }
 
-  uint32_t GetNativePcOffset(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForNativePc(), encoding.ComputeStackMapNativePcOffset());
+  ALWAYS_INLINE uint32_t GetNativePcOffset(const StackMapEncoding& encoding) const {
+    return encoding.GetNativePcEncoding().Load(region_);
   }
 
-  void SetNativePcOffset(const StackMapEncoding& encoding, uint32_t native_pc_offset) {
-    StoreAt(encoding.NumberOfBytesForNativePc(),
-            encoding.ComputeStackMapNativePcOffset(),
-            native_pc_offset);
+  ALWAYS_INLINE void SetNativePcOffset(const StackMapEncoding& encoding, uint32_t native_pc_offset) {
+    encoding.GetNativePcEncoding().Store(region_, native_pc_offset);
   }
 
-  uint32_t GetDexRegisterMapOffset(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForDexRegisterMap(),
-                  encoding.ComputeStackMapDexRegisterMapOffset(),
-                  /* check_max */ true);
+  ALWAYS_INLINE uint32_t GetDexRegisterMapOffset(const StackMapEncoding& encoding) const {
+    return encoding.GetDexRegisterMapEncoding().Load(region_);
   }
 
-  void SetDexRegisterMapOffset(const StackMapEncoding& encoding, uint32_t offset) {
-    StoreAt(encoding.NumberOfBytesForDexRegisterMap(),
-            encoding.ComputeStackMapDexRegisterMapOffset(),
-            offset);
+  ALWAYS_INLINE void SetDexRegisterMapOffset(const StackMapEncoding& encoding, uint32_t offset) {
+    encoding.GetDexRegisterMapEncoding().Store(region_, offset);
   }
 
-  uint32_t GetInlineDescriptorOffset(const StackMapEncoding& encoding) const {
-    if (!encoding.HasInlineInfo()) return kNoInlineInfo;
-    return LoadAt(encoding.NumberOfBytesForInlineInfo(),
-                  encoding.ComputeStackMapInlineInfoOffset(),
-                  /* check_max */ true);
+  ALWAYS_INLINE uint32_t GetInlineDescriptorOffset(const StackMapEncoding& encoding) const {
+    return encoding.GetInlineInfoEncoding().Load(region_);
   }
 
-  void SetInlineDescriptorOffset(const StackMapEncoding& encoding, uint32_t offset) {
-    DCHECK(encoding.HasInlineInfo());
-    StoreAt(encoding.NumberOfBytesForInlineInfo(),
-            encoding.ComputeStackMapInlineInfoOffset(),
-            offset);
+  ALWAYS_INLINE void SetInlineDescriptorOffset(const StackMapEncoding& encoding, uint32_t offset) {
+    encoding.GetInlineInfoEncoding().Store(region_, offset);
   }
 
-  uint32_t GetRegisterMask(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForRegisterMask(),
-                  encoding.ComputeStackMapRegisterMaskOffset());
+  ALWAYS_INLINE uint32_t GetRegisterMask(const StackMapEncoding& encoding) const {
+    return encoding.GetRegisterMaskEncoding().Load(region_);
   }
 
-  void SetRegisterMask(const StackMapEncoding& encoding, uint32_t mask) {
-    StoreAt(encoding.NumberOfBytesForRegisterMask(),
-            encoding.ComputeStackMapRegisterMaskOffset(),
-            mask);
+  ALWAYS_INLINE void SetRegisterMask(const StackMapEncoding& encoding, uint32_t mask) {
+    encoding.GetRegisterMaskEncoding().Store(region_, mask);
   }
 
-  MemoryRegion GetStackMask(const StackMapEncoding& encoding) const {
-    return region_.Subregion(encoding.ComputeStackMapStackMaskOffset(),
-                             encoding.NumberOfBytesForStackMask());
+  ALWAYS_INLINE size_t GetNumberOfStackMaskBits(const StackMapEncoding& encoding) const {
+    return region_.size_in_bits() - encoding.GetStackMaskBitOffset();
   }
 
-  void SetStackMask(const StackMapEncoding& encoding, const BitVector& sp_map) {
-    MemoryRegion region = GetStackMask(encoding);
-    sp_map.CopyTo(region.start(), region.size());
+  ALWAYS_INLINE bool GetStackMaskBit(const StackMapEncoding& encoding, size_t index) const {
+    return region_.LoadBit(encoding.GetStackMaskBitOffset() + index);
   }
 
-  bool HasDexRegisterMap(const StackMapEncoding& encoding) const {
+  ALWAYS_INLINE void SetStackMaskBit(const StackMapEncoding& encoding, size_t index, bool value) {
+    region_.StoreBit(encoding.GetStackMaskBitOffset() + index, value);
+  }
+
+  ALWAYS_INLINE bool HasDexRegisterMap(const StackMapEncoding& encoding) const {
     return GetDexRegisterMapOffset(encoding) != kNoDexRegisterMap;
   }
 
-  bool HasInlineInfo(const StackMapEncoding& encoding) const {
+  ALWAYS_INLINE bool HasInlineInfo(const StackMapEncoding& encoding) const {
     return GetInlineDescriptorOffset(encoding) != kNoInlineInfo;
   }
 
-  bool Equals(const StackMap& other) const {
-    return region_.pointer() == other.region_.pointer()
-       && region_.size() == other.region_.size();
+  ALWAYS_INLINE bool Equals(const StackMap& other) const {
+    return region_.pointer() == other.region_.pointer() && region_.size() == other.region_.size();
   }
 
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& code_info,
-            const StackMapEncoding& encoding,
+            const CodeInfoEncoding& encoding,
             uint32_t code_offset,
             uint16_t number_of_dex_registers,
             const std::string& header_suffix = "") const;
@@ -898,276 +878,309 @@
  private:
   static constexpr int kFixedSize = 0;
 
-  // Loads `number_of_bytes` at the given `offset` and assemble a uint32_t. If `check_max` is true,
-  // this method converts a maximum value of size `number_of_bytes` into a uint32_t 0xFFFFFFFF.
-  uint32_t LoadAt(size_t number_of_bytes, size_t offset, bool check_max = false) const;
-  void StoreAt(size_t number_of_bytes, size_t offset, uint32_t value) const;
-
   MemoryRegion region_;
 
   friend class StackMapStream;
 };
 
+class InlineInfoEncoding {
+ public:
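+  // Bit layout of one inline entry, as established by SetFromSizes() below:
+  // [is_last | method_index | dex_pc | invoke_type | dex_register_map_offset].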
+  void SetFromSizes(size_t method_index_max,
+                    size_t dex_pc_max,
+                    size_t invoke_type_max,
+                    size_t dex_register_map_size) {
+    total_bit_size_ = kMethodIndexBitOffset;
+    total_bit_size_ += MinimumBitsToStore(method_index_max);
+
+    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    // Note: We're not encoding the dex pc if there is none. That's the case
+    // for an intrinsified native method, such as String.charAt().
+    if (dex_pc_max != DexFile::kDexNoIndex) {
+      total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    }
+
+    invoke_type_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(invoke_type_max);
+
+    // We also need +1 for kNoDexRegisterMap, but since the size is strictly
+    // greater than any offset we might try to encode, we already implicitly have it.
+    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
+  }
+
+  ALWAYS_INLINE FieldEncoding GetMethodIndexEncoding() const {
+    return FieldEncoding(kMethodIndexBitOffset, dex_pc_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
+    return FieldEncoding(dex_pc_bit_offset_, invoke_type_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetInvokeTypeEncoding() const {
+    return FieldEncoding(invoke_type_bit_offset_, dex_register_map_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexRegisterMapEncoding() const {
+    return FieldEncoding(dex_register_map_bit_offset_, total_bit_size_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE size_t GetEntrySize() const {
+    return RoundUp(total_bit_size_, kBitsPerByte) / kBitsPerByte;
+  }
+
+  void Dump(VariableIndentationOutputStream* vios) const;
+
+ private:
+  static constexpr uint8_t kIsLastBitOffset = 0;
+  static constexpr uint8_t kMethodIndexBitOffset = 1;
+  uint8_t dex_pc_bit_offset_;
+  uint8_t invoke_type_bit_offset_;
+  uint8_t dex_register_map_bit_offset_;
+  uint8_t total_bit_size_;
+};
+
 /**
  * Inline information for a specific PC. The information is of the form:
  *
- *   [inlining_depth, entry+]
- *
- * where `entry` is of the form:
- *
- *   [dex_pc, method_index, dex_register_map_offset].
+ *   [is_last, method_index, dex_pc, invoke_type, dex_register_map_offset]+.
  */
 class InlineInfo {
  public:
-  // Memory layout: fixed contents.
-  typedef uint8_t DepthType;
-  // Memory layout: single entry contents.
-  typedef uint32_t MethodIndexType;
-  typedef uint32_t DexPcType;
-  typedef uint8_t InvokeTypeType;
-  typedef uint32_t DexRegisterMapType;
-
-  explicit InlineInfo(MemoryRegion region) : region_(region) {}
-
-  DepthType GetDepth() const {
-    return region_.LoadUnaligned<DepthType>(kDepthOffset);
+  explicit InlineInfo(MemoryRegion region) : region_(region) {
   }
 
-  void SetDepth(DepthType depth) {
-    region_.StoreUnaligned<DepthType>(kDepthOffset, depth);
+  ALWAYS_INLINE uint32_t GetDepth(const InlineInfoEncoding& encoding) const {
+    size_t depth = 0;
+    while (!GetRegionAtDepth(encoding, depth++).LoadBit(0)) { }  // Check is_last bit.
+    return depth;
   }
 
-  MethodIndexType GetMethodIndexAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<MethodIndexType>(
-        kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset);
+  ALWAYS_INLINE void SetDepth(const InlineInfoEncoding& encoding, uint32_t depth) {
+    DCHECK_GT(depth, 0u);
+    for (size_t d = 0; d < depth; ++d) {
+      GetRegionAtDepth(encoding, d).StoreBit(0, d == depth - 1);  // Set is_last bit.
+    }
   }
 
-  void SetMethodIndexAtDepth(DepthType depth, MethodIndexType index) {
-    region_.StoreUnaligned<MethodIndexType>(
-        kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset, index);
+  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                               uint32_t depth) const {
+    return encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  DexPcType GetDexPcAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<DexPcType>(
-        kFixedSize + depth * SingleEntrySize() + kDexPcOffset);
+  ALWAYS_INLINE void SetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                           uint32_t depth,
+                                           uint32_t index) {
+    encoding.GetMethodIndexEncoding().Store(GetRegionAtDepth(encoding, depth), index);
   }
 
-  void SetDexPcAtDepth(DepthType depth, DexPcType dex_pc) {
-    region_.StoreUnaligned<DexPcType>(
-        kFixedSize + depth * SingleEntrySize() + kDexPcOffset, dex_pc);
+  ALWAYS_INLINE uint32_t GetDexPcAtDepth(const InlineInfoEncoding& encoding,
+                                         uint32_t depth) const {
+    return encoding.GetDexPcEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  InvokeTypeType GetInvokeTypeAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<InvokeTypeType>(
-        kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset);
+  ALWAYS_INLINE void SetDexPcAtDepth(const InlineInfoEncoding& encoding,
+                                     uint32_t depth,
+                                     uint32_t dex_pc) {
+    encoding.GetDexPcEncoding().Store(GetRegionAtDepth(encoding, depth), dex_pc);
   }
 
-  void SetInvokeTypeAtDepth(DepthType depth, InvokeTypeType invoke_type) {
-    region_.StoreUnaligned<InvokeTypeType>(
-        kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset, invoke_type);
+  ALWAYS_INLINE uint32_t GetInvokeTypeAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    return encoding.GetInvokeTypeEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  DexRegisterMapType GetDexRegisterMapOffsetAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<DexRegisterMapType>(
-        kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset);
+  ALWAYS_INLINE void SetInvokeTypeAtDepth(const InlineInfoEncoding& encoding,
+                                          uint32_t depth,
+                                          uint32_t invoke_type) {
+    encoding.GetInvokeTypeEncoding().Store(GetRegionAtDepth(encoding, depth), invoke_type);
   }
 
-  void SetDexRegisterMapOffsetAtDepth(DepthType depth, DexRegisterMapType offset) {
-    region_.StoreUnaligned<DexRegisterMapType>(
-        kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset, offset);
+  ALWAYS_INLINE uint32_t GetDexRegisterMapOffsetAtDepth(const InlineInfoEncoding& encoding,
+                                                        uint32_t depth) const {
+    return encoding.GetDexRegisterMapEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  bool HasDexRegisterMapAtDepth(DepthType depth) const {
-    return GetDexRegisterMapOffsetAtDepth(depth) != StackMap::kNoDexRegisterMap;
+  ALWAYS_INLINE void SetDexRegisterMapOffsetAtDepth(const InlineInfoEncoding& encoding,
+                                                    uint32_t depth,
+                                                    uint32_t offset) {
+    encoding.GetDexRegisterMapEncoding().Store(GetRegionAtDepth(encoding, depth), offset);
   }
 
-  static size_t SingleEntrySize() {
-    return kFixedEntrySize;
+  ALWAYS_INLINE bool HasDexRegisterMapAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    return GetDexRegisterMapOffsetAtDepth(encoding, depth) != StackMap::kNoDexRegisterMap;
   }
 
   void Dump(VariableIndentationOutputStream* vios,
-            const CodeInfo& info, uint16_t* number_of_dex_registers) const;
-
+            const CodeInfo& info,
+            uint16_t* number_of_dex_registers) const;
 
  private:
-  static constexpr int kDepthOffset = 0;
-  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(Depth);
-
-  static constexpr int kMethodIndexOffset = 0;
-  static constexpr int kDexPcOffset = ELEMENT_BYTE_OFFSET_AFTER(MethodIndex);
-  static constexpr int kInvokeTypeOffset = ELEMENT_BYTE_OFFSET_AFTER(DexPc);
-  static constexpr int kDexRegisterMapOffset = ELEMENT_BYTE_OFFSET_AFTER(InvokeType);
-  static constexpr int kFixedEntrySize = ELEMENT_BYTE_OFFSET_AFTER(DexRegisterMap);
+  ALWAYS_INLINE MemoryRegion GetRegionAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    size_t entry_size = encoding.GetEntrySize();
+    DCHECK_GT(entry_size, 0u);
+    return region_.Subregion(depth * entry_size, entry_size);
+  }
 
   MemoryRegion region_;
+};
 
-  friend class CodeInfo;
-  friend class StackMap;
-  friend class StackMapStream;
+// Most of the fields are encoded as ULEB128 to save space.
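+// The serialized layout, as written by Compress() and parsed by the constructor, is:
+// [non_header_size, number_of_stack_maps, stack_map_size_in_bytes,
+//  number_of_location_catalog_entries, StackMapEncoding, InlineInfoEncoding?],
+// where the trailing InlineInfoEncoding is present only if inline info is in use.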
+struct CodeInfoEncoding {
+  uint32_t non_header_size;
+  uint32_t number_of_stack_maps;
+  uint32_t stack_map_size_in_bytes;
+  uint32_t number_of_location_catalog_entries;
+  StackMapEncoding stack_map_encoding;
+  InlineInfoEncoding inline_info_encoding;
+  uint8_t header_size;
+
+  CodeInfoEncoding() { }
+
+  explicit CodeInfoEncoding(const void* data) {
+    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
+    non_header_size = DecodeUnsignedLeb128(&ptr);
+    number_of_stack_maps = DecodeUnsignedLeb128(&ptr);
+    stack_map_size_in_bytes = DecodeUnsignedLeb128(&ptr);
+    number_of_location_catalog_entries = DecodeUnsignedLeb128(&ptr);
+    static_assert(alignof(StackMapEncoding) == 1,
+                  "StackMapEncoding should not require alignment");
+    stack_map_encoding = *reinterpret_cast<const StackMapEncoding*>(ptr);
+    ptr += sizeof(StackMapEncoding);
+    if (stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0) {
+      static_assert(alignof(InlineInfoEncoding) == 1,
+                    "InlineInfoEncoding should not require alignment");
+      inline_info_encoding = *reinterpret_cast<const InlineInfoEncoding*>(ptr);
+      ptr += sizeof(InlineInfoEncoding);
+    } else {
+      inline_info_encoding = InlineInfoEncoding{};  // NOLINT.
+    }
+    header_size = dchecked_integral_cast<uint8_t>(ptr - reinterpret_cast<const uint8_t*>(data));
+  }
+
+  template<typename Vector>
+  void Compress(Vector* dest) const {
+    EncodeUnsignedLeb128(dest, non_header_size);
+    EncodeUnsignedLeb128(dest, number_of_stack_maps);
+    EncodeUnsignedLeb128(dest, stack_map_size_in_bytes);
+    EncodeUnsignedLeb128(dest, number_of_location_catalog_entries);
+    const uint8_t* stack_map_ptr = reinterpret_cast<const uint8_t*>(&stack_map_encoding);
+    dest->insert(dest->end(), stack_map_ptr, stack_map_ptr + sizeof(StackMapEncoding));
+    if (stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0) {
+      const uint8_t* inline_info_ptr = reinterpret_cast<const uint8_t*>(&inline_info_encoding);
+      dest->insert(dest->end(), inline_info_ptr, inline_info_ptr + sizeof(InlineInfoEncoding));
+    }
+  }
 };
 
 /**
  * Wrapper around all compiler information collected for a method.
  * The information is of the form:
  *
- *   [overall_size, encoding_info, number_of_location_catalog_entries, number_of_stack_maps,
- *   stack_mask_size, DexRegisterLocationCatalog+, StackMap+, DexRegisterMap+, InlineInfo*]
+ *   [CodeInfoEncoding, StackMap+, DexRegisterLocationCatalog+, DexRegisterMap+, InlineInfo*]
  *
- * where `encoding_info` is of the form:
+ * where CodeInfoEncoding is of the form:
  *
- *  [has_inline_info, inline_info_size_in_bytes, dex_register_map_size_in_bytes,
- *  dex_pc_size_in_bytes, native_pc_size_in_bytes, register_mask_size_in_bytes].
+ *   [non_header_size, number_of_stack_maps, stack_map_size_in_bytes,
+ *    number_of_location_catalog_entries, StackMapEncoding]
  */
 class CodeInfo {
  public:
-  // Memory layout: fixed contents.
-  typedef uint32_t OverallSizeType;
-  typedef uint16_t EncodingInfoType;
-  typedef uint32_t NumberOfLocationCatalogEntriesType;
-  typedef uint32_t NumberOfStackMapsType;
-  typedef uint32_t StackMaskSizeType;
-
-  // Memory (bit) layout: encoding info.
-  static constexpr int HasInlineInfoBitSize = 1;
-  static constexpr int InlineInfoBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int DexRegisterMapBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int DexPcBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int NativePcBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int RegisterMaskBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-
-  explicit CodeInfo(MemoryRegion region) : region_(region) {}
+  explicit CodeInfo(MemoryRegion region) : region_(region) {
+  }
 
   explicit CodeInfo(const void* data) {
-    uint32_t size = reinterpret_cast<const uint32_t*>(data)[0];
-    region_ = MemoryRegion(const_cast<void*>(data), size);
+    CodeInfoEncoding encoding = CodeInfoEncoding(data);
+    region_ = MemoryRegion(const_cast<void*>(data),
+                           encoding.header_size + encoding.non_header_size);
   }
 
-  StackMapEncoding ExtractEncoding() const {
-    return StackMapEncoding(region_.LoadUnaligned<uint32_t>(kStackMaskSizeOffset),
-                            GetNumberOfBytesForEncoding(kInlineInfoBitOffset),
-                            GetNumberOfBytesForEncoding(kDexRegisterMapBitOffset),
-                            GetNumberOfBytesForEncoding(kDexPcBitOffset),
-                            GetNumberOfBytesForEncoding(kNativePcBitOffset),
-                            GetNumberOfBytesForEncoding(kRegisterMaskBitOffset));
+  CodeInfoEncoding ExtractEncoding() const {
+    return CodeInfoEncoding(region_.start());
   }
 
-  void SetEncoding(const StackMapEncoding& encoding) {
-    region_.StoreUnaligned<uint32_t>(kStackMaskSizeOffset, encoding.NumberOfBytesForStackMask());
-    region_.StoreBit(kHasInlineInfoBitOffset, encoding.NumberOfBytesForInlineInfo() != 0);
-    SetEncodingAt(kInlineInfoBitOffset, encoding.NumberOfBytesForInlineInfo());
-    SetEncodingAt(kDexRegisterMapBitOffset, encoding.NumberOfBytesForDexRegisterMap());
-    SetEncodingAt(kDexPcBitOffset, encoding.NumberOfBytesForDexPc());
-    SetEncodingAt(kNativePcBitOffset, encoding.NumberOfBytesForNativePc());
-    SetEncodingAt(kRegisterMaskBitOffset, encoding.NumberOfBytesForRegisterMask());
+  bool HasInlineInfo(const CodeInfoEncoding& encoding) const {
+    return encoding.stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0;
   }
 
-  void SetEncodingAt(size_t bit_offset, size_t number_of_bytes) {
-    region_.StoreBits(bit_offset, number_of_bytes, kNumberOfBitForNumberOfBytesForEncoding);
-  }
-
-  size_t GetNumberOfBytesForEncoding(size_t bit_offset) const {
-    return region_.LoadBits(bit_offset, kNumberOfBitForNumberOfBytesForEncoding);
-  }
-
-  bool HasInlineInfo() const {
-    return region_.LoadBit(kHasInlineInfoBitOffset);
-  }
-
-  DexRegisterLocationCatalog GetDexRegisterLocationCatalog(const StackMapEncoding& encoding) const {
+  DexRegisterLocationCatalog GetDexRegisterLocationCatalog(const CodeInfoEncoding& encoding) const {
     return DexRegisterLocationCatalog(region_.Subregion(
         GetDexRegisterLocationCatalogOffset(encoding),
         GetDexRegisterLocationCatalogSize(encoding)));
   }
 
-  StackMap GetStackMapAt(size_t i, const StackMapEncoding& encoding) const {
-    size_t stack_map_size = encoding.ComputeStackMapSize();
+  StackMap GetStackMapAt(size_t i, const CodeInfoEncoding& encoding) const {
+    size_t stack_map_size = encoding.stack_map_size_in_bytes;
     return StackMap(GetStackMaps(encoding).Subregion(i * stack_map_size, stack_map_size));
   }
 
-  OverallSizeType GetOverallSize() const {
-    return region_.LoadUnaligned<OverallSizeType>(kOverallSizeOffset);
+  uint32_t GetNumberOfLocationCatalogEntries(const CodeInfoEncoding& encoding) const {
+    return encoding.number_of_location_catalog_entries;
   }
 
-  void SetOverallSize(OverallSizeType size) {
-    region_.StoreUnaligned<OverallSizeType>(kOverallSizeOffset, size);
-  }
-
-  NumberOfLocationCatalogEntriesType GetNumberOfLocationCatalogEntries() const {
-    return region_.LoadUnaligned<NumberOfLocationCatalogEntriesType>(
-        kNumberOfLocationCatalogEntriesOffset);
-  }
-
-  void SetNumberOfLocationCatalogEntries(NumberOfLocationCatalogEntriesType num_entries) {
-    region_.StoreUnaligned<NumberOfLocationCatalogEntriesType>(
-        kNumberOfLocationCatalogEntriesOffset, num_entries);
-  }
-
-  uint32_t GetDexRegisterLocationCatalogSize(const StackMapEncoding& encoding) const {
+  uint32_t GetDexRegisterLocationCatalogSize(const CodeInfoEncoding& encoding) const {
     return ComputeDexRegisterLocationCatalogSize(GetDexRegisterLocationCatalogOffset(encoding),
-                                                 GetNumberOfLocationCatalogEntries());
+                                                 GetNumberOfLocationCatalogEntries(encoding));
   }
 
-  NumberOfStackMapsType GetNumberOfStackMaps() const {
-    return region_.LoadUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset);
-  }
-
-  void SetNumberOfStackMaps(NumberOfStackMapsType number_of_stack_maps) {
-    region_.StoreUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset, number_of_stack_maps);
+  uint32_t GetNumberOfStackMaps(const CodeInfoEncoding& encoding) const {
+    return encoding.number_of_stack_maps;
   }
 
   // Get the size of all the stack maps of this CodeInfo object, in bytes.
-  size_t GetStackMapsSize(const StackMapEncoding& encoding) const {
-    return encoding.ComputeStackMapSize() * GetNumberOfStackMaps();
+  size_t GetStackMapsSize(const CodeInfoEncoding& encoding) const {
+    return encoding.stack_map_size_in_bytes * GetNumberOfStackMaps(encoding);
   }
 
-  uint32_t GetDexRegisterLocationCatalogOffset(const StackMapEncoding& encoding) const {
-    return GetStackMapsOffset() + GetStackMapsSize(encoding);
+  uint32_t GetDexRegisterLocationCatalogOffset(const CodeInfoEncoding& encoding) const {
+    return GetStackMapsOffset(encoding) + GetStackMapsSize(encoding);
   }
 
-  size_t GetDexRegisterMapsOffset(const StackMapEncoding& encoding) const {
+  size_t GetDexRegisterMapsOffset(const CodeInfoEncoding& encoding) const {
     return GetDexRegisterLocationCatalogOffset(encoding)
          + GetDexRegisterLocationCatalogSize(encoding);
   }
 
-  uint32_t GetStackMapsOffset() const {
-    return kFixedSize;
+  uint32_t GetStackMapsOffset(const CodeInfoEncoding& encoding) const {
+    return encoding.header_size;
   }
 
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map,
-                                     const StackMapEncoding& encoding,
+                                     const CodeInfoEncoding& encoding,
                                      uint32_t number_of_dex_registers) const {
-    DCHECK(stack_map.HasDexRegisterMap(encoding));
-    uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                      + stack_map.GetDexRegisterMapOffset(encoding);
-    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
-    return DexRegisterMap(region_.Subregion(offset, size));
+    if (!stack_map.HasDexRegisterMap(encoding.stack_map_encoding)) {
+      return DexRegisterMap();
+    } else {
+      uint32_t offset = GetDexRegisterMapsOffset(encoding)
+                        + stack_map.GetDexRegisterMapOffset(encoding.stack_map_encoding);
+      size_t size = ComputeDexRegisterMapSizeOf(encoding, offset, number_of_dex_registers);
+      return DexRegisterMap(region_.Subregion(offset, size));
+    }
   }
 
   // Return the `DexRegisterMap` pointed by `inline_info` at depth `depth`.
   DexRegisterMap GetDexRegisterMapAtDepth(uint8_t depth,
                                           InlineInfo inline_info,
-                                          const StackMapEncoding& encoding,
+                                          const CodeInfoEncoding& encoding,
                                           uint32_t number_of_dex_registers) const {
-    DCHECK(inline_info.HasDexRegisterMapAtDepth(depth));
-    uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                      + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
-    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
-    return DexRegisterMap(region_.Subregion(offset, size));
+    if (!inline_info.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, depth)) {
+      return DexRegisterMap();
+    } else {
+      uint32_t offset = GetDexRegisterMapsOffset(encoding) +
+          inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info_encoding, depth);
+      size_t size = ComputeDexRegisterMapSizeOf(encoding, offset, number_of_dex_registers);
+      return DexRegisterMap(region_.Subregion(offset, size));
+    }
   }
 
-  InlineInfo GetInlineInfoOf(StackMap stack_map, const StackMapEncoding& encoding) const {
-    DCHECK(stack_map.HasInlineInfo(encoding));
-    uint32_t offset = stack_map.GetInlineDescriptorOffset(encoding)
+  InlineInfo GetInlineInfoOf(StackMap stack_map, const CodeInfoEncoding& encoding) const {
+    DCHECK(stack_map.HasInlineInfo(encoding.stack_map_encoding));
+    uint32_t offset = stack_map.GetInlineDescriptorOffset(encoding.stack_map_encoding)
                       + GetDexRegisterMapsOffset(encoding);
-    uint8_t depth = region_.LoadUnaligned<uint8_t>(offset);
-    return InlineInfo(region_.Subregion(offset,
-        InlineInfo::kFixedSize + depth * InlineInfo::SingleEntrySize()));
+    return InlineInfo(region_.Subregion(offset, region_.size() - offset));
   }
 
-  StackMap GetStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
-    for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
+  StackMap GetStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
+    for (size_t i = 0, e = GetNumberOfStackMaps(encoding); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i, encoding);
-      if (stack_map.GetDexPc(encoding) == dex_pc) {
+      if (stack_map.GetDexPc(encoding.stack_map_encoding) == dex_pc) {
         return stack_map;
       }
     }
@@ -1176,24 +1189,55 @@
 
   // Searches the stack map list backwards because catch stack maps are stored
   // at the end.
-  StackMap GetCatchStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
-    for (size_t i = GetNumberOfStackMaps(); i > 0; --i) {
+  StackMap GetCatchStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
+    for (size_t i = GetNumberOfStackMaps(encoding); i > 0; --i) {
       StackMap stack_map = GetStackMapAt(i - 1, encoding);
-      if (stack_map.GetDexPc(encoding) == dex_pc) {
+      if (stack_map.GetDexPc(encoding.stack_map_encoding) == dex_pc) {
         return stack_map;
       }
     }
     return StackMap();
   }
 
+  StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
+    size_t e = GetNumberOfStackMaps(encoding);
+    if (e == 0) {
+      // There cannot be an OSR stack map if there are no stack maps.
+      return StackMap();
+    }
+    // Walk over all stack maps. If two consecutive stack maps are identical, then we
+    // have found a stack map suitable for OSR.
+    const StackMapEncoding& stack_map_encoding = encoding.stack_map_encoding;
+    for (size_t i = 0; i < e - 1; ++i) {
+      StackMap stack_map = GetStackMapAt(i, encoding);
+      if (stack_map.GetDexPc(stack_map_encoding) == dex_pc) {
+        StackMap other = GetStackMapAt(i + 1, encoding);
+        if (other.GetDexPc(stack_map_encoding) == dex_pc &&
+            other.GetNativePcOffset(stack_map_encoding) ==
+                stack_map.GetNativePcOffset(stack_map_encoding)) {
+          DCHECK_EQ(other.GetDexRegisterMapOffset(stack_map_encoding),
+                    stack_map.GetDexRegisterMapOffset(stack_map_encoding));
+          DCHECK(!stack_map.HasInlineInfo(stack_map_encoding));
+          if (i < e - 2) {
+            // Make sure there are not three identical stack maps following each other.
+            DCHECK_NE(stack_map.GetNativePcOffset(stack_map_encoding),
+                      GetStackMapAt(i + 2, encoding).GetNativePcOffset(stack_map_encoding));
+          }
+          return stack_map;
+        }
+      }
+    }
+    return StackMap();
+  }
+
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset,
-                                        const StackMapEncoding& encoding) const {
+                                        const CodeInfoEncoding& encoding) const {
     // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
     //       maps are not. If we knew that the method does not have try/catch,
     //       we could do binary search.
-    for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
+    for (size_t i = 0, e = GetNumberOfStackMaps(encoding); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i, encoding);
-      if (stack_map.GetNativePcOffset(encoding) == native_pc_offset) {
+      if (stack_map.GetNativePcOffset(encoding.stack_map_encoding) == native_pc_offset) {
         return stack_map;
       }
     }
@@ -1211,38 +1255,16 @@
             bool dump_stack_maps) const;
 
  private:
-  static constexpr int kOverallSizeOffset = 0;
-  static constexpr int kEncodingInfoOffset = ELEMENT_BYTE_OFFSET_AFTER(OverallSize);
-  static constexpr int kNumberOfLocationCatalogEntriesOffset =
-      ELEMENT_BYTE_OFFSET_AFTER(EncodingInfo);
-  static constexpr int kNumberOfStackMapsOffset =
-      ELEMENT_BYTE_OFFSET_AFTER(NumberOfLocationCatalogEntries);
-  static constexpr int kStackMaskSizeOffset = ELEMENT_BYTE_OFFSET_AFTER(NumberOfStackMaps);
-  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(StackMaskSize);
-
-  static constexpr int kHasInlineInfoBitOffset = kEncodingInfoOffset * kBitsPerByte;
-  static constexpr int kInlineInfoBitOffset = ELEMENT_BIT_OFFSET_AFTER(HasInlineInfo);
-  static constexpr int kDexRegisterMapBitOffset = ELEMENT_BIT_OFFSET_AFTER(InlineInfo);
-  static constexpr int kDexPcBitOffset = ELEMENT_BIT_OFFSET_AFTER(DexRegisterMap);
-  static constexpr int kNativePcBitOffset = ELEMENT_BIT_OFFSET_AFTER(DexPc);
-  static constexpr int kRegisterMaskBitOffset = ELEMENT_BIT_OFFSET_AFTER(NativePc);
-
-  static constexpr int kEncodingInfoPastTheEndBitOffset = ELEMENT_BIT_OFFSET_AFTER(RegisterMask);
-  static constexpr int kEncodingInfoOverallBitSize =
-      kEncodingInfoPastTheEndBitOffset - kHasInlineInfoBitOffset;
-
-  static_assert(kEncodingInfoOverallBitSize <= (sizeof(EncodingInfoType) * kBitsPerByte),
-                "art::CodeInfo::EncodingInfoType is too short to hold all encoding info elements.");
-
-  MemoryRegion GetStackMaps(const StackMapEncoding& encoding) const {
+  MemoryRegion GetStackMaps(const CodeInfoEncoding& encoding) const {
     return region_.size() == 0
         ? MemoryRegion()
-        : region_.Subregion(GetStackMapsOffset(), GetStackMapsSize(encoding));
+        : region_.Subregion(GetStackMapsOffset(encoding), GetStackMapsSize(encoding));
   }
 
   // Compute the size of the Dex register map associated to the stack map at
   // `dex_register_map_offset_in_code_info`.
-  size_t ComputeDexRegisterMapSizeOf(uint32_t dex_register_map_offset_in_code_info,
+  size_t ComputeDexRegisterMapSizeOf(const CodeInfoEncoding& encoding,
+                                     uint32_t dex_register_map_offset_in_code_info,
                                      uint16_t number_of_dex_registers) const {
     // Offset where the actual mapping data starts within art::DexRegisterMap.
     size_t location_mapping_data_offset_in_dex_register_map =
@@ -1255,7 +1277,7 @@
     size_t number_of_live_dex_registers =
         dex_register_map_without_locations.GetNumberOfLiveDexRegisters(number_of_dex_registers);
     size_t location_mapping_data_size_in_bits =
-        DexRegisterMap::SingleEntrySizeInBits(GetNumberOfLocationCatalogEntries())
+        DexRegisterMap::SingleEntrySizeInBits(GetNumberOfLocationCatalogEntries(encoding))
         * number_of_live_dex_registers;
     size_t location_mapping_data_size_in_bytes =
         RoundUp(location_mapping_data_size_in_bits, kBitsPerByte) / kBitsPerByte;
diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h
index a9da51b..ac04c3b 100644
--- a/runtime/stride_iterator.h
+++ b/runtime/stride_iterator.h
@@ -19,6 +19,8 @@
 
 #include <iterator>
 
+#include "base/logging.h"
+
 namespace art {
 
 template<typename T>
diff --git a/runtime/string_reference.h b/runtime/string_reference.h
new file mode 100644
index 0000000..c75c218
--- /dev/null
+++ b/runtime/string_reference.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_STRING_REFERENCE_H_
+#define ART_RUNTIME_STRING_REFERENCE_H_
+
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "dex_file-inl.h"
+#include "utf-inl.h"
+
+namespace art {
+
+// A string is located by its DexFile and the string_ids_ table index into that DexFile.
+struct StringReference {
+  StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
+
+  const char* GetStringData() const {
+    return dex_file->GetStringData(dex_file->GetStringId(string_index));
+  }
+
+  const DexFile* dex_file;
+  uint32_t string_index;
+};
+
+// Compare only the reference and not the string contents.
+struct StringReferenceComparator {
+  bool operator()(const StringReference& a, const StringReference& b) const {
+    if (a.dex_file != b.dex_file) {
+      return a.dex_file < b.dex_file;
+    }
+    return a.string_index < b.string_index;
+  }
+};
+
+// Compare the actual referenced string values. Used for string reference deduplication.
+struct StringReferenceValueComparator {
+  bool operator()(StringReference sr1, StringReference sr2) const {
+    // Note that we want to deduplicate identical strings even if they are referenced
+    // by different dex files, so we need some (any) total ordering of strings, rather
+    // than references. However, the references should usually be from the same dex file,
+    // so we choose the dex file string ordering so that we can simply compare indexes
+    // and avoid the costly string comparison in the most common case.
+    if (sr1.dex_file == sr2.dex_file) {
+      // Use the string order enforced by the dex file verifier.
+      DCHECK_EQ(
+          sr1.string_index < sr2.string_index,
+          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                  sr2.GetStringData()) < 0);
+      return sr1.string_index < sr2.string_index;
+    } else {
+      // Cannot compare indexes, so do the string comparison.
+      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                     sr2.GetStringData()) < 0;
+    }
+  }
+};
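+
+// A usage sketch (hypothetical; any ordered container with this comparator works):
+//
+//   std::set<StringReference, StringReferenceValueComparator> seen;
+//   seen.emplace(&dex_file, string_index);  // Equal strings collapse to one entry.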
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_STRING_REFERENCE_H_
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index f5d20bd..216d8a7 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,7 +19,7 @@
 
 #include "thread.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <bionic_tls.h>  // Access to our own TLS slot.
 #endif
 
@@ -45,7 +45,7 @@
   if (!is_started_) {
     return nullptr;
   } else {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];
 #else
     void* thread = pthread_getspecific(Thread::pthread_key_self_);
@@ -93,6 +93,18 @@
   return static_cast<ThreadState>(old_state_and_flags.as_struct.state);
 }
 
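+// Suspension is allowable only when the thread is not inside a "no thread
+// suspension" scope and holds no lock other than the mutator lock: being
+// suspended while holding any other lock could deadlock threads waiting on it.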
+inline bool Thread::IsThreadSuspensionAllowable() const {
+  if (tls32_.no_thread_suspension != 0) {
+    return false;
+  }
+  for (int i = kLockLevelCount - 1; i >= 0; --i) {
+    if (i != kMutatorLock && GetHeldMutex(static_cast<LockLevel>(i)) != nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
+
 inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
   if (kIsDebugBuild) {
     if (gAborting == 0) {
@@ -201,12 +213,23 @@
                  << " state=" << old_state_and_flags.as_struct.state;
     } else if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
       // Wait while our suspend count is non-zero.
-      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+
+      // We pass null to the MutexLock as we may be in a situation where the
+      // runtime is shutting down. Guarding ourselves from that situation
+      // would require taking the shutdown lock, which is undesirable here.
+      Thread* thread_to_pass = nullptr;
+      if (kIsDebugBuild && !IsDaemon()) {
+        // We know we can make our debug locking checks on non-daemon threads,
+        // so re-enable them on debug builds.
+        thread_to_pass = this;
+      }
+      MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
+      ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this);
       old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
       DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
         // Re-check when Thread::resume_cond_ is notified.
-        Thread::resume_cond_->Wait(this);
+        Thread::resume_cond_->Wait(thread_to_pass);
         old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
         DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b0cf418..79b9f02 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -14,11 +14,8 @@
  * limitations under the License.
  */
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-
 #include "thread.h"
 
-#include <cutils/trace.h>
 #include <pthread.h>
 #include <signal.h>
 #include <sys/resource.h>
@@ -39,16 +36,17 @@
 #include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "base/to_str.h"
+#include "base/systrace.h"
 #include "class_linker-inl.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
-#include "gc_map.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/allocator/rosalloc.h"
 #include "gc/heap.h"
-#include "gc/space/space.h"
+#include "gc/space/space-inl.h"
 #include "handle_scope-inl.h"
 #include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
@@ -57,6 +55,8 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
+#include "nth_caller_visitor.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "quick_exception_handler.h"
@@ -71,11 +71,10 @@
 #include "thread_list.h"
 #include "thread-inl.h"
 #include "utils.h"
-#include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "verify_object-inl.h"
-#include "vmap_table.h"
 #include "well_known_classes.h"
+#include "interpreter/interpreter.h"
 
 #if ART_USE_FUTEXES
 #include "linux/futex.h"
@@ -87,10 +86,16 @@
 
 namespace art {
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self);
+
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
 const size_t Thread::kStackOverflowImplicitCheckSize = GetStackOverflowReservedBytes(kRuntimeISA);
+bool (*Thread::is_sensitive_thread_hook_)() = nullptr;
+Thread* Thread::jit_sensitive_thread_ = nullptr;
+
+static constexpr bool kVerifyImageObjectsMarked = kIsDebugBuild;
 
 // For implicit overflow checks we reserve an extra piece of memory at the bottom
 // of the stack (lowest memory).  The higher portion of the memory
@@ -164,14 +169,20 @@
 
 class DeoptimizationContextRecord {
  public:
-  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+  DeoptimizationContextRecord(const JValue& ret_val,
+                              bool is_reference,
+                              bool from_code,
                               mirror::Throwable* pending_exception,
                               DeoptimizationContextRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+      : ret_val_(ret_val),
+        is_reference_(is_reference),
+        from_code_(from_code),
+        pending_exception_(pending_exception),
         link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
+  bool GetFromCode() const { return from_code_; }
   mirror::Throwable* GetPendingException() const { return pending_exception_; }
   DeoptimizationContextRecord* GetLink() const { return link_; }
   mirror::Object** GetReturnValueAsGCRoot() {
@@ -189,6 +200,9 @@
   // Indicates whether the returned value is a reference. If so, the GC will visit it.
   const bool is_reference_;
 
+  // Whether the context was created from an explicit deoptimization in the code.
+  const bool from_code_;
+
   // The exception that was pending before deoptimization (or null if there was no pending
   // exception).
   mirror::Throwable* pending_exception_;
@@ -220,22 +234,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+void Thread::PushDeoptimizationContext(const JValue& return_value,
+                                       bool is_reference,
+                                       bool from_code,
                                        mirror::Throwable* exception) {
   DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
       return_value,
       is_reference,
+      from_code,
       exception,
       tlsPtr_.deoptimization_context_stack);
   tlsPtr_.deoptimization_context_stack = record;
 }
 
-void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+void Thread::PopDeoptimizationContext(JValue* result,
+                                      mirror::Throwable** exception,
+                                      bool* from_code) {
   AssertHasDeoptimizationContext();
   DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
   tlsPtr_.deoptimization_context_stack = record->GetLink();
   result->SetJ(record->GetReturnValue().GetJ());
   *exception = record->GetPendingException();
+  *from_code = record->GetFromCode();
   delete record;
 }
 
@@ -254,7 +274,6 @@
   StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
   if (must_be_present) {
     DCHECK(record != nullptr);
-    DCHECK_EQ(record->GetType(), type);
   } else {
     if (record == nullptr || record->GetType() != type) {
       return nullptr;
@@ -497,51 +516,73 @@
   return stack_size;
 }
 
-// Global variable to prevent the compiler optimizing away the page reads for the stack.
-uint8_t dont_optimize_this;
+// Return the nearest page-aligned address below the current stack top.
+NO_INLINE
+static uint8_t* FindStackTop() {
+  return reinterpret_cast<uint8_t*>(
+      AlignDown(__builtin_frame_address(0), kPageSize));
+}
 
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_begin_.
-//
-// There is a little complexity here that deserves a special mention.  On some
-// architectures, the stack created using a VM_GROWSDOWN flag
-// to prevent memory being allocated when it's not needed.  This flag makes the
-// kernel only allocate memory for the stack by growing down in memory.  Because we
-// want to put an mprotected region far away from that at the stack top, we need
-// to make sure the pages for the stack are mapped in before we call mprotect.  We do
-// this by reading every page from the stack bottom (highest address) to the stack top.
-// We then madvise this away.
-
-// AddressSanitizer does not like the part of this functions that reads every stack page.
-// Looks a lot like an out-of-bounds access.
 ATTRIBUTE_NO_SANITIZE_ADDRESS
 void Thread::InstallImplicitProtection() {
   uint8_t* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
-  uint8_t* stack_himem = tlsPtr_.stack_end;
-  uint8_t* stack_top = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(&stack_himem) &
-      ~(kPageSize - 1));    // Page containing current top of stack.
+  // Page containing current top of stack.
+  uint8_t* stack_top = FindStackTop();
 
-  // First remove the protection on the protected region as will want to read and
-  // write it.  This may fail (on the first attempt when the stack is not mapped)
-  // but we ignore that.
+  // Try to directly protect the stack.
+  VLOG(threads) << "installing stack protected region at " << std::hex <<
+        static_cast<void*>(pregion) << " to " <<
+        static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
+  if (ProtectStack(/* fatal_on_error */ false)) {
+    // Tell the kernel that we won't be needing these pages any more.
+    // NB. madvise will probably write zeroes into the memory (on Linux it does).
+    uint32_t unwanted_size = stack_top - pregion - kPageSize;
+    madvise(pregion, unwanted_size, MADV_DONTNEED);
+    return;
+  }
+
+  // There is a little complexity here that deserves a special mention.  On some
+  // architectures, the stack is created using a VM_GROWSDOWN flag
+  // to prevent memory being allocated when it's not needed.  This flag makes the
+  // kernel only allocate memory for the stack by growing down in memory.  Because we
+  // want to put an mprotected region far away from that at the stack top, we need
+  // to make sure the pages for the stack are mapped in before we call mprotect.
+  //
+  // A failed mprotect in UnprotectStack is an indication of a VM_GROWSDOWN stack
+  // that is not yet fully mapped in (usually only the main thread's).
+  //
+  // We map in the stack by reading every page from the stack bottom (highest address)
+  // to the stack top. (We then madvise this away.) This must be done by reading from the
+  // current stack pointer downwards. Any access more than a page below the current SP
+  // might cause a segv.
+  // TODO: This comment may be out of date. It seems possible to speed this up. As
+  //       this is normally done once in the zygote on startup, ignore for now.
+  //
+  // AddressSanitizer does not like the part of this function that reads every
+  // stack page; it looks a lot like an out-of-bounds access.
+
+  // (Defensively) first remove the protection on the protected region, as we will
+  // want to read and write it. Ignore errors.
   UnprotectStack();
 
-  // Map in the stack.  This must be done by reading from the
-  // current stack pointer downwards as the stack may be mapped using VM_GROWSDOWN
-  // in the kernel.  Any access more than a page below the current SP might cause
-  // a segv.
+  VLOG(threads) << "Need to map in stack for thread at " << std::hex <<
+      static_cast<void*>(pregion);
 
   // Read every page from the high address to the low.
+  volatile uint8_t dont_optimize_this;
+  UNUSED(dont_optimize_this);
   for (uint8_t* p = stack_top; p >= pregion; p -= kPageSize) {
     dont_optimize_this = *p;
   }
 
-  VLOG(threads) << "installing stack protected region at " << std::hex <<
+  VLOG(threads) << "(again) installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
   // Protect the bottom of the stack to prevent read/write to it.
-  ProtectStack();
+  ProtectStack(/* fatal_on_error */ true);
 
   // Tell the kernel that we won't be needing these pages any more.
   // NB. madvise will probably write zeroes into the memory (on Linux it does).
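
The fast path above is a protect-then-discard sequence. A hedged sketch of the same two syscalls in isolation (hypothetical helper, not ART code; error handling simplified):

#include <sys/mman.h>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Sketch: make the guard region inaccessible so any touch faults, then tell
// the kernel the pages that were only read to force mapping are unneeded.
bool InstallGuardAndDiscard(uint8_t* guard, size_t guard_size,
                            uint8_t* discard, size_t discard_size) {
  if (mprotect(guard, guard_size, PROT_NONE) == -1) {
    std::fprintf(stderr, "mprotect failed: %s\n", std::strerror(errno));
    return false;
  }
  // Reads after this may observe zero-filled pages (Linux semantics).
  madvise(discard, discard_size, MADV_DONTNEED);
  return true;
}
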
@@ -671,8 +712,9 @@
   RemoveSuspendTrigger();
   InitCardTable();
   InitTid();
+  interpreter::InitInterpreterTls(this);
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = this;
 #else
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
@@ -707,7 +749,7 @@
   {
     MutexLock mu(nullptr, *Locks::runtime_shutdown_lock_);
     if (runtime->IsShuttingDownLocked()) {
-      LOG(ERROR) << "Thread attaching while runtime is shutting down: " << thread_name;
+      LOG(WARNING) << "Thread attaching while runtime is shutting down: " << thread_name;
       return nullptr;
     } else {
       Runtime::Current()->StartThreadBirth();
@@ -883,10 +925,22 @@
 
   Runtime* runtime = Runtime::Current();
   bool implicit_stack_check = !runtime->ExplicitStackOverflowChecks() && !runtime->IsAotCompiler();
+
+  // Valgrind on ARM (32- and 64-bit) doesn't give the right values here. Do not install the
+  // guard page, and effectively disable stack overflow checks (we'll get segfaults,
+  // potentially) by setting stack_begin to 0.
+  const bool valgrind_on_arm =
+      (kRuntimeISA == kArm || kRuntimeISA == kArm64) &&
+      kMemoryToolIsValgrind &&
+      RUNNING_ON_MEMORY_TOOL != 0;
+  if (valgrind_on_arm) {
+    tlsPtr_.stack_begin = nullptr;
+  }
+
   ResetDefaultStackEnd();
 
   // Install the protected region if we are doing implicit overflow checks.
-  if (implicit_stack_check) {
+  if (implicit_stack_check && !valgrind_on_arm) {
     // The thread might have protected region at the bottom.  We need
     // to install our own region so we need to move the limits
     // of the stack to make room for it.
@@ -899,8 +953,7 @@
   }
 
   // Sanity check.
-  int stack_variable;
-  CHECK_GT(&stack_variable, reinterpret_cast<void*>(tlsPtr_.stack_end));
+  CHECK_GT(FindStackTop(), reinterpret_cast<void*>(tlsPtr_.stack_end));
 
   return true;
 }
@@ -919,9 +972,9 @@
      << "]";
 }
 
-void Thread::Dump(std::ostream& os, BacktraceMap* backtrace_map) const {
+void Thread::Dump(std::ostream& os, bool dump_native_stack, BacktraceMap* backtrace_map) const {
   DumpState(os);
-  DumpStack(os, backtrace_map);
+  DumpStack(os, dump_native_stack, backtrace_map);
 }
 
 mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
@@ -1082,33 +1135,36 @@
 }
 
 void Thread::RunCheckpointFunction() {
-  Closure *checkpoints[kMaxCheckpoints];
-
-  // Grab the suspend_count lock and copy the current set of
-  // checkpoints.  Then clear the list and the flag.  The RequestCheckpoint
-  // function will also grab this lock so we prevent a race between setting
-  // the kCheckpointRequest flag and clearing it.
-  {
-    MutexLock mu(this, *Locks::thread_suspend_count_lock_);
-    for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-      checkpoints[i] = tlsPtr_.checkpoint_functions[i];
-      tlsPtr_.checkpoint_functions[i] = nullptr;
+  bool done = false;
+  do {
+    // Grab the suspend_count lock and copy the checkpoints one by one. When the last checkpoint is
+    // copied, clear the list and the flag. The RequestCheckpoint function will also grab this lock
+    // to prevent a race between setting the kCheckpointRequest flag and clearing it.
+    Closure* checkpoint = nullptr;
+    {
+      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+      if (tlsPtr_.checkpoint_function != nullptr) {
+        checkpoint = tlsPtr_.checkpoint_function;
+        if (!checkpoint_overflow_.empty()) {
+          // Overflow list not empty, copy the first one out and continue.
+          tlsPtr_.checkpoint_function = checkpoint_overflow_.front();
+          checkpoint_overflow_.pop_front();
+        } else {
+          // No overflow checkpoints; this means we are on the last pending checkpoint.
+          tlsPtr_.checkpoint_function = nullptr;
+          AtomicClearFlag(kCheckpointRequest);
+          done = true;
+        }
+      } else {
+        LOG(FATAL) << "Checkpoint flag set without pending checkpoint";
+      }
     }
-    AtomicClearFlag(kCheckpointRequest);
-  }
 
-  // Outside the lock, run all the checkpoint functions that
-  // we collected.
-  bool found_checkpoint = false;
-  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-    if (checkpoints[i] != nullptr) {
-      ATRACE_BEGIN("Checkpoint function");
-      checkpoints[i]->Run(this);
-      ATRACE_END();
-      found_checkpoint = true;
-    }
-  }
-  CHECK(found_checkpoint);
+    // Outside the lock, run the checkpoint function that we collected.
+    ScopedTrace trace("Run checkpoint function");
+    DCHECK(checkpoint != nullptr);
+    checkpoint->Run(this);
+  } while (!done);
 }
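
The rewritten loop implements a one-slot-plus-overflow-list queue, drained one closure per lock acquisition. A self-contained sketch of that pattern under std::mutex (hypothetical CheckpointQueue type; ART uses its own Mutex and Closure classes):

#include <list>
#include <mutex>

struct CheckpointQueue {
  std::mutex lock;
  void (*slot)() = nullptr;           // Mirrors tlsPtr_.checkpoint_function.
  std::list<void (*)()> overflow;     // Mirrors checkpoint_overflow_.

  void Request(void (*fn)()) {
    std::lock_guard<std::mutex> g(lock);
    if (slot == nullptr) {
      slot = fn;
    } else {
      overflow.push_back(fn);
    }
  }

  void RunAll() {
    bool done = false;
    do {
      void (*fn)() = nullptr;
      {
        std::lock_guard<std::mutex> g(lock);
        fn = slot;
        if (!overflow.empty()) {
          slot = overflow.front();
          overflow.pop_front();
        } else {
          slot = nullptr;  // Last pending checkpoint.
          done = true;
        }
      }
      if (fn != nullptr) fn();  // Run outside the lock, as the code above does.
    } while (!done);
  }
};
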
 
 bool Thread::RequestCheckpoint(Closure* function) {
@@ -1118,20 +1174,6 @@
     return false;  // Fail, thread is suspended and so can't run a checkpoint.
   }
 
-  uint32_t available_checkpoint = kMaxCheckpoints;
-  for (uint32_t i = 0 ; i < kMaxCheckpoints; ++i) {
-    if (tlsPtr_.checkpoint_functions[i] == nullptr) {
-      available_checkpoint = i;
-      break;
-    }
-  }
-  if (available_checkpoint == kMaxCheckpoints) {
-    // No checkpoint functions available, we can't run a checkpoint
-    return false;
-  }
-  tlsPtr_.checkpoint_functions[available_checkpoint] = function;
-
-  // Checkpoint function installed now install flag bit.
   // We must be runnable to request a checkpoint.
   DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
   union StateAndFlags new_state_and_flags;
@@ -1139,11 +1181,13 @@
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
   bool success = tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(
       old_state_and_flags.as_int, new_state_and_flags.as_int);
-  if (UNLIKELY(!success)) {
-    // The thread changed state before the checkpoint was installed.
-    CHECK_EQ(tlsPtr_.checkpoint_functions[available_checkpoint], function);
-    tlsPtr_.checkpoint_functions[available_checkpoint] = nullptr;
-  } else {
+  if (success) {
+    // Succeeded in setting the checkpoint flag; now install the actual checkpoint.
+    if (tlsPtr_.checkpoint_function == nullptr) {
+      tlsPtr_.checkpoint_function = function;
+    } else {
+      checkpoint_overflow_.push_back(function);
+    }
     CHECK_EQ(ReadFlag(kCheckpointRequest), true);
     TriggerSuspend();
   }
@@ -1170,14 +1214,11 @@
 }
 
 void Thread::FullSuspendCheck() {
+  ScopedTrace trace(__FUNCTION__);
   VLOG(threads) << this << " self-suspending";
-  ATRACE_BEGIN("Full suspend check");
   // Make thread appear suspended to other threads, release mutator_lock_.
-  tls32_.suspended_at_suspend_check = true;
   // Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
   ScopedThreadSuspension(this, kSuspended);
-  tls32_.suspended_at_suspend_check = false;
-  ATRACE_END();
   VLOG(threads) << this << " self-reviving";
 }
 
@@ -1341,7 +1382,7 @@
     if (m->IsRuntimeMethod()) {
       return true;
     }
-    m = m->GetInterfaceMethodIfProxy(sizeof(void*));
+    m = m->GetInterfaceMethodIfProxy(kRuntimePointerSize);
     const int kMaxRepetition = 3;
     mirror::Class* c = m->GetDeclaringClass();
     mirror::DexCache* dex_cache = c->GetDexCache();
@@ -1390,6 +1431,12 @@
     if (o == nullptr) {
       os << "an unknown object";
     } else {
+      if (kUseReadBarrier && Thread::Current()->GetIsGcMarking()) {
+        // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack
+        // may not have been flipped yet, and "o" may be a from-space (stale) ref, in which case the
+        // IdentityHashCode call below will crash. So explicitly mark/forward it here.
+        o = ReadBarrier::Mark(o);
+      }
       if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) &&
           Locks::mutator_lock_->IsExclusiveHeld(Thread::Current())) {
         // Getting the identity hashcode here would result in lock inflation and suspension of the
@@ -1480,7 +1527,9 @@
   }
 }
 
-void Thread::DumpStack(std::ostream& os, BacktraceMap* backtrace_map) const {
+void Thread::DumpStack(std::ostream& os,
+                       bool dump_native_stack,
+                       BacktraceMap* backtrace_map) const {
   // TODO: we call this code when dying but may not have suspended the thread ourself. The
   //       IsSuspended check is therefore racy with the use for dumping (normally we inhibit
   //       the race with the thread_suspend_count_lock_).
@@ -1493,7 +1542,7 @@
   }
   if (safe_to_dump) {
     // If we're currently in native code, dump that stack before dumping the managed stack.
-    if (dump_for_abort || ShouldShowNativeStack(this)) {
+    if (dump_native_stack && (dump_for_abort || ShouldShowNativeStack(this))) {
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
       ArtMethod* method = GetCurrentMethod(nullptr, !dump_for_abort);
       DumpNativeStack(os, GetTid(), backtrace_map, "  native: ", method);
@@ -1510,7 +1559,7 @@
     LOG(WARNING) << "Native thread exiting without having called DetachCurrentThread (maybe it's "
         "going to use a pthread_key_create destructor?): " << *self;
     CHECK(is_started_);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     __get_tls()[TLS_SLOT_ART_THREAD_SELF] = self;
 #else
     CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, self), "reattach self");
@@ -1582,17 +1631,15 @@
   tls32_.state_and_flags.as_struct.state = kNative;
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
   std::fill(tlsPtr_.rosalloc_runs,
-            tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBrackets,
+            tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
             gc::allocator::RosAlloc::GetDedicatedFullRun());
-  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-    tlsPtr_.checkpoint_functions[i] = nullptr;
-  }
+  tlsPtr_.checkpoint_function = nullptr;
   for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
     tlsPtr_.active_suspend_barriers[i] = nullptr;
   }
   tlsPtr_.flip_function = nullptr;
   tlsPtr_.thread_local_mark_stack = nullptr;
-  tls32_.suspended_at_suspend_check = false;
+  tls32_.is_transitioning_to_runnable = false;
 }
 
 bool Thread::IsStillStarting() const {
@@ -1727,11 +1774,10 @@
   }
   CHECK_NE(GetState(), kRunnable);
   CHECK_NE(ReadFlag(kCheckpointRequest), true);
-  CHECK(tlsPtr_.checkpoint_functions[0] == nullptr);
-  CHECK(tlsPtr_.checkpoint_functions[1] == nullptr);
-  CHECK(tlsPtr_.checkpoint_functions[2] == nullptr);
+  CHECK(tlsPtr_.checkpoint_function == nullptr);
+  CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
-  CHECK_EQ(tls32_.suspended_at_suspend_check, false);
+  CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
 
   // Make sure we processed all deoptimization requests.
   CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
@@ -1776,22 +1822,12 @@
   ScopedLocalRef<jthrowable> exception(tlsPtr_.jni_env, tlsPtr_.jni_env->ExceptionOccurred());
   tlsPtr_.jni_env->ExceptionClear();
 
-  // If the thread has its own handler, use that.
-  ScopedLocalRef<jobject> handler(tlsPtr_.jni_env,
-                                  tlsPtr_.jni_env->GetObjectField(peer.get(),
-                                      WellKnownClasses::java_lang_Thread_uncaughtHandler));
-  if (handler.get() == nullptr) {
-    // Otherwise use the thread group's default handler.
-    handler.reset(tlsPtr_.jni_env->GetObjectField(peer.get(),
-                                                  WellKnownClasses::java_lang_Thread_group));
-  }
+  // Call the Thread instance's dispatchUncaughtException(Throwable) method.
+  tlsPtr_.jni_env->CallVoidMethod(peer.get(),
+      WellKnownClasses::java_lang_Thread_dispatchUncaughtException,
+      exception.get());
 
-  // Call the handler.
-  tlsPtr_.jni_env->CallVoidMethod(handler.get(),
-      WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException,
-      peer.get(), exception.get());
-
-  // If the handler threw, clear that exception too.
+  // If dispatchUncaughtException threw, clear that exception too.
   tlsPtr_.jni_env->ExceptionClear();
 }
 
@@ -1886,6 +1922,14 @@
   return result;
 }
 
+bool Thread::IsJWeakCleared(jweak obj) const {
+  CHECK(obj != nullptr);
+  IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
+  IndirectRefKind kind = GetIndirectRefKind(ref);
+  CHECK_EQ(kind, kWeakGlobal);
+  return tlsPtr_.jni_env->vm->IsWeakGlobalCleared(const_cast<Thread*>(this), ref);
+}
+
 // Implements java.lang.Thread.interrupted.
 bool Thread::Interrupted() {
   MutexLock mu(Thread::Current(), *wait_mutex_);
@@ -2062,7 +2106,7 @@
   // the i'th frame.
   mirror::ObjectArray<mirror::Object>* trace_;
   // For cross compilation.
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
 
   DISALLOW_COPY_AND_ASSIGN(BuildInternalStackTraceVisitor);
 };
@@ -2149,9 +2193,9 @@
     mirror::PointerArray* const method_trace =
         down_cast<mirror::PointerArray*>(decoded_traces->Get(0));
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
-    ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, sizeof(void*));
+    ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, kRuntimePointerSize);
     uint32_t dex_pc = method_trace->GetElementPtrSize<uint32_t>(
-        i + method_trace->GetLength() / 2, sizeof(void*));
+        i + method_trace->GetLength() / 2, kRuntimePointerSize);
     int32_t line_number;
     StackHandleScope<3> hs(soa.Self());
     auto class_name_object(hs.NewHandle<mirror::String>(nullptr));
@@ -2182,7 +2226,7 @@
         }
       }
     }
-    const char* method_name = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetName();
+    const char* method_name = method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName();
     CHECK(method_name != nullptr);
     Handle<mirror::String> method_name_object(
         hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), method_name)));
@@ -2352,21 +2396,23 @@
   std::string str(ss.str());
   // log to stderr for debugging command line processes
   std::cerr << str;
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // log to logcat for debugging frameworks processes
   LOG(INFO) << str;
 #endif
 }
 
 // Explicitly instantiate 32 and 64bit thread offset dumping support.
-template void Thread::DumpThreadOffset<4>(std::ostream& os, uint32_t offset);
-template void Thread::DumpThreadOffset<8>(std::ostream& os, uint32_t offset);
+template
+void Thread::DumpThreadOffset<PointerSize::k32>(std::ostream& os, uint32_t offset);
+template
+void Thread::DumpThreadOffset<PointerSize::k64>(std::ostream& os, uint32_t offset);
 
-template<size_t ptr_size>
+template<PointerSize ptr_size>
 void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
 #define DO_THREAD_OFFSET(x, y) \
-    if (offset == x.Uint32Value()) { \
-      os << y; \
+    if (offset == (x).Uint32Value()) { \
+      os << (y); \
       return; \
     }
   DO_THREAD_OFFSET(ThreadFlagsOffset<ptr_size>(), "state_and_flags")
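
The added parentheses around the macro parameters are standard macro hygiene; a tiny illustration (not ART code) of the precedence bug they guard against:

// Why macro parameters get parenthesized:
#define BAD(x)  x * 2
#define GOOD(x) ((x) * 2)
// BAD(1 + 2)  expands to 1 + 2 * 2, which is 5.
// GOOD(1 + 2) expands to ((1 + 2) * 2), which is 6.
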
@@ -2455,6 +2501,23 @@
   QUICK_ENTRY_POINT_INFO(pCmpgFloat)
   QUICK_ENTRY_POINT_INFO(pCmplDouble)
   QUICK_ENTRY_POINT_INFO(pCmplFloat)
+  QUICK_ENTRY_POINT_INFO(pCos)
+  QUICK_ENTRY_POINT_INFO(pSin)
+  QUICK_ENTRY_POINT_INFO(pAcos)
+  QUICK_ENTRY_POINT_INFO(pAsin)
+  QUICK_ENTRY_POINT_INFO(pAtan)
+  QUICK_ENTRY_POINT_INFO(pAtan2)
+  QUICK_ENTRY_POINT_INFO(pCbrt)
+  QUICK_ENTRY_POINT_INFO(pCosh)
+  QUICK_ENTRY_POINT_INFO(pExp)
+  QUICK_ENTRY_POINT_INFO(pExpm1)
+  QUICK_ENTRY_POINT_INFO(pHypot)
+  QUICK_ENTRY_POINT_INFO(pLog)
+  QUICK_ENTRY_POINT_INFO(pLog10)
+  QUICK_ENTRY_POINT_INFO(pNextAfter)
+  QUICK_ENTRY_POINT_INFO(pSinh)
+  QUICK_ENTRY_POINT_INFO(pTan)
+  QUICK_ENTRY_POINT_INFO(pTanh)
   QUICK_ENTRY_POINT_INFO(pFmod)
   QUICK_ENTRY_POINT_INFO(pL2d)
   QUICK_ENTRY_POINT_INFO(pFmodf)
@@ -2508,7 +2571,41 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg00)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg01)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg02)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg03)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg04)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg05)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg06)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg07)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg08)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg09)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg10)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg11)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg12)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg13)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg14)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg15)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg16)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg17)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg18)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg19)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg20)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg21)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg22)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg23)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg24)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg25)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg26)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg27)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg28)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
+
+  QUICK_ENTRY_POINT_INFO(pJniMethodFastStart)
+  QUICK_ENTRY_POINT_INFO(pJniMethodFastEnd)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
@@ -2518,37 +2615,42 @@
   // Get exception from thread.
   mirror::Throwable* exception = GetException();
   CHECK(exception != nullptr);
-  bool is_deoptimization = (exception == GetDeoptimizationException());
-  if (!is_deoptimization) {
-    // This is a real exception: let the instrumentation know about it.
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-    if (instrumentation->HasExceptionCaughtListeners() &&
-        IsExceptionThrownByCurrentMethod(exception)) {
-      // Instrumentation may cause GC so keep the exception object safe.
-      StackHandleScope<1> hs(this);
-      HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
-      instrumentation->ExceptionCaughtEvent(this, exception);
-    }
-    // Does instrumentation need to deoptimize the stack?
-    // Note: we do this *after* reporting the exception to instrumentation in case it
-    // now requires deoptimization. It may happen if a debugger is attached and requests
-    // new events (single-step, breakpoint, ...) when the exception is reported.
-    is_deoptimization = Dbg::IsForcedInterpreterNeededForException(this);
-    if (is_deoptimization) {
+  if (exception == GetDeoptimizationException()) {
+    artDeoptimize(this);
+    UNREACHABLE();
+  }
+
+  // This is a real exception: let the instrumentation know about it.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->HasExceptionCaughtListeners() &&
+      IsExceptionThrownByCurrentMethod(exception)) {
+    // Instrumentation may cause GC so keep the exception object safe.
+    StackHandleScope<1> hs(this);
+    HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
+    instrumentation->ExceptionCaughtEvent(this, exception);
+  }
+  // Does instrumentation need to deoptimize the stack?
+  // Note: we do this *after* reporting the exception to instrumentation in case it
+  // now requires deoptimization. It may happen if a debugger is attached and requests
+  // new events (single-step, breakpoint, ...) when the exception is reported.
+  if (Dbg::IsForcedInterpreterNeededForException(this)) {
+    NthCallerVisitor visitor(this, 0, false);
+    visitor.WalkStack();
+    if (Runtime::Current()->IsDeoptimizeable(visitor.caller_pc)) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
-      PushDeoptimizationContext(JValue(), false, exception);
+      PushDeoptimizationContext(
+          JValue(), /* is_reference */ false, /* from_code */ false, exception);
+      artDeoptimize(this);
+      UNREACHABLE();
     }
   }
+
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
   ClearException();
-  QuickExceptionHandler exception_handler(this, is_deoptimization);
-  if (is_deoptimization) {
-    exception_handler.DeoptimizeStack();
-  } else {
-    exception_handler.FindCatch(exception);
-  }
+  QuickExceptionHandler exception_handler(this, false);
+  exception_handler.FindCatch(exception);
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
 }
@@ -2657,11 +2759,37 @@
 
  private:
   // Visiting the declaring class is necessary so that we don't unload the class of a method that
-  // is executing. We need to ensure that the code stays mapped.
-  void VisitDeclaringClass(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::Class* klass = method->GetDeclaringClassNoBarrier();
+  // is executing. We need to ensure that the code stays mapped. NO_THREAD_SAFETY_ANALYSIS since
+  // the threads do not all hold the heap bitmap lock for parallel GC.
+  void VisitDeclaringClass(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      NO_THREAD_SAFETY_ANALYSIS {
+    mirror::Class* klass = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
     // klass can be null for runtime methods.
     if (klass != nullptr) {
+      if (kVerifyImageObjectsMarked) {
+        gc::Heap* const heap = Runtime::Current()->GetHeap();
+        gc::space::ContinuousSpace* space = heap->FindContinuousSpaceFromObject(klass,
+                                                                                /*fail_ok*/true);
+        if (space != nullptr && space->IsImageSpace()) {
+          bool failed = false;
+          if (!space->GetLiveBitmap()->Test(klass)) {
+            failed = true;
+            LOG(INTERNAL_FATAL) << "Unmarked object in image " << *space;
+          } else if (!heap->GetLiveBitmap()->Test(klass)) {
+            failed = true;
+            LOG(INTERNAL_FATAL) << "Unmarked object in image through live bitmap " << *space;
+          }
+          if (failed) {
+            GetThread()->Dump(LOG(INTERNAL_FATAL));
+            space->AsImageSpace()->DumpSections(LOG(INTERNAL_FATAL));
+            LOG(INTERNAL_FATAL) << "Method@" << method->GetDexMethodIndex() << ":" << method
+                                << " klass@" << klass;
+            // Pretty info last in case it crashes.
+            LOG(FATAL) << "Method " << PrettyMethod(method) << " klass " << PrettyClass(klass);
+          }
+        }
+      }
       mirror::Object* new_ref = klass;
       visitor_(&new_ref, -1, this);
       if (new_ref != klass) {
@@ -2677,85 +2805,38 @@
     VisitDeclaringClass(m);
 
     // Process register map (which native and runtime methods don't have)
-    if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
+    if (!m->IsNative() && !m->IsRuntimeMethod() && (!m->IsProxyMethod() || m->IsConstructor())) {
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-      if (method_header->IsOptimized()) {
-        auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
-            reinterpret_cast<uintptr_t>(cur_quick_frame));
-        uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-        CodeInfo code_info = method_header->GetOptimizedCodeInfo();
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-        DCHECK(map.IsValid());
-        MemoryRegion mask = map.GetStackMask(encoding);
-        // Visit stack entries that hold pointers.
-        for (size_t i = 0; i < mask.size_in_bits(); ++i) {
-          if (mask.LoadBit(i)) {
-            auto* ref_addr = vreg_base + i;
-            mirror::Object* ref = ref_addr->AsMirrorPtr();
-            if (ref != nullptr) {
-              mirror::Object* new_ref = ref;
-              visitor_(&new_ref, -1, this);
-              if (ref != new_ref) {
-                ref_addr->Assign(new_ref);
-              }
+      DCHECK(method_header->IsOptimized());
+      auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
+          reinterpret_cast<uintptr_t>(cur_quick_frame));
+      uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
+      CodeInfo code_info = method_header->GetOptimizedCodeInfo();
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
+      StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+      DCHECK(map.IsValid());
+      // Visit stack entries that hold pointers.
+      size_t number_of_bits = map.GetNumberOfStackMaskBits(encoding.stack_map_encoding);
+      for (size_t i = 0; i < number_of_bits; ++i) {
+        if (map.GetStackMaskBit(encoding.stack_map_encoding, i)) {
+          auto* ref_addr = vreg_base + i;
+          mirror::Object* ref = ref_addr->AsMirrorPtr();
+          if (ref != nullptr) {
+            mirror::Object* new_ref = ref;
+            visitor_(&new_ref, -1, this);
+            if (ref != new_ref) {
+              ref_addr->Assign(new_ref);
             }
           }
         }
-        // Visit callee-save registers that hold pointers.
-        uint32_t register_mask = map.GetRegisterMask(encoding);
-        for (size_t i = 0; i < BitSizeOf<uint32_t>(); ++i) {
-          if (register_mask & (1 << i)) {
-            mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(i));
-            if (*ref_addr != nullptr) {
-              visitor_(ref_addr, -1, this);
-            }
-          }
-        }
-      } else {
-        const uint8_t* native_gc_map = method_header->GetNativeGcMap();
-        CHECK(native_gc_map != nullptr) << PrettyMethod(m);
-        const DexFile::CodeItem* code_item = m->GetCodeItem();
-        // Can't be null or how would we compile its instructions?
-        DCHECK(code_item != nullptr) << PrettyMethod(m);
-        NativePcOffsetToReferenceMap map(native_gc_map);
-        size_t num_regs = map.RegWidth() * 8;
-        if (num_regs > 0) {
-          uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-          const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
-          DCHECK(reg_bitmap != nullptr);
-          const VmapTable vmap_table(method_header->GetVmapTable());
-          QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-          // For all dex registers in the bitmap
-          DCHECK(cur_quick_frame != nullptr);
-          for (size_t reg = 0; reg < num_regs; ++reg) {
-            // Does this register hold a reference?
-            if (TestBitmap(reg, reg_bitmap)) {
-              uint32_t vmap_offset;
-              if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
-                int vmap_reg = vmap_table.ComputeRegister(frame_info.CoreSpillMask(), vmap_offset,
-                                                          kReferenceVReg);
-                // This is sound as spilled GPRs will be word sized (ie 32 or 64bit).
-                mirror::Object** ref_addr =
-                    reinterpret_cast<mirror::Object**>(GetGPRAddress(vmap_reg));
-                if (*ref_addr != nullptr) {
-                  visitor_(ref_addr, reg, this);
-                }
-              } else {
-                StackReference<mirror::Object>* ref_addr =
-                    reinterpret_cast<StackReference<mirror::Object>*>(GetVRegAddrFromQuickCode(
-                        cur_quick_frame, code_item, frame_info.CoreSpillMask(),
-                        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), reg));
-                mirror::Object* ref = ref_addr->AsMirrorPtr();
-                if (ref != nullptr) {
-                  mirror::Object* new_ref = ref;
-                  visitor_(&new_ref, reg, this);
-                  if (ref != new_ref) {
-                    ref_addr->Assign(new_ref);
-                  }
-                }
-              }
-            }
+      }
+      // Visit callee-save registers that hold pointers.
+      uint32_t register_mask = map.GetRegisterMask(encoding.stack_map_encoding);
+      for (size_t i = 0; i < BitSizeOf<uint32_t>(); ++i) {
+        if (register_mask & (1 << i)) {
+          mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(i));
+          if (*ref_addr != nullptr) {
+            visitor_(ref_addr, -1, this);
           }
         }
       }
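
The callee-save visit above is a plain set-bit scan over a 32-bit register mask. A self-contained sketch of the idiom (hypothetical names, not ART code):

#include <cstdint>
#include <cstdio>

// Test each bit of a 32-bit mask and visit the corresponding slot when set.
void VisitMaskedSlots(uint32_t register_mask, void** slots) {
  for (uint32_t i = 0; i < 32; ++i) {
    if (register_mask & (1u << i)) {
      if (slots[i] != nullptr) {
        std::printf("slot %u holds %p\n", i, slots[i]);
      }
    }
  }
}
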
@@ -2902,14 +2983,18 @@
   return os;
 }
 
-void Thread::ProtectStack() {
+bool Thread::ProtectStack(bool fatal_on_error) {
   void* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
   VLOG(threads) << "Protecting stack at " << pregion;
   if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
-    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
-        "Reason: "
-        << strerror(errno) << " size:  " << kStackOverflowProtectedSize;
+    if (fatal_on_error) {
+      LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
+          "Reason: "
+          << strerror(errno) << " size:  " << kStackOverflowProtectedSize;
+    }
+    return false;
   }
+  return true;
 }
 
 bool Thread::UnprotectStack() {
@@ -2967,4 +3052,30 @@
   return count;
 }
 
+void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
+  DCHECK_EQ(GetException(), Thread::GetDeoptimizationException());
+  ClearException();
+  ShadowFrame* shadow_frame =
+      PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
+  mirror::Throwable* pending_exception = nullptr;
+  bool from_code = false;
+  PopDeoptimizationContext(result, &pending_exception, &from_code);
+  SetTopOfStack(nullptr);
+  SetTopOfShadowStack(shadow_frame);
+
+  // Restore the exception that was pending before deoptimization then interpret the
+  // deoptimized frames.
+  if (pending_exception != nullptr) {
+    SetException(pending_exception);
+  }
+  interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
+}
+
+void Thread::SetException(mirror::Throwable* new_exception) {
+  CHECK(new_exception != nullptr);
+  // TODO: DCHECK(!IsExceptionPending());
+  tlsPtr_.exception = new_exception;
+  // LOG(ERROR) << new_exception->Dump();
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 138c143..1c2d4ab 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -28,6 +28,7 @@
 #include "arch/context.h"
 #include "arch/instruction_set.h"
 #include "atomic.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "entrypoints/jni/jni_entrypoints.h"
@@ -110,10 +111,10 @@
 enum class StackedShadowFrameType {
   kShadowFrameUnderConstruction,
   kDeoptimizationShadowFrame,
-  kSingleFrameDeoptimizationShadowFrame
 };
 
-static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
+// This should match RosAlloc::kNumThreadLocalSizeBrackets.
+static constexpr size_t kNumRosAllocThreadLocalSizeBracketsInThread = 16;
 
 // Thread's stack layout for implicit stack overflow checks:
 //
@@ -179,14 +180,16 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Translates 172 to pAllocArrayFromCode and so on.
-  template<size_t size_of_pointers>
+  template<PointerSize size_of_pointers>
   static void DumpThreadOffset(std::ostream& os, uint32_t offset);
 
   // Dumps a one-line summary of thread state (used for operator<<).
   void ShortDump(std::ostream& os) const;
 
   // Dumps the detailed thread state and the thread stack (used for SIGQUIT).
-  void Dump(std::ostream& os, BacktraceMap* backtrace_map = nullptr) const
+  void Dump(std::ostream& os,
+            bool dump_native_stack = true,
+            BacktraceMap* backtrace_map = nullptr) const
       REQUIRES(!Locks::thread_suspend_count_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -285,6 +288,9 @@
 
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
 
+  // Return true if thread suspension is allowable.
+  bool IsThreadSuspensionAllowable() const;
+
   bool IsDaemon() const {
     return tls32_.daemon;
   }
@@ -309,6 +315,7 @@
    */
   static int GetNativePriority();
 
+  // Guaranteed to be non-zero.
   uint32_t GetThreadId() const {
     return tls32_.thin_lock_thread_id;
   }
@@ -359,12 +366,7 @@
   void AssertNoPendingException() const;
   void AssertNoPendingExceptionForNewException(const char* msg) const;
 
-  void SetException(mirror::Throwable* new_exception)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    CHECK(new_exception != nullptr);
-    // TODO: DCHECK(!IsExceptionPending());
-    tlsPtr_.exception = new_exception;
-  }
+  void SetException(mirror::Throwable* new_exception) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ClearException() SHARED_REQUIRES(Locks::mutator_lock_) {
     tlsPtr_.exception = nullptr;
@@ -445,6 +447,8 @@
 
   // Convert a jobject into a Object*
   mirror::Object* DecodeJObject(jobject obj) const SHARED_REQUIRES(Locks::mutator_lock_);
+  // Checks if the weak global ref has been cleared by the GC without decoding it.
+  bool IsJWeakCleared(jweak obj) const SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::Object* GetMonitorEnterObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return tlsPtr_.monitor_enter_object;
@@ -529,118 +533,150 @@
   // Offsets of various members of native Thread class, used by compiled code.
   //
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThinLockIdOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
         OFFSETOF_MEMBER(tls_32bit_sized_values, thin_lock_thread_id));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadFlagsOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
         OFFSETOF_MEMBER(tls_32bit_sized_values, state_and_flags));
   }
 
+  template<PointerSize pointer_size>
+  static ThreadOffset<pointer_size> IsGcMarkingOffset() {
+    return ThreadOffset<pointer_size>(
+        OFFSETOF_MEMBER(Thread, tls32_) +
+        OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
+  }
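
The new offset helper composes two OFFSETOF_MEMBER results. An illustrative note on why that composition is valid (assuming standard-layout structs; this is not ART's macro):

#include <cstddef>
#include <cstdint>

// The offset of a field inside an embedded struct is the outer member's
// offset plus the field's offset within the inner struct.
struct Inner { int32_t a; int32_t is_gc_marking; };
struct Outer { int64_t pad; Inner tls32; };
// offsetof(Outer, tls32) + offsetof(Inner, is_gc_marking)
//   == byte offset of Outer::tls32.is_gc_marking within Outer.
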
+
+  // Deoptimize the Java stack.
+  void DeoptimizeWithDeoptimizationException(JValue* result) SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
     size_t base = OFFSETOF_MEMBER(Thread, tlsPtr_);
     size_t scale;
     size_t shrink;
-    if (pointer_size == sizeof(void*)) {
+    if (pointer_size == kRuntimePointerSize) {
       scale = 1;
       shrink = 1;
-    } else if (pointer_size > sizeof(void*)) {
-      scale = pointer_size / sizeof(void*);
+    } else if (pointer_size > kRuntimePointerSize) {
+      scale = static_cast<size_t>(pointer_size) / static_cast<size_t>(kRuntimePointerSize);
       shrink = 1;
     } else {
-      DCHECK_GT(sizeof(void*), pointer_size);
+      DCHECK_GT(kRuntimePointerSize, pointer_size);
       scale = 1;
-      shrink = sizeof(void*) / pointer_size;
+      shrink = static_cast<size_t>(kRuntimePointerSize) / static_cast<size_t>(pointer_size);
     }
     return ThreadOffset<pointer_size>(base + ((tls_ptr_offset * scale) / shrink));
   }
 
  public:
   static uint32_t QuickEntryPointOffsetWithSize(size_t quick_entrypoint_offset,
-                                                size_t pointer_size) {
-    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
-    if (pointer_size == 4) {
-      return QuickEntryPointOffset<4>(quick_entrypoint_offset).Uint32Value();
+                                                PointerSize pointer_size) {
+    if (pointer_size == PointerSize::k32) {
+      return QuickEntryPointOffset<PointerSize::k32>(quick_entrypoint_offset).
+          Uint32Value();
     } else {
-      return QuickEntryPointOffset<8>(quick_entrypoint_offset).Uint32Value();
+      return QuickEntryPointOffset<PointerSize::k64>(quick_entrypoint_offset).
+          Uint32Value();
     }
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> QuickEntryPointOffset(size_t quick_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, quick_entrypoints) + quick_entrypoint_offset);
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> JniEntryPointOffset(size_t jni_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_entrypoints) + jni_entrypoint_offset);
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> SelfOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, self));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
+  static ThreadOffset<pointer_size> MterpCurrentIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_current_ibase));
+  }
+
+  template<PointerSize pointer_size>
+  static ThreadOffset<pointer_size> MterpDefaultIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_default_ibase));
+  }
+
+  template<PointerSize pointer_size>
+  static ThreadOffset<pointer_size> MterpAltIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_alt_ibase));
+  }
+
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ExceptionOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, exception));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> PeerOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, opeer));
   }
 
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> CardTableOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, card_table));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadSuspendTriggerOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, suspend_trigger));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalPosOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_pos));
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_pos));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalEndOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_end));
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_end));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalObjectsOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_objects));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> RosAllocRunsOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 rosalloc_runs));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalAllocStackTopOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 thread_local_alloc_stack_top));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalAllocStackEndOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 thread_local_alloc_stack_end));
@@ -682,19 +718,19 @@
     return tlsPtr_.stack_end == tlsPtr_.stack_begin;
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> StackEndOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, stack_end));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> JniEnvOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_env));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopOfManagedStackOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, managed_stack) +
@@ -721,7 +757,7 @@
     return tlsPtr_.managed_stack.PopShadowFrame();
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopShadowFrameOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, managed_stack) +
@@ -763,7 +799,7 @@
     return handle_scope;
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopHandleScopeOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 top_handle_scope));
@@ -819,6 +855,22 @@
     tls32_.weak_ref_access_enabled = enabled;
   }
 
+  uint32_t GetDisableThreadFlipCount() const {
+    CHECK(kUseReadBarrier);
+    return tls32_.disable_thread_flip_count;
+  }
+
+  void IncrementDisableThreadFlipCount() {
+    CHECK(kUseReadBarrier);
+    ++tls32_.disable_thread_flip_count;
+  }
+
+  void DecrementDisableThreadFlipCount() {
+    CHECK(kUseReadBarrier);
+    DCHECK_GT(tls32_.disable_thread_flip_count, 0U);
+    --tls32_.disable_thread_flip_count;
+  }
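
These accessors maintain a per-thread nesting counter. A sketch of the discipline they enforce (hypothetical type, not ART code):

#include <cassert>
#include <cstdint>

// Enters and exits must balance; a thread flip is only safe at zero.
struct ThreadFlipDisableCount {
  uint32_t count = 0;
  void Enter() { ++count; }
  void Exit() { assert(count > 0); --count; }
  bool FlipAllowed() const { return count == 0; }
};
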
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -847,10 +899,14 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+  // 'from_code' denotes whether the deoptimization was explicitly made from
+  // compiled code.
+  void PushDeoptimizationContext(const JValue& return_value,
+                                 bool is_reference,
+                                 bool from_code,
                                  mirror::Throwable* exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception, bool* from_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -985,9 +1041,35 @@
     tlsPtr_.rosalloc_runs[index] = run;
   }
 
-  void ProtectStack();
+  bool ProtectStack(bool fatal_on_error = true);
   bool UnprotectStack();
 
+  void SetMterpDefaultIBase(void* ibase) {
+    tlsPtr_.mterp_default_ibase = ibase;
+  }
+
+  void SetMterpCurrentIBase(void* ibase) {
+    tlsPtr_.mterp_current_ibase = ibase;
+  }
+
+  void SetMterpAltIBase(void* ibase) {
+    tlsPtr_.mterp_alt_ibase = ibase;
+  }
+
+  const void* GetMterpDefaultIBase() const {
+    return tlsPtr_.mterp_default_ibase;
+  }
+
+  const void* GetMterpCurrentIBase() const {
+    return tlsPtr_.mterp_current_ibase;
+  }
+
+  const void* GetMterpAltIBase() const {
+    return tlsPtr_.mterp_alt_ibase;
+  }
+
+  // Notify that a signal is being handled. This is to protect us from doing recursive
+  // NPE handling after a SIGSEGV.
   void NoteSignalBeingHandled() {
     if (tls32_.handling_signal_) {
       LOG(FATAL) << "Detected signal while processing a signal";
@@ -1003,8 +1085,12 @@
     return tlsPtr_.nested_signal_state;
   }
 
-  bool IsSuspendedAtSuspendCheck() const {
-    return tls32_.suspended_at_suspend_check;
+  bool IsTransitioningToRunnable() const {
+    return tls32_.is_transitioning_to_runnable;
+  }
+
+  void SetIsTransitioningToRunnable(bool value) {
+    tls32_.is_transitioning_to_runnable = value;
   }
 
   void PushVerifier(verifier::MethodVerifier* verifier);
@@ -1012,6 +1098,27 @@
 
   void InitStringEntryPoints();
 
+  void ModifyDebugDisallowReadBarrier(int8_t delta) {
+    debug_disallow_read_barrier_ += delta;
+  }
+
+  uint8_t GetDebugDisallowReadBarrierCount() const {
+    return debug_disallow_read_barrier_;
+  }
+
+  // Returns true if the current thread is the jit sensitive thread.
+  bool IsJitSensitiveThread() const {
+    return this == jit_sensitive_thread_;
+  }
+
+  // Returns true if StrictMode events are traced for the current thread.
+  static bool IsSensitiveThread() {
+    if (is_sensitive_thread_hook_ != nullptr) {
+      return (*is_sensitive_thread_hook_)();
+    }
+    return false;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1025,7 +1132,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Avoid use; callers should use SetState. Used only by SignalCatcher::HandleSigQuit, ~Thread and
-  // Dbg::Disconnected.
+  // Dbg::ManageDeoptimization.
   ThreadState SetStateUnsafe(ThreadState new_state) {
     ThreadState old_state = GetState();
     if (old_state == kRunnable && new_state != kRunnable) {
@@ -1044,7 +1151,9 @@
   void VerifyStackImpl() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void DumpState(std::ostream& os) const SHARED_REQUIRES(Locks::mutator_lock_);
-  void DumpStack(std::ostream& os, BacktraceMap* backtrace_map = nullptr) const
+  void DumpStack(std::ostream& os,
+                 bool dump_native_stack = true,
+                 BacktraceMap* backtrace_map = nullptr) const
       REQUIRES(!Locks::thread_suspend_count_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1085,6 +1194,20 @@
   ALWAYS_INLINE void PassActiveSuspendBarriers()
       REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_);
 
+  // Registers the current thread as the jit sensitive thread. Should be called just once.
+  static void SetJitSensitiveThread() {
+    if (jit_sensitive_thread_ == nullptr) {
+      jit_sensitive_thread_ = Thread::Current();
+    } else {
+      LOG(WARNING) << "Attempt to set the sensitive thread twice. Tid:"
+          << Thread::Current()->GetTid();
+    }
+  }
+
+  static void SetSensitiveThreadHook(bool (*is_sensitive_thread_hook)()) {
+    is_sensitive_thread_hook_ = is_sensitive_thread_hook;
+  }
+
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows an atomic CAS to
   // change from being Suspended to Runnable without a suspend request occurring.
   union PACKED(4) StateAndFlags {
@@ -1111,9 +1234,6 @@
 
   static void ThreadExitCallback(void* arg);
 
-  // Maximum number of checkpoint functions.
-  static constexpr uint32_t kMaxCheckpoints = 3;
-
   // Maximum number of suspend barriers.
   static constexpr uint32_t kMaxSuspendBarriers = 3;
 
@@ -1127,6 +1247,12 @@
   // their suspend count is > 0.
   static ConditionVariable* resume_cond_ GUARDED_BY(Locks::thread_suspend_count_lock_);
 
+  // Hook passed by the framework that returns true when StrictMode events are traced for the
+  // current thread.
+  static bool (*is_sensitive_thread_hook_)();
+  // Stores the jit sensitive thread (which for now is the UI thread).
+  static Thread* jit_sensitive_thread_;
+
   /***********************************************************************************************/
   // Thread local storage. Fields are grouped by size to enable 32 <-> 64 searching to account for
   // pointer size differences. To encourage shorter encoding, more frequently used values appear
@@ -1142,8 +1268,9 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
-      suspended_at_suspend_check(false), ready_for_debug_invoke(false),
-      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) {
+      is_transitioning_to_runnable(false), ready_for_debug_invoke(false),
+      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
+      disable_thread_flip_count(0) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1183,10 +1310,10 @@
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // True if the thread is suspended in FullSuspendCheck(). This is
-    // used to distinguish runnable threads that are suspended due to
-    // a normal suspend check from other threads.
-    bool32_t suspended_at_suspend_check;
+    // True if the thread is in TransitionFromSuspendedToRunnable(). This is used to distinguish the
+    // non-runnable threads (e.g. kNative, kWaiting) that are about to transition to runnable from
+    // the rest of them.
+    bool32_t is_transitioning_to_runnable;
 
     // True if the thread has been suspended by a debugger event. This is
     // used to invoke method from the debugger which is only allowed when
@@ -1210,6 +1337,11 @@
     // pause, this is not an issue.) Other collectors use Runtime::DisallowNewSystemWeaks() and
     // ReferenceProcessor::EnableSlowPath().
     bool32_t weak_ref_access_enabled;
+
+    // A thread local version of Heap::disable_thread_flip_count_. This keeps track of how many
+    // levels of (nested) JNI critical sections the thread is in and is used to detect a nested JNI
+    // critical section enter.
+    uint32_t disable_thread_flip_count;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
@@ -1222,7 +1354,7 @@
     RuntimeStats stats;
   } tls64_;
 
-  struct PACKED(4) tls_ptr_sized_values {
+  struct PACKED(sizeof(void*)) tls_ptr_sized_values {
       tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
       managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
       self(nullptr), opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
@@ -1232,7 +1364,8 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
-      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
+      thread_local_objects(0), thread_local_pos(nullptr), thread_local_end(nullptr),
+      mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
       nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
       thread_local_mark_stack(nullptr) {
@@ -1330,9 +1463,9 @@
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;
 
-    // Pending checkpoint function or null if non-pending. Installation guarding by
-    // Locks::thread_suspend_count_lock_.
-    Closure* checkpoint_functions[kMaxCheckpoints];
+    // Pending checkpoint function or null if non-pending. If this checkpoint is set and someone
+    // requests another checkpoint, it goes to the checkpoint overflow list.
+    Closure* checkpoint_function GUARDED_BY(Locks::thread_suspend_count_lock_);
 
     // Pending barriers that require passing or NULL if non-pending. Installation guarding by
     // Locks::thread_suspend_count_lock_.
@@ -1347,12 +1480,19 @@
 
     // Thread-local allocation pointer.
     uint8_t* thread_local_start;
+    size_t thread_local_objects;
+    // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8-byte aligned for
+    // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
-    size_t thread_local_objects;
+
+    // Mterp jump table bases.
+    void* mterp_current_ibase;
+    void* mterp_default_ibase;
+    void* mterp_alt_ibase;
 
     // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread.
-    void* rosalloc_runs[kNumRosAllocThreadLocalSizeBrackets];
+    void* rosalloc_runs[kNumRosAllocThreadLocalSizeBracketsInThread];
 
     // Thread-local allocation stack data/routines.
     StackReference<mirror::Object>* thread_local_alloc_stack_top;
@@ -1385,6 +1525,12 @@
   // Thread "interrupted" status; stays raised until queried or thrown.
   bool interrupted_ GUARDED_BY(wait_mutex_);
 
+  // Debug disallow-read-barrier count; only checked in debug builds, and only in the runtime.
+  uint8_t debug_disallow_read_barrier_ = 0;
+
+  // Pending extra checkpoints for when checkpoint_function is already in use.
+  std::list<Closure*> checkpoint_overflow_ GUARDED_BY(Locks::thread_suspend_count_lock_);
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
@@ -1432,6 +1578,40 @@
   DISALLOW_COPY_AND_ASSIGN(ScopedStackedShadowFramePusher);
 };
 
+// Only works for debug builds.
+class ScopedDebugDisallowReadBarriers {
+ public:
+  explicit ScopedDebugDisallowReadBarriers(Thread* self) : self_(self) {
+    self_->ModifyDebugDisallowReadBarrier(1);
+  }
+  ~ScopedDebugDisallowReadBarriers() {
+    self_->ModifyDebugDisallowReadBarrier(-1);
+  }
+
+ private:
+  Thread* const self_;
+};
+
+class ScopedTransitioningToRunnable : public ValueObject {
+ public:
+  explicit ScopedTransitioningToRunnable(Thread* self)
+      : self_(self) {
+    DCHECK_EQ(self, Thread::Current());
+    if (kUseReadBarrier) {
+      self_->SetIsTransitioningToRunnable(true);
+    }
+  }
+
+  ~ScopedTransitioningToRunnable() {
+    if (kUseReadBarrier) {
+      self_->SetIsTransitioningToRunnable(false);
+    }
+  }
+
+ private:
+  Thread* const self_;
+};
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
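
The thread.h hunks above replace the fixed checkpoint_functions array with a single checkpoint slot plus an overflow list. A minimal standalone sketch of that pattern, with std::function standing in for ART's Closure and all names purely illustrative:

```cpp
#include <functional>
#include <list>
#include <mutex>

class CheckpointSlot {
 public:
  // Queue a checkpoint: it takes the slot if free, otherwise overflows.
  void Request(std::function<void()> fn) {
    std::lock_guard<std::mutex> lock(mu_);
    if (!pending_) {
      pending_ = std::move(fn);
    } else {
      overflow_.push_back(std::move(fn));
    }
  }

  // Run the pending checkpoint, then promote one waiter from the overflow list.
  void RunPending() {
    std::function<void()> fn;
    {
      std::lock_guard<std::mutex> lock(mu_);
      if (!pending_) {
        return;
      }
      fn = std::move(pending_);
      pending_ = nullptr;
      if (!overflow_.empty()) {
        pending_ = std::move(overflow_.front());
        overflow_.pop_front();
      }
    }
    fn();  // Run outside the lock; a checkpoint may itself take locks.
  }

 private:
  std::mutex mu_;  // Stands in for Locks::thread_suspend_count_lock_.
  std::function<void()> pending_;
  std::list<std::function<void()>> overflow_;
};
```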
 
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 9563b99..b922d94 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -44,7 +44,7 @@
 
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   LOG(FATAL) << "Invalid use of alternate signal stack on Android";
 #endif
   stack_t ss;
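
For reference, a self-contained sketch of what sigaltstack(2) setup looks like; the buffer size and names here are illustrative, not ART's:

```cpp
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

// Install an alternate stack so SA_ONSTACK handlers can run even when the
// normal stack has overflowed.
static void SetUpAltStackSketch() {
  static char buffer[64 * 1024];  // Illustrative size; must be >= MINSIGSTKSZ.
  stack_t ss;
  ss.ss_sp = buffer;
  ss.ss_size = sizeof(buffer);
  ss.ss_flags = 0;
  if (sigaltstack(&ss, nullptr) != 0) {
    perror("sigaltstack");
    abort();
  }
}
```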
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index bdd5d10..688514c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -16,10 +16,7 @@
 
 #include "thread_list.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-
 #include <backtrace/BacktraceMap.h>
-#include <cutils/trace.h>
 #include <dirent.h>
 #include <ScopedLocalRef.h>
 #include <ScopedUtfChars.h>
@@ -30,6 +27,7 @@
 
 #include "base/histogram-inl.h"
 #include "base/mutex-inl.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "debugger.h"
@@ -37,6 +35,7 @@
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "trace.h"
@@ -59,19 +58,27 @@
 static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
 static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
 
+// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
+// some history.
+static constexpr bool kDumpUnattachedThreadNativeStack = true;
+
 ThreadList::ThreadList()
-    : suspend_all_count_(0), debug_suspend_all_count_(0), unregistering_count_(0),
-      suspend_all_historam_("suspend all histogram", 16, 64), long_suspend_(false) {
+    : suspend_all_count_(0),
+      debug_suspend_all_count_(0),
+      unregistering_count_(0),
+      suspend_all_historam_("suspend all histogram", 16, 64),
+      long_suspend_(false) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
 ThreadList::~ThreadList() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   // Detach the current thread if necessary. If we failed to start, there might not be any threads.
   // We need to detach the current thread here in case there's another thread waiting to join with
   // us.
   bool contains = false;
+  Thread* self = Thread::Current();
   {
-    Thread* self = Thread::Current();
     MutexLock mu(self, *Locks::thread_list_lock_);
     contains = Contains(self);
   }
@@ -85,10 +92,9 @@
   heap->DisableGCForShutdown();
   // In case a GC is in progress, wait for it to finish.
   heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
-
   // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
   //       Thread::Init.
-  SuspendAllDaemonThreads();
+  SuspendAllDaemonThreadsForShutdown();
 }
 
 bool ThreadList::Contains(Thread* thread) {
@@ -128,24 +134,24 @@
       suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
     }
   }
-  Dump(os);
-  DumpUnattachedThreads(os);
+  bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
+  Dump(os, dump_native_stack);
+  DumpUnattachedThreads(os, dump_native_stack);
 }
 
-static void DumpUnattachedThread(std::ostream& os, pid_t tid) NO_THREAD_SAFETY_ANALYSIS {
+static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
+    NO_THREAD_SAFETY_ANALYSIS {
   // TODO: No thread safety analysis as DumpState with a null thread won't access fields, should
   // refactor DumpState to avoid skipping analysis.
   Thread::DumpState(os, nullptr, tid);
   DumpKernelStack(os, tid, "  kernel: ", false);
-  // TODO: Reenable this when the native code in system_server can handle it.
-  // Currently "adb shell kill -3 `pid system_server`" will cause it to exit.
-  if (false) {
+  if (dump_native_stack && kDumpUnattachedThreadNativeStack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
   os << "\n";
 }
 
-void ThreadList::DumpUnattachedThreads(std::ostream& os) {
+void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
   DIR* d = opendir("/proc/self/task");
   if (!d) {
     return;
@@ -163,22 +169,25 @@
         contains = Contains(tid);
       }
       if (!contains) {
-        DumpUnattachedThread(os, tid);
+        DumpUnattachedThread(os, tid, dump_native_stack);
       }
     }
   }
   closedir(d);
 }
 
-// Dump checkpoint timeout in milliseconds. Larger amount on the host, as dumping will invoke
-// addr2line when available.
-static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 10000 : 20000;
+// Dump checkpoint timeout in milliseconds. Larger amount on the target, since the device could be
+// overloaded with ANR dumps.
+static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;
 
 // A closure used by Thread::Dump.
 class DumpCheckpoint FINAL : public Closure {
  public:
-  explicit DumpCheckpoint(std::ostream* os)
-      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(GetTid())) {}
+  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
+      : os_(os),
+        barrier_(0),
+        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
+        dump_native_stack_(dump_native_stack) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
@@ -187,7 +196,7 @@
     std::ostringstream local_os;
     {
       ScopedObjectAccess soa(self);
-      thread->Dump(local_os, backtrace_map_.get());
+      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
     }
     local_os << "\n";
     {
@@ -195,9 +204,7 @@
       MutexLock mu(self, *Locks::logging_lock_);
       *os_ << local_os.str();
     }
-    if (thread->GetState() == kRunnable) {
-      barrier_.Pass(self);
-    }
+    barrier_.Pass(self);
   }
 
   void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
@@ -218,15 +225,22 @@
   Barrier barrier_;
   // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
   std::unique_ptr<BacktraceMap> backtrace_map_;
+  // Whether we should dump the native stack.
+  const bool dump_native_stack_;
 };
 
-void ThreadList::Dump(std::ostream& os) {
+void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
   {
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
     os << "DALVIK THREADS (" << list_.size() << "):\n";
   }
-  DumpCheckpoint checkpoint(&os);
-  size_t threads_running_checkpoint = RunCheckpoint(&checkpoint);
+  DumpCheckpoint checkpoint(&os, dump_native_stack);
+  size_t threads_running_checkpoint;
+  {
+    // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
+    ScopedObjectAccess soa(Thread::Current());
+    threads_running_checkpoint = RunCheckpoint(&checkpoint);
+  }
   if (threads_running_checkpoint != 0) {
     checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
   }
@@ -274,9 +288,6 @@
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
-  if (kDebugLocking && gAborting == 0) {
-    CHECK_NE(self->GetState(), kRunnable);
-  }
 
   std::vector<Thread*> suspended_count_modified_threads;
   size_t count = 0;
@@ -285,12 +296,12 @@
     // manually called.
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    count = list_.size();
     for (const auto& thread : list_) {
       if (thread != self) {
         while (true) {
           if (thread->RequestCheckpoint(checkpoint_function)) {
             // This thread will run its checkpoint some time in the near future.
-            count++;
             break;
           } else {
             // We are probably suspended, try to make sure that we stay suspended.
@@ -383,7 +394,8 @@
 // from-space to to-space refs. Used to synchronize threads at a point
 // to mark the initiation of marking while maintaining the to-space
 // invariant.
-size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
+                                   Closure* flip_callback,
                                    gc::collector::GarbageCollector* collector) {
   TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
   const uint64_t start_time = NanoTime();
@@ -393,6 +405,8 @@
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
   CHECK_NE(self->GetState(), kRunnable);
 
+  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.
+
   SuspendAllInternal(self, self, nullptr);
 
   // Run the flip callback for the collector.
@@ -402,26 +416,31 @@
   collector->RegisterPause(NanoTime() - start_time);
 
   // Resume runnable threads.
-  std::vector<Thread*> runnable_threads;
+  size_t runnable_thread_count = 0;
   std::vector<Thread*> other_threads;
   {
+    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     --suspend_all_count_;
     for (const auto& thread : list_) {
+      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
+      // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
+      // a runnable thread runs it for itself or we run it for a suspended thread below.
+      thread->SetFlipFunction(thread_flip_visitor);
       if (thread == self) {
         continue;
       }
-      // Set the flip function for both runnable and suspended threads
-      // because Thread::DumpState/DumpJavaStack() (invoked by a
-      // checkpoint) may cause the flip function to be run for a
-      // runnable/suspended thread before a runnable threads runs it
-      // for itself or we run it for a suspended thread below.
-      thread->SetFlipFunction(thread_flip_visitor);
-      if (thread->IsSuspendedAtSuspendCheck()) {
+      // Resume early the threads that were runnable but are suspended just for this thread flip or
+      // about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI function) to
+      // runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or waiting
+      // for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip).
+      ThreadState state = thread->GetState();
+      if (state == kWaitingForGcThreadFlip ||
+          thread->IsTransitioningToRunnable()) {
         // The thread will resume right after the broadcast.
         thread->ModifySuspendCount(self, -1, nullptr, false);
-        runnable_threads.push_back(thread);
+        ++runnable_thread_count;
       } else {
         other_threads.push_back(thread);
       }
@@ -429,8 +448,11 @@
     Thread::resume_cond_->Broadcast(self);
   }
 
+  collector->GetHeap()->ThreadFlipEnd(self);
+
   // Run the closure on the other threads and let them resume.
   {
+    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
     for (const auto& thread : other_threads) {
       Closure* flip_func = thread->GetFlipFunction();
@@ -439,11 +461,15 @@
       }
     }
     // Run it for self.
-    thread_flip_visitor->Run(self);
+    Closure* flip_func = self->GetFlipFunction();
+    if (flip_func != nullptr) {
+      flip_func->Run(self);
+    }
   }
 
   // Resume other threads.
   {
+    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : other_threads) {
       thread->ModifySuspendCount(self, -1, nullptr, false);
@@ -451,7 +477,7 @@
     Thread::resume_cond_->Broadcast(self);
   }
 
-  return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
+  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
 }
 
 void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
@@ -462,42 +488,42 @@
   } else {
     VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
   }
-  ATRACE_BEGIN("Suspending mutator threads");
-  const uint64_t start_time = NanoTime();
+  {
+    ScopedTrace trace("Suspending mutator threads");
+    const uint64_t start_time = NanoTime();
 
-  SuspendAllInternal(self, self);
-  // All threads are known to have suspended (but a thread may still own the mutator lock)
-  // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
+    SuspendAllInternal(self, self);
+    // All threads are known to have suspended (but a thread may still own the mutator lock)
+    // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
 #if HAVE_TIMED_RWLOCK
-  while (true) {
-    if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self, kThreadSuspendTimeoutMs, 0)) {
-      break;
-    } else if (!long_suspend_) {
-      // Reading long_suspend without the mutator lock is slightly racy, in some rare cases, this
-      // could result in a thread suspend timeout.
-      // Timeout if we wait more than kThreadSuspendTimeoutMs seconds.
-      UnsafeLogFatalForThreadSuspendAllTimeout();
+    while (true) {
+      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self, kThreadSuspendTimeoutMs, 0)) {
+        break;
+      } else if (!long_suspend_) {
+        // Reading long_suspend without the mutator lock is slightly racy, in some rare cases, this
+        // could result in a thread suspend timeout.
+        // Timeout if we wait more than kThreadSuspendTimeoutMs seconds.
+        UnsafeLogFatalForThreadSuspendAllTimeout();
+      }
     }
-  }
 #else
-  Locks::mutator_lock_->ExclusiveLock(self);
+    Locks::mutator_lock_->ExclusiveLock(self);
 #endif
 
-  long_suspend_ = long_suspend;
+    long_suspend_ = long_suspend;
 
-  const uint64_t end_time = NanoTime();
-  const uint64_t suspend_time = end_time - start_time;
-  suspend_all_historam_.AdjustAndAddValue(suspend_time);
-  if (suspend_time > kLongThreadSuspendThreshold) {
-    LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
+    const uint64_t end_time = NanoTime();
+    const uint64_t suspend_time = end_time - start_time;
+    suspend_all_historam_.AdjustAndAddValue(suspend_time);
+    if (suspend_time > kLongThreadSuspendThreshold) {
+      LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
+    }
+
+    if (kDebugLocking) {
+      // Debug check that all threads are suspended.
+      AssertThreadsAreSuspended(self, self);
+    }
   }
-
-  if (kDebugLocking) {
-    // Debug check that all threads are suspended.
-    AssertThreadsAreSuspended(self, self);
-  }
-
-  ATRACE_END();
   ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());
 
   if (self != nullptr) {
@@ -511,7 +537,9 @@
 // Debugger thread might be set to kRunnable for a short period of time after the
 // SuspendAllInternal. This is safe because it will be set back to suspended state before
 // the SuspendAll returns.
-void ThreadList::SuspendAllInternal(Thread* self, Thread* ignore1, Thread* ignore2,
+void ThreadList::SuspendAllInternal(Thread* self,
+                                    Thread* ignore1,
+                                    Thread* ignore2,
                                     bool debug_suspend) {
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
@@ -600,11 +628,7 @@
             PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
           }
         }
-      } else {
-        cur_val = pending_threads.LoadRelaxed();
-        CHECK_EQ(cur_val, 0);
-        break;
-      }
+      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
 #else
       // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set.
 #endif
@@ -625,7 +649,8 @@
   }
 
   ATRACE_END();
-  ATRACE_BEGIN("Resuming mutator threads");
+
+  ScopedTrace trace("Resuming mutator threads");
 
   if (kDebugLocking) {
     // Debug check that all threads are suspended.
@@ -657,7 +682,6 @@
     }
     Thread::resume_cond_->Broadcast(self);
   }
-  ATRACE_END();
 
   if (self != nullptr) {
     VLOG(threads) << *self << " ResumeAll complete";
@@ -700,12 +724,14 @@
   VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
 }
 
-static void ThreadSuspendByPeerWarning(Thread* self, LogSeverity severity, const char* message,
+static void ThreadSuspendByPeerWarning(Thread* self,
+                                       LogSeverity severity,
+                                       const char* message,
                                        jobject peer) {
   JNIEnvExt* env = self->GetJniEnv();
   ScopedLocalRef<jstring>
-      scoped_name_string(env, (jstring)env->GetObjectField(
-          peer, WellKnownClasses::java_lang_Thread_name));
+      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
+          peer, WellKnownClasses::java_lang_Thread_name)));
   ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
   if (scoped_name_chars.c_str() == nullptr) {
       LOG(severity) << message << ": " << peer;
@@ -715,8 +741,10 @@
   }
 }
 
-Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
-                                        bool debug_suspension, bool* timed_out) {
+Thread* ThreadList::SuspendThreadByPeer(jobject peer,
+                                        bool request_suspension,
+                                        bool debug_suspension,
+                                        bool* timed_out) {
   const uint64_t start_time = NanoTime();
   useconds_t sleep_us = kThreadSuspendInitialSleepUs;
   *timed_out = false;
@@ -813,12 +841,14 @@
   }
 }
 
-static void ThreadSuspendByThreadIdWarning(LogSeverity severity, const char* message,
+static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
+                                           const char* message,
                                            uint32_t thread_id) {
   LOG(severity) << StringPrintf("%s: %d", message, thread_id);
 }
 
-Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
+                                            bool debug_suspension,
                                             bool* timed_out) {
   const uint64_t start_time = NanoTime();
   useconds_t sleep_us = kThreadSuspendInitialSleepUs;
@@ -909,12 +939,9 @@
   }
 }
 
-Thread* ThreadList::FindThreadByThreadId(uint32_t thin_lock_id) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::thread_list_lock_);
+Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
   for (const auto& thread : list_) {
-    if (thread->GetThreadId() == thin_lock_id) {
-      CHECK(thread == self || thread->IsSuspended());
+    if (thread->GetThreadId() == thread_id) {
       return thread;
     }
   }
@@ -941,7 +968,12 @@
   Locks::mutator_lock_->ExclusiveLock(self);
   Locks::mutator_lock_->ExclusiveUnlock(self);
 #endif
-  AssertThreadsAreSuspended(self, self, debug_thread);
+  // Disabled for the following race condition:
+  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
+  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
+  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
+  // state (from SetStateUnsafe).
+  // AssertThreadsAreSuspended(self, self, debug_thread);
 
   VLOG(threads) << *self << " SuspendAllForDebugger complete";
 }
@@ -1091,6 +1123,7 @@
 }
 
 void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
   while (true) {
@@ -1121,10 +1154,13 @@
   }
 }
 
-void ThreadList::SuspendAllDaemonThreads() {
+void ThreadList::SuspendAllDaemonThreadsForShutdown() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
   Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::thread_list_lock_);
-  {  // Tell all the daemons it's time to suspend.
+  size_t daemons_left = 0;
+  {
+    // Tell all the daemons it's time to suspend.
+    MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : list_) {
       // This is only run after all non-daemon threads have exited, so the remainder should all be
@@ -1132,29 +1168,47 @@
       CHECK(thread->IsDaemon()) << *thread;
       if (thread != self) {
         thread->ModifySuspendCount(self, +1, nullptr, false);
+        ++daemons_left;
       }
+      // We are shutting down the runtime, so set the JNI functions of all the JNIEnvs to be
+      // the sleep-forever ones.
+      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
     }
   }
+  // If we have any daemons left, wait 200ms to ensure they are not stuck in a place where they
+  // are about to access runtime state and are not in a runnable state. Examples: Monitor code
+  // or waking up from a condition variable. TODO: Try and see if there is a better way to wait
+  // for daemon threads to be in a blocked state.
+  if (daemons_left > 0) {
+    static constexpr size_t kDaemonSleepTime = 200 * 1000;
+    usleep(kDaemonSleepTime);
+  }
   // Give the threads a chance to suspend, complaining if they're slow.
   bool have_complained = false;
-  for (int i = 0; i < 10; ++i) {
-    usleep(200 * 1000);
+  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
+  static constexpr size_t kSleepMicroseconds = 1000;
+  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
     bool all_suspended = true;
-    for (const auto& thread : list_) {
-      if (thread != self && thread->GetState() == kRunnable) {
-        if (!have_complained) {
-          LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
-          have_complained = true;
+    {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      for (const auto& thread : list_) {
+        if (thread != self && thread->GetState() == kRunnable) {
+          if (!have_complained) {
+            LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
+            have_complained = true;
+          }
+          all_suspended = false;
         }
-        all_suspended = false;
       }
     }
     if (all_suspended) {
       return;
     }
+    usleep(kSleepMicroseconds);
   }
-  LOG(ERROR) << "suspend all daemons failed";
+  LOG(WARNING) << "timed out suspending all daemon threads";
 }
+
 void ThreadList::Register(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
 
@@ -1243,7 +1297,7 @@
 
   // Clear the TLS data, so that the underlying native thread is recognizably detached.
   // (It may wish to reattach later.)
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
 #else
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
@@ -1261,6 +1315,39 @@
   }
 }
 
+void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
+  Thread* const self = Thread::Current();
+  std::vector<Thread*> threads_to_visit;
+
+  // Tell threads to suspend and copy them into list.
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : list_) {
+      thread->ModifySuspendCount(self, +1, nullptr, false);
+      if (thread == self || thread->IsSuspended()) {
+        threads_to_visit.push_back(thread);
+      } else {
+        thread->ModifySuspendCount(self, -1, nullptr, false);
+      }
+    }
+  }
+
+  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
+  // order violations.
+  for (Thread* thread : threads_to_visit) {
+    thread->VisitRoots(visitor);
+  }
+
+  // Restore suspend counts.
+  {
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : threads_to_visit) {
+      thread->ModifySuspendCount(self, -1, nullptr, false);
+    }
+  }
+}
+
 void ThreadList::VisitRoots(RootVisitor* visitor) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
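
SuspendAllDaemonThreadsForShutdown above now polls in short intervals against an overall deadline, instead of sleeping 200ms per iteration. A generic sketch of that bounded polling loop (names and constants are illustrative):

```cpp
#include <unistd.h>

#include <cstdio>
#include <functional>

// Poll `done` every sleep_us microseconds, giving up after timeout_us.
bool WaitForCondition(const std::function<bool()>& done,
                      size_t timeout_us = 2000 * 1000,
                      size_t sleep_us = 1000) {
  for (size_t waited = 0; waited < timeout_us; waited += sleep_us) {
    if (done()) {
      return true;
    }
    usleep(sleep_us);
  }
  std::fprintf(stderr, "timed out waiting for condition\n");
  return false;
}
```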
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index c727432..49f65e1 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -49,14 +49,14 @@
   void DumpForSigQuit(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_);
   // For thread suspend timeout dumps.
-  void Dump(std::ostream& os)
+  void Dump(std::ostream& os, bool dump_native_stack = true)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
   pid_t GetLockOwner();  // For SignalCatcher.
 
   // Thread suspension support.
   void ResumeAll()
-      UNLOCK_FUNCTION(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      UNLOCK_FUNCTION(Locks::mutator_lock_);
   void Resume(Thread* thread, bool for_debugger = false)
       REQUIRES(!Locks::thread_suspend_count_lock_);
 
@@ -76,7 +76,8 @@
   // is set to true.
   Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
                               bool* timed_out)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
@@ -84,14 +85,16 @@
   // the thread terminating. Note that as thread ids are recycled this may not suspend the expected
   // thread, that may be terminating. If the suspension times out then *timeout is set to true.
   Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
-  // Find an already suspended thread (or self) by its id.
-  Thread* FindThreadByThreadId(uint32_t thin_lock_id);
+  // Find an existing thread (or self) by its thread id (not tid).
+  Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_);
 
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
-  // of the suspend check. Returns how many checkpoints we should expect to run.
+  // of the suspend check. Returns how many checkpoints are expected to run, including those for
+  // already suspended threads (see b/24191051).
   size_t RunCheckpoint(Closure* checkpoint_function)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
@@ -100,14 +103,17 @@
 
   // Flip thread roots from from-space refs to to-space refs. Used by
   // the concurrent copying collector.
-  size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+  size_t FlipThreadRoots(Closure* thread_flip_visitor,
+                         Closure* flip_callback,
                          gc::collector::GarbageCollector* collector)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   // Suspends all threads
   void SuspendAllForDebugger()
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   void SuspendSelfForDebugger()
@@ -126,14 +132,22 @@
 
   // Add/remove current thread from list.
   void Register(Thread* self)
-      REQUIRES(Locks::runtime_shutdown_lock_, !Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(Locks::runtime_shutdown_lock_)
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
-  void Unregister(Thread* self) REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
-                                         !Locks::thread_suspend_count_lock_);
+  void Unregister(Thread* self)
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
+               !Locks::thread_suspend_count_lock_);
 
   void VisitRoots(RootVisitor* visitor) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void VisitRootsForSuspendedThreads(RootVisitor* visitor)
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Return a copy of the thread list.
   std::list<Thread*> GetList() REQUIRES(Locks::thread_list_lock_) {
     return list_;
@@ -151,15 +165,17 @@
   size_t RunCheckpoint(Closure* checkpoint_function, bool includeSuspended)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
-  void DumpUnattachedThreads(std::ostream& os)
+  void DumpUnattachedThreads(std::ostream& os, bool dump_native_stack)
       REQUIRES(!Locks::thread_list_lock_);
 
-  void SuspendAllDaemonThreads()
+  void SuspendAllDaemonThreadsForShutdown()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
   void WaitForOtherNonDaemonThreadsToExit()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
-  void SuspendAllInternal(Thread* self, Thread* ignore1, Thread* ignore2 = nullptr,
+  void SuspendAllInternal(Thread* self,
+                          Thread* ignore1,
+                          Thread* ignore2 = nullptr,
                           bool debug_suspend = false)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
@@ -200,8 +216,8 @@
               !Locks::mutator_lock_);
   // No REQUIRES(mutator_lock_) since the unlock function already asserts this.
   ~ScopedSuspendAll()
-      UNLOCK_FUNCTION(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      UNLOCK_FUNCTION(Locks::mutator_lock_);
 };
 
 }  // namespace art
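
The REQUIRES/GUARDED_BY/UNLOCK_FUNCTION annotations reordered above belong to Clang's -Wthread-safety analysis. A stripped-down sketch of how such annotations are typically wired up (ART defines its own wrappers; these macro names are illustrative):

```cpp
#if defined(__clang__)
#define CAPABILITY(x) __attribute__((capability(x)))
#define GUARDED_BY(x) __attribute__((guarded_by(x)))
#define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))
#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
#else
#define CAPABILITY(x)
#define GUARDED_BY(x)
#define REQUIRES(...)
#define ACQUIRE(...)
#define RELEASE(...)
#define NO_THREAD_SAFETY_ANALYSIS
#endif

struct CAPABILITY("mutex") ToyMutex {
  void Lock() ACQUIRE(this) NO_THREAD_SAFETY_ANALYSIS {}
  void Unlock() RELEASE(this) NO_THREAD_SAFETY_ANALYSIS {}
};

ToyMutex g_list_lock;
int g_thread_count GUARDED_BY(g_list_lock) = 0;

// With -Wthread-safety, calling this without holding g_list_lock is a
// compile-time warning.
void BumpThreadCount() REQUIRES(g_list_lock) { ++g_thread_count; }
```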
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 0527d3a..b14f340 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -16,6 +16,11 @@
 
 #include "thread_pool.h"
 
+#include <pthread.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/logging.h"
@@ -53,6 +58,19 @@
   CHECK_PTHREAD_CALL(pthread_join, (pthread_, nullptr), "thread pool worker shutdown");
 }
 
+void ThreadPoolWorker::SetPthreadPriority(int priority) {
+  CHECK_GE(priority, PRIO_MIN);
+  CHECK_LE(priority, PRIO_MAX);
+#if defined(ART_TARGET_ANDROID)
+  int result = setpriority(PRIO_PROCESS, pthread_gettid_np(pthread_), priority);
+  if (result != 0) {
+    PLOG(ERROR) << "Failed to setpriority to: " << priority;
+  }
+#else
+  UNUSED(priority);
+#endif
+}
+
 void ThreadPoolWorker::Run() {
   Thread* self = Thread::Current();
   Task* task = nullptr;
@@ -82,6 +100,11 @@
   }
 }
 
+void ThreadPool::RemoveAllTasks(Thread* self) {
+  MutexLock mu(self, task_queue_lock_);
+  tasks_.clear();
+}
+
 ThreadPool::ThreadPool(const char* name, size_t num_threads)
   : name_(name),
     task_queue_lock_("task queue lock"),
@@ -209,4 +232,10 @@
   return tasks_.size();
 }
 
+void ThreadPool::SetPthreadPriority(int priority) {
+  for (ThreadPoolWorker* worker : threads_) {
+    worker->SetPthreadPriority(priority);
+  }
+}
+
 }  // namespace art
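
SetPthreadPriority above maps a worker to a kernel tid and applies a "nice" value via setpriority(2). A hedged, desktop-Linux sketch of the same idea for the calling thread (pthread_gettid_np() is bionic-only, so this uses the gettid syscall instead):

```cpp
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <cstdio>

// Apply a "nice" value to the calling thread only (on Linux, PRIO_PROCESS
// plus a tid addresses a single thread).
void SetCurrentThreadNice(int nice_value) {
  pid_t tid = static_cast<pid_t>(syscall(SYS_gettid));
  if (setpriority(PRIO_PROCESS, tid, nice_value) != 0) {
    std::perror("setpriority");
  }
}
```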
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index a2338d6..b6c6f02 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -59,6 +59,9 @@
 
   virtual ~ThreadPoolWorker();
 
+  // Set the "nice" priority for this worker.
+  void SetPthreadPriority(int priority);
+
  protected:
   ThreadPoolWorker(ThreadPool* thread_pool, const std::string& name, size_t stack_size);
   static void* Callback(void* arg) REQUIRES(!Locks::mutator_lock_);
@@ -91,6 +94,9 @@
   // after running it, it is the caller's responsibility.
   void AddTask(Thread* self, Task* task) REQUIRES(!task_queue_lock_);
 
+  // Remove all tasks in the queue.
+  void RemoveAllTasks(Thread* self) REQUIRES(!task_queue_lock_);
+
   ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
@@ -108,6 +114,9 @@
   // thread count of the thread pool.
   void SetMaxActiveWorkers(size_t threads) REQUIRES(!task_queue_lock_);
 
+  // Set the "nice" priority for threads in the pool.
+  void SetPthreadPriority(int priority);
+
  protected:
   // get a task to run, blocks if there are no tasks left
   virtual Task* GetTask(Thread* self) REQUIRES(!task_queue_lock_);
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
new file mode 100644
index 0000000..7c0ea64
--- /dev/null
+++ b/runtime/ti/agent.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "agent.h"
+#include "java_vm_ext.h"
+#include "runtime.h"
+
+namespace art {
+namespace ti {
+
+const char* AGENT_ON_LOAD_FUNCTION_NAME = "Agent_OnLoad";
+const char* AGENT_ON_ATTACH_FUNCTION_NAME = "Agent_OnAttach";
+const char* AGENT_ON_UNLOAD_FUNCTION_NAME = "Agent_OnUnload";
+
+Agent Agent::Create(std::string arg) {
+  size_t eq = arg.find_first_of('=');
+  if (eq == std::string::npos) {
+    return Agent(arg, "");
+  } else {
+    return Agent(arg.substr(0, eq), arg.substr(eq + 1, arg.length()));
+  }
+}
+
+// TODO We need to acquire some locks probably.
+Agent::LoadError Agent::Load(/*out*/jint* call_res, /*out*/ std::string* error_msg) {
+  DCHECK(call_res != nullptr);
+  DCHECK(error_msg != nullptr);
+
+  if (IsStarted()) {
+    *error_msg = StringPrintf("the agent at %s has already been started!", name_.c_str());
+    VLOG(agents) << "err: " << *error_msg;
+    return kAlreadyStarted;
+  }
+  LoadError err = DoDlOpen(error_msg);
+  if (err != kNoError) {
+    VLOG(agents) << "err: " << *error_msg;
+    return err;
+  }
+  if (onload_ == nullptr) {
+    *error_msg = StringPrintf("Unable to start agent %s: No Agent_OnLoad function found",
+                              name_.c_str());
+    VLOG(agents) << "err: " << *error_msg;
+    return kLoadingError;
+  }
+  // Need to let the function fiddle with the array.
+  std::unique_ptr<char[]> copied_args(new char[args_.size() + 1]);
+  strcpy(copied_args.get(), args_.c_str());
+  // TODO Need to do some checks that we are at a good spot etc.
+  *call_res = onload_(static_cast<JavaVM*>(Runtime::Current()->GetJavaVM()),
+                      copied_args.get(),
+                      nullptr);
+  if (*call_res != 0) {
+    *error_msg = StringPrintf("Initialization of %s returned non-zero value of %d",
+                              name_.c_str(), *call_res);
+    VLOG(agents) << "err: " << *error_msg;
+    return kInitializationError;
+  } else {
+    return kNoError;
+  }
+}
+
+Agent::LoadError Agent::DoDlOpen(/*out*/std::string* error_msg) {
+  DCHECK(error_msg != nullptr);
+  dlopen_handle_ = dlopen(name_.c_str(), RTLD_LAZY);
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Unable to dlopen %s: %s", name_.c_str(), dlerror());
+    return kLoadingError;
+  }
+
+  onload_ = reinterpret_cast<AgentOnLoadFunction>(dlsym(dlopen_handle_,
+                                                        AGENT_ON_LOAD_FUNCTION_NAME));
+  if (onload_ == nullptr) {
+    VLOG(agents) << "Unable to find 'Agent_OnLoad' symbol in " << this;
+  }
+  onattach_ = reinterpret_cast<AgentOnAttachFunction>(dlsym(dlopen_handle_,
+                                                            AGENT_ON_ATTACH_FUNCTION_NAME));
+  if (onattach_ == nullptr) {
+    VLOG(agents) << "Unable to find 'Agent_OnAttach' symbol in " << this;
+  }
+  onunload_ = reinterpret_cast<AgentOnUnloadFunction>(dlsym(dlopen_handle_,
+                                                            AGENT_ON_UNLOAD_FUNCTION_NAME));
+  if (onunload_ == nullptr) {
+    VLOG(agents) << "Unable to find 'Agent_OnUnload' symbol in " << this;
+  }
+  return kNoError;
+}
+
+// TODO Lock some stuff probably.
+void Agent::Unload() {
+  if (dlopen_handle_ != nullptr) {
+    if (onunload_ != nullptr) {
+      onunload_(Runtime::Current()->GetJavaVM());
+    }
+    dlclose(dlopen_handle_);
+    dlopen_handle_ = nullptr;
+  } else {
+    VLOG(agents) << this << " is not currently loaded!";
+  }
+}
+
+Agent::Agent(const Agent& other)
+  : name_(other.name_),
+    args_(other.args_),
+    dlopen_handle_(other.dlopen_handle_),
+    onload_(other.onload_),
+    onattach_(other.onattach_),
+    onunload_(other.onunload_) {
+  if (other.dlopen_handle_ != nullptr) {
+    dlopen(other.name_.c_str(), 0);
+  }
+}
+
+Agent::~Agent() {
+  if (dlopen_handle_ != nullptr) {
+    dlclose(dlopen_handle_);
+  }
+}
+
+std::ostream& operator<<(std::ostream &os, const Agent* m) {
+  return os << *m;
+}
+
+std::ostream& operator<<(std::ostream &os, Agent const& m) {
+  return os << "Agent { name=\"" << m.name_ << "\", args=\"" << m.args_ << "\", handle="
+            << m.dlopen_handle_ << " }";
+}
+
+}  // namespace ti
+}  // namespace art
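
DoDlOpen above is the classic dlopen/dlsym entrypoint lookup. A self-contained sketch of that dance; the entrypoint name matches the file above, everything else is illustrative:

```cpp
#include <dlfcn.h>

#include <cstdio>

using OnLoadFn = int (*)(void* vm, char* options, void* reserved);

bool LoadAgentSketch(const char* path, char* options) {
  void* handle = dlopen(path, RTLD_LAZY);
  if (handle == nullptr) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return false;
  }
  OnLoadFn onload = reinterpret_cast<OnLoadFn>(dlsym(handle, "Agent_OnLoad"));
  if (onload == nullptr) {
    std::fprintf(stderr, "no Agent_OnLoad in %s\n", path);
    dlclose(handle);
    return false;
  }
  // A real runtime passes its JavaVM*; nullptr stands in for it here.
  return onload(nullptr, options, nullptr) == 0;
}
```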
diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h
new file mode 100644
index 0000000..521e21e
--- /dev/null
+++ b/runtime/ti/agent.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_TI_AGENT_H_
+#define ART_RUNTIME_TI_AGENT_H_
+
+#include <dlfcn.h>
+#include <jni.h>  // for jint, JavaVM* etc declarations
+
+#include "base/stringprintf.h"
+#include "runtime.h"
+#include "utils.h"
+
+namespace art {
+namespace ti {
+
+using AgentOnLoadFunction = jint (*)(JavaVM*, const char*, void*);
+using AgentOnAttachFunction = jint (*)(JavaVM*, const char*, void*);
+using AgentOnUnloadFunction = void (*)(JavaVM*);
+
+class Agent {
+ public:
+  enum LoadError {
+    kNoError,              // No error occurred.
+    kAlreadyStarted,       // The agent has already been loaded.
+    kLoadingError,         // dlopen or dlsym returned an error.
+    kInitializationError,  // The entrypoint did not return 0. This might require an abort.
+  };
+
+  bool IsStarted() const {
+    return dlopen_handle_ != nullptr;
+  }
+
+  const std::string& GetName() const {
+    return name_;
+  }
+
+  const std::string& GetArgs() const {
+    return args_;
+  }
+
+  bool HasArgs() const {
+    return !GetArgs().empty();
+  }
+
+  // TODO We need to acquire some locks probably.
+  LoadError Load(/*out*/jint* call_res, /*out*/std::string* error_msg);
+
+  // TODO We need to acquire some locks probably.
+  void Unload();
+
+  // Tries to attach the agent using its OnAttach method. Returns true on success.
+  // TODO We need to acquire some locks probably.
+  LoadError Attach(std::string* error_msg) {
+    // TODO
+    *error_msg = "Attach has not yet been implemented!";
+    return kLoadingError;
+  }
+
+  static Agent Create(std::string arg);
+
+  static Agent Create(std::string name, std::string args) {
+    return Agent(name, args);
+  }
+
+  ~Agent();
+
+  // We need a move constructor and a copy constructor for vectors.
+  Agent(const Agent& other);
+
+  Agent(Agent&& other)
+      : name_(other.name_),
+        args_(other.args_),
+        dlopen_handle_(nullptr),
+        onload_(nullptr),
+        onattach_(nullptr),
+        onunload_(nullptr) {
+    other.dlopen_handle_ = nullptr;
+    other.onload_ = nullptr;
+    other.onattach_ = nullptr;
+    other.onunload_ = nullptr;
+  }
+
+  // We don't need an operator=
+  void operator=(const Agent&) = delete;
+
+ private:
+  Agent(std::string name, std::string args)
+      : name_(name),
+        args_(args),
+        dlopen_handle_(nullptr),
+        onload_(nullptr),
+        onattach_(nullptr),
+        onunload_(nullptr) { }
+
+  LoadError DoDlOpen(/*out*/std::string* error_msg);
+
+  const std::string name_;
+  const std::string args_;
+  void* dlopen_handle_;
+
+  // The entrypoints.
+  AgentOnLoadFunction onload_;
+  AgentOnAttachFunction onattach_;
+  AgentOnUnloadFunction onunload_;
+
+  friend std::ostream& operator<<(std::ostream &os, Agent const& m);
+};
+
+std::ostream& operator<<(std::ostream &os, Agent const& m);
+std::ostream& operator<<(std::ostream &os, const Agent* m);
+
+}  // namespace ti
+}  // namespace art
+
+#endif  // ART_RUNTIME_TI_AGENT_H_
+
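
Create(std::string) above splits an argument like "libagent.so=opt1,opt2" at the first '='. A standalone equivalent of that parse:

```cpp
#include <string>
#include <utility>

// Returns {library name, option string}; the options are empty when no '='
// is present.
std::pair<std::string, std::string> SplitAgentArg(const std::string& arg) {
  size_t eq = arg.find_first_of('=');
  if (eq == std::string::npos) {
    return {arg, ""};
  }
  return {arg.substr(0, eq), arg.substr(eq + 1)};
}
```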
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ab342aa..56a26de 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -19,12 +19,11 @@
 #include <sys/uio.h>
 #include <unistd.h>
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/trace.h"
-
 #include "art_method-inl.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -286,7 +285,7 @@
 
   while (true) {
     usleep(interval_us);
-    ATRACE_BEGIN("Profile sampling");
+    ScopedTrace trace("Profile sampling");
     Thread* self = Thread::Current();
     Trace* the_trace;
     {
@@ -301,7 +300,6 @@
       MutexLock mu(self, *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(GetSample, the_trace);
     }
-    ATRACE_END();
   }
 
   runtime->DetachCurrentThread();
@@ -331,7 +329,7 @@
   std::unique_ptr<File> trace_file;
   if (output_mode != TraceOutputMode::kDDMS) {
     if (trace_fd < 0) {
-      trace_file.reset(OS::CreateEmptyFile(trace_filename));
+      trace_file.reset(OS::CreateEmptyFileWriteOnly(trace_filename));
     } else {
       trace_file.reset(new File(trace_fd, "tracefile"));
       trace_file->DisableAutoClose();
@@ -389,9 +387,10 @@
   bool stop_alloc_counting = false;
   Runtime* const runtime = Runtime::Current();
   Trace* the_trace = nullptr;
+  Thread* const self = Thread::Current();
   pthread_t sampling_pthread = 0U;
   {
-    MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+    MutexLock mu(self, *Locks::trace_lock_);
     if (the_trace_ == nullptr) {
       LOG(ERROR) << "Trace stop requested, but no trace currently running";
     } else {
@@ -409,6 +408,9 @@
   }
 
   {
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
     if (the_trace != nullptr) {
       stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
@@ -417,7 +419,7 @@
       }
 
       if (the_trace->trace_mode_ == TraceMode::kSampling) {
-        MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+        MutexLock mu(self, *Locks::thread_list_lock_);
         runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
       } else {
         runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
@@ -644,31 +646,11 @@
   }
 }
 
-static void GetVisitedMethodsFromBitSets(
-    const std::map<const DexFile*, DexIndexBitSet*>& seen_methods,
-    std::set<ArtMethod*>* visited_methods) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* const self = Thread::Current();
-  for (auto& e : seen_methods) {
-    DexIndexBitSet* bit_set = e.second;
-    // TODO: Visit trace methods as roots.
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(self, *e.first, false);
-    for (uint32_t i = 0; i < bit_set->size(); ++i) {
-      if ((*bit_set)[i]) {
-        visited_methods->insert(dex_cache->GetResolvedMethod(i, sizeof(void*)));
-      }
-    }
-  }
-}
-
 void Trace::FinishTracing() {
   size_t final_offset = 0;
 
   std::set<ArtMethod*> visited_methods;
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    // Write the secondary file with all the method names.
-    GetVisitedMethodsFromBitSets(seen_methods_, &visited_methods);
-
     // Clean up.
     STLDeleteValues(&seen_methods_);
   } else {
@@ -714,8 +696,8 @@
   std::string header(os.str());
 
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    File file;
-    if (!file.Open(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY)) {
+    File file(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY, true);
+    if (!file.IsOpened()) {
       LOG(WARNING) << "Could not open secondary trace file!";
       return;
     }
@@ -815,10 +797,10 @@
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
 
-void Trace::BackwardBranch(Thread* /*thread*/, ArtMethod* method,
-                           int32_t /*dex_pc_offset*/)
+void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
+                   uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-  LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
+  LOG(ERROR) << "Unexpected branch event in tracing" << PrettyMethod(method);
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
@@ -849,11 +831,6 @@
 bool Trace::RegisterMethod(ArtMethod* method) {
   mirror::DexCache* dex_cache = method->GetDexCache();
   const DexFile* dex_file = dex_cache->GetDexFile();
-  auto* resolved_method = dex_cache->GetResolvedMethod(method->GetDexMethodIndex(), sizeof(void*));
-  if (resolved_method != method) {
-    DCHECK(resolved_method == nullptr);
-    dex_cache->SetResolvedMethod(method->GetDexMethodIndex(), method, sizeof(void*));
-  }
   if (seen_methods_.find(dex_file) == seen_methods_.end()) {
     seen_methods_.insert(std::make_pair(dex_file, new DexIndexBitSet()));
   }
@@ -868,7 +845,7 @@
 bool Trace::RegisterThread(Thread* thread) {
   pid_t tid = thread->GetTid();
   CHECK_LT(0U, static_cast<uint32_t>(tid));
-  CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+  CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber);
 
   if (!(*seen_threads_)[tid]) {
     seen_threads_->set(tid);
@@ -878,9 +855,8 @@
 }
 
 std::string Trace::GetMethodLine(ArtMethod* method) {
-  method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-  return StringPrintf("%p\t%s\t%s\t%s\t%s\n",
-                      reinterpret_cast<void*>((EncodeTraceMethod(method) << TraceActionBits)),
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n", (EncodeTraceMethod(method) << TraceActionBits),
       PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
       method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
 }
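
GetMethodLine now prints `EncodeTraceMethod(method) << TraceActionBits` as `%#x`: the low bits of each trace record hold the action, the rest the method id. A sketch of that packing (the 2-bit width mirrors ART's TraceActionBits; the helper names are illustrative):

```cpp
#include <cstdint>

constexpr uint32_t kTraceActionBits = 2;  // Room for four trace actions.
constexpr uint32_t kActionMask = (1u << kTraceActionBits) - 1;

uint32_t PackTraceRecord(uint32_t method_id, uint32_t action) {
  return (method_id << kTraceActionBits) | (action & kActionMask);
}

uint32_t RecordMethod(uint32_t word) { return word >> kTraceActionBits; }
uint32_t RecordAction(uint32_t word) { return word & kActionMask; }
```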
diff --git a/runtime/trace.h b/runtime/trace.h
index 356a81f..9b29fb9 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -41,7 +41,9 @@
 class Thread;
 
 using DexIndexBitSet = std::bitset<65536>;
-using ThreadIDBitSet = std::bitset<65536>;
+
+constexpr size_t kMaxThreadIdNumber = kIsTargetBuild ? 65536U : 1048576U;
+using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>;
 
 enum TracingMode {
   kTracingInactive,
@@ -164,7 +166,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
new file mode 100644
index 0000000..fc9faec
--- /dev/null
+++ b/runtime/type_lookup_table.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "type_lookup_table.h"
+
+#include "base/bit_utils.h"
+#include "dex_file-inl.h"
+#include "utf-inl.h"
+#include "utils.h"
+
+#include <memory>
+#include <cstring>
+
+namespace art {
+
+static uint16_t MakeData(uint16_t class_def_idx, uint32_t hash, uint32_t mask) {
+  uint16_t hash_mask = static_cast<uint16_t>(~mask);
+  return (static_cast<uint16_t>(hash) & hash_mask) | class_def_idx;
+}
+
+TypeLookupTable::~TypeLookupTable() {
+  if (!owns_entries_) {
+    // We don't actually own the entries, don't let the unique_ptr release them.
+    entries_.release();
+  }
+}
+
+uint32_t TypeLookupTable::RawDataLength() const {
+  return RawDataLength(dex_file_);
+}
+
+uint32_t TypeLookupTable::RawDataLength(const DexFile& dex_file) {
+  return RawDataLength(dex_file.NumClassDefs());
+}
+
+uint32_t TypeLookupTable::RawDataLength(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) * sizeof(Entry) : 0u;
+}
+
+uint32_t TypeLookupTable::CalculateMask(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) - 1u : 0u;
+}
+
+bool TypeLookupTable::SupportedSize(uint32_t num_class_defs) {
+  return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max();
+}
+
+TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file, uint8_t* storage) {
+  const uint32_t num_class_defs = dex_file.NumClassDefs();
+  return SupportedSize(num_class_defs)
+      ? new TypeLookupTable(dex_file, storage)
+      : nullptr;
+}
+
+TypeLookupTable* TypeLookupTable::Open(const uint8_t* raw_data, const DexFile& dex_file) {
+  return new TypeLookupTable(raw_data, dex_file);
+}
+
+TypeLookupTable::TypeLookupTable(const DexFile& dex_file, uint8_t* storage)
+    : dex_file_(dex_file),
+      mask_(CalculateMask(dex_file.NumClassDefs())),
+      entries_(storage != nullptr ? reinterpret_cast<Entry*>(storage) : new Entry[mask_ + 1]),
+      owns_entries_(storage == nullptr) {
+  static_assert(alignof(Entry) == 4u, "Expecting Entry to be 4-byte aligned.");
+  DCHECK_ALIGNED(storage, alignof(Entry));
+  std::vector<uint16_t> conflict_class_defs;
+  // The first stage. Put elements on their initial positions. If an initial position is already
+  // occupied then delay the insertion of the element to the second stage to reduce probing
+  // distance.
+  for (size_t i = 0; i < dex_file.NumClassDefs(); ++i) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def.class_idx_);
+    const DexFile::StringId& str_id = dex_file.GetStringId(type_id.descriptor_idx_);
+    const uint32_t hash = ComputeModifiedUtf8Hash(dex_file.GetStringData(str_id));
+    Entry entry;
+    entry.str_offset = str_id.string_data_off_;
+    entry.data = MakeData(i, hash, GetSizeMask());
+    if (!SetOnInitialPos(entry, hash)) {
+      conflict_class_defs.push_back(i);
+    }
+  }
+  // The second stage. The initial position of these elements had a collision. Put these elements
+  // into the nearest free cells and link them together by updating next_pos_delta.
+  for (uint16_t class_def_idx : conflict_class_defs) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx);
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def.class_idx_);
+    const DexFile::StringId& str_id = dex_file.GetStringId(type_id.descriptor_idx_);
+    const uint32_t hash = ComputeModifiedUtf8Hash(dex_file.GetStringData(str_id));
+    Entry entry;
+    entry.str_offset = str_id.string_data_off_;
+    entry.data = MakeData(class_def_idx, hash, GetSizeMask());
+    Insert(entry, hash);
+  }
+}
+
+TypeLookupTable::TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file)
+    : dex_file_(dex_file),
+      mask_(CalculateMask(dex_file.NumClassDefs())),
+      entries_(reinterpret_cast<Entry*>(const_cast<uint8_t*>(raw_data))),
+      owns_entries_(false) {}
+
+bool TypeLookupTable::SetOnInitialPos(const Entry& entry, uint32_t hash) {
+  const uint32_t pos = hash & GetSizeMask();
+  if (!entries_[pos].IsEmpty()) {
+    return false;
+  }
+  entries_[pos] = entry;
+  entries_[pos].next_pos_delta = 0;
+  return true;
+}
+
+void TypeLookupTable::Insert(const Entry& entry, uint32_t hash) {
+  uint32_t pos = FindLastEntryInBucket(hash & GetSizeMask());
+  uint32_t next_pos = (pos + 1) & GetSizeMask();
+  while (!entries_[next_pos].IsEmpty()) {
+    next_pos = (next_pos + 1) & GetSizeMask();
+  }
+  const uint32_t delta = (next_pos >= pos) ? (next_pos - pos) : (next_pos + Size() - pos);
+  entries_[pos].next_pos_delta = delta;
+  entries_[next_pos] = entry;
+  entries_[next_pos].next_pos_delta = 0;
+}
+
+uint32_t TypeLookupTable::FindLastEntryInBucket(uint32_t pos) const {
+  const Entry* entry = &entries_[pos];
+  while (!entry->IsLast()) {
+    pos = (pos + entry->next_pos_delta) & GetSizeMask();
+    entry = &entries_[pos];
+  }
+  return pos;
+}
+
+}  // namespace art
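
A worked example of the MakeData() encoding above: with 2^n buckets (mask = 2^n - 1), the low n bits of the hash already select the bucket, so `data` can store class_def_idx in those bits and hash bits n..15 in the rest. The values below are arbitrary:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t mask = 0xFF;           // 256 buckets, n = 8.
  const uint32_t hash = 0xABCD;         // Descriptor hash.
  const uint16_t class_def_idx = 0x12;  // Must fit in the low n bits.
  const uint16_t hash_mask = static_cast<uint16_t>(~mask);  // 0xFF00.
  const uint16_t data =
      (static_cast<uint16_t>(hash) & hash_mask) | class_def_idx;
  assert(data == 0xAB12);
  // Lookup side: recover the index, and pre-filter on the stored hash bits
  // before doing the expensive string comparison.
  assert((data & mask) == class_def_idx);
  assert((data & hash_mask) == (static_cast<uint16_t>(hash) & hash_mask));
  return 0;
}
```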
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
new file mode 100644
index 0000000..d74d01d
--- /dev/null
+++ b/runtime/type_lookup_table.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_TYPE_LOOKUP_TABLE_H_
+#define ART_RUNTIME_TYPE_LOOKUP_TABLE_H_
+
+#include "dex_file.h"
+#include "leb128.h"
+#include "utf.h"
+
+namespace art {
+
+/**
+ * TypeLookupTable is used to find a class_def_idx by class descriptor quickly.
+ * The implementation is based on a hash table.
+ * The class is instantiated at compile time by calling Create() and is written into the OAT file.
+ * At runtime, the raw data is read from the memory-mapped file by calling Open(). The table
+ * memory remains clean.
+ */
+class TypeLookupTable {
+ public:
+  ~TypeLookupTable();
+
+  // Returns the number of buckets in the lookup table.
+  uint32_t Size() const {
+    return mask_ + 1;
+  }
+
+  // Searches for a class_def_idx by class descriptor and its hash.
+  // Returns DexFile::kDexNoIndex if no matching entry is found.
+  ALWAYS_INLINE uint32_t Lookup(const char* str, uint32_t hash) const {
+    uint32_t pos = hash & GetSizeMask();
+    // Thanks to the insertion algorithm, the element at position pos is either empty or the
+    // start of a bucket.
+    const Entry* entry = &entries_[pos];
+    while (!entry->IsEmpty()) {
+      if (CmpHashBits(entry->data, hash) && IsStringsEquals(str, entry->str_offset)) {
+        return GetClassDefIdx(entry->data);
+      }
+      if (entry->IsLast()) {
+        return DexFile::kDexNoIndex;
+      }
+      pos = (pos + entry->next_pos_delta) & GetSizeMask();
+      entry = &entries_[pos];
+    }
+    return DexFile::kDexNoIndex;
+  }
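+
+  // Lookup() walks a bucket as a chain: starting at hash & GetSizeMask(), it
+  // follows next_pos_delta hops (wrapping via the mask) until it finds a matching
+  // entry, an empty cell, or the last entry of the bucket (next_pos_delta == 0).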
+
+  // Creates a lookup table for the given dex file.
+  static TypeLookupTable* Create(const DexFile& dex_file, uint8_t* storage = nullptr);
+
+  // Opens a lookup table from binary data. The returned table does not own the binary data.
+  static TypeLookupTable* Open(const uint8_t* raw_data, const DexFile& dex_file);
+
+  // Returns a pointer to the raw data of the lookup table. Used by the oat writer.
+  const uint8_t* RawData() const {
+    return reinterpret_cast<const uint8_t*>(entries_.get());
+  }
+
+  // Returns the length of the raw data. Used by the oat writer.
+  uint32_t RawDataLength() const;
+
+  // Returns the length of the raw data for the specified dex file.
+  static uint32_t RawDataLength(const DexFile& dex_file);
+
+  // Returns the length of the raw data for the specified number of class definitions.
+  static uint32_t RawDataLength(uint32_t num_class_defs);
+
+ private:
+  /**
+   * To find an element we need to compare strings.
+   * It is faster to compare hashes first and the strings themselves second.
+   * We do not store the full hash of an element, but we can use two observations:
+   * 1. The low bits of the hash are the same for all elements inside one bucket.
+   * 2. If the dex file contains N classes and the size of the hash table is 2^n (where
+   *    N <= 2^n), then 16 - n bits of the 16-bit data field are free, so we can encode part
+   *    of the element's hash into them.
+   * The hash of an element can therefore be divided into three parts:
+   * XXXX XXXX XXXX YYYY YZZZ ZZZZ ZZZZZ
+   * Z - the part of the hash encoded by the bucket position (these bits of the hash are the
+   * same for all elements in a bucket) - n bits
+   * Y - the part of the hash that we can write into the free 16 - n bits (because only n bits
+   * are needed to store class_def_idx)
+   * X - the part of the hash that we cannot use without increasing the size of an entry
+   * So the data field of Entry stores class_def_idx and a part of the hash of the entry.
+   */
+  struct Entry {
+    uint32_t str_offset;
+    uint16_t data;
+    uint16_t next_pos_delta;
+
+    Entry() : str_offset(0), data(0), next_pos_delta(0) {}
+
+    bool IsEmpty() const {
+      return str_offset == 0;
+    }
+
+    bool IsLast() const {
+      return next_pos_delta == 0;
+    }
+  };
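+
+  // Worked example of the packing above, assuming MakeData(class_def_idx, hash, mask)
+  // (defined in the .cc file) packs the data field as (hash & ~mask & 0xffff) | class_def_idx:
+  // with mask_ == 0xf and class_def_idx == 5 for a descriptor whose hash is 0xabcd,
+  // data == 0xabc5, so GetClassDefIdx(data) == 5 and CmpHashBits(data, 0xabcd) succeeds,
+  // because hash bits 4..15 (0xabc) survive the packing.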
+
+  static uint32_t CalculateMask(uint32_t num_class_defs);
+  static bool SupportedSize(uint32_t num_class_defs);
+
+  // Construct from a dex file.
+  explicit TypeLookupTable(const DexFile& dex_file, uint8_t* storage);
+
+  // Construct from a dex file with existing data.
+  TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file);
+
+  bool IsStringsEquals(const char* str, uint32_t str_offset) const {
+    const uint8_t* ptr = dex_file_.Begin() + str_offset;
+    // Skip string length.
+    DecodeUnsignedLeb128(&ptr);
+    return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
+        str, reinterpret_cast<const char*>(ptr)) == 0;
+  }
+
+  // Extracts the hash bits from an element's data and compares them with
+  // the corresponding bits of the specified hash.
+  bool CmpHashBits(uint32_t data, uint32_t hash) const {
+    uint32_t mask = static_cast<uint16_t>(~GetSizeMask());
+    return (hash & mask) == (data & mask);
+  }
+
+  uint32_t GetClassDefIdx(uint32_t data) const {
+    return data & mask_;
+  }
+
+  uint32_t GetSizeMask() const {
+    return mask_;
+  }
+
+  // Attempts to set an entry in the slot given by its hash. Returns false if there is already
+  // something there, true otherwise.
+  bool SetOnInitialPos(const Entry& entry, uint32_t hash);
+
+  // Inserts an entry, probing until there is an empty slot.
+  void Insert(const Entry& entry, uint32_t hash);
+
+  // Finds the last entry in the bucket chain that starts at cur_pos.
+  uint32_t FindLastEntryInBucket(uint32_t cur_pos) const;
+
+  const DexFile& dex_file_;
+  const uint32_t mask_;
+  std::unique_ptr<Entry[]> entries_;
+  // owns_entries_ specifies if the lookup table owns the entries_ array.
+  const bool owns_entries_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(TypeLookupTable);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_TYPE_LOOKUP_TABLE_H_
diff --git a/runtime/type_lookup_table_test.cc b/runtime/type_lookup_table_test.cc
new file mode 100644
index 0000000..ea4d8b5
--- /dev/null
+++ b/runtime/type_lookup_table_test.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <memory>
+
+#include "common_runtime_test.h"
+#include "dex_file-inl.h"
+#include "scoped_thread_state_change.h"
+#include "type_lookup_table.h"
+#include "utf-inl.h"
+
+namespace art {
+
+static const size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make a copy to prevent linking errors.
+
+using DescriptorClassDefIdxPair = std::pair<const char*, uint32_t>;
+class TypeLookupTableTest : public CommonRuntimeTestWithParam<DescriptorClassDefIdxPair> {};
+
+TEST_F(TypeLookupTableTest, CreateLookupTable) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  ASSERT_NE(nullptr, table->RawData());
+  ASSERT_EQ(32U, table->RawDataLength());
+}
+
+TEST_P(TypeLookupTableTest, Find) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  auto pair = GetParam();
+  const char* descriptor = pair.first;
+  size_t hash = ComputeModifiedUtf8Hash(descriptor);
+  uint32_t class_def_idx = table->Lookup(descriptor, hash);
+  ASSERT_EQ(pair.second, class_def_idx);
+}
+
+INSTANTIATE_TEST_CASE_P(FindNonExistingClassWithoutCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LAB;", 1U)));
+INSTANTIATE_TEST_CASE_P(FindNonExistingClassWithCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LDA;", kDexNoIndex)));
+INSTANTIATE_TEST_CASE_P(FindClassNoCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LC;", 2U)));
+INSTANTIATE_TEST_CASE_P(FindClassWithCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LAB;", 1U)));
+
+}  // namespace art
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 10600e2..7e06482 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -23,28 +23,50 @@
 
 namespace art {
 
+// This is used only by debugger and test code.
 size_t CountModifiedUtf8Chars(const char* utf8) {
+  return CountModifiedUtf8Chars(utf8, strlen(utf8));
+}
+
+/*
+ * This does not validate UTF-8 rules (nor did the older code), but it gets the right
+ * answer for valid UTF-8, which is fine because it is used only to size a buffer for
+ * later conversion.
+ *
+ * Modified UTF-8 encodes Unicode code points of up to 21 bits as a series of bytes:
+ * U+0001  - U+007F   0xxxxxxx
+ * U+0080  - U+07FF   110xxxxx 10xxxxxx
+ * U+0800  - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
+ * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * U+0000 is encoded using the two-byte form to avoid nulls inside strings (this differs
+ * from standard UTF-8).
+ * The four-byte encoding converts to two UTF-16 characters.
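+ *
+ * For example, U+0041 ('A') encodes as 0x41, U+20AC as 0xe2 0x82 0xac, and
+ * U+10400 as the four-byte sequence 0xf0 0x90 0x90 0x80, which counts as two
+ * UTF-16 chars (a surrogate pair).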
+ */
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) {
+  DCHECK_LE(byte_count, strlen(utf8));
   size_t len = 0;
-  int ic;
-  while ((ic = *utf8++) != '\0') {
+  const char* end = utf8 + byte_count;
+  for (; utf8 < end; ++utf8) {
+    int ic = *utf8;
     len++;
-    if ((ic & 0x80) == 0) {
-      // one-byte encoding
+    if (LIKELY((ic & 0x80) == 0)) {
+      // One-byte encoding.
       continue;
     }
-    // two- or three-byte encoding
+    // Two- or three-byte encoding.
     utf8++;
     if ((ic & 0x20) == 0) {
-      // two-byte encoding
+      // Two-byte encoding.
       continue;
     }
     utf8++;
     if ((ic & 0x10) == 0) {
-      // three-byte encoding
+      // Three-byte encoding.
       continue;
     }
 
-    // four-byte encoding: needs to be converted into a surrogate
+    // Four-byte encoding: needs to be converted into a surrogate
     // pair.
     utf8++;
     len++;
@@ -52,6 +74,7 @@
   return len;
 }
 
+// This is used only by debugger and test code.
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
   while (*utf8_data_in != '\0') {
     const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in);
@@ -65,13 +88,53 @@
   }
 }
 
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars,
+                                const char* utf8_data_in, size_t in_bytes) {
+  const char* in_start = utf8_data_in;
+  const char* in_end = utf8_data_in + in_bytes;
+  uint16_t* out_p = utf16_data_out;
+
+  if (LIKELY(out_chars == in_bytes)) {
+    // Common case where all characters are ASCII.
+    for (const char* p = in_start; p < in_end;) {
+      // Safe even if char is signed because ASCII characters always have
+      // the high bit cleared.
+      *out_p++ = dchecked_integral_cast<uint16_t>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
+  for (const char* p = in_start; p < in_end;) {
+    const uint32_t ch = GetUtf16FromUtf8(&p);
+    const uint16_t leading = GetLeadingUtf16Char(ch);
+    const uint16_t trailing = GetTrailingUtf16Char(ch);
+
+    *out_p++ = leading;
+    if (trailing != 0) {
+      *out_p++ = trailing;
+    }
+  }
+}
+
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count) {
+  if (LIKELY(byte_count == char_count)) {
+    // Common case where all characters are ASCII.
+    const uint16_t* utf16_end = utf16_in + char_count;
+    for (const uint16_t* p = utf16_in; p < utf16_end;) {
+      *utf8_out++ = dchecked_integral_cast<char>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
   while (char_count--) {
     const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
-      // char_count == 0 here implies we've encountered an unpaired
+      // A char_count of 0 here implies we've encountered an unpaired
       // surrogate and we have no choice but to encode it as 3-byte UTF
       // sequence. Note that unpaired surrogates can occur as a part of
       // "normal" operation.
@@ -107,16 +170,25 @@
   }
 }
 
-int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) {
+int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length) {
   uint32_t hash = 0;
-  while (char_count--) {
-    hash = hash * 31 + *chars++;
+  while (utf16_length != 0u) {
+    const uint32_t pair = GetUtf16FromUtf8(&utf8);
+    const uint16_t first = GetLeadingUtf16Char(pair);
+    hash = hash * 31 + first;
+    --utf16_length;
+    const uint16_t second = GetTrailingUtf16Char(pair);
+    if (second != 0) {
+      hash = hash * 31 + second;
+      DCHECK_NE(utf16_length, 0u);
+      --utf16_length;
+    }
   }
   return static_cast<int32_t>(hash);
 }
 
-size_t ComputeModifiedUtf8Hash(const char* chars) {
-  size_t hash = 0;
+uint32_t ComputeModifiedUtf8Hash(const char* chars) {
+  uint32_t hash = 0;
   while (*chars != '\0') {
     hash = hash * 31 + *chars++;
   }
@@ -161,34 +233,31 @@
 
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
-  while (char_count--) {
+  const uint16_t* end = chars + char_count;
+  while (chars < end) {
     const uint16_t ch = *chars++;
-    if (ch > 0 && ch <= 0x7f) {
-      ++result;
-    } else if (ch >= 0xd800 && ch <= 0xdbff) {
-      if (char_count > 0) {
+    if (LIKELY(ch != 0 && ch < 0x80)) {
+      result++;
+      continue;
+    }
+    if (ch < 0x800) {
+      result += 2;
+      continue;
+    }
+    if (ch >= 0xd800 && ch < 0xdc00) {
+      if (chars < end) {
         const uint16_t ch2 = *chars;
         // If we find a properly paired surrogate, we emit it as a 4 byte
         // UTF sequence. If we find an unpaired leading or trailing surrogate,
         // we emit it as a 3 byte sequence like would have done earlier.
-        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+        if (ch2 >= 0xdc00 && ch2 < 0xe000) {
           chars++;
-          char_count--;
-
           result += 4;
-        } else {
-          result += 3;
+          continue;
         }
-      } else {
-        // This implies we found an unpaired trailing surrogate at the end
-        // of a string.
-        result += 3;
       }
-    } else if (ch > 0x7ff) {
-      result += 3;
-    } else {
-      result += 2;
     }
+    result += 3;
   }
   return result;
 }
diff --git a/runtime/utf.h b/runtime/utf.h
index 1193d29..7c9c333 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -40,6 +40,7 @@
  * Returns the number of UTF-16 characters in the given modified UTF-8 string.
  */
 size_t CountModifiedUtf8Chars(const char* utf8);
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count);
 
 /*
  * Returns the number of modified UTF-8 bytes needed to represent the given
@@ -51,6 +52,8 @@
  * Convert from Modified UTF-8 to UTF-16.
  */
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in);
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars,
+                                const char* utf8_in, size_t in_bytes);
 
 /*
  * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner
@@ -71,18 +74,29 @@
  * this anyway, so if you want a NUL-terminated string, you know where to
  * put the NUL byte.
  */
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count);
 
 /*
  * The java.lang.String hashCode() algorithm.
  */
 int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, size_t char_count)
     SHARED_REQUIRES(Locks::mutator_lock_);
-int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
+
+template<typename MemoryType>
+int32_t ComputeUtf16Hash(const MemoryType* chars, size_t char_count) {
+  uint32_t hash = 0;
+  while (char_count--) {
+    hash = hash * 31 + *chars++;
+  }
+  return static_cast<int32_t>(hash);
+}
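+
+// For example, ComputeUtf16Hash on the two chars { 'h', 'i' } yields
+// 'h' * 31 + 'i' == 3329, matching java.lang.String.hashCode() for "hi".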
+
+int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length);
 
 // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a
-// size_t and hashes individual chars instead of codepoint words.
-size_t ComputeModifiedUtf8Hash(const char* chars);
+// uint32_t and hashes individual chars instead of codepoint words.
+uint32_t ComputeModifiedUtf8Hash(const char* chars);
 
 /*
  * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string.
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 94a6ea5..3284925 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,7 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <map>
 #include <vector>
 
 namespace art {
@@ -48,7 +49,7 @@
 };
 
 // A test string that contains a UTF-8 encoding of a surrogate pair
-// (code point = U+10400)
+// (code point = U+10400).
 static const uint8_t kSurrogateEncoding[] = {
     0xed, 0xa0, 0x81,
     0xed, 0xb0, 0x80,
@@ -66,13 +67,13 @@
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(1, ptr, start);
 
-  // Two byte sequence
+  // Two byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(3, ptr, start);
 
-  // Three byte sequence
+  // Three byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -84,7 +85,7 @@
   EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(10, ptr, start);
 
-  // Null terminator
+  // Null terminator.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -117,7 +118,8 @@
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
-  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(),
+                             &input[0], input.size());
   EXPECT_EQ(expected, output);
 }
 
@@ -139,10 +141,10 @@
   AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
 
   AssertConversion({
-      0xd802, 0xdc02,  // Surrogate pair
-      0xdef0, 0xdcff,  // Three byte encodings
-      0x0101, 0x0000,  // Two byte encodings
-      'p'   , 'p'      // One byte encoding
+      0xd802, 0xdc02,  // Surrogate pair.
+      0xdef0, 0xdcff,  // Three byte encodings.
+      0x0101, 0x0000,  // Two byte encodings.
+      'p'   , 'p'      // One byte encoding.
     }, {
       0xf0, 0x90, 0xa0, 0x82,
       0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
@@ -155,9 +157,225 @@
   // Unpaired trailing surrogate at the end of input.
   AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
   // Unpaired (or incorrectly paired) surrogates in the middle of the input.
-  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes {
+      {{ 'h' }, { 'h' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x81 }, { 0xc2, 0x81 }},
+      {{ 0x801 }, { 0xe0, 0xa0, 0x81 }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes {
+      {{ 'e' }, { 'e' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x7ff }, { 0xdf, 0xbf }},
+      {{ 0xffff }, { 0xef, 0xbf, 0xbf }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests {
+      {{ 0xd801 }, { 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }},
+      {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }},
+  };
+  for (const auto& prefix : prefixes) {
+    const std::vector<uint16_t>& prefix_in = prefix.first;
+    const std::vector<uint8_t>& prefix_out = prefix.second;
+    for (const auto& test : tests) {
+      const std::vector<uint16_t>& test_in = test.first;
+      const std::vector<uint8_t>& test_out = test.second;
+      for (const auto& suffix : suffixes) {
+        const std::vector<uint16_t>& suffix_in = suffix.first;
+        const std::vector<uint8_t>& suffix_out = suffix.second;
+        std::vector<uint16_t> in = prefix_in;
+        in.insert(in.end(), test_in.begin(), test_in.end());
+        in.insert(in.end(), suffix_in.begin(), suffix_in.end());
+        std::vector<uint8_t> out = prefix_out;
+        out.insert(out.end(), test_out.begin(), test_out.end());
+        out.insert(out.end(), suffix_out.begin(), suffix_out.end());
+        AssertConversion(in, out);
+      }
+    }
+  }
+}
+
+// Old versions of the functions, kept here to compare answers with the optimized versions.
+
+size_t CountModifiedUtf8Chars_reference(const char* utf8) {
+  size_t len = 0;
+  int ic;
+  while ((ic = *utf8++) != '\0') {
+    len++;
+    if ((ic & 0x80) == 0) {
+      // one-byte encoding
+      continue;
+    }
+    // two- or three-byte encoding
+    utf8++;
+    if ((ic & 0x20) == 0) {
+      // two-byte encoding
+      continue;
+    }
+    utf8++;
+    if ((ic & 0x10) == 0) {
+      // three-byte encoding
+      continue;
+    }
+
+    // four-byte encoding: needs to be converted into a surrogate
+    // pair.
+    utf8++;
+    len++;
+  }
+  return len;
+}
+
+static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) {
+  size_t result = 0;
+  while (char_count--) {
+    const uint16_t ch = *chars++;
+    if (ch > 0 && ch <= 0x7f) {
+      ++result;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence like would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
+      } else {
+        // This implies we found an unpaired trailing surrogate at the end
+        // of a string.
+        result += 3;
+      }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
+    }
+  }
+  return result;
+}
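+
+// For the UTF-16 sequence { 'a', 0x20ac }, both CountUtf8Bytes and the reference
+// version above return 1 + 3 == 4 bytes; a properly paired surrogate such as
+// { 0xd801, 0xdc00 } counts as a single 4-byte sequence.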
+
+static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in,
+                                                 size_t char_count) {
+  while (char_count--) {
+    const uint16_t ch = *utf16_in++;
+    if (ch > 0 && ch <= 0x7f) {
+      *utf8_out++ = ch;
+    } else {
+      // A char_count of 0 here implies we've encountered an unpaired
+      // surrogate and we have no choice but to encode it as 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as a part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
+      if (ch > 0x07ff) {
+        // Three byte encoding.
+        *utf8_out++ = (ch >> 12) | 0xe0;
+        *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
+        *utf8_out++ = (ch >> 6) | 0xc0;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      }
+    }
+  }
+}
+
+// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again.
+
+static void codePointToSurrogatePair(uint32_t code_point, uint16_t& first, uint16_t& second) {
+  first = (code_point >> 10) + 0xd7c0;
+  second = (code_point & 0x03ff) + 0xdc00;
+}
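+
+// For example, codePointToSurrogatePair(0x10400, first, second) yields
+// first == 0xd801 and second == 0xdc00, the UTF-16 surrogate pair for U+10400.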
+
+static void testConversions(uint16_t* buf, int char_count) {
+  char bytes_test[8] = { 0 }, bytes_reference[8] = { 0 };
+  uint16_t out_buf_test[4] = { 0 }, out_buf_reference[4] = { 0 };
+  int byte_count_test, byte_count_reference;
+  int char_count_test, char_count_reference;
+
+  // Calculate the number of UTF-8 bytes for the UTF-16 chars.
+  byte_count_reference = CountUtf8Bytes_reference(buf, char_count);
+  byte_count_test = CountUtf8Bytes(buf, char_count);
+  EXPECT_EQ(byte_count_reference, byte_count_test);
+
+  // Convert the UTF-16 string to UTF-8 bytes.
+  ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
+  ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
+  for (int i = 0; i < byte_count_test; ++i) {
+    EXPECT_EQ(bytes_reference[i], bytes_test[i]);
+  }
+
+  // Calculate the number of UTF-16 chars from the UTF-8 bytes.
+  bytes_reference[byte_count_reference] = 0;  // Reference function needs null termination.
+  char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+  char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+  EXPECT_EQ(char_count, char_count_reference);
+  EXPECT_EQ(char_count, char_count_test);
+
+  // Convert the UTF-8 bytes back to UTF-16 chars.
+  // No copied _reference version is needed here because the original function with the
+  // old API is retained for debugger/test code.
+  ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
+  ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
+  for (int i = 0; i < char_count_test; ++i) {
+    EXPECT_EQ(buf[i], out_buf_reference[i]);
+    EXPECT_EQ(buf[i], out_buf_test[i]);
+  }
+}
+
+TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) {
+  for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) {
+    uint16_t buf[4] = { 0 };
+    if (codePoint <= 0xffff) {
+      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
+        // According to the Unicode standard, no character will ever
+        // be assigned to these code points, and they cannot be encoded
+        // into either UTF-16 or UTF-8.
+        continue;
+      }
+      buf[0] = 'h';
+      buf[1] = codePoint;
+      buf[2] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+    } else {
+      buf[0] = 'h';
+      codePointToSurrogatePair(codePoint, buf[1], buf[2]);
+      buf[3] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf, 4);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+      testConversions(buf + 1, 3);
+    }
+  }
 }
 
 }  // namespace art
diff --git a/runtime/utils.cc b/runtime/utils.cc
index dee4f9c..b676ae5 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -46,20 +46,12 @@
 #include <sys/syscall.h>
 #endif
 
-// For DumpNativeStack.
-#include <backtrace/Backtrace.h>
-#include <backtrace/BacktraceMap.h>
-
 #if defined(__linux__)
 #include <linux/unistd.h>
 #endif
 
 namespace art {
 
-#if defined(__linux__)
-static constexpr bool kUseAddr2line = !kIsTargetBuild;
-#endif
-
 pid_t GetTid() {
 #if defined(__APPLE__)
   uint64_t owner;
@@ -136,8 +128,8 @@
 }
 
 bool ReadFileToString(const std::string& file_name, std::string* result) {
-  File file;
-  if (!file.Open(file_name, O_RDONLY)) {
+  File file(file_name, O_RDONLY, false);
+  if (!file.IsOpened()) {
     return false;
   }
 
@@ -155,8 +147,8 @@
 }
 
 bool PrintFileToLog(const std::string& file_name, LogSeverity level) {
-  File file;
-  if (!file.Open(file_name, O_RDONLY)) {
+  File file(file_name, O_RDONLY, false);
+  if (!file.IsOpened()) {
     return false;
   }
 
@@ -1026,209 +1018,6 @@
   return "";
 }
 
-#if defined(__linux__)
-
-ALWAYS_INLINE
-static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
-  if (prefix != nullptr) {
-    *os << prefix;
-  }
-  *os << "  ";
-  if (!odd) {
-    *os << " ";
-  }
-}
-
-static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
-  FILE* stream = popen(cmd.c_str(), "r");
-  if (stream) {
-    if (os != nullptr) {
-      bool odd_line = true;               // We indent them differently.
-      bool wrote_prefix = false;          // Have we already written a prefix?
-      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
-                                          // alt stack, but just to be sure...
-      char buffer[kMaxBuffer];
-      while (!feof(stream)) {
-        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
-          // Split on newlines.
-          char* tmp = buffer;
-          for (;;) {
-            char* new_line = strchr(tmp, '\n');
-            if (new_line == nullptr) {
-              // Print the rest.
-              if (*tmp != 0) {
-                if (!wrote_prefix) {
-                  WritePrefix(os, prefix, odd_line);
-                }
-                wrote_prefix = true;
-                *os << tmp;
-              }
-              break;
-            }
-            if (!wrote_prefix) {
-              WritePrefix(os, prefix, odd_line);
-            }
-            char saved = *(new_line + 1);
-            *(new_line + 1) = 0;
-            *os << tmp;
-            *(new_line + 1) = saved;
-            tmp = new_line + 1;
-            odd_line = !odd_line;
-            wrote_prefix = false;
-          }
-        }
-      }
-    }
-    pclose(stream);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
-                      const char* prefix) {
-  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
-                                   map_src.c_str(), offset));
-  RunCommand(cmdline.c_str(), &os, prefix);
-}
-
-static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
-  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
-      method->GetEntryPointFromQuickCompiledCode()));
-  if (code == 0) {
-    return pc == 0;
-  }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-  return code <= pc && pc <= (code + code_size);
-}
-#endif
-
-void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
-    ArtMethod* current_method, void* ucontext_ptr) {
-#if __linux__
-  // b/18119146
-  if (RUNNING_ON_MEMORY_TOOL != 0) {
-    return;
-  }
-
-  BacktraceMap* map = existing_map;
-  std::unique_ptr<BacktraceMap> tmp_map;
-  if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(tid));
-    map = tmp_map.get();
-  }
-  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
-  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
-    os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
-    return;
-  } else if (backtrace->NumFrames() == 0) {
-    os << prefix << "(no native stack frames for thread " << tid << ")\n";
-    return;
-  }
-
-  // Check whether we have and should use addr2line.
-  bool use_addr2line;
-  if (kUseAddr2line) {
-    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
-    // and print to stderr.
-    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
-  } else {
-    use_addr2line = false;
-  }
-
-  for (Backtrace::const_iterator it = backtrace->begin();
-       it != backtrace->end(); ++it) {
-    // We produce output like this:
-    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
-    // In order for parsing tools to continue to function, the stack dump
-    // format must at least adhere to this format:
-    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
-    // The parsers require a single space before and after pc, and two spaces
-    // after the <RELATIVE_ADDR>. There can be any prefix data before the
-    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
-    os << prefix << StringPrintf("#%02zu pc ", it->num);
-    bool try_addr2line = false;
-    if (!BacktraceMap::IsValid(it->map)) {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
-                                                            : "%08" PRIxPTR "  ???",
-                         it->pc);
-    } else {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
-                                                            : "%08" PRIxPTR "  ",
-                         BacktraceMap::GetRelativePc(it->map, it->pc));
-      os << it->map.name;
-      os << " (";
-      if (!it->func_name.empty()) {
-        os << it->func_name;
-        if (it->func_offset != 0) {
-          os << "+" << it->func_offset;
-        }
-        try_addr2line = true;
-      } else if (current_method != nullptr &&
-          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
-          PcIsWithinQuickCode(current_method, it->pc)) {
-        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
-        os << JniLongName(current_method) << "+"
-           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
-      } else {
-        os << "???";
-      }
-      os << ")";
-    }
-    os << "\n";
-    if (try_addr2line && use_addr2line) {
-      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
-    }
-  }
-#else
-  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
-#endif
-}
-
-#if defined(__APPLE__)
-
-// TODO: is there any way to get the kernel stack on Mac OS?
-void DumpKernelStack(std::ostream&, pid_t, const char*, bool) {}
-
-#else
-
-void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
-  if (tid == GetTid()) {
-    // There's no point showing that we're reading our stack out of /proc!
-    return;
-  }
-
-  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
-  std::string kernel_stack;
-  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
-    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
-    return;
-  }
-
-  std::vector<std::string> kernel_stack_frames;
-  Split(kernel_stack, '\n', &kernel_stack_frames);
-  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
-  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
-  kernel_stack_frames.pop_back();
-  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
-    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
-    // into "futex_wait_queue_me+0xcd/0x110".
-    const char* text = kernel_stack_frames[i].c_str();
-    const char* close_bracket = strchr(text, ']');
-    if (close_bracket != nullptr) {
-      text = close_bracket + 2;
-    }
-    os << prefix;
-    if (include_count) {
-      os << StringPrintf("#%02zd ", i);
-    }
-    os << text << "\n";
-  }
-}
-
-#endif
-
 const char* GetAndroidRoot() {
   const char* android_root = getenv("ANDROID_ROOT");
   if (android_root == nullptr) {
@@ -1298,58 +1087,18 @@
   }
 }
 
-static std::string GetDalvikCacheImpl(const char* subdir,
-                                      const bool create_if_absent,
-                                      const bool abort_on_error) {
+std::string GetDalvikCache(const char* subdir) {
   CHECK(subdir != nullptr);
   const char* android_data = GetAndroidData();
   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
   if (!OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (!create_if_absent) {
-      // TODO: Check callers. Traditional behavior is to not to abort, even when abort_on_error.
-      return "";
-    }
-
-    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
-    if (strcmp(android_data, "/data") == 0) {
-      if (abort_on_error) {
-        LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache
-                   << ", cannot create /data dalvik-cache.";
-        UNREACHABLE();
-      }
-      return "";
-    }
-
-    int result = mkdir(dalvik_cache_root.c_str(), 0700);
-    if (result != 0 && errno != EEXIST) {
-      if (abort_on_error) {
-        PLOG(FATAL) << "Failed to create dalvik-cache root directory " << dalvik_cache_root;
-        UNREACHABLE();
-      }
-      return "";
-    }
-
-    result = mkdir(dalvik_cache.c_str(), 0700);
-    if (result != 0) {
-      if (abort_on_error) {
-        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
-        UNREACHABLE();
-      }
-      return "";
-    }
+    // TODO: Check callers. Traditional behavior is to not abort.
+    return "";
   }
   return dalvik_cache;
 }
 
-std::string GetDalvikCache(const char* subdir, const bool create_if_absent) {
-  return GetDalvikCacheImpl(subdir, create_if_absent, false);
-}
-
-std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
-  return GetDalvikCacheImpl(subdir, create_if_absent, true);
-}
-
 bool GetDalvikCacheFilename(const char* location, const char* cache_location,
                             std::string* filename, std::string* error_msg) {
   if (location[0] != '/') {
@@ -1366,15 +1115,6 @@
   return true;
 }
 
-std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
-  std::string ret;
-  std::string error_msg;
-  if (!GetDalvikCacheFilename(location, cache_location, &ret, &error_msg)) {
-    LOG(FATAL) << error_msg;
-  }
-  return ret;
-}
-
 static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
   // in = /foo/bar/baz
   // out = /foo/bar/<isa>/baz
@@ -1392,24 +1132,8 @@
   return filename;
 }
 
-bool IsZipMagic(uint32_t magic) {
-  return (('P' == ((magic >> 0) & 0xff)) &&
-          ('K' == ((magic >> 8) & 0xff)));
-}
-
-bool IsDexMagic(uint32_t magic) {
-  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
-}
-
-bool IsOatMagic(uint32_t magic) {
-  return (memcmp(reinterpret_cast<const uint8_t*>(magic),
-                 OatHeader::kOatMagic,
-                 sizeof(OatHeader::kOatMagic)) == 0);
-}
-
-bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
+int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
   const std::string command_line(Join(arg_vector, ' '));
-
   CHECK_GE(arg_vector.size(), 1U) << command_line;
 
   // Convert the args to char pointers.
@@ -1432,34 +1156,56 @@
     setpgid(0, 0);
 
     execv(program, &args[0]);
-
     PLOG(ERROR) << "Failed to execv(" << command_line << ")";
-    exit(1);
+    // _exit to avoid atexit handlers in child.
+    _exit(1);
   } else {
     if (pid == -1) {
       *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
                                 command_line.c_str(), strerror(errno));
-      return false;
+      return -1;
     }
 
     // wait for subprocess to finish
-    int status;
+    int status = -1;
     pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
     if (got_pid != pid) {
       *error_msg = StringPrintf("Failed after fork for execv(%s) because waitpid failed: "
                                 "wanted %d, got %d: %s",
                                 command_line.c_str(), pid, got_pid, strerror(errno));
-      return false;
+      return -1;
     }
-    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-      *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
-                                command_line.c_str());
-      return false;
+    if (WIFEXITED(status)) {
+      return WEXITSTATUS(status);
     }
+    return -1;
+  }
+}
+
+bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
+  int status = ExecAndReturnCode(arg_vector, error_msg);
+  if (status != 0) {
+    const std::string command_line(Join(arg_vector, ' '));
+    *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
+                              command_line.c_str());
+    return false;
   }
   return true;
 }
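+
+// Illustrative usage of ExecAndReturnCode (the command path below is
+// hypothetical): the call blocks until the child exits and returns its exit
+// status, or -1 if the fork or waitpid failed (error_msg is then filled in):
+//
+//   std::vector<std::string> args = {"/system/bin/ls", "/"};
+//   std::string error_msg;
+//   int status = ExecAndReturnCode(args, &error_msg);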
 
+bool FileExists(const std::string& filename) {
+  struct stat buffer;
+  return stat(filename.c_str(), &buffer) == 0;
+}
+
+bool FileExistsAndNotEmpty(const std::string& filename) {
+  struct stat buffer;
+  if (stat(filename.c_str(), &buffer) != 0) {
+    return false;
+  }
+  return buffer.st_size > 0;
+}
+
 std::string PrettyDescriptor(Primitive::Type type) {
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
@@ -1835,4 +1581,55 @@
   os << "Something went wrong, didn't find the method in the class data.";
 }
 
+static void ParseStringAfterChar(const std::string& s,
+                                 char c,
+                                 std::string* parsed_value,
+                                 UsageFn Usage) {
+  std::string::size_type pos = s.find(c);
+  if (pos == std::string::npos) {
+    Usage("Missing char %c in option %s\n", c, s.c_str());
+  }
+  // Add one to skip the character we searched for.
+  *parsed_value = s.substr(pos + 1);
+}
+
+void ParseDouble(const std::string& option,
+                 char after_char,
+                 double min,
+                 double max,
+                 double* parsed_value,
+                 UsageFn Usage) {
+  std::string substring;
+  ParseStringAfterChar(option, after_char, &substring, Usage);
+  bool sane_val = true;
+  double value;
+  if ((false)) {
+    // TODO: this doesn't seem to work on the emulator.  b/15114595
+    std::stringstream iss(substring);
+    iss >> value;
+    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
+    sane_val = iss.eof() && (value >= min) && (value <= max);
+  } else {
+    char* end = nullptr;
+    value = strtod(substring.c_str(), &end);
+    sane_val = *end == '\0' && value >= min && value <= max;
+  }
+  if (!sane_val) {
+    Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
+  }
+  *parsed_value = value;
+}
+
+int64_t GetFileSizeBytes(const std::string& filename) {
+  struct stat stat_buf;
+  int rc = stat(filename.c_str(), &stat_buf);
+  return rc == 0 ? stat_buf.st_size : -1;
+}
+
+void SleepForever() {
+  while (true) {
+    usleep(1000000);
+  }
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index bd52b68..84079e2 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -18,16 +18,20 @@
 #define ART_RUNTIME_UTILS_H_
 
 #include <pthread.h>
+#include <stdlib.h>
 
 #include <limits>
 #include <memory>
+#include <random>
 #include <string>
 #include <type_traits>
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "base/stringpiece.h"
 #include "globals.h"
 #include "primitive.h"
 
@@ -35,7 +39,6 @@
 
 namespace art {
 
-class ArtCode;
 class ArtField;
 class ArtMethod;
 class DexFile;
@@ -96,6 +99,23 @@
   return (ch < ' ' || ch > '~');
 }
 
+template <typename T> T SafeAbs(T value) {
+  // std::abs has undefined behavior on min limits.
+  DCHECK_NE(value, std::numeric_limits<T>::min());
+  return std::abs(value);
+}
+
+template <typename T> T AbsOrMin(T value) {
+  return (value == std::numeric_limits<T>::min())
+      ? value
+      : std::abs(value);
+}
+
+template <typename T>
+inline typename std::make_unsigned<T>::type MakeUnsigned(T x) {
+  return static_cast<typename std::make_unsigned<T>::type>(x);
+}
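+
+// For example, with T == int32_t: SafeAbs(-5) == 5 (SafeAbs(INT32_MIN) trips the
+// DCHECK), AbsOrMin(INT32_MIN) == INT32_MIN, and
+// MakeUnsigned(int32_t{-1}) == 0xffffffffu.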
+
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
@@ -222,21 +242,6 @@
 // implementation-defined limit.
 void SetThreadName(const char* thread_name);
 
-// Dumps the native stack for thread 'tid' to 'os'.
-void DumpNativeStack(std::ostream& os,
-                     pid_t tid,
-                     BacktraceMap* map = nullptr,
-                     const char* prefix = "",
-                     ArtMethod* current_method = nullptr,
-                     void* ucontext = nullptr)
-    NO_THREAD_SAFETY_ANALYSIS;
-
-// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
-void DumpKernelStack(std::ostream& os,
-                     pid_t tid,
-                     const char* prefix = "",
-                     bool include_count = true);
-
 // Find $ANDROID_ROOT, /system, or abort.
 const char* GetAndroidRoot();
 
@@ -246,11 +251,8 @@
 const char* GetAndroidDataSafe(std::string* error_msg);
 
 // Returns the dalvik-cache location, with subdir appended. Returns the empty string if the cache
-// could not be found (or created).
-std::string GetDalvikCache(const char* subdir, bool create_if_absent = true);
-// Returns the dalvik-cache location, or dies trying. subdir will be
-// appended to the cache location.
-std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
+// could not be found.
+std::string GetDalvikCache(const char* subdir);
 // Return true if we found the dalvik cache and stored it in the dalvik_cache argument.
 // have_android_data will be set to true if we have an ANDROID_DATA that exists,
 // dalvik_cache_exists will be true if there is a dalvik-cache directory that is present.
@@ -262,21 +264,17 @@
 // rooted at cache_location.
 bool GetDalvikCacheFilename(const char* file_location, const char* cache_location,
                             std::string* filename, std::string* error_msg);
-// Returns the absolute dalvik-cache path for a DexFile or OatFile, or
-// dies trying. The path returned will be rooted at cache_location.
-std::string GetDalvikCacheFilenameOrDie(const char* file_location,
-                                        const char* cache_location);
 
 // Returns the system location for an image
 std::string GetSystemImageFilename(const char* location, InstructionSet isa);
 
-// Check whether the given magic matches a known file type.
-bool IsZipMagic(uint32_t magic);
-bool IsDexMagic(uint32_t magic);
-bool IsOatMagic(uint32_t magic);
-
 // Wrapper on fork/execv to run a command in a subprocess.
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg);
+int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg);
+
+// Returns true if the file exists.
+bool FileExists(const std::string& filename);
+bool FileExistsAndNotEmpty(const std::string& filename);
 
 class VoidFunctor {
  public:
@@ -321,6 +319,84 @@
   return reinterpret_cast<const void*>(code);
 }
 
+using UsageFn = void (*)(const char*, ...);
+
+template <typename T>
+static void ParseUintOption(const StringPiece& option,
+                            const std::string& option_name,
+                            T* out,
+                            UsageFn Usage,
+                            bool is_long_option = true) {
+  std::string option_prefix = option_name + (is_long_option ? "=" : "");
+  DCHECK(option.starts_with(option_prefix)) << option << " " << option_prefix;
+  const char* value_string = option.substr(option_prefix.size()).data();
+  int64_t parsed_integer_value = 0;
+  if (!ParseInt(value_string, &parsed_integer_value)) {
+    Usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
+  }
+  if (parsed_integer_value < 0) {
+    Usage("%s passed a negative value %d", option_name.c_str(), parsed_integer_value);
+  }
+  *out = dchecked_integral_cast<T>(parsed_integer_value);
+}
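+
+// Illustrative use (the option name below is hypothetical): for option ==
+// "--threads=4", ParseUintOption(option, "--threads", &thread_count, Usage)
+// stores 4 in thread_count; a non-integer or negative value routes through
+// Usage().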
+
+void ParseDouble(const std::string& option,
+                 char after_char,
+                 double min,
+                 double max,
+                 double* parsed_value,
+                 UsageFn Usage);
+
+#if defined(__BIONIC__)
+struct Arc4RandomGenerator {
+  typedef uint32_t result_type;
+  static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
+  static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
+  uint32_t operator() () { return arc4random(); }
+};
+using RNG = Arc4RandomGenerator;
+#else
+using RNG = std::random_device;
+#endif
+
+template <typename T>
+static T GetRandomNumber(T min, T max) {
+  CHECK_LT(min, max);
+  std::uniform_int_distribution<T> dist(min, max);
+  RNG rng;
+  return dist(rng);
+}
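+
+// For example, GetRandomNumber<uint32_t>(0u, 9u) draws uniformly from [0, 9],
+// using arc4random() on bionic and std::random_device elsewhere; CHECK_LT
+// requires min to be strictly less than max.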
+
+// Inserts all of the elements from one container to another.
+template <typename Dest, typename Src>
+static void AddAll(Dest& dest, const Src& src) {
+  dest.insert(src.begin(), src.end());
+}
+
+// Returns the file size in bytes, or -1 if the file does not exist.
+int64_t GetFileSizeBytes(const std::string& filename);
+
+// Sleep forever and never come back.
+NO_RETURN void SleepForever();
+
+inline void FlushInstructionCache(char* begin, char* end) {
+  // Only use __builtin___clear_cache with Clang or with GCC >= 4.3.0
+  // (__builtin___clear_cache was introduced in GCC 4.3.0).
+#if defined(__clang__) || GCC_VERSION >= 40300
+  __builtin___clear_cache(begin, end);
+#else
+  // Only warn on non-Intel platforms, as x86 and x86-64 do not need
+  // cache flush instructions, as long as the "code uses the same
+  // linear address for modifying and fetching the instruction". See
+  // "Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+  // Volume 3A: System Programming Guide, Part 1", section 11.6
+  // "Self-Modifying Code".
+#if !defined(__i386__) && !defined(__x86_64__)
+  UNIMPLEMENTED(WARNING) << "cache flush";
+#endif
+#endif
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index 90e24b9..4c63156 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -23,44 +23,50 @@
 #include "base/logging.h"
 #include "gc_root.h"
 #include "globals.h"
+#include "mirror/dex_cache.h"
 #include "primitive.h"
 
 namespace art {
 
-inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size,
+inline DexCacheArraysLayout::DexCacheArraysLayout(PointerSize pointer_size,
                                                   const DexFile::Header& header)
     : pointer_size_(pointer_size),
       /* types_offset_ is always 0u, so it's constexpr */
-      methods_offset_(types_offset_ +
-                      RoundUp(TypesSize(header.type_ids_size_), MethodsAlignment())),
-      strings_offset_(methods_offset_ +
-                      RoundUp(MethodsSize(header.method_ids_size_), StringsAlignment())),
-      fields_offset_(strings_offset_ +
-                     RoundUp(StringsSize(header.string_ids_size_), FieldsAlignment())),
-      size_(fields_offset_ +
-            RoundUp(FieldsSize(header.field_ids_size_), Alignment())) {
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+      methods_offset_(
+          RoundUp(types_offset_ + TypesSize(header.type_ids_size_), MethodsAlignment())),
+      strings_offset_(
+          RoundUp(methods_offset_ + MethodsSize(header.method_ids_size_), StringsAlignment())),
+      fields_offset_(
+          RoundUp(strings_offset_ + StringsSize(header.string_ids_size_), FieldsAlignment())),
+      size_(
+          RoundUp(fields_offset_ + FieldsSize(header.field_ids_size_), Alignment())) {
 }
 
-inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file)
+inline DexCacheArraysLayout::DexCacheArraysLayout(PointerSize pointer_size, const DexFile* dex_file)
     : DexCacheArraysLayout(pointer_size, dex_file->GetHeader()) {
 }
 
-inline size_t DexCacheArraysLayout::Alignment() const {
+inline constexpr size_t DexCacheArraysLayout::Alignment() {
   // GcRoot<> alignment is 4, i.e. lower than or equal to the pointer alignment.
   static_assert(alignof(GcRoot<mirror::Class>) == 4, "Expecting alignof(GcRoot<>) == 4");
-  static_assert(alignof(GcRoot<mirror::String>) == 4, "Expecting alignof(GcRoot<>) == 4");
-  DCHECK(pointer_size_ == 4u || pointer_size_ == 8u);
-  // Pointer alignment is the same as pointer size.
-  return pointer_size_;
+  static_assert(alignof(mirror::StringDexCacheType) == 8, "Expecting alignof(StringDexCacheType) == 8");
+  return alignof(mirror::StringDexCacheType);
+}
+
+template <typename T>
+static constexpr PointerSize GcRootAsPointerSize() {
+  return ConvertToPointerSize(sizeof(GcRoot<T>));
 }
 
 inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const {
-  return types_offset_ + ElementOffset(sizeof(GcRoot<mirror::Class>), type_idx);
+  return types_offset_ + ElementOffset(GcRootAsPointerSize<mirror::Class>(), type_idx);
 }
 
 inline size_t DexCacheArraysLayout::TypesSize(size_t num_elements) const {
-  return ArraySize(sizeof(GcRoot<mirror::Class>), num_elements);
+  // App image patching relies on having enough room for a forwarding pointer in the types array.
+  // See FixupArtMethodArrayVisitor and ClassLinker::AddImageSpace.
+  return std::max(ArraySize(GcRootAsPointerSize<mirror::Class>(), num_elements),
+                  static_cast<size_t>(pointer_size_));
 }
 
 inline size_t DexCacheArraysLayout::TypesAlignment() const {
@@ -72,23 +78,31 @@
 }
 
 inline size_t DexCacheArraysLayout::MethodsSize(size_t num_elements) const {
-  return ArraySize(pointer_size_, num_elements);
+  // App image patching relies on having enough room for a forwarding pointer in the methods array.
+  return std::max(ArraySize(pointer_size_, num_elements), static_cast<size_t>(pointer_size_));
 }
 
 inline size_t DexCacheArraysLayout::MethodsAlignment() const {
-  return pointer_size_;
+  return static_cast<size_t>(pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const {
-  return strings_offset_ + ElementOffset(sizeof(GcRoot<mirror::String>), string_idx);
+  return strings_offset_ + ElementOffset(PointerSize::k64,
+                                         string_idx % mirror::DexCache::kDexCacheStringCacheSize);
 }
 
 inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const {
-  return ArraySize(sizeof(GcRoot<mirror::String>), num_elements);
+  size_t cache_size = mirror::DexCache::kDexCacheStringCacheSize;
+  if (num_elements < cache_size) {
+    cache_size = num_elements;
+  }
+  return ArraySize(PointerSize::k64, cache_size);
 }
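+
+// Worked example for the modulo mapping above, assuming kDexCacheStringCacheSize
+// is 1024 (its actual value lives in mirror/dex_cache.h): string_idx 1500 maps to
+// slot 1500 % 1024 == 476, at byte offset strings_offset_ + 476 * 8, since each
+// cache slot is 8 bytes (PointerSize::k64) wide.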
 
 inline size_t DexCacheArraysLayout::StringsAlignment() const {
-  return alignof(GcRoot<mirror::String>);
+  static_assert(alignof(mirror::StringDexCacheType) == 8,
+                "Expecting alignof(StringDexCacheType) == 8");
+  return alignof(mirror::StringDexCacheType);
 }
 
 inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
@@ -100,15 +114,15 @@
 }
 
 inline size_t DexCacheArraysLayout::FieldsAlignment() const {
-  return pointer_size_;
+  return static_cast<size_t>(pointer_size_);
 }
 
-inline size_t DexCacheArraysLayout::ElementOffset(size_t element_size, uint32_t idx) {
-  return element_size * idx;
+inline size_t DexCacheArraysLayout::ElementOffset(PointerSize element_size, uint32_t idx) {
+  return static_cast<size_t>(element_size) * idx;
 }
 
-inline size_t DexCacheArraysLayout::ArraySize(size_t element_size, uint32_t num_elements) {
-  return element_size * num_elements;
+inline size_t DexCacheArraysLayout::ArraySize(PointerSize element_size, uint32_t num_elements) {
+  return static_cast<size_t>(element_size) * num_elements;
 }
 
 }  // namespace art
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index cd84460..20ffa90 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -31,7 +31,7 @@
   // Construct an invalid layout.
   DexCacheArraysLayout()
       : /* types_offset_ is always 0u */
-        pointer_size_(0u),
+        pointer_size_(kRuntimePointerSize),
         methods_offset_(0u),
         strings_offset_(0u),
         fields_offset_(0u),
@@ -39,10 +39,10 @@
   }
 
   // Construct a layout for a particular dex file header.
-  DexCacheArraysLayout(size_t pointer_size, const DexFile::Header& header);
+  DexCacheArraysLayout(PointerSize pointer_size, const DexFile::Header& header);
 
   // Construct a layout for a particular dex file.
-  DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file);
+  DexCacheArraysLayout(PointerSize pointer_size, const DexFile* dex_file);
 
   bool Valid() const {
     return Size() != 0u;
@@ -52,7 +52,7 @@
     return size_;
   }
 
-  size_t Alignment() const;
+  static constexpr size_t Alignment();
 
   size_t TypesOffset() const {
     return types_offset_;
@@ -96,17 +96,17 @@
 
  private:
   static constexpr size_t types_offset_ = 0u;
-  const size_t pointer_size_;  // Must be first for construction initialization order.
+  const PointerSize pointer_size_;  // Must be first for construction initialization order.
   const size_t methods_offset_;
   const size_t strings_offset_;
   const size_t fields_offset_;
   const size_t size_;
 
-  static size_t Alignment(size_t pointer_size);
+  static size_t Alignment(PointerSize pointer_size);
 
-  static size_t ElementOffset(size_t element_size, uint32_t idx);
+  static size_t ElementOffset(PointerSize element_size, uint32_t idx);
 
-  static size_t ArraySize(size_t element_size, uint32_t num_elements);
+  static size_t ArraySize(PointerSize element_size, uint32_t num_elements);
 };
 
 }  // namespace art
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index f00edff..0a01cdb 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -16,6 +16,7 @@
 
 #include "utils.h"
 
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "mirror/array.h"
@@ -187,17 +188,17 @@
   ASSERT_TRUE(c != nullptr);
   ArtMethod* m;
 
-  m = c->FindVirtualMethod("charAt", "(I)C", sizeof(void*));
+  m = c->FindVirtualMethod("charAt", "(I)C", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
   EXPECT_EQ("Java_java_lang_String_charAt", JniShortName(m));
   EXPECT_EQ("Java_java_lang_String_charAt__I", JniLongName(m));
 
-  m = c->FindVirtualMethod("indexOf", "(Ljava/lang/String;I)I", sizeof(void*));
+  m = c->FindVirtualMethod("indexOf", "(Ljava/lang/String;I)I", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
   EXPECT_EQ("Java_java_lang_String_indexOf", JniShortName(m));
   EXPECT_EQ("Java_java_lang_String_indexOf__Ljava_lang_String_2I", JniLongName(m));
 
-  m = c->FindDirectMethod("copyValueOf", "([CII)Ljava/lang/String;", sizeof(void*));
+  m = c->FindDirectMethod("copyValueOf", "([CII)Ljava/lang/String;", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
   EXPECT_EQ("Java_java_lang_String_copyValueOf", JniShortName(m));
   EXPECT_EQ("Java_java_lang_String_copyValueOf___3CII", JniLongName(m));
@@ -321,26 +322,30 @@
   EXPECT_FALSE(EndsWith("oo", "foo"));
 }
 
-TEST_F(UtilsTest, GetDalvikCacheFilenameOrDie) {
-  EXPECT_STREQ("/foo/system@app@Foo.apk@classes.dex",
-               GetDalvikCacheFilenameOrDie("/system/app/Foo.apk", "/foo").c_str());
+TEST_F(UtilsTest, GetDalvikCacheFilename) {
+  std::string name;
+  std::string error;
 
-  EXPECT_STREQ("/foo/data@app@foo-1.apk@classes.dex",
-               GetDalvikCacheFilenameOrDie("/data/app/foo-1.apk", "/foo").c_str());
-  EXPECT_STREQ("/foo/system@framework@core.jar@classes.dex",
-               GetDalvikCacheFilenameOrDie("/system/framework/core.jar", "/foo").c_str());
-  EXPECT_STREQ("/foo/system@framework@boot.art",
-               GetDalvikCacheFilenameOrDie("/system/framework/boot.art", "/foo").c_str());
-  EXPECT_STREQ("/foo/system@framework@boot.oat",
-               GetDalvikCacheFilenameOrDie("/system/framework/boot.oat", "/foo").c_str());
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/app/Foo.apk", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@app@Foo.apk@classes.dex", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/data/app/foo-1.apk", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/data@app@foo-1.apk@classes.dex", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/framework/core.jar", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@framework@core.jar@classes.dex", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/framework/boot.art", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@framework@boot.art", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/framework/boot.oat", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@framework@boot.oat", name);
 }
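
The rewritten test exercises the non-aborting replacement for `GetDalvikCacheFilenameOrDie`: a `bool` result plus out-parameters for the filename and the error message, leaving the failure policy to the caller. A minimal sketch of that signature style; the `@` mangling below only mimics the simple cases in the test, not ART's full path handling:

#include <cassert>
#include <string>

// Sketch of the bool + out-param convention.
bool GetCacheFilename(const std::string& location, const std::string& cache_dir,
                      std::string* name, std::string* error) {
  if (location.empty() || location[0] != '/') {
    *error = "Expected path in location to be absolute: " + location;
    return false;
  }
  std::string mangled = location.substr(1);  // drop the leading '/'
  for (char& c : mangled) {
    if (c == '/') c = '@';
  }
  *name = cache_dir + "/" + mangled + "@classes.dex";
  return true;
}

int main() {
  std::string name;
  std::string error;
  assert(GetCacheFilename("/system/app/Foo.apk", "/foo", &name, &error));
  assert(name == "/foo/system@app@Foo.apk@classes.dex");
  assert(!GetCacheFilename("relative/path", "/foo", &name, &error));  // error reported, no abort
  return 0;
}
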
 
 TEST_F(UtilsTest, GetDalvikCache) {
-  EXPECT_STREQ("", GetDalvikCache("should-not-exist123", false).c_str());
+  EXPECT_STREQ("", GetDalvikCache("should-not-exist123").c_str());
 
-  EXPECT_STREQ((android_data_ + "/dalvik-cache/.").c_str(), GetDalvikCache(".", false).c_str());
-  EXPECT_STREQ((android_data_ + "/dalvik-cache/should-not-be-there").c_str(),
-               GetDalvikCache("should-not-be-there", true).c_str());
+  EXPECT_STREQ((android_data_ + "/dalvik-cache/.").c_str(), GetDalvikCache(".").c_str());
 }
 
 
diff --git a/runtime/verifier/dex_gc_map.cc b/runtime/verifier/dex_gc_map.cc
deleted file mode 100644
index c435f9f..0000000
--- a/runtime/verifier/dex_gc_map.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "verifier/dex_gc_map.h"
-
-#include "base/logging.h"
-
-namespace art {
-namespace verifier {
-
-const uint8_t* DexPcToReferenceMap::FindBitMap(uint16_t dex_pc, bool error_if_not_present) const {
-  size_t num_entries = NumEntries();
-  // Do linear or binary search?
-  static const size_t kSearchThreshold = 8;
-  if (num_entries < kSearchThreshold) {
-    for (size_t i = 0; i < num_entries; i++)  {
-      if (GetDexPc(i) == dex_pc) {
-        return GetBitMap(i);
-      }
-    }
-  } else {
-    int lo = 0;
-    int hi = num_entries -1;
-    while (hi >= lo) {
-      int mid = (hi + lo) / 2;
-      int mid_pc = GetDexPc(mid);
-      if (dex_pc > mid_pc) {
-        lo = mid + 1;
-      } else if (dex_pc < mid_pc) {
-        hi = mid - 1;
-      } else {
-        return GetBitMap(mid);
-      }
-    }
-  }
-  if (error_if_not_present) {
-    LOG(ERROR) << "Didn't find reference bit map for dex_pc " << dex_pc;
-  }
-  return nullptr;
-}
-
-}  // namespace verifier
-}  // namespace art
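
For reference, the deleted `FindBitMap` picked between a linear scan and a binary search over the sorted dex PC table based on a fixed entry-count threshold. The strategy is generic; here is a standalone sketch over a plain sorted vector (names hypothetical):

#include <cassert>
#include <cstdint>
#include <vector>

// Sketch of the deleted lookup policy: linear scan for tiny tables (no setup
// overhead, good cache behavior), binary search otherwise.
int FindIndex(const std::vector<uint16_t>& sorted_pcs, uint16_t dex_pc) {
  static const size_t kSearchThreshold = 8;
  if (sorted_pcs.size() < kSearchThreshold) {
    for (size_t i = 0; i < sorted_pcs.size(); ++i) {
      if (sorted_pcs[i] == dex_pc) return static_cast<int>(i);
    }
  } else {
    int lo = 0;
    int hi = static_cast<int>(sorted_pcs.size()) - 1;
    while (hi >= lo) {
      int mid = lo + (hi - lo) / 2;  // avoids the (hi + lo) overflow pattern
      if (dex_pc > sorted_pcs[mid]) {
        lo = mid + 1;
      } else if (dex_pc < sorted_pcs[mid]) {
        hi = mid - 1;
      } else {
        return mid;
      }
    }
  }
  return -1;  // not found
}

int main() {
  std::vector<uint16_t> pcs = {0, 4, 9, 13, 22, 31, 40, 57, 60};
  assert(FindIndex(pcs, 22) == 4);
  assert(FindIndex(pcs, 23) == -1);
  return 0;
}
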
diff --git a/runtime/verifier/dex_gc_map.h b/runtime/verifier/dex_gc_map.h
deleted file mode 100644
index 03a7821..0000000
--- a/runtime/verifier/dex_gc_map.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_VERIFIER_DEX_GC_MAP_H_
-#define ART_RUNTIME_VERIFIER_DEX_GC_MAP_H_
-
-#include <stdint.h>
-
-#include "base/logging.h"
-#include "base/macros.h"
-
-namespace art {
-namespace verifier {
-
-/*
- * Format enumeration for RegisterMap data area.
- */
-enum RegisterMapFormat {
-  kRegMapFormatUnknown = 0,
-  kRegMapFormatNone = 1,       // Indicates no map data follows.
-  kRegMapFormatCompact8 = 2,   // Compact layout, 8-bit addresses.
-  kRegMapFormatCompact16 = 3,  // Compact layout, 16-bit addresses.
-};
-
-// Lightweight wrapper for Dex PC to reference bit maps.
-class DexPcToReferenceMap {
- public:
-  explicit DexPcToReferenceMap(const uint8_t* data) : data_(data) {
-    CHECK(data_ != nullptr);
-  }
-
-  // The total size of the reference bit map including header.
-  size_t RawSize() const {
-    return EntryWidth() * NumEntries() + 4u /* header */;
-  }
-
-  // The number of entries in the table
-  size_t NumEntries() const {
-    return GetData()[2] | (GetData()[3] << 8);
-  }
-
-  // Get the Dex PC at the given index
-  uint16_t GetDexPc(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    if (DexPcWidth() == 1) {
-      return Table()[entry_offset];
-    } else {
-      return Table()[entry_offset] | (Table()[entry_offset + 1] << 8);
-    }
-  }
-
-  // Return address of bitmap encoding what are live references
-  const uint8_t* GetBitMap(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    return &Table()[entry_offset + DexPcWidth()];
-  }
-
-  // Find the bitmap associated with the given dex pc
-  const uint8_t* FindBitMap(uint16_t dex_pc, bool error_if_not_present = true) const;
-
-  // The number of bytes used to encode registers
-  size_t RegWidth() const {
-    return GetData()[1] | ((GetData()[0] & ~kRegMapFormatMask) << kRegMapFormatShift);
-  }
-
- private:
-  // Table of num_entries * (dex pc, bitmap)
-  const uint8_t* Table() const {
-    return GetData() + 4;
-  }
-
-  // The format of the table of the PCs for the table
-  RegisterMapFormat Format() const {
-    return static_cast<RegisterMapFormat>(GetData()[0] & kRegMapFormatMask);
-  }
-
-  // Number of bytes used to encode a dex pc
-  size_t DexPcWidth() const {
-    RegisterMapFormat format = Format();
-    switch (format) {
-      case kRegMapFormatCompact8:
-        return 1;
-      case kRegMapFormatCompact16:
-        return 2;
-      default:
-        LOG(FATAL) << "Invalid format " << static_cast<int>(format);
-        return -1;
-    }
-  }
-
-  // The width of an entry in the table
-  size_t EntryWidth() const {
-    return DexPcWidth() + RegWidth();
-  }
-
-  const uint8_t* GetData() const {
-    return data_;
-  }
-
-  static const int kRegMapFormatShift = 5;
-  static const uint8_t kRegMapFormatMask = 0x7;
-
-  const uint8_t* const data_;  // The header and table data
-};
-
-}  // namespace verifier
-}  // namespace art
-
-#endif  // ART_RUNTIME_VERIFIER_DEX_GC_MAP_H_
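
The removed header also documents the on-disk shape of the map: a 4-byte header (format and register-width information in bytes 0 and 1, a little-endian 16-bit entry count in bytes 2 and 3) followed by `(dex pc, bitmap)` rows. A short sketch of decoding that count field from raw bytes:

#include <cassert>
#include <cstddef>
#include <cstdint>

// The removed map stored its entry count little-endian in header bytes 2 and 3.
size_t NumEntries(const uint8_t* data) {
  return data[2] | (static_cast<size_t>(data[3]) << 8);
}

int main() {
  // Hypothetical header: format byte, reg-width byte, then count = 0x0103 = 259.
  const uint8_t header[4] = {0x02, 0x01, 0x03, 0x01};
  assert(NumEntries(header) == 259);
  return 0;
}
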
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index f52d011..def61db 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -50,6 +50,10 @@
   return dex_cache_.Get();
 }
 
+inline ArtMethod* MethodVerifier::GetMethod() const {
+  return mirror_method_;
+}
+
 inline MethodReference MethodVerifier::GetMethodReference() const {
   return MethodReference(dex_file_, dex_method_idx_);
 }
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index e1d4160..40f12e9 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -20,9 +20,11 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "compiler_callbacks.h"
@@ -46,18 +48,21 @@
 #include "scoped_thread_state_change.h"
 #include "utils.h"
 #include "handle_scope-inl.h"
-#include "verifier/dex_gc_map.h"
 
 namespace art {
 namespace verifier {
 
 static constexpr bool kTimeVerifyMethod = !kIsDebugBuild;
-static constexpr bool gDebugVerify = false;
+static constexpr bool kDebugVerify = false;
 // TODO: Add a constant to method_verifier to turn on verbose logging?
 
 // On VLOG(verifier), should we dump the whole state when we run into a hard failure?
 static constexpr bool kDumpRegLinesOnHardFailureIfVLOG = true;
 
+// We print a warning blurb about "dx --no-optimize" when we find monitor-locking issues. Make
+// sure we only print this once.
+static bool gPrintedDxMonitorText = false;
+
 PcToRegisterLineTable::PcToRegisterLineTable(ScopedArenaAllocator& arena)
     : register_lines_(arena.Adapter(kArenaAllocVerifier)) {}
 
@@ -114,21 +119,11 @@
   reg_line->MarkAllRegistersAsConflicts(verifier);
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyMethod(
-    ArtMethod* method, bool allow_soft_failures, std::string* error ATTRIBUTE_UNUSED) {
-  StackHandleScope<2> hs(Thread::Current());
-  mirror::Class* klass = method->GetDeclaringClass();
-  auto h_dex_cache(hs.NewHandle(klass->GetDexCache()));
-  auto h_class_loader(hs.NewHandle(klass->GetClassLoader()));
-  return VerifyMethod(hs.Self(), method->GetDexMethodIndex(), method->GetDexFile(), h_dex_cache,
-                      h_class_loader, klass->GetClassDef(), method->GetCodeItem(), method,
-                      method->GetAccessFlags(), allow_soft_failures, false);
-}
-
-
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
                                                         mirror::Class* klass,
+                                                        CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
+                                                        LogSeverity log_level,
                                                         std::string* error) {
   if (klass->IsVerified()) {
     return kNoFailure;
@@ -151,17 +146,118 @@
   }
   if (early_failure) {
     *error = "Verifier rejected class " + PrettyDescriptor(klass) + failure_message;
-    if (Runtime::Current()->IsAotCompiler()) {
+    if (callbacks != nullptr) {
       ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
-      Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
+      callbacks->ClassRejected(ref);
     }
     return kHardFailure;
   }
   StackHandleScope<2> hs(self);
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
-  return VerifyClass(
-      self, &dex_file, dex_cache, class_loader, class_def, allow_soft_failures, error);
+  return VerifyClass(self,
+                     &dex_file,
+                     dex_cache,
+                     class_loader,
+                     class_def,
+                     callbacks,
+                     allow_soft_failures,
+                     log_level,
+                     error);
+}
+
+template <bool kDirect>
+static bool HasNextMethod(ClassDataItemIterator* it) {
+  return kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod();
+}
+
+static MethodVerifier::FailureKind FailureKindMax(MethodVerifier::FailureKind fk1,
+                                                  MethodVerifier::FailureKind fk2) {
+  static_assert(MethodVerifier::FailureKind::kNoFailure <
+                    MethodVerifier::FailureKind::kSoftFailure
+                && MethodVerifier::FailureKind::kSoftFailure <
+                       MethodVerifier::FailureKind::kHardFailure,
+                "Unexpected FailureKind order");
+  return std::max(fk1, fk2);
+}
+
+void MethodVerifier::FailureData::Merge(const MethodVerifier::FailureData& fd) {
+  kind = FailureKindMax(kind, fd.kind);
+  types |= fd.types;
+}
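
`FailureData::Merge` depends on `FailureKind` being ordered by severity so that `std::max` keeps the worse of two outcomes, and the `static_assert` above pins that ordering down. A self-contained sketch of the same merge with a stand-in enum:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Stand-in for MethodVerifier::FailureKind; severity must increase with the value.
enum FailureKind { kNoFailure, kSoftFailure, kHardFailure };
static_assert(kNoFailure < kSoftFailure && kSoftFailure < kHardFailure,
              "Unexpected FailureKind order");

struct FailureData {
  FailureKind kind = kNoFailure;
  uint32_t types = 0u;  // bitmask of encountered verification error types

  void Merge(const FailureData& other) {
    kind = std::max(kind, other.kind);  // the worse outcome wins
    types |= other.types;               // union of error categories
  }
};

int main() {
  FailureData direct_result;
  direct_result.kind = kSoftFailure;
  direct_result.types = 0x1;
  FailureData virtual_result;
  virtual_result.kind = kHardFailure;
  virtual_result.types = 0x4;
  direct_result.Merge(virtual_result);
  assert(direct_result.kind == kHardFailure);
  assert(direct_result.types == 0x5);
  return 0;
}
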
+
+template <bool kDirect>
+MethodVerifier::FailureData MethodVerifier::VerifyMethods(Thread* self,
+                                                          ClassLinker* linker,
+                                                          const DexFile* dex_file,
+                                                          const DexFile::ClassDef* class_def,
+                                                          ClassDataItemIterator* it,
+                                                          Handle<mirror::DexCache> dex_cache,
+                                                          Handle<mirror::ClassLoader> class_loader,
+                                                          CompilerCallbacks* callbacks,
+                                                          bool allow_soft_failures,
+                                                          LogSeverity log_level,
+                                                          bool need_precise_constants,
+                                                          std::string* error_string) {
+  DCHECK(it != nullptr);
+
+  MethodVerifier::FailureData failure_data;
+
+  int64_t previous_method_idx = -1;
+  while (HasNextMethod<kDirect>(it)) {
+    self->AllowThreadSuspension();
+    uint32_t method_idx = it->GetMemberIndex();
+    if (method_idx == previous_method_idx) {
+      // smali can create dex files with two encoded_methods sharing the same method_idx
+      // http://code.google.com/p/smali/issues/detail?id=119
+      it->Next();
+      continue;
+    }
+    previous_method_idx = method_idx;
+    InvokeType type = it->GetMethodInvokeType(*class_def);
+    ArtMethod* method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
+    if (method == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      // We couldn't resolve the method, but continue regardless.
+      self->ClearException();
+    } else {
+      DCHECK(method->GetDeclaringClassUnchecked() != nullptr) << type;
+    }
+    StackHandleScope<1> hs(self);
+    std::string hard_failure_msg;
+    MethodVerifier::FailureData result = VerifyMethod(self,
+                                                      method_idx,
+                                                      dex_file,
+                                                      dex_cache,
+                                                      class_loader,
+                                                      class_def,
+                                                      it->GetMethodCodeItem(),
+                                                      method,
+                                                      it->GetMethodAccessFlags(),
+                                                      callbacks,
+                                                      allow_soft_failures,
+                                                      log_level,
+                                                      need_precise_constants,
+                                                      &hard_failure_msg);
+    if (result.kind == kHardFailure) {
+      if (failure_data.kind == kHardFailure) {
+        // If we logged an error before, we need a newline.
+        *error_string += "\n";
+      } else {
+        // If we didn't log a hard failure before, print the header of the message.
+        *error_string += "Verifier rejected class ";
+        *error_string += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
+        *error_string += ":";
+      }
+      *error_string += " ";
+      *error_string += hard_failure_msg;
+    }
+    failure_data.Merge(result);
+    it->Next();
+  }
+
+  return failure_data;
 }
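
`VerifyMethods<kDirect>` folds the two nearly identical direct/virtual loops (removed further down) into one body; the only per-instantiation difference, which iterator predicate to consult, is selected by the bool template parameter. The shape of that trick in isolation, using a toy iterator:

#include <cassert>
#include <cstddef>
#include <vector>

// Toy iterator with separate "direct" and "virtual" cursors, mimicking
// ClassDataItemIterator's split method streams.
struct Iter {
  std::vector<int> direct;
  std::vector<int> virt;
  size_t d = 0;
  size_t v = 0;
  bool HasNextDirect() const { return d < direct.size(); }
  bool HasNextVirtual() const { return v < virt.size(); }
  int Next(bool is_direct) { return is_direct ? direct[d++] : virt[v++]; }
};

// Compile-time selection of the iteration predicate, as in HasNextMethod<kDirect>.
template <bool kDirect>
bool HasNext(const Iter& it) {
  return kDirect ? it.HasNextDirect() : it.HasNextVirtual();
}

// One loop body serves both method kinds; each instantiation walks its own stream.
template <bool kDirect>
int SumMethods(Iter* it) {
  int sum = 0;
  while (HasNext<kDirect>(*it)) {
    sum += it->Next(kDirect);
  }
  return sum;
}

int main() {
  Iter it;
  it.direct = {1, 2, 3};
  it.virt = {10, 20};
  assert(SumMethods<true>(&it) == 6);    // direct pass
  assert(SumMethods<false>(&it) == 30);  // virtual pass over the same iterator
  return 0;
}
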
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
@@ -169,9 +265,12 @@
                                                         Handle<mirror::DexCache> dex_cache,
                                                         Handle<mirror::ClassLoader> class_loader,
                                                         const DexFile::ClassDef* class_def,
+                                                        CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
+                                                        LogSeverity log_level,
                                                         std::string* error) {
   DCHECK(class_def != nullptr);
+  ScopedTrace trace(__FUNCTION__);
 
   // A class must not be abstract and final.
   if ((class_def->access_flags_ & (kAccAbstract | kAccFinal)) == (kAccAbstract | kAccFinal)) {
@@ -190,101 +289,53 @@
   while (it.HasNextStaticField() || it.HasNextInstanceField()) {
     it.Next();
   }
-  size_t error_count = 0;
-  bool hard_fail = false;
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  int64_t previous_direct_method_idx = -1;
-  while (it.HasNextDirectMethod()) {
-    self->AllowThreadSuspension();
-    uint32_t method_idx = it.GetMemberIndex();
-    if (method_idx == previous_direct_method_idx) {
-      // smali can create dex files with two encoded_methods sharing the same method_idx
-      // http://code.google.com/p/smali/issues/detail?id=119
-      it.Next();
-      continue;
-    }
-    previous_direct_method_idx = method_idx;
-    InvokeType type = it.GetMethodInvokeType(*class_def);
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
-    if (method == nullptr) {
-      DCHECK(self->IsExceptionPending());
-      // We couldn't resolve the method, but continue regardless.
-      self->ClearException();
-    } else {
-      DCHECK(method->GetDeclaringClassUnchecked() != nullptr) << type;
-    }
-    StackHandleScope<1> hs(self);
-    MethodVerifier::FailureKind result = VerifyMethod(self,
-                                                      method_idx,
-                                                      dex_file,
-                                                      dex_cache,
-                                                      class_loader,
-                                                      class_def,
-                                                      it.GetMethodCodeItem(),
-        method, it.GetMethodAccessFlags(), allow_soft_failures, false);
-    if (result != kNoFailure) {
-      if (result == kHardFailure) {
-        hard_fail = true;
-        if (error_count > 0) {
-          *error += "\n";
-        }
-        *error = "Verifier rejected class ";
-        *error += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
-        *error += " due to bad method ";
-        *error += PrettyMethod(method_idx, *dex_file);
-      }
-      ++error_count;
-    }
-    it.Next();
-  }
-  int64_t previous_virtual_method_idx = -1;
-  while (it.HasNextVirtualMethod()) {
-    self->AllowThreadSuspension();
-    uint32_t method_idx = it.GetMemberIndex();
-    if (method_idx == previous_virtual_method_idx) {
-      // smali can create dex files with two encoded_methods sharing the same method_idx
-      // http://code.google.com/p/smali/issues/detail?id=119
-      it.Next();
-      continue;
-    }
-    previous_virtual_method_idx = method_idx;
-    InvokeType type = it.GetMethodInvokeType(*class_def);
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
-    if (method == nullptr) {
-      DCHECK(self->IsExceptionPending());
-      // We couldn't resolve the method, but continue regardless.
-      self->ClearException();
-    }
-    StackHandleScope<1> hs(self);
-    MethodVerifier::FailureKind result = VerifyMethod(self,
-                                                      method_idx,
-                                                      dex_file,
-                                                      dex_cache,
-                                                      class_loader,
-                                                      class_def,
-                                                      it.GetMethodCodeItem(),
-        method, it.GetMethodAccessFlags(), allow_soft_failures, false);
-    if (result != kNoFailure) {
-      if (result == kHardFailure) {
-        hard_fail = true;
-        if (error_count > 0) {
-          *error += "\n";
-        }
-        *error = "Verifier rejected class ";
-        *error += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
-        *error += " due to bad method ";
-        *error += PrettyMethod(method_idx, *dex_file);
-      }
-      ++error_count;
-    }
-    it.Next();
-  }
-  if (error_count == 0) {
+  // Direct methods.
+  MethodVerifier::FailureData data1 = VerifyMethods<true>(self,
+                                                          linker,
+                                                          dex_file,
+                                                          class_def,
+                                                          &it,
+                                                          dex_cache,
+                                                          class_loader,
+                                                          callbacks,
+                                                          allow_soft_failures,
+                                                          log_level,
+                                                          false /* need_precise_constants */,
+                                                          error);
+  // Virtual methods.
+  MethodVerifier::FailureData data2 = VerifyMethods<false>(self,
+                                                           linker,
+                                                           dex_file,
+                                                           class_def,
+                                                           &it,
+                                                           dex_cache,
+                                                           class_loader,
+                                                           callbacks,
+                                                           allow_soft_failures,
+                                                           log_level,
+                                                           false /* need_precise_constants */,
+                                                           error);
+
+  data1.Merge(data2);
+
+  if (data1.kind == kNoFailure) {
     return kNoFailure;
   } else {
-    return hard_fail ? kHardFailure : kSoftFailure;
+    if ((data1.types & VERIFY_ERROR_LOCKING) != 0) {
+      // Print a warning about expected slow-down. Use a string temporary to print one contiguous
+      // warning.
+      std::string tmp =
+          StringPrintf("Class %s failed lock verification and will run slower.",
+                       PrettyDescriptor(dex_file->GetClassDescriptor(*class_def)).c_str());
+      if (!gPrintedDxMonitorText) {
+        tmp = tmp + "\nCommon causes for lock verification issues are non-optimized dex code\n"
+                    "and incorrect proguard optimizations.";
+        gPrintedDxMonitorText = true;
+      }
+      LOG(WARNING) << tmp;
+    }
+    return data1.kind;
   }
 }
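
The lock-verification branch above appends the multi-line "dx --no-optimize" explanation only to the first warning in the process, guarded by the global `gPrintedDxMonitorText` flag. A condensed sketch of the same print-once pattern (single-threaded here; the real flag tolerates a benign race):

#include <iostream>
#include <string>

// Process-wide "already explained" flag, as with gPrintedDxMonitorText.
static bool g_printed_blurb = false;

void WarnLockFailure(const std::string& descriptor) {
  // Build one contiguous string so the multi-line output cannot interleave
  // with other log lines.
  std::string msg = "Class " + descriptor + " failed lock verification and will run slower.";
  if (!g_printed_blurb) {
    msg += "\nCommon causes for lock verification issues are non-optimized dex code"
           "\nand incorrect proguard optimizations.";
    g_printed_blurb = true;
  }
  std::cerr << msg << std::endl;
}

int main() {
  WarnLockFailure("LFoo;");  // the first warning carries the explanation
  WarnLockFailure("LBar;");  // later warnings stay short
  return 0;
}
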
 
@@ -299,7 +350,8 @@
   return registers_size * insns_size > 4*1024*1024;
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyMethod(Thread* self, uint32_t method_idx,
+MethodVerifier::FailureData MethodVerifier::VerifyMethod(Thread* self,
+                                                         uint32_t method_idx,
                                                          const DexFile* dex_file,
                                                          Handle<mirror::DexCache> dex_cache,
                                                          Handle<mirror::ClassLoader> class_loader,
@@ -307,24 +359,56 @@
                                                          const DexFile::CodeItem* code_item,
                                                          ArtMethod* method,
                                                          uint32_t method_access_flags,
+                                                         CompilerCallbacks* callbacks,
                                                          bool allow_soft_failures,
-                                                         bool need_precise_constants) {
-  MethodVerifier::FailureKind result = kNoFailure;
+                                                         LogSeverity log_level,
+                                                         bool need_precise_constants,
+                                                         std::string* hard_failure_msg) {
+  MethodVerifier::FailureData result;
   uint64_t start_ns = kTimeVerifyMethod ? NanoTime() : 0;
 
-  MethodVerifier verifier(self, dex_file, dex_cache, class_loader, class_def, code_item,
-                          method_idx, method, method_access_flags, true, allow_soft_failures,
-                          need_precise_constants, true);
+  MethodVerifier verifier(self,
+                          dex_file,
+                          dex_cache,
+                          class_loader,
+                          class_def,
+                          code_item,
+                          method_idx,
+                          method,
+                          method_access_flags,
+                          true /* can_load_classes */,
+                          allow_soft_failures,
+                          need_precise_constants,
+                          false /* verify to dump */,
+                          true /* allow_thread_suspension */);
   if (verifier.Verify()) {
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
     CHECK(!verifier.have_pending_hard_failure_);
+
+    if (code_item != nullptr && callbacks != nullptr) {
+      // Let the interested party know that the method was verified.
+      callbacks->MethodVerified(&verifier);
+    }
+
     if (verifier.failures_.size() != 0) {
       if (VLOG_IS_ON(verifier)) {
-          verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
-                                << PrettyMethod(method_idx, *dex_file) << "\n");
+        verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
+                                                    << PrettyMethod(method_idx, *dex_file) << "\n");
       }
-      result = kSoftFailure;
+      result.kind = kSoftFailure;
+      if (method != nullptr &&
+          !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+      }
+    }
+    if (method != nullptr) {
+      if (verifier.HasInstructionThatWillThrow()) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+      }
+      if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccMustCountLocks);
+      }
     }
   } else {
     // Bad method data.
@@ -333,14 +417,30 @@
     if (UNLIKELY(verifier.have_pending_experimental_failure_)) {
       // Failed due to being forced into interpreter. This is ok because
       // we just want to skip verification.
-      result = kSoftFailure;
+      result.kind = kSoftFailure;
     } else {
       CHECK(verifier.have_pending_hard_failure_);
-      verifier.DumpFailures(LOG(INFO) << "Verification error in "
-                                      << PrettyMethod(method_idx, *dex_file) << "\n");
-      result = kHardFailure;
+      if (VLOG_IS_ON(verifier)) {
+        log_level = LogSeverity::VERBOSE;
+      }
+      if (log_level > LogSeverity::VERBOSE) {
+        verifier.DumpFailures(LOG(log_level) << "Verification error in "
+                                             << PrettyMethod(method_idx, *dex_file) << "\n");
+      }
+      if (hard_failure_msg != nullptr) {
+        CHECK(!verifier.failure_messages_.empty());
+        *hard_failure_msg =
+            verifier.failure_messages_[verifier.failure_messages_.size() - 1]->str();
+      }
+      result.kind = kHardFailure;
+
+      if (callbacks != nullptr) {
+        // Let the interested party know that we failed the class.
+        ClassReference ref(dex_file, dex_file->GetIndexForClassDef(*class_def));
+        callbacks->ClassRejected(ref);
+      }
     }
-    if (gDebugVerify) {
+    if (VLOG_IS_ON(verifier)) {
       std::cout << "\n" << verifier.info_messages_.str();
       verifier.Dump(std::cout);
     }
@@ -353,6 +453,7 @@
                    << (IsLargeMethod(code_item) ? " (large method)" : "");
     }
   }
+  result.types = verifier.encountered_failure_types_;
   return result;
 }
 
@@ -366,9 +467,20 @@
                                                     const DexFile::CodeItem* code_item,
                                                     ArtMethod* method,
                                                     uint32_t method_access_flags) {
-  MethodVerifier* verifier = new MethodVerifier(self, dex_file, dex_cache, class_loader,
-                                                class_def, code_item, dex_method_idx, method,
-                                                method_access_flags, true, true, true, true);
+  MethodVerifier* verifier = new MethodVerifier(self,
+                                                dex_file,
+                                                dex_cache,
+                                                class_loader,
+                                                class_def,
+                                                code_item,
+                                                dex_method_idx,
+                                                method,
+                                                method_access_flags,
+                                                true /* can_load_classes */,
+                                                true /* allow_soft_failures */,
+                                                true /* need_precise_constants */,
+                                                true /* verify_to_dump */,
+                                                true /* allow_thread_suspension */);
   verifier->Verify();
   verifier->DumpFailures(vios->Stream());
   vios->Stream() << verifier->info_messages_.str();
@@ -384,13 +496,18 @@
 }
 
 MethodVerifier::MethodVerifier(Thread* self,
-                               const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
+                               const DexFile* dex_file,
+                               Handle<mirror::DexCache> dex_cache,
                                Handle<mirror::ClassLoader> class_loader,
                                const DexFile::ClassDef* class_def,
-                               const DexFile::CodeItem* code_item, uint32_t dex_method_idx,
-                               ArtMethod* method, uint32_t method_access_flags,
-                               bool can_load_classes, bool allow_soft_failures,
-                               bool need_precise_constants, bool verify_to_dump,
+                               const DexFile::CodeItem* code_item,
+                               uint32_t dex_method_idx,
+                               ArtMethod* method,
+                               uint32_t method_access_flags,
+                               bool can_load_classes,
+                               bool allow_soft_failures,
+                               bool need_precise_constants,
+                               bool verify_to_dump,
                                bool allow_thread_suspension)
     : self_(self),
       arena_stack_(Runtime::Current()->GetArenaPool()),
@@ -440,9 +557,20 @@
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(hs.Self(), m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(),
-                          false, true, false, false);
+  MethodVerifier verifier(hs.Self(),
+                          m->GetDexFile(),
+                          dex_cache,
+                          class_loader,
+                          &m->GetClassDef(),
+                          m->GetCodeItem(),
+                          m->GetDexMethodIndex(),
+                          m,
+                          m->GetAccessFlags(),
+                          false /* can_load_classes */,
+                          true  /* allow_soft_failures */,
+                          false /* need_precise_constants */,
+                          false /* verify_to_dump */,
+                          false /* allow_thread_suspension */);
   verifier.interesting_dex_pc_ = dex_pc;
   verifier.monitor_enter_dex_pcs_ = monitor_enter_dex_pcs;
   verifier.FindLocksAtDexPc();
@@ -484,9 +612,20 @@
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(hs.Self(), m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
-                          true, false, true);
+  MethodVerifier verifier(hs.Self(),
+                          m->GetDexFile(),
+                          dex_cache,
+                          class_loader,
+                          &m->GetClassDef(),
+                          m->GetCodeItem(),
+                          m->GetDexMethodIndex(),
+                          m,
+                          m->GetAccessFlags(),
+                          true  /* can_load_classes */,
+                          true  /* allow_soft_failures */,
+                          false /* need_precise_constants */,
+                          false /* verify_to_dump */,
+                          true  /* allow_thread_suspension */);
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
 
@@ -513,9 +652,20 @@
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(hs.Self(), m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
-                          true, false, true);
+  MethodVerifier verifier(hs.Self(),
+                          m->GetDexFile(),
+                          dex_cache,
+                          class_loader,
+                          &m->GetClassDef(),
+                          m->GetCodeItem(),
+                          m->GetDexMethodIndex(),
+                          m,
+                          m->GetAccessFlags(),
+                          true  /* can_load_classes */,
+                          true  /* allow_soft_failures */,
+                          false /* need_precise_constants */,
+                          false /* verify_to_dump */,
+                          true  /* allow_thread_suspension */);
   return verifier.FindInvokedMethodAtDexPc(dex_pc);
 }
 
@@ -539,27 +689,9 @@
   return GetQuickInvokedMethod(inst, register_line, is_range, false);
 }
 
-SafeMap<uint32_t, std::set<uint32_t>> MethodVerifier::FindStringInitMap(ArtMethod* m) {
-  Thread* self = Thread::Current();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(),
-                          true, true, false, true);
-  // Avoid copying: The map is moved out of the verifier before the verifier is destroyed.
-  return std::move(verifier.FindStringInitMap());
-}
-
-SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() {
-  Verify();
-  return GetStringInitPcRegMap();
-}
-
 bool MethodVerifier::Verify() {
   // Some older code doesn't correctly mark constructors as such. Test for this case by looking at
   // the name.
-  Runtime* runtime = Runtime::Current();
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
   const char* method_name = dex_file_->StringDataByIdx(method_id.name_idx_);
   bool instance_constructor_by_name = strcmp("<init>", method_name) == 0;
@@ -605,13 +737,15 @@
 
   // If there aren't any instructions, make sure that's expected, then exit successfully.
   if (code_item_ == nullptr) {
+    // Only native or abstract methods may not have code.
+    if ((method_access_flags_ & (kAccNative | kAccAbstract)) == 0) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "zero-length code in concrete non-native method";
+      return false;
+    }
+
     // This should have been rejected by the dex file verifier. Only do in debug build.
+    // Note: the zero-length-code check above is also enforced by the dex file verifier,
+    // starting with dex version 37.
     if (kIsDebugBuild) {
-      // Only native or abstract methods may not have code.
-      if ((method_access_flags_ & (kAccNative | kAccAbstract)) == 0) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "zero-length code in concrete non-native method";
-        return false;
-      }
       if ((method_access_flags_ & kAccAbstract) != 0) {
         // Abstract methods are not allowed to have the following flags.
         static constexpr uint32_t kForbidden =
@@ -629,12 +763,9 @@
       }
       if ((class_def_->GetJavaAccessFlags() & kAccInterface) != 0) {
         // Interface methods must be public and abstract (if default methods are disabled).
-        bool default_methods_supported =
-            runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
-        uint32_t kRequired = kAccPublic | (default_methods_supported ? 0 : kAccAbstract);
+        uint32_t kRequired = kAccPublic;
         if ((method_access_flags_ & kRequired) != kRequired) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public"
-                                            << (default_methods_supported ? "" : " and abstract");
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public";
           return false;
         }
         // In addition to the above, interface methods must not be protected.
@@ -665,20 +796,24 @@
       // Interfaces may always have static initializers for their fields. If we are running with
       // default methods enabled we also allow other public, static, non-final methods to have code.
       // Otherwise that is the only type of method allowed.
-      if (runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
+      if (!(IsConstructor() && IsStatic())) {
         if (IsInstanceConstructor()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-static constructor";
           return false;
         } else if (method_access_flags_ & kAccFinal) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have final methods";
           return false;
-        } else if (!(method_access_flags_ & kAccPublic)) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-public members";
-          return false;
+        } else {
+          uint32_t access_flag_options = kAccPublic;
+          if (dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+            access_flag_options |= kAccPrivate;
+          }
+          if (!(method_access_flags_ & access_flag_options)) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+                << "interfaces may not have protected or package-private members";
+            return false;
+          }
         }
-      } else if (!IsConstructor() || !IsStatic()) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be abstract";
-        return false;
       }
     }
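
The rewritten interface-method check derives the permitted visibility from the dex file version instead of a runtime experimental flag: public is always accepted, and private becomes legal once the file declares the default-methods version. A sketch of that gate; the flag values and the threshold constant are assumptions mirroring the ART definitions:

#include <cassert>
#include <cstdint>

// Assumed constants mirroring ART's access flags and dex version threshold.
constexpr uint32_t kAccPublic = 0x0001;
constexpr uint32_t kAccPrivate = 0x0002;
constexpr uint32_t kDefaultMethodsVersion = 37;

// Non-abstract interface methods must be public, or private from version 037 on.
bool InterfaceMethodVisibilityOk(uint32_t access_flags, uint32_t dex_version) {
  uint32_t allowed = kAccPublic;
  if (dex_version >= kDefaultMethodsVersion) {
    allowed |= kAccPrivate;
  }
  return (access_flags & allowed) != 0;
}

int main() {
  assert(InterfaceMethodVisibilityOk(kAccPublic, 35));
  assert(!InterfaceMethodVisibilityOk(kAccPrivate, 35));  // rejected pre-037
  assert(InterfaceMethodVisibilityOk(kAccPrivate, 37));   // default-methods dex
  return 0;
}
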
 
@@ -713,10 +848,7 @@
   result = result && VerifyInstructions();
   // Perform code-flow analysis and return.
   result = result && VerifyCodeFlow();
-  // Compute information for compiler.
-  if (result && runtime->IsCompiler()) {
-    result = runtime->GetCompilerCallbacks()->MethodVerified(this);
-  }
+
   return result;
 }
 
@@ -776,10 +908,6 @@
       // Hard verification failures at compile time will still fail at runtime, so the class is
       // marked as rejected to prevent it from being compiled.
     case VERIFY_ERROR_BAD_CLASS_HARD: {
-      if (Runtime::Current()->IsAotCompiler()) {
-        ClassReference ref(dex_file_, dex_file_->GetIndexForClassDef(*class_def_));
-        Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
-      }
       have_pending_hard_failure_ = true;
       if (VLOG_IS_ON(verifier) && kDumpRegLinesOnHardFailureIfVLOG) {
         ScopedObjectAccess soa(Thread::Current());
@@ -1029,9 +1157,6 @@
     case Instruction::kVerifyRegCWide:
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
-    case Instruction::kVerifyRegCString:
-      result = result && CheckStringIndex(inst->VRegC());
-      break;
   }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
@@ -1130,6 +1255,10 @@
   if (descriptor[0] != 'L') {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "can't call new-instance on type '" << descriptor << "'";
     return false;
+  } else if (strcmp(descriptor, "Ljava/lang/Class;") == 0) {
+    // Instantiating java.lang.Class via new-instance is not allowed. Fall back to the
+    // interpreter to ensure an exception is thrown when this statement is executed
+    // (compiled code would not do that).
+    Fail(VERIFY_ERROR_INSTANTIATION);
   }
   return true;
 }
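
As the comment above notes, `new-instance` on `Ljava/lang/Class;` is downgraded to a `VERIFY_ERROR_INSTANTIATION` soft failure rather than a hard reject, forcing the interpreter path where the expected exception is actually thrown. The descriptor test itself is a plain string comparison; a sketch with a hypothetical result enum:

#include <cassert>
#include <cstring>

// Classifies a new-instance target by its descriptor (sketch): non-reference
// descriptors are invalid, and java.lang.Class needs the interpreter fallback.
enum class NewInstanceCheck { kOk, kHardFail, kInterpreterFallback };

NewInstanceCheck CheckNewInstance(const char* descriptor) {
  if (descriptor[0] != 'L') {
    return NewInstanceCheck::kHardFail;  // primitives/arrays can't be new-instance'd
  }
  if (std::strcmp(descriptor, "Ljava/lang/Class;") == 0) {
    return NewInstanceCheck::kInterpreterFallback;  // throws at runtime instead
  }
  return NewInstanceCheck::kOk;
}

int main() {
  assert(CheckNewInstance("Ljava/lang/String;") == NewInstanceCheck::kOk);
  assert(CheckNewInstance("I") == NewInstanceCheck::kHardFail);
  assert(CheckNewInstance("Ljava/lang/Class;") == NewInstanceCheck::kInterpreterFallback);
  return 0;
}
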
@@ -1739,7 +1868,7 @@
     GetInstructionFlags(insn_idx).ClearChanged();
   }
 
-  if (gDebugVerify) {
+  if (kDebugVerify) {
     /*
      * Scan for dead code. There's nothing "evil" about dead code
      * (besides the wasted space), but it indicates a flaw somewhere
@@ -1872,7 +2001,7 @@
 
   int32_t branch_target = 0;
   bool just_set_result = false;
-  if (gDebugVerify) {
+  if (kDebugVerify) {
     // Generate processing back trace to debug verifier
     LogVerifyInfo() << "Processing " << inst->DumpString(dex_file_) << "\n"
                     << work_line_->Dump(this) << "\n";
@@ -1896,8 +2025,8 @@
   // We need to ensure the work line is consistent while performing validation. When we spot a
   // peephole pattern we compute a new line for either the fallthrough instruction or the
   // branch target.
-  ArenaUniquePtr<RegisterLine> branch_line;
-  ArenaUniquePtr<RegisterLine> fallthrough_line;
+  RegisterLineArenaUniquePtr branch_line;
+  RegisterLineArenaUniquePtr fallthrough_line;
 
   switch (inst->Opcode()) {
     case Instruction::NOP:
@@ -2041,8 +2170,12 @@
           } else if (reg_type.IsConflict()) {
             Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "returning register with conflict";
           } else if (reg_type.IsUninitializedTypes()) {
-            Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "returning uninitialized object '"
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "returning uninitialized object '"
                                               << reg_type << "'";
+          } else if (!reg_type.IsReferenceTypes()) {
+            // We really do expect a reference here.
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-object returns a non-reference type "
+                                              << reg_type;
           } else if (!return_type.IsAssignableFrom(reg_type)) {
             if (reg_type.IsUnresolvedTypes() || return_type.IsUnresolvedTypes()) {
               Fail(VERIFY_ERROR_NO_CLASS) << " can't resolve returned type '" << return_type
@@ -2194,7 +2327,6 @@
       opcode_flags &= ~Instruction::kThrow;
       work_line_->PopMonitor(this, inst->VRegA_11x());
       break;
-
     case Instruction::CHECK_CAST:
     case Instruction::INSTANCE_OF: {
       /*
@@ -2240,6 +2372,14 @@
         } else {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "instance-of on non-reference in v" << orig_type_reg;
         }
+      } else if (orig_type.IsUninitializedTypes()) {
+        if (is_checkcast) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "check-cast on uninitialized reference in v"
+                                            << orig_type_reg;
+        } else {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "instance-of on uninitialized reference in v"
+                                            << orig_type_reg;
+        }
       } else {
         if (is_checkcast) {
           work_line_->SetRegisterType<LockOp::kKeep>(this, inst->VRegA_21c(), res_type);
@@ -2334,8 +2474,14 @@
     case Instruction::THROW: {
       const RegType& res_type = work_line_->GetRegisterType(this, inst->VRegA_11x());
       if (!reg_types_.JavaLangThrowable(false).IsAssignableFrom(res_type)) {
-        Fail(res_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS : VERIFY_ERROR_BAD_CLASS_SOFT)
-            << "thrown class " << res_type << " not instanceof Throwable";
+        if (res_type.IsUninitializedTypes()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "thrown exception not initialized";
+        } else if (!res_type.IsReferenceTypes()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "thrown value of non-reference type " << res_type;
+        } else {
+          Fail(res_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS : VERIFY_ERROR_BAD_CLASS_SOFT)
+                << "thrown class " << res_type << " not instanceof Throwable";
+        }
       }
       break;
     }
@@ -2359,6 +2505,10 @@
         if (!array_type.IsArrayTypes()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with array type "
                                             << array_type;
+        } else if (array_type.IsUnresolvedTypes()) {
+          // An unresolved array type can only have a reference component type (primitive
+          // arrays always resolve), so it cannot be a valid fill-array-data target.
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data for array of type "
+                                            << array_type;
         } else {
           const RegType& component_type = reg_types_.GetComponentType(array_type, GetClassLoader());
           DCHECK(!component_type.IsConflict());
@@ -2687,10 +2837,11 @@
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range, is_super);
+      MethodType type = is_super ? METHOD_SUPER : METHOD_VIRTUAL;
+      ArtMethod* called_method = VerifyInvocationArgs(inst, type, is_range);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* return_type_class = called_method->GetReturnType(can_load_classes_,
                                                                         pointer_size);
         if (return_type_class != nullptr) {
@@ -2720,7 +2871,7 @@
     case Instruction::INVOKE_DIRECT:
     case Instruction::INVOKE_DIRECT_RANGE: {
       bool is_range = (inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range, false);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range);
       const char* return_type_descriptor;
       bool is_constructor;
       const RegType* return_type = nullptr;
@@ -2733,7 +2884,7 @@
       } else {
         is_constructor = called_method->IsConstructor();
         return_type_descriptor = called_method->GetReturnTypeDescriptor();
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* return_type_class = called_method->GetReturnType(can_load_classes_,
                                                                         pointer_size);
         if (return_type_class != nullptr) {
@@ -2783,8 +2934,7 @@
          * Replace the uninitialized reference with an initialized one. We need to do this for all
          * registers that have the same object instance in them, not just the "this" register.
          */
-        const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-        work_line_->MarkRefsAsInitialized(this, this_type, this_reg, work_insn_idx_);
+        work_line_->MarkRefsAsInitialized(this, this_type);
       }
       if (return_type == nullptr) {
         return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor, false);
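
The simplified `MarkRefsAsInitialized` call above reflects the rule spelled out in the comment: once the constructor has run, every register aliasing the uninitialized instance must flip to the initialized type, not just the `this` register named by the invoke. A toy register-line model of that sweep:

#include <cassert>
#include <vector>

// Toy register line: each slot holds a type id; negative ids mark
// "uninitialized at allocation site n", positive ids are resolved types.
using RegLine = std::vector<int>;

// After <init> runs on an object with uninit id 'uninit', every aliasing
// register becomes the initialized type, copies included.
void MarkRefsAsInitialized(RegLine* line, int uninit, int initialized) {
  for (int& reg : *line) {
    if (reg == uninit) {
      reg = initialized;
    }
  }
}

int main() {
  // v0 and v2 alias the same uninitialized allocation (-1).
  RegLine line = {-1, 7, -1, 3};
  MarkRefsAsInitialized(&line, -1, 42);
  assert(line == (RegLine{42, 7, 42, 3}));
  return 0;
}
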
@@ -2800,7 +2950,7 @@
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE: {
         bool is_range = (inst->Opcode() == Instruction::INVOKE_STATIC_RANGE);
-        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range, false);
+        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range);
         const char* descriptor;
         if (called_method == nullptr) {
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -2822,7 +2972,7 @@
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_INTERFACE_RANGE: {
       bool is_range =  (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range, false);
+      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range);
       if (abs_method != nullptr) {
         mirror::Class* called_interface = abs_method->GetDeclaringClass();
         if (!called_interface->IsInterface() && !called_interface->IsObjectClass()) {
@@ -3178,69 +3328,15 @@
       }
       break;
     }
-    case Instruction::INVOKE_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
-      break;
-    }
-    case Instruction::CAPTURE_VARIABLE: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement capture-variable verification
-      break;
-    }
-    case Instruction::CREATE_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
-      break;
-    }
-    case Instruction::LIBERATE_VARIABLE: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement liberate-variable verification
-      break;
-    }
-
-    case Instruction::UNUSED_F4: {
-      DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
-      // Conservatively fail verification on release builds.
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
-      break;
-    }
-
-    case Instruction::BOX_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement box-lambda verification
-
-      // Partial verification. Sets the resulting type to always be an object, which
-      // is good enough for some other verification to occur without hard-failing.
-      const uint32_t vreg_target_object = inst->VRegA_22x();  // box-lambda vA, vB
-      const RegType& reg_type = reg_types_.JavaLangObject(need_precise_constants_);
-      work_line_->SetRegisterType<LockOp::kClear>(this, vreg_target_object, reg_type);
-      break;
-    }
-
-     case Instruction::UNBOX_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement unbox-lambda verification
-      break;
-    }
 
     /* These should never appear during verification. */
     case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-    case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
+    case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
+    case Instruction::UNUSED_FC ... Instruction::UNUSED_FF:
     case Instruction::UNUSED_79:
     case Instruction::UNUSED_7A:
+    case Instruction::INVOKE_POLYMORPHIC:
+    case Instruction::INVOKE_POLYMORPHIC_RANGE:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
       break;
 
@@ -3556,6 +3652,7 @@
           } else {
             const RegType& exception = ResolveClassAndCheckAccess(iterator.GetHandlerTypeIndex());
             if (!reg_types_.JavaLangThrowable(false).IsAssignableFrom(exception)) {
+              DCHECK(!exception.IsUninitializedTypes());  // Comes from dex, shouldn't be uninit.
               if (exception.IsUnresolvedTypes()) {
                 // We don't know enough about the type. Fail here and let runtime handle it.
                 Fail(VERIFY_ERROR_NO_CLASS) << "unresolved exception class " << exception;
@@ -3608,7 +3705,9 @@
   const RegType& referrer = GetDeclaringClass();
   auto* cl = Runtime::Current()->GetClassLinker();
   auto pointer_size = cl->GetImagePointerSize();
+
   ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx, pointer_size);
+  bool stash_method = false;
   if (res_method == nullptr) {
     const char* name = dex_file_->GetMethodName(method_id);
     const Signature signature = dex_file_->GetMethodSignature(method_id);
@@ -3617,16 +3716,21 @@
       res_method = klass->FindDirectMethod(name, signature, pointer_size);
     } else if (method_type == METHOD_INTERFACE) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
+    } else if (method_type == METHOD_SUPER && klass->IsInterface()) {
+      res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
     } else {
+      DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
-      dex_cache_->SetResolvedMethod(dex_method_idx, res_method, pointer_size);
+      stash_method = true;
     } else {
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
       // a class has changed, and will be flagged as an error in later checks.
-      if (method_type == METHOD_INTERFACE || method_type == METHOD_VIRTUAL) {
+      if (method_type == METHOD_INTERFACE ||
+          method_type == METHOD_VIRTUAL ||
+          method_type == METHOD_SUPER) {
         res_method = klass->FindDirectMethod(name, signature, pointer_size);
       }
       if (res_method == nullptr) {
@@ -3650,6 +3754,39 @@
                                       << PrettyMethod(res_method);
     return nullptr;
   }
+
+  // Check that the invoke kind is compatible with a method declared on an interface class
+  // (invoke-direct additionally requires default-method support, see below).
+  //
+  // Note: this check must be after the initializer check, as those are required to fail a class,
+  //       while this check implies an IncompatibleClassChangeError.
+  if (klass->IsInterface()) {
+    // Methods called on interfaces must use invoke-interface, invoke-static, invoke-super, or
+    // (if the dex file version is 37 or greater) invoke-direct.
+    if (method_type != METHOD_INTERFACE &&
+        method_type != METHOD_STATIC &&
+        ((dex_file_->GetVersion() < DexFile::kDefaultMethodsVersion) ||
+         method_type != METHOD_DIRECT) &&
+        method_type != METHOD_SUPER) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in an interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  } else {
+    if (method_type == METHOD_INTERFACE) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in a non-interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  }
+
+  // Only stash the resolved method once the checks above have passed; before that, it was not
+  // guaranteed to be correct.
+  if (stash_method) {
+    dex_cache_->SetResolvedMethod(dex_method_idx, res_method, pointer_size);
+  }
+
   // Check if access is allowed.
   if (!referrer.CanAccessMember(res_method->GetDeclaringClass(), res_method->GetAccessFlags())) {
     Fail(VERIFY_ERROR_ACCESS_METHOD) << "illegal method access (call " << PrettyMethod(res_method)
@@ -3657,26 +3794,18 @@
     return res_method;
   }
   // Check that invoke-virtual and invoke-super are not used on private methods of the same class.
-  if (res_method->IsPrivate() && method_type == METHOD_VIRTUAL) {
+  if (res_method->IsPrivate() && (method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-super/virtual can't be used on private method "
                                       << PrettyMethod(res_method);
     return nullptr;
   }
-  // Check that interface methods match interface classes.
-  if (klass->IsInterface() && method_type != METHOD_INTERFACE) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "non-interface method " << PrettyMethod(res_method)
-                                    << " is in an interface class " << PrettyClass(klass);
-    return nullptr;
-  } else if (!klass->IsInterface() && method_type == METHOD_INTERFACE) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "interface method " << PrettyMethod(res_method)
-                                    << " is in a non-interface class " << PrettyClass(klass);
-    return nullptr;
-  }
   // See if the method type implied by the invoke instruction matches the access flags for the
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
-      ((method_type == METHOD_VIRTUAL || method_type == METHOD_INTERFACE) && res_method->IsDirect())
+      ((method_type == METHOD_SUPER ||
+        method_type == METHOD_VIRTUAL ||
+        method_type == METHOD_INTERFACE) && res_method->IsDirect())
       ) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
                                        " type of " << PrettyMethod(res_method);
@@ -3717,7 +3846,8 @@
       CHECK(have_pending_hard_failure_);
       return nullptr;
     }
-    if (actual_arg_type.IsUninitializedReference()) {
+    bool is_init = false;
+    if (actual_arg_type.IsUninitializedTypes()) {
       if (res_method) {
         if (!res_method->IsConstructor()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
@@ -3731,8 +3861,12 @@
           return nullptr;
         }
       }
+      is_init = true;
     }
-    if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) {
+    const RegType& adjusted_type = is_init
+                                       ? GetRegTypeCache()->FromUninitialized(actual_arg_type)
+                                       : actual_arg_type;
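
The adjustment defined just above can be pictured as stripping an "uninitialized" marker once
the call has been validated as a constructor invocation. A simplified sketch (MiniRegType and
both helpers are hypothetical, not the real RegType hierarchy):

#include <cassert>

// A type is either initialized or carries an "uninitialized" marker until <init> runs.
struct MiniRegType {
  const char* descriptor;
  bool uninitialized;
};

// Models GetRegTypeCache()->FromUninitialized(): drop the marker so the usual
// assignability check can run against the constructor's declaring class.
MiniRegType FromUninitialized(const MiniRegType& t) {
  assert(t.uninitialized);
  return MiniRegType{t.descriptor, /*uninitialized=*/false};
}

MiniRegType AdjustThisArg(const MiniRegType& actual, bool is_init) {
  return is_init ? FromUninitialized(actual) : actual;
}
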
+    if (method_type != METHOD_INTERFACE && !adjusted_type.IsZero()) {
       const RegType* res_method_class;
       // Miranda methods have the declaring interface as their declaring class, not the abstract
       // class. It would be wrong to use this for the type check (interface type checks are
@@ -3750,10 +3884,12 @@
             dex_file_->StringByTypeIdx(class_idx),
             false);
       }
-      if (!res_method_class->IsAssignableFrom(actual_arg_type)) {
-        Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
-            VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
-                << "' not instance of '" << *res_method_class << "'";
+      if (!res_method_class->IsAssignableFrom(adjusted_type)) {
+        Fail(adjusted_type.IsUnresolvedTypes()
+                 ? VERIFY_ERROR_NO_CLASS
+                 : VERIFY_ERROR_BAD_CLASS_SOFT)
+            << "'this' argument '" << actual_arg_type << "' not instance of '"
+            << *res_method_class << "'";
         // Continue on soft failures. We need to find possible hard failures to avoid problems in
         // the compiler.
         if (have_pending_hard_failure_) {
@@ -3862,7 +3998,7 @@
 };
 
 ArtMethod* MethodVerifier::VerifyInvocationArgs(
-    const Instruction* inst, MethodType method_type, bool is_range, bool is_super) {
+    const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -3877,24 +4013,51 @@
   }
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
-  // has a vtable entry for the target method.
-  if (is_super) {
-    DCHECK(method_type == METHOD_VIRTUAL);
-    const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_);
-    if (super.IsUnresolvedTypes()) {
-      Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
-                                   << PrettyMethod(dex_method_idx_, *dex_file_)
-                                   << " to super " << PrettyMethod(res_method);
+  // has a vtable entry for the target method, or that the target is declared on an interface.
+  if (method_type == METHOD_SUPER) {
+    uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
+    mirror::Class* reference_class = dex_cache_->GetResolvedType(class_idx);
+    if (reference_class == nullptr) {
+      Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "Unable to find referenced class from invoke-super";
       return nullptr;
     }
-    mirror::Class* super_klass = super.GetClass();
-    if (res_method->GetMethodIndex() >= super_klass->GetVTableLength()) {
-      Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
-                                   << PrettyMethod(dex_method_idx_, *dex_file_)
-                                   << " to super " << super
-                                   << "." << res_method->GetName()
-                                   << res_method->GetSignature();
-      return nullptr;
+    if (reference_class->IsInterface()) {
+      // TODO: Can we verify anything else?
+      if (class_idx == class_def_->class_idx_) {
+        Fail(VERIFY_ERROR_CLASS_CHANGE) << "Cannot invoke-super on self as interface";
+        return nullptr;
+      }
+      // TODO: Revisit whether we want to allow invoke-super only on direct interfaces, as the
+      // JLS does.
+      if (!GetDeclaringClass().HasClass()) {
+        Fail(VERIFY_ERROR_NO_CLASS) << "Unable to resolve the full class of 'this' used in an"
+                                    << "interface invoke-super";
+        return nullptr;
+      } else if (!reference_class->IsAssignableFrom(GetDeclaringClass().GetClass())) {
+        Fail(VERIFY_ERROR_CLASS_CHANGE)
+            << "invoke-super in " << PrettyClass(GetDeclaringClass().GetClass()) << " in method "
+            << PrettyMethod(dex_method_idx_, *dex_file_) << " to method "
+            << PrettyMethod(method_idx, *dex_file_) << " references "
+            << "non-super-interface type " << PrettyClass(reference_class);
+        return nullptr;
+      }
+    } else {
+      const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_);
+      if (super.IsUnresolvedTypes()) {
+        Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
+                                     << PrettyMethod(dex_method_idx_, *dex_file_)
+                                     << " to super " << PrettyMethod(res_method);
+        return nullptr;
+      }
+      if (!reference_class->IsAssignableFrom(GetDeclaringClass().GetClass()) ||
+          (res_method->GetMethodIndex() >= super.GetClass()->GetVTableLength())) {
+        Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
+                                     << PrettyMethod(dex_method_idx_, *dex_file_)
+                                     << " to super " << super
+                                     << "." << res_method->GetName()
+                                     << res_method->GetSignature();
+        return nullptr;
+      }
     }
   }
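
The two invoke-super paths above (interface default method vs. classic superclass virtual) can
be condensed into a standalone sketch; MiniClass and the outcome enum are hypothetical
simplifications, and interface hierarchies are reduced to a plain superclass walk:

#include <cstdint>

struct MiniClass {
  bool is_interface;
  const MiniClass* super;
  uint32_t vtable_length;

  // True if `other` is this class or inherits from it (interface edges elided).
  bool IsAssignableFrom(const MiniClass* other) const {
    for (const MiniClass* c = other; c != nullptr; c = c->super) {
      if (c == this) return true;
    }
    return false;
  }
};

enum class SuperCheck { kOk, kSelfInterface, kNotASuperType, kBadVTableIndex };

SuperCheck CheckInvokeSuper(const MiniClass* referenced,
                            const MiniClass* declaring,
                            uint32_t target_vtable_index) {
  if (referenced->is_interface) {
    // Default-method path: self-references are rejected, and the referenced
    // interface must be a supertype of the calling class.
    if (referenced == declaring) return SuperCheck::kSelfInterface;
    if (!referenced->IsAssignableFrom(declaring)) return SuperCheck::kNotASuperType;
    return SuperCheck::kOk;
  }
  // Classic path: the referenced class must be a supertype and the target must
  // fit into the superclass vtable.
  if (!referenced->IsAssignableFrom(declaring)) return SuperCheck::kNotASuperType;
  if (declaring->super == nullptr || target_vtable_index >= declaring->super->vtable_length) {
    return SuperCheck::kBadVTableIndex;
  }
  return SuperCheck::kOk;
}
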
 
@@ -3991,7 +4154,8 @@
    * For an interface class, we don't do the full interface merge (see JoinClass), so we can't do a
    * rigorous check here (which is okay since we have to do it at runtime).
    */
-  if (actual_arg_type.IsUninitializedReference() && !res_method->IsConstructor()) {
+  // Note: given an uninitialized type, this should always fail. Constructors aren't virtual.
+  if (actual_arg_type.IsUninitializedTypes() && !res_method->IsConstructor()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
     return nullptr;
   }
@@ -4001,8 +4165,11 @@
     const RegType& res_method_class =
         FromClass(klass->GetDescriptor(&temp), klass, klass->CannotBeAssignedFromOtherTypes());
     if (!res_method_class.IsAssignableFrom(actual_arg_type)) {
-      Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS :
-          VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
+      Fail(actual_arg_type.IsUninitializedTypes()    // Just overcautious - should have never
+               ? VERIFY_ERROR_BAD_CLASS_HARD         // quickened this.
+               : actual_arg_type.IsUnresolvedTypes()
+                     ? VERIFY_ERROR_NO_CLASS
+                     : VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
           << "' not instance of '" << res_method_class << "'";
       return nullptr;
     }
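
The nested conditional above only grades the failure; pulled out into a helper, the policy reads
as follows (names are hypothetical, mirroring the three VerifyError values used above):

// An uninitialized 'this' on a quickened invoke should have been impossible, so it
// is a hard failure; unresolved types defer to runtime; everything else is soft.
enum class MiniVerifyError { kNoClass, kBadClassSoft, kBadClassHard };

MiniVerifyError GradeThisArgFailure(bool is_uninitialized, bool is_unresolved) {
  if (is_uninitialized) return MiniVerifyError::kBadClassHard;
  if (is_unresolved) return MiniVerifyError::kNoClass;
  return MiniVerifyError::kBadClassSoft;
}
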
@@ -4075,6 +4242,7 @@
       const RegType& precise_type = reg_types_.FromUninitialized(res_type);
       work_line_->SetRegisterType<LockOp::kClear>(this, inst->VRegA_22c(), precise_type);
     } else {
+      DCHECK(!res_type.IsUnresolvedMergedReference());
       // Verify each register. If "arg_count" is bad, VerifyRegisterType() will run off the end of
       // the list and fail. It's legal, if silly, for arg_count to be zero.
       const RegType& expected_type = reg_types_.GetComponentType(res_type, GetClassLoader());
@@ -4119,6 +4287,19 @@
       }
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aget";
+    } else if (array_type.IsUnresolvedMergedReference()) {
+      // Unresolved array types must be reference array types.
+      if (is_primitive) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "reference array type " << array_type
+                    << " source for category 1 aget";
+      } else {
+        Fail(VERIFY_ERROR_NO_CLASS) << "cannot verify aget for " << array_type
+            << " because of missing class";
+        // Approximate with java.lang.Object[].
+        work_line_->SetRegisterType<LockOp::kClear>(this,
+                                                    inst->VRegA_23x(),
+                                                    reg_types_.JavaLangObject(false));
+      }
     } else {
       /* verify the class */
       const RegType& component_type = reg_types_.GetComponentType(array_type, GetClassLoader());
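
For an unresolved merged array the element type cannot be computed, so the branch above grades
the access and, in the reference case, widens the destination register. A sketch of the policy
(enum and function are illustrative only; the aput hunk below applies the same split):

// An unresolved merged array is known to be *some* reference array: a reference-typed
// aget can proceed with java.lang.Object as a conservative component type (soft
// failure, retried at runtime), while a category-1 primitive aget can never match.
enum class AgetOutcome { kHardFail, kSoftFailWidenToObject };

AgetOutcome AgetOnUnresolvedMergedArray(bool is_primitive_aget) {
  return is_primitive_aget ? AgetOutcome::kHardFail
                           : AgetOutcome::kSoftFailWidenToObject;
}
// On kSoftFailWidenToObject the destination register is set to "Ljava/lang/Object;"
// so subsequent instructions can still be verified.
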
@@ -4229,6 +4410,15 @@
       work_line_->VerifyRegisterType(this, inst->VRegA_23x(), *modified_reg_type);
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
+    } else if (array_type.IsUnresolvedMergedReference()) {
+      // Unresolved array types must be reference array types.
+      if (is_primitive) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "put insn has type '" << insn_type
+                                          << "' but unresolved type '" << array_type << "'";
+      } else {
+        Fail(VERIFY_ERROR_NO_CLASS) << "cannot verify aput for " << array_type
+                                    << " because of missing class";
+      }
     } else {
       const RegType& component_type = reg_types_.GetComponentType(array_type, GetClassLoader());
       const uint32_t vregA = inst->VRegA_23x();
@@ -4286,7 +4476,7 @@
 
 ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
   const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
-  // Check access to class
+  // Check access to class.
   const RegType& klass_type = ResolveClassAndCheckAccess(field_id.class_idx_);
   if (klass_type.IsConflict()) {
     AppendToLastFailMessage(StringPrintf(" in attempt to access instance field %d (%s) in %s",
@@ -4307,8 +4497,55 @@
     DCHECK(self_->IsExceptionPending());
     self_->ClearException();
     return nullptr;
-  } else if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
-                                                  field->GetAccessFlags())) {
+  } else if (obj_type.IsZero()) {
+    // Cannot infer and check the type; however, the access will cause a null pointer exception.
+    // Fall through into a few last soft failure checks below.
+  } else if (!obj_type.IsReferenceTypes()) {
+    // Trying to read a field from something that isn't a reference.
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "instance field access on object that has "
+                                      << "non-reference type " << obj_type;
+    return nullptr;
+  } else {
+    std::string temp;
+    mirror::Class* klass = field->GetDeclaringClass();
+    const RegType& field_klass =
+        FromClass(klass->GetDescriptor(&temp), klass, klass->CannotBeAssignedFromOtherTypes());
+    if (obj_type.IsUninitializedTypes()) {
+      // Field accesses through uninitialized references are only allowable for constructors where
+      // the field is declared in this class.
+      // Note: this IsConstructor check is technically redundant, as UninitializedThis should only
+      //       appear in constructors.
+      if (!obj_type.IsUninitializedThisReference() ||
+          !IsConstructor() ||
+          !field_klass.Equals(GetDeclaringClass())) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access instance field " << PrettyField(field)
+                                          << " of a not fully initialized object within the context"
+                                          << " of " << PrettyMethod(dex_method_idx_, *dex_file_);
+        return nullptr;
+      }
+    } else if (!field_klass.IsAssignableFrom(obj_type)) {
+      // Trying to access C1.field1 using a reference of type C2, which is neither C1 nor a
+      // sub-class of C1. For resolution to occur the declared class of the field must be
+      // compatible with obj_type; we've discovered this isn't so, so report the field as missing.
+      VerifyError type;
+      bool is_aot = Runtime::Current()->IsAotCompiler();
+      if (is_aot && (field_klass.IsUnresolvedTypes() || obj_type.IsUnresolvedTypes())) {
+        // Compiler & unresolved types involved, retry at runtime.
+        type = VerifyError::VERIFY_ERROR_NO_CLASS;
+      } else {
+        // Classes are known (resolved, so the assignability check is precise), or we are at
+        // runtime and still missing classes. Either way this is a hard failure.
+        type = VerifyError::VERIFY_ERROR_BAD_CLASS_HARD;
+      }
+      Fail(type) << "cannot access instance field " << PrettyField(field)
+                 << " from object of type " << obj_type;
+      return nullptr;
+    }
+  }
+
+  // A few final soft-failure checks.
+  if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
+                                           field->GetAccessFlags())) {
     Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << PrettyField(field)
                                     << " from " << GetDeclaringClass();
     return nullptr;
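
The restructured receiver validation above orders cheap structural tests before the
assignability test. Flattened into a single hypothetical decision function (all parameters are
ad-hoc condensations of the checks above):

enum class FieldCheck { kFallThrough, kHardFail, kSoftRetryAtRuntime, kOk };

// Null receivers fall through to the final access/static checks (the access itself
// will raise NullPointerException); non-references hard-fail; an uninitialized
// receiver is legal only as 'this' inside a constructor of the declaring class;
// a failed assignability check is downgraded to a runtime retry only when the
// AOT compiler is involved and unresolved types prevented a precise answer.
FieldCheck CheckInstanceFieldReceiver(bool is_zero,
                                      bool is_reference,
                                      bool is_uninitialized,
                                      bool is_uninit_this_in_ctor_of_declarer,
                                      bool assignable,
                                      bool aot_with_unresolved_types) {
  if (is_zero) return FieldCheck::kFallThrough;
  if (!is_reference) return FieldCheck::kHardFail;
  if (is_uninitialized) {
    return is_uninit_this_in_ctor_of_declarer ? FieldCheck::kOk : FieldCheck::kHardFail;
  }
  if (!assignable) {
    return aot_with_unresolved_types ? FieldCheck::kSoftRetryAtRuntime : FieldCheck::kHardFail;
  }
  return FieldCheck::kOk;
}
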
@@ -4316,39 +4553,9 @@
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field)
                                     << " to not be static";
     return nullptr;
-  } else if (obj_type.IsZero()) {
-    // Cannot infer and check type, however, access will cause null pointer exception
-    return field;
-  } else if (!obj_type.IsReferenceTypes()) {
-    // Trying to read a field from something that isn't a reference
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "instance field access on object that has "
-                                      << "non-reference type " << obj_type;
-    return nullptr;
-  } else {
-    mirror::Class* klass = field->GetDeclaringClass();
-    const RegType& field_klass =
-        FromClass(dex_file_->GetFieldDeclaringClassDescriptor(field_id),
-                  klass, klass->CannotBeAssignedFromOtherTypes());
-    if (obj_type.IsUninitializedTypes() &&
-        (!IsConstructor() || GetDeclaringClass().Equals(obj_type) ||
-            !field_klass.Equals(GetDeclaringClass()))) {
-      // Field accesses through uninitialized references are only allowable for constructors where
-      // the field is declared in this class
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access instance field " << PrettyField(field)
-                                        << " of a not fully initialized object within the context"
-                                        << " of " << PrettyMethod(dex_method_idx_, *dex_file_);
-      return nullptr;
-    } else if (!field_klass.IsAssignableFrom(obj_type)) {
-      // Trying to access C1.field1 using reference of type C2, which is neither C1 or a sub-class
-      // of C1. For resolution to occur the declared class of the field must be compatible with
-      // obj_type, we've discovered this wasn't so, so report the field didn't exist.
-      Fail(VERIFY_ERROR_NO_FIELD) << "cannot access instance field " << PrettyField(field)
-                                  << " from object of type " << obj_type;
-      return nullptr;
-    } else {
-      return field;
-    }
   }
+
+  return field;
 }
 
 template <MethodVerifier::FieldAccessType kAccType>
@@ -4360,10 +4567,34 @@
     field = GetStaticField(field_idx);
   } else {
     const RegType& object_type = work_line_->GetRegisterType(this, inst->VRegB_22c());
-    field = GetInstanceField(object_type, field_idx);
+
+    // One is not allowed to access fields on uninitialized references, except to write to
+    // fields in the constructor (before calling another constructor).
+    // GetInstanceField does an assignability check which will fail for uninitialized types.
+    // We thus modify the type if the uninitialized reference is a "this" reference (this also
+    // checks at the same time that we're verifying a constructor).
+    bool should_adjust = (kAccType == FieldAccessType::kAccPut) &&
+                         object_type.IsUninitializedThisReference();
+    const RegType& adjusted_type = should_adjust
+                                       ? GetRegTypeCache()->FromUninitialized(object_type)
+                                       : object_type;
+    field = GetInstanceField(adjusted_type, field_idx);
     if (UNLIKELY(have_pending_hard_failure_)) {
       return;
     }
+    if (should_adjust) {
+      if (field == nullptr) {
+        Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "Might be accessing a superclass instance field prior "
+                                          << "to the superclass being initialized in "
+                                          << PrettyMethod(dex_method_idx_, *dex_file_);
+      } else if (field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access superclass instance field "
+                                          << PrettyField(field) << " of a not fully initialized "
+                                          << "object within the context of "
+                                          << PrettyMethod(dex_method_idx_, *dex_file_);
+        return;
+      }
+    }
   }
   const RegType* field_type = nullptr;
   if (field != nullptr) {
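
The put-path adjustment above temporarily treats the uninitialized 'this' as initialized so
GetInstanceField's assignability check can pass, then re-verifies that the resolved field really
belongs to the current class. As a sketch (both helpers are hypothetical):

// Only writes (iput) through the constructor's own uninitialized 'this' get the
// demoted type; reads and other receivers keep the strict uninitialized type.
bool ShouldAdjustForPut(bool is_put, bool receiver_is_uninitialized_this) {
  return is_put && receiver_is_uninitialized_this;
}

// After the adjustment, superclass fields stay off-limits: that part of the
// object is not initialized until the super.<init> call has happened.
bool PutOnUninitializedThisAllowed(bool field_declared_in_current_class) {
  return field_declared_in_current_class;
}
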
@@ -4371,7 +4602,7 @@
       if (field->IsFinal() && field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
         Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << PrettyField(field)
                                         << " from other class " << GetDeclaringClass();
-        return;
+        // Keep hunting for possible hard fails.
       }
     }
 
@@ -4647,10 +4878,14 @@
       // Initialize them as conflicts so they don't add to GC and deoptimization information.
       const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn);
       AdjustReturnLine(this, ret_inst, target_line);
+      // Directly bail if a hard failure was found.
+      if (have_pending_hard_failure_) {
+        return false;
+      }
     }
   } else {
-    ArenaUniquePtr<RegisterLine> copy;
-    if (gDebugVerify) {
+    RegisterLineArenaUniquePtr copy;
+    if (kDebugVerify) {
       copy.reset(RegisterLine::Create(target_line->NumRegs(), this));
       copy->CopyFromLine(target_line);
     }
@@ -4658,7 +4893,7 @@
     if (have_pending_hard_failure_) {
       return false;
     }
-    if (gDebugVerify && changed) {
+    if (kDebugVerify && changed) {
       LogVerifyInfo() << "Merging at [" << reinterpret_cast<void*>(work_insn_idx_) << "]"
                       << " to [" << reinterpret_cast<void*>(next_insn) << "]: " << "\n"
                       << copy->Dump(this) << "  MERGE\n"
@@ -4682,7 +4917,7 @@
 const RegType& MethodVerifier::GetMethodReturnType() {
   if (return_type_ == nullptr) {
     if (mirror_method_ != nullptr) {
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       mirror::Class* return_type_class = mirror_method_->GetReturnType(can_load_classes_,
                                                                        pointer_size);
       if (return_type_class != nullptr) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 7b51d6e..5fe95c2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -25,14 +25,17 @@
 #include "base/macros.h"
 #include "base/scoped_arena_containers.h"
 #include "base/stl_util.h"
+#include "base/value_object.h"
 #include "dex_file.h"
 #include "handle.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
+#include "register_line.h"
 #include "reg_type_cache.h"
 
 namespace art {
 
+class CompilerCallbacks;
 class Instruction;
 struct ReferenceMap2Visitor;
 class Thread;
@@ -40,9 +43,9 @@
 
 namespace verifier {
 
-class DexPcToReferenceMap;
 class MethodVerifier;
 class RegisterLine;
+using RegisterLineArenaUniquePtr = std::unique_ptr<RegisterLine, RegisterLineArenaDelete>;
 class RegType;
 
 /*
@@ -56,7 +59,8 @@
   METHOD_UNKNOWN  = 0,
   METHOD_DIRECT,      // <init>, private
   METHOD_STATIC,      // static
-  METHOD_VIRTUAL,     // virtual, super
+  METHOD_VIRTUAL,     // virtual
+  METHOD_SUPER,       // super
   METHOD_INTERFACE    // interface
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
@@ -82,13 +86,10 @@
   VERIFY_ERROR_ACCESS_METHOD = 128,       // IllegalAccessError.
   VERIFY_ERROR_CLASS_CHANGE = 256,        // IncompatibleClassChangeError.
   VERIFY_ERROR_INSTANTIATION = 512,       // InstantiationError.
-  // For opcodes that don't have complete verifier support (such as lambda opcodes),
-  // we need a way to continue execution at runtime without attempting to re-verify
-  // (since we know it will fail no matter what). Instead, run as the interpreter
-  // in a special "do access checks" mode which will perform verifier-like checking
-  // on the fly.
-  //
-  // TODO: Once all new opcodes have implemented full verifier support, this can be removed.
+  // For opcodes that don't have complete verifier support, we need a way to continue
+  // execution at runtime without attempting to re-verify (since we know it will fail no
+  // matter what). Instead, run as the interpreter in a special "do access checks" mode
+  // which will perform verifier-like checking on the fly.
   VERIFY_ERROR_FORCE_INTERPRETER = 1024,  // Skip the verification phase at runtime;
                                           // force the interpreter to do access checks.
                                           // (sets a soft fail at compile time).
@@ -124,7 +125,7 @@
   }
 
  private:
-  ScopedArenaVector<ArenaUniquePtr<RegisterLine>> register_lines_;
+  ScopedArenaVector<RegisterLineArenaUniquePtr> register_lines_;
 
   DISALLOW_COPY_AND_ASSIGN(PcToRegisterLineTable);
 };
@@ -138,15 +139,31 @@
     kHardFailure,
   };
 
-  /* Verify a class. Returns "kNoFailure" on success. */
-  static FailureKind VerifyClass(Thread* self, mirror::Class* klass, bool allow_soft_failures,
+  static bool CanCompilerHandleVerificationFailure(uint32_t encountered_failure_types) {
+    constexpr uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
+    return (encountered_failure_types & (~unresolved_mask)) == 0;
+  }
+
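
CanCompilerHandleVerificationFailure above is a pure bit-mask test: failure kinds are single-bit
flags and the compiler tolerates exactly the "unresolved or inaccessible" subset. A sketch with
illustrative bit values (not the real VerifyError constants):

#include <cstdint>

enum MiniError : uint32_t {
  kNoClass      = 1u << 0,
  kAccessClass  = 1u << 1,
  kAccessField  = 1u << 2,
  kAccessMethod = 1u << 3,
  kClassChange  = 1u << 4,  // ...and further kinds the compiler cannot absorb.
};

bool CompilerCanHandle(uint32_t encountered_failure_types) {
  constexpr uint32_t kHandledMask = kNoClass | kAccessClass | kAccessField | kAccessMethod;
  return (encountered_failure_types & ~kHandledMask) == 0;
}
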
+  // Verify a class. Returns "kNoFailure" on success.
+  static FailureKind VerifyClass(Thread* self,
+                                 mirror::Class* klass,
+                                 CompilerCallbacks* callbacks,
+                                 bool allow_soft_failures,
+                                 LogSeverity log_level,
                                  std::string* error)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  static FailureKind VerifyClass(Thread* self, const DexFile* dex_file,
+  static FailureKind VerifyClass(Thread* self,
+                                 const DexFile* dex_file,
                                  Handle<mirror::DexCache> dex_cache,
                                  Handle<mirror::ClassLoader> class_loader,
                                  const DexFile::ClassDef* class_def,
-                                 bool allow_soft_failures, std::string* error)
+                                 CompilerCallbacks* callbacks,
+                                 bool allow_soft_failures,
+                                 LogSeverity log_level,
+                                 std::string* error)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static MethodVerifier* VerifyMethodAndDump(Thread* self,
@@ -160,9 +177,6 @@
                                              uint32_t method_access_flags)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static FailureKind VerifyMethod(ArtMethod* method, bool allow_soft_failures,
-                                  std::string* error) SHARED_REQUIRES(Locks::mutator_lock_);
-
   uint8_t EncodePcToReferenceMapData() const;
 
   uint32_t DexFileVersion() const {
@@ -203,9 +217,6 @@
   static ArtMethod* FindInvokedMethodAtDexPc(ArtMethod* m, uint32_t dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static SafeMap<uint32_t, std::set<uint32_t>> FindStringInitMap(ArtMethod* m)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   static void Init() SHARED_REQUIRES(Locks::mutator_lock_);
   static void Shutdown();
 
@@ -213,17 +224,6 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item, uint32_t method_idx,
-                 ArtMethod* method,
-                 uint32_t access_flags, bool can_load_classes, bool allow_soft_failures,
-                 bool need_precise_constants, bool allow_thread_suspension)
-          SHARED_REQUIRES(Locks::mutator_lock_)
-      : MethodVerifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx,
-                       method, access_flags, can_load_classes, allow_soft_failures,
-                       need_precise_constants, false, allow_thread_suspension) {}
-
   ~MethodVerifier();
 
   // Run verification on the method. Returns true if verification completes and false if the input
@@ -245,6 +245,7 @@
   ALWAYS_INLINE InstructionFlags& GetInstructionFlags(size_t index);
   mirror::ClassLoader* GetClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::DexCache* GetDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* GetMethod() const SHARED_REQUIRES(Locks::mutator_lock_);
   MethodReference GetMethodReference() const;
   uint32_t GetAccessFlags() const;
   bool HasCheckCasts() const;
@@ -265,10 +266,6 @@
   ArtField* GetQuickFieldAccess(const Instruction* inst, RegisterLine* reg_line)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() {
-    return string_init_pc_reg_map_;
-  }
-
   uint32_t GetEncounteredFailureTypes() {
     return encountered_failure_types_;
   }
@@ -282,6 +279,22 @@
   }
 
  private:
+  MethodVerifier(Thread* self,
+                 const DexFile* dex_file,
+                 Handle<mirror::DexCache> dex_cache,
+                 Handle<mirror::ClassLoader> class_loader,
+                 const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item,
+                 uint32_t method_idx,
+                 ArtMethod* method,
+                 uint32_t access_flags,
+                 bool can_load_classes,
+                 bool allow_soft_failures,
+                 bool need_precise_constants,
+                 bool verify_to_dump,
+                 bool allow_thread_suspension)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void UninstantiableError(const char* descriptor);
   static bool IsInstantiableOrPrimitive(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -295,21 +308,39 @@
     return (method_access_flags_ & kAccStatic) != 0;
   }
 
-  // Private constructor for dumping.
-  MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item, uint32_t method_idx,
-                 ArtMethod* method, uint32_t access_flags,
-                 bool can_load_classes, bool allow_soft_failures, bool need_precise_constants,
-                 bool verify_to_dump, bool allow_thread_suspension)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Adds the given string to the beginning of the last failure message.
   void PrependToLastFailMessage(std::string);
 
   // Adds the given string to the end of the last failure message.
   void AppendToLastFailMessage(std::string);
 
+  // Verification result for method(s). Includes a (maximum) failure kind, and (the union of)
+  // all failure types.
+  struct FailureData : ValueObject {
+    FailureKind kind = kNoFailure;
+    uint32_t types = 0U;
+
+    // Merge src into this. Uses the most severe failure kind, and the union of types.
+    void Merge(const FailureData& src);
+  };
+
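
FailureData's Merge is only declared here; per the comment its contract is "most severe kind,
union of type bits". A plausible sketch of that contract (the real definition lives in
method_verifier.cc and may differ in detail):

#include <algorithm>
#include <cstdint>

// FailureKind is ordered kNoFailure < kSoftFailure < kHardFailure, so std::max
// over the enum picks the most severe value.
enum FailureKind { kNoFailure, kSoftFailure, kHardFailure };

struct MiniFailureData {
  FailureKind kind = kNoFailure;
  uint32_t types = 0u;

  void Merge(const MiniFailureData& src) {
    kind = std::max(kind, src.kind);
    types |= src.types;
  }
};
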
+  // Verify all direct or virtual methods of a class. Assumes the iterator is positioned
+  // correctly on entry; it will be advanced as methods are verified.
+  template <bool kDirect>
+  static FailureData VerifyMethods(Thread* self,
+                                   ClassLinker* linker,
+                                   const DexFile* dex_file,
+                                   const DexFile::ClassDef* class_def,
+                                   ClassDataItemIterator* it,
+                                   Handle<mirror::DexCache> dex_cache,
+                                   Handle<mirror::ClassLoader> class_loader,
+                                   CompilerCallbacks* callbacks,
+                                   bool allow_soft_failures,
+                                   LogSeverity log_level,
+                                   bool need_precise_constants,
+                                   std::string* error_string)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   /*
    * Perform verification on a single method.
    *
@@ -321,13 +352,19 @@
    *  (3) Iterate through the method, checking type safety and looking
    *      for code flow problems.
    */
-  static FailureKind VerifyMethod(Thread* self, uint32_t method_idx, const DexFile* dex_file,
+  static FailureData VerifyMethod(Thread* self, uint32_t method_idx,
+                                  const DexFile* dex_file,
                                   Handle<mirror::DexCache> dex_cache,
                                   Handle<mirror::ClassLoader> class_loader,
                                   const DexFile::ClassDef* class_def_idx,
                                   const DexFile::CodeItem* code_item,
-                                  ArtMethod* method, uint32_t method_access_flags,
-                                  bool allow_soft_failures, bool need_precise_constants)
+                                  ArtMethod* method,
+                                  uint32_t method_access_flags,
+                                  CompilerCallbacks* callbacks,
+                                  bool allow_soft_failures,
+                                  LogSeverity log_level,
+                                  bool need_precise_constants,
+                                  std::string* hard_failure_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FindLocksAtDexPc() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -622,9 +659,7 @@
    * Returns the resolved method on success, null on failure (with *failure
    * set appropriately).
    */
-  ArtMethod* VerifyInvocationArgs(const Instruction* inst,
-                                          MethodType method_type,
-                                          bool is_range, bool is_super)
+  ArtMethod* VerifyInvocationArgs(const Instruction* inst, MethodType method_type, bool is_range)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Similar checks to the above, but on the proto. Will be used when the method cannot be
@@ -705,14 +740,14 @@
   PcToRegisterLineTable reg_table_;
 
   // Storage for the register status we're currently working on.
-  ArenaUniquePtr<RegisterLine> work_line_;
+  RegisterLineArenaUniquePtr work_line_;
 
   // The address of the instruction we're currently working on, note that this is in 2 byte
   // quantities
   uint32_t work_insn_idx_;
 
   // Storage for the register status we're saving for later.
-  ArenaUniquePtr<RegisterLine> saved_line_;
+  RegisterLineArenaUniquePtr saved_line_;
 
   const uint32_t dex_method_idx_;  // The method we're working on.
   // Its object representation if known.
@@ -807,11 +842,6 @@
 
   friend class art::Thread;
 
-  // Map of dex pcs of invocations of java.lang.String.<init> to the set of other registers that
-  // contain the uninitialized this pointer to that invoke. Will contain no entry if there are
-  // no other registers.
-  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
-
   DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 2ab6b4a..b036313 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -37,8 +37,13 @@
 
     // Verify the class
     std::string error_msg;
-    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, true, &error_msg) == MethodVerifier::kNoFailure)
-        << error_msg;
+    MethodVerifier::FailureKind failure = MethodVerifier::VerifyClass(self,
+                                                                      klass,
+                                                                      nullptr,
+                                                                      true,
+                                                                      LogSeverity::WARNING,
+                                                                      &error_msg);
+    ASSERT_TRUE(failure == MethodVerifier::kNoFailure) << error_msg;
   }
 
   void VerifyDexFile(const DexFile& dex)
diff --git a/runtime/verifier/reg_type-inl.h b/runtime/verifier/reg_type-inl.h
index 11a53e5..861db3c 100644
--- a/runtime/verifier/reg_type-inl.h
+++ b/runtime/verifier/reg_type-inl.h
@@ -93,6 +93,10 @@
         return true;  // All reference types can be assigned null.
       } else if (!rhs.IsReferenceTypes()) {
         return false;  // Expect rhs to be a reference type.
+      } else if (lhs.IsUninitializedTypes() || rhs.IsUninitializedTypes()) {
+        // Uninitialized types are only allowed to be assigned to themselves.
+        // TODO: Once we have a proper "reference" super type, this needs to be extended.
+        return false;
       } else if (lhs.IsJavaLangObject()) {
         return true;  // All reference types can be assigned to Object.
       } else if (!strict && !lhs.IsUnresolvedTypes() && lhs.GetClass()->IsInterface()) {
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 16cab03..85daba9 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -517,9 +517,21 @@
   }
 }
 
+bool RegType::IsJavaLangObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
+  return IsReference() && GetClass()->IsObjectClass();
+}
+
 bool RegType::IsObjectArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (IsUnresolvedTypes() && !IsUnresolvedMergedReference() && !IsUnresolvedSuperClass()) {
-    // Primitive arrays will always resolve
+  if (IsUnresolvedTypes()) {
+    DCHECK(!IsUnresolvedMergedReference());
+
+    if (IsUnresolvedSuperClass()) {
+      // Cannot be an array, as the superclass of arrays is java.lang.Object (which cannot be
+      // unresolved).
+      return false;
+    }
+
+    // Primitive arrays will always resolve.
     DCHECK(descriptor_[1] == 'L' || descriptor_[1] == '[');
     return descriptor_[0] == '[';
   } else if (HasClass()) {
@@ -530,12 +542,15 @@
   }
 }
 
-bool RegType::IsJavaLangObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
-  return IsReference() && GetClass()->IsObjectClass();
-}
-
 bool RegType::IsArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (IsUnresolvedTypes() && !IsUnresolvedMergedReference() && !IsUnresolvedSuperClass()) {
+  if (IsUnresolvedTypes()) {
+    DCHECK(!IsUnresolvedMergedReference());
+
+    if (IsUnresolvedSuperClass()) {
+      // Cannot be an array, as the superclass of arrays is java.lang.Object (which cannot be
+      // unresolved).
+      return false;
+    }
     return descriptor_[0] == '[';
   } else if (HasClass()) {
     return GetClass()->IsArrayClass();
@@ -667,13 +682,13 @@
     // float/long/double MERGE float/long/double_constant => float/long/double
     return SelectNonConstant(*this, incoming_type);
   } else if (IsReferenceTypes() && incoming_type.IsReferenceTypes()) {
-    if (IsZero() || incoming_type.IsZero()) {
-      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
-    } else if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
+    if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
       // Something that is uninitialized hasn't had its constructor called. Unitialized types are
       // special. They may only ever be merged with themselves (must be taken care of by the
       // caller of Merge(), see the DCHECK on entry). So mark any other merge as conflicting here.
       return conflict;
+    } else if (IsZero() || incoming_type.IsZero()) {
+      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
     } else if (IsJavaLangObject() || incoming_type.IsJavaLangObject()) {
       return reg_types->JavaLangObject(false);  // Object MERGE ref => Object
     } else if (IsUnresolvedTypes() || incoming_type.IsUnresolvedTypes()) {
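
The reordering above is load-bearing: with the zero test first, `0 MERGE uninitialized` selected
the non-constant side and silently produced an uninitialized type. Testing for uninitialized
types first makes any such merge a conflict. A sketch of the corrected ordering (MiniType is a
stand-in, not RegType):

enum class MiniType { kZero, kUninitRef, kRef, kConflict };

MiniType MergeRefs(MiniType a, MiniType b) {
  // Equal types never reach Merge (the caller short-circuits them), so any
  // uninitialized operand seen here may only merge to a conflict.
  if (a == MiniType::kUninitRef || b == MiniType::kUninitRef) {
    return MiniType::kConflict;
  }
  if (a == MiniType::kZero) return b;  // 0 MERGE ref => ref
  if (b == MiniType::kZero) return a;
  return MiniType::kRef;               // Full join elided.
}
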
@@ -758,6 +773,8 @@
   }
   if (!klass_.IsNull()) {
     CHECK(!descriptor_.empty()) << *this;
+    std::string temp;
+    CHECK_EQ(descriptor_.ToString(), klass_.Read()->GetDescriptor(&temp)) << *this;
   }
 }
 
@@ -793,11 +810,50 @@
   }
 }
 void UnresolvedMergedType::CheckInvariants() const {
+  CHECK(reg_type_cache_ != nullptr);
+
   // Unresolved merged types: merged types should be defined.
   CHECK(descriptor_.empty()) << *this;
   CHECK(klass_.IsNull()) << *this;
+
+  CHECK(!resolved_part_.IsConflict());
   CHECK(resolved_part_.IsReferenceTypes());
   CHECK(!resolved_part_.IsUnresolvedTypes());
+
+  CHECK(resolved_part_.IsZero() ||
+        !(resolved_part_.IsArrayTypes() && !resolved_part_.IsObjectArrayTypes()));
+
+  CHECK_GT(unresolved_types_.NumSetBits(), 0U);
+  bool unresolved_is_array =
+      reg_type_cache_->GetFromId(unresolved_types_.GetHighestBitSet()).IsArrayTypes();
+  for (uint32_t idx : unresolved_types_.Indexes()) {
+    const RegType& t = reg_type_cache_->GetFromId(idx);
+    CHECK_EQ(unresolved_is_array, t.IsArrayTypes());
+  }
+
+  if (!resolved_part_.IsZero()) {
+    CHECK_EQ(resolved_part_.IsArrayTypes(), unresolved_is_array);
+  }
+}
+
+bool UnresolvedMergedType::IsArrayTypes() const {
+  // For a merge to be an array, both the resolved and the unresolved part need to be object
+  // arrays.
+  // (Note: we encode a missing resolved part [which doesn't need to be an array] as zero.)
+
+  if (!resolved_part_.IsZero() && !resolved_part_.IsArrayTypes()) {
+    return false;
+  }
+
+  // It is enough to check just one of the merged types: if they disagreed on array-ness, the
+  // merge would have been collapsed (this is checked in CheckInvariants on construction).
+  uint32_t idx = unresolved_types_.GetHighestBitSet();
+  const RegType& unresolved = reg_type_cache_->GetFromId(idx);
+  return unresolved.IsArrayTypes();
+}
+
+bool UnresolvedMergedType::IsObjectArrayTypes() const {
+  // Same as IsArrayTypes, as primitive arrays are always resolved.
+  return IsArrayTypes();
 }
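
The invariant relied on above is that every unresolved component of a merge agrees on array-ness
(merges that disagree are collapsed at construction; see the cache changes below), which is what
lets IsArrayTypes sample a single component. A sketch of the property (helper is hypothetical):

#include <vector>

// Mirrors the CHECK loop in CheckInvariants; assumes at least one component,
// matching CHECK_GT(unresolved_types_.NumSetBits(), 0U).
bool AllAgreeOnArrayness(const std::vector<bool>& component_is_array) {
  for (bool is_array : component_is_array) {
    if (is_array != component_is_array.front()) return false;  // Would trip the CHECK.
  }
  return true;
}
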
 
 void UnresolvedReferenceType::CheckInvariants() const {
@@ -824,6 +880,14 @@
     return false;
   }
 
+  if (IsUnresolvedMergedReference() || src.IsUnresolvedMergedReference()) {
+    // An unresolved array type means that it's an array of some reference type. Reference arrays
+    // can never be assigned to primitive-type arrays, and vice versa. So it is a soft error if
+    // both arrays are reference arrays, otherwise a hard error.
+    *soft_error = IsObjectArrayTypes() && src.IsObjectArrayTypes();
+    return false;
+  }
+
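
The grading above distinguishes "might still work once classes resolve" from "can never work": an
unresolved merged type is known to be a reference array, so two reference arrays stay a soft
error while a reference/primitive mix is hard. Condensed (hypothetical helper, mirroring the
soft_error out-parameter):

// Returns false either way, like the surrounding code; *soft_error carries the grade.
bool CanAssignUnresolvedArray(bool lhs_is_object_array,
                              bool rhs_is_object_array,
                              bool* soft_error) {
  *soft_error = lhs_is_object_array && rhs_is_object_array;
  return false;
}
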
   const RegType& cmp1 = reg_types.GetComponentType(*this, class_loader.Get());
   const RegType& cmp2 = reg_types.GetComponentType(src, class_loader.Get());
 
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 80b751c..4837490 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -172,8 +172,8 @@
   }
   virtual bool HasClassVirtual() const { return false; }
   bool IsJavaLangObject() const SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsObjectArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
+  virtual bool IsArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
+  virtual bool IsObjectArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
   Primitive::Type GetPrimitiveType() const;
   bool IsJavaLangObjectArray() const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -246,28 +246,18 @@
   }
 
   /*
-   * A basic Join operation on classes. For a pair of types S and T the Join,
-   *written S v T = J, is
-   * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is
-   *J is the parent of
-   * S and T such that there isn't a parent of both S and T that isn't also the
-   *parent of J (ie J
+   * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
+   * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
+   * S and T such that there isn't a parent of both S and T that isn't also the parent of J (ie J
    * is the deepest (lowest upper bound) parent of S and T).
    *
-   * This operation applies for regular classes and arrays, however, for
-   *interface types there
-   * needn't be a partial ordering on the types. We could solve the problem of a
-   *lack of a partial
-   * order by introducing sets of types, however, the only operation permissible
-   *on an interface is
-   * invoke-interface. In the tradition of Java verifiers [1] we defer the
-   *verification of interface
-   * types until an invoke-interface call on the interface typed reference at
-   *runtime and allow
-   * the perversion of Object being assignable to an interface type (note,
-   *however, that we don't
-   * allow assignment of Object or Interface to any concrete class and are
-   *therefore type safe).
+   * This operation applies for regular classes and arrays, however, for interface types there
+   * needn't be a partial ordering on the types. We could solve the problem of a lack of a partial
+   * order by introducing sets of types, however, the only operation permissible on an interface is
+   * invoke-interface. In the tradition of Java verifiers [1] we defer the verification of interface
+   * types until an invoke-interface call on the interface typed reference at runtime and allow
+   * the perversion of Object being assignable to an interface type (note, however, that we don't
+   * allow assignment of Object or Interface to any concrete class and are therefore type safe).
    *
    * [1] Java bytecode verification: algorithms and formalizations, Xavier Leroy
    */
@@ -915,6 +905,9 @@
 
   bool IsUnresolvedTypes() const OVERRIDE { return true; }
 
+  bool IsArrayTypes() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  bool IsObjectArrayTypes() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 71ed4a2..71c2a90 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -17,6 +17,7 @@
 #include "reg_type_cache-inl.h"
 
 #include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/casts.h"
 #include "base/scoped_arena_allocator.h"
 #include "base/stl_util.h"
@@ -155,7 +156,7 @@
   } else {
     klass = class_linker->LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor),
                                       loader);
-    if (klass != nullptr && !klass->IsLoaded()) {
+    if (klass != nullptr && !klass->IsResolved()) {
       // We found the class but without it being loaded its not safe for use.
       klass = nullptr;
     }
@@ -346,27 +347,39 @@
                        kDefaultArenaBitVectorBytes * kBitsPerByte,  // Allocate at least 8 bytes.
                        true);                                       // Is expandable.
   const RegType* left_resolved;
+  bool left_unresolved_is_array;
   if (left.IsUnresolvedMergedReference()) {
-    const UnresolvedMergedType* left_merge = down_cast<const UnresolvedMergedType*>(&left);
-    types.Copy(&left_merge->GetUnresolvedTypes());
-    left_resolved = &left_merge->GetResolvedPart();
+    const UnresolvedMergedType& left_merge = *down_cast<const UnresolvedMergedType*>(&left);
+
+    types.Copy(&left_merge.GetUnresolvedTypes());
+    left_resolved = &left_merge.GetResolvedPart();
+    left_unresolved_is_array = left.IsArrayTypes();
   } else if (left.IsUnresolvedTypes()) {
+    types.ClearAllBits();
     types.SetBit(left.GetId());
     left_resolved = &Zero();
+    left_unresolved_is_array = left.IsArrayTypes();
   } else {
+    types.ClearAllBits();
     left_resolved = &left;
+    left_unresolved_is_array = false;
   }
 
   const RegType* right_resolved;
+  bool right_unresolved_is_array;
   if (right.IsUnresolvedMergedReference()) {
-    const UnresolvedMergedType* right_merge = down_cast<const UnresolvedMergedType*>(&right);
-    types.Union(&right_merge->GetUnresolvedTypes());
-    right_resolved = &right_merge->GetResolvedPart();
+    const UnresolvedMergedType& right_merge = *down_cast<const UnresolvedMergedType*>(&right);
+
+    types.Union(&right_merge.GetUnresolvedTypes());
+    right_resolved = &right_merge.GetResolvedPart();
+    right_unresolved_is_array = right.IsArrayTypes();
   } else if (right.IsUnresolvedTypes()) {
     types.SetBit(right.GetId());
     right_resolved = &Zero();
+    right_unresolved_is_array = right.IsArrayTypes();
   } else {
     right_resolved = &right;
+    right_unresolved_is_array = false;
   }
 
   // Merge the resolved parts. Left and right might be equal, so use SafeMerge.
@@ -376,6 +389,23 @@
     return Conflict();
   }
 
+  bool resolved_merged_is_array = resolved_parts_merged.IsArrayTypes();
+  if (left_unresolved_is_array || right_unresolved_is_array || resolved_merged_is_array) {
+    // Arrays involved, see if we need to merge to Object.
+
+    // Is the resolved part a primitive array?
+    if (resolved_merged_is_array && !resolved_parts_merged.IsObjectArrayTypes()) {
+      return JavaLangObject(false /* precise */);
+    }
+
+    // Is any part not an array (but exists)?
+    if ((!left_unresolved_is_array && left_resolved != &left) ||
+        (!right_unresolved_is_array && right_resolved != &right) ||
+        !resolved_merged_is_array) {
+      return JavaLangObject(false /* precise */);
+    }
+  }
+
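
The collapse rules just added can be summarized as: once any part of the merge is an array, the
result may only remain an unresolved array merge if the merged resolved part is a reference
array and every side that contributed unresolved types contributed arrays; otherwise the merge
widens to java.lang.Object. A sketch (parameter names are ad hoc):

bool CollapsesToObject(bool resolved_is_array, bool resolved_is_object_array,
                       bool left_has_unresolved, bool left_unresolved_is_array,
                       bool right_has_unresolved, bool right_unresolved_is_array) {
  if (!(left_unresolved_is_array || right_unresolved_is_array || resolved_is_array)) {
    return false;  // No arrays involved; keep the merge as computed.
  }
  if (resolved_is_array && !resolved_is_object_array) {
    return true;  // Primitive arrays never stay in a reference merge.
  }
  return (left_has_unresolved && !left_unresolved_is_array) ||
         (right_has_unresolved && !right_unresolved_is_array) ||
         !resolved_is_array;  // Includes the missing-resolved-part (Zero) case.
}
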
   // Check if entry already exists.
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
     const RegType* cur_entry = entries_[i];
@@ -581,6 +611,7 @@
   if (!array.IsArrayTypes()) {
     return Conflict();
   } else if (array.IsUnresolvedTypes()) {
+    DCHECK(!array.IsUnresolvedMergedReference());  // Caller must make sure not to ask for this.
     const std::string descriptor(array.GetDescriptor().as_string());
     return FromDescriptor(loader, descriptor.c_str() + 1, false);
   } else {
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 22ac7e4..42a74f8 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -30,23 +30,14 @@
 namespace art {
 namespace verifier {
 
-class BaseRegTypeTest : public CommonRuntimeTest {
- public:
-  void PostRuntimeCreate() OVERRIDE {
-    stack.reset(new ArenaStack(Runtime::Current()->GetArenaPool()));
-    allocator.reset(new ScopedArenaAllocator(stack.get()));
-  }
-
-  std::unique_ptr<ArenaStack> stack;
-  std::unique_ptr<ScopedArenaAllocator> allocator;
-};
-
-class RegTypeTest : public BaseRegTypeTest {};
+class RegTypeTest : public CommonRuntimeTest {};
 
 TEST_F(RegTypeTest, ConstLoHi) {
   // Tests creating primitive types types.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type_const_0 = cache.FromCat1Const(10, true);
   const RegType& ref_type_const_1 = cache.FromCat1Const(10, true);
   const RegType& ref_type_const_2 = cache.FromCat1Const(30, true);
@@ -67,8 +58,10 @@
 }
 
 TEST_F(RegTypeTest, Pairs) {
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   int64_t val = static_cast<int32_t>(1234);
   const RegType& precise_lo = cache.FromCat2ConstLo(static_cast<int32_t>(val), true);
   const RegType& precise_hi = cache.FromCat2ConstHi(static_cast<int32_t>(val >> 32), true);
@@ -91,8 +84,10 @@
 }
 
 TEST_F(RegTypeTest, Primitives) {
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
 
   const RegType& bool_reg_type = cache.Boolean();
   EXPECT_FALSE(bool_reg_type.IsUndefined());
@@ -359,13 +354,15 @@
   EXPECT_TRUE(double_reg_type.HasClass());
 }
 
-class RegTypeReferenceTest : public BaseRegTypeTest {};
+class RegTypeReferenceTest : public CommonRuntimeTest {};
 
 TEST_F(RegTypeReferenceTest, JavalangObjectImprecise) {
   // Tests matching precisions. A reference type that was created precise doesn't
   // match the one that is imprecise.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& imprecise_obj = cache.JavaLangObject(false);
   const RegType& precise_obj = cache.JavaLangObject(true);
   const RegType& precise_obj_2 = cache.FromDescriptor(nullptr, "Ljava/lang/Object;", true);
@@ -379,8 +376,10 @@
 TEST_F(RegTypeReferenceTest, UnresolvedType) {
   // Tests creating unresolved types: a miss the first time the cache is asked,
   // and a hit the second time.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type_0 = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   EXPECT_TRUE(ref_type_0.IsUnresolvedReference());
   EXPECT_TRUE(ref_type_0.IsNonZeroReferenceTypes());
@@ -395,8 +394,10 @@
 
 TEST_F(RegTypeReferenceTest, UnresolvedUnintializedType) {
   // Tests creating uninitialized types from unresolved types.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type_0 = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   EXPECT_TRUE(ref_type_0.IsUnresolvedReference());
   const RegType& ref_type = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
@@ -417,8 +418,10 @@
 
 TEST_F(RegTypeReferenceTest, Dump) {
   // Tests types for proper Dump messages.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& unresolved_ref = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   const RegType& unresolved_ref_another = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExistEither;", true);
   const RegType& resolved_ref = cache.JavaLangString();
@@ -442,8 +445,10 @@
   // Add a class to the cache, then look for the same class and make sure it is
   // a hit the second time. Then check for the same effect when using the
   // JavaLangString method instead of FromDescriptor. String class is final.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type = cache.JavaLangString();
   const RegType& ref_type_2 = cache.JavaLangString();
   const RegType& ref_type_3 = cache.FromDescriptor(nullptr, "Ljava/lang/String;", true);
@@ -462,8 +467,10 @@
   // Add a class to the cache, then look for the same class and make sure it is
   // a hit the second time. Then check for the same effect when using the
   // JavaLangObject method instead of FromDescriptor. Object class is not final.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type = cache.JavaLangObject(true);
   const RegType& ref_type_2 = cache.JavaLangObject(true);
   const RegType& ref_type_3 = cache.FromDescriptor(nullptr, "Ljava/lang/Object;", true);
@@ -476,7 +483,9 @@
   // Tests merging logic.
   // String and Object: the LUB is Object.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
+  RegTypeCache cache_new(true, allocator);
   const RegType& string = cache_new.JavaLangString();
   const RegType& Object = cache_new.JavaLangObject(true);
   EXPECT_TRUE(string.Merge(Object, &cache_new).IsJavaLangObject());
@@ -498,8 +507,10 @@
 
 TEST_F(RegTypeTest, MergingFloat) {
   // Testing merging logic with float and float constants.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& float_type = cache_new.Float();
@@ -529,8 +540,10 @@
 
 TEST_F(RegTypeTest, MergingLong) {
   // Testing merging logic with long and long constants.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& long_lo_type = cache_new.LongLo();
@@ -583,8 +596,10 @@
 
 TEST_F(RegTypeTest, MergingDouble) {
   // Testing merging logic with double and double constants.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& double_lo_type = cache_new.DoubleLo();
@@ -637,8 +652,10 @@
 
 TEST_F(RegTypeTest, ConstPrecision) {
   // Tests creating primitive types.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
   const RegType& imprecise_const = cache_new.FromCat1Const(10, false);
   const RegType& precise_const = cache_new.FromCat1Const(10, true);
 
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 57fb701..d2f3485 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -127,17 +127,6 @@
   }
 }
 
-inline size_t RegisterLine::GetMaxNonZeroReferenceReg(MethodVerifier* verifier,
-                                                      size_t max_ref_reg) const {
-  size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
-  for (; i < num_regs_; i++) {
-    if (GetRegisterType(verifier, i).IsNonZeroReferenceTypes()) {
-      max_ref_reg = i;
-    }
-  }
-  return max_ref_reg;
-}
-
 inline bool RegisterLine::VerifyRegisterType(MethodVerifier* verifier, uint32_t vsrc,
                                              const RegType& check_type) {
   // Verify the src register type against the check type refining the type of the register
@@ -147,6 +136,9 @@
     if (!check_type.IsNonZeroReferenceTypes() || !src_type.IsNonZeroReferenceTypes()) {
       // Hard fail if one of the types is primitive, since they are concretely known.
       fail_type = VERIFY_ERROR_BAD_CLASS_HARD;
+    } else if (check_type.IsUninitializedTypes() || src_type.IsUninitializedTypes()) {
+      // Hard fail for uninitialized types, which don't match anything but themselves.
+      fail_type = VERIFY_ERROR_BAD_CLASS_HARD;
     } else if (check_type.IsUnresolvedTypes() || src_type.IsUnresolvedTypes()) {
       fail_type = VERIFY_ERROR_NO_CLASS;
     } else {
@@ -175,16 +167,19 @@
   if (MonitorStackDepth() != 0) {
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
-      LOG(WARNING) << "expected empty monitor stack in "
-                   << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                   *verifier->GetMethodReference().dex_file);
+      VLOG(verifier) << "expected empty monitor stack in "
+                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                     *verifier->GetMethodReference().dex_file);
     }
   }
 }
 
+inline size_t RegisterLine::ComputeSize(size_t num_regs) {
+  return OFFSETOF_MEMBER(RegisterLine, line_) + num_regs * sizeof(uint16_t);
+}
+
 inline RegisterLine* RegisterLine::Create(size_t num_regs, MethodVerifier* verifier) {
-  void* memory = verifier->GetArena().Alloc(OFFSETOF_MEMBER(RegisterLine, line_) +
-                                                (num_regs * sizeof(uint16_t)));
+  void* memory = verifier->GetArena().Alloc(ComputeSize(num_regs));
   return new (memory) RegisterLine(num_regs, verifier);
 }
 
@@ -197,6 +192,13 @@
   SetResultTypeToUnknown(verifier);
 }
 
+inline void RegisterLineArenaDelete::operator()(RegisterLine* ptr) const {
+  if (ptr != nullptr) {
+    ptr->~RegisterLine();
+    ProtectMemory(ptr, RegisterLine::ComputeSize(ptr->NumRegs()));
+  }
+}
+
 }  // namespace verifier
 }  // namespace art
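
RegisterLine stores its register slots as a trailing array, so Create() above allocates a single arena block sized by the new ComputeSize() helper and constructs the object in place with placement new. A standalone sketch of the idiom, using malloc in place of the verifier's arena (hypothetical Line type, not the real class):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <new>

    struct Line {
      // One block holds the header plus num_regs trailing uint16_t slots.
      static size_t ComputeSize(size_t num_regs) {
        return offsetof(Line, regs_) + num_regs * sizeof(uint16_t);
      }
      static Line* Create(size_t num_regs) {
        void* memory = std::malloc(ComputeSize(num_regs));
        return new (memory) Line(num_regs);  // Placement new, as in RegisterLine::Create.
      }
      explicit Line(size_t num_regs) : num_regs_(num_regs) {}
      size_t num_regs_;
      uint16_t regs_[0];  // GNU-style flexible trailing array, like RegisterLine::line_.
    };

    int main() {
      Line* line = Line::Create(16);
      line->regs_[15] = 42;  // In bounds: the block was sized for 16 entries.
      line->~Line();
      std::free(line);
    }
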
 
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 37343b5..71aa94e 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -91,25 +91,14 @@
   return true;
 }
 
-void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
-                                         uint32_t this_reg, uint32_t dex_pc) {
+void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type) {
   DCHECK(uninit_type.IsUninitializedTypes());
-  bool is_string = !uninit_type.IsUnresolvedTypes() && uninit_type.GetClass()->IsStringClass();
   const RegType& init_type = verifier->GetRegTypeCache()->FromUninitialized(uninit_type);
   size_t changed = 0;
   for (uint32_t i = 0; i < num_regs_; i++) {
     if (GetRegisterType(verifier, i).Equals(uninit_type)) {
       line_[i] = init_type.GetId();
       changed++;
-      if (is_string && i != this_reg) {
-        auto it = verifier->GetStringInitPcRegMap().find(dex_pc);
-        if (it != verifier->GetStringInitPcRegMap().end()) {
-          it->second.insert(i);
-        } else {
-          std::set<uint32_t> reg_set = { i };
-          verifier->GetStringInitPcRegMap().Put(dex_pc, reg_set);
-        }
-      }
     }
   }
   // Is this initializing "this"?
@@ -348,9 +337,9 @@
   } else if (monitors_.size() >= 32) {
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
-      LOG(WARNING) << "monitor-enter stack overflow while verifying "
-                   << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                   *verifier->GetMethodReference().dex_file);
+      VLOG(verifier) << "monitor-enter stack overflow while verifying "
+                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                     *verifier->GetMethodReference().dex_file);
     }
   } else {
     if (SetRegToLockDepth(reg_idx, monitors_.size())) {
@@ -364,9 +353,9 @@
     } else {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
-        LOG(WARNING) << "unexpected monitor-enter on register v" <<  reg_idx << " in "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+        VLOG(verifier) << "unexpected monitor-enter on register v" <<  reg_idx << " in "
+                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                       *verifier->GetMethodReference().dex_file);
       }
     }
   }
@@ -379,9 +368,9 @@
   } else if (monitors_.empty()) {
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
-      LOG(WARNING) << "monitor-exit stack underflow while verifying "
-                   << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                   *verifier->GetMethodReference().dex_file);
+      VLOG(verifier) << "monitor-exit stack underflow while verifying "
+                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                     *verifier->GetMethodReference().dex_file);
     }
   } else {
     monitors_.pop_back();
@@ -400,9 +389,9 @@
     if (!success) {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
-        LOG(WARNING) << "monitor-exit not unlocking the top of the monitor stack while verifying "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+        VLOG(verifier) << "monitor-exit not unlocking the top of the monitor stack while verifying "
+                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                       *verifier->GetMethodReference().dex_file);
       }
     } else {
       // Record the register was unlocked. This clears all aliases, thus it will also clear the
@@ -453,10 +442,10 @@
     if (monitors_.size() != incoming_line->monitors_.size()) {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
-        LOG(WARNING) << "mismatched stack depths (depth=" << MonitorStackDepth()
-                     << ", incoming depth=" << incoming_line->MonitorStackDepth() << ") in "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+        VLOG(verifier) << "mismatched stack depths (depth=" << MonitorStackDepth()
+                       << ", incoming depth=" << incoming_line->MonitorStackDepth() << ") in "
+                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                       *verifier->GetMethodReference().dex_file);
       }
     } else if (reg_to_lock_depths_ != incoming_line->reg_to_lock_depths_) {
       for (uint32_t idx = 0; idx < num_regs_; idx++) {
@@ -488,10 +477,10 @@
                                        reg_to_lock_depths_)) {
             verifier->Fail(VERIFY_ERROR_LOCKING);
             if (kDumpLockFailures) {
-              LOG(WARNING) << "mismatched stack depths for register v" << idx
-                           << ": " << depths  << " != " << incoming_depths << " in "
-                           << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                           *verifier->GetMethodReference().dex_file);
+              VLOG(verifier) << "mismatched stack depths for register v" << idx
+                             << ": " << depths  << " != " << incoming_depths << " in "
+                             << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                             *verifier->GetMethodReference().dex_file);
             }
             break;
           }
@@ -530,11 +519,11 @@
               // No aliases for both current and incoming, we'll lose information.
               verifier->Fail(VERIFY_ERROR_LOCKING);
               if (kDumpLockFailures) {
-                LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
-                    << std::hex << locked_levels << std::dec  << " != "
-                    << std::hex << incoming_locked_levels << std::dec << " in "
-                    << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                    *verifier->GetMethodReference().dex_file);
+                VLOG(verifier) << "mismatched lock levels for register v" << idx << ": "
+                               << std::hex << locked_levels << std::dec  << " != "
+                               << std::hex << incoming_locked_levels << std::dec << " in "
+                               << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                               *verifier->GetMethodReference().dex_file);
               }
               break;
             }
@@ -554,24 +543,5 @@
   return changed;
 }
 
-void RegisterLine::WriteReferenceBitMap(MethodVerifier* verifier,
-                                        std::vector<uint8_t>* data, size_t max_bytes) {
-  for (size_t i = 0; i < num_regs_; i += 8) {
-    uint8_t val = 0;
-    for (size_t j = 0; j < 8 && (i + j) < num_regs_; j++) {
-      // Note: we write 1 for a Reference but not for Null
-      if (GetRegisterType(verifier, i + j).IsNonZeroReferenceTypes()) {
-        val |= 1 << j;
-      }
-    }
-    if ((i / 8) >= max_bytes) {
-      DCHECK_EQ(0, val);
-      continue;
-    }
-    DCHECK_LT(i / 8, max_bytes) << "val=" << static_cast<uint32_t>(val);
-    data->push_back(val);
-  }
-}
-
 }  // namespace verifier
 }  // namespace art
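
The LOG(WARNING) to VLOG(verifier) conversions above make these lock-failure diagnostics opt-in rather than unconditional. A minimal sketch of a verbosity-gated stream macro in that spirit; gVerbosity is a stand-in for the runtime's -verbose: flags, and ART's real VLOG differs in detail:

    #include <iostream>

    struct Verbosity { bool verifier = false; };
    inline Verbosity gVerbosity;  // Stand-in for the runtime's verbosity flags.

    // Dangling-else trick: the stream expression following the macro is
    // evaluated only when the module's flag is set.
    #define SKETCH_VLOG(module) \
      if (!gVerbosity.module) {} else std::cerr

    int main() {
      SKETCH_VLOG(verifier) << "suppressed by default\n";    // Not printed.
      gVerbosity.verifier = true;
      SKETCH_VLOG(verifier) << "mismatched stack depths\n";  // Printed.
    }
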
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index b2f5555..56846c1 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -99,11 +99,14 @@
   // available now. An example is sharpening types after a check-cast. Note that when given kKeep,
   // the new_type is dchecked to be a reference type.
   template <LockOp kLockOp>
-  ALWAYS_INLINE bool SetRegisterType(MethodVerifier* verifier, uint32_t vdst,
+  ALWAYS_INLINE bool SetRegisterType(MethodVerifier* verifier,
+                                     uint32_t vdst,
                                      const RegType& new_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool SetRegisterTypeWide(MethodVerifier* verifier, uint32_t vdst, const RegType& new_type1,
+  bool SetRegisterTypeWide(MethodVerifier* verifier,
+                           uint32_t vdst,
+                           const RegType& new_type1,
                            const RegType& new_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -117,11 +120,14 @@
   // Get the type of register vsrc.
   const RegType& GetRegisterType(MethodVerifier* verifier, uint32_t vsrc) const;
 
-  ALWAYS_INLINE bool VerifyRegisterType(MethodVerifier* verifier, uint32_t vsrc,
+  ALWAYS_INLINE bool VerifyRegisterType(MethodVerifier* verifier,
+                                        uint32_t vsrc,
                                         const RegType& check_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool VerifyRegisterTypeWide(MethodVerifier* verifier, uint32_t vsrc, const RegType& check_type1,
+  bool VerifyRegisterTypeWide(MethodVerifier* verifier,
+                              uint32_t vsrc,
+                              const RegType& check_type1,
                               const RegType& check_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -155,8 +161,7 @@
    * reference type. This is called when an appropriate constructor is invoked -- all copies of
    * the reference must be marked as initialized.
    */
-  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
-                             uint32_t this_reg, uint32_t dex_pc)
+  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -197,6 +202,9 @@
     return num_regs_;
   }
 
+  // Return how many bytes of memory a register line uses.
+  ALWAYS_INLINE static size_t ComputeSize(size_t num_regs);
+
   /*
    * Get the "this" pointer from a non-static method invocation. This returns the RegType so the
    * caller can decide whether it needs the reference to be initialized or not. (Can also return
@@ -207,31 +215,42 @@
    * allow_failure will return Conflict() instead of causing a verification failure if there is an
    * error.
    */
-  const RegType& GetInvocationThis(MethodVerifier* verifier, const Instruction* inst,
-                                   bool is_range, bool allow_failure = false)
+  const RegType& GetInvocationThis(MethodVerifier* verifier,
+                                   const Instruction* inst,
+                                   bool is_range,
+                                   bool allow_failure = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
    * Verify types for a simple two-register instruction (e.g. "neg-int").
    * "dst_type" is stored into vA, and "src_type" is verified against vB.
    */
-  void CheckUnaryOp(MethodVerifier* verifier, const Instruction* inst, const RegType& dst_type,
+  void CheckUnaryOp(MethodVerifier* verifier,
+                    const Instruction* inst,
+                    const RegType& dst_type,
                     const RegType& src_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpWide(MethodVerifier* verifier, const Instruction* inst,
-                        const RegType& dst_type1, const RegType& dst_type2,
-                        const RegType& src_type1, const RegType& src_type2)
+  void CheckUnaryOpWide(MethodVerifier* verifier,
+                        const Instruction* inst,
+                        const RegType& dst_type1,
+                        const RegType& dst_type2,
+                        const RegType& src_type1,
+                        const RegType& src_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpToWide(MethodVerifier* verifier, const Instruction* inst,
-                          const RegType& dst_type1, const RegType& dst_type2,
+  void CheckUnaryOpToWide(MethodVerifier* verifier,
+                          const Instruction* inst,
+                          const RegType& dst_type1,
+                          const RegType& dst_type2,
                           const RegType& src_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpFromWide(MethodVerifier* verifier, const Instruction* inst,
+  void CheckUnaryOpFromWide(MethodVerifier* verifier,
+                            const Instruction* inst,
                             const RegType& dst_type,
-                            const RegType& src_type1, const RegType& src_type2)
+                            const RegType& src_type1,
+                            const RegType& src_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -239,19 +258,28 @@
    * "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
    * against vB/vC.
    */
-  void CheckBinaryOp(MethodVerifier* verifier, const Instruction* inst,
-                     const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
+  void CheckBinaryOp(MethodVerifier* verifier,
+                     const Instruction* inst,
+                     const RegType& dst_type,
+                     const RegType& src_type1,
+                     const RegType& src_type2,
                      bool check_boolean_op)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOpWide(MethodVerifier* verifier, const Instruction* inst,
-                         const RegType& dst_type1, const RegType& dst_type2,
-                         const RegType& src_type1_1, const RegType& src_type1_2,
-                         const RegType& src_type2_1, const RegType& src_type2_2)
+  void CheckBinaryOpWide(MethodVerifier* verifier,
+                         const Instruction* inst,
+                         const RegType& dst_type1,
+                         const RegType& dst_type2,
+                         const RegType& src_type1_1,
+                         const RegType& src_type1_2,
+                         const RegType& src_type2_1,
+                         const RegType& src_type2_2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOpWideShift(MethodVerifier* verifier, const Instruction* inst,
-                              const RegType& long_lo_type, const RegType& long_hi_type,
+  void CheckBinaryOpWideShift(MethodVerifier* verifier,
+                              const Instruction* inst,
+                              const RegType& long_lo_type,
+                              const RegType& long_hi_type,
                               const RegType& int_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -259,20 +287,28 @@
    * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
    * are verified against vA/vB, then "dst_type" is stored into vA.
    */
-  void CheckBinaryOp2addr(MethodVerifier* verifier, const Instruction* inst,
+  void CheckBinaryOp2addr(MethodVerifier* verifier,
+                          const Instruction* inst,
                           const RegType& dst_type,
-                          const RegType& src_type1, const RegType& src_type2,
+                          const RegType& src_type1,
+                          const RegType& src_type2,
                           bool check_boolean_op)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOp2addrWide(MethodVerifier* verifier, const Instruction* inst,
-                              const RegType& dst_type1, const RegType& dst_type2,
-                              const RegType& src_type1_1, const RegType& src_type1_2,
-                              const RegType& src_type2_1, const RegType& src_type2_2)
+  void CheckBinaryOp2addrWide(MethodVerifier* verifier,
+                              const Instruction* inst,
+                              const RegType& dst_type1,
+                              const RegType& dst_type2,
+                              const RegType& src_type1_1,
+                              const RegType& src_type1_2,
+                              const RegType& src_type2_1,
+                              const RegType& src_type2_2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOp2addrWideShift(MethodVerifier* verifier, const Instruction* inst,
-                                   const RegType& long_lo_type, const RegType& long_hi_type,
+  void CheckBinaryOp2addrWideShift(MethodVerifier* verifier,
+                                   const Instruction* inst,
+                                   const RegType& long_lo_type,
+                                   const RegType& long_hi_type,
                                    const RegType& int_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -282,9 +318,12 @@
    *
    * If "check_boolean_op" is set, we use the constant value in vC.
    */
-  void CheckLiteralOp(MethodVerifier* verifier, const Instruction* inst,
-                      const RegType& dst_type, const RegType& src_type,
-                      bool check_boolean_op, bool is_lit16)
+  void CheckLiteralOp(MethodVerifier* verifier,
+                      const Instruction* inst,
+                      const RegType& dst_type,
+                      const RegType& src_type,
+                      bool check_boolean_op,
+                      bool is_lit16)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
@@ -307,11 +346,6 @@
   bool MergeRegisters(MethodVerifier* verifier, const RegisterLine* incoming_line)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  size_t GetMaxNonZeroReferenceReg(MethodVerifier* verifier, size_t max_ref_reg) const;
-
-  // Write a bit at each register location that holds a reference.
-  void WriteReferenceBitMap(MethodVerifier* verifier, std::vector<uint8_t>* data, size_t max_bytes);
-
   size_t GetMonitorEnterCount() const {
     return monitors_.size();
   }
@@ -401,6 +435,11 @@
   DISALLOW_COPY_AND_ASSIGN(RegisterLine);
 };
 
+class RegisterLineArenaDelete : public ArenaDelete<RegisterLine> {
+ public:
+  void operator()(RegisterLine* ptr) const;
+};
+
 }  // namespace verifier
 }  // namespace art
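
RegisterLineArenaDelete exists because arena-backed objects are never freed individually: the deleter runs the destructor and then poisons the object's bytes (via ProtectMemory in the inline definition earlier), so stale pointers fail loudly instead of silently reading reused memory. A generic sketch of the pattern, with a hypothetical Widget and a memset-based PoisonMemory standing in for ART's ProtectMemory:

    #include <cstddef>
    #include <cstring>
    #include <memory>
    #include <new>

    struct Widget {
      explicit Widget(int n) : n_(n) {}
      int n_;
    };

    void PoisonMemory(void* ptr, size_t size) {
      std::memset(ptr, 0xAA, size);  // Make use-after-destroy obvious.
    }

    struct WidgetArenaDelete {
      void operator()(Widget* ptr) const {
        if (ptr != nullptr) {
          ptr->~Widget();                     // Destroy, but do not free:
          PoisonMemory(ptr, sizeof(Widget));  // the arena still owns the bytes.
        }
      }
    };

    int main() {
      alignas(Widget) unsigned char slot[sizeof(Widget)];  // Pretend arena slot.
      std::unique_ptr<Widget, WidgetArenaDelete> w(new (slot) Widget(7));
    }  // Deleter runs here; the "arena" (stack slot) is reclaimed with the frame.
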
 
diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h
deleted file mode 100644
index db9e1ea..0000000
--- a/runtime/vmap_table.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_VMAP_TABLE_H_
-#define ART_RUNTIME_VMAP_TABLE_H_
-
-#include "base/logging.h"
-#include "leb128.h"
-#include "stack.h"
-
-namespace art {
-
-class VmapTable {
- public:
-  // For efficient encoding of special values, entries are adjusted by 2.
-  static constexpr uint16_t kEntryAdjustment = 2u;
-  static constexpr uint16_t kAdjustedFpMarker = static_cast<uint16_t>(0xffffu + kEntryAdjustment);
-
-  explicit VmapTable(const uint8_t* table) : table_(table) {
-  }
-
-  // Look up nth entry, not called from performance critical code.
-  uint16_t operator[](size_t n) const {
-    const uint8_t* table = table_;
-    size_t size = DecodeUnsignedLeb128(&table);
-    CHECK_LT(n, size);
-    uint16_t adjusted_entry = DecodeUnsignedLeb128(&table);
-    for (size_t i = 0; i < n; ++i) {
-      adjusted_entry = DecodeUnsignedLeb128(&table);
-    }
-    return adjusted_entry - kEntryAdjustment;
-  }
-
-  size_t Size() const {
-    const uint8_t* table = table_;
-    return DecodeUnsignedLeb128(&table);
-  }
-
-  // Is the dex register 'vreg' in the context or on the stack? Should not be called when the
-  // 'kind' is unknown or constant.
-  bool IsInContext(size_t vreg, VRegKind kind, uint32_t* vmap_offset) const {
-    DCHECK(kind == kReferenceVReg || kind == kIntVReg || kind == kFloatVReg ||
-           kind == kLongLoVReg || kind == kLongHiVReg || kind == kDoubleLoVReg ||
-           kind == kDoubleHiVReg || kind == kImpreciseConstant);
-    *vmap_offset = 0xEBAD0FF5;
-    // TODO: take advantage of the registers being ordered
-    // TODO: we treat kImpreciseConstant as an integer below, need to ensure that such values
-    //       are never promoted to floating point registers.
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    bool in_floats = false;
-    const uint8_t* table = table_;
-    uint16_t adjusted_vreg = vreg + kEntryAdjustment;
-    size_t end = DecodeUnsignedLeb128(&table);
-    bool high_reg = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
-    bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64) || (kRuntimeISA == kMips64);
-    if (target64 && high_reg) {
-      // Wide promoted registers are associated with the sreg of the low portion.
-      adjusted_vreg--;
-    }
-    for (size_t i = 0; i < end; ++i) {
-      // Stop if we find what we are looking for.
-      uint16_t adjusted_entry = DecodeUnsignedLeb128(&table);
-      if ((adjusted_entry == adjusted_vreg) && (in_floats == is_float)) {
-        *vmap_offset = i;
-        return true;
-      }
-      // 0xffff is the marker for LR (return PC on x86), following it are spilled float registers.
-      if (adjusted_entry == kAdjustedFpMarker) {
-        in_floats = true;
-      }
-    }
-    return false;
-  }
-
-  // Compute the register number that corresponds to the entry in the vmap (vmap_offset, computed
-  // by IsInContext above). If the kind is floating point then the result will be a floating point
-  // register number, otherwise it will be an integer register number.
-  uint32_t ComputeRegister(uint32_t spill_mask, uint32_t vmap_offset, VRegKind kind) const {
-    // Compute the register we need to load from the context.
-    DCHECK(kind == kReferenceVReg || kind == kIntVReg || kind == kFloatVReg ||
-           kind == kLongLoVReg || kind == kLongHiVReg || kind == kDoubleLoVReg ||
-           kind == kDoubleHiVReg || kind == kImpreciseConstant);
-    // TODO: we treat kImpreciseConstant as an integer below, need to ensure that such values
-    //       are never promoted to floating point registers.
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t matches = 0;
-    if (UNLIKELY(is_float)) {
-      const uint8_t* table = table_;
-      DecodeUnsignedLeb128(&table);  // Skip size.
-      while (DecodeUnsignedLeb128(&table) != kAdjustedFpMarker) {
-        matches++;
-      }
-      matches++;
-    }
-    CHECK_LT(vmap_offset - matches, static_cast<uint32_t>(POPCOUNT(spill_mask)));
-    uint32_t spill_shifts = 0;
-    while (matches != (vmap_offset + 1)) {
-      DCHECK_NE(spill_mask, 0u);
-      matches += spill_mask & 1;  // Add 1 if the low bit is set
-      spill_mask >>= 1;
-      spill_shifts++;
-    }
-    spill_shifts--;  // wind back one as we want the last match
-    return spill_shifts;
-  }
-
- private:
-  const uint8_t* const table_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_VMAP_TABLE_H_
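
The deleted VmapTable walked its entries with unsigned LEB128 decoding (DecodeUnsignedLeb128 above). For reference, a minimal standalone decoder of the same shape, simplified and not ART's actual implementation:

    #include <cassert>
    #include <cstdint>

    uint32_t DecodeUleb128(const uint8_t** data) {
      uint32_t result = 0;
      int shift = 0;
      uint8_t byte;
      do {
        byte = *(*data)++;
        result |= static_cast<uint32_t>(byte & 0x7f) << shift;  // 7 payload bits per byte.
        shift += 7;
      } while ((byte & 0x80) != 0);  // High bit set means another byte follows.
      return result;
    }

    int main() {
      const uint8_t encoded[] = {0xe5, 0x8e, 0x26};  // Classic example: 624485.
      const uint8_t* p = encoded;
      assert(DecodeUleb128(&p) == 624485u);
      assert(p == encoded + 3);  // The cursor advances past the consumed bytes.
    }
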
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index e2c3afb..2c99275 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -30,6 +30,7 @@
 namespace art {
 
 jclass WellKnownClasses::com_android_dex_Dex;
+jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
 jclass WellKnownClasses::dalvik_system_DexFile;
 jclass WellKnownClasses::dalvik_system_DexPathList;
 jclass WellKnownClasses::dalvik_system_DexPathList__Element;
@@ -41,6 +42,9 @@
 jclass WellKnownClasses::java_lang_ClassNotFoundException;
 jclass WellKnownClasses::java_lang_Daemons;
 jclass WellKnownClasses::java_lang_Error;
+jclass WellKnownClasses::java_lang_ExceptionInInitializerError;
+jclass WellKnownClasses::java_lang_IllegalAccessError;
+jclass WellKnownClasses::java_lang_NoClassDefFoundError;
 jclass WellKnownClasses::java_lang_Object;
 jclass WellKnownClasses::java_lang_OutOfMemoryError;
 jclass WellKnownClasses::java_lang_reflect_AbstractMethod;
@@ -54,7 +58,6 @@
 jclass WellKnownClasses::java_lang_StringFactory;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
-jclass WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler;
 jclass WellKnownClasses::java_lang_ThreadGroup;
 jclass WellKnownClasses::java_lang_Throwable;
 jclass WellKnownClasses::java_nio_DirectByteBuffer;
@@ -118,9 +121,9 @@
 jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints;
 jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder;
 jmethodID WellKnownClasses::java_lang_System_runFinalization = nullptr;
+jmethodID WellKnownClasses::java_lang_Thread_dispatchUncaughtException;
 jmethodID WellKnownClasses::java_lang_Thread_init;
 jmethodID WellKnownClasses::java_lang_Thread_run;
-jmethodID WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException;
 jmethodID WellKnownClasses::java_lang_ThreadGroup_removeThread;
 jmethodID WellKnownClasses::java_nio_DirectByteBuffer_init;
 jmethodID WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation;
@@ -129,6 +132,7 @@
 jmethodID WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer_dispatch;
 
 jfieldID WellKnownClasses::dalvik_system_DexFile_cookie;
+jfieldID WellKnownClasses::dalvik_system_DexFile_fileName;
 jfieldID WellKnownClasses::dalvik_system_PathClassLoader_pathList;
 jfieldID WellKnownClasses::dalvik_system_DexPathList_dexElements;
 jfieldID WellKnownClasses::dalvik_system_DexPathList__Element_dexFile;
@@ -137,9 +141,9 @@
 jfieldID WellKnownClasses::java_lang_Thread_lock;
 jfieldID WellKnownClasses::java_lang_Thread_name;
 jfieldID WellKnownClasses::java_lang_Thread_priority;
-jfieldID WellKnownClasses::java_lang_Thread_uncaughtHandler;
 jfieldID WellKnownClasses::java_lang_Thread_nativePeer;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_groups;
+jfieldID WellKnownClasses::java_lang_ThreadGroup_ngroups;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_name;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_parent;
@@ -177,7 +181,7 @@
   if (fid == nullptr) {
     ScopedObjectAccess soa(env);
     if (soa.Self()->IsExceptionPending()) {
-      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump() << '\n';
+      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump();
     }
     std::ostringstream os;
     WellKnownClasses::ToClass(c)->DumpClass(os, mirror::Class::kDumpClassFullDetail);
@@ -194,7 +198,7 @@
   if (mid == nullptr) {
     ScopedObjectAccess soa(env);
     if (soa.Self()->IsExceptionPending()) {
-      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump() << '\n';
+      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump();
     }
     std::ostringstream os;
     WellKnownClasses::ToClass(c)->DumpClass(os, mirror::Class::kDumpClassFullDetail);
@@ -212,6 +216,7 @@
 
 void WellKnownClasses::Init(JNIEnv* env) {
   com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex");
+  dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
   dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile");
   dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList");
   dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element");
@@ -226,6 +231,9 @@
   java_lang_Object = CacheClass(env, "java/lang/Object");
   java_lang_OutOfMemoryError = CacheClass(env, "java/lang/OutOfMemoryError");
   java_lang_Error = CacheClass(env, "java/lang/Error");
+  java_lang_ExceptionInInitializerError = CacheClass(env, "java/lang/ExceptionInInitializerError");
+  java_lang_IllegalAccessError = CacheClass(env, "java/lang/IllegalAccessError");
+  java_lang_NoClassDefFoundError = CacheClass(env, "java/lang/NoClassDefFoundError");
   java_lang_reflect_AbstractMethod = CacheClass(env, "java/lang/reflect/AbstractMethod");
   java_lang_reflect_Constructor = CacheClass(env, "java/lang/reflect/Constructor");
   java_lang_reflect_Field = CacheClass(env, "java/lang/reflect/Field");
@@ -237,8 +245,6 @@
   java_lang_StringFactory = CacheClass(env, "java/lang/StringFactory");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
-  java_lang_Thread__UncaughtExceptionHandler = CacheClass(env,
-      "java/lang/Thread$UncaughtExceptionHandler");
   java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup");
   java_lang_Throwable = CacheClass(env, "java/lang/Throwable");
   java_nio_DirectByteBuffer = CacheClass(env, "java/nio/DirectByteBuffer");
@@ -265,10 +271,10 @@
   java_lang_ref_ReferenceQueue_add = CacheMethod(env, java_lang_ref_ReferenceQueue.get(), true, "add", "(Ljava/lang/ref/Reference;)V");
 
   java_lang_reflect_Proxy_invoke = CacheMethod(env, java_lang_reflect_Proxy, true, "invoke", "(Ljava/lang/reflect/Proxy;Ljava/lang/reflect/Method;[Ljava/lang/Object;)Ljava/lang/Object;");
+  java_lang_Thread_dispatchUncaughtException = CacheMethod(env, java_lang_Thread, false, "dispatchUncaughtException", "(Ljava/lang/Throwable;)V");
   java_lang_Thread_init = CacheMethod(env, java_lang_Thread, false, "<init>", "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V");
   java_lang_Thread_run = CacheMethod(env, java_lang_Thread, false, "run", "()V");
-  java_lang_Thread__UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread__UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
-  java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "removeThread", "(Ljava/lang/Thread;)V");
+  java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "threadTerminated", "(Ljava/lang/Thread;)V");
   java_nio_DirectByteBuffer_init = CacheMethod(env, java_nio_DirectByteBuffer, false, "<init>", "(JI)V");
   libcore_reflect_AnnotationFactory_createAnnotation = CacheMethod(env, libcore_reflect_AnnotationFactory, true, "createAnnotation", "(Ljava/lang/Class;[Llibcore/reflect/AnnotationMember;)Ljava/lang/annotation/Annotation;");
   libcore_reflect_AnnotationMember_init = CacheMethod(env, libcore_reflect_AnnotationMember, false, "<init>", "(Ljava/lang/String;Ljava/lang/Object;Ljava/lang/Class;Ljava/lang/reflect/Method;)V");
@@ -332,6 +338,7 @@
        true, "newStringFromStringBuilder", "(Ljava/lang/StringBuilder;)Ljava/lang/String;");
 
   dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "Ljava/lang/Object;");
+  dalvik_system_DexFile_fileName = CacheField(env, dalvik_system_DexFile, false, "mFileName", "Ljava/lang/String;");
   dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
   dalvik_system_DexPathList__Element_dexFile = CacheField(env, dalvik_system_DexPathList__Element, false, "dexFile", "Ldalvik/system/DexFile;");
@@ -340,9 +347,9 @@
   java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;");
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
-  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
   java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
-  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "Ljava/util/List;");
+  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "[Ljava/lang/ThreadGroup;");
+  java_lang_ThreadGroup_ngroups = CacheField(env, java_lang_ThreadGroup, false, "ngroups", "I");
   java_lang_ThreadGroup_mainThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "mainThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_name = CacheField(env, java_lang_ThreadGroup, false, "name", "Ljava/lang/String;");
   java_lang_ThreadGroup_parent = CacheField(env, java_lang_ThreadGroup, false, "parent", "Ljava/lang/ThreadGroup;");
@@ -350,13 +357,13 @@
   java_lang_Throwable_cause = CacheField(env, java_lang_Throwable, false, "cause", "Ljava/lang/Throwable;");
   java_lang_Throwable_detailMessage = CacheField(env, java_lang_Throwable, false, "detailMessage", "Ljava/lang/String;");
   java_lang_Throwable_stackTrace = CacheField(env, java_lang_Throwable, false, "stackTrace", "[Ljava/lang/StackTraceElement;");
-  java_lang_Throwable_stackState = CacheField(env, java_lang_Throwable, false, "stackState", "Ljava/lang/Object;");
+  java_lang_Throwable_stackState = CacheField(env, java_lang_Throwable, false, "backtrace", "Ljava/lang/Object;");
   java_lang_Throwable_suppressedExceptions = CacheField(env, java_lang_Throwable, false, "suppressedExceptions", "Ljava/util/List;");
   java_lang_reflect_AbstractMethod_artMethod = CacheField(env, java_lang_reflect_AbstractMethod, false, "artMethod", "J");
   java_lang_reflect_Proxy_h = CacheField(env, java_lang_reflect_Proxy, false, "h", "Ljava/lang/reflect/InvocationHandler;");
   java_nio_DirectByteBuffer_capacity = CacheField(env, java_nio_DirectByteBuffer, false, "capacity", "I");
-  java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "effectiveDirectAddress", "J");
-  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "array", "[Ljava/lang/Object;");
+  java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "address", "J");
+  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "elementData", "[Ljava/lang/Object;");
   java_util_ArrayList_size = CacheField(env, java_util_ArrayList, false, "size", "I");
   java_util_Collections_EMPTY_LIST = CacheField(env, java_util_Collections, true, "EMPTY_LIST", "Ljava/util/List;");
   libcore_util_EmptyArray_STACK_TRACE_ELEMENT = CacheField(env, libcore_util_EmptyArray, true, "STACK_TRACE_ELEMENT", "[Ljava/lang/StackTraceElement;");
@@ -379,7 +386,10 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad =
+      CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad",
+                  "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)"
+                      "Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
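
The tables above follow the usual JNI caching idiom: each class is looked up once during startup and pinned with a global reference so the jclass survives across threads and local-reference frames, and method/field IDs are then resolved against the cached class. A trimmed sketch of that idiom (general JNI, not ART's exact CacheClass/CacheMethod):

    #include <jni.h>

    static jclass CacheClassSketch(JNIEnv* env, const char* name) {
      jclass local = env->FindClass(name);  // Local ref, valid only in this frame.
      if (local == nullptr) {
        return nullptr;  // ClassNotFoundException is pending.
      }
      jclass global = static_cast<jclass>(env->NewGlobalRef(local));
      env->DeleteLocalRef(local);  // Drop the short-lived reference.
      return global;               // Safe to stash in a static for the VM's lifetime.
    }

    static jmethodID CacheMethodSketch(JNIEnv* env, jclass c, bool is_static,
                                       const char* name, const char* signature) {
      return is_static ? env->GetStaticMethodID(c, name, signature)
                       : env->GetMethodID(c, name, signature);
    }
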
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index c856291..b8e05b8 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -41,6 +41,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static jclass com_android_dex_Dex;
+  static jclass dalvik_annotation_optimization_FastNative;
   static jclass dalvik_system_DexFile;
   static jclass dalvik_system_DexPathList;
   static jclass dalvik_system_DexPathList__Element;
@@ -52,6 +53,9 @@
   static jclass java_lang_ClassNotFoundException;
   static jclass java_lang_Daemons;
   static jclass java_lang_Error;
+  static jclass java_lang_ExceptionInInitializerError;
+  static jclass java_lang_IllegalAccessError;
+  static jclass java_lang_NoClassDefFoundError;
   static jclass java_lang_Object;
   static jclass java_lang_OutOfMemoryError;
   static jclass java_lang_reflect_AbstractMethod;
@@ -66,7 +70,6 @@
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
-  static jclass java_lang_Thread__UncaughtExceptionHandler;
   static jclass java_lang_Throwable;
   static jclass java_util_ArrayList;
   static jclass java_util_Collections;
@@ -129,9 +132,9 @@
   static jmethodID java_lang_StringFactory_newStringFromCodePoints;
   static jmethodID java_lang_StringFactory_newStringFromStringBuilder;
   static jmethodID java_lang_System_runFinalization;
+  static jmethodID java_lang_Thread_dispatchUncaughtException;
   static jmethodID java_lang_Thread_init;
   static jmethodID java_lang_Thread_run;
-  static jmethodID java_lang_Thread__UncaughtExceptionHandler_uncaughtException;
   static jmethodID java_lang_ThreadGroup_removeThread;
   static jmethodID java_nio_DirectByteBuffer_init;
   static jmethodID libcore_reflect_AnnotationFactory_createAnnotation;
@@ -140,6 +143,7 @@
   static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_dispatch;
 
   static jfieldID dalvik_system_DexFile_cookie;
+  static jfieldID dalvik_system_DexFile_fileName;
   static jfieldID dalvik_system_DexPathList_dexElements;
   static jfieldID dalvik_system_DexPathList__Element_dexFile;
   static jfieldID dalvik_system_PathClassLoader_pathList;
@@ -150,9 +154,9 @@
   static jfieldID java_lang_Thread_lock;
   static jfieldID java_lang_Thread_name;
   static jfieldID java_lang_Thread_priority;
-  static jfieldID java_lang_Thread_uncaughtHandler;
   static jfieldID java_lang_Thread_nativePeer;
   static jfieldID java_lang_ThreadGroup_groups;
+  static jfieldID java_lang_ThreadGroup_ngroups;
   static jfieldID java_lang_ThreadGroup_mainThreadGroup;
   static jfieldID java_lang_ThreadGroup_name;
   static jfieldID java_lang_ThreadGroup_parent;
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 9daaf8e..d96fb42 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -133,4 +133,8 @@
   return new ZipEntry(handle_, zip_entry.release());
 }
 
+ZipArchive::~ZipArchive() {
+  CloseArchive(handle_);
+}
+
 }  // namespace art
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 717eb8c..42bf55c 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -63,9 +63,7 @@
 
   ZipEntry* Find(const char* name, std::string* error_msg) const;
 
-  ~ZipArchive() {
-    CloseArchive(handle_);
-  }
+  ~ZipArchive();
 
  private:
   explicit ZipArchive(ZipArchiveHandle handle) : handle_(handle) {}
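
Moving ~ZipArchive() out of line is a deliberate layering change: with the body in zip_archive.cc, includers of the header no longer inline a call to the zip library's CloseArchive, so only that one translation unit depends on the library's functions and the cleanup can change without recompiling every user. A generic sketch of the idiom; it goes one step further than this header by making the handle opaque (hypothetical names):

    // archive.h -- no zip-library headers required here.
    struct ArchiveHandle;  // Opaque forward declaration is enough for a pointer.
    class Archive {
     public:
      ~Archive();  // Defined where ArchiveHandle (and CloseArchive) are visible.
     private:
      ArchiveHandle* handle_;
    };

    // archive.cc -- the only translation unit that sees the real library.
    // #include "zip_library.h"
    // Archive::~Archive() { CloseArchive(handle_); }
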
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index aded30c..4fc7ee2 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -32,7 +32,7 @@
 
 TEST_F(ZipArchiveTest, FindAndExtract) {
   std::string error_msg;
-  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileNames()[0].c_str(), &error_msg));
   ASSERT_TRUE(zip_archive.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
   std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index b9e37a1..e1120e4 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -24,7 +24,7 @@
 LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
 LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS)
 LOCAL_SRC_FILES := sigchain_dummy.cc
-LOCAL_CLANG = $(ART_TARGET_CLANG)
+LOCAL_CLANG := $(ART_TARGET_CLANG)
 LOCAL_MODULE:= libsigchain
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
@@ -39,7 +39,7 @@
 LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
 LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS)
 LOCAL_SRC_FILES := sigchain.cc
-LOCAL_CLANG = $(ART_TARGET_CLANG)
+LOCAL_CLANG := $(ART_TARGET_CLANG)
 LOCAL_MODULE:= libsigchain
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
@@ -54,11 +54,11 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
-LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_CLANG := $(ART_HOST_CLANG)
 LOCAL_SRC_FILES := sigchain_dummy.cc
 LOCAL_MODULE:= libsigchain
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_LDLIBS = -ldl
+LOCAL_LDLIBS := -ldl
 LOCAL_MULTILIB := both
 LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
 include $(BUILD_HOST_SHARED_LIBRARY)
@@ -69,11 +69,11 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
-LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_CLANG := $(ART_HOST_CLANG)
 LOCAL_SRC_FILES := sigchain.cc
 LOCAL_MODULE:= libsigchain
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_LDLIBS = -ldl
+LOCAL_LDLIBS := -ldl
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_STATIC_LIBRARY)
 
@@ -85,10 +85,10 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
-LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_CLANG := $(ART_HOST_CLANG)
 LOCAL_SRC_FILES := sigchain_dummy.cc
 LOCAL_MODULE:= libsigchain_dummy
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_LDLIBS = -ldl
+LOCAL_LDLIBS := -ldl
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_STATIC_LIBRARY)
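
Throughout this makefile, `LOCAL_CLANG = ...` and `LOCAL_LDLIBS = ...` become `:=` assignments. In GNU Make, `=` defines a recursively expanded variable whose right-hand side is re-evaluated at every use, while `:=` expands once, at the point of assignment. Using `:=` pins each module to the value `$(ART_TARGET_CLANG)` or `$(ART_HOST_CLANG)` holds when the module is defined, rather than whatever those variables contain when the rule finally runs.
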
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index b76555b..c1efecd 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -103,7 +103,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
@@ -233,7 +233,7 @@
   return linked_sigaction(signal, new_action, old_action);
 }
 
-extern "C" sighandler_t signal(int signal, sighandler_t handler) {
+static sighandler_t signal_impl(int signal, sighandler_t handler) {
   struct sigaction sa;
   sigemptyset(&sa.sa_mask);
   sa.sa_handler = handler;
@@ -272,6 +272,16 @@
   return reinterpret_cast<sighandler_t>(sa.sa_handler);
 }
 
+extern "C" sighandler_t signal(int signal, sighandler_t handler) {
+  return signal_impl(signal, handler);
+}
+
+#if !defined(__LP64__)
+extern "C" sighandler_t bsd_signal(int signal, sighandler_t handler) {
+  return signal_impl(signal, handler);
+}
+#endif
+
 extern "C" int sigprocmask(int how, const sigset_t* bionic_new_set, sigset_t* bionic_old_set) {
   const sigset_t* new_set_ptr = bionic_new_set;
   sigset_t tmpset;
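
The refactoring above funnels both exported entry points through one static helper, so 32-bit binaries that still bind the legacy bsd_signal symbol get exactly the same chaining behavior as callers of signal(). A simplified stand-in for the shape of that change (exports renamed my_* so the sketch does not interpose on libc; the real handler-chaining logic is omitted):

    #include <signal.h>

    using handler_fn = void (*)(int);

    static handler_fn signal_impl_sketch(int signum, handler_fn handler) {
      struct sigaction sa = {};
      sigemptyset(&sa.sa_mask);
      sa.sa_handler = handler;
      sa.sa_flags = SA_RESTART;  // Historical signal() semantics.
      struct sigaction old_sa = {};
      if (sigaction(signum, &sa, &old_sa) != 0) {
        return SIG_ERR;
      }
      return old_sa.sa_handler;
    }

    extern "C" handler_fn my_signal(int signum, handler_fn handler) {
      return signal_impl_sketch(signum, handler);
    }

    #if !defined(__LP64__)
    // Legacy alias, exported only on 32-bit ABIs (cf. version-script32.txt below).
    extern "C" handler_fn my_bsd_signal(int signum, handler_fn handler) {
      return signal_impl_sketch(signum, handler);
    }
    #endif
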
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index dfe0c6f..aa3c360 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -38,7 +38,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
diff --git a/sigchainlib/version-script32.txt b/sigchainlib/version-script32.txt
new file mode 100644
index 0000000..eec9103
--- /dev/null
+++ b/sigchainlib/version-script32.txt
@@ -0,0 +1,15 @@
+{
+global:
+  ClaimSignalChain;
+  UnclaimSignalChain;
+  InvokeUserSignalHandler;
+  InitializeSignalChain;
+  EnsureFrontOfChain;
+  SetSpecialSignalHandlerFn;
+  bsd_signal;
+  sigaction;
+  signal;
+  sigprocmask;
+local:
+  *;
+};
diff --git a/sigchainlib/version-script.txt b/sigchainlib/version-script64.txt
similarity index 100%
rename from sigchainlib/version-script.txt
rename to sigchainlib/version-script64.txt
diff --git a/test/003-omnibus-opcodes/build b/test/003-omnibus-opcodes/build
index faa2983..56e8784 100644
--- a/test/003-omnibus-opcodes/build
+++ b/test/003-omnibus-opcodes/build
@@ -23,8 +23,8 @@
 ${JAVAC} -d classes `find src2 -name '*.java'`
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/003-omnibus-opcodes/expected.txt b/test/003-omnibus-opcodes/expected.txt
index b591a7a..ee25ec1 100644
--- a/test/003-omnibus-opcodes/expected.txt
+++ b/test/003-omnibus-opcodes/expected.txt
@@ -31,15 +31,7 @@
 FloatMath.checkConvI
 FloatMath.checkConvL
 FloatMath.checkConvF
- 0: -2.0054409E9
- 1: -8.613303E18
- 2: -3.1415927
--2.0054409E9, -8.6133031E18, -3.1415927
 FloatMath.checkConvD
- 0: -2.005440939E9
- 1: -8.613303245920329E18
- 2: 123.45600128173828
--2.005440939E9, -8.6133032459203287E18, 123.4560012817382
 FloatMath.checkConsts
 FloatMath.jlmTests
 IntMath.testIntCompare
diff --git a/test/003-omnibus-opcodes/src/FloatMath.java b/test/003-omnibus-opcodes/src/FloatMath.java
index a0bc9f4..fcdb4fe 100644
--- a/test/003-omnibus-opcodes/src/FloatMath.java
+++ b/test/003-omnibus-opcodes/src/FloatMath.java
@@ -135,7 +135,8 @@
     static float[] floatOperTest(float x, float y) {
         System.out.println("FloatMath.floatOperTest");
 
-        float[] results = new float[9];
+        float[] results = new float[10];
+        float tmp;
 
         /* this seems to generate "op-float" instructions */
         results[0] = x + y;
@@ -145,7 +146,21 @@
         results[4] = x % -y;
 
         /* this seems to generate "op-float/2addr" instructions */
-        results[8] = x + (((((x + y) - y) * y) / y) % y);
+        tmp = x;
+        tmp += y;
+        results[5] = tmp;
+        tmp = x;
+        tmp -= y;
+        results[6] = tmp;
+        tmp = x;
+        tmp *= y;
+        results[7] = tmp;
+        tmp = x;
+        tmp /= y;
+        results[8] = tmp;
+        tmp = x;
+        tmp %= -y;
+        results[9] = tmp;
 
         return results;
     }
@@ -155,7 +170,11 @@
         Main.assertTrue(results[2] > -210000.01f && results[2] < -209999.99f);
         Main.assertTrue(results[3] > -23333.34f && results[3] < -23333.32f);
         Main.assertTrue(results[4] > 0.999f && results[4] < 1.001f);
-        Main.assertTrue(results[8] > 70000.99f && results[8] < 70001.01f);
+        Main.assertTrue(results[5] > 69996.99f && results[5] < 69997.01f);
+        Main.assertTrue(results[6] > 70002.99f && results[6] < 70003.01f);
+        Main.assertTrue(results[7] > -210000.01f && results[7] < -209999.99f);
+        Main.assertTrue(results[8] > -23333.34f && results[8] < -23333.32f);
+        Main.assertTrue(results[9] > 0.999f && results[9] < 1.001f);
     }
 
     /*
@@ -165,7 +184,8 @@
     static double[] doubleOperTest(double x, double y) {
         System.out.println("FloatMath.doubleOperTest");
 
-        double[] results = new double[9];
+        double[] results = new double[10];
+        double tmp;
 
         /* this seems to generate "op-double" instructions */
         results[0] = x + y;
@@ -175,7 +195,21 @@
         results[4] = x % -y;
 
         /* this seems to generate "op-double/2addr" instructions */
-        results[8] = x + (((((x + y) - y) * y) / y) % y);
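+        // Compound assignments on a local should force the /2addr encodings.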
+        tmp = x;
+        tmp += y;
+        results[5] = tmp;
+        tmp = x;
+        tmp -= y;
+        results[6] = tmp;
+        tmp = x;
+        tmp *= y;
+        results[7] = tmp;
+        tmp = x;
+        tmp /= y;
+        results[8] = tmp;
+        tmp = x;
+        tmp %= -y;
+        results[9] = tmp;
 
         return results;
     }
@@ -185,7 +219,11 @@
         Main.assertTrue(results[2] > -210000.01 && results[2] < -209999.99);
         Main.assertTrue(results[3] > -23333.34 && results[3] < -23333.32);
         Main.assertTrue(results[4] > 0.999 && results[4] < 1.001);
-        Main.assertTrue(results[8] > 70000.99 && results[8] < 70001.01);
+        Main.assertTrue(results[5] > 69996.99 && results[5] < 69997.01);
+        Main.assertTrue(results[6] > 70002.99 && results[6] < 70003.01);
+        Main.assertTrue(results[7] > -210000.01 && results[7] < -209999.99);
+        Main.assertTrue(results[8] > -23333.34 && results[8] < -23333.32);
+        Main.assertTrue(results[9] > 0.999 && results[9] < 1.001);
     }
 
     /*
@@ -245,10 +283,9 @@
     }
     static void checkConvF(float[] results) {
         System.out.println("FloatMath.checkConvF");
-        // TODO: Main.assertTrue values
-        for (int i = 0; i < results.length; i++)
-            System.out.println(" " + i + ": " + results[i]);
-        System.out.println("-2.0054409E9, -8.6133031E18, -3.1415927");
+        Main.assertTrue(results[0] == -2.0054409E9f);
+        Main.assertTrue(results[1] == -8.613303E18f);
+        Main.assertTrue(results[2] == -3.1415927f);
     }
 
     static double[] convD(int i, long l, float f) {
@@ -260,10 +297,9 @@
     }
     static void checkConvD(double[] results) {
         System.out.println("FloatMath.checkConvD");
-        // TODO: Main.assertTrue values
-        for (int i = 0; i < results.length; i++)
-            System.out.println(" " + i + ": " + results[i]);
-        System.out.println("-2.005440939E9, -8.6133032459203287E18, 123.4560012817382");
+        Main.assertTrue(results[0] == -2.005440939E9);
+        Main.assertTrue(results[1] == -8.6133032459203287E18);
+        Main.assertTrue(results[2] == 123.45600128173828);
     }
 
     static void checkConsts() {
diff --git a/test/004-JniTest/build b/test/004-JniTest/build
new file mode 100755
index 0000000..e8e9f31
--- /dev/null
+++ b/test/004-JniTest/build
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental Jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+# export JACK_SERVER=false
+# export JACK_REPOSITORY="${ANDROID_BUILD_TOP}/prebuilts/sdk/tools/jacks"
+
+# e.g. /foo/bar/jack-3.10.ALPHA.jar -> 3.10.ALPHA
+# export JACK_VERSION="$(find "$JACK_REPOSITORY" -name '*ALPHA*' | sed 's/.*jack-//g' | sed 's/[.]jar//g')"
+./default-build "$@" --experimental lambdas
diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt
index 86ab37e..f7e404d 100644
--- a/test/004-JniTest/expected.txt
+++ b/test/004-JniTest/expected.txt
@@ -28,3 +28,33 @@
 RUNNING sub object, sub class, sub nonstatic
 Subclass.nonstaticMethod
 PASSED sub object, sub class, sub nonstatic
+Calling method ConcreteClass->JniCallNonOverridenDefaultMethod on object of type ConcreteClass
+DefaultInterface.JniCallNonOverridenDefaultMethod
+Calling method ConcreteClass->JniCallOverridenDefaultMethod on object of type ConcreteClass
+ConcreteClass.JniCallOverridenDefaultMethod
+Calling method ConcreteClass->JniCallOverridenDefaultMethodWithSuper on object of type ConcreteClass
+ConcreteClass.JniCallOverridenDefaultMethodWithSuper
+DefaultInterface.JniCallOverridenDefaultMethod
+Calling method ConcreteClass->JniCallOverridenAbstractMethod on object of type ConcreteClass
+ConcreteClass.JniCallOverridenAbstractMethod
+Calling method ConcreteClass->JniCallConflictDefaultMethod on object of type ConcreteClass
+EXCEPTION OCCURRED: java.lang.IncompatibleClassChangeError: Conflicting default method implementations void ConflictInterface.JniCallConflictDefaultMethod()
+Calling method ConcreteClass->JniCallSoftConflictMethod on object of type ConcreteClass
+DefaultInterface.JniCallSoftConflictMethod
+Calling method DefaultInterface->JniCallNonOverridenDefaultMethod on object of type ConcreteClass
+DefaultInterface.JniCallNonOverridenDefaultMethod
+Calling method DefaultInterface->JniCallOverridenDefaultMethod on object of type ConcreteClass
+ConcreteClass.JniCallOverridenDefaultMethod
+Calling method DefaultInterface->JniCallOverridenAbstractMethod on object of type ConcreteClass
+ConcreteClass.JniCallOverridenAbstractMethod
+Calling method DefaultInterface->JniCallConflictDefaultMethod on object of type ConcreteClass
+EXCEPTION OCCURRED: java.lang.IncompatibleClassChangeError: Conflicting default method implementations void ConflictInterface.JniCallConflictDefaultMethod()
+Calling method DefaultInterface->JniCallSoftConflictMethod on object of type ConcreteClass
+DefaultInterface.JniCallSoftConflictMethod
+Calling method AbstractInterface->JniCallSoftConflictMethod on object of type ConcreteClass
+DefaultInterface.JniCallSoftConflictMethod
+Calling method ConflictInterface->JniCallConflictDefaultMethod on object of type ConcreteClass
+EXCEPTION OCCURRED: java.lang.IncompatibleClassChangeError: Conflicting default method implementations void ConflictInterface.JniCallConflictDefaultMethod()
+hi-lambda: λ
+hi-default δλ
+hi-default δλ
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index be7888b..bb18a70 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -14,49 +14,55 @@
  * limitations under the License.
  */
 
-#include <assert.h>
 #include <iostream>
 #include <pthread.h>
 #include <stdio.h>
 #include <vector>
 
+#include "art_method-inl.h"
+#include "base/logging.h"
 #include "jni.h"
 
-#if defined(NDEBUG)
-#error test code compiled without NDEBUG
-#endif
+namespace art {
 
 static JavaVM* jvm = nullptr;
 
+static jint Java_Main_intFastNativeMethod(JNIEnv*, jclass, jint a, jint b, jint c);
+
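+// Table of natives registered on demand via Java_Main_registerNativesJniTest below.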
+static JNINativeMethod sMainMethods[] = {
+  {"intFastNativeMethod", "(III)I", reinterpret_cast<void*>(Java_Main_intFastNativeMethod) }
+};
+
 extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) {
-  assert(vm != nullptr);
-  assert(jvm == nullptr);
+  CHECK(vm != nullptr);
+  CHECK(jvm == nullptr);
   jvm = vm;
   std::cout << "JNI_OnLoad called" << std::endl;
+
   return JNI_VERSION_1_6;
 }
 
 extern "C" JNIEXPORT void JNI_OnUnload(JavaVM*, void*) {
   // std::cout since LOG(INFO) adds extra stuff like pid.
   std::cout << "JNI_OnUnload called" << std::endl;
-  // Clear jvm for assert in test 004-JniTest.
+  // Clear jvm for CHECK in test 004-JniTest.
   jvm = nullptr;
 }
 
 static void* AttachHelper(void* arg) {
-  assert(jvm != nullptr);
+  CHECK(jvm != nullptr);
 
   JNIEnv* env = nullptr;
   JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, nullptr };
   int attach_result = jvm->AttachCurrentThread(&env, &args);
-  assert(attach_result == 0);
+  CHECK_EQ(attach_result, 0);
 
   typedef void (*Fn)(JNIEnv*);
   Fn fn = reinterpret_cast<Fn>(arg);
   fn(env);
 
   int detach_result = jvm->DetachCurrentThread();
-  assert(detach_result == 0);
+  CHECK_EQ(detach_result, 0);
   return nullptr;
 }
 
@@ -64,19 +70,19 @@
   pthread_t pthread;
   int pthread_create_result = pthread_create(&pthread, nullptr, AttachHelper,
                                              reinterpret_cast<void*>(fn));
-  assert(pthread_create_result == 0);
+  CHECK_EQ(pthread_create_result, 0);
   int pthread_join_result = pthread_join(pthread, nullptr);
-  assert(pthread_join_result == 0);
+  CHECK_EQ(pthread_join_result, 0);
 }
 
 static void testFindClassOnAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jobjectArray array = env->NewObjectArray(0, clazz, nullptr);
-  assert(array != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(array != nullptr);
+  CHECK(!env->ExceptionCheck());
 }
 
 // http://b/10994325
@@ -86,12 +92,12 @@
 
 static void testFindFieldOnAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jfieldID field = env->GetStaticFieldID(clazz, "testFindFieldOnAttachedNativeThreadField", "Z");
-  assert(field != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   env->SetStaticBooleanField(clazz, field, JNI_TRUE);
 }
@@ -103,38 +109,38 @@
 
 static void testReflectFieldGetFromAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jclass class_clazz = env->FindClass("java/lang/Class");
-  assert(class_clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(class_clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jmethodID getFieldMetodId = env->GetMethodID(class_clazz, "getField",
                                                "(Ljava/lang/String;)Ljava/lang/reflect/Field;");
-  assert(getFieldMetodId != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(getFieldMetodId != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField");
-  assert(field_name != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field_name != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jobject field = env->CallObjectMethod(clazz, getFieldMetodId, field_name);
-  assert(field != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jclass field_clazz = env->FindClass("java/lang/reflect/Field");
-  assert(field_clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field_clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jmethodID getBooleanMetodId = env->GetMethodID(field_clazz, "getBoolean",
                                                  "(Ljava/lang/Object;)Z");
-  assert(getBooleanMetodId != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(getBooleanMetodId != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jboolean value = env->CallBooleanMethod(field, getBooleanMetodId, /* ignored */ clazz);
-  assert(value == false);
-  assert(!env->ExceptionCheck());
+  CHECK(value == false);
+  CHECK(!env->ExceptionCheck());
 }
 
 // http://b/15539150
@@ -148,22 +154,22 @@
 extern "C" JNIEXPORT void JNICALL Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
                                                                                      jclass) {
   jclass super_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SuperClass");
-  assert(super_class != nullptr);
+  CHECK(super_class != nullptr);
 
   jmethodID execute = env->GetStaticMethodID(super_class, "execute", "()V");
-  assert(execute != nullptr);
+  CHECK(execute != nullptr);
 
   jclass sub_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SubClass");
-  assert(sub_class != nullptr);
+  CHECK(sub_class != nullptr);
 
   env->CallStaticVoidMethod(sub_class, execute);
 }
 
 extern "C" JNIEXPORT jobject JNICALL Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass) {
   jclass abstract_class = env->FindClass("Main$testGetMirandaMethod_MirandaAbstract");
-  assert(abstract_class != nullptr);
+  CHECK(abstract_class != nullptr);
   jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
-  assert(miranda_method != nullptr);
+  CHECK(miranda_method != nullptr);
   return env->ToReflectedMethod(abstract_class, miranda_method, JNI_FALSE);
 }
 
@@ -171,11 +177,11 @@
 extern "C" void JNICALL Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass) {
   std::vector<uint8_t> buffer(1);
   jobject byte_buffer = env->NewDirectByteBuffer(&buffer[0], 0);
-  assert(byte_buffer != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(byte_buffer != nullptr);
+  CHECK(!env->ExceptionCheck());
 
-  assert(env->GetDirectBufferAddress(byte_buffer) == &buffer[0]);
-  assert(env->GetDirectBufferCapacity(byte_buffer) == 0);
+  CHECK_EQ(env->GetDirectBufferAddress(byte_buffer), &buffer[0]);
+  CHECK_EQ(env->GetDirectBufferCapacity(byte_buffer), 0);
 }
 
 constexpr size_t kByteReturnSize = 7;
@@ -185,18 +191,18 @@
                                               jbyte b3, jbyte b4, jbyte b5, jbyte b6,
                                               jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
   // We use b1 to drive the output.
-  assert(b2 == 2);
-  assert(b3 == -3);
-  assert(b4 == 4);
-  assert(b5 == -5);
-  assert(b6 == 6);
-  assert(b7 == -7);
-  assert(b8 == 8);
-  assert(b9 == -9);
-  assert(b10 == 10);
+  CHECK_EQ(b2, 2);
+  CHECK_EQ(b3, -3);
+  CHECK_EQ(b4, 4);
+  CHECK_EQ(b5, -5);
+  CHECK_EQ(b6, 6);
+  CHECK_EQ(b7, -7);
+  CHECK_EQ(b8, 8);
+  CHECK_EQ(b9, -9);
+  CHECK_EQ(b10, 10);
 
-  assert(0 <= b1);
-  assert(b1 < static_cast<jbyte>(kByteReturnSize));
+  CHECK_LE(0, b1);
+  CHECK_LT(b1, static_cast<jbyte>(kByteReturnSize));
 
   return byte_returns[b1];
 }
@@ -210,18 +216,18 @@
                                                 jshort s3, jshort s4, jshort s5, jshort s6,
                                                 jshort s7, jshort s8, jshort s9, jshort s10) {
   // We use s1 to drive the output.
-  assert(s2 == 2);
-  assert(s3 == -3);
-  assert(s4 == 4);
-  assert(s5 == -5);
-  assert(s6 == 6);
-  assert(s7 == -7);
-  assert(s8 == 8);
-  assert(s9 == -9);
-  assert(s10 == 10);
+  CHECK_EQ(s2, 2);
+  CHECK_EQ(s3, -3);
+  CHECK_EQ(s4, 4);
+  CHECK_EQ(s5, -5);
+  CHECK_EQ(s6, 6);
+  CHECK_EQ(s7, -7);
+  CHECK_EQ(s8, 8);
+  CHECK_EQ(s9, -9);
+  CHECK_EQ(s10, 10);
 
-  assert(0 <= s1);
-  assert(s1 < static_cast<jshort>(kShortReturnSize));
+  CHECK_LE(0, s1);
+  CHECK_LT(s1, static_cast<jshort>(kShortReturnSize));
 
   return short_returns[s1];
 }
@@ -231,17 +237,17 @@
                                                     jboolean b5, jboolean b6, jboolean b7,
                                                     jboolean b8, jboolean b9, jboolean b10) {
   // We use b1 to drive the output.
-  assert(b2 == JNI_TRUE);
-  assert(b3 == JNI_FALSE);
-  assert(b4 == JNI_TRUE);
-  assert(b5 == JNI_FALSE);
-  assert(b6 == JNI_TRUE);
-  assert(b7 == JNI_FALSE);
-  assert(b8 == JNI_TRUE);
-  assert(b9 == JNI_FALSE);
-  assert(b10 == JNI_TRUE);
+  CHECK_EQ(b2, JNI_TRUE);
+  CHECK_EQ(b3, JNI_FALSE);
+  CHECK_EQ(b4, JNI_TRUE);
+  CHECK_EQ(b5, JNI_FALSE);
+  CHECK_EQ(b6, JNI_TRUE);
+  CHECK_EQ(b7, JNI_FALSE);
+  CHECK_EQ(b8, JNI_TRUE);
+  CHECK_EQ(b9, JNI_FALSE);
+  CHECK_EQ(b10, JNI_TRUE);
 
-  assert(b1 == JNI_TRUE || b1 == JNI_FALSE);
+  CHECK(b1 == JNI_TRUE || b1 == JNI_FALSE);
   return b1;
 }
 
@@ -252,17 +258,17 @@
                                               jchar c3, jchar c4, jchar c5, jchar c6, jchar c7,
                                               jchar c8, jchar c9, jchar c10) {
   // We use c1 to drive the output.
-  assert(c2 == 'a');
-  assert(c3 == 'b');
-  assert(c4 == 'c');
-  assert(c5 == '0');
-  assert(c6 == '1');
-  assert(c7 == '2');
-  assert(c8 == 1234);
-  assert(c9 == 2345);
-  assert(c10 == 3456);
+  CHECK_EQ(c2, 'a');
+  CHECK_EQ(c3, 'b');
+  CHECK_EQ(c4, 'c');
+  CHECK_EQ(c5, '0');
+  CHECK_EQ(c6, '1');
+  CHECK_EQ(c7, '2');
+  CHECK_EQ(c8, 1234);
+  CHECK_EQ(c9, 2345);
+  CHECK_EQ(c10, 3456);
 
-  assert(c1 < static_cast<jchar>(kCharReturnSize));
+  CHECK_LT(c1, static_cast<jchar>(kCharReturnSize));
 
   return char_returns[c1];
 }
@@ -281,39 +287,39 @@
   // Test direct call.
   {
     jclass vmstack_clazz = env->FindClass("dalvik/system/VMStack");
-    assert(vmstack_clazz != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(vmstack_clazz != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jmethodID getCallingClassLoaderMethodId = env->GetStaticMethodID(vmstack_clazz,
                                                                      "getCallingClassLoader",
                                                                      "()Ljava/lang/ClassLoader;");
-    assert(getCallingClassLoaderMethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(getCallingClassLoaderMethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jobject class_loader = env->CallStaticObjectMethod(vmstack_clazz,
                                                        getCallingClassLoaderMethodId);
-    assert(class_loader == nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(class_loader == nullptr);
+    CHECK(!env->ExceptionCheck());
   }
 
   // Test one-level call. Use System.loadLibrary().
   {
     jclass system_clazz = env->FindClass("java/lang/System");
-    assert(system_clazz != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(system_clazz != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jmethodID loadLibraryMethodId = env->GetStaticMethodID(system_clazz, "loadLibrary",
                                                            "(Ljava/lang/String;)V");
-    assert(loadLibraryMethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(loadLibraryMethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     // Create a string object.
     jobject library_string = env->NewStringUTF("non_existing_library");
-    assert(library_string != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(library_string != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     env->CallStaticVoidMethod(system_clazz, loadLibraryMethodId, library_string);
-    assert(env->ExceptionCheck());
+    CHECK(env->ExceptionCheck());
 
     // We expect UnsatisfiedLinkError.
     jthrowable thrown = env->ExceptionOccurred();
@@ -321,7 +327,7 @@
 
     jclass unsatisfied_link_error_clazz = env->FindClass("java/lang/UnsatisfiedLinkError");
     jclass thrown_class = env->GetObjectClass(thrown);
-    assert(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
+    CHECK(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
   }
 }
 
@@ -333,31 +339,31 @@
 
 static void testShallowGetStackClass2(JNIEnv* env) {
   jclass vmstack_clazz = env->FindClass("dalvik/system/VMStack");
-  assert(vmstack_clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(vmstack_clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   // Test direct call.
   {
     jmethodID getStackClass2MethodId = env->GetStaticMethodID(vmstack_clazz, "getStackClass2",
                                                               "()Ljava/lang/Class;");
-    assert(getStackClass2MethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(getStackClass2MethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jobject caller_class = env->CallStaticObjectMethod(vmstack_clazz, getStackClass2MethodId);
-    assert(caller_class == nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(caller_class == nullptr);
+    CHECK(!env->ExceptionCheck());
   }
 
   // Test one-level call. Use VMStack.getStackClass1().
   {
     jmethodID getStackClass1MethodId = env->GetStaticMethodID(vmstack_clazz, "getStackClass1",
                                                               "()Ljava/lang/Class;");
-    assert(getStackClass1MethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(getStackClass1MethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jobject caller_class = env->CallStaticObjectMethod(vmstack_clazz, getStackClass1MethodId);
-    assert(caller_class == nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(caller_class == nullptr);
+    CHECK(!env->ExceptionCheck());
   }
 
   // For better testing we would need to compile against libcore and have a two-deep stack
@@ -416,8 +422,8 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(!env_->ExceptionCheck());
-    assert(c != nullptr);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(c != nullptr);
     return c;
   }
 
@@ -429,7 +435,7 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(m != nullptr);
+    CHECK(m != nullptr);
     return m;
   }
 
@@ -439,7 +445,7 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(o != nullptr);
+    CHECK(o != nullptr);
     return o;
   }
 
@@ -467,7 +473,7 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(m != nullptr);
+    CHECK(m != nullptr);
     return m;
   }
 
@@ -508,21 +514,21 @@
     jobject sub_super = CallConstructor(sub_, super_constructor_);
     jobject sub_sub = CallConstructor(sub_, sub_constructor_);
 
-    assert(env_->IsInstanceOf(super_super, super_));
-    assert(!env_->IsInstanceOf(super_super, sub_));
+    CHECK(env_->IsInstanceOf(super_super, super_));
+    CHECK(!env_->IsInstanceOf(super_super, sub_));
 
     // Note that even though we called (and ran) the subclass
     // constructor, we are not the subclass.
-    assert(env_->IsInstanceOf(super_sub, super_));
-    assert(!env_->IsInstanceOf(super_sub, sub_));
+    CHECK(env_->IsInstanceOf(super_sub, super_));
+    CHECK(!env_->IsInstanceOf(super_sub, sub_));
 
     // Note that even though we called the superclass constructor, we
     // are still the subclass.
-    assert(env_->IsInstanceOf(sub_super, super_));
-    assert(env_->IsInstanceOf(sub_super, sub_));
+    CHECK(env_->IsInstanceOf(sub_super, super_));
+    CHECK(env_->IsInstanceOf(sub_super, sub_));
 
-    assert(env_->IsInstanceOf(sub_sub, super_));
-    assert(env_->IsInstanceOf(sub_sub, sub_));
+    CHECK(env_->IsInstanceOf(sub_sub, super_));
+    CHECK(env_->IsInstanceOf(sub_sub, sub_));
   }
 
   void TestnonstaticCallNonvirtualMethod(bool super_object, bool super_class, bool super_method, const char* test_case) {
@@ -542,8 +548,8 @@
     CallMethod(o, c, m, true, test_case);
     jboolean super_field = GetBooleanField(o, super_field_);
     jboolean sub_field = GetBooleanField(o, sub_field_);
-    assert(super_field == super_method);
-    assert(sub_field != super_method);
+    CHECK_EQ(super_field, super_method);
+    CHECK_NE(sub_field, super_method);
   }
 
   void TestnonstaticCallNonvirtualMethod() {
@@ -565,20 +571,20 @@
 
 extern "C" JNIEXPORT void JNICALL Java_Main_testNewStringObject(JNIEnv* env, jclass) {
   jclass c = env->FindClass("java/lang/String");
-  assert(c != nullptr);
+  CHECK(c != nullptr);
 
   jmethodID mid1 = env->GetMethodID(c, "<init>", "()V");
-  assert(mid1 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid1 != nullptr);
+  CHECK(!env->ExceptionCheck());
   jmethodID mid2 = env->GetMethodID(c, "<init>", "([B)V");
-  assert(mid2 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid2 != nullptr);
+  CHECK(!env->ExceptionCheck());
   jmethodID mid3 = env->GetMethodID(c, "<init>", "([C)V");
-  assert(mid3 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid3 != nullptr);
+  CHECK(!env->ExceptionCheck());
   jmethodID mid4 = env->GetMethodID(c, "<init>", "(Ljava/lang/String;)V");
-  assert(mid4 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid4 != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   const char* test_array = "Test";
   int byte_array_length = strlen(test_array);
@@ -587,22 +593,22 @@
 
   // Test NewObject
   jstring s = reinterpret_cast<jstring>(env->NewObject(c, mid2, byte_array));
-  assert(s != nullptr);
-  assert(env->GetStringLength(s) == byte_array_length);
-  assert(env->GetStringUTFLength(s) == byte_array_length);
+  CHECK(s != nullptr);
+  CHECK_EQ(env->GetStringLength(s), byte_array_length);
+  CHECK_EQ(env->GetStringUTFLength(s), byte_array_length);
   const char* chars = env->GetStringUTFChars(s, nullptr);
-  assert(strcmp(test_array, chars) == 0);
+  CHECK_EQ(strcmp(test_array, chars), 0);
   env->ReleaseStringUTFChars(s, chars);
 
   // Test AllocObject and Call(Nonvirtual)VoidMethod
   jstring s1 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s1 != nullptr);
+  CHECK(s1 != nullptr);
   jstring s2 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s2 != nullptr);
+  CHECK(s2 != nullptr);
   jstring s3 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s3 != nullptr);
+  CHECK(s3 != nullptr);
   jstring s4 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s4 != nullptr);
+  CHECK(s4 != nullptr);
 
   jcharArray char_array = env->NewCharArray(5);
   jstring string_arg = env->NewStringUTF("helloworld");
@@ -621,21 +627,144 @@
 
   // Test with global and weak global references
   jstring s5 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s5 != nullptr);
+  CHECK(s5 != nullptr);
   s5 = reinterpret_cast<jstring>(env->NewGlobalRef(s5));
   jstring s6 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s6 != nullptr);
+  CHECK(s6 != nullptr);
   s6 = reinterpret_cast<jstring>(env->NewWeakGlobalRef(s6));
 
   env->CallVoidMethod(s5, mid1);
   env->CallNonvirtualVoidMethod(s6, c, mid2, byte_array);
-  assert(env->GetStringLength(s5) == 0);
-  assert(env->GetStringLength(s6) == byte_array_length);
+  CHECK_EQ(env->GetStringLength(s5), 0);
+  CHECK_EQ(env->GetStringLength(s6), byte_array_length);
   const char* chars6 = env->GetStringUTFChars(s6, nullptr);
-  assert(strcmp(test_array, chars6) == 0);
+  CHECK_EQ(strcmp(test_array, chars6), 0);
   env->ReleaseStringUTFChars(s6, chars6);
 }
 
 extern "C" JNIEXPORT jlong JNICALL Java_Main_testGetMethodID(JNIEnv* env, jclass, jclass c) {
   return reinterpret_cast<jlong>(env->GetMethodID(c, "a", "()V"));
 }
+
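+// Repeatedly pins both arrays with GetPrimitiveArrayCritical and copies data
+// between them, so a concurrently running GC has to cope with JNI critical
+// sections being entered and exited in a tight loop.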
+extern "C" JNIEXPORT void JNICALL Java_Main_enterJniCriticalSection(JNIEnv* env, jclass,
+                                                                    jint arraySize,
+                                                                    jbyteArray array0,
+                                                                    jbyteArray array1) {
+  for (int i = 0; i < 50000; ++i) {
+    char* data0 = reinterpret_cast<char*>(env->GetPrimitiveArrayCritical(array0, nullptr));
+    char* data1 = reinterpret_cast<char*>(env->GetPrimitiveArrayCritical(array1, nullptr));
+    bool up = i % 2 == 0;
+    for (int j = 0; j < arraySize; ++j) {
+      if (up) {
+        data1[j] = data0[j] + 1;
+      } else {
+        data0[j] = data1[j] + 1;
+      }
+    }
+    env->ReleasePrimitiveArrayCritical(array1, data1, 0);
+    env->ReleasePrimitiveArrayCritical(array0, data0, 0);
+  }
+}
+
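+// Calls default, overridden, abstract and conflicting interface methods
+// through JNI on a ConcreteClass instance, mirroring the expected.txt output.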
+class JniCallDefaultMethodsTest {
+ public:
+  explicit JniCallDefaultMethodsTest(JNIEnv* env)
+      : env_(env), concrete_class_(env_->FindClass("ConcreteClass")) {
+    CHECK(!env_->ExceptionCheck());
+    CHECK(concrete_class_ != nullptr);
+  }
+
+  void Test() {
+    TestCalls("ConcreteClass", { "JniCallNonOverridenDefaultMethod",
+                                 "JniCallOverridenDefaultMethod",
+                                 "JniCallOverridenDefaultMethodWithSuper",
+                                 "JniCallOverridenAbstractMethod",
+                                 "JniCallConflictDefaultMethod",
+                                 "JniCallSoftConflictMethod" });
+    TestCalls("DefaultInterface", { "JniCallNonOverridenDefaultMethod",
+                                    "JniCallOverridenDefaultMethod",
+                                    "JniCallOverridenAbstractMethod",
+                                    "JniCallConflictDefaultMethod",
+                                    "JniCallSoftConflictMethod" });
+    TestCalls("AbstractInterface", { "JniCallSoftConflictMethod" });
+    TestCalls("ConflictInterface", { "JniCallConflictDefaultMethod" });
+  }
+
+ private:
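+  // Invokes each named method on a fresh ConcreteClass instance, resolving the
+  // jmethodID against `declaring_class`, and prints any pending exception.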
+  void TestCalls(const char* declaring_class, const std::vector<const char*>& methods) {
+    jmethodID new_method = env_->GetMethodID(concrete_class_, "<init>", "()V");
+    jobject obj = env_->NewObject(concrete_class_, new_method);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(obj != nullptr);
+    jclass decl_class = env_->FindClass(declaring_class);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(decl_class != nullptr);
+    for (const char* method : methods) {
+      jmethodID method_id = env_->GetMethodID(decl_class, method, "()V");
+      CHECK(!env_->ExceptionCheck());
+      printf("Calling method %s->%s on object of type ConcreteClass\n", declaring_class, method);
+      env_->CallVoidMethod(obj, method_id);
+      if (env_->ExceptionCheck()) {
+        jthrowable thrown = env_->ExceptionOccurred();
+        env_->ExceptionClear();
+        jmethodID to_string = env_->GetMethodID(
+            env_->FindClass("java/lang/Object"), "toString", "()Ljava/lang/String;");
+        jstring exception_string =
+            reinterpret_cast<jstring>(env_->CallObjectMethod(thrown, to_string));
+        CHECK(!env_->ExceptionCheck());
+        const char* exception_string_utf8 = env_->GetStringUTFChars(exception_string, nullptr);
+        CHECK(!env_->ExceptionCheck());
+        CHECK(exception_string_utf8 != nullptr);
+        printf("EXCEPTION OCCURED: %s\n", exception_string_utf8);
+        env_->ReleaseStringUTFChars(exception_string, exception_string_utf8);
+      }
+    }
+  }
+
+  JNIEnv* env_;
+  jclass concrete_class_;
+};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_testCallDefaultMethods(JNIEnv* env) {
+  JniCallDefaultMethodsTest(env).Test();
+}
+
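+// Looks up `method` on LambdaInterface and invokes it on the given object.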
+static void InvokeSpecificMethod(JNIEnv* env, jobject obj, const char* method) {
+  jclass lambda_class = env->FindClass("LambdaInterface");
+  CHECK(!env->ExceptionCheck());
+  CHECK(lambda_class != nullptr);
+  jmethodID method_id = env->GetMethodID(lambda_class, method, "()V");
+  CHECK(!env->ExceptionCheck());
+  env->CallVoidMethod(obj, method_id);
+  CHECK(!env->ExceptionCheck());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_testInvokeLambdaDefaultMethod(
+    JNIEnv* e, jclass, jobject l) {
+  InvokeSpecificMethod(e, l, "sayHiTwice");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_testInvokeLambdaMethod(JNIEnv* e, jclass, jobject l) {
+  InvokeSpecificMethod(e, l, "sayHi");
+}
+
+// Register these natives on demand: many tests share this JNI library, so the
+// methods cannot be registered unconditionally at load time.
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_registerNativesJniTest(JNIEnv* e, jclass kls) {
+  const size_t numMethods = sizeof(sMainMethods)/sizeof(JNINativeMethod);
+
+  if (e->RegisterNatives(kls, sMainMethods, numMethods) < 0) {
+    std::cerr << "RegisterNatives failed for 'Main'" << std::endl;
+    return JNI_FALSE;
+  }
+
+  return JNI_TRUE;
+}
+
+// Annotated with @FastNative in Java code, so it doesn't need the legacy "!" prefix in its
+// registration signature. It still has to be registered explicitly (see
+// Java_Main_registerNativesJniTest) to avoid mutator lock check failures.
+static jint Java_Main_intFastNativeMethod(JNIEnv*, jclass, jint a, jint b, jint c) {
+  return a + b + c;
+}
+
+}  // namespace art
+
diff --git a/test/004-JniTest/smali/AbstractInterface.smali b/test/004-JniTest/smali/AbstractInterface.smali
new file mode 100644
index 0000000..52b2fc5
--- /dev/null
+++ b/test/004-JniTest/smali/AbstractInterface.smali
@@ -0,0 +1,26 @@
+# /*
+#  * Copyright 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public interface LAbstractInterface;
+.super Ljava/lang/Object;
+
+# public interface AbstractInterface {
+#     public void JniCallSoftConflictMethod();
+# }
+
+.method public abstract JniCallSoftConflictMethod()V
+.end method
+
diff --git a/test/004-JniTest/smali/ConcreteClass.smali b/test/004-JniTest/smali/ConcreteClass.smali
new file mode 100644
index 0000000..a9c072f
--- /dev/null
+++ b/test/004-JniTest/smali/ConcreteClass.smali
@@ -0,0 +1,72 @@
+# /*
+#  * Copyright 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LConcreteClass;
+.super Ljava/lang/Object;
+.implements LDefaultInterface;
+.implements LConflictInterface;
+.implements LAbstractInterface;
+
+# public class ConcreteClass implements DefaultInterface, ConflictInterface, AbstractInterface {
+#     public void JniCallOverridenAbstractMethod() {
+#         System.out.println("ConcreteClass.JniCallOverridenAbstractMethod");
+#     }
+#
+#     public void JniCallOverridenDefaultMethod() {
+#         System.out.println("ConcreteClass.JniCallOverridenDefaultMethod");
+#     }
+#
+#     public void JniCallOverridenDefaultMethodWithSuper() {
+#         System.out.println("ConcreteClass.JniCallOverridenDefaultMethodWithSuper");
+#         DefaultInterface.super.JniCallOverridenDefaultMethod();
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public JniCallOverridenAbstractMethod()V
+    .locals 2
+
+    const-string v0, "ConcreteClass.JniCallOverridenAbstractMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public JniCallOverridenDefaultMethod()V
+    .locals 2
+
+    const-string v0, "ConcreteClass.JniCallOverridenDefaultMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public JniCallOverridenDefaultMethodWithSuper()V
+    .locals 2
+
+    const-string v0, "ConcreteClass.JniCallOverridenDefaultMethodWithSuper"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-super {p0}, LDefaultInterface;->JniCallOverridenDefaultMethod()V
+
+    return-void
+.end method
diff --git a/test/004-JniTest/smali/ConflictInterface.smali b/test/004-JniTest/smali/ConflictInterface.smali
new file mode 100644
index 0000000..fc3d474
--- /dev/null
+++ b/test/004-JniTest/smali/ConflictInterface.smali
@@ -0,0 +1,35 @@
+# /*
+#  * Copyright 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public interface LConflictInterface;
+.super Ljava/lang/Object;
+
+# public interface ConflictInterface {
+#     public default void JniCallConflictDefaultMethod() {
+#         System.out.println("ConflictInterface.JniCallConflictDefaultMethod");
+#     }
+#
+# }
+
+.method public JniCallConflictDefaultMethod()V
+    .locals 2
+
+    const-string v0, "ConflictInterface.JniCallConflictDefaultMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
diff --git a/test/004-JniTest/smali/DefaultInterface.smali b/test/004-JniTest/smali/DefaultInterface.smali
new file mode 100644
index 0000000..1ee8721
--- /dev/null
+++ b/test/004-JniTest/smali/DefaultInterface.smali
@@ -0,0 +1,77 @@
+# /*
+#  * Copyright 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public interface LDefaultInterface;
+.super Ljava/lang/Object;
+
+# public interface DefaultInterface {
+#     public default void JniCallNonOverridenDefaultMethod() {
+#         System.out.println("DefaultInterface.JniCallNonOverridenDefaultMethod");
+#     }
+#
+#     public default void JniCallOverridenDefaultMethod() {
+#         System.out.println("DefaultInterface.JniCallOverridenDefaultMethod");
+#     }
+#
+#     public void JniCallOverridenAbstractMethod();
+#
+#     public default void JniCallConflictDefaultMethod() {
+#         System.out.println("DefaultInterface.JniCallConflictDefaultMethod");
+#     }
+#
+#     public default void JniCallSoftConflictMethod() {
+#         System.out.println("DefaultInterface.JniCallSoftConflictMethod");
+#     }
+# }
+
+.method public JniCallNonOverridenDefaultMethod()V
+    .locals 2
+
+    const-string v0, "DefaultInterface.JniCallNonOverridenDefaultMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public JniCallOverridenDefaultMethod()V
+    .locals 2
+
+    const-string v0, "DefaultInterface.JniCallOverridenDefaultMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public abstract JniCallOverridenAbstractMethod()V
+.end method
+
+.method public JniCallConflictDefaultMethod()V
+    .locals 2
+
+    const-string v0, "DefaultInterface.JniCallConflictDefaultMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public JniCallSoftConflictMethod()V
+    .locals 2
+
+    const-string v0, "DefaultInterface.JniCallSoftConflictMethod"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index ee3a3b9..573afdb 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -18,6 +18,8 @@
 import java.lang.reflect.Method;
 import java.lang.reflect.Proxy;
 
+import dalvik.annotation.optimization.FastNative;
+
 public class Main {
     public static void main(String[] args) {
         System.loadLibrary(args[0]);
@@ -38,8 +40,21 @@
         testNewStringObject();
         testRemoveLocalObject();
         testProxyGetMethodID();
+        testJniCriticalSectionAndGc();
+        testCallDefaultMethods();
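+        // Invoke a lambda's method and an interface default method from native code.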
+        String lambda = "λ";
+        testInvokeLambdaMethod(() -> { System.out.println("hi-lambda: " + lambda); });
+        String def = "δ";
+        testInvokeLambdaDefaultMethod(() -> { System.out.println("hi-default " + def + lambda); });
+
+        registerNativesJniTest();
+        testFastNativeMethods();
     }
 
+    private static native boolean registerNativesJniTest();
+
+    private static native void testCallDefaultMethods();
+
     private static native void testFindClassOnAttachedNativeThread();
 
     private static boolean testFindFieldOnAttachedNativeThreadField;
@@ -120,7 +135,7 @@
     private static void testRemoveLocalObject() {
         removeLocalObject(new Object());
     }
-    
+
     private static native short shortMethod(short s1, short s2, short s3, short s4, short s5, short s6, short s7,
         short s8, short s9, short s10);
 
@@ -212,7 +227,7 @@
         InvocationHandler handler = new DummyInvocationHandler();
         SimpleInterface proxy =
                 (SimpleInterface) Proxy.newProxyInstance(SimpleInterface.class.getClassLoader(),
-                        new Class[] {SimpleInterface.class}, handler);
+                        new Class<?>[] {SimpleInterface.class}, handler);
         if (testGetMethodID(SimpleInterface.class) == 0) {
             throw new AssertionError();
         }
@@ -222,6 +237,67 @@
     }
 
     private static native long testGetMethodID(Class<?> c);
+
+    // Exercise GC and JNI critical sections in parallel.
+    private static void testJniCriticalSectionAndGc() {
+        Thread runGcThread = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                for (int i = 0; i < 10; ++i) {
+                    Runtime.getRuntime().gc();
+                }
+            }
+        });
+        Thread jniCriticalThread = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                final int arraySize = 32;
+                byte[] array0 = new byte[arraySize];
+                byte[] array1 = new byte[arraySize];
+                enterJniCriticalSection(arraySize, array0, array1);
+            }
+        });
+        jniCriticalThread.start();
+        runGcThread.start();
+        try {
+            jniCriticalThread.join();
+            runGcThread.join();
+        } catch (InterruptedException ignored) {}
+    }
+
+    private static native void enterJniCriticalSection(int arraySize, byte[] array0, byte[] array1);
+
+    private static native void testInvokeLambdaMethod(LambdaInterface iface);
+
+    private static native void testInvokeLambdaDefaultMethod(LambdaInterface iface);
+
+    // Test invoking @FastNative methods works correctly.
+
+    // Return sum of a+b+c.
+    @FastNative
+    static native int intFastNativeMethod(int a, int b, int c);
+
+    private static void testFastNativeMethods() {
+      int[] returns = { 0, 3, 6, 9, 12 };
+      for (int i = 0; i < returns.length; i++) {
+        int result = intFastNativeMethod(i, i, i);
+        if (returns[i] != result) {
+          System.out.println("FastNative Int Run " + i + " with " + returns[i] + " vs " + result);
+          throw new AssertionError();
+        }
+      }
+    }
+
+}
+
+@FunctionalInterface
+interface LambdaInterface {
+  public void sayHi();
+  public default void sayHiTwice() {
+    sayHi();
+    sayHi();
+  }
 }
 
 class JniCallNonvirtualTest {
diff --git a/test/004-ReferenceMap/build b/test/004-ReferenceMap/build
deleted file mode 100644
index 08987b5..0000000
--- a/test/004-ReferenceMap/build
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-# The test relies on DEX file produced by javac+dx so keep building with them for now
-# (see b/19467889)
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
-  --dump-width=1000 ${DX_FLAGS} classes
-zip $TEST_NAME.jar classes.dex
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 34fb3f8..5304590 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "art_method-inl.h"
 #include "check_reference_map_visitor.h"
 #include "jni.h"
 
@@ -49,11 +50,7 @@
     if (m_name.compare("f") == 0) {
       CHECK_REGS_CONTAIN_REFS(0x03U, true, 8);  // v8: this
       CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1);  // v8: this, v1: x
-      CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      }
       CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       // v2 is added because of the instruction at DexPC 0024. Object merges with 0 is Object. See:
       //   0024: move-object v3, v2
@@ -63,18 +60,10 @@
       // We eliminate the non-live registers at a return, so only v3 is live.
       // Note that it is OK for a compiler to not have a dex map at this dex PC because
       // a return is not necessarily a safepoint.
-      CHECK_REGS_CONTAIN_REFS(0x13U, false, 3);  // v3: y
+      CHECK_REGS_CONTAIN_REFS(0x14U, false, 2);  // v2: y
       // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-        CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);
-        // v5 is removed from the root set because there is a "merge" operation.
-        // See 0015: if-nez v2, 001f.
-        CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      }
-      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x22U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
 
       if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
@@ -91,124 +80,79 @@
   }
 };
 
-// Dex instructions for the function 'f' in ReferenceMap.java
-// Virtual methods   -
-//    #0              : (in LReferenceMap;)
-//      name          : 'f'
-//      type          : '()Ljava/lang/Object;'
-//      access        : 0x0000 ()
-//      code          -
-//      registers     : 9
-//      ins           : 1
-//      outs          : 2
-//      insns size    : 51 16-bit code units
-//      |[0001e8] ReferenceMap.f:()Ljava/lang/Object;
-//      |0000: const/4 v4, #int 2 // #2
-//      |0001: const/4 v7, #int 0 // #0
-//      |0002: const/4 v6, #int 1 // #1
+// DEX code
 //
-// 0:[Unknown],1:[Unknown],2:[Unknown],3:[Unknown],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0003: new-array v1, v4, [Ljava/lang/Object;  // type@0007
-//      |0005: const/4 v2, #int 0 // #0
-
-// 0:[Unknown],1:[Reference: java.lang.Object[]],2:[Zero],3:[Unknown],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0006: new-instance v3, Ljava/lang/Object;  // type@0003
-
-// [Unknown],1:[Reference: java.lang.Object[]],2:[Zero],3:[Uninitialized Reference: java.lang.Object],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0008: +invoke-object-init/range {}, Ljava/lang/Object;.<init>:()V // method@0005
-//      |000b: const/4 v4, #int 2 // #2
-
-// 0:[Unknown],1:[Reference: java.lang.Object[]],2:[Zero],3:[Reference: java.lang.Object],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |000c: aput-object v3, v1, v4
-
-// 0:[Unknown],1:[Reference: java.lang.Object[]],2:[Zero],3:[Reference: java.lang.Object],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |000e: aput-object v3, v1, v6
-
-// 0:[Unknown],1:[Reference: java.lang.Object[]],2:[Zero],3:[Reference: java.lang.Object],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0010: +invoke-virtual-quick {v8, v7}, [000c] // vtable #000c
-
-// 0:[Conflict],1:[Conflict],2:[Conflict],3:[Reference: java.lang.Object],4:[Conflict],5:[Conflict],6:[Conflict],7:[Conflict],8:[Conflict],
-//      |0013: return-object v3
-//      |0014: move-exception v0
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0015: if-nez v2, 001f // +000a
-//      |0017: const/4 v4, #int 1 // #1
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[32-bit Constant: 1],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0018: new-instance v5, Ljava/lang/Object;  // type@0003
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[32-bit Constant: 1],5:[Uninitialized Reference: java.lang.Object],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |001a: +invoke-object-init/range {}, Ljava/lang/Object;.<init>:()V // method@0005
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[32-bit Constant: 1],5:[Reference: java.lang.Object],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |001d: aput-object v5, v1, v4
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[32-bit Constant: 2],5:[Conflict],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |001f: aput-object v2, v1, v6
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[32-bit Constant: 2],5:[Conflict],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0021: +invoke-virtual-quick {v8, v7}, [000c] // vtable #000c
-//      |0024: move-object v3, v2
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Reference: java.lang.Object],4:[32-bit Constant: 2],5:[Conflict],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0025: goto 0013 // -0012
-//      |0026: move-exception v4
-
-// 0:[Conflict],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[Reference: java.lang.Throwable],5:[Conflict],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0027: aput-object v2, v1, v6
-
-// 0:[Conflict],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[Reference: java.lang.Throwable],5:[Conflict],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0029: +invoke-virtual-quick {v8, v7}, [000c] // vtable #000c
-
-// 0:[Conflict],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Conflict],4:[Reference: java.lang.Throwable],5:[Conflict],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |002c: throw v4
-//      |002d: move-exception v4
-//      |002e: move-object v2, v3
-
-// 0:[Unknown],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Reference: java.lang.Object],4:[Reference: java.lang.Throwable],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |002f: goto 0027 // -0008
-//      |0030: move-exception v0
-//      |0031: move-object v2, v3
-
-// 0:[Reference: java.lang.Exception],1:[Reference: java.lang.Object[]],2:[Reference: java.lang.Object],3:[Reference: java.lang.Object],4:[32-bit Constant: 2],5:[Unknown],6:[32-bit Constant: 1],7:[Zero],8:[Reference: ReferenceMap],
-//      |0032: goto 0015 // -001d
-//      catches       : 3
-//        0x0006 - 0x000b
-//          Ljava/lang/Exception; -> 0x0014
-//          <any> -> 0x0026
-//        0x000c - 0x000e
-//          Ljava/lang/Exception; -> 0x0030
-//          <any> -> 0x002d
-//        0x0018 - 0x001f
-//          <any> -> 0x0026
-//      positions     :
-//        0x0003 line=8
-//        0x0005 line=9
-//        0x0006 line=11
-//        0x000b line=12
-//        0x000e line=18
-//        0x0010 line=19
-//        0x0013 line=21
-//        0x0014 line=13
-//        0x0015 line=14
-//        0x0017 line=15
-//        0x001f line=18
-//        0x0021 line=19
-//        0x0025 line=20
-//        0x0026 line=18
-//        0x0029 line=19
-//        0x002d line=18
-//        0x0030 line=13
-//      locals        :
-//        0x0006 - 0x000b reg=2 y Ljava/lang/Object;
-//        0x000b - 0x0013 reg=3 y Ljava/lang/Object;
-//        0x0014 - 0x0015 reg=2 y Ljava/lang/Object;
-//        0x0015 - 0x0026 reg=0 ex Ljava/lang/Exception;
-//        0x002d - 0x0032 reg=3 y Ljava/lang/Object;
-//        0x0005 - 0x0033 reg=1 x [Ljava/lang/Object;
-//        0x0032 - 0x0033 reg=2 y Ljava/lang/Object;
-//        0x0000 - 0x0033 reg=8 this LReferenceMap;
+// 0000: const/4 v4, #int 2 // #2
+// 0001: const/4 v7, #int 0 // #0
+// 0002: const/4 v6, #int 1 // #1
+// 0003: new-array v1, v4, [Ljava/lang/Object; // type@0007
+// 0005: const/4 v2, #int 0 // #0
+// 0006: new-instance v3, Ljava/lang/Object; // type@0003
+// 0008: invoke-direct {v3}, Ljava/lang/Object;.<init>:()V // method@0004
+// 000b: const/4 v4, #int 2 // #2
+// 000c: aput-object v3, v1, v4
+// 000e: aput-object v3, v1, v6
+// 0010: invoke-virtual {v8, v7}, LMain;.refmap:(I)I // method@0003
+// 0013: move-object v2, v3
+// 0014: return-object v2
+// 0015: move-exception v0
+// 0016: if-nez v2, 0020 // +000a
+// 0018: new-instance v4, Ljava/lang/Object; // type@0003
+// 001a: invoke-direct {v4}, Ljava/lang/Object;.<init>:()V // method@0004
+// 001d: const/4 v5, #int 1 // #1
+// 001e: aput-object v4, v1, v5
+// 0020: aput-object v2, v1, v6
+// 0022: invoke-virtual {v8, v7}, LMain;.refmap:(I)I // method@0003
+// 0025: goto 0014 // -0011
+// 0026: move-exception v4
+// 0027: aput-object v2, v1, v6
+// 0029: invoke-virtual {v8, v7}, LMain;.refmap:(I)I // method@0003
+// 002c: throw v4
+// 002d: move-exception v4
+// 002e: move-object v2, v3
+// 002f: goto 0027 // -0008
+// 0030: move-exception v0
+// 0031: move-object v2, v3
+// 0032: goto 0016 // -001c
+//    catches       : 3
+//      0x0006 - 0x000b
+//        Ljava/lang/Exception; -> 0x0015
+//        <any> -> 0x0026
+//      0x000c - 0x000e
+//        Ljava/lang/Exception; -> 0x0030
+//        <any> -> 0x002d
+//      0x0018 - 0x0020
+//        <any> -> 0x0026
+//    positions     :
+//      0x0003 line=22
+//      0x0005 line=23
+//      0x0006 line=25
+//      0x000b line=26
+//      0x000e line=32
+//      0x0010 line=33
+//      0x0014 line=35
+//      0x0015 line=27
+//      0x0016 line=28
+//      0x0018 line=29
+//      0x0020 line=32
+//      0x0022 line=33
+//      0x0026 line=31
+//      0x0027 line=32
+//      0x0029 line=33
+//      0x002c line=31
+//      0x0030 line=27
+//    locals        :
+//      0x0006 - 0x000b reg=2 y Ljava/lang/Object;
+//      0x000b - 0x0014 reg=3 y Ljava/lang/Object;
+//      0x0015 - 0x0016 reg=2 y Ljava/lang/Object;
+//      0x0016 - 0x0026 reg=0 ex Ljava/lang/Exception;
+//      0x002d - 0x002f reg=3 y Ljava/lang/Object;
+//      0x002f - 0x0030 reg=2 y Ljava/lang/Object;
+//      0x0030 - 0x0032 reg=3 y Ljava/lang/Object;
+//      0x0031 - 0x0033 reg=0 ex Ljava/lang/Exception;
+//      0x0005 - 0x0033 reg=1 x [Ljava/lang/Object;
+//      0x0032 - 0x0033 reg=2 y Ljava/lang/Object;
+//      0x0000 - 0x0033 reg=8 this LMain;
 
 extern "C" JNIEXPORT jint JNICALL Java_Main_refmap(JNIEnv*, jobject, jint count) {
   // Visitor
diff --git a/test/004-StackWalk/build b/test/004-StackWalk/build
deleted file mode 100644
index 08987b5..0000000
--- a/test/004-StackWalk/build
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-# The test relies on DEX file produced by javac+dx so keep building with them for now
-# (see b/19467889)
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
-  --dump-width=1000 ${DX_FLAGS} classes
-zip $TEST_NAME.jar classes.dex
diff --git a/test/004-StackWalk/src/Main.java b/test/004-StackWalk/src/Main.java
index 883ce2c..072b1d0 100644
--- a/test/004-StackWalk/src/Main.java
+++ b/test/004-StackWalk/src/Main.java
@@ -2,14 +2,14 @@
   public Main() {
   }
 
+  boolean doThrow = false;
+
   int $noinline$f() throws Exception {
     g(1);
     g(2);
 
-    // This loop currently defeats inlining of `f`.
-    for (int i = 0; i < 10; i++) {
-      Thread.sleep(0);
-    }
+    // This currently defeats inlining of `f`.
+    if (doThrow) { throw new Error(); }
     return 0;
   }
 
diff --git a/test/004-StackWalk/stack_walk_jni.cc b/test/004-StackWalk/stack_walk_jni.cc
index 3a5854b..420224d 100644
--- a/test/004-StackWalk/stack_walk_jni.cc
+++ b/test/004-StackWalk/stack_walk_jni.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "art_method-inl.h"
 #include "check_reference_map_visitor.h"
 #include "jni.h"
 
@@ -42,31 +43,31 @@
     // Given the method name and the number of times the method has been called,
     // we know the Dex registers with live reference values. Assert that what we
     // find is what is expected.
-    if (m_name == "f") {
+    if (m_name == "$noinline$f") {
       if (gJava_StackWalk_refmap_calls == 1) {
         CHECK_EQ(1U, GetDexPc());
-        CHECK_REGS(4);
+        CHECK_REGS(1);  // v1: this
       } else {
         CHECK_EQ(gJava_StackWalk_refmap_calls, 2);
         CHECK_EQ(5U, GetDexPc());
-        CHECK_REGS(4);
+        CHECK_REGS(1);  // v1: this
       }
     } else if (m_name == "g") {
       if (gJava_StackWalk_refmap_calls == 1) {
-        CHECK_EQ(0xcU, GetDexPc());
-        CHECK_REGS(0, 2);  // Note that v1 is not in the minimal root set
+        CHECK_EQ(0xdU, GetDexPc());
+        CHECK_REGS(0, 2);  // v2: this (Note that v1 is not in the minimal root set)
       } else {
         CHECK_EQ(gJava_StackWalk_refmap_calls, 2);
-        CHECK_EQ(0xcU, GetDexPc());
+        CHECK_EQ(0xdU, GetDexPc());
         CHECK_REGS(0, 2);
       }
     } else if (m_name == "shlemiel") {
       if (gJava_StackWalk_refmap_calls == 1) {
-        CHECK_EQ(0x380U, GetDexPc());
+        CHECK_EQ(0x393U, GetDexPc());
         CHECK_REGS(2, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 25);
       } else {
         CHECK_EQ(gJava_StackWalk_refmap_calls, 2);
-        CHECK_EQ(0x380U, GetDexPc());
+        CHECK_EQ(0x393U, GetDexPc());
         CHECK_REGS(2, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 25);
       }
     }
diff --git a/test/004-ThreadStress/check b/test/004-ThreadStress/check
index ffbb8cf..77e4cdb 100755
--- a/test/004-ThreadStress/check
+++ b/test/004-ThreadStress/check
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Only compare the last line.
-tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
+# Numbers in the output are not stable across runs, so replace them with 'N' before comparing.
+sed '-es/[0-9][0-9]*/N/g' "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
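
The new check script normalizes every run of digits to 'N' before diffing, since thread ids and counts in the ThreadStress output vary from run to run. A minimal sketch of the same normalization in Java (the NormalizeOutput class name and the file argument are illustrative, not part of the test harness):

    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class NormalizeOutput {
      public static void main(String[] args) throws Exception {
        // Mirror sed 's/[0-9][0-9]*/N/g': collapse each digit run to a single 'N',
        // so lines like "Starting worker for 3" compare as "Starting worker for N".
        for (String line : Files.readAllLines(Paths.get(args[0]))) {
          System.out.println(line.replaceAll("[0-9]+", "N"));
        }
      }
    }
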
diff --git a/test/004-ThreadStress/expected.txt b/test/004-ThreadStress/expected.txt
index a26fb4f..772faf6 100644
--- a/test/004-ThreadStress/expected.txt
+++ b/test/004-ThreadStress/expected.txt
@@ -1 +1,11 @@
+JNI_OnLoad called
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Finishing worker
+Finishing worker
+Finishing worker
+Finishing worker
 Finishing worker
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index 9461c0b..5cae398 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -57,12 +57,14 @@
     }
 
     private final static class OOM extends Operation {
+        private final static int ALLOC_SIZE = 1024;
+
         @Override
         public boolean perform() {
             try {
                 List<byte[]> l = new ArrayList<byte[]>();
                 while (true) {
-                    l.add(new byte[1024]);
+                    l.add(new byte[ALLOC_SIZE]);
                 }
             } catch (OutOfMemoryError e) {
             }
@@ -91,9 +93,7 @@
 
                 killTemp = osClass.getDeclaredMethod("kill", int.class, int.class);
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
-                }
+                Main.printThrowable(e);
             }
 
             pid = pidTemp;
@@ -105,9 +105,10 @@
         public boolean perform() {
             try {
                 kill.invoke(null, pid, sigquit);
+            } catch (OutOfMemoryError e) {
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
+                if (!e.getClass().getName().equals(Main.errnoExceptionName)) {
+                    Main.printThrowable(e);
                 }
             }
             return true;
@@ -115,12 +116,33 @@
     }
 
     private final static class Alloc extends Operation {
+        private final static int ALLOC_SIZE = 1024;  // Needs to be small enough to not be in LOS.
+        private final static int ALLOC_COUNT = 1024;
+
         @Override
         public boolean perform() {
             try {
                 List<byte[]> l = new ArrayList<byte[]>();
-                for (int i = 0; i < 1024; i++) {
-                    l.add(new byte[1024]);
+                for (int i = 0; i < ALLOC_COUNT; i++) {
+                    l.add(new byte[ALLOC_SIZE]);
+                }
+            } catch (OutOfMemoryError e) {
+            }
+            return true;
+        }
+    }
+
+    private final static class LargeAlloc extends Operation {
+        private final static int PAGE_SIZE = 4096;
+        private final static int PAGE_SIZE_MODIFIER = 10;  // Needs to be large enough for LOS.
+        private final static int ALLOC_COUNT = 100;
+
+        @Override
+        public boolean perform() {
+            try {
+                List<byte[]> l = new ArrayList<byte[]>();
+                for (int i = 0; i < ALLOC_COUNT; i++) {
+                    l.add(new byte[PAGE_SIZE_MODIFIER * PAGE_SIZE]);
                 }
             } catch (OutOfMemoryError e) {
             }
@@ -131,7 +153,10 @@
     private final static class StackTrace extends Operation {
         @Override
         public boolean perform() {
-            Thread.currentThread().getStackTrace();
+            try {
+                Thread.currentThread().getStackTrace();
+            } catch (OutOfMemoryError e) {
+            }
             return true;
         }
     }
@@ -144,10 +169,12 @@
     }
 
     private final static class Sleep extends Operation {
+        private final static int SLEEP_TIME = 100;
+
         @Override
         public boolean perform() {
             try {
-                Thread.sleep(100);
+                Thread.sleep(SLEEP_TIME);
             } catch (InterruptedException ignored) {
             }
             return true;
@@ -155,6 +182,8 @@
     }
 
     private final static class TimedWait extends Operation {
+        private final static int SLEEP_TIME = 100;
+
         private final Object lock;
 
         public TimedWait(Object lock) {
@@ -165,7 +194,7 @@
         public boolean perform() {
             synchronized (lock) {
                 try {
-                    lock.wait(100, 0);
+                    lock.wait(SLEEP_TIME, 0);
                 } catch (InterruptedException ignored) {
                 }
             }
@@ -215,7 +244,8 @@
         Map<Operation, Double> frequencyMap = new HashMap<Operation, Double>();
         frequencyMap.put(new OOM(), 0.005);             //  1/200
         frequencyMap.put(new SigQuit(), 0.095);         // 19/200
-        frequencyMap.put(new Alloc(), 0.3);             // 60/200
+        frequencyMap.put(new Alloc(), 0.25);            // 50/200
+        frequencyMap.put(new LargeAlloc(), 0.05);       // 10/200
         frequencyMap.put(new StackTrace(), 0.1);        // 20/200
         frequencyMap.put(new Exit(), 0.25);             // 50/200
         frequencyMap.put(new Sleep(), 0.125);           // 25/200
@@ -236,6 +266,7 @@
     }
 
     public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
         parseAndRun(args);
     }
 
@@ -261,6 +292,8 @@
             op = new SigQuit();
         } else if (split[0].equals("-alloc")) {
             op = new Alloc();
+        } else if (split[0].equals("-largealloc")) {
+            op = new LargeAlloc();
         } else if (split[0].equals("-stacktrace")) {
             op = new StackTrace();
         } else if (split[0].equals("-exit")) {
@@ -365,12 +398,21 @@
             System.out.println(frequencyMap);
         }
 
-        runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        try {
+            runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        } catch (Throwable t) {
+            // In this case, the output should not contain all the required
+            // "Finishing worker" lines.
+            Main.printThrowable(t);
+        }
     }
 
     public static void runTest(final int numberOfThreads, final int numberOfDaemons,
                                final int operationsPerThread, final Object lock,
                                Map<Operation, Double> frequencyMap) throws Exception {
+        final Thread mainThread = Thread.currentThread();
+        final Barrier startBarrier = new Barrier(numberOfThreads + numberOfDaemons + 1);
+
         // Each normal thread is going to do operationsPerThread
         // operations. Each daemon thread will loop over all
         // the operations and will not stop.
@@ -404,8 +446,9 @@
             }
             // Randomize the operation order
             Collections.shuffle(Arrays.asList(operations));
-            threadStresses[t] = t < numberOfThreads ? new Main(lock, t, operations) :
-                                                      new Daemon(lock, t, operations);
+            threadStresses[t] = (t < numberOfThreads)
+                    ? new Main(lock, t, operations)
+                    : new Daemon(lock, t, operations, mainThread, startBarrier);
         }
 
         // Enable to dump operation counts per thread to make sure its
@@ -440,32 +483,41 @@
             runners[r] = new Thread("Runner thread " + r) {
                 final Main threadStress = ts;
                 public void run() {
-                    int id = threadStress.id;
-                    System.out.println("Starting worker for " + id);
-                    while (threadStress.nextOperation < operationsPerThread) {
-                        try {
-                            Thread thread = new Thread(ts, "Worker thread " + id);
-                            thread.start();
+                    try {
+                        int id = threadStress.id;
+                        // No memory-hungry tasks are running yet, so println() should succeed.
+                        System.out.println("Starting worker for " + id);
+                        // Wait until all runners and daemons reach the starting point.
+                        startBarrier.await();
+                        // Run the stress tasks.
+                        while (threadStress.nextOperation < operationsPerThread) {
                             try {
+                                Thread thread = new Thread(ts, "Worker thread " + id);
+                                thread.start();
                                 thread.join();
-                            } catch (InterruptedException e) {
-                            }
 
-                            System.out.println("Thread exited for " + id + " with "
-                                               + (operationsPerThread - threadStress.nextOperation)
-                                               + " operations remaining.");
-                        } catch (OutOfMemoryError e) {
-                            // Ignore OOME since we need to print "Finishing worker" for the test
-                            // to pass.
+                                if (DEBUG) {
+                                    System.out.println(
+                                        "Thread exited for " + id + " with " +
+                                        (operationsPerThread - threadStress.nextOperation) +
+                                        " operations remaining.");
+                                }
+                            } catch (OutOfMemoryError e) {
+                                // Ignore OOME since we need to print "Finishing worker"
+                                // for the test to pass. This OOM can come from creating
+                                // the Thread or from the DEBUG output.
+                                // Note that the Thread creation may fail repeatedly,
+                                // preventing the runner from making any progress,
+                                // especially if the number of daemons is too high.
+                            }
                         }
-                    }
-                    // Keep trying to print "Finishing worker" until it succeeds.
-                    while (true) {
-                        try {
-                            System.out.println("Finishing worker");
-                            break;
-                        } catch (OutOfMemoryError e) {
-                        }
+                        // Print "Finishing worker" through JNI to avoid OOME.
+                        Main.printString(Main.finishingWorkerMessage);
+                    } catch (Throwable t) {
+                        Main.printThrowable(t);
+                        // Interrupt the main thread, so that it can shut down in an orderly
+                        // fashion instead of waiting indefinitely for some Barrier.
+                        mainThread.interrupt();
                     }
                 }
             };
@@ -498,6 +550,9 @@
         for (int r = 0; r < runners.length; r++) {
             runners[r].start();
         }
+        // Wait for all threads to reach the starting point.
+        startBarrier.await();
+        // Wait for runners to finish.
         for (int r = 0; r < runners.length; r++) {
             runners[r].join();
         }
@@ -540,8 +595,14 @@
     }
 
     private static class Daemon extends Main {
-        private Daemon(Object lock, int id, Operation[] operations) {
+        private Daemon(Object lock,
+                       int id,
+                       Operation[] operations,
+                       Thread mainThread,
+                       Barrier startBarrier) {
             super(lock, id, operations);
+            this.mainThread = mainThread;
+            this.startBarrier = startBarrier;
         }
 
         public void run() {
@@ -549,26 +610,74 @@
                 if (DEBUG) {
                     System.out.println("Starting ThreadStress Daemon " + id);
                 }
-                int i = 0;
-                while (true) {
-                    Operation operation = operations[i];
-                    if (DEBUG) {
-                        System.out.println("ThreadStress Daemon " + id
-                                           + " operation " + i
-                                           + " is " + operation);
+                startBarrier.await();
+                try {
+                    int i = 0;
+                    while (true) {
+                        Operation operation = operations[i];
+                        if (DEBUG) {
+                            System.out.println("ThreadStress Daemon " + id
+                                               + " operation " + i
+                                               + " is " + operation);
+                        }
+                        operation.perform();
+                        i = (i + 1) % operations.length;
                     }
-                    operation.perform();
-                    i = (i + 1) % operations.length;
+                } catch (OutOfMemoryError e) {
+                    // Catch OutOfMemoryErrors since these can cause the test to fail if they print
+                    // the stack trace after "Finishing worker". Note that operations should catch
+                    // their own OOME; this guards only against OOME in the DEBUG output.
                 }
-            } catch (OutOfMemoryError e) {
-                // Catch OutOfMemoryErrors since these can cause the test to fail it they print
-                // the stack trace after "Finishing worker".
-            } finally {
                 if (DEBUG) {
                     System.out.println("Finishing ThreadStress Daemon for " + id);
                 }
+            } catch (Throwable t) {
+                Main.printThrowable(t);
+                // Interrupt the main thread, so that it can shut down in an orderly
+                // fashion instead of waiting indefinitely for some Barrier.
+                mainThread.interrupt();
             }
         }
+
+        final Thread mainThread;
+        final Barrier startBarrier;
     }
 
+    // Note: java.util.concurrent.CyclicBarrier.await() allocates memory and may throw OOM.
+    // That is highly undesirable in this test, so we use our own simple barrier class.
+    // The only memory allocation that can happen here is lock inflation, which uses
+    // a native allocation. As such, it should succeed even if the Java heap is full.
+    // If the native allocation unexpectedly fails, the program will abort().
+    private static class Barrier {
+        public Barrier(int initialCount) {
+            count = initialCount;
+        }
+
+        public synchronized void await() throws InterruptedException {
+            --count;
+            if (count != 0) {
+                do {
+                    wait();
+                } while (count != 0);  // Check for spurious wakeup.
+            } else {
+                notifyAll();
+            }
+        }
+
+        private int count;
+    }
+
+    // Printing a String/Throwable through JNI requires only native memory and space
+    // in the local reference table, so it should succeed even if the Java heap is full.
+    private static native void printString(String s);
+    private static native void printThrowable(Throwable t);
+
+    static final String finishingWorkerMessage;
+    static final String errnoExceptionName;
+    static {
+        // We pre-allocate the strings in the class initializer to avoid const-string
+        // instructions in code that uses these strings later; const-string may throw OOME.
+        finishingWorkerMessage = "Finishing worker\n";
+        errnoExceptionName = "ErrnoException";
+    }
 }
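
The Barrier above is the key to the reworked startup: every runner and daemon blocks in await() until all of them, plus the main thread, have arrived, so no memory-hungry operation can start before each runner has printed its "Starting worker" line. A self-contained sketch of that rendezvous (BarrierDemo and the worker bodies are illustrative, not part of the test):

    public class BarrierDemo {
      // Simplified copy of the test's allocation-free barrier.
      static class Barrier {
        private int count;
        Barrier(int initialCount) { count = initialCount; }
        public synchronized void await() throws InterruptedException {
          --count;
          if (count != 0) {
            do {
              wait();
            } while (count != 0);  // Guard against spurious wakeup.
          } else {
            notifyAll();
          }
        }
      }

      public static void main(String[] args) throws InterruptedException {
        final Barrier start = new Barrier(3);  // two workers + the main thread
        Runnable worker = () -> {
          try {
            start.await();  // Nobody proceeds until all three have arrived.
            System.out.println("worker released");
          } catch (InterruptedException ignored) {
          }
        };
        new Thread(worker).start();
        new Thread(worker).start();
        start.await();  // Main arrives last and releases everyone.
        System.out.println("main released");
      }
    }
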
diff --git a/test/004-ThreadStress/thread_stress.cc b/test/004-ThreadStress/thread_stress.cc
new file mode 100644
index 0000000..573c352
--- /dev/null
+++ b/test/004-ThreadStress/thread_stress.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "jni.h"
+#include "mirror/string.h"
+#include "mirror/throwable.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printString(JNIEnv*, jclass, jstring s) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::String*>(s)->ToModifiedUtf8();
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printThrowable(JNIEnv*, jclass, jthrowable t) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::Throwable*>(t)->Dump();
+}
+
+}  // namespace art
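
On the Java side these natives are plain private static native declarations in Main, resolved by name (Java_Main_printString, Java_Main_printThrowable) once main() calls System.loadLibrary(args[0]) with the library name supplied by the test runner. A stripped-down sketch of the pairing:

    public class Main {
      // Resolved against Java_Main_printString in thread_stress.cc.
      private static native void printString(String s);

      public static void main(String[] args) {
        System.loadLibrary(args[0]);  // The run-test harness passes the library name.
        printString("Finishing worker\n");
      }
    }
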
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 5b22e88..d43d374 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -39,16 +39,24 @@
     }
   }
 
-  private static Unsafe getUnsafe() throws Exception {
-    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+  private static Unsafe getUnsafe() throws NoSuchFieldException, IllegalAccessException {
+    Class<?> unsafeClass = Unsafe.class;
     Field f = unsafeClass.getDeclaredField("theUnsafe");
     f.setAccessible(true);
     return (Unsafe) f.get(null);
   }
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException {
     System.loadLibrary(args[0]);
     Unsafe unsafe = getUnsafe();
+
+    testArrayBaseOffset(unsafe);
+    testArrayIndexScale(unsafe);
+    testGetAndPutAndCAS(unsafe);
+    testGetAndPutVolatile(unsafe);
+  }
+
+  private static void testArrayBaseOffset(Unsafe unsafe) {
     check(unsafe.arrayBaseOffset(boolean[].class), vmArrayBaseOffset(boolean[].class),
         "Unsafe.arrayBaseOffset(boolean[])");
     check(unsafe.arrayBaseOffset(byte[].class), vmArrayBaseOffset(byte[].class),
@@ -65,7 +73,9 @@
         "Unsafe.arrayBaseOffset(long[])");
     check(unsafe.arrayBaseOffset(Object[].class), vmArrayBaseOffset(Object[].class),
         "Unsafe.arrayBaseOffset(Object[])");
+  }
 
+  private static void testArrayIndexScale(Unsafe unsafe) {
     check(unsafe.arrayIndexScale(boolean[].class), vmArrayIndexScale(boolean[].class),
         "Unsafe.arrayIndexScale(boolean[])");
     check(unsafe.arrayIndexScale(byte[].class), vmArrayIndexScale(byte[].class),
@@ -82,7 +92,9 @@
         "Unsafe.arrayIndexScale(long[])");
     check(unsafe.arrayIndexScale(Object[].class), vmArrayIndexScale(Object[].class),
         "Unsafe.arrayIndexScale(Object[])");
+  }
 
+  private static void testGetAndPutAndCAS(Unsafe unsafe) throws NoSuchFieldException {
     TestClass t = new TestClass();
 
     int intValue = 12345678;
@@ -110,23 +122,35 @@
     check(unsafe.getObject(t, objectOffset), objectValue, "Unsafe.getObject(Object, long)");
 
     if (unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
-        System.out.println("Unexpectedly succeeding compareAndSwap...");
+      System.out.println("Unexpectedly succeeding compareAndSwapInt(t, intOffset, 0, 1)");
     }
     if (!unsafe.compareAndSwapInt(t, intOffset, intValue, 0)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwap...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapInt(t, intOffset, intValue, 0)");
     }
     if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwap...");
+      System.out.println("Unexpectedly not succeeding compareAndSwapInt(t, intOffset, 0, 1)");
+    }
+    // Exercise sun.misc.Unsafe.compareAndSwapInt using the same
+    // integer (1) for the `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapInt(t, intOffset, 1, 1)) {
+      System.out.println("Unexpectedly not succeeding compareAndSwapInt(t, intOffset, 1, 1)");
     }
 
     if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
-        System.out.println("Unexpectedly succeeding compareAndSwapLong...");
+      System.out.println("Unexpectedly succeeding compareAndSwapLong(t, longOffset, 0, 1)");
     }
     if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapLong(t, longOffset, longValue, 0)");
     }
     if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+      System.out.println("Unexpectedly not succeeding compareAndSwapLong(t, longOffset, 0, 1)");
+    }
+    // Exercise sun.misc.Unsafe.compareAndSwapLong using the same
+    // integer (1) for the `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapLong(t, longOffset, 1, 1)) {
+      System.out.println("Unexpectedly not succeeding compareAndSwapLong(t, longOffset, 1, 1)");
     }
 
     // We do not use `null` as argument to sun.misc.Unsafe.compareAndSwapObject
@@ -135,40 +159,96 @@
     // references).  This way, when heap poisoning is enabled, we can
     // better exercise its implementation within that method.
     if (unsafe.compareAndSwapObject(t, objectOffset, new Object(), new Object())) {
-        System.out.println("Unexpectedly succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly succeeding " +
+          "compareAndSwapObject(t, objectOffset, new Object(), new Object())");
     }
     Object objectValue2 = new Object();
     if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue, objectValue2)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly not succeeding " +
+          "compareAndSwapObject(t, objectOffset, objectValue, objectValue2)");
     }
     Object objectValue3 = new Object();
     if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue2, objectValue3)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly not succeeding " +
+          "compareAndSwapObject(t, objectOffset, objectValue2, objectValue3)");
     }
-
+    // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
+    // object (`objectValue3`) for the `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue3, objectValue3)) {
+      System.out.println("Unexpectedly not succeeding " +
+          "compareAndSwapObject(t, objectOffset, objectValue3, objectValue3)");
+    }
     // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
     // object (`t`) for the `obj` and `newValue` arguments.
     if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue3, t)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapObject(t, objectOffset, objectValue3, t)");
     }
     // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
     // object (`t`) for the `obj`, `expectedValue` and `newValue` arguments.
     if (!unsafe.compareAndSwapObject(t, objectOffset, t, t)) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println("Unexpectedly not succeeding compareAndSwapObject(t, objectOffset, t, t)");
     }
     // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
     // object (`t`) for the `obj` and `expectedValue` arguments.
     if (!unsafe.compareAndSwapObject(t, objectOffset, t, new Object())) {
-        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+      System.out.println(
+          "Unexpectedly not succeeding compareAndSwapObject(t, objectOffset, t, new Object())");
     }
   }
 
+  private static void testGetAndPutVolatile(Unsafe unsafe) throws NoSuchFieldException {
+    TestVolatileClass tv = new TestVolatileClass();
+
+    int intValue = 12345678;
+    Field volatileIntField = TestVolatileClass.class.getDeclaredField("volatileIntVar");
+    long volatileIntOffset = unsafe.objectFieldOffset(volatileIntField);
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          0,
+          "Unsafe.getIntVolatile(Object, long) - initial");
+    unsafe.putIntVolatile(tv, volatileIntOffset, intValue);
+    check(tv.volatileIntVar, intValue, "Unsafe.putIntVolatile(Object, long, int)");
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          intValue,
+          "Unsafe.getIntVolatile(Object, long)");
+
+    long longValue = 1234567887654321L;
+    Field volatileLongField = TestVolatileClass.class.getDeclaredField("volatileLongVar");
+    long volatileLongOffset = unsafe.objectFieldOffset(volatileLongField);
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          0,
+          "Unsafe.getLongVolatile(Object, long) - initial");
+    unsafe.putLongVolatile(tv, volatileLongOffset, longValue);
+    check(tv.volatileLongVar, longValue, "Unsafe.putLongVolatile(Object, long, long)");
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          longValue,
+          "Unsafe.getLongVolatile(Object, long)");
+
+    Object objectValue = new Object();
+    Field volatileObjectField = TestVolatileClass.class.getDeclaredField("volatileObjectVar");
+    long volatileObjectOffset = unsafe.objectFieldOffset(volatileObjectField);
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          null,
+          "Unsafe.getObjectVolatile(Object, long) - initial");
+    unsafe.putObjectVolatile(tv, volatileObjectOffset, objectValue);
+    check(tv.volatileObjectVar, objectValue, "Unsafe.putObjectVolatile(Object, long, Object)");
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          objectValue,
+          "Unsafe.getObjectVolatile(Object, long)");
+  }
+
   private static class TestClass {
     public int intVar = 0;
     public long longVar = 0;
     public Object objectVar = null;
   }
 
-  private static native int vmArrayBaseOffset(Class clazz);
-  private static native int vmArrayIndexScale(Class clazz);
+  private static class TestVolatileClass {
+    public volatile int volatileIntVar = 0;
+    public volatile long volatileLongVar = 0;
+    public volatile Object volatileObjectVar = null;
+  }
+
+  private static native int vmArrayBaseOffset(Class<?> clazz);
+  private static native int vmArrayIndexScale(Class<?> clazz);
 }
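
The new same-value CAS cases above exercise an edge the intrinsics must get right: a compare-and-swap where expectedValue equals newValue still has to report success when the field holds that value. A minimal standalone sketch of the semantics under test (Holder and CasSketch are illustrative names):

    import java.lang.reflect.Field;
    import sun.misc.Unsafe;

    public class CasSketch {
      static class Holder { public int intVar = 0; }

      public static void main(String[] args) throws Exception {
        Field f = Unsafe.class.getDeclaredField("theUnsafe");
        f.setAccessible(true);
        Unsafe unsafe = (Unsafe) f.get(null);

        Holder h = new Holder();
        long off = unsafe.objectFieldOffset(Holder.class.getDeclaredField("intVar"));

        // CAS succeeds only when the current value equals expectedValue.
        System.out.println(unsafe.compareAndSwapInt(h, off, 1, 2));  // false: intVar is 0
        System.out.println(unsafe.compareAndSwapInt(h, off, 0, 1));  // true:  0 -> 1
        // The degenerate case the test now covers: expectedValue == newValue.
        System.out.println(unsafe.compareAndSwapInt(h, off, 1, 1));  // true:  1 -> 1
      }
    }
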
diff --git a/test/004-checker-UnsafeTest18/expected.txt b/test/004-checker-UnsafeTest18/expected.txt
new file mode 100644
index 0000000..651da72
--- /dev/null
+++ b/test/004-checker-UnsafeTest18/expected.txt
@@ -0,0 +1,2 @@
+starting
+passed
diff --git a/test/004-checker-UnsafeTest18/info.txt b/test/004-checker-UnsafeTest18/info.txt
new file mode 100644
index 0000000..0fca5eb
--- /dev/null
+++ b/test/004-checker-UnsafeTest18/info.txt
@@ -0,0 +1 @@
+Test support for 1.8 sun.misc.Unsafe.
diff --git a/test/004-checker-UnsafeTest18/src/Main.java b/test/004-checker-UnsafeTest18/src/Main.java
new file mode 100644
index 0000000..282f9ce
--- /dev/null
+++ b/test/004-checker-UnsafeTest18/src/Main.java
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import sun.misc.Unsafe;
+
+/**
+ * Checker test on the 1.8 unsafe operations. Note that this is by no means an
+ * exhaustive unit test for these CAS (compare-and-swap) and fence operations.
+ * Instead, this test ensures the methods are recognized as intrinsic and behave
+ * as expected.
+ */
+public class Main {
+
+  private static final Unsafe unsafe = getUnsafe();
+
+  private static Thread[] sThreads = new Thread[10];
+
+  //
+  // Fields accessed by setters and adders, and by memory fence tests.
+  //
+
+  public int i = 0;
+  public long l = 0;
+  public Object o = null;
+
+  public int x_value;
+  public int y_value;
+  public volatile boolean running;
+
+  //
+  // Setters.
+  //
+
+  /// CHECK-START: int Main.set32(java.lang.Object, long, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeVirtual intrinsic:UnsafeGetAndSetInt
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int set32(Object o, long offset, int newValue) {
+    return unsafe.getAndSetInt(o, offset, newValue);
+  }
+
+  /// CHECK-START: long Main.set64(java.lang.Object, long, long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeVirtual intrinsic:UnsafeGetAndSetLong
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long set64(Object o, long offset, long newValue) {
+    return unsafe.getAndSetLong(o, offset, newValue);
+  }
+
+  /// CHECK-START: java.lang.Object Main.setObj(java.lang.Object, long, java.lang.Object) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:l\d+>> InvokeVirtual intrinsic:UnsafeGetAndSetObject
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static Object setObj(Object o, long offset, Object newValue) {
+    return unsafe.getAndSetObject(o, offset, newValue);
+  }
+
+  //
+  // Adders.
+  //
+
+  /// CHECK-START: int Main.add32(java.lang.Object, long, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeVirtual intrinsic:UnsafeGetAndAddInt
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int add32(Object o, long offset, int delta) {
+    return unsafe.getAndAddInt(o, offset, delta);
+  }
+
+  /// CHECK-START: long Main.add64(java.lang.Object, long, long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeVirtual intrinsic:UnsafeGetAndAddLong
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long add64(Object o, long offset, long delta) {
+    return unsafe.getAndAddLong(o, offset, delta);
+  }
+
+  //
+  // Fences (native).
+  //
+
+  /// CHECK-START: void Main.load() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeLoadFence
+  //
+  /// CHECK-START: void Main.load() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeLoadFence
+  //
+  /// CHECK-START: void Main.load() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:LoadAny
+  private static void load() {
+    unsafe.loadFence();
+  }
+
+  /// CHECK-START: void Main.store() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeStoreFence
+  //
+  /// CHECK-START: void Main.store() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeStoreFence
+  //
+  /// CHECK-START: void Main.store() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:AnyStore
+  private static void store() {
+    unsafe.storeFence();
+  }
+
+  /// CHECK-START: void Main.full() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeFullFence
+  //
+  /// CHECK-START: void Main.full() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeFullFence
+  //
+  /// CHECK-START: void Main.full() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:AnyAny
+  private static void full() {
+    unsafe.fullFence();
+  }
+
+  //
+  // Thread fork/join.
+  //
+
+  private static void fork(Runnable r) {
+    for (int i = 0; i < 10; i++) {
+      sThreads[i] = new Thread(r);
+    }
+    // Start the threads only after the full array has been written with new threads,
+    // because one test relies on the contents of this array to be consistent.
+    for (int i = 0; i < 10; i++) {
+      sThreads[i].start();
+    }
+  }
+
+  private static void join() {
+    try {
+      for (int i = 0; i < 10; i++) {
+        sThreads[i].join();
+      }
+    } catch (InterruptedException e) {
+      throw new Error("Failed join: " + e);
+    }
+  }
+
+  //
+  // Driver.
+  //
+
+  public static void main(String[] args) {
+    System.out.println("starting");
+
+    final Main m = new Main();
+
+    // Get the offsets.
+
+    final long intOffset, longOffset, objOffset;
+    try {
+      Field intField = Main.class.getDeclaredField("i");
+      Field longField = Main.class.getDeclaredField("l");
+      Field objField = Main.class.getDeclaredField("o");
+
+      intOffset = unsafe.objectFieldOffset(intField);
+      longOffset = unsafe.objectFieldOffset(longField);
+      objOffset = unsafe.objectFieldOffset(objField);
+
+    } catch (NoSuchFieldException e) {
+      throw new Error("No offset: " + e);
+    }
+
+    // Some sanity on setters and adders within same thread.
+
+    set32(m, intOffset, 3);
+    expectEqual32(3, m.i);
+
+    set64(m, longOffset, 7L);
+    expectEqual64(7L, m.l);
+
+    setObj(m, objOffset, m);
+    expectEqualObj(m, m.o);
+
+    add32(m, intOffset, 11);
+    expectEqual32(14, m.i);
+
+    add64(m, longOffset, 13L);
+    expectEqual64(20L, m.l);
+
+    // Some sanity on setters within different threads.
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          set32(m, intOffset, i);
+      }
+    });
+    join();
+    expectEqual32(9, m.i);  // one thread's last value wins
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          set64(m, longOffset, (long) (100 + i));
+      }
+    });
+    join();
+    expectEqual64(109L, m.l);  // one thread's last value wins
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          setObj(m, objOffset, sThreads[i]);
+      }
+    });
+    join();
+    expectEqualObj(sThreads[9], m.o);  // one thread's last value wins
+
+    // Some sanity on adders within different threads.
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          add32(m, intOffset, i + 1);
+      }
+    });
+    join();
+    expectEqual32(559, m.i);  // all values accounted for
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          add64(m, longOffset, (long) (i + 1));
+      }
+    });
+    join();
+    expectEqual64(659L, m.l);  // all values accounted for
+
+    // Some sanity on fences within same thread. Note that memory fences within one
+    // thread make little sense, but the sanity check ensures nothing bad happens.
+
+    m.i = -1;
+    m.l = -2L;
+    m.o = null;
+
+    load();
+    store();
+    full();
+
+    expectEqual32(-1, m.i);
+    expectEqual64(-2L, m.l);
+    expectEqualObj(null, m.o);
+
+    // Some sanity on full fence within different threads. We write the non-volatile m.l after
+    // the fork(), which means there is no happens-before relation in the Java memory model
+    // with respect to the read in the threads. This relation is enforced by the memory fences
+    // and the weak-set() -> get() guard. Note that the guard semantics used here are actually
+    // too strong and already enforce total memory visibility, but this test illustrates what
+    // should still happen if Java had a true relaxed memory guard.
+
+    final AtomicBoolean guard1 = new AtomicBoolean();
+    m.l = 0L;
+
+    fork(new Runnable() {
+      public void run() {
+        while (!guard1.get());  // busy-waiting
+        full();
+        expectEqual64(-123456789L, m.l);
+      }
+    });
+
+    m.l = -123456789L;
+    full();
+    while (!guard1.weakCompareAndSet(false, true));  // relaxed memory order
+    join();
+
+    // Some sanity on release/acquire fences within different threads. We write the non-volatile
+    // m.l after the fork(), which means there is no happens-before relation in the Java memory
+    // model with respect to the read in the threads. This relation is enforced by the memory fences
+    // and the weak-set() -> get() guard. Note that the guard semantics used here are actually
+    // too strong and already enforce total memory visibility, but this test illustrates what
+    // should still happen if Java had a true relaxed memory guard.
+
+    final AtomicBoolean guard2 = new AtomicBoolean();
+    m.l = 0L;
+
+    fork(new Runnable() {
+      public void run() {
+        while (!guard2.get());  // busy-waiting
+        load();
+        expectEqual64(-987654321L, m.l);
+      }
+    });
+
+    m.l = -987654321L;
+    store();
+    while (!guard2.weakCompareAndSet(false, true));  // relaxed memory order
+    join();
+
+    // Some sanity on release/acquire fences within different threads using a test suggested by
+    // Hans Boehm. Even this test remains within the realm of sanity only, since having the threads
+    // read the same value consistently would be a valid outcome.
+
+    m.x_value = -1;
+    m.y_value = -1;
+    m.running = true;
+
+    fork(new Runnable() {
+      public void run() {
+        while (m.running) {
+          for (int few_times = 0; few_times < 1000; few_times++) {
+            // Read y first, then load fence, then read x.
+            // They should appear in order, if seen at all.
+            int local_y = m.y_value;
+            load();
+            int local_x = m.x_value;
+            expectLessThanOrEqual32(local_y, local_x);
+          }
+        }
+      }
+    });
+
+    for (int many_times = 0; many_times < 100000; many_times++) {
+      m.x_value = many_times;
+      store();
+      m.y_value = many_times;
+    }
+    m.running = false;
+    join();
+
+    // All done!
+
+    System.out.println("passed");
+  }
+
+  // Use reflection to implement "Unsafe.getUnsafe()".
+  private static Unsafe getUnsafe() {
+    try {
+      Class<?> unsafeClass = Unsafe.class;
+      Field f = unsafeClass.getDeclaredField("theUnsafe");
+      f.setAccessible(true);
+      return (Unsafe) f.get(null);
+    } catch (Exception e) {
+      throw new Error("Cannot get Unsafe instance");
+    }
+  }
+
+  private static void expectEqual32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectLessThanOrEqual32(int val1, int val2) {
+    if (val1 > val2) {
+      throw new Error("Expected: " + val1 + " <= " + val2);
+    }
+  }
+
+  private static void expectEqual64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualObj(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
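
For contrast with the explicit loadFence()/storeFence() pairs used above, the same publish-then-observe guarantee can be expressed with a volatile flag, which the Java memory model orders directly; a minimal sketch (VolatilePublish is an illustrative name, not part of the test):

    public class VolatilePublish {
      static long payload;            // plain field, published via the flag
      static volatile boolean ready;  // volatile write/read creates happens-before

      public static void main(String[] args) throws InterruptedException {
        Thread reader = new Thread(() -> {
          while (!ready) { }  // busy-wait, as in the test
          // The volatile read of `ready` makes the earlier plain write visible.
          System.out.println(payload);  // guaranteed to print -123456789
        });
        reader.start();
        payload = -123456789L;
        ready = true;  // release: everything written before is now visible
        reader.join();
      }
    }
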
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 3f00a1a..93bee50 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -21,14 +21,16 @@
 
 # android.test.anno.MissingAnnotation is available at compile time...
 ${JAVAC} -d classes `find src -name '*.java'`
+# Overwrite RenamedEnum with the renamed version from src2.
+${JAVAC} -d classes `find src2 -name '*.java'`
 
 # ...but not at run time.
 rm 'classes/android/test/anno/MissingAnnotation.class'
 rm 'classes/android/test/anno/ClassWithInnerAnnotationClass$MissingInnerAnnotationClass.class'
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index e1c3dad..ee5b0c7 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -89,10 +89,11 @@
   annotations on FIELD int android.test.anno.FullyNoted.mBar:
     @android.test.anno.AnnoFancyField(nombre=fubar)
       interface android.test.anno.AnnoFancyField
-    aff: @android.test.anno.AnnoFancyField(nombre=fubar) / class $Proxy13
+    aff: @android.test.anno.AnnoFancyField(nombre=fubar) / true
     --> nombre is 'fubar'
 
 SimplyNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
+SimplyNoted.get(AnnoSimpleTypeInvis) = null
 SubNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
 
 Package annotations:
@@ -108,3 +109,4 @@
 
 Get annotation with missing class should not throw
 Got expected TypeNotPresentException
+Got expected NoSuchFieldError
diff --git a/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
index 3088866..aa7808f 100644
--- a/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
+++ b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
@@ -10,5 +10,5 @@
     boolean callMe() default false;
     boolean biteMe();
     AnnoFancyMethodEnum enumerated() default AnnoFancyMethodEnum.FOO;
-    Class someClass() default SomeClass.class;
+    Class<?> someClass() default SomeClass.class;
 }
diff --git a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
index c32e9a2..7933b80 100644
--- a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
+++ b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
@@ -20,5 +20,5 @@
 
 @Retention(RetentionPolicy.RUNTIME)
 public @interface AnnoMissingClass {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
new file mode 100644
index 0000000..7a15c64
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface AnnoRenamedEnumMethod {
+    RenamedEnumClass.RenamedEnum renamed() default RenamedEnumClass.RenamedEnum.FOO;
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..cfba819
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOO, BAR };
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedNoted.java b/test/005-annotations/src/android/test/anno/RenamedNoted.java
new file mode 100644
index 0000000..aae3a3f
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedNoted.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+public class RenamedNoted {
+    @AnnoRenamedEnumMethod(renamed=RenamedEnumClass.RenamedEnum.BAR)
+    public int bar() {
+        return 0;
+    }
+}
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 7b74a73..8ea8e8e 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -1,9 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package android.test.anno;
 
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
 import java.util.TreeMap;
 
 public class TestAnnotations {
@@ -25,7 +42,7 @@
         }
     }
 
-    static void printAnnotations(Class clazz) {
+    static void printAnnotations(Class<?> clazz) {
         Annotation[] annos;
         Annotation[][] parAnnos;
 
@@ -35,7 +52,7 @@
         printAnnotationArray("", annos);
         System.out.println();
 
-        for (Constructor c: clazz.getDeclaredConstructors()) {
+        for (Constructor<?> c: clazz.getDeclaredConstructors()) {
             annos = c.getDeclaredAnnotations();
             System.out.println("  annotations on CTOR " + c + ":");
             printAnnotationArray("  ", annos);
@@ -65,7 +82,7 @@
             AnnoFancyField aff;
             aff = (AnnoFancyField) f.getAnnotation(AnnoFancyField.class);
             if (aff != null) {
-                System.out.println("    aff: " + aff + " / " + aff.getClass());
+                System.out.println("    aff: " + aff + " / " + Proxy.isProxyClass(aff.getClass()));
                 System.out.println("    --> nombre is '" + aff.nombre() + "'");
             }
         }
@@ -122,8 +139,7 @@
         final IntToString[] mapping;
 
         try {
-            meth = TestAnnotations.class.getMethod("getFocusType",
-                    (Class[])null);
+            meth = TestAnnotations.class.getMethod("getFocusType");
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -142,7 +158,23 @@
         System.out.println("");
     }
 
-
+    public static void testVisibilityCompatibility() throws Exception {
+        if (!VMRuntime.isAndroid()) {
+            return;
+        }
+        Object runtime = VMRuntime.getRuntime();
+        int currentSdkVersion = VMRuntime.getTargetSdkVersion(runtime);
+        // SDK version 23 is M.
+        int oldSdkVersion = 23;
+        VMRuntime.setTargetSdkVersion(runtime, oldSdkVersion);
+        // This annotation has CLASS retention, but is visible to the runtime in M and earlier.
+        Annotation anno = SimplyNoted.class.getAnnotation(AnnoSimpleTypeInvis.class);
+        if (anno == null) {
+            System.out.println("testVisibilityCompatibility failed: " +
+                    "SimplyNoted.get(AnnoSimpleTypeInvis) should not be null");
+        }
+        VMRuntime.setTargetSdkVersion(runtime, currentSdkVersion);
+    }
 
     public static void main(String[] args) {
         System.out.println("TestAnnotations...");
@@ -168,6 +200,9 @@
         // this is expected to be non-null
         Annotation anno = SimplyNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SimplyNoted.get(AnnoSimpleType) = " + anno);
+        // this is expected to be null
+        anno = SimplyNoted.class.getAnnotation(AnnoSimpleTypeInvis.class);
+        System.out.println("SimplyNoted.get(AnnoSimpleTypeInvis) = " + anno);
         // this is non-null if the @Inherited tag is present
         anno = SubNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SubNoted.get(AnnoSimpleType) = " + anno);
@@ -199,5 +234,65 @@
         } catch (TypeNotPresentException expected) {
             System.out.println("Got expected TypeNotPresentException");
         }
+
+        // Test renamed enums.
+        try {
+            for (Method m: RenamedNoted.class.getDeclaredMethods()) {
+                Annotation[] annos = m.getDeclaredAnnotations();
+                System.out.println("  annotations on METH " + m + ":");
+            }
+        } catch (NoSuchFieldError expected) {
+            System.out.println("Got expected NoSuchFieldError");
+        }
+
+        // Test whether annotations marked VISIBILITY_BUILD are visible to the runtime in M and earlier.
+        try {
+            testVisibilityCompatibility();
+        } catch (Exception e) {
+            System.out.println("testVisibilityCompatibility failed: " + e);
+        }
+    }
+
+    private static class VMRuntime {
+        private static Class<?> vmRuntimeClass;
+        private static Method getRuntimeMethod;
+        private static Method getTargetSdkVersionMethod;
+        private static Method setTargetSdkVersionMethod;
+        static {
+            init();
+        }
+
+        private static void init() {
+            try {
+                vmRuntimeClass = Class.forName("dalvik.system.VMRuntime");
+            } catch (Exception e) {
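+                // Not running on Android: dalvik.system.VMRuntime is unavailable,
+                // so leave vmRuntimeClass null and let isAndroid() report false.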
+                return;
+            }
+            try {
+                getRuntimeMethod = vmRuntimeClass.getDeclaredMethod("getRuntime");
+                getTargetSdkVersionMethod =
+                        vmRuntimeClass.getDeclaredMethod("getTargetSdkVersion");
+                setTargetSdkVersionMethod =
+                        vmRuntimeClass.getDeclaredMethod("setTargetSdkVersion", Integer.TYPE);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        public static boolean isAndroid() {
+            return vmRuntimeClass != null;
+        }
+
+        public static Object getRuntime() throws Exception {
+            return getRuntimeMethod.invoke(null);
+        }
+
+        public static int getTargetSdkVersion(Object runtime) throws Exception {
+            return (int) getTargetSdkVersionMethod.invoke(runtime);
+        }
+
+        public static void setTargetSdkVersion(Object runtime, int version) throws Exception {
+            setTargetSdkVersionMethod.invoke(runtime, version);
+        }
     }
 }
diff --git a/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..5a2fe36
--- /dev/null
+++ b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOOBAR };
+}
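
The renamed-enum scenario exercised above depends on how annotation values are stored: an enum-typed element is recorded by the constant's field name, so renaming the constant between src (what the test is compiled against) and src2 (what actually runs) makes reflection throw NoSuchFieldError when the annotation is read. A minimal sketch of that shape, assuming a hypothetical annotation element of the enum type and assuming it lives alongside the android.test.anno classes (the src-side declarations are not part of this patch):

    import java.lang.annotation.*;

    @Target(ElementType.METHOD)
    @Retention(RetentionPolicy.RUNTIME)
    @interface UsesRenamedEnum {
        RenamedEnumClass.RenamedEnum value();
    }

    class RenamedSketch {
        // If this method were compiled against a version declaring, say,
        // RenamedEnum.FOO, and only FOOBAR exists at runtime, then
        // getDeclaredAnnotations() throws NoSuchFieldError while resolving
        // the stored constant name. (Usage left commented out because it
        // cannot compile against the src2 version shown above.)
        // @UsesRenamedEnum(RenamedEnumClass.RenamedEnum.FOO)
        void noted() {}
    }
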
diff --git a/test/008-exceptions/expected.txt b/test/008-exceptions/expected.txt
index 92c79dc..083ecf7 100644
--- a/test/008-exceptions/expected.txt
+++ b/test/008-exceptions/expected.txt
@@ -1,12 +1,17 @@
 Got an NPE: second throw
 java.lang.NullPointerException: second throw
-	at Main.catchAndRethrow(Main.java:58)
-	at Main.exceptions_007(Main.java:41)
-	at Main.main(Main.java:49)
+	at Main.catchAndRethrow(Main.java:77)
+	at Main.exceptions_007(Main.java:59)
+	at Main.main(Main.java:67)
 Caused by: java.lang.NullPointerException: first throw
-	at Main.throwNullPointerException(Main.java:65)
-	at Main.catchAndRethrow(Main.java:55)
+	at Main.throwNullPointerException(Main.java:84)
+	at Main.catchAndRethrow(Main.java:74)
 	... 2 more
 Static Init
-BadError: This is bad by convention
-BadError: This is bad by convention
+BadError: This is bad by convention: BadInit
+java.lang.NoClassDefFoundError: BadInit
+BadError: This is bad by convention: BadInit
+Static BadInitNoStringInit
+BadErrorNoStringInit: This is bad by convention
+java.lang.NoClassDefFoundError: BadInitNoStringInit
+BadErrorNoStringInit: This is bad by convention
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 7f6d0c5..b8231f1 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -14,20 +14,38 @@
  * limitations under the License.
  */
 
-// An exception that doesn't have a <init>(String) method.
+// An error class.
 class BadError extends Error {
-    public BadError() {
-        super("This is bad by convention");
+    public BadError(String s) {
+        super("This is bad by convention: " + s);
     }
 }
 
-// A class that throws BadException during static initialization.
+// A class that throws BadError during static initialization.
 class BadInit {
     static int dummy;
     static {
         System.out.println("Static Init");
         if (true) {
-            throw new BadError();
+            throw new BadError("BadInit");
+        }
+    }
+}
+
+// An error that doesn't have a <init>(String) method.
+class BadErrorNoStringInit extends Error {
+    public BadErrorNoStringInit() {
+        super("This is bad by convention");
+    }
+}
+
+// A class that throws BadErrorNoStringInit during static initialization.
+class BadInitNoStringInit {
+    static int dummy;
+    static {
+        System.out.println("Static BadInitNoStringInit");
+        if (true) {
+            throw new BadErrorNoStringInit();
         }
     }
 }
@@ -42,12 +60,13 @@
         } catch (NullPointerException npe) {
             System.out.print("Got an NPE: ");
             System.out.println(npe.getMessage());
-            npe.printStackTrace();
+            npe.printStackTrace(System.out);
         }
     }
     public static void main (String args[]) {
         exceptions_007();
         exceptionsRethrowClassInitFailure();
+        exceptionsRethrowClassInitFailureNoStringInit();
     }
 
     private static void catchAndRethrow() {
@@ -79,11 +98,35 @@
             try {
                 BadInit.dummy = 1;
                 throw new IllegalStateException("Should not reach here.");
-            } catch (BadError e) {
+            } catch (NoClassDefFoundError e) {
                 System.out.println(e);
+                System.out.println(e.getCause());
             }
         } catch (Exception error) {
-            error.printStackTrace();
+            error.printStackTrace(System.out);
+        }
+    }
+
+    private static void exceptionsRethrowClassInitFailureNoStringInit() {
+        try {
+            try {
+                BadInitNoStringInit.dummy = 1;
+                throw new IllegalStateException("Should not reach here.");
+            } catch (BadErrorNoStringInit e) {
+                System.out.println(e);
+            }
+
+            // Check if it works a second time.
+
+            try {
+                BadInitNoStringInit.dummy = 1;
+                throw new IllegalStateException("Should not reach here.");
+            } catch (NoClassDefFoundError e) {
+                System.out.println(e);
+                System.out.println(e.getCause());
+            }
+        } catch (Exception error) {
+            error.printStackTrace(System.out);
         }
     }
 }
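
The new expected output encodes the class-initialization failure protocol these tests pin down: an Error thrown from <clinit> propagates as-is to the first user of the class, and once the class is marked erroneous, every later use throws NoClassDefFoundError, which ART decorates with the original error as its cause. A minimal, self-contained sketch of the same protocol (class name and message are illustrative):

    class FailsInit {
        static int dummy;
        static {
            // The if (true) keeps javac from rejecting a <clinit> that can
            // never complete normally, mirroring the test's own idiom.
            if (true) throw new Error("boom");
        }
    }

    public class ClinitProtocolSketch {
        public static void main(String[] args) {
            try {
                FailsInit.dummy = 1;
            } catch (Error e) {
                System.out.println("first use: " + e);   // the Error itself
            }
            try {
                FailsInit.dummy = 1;
            } catch (NoClassDefFoundError e) {
                System.out.println("second use: " + e);  // class now erroneous
                // On ART the cause is the original Error; other VMs may
                // leave it null.
                System.out.println("cause: " + e.getCause());
            }
        }
    }
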
diff --git a/test/011-array-copy/src/Main.java b/test/011-array-copy/src/Main.java
index 96e1dbf..d9b61e7 100644
--- a/test/011-array-copy/src/Main.java
+++ b/test/011-array-copy/src/Main.java
@@ -69,6 +69,11 @@
             array[i] = (long) i;
         }
     }
+    static void initCharArray(char[] array) {
+        for (int i = 0; i < ARRAY_SIZE; i++) {
+            array[i] = (char) i;
+        }
+    }
 
     /*
      * Perform an array copy operation on primitive arrays with different
@@ -79,16 +84,19 @@
         short[] shortArray = new short[ARRAY_SIZE];
         int[] intArray = new int[ARRAY_SIZE];
         long[] longArray = new long[ARRAY_SIZE];
+        char[] charArray = new char[ARRAY_SIZE];
 
         initByteArray(byteArray);
         initShortArray(shortArray);
         initIntArray(intArray);
         initLongArray(longArray);
+        initCharArray(charArray);
 
         System.arraycopy(byteArray, srcPos, byteArray, dstPos, length);
         System.arraycopy(shortArray, srcPos, shortArray, dstPos, length);
         System.arraycopy(intArray, srcPos, intArray, dstPos, length);
         System.arraycopy(longArray, srcPos, longArray, dstPos, length);
+        System.arraycopy(charArray, srcPos, charArray, dstPos, length);
 
         for (int i = 0; i < ARRAY_SIZE; i++) {
             if (intArray[i] != byteArray[i]) {
@@ -103,6 +111,10 @@
                 System.out.println("mismatch int vs long at " + i + " : " +
                     Arrays.toString(longArray));
                 break;
+            } else if (intArray[i] != charArray[i]) {
+                System.out.println("mismatch int vs char at " + i + " : " +
+                    Arrays.toString(charArray));
+                break;
             }
         }
 
diff --git a/test/020-string/expected.txt b/test/020-string/expected.txt
index 081fea3..83a0835 100644
--- a/test/020-string/expected.txt
+++ b/test/020-string/expected.txt
@@ -1,7 +1,13 @@
 testStr is 'This is a very nice string'
 This is a very nice string
-Compare result is 32
+Compare result is greater than zero
 Compare unicode: -65302
 Got expected exception
 subStr is 'uick brown fox jumps over the lazy '
 Indexes are: 0:-1:0:43:33:-1:18:13:13:-1:18:18:-1:13:-1:-1:-1
+Got expected exception
+Got expected exception
+Got expected exception
+Got expected exception
+Got expected exception
+llo And
diff --git a/test/020-string/src/Main.java b/test/020-string/src/Main.java
index b876e6a..ccf94aa 100644
--- a/test/020-string/src/Main.java
+++ b/test/020-string/src/Main.java
@@ -25,6 +25,7 @@
         basicTest();
         indexTest();
         constructorTest();
+        copyTest();
     }
 
     public static void basicTest() {
@@ -44,7 +45,14 @@
         if (testStr.length() != testStr2.length())
             System.out.println("WARNING: stringTest length mismatch");
 
-        System.out.println("Compare result is " + testStr.compareTo(testStr2));
+        int compareResult = testStr.compareTo(testStr2);
+        if (compareResult > 0) {
+          System.out.println("Compare result is greater than zero");
+        } else if (compareResult == 0) {
+          System.out.println("Compare result is equal to zero");
+        } else {
+          System.out.println("Compare result is less than zero");
+        }
 
         // expected: -65302
         String s1 = "\u0c6d\u0cb6\u0d00\u0000\u0080\u0080\u0080\u0000\u0002\u0002\u0002\u0000\u00e9\u00e9\u00e9";
@@ -117,4 +125,48 @@
         String s14 = new String(codePoints, 1, 3);
         String s15 = new String(stringBuilder);
     }
+
+    public static void copyTest() {
+        String src = new String("Hello Android");
+        char[] dst = new char[7];
+        char[] tmp = null;
+
+        try {
+            src.getChars(2, 9, tmp, 0);
+            System.out.println("GLITCH: expected exception");
+        } catch (NullPointerException npe) {
+            System.out.println("Got expected exception");
+        }
+
+        try {
+            src.getChars(-1, 9, dst, 0);
+            System.out.println("GLITCH: expected exception");
+        } catch (StringIndexOutOfBoundsException sioobe) {
+            System.out.println("Got expected exception");
+        }
+
+        try {
+            src.getChars(2, 19, dst, 0);
+            System.out.println("GLITCH: expected exception");
+        } catch (StringIndexOutOfBoundsException sioobe) {
+            System.out.println("Got expected exception");
+        }
+
+        try {
+            src.getChars(2, 1, dst, 0);
+            System.out.println("GLITCH: expected exception");
+        } catch (StringIndexOutOfBoundsException sioobe) {
+            System.out.println("Got expected exception");
+        }
+
+        try {
+            src.getChars(2, 10, dst, 0);
+            System.out.println("GLITCH: expected exception");
+        } catch (ArrayIndexOutOfBoundsException aioobe) {
+            System.out.println("Got expected exception");
+        }
+
+        src.getChars(2, 9, dst, 0);
+        System.out.println(new String(dst));
+    }
 }
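
As a worked check of the one in-bounds call at the end of copyTest: getChars(2, 9, dst, 0) copies srcEnd - srcBegin = 7 characters, source indices 2 through 8 of "Hello Android", into dst[0..6], which is exactly the new "llo And" line in expected.txt:

    public class GetCharsSketch {
        public static void main(String[] args) {
            char[] dst = new char[7];
            // Copies indices [2, 9) of the source into dst starting at 0.
            "Hello Android".getChars(2, 9, dst, 0);
            System.out.println(new String(dst)); // prints "llo And"
        }
    }
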
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0226614..d1ea0b1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -85,7 +85,7 @@
         Assert.assertEquals("this is a path", test.replaceAll("/", " "));
         Assert.assertEquals("this is a path", test.replace("/", " "));
 
-        Class Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
+        Class<?> Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
         Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
         String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
         System.out.println(result);
diff --git a/test/022-interface/build b/test/022-interface/build
index 3f8915c..5cfc7f2 100644
--- a/test/022-interface/build
+++ b/test/022-interface/build
@@ -20,8 +20,8 @@
 # Use classes that are compiled with ecj that exposes an invokeinterface
 # issue when interfaces override methods in Object
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index c2e41c5..346e13d 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -1,6 +1,7 @@
 import otherpackage.OtherPackageClass;
 
 import java.io.Serializable;
+import java.lang.reflect.AbstractMethod;
 import java.lang.reflect.AccessibleObject;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
@@ -117,14 +118,13 @@
         printClassAttrs(FancyClass.class);
 
         try {
-            Constructor cons;
-            cons = MemberClass.class.getConstructor(
-                    new Class[] { MemberClass.class });
+            Constructor<?> cons;
+            cons = MemberClass.class.getConstructor(MemberClass.class);
             System.out.println("constructor signature: "
                     + getSignatureAttribute(cons));
 
             Method meth;
-            meth = MemberClass.class.getMethod("foo", (Class[]) null);
+            meth = MemberClass.class.getMethod("foo");
             System.out.println("method signature: "
                     + getSignatureAttribute(meth));
 
@@ -221,8 +221,11 @@
     public static String getSignatureAttribute(Object obj) {
         Method method;
         try {
-            Class c = Class.forName("libcore.reflect.AnnotationAccess");
-            method = c.getDeclaredMethod("getSignature", java.lang.reflect.AnnotatedElement.class);
+            Class<?> c = obj.getClass();
+            if (c == Method.class || c == Constructor.class) {
+              c = AbstractMethod.class;
+            }
+            method = c.getDeclaredMethod("getSignatureAttribute");
             method.setAccessible(true);
         } catch (Exception ex) {
             ex.printStackTrace();
@@ -230,7 +233,7 @@
         }
 
         try {
-            return (String) method.invoke(null, obj);
+            return (String) method.invoke(obj);
         } catch (IllegalAccessException ex) {
             throw new RuntimeException(ex);
         } catch (InvocationTargetException ex) {
@@ -259,9 +262,7 @@
     /*
      * Dump a variety of class attributes.
      */
-    public static void printClassAttrs(Class clazz) {
-        Class clazz2;
-
+    public static <T> void printClassAttrs(Class<T> clazz) {
         System.out.println("***** " + clazz + ":");
 
         System.out.println("  name: "
@@ -317,7 +318,7 @@
         System.out.println("  genericInterfaces: "
             + stringifyTypeArray(clazz.getGenericInterfaces()));
 
-        TypeVariable<Class<?>>[] typeParameters = clazz.getTypeParameters();
+        TypeVariable<Class<T>>[] typeParameters = clazz.getTypeParameters();
         System.out.println("  typeParameters: "
             + stringifyTypeArray(typeParameters));
     }
diff --git a/test/032-concrete-sub/src/ConcreteSub.java b/test/032-concrete-sub/src/ConcreteSub.java
index 083f25d..95adf63 100644
--- a/test/032-concrete-sub/src/ConcreteSub.java
+++ b/test/032-concrete-sub/src/ConcreteSub.java
@@ -37,13 +37,13 @@
         /*
          * Check reflection stuff.
          */
-        Class absClass = AbstractBase.class;
+        Class<?> absClass = AbstractBase.class;
         Method meth;
 
         System.out.println("class modifiers=" + absClass.getModifiers());
 
         try {
-            meth = absClass.getMethod("redefineMe", (Class[]) null);
+            meth = absClass.getMethod("redefineMe");
         } catch (NoSuchMethodException nsme) {
             nsme.printStackTrace();
             return;
diff --git a/test/033-class-init-deadlock/expected.txt b/test/033-class-init-deadlock/expected.txt
index 182d0da..9e843a0 100644
--- a/test/033-class-init-deadlock/expected.txt
+++ b/test/033-class-init-deadlock/expected.txt
@@ -1,6 +1,4 @@
 Deadlock test starting.
-A initializing...
-B initializing...
 Deadlock test interrupting threads.
 Deadlock test main thread bailing.
 A initialized: false
diff --git a/test/033-class-init-deadlock/src/Main.java b/test/033-class-init-deadlock/src/Main.java
index 3233230..bd4d4ab 100644
--- a/test/033-class-init-deadlock/src/Main.java
+++ b/test/033-class-init-deadlock/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CyclicBarrier;
+
 /**
  * This causes most VMs to lock up.
  *
@@ -23,6 +25,8 @@
     public static boolean aInitialized = false;
     public static boolean bInitialized = false;
 
+    public static CyclicBarrier barrier = new CyclicBarrier(3);
+
     static public void main(String[] args) {
         Thread thread1, thread2;
 
@@ -30,10 +34,10 @@
         thread1 = new Thread() { public void run() { new A(); } };
         thread2 = new Thread() { public void run() { new B(); } };
         thread1.start();
-        // Give thread1 a chance to start before starting thread2.
-        try { Thread.sleep(1000); } catch (InterruptedException ie) { }
         thread2.start();
 
+        // Not expecting any exceptions, so print them out if we get them.
+        try { barrier.await(); } catch (Exception e) { System.out.println(e); }
         try { Thread.sleep(6000); } catch (InterruptedException ie) { }
 
         System.out.println("Deadlock test interrupting threads.");
@@ -48,8 +52,8 @@
 
 class A {
     static {
-        System.out.println("A initializing...");
-        try { Thread.sleep(3000); } catch (InterruptedException ie) { }
+        // Not expecting any exceptions, so print them out if we get them.
+        try { Main.barrier.await(); } catch (Exception e) { System.out.println(e); }
         new B();
         System.out.println("A initialized");
         Main.aInitialized = true;
@@ -58,8 +62,8 @@
 
 class B {
     static {
-        System.out.println("B initializing...");
-        try { Thread.sleep(3000); } catch (InterruptedException ie) { }
+        // Not expecting any exceptions, so print them out if we get them.
+        try { Main.barrier.await(); } catch (Exception e) { System.out.println(e); }
         new A();
         System.out.println("B initialized");
         Main.bInitialized = true;
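
The rewrite above replaces timing-based sleeps with a rendezvous: a three-party CyclicBarrier (the main thread plus the two initializer threads) guarantees that both <clinit> methods are in flight before either class tries to instantiate the other, so the intended deadlock is deterministic rather than merely likely. A minimal sketch of the barrier pattern on its own:

    import java.util.concurrent.CyclicBarrier;

    public class BarrierSketch {
        static final CyclicBarrier barrier = new CyclicBarrier(3);

        public static void main(String[] args) throws Exception {
            Runnable party = new Runnable() {
                public void run() {
                    // Not expecting any exceptions, so print them if we get them.
                    try { barrier.await(); } catch (Exception e) { System.out.println(e); }
                }
            };
            new Thread(party).start();
            new Thread(party).start();
            barrier.await(); // blocks until all three parties have arrived
            System.out.println("all three parties running concurrently");
        }
    }
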
diff --git a/test/034-call-null/expected.txt b/test/034-call-null/expected.txt
index 343226f..4e0281e 100644
--- a/test/034-call-null/expected.txt
+++ b/test/034-call-null/expected.txt
@@ -1,2 +1,2 @@
-java.lang.NullPointerException: Attempt to invoke direct method 'void Main.doStuff(int, int[][], java.lang.String, java.lang.String[][])' on a null object reference
+Exception in thread "main" java.lang.NullPointerException: Attempt to invoke direct method 'void Main.doStuff(int, int[][], java.lang.String, java.lang.String[][])' on a null object reference
 	at Main.main(Main.java:26)
diff --git a/test/038-inner-null/expected.txt b/test/038-inner-null/expected.txt
index ba411f0..2e92564 100644
--- a/test/038-inner-null/expected.txt
+++ b/test/038-inner-null/expected.txt
@@ -1,4 +1,4 @@
 new Special()
-java.lang.NullPointerException: Attempt to invoke virtual method 'void Main$Blort.repaint()' on a null object reference
+Exception in thread "main" java.lang.NullPointerException: Attempt to invoke virtual method 'void Main$Blort.repaint()' on a null object reference
 	at Main$Special.callInner(Main.java:31)
 	at Main.main(Main.java:20)
diff --git a/test/042-new-instance/expected.txt b/test/042-new-instance/expected.txt
index 7d843d1..c5de313 100644
--- a/test/042-new-instance/expected.txt
+++ b/test/042-new-instance/expected.txt
@@ -9,3 +9,4 @@
 Cons got expected PackageAccess complaint
 Cons got expected InstantationException
 Cons got expected PackageAccess2 complaint
+Cons ConstructorAccess succeeded
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index b0a5fd4..755d62e 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -33,7 +33,7 @@
     static void testClassNewInstance() {
         // should succeed
         try {
-            Class c = Class.forName("LocalClass");
+            Class<?> c = Class.forName("LocalClass");
             Object obj = c.newInstance();
             System.out.println("LocalClass succeeded");
         } catch (Exception ex) {
@@ -43,7 +43,7 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess");
+            Class<?> c = Class.forName("otherpackage.PackageAccess");
             Object obj = c.newInstance();
             System.err.println("ERROR: PackageAccess succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
@@ -71,8 +71,8 @@
     static void testConstructorNewInstance() {
         // should fail -- getConstructor only returns public constructors
         try {
-            Class c = Class.forName("LocalClass");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("LocalClass");
+            Constructor<?> cons = c.getConstructor();
             System.err.println("Cons LocalClass succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             System.out.println("Cons LocalClass failed as expected");
@@ -83,8 +83,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("LocalClass2");
-            Constructor cons = c.getConstructor((Class[]) null);
+            Class<?> c = Class.forName("LocalClass2");
+            Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
             System.out.println("Cons LocalClass2 succeeded");
         } catch (Exception ex) {
@@ -94,8 +94,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("Main$InnerClass");
-            Constructor cons = c.getDeclaredConstructor(new Class<?>[]{Main.class});
+            Class<?> c = Class.forName("Main$InnerClass");
+            Constructor<?> cons = c.getDeclaredConstructor(Main.class);
             Object obj = cons.newInstance(new Main());
             System.out.println("Cons InnerClass succeeded");
         } catch (Exception ex) {
@@ -105,8 +105,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("Main$StaticInnerClass");
-            Constructor cons = c.getDeclaredConstructor((Class[]) null);
+            Class<?> c = Class.forName("Main$StaticInnerClass");
+            Constructor<?> cons = c.getDeclaredConstructor();
             Object obj = cons.newInstance();
             System.out.println("Cons StaticInnerClass succeeded");
         } catch (Exception ex) {
@@ -116,8 +116,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("otherpackage.PackageAccess");
+            Constructor<?> cons = c.getConstructor();
             System.err.println("ERROR: Cons PackageAccess succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             // constructor isn't public
@@ -129,8 +129,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("MaybeAbstract");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("MaybeAbstract");
+            Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
             System.err.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationException ie) {
@@ -143,8 +143,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess2");
-            Constructor cons = c.getConstructor((Class[]) null);
+            Class<?> c = Class.forName("otherpackage.PackageAccess2");
+            Constructor<?> cons = c.getConstructor();
             if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
             Object obj = cons.newInstance();
             System.err.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
@@ -156,6 +156,14 @@
             ex.printStackTrace();
         }
 
+        // should succeed
+        try {
+            otherpackage.ConstructorAccess.newConstructorInstance();
+            System.out.println("Cons ConstructorAccess succeeded");
+        } catch (Exception ex) {
+            System.err.println("Cons ConstructorAccess failed");
+            ex.printStackTrace();
+        }
     }
 
     class InnerClass {
@@ -173,7 +181,6 @@
     public LocalClass2() {}
 }
 
-
 class LocalClass3 {
     public static void main() {
         try {
@@ -190,7 +197,7 @@
 
         static Object newInstance() {
             try {
-                Class c = CC.class;
+                Class<?> c = CC.class;
                 return c.newInstance();
             } catch (Exception ex) {
                 ex.printStackTrace();
diff --git a/test/042-new-instance/src/otherpackage/ConstructorAccess.java b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
new file mode 100644
index 0000000..79d572c
--- /dev/null
+++ b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package otherpackage;
+
+import java.lang.reflect.Constructor;
+
+public class ConstructorAccess {
+
+    static class Inner {
+      Inner() {}
+    }
+
+    // Test for the regression in b/25817515. The Inner class constructor
+    // should be accessible from this static method, but if we overshoot and
+    // check accessibility using the frame below (in the Main class), we will
+    // see an IllegalAccessException from #newInstance.
+    static public void newConstructorInstance() throws Exception {
+      Class<?> c = Inner.class;
+      Constructor<?> cons = c.getDeclaredConstructor();
+      Object obj = cons.newInstance();
+    }
+}
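
The regression test above hinges on which stack frame reflection consults for its access check: it must be the immediate caller (ConstructorAccess, which shares a package with Inner), not a frame further down such as Main. For contrast, a hedged sketch of the sequence that legitimately fails when issued from outside otherpackage, since Inner and its constructor are package-private:

    import java.lang.reflect.Constructor;

    class OutsidePackageSketch {
        static void tryIt() throws Exception {
            Class<?> c = Class.forName("otherpackage.ConstructorAccess$Inner");
            Constructor<?> cons = c.getDeclaredConstructor();
            // Throws IllegalAccessException: this class is not in
            // otherpackage, so it may not invoke the package-private Inner().
            cons.newInstance();
        }
    }
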
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index 052c8fa..2a5f0b9 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -42,7 +42,7 @@
  (no args)
 --- blob
 Success: method blob res=mix
-$Proxy1.getTrace null:-1
+$PROXY_CLASS_NAME0$.getTrace null:-1
 Invoke public abstract void Shapes.upChuck()
  (no args)
 Got expected ioobe
@@ -51,7 +51,7 @@
 Got expected ie
 
 Proxy interfaces: [interface Quads, interface Colors, interface Trace]
-Proxy methods: [public final java.lang.String $Proxy1.blob(), public final double $Proxy1.blue(int), public final R0a $Proxy1.checkMe(), public final R0aa $Proxy1.checkMe(), public final R0base $Proxy1.checkMe(), public final void $Proxy1.circle(int), public final boolean $Proxy1.equals(java.lang.Object), public final void $Proxy1.getTrace(), public final int $Proxy1.green(double), public final int $Proxy1.hashCode(), public final int $Proxy1.mauve(java.lang.String), public final int $Proxy1.rectangle(int,int), public final int $Proxy1.red(float), public final int $Proxy1.square(int,int), public final java.lang.String $Proxy1.toString(), public final int $Proxy1.trapezoid(int,double,int), public final void $Proxy1.upCheck() throws java.lang.InterruptedException, public final void $Proxy1.upChuck()]
+Proxy methods: [public final java.lang.String $PROXY_CLASS_NAME0$.blob(), public final double $PROXY_CLASS_NAME0$.blue(int), public final R0a $PROXY_CLASS_NAME0$.checkMe(), public final R0aa $PROXY_CLASS_NAME0$.checkMe(), public final R0base $PROXY_CLASS_NAME0$.checkMe(), public final void $PROXY_CLASS_NAME0$.circle(int), public final boolean $PROXY_CLASS_NAME0$.equals(java.lang.Object), public final void $PROXY_CLASS_NAME0$.getTrace(), public final int $PROXY_CLASS_NAME0$.green(double), public final int $PROXY_CLASS_NAME0$.hashCode(), public final int $PROXY_CLASS_NAME0$.mauve(java.lang.String), public final int $PROXY_CLASS_NAME0$.rectangle(int,int), public final int $PROXY_CLASS_NAME0$.red(float), public final int $PROXY_CLASS_NAME0$.square(int,int), public final java.lang.String $PROXY_CLASS_NAME0$.toString(), public final int $PROXY_CLASS_NAME0$.trapezoid(int,double,int), public final void $PROXY_CLASS_NAME0$.upCheck() throws java.lang.InterruptedException, public final void $PROXY_CLASS_NAME0$.upChuck()]
 Decl annos: []
 Param annos (0) : []
 Modifiers: 17
@@ -84,7 +84,7 @@
 Invoke public abstract void InterfaceW1.bothThrowBase() throws BaseException,SubException,SubSubException
  (no args)
 Got expected exception
-Proxy methods: [public final boolean $Proxy3.equals(java.lang.Object), public final java.lang.Object $Proxy3.foo(), public final java.lang.String $Proxy3.foo(), public final int $Proxy3.hashCode(), public final java.lang.String $Proxy3.toString()]
+Proxy methods: [public final boolean $PROXY_CLASS_NAME1$.equals(java.lang.Object), public final java.lang.Object $PROXY_CLASS_NAME1$.foo(), public final java.lang.String $PROXY_CLASS_NAME1$.foo(), public final int $PROXY_CLASS_NAME1$.hashCode(), public final java.lang.String $PROXY_CLASS_NAME1$.toString()]
 Invocation of public abstract java.lang.String NarrowingTest$I2.foo()
 Invoking foo using I2 type: hello
 Invocation of public abstract java.lang.Object NarrowingTest$I1.foo()
@@ -95,3 +95,5 @@
 5.8
 JNI_OnLoad called
 callback
+Found constructor.
+Found constructors with 0 exceptions
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 1573297..5f04b93 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -84,7 +84,8 @@
         });
         System.out.println("Proxy interfaces: " +
             Arrays.deepToString(proxy.getClass().getInterfaces()));
-        System.out.println("Proxy methods: " + Arrays.deepToString(methods));
+        System.out.println("Proxy methods: " +
+            Main.replaceProxyClassNamesForOutput(Arrays.deepToString(methods)));
         Method meth = methods[methods.length -1];
         System.out.println("Decl annos: " + Arrays.deepToString(meth.getDeclaredAnnotations()));
         Annotation[][] paramAnnos = meth.getParameterAnnotations();
@@ -98,17 +99,16 @@
         InvocationHandler handler = new MyInvocationHandler(proxyMe);
 
         /* create the proxy class */
-        Class proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
-                            new Class[] { Quads.class, Colors.class, Trace.class });
+        Class<?> proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
+                Quads.class, Colors.class, Trace.class);
+        Main.registerProxyClassName(proxyClass.getCanonicalName());
 
         /* create a proxy object, passing the handler object in */
         Object proxy = null;
         try {
-            Constructor<Class> cons;
-            cons = proxyClass.getConstructor(
-                            new Class[] { InvocationHandler.class });
+            Constructor<?> cons = proxyClass.getConstructor(InvocationHandler.class);
             //System.out.println("Constructor is " + cons);
-            proxy = cons.newInstance(new Object[] { handler });
+            proxy = cons.newInstance(handler);
         } catch (NoSuchMethodException nsme) {
             System.err.println("failed: " + nsme);
         } catch (InstantiationException ie) {
@@ -262,7 +262,8 @@
             for (int i = 0; i < stackTrace.length; i++) {
                 StackTraceElement ste = stackTrace[i];
                 if (ste.getMethodName().equals("getTrace")) {
-                  System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                  String outputClassName = Main.replaceProxyClassNamesForOutput(ste.getClassName());
+                  System.out.println(outputClassName + "." + ste.getMethodName() + " " +
                                      ste.getFileName() + ":" + ste.getLineNumber());
                 }
             }
@@ -276,7 +277,8 @@
             for (int i = 0; i < stackTrace.length; i++) {
                 StackTraceElement ste = stackTrace[i];
                 if (ste.getMethodName().equals("getTrace")) {
-                  System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                  String outputClassName = Main.replaceProxyClassNamesForOutput(ste.getClassName());
+                  System.out.println(outputClassName + "." + ste.getMethodName() + " " +
                                      ste.getFileName() + ":" + ste.getLineNumber());
                 }
             }
diff --git a/test/044-proxy/src/Clash.java b/test/044-proxy/src/Clash.java
index adeffdc..d000112 100644
--- a/test/044-proxy/src/Clash.java
+++ b/test/044-proxy/src/Clash.java
@@ -30,7 +30,7 @@
         /* try passing in the same interface twice */
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface1A.class, Interface1A.class },
+                new Class<?>[] { Interface1A.class, Interface1A.class },
                 handler);
             System.err.println("Dupe did not throw expected exception");
         } catch (IllegalArgumentException iae) {
@@ -39,7 +39,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface1A.class, Interface1B.class },
+                new Class<?>[] { Interface1A.class, Interface1B.class },
                 handler);
             System.err.println("Clash did not throw expected exception");
         } catch (IllegalArgumentException iae) {
diff --git a/test/044-proxy/src/Clash2.java b/test/044-proxy/src/Clash2.java
index 2a384f4..e405cfe 100644
--- a/test/044-proxy/src/Clash2.java
+++ b/test/044-proxy/src/Clash2.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface2A.class, Interface2B.class },
+                new Class<?>[] { Interface2A.class, Interface2B.class },
                 handler);
             System.err.println("Clash2 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
diff --git a/test/044-proxy/src/Clash3.java b/test/044-proxy/src/Clash3.java
index 6d6f2f2..44806ce 100644
--- a/test/044-proxy/src/Clash3.java
+++ b/test/044-proxy/src/Clash3.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] {
+                new Class<?>[] {
                     Interface3a.class,
                     Interface3base.class,
                     Interface3aa.class,
diff --git a/test/044-proxy/src/Clash4.java b/test/044-proxy/src/Clash4.java
index 1bfb37f..ca5c3ab 100644
--- a/test/044-proxy/src/Clash4.java
+++ b/test/044-proxy/src/Clash4.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] {
+                new Class<?>[] {
                     Interface4a.class,
                     Interface4aa.class,
                     Interface4base.class,
diff --git a/test/044-proxy/src/ConstructorProxy.java b/test/044-proxy/src/ConstructorProxy.java
new file mode 100644
index 0000000..95d150c
--- /dev/null
+++ b/test/044-proxy/src/ConstructorProxy.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+
+/**
+ * Tests reflective access to the constructors of proxy classes.
+ */
+class ConstructorProxy implements InvocationHandler {
+  public static void main() {
+    try {
+      new ConstructorProxy().runTest();
+    } catch (Exception e) {
+      System.out.println("Unexpected failure occured");
+      e.printStackTrace();
+    }
+  }
+
+  public void runTest() throws Exception {
+    Class<?> proxyClass = Proxy.getProxyClass(
+            getClass().getClassLoader(),
+            new Class<?>[] { Runnable.class }
+    );
+    Constructor<?> constructor = proxyClass.getConstructor(InvocationHandler.class);
+    System.out.println("Found constructor.");
+    // We used to crash when asking for the exception types of the constructor, because the
+    // runtime was not using the non-proxy ArtMethod.
+    Class<?>[] exceptions = constructor.getExceptionTypes();
+    System.out.println("Found constructors with " + exceptions.length + " exceptions");
+  }
+
+  @Override
+  public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
+    return args[0];
+  }
+}
+
diff --git a/test/044-proxy/src/FloatSelect.java b/test/044-proxy/src/FloatSelect.java
index febe697..217ccaf 100644
--- a/test/044-proxy/src/FloatSelect.java
+++ b/test/044-proxy/src/FloatSelect.java
@@ -34,7 +34,7 @@
     public static void main(String[] args) {
         FloatSelectI proxyObject = (FloatSelectI) Proxy.newProxyInstance(
             FloatSelectI.class.getClassLoader(),
-            new Class[] { FloatSelectI.class },
+            new Class<?>[] { FloatSelectI.class },
             new FloatSelectIInvoke1());
 
         float floatResult = proxyObject.method(2.1f, 5.8f);
diff --git a/test/044-proxy/src/Main.java b/test/044-proxy/src/Main.java
index 05e8e5b..9dadb7c 100644
--- a/test/044-proxy/src/Main.java
+++ b/test/044-proxy/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.util.HashMap;
+
 /**
  * Test java.lang.reflect.Proxy
  */
@@ -29,5 +31,26 @@
         NarrowingTest.main(null);
         FloatSelect.main(null);
         NativeProxy.main(args);
+        ConstructorProxy.main();
     }
+
+    // The following code maps the actual proxy class names (e.g. $Proxy2) to their test output
+    // names (e.g. $PROXY_CLASS_NAME1$). This avoids flaky test failures due to potentially
+    // nondeterministic proxy class naming.
+
+    public static void registerProxyClassName(String proxyClassName) {
+        proxyClassNameMap.put(proxyClassName,
+                              "$PROXY_CLASS_NAME" + (uniqueTestProxyClassNum++) + "$");
+    }
+
+    public static String replaceProxyClassNamesForOutput(String str) {
+        for (String key : proxyClassNameMap.keySet()) {
+            str = str.replace(key, proxyClassNameMap.get(key));
+        }
+        return str;
+    }
+
+    private static final HashMap<String, String> proxyClassNameMap = new HashMap<String, String>();
+
+    private static int uniqueTestProxyClassNum = 0;
 }
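
Registration and filtering work as a pair, as the BasicTest and NarrowingTest changes in this patch show: register the runtime-chosen proxy class name once, then pass any output that might embed it through the filter. A minimal usage sketch (the Runnable interface is illustrative):

    import java.lang.reflect.Proxy;

    class ProxyNameSketch {
        static void demo() {
            Class<?> proxyClass = Proxy.getProxyClass(
                    Runnable.class.getClassLoader(), new Class<?>[] { Runnable.class });
            Main.registerProxyClassName(proxyClass.getCanonicalName());
            // e.g. "$Proxy0.run" would print as "$PROXY_CLASS_NAME0$.run".
            System.out.println(
                    Main.replaceProxyClassNamesForOutput(proxyClass.getName() + ".run"));
        }
    }
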
diff --git a/test/044-proxy/src/NarrowingTest.java b/test/044-proxy/src/NarrowingTest.java
index 3b94b76..5b80d72 100644
--- a/test/044-proxy/src/NarrowingTest.java
+++ b/test/044-proxy/src/NarrowingTest.java
@@ -45,9 +45,11 @@
                        }
                    }
                });
+       Main.registerProxyClassName(proxy.getClass().getCanonicalName());
 
        Method[] methods = proxy.getClass().getDeclaredMethods();
-       System.out.println("Proxy methods: " + Arrays.deepToString(methods));
+       System.out.println("Proxy methods: " +
+                          Main.replaceProxyClassNamesForOutput(Arrays.deepToString(methods)));
 
        System.out.println("Invoking foo using I2 type: " + proxy.foo());
 
diff --git a/test/044-proxy/src/NativeProxy.java b/test/044-proxy/src/NativeProxy.java
index b425da8..c609dc2 100644
--- a/test/044-proxy/src/NativeProxy.java
+++ b/test/044-proxy/src/NativeProxy.java
@@ -40,7 +40,7 @@
         try {
             NativeInterface inf = (NativeInterface)Proxy.newProxyInstance(
                     NativeProxy.class.getClassLoader(),
-                    new Class[] { NativeInterface.class },
+                    new Class<?>[] { NativeInterface.class },
                     new NativeInvocationHandler());
 
             nativeCall(inf);
diff --git a/test/044-proxy/src/ReturnsAndArgPassing.java b/test/044-proxy/src/ReturnsAndArgPassing.java
index 225cc5b..3d8ebf0 100644
--- a/test/044-proxy/src/ReturnsAndArgPassing.java
+++ b/test/044-proxy/src/ReturnsAndArgPassing.java
@@ -98,7 +98,7 @@
     MyInvocationHandler myHandler = new MyInvocationHandler();
     MyInterface proxyMyInterface =
         (MyInterface)Proxy.newProxyInstance(ReturnsAndArgPassing.class.getClassLoader(),
-                                            new Class[] { MyInterface.class },
+                                            new Class<?>[] { MyInterface.class },
                                             myHandler);
     check(fooInvocations == 0);
     proxyMyInterface.voidFoo();
@@ -441,7 +441,7 @@
     MyInvocationHandler myHandler = new MyInvocationHandler();
     MyInterface proxyMyInterface =
         (MyInterface)Proxy.newProxyInstance(ReturnsAndArgPassing.class.getClassLoader(),
-                                            new Class[] { MyInterface.class },
+                                            new Class<?>[] { MyInterface.class },
                                             myHandler);
 
     check((Integer)proxyMyInterface.selectArg(0, Integer.MAX_VALUE, Long.MAX_VALUE,
diff --git a/test/044-proxy/src/WrappedThrow.java b/test/044-proxy/src/WrappedThrow.java
index 27ae84e..643ba05 100644
--- a/test/044-proxy/src/WrappedThrow.java
+++ b/test/044-proxy/src/WrappedThrow.java
@@ -32,7 +32,7 @@
 
         try {
             proxy = Proxy.newProxyInstance(WrappedThrow.class.getClassLoader(),
-                new Class[] { InterfaceW1.class, InterfaceW2.class },
+                new Class<?>[] { InterfaceW1.class, InterfaceW2.class },
                 handler);
         } catch (IllegalArgumentException iae) {
             System.out.println("WT init failed");
diff --git a/test/046-reflect/expected.txt b/test/046-reflect/expected.txt
index d657d44..06932b9 100644
--- a/test/046-reflect/expected.txt
+++ b/test/046-reflect/expected.txt
@@ -96,8 +96,8 @@
 got expected exception for Constructor.newInstance
 ReflectTest done!
 public method
-static java.lang.Object java.util.Collections.checkType(java.lang.Object,java.lang.Class) accessible=false
-static java.lang.Object java.util.Collections.checkType(java.lang.Object,java.lang.Class) accessible=true
+private static void java.util.Collections.swap(java.lang.Object[],int,int) accessible=false
+private static void java.util.Collections.swap(java.lang.Object[],int,int) accessible=true
 checkType invoking null
 checkType got expected exception
 calling const-class FieldNoisyInitUser.class
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 0c90109..10dad8d 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -32,7 +32,7 @@
     public Main(ArrayList<Integer> stuff) {}
 
     void printMethodInfo(Method meth) {
-        Class[] params, exceptions;
+        Class<?>[] params, exceptions;
         int i;
 
         System.out.println("Method name is " + meth.getName());
@@ -62,7 +62,7 @@
     private void showStrings(Target instance)
         throws NoSuchFieldException, IllegalAccessException {
 
-        Class target = Target.class;
+        Class<?> target = Target.class;
         String one, two, three, four;
         Field field = null;
 
@@ -80,15 +80,15 @@
 
     public static void checkAccess() {
         try {
-            Class target = otherpackage.Other.class;
+            Class<?> target = otherpackage.Other.class;
             Object instance = new otherpackage.Other();
             Method meth;
 
-            meth = target.getMethod("publicMethod", (Class[]) null);
+            meth = target.getMethod("publicMethod");
             meth.invoke(instance);
 
             try {
-                meth = target.getMethod("packageMethod", (Class[]) null);
+                meth = target.getMethod("packageMethod");
                 System.err.println("succeeded on package-scope method");
             } catch (NoSuchMethodException nsme) {
                 // good
@@ -97,7 +97,7 @@
 
             instance = otherpackage.Other.getInnerClassInstance();
             target = instance.getClass();
-            meth = target.getMethod("innerMethod", (Class[]) null);
+            meth = target.getMethod("innerMethod");
             try {
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
                 meth.invoke(instance);
@@ -121,26 +121,25 @@
     }
 
     public void run() {
-        Class target = Target.class;
+        Class<Target> target = Target.class;
         Method meth = null;
         Field field = null;
         boolean excep;
 
         try {
-            meth = target.getMethod("myMethod", new Class[] { int.class });
+            meth = target.getMethod("myMethod", int.class);
 
             if (meth.getDeclaringClass() != target)
                 throw new RuntimeException();
             printMethodInfo(meth);
 
-            meth = target.getMethod("myMethod", new Class[] { float.class });
+            meth = target.getMethod("myMethod", float.class);
             printMethodInfo(meth);
 
-            meth = target.getMethod("myNoargMethod", (Class[]) null);
+            meth = target.getMethod("myNoargMethod");
             printMethodInfo(meth);
 
-            meth = target.getMethod("myMethod",
-                new Class[] { String[].class, float.class, char.class });
+            meth = target.getMethod("myMethod", String[].class, float.class, char.class);
             printMethodInfo(meth);
 
             Target instance = new Target();
@@ -157,11 +156,11 @@
             System.out.println("Result of invoke: " + boxval.intValue());
 
             System.out.println("Calling no-arg void-return method");
-            meth = target.getMethod("myNoargMethod", (Class[]) null);
+            meth = target.getMethod("myNoargMethod");
             meth.invoke(instance, (Object[]) null);
 
             /* try invoking a method that throws an exception */
-            meth = target.getMethod("throwingMethod", (Class[]) null);
+            meth = target.getMethod("throwingMethod");
             try {
                 meth.invoke(instance, (Object[]) null);
                 System.out.println("GLITCH: didn't throw");
@@ -372,7 +371,7 @@
             Target targ;
             Object[] args;
 
-            cons = target.getConstructor(new Class[] { int.class,float.class });
+            cons = target.getConstructor(int.class, float.class);
             args = new Object[] { new Integer(7), new Float(3.3333) };
             System.out.println("cons modifiers=" + cons.getModifiers());
             targ = cons.newInstance(args);
@@ -407,12 +406,13 @@
         System.out.println("ReflectTest done!");
     }
 
-    public static void checkType() {
+    public static void checkSwap() {
         Method m;
 
+        final Object[] objects = new Object[2];
         try {
-            m = Collections.class.getDeclaredMethod("checkType",
-                            Object.class, Class.class);
+            m = Collections.class.getDeclaredMethod("swap",
+                            Object[].class, int.class, int.class);
         } catch (NoSuchMethodException nsme) {
             nsme.printStackTrace();
             return;
@@ -421,7 +421,7 @@
         m.setAccessible(true);
         System.out.println(m + " accessible=" + m.isAccessible());
         try {
-            m.invoke(null, new Object(), Object.class);
+            m.invoke(null, objects, 0, 1);
         } catch (IllegalAccessException iae) {
             iae.printStackTrace();
             return;
@@ -432,7 +432,7 @@
 
         try {
             String s = "Should be ignored";
-            m.invoke(s, new Object(), Object.class);
+            m.invoke(s, objects, 0, 1);
         } catch (IllegalAccessException iae) {
             iae.printStackTrace();
             return;
@@ -443,7 +443,8 @@
 
         try {
             System.out.println("checkType invoking null");
-            m.invoke(null, new Object(), int.class);
+            // Trigger an NPE at the target.
+            m.invoke(null, null, 0, 1);
             System.out.println("ERROR: should throw InvocationTargetException");
         } catch (InvocationTargetException ite) {
             System.out.println("checkType got expected exception");
@@ -456,7 +457,7 @@
     public static void checkClinitForFields() throws Exception {
       // Loading a class constant shouldn't run <clinit>.
       System.out.println("calling const-class FieldNoisyInitUser.class");
-      Class niuClass = FieldNoisyInitUser.class;
+      Class<?> niuClass = FieldNoisyInitUser.class;
       System.out.println("called const-class FieldNoisyInitUser.class");
 
       // Getting the declared fields doesn't run <clinit>.
@@ -478,14 +479,14 @@
     public static void checkClinitForMethods() throws Exception {
       // Loading a class constant shouldn't run <clinit>.
       System.out.println("calling const-class MethodNoisyInitUser.class");
-      Class niuClass = MethodNoisyInitUser.class;
+      Class<?> niuClass = MethodNoisyInitUser.class;
       System.out.println("called const-class MethodNoisyInitUser.class");
 
       // Getting the declared methods doesn't run <clinit>.
       Method[] methods = niuClass.getDeclaredMethods();
       System.out.println("got methods");
 
-      Method method = niuClass.getMethod("staticMethod", (Class[]) null);
+      Method method = niuClass.getMethod("staticMethod");
       System.out.println("got method");
       method.invoke(null);
       System.out.println("invoked method");
@@ -515,8 +516,7 @@
 
         Method method;
         try {
-            method = Main.class.getMethod("fancyMethod",
-                new Class[] { ArrayList.class });
+            method = Main.class.getMethod("fancyMethod", ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -525,9 +525,9 @@
         System.out.println("generic method " + method.getName() + " params='"
             + stringifyTypeArray(parmTypes) + "' ret='" + ret + "'");
 
-        Constructor ctor;
+        Constructor<?> ctor;
         try {
-            ctor = Main.class.getConstructor(new Class[] { ArrayList.class });
+            ctor = Main.class.getConstructor(ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -578,8 +578,8 @@
         }
         Method method1, method2;
         try {
-            method1 = Main.class.getMethod("fancyMethod", new Class[] { ArrayList.class });
-            method2 = Main.class.getMethod("fancyMethod", new Class[] { ArrayList.class });
+            method1 = Main.class.getMethod("fancyMethod", ArrayList.class);
+            method2 = Main.class.getMethod("fancyMethod", ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -710,27 +710,27 @@
     private static void checkGetDeclaredConstructor() {
         try {
             Method.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Method.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Method.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
         try {
             Field.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Field.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Field.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
         try {
             Class.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Class.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Class.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
     }
 
@@ -744,7 +744,7 @@
 
         checkGetDeclaredConstructor();
         checkAccess();
-        checkType();
+        checkSwap();
         checkClinitForFields();
         checkClinitForMethods();
         checkGeneric();
diff --git a/test/048-reflect-v8/build b/test/048-reflect-v8/build
new file mode 100644
index 0000000..3552b5c
--- /dev/null
+++ b/test/048-reflect-v8/build
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+
+./default-build "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/expected.txt b/test/048-reflect-v8/expected.txt
new file mode 100644
index 0000000..54aede9
--- /dev/null
+++ b/test/048-reflect-v8/expected.txt
@@ -0,0 +1,104 @@
+==============================
+Are These Methods Default:
+==============================
+IsDefaultTest$DefaultInterface is default = yes
+IsDefaultTest$RegularInterface is default = no
+IsDefaultTest$ImplementsWithDefault is default = yes
+IsDefaultTest$ImplementsWithRegular is default = no
+==============================
+Are These Methods found by getDeclaredMethod:
+==============================
+No error thrown for class interface DefaultDeclared$DefaultInterface
+No error thrown for class interface DefaultDeclared$RegularInterface
+NoSuchMethodException thrown for class class DefaultDeclared$ImplementsWithDefault
+No error thrown for class class DefaultDeclared$ImplementsWithDeclared
+No error thrown for class class DefaultDeclared$ImplementsWithRegular
+NoSuchMethodException thrown for class class DefaultDeclared$UnimplementedWithRegular
+==============================
+Class annotations by type:
+==============================
+Annotations by type, defined by class SingleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by class SingleUser with annotation Calendars: <empty>
+Annotations by type, defined by class User with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by class User with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by class User2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by class User2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by class UserComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by class UserComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Annotations by type, defined by class UserSub with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by class UserSub with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by class UserSub2 with annotation Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by class UserSub2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Class declared annotation:
+==============================
+Declared annotations by class class SingleUser, annotation interface Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Declared annotations by class class SingleUser, annotation interface Calendars: <null>
+Declared annotations by class class User, annotation interface Calendar: <null>
+Declared annotations by class class User, annotation interface Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Declared annotations by class class UserComplex, annotation interface Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by class class UserComplex, annotation interface Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Declared annotations by class class UserSub, annotation interface Calendar: <null>
+Declared annotations by class class UserSub, annotation interface Calendars: <null>
+Declared annotations by class class UserSub2, annotation interface Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by class class UserSub2, annotation interface Calendars: <null>
+-----------------------------
+-----------------------------
+==============================
+Declared class annotations by type:
+==============================
+Declared annotations by type, defined by class SingleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Declared annotations by type, defined by class SingleUser with annotation Calendars: <empty>
+Declared annotations by type, defined by class User with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Declared annotations by type, defined by class User with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Declared annotations by type, defined by class User2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by type, defined by class User2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Declared annotations by type, defined by class UserComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Declared annotations by type, defined by class UserComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Declared annotations by type, defined by class UserSub with annotation Calendar: <empty>
+Declared annotations by type, defined by class UserSub with annotation Calendars: <empty>
+Declared annotations by type, defined by class UserSub2 with annotation Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by type, defined by class UserSub2 with annotation Calendars: <empty>
+-----------------------------
+-----------------------------
+==============================
+Method annotations by type:
+==============================
+Annotations by type, defined by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by method singleUser with annotation Calendars: <empty>
+Annotations by type, defined by method user with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by method user2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Declared method annotations:
+==============================
+Annotations declared by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations declared by method singleUser with annotation Calendars: <null>
+Annotations declared by method user with annotation Calendar: <null>
+Annotations declared by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations declared by method user2 with annotation Calendar: <null>
+Annotations declared by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations declared by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6)
+Annotations declared by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Declared method annotations by type:
+==============================
+Annotations by type, defined by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by method singleUser with annotation Calendars: <empty>
+Annotations by type, defined by method user with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by method user2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
diff --git a/test/048-reflect-v8/info.txt b/test/048-reflect-v8/info.txt
new file mode 100644
index 0000000..a336d30
--- /dev/null
+++ b/test/048-reflect-v8/info.txt
@@ -0,0 +1 @@
+Test reflection for 1.8 APIs
diff --git a/test/048-reflect-v8/src/AnnotationTest.java b/test/048-reflect-v8/src/AnnotationTest.java
new file mode 100644
index 0000000..75e6845
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTest.java
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Method;
+
+public class AnnotationTest extends AnnotationTestHelpers {
+  public static void testAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Class annotations by type:");
+    System.out.println("==============================");
+
+    // Print associated annotations:
+    // * A is directly present or repeatably present on an element E;
+    // * No annotation of A is directly/repeatably present on an element
+    //   AND E is a class AND A's type is inheritable, AND A is associated with its superclass.
+    // (Looks through superclasses recursively, but only if there are zero results at each
+    // level and the annotation is @Inherited.)
+    printAnnotationsByType(Calendar.class, SingleUser.class);
+    printAnnotationsByType(Calendars.class, SingleUser.class);
+
+    printAnnotationsByType(Calendar.class, User.class);
+    printAnnotationsByType(Calendars.class, User.class);
+
+    printAnnotationsByType(Calendar.class, User2.class);  // Enforce ordering 'z,x,y'
+    printAnnotationsByType(Calendars.class, User2.class);
+
+    // NOTE:
+    //    The order of the outer-most annotations (Calendars[C,C],S vs C,Calendars[C,C]) is
+    //    unspecified; in particular, the order of #getDeclaredAnnotations is left completely
+    //    unspecified. The only requirement for #getAnnotationsByType is to use the same
+    //    ordering as #getDeclaredAnnotations.
+    //    (Calendars[] itself has to maintain value() order).
+    printAnnotationsByType(Calendar.class, UserComplex.class);  // Cs(C,C),C collapses into C,C,C.
+    printAnnotationsByType(Calendars.class, UserComplex.class);
+
+    printAnnotationsByType(Calendar.class, UserSub.class);
+    printAnnotationsByType(Calendars.class, UserSub.class);
+
+    printAnnotationsByType(Calendar.class, UserSub2.class);
+    // The directly present "Calendar" annotation masks all the repeatably present
+    // "Calendar" annotations coming from User.
+    printAnnotationsByType(Calendars.class, UserSub2.class);
+    // Edge case: UserSub2 doesn't directly have a Calendars annotation,
+    // so it doesn't mask the "User" Calendars annotation.
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+
+  }
+
+  public static void testDeclaredAnnotation() {
+    System.out.println("==============================");
+    System.out.println("Class declared annotation:");
+    System.out.println("==============================");
+
+    // Print directly present annotations:
+    //
+    // The element E has an annotation_item for it (accessible through an
+    // annotations_directory_item) corresponding to an annotation A,
+    // and A's type_idx must match that on the encoded_annotation (from the annotation_item).
+    // (Does not look through superclasses.)
+    printDeclaredAnnotation(SingleUser.class, Calendar.class);
+    printDeclaredAnnotation(SingleUser.class, Calendars.class);
+
+    printDeclaredAnnotation(User.class, Calendar.class);
+    printDeclaredAnnotation(User.class, Calendars.class);
+
+    printDeclaredAnnotation(UserComplex.class, Calendar.class);
+    printDeclaredAnnotation(UserComplex.class, Calendars.class);
+
+    printDeclaredAnnotation(UserSub.class, Calendar.class);
+    printDeclaredAnnotation(UserSub.class, Calendars.class);
+
+    printDeclaredAnnotation(UserSub2.class, Calendar.class);
+    printDeclaredAnnotation(UserSub2.class, Calendars.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  public static void testDeclaredAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Declared class annotations by type:");
+    System.out.println("==============================");
+
+    // A is directly present or repeatably present on an element E;
+    // -- (does not do any recursion for classes regardless of @Inherited)
+    printDeclaredAnnotationsByType(Calendar.class, SingleUser.class);
+    printDeclaredAnnotationsByType(Calendars.class, SingleUser.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, User.class);
+    printDeclaredAnnotationsByType(Calendars.class, User.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, User2.class);  // Enforce ordering 'z,x,y'
+    printDeclaredAnnotationsByType(Calendars.class, User2.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserComplex.class);
+    printDeclaredAnnotationsByType(Calendars.class, UserComplex.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserSub.class);
+    printDeclaredAnnotationsByType(Calendars.class, UserSub.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserSub2.class);
+    // The directly present "Calendar" annotation masks all the repeatably present "Calendar"
+    // annotations coming from User.
+    printDeclaredAnnotationsByType(Calendars.class, UserSub2.class);
+    // Edge case: UserSub2 doesn't directly have a Calendars annotation,
+    // so it doesn't mask the "User" Calendars annotation.
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // "annotationUseClass."
+  private static <A extends Annotation> void printAnnotationsByType(Class<A> annotationClass,
+      Class<?> annotationUseClass) {
+    A[] annotationsByType = annotationUseClass.getAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by class "
+        + annotationUseClass.getName() + " with annotation " + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+
+    System.out.println(msg);
+  }
+
+  private static <A extends Annotation> void printDeclaredAnnotation(Class<?> annotationUseClass,
+      Class<A> annotationDefClass) {
+    A anno = annotationUseClass.getDeclaredAnnotation(annotationDefClass);
+
+    String msg = asString(anno);
+
+    System.out.println("Declared annotations by class " + annotationUseClass
+        + ", annotation " + annotationDefClass + ": " + msg);
+  }
+
+  // Print the annotation "annotationClass" that is directly/indirectly present with an element
+  // denoted by "annotationUseClass."
+  private static <A extends Annotation> void printDeclaredAnnotationsByType(
+      Class<A> annotationClass, Class<?> annotationUseClass) {
+    A[] annotationsByType = annotationUseClass.getDeclaredAnnotationsByType(annotationClass);
+
+    String msg = "Declared annnotations by type, defined by class " + annotationUseClass.getName()
+        + " with annotation " + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Method annotations by type:");
+    System.out.println("==============================");
+
+    // Print associated annotations:
+    // * A is directly present or repeatably present on an element E;
+    // * No annotation of A is directly/repeatably present on an element AND E is a class
+    //   AND A's type is inheritable, AND A is associated with its superclass.
+    // (Looks through superclasses recursively, but only if there are zero results at each
+    // level and the annotation is @Inherited.)
+    printMethodAnnotationsByType(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "userComplex", AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // "annotationUseClass" method methodName.
+  private static <A extends Annotation> void printMethodAnnotationsByType(Class<A> annotationClass,
+      String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A[] annotationsByType = m.getAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by method " + m.getName() + " with annotation " +
+      annotationClass.getName() + ": " +
+      asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodDeclaredAnnotations() {
+    System.out.println("==============================");
+    System.out.println("Declared method annotations:");
+    System.out.println("==============================");
+
+    printMethodDeclaredAnnotation(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "userComplex", AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // methodName in annotationUseClass.
+  private static <A extends Annotation> void printMethodDeclaredAnnotation(Class<A> annotationClass,
+      String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A declaredAnnotation = m.getDeclaredAnnotation(annotationClass);
+
+    String msg = "Annotations declared by method " + m.getName() + " with annotation "
+        + annotationClass.getName() + ": "
+        + asString(declaredAnnotation);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodDeclaredAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Declared method annotations by type:");
+    System.out.println("==============================");
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "userComplex",
+        AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // methodName in annotationUseClass.
+  private static <A extends Annotation> void printMethodDeclaredAnnotationByType(
+      Class<A> annotationClass, String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A[] annotationsByType = m.getDeclaredAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by method " + m.getName() + " with annotation "
+        + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+}
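The three lookup flavors this test drives (getAnnotationsByType, getDeclaredAnnotation, getDeclaredAnnotationsByType) are easy to conflate. As a reader's aid, here is a minimal self-contained sketch of the core distinction, using a hypothetical Tag/Tags pair rather than the test's Calendar/Calendars:

```java
import java.lang.annotation.Repeatable;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.util.Arrays;

public class RepeatableLookupSketch {
  @Retention(RetentionPolicy.RUNTIME)
  @Repeatable(Tags.class)
  @interface Tag { String value(); }

  @Retention(RetentionPolicy.RUNTIME)
  @interface Tags { Tag[] value(); }

  // Two repeated @Tag annotations compile to a single @Tags container.
  @Tag("a") @Tag("b")
  static class Target {}

  public static void main(String[] args) {
    // "By type" lookups look through the container: both tags come back.
    System.out.println(Arrays.toString(Target.class.getAnnotationsByType(Tag.class)));
    // A plain declared lookup does not: only the container is directly present.
    System.out.println(Target.class.getDeclaredAnnotation(Tag.class));   // null
    System.out.println(Target.class.getDeclaredAnnotation(Tags.class));  // the @Tags container
  }
}
```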
diff --git a/test/048-reflect-v8/src/AnnotationTestFixture.java b/test/048-reflect-v8/src/AnnotationTestFixture.java
new file mode 100644
index 0000000..248dfac
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTestFixture.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class AnnotationTestFixture {
+
+  @Calendar(dayOfWeek="single", hour=23)
+  public static void singleUser() {
+
+  }
+  @Calendars ({
+    @Calendar(dayOfMonth="last"),
+    @Calendar(dayOfWeek="Fri", hour=23)
+  })
+  public static void user() {
+
+  }
+
+  @Calendars ({
+    @Calendar(dayOfMonth="z"),
+    @Calendar(dayOfMonth="x"),
+    @Calendar(dayOfMonth="y")
+  })
+  public static void user2() {
+
+  }
+
+  @Calendar(dayOfMonth="afirst")
+  @Calendars ({
+    @Calendar(dayOfMonth="zsecond"),
+    @Calendar(dayOfMonth="athird", hour=23)
+  })
+  public static void userComplex() {
+
+  }
+}
diff --git a/test/048-reflect-v8/src/AnnotationTestHelpers.java b/test/048-reflect-v8/src/AnnotationTestHelpers.java
new file mode 100644
index 0000000..6b5bea2
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTestHelpers.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Annotation;
+
+public class AnnotationTestHelpers {
+  // Provide custom print functions that produce deterministic output.
+  // Note that Annotation#toString prints the fields in an unspecified order,
+  // which is why we can't rely on it.
+
+  public static String asString(Annotation anno) {
+    if (anno instanceof Calendar) {
+      return asString((Calendar)anno);
+    } else if (anno instanceof Calendars) {
+      return asString((Calendars)anno);
+    } else {
+      if (anno == null) {
+        return "<null>";
+      }
+      // Fallback; usually only reached on a test failure.
+      return anno.toString();
+    }
+  }
+
+  public static String asString(Annotation[] annos) {
+    String msg = "";
+
+    if (annos == null) {
+      msg += "<null>";
+    } else if (annos.length == 0) {
+      msg += "<empty>";
+    } else {
+      for (int i = 0; i < annos.length; ++i) {
+        msg += asString(annos[i]);
+
+        if (i != annos.length - 1) {
+          msg += ", ";
+        }
+      }
+    }
+
+    return msg;
+  }
+
+  public static String asString(Calendar calendar) {
+    if (calendar == null) {
+      return "<null>";
+    }
+
+    return "@Calendar(dayOfMonth=" + calendar.dayOfMonth() + ", dayOfWeek=" +
+      calendar.dayOfWeek() + ", hour=" + calendar.hour() + ")";
+  }
+
+  public static String asString(Calendars calendars) {
+    if (calendars == null) {
+      return "<null>";
+    }
+
+    String s = "@Calendars(value=[";
+
+    Calendar[] allValues = calendars.value();
+    for (int i = 0; i < allValues.length; ++i) {
+      s += asString(allValues[i]);
+      if (i != allValues.length - 1) {
+        s += ", ";
+      }
+    }
+
+    s += "])";
+
+    return s;
+  }
+}
diff --git a/test/048-reflect-v8/src/Calendar.java b/test/048-reflect-v8/src/Calendar.java
new file mode 100644
index 0000000..4a16573
--- /dev/null
+++ b/test/048-reflect-v8/src/Calendar.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Repeatable;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+// This is a plain old non-1.8 annotation. At runtime we can see that it carries a
+// "Repeatable" annotation if we query it with getDeclaredAnnotation(Repeatable.class).
+@Retention(RetentionPolicy.RUNTIME)
+@Repeatable(Calendars.class)
+@Inherited  // Note: the container must also be @Inherited, per the JLS.
+public @interface Calendar {
+    String dayOfMonth() default "unspecified_month";
+    String dayOfWeek() default "unspecified_week";
+    int hour() default 6;
+}
+
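As the comment in Calendar.java says, @Repeatable is itself just a runtime-visible annotation on the annotation type. A short sketch (assuming the test's Calendar/Calendars classes are on the classpath) of observing it reflectively:

```java
import java.lang.annotation.Repeatable;

public class RepeatableMetaSketch {
  public static void main(String[] args) {
    // Calendar carries @Repeatable(Calendars.class); the meta-annotation is
    // an ordinary declared annotation on the Calendar annotation type itself.
    Repeatable r = Calendar.class.getDeclaredAnnotation(Repeatable.class);
    System.out.println(r.value());  // prints "interface Calendars"
  }
}
```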
diff --git a/test/048-reflect-v8/src/Calendars.java b/test/048-reflect-v8/src/Calendars.java
new file mode 100644
index 0000000..caeda52
--- /dev/null
+++ b/test/048-reflect-v8/src/Calendars.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+// Plain old annotation; there's nothing 1.8-specific about it.
+@Retention(RetentionPolicy.RUNTIME)
+@Inherited  // Note: the contained element type must also be @Inherited, per the JLS.
+public @interface Calendars {
+  Calendar[] value();
+}
diff --git a/test/048-reflect-v8/src/DefaultDeclared.java b/test/048-reflect-v8/src/DefaultDeclared.java
new file mode 100644
index 0000000..16e8a24
--- /dev/null
+++ b/test/048-reflect-v8/src/DefaultDeclared.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class DefaultDeclared {
+  interface DefaultInterface {
+    default void sayHi() {
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}
+
+  class ImplementsWithDeclared implements DefaultInterface {
+    public void sayHi() {
+      System.out.println("hello specific from default");
+    }
+  }
+
+  abstract class UnimplementedWithRegular implements RegularInterface { }
+
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printGetMethod(Class<?> klass) {
+    Method m;
+    try {
+      m = klass.getDeclaredMethod("sayHi");
+      System.out.println("No error thrown for class " + klass.toString());
+    } catch (NoSuchMethodException e) {
+      System.out.println("NoSuchMethodException thrown for class " + klass.toString());
+    } catch (Throwable t) {
+      System.out.println("Unknown error thrown for class " + klass.toString());
+      t.printStackTrace();
+    }
+  }
+
+  public static void test() {
+    System.out.println("==============================");
+    System.out.println("Are These Methods found by getDeclaredMethod:");
+    System.out.println("==============================");
+
+    printGetMethod(DefaultInterface.class);
+    printGetMethod(RegularInterface.class);
+    printGetMethod(ImplementsWithDefault.class);
+    printGetMethod(ImplementsWithDeclared.class);
+    printGetMethod(ImplementsWithRegular.class);
+    printGetMethod(UnimplementedWithRegular.class);
+  }
+}
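The NoSuchMethodException lines in the expected output fall out of a general rule: getDeclaredMethod only reports methods declared directly on the queried class, while inherited interface default methods are found via getMethod. A minimal sketch of that rule, with hypothetical names:

```java
public class DeclaredVsInheritedSketch {
  interface WithDefault {
    default void sayHi() { System.out.println("hi"); }
  }

  // Inherits sayHi() from the interface but declares nothing itself.
  static class Impl implements WithDefault {}

  public static void main(String[] args) throws Exception {
    // Found: getMethod searches superinterfaces for public methods.
    System.out.println(Impl.class.getMethod("sayHi"));
    try {
      // Not found: sayHi is not declared directly on Impl.
      Impl.class.getDeclaredMethod("sayHi");
    } catch (NoSuchMethodException expected) {
      System.out.println("NoSuchMethodException, as expected");
    }
  }
}
```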
diff --git a/test/048-reflect-v8/src/IFaceA.java b/test/048-reflect-v8/src/IFaceA.java
new file mode 100644
index 0000000..9b1f610
--- /dev/null
+++ b/test/048-reflect-v8/src/IFaceA.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// in the binary.
+@Calendars ({
+  @Calendar(dayOfMonth="if_a_first"),
+  @Calendar(dayOfMonth="if_b_last")
+})
+public interface IFaceA {
+}
diff --git a/test/048-reflect-v8/src/IFaceSimple.java b/test/048-reflect-v8/src/IFaceSimple.java
new file mode 100644
index 0000000..93cf610
--- /dev/null
+++ b/test/048-reflect-v8/src/IFaceSimple.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple annotation, no container.
+@Calendar(dayOfMonth="if_simple_first")
+public interface IFaceSimple {
+
+}
diff --git a/test/048-reflect-v8/src/IsDefaultTest.java b/test/048-reflect-v8/src/IsDefaultTest.java
new file mode 100644
index 0000000..177dcf1
--- /dev/null
+++ b/test/048-reflect-v8/src/IsDefaultTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class IsDefaultTest {
+  interface DefaultInterface {
+    default void sayHi() {
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printIsDefault(Class<?> klass) {
+    Method m;
+    try {
+      m = klass.getMethod("sayHi");
+    } catch (Throwable t) {
+      System.out.println(t);
+      return;
+    }
+
+    boolean isDefault = m.isDefault();
+    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
+  }
+
+  public static void test() {
+    System.out.println("==============================");
+    System.out.println("Are These Methods Default:");
+    System.out.println("==============================");
+
+    printIsDefault(DefaultInterface.class);
+    printIsDefault(RegularInterface.class);
+    printIsDefault(ImplementsWithDefault.class);
+    printIsDefault(ImplementsWithRegular.class);
+  }
+}
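For reference, Method#isDefault() is true exactly for a public, non-abstract, non-static method declared in an interface; anything a class declares (including an override of a default method) reports false. A compact sketch:

```java
public class IsDefaultSketch {
  interface Iface {
    default void d() {}
    void a();
  }

  static class Impl implements Iface {
    public void a() {}
    public void d() {}  // overrides the default with a class method
  }

  public static void main(String[] args) throws Exception {
    System.out.println(Iface.class.getMethod("d").isDefault());  // true
    System.out.println(Iface.class.getMethod("a").isDefault());  // false: abstract
    System.out.println(Impl.class.getMethod("d").isDefault());   // false: declared by a class
  }
}
```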
diff --git a/test/048-reflect-v8/src/Main.java b/test/048-reflect-v8/src/Main.java
new file mode 100644
index 0000000..b270e68
--- /dev/null
+++ b/test/048-reflect-v8/src/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    IsDefaultTest.test();
+    DefaultDeclared.test();
+    AnnotationTest.testAnnotationsByType();
+    AnnotationTest.testDeclaredAnnotation();
+    AnnotationTest.testDeclaredAnnotationsByType();
+    AnnotationTest.testMethodAnnotationsByType();
+    AnnotationTest.testMethodDeclaredAnnotations();
+    AnnotationTest.testMethodDeclaredAnnotationsByType();
+  }
+}
diff --git a/test/048-reflect-v8/src/SingleUser.java b/test/048-reflect-v8/src/SingleUser.java
new file mode 100644
index 0000000..0f9c430
--- /dev/null
+++ b/test/048-reflect-v8/src/SingleUser.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a single "Calendar" annotation in the binary.
+@Calendar(dayOfWeek="single", hour=23)
+public class SingleUser {
+
+}
diff --git a/test/048-reflect-v8/src/User.java b/test/048-reflect-v8/src/User.java
new file mode 100644
index 0000000..003ceeb
--- /dev/null
+++ b/test/048-reflect-v8/src/User.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// in the binary.
+//
+/* FIXME: Use this code instead, when Jack supports repeatable annotations properly.
+ *
+ * @Calendar(dayOfMonth="last")
+ * @Calendar(dayOfWeek="Fri", hour=23)
+ */
+@Calendars ({
+  @Calendar(dayOfMonth="last"),
+  @Calendar(dayOfWeek="Fri", hour=23)
+})
+public class User {
+
+}
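The FIXME above concerns source syntax only: two repeated @Calendar annotations and the explicit @Calendars container shown here produce the same class-file encoding, so retrieval behaves identically either way. A sketch (assuming the test's User and Calendar classes):

```java
public class ContainerEquivalenceSketch {
  public static void main(String[] args) {
    // Whether User was written with repeated @Calendar annotations or with an
    // explicit @Calendars container, getAnnotationsByType sees two entries,
    // in the container's value() order.
    Calendar[] entries = User.class.getAnnotationsByType(Calendar.class);
    System.out.println(entries.length);  // 2
  }
}
```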
diff --git a/test/048-reflect-v8/src/User2.java b/test/048-reflect-v8/src/User2.java
new file mode 100644
index 0000000..1a6049f
--- /dev/null
+++ b/test/048-reflect-v8/src/User2.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar,Calendar)
+// in the binary.
+// (Check for order, should be z,x,y)
+@Calendars ({
+  @Calendar(dayOfMonth="z"),
+  @Calendar(dayOfMonth="x"),
+  @Calendar(dayOfMonth="y")
+})
+public class User2 {
+
+}
diff --git a/test/048-reflect-v8/src/UserComplex.java b/test/048-reflect-v8/src/UserComplex.java
new file mode 100644
index 0000000..e262349
--- /dev/null
+++ b/test/048-reflect-v8/src/UserComplex.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// followed by a Calendar in the binary.
+// In other words { Calendars([C,C]), C }
+//
+// Note that trying to do {C,Calendars,C} or similar
+// is disallowed by the JLS.
+@Calendar(dayOfMonth="afirst")
+@Calendars ({
+  @Calendar(dayOfMonth="zsecond"),
+  @Calendar(dayOfMonth="athird", hour=23)
+})
+// @Calendar(dayOfMonth="zlast")  // Leave for future ordering test
+public class UserComplex {
+
+}
diff --git a/test/048-reflect-v8/src/UserSub.java b/test/048-reflect-v8/src/UserSub.java
new file mode 100644
index 0000000..d60aa6a
--- /dev/null
+++ b/test/048-reflect-v8/src/UserSub.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UserSub
+  extends User
+  implements IFaceA, IFaceSimple {
+
+}
diff --git a/test/048-reflect-v8/src/UserSub2.java b/test/048-reflect-v8/src/UserSub2.java
new file mode 100644
index 0000000..13e2eb0
--- /dev/null
+++ b/test/048-reflect-v8/src/UserSub2.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This Calendar subsumes anything we would otherwise have inherited from the superclass.
+@Calendar(dayOfMonth="sub2")
+public class UserSub2
+  extends User
+  implements IFaceA, IFaceSimple {
+
+}
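The masking rule UserSub2 exercises is asymmetric, which is easy to miss. A sketch of what the expected output encodes (again assuming the test's classes):

```java
public class MaskingSketch {
  public static void main(String[] args) {
    // The @Calendar directly present on UserSub2 masks the @Calendar entries
    // that would otherwise be inherited from User.
    Calendar[] cs = UserSub2.class.getAnnotationsByType(Calendar.class);
    System.out.println(cs.length + " -> " + cs[0].dayOfMonth());  // 1 -> sub2

    // But no @Calendars container is directly present on UserSub2, so the
    // inherited container from User is still associated, with both entries.
    Calendars[] css = UserSub2.class.getAnnotationsByType(Calendars.class);
    System.out.println(css[0].value().length);  // 2
  }
}
```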
diff --git a/test/051-thread/thread_test.cc b/test/051-thread/thread_test.cc
index 4215207..079ad40 100644
--- a/test/051-thread/thread_test.cc
+++ b/test/051-thread/thread_test.cc
@@ -28,7 +28,7 @@
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_supportsThreadPriorities(
     JNIEnv* env ATTRIBUTE_UNUSED,
     jclass clazz ATTRIBUTE_UNUSED) {
-#if defined(__ANDROID__)
+#if defined(ART_TARGET_ANDROID)
   return JNI_TRUE;
 #else
   return JNI_FALSE;
diff --git a/test/055-enum-performance/src/Main.java b/test/055-enum-performance/src/Main.java
index d5903af..d6bb211 100644
--- a/test/055-enum-performance/src/Main.java
+++ b/test/055-enum-performance/src/Main.java
@@ -20,7 +20,7 @@
             throw new AssertionError();
         } catch (InvocationTargetException expected) {
             IllegalArgumentException iae = (IllegalArgumentException) expected.getCause();
-            if (!iae.getMessage().equals("class java.lang.String is not an enum type")) {
+            if (!iae.getMessage().equals("class java.lang.String is not an enum type.")) {
                 throw new AssertionError();
             }
         }
diff --git a/test/061-out-of-memory/expected.txt b/test/061-out-of-memory/expected.txt
index ca87629..c31980c 100644
--- a/test/061-out-of-memory/expected.txt
+++ b/test/061-out-of-memory/expected.txt
@@ -4,4 +4,5 @@
 testOomeLarge succeeded
 testOomeSmall beginning
 testOomeSmall succeeded
+Got expected toCharArray OOM
 tests succeeded
diff --git a/test/061-out-of-memory/src/Main.java b/test/061-out-of-memory/src/Main.java
index c812c81..bda978e 100644
--- a/test/061-out-of-memory/src/Main.java
+++ b/test/061-out-of-memory/src/Main.java
@@ -26,6 +26,7 @@
         testHugeArray();
         testOomeLarge();
         testOomeSmall();
+        testOomeToCharArray();
         System.out.println("tests succeeded");
     }
 
@@ -106,4 +107,21 @@
         }
         System.out.println("testOomeSmall succeeded");
     }
+
+    private static void testOomeToCharArray() {
+        Object[] o = new Object[2000000];
+        String test = "test";
+        int i = 0;
+        try {
+            for (; i < o.length; ++i) o[i] = new char[1000000];
+        } catch (OutOfMemoryError oom) {}  // Expected: the heap is now (nearly) exhausted.
+        try {
+            for (; i < o.length; ++i) {
+                o[i] = test.toCharArray();
+            }
+        } catch (OutOfMemoryError oom) {
+            o = null;  // Drop the references so the println below can allocate.
+            System.out.println("Got expected toCharArray OOM");
+        }
+    }
 }
diff --git a/test/063-process-manager/expected.txt b/test/063-process-manager/expected.txt
index 8360239..8c01bf0 100644
--- a/test/063-process-manager/expected.txt
+++ b/test/063-process-manager/expected.txt
@@ -4,12 +4,12 @@
 spawning child
 process manager: RUNNABLE
 child died
-process manager: WAITING
+process manager: TIMED_WAITING
 
 spawning child #2
 spawning child
 process manager: RUNNABLE
 child died
-process manager: WAITING
+process manager: TIMED_WAITING
 
 done!
diff --git a/test/063-process-manager/src/Main.java b/test/063-process-manager/src/Main.java
index 68bf878..e9e522c 100644
--- a/test/063-process-manager/src/Main.java
+++ b/test/063-process-manager/src/Main.java
@@ -30,7 +30,7 @@
                  traces.entrySet()) {
             Thread t = entry.getKey();
             String name = t.getName();
-            if (name.equals("java.lang.ProcessManager")) {
+            if (name.indexOf("process reaper") >= 0) {
                 System.out.println("process manager: " + t.getState());
                 found = true;
             }
diff --git a/test/064-field-access/expected.txt b/test/064-field-access/expected.txt
index 0af56ba..69a586c 100644
--- a/test/064-field-access/expected.txt
+++ b/test/064-field-access/expected.txt
@@ -1,2 +1,3 @@
 good
 Got expected failure
+Got expected failure
diff --git a/test/064-field-access/smali/SubClassUsingInaccessibleField.smali b/test/064-field-access/smali/SubClassUsingInaccessibleField.smali
new file mode 100644
index 0000000..224b431
--- /dev/null
+++ b/test/064-field-access/smali/SubClassUsingInaccessibleField.smali
@@ -0,0 +1,32 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSubClassUsingInaccessibleField;
+
+.super Lother/PublicClass;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Lother/PublicClass;-><init>()V
+    return-void
+.end method
+
+# Regression test for compiler DCHECK() failure (bogus check) when referencing
+# a package-private field from an indirectly inherited package-private class,
+# using this very class as the declaring class in the FieldId. Bug: 27684368.
+.method public test()I
+    .registers 2
+    iget v0, p0, LSubClassUsingInaccessibleField;->otherProtectedClassPackageIntInstanceField:I
+    return v0
+.end method
diff --git a/test/064-field-access/src/Main.java b/test/064-field-access/src/Main.java
index 8dd22ba..50ad5b9 100644
--- a/test/064-field-access/src/Main.java
+++ b/test/064-field-access/src/Main.java
@@ -16,6 +16,7 @@
 
 import other.PublicClass;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 
 /*
@@ -35,6 +36,20 @@
       // reference
       System.out.println("Got expected failure");
     }
+
+    try {
+      Class<?> c = Class.forName("SubClassUsingInaccessibleField");
+      Object o = c.newInstance();
+      c.getMethod("test").invoke(o, null);
+    } catch (InvocationTargetException ite) {
+      if (ite.getCause() instanceof IllegalAccessError) {
+        System.out.println("Got expected failure");
+      } else {
+        System.out.println("Got unexpected failure " + ite.getCause());
+      }
+    } catch (Exception e) {
+      System.out.println("Got unexpected failure " + e);
+    }
   }
 
   /*
@@ -49,7 +64,7 @@
    * On success, the boxed value retrieved is returned.
    */
   public Object getValue(Field field, Object obj, char type,
-      Class expectedException) {
+      Class<?> expectedException) {
     Object result = null;
     try {
       switch (type) {
@@ -623,7 +638,7 @@
    * reflection call is significant]
    */
   public Object getValue(Field field, Object obj, char type,
-      Class expectedException) {
+      Class<?> expectedException) {
     Object result = null;
     try {
       switch (type) {
@@ -683,7 +698,7 @@
     return result;
   }
 
-  public Object invoke(Method method, Object obj, Class expectedException) {
+  public Object invoke(Method method, Object obj, Class<?> expectedException) {
     Object result = null;
     try {
       result = method.invoke(obj);
diff --git a/test/068-classloader/expected.txt b/test/068-classloader/expected.txt
index 8725799..ae937e0 100644
--- a/test/068-classloader/expected.txt
+++ b/test/068-classloader/expected.txt
@@ -13,3 +13,4 @@
 Got LinkageError on IDI (early)
 class Main
 Got expected ClassNotFoundException
+Loaded class into null class loader
diff --git a/test/068-classloader/src/FancyLoader.java b/test/068-classloader/src/FancyLoader.java
index 6a153cc..e616bfc 100644
--- a/test/068-classloader/src/FancyLoader.java
+++ b/test/068-classloader/src/FancyLoader.java
@@ -41,7 +41,7 @@
     static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
 
     /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
+    private Class<?> mDexClass;
 
     private Object mDexFile;
 
@@ -82,12 +82,12 @@
 
         if (mDexFile == null) {
             synchronized (FancyLoader.class) {
-                Constructor ctor;
+                Constructor<?> ctor;
                 /*
                  * Construct a DexFile object through reflection.
                  */
                 try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                    ctor = mDexClass.getConstructor(String.class);
                 } catch (NoSuchMethodException nsme) {
                     throw new ClassNotFoundException("getConstructor failed",
                         nsme);
@@ -111,8 +111,7 @@
         Method meth;
 
         try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
         } catch (NoSuchMethodException nsme) {
             throw new ClassNotFoundException("getMethod failed", nsme);
         }
@@ -184,7 +183,7 @@
     protected Class<?> loadClass(String name, boolean resolve)
         throws ClassNotFoundException
     {
-        Class res;
+        Class<?> res;
 
         /*
          * 1. Invoke findLoadedClass(String) to check if the class has
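The FancyLoader cleanups lean on getConstructor and getMethod taking Class<?>... varargs, which makes the explicit new Class[] {...} wrapping redundant. Both forms resolve identically; a quick JDK-only sketch:

```java
import java.lang.reflect.Method;

public class VarargsReflectionSketch {
  public static void main(String[] args) throws Exception {
    // The explicit array form and the varargs form are the same call.
    Method viaArray = String.class.getMethod("substring", new Class<?>[] { int.class, int.class });
    Method viaVarargs = String.class.getMethod("substring", int.class, int.class);
    System.out.println(viaArray.equals(viaVarargs));  // true
  }
}
```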
diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java
index 361e293..01539b7 100644
--- a/test/068-classloader/src/Main.java
+++ b/test/068-classloader/src/Main.java
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
 /**
  * Class loader test.
  */
@@ -62,6 +65,27 @@
         testSeparation();
 
         testClassForName();
+
+        testNullClassLoader();
+    }
+
+    static void testNullClassLoader() {
+        try {
+            /* this is the "alternate" DEX/Jar file */
+            String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
+            /* on Dalvik this resolves to the DexFile class; elsewhere Class.forName throws */
+            Class<?> mDexClass = Class.forName("dalvik.system.DexFile");
+            Constructor<?> ctor = mDexClass.getConstructor(String.class);
+            Object mDexFile = ctor.newInstance(DEX_FILE);
+            Method meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
+            Object klass = meth.invoke(mDexFile, "Mutator", null);
+            if (klass == null) {
+                throw new AssertionError("loadClass with null class loader failed");
+            }
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+        System.out.println("Loaded class into null class loader");
     }
 
     static void testSeparation() {
@@ -69,15 +93,15 @@
         FancyLoader loader2 = new FancyLoader(ClassLoader.getSystemClassLoader());
 
         try {
-            Class target1 = loader1.loadClass("MutationTarget");
-            Class target2 = loader2.loadClass("MutationTarget");
+            Class<?> target1 = loader1.loadClass("MutationTarget");
+            Class<?> target2 = loader2.loadClass("MutationTarget");
 
             if (target1 == target2) {
                 throw new RuntimeException("target1 should not be equal to target2");
             }
 
-            Class mutator1 = loader1.loadClass("Mutator");
-            Class mutator2 = loader2.loadClass("Mutator");
+            Class<?> mutator1 = loader1.loadClass("Mutator");
+            Class<?> mutator2 = loader2.loadClass("Mutator");
 
             if (mutator1 == mutator2) {
                 throw new RuntimeException("mutator1 should not be equal to mutator2");
@@ -109,12 +133,12 @@
         }
     }
 
-    private static void runMutator(Class c, int v) throws Exception {
+    private static void runMutator(Class<?> c, int v) throws Exception {
         java.lang.reflect.Method m = c.getDeclaredMethod("mutate", int.class);
         m.invoke(null, v);
     }
 
-    private static int getMutationTargetValue(Class c) throws Exception {
+    private static int getMutationTargetValue(Class<?> c) throws Exception {
         java.lang.reflect.Field f = c.getDeclaredField("value");
         return f.getInt(null);
     }
@@ -124,7 +148,7 @@
      * able to load it but not instantiate it.
      */
     static void testAccess1(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible1");
@@ -154,7 +178,7 @@
      * (though the base *is* accessible to us).
      */
     static void testAccess2(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible2");
@@ -174,7 +198,7 @@
      * See if we can load a class with an inaccessible interface.
      */
     static void testAccess3(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible3");
@@ -194,7 +218,7 @@
      * Test a doubled class that extends the base class.
      */
     static void testExtend(ClassLoader loader) {
-        Class doubledExtendClass;
+        Class<?> doubledExtendClass;
         Object obj;
 
         /* get the "alternate" version of DoubledExtend */
@@ -243,7 +267,7 @@
      * it doesn't override the base class method.
      */
     static void testExtendOkay(ClassLoader loader) {
-        Class doubledExtendOkayClass;
+        Class<?> doubledExtendOkayClass;
         Object obj;
 
         /* get the "alternate" version of DoubledExtendOkay */
@@ -291,7 +315,7 @@
      * an interface declared in a different class.
      */
     static void testInterface(ClassLoader loader) {
-        Class getDoubledClass;
+        Class<?> getDoubledClass;
         Object obj;
 
         /* get GetDoubled from the "alternate" class loader */
@@ -337,7 +361,7 @@
      * Throw an abstract class into the middle and see what happens.
      */
     static void testAbstract(ClassLoader loader) {
-        Class abstractGetClass;
+        Class<?> abstractGetClass;
         Object obj;
 
         /* get AbstractGet from the "alternate" loader */
@@ -382,7 +406,7 @@
      * Test a doubled class that implements a common interface.
      */
     static void testImplement(ClassLoader loader) {
-        Class doubledImplementClass;
+        Class<?> doubledImplementClass;
         Object obj;
 
         useImplement(new DoubledImplement(), true);
@@ -440,7 +464,7 @@
      * that refers to a doubled class.
      */
     static void testIfaceImplement(ClassLoader loader) {
-        Class ifaceImplClass;
+        Class<?> ifaceImplClass;
         Object obj;
 
         /*
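
For context on the new `testNullClassLoader`: `DexFile.loadClass(name, loader)` defines the class against the given loader, and a `null` loader here is treated as the boot class loader. Since `dalvik.system.DexFile` exists only on Android, the test reaches it reflectively, so the source still compiles on other runtimes. A trimmed sketch of the same pattern (the jar path is hypothetical):

    import java.lang.reflect.Constructor;
    import java.lang.reflect.Method;

    public class NullLoaderSketch {
        public static void main(String[] args) throws Exception {
            // Resolve DexFile reflectively; on a non-Android runtime this throws.
            Class<?> dexFileClass = Class.forName("dalvik.system.DexFile");
            Constructor<?> ctor = dexFileClass.getConstructor(String.class);
            Object dexFile = ctor.newInstance("/data/local/tmp/example.jar");  // hypothetical path
            Method loadClass = dexFileClass.getMethod("loadClass", String.class, ClassLoader.class);
            // A null ClassLoader defines the class against the boot loader.
            Object klass = loadClass.invoke(dexFile, "Example", null);
            System.out.println(klass);
        }
    }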
diff --git a/test/071-dexfile/src/Main.java b/test/071-dexfile/src/Main.java
index 2f85790..c3a9671 100644
--- a/test/071-dexfile/src/Main.java
+++ b/test/071-dexfile/src/Main.java
@@ -66,7 +66,7 @@
      */
     private static void testDexClassLoader() throws Exception {
         ClassLoader dexClassLoader = getDexClassLoader();
-        Class Another = dexClassLoader.loadClass("Another");
+        Class<?> Another = dexClassLoader.loadClass("Another");
         Object another = Another.newInstance();
         // not expected to work; just exercises the call
         dexClassLoader.getResource("nonexistent");
@@ -79,18 +79,21 @@
      */
     private static ClassLoader getDexClassLoader() throws Exception {
         ClassLoader classLoader = Main.class.getClassLoader();
-        Class DexClassLoader = classLoader.loadClass("dalvik.system.DexClassLoader");
-        Constructor DexClassLoader_init = DexClassLoader.getConstructor(String.class,
-                                                                        String.class,
-                                                                        String.class,
-                                                                        ClassLoader.class);
+        Class<?> DexClassLoader = classLoader.loadClass("dalvik.system.DexClassLoader");
+        Constructor<?> DexClassLoader_init = DexClassLoader.getConstructor(String.class,
+                                                                           String.class,
+                                                                           String.class,
+                                                                           ClassLoader.class);
         // create an instance, using the path we found
-        return (ClassLoader) DexClassLoader_init.newInstance(CLASS_PATH, getOdexDir(), LIB_DIR, classLoader);
+        return (ClassLoader) DexClassLoader_init.newInstance(CLASS_PATH,
+                                                             getOdexDir(),
+                                                             LIB_DIR,
+                                                             classLoader);
     }
 
     private static void testDexFile() throws Exception {
         ClassLoader classLoader = Main.class.getClassLoader();
-        Class DexFile = classLoader.loadClass("dalvik.system.DexFile");
+        Class<?> DexFile = classLoader.loadClass("dalvik.system.DexFile");
         Method DexFile_loadDex = DexFile.getMethod("loadDex",
                                                    String.class,
                                                    String.class,
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index f947d0b..df04793 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -69,7 +69,7 @@
      */
     private static Method getDumpHprofDataMethod() {
         ClassLoader myLoader = Main.class.getClassLoader();
-        Class vmdClass;
+        Class<?> vmdClass;
         try {
             vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
         } catch (ClassNotFoundException cnfe) {
@@ -78,8 +78,7 @@
 
         Method meth;
         try {
-            meth = vmdClass.getMethod("dumpHprofData",
-                    new Class[] { String.class });
+            meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
             System.err.println("Found VMDebug but not dumpHprofData method");
             return null;
diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java
index f007b25..0ae92a9 100644
--- a/test/080-oom-throw/src/Main.java
+++ b/test/080-oom-throw/src/Main.java
@@ -105,7 +105,7 @@
     static boolean triggerReflectionOOM() {
         try {
             Class<?> c = Main.class;
-            Method m = c.getMethod("blowup", (Class[]) null);
+            Method m = c.getMethod("blowup");
             holder = new Object[1000000];
             m.invoke(null);
             holder = null;
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 5913c40..06f193a 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -40,6 +40,10 @@
     test_Math_rint();
     test_Math_round_D();
     test_Math_round_F();
+    test_Math_isNaN_D();
+    test_Math_isNaN_F();
+    test_Math_isInfinite_D();
+    test_Math_isInfinite_F();
     test_Short_reverseBytes();
     test_Integer_reverseBytes();
     test_Long_reverseBytes();
@@ -803,10 +807,22 @@
     Assert.assertEquals(Math.round(-2.5d), -2l);
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
-    Assert.assertEquals(Math.round(0.49999999999999994d), 1l);
+    Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(Math.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(Math.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(Math.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(Math.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(Math.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(Math.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
+    Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(Math.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(Math.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
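
The changed expectation `Math.round(0.49999999999999994d) == 0` tracks the upstream fix for the classic `floor(x + 0.5)` implementation of rounding: `0.49999999999999994` is the largest double below 0.5, and `x + 0.5` is not exactly representable, so it rounds up to exactly `1.0`. The naive formula therefore answers 1 where true rounding answers 0. A standalone demonstration:

    public class RoundBoundary {
        public static void main(String[] args) {
            double x = 0.49999999999999994;  // largest double < 0.5
            // Naive rounding: x + 0.5 rounds up to exactly 1.0, so floor gives 1.
            System.out.println((long) Math.floor(x + 0.5));  // 1  (wrong)
            // Math.round handles the boundary correctly on current runtimes.
            System.out.println(Math.round(x));               // 0  (correct)
        }
    }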
@@ -825,13 +841,129 @@
     Assert.assertEquals(Math.round(-2.5f), -2);
     Assert.assertEquals(Math.round(-2.9f), -3);
     Assert.assertEquals(Math.round(-3.0f), -3);
+    // 0.4999999701976776123046875
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(Math.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(Math.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(Math.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(Math.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(Math.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(Math.round(-8388608.0f), -8388608);  // -2^23
+    Assert.assertEquals(Math.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(Math.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(Math.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(Math.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(Math.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
 
+  public static void test_Math_isNaN_D() {
+    // Signaling NaN.
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0x7FF4000000000000l)));
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0xFFF4000000000000l)));
+    // Quiet NaN.
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0x7FF8000000000000l)));
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0xFFF8000000000000l)));
+    // Distinct from +/- infinity.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x7FF0000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0xFFF0000000000000l)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x7FE0000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0xFFE0000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0010000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8010000000000000l)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0000000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8000000000000000l)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0008000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8008000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0000000000000001l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8000000000000001l)));
+  }
+
+  public static void test_Math_isNaN_F() {
+    // Signaling NaN.
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0x7FA00000)));
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0xFFA00000)));
+    // Quiet NaN.
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0x7FC00000)));
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0xFFC00000)));
+    // Distinct from +/- infinity.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x7F800000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0xFF800000)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x7F000000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0xFF000000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00800000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80800000)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00000000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80000000)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00400000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80400000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00000001)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80000001)));
+  }
+
+  public static void test_Math_isInfinite_D() {
+    // Distinct from Signaling NaN.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x7FF4000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0xFFF4000000000000l)));
+    // Distinct from Quiet NaN.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x7FF8000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0xFFF8000000000000l)));
+    // +/- infinity.
+    Assert.assertTrue(Double.isInfinite(Double.longBitsToDouble(0x7FF0000000000000l)));
+    Assert.assertTrue(Double.isInfinite(Double.longBitsToDouble(0xFFF0000000000000l)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x7FE0000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0xFFE0000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0010000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8010000000000000l)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0000000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8000000000000000l)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0008000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8008000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0000000000000001l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8000000000000001l)));
+  }
+
+  public static void test_Math_isInfinite_F() {
+    // Distinct from Signaling NaN.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x7FA00000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0xFFA00000)));
+    // Distinct from Quiet NaN.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x7FC00000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0xFFC00000)));
+    // +/- infinity.
+    Assert.assertTrue(Float.isInfinite(Float.intBitsToFloat(0x7F800000)));
+    Assert.assertTrue(Float.isInfinite(Float.intBitsToFloat(0xFF800000)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x7F000000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0xFF000000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00800000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80800000)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00000000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80000000)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00400000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80400000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00000001)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80000001)));
+  }
+
   public static void test_StrictMath_abs_I() {
     StrictMath.abs(-1);
     Assert.assertEquals(StrictMath.abs(0), 0);
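
The new `isNaN`/`isInfinite` tests enumerate raw bit patterns because ordinary Java APIs hide the distinction between NaNs: `Double.doubleToLongBits` canonicalizes every NaN to `0x7ff8000000000000L` (and `Float.floatToIntBits` to `0x7fc00000`), while the `Raw` variants may preserve the payload. A small probe:

    public class NanBits {
        public static void main(String[] args) {
            long signaling = 0x7FF4000000000000L;  // exponent all ones, quiet bit clear
            double d = Double.longBitsToDouble(signaling);
            System.out.println(Double.isNaN(d));                                  // true
            System.out.println(Long.toHexString(Double.doubleToLongBits(d)));     // 7ff8000000000000 (canonicalized)
            System.out.println(Long.toHexString(Double.doubleToRawLongBits(d)));  // typically 7ff4000000000000
        }
    }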
@@ -1034,10 +1166,22 @@
     Assert.assertEquals(StrictMath.round(-2.5d), -2l);
     Assert.assertEquals(StrictMath.round(-2.9d), -3l);
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
-    Assert.assertEquals(StrictMath.round(0.49999999999999994d), 1l);
+    Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(StrictMath.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(StrictMath.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(StrictMath.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(StrictMath.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(StrictMath.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(StrictMath.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
+    Assert.assertEquals(StrictMath.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(StrictMath.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(StrictMath.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -1056,9 +1200,25 @@
     Assert.assertEquals(StrictMath.round(-2.5f), -2);
     Assert.assertEquals(StrictMath.round(-2.9f), -3);
     Assert.assertEquals(StrictMath.round(-3.0f), -3);
+    // 0.4999999701976776123046875
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(StrictMath.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(StrictMath.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(StrictMath.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(StrictMath.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(StrictMath.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(StrictMath.round(-8388608.0f), -8388608);  // -2^23
+    Assert.assertEquals(StrictMath.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(StrictMath.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(StrictMath.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(StrictMath.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(StrictMath.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
diff --git a/test/085-old-style-inner-class/build b/test/085-old-style-inner-class/build
index 6f50a76..21dc662 100644
--- a/test/085-old-style-inner-class/build
+++ b/test/085-old-style-inner-class/build
@@ -23,8 +23,8 @@
 ${JAVAC} -source 1.4 -target 1.4 -d classes `find src -name '*.java'`
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   # Suppress stderr to keep the inner class warnings out of the expected output.
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes 2>/dev/null
diff --git a/test/086-null-super/src/Main.java b/test/086-null-super/src/Main.java
index 060737f..8bd1786 100644
--- a/test/086-null-super/src/Main.java
+++ b/test/086-null-super/src/Main.java
@@ -75,14 +75,12 @@
                  * Find the DexFile class, and construct a DexFile object
                  * through reflection, then call loadCLass on it.
                  */
-                Class mDexClass = ClassLoader.getSystemClassLoader().
+                Class<?> mDexClass = ClassLoader.getSystemClassLoader().
                         loadClass("dalvik.system.DexFile");
-                Constructor ctor = mDexClass.
-                        getConstructor(new Class[] {String.class});
+                Constructor<?> ctor = mDexClass.getConstructor(String.class);
                 Object mDexFile = ctor.newInstance(DEX_FILE);
                 Method meth = mDexClass.
-                        getMethod("loadClass",
-                            new Class[] { String.class, ClassLoader.class });
+                        getMethod("loadClass", String.class, ClassLoader.class);
                 /*
                  * Invoking loadClass on CLASS_NAME is expected to
                  * throw an InvocationTargetException. Anything else
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index 7c47e99..698af0b 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -70,7 +70,7 @@
                 throws TestFailed, InvocationTargetException
         {
             Object dexFile = null;
-            Class dexClass = null;
+            Class<?> dexClass = null;
 
             try {
                 try {
@@ -80,11 +80,9 @@
                      */
                     dexClass = ClassLoader.getSystemClassLoader().
                             loadClass("dalvik.system.DexFile");
-                    Constructor ctor = dexClass.
-                            getConstructor(new Class[] {String.class});
+                    Constructor<?> ctor = dexClass.getConstructor(String.class);
                     dexFile = ctor.newInstance(DEX_FILE);
-                    Method meth = dexClass.getMethod("loadClass",
-                            new Class[] { String.class, ClassLoader.class });
+                    Method meth = dexClass.getMethod("loadClass", String.class, ClassLoader.class);
                     /*
                      * Invoking loadClass on CLASS_NAME is expected to
                      * throw an InvocationTargetException. Anything else
@@ -95,7 +93,7 @@
                 } finally {
                     if (dexFile != null) {
                         /* close the DexFile to make CloseGuard happy */
-                        Method meth = dexClass.getMethod("close", (Class[]) null);
+                        Method meth = dexClass.getMethod("close");
                         meth.invoke(dexFile);
                     }
                 }
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 212c894..a6f0e64 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -100,7 +100,7 @@
      */
     void constantLock() {
         assertIsManaged();
-        Class thing = Thread.class;
+        Class<?> thing = Thread.class;
         synchronized (Thread.class) {}
     }
 
diff --git a/test/089-many-methods/build b/test/089-many-methods/build
index ff77c60..58144e1 100644
--- a/test/089-many-methods/build
+++ b/test/089-many-methods/build
@@ -43,8 +43,4 @@
     printf("}\n") > fileName;
 }'
 
-# The test relies on the error message produced by dx, not jack, so keep building with dx for now
-# (b/19467889).
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-${DX} -JXmx1024m --dex --no-optimize classes
+./default-build
diff --git a/test/089-many-methods/expected.txt b/test/089-many-methods/expected.txt
index b74e0ee..bfee8b3 100644
--- a/test/089-many-methods/expected.txt
+++ b/test/089-many-methods/expected.txt
@@ -1,6 +1,2 @@
-
-trouble writing output: Too many field references: 131000; max is 65536.
-You may try using --multi-dex option.
-References by package:
-131000 default
-build exit status: 2
+ERROR: Dex writing phase: classes.dex has too many IDs. Try using multi-dex
+build exit status: 4
diff --git a/test/091-override-package-private-method/build b/test/091-override-package-private-method/build
new file mode 100755
index 0000000..073a4ba
--- /dev/null
+++ b/test/091-override-package-private-method/build
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+mv classes/OverridePackagePrivateMethodSuper.class classes-ex
+
+if [ ${USE_JACK} = "true" ]; then
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
+
+  ${JACK} --import classes.jill.jar --output-dex .
+  zip $TEST_NAME.jar classes.dex
+  ${JACK} --import classes-ex.jill.jar --output-dex .
+  zip ${TEST_NAME}-ex.jar classes.dex
+else
+  if [ ${NEED_DEX} = "true" ]; then
+    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+    zip $TEST_NAME.jar classes.dex
+    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+    zip ${TEST_NAME}-ex.jar classes.dex
+  fi
+fi
diff --git a/test/091-override-package-private-method/expected.txt b/test/091-override-package-private-method/expected.txt
new file mode 100644
index 0000000..286cfcd
--- /dev/null
+++ b/test/091-override-package-private-method/expected.txt
@@ -0,0 +1 @@
+OverridePackagePrivateMethodTest
diff --git a/test/091-override-package-private-method/info.txt b/test/091-override-package-private-method/info.txt
new file mode 100644
index 0000000..8e183bf
--- /dev/null
+++ b/test/091-override-package-private-method/info.txt
@@ -0,0 +1,3 @@
+Test features with a secondary dex file.
+
+- Regression test to ensure the AOT compiler correctly handles overridden package-private methods.
diff --git a/test/127-secondarydex/run b/test/091-override-package-private-method/run
similarity index 100%
copy from test/127-secondarydex/run
copy to test/091-override-package-private-method/run
diff --git a/test/091-override-package-private-method/src/Main.java b/test/091-override-package-private-method/src/Main.java
new file mode 100644
index 0000000..6543c98
--- /dev/null
+++ b/test/091-override-package-private-method/src/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  Override package-private method test.
+ */
+public class Main {
+    public static void main(String[] args) {
+        try {
+            new OverridePackagePrivateMethodTest().test(new Object());
+        } catch (Exception e) {
+            System.out.println("Got unexpected exception " + e);
+        }
+    }
+}
diff --git a/test/091-override-package-private-method/src/OverridePackagePrivateMethodSuper.java b/test/091-override-package-private-method/src/OverridePackagePrivateMethodSuper.java
new file mode 100644
index 0000000..4ad051e
--- /dev/null
+++ b/test/091-override-package-private-method/src/OverridePackagePrivateMethodSuper.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class OverridePackagePrivateMethodSuper {
+    void print() {
+        System.out.println("OverridePackagePrivateMethodSuper");
+    }
+}
diff --git a/test/091-override-package-private-method/src/OverridePackagePrivateMethodTest.java b/test/091-override-package-private-method/src/OverridePackagePrivateMethodTest.java
new file mode 100644
index 0000000..2f2b7ca
--- /dev/null
+++ b/test/091-override-package-private-method/src/OverridePackagePrivateMethodTest.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class OverridePackagePrivateMethodTest extends OverridePackagePrivateMethodSuper {
+    public void test(Object obj) {
+        if (obj == null) {
+            throw new NullPointerException("Got null");
+        }
+        print();
+    }
+
+    void print() {
+        System.out.println("OverridePackagePrivateMethodTest");
+    }
+}
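
As background for this new test: a package-private method participates in virtual dispatch only for overriders in the same package, and here both classes sit in the default package, so `print()` in the subclass genuinely overrides the superclass version (the superclass living in a separate dex file is what stresses the AOT compiler's method resolution). A minimal standalone model of the expected dispatch (class names hypothetical):

    // Both classes live in the same (default) package, so the subclass's
    // package-private print() really does override the superclass's.
    class Base {
        void print() { System.out.println("Base"); }
    }

    class Derived extends Base {
        @Override
        void print() { System.out.println("Derived"); }
    }

    public class DispatchDemo {
        public static void main(String[] args) {
            Base b = new Derived();
            b.print();  // "Derived" -- virtual dispatch within the package
        }
    }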
diff --git a/test/097-duplicate-method/build b/test/097-duplicate-method/build
deleted file mode 100644
index a855873..0000000
--- a/test/097-duplicate-method/build
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2012 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-mkdir classes
-
-if [ ${USE_JACK} = "true" ]; then
-  ${JACK} --output-jack src.jack src
-
-  ${JASMIN} -d classes src/*.j
-  ${JILL} classes --output jasmin.jack
-
-  # We set jack.import.type.policy=keep-first to consider class definitions from jasmin first.
-  ${JACK} --import jasmin.jack --import src.jack -D jack.import.type.policy=keep-first --output-dex .
-else
-  ${JAVAC} -d classes src/*.java
-  ${JASMIN} -d classes src/*.j
-
-  ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
-fi
-zip $TEST_NAME.jar classes.dex
diff --git a/test/097-duplicate-method/classes.dex b/test/097-duplicate-method/classes.dex
new file mode 100644
index 0000000..18f8958
--- /dev/null
+++ b/test/097-duplicate-method/classes.dex
Binary files differ
diff --git a/test/097-duplicate-method/info.txt b/test/097-duplicate-method/info.txt
index 4e7e0ee..ed0daed 100644
--- a/test/097-duplicate-method/info.txt
+++ b/test/097-duplicate-method/info.txt
@@ -1,2 +1,7 @@
 This is a test to verify that duplicate methods in a dex file are handled
 properly (all but the first are ignored).
+
+We need to build a dex file with duplicate methods. We cannot do that
+with Jack (it rejects them as invalid) or smali (it keeps only the
+first of any duplicate methods in the dex). A precompiled dex file
+therefore lets the test run regardless of the toolchain in use.
diff --git a/test/097-duplicate-method/src/Main.java b/test/097-duplicate-method/src/Main.java
deleted file mode 100644
index bb3d36a..0000000
--- a/test/097-duplicate-method/src/Main.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Certain class files generated by smali can have encoded methods with an
- * identical method_idx. In these cases, the behavior should be to only use the
- * first one, and to ignore all following duplicates.
- */
-public class Main {
-    public static void main(String args[]) {
-        if (Test.run() != null) {
-          System.out.println("Success!");
-        }
-    }
-}
diff --git a/test/097-duplicate-method/src/Test.j b/test/097-duplicate-method/src/Test.j
deleted file mode 100644
index f96a9a3..0000000
--- a/test/097-duplicate-method/src/Test.j
+++ /dev/null
@@ -1,29 +0,0 @@
-; Copyright (C) 2012 The Android Open Source Project
-;
-; Licensed under the Apache License, Version 2.0 (the "License");
-; you may not use this file except in compliance with the License.
-; You may obtain a copy of the License at
-;
-;      http://www.apache.org/licenses/LICENSE-2.0
-;
-; Unless required by applicable law or agreed to in writing, software
-; distributed under the License is distributed on an "AS IS" BASIS,
-; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-; See the License for the specific language governing permissions and
-; limitations under the License.
-
-.class Test
-.super java/lang/Object
-
-.method public static run()Ljava/lang/Object;
-    .limit stack 2
-    new java/lang/Object
-    dup
-    invokespecial java/lang/Object/<init>()V
-    areturn
-.end method
-
-.method public static run()Ljava/lang/Object;
-    aconst_null
-    areturn
-.end method
diff --git a/test/097-duplicate-method/src/Test.java b/test/097-duplicate-method/src/Test.java
deleted file mode 100644
index 7dd61e6..0000000
--- a/test/097-duplicate-method/src/Test.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This class and method will be overwritten by smali to return non-null.
- */
-public class Test {
-    public static Object run() {
-        return null;
-    }
-}
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index f41ff2a..72c5a28 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -44,7 +44,12 @@
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
         for (int i = 0; i < overflowAllocations; i++) {
-            new Object();
+            new Object() {
+                // Add a finalizer so that the allocation won't be eliminated.
+                public void finalize() {
+                    System.out.print("");
+                }
+            };
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
@@ -131,7 +136,7 @@
         private static final Method getRecentAllocationsMethod;
         static {
             try {
-                Class c = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+                Class<?> c = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
                 enableRecentAllocationsMethod = c.getDeclaredMethod("enableRecentAllocations",
                                                                     Boolean.TYPE);
                 getRecentAllocationStatusMethod = c.getDeclaredMethod("getRecentAllocationStatus");
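
The anonymous finalizable object added to 098-ddmc is there to defeat allocation elimination: a `new Object()` whose result is never used has no observable effect, so an optimizing compiler may drop it entirely, and the test would then record far fewer than `overflowAllocations` events. Overriding `finalize()` makes the allocation observable, so it must survive. A standalone sketch of the pattern:

    public class KeepAlloc {
        static void churn(int n) {
            for (int i = 0; i < n; i++) {
                // Unused and side-effect free: the compiler may elide this allocation.
                new Object();
                // Finalizable: running finalize() is an observable effect, so this
                // allocation must survive optimization.
                new Object() {
                    public void finalize() { System.out.print(""); }
                };
            }
        }
        public static void main(String[] args) { churn(10); }
    }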
diff --git a/test/099-vmdebug/src/Main.java b/test/099-vmdebug/src/Main.java
index 1be5765..90ad315 100644
--- a/test/099-vmdebug/src/Main.java
+++ b/test/099-vmdebug/src/Main.java
@@ -133,7 +133,7 @@
             System.out.println("Got null string");
             return;
         }
-        long n = Long.valueOf(s);
+        long n = Long.parseLong(s);
         if (n < 0) {
             System.out.println("Got negative number " + n);
         }
@@ -157,8 +157,8 @@
                 System.out.println("Got bad bucket " + bucket);
                 continue;
             }
-            long key = Long.valueOf(kv[0]);
-            long value = Long.valueOf(kv[1]);
+            long key = Long.parseLong(kv[0]);
+            long value = Long.parseLong(kv[1]);
             if (key < 0 || value < 0) {
                 System.out.println("Got negative key or value " + bucket);
                 continue;
@@ -242,7 +242,7 @@
         System.out.println("Instances of null " + VMDebug.countInstancesofClass(null, false));
         System.out.println("Instances of ClassA assignable " +
                 VMDebug.countInstancesofClass(ClassA.class, true));
-        Class[] classes = new Class[]{ClassA.class, ClassB.class, null};
+        Class<?>[] classes = new Class<?>[] {ClassA.class, ClassB.class, null};
         long[] counts = VMDebug.countInstancesofClasses(classes, false);
         System.out.println("Array counts " + Arrays.toString(counts));
         counts = VMDebug.countInstancesofClasses(classes, true);
@@ -259,7 +259,7 @@
         private static final Method countInstancesOfClassesMethod;
         static {
             try {
-                Class c = Class.forName("dalvik.system.VMDebug");
+                Class<?> c = Class.forName("dalvik.system.VMDebug");
                 startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
                         Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
                 stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
@@ -292,10 +292,10 @@
         public static Map<String, String> getRuntimeStats() throws Exception {
             return (Map<String, String>) getRuntimeStatsMethod.invoke(null);
         }
-        public static long countInstancesofClass(Class c, boolean assignable) throws Exception {
+        public static long countInstancesofClass(Class<?> c, boolean assignable) throws Exception {
             return (long) countInstancesOfClassMethod.invoke(null, new Object[]{c, assignable});
         }
-        public static long[] countInstancesofClasses(Class[] classes, boolean assignable)
+        public static long[] countInstancesofClasses(Class<?>[] classes, boolean assignable)
                 throws Exception {
             return (long[]) countInstancesOfClassesMethod.invoke(
                     null, new Object[]{classes, assignable});
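
The `Long.valueOf` to `Long.parseLong` edits in 099-vmdebug are behavior-preserving for valid input: `valueOf(String)` parses and boxes, and assigning the result to a `long` immediately auto-unboxes it, whereas `parseLong` yields the primitive directly (small values come from `valueOf`'s box cache; larger ones allocate). For instance:

    public class ParseDemo {
        public static void main(String[] args) {
            long boxed = Long.valueOf("42");    // parses, boxes to Long, then auto-unboxes
            long direct = Long.parseLong("42"); // parses straight to the primitive
            System.out.println(boxed + " " + direct);
        }
    }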
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index c932761..dd89d64 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -31,9 +31,9 @@
 30 (class java.lang.Integer)
 62 (class java.lang.Long)
 14 (class java.lang.Short)
-[public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
-[public native char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), native void java.lang.String.getCharsNoCheck(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), native void java.lang.String.setCharAt(int,char), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public native [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
+[java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
+[private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index bf3a574..91ba307 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -157,10 +157,28 @@
     System.out.println(o + " (" + (o != null ? o.getClass() : "null") + ")");
   }
 
+  /**
+   * Sorts the input array in place using the comparator and returns it.
+   */
+  private static Object[] sort(Object[] objects, Comparator<Object> comp) {
+    Arrays.sort(objects, comp);
+    return objects;
+  }
+
   public static void testMethodReflection() throws Exception {
-    System.out.println(Arrays.toString(String.class.getDeclaredConstructors()));
-    System.out.println(Arrays.toString(String.class.getDeclaredFields()));
-    System.out.println(Arrays.toString(String.class.getDeclaredMethods()));
+    Comparator<Object> comp = new Comparator<Object>() {
+      public int compare(Object a, Object b) {
+        return a.toString().compareTo(b.toString());
+      }
+      public boolean equals(Object b) {
+        return this == b;
+      }
+    };
+
+    // Sort the return values by their string values since the order is undefined by the spec.
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredConstructors(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredFields(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredMethods(), comp)));
 
     System.out.println(Arrays.toString(Main.class.getInterfaces()));
     System.out.println(Arrays.toString(String.class.getInterfaces()));
@@ -257,10 +275,8 @@
   }
 
   public static void testConstructorReflection() throws Exception {
-    Constructor<?> ctor;
-
-    ctor = String.class.getConstructor(new Class[0]);
-    show(ctor.newInstance((Object[]) null));
+    Constructor<String> ctor = String.class.getConstructor();
+    show(ctor.newInstance());
 
     ctor = String.class.getConstructor(char[].class, int.class, int.class);
     show(ctor.newInstance(new char[] { '\u2714', 'y', 'z', '!' }, 1, 2));
@@ -269,7 +285,7 @@
   private static void testPackagePrivateConstructor() {
     try {
       Class<?> c = Class.forName("sub.PPClass");
-      Constructor cons = c.getConstructor();
+      Constructor<?> cons = c.getConstructor();
       cons.newInstance();
       throw new RuntimeException("Expected IllegalAccessException.");
     } catch (IllegalAccessException e) {
@@ -283,7 +299,7 @@
   private static void testPackagePrivateAccessibleConstructor() {
     try {
       Class<?> c = Class.forName("sub.PPClass");
-      Constructor cons = c.getConstructor();
+      Constructor<?> cons = c.getConstructor();
       cons.setAccessible(true);  // ensure we prevent IllegalAccessException
       cons.newInstance();
     } catch (Exception e) {
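The hunk above sorts the arrays returned by getDeclaredConstructors(), getDeclaredFields(), and getDeclaredMethods() before printing, because the Java spec leaves member order unspecified and ART need not match the reference implementation. A minimal standalone sketch of the same idea, with the hypothetical class name SortedReflection:

    import java.lang.reflect.Method;
    import java.util.Arrays;
    import java.util.Comparator;

    public class SortedReflection {
        public static void main(String[] args) {
            Method[] methods = String.class.getDeclaredMethods();
            // getDeclaredMethods() returns members in no particular order;
            // sorting by the string form makes the output deterministic.
            Arrays.sort(methods, new Comparator<Method>() {
                public int compare(Method a, Method b) {
                    return a.toString().compareTo(b.toString());
                }
            });
            for (Method m : methods) {
                System.out.println(m);
            }
        }
    }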
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index 0c91d44..ec5678d 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -104,7 +104,7 @@
     }
 
     static int constClassTest(int x) {
-        Class c = String.class;
+        Class<?> c = String.class;
         if (c != null) {
            return x * 2;
         } else {
diff --git a/test/111-unresolvable-exception/build b/test/111-unresolvable-exception/build
index e772fb8..58ac26d 100644
--- a/test/111-unresolvable-exception/build
+++ b/test/111-unresolvable-exception/build
@@ -22,8 +22,8 @@
 rm classes/TestException.class
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/113-multidex/build b/test/113-multidex/build
index 8ef5c0e..4557ccd 100644
--- a/test/113-multidex/build
+++ b/test/113-multidex/build
@@ -28,14 +28,12 @@
 rm classes2/Second.class classes2/FillerA.class classes2/FillerB.class classes2/Inf*.class
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes2 --output classes2.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes2.jill.jar -C classes2 .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   mv classes.dex classes-1.dex
-  ${JACK} --import classes2.jack --output-dex .
+  ${JACK} --import classes2.jill.jar --output-dex .
   mv classes.dex classes2.dex
   mv classes-1.dex classes.dex
 else
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index b003307..852ec2e 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -1,4 +1,3 @@
-Code cache exists: './code_cache'.
 Native bridge initialized.
 Checking for getEnvValues.
 Ready for native bridge tests.
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index e9946c8..aca356b 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -267,11 +267,16 @@
                                          const char* app_code_cache_dir,
                                          const char* isa ATTRIBUTE_UNUSED) {
   struct stat st;
-  if ((app_code_cache_dir != nullptr)
-      && (stat(app_code_cache_dir, &st) == 0)
-      && S_ISDIR(st.st_mode)) {
-    printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+  if (app_code_cache_dir != nullptr) {
+    if (stat(app_code_cache_dir, &st) == 0) {
+      if (!S_ISDIR(st.st_mode)) {
+        printf("Code cache is not a directory.\n");
+      }
+    } else {
+      perror("Error when stat-ing the code_cache");
+    }
   }
+
   if (art_cbs != nullptr) {
     gNativeBridgeArtCallbacks = art_cbs;
     printf("Native bridge initialized.\n");
diff --git a/test/115-native-bridge/run b/test/115-native-bridge/run
index ea2045b..aeb5721 100644
--- a/test/115-native-bridge/run
+++ b/test/115-native-bridge/run
@@ -28,4 +28,4 @@
 LEFT=$(echo ${ARGS} | sed -r 's/-Djava.library.path.*//')
 RIGHT=$(echo ${ARGS} | sed -r 's/.*Djava.library.path[^ ]* //')
 MODARGS="${LEFT} -Djava.library.path=`pwd` ${RIGHT}"
-exec ${RUN} --runtime-option -XX:NativeBridge=libnativebridgetest.so ${MODARGS} NativeBridgeMain
+exec ${RUN} --runtime-option -Xforce-nb-testing --runtime-option -XX:NativeBridge=libnativebridgetest.so ${MODARGS} NativeBridgeMain
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 3e533ad..c6a2e9a 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -35,8 +35,9 @@
   }
 
   static bool isRelocationDeltaZero() {
-    gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetImageSpace();
-    return space != nullptr && space->GetImageHeader().GetPatchDelta() == 0;
+    std::vector<gc::space::ImageSpace*> spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    return !spaces.empty() && spaces[0]->GetImageHeader().GetPatchDelta() == 0;
   }
 
   static bool hasExecutableOat(jclass cls) {
@@ -45,7 +46,7 @@
     return oat_dex_file != nullptr && oat_dex_file->GetOatFile()->IsExecutable();
   }
 
-  static bool isPic(jclass cls) {
+  static bool needsRelocation(jclass cls) {
     const OatFile::OatDexFile* oat_dex_file = getOatDexFile(cls);
 
     if (oat_dex_file == nullptr) {
@@ -53,7 +54,8 @@
     }
 
     const OatFile* oat_file = oat_dex_file->GetOatFile();
-    return oat_file->IsPic();
+    return !oat_file->IsPic()
+        && CompilerFilter::IsBytecodeCompilationEnabled(oat_file->GetCompilerFilter());
   }
 };
 
@@ -65,8 +67,8 @@
   return NoPatchoatTest::hasExecutableOat(cls);
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isPic(JNIEnv*, jclass cls) {
-  return NoPatchoatTest::isPic(cls);
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_needsRelocation(JNIEnv*, jclass cls) {
+  return NoPatchoatTest::needsRelocation(cls);
 }
 
 }  // namespace art
diff --git a/test/117-nopatchoat/src/Main.java b/test/117-nopatchoat/src/Main.java
index 425cf48..816eb17 100644
--- a/test/117-nopatchoat/src/Main.java
+++ b/test/117-nopatchoat/src/Main.java
@@ -22,9 +22,9 @@
     // ANDROID_DATA has been relocated, since a non-relocated oat file always has a 0 delta.
     // Hitting this condition should be rare and ideally we would prevent it from happening but
     // there is no way to do so without major changes to the run-test framework.
-    boolean executable_correct = (isPic() ?
-        hasExecutableOat() == true :
-        hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero()));
+    boolean executable_correct = (needsRelocation() ?
+        hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero()) :
+        hasExecutableOat() == true);
 
     System.out.println(
         "dex2oat & patchoat are " + ((isDex2OatEnabled()) ? "enabled" : "disabled") +
@@ -49,7 +49,7 @@
 
   private native static boolean isDex2OatEnabled();
 
-  private native static boolean isPic();
+  private native static boolean needsRelocation();
 
   private native static boolean hasOatFile();
 
diff --git a/test/118-noimage-dex2oat/run b/test/118-noimage-dex2oat/run
index 4b1d0ce..07bdb08 100644
--- a/test/118-noimage-dex2oat/run
+++ b/test/118-noimage-dex2oat/run
@@ -41,7 +41,6 @@
 bpath="${framework}/core-libart${bpath_suffix}.jar"
 bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
 bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
-bpath="${bpath}:${framework}/core-junit${bpath_suffix}.jar"
 bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
 bpath_arg="--runtime-option -Xbootclasspath:${bpath}"
 
diff --git a/test/118-noimage-dex2oat/src/Main.java b/test/118-noimage-dex2oat/src/Main.java
index dba9166..cc19107 100644
--- a/test/118-noimage-dex2oat/src/Main.java
+++ b/test/118-noimage-dex2oat/src/Main.java
@@ -51,7 +51,7 @@
     private static final Method isBootClassPathOnDiskMethod;
     static {
         try {
-            Class c = Class.forName("dalvik.system.VMRuntime");
+            Class<?> c = Class.forName("dalvik.system.VMRuntime");
             getCurrentInstructionSetMethod = c.getDeclaredMethod("getCurrentInstructionSet");
             isBootClassPathOnDiskMethod = c.getDeclaredMethod("isBootClassPathOnDisk",
                                                               String.class);
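This change repeatedly replaces the raw type Class with the wildcard Class<?>; the wildcard carries the same runtime behavior but keeps reflective lookups free of unchecked-conversion warnings. A minimal sketch with the hypothetical class name WildcardLookup:

    import java.lang.reflect.Method;

    public class WildcardLookup {
        public static void main(String[] args) throws Exception {
            // Class<?> rather than raw Class: no unchecked warnings below.
            Class<?> c = Class.forName("java.lang.System");
            Method m = c.getDeclaredMethod("nanoTime");
            System.out.println(m.invoke(null));
        }
    }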
diff --git a/test/121-modifiers/build b/test/121-modifiers/build
index 85b69e9..771dd51 100644
--- a/test/121-modifiers/build
+++ b/test/121-modifiers/build
@@ -31,9 +31,9 @@
 # mv Main.class A.class A\$B.class A\$C.class classes/
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
+  jar cf classes.jill.jar -C classes .
   # Workaround b/19561685: disable sanity checks to produce a DEX file with invalid modifiers.
-  ${JACK} --sanity-checks off --import classes.jack --output-dex .
+  ${JACK} --sanity-checks off --import classes.jill.jar --output-dex .
 else
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/123-inline-execute2/expected.txt b/test/123-inline-execute2/expected.txt
new file mode 100644
index 0000000..aa74fa3
--- /dev/null
+++ b/test/123-inline-execute2/expected.txt
@@ -0,0 +1,299 @@
+Math.sin(0.0) = 0.000000000000
+Math.sinh(0.0) = 0.000000000000
+Math.asin(0.0) = 0.000000000000
+Math.cos(0.0) = 1.000000000000
+Math.cosh(0.0) = 1.000000000000
+Math.acos(0.0) = 1.570796326795
+Math.tan(0.0) = 0.000000000000
+Math.tanh(0.0) = 0.000000000000
+Math.atan(0.0) = 0.000000000000
+Math.atan2(0.0, 1.0) = 0.000000000000
+Math.sin(0.7853981633974483) = 0.707106781187
+Math.sinh(0.7853981633974483) = 0.868670961486
+Math.asin(0.7853981633974483) = 0.903339110767
+Math.cos(0.7853981633974483) = 0.707106781187
+Math.cosh(0.7853981633974483) = 1.324609089252
+Math.acos(0.7853981633974483) = 0.667457216028
+Math.tan(0.7853981633974483) = 1.000000000000
+Math.tanh(0.7853981633974483) = 0.655794202633
+Math.atan(0.7853981633974483) = 0.665773750028
+Math.atan2(0.7853981633974483, 1.7853981633974483) = 0.414423800577
+Math.sin(1.5707963267948966) = 1.000000000000
+Math.sinh(1.5707963267948966) = 2.301298902307
+Math.asin(1.5707963267948966) = NaN
+Math.cos(1.5707963267948966) = 0.000000000000
+Math.cosh(1.5707963267948966) = 2.509178478658
+Math.acos(1.5707963267948966) = NaN
+Math.tanh(1.5707963267948966) = 0.917152335667
+Math.atan(1.5707963267948966) = 1.003884821854
+Math.atan2(1.5707963267948966, 2.5707963267948966) = 0.548479764417
+Math.sin(2.356194490192345) = 0.707106781187
+Math.sinh(2.356194490192345) = 5.227971924678
+Math.asin(2.356194490192345) = NaN
+Math.cos(2.356194490192345) = -0.707106781187
+Math.cosh(2.356194490192345) = 5.322752149520
+Math.acos(2.356194490192345) = NaN
+Math.tan(2.356194490192345) = -1.000000000000
+Math.tanh(2.356194490192345) = 0.982193380007
+Math.atan(2.356194490192345) = 1.169422824816
+Math.atan2(2.356194490192345, 3.356194490192345) = 0.612096117380
+Math.sin(3.141592653589793) = 0.000000000000
+Math.sinh(3.141592653589793) = 11.548739357258
+Math.asin(3.141592653589793) = NaN
+Math.cos(3.141592653589793) = -1.000000000000
+Math.cosh(3.141592653589793) = 11.591953275522
+Math.acos(3.141592653589793) = NaN
+Math.tan(3.141592653589793) = -0.000000000000
+Math.tanh(3.141592653589793) = 0.996272076221
+Math.atan(3.141592653589793) = 1.262627255679
+Math.atan2(3.141592653589793, 4.141592653589793) = 0.648948780815
+Math.sin(3.9269908169872414) = -0.707106781187
+Math.sinh(3.9269908169872414) = 25.367158319374
+Math.asin(3.9269908169872414) = NaN
+Math.cos(3.9269908169872414) = -0.707106781187
+Math.cosh(3.9269908169872414) = 25.386861192361
+Math.acos(3.9269908169872414) = NaN
+Math.tan(3.9269908169872414) = 1.000000000000
+Math.tanh(3.9269908169872414) = 0.999223894879
+Math.atan(3.9269908169872414) = 1.321447967784
+Math.atan2(3.9269908169872414, 4.926990816987241) = 0.672931229191
+Math.sin(4.71238898038469) = -1.000000000000
+Math.sinh(4.71238898038469) = 55.654397599418
+Math.asin(4.71238898038469) = NaN
+Math.cos(4.71238898038469) = -0.000000000000
+Math.cosh(4.71238898038469) = 55.663380890439
+Math.acos(4.71238898038469) = NaN
+Math.tanh(4.71238898038469) = 0.999838613989
+Math.atan(4.71238898038469) = 1.361691682971
+Math.atan2(4.71238898038469, 5.71238898038469) = 0.689765469251
+Math.sin(5.497787143782138) = -0.707106781187
+Math.sinh(5.497787143782138) = 122.073483514693
+Math.asin(5.497787143782138) = NaN
+Math.cos(5.497787143782138) = 0.707106781187
+Math.cosh(5.497787143782138) = 122.077579339582
+Math.acos(5.497787143782138) = NaN
+Math.tan(5.497787143782138) = -1.000000000000
+Math.tanh(5.497787143782138) = 0.999966449000
+Math.atan(5.497787143782138) = 1.390871988014
+Math.atan2(5.497787143782138, 6.497787143782138) = 0.702226398171
+Math.sin(6.283185307179586) = -0.000000000000
+Math.sinh(6.283185307179586) = 267.744894041016
+Math.asin(6.283185307179586) = NaN
+Math.cos(6.283185307179586) = 1.000000000000
+Math.cosh(6.283185307179586) = 267.746761483748
+Math.acos(6.283185307179586) = NaN
+Math.tan(6.283185307179586) = -0.000000000000
+Math.tanh(6.283185307179586) = 0.999993025340
+Math.atan(6.283185307179586) = 1.412965136507
+Math.atan2(6.283185307179586, 7.283185307179586) = 0.711819549590
+Math.cbrt(-3.0) = -1.442249570307
+Math.log(-3.0) = NaN
+Math.log10(-3.0) = NaN
+Math.log1p(-3.0) = NaN
+Math.exp(-3.0) = 0.049787068368
+Math.expm1(-3.0) = -0.950212931632
+Math.pow(-3.0, -2.0) = 0.111111111111
+Math.hypot(-3.0, -2.0) = 3.605551275464
+Math.cbrt(-2.0) = -1.259921049895
+Math.log(-2.0) = NaN
+Math.log10(-2.0) = NaN
+Math.log1p(-2.0) = NaN
+Math.exp(-2.0) = 0.135335283237
+Math.expm1(-2.0) = -0.864664716763
+Math.pow(-2.0, -1.0) = -0.500000000000
+Math.hypot(-2.0, -1.0) = 2.236067977500
+Math.cbrt(-1.0) = -1.000000000000
+Math.log(-1.0) = NaN
+Math.log10(-1.0) = NaN
+Math.log1p(-1.0) = -Infinity
+Math.exp(-1.0) = 0.367879441171
+Math.expm1(-1.0) = -0.632120558829
+Math.pow(-1.0, 0.0) = 1.000000000000
+Math.hypot(-1.0, 0.0) = 1.000000000000
+Math.cbrt(0.0) = 0.000000000000
+Math.log(0.0) = -Infinity
+Math.log10(0.0) = -Infinity
+Math.log1p(0.0) = 0.000000000000
+Math.exp(0.0) = 1.000000000000
+Math.expm1(0.0) = 0.000000000000
+Math.pow(0.0, 1.0) = 0.000000000000
+Math.hypot(0.0, 1.0) = 1.000000000000
+Math.cbrt(1.0) = 1.000000000000
+Math.log(1.0) = 0.000000000000
+Math.log10(1.0) = 0.000000000000
+Math.log1p(1.0) = 0.693147180560
+Math.exp(1.0) = 2.718281828459
+Math.expm1(1.0) = 1.718281828459
+Math.pow(1.0, 2.0) = 1.000000000000
+Math.hypot(1.0, 2.0) = 2.236067977500
+Math.cbrt(2.0) = 1.259921049895
+Math.log(2.0) = 0.693147180560
+Math.log10(2.0) = 0.301029995664
+Math.log1p(2.0) = 1.098612288668
+Math.exp(2.0) = 7.389056098931
+Math.expm1(2.0) = 6.389056098931
+Math.pow(2.0, 3.0) = 8.000000000000
+Math.hypot(2.0, 3.0) = 3.605551275464
+Math.cbrt(3.0) = 1.442249570307
+Math.log(3.0) = 1.098612288668
+Math.log10(3.0) = 0.477121254720
+Math.log1p(3.0) = 1.386294361120
+Math.exp(3.0) = 20.085536923188
+Math.expm1(3.0) = 19.085536923188
+Math.pow(3.0, 4.0) = 81.000000000000
+Math.hypot(3.0, 4.0) = 5.000000000000
+Math.ceil(0.0001) = 1.000000000000
+Math.floor(0.0001) = 0.000000000000
+Math.nextAfter(1.0, 2.0) = 1.000000000000
+Math.nextAfter(2.0, 1.0) = 2.000000000000
+Math.rint(0.5000001) = 1.000000000000
+StrictMath.sin(0.0) = 0.0
+StrictMath.sinh(0.0) = 0.0
+StrictMath.asin(0.0) = 0.0
+StrictMath.cos(0.0) = 1.0
+StrictMath.cosh(0.0) = 1.0
+StrictMath.acos(0.0) = 1.5707963267948966
+StrictMath.tan(0.0) = 0.0
+StrictMath.tanh(0.0) = 0.0
+StrictMath.atan(0.0) = 0.0
+StrictMath.atan2(0.0, 1.0) = 0.0
+StrictMath.sin(0.7853981633974483) = 0.7071067811865475
+StrictMath.sinh(0.7853981633974483) = 0.8686709614860095
+StrictMath.asin(0.7853981633974483) = 0.9033391107665127
+StrictMath.cos(0.7853981633974483) = 0.7071067811865476
+StrictMath.cosh(0.7853981633974483) = 1.3246090892520057
+StrictMath.acos(0.7853981633974483) = 0.6674572160283838
+StrictMath.tan(0.7853981633974483) = 0.9999999999999999
+StrictMath.tanh(0.7853981633974483) = 0.6557942026326724
+StrictMath.atan(0.7853981633974483) = 0.6657737500283538
+StrictMath.atan2(0.7853981633974483, 1.7853981633974483) = 0.41442380057704103
+StrictMath.sin(1.5707963267948966) = 1.0
+StrictMath.sinh(1.5707963267948966) = 2.3012989023072947
+StrictMath.asin(1.5707963267948966) = NaN
+StrictMath.cos(1.5707963267948966) = 6.123233995736766E-17
+StrictMath.cosh(1.5707963267948966) = 2.5091784786580567
+StrictMath.acos(1.5707963267948966) = NaN
+StrictMath.tan(1.5707963267948966) = 1.633123935319537E16
+StrictMath.tanh(1.5707963267948966) = 0.9171523356672744
+StrictMath.atan(1.5707963267948966) = 1.0038848218538872
+StrictMath.atan2(1.5707963267948966, 2.5707963267948966) = 0.5484797644174059
+StrictMath.sin(2.356194490192345) = 0.7071067811865476
+StrictMath.sinh(2.356194490192345) = 5.227971924677803
+StrictMath.asin(2.356194490192345) = NaN
+StrictMath.cos(2.356194490192345) = -0.7071067811865475
+StrictMath.cosh(2.356194490192345) = 5.322752149519959
+StrictMath.acos(2.356194490192345) = NaN
+StrictMath.tan(2.356194490192345) = -1.0000000000000002
+StrictMath.tanh(2.356194490192345) = 0.9821933800072388
+StrictMath.atan(2.356194490192345) = 1.1694228248157563
+StrictMath.atan2(2.356194490192345, 3.356194490192345) = 0.6120961173796371
+StrictMath.sin(3.141592653589793) = 1.2246467991473532E-16
+StrictMath.sinh(3.141592653589793) = 11.548739357257748
+StrictMath.asin(3.141592653589793) = NaN
+StrictMath.cos(3.141592653589793) = -1.0
+StrictMath.cosh(3.141592653589793) = 11.591953275521519
+StrictMath.acos(3.141592653589793) = NaN
+StrictMath.tan(3.141592653589793) = -1.2246467991473532E-16
+StrictMath.tanh(3.141592653589793) = 0.99627207622075
+StrictMath.atan(3.141592653589793) = 1.2626272556789115
+StrictMath.atan2(3.141592653589793, 4.141592653589793) = 0.6489487808147751
+StrictMath.sin(3.9269908169872414) = -0.7071067811865475
+StrictMath.sinh(3.9269908169872414) = 25.367158319374152
+StrictMath.asin(3.9269908169872414) = NaN
+StrictMath.cos(3.9269908169872414) = -0.7071067811865477
+StrictMath.cosh(3.9269908169872414) = 25.386861192360772
+StrictMath.acos(3.9269908169872414) = NaN
+StrictMath.tan(3.9269908169872414) = 0.9999999999999997
+StrictMath.tanh(3.9269908169872414) = 0.9992238948786412
+StrictMath.atan(3.9269908169872414) = 1.3214479677837223
+StrictMath.atan2(3.9269908169872414, 4.926990816987241) = 0.6729312291908799
+StrictMath.sin(4.71238898038469) = -1.0
+StrictMath.sinh(4.71238898038469) = 55.65439759941754
+StrictMath.asin(4.71238898038469) = NaN
+StrictMath.cos(4.71238898038469) = -1.8369701987210297E-16
+StrictMath.cosh(4.71238898038469) = 55.66338089043867
+StrictMath.acos(4.71238898038469) = NaN
+StrictMath.tan(4.71238898038469) = 5.443746451065123E15
+StrictMath.tanh(4.71238898038469) = 0.9998386139886326
+StrictMath.atan(4.71238898038469) = 1.3616916829711636
+StrictMath.atan2(4.71238898038469, 5.71238898038469) = 0.6897654692509959
+StrictMath.sin(5.497787143782138) = -0.7071067811865477
+StrictMath.sinh(5.497787143782138) = 122.07348351469281
+StrictMath.asin(5.497787143782138) = NaN
+StrictMath.cos(5.497787143782138) = 0.7071067811865474
+StrictMath.cosh(5.497787143782138) = 122.07757933958217
+StrictMath.acos(5.497787143782138) = NaN
+StrictMath.tan(5.497787143782138) = -1.0000000000000004
+StrictMath.tanh(5.497787143782138) = 0.9999664489997958
+StrictMath.atan(5.497787143782138) = 1.390871988014422
+StrictMath.atan2(5.497787143782138, 6.497787143782138) = 0.7022263981709682
+StrictMath.sin(6.283185307179586) = -2.4492935982947064E-16
+StrictMath.sinh(6.283185307179586) = 267.74489404101644
+StrictMath.asin(6.283185307179586) = NaN
+StrictMath.cos(6.283185307179586) = 1.0
+StrictMath.cosh(6.283185307179586) = 267.7467614837482
+StrictMath.acos(6.283185307179586) = NaN
+StrictMath.tan(6.283185307179586) = -2.4492935982947064E-16
+StrictMath.tanh(6.283185307179586) = 0.9999930253396107
+StrictMath.atan(6.283185307179586) = 1.4129651365067377
+StrictMath.atan2(6.283185307179586, 7.283185307179586) = 0.7118195495895945
+StrictMath.cbrt(-3.0) = -1.4422495703074083
+StrictMath.log(-3.0) = NaN
+StrictMath.log10(-3.0) = NaN
+StrictMath.log1p(-3.0) = NaN
+StrictMath.exp(-3.0) = 0.049787068367863944
+StrictMath.expm1(-3.0) = -0.950212931632136
+StrictMath.pow(-3.0, -2.0) = 0.1111111111111111
+StrictMath.hypot(-3.0, -2.0) = 3.605551275463989
+StrictMath.cbrt(-2.0) = -1.2599210498948732
+StrictMath.log(-2.0) = NaN
+StrictMath.log10(-2.0) = NaN
+StrictMath.log1p(-2.0) = NaN
+StrictMath.exp(-2.0) = 0.1353352832366127
+StrictMath.expm1(-2.0) = -0.8646647167633873
+StrictMath.pow(-2.0, -1.0) = -0.5
+StrictMath.hypot(-2.0, -1.0) = 2.23606797749979
+StrictMath.cbrt(-1.0) = -1.0
+StrictMath.log(-1.0) = NaN
+StrictMath.log10(-1.0) = NaN
+StrictMath.log1p(-1.0) = -Infinity
+StrictMath.exp(-1.0) = 0.36787944117144233
+StrictMath.expm1(-1.0) = -0.6321205588285577
+StrictMath.pow(-1.0, 0.0) = 1.0
+StrictMath.hypot(-1.0, 0.0) = 1.0
+StrictMath.cbrt(0.0) = 0.0
+StrictMath.log(0.0) = -Infinity
+StrictMath.log10(0.0) = -Infinity
+StrictMath.log1p(0.0) = 0.0
+StrictMath.exp(0.0) = 1.0
+StrictMath.expm1(0.0) = 0.0
+StrictMath.pow(0.0, 1.0) = 0.0
+StrictMath.hypot(0.0, 1.0) = 1.0
+StrictMath.cbrt(1.0) = 1.0
+StrictMath.log(1.0) = 0.0
+StrictMath.log10(1.0) = 0.0
+StrictMath.log1p(1.0) = 0.6931471805599453
+StrictMath.exp(1.0) = 2.7182818284590455
+StrictMath.expm1(1.0) = 1.718281828459045
+StrictMath.pow(1.0, 2.0) = 1.0
+StrictMath.hypot(1.0, 2.0) = 2.23606797749979
+StrictMath.cbrt(2.0) = 1.2599210498948732
+StrictMath.log(2.0) = 0.6931471805599453
+StrictMath.log10(2.0) = 0.3010299956639812
+StrictMath.log1p(2.0) = 1.0986122886681096
+StrictMath.exp(2.0) = 7.38905609893065
+StrictMath.expm1(2.0) = 6.38905609893065
+StrictMath.pow(2.0, 3.0) = 8.0
+StrictMath.hypot(2.0, 3.0) = 3.605551275463989
+StrictMath.cbrt(3.0) = 1.4422495703074083
+StrictMath.log(3.0) = 1.0986122886681096
+StrictMath.log10(3.0) = 0.47712125471966244
+StrictMath.log1p(3.0) = 1.3862943611198906
+StrictMath.exp(3.0) = 20.085536923187668
+StrictMath.expm1(3.0) = 19.085536923187668
+StrictMath.pow(3.0, 4.0) = 81.0
+StrictMath.hypot(3.0, 4.0) = 5.0
+StrictMath.ceil(0.0001) = 1.0
+StrictMath.floor(0.0001) = 0.0
+StrictMath.nextAfter(1.0, 2.0) = 1.0000000000000002
+StrictMath.rint(0.5000001) = 1.0
diff --git a/test/123-inline-execute2/info.txt b/test/123-inline-execute2/info.txt
new file mode 100644
index 0000000..4a728a7
--- /dev/null
+++ b/test/123-inline-execute2/info.txt
@@ -0,0 +1 @@
+Sanity checks for added InlineNative methods.
diff --git a/test/123-inline-execute2/src/Main.java b/test/123-inline-execute2/src/Main.java
new file mode 100644
index 0000000..9fadcfd
--- /dev/null
+++ b/test/123-inline-execute2/src/Main.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Locale;
+
+public class Main {
+  public static void main(String args[]) {
+    for (int i = 0; i <= 360; i += 45) {
+      double d = i * (Math.PI / 180.0);
+      System.out.println("Math.sin(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.sin(d)));
+
+      System.out.println("Math.sinh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.sinh(d)));
+      System.out.println("Math.asin(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.asin(d)));
+      System.out.println("Math.cos(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.cos(d)));
+      System.out.println("Math.cosh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.cosh(d)));
+      System.out.println("Math.acos(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.acos(d)));
+      if ((i + 90) % 180 != 0) {
+        System.out.println("Math.tan(" + d + ") = "
+            + String.format(Locale.US, "%.12f", Math.tan(d)));
+      }
+      System.out.println("Math.tanh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.tanh(d)));
+      System.out.println("Math.atan(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.atan(d)));
+      System.out.println("Math.atan2(" + d + ", " + (d + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.atan2(d, d + 1.0)));
+    }
+
+    for (int j = -3; j <= 3; j++) {
+      double e = (double) j;
+      System.out.println("Math.cbrt(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.cbrt(e)));
+      System.out.println("Math.log(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log(e)));
+      System.out.println("Math.log10(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log10(e)));
+      System.out.println("Math.log1p(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log1p(e)));
+      System.out.println("Math.exp(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.exp(e)));
+      System.out.println("Math.expm1(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.expm1(e)));
+      System.out.println("Math.pow(" + e + ", " + (e + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.pow(e, e + 1.0)));
+      System.out.println("Math.hypot(" + e + ", " + (e + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.hypot(e, e + 1.0)));
+    }
+
+    System.out.println("Math.ceil(0.0001) = "
+        + String.format(Locale.US, "%.12f", Math.ceil(0.0001)));
+    System.out.println("Math.floor(0.0001) = "
+        + String.format(Locale.US, "%.12f", Math.floor(0.0001)));
+    System.out.println("Math.nextAfter(1.0, 2.0) = "
+        + String.format(Locale.US, "%.12f", Math.nextAfter(1.0, 2.0)));
+    System.out.println("Math.nextAfter(2.0, 1.0) = "
+        + String.format(Locale.US, "%.12f", Math.nextAfter(2.0, 1.0)));
+    System.out.println("Math.rint(0.5000001) = "
+        + String.format(Locale.US, "%.12f", Math.rint(0.5000001)));
+
+    for (int i = 0; i <= 360; i += 45) {
+      double d = i * (StrictMath.PI / 180.0);
+      System.out.println("StrictMath.sin(" + d + ") = " + StrictMath.sin(d));
+      System.out.println("StrictMath.sinh(" + d + ") = " + StrictMath.sinh(d));
+      System.out.println("StrictMath.asin(" + d + ") = " + StrictMath.asin(d));
+      System.out.println("StrictMath.cos(" + d + ") = " + StrictMath.cos(d));
+      System.out.println("StrictMath.cosh(" + d + ") = " + StrictMath.cosh(d));
+      System.out.println("StrictMath.acos(" + d + ") = " + StrictMath.acos(d));
+      System.out.println("StrictMath.tan(" + d + ") = " + StrictMath.tan(d));
+      System.out.println("StrictMath.tanh(" + d + ") = " + StrictMath.tanh(d));
+      System.out.println("StrictMath.atan(" + d + ") = " + StrictMath.atan(d));
+      System.out.println("StrictMath.atan2(" + d + ", " + (d + 1.0) + ") = "
+          + StrictMath.atan2(d, d + 1.0));
+    }
+
+    for (int j = -3; j <= 3; j++) {
+      double e = (double) j;
+      System.out.println("StrictMath.cbrt(" + e + ") = " + StrictMath.cbrt(e));
+      System.out.println("StrictMath.log(" + e + ") = " + StrictMath.log(e));
+      System.out.println("StrictMath.log10(" + e + ") = " + StrictMath.log10(e));
+      System.out.println("StrictMath.log1p(" + e + ") = " + StrictMath.log1p(e));
+      System.out.println("StrictMath.exp(" + e + ") = " + StrictMath.exp(e));
+      System.out.println("StrictMath.expm1(" + e + ") = " + StrictMath.expm1(e));
+      System.out.println("StrictMath.pow(" + e + ", " + (e + 1.0) + ") = "
+          + StrictMath.pow(e, e + 1.0));
+      System.out.println("StrictMath.hypot(" + e + ", " + (e + 1.0) + ") = "
+          + StrictMath.hypot(e, e + 1.0));
+    }
+
+    System.out.println("StrictMath.ceil(0.0001) = " + StrictMath.ceil(0.0001));
+    System.out.println("StrictMath.floor(0.0001) = " + StrictMath.floor(0.0001));
+    System.out.println("StrictMath.nextAfter(1.0, 2.0) = " + StrictMath.nextAfter(1.0, 2.0));
+    System.out.println("StrictMath.rint(0.5000001) = " + StrictMath.rint(0.5000001));
+  }
+
+}
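The expected output above prints Math results rounded to twelve decimal places but StrictMath results exactly: StrictMath is bit-for-bit specified by fdlibm, while Math may be backed by platform intrinsics whose last bits can differ. A small illustration, with the hypothetical class name StrictCompare (the printed value may vary by platform):

    public class StrictCompare {
        public static void main(String[] args) {
            double x = 0.7853981633974483;  // pi / 4
            // StrictMath.sin is fully specified; Math.sin may use a faster
            // intrinsic, which is why the test rounds Math output to 12 places.
            System.out.println(Math.sin(x) == StrictMath.sin(x));
        }
    }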
diff --git a/test/124-missing-classes/build b/test/124-missing-classes/build
index b92ecf9..0a340a2 100644
--- a/test/124-missing-classes/build
+++ b/test/124-missing-classes/build
@@ -27,8 +27,8 @@
 rm 'classes/Main$MissingInnerClass.class'
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/125-gc-and-classloading/src/Main.java b/test/125-gc-and-classloading/src/Main.java
index 61e123d..e81ef7b 100644
--- a/test/125-gc-and-classloading/src/Main.java
+++ b/test/125-gc-and-classloading/src/Main.java
@@ -57,7 +57,7 @@
         public void run() {
             try {
                 cdl.await();
-                Class c0 = Class.forName("Main$BigClass");
+                Class<?> c0 = Class.forName("Main$BigClass");
             } catch (Exception e) {
                 throw new RuntimeException(e);
             }
diff --git a/test/126-miranda-multidex/build b/test/126-miranda-multidex/build
index b7f2118..00b9ba0 100644
--- a/test/126-miranda-multidex/build
+++ b/test/126-miranda-multidex/build
@@ -28,14 +28,12 @@
 rm classes2/Main.class classes2/MirandaAbstract.class classes2/MirandaClass*.class classes2/MirandaInterface2*.class
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes2 --output classes2.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes2.jill.jar -C classes2 .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   mv classes.dex classes-1.dex
-  ${JACK} --import classes2.jack --output-dex .
+  ${JACK} --import classes2.jill.jar --output-dex .
   mv classes.dex classes2.dex
   mv classes-1.dex classes.dex
 else
diff --git a/test/127-checker-secondarydex/build b/test/127-checker-secondarydex/build
new file mode 100755
index 0000000..7ce46ac
--- /dev/null
+++ b/test/127-checker-secondarydex/build
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+mv classes/Super.class classes-ex
+
+if [ ${USE_JACK} = "true" ]; then
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
+
+  ${JACK} --import classes.jill.jar --output-dex .
+  zip $TEST_NAME.jar classes.dex
+  ${JACK} --import classes-ex.jill.jar --output-dex .
+  zip ${TEST_NAME}-ex.jar classes.dex
+else
+  if [ ${NEED_DEX} = "true" ]; then
+    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+    zip $TEST_NAME.jar classes.dex
+    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+    zip ${TEST_NAME}-ex.jar classes.dex
+  fi
+fi
diff --git a/test/127-secondarydex/expected.txt b/test/127-checker-secondarydex/expected.txt
similarity index 100%
rename from test/127-secondarydex/expected.txt
rename to test/127-checker-secondarydex/expected.txt
diff --git a/test/127-secondarydex/info.txt b/test/127-checker-secondarydex/info.txt
similarity index 100%
rename from test/127-secondarydex/info.txt
rename to test/127-checker-secondarydex/info.txt
diff --git a/test/127-secondarydex/run b/test/127-checker-secondarydex/run
similarity index 100%
rename from test/127-secondarydex/run
rename to test/127-checker-secondarydex/run
diff --git a/test/127-secondarydex/src/Main.java b/test/127-checker-secondarydex/src/Main.java
similarity index 100%
rename from test/127-secondarydex/src/Main.java
rename to test/127-checker-secondarydex/src/Main.java
diff --git a/test/127-secondarydex/src/Super.java b/test/127-checker-secondarydex/src/Super.java
similarity index 100%
rename from test/127-secondarydex/src/Super.java
rename to test/127-checker-secondarydex/src/Super.java
diff --git a/test/127-checker-secondarydex/src/Test.java b/test/127-checker-secondarydex/src/Test.java
new file mode 100644
index 0000000..438e854
--- /dev/null
+++ b/test/127-checker-secondarydex/src/Test.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Test extends Super {
+    public void test(Test t) {
+        t.print();
+    }
+
+    private void print() {
+        System.out.println("Test");
+    }
+
+    /// CHECK-START: java.lang.Integer Test.toInteger() builder (after)
+    /// CHECK:         LoadClass needs_access_check:false klass:java.lang.Integer
+
+    public Integer toInteger() {
+        return new Integer(42);
+    }
+
+    public String toString() {
+        return new String("Test");
+    }
+}
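The /// CHECK lines above are assertions for ART's Checker tool: they are matched against the IR dump of the named compiler pass, here verifying that loading java.lang.Integer needs no access check even though Super comes from a secondary dex file. A minimal sketch of the annotation style, assuming the multiplication lowers to a Mul node in the builder pass (class name hypothetical):

    public class CheckerExample {
        /// CHECK-START: int CheckerExample.square(int) builder (after)
        /// CHECK: Mul
        static int square(int x) {
            return x * x;
        }
    }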
diff --git a/test/127-secondarydex/build b/test/127-secondarydex/build
deleted file mode 100755
index 0d9f4d6..0000000
--- a/test/127-secondarydex/build
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-
-mkdir classes-ex
-mv classes/Super.class classes-ex
-
-if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
-
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
-  zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
-  zip ${TEST_NAME}-ex.jar classes.dex
-else
-  if [ ${NEED_DEX} = "true" ]; then
-    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
-    zip $TEST_NAME.jar classes.dex
-    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
-    zip ${TEST_NAME}-ex.jar classes.dex
-  fi
-fi
diff --git a/test/127-secondarydex/src/Test.java b/test/127-secondarydex/src/Test.java
deleted file mode 100644
index 8547e79..0000000
--- a/test/127-secondarydex/src/Test.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
-    }
-}
diff --git a/test/130-hprof/src-ex/Allocator.java b/test/130-hprof/src-ex/Allocator.java
new file mode 100644
index 0000000..ee75a14
--- /dev/null
+++ b/test/130-hprof/src-ex/Allocator.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple allocator that returns a boot class path object.
+public class Allocator {
+    public static Object allocObject() {
+        return new Object();
+    }
+}
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 67e5232..c145f27 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.io.File;
 import java.lang.ref.WeakReference;
+import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
 import java.lang.reflect.InvocationTargetException;
 
@@ -34,24 +35,21 @@
         }
     }
 
-    public static void main(String[] args) {
-        // Create some data.
-        Object data[] = new Object[TEST_LENGTH];
-        for (int i = 0; i < data.length; i++) {
-            if (makeArray(i)) {
-                data[i] = new Object[TEST_LENGTH];
-            } else {
-                data[i] = String.valueOf(i);
-            }
+    private static Object allocInDifferentLoader() throws Exception {
+        final String DEX_FILE = System.getenv("DEX_LOCATION") + "/130-hprof-ex.jar";
+        Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        if (pathClassLoader == null) {
+            throw new AssertionError("Couldn't find path class loader class");
         }
-        for (int i = 0; i < data.length; i++) {
-            if (makeArray(i)) {
-                Object data2[] = (Object[]) data[i];
-                fillArray(data, data2, i);
-            }
-        }
-        System.out.println("Generated data.");
+        Constructor<?> constructor =
+            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+        ClassLoader loader = (ClassLoader)constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class<?> allocator = loader.loadClass("Allocator");
+        return allocator.getDeclaredMethod("allocObject").invoke(null);
+    }
 
+    private static void createDumpAndConv() throws RuntimeException {
         File dumpFile = null;
         File convFile = null;
 
@@ -88,6 +86,43 @@
         }
     }
 
+    public static void main(String[] args) throws Exception {
+        // Create some data.
+        Object data[] = new Object[TEST_LENGTH];
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                data[i] = new Object[TEST_LENGTH];
+            } else {
+                data[i] = String.valueOf(i);
+            }
+        }
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                Object data2[] = (Object[]) data[i];
+                fillArray(data, data2, i);
+            }
+        }
+        System.out.println("Generated data.");
+
+        createDumpAndConv();
+        Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+        if (klass == null) {
+            throw new AssertionError("Couldn't find DdmVmInternal class");
+        }
+        Method enableMethod = klass.getDeclaredMethod("enableRecentAllocations",
+                Boolean.TYPE);
+        if (enableMethod == null) {
+            throw new AssertionError("Couldn't find enableRecentAllocations method");
+        }
+        enableMethod.invoke(null, true);
+        Object o = allocInDifferentLoader();
+        // Run GC to cause class unloading.
+        Runtime.getRuntime().gc();
+        createDumpAndConv();
+        // TODO: Somehow check contents of hprof file.
+        enableMethod.invoke(null, false);
+    }
+
     private static File getHprofConf() {
         // Use the java.library.path. It points to the lib directory.
         File libDir = new File(System.getProperty("java.library.path"));
@@ -118,7 +153,7 @@
      */
     private static Method getDumpHprofDataMethod() {
         ClassLoader myLoader = Main.class.getClassLoader();
-        Class vmdClass;
+        Class<?> vmdClass;
         try {
             vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
         } catch (ClassNotFoundException cnfe) {
@@ -127,8 +162,7 @@
 
         Method meth;
         try {
-            meth = vmdClass.getMethod("dumpHprofData",
-                    new Class[] { String.class });
+            meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
             System.err.println("Found VMDebug but not dumpHprofData method");
             return null;
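allocInDifferentLoader() above constructs a dalvik.system.PathClassLoader reflectively, so the source still compiles on a host JVM where that class does not exist. A standalone sketch of the pattern; the dex path and the Allocator class name are hypothetical, and it only runs on Android:

    import java.lang.reflect.Constructor;

    public class LoaderSketch {
        public static void main(String[] args) throws Exception {
            String dexFile = "/data/local/tmp/example.jar";  // hypothetical path
            Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
            Constructor<?> ctor =
                    pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
            ClassLoader loader = (ClassLoader) ctor.newInstance(
                    dexFile, ClassLoader.getSystemClassLoader());
            System.out.println(loader.loadClass("Allocator").getName());
        }
    }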
diff --git a/test/134-reg-promotion/src/Main.java b/test/134-reg-promotion/src/Main.java
index 008ac58..f633524 100644
--- a/test/134-reg-promotion/src/Main.java
+++ b/test/134-reg-promotion/src/Main.java
@@ -32,13 +32,13 @@
 
     public static void main(String args[]) throws Exception {
         Class<?> c = Class.forName("Test");
-        Method m = c.getMethod("run", (Class[]) null);
+        Method m = c.getMethod("run");
         for (int i = 0; i < 10; i++) {
             holder = new char[128 * 1024][];
             m.invoke(null, (Object[]) null);
             holder = null;
         }
-        m = c.getMethod("run2", (Class[]) null);
+        m = c.getMethod("run2");
         for (int i = 0; i < 10; i++) {
             holder = new char[128 * 1024][];
             m.invoke(null, (Object[]) null);
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
new file mode 100644
index 0000000..b729301
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dlfcn.h>
+#include <iostream>
+
+#include "base/casts.h"
+#include "base/macros.h"
+#include "java_vm_ext.h"
+#include "jni_env_ext.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+static volatile std::atomic<bool> vm_was_shutdown(false);
+static const int kThreadCount = 4;
+
+static std::atomic<int> barrier_count(kThreadCount + 1);
+
+static void JniThreadBarrierWait() {
+  barrier_count--;
+  while (barrier_count.load() != 0) {
+    usleep(1000);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
+  // Wait until the runtime is shutdown.
+  while (!vm_was_shutdown.load()) {
+    usleep(1000);
+  }
+  std::cout << "About to call exception check\n";
+  env->ExceptionCheck();
+  LOG(ERROR) << "Should not be reached!";
+}
+
+// NO_RETURN does not work with extern "C" for target builds.
+extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
+  // Fake up the managed stack so we can detach.
+  Thread* const self = Thread::Current();
+  self->SetTopOfStack(nullptr);
+  self->SetTopOfShadowStack(nullptr);
+  JavaVM* vm = down_cast<JNIEnvExt*>(env)->vm;
+  vm->DetachCurrentThread();
+  // Reopen our own library to make sure it does not get unloaded from
+  // underneath us due to DestroyJavaVM. b/28406866
+  void* handle = dlopen(kIsDebugBuild ? "libarttestd.so" : "libarttest.so", RTLD_NOW);
+  CHECK(handle != nullptr);
+  vm->DestroyJavaVM();
+  vm_was_shutdown.store(true);
+  // Give threads some time to get stuck in ExceptionCheck.
+  usleep(1000000);
+  if (env != nullptr) {
+    // Use env != nullptr to trick noreturn.
+    exit(0);
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/136-daemon-jni-shutdown/expected.txt b/test/136-daemon-jni-shutdown/expected.txt
new file mode 100644
index 0000000..f0b6353
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+About to call exception check
+About to call exception check
+About to call exception check
+About to call exception check
diff --git a/test/136-daemon-jni-shutdown/info.txt b/test/136-daemon-jni-shutdown/info.txt
new file mode 100644
index 0000000..06a12df
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/info.txt
@@ -0,0 +1 @@
+Test that daemon threads that call into a JNI env after the runtime is shut down do not crash.
\ No newline at end of file
diff --git a/test/136-daemon-jni-shutdown/src/Main.java b/test/136-daemon-jni-shutdown/src/Main.java
new file mode 100644
index 0000000..6eceb75
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test that daemon threads that call into a JNI env after the runtime is shut down do not crash.
+ */
+public class Main {
+
+    public final static int THREAD_COUNT = 4;
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+
+        for (int i = 0; i < THREAD_COUNT; i++) {
+            Thread t = new Thread(new DaemonRunnable());
+            t.setDaemon(true);
+            t.start();
+        }
+        // Give threads time to start and become stuck in waitAndCallIntoJniEnv.
+        Thread.sleep(1000);
+        destroyJavaVMAndExit();
+    }
+
+    static native void waitAndCallIntoJniEnv();
+    static native void destroyJavaVMAndExit();
+
+    private static class DaemonRunnable implements Runnable {
+        public void run() {
+            for (;;) {
+                waitAndCallIntoJniEnv();
+            }
+        }
+    }
+}
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 78f8842..45251b8 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -53,6 +53,7 @@
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_sleep(JNIEnv*, jobject, jint, jboolean, jdouble) {
   // Keep pausing.
+  printf("Going to sleep\n");
   for (;;) {
     pause();
   }
@@ -76,7 +77,7 @@
     }
   }
 
-  printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str());
+  printf("Cannot find %s in backtrace:\n", seq[cur_search_index].c_str());
   for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
     if (BacktraceMap::IsValid(it->map)) {
       printf("  %s\n", it->func_name.c_str());
@@ -92,15 +93,21 @@
 // detecting this.
 #if __linux__
 static bool IsPicImage() {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-  CHECK(image_space != nullptr);  // We should be running with an image.
-  const OatFile* oat_file = image_space->GetOatFile();
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  CHECK(!image_spaces.empty());  // We should be running with an image.
+  const OatFile* oat_file = image_spaces[0]->GetOatFile();
   CHECK(oat_file != nullptr);     // We should have an oat file to go with the image.
   return oat_file->IsPic();
 }
 #endif
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(JNIEnv*, jobject, jint, jboolean) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(
+    JNIEnv*,
+    jobject,
+    jboolean full_signatures,
+    jint,
+    jboolean) {
 #if __linux__
   if (IsPicImage()) {
     LOG(INFO) << "Image is pic, in-process unwinding check bypassed.";
@@ -111,7 +118,7 @@
 
   std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid()));
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind in process.\n");
+    printf("Cannot unwind in process.\n");
     return JNI_FALSE;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind in process.\n");
@@ -121,14 +128,21 @@
   // We cannot really parse an exact stack, as the optimizing compiler may inline some functions.
   // This is also risky, as deduping might play a trick on us, so the test needs to make sure that
   // only unique functions are being expected.
+  // "mini-debug-info" does not include parameters to save space.
   std::vector<std::string> seq = {
       "Java_Main_unwindInProcess",                   // This function.
-      "boolean Main.unwindInProcess(int, boolean)",  // The corresponding Java native method frame.
+      "Main.unwindInProcess",                        // The corresponding Java native method frame.
+      "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
+      "Main.main"                                    // The Java entry method.
+  };
+  std::vector<std::string> full_seq = {
+      "Java_Main_unwindInProcess",                   // This function.
+      "boolean Main.unwindInProcess(boolean, int, boolean)",  // The corresponding Java native method frame.
       "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
       "void Main.main(java.lang.String[])"           // The Java entry method.
   };
 
-  bool result = CheckStack(bt.get(), seq);
+  bool result = CheckStack(bt.get(), full_signatures ? full_seq : seq);
   if (!kCauseSegfault) {
     return result ? JNI_TRUE : JNI_FALSE;
   } else {
@@ -177,7 +191,11 @@
 }
 #endif
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobject, jint pid_int) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(
+    JNIEnv*,
+    jobject,
+    jboolean full_signatures,
+    jint pid_int) {
 #if __linux__
   // TODO: What to do on Valgrind?
   pid_t pid = static_cast<pid_t>(pid_int);
@@ -204,7 +222,7 @@
   std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD));
   bool result = true;
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind other process.\n");
+    printf("Cannot unwind other process.\n");
     result = false;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind of other process.\n");
@@ -213,17 +231,27 @@
 
   if (result) {
     // See comment in unwindInProcess for non-exact stack matching.
+    // "mini-debug-info" does not include parameters to save space.
     std::vector<std::string> seq = {
         // "Java_Main_sleep",                        // The sleep function being executed in the
                                                      // other runtime.
                                                      // Note: For some reason, the name isn't
                                                      // resolved, so don't look for it right now.
+        "Main.sleep",                                // The corresponding Java native method frame.
+        "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
+        "Main.main"                                  // The Java entry method.
+    };
+    std::vector<std::string> full_seq = {
+        // "Java_Main_sleep",                        // The sleep function being executed in the
+                                                     // other runtime.
+                                                     // Note: For some reason, the name isn't
+                                                     // resolved, so don't look for it right now.
         "boolean Main.sleep(int, boolean, double)",  // The corresponding Java native method frame.
         "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)",  // Framework method.
         "void Main.main(java.lang.String[])"         // The Java entry method.
     };
 
-    result = CheckStack(bt.get(), seq);
+    result = CheckStack(bt.get(), full_signatures ? full_seq : seq);
   }
 
   if (ptrace(PTRACE_DETACH, pid, 0, 0) != 0) {
diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt
index 6a5618e..8db7853 100644
--- a/test/137-cfi/expected.txt
+++ b/test/137-cfi/expected.txt
@@ -1 +1,2 @@
 JNI_OnLoad called
+JNI_OnLoad called
diff --git a/test/137-cfi/run b/test/137-cfi/run
index ecbbbc7..ebc729b 100755
--- a/test/137-cfi/run
+++ b/test/137-cfi/run
@@ -14,4 +14,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} "$@"
+# Test with full DWARF debugging information.
+# Check full signatures of methods.
+${RUN} "$@" -Xcompiler-option --generate-debug-info \
+  --args --full-signatures --args --test-local --args --test-remote
+
+# Test with minimal compressed debugging information.
+# Check only method names (parameters are omitted to save space).
+# Check only remote unwinding since decompression is disabled in local unwinds (b/27391690).
+${RUN} "$@" -Xcompiler-option --generate-mini-debug-info --args --test-remote
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index dc3ef7e..5cfe33d 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -16,42 +16,53 @@
 
 import java.io.BufferedReader;
 import java.io.FileReader;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
+import java.io.InputStreamReader;
 import java.util.Arrays;
 import java.util.Comparator;
 
 public class Main implements Comparator<Main> {
-  // Whether to test local unwinding. Libunwind uses linker info to find executables. As we do
-  // not dlopen at the moment, this doesn't work, so keep it off for now.
-  public final static boolean TEST_LOCAL_UNWINDING = true;
+  // Whether to test local unwinding.
+  private boolean testLocal;
 
-  // Unwinding another process, modelling debuggerd. This doesn't use the linker, so should work
-  // no matter whether we're using dlopen or not.
-  public final static boolean TEST_REMOTE_UNWINDING = true;
+  // Unwinding another process, modelling debuggerd.
+  private boolean testRemote;
 
+  // We fork ourself to create the secondary process for remote unwinding.
   private boolean secondary;
 
+  // Expect the symbols to contain full method signatures including parameters.
+  private boolean fullSignatures;
+
   private boolean passed;
 
-  public Main(boolean secondary) {
-      this.secondary = secondary;
+  public Main(String[] args) throws Exception {
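+      // The first argument is the name of the test's native library.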
+      System.loadLibrary(args[0]);
+      for (String arg : args) {
+          if (arg.equals("--test-local")) {
+              testLocal = true;
+          }
+          if (arg.equals("--test-remote")) {
+              testRemote = true;
+          }
+          if (arg.equals("--secondary")) {
+              secondary = true;
+          }
+          if (arg.equals("--full-signatures")) {
+              fullSignatures = true;
+          }
+      }
+      if (!testLocal && !testRemote) {
+          System.out.println("No test selected.");
+      }
   }
 
   public static void main(String[] args) throws Exception {
-    System.loadLibrary(args[0]);
-      boolean secondary = false;
-      if (args.length > 0 && args[args.length - 1].equals("--secondary")) {
-          secondary = true;
-      }
-      new Main(secondary).run();
+      new Main(args).run();
   }
 
   private void run() {
       if (secondary) {
-          if (!TEST_REMOTE_UNWINDING) {
+          if (!testRemote) {
               throw new RuntimeException("Should not be running secondary!");
           }
           runSecondary();
@@ -67,11 +78,11 @@
 
   private void runPrimary() {
       // First do the in-process unwinding.
-      if (TEST_LOCAL_UNWINDING && !foo()) {
+      if (testLocal && !foo()) {
           System.out.println("Unwinding self failed.");
       }
 
-      if (!TEST_REMOTE_UNWINDING) {
+      if (!testRemote) {
           // Skip the remote step.
           return;
       }
@@ -89,14 +100,17 @@
               throw new RuntimeException("Couldn't parse process");
           }
 
-          // Wait a bit, so the forked process has time to run until its sleep phase.
+          // Wait until the forked process has reached its sleep phase.
           try {
-              Thread.sleep(5000);
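+              // Read the child's stdout until the "Going to sleep" marker appears,
+              // instead of guessing with a fixed delay.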
+              InputStreamReader stdout = new InputStreamReader(p.getInputStream(), "UTF-8");
+              BufferedReader lineReader = new BufferedReader(stdout);
+              while (!lineReader.readLine().contains("Going to sleep")) {
+              }
           } catch (Exception e) {
               throw new RuntimeException(e);
           }
 
-          if (!unwindOtherProcess(pid)) {
+          if (!unwindOtherProcess(fullSignatures, pid)) {
               System.out.println("Unwinding other process failed.");
           }
       } finally {
@@ -117,7 +131,7 @@
       // Could do reflection for the private pid field, but String parsing is easier.
       String s = p.toString();
       if (s.startsWith("Process[pid=")) {
-          return Integer.parseInt(s.substring("Process[pid=".length(), s.length() - 1));
+          return Integer.parseInt(s.substring("Process[pid=".length(), s.indexOf(",")));
       } else {
           return -1;
       }
@@ -154,7 +168,7 @@
       if (b) {
           return sleep(2, b, 1.0);
       } else {
-          return unwindInProcess(1, b);
+          return unwindInProcess(fullSignatures, 1, b);
       }
   }
 
@@ -162,6 +176,6 @@
 
   public native boolean sleep(int i, boolean b, double dummy);
 
-  public native boolean unwindInProcess(int i, boolean b);
-  public native boolean unwindOtherProcess(int pid);
+  public native boolean unwindInProcess(boolean fullSignatures, int i, boolean b);
+  public native boolean unwindOtherProcess(boolean fullSignatures, int pid);
 }
diff --git a/test/138-duplicate-classes-check/src/FancyLoader.java b/test/138-duplicate-classes-check/src/FancyLoader.java
deleted file mode 100644
index 03ec948..0000000
--- a/test/138-duplicate-classes-check/src/FancyLoader.java
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.lang.reflect.InvocationTargetException;
-
-/**
- * A class loader with atypical behavior: we try to load a private
- * class implementation before asking the system or boot loader.  This
- * is used to create multiple classes with identical names in a single VM.
- *
- * If DexFile is available, we use that; if not, we assume we're not in
- * Dalvik and instantiate the class with defineClass().
- *
- * The location of the DEX files and class data is dependent upon the
- * test framework.
- */
-public class FancyLoader extends ClassLoader {
-    /* this is where the "alternate" .class files live */
-    static final String CLASS_PATH = "classes-ex/";
-
-    /* this is the "alternate" DEX/Jar file */
-    static final String DEX_FILE = System.getenv("DEX_LOCATION") +
-            "/138-duplicate-classes-check-ex.jar";
-
-    /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
-
-    private Object mDexFile;
-
-    /**
-     * Construct FancyLoader, grabbing a reference to the DexFile class
-     * if we're running under Dalvik.
-     */
-    public FancyLoader(ClassLoader parent) {
-        super(parent);
-
-        try {
-            mDexClass = parent.loadClass("dalvik.system.DexFile");
-        } catch (ClassNotFoundException cnfe) {
-            // ignore -- not running Dalvik
-        }
-    }
-
-    /**
-     * Finds the class with the specified binary name.
-     *
-     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
-     * If we don't find a match, we throw an exception.
-     */
-    protected Class<?> findClass(String name) throws ClassNotFoundException
-    {
-        if (mDexClass != null) {
-            return findClassDalvik(name);
-        } else {
-            return findClassNonDalvik(name);
-        }
-    }
-
-    /**
-     * Finds the class with the specified binary name, from a DEX file.
-     */
-    private Class<?> findClassDalvik(String name)
-        throws ClassNotFoundException {
-
-        if (mDexFile == null) {
-            synchronized (FancyLoader.class) {
-                Constructor ctor;
-                /*
-                 * Construct a DexFile object through reflection.
-                 */
-                try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
-                } catch (NoSuchMethodException nsme) {
-                    throw new ClassNotFoundException("getConstructor failed",
-                        nsme);
-                }
-
-                try {
-                    mDexFile = ctor.newInstance(DEX_FILE);
-                } catch (InstantiationException ie) {
-                    throw new ClassNotFoundException("newInstance failed", ie);
-                } catch (IllegalAccessException iae) {
-                    throw new ClassNotFoundException("newInstance failed", iae);
-                } catch (InvocationTargetException ite) {
-                    throw new ClassNotFoundException("newInstance failed", ite);
-                }
-            }
-        }
-
-        /*
-         * Call DexFile.loadClass(String, ClassLoader).
-         */
-        Method meth;
-
-        try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
-        } catch (NoSuchMethodException nsme) {
-            throw new ClassNotFoundException("getMethod failed", nsme);
-        }
-
-        try {
-            meth.invoke(mDexFile, name, this);
-        } catch (IllegalAccessException iae) {
-            throw new ClassNotFoundException("loadClass failed", iae);
-        } catch (InvocationTargetException ite) {
-            throw new ClassNotFoundException("loadClass failed",
-                ite.getCause());
-        }
-
-        return null;
-    }
-
-    /**
-     * Finds the class with the specified binary name, from .class files.
-     */
-    private Class<?> findClassNonDalvik(String name)
-        throws ClassNotFoundException {
-
-        String pathName = CLASS_PATH + name + ".class";
-        //System.out.println("--- Fancy: looking for " + pathName);
-
-        File path = new File(pathName);
-        RandomAccessFile raf;
-
-        try {
-            raf = new RandomAccessFile(path, "r");
-        } catch (FileNotFoundException fnfe) {
-            throw new ClassNotFoundException("Not found: " + pathName);
-        }
-
-        /* read the entire file in */
-        byte[] fileData;
-        try {
-            fileData = new byte[(int) raf.length()];
-            raf.readFully(fileData);
-        } catch (IOException ioe) {
-            throw new ClassNotFoundException("Read error: " + pathName);
-        } finally {
-            try {
-                raf.close();
-            } catch (IOException ioe) {
-                // drop
-            }
-        }
-
-        /* create the class */
-        //System.out.println("--- Fancy: defining " + name);
-        try {
-            return defineClass(name, fileData, 0, fileData.length);
-        } catch (Throwable th) {
-            throw new ClassNotFoundException("defineClass failed", th);
-        }
-    }
-
-    /**
-     * Load a class.
-     *
-     * Normally a class loader wouldn't override this, but we want our
-     * version of the class to take precedence over an already-loaded
-     * version.
-     *
-     * We still want the system classes (e.g. java.lang.Object) from the
-     * bootstrap class loader.
-     */
-    protected Class<?> loadClass(String name, boolean resolve)
-        throws ClassNotFoundException
-    {
-        Class res;
-
-        /*
-         * 1. Invoke findLoadedClass(String) to check if the class has
-         * already been loaded.
-         *
-         * This doesn't change.
-         */
-        res = findLoadedClass(name);
-        if (res != null) {
-            System.out.println("FancyLoader.loadClass: "
-                + name + " already loaded");
-            if (resolve)
-                resolveClass(res);
-            return res;
-        }
-
-        /*
-         * 3. Invoke the findClass(String) method to find the class.
-         */
-        try {
-            res = findClass(name);
-            if (resolve)
-                resolveClass(res);
-        }
-        catch (ClassNotFoundException e) {
-            // we couldn't find it, so eat the exception and keep going
-        }
-
-        /*
-         * 2. Invoke the loadClass method on the parent class loader.  If
-         * the parent loader is null the class loader built-in to the
-         * virtual machine is used, instead.
-         *
-         * (Since we're not in java.lang, we can't actually invoke the
-         * parent's loadClass() method, but we passed our parent to the
-         * super-class which can take care of it for us.)
-         */
-        res = super.loadClass(name, resolve);   // returns class or throws
-        return res;
-    }
-}
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
index a9b5bb0..5ffceb9 100644
--- a/test/138-duplicate-classes-check/src/Main.java
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+import dalvik.system.DexClassLoader;
 import java.io.File;
 import java.lang.reflect.Method;
 
@@ -30,10 +31,14 @@
 
         // Now run the class from the -ex file.
 
-        FancyLoader loader = new FancyLoader(getClass().getClassLoader());
+        String dexPath = System.getenv("DEX_LOCATION") + "/138-duplicate-classes-check-ex.jar";
+        String optimizedDirectory = System.getenv("DEX_LOCATION");
+        String librarySearchPath = null;
+        DexClassLoader loader = new DexClassLoader(dexPath, optimizedDirectory, librarySearchPath,
+                getClass().getClassLoader());
 
         try {
-            Class testEx = loader.loadClass("TestEx");
+            Class<?> testEx = loader.loadClass("TestEx");
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
diff --git a/test/138-duplicate-classes-check2/build b/test/138-duplicate-classes-check2/build
index abcbbb8..d346251 100755
--- a/test/138-duplicate-classes-check2/build
+++ b/test/138-duplicate-classes-check2/build
@@ -24,9 +24,17 @@
 ${JAVAC} -d classes-ex `find src-ex -name '*.java'`
 rm classes-ex/A.class
 
-if [ ${NEED_DEX} = "true" ]; then
+if [ ${USE_JACK} = "true" ]; then
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
+  zip ${TEST_NAME}.jar classes.dex
+
+  jar cf classes-ex.jill.jar -C classes-ex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
+  zip ${TEST_NAME}-ex.jar classes.dex
+elif [ ${NEED_DEX} = "true" ]; then
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
-  zip $TEST_NAME.jar classes.dex
+  zip ${TEST_NAME}.jar classes.dex
   ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
   zip ${TEST_NAME}-ex.jar classes.dex
 fi
diff --git a/test/138-duplicate-classes-check2/src/FancyLoader.java b/test/138-duplicate-classes-check2/src/FancyLoader.java
index 7e2bb08..58b7ec4 100644
--- a/test/138-duplicate-classes-check2/src/FancyLoader.java
+++ b/test/138-duplicate-classes-check2/src/FancyLoader.java
@@ -42,7 +42,7 @@
             "/138-duplicate-classes-check2-ex.jar";
 
     /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
+    private Class<?> mDexClass;
 
     private Object mDexFile;
 
@@ -83,12 +83,12 @@
 
         if (mDexFile == null) {
             synchronized (FancyLoader.class) {
-                Constructor ctor;
+                Constructor<?> ctor;
                 /*
                  * Construct a DexFile object through reflection.
                  */
                 try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                    ctor = mDexClass.getConstructor(String.class);
                 } catch (NoSuchMethodException nsme) {
                     throw new ClassNotFoundException("getConstructor failed",
                         nsme);
@@ -112,8 +112,7 @@
         Method meth;
 
         try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
         } catch (NoSuchMethodException nsme) {
             throw new ClassNotFoundException("getMethod failed", nsme);
         }
@@ -185,7 +184,7 @@
     protected Class<?> loadClass(String name, boolean resolve)
         throws ClassNotFoundException
     {
-        Class res;
+        Class<?> res;
 
         /*
          * 1. Invoke findLoadedClass(String) to check if the class has
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
index a9b5bb0..a0d6977 100644
--- a/test/138-duplicate-classes-check2/src/Main.java
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -33,7 +33,7 @@
         FancyLoader loader = new FancyLoader(getClass().getClassLoader());
 
         try {
-            Class testEx = loader.loadClass("TestEx");
+            Class<?> testEx = loader.loadClass("TestEx");
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
diff --git a/test/139-register-natives/src/Main.java b/test/139-register-natives/src/Main.java
index 8dd2131..11bd53f 100644
--- a/test/139-register-natives/src/Main.java
+++ b/test/139-register-natives/src/Main.java
@@ -47,7 +47,7 @@
     }
   }
 
-  private native static int registerNatives(Class c);
+  private native static int registerNatives(Class<?> c);
 
   private static void expectThrows(Base b) {
     try {
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 11de660..2b77b29 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -12,7 +12,6 @@
 JNI_OnUnload called
 null
 loader null false
-loader null false
 JNI_OnLoad called
 JNI_OnUnload called
 null
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
index d913efe..bbbb0a6 100644
--- a/test/141-class-unload/jni_unload.cc
+++ b/test/141-class-unload/jni_unload.cc
@@ -19,7 +19,6 @@
 #include <iostream>
 
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "runtime.h"
 #include "thread-inl.h"
 
@@ -29,7 +28,7 @@
 extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) {
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr) {
-    jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current());
+    jit->WaitForCompilationToFinish(Thread::Current());
   }
 }
 
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 0640b36..f9b6180 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -23,21 +23,20 @@
 
 public class Main {
     static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/141-class-unload-ex.jar";
+    static final String LIBRARY_SEARCH_PATH = System.getProperty("java.library.path");
     static String nativeLibraryName;
 
     public static void main(String[] args) throws Exception {
         nativeLibraryName = args[0];
-        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
         if (pathClassLoader == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
-        Constructor constructor =
-            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+        Constructor<?> constructor =
+            pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
         try {
             testUnloadClass(constructor);
             testUnloadLoader(constructor);
-            // Test that we don't unload if we have a Method keeping the class live.
-            testNoUnloadInvoke(constructor);
             // Test that we don't unload if we have an instance.
             testNoUnloadInstance(constructor);
             // Test JNI_OnLoad and JNI_OnUnload.
@@ -49,7 +48,7 @@
             // Test that the oat files are unloaded.
             testOatFilesUnloaded(getPid());
         } catch (Exception e) {
-            System.out.println(e);
+            e.printStackTrace();
         }
     }
 
@@ -68,7 +67,7 @@
         System.out.println("Number of loaded unload-ex maps " + count);
     }
 
-    private static void stressTest(Constructor constructor) throws Exception {
+    private static void stressTest(Constructor<?> constructor) throws Exception {
         for (int i = 0; i <= 100; ++i) {
             setUpUnloadLoader(constructor, false);
             if (i % 10 == 0) {
@@ -77,69 +76,80 @@
         }
     }
 
-    private static void testUnloadClass(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
-        // No strong refernces to class loader, should get unloaded.
+    private static void testUnloadClass(Constructor<?> constructor) throws Exception {
+        WeakReference<Class> klass = setUpUnloadClassWeak(constructor);
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
-        WeakReference<Class> klass2 = setUpUnloadClass(constructor);
+        WeakReference<Class> klass2 = setUpUnloadClassWeak(constructor);
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(klass.get());
         System.out.println(klass2.get());
     }
 
-    private static void testUnloadLoader(Constructor constructor)
+    private static void testUnloadLoader(Constructor<?> constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
-      // No strong refernces to class loader, should get unloaded.
+      // No strong references to class loader, should get unloaded.
       Runtime.getRuntime().gc();
       // If the weak reference is cleared, then it was unloaded.
       System.out.println(loader.get());
     }
 
-    private static void testStackTrace(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
-        Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace");
-        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get());
+    private static void testStackTrace(Constructor<?> constructor) throws Exception {
+        Class<?> klass = setUpUnloadClass(constructor);
+        WeakReference<Class> weakKlass = new WeakReference<Class>(klass);
+        Method stackTraceMethod = klass.getDeclaredMethod("generateStackTrace");
+        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass);
         stackTraceMethod = null;
+        klass = null;
         Runtime.getRuntime().gc();
-        boolean isNull = klass.get() == null;
+        boolean isNull = weakKlass.get() == null;
         System.out.println("class null " + isNull + " " + throwable.getMessage());
     }
 
-    private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
+    private static void testLoadAndUnloadLibrary(Constructor<?> constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(loader.get());
     }
 
-    private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
-        boolean isNull = loader.get() == null;
-        System.out.println("loader null " + isNull);
+    private static Object testNoUnloadHelper(ClassLoader loader) throws Exception {
+        Class<?> intHolder = loader.loadClass("IntHolder");
+        return intHolder.newInstance();
     }
 
-    private static void testNoUnloadInstance(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        Object o = intHolder.get().newInstance();
-        Runtime.getRuntime().gc();
-        boolean isNull = loader.get() == null;
-        System.out.println("loader null " + isNull);
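+    // Pair holds the loaded instance strongly but the class loader only weakly,
+    // so the GC check below sees whether the instance alone keeps its loader alive.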
+    static class Pair {
+      public Pair(Object o, ClassLoader l) {
+        object = o;
+        classLoader = new WeakReference<ClassLoader>(l);
+      }
+
+      public Object object;
+      public WeakReference<ClassLoader> classLoader;
     }
 
-    private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
+    private static Pair testNoUnloadInstanceHelper(Constructor<?> constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
-            DEX_FILE, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Object o = testNoUnloadHelper(loader);
+        return new Pair(o, loader);
+    }
+
+    private static void testNoUnloadInstance(Constructor<?> constructor) throws Exception {
+        Pair p = testNoUnloadInstanceHelper(constructor);
+        Runtime.getRuntime().gc();
+        // If the class loader was unloaded too early due to races, just pass the test.
+        boolean isNull = p.classLoader.get() == null;
+        System.out.println("loader null " + isNull);
+    }
+
+    private static Class<?> setUpUnloadClass(Constructor<?> constructor) throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method getValue = intHolder.getDeclaredMethod("getValue");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         // Make sure we don't accidentally preserve the value in the int holder, the class
@@ -148,15 +158,20 @@
         setValue.invoke(intHolder, 2);
         System.out.println((int) getValue.invoke(intHolder));
         waitForCompilation(intHolder);
-        return new WeakReference(intHolder);
+        return intHolder;
     }
 
-    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
+    private static WeakReference<Class> setUpUnloadClassWeak(Constructor<?> constructor)
+            throws Exception {
+        return new WeakReference<Class>(setUpUnloadClass(constructor));
+    }
+
+    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor<?> constructor,
                                                                 boolean waitForCompilation)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
-            DEX_FILE, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         setValue.invoke(intHolder, 2);
         if (waitForCompilation) {
@@ -165,7 +180,7 @@
         return new WeakReference(loader);
     }
 
-    private static void waitForCompilation(Class intHolder) throws Exception {
+    private static void waitForCompilation(Class<?> intHolder) throws Exception {
       // Load the native library so that we can call waitForCompilation.
       Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
       loadLibrary.invoke(intHolder, nativeLibraryName);
@@ -174,13 +189,14 @@
       waitForCompilation.invoke(intHolder);
     }
 
-    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor)
+    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor<?> constructor)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
-            DEX_FILE, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
         loadLibrary.invoke(intHolder, nativeLibraryName);
+        waitForCompilation(intHolder);
         return new WeakReference(loader);
     }
 
diff --git a/test/142-classloader2/smali/B.smali b/test/142-classloader2/smali/B.smali
new file mode 100644
index 0000000..01bd593
--- /dev/null
+++ b/test/142-classloader2/smali/B.smali
@@ -0,0 +1,10 @@
+.class public LB;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {p1}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
index 86c61eb..80b00e7 100644
--- a/test/142-classloader2/src/Main.java
+++ b/test/142-classloader2/src/Main.java
@@ -25,8 +25,8 @@
     private static ClassLoader createClassLoader(String dexPath, ClassLoader parent) {
         try {
             Class<?> myClassLoaderClass = Class.forName("MyPathClassLoader");
-            Constructor constructor = myClassLoaderClass.getConstructor(String.class,
-                                                                        ClassLoader.class);
+            Constructor<?> constructor = myClassLoaderClass.getConstructor(String.class,
+                                                                           ClassLoader.class);
             return (ClassLoader)constructor.newInstance(dexPath, parent);
         } catch (Exception e) {
             // Ups, not available?!?!
@@ -71,6 +71,21 @@
             throw new IllegalStateException("Expected Ex-A, found " + exValue);
         }
 
+        // Try to load a dex file with bad dex code. Use new instance to force verification.
+        try {
+          Class<?> badClass = Main.class.getClassLoader().loadClass("B");
+          badClass.newInstance();
+          System.out.println("Should not be able to load class from bad dex file.");
+        } catch (VerifyError e) {
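+          // Expected: B's constructor uses an out-of-range register (see smali/B.smali).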
+        }
+
+        // Make sure the same error is rethrown when reloading the bad class.
+        try {
+          Class<?> badClass = Main.class.getClassLoader().loadClass("B");
+          System.out.println("Should not be able to load class from bad dex file.");
+        } catch (VerifyError e) {
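+          // Expected: the original VerifyError is rethrown for the cached erroneous class.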
+        }
+
         System.out.println("Everything OK.");
     }
 }
diff --git a/test/143-string-value/check b/test/143-string-value/check
new file mode 100755
index 0000000..92f6e90
--- /dev/null
+++ b/test/143-string-value/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
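+# By run-test convention, $1 is the expected output and $2 the actual output.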
+# Strip error log messages.
+sed -e '/^art E.*\] /d' "$2" > "$2.tmp"
+
+diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
diff --git a/test/143-string-value/expected.txt b/test/143-string-value/expected.txt
new file mode 100644
index 0000000..06cdb89
--- /dev/null
+++ b/test/143-string-value/expected.txt
@@ -0,0 +1 @@
+The String#value field is not present on Android versions >= 6.0
diff --git a/test/143-string-value/info.txt b/test/143-string-value/info.txt
new file mode 100644
index 0000000..61ec816
--- /dev/null
+++ b/test/143-string-value/info.txt
@@ -0,0 +1,2 @@
+Test to ensure we emit an error message when being asked
+for String#value.
diff --git a/test/143-string-value/src/Main.java b/test/143-string-value/src/Main.java
new file mode 100644
index 0000000..e970692
--- /dev/null
+++ b/test/143-string-value/src/Main.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    try {
+      String.class.getDeclaredField("value");
+      throw new Error("Expected to fail");
+    } catch (ReflectiveOperationException e) {
+      // Ignore...
+    }
+  }
+}
diff --git a/test/144-static-field-sigquit/expected.txt b/test/144-static-field-sigquit/expected.txt
new file mode 100644
index 0000000..e0c3e90
--- /dev/null
+++ b/test/144-static-field-sigquit/expected.txt
@@ -0,0 +1,4 @@
+Starting threads...
+Performing sigquits for 5 seconds
+Got date field
+Joined threads
diff --git a/test/144-static-field-sigquit/info.txt b/test/144-static-field-sigquit/info.txt
new file mode 100644
index 0000000..5dcfc76
--- /dev/null
+++ b/test/144-static-field-sigquit/info.txt
@@ -0,0 +1,8 @@
+Regression test for ag/853775
+
+Tests that unresolved classes are not put into the dex cache by the verifier.
+This was potentially happening when receiving a signal while in the static
+initializer of a class and also within a synchronized block.
+
+This test is flaky and reproduces the issue only rarely, but it should trigger
+often enough on the buildbots to be useful.
diff --git a/test/144-static-field-sigquit/src/ClassWithStaticField.java b/test/144-static-field-sigquit/src/ClassWithStaticField.java
new file mode 100644
index 0000000..0b2c855
--- /dev/null
+++ b/test/144-static-field-sigquit/src/ClassWithStaticField.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+
+public class ClassWithStaticField {
+    public static Date mDate = new Date();
+}
diff --git a/test/144-static-field-sigquit/src/Main.java b/test/144-static-field-sigquit/src/Main.java
new file mode 100644
index 0000000..ab94da3
--- /dev/null
+++ b/test/144-static-field-sigquit/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+    public static void main(String[] args) throws Exception {
+        Thread thread1 = new Thread(new SigQuit());
+        Thread thread2 = new Thread(new SynchronizedUse());
+
+        System.out.println("Starting threads...");
+        thread1.start();
+        Thread.sleep(2000);
+        thread2.start();
+
+        thread1.join();
+        thread2.join();
+        System.out.println("Joined threads");
+    }
+}
diff --git a/test/144-static-field-sigquit/src/SigQuit.java b/test/144-static-field-sigquit/src/SigQuit.java
new file mode 100644
index 0000000..bed23e4
--- /dev/null
+++ b/test/144-static-field-sigquit/src/SigQuit.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class SigQuit implements Runnable {
+    private final static int sigquit;
+    private final static Method kill;
+    private final static int pid;
+
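+    // Assumption: android.system.Os and android.system.OsConstants are looked up
+    // reflectively so there is no compile-time dependency on Android-only APIs.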
+    static {
+        int pidTemp = -1;
+        int sigquitTemp = -1;
+        Method killTemp = null;
+
+        try {
+            Class<?> osClass = Class.forName("android.system.Os");
+            Method getpid = osClass.getDeclaredMethod("getpid");
+            pidTemp = (Integer) getpid.invoke(null);
+
+            Class<?> osConstants = Class.forName("android.system.OsConstants");
+            Field sigquitField = osConstants.getDeclaredField("SIGQUIT");
+            sigquitTemp = (Integer) sigquitField.get(null);
+
+            killTemp = osClass.getDeclaredMethod("kill", int.class, int.class);
+        } catch (Exception e) {
+            if (!e.getClass().getName().equals("ErrnoException")) {
+                e.printStackTrace(System.out);
+            }
+        }
+
+        pid = pidTemp;
+        sigquit = sigquitTemp;
+        kill = killTemp;
+    }
+
+    public boolean perform() {
+        try {
+            kill.invoke(null, pid, sigquit);
+        } catch (Exception e) {
+            if (!e.getClass().getName().equals("ErrnoException")) {
+                e.printStackTrace(System.out);
+            }
+        }
+        return true;
+    }
+
+    public void run() {
+        long endTime = System.currentTimeMillis() + 5000;
+        System.out.println("Performing sigquits for 5 seconds");
+        while (System.currentTimeMillis() < endTime) {
+            perform();
+        }
+    }
+}
diff --git a/test/144-static-field-sigquit/src/SynchronizedUse.java b/test/144-static-field-sigquit/src/SynchronizedUse.java
new file mode 100644
index 0000000..43af1d9
--- /dev/null
+++ b/test/144-static-field-sigquit/src/SynchronizedUse.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+
+public class SynchronizedUse implements Runnable {
+    public void run() {
+        synchronized (this) {
+            Date dateField = ClassWithStaticField.mDate;
+            System.out.println("Got date field");
+        }
+    }
+}
diff --git a/test/145-alloc-tracking-stress/expected.txt b/test/145-alloc-tracking-stress/expected.txt
new file mode 100644
index 0000000..134d8d0
--- /dev/null
+++ b/test/145-alloc-tracking-stress/expected.txt
@@ -0,0 +1 @@
+Finishing
diff --git a/test/145-alloc-tracking-stress/info.txt b/test/145-alloc-tracking-stress/info.txt
new file mode 100644
index 0000000..443062d
--- /dev/null
+++ b/test/145-alloc-tracking-stress/info.txt
@@ -0,0 +1 @@
+Regression test for b/18661622
diff --git a/test/145-alloc-tracking-stress/src/Main.java b/test/145-alloc-tracking-stress/src/Main.java
new file mode 100644
index 0000000..4a67a80
--- /dev/null
+++ b/test/145-alloc-tracking-stress/src/Main.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.util.Map;
+
+public class Main implements Runnable {
+    static final int numberOfThreads = 4;
+    static final int totalOperations = 1000;
+    static Method enableAllocTrackingMethod;
+    static Object holder;
+    static volatile boolean trackingThreadDone = false;
+    int threadIndex;
+
+    Main(int index) {
+        threadIndex = index;
+    }
+
+    public static void main(String[] args) throws Exception {
+      Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+      if (klass == null) {
+          throw new AssertionError("Couldn't find DdmVmInternal class");
+      }
+      enableAllocTrackingMethod = klass.getDeclaredMethod("enableRecentAllocations",
+              Boolean.TYPE);
+      if (enableAllocTrackingMethod == null) {
+          throw new AssertionError("Couldn't find enableRecentAllocations method");
+      }
+
+      final Thread[] threads = new Thread[numberOfThreads];
+      for (int t = 0; t < threads.length; t++) {
+          threads[t] = new Thread(new Main(t));
+          threads[t].start();
+      }
+      for (Thread t : threads) {
+          t.join();
+      }
+      System.out.println("Finishing");
+    }
+
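+    // Thread 0 toggles recent-allocation tracking in a loop while the other
+    // threads allocate continuously, stressing the race from b/18661622.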
+    public void run() {
+        if (threadIndex == 0) {
+            for (int i = 0; i < totalOperations; ++i) {
+                try {
+                    enableAllocTrackingMethod.invoke(null, true);
+                    holder = new Object();
+                    enableAllocTrackingMethod.invoke(null, false);
+                } catch (Exception e) {
+                    System.out.println(e);
+                    return;
+                }
+            }
+            trackingThreadDone = true;
+        } else {
+            while (!trackingThreadDone) {
+                holder = new Object();
+            }
+        }
+    }
+}
diff --git a/test/146-bad-interface/build b/test/146-bad-interface/build
new file mode 100755
index 0000000..0dd8573
--- /dev/null
+++ b/test/146-bad-interface/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/146-bad-interface/expected.txt b/test/146-bad-interface/expected.txt
new file mode 100644
index 0000000..3441966
--- /dev/null
+++ b/test/146-bad-interface/expected.txt
@@ -0,0 +1 @@
+running invoke
diff --git a/test/146-bad-interface/info.txt b/test/146-bad-interface/info.txt
new file mode 100644
index 0000000..38f188e
--- /dev/null
+++ b/test/146-bad-interface/info.txt
@@ -0,0 +1 @@
+Check whether a duplicate class can invoke-interface on an unresolved method.
diff --git a/test/146-bad-interface/run b/test/146-bad-interface/run
new file mode 100755
index 0000000..ceef6b8
--- /dev/null
+++ b/test/146-bad-interface/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run as no-dex-file-fallback to confirm that even though the -ex file has a symbolic
+# reference to A, there's no class-def, so we don't detect a collision.
+exec ${RUN} --secondary "${@}"
diff --git a/test/146-bad-interface/smali/invoke_inf.smali b/test/146-bad-interface/smali/invoke_inf.smali
new file mode 100644
index 0000000..c5101e0
--- /dev/null
+++ b/test/146-bad-interface/smali/invoke_inf.smali
@@ -0,0 +1,24 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeInf;
+.super Ljava/lang/Object;
+
+.method public static doInvoke(LIface;)V
+.locals 0
+    invoke-interface {p0}, LIface;->invoke()V
+    return-void
+.end method
+
diff --git a/test/146-bad-interface/src-ex/A.java b/test/146-bad-interface/src-ex/A.java
new file mode 100644
index 0000000..a30a5f2
--- /dev/null
+++ b/test/146-bad-interface/src-ex/A.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A implements Iface {
+}
diff --git a/test/146-bad-interface/src-ex/Iface.java b/test/146-bad-interface/src-ex/Iface.java
new file mode 100644
index 0000000..921e25c
--- /dev/null
+++ b/test/146-bad-interface/src-ex/Iface.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Iface {
+  public default void aPadding() {}
+  public default void bPadding() {}
+  public default void cPadding() {}
+  public default void dPadding() {}
+  public default void invoke() {
+    System.out.println("running invoke");
+  }
+  public default void wPadding() {}
+  public default void xPadding() {}
+  public default void yPadding() {}
+  public default void zPadding() {}
+}
diff --git a/test/146-bad-interface/src/Main.java b/test/146-bad-interface/src/Main.java
new file mode 100644
index 0000000..5534bb4
--- /dev/null
+++ b/test/146-bad-interface/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import dalvik.system.PathClassLoader;
+
+/**
+ * Structural hazard test.
+ */
+public class Main {
+  static final String DEX_LOCATION = System.getenv("DEX_LOCATION");
+  static final String DEX_FILES =
+      DEX_LOCATION + "/146-bad-interface-ex.jar" + ":" +
+      DEX_LOCATION + "/146-bad-interface.jar";
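+  // Both jars go on a single PathClassLoader, with the -ex jar first on the
+  // path, so its versions of the duplicated classes are the ones resolved.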
+  public static void main(String[] args) {
+    try {
+      PathClassLoader p = new PathClassLoader(DEX_FILES, Main.class.getClassLoader());
+      Class<?> c = Class.forName("A", true, p);
+      Object o = c.newInstance();
+      Class<?> runner = Class.forName("InvokeInf", true, p);
+      Class<?> arg = Class.forName("Iface", true, p);
+      Method r = runner.getDeclaredMethod("doInvoke", arg);
+      r.invoke(null, o);
+    } catch (Throwable t) {
+      System.out.println("Error occurred");
+      System.out.println(t);
+      t.printStackTrace();
+    }
+  }
+}
diff --git a/test/147-stripped-dex-fallback/expected.txt b/test/147-stripped-dex-fallback/expected.txt
new file mode 100644
index 0000000..af5626b
--- /dev/null
+++ b/test/147-stripped-dex-fallback/expected.txt
@@ -0,0 +1 @@
+Hello, world!
diff --git a/test/147-stripped-dex-fallback/info.txt b/test/147-stripped-dex-fallback/info.txt
new file mode 100644
index 0000000..72a2ca8
--- /dev/null
+++ b/test/147-stripped-dex-fallback/info.txt
@@ -0,0 +1,2 @@
+Verify that we fall back to running the dex code in the oat file if there is
+no image and the original dex code has been stripped.
diff --git a/test/147-stripped-dex-fallback/run b/test/147-stripped-dex-fallback/run
new file mode 100755
index 0000000..e594010
--- /dev/null
+++ b/test/147-stripped-dex-fallback/run
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure the flags include prebuild.
+flags="$@"
+if [[ "${flags}" == *--no-prebuild* ]] ; then
+  echo "Test 147-stripped-dex-fallback is not intended to run in no-prebuild mode."
+  exit 1
+fi
+
+${RUN} ${flags} --strip-dex --no-dex2oat
diff --git a/test/147-stripped-dex-fallback/src/Main.java b/test/147-stripped-dex-fallback/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/147-stripped-dex-fallback/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/148-multithread-gc-annotations/check b/test/148-multithread-gc-annotations/check
new file mode 100755
index 0000000..842bdc6
--- /dev/null
+++ b/test/148-multithread-gc-annotations/check
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Check that the string "error" isn't present
+if grep error "$2"; then
+    exit 1
+else
+    exit 0
+fi
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/148-multithread-gc-annotations/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/148-multithread-gc-annotations/expected.txt
diff --git a/test/148-multithread-gc-annotations/gc_coverage.cc b/test/148-multithread-gc-annotations/gc_coverage.cc
new file mode 100644
index 0000000..263eefd
--- /dev/null
+++ b/test/148-multithread-gc-annotations/gc_coverage.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gc/heap.h"
+#include "jni.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_MovingGCThread_performHomogeneousSpaceCompact(JNIEnv*, jclass) {
+  return Runtime::Current()->GetHeap()->PerformHomogeneousSpaceCompact() == gc::kSuccess ?
+      JNI_TRUE : JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_MovingGCThread_supportHomogeneousSpaceCompact(JNIEnv*, jclass) {
+  return Runtime::Current()->GetHeap()->SupportHomogeneousSpaceCompactAndCollectorTransitions() ?
+      JNI_TRUE : JNI_FALSE;
+}
+
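+// Returns the raw address of the given object so the Java side can tell
+// whether a compaction moved it. The value is for comparison only and must
+// never be dereferenced.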
+extern "C" JNIEXPORT jlong JNICALL Java_MovingGCThread_objectAddress(JNIEnv* env, jclass, jobject object) {
+  ScopedObjectAccess soa(env);
+  return reinterpret_cast<jlong>(soa.Decode<mirror::Object*>(object));
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/148-multithread-gc-annotations/info.txt b/test/148-multithread-gc-annotations/info.txt
new file mode 100644
index 0000000..c62e544
--- /dev/null
+++ b/test/148-multithread-gc-annotations/info.txt
@@ -0,0 +1 @@
+Tests that getting annotations works during moving GC.
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass1.java b/test/148-multithread-gc-annotations/src/AnnoClass1.java
new file mode 100644
index 0000000..3eb45ae
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnoClass1.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface AnnoClass1 {
+    Class<?> value();
+}
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass2.java b/test/148-multithread-gc-annotations/src/AnnoClass2.java
new file mode 100644
index 0000000..b17490f
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnoClass2.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface AnnoClass2 {
+    Class<?> value();
+}
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass3.java b/test/148-multithread-gc-annotations/src/AnnoClass3.java
new file mode 100644
index 0000000..7d600a8
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnoClass3.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface AnnoClass3 {
+    Class<?> value();
+}
diff --git a/test/148-multithread-gc-annotations/src/AnnotationThread.java b/test/148-multithread-gc-annotations/src/AnnotationThread.java
new file mode 100644
index 0000000..ebc14e9
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnotationThread.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@AnnoClass1(AnnoClass2.class)
+@AnnoClass2(AnnoClass3.class)
+@AnnoClass3(AnnoClass1.class)
+public class AnnotationThread implements Runnable {
+    public void run() {
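+        // Repeatedly decode the (cyclic) annotations while the sibling thread
+        // performs homogeneous space compaction; a failure shows up as a
+        // missing annotation array and is reported via the "error" marker.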
+        for (int i = 0; i < 20; i++) {
+            Annotation[] annotations = AnnotationThread.class.getAnnotations();
+            if (annotations == null) {
+                System.out.println("error: AnnotationThread class has no annotations");
+                return;
+            }
+        }
+    }
+}
diff --git a/test/148-multithread-gc-annotations/src/Main.java b/test/148-multithread-gc-annotations/src/Main.java
new file mode 100644
index 0000000..b652ed6
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String[] args) {
+        System.loadLibrary(args[0]);
+        Thread annoThread = new Thread(new AnnotationThread(), "Annotation thread");
+        Thread gcThread = new Thread(new MovingGCThread(), "Moving GC thread");
+        annoThread.start();
+        gcThread.start();
+        try {
+            annoThread.join();
+            gcThread.join();
+        } catch (InterruptedException e) {
+            System.out.println("error: " + e);
+        }
+        System.out.println("Done.");
+    }
+}
diff --git a/test/148-multithread-gc-annotations/src/MovingGCThread.java b/test/148-multithread-gc-annotations/src/MovingGCThread.java
new file mode 100644
index 0000000..87de9f4
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/MovingGCThread.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.TreeMap;
+
+public class MovingGCThread implements Runnable {
+    private static TreeMap<Integer, Object> treeMap = new TreeMap<Integer, Object>();
+
+    public void run() {
+        for (int i = 0; i < 20; i++) {
+            testHomogeneousCompaction();
+        }
+    }
+
+    public static void testHomogeneousCompaction() {
+        final boolean supportHSC = supportHomogeneousSpaceCompact();
+        if (!supportHSC) {
+            return;
+        }
+        Object o = new Object();
+        long addressBefore = objectAddress(o);
+        allocateStuff();
+        final boolean success = performHomogeneousSpaceCompact();
+        allocateStuff();
+        if (!success) {
+            System.out.println("error: Expected " + supportHSC + " but got " + success);
+        }
+        allocateStuff();
+        long addressAfter = objectAddress(o);
+        // This relies on the compaction copying from one space to another space and there being
+        // no overlap.
+        if (addressBefore == addressAfter) {
+            System.out.println("error: Expected different address " + addressBefore + " vs " +
+                    addressAfter);
+        }
+    }
+
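+    // Allocate objects that stay reachable through the static map so the
+    // heap has live data to move during the compactions above.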
+    private static void allocateStuff() {
+        for (int i = 0; i < 1000; ++i) {
+            Object o = new Object();
+            treeMap.put(o.hashCode(), o);
+        }
+    }
+
+    // Methods to get access to ART internals.
+    private static native boolean supportHomogeneousSpaceCompact();
+    private static native boolean performHomogeneousSpaceCompact();
+    private static native long objectAddress(Object object);
+}
diff --git a/test/149-suspend-all-stress/check b/test/149-suspend-all-stress/check
new file mode 100755
index 0000000..d30b888
--- /dev/null
+++ b/test/149-suspend-all-stress/check
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Only compare the last line; the test may log diagnostic output before it.
+tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/149-suspend-all-stress/expected.txt b/test/149-suspend-all-stress/expected.txt
new file mode 100644
index 0000000..134d8d0
--- /dev/null
+++ b/test/149-suspend-all-stress/expected.txt
@@ -0,0 +1 @@
+Finishing
diff --git a/test/149-suspend-all-stress/info.txt b/test/149-suspend-all-stress/info.txt
new file mode 100644
index 0000000..29b414c
--- /dev/null
+++ b/test/149-suspend-all-stress/info.txt
@@ -0,0 +1 @@
+Stress test for multiple threads calling SuspendAll.
diff --git a/test/149-suspend-all-stress/src/Main.java b/test/149-suspend-all-stress/src/Main.java
new file mode 100644
index 0000000..6a27c4b
--- /dev/null
+++ b/test/149-suspend-all-stress/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+public class Main implements Runnable {
+    static final int numberOfThreads = 8;
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        final Thread[] threads = new Thread[numberOfThreads];
+        for (int t = 0; t < threads.length; t++) {
+            threads[t] = new Thread(new Main());
+            threads[t].start();
+        }
+        for (Thread t : threads) {
+            t.join();
+        }
+        System.out.println("Finishing");
+    }
+
+    public void run() {
+        suspendAndResume();
+    }
+
+    private static native void suspendAndResume();
+}
diff --git a/test/149-suspend-all-stress/suspend_all.cc b/test/149-suspend-all-stress/suspend_all.cc
new file mode 100644
index 0000000..dfd944a
--- /dev/null
+++ b/test/149-suspend-all-stress/suspend_all.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/time_utils.h"
+#include "jni.h"
+#include "runtime.h"
+#include "thread_list.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_suspendAndResume(JNIEnv*, jclass) {
+  static constexpr size_t kInitialSleepUS = 100 * 1000;  // 100ms.
+  usleep(kInitialSleepUS);  // Leave some time for threads to get in here before we start suspending.
+  enum Operation {
+    kOPSuspendAll,
+    kOPDumpStack,
+    kOPSuspendAllDumpStack,
+    // Total number of operations.
+    kOPNumber,
+  };
+  const uint64_t start_time = NanoTime();
+  size_t iterations = 0;
+  // Run for a fixed period of 10 seconds.
+  while (NanoTime() - start_time < MsToNs(10 * 1000)) {
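+    // Rotate through the operations so concurrently running threads mix
+    // SuspendAll sections with thread dumps.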
+    switch (static_cast<Operation>(iterations % kOPNumber)) {
+      case kOPSuspendAll: {
+        ScopedSuspendAll ssa(__FUNCTION__);
+        usleep(500);
+        break;
+      }
+      case kOPDumpStack: {
+        Runtime::Current()->GetThreadList()->Dump(LOG(INFO));
+        usleep(500);
+        break;
+      }
+      case kOPSuspendAllDumpStack: {
+        // Not yet supported.
+        // ScopedSuspendAll ssa(__FUNCTION__);
+        // Runtime::Current()->GetThreadList()->Dump(LOG(INFO));
+        break;
+      }
+      case kOPNumber:
+        break;
+    }
+    ++iterations;
+  }
+  LOG(INFO) << "Did " << iterations << " iterations";
+}
+
+}  // namespace art
diff --git a/test/150-loadlibrary/expected.txt b/test/150-loadlibrary/expected.txt
new file mode 100644
index 0000000..41feacf
--- /dev/null
+++ b/test/150-loadlibrary/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Success.
diff --git a/test/150-loadlibrary/info.txt b/test/150-loadlibrary/info.txt
new file mode 100644
index 0000000..089d044
--- /dev/null
+++ b/test/150-loadlibrary/info.txt
@@ -0,0 +1 @@
+Check that passing the BootClassLoader to loadLibrary works.
diff --git a/test/150-loadlibrary/src/Main.java b/test/150-loadlibrary/src/Main.java
new file mode 100644
index 0000000..9086937
--- /dev/null
+++ b/test/150-loadlibrary/src/Main.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // Check whether we get the BootClassLoader (not null).
+    ClassLoader bootClassLoader = Object.class.getClassLoader();
+    if (bootClassLoader == null) {
+      throw new IllegalStateException("Expected non-null classloader for Object");
+    }
+
+    // Try to load libarttest(d) with the BootClassLoader. First construct the filename.
+    String libName = System.mapLibraryName(args[0]);
+    Method libPathsMethod = Runtime.class.getDeclaredMethod("getLibPaths");
+    libPathsMethod.setAccessible(true);
+    String[] libPaths = (String[])libPathsMethod.invoke(Runtime.getRuntime());
+    String fileName = null;
+    for (String p : libPaths) {
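+      // Entries returned by getLibPaths() are expected to end with '/', so
+      // plain concatenation yields a full candidate path.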
+      String candidate = p + libName;
+      if (new File(candidate).exists()) {
+          fileName = candidate;
+          break;
+      }
+    }
+    if (fileName == null) {
+      throw new IllegalStateException("Didn't find " + libName + " in " +
+          Arrays.toString(libPaths));
+    }
+
+    // Then call an internal function that accepts the classloader. Do not use load(), as it
+    // is deprecated and only there for backwards compatibility, and prints a warning to the
+    // log that we'd have to strip (it contains the pid).
+    Method m = Runtime.class.getDeclaredMethod("doLoad", String.class, ClassLoader.class);
+    m.setAccessible(true);
+    Object result = m.invoke(Runtime.getRuntime(), fileName, bootClassLoader);
+    if (result != null) {
+      throw new IllegalStateException(result.toString());
+    }
+
+    System.out.println("Success.");
+  }
+}
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index 52d4259..dc58819 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -247,7 +247,7 @@
    * Helper for testCastOperatorWithArrays. It's important that
    * the return type is Object.
    */
-  private static Object makeArray(Class c) {
+  private static Object makeArray(Class<?> c) {
     return Array.newInstance(c, 1);
   }
 
@@ -461,7 +461,7 @@
       "hello there".substring(9,14);
       fail();
     } catch (StringIndexOutOfBoundsException ex) {
-      assertEquals("length=11; regionStart=9; regionLength=5", ex.getMessage());
+      assertEquals("length=11; index=14", ex.getMessage());
     }
   }
 }
diff --git a/test/370-dex-v37/build b/test/370-dex-v37/build
new file mode 100755
index 0000000..f472428
--- /dev/null
+++ b/test/370-dex-v37/build
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@"
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Change the generated dex file to have a v37 magic number if it is version 35.
+  if test -f classes.dex && head -c 7 classes.dex | grep -q 035; then
+    # Place the ASCII value '037' into the classes.dex file starting at byte 4.
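+    # The dex magic is "dex\n035\0"; bytes 4-6 hold the ASCII version digits,
+    # so rewriting them with '037' relabels the file as a version 37 dex.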
+    printf '037' | dd status=none conv=notrunc of=classes.dex bs=1 seek=4 count=3
+    rm -f $TEST_NAME.jar
+    zip $TEST_NAME.jar classes.dex
+  fi
+fi
diff --git a/test/370-dex-v37/expected.txt b/test/370-dex-v37/expected.txt
new file mode 100644
index 0000000..af5626b
--- /dev/null
+++ b/test/370-dex-v37/expected.txt
@@ -0,0 +1 @@
+Hello, world!
diff --git a/test/370-dex-v37/info.txt b/test/370-dex-v37/info.txt
new file mode 100644
index 0000000..5ca9c76
--- /dev/null
+++ b/test/370-dex-v37/info.txt
@@ -0,0 +1 @@
+Print "Hello, World!" with a version 37 dex file.
diff --git a/test/370-dex-v37/src/Main.java b/test/370-dex-v37/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/370-dex-v37/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/412-new-array/info.txt b/test/412-new-array/info.txt
index cb388b6..b5f834a 100644
--- a/test/412-new-array/info.txt
+++ b/test/412-new-array/info.txt
@@ -1 +1,3 @@
 Simple tests for new-array, filled-new-array and fill-array-data.
+Regression test for the arm64 mterp miscalculating the fill-array-data-payload
+address, zero-extending a register instead of sign-extending.
diff --git a/test/412-new-array/smali/fill_array_data.smali b/test/412-new-array/smali/fill_array_data.smali
index 34776db..2b24e56 100644
--- a/test/412-new-array/smali/fill_array_data.smali
+++ b/test/412-new-array/smali/fill_array_data.smali
@@ -15,6 +15,21 @@
 
 .end method
 
+.method public static intArrayFillInstructionAfterData([I)V
+   .registers 1
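+   # The payload precedes the fill instruction, so the offset from
+   # fill-array-data to :ArrayData is negative and exercises sign-extension.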
+   goto :FillInstruction
+
+:ArrayData
+    .array-data 4
+        1 2 3 4 5
+    .end array-data
+
+:FillInstruction
+   fill-array-data v0, :ArrayData
+   return-void
+
+.end method
+
 .method public static shortArray([S)V
    .registers 1
 
diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java
index b9c2a05..d95d2c5 100644
--- a/test/412-new-array/src/Main.java
+++ b/test/412-new-array/src/Main.java
@@ -259,6 +259,45 @@
     }
 
     {
+      Method m = c.getMethod("intArrayFillInstructionAfterData", int[].class);
+      int[] array = new int[7];
+      Object[] args = { array };
+      m.invoke(null, args);
+      assertEquals(7, array.length);
+      assertEquals(1, array[0]);
+      assertEquals(2, array[1]);
+      assertEquals(3, array[2]);
+      assertEquals(4, array[3]);
+      assertEquals(5, array[4]);
+      assertEquals(0, array[5]);
+      assertEquals(0, array[6]);
+
+      array = new int[2];
+      args[0] = array;
+      Throwable exception = null;
+      try {
+        m.invoke(null, args);
+      } catch (InvocationTargetException e) {
+        exception = e.getCause();
+        assertTrue(exception instanceof IndexOutOfBoundsException);
+      }
+      assertNotNull(exception);
+      exception = null;
+      // Test that nothing has been written to the array.
+      assertEquals(0, array[0]);
+      assertEquals(0, array[1]);
+
+      args[0] = null;
+      try {
+        m.invoke(null, args);
+      } catch (InvocationTargetException e) {
+        exception = e.getCause();
+        assertTrue(exception instanceof NullPointerException);
+      }
+      assertNotNull(exception);
+    }
+
+    {
       Method m = c.getMethod("shortArray", short[].class);
       short[] array = new short[7];
       Object[] args = { array };
diff --git a/test/420-const-class/src/Main.java b/test/420-const-class/src/Main.java
index 44a7436..90ccf3a 100644
--- a/test/420-const-class/src/Main.java
+++ b/test/420-const-class/src/Main.java
@@ -53,15 +53,15 @@
     $opt$LoadAndClinitCheck();
   }
 
-  public static Class $opt$LoadThisClass() {
+  public static Class<?> $opt$LoadThisClass() {
     return Main.class;
   }
 
-  public static Class $opt$LoadOtherClass() {
+  public static Class<?> $opt$LoadOtherClass() {
     return Other.class;
   }
 
-  public static Class $opt$LoadSystemClass() {
+  public static Class<?> $opt$LoadSystemClass() {
     return System.class;
   }
 
diff --git a/test/431-optimizing-arith-shifts/src/Main.java b/test/431-optimizing-arith-shifts/src/Main.java
index 86422bd..b7a112f 100644
--- a/test/431-optimizing-arith-shifts/src/Main.java
+++ b/test/431-optimizing-arith-shifts/src/Main.java
@@ -29,304 +29,302 @@
   }
 
   public static void main(String[] args) {
-    shlInt();
-    shlLong();
-    shrInt();
-    shrLong();
-    ushrInt();
-    ushrLong();
+    testShlInt();
+    testShlLong();
+    testShrInt();
+    testShrLong();
+    testUShrInt();
+    testUShrLong();
   }
 
-  private static void shlInt() {
-    expectEquals(48, $opt$ShlConst2(12));
-    expectEquals(12, $opt$ShlConst0(12));
-    expectEquals(-48, $opt$Shl(-12, 2));
-    expectEquals(1024, $opt$Shl(32, 5));
+  private static void testShlInt() {
+    expectEquals(48, $opt$ShlIntConst2(12));
+    expectEquals(12, $opt$ShlIntConst0(12));
+    expectEquals(-48, $opt$ShlInt(-12, 2));
+    expectEquals(1024, $opt$ShlInt(32, 5));
 
-    expectEquals(7, $opt$Shl(7, 0));
-    expectEquals(14, $opt$Shl(7, 1));
-    expectEquals(0, $opt$Shl(0, 30));
+    expectEquals(7, $opt$ShlInt(7, 0));
+    expectEquals(14, $opt$ShlInt(7, 1));
+    expectEquals(0, $opt$ShlInt(0, 30));
 
-    expectEquals(1073741824L, $opt$Shl(1, 30));
-    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, 31));  // overflow
-    expectEquals(Integer.MIN_VALUE, $opt$Shl(1073741824, 1));  // overflow
-    expectEquals(1073741824, $opt$Shl(268435456, 2));
+    expectEquals(1073741824L, $opt$ShlInt(1, 30));
+    expectEquals(Integer.MIN_VALUE, $opt$ShlInt(1, 31));  // overflow
+    expectEquals(Integer.MIN_VALUE, $opt$ShlInt(1073741824, 1));  // overflow
+    expectEquals(1073741824, $opt$ShlInt(268435456, 2));
 
     // Only the 5 lower bits should be used for shifting (& 0x1f).
-    expectEquals(7, $opt$Shl(7, 32));  // 32 & 0x1f = 0
-    expectEquals(14, $opt$Shl(7, 33));  // 33 & 0x1f = 1
-    expectEquals(32, $opt$Shl(1, 101));  // 101 & 0x1f = 5
+    expectEquals(7, $opt$ShlInt(7, 32));  // 32 & 0x1f = 0
+    expectEquals(14, $opt$ShlInt(7, 33));  // 33 & 0x1f = 1
+    expectEquals(32, $opt$ShlInt(1, 101));  // 101 & 0x1f = 5
 
-    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, -1));  // -1 & 0x1f = 31
-    expectEquals(14, $opt$Shl(7, -31));  // -31 & 0x1f = 1
-    expectEquals(7, $opt$Shl(7, -32));  // -32 & 0x1f = 0
-    expectEquals(-536870912, $opt$Shl(7, -3));  // -3 & 0x1f = 29
+    expectEquals(Integer.MIN_VALUE, $opt$ShlInt(1, -1));  // -1 & 0x1f = 31
+    expectEquals(14, $opt$ShlInt(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$ShlInt(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-536870912, $opt$ShlInt(7, -3));  // -3 & 0x1f = 29
 
-    expectEquals(Integer.MIN_VALUE, $opt$Shl(7, Integer.MAX_VALUE));
-    expectEquals(7, $opt$Shl(7, Integer.MIN_VALUE));
+    expectEquals(Integer.MIN_VALUE, $opt$ShlInt(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$ShlInt(7, Integer.MIN_VALUE));
   }
 
-  private static void shlLong() {
-    expectEquals(48L, $opt$ShlConst2(12L));
-    expectEquals(12L, $opt$ShlConst0(12L));
-    expectEquals(-48L, $opt$Shl(-12L, 2L));
-    expectEquals(1024L, $opt$Shl(32L, 5L));
+  private static void testShlLong() {
+    expectEquals(48L, $opt$ShlLongConst2(12L));
+    expectEquals(12L, $opt$ShlLongConst0(12L));
+    expectEquals(-48L, $opt$ShlLong(-12L, 2));
+    expectEquals(1024L, $opt$ShlLong(32L, 5));
 
-    expectEquals(7L, $opt$Shl(7L, 0L));
-    expectEquals(14L, $opt$Shl(7L, 1L));
-    expectEquals(0L, $opt$Shl(0L, 30L));
+    expectEquals(7L, $opt$ShlLong(7L, 0));
+    expectEquals(14L, $opt$ShlLong(7L, 1));
+    expectEquals(0L, $opt$ShlLong(0L, 30));
 
-    expectEquals(1073741824L, $opt$Shl(1L, 30L));
-    expectEquals(2147483648L, $opt$Shl(1L, 31L));
-    expectEquals(2147483648L, $opt$Shl(1073741824L, 1L));
+    expectEquals(1073741824L, $opt$ShlLong(1L, 30));
+    expectEquals(2147483648L, $opt$ShlLong(1L, 31));
+    expectEquals(2147483648L, $opt$ShlLong(1073741824L, 1));
 
     // Long shifts can use up to 6 lower bits.
-    expectEquals(4294967296L, $opt$Shl(1L, 32L));
-    expectEquals(60129542144L, $opt$Shl(7L, 33L));
-    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, 63L));  // overflow
+    expectEquals(4294967296L, $opt$ShlLong(1L, 32));
+    expectEquals(60129542144L, $opt$ShlLong(7L, 33));
+    expectEquals(Long.MIN_VALUE, $opt$ShlLong(1L, 63));  // overflow
 
     // Only the 6 lower bits should be used for shifting (& 0x3f).
-    expectEquals(7L, $opt$Shl(7L, 64L));  // 64 & 0x3f = 0
-    expectEquals(14L, $opt$Shl(7L, 65L));  // 65 & 0x3f = 1
-    expectEquals(137438953472L, $opt$Shl(1L, 101L));  // 101 & 0x3f = 37
+    expectEquals(7L, $opt$ShlLong(7L, 64));  // 64 & 0x3f = 0
+    expectEquals(14L, $opt$ShlLong(7L, 65));  // 65 & 0x3f = 1
+    expectEquals(137438953472L, $opt$ShlLong(1L, 101));  // 101 & 0x3f = 37
 
-    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, -1L));  // -1 & 0x3f = 63
-    expectEquals(14L, $opt$Shl(7L, -63L));  // -63 & 0x3f = 1
-    expectEquals(7L, $opt$Shl(7L, -64L));  // -64 & 0x3f = 0
-    expectEquals(2305843009213693952L, $opt$Shl(1L, -3L));  // -3 & 0x3f = 61
+    expectEquals(Long.MIN_VALUE, $opt$ShlLong(1L, -1));  // -1 & 0x3f = 63
+    expectEquals(14L, $opt$ShlLong(7L, -63));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$ShlLong(7L, -64));  // -64 & 0x3f = 0
+    expectEquals(2305843009213693952L, $opt$ShlLong(1L, -3));  // -3 & 0x3f = 61
 
-    expectEquals(Long.MIN_VALUE, $opt$Shl(7L, Long.MAX_VALUE));
-    expectEquals(7L, $opt$Shl(7L, Long.MIN_VALUE));
+    expectEquals(Long.MIN_VALUE, $opt$ShlLong(7L, Integer.MAX_VALUE));
+    expectEquals(7L, $opt$ShlLong(7L, Integer.MIN_VALUE));
 
     // Exercise some special cases handled by backends/simplifier.
-    expectEquals(24L, $opt$ShlConst1(12L));
-    expectEquals(0x2345678900000000L, $opt$ShlConst32(0x123456789L));
-    expectEquals(0x2490249000000000L, $opt$ShlConst33(0x12481248L));
-    expectEquals(0x4920492000000000L, $opt$ShlConst34(0x12481248L));
-    expectEquals(0x9240924000000000L, $opt$ShlConst35(0x12481248L));
+    expectEquals(24L, $opt$ShlLongConst1(12L));
+    expectEquals(0x2345678900000000L, $opt$ShlLongConst32(0x123456789L));
+    expectEquals(0x2490249000000000L, $opt$ShlLongConst33(0x12481248L));
+    expectEquals(0x4920492000000000L, $opt$ShlLongConst34(0x12481248L));
+    expectEquals(0x9240924000000000L, $opt$ShlLongConst35(0x12481248L));
   }
 
-  private static void shrInt() {
-    expectEquals(3, $opt$ShrConst2(12));
-    expectEquals(12, $opt$ShrConst0(12));
-    expectEquals(-3, $opt$Shr(-12, 2));
-    expectEquals(1, $opt$Shr(32, 5));
+  private static void testShrInt() {
+    expectEquals(3, $opt$ShrIntConst2(12));
+    expectEquals(12, $opt$ShrIntConst0(12));
+    expectEquals(-3, $opt$ShrInt(-12, 2));
+    expectEquals(1, $opt$ShrInt(32, 5));
 
-    expectEquals(7, $opt$Shr(7, 0));
-    expectEquals(3, $opt$Shr(7, 1));
-    expectEquals(0, $opt$Shr(0, 30));
-    expectEquals(0, $opt$Shr(1, 30));
-    expectEquals(-1, $opt$Shr(-1, 30));
+    expectEquals(7, $opt$ShrInt(7, 0));
+    expectEquals(3, $opt$ShrInt(7, 1));
+    expectEquals(0, $opt$ShrInt(0, 30));
+    expectEquals(0, $opt$ShrInt(1, 30));
+    expectEquals(-1, $opt$ShrInt(-1, 30));
 
-    expectEquals(0, $opt$Shr(Integer.MAX_VALUE, 31));
-    expectEquals(-1, $opt$Shr(Integer.MIN_VALUE, 31));
+    expectEquals(0, $opt$ShrInt(Integer.MAX_VALUE, 31));
+    expectEquals(-1, $opt$ShrInt(Integer.MIN_VALUE, 31));
 
     // Only the 5 lower bits should be used for shifting (& 0x1f).
-    expectEquals(7, $opt$Shr(7, 32));  // 32 & 0x1f = 0
-    expectEquals(3, $opt$Shr(7, 33));  // 33 & 0x1f = 1
+    expectEquals(7, $opt$ShrInt(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$ShrInt(7, 33));  // 33 & 0x1f = 1
 
-    expectEquals(0, $opt$Shr(1, -1));  // -1 & 0x1f = 31
-    expectEquals(3, $opt$Shr(7, -31));  // -31 & 0x1f = 1
-    expectEquals(7, $opt$Shr(7, -32));  // -32 & 0x1f = 0
-    expectEquals(-4, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+    expectEquals(0, $opt$ShrInt(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$ShrInt(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$ShrInt(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-4, $opt$ShrInt(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
 
-    expectEquals(0, $opt$Shr(7, Integer.MAX_VALUE));
-    expectEquals(7, $opt$Shr(7, Integer.MIN_VALUE));
+    expectEquals(0, $opt$ShrInt(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$ShrInt(7, Integer.MIN_VALUE));
   }
 
-  private static void shrLong() {
-    expectEquals(3L, $opt$ShrConst2(12L));
-    expectEquals(12L, $opt$ShrConst0(12L));
-    expectEquals(-3L, $opt$Shr(-12L, 2L));
-    expectEquals(1, $opt$Shr(32, 5));
+  private static void testShrLong() {
+    expectEquals(3L, $opt$ShrLongConst2(12L));
+    expectEquals(12L, $opt$ShrLongConst0(12L));
+    expectEquals(-3L, $opt$ShrLong(-12L, 2));
+    expectEquals(1, $opt$ShrLong(32, 5));
 
-    expectEquals(7L, $opt$Shr(7L, 0L));
-    expectEquals(3L, $opt$Shr(7L, 1L));
-    expectEquals(0L, $opt$Shr(0L, 30L));
-    expectEquals(0L, $opt$Shr(1L, 30L));
-    expectEquals(-1L, $opt$Shr(-1L, 30L));
+    expectEquals(7L, $opt$ShrLong(7L, 0));
+    expectEquals(3L, $opt$ShrLong(7L, 1));
+    expectEquals(0L, $opt$ShrLong(0L, 30));
+    expectEquals(0L, $opt$ShrLong(1L, 30));
+    expectEquals(-1L, $opt$ShrLong(-1L, 30));
 
-
-    expectEquals(1L, $opt$Shr(1073741824L, 30L));
-    expectEquals(1L, $opt$Shr(2147483648L, 31L));
-    expectEquals(1073741824L, $opt$Shr(2147483648L, 1L));
+    expectEquals(1L, $opt$ShrLong(1073741824L, 30));
+    expectEquals(1L, $opt$ShrLong(2147483648L, 31));
+    expectEquals(1073741824L, $opt$ShrLong(2147483648L, 1));
 
     // Long shifts can use up to 6 lower bits.
-    expectEquals(1L, $opt$Shr(4294967296L, 32L));
-    expectEquals(7L, $opt$Shr(60129542144L, 33L));
-    expectEquals(0L, $opt$Shr(Long.MAX_VALUE, 63L));
-    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, 63L));
+    expectEquals(1L, $opt$ShrLong(4294967296L, 32));
+    expectEquals(7L, $opt$ShrLong(60129542144L, 33));
+    expectEquals(0L, $opt$ShrLong(Long.MAX_VALUE, 63));
+    expectEquals(-1L, $opt$ShrLong(Long.MIN_VALUE, 63));
 
     // Only the 6 lower bits should be used for shifting (& 0x3f).
-    expectEquals(7L, $opt$Shr(7L, 64L));  // 64 & 0x3f = 0
-    expectEquals(3L, $opt$Shr(7L, 65L));  // 65 & 0x3f = 1
+    expectEquals(7L, $opt$ShrLong(7L, 64));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$ShrLong(7L, 65));  // 65 & 0x3f = 1
 
-    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
-    expectEquals(3L, $opt$Shr(7L, -63L));  // -63 & 0x3f = 1
-    expectEquals(7L, $opt$Shr(7L, -64L));  // -64 & 0x3f = 0
-    expectEquals(1L, $opt$Shr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
-    expectEquals(-4L, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+    expectEquals(-1L, $opt$ShrLong(Long.MIN_VALUE, -1));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$ShrLong(7L, -63));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$ShrLong(7L, -64));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$ShrLong(2305843009213693952L, -3));  // -3 & 0x3f = 61
+    expectEquals(-1L, $opt$ShrLong(Integer.MIN_VALUE, -3));  // -3 & 0x3f = 61
 
-    expectEquals(0L, $opt$Shr(7L, Long.MAX_VALUE));
-    expectEquals(7L, $opt$Shr(7L, Long.MIN_VALUE));
+    expectEquals(0L, $opt$ShrLong(7L, Integer.MAX_VALUE));
+    expectEquals(7L, $opt$ShrLong(7L, Integer.MIN_VALUE));
   }
 
-  private static void ushrInt() {
-    expectEquals(3, $opt$UShrConst2(12));
-    expectEquals(12, $opt$UShrConst0(12));
-    expectEquals(1073741821, $opt$UShr(-12, 2));
-    expectEquals(1, $opt$UShr(32, 5));
+  private static void testUShrInt() {
+    expectEquals(3, $opt$UShrIntConst2(12));
+    expectEquals(12, $opt$UShrIntConst0(12));
+    expectEquals(1073741821, $opt$UShrInt(-12, 2));
+    expectEquals(1, $opt$UShrInt(32, 5));
 
-    expectEquals(7, $opt$UShr(7, 0));
-    expectEquals(3, $opt$UShr(7, 1));
-    expectEquals(0, $opt$UShr(0, 30));
-    expectEquals(0, $opt$UShr(1, 30));
-    expectEquals(3, $opt$UShr(-1, 30));
+    expectEquals(7, $opt$UShrInt(7, 0));
+    expectEquals(3, $opt$UShrInt(7, 1));
+    expectEquals(0, $opt$UShrInt(0, 30));
+    expectEquals(0, $opt$UShrInt(1, 30));
+    expectEquals(3, $opt$UShrInt(-1, 30));
 
-    expectEquals(0, $opt$UShr(Integer.MAX_VALUE, 31));
-    expectEquals(1, $opt$UShr(Integer.MIN_VALUE, 31));
+    expectEquals(0, $opt$UShrInt(Integer.MAX_VALUE, 31));
+    expectEquals(1, $opt$UShrInt(Integer.MIN_VALUE, 31));
 
     // Only the 5 lower bits should be used for shifting (& 0x1f).
-    expectEquals(7, $opt$UShr(7, 32));  // 32 & 0x1f = 0
-    expectEquals(3, $opt$UShr(7, 33));  // 33 & 0x1f = 1
+    expectEquals(7, $opt$UShrInt(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$UShrInt(7, 33));  // 33 & 0x1f = 1
 
-    expectEquals(0, $opt$UShr(1, -1));  // -1 & 0x1f = 31
-    expectEquals(3, $opt$UShr(7, -31));  // -31 & 0x1f = 1
-    expectEquals(7, $opt$UShr(7, -32));  // -32 & 0x1f = 0
-    expectEquals(4, $opt$UShr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+    expectEquals(0, $opt$UShrInt(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$UShrInt(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$UShrInt(7, -32));  // -32 & 0x1f = 0
+    expectEquals(4, $opt$UShrInt(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
 
-    expectEquals(0, $opt$UShr(7, Integer.MAX_VALUE));
-    expectEquals(7, $opt$UShr(7, Integer.MIN_VALUE));
+    expectEquals(0, $opt$UShrInt(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$UShrInt(7, Integer.MIN_VALUE));
   }
 
-  private static void ushrLong() {
-    expectEquals(3L, $opt$UShrConst2(12L));
-    expectEquals(12L, $opt$UShrConst0(12L));
-    expectEquals(4611686018427387901L, $opt$UShr(-12L, 2L));
-    expectEquals(1, $opt$UShr(32, 5));
+  private static void testUShrLong() {
+    expectEquals(3L, $opt$UShrLongConst2(12L));
+    expectEquals(12L, $opt$UShrLongConst0(12L));
+    expectEquals(4611686018427387901L, $opt$UShrLong(-12L, 2));
+    expectEquals(1, $opt$UShrLong(32, 5));
 
-    expectEquals(7L, $opt$UShr(7L, 0L));
-    expectEquals(3L, $opt$UShr(7L, 1L));
-    expectEquals(0L, $opt$UShr(0L, 30L));
-    expectEquals(0L, $opt$UShr(1L, 30L));
-    expectEquals(17179869183L, $opt$UShr(-1L, 30L));
+    expectEquals(7L, $opt$UShrLong(7L, 0));
+    expectEquals(3L, $opt$UShrLong(7L, 1));
+    expectEquals(0L, $opt$UShrLong(0L, 30));
+    expectEquals(0L, $opt$UShrLong(1L, 30));
+    expectEquals(17179869183L, $opt$UShrLong(-1L, 30));
 
-
-    expectEquals(1L, $opt$UShr(1073741824L, 30L));
-    expectEquals(1L, $opt$UShr(2147483648L, 31L));
-    expectEquals(1073741824L, $opt$UShr(2147483648L, 1L));
+    expectEquals(1L, $opt$UShrLong(1073741824L, 30));
+    expectEquals(1L, $opt$UShrLong(2147483648L, 31));
+    expectEquals(1073741824L, $opt$UShrLong(2147483648L, 1));
 
     // Long shifts can use up to 6 lower bits.
-    expectEquals(1L, $opt$UShr(4294967296L, 32L));
-    expectEquals(7L, $opt$UShr(60129542144L, 33L));
-    expectEquals(0L, $opt$UShr(Long.MAX_VALUE, 63L));
-    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, 63L));
+    expectEquals(1L, $opt$UShrLong(4294967296L, 32));
+    expectEquals(7L, $opt$UShrLong(60129542144L, 33));
+    expectEquals(0L, $opt$UShrLong(Long.MAX_VALUE, 63));
+    expectEquals(1L, $opt$UShrLong(Long.MIN_VALUE, 63));
 
     // Only the 6 lower bits should be used for shifting (& 0x3f).
-    expectEquals(7L, $opt$UShr(7L, 64L));  // 64 & 0x3f = 0
-    expectEquals(3L, $opt$UShr(7L, 65L));  // 65 & 0x3f = 1
+    expectEquals(7L, $opt$UShrLong(7L, 64));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$UShrLong(7L, 65));  // 65 & 0x3f = 1
 
-    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
-    expectEquals(3L, $opt$UShr(7L, -63L));  // -63 & 0x3f = 1
-    expectEquals(7L, $opt$UShr(7L, -64L));  // -64 & 0x3f = 0
-    expectEquals(1L, $opt$UShr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
-    expectEquals(4L, $opt$UShr(Long.MIN_VALUE, -3L));  // -3 & 0x3f = 61
+    expectEquals(1L, $opt$UShrLong(Long.MIN_VALUE, -1));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$UShrLong(7L, -63));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$UShrLong(7L, -64));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$UShrLong(2305843009213693952L, -3));  // -3 & 0x3f = 61
+    expectEquals(4L, $opt$UShrLong(Long.MIN_VALUE, -3));  // -3 & 0x3f = 61
 
-    expectEquals(0L, $opt$UShr(7L, Long.MAX_VALUE));
-    expectEquals(7L, $opt$UShr(7L, Long.MIN_VALUE));
+    expectEquals(0L, $opt$UShrLong(7L, Integer.MAX_VALUE));
+    expectEquals(7L, $opt$UShrLong(7L, Integer.MIN_VALUE));
   }
 
-  static int $opt$Shl(int a, int b) {
-    return a << b;
+
+  static int $opt$ShlInt(int value, int distance) {
+    return value << distance;
   }
 
-  static long $opt$Shl(long a, long b) {
-    return a << b;
+  static long $opt$ShlLong(long value, int distance) {
+    return value << distance;
   }
 
-  static int $opt$Shr(int a, int b) {
-    return a >> b;
+  static int $opt$ShrInt(int value, int distance) {
+    return value >> distance;
   }
 
-  static long $opt$Shr(long a, long b) {
-    return a >> b;
+  static long $opt$ShrLong(long value, int distance) {
+    return value >> distance;
   }
 
-  static int $opt$UShr(int a, int b) {
-    return a >>> b;
+  static int $opt$UShrInt(int value, int distance) {
+    return value >>> distance;
   }
 
-  static long $opt$UShr(long a, long b) {
-    return a >>> b;
+  static long $opt$UShrLong(long value, int distance) {
+    return value >>> distance;
   }
 
-  static int $opt$ShlConst2(int a) {
-    return a << 2;
+  static int $opt$ShlIntConst2(int value) {
+    return value << 2;
   }
 
-  static long $opt$ShlConst2(long a) {
-    return a << 2L;
+  static long $opt$ShlLongConst2(long value) {
+    return value << 2;
   }
 
-  static int $opt$ShrConst2(int a) {
-    return a >> 2;
+  static int $opt$ShrIntConst2(int value) {
+    return value >> 2;
   }
 
-  static long $opt$ShrConst2(long a) {
-    return a >> 2L;
+  static long $opt$ShrLongConst2(long value) {
+    return value >> 2;
   }
 
-  static int $opt$UShrConst2(int a) {
-    return a >>> 2;
+  static int $opt$UShrIntConst2(int value) {
+    return value >>> 2;
   }
 
-  static long $opt$UShrConst2(long a) {
-    return a >>> 2L;
+  static long $opt$UShrLongConst2(long value) {
+    return value >>> 2;
   }
 
-  static int $opt$ShlConst0(int a) {
-    return a << 0;
+  static int $opt$ShlIntConst0(int value) {
+    return value << 0;
   }
 
-  static long $opt$ShlConst0(long a) {
-    return a << 0L;
+  static long $opt$ShlLongConst0(long value) {
+    return value << 0;
   }
 
-  static int $opt$ShrConst0(int a) {
-    return a >> 0;
+  static int $opt$ShrIntConst0(int value) {
+    return value >> 0;
   }
 
-  static long $opt$ShrConst0(long a) {
-    return a >> 0L;
+  static long $opt$ShrLongConst0(long value) {
+    return value >> 0;
   }
 
-  static int $opt$UShrConst0(int a) {
-    return a >>> 0;
+  static int $opt$UShrIntConst0(int value) {
+    return value >>> 0;
   }
 
-  static long $opt$UShrConst0(long a) {
-    return a >>> 0L;
+  static long $opt$UShrLongConst0(long value) {
+    return value >>> 0;
   }
 
-  static long $opt$ShlConst1(long a) {
-    return a << 1L;
+  static long $opt$ShlLongConst1(long value) {
+    return value << 1;
   }
 
-  static long $opt$ShlConst32(long a) {
-    return a << 32L;
+  static long $opt$ShlLongConst32(long value) {
+    return value << 32;
   }
 
-  static long $opt$ShlConst33(long a) {
-    return a << 33L;
+  static long $opt$ShlLongConst33(long value) {
+    return value << 33;
   }
 
-  static long $opt$ShlConst34(long a) {
-    return a << 34L;
+  static long $opt$ShlLongConst34(long value) {
+    return value << 34;
   }
 
-  static long $opt$ShlConst35(long a) {
-    return a << 35L;
+  static long $opt$ShlLongConst35(long value) {
+    return value << 35;
   }
 
 }
-
diff --git a/test/432-optimizing-cmp/src/Main.java b/test/432-optimizing-cmp/src/Main.java
index 3c7b13f..359eacd 100644
--- a/test/432-optimizing-cmp/src/Main.java
+++ b/test/432-optimizing-cmp/src/Main.java
@@ -41,6 +41,10 @@
   }
 
   private static void cmpFloat() throws Exception {
+    expectEq(0F, 0F);
+    expectEq(-0F, 0F);
+    expectEq(0F, -0F);
+    expectEq(-0F, -0F);
     expectLt(3.1F, 5.1F);
     expectGt(5.1F, 3.1F);
     expectLt(Float.MIN_VALUE, Float.MAX_VALUE);
@@ -49,6 +53,9 @@
     expectFalse(Float.NaN, 3.1F);
 
     expectEquals(0, smaliCmpGtFloat(0F, 0F));
+    expectEquals(0, smaliCmpGtFloat(-0F, 0F));
+    expectEquals(0, smaliCmpGtFloat(0F, -0F));
+    expectEquals(0, smaliCmpGtFloat(-0F, -0F));
     expectEquals(0, smaliCmpGtFloat(1F, 1F));
     expectEquals(-1, smaliCmpGtFloat(1.1F, 2.1F));
     expectEquals(1, smaliCmpGtFloat(2.1F, 1.1F));
@@ -60,6 +67,9 @@
     expectEquals(1, smaliCmpGtFloat(Float.NaN, 5F));
 
     expectEquals(0, smaliCmpLtFloat(0F, 0F));
+    expectEquals(0, smaliCmpLtFloat(-0F, 0F));
+    expectEquals(0, smaliCmpLtFloat(0F, -0F));
+    expectEquals(0, smaliCmpLtFloat(-0F, -0F));
     expectEquals(0, smaliCmpLtFloat(1F, 1F));
     expectEquals(-1, smaliCmpLtFloat(1.1F, 2.1F));
     expectEquals(1, smaliCmpLtFloat(2.1F, 1.1F));
@@ -72,6 +82,10 @@
   }
 
   private static void cmpDouble() throws Exception {
+    expectEq(0D, 0D);
+    expectEq(-0D, 0D);
+    expectEq(0D, -0D);
+    expectEq(-0D, -0D);
     expectLt(3.1D, 5.1D);
     expectGt(5.1D, 3.1D);
     expectLt(Double.MIN_VALUE, Double.MAX_VALUE);
@@ -80,6 +94,9 @@
     expectFalse(Double.NaN, 3.1D);
 
     expectEquals(0, smaliCmpGtDouble(0D, 0D));
+    expectEquals(0, smaliCmpGtDouble(-0D, 0D));
+    expectEquals(0, smaliCmpGtDouble(0D, -0D));
+    expectEquals(0, smaliCmpGtDouble(-0D, -0D));
     expectEquals(0, smaliCmpGtDouble(1D, 1D));
     expectEquals(-1, smaliCmpGtDouble(1.1D, 2.1D));
     expectEquals(1, smaliCmpGtDouble(2.1D, 1.1D));
@@ -91,6 +108,9 @@
     expectEquals(1, smaliCmpGtDouble(Double.NaN, 5D));
 
     expectEquals(0, smaliCmpLtDouble(0D, 0D));
+    expectEquals(0, smaliCmpLtDouble(-0D, 0D));
+    expectEquals(0, smaliCmpLtDouble(0D, -0D));
+    expectEquals(0, smaliCmpLtDouble(-0D, -0D));
     expectEquals(0, smaliCmpLtDouble(1D, 1D));
     expectEquals(-1, smaliCmpLtDouble(1.1D, 2.1D));
     expectEquals(1, smaliCmpLtDouble(2.1D, 1.1D));
@@ -102,7 +122,15 @@
     expectEquals(-1, smaliCmpLtDouble(Float.NaN, 5D));
   }
 
- static boolean $opt$lt(long a, long b) {
+  static boolean $opt$eq(float a, float b) {
+    return a == b;
+  }
+
+  static boolean $opt$eq(double a, double b) {
+    return a == b;
+  }
+
+  static boolean $opt$lt(long a, long b) {
     return a < b;
   }
 
@@ -181,6 +209,12 @@
     }
   }
 
+  public static void expectEq(float a, float b) {
+    if (!$opt$eq(a, b)) {
+      throw new Error("Expected: " + a + " == " + b);
+    }
+  }
+
   public static void expectLt(float a, float b) {
     if (!$opt$lt(a, b)) {
       throw new Error("Expected: " + a + " < " + b);
@@ -202,6 +236,12 @@
     }
   }
 
+  public static void expectEq(double a, double b) {
+    if (!$opt$eq(a, b)) {
+      throw new Error("Expected: " + a + " == " + b);
+    }
+  }
+
   public static void expectLt(double a, double b) {
     if (!$opt$lt(a, b)) {
       throw new Error("Expected: " + a + " < " + b);
@@ -224,4 +264,3 @@
   }
 
 }
-
diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java
index 96302fb..6d6a4f2 100644
--- a/test/441-checker-inliner/src/Main.java
+++ b/test/441-checker-inliner/src/Main.java
@@ -19,7 +19,7 @@
   /// CHECK-START: void Main.InlineVoid() inliner (before)
   /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
   /// CHECK-DAG:                      InvokeStaticOrDirect
-  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>,{{[ij]\d+}}]
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>{{(,[ij]\d+)?}}]
 
   /// CHECK-START: void Main.InlineVoid() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
@@ -31,7 +31,7 @@
 
   /// CHECK-START: int Main.InlineParameter(int) inliner (before)
   /// CHECK-DAG:     <<Param:i\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: int Main.InlineParameter(int) inliner (after)
@@ -44,7 +44,7 @@
 
   /// CHECK-START: long Main.InlineWideParameter(long) inliner (before)
   /// CHECK-DAG:     <<Param:j\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: long Main.InlineWideParameter(long) inliner (after)
@@ -57,7 +57,7 @@
 
   /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (before)
   /// CHECK-DAG:     <<Param:l\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (after)
@@ -128,8 +128,8 @@
   /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
   /// CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
-  /// CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>,{{[ij]\d+}}]
-  /// CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>{{(,[ij]\d+)?}}]
+  /// CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                     Return [<<Phi>>]
 
diff --git a/test/442-checker-constant-folding/smali/TestCmp.smali b/test/442-checker-constant-folding/smali/TestCmp.smali
new file mode 100644
index 0000000..df631bc
--- /dev/null
+++ b/test/442-checker-constant-folding/smali/TestCmp.smali
@@ -0,0 +1,332 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCmp;
+
+.super Ljava/lang/Object;
+
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (before)
+## CHECK-DAG:     <<Const13:j\d+>>  LongConstant 13
+## CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const13>>,<<Const7>>]
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (after)
+## CHECK-DAG:                       LongConstant 13
+## CHECK-DAG:                       LongConstant 7
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLongConstants()I
+   .registers 5
+   const-wide v1, 13
+   const-wide v3, 7
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (before)
+## CHECK-DAG:     <<Const11:f\d+>>  FloatConstant 11
+## CHECK-DAG:     <<Const22:f\d+>>  FloatConstant 22
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const11>>,<<Const22>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 11
+## CHECK-DAG:                       FloatConstant 22
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatConstants()I
+   .registers 3
+   const v1, 11.f
+   const v2, 22.f
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (before)
+## CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const33>>,<<Const44>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 33
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatConstants()I
+   .registers 3
+   const v1, 33.f
+   const v2, 44.f
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (before)
+## CHECK-DAG:     <<Const55:d\d+>>  DoubleConstant 55
+## CHECK-DAG:     <<Const66:d\d+>>  DoubleConstant 66
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const55>>,<<Const66>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 55
+## CHECK-DAG:                       DoubleConstant 66
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleConstants()I
+   .registers 5
+   const-wide v1, 55.
+   const-wide v3, 66.
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (before)
+## CHECK-DAG:     <<Const77:d\d+>>  DoubleConstant 77
+## CHECK-DAG:     <<Const88:d\d+>>  DoubleConstant 88
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const77>>,<<Const88>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 77
+## CHECK-DAG:                       DoubleConstant 88
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleConstants()I
+   .registers 5
+   const-wide v1, 77.
+   const-wide v3, 88.
+   cmpl-double v0, v1, v3
+   return v0
+.end method
+
+
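+# Comparisons of a constant against itself fold to 0 for all three variants:
+# the gt/lt bias only affects the result when one of the operands is NaN.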
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const100:j\d+>> LongConstant 100
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const100>>,<<Const100>>]
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (after)
+## CHECK-DAG:                       LongConstant 100
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLongSameConstant()I
+   .registers 5
+   const-wide v1, 100
+   const-wide v3, 100
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const200:f\d+>> FloatConstant 200
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const200>>,<<Const200>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 200
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatSameConstant()I
+   .registers 3
+   const v1, 200.f
+   const v2, 200.f
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const300:f\d+>> FloatConstant 300
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const300>>,<<Const300>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 300
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatSameConstant()I
+   .registers 3
+   const v1, 300.f
+   const v2, 300.f
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const400:d\d+>> DoubleConstant 400
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const400>>,<<Const400>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 400
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleSameConstant()I
+   .registers 5
+   const-wide v1, 400.
+   const-wide v3, 400.
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const500:d\d+>> DoubleConstant 500
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const500>>,<<Const500>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 500
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleSameConstant()I
+   .registers 5
+   const-wide v1, 500.
+   const-wide v3, 500.
+   cmpl-double v0, v1, v3
+   return v0
+.end method
+
+
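+# The tests below exercise the NaN behavior of the biased comparisons: with
+# gt bias (cmpg-*), a comparison involving NaN folds to 1, while with lt
+# bias (cmpl-*), it folds to -1, as the CHECK expectations assert.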
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const44>>,<<ConstNan>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:                       FloatConstant nan
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatConstantWithNaN()I
+   .registers 3
+   const v1, 44.f
+   const v2, NaNf
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const44>>,<<ConstNan>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:                       FloatConstant nan
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatConstantWithNaN()I
+   .registers 3
+   const v1, 44.f
+   const v2, NaNf
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const45:d\d+>>  DoubleConstant 45
+## CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const45>>,<<ConstNan>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 45
+## CHECK-DAG:                       DoubleConstant nan
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleConstantWithNaN()I
+   .registers 5
+   const-wide v1, 45.
+   const-wide v3, NaN
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const46:d\d+>>  DoubleConstant 46
+## CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const46>>,<<ConstNan>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 46
+## CHECK-DAG:                       DoubleConstant nan
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleConstantWithNaN()I
+   .registers 5
+   const-wide v1, 46.
+   const-wide v3, NaN
+   cmpl-double v0, v1, v3
+   return v0
+.end method
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 59e7282..33ef10b 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -14,8 +14,13 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 public class Main {
 
+  // Workaround for b/18051191.
+  class InnerClass {}
+
   public static void assertFalse(boolean condition) {
     if (condition) {
       throw new Error();
@@ -46,6 +51,83 @@
     }
   }
 
+  private static int $inline$int(int x) {
+    return x;
+  }
+
+  private static long $inline$long(long x) {
+    return x;
+  }
+
+  private static float $inline$float(float x) {
+    return x;
+  }
+
+  private static double $inline$double(double x) {
+    return x;
+  }
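+
+  // Methods whose name contains the marker `$inline$` are expected to be
+  // inlined by the optimizing compiler. The constants they return therefore
+  // only become visible to constant folding after inlining, which is why the
+  // CHECK-START lines below target the constant_folding$after_inlining pass
+  // instead of the plain constant_folding pass.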
+
+  // Wrappers around the methods defined in the file TestCmp.smali. Since the
+  // smali class is assembled separately, its methods cannot be referenced
+  // directly from Java source and are invoked reflectively through the
+  // `testCmp` class object.
+
+  public int smaliCmpLongConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLongConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtFloatConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleConstants");
+    return (Integer)m.invoke(null);
+  }
+
+  public int smaliCmpLongSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLongSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtFloatSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleSameConstant");
+    return (Integer)m.invoke(null);
+  }
+
+  public int smaliCmpGtFloatConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+
 
   /**
    * Exercise constant folding on negation.
@@ -89,33 +171,70 @@
     return y;
   }
 
+  /// CHECK-START: float Main.FloatNegation() constant_folding (before)
+  /// CHECK-DAG:     <<Const42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Neg:f\d+>>      Neg [<<Const42>>]
+  /// CHECK-DAG:                       Return [<<Neg>>]
+
+  /// CHECK-START: float Main.FloatNegation() constant_folding (after)
+  /// CHECK-DAG:     <<ConstN42:f\d+>> FloatConstant -42
+  /// CHECK-DAG:                       Return [<<ConstN42>>]
+
+  /// CHECK-START: float Main.FloatNegation() constant_folding (after)
+  /// CHECK-NOT:                       Neg
+
+  public static float FloatNegation() {
+    float x, y;
+    x = 42F;
+    y = -x;
+    return y;
+  }
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (before)
+  /// CHECK-DAG:     <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:     <<Neg:d\d+>>      Neg [<<Const42>>]
+  /// CHECK-DAG:                       Return [<<Neg>>]
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (after)
+  /// CHECK-DAG:     <<ConstN42:d\d+>> DoubleConstant -42
+  /// CHECK-DAG:                       Return [<<ConstN42>>]
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (after)
+  /// CHECK-NOT:                       Neg
+
+  public static double DoubleNegation() {
+    double x, y;
+    x = 42D;
+    y = -x;
+    return y;
+  }
 
   /**
    * Exercise constant folding on addition.
    */
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding (before)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding (after)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding (after)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static int IntAddition1() {
     int a, b, c;
-    a = 1;
-    b = 2;
+    a = $inline$int(1);
+    b = $inline$int(2);
     c = a + b;
     return c;
   }
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding (before)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
@@ -125,42 +244,84 @@
   /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                      Return [<<Add3>>]
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding (after)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const14:i\d+>> IntConstant 14
   /// CHECK-DAG:                      Return [<<Const14>>]
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding (after)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static int IntAddition2() {
     int a, b, c;
-    a = 1;
-    b = 2;
+    a = $inline$int(1);
+    b = $inline$int(2);
     a += b;
-    b = 5;
-    c = 6;
+    b = $inline$int(5);
+    c = $inline$int(6);
     b += c;
     c = a + b;
     return c;
   }
 
-  /// CHECK-START: long Main.LongAddition() constant_folding (before)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.LongAddition() constant_folding (after)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: long Main.LongAddition() constant_folding (after)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static long LongAddition() {
     long a, b, c;
-    a = 1L;
-    b = 2L;
+    a = $inline$long(1L);
+    b = $inline$long(2L);
+    c = a + b;
+    return c;
+  }
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const1:f\d+>>  FloatConstant 1
+  /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
+  /// CHECK-DAG:     <<Add:f\d+>>     Add [<<Const1>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
+  /// CHECK-DAG:                      Return [<<Const3>>]
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      Add
+
+  public static float FloatAddition() {
+    float a, b, c;
+    a = $inline$float(1F);
+    b = $inline$float(2F);
+    c = a + b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const1:d\d+>>  DoubleConstant 1
+  /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
+  /// CHECK-DAG:     <<Add:d\d+>>     Add [<<Const1>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
+  /// CHECK-DAG:                      Return [<<Const3>>]
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      Add
+
+  public static double DoubleAddition() {
+    double a, b, c;
+    a = $inline$double(1D);
+    b = $inline$double(2D);
     c = a + b;
     return c;
   }
@@ -170,44 +331,86 @@
    * Exercise constant folding on subtraction.
    */
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding (before)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding (after)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:i\d+>>  IntConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding (after)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static int IntSubtraction() {
     int a, b, c;
-    a = 6;
-    b = 2;
+    a = $inline$int(6);
+    b = $inline$int(2);
     c = a - b;
     return c;
   }
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding (before)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:j\d+>>  LongConstant 6
   /// CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<Sub:j\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding (after)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:j\d+>>  LongConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding (after)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static long LongSubtraction() {
     long a, b, c;
-    a = 6L;
-    b = 2L;
+    a = $inline$long(6L);
+    b = $inline$long(2L);
+    c = a - b;
+    return c;
+  }
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const6:f\d+>>  FloatConstant 6
+  /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
+  /// CHECK-DAG:     <<Sub:f\d+>>     Sub [<<Const6>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Sub>>]
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const4:f\d+>>  FloatConstant 4
+  /// CHECK-DAG:                      Return [<<Const4>>]
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      Sub
+
+  public static float FloatSubtraction() {
+    float a, b, c;
+    a = $inline$float(6F);
+    b = $inline$float(2F);
+    c = a - b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const6:d\d+>>  DoubleConstant 6
+  /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
+  /// CHECK-DAG:     <<Sub:d\d+>>     Sub [<<Const6>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Sub>>]
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const4:d\d+>>  DoubleConstant 4
+  /// CHECK-DAG:                      Return [<<Const4>>]
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      Sub
+
+  public static double DoubleSubtraction() {
+    double a, b, c;
+    a = $inline$double(6D);
+    b = $inline$double(2D);
     c = a - b;
     return c;
   }
@@ -217,44 +420,86 @@
    * Exercise constant folding on multiplication.
    */
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding (before)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:     <<Mul:i\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding (after)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:i\d+>> IntConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding (after)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static int IntMultiplication() {
     int a, b, c;
-    a = 7;
-    b = 3;
+    a = $inline$int(7);
+    b = $inline$int(3);
     c = a * b;
     return c;
   }
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding (before)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:j\d+>>  LongConstant 7
   /// CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding (after)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:j\d+>> LongConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding (after)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static long LongMultiplication() {
     long a, b, c;
-    a = 7L;
-    b = 3L;
+    a = $inline$long(7L);
+    b = $inline$long(3L);
+    c = a * b;
+    return c;
+  }
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const7:f\d+>>  FloatConstant 7
+  /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
+  /// CHECK-DAG:     <<Mul:f\d+>>     Mul [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:                      Return [<<Mul>>]
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const21:f\d+>> FloatConstant 21
+  /// CHECK-DAG:                      Return [<<Const21>>]
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      Mul
+
+  public static float FloatMultiplication() {
+    float a, b, c;
+    a = $inline$float(7F);
+    b = $inline$float(3F);
+    c = a * b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const7:d\d+>>  DoubleConstant 7
+  /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
+  /// CHECK-DAG:     <<Mul:d\d+>>     Mul [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:                      Return [<<Mul>>]
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const21:d\d+>> DoubleConstant 21
+  /// CHECK-DAG:                      Return [<<Const21>>]
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      Mul
+
+  public static double DoubleMultiplication() {
+    double a, b, c;
+    a = $inline$double(7D);
+    b = $inline$double(3D);
     c = a * b;
     return c;
   }
@@ -264,48 +509,90 @@
    * Exercise constant folding on division.
    */
 
-  /// CHECK-START: int Main.IntDivision() constant_folding (before)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:i\d+>>   IntConstant 8
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<Div0Chk:i\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.IntDivision() constant_folding (after)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: int Main.IntDivision() constant_folding (after)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Div
 
   public static int IntDivision() {
     int a, b, c;
-    a = 8;
-    b = 3;
+    a = $inline$int(8);
+    b = $inline$int(3);
     c = a / b;
     return c;
   }
 
-  /// CHECK-START: long Main.LongDivision() constant_folding (before)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:j\d+>>   LongConstant 8
   /// CHECK-DAG:     <<Const3:j\d+>>   LongConstant 3
   /// CHECK-DAG:     <<Div0Chk:j\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Div:j\d+>>      Div [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: long Main.LongDivision() constant_folding (after)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.LongDivision() constant_folding (after)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Div
 
   public static long LongDivision() {
     long a, b, c;
-    a = 8L;
-    b = 3L;
+    a = $inline$long(8L);
+    b = $inline$long(3L);
+    c = a / b;
+    return c;
+  }
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
+  /// CHECK-DAG:     <<Div:f\d+>>      Div [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Div>>]
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const3P2:f\d+>> FloatConstant 3.2
+  /// CHECK-DAG:                       Return [<<Const3P2>>]
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                       Div
+
+  public static float FloatDivision() {
+    float a, b, c;
+    a = $inline$float(8F);
+    b = $inline$float(2.5F);
+    c = a / b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
+  /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
+  /// CHECK-DAG:     <<Div:d\d+>>      Div [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Div>>]
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const3P2:d\d+>> DoubleConstant 3.2
+  /// CHECK-DAG:                       Return [<<Const3P2>>]
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                       Div
+
+  public static double DoubleDivision() {
+    double a, b, c;
+    a = $inline$double(8D);
+    b = $inline$double(2.5D);
     c = a / b;
     return c;
   }
@@ -315,48 +602,90 @@
    * Exercise constant folding on remainder.
    */
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding (before)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:i\d+>>   IntConstant 8
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<Div0Chk:i\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Rem:i\d+>>      Rem [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding (after)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding (after)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Rem
 
   public static int IntRemainder() {
     int a, b, c;
-    a = 8;
-    b = 3;
+    a = $inline$int(8);
+    b = $inline$int(3);
     c = a % b;
     return c;
   }
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding (before)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:j\d+>>   LongConstant 8
   /// CHECK-DAG:     <<Const3:j\d+>>   LongConstant 3
   /// CHECK-DAG:     <<Div0Chk:j\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Rem:j\d+>>      Rem [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding (after)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding (after)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Rem
 
   public static long LongRemainder() {
     long a, b, c;
-    a = 8L;
-    b = 3L;
+    a = $inline$long(8L);
+    b = $inline$long(3L);
+    c = a % b;
+    return c;
+  }
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
+  /// CHECK-DAG:     <<Rem:f\d+>>      Rem [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Rem>>]
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const0P5:f\d+>> FloatConstant 0.5
+  /// CHECK-DAG:                       Return [<<Const0P5>>]
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                       Rem
+
+  public static float FloatRemainder() {
+    float a, b, c;
+    a = $inline$float(8F);
+    b = $inline$float(2.5F);
+    c = a % b;
+    return c;
+  }
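+
+  // 8.0f % 2.5f == 0.5f (8.0 - 3 * 2.5), matching the folded FloatConstant
+  // 0.5 expected above.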
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
+  /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
+  /// CHECK-DAG:     <<Rem:d\d+>>      Rem [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Rem>>]
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const0P5:d\d+>> DoubleConstant 0.5
+  /// CHECK-DAG:                       Return [<<Const0P5>>]
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                       Rem
+
+  public static double DoubleRemainder() {
+    double a, b, c;
+    a = $inline$double(8D);
+    b = $inline$double(2.5D);
     c = a % b;
     return c;
   }
@@ -366,42 +695,42 @@
    * Exercise constant folding on left shift.
    */
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding (before)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Const1>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding (after)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:i\d+>>   IntConstant 4
   /// CHECK-DAG:                       Return [<<Const4>>]
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding (after)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shl
 
   public static int ShlIntLong() {
-    int lhs = 1;
-    long rhs = 2;
+    int lhs = $inline$int(1);
+    long rhs = $inline$long(2L);
     return lhs << rhs;
   }
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding (before)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<Shl:j\d+>>      Shl [<<Const3L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding (after)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const12L:j\d+>> LongConstant 12
   /// CHECK-DAG:                       Return [<<Const12L>>]
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding (after)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shl
 
   public static long ShlLongInt() {
-    long lhs = 3;
-    int rhs = 2;
+    long lhs = $inline$long(3L);
+    int rhs = $inline$int(2);
     return lhs << rhs;
   }
 
@@ -410,42 +739,42 @@
    * Exercise constant folding on right shift.
    */
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding (before)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<Shr:i\d+>>      Shr [<<Const7>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding (after)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:                       Return [<<Const1>>]
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding (after)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shr
 
   public static int ShrIntLong() {
-    int lhs = 7;
-    long rhs = 2;
+    int lhs = $inline$int(7);
+    long rhs = $inline$long(2L);
     return lhs >> rhs;
   }
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding (before)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const9L:j\d+>>  LongConstant 9
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Const9L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding (after)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2L>>]
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding (after)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shr
 
   public static long ShrLongInt() {
-    long lhs = 9;
-    int rhs = 2;
+    long lhs = $inline$long(9L);
+    int rhs = $inline$int(2);
     return lhs >> rhs;
   }
 
@@ -454,42 +783,42 @@
    * Exercise constant folding on unsigned right shift.
    */
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding (before)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstM7:i\d+>>  IntConstant -7
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<ConstM7>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding (after)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstRes:i\d+>> IntConstant 1073741822
   /// CHECK-DAG:                       Return [<<ConstRes>>]
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding (after)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       UShr
 
   public static int UShrIntLong() {
-    int lhs = -7;
-    long rhs = 2;
+    int lhs = $inline$int(-7);
+    long rhs = $inline$long(2L);
     return lhs >>> rhs;
   }
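 
   // As a check on the folded value: -7 is 0xFFFFFFF9 as an unsigned 32-bit
   // pattern, and 0xFFFFFFF9 >>> 2 == 0x3FFFFFFE == 1073741822.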
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding (before)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstM9L:j\d+>> LongConstant -9
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<ConstM9L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding (after)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstRes:j\d+>> LongConstant 4611686018427387901
   /// CHECK-DAG:                       Return [<<ConstRes>>]
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding (after)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       UShr
 
   public static long UShrLongInt() {
-    long lhs = -9;
-    int rhs = 2;
+    long lhs = $inline$long(-9L);
+    int rhs = $inline$int(2);
     return lhs >>> rhs;
   }
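 
   // Likewise, -9L is 0xFFFFFFFFFFFFFFF7 as an unsigned 64-bit pattern, and
   // shifting it right by 2 gives 0x3FFFFFFFFFFFFFFD == 4611686018427387901.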
 
@@ -498,43 +827,43 @@
    * Exercise constant folding on logical and.
    */
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding (before)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding (after)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding (after)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       And
 
   public static long AndIntLong() {
-    int lhs = 10;
-    long rhs = 3;
+    int lhs = $inline$int(10);
+    long rhs = $inline$long(3L);
     return lhs & rhs;
   }
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding (before)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<And:j\d+>>      And [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding (after)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding (after)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       And
 
   public static long AndLongInt() {
-    long lhs = 10;
-    int rhs = 3;
+    long lhs = $inline$long(10L);
+    int rhs = $inline$int(3);
     return lhs & rhs;
   }
 
@@ -543,43 +872,43 @@
    * Exercise constant folding on logical or.
    */
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding (before)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding (after)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const11:j\d+>>  LongConstant 11
   /// CHECK-DAG:                       Return [<<Const11>>]
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding (after)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Or
 
   public static long OrIntLong() {
-    int lhs = 10;
-    long rhs = 3;
+    int lhs = $inline$int(10);
+    long rhs = $inline$long(3L);
     return lhs | rhs;
   }
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding (before)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<Or:j\d+>>       Or [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding (after)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const11:j\d+>>  LongConstant 11
   /// CHECK-DAG:                       Return [<<Const11>>]
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding (after)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Or
 
   public static long OrLongInt() {
-    long lhs = 10;
-    int rhs = 3;
+    long lhs = $inline$long(10L);
+    int rhs = $inline$int(3);
     return lhs | rhs;
   }
 
@@ -588,43 +917,43 @@
    * Exercise constant folding on logical exclusive or.
    */
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding (before)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding (after)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const9:j\d+>>   LongConstant 9
   /// CHECK-DAG:                       Return [<<Const9>>]
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding (after)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Xor
 
   public static long XorIntLong() {
-    int lhs = 10;
-    long rhs = 3;
+    int lhs = $inline$int(10);
+    long rhs = $inline$long(3L);
     return lhs ^ rhs;
   }
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding (before)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding (after)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const9:j\d+>>   LongConstant 9
   /// CHECK-DAG:                       Return [<<Const9>>]
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding (after)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Xor
 
   public static long XorLongInt() {
-    long lhs = 10;
-    int rhs = 3;
+    long lhs = $inline$long(10L);
+    int rhs = $inline$int(3);
     return lhs ^ rhs;
   }
 
@@ -633,23 +962,23 @@
    * Exercise constant folding on constant (static) condition.
    */
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding (before)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Cond:z\d+>>    GreaterThanOrEqual [<<Const7>>,<<Const2>>]
-  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding (after)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
-  /// CHECK-DAG:                      If [<<Const1>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const1>>]
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding (after)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      GreaterThanOrEqual
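 
   // By the time constant_folding$after_inlining runs, the select generation
   // pass has already rewritten the if/else diamond in this method into a
   // Select, which is why the expectations above match Select instead of If.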
 
   public static int StaticCondition() {
     int a, b, c;
-    a = 7;
-    b = 2;
+    a = $inline$int(7);
+    b = $inline$int(2);
     if (a < b)
       c = a + b;
     else
@@ -659,6 +988,33 @@
 
 
   /**
+   * Exercise constant folding on constant (static) condition for null references.
+   */
+
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Null:l\d+>>    NullConstant
+  /// CHECK-DAG:     <<Cond:z\d+>>    NotEqual [<<Null>>,<<Null>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Const0:i\d+>>  IntConstant 0
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const0>>]
+
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (after)
+  /// CHECK-NOT:                      NotEqual
+
+  private static Object getNull() {
+    return null;
+  }
+
+  public static int StaticConditionNulls() {
+    Object a = getNull();
+    Object b = getNull();
+    return (a == b) ? 5 : 2;
+  }
+
+
+  /**
    * Exercise constant folding on a program with condition
    * (i.e. jumps) leading to the creation of many blocks.
    *
@@ -667,28 +1023,30 @@
    * (forward) post-order traversal of the dominator tree.
    */
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding (before)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (before)
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Const5>>,<<Const2>>]
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<Const5>>,<<Const2>>]
-  /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>     Select [<<Sub>>,<<Add>>,<<Cond>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding (after)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (after)
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
-  /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>     Select [<<Const3>>,<<Const7>>,<<Cond>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding (after)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Sub
 
   public static int JumpsAndConditionals(boolean cond) {
     int a, b, c;
-    a = 5;
-    b = 2;
+    a = $inline$int(5);
+    b = $inline$int(2);
     if (cond)
       c = a + b;
     else
@@ -722,7 +1080,7 @@
   /// CHECK-START: long Main.Mul0(long) constant_folding (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
-  /// CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Arg>>,<<Const0>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
   /// CHECK-START: long Main.Mul0(long) constant_folding (after)
@@ -967,227 +1325,239 @@
    * Exercise constant folding on type conversions.
    */
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding (before)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding (after)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding (after)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnInt33() {
-    long imm = 33L;
+    long imm = $inline$long(33L);
     return (int) imm;
   }
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding (before)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstMax:f\d+>> FloatConstant 1e+34
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstMax>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding (after)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstMax:i\d+>> IntConstant 2147483647
   /// CHECK-DAG:                       Return [<<ConstMax>>]
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding (after)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnIntMax() {
-    float imm = 1.0e34f;
+    float imm = $inline$float(1.0e34f);
     return (int) imm;
   }
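 
   // Java's float-to-int conversion saturates: 1.0e34f exceeds
   // Integer.MAX_VALUE, so the conversion above folds to 2147483647, while a
   // NaN input (see ReturnInt0() below) converts to 0.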
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding (before)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstNaN>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding (after)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                       Return [<<Const0>>]
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding (after)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnInt0() {
-    double imm = Double.NaN;
+    double imm = $inline$double(Double.NaN);
     return (int) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding (before)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding (after)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding (after)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong33() {
-    int imm = 33;
+    int imm = $inline$int(33);
     return (long) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding (before)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding (after)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding (after)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong34() {
-    float imm = 34.0f;
+    float imm = $inline$float(34.0f);
     return (long) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding (before)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<ConstNaN>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding (after)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:                       Return [<<Const0>>]
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding (after)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong0() {
-    double imm = -Double.NaN;
+    double imm = $inline$double(-Double.NaN);
     return (long) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding (before)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding (after)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding (after)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat33() {
-    int imm = 33;
+    int imm = $inline$int(33);
     return (float) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding (before)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding (after)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding (after)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat34() {
-    long imm = 34L;
+    long imm = $inline$long(34L);
     return (float) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding (before)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding (after)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
   /// CHECK-DAG:                       Return [<<Const>>]
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding (after)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat99P25() {
-    double imm = 99.25;
+    double imm = $inline$double(99.25);
     return (float) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble33() constant_folding (before)
+  /// CHECK-START: double Main.ReturnDouble33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble33() constant_folding (after)
+  /// CHECK-START: double Main.ReturnDouble33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:d\d+>>  DoubleConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
   public static double ReturnDouble33() {
-    int imm = 33;
+    int imm = $inline$int(33);
     return (double) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding (before)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding (after)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:d\d+>>  DoubleConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding (after)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static double ReturnDouble34() {
-    long imm = 34L;
+    long imm = $inline$long(34L);
     return (double) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding (before)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding (after)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
   /// CHECK-DAG:                       Return [<<Const>>]
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding (after)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static double ReturnDouble99P25() {
-    float imm = 99.25f;
+    float imm = $inline$float(99.25f);
     return (double) imm;
   }
 
 
-  public static void main(String[] args) {
+  public static void main(String[] args) throws Exception {
     assertIntEquals(-42, IntNegation());
     assertLongEquals(-42L, LongNegation());
+    assertFloatEquals(-42F, FloatNegation());
+    assertDoubleEquals(-42D, DoubleNegation());
 
     assertIntEquals(3, IntAddition1());
     assertIntEquals(14, IntAddition2());
     assertLongEquals(3L, LongAddition());
+    assertFloatEquals(3F, FloatAddition());
+    assertDoubleEquals(3D, DoubleAddition());
 
     assertIntEquals(4, IntSubtraction());
     assertLongEquals(4L, LongSubtraction());
+    assertFloatEquals(4F, FloatSubtraction());
+    assertDoubleEquals(4D, DoubleSubtraction());
 
     assertIntEquals(21, IntMultiplication());
     assertLongEquals(21L, LongMultiplication());
+    assertFloatEquals(21F, FloatMultiplication());
+    assertDoubleEquals(21D, DoubleMultiplication());
 
     assertIntEquals(2, IntDivision());
     assertLongEquals(2L, LongDivision());
+    assertFloatEquals(3.2F, FloatDivision());
+    assertDoubleEquals(3.2D, DoubleDivision());
 
     assertIntEquals(2, IntRemainder());
     assertLongEquals(2L, LongRemainder());
+    assertFloatEquals(0.5F, FloatRemainder());
+    assertDoubleEquals(0.5D, DoubleRemainder());
 
     assertIntEquals(4, ShlIntLong());
     assertLongEquals(12L, ShlLongInt());
@@ -1208,6 +1578,7 @@
     assertLongEquals(9, XorLongInt());
 
     assertIntEquals(5, StaticCondition());
+    assertIntEquals(5, StaticConditionNulls());
 
     assertIntEquals(7, JumpsAndConditionals(true));
     assertIntEquals(3, JumpsAndConditionals(false));
@@ -1230,6 +1601,24 @@
     assertFalse(CmpFloatGreaterThanNaN(arbitrary));
     assertFalse(CmpDoubleLessThanNaN(arbitrary));
 
+    Main main = new Main();
+    assertIntEquals(1, main.smaliCmpLongConstants());
+    assertIntEquals(-1, main.smaliCmpGtFloatConstants());
+    assertIntEquals(-1, main.smaliCmpLtFloatConstants());
+    assertIntEquals(-1, main.smaliCmpGtDoubleConstants());
+    assertIntEquals(-1, main.smaliCmpLtDoubleConstants());
+
+    assertIntEquals(0, main.smaliCmpLongSameConstant());
+    assertIntEquals(0, main.smaliCmpGtFloatSameConstant());
+    assertIntEquals(0, main.smaliCmpLtFloatSameConstant());
+    assertIntEquals(0, main.smaliCmpGtDoubleSameConstant());
+    assertIntEquals(0, main.smaliCmpLtDoubleSameConstant());
+
+    assertIntEquals(1, main.smaliCmpGtFloatConstantWithNaN());
+    assertIntEquals(-1, main.smaliCmpLtFloatConstantWithNaN());
+    assertIntEquals(1, main.smaliCmpGtDoubleConstantWithNaN());
+    assertIntEquals(-1, main.smaliCmpLtDoubleConstantWithNaN());
+
     assertIntEquals(33, ReturnInt33());
     assertIntEquals(2147483647, ReturnIntMax());
     assertIntEquals(0, ReturnInt0());
@@ -1246,4 +1635,10 @@
     assertDoubleEquals(34, ReturnDouble34());
     assertDoubleEquals(99.25, ReturnDouble99P25());
   }
+
+  Main() throws ClassNotFoundException {
+    testCmp = Class.forName("TestCmp");
+  }
+
+  private Class<?> testCmp;
 }
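
For reference, the expected constants in the checks above follow directly from
Java's conversion rules (JLS 5.1.3), which constant folding evaluates at compile
time once the $inline$ helpers have been inlined and the operands are visible as
constants. A minimal plain-Java sketch of the same facts (hypothetical class
name; run with -ea to enable the assertions):

  public class ConversionFacts {
    public static void main(String[] args) {
      // A float too large for int saturates at Integer.MAX_VALUE.
      assert (int) 1.0e34f == 2147483647;
      // NaN converts to zero for both int and long targets.
      assert (int) Double.NaN == 0;
      assert (long) -Double.NaN == 0L;
      // In-range values convert exactly.
      assert (long) 34.0f == 34L;
      assert (float) 33 == 33.0f;
    }
  }
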
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index 32122e4..ddc2f77 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -16,61 +16,52 @@
 
 public class Main {
 
-  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main keepTest(Main m) {
     return m.g();
   }
 
-  /// CHECK-START: Main Main.thisTest() ssa_builder (after)
-  /// CHECK:         NullCheck
-  /// CHECK:         InvokeStaticOrDirect
-
-  /// CHECK-START: Main Main.thisTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.thisTest() builder (after)
   /// CHECK-NOT:     NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main thisTest() {
     return g();
   }
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() ssa_builder (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK:         NewInstance
-  /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
-  /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK-NOT:     NullCheck
   public Main newInstanceRemoveTest() {
     Main m = new Main();
     return m.g();
   }
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() ssa_builder (after)
+  /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK:         NewArray
-  /// CHECK:         NullCheck
   /// CHECK:         ArrayGet
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier_after_types (after)
-  /// CHECK:         NewArray
+  /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK-NOT:     NullCheck
-  /// CHECK:         ArrayGet
   public Main newArrayRemoveTest() {
     Main[] ms = new Main[1];
     return ms[0];
   }
 
-  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier (before)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier (after)
   /// CHECK:         NewInstance
   /// CHECK-NOT:     NullCheck
   public Main ifRemoveTest(boolean flag) {
@@ -83,11 +74,11 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier (before)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier (after)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
   public Main ifKeepTest(boolean flag) {
@@ -98,10 +89,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main forRemoveTest(int count) {
     Main a = new Main();
@@ -114,10 +105,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main forKeepTest(int count) {
     Main a = new Main();
@@ -132,10 +123,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main phiFlowRemoveTest(int count) {
     Main a = new Main();
@@ -154,10 +145,10 @@
     return n.g();
   }
 
-  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main phiFlowKeepTest(int count) {
     Main a = new Main();
@@ -178,10 +169,7 @@
     return n.g();
   }
 
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) ssa_builder (after)
-  /// CHECK:         NullCheck
-
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) builder (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeRemoveTest(int count, Main a) {
     Main m = null;
@@ -196,10 +184,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main scopeKeepTest(int count, Main a) {
     Main m = new Main();
@@ -214,10 +202,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeIfNotNullRemove(Main m) {
     if (m != null) {
@@ -226,10 +214,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main scopeIfKeep(Main m) {
     if (m == null) {
@@ -258,11 +246,11 @@
 class ListElement {
   private ListElement next;
 
-  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (before)
+  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier (before)
   /// CHECK:         NullCheck
   /// CHECK:         NullCheck
 
-  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (after)
+  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   static boolean isShorter(ListElement x, ListElement y) {
     ListElement xTail = x;
diff --git a/test/445-checker-licm/src/Main.java b/test/445-checker-licm/src/Main.java
index 6ee8a4d..061fe6e 100644
--- a/test/445-checker-licm/src/Main.java
+++ b/test/445-checker-licm/src/Main.java
@@ -52,13 +52,13 @@
     return result;
   }
 
-  /// CHECK-START: int Main.innerDiv2() licm (before)
+  /// CHECK-START: int Main.innerMul() licm (before)
   /// CHECK-DAG: Mul loop:B4
 
-  /// CHECK-START: int Main.innerDiv2() licm (after)
+  /// CHECK-START: int Main.innerMul() licm (after)
   /// CHECK-DAG: Mul loop:B2
 
-  public static int innerDiv2() {
+  public static int innerMul() {
     int result = 0;
     for (int i = 0; i < 10; ++i) {
       for (int j = 0; j < 10; ++j) {
@@ -71,13 +71,13 @@
     return result;
   }
 
-  /// CHECK-START: int Main.innerDiv3(int, int) licm (before)
+  /// CHECK-START: int Main.divByA(int, int) licm (before)
   /// CHECK-DAG: Div loop:{{B\d+}}
 
-  /// CHECK-START: int Main.innerDiv3(int, int) licm (after)
+  /// CHECK-START: int Main.divByA(int, int) licm (after)
   /// CHECK-DAG: Div loop:{{B\d+}}
 
-  public static int innerDiv3(int a, int b) {
+  public static int divByA(int a, int b) {
     int result = 0;
     while (b < 5) {
      // a might be zero, so we can't hoist the division.
@@ -107,6 +107,63 @@
     return result;
   }
 
+  /// CHECK-START: int Main.divAndIntrinsic(int[]) licm (before)
+  /// CHECK-DAG: Div loop:{{B\d+}}
+
+  /// CHECK-START: int Main.divAndIntrinsic(int[]) licm (after)
+  /// CHECK-NOT: Div loop:{{B\d+}}
+
+  /// CHECK-START: int Main.divAndIntrinsic(int[]) licm (after)
+  /// CHECK-DAG: Div loop:none
+
+  public static int divAndIntrinsic(int[] array) {
+    int result = 0;
+    for (int i = 0; i < array.length; i++) {
+      // An intrinsic call, unlike a general method call, cannot modify the field value.
+      // As a result, the invariant division on the field can be moved out of the loop.
+      result += (staticField / 42) + Math.abs(array[i]);
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.invariantBoundIntrinsic(int) licm (before)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBoundIntrinsic(int) licm (after)
+  /// CHECK-NOT: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBoundIntrinsic(int) licm (after)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:none
+
+  public static int invariantBoundIntrinsic(int x) {
+    int result = 0;
+    // The intrinsic call to abs used as loop bound is invariant.
+    // As a result, the call itself can be moved out of the loop header.
+    for (int i = 0; i < Math.abs(x); i++) {
+      result += i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (before)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (after)
+  /// CHECK-NOT: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (after)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:none
+
+  public static int invariantBodyIntrinsic(int x, int y) {
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      // The intrinsic call to max used inside the loop is invariant.
+      // As a result, the call itself can be moved out of the loop body.
+      result += Math.max(x, y);
+    }
+    return result;
+  }
+
   public static int staticField = 42;
 
   public static void assertEquals(int expected, int actual) {
@@ -118,6 +175,11 @@
   public static void main(String[] args) {
     assertEquals(10, div());
     assertEquals(100, innerDiv());
+    assertEquals(18900, innerMul());
+    assertEquals(105, divByA(2, 0));
     assertEquals(12, arrayLength(new int[] { 4, 8 }));
+    assertEquals(21, divAndIntrinsic(new int[] { 4, -2, 8, -3 }));
+    assertEquals(45, invariantBoundIntrinsic(-10));
+    assertEquals(30, invariantBodyIntrinsic(2, 3));
   }
 }
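
The three intrinsic tests added above all rest on the same property: an
intrinsic such as Math.abs or Math.max cannot write fields or arrays, so an
expression whose inputs are loop-invariant may be moved out of the loop. A
hand-hoisted sketch of the shape LICM effectively produces for divAndIntrinsic
(illustrative only, not actual compiler output):

  public static int divAndIntrinsicHoisted(int[] array) {
    int result = 0;
    // The division moves to loop:none; dividing by the constant 42 cannot
    // throw, so this is safe even when the loop runs zero times.
    int invariant = staticField / 42;
    for (int i = 0; i < array.length; i++) {
      result += invariant + Math.abs(array[i]);  // only the intrinsic remains inside
    }
    return result;
  }
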
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 22829cd..3a56c3b 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -122,12 +122,13 @@
   /// CHECK: ArraySet
 
   static void constantIndexing1(int[] array) {
-    array[5] = 1;
-    array[4] = 1;
+    // Decreasing order: a bounds check is needed for index 5 but not for 4.
+    array[5] = 11;
+    array[4] = 11;
   }
 
 
-  /// CHECK-START: void Main.constantIndexing2(int[]) BCE (before)
+  /// CHECK-START: void Main.$opt$noinline$constantIndexing2(int[]) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK: BoundsCheck
@@ -137,8 +138,7 @@
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
 
-  /// CHECK-START: void Main.constantIndexing2(int[]) BCE (after)
-  /// CHECK: LessThanOrEqual
+  /// CHECK-START: void Main.$opt$noinline$constantIndexing2(int[]) BCE (after)
   /// CHECK: Deoptimize
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -148,17 +148,77 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArraySet
 
-  static void constantIndexing2(int[] array) {
+  static void $opt$noinline$constantIndexing2(int[] array) {
     array[1] = 1;
     array[2] = 1;
     array[3] = 1;
     array[4] = 1;
-    array[-1] = 1;
+    if (array[1] != 1) {
+      throw new Error("");
+    }
   }
 
+  /// CHECK-START: void Main.constantIndexing2b(int[]) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing2b(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing2b(int[] array) {
+    array[0] = 6;
+    array[1] = 6;
+    array[2] = 6;
+    array[3] = 6;
+    array[-1] = 1;  // the negative index blocks the optimization over the whole [-1..4] range
+  }
+
+  /// CHECK-START: void Main.constantIndexing2c(int[]) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing2c(int[]) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing2c(int[] array) {
+    array[0] = 7;
+    array[1] = 7;
+    array[2] = 7;
+    array[3] = 7;
+  }
 
   /// CHECK-START: int[] Main.constantIndexing3(int[], int[], boolean) BCE (before)
   /// CHECK: BoundsCheck
@@ -179,11 +239,9 @@
   /// CHECK: ArraySet
 
   /// CHECK-START: int[] Main.constantIndexing3(int[], int[], boolean) BCE (after)
-  /// CHECK: LessThanOrEqual
   /// CHECK: Deoptimize
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  /// CHECK: LessThanOrEqual
   /// CHECK: Deoptimize
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -217,14 +275,14 @@
   /// CHECK: ArraySet
 
   /// CHECK-START: void Main.constantIndexing4(int[]) BCE (after)
-  /// CHECK-NOT: LessThanOrEqual
+  /// CHECK-NOT: Deoptimize
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
 
   // There is only one array access. It's not beneficial
   // to create a compare with deoptimization instruction.
   static void constantIndexing4(int[] array) {
-    array[0] = 1;
+    array[0] = -1;
   }
 
 
@@ -257,10 +315,263 @@
 
   /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after)
   /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
 
   static void constantIndexing6(int[] array) {
-    array[3] = 1;
-    array[4] = 1;
+    array[3] = 111;
+    array[4] = 111;
+  }
+
+  /// CHECK-START: void Main.constantIndexing7(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing7(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing7(int[] array, int base) {
+    // With constant offsets to symbolic base.
+    array[base]     = 10;
+    array[base + 1] = 20;
+    array[base + 2] = 30;
+    array[base + 3] = 40;
+  }
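
The two Deoptimize instructions expected after BCE act as one-time range tests
on the extremes of the constant-offset window base..base+3; once both hold,
every store is provably in bounds. A sketch of the guarded shape, with a
hypothetical deoptimize() standing in for the real mechanism (on a failed
guard, compiled code falls back to the interpreter, which preserves the
original store-by-store semantics instead of throwing up front):

  static void constantIndexing7Guarded(int[] array, int base) {
    // Test the smallest and largest index once, instead of checking each store.
    if (base < 0 || base + 3 >= array.length) {
      deoptimize();  // hypothetical: transfer to the interpreter
    }
    array[base]     = 10;  // all four stores now run without bounds checks
    array[base + 1] = 20;
    array[base + 2] = 30;
    array[base + 3] = 40;
  }
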
+
+  /// CHECK-START: void Main.constantIndexing8(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing8(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing8(int[] array, int base) {
+    // With constant offsets "both ways" to symbolic base.
+    array[base - 1] = 100;
+    array[base]     = 200;
+    array[base + 1] = 300;
+    array[base + 2] = 400;
+  }
+
+  /// CHECK-START: void Main.constantIndexing9(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing9(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+
+  static void constantIndexing9(int[] array, int base) {
+    // The final range is base..base+3, so the conditionally
+    // executed references can be included in it as well.
+    array[base] = 0;
+    if (base != 12345)
+      array[base + 2] = 2;
+    array[base + 3] = 3;
+    if (base != 67890)
+      array[base + 1] = 1;
+  }
+
+  /// CHECK-START: void Main.constantIndexing10(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing10(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing10(int[] array, int base) {
+    // Offset hidden in incremented base.
+    array[base] = 1;
+    array[++base] = 2;
+    array[++base] = 3;
+    array[++base] = 4;
+  }
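
The pre-increments in constantIndexing10 are just constant offsets in
disguise: rewriting them makes the base..base+3 window explicit, which is why
BCE handles the method exactly like constantIndexing7. The equivalent
unincremented form (a sketch):

  static void constantIndexing10Expanded(int[] array, int base) {
    // Each ++base resolves to a fixed offset from the original base.
    array[base]     = 1;
    array[base + 1] = 2;
    array[base + 2] = 3;
    array[base + 3] = 4;
  }
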
+
+  static void runAllConstantIndices() {
+    int[] a1 = { 0 };
+    int[] a6 = { 0, 0, 0, 0, 0, 0 };
+
+    boolean caught = false;
+    try {
+      constantIndexing1(a1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("constant indices 1 failed!");
+    }
+
+    constantIndexing1(a6);
+    if (a6[4] != 11 || a6[5] != 11) {
+      System.out.println("constant indices 1 failed!");
+    }
+
+    $opt$noinline$constantIndexing2(a6);
+    if (a6[0] != 0 || a6[1] != 1 || a6[2] != 1 ||
+        a6[3] != 1 || a6[4] != 1 || a6[5] != 11) {
+      System.out.println("constant indices 2 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing2b(a6);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a6[0] != 6 || a6[1] != 6 || a6[2] != 6 ||
+                   a6[3] != 6 || a6[4] != 1 || a6[5] != 11) {
+      System.out.println("constant indices 2b failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing2c(a1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a1[0] != 7) {
+      System.out.println("constant indices 2c failed!");
+    }
+
+    constantIndexing2c(a6);
+    if (a6[0] != 7 || a6[1] != 7 || a6[2] != 7 ||
+        a6[3] != 7 || a6[4] != 1 || a6[5] != 11) {
+      System.out.println("constant indices 2c failed!");
+    }
+
+    int[] b4 = new int[4];
+    constantIndexing3(a6, b4, true);
+    if (b4[0] != 7 || b4[1] != 7 || b4[2] != 7 || b4[3] != 7) {
+      System.out.println("constant indices 3 failed!");
+    }
+
+    constantIndexing4(a1);
+    if (a1[0] != -1) {
+      System.out.println("constant indices 4 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing5(a6);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("constant indices 5 failed!");
+    }
+
+    constantIndexing6(a6);
+    if (a6[0] != 7   || a6[1] != 7   || a6[2] != 7 ||
+        a6[3] != 111 || a6[4] != 111 || a6[5] != 11) {
+      System.out.println("constant indices 6 failed!");
+    }
+
+    constantIndexing7(a6, 1);
+    if (a6[0] != 7  || a6[1] != 10 || a6[2] != 20 ||
+        a6[3] != 30 || a6[4] != 40 || a6[5] != 11) {
+      System.out.println("constant indices 7 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing7(a6, 5);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a6[0] != 7  || a6[1] != 10 || a6[2] != 20 ||
+                   a6[3] != 30 || a6[4] != 40 || a6[5] != 10) {
+      System.out.println("constant indices 7 failed!");
+    }
+
+    constantIndexing8(a6, 1);
+    if (a6[0] != 100 || a6[1] != 200 || a6[2] != 300 ||
+        a6[3] != 400 || a6[4] != 40  || a6[5] != 10) {
+      System.out.println("constant indices 8 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing8(a6, 0);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a6[0] != 100) {
+      System.out.println("constant indices 8 failed!");
+    }
+
+    constantIndexing9(a6, 0);
+    if (a6[0] != 0 || a6[1] != 1  || a6[2] != 2  ||
+        a6[3] != 3 || a6[4] != 40 || a6[5] != 10) {
+      System.out.println("constant indices 9 failed!");
+    }
+
+    constantIndexing10(a6, 0);
+    if (a6[0] != 1 || a6[1] != 2  || a6[2] != 3  ||
+        a6[3] != 4 || a6[4] != 40 || a6[5] != 10) {
+      System.out.println("constant indices 10 failed!");
+    }
   }
 
   // A helper into which the actual throwing function should be inlined.
@@ -616,20 +927,94 @@
     }
   }
 
+  /// CHECK-START: void Main.nonzeroLength(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.nonzeroLength(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  public static void nonzeroLength(int[] a) {
+    if (a.length != 0) {
+      a[0] = 112;
+    }
+  }
+
+  /// CHECK-START: void Main.knownLength(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.knownLength(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  public static void knownLength(int[] a) {
+    if (a.length == 2) {
+      a[0] = -1;
+      a[1] = -2;
+    }
+  }
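
In both helpers every index is statically provable from the dominating length
test, so neither a bounds check nor a Deoptimize is required. A guard that only
bounds the length from below does not extend to higher indices, though;
presumably the check on a[1] in this contrasting sketch has to stay:

  public static void weakGuard(int[] a) {
    if (a.length != 0) {
      a[0] = 1;  // provably in range: length >= 1
      a[1] = 2;  // length could be exactly 1, so this check cannot be removed
    }
  }
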
+
+  static int[][] mA;
+
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
+  //  Array references mA[i] and ..[j] both in inner loop.
+  /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Bounds1:i\d+>>] loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG:  <<Array1>>     NullCheck [<<Field1:l\d+>>]                 loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Len1:i\d+>>  ArrayLength [<<Array1>>]                    loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Bounds1>>    BoundsCheck [<<Index1:i\d+>>,<<Len1>>]      loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Bounds2:i\d+>>] loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Array2>>     NullCheck [<<Get1>>]                        loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Len2:i\d+>>  ArrayLength [<<Array2>>]                    loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Bounds2>>    BoundsCheck [<<Index2:i\d+>>,<<Len2>>]      loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop:B\d+>>
+  /// CHECK-DAG:  <<Field1>>     StaticFieldGet                              loop:none
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
+  //  Array reference mA[i] hoisted to same level as deopt.
+  /// CHECK-DAG:                 Deoptimize                                  loop:<<OuterLoop:B\d+>>
+  /// CHECK-DAG:                 ArrayLength                                 loop:<<OuterLoop>>
+  /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Index1:i\d+>>]  loop:<<OuterLoop>>
+  //  Array reference ..[j] still in inner loop, with a direct index.
+  /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Index2:i\d+>>]  loop:<<InnerLoop:B\d+>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop>>
+  //  Synthetic phi.
+  /// CHECK-DAG:  <<Array2>>     Phi                                         loop:<<OuterLoop>>
+  /// CHECK-DAG:  <<Array1>>     StaticFieldGet                              loop:none
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: BoundsCheck
+  static void dynamicBCEAndIntrinsic(int n) {
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < n; j++) {
+        // Since an intrinsic call cannot modify fields or arrays,
+        // dynamic BCE and hoisting can be applied to the inner loop.
+        mA[i][j] = Math.abs(mA[i][j]);
+      }
+    }
+  }
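
The checks above describe the row access mA[i], along with its null check and
array length, being lifted to the outer loop, while the inner loop keeps only a
direct, check-free element access. A hand-hoisted sketch of that shape
(illustrative, not compiler output):

  static void dynamicBCEAndIntrinsicHoisted(int n) {
    for (int i = 0; i < n; i++) {
      int[] row = mA[i];  // row load, null check, and length hoisted here
      for (int j = 0; j < n; j++) {
        row[j] = Math.abs(row[j]);  // no per-iteration null or bounds checks
      }
    }
  }
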
 
   static int foo() {
     try {
       assertIsManaged();
       // This will cause AIOOBE.
-      constantIndexing2(new int[3]);
+      $opt$noinline$constantIndexing2(new int[3]);
     } catch (ArrayIndexOutOfBoundsException e) {
       assertIsManaged();  // This is to ensure that single-frame deoptimization works.
-                                // Will need to be updated if constantIndexing2 is inlined.
+                          // Will need to be updated if $opt$noinline$constantIndexing2 is inlined.
       try {
         // This will cause AIOOBE.
         constantIndexingForward6(new int[3]);
       } catch (ArrayIndexOutOfBoundsException e2) {
-        assertIsManaged();
+        // Having deopted, we expect to be running interpreted at this point.
+        // This does not apply to debuggable builds, however, since we do not inline there.
         return 99;
       }
     }
@@ -651,20 +1036,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -684,27 +1068,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
@@ -724,25 +1106,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end < array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
@@ -769,18 +1151,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
@@ -815,14 +1198,15 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  //  array.length is defined before the loop header so no phi is needed.
-  /// CHECK-NOT: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
@@ -830,8 +1214,8 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // One HDeoptimize will be added.
-    // It's for (end - 2 <= array.length - 2).
+    // Three HDeoptimize will be added for the bounds.
+    // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -858,7 +1242,6 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-
   /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
@@ -873,23 +1256,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
   /// CHECK: Goto
-  /// CHECK-NOT: Deoptimize
+  /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 2, one for end <= array.length - 3,
-    // and one for null check on array (to hoist null
-    // check and array.length out of loop).
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -913,20 +1292,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo7(int[] array, int start, int end, boolean lowEnd) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -954,26 +1332,28 @@
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK: Goto
+  /// CHECK: Goto
+  /// CHECK: If
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo8(int[][] matrix, int start, int end) {
-    // Three HDeoptimize will be added for the outer loop.
-    // start >= 0, end <= matrix.length, and null check on matrix.
-    // Three HDeoptimize will be added for the inner loop
-    // start >= 0 (TODO: this may be optimized away),
-    // end <= row.length, and null check on row.
+    // Three HDeoptimize will be added for the outer loop:
+    // two for the index and one for the null check on matrix.
+    // The same holds for the inner loop.
     for (int i = start; i < end; i++) {
       int[] row = matrix[i];
       for (int j = start; j < end; j++) {
@@ -993,15 +1373,22 @@
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Phi
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier$after_bce (after)
+  //  Simplification removes the redundant check
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: Deoptimize
+
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for
-    // 10 <= array.length, and one for null check on array.
+    // Three HDeoptimize will be added. Two for the index and one for null check on array. Then
+    // simplification removes one redundant HDeoptimize.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -1038,6 +1425,9 @@
 
   static void testUnknownBounds() {
     boolean caught = false;
+
+    runAllConstantIndices();
+
     Main main = new Main();
     main.foo1(new int[10], 0, 10, false);
     if (main.sum != 10) {
@@ -1211,6 +1601,41 @@
       }
     }
 
+    nonzeroLength(array);
+    if (array[0] != 112) {
+      System.out.println("nonzero length failed!");
+    }
+
+    knownLength(array);
+    if (array[0] != 112 || array[1] != 1) {
+      System.out.println("nonzero length failed!");
+    }
+    array = new int[2];
+    knownLength(array);
+    if (array[0] != -1 || array[1] != -2) {
+      System.out.println("nonzero length failed!");
+    }
+
+    // Zero length array does not break.
+    array = new int[0];
+    nonzeroLength(array);
+    knownLength(array);
+
+    mA = new int[4][4];
+    for (int i = 0; i < 4; i++) {
+      for (int j = 0; j < 4; j++) {
+        mA[i][j] = -1;
+      }
+    }
+    dynamicBCEAndIntrinsic(4);
+    for (int i = 0; i < 4; i++) {
+      for (int j = 0; j < 4; j++) {
+        if (mA[i][j] != 1) {
+          System.out.println("dynamic bce failed!");
+        }
+      }
+    }
+
     array = new int[7];
     pyramid1(array);
     if (!isPyramid(array)) {
diff --git a/test/450-checker-types/smali/SmaliTests.smali b/test/450-checker-types/smali/SmaliTests.smali
new file mode 100644
index 0000000..6a3122e
--- /dev/null
+++ b/test/450-checker-types/smali/SmaliTests.smali
@@ -0,0 +1,120 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSmaliTests;
+.super Ljava/lang/Object;
+
+## CHECK-START: void SmaliTests.testInstanceOf_EQ0_NotInlined(java.lang.Object) builder (after)
+## CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
+## CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+## CHECK-DAG:                   Equal [<<IOf>>,<<Cst0>>]
+
+## CHECK-START: void SmaliTests.testInstanceOf_EQ0_NotInlined(java.lang.Object) instruction_simplifier (before)
+## CHECK:         CheckCast
+
+## CHECK-START: void SmaliTests.testInstanceOf_EQ0_NotInlined(java.lang.Object) instruction_simplifier (after)
+## CHECK-NOT:     CheckCast
+
+.method public static testInstanceOf_EQ0_NotInlined(Ljava/lang/Object;)V
+  .registers 3
+
+  const v0, 0x0
+  instance-of v1, p0, LSubclassC;
+  if-eq v1, v0, :return
+
+  check-cast p0, LSubclassC;
+  invoke-virtual {p0}, LSubclassC;->$noinline$g()V
+
+  :return
+  return-void
+
+.end method
+
+## CHECK-START: void SmaliTests.testInstanceOf_EQ1_NotInlined(java.lang.Object) builder (after)
+## CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
+## CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+## CHECK-DAG:                   Equal [<<IOf>>,<<Cst1>>]
+
+## CHECK-START: void SmaliTests.testInstanceOf_EQ1_NotInlined(java.lang.Object) instruction_simplifier (before)
+## CHECK:         CheckCast
+
+## CHECK-START: void SmaliTests.testInstanceOf_EQ1_NotInlined(java.lang.Object) instruction_simplifier (after)
+## CHECK-NOT:     CheckCast
+
+.method public static testInstanceOf_EQ1_NotInlined(Ljava/lang/Object;)V
+  .registers 3
+
+  const v0, 0x1
+  instance-of v1, p0, LSubclassC;
+  if-eq v1, v0, :invoke
+  return-void
+
+  :invoke
+  check-cast p0, LSubclassC;
+  invoke-virtual {p0}, LSubclassC;->$noinline$g()V
+  return-void
+
+.end method
+
+## CHECK-START: void SmaliTests.testInstanceOf_NE0_NotInlined(java.lang.Object) builder (after)
+## CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
+## CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+## CHECK-DAG:                   NotEqual [<<IOf>>,<<Cst0>>]
+
+## CHECK-START: void SmaliTests.testInstanceOf_NE0_NotInlined(java.lang.Object) instruction_simplifier (before)
+## CHECK:         CheckCast
+
+## CHECK-START: void SmaliTests.testInstanceOf_NE0_NotInlined(java.lang.Object) instruction_simplifier (after)
+## CHECK-NOT:     CheckCast
+
+.method public static testInstanceOf_NE0_NotInlined(Ljava/lang/Object;)V
+  .registers 3
+
+  const v0, 0x0
+  instance-of v1, p0, LSubclassC;
+  if-ne v1, v0, :invoke
+  return-void
+
+  :invoke
+  check-cast p0, LSubclassC;
+  invoke-virtual {p0}, LSubclassC;->$noinline$g()V
+  return-void
+
+.end method
+
+## CHECK-START: void SmaliTests.testInstanceOf_NE1_NotInlined(java.lang.Object) builder (after)
+## CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
+## CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+## CHECK-DAG:                   NotEqual [<<IOf>>,<<Cst1>>]
+
+## CHECK-START: void SmaliTests.testInstanceOf_NE1_NotInlined(java.lang.Object) instruction_simplifier (before)
+## CHECK:         CheckCast
+
+## CHECK-START: void SmaliTests.testInstanceOf_NE1_NotInlined(java.lang.Object) instruction_simplifier (after)
+## CHECK-NOT:     CheckCast
+
+.method public static testInstanceOf_NE1_NotInlined(Ljava/lang/Object;)V
+  .registers 3
+
+  const v0, 0x1
+  instance-of v1, p0, LSubclassC;
+  if-ne v1, v0, :return
+
+  check-cast p0, LSubclassC;
+  invoke-virtual {p0}, LSubclassC;->$noinline$g()V
+
+  :return
+  return-void
+
+.end method
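
All four smali methods encode the same pattern with different branch
polarities: compare the instance-of result against 0 or 1, then cast on the
branch where the type is known. Smali is needed because javac does not emit
these exact shapes; in Java source the pattern reduces to the sketch below, and
the simplifier drops the CheckCast because the dominating InstanceOf already
proves the type:

  public static void instanceOfThenCast(Object o) {
    if (o instanceof SubclassC) {
      // Redundant after the instance-of check above; instruction_simplifier
      // is expected to remove the CheckCast.
      ((SubclassC) o).$noinline$g();
    }
  }
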
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index f1885de..6e453af 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -14,14 +14,27 @@
  * limitations under the License.
  */
 
-interface Interface {
+interface SuperInterface {
+  void superInterfaceMethod();
+}
+
+interface OtherInterface extends SuperInterface {
+}
+
+interface Interface extends SuperInterface {
   void $noinline$f();
 }
 
 class Super implements Interface {
+  public void superInterfaceMethod() {}
   public void $noinline$f() {
     throw new RuntimeException();
   }
+
+  public int $inline$h(boolean cond) {
+    Super obj = (cond ? this : null);
+    return obj.hashCode();
+  }
 }
 
 class SubclassA extends Super {
@@ -64,49 +77,49 @@
 
 public class Main {
 
-  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testSimpleRemove() {
     Super s = new SubclassA();
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testSimpleKeep(Super s) {
     ((SubclassA)s).$noinline$f();
   }
 
-  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public String testClassRemove() {
     Object s = SubclassA.class;
-    return ((Class)s).getName();
+    return ((Class<?>)s).getName();
   }
 
-  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier_after_types (before)
+  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (after)
   /// CHECK:         CheckCast
   public String testClassKeep() {
     Object s = SubclassA.class;
     return ((SubclassA)s).$noinline$h();
   }
 
-  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testIfRemove(int x) {
     Super s;
@@ -118,10 +131,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testIfKeep(int x) {
     Super s;
@@ -133,10 +146,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testForRemove(int x) {
     Super s = new SubclassA();
@@ -148,10 +161,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testForKeep(int x) {
     Super s = new SubclassA();
@@ -163,10 +176,10 @@
     ((SubclassC)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testPhiFromCall(int i) {
     Object x;
@@ -178,11 +191,12 @@
     ((SubclassC)x).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
 
-  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOf(Object o) {
     if (o instanceof SubclassC) {
@@ -193,11 +207,32 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier_after_types (before)
+  public static boolean $inline$InstanceofSubclassB(Object o) { return o instanceof SubclassB; }
+  public static boolean $inline$InstanceofSubclassC(Object o) { return o instanceof SubclassC; }
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) inliner (after)
+  /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+  /// CHECK-DAG:                   If [<<IOf>>]
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (before)
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     CheckCast
+  public void testInstanceOf_Inlined(Object o) {
+    if (!$inline$InstanceofSubclassC(o)) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassC)o).$noinline$g();
+    }
+  }
+
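For orientation, this is roughly what testInstanceOf_Inlined reduces to once the inliner has substituted $inline$InstanceofSubclassC; a hypothetical source-level sketch only, since the compiler actually works on its HGraph IR:

  public void testInstanceOf_Inlined_shape(Object o) {
    // The inlined helper leaves a plain InstanceOf feeding the If, so
    // instruction_simplifier$after_bce can prove the cast below redundant.
    if (!(o instanceof SubclassC)) {
      // Empty branch to flip the condition.
    } else {
      ((SubclassC) o).$noinline$g();  // CheckCast eliminated.
    }
  }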
+  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (after)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
   public void testInstanceOfKeep(Object o) {
@@ -209,11 +244,11 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfNested(Object o) {
     if (o instanceof SubclassC) {
@@ -225,10 +260,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhi(int i) {
     Object o;
@@ -243,10 +278,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfInFor(int n) {
     Object o = new SubclassA();
@@ -260,10 +295,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfSubclass() {
     Object o = new SubclassA();
@@ -272,10 +307,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhiSubclass(int i) {
     Object o;
@@ -290,10 +325,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhiTop(int i) {
     Object o;
@@ -308,10 +343,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfSubclassInFor(int n) {
     Object o = new SubclassA();
@@ -325,10 +360,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfTopInFor(int n) {
     Object o = new SubclassA();
@@ -353,10 +388,10 @@
   public SubclassA a = new SubclassA();
   public static SubclassA b = new SubclassA();
 
-  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceFieldGetSimpleRemove() {
     Main m = new Main();
@@ -364,10 +399,10 @@
     ((SubclassA)a).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testStaticFieldGetSimpleRemove() {
     Super b = Main.b;
@@ -376,36 +411,36 @@
 
   public SubclassA $noinline$getSubclass() { throw new RuntimeException(); }
 
-  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testArraySimpleRemove() {
     Super[] b = new SubclassA[10];
     SubclassA[] c = (SubclassA[])b;
   }
 
-  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInvokeSimpleRemove() {
     Super b = $noinline$getSubclass();
     ((SubclassA)b).$noinline$g();
   }
-  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testArrayGetSimpleRemove() {
     Super[] a = new SubclassA[10];
     ((SubclassA)a[0]).$noinline$g();
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) builder (after)
   /// CHECK:         LoadException klass:java.lang.ArithmeticException can_be_null:false exact:false
   public int testLoadExceptionInCatchNonExact(int x, int y) {
     try {
@@ -415,7 +450,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) builder (after)
   /// CHECK:         LoadException klass:FinalException can_be_null:false exact:true
   public int testLoadExceptionInCatchExact(int x) {
     try {
@@ -429,7 +464,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) builder (after)
   /// CHECK:         LoadException klass:java.lang.Throwable can_be_null:false exact:false
   public int testLoadExceptionInCatchAll(int x, int y) {
     try {
@@ -450,11 +485,11 @@
     return genericFinal.get();
   }
 
-  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation (after)
+  /// CHECK-START: SubclassC Main.inlineGenerics() builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SubclassC exact:false
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
-  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation_after_inlining (after)
+  /// CHECK-START: SubclassC Main.inlineGenerics() inliner (after)
   /// CHECK:      <<BoundType:l\d+>> BoundType klass:SubclassC exact:false
   /// CHECK:                         Return [<<BoundType>>]
   private SubclassC inlineGenerics() {
@@ -462,11 +497,11 @@
     return c;
   }
 
-  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation (after)
+  /// CHECK-START: Final Main.inlineGenericsFinal() builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Final exact:true
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
-  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation_after_inlining (after)
+  /// CHECK-START: Final Main.inlineGenericsFinal() inliner (after)
   /// CHECK:      <<BoundType:l\d+>> BoundType klass:Final exact:true
   /// CHECK:                         Return [<<BoundType>>]
   private Final inlineGenericsFinal() {
@@ -474,7 +509,7 @@
     return f;
   }
 
-  /// CHECK-START: void Main.boundOnlyOnceIfNotNull(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: void Main.boundOnlyOnceIfNotNull(java.lang.Object) inliner (after)
   /// CHECK:      BoundType
   /// CHECK-NOT:  BoundType
   private void boundOnlyOnceIfNotNull(Object o) {
@@ -483,7 +518,7 @@
     }
   }
 
-  /// CHECK-START: void Main.boundOnlyOnceIfInstanceOf(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: void Main.boundOnlyOnceIfInstanceOf(java.lang.Object) inliner (after)
   /// CHECK:      BoundType
   /// CHECK-NOT:  BoundType
   private void boundOnlyOnceIfInstanceOf(Object o) {
@@ -492,7 +527,7 @@
     }
   }
 
-  /// CHECK-START: Final Main.boundOnlyOnceCheckCast(Generic) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: Final Main.boundOnlyOnceCheckCast(Generic) inliner (after)
   /// CHECK:      BoundType
   /// CHECK-NOT:  BoundType
   private Final boundOnlyOnceCheckCast(Generic<Final> o) {
@@ -504,11 +539,11 @@
     return new SubclassA();
   }
 
-  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) builder (after)
   /// CHECK:      <<Phi:l\d+>> Phi klass:Super
   /// CHECK:                   NullCheck [<<Phi>>] klass:Super
 
-  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) inliner (after)
   /// CHECK:      <<Phi:l\d+>> Phi klass:SubclassA
   /// CHECK:                   NullCheck [<<Phi>>] klass:SubclassA
   private void updateNodesInTheSameBlockAsPhi(boolean cond) {
@@ -519,14 +554,14 @@
     s.$noinline$f();
   }
 
-  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) inliner (after)
   /// CHECK:      <<This:l\d+>>     ParameterValue
   /// CHECK:      <<Param:l\d+>>    ParameterValue
   /// CHECK:      <<Clazz:l\d+>>    LoadClass
   /// CHECK:                        CheckCast [<<Param>>,<<Clazz>>]
   /// CHECK:                        BoundType [<<Param>>] can_be_null:true
 
-  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier (after)
   /// CHECK:      <<This:l\d+>>     ParameterValue
   /// CHECK:      <<Param:l\d+>>    ParameterValue
   /// CHECK:      <<Clazz:l\d+>>    LoadClass
@@ -538,7 +573,7 @@
   }
 
 
-  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) reference_type_propagation (after)
+  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) builder (after)
   /// CHECK:      ParameterValue klass:Main can_be_null:false exact:false
   /// CHECK:      ParameterValue klass:Super can_be_null:true exact:false
   /// CHECK:      ParameterValue
@@ -548,6 +583,143 @@
   private void argumentCheck(Super s, double d, SubclassA a, Final f) {
   }
 
+  private Main getNull() {
+    return null;
+  }
+
+  private int mainField = 0;
+
+  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private SuperInterface getWiderType(boolean cond, Interface a, OtherInterface b) {
+    return cond ? a : b;
+  }
+
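getWiderType merges two sibling interface types, so the builder can only type the Phi as java.lang.Object even though both inputs are usable as SuperInterface. The interfaces are declared elsewhere in the test; a plausible reconstruction, based purely on how they are used here:

  interface SuperInterface {
    void superInterfaceMethod();
  }
  interface Interface extends SuperInterface {}
  interface OtherInterface extends SuperInterface {}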
+  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, Interface, OtherInterface) inliner (before)
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SuperInterface
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Invoke>>] klass:SuperInterface exact:false
+  /// CHECK:                         InvokeInterface [<<NullCheck>>]
+
+  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, Interface, OtherInterface) inliner (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:SuperInterface exact:false
+  /// CHECK:                         InvokeInterface [<<NullCheck>>]
+  private void testInlinerWidensReturnType(boolean cond, Interface a, OtherInterface b) {
+    getWiderType(cond, a, b).superInterfaceMethod();
+  }
+
+  /// CHECK-START: void Main.testInlinerReturnsNull() inliner (before)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Main
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Invoke>>] klass:Main exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+
+  /// CHECK-START: void Main.testInlinerReturnsNull() inliner (after)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Null:l\d+>>      NullConstant klass:java.lang.Object
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Null>>] klass:Main exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+  private void testInlinerReturnsNull() {
+    Main o = getNull();
+    o.mainField = 0;
+  }
+
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:                    InvokeVirtual [<<Arg>>,{{z\d+}}] method_name:Super.$inline$h
+
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:     <<Null:l\d+>>  NullConstant
+  /// CHECK-DAG:     <<Phi:l\d+>>   Phi [<<Arg>>,<<Null>>] klass:SubclassA
+  /// CHECK-DAG:     <<NCPhi:l\d+>> NullCheck [<<Phi>>]
+  /// CHECK-DAG:                    InvokeVirtual [<<NCPhi>>] method_name:Super.hashCode
+
+  public void testThisArgumentMoreSpecific(boolean cond) {
+    // Inlining a method from Super will build it with `this` typed as Super.
+    // Running RTP will sharpen it to SubclassA.
+    SubclassA obj = new SubclassA();
+    ((Super) obj).$inline$h(cond);
+  }
+
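Super.$inline$h is defined elsewhere in the test, but the checker lines above pin down its shape: a Phi of the receiver and null flowing into a null-checked call to hashCode. A hypothetical reconstruction:

  // In class Super (reconstructed; not part of this diff):
  public int $inline$h(boolean cond) {
    Super s = cond ? this : null;  // becomes the Phi [Arg, Null]
    return s.hashCode();           // NullCheck + InvokeVirtual hashCode
  }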
+  public static int $inline$hashCode(Super obj) {
+    return obj.hashCode();
+  }
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                    InvokeStaticOrDirect [<<Arg>>{{(,[ij]\d+)?}}] method_name:Main.$inline$hashCode
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  /// CHECK-DAG:     <<NCArg:l\d+>> NullCheck [<<Arg>>] klass:SubclassA
+  /// CHECK-DAG:                    InvokeVirtual [<<NCArg>>] method_name:Super.hashCode
+
+  public void testExplicitArgumentMoreSpecific(SubclassA obj) {
+    // Inlining a method will build it with reference types from its signature;
+    // here the callee graph is built with Super as the type of its only argument.
+    // Running RTP after its ParameterValue instructions are replaced with actual
+    // arguments will type the inner graph more precisely.
+    $inline$hashCode(obj);
+  }
+
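After inlining, the static wrapper disappears and, per the checker lines, the null-checked argument keeps its more precise SubclassA type while the call still resolves to Super.hashCode. In source terms the caller effectively becomes (illustrative only):

  public void testExplicitArgumentMoreSpecific_shape(SubclassA obj) {
    obj.hashCode();  // NullCheck typed SubclassA; resolves to Super.hashCode
  }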
+  /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (before)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:Main exact:false
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:Main exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+
+  /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (after)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Null:l\d+>>      NullConstant klass:java.lang.Object
+  /// CHECK:      <<Phi:l\d+>>       Phi [<<Null>>,<<Null>>] klass:java.lang.Object exact:false
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:java.lang.Object exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+  private void testPhiHasOnlyNullInputs(boolean cond) {
+    Main o = cond ? null : getNull();
+    o.mainField = 0;
+    // getSuper() will force a run of type propagation after inlining
+    // because it returns a more precise type.
+    getSuper();
+  }
+
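Both Phi inputs end up being null constants: the literal null, and the return value of getNull() once it is inlined. With no typed input left, the Phi falls back to java.lang.Object, exactly as the (after) checks assert. A sketch of the two helpers as this test relies on them; getSuper's declaration is an assumption, consistent with the `return new SubclassA();` context lines visible earlier in this hunk:

  private Main getNull() {
    return null;             // inlined to a NullConstant
  }

  // Declared type is wider than the returned type, which is what
  // forces RTP to rerun after inlining.
  private Super getSuper() {
    return new SubclassA();
  }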
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) builder (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() builder (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Phi>>            Phi [<<Array>>,<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<NC:l\d+>>        NullCheck [<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:                     ArrayGet [<<NC>>,{{i\d+}}] klass:java.lang.Object exact:false
+  private Object[] testInstructionsWithUntypedParent() {
+    Object[] array = null;
+    boolean cond = true;
+    for (int i = 0; i < 10; ++i) {
+      if (cond) {
+        array = new Object[10];
+        array[0] = new Object();
+        cond = false;
+      } else {
+        array[i] = array[0];
+      }
+    }
+    return array;
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/454-get-vreg/build b/test/454-get-vreg/build
deleted file mode 100644
index 08987b5..0000000
--- a/test/454-get-vreg/build
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-# The test relies on DEX file produced by javac+dx so keep building with them for now
-# (see b/19467889)
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
-  --dump-width=1000 ${DX_FLAGS} classes
-zip $TEST_NAME.jar classes.dex
diff --git a/test/455-checker-gvn/expected.txt b/test/455-checker-gvn/expected.txt
index 8351c19..c1679c7 100644
--- a/test/455-checker-gvn/expected.txt
+++ b/test/455-checker-gvn/expected.txt
@@ -1 +1,3 @@
 14
+0
+10
diff --git a/test/455-checker-gvn/src/Main.java b/test/455-checker-gvn/src/Main.java
index 9824f27..cea0959 100644
--- a/test/455-checker-gvn/src/Main.java
+++ b/test/455-checker-gvn/src/Main.java
@@ -15,8 +15,14 @@
  */
 
 public class Main {
+
+  private static int mX = 2;
+  private static int mY = -3;
+
   public static void main(String[] args) {
     System.out.println(foo(3, 4));
+    System.out.println(mulAndIntrinsic());
+    System.out.println(directIntrinsic(-5));
   }
 
   /// CHECK-START: int Main.foo(int, int) GVN (before)
@@ -35,7 +41,50 @@
     return sum1 + sum2;
   }
 
-  public static long bar(int i) {
-    return i;
+  /// CHECK-START: int Main.mulAndIntrinsic() GVN (before)
+  /// CHECK: StaticFieldGet
+  /// CHECK: StaticFieldGet
+  /// CHECK: Mul
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK: StaticFieldGet
+  /// CHECK: StaticFieldGet
+  /// CHECK: Mul
+  /// CHECK: Add
+
+  /// CHECK-START: int Main.mulAndIntrinsic() GVN (after)
+  /// CHECK: StaticFieldGet
+  /// CHECK: StaticFieldGet
+  /// CHECK: Mul
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: Mul
+  /// CHECK: Add
+
+  public static int mulAndIntrinsic() {
+    // The intermediate call to abs() does not kill
+    // the common subexpression on the multiplication.
+    int mul1 = mX * mY;
+    int abs  = Math.abs(mul1);
+    int mul2 = mY * mX;
+    return abs + mul2;
   }
+
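GVN can merge the second pair of field loads and the second multiplication only because its side-effect analysis knows the Math.abs intrinsic cannot write mX or mY. The method is effectively reduced to the following (an illustrative sketch of the optimized shape, not the actual IR):

  public static int mulAndIntrinsic_shape() {
    int x = mX;                  // the two surviving StaticFieldGets
    int y = mY;
    int mul = x * y;             // the single Mul left after GVN
    return Math.abs(mul) + mul;  // intrinsic call, then Add
  }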
+  /// CHECK-START: int Main.directIntrinsic(int) GVN (before)
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK: Add
+
+  /// CHECK-START: int Main.directIntrinsic(int) GVN (after)
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK-NOT: InvokeStaticOrDirect
+  /// CHECK: Add
+
+  public static int directIntrinsic(int x) {
+    // Here, the two calls to abs() themselves can be replaced with just one.
+    int abs1 = Math.abs(x);
+    int abs2 = Math.abs(x);
+    return abs1 + abs2;
+  }
+
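The same reasoning lets GVN deduplicate the intrinsic calls themselves: two Math.abs invocations with the same input and no intervening side effects share a value number, so only one InvokeStaticOrDirect survives. A sketch of the resulting shape:

  public static int directIntrinsic_shape(int x) {
    int abs = Math.abs(x);  // single surviving call
    return abs + abs;
  }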
 }
diff --git a/test/455-set-vreg/info.txt b/test/455-set-vreg/info.txt
deleted file mode 100644
index e8c57b5..0000000
--- a/test/455-set-vreg/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Tests for setting DEX registers in a Java method.
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
deleted file mode 100644
index 21149f6..0000000
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arch/context.h"
-#include "art_method-inl.h"
-#include "jni.h"
-#include "scoped_thread_state_change.h"
-#include "stack.h"
-#include "thread.h"
-
-namespace art {
-
-namespace {
-
-class TestVisitor : public StackVisitor {
- public:
-  TestVisitor(Thread* thread, Context* context, mirror::Object* this_value)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        this_value_(this_value) {}
-
-  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    std::string m_name(m->GetName());
-
-    if (m_name.compare("testIntVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVReg(m, 2, 5, kIntVReg));
-      CHECK(SetVReg(m, 3, 4, kIntVReg));
-      CHECK(SetVReg(m, 4, 3, kIntVReg));
-      CHECK(SetVReg(m, 5, 2, kIntVReg));
-      CHECK(SetVReg(m, 6, 1, kIntVReg));
-    } else if (m_name.compare("testLongVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVRegPair(m, 4, std::numeric_limits<int64_t>::max(), kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 6, 4, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 8, 3, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 10, 2, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 12, 1, kLongLoVReg, kLongHiVReg));
-    } else if (m_name.compare("testFloatVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
-    } else if (m_name.compare("testDoubleVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
-    }
-
-    return true;
-  }
-
-  mirror::Object* this_value_;
-};
-
-extern "C" JNIEXPORT void JNICALL Java_Main_doNativeCallSetVReg(JNIEnv*, jobject value) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<Context> context(Context::Create());
-  TestVisitor visitor(soa.Self(), context.get(), soa.Decode<mirror::Object*>(value));
-  visitor.WalkStack();
-}
-
-}  // namespace
-
-}  // namespace art
diff --git a/test/455-set-vreg/src/Main.java b/test/455-set-vreg/src/Main.java
deleted file mode 100644
index 4db9d66..0000000
--- a/test/455-set-vreg/src/Main.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-  public Main() {
-  }
-
-  int testIntVReg(int a, int b, int c, int d, int e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  long testLongVReg(long a, long b, long c, long d, long e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  float testFloatVReg(float a, float b, float c, float d, float e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  double testDoubleVReg(double a, double b, double c, double d, double e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  native void doNativeCallSetVReg();
-
-  public static void main(String[] args) {
-    System.loadLibrary(args[0]);
-    Main rm = new Main();
-    int intExpected = 5 - 4 - 3 - 2 - 1;
-    int intResult = rm.testIntVReg(0, 0, 0, 0, 0);
-    if (intResult != intExpected) {
-      throw new Error("Expected " + intExpected + ", got " + intResult);
-    }
-
-    long longExpected = Long.MAX_VALUE - 4 - 3 - 2 - 1;
-    long longResult = rm.testLongVReg(0, 0, 0, 0, 0);
-    if (longResult != longExpected) {
-      throw new Error("Expected " + longExpected + ", got " + longResult);
-    }
-
-    float floatExpected = 5.0f - 4.0f - 3.0f - 2.0f - 1.0f;
-    float floatResult = rm.testFloatVReg(0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
-    if (floatResult != floatExpected) {
-      throw new Error("Expected " + floatExpected + ", got " + floatResult);
-    }
-
-    double doubleExpected = 5.0 - 4.0 - 3.0 - 2.0 - 1.0;
-    double doubleResult = rm.testDoubleVReg(0.0, 0.0, 0.0, 0.0, 0.0);
-    if (doubleResult != doubleExpected) {
-      throw new Error("Expected " + doubleExpected + ", got " + doubleResult);
-    }
-  }
-}
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index 64b2336..79fa8b0 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -68,7 +68,7 @@
         CHECK(!success);
       }
 
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
+      CHECK(GetVReg(m, 3, kIntVReg, &value));
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
diff --git a/test/458-checker-instruction-simplification/smali/SmaliTests.smali b/test/458-checker-instruction-simplification/smali/SmaliTests.smali
new file mode 100644
index 0000000..6845961
--- /dev/null
+++ b/test/458-checker-instruction-simplification/smali/SmaliTests.smali
@@ -0,0 +1,329 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSmaliTests;
+.super Ljava/lang/Object;
+
+## CHECK-START: int SmaliTests.EqualTrueRhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const1>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.EqualTrueRhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static EqualTrueRhs(Z)I
+  .registers 3
+
+  const v0, 0x1
+  const v1, 0x5
+  if-eq p0, v0, :return
+  const v1, 0x3
+  :return
+  return v1
+
+.end method
+
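These cases live in smali because javac itself folds comparisons of a boolean against a constant, so the pattern cannot be written in Java source. For orientation, EqualTrueRhs corresponds roughly to this (hypothetical) Java:

  static int equalTrueRhs(boolean arg) {
    // if-eq p0, 0x1: compare the argument against the constant true.
    return (arg == true) ? 5 : 3;  // simplifier reduces the test to just arg
  }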
+## CHECK-START: int SmaliTests.EqualTrueLhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const1>>,<<Arg>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.EqualTrueLhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static EqualTrueLhs(Z)I
+  .registers 3
+
+  const v0, 0x1
+  const v1, 0x5
+  if-eq v0, p0, :return
+  const v1, 0x3
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.EqualFalseRhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const0>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.EqualFalseRhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static EqualFalseRhs(Z)I
+  .registers 3
+
+  const v0, 0x0
+  const v1, 0x3
+  if-eq p0, v0, :return
+  const v1, 0x5
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.EqualFalseLhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const0>>,<<Arg>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.EqualFalseLhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static EqualFalseLhs(Z)I
+  .registers 3
+
+  const v0, 0x0
+  const v1, 0x3
+  if-eq v0, p0, :return
+  const v1, 0x5
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.NotEqualTrueRhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const1>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.NotEqualTrueRhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static NotEqualTrueRhs(Z)I
+  .registers 3
+
+  const v0, 0x1
+  const v1, 0x3
+  if-ne p0, v0, :return
+  const v1, 0x5
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.NotEqualTrueLhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const1>>,<<Arg>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.NotEqualTrueLhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static NotEqualTrueLhs(Z)I
+  .registers 3
+
+  const v0, 0x1
+  const v1, 0x3
+  if-ne v0, p0, :return
+  const v1, 0x5
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.NotEqualFalseRhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const0>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.NotEqualFalseRhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static NotEqualFalseRhs(Z)I
+  .registers 3
+
+  const v0, 0x0
+  const v1, 0x5
+  if-ne p0, v0, :return
+  const v1, 0x3
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.NotEqualFalseLhs(boolean) instruction_simplifier (before)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const0>>,<<Arg>>]
+## CHECK-DAG:                       If [<<Cond>>]
+
+## CHECK-START: int SmaliTests.NotEqualFalseLhs(boolean) instruction_simplifier (after)
+## CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+## CHECK-DAG:                       If [<<Arg>>]
+
+.method public static NotEqualFalseLhs(Z)I
+  .registers 3
+
+  const v0, 0x0
+  const v1, 0x5
+  if-ne v0, p0, :return
+  const v1, 0x3
+  :return
+  return v1
+
+.end method
+
+## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
+## CHECK-DAG:     <<Const8:i\d+>>    IntConstant 8
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const7>>]
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Add>>,<<Const8>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM1:i\d+>>   IntConstant -1
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<ConstM1>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static AddSubConst(I)I
+    .registers 3
+
+    .prologue
+    add-int/lit8 v0, p0, 7
+
+    const/16 v1, 8
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
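AddSubConst is the (x + c1) - c2 pattern; the simplifier merges the two constants into a single Add of 7 - 8 = -1, as the (after) checks assert. A roughly equivalent Java rendering (hypothetical):

  static int addSubConst(int arg) {
    return (arg + 7) - 8;  // folds to arg + (-1)
  }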
+## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const3:i\d+>>    IntConstant 3
+## CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<ArgValue>>,<<Const3>>]
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<Sub>>,<<Const4>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const1>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static SubAddConst(I)I
+    .registers 2
+
+    .prologue
+    const/4 v0, 3
+
+    sub-int v0, p0, v0
+
+    add-int/lit8 v0, v0, 4
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const9:i\d+>>    IntConstant 9
+## CHECK-DAG:     <<Const10:i\d+>>   IntConstant 10
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<ArgValue>>,<<Const9>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Sub1>>,<<Const10>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM19:i\d+>>  IntConstant -19
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<ConstM19>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static SubSubConst1(I)I
+    .registers 3
+
+    .prologue
+    const/16 v1, 9
+
+    sub-int v0, p0, v1
+
+    const/16 v1, 10
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const11:i\d+>>   IntConstant 11
+## CHECK-DAG:     <<Const12:i\d+>>   IntConstant 12
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<Const11>>,<<ArgValue>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Sub1>>,<<Const12>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM1:i\d+>>   IntConstant -1
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<ConstM1>>,<<ArgValue>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+.method public static SubSubConst2(I)I
+    .registers 3
+
+    .prologue
+    rsub-int/lit8 v0, p0, 11
+
+    const/16 v1, 12
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const15:i\d+>>   IntConstant 15
+## CHECK-DAG:     <<Const16:i\d+>>   IntConstant 16
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<ArgValue>>,<<Const16>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Const15>>,<<Sub1>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const31:i\d+>>   IntConstant 31
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const31>>,<<ArgValue>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+.method public static SubSubConst3(I)I
+    .registers 2
+
+    .prologue
+    const/16 v0, 16
+
+    sub-int v0, p0, v0
+
+    rsub-int/lit8 v0, v0, 15
+
+    return v0
+.end method
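SubSubConst3 completes the set with the c1 - (x - c2) pattern: 15 - (arg - 16) folds to a single reverse subtraction with the combined constant 15 + 16 = 31, matching the checker lines. A hypothetical Java rendering:

  static int subSubConst3(int arg) {
    return 15 - (arg - 16);  // folds to 31 - arg
  }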
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index c32d34a..5b14735 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -14,8 +14,12 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 public class Main {
 
+  static boolean doThrow = false;
+
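doThrow is never set to true; the conditional throw, together with the $noinline$ name prefix recognized by the test harness, is the stock ART-test idiom (an assumption about intent, but consistent with its use throughout this file) for keeping these methods out of the inliner so each checker pass observes the method body in isolation:

  // Pattern repeated in every test method below:
  public static long $noinline$example(long arg) {
    if (doThrow) { throw new Error(); }  // never taken; defeats inlining
    return arg;
  }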
   public static void assertBooleanEquals(boolean expected, boolean result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -46,45 +50,76 @@
     }
   }
 
+  public static void assertStringEquals(String expected, String result) {
+    if (expected == null ? result != null : !expected.equals(result)) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /**
    * Tiny programs exercising optimizations of arithmetic identities.
    */
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>  LongConstant 0
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Add
 
-  public static long Add0(long arg) {
+  public static long $noinline$Add0(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 + arg;
   }
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Const2:i\d+>>    IntConstant 2
+  /// CHECK-DAG:     <<ConstM3:i\d+>>   IntConstant -3
+  /// CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+  /// CHECK-DAG:     <<Add1:i\d+>>      Add [<<ArgValue>>,<<Const1>>]
+  /// CHECK-DAG:     <<Add2:i\d+>>      Add [<<Add1>>,<<Const2>>]
+  /// CHECK-DAG:     <<Add3:i\d+>>      Add [<<Add2>>,<<ConstM3>>]
+  /// CHECK-DAG:     <<Add4:i\d+>>      Add [<<Add3>>,<<Const4>>]
+  /// CHECK-DAG:                        Return [<<Add4>>]
+
+  /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const4>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$AddAddSubAddConst(int arg) {
+    if (doThrow) { throw new Error(); }
+    return arg + 1 + 2 - 3 + 4;
+  }
+
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<And:i\d+>>     And [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                      Return [<<And>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                      And
 
-  public static int AndAllOnes(int arg) {
+  public static int $noinline$AndAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg & -1;
   }
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const15:i\d+>>  IntConstant 15
@@ -92,20 +127,21 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static int UShr28And15(int arg) {
+  public static int $noinline$UShr28And15(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 15;
   }
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const15:j\d+>>  LongConstant 15
@@ -113,20 +149,21 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static long UShr60And15(long arg) {
+  public static long $noinline$UShr60And15(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 15;
   }
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -134,7 +171,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -142,11 +179,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int UShr28And7(int arg) {
+  public static int $noinline$UShr28And7(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 7;
   }
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -154,7 +192,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -162,11 +200,12 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long UShr60And7(long arg) {
+  public static long $noinline$UShr60And7(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 7;
   }
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const255:i\d+>> IntConstant 255
@@ -174,21 +213,22 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const24>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static int Shr24And255(int arg) {
+  public static int $noinline$Shr24And255(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 255;
   }
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const255:j\d+>> LongConstant 255
@@ -196,21 +236,22 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const56>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static long Shr56And255(long arg) {
+  public static long $noinline$Shr56And255(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 255;
   }
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -218,7 +259,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -226,11 +267,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int Shr24And127(int arg) {
+  public static int $noinline$Shr24And127(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 127;
   }
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -238,7 +280,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -246,268 +288,361 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long Shr56And127(long arg) {
+  public static long $noinline$Shr56And127(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 127;
   }
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Div:j\d+>>     Div [<<Arg>>,<<Const1>>]
   /// CHECK-DAG:                      Return [<<Div>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-NOT:                      Div
 
-  public static long Div1(long arg) {
+  public static long $noinline$Div1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 1;
   }
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Div
 
-  public static int DivN1(int arg) {
+  public static int $noinline$DivN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -1;
   }
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
-  /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Arg>>,<<Const1>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const1>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static long Mul1(long arg) {
+  public static long $noinline$Mul1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 1;
   }
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Mul:i\d+>>      Mul [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static int MulN1(int arg) {
+  public static int $noinline$MulN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * -1;
   }
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
-  /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Arg>>,<<Const128>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Const128>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
   /// CHECK-DAG:     <<Shl:j\d+>>       Shl [<<Arg>>,<<Const7>>]
   /// CHECK-DAG:                        Return [<<Shl>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Mul
 
-  public static long MulPowerOfTwo128(long arg) {
+  public static long $noinline$MulPowerOfTwo128(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 128;
   }
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const10:j\d+>>   LongConstant 10
+  /// CHECK-DAG:     <<Const11:j\d+>>   LongConstant 11
+  /// CHECK-DAG:     <<Const12:j\d+>>   LongConstant 12
+  /// CHECK-DAG:     <<Mul1:j\d+>>      Mul [<<Const10>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Mul2:j\d+>>      Mul [<<Mul1>>,<<Const11>>]
+  /// CHECK-DAG:     <<Mul3:j\d+>>      Mul [<<Mul2>>,<<Const12>>]
+  /// CHECK-DAG:                        Return [<<Mul3>>]
+
+  /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const1320:j\d+>>  LongConstant 1320
+  /// CHECK-DAG:     <<Mul:j\d+>>        Mul [<<ArgValue>>,<<Const1320>>]
+  /// CHECK-DAG:                         Return [<<Mul>>]
+
+  public static long $noinline$MulMulMulConst(long arg) {
+    if (doThrow) { throw new Error(); }
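+    // The simplifier is expected to fold the constant factors into a single
+    // multiplication: 10 * 11 * 12 = 1320 (see the CHECK lines above).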
+    return 10 * arg * 11 * 12;
+  }
+
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Or
 
-  public static int Or0(int arg) {
+  public static int $noinline$Or0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg | 0;
   }
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Or:j\d+>>        Or [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Or
 
-  public static long OrSame(long arg) {
+  public static long $noinline$OrSame(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg | arg;
   }
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shl
 
-  public static int Shl0(int arg) {
+  public static int $noinline$Shl0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg << 0;
   }
 
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const1>>]
-  /// CHECK-DAG:                       Return [<<Shl>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Arg>>]
-  /// CHECK-DAG:                       Return [<<Add>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-NOT:                       Shl
-
-  public static int Shl1(int arg) {
-    return arg << 1;
-  }
-
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr0(long arg) {
+  public static long $noinline$Shr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 0;
   }
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const64:i\d+>>  IntConstant 64
+  /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const64>>]
+  /// CHECK-DAG:                       Return [<<Shr>>]
+
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:                       Return [<<Arg>>]
+
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
+  /// CHECK-NOT:                       Shr
+
+  public static long $noinline$Shr64(long arg) {
+    if (doThrow) { throw new Error(); }
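+    // Java long shifts use the shift distance modulo 64 (JLS 15.19), so a
+    // shift by 64 is a shift by 0 and the Shr can be removed entirely.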
+    return arg >> 64;
+  }
+
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long Sub0(long arg) {
+  public static long $noinline$Sub0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg - 0;
   }
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubAliasNeg(int arg) {
+  public static int $noinline$SubAliasNeg(int arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - arg;
   }
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const5:i\d+>>    IntConstant 5
+  /// CHECK-DAG:     <<Const6:i\d+>>    IntConstant 6
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const5>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<Sub>>,<<Const6>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const11:i\d+>>   IntConstant 11
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const11>>,<<ArgValue>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  public static int $noinline$SubAddConst1(int arg) {
+    if (doThrow) { throw new Error(); }
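+    // (5 - arg) + 6 folds to (5 + 6) - arg, i.e. 11 - arg.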
+    return 5 - arg + 6;
+  }
+
+  /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const14:i\d+>>   IntConstant 14
+  /// CHECK-DAG:     <<Const13:i\d+>>   IntConstant 13
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const13>>]
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const14>>,<<Add>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const1>>,<<ArgValue>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  public static int $noinline$SubAddConst2(int arg) {
+    if (doThrow) { throw new Error(); }
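+    // 14 - (arg + 13) folds to (14 - 13) - arg, i.e. 1 - arg.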
+    return 14 - (arg + 13);
+  }
+
+  /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const17:j\d+>>   LongConstant 17
+  /// CHECK-DAG:     <<Const18:j\d+>>   LongConstant 18
+  /// CHECK-DAG:     <<Sub1:j\d+>>      Sub [<<Const18>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Sub2:j\d+>>      Sub [<<Const17>>,<<Sub1>>]
+  /// CHECK-DAG:                        Return [<<Sub2>>]
+
+  /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-DAG:     <<Add:j\d+>>       Add [<<ArgValue>>,<<ConstM1>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static long $noinline$SubSubConst(long arg) {
+    if (doThrow) { throw new Error(); }
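+    // 17 - (18 - arg) folds to arg + (17 - 18), i.e. arg + (-1).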
+    return 17 - (18 - arg);
+  }
+
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       UShr
 
-  public static long UShr0(long arg) {
+  public static long $noinline$UShr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >>> 0;
   }
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int Xor0(int arg) {
+  public static int $noinline$Xor0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ 0;
   }
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Not>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int XorAllOnes(int arg) {
+  public static int $noinline$XorAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ -1;
   }
 
@@ -518,7 +653,7 @@
    * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
    */
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -526,7 +661,7 @@
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-NOT:                       Neg
@@ -534,7 +669,8 @@
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  public static int AddNegs1(int arg1, int arg2) {
+  public static int $noinline$AddNegs1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
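+    // Both negations feed only the addition, so the simplifier can rewrite
+    // -arg1 + -arg2 as -(arg1 + arg2), trading two Negs for one.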
     return -arg1 + -arg2;
   }
 
@@ -549,7 +685,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -559,7 +695,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -570,7 +706,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) GVN (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) GVN (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -579,7 +715,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add>>,<<Add>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int AddNegs2(int arg1, int arg2) {
+  public static int $noinline$AddNegs2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
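+    // Each negation below has two uses, so the rewrite would not remove the
+    // Negs and could extend live ranges; the simplifier leaves the graph
+    // as is. (GVN later merges the two identical Adds, per the last CHECK block.)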
     int temp1 = -arg1;
     int temp2 = -arg2;
     return (temp1 + temp2) | (temp1 + temp2);
@@ -593,7 +730,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -605,7 +742,7 @@
   /// CHECK:         <<Add:j\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -618,7 +755,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long AddNegs3(long arg1, long arg2) {
+  public static long $noinline$AddNegs3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
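+    // The Negs are computed once before the loop; the rewrite would place a
+    // Neg after the Add and hence inside the loop body, so it is not applied.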
     long res = 0;
     long n_arg1 = -arg1;
     long n_arg2 = -arg2;
@@ -634,24 +772,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Add:j\d+>>      Add [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  public static long AddNeg1(long arg1, long arg2) {
+  public static long $noinline$AddNeg1(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + arg2;
   }
 
@@ -664,7 +803,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -673,7 +812,7 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -682,10 +821,11 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long AddNeg2(long arg1, long arg2) {
+  public static long $noinline$AddNeg2(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long temp = -arg2;
     return (arg1 + temp) | (arg1 + temp);
   }
@@ -695,20 +835,21 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Neg2>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static long NegNeg1(long arg) {
+  public static long $noinline$NegNeg1(long arg) {
+    if (doThrow) { throw new Error(); }
     return -(-arg);
   }
 
@@ -719,29 +860,30 @@
    * and in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg2>>,<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  /// CHECK-START: int Main.NegNeg2(int) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) constant_folding$after_inlining (after)
   /// CHECK:         <<Const0:i\d+>>   IntConstant 0
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
   /// CHECK:                           Return [<<Const0>>]
 
-  public static int NegNeg2(int arg) {
+  public static int $noinline$NegNeg2(int arg) {
+    if (doThrow) { throw new Error(); }
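+    // The simplifier turns temp + -temp into arg - arg, which constant
+    // folding then reduces to the constant 0 (see the CHECK lines above).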
     int temp = -arg;
     return temp + -temp;
   }
@@ -753,22 +895,23 @@
    * and in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Const0>>,<<Neg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Sub
 
-  public static long NegNeg3(long arg) {
+  public static long $noinline$NegNeg3(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - -arg;
   }
 
@@ -778,23 +921,24 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Sub>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static int NegSub1(int arg1, int arg2) {
+  public static int $noinline$NegSub1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -(arg1 - arg2);
   }
 
@@ -808,7 +952,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -817,7 +961,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -826,7 +970,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int NegSub2(int arg1, int arg2) {
+  public static int $noinline$NegSub2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = arg1 - arg2;
     return -temp | -temp;
   }
@@ -836,42 +981,43 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
    */
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
-  /// CHECK-DAG:     <<ConstF1:j\d+>>  LongConstant -1
-  /// CHECK-DAG:     <<Xor1:j\d+>>     Xor [<<Arg>>,<<ConstF1>>]
-  /// CHECK-DAG:     <<Xor2:j\d+>>     Xor [<<Xor1>>,<<ConstF1>>]
-  /// CHECK-DAG:                       Return [<<Xor2>>]
+  /// CHECK-DAG:     <<Not1:j\d+>>     Not [<<Arg>>]
+  /// CHECK-DAG:     <<Not2:j\d+>>     Not [<<Not1>>]
+  /// CHECK-DAG:                       Return [<<Not2>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
-  /// CHECK-NOT:                       Xor
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
+  /// CHECK-NOT:                       Not
 
-  public static long NotNot1(long arg) {
+  public static long $noinline$NotNot1(long arg) {
+    if (doThrow) { throw new Error(); }
     return ~~arg;
   }
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<ConstF1:i\d+>>  IntConstant -1
-  /// CHECK-DAG:     <<Xor1:i\d+>>     Xor [<<Arg>>,<<ConstF1>>]
-  /// CHECK-DAG:     <<Xor2:i\d+>>     Xor [<<Xor1>>,<<ConstF1>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Xor1>>,<<Xor2>>]
+  /// CHECK-DAG:     <<Not1:i\d+>>     Not [<<Arg>>]
+  /// CHECK-DAG:     <<Not2:i\d+>>     Not [<<Not1>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not2>>,<<Not1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not>>,<<Arg>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
-  /// CHECK-NOT:                       Xor
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
+  /// CHECK:                           Not
+  /// CHECK-NOT:                       Not
 
-  public static int NotNot2(int arg) {
+  public static int $noinline$NotNot2(int arg) {
+    if (doThrow) { throw new Error(); }
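+    // The double negation ~temp (i.e. ~~arg) simplifies back to arg, so a
+    // single Not remains; arg + ~arg is -1 in two's complement.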
     int temp = ~arg;
     return temp + ~temp;
   }
@@ -881,24 +1027,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubNeg1(int arg1, int arg2) {
+  public static int $noinline$SubNeg1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 - arg2;
   }
 
@@ -912,7 +1059,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -921,7 +1068,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -930,10 +1077,11 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Add
 
-  public static int SubNeg2(int arg1, int arg2) {
+  public static int $noinline$SubNeg2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg1;
     return (temp - arg2) | (temp - arg2);
   }
@@ -945,7 +1093,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -956,7 +1104,7 @@
   /// CHECK:         <<Sub:j\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -968,7 +1116,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long SubNeg3(long arg1, long arg2) {
+  public static long $noinline$SubNeg3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long temp = -arg1;
     for (long i = 0; i < 1; i++) {
@@ -977,150 +1126,50 @@
     return res;
   }
 
-  /// CHECK-START: int Main.EqualTrueRhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const1>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.EqualTrueRhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:                       If [<<Arg>>]
-
-  public static int EqualTrueRhs(boolean arg) {
-    return (arg != true) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.EqualTrueLhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const1>>,<<Arg>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.EqualTrueLhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:                       If [<<Arg>>]
-
-  public static int EqualTrueLhs(boolean arg) {
-    return (true != arg) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.EqualFalseRhs(boolean) instruction_simplifier (before)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const0>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.EqualFalseRhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
-
-  public static int EqualFalseRhs(boolean arg) {
-    return (arg != false) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const0>>,<<Arg>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
-
-  public static int EqualFalseLhs(boolean arg) {
-    return (false != arg) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.NotEqualTrueRhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const1>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.NotEqualTrueRhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
-
-  public static int NotEqualTrueRhs(boolean arg) {
-    return (arg == true) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.NotEqualTrueLhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const1>>,<<Arg>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.NotEqualTrueLhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
-  /// CHECK-DAG:                       If [<<NotArg>>]
-
-  public static int NotEqualTrueLhs(boolean arg) {
-    return (true == arg) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.NotEqualFalseRhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const0>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.NotEqualFalseRhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:                       If [<<Arg>>]
-
-  public static int NotEqualFalseRhs(boolean arg) {
-    return (arg == false) ? 3 : 5;
-  }
-
-  /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const0>>,<<Arg>>]
-  /// CHECK-DAG:                       If [<<Cond>>]
-
-  /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:                       If [<<Arg>>]
-
-  public static int NotEqualFalseLhs(boolean arg) {
-    return (false == arg) ? 3 : 5;
-  }
-
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
-  /// CHECK-DAG:     <<BoolNot:z\d+>>  BooleanNot [<<Arg>>]
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<BoolNot>>,<<Const2>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<NotArg:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<NotArg>>,<<Const2>>]
+  /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
-  /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
-  /// CHECK-DAG:                       Return [<<False>>]
-
-  public static boolean EqualBoolVsIntConst(boolean arg) {
-    return (arg ? 0 : 1) == 2;
-  }
-
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
-  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
-  /// CHECK-DAG:     <<BoolNot:z\d+>>  BooleanNot [<<Arg>>]
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<BoolNot>>,<<Const2>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
-
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:                       Return [<<True>>]
 
-  public static boolean NotEqualBoolVsIntConst(boolean arg) {
-    return (arg ? 0 : 1) != 2;
+  public static boolean $noinline$EqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
+    // Make calls that will be inlined, so that the instruction simplifier
+    // sees the simplifiable pattern (dead code elimination will also try to
+    // simplify it).
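+    // Since (arg ? 0 : 1) can only be 0 or 1, the comparison with 2 always
+    // yields true here.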
+    return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) != 2;
+  }
+
+  public static int $inline$ReturnArg(int arg) {
+    return arg;
+  }
+
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
+  /// CHECK-DAG:     <<NotArg:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<NotArg>>,<<Const2>>]
+  /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<NotCond>>]
+
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
+  /// CHECK-DAG:                       Return [<<False>>]
+
+  public static boolean $noinline$NotEqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
+    // Make calls that will be inlined, so that the instruction simplifier
+    // sees the simplifiable pattern (dead code elimination will also try to
+    // simplify it).
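+    // Since (arg ? 0 : 1) can only be 0 or 1, the == 2 comparison always
+    // yields false here.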
+    return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) == 2;
   }
 
   /*
@@ -1129,159 +1178,1149 @@
    * remove the second.
    */
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
-  /// CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
-  /// CHECK-DAG:     <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>]
+  /// CHECK-DAG:     <<Const0:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<NotArg:i\d+>>    Select [<<Const1>>,<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<NotNotArg:i\d+>> Select [<<Const1>>,<<Const0>>,<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
-  /// CHECK-DAG:                        BooleanNot [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
-  /// CHECK:                            BooleanNot
-  /// CHECK-NOT:                        BooleanNot
-
   public static boolean NegateValue(boolean arg) {
     return !arg;
   }
 
-  public static boolean NotNotBool(boolean arg) {
+  public static boolean $noinline$NotNotBool(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return !(NegateValue(arg));
   }
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:f\d+>>   FloatConstant 2
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:f\d+>>  FloatConstant 0.5
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float Div2(float arg) {
+  public static float $noinline$Div2(float arg) {
+    if (doThrow) { throw new Error(); }
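+    // 2.0f is a power of two with an exactly representable reciprocal
+    // (0.5f), so the division can be replaced by a multiplication.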
     return arg / 2.0f;
   }
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:d\d+>>   DoubleConstant 2
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:d\d+>>  DoubleConstant 0.5
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double Div2(double arg) {
+  public static double $noinline$Div2(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0;
   }
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:f\d+>>   FloatConstant -0.25
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:f\d+>>  FloatConstant -4
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float DivMP25(float arg) {
+  public static float $noinline$DivMP25(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:d\d+>>   DoubleConstant -0.25
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:d\d+>>  DoubleConstant -4
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double DivMP25(double arg) {
+  public static double $noinline$DivMP25(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
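+
+  // Illustrative note (not a checked assertion): the divisors used above,
+  // 2 and -0.25, have exactly representable reciprocals (0.5 and -4), which
+  // is what makes the divide-to-multiply rewrite value-preserving. A divisor
+  // such as 3.0 has no exact reciprocal and would not be rewritten this way.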
 
-  public static void main(String[] args) {
-    int arg = 123456;
+  /**
+   * Test strength reduction of factors of the form (2^n + 1).
+   */
 
-    assertLongEquals(Add0(arg), arg);
-    assertIntEquals(AndAllOnes(arg), arg);
-    assertLongEquals(Div1(arg), arg);
-    assertIntEquals(DivN1(arg), -arg);
-    assertLongEquals(Mul1(arg), arg);
-    assertIntEquals(MulN1(arg), -arg);
-    assertLongEquals(MulPowerOfTwo128(arg), (128 * arg));
-    assertIntEquals(Or0(arg), arg);
-    assertLongEquals(OrSame(arg), arg);
-    assertIntEquals(Shl0(arg), arg);
-    assertLongEquals(Shr0(arg), arg);
-    assertLongEquals(Sub0(arg), arg);
-    assertIntEquals(SubAliasNeg(arg), -arg);
-    assertLongEquals(UShr0(arg), arg);
-    assertIntEquals(Xor0(arg), arg);
-    assertIntEquals(XorAllOnes(arg), ~arg);
-    assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
-    assertLongEquals(AddNeg1(arg, arg + 1), 1);
-    assertLongEquals(AddNeg2(arg, arg + 1), -1);
-    assertLongEquals(NegNeg1(arg), arg);
-    assertIntEquals(NegNeg2(arg), 0);
-    assertLongEquals(NegNeg3(arg), arg);
-    assertIntEquals(NegSub1(arg, arg + 1), 1);
-    assertIntEquals(NegSub2(arg, arg + 1), 1);
-    assertLongEquals(NotNot1(arg), arg);
-    assertIntEquals(NotNot2(arg), -1);
-    assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
-    assertIntEquals(EqualTrueRhs(true), 5);
-    assertIntEquals(EqualTrueLhs(true), 5);
-    assertIntEquals(EqualFalseRhs(true), 3);
-    assertIntEquals(EqualFalseLhs(true), 3);
-    assertIntEquals(NotEqualTrueRhs(true), 3);
-    assertIntEquals(NotEqualTrueLhs(true), 3);
-    assertIntEquals(NotEqualFalseRhs(true), 5);
-    assertIntEquals(NotEqualFalseLhs(true), 5);
-    assertBooleanEquals(NotNotBool(true), true);
-    assertBooleanEquals(NotNotBool(false), false);
-    assertFloatEquals(Div2(100.0f), 50.0f);
-    assertDoubleEquals(Div2(150.0), 75.0);
-    assertFloatEquals(DivMP25(100.0f), -400.0f);
-    assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertLongEquals(Shl1(100), 200);
-    assertIntEquals(UShr28And15(0xc1234567), 0xc);
-    assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
-    assertIntEquals(UShr28And7(0xc1234567), 0x4);
-    assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L);
-    assertIntEquals(Shr24And255(0xc1234567), 0xc1);
-    assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
-    assertIntEquals(Shr24And127(0xc1234567), 0x41);
-    assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
+  /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
+
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
+  /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
+  /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
+
+  public static int $noinline$mulPow2Plus1(int arg) {
+    if (doThrow) { throw new Error(); }
+    return arg * 9;
   }
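+
+  // Illustrative sketch (not a checked assertion): a multiplication by a
+  // constant of the form (2^n + 1) becomes a shift plus an add, e.g.
+  //   arg * 9  ==>  (arg << 3) + arg,   since 9 == (1 << 3) + 1;
+  // for arg == 5 both forms evaluate to 45.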
+
+  /**
+   * Test strength reduction of factors of the form (2^n - 1).
+   */
+
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
+  /// CHECK:                            Mul [<<Const31>>,<<Arg>>]
+
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
+  /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
+  /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
+
+  public static long $noinline$mulPow2Minus1(long arg) {
+    if (doThrow) { throw new Error(); }
+    return arg * 31;
+  }
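+
+  // Illustrative sketch (not a checked assertion): a multiplication by a
+  // constant of the form (2^n - 1) becomes a shift minus the operand, e.g.
+  //   arg * 31  ==>  (arg << 5) - arg,   since 31 == (1 << 5) - 1;
+  // for arg == 2 both forms evaluate to 62.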
+
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
+
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
+
+  public static int $noinline$booleanFieldNotEqualOne() {
+    if (doThrow) { throw new Error(); }
+    return (booleanField == $inline$true()) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<Field>>,<<Const0>>]
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
+
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
+
+  public static int $noinline$booleanFieldEqualZero() {
+    if (doThrow) { throw new Error(); }
+    return (booleanField != $inline$false()) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<GT:i\d+>>       Select [<<Const1>>,<<Const0>>,<<LE>>]
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<GT>>,<<Const1>>]
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
+  /// CHECK-DAG:                        Return [<<Result>>]
+
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:                        Return [<<Result>>]
+  // Note that we match `LE` from Select because there are two identical
+  // LessThanOrEqual instructions.
+
+  public static int $noinline$intConditionNotEqualOne(int i) {
+    if (doThrow) { throw new Error(); }
+    return ((i > 42) == $inline$true()) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<GT:i\d+>>       Select [<<Const1>>,<<Const0>>,<<LE>>]
+  /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<GT>>,<<Const0>>]
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
+  /// CHECK-DAG:                        Return [<<Result>>]
+
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:                        Return [<<Result>>]
+  // Note that we match `LE` from Select because there are two identical
+  // LessThanOrEqual instructions.
+
+  public static int $noinline$intConditionEqualZero(int i) {
+    if (doThrow) { throw new Error(); }
+    return ((i > 42) != $inline$false()) ? 13 : 54;
+  }
+
+  // Test that conditions on float/double are not flipped.
+
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) builder (after)
+  /// CHECK:                            LessThanOrEqual
+
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) instruction_simplifier$before_codegen (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Const42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
+
+  public static int $noinline$floatConditionNotEqualOne(float f) {
+    if (doThrow) { throw new Error(); }
+    return ((f > 42.0f) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) builder (after)
+  /// CHECK:                            LessThanOrEqual
+
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) instruction_simplifier$before_codegen (after)
+  /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
+  /// CHECK-DAG:                        Return [<<Select>>]
+
+  public static int $noinline$doubleConditionEqualZero(double d) {
+    if (doThrow) { throw new Error(); }
+    return ((d > 42.0) != false) ? 13 : 54;
+  }
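+
+  // Illustrative note (not a checked assertion): every ordered comparison is
+  // false when an operand is NaN, so rewriting !(d > 42.0) as (d <= 42.0)
+  // would not be value-preserving: !(NaN > 42.0) is true, while
+  // (NaN <= 42.0) is false. Hence the comparisons above are not flipped.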
+
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static int $noinline$intToDoubleToInt(int value) {
+    if (doThrow) { throw new Error(); }
+    // Lossless conversion followed by a conversion back.
+    return (int) (double) value;
+  }
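+
+  // Illustrative note (not a checked assertion): a double's 53-bit
+  // significand represents every 32-bit int exactly, so
+  // (int) (double) x == x for all int x, including Integer.MIN_VALUE and
+  // Integer.MAX_VALUE, and both conversions can be removed.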
+
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
+
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      {{d\d+}}          TypeConversion [<<Arg>>]
+
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-DAG:                        TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static String $noinline$intToDoubleToIntPrint(int value) {
+    if (doThrow) { throw new Error(); }
+    // Lossless conversion followed by a conversion back
+    // with another use of the intermediate result.
+    double d = (double) value;
+    int i = (int) d;
+    return "d=" + d + ", i=" + i;
+  }
+
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static int $noinline$byteToDoubleToInt(byte value) {
+    if (doThrow) { throw new Error(); }
+    // Lossless conversion followed by another conversion; the merged
+    // byte->int conversion is implicit, so no conversion remains.
+    return (int) (double) value;
+  }
+
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-DAG:                        TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static int $noinline$floatToDoubleToInt(float value) {
+    if (doThrow) { throw new Error(); }
+    // Lossless conversion followed by another conversion.
+    return (int) (double) value;
+  }
+
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
+
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
+
+  public static String $noinline$floatToDoubleToIntPrint(float value) {
+    if (doThrow) { throw new Error(); }
+    // Lossless conversion followed by another conversion with
+    // an extra use of the intermediate result.
+    double d = (double) value;
+    int i = (int) d;
+    return "d=" + d + ", i=" + i;
+  }
+
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static short $noinline$byteToDoubleToShort(byte value) {
+    if (doThrow) { throw new Error(); }
+    // Originally, this is byte->double->int->short. The first conversion is lossless,
+    // so we merge this with the second one to byte->int which we omit as it's an implicit
+    // conversion. Then we eliminate the resulting byte->short as an implicit conversion.
+    return (short) (double) value;
+  }
+
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-DAG:                        TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static short $noinline$charToDoubleToShort(char value) {
+    if (doThrow) { throw new Error(); }
+    // Originally, this is char->double->int->short. The first conversion is lossless,
+    // so we merge this with the second one to char->int which we omit as it's an implicit
+    // conversion. Then we are left with the resulting char->short conversion.
+    return (short) (double) value;
+  }
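+
+  // Illustrative note (not a checked assertion): the remaining char->short
+  // conversion is not removable, e.g. for value == '\uffff' the original
+  // chain yields 65535.0 -> 65535 -> (short) -1, and the single char->short
+  // conversion reproduces that -1.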
+
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  public static short $noinline$floatToIntToShort(float value) {
+    if (doThrow) { throw new Error(); }
+    // Lossy FP to integral conversion followed by another conversion: no simplification.
+    return (short) value;
+  }
+
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  public static int $noinline$intToFloatToInt(int value) {
+    if (doThrow) { throw new Error(); }
+    // Lossy integral to FP conversion followed by another conversion: no simplification.
+    return (int) (float) value;
+  }
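+
+  // Illustrative note (not a checked assertion): unlike double, a float's
+  // 24-bit significand cannot represent every int, e.g.
+  // (int) (float) 16777217 == 16777216, so neither conversion can be removed.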
+
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Double>>]
+
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Double>>]
+
+  public static double $noinline$longToIntToDouble(long value) {
+    if (doThrow) { throw new Error(); }
+    // Lossy long-to-int conversion followed by an integral to FP conversion: no simplification.
+    return (double) (int) value;
+  }
+
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Long>>]
+
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Long>>]
+
+  public static long $noinline$longToIntToLong(long value) {
+    if (doThrow) { throw new Error(); }
+    // Lossy long-to-int conversion followed by an int-to-long conversion: no simplification.
+    return (long) (int) value;
+  }
+
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Char>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  public static short $noinline$shortToCharToShort(short value) {
+    if (doThrow) { throw new Error(); }
+    // Integral conversion followed by a non-widening integral conversion back to the original type.
+    return (short) (char) value;
+  }
+
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Long>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  public static int $noinline$shortToLongToInt(short value) {
+    if (doThrow) { throw new Error(); }
+    // Integral conversion followed by a non-widening integral conversion;
+    // the merged short->int conversion is implicit, so no conversion remains.
+    return (int) (long) value;
+  }
+
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Char>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  public static byte $noinline$shortToCharToByte(short value) {
+    if (doThrow) { throw new Error(); }
+    // Integral conversion followed by a non-widening integral conversion losing bits
+    // from the original type. Simplify to use only one conversion.
+    return (byte) (char) value;
+  }
+
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
+
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
+
+  public static String $noinline$shortToCharToBytePrint(short value) {
+    if (doThrow) { throw new Error(); }
+    // Integral conversion followed by a non-widening integral conversion losing bits
+    // from the original type with an extra use of the intermediate result.
+    char c = (char) value;
+    byte b = (byte) c;
+    return "c=" + ((int) c) + ", b=" + ((int) b);  // implicit conversions.
+  }
+
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 255
+  /// CHECK-DAG:      <<And:j\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<And>>]
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-NOT:                        And
+
+  public static byte $noinline$longAnd0xffToByte(long value) {
+    if (doThrow) { throw new Error(); }
+    return (byte) (value & 0xff);
+  }
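+
+  // Illustrative note (not a checked assertion): the byte conversion keeps
+  // only the low 8 bits anyway, so the `& 0xff` is redundant, e.g. both
+  // (byte) 0x1234432112344321L and (byte) (0x1234432112344321L & 0xff)
+  // evaluate to 0x21.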
+
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 131071
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Char>>]
+
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Char>>]
+
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-NOT:                        And
+
+  public static char $noinline$intAnd0x1ffffToChar(int value) {
+    if (doThrow) { throw new Error(); }
+    // Keeping all significant bits and one more.
+    return (char) (value & 0x1ffff);
+  }
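+
+  // Illustrative note (not a checked assertion): the mask 0x1ffff keeps all
+  // 16 bits that the char conversion retains (plus bit 16, which the
+  // conversion discards), so dropping the And cannot change the result.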
+
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  public static short $noinline$intAnd0x17fffToShort(int value) {
+    if (doThrow) { throw new Error(); }
+    // No simplification: clearing a significant bit.
+    return (short) (value & 0x17fff);
+  }
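+
+  // Illustrative note (not a checked assertion): the mask 0x17fff clears
+  // bit 15, which the short conversion keeps as the sign bit, so the And
+  // must stay: (short) (0x8000 & 0x17fff) == 0, but (short) 0x8000 == -32768.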
+
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 65535
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Same:s\d+>>     TypeConversion [<<And>>]
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Same>>]
+  /// CHECK-DAG:                        Return [<<Double>>]
+
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Double>>]
+
+  public static double $noinline$shortAnd0xffffToShortToDouble(short value) {
+    if (doThrow) { throw new Error(); }
+    short same = (short) (value & 0xffff);
+    return (double) same;
+  }
+
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Const42>>,<<Arg>>]
+
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
+
+  public static int $noinline$intReverseCondition(int i) {
+    if (doThrow) { throw new Error(); }
+    return (42 > i) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
+  /// CHECK-DAG:      <<CMP:i\d+>>      Compare [<<Const42>>,<<Result>>]
+
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
+
+  public static int $noinline$intReverseConditionNaN(int i) {
+    if (doThrow) { throw new Error(); }
+    return (42 != Math.sqrt(i)) ? 13 : 54;
+  }
+
+  public static int $noinline$runSmaliTest(String name, boolean input) {
+    if (doThrow) { throw new Error(); }
+    try {
+      Class<?> c = Class.forName("SmaliTests");
+      Method m = c.getMethod(name, boolean.class);
+      return (Integer) m.invoke(null, input);
+    } catch (Exception ex) {
+      throw new Error(ex);
+    }
+  }
+
+  public static int $noinline$runSmaliTestConst(String name, int arg) {
+    if (doThrow) { throw new Error(); }
+    try {
+      Class<?> c = Class.forName("SmaliTests");
+      Method m = c.getMethod(name, int.class);
+      return (Integer) m.invoke(null, arg);
+    } catch (Exception ex) {
+      throw new Error(ex);
+    }
+  }
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static int $noinline$intUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value << (shift & 31);
+  }
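+
+  // Illustrative note (not a checked assertion): per JLS 15.19, int shifts
+  // already use only the low 5 bits of the shift count (low 6 bits for long
+  // shifts), so `shift & 31` is redundant here, e.g. a shift amount of 35
+  // shifts by 35 & 31 == 3 with or without the explicit mask.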
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const63:i\d+>>  IntConstant 63
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const63>>]
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  public static long $noinline$longUnnecessaryShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value >> (shift & 63);
+  }
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const255:i\d+>> IntConstant 255
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const255>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  public static int $noinline$intUnnecessaryWiderShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value >>> (shift & 0xff);
+  }
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static long $noinline$longSmallerShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value << (shift & 3);
+  }
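+
+  // Illustrative note (not a checked assertion): a mask smaller than the
+  // implicit one does change the result and must be kept, e.g. for
+  // shift == 7, value << (7 & 3) shifts by 3 while value << 7 shifts by 7.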
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$otherUseOfUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    int temp = shift & 31;
+    return (value >> temp) + temp;
+  }
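+
+  // Illustrative note (not a checked assertion): the And has a second use
+  // (the Add), so it remains, but the shift can still take the unmasked
+  // amount because of the implicit masking of the shift count.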
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Y>>]
+
+  public static int $noinline$intAddSubSimplifyArg1(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - x;
+  }
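+
+  // Illustrative note (not a checked assertion): Java int arithmetic wraps
+  // modulo 2^32, so (x + y) - x == y holds for all values, even when x + y
+  // overflows, which makes folding the Sub away safe.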
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intAddSubSimplifyArg2(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyLeft(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyRight(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return y + sub;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg1(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - x;
+  }
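+
+  // Illustrative note (not a checked assertion): floating-point addition
+  // rounds, so (x + y) - x need not equal y and the integer rewrite above
+  // does not apply, e.g. with x == 1.0e8f and y == 1.0f,
+  // (x + y) - x == 0.0f.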
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg2(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyLeft(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyRight(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return y + sub;
+  }
+
+  public static void main(String[] args) {
+    int arg = 123456;
+    float floatArg = 123456.125f;
+
+    assertLongEquals(arg, $noinline$Add0(arg));
+    assertIntEquals(5, $noinline$AddAddSubAddConst(1));
+    assertIntEquals(arg, $noinline$AndAllOnes(arg));
+    assertLongEquals(arg, $noinline$Div1(arg));
+    assertIntEquals(-arg, $noinline$DivN1(arg));
+    assertLongEquals(arg, $noinline$Mul1(arg));
+    assertIntEquals(-arg, $noinline$MulN1(arg));
+    assertLongEquals((128 * arg), $noinline$MulPowerOfTwo128(arg));
+    assertLongEquals(2640, $noinline$MulMulMulConst(2));
+    assertIntEquals(arg, $noinline$Or0(arg));
+    assertLongEquals(arg, $noinline$OrSame(arg));
+    assertIntEquals(arg, $noinline$Shl0(arg));
+    assertLongEquals(arg, $noinline$Shr0(arg));
+    assertLongEquals(arg, $noinline$Shr64(arg));
+    assertLongEquals(arg, $noinline$Sub0(arg));
+    assertIntEquals(-arg, $noinline$SubAliasNeg(arg));
+    assertIntEquals(9, $noinline$SubAddConst1(2));
+    assertIntEquals(-2, $noinline$SubAddConst2(3));
+    assertLongEquals(3, $noinline$SubSubConst(4));
+    assertLongEquals(arg, $noinline$UShr0(arg));
+    assertIntEquals(arg, $noinline$Xor0(arg));
+    assertIntEquals(~arg, $noinline$XorAllOnes(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$AddNegs3(arg, arg + 1));
+    assertLongEquals(1, $noinline$AddNeg1(arg, arg + 1));
+    assertLongEquals(-1, $noinline$AddNeg2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NegNeg1(arg));
+    assertIntEquals(0, $noinline$NegNeg2(arg));
+    assertLongEquals(arg, $noinline$NegNeg3(arg));
+    assertIntEquals(1, $noinline$NegSub1(arg, arg + 1));
+    assertIntEquals(1, $noinline$NegSub2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NotNot1(arg));
+    assertIntEquals(-1, $noinline$NotNot2(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$SubNeg3(arg, arg + 1));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(false));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(true));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(true, $noinline$NotNotBool(true));
+    assertBooleanEquals(false, $noinline$NotNotBool(false));
+    assertFloatEquals(50.0f, $noinline$Div2(100.0f));
+    assertDoubleEquals(75.0, $noinline$Div2(150.0));
+    assertFloatEquals(-400.0f, $noinline$DivMP25(100.0f));
+    assertDoubleEquals(-600.0, $noinline$DivMP25(150.0));
+    assertIntEquals(0xc, $noinline$UShr28And15(0xc1234567));
+    assertLongEquals(0xcL, $noinline$UShr60And15(0xc123456787654321L));
+    assertIntEquals(0x4, $noinline$UShr28And7(0xc1234567));
+    assertLongEquals(0x4L, $noinline$UShr60And7(0xc123456787654321L));
+    assertIntEquals(0xc1, $noinline$Shr24And255(0xc1234567));
+    assertLongEquals(0xc1L, $noinline$Shr56And255(0xc123456787654321L));
+    assertIntEquals(0x41, $noinline$Shr24And127(0xc1234567));
+    assertLongEquals(0x41L, $noinline$Shr56And127(0xc123456787654321L));
+    assertIntEquals(0, $noinline$mulPow2Plus1(0));
+    assertIntEquals(9, $noinline$mulPow2Plus1(1));
+    assertIntEquals(18, $noinline$mulPow2Plus1(2));
+    assertIntEquals(900, $noinline$mulPow2Plus1(100));
+    assertIntEquals(111105, $noinline$mulPow2Plus1(12345));
+    assertLongEquals(0, $noinline$mulPow2Minus1(0));
+    assertLongEquals(31, $noinline$mulPow2Minus1(1));
+    assertLongEquals(62, $noinline$mulPow2Minus1(2));
+    assertLongEquals(3100, $noinline$mulPow2Minus1(100));
+    assertLongEquals(382695, $noinline$mulPow2Minus1(12345));
+
+    booleanField = false;
+    assertIntEquals(54, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(54, $noinline$booleanFieldEqualZero());
+    booleanField = true;
+    assertIntEquals(13, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(13, $noinline$booleanFieldEqualZero());
+    assertIntEquals(54, $noinline$intConditionNotEqualOne(6));
+    assertIntEquals(13, $noinline$intConditionNotEqualOne(43));
+    assertIntEquals(54, $noinline$intConditionEqualZero(6));
+    assertIntEquals(13, $noinline$intConditionEqualZero(43));
+    assertIntEquals(54, $noinline$floatConditionNotEqualOne(6.0f));
+    assertIntEquals(13, $noinline$floatConditionNotEqualOne(43.0f));
+    assertIntEquals(54, $noinline$doubleConditionEqualZero(6.0));
+    assertIntEquals(13, $noinline$doubleConditionEqualZero(43.0));
+
+    assertIntEquals(1234567, $noinline$intToDoubleToInt(1234567));
+    assertIntEquals(Integer.MIN_VALUE, $noinline$intToDoubleToInt(Integer.MIN_VALUE));
+    assertIntEquals(Integer.MAX_VALUE, $noinline$intToDoubleToInt(Integer.MAX_VALUE));
+    assertStringEquals("d=7654321.0, i=7654321", $noinline$intToDoubleToIntPrint(7654321));
+    assertIntEquals(12, $noinline$byteToDoubleToInt((byte) 12));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToInt(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToInt(Byte.MAX_VALUE));
+    assertIntEquals(11, $noinline$floatToDoubleToInt(11.3f));
+    assertStringEquals("d=12.25, i=12", $noinline$floatToDoubleToIntPrint(12.25f));
+    assertIntEquals(123, $noinline$byteToDoubleToShort((byte) 123));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToShort(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToShort(Byte.MAX_VALUE));
+    assertIntEquals(1234, $noinline$charToDoubleToShort((char) 1234));
+    assertIntEquals(Character.MIN_VALUE, $noinline$charToDoubleToShort(Character.MIN_VALUE));
+    assertIntEquals(/* sign-extended */ -1, $noinline$charToDoubleToShort(Character.MAX_VALUE));
+    assertIntEquals(12345, $noinline$floatToIntToShort(12345.75f));
+    assertIntEquals(Short.MAX_VALUE, $noinline$floatToIntToShort((float)(Short.MIN_VALUE - 1)));
+    assertIntEquals(Short.MIN_VALUE, $noinline$floatToIntToShort((float)(Short.MAX_VALUE + 1)));
+    assertIntEquals(-54321, $noinline$intToFloatToInt(-54321));
+    assertDoubleEquals((double) 0x12345678, $noinline$longToIntToDouble(0x1234567812345678L));
+    assertDoubleEquals(0.0, $noinline$longToIntToDouble(Long.MIN_VALUE));
+    assertDoubleEquals(-1.0, $noinline$longToIntToDouble(Long.MAX_VALUE));
+    assertLongEquals(0x0000000012345678L, $noinline$longToIntToLong(0x1234567812345678L));
+    assertLongEquals(0xffffffff87654321L, $noinline$longToIntToLong(0x1234567887654321L));
+    assertLongEquals(0L, $noinline$longToIntToLong(Long.MIN_VALUE));
+    assertLongEquals(-1L, $noinline$longToIntToLong(Long.MAX_VALUE));
+    assertIntEquals((short) -5678, $noinline$shortToCharToShort((short) -5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToCharToShort(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToCharToShort(Short.MAX_VALUE));
+    assertIntEquals(5678, $noinline$shortToLongToInt((short) 5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToLongToInt(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToLongToInt(Short.MAX_VALUE));
+    assertIntEquals(0x34, $noinline$shortToCharToByte((short) 0x1234));
+    assertIntEquals(-0x10, $noinline$shortToCharToByte((short) 0x12f0));
+    assertIntEquals(0, $noinline$shortToCharToByte(Short.MIN_VALUE));
+    assertIntEquals(-1, $noinline$shortToCharToByte(Short.MAX_VALUE));
+    assertStringEquals("c=1025, b=1", $noinline$shortToCharToBytePrint((short) 1025));
+    assertStringEquals("c=1023, b=-1", $noinline$shortToCharToBytePrint((short) 1023));
+    assertStringEquals("c=65535, b=-1", $noinline$shortToCharToBytePrint((short) -1));
+
+    assertIntEquals(0x21, $noinline$longAnd0xffToByte(0x1234432112344321L));
+    assertIntEquals(0, $noinline$longAnd0xffToByte(Long.MIN_VALUE));
+    assertIntEquals(-1, $noinline$longAnd0xffToByte(Long.MAX_VALUE));
+    assertIntEquals(0x1234, $noinline$intAnd0x1ffffToChar(0x43211234));
+    assertIntEquals(0, $noinline$intAnd0x1ffffToChar(Integer.MIN_VALUE));
+    assertIntEquals(Character.MAX_VALUE, $noinline$intAnd0x1ffffToChar(Integer.MAX_VALUE));
+    assertIntEquals(0x4321, $noinline$intAnd0x17fffToShort(0x87654321));
+    assertIntEquals(0x0888, $noinline$intAnd0x17fffToShort(0x88888888));
+    assertIntEquals(0, $noinline$intAnd0x17fffToShort(Integer.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$intAnd0x17fffToShort(Integer.MAX_VALUE));
+
+    assertDoubleEquals(0.0, $noinline$shortAnd0xffffToShortToDouble((short) 0));
+    assertDoubleEquals(1.0, $noinline$shortAnd0xffffToShortToDouble((short) 1));
+    assertDoubleEquals(-2.0, $noinline$shortAnd0xffffToShortToDouble((short) -2));
+    assertDoubleEquals(12345.0, $noinline$shortAnd0xffffToShortToDouble((short) 12345));
+    assertDoubleEquals((double)Short.MAX_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
+    assertDoubleEquals((double)Short.MIN_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
+
+    assertIntEquals(13, $noinline$intReverseCondition(41));
+    assertIntEquals(13, $noinline$intReverseConditionNaN(-5));
+
+    for (String condition : new String[] { "Equal", "NotEqual" }) {
+      for (String constant : new String[] { "True", "False" }) {
+        for (String side : new String[] { "Rhs", "Lhs" }) {
+          String name = condition + constant + side;
+          assertIntEquals(5, $noinline$runSmaliTest(name, true));
+          assertIntEquals(3, $noinline$runSmaliTest(name, false));
+        }
+      }
+    }
+
+    assertIntEquals(0, $noinline$runSmaliTestConst("AddSubConst", 1));
+    assertIntEquals(3, $noinline$runSmaliTestConst("SubAddConst", 2));
+    assertIntEquals(-16, $noinline$runSmaliTestConst("SubSubConst1", 3));
+    assertIntEquals(-5, $noinline$runSmaliTestConst("SubSubConst2", 4));
+    assertIntEquals(26, $noinline$runSmaliTestConst("SubSubConst3", 5));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50 + 64));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10 + 128));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
+
+    assertIntEquals(654321, $noinline$intAddSubSimplifyArg1(arg, 654321));
+    assertIntEquals(arg, $noinline$intAddSubSimplifyArg2(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyLeft(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyRight(arg, 654321));
+    assertFloatEquals(654321.125f, $noinline$floatAddSubSimplifyArg1(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatAddSubSimplifyArg2(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyLeft(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyRight(floatArg, 654321.125f));
+  }
+
+  private static boolean $inline$true() { return true; }
+  private static boolean $inline$false() { return false; }
+
+  public static boolean booleanField;
 }
diff --git a/test/462-checker-inlining-across-dex-files/multidex.jpp b/test/462-checker-inlining-across-dex-files/multidex.jpp
new file mode 100644
index 0000000..ae55456
--- /dev/null
+++ b/test/462-checker-inlining-across-dex-files/multidex.jpp
@@ -0,0 +1,8 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
+
+AAA:
+  @@com.android.jack.annotations.ForceInMainDex
+  class AAA
+
diff --git a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
index 171ade8..2056e2f 100644
--- a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
+++ b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
@@ -38,25 +38,25 @@
     return "OtherDex";
   }
 
-  public static Class returnOtherDexClass() {
+  public static Class<?> returnOtherDexClass() {
     return OtherDex.class;
   }
 
-  public static Class returnMainClass() {
+  public static Class<?> returnMainClass() {
     return Main.class;
   }
 
-  private static Class returnOtherDexClass2() {
+  private static Class<?> returnOtherDexClass2() {
     return OtherDex.class;
   }
 
-  public static Class returnOtherDexClassStaticCall() {
+  public static Class<?> returnOtherDexClassStaticCall() {
     // Do not call returnOtherDexClass, as it may have been flagged
     // as non-inlineable.
     return returnOtherDexClass2();
   }
 
-  public static Class returnOtherDexCallingMain() {
+  public static Class<?> returnOtherDexCallingMain() {
     return Main.getOtherClass();
   }
 
diff --git a/test/462-checker-inlining-across-dex-files/src/Main.java b/test/462-checker-inlining-across-dex-files/src/Main.java
index 1fe49a8..c2bb479 100644
--- a/test/462-checker-inlining-across-dex-files/src/Main.java
+++ b/test/462-checker-inlining-across-dex-files/src/Main.java
@@ -106,7 +106,7 @@
   /// CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  public static Class dontInlineOtherDexClass() {
+  public static Class<?> dontInlineOtherDexClass() {
     return OtherDex.returnOtherDexClass();
   }
 
@@ -123,7 +123,7 @@
   // Note: There are two LoadClass instructions. We obtain the correct
   //       instruction id by matching the Return's input list first.
 
-  public static Class inlineMainClass() {
+  public static Class<?> inlineMainClass() {
     return OtherDex.returnMainClass();
   }
 
@@ -135,7 +135,7 @@
   /// CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  public static Class dontInlineOtherDexClassStaticCall() {
+  public static Class<?> dontInlineOtherDexClassStaticCall() {
     return OtherDex.returnOtherDexClassStaticCall();
   }
 
@@ -152,11 +152,11 @@
   // Note: There are two LoadClass instructions. We obtain the correct
   //       instruction id by matching the Return's input list first.
 
-  public static Class inlineOtherDexCallingMain() {
+  public static Class<?> inlineOtherDexCallingMain() {
     return OtherDex.returnOtherDexCallingMain();
   }
 
-  public static Class getOtherClass() {
+  public static Class<?> getOtherClass() {
     return Main.class;
   }
 
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
index 61510d8..f0fe1b1 100644
--- a/test/463-checker-boolean-simplifier/src/Main.java
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -37,31 +37,32 @@
    * empty branches removed.
    */
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (before)
   /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:                       If [<<Param>>]
-  /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (before)
   /// CHECK:                           Goto
   /// CHECK:                           Goto
   /// CHECK:                           Goto
   /// CHECK-NOT:                       Goto
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (after)
   /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<NotParam:z\d+>> BooleanNot [<<Param>>]
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<NotParam:i\d+>> Select [<<Const1>>,<<Const0>>,<<Param>>]
   /// CHECK-DAG:                       Return [<<NotParam>>]
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (after)
   /// CHECK-NOT:                       If
   /// CHECK-NOT:                       Phi
 
-  /// CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.BooleanNot(boolean) select_generator (after)
   /// CHECK:                           Goto
   /// CHECK-NOT:                       Goto
 
@@ -74,7 +75,7 @@
    * and 0 when False.
    */
 
-  /// CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.GreaterThan(int, int) select_generator (before)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -84,13 +85,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  /// CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.GreaterThan(int, int) select_generator (after)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Cond:z\d+>>     GreaterThan [<<ParamX>>,<<ParamY>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const0>>,<<Const1>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static boolean GreaterThan(int x, int y) {
     return (x <= y) ? false : true;
@@ -101,7 +103,7 @@
    * and 1 when False.
    */
 
-  /// CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.LessThan(int, int) select_generator (before)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -111,13 +113,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  /// CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.LessThan(int, int) select_generator (after)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Cond:z\d+>>     LessThan [<<ParamX>>,<<ParamY>>]
-  /// CHECK-DAG:                       Return [<<Cond>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     GreaterThanOrEqual [<<ParamX>>,<<ParamY>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
   public static boolean LessThan(int x, int y) {
     return (x < y) ? true : false;
@@ -128,7 +131,7 @@
    * Note that Phis are discovered retrospectively.
    */
 
-  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (before)
+  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) select_generator (before)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
@@ -145,29 +148,25 @@
   /// CHECK-DAG:     <<PhiYZ>>         Phi [<<Const1>>,<<Const0>>]
   /// CHECK-DAG:     <<PhiXYZ>>        Phi [<<Const1>>,<<Const0>>]
 
-  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.ValuesOrdered(int, int, int) select_generator (after)
   /// CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
   /// CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
-  /// CHECK-DAG:     <<CmpXY:z\d+>>    LessThanOrEqual [<<ParamX>>,<<ParamY>>]
-  /// CHECK-DAG:     <<CmpYZ:z\d+>>    LessThanOrEqual [<<ParamY>>,<<ParamZ>>]
-  /// CHECK-DAG:     <<CmpXYZ:z\d+>>   Equal [<<CmpXY>>,<<CmpYZ>>]
-  /// CHECK-DAG:                       Return [<<CmpXYZ>>]
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<CmpXY:z\d+>>    GreaterThan [<<ParamX>>,<<ParamY>>]
+  /// CHECK-DAG:     <<SelXY:i\d+>>    Select [<<Const1>>,<<Const0>>,<<CmpXY>>]
+  /// CHECK-DAG:     <<CmpYZ:z\d+>>    GreaterThan [<<ParamY>>,<<ParamZ>>]
+  /// CHECK-DAG:     <<SelYZ:i\d+>>    Select [<<Const1>>,<<Const0>>,<<CmpYZ>>]
+  /// CHECK-DAG:     <<CmpXYZ:z\d+>>   NotEqual [<<SelXY>>,<<SelYZ>>]
+  /// CHECK-DAG:     <<SelXYZ:i\d+>>   Select [<<Const1>>,<<Const0>>,<<CmpXYZ>>]
+  /// CHECK-DAG:                       Return [<<SelXYZ>>]
 
   public static boolean ValuesOrdered(int x, int y, int z) {
     return (x <= y) == (y <= z);
   }
 
-  /// CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (before)
-  /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
-  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
-  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
-  /// CHECK-DAG:     <<NotParam:z\d+>> BooleanNot [<<Param>>]
-  /// CHECK-DAG:                       If [<<NotParam>>]
-  /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const42>>,<<Const43>>]
-  /// CHECK-DAG:                       Return [<<Phi>>]
-
-  /// CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  /// CHECK-START: int Main.NegatedCondition(boolean) select_generator (before)
   /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
@@ -175,17 +174,187 @@
   /// CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const42>>,<<Const43>>]
   /// CHECK-DAG:                       Return [<<Phi>>]
 
-  // Note: The fact that branches are swapped is verified by running the test.
+  /// CHECK-START: int Main.NegatedCondition(boolean) select_generator (after)
+  /// CHECK-DAG:     <<Param:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const43>>,<<Const42>>,<<Param>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
 
-  /// CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  /// CHECK-START: int Main.NegatedCondition(boolean) select_generator (after)
   /// CHECK-NOT:                       BooleanNot
 
   public static int NegatedCondition(boolean x) {
-    if (x != false) {
-      return 42;
-    } else {
-      return 43;
+    return (x != false) ? 42 : 43;
+  }
+
+  /// CHECK-START: int Main.SimpleTrueBlock(boolean, int) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<ParamY>>,<<Const42>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Const43>>,<<Add>>,<<ParamX>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: int Main.SimpleTrueBlock(boolean, int) select_generator (after)
+  /// CHECK-NOT:     If
+
+  public static int SimpleTrueBlock(boolean x, int y) {
+    return x ? y + 42 : 43;
+  }
+
+  /// CHECK-START: int Main.SimpleFalseBlock(boolean, int) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<ParamY>>,<<Const43>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<Add>>,<<Const42>>,<<ParamX>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: int Main.SimpleFalseBlock(boolean, int) select_generator (after)
+  /// CHECK-NOT:     If
+
+  public static int SimpleFalseBlock(boolean x, int y) {
+    return x ? 42 : y + 43;
+  }
+
+  /// CHECK-START: int Main.SimpleBothBlocks(boolean, int, int) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  /// CHECK-DAG:     <<AddTrue:i\d+>>  Add [<<ParamY>>,<<Const42>>]
+  /// CHECK-DAG:     <<AddFalse:i\d+>> Add [<<ParamZ>>,<<Const43>>]
+  /// CHECK-DAG:     <<Select:i\d+>>   Select [<<AddFalse>>,<<AddTrue>>,<<ParamX>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: int Main.SimpleBothBlocks(boolean, int, int) select_generator (after)
+  /// CHECK-NOT:     If
+
+  public static int SimpleBothBlocks(boolean x, int y, int z) {
+    return x ? y + 42 : z + 43;
+  }
+
+  /// CHECK-START: int Main.ThreeBlocks(boolean, boolean) select_generator (after)
+  /// CHECK-DAG:     <<ParamX:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<ParamY:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Const2:i\d+>>    IntConstant 2
+  /// CHECK-DAG:     <<Const3:i\d+>>    IntConstant 3
+  /// CHECK-DAG:     <<Select23:i\d+>>  Select [<<Const3>>,<<Const2>>,<<ParamY>>]
+  /// CHECK-DAG:     <<Select123:i\d+>> Select [<<Select23>>,<<Const1>>,<<ParamX>>]
+  /// CHECK-DAG:                        Return [<<Select123>>]
+
+  public static int ThreeBlocks(boolean x, boolean y) {
+    return x ? 1 : (y ? 2 : 3);
+  }
+
+  /// CHECK-START: int Main.MultiplePhis() select_generator (before)
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<PhiX:i\d+>>     Phi [<<Const0>>,<<Const13>>,<<Const42>>]
+  /// CHECK-DAG:     <<PhiY:i\d+>>     Phi [<<Const1>>,<<Add:i\d+>>,<<Add>>]
+  /// CHECK-DAG:     <<Add>>           Add [<<PhiY>>,<<Const1>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     LessThanOrEqual [<<Add>>,<<Const1>>]
+  /// CHECK-DAG:                       If [<<Cond>>]
+  /// CHECK-DAG:                       Return [<<PhiX>>]
+
+  /// CHECK-START: int Main.MultiplePhis() select_generator (after)
+  /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:     <<Const13:i\d+>>  IntConstant 13
+  /// CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:     <<PhiX:i\d+>>     Phi [<<Const0>>,<<Select:i\d+>>]
+  /// CHECK-DAG:     <<PhiY:i\d+>>     Phi [<<Const1>>,<<Add:i\d+>>]
+  /// CHECK-DAG:     <<Add>>           Add [<<PhiY>>,<<Const1>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     LessThanOrEqual [<<Add>>,<<Const1>>]
+  /// CHECK-DAG:     <<Select>>        Select [<<Const13>>,<<Const42>>,<<Cond>>]
+  /// CHECK-DAG:                       Return [<<PhiX>>]
+
+  public static int MultiplePhis() {
+    int x = 0;
+    int y = 1;
+    while (y++ < 10) {
+      if (y > 1) {
+        x = 13;
+        continue;
+      } else {
+        x = 42;
+        continue;
+      }
     }
+    return x;
+  }
+
+  /// CHECK-START: int Main.TrueBlockWithTooManyInstructions(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  /// CHECK-DAG:     <<Const43:i\d+>> IntConstant 43
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:     <<Iget:i\d+>>    InstanceFieldGet [<<This>>]
+  /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Iget>>,<<Const2>>]
+  /// CHECK-DAG:                      Phi [<<Add>>,<<Const43>>]
+
+  /// CHECK-START: int Main.TrueBlockWithTooManyInstructions(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int TrueBlockWithTooManyInstructions(boolean x) {
+    return x ? (read_field + 2) : 43;
+  }
+
+  /// CHECK-START: int Main.FalseBlockWithTooManyInstructions(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
+  /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:     <<Iget:i\d+>>    InstanceFieldGet [<<This>>]
+  /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Iget>>,<<Const3>>]
+  /// CHECK-DAG:                      Phi [<<Const42>>,<<Add>>]
+
+  /// CHECK-START: int Main.FalseBlockWithTooManyInstructions(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int FalseBlockWithTooManyInstructions(boolean x) {
+    return x ? 42 : (read_field + 3);
+  }
+
+  /// CHECK-START: int Main.TrueBlockWithSideEffects(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>> IntConstant 43
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<This>>,<<Const42>>]
+  /// CHECK-DAG:                      Phi [<<Const42>>,<<Const43>>]
+
+  /// CHECK-START: int Main.TrueBlockWithSideEffects(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int TrueBlockWithSideEffects(boolean x) {
+    return x ? (write_field = 42) : 43;
+  }
+
+  /// CHECK-START: int Main.FalseBlockWithSideEffects(boolean) select_generator (before)
+  /// CHECK-DAG:     <<This:l\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
+  /// CHECK-DAG:     <<Const43:i\d+>> IntConstant 43
+  /// CHECK-DAG:                      If [<<Cond>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<This>>,<<Const43>>]
+  /// CHECK-DAG:                      Phi [<<Const42>>,<<Const43>>]
+
+  /// CHECK-START: int Main.FalseBlockWithSideEffects(boolean) select_generator (after)
+  /// CHECK-NOT:     Select
+
+  public int FalseBlockWithSideEffects(boolean x) {
+    return x ? 42 : (write_field = 43);
   }
 
   public static void main(String[] args) {
@@ -206,5 +375,30 @@
     assertBoolEquals(false, ValuesOrdered(5, 5, 3));
     assertIntEquals(42, NegatedCondition(true));
     assertIntEquals(43, NegatedCondition(false));
+    assertIntEquals(46, SimpleTrueBlock(true, 4));
+    assertIntEquals(43, SimpleTrueBlock(false, 4));
+    assertIntEquals(42, SimpleFalseBlock(true, 7));
+    assertIntEquals(50, SimpleFalseBlock(false, 7));
+    assertIntEquals(48, SimpleBothBlocks(true, 6, 2));
+    assertIntEquals(45, SimpleBothBlocks(false, 6, 2));
+    assertIntEquals(1, ThreeBlocks(true, true));
+    assertIntEquals(1, ThreeBlocks(true, false));
+    assertIntEquals(2, ThreeBlocks(false, true));
+    assertIntEquals(3, ThreeBlocks(false, false));
+    assertIntEquals(13, MultiplePhis());
+
+    Main m = new Main();
+    assertIntEquals(42, m.TrueBlockWithTooManyInstructions(true));
+    assertIntEquals(43, m.TrueBlockWithTooManyInstructions(false));
+    assertIntEquals(42, m.FalseBlockWithTooManyInstructions(true));
+    assertIntEquals(43, m.FalseBlockWithTooManyInstructions(false));
+    assertIntEquals(42, m.TrueBlockWithSideEffects(true));
+    assertIntEquals(43, m.TrueBlockWithSideEffects(false));
+    assertIntEquals(42, m.FalseBlockWithSideEffects(true));
+    assertIntEquals(43, m.FalseBlockWithSideEffects(false));
   }
+
+  // These need to be instance fields so as not to generate a LoadClass for iget/iput.
+  public int read_field = 40;
+  public int write_field = 42;
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 6dce96c..3f25635 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -16,38 +16,48 @@
 
 public final class Main {
 
+  public final static class Helper {
+    private int foo = 3;
+
+    public int getFoo() {
+        return foo;
+    }
+  }
+
   public void invokeVirtual() {
   }
 
-  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) builder (after)
+  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeVirtual
   /// CHECK-DAG:                      ReturnVoid
 
   /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
   /// CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void inlineSharpenInvokeVirtual(Main m) {
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual {{.*\.getFoo.*}}
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
-  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect {{.*\.getFoo.*}}
+  /// CHECK-NOT:                      InvokeVirtual {{.*\.getFoo.*}}
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
   /// CHECK-DAG:                      Return [<<Field>>]
 
-  public static int inlineSharpenStringInvoke() {
-    return "Foo".length();
+  public static int inlineSharpenHelperInvoke() {
+    return new Helper().getFoo();
   }
 
   public static void main(String[] args) {
     inlineSharpenInvokeVirtual(new Main());
-    if (inlineSharpenStringInvoke() != 3) {
+    if (inlineSharpenHelperInvoke() != 3) {
       throw new Error("Expected 3");
     }
   }
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index 375a3fc..4f89e91 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -40,15 +40,17 @@
       uint32_t value = 0;
       CHECK(GetVReg(m, 0, kIntVReg, &value));
       CHECK_EQ(value, 42u);
-    } else if (m_name.compare("testIntervalHole") == 0) {
+    } else if (m_name.compare("$opt$noinline$testIntervalHole") == 0) {
+      uint32_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
+      uint32_t dex_register_of_first_parameter = number_of_dex_registers - 2;
       found_method_ = true;
       uint32_t value = 0;
       if (GetCurrentQuickFrame() != nullptr &&
           GetCurrentOatQuickMethodHeader()->IsOptimized() &&
           !Runtime::Current()->IsDebuggable()) {
-        CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false);
+        CHECK_EQ(GetVReg(m, dex_register_of_first_parameter, kIntVReg, &value), false);
       } else {
-        CHECK(GetVReg(m, 0, kIntVReg, &value));
+        CHECK(GetVReg(m, dex_register_of_first_parameter, kIntVReg, &value));
         CHECK_EQ(value, 1u);
       }
     }
diff --git a/test/466-get-live-vreg/src/Main.java b/test/466-get-live-vreg/src/Main.java
index d036a24..1903260 100644
--- a/test/466-get-live-vreg/src/Main.java
+++ b/test/466-get-live-vreg/src/Main.java
@@ -31,7 +31,7 @@
     }
   }
 
-  static void testIntervalHole(int arg, boolean test) {
+  static void $opt$noinline$testIntervalHole(int arg, boolean test) {
     // Move the argument to callee save to ensure it is in
     // a readable register.
     moveArgToCalleeSave();
@@ -44,6 +44,9 @@
       // The environment use of `arg` should not make it live.
       doStaticNativeCallLiveVreg();
     }
+    if (staticField1 == 2) {
+      throw new Error("");
+    }
   }
 
   static native void doStaticNativeCallLiveVreg();
@@ -67,7 +70,7 @@
   static void testWrapperIntervalHole(int arg, boolean test) {
     try {
       Thread.sleep(0);
-      testIntervalHole(arg, test);
+      $opt$noinline$testIntervalHole(arg, test);
     } catch (Exception e) {
       throw new Error(e);
     }
diff --git a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
index da1c5ec..87ad21e 100644
--- a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
+++ b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
@@ -18,7 +18,7 @@
 
 .field public static value:Z
 
-## CHECK-START: boolean TestCase.testCase() boolean_simplifier (before)
+## CHECK-START: boolean TestCase.testCase() select_generator (before)
 ## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
 ## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
 ## CHECK-DAG:     <<Value:z\d+>>    StaticFieldGet
@@ -26,10 +26,12 @@
 ## CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
 ## CHECK-DAG:                       Return [<<Phi>>]
 
-## CHECK-START: boolean TestCase.testCase() boolean_simplifier (after)
+## CHECK-START: boolean TestCase.testCase() select_generator (after)
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
 ## CHECK-DAG:     <<Value:z\d+>>    StaticFieldGet
-## CHECK-DAG:     <<Not:z\d+>>      BooleanNot [<<Value>>]
-## CHECK-DAG:                       Return [<<Not>>]
+## CHECK-DAG:     <<Select:i\d+>>   Select [<<Const1>>,<<Const0>>,<<Value>>]
+## CHECK-DAG:                       Return [<<Select>>]
 
 .method public static testCase()Z
     .registers 2
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/469-condition-materialization/expected.txt
similarity index 100%
rename from test/469-condition-materialization-regression/expected.txt
rename to test/469-condition-materialization/expected.txt
diff --git a/test/469-condition-materialization-regression/info.txt b/test/469-condition-materialization/info.txt
similarity index 100%
rename from test/469-condition-materialization-regression/info.txt
rename to test/469-condition-materialization/info.txt
diff --git a/test/469-condition-materialization-regression/src/Main.java b/test/469-condition-materialization/src/Main.java
similarity index 100%
rename from test/469-condition-materialization-regression/src/Main.java
rename to test/469-condition-materialization/src/Main.java
diff --git a/test/471-uninitialized-locals/src/Main.java b/test/471-uninitialized-locals/src/Main.java
index a5b1c48..1ac749e 100644
--- a/test/471-uninitialized-locals/src/Main.java
+++ b/test/471-uninitialized-locals/src/Main.java
@@ -24,8 +24,8 @@
   public static void main(String args[]) throws Exception {
     try {
       Class<?> c = Class.forName("Test");
-      Method m = c.getMethod("ThrowException", (Class[]) null);
-      m.invoke(null, (Object[]) null);
+      Method m = c.getMethod("ThrowException");
+      m.invoke(null);
     } catch (VerifyError e) {
        // Compilation should go fine but we expect the runtime verification to fail.
       return;
diff --git a/test/472-unreachable-if-regression/src/Main.java b/test/472-unreachable-if-regression/src/Main.java
index c9f9511..d426df1 100644
--- a/test/472-unreachable-if-regression/src/Main.java
+++ b/test/472-unreachable-if-regression/src/Main.java
@@ -25,12 +25,12 @@
     System.out.println("Test started.");
     Class<?> c = Class.forName("Test");
 
-    Method unreachableIf = c.getMethod("UnreachableIf", (Class[]) null);
-    unreachableIf.invoke(null, (Object[]) null);
+    Method unreachableIf = c.getMethod("UnreachableIf");
+    unreachableIf.invoke(null);
     System.out.println("Successfully called UnreachableIf().");
 
-    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch", (Class[]) null);
-    unreachablePackedSwitch.invoke(null, (Object[]) null);
+    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch");
+    unreachablePackedSwitch.invoke(null);
     System.out.println("Successfully called UnreachablePackedSwitch().");
   }
 
diff --git a/test/474-checker-boolean-input/src/Main.java b/test/474-checker-boolean-input/src/Main.java
index a2b219d..fbc28d8 100644
--- a/test/474-checker-boolean-input/src/Main.java
+++ b/test/474-checker-boolean-input/src/Main.java
@@ -27,9 +27,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestPhiAsBoolean(int) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestPhiAsBoolean(int) select_generator (after)
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi
-  /// CHECK-DAG:                      BooleanNot [<<Phi>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Phi>>]
 
   public static boolean f1;
   public static boolean f2;
@@ -47,9 +47,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestAndAsBoolean(boolean, boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestAndAsBoolean(boolean, boolean) select_generator (after)
   /// CHECK-DAG:     <<And:i\d+>>     And
-  /// CHECK-DAG:                      BooleanNot [<<And>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<And>>]
 
   public static boolean InlineAnd(boolean x, boolean y) {
     return x & y;
@@ -64,9 +64,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestOrAsBoolean(boolean, boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestOrAsBoolean(boolean, boolean) select_generator (after)
   /// CHECK-DAG:     <<Or:i\d+>>      Or
-  /// CHECK-DAG:                      BooleanNot [<<Or>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Or>>]
 
   public static boolean InlineOr(boolean x, boolean y) {
     return x | y;
@@ -81,9 +81,9 @@
    * we implement a suitable type analysis.
    */
 
-  /// CHECK-START: boolean Main.TestXorAsBoolean(boolean, boolean) boolean_simplifier (after)
+  /// CHECK-START: boolean Main.TestXorAsBoolean(boolean, boolean) select_generator (after)
   /// CHECK-DAG:     <<Xor:i\d+>>     Xor
-  /// CHECK-DAG:                      BooleanNot [<<Xor>>]
+  /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Xor>>]
 
   public static boolean InlineXor(boolean x, boolean y) {
     return x ^ y;
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 41bec05..c2a2a10 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -25,13 +25,14 @@
 class ClassWithFinals {
   public final int x;
   public ClassWithFinals obj;
+  public static boolean doThrow = false;
 
   /// CHECK-START: void ClassWithFinals.<init>(boolean) register (after)
   /// CHECK:      MemoryBarrier kind:StoreStore
   /// CHECK-NEXT: ReturnVoid
   public ClassWithFinals(boolean cond) {
     x = 0;
-    if (cond) {
+    if (doThrow) {
       // avoid inlining
       throw new RuntimeException();
     }
diff --git a/test/476-clinit-check-inlining-static-invoke/expected.txt b/test/476-clinit-inline-static-invoke/expected.txt
similarity index 100%
rename from test/476-clinit-check-inlining-static-invoke/expected.txt
rename to test/476-clinit-inline-static-invoke/expected.txt
diff --git a/test/476-clinit-check-inlining-static-invoke/info.txt b/test/476-clinit-inline-static-invoke/info.txt
similarity index 100%
rename from test/476-clinit-check-inlining-static-invoke/info.txt
rename to test/476-clinit-inline-static-invoke/info.txt
diff --git a/test/476-clinit-check-inlining-static-invoke/src/Main.java b/test/476-clinit-inline-static-invoke/src/Main.java
similarity index 100%
rename from test/476-clinit-check-inlining-static-invoke/src/Main.java
rename to test/476-clinit-inline-static-invoke/src/Main.java
diff --git a/test/477-checker-bound-type/src/Main.java b/test/477-checker-bound-type/src/Main.java
index c873702..2504ab2 100644
--- a/test/477-checker-bound-type/src/Main.java
+++ b/test/477-checker-bound-type/src/Main.java
@@ -17,7 +17,7 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForIf(Object a) {
     if (a != null) {
@@ -27,7 +27,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
@@ -37,7 +37,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForIf(Object a) {
     if (a == null) {
@@ -47,7 +47,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt
index 387e1a7..6f73b65 100644
--- a/test/478-checker-clinit-check-pruning/expected.txt
+++ b/test/478-checker-clinit-check-pruning/expected.txt
@@ -4,3 +4,10 @@
 Main$ClassWithClinit4's static initializer
 Main$ClassWithClinit5's static initializer
 Main$ClassWithClinit6's static initializer
+Main$ClassWithClinit7's static initializer
+Main$ClassWithClinit8's static initializer
+Main$ClassWithClinit9's static initializer
+Main$ClassWithClinit10's static initializer
+Main$ClassWithClinit11's static initializer
+Main$ClassWithClinit12's static initializer
+Main$ClassWithClinit13's static initializer
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
index e6aab63..c2982b4 100644
--- a/test/478-checker-clinit-check-pruning/src/Main.java
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -16,6 +16,8 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   /*
    * Ensure an inlined static invoke explicitly triggers the
    * initialization check of the called method's declaring class, and
@@ -67,14 +69,14 @@
    */
 
   /// CHECK-START: void Main.invokeStaticNotInlined() builder (after)
-  /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
+  /// CHECK:         <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
+  /// CHECK:         <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  /// CHECK:                               InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
 
   /// CHECK-START: void Main.invokeStaticNotInlined() inliner (after)
-  /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
+  /// CHECK:         <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
+  /// CHECK:         <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  /// CHECK:                               InvokeStaticOrDirect [{{([ij]\d+,)?}}<<ClinitCheck>>]
 
   // The following checks ensure the clinit check and load class
   // instructions added by the builder are pruned by the
@@ -83,7 +85,7 @@
   // before the next pass (liveness analysis) instead.
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:implicit
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
   /// CHECK-NOT:                           LoadClass
@@ -101,10 +103,8 @@
     static boolean doThrow = false;
 
     static void $noinline$staticMethod() {
-      if (doThrow) {
-        // Try defeating inlining.
-        throw new Error();
-      }
+      // Try defeating inlining.
+      if (doThrow) { throw new Error(); }
     }
   }
 
@@ -179,10 +179,8 @@
     static boolean doThrow = false;
 
     static void $noinline$staticMethod() {
-      if (doThrow) {
         // Try defeating inlining.
-        throw new Error();
-      }
+      if (doThrow) { throw new Error(); }
     }
   }
 
@@ -243,10 +241,8 @@
     static boolean doThrow = false;
 
     static void $noinline$staticMethod() {
-      if (doThrow) {
         // Try defeating inlining.
-        throw new Error();
-      }
+      if (doThrow) { throw new Error(); }
     }
 
     static {
@@ -269,7 +265,7 @@
   /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before)
   /// CHECK-DAG:     <<IntConstant:i\d+>>  IntConstant 0
   /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK-DAG:                           InvokeStaticOrDirect clinit_check:implicit
   /// CHECK-DAG:                           StaticFieldSet [<<LoadClass>>,<<IntConstant>>]
 
   /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before)
@@ -289,7 +285,7 @@
   /// CHECK-DAG:     <<IntConstant:i\d+>>  IntConstant 0
   /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:true
   /// CHECK-DAG:                           StaticFieldSet [<<LoadClass>>,<<IntConstant>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK-DAG:                           InvokeStaticOrDirect clinit_check:none
 
   /// CHECK-START: void Main.clinitBecauseOfFieldAccess() liveness (before)
   /// CHECK-NOT:                           ClinitCheck
@@ -298,6 +294,233 @@
     ClassWithClinit2.$noinline$staticMethod();
   }
 
+  /*
+   * Verify that LoadClass from const-class is not merged with
+   * later invoke-static (or its ClinitCheck).
+   */
+
+  /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:false
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:implicit
+
+  /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void constClassAndInvokeStatic(Iterable<?> it) {
+    $opt$inline$ignoreClass(ClassWithClinit7.class);
+    ClassWithClinit7.$noinline$someStaticMethod(it);
+  }
+
+  static void $opt$inline$ignoreClass(Class<?> c) {
+  }
+
+  static class ClassWithClinit7 {
+    static {
+      System.out.println("Main$ClassWithClinit7's static initializer");
+    }
+
+    static void $noinline$someStaticMethod(Iterable<?> it) {
+      it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
+    }
+  }
+
+  /*
+   * Verify that LoadClass from sget is not merged with later invoke-static.
+   */
+
+  /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void sgetAndInvokeStatic(Iterable<?> it) {
+    $opt$inline$ignoreInt(ClassWithClinit8.value);
+    ClassWithClinit8.$noinline$someStaticMethod(it);
+  }
+
+  static void $opt$inline$ignoreInt(int i) {
+  }
+
+  static class ClassWithClinit8 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit8's static initializer");
+    }
+
+    static void $noinline$someStaticMethod(Iterable<?> it) {
+      it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
+    }
+  }
+
+  /*
+   * Verify that LoadClass from const-class, ClinitCheck from sget and
+   * InvokeStaticOrDirect from invoke-static are not merged.
+   */
+
+  /// CHECK-START: void Main.constClassSgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:false
+  /// CHECK:                               ClinitCheck
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  static void constClassSgetAndInvokeStatic(Iterable<?> it) {
+    $opt$inline$ignoreClass(ClassWithClinit9.class);
+    $opt$inline$ignoreInt(ClassWithClinit9.value);
+    ClassWithClinit9.$noinline$someStaticMethod(it);
+  }
+
+  static class ClassWithClinit9 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit9's static initializer");
+    }
+
+    static void $noinline$someStaticMethod(Iterable<?> it) {
+      it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
+    }
+  }
+
+  /*
+   * Verify that LoadClass from a fully-inlined invoke-static is not merged
+   * with InvokeStaticOrDirect from a later invoke-static to the same method.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaNonStatic(Iterable<?> it) {
+    inlinedInvokeStaticViaNonStaticHelper(null);
+    inlinedInvokeStaticViaNonStaticHelper(it);
+  }
+
+  static void inlinedInvokeStaticViaNonStaticHelper(Iterable<?> it) {
+    ClassWithClinit10.inlinedForNull(it);
+  }
+
+  static class ClassWithClinit10 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit10's static initializer");
+    }
+
+    static void inlinedForNull(Iterable<?> it) {
+      if (it != null) {
+        it.iterator();
+        // We're not inlining throw at the moment.
+        if (doThrow) { throw new Error(""); }
+      }
+    }
+  }
+
+  /*
+   * Check that the LoadClass from an invoke-static C.foo() doesn't get merged with
+   * an invoke-static inside C.foo(). This would mess up the stack walk in the
+   * resolution trampoline where we would have to load C (if C isn't loaded yet)
+   * which is not permitted there.
+   *
+   * Note: In case of failure, we would get a failed assertion during compilation,
+   * so we wouldn't really get to the checker tests below.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaStatic(Iterable<?> it) {
+    ClassWithClinit11.callInlinedForNull(it);
+  }
+
+  static class ClassWithClinit11 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit11's static initializer");
+    }
+
+    static void callInlinedForNull(Iterable<?> it) {
+      inlinedForNull(it);
+    }
+
+    static void inlinedForNull(Iterable<?> it) {
+      it.iterator();
+      if (it != null) {
+        // We're not inlining throw at the moment.
+        if (doThrow) { throw new Error(""); }
+      }
+    }
+  }
+
+  /*
+   * A test similar to inlinedInvokeStaticViaStatic() but doing the indirect invoke
+   * twice, with the first one fully inlined.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaStaticTwice(Iterable<?> it) {
+    ClassWithClinit12.callInlinedForNull(null);
+    ClassWithClinit12.callInlinedForNull(it);
+  }
+
+  static class ClassWithClinit12 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit12's static initializer");
+    }
+
+    static void callInlinedForNull(Iterable<?> it) {
+      inlinedForNull(it);
+    }
+
+    static void inlinedForNull(Iterable<?> it) {
+      if (it != null) {
+        // We're not inlining throw at the moment.
+        if (doThrow) { throw new Error(""); }
+      }
+    }
+  }
+
+  static class ClassWithClinit13 {
+    static {
+      System.out.println("Main$ClassWithClinit13's static initializer");
+    }
+
+    public static void $inline$forwardToGetIterator(Iterable<?> it) {
+      $noinline$getIterator(it);
+    }
+
+    public static void $noinline$getIterator(Iterable<?> it) {
+      it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
+    }
+  }
+
+  // TODO: Write checker statements.
+  static Object $noinline$testInliningAndNewInstance(Iterable<?> it) {
+    if (doThrow) { throw new Error(); }
+    ClassWithClinit13.$inline$forwardToGetIterator(it);
+    return new ClassWithClinit13();
+  }
+
   // TODO: Add a test for the case of a static method whose declaring
   // class type index is not available (i.e. when `storage_index`
   // equals `DexFile::kDexNoIndex` in
@@ -310,5 +533,13 @@
     ClassWithClinit4.invokeStaticNotInlined();
     SubClassOfClassWithClinit5.invokeStaticInlined();
     SubClassOfClassWithClinit6.invokeStaticNotInlined();
+    Iterable it = new Iterable() { public java.util.Iterator iterator() { return null; } };
+    constClassAndInvokeStatic(it);
+    sgetAndInvokeStatic(it);
+    constClassSgetAndInvokeStatic(it);
+    inlinedInvokeStaticViaNonStatic(it);
+    inlinedInvokeStaticViaStatic(it);
+    inlinedInvokeStaticViaStaticTwice(it);
+    $noinline$testInliningAndNewInstance(it);
   }
 }
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index 5adafaf..141054d 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -30,7 +30,7 @@
     return false;
   }
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -39,13 +39,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Sub
   /// CHECK-NOT:                      Phi
@@ -56,11 +56,13 @@
       z = x + y;
     } else {
       z = x - y;
+      // Prevent HSelect simplification by having a branch with multiple instructions.
+      System.nanoTime();
     }
     return z;
   }
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -69,13 +71,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Phi
@@ -86,14 +88,16 @@
       z = x + y;
     } else {
       z = x - y;
+      // Prevent HSelect simplification by having a branch with multiple instructions.
+      System.nanoTime();
     }
     return z;
   }
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (before)
   /// CHECK:                          Mul
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      Mul
 
   public static int testRemoveLoop(int x) {
@@ -105,11 +109,11 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      Return
   /// CHECK-DAG:                      Exit
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      Return
   /// CHECK-NOT:                      Exit
 
@@ -120,15 +124,15 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -139,16 +143,16 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -161,13 +165,13 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (before)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (after)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
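The System.nanoTime() call added above deserves a note: the instruction
simplifier can turn a small if/else diamond into an HSelect, which would
leave no If for the checks in this test to exercise. A minimal sketch of
the two shapes, assuming (per the comment in the patch) that a branch with
more than one instruction blocks the rewrite; this is an illustration, not
part of the patch:

    class SelectShapes {
      // Each branch is a single instruction, so the compiler may rewrite
      // the diamond into one HSelect and drop the If entirely.
      static int simplifiable(boolean c, int x, int y) {
        int z;
        if (c) { z = x + y; } else { z = x - y; }
        return z;
      }
      // The extra side-effecting call keeps the else-branch at two
      // instructions, so the If survives into later passes.
      static int notSimplifiable(boolean c, int x, int y) {
        int z;
        if (c) {
          z = x + y;
        } else {
          z = x - y;
          System.nanoTime();
        }
        return z;
      }
    }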
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index 6b4da9d..f8f0aa3 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -40,6 +40,9 @@
   /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
 
   public static void loop2(boolean incoming) {
+    // Add some code at entry to avoid having the entry block be a pre-header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (true) {
       System.out.println("foo");
       while (incoming) {}
@@ -163,13 +166,16 @@
   /// CHECK:         <<Arg:z\d+>>  StaticFieldGet  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
   /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
   /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
-  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
   /// CHECK:                       Exit
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
   /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
   /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
   /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
 
   public static void loop9() {
+    // Add some code at entry to avoid having the entry block be a pre-header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (Runtime.getRuntime() != null) {
       // 'incoming' must only have a use in the inner loop.
       boolean incoming = field;
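Both comments added in this file make the same point: if the loop is the
first thing in the method, the entry block itself would have to serve as
the loop pre-header, and the graph builder would need to synthesize an
extra block. A rough sketch of the two shapes (illustrative only, not from
the patch):

    class PreHeaderShapes {
      // Loop first: the method entry block would double as the loop
      // pre-header, forcing a synthesized block to be inserted.
      static void loopAtEntry(boolean b) {
        while (b) { }
      }
      // A leading statement gives the loop a dedicated pre-header block.
      static void loopAfterCode(boolean b) {
        System.out.println("Enter");
        while (b) { }
      }
    }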
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index ab4afdb..e3617c7 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -23,7 +23,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
@@ -36,7 +36,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
@@ -73,7 +73,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -88,7 +88,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -129,37 +129,35 @@
 .end method
 
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
 ## CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
-## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
+## CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
-## CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:<<HeaderY>>
-## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX>>,<<Cst11>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<SelX:i\d+>>  Select [<<PhiX>>,<<Mul9>>,<<ArgZ>>]        loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
-## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
-## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
+## CHECK-DAG:     <<Add5>>       Add [<<SelX>>,<<Cst5>>]                    loop:<<HeaderY>>
+## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
-## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
+## CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
-## CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:none
-## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:none
-## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
+## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX>>,<<Cst11>>]                   loop:none
+## CHECK-DAG:     <<SelX:i\d+>>  Select [<<PhiX>>,<<Mul9>>,<<ArgZ>>]        loop:none
+## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
 .method public static testExitPredecessors(IZZ)I
   .registers 4
@@ -177,7 +175,7 @@
 
   # Additional logic which will end up outside the loop
   if-eqz p2, :skip_if
-  mul-int/lit8 p0, p0, 9
+  mul-int/lit8 p0, p0, 11
   :skip_if
 
   if-nez v0, :loop_end    # will always take the branch
@@ -196,7 +194,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -219,7 +217,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
index 019d876..7d5fd4f 100644
--- a/test/485-checker-dce-switch/src/Main.java
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -20,14 +20,14 @@
     return 5;
   }
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
   /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
   /// CHECK-DAG:                      Return [<<Const100>>]
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int wholeSwitchDead(int j) {
@@ -60,14 +60,14 @@
     return l;
   }
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:                      Return [<<Const7>>]
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_InRange() {
@@ -96,14 +96,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
   /// CHECK-DAG:                      Return [<<Const15>>]
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_AboveRange() {
@@ -132,14 +132,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
   /// CHECK-DAG:                      Return [<<ConstM5>>]
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_BelowRange() {
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index c1f25b3..87ff3f7 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect recursive:true
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect recursive:true
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/489-current-method-regression/src/Main.java b/test/489-current-method-regression/src/Main.java
index 7d102f5..285c41d 100644
--- a/test/489-current-method-regression/src/Main.java
+++ b/test/489-current-method-regression/src/Main.java
@@ -23,7 +23,7 @@
     if (a == 42) {
       // The class loading will be seen as dead code by
       // the optimizer.
-      Class c = Main.class;
+      Class<?> c = Main.class;
     }
     return new Main().bar();
   }
diff --git a/test/490-checker-inline/src/Main.java b/test/490-checker-inline/src/Main.java
index 21a0189..2e2deea 100644
--- a/test/490-checker-inline/src/Main.java
+++ b/test/490-checker-inline/src/Main.java
@@ -39,7 +39,7 @@
   /// CHECK-DAG:     InvokeInterface
 
   /// CHECK-START: void Main.testMethod() inliner (after)
-  /// CHECK-NOT:     Invoke{{.*}}
+  /// CHECK-NOT:     Invoke{{.*Object\.<init>.*}}
 
   public static void testMethod() {
     createMain().invokeVirtual();
diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt
index b0014d7..42b331f 100644
--- a/test/492-checker-inline-invoke-interface/expected.txt
+++ b/test/492-checker-inline-invoke-interface/expected.txt
@@ -2,4 +2,4 @@
 java.lang.Exception
 	at ForceStatic.<clinit>(Main.java:24)
 	at Main.$inline$foo(Main.java:31)
-	at Main.main(Main.java:48)
+	at Main.main(Main.java:50)
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 9a45485..a919690 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,15 +31,17 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
-  /// CHECK:           InvokeStaticOrDirect
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK-START: void Main.main(java.lang.String[]) builder (after)
+  /// CHECK:           InvokeStaticOrDirect {{.*Main.<init>.*}}
+  /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
-  /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeStaticOrDirect {{.*Main.<init>.*}}
+  /// CHECK-NOT:       InvokeVirtual
+  /// CHECK-NOT:       InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK:           ClinitCheck
diff --git a/test/493-checker-inline-invoke-interface/src/Main.java b/test/493-checker-inline-invoke-interface/src/Main.java
index 44b727f..171405c 100644
--- a/test/493-checker-inline-invoke-interface/src/Main.java
+++ b/test/493-checker-inline-invoke-interface/src/Main.java
@@ -36,7 +36,7 @@
   /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
-  /// CHECK-NOT:       Invoke{{.*}}
+  /// CHECK-NOT:       Invoke{{.*Object\.<init>.*}}
   public static void main(String[] args) {
     Itf itf = bar();
     itf.foo();
diff --git a/test/495-checker-checkcast-tests/src/Main.java b/test/495-checker-checkcast-tests/src/Main.java
index 4b2bf09..6011c7c 100644
--- a/test/495-checker-checkcast-tests/src/Main.java
+++ b/test/495-checker-checkcast-tests/src/Main.java
@@ -113,13 +113,13 @@
   }
 
   /// CHECK-START: java.lang.String Main.knownTestWithLoadedClass() register (after)
-  /// CHECK-NOT: LoadClass
+  /// CHECK-NOT: CheckCast
   public static String knownTestWithLoadedClass() {
     return (String)$inline$getString();
   }
 
   /// CHECK-START: Itf Main.knownTestWithUnloadedClass() register (after)
-  /// CHECK: LoadClass
+  /// CHECK: CheckCast
   public static Itf knownTestWithUnloadedClass() {
     return (Itf)$inline$getString();
   }
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index 39c031a..15d4dc0 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.util.ArrayList;
 import java.util.List;
 
 class MyClassLoader extends ClassLoader {
@@ -30,18 +31,31 @@
     Object pathList = f.get(loader);
 
     // Some magic to get access to the dexField field of pathList.
+    // Make a copy of the dex elements, since we don't want an app image with anything
+    // pre-resolved.
     f = pathList.getClass().getDeclaredField("dexElements");
     f.setAccessible(true);
-    dexElements = (Object[]) f.get(pathList);
-    dexFileField = dexElements[0].getClass().getDeclaredField("dexFile");
-    dexFileField.setAccessible(true);
+    Object[] dexElements = (Object[]) f.get(pathList);
+    f = dexElements[0].getClass().getDeclaredField("dexFile");
+    f.setAccessible(true);
+    for (Object element : dexElements) {
+      Object dexFile = f.get(element);
+      // Make a copy of the dex file.
+      Field fileNameField = dexFile.getClass().getDeclaredField("mFileName");
+      fileNameField.setAccessible(true);
+      dexFiles.add(dexFile.getClass().getDeclaredConstructor(String.class).newInstance(
+        fileNameField.get(dexFile)));
+    }
   }
 
-  Object[] dexElements;
+  ArrayList<Object> dexFiles = new ArrayList<Object>();
   Field dexFileField;
 
   protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException {
-    System.out.println("Request for " + className);
+    // Other classes may also get loaded; ignore those.
+    if (className.equals("LoadedByMyClassLoader") || className.equals("FirstSeenByMyClassLoader")) {
+      System.out.println("Request for " + className);
+    }
 
     // We're only going to handle LoadedByMyClassLoader.
     if (className != "LoadedByMyClassLoader") {
@@ -50,13 +64,12 @@
 
     // Mimic what DexPathList.findClass is doing.
     try {
-      for (Object element : dexElements) {
-        Object dex = dexFileField.get(element);
-        Method method = dex.getClass().getDeclaredMethod(
+      for (Object dexFile : dexFiles) {
+        Method method = dexFile.getClass().getDeclaredMethod(
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
-        if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+        if (dexFile != null) {
+          Class<?> clazz = (Class<?>)method.invoke(dexFile, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -94,9 +107,11 @@
                 /* Load and initialize FirstSeenByMyClassLoader */
   /// CHECK:      LoadClass gen_clinit_check:true
                 /* Load and initialize System */
-  /// CHECK-NEXT: LoadClass gen_clinit_check:true
+  // There may be MipsComputeBaseMethodAddress here.
+  /// CHECK:      LoadClass gen_clinit_check:true
   /// CHECK-NEXT: StaticFieldGet
-  /// CHECK-NEXT: LoadString
+  // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here.
+  /// CHECK:      LoadString
   /// CHECK-NEXT: NullCheck
   /// CHECK-NEXT: InvokeVirtual
   public static void bar() {
@@ -109,7 +124,7 @@
 public class Main {
   public static void main(String[] args) throws Exception {
     MyClassLoader o = new MyClassLoader();
-    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Class<?> foo = o.loadClass("LoadedByMyClassLoader");
     Method m = foo.getDeclaredMethod("bar");
     m.invoke(null);
   }
diff --git a/test/497-inlining-and-class-loader/clear_dex_cache.cc b/test/497-inlining-and-class-loader/clear_dex_cache.cc
index 50d1a63..1597c4a 100644
--- a/test/497-inlining-and-class-loader/clear_dex_cache.cc
+++ b/test/497-inlining-and-class-loader/clear_dex_cache.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "jni.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
@@ -44,8 +45,8 @@
   CHECK(array != nullptr);
   mirror::PointerArray* pointer_array = soa.Decode<mirror::PointerArray*>(array);
   for (size_t i = 0; i != num_methods; ++i) {
-    ArtMethod* method = mirror::DexCache::GetElementPtrSize(methods, i, sizeof(void*));
-    pointer_array->SetElementPtrSize(i, method, sizeof(void*));
+    ArtMethod* method = mirror::DexCache::GetElementPtrSize(methods, i, kRuntimePointerSize);
+    pointer_array->SetElementPtrSize(i, method, kRuntimePointerSize);
   }
   return array;
 }
@@ -61,8 +62,8 @@
   CHECK_EQ(methods != nullptr, old != nullptr);
   CHECK_EQ(num_methods, static_cast<size_t>(old->GetLength()));
   for (size_t i = 0; i != num_methods; ++i) {
-    ArtMethod* method = old->GetElementPtrSize<ArtMethod*>(i, sizeof(void*));
-    mirror::DexCache::SetElementPtrSize(methods, i, method, sizeof(void*));
+    ArtMethod* method = old->GetElementPtrSize<ArtMethod*>(i, kRuntimePointerSize);
+    mirror::DexCache::SetElementPtrSize(methods, i, method, kRuntimePointerSize);
   }
 }
 
diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java
index 832b1f0..1e27e77 100644
--- a/test/497-inlining-and-class-loader/src/Main.java
+++ b/test/497-inlining-and-class-loader/src/Main.java
@@ -66,7 +66,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dex, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -92,7 +92,7 @@
 
     MyClassLoader o = new MyClassLoader();
     MyClassLoader.level1ClassLoader = new MyClassLoader();
-    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Class<?> foo = o.loadClass("LoadedByMyClassLoader");
     Method m = foo.getDeclaredMethod("bar");
     try {
       m.invoke(null);
diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt
index fbd93fa..988b220 100644
--- a/test/501-regression-packed-switch/info.txt
+++ b/test/501-regression-packed-switch/info.txt
@@ -1,2 +1,4 @@
 Regression test for the interpreter and optimizing's builder which used
 to trip when compiled code contained a packed switch with no targets.
+Regression test for the arm64 mterp miscalculating the switch table
+address by zero-extending a register instead of sign-extending it.
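To spell out the failure mode: the switch payload offset is a signed 32-bit
value, so when the payload sits before the packed-switch instruction the
offset is negative, and widening it with a zero-extend instead of a
sign-extend produces a bogus address. A small arithmetic sketch with
assumed values (illustration only):

    class ExtendDemo {
      public static void main(String[] args) {
        int offset = -8;             // payload placed before the switch
        long base = 0x70000000L;     // hypothetical instruction address
        // Correct: sign-extend the 32-bit offset when widening to 64 bits.
        long signExtended = base + offset;                  // 0x6ffffff8
        // Buggy: zero-extend, turning -8 into 0xfffffff8.
        long zeroExtended = base + (offset & 0xFFFFFFFFL);  // 0x16ffffff8
        System.out.println(Long.toHexString(signExtended));
        System.out.println(Long.toHexString(zeroExtended));
      }
    }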
diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali
index 8756ed5..5a760c7 100644
--- a/test/501-regression-packed-switch/smali/Test.smali
+++ b/test/501-regression-packed-switch/smali/Test.smali
@@ -27,3 +27,28 @@
   .packed-switch 0x0
   .end packed-switch
 .end method
+
+.method public static PackedSwitchAfterData(I)I
+  .registers 1
+  goto :pswitch_instr
+
+  :case0
+  const/4 v0, 0x1
+  return v0
+
+  :pswitch_data
+  .packed-switch 0x0
+    :case0
+    :case1
+  .end packed-switch
+
+  :pswitch_instr
+  packed-switch v0, :pswitch_data
+  const/4 v0, 0x7
+  return v0
+
+  :case1
+  const/4 v0, 0x4
+  return v0
+
+.end method
diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java
index b80bc62..74c081a 100644
--- a/test/501-regression-packed-switch/src/Main.java
+++ b/test/501-regression-packed-switch/src/Main.java
@@ -24,10 +24,15 @@
 
   public static void main(String args[]) throws Exception {
     Class<?> c = Class.forName("Test");
-    Method m = c.getMethod("EmptyPackedSwitch", new Class[] { int.class });
+    Method m = c.getMethod("EmptyPackedSwitch", int.class);
     Integer result = (Integer) m.invoke(null, new Integer(42));
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
     }
+    m = c.getMethod("PackedSwitchAfterData", int.class);
+    result = (Integer) m.invoke(null, new Integer(0));
+    if (result != 1) {
+      throw new Error("Expected 1, got " + result);
+    }
   }
 }
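The reflection cleanup here and in the next file relies on Class.getMethod
taking a varargs Class<?>... parameter, so the explicit array wrapper is
redundant. A quick sketch of the equivalence against a standard API method
(illustration only):

    import java.lang.reflect.Method;

    class GetMethodVarargs {
      public static void main(String[] args) throws Exception {
        // Both calls resolve the same method; varargs packs the arguments
        // into a Class[] automatically.
        Method a = Integer.class.getMethod("parseInt", new Class<?>[] { String.class });
        Method b = Integer.class.getMethod("parseInt", String.class);
        System.out.println(a.equals(b));  // prints "true"
      }
    }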
diff --git a/test/504-regression-baseline-entry/src/Main.java b/test/504-regression-baseline-entry/src/Main.java
index 2c9df28..284cbdc 100644
--- a/test/504-regression-baseline-entry/src/Main.java
+++ b/test/504-regression-baseline-entry/src/Main.java
@@ -24,7 +24,7 @@
 
   public static void main(String args[]) throws Exception {
     Class<?> c = Class.forName("Test");
-    Method m = c.getMethod("SingleGotoStart", (Class[]) null);
+    Method m = c.getMethod("SingleGotoStart");
     Integer result = (Integer) m.invoke(null);
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
diff --git a/test/506-verify-aput/src/Main.java b/test/506-verify-aput/src/Main.java
index 8359f2c..08368d4 100644
--- a/test/506-verify-aput/src/Main.java
+++ b/test/506-verify-aput/src/Main.java
@@ -23,11 +23,12 @@
     try {
       Class.forName("VerifyAPut1");
       throw new Error("expected verification error");
-    } catch (VerifyError e) { /* ignore */ }
-
+    } catch (VerifyError e) { /* ignore */
+    } catch (Error e) { System.out.println(e.getClass() + " " + e.getClass().getClassLoader()); }
     try {
       Class.forName("VerifyAPut2");
       throw new Error("expected verification error");
-    } catch (VerifyError e) { /* ignore */ }
+    } catch (VerifyError e) { /* ignore */
+    } catch (Error e) { System.out.println(e.getClass() + " " + e.getClass().getClassLoader()); }
   }
 }
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 1fde5ed..b0bffa5 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -21,11 +21,11 @@
 
 ## CHECK-START: int Builder.testMultipleTryCatch(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
-## CHECK:  <<Minus3:i\d+>>  IntConstant -3
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK-DAG:  <<Minus3:i\d+>>  IntConstant -3
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
@@ -41,28 +41,35 @@
 ## CHECK:  predecessors     "<<BEnterTry2>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  DivZeroCheck
+## CHECK:  <<Div:i\d+>> Div
 
-## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>" "<<BCatch3:B\d+>>"
+## CHECK:  name             "<<BAfterTry2:B\d+>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn:B\d+>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BAfterTry2>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>" "<<BCatch3:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>,<<Minus3>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch1>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch2>>"
 ## CHECK:  predecessors     "<<BEnterTry2>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch3>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus3>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -84,7 +91,7 @@
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BAfterTry2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>" "<<BCatch3>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -105,6 +112,8 @@
     .catch Ljava/lang/OutOfMemoryError; {:try_start_2 .. :try_end_2} :catch_mem
     .catchall {:try_start_2 .. :try_end_2} :catch_other
 
+    nop
+
     :return
     return p0
 
@@ -131,7 +140,7 @@
 
 ## CHECK:  name             "<<BIf>>"
 ## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BEnterTry2:B\d+>>" "<<BThen:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>" "<<BThen:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BThen>>"
@@ -145,19 +154,19 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BSplit3:B\d+>>" "<<BCatch:B\d+>>"
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "<<BThen>>"
@@ -166,23 +175,38 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BIf>>" "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BSplit2:B\d+>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  successors       "<<BSplit2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit3>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BIf>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testMultipleEntries(IIII)I
     .registers 4
 
@@ -212,31 +236,32 @@
 
 ## CHECK-START: int Builder.testMultipleExits(int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry>>"
 ## CHECK:  successors       "<<BExitTry1:B\d+>>" "<<BExitTry2:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
 ## CHECK:  If
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BThen:B\d+>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BThen:B\d+>>" "<<BCatch:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BThen>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
 ## CHECK:  successors       "<<BReturn>>"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry>>"
 ## CHECK:  predecessors     "B0"
@@ -252,10 +277,15 @@
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testMultipleExits(II)I
     .registers 2
 
@@ -282,10 +312,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnter1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnter1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1>>"
@@ -295,23 +325,25 @@
 ## CHECK:  name             "<<BTry2:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter2:B\d+>>"
 ## CHECK:  successors       "<<BExit2:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExit2>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch1>>"
 ## CHECK:  predecessors     "<<BEnter1>>" "<<BExit1>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch2>>"
 ## CHECK:  predecessors     "<<BEnter2>>" "<<BExit2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnter1>>"
 ## CHECK:  predecessors     "B0"
@@ -333,10 +365,15 @@
 
 ## CHECK:  name             "<<BExit2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExit2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testSharedBoundary(III)I
     .registers 3
 
@@ -366,10 +403,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary_Reverse(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BGoto:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BGoto:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BGoto>>"
 ## CHECK:  successors       "<<BEnter2:B\d+>>"
@@ -378,28 +415,31 @@
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1:B\d+>>"
 ## CHECK:  successors       "<<BExit1:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter2>>"
 ## CHECK:  successors       "<<BExit2:B\d+>>"
 ## CHECK:  Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExit1>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch1>>"
 ## CHECK:  predecessors     "<<BEnter1>>" "<<BExit1>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch2>>"
 ## CHECK:  predecessors     "<<BEnter2>>" "<<BExit2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnter1>>"
 ## CHECK:  predecessors     "<<BExit2>>"
@@ -415,7 +455,7 @@
 
 ## CHECK:  name             "<<BExit1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -425,6 +465,11 @@
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExit1>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testSharedBoundary_Reverse(III)I
     .registers 3
 
@@ -459,9 +504,9 @@
 
 ## CHECK-START: int Builder.testNestedTry(int, int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1:B\d+>>"
@@ -472,26 +517,30 @@
 ## CHECK:  predecessors     "<<BEnter2:B\d+>>"
 ## CHECK:  successors       "<<BExit2:B\d+>>"
 ## CHECK:  Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BTry3:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter3:B\d+>>"
 ## CHECK:  successors       "<<BExit3:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExit3>>" "<<BCatchArith:B\d+>>" "<<BCatchAll:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatchArith:B\d+>>" "<<BCatchAll:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
+## CHECK:  Return
 
 ## CHECK:  name             "<<BCatchArith>>"
 ## CHECK:  predecessors     "<<BEnter2>>" "<<BExit2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatchAll>>"
 ## CHECK:  predecessors     "<<BEnter1>>" "<<BEnter2>>" "<<BEnter3>>" "<<BExit1>>" "<<BExit2>>" "<<BExit3>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnter1>>"
 ## CHECK:  predecessors     "B0"
@@ -525,10 +574,15 @@
 
 ## CHECK:  name             "<<BExit3>>"
 ## CHECK:  predecessors     "<<BTry3>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatchAll>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExit3>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testNestedTry(IIII)I
     .registers 4
 
@@ -567,14 +621,18 @@
 ## CHECK:  predecessors     "<<BEnterTry1:B\d+>>"
 ## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>]
+## CHECK:  Return
 
 ## CHECK:  name             "<<BOutside:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
@@ -585,7 +643,7 @@
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -607,10 +665,15 @@
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testIncontinuousTry(IIII)I
     .registers 4
 
@@ -642,12 +705,12 @@
 
 ## CHECK:  name             "<<BPSwitch0>>"
 ## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BEnterTry2:B\d+>>" "<<BPSwitch1:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>" "<<BPSwitch1:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BPSwitch1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
-## CHECK:  successors       "<<BOutside:B\d+>>" "<<BEnterTry1:B\d+>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>" "<<BEnterTry1:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
@@ -656,44 +719,73 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BOutside>>"
-## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry2>>"
-## CHECK:  successors       "<<BCatchReturn:B\d+>>"
+## CHECK:  name             "<<BOutside:B\d+>>"
+## CHECK:  predecessors     "<<BSplit2>>" "<<BSplit4:B\d+>>"
+## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatchReturn>>"
-## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
 ## CHECK:  Return
 
+## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
+
+## CHECK:  name             "<<BCatch>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
+
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "<<BPSwitch1>>"
 ## CHECK:  successors       "<<BTry1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BSplit3:B\d+>>"
 ## CHECK:  successors       "<<BTry2>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  successors       "<<BSplit3>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BOutside>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  successors       "<<BSplit4>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit4>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
 .method public static testSwitchTryEnter(IIII)I
     .registers 4
 
@@ -728,58 +820,82 @@
 
 ## CHECK:  name             "<<BPSwitch0:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BTry2:B\d+>>" "<<BExitTry1:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>" "<<BExitTry1:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BPSwitch1:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
-## CHECK:  successors       "<<BOutside:B\d+>>" "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>" "<<BEnterTry2:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BTry2>>"
+## CHECK:  successors       "<<BTry2:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BTry2>>"
-## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BTry1>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BOutside>>"
-## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry2>>"
-## CHECK:  successors       "<<BCatchReturn:B\d+>>"
+## CHECK:  name             "<<BOutside:B\d+>>"
+## CHECK:  predecessors     "<<BSplit2>>" "<<BSplit3:B\d+>>"
+## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatchReturn>>"
-## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
 ## CHECK:  Return
 
+## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
+
+## CHECK:  name             "<<BCatch>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
+
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
 ## CHECK:  successors       "<<BPSwitch0>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BPSwitch1>>"
 ## CHECK:  successors       "<<BTry1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
 ## CHECK:  successors       "<<BPSwitch1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BOutside>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  successors       "<<BSplit3>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
 .method public static testSwitchTryExit(IIII)I
     .registers 4
 
@@ -825,7 +941,7 @@
 ## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry>>"
 ## CHECK:  successors       "<<BExit:B\d+>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Return [<<Minus1>>]
 
 ## CHECK:  name             "<<BExit>>"
 ## CHECK:  predecessors     "<<BExitTry>>" "<<BCatch>>"
@@ -861,26 +977,30 @@
 ## CHECK-START: int Builder.testCatchLoop(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BSplit:B\d+>>"
 
-## CHECK:  name             "<<BCatch>>"
-## CHECK:  predecessors     "B0" "<<BEnterTry:B\d+>>" "<<BExitTry:B\d+>>"
-## CHECK:  successors       "<<BEnterTry>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry:B\d+>>"
+## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry>>"
 ## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
 
 ## CHECK:  name             "<<BExit>>"
+## CHECK:  predecessors     "<<BReturn>>"
+## CHECK:  Exit
 
-## CHECK:  name             "<<BTry:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry>>"
-## CHECK:  successors       "<<BExitTry>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry>>"
-## CHECK:  predecessors     "<<BCatch>>"
+## CHECK:  predecessors     "<<BSplit>>" "<<BCatch>>"
 ## CHECK:  successors       "<<BTry>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
@@ -891,6 +1011,11 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  Goto
+
 .method public static testCatchLoop(III)I
     .registers 4
 
@@ -917,21 +1042,25 @@
 ## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry1>>" "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
 
-## CHECK:  name             "{{B\d+}}"
+## CHECK:  name             "<<BExit>>"
+## CHECK:  predecessors     "<<BReturn>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BExitTry2>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -940,14 +1069,14 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BCatch>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BCatch>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -957,6 +1086,11 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
 .method public static testHandlerEdge1(III)I
     .registers 4
 
@@ -977,17 +1111,17 @@
 ## CHECK-START: int Builder.testHandlerEdge2(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BCatch1:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>"
 
-## CHECK:  name             "<<BCatch1>>"
-## CHECK:  predecessors     "B0" "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>"
-## CHECK:  successors       "<<BEnterTry1:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1:B\d+>>"
+## CHECK:  successors       "<<BExitTry1:B\d+>>"
+## CHECK:  Div
 
-## CHECK:  name             "<<BCatch2:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry1:B\d+>>" "<<BEnterTry1>>" "<<BExitTry1>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
@@ -995,32 +1129,33 @@
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
 
-## CHECK:  name             "<<BTry1:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BExitTry1>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BExitTry2>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BEnterTry1>>"
+## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BEnterTry1>>"
-## CHECK:  predecessors     "<<BCatch1>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BCatch1>>"
 ## CHECK:  successors       "<<BTry1>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BCatch2>>"
+## CHECK:  predecessors     "<<BSplit2:B\d+>>" "<<BCatch2>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BCatch2>>"
+## CHECK:  successors       "<<BSplit2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -1030,6 +1165,16 @@
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BEnterTry1>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
 .method public static testHandlerEdge2(III)I
     .registers 4
 
@@ -1053,10 +1198,10 @@
 ## CHECK-START: int Builder.testTryInLoop(int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>"
 
 ## CHECK:  name             "<<BTry:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry>>"
+## CHECK:  predecessors     "<<BEnterTry:B\d+>>"
 ## CHECK:  successors       "<<BExitTry:B\d+>>"
 ## CHECK:  Div
 
@@ -1065,22 +1210,28 @@
 ## CHECK:  successors       "<<BEnterTry>>"
 ## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BExit:B\d+>>"
-## CHECK-NOT: predecessors  "{{B\d+}}"
-## CHECK:  end_block
-
 ## CHECK:  name             "<<BEnterTry>>"
-## CHECK:  predecessors     "B0"
+## CHECK:  predecessors     "<<BSplit1>>"
 ## CHECK:  successors       "<<BTry>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry>>"
 ## CHECK:  predecessors     "<<BTry>>"
-## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BExitTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  Goto
+
 .method public static testTryInLoop(II)I
     .registers 3
 
@@ -1098,9 +1249,10 @@
 # INVOKE it follows, even if there is a try boundary between them.
 
 ## CHECK-START: int Builder.testMoveResult_Invoke(int, int, int) builder (after)
-
-## CHECK:       <<Res:i\d+>> InvokeStaticOrDirect
-## CHECK-NEXT:  StoreLocal   [v0,<<Res>>]
+## CHECK-DAG:     <<M1:i\d+>>  IntConstant -1
+## CHECK-DAG:     <<Res:i\d+>> InvokeStaticOrDirect
+## CHECK-DAG:     <<Phi:i\d+>> Phi [<<Res>>,<<M1>>]
+## CHECK-DAG:                  Return [<<Phi>>]
 
 .method public static testMoveResult_Invoke(III)I
     .registers 3
@@ -1124,16 +1276,16 @@
 # FILLED_NEW_ARRAY it follows, even if there is a try boundary between them.
 
 ## CHECK-START: int[] Builder.testMoveResult_FilledNewArray(int, int, int) builder (after)
-
-## CHECK:      <<Res:l\d+>>     NewArray
-## CHECK-NEXT:                  Temporary
-## CHECK-NEXT: <<Local1:i\d+>>  LoadLocal  [v0]
-## CHECK-NEXT:                  ArraySet   [<<Res>>,{{i\d+}},<<Local1>>]
-## CHECK-NEXT: <<Local2:i\d+>>  LoadLocal  [v1]
-## CHECK-NEXT:                  ArraySet   [<<Res>>,{{i\d+}},<<Local2>>]
-## CHECK-NEXT: <<Local3:i\d+>>  LoadLocal  [v2]
-## CHECK-NEXT:                  ArraySet   [<<Res>>,{{i\d+}},<<Local3>>]
-## CHECK-NEXT:                  StoreLocal [v0,<<Res>>]
+## CHECK-DAG:     <<Arg1:i\d+>> ParameterValue
+## CHECK-DAG:     <<Arg2:i\d+>> ParameterValue
+## CHECK-DAG:     <<Arg3:i\d+>> ParameterValue
+## CHECK-DAG:     <<Null:l\d+>> NullConstant
+## CHECK-DAG:     <<Res:l\d+>>  NewArray
+## CHECK-DAG:                   ArraySet   [<<Res>>,{{i\d+}},<<Arg1>>]
+## CHECK-DAG:                   ArraySet   [<<Res>>,{{i\d+}},<<Arg2>>]
+## CHECK-DAG:                   ArraySet   [<<Res>>,{{i\d+}},<<Arg3>>]
+## CHECK-DAG:     <<Phi:l\d+>>  Phi [<<Res>>,<<Null>>]
+## CHECK-DAG:                   Return [<<Phi>>]
 
 .method public static testMoveResult_FilledNewArray(III)[I
     .registers 3
@@ -1205,7 +1357,10 @@
     .catchall {:try_start .. :try_end} :catch_all
 .end method
 
-## CHECK-START: int Builder.testSynchronized(java.lang.Object) builder (after)
+# Test that a throw-catch loop on monitor-exit is eliminated.
+# Note that we do not test this until after DCE, which merges trivially split blocks.
+
+## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination$initial (after)
 ## CHECK:      flags "catch_block"
 ## CHECK-NOT:  end_block
 ## CHECK:      MonitorOperation kind:exit
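+# For context: the loop being eliminated arises when the catch-all handler that
+# performs monitor-exit for a synchronized region also covers that monitor-exit
+# itself, so a throw from it lands back in the same handler. A hypothetical Java
+# shape of such a method (a sketch, not the actual test source):
+#
+#   public static int testSynchronized(Object o) {
+#     synchronized (o) {       // monitor-enter
+#       return o.hashCode();   // normal exit also runs monitor-exit
+#     }                        // implicit catch-all: monitor-exit, then rethrow;
+#   }                          // if that monitor-exit throws, the same handler
+#                              // catches again -- the throw-catch loop above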
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index 710e849..bfc0b20 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -19,32 +19,36 @@
 # Tests that catch blocks with both normal and exceptional predecessors are
 # split in two.
 
-## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) builder (after)
 
-## CHECK:      name             "B0"
+## CHECK:      name             "B1"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
-## CHECK-NEXT: successors       "<<BExtracted:B\d+>>"
+## CHECK-NEXT: successors       "<<BAdd:B\d+>>"
 
-## CHECK:      name             "<<BCatch:B\d+>>"
+## CHECK:      name             "<<BAdd>>"
+## CHECK-NEXT: from_bci
+## CHECK-NEXT: to_bci
+## CHECK-NEXT: predecessors     "B1" "<<BCatch:B\d+>>"
+## CHECK-NEXT: successors
+## CHECK-NEXT: xhandlers
+## CHECK-NOT:  end_block
+## CHECK:      Add
+
+## CHECK:      name             "<<BCatch>>"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
-## CHECK-NEXT: successors       "<<BExtracted>>"
+## CHECK-NEXT: successors       "<<BAdd>>"
 ## CHECK-NEXT: xhandlers
 ## CHECK-NEXT: flags            "catch_block"
-## CHECK-NOT:  Add
-
-## CHECK:      name             "<<BExtracted>>"
-## CHECK-NEXT: from_bci
-## CHECK-NEXT: to_bci
-## CHECK-NEXT: predecessors     "B0" "<<BCatch>>"
-## CHECK-NOT:  flags            "catch_block"
-## CHECK:      Add
 
 .method public static testSimplifyCatchBlock(III)I
     .registers 4
+    # Prevent the entry block from being a loop pre-header, which would
+    # cause the CFG simplifier to add a synthesized block.
+    goto :catch_all
 
     :catch_all
     add-int/2addr p0, p1
@@ -59,7 +63,7 @@
 
 # Should be rejected because :catch_all is a loop header.
 
-## CHECK-START: int SsaBuilder.testCatchLoopHeader(int, int, int) ssa_builder (after, bad_state)
+## CHECK-START: int SsaBuilder.testCatchLoopHeader(int, int, int) builder (after, bad_state)
 
 .method public static testCatchLoopHeader(III)I
     .registers 4
@@ -81,7 +85,7 @@
 
 # Tests creation of catch Phis.
 
-## CHECK-START: int SsaBuilder.testPhiCreation(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testPhiCreation(int, int, int) builder (after)
 ## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P2:i\d+>>   ParameterValue
@@ -124,7 +128,7 @@
 # Tests that phi elimination does not remove catch phis where the value does
 # not dominate the phi.
 
-## CHECK-START: int SsaBuilder.testPhiElimination_Domination(int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testPhiElimination_Domination(int, int) builder (after)
 ## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<Cst5:i\d+>> IntConstant 5
@@ -165,7 +169,7 @@
 
 # Tests that phi elimination loops until no more phis can be removed.
 
-## CHECK-START: int SsaBuilder.testPhiElimination_Dependencies(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testPhiElimination_Dependencies(int, int, int) builder (after)
 ## CHECK-NOT:     Phi
 
 .method public static testPhiElimination_Dependencies(III)I
@@ -197,10 +201,7 @@
 
 # Tests that dead catch blocks are removed.
 
-## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (before)
-## CHECK:                       Mul
-
-## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) builder (after)
 ## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P2:i\d+>>   ParameterValue
@@ -208,7 +209,7 @@
 ## CHECK-DAG:     <<Add2:i\d+>> Add [<<Add1>>,<<P2>>]
 ## CHECK-DAG:                   Return [<<Add2>>]
 
-## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) builder (after)
 ## CHECK-NOT:                   flags "catch_block"
 ## CHECK-NOT:                   Mul
 
diff --git a/test/510-checker-try-catch/src/Main.java b/test/510-checker-try-catch/src/Main.java
index 25cdc0e..d6dcd30 100644
--- a/test/510-checker-try-catch/src/Main.java
+++ b/test/510-checker-try-catch/src/Main.java
@@ -39,7 +39,7 @@
 
   public static void testMethod(String method) throws Exception {
     Class<?> c = Class.forName("Runtime");
-    Method m = c.getMethod(method, new Class[] { boolean.class, boolean.class });
+    Method m = c.getMethod(method, boolean.class, boolean.class);
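+    // Class.getMethod(String, Class<?>...) is a varargs method, so the explicit
+    // array is unnecessary; these two calls are equivalent:
+    //   c.getMethod(method, new Class[] { boolean.class, boolean.class });
+    //   c.getMethod(method, boolean.class, boolean.class);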
 
     for (TestPath path : TestPath.values()) {
       Object[] arguments = new Object[] { path.arg1, path.arg2 };
diff --git a/test/517-checker-builder-fallthrough/smali/TestCase.smali b/test/517-checker-builder-fallthrough/smali/TestCase.smali
index bc9502b..946f169 100644
--- a/test/517-checker-builder-fallthrough/smali/TestCase.smali
+++ b/test/517-checker-builder-fallthrough/smali/TestCase.smali
@@ -25,8 +25,8 @@
 
 ## CHECK:  name            "B1"
 ## CHECK:  successors      "B5" "B2"
-## CHECK:  StoreLocal      [v0,<<Const0>>]
-## CHECK:  If
+## CHECK:  <<Cond:z\d+>>   Equal [<<Const0>>,<<Const0>>]
+## CHECK:  If [<<Cond>>]
 
 ## CHECK:  name            "B2"
 ## CHECK:  successors      "B4"
diff --git a/test/517-checker-builder-fallthrough/src/Main.java b/test/517-checker-builder-fallthrough/src/Main.java
index 23d94e6..14170f5 100644
--- a/test/517-checker-builder-fallthrough/src/Main.java
+++ b/test/517-checker-builder-fallthrough/src/Main.java
@@ -20,7 +20,7 @@
 
   public static int runTest(int input) throws Exception {
     Class<?> c = Class.forName("TestCase");
-    Method m = c.getMethod("testCase", new Class[] { int.class });
+    Method m = c.getMethod("testCase", int.class);
     return (Integer) m.invoke(null, input);
   }
 
diff --git a/test/522-checker-regression-monitor-exit/smali/Test.smali b/test/522-checker-regression-monitor-exit/smali/Test.smali
index c8e9198..72583d2 100644
--- a/test/522-checker-regression-monitor-exit/smali/Test.smali
+++ b/test/522-checker-regression-monitor-exit/smali/Test.smali
@@ -17,11 +17,11 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination (before)
+## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination$initial (before)
 ## CHECK:         MonitorOperation [<<Param:l\d+>>] kind:enter
 ## CHECK:         MonitorOperation [<<Param>>]      kind:exit
 
-## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination (after)
+## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination$initial (after)
 ## CHECK:         MonitorOperation [<<Param:l\d+>>] kind:enter
 ## CHECK:         MonitorOperation [<<Param>>]      kind:exit
 
diff --git a/test/522-checker-regression-monitor-exit/src/Main.java b/test/522-checker-regression-monitor-exit/src/Main.java
index c85ac96..a5e9512 100644
--- a/test/522-checker-regression-monitor-exit/src/Main.java
+++ b/test/522-checker-regression-monitor-exit/src/Main.java
@@ -40,7 +40,7 @@
       Integer result;
       try {
         Class<?> c = Class.forName("Test");
-        Method m = c.getMethod("synchronizedHashCode", new Class[] { Object.class });
+        Method m = c.getMethod("synchronizedHashCode", Object.class);
         result = (Integer) m.invoke(null, m_obj);
       } catch (Exception e) {
         System.err.println("Hash code query exception");
diff --git a/test/523-checker-can-throw-regression/smali/Test.smali b/test/523-checker-can-throw-regression/smali/Test.smali
index 87192ea..4b737a9 100644
--- a/test/523-checker-can-throw-regression/smali/Test.smali
+++ b/test/523-checker-can-throw-regression/smali/Test.smali
@@ -46,8 +46,10 @@
   div-int/2addr p0, p1
   :else
   div-int/2addr p0, p2
-  return p0
   :try_end_2
-  .catchall {:try_start_2 .. :try_end_2} :catchall
+  .catchall {:try_start_2 .. :try_end_2} :catchall2
+
+  :catchall2
+  return p0
 
 .end method
diff --git a/test/525-checker-arrays-and-fields/info.txt b/test/525-checker-arrays-and-fields/info.txt
deleted file mode 100644
index 3e16abf..0000000
--- a/test/525-checker-arrays-and-fields/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Test on (in)variant static and instance field and array references in loops.
diff --git a/test/525-checker-arrays-and-fields/src/Main.java b/test/525-checker-arrays-and-fields/src/Main.java
deleted file mode 100644
index a635a51..0000000
--- a/test/525-checker-arrays-and-fields/src/Main.java
+++ /dev/null
@@ -1,1099 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// Test on (in)variant static and instance field and array references in loops.
-//
-public class Main {
-
-  private static Object anObject = new Object();
-  private static Object anotherObject = new Object();
-
-  //
-  // Static fields.
-  //
-
-  private static boolean sZ;
-  private static byte sB;
-  private static char sC;
-  private static short sS;
-  private static int sI;
-  private static long sJ;
-  private static float sF;
-  private static double sD;
-  private static Object sL;
-
-  //
-  // Static arrays.
-  //
-
-  private static boolean[] sArrZ;
-  private static byte[] sArrB;
-  private static char[] sArrC;
-  private static short[] sArrS;
-  private static int[] sArrI;
-  private static long[] sArrJ;
-  private static float[] sArrF;
-  private static double[] sArrD;
-  private static Object[] sArrL;
-
-  //
-  // Instance fields.
-  //
-
-  private boolean mZ;
-  private byte mB;
-  private char mC;
-  private short mS;
-  private int mI;
-  private long mJ;
-  private float mF;
-  private double mD;
-  private Object mL;
-
-  //
-  // Instance arrays.
-  //
-
-  private boolean[] mArrZ;
-  private byte[] mArrB;
-  private char[] mArrC;
-  private short[] mArrS;
-  private int[] mArrI;
-  private long[] mArrJ;
-  private float[] mArrF;
-  private double[] mArrD;
-  private Object[] mArrL;
-
-  //
-  // Loops on static arrays with invariant static field references.
-  // The checker is used to ensure hoisting occurred.
-  //
-
-  /// CHECK-START: void Main.SInvLoopZ() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopZ() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = sZ;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopB() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopB() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = sB;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopC() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopC() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = sC;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopS() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopS() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = sS;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopI() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopI() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sI;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopJ() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopJ() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sJ;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopF() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopF() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sF;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopD() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopD() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sD;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopL() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopL() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = sL;
-    }
-  }
-
-  //
-  // Loops on static arrays with variant static field references.
-  // Incorrect hoisting is detected by incorrect outcome.
-  //
-
-  private static void SVarLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = sZ;
-      if (i == 10)
-        sZ = !sZ;
-    }
-  }
-
-  private static void SVarLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = sB;
-      if (i == 10)
-        sB++;
-    }
-  }
-
-  private static void SVarLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = sC;
-      if (i == 10)
-        sC++;
-    }
-  }
-
-  private static void SVarLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = sS;
-      if (i == 10)
-        sS++;
-    }
-  }
-
-  private static void SVarLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sI;
-      if (i == 10)
-        sI++;
-    }
-  }
-
-  private static void SVarLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sJ;
-      if (i == 10)
-        sJ++;
-    }
-  }
-
-  private static void SVarLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sF;
-      if (i == 10)
-        sF++;
-    }
-  }
-
-  private static void SVarLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sD;
-      if (i == 10)
-        sD++;
-    }
-  }
-
-  private static void SVarLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = sL;
-      if (i == 10)
-        sL = anotherObject;
-    }
-  }
-
-  //
-  // Loops on static arrays with a cross-over reference.
-  // Incorrect hoisting is detected by incorrect outcome.
-  // In addition, the checker is used to detect no hoisting.
-  //
-
-  /// CHECK-START: void Main.SCrossOverLoopZ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopZ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = !sArrZ[20];
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopB() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopB() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = (byte)(sArrB[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopC() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopC() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = (char)(sArrC[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopS() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopS() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = (short)(sArrS[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopI() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopI() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sArrI[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopJ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopJ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sArrJ[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopF() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopF() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sArrF[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopD() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopD() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sArrD[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopL() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopL() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
-    }
-  }
-
-  //
-  // Loops on instance arrays with invariant instance field references.
-  // The checker is used to ensure hoisting occurred.
-  //
-
-  /// CHECK-START: void Main.InvLoopZ() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopZ() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = mZ;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopB() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopB() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = mB;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopC() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopC() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = mC;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopS() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopS() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = mS;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopI() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopI() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mI;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopJ() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopJ() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mJ;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopF() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopF() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mF;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopD() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopD() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mD;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopL() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopL() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = mL;
-    }
-  }
-
-  //
-  // Loops on instance arrays with variant instance field references.
-  // Incorrect hoisting is detected by incorrect outcome.
-  //
-
-  private void VarLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = mZ;
-      if (i == 10)
-        mZ = !mZ;
-    }
-  }
-
-  private void VarLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = mB;
-      if (i == 10)
-        mB++;
-    }
-  }
-
-  private void VarLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = mC;
-      if (i == 10)
-        mC++;
-    }
-  }
-
-  private void VarLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = mS;
-      if (i == 10)
-        mS++;
-    }
-  }
-
-  private void VarLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mI;
-      if (i == 10)
-        mI++;
-    }
-  }
-
-  private void VarLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mJ;
-      if (i == 10)
-        mJ++;
-    }
-  }
-
-  private void VarLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mF;
-      if (i == 10)
-        mF++;
-    }
-  }
-
-  private void VarLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mD;
-      if (i == 10)
-        mD++;
-    }
-  }
-
-  private void VarLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = mL;
-      if (i == 10)
-        mL = anotherObject;
-    }
-  }
-
-  //
-  // Loops on instance arrays with a cross-over reference.
-  // Incorrect hoisting is detected by incorrect outcome.
-  // In addition, the checker is used to detect no hoisting.
-  //
-
-  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = !mArrZ[20];
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = (byte)(mArrB[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = (char)(mArrC[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = (short)(mArrS[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mArrI[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mArrJ[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mArrF[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mArrD[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
-    }
-  }
-
-  //
-  // Driver and testers.
-  //
-
-  public static void main(String[] args) {
-    DoStaticTests();
-    new Main().DoInstanceTests();
-  }
-
-  private static void DoStaticTests() {
-    // Type Z.
-    sZ = true;
-    sArrZ = new boolean[100];
-    SInvLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(true, sArrZ[i]);
-    }
-    SVarLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(i <= 10, sArrZ[i]);
-    }
-    SCrossOverLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(i <= 20, sArrZ[i]);
-    }
-    // Type B.
-    sB = 1;
-    sArrB = new byte[100];
-    SInvLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(1, sArrB[i]);
-    }
-    SVarLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
-    }
-    SCrossOverLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(i <= 20 ? 4 : 6, sArrB[i]);
-    }
-    // Type C.
-    sC = 2;
-    sArrC = new char[100];
-    SInvLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(2, sArrC[i]);
-    }
-    SVarLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
-    }
-    SCrossOverLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(i <= 20 ? 5 : 7, sArrC[i]);
-    }
-    // Type S.
-    sS = 3;
-    sArrS = new short[100];
-    SInvLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(3, sArrS[i]);
-    }
-    SVarLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
-    }
-    SCrossOverLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(i <= 20 ? 6 : 8, sArrS[i]);
-    }
-    // Type I.
-    sI = 4;
-    sArrI = new int[100];
-    SInvLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(4, sArrI[i]);
-    }
-    SVarLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
-    }
-    SCrossOverLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(i <= 20 ? 7 : 9, sArrI[i]);
-    }
-    // Type J.
-    sJ = 5;
-    sArrJ = new long[100];
-    SInvLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(5, sArrJ[i]);
-    }
-    SVarLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
-    }
-    SCrossOverLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(i <= 20 ? 8 : 10, sArrJ[i]);
-    }
-    // Type F.
-    sF = 6.0f;
-    sArrF = new float[100];
-    SInvLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(6, sArrF[i]);
-    }
-    SVarLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
-    }
-    SCrossOverLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(i <= 20 ? 9 : 11, sArrF[i]);
-    }
-    // Type D.
-    sD = 7.0;
-    sArrD = new double[100];
-    SInvLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(7.0, sArrD[i]);
-    }
-    SVarLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
-    }
-    SCrossOverLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(i <= 20 ? 10 : 12, sArrD[i]);
-    }
-    // Type L.
-    sL = anObject;
-    sArrL = new Object[100];
-    SInvLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(anObject, sArrL[i]);
-    }
-    SVarLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
-    }
-    SCrossOverLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
-    }
-  }
-
-  private void DoInstanceTests() {
-    // Type Z.
-    mZ = true;
-    mArrZ = new boolean[100];
-    InvLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(true, mArrZ[i]);
-    }
-    VarLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(i <= 10, mArrZ[i]);
-    }
-    CrossOverLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(i <= 20, mArrZ[i]);
-    }
-    // Type B.
-    mB = 1;
-    mArrB = new byte[100];
-    InvLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(1, mArrB[i]);
-    }
-    VarLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
-    }
-    CrossOverLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(i <= 20 ? 4 : 6, mArrB[i]);
-    }
-    // Type C.
-    mC = 2;
-    mArrC = new char[100];
-    InvLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(2, mArrC[i]);
-    }
-    VarLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
-    }
-    CrossOverLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(i <= 20 ? 5 : 7, mArrC[i]);
-    }
-    // Type S.
-    mS = 3;
-    mArrS = new short[100];
-    InvLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(3, mArrS[i]);
-    }
-    VarLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
-    }
-    CrossOverLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(i <= 20 ? 6 : 8, mArrS[i]);
-    }
-    // Type I.
-    mI = 4;
-    mArrI = new int[100];
-    InvLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(4, mArrI[i]);
-    }
-    VarLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
-    }
-    CrossOverLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(i <= 20 ? 7 : 9, mArrI[i]);
-    }
-    // Type J.
-    mJ = 5;
-    mArrJ = new long[100];
-    InvLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(5, mArrJ[i]);
-    }
-    VarLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
-    }
-    CrossOverLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(i <= 20 ? 8 : 10, mArrJ[i]);
-    }
-    // Type F.
-    mF = 6.0f;
-    mArrF = new float[100];
-    InvLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(6, mArrF[i]);
-    }
-    VarLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
-    }
-    CrossOverLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(i <= 20 ? 9 : 11, mArrF[i]);
-    }
-    // Type D.
-    mD = 7.0;
-    mArrD = new double[100];
-    InvLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(7.0, mArrD[i]);
-    }
-    VarLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
-    }
-    CrossOverLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(i <= 20 ? 10 : 12, mArrD[i]);
-    }
-    // Type L.
-    mL = anObject;
-    mArrL = new Object[100];
-    InvLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(anObject, mArrL[i]);
-    }
-    VarLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
-    }
-    CrossOverLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
-    }
-  }
-
-  private static void expectEquals(boolean expected, boolean result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(byte expected, byte result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(char expected, char result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(short expected, short result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(int expected, int result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(float expected, float result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(double expected, double result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(Object expected, Object result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-}
diff --git a/test/525-checker-arrays-fields1/expected.txt b/test/525-checker-arrays-fields1/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/525-checker-arrays-fields1/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/525-checker-arrays-fields1/info.txt b/test/525-checker-arrays-fields1/info.txt
new file mode 100644
index 0000000..7d0a088
--- /dev/null
+++ b/test/525-checker-arrays-fields1/info.txt
@@ -0,0 +1 @@
+Test on (in)variant static field and array references in loops.
diff --git a/test/525-checker-arrays-fields1/src/Main.java b/test/525-checker-arrays-fields1/src/Main.java
new file mode 100644
index 0000000..ba0476a
--- /dev/null
+++ b/test/525-checker-arrays-fields1/src/Main.java
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant static field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Static fields.
+  //
+
+  private static boolean sZ;
+  private static byte sB;
+  private static char sC;
+  private static short sS;
+  private static int sI;
+  private static long sJ;
+  private static float sF;
+  private static double sD;
+  private static Object sL;
+
+  //
+  // Static arrays.
+  //
+
+  private static boolean[] sArrZ;
+  private static byte[] sArrB;
+  private static char[] sArrC;
+  private static short[] sArrS;
+  private static int[] sArrI;
+  private static long[] sArrJ;
+  private static float[] sArrF;
+  private static double[] sArrD;
+  private static Object[] sArrL;
+
+  //
+  // Loops on static arrays with invariant static field references.
+  // The checker is used to ensure hoisting occurred.
+  //
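+  // Conceptually (a sketch, not the compiler's actual output), hoisting loads
+  // both invariant statics once before the loop, e.g. for InvLoopZ:
+  //
+  //   boolean[] arr = sArrZ;  boolean val = sZ;  // StaticFieldGet, loop:none
+  //   for (int i = 0; i < arr.length; i++) {
+  //     arr[i] = val;                            // no field loads in the loop
+  //   }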
+
+  /// CHECK-START: void Main.InvLoopZ() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopZ() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopB() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopB() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopC() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopC() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopS() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopS() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopI() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopI() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopJ() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopJ() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopF() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopF() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopD() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopD() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopL() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopL() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+    }
+  }
+
+  //
+  // Loops on static arrays with variant static field references.
+  // Incorrect hoisting is detected by incorrect outcome.
+  //
+
+  private static void VarLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+      if (i == 10)
+        sZ = !sZ;
+    }
+  }
+
+  private static void VarLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+      if (i == 10)
+        sB++;
+    }
+  }
+
+  private static void VarLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+      if (i == 10)
+        sC++;
+    }
+  }
+
+  private static void VarLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+      if (i == 10)
+        sS++;
+    }
+  }
+
+  private static void VarLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+      if (i == 10)
+        sI++;
+    }
+  }
+
+  private static void VarLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+      if (i == 10)
+        sJ++;
+    }
+  }
+
+  private static void VarLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+      if (i == 10)
+        sF++;
+    }
+  }
+
+  private static void VarLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+      if (i == 10)
+        sD++;
+    }
+  }
+
+  private static void VarLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+      if (i == 10)
+        sL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on static arrays with a cross-over reference.
+  // Incorrect hoisting is detected by incorrect outcome.
+  // In addition, the checker is used to detect no hoisting.
+  //
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopZ() {
+    sArrZ[20] = false;
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = !sArrZ[20];
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopB() {
+    sArrB[20] = 11;
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = (byte)(sArrB[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopC() {
+    sArrC[20] = 11;
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = (char)(sArrC[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopS() {
+    sArrS[20] = 11;
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = (short)(sArrS[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopI() {
+    sArrI[20] = 11;
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sArrI[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopJ() {
+    sArrJ[20] = 11;
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sArrJ[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopF() {
+    sArrF[20] = 11;
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrF[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopD() {
+    sArrD[20] = 11;
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrD[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopL() {
+    sArrL[20] = anotherObject;
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // False cross-over loops on static arrays with data types (I/F and J/D) that
+  // used to be aliased in an older version of the compiler. That aliasing has
+  // since been removed, which enables hoisting the invariant array reference.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop1() {
+    sArrF[20] = -1;
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = (int) sArrF[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop2() {
+    sArrI[20] = -2;
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrI[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop3() {
+    sArrD[20] = -3;
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = (long) sArrD[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop4() {
+    sArrJ[20] = -4;
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrJ[20] - 2;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    DoStaticTests();
+    System.out.println("passed");
+  }
+
+  private static void DoStaticTests() {
+    // Type Z.
+    sZ = true;
+    sArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(true, sArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 10, sArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 20, sArrZ[i]);
+    }
+    // Type B.
+    sB = 1;
+    sArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(1, sArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrB[i]);
+    }
+    // Type C.
+    sC = 2;
+    sArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(2, sArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrC[i]);
+    }
+    // Type S.
+    sS = 3;
+    sArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(3, sArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrS[i]);
+    }
+    // Type I.
+    sI = 4;
+    sArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(4, sArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrI[i]);
+    }
+    // Type J.
+    sJ = 5;
+    sArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(5, sArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrJ[i]);
+    }
+    // Type F.
+    sF = 6.0f;
+    sArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(6, sArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrF[i]);
+    }
+    // Type D.
+    sD = 7.0;
+    sArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(7.0, sArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrD[i]);
+    }
+    // Type L.
+    sL = anObject;
+    sArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(anObject, sArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
+    }
+    // False cross-over.
+    FalseCrossOverLoop1();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(-3, sArrI[i]);
+    }
+    FalseCrossOverLoop2();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(-4, sArrF[i]);
+    }
+    FalseCrossOverLoop3();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(-5, sArrJ[i]);
+    }
+    FalseCrossOverLoop4();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(-6, sArrD[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/525-checker-arrays-fields2/expected.txt b/test/525-checker-arrays-fields2/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/525-checker-arrays-fields2/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/525-checker-arrays-fields2/info.txt b/test/525-checker-arrays-fields2/info.txt
new file mode 100644
index 0000000..3464e54
--- /dev/null
+++ b/test/525-checker-arrays-fields2/info.txt
@@ -0,0 +1 @@
+Test on (in)variant instance field and array references in loops.
diff --git a/test/525-checker-arrays-fields2/src/Main.java b/test/525-checker-arrays-fields2/src/Main.java
new file mode 100644
index 0000000..2aa40fc
--- /dev/null
+++ b/test/525-checker-arrays-fields2/src/Main.java
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant instance field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Instance fields.
+  //
+
+  private boolean mZ;
+  private byte mB;
+  private char mC;
+  private short mS;
+  private int mI;
+  private long mJ;
+  private float mF;
+  private double mD;
+  private Object mL;
+
+  //
+  // Instance arrays.
+  //
+
+  private boolean[] mArrZ;
+  private byte[] mArrB;
+  private char[] mArrC;
+  private short[] mArrS;
+  private int[] mArrI;
+  private long[] mArrJ;
+  private float[] mArrF;
+  private double[] mArrD;
+  private Object[] mArrL;
+
+  //
+  // Loops on instance arrays with invariant instance field references.
+  // The checker is used to ensure hoisting occurred.
+  //
+
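+  // For example, in InvLoopZ the loop body reads two instance fields on every
+  // iteration: mArrZ (the array reference) and mZ (the stored value). Neither
+  // is written inside the loop, so licm can hoist both InstanceFieldGet
+  // instructions out of it, which the (before)/(after) checks express as a
+  // move from loop:{{B\d+}} to loop:none.
+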
+  /// CHECK-START: void Main.InvLoopZ() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopZ() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopB() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopB() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopC() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopC() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopS() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopS() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopI() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopI() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopJ() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopJ() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopF() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopF() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopD() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopD() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopL() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopL() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+    }
+  }
+
+  //
+  // Loops on instance arrays with variant instance field references.
+  // Incorrect hoisting is detected by an incorrect outcome.
+  //
+
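+  // For example, VarLoopZ flips mZ after the store at i == 10; if the field
+  // get were (incorrectly) hoisted, every element would keep the initial
+  // value, whereas DoInstanceTests expects true only for i <= 10.
+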
+  private void VarLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+      if (i == 10)
+        mZ = !mZ;
+    }
+  }
+
+  private void VarLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+      if (i == 10)
+        mB++;
+    }
+  }
+
+  private void VarLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+      if (i == 10)
+        mC++;
+    }
+  }
+
+  private void VarLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+      if (i == 10)
+        mS++;
+    }
+  }
+
+  private void VarLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+      if (i == 10)
+        mI++;
+    }
+  }
+
+  private void VarLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+      if (i == 10)
+        mJ++;
+    }
+  }
+
+  private void VarLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+      if (i == 10)
+        mF++;
+    }
+  }
+
+  private void VarLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+      if (i == 10)
+        mD++;
+    }
+  }
+
+  private void VarLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+      if (i == 10)
+        mL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on instance arrays with a cross-over reference.
+  // Incorrect hoisting is detected by an incorrect outcome.
+  // In addition, the checker is used to verify that no hoisting occurred.
+  //
+
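+  // Here hoisting would be unsound: each ArraySet to the array may store into
+  // the very element that the get at index 20 reads (at i == 20 it does), so
+  // the ArrayGet has to stay inside the loop; the (after) checks assert that
+  // it still carries a loop:{{B\d+}} annotation.
+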
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopZ() {
+    mArrZ[20] = false;
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = !mArrZ[20];
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopB() {
+    mArrB[20] = 111;
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = (byte)(mArrB[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopC() {
+    mArrC[20] = 111;
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = (char)(mArrC[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopS() {
+    mArrS[20] = 111;
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = (short)(mArrS[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopI() {
+    mArrI[20] = 111;
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mArrI[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopJ() {
+    mArrJ[20] = 111;
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mArrJ[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopF() {
+    mArrF[20] = 111;
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrF[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopD() {
+    mArrD[20] = 111;
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrD[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopL() {
+    mArrL[20] = anotherObject;
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // False cross-over loops on instance arrays with data types (I/F and J/D) that
+  // used to be aliased in an older version of the compiler. That aliasing has
+  // since been removed, which enables hoisting the invariant array reference.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop1() {
+    mArrF[20] = -1;
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = (int) mArrF[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop2() {
+    mArrI[20] = -2;
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrI[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop3() {
+    mArrD[20] = -3;
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = (long) mArrD[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop4() {
+    mArrJ[20] = -4;
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrJ[20] - 2;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    new Main().DoInstanceTests();
+    System.out.println("passed");
+  }
+
+  private void DoInstanceTests() {
+    // Type Z.
+    mZ = true;
+    mArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(true, mArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 10, mArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 20, mArrZ[i]);
+    }
+    // Type B.
+    mB = 1;
+    mArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(1, mArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrB[i]);
+    }
+    // Type C.
+    mC = 2;
+    mArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(2, mArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrC[i]);
+    }
+    // Type S.
+    mS = 3;
+    mArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(3, mArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrS[i]);
+    }
+    // Type I.
+    mI = 4;
+    mArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(4, mArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrI[i]);
+    }
+    // Type J.
+    mJ = 5;
+    mArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(5, mArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrJ[i]);
+    }
+    // Type F.
+    mF = 6.0f;
+    mArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(6, mArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrF[i]);
+    }
+    // Type D.
+    mD = 7.0;
+    mArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(7.0, mArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrD[i]);
+    }
+    // Type L.
+    mL = anObject;
+    mArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(anObject, mArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
+    }
+    // False cross-over.
+    FalseCrossOverLoop1();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(-3, mArrI[i]);
+    }
+    FalseCrossOverLoop2();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(-4, mArrF[i]);
+    }
+    FalseCrossOverLoop3();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(-5, mArrJ[i]);
+    }
+    FalseCrossOverLoop4();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(-6, mArrD[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index ead9446..9435ef1 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -34,9 +34,21 @@
   /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after)
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
 
+
+  /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
   public static int constantIndexGet(int array[]) {
     return array[1];
   }
@@ -55,10 +67,23 @@
   /// CHECK:             <<Const2:i\d+>>        IntConstant 2
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
 
 
+  /// CHECK-START-ARM:   void Main.constantIndexSet(int[]) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+  /// CHECK-START-ARM:   void Main.constantIndexSet(int[]) instruction_simplifier_arm (after)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
   public static void constantIndexSet(int array[]) {
     array[1] = 2;
   }
@@ -76,7 +101,20 @@
   /// CHECK:             <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
+
+
+  /// CHECK-START-ARM:   int Main.get(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM:   int Main.get(int[], int) instruction_simplifier_arm (after)
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
 
   public static int get(int array[], int index) {
@@ -102,7 +140,26 @@
   /// CHECK:             <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
+
+
+  /// CHECK-START-ARM:   void Main.set(int[], int, int) instruction_simplifier_arm (before)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Arg>>]
+
+  /// CHECK-START-ARM:   void Main.set(int[], int, int) instruction_simplifier_arm (after)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
 
   public static void set(int array[], int index, int value) {
@@ -126,23 +183,53 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
 
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
   public static void getSet(int array[], int index) {
     array[index] = array[index] + 1;
   }
@@ -166,23 +253,57 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    NewArray
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    NewArray
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
   public static int[] accrossGC(int array[], int index) {
@@ -196,6 +317,14 @@
    * Test that the intermediate address is shared between array accesses after
   * the bounds checks have been removed by BCE.
    */
+  // For the `instruction_simplifier_<arch> (after)` checker tests below: by the
+  // time we reach the architecture-specific instruction simplifier, BCE has
+  // already removed the bounds checks in the loop.
+
+  // Note that we do not care that the `DataOffset` is `12`. But if we do not
+  // specify it and any other `IntConstant` appears before that instruction,
+  // the checker will match the previous `IntConstant`, and the check will
+  // thus fail.
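+  //
+  // For instance, a bare
+  //
+  //   /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+  //
+  // could bind <<DataOffset>> to some earlier constant (such as a loop bound)
+  // instead of the data offset consumed by IntermediateAddress.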
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
   /// CHECK:             <<Const1:i\d+>>        IntConstant 1
@@ -207,14 +336,6 @@
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
 
-  // By the time we reach the architecture-specific instruction simplifier, BCE
-  // has removed the bounds checks in the loop.
-
-  // Note that we do not care that the `DataOffset` is `12`. But if we do not
-  // specify it and any other `IntConstant` appears before that instruction,
-  // checker will match the previous `IntConstant`, and we will thus fail the
-  // check.
-
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
@@ -222,23 +343,60 @@
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
 
   public static int canMergeAfterBCE1() {
@@ -279,15 +437,15 @@
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
-  /// CHECK-DAG:         <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
-  /// CHECK-DAG:         <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:             <<Address3:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
@@ -295,7 +453,7 @@
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
-  /// CHECK-DAG:         <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
@@ -303,9 +461,56 @@
 
   // There should be only one intermediate address computation in the loop.
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
-  /// CHECK:                                    Arm64IntermediateAddress
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
+  /// CHECK:                                    IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
+
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Array>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
+  /// CHECK:                                    IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
 
   public static int canMergeAfterBCE2() {
     int[] array = {0, 1, 2, 3};
@@ -315,6 +520,37 @@
     return array[array.length - 1];
   }
 
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before)
+  /// CHECK-DAG:         <<Array1:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array2:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array3:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                ArrayGet [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array2>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array3>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Array1:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array2:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array3:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                ArrayGet [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array2>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array3>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+  /// CHECK-NOT:                                IntermediateAddress
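+  // The gets above operate on long, float and double arrays; presumably the
+  // ARM-specific simplifier does not extract an intermediate address for
+  // these element types (their loads use different addressing than 32-bit
+  // int accesses), which is what the CHECK-NOT above asserts.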
+  public static int checkLongFloatDouble() {
+    long[] array_long = {0, 1, 2, 3};
+    float[] array_float = {0.0f, 1.0f, 2.0f, 3.0f};
+    double[] array_double = {0.0, 1.0, 2.0, 3.0};
+    double s = 0.0;
+
+    for (int i = 0; i < 4; i++) {
+      s += (double)array_long[i] + (double)array_float[i] + array_double[i];
+    }
+    return (int)s;
+  }
 
   public static void main(String[] args) {
     int[] array = {123, 456, 789};
@@ -337,5 +573,7 @@
 
     assertIntEquals(4, canMergeAfterBCE1());
     assertIntEquals(6, canMergeAfterBCE2());
+
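+    // Each of the three arrays {0, 1, 2, 3} sums to 6, hence 18 in total.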
+    assertIntEquals(18, checkLongFloatDouble());
   }
 }
diff --git a/test/529-checker-unresolved/build b/test/529-checker-unresolved/build
deleted file mode 100644
index 8c3c4f8..0000000
--- a/test/529-checker-unresolved/build
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-# We can't use src-ex testing infrastructure because src and src-ex are compiled
-# with javac independently and can't share code (without reflection).
-
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-
-mkdir classes-ex
-mv classes/UnresolvedClass.class classes-ex
-mv classes/UnresolvedInterface.class classes-ex
-mv classes/UnresolvedSuperClass.class classes-ex
-
-if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
-
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
-  zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
-  zip ${TEST_NAME}-ex.jar classes.dex
-else
-  if [ ${NEED_DEX} = "true" ]; then
-    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
-    zip $TEST_NAME.jar classes.dex
-    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
-    zip ${TEST_NAME}-ex.jar classes.dex
-  fi
-fi
diff --git a/test/529-checker-unresolved/expected.txt b/test/529-checker-unresolved/expected.txt
index 1e7dbfe..1590a2a 100644
--- a/test/529-checker-unresolved/expected.txt
+++ b/test/529-checker-unresolved/expected.txt
@@ -5,3 +5,6 @@
 UnresolvedClass.superMethod()
 instanceof ok
 checkcast ok
+UnresolvedClass.directCall()
+UnresolvedClass.directCall()
+UnresolvedClass.directCall()
diff --git a/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedClass.java b/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedClass.java
new file mode 100644
index 0000000..8b3bb3c
--- /dev/null
+++ b/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedClass.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UnresolvedClass extends UnresolvedSuperClass implements UnresolvedInterface {
+  static public void staticMethod() {
+    System.out.println("UnresolvedClass.staticMethod()");
+  }
+
+  public UnresolvedClass() {
+    System.out.println("UnresolvedClass.directCall()");
+  }
+
+  public void virtualMethod() {
+    System.out.println("UnresolvedClass.virtualMethod()");
+  }
+
+  public void interfaceMethod() {
+    System.out.println("UnresolvedClass.interfaceMethod()");
+  }
+
+  public static byte staticByte;
+  public static char staticChar;
+  public static int staticInt;
+  public static long staticLong;
+  public static float staticFloat;
+  public static double staticDouble;
+  public static Object staticObject;
+
+  public byte instanceByte;
+  public char instanceChar;
+  public int instanceInt;
+  public long instanceLong;
+  public float instanceFloat;
+  public double instanceDouble;
+  public Object instanceObject;
+}
+
diff --git a/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedInterface.java b/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedInterface.java
new file mode 100644
index 0000000..6e6b14b
--- /dev/null
+++ b/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedInterface.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface UnresolvedInterface {
+  void interfaceMethod();
+}
diff --git a/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java b/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java
new file mode 100644
index 0000000..dd3be00
--- /dev/null
+++ b/test/529-checker-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UnresolvedSuperClass {
+  public void superMethod() {
+    System.out.println("UnresolvedClass.superMethod()");
+  }
+}
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 5219c04..7b5cbc1 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -77,6 +77,16 @@
     expectEquals(123456789123456789f, UnresolvedClass.staticFloat);
     expectEquals(123456789123456789d, UnresolvedClass.staticDouble);
     expectEquals(o, UnresolvedClass.staticObject);
+
+    // Check "large" values.
+
+    UnresolvedClass.staticByte = (byte)-1;
+    UnresolvedClass.staticChar = (char)32768;
+    UnresolvedClass.staticInt = -1;
+
+    expectEquals((byte)-1, UnresolvedClass.staticByte);
+    expectEquals((char)32768, UnresolvedClass.staticChar);
+    expectEquals(-1, UnresolvedClass.staticInt);
   }
 
   /// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before)
@@ -114,6 +124,33 @@
     expectEquals(o, c.instanceObject);
   }
 
+  /// CHECK-START: void Main.callUnresolvedNull(UnresolvedClass) register (before)
+  /// CHECK-NOT: NullCheck
+  static public void callUnresolvedNull(UnresolvedClass c) {
+    int x = 0;
+    try {
+      x = c.instanceInt;
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-1, x);
+    try {
+      c.instanceInt = -1;
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-2, x);
+    try {
+      c.virtualMethod();
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-3, x);
+  }
+
   static public void testInstanceOf(Object o) {
     if (o instanceof UnresolvedSuperClass) {
       System.out.println("instanceof ok");
@@ -136,8 +173,30 @@
     callInvokeUnresolvedSuper(m);
     callUnresolvedStaticFieldAccess();
     callUnresolvedInstanceFieldAccess(c);
+    callUnresolvedNull(null);
     testInstanceOf(m);
     testCheckCast(m);
+    testLicm(2);
+  }
+
+  /// CHECK-START: void Main.testLicm(int) licm (before)
+  /// CHECK:      <<Class:l\d+>>        LoadClass                                     loop:B2
+  /// CHECK-NEXT: <<Clinit:l\d+>>       ClinitCheck [<<Class>>]                       loop:B2
+  /// CHECK-NEXT: <<New:l\d+>>          NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>]  loop:B2
+  /// CHECK-NEXT:                       InvokeUnresolved [<<New>>]                    loop:B2
+
+  /// CHECK-START: void Main.testLicm(int) licm (after)
+  /// CHECK:      <<Class:l\d+>>        LoadClass                                     loop:none
+  /// CHECK-NEXT: <<Clinit:l\d+>>       ClinitCheck [<<Class>>]                       loop:none
+  /// CHECK:      <<New:l\d+>>          NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>]  loop:B2
+  /// CHECK-NEXT:                       InvokeUnresolved [<<New>>]                    loop:B2
+  static public void testLicm(int count) {
+    // Test to make sure we keep the initialization check after loading an unresolved class.
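+    // Note that the do-while body runs count + 1 times, so testLicm(2)
+    // constructs three instances (matching the three "UnresolvedClass.directCall()"
+    // lines in expected.txt).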
+    UnresolvedClass c;
+    int i = 0;
+    do {
+      c = new UnresolvedClass();
+    } while (i++ != count);
   }
 
   public static void expectEquals(byte expected, byte result) {
@@ -164,7 +223,7 @@
     }
   }
 
-    public static void expectEquals(float expected, float result) {
+  public static void expectEquals(float expected, float result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
diff --git a/test/529-checker-unresolved/src/Unresolved.java b/test/529-checker-unresolved/src/Unresolved.java
deleted file mode 100644
index 20ac6e0..0000000
--- a/test/529-checker-unresolved/src/Unresolved.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-interface UnresolvedInterface {
-  void interfaceMethod();
-}
-
-class UnresolvedSuperClass {
-  public void superMethod() {
-    System.out.println("UnresolvedClass.superMethod()");
-  }
-}
-
-class UnresolvedClass extends UnresolvedSuperClass implements UnresolvedInterface {
-  static public void staticMethod() {
-    System.out.println("UnresolvedClass.staticMethod()");
-  }
-
-  public UnresolvedClass() {
-    System.out.println("UnresolvedClass.directCall()");
-  }
-
-  public void virtualMethod() {
-    System.out.println("UnresolvedClass.virtualMethod()");
-  }
-
-  public void interfaceMethod() {
-    System.out.println("UnresolvedClass.interfaceMethod()");
-  }
-
-  public static byte staticByte;
-  public static char staticChar;
-  public static int staticInt;
-  public static long staticLong;
-  public static float staticFloat;
-  public static double staticDouble;
-  public static Object staticObject;
-
-  public byte instanceByte;
-  public char instanceChar;
-  public int instanceInt;
-  public long instanceLong;
-  public float instanceFloat;
-  public double instanceDouble;
-  public Object instanceObject;
-}
-
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
deleted file mode 100644
index 58c92f1..0000000
--- a/test/530-checker-loops/src/Main.java
+++ /dev/null
@@ -1,700 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// Test on loop optimizations.
-//
-public class Main {
-
-  static int sResult;
-
-  //
-  // Various sequence variables used in bound checks.
-  //
-
-  /// CHECK-START: int Main.linear(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linear(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linear(int[] x) {
-    int result = 0;
-    for (int i = 0; i < x.length; i++) {
-      result += x[i];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearDown(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearDown(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearDown(int[] x) {
-    int result = 0;
-    for (int i = x.length - 1; i >= 0; i--) {
-      result += x[i];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearObscure(int[] x) {
-    int result = 0;
-    for (int i = x.length - 1; i >= 0; i--) {
-      int k = i + 5;
-      result += x[k - 5];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearVeryObscure(int[] x) {
-    int result = 0;
-    for (int i = 0; i < x.length; i++) {
-      int k = (-i) + (i << 5) + i - (32 * i) + 5 + (int) i;
-      result += x[k - 5];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearWhile(int[] x) {
-    int i = 0;
-    int result = 0;
-    while (i < x.length) {
-      result += x[i++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearThreeWayPhi(int[] x) {
-    int result = 0;
-    for (int i = 0; i < x.length; ) {
-      if (x[i] == 5) {
-        i++;
-        continue;
-      }
-      result += x[i++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearFourWayPhi(int[] x) {
-    int result = 0;
-    for (int i = 0; i < x.length; ) {
-      if (x[i] == 5) {
-        i++;
-        continue;
-      } else if (x[i] == 6) {
-        i++;
-        result += 7;
-        continue;
-      }
-      result += x[i++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int wrapAroundThenLinear(int[] x) {
-    // Loop with wrap around (length - 1, 0, 1, 2, ..).
-    int w = x.length - 1;
-    int result = 0;
-    for (int i = 0; i < x.length; i++) {
-      result += x[w];
-      w = i;
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
-    // Loop with wrap around (length - 1, 0, 1, 2, ..).
-    int w = x.length - 1;
-    int result = 0;
-    for (int i = 0; i < x.length; ) {
-       if (x[w] == 1) {
-         w = i++;
-         continue;
-       }
-       result += x[w];
-       w = i++;
-    }
-    return result;
-  }
-
-  /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int[] linearWithParameter(int n) {
-    int[] x = new int[n];
-    for (int i = 0; i < n; i++) {
-      x[i] = i;
-    }
-    return x;
-  }
-
-  /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int[] linearCopy(int x[]) {
-    int n = x.length;
-    int y[] = new int[n];
-    for (int i = 0; i < n; i++) {
-      y[i] = x[i];
-    }
-    return y;
-  }
-
-  /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearWithCompoundStride() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-    int result = 0;
-    for (int i = 0; i <= 12; ) {
-      i++;
-      result += x[i];
-      i++;
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearWithLargePositiveStride() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-    int result = 0;
-    int k = 0;
-    // Range analysis has no problem with a trip-count defined by a
-    // reasonably large positive stride far away from upper bound.
-    for (int i = 1; i <= 10 * 10000000 + 1; i += 10000000) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static int linearWithVeryLargePositiveStride() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-    int result = 0;
-    int k = 0;
-    // Range analysis conservatively bails due to potential of wrap-around
-    // arithmetic while computing the trip-count for this very large stride.
-    for (int i = 1; i < Integer.MAX_VALUE; i += 195225786) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearWithLargeNegativeStride() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-    int result = 0;
-    int k = 0;
-    // Range analysis has no problem with a trip-count defined by a
-    // reasonably large negative stride far away from lower bound.
-    for (int i = -1; i >= -10 * 10000000 - 1; i -= 10000000) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static int linearWithVeryLargeNegativeStride() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-    int result = 0;
-    int k = 0;
-    // Range analysis conservatively bails due to potential of wrap-around
-    // arithmetic while computing the trip-count for this very large stride.
-    for (int i = -2; i > Integer.MIN_VALUE; i -= 195225786) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearForNEUp() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearForNEUp() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearForNEUp() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = 0; i != 10; i++) {
-      result += x[i];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearForNEDown() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearForNEDown() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearForNEDown() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = 9; i != -1; i--) {
-      result += x[i];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearDoWhileUp() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    int i = 0;
-    do {
-      result += x[i++];
-    } while (i < 10);
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int linearDoWhileDown() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    int i = 9;
-    do {
-      result += x[i--];
-    } while (0 <= i);
-    return result;
-  }
-
-  /// CHECK-START: int Main.linearShort() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.linearShort() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static int linearShort() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    // TODO: make this work
-    for (short i = 0; i < 10; i++) {
-      result += x[i];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int periodicIdiom(int tc) {
-    int[] x = { 1, 3 };
-    // Loop with periodic sequence (0, 1).
-    int k = 0;
-    int result = 0;
-    for (int i = 0; i < tc; i++) {
-      result += x[k];
-      k = 1 - k;
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int periodicSequence2(int tc) {
-    int[] x = { 1, 3 };
-    // Loop with periodic sequence (0, 1).
-    int k = 0;
-    int l = 1;
-    int result = 0;
-    for (int i = 0; i < tc; i++) {
-      result += x[k];
-      int t = l;
-      l = k;
-      k = t;
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.periodicSequence4(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int periodicSequence4(int tc) {
-    int[] x = { 1, 3, 5, 7 };
-    // Loop with periodic sequence (0, 1, 2, 3).
-    int k = 0;
-    int l = 1;
-    int m = 2;
-    int n = 3;
-    int result = 0;
-    for (int i = 0; i < tc; i++) {
-      result += x[k] + x[l] + x[m] + x[n];  // all used at once
-      int t = n;
-      n = k;
-      k = l;
-      l = m;
-      m = t;
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justRightUp1() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justRightUp1() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int justRightUp1() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = Integer.MAX_VALUE - 10, k = 0; i < Integer.MAX_VALUE; i++) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justRightUp2() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justRightUp2() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int justRightUp2() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = Integer.MAX_VALUE - 10; i < Integer.MAX_VALUE; i++) {
-      result += x[i - Integer.MAX_VALUE + 10];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justRightUp3() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justRightUp3() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int justRightUp3() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = Integer.MAX_VALUE - 10, k = 0; i <= Integer.MAX_VALUE - 1; i++) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justOOBUp() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justOOBUp() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static int justOOBUp() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    // Infinite loop!
-    for (int i = Integer.MAX_VALUE - 9, k = 0; i <= Integer.MAX_VALUE; i++) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justRightDown1() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justRightDown1() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int justRightDown1() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = Integer.MIN_VALUE + 10, k = 0; i > Integer.MIN_VALUE; i--) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justRightDown2() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justRightDown2() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int justRightDown2() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = Integer.MIN_VALUE + 10; i > Integer.MIN_VALUE; i--) {
-      result += x[Integer.MAX_VALUE + i];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justRightDown3() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justRightDown3() BCE (after)
-  /// CHECK-NOT: BoundsCheck
-  private static int justRightDown3() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    for (int i = Integer.MIN_VALUE + 10, k = 0; i >= Integer.MIN_VALUE + 1; i--) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: int Main.justOOBDown() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: int Main.justOOBDown() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static int justOOBDown() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int result = 0;
-    // Infinite loop!
-    for (int i = Integer.MIN_VALUE + 9, k = 0; i >= Integer.MIN_VALUE; i--) {
-      result += x[k++];
-    }
-    return result;
-  }
-
-  /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static void lowerOOB(int[] x) {
-    for (int i = -1; i < x.length; i++) {
-      sResult += x[i];
-    }
-  }
-
-  /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static void upperOOB(int[] x) {
-    for (int i = 0; i <= x.length; i++) {
-      sResult += x[i];
-    }
-  }
-
-  /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static void doWhileUpOOB() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int i = 0;
-    do {
-      sResult += x[i++];
-    } while (i <= x.length);
-  }
-
-  /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  private static void doWhileDownOOB() {
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-    int i = x.length - 1;
-    do {
-      sResult += x[i--];
-    } while (-1 <= i);
-  }
-
-  //
-  // Verifier.
-  //
-
-  public static void main(String[] args) {
-    int[] empty = { };
-    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-
-    // Linear and wrap-around.
-    expectEquals(0, linear(empty));
-    expectEquals(55, linear(x));
-    expectEquals(0, linearDown(empty));
-    expectEquals(55, linearDown(x));
-    expectEquals(0, linearObscure(empty));
-    expectEquals(55, linearObscure(x));
-    expectEquals(0, linearVeryObscure(empty));
-    expectEquals(55, linearVeryObscure(x));
-    expectEquals(0, linearWhile(empty));
-    expectEquals(55, linearWhile(x));
-    expectEquals(0, linearThreeWayPhi(empty));
-    expectEquals(50, linearThreeWayPhi(x));
-    expectEquals(0, linearFourWayPhi(empty));
-    expectEquals(51, linearFourWayPhi(x));
-    expectEquals(0, wrapAroundThenLinear(empty));
-    expectEquals(55, wrapAroundThenLinear(x));
-    expectEquals(0, wrapAroundThenLinearThreeWayPhi(empty));
-    expectEquals(54, wrapAroundThenLinearThreeWayPhi(x));
-
-    // Linear with parameter.
-    sResult = 0;
-    try {
-      linearWithParameter(-1);
-    } catch (NegativeArraySizeException e) {
-      sResult = 1;
-    }
-    expectEquals(1, sResult);
-    for (int n = 0; n < 32; n++) {
-      int[] r = linearWithParameter(n);
-      expectEquals(n, r.length);
-      for (int i = 0; i < n; i++) {
-        expectEquals(i, r[i]);
-      }
-    }
-
-    // Linear copy.
-    expectEquals(0, linearCopy(empty).length);
-    {
-      int[] r = linearCopy(x);
-      expectEquals(x.length, r.length);
-      for (int i = 0; i < x.length; i++) {
-        expectEquals(x[i], r[i]);
-      }
-    }
-
-    // Linear with non-unit strides.
-    expectEquals(56, linearWithCompoundStride());
-    expectEquals(66, linearWithLargePositiveStride());
-    expectEquals(66, linearWithVeryLargePositiveStride());
-    expectEquals(66, linearWithLargeNegativeStride());
-    expectEquals(66, linearWithVeryLargeNegativeStride());
-
-    // Special forms.
-    expectEquals(55, linearForNEUp());
-    expectEquals(55, linearForNEDown());
-    expectEquals(55, linearDoWhileUp());
-    expectEquals(55, linearDoWhileDown());
-    expectEquals(55, linearShort());
-
-    // Periodic adds (1, 3), one at a time.
-    expectEquals(0, periodicIdiom(-1));
-    for (int tc = 0; tc < 32; tc++) {
-      int expected = (tc >> 1) << 2;
-      if ((tc & 1) != 0)
-        expected += 1;
-      expectEquals(expected, periodicIdiom(tc));
-    }
-
-    // Periodic adds (1, 3), one at a time.
-    expectEquals(0, periodicSequence2(-1));
-    for (int tc = 0; tc < 32; tc++) {
-      int expected = (tc >> 1) << 2;
-      if ((tc & 1) != 0)
-        expected += 1;
-      expectEquals(expected, periodicSequence2(tc));
-    }
-
-    // Periodic adds (1, 3, 5, 7), all at once.
-    expectEquals(0, periodicSequence4(-1));
-    for (int tc = 0; tc < 32; tc++) {
-      expectEquals(tc * 16, periodicSequence4(tc));
-    }
-
-    // Large bounds.
-    expectEquals(55, justRightUp1());
-    expectEquals(55, justRightUp2());
-    expectEquals(55, justRightUp3());
-    expectEquals(55, justRightDown1());
-    expectEquals(55, justRightDown2());
-    expectEquals(55, justRightDown3());
-    sResult = 0;
-    try {
-      justOOBUp();
-    } catch (ArrayIndexOutOfBoundsException e) {
-      sResult = 1;
-    }
-    expectEquals(1, sResult);
-    sResult = 0;
-    try {
-      justOOBDown();
-    } catch (ArrayIndexOutOfBoundsException e) {
-      sResult = 1;
-    }
-    expectEquals(1, sResult);
-
-    // Lower bound goes OOB.
-    sResult = 0;
-    try {
-      lowerOOB(x);
-    } catch (ArrayIndexOutOfBoundsException e) {
-      sResult += 1000;
-    }
-    expectEquals(1000, sResult);
-
-    // Upper bound goes OOB.
-    sResult = 0;
-    try {
-      upperOOB(x);
-    } catch (ArrayIndexOutOfBoundsException e) {
-      sResult += 1000;
-    }
-    expectEquals(1055, sResult);
-
-    // Do while up goes OOB.
-    sResult = 0;
-    try {
-      doWhileUpOOB();
-    } catch (ArrayIndexOutOfBoundsException e) {
-      sResult += 1000;
-    }
-    expectEquals(1055, sResult);
-
-    // Do while down goes OOB.
-    sResult = 0;
-    try {
-      doWhileDownOOB();
-    } catch (ArrayIndexOutOfBoundsException e) {
-      sResult += 1000;
-    }
-    expectEquals(1055, sResult);
-  }
-
-  private static void expectEquals(int expected, int result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-}
diff --git a/test/530-checker-loops1/expected.txt b/test/530-checker-loops1/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops1/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops/info.txt b/test/530-checker-loops1/info.txt
similarity index 100%
rename from test/530-checker-loops/info.txt
rename to test/530-checker-loops1/info.txt
diff --git a/test/530-checker-loops1/src/Main.java b/test/530-checker-loops1/src/Main.java
new file mode 100644
index 0000000..dde4d62
--- /dev/null
+++ b/test/530-checker-loops1/src/Main.java
@@ -0,0 +1,822 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations.
+//
+public class Main {
+
+  static int sResult;
+
+  //
+  // Various sequence variables used in bound checks.
+  //
+
+  /// CHECK-START: int Main.linear(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linear(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linear(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDown(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearDown(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearDown(int[] x) {
+    int result = 0;
+    for (int i = x.length - 1; i >= 0; i--) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearObscure(int[] x) {
+    int result = 0;
+    for (int i = x.length - 1; i >= 0; i--) {
+      int k = i + 5;
+      result += x[k - 5];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearVeryObscure(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; i++) {
+      int k = (-i) + (i << 5) + i - (32 * i) + 5 + (int) i;
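+      // The expression above folds to k == i + 5, so x[k - 5] is simply x[i].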
+      result += x[k - 5];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.hiddenStride(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.hiddenStride(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int hiddenStride(int[] a) {
+    int result = 0;
+    for (int i = 1; i <= 1; i++) {
+      // Obscured unit stride.
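+      // The outer loop pins i to the single value 1, so range analysis can
+      // prove the inner stride is exactly 1.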
+      for (int j = 0; j < a.length; j += i) {
+        result += a[j];
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearWhile(int[] x) {
+    int i = 0;
+    int result = 0;
+    while (i < x.length) {
+      result += x[i++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearThreeWayPhi(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; ) {
+      if (x[i] == 5) {
+        i++;
+        continue;
+      }
+      result += x[i++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearFourWayPhi(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; ) {
+      if (x[i] == 5) {
+        i++;
+        continue;
+      } else if (x[i] == 6) {
+        i++;
+        result += 7;
+        continue;
+      }
+      result += x[i++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int wrapAroundThenLinear(int[] x) {
+    // Loop with wrap around (length - 1, 0, 1, 2, ..).
+    int w = x.length - 1;
+    int result = 0;
+    for (int i = 0; i < x.length; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
+    // Loop with wrap around (length - 1, 0, 1, 2, ..).
+    int w = x.length - 1;
+    int result = 0;
+    for (int i = 0; i < x.length; ) {
+       if (x[w] == 1) {
+         w = i++;
+         continue;
+       }
+       result += x[w];
+       w = i++;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int[] linearWithParameter(int n) {
+    int[] x = new int[n];
+    for (int i = 0; i < n; i++) {
+      x[i] = i;
+    }
+    return x;
+  }
+
+  /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int[] linearCopy(int x[]) {
+    int n = x.length;
+    int y[] = new int[n];
+    for (int i = 0; i < n; i++) {
+      y[i] = x[i];
+    }
+    return y;
+  }
+
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwo(int x[]) {
+    int n = x.length / 2;
+    int result = 0;
+    for (int i = 0; i < n; i++) {
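+      // With n == x.length / 2, ii == 2 * i is at most x.length - 2, so both
+      // x[ii] and x[ii + 1] are provably in bounds.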
+      int ii = i << 1;
+      result += x[ii];
+      result += x[ii + 1];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip1(int x[]) {
+    int result = 0;
+    for (int i = 0; i < x.length / 2; i++) {
+      result += x[2 * i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip2(int x[]) {
+    int result = 0;
+    // This case is not optimized.
+    for (int i = 0; i < x.length; i+=2) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearWithCompoundStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
+    int result = 0;
+    for (int i = 0; i <= 12; ) {
+      i++;
+      result += x[i];
+      i++;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearWithLargePositiveStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis has no problem with a trip-count defined by a
+    // reasonably large positive stride far away from the upper bound.
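+    // Sketch: i takes the 11 values 1, 10000001, ..., 100000001, so k runs
+    // over 0..10 and stays within x.length == 11.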
+    for (int i = 1; i <= 10 * 10000000 + 1; i += 10000000) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static int linearWithVeryLargePositiveStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis conservatively bails out due to the potential for wrap-around
+    // arithmetic while computing the trip-count for this very large stride.
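+    // For example, a naive trip-count computation such as
+    // (Integer.MAX_VALUE - 2 + 195225786) / 195225786 already overflows
+    // 32-bit arithmetic, so no safe static bound can be derived.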
+    for (int i = 1; i < Integer.MAX_VALUE; i += 195225786) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearWithLargeNegativeStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis has no problem with a trip-count defined by a
+    // reasonably large negative stride far away from the lower bound.
+    for (int i = -1; i >= -10 * 10000000 - 1; i -= 10000000) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static int linearWithVeryLargeNegativeStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis conservatively bails out due to the potential for wrap-around
+    // arithmetic while computing the trip-count for this very large stride.
+    for (int i = -2; i > Integer.MIN_VALUE; i -= 195225786) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearForNEUp() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearForNEUp() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearForNEUp() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = 0; i != 10; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearForNEDown() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearForNEDown() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearForNEDown() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = 9; i != -1; i--) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearForNEArrayLengthUp(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearForNEArrayLengthUp(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearForNEArrayLengthUp(int[] x) {
+    int result = 0;
+    for (int i = 0; i != x.length; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearForNEArrayLengthDown(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearForNEArrayLengthDown(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearForNEArrayLengthDown(int[] x) {
+    int result = 0;
+    for (int i = x.length - 1; i != -1; i--) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearDoWhileUp() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    int i = 0;
+    do {
+      result += x[i++];
+    } while (i < 10);
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearDoWhileDown() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    int i = 9;
+    do {
+      result += x[i--];
+    } while (0 <= i);
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearLong() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearLong() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearLong() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Induction on a constant interval is done in higher precision than
+    // necessary, but is truncated at its use as a subscript.
+    for (long i = 0; i < 10; i++) {
+      result += x[(int)i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearLongAlt(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearLongAlt(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearLongAlt(int[] x) {
+    int result = 0;
+    // Induction on the array length is done in higher precision than
+    // necessary, but is truncated at its use as a subscript.
+    for (long i = 0; i < x.length; i++) {
+      result += x[(int)i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearShort() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearShort() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearShort() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Induction is done in short precision, but fits.
+    for (short i = 0; i < 10; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearChar() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearChar() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearChar() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Induction is done in char precision, but fits.
+    for (char i = 0; i < 10; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByte() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByte() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByte() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Induction is done in byte precision, but fits.
+    for (byte i = 0; i < 10; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int invariantFromPreLoop(int[] x, int y) {
+    int result = 0;
+    // Strange pre-loop that sets the upper bound.
+    int hi;
+    while (true) {
+      y = y % 3;
+      hi = x.length;
+      if (y != 123) break;
+    }
+    for (int i = 0; i < hi; i++) {
+       result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnTwoArrayLengths(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < b.length; j++) {
+        // Need to know j < b.length < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < b.length for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnOneArrayLength(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnParameter(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < n for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularStrictLower(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < i; j++) {
+        a[j] += 1;
+      }
+      for (int j = i - 1; j >= 0; j--) {
+        a[j] += 1;
+      }
+      for (int j = i; j < n; j++) {
+        a[j] += 1;
+      }
+      for (int j = n - 1; j >= i; j--) {
+        a[j] += 1;
+      }
+    }
+    verifyTriangular(a);
+  }
+
+  /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularStrictUpper(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j <= i; j++) {
+        a[j] += 1;
+      }
+      for (int j = i; j >= 0; j--) {
+        a[j] += 1;
+      }
+      for (int j = i + 1; j < n; j++) {
+        a[j] += 1;
+      }
+      for (int j = n - 1; j >= i + 1; j--) {
+        a[j] += 1;
+      }
+    }
+    verifyTriangular(a);
+  }
+
+  // Verifier for triangular loops.
+  private static void verifyTriangular(int[] a, int[] b, int m, int n) {
+    expectEquals(n, a.length);
+    for (int i = 0, k = m; i < n; i++) {
+      expectEquals(a[i], k);
+      if (k > 0) k--;
+    }
+    expectEquals(m, b.length);
+    for (int i = 0; i < m; i++) {
+      expectEquals(b[i], 1);
+    }
+  }
+
+  // Verifier for triangular loops.
+  private static void verifyTriangular(int[] a) {
+    int n = a.length;
+    for (int i = 0; i < n; i++) {
+      expectEquals(a[i], n + n);
+    }
+  }
+
+  /// CHECK-START: int[] Main.linearTriangularOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.linearTriangularOOB() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.linearTriangularOOB() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static int[] linearTriangularOOB() {
+    int[] a = new int[200];
+    try {
+      for (int i = 0; i < 200; i++) {
+        // The lower bound must be recognized as a lower-precision induction with
+        // arithmetic wrap-around to -128 once i exceeds 127.
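+        // Concretely, once i reaches 128, (byte) i wraps to -128 and the
+        // very first access a[j] throws ArrayIndexOutOfBoundsException.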
+        for (int j = (byte) i; j < 200; j++) {
+          a[j] += 1;
+        }
+      }
+    } catch (ArrayIndexOutOfBoundsException e) {
+      return a;
+    }
+    return null;  // failure if this is reached
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    int[] empty = { };
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+
+    // Linear and wrap-around.
+    expectEquals(0, linear(empty));
+    expectEquals(55, linear(x));
+    expectEquals(0, linearDown(empty));
+    expectEquals(55, linearDown(x));
+    expectEquals(0, linearObscure(empty));
+    expectEquals(55, linearObscure(x));
+    expectEquals(0, linearVeryObscure(empty));
+    expectEquals(55, linearVeryObscure(x));
+    expectEquals(0, hiddenStride(empty));
+    expectEquals(55, hiddenStride(x));
+    expectEquals(0, linearWhile(empty));
+    expectEquals(55, linearWhile(x));
+    expectEquals(0, linearThreeWayPhi(empty));
+    expectEquals(50, linearThreeWayPhi(x));
+    expectEquals(0, linearFourWayPhi(empty));
+    expectEquals(51, linearFourWayPhi(x));
+    expectEquals(0, wrapAroundThenLinear(empty));
+    expectEquals(55, wrapAroundThenLinear(x));
+    expectEquals(0, wrapAroundThenLinearThreeWayPhi(empty));
+    expectEquals(54, wrapAroundThenLinearThreeWayPhi(x));
+
+    // Linear with parameter.
+    sResult = 0;
+    try {
+      linearWithParameter(-1);
+    } catch (NegativeArraySizeException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    for (int n = 0; n < 32; n++) {
+      int[] r = linearWithParameter(n);
+      expectEquals(n, r.length);
+      for (int i = 0; i < n; i++) {
+        expectEquals(i, r[i]);
+      }
+    }
+
+    // Linear copy.
+    expectEquals(0, linearCopy(empty).length);
+    {
+      int[] r = linearCopy(x);
+      expectEquals(x.length, r.length);
+      for (int i = 0; i < x.length; i++) {
+        expectEquals(x[i], r[i]);
+      }
+    }
+
+    // Linear with non-unit strides.
+    expectEquals(55, linearByTwo(x));
+    expectEquals(25, linearByTwoSkip1(x));
+    expectEquals(25, linearByTwoSkip2(x));
+    expectEquals(56, linearWithCompoundStride());
+    expectEquals(66, linearWithLargePositiveStride());
+    expectEquals(66, linearWithVeryLargePositiveStride());
+    expectEquals(66, linearWithLargeNegativeStride());
+    expectEquals(66, linearWithVeryLargeNegativeStride());
+
+    // Special forms.
+    expectEquals(55, linearForNEUp());
+    expectEquals(55, linearForNEDown());
+    expectEquals(55, linearForNEArrayLengthUp(x));
+    expectEquals(55, linearForNEArrayLengthDown(x));
+    expectEquals(55, linearDoWhileUp());
+    expectEquals(55, linearDoWhileDown());
+    expectEquals(55, linearLong());
+    expectEquals(55, linearLongAlt(x));
+    expectEquals(55, linearShort());
+    expectEquals(55, linearChar());
+    expectEquals(55, linearByte());
+    expectEquals(55, invariantFromPreLoop(x, 1));
+    linearTriangularOnTwoArrayLengths(10);
+    linearTriangularOnOneArrayLength(10);
+    linearTriangularOnParameter(10);
+    linearTriangularStrictLower(10);
+    linearTriangularStrictUpper(10);
+    {
+      int[] t = linearTriangularOOB();
+      for (int i = 0; i < 200; i++) {
+        expectEquals(i <= 127 ? i + 1 : 128, t[i]);
+      }
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-loops2/expected.txt b/test/530-checker-loops2/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops2/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops/info.txt b/test/530-checker-loops2/info.txt
similarity index 100%
copy from test/530-checker-loops/info.txt
copy to test/530-checker-loops2/info.txt
diff --git a/test/530-checker-loops2/src/Main.java b/test/530-checker-loops2/src/Main.java
new file mode 100644
index 0000000..7acf008
--- /dev/null
+++ b/test/530-checker-loops2/src/Main.java
@@ -0,0 +1,1205 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations.
+//
+public class Main {
+
+  static int sResult;
+
+  //
+  // Various sequence variables used in bound checks.
+  //
+
+  /// CHECK-START: void Main.bubble(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.bubble(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void bubble(int[] a) {
+    for (int i = a.length; --i >= 0;) {
+      for (int j = 0; j < i; j++) {
+        if (a[j] > a[j+1]) {
+          int tmp = a[j];
+          a[j]  = a[j+1];
+          a[j+1] = tmp;
+        }
+      }
+    }
+  }
+
+  /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int periodicIdiom(int tc) {
+    int[] x = { 1, 3 };
+    // Loop with periodic sequence (0, 1).
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
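
The closed form used by the test harness for this method follows directly from the periodic induction: each full period of the sequence (0, 1) contributes x[0] + x[1] = 1 + 3 = 4, and an odd trailing iteration adds x[0] = 1. A minimal standalone sketch (class name illustrative, not part of the patch) that checks the closed form against the loop:

```java
public class PeriodicIdiomSketch {
  public static void main(String[] args) {
    int[] x = { 1, 3 };
    for (int tc = 0; tc < 32; tc++) {
      int sum = 0;
      for (int i = 0, k = 0; i < tc; i++) {
        sum += x[k];
        k = 1 - k;  // periodic induction: 0, 1, 0, 1, ...
      }
      // (tc >> 1) << 2 in the harness is (tc / 2) * 4; an odd tc adds one x[0].
      int closedForm = ((tc >> 1) << 2) + ((tc & 1) != 0 ? 1 : 0);
      if (sum != closedForm) throw new Error("mismatch at tc=" + tc);
    }
    System.out.println("ok");
  }
}
```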
+
+  /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int periodicSequence2(int tc) {
+    int[] x = { 1, 3 };
+    // Loop with periodic sequence (0, 1).
+    int k = 0;
+    int l = 1;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k];
+      int t = l;
+      l = k;
+      k = t;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicSequence4(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int periodicSequence4(int tc) {
+    int[] x = { 1, 3, 5, 7 };
+    // Loop with periodic sequence (0, 1, 2, 3).
+    int k = 0;
+    int l = 1;
+    int m = 2;
+    int n = 3;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k] + x[l] + x[m] + x[n];  // all used at once
+      int t = n;
+      n = k;
+      k = l;
+      l = m;
+      m = t;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightUp1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justRightUp1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int justRightUp1() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MAX_VALUE - 10, k = 0; i < Integer.MAX_VALUE; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightUp2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justRightUp2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int justRightUp2() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MAX_VALUE - 10; i < Integer.MAX_VALUE; i++) {
+      result += x[i - Integer.MAX_VALUE + 10];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightUp3() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justRightUp3() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int justRightUp3() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MAX_VALUE - 10, k = 0; i <= Integer.MAX_VALUE - 1; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justOOBUp() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBUp() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBUp() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static int justOOBUp() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Infinite loop!
+    for (int i = Integer.MAX_VALUE - 9, k = 0; i <= Integer.MAX_VALUE; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
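
The "Infinite loop!" comment is exact: no int value is ever greater than Integer.MAX_VALUE, so the condition i <= Integer.MAX_VALUE can only be left via the exception once k reaches 10. A two-line demonstration of the wrap that makes the test vacuous (illustrative, not part of the patch):

```java
public class IntWrapDemo {
  public static void main(String[] args) {
    int i = Integer.MAX_VALUE;
    System.out.println(i <= Integer.MAX_VALUE);  // true for every int
    i++;  // silent two's-complement wrap-around
    System.out.println(i == Integer.MIN_VALUE);  // true: 2147483647 + 1 == -2147483648
  }
}
```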
+
+  /// CHECK-START: int Main.justRightDown1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justRightDown1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int justRightDown1() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MIN_VALUE + 10, k = 0; i > Integer.MIN_VALUE; i--) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightDown2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justRightDown2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int justRightDown2() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MIN_VALUE + 10; i > Integer.MIN_VALUE; i--) {
+      result += x[Integer.MAX_VALUE + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightDown3() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justRightDown3() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int justRightDown3() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MIN_VALUE + 10, k = 0; i >= Integer.MIN_VALUE + 1; i--) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justOOBDown() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBDown() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBDown() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static int justOOBDown() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Infinite loop!
+    for (int i = Integer.MIN_VALUE + 9, k = 0; i >= Integer.MIN_VALUE; i--) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void lowerOOB(int[] x) {
+    // OOB!
+    for (int i = -1; i < x.length; i++) {
+      sResult += x[i];
+    }
+  }
+
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void upperOOB(int[] x) {
+    // OOB!
+    for (int i = 0; i <= x.length; i++) {
+      sResult += x[i];
+    }
+  }
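
The harness later distinguishes these two failures by the partial sum: lowerOOB throws on its very first access x[-1] (sResult stays 0), while upperOOB sums all ten elements before x[10] throws. A standalone sketch of the latter behavior (class name assumed for illustration):

```java
public class PartialSumDemo {
  public static void main(String[] args) {
    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    int sum = 0;
    try {
      for (int i = 0; i <= x.length; i++) {
        sum += x[i];  // x[10] throws only after the full array has been summed
      }
    } catch (ArrayIndexOutOfBoundsException e) {
      System.out.println("partial sum: " + sum);  // prints 55
    }
  }
}
```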
+
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void doWhileUpOOB() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int i = 0;
+    // OOB!
+    do {
+      sResult += x[i++];
+    } while (i <= x.length);
+  }
+
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void doWhileDownOOB() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int i = x.length - 1;
+    // OOB!
+    do {
+      sResult += x[i--];
+    } while (-1 <= i);
+  }
+
+  /// CHECK-START: void Main.justRightTriangular1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justRightTriangular1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void justRightTriangular1() {
+    int[] a = { 1 };
+    for (int i = Integer.MIN_VALUE + 5; i <= Integer.MIN_VALUE + 10; i++) {
+      for (int j = Integer.MIN_VALUE + 4; j < i - 5; j++) {
+        sResult += a[j - (Integer.MIN_VALUE + 4)];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.justRightTriangular2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justRightTriangular2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void justRightTriangular2() {
+    int[] a = { 1 };
+    for (int i = Integer.MIN_VALUE + 5; i <= 10; i++) {
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.justOOBTriangular() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justOOBTriangular() BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.justOOBTriangular() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void justOOBTriangular() {
+    int[] a = { 1 };
+    for (int i = Integer.MIN_VALUE + 4; i <= 10; i++) {
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.hiddenOOB1(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenOOB1(int) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.hiddenOOB1(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void hiddenOOB1(int lo) {
+    int[] a = { 1 };
+    for (int i = lo; i <= 10; i++) {
+      // Dangerous loop where careless static range analysis would yield strict upper bound
+      // on index j of 5. When, for instance, lo and thus i = -2147483648, the upper bound
+      // becomes really positive due to arithmetic wrap-around, causing OOB.
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.hiddenOOB2(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenOOB2(int) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.hiddenOOB2(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void hiddenOOB2(int hi) {
+    int[] a = { 1 };
+    for (int i = 0; i < hi; i++) {
+      // Dangerous loop where careless static range analysis would yield strict lower bound
+      // on index j of 5. When, for instance, hi and thus i = 2147483647, the lower bound
+      // becomes really negative due to arithmetic wrap-around, causing OOB.
+      for (int j = 6; j > i + 5; j--) {
+        sResult += a[j - 6];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void hiddenOOB3(int hi) {
+    int[] a = { 11 };
+    for (int i = -1; i <= hi; i++) {
+      // Dangerous loop where careless static range analysis would yield strict lower bound
+      // on index j of 0. For large i, the initial value of j becomes really negative due
+      // to arithmetic wrap-around, causing OOB.
+      for (int j = i + 1; j < 1; j++) {
+        sResult += a[j];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.hiddenInfiniteOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenInfiniteOOB() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenInfiniteOOB() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void hiddenInfiniteOOB() {
+    int[] a = { 11 };
+    for (int i = -1; i <= 0; i++) {
+      // Dangerous loop where careless static range analysis would yield a safe upper bound
+      // of -3. In reality, due to arithmetic wrap-around (when i = -1, j <= 2147483647;
+      // whereas when i = 0, j <= -3), this is an infinite loop that goes OOB.
+      for (int j = -3; j <= 2147483646 * i - 3; j++) {
+        sResult += a[j + 3];
+      }
+    }
+  }
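
The parenthetical in the comment can be verified with plain int arithmetic: for i = -1, the bound 2147483646 * i - 3 is -2147483649 mathematically, which does not fit in 32 bits and wraps to +2147483647. A quick standalone check (illustrative):

```java
public class BoundWrapDemo {
  public static void main(String[] args) {
    int i = -1;
    System.out.println(2147483646 * i - 3);         // prints 2147483647 (wrapped)
    System.out.println((long) 2147483646 * i - 3);  // prints -2147483649 (exact)
  }
}
```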
+
+  /// CHECK-START: void Main.hiddenFiniteOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenFiniteOOB() BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.hiddenFiniteOOB() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void hiddenFiniteOOB() {
+    int[] a = { 111 };
+    for (int i = -1; i <= 0; i++) {
+      // Dangerous loop, similar to the one above, where the loop is now finite,
+      // but still goes out of bounds for i = -1 due to the large upper bound.
+      for (int j = -4; j < 2147483646 * i - 3; j++) {
+        sResult += a[j + 4];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.inductionOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.inductionOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.inductionOOB(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void inductionOOB(int[] a) {
+    // Careless range analysis would remove the bounds check.
+    // However, the narrower induction b wraps around arithmetically
+    // before it reaches the end of arrays longer than 127.
+    byte b = 0;
+    for (int i = 0; i < a.length; i++) {
+      a[b++] = i;
+    }
+  }
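
The wrap-around of the narrower induction is ordinary Java byte arithmetic: incrementing 127 narrows back to -128, so for arrays longer than 127 elements the store at iteration 128 uses index -128 and throws. A standalone demonstration of the wrap itself (illustrative):

```java
public class ByteWrapDemo {
  public static void main(String[] args) {
    byte b = 127;
    b++;  // b++ implies an implicit (byte) narrowing cast; 128 wraps to -128
    System.out.println(b);  // prints -128
  }
}
```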
+
+  /// CHECK-START: void Main.controlOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.controlOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.controlOOB(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void controlOOB(int[] a) {
+    // As above, but now the loop control also wraps around.
+    for (byte i = 0; i < a.length; i++) {
+      a[i] = -i;
+    }
+  }
+
+  /// CHECK-START: void Main.conversionOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.conversionOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.conversionOOB(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  private static void conversionOOB(int[] a) {
+    // As above, but with wrap around caused by an explicit conversion.
+    for (int i = 0; i < a.length; ) {
+      a[i] = i;
+      i = (byte) (i + 1);
+    }
+  }
+
+  /// CHECK-START: int Main.doNotHoist(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.doNotHoist(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static int doNotHoist(int[] a) {
+    int n = a.length;
+    int x = 0;
+    // BCE applies, but hoisting the guarded access out of the loop would crash it.
+    for (int i = -10000; i < 10000; i++) {
+      for (int j = 0; j <= 1; j++) {
+        if (0 <= i && i < n)
+          x += a[i];
+      }
+    }
+    return x;
+  }
+
+  /// CHECK-START: int[] Main.add() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.add() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int[] add() {
+    int[] a = new int[10];
+    for (int i = 0; i <= 3; i++) {
+      for (int j = 0; j <= 6; j++) {
+        a[i + j] += 1;
+      }
+    }
+    return a;
+  }
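
The expected pattern checked in main() is just the number of (i, j) pairs with i + j == n for i in [0, 3] and j in [0, 6]; since i + j stays in [0, 9], BCE can prove every access safe. A standalone computation of that histogram (illustrative, not part of the patch):

```java
import java.util.Arrays;

public class AddPatternSketch {
  public static void main(String[] args) {
    int[] count = new int[10];
    for (int i = 0; i <= 3; i++) {
      for (int j = 0; j <= 6; j++) {
        count[i + j]++;  // i + j ranges over [0, 9]: always in bounds
      }
    }
    System.out.println(Arrays.toString(count));
    // prints [1, 2, 3, 4, 4, 4, 4, 3, 2, 1] -- the e1 array in main()
  }
}
```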
+
+  /// CHECK-START: int[] Main.multiply1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.multiply1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int[] multiply1() {
+    int[] a = new int[10];
+    try {
+      for (int i = 0; i <= 3; i++) {
+        for (int j = 0; j <= 3; j++) {
+          // Range [0,9]: safe.
+          a[i * j] += 1;
+        }
+      }
+    } catch (Exception e) {
+      a[0] += 1000;
+    }
+    return a;
+  }
+
+  /// CHECK-START: int[] Main.multiply2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.multiply2() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.multiply2() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  static int[] multiply2() {
+    int[] a = new int[10];
+    try {
+      for (int i = -3; i <= 3; i++) {
+        for (int j = -3; j <= 3; j++) {
+          // Range [-9,9]: unsafe.
+          a[i * j] += 1;
+        }
+      }
+    } catch (Exception e) {
+      a[0] += 1000;
+    }
+    return a;
+  }
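
The e2 values checked in main() can be traced by hand: for i = -3 the products 9, 6, 3, 0 are written before i * j reaches -3 and throws, and the handler then adds 1000 to a[0]. A standalone replay (illustrative):

```java
import java.util.Arrays;

public class Multiply2Sketch {
  public static void main(String[] args) {
    int[] a = new int[10];
    try {
      for (int i = -3; i <= 3; i++) {
        for (int j = -3; j <= 3; j++) {
          a[i * j] += 1;  // range [-9, 9]: a negative index throws
        }
      }
    } catch (ArrayIndexOutOfBoundsException e) {
      a[0] += 1000;
    }
    System.out.println(Arrays.toString(a));
    // prints [1001, 0, 0, 1, 0, 0, 1, 0, 0, 1] -- the e2 array in main()
  }
}
```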
+
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  private static int linearDynamicBCE1(int[] x, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[i];
+    }
+    return result;
+  }
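
At the source level, the transformation the CHECK lines describe amounts to replacing the per-iteration null and bounds checks with one test in front of the loop. Below is a hand-written sketch, not compiler output; the exact guard shape is an assumption for illustration. A real deoptimization falls back to checked code, modeled here by a slow-path loop that keeps its implicit checks:

```java
static int linearSumSketch(int[] x, int lo, int hi) {
  int result = 0;
  if (x != null && 0 <= lo && hi <= x.length) {
    // Fast path: the guard proves every x[i] with lo <= i < hi is in bounds.
    for (int i = lo; i < hi; i++) {
      result += x[i];
    }
  } else {
    // Slow path: implicit checks remain and may throw, exactly as the
    // original (unoptimized) loop would.
    for (int i = lo; i < hi; i++) {
      result += x[i];
    }
  }
  return result;
}
```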
+
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[offset + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  private static int wrapAroundDynamicBCE(int[] x) {
+    int w = 9;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  private static int periodicDynamicBCE(int[] x) {
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // This loop could be infinite for hi = max int. Since i is also used
+    // as a subscript, however, dynamic bce can proceed.
+    int result = 0;
+    for (int i = lo; i <= hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // As above, but now the index is not used as a subscript,
+    // and dynamic bce is not applied.
+    int result = 0;
+    for (int k = 0, i = lo; i <= hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
+    int result = 0;
+    // Mix of int and long induction.
+    int k = 0;
+    for (long i = lo; i < hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEConstantRange(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG: ArrayGet    loop:<<InnerLoop>>
+  /// CHECK-DAG: If          loop:<<InnerLoop>>
+  /// CHECK-DAG: If          loop:<<OuterLoop:B\d+>>
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
+  /// CHECK-START: int Main.dynamicBCEConstantRange(int[]) BCE (after)
+  /// CHECK-DAG: ArrayGet   loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG: Deoptimize loop:<<OuterLoop:B\d+>>
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
+  /// CHECK-START: int Main.dynamicBCEConstantRange(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  //
+  //  No additional loop top tests were introduced.
+  /// CHECK-START: int Main.dynamicBCEConstantRange(int[]) BCE (after)
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-NOT: If
+  static int dynamicBCEConstantRange(int[] x) {
+    int result = 0;
+    for (int i = 2; i <= 6; i++) {
+      // Range analysis sees that the innermost loop is finite and always taken.
+      for (int j = i - 2; j <= i + 2; j++) {
+        result += x[j];
+      }
+    }
+    return result;
+  }
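
The harness value 125 for this method follows from the constant ranges: each outer iteration i sums the five consecutive elements x[i-2..i+2], and with x[j] = j + 1 that inner sum is 5 * (i + 1). A one-loop arithmetic check (illustrative):

```java
public class ConstantRangeSum {
  public static void main(String[] args) {
    int total = 0;
    for (int i = 2; i <= 6; i++) {
      total += 5 * (i + 1);  // sum of x[i-2..i+2] for x = { 1, ..., 10 }
    }
    System.out.println(total);  // prints 125
  }
}
```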
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop:B\d+>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  //  Order matters:
+  /// CHECK:              Deoptimize loop:<<Loop:B\d+>>
+  /// CHECK-NOT:          Goto       loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK:              Goto       loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
+    // Deliberately test array length on a before the loop so that only bounds checks
+    // on constant subscripts remain, making them a viable candidate for hoisting.
+    if (a.length == 0) {
+      return -1;
+    }
+    // Loop that allows BCE on x[i].
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += x[i];
+      if ((i % 10) != 0) {
+        // None of the subscripts inside a conditional are removed by dynamic bce,
+        // making them a candidate for deoptimization based on constant indices.
+        // Compiler should ensure the array loads are not subsequently hoisted
+        // "above" the deoptimization "barrier" on the bounds.
+        a[1][i] = 1;
+        a[2][i] = 2;
+        a[99][i] = 3;
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  //  For brevity, just test occurrence of at least one of each in the loop:
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-NOT: ArrayGet    loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize  loop:none
+  static int dynamicBCEAndConstantIndicesAllPrimTypes(int[] q,
+                                                      boolean[] r,
+                                                      byte[] s,
+                                                      char[] t,
+                                                      short[] u,
+                                                      int[] v,
+                                                      long[] w,
+                                                      float[] x,
+                                                      double[] y, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      // All constant index array references can be hoisted out of the loop during BCE on q[i].
+      result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
+                                        (int) w[0] + (int) x[0] + (int) y[0];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  static int dynamicBCEAndConstantIndexRefType(int[] q, Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      // Similar to above, but now the implicit call to intValue() may prevent hoisting
+      // z[0] itself during BCE on q[i]. Therefore, we just check BCE on q[i].
+      result += q[i] + z[0];
+    }
+    return result;
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    // Set to true to also run the expensive correctness tests.
+    boolean HEAVY = false;
+
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+
+    int[] a200 = new int[200];
+
+    // Sorting.
+    int[] sort = { 5, 4, 1, 9, 10, 2, 7, 6, 3, 8 };
+    bubble(sort);
+    for (int i = 0; i < 10; i++) {
+      expectEquals(sort[i], x[i]);
+    }
+
+    // Periodic adds (1, 3), one at a time.
+    expectEquals(0, periodicIdiom(-1));
+    for (int tc = 0; tc < 32; tc++) {
+      int expected = (tc >> 1) << 2;
+      if ((tc & 1) != 0)
+        expected += 1;
+      expectEquals(expected, periodicIdiom(tc));
+    }
+
+    // Periodic adds (1, 3), one at a time.
+    expectEquals(0, periodicSequence2(-1));
+    for (int tc = 0; tc < 32; tc++) {
+      int expected = (tc >> 1) << 2;
+      if ((tc & 1) != 0)
+        expected += 1;
+      expectEquals(expected, periodicSequence2(tc));
+    }
+
+    // Periodic adds (1, 3, 5, 7), all at once.
+    expectEquals(0, periodicSequence4(-1));
+    for (int tc = 0; tc < 32; tc++) {
+      expectEquals(tc * 16, periodicSequence4(tc));
+    }
+
+    // Large bounds.
+    expectEquals(55, justRightUp1());
+    expectEquals(55, justRightUp2());
+    expectEquals(55, justRightUp3());
+    expectEquals(55, justRightDown1());
+    expectEquals(55, justRightDown2());
+    expectEquals(55, justRightDown3());
+
+    // Large bounds OOB.
+    sResult = 0;
+    try {
+      justOOBUp();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      justOOBDown();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+
+    // Lower bound goes OOB.
+    sResult = 0;
+    try {
+      lowerOOB(x);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+
+    // Upper bound goes OOB.
+    sResult = 0;
+    try {
+      upperOOB(x);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Do while up goes OOB.
+    sResult = 0;
+    try {
+      doWhileUpOOB();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Do while down goes OOB.
+    sResult = 0;
+    try {
+      doWhileDownOOB();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Triangular.
+    sResult = 0;
+    justRightTriangular1();
+    expectEquals(1, sResult);
+    if (HEAVY) {
+      sResult = 0;
+      justRightTriangular2();
+      expectEquals(1, sResult);
+    }
+    sResult = 0;
+    try {
+      justOOBTriangular();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1001, sResult);
+
+    // Hidden OOB.
+    sResult = 0;
+    try {
+      hiddenOOB1(10);  // no OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      hiddenOOB1(-2147483648);  // OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1001, sResult);
+    sResult = 0;
+    try {
+      hiddenOOB2(1);  // no OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      hiddenOOB3(-1);  // no OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(11, sResult);
+
+    // Expensive hidden OOB test.
+    if (HEAVY) {
+      sResult = 0;
+      try {
+        hiddenOOB2(2147483647);  // OOB
+      } catch (ArrayIndexOutOfBoundsException e) {
+        sResult += 1000;
+      }
+      expectEquals(1002, sResult);
+      sResult = 0;
+      try {
+        hiddenOOB3(2147483647);  // OOB
+      } catch (ArrayIndexOutOfBoundsException e) {
+        sResult += 1000;
+      }
+      expectEquals(1011, sResult);
+    }
+
+    // More hidden OOB.
+    sResult = 0;
+    try {
+      hiddenInfiniteOOB();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1011, sResult);
+    sResult = 0;
+    try {
+      hiddenFiniteOOB();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1111, sResult);
+    sResult = 0;
+    try {
+      inductionOOB(a200);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    for (int i = 0; i < 200; i++) {
+      expectEquals(i < 128 ? i : 0, a200[i]);
+    }
+    sResult = 0;
+    try {
+      controlOOB(a200);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    for (int i = 0; i < 200; i++) {
+      expectEquals(i < 128 ? -i : 0, a200[i]);
+    }
+    sResult = 0;
+    try {
+      conversionOOB(a200);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    for (int i = 0; i < 200; i++) {
+      expectEquals(i < 128 ? i : 0, a200[i]);
+    }
+
+    // No hoisting after BCE.
+    expectEquals(110, doNotHoist(x));
+
+    // Addition.
+    {
+      int[] e1 = { 1, 2, 3, 4, 4, 4, 4, 3, 2, 1 };
+      int[] a1 = add();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a1[i], e1[i]);
+      }
+    }
+
+    // Multiplication.
+    {
+      int[] e1 = { 7, 1, 2, 2, 1, 0, 2, 0, 0, 1 };
+      int[] a1 = multiply1();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a1[i], e1[i]);
+      }
+      int[] e2 = { 1001, 0, 0, 1, 0, 0, 1, 0, 0, 1 };
+      int[] a2 = multiply2();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a2[i], e2[i]);
+      }
+    }
+
+    // Dynamic BCE.
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, -1, x.length);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE1(x, 0, x.length);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, 0, x.length + 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Dynamic BCE with offset.
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, -1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE2(x, 0, x.length, 0);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1054, sResult);
+
+    // Dynamic BCE candidates.
+    expectEquals(55, wrapAroundDynamicBCE(x));
+    expectEquals(15, periodicDynamicBCE(x));
+    expectEquals(55, dynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEMixedInductionTypes(x, 0, 10));
+    expectEquals(125, dynamicBCEConstantRange(x));
+
+    // Dynamic BCE combined with constant indices.
+    int[][] a;
+    a = new int[0][0];
+    expectEquals(-1, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    a = new int[100][10];
+    expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    for (int i = 0; i < 10; i++) {
+      expectEquals((i % 10) != 0 ? 1 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[2][i]);
+      expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
+    }
+    a = new int[3][10];
+    sResult = 0;
+    try {
+      expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    expectEquals(a[1][1], 1);
+    expectEquals(a[2][1], 2);
+
+    // Dynamic BCE combined with constant indices of all types.
+    boolean[] x1 = { true };
+    byte[] x2 = { 2 };
+    char[] x3 = { 3 };
+    short[] x4 = { 4 };
+    int[] x5 = { 5 };
+    long[] x6 = { 6 };
+    float[] x7 = { 7 };
+    double[] x8 = { 8 };
+    expectEquals(415,
+        dynamicBCEAndConstantIndicesAllPrimTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, 0, 10));
+    Integer[] x9 = { 9 };
+    expectEquals(145, dynamicBCEAndConstantIndexRefType(x, x9, 0, 10));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-loops3/expected.txt b/test/530-checker-loops3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops3/info.txt b/test/530-checker-loops3/info.txt
new file mode 100644
index 0000000..07d99a3
--- /dev/null
+++ b/test/530-checker-loops3/info.txt
@@ -0,0 +1 @@
+Test on loop optimizations, in particular loop-based dynamic bce.
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
new file mode 100644
index 0000000..6b5c657
--- /dev/null
+++ b/test/530-checker-loops3/src/Main.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations, in particular dynamic BCE. In all cases,
+// the bounds check on a[] is resolved statically. The bounds checks on
+// b[] exercise various scenarios. In all cases, loop-based dynamic BCE
+// is better than dominator-based BCE, since it generates the test
+// outside the loop.
+//
+public class Main {
+
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneConstantIndex(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[1];
+    }
+  }
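
The "two deopts" in the comment can be pictured at the source level: one hoisted null test and one hoisted bound test on b, both relevant only when the loop is taken at all. A sketch (not compiler output; deoptimize() is a stand-in for the runtime's fall-back to checked code and here simply throws):

```java
class OneConstantIndexSketch {
  static void deoptimize() { throw new IllegalStateException("deopt"); }

  static void run(int[] a, int[] b) {
    if (a.length > 0) {                 // guards matter only if the loop is taken
      if (b == null) deoptimize();      // hoisted null check
      if (b.length <= 1) deoptimize();  // hoisted bound check for b[1]
    }
    for (int i = 0; i < a.length; i++) {
      a[i] = b[1];  // no per-iteration checks remain
    }
  }
}
```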
+
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleConstantIndices(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[0] + b[1] + b[2];
+    }
+  }
+
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneInvariantIndex(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleInvariantIndices(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c-1] + b[c] + b[c+1];
+    }
+  }
+
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneUnitStride(int[] a, int[] b) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[i];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStrides(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // One redundant deopt is removed by simplifier.
+    // TODO: range information could remove another.
+    for (int i = 1; i < a.length - 1; i++) {
+      a[i] = b[i-1] + b[i] + b[i+1];
+    }
+  }
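
For the three-point stencil, the accesses b[i-1], b[i] and b[i+1] over i in [1, a.length-2] are all covered by range tests on the extreme indices 0 and a.length-1, which is why only end-point guards (plus the null test) need to survive. A sketch under stated assumptions: the compiler emits separate tests per offset and simplifies them later, whereas here they are collapsed into the minimal equivalent guard:

```java
class MultipleUnitStridesSketch {
  static void deoptimize() { throw new IllegalStateException("deopt"); }

  static void run(int[] a, int[] b) {
    if (a.length - 1 > 1) {                        // loop taken at least once
      if (b == null) deoptimize();                 // hoisted null check
      if (a.length - 1 >= b.length) deoptimize();  // largest access is b[(a.length - 2) + 1]
    }
    for (int i = 1; i < a.length - 1; i++) {
      a[i] = b[i - 1] + b[i] + b[i + 1];  // smallest access b[0] is trivially >= 0
    }
  }
}
```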
+
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStridesConditional(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // The two conditional references may be included, since they are in range.
+    // One redundant deopt is removed by simplifier.
+    for (int i = 2; i < a.length - 2; i++) {
+      int t = b[i-2] + b[i] + b[i+2] + (((i & 1) == 0) ? b[i+1] : b[i-1]);
+      a[i] = t;
+    }
+  }
+
+  /// CHECK-START: void Main.shifter(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void shifter(int[] x) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    // Two redundant deopts are removed by simplifier.
+    for (int i = 16; i < 80; i++) {
+      int t = x[i - 3] ^ x[i - 8] ^ x[i - 14] ^ x[i - 16];
+      x[i] = t << 1 | t >>> 31;
+    }
+  }
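
Aside on the kernel above: t << 1 | t >>> 31 is a rotate-left by one bit, i.e. Integer.rotateLeft(t, 1), and the xor-of-four-taps-then-rotate pattern matches the SHA-1 message schedule expansion. A quick standalone equivalence check (illustrative):

```java
import java.util.Random;

public class RotateDemo {
  public static void main(String[] args) {
    Random rnd = new Random(42);
    for (int n = 0; n < 1000; n++) {
      int t = rnd.nextInt();
      if ((t << 1 | t >>> 31) != Integer.rotateLeft(t, 1)) {
        throw new Error("rotate mismatch for " + t);
      }
    }
    System.out.println("rotate identity holds");
  }
}
```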
+
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void stencil(int[] array, int start, int end) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    int[] a = new int[10];
+    int[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int[] b1 = { 100 };
+
+    oneConstantIndex(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneConstantIndex(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleConstantIndices(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleConstantIndices(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneInvariantIndex(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneInvariantIndex(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleInvariantIndices(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleInvariantIndices(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneUnitStride(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(i + 1, a[i]);
+    }
+    try {
+      oneUnitStride(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+      expectEquals(100, a[0]);
+    }
+
+    multipleUnitStrides(a, b);
+    for (int i = 1; i < a.length - 1; i++) {
+      expectEquals(3 * i + 3, a[i]);
+    }
+    try {
+      multipleUnitStrides(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleUnitStridesConditional(a, b);
+    for (int i = 2; i < a.length - 2; i++) {
+      int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
+      expectEquals(e, a[i]);
+    }
+    try {
+      multipleUnitStridesConditional(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index c766aaa..89875d7 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -22,9 +22,12 @@
     return radius * radius * Math.PI;
   }
   private double radius;
-};
+}
 
 class TestClass {
+  static {
+    sTestClassObj = new TestClass(-1, -2);
+  }
   TestClass() {
   }
   TestClass(int i, int j) {
@@ -35,17 +38,41 @@
   int j;
   volatile int k;
   TestClass next;
+  String str;
   static int si;
-};
+  static TestClass sTestClassObj;
+}
 
 class SubTestClass extends TestClass {
   int k;
-};
+}
 
 class TestClass2 {
   int i;
   int j;
-};
+}
+
+class TestClass3 {
+  float floatField = 8.0f;
+  boolean test1 = true;
+}
+
+class Finalizable {
+  static boolean sVisited = false;
+  static final int VALUE = 0xbeef;
+  int i;
+
+  protected void finalize() {
+    if (i != VALUE) {
+      System.out.println("Where is the beef?");
+    }
+    sVisited = true;
+  }
+}
+
+interface Filter {
+  public boolean isValid(int i);
+}
 
 public class Main {
 
@@ -55,8 +82,8 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
-  /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
   static double calcCircleArea(double radius) {
@@ -101,33 +128,39 @@
   }
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (before)
-  /// CHECK: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
-  /// CHECK: InstanceFieldSet
+  /// CHECK: StaticFieldGet
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (after)
-  /// CHECK: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
-  /// CHECK: InstanceFieldSet
+  /// CHECK: StaticFieldGet
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
+  /// CHECK-NOT: StaticFieldGet
 
-  // A new allocation shouldn't alias with pre-existing values.
+  // A new allocation (even non-singleton) shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
+    TestClass obj1 = TestClass.sTestClassObj;
+    TestClass obj2 = new TestClass();  // Cannot alias with obj or obj1 which pre-exist.
+    obj.next = obj2;  // Make obj2 a non-singleton.
+    // All stores below need to stay since obj/obj1/obj2 are not singletons.
     obj.i = 1;
-    obj.next.j = 2;
-    TestClass obj2 = new TestClass();
+    obj1.j = 2;
+    // Following stores won't kill values of obj.i and obj1.j.
     obj2.i = 3;
     obj2.j = 4;
-    return obj.i + obj.next.j + obj2.i + obj2.j;
+    return obj.i + obj1.j + obj2.i + obj2.j;
   }
 
   /// CHECK-START: int Main.test4(TestClass, boolean) load_store_elimination (before)
@@ -222,8 +255,8 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test8() load_store_elimination (after)
-  /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK: InvokeVirtual
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: InstanceFieldGet
@@ -380,9 +413,9 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test16() load_store_elimination (after)
-  /// CHECK: NewInstance
-  /// CHECK-NOT: StaticFieldSet
-  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
 
   // Test inlined constructor.
   static int test16() {
@@ -397,9 +430,9 @@
 
   /// CHECK-START: int Main.test17() load_store_elimination (after)
   /// CHECK: <<Const0:i\d+>> IntConstant 0
-  /// CHECK: NewInstance
-  /// CHECK-NOT: StaticFieldSet
-  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
   /// CHECK: Return [<<Const0>>]
 
   // Test getting default value.
@@ -424,16 +457,14 @@
   }
 
   /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (before)
-  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
 
   /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (after)
-  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
+  /// CHECK-NOT: {{f\d+}} ArrayGet
 
-  // I/F, J/D aliasing should keep the load/store.
+  // I/F, J/D aliasing should no longer happen, so LSE should eliminate the load.
   static float test19(float[] fa1, float[] fa2) {
     fa1[0] = fa2[0];
     return fa1[0];
@@ -455,19 +486,250 @@
     return obj;
   }
 
-  public static void assertIntEquals(int expected, int result) {
+  /// CHECK-START: void Main.test21(TestClass) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: void Main.test21(TestClass) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  // Loop side effects can kill heap values, stores need to be kept in that case.
+  static void test21(TestClass obj0) {
+    TestClass obj = new TestClass();
+    obj0.str = "abc";
+    obj.str = "abc";
+    for (int i = 0; i < 2; i++) {
+      // Generate some loop side effect that writes into obj.
+      obj.str = "def";
+    }
+    System.out.print(obj0.str.substring(0, 0) + obj.str.substring(0, 0));
+  }
+
+  /// CHECK-START: int Main.test22() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test22() load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // For a singleton, loop side effects can kill its field values only if:
+  // (1) it dominates the loop header, and
+  // (2) its fields are stored into inside a loop.
+  static int test22() {
+    int sum = 0;
+    TestClass obj1 = new TestClass();
+    obj1.i = 2;    // This store can be eliminated since obj1 is never stored into inside a loop.
+    for (int i = 0; i < 2; i++) {
+      TestClass obj2 = new TestClass();
+      obj2.i = 3;  // This store can be eliminated since the singleton is inside the loop.
+      sum += obj2.i;
+    }
+    TestClass obj3 = new TestClass();
+    obj3.i = 5;    // This store can be eliminated since the singleton is created after the loop.
+    sum += obj1.i + obj3.i;
+    return sum;
+  }
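+
+  // A hypothetical contrast (illustration only, not exercised here): when the
+  // singleton does dominate the loop header and is stored into inside the
+  // loop, as obj is in test21 above, the store before the loop must be kept:
+  //
+  //   TestClass obj = new TestClass();
+  //   obj.i = 2;                  // Kept: the loop store below may overwrite it.
+  //   for (int i = 0; i < 2; i++) {
+  //     obj.i = 3;                // Loop side effect kills the earlier value.
+  //   }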
+
+  /// CHECK-START: int Main.test23(boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.test23(boolean) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  // Test store elimination on merging.
+  static int test23(boolean b) {
+    TestClass obj = new TestClass();
+    obj.i = 3;      // This store can be eliminated since the value flows into each branch.
+    if (b) {
+      obj.i += 1;   // This store cannot be eliminated due to the merge later.
+    } else {
+      obj.i += 2;   // This store cannot be eliminated due to the merge later.
+    }
+    return obj.i;
+  }
+
+  /// CHECK-START: float Main.test24() load_store_elimination (before)
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Obj:l\d+>>      NewInstance
+  /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<True>>]
+  /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<Float8>>]
+  /// CHECK-DAG:     <<GetTest:z\d+>>  InstanceFieldGet [<<Obj>>]
+  /// CHECK-DAG:     <<GetField:f\d+>> InstanceFieldGet [<<Obj>>]
+  /// CHECK-DAG:     <<Select:f\d+>>   Select [<<Float42>>,<<GetField>>,<<GetTest>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  /// CHECK-START: float Main.test24() load_store_elimination (after)
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Select:f\d+>>   Select [<<Float42>>,<<Float8>>,<<True>>]
+  /// CHECK-DAG:                       Return [<<Select>>]
+
+  static float test24() {
+    float a = 42.0f;
+    TestClass3 obj = new TestClass3();
+    if (obj.test1) {
+      a = obj.floatField;
+    }
+    return a;
+  }
+
+  /// CHECK-START: void Main.testFinalizable() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: void Main.testFinalizable() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  // Allocations and stores into finalizable objects cannot be eliminated.
+  static void testFinalizable() {
+    Finalizable finalizable = new Finalizable();
+    finalizable.i = Finalizable.VALUE;
+  }
+
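+  // Returns a weak reference to an otherwise unreachable object; once the
+  // reference is cleared, we know at least one full GC cycle has run.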
+  static java.lang.ref.WeakReference<Object> getWeakReference() {
+    return new java.lang.ref.WeakReference<>(new Object());
+  }
+
+  static void testFinalizableByForcingGc() {
+    testFinalizable();
+    java.lang.ref.WeakReference<Object> reference = getWeakReference();
+
+    Runtime runtime = Runtime.getRuntime();
+    for (int i = 0; i < 20; ++i) {
+      runtime.gc();
+      System.runFinalization();
+      try {
+        Thread.sleep(1);
+      } catch (InterruptedException e) {
+        throw new AssertionError(e);
+      }
+
+      // Check to see if the weak reference has been garbage collected.
+      if (reference.get() == null) {
+        // Sleep a little longer to give the finalizer time to run.
+        try {
+          Thread.sleep(100);
+        } catch (InterruptedException e) {
+          throw new AssertionError(e);
+        }
+        if (!Finalizable.sVisited) {
+          System.out.println("finalize() not called.");
+        }
+        return;
+      }
+    }
+    System.out.println("testFinalizableByForcingGc() failed to force gc.");
+  }
+
+  /// CHECK-START: int Main.$noinline$testHSelect(boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Select
+
+  /// CHECK-START: int Main.$noinline$testHSelect(boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Select
+
+  // Test that HSelect creates an alias.
+  static int $noinline$testHSelect(boolean b) {
+    if (sFlag) {
+      throw new Error();
+    }
+    TestClass obj = new TestClass();
+    TestClass obj2 = null;
+    obj.i = 0xdead;
+    if (b) {
+      obj2 = obj;
+    }
+    return obj2.i;
+  }
+
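+  // Sums the elements accepted by the filter. Once this and isValid() are
+  // inlined into sumWithinRange() below, the anonymous Filter allocation
+  // becomes a singleton that LSE can remove.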
+  static int sumWithFilter(int[] array, Filter f) {
+    int sum = 0;
+    for (int i = 0; i < array.length; i++) {
+      if (f.isValid(array[i])) {
+        sum += array[i];
+      }
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.sumWithinRange(int[], int, int) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.sumWithinRange(int[], int, int) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // A lambda-style allocation can be eliminated after inlining.
+  static int sumWithinRange(int[] array, final int low, final int high) {
+    Filter filter = new Filter() {
+      public boolean isValid(int i) {
+        return (i >= low) && (i <= high);
+      }
+    };
+    return sumWithFilter(array, filter);
+  }
+
+  static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertFloatEquals(float expected, float result) {
+  static void assertFloatEquals(float result, float expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertDoubleEquals(double expected, double result) {
+  static void assertDoubleEquals(double result, double expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
@@ -508,5 +770,16 @@
     float[] fa2 = { 1.8f };
     assertFloatEquals(test19(fa1, fa2), 1.8f);
     assertFloatEquals(test20().i, 0);
+    test21(new TestClass());
+    assertIntEquals(test22(), 13);
+    assertIntEquals(test23(true), 4);
+    assertIntEquals(test23(false), 5);
+    assertFloatEquals(test24(), 8.0f);
+    testFinalizableByForcingGc();
+    assertIntEquals($noinline$testHSelect(true), 0xdead);
+    int[] array = {2, 5, 9, -1, -3, 10, 8, 4};
+    assertIntEquals(sumWithinRange(array, 1, 5), 11);
   }
+
+  static boolean sFlag;
 }
diff --git a/test/530-checker-regression-reftype-final/smali/TestCase.smali b/test/530-checker-regression-reftype-final/smali/TestCase.smali
index 8fd7bb7..44facfc 100644
--- a/test/530-checker-regression-reftype-final/smali/TestCase.smali
+++ b/test/530-checker-regression-reftype-final/smali/TestCase.smali
@@ -23,7 +23,7 @@
 # inline any methods from array classes, this bug cannot be triggered and we
 # verify it using Checker.
 
-## CHECK-START: void TestCase.testInliner() reference_type_propagation_after_inlining (before)
+## CHECK-START: void TestCase.testInliner() inliner (after)
 ## CHECK-DAG:             CheckCast [<<Phi:l\d+>>,{{l\d+}}]
 ## CHECK-DAG:    <<Phi>>  Phi klass:java.lang.Object[] exact:false
 
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 1b784ae..26475ae 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -16,9 +16,270 @@
 
 
 public class Main {
+  public static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertBooleanEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertStringContains(String searchTerm, String result) {
+    if (result == null || !result.contains(searchTerm)) {
+      throw new Error("Search term: " + searchTerm + ", not found in: " + result);
+    }
+  }
+
   public static void main(String[] args) {
     stringEqualsSame();
     stringArgumentNotNull("Foo");
+
+    assertIntEquals(0, $opt$noinline$getStringLength(""));
+    assertIntEquals(3, $opt$noinline$getStringLength("abc"));
+    assertIntEquals(10, $opt$noinline$getStringLength("0123456789"));
+
+    assertBooleanEquals(true, $opt$noinline$isStringEmpty(""));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("abc"));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("0123456789"));
+
+    assertCharEquals('a', $opt$noinline$stringCharAt("a", 0));
+    assertCharEquals('a', $opt$noinline$stringCharAt("abc", 0));
+    assertCharEquals('b', $opt$noinline$stringCharAt("abc", 1));
+    assertCharEquals('c', $opt$noinline$stringCharAt("abc", 2));
+    assertCharEquals('7', $opt$noinline$stringCharAt("0123456789", 7));
+
+    try {
+      $opt$noinline$stringCharAt("abc", -1);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+    try {
+      $opt$noinline$stringCharAt("abc", 3);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+    try {
+      $opt$noinline$stringCharAt("abc", Integer.MAX_VALUE);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+
+    assertCharEquals('7', $opt$noinline$stringCharAtCatch("0123456789", 7));
+    assertCharEquals('\0', $opt$noinline$stringCharAtCatch("0123456789", 10));
+
+    assertIntEquals('a' + 'b' + 'c', $opt$noinline$stringSumChars("abc"));
+    assertIntEquals('a' + 'b' + 'c', $opt$noinline$stringSumLeadingChars("abcdef", 3));
+    try {
+      $opt$noinline$stringSumLeadingChars("abcdef", 7);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringSumLeadingChars",
+                           sioob.getStackTrace()[1].toString());
+    }
+    assertIntEquals('a' + 'b' + 'c' + 'd', $opt$noinline$stringSum4LeadingChars("abcdef"));
+    try {
+      $opt$noinline$stringSum4LeadingChars("abc");
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringSum4LeadingChars",
+                           sioob.getStackTrace()[1].toString());
+    }
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Length:i\d+>>   InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+
+  static public int $opt$noinline$getStringLength(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.length();
+  }
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  InvokeVirtual intrinsic:StringIsEmpty
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  Equal [<<Length>>,<<Const0>>]
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringIsEmpty
+
+  static public boolean $opt$noinline$isStringEmpty(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.isEmpty();
+  }
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Char:c\d+>>     InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Pos:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck [<<Pos>>,<<Length>>] is_string_char_at:true
+  /// CHECK-DAG:  <<Char:c\d+>>     ArrayGet [<<NullCk>>,<<Pos>>] is_string_char_at:true
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  static public char $opt$noinline$stringCharAt(String s, int pos) {
+    if (doThrow) { throw new Error(); }
+    return s.charAt(pos);
+  }
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Char:c\d+>>     InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Pos:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck [<<Pos>>,<<Length>>] is_string_char_at:true
+  /// CHECK-DAG:  <<Char:c\d+>>     ArrayGet [<<NullCk>>,<<Pos>>] is_string_char_at:true
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  static public char $opt$noinline$stringCharAtCatch(String s, int pos) {
+    if (doThrow) { throw new Error(); }
+    try {
+      return s.charAt(pos);
+    } catch (StringIndexOutOfBoundsException ignored) {
+      return '\0';
+    }
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) GVN (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-NOT:                    ArrayLength is_string_length:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck
+
+  static public int $opt$noinline$stringSumChars(String s) {
+    if (doThrow) { throw new Error(); }
+    int sum = 0;
+    int len = s.length();
+    for (int i = 0; i < len; ++i) {
+      sum += s.charAt(i);
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) BCE (after)
+  /// CHECK-DAG:                    Deoptimize env:[[{{[^\]]*}}]]
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck is_string_char_at:true
+
+  static public int $opt$noinline$stringSumLeadingChars(String s, int n) {
+    if (doThrow) { throw new Error(); }
+    int sum = 0;
+    for (int i = 0; i < n; ++i) {
+      sum += s.charAt(i);
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) BCE (after)
+  /// CHECK-DAG:                    Deoptimize env:[[{{[^\]]*}}]]
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck is_string_char_at:true
+
+  static public int $opt$noinline$stringSum4LeadingChars(String s) {
+    if (doThrow) { throw new Error(); }
+    int sum = s.charAt(0) + s.charAt(1) + s.charAt(2) + s.charAt(3);
+    return sum;
   }
 
   /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
@@ -35,7 +296,7 @@
   }
 
   /// CHECK-START: boolean Main.stringEqualsNull() register (after)
-  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      <<Invoke:z\d+>> InvokeVirtual
   /// CHECK:      Return [<<Invoke>>]
   public static boolean stringEqualsNull() {
     String o = (String)myObject;
@@ -47,8 +308,28 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
   /// CHECK-NOT:      test
+
+  /// CHECK-START-X86_64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      test
+
+  /// CHECK-START-ARM: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  // CompareAndBranchIfZero() may emit either CBZ or CMP+BEQ.
+  /// CHECK-NOT:      cbz
+  /// CHECK-NOT:      cmp {{r\d+}}, #0
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      cbz
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{w.*,}} {{w.*}}
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
     return "foo".equals(obj);
@@ -56,12 +337,53 @@
 
   // The test is very brittle, as it depends on the order in which we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeStaticOrDirect
-  /// CHECK:      test
-  /// CHECK:      jz/eq
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
   // Check that we don't try to compare the classes.
-  /// CHECK-NOT:  mov
-  /// CHECK:      cmp
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // The test is very brittle, as it depends on the order in which we emit instructions.
+  /// CHECK-START-X86_64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // The test is brittle, as it depends on the class offset being 0.
+  /// CHECK-START-ARM: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          {{cbz|cmp}}
+  // Check that we don't try to compare the classes.
+  // The disassembler currently emits the offset 0 explicitly, but don't rely on it.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for length comparison, but these may be emitted in a
+  // different order, so repeat the check twice.
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  // The test is brittle, as it depends on the class offset being 0.
+  /// CHECK-START-ARM64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          cbz
+  // Check that we don't try to compare the classes.
+  // The disassembler currently does not emit the offset 0 explicitly, but don't rely on it.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for length comparison, but these may be emitted in a
+  // different order, so repeat the check twice.
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
   public static boolean stringArgumentIsString() {
     return "foo".equals(myString);
   }
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
index 30ccc56..7c124ca 100644
--- a/test/537-checker-arraycopy/src/Main.java
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -50,7 +50,7 @@
   }
 
   /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
   /// CHECK-NOT:      test
   /// CHECK-NOT:      call
   /// CHECK:          ReturnVoid
@@ -65,7 +65,36 @@
     System.arraycopy(obj, 1, obj, 0, 1);
   }
 
+  // Test case for having enough registers on x86 for the arraycopy intrinsic.
+  /// CHECK-START-X86: void Main.arraycopy(java.lang.Object[], int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          ReturnVoid
   public static void arraycopy(Object[] obj, int pos) {
     System.arraycopy(obj, pos, obj, 0, obj.length);
   }
+
+  // Test case for having enough registers on x86 for the arraycopy intrinsic
+  // when an input is passed twice.
+  /// CHECK-START-X86: int Main.arraycopy2(java.lang.Object[], int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          Return
+  public static int arraycopy2(Object[] obj, int pos) {
+    System.arraycopy(obj, pos, obj, pos - 1, obj.length);
+    return pos;
+  }
+
+  // Test case for not having enough registers on x86. The arraycopy intrinsic
+  // will ask for the length to be kept on the stack and load it from there.
+  /// CHECK-START-X86: int Main.arraycopy3(java.lang.Object[], java.lang.Object[], int, int, int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK:          mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          Return
+  public static int arraycopy3(Object[] obj1, Object[] obj2, int input1, int input3, int input4) {
+    System.arraycopy(obj1, input1, obj2, input3, input4);
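+    // Using obj1 and obj2 below (presumably) keeps them live across the
+    // intrinsic call, adding to the register pressure this test relies on.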
+    System.out.println(obj1);
+    System.out.println(obj2);
+    return input1 + input3 + input4;
+  }
 }
diff --git a/test/537-checker-debuggable/smali/TestCase.smali b/test/537-checker-debuggable/smali/TestCase.smali
index 8e6c7ef..5714d3a 100644
--- a/test/537-checker-debuggable/smali/TestCase.smali
+++ b/test/537-checker-debuggable/smali/TestCase.smali
@@ -20,10 +20,10 @@
 # be eliminated in normal mode but kept live in debuggable mode. Test that
 # Checker runs the correct test for each compilation mode.
 
-## CHECK-START: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK-START: int TestCase.deadPhi(int, int, int) builder (after)
 ## CHECK-NOT:         Phi
 
-## CHECK-START-DEBUGGABLE: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: int TestCase.deadPhi(int, int, int) builder (after)
 ## CHECK:             Phi
 
 .method public static deadPhi(III)I
diff --git a/test/537-checker-inline-and-unverified/src/Main.java b/test/537-checker-inline-and-unverified/src/Main.java
index bdc14b0..b9d5fc9 100644
--- a/test/537-checker-inline-and-unverified/src/Main.java
+++ b/test/537-checker-inline-and-unverified/src/Main.java
@@ -45,12 +45,14 @@
     }
 
     public static boolean $opt$noinline$testNoInline() {
+        boolean result = true;
         try {
-            return null instanceof InaccessibleClass;
+            result = (null instanceof InaccessibleClass);
+            throw new Error("Unreachable");
         } catch (IllegalAccessError e) {
             // expected
         }
-        return false;
+        return result;
     }
 
     public static boolean $opt$inline$testInline() {
diff --git a/test/530-checker-loops/expected.txt b/test/537-checker-jump-over-jump/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/537-checker-jump-over-jump/expected.txt
diff --git a/test/537-checker-jump-over-jump/info.txt b/test/537-checker-jump-over-jump/info.txt
new file mode 100644
index 0000000..aeb30bb
--- /dev/null
+++ b/test/537-checker-jump-over-jump/info.txt
@@ -0,0 +1 @@
+Test for X86-64 elimination of jump over jump.
diff --git a/test/537-checker-jump-over-jump/src/Main.java b/test/537-checker-jump-over-jump/src/Main.java
new file mode 100644
index 0000000..7a58e8b
--- /dev/null
+++ b/test/537-checker-jump-over-jump/src/Main.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static int FIBCOUNT = 64;
+  public static int[] fibs;
+
+  /// CHECK-START-X86_64: int Main.test() disassembly (after)
+  /// CHECK-DAG:   <<Zero:i\d+>>        IntConstant 0
+  //
+  /// CHECK:                            If
+  /// CHECK-NEXT:                       cmp
+  /// CHECK-NEXT:                       jle/ng
+  //
+  /// CHECK-DAG:   <<Fibs:l\d+>>        StaticFieldGet
+  /// CHECK-DAG:                        NullCheck [<<Fibs>>]
+  /// CHECK-NOT:                        jmp
+  /// CHECK-DAG:   <<FibsAtZero:i\d+>>  ArrayGet [<<Fibs>>,<<Zero>>]
+  /// CHECK-DAG:                        Return [<<FibsAtZero>>]
+  //
+  // Checks that there is no conditional jump over a `jmp`
+  // instruction. The `ArrayGet` instruction is in the next block.
+  //
+  // Note that the `StaticFieldGet` HIR instruction above (captured as
+  // `Fibs`) can produce a `jmp` x86-64 instruction when read barriers
+  // are enabled (to jump into the read barrier slow path), which is
+  // different from the `jmp` in the `CHECK-NOT` assertion.
+  public static int test() {
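+    // The loop is written with the exit test at the top so that the compiler
+    // is tempted to emit a conditional branch over an unconditional jmp; the
+    // assertions above check that this jump-over-jump is eliminated.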
+    for (int i = 1; ; i++) {
+      if (i >= FIBCOUNT) {
+        return fibs[0];
+      }
+      fibs[i] = (i + fibs[(i - 1)]);
+    }
+  }
+
+  public static void main(String[] args) {
+    fibs = new int[FIBCOUNT];
+    fibs[0] = 1;
+    test();
+  }
+}
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index d8618e3..f6713a2 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -260,6 +260,368 @@
     return arg ^ 0xf00000000000000fL;
   }
 
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK:                lsls{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                adc{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK:                add
+  /// CHECK:                adc
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            shl
+
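+  // LSLS shifts the low word left and leaves its old MSB in the carry flag;
+  // ADC with the high word as both operands computes high + high + carry,
+  // i.e. (high << 1) | carry, a 64-bit shift left by one in two instructions.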
+  public static long shl1(long arg) {
+    return arg << 1;
+  }
+
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #2
+
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
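+  // For a constant shift n with 1 < n < 32, the long shift lowers to three
+  // instructions: shift the high word left by n, OR in the low word shifted
+  // right by (32 - n), then shift the low word left by n (as checked above).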
+  public static long shl2(long arg) {
+    return arg << 2;
+  }
+
+  /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #31
+
+  /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl31(long arg) {
+    return arg << 31;
+  }
+
+  /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+  /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}}
+
+  public static long shl32(long arg) {
+    return arg << 32;
+  }
+
+  /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl33(long arg) {
+    return arg << 33;
+  }
+
+  /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl63(long arg) {
+    return arg << 63;
+  }
+
+  /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+  /// CHECK:                asrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
+
+  /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
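+  // ASRS shifts the high word right and leaves the bit shifted out in the
+  // carry flag; RRX then rotates the low word right through carry, completing
+  // a 64-bit arithmetic shift by one in two instructions.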
+  public static long shr1(long arg) {
+    return arg >> 1;
+  }
+
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr2(long arg) {
+    return arg >> 2;
+  }
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr31(long arg) {
+    return arg >> 31;
+  }
+
+  /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov {{r\d+}}, <<high>>
+
+  /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}}
+
+  public static long shr32(long arg) {
+    return arg >> 32;
+  }
+
+  /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr33(long arg) {
+    return arg >> 33;
+  }
+
+  /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr63(long arg) {
+    return arg >> 63;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+  /// CHECK:                lsrs{{|.w}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
+
+  /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr1(long arg) {
+    return arg >>> 1;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr2(long arg) {
+    return arg >>> 2;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                lsr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr31(long arg) {
+    return arg >>> 31;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+  /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}}
+
+  public static long ushr32(long arg) {
+    return arg >>> 32;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr33(long arg) {
+    return arg >>> 33;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr63(long arg) {
+    return arg >>> 63;
+  }
+
+  /**
+   * ARM/ARM64: Test that the `-1` constant is not synthesized in a register and that we
+   * instead simply switch between `add` and `sub` instructions with the
+   * constant embedded.
+   * We need two uses (or more) of the constant because the compiler always
+   * delegates the immediate value handling to VIXL when there is only one use.
+   */
+
+  /// CHECK-START-ARM64: long Main.addM1(long) register (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-NOT:                    ParallelMove
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+
+  /// CHECK-START-ARM64: long Main.addM1(long) disassembly (after)
+  /// CHECK:                        sub x{{\d+}}, x{{\d+}}, #0x1
+  /// CHECK:                        add x{{\d+}}, x{{\d+}}, #0x1
+
+  /// CHECK-START-ARM: long Main.addM1(long) register (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-NOT:                    ParallelMove
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+
+  /// CHECK-START-ARM: long Main.addM1(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, #1
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #-1
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, #1
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #0
+
+  public static long addM1(long arg) {
+    return (arg + (-1)) | (arg - (-1));
+  }
+
+  /**
+   * ARM: Test that some long constants are not synthesized in a register for add-long.
+   * Also test some negative cases where we do synthesize constants in registers.
+   */
+
+  /// CHECK-START-ARM: long Main.addLongConstants(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK-DAG: <<ConstA:j\d+>>    LongConstant 4486007727657233
+  /// CHECK-DAG: <<ConstB:j\d+>>    LongConstant 4486011735248896
+  /// CHECK-DAG: <<ConstC:j\d+>>    LongConstant -1071856711330889728
+  /// CHECK-DAG: <<ConstD:j\d+>>    LongConstant 17587891077120
+  /// CHECK-DAG: <<ConstE:j\d+>>    LongConstant -8808977924096
+  /// CHECK-DAG: <<ConstF:j\d+>>    LongConstant 17587891077121
+  /// CHECK-DAG: <<ConstG:j\d+>>    LongConstant 4095
+  /// CHECK:                        Add [<<Arg>>,<<ConstA>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #286331153
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Add [<<Arg>>,<<ConstB>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Add [<<Arg>>,<<ConstC>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #16711680
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #249561088
+  /// CHECK:                        Add [<<Arg>>,<<ConstD>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        addw r{{\d+}}, r{{\d+}}, #4095
+  /// CHECK:                        Add [<<Arg>>,<<ConstE>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        subw r{{\d+}}, r{{\d+}}, #2051
+  /// CHECK:                        Add [<<Arg>>,<<ConstF>>]
+  /// CHECK-NEXT:                   adds{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   adc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                        Add [<<Arg>>,<<ConstG>>]
+  /// CHECK-NEXT:                   adds{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   adc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+
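+  // "Modified immediate" refers to Thumb-2's encodable data-processing
+  // immediates (an 8-bit value, rotated or byte-replicated); the constants
+  // below are chosen so that their high/low halves, or their negations, are
+  // or are not encodable that way.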
+  public static long addLongConstants(long arg) {
+    return
+        // Modified immediates.
+        (arg + 0x000ff00011111111L) ^  // 4486007727657233
+        // Modified immediates high and -low.
+        (arg + 0x000ff000fff01000L) ^  // 4486011735248896
+        // Modified immediates ~high and -low.
+        (arg + 0xf11fffffff010000L) ^  // -1071856711330889728
+        // Low word 0 (no carry), high is imm12.
+        (arg + 0x00000fff00000000L) ^  // 17587891077120
+        // Low word 0 (no carry), -high is imm12.
+        (arg + 0xfffff7fd00000000L) ^  // -8808977924096
+        // Cannot embed imm12 in ADC/SBC for high word.
+        (arg + 0x00000fff00000001L) ^  // 17587891077121
+        // Cannot embed imm12 in ADDS/SUBS for low word (need to set flags).
+        (arg + 0x0000000000000fffL) ^  // 4095
+        arg;
+  }
+
+  /**
+   * ARM: Test that some long constants are not synthesized in a register for sub-long.
+   * Also test some negative cases where we do synthesize constants in registers.
+   */
+
+  /// CHECK-START-ARM: long Main.subLongConstants(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK-DAG: <<ConstA:j\d+>>    LongConstant 4486007727657233
+  /// CHECK-DAG: <<ConstB:j\d+>>    LongConstant 4486011735248896
+  /// CHECK-DAG: <<ConstC:j\d+>>    LongConstant -1071856711330889728
+  /// CHECK-DAG: <<ConstD:j\d+>>    LongConstant 17587891077120
+  /// CHECK-DAG: <<ConstE:j\d+>>    LongConstant -8808977924096
+  /// CHECK-DAG: <<ConstF:j\d+>>    LongConstant 17587891077121
+  /// CHECK-DAG: <<ConstG:j\d+>>    LongConstant 4095
+  /// CHECK:                        Sub [<<Arg>>,<<ConstA>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #286331153
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Sub [<<Arg>>,<<ConstB>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Sub [<<Arg>>,<<ConstC>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #16711680
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #249561088
+  /// CHECK:                        Sub [<<Arg>>,<<ConstD>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        subw r{{\d+}}, r{{\d+}}, #4095
+  /// CHECK:                        Sub [<<Arg>>,<<ConstE>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        addw r{{\d+}}, r{{\d+}}, #2051
+  /// CHECK:                        Sub [<<Arg>>,<<ConstF>>]
+  /// CHECK-NEXT:                   subs{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   sbc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                        Sub [<<Arg>>,<<ConstG>>]
+  /// CHECK-NEXT:                   subs{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   sbc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static long subLongConstants(long arg) {
+    return
+        // Modified immediates.
+        (arg - 0x000ff00011111111L) ^  // 4486007727657233
+        // Modified immediates high and -low.
+        (arg - 0x000ff000fff01000L) ^  // 4486011735248896
+        // Modified immediates ~high and -low.
+        (arg - 0xf11fffffff010000L) ^  // -1071856711330889728
+        // Low word 0 (no carry), high is imm12.
+        (arg - 0x00000fff00000000L) ^  // 17587891077120
+        // Low word 0 (no carry), -high is imm12.
+        (arg - 0xfffff7fd00000000L) ^  // -8808977924096
+        // Cannot embed imm12 in ADC/SBC for high word.
+        (arg - 0x00000fff00000001L) ^  // 17587891077121
+        // Cannot embed imm12 in ADDS/SUBS for low word (need to set flags).
+        (arg - 0x0000000000000fffL) ^  // 4095
+        arg;
+  }
+
   public static void main(String[] args) {
     int arg = 0x87654321;
     assertIntEquals(and255(arg), 0x21);
@@ -286,5 +648,71 @@
     assertLongEquals(xorNot15(longArg), 0xedcba987789abcd1L);
     assertLongEquals(xor0xfffffff00000000f(longArg), 0xedcba9888765432eL);
     assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
+
+    assertLongEquals(14L, addM1(7));
+
+    assertLongEquals(shl1(longArg), 0x2468acf10eca8642L);
+    assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
+    assertLongEquals(shl31(longArg), 0x43b2a19080000000L);
+    assertLongEquals(shl32(longArg), 0x8765432100000000L);
+    assertLongEquals(shl33(longArg), 0x0eca864200000000L);
+    assertLongEquals(shl63(longArg), 0x8000000000000000L);
+    assertLongEquals(shl1(~longArg), 0xdb97530ef13579bcL);
+    assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L);
+    assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L);
+    assertLongEquals(shl32(~longArg), 0x789abcde00000000L);
+    assertLongEquals(shl33(~longArg), 0xf13579bc00000000L);
+    assertLongEquals(shl63(~longArg), 0x0000000000000000L);
+
+    assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(shr2(longArg), 0x048d159e21d950c8L);
+    assertLongEquals(shr31(longArg), 0x000000002468acf1L);
+    assertLongEquals(shr32(longArg), 0x0000000012345678L);
+    assertLongEquals(shr33(longArg), 0x00000000091a2b3cL);
+    assertLongEquals(shr63(longArg), 0x0000000000000000L);
+    assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL);
+    assertLongEquals(shr2(~longArg), 0xfb72ea61de26af37L);
+    assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL);
+    assertLongEquals(shr32(~longArg), 0xffffffffedcba987L);
+    assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L);
+    assertLongEquals(shr63(~longArg), 0xffffffffffffffffL);
+
+    assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(ushr2(longArg), 0x048d159e21d950c8L);
+    assertLongEquals(ushr31(longArg), 0x000000002468acf1L);
+    assertLongEquals(ushr32(longArg), 0x0000000012345678L);
+    assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL);
+    assertLongEquals(ushr63(longArg), 0x0000000000000000L);
+    assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL);
+    assertLongEquals(ushr2(~longArg), 0x3b72ea61de26af37L);
+    assertLongEquals(ushr31(~longArg), 0x00000001db97530eL);
+    assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
+    assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
+    assertLongEquals(ushr63(~longArg), 0x0000000000000001L);
+
+    // Test -1, 0, +1 and arbitrary constants just before and after overflow
+    // on the low word in subexpressions of addLongConstants()/subLongConstants(),
+    // so that we check that the carry propagates correctly to the high word.
+    // For example
+    //    0x111eeeeeeee+0x000ff00011111111 = 0x000ff111ffffffff (carry=0),
+    //    0x111eeeeeeef+0x000ff00011111111 = 0x000ff11200000000 (carry=1).
+    assertLongEquals(0xf11ff7fdee1e1111L, addLongConstants(0xffffffffffffffffL));
+    assertLongEquals(0xee0080211e00eefL, addLongConstants(0x0L));
+    assertLongEquals(0xee0080211e01111L, addLongConstants(0x1L));
+    assertLongEquals(0xedff81c12201113L, addLongConstants(0x111eeeeeeeeL));
+    assertLongEquals(0xedff81feddfeef1L, addLongConstants(0x111eeeeeeefL));
+    assertLongEquals(0xedff83e11c1f111L, addLongConstants(0x222000fefffL));
+    assertLongEquals(0xedff83fee3e0eefL, addLongConstants(0x222000ff000L));
+    assertLongEquals(0xedff805edfe1111L, addLongConstants(0x33300feffffL));
+    assertLongEquals(0xedff80412000eefL, addLongConstants(0x33300ff0000L));
+    assertLongEquals(0xee0080211e00eefL, subLongConstants(0xffffffffffffffffL));
+    assertLongEquals(0xf11ff7fdee1e1111L, subLongConstants(0x0L));
+    assertLongEquals(0xf11ff7fc11e1eef3L, subLongConstants(0x1L));
+    assertLongEquals(0xee0080412201113L, subLongConstants(0x44411111111L));
+    assertLongEquals(0xee0080412201111L, subLongConstants(0x44411111112L));
+    assertLongEquals(0xee0080e11c1f111L, subLongConstants(0x555fff01000L));
+    assertLongEquals(0xee0080e11c1eef3L, subLongConstants(0x555fff01001L));
+    assertLongEquals(0xee0080dedfe1111L, subLongConstants(0x666ff010000L));
+    assertLongEquals(0xee0080dedffeef3L, subLongConstants(0x666ff010001L));
   }
 }
diff --git a/test/539-checker-arm64-encodable-immediates/expected.txt b/test/539-checker-arm64-encodable-immediates/expected.txt
deleted file mode 100644
index e69de29..0000000
--- a/test/539-checker-arm64-encodable-immediates/expected.txt
+++ /dev/null
diff --git a/test/539-checker-arm64-encodable-immediates/info.txt b/test/539-checker-arm64-encodable-immediates/info.txt
deleted file mode 100644
index efeef33..0000000
--- a/test/539-checker-arm64-encodable-immediates/info.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Basic tests that check the compiler recognizes when constant values can be
-encoded in the immediate field of instructions.
diff --git a/test/539-checker-arm64-encodable-immediates/src/Main.java b/test/539-checker-arm64-encodable-immediates/src/Main.java
deleted file mode 100644
index 7e3ff9f..0000000
--- a/test/539-checker-arm64-encodable-immediates/src/Main.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-public class Main {
-
-  public static void assertLongEquals(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  /**
-   * Test that the `-1` constant is not synthesized in a register and that we
-   * instead simply switch between `add` and `sub` instructions with the
-   * constant embedded.
-   * We need two uses (or more) of the constant because the compiler always
-   * delegates the immediate value handling to VIXL when there is only one use.
-   */
-
-  /// CHECK-START-ARM64: long Main.addM1(long) register (after)
-  /// CHECK:     <<Arg:j\d+>>       ParameterValue
-  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
-  /// CHECK-NOT:                    ParallelMove
-  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
-  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
-
-  /// CHECK-START-ARM64: long Main.addM1(long) disassembly (after)
-  /// CHECK:                        sub x{{\d+}}, x{{\d+}}, #0x1
-  /// CHECK:                        add x{{\d+}}, x{{\d+}}, #0x1
-
-  public static long addM1(long arg) {
-    return (arg + (-1)) | (arg - (-1));
-  }
-
-  public static void main(String[] args) {
-    assertLongEquals(14, addM1(7));
-  }
-}
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
index e9f16c0..19b7fb7 100644
--- a/test/540-checker-rtp-bug/src/Main.java
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -21,14 +21,14 @@
 }
 
 public class Main {
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
   /// CHECK:    <<Ret:l\d+>>     BoundType [<<Phi>>] klass:Final
   /// CHECK:                     Return [<<Ret>>]
 
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier (after)
   /// CHECK:    <<Phi:l\d+>>     Phi
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
@@ -43,12 +43,12 @@
     return (Final) x;
   }
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination$initial (after)
   /// CHECK:    <<Phi:l\d+>>     Phi
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
@@ -65,7 +65,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<NC:l\d+>>      NullCheck [<<Phi>>]
   /// CHECK:    <<Ret:l\d+>>     InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/541-regression-inlined-deopt/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/541-regression-inlined-deopt/expected.txt
diff --git a/test/541-regression-inlined-deopt/info.txt b/test/541-regression-inlined-deopt/info.txt
new file mode 100644
index 0000000..209588f
--- /dev/null
+++ b/test/541-regression-inlined-deopt/info.txt
@@ -0,0 +1,4 @@
+Regression test for deopt from optimized code which would use the top-level
+stack map when deopting inlined frames. The test case is written in smali for
+full control over vregs; the previous test 449 happened to pass only because
+the vreg maps at the various inlining depths were similar.
diff --git a/test/541-regression-inlined-deopt/smali/TestCase.smali b/test/541-regression-inlined-deopt/smali/TestCase.smali
new file mode 100644
index 0000000..a109775
--- /dev/null
+++ b/test/541-regression-inlined-deopt/smali/TestCase.smali
@@ -0,0 +1,55 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method private static $inline$depth1([I)V
+    .registers 3
+
+    # Expects array in v2.
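+    # (With ".registers 3" and a single parameter, p0 is an alias of v2.)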
+
+    const v0, 0x0
+
+    const v1, 0x3
+    aput v0, p0, v1
+
+    const v1, 0x4
+    aput v0, p0, v1
+
+    return-void
+.end method
+
+.method private static $inline$depth0([I)V
+    .registers 1
+
+    # Expects array in v0.
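+    # (With ".registers 1" and a single parameter, p0 is an alias of v0.)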
+
+    invoke-static {p0}, LTestCase;->$inline$depth1([I)V
+    return-void
+.end method
+
+.method public static foo()V
+    .registers 10
+
+    # Create a new array short enough to throw AIOOB in $inline$depth1.
+    # Make sure the reference is not stored in the same vreg as used by
+    # the inlined methods.
+
+    const v5, 0x3
+    new-array v6, v5, [I
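+    # The array length is 3 but $inline$depth1 writes indices 3 and 4, so
+    # the first aput throws AIOOB while both inlined frames are on the stack.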
+
+    invoke-static {v6}, LTestCase;->$inline$depth0([I)V
+    return-void
+.end method
diff --git a/test/541-regression-inlined-deopt/src/Main.java b/test/541-regression-inlined-deopt/src/Main.java
new file mode 100644
index 0000000..fa79590
--- /dev/null
+++ b/test/541-regression-inlined-deopt/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Throwable {
+    try {
+      Class<?> c = Class.forName("TestCase");
+      Method m = c.getMethod("foo");
+      m.invoke(null, (Object[]) null);
+    } catch (InvocationTargetException ex) {
+      // Code should have thrown AIOOB.
+      if (!(ex.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+        throw ex;
+      }
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/542-bitfield-rotates/expected.txt
similarity index 100%
rename from test/530-checker-loops/expected.txt
rename to test/542-bitfield-rotates/expected.txt
diff --git a/test/542-bitfield-rotates/info.txt b/test/542-bitfield-rotates/info.txt
new file mode 100644
index 0000000..961be3b
--- /dev/null
+++ b/test/542-bitfield-rotates/info.txt
@@ -0,0 +1 @@
+Tests bitfield rotate simplification in the optimizing compiler.
diff --git a/test/542-bitfield-rotates/src/Main.java b/test/542-bitfield-rotates/src/Main.java
new file mode 100644
index 0000000..f2bc153
--- /dev/null
+++ b/test/542-bitfield-rotates/src/Main.java
@@ -0,0 +1,423 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void main(String args[]) throws Exception {
+    test_Integer_right_v_csubv();
+    test_Long_right_v_csubv();
+
+    test_Integer_right_constant_v();
+    test_Long_right_constant_v();
+
+    test_Integer_left_csubv_v();
+    test_Long_left_csubv_v();
+
+    test_Integer_right_v_negv();
+    test_Long_right_v_negv();
+
+    test_Integer_left_negv_v();
+    test_Long_left_negv_v();
+
+    test_Integer_left_constant_v();
+    test_Long_left_constant_v();
+  }
+
+  public static boolean doThrow = false;
+
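+  // Helper naming: "v" is a variable rotate distance, "csubv" spells the
+  // distance as (width - v), and "negv" spells it as -v. Each helper pairs
+  // two shifts in the canonical Java rotate idiom, which the simplifier is
+  // expected to turn into a single rotate operation.
+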
+  public static int $noinline$rotate_int_right_reg_v_csubv(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  public static void test_Integer_right_v_csubv() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE - 1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE + 1), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x80000000, 1), 0x40000000);
+  }
+
+  public static long $noinline$rotate_long_right_reg_v_csubv(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << (64 - distance));
+  }
+
+  public static void test_Long_right_v_csubv() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE - 1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -(Long.SIZE + 1)), 0x22);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x8000000000000000L, 1), 0x4000000000000000L);
+  }
+
+  public static int $noinline$rotate_int_left_reg_csubv_v(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  public static void test_Integer_left_csubv_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, 1), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE - 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE + 1), 0x22);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -(Integer.SIZE - 1)), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0xC0000000, 1), 0x80000001);
+  }
+
+  public static long $noinline$rotate_long_left_reg_csubv_v(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> (64 - distance)) | (value << distance);
+  }
+
+  public static void test_Long_left_csubv_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, 1), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE + 1), 0x22);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -(Long.SIZE - 1)), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0xC000000000000000L, 1), 0x8000000000000001L);
+  }
+
+  public static int $noinline$rotate_int_right_reg_v_negv(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
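+    // Shift distances on int are masked to their low five bits, so -distance
+    // is congruent to (32 - distance) mod 32 and this still rotates right.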
+    return (value >>> distance) | (value << -distance);
+  }
+
+  public static void test_Integer_right_v_negv() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE - 1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE + 1), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x80000000, 1), 0x40000000);
+  }
+
+  public static long $noinline$rotate_long_right_reg_v_negv(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
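+    // Shift distances on long are masked to their low six bits, so -distance
+    // is congruent to (64 - distance) mod 64.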
+    return (value >>> distance) | (value << -distance);
+  }
+
+  public static void test_Long_right_v_negv() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE - 1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -(Long.SIZE + 1)), 0x22);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x8000000000000000L, 1), 0x4000000000000000L);
+  }
+
+  public static int $noinline$rotate_int_left_reg_negv_v(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -distance) | (value << distance);
+  }
+
+  public static void test_Integer_left_negv_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, 1), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE - 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE + 1), 0x22);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -(Integer.SIZE - 1)), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0xC0000000, 1), 0x80000001);
+  }
+
+  public static long $noinline$rotate_long_left_reg_negv_v(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -distance) | (value << distance);
+  }
+
+  public static void test_Long_left_negv_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, 1), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE + 1), 0x22);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -(Long.SIZE - 1)), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0xC000000000000000L, 1), 0x8000000000000001L);
+  }
+
+  public static int $noinline$rotate_int_right_constant_0(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 0) | (value << 0);
+  }
+
+  public static int $noinline$rotate_int_right_constant_1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 1) | (value << -1);
+  }
+
+  public static int $noinline$rotate_int_right_constant_m1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -1) | (value << 1);
+  }
+
+  public static int $noinline$rotate_int_right_constant_16(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 16) | (value << -16);
+  }
+
+  public static void test_Integer_right_constant_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_constant_0(0x11), 0x11);
+    assertIntEquals($noinline$rotate_int_right_constant_1(0x11), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_constant_m1(0x11), 0x22);
+    assertIntEquals($noinline$rotate_int_right_constant_16(0x11), 0x110000);
+  }
+
+  public static long $noinline$rotate_long_right_constant_0(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 0) | (value << 0);
+  }
+
+  public static long $noinline$rotate_long_right_constant_1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 1) | (value << -1);
+  }
+
+  public static long $noinline$rotate_long_right_constant_m1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -1) | (value << 1);
+  }
+
+  public static long $noinline$rotate_long_right_constant_16(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 16) | (value << -16);
+  }
+
+  public static long $noinline$rotate_long_right_constant_32(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 32) | (value << -32);
+  }
+
+  public static long $noinline$rotate_long_right_constant_48(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 48) | (value << -48);
+  }
+
+  public static long $noinline$rotate_long_right_constant_64(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
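+    // Distances 64 and -64 are both masked to 0 for long shifts, so this
+    // must simply return the value unchanged.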
+    return (value >>> 64) | (value << -64);
+  }
+
+  public static void test_Long_right_constant_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_constant_0(0x11), 0x11);
+    assertLongEquals($noinline$rotate_long_right_constant_1(0x11), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_constant_m1(0x11), 0x22);
+    assertLongEquals($noinline$rotate_long_right_constant_16(0x11), 0x11000000000000L);
+    assertLongEquals($noinline$rotate_long_right_constant_32(0x11), 0x1100000000L);
+    assertLongEquals($noinline$rotate_long_right_constant_48(0x11), 0x110000L);
+  }
+
+  public static int $noinline$rotate_int_left_constant_0(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 0) | (value >>> 0);
+  }
+
+  public static int $noinline$rotate_int_left_constant_1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 1) | (value >>> -1);
+  }
+
+  public static int $noinline$rotate_int_left_constant_m1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << -1) | (value >>> 1);
+  }
+
+  public static int $noinline$rotate_int_left_constant_16(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 16) | (value >>> -16);
+  }
+
+  public static void test_Integer_left_constant_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_constant_0(0x11), 0x11);
+    assertIntEquals($noinline$rotate_int_left_constant_1(0x11), 0x22);
+    assertIntEquals($noinline$rotate_int_left_constant_m1(0x11), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_constant_16(0x11), 0x110000);
+  }
+
+  public static long $noinline$rotate_long_left_constant_0(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 0) | (value >>> 0);
+  }
+
+  public static long $noinline$rotate_long_left_constant_1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 1) | (value >>> -1);
+  }
+
+  public static long $noinline$rotate_long_left_constant_m1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << -1) | (value >>> 1);
+  }
+
+  public static long $noinline$rotate_long_left_constant_16(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 16) | (value >>> -16);
+  }
+
+  public static long $noinline$rotate_long_left_constant_32(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 32) | (value >>> -32);
+  }
+
+  public static long $noinline$rotate_long_left_constant_48(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 48) | (value >>> -48);
+  }
+
+  public static long $noinline$rotate_long_left_constant_64(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 64) | (value >>> -64);
+  }
+
+  public static void test_Long_left_constant_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_constant_0(0x11), 0x11);
+    assertLongEquals($noinline$rotate_long_left_constant_1(0x11), 0x22);
+    assertLongEquals($noinline$rotate_long_left_constant_m1(0x11), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_constant_16(0x11), 0x110000L);
+    assertLongEquals($noinline$rotate_long_left_constant_32(0x11), 0x1100000000L);
+    assertLongEquals($noinline$rotate_long_left_constant_48(0x11), 0x11000000000000L);
+  }
+
+}
diff --git a/test/530-checker-loops/expected.txt b/test/542-inline-trycatch/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/542-inline-trycatch/expected.txt
diff --git a/test/542-inline-trycatch/info.txt b/test/542-inline-trycatch/info.txt
new file mode 100644
index 0000000..b3e50d3
--- /dev/null
+++ b/test/542-inline-trycatch/info.txt
@@ -0,0 +1 @@
+Tests inlining in the optimizing compiler under try/catch.
\ No newline at end of file
diff --git a/test/542-inline-trycatch/src/Main.java b/test/542-inline-trycatch/src/Main.java
new file mode 100644
index 0000000..5a6e06f
--- /dev/null
+++ b/test/542-inline-trycatch/src/Main.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // The following tests make sure that we inline methods used inside try and catch
+  // blocks, provided they meet other inlining criteria. To do that, we rely on
+  // the compiler recognizing and enforcing the $inline$ and $noinline$ markers.
+
+  // We expect a single block to always be inlined.
+
+  private static int $inline$SingleBlock(String str) throws NumberFormatException {
+    return Integer.parseInt(str);
+  }
+
+  // We expect a "simple" method with multiple blocks to always be inlined.
+
+  private static int $inline$MultipleBlocks(String str, boolean is_hex)
+      throws NumberFormatException {
+    return is_hex ? Integer.parseInt(str, 16) : Integer.parseInt(str);
+  }
+
+  // We expect methods with try/catch to not be inlined. Inlined try/catch
+  // blocks are not supported at the moment.
+
+  private static int $noinline$TryCatch(String str) {
+    try {
+      return Integer.parseInt(str);
+    } catch (NumberFormatException ex) {
+      return -1;
+    }
+  }
+
+  public static void testSingleBlockFromTry() {
+    int val = 0;
+
+    try {
+      val = $inline$SingleBlock("42");
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(42, val);
+
+    try {
+      $inline$SingleBlock("xyz");
+      unreachable();
+    } catch (NumberFormatException ex) {}
+  }
+
+  public static void testSingleBlockFromCatch() {
+    int val = 0;
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $inline$SingleBlock("42");
+    }
+    assertEquals(42, val);
+  }
+
+  public static void testMultipleBlocksFromTry() {
+    int val = 0;
+
+    try {
+      val = $inline$MultipleBlocks("42", false);
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(42, val);
+
+    try {
+      val = $inline$MultipleBlocks("20", true);
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(32, val);
+
+    try {
+      $inline$MultipleBlocks("xyz", false);
+      unreachable();
+    } catch (NumberFormatException ex) {}
+
+    try {
+      $inline$MultipleBlocks("xyz", true);
+      unreachable();
+    } catch (NumberFormatException ex) {}
+  }
+
+  public static void testMultipleBlocksFromCatch() {
+    int val = 0;
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $inline$MultipleBlocks("42", false);
+    }
+    assertEquals(42, val);
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $inline$MultipleBlocks("20", true);
+    }
+    assertEquals(32, val);
+  }
+
+  public static void testTryCatchFromTry() {
+    int val = 0;
+
+    try {
+      val = $noinline$TryCatch("42");
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(42, val);
+
+    try {
+      val = $noinline$TryCatch("xyz");
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(-1, val);
+  }
+
+  public static void testTryCatchFromCatch() {
+    int val = 0;
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $noinline$TryCatch("42");
+    }
+    assertEquals(42, val);
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $noinline$TryCatch("xyz");
+    }
+    assertEquals(-1, val);
+  }
+
+  public static void main(String[] args) {
+    testSingleBlockFromTry();
+    testSingleBlockFromCatch();
+    testMultipleBlocksFromTry();
+    testMultipleBlocksFromCatch();
+    testTryCatchFromTry();
+    testTryCatchFromCatch();
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new AssertionError("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  private static void unreachable() {
+    throw new Error("Unreachable");
+  }
+
+  private static void throwException() throws Exception {
+    throw new Exception();
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/542-unresolved-access-check/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/542-unresolved-access-check/expected.txt
diff --git a/test/542-unresolved-access-check/info.txt b/test/542-unresolved-access-check/info.txt
new file mode 100644
index 0000000..30d45b8
--- /dev/null
+++ b/test/542-unresolved-access-check/info.txt
@@ -0,0 +1 @@
+Tests the unresolved and access-check entry points with the JIT.
diff --git a/test/542-unresolved-access-check/src/Main.java b/test/542-unresolved-access-check/src/Main.java
new file mode 100644
index 0000000..62bfea1
--- /dev/null
+++ b/test/542-unresolved-access-check/src/Main.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.List;
+import p1.InP1;
+import p1.PlaceHolder;
+
+
+// Custom class loader to prevent loading while verifying.
+class MyClassLoader extends ClassLoader {
+  MyClassLoader() throws Exception {
+    super(MyClassLoader.class.getClassLoader());
+
+    // Some magic to get access to the pathList field of BaseDexClassLoader.
+    ClassLoader loader = getClass().getClassLoader();
+    Class<?> baseDexClassLoader = loader.getClass().getSuperclass();
+    Field f = baseDexClassLoader.getDeclaredField("pathList");
+    f.setAccessible(true);
+    Object pathList = f.get(loader);
+
+    // Some magic to get access to the dexField field of pathList.
+    f = pathList.getClass().getDeclaredField("dexElements");
+    f.setAccessible(true);
+    dexElements = (Object[]) f.get(pathList);
+    dexFileField = dexElements[0].getClass().getDeclaredField("dexFile");
+    dexFileField.setAccessible(true);
+  }
+
+  Object[] dexElements;
+  Field dexFileField;
+
+  protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException {
+    if (className.equals("p1.OtherInP1") && !p1.PlaceHolder.entered) {
+      // The request comes from the verifier. Return null to get the access check entry
+      // point in the compiled code.
+      return null;
+    }
+    // Mimic what DexPathList.findClass is doing.
+    try {
+      for (Object element : dexElements) {
+        Object dex = dexFileField.get(element);
+        // Check for null before reflecting on the DexFile, so a missing
+        // dexFile entry cannot cause a NullPointerException.
+        if (dex != null) {
+          Method method = dex.getClass().getDeclaredMethod(
+              "loadClassBinaryName", String.class, ClassLoader.class, List.class);
+          Class<?> clazz = (Class<?>) method.invoke(dex, className, this, null);
+          if (clazz != null) {
+            return clazz;
+          }
+        }
+      }
+    } catch (Exception e) { /* Ignore */ }
+    return getParent().loadClass(className);
+  }
+}
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+      MyClassLoader o = new MyClassLoader();
+      Class<?> foo = o.loadClass("LoadedByMyClassLoader");
+      Method m = foo.getDeclaredMethod("main");
+      m.invoke(null);
+    }
+}
+
+class LoadedByMyClassLoader {
+    public static void main() throws Exception {
+      for (int i = 0; i < 10000; ++i) {
+        // Warm up the JIT.
+        doTheCall(i);
+      }
+      // Sleep a while to let the JIT compile things.
+      // TODO(ngeoffray): Remove the sleep. b/25414532
+      Thread.sleep(2000);
+      doTheCall(10001);
+    }
+
+    public static void doTheCall(int i) {
+      InP1.$inline$AllocateOtherInP1(i);
+      InP1.$inline$AllocateArrayOtherInP1(i);
+      InP1.$inline$UseStaticFieldOtherInP1(i);
+      InP1.$inline$SetStaticFieldOtherInP1(i);
+      InP1.$inline$UseInstanceFieldOtherInP1(i);
+      InP1.$inline$SetInstanceFieldOtherInP1(i);
+      InP1.$inline$LoadOtherInP1(i);
+      InP1.$inline$StaticCallOtherInP1(i);
+      InP1.$inline$InstanceCallOtherInP1(i);
+    }
+}
diff --git a/test/542-unresolved-access-check/src/p1/InP1.java b/test/542-unresolved-access-check/src/p1/InP1.java
new file mode 100644
index 0000000..3516c72
--- /dev/null
+++ b/test/542-unresolved-access-check/src/p1/InP1.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+public class InP1 {
+    public static Object $inline$AllocateOtherInP1(int i) {
+      // Let this method execute for a while to make sure the JIT sees it as hot.
+      if (i <= 10000) {
+        return null;
+      }
+      // Set the flag that we have entered InP1 code to get OtherInP1 loaded.
+      PlaceHolder.entered = true;
+      return new OtherInP1();
+    }
+
+    public static Object $inline$AllocateArrayOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return new OtherInP1[10];
+    }
+
+    public static Object $inline$UseStaticFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return OtherInP1.staticField;
+    }
+
+    public static void $inline$SetStaticFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return;
+      }
+      OtherInP1.staticField = new Object();
+    }
+
+    public static Object $inline$UseInstanceFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return $noinline$AllocateOtherInP1().instanceField;
+    }
+
+    public static void $inline$SetInstanceFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return;
+      }
+      $noinline$AllocateOtherInP1().instanceField = new Object();
+    }
+
+    public static OtherInP1 $noinline$AllocateOtherInP1() {
+      try {
+        return new OtherInP1();
+      } catch (Exception e) {
+        throw new Error(e);
+      }
+    }
+
+    public static Object $inline$LoadOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return OtherInP1.class;
+    }
+
+    public static Object $inline$StaticCallOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return OtherInP1.doTheStaticCall();
+    }
+
+    public static Object $inline$InstanceCallOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return $noinline$AllocateOtherInP1().doTheInstanceCall();
+    }
+}
diff --git a/test/542-unresolved-access-check/src/p1/OtherInP1.java b/test/542-unresolved-access-check/src/p1/OtherInP1.java
new file mode 100644
index 0000000..adc1ce1
--- /dev/null
+++ b/test/542-unresolved-access-check/src/p1/OtherInP1.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+class OtherInP1 {
+  OtherInP1() {
+  }
+  static Object staticField = new Object();
+  Object instanceField = new Object();
+
+  static Object doTheStaticCall() {
+    return null;
+  }
+
+  Object doTheInstanceCall() {
+    return null;
+  }
+}
diff --git a/test/542-unresolved-access-check/src/p1/PlaceHolder.java b/test/542-unresolved-access-check/src/p1/PlaceHolder.java
new file mode 100644
index 0000000..2bf4bdf
--- /dev/null
+++ b/test/542-unresolved-access-check/src/p1/PlaceHolder.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+// Separate class holding the 'entered' marker. If the marker lived in InP1 or
+// OtherInP1, the code in MyClassLoader that reads it would load InP1 or
+// OtherInP1 through the system class loader rather than through MyClassLoader.
+public class PlaceHolder {
+  public static boolean entered = false;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/543-checker-dce-trycatch/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/543-checker-dce-trycatch/expected.txt
diff --git a/test/543-checker-dce-trycatch/info.txt b/test/543-checker-dce-trycatch/info.txt
new file mode 100644
index 0000000..e541938
--- /dev/null
+++ b/test/543-checker-dce-trycatch/info.txt
@@ -0,0 +1 @@
+Tests removal of try/catch blocks by DCE.
\ No newline at end of file
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
new file mode 100644
index 0000000..5557c7b
--- /dev/null
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -0,0 +1,333 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.field public static sField:I
+
+.method private static $inline$False()Z
+    .registers 1
+    const/4 v0, 0x0
+    return v0
+.end method
+
+# Test a case when one entering TryBoundary is dead but the rest of the try
+# block remains live.
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK: Add
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK:     TryBoundary kind:entry
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-NOT: Add
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+.method public static testDeadEntry(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
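+
+    # $inline$False() always returns false, so this branch is always taken:
+    # the entry at :try_start below is dead, while the try is still entered
+    # live at :else.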
+
+    add-int/2addr p0, p1
+
+    :try_start
+    div-int/2addr p0, p2
+
+    :else
+    div-int/2addr p0, p3
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test a case when one exiting TryBoundary is dead but the rest of the try
+# block remains live.
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK: Add
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK:     TryBoundary kind:exit
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-NOT: Add
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+.method public static testDeadExit(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    :try_start
+    div-int/2addr p0, p2
+
+    if-nez v0, :else
+
+    div-int/2addr p0, p3
+    goto :return
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :else
+    add-int/2addr p0, p1
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test that a catch block remains live and consistent if some of try blocks
+# throwing into it are removed.
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK:     TryBoundary kind:entry
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK:     TryBoundary kind:exit
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+.method public static testOneTryBlockDead(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    :try_start_1
+    div-int/2addr p0, p2
+    :try_end_1
+    .catchall {:try_start_1 .. :try_end_1} :catch_all
+
+    if-eqz v0, :return
+
+    :try_start_2
+    div-int/2addr p0, p3
+    :try_end_2
+    .catchall {:try_start_2 .. :try_end_2} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test that try block membership is recomputed. In this test case, the try entry
+# stored with the merge block gets deleted and SSAChecker would fail if it was
+# not replaced with the try entry from the live branch.
+
+.method public static testRecomputeTryMembership(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    # Dead branch
+    :try_start
+    div-int/2addr p0, p1
+    goto :merge
+
+    # Live branch
+    :else
+    div-int/2addr p0, p2
+
+    # Merge block. Make complex so it does not get merged with the live branch.
+    :merge
+    div-int/2addr p0, p3
+    if-eqz p0, :else2
+    div-int/2addr p0, p3
+    :else2
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test that DCE removes catch phi uses of instructions defined in dead try blocks.
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
+## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
+## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>>  IntConstant 14
+## CHECK-DAG:     <<Const0xf:i\d+>>  IntConstant 15
+## CHECK-DAG:     <<Const0x10:i\d+>> IntConstant 16
+## CHECK-DAG:     <<Const0x11:i\d+>> IntConstant 17
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<Arg0>>,<<Arg1>>]
+## CHECK-DAG:     <<Select:i\d+>>    Select [<<Const0xf>>,<<Add>>,{{z\d+}}]
+## CHECK-DAG:                        Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>>  IntConstant 14
+## CHECK-DAG:     <<Const0x10:i\d+>> IntConstant 16
+## CHECK-DAG:     <<Const0x11:i\d+>> IntConstant 17
+## CHECK-DAG:                        Phi [<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
+
+.method public static testCatchPhiInputs_DefinedInTryBlock(IIII)I
+    .registers 8
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    shr-int/2addr p2, p3
+
+    :try_start
+    const v1, 0xa                  # dead catch phi input, defined in entry block (HInstruction)
+    add-int v2, p0, p1             # dead catch phi input, defined in the dead block (HInstruction)
+    move v3, v2
+    if-eqz v3, :define_phi
+    const v3, 0xf
+    :define_phi
+    # v3 = Phi [Add, 0xf]          # dead catch phi input, defined in the dead block (HPhi)
+    div-int/2addr p0, v2
+
+    :else
+    const v1, 0xb                  # live catch phi input
+    const v2, 0xc                  # live catch phi input
+    const v3, 0x10                 # live catch phi input
+    div-int/2addr p0, p3
+
+    const v1, 0xd                  # live catch phi input
+    const v2, 0xe                  # live catch phi input
+    const v3, 0x11                 # live catch phi input
+    div-int/2addr p0, p1
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    sub-int p0, v1, v2      # use catch phi values
+    sub-int p0, p0, v3      # use catch phi values
+    goto :return
+
+.end method
+
+# Test that DCE does not remove catch phi uses of instructions defined outside
+# dead try blocks.
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
+## CHECK-DAG:     <<Const0xf:i\d+>> IntConstant 15
+## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
+## CHECK-DAG:     <<Const0xf:i\d+>> IntConstant 15
+## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+
+.method public static testCatchPhiInputs_DefinedOutsideTryBlock(IIII)I
+    .registers 7
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    shr-int/2addr p2, p3
+
+    :try_start
+    const v1, 0xa           # dead catch phi input, defined in entry block
+    const v2, 0xf           # dead catch phi input, defined in entry block
+    div-int/2addr p0, v2
+
+    :else
+    const v1, 0xb           # live catch phi input
+    const v2, 0xc           # live catch phi input
+    div-int/2addr p0, p3
+
+    const v1, 0xd           # live catch phi input
+    const v2, 0xe           # live catch phi input
+    div-int/2addr p0, p1
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    sub-int p0, v1, v2      # use catch phi values
+    goto :return
+
+.end method
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
new file mode 100644
index 0000000..19587e7
--- /dev/null
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  static boolean $inline$False() { return false; }
+
+  // DCE should only merge blocks where the first ends with a Goto.
+  // SSAChecker will fail if the following Throw->TryBoundary blocks are merged.
+  public static void doNotMergeThrow(String str) {
+    try {
+      throw new Exception(str);
+    } catch (Exception ex) {
+      return;
+    }
+  }
+
+  // Test deletion of all try/catch blocks. Using multiple catch blocks exercises
+  // deletion where a TryBoundary still has exception-handler successors after
+  // some have already been removed.
+
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-NOT: TryBoundary
+
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK: begin_block
+  /// CHECK: begin_block
+  /// CHECK: begin_block
+  /// CHECK-NOT: begin_block
+
+  public static void testDeadTryCatch(boolean val) {
+    if ($inline$False()) {
+      try {
+        if (val) {
+          throw new ArithmeticException();
+        } else {
+          throw new ArrayIndexOutOfBoundsException();
+        }
+      } catch (ArithmeticException ex) {
+        System.out.println("Unexpected AE catch");
+      } catch (ArrayIndexOutOfBoundsException ex) {
+        System.out.println("Unexpected AIOOB catch");
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+
+  }
+}
\ No newline at end of file
diff --git a/test/543-env-long-ref/env_long_ref.cc b/test/543-env-long-ref/env_long_ref.cc
new file mode 100644
index 0000000..4108323
--- /dev/null
+++ b/test/543-env-long-ref/env_long_ref.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/context.h"
+#include "art_method-inl.h"
+#include "jni.h"
+#include "scoped_thread_state_change.h"
+#include "stack.h"
+#include "thread.h"
+
+namespace art {
+
+namespace {
+
+class TestVisitor : public StackVisitor {
+ public:
+  TestVisitor(const ScopedObjectAccess& soa, Context* context, jobject expected_value)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(soa.Self(), context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        expected_value_(expected_value),
+        found_(false),
+        soa_(soa) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name == "testCase") {
+      found_ = true;
+      uint32_t value = 0;
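+      // Vreg 1 held the high half of the wide constant before the smali test
+      // overwrote it with a reference; it must now decode to the expected object.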
+      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
+      CHECK_EQ(reinterpret_cast<mirror::Object*>(value),
+               soa_.Decode<mirror::Object*>(expected_value_));
+    }
+    return true;
+  }
+
+  jobject expected_value_;
+  bool found_;
+  const ScopedObjectAccess& soa_;
+};
+
+}  // namespace
+
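+// Called from Java once the high vreg of the wide value has been overwritten
+// with a reference; the CHECKs abort the test if the reference is not found.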
+extern "C" JNIEXPORT void JNICALL Java_Main_lookForMyRegisters(JNIEnv*, jclass, jobject value) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<Context> context(Context::Create());
+  TestVisitor visitor(soa, context.get(), value);
+  visitor.WalkStack();
+  CHECK(visitor.found_);
+}
+
+}  // namespace art
diff --git a/test/543-env-long-ref/expected.txt b/test/543-env-long-ref/expected.txt
new file mode 100644
index 0000000..89f155b
--- /dev/null
+++ b/test/543-env-long-ref/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+42
diff --git a/test/543-env-long-ref/info.txt b/test/543-env-long-ref/info.txt
new file mode 100644
index 0000000..6a42533
--- /dev/null
+++ b/test/543-env-long-ref/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, which used to return
+the wrong dex register in debuggable mode when a new value
+overwrote the high dex register of a wide value.
diff --git a/test/543-env-long-ref/smali/TestCase.smali b/test/543-env-long-ref/smali/TestCase.smali
new file mode 100644
index 0000000..608d6eb
--- /dev/null
+++ b/test/543-env-long-ref/smali/TestCase.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static testCase()I
+  .registers 5
+  const-wide/16 v0, 0x1
+  invoke-static {v0, v1}, LMain;->$noinline$allocate(J)LMain;
+  move-result-object v1
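+  # The move-result-object above overwrites v1, the high half of the wide
+  # value in (v0, v1), with a reference.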
+  invoke-static {v1}, LMain;->lookForMyRegisters(LMain;)V
+  iget v2, v1, LMain;->field:I
+  return v2
+.end method
diff --git a/test/543-env-long-ref/src/Main.java b/test/543-env-long-ref/src/Main.java
new file mode 100644
index 0000000..e723789
--- /dev/null
+++ b/test/543-env-long-ref/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
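+    // TestCase is assembled from smali, so it can only be reached via reflection.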
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase");
+    Integer a = (Integer)m.invoke(null, (Object[]) null);
+    System.out.println(a);
+  }
+
+  public static Main $noinline$allocate(long a) {
+    try {
+      return new Main();
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+
+  public static native void lookForMyRegisters(Main m);
+
+  int field = 42;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/545-tracing-and-jit/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/545-tracing-and-jit/expected.txt
diff --git a/test/545-tracing-and-jit/info.txt b/test/545-tracing-and-jit/info.txt
new file mode 100644
index 0000000..34e654e
--- /dev/null
+++ b/test/545-tracing-and-jit/info.txt
@@ -0,0 +1,2 @@
+Tests interaction between the JIT and the method tracing
+functionality.
diff --git a/test/545-tracing-and-jit/src/Main.java b/test/545-tracing-and-jit/src/Main.java
new file mode 100644
index 0000000..f365c6e
--- /dev/null
+++ b/test/545-tracing-and-jit/src/Main.java
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+public class Main {
+    private static final String TEMP_FILE_NAME_PREFIX = "test";
+    private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+    private static File file;
+
+    public static void main(String[] args) throws Exception {
+        String name = System.getProperty("java.vm.name");
+        if (!"Dalvik".equals(name)) {
+            System.out.println("This test is not supported on " + name);
+            return;
+        }
+        file = createTempFile();
+        try {
+            new Main().ensureCaller(true, 0);
+            new Main().ensureCaller(false, 0);
+        } finally {
+            if (file != null) {
+                file.delete();
+            }
+        }
+    }
+
+    private static File createTempFile() throws Exception {
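+        // Try the default temp directory first, then fall back to locations
+        // that are commonly writable on Android devices.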
+        try {
+            return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+        } catch (IOException e) {
+            System.setProperty("java.io.tmpdir", "/data/local/tmp");
+            try {
+                return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+            } catch (IOException e2) {
+                System.setProperty("java.io.tmpdir", "/sdcard");
+                return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+            }
+        }
+    }
+
+    // We make sure 'doLoadsOfStuff' has a caller, because it is this caller that will be
+    // pushed into the side instrumentation frame.
+    public void ensureCaller(boolean warmup, int invocationCount) throws Exception {
+        doLoadsOfStuff(warmup, invocationCount);
+    }
+
+    // The number of recursive calls we are going to do in 'doLoadsOfStuff' to ensure
+    // the JIT sees it as hot.
+    static final int NUMBER_OF_INVOCATIONS = 5;
+
+    public void doLoadsOfStuff(boolean warmup, int invocationCount) throws Exception {
+        // Warmup is to make sure the JIT gets a chance to compile 'doLoadsOfStuff'.
+        if (warmup) {
+            if (invocationCount < NUMBER_OF_INVOCATIONS) {
+                doLoadsOfStuff(warmup, ++invocationCount);
+            } else {
+                // Give the JIT a chance to compile.
+                Thread.sleep(1000);
+            }
+        } else {
+            if (invocationCount == 0) {
+                // When running the test in trace mode, there is already a trace running.
+                if (VMDebug.getMethodTracingMode() != 0) {
+                    VMDebug.stopMethodTracing();
+                }
+                VMDebug.startMethodTracing(file.getPath(), 0, 0, false, 0);
+            }
+            fillJit();
+            if (invocationCount < NUMBER_OF_INVOCATIONS) {
+                doLoadsOfStuff(warmup, ++invocationCount);
+            } else {
+                VMDebug.stopMethodTracing();
+            }
+        }
+    }
+
+    // This method creates enough profiling data to fill the code cache and
+    // trigger a collection in debug mode (10KB of data space at the time this
+    // test was written). We used to crash by not looking at the instrumentation
+    // stack and deleting JIT code that would later be restored by the
+    // instrumentation.
+    public static void fillJit() throws Exception {
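+        // Each block below exercises one collection implementation so that
+        // many distinct methods get profiled and compiled.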
+        Map map = new HashMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        map = new LinkedHashMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        map = new TreeMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        map = new ConcurrentSkipListMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        Set set = new HashSet();
+        set.add("foo");
+        set.addAll(set);
+        set.clear();
+        set.contains("foo");
+        set.containsAll(set);
+        set.equals(set);
+        set.hashCode();
+        set.isEmpty();
+        set.iterator();
+        set.remove("foo");
+        set.removeAll(set);
+        set.retainAll(set);
+        set.size();
+        set.add("foo");
+        set.toArray();
+
+        set = new LinkedHashSet();
+        set.add("foo");
+        set.addAll(set);
+        set.clear();
+        set.contains("foo");
+        set.containsAll(set);
+        set.equals(set);
+        set.hashCode();
+        set.isEmpty();
+        set.iterator();
+        set.remove("foo");
+        set.removeAll(set);
+        set.retainAll(set);
+        set.size();
+        set.add("foo");
+        set.toArray();
+
+        set = new TreeSet();
+        set.add("foo");
+        set.addAll(set);
+        set.clear();
+        set.contains("foo");
+        set.containsAll(set);
+        set.equals(set);
+        set.hashCode();
+        set.isEmpty();
+        set.iterator();
+        set.remove("foo");
+        set.removeAll(set);
+        set.retainAll(set);
+        set.size();
+        set.add("foo");
+        set.toArray();
+    }
+
+    private static class VMDebug {
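+        // Thin reflection wrapper: dalvik.system.VMDebug is not part of the
+        // public API, so its methods are looked up and invoked reflectively.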
+        private static final Method startMethodTracingMethod;
+        private static final Method stopMethodTracingMethod;
+        private static final Method getMethodTracingModeMethod;
+        static {
+            try {
+                Class<?> c = Class.forName("dalvik.system.VMDebug");
+                startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+                        Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+                stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+                getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        public static void startMethodTracing(String filename, int bufferSize, int flags,
+                boolean samplingEnabled, int intervalUs) throws Exception {
+            startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+                    intervalUs);
+        }
+        public static void stopMethodTracing() throws Exception {
+            stopMethodTracingMethod.invoke(null);
+        }
+        public static int getMethodTracingMode() throws Exception {
+            return (int) getMethodTracingModeMethod.invoke(null);
+        }
+    }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/546-regression-simplify-catch/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/546-regression-simplify-catch/expected.txt
diff --git a/test/546-regression-simplify-catch/info.txt b/test/546-regression-simplify-catch/info.txt
new file mode 100644
index 0000000..b146e87
--- /dev/null
+++ b/test/546-regression-simplify-catch/info.txt
@@ -0,0 +1,2 @@
+Tests simplification of catch blocks in the presence of trivially dead code
+that was not verified by the verifier.
diff --git a/test/546-regression-simplify-catch/smali/TestCase.smali b/test/546-regression-simplify-catch/smali/TestCase.smali
new file mode 100644
index 0000000..486b3b0
--- /dev/null
+++ b/test/546-regression-simplify-catch/smali/TestCase.smali
@@ -0,0 +1,104 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test simplification of an empty, dead catch block. The compiler used to
+# segfault because it expected at least one control-flow instruction
+# (b/25494450).
+
+.method public static testCase_EmptyCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
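+    # Everything from here on is dead code; the method has already returned.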
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    nop
+
+.end method
+
+# Test simplification of a dead catch block with some code but no control-flow
+# instruction.
+
+.method public static testCase_NoControlFlowCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    const v1, 0x3
+    add-int v0, v0, v1
+
+.end method
+
+# Test simplification of a dead catch block that has normal predecessors but
+# starts with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_InvalidLoadException()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    :catch
+    move-exception v0
+
+.end method
+
+# Test simplification of a live catch block that has dead normal predecessors
+# and starts with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_TriviallyDeadPredecessor(II)I
+    .registers 3
+
+    :try_start
+    div-int v0, p0, p1
+    return v0
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    # Trivially dead predecessor block.
+    add-int p0, p0, p1
+
+    :catch
+    # This verifies because only exceptional predecessors are live.
+    move-exception v0
+    const v0, 0x0
+    return v0
+
+.end method
+
diff --git a/test/546-regression-simplify-catch/src/Main.java b/test/546-regression-simplify-catch/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/546-regression-simplify-catch/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/547-regression-trycatch-critical-edge/expected.txt
similarity index 100%
copy from test/469-condition-materialization-regression/expected.txt
copy to test/547-regression-trycatch-critical-edge/expected.txt
diff --git a/test/547-regression-trycatch-critical-edge/info.txt b/test/547-regression-trycatch-critical-edge/info.txt
new file mode 100644
index 0000000..dc798c0
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/info.txt
@@ -0,0 +1,2 @@
+Test a specific SSA building regression where a back edge would not be split
+due to being on a try/catch boundary.
\ No newline at end of file
diff --git a/test/547-regression-trycatch-critical-edge/smali/TestCase.smali b/test/547-regression-trycatch-critical-edge/smali/TestCase.smali
new file mode 100644
index 0000000..53a3cc5
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/smali/TestCase.smali
@@ -0,0 +1,57 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# The following test case would crash liveness analysis because the back edge of
+# the outer loop would have a smaller liveness position than the two back edges
+# of the inner loop. This was caused by a bug which did not split the critical
+# edge between the TryBoundary and the outer loop header (b/25493695).
+
+.method public static testCase(II)I
+  .registers 10
+
+  const v0, 0x0                                       # v0 = result
+  const v1, 0x1                                       # v1 = const 1
+
+  move v2, p0                                         # v2 = outer loop counter
+  :outer_loop
+  if-eqz v2, :return
+  sub-int/2addr v2, v1
+
+  :try_start
+
+  move v3, p1                                         # v3 = inner loop counter
+  :inner_loop
+  invoke-static {}, Ljava/lang/System;->nanoTime()J   # throwing instruction
+  if-eqz v3, :outer_loop                              # back edge of outer loop
+  sub-int/2addr v3, v1
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J   # throwing instruction
+  add-int/2addr v0, v1
+  goto :inner_loop                                    # back edge of inner loop
+
+  :try_end
+  .catchall {:try_start .. :try_end} :catch
+
+  :catch
+  const v4, 0x2
+  add-int/2addr v0, v4
+  goto :inner_loop                                    # back edge of inner loop
+
+  :return
+  return v0
+
+.end method
diff --git a/test/547-regression-trycatch-critical-edge/src/Main.java b/test/547-regression-trycatch-critical-edge/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/548-checker-inlining-and-dce/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/548-checker-inlining-and-dce/expected.txt
diff --git a/test/548-checker-inlining-and-dce/info.txt b/test/548-checker-inlining-and-dce/info.txt
new file mode 100644
index 0000000..3255d6b
--- /dev/null
+++ b/test/548-checker-inlining-and-dce/info.txt
@@ -0,0 +1 @@
+Test that inlining works when code preventing inlining is eliminated by DCE.
diff --git a/test/548-checker-inlining-and-dce/src/Main.java b/test/548-checker-inlining-and-dce/src/Main.java
new file mode 100644
index 0000000..bf64c3b
--- /dev/null
+++ b/test/548-checker-inlining-and-dce/src/Main.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
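+  // Kept false: the guarded throws below normally prevent inlining; the test
+  // checks that inlining succeeds once DCE removes the dead branches.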
+  static boolean doThrow = false;
+
+  private void inlinedForNull(Iterable it) {
+    if (it != null) {
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
+    }
+  }
+
+  private void inlinedForFalse(boolean value, Iterable it) {
+    if (value) {
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
+    }
+  }
+
+  /// CHECK-START: void Main.testInlinedForFalseInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForFalseInlined(java.lang.Iterable) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeInterface
+
+  public void testInlinedForFalseInlined(Iterable it) {
+    inlinedForFalse(false, it);
+  }
+
+  /// CHECK-START: void Main.testInlinedForFalseNotInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForFalseNotInlined(java.lang.Iterable) inliner (after)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  public void testInlinedForFalseNotInlined(Iterable it) {
+    inlinedForFalse(true, it);
+  }
+
+  /// CHECK-START: void Main.testInlinedForNullInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForNullInlined(java.lang.Iterable) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeInterface
+
+  public void testInlinedForNullInlined(Iterable it) {
+    inlinedForNull(null);
+  }
+
+  /// CHECK-START: void Main.testInlinedForNullNotInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForNullNotInlined(java.lang.Iterable) inliner (after)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  public void testInlinedForNullNotInlined(Iterable it) {
+    inlinedForNull(it);
+  }
+
+  public static void main(String[] args) {
+    Main m = new Main();
+    Iterable it = new Iterable() {
+      public java.util.Iterator iterator() { return null; }
+    };
+    m.testInlinedForFalseInlined(it);
+    m.testInlinedForFalseNotInlined(it);
+    m.testInlinedForNullInlined(it);
+    m.testInlinedForNullNotInlined(it);
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/549-checker-types-merge/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/549-checker-types-merge/expected.txt
diff --git a/test/549-checker-types-merge/info.txt b/test/549-checker-types-merge/info.txt
new file mode 100644
index 0000000..f174e20
--- /dev/null
+++ b/test/549-checker-types-merge/info.txt
@@ -0,0 +1 @@
+Checker test for the type merge during reference type propagation.
diff --git a/test/549-checker-types-merge/src/Main.java b/test/549-checker-types-merge/src/Main.java
new file mode 100644
index 0000000..51af3cf
--- /dev/null
+++ b/test/549-checker-types-merge/src/Main.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// To make it easier to follow the tests:
+//  - all interfaces defined in this file extend InterfaceSuper (except InterfaceOtherSuper)
+//  - all classes defined in this file extend ClassSuper (except ClassOtherSuper)
+
+interface InterfaceSuper {}
+interface InterfaceOtherSuper {}
+
+interface InterfaceA extends InterfaceSuper {}
+interface InterfaceB extends InterfaceSuper {}
+interface InterfaceExtendsA extends InterfaceA {}
+interface InterfaceExtendsB extends InterfaceB {}
+
+class ClassSuper {}
+class ClassOtherSuper {}
+
+class ClassA extends ClassSuper {}
+class ClassB extends ClassSuper {}
+class ClassExtendsA extends ClassA {}
+class ClassExtendsB extends ClassB {}
+
+class ClassImplementsInterfaceA extends ClassSuper implements InterfaceA {}
+
+public class Main {
+
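+  // Each method below merges two references with a ternary; the Checker
+  // annotations assert the reference type (klass) inferred for the Phi.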
+  /// CHECK-START: java.lang.Object Main.testMergeNullConstant(boolean) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeNullConstant(boolean cond) {
+    return cond ? null : new Main();
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassExtendsB b) {
+    // Different classes, have a common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassSuper b) {
+    // Different classes, one is the super type of the other.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassSuper a, ClassSuper b) {
+    // Same classes.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassOtherSuper a, ClassSuper b) {
+    // Different classes, have Object as the common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClassWithInterface(boolean cond, ClassImplementsInterfaceA a, InterfaceSuper b) {
+    // Class implements interface.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClassWithInterface(boolean cond, ClassSuper a, InterfaceSuper b) {
+    // Class doesn't implement interface.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceSuper b) {
+    // Different interfaces, one extends the other.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceSuper b) {
+    // Same interfaces.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceExtendsB b) {
+    // Different interfaces, have a common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) builder (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceOtherSuper b) {
+    // Different interfaces.
+    return cond ? a : b;
+  }
+
+  public static void main(String[] args) {
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/550-checker-multiply-accumulate/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/550-checker-multiply-accumulate/expected.txt
diff --git a/test/550-checker-multiply-accumulate/info.txt b/test/550-checker-multiply-accumulate/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/info.txt
@@ -0,0 +1 @@
+Test the merging of multiply and add/sub/neg operations into multiply-accumulate
+instructions on ARM and ARM64.
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
new file mode 100644
index 0000000..09376a2
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test basic merging of `MUL+ADD` into `MULADD`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            mla r{{\d+}}, r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static int $opt$noinline$mulAdd(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    return acc + left * right;
+  }
+
+  /**
+   * Test basic merging of `MUL+SUB` into `MULSUB`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static long $opt$noinline$mulSub(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    return acc - left * right;
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction when there
+   * are other uses of the multiplication that cannot merge it.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    int temp = left * right;
+    return temp | (acc + temp);
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction even when all
+   * uses of the multiplication can merge it.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    long temp = left * right;
+    return (acc + temp) + (acc - temp);
+  }
+
+  /**
+   * Test the interpretation of `a * (b + 1)` as `a + (a * b)`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            mla r{{\d+}}, r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static int $opt$noinline$mulPlusOne(int acc, int var) {
+    if (doThrow) throw new Error();
+    return acc * (var + 1);
+  }
+
+  /**
+   * Test the interpretation of `a * (1 - b)` as `a - (a * b)`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static long $opt$noinline$mulMinusOne(long acc, long var) {
+    if (doThrow) throw new Error();
+    return acc * (1 - var);
+  }
+
+  /**
+   * Test basic merging of `MUL+NEG` into `MULNEG`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:i\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Const0:i\d+>>      IntConstant 0
+  /// CHECK:       <<MulNeg:i\d+>>      MultiplyAccumulate [<<Const0>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulNeg>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Neg
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) disassembly (after)
+  /// CHECK:                            mneg w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:i\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:i\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static int $opt$noinline$mulNeg(int left, int right) {
+    if (doThrow) throw new Error();
+    return - (left * right);
+  }
+
+  /**
+   * Test basic merging of `MUL+NEG` into `MULNEG`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Const0:j\d+>>      LongConstant 0
+  /// CHECK:       <<MulNeg:j\d+>>      MultiplyAccumulate [<<Const0>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulNeg>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Neg
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) disassembly (after)
+  /// CHECK:                            mneg x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static long $opt$noinline$mulNeg(long left, long right) {
+    if (doThrow) throw new Error();
+    return - (left * right);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
+    assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6));
+    assertIntEquals(79, $opt$noinline$multipleUses1(7, 8, 9));
+    assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
+    assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
+    assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
+    assertIntEquals(-306, $opt$noinline$mulNeg(17, 18));
+    assertLongEquals(-380, $opt$noinline$mulNeg(19, 20));
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/550-checker-regression-wide-store/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/550-checker-regression-wide-store/expected.txt
diff --git a/test/550-checker-regression-wide-store/info.txt b/test/550-checker-regression-wide-store/info.txt
new file mode 100644
index 0000000..6cf04bc
--- /dev/null
+++ b/test/550-checker-regression-wide-store/info.txt
@@ -0,0 +1,3 @@
+Test an SsaBuilder regression where storing into the high vreg of a pair
+would not invalidate the low vreg. The resulting environment would generate
+an incorrect stack map, causing deopt and try/catch to use the wrong location.
\ No newline at end of file
diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali
new file mode 100644
index 0000000..9133c82
--- /dev/null
+++ b/test/550-checker-regression-wide-store/smali/TestCase.smali
@@ -0,0 +1,82 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static $noinline$throw()V
+  .registers 1
+  new-instance v0, Ljava/lang/Exception;
+  invoke-direct {v0}, Ljava/lang/Exception;-><init>()V
+  throw v0
+.end method
+
+# Test storing into the high vreg of a wide pair. This scenario has runtime
+# behaviour implications, so we run it from Main.main.
+
+## CHECK-START: int TestCase.invalidateLow(long) builder (after)
+## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>]
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[_,<<Cst0>>,<<Arg>>,_]]
+## CHECK-DAG: InvokeStaticOrDirect method_name:TestCase.$noinline$throw  env:[[_,<<Cast>>,<<Arg>>,_]]
+
+.method public static invalidateLow(J)I
+  .registers 4
+
+  const/4 v1, 0x0
+
+  :try_start
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-wide v0, p0
+  long-to-int v1, v0
+  invoke-static {}, LTestCase;->$noinline$throw()V
+  :try_end
+  .catchall {:try_start .. :try_end} :catchall
+
+  :catchall
+  return v1
+
+.end method
+
+# Test that storing a wide value invalidates the value in the high vreg. This
+# cannot be detected at runtime, so we only test the environment with Checker.
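+# (Here `move-wide v0, p0` turns (v0,v1) into a pair, so the integer constant
+# previously stored in v1 must be invalidated; the environment records the
+# wide value only through its low vreg.)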
+
+## CHECK-START: void TestCase.invalidateHigh1(long) builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]]
+
+.method public static invalidateHigh1(J)V
+  .registers 4
+
+  const/4 v1, 0x0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
+
+## CHECK-START: void TestCase.invalidateHigh2(long) builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]]
+
+.method public static invalidateHigh2(J)V
+  .registers 5
+
+  move-wide v1, p0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
diff --git a/test/550-checker-regression-wide-store/src/Main.java b/test/550-checker-regression-wide-store/src/Main.java
new file mode 100644
index 0000000..9b502df
--- /dev/null
+++ b/test/550-checker-regression-wide-store/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  private static int runTestCase(String name, long arg) throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod(name, long.class);
+    int result = (Integer) m.invoke(null, arg);
+    return result;
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    assertEquals(42, runTestCase("invalidateLow", 42L));
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/550-new-instance-clinit/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/550-new-instance-clinit/expected.txt
diff --git a/test/550-new-instance-clinit/info.txt b/test/550-new-instance-clinit/info.txt
new file mode 100644
index 0000000..c5fa3c7
--- /dev/null
+++ b/test/550-new-instance-clinit/info.txt
@@ -0,0 +1,3 @@
+Regression test for optimizing which used to treat
+HNewInstance as not having side effects even though it
+could invoke a clinit method.
diff --git a/test/550-new-instance-clinit/src/Main.java b/test/550-new-instance-clinit/src/Main.java
new file mode 100644
index 0000000..45e259e
--- /dev/null
+++ b/test/550-new-instance-clinit/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    int foo = Main.a;
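+    // `new Bar()` runs Bar's <clinit>, which increments Main.a. HNewInstance
+    // must therefore be treated as having side effects so that the reload of
+    // Main.a below is not eliminated.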
+    new Bar();
+    foo = Main.a;
+    if (foo != 43) {
+      throw new Error("Expected 43, got " + foo);
+    }
+  }
+  static int a = 42;
+}
+
+class Bar {
+  static {
+    Main.a++;
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/551-checker-clinit/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/551-checker-clinit/expected.txt
diff --git a/test/551-checker-clinit/info.txt b/test/551-checker-clinit/info.txt
new file mode 100644
index 0000000..4d54bb5
--- /dev/null
+++ b/test/551-checker-clinit/info.txt
@@ -0,0 +1 @@
+Checker test to ensure we optimize away HClinitChecks as expected.
diff --git a/test/551-checker-clinit/src/Main.java b/test/551-checker-clinit/src/Main.java
new file mode 100644
index 0000000..5ec30480
--- /dev/null
+++ b/test/551-checker-clinit/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {}
+  public static int foo = 42;
+
+  /// CHECK-START: void Main.inlinedMethod() builder (after)
+  /// CHECK:                        ClinitCheck
+
+  /// CHECK-START: void Main.inlinedMethod() inliner (after)
+  /// CHECK:                        ClinitCheck
+  /// CHECK-NOT:                    ClinitCheck
+  /// CHECK-NOT:                    InvokeStaticOrDirect
+  public void inlinedMethod() {
+    SubSub.bar();
+  }
+}
+
+class Sub extends Main {
+  /// CHECK-START: void Sub.invokeSuperClass() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeSuperClass() {
+    int a = Main.foo;
+  }
+
+  /// CHECK-START: void Sub.invokeItself() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeItself() {
+    int a = foo;
+  }
+
+  /// CHECK-START: void Sub.invokeSubClass() builder (after)
+  /// CHECK:                            ClinitCheck
+  public void invokeSubClass() {
+    int a = SubSub.foo;
+  }
+
+  public static int foo = 42;
+}
+
+class SubSub {
+  public static void bar() {
+    int a = Main.foo;
+  }
+  public static int foo = 42;
+}
diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build
new file mode 100644
index 0000000..a78021f
--- /dev/null
+++ b/test/551-checker-shifter-operand/build
@@ -0,0 +1,212 @@
+#!/bin/bash
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This is an almost exact copy of `art/test/etc/default-build`. Only the
+# parsing of the `--dx-option` argument has been overridden, so that this test
+# always runs *with* `dx` optimizations.
+
+# Stop if something fails.
+set -e
+
+# Set default values for directories.
+if [ -d smali ]; then
+  HAS_SMALI=true
+else
+  HAS_SMALI=false
+fi
+
+if [ -d src ]; then
+  HAS_SRC=true
+else
+  HAS_SRC=false
+fi
+
+if [ -d src2 ]; then
+  HAS_SRC2=true
+else
+  HAS_SRC2=false
+fi
+
+if [ -d src-multidex ]; then
+  HAS_SRC_MULTIDEX=true
+else
+  HAS_SRC_MULTIDEX=false
+fi
+
+if [ -d src-ex ]; then
+  HAS_SRC_EX=true
+else
+  HAS_SRC_EX=false
+fi
+
+DX_FLAGS=""
+SKIP_DX_MERGER="false"
+EXPERIMENTAL=""
+
+# Setup experimental flag mappings in a bash associative array.
+declare -A JACK_EXPERIMENTAL_ARGS
+JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
+JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
+
+while true; do
+  if [ "x$1" = "x--dx-option" ]; then
+    shift
+    option="$1"
+    # Make sure we run this test *with* `dx` optimizations.
+    if [ "x$option" != "x--no-optimize" ]; then
+      DX_FLAGS="${DX_FLAGS} $option"
+    fi
+    shift
+  elif [ "x$1" = "x--jvm" ]; then
+    shift
+  elif [ "x$1" = "x--no-src" ]; then
+    HAS_SRC=false
+    shift
+  elif [ "x$1" = "x--no-src2" ]; then
+    HAS_SRC2=false
+    shift
+  elif [ "x$1" = "x--no-src-multidex" ]; then
+    HAS_SRC_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-src-ex" ]; then
+    HAS_SRC_EX=false
+    shift
+  elif [ "x$1" = "x--no-smali" ]; then
+    HAS_SMALI=false
+    shift
+  elif [ "x$1" = "x--experimental" ]; then
+    shift
+    EXPERIMENTAL="${EXPERIMENTAL} $1"
+    shift
+  elif expr "x$1" : "x--" >/dev/null 2>&1; then
+    echo "unknown $0 option: $1" 1>&2
+    exit 1
+  else
+    break
+  fi
+done
+
+# Add args from the experimental mappings.
+for experiment in ${EXPERIMENTAL}; do
+  JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+done
+
+if [ -e classes.dex ]; then
+  zip $TEST_NAME.jar classes.dex
+  exit 0
+fi
+
+if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then
+  # No src directory? Then forget about trying to run dx.
+  SKIP_DX_MERGER="true"
+fi
+
+if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+  # Jack does not support this configuration unless we specify how to partition the DEX file
+  # with a .jpp file.
+  USE_JACK="false"
+fi
+
+if [ ${USE_JACK} = "true" ]; then
+  # Jack toolchain
+  if [ "${HAS_SRC}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src.jack src
+    imported_jack_files="--import src.jack"
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
+    imported_jack_files="--import src2.jack ${imported_jack_files}"
+  fi
+
+  # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
+  # class definitions from src2 first.
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+  fi
+else
+  # Legacy toolchain with javac+dx
+  if [ "${HAS_SRC}" = "true" ]; then
+    mkdir classes
+    ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+    mkdir classes2
+    ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
+        --dump-width=1000 ${DX_FLAGS} classes2
+    fi
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    mkdir -p classes
+    ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
+        --dump-width=1000 ${DX_FLAGS} classes
+    fi
+  fi
+fi
+
+if [ "${HAS_SMALI}" = "true" ]; then
+  # Compile Smali classes
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${SKIP_DX_MERGER} = "false" ]; then
+    ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  else
+    mv smali_classes.dex classes.dex
+  fi
+fi
+
+if [ ${HAS_SRC_EX} = "true" ]; then
+  if [ ${USE_JACK} = "true" ]; then
+      # Rename previous "classes.dex" so it is not overwritten.
+      mv classes.dex classes-1.dex
+      # TODO: find another way to append src.jack to the jack classpath
+      ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex
+      zip $TEST_NAME-ex.jar classes.dex
+      # Restore previous "classes.dex" so it can be zipped.
+      mv classes-1.dex classes.dex
+  else
+    mkdir classes-ex
+    ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \
+        --dump-width=1000 ${DX_FLAGS} classes-ex
+
+      # quick shuffle so that the stored name is "classes.dex"
+      mv classes.dex classes-1.dex
+      mv classes-ex.dex classes.dex
+      zip $TEST_NAME-ex.jar classes.dex
+      mv classes.dex classes-ex.dex
+      mv classes-1.dex classes.dex
+    fi
+  fi
+fi
+
+# Create a single jar with two dex files for multidex.
+if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex classes2.dex
+elif [ ${NEED_DEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex
+fi
diff --git a/test/530-checker-loops/expected.txt b/test/551-checker-shifter-operand/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/551-checker-shifter-operand/expected.txt
diff --git a/test/551-checker-shifter-operand/info.txt b/test/551-checker-shifter-operand/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/551-checker-shifter-operand/info.txt
@@ -0,0 +1 @@
+Test the merging of instructions into the shifter operand on arm64.
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
new file mode 100644
index 0000000..a4561b8
--- /dev/null
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -0,0 +1,690 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertByteEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertShortEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  // Non-inlinable type-casting helpers.
+  static  char $noinline$byteToChar   (byte v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$byteToShort  (byte v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$byteToInt    (byte v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$byteToLong   (byte v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$charToByte   (char v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static short $noinline$charToShort  (char v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$charToInt    (char v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$charToLong   (char v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$shortToByte (short v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$shortToChar (short v) { if (doThrow) throw new Error(); return  (char)v; }
+  static   int $noinline$shortToInt  (short v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$shortToLong (short v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$intToByte     (int v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$intToChar     (int v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$intToShort    (int v) { if (doThrow) throw new Error(); return (short)v; }
+  static  long $noinline$intToLong     (int v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$longToByte   (long v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$longToChar   (long v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$longToShort  (long v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$longToInt    (long v) { if (doThrow) throw new Error(); return   (int)v; }
+
+  /**
+   * Basic test merging a bitfield move operation (here a type conversion) into
+   * the shifter operand.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:       <<tmp:j\d+>>         TypeConversion [<<b>>]
+  /// CHECK:                            Sub [<<l>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) disassembly (after)
+  /// CHECK:                            sub x{{\d+}}, x{{\d+}}, w{{\d+}}, sxtb
+
+  public static long $opt$noinline$translate(long l, byte b) {
+    if (doThrow) throw new Error();
+    long tmp = (long)b;
+    return l - tmp;
+  }
+
+
+  /**
+   * Test that we do not merge into the shifter operand when the left and right
+   * inputs are the same IR node.
+   */
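+  // (If both inputs are the same Shl, merging would still leave the Shl live
+  // as the other Add operand, so the simplifier leaves the graph unchanged.)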
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<a:i\d+>>           ParameterValue
+  /// CHECK:       <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<tmp>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<a:i\d+>>           ParameterValue
+  /// CHECK-DAG:   <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<Shl:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<Shl>>,<<Shl>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static int $opt$noinline$sameInput(int a) {
+    if (doThrow) throw new Error();
+    int tmp = a << 2;
+    return tmp + tmp;
+  }
+
+  /**
+   * Check that we perform the merge for multiple uses.
+   */
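+  // Each of the five uses below is rewritten into its own
+  // Arm64DataProcWithShifterOp, after which the original Shl is dead (see the
+  // CHECK-NOT assertions).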
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:       <<Const23:i\d+>>     IntConstant 23
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<arg>>,<<Const23>>]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Add
+
+  public static int $opt$noinline$multipleUses(int arg) {
+    if (doThrow) throw new Error();
+    int tmp = arg << 23;
+    switch (arg) {
+      case 1:  return (arg | 1) + tmp;
+      case 2:  return (arg | 2) + tmp;
+      case 3:  return (arg | 3) + tmp;
+      case 4:  return (arg | 4) + tmp;
+      case (1 << 20):  return (arg | 5) + tmp;
+      default: return 0;
+    }
+  }
+
+  /**
+   * Logical instructions cannot take 'extend' operations into the shift
+   * operand, so test that only the shifts are merged.
+   */
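+  // (AArch64 provides extended-register forms only for add/sub-class
+  // instructions; logical instructions such as and/orr/eor take a shifted
+  // register, so the extensions stay as separate sxtb/uxth/sxtw instructions.)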
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
+  /// CHECK:                            and lsl
+  /// CHECK:                            sxtb
+  /// CHECK:                            and
+
+  static void $opt$noinline$testAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a & $noinline$LongShl(b, 5)) | (a & $noinline$longToByte(b)),
+                     (a & (b << 5)) | (a & (byte)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after)
+  /// CHECK:                            orr asr
+  /// CHECK:                            uxth
+  /// CHECK:                            orr
+
+  static void $opt$noinline$testOr(int a, int b) {
+    if (doThrow) throw new Error();
+    assertIntEquals((a | $noinline$IntShr(b, 6)) | (a | $noinline$intToChar(b)),
+                    (a | (b >> 6)) | (a | (char)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after)
+  /// CHECK:                            eor lsr
+  /// CHECK:                            sxtw
+  /// CHECK:                            eor
+
+  static void $opt$noinline$testXor(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a ^ $noinline$LongUshr(b, 7)) | (a ^ $noinline$longToInt(b)),
+                     (a ^ (b >>> 7)) | (a ^ (int)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after)
+  /// CHECK:                            neg lsl
+  /// CHECK:                            sxth
+  /// CHECK:                            neg
+
+  static void $opt$noinline$testNeg(int a) {
+    if (doThrow) throw new Error();
+    assertIntEquals(-$noinline$IntShl(a, 8) | -$noinline$intToShort(a),
+                    (-(a << 8)) | (-(short)a));
+  }
+
+  /**
+   * The functions below are used to compare the result of optimized operations
+   * to non-optimized operations.
+   * On the left-hand side we use a non-inlined function call to ensure the
+   * optimization does not occur. The checker tests ensure that the optimization
+   * does occur on the right-hand side.
+   */
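+  // For example, in `assertIntEquals(a + $noinline$IntShl(b, 1), a + (b << 1))`
+  // the shift on the left is hidden behind a call, while the Shl on the right
+  // is visible to the simplifier and merged.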
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteInt1(int a, byte b) {
+    assertIntEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    // Conversions byte->short and short->int are implicit; nothing to merge.
+    assertIntEquals(a + $noinline$byteToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendByteInt2(int a, byte b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$byteToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$byteToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteLong(long a, byte b) {
+    // In each of the following tests, there will be a merge on the LHS.
+
+    // The first test has an explicit byte->char conversion on RHS,
+    // followed by a conversion that is merged with the Add.
+    assertLongEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    // Since conversions byte->short and byte->int are implicit, the RHS
+    // for the two tests below is the same and one is eliminated by GVN.
+    // The other is then merged into a shifter operand instruction.
+    assertLongEquals(a + $noinline$byteToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$byteToInt  (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendByte(long a, byte b) {
+    $opt$validateExtendByteInt1((int)a, b);
+    $opt$validateExtendByteInt2((int)a, b);
+    $opt$validateExtendByteLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharInt1(int a, char b) {
+    assertIntEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertIntEquals(a + $noinline$charToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendCharInt2(int a, char b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$charToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$charToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharLong(long a, char b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$charToShort(b), a + (short)b);
+    // This test does not have one, because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$charToInt  (b), a +   (int)b);
+  }
+
+  public static void $opt$validateExtendChar(long a, char b) {
+    $opt$validateExtendCharInt1((int)a, b);
+    $opt$validateExtendCharInt2((int)a, b);
+    $opt$validateExtendCharLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortInt1(int a, short b) {
+    assertIntEquals(a + $noinline$shortToByte (b), a + (byte)b);
+    assertIntEquals(a + $noinline$shortToChar (b), a + (char)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendShortInt2(int a, short b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$shortToInt  (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$shortToLong (b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortLong(long a, short b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b);
+    assertLongEquals(a + $noinline$shortToChar(b), a + (char)b);
+    // This test does not have one, because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$shortToInt (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendShort(long a, short b) {
+    $opt$validateExtendShortInt1((int)a, b);
+    $opt$validateExtendShortInt2((int)a, b);
+    $opt$validateExtendShortLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendInt(long a, int b) {
+    // All tests have a conversion to `long`. The first three tests also have a
+    // conversion from `int` to the specified type. For each test the conversion
+    // to `long` is merged into the shifter operand.
+    assertLongEquals(a + $noinline$intToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$intToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$intToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$intToLong (b), a +  (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendLong(long a, long b) {
+    // Each test has two conversions, from `long` and then back to `long`. The
+    // conversions to `long` are merged.
+    assertLongEquals(a + $noinline$longToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$longToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$longToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$longToInt  (b), a +   (int)b);
+  }
+
+
+  static int $noinline$IntShl(int b, int c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static int $noinline$IntShr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static int $noinline$IntUshr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by the
+  // generic simplifier (an int shift amount uses only its low 5 bits), so
+  // only 33 of the 36 shift expressions in the method below are merged,
+  // matching the CHECK lines above.
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftInt(int a, int b) {
+    assertIntEquals(a + $noinline$IntShl(b, 1),   a + (b <<  1));
+    assertIntEquals(a + $noinline$IntShl(b, 6),   a + (b <<  6));
+    assertIntEquals(a + $noinline$IntShl(b, 7),   a + (b <<  7));
+    assertIntEquals(a + $noinline$IntShl(b, 8),   a + (b <<  8));
+    assertIntEquals(a + $noinline$IntShl(b, 14),  a + (b << 14));
+    assertIntEquals(a + $noinline$IntShl(b, 15),  a + (b << 15));
+    assertIntEquals(a + $noinline$IntShl(b, 16),  a + (b << 16));
+    assertIntEquals(a + $noinline$IntShl(b, 30),  a + (b << 30));
+    assertIntEquals(a + $noinline$IntShl(b, 31),  a + (b << 31));
+    assertIntEquals(a + $noinline$IntShl(b, 32),  a + (b << $opt$inline$IntConstant32()));
+    assertIntEquals(a + $noinline$IntShl(b, 62),  a + (b << $opt$inline$IntConstant62()));
+    assertIntEquals(a + $noinline$IntShl(b, 63),  a + (b << $opt$inline$IntConstant63()));
+
+    assertIntEquals(a - $noinline$IntShr(b, 1),   a - (b >>  1));
+    assertIntEquals(a - $noinline$IntShr(b, 6),   a - (b >>  6));
+    assertIntEquals(a - $noinline$IntShr(b, 7),   a - (b >>  7));
+    assertIntEquals(a - $noinline$IntShr(b, 8),   a - (b >>  8));
+    assertIntEquals(a - $noinline$IntShr(b, 14),  a - (b >> 14));
+    assertIntEquals(a - $noinline$IntShr(b, 15),  a - (b >> 15));
+    assertIntEquals(a - $noinline$IntShr(b, 16),  a - (b >> 16));
+    assertIntEquals(a - $noinline$IntShr(b, 30),  a - (b >> 30));
+    assertIntEquals(a - $noinline$IntShr(b, 31),  a - (b >> 31));
+    assertIntEquals(a - $noinline$IntShr(b, 32),  a - (b >> $opt$inline$IntConstant32()));
+    assertIntEquals(a - $noinline$IntShr(b, 62),  a - (b >> $opt$inline$IntConstant62()));
+    assertIntEquals(a - $noinline$IntShr(b, 63),  a - (b >> $opt$inline$IntConstant63()));
+
+    assertIntEquals(a ^ $noinline$IntUshr(b, 1),   a ^ (b >>>  1));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 6),   a ^ (b >>>  6));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 7),   a ^ (b >>>  7));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 8),   a ^ (b >>>  8));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 14),  a ^ (b >>> 14));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 15),  a ^ (b >>> 15));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 16),  a ^ (b >>> 16));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 30),  a ^ (b >>> 30));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 31),  a ^ (b >>> 31));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 32),  a ^ (b >>> $opt$inline$IntConstant32()));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 62),  a ^ (b >>> $opt$inline$IntConstant62()));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 63),  a ^ (b >>> $opt$inline$IntConstant63()));
+  }
+
+  // Helpers hiding shift amounts outside the valid int range [0, 32) from
+  // Jack, which would otherwise fold them (Jack keeps only the low 5 bits of
+  // an int shift amount).
+  public static int $opt$inline$IntConstant32() { return 32; }
+  public static int $opt$inline$IntConstant62() { return 62; }
+  public static int $opt$inline$IntConstant63() { return 63; }
+
+
+  static long $noinline$LongShl(long b, long c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static long $noinline$LongShr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static long $noinline$LongUshr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftLong(long a, long b) {
+    assertLongEquals(a + $noinline$LongShl(b, 1),   a + (b <<  1));
+    assertLongEquals(a + $noinline$LongShl(b, 6),   a + (b <<  6));
+    assertLongEquals(a + $noinline$LongShl(b, 7),   a + (b <<  7));
+    assertLongEquals(a + $noinline$LongShl(b, 8),   a + (b <<  8));
+    assertLongEquals(a + $noinline$LongShl(b, 14),  a + (b << 14));
+    assertLongEquals(a + $noinline$LongShl(b, 15),  a + (b << 15));
+    assertLongEquals(a + $noinline$LongShl(b, 16),  a + (b << 16));
+    assertLongEquals(a + $noinline$LongShl(b, 30),  a + (b << 30));
+    assertLongEquals(a + $noinline$LongShl(b, 31),  a + (b << 31));
+    assertLongEquals(a + $noinline$LongShl(b, 32),  a + (b << 32));
+    assertLongEquals(a + $noinline$LongShl(b, 62),  a + (b << 62));
+    assertLongEquals(a + $noinline$LongShl(b, 63),  a + (b << 63));
+
+    assertLongEquals(a - $noinline$LongShr(b, 1),   a - (b >>  1));
+    assertLongEquals(a - $noinline$LongShr(b, 6),   a - (b >>  6));
+    assertLongEquals(a - $noinline$LongShr(b, 7),   a - (b >>  7));
+    assertLongEquals(a - $noinline$LongShr(b, 8),   a - (b >>  8));
+    assertLongEquals(a - $noinline$LongShr(b, 14),  a - (b >> 14));
+    assertLongEquals(a - $noinline$LongShr(b, 15),  a - (b >> 15));
+    assertLongEquals(a - $noinline$LongShr(b, 16),  a - (b >> 16));
+    assertLongEquals(a - $noinline$LongShr(b, 30),  a - (b >> 30));
+    assertLongEquals(a - $noinline$LongShr(b, 31),  a - (b >> 31));
+    assertLongEquals(a - $noinline$LongShr(b, 32),  a - (b >> 32));
+    assertLongEquals(a - $noinline$LongShr(b, 62),  a - (b >> 62));
+    assertLongEquals(a - $noinline$LongShr(b, 63),  a - (b >> 63));
+
+    assertLongEquals(a ^ $noinline$LongUshr(b, 1),   a ^ (b >>>  1));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 6),   a ^ (b >>>  6));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 7),   a ^ (b >>>  7));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 8),   a ^ (b >>>  8));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 14),  a ^ (b >>> 14));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 15),  a ^ (b >>> 15));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 16),  a ^ (b >>> 16));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 30),  a ^ (b >>> 30));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 31),  a ^ (b >>> 31));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 32),  a ^ (b >>> 32));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 62),  a ^ (b >>> 62));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  public static void main(String[] args) {
+    assertLongEquals(10000L - 3L, $opt$noinline$translate(10000L, (byte)3));
+    assertLongEquals(-10000L - -3L, $opt$noinline$translate(-10000L, (byte)-3));
+
+    assertIntEquals(4096, $opt$noinline$sameInput(512));
+    assertIntEquals(-8192, $opt$noinline$sameInput(-1024));
+
+    assertIntEquals(((1 << 23) | 1), $opt$noinline$multipleUses(1));
+    assertIntEquals(((1 << 20) | 5), $opt$noinline$multipleUses(1 << 20));
+
+    long inputs[] = {
+      -((1L <<  7) - 1L), -((1L <<  7)), -((1L <<  7) + 1L),
+      -((1L << 15) - 1L), -((1L << 15)), -((1L << 15) + 1L),
+      -((1L << 16) - 1L), -((1L << 16)), -((1L << 16) + 1L),
+      -((1L << 31) - 1L), -((1L << 31)), -((1L << 31) + 1L),
+      -((1L << 32) - 1L), -((1L << 32)), -((1L << 32) + 1L),
+      -((1L << 63) - 1L), -((1L << 63)), -((1L << 63) + 1L),
+      -42L, -314L, -2718281828L, -0x123456789L, -0x987654321L,
+      -1L, -20L, -300L, -4000L, -50000L, -600000L, -7000000L, -80000000L,
+      0L,
+      1L, 20L, 300L, 4000L, 50000L, 600000L, 7000000L, 80000000L,
+      42L,  314L,  2718281828L,  0x123456789L,  0x987654321L,
+      (1L <<  7) - 1L, (1L <<  7), (1L <<  7) + 1L,
+      (1L <<  8) - 1L, (1L <<  8), (1L <<  8) + 1L,
+      (1L << 15) - 1L, (1L << 15), (1L << 15) + 1L,
+      (1L << 16) - 1L, (1L << 16), (1L << 16) + 1L,
+      (1L << 31) - 1L, (1L << 31), (1L << 31) + 1L,
+      (1L << 32) - 1L, (1L << 32), (1L << 32) + 1L,
+      (1L << 63) - 1L, (1L << 63), (1L << 63) + 1L,
+      Long.MIN_VALUE, Long.MAX_VALUE
+    };
+    for (int i = 0; i < inputs.length; i++) {
+      $opt$noinline$testNeg((int)inputs[i]);
+      for (int j = 0; j < inputs.length; j++) {
+        $opt$noinline$testAnd(inputs[i], inputs[j]);
+        $opt$noinline$testOr((int)inputs[i], (int)inputs[j]);
+        $opt$noinline$testXor(inputs[i], inputs[j]);
+
+        $opt$validateExtendByte(inputs[i], (byte)inputs[j]);
+        $opt$validateExtendChar(inputs[i], (char)inputs[j]);
+        $opt$validateExtendShort(inputs[i], (short)inputs[j]);
+        $opt$validateExtendInt(inputs[i], (int)inputs[j]);
+        $opt$validateExtendLong(inputs[i], inputs[j]);
+
+        $opt$validateShiftInt((int)inputs[i], (int)inputs[j]);
+        $opt$validateShiftLong(inputs[i], inputs[j]);
+      }
+    }
+
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/551-implicit-null-checks/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/551-implicit-null-checks/expected.txt
diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt
new file mode 100644
index 0000000..bdd066b
--- /dev/null
+++ b/test/551-implicit-null-checks/info.txt
@@ -0,0 +1 @@
+Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file
diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java
new file mode 100644
index 0000000..677e8d3
--- /dev/null
+++ b/test/551-implicit-null-checks/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private class Inner {
+    private long i1;
+  }
+  private Inner inst;
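+
+  // `inst` is never assigned, so the accesses below fault on a null object.
+  // ART's implicit null checks convert the resulting fault into a
+  // NullPointerException; the dex-pc mapping must also be correct for wide
+  // (64-bit) loads and stores.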
+
+  public static void main(String args[]) throws Exception {
+    Main m = new Main();
+    try {
+      m.$opt$noinline$testGetLong();
+    } catch (NullPointerException ex) {
+      // good
+    }
+    try {
+      m.$opt$noinline$testPutLong(778899112233L);
+    } catch (NullPointerException ex) {
+      // good
+    }
+  }
+
+  public void $opt$noinline$testGetLong() throws Exception {
+    long result = inst.i1;
+    throw new Exception();  // prevent inlining
+  }
+
+  public void $opt$noinline$testPutLong(long a) throws Exception {
+    inst.i1 = a;
+    throw new Exception();  // prevent inlining
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/551-invoke-super/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/551-invoke-super/expected.txt
diff --git a/test/551-invoke-super/info.txt b/test/551-invoke-super/info.txt
new file mode 100644
index 0000000..864ddfe
--- /dev/null
+++ b/test/551-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when resolving to an abstract method.
diff --git a/test/551-invoke-super/smali/invokesuper.smali b/test/551-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/551-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
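+    # The method reference names LInvokeSuper itself; resolution falls through
+    # to the abstract SuperClass.returnInt(), so this call must throw
+    # AbstractMethodError (verified in src/Main.java).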
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/551-invoke-super/smali/superclass.smali b/test/551-invoke-super/smali/superclass.smali
new file mode 100644
index 0000000..47fbee7
--- /dev/null
+++ b/test/551-invoke-super/smali/superclass.smali
@@ -0,0 +1,26 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method abstract public returnInt()I
+.end method
diff --git a/test/551-invoke-super/src/Main.java b/test/551-invoke-super/src/Main.java
new file mode 100644
index 0000000..3a30184
--- /dev/null
+++ b/test/551-invoke-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected AbstractMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof AbstractMethodError)) {
+        throw new Error("Expected AbstractMethodError");
+      }
+    }
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/552-checker-primitive-typeprop/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/552-checker-primitive-typeprop/expected.txt
diff --git a/test/552-checker-primitive-typeprop/info.txt b/test/552-checker-primitive-typeprop/info.txt
new file mode 100644
index 0000000..9d69056
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/info.txt
@@ -0,0 +1,2 @@
+Test that phis with environment uses which can be properly typed are kept
+in --debuggable mode.
\ No newline at end of file
diff --git a/test/552-checker-primitive-typeprop/smali/ArrayGet.smali b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
new file mode 100644
index 0000000..de32290
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
@@ -0,0 +1,245 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LArrayGet;
+.super Ljava/lang/Object;
+
+
+# Test phi with fixed-type ArrayGet as an input and a matching second input.
+# The phi should be typed accordingly.
+
+## CHECK-START: void ArrayGet.matchingFixedType(float[], float) builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFixedType(float[], float) builder (after)
+## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
+## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
+## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
+.method public static matchingFixedType([FF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-float v2, v0, v1  # float use fixes type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
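+
+# A Java-like sketch of matchingFixedType (illustrative only; reusing one dex
+# register to create the phi cannot be expressed directly in Java):
+#
+#   float v0 = array[0];             // ArrayGet, typed float by the add-float use
+#   if ((int) arg1 != 0) v0 = arg1;  // v0 becomes Phi(ArrayGet, arg1)
+#   // Both inputs are float, so the phi is typed float (kept when debuggable).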
+
+
+# Test phi with fixed-type ArrayGet as an input and a conflicting second input.
+# The phi should be eliminated due to the conflict.
+
+## CHECK-START: void ArrayGet.conflictingFixedType(float[], int) builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType(float[], int) builder (after)
+## CHECK-NOT: Phi
+.method public static conflictingFixedType([FI)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-float v2, v0, v1  # float use fixes type
+
+  if-eqz p1, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Same test as the one above, but this time checks that the type of the
+# ArrayGet is not changed.
+
+## CHECK-START: void ArrayGet.conflictingFixedType2(int[], float) builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) builder (after)
+## CHECK:     {{i\d+}} ArrayGet
+.method public static conflictingFixedType2([IF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-int v2, v0, v1    # int use fixes type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with free-type ArrayGet as an input and a matching second input.
+# The phi should be typed accordingly.
+
+## CHECK-START: void ArrayGet.matchingFreeType(float[], float) builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFreeType(float[], float) builder (after)
+## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
+## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
+## CHECK-DAG:                ArraySet [{{l\d+}},{{i\d+}},<<Aget>>]
+## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
+.method public static matchingFreeType([FF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value, should be float but has no typed use
+  aput v0, p0, v1       # aput does not disambiguate the type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with free-type ArrayGet as an input and a conflicting second input.
+# As the checker expectations below show, the phi is eliminated due to the
+# conflict, in both normal and debuggable mode.
+
+## CHECK-START: void ArrayGet.conflictingFreeType(int[], float) builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFreeType(int[], float) builder (after)
+## CHECK-NOT: Phi
+
+.method public static conflictingFreeType([IF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value, should be int but has no typed use
+  aput v0, p0, v1
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test that real use of ArrayGet is propagated through phis. The following test
+# case uses ArrayGet indirectly through two phis. It also creates an unused
+# conflicting phi which should not be preserved.
+
+## CHECK-START: void ArrayGet.conflictingPhiUses(int[], float, boolean, boolean, boolean) builder (after)
+## CHECK:         InvokeStaticOrDirect env:[[{{i\d+}},{{i\d+}},_,{{i\d+}},{{.*}}
+
+.method public static conflictingPhiUses([IFZZZ)V
+  .registers 10
+
+  const v0, 0x0
+
+  # Create v1 = Phi [0x0, int ArrayGet]
+  move v1, v0
+  if-eqz p2, :else1
+  aget v1, p0, v0
+  :else1
+
+  # Create v2 = Phi [v1, float]
+  move v2, v1
+  if-eqz p3, :else2
+  move v2, p1
+  :else2
+
+  # Create v3 = Phi [v1, int]
+  move v3, v1
+  if-eqz p4, :else3
+  move v3, v0
+  :else3
+
+  # Use v3 as int.
+  add-int/lit8 v4, v3, 0x2a
+
+  # Create env uses.
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+
+  return-void
+.end method
+
+# Test that the right ArrayGet equivalent is always selected. The following test
+# case uses ArrayGet as float through one phi and as an indeterminate type through
+# another. The situation needs to be resolved so that only one instruction
+# remains.
+
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) builder (after)
+## CHECK:         {{f\d+}} ArrayGet
+
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) builder (after)
+## CHECK-NOT:     {{i\d+}} ArrayGet
+
+.method public static typedVsUntypedPhiUse([FFZZ)V
+  .registers 10
+
+  const v0, 0x0
+
+  # v1 = float ArrayGet
+  aget v1, p0, v0
+
+  # Create v2 = Phi [v1, 0.0f]
+  move v2, v1
+  if-eqz p2, :else1
+  move v2, v0
+  :else1
+
+  # Use v2 as float
+  cmpl-float v2, v2, p1
+
+  # Create v3 = Phi [v1, 0.0f]
+  move v3, v1
+  if-eqz p3, :else2
+  move v3, v0
+  :else2
+
+  # Use v3 without a determinate type.
+  aput v3, p0, v0
+
+  return-void
+.end method
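+
+# Note on equivalents: when an aget's type is ambiguous, the builder can hold
+# both an int-typed and a float-typed ArrayGet for the same dex instruction.
+# The checks above assert that once the cmpl-float use pins the value to
+# float, only the float equivalent survives.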
diff --git a/test/552-checker-primitive-typeprop/smali/ArraySet.smali b/test/552-checker-primitive-typeprop/smali/ArraySet.smali
new file mode 100644
index 0000000..087460a
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/ArraySet.smali
@@ -0,0 +1,51 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LArraySet;
+.super Ljava/lang/Object;
+
+# Test ArraySet on int[] and float[] arrays. The input should be typed accordingly.
+# Note that the input is a Phi to make sure primitive type propagation is re-run
+# on the replaced inputs.
+
+## CHECK-START: void ArraySet.ambiguousSet(int[], float[], boolean) builder (after)
+## CHECK-DAG:     <<IntArray:l\d+>>    ParameterValue klass:int[]
+## CHECK-DAG:     <<IntA:i\d+>>        IntConstant 0
+## CHECK-DAG:     <<IntB:i\d+>>        IntConstant 1073741824
+## CHECK-DAG:     <<IntPhi:i\d+>>      Phi [<<IntA>>,<<IntB>>] reg:0
+## CHECK-DAG:     <<IntNC:l\d+>>       NullCheck [<<IntArray>>]
+## CHECK-DAG:                          ArraySet [<<IntNC>>,{{i\d+}},<<IntPhi>>]
+
+## CHECK-DAG:     <<FloatArray:l\d+>>  ParameterValue klass:float[]
+## CHECK-DAG:     <<FloatA:f\d+>>      FloatConstant 0
+## CHECK-DAG:     <<FloatB:f\d+>>      FloatConstant 2
+## CHECK-DAG:     <<FloatPhi:f\d+>>    Phi [<<FloatA>>,<<FloatB>>] reg:0
+## CHECK-DAG:     <<FloatNC:l\d+>>     NullCheck [<<FloatArray>>]
+## CHECK-DAG:                          ArraySet [<<FloatNC>>,{{i\d+}},<<FloatPhi>>]
+
+.method public static ambiguousSet([I[FZ)V
+  .registers 8
+
+  const v0, 0x0
+  if-eqz p2, :else
+  const v0, 0x40000000
+  :else
+  # v0 = Phi [0.0f, 2.0f]
+
+  const v1, 0x1
+  aput v0, p0, v1
+  aput v0, p1, v1
+
+  return-void
+.end method
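+
+# Java-like sketch of ambiguousSet (illustrative; v0 is a single dex register
+# used as both int and float, which Java cannot express directly):
+#
+#   v0 = cond ? 0x40000000 : 0x0;  // as int: 1073741824, as float: 2.0f
+#   intArray[1] = v0;              // consumes the int phi equivalent
+#   floatArray[1] = v0;            // consumes the float phi equivalent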
diff --git a/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
new file mode 100644
index 0000000..0d067ed
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
@@ -0,0 +1,52 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSsaBuilder;
+.super Ljava/lang/Object;
+
+# Check that a dead phi with a live equivalent is replaced in an environment. The
+# following test case throws an exception and uses v0 afterwards. However, v0
+# contains a phi that is interpreted as int for the environment, and as float for
+# instruction use. SsaBuilder must substitute the float equivalent into the
+# environment before removing the dead int variant; otherwise running the code
+# with an array short enough to throw will crash at runtime because v0 is
+# undefined.
+
+## CHECK-START: int SsaBuilder.environmentPhi(boolean, int[]) builder (after)
+## CHECK-DAG:     <<Cst0:f\d+>>  FloatConstant 0
+## CHECK-DAG:     <<Cst2:f\d+>>  FloatConstant 2
+## CHECK-DAG:     <<Phi:f\d+>>   Phi [<<Cst0>>,<<Cst2>>]
+## CHECK-DAG:                    BoundsCheck env:[[<<Phi>>,{{i\d+}},{{z\d+}},{{l\d+}}]]
+
+.method public static environmentPhi(Z[I)I
+  .registers 4
+
+  const v0, 0x0
+  if-eqz p0, :else
+  const v0, 0x40000000
+  :else
+  # v0 = phi that can be both int and float
+
+  :try_start
+  const v1, 0x3
+  aput v1, p1, v1
+  const v0, 0x1     # generate catch phi for v0
+  const v1, 0x4
+  aput v1, p1, v1
+  :try_end
+  .catchall {:try_start .. :try_end} :use_as_float
+
+  :use_as_float
+  float-to-int v0, v0
+  return v0
+.end method
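+
+# Concretely, the failure mode guarded against: with p0 == true, v0 holds
+# float 2.0f; the first aput throws for an array shorter than 4, and the
+# handler then reads v0 as float. If the dead int phi were dropped from the
+# environment without substituting its float equivalent, v0 would be
+# undefined there.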
\ No newline at end of file
diff --git a/test/552-checker-primitive-typeprop/smali/TypePropagation.smali b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
new file mode 100644
index 0000000..d34e43e
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
@@ -0,0 +1,136 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTypePropagation;
+.super Ljava/lang/Object;
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDeadPhi(boolean, boolean, int, float, float) builder (after)
+## CHECK-NOT: Phi
+.method public static mergeDeadPhi(ZZIFF)V
+  .registers 8
+
+  if-eqz p0, :after1
+  move p2, p3
+  :after1
+  # p2 = merge(int,float) = conflict
+
+  if-eqz p1, :after2
+  move p2, p4
+  :after2
+  # p2 = merge(conflict,float) = conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeSameType(boolean, int, int) builder (after)
+## CHECK:     {{i\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeSameType(ZII)V
+  .registers 8
+  if-eqz p0, :after
+  move p1, p2
+  :after
+  # p1 = merge(int,int) = int
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeVoidInput(boolean, boolean, int, int) builder (after)
+## CHECK:     {{i\d+}} Phi
+## CHECK:     {{i\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeVoidInput(ZZII)V
+  .registers 8
+  :loop
+  # p2 = void (loop phi) => p2 = merge(int,int) = int
+  if-eqz p0, :after
+  move p2, p3
+  :after
+  # p2 = merge(void,int) = int
+  if-eqz p1, :loop
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDifferentSize(boolean, int, long) builder (after)
+## CHECK-NOT: Phi
+.method public static mergeDifferentSize(ZIJ)V
+  .registers 8
+  if-eqz p0, :after
+  move-wide p1, p2
+  :after
+  # p1 = merge(int,long) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeRefFloat(boolean, float, java.lang.Object) builder (after)
+## CHECK-NOT: Phi
+.method public static mergeRefFloat(ZFLjava/lang/Object;)V
+  .registers 8
+  if-eqz p0, :after
+  move-object p1, p2
+  :after
+  # p1 = merge(float,reference) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Success(boolean, float) builder (after)
+## CHECK:     {{f\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeIntFloat_Success(ZF)V
+  .registers 8
+  if-eqz p0, :after
+  const/4 p1, 0x0
+  :after
+  # p1 = merge(float,0x0) = float
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Fail(boolean, int, float) builder (after)
+## CHECK-NOT: Phi
+.method public static mergeIntFloat_Fail(ZIF)V
+  .registers 8
+  if-eqz p0, :after
+  move p1, p2
+  :after
+  # p1 = merge(int,float) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.updateAllUsersOnConflict(boolean, boolean, int, float, int) builder (after)
+## CHECK-NOT: Phi
+.method public static updateAllUsersOnConflict(ZZIFI)V
+  .registers 8
+
+  :loop1
+  # loop phis for all args
+  # p2 = merge(int,float) = float? => conflict
+  move p2, p3
+  if-eqz p0, :loop1
+
+  :loop2
+  # loop phis for all args
+  # requests float equivalent of p4 phi in loop1 => conflict
+  # propagates conflict to loop2's phis
+  move p2, p4
+  if-eqz p1, :loop2
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
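+
+# Informal summary of the merge rules exercised above (the checker
+# expectations are authoritative):
+#   merge(int, int)          => int      (phi kept)
+#   merge(int, float)        => conflict (phi removed)
+#   merge(int, long)         => conflict (mismatched sizes)
+#   merge(float, reference)  => conflict
+#   merge(float, const 0x0)  => float    (untyped zero constant adopts float)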
diff --git a/test/552-checker-primitive-typeprop/src/Main.java b/test/552-checker-primitive-typeprop/src/Main.java
new file mode 100644
index 0000000..1296800
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result, expected=" + expected + ", actual=" + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("SsaBuilder");
+    Method m = c.getMethod("environmentPhi", boolean.class, int[].class);
+
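+    // Use length 3 so the aput at index 3 inside environmentPhi throws,
+    // exercising the catch path where v0 must still be defined.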
+    int[] array = new int[3];
+    int result;
+
+    result = (Integer) m.invoke(null, true, array);
+    assertEquals(2, result);
+
+    result = (Integer) m.invoke(null, false, array);
+    assertEquals(0, result);
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/552-checker-sharpening/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/552-checker-sharpening/expected.txt
diff --git a/test/552-checker-sharpening/info.txt b/test/552-checker-sharpening/info.txt
new file mode 100644
index 0000000..c84539c
--- /dev/null
+++ b/test/552-checker-sharpening/info.txt
@@ -0,0 +1 @@
+Tests for sharpening.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
new file mode 100644
index 0000000..2232ff4
--- /dev/null
+++ b/test/552-checker-sharpening/src/Main.java
@@ -0,0 +1,410 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertStringEquals(String expected, String result) {
+    if (expected != null ? !expected.equals(result) : result != null) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertClassEquals(Class<?> expected, Class<?> result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean doThrow = false;
+
+  private static int $noinline$foo(int x) {
+    if (doThrow) { throw new Error(); }
+    return x;
+  }
+
+  /// CHECK-START: int Main.testSimple(int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testSimple(int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  public static int testSimple(int x) {
+    // This call should use a PC-relative dex cache array load to retrieve the target method.
+    return $noinline$foo(x);
+  }
+
+  /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           If
+
+  public static int testDiamond(boolean negate, int x) {
+    // These calls should use PC-relative dex cache array loads to retrieve the target method.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the If.
+    if (negate) {
+      return $noinline$foo(-x);
+    } else {
+      return $noinline$foo(x);
+    }
+  }
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  public static int testLoop(int[] array, int x) {
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop.
+    for (int i : array) {
+      x += $noinline$foo(i);
+    }
+    return x;
+  }
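+
+  // Conceptually, the fixup passes turn the loop above into something like
+  // the following pseudocode (a sketch of the intended codegen shape, not
+  // actual compiler output):
+  //
+  //   base = <compute dex-cache-arrays / method-address base>;  // hoisted
+  //   for (int i : array) {
+  //     x += invoke($noinline$foo via base, i);  // PC-relative method load
+  //   }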
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop
+    // but not outside the if.
+    if (array != null) {
+      for (int i : array) {
+        if (negate) {
+          x += $noinline$foo(-i);
+        } else {
+          x += $noinline$foo(i);
+        }
+      }
+    }
+    return x;
+  }
+
+  /// CHECK-START: java.lang.String Main.$noinline$getBootImageString() sharpening (before)
+  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  public static String $noinline$getBootImageString() {
+    // Prevent inlining to avoid the string comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // Empty string is known to be in the boot image.
+    return "";
+  }
+
+  /// CHECK-START: java.lang.String Main.$noinline$getNonBootImageString() sharpening (before)
+  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_x86 (after)
+  /// CHECK-DAG:            X86ComputeBaseMethodAddress
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-X86_64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_arm (after)
+  /// CHECK-DAG:            ArmDexCacheArraysBase
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
+  public static String $noinline$getNonBootImageString() {
+    // Prevent inlining to avoid the string comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // This string is not in the boot image.
+    return "non-boot-image-string";
+  }
+
+  /// CHECK-START: java.lang.Class Main.$noinline$getStringClass() sharpening (before)
+  /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:java.lang.String
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  public static Class<?> $noinline$getStringClass() {
+    // Prevent inlining to avoid the string comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // String class is known to be in the boot image.
+    return String.class;
+  }
+
+  /// CHECK-START: java.lang.Class Main.$noinline$getOtherClass() sharpening (before)
+  /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:Other
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() pc_relative_fixups_x86 (after)
+  /// CHECK-DAG:            X86ComputeBaseMethodAddress
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_arm (after)
+  /// CHECK-DAG:            ArmDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  public static Class<?> $noinline$getOtherClass() {
+    // Prevent inlining to avoid the string comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // Other class is not in the boot image.
+    return Other.class;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(1, testSimple(1));
+    assertIntEquals(1, testDiamond(false, 1));
+    assertIntEquals(-1, testDiamond(true, 1));
+    assertIntEquals(3, testLoop(new int[]{ 2 }, 1));
+    assertIntEquals(8, testLoop(new int[]{ 3, 4 }, 1));
+    assertIntEquals(1, testLoopWithDiamond(null, false, 1));
+    assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
+    assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+    assertStringEquals("", $noinline$getBootImageString());
+    assertStringEquals("non-boot-image-string", $noinline$getNonBootImageString());
+    assertClassEquals(String.class, $noinline$getStringClass());
+    assertClassEquals(Other.class, $noinline$getOtherClass());
+  }
+}
+
+class Other {
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/552-invoke-non-existent-super/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/552-invoke-non-existent-super/expected.txt
diff --git a/test/552-invoke-non-existent-super/info.txt b/test/552-invoke-non-existent-super/info.txt
new file mode 100644
index 0000000..c5428d4
--- /dev/null
+++ b/test/552-invoke-non-existent-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when the super class does not have the method.
diff --git a/test/552-invoke-non-existent-super/smali/invokesuper.smali b/test/552-invoke-non-existent-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super that references this class (not the super class) to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
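+
+# Note: unlike test 551-invoke-super, the superclass here (see
+# superclass.smali) does not declare returnInt() at all, so resolving the
+# invoke-super is expected to fail with NoSuchMethodError (see src/Main.java).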
diff --git a/test/552-invoke-non-existent-super/smali/superclass.smali b/test/552-invoke-non-existent-super/smali/superclass.smali
new file mode 100644
index 0000000..21d961e
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/superclass.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/552-invoke-non-existent-super/src/Main.java b/test/552-invoke-non-existent-super/src/Main.java
new file mode 100644
index 0000000..c264471
--- /dev/null
+++ b/test/552-invoke-non-existent-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected NoSuchMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/553-invoke-super/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/553-invoke-super/expected.txt
diff --git a/test/553-invoke-super/info.txt b/test/553-invoke-super/info.txt
new file mode 100644
index 0000000..ad99030
--- /dev/null
+++ b/test/553-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode.
diff --git a/test/553-invoke-super/smali/invokesuper.smali b/test/553-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..a6f9b4e
--- /dev/null
+++ b/test/553-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super that references this class, to confuse the runtime/compiler.
+    invoke-super {v1}, LInvokeSuper;->$noinline$returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public $noinline$returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
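+
+# Note: the invoke-super above dispatches to SuperClass.$noinline$returnInt(),
+# which returns 42 (see src/SuperClass.java); src/Main.java asserts 42, so the
+# 777 defined above must never be the observed result.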
diff --git a/test/553-invoke-super/src/Main.java b/test/553-invoke-super/src/Main.java
new file mode 100644
index 0000000..91d2394
--- /dev/null
+++ b/test/553-invoke-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/553-invoke-super/src/SuperClass.java b/test/553-invoke-super/src/SuperClass.java
new file mode 100644
index 0000000..36ce093
--- /dev/null
+++ b/test/553-invoke-super/src/SuperClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  boolean doThrow = false;
+
+  public int $noinline$returnInt() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 42;
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/554-checker-rtp-checkcast/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/554-checker-rtp-checkcast/expected.txt
diff --git a/test/554-checker-rtp-checkcast/info.txt b/test/554-checker-rtp-checkcast/info.txt
new file mode 100644
index 0000000..2a60971
--- /dev/null
+++ b/test/554-checker-rtp-checkcast/info.txt
@@ -0,0 +1 @@
+Tests that phis with check-casted reference type inputs are typed.
diff --git a/test/554-checker-rtp-checkcast/src/Main.java b/test/554-checker-rtp-checkcast/src/Main.java
new file mode 100644
index 0000000..5bf766f
--- /dev/null
+++ b/test/554-checker-rtp-checkcast/src/Main.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+
+  public static Object returnIntArray() { return new int[10]; }
+
+  /// CHECK-START: void Main.boundTypeForMergingPhi() builder (after)
+  /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
+  /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
+  /// CHECK-DAG:     <<Phi>>  Phi klass:int[]
+
+  public static void boundTypeForMergingPhi() {
+    int[] array = new int[20];
+    if (array.hashCode() > 5) {
+      array = (int[]) returnIntArray();
+    }
+    array[0] = 14;
+  }
+
+  /// CHECK-START: void Main.boundTypeForLoopPhi() builder (after)
+  /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
+  /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
+  /// CHECK-DAG:     <<Phi>>  Phi klass:int[]
+
+  public static void boundTypeForLoopPhi() {
+    int[] array = new int[20];
+    int i = 0;
+    while (i < 4) {
+      ++i;
+      array[i] = i;
+      if (i > 2) {
+        array = (int[]) returnIntArray();
+      }
+    }
+    array[0] = 14;
+  }
+
+  /// CHECK-START: void Main.boundTypeForCatchPhi() builder (after)
+  /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
+  /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
+  /// CHECK-DAG:     <<Phi>>  Phi is_catch_phi:true klass:int[]
+
+  public static void boundTypeForCatchPhi() {
+    int[] array1 = new int[20];
+    int[] array2 = (int[]) returnIntArray();
+
+    int[] catch_phi = array1;
+    try {
+      System.nanoTime();
+      catch_phi = array2;
+      System.nanoTime();
+    } catch (Throwable ex) {
+      catch_phi[0] = 14;
+    }
+  }
+
+  public static void main(String[] args) {  }
+}
diff --git a/test/455-set-vreg/expected.txt b/test/555-UnsafeGetLong-regression/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/555-UnsafeGetLong-regression/expected.txt
diff --git a/test/555-UnsafeGetLong-regression/info.txt b/test/555-UnsafeGetLong-regression/info.txt
new file mode 100644
index 0000000..0e16ed7
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/info.txt
@@ -0,0 +1,2 @@
+Regression test for the sun.misc.Unsafe.getLong intrinsic, whose
+locations were not handled properly.
diff --git a/test/555-UnsafeGetLong-regression/src/Main.java b/test/555-UnsafeGetLong-regression/src/Main.java
new file mode 100644
index 0000000..1adafae
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/src/Main.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+public class Main {
+  private static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static Unsafe getUnsafe() throws Exception {
+    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+    Field f = unsafeClass.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    return (Unsafe) f.get(null);
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Unsafe unsafe = getUnsafe();
+
+    testUnsafeGetLong(unsafe);
+  }
+
+  public static void testUnsafeGetLong(Unsafe unsafe) throws Exception {
+    TestClass test = new TestClass();
+    Field longField = TestClass.class.getDeclaredField("longVar");
+    long lvar = unsafe.objectFieldOffset(longField);
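+    // lvar first holds the field offset; the next line reuses it for the
+    // 64-bit value read from that offset.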
+    lvar = unsafe.getLong(test, lvar);
+    assertLongEquals(1122334455667788L, lvar);
+  }
+
+  private static class TestClass {
+    public long longVar = 1122334455667788L;
+  }
+}
diff --git a/test/555-checker-regression-x86const/build b/test/555-checker-regression-x86const/build
new file mode 100644
index 0000000..92ddfc9
--- /dev/null
+++ b/test/555-checker-regression-x86const/build
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+# We can't use the src-ex testing infrastructure because src and src-ex are
+# compiled with javac independently and can't share code (without reflection).
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+mv classes/UnresolvedClass.class classes-ex
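+# Resulting layout (sketch):
+#   classes/    -> all classes except UnresolvedClass -> $TEST_NAME.jar
+#   classes-ex/ -> UnresolvedClass only               -> ${TEST_NAME}-ex.jar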
+
+if [ "${USE_JACK}" = "true" ]; then
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
+
+  ${JACK} --import classes.jill.jar --output-dex .
+  zip $TEST_NAME.jar classes.dex
+  ${JACK} --import classes-ex.jill.jar --output-dex .
+  zip ${TEST_NAME}-ex.jar classes.dex
+else
+  if [ "${NEED_DEX}" = "true" ]; then
+    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+    zip $TEST_NAME.jar classes.dex
+    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+    zip ${TEST_NAME}-ex.jar classes.dex
+  fi
+fi
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/555-checker-regression-x86const/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/555-checker-regression-x86const/expected.txt
diff --git a/test/555-checker-regression-x86const/info.txt b/test/555-checker-regression-x86const/info.txt
new file mode 100644
index 0000000..c4037fa
--- /dev/null
+++ b/test/555-checker-regression-x86const/info.txt
@@ -0,0 +1,2 @@
+Check that the X86 FP constant-area pass handles intrinsics with CurrentMethod
+on the call.
diff --git a/test/555-checker-regression-x86const/run b/test/555-checker-regression-x86const/run
new file mode 100644
index 0000000..63fdb8c
--- /dev/null
+++ b/test/555-checker-regression-x86const/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use the --secondary switch to add the secondary dex file to the class path.
+exec ${RUN} "${@}" --secondary
diff --git a/test/555-checker-regression-x86const/src/Main.java b/test/555-checker-regression-x86const/src/Main.java
new file mode 100644
index 0000000..914cfde
--- /dev/null
+++ b/test/555-checker-regression-x86const/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main extends UnresolvedClass {
+
+  /// CHECK-START: float Main.callAbs(float) register (before)
+  /// CHECK:       <<CurrentMethod:[ij]\d+>> CurrentMethod
+  /// CHECK:       <<ParamValue:f\d+>> ParameterValue
+  /// CHECK:       InvokeStaticOrDirect [<<ParamValue>>,<<CurrentMethod>>] method_name:java.lang.Math.abs
+  public static float callAbs(float f) {
+    // An intrinsic invoke in a method that has unresolved references will still
+    // have a CurrentMethod as an argument.  The X86 pc_relative_fixups_x86 pass
+    // must be able to handle Math.abs invokes that have a CurrentMethod, as both
+    // the CurrentMethod and the HX86LoadFromConstantTable (for the bitmask)
+    // expect to sit at the 'SpecialInputIndex' input index.
+    return Math.abs(f);
+  }
+
+  public static void main(String[] args) {
+    expectEquals(6.5f, callAbs(-6.5f));
+  }
+
+  public static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/555-checker-regression-x86const/src/Unresolved.java b/test/555-checker-regression-x86const/src/Unresolved.java
new file mode 100644
index 0000000..e98bdbf
--- /dev/null
+++ b/test/555-checker-regression-x86const/src/Unresolved.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class UnresolvedClass {
+}
diff --git a/test/530-checker-loops/expected.txt b/test/556-invoke-super/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/556-invoke-super/expected.txt
diff --git a/test/556-invoke-super/info.txt b/test/556-invoke-super/info.txt
new file mode 100644
index 0000000..7de2a4f
--- /dev/null
+++ b/test/556-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode with multidex.
diff --git a/test/556-invoke-super/multidex.jpp b/test/556-invoke-super/multidex.jpp
new file mode 100644
index 0000000..fe01801
--- /dev/null
+++ b/test/556-invoke-super/multidex.jpp
@@ -0,0 +1,4 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main*
+
diff --git a/test/556-invoke-super/smali/invokesuper.smali b/test/556-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ef55000
--- /dev/null
+++ b/test/556-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super on this class, to confuse the runtime/compiler.
+    invoke-super {p0}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/556-invoke-super/src-multidex/SuperClass.java b/test/556-invoke-super/src-multidex/SuperClass.java
new file mode 100644
index 0000000..faf16c4
--- /dev/null
+++ b/test/556-invoke-super/src-multidex/SuperClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  public int returnInt() {
+    return 42;
+  }
+}
diff --git a/test/556-invoke-super/src/Main.java b/test/556-invoke-super/src/Main.java
new file mode 100644
index 0000000..07289f7
--- /dev/null
+++ b/test/556-invoke-super/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/557-checker-instruction-simplifier-ror/expected.txt
similarity index 100%
copy from test/469-condition-materialization-regression/expected.txt
copy to test/557-checker-instruction-simplifier-ror/expected.txt
diff --git a/test/557-checker-instruction-simplifier-ror/info.txt b/test/557-checker-instruction-simplifier-ror/info.txt
new file mode 100644
index 0000000..f9a86f8
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/info.txt
@@ -0,0 +1 @@
+Tests simplification of bitfield rotate patterns in the optimizing compiler.
diff --git a/test/557-checker-instruction-simplifier-ror/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
new file mode 100644
index 0000000..0e3d145
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/src/Main.java
@@ -0,0 +1,663 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:i\d+>>       InvokeStaticOrDirect intrinsic:IntegerRotateRight
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static int rotateIntegerRight(int value, int distance) {
+    return java.lang.Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:i\d+>>       InvokeStaticOrDirect intrinsic:IntegerRotateLeft
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static int rotateIntegerLeft(int value, int distance) {
+    return java.lang.Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:j\d+>>       InvokeStaticOrDirect intrinsic:LongRotateRight
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static long rotateLongRight(long value, int distance) {
+    return java.lang.Long.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:j\d+>>       InvokeStaticOrDirect intrinsic:LongRotateLeft
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static long rotateLongLeft(long value, int distance) {
+    return java.lang.Long.rotateLeft(value, distance);
+  }
+
+  //  (i >>> #distance) | (i << #(reg_bits - distance))
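+  //  For example, with reg_bits = 32: 8 >>> 2 == 2 and 8 << 30 == 0 (the set
+  //  bit overflows), so the pattern yields 2, i.e. 8 rotated right by 2.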
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Const30:i\d+>>      IntConstant 30
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Const30>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_c(int value) {
+    return (value >>> 2) | (value << 30);
+  }
+
+  /// CHECK-START: int Main.ror_int_constant_c_c_0(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c_0(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_c_0(int value) {
+    return (value >>> 2) | (value << 62);
+  }
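+
+  // Note: Java masks int shift distances to their low 5 bits (JLS 15.19), so
+  // the shift by 62 above really shifts by 62 & 31 == 30 and the expression
+  // is still a rotation by 2.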
+
+  //  (j >>> #distance) | (j << #(reg_bits - distance))
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Const62:i\d+>>      IntConstant 62
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Const62>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long ror_long_constant_c_c(long value) {
+    return (value >>> 2) | (value << 62);
+  }
+
+  /// CHECK-START: long Main.ror_long_constant_c_c_0(long) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_constant_c_c_0(long value) {
+    return (value >>> 2) | (value << 30);
+  }
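+
+  // No rotation here: for a long the complement of 2 is 64 - 2 == 62, not 30,
+  // so (value >>> 2) | (value << 30) must not be simplified into a Ror.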
+
+  //  (i >>> #distance) | (i << #-distance)
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ConstNeg2>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_negc(int value) {
+    return (value >>> 2) | (value << $opt$inline$IntConstantM2());
+  }
+
+  // Hides int shift-distance constants outside the range [0, 32) from Jack,
+  // since Jack extracts only the low 5 bits of a shift distance.
+  public static int $opt$inline$IntConstantM2() { return -2; }
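+
+  // Since -2 & 31 == 30, (value << -2) behaves exactly like (value << 30) for
+  // an int, which is what makes the c_negc pattern a rotation by 2.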
+
+  //  (j >>> #distance) | (j << #-distance)
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ConstNeg2>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long ror_long_constant_c_negc(long value) {
+    return (value >>> 2) | (value << -2);
+  }
+
+  //  (i >>> distance) | (i << (#reg_bits - distance))
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Sub
+  public static int ror_int_reg_v_csubv(int value, int distance) {
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  //  (distance = x - y)
+  //  (i >>> distance) | (i << (#reg_bits - distance))
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub32>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          Sub
+  /// CHECK-NOT:      Sub
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_subv_csubv(int value, int x, int y) {
+    int distance = x - y;
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub32>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Or>>,<<Sub32>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Ror>>,<<Sub32>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_subv_csubv_env(int value, int x, int y) {
+    int distance = x - y;
+    int bits_minus_dist = 32 - distance;
+    return ((value >>> distance) | (value << bits_minus_dist)) + bits_minus_dist;
+  }
+
+  //  (j >>> distance) | (j << (#reg_bits - distance))
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Sub>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Sub
+  public static long ror_long_reg_v_csubv(long value, int distance) {
+    return (value >>> distance) | (value << (64 - distance));
+  }
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv_0(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_reg_v_csubv_0(long value, int distance) {
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  /// CHECK-START: long Main.ror_long_subv_csubv_0(long, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_subv_csubv_0(long value, int x, int y) {
+    int distance = x - y;
+    return (value >>> distance) | (value << (32 - distance));
+  }
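+
+  // In the two _0 variants above the constant is 32 rather than 64, so the
+  // shift distances do not complement each other for a 64-bit value and no
+  // Ror may be emitted.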
+
+  //  (i >>> (#reg_bits - distance)) | (i << distance)
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Sub>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_reg_csubv_v(int value, int distance) {
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (distance = x - y)
+  //  (i >>> (#reg_bits - distance)) | (i << distance)
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Sub32>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK:          <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          Sub
+  /// CHECK:          Sub
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_csubv_subv(int value, int x, int y) {
+    int distance = x - y;
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (j >>> (#reg_bits - distance)) | (j << distance)
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Sub>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long rol_long_reg_csubv_v(long value, int distance) {
+    return (value >>> (64 - distance)) | (value << distance);
+  }
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v_0(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long rol_long_reg_csubv_v_0(long value, int distance) {
+    return (value >>> (32 - distance)) | (value << distance);
+  }
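+
+  // As above, 32 does not complement a shift on a 64-bit value, so this is
+  // not a rotation and no Ror may be emitted.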
+
+  //  (i >>> distance) | (i << -distance) (i.e. libcore's Integer.rotateRight)
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Neg
+  public static int ror_int_reg_v_negv(int value, int distance) {
+    return (value >>> distance) | (value << -distance);
+  }
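+
+  // For an int, -distance & 31 == (32 - distance) & 31, so this negated form
+  // is equivalent to the explicit (32 - distance) variant above.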
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Or>>,<<Neg>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Ror>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Sub>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_reg_v_negv_env(int value, int distance) {
+    int neg_distance = -distance;
+    return ((value >>> distance) | (value << neg_distance)) + neg_distance;
+  }
+
+  //  (j >>> distance) | (j << -distance) (i.e. libcore's Long.rotateRight)
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Neg
+  public static long ror_long_reg_v_negv(long value, int distance) {
+    return (value >>> distance) | (value << -distance);
+  }
+
+  //  (i << distance) | (i >>> -distance) (i.e. libcore's Integer.rotateLeft)
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_reg_negv_v(int value, int distance) {
+    return (value << distance) | (value >>> -distance);
+  }
+
+  //  (j << distance) | (j >>> -distance) (i.e. libcore's Long.rotateLeft)
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long rol_long_reg_negv_v(long value, int distance) {
+    return (value << distance) | (value >>> -distance);
+  }
+
+  //  (j << distance) + (j >>> -distance)
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Add:j\d+>>          Add [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:  Add
+  /// CHECK-NOT:  Shl
+  /// CHECK-NOT:  UShr
+  public static long rol_long_reg_v_negv_add(long value, int distance) {
+    return (value << distance) + (value >>> -distance);
+  }
+
+  //  (j << distance) ^ (j >>> -distance)
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Xor:j\d+>>          Xor [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Xor>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:  Xor
+  /// CHECK-NOT:  Shl
+  /// CHECK-NOT:  UShr
+  public static long rol_long_reg_v_negv_xor(long value, int distance) {
+    return (value << distance) ^ (value >>> -distance);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(2, ror_int_constant_c_c(8));
+    assertIntEquals(2, ror_int_constant_c_c_0(8));
+    assertLongEquals(2L, ror_long_constant_c_c(8L));
+
+    assertIntEquals(2, ror_int_constant_c_negc(8));
+    assertLongEquals(2L, ror_long_constant_c_negc(8L));
+
+    assertIntEquals(2, ror_int_reg_v_csubv(8, 2));
+    assertLongEquals(2L, ror_long_reg_v_csubv(8L, 2));
+
+    assertIntEquals(2, ror_int_subv_csubv(8, 2, 0));
+    assertIntEquals(32, ror_int_subv_csubv_env(8, 2, 0));
+    assertIntEquals(32, rol_int_csubv_subv(8, 2, 0));
+
+    assertIntEquals(32, rol_int_reg_csubv_v(8, 2));
+    assertLongEquals(32L, rol_long_reg_csubv_v(8L, 2));
+
+    assertIntEquals(2, ror_int_reg_v_negv(8, 2));
+    assertIntEquals(0, ror_int_reg_v_negv_env(8, 2));
+    assertLongEquals(2L, ror_long_reg_v_negv(8L, 2));
+
+    assertIntEquals(32, rol_int_reg_negv_v(8, 2));
+    assertLongEquals(32L, rol_long_reg_negv_v(8L, 2));
+
+    assertLongEquals(32L, rol_long_reg_v_negv_add(8L, 2));
+    assertLongEquals(32L, rol_long_reg_v_negv_xor(8L, 2));
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/557-checker-ref-equivalent/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/557-checker-ref-equivalent/expected.txt
diff --git a/test/557-checker-ref-equivalent/info.txt b/test/557-checker-ref-equivalent/info.txt
new file mode 100644
index 0000000..30e763b
--- /dev/null
+++ b/test/557-checker-ref-equivalent/info.txt
@@ -0,0 +1 @@
+Checker tests to ensure we do not get reference and integer phi equivalents.
diff --git a/test/557-checker-ref-equivalent/smali/TestCase.smali b/test/557-checker-ref-equivalent/smali/TestCase.smali
new file mode 100644
index 0000000..1347554
--- /dev/null
+++ b/test/557-checker-ref-equivalent/smali/TestCase.smali
@@ -0,0 +1,51 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: void TestCase.testIntRefEquivalent() builder (after)
+## CHECK-NOT: Phi
+.method public static testIntRefEquivalent()V
+    .registers 4
+
+    const v0, 0
+
+    :try_start
+    invoke-static {v0,v0}, LTestCase;->foo(ILjava/lang/Object;)V
+    if-eqz v0, :end_if
+    const v0, 0
+    :end_if
+    invoke-static {v0,v0}, LTestCase;->foo(ILjava/lang/Object;)V
+    goto :no_catch
+    :try_end
+
+    .catch Ljava/lang/Exception; {:try_start .. :try_end} :exception
+    :exception
+    # We used to have both a reference and an integer phi equivalent here, which
+    # broke the invariant of not sharing the same spill slot between those two
+    # types.
+    invoke-static {v0,v0}, LTestCase;->foo(ILjava/lang/Object;)V
+
+    :no_catch
+    goto :try_start
+    return-void
+
+.end method
+
+.method public static foo(ILjava/lang/Object;)V
+    .registers 4
+    return-void
+.end method
diff --git a/test/557-checker-ref-equivalent/src/Main.java b/test/557-checker-ref-equivalent/src/Main.java
new file mode 100644
index 0000000..9323757
--- /dev/null
+++ b/test/557-checker-ref-equivalent/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: void Main.testRedundantPhiCycle(boolean) builder (after)
+  /// CHECK-NOT:  Phi
+  private void testRedundantPhiCycle(boolean cond) {
+    Object o = null;
+    while (true) {
+      if (cond) {
+        o = null;
+      }
+      System.out.println(o);
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) builder (after)
+  /// CHECK-NOT:  Phi
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/558-switch/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/558-switch/expected.txt
diff --git a/test/558-switch/info.txt b/test/558-switch/info.txt
new file mode 100644
index 0000000..07283ff
--- /dev/null
+++ b/test/558-switch/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used to generate
+invalid code for ARM.
diff --git a/test/558-switch/src/Main.java b/test/558-switch/src/Main.java
new file mode 100644
index 0000000..f44231e
--- /dev/null
+++ b/test/558-switch/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static boolean testMethod(int statusCode) {
+    switch (statusCode) {
+      case 303:
+      case 301:
+      case 302:
+      case 307:
+        return true;
+      default:
+        return false;
+    }  // end of switch
+  }
+
+  public static void main(String[] args) {
+    if (!testMethod(301)) {
+      throw new Error("Unexpected result");
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/559-bce-ssa/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/559-bce-ssa/expected.txt
diff --git a/test/559-bce-ssa/info.txt b/test/559-bce-ssa/info.txt
new file mode 100644
index 0000000..e104515
--- /dev/null
+++ b/test/559-bce-ssa/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used
+to hit a bogus DCHECK on the test case.
diff --git a/test/559-bce-ssa/src/Main.java b/test/559-bce-ssa/src/Main.java
new file mode 100644
index 0000000..88f06b4
--- /dev/null
+++ b/test/559-bce-ssa/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+
+  public static void foo(int[] array, int[] array2, int start, int end) {
+    for (int i = start; i < end; ++i) {
+      array[i] = array2[array.length] + 1;
+    }
+  }
+
+  public static void main(String[] args) {
+    int[] a = new int[1];
+    foo(a, new int[2], 0, 1);
+    if (a[0] != 1) {
+      throw new Error("test failed");
+    }
+  }
+}
diff --git a/test/559-checker-irreducible-loop/expected.txt b/test/559-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..b64be7a
--- /dev/null
+++ b/test/559-checker-irreducible-loop/expected.txt
@@ -0,0 +1,7 @@
+84
+30
+168
+126
+class Main
+42
+-42
diff --git a/test/559-checker-irreducible-loop/info.txt b/test/559-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..e0ace18
--- /dev/null
+++ b/test/559-checker-irreducible-loop/info.txt
@@ -0,0 +1 @@
+Tests for irreducible loop support in the compiler.
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..5d4aa56
--- /dev/null
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,549 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Back-edges in the ascii-art graphs are represented with a dash '-'.
+
+# Test that we support a simple irreducible loop.
+#
+#        entry
+#       /    \
+#      /      \
+# loop_entry   \
+#    /    \-    \
+#  exit    \-    \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
+## CHECK: irreducible:true
+.method public static simpleLoop(I)I
+   .registers 2
+   const/16 v0, 42
+   if-eq v1, v0, :other_loop_entry
+   :loop_entry
+   if-ne v1, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
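+
+# For example, simpleLoop(42) enters through other_loop_entry, doubles v0 to
+# 84, then leaves through loop_entry (42 != 84) and returns 84.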
+
+# Test that lse does not wrongly optimize loads in irreducible loops. At the
+# SSA level, since we create redundant phis for irreducible loop headers, lse
+# does not see the relation between the dex register and the phi.
+#
+#               entry
+#                p1
+#             /     \
+#            /       \
+#           /         \
+#          /           \
+#   loop_pre_entry      \
+# set 42 in p1:myField   \
+#        /                \
+#   loop_entry             \
+#  get p1.myField           \
+#    /         \-            \
+#  exit         \-            \
+#                \-            \
+#                other_loop_entry
+#              set 30 in p1:myField
+#
+## CHECK-START: int IrreducibleLoop.lse(int, Main) dead_code_elimination$initial (after)
+## CHECK: irreducible:true
+#
+## CHECK-START: int IrreducibleLoop.lse(int, Main) load_store_elimination (after)
+## CHECK: InstanceFieldGet
+.method public static lse(ILMain;)I
+   .registers 4
+   const/16 v0, 42
+   const/16 v1, 30
+   if-eq p0, v0, :other_loop_pre_entry
+   goto :loop_pre_entry
+   :loop_pre_entry
+   iput v0, p1, LMain;->myField:I
+   :loop_entry
+   if-ne v1, v0, :exit
+   :other_loop_entry
+   iget v0, p1, LMain;->myField:I
+   if-eq v1, v0, :exit
+   goto :loop_entry
+   :exit
+   return v0
+   :other_loop_pre_entry
+   iput v1, p1, LMain;->myField:I
+   goto :other_loop_entry
+.end method
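+
+# For example, lse(42, m) takes other_loop_pre_entry, stores 30 into
+# m.myField, reloads it in other_loop_entry and returns 30; that is why the
+# InstanceFieldGet must survive load_store_elimination.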
+
+# Check that dce does not apply for irreducible loops.
+#
+#        entry
+#       /    \
+#      /      \
+# loop_entry   \
+#    /    \-    \
+#  exit    \-    \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination$initial (before)
+## CHECK: irreducible:true
+
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination$initial (after)
+## CHECK: irreducible:true
+.method public static dce(I)I
+   .registers 3
+   const/16 v0, 42
+   const/16 v1, 168
+   if-ne v0, v0, :other_loop_pre_entry
+   :loop_entry
+   if-ne v0, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   if-eq v0, v1, :exit
+   goto :loop_entry
+   :exit
+   return v0
+   :other_loop_pre_entry
+   add-int v0, v0, v0
+   goto :other_loop_entry
+.end method
+
+# Check that a dex register only used in the loop header remains live thanks
+# to the (redundant) Phi created at the loop header for it.
+#
+#           entry
+#            p0
+#          /   \
+#         /     \
+#        /       \
+#   loop_entry    \
+# i0 = phi(p0,i1)  \
+#    /    \-        \
+#  exit    \-        \
+#        other_loop_entry
+#        i1 = phi(p0, i0)
+#
+## CHECK-START: int IrreducibleLoop.liveness(int) liveness (after)
+## CHECK-DAG: <<Arg:i\d+>>      ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopPhiUse:\d+>>)}
+## CHECK-DAG: <<LoopPhi:i\d+>>  Phi [<<Arg>>,<<PhiInLoop:i\d+>>] liveness:<<ArgLoopPhiUse>> ranges:{[<<ArgLoopPhiUse>>,<<PhiInLoopUse:\d+>>)}
+## CHECK-DAG: <<PhiInLoop>>     Phi [<<Arg>>,<<LoopPhi>>] liveness:<<PhiInLoopUse>> ranges:{[<<PhiInLoopUse>>,<<BackEdgeLifetimeEnd:\d+>>)}
+## CHECK:                       Return liveness:<<ReturnLiveness:\d+>>
+## CHECK-EVAL:    <<ReturnLiveness>> == <<BackEdgeLifetimeEnd>> + 2
+.method public static liveness(I)I
+   .registers 2
+   const/16 v0, 42
+   if-eq p0, v0, :other_loop_entry
+   :loop_entry
+   add-int v0, v0, p0
+   if-ne v1, v0, :exit
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
+
+# Check that we don't GVN across irreducible loops:
+# "const-class 1" in loop_entry should not be GVN with
+# "const-class 1" in entry.
+#
+#        entry
+#     const-class 1
+#       /    \
+#      /      \
+# loop_entry   \
+# const-class 1 \
+#    /    \-     \
+#  exit    \-     \
+#           other_loop_entry
+#             const-class 2
+#
+## CHECK-START: java.lang.Class IrreducibleLoop.gvn() GVN (before)
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK-NOT: LoadClass
+
+## CHECK-START: java.lang.Class IrreducibleLoop.gvn() GVN (after)
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK: LoadClass
+## CHECK-NOT: LoadClass
+
+.method public static gvn()Ljava/lang/Class;
+  .registers 3
+  const/4 v2, 0
+  const-class v0, LMain;
+  if-ne v0, v2, :other_loop_entry
+  :loop_entry
+  const-class v0, LMain;
+  if-ne v0, v2, :exit
+  :other_loop_entry
+  const-class v1, LIrreducibleLoop;
+  goto :loop_entry
+  :exit
+  return-object v0
+.end method
+
+# Check that we don't LICM across irreducible loops:
+# "add" in loop_entry should not be LICMed.
+#
+#        entry
+#        /   \
+#       /     \
+#  loop_entry  \
+#      add      \
+#    /    \-     \
+#  exit    \-     \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.licm1(int) licm (after)
+## CHECK: Add irreducible:true
+.method public static licm1(I)I
+  .registers 3
+  const/4 v0, 0
+  if-ne p0, v0, :other_loop_entry
+  :loop_entry
+  add-int v0, p0, p0
+  if-ne v0, p0, :exit
+  :other_loop_entry
+  sub-int v1, p0, p0
+  goto :loop_entry
+  :exit
+  sub-int v0, v0, p0
+  return v0
+.end method
+
+# Check that we don't LICM across irreducible loops:
+# "const-class" in loop_entry should not be LICMed.
+#
+#        entry
+#        /   \
+#       /     \
+#  loop_entry  \
+#  const-class  \
+#    /    \-     \
+#  exit    \-     \
+#           other_loop_entry
+#
+## CHECK-START: int IrreducibleLoop.licm2(int) licm (after)
+## CHECK: LoadClass irreducible:true
+.method public static licm2(I)I
+  .registers 3
+  const/4 v0, 0
+  if-ne p0, v0, :other_loop_entry
+  :loop_entry
+  const-class v1, LIrreducibleLoop;
+  if-ne v0, p0, :exit
+  :other_loop_entry
+  sub-int v1, p0, p0
+  goto :loop_entry
+  :exit
+  sub-int v0, v0, p0
+  return v0
+.end method
+
+# Check that we don't LICM in a natural loop that contains an irreducible loop:
+# "const-class" should not be LICMed.
+#
+#        entry
+#          |
+#       loop_entry
+#       const-class -------------------
+#        /        \                   -
+#       /          \                  -
+#     exit         loop_body          -
+#                  /       \          -
+#                 /         \         -
+#   irreducible_loop_entry   \        -
+#        -      \             \       -
+#        -       \             \      -
+#        -      irreducible_loop_other_entry
+#        -                  |
+#        -                  |
+#        ------ irreducible_loop_back_edge
+#
+## CHECK-START: int IrreducibleLoop.licm3(int, int, int) licm (after)
+## CHECK: LoadClass loop:<<OuterLoop:B\d+>>  irreducible:false
+## CHECK: Goto outer_loop:<<OuterLoop>>  irreducible:true
+.method public static licm3(III)I
+  .registers 4
+  :loop_entry
+  const-class v0, LIrreducibleLoop;
+  if-ne p1, p2, :exit
+  goto :loop_body
+
+  :loop_body
+  if-eq p0, p1, :irreducible_loop_entry
+  goto :irreducible_loop_other_entry
+
+  :irreducible_loop_entry
+  goto :irreducible_loop_other_entry
+
+  :irreducible_loop_other_entry
+  if-eq p0, p2, :loop_entry
+  goto :irreducible_loop_back_edge
+
+  :irreducible_loop_back_edge
+  goto :irreducible_loop_entry
+  :exit
+  return p0
+.end method
+
+# Check a loop within an irreducible loop.
+#
+#                      entry
+#                    /       \
+#                   /         \
+# irreducible_loop_entry       \
+#    / -       \         irreducible_loop_pre_other_entry
+# exit -        \              /
+#      -    irreducible_loop_body
+#      -              |
+#      -              |
+#      -      loop_within_header
+#      -        /               \-
+#      -       /                 \-
+# irreducible_loop_back_edge    loop_within_back_edge
+#
+## CHECK-START: void IrreducibleLoop.analyze1(int) builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:false
+.method public static analyze1(I)V
+  .registers 1
+  if-eq p0, p0, :irreducible_loop_entry
+  goto :irreducible_loop_pre_other_entry
+
+  :irreducible_loop_entry
+  if-eq p0, p0, :exit
+  goto :irreducible_loop_body
+
+  :irreducible_loop_body
+  :loop_within_header
+  if-eq p0, p0, :irreducible_loop_back_edge
+  goto :loop_within_back_edge
+
+  :loop_within_back_edge
+  goto :loop_within_header
+
+  :irreducible_loop_back_edge
+  goto :irreducible_loop_entry
+
+  :irreducible_loop_pre_other_entry
+  goto :irreducible_loop_body
+
+  :exit
+  return-void
+.end method
+
+# Check that a loop before an irreducible loop is not part of the
+# irreducible loop.
+#
+#                      entry
+#                        |
+#                        |
+#                   loop_header
+#                    /        \-
+#                   /          \-
+# irreducible_loop_pre_entry  loop_body
+#           /             \
+#          /               \
+#  irreducible_loop_entry   \
+#    /        \-       irreducible_loop_other_pre_entry
+#   /          \-           /
+# exit          \-         /
+#          irreducible_loop_body
+#
+## CHECK-START: void IrreducibleLoop.analyze2(int) builder (after)
+## CHECK-DAG: Goto outer_loop:none irreducible:false
+## CHECK-DAG: Goto outer_loop:none irreducible:true
+.method public static analyze2(I)V
+  .registers 1
+  :loop_header
+  if-eq p0, p0, :irreducible_loop_pre_entry
+  goto :loop_body
+  :loop_body
+  goto :loop_header
+
+  :irreducible_loop_pre_entry
+  if-eq p0, p0, :irreducible_loop_other_pre_entry
+  goto :irreducible_loop_entry
+
+  :irreducible_loop_entry
+  if-eq p0, p0, :exit
+  goto :irreducible_loop_body
+
+  :irreducible_loop_body
+  goto :irreducible_loop_entry
+
+  :irreducible_loop_other_pre_entry
+  goto :irreducible_loop_body
+
+  :exit
+  return-void
+.end method
+
+# Check two irreducible loops, one within another.
+#
+#                      entry
+#                    /       \
+#                   /         \
+#           loop1_header   loop2_header
+#           -   |          /       -
+#           -   |         /        -
+#           -   |        /         -
+#           -   |       /          -
+#           -  loop2_body          -
+#           -    /     \           -
+#           -   /       \          -
+#         loop1_body   loop2_back_edge
+#             |
+#             |
+#           exit
+#
+## CHECK-START: void IrreducibleLoop.analyze3(int) builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
+.method public static analyze3(I)V
+  .registers 1
+  if-eq p0, p0, :loop2_header
+  goto :loop1_header
+
+  :loop1_header
+  goto :loop2_body
+
+  :loop2_header
+  goto :loop2_body
+
+  :loop2_body
+  if-eq p0, p0, :loop2_back_edge
+  goto :loop1_body
+
+  :loop2_back_edge
+  goto :loop2_header
+
+  :loop1_body
+  if-eq p0, p0, :exit
+  goto :loop1_header
+
+  :exit
+  return-void
+.end method
+
+# Check two irreducible loops, one within another. Almost identical
+# to analyze3 except the branches of the first 'if' are swapped, to
+# ensure the order in which we find the back edges does not matter.
+#
+#                      entry
+#                    /       \
+#                   /         \
+#           loop1_header   loop2_header
+#           -   |          /       -
+#           -   |         /        -
+#           -   |        /         -
+#           -   |       /          -
+#           -  loop2_body          -
+#           -    /     \           -
+#           -   /       \          -
+#         loop1_body   loop2_back_edge
+#             |
+#             |
+#           exit
+#
+## CHECK-START: void IrreducibleLoop.analyze4(int) builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
+.method public static analyze4(I)V
+  .registers 1
+  if-eq p0, p0, :loop1_header
+  goto :loop2_header
+
+  :loop1_header
+  goto :loop2_body
+
+  :loop2_header
+  goto :loop2_body
+
+  :loop2_body
+  if-eq p0, p0, :loop2_back_edge
+  goto :loop1_body
+
+  :loop2_back_edge
+  goto :loop2_header
+
+  :loop1_body
+  if-eq p0, p0, :exit
+  goto :loop1_header
+
+  :exit
+  return-void
+.end method
+
+# Check two irreducible loops, one within another. Almost identical
+# to analyze3 and analyze4, except that the inner loop exits from the
+# back edge, not the body.
+#
+#                      entry
+#                    /       \
+#                   /         \
+#           loop1_header   loop2_header
+#           -   \            /       -
+#           -    \          /        -
+#           -     \        /         -
+#           -      \      /          -
+#           -     loop2_body         -
+#           -        |               -
+#           -        |               -
+#           -   loop2_back_edge ------
+#           -        |
+#           -        |
+#           ----- loop1_body
+#                    |
+#                    |
+#                   exit
+#
+## CHECK-START: void IrreducibleLoop.analyze5(int) builder (after)
+## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
+## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
+.method public static analyze5(I)V
+  .registers 1
+  if-eq p0, p0, :loop1_header
+  goto :loop2_header
+
+  :loop1_header
+  goto :loop2_body
+
+  :loop2_header
+  goto :loop2_body
+
+  :loop2_body
+  goto :loop2_back_edge
+
+  :loop2_back_edge
+  if-eq p0, p0, :loop2_header
+  goto :loop1_body
+
+  :loop1_body
+  if-eq p0, p0, :exit
+  goto :loop1_header
+
+  :exit
+  return-void
+.end method
diff --git a/test/559-checker-irreducible-loop/src/Main.java b/test/559-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..ab84f81
--- /dev/null
+++ b/test/559-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
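+    // IrreducibleLoop is assembled from smali and thus not visible at
+    // javac time, so it is loaded and invoked reflectively.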
+    Class<?> c = Class.forName("IrreducibleLoop");
+    {
+      Method m = c.getMethod("simpleLoop", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("lse", int.class, Main.class);
+      Object[] arguments = { 42, new Main() };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("dce", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("liveness", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("gvn");
+      Object[] arguments = { };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("licm1", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("licm2", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+  }
+
+  int myField;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/559-checker-rtp-ifnotnull/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/559-checker-rtp-ifnotnull/expected.txt
diff --git a/test/559-checker-rtp-ifnotnull/info.txt b/test/559-checker-rtp-ifnotnull/info.txt
new file mode 100644
index 0000000..c08aa0c
--- /dev/null
+++ b/test/559-checker-rtp-ifnotnull/info.txt
@@ -0,0 +1,2 @@
+Tests that BoundType created for if-not-null does not force untyped loop phis
+to Object.
\ No newline at end of file
diff --git a/test/559-checker-rtp-ifnotnull/src/Main.java b/test/559-checker-rtp-ifnotnull/src/Main.java
new file mode 100644
index 0000000..2dc5666
--- /dev/null
+++ b/test/559-checker-rtp-ifnotnull/src/Main.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+
+  /// CHECK-START: void Main.boundTypeForIfNotNull() builder (after)
+  /// CHECK-DAG:     <<Method:(i|j)\d+>>  CurrentMethod
+  /// CHECK-DAG:     <<Null:l\d+>>        NullConstant
+  /// CHECK-DAG:     <<Cst5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:     <<Cst10:i\d+>>       IntConstant 10
+
+  /// CHECK-DAG:                          InvokeVirtual [<<NullCheck:l\d+>>]
+  /// CHECK-DAG:     <<NullCheck>>        NullCheck [<<LoopPhi:l\d+>>] klass:int[]
+  /// CHECK-DAG:     <<LoopPhi>>          Phi [<<Null>>,<<MergePhi:l\d+>>] klass:int[]
+
+  /// CHECK-DAG:     <<BoundType:l\d+>>   BoundType [<<LoopPhi>>] klass:int[] can_be_null:false
+  /// CHECK-DAG:     <<NewArray10:l\d+>>  NewArray [<<Cst10>>,<<Method>>] klass:int[]
+  /// CHECK-DAG:     <<NotNullPhi:l\d+>>  Phi [<<BoundType>>,<<NewArray10>>] klass:int[]
+
+  /// CHECK-DAG:     <<NewArray5:l\d+>>   NewArray [<<Cst5>>,<<Method>>] klass:int[]
+  /// CHECK-DAG:     <<MergePhi>>         Phi [<<NewArray5>>,<<NotNullPhi>>] klass:int[]
+
+  public static void boundTypeForIfNotNull() {
+    int[] array = null;
+    for (int i = -1; i < 10; ++i) {
+      if (array == null) {
+        array = new int[5];
+      } else {
+        if (i == 5) {
+          array = new int[10];
+        }
+        array[i] = i;
+      }
+    }
+    array.hashCode();
+  }
+
+  public static void main(String[] args) {  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/560-packed-switch/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/560-packed-switch/expected.txt
diff --git a/test/560-packed-switch/info.txt b/test/560-packed-switch/info.txt
new file mode 100644
index 0000000..41d4562
--- /dev/null
+++ b/test/560-packed-switch/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing that used to emit wrong code
+for an HPackedSwitch.
diff --git a/test/560-packed-switch/src/Main.java b/test/560-packed-switch/src/Main.java
new file mode 100644
index 0000000..3b0b425
--- /dev/null
+++ b/test/560-packed-switch/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    switch (staticField) {
+      case -1:
+        return;
+      case -4:
+        // We add this case so that the number of case/default blocks is odd.
+        // The code generation for it used to be bogus.
+        throw new Error("Cannot happen");
+      default:
+        throw new Error("Cannot happen");
+    }
+  }
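+  // Switching on a static field rather than a literal presumably keeps
+  // the switch from being folded away before it reaches the compiler.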
+  static int staticField = -1;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/561-divrem/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/561-divrem/expected.txt
diff --git a/test/561-divrem/info.txt b/test/561-divrem/info.txt
new file mode 100644
index 0000000..71c9601
--- /dev/null
+++ b/test/561-divrem/info.txt
@@ -0,0 +1,2 @@
+Regression test for div/rem taking Integer.MIN_VALUE and
+Long.MIN_VALUE.
diff --git a/test/561-divrem/src/Main.java b/test/561-divrem/src/Main.java
new file mode 100644
index 0000000..082783d
--- /dev/null
+++ b/test/561-divrem/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void assertEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEquals(0, $noinline$divInt(1));
+    assertEquals(1, $noinline$remInt(1));
+
+    assertEquals(0, $noinline$divInt(-1));
+    assertEquals(-1, $noinline$remInt(-1));
+
+    assertEquals(0, $noinline$divInt(0));
+    assertEquals(0, $noinline$remInt(0));
+
+    assertEquals(1, $noinline$divInt(Integer.MIN_VALUE));
+    assertEquals(0, $noinline$remInt(Integer.MIN_VALUE));
+
+    assertEquals(0, $noinline$divInt(Integer.MAX_VALUE));
+    assertEquals(Integer.MAX_VALUE, $noinline$remInt(Integer.MAX_VALUE));
+
+    assertEquals(0, $noinline$divInt(Integer.MAX_VALUE - 1));
+    assertEquals(Integer.MAX_VALUE - 1, $noinline$remInt(Integer.MAX_VALUE - 1));
+
+    assertEquals(0, $noinline$divInt(Integer.MIN_VALUE + 1));
+    assertEquals(Integer.MIN_VALUE + 1, $noinline$remInt(Integer.MIN_VALUE + 1));
+
+    assertEquals(0L, $noinline$divLong(1L));
+    assertEquals(1L, $noinline$remLong(1L));
+
+    assertEquals(0L, $noinline$divLong(-1L));
+    assertEquals(-1L, $noinline$remLong(-1L));
+
+    assertEquals(0L, $noinline$divLong(0L));
+    assertEquals(0L, $noinline$remLong(0L));
+
+    assertEquals(1L, $noinline$divLong(Long.MIN_VALUE));
+    assertEquals(0L, $noinline$remLong(Long.MIN_VALUE));
+
+    assertEquals(0L, $noinline$divLong(Long.MAX_VALUE));
+    assertEquals(Long.MAX_VALUE, $noinline$remLong(Long.MAX_VALUE));
+
+    assertEquals(0L, $noinline$divLong(Long.MAX_VALUE - 1));
+    assertEquals(Long.MAX_VALUE - 1, $noinline$remLong(Long.MAX_VALUE - 1));
+
+    assertEquals(0L, $noinline$divLong(Long.MIN_VALUE + 1));
+    assertEquals(Long.MIN_VALUE + 1, $noinline$remLong(Long.MIN_VALUE + 1));
+  }
+
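+  // For any int x other than Integer.MIN_VALUE itself, |x| is strictly
+  // smaller than |Integer.MIN_VALUE|, so x / Integer.MIN_VALUE == 0 and
+  // x % Integer.MIN_VALUE == x; MIN_VALUE / MIN_VALUE == 1 with remainder
+  // 0. The same reasoning applies to the long variants below.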
+  public static int $noinline$divInt(int value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value / Integer.MIN_VALUE;
+  }
+
+  public static int $noinline$remInt(int value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value % Integer.MIN_VALUE;
+  }
+
+  public static long $noinline$divLong(long value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value / Long.MIN_VALUE;
+  }
+
+  public static long $noinline$remLong(long value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value % Long.MIN_VALUE;
+  }
+
+  static boolean doThrow = false;
+}
diff --git a/test/561-shared-slowpaths/expected.txt b/test/561-shared-slowpaths/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/561-shared-slowpaths/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/561-shared-slowpaths/info.txt b/test/561-shared-slowpaths/info.txt
new file mode 100644
index 0000000..c51e70b
--- /dev/null
+++ b/test/561-shared-slowpaths/info.txt
@@ -0,0 +1 @@
+Test on correctness while possibly sharing slow paths.
diff --git a/test/561-shared-slowpaths/src/Main.java b/test/561-shared-slowpaths/src/Main.java
new file mode 100644
index 0000000..718b875
--- /dev/null
+++ b/test/561-shared-slowpaths/src/Main.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on correctness in situations where slow paths may be shared
+// (actual sharing may vary between different code generators).
+//
+public class Main {
+
+  // A method with two loops that can be optimized with dynamic BCE,
+  // resulting in two deopts on null, a deopt on lower OOB,
+  // and a deopt on upper OOB.
+  private static void init(int[] x, int[] y, int l1, int h1, int l2, int h2) {
+    for (int i = l1; i < h1; i++) {
+      x[i] = i;
+    }
+    for (int i = l2; i < h2; i++) {
+      y[i] = i;
+    }
+  }
+
+  // Test that each of the six possible exception situations for init()
+  // are correctly handled by the deopt instructions.
+  public static void main(String[] args) {
+    int[] x = new int[100];
+    int[] y = new int[100];
+    int z;
+
+    // All is well.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, 0, 100);
+    } catch (Exception e) {
+      z = 1;
+    }
+    expectEquals(z, 0);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], i);
+    }
+
+    // Null deopt on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(null, y, 0, 100, 0, 100);
+    } catch (NullPointerException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], 0);
+      expectEquals(y[i], 0);
+    }
+
+    // Lower out-of-bounds on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, -1, 100, 0, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], 0);
+      expectEquals(y[i], 0);
+    }
+
+    // Upper out-of-bounds on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 101, 0, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Null deopt on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, null, 0, 100, 0, 100);
+    } catch (NullPointerException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Lower out-of-bounds on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, -1, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Upper out-of-bounds on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, 0, 101);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], i);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void reset(int[] x, int[] y) {
+    for (int i = 0; i < x.length; i++) x[i] = 0;
+    for (int i = 0; i < y.length; i++) y[i] = 0;
+  }
+
+  private static void expectEquals(int result, int expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-bce-preheader/expected.txt b/test/562-bce-preheader/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/562-bce-preheader/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/562-bce-preheader/info.txt b/test/562-bce-preheader/info.txt
new file mode 100644
index 0000000..ae006ac
--- /dev/null
+++ b/test/562-bce-preheader/info.txt
@@ -0,0 +1 @@
+Regression test for correct placement of hoisting/deopting code.
diff --git a/test/562-bce-preheader/src/Main.java b/test/562-bce-preheader/src/Main.java
new file mode 100644
index 0000000..4397f67
--- /dev/null
+++ b/test/562-bce-preheader/src/Main.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /**
+   * Method with an outer countable loop and an inner do-while loop.
+   * Since all work is done in the header of the inner loop, any invariant hoisting
+   * and deopting should be done in its proper loop preheader, not the true-block
+   * of the newly generated taken-test after dynamic BCE.
+   */
+  public static int doit(int[][] x, int j) {
+    float f = 0;
+    int acc = 0;
+    for (int i = 0; i < 2; i++) {
+      // The full body of a do-while loop is the loop header.
+      do {
+        // Some "noise" to avoid hoisting the array reference
+        // before the dynamic BCE phase runs.
+        f++;
+        // The invariant array reference with corresponding bounds check
+        // is a candidate for hoisting when dynamic BCE runs. If it is
+        // not moved to the proper loop preheader, the wrong values
+        // cause the test to fail.
+        acc += x[i][i];
+      } while (++j < i);
+    }
+    return acc;
+  }
+
+  /**
+   * Single countable loop with a clear header and a loop body. In this case,
+   * after dynamic BCE, some invariant hoisting and deopting must go to the
+   * proper loop preheader and some must go to the true-block.
+   */
+  public static int foo(int[] x, int[] y, int n) {
+    float f = 0;
+    int acc = 0;
+    int i = 0;
+    while (true) {
+      // This part is the loop header.
+      // Some "noise" to avoid hoisting the array reference
+      // before the dynamic BCE phase runs.
+      f++;
+      // The invariant array reference with corresponding bounds check
+      // is a candidate for hoisting when dynamic BCE runs. If it is
+      // not moved to the proper loop preheader, the wrong values
+      // cause the test to fail.
+      acc += y[0];
+      if (++i > n)
+        break;
+      // From here on, this part is the loop body.
+      // The unit-stride array reference is a candidate for dynamic BCE.
+      // The deopting appears in the true-block.
+      acc += x[i];
+    }
+    return acc;
+  }
+
+  /**
+   * An artificial example with an inconsistent phi structure during
+   * dynamic BCE that is corrected afterwards. Note that only the last
+   * assignment is really live, but the other statements set up an
+   * interesting phi structure.
+   */
+  private static int doit(int[] z) {
+    int a = 0;
+    for (int i = 0; i < 10; ++i) {
+      for (int j = i; j < 10; ++j) {
+        a = z[i];
+        for (int k = 0; k < 10; ++k) {
+          a += z[k];
+          a = z[i];
+        }
+      }
+    }
+    return a;
+  }
+
+  /**
+   * This example shows that an ArrayGet can be hoisted to the pre-header
+   * only if its execution is guaranteed.
+   */
+  public static int hoistcheck(int[] c) {
+    int i = 0, i2 = 0, i3 = 0, k = 0;
+    int n = c.length;
+    for (i = -100000000; i < 20; i += 10000000) {
+      i3 = i;
+      i2 = 0;
+      while (i2++ < 1) {
+        if (i3 >= 0 && i3 < n) {
+          k += c[i3];
+        }
+      }
+    }
+    return k;
+  }
+
+  public static void main(String args[]) {
+    int[][] x = new int[2][2];
+    int y;
+
+    x[0][0] = 1;
+    x[1][1] = 2;
+
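+    // E.g. for doit(x, -6): the inner header runs six times at i == 0
+    // (adding x[0][0] == 1 each time) and once at i == 1 (adding
+    // x[1][1] == 2), so the expected result is 6 + 2 == 8.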
+    expectEquals(8, doit(x, -6));
+    expectEquals(7, doit(x, -5));
+    expectEquals(6, doit(x, -4));
+    expectEquals(5, doit(x, -3));
+    expectEquals(4, doit(x, -2));
+    expectEquals(3, doit(x, -1));
+    expectEquals(3, doit(x,  0));
+    expectEquals(3, doit(x,  1));
+    expectEquals(3, doit(x, 22));
+
+    int a[] = { 1, 2, 3, 5 };
+    int b[] = { 7 };
+
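+    // E.g. foo(a, b, -1) adds y[0] == 7 once and exits on the first
+    // taken-test, while foo(a, b, 1) completes two iterations:
+    // 7 + x[1] + 7 == 16.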
+    expectEquals(7,  foo(a, b, -1));
+    expectEquals(7,  foo(a, b,  0));
+    expectEquals(16, foo(a, b,  1));
+    expectEquals(26, foo(a, b,  2));
+    expectEquals(38, foo(a, b,  3));
+
+    int[] z = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    expectEquals(10, doit(z));
+
+    int c[] = { 1, 2, 3, 5 };
+    expectEquals(1, hoistcheck(c));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/562-no-intermediate/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/562-no-intermediate/expected.txt
diff --git a/test/562-no-intermediate/info.txt b/test/562-no-intermediate/info.txt
new file mode 100644
index 0000000..4f21aeb
--- /dev/null
+++ b/test/562-no-intermediate/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, checking that no intermediate
+address is kept live across a Java call.
diff --git a/test/562-no-intermediate/src/Main.java b/test/562-no-intermediate/src/Main.java
new file mode 100644
index 0000000..3b74d6f
--- /dev/null
+++ b/test/562-no-intermediate/src/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START-ARM64: void Main.main(String[]) register_allocator (after)
+  /// CHECK-NOT: IntermediateAddress
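+  // The compound assignment below expands to an ArrayGet, a call to
+  // Math.cos, and an ArraySet; the check verifies that no
+  // IntermediateAddress is generated, since it could not be kept live
+  // across the call.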
+  public static void main(String[] args) {
+    array[index] += Math.cos(42);
+  }
+
+  static int index = 0;
+  static double[] array = new double[2];
+}
diff --git a/test/455-set-vreg/expected.txt b/test/563-checker-fakestring/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/563-checker-fakestring/expected.txt
diff --git a/test/563-checker-fakestring/info.txt b/test/563-checker-fakestring/info.txt
new file mode 100644
index 0000000..ef09d8c
--- /dev/null
+++ b/test/563-checker-fakestring/info.txt
@@ -0,0 +1,2 @@
+Regression test for FakeString simplification, which incorrectly assumed that
+a FakeString cannot be used before a call to StringFactory.
\ No newline at end of file
diff --git a/test/563-checker-fakestring/smali/TestCase.smali b/test/563-checker-fakestring/smali/TestCase.smali
new file mode 100644
index 0000000..54312a4
--- /dev/null
+++ b/test/563-checker-fakestring/smali/TestCase.smali
@@ -0,0 +1,182 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test that all vregs holding the new-instance are updated after the
+# StringFactory call.
+
+## CHECK-START: java.lang.String TestCase.vregAliasing(byte[]) register (after)
+## CHECK-DAG:                Return [<<String:l\d+>>]
+## CHECK-DAG:     <<String>> InvokeStaticOrDirect  method_name:java.lang.String.<init>
+
+.method public static vregAliasing([B)Ljava/lang/String;
+   .registers 5
+
+   # Create new instance of String and store it to v0, v1, v2.
+   new-instance v0, Ljava/lang/String;
+   move-object v1, v0
+   move-object v2, v0
+
+   # Call String.<init> on v1.
+   const-string v3, "UTF8"
+   invoke-direct {v1, p0, v3}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+
+   # Return the object from v2.
+   return-object v2
+
+.end method
+
+# Test usage of String new-instance before it is initialized.
+
+## CHECK-START: void TestCase.compareNewInstance() register (after)
+## CHECK-DAG:     <<Null:l\d+>>   NullConstant
+## CHECK-DAG:     <<String:l\d+>> NewInstance
+## CHECK-DAG:     <<Cond:z\d+>>   NotEqual [<<String>>,<<Null>>]
+## CHECK-DAG:                     If [<<Cond>>]
+
+.method public static compareNewInstance()V
+   .registers 3
+
+   new-instance v0, Ljava/lang/String;
+   if-nez v0, :return
+
+   # Will throw NullPointerException if this branch is taken.
+   const v1, 0x0
+   const-string v2, "UTF8"
+   invoke-direct {v0, v1, v2}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-void
+
+   :return
+   return-void
+
+.end method
+
+# Test deoptimization between String's allocation and initialization. When not
+# compiling --debuggable, the NewInstance will be optimized out.
+
+## CHECK-START: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+## CHECK-START-DEBUGGABLE: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<String:l\d+>> NewInstance
+## CHECK:                         Deoptimize env:[[<<String>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+.method public static deoptimizeNewInstance([I[B)I
+   .registers 6
+
+   const v2, 0x0
+   const v1, 0x1
+
+   new-instance v0, Ljava/lang/String;
+
+   # Deoptimize here if the array is too short.
+   aget v1, p0, v1
+   add-int/2addr v2, v1
+
+   # Check that we're being executed by the interpreter.
+   invoke-static {}, LMain;->assertIsInterpreted()V
+
+   # String allocation should succeed.
+   const-string v3, "UTF8"
+   invoke-direct {v0, p1, v3}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+
+   # This ArrayGet will throw ArrayIndexOutOfBoundsException.
+   const v1, 0x4
+   aget v1, p0, v1
+   add-int/2addr v2, v1
+
+   return v2
+
+.end method
+
+# Test that a redundant NewInstance is removed if not used and not compiling
+# --debuggable.
+
+## CHECK-START: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK-NOT:     NewInstance
+## CHECK-NOT:     LoadClass
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK:         NewInstance
+
+.method public static removeNewInstance([B)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+# Test that the compiler does not assume that the first argument of String.<init>
+# is a NewInstance by inserting an irreducible loop between them (b/26676472).
+
+# We verify the type of the input instruction (Phi) in debuggable mode, because
+# it is eliminated by later stages of SsaBuilder otherwise.
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance1(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance1([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   const v1, 0x1
+   xor-int p1, p1, v1
+   :loop_entry
+   if-eqz p1, :string_init
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance2(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance2([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   if-eqz p1, :string_init
+   :loop_entry
+   const v1, 0x1
+   xor-int p1, p1, v1
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
diff --git a/test/563-checker-fakestring/src/Main.java b/test/563-checker-fakestring/src/Main.java
new file mode 100644
index 0000000..1ac8a5b
--- /dev/null
+++ b/test/563-checker-fakestring/src/Main.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static native void assertIsInterpreted();
+
+  private static void assertEqual(String expected, String actual) {
+    if (!expected.equals(actual)) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    String testString = "Hello world";
+    byte[] testData = testString.getBytes("UTF8");
+
+    {
+      Method m = c.getMethod("vregAliasing", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    {
+      c.getMethod("compareNewInstance").invoke(null, (Object[]) null);
+    }
+
+    {
+      Method m = c.getMethod("deoptimizeNewInstance", int[].class, byte[].class);
+      try {
+        m.invoke(null, new Object[] { new int[] { 1, 2, 3 }, testData });
+      } catch (InvocationTargetException ex) {
+        if (ex.getCause() instanceof ArrayIndexOutOfBoundsException) {
+          // Expected.
+        } else {
+          throw ex.getCause();
+        }
+      }
+    }
+
+    {
+      Method m = c.getMethod("removeNewInstance", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    {
+      Method m = c.getMethod("thisNotNewInstance1", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+    {
+      Method m = c.getMethod("thisNotNewInstance2", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+  }
+}
diff --git a/test/563-checker-invoke-super/build b/test/563-checker-invoke-super/build
new file mode 100755
index 0000000..32f84ef
--- /dev/null
+++ b/test/563-checker-invoke-super/build
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+
+./default-build "$@" --experimental default-methods
diff --git a/test/530-checker-loops/expected.txt b/test/563-checker-invoke-super/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/563-checker-invoke-super/expected.txt
diff --git a/test/563-checker-invoke-super/info.txt b/test/563-checker-invoke-super/info.txt
new file mode 100644
index 0000000..23c0d2f
--- /dev/null
+++ b/test/563-checker-invoke-super/info.txt
@@ -0,0 +1,2 @@
+Tests that invoke-super calls to interface methods are optimized to direct method
+calls when in the same dex file.
diff --git a/test/563-checker-invoke-super/src/Main.java b/test/563-checker-invoke-super/src/Main.java
new file mode 100644
index 0000000..8554dbd
--- /dev/null
+++ b/test/563-checker-invoke-super/src/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface IFace {
+  public default void $noinline$aMethod() { throw new RuntimeException("Should not be called"); }
+}
+
+class ClassImplA implements IFace {
+  /// CHECK-START: void ClassImplA.testSuperInvoke() builder (after)
+  /// CHECK:                       InvokeStaticOrDirect
+  public void testSuperInvoke() {
+    IFace.super.$noinline$aMethod();
+  }
+}
+
+class ClassImplB extends ClassImplA {
+  /// CHECK-START: void ClassImplB.testSuperInvoke2() builder (after)
+  /// CHECK:                       InvokeStaticOrDirect
+  public void testSuperInvoke2() {
+    super.$noinline$aMethod();
+  }
+}
+
+public class Main {
+  public static void main(String[] args) { }
+}
diff --git a/test/564-checker-bitcount/expected.txt b/test/564-checker-bitcount/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/564-checker-bitcount/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/564-checker-bitcount/info.txt b/test/564-checker-bitcount/info.txt
new file mode 100644
index 0000000..57db66b
--- /dev/null
+++ b/test/564-checker-bitcount/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit bit count operation.
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
new file mode 100644
index 0000000..aad9689
--- /dev/null
+++ b/test/564-checker-bitcount/src/Main.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // TODO: make something like this work when b/26700769 is done.
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-DAG: popcnt
+
+
+  /// CHECK-START: int Main.$noinline$BitCountBoolean(boolean) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountBoolean(boolean x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x ? 1 : 0);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountByte(byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountByte(byte x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountShort(short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountShort(short x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountChar(char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountChar(char x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountInt(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountInt(int x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountLong(long) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:LongBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountLong(long x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Long.bitCount(x);
+  }
+
+  public static void testBitCountBoolean() {
+    expectEqualsInt($noinline$BitCountBoolean(false), 0);
+    expectEqualsInt($noinline$BitCountBoolean(true), 1);
+  }
+
+  public static void testBitCountByte() {
+    // Number of bits in a 32-bit integer representing the sign
+    // extension of a byte value widened to an int.
+    int signExtensionSize = Integer.SIZE - Byte.SIZE;
+    // Sign bit position in a byte.
+    int signBit = Byte.SIZE - 1;
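+    // E.g. (byte) 0xF0 widens to the int 0xFFFFFFF0, whose bit count is
+    // 4 + signExtensionSize == 28.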
+
+    expectEqualsInt($noinline$BitCountByte((byte) 0x00), 0);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x01), 1);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x10), 1);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x11), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x03), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x70), 3);
+    expectEqualsInt($noinline$BitCountByte((byte) 0xF0), 4 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x0F), 4);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x12), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x9A), 4 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountByte((byte) 0xFF), 8 + signExtensionSize);
+
+    for (int i = 0; i < Byte.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountByte((byte) (1 << i)),
+                      (i < signBit) ? 1 : 1 + signExtensionSize);
+    }
+  }
+
+  public static void testBitCountShort() {
+    // Number of bits in a 32-bit integer representing the sign
+    // extension of a short value widened to an int.
+    int signExtensionSize = Integer.SIZE - Short.SIZE;
+    // Sign bit position in a short.
+    int signBit = Short.SIZE - 1;
+
+    expectEqualsInt($noinline$BitCountShort((short) 0x0000), 0);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0001), 1);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1000), 1);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1001), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0003), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x7000), 3);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0F00), 4);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0011), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1100), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1111), 4);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1234), 5);
+    expectEqualsInt($noinline$BitCountShort((short) 0x9ABC), 9 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountShort((short) 0xFFFF), 16 + signExtensionSize);
+
+    for (int i = 0; i < Short.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountShort((short) (1 << i)),
+                      (i < signBit) ? 1 : 1 + signExtensionSize);
+    }
+  }
+
+  public static void testBitCountChar() {
+    expectEqualsInt($noinline$BitCountChar((char) 0x0000), 0);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0001), 1);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1000), 1);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1001), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0003), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x7000), 3);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0F00), 4);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0011), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1100), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1111), 4);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1234), 5);
+    expectEqualsInt($noinline$BitCountChar((char) 0x9ABC), 9);
+    expectEqualsInt($noinline$BitCountChar((char) 0xFFFF), 16);
+
+    for (int i = 0; i < Character.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountChar((char) (1 << i)), 1);
+    }
+  }
+
+  public static void testBitCountInt() {
+    expectEqualsInt($noinline$BitCountInt(0x00000000), 0);
+    expectEqualsInt($noinline$BitCountInt(0x00000001), 1);
+    expectEqualsInt($noinline$BitCountInt(0x10000000), 1);
+    expectEqualsInt($noinline$BitCountInt(0x10000001), 2);
+    expectEqualsInt($noinline$BitCountInt(0x00000003), 2);
+    expectEqualsInt($noinline$BitCountInt(0x70000000), 3);
+    expectEqualsInt($noinline$BitCountInt(0x000F0000), 4);
+    expectEqualsInt($noinline$BitCountInt(0x00001111), 4);
+    expectEqualsInt($noinline$BitCountInt(0x11110000), 4);
+    expectEqualsInt($noinline$BitCountInt(0x11111111), 8);
+    expectEqualsInt($noinline$BitCountInt(0x12345678), 13);
+    expectEqualsInt($noinline$BitCountInt(0x9ABCDEF0), 19);
+    expectEqualsInt($noinline$BitCountInt(0xFFFFFFFF), 32);
+
+    for (int i = 0; i < Integer.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountInt(1 << i), 1);
+    }
+  }
+
+  public static void testBitCountLong() {
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000000L), 0);
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000001L), 1);
+    expectEqualsInt($noinline$BitCountLong(0x1000000000000000L), 1);
+    expectEqualsInt($noinline$BitCountLong(0x1000000000000001L), 2);
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000003L), 2);
+    expectEqualsInt($noinline$BitCountLong(0x7000000000000000L), 3);
+    expectEqualsInt($noinline$BitCountLong(0x000F000000000000L), 4);
+    expectEqualsInt($noinline$BitCountLong(0x0000000011111111L), 8);
+    expectEqualsInt($noinline$BitCountLong(0x1111111100000000L), 8);
+    expectEqualsInt($noinline$BitCountLong(0x1111111111111111L), 16);
+    expectEqualsInt($noinline$BitCountLong(0x123456789ABCDEF1L), 33);
+    expectEqualsInt($noinline$BitCountLong(0xFFFFFFFFFFFFFFFFL), 64);
+
+    for (int i = 0; i < Long.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountLong(1L << i), 1);
+    }
+  }
+
+  public static void main(String args[]) {
+    testBitCountBoolean();
+    testBitCountByte();
+    testBitCountShort();
+    testBitCountChar();
+    testBitCountInt();
+    testBitCountLong();
+
+    System.out.println("passed");
+  }
+
+  private static void expectEqualsInt(int result, int expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static boolean doThrow = false;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/564-checker-inline-loop/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/564-checker-inline-loop/expected.txt
diff --git a/test/564-checker-inline-loop/info.txt b/test/564-checker-inline-loop/info.txt
new file mode 100644
index 0000000..a590bc6
--- /dev/null
+++ b/test/564-checker-inline-loop/info.txt
@@ -0,0 +1 @@
+Tests inlining of loops in the optimizing compiler.
diff --git a/test/564-checker-inline-loop/src/Main.java b/test/564-checker-inline-loop/src/Main.java
new file mode 100644
index 0000000..6929913
--- /dev/null
+++ b/test/564-checker-inline-loop/src/Main.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineLoop() inliner (before)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-DAG:                      Return [<<Invoke>>]
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-DAG:     <<Constant:i\d+>>   IntConstant 42
+  /// CHECK-DAG:                         Return [<<Constant>>]
+
+  /// CHECK-START: int Main.inlineLoop() licm (after)
+  /// CHECK:                         Goto loop:{{B\d+}}
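+  // (The licm (after) check verifies that the inlined callee's loop
+  // survives in the caller's graph.)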
+
+  public static int inlineLoop() {
+    return loopMethod();
+  }
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (before)
+  /// CHECK:      InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (after)
+  /// CHECK-NOT:  InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() licm (after)
+  /// CHECK-DAG:  Goto loop:<<OuterLoop:B\d+>> outer_loop:none
+  /// CHECK-DAG:  Goto outer_loop:<<OuterLoop>>
+
+  public static void inlineWithinLoop() {
+    while (doLoop) {
+      loopMethod();
+    }
+  }
+
+  public static int loopMethod() {
+    while (doLoop) {}
+    return 42;
+  }
+
+  public static boolean doLoop = false;
+
+  public static void main(String[] args) {
+    inlineLoop();
+    inlineWithinLoop();
+  }
+}
diff --git a/test/564-checker-irreducible-loop/expected.txt b/test/564-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/564-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/564-checker-irreducible-loop/info.txt b/test/564-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/564-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing in the presence of
+an irreducible loop.
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..75344f7
--- /dev/null
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,63 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
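+# An irreducible loop is a loop that can be entered at more than one
+# point; here both :loop_entry and :other_loop_entry are entry blocks.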
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
+## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
+## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:{{B\d+}} irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:none
+.method public static simpleLoop(I)I
+   .registers 3
+   const/16 v0, 42
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-nez p0, :exit
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   :other_loop_entry
+   goto :loop_entry
+
+   # The else part: a block uses the ArtMethod and branches to
+   # a block that doesn't. The register allocator used to trip there, as the
+   # ArtMethod was a live_in of the last block before the loop, but did not have
+   # a location due to our liveness analysis.
+   :other_loop_pre_entry
+   if-eqz p0, :other_loop_entry
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   goto :other_loop_entry
+
+   :exit
+   return v0
+.end method
+
+.method public static $noinline$m(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1,LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Prevent inlining.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
diff --git a/test/564-checker-irreducible-loop/src/Main.java b/test/564-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..94e3357
--- /dev/null
+++ b/test/564-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/564-checker-negbitwise/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/564-checker-negbitwise/expected.txt
diff --git a/test/564-checker-negbitwise/info.txt b/test/564-checker-negbitwise/info.txt
new file mode 100644
index 0000000..28b9e9e
--- /dev/null
+++ b/test/564-checker-negbitwise/info.txt
@@ -0,0 +1 @@
+Test simplification of negated bitwise operations on ARM and ARM64.
diff --git a/test/564-checker-negbitwise/src/Main.java b/test/564-checker-negbitwise/src/Main.java
new file mode 100644
index 0000000..ccb8ff4
--- /dev/null
+++ b/test/564-checker-negbitwise/src/Main.java
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test merging of `NOT+AND` into `BIC`.
+   */
+
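+  // On ARM and ARM64, `bic` ("bit clear") computes `base & ~mask` in a
+  // single instruction, e.g. bic(0xf, 0x1) == 0xe.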
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        And
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) disassembly (after)
+  /// CHECK:                            bic w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        And
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) disassembly (after)
+  /// CHECK:                            bic.w r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static int $opt$noinline$notAnd(int base, int mask) {
+    if (doThrow) throw new Error();
+    return base & ~mask;
+  }
+
+  /**
+   * Test merging of `NOT+ORR` into `ORN`.
+   */
+
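+  // `orn` ("or not") computes `base | ~mask` in a single instruction,
+  // e.g. orn(0xf, 0x1) == ~0x0 (all bits set).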
+  /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<Not:j\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:j\d+>>          Or [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:j\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Or
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) disassembly (after)
+  /// CHECK:                            orn x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<Not:j\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:j\d+>>          Or [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:j\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Or
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) disassembly (after)
+  /// CHECK:                            orn.w r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static long $opt$noinline$notOr(long base, long mask) {
+    if (doThrow) throw new Error();
+    return base | ~mask;
+  }
+
+  /**
+   * Test merging of `NOT+EOR` into `EON`.
+   */
+
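+  // On ARM64, `eon` ("exclusive or not") computes `base ^ ~mask`,
+  // e.g. eon(0xf, 0x1) == ~0xe.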
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Xor
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Xor
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) disassembly (after)
+  /// CHECK:                            eon w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
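+  // AArch32 has no `eon` instruction, so on ARM the Not+Xor pattern is
+  // expected to stay untouched and no BitwiseNegatedRight is created.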
+  public static int $opt$noinline$notXor(int base, int mask) {
+    if (doThrow) throw new Error();
+    return base ^ ~mask;
+  }
+
+  /**
+   * Check that the transformation is also applied when one argument is a constant.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Base>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Not>>,<<Constant>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Constant>>,<<Base>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Base>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Not>>,<<Constant>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Constant>>,<<Base>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  public static int $opt$noinline$notAndConstant(int mask) {
+    if (doThrow) throw new Error();
+    return 0xf & ~mask;
+  }
+
+  /**
+   * Check that no transformation is done when Not has multiple uses.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+  public static int $opt$noinline$notAndMultipleUses(int base, int mask) {
+    if (doThrow) throw new Error();
+    int tmp = ~mask;
+    return (tmp & 0x1) + (base & tmp);
+  }
+
+  /**
+   * Check that no transformation is done when both inputs are Not's.
+   */
+
+  // We don't check the instructions before the pass, since if De Morgan's laws
+  // have been applied then Not/Not/Or is replaced by And/Not.
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$deMorganOr(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$deMorganOr(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+  public static int $opt$noinline$deMorganOr(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a | ~b;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(0xe,   $opt$noinline$notAnd(0xf, 0x1));
+    assertLongEquals(~0x0, $opt$noinline$notOr(0xf, 0x1));
+    assertIntEquals(~0xe,  $opt$noinline$notXor(0xf, 0x1));
+    assertIntEquals(0xe,   $opt$noinline$notAndConstant(0x1));
+    assertIntEquals(0xe,   $opt$noinline$notAndMultipleUses(0xf, 0x1));
+    assertIntEquals(~0x1,  $opt$noinline$deMorganOr(0x3, 0x1));
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/565-checker-condition-liveness/expected.txt
similarity index 100%
rename from test/525-checker-arrays-and-fields/expected.txt
rename to test/565-checker-condition-liveness/expected.txt
diff --git a/test/565-checker-condition-liveness/info.txt b/test/565-checker-condition-liveness/info.txt
new file mode 100644
index 0000000..67b6ceb
--- /dev/null
+++ b/test/565-checker-condition-liveness/info.txt
@@ -0,0 +1 @@
+Test the use positions of inputs of non-materialized conditions.
\ No newline at end of file
diff --git a/test/565-checker-condition-liveness/src/Main.java b/test/565-checker-condition-liveness/src/Main.java
new file mode 100644
index 0000000..acfcecd
--- /dev/null
+++ b/test/565-checker-condition-liveness/src/Main.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START-X86: int Main.p(float) liveness (after)
+  /// CHECK:         <<Arg:f\d+>>  ParameterValue uses:[<<UseInput:\d+>>]
+  /// CHECK-DAG:     <<Five:f\d+>> FloatConstant 5 uses:[<<UseInput>>]
+  /// CHECK-DAG:     <<Zero:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<MinusOne:i\d+>> IntConstant -1 uses:[<<UseInput>>]
+  /// CHECK:         <<Base:i\d+>> X86ComputeBaseMethodAddress uses:[<<UseInput>>]
+  /// CHECK-NEXT:    <<Load:f\d+>> X86LoadFromConstantTable [<<Base>>,<<Five>>]
+  /// CHECK-NEXT:    <<Cond:z\d+>> LessThanOrEqual [<<Arg>>,<<Load>>]
+  /// CHECK-NEXT:                  Select [<<Zero>>,<<MinusOne>>,<<Cond>>] liveness:<<LivSel:\d+>>
+  /// CHECK-EVAL:    <<UseInput>> == <<LivSel>> + 1
+
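+  // The condition below is not materialized, so its inputs must stay live
+  // up to the Select: their use position is the Select's liveness + 1.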
+  public static int p(float arg) {
+    return (arg > 5.0f) ? 0 : -1;
+  }
+
+  /// CHECK-START: void Main.main(java.lang.String[]) liveness (after)
+  /// CHECK:         <<X:i\d+>>    ArrayLength uses:[<<UseInput:\d+>>]
+  /// CHECK:         <<Y:i\d+>>    StaticFieldGet uses:[<<UseInput>>]
+  /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [<<X>>,<<Y>>]
+  /// CHECK-NEXT:                  If [<<Cond>>] liveness:<<LivIf:\d+>>
+  /// CHECK-EVAL:    <<UseInput>> == <<LivIf>> + 1
+
+  public static void main(String[] args) {
+    int x = args.length;
+    int y = field;
+    if (x > y) {
+      System.nanoTime();
+    }
+  }
+
+  public static int field = 42;
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/565-checker-doublenegbitwise/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/565-checker-doublenegbitwise/expected.txt
diff --git a/test/565-checker-doublenegbitwise/info.txt b/test/565-checker-doublenegbitwise/info.txt
new file mode 100644
index 0000000..cbe183c
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/info.txt
@@ -0,0 +1 @@
+Test simplification of double-negated bitwise operations.
diff --git a/test/565-checker-doublenegbitwise/src/Main.java b/test/565-checker-doublenegbitwise/src/Main.java
new file mode 100644
index 0000000..811c280
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/src/Main.java
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test transformation of Not/Not/And into Or/Not.
+   */
+
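+  // By De Morgan's laws, ~a & ~b == ~(a | b), which needs one Not fewer.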
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Not1:i\d+>>        Not [<<P1>>]
+  /// CHECK:       <<Not2:i\d+>>        Not [<<P2>>]
+  /// CHECK:       <<And:i\d+>>         And [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<And>>]
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Or:i\d+>>          Or [<<P1>>,<<P2>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<Or>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        And
+
+  public static int $opt$noinline$andToOr(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a & ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/And into Or/Not for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
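+  // For booleans the same identity applies: !a & !b == !(a | b).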
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<And:i\d+>>         And [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<And>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<Or:i\d+>>          Or [<<Cond2>>,<<Cond1>>]
+  /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<Or>>]
+  /// CHECK:                            Return [<<BooleanNot>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:                            BooleanNot
+  /// CHECK-NOT:                        BooleanNot
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                        And
+
+  public static boolean $opt$noinline$booleanAndToOr(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a & !b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Or into And/Not.
+   */
+
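+  // Dually, by De Morgan's laws, ~a | ~b == ~(a & b).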
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (before)
+  /// CHECK:       <<P1:j\d+>>          ParameterValue
+  /// CHECK:       <<P2:j\d+>>          ParameterValue
+  /// CHECK:       <<Not1:j\d+>>        Not [<<P1>>]
+  /// CHECK:       <<Not2:j\d+>>        Not [<<P2>>]
+  /// CHECK:       <<Or:j\d+>>          Or [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK:       <<P1:j\d+>>          ParameterValue
+  /// CHECK:       <<P2:j\d+>>          ParameterValue
+  /// CHECK:       <<And:j\d+>>         And [<<P1>>,<<P2>>]
+  /// CHECK:       <<Not:j\d+>>         Not [<<And>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static long $opt$noinline$orToAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    return ~a | ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Or into And/Not for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<And:i\d+>>         And [<<Cond2>>,<<Cond1>>]
+  /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<And>>]
+  /// CHECK:                            Return [<<BooleanNot>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:                            BooleanNot
+  /// CHECK-NOT:                        BooleanNot
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                        Or
+
+  public static boolean $opt$noinline$booleanOrToAnd(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a | !b;
+  }
+
+  /**
+   * Test that the transformation copes with inputs being separated from the
+   * bitwise operations.
+   * This is a regression test. The initial logic was inserting the new bitwise
+   * operation incorrectly.
+   */
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Cst1:i\d+>>        IntConstant 1
+  /// CHECK:       <<AddP1:i\d+>>       Add [<<P1>>,<<Cst1>>]
+  /// CHECK:       <<Not1:i\d+>>        Not [<<AddP1>>]
+  /// CHECK:       <<AddP2:i\d+>>       Add [<<P2>>,<<Cst1>>]
+  /// CHECK:       <<Not2:i\d+>>        Not [<<AddP2>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Cst1:i\d+>>        IntConstant 1
+  /// CHECK:       <<AddP1:i\d+>>       Add [<<P1>>,<<Cst1>>]
+  /// CHECK:       <<AddP2:i\d+>>       Add [<<P2>>,<<Cst1>>]
+  /// CHECK:       <<And:i\d+>>         And [<<AddP1>>,<<AddP2>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<And>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static int $opt$noinline$regressInputsAway(int a, int b) {
+    if (doThrow) throw new Error();
+    int a1 = a + 1;
+    int not_a1 = ~a1;
+    int b1 = b + 1;
+    int not_b1 = ~b1;
+    return not_a1 | not_b1;
+  }
+
+  /**
+   * Test transformation of Not/Not/Xor into Xor.
+   */
+
+  // See first note above.
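+  // Negating both inputs of a Xor cancels out: ~a ^ ~b == a ^ b.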
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Not1:i\d+>>        Not [<<P1>>]
+  /// CHECK:       <<Not2:i\d+>>        Not [<<P2>>]
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<P1>>,<<P2>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Not
+
+  public static int $opt$noinline$notXorToXor(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a ^ ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Xor into Xor for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Cond2>>,<<Cond1>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                        BooleanNot
+
+  public static boolean $opt$noinline$booleanNotXorToXor(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a ^ !b;
+  }
+
+  /**
+   * Check that no transformation is done when one Not has multiple uses.
+   */
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant 1
+  /// CHECK:       <<Not2:i\d+>>        Not [<<P2>>]
+  /// CHECK:       <<And2:i\d+>>        And [<<Not2>>,<<One>>]
+  /// CHECK:       <<Not1:i\d+>>        Not [<<P1>>]
+  /// CHECK:       <<And1:i\d+>>        And [<<Not1>>,<<Not2>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<And2>>,<<And1>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant 1
+  /// CHECK:       <<Not2:i\d+>>        Not [<<P2>>]
+  /// CHECK:       <<And2:i\d+>>        And [<<Not2>>,<<One>>]
+  /// CHECK:       <<Not1:i\d+>>        Not [<<P1>>]
+  /// CHECK:       <<And1:i\d+>>        And [<<Not1>>,<<Not2>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<And2>>,<<And1>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static int $opt$noinline$notMultipleUses(int a, int b) {
+    if (doThrow) throw new Error();
+    int tmp = ~b;
+    return (tmp & 0x1) + (~a & tmp);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(~0xff, $opt$noinline$andToOr(0xf, 0xff));
+    assertLongEquals(~0xf, $opt$noinline$orToAnd(0xf, 0xff));
+    assertIntEquals(0xf0, $opt$noinline$notXorToXor(0xf, 0xff));
+    assertIntEquals(~0xff, $opt$noinline$notMultipleUses(0xf, 0xff));
+  }
+}
diff --git a/test/565-checker-irreducible-loop/expected.txt b/test/565-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..6ed281c
--- /dev/null
+++ b/test/565-checker-irreducible-loop/expected.txt
@@ -0,0 +1,2 @@
+1
+1
diff --git a/test/565-checker-irreducible-loop/info.txt b/test/565-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/565-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/565-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/565-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..29547ca
--- /dev/null
+++ b/test/565-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,101 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Check that both the irreducible loop and the other loop entry
+# move the constant-folded value to where it's expected.
+
+## CHECK-START-X86: int IrreducibleLoop.test1(int, long) register (after)
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:none
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:{{B\d+}} irreducible:true
+.method public static test1(IJ)I
+   .registers 10
+   const/16 v6, 2
+   const/16 v4, 1
+   const-wide/16 v0, 42
+   add-long v2, v0, v0
+
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   cmp-long v6, v2, p1
+   :other_loop_entry
+   sub-int p0, p0, v4
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   # In that block, v4 has no live range.
+   :other_loop_pre_entry
+   goto :other_loop_entry
+
+   :exit
+   return v6
+.end method
+
+# Check that the compiler does not crash when
+# a live interval is found while connecting siblings, but that
+# live interval is inactive at the desired position.
+
+## CHECK-START-X86: int IrreducibleLoop.test2(int, long) register (after)
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:none
+## CHECK-DAG:                     ParallelMove {{.*84->.*}} loop:{{B\d+}} irreducible:true
+.method public static test2(IJ)I
+   .registers 14
+   const/16 v6, 2
+   const/16 v4, 1
+   const-wide/16 v0, 42
+   const-wide/16 v8, 68
+   add-long v2, v0, v0
+
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   cmp-long v6, v2, p1
+   :other_loop_entry
+   sub-int p0, p0, v4
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   :other_loop_pre_entry
+   # Make v2 have a register location.
+   sput-wide v2, LIrreducibleLoop;->myField:J
+   # Stress register allocator on x86 to split v2.
+   sput-wide v0, LIrreducibleLoop;->myField:J
+   sput-wide p1, LIrreducibleLoop;->myField:J
+   sput-wide v8, LIrreducibleLoop;->myField:J
+   if-eqz p0, :join
+   # Stress register allocator on x86 to split v2.
+   sput-wide p1, LIrreducibleLoop;->myField:J
+   sput-wide v8, LIrreducibleLoop;->myField:J
+   sput-wide v0, LIrreducibleLoop;->myField:J
+   # Last use of v2 before the irreducible loop, that
+   # will create an interval hole.
+   sput-wide v2, LIrreducibleLoop;->myField:J
+   :join
+   goto :other_loop_entry
+
+   :exit
+   return v6
+.end method
+
+.field public static volatile myField:J
diff --git a/test/565-checker-irreducible-loop/src/Main.java b/test/565-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..e48bd6b
--- /dev/null
+++ b/test/565-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    {
+      Method m = c.getMethod("test1", int.class, long.class);
+      Object[] arguments = { 42, 31L };
+      System.out.println(m.invoke(null, arguments));
+    }
+
+    {
+      Method m = c.getMethod("test2", int.class, long.class);
+      Object[] arguments = { 42, 31L };
+      System.out.println(m.invoke(null, arguments));
+    }
+  }
+}
diff --git a/test/565-checker-rotate/expected.txt b/test/565-checker-rotate/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/565-checker-rotate/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/565-checker-rotate/info.txt b/test/565-checker-rotate/info.txt
new file mode 100644
index 0000000..c6a8091
--- /dev/null
+++ b/test/565-checker-rotate/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit rotate operations.
diff --git a/test/565-checker-rotate/src/Main.java b/test/565-checker-rotate/src/Main.java
new file mode 100644
index 0000000..eb0e868
--- /dev/null
+++ b/test/565-checker-rotate/src/Main.java
@@ -0,0 +1,638 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Val:i\d+>>     Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<Val>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Val:i\d+>>     Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<Val>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) select_generator (after)
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<SelVal:i\d+>>  Select [<<Zero>>,<<One>>,<<ArgVal>>]
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<SelVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) select_generator (after)
+  /// CHECK-NOT:                      Phi
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                      Select
+
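+  // Integer.rotateLeft(v, d) equals Integer.rotateRight(v, -d), which is
+  // why the simplifier lowers rotateLeft to a Ror with a negated distance.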
+  private static int rotateLeftBoolean(boolean value, int distance) {
+    return Integer.rotateLeft(value ? 1 : 0, distance);
+  }
+
+  /// CHECK-START: int Main.rotateLeftByte(byte, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:b\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftByte(byte, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:b\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftByte(byte, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateLeftByte(byte value, int distance) {
+    return Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateLeftShort(short, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:s\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftShort(short, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:s\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftShort(short, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateLeftShort(short value, int distance) {
+    return Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateLeftChar(char, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:c\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftChar(char, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:c\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftChar(char, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateLeftChar(char value, int distance) {
+    return Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateLeftInt(int, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftInt(int, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftInt(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateLeftInt(int value, int distance) {
+    return Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLeftLong(long, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:j\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:j\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:LongRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: long Main.rotateLeftLong(long, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:j\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:j\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: long Main.rotateLeftLong(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static long rotateLeftLong(long value, int distance) {
+    return Long.rotateLeft(value, distance);
+  }
+
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Val:i\d+>>     Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<Val>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Val:i\d+>>     Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<Val>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) select_generator (after)
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>    IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<SelVal:i\d+>>  Select [<<Zero>>,<<One>>,<<ArgVal>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<SelVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) select_generator (after)
+  /// CHECK-NOT:                      Phi
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                      Select
+
+  private static int rotateRightBoolean(boolean value, int distance) {
+    return Integer.rotateRight(value ? 1 : 0, distance);
+  }
+
+  /// CHECK-START: int Main.rotateRightByte(byte, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:b\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightByte(byte, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:b\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightByte(byte, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateRightByte(byte value, int distance) {
+    return Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateRightShort(short, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:s\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightShort(short, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:s\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightShort(short, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateRightShort(short value, int distance) {
+    return Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateRightChar(char, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:c\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightChar(char, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:c\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightChar(char, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateRightChar(char value, int distance) {
+    return Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateRightInt(int, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightInt(int, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightInt(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateRightInt(int value, int distance) {
+    return Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateRightLong(long, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:j\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:j\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:LongRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: long Main.rotateRightLong(long, int) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:j\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:i\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:j\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: long Main.rotateRightLong(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static long rotateRightLong(long value, int distance) {
+    return Long.rotateRight(value, distance);
+  }
+
+
+  /// CHECK-START: int Main.rotateLeftIntWithByteDistance(int, byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:b\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateLeft
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftIntWithByteDistance(int, byte) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:b\d+>> ParameterValue
+  /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateLeftIntWithByteDistance(int, byte) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
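+  // There is no rotate-left instruction in the HIR: the simplifier relies on the
+  // identity rotateLeft(x, d) == rotateRight(x, -d), which is why a Neg of the
+  // distance feeds the Ror in the graph above.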
+  private static int rotateLeftIntWithByteDistance(int value, byte distance) {
+    return Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateRightIntWithByteDistance(int, byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:b\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect [<<ArgVal>>,<<ArgDist>>,<<Method>>] intrinsic:IntegerRotateRight
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightIntWithByteDistance(int, byte) instruction_simplifier (after)
+  /// CHECK:         <<ArgVal:i\d+>>  ParameterValue
+  /// CHECK:         <<ArgDist:b\d+>> ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
+  /// CHECK-DAG:                      Return [<<Result>>]
+
+  /// CHECK-START: int Main.rotateRightIntWithByteDistance(int, byte) instruction_simplifier (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  private static int rotateRightIntWithByteDistance(int value, byte distance) {
+    return Integer.rotateRight(value, distance);
+  }
+
+
+  public static void testRotateLeftBoolean() {
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0, rotateLeftBoolean(false, i));
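+      // Rotating a single set bit left by i leaves one bit at position i mod 32.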
+      expectEqualsInt(1 << j, rotateLeftBoolean(true, i));
+    }
+  }
+
+  public static void testRotateLeftByte() {
+    expectEqualsInt(0x00000001, rotateLeftByte((byte)0x01, 0));
+    expectEqualsInt(0x00000002, rotateLeftByte((byte)0x01, 1));
+    expectEqualsInt(0x80000000, rotateLeftByte((byte)0x01, 31));
+    expectEqualsInt(0x00000001, rotateLeftByte((byte)0x01, 32));  // overshoot
+    expectEqualsInt(0xFFFFFF03, rotateLeftByte((byte)0x81, 1));
+    expectEqualsInt(0xFFFFFE07, rotateLeftByte((byte)0x81, 2));
+    expectEqualsInt(0x00000120, rotateLeftByte((byte)0x12, 4));
+    expectEqualsInt(0xFFFF9AFF, rotateLeftByte((byte)0x9A, 8));
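+    // Reference value below: (x << j) | (x >>> -j) is rotate-left by j, since Java
+    // masks int shift distances to their low 5 bits, making >>> -j the same as
+    // >>> (32 - j).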
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateLeftByte((byte)0x0000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateLeftByte((byte)0xFFFF, i));
+      expectEqualsInt((1 << j), rotateLeftByte((byte)0x0001, i));
+      expectEqualsInt((0x12 << j) | (0x12 >>> -j), rotateLeftByte((byte)0x12, i));
+    }
+  }
+
+  public static void testRotateLeftShort() {
+    expectEqualsInt(0x00000001, rotateLeftShort((short)0x0001, 0));
+    expectEqualsInt(0x00000002, rotateLeftShort((short)0x0001, 1));
+    expectEqualsInt(0x80000000, rotateLeftShort((short)0x0001, 31));
+    expectEqualsInt(0x00000001, rotateLeftShort((short)0x0001, 32));  // overshoot
+    expectEqualsInt(0xFFFF0003, rotateLeftShort((short)0x8001, 1));
+    expectEqualsInt(0xFFFE0007, rotateLeftShort((short)0x8001, 2));
+    expectEqualsInt(0x00012340, rotateLeftShort((short)0x1234, 4));
+    expectEqualsInt(0xFF9ABCFF, rotateLeftShort((short)0x9ABC, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateLeftShort((short)0x0000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateLeftShort((short)0xFFFF, i));
+      expectEqualsInt((1 << j), rotateLeftShort((short)0x0001, i));
+      expectEqualsInt((0x1234 << j) | (0x1234 >>> -j), rotateLeftShort((short)0x1234, i));
+    }
+  }
+
+  public static void testRotateLeftChar() {
+    expectEqualsInt(0x00000001, rotateLeftChar((char)0x0001, 0));
+    expectEqualsInt(0x00000002, rotateLeftChar((char)0x0001, 1));
+    expectEqualsInt(0x80000000, rotateLeftChar((char)0x0001, 31));
+    expectEqualsInt(0x00000001, rotateLeftChar((char)0x0001, 32));  // overshoot
+    expectEqualsInt(0x00010002, rotateLeftChar((char)0x8001, 1));
+    expectEqualsInt(0x00020004, rotateLeftChar((char)0x8001, 2));
+    expectEqualsInt(0x00012340, rotateLeftChar((char)0x1234, 4));
+    expectEqualsInt(0x009ABC00, rotateLeftChar((char)0x9ABC, 8));
+    expectEqualsInt(0x00FF0000, rotateLeftChar((char)0xFF00, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateLeftChar((char)0x0000, i));
+      expectEqualsInt((1 << j), rotateLeftChar((char)0x0001, i));
+      expectEqualsInt((0x1234 << j) | (0x1234 >>> -j), rotateLeftChar((char)0x1234, i));
+    }
+  }
+
+  public static void testRotateLeftInt() {
+    expectEqualsInt(0x00000001, rotateLeftInt(0x00000001, 0));
+    expectEqualsInt(0x00000002, rotateLeftInt(0x00000001, 1));
+    expectEqualsInt(0x80000000, rotateLeftInt(0x00000001, 31));
+    expectEqualsInt(0x00000001, rotateLeftInt(0x00000001, 32));  // overshoot
+    expectEqualsInt(0x00000003, rotateLeftInt(0x80000001, 1));
+    expectEqualsInt(0x00000006, rotateLeftInt(0x80000001, 2));
+    expectEqualsInt(0x23456781, rotateLeftInt(0x12345678, 4));
+    expectEqualsInt(0xBCDEF09A, rotateLeftInt(0x9ABCDEF0, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateLeftInt(0x00000000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateLeftInt(0xFFFFFFFF, i));
+      expectEqualsInt(1 << j, rotateLeftInt(0x00000001, i));
+      expectEqualsInt((0x12345678 << j) | (0x12345678 >>> -j), rotateLeftInt(0x12345678, i));
+    }
+  }
+
+  public static void testRotateLeftLong() {
+    expectEqualsLong(0x0000000000000001L, rotateLeftLong(0x0000000000000001L, 0));
+    expectEqualsLong(0x0000000000000002L, rotateLeftLong(0x0000000000000001L, 1));
+    expectEqualsLong(0x8000000000000000L, rotateLeftLong(0x0000000000000001L, 63));
+    expectEqualsLong(0x0000000000000001L, rotateLeftLong(0x0000000000000001L, 64));  // overshoot
+    expectEqualsLong(0x0000000000000003L, rotateLeftLong(0x8000000000000001L, 1));
+    expectEqualsLong(0x0000000000000006L, rotateLeftLong(0x8000000000000001L, 2));
+    expectEqualsLong(0x23456789ABCDEF01L, rotateLeftLong(0x123456789ABCDEF0L, 4));
+    expectEqualsLong(0x3456789ABCDEF012L, rotateLeftLong(0x123456789ABCDEF0L, 8));
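+    // For long values the shift distance is masked to its low 6 bits, so the
+    // (x << j) | (x >>> -j) reference below rotates within 64 bits.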
+    for (int i = 0; i < 70; i++) {  // overshoot a bit
+      int j = i & 63;
+      expectEqualsLong(0x0000000000000000L, rotateLeftLong(0x0000000000000000L, i));
+      expectEqualsLong(0xFFFFFFFFFFFFFFFFL, rotateLeftLong(0xFFFFFFFFFFFFFFFFL, i));
+      expectEqualsLong(1L << j, rotateLeftLong(0x0000000000000001L, i));
+      expectEqualsLong((0x123456789ABCDEF0L << j) | (0x123456789ABCDEF0L >>> -j),
+                       rotateLeftLong(0x123456789ABCDEF0L, i));
+    }
+  }
+
+  public static void testRotateRightBoolean() {
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0, rotateRightBoolean(false, i));
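+      // 1 << (32 - j) is correct even for j == 0: the distance masks to 0, yielding 1.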
+      expectEqualsInt(1 << (32 - j), rotateRightBoolean(true, i));
+    }
+  }
+
+  public static void testRotateRightByte() {
+    expectEqualsInt(0xFFFFFF80, rotateRightByte((byte)0x80, 0));
+    expectEqualsInt(0x7FFFFFC0, rotateRightByte((byte)0x80, 1));
+    expectEqualsInt(0xFFFFFF01, rotateRightByte((byte)0x80, 31));
+    expectEqualsInt(0xFFFFFF80, rotateRightByte((byte)0x80, 32));  // overshoot
+    expectEqualsInt(0xFFFFFFC0, rotateRightByte((byte)0x81, 1));
+    expectEqualsInt(0x7FFFFFE0, rotateRightByte((byte)0x81, 2));
+    expectEqualsInt(0x20000001, rotateRightByte((byte)0x12, 4));
+    expectEqualsInt(0x9AFFFFFF, rotateRightByte((byte)0x9A, 8));
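+    // Reference value below: (x >>> j) | (x << -j) is rotate-right by j, again
+    // relying on shift-distance masking.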
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateRightByte((byte)0x00, i));
+      expectEqualsInt(0xFFFFFFFF, rotateRightByte((byte)0xFF, i));
+      expectEqualsInt(1 << (32 - j), rotateRightByte((byte)0x01, i));
+      expectEqualsInt((0x12 >>> j) | (0x12 << -j), rotateRightByte((byte)0x12, i));
+    }
+  }
+
+  public static void testRotateRightShort() {
+    expectEqualsInt(0xFFFF8000, rotateRightShort((short)0x8000, 0));
+    expectEqualsInt(0x7FFFC000, rotateRightShort((short)0x8000, 1));
+    expectEqualsInt(0xFFFF0001, rotateRightShort((short)0x8000, 31));
+    expectEqualsInt(0xFFFF8000, rotateRightShort((short)0x8000, 32));  // overshoot
+    expectEqualsInt(0xFFFFC000, rotateRightShort((short)0x8001, 1));
+    expectEqualsInt(0x7FFFE000, rotateRightShort((short)0x8001, 2));
+    expectEqualsInt(0x40000123, rotateRightShort((short)0x1234, 4));
+    expectEqualsInt(0xBCFFFF9A, rotateRightShort((short)0x9ABC, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateRightShort((short)0x0000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateRightShort((short)0xFFFF, i));
+      expectEqualsInt(1 << (32 - j), rotateRightShort((short)0x0001, i));
+      expectEqualsInt((0x1234 >>> j) | (0x1234 << -j), rotateRightShort((short)0x1234, i));
+    }
+  }
+
+  public static void testRotateRightChar() {
+    expectEqualsInt(0x00008000, rotateRightChar((char)0x8000, 0));
+    expectEqualsInt(0x00004000, rotateRightChar((char)0x8000, 1));
+    expectEqualsInt(0x00010000, rotateRightChar((char)0x8000, 31));
+    expectEqualsInt(0x00008000, rotateRightChar((char)0x8000, 32));  // overshoot
+    expectEqualsInt(0x80004000, rotateRightChar((char)0x8001, 1));
+    expectEqualsInt(0x40002000, rotateRightChar((char)0x8001, 2));
+    expectEqualsInt(0x40000123, rotateRightChar((char)0x1234, 4));
+    expectEqualsInt(0xBC00009A, rotateRightChar((char)0x9ABC, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateRightChar((char)0x0000, i));
+      expectEqualsInt(1 << (32 - j), rotateRightChar((char)0x0001, i));
+      expectEqualsInt((0x1234 >>> j) | (0x1234 << -j), rotateRightChar((char)0x1234, i));
+    }
+  }
+
+  public static void testRotateRightInt() {
+    expectEqualsInt(0x80000000, rotateRightInt(0x80000000, 0));
+    expectEqualsInt(0x40000000, rotateRightInt(0x80000000, 1));
+    expectEqualsInt(0x00000001, rotateRightInt(0x80000000, 31));
+    expectEqualsInt(0x80000000, rotateRightInt(0x80000000, 32));  // overshoot
+    expectEqualsInt(0xC0000000, rotateRightInt(0x80000001, 1));
+    expectEqualsInt(0x60000000, rotateRightInt(0x80000001, 2));
+    expectEqualsInt(0x81234567, rotateRightInt(0x12345678, 4));
+    expectEqualsInt(0xF09ABCDE, rotateRightInt(0x9ABCDEF0, 8));
+    for (int i = 0; i < 40; i++) {  // overshoot a bit
+      int j = i & 31;
+      expectEqualsInt(0x00000000, rotateRightInt(0x00000000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateRightInt(0xFFFFFFFF, i));
+      expectEqualsInt(0x80000000 >>> j, rotateRightInt(0x80000000, i));
+      expectEqualsInt((0x12345678 >>> j) | (0x12345678 << -j), rotateRightInt(0x12345678, i));
+    }
+  }
+
+  public static void testRotateRightLong() {
+    expectEqualsLong(0x8000000000000000L, rotateRightLong(0x8000000000000000L, 0));
+    expectEqualsLong(0x4000000000000000L, rotateRightLong(0x8000000000000000L, 1));
+    expectEqualsLong(0x0000000000000001L, rotateRightLong(0x8000000000000000L, 63));
+    expectEqualsLong(0x8000000000000000L, rotateRightLong(0x8000000000000000L, 64));  // overshoot
+    expectEqualsLong(0xC000000000000000L, rotateRightLong(0x8000000000000001L, 1));
+    expectEqualsLong(0x6000000000000000L, rotateRightLong(0x8000000000000001L, 2));
+    expectEqualsLong(0x0123456789ABCDEFL, rotateRightLong(0x123456789ABCDEF0L, 4));
+    expectEqualsLong(0xF0123456789ABCDEL, rotateRightLong(0x123456789ABCDEF0L, 8));
+    for (int i = 0; i < 70; i++) {  // overshoot a bit
+      int j = i & 63;
+      expectEqualsLong(0x0000000000000000L, rotateRightLong(0x0000000000000000L, i));
+      expectEqualsLong(0xFFFFFFFFFFFFFFFFL, rotateRightLong(0xFFFFFFFFFFFFFFFFL, i));
+      expectEqualsLong(0x8000000000000000L >>> j, rotateRightLong(0x8000000000000000L, i));
+      expectEqualsLong((0x123456789ABCDEF0L >>> j) | (0x123456789ABCDEF0L << -j),
+                       rotateRightLong(0x123456789ABCDEF0L, i));
+    }
+  }
+
+
+  public static void testRotateLeftIntWithByteDistance() {
+    expectEqualsInt(0x00000001, rotateLeftIntWithByteDistance(0x00000001, (byte)0));
+    expectEqualsInt(0x00000002, rotateLeftIntWithByteDistance(0x00000001, (byte)1));
+    expectEqualsInt(0x80000000, rotateLeftIntWithByteDistance(0x00000001, (byte)31));
+    expectEqualsInt(0x00000001, rotateLeftIntWithByteDistance(0x00000001, (byte)32));  // overshoot
+    expectEqualsInt(0x00000003, rotateLeftIntWithByteDistance(0x80000001, (byte)1));
+    expectEqualsInt(0x00000006, rotateLeftIntWithByteDistance(0x80000001, (byte)2));
+    expectEqualsInt(0x23456781, rotateLeftIntWithByteDistance(0x12345678, (byte)4));
+    expectEqualsInt(0xBCDEF09A, rotateLeftIntWithByteDistance(0x9ABCDEF0, (byte)8));
+    for (byte i = 0; i < 40; i++) {  // overshoot a bit
+      byte j = (byte)(i & 31);
+      expectEqualsInt(0x00000000, rotateLeftIntWithByteDistance(0x00000000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateLeftIntWithByteDistance(0xFFFFFFFF, i));
+      expectEqualsInt(1 << j, rotateLeftIntWithByteDistance(0x00000001, i));
+      expectEqualsInt((0x12345678 << j) | (0x12345678 >>> -j),
+                      rotateLeftIntWithByteDistance(0x12345678, i));
+    }
+  }
+
+  public static void testRotateRightIntWithByteDistance() {
+    expectEqualsInt(0x80000000, rotateRightIntWithByteDistance(0x80000000, (byte)0));
+    expectEqualsInt(0x40000000, rotateRightIntWithByteDistance(0x80000000, (byte)1));
+    expectEqualsInt(0x00000001, rotateRightIntWithByteDistance(0x80000000, (byte)31));
+    expectEqualsInt(0x80000000, rotateRightIntWithByteDistance(0x80000000, (byte)32));  // overshoot
+    expectEqualsInt(0xC0000000, rotateRightIntWithByteDistance(0x80000001, (byte)1));
+    expectEqualsInt(0x60000000, rotateRightIntWithByteDistance(0x80000001, (byte)2));
+    expectEqualsInt(0x81234567, rotateRightIntWithByteDistance(0x12345678, (byte)4));
+    expectEqualsInt(0xF09ABCDE, rotateRightIntWithByteDistance(0x9ABCDEF0, (byte)8));
+    for (byte i = 0; i < 40; i++) {  // overshoot a bit
+      byte j = (byte)(i & 31);
+      expectEqualsInt(0x00000000, rotateRightIntWithByteDistance(0x00000000, i));
+      expectEqualsInt(0xFFFFFFFF, rotateRightIntWithByteDistance(0xFFFFFFFF, i));
+      expectEqualsInt(0x80000000 >>> j, rotateRightIntWithByteDistance(0x80000000, i));
+      expectEqualsInt((0x12345678 >>> j) | (0x12345678 << -j),
+                      rotateRightIntWithByteDistance(0x12345678, i));
+    }
+  }
+
+
+  public static void main(String[] args) {
+    testRotateLeftBoolean();
+    testRotateLeftByte();
+    testRotateLeftShort();
+    testRotateLeftChar();
+    testRotateLeftInt();
+    testRotateLeftLong();
+
+    testRotateRightBoolean();
+    testRotateRightByte();
+    testRotateRightShort();
+    testRotateRightChar();
+    testRotateRightInt();
+    testRotateRightLong();
+
+    // Also exercise distance values with types other than int.
+    testRotateLeftIntWithByteDistance();
+    testRotateRightIntWithByteDistance();
+
+    System.out.println("passed");
+  }
+
+
+  private static void expectEqualsInt(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsLong(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/566-checker-codegen-select/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/566-checker-codegen-select/expected.txt
diff --git a/test/566-checker-codegen-select/info.txt b/test/566-checker-codegen-select/info.txt
new file mode 100644
index 0000000..67b6ceb
--- /dev/null
+++ b/test/566-checker-codegen-select/info.txt
@@ -0,0 +1 @@
+Test the use positions of inputs of non-materialized conditions.
\ No newline at end of file
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
new file mode 100644
index 0000000..e215ab0
--- /dev/null
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: long Main.$noinline$longSelect(long) register (before)
+  /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK-NEXT:                  Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
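+  // The $noinline$ marker is honored by the test infrastructure and prevents
+  // inlining, so the checks match the graph of this exact method.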
+  public long $noinline$longSelect(long param) {
+    if (doThrow) { throw new Error(); }
+    long val_true = longB;
+    long val_false = longC;
+    return (param > longA) ? val_true : val_false;
+  }
+
+  /// CHECK-START: long Main.$noinline$longSelect_Constant(long) register (before)
+  /// CHECK:         <<Const:j\d+>> LongConstant
+  /// CHECK:         <<Cond:z\d+>>  LessThanOrEqual [{{j\d+}},<<Const>>]
+  /// CHECK-NEXT:                   Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  // The condition can be left non-materialized on X86 because it no longer
+  // requests 4 registers.
+  /// CHECK-START-X86: long Main.$noinline$longSelect_Constant(long) disassembly (after)
+  /// CHECK:             LessThanOrEqual
+  /// CHECK-NEXT:        Select
+
+  // Check that we generate CMOV for long on x86_64.
+  /// CHECK-START-X86_64: long Main.$noinline$longSelect_Constant(long) disassembly (after)
+  /// CHECK:             LessThanOrEqual
+  /// CHECK-NEXT:        Select
+  /// CHECK:             cmpq
+  /// CHECK:             cmovle/ngq
+
+  public long $noinline$longSelect_Constant(long param) {
+    if (doThrow) { throw new Error(); }
+    long val_true = longB;
+    long val_false = longC;
+    return (param > 3L) ? val_true : val_false;
+  }
+
+  // Check that we generate CMOV for int on x86_64.
+  /// CHECK-START-X86_64: int Main.$noinline$intSelect_Constant(int) disassembly (after)
+  /// CHECK:             LessThan
+  /// CHECK-NEXT:        Select
+  /// CHECK:             cmp
+  /// CHECK:             cmovl/nge
+
+  public int $noinline$intSelect_Constant(int param) {
+    if (doThrow) { throw new Error(); }
+    int val_true = intB;
+    int val_false = intC;
+    return (param >= 3) ? val_true : val_false;
+  }
+
+  public static void main(String[] args) {
+    Main m = new Main();
+    assertLongEquals(5L, m.$noinline$longSelect(4L));
+    assertLongEquals(7L, m.$noinline$longSelect(2L));
+    assertLongEquals(5L, m.$noinline$longSelect_Constant(4L));
+    assertLongEquals(7L, m.$noinline$longSelect_Constant(2L));
+    assertIntEquals(5, m.$noinline$intSelect_Constant(4));
+    assertIntEquals(7, m.$noinline$intSelect_Constant(2));
+  }
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error(expected + " != " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error(expected + " != " + actual);
+    }
+  }
+
+  public boolean doThrow = false;
+
+  public long longA = 3L;
+  public long longB = 5L;
+  public long longC = 7L;
+  public int intB = 5;
+  public int intC = 7;
+}
diff --git a/test/566-checker-signum/expected.txt b/test/566-checker-signum/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/566-checker-signum/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/566-checker-signum/info.txt b/test/566-checker-signum/info.txt
new file mode 100644
index 0000000..328e494
--- /dev/null
+++ b/test/566-checker-signum/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit signum operations.
diff --git a/test/566-checker-signum/src/Main.java b/test/566-checker-signum/src/Main.java
new file mode 100644
index 0000000..7fc9e84
--- /dev/null
+++ b/test/566-checker-signum/src/Main.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.signBoolean(boolean) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Phi>>,<<Method>>] intrinsic:IntegerSignum
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>> Compare [<<Phi>>,<<Zero>>]
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.signBoolean(boolean) select_generator (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>    Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<Result:i\d+>> Compare [<<Sel>>,<<Zero>>]
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signBoolean(boolean) select_generator (after)
+  /// CHECK-NOT:                     Phi
+
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     <<Arg:z\d+>>    ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Result:i\d+>> Compare [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                     Select
+
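+  // The checks above trace the whole pipeline: the Signum intrinsic is lowered to
+  // a Compare against zero, the Phi produced by (x ? 1 : 0) becomes a Select, and
+  // the final simplifier pass folds the Select away so the boolean parameter feeds
+  // the Compare directly.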
+  private static int signBoolean(boolean x) {
+    return Integer.signum(x ? 1 : 0);
+  }
+
+  /// CHECK-START: int Main.signByte(byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerSignum
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signByte(byte) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signByte(byte) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int signByte(byte x) {
+    return Integer.signum(x);
+  }
+
+  /// CHECK-START: int Main.signShort(short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerSignum
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signShort(short) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signShort(short) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int signShort(short x) {
+    return Integer.signum(x);
+  }
+
+  /// CHECK-START: int Main.signChar(char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerSignum
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signChar(char) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signChar(char) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int signChar(char x) {
+    return Integer.signum(x);
+  }
+
+  /// CHECK-START: int Main.signInt(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerSignum
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signInt(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signInt(int) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int signInt(int x) {
+    return Integer.signum(x);
+  }
+
+  /// CHECK-START: int Main.signLong(long) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongSignum
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signLong(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.signLong(long) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int signLong(long x) {
+    return Long.signum(x);
+  }
+
+
+  public static void testSignBoolean() {
+    expectEquals(0, signBoolean(false));
+    expectEquals(1, signBoolean(true));
+  }
+
+  public static void testSignByte() {
+    expectEquals(-1, signByte((byte)Byte.MIN_VALUE));
+    expectEquals(-1, signByte((byte)-64));
+    expectEquals(-1, signByte((byte)-1));
+    expectEquals(0, signByte((byte)0));
+    expectEquals(1, signByte((byte)1));
+    expectEquals(1, signByte((byte)64));
+    expectEquals(1, signByte((byte)Byte.MAX_VALUE));
+  }
+
+  public static void testSignShort() {
+    expectEquals(-1, signShort((short)Short.MIN_VALUE));
+    expectEquals(-1, signShort((short)-12345));
+    expectEquals(-1, signShort((short)-1));
+    expectEquals(0, signShort((short)0));
+    expectEquals(1, signShort((short)1));
+    expectEquals(1, signShort((short)12345));
+    expectEquals(1, signShort((short)Short.MAX_VALUE));
+  }
+
+  public static void testSignChar() {
+    expectEquals(0, signChar((char)0));
+    expectEquals(1, signChar((char)1));
+    expectEquals(1, signChar((char)12345));
+    expectEquals(1, signChar((char)Character.MAX_VALUE));
+  }
+
+  public static void testSignInt() {
+    expectEquals(-1, signInt(Integer.MIN_VALUE));
+    expectEquals(-1, signInt(-12345));
+    expectEquals(-1, signInt(-1));
+    expectEquals(0, signInt(0));
+    expectEquals(1, signInt(1));
+    expectEquals(1, signInt(12345));
+    expectEquals(1, signInt(Integer.MAX_VALUE));
+
+    for (int i = -11; i <= 11; i++) {
+      int expected = 0;
+      if (i < 0) expected = -1;
+      else if (i > 0) expected = 1;
+      expectEquals(expected, signInt(i));
+    }
+  }
+
+  public static void testSignLong() {
+    expectEquals(-1, signLong(Long.MIN_VALUE));
+    expectEquals(-1, signLong(-12345L));
+    expectEquals(-1, signLong(-1L));
+    expectEquals(0, signLong(0L));
+    expectEquals(1, signLong(1L));
+    expectEquals(1, signLong(12345L));
+    expectEquals(1, signLong(Long.MAX_VALUE));
+
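+    // Constants with differing high and low words: these would yield the wrong
+    // sign if the value were truncated to its low 32 bits.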
+    expectEquals(-1, signLong(0x800000007FFFFFFFL));
+    expectEquals(-1, signLong(0x80000000FFFFFFFFL));
+    expectEquals(1, signLong(0x000000007FFFFFFFL));
+    expectEquals(1, signLong(0x00000000FFFFFFFFL));
+    expectEquals(1, signLong(0x7FFFFFFF7FFFFFFFL));
+    expectEquals(1, signLong(0x7FFFFFFFFFFFFFFFL));
+
+    for (long i = -11L; i <= 11L; i++) {
+      int expected = 0;
+      if (i < 0) expected = -1;
+      else if (i > 0) expected = 1;
+      expectEquals(expected, signLong(i));
+    }
+
+    for (long i = Long.MIN_VALUE; i <= Long.MIN_VALUE + 11L; i++) {
+      expectEquals(-1, signLong(i));
+    }
+
+    for (long i = Long.MAX_VALUE; i >= Long.MAX_VALUE - 11L; i--) {
+      expectEquals(1, signLong(i));
+    }
+  }
+
+
+  public static void main(String[] args) {
+    testSignBoolean();
+    testSignByte();
+    testSignShort();
+    testSignChar();
+    testSignInt();
+    testSignLong();
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/455-set-vreg/expected.txt b/test/566-polymorphic-inlining/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/566-polymorphic-inlining/expected.txt
diff --git a/test/566-polymorphic-inlining/info.txt b/test/566-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..1a47f89
--- /dev/null
+++ b/test/566-polymorphic-inlining/info.txt
@@ -0,0 +1 @@
+Test polymorphic inlining.
diff --git a/test/566-polymorphic-inlining/polymorphic_inline.cc b/test/566-polymorphic-inlining/polymorphic_inline.cc
new file mode 100644
index 0000000..89293cc
--- /dev/null
+++ b/test/566-polymorphic-inlining/polymorphic_inline.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method.h"
+#include "base/enums.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+static void do_checks(jclass cls, const char* method_name) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(method_name, kRuntimePointerSize);
+
+  OatQuickMethodHeader* header = nullptr;
+  // Potentially infinite loop: the test harness provides its own timeout.
+  while (true) {
+    header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
+    if (code_cache->ContainsPc(header->GetCode())) {
+      break;
+    } else {
+      // Sleep to yield to the compiler thread.
+      usleep(1000);
+      // Will either ensure it's compiled or do the compilation itself.
+      jit->CompileMethod(method, soa.Self(), /* osr */ false);
+    }
+  }
+
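+  // The method is JIT-compiled at this point; verify that its CodeInfo records an
+  // inlined frame, i.e. that (polymorphic) inlining actually took place.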
+  CodeInfo info = header->GetOptimizedCodeInfo();
+  CodeInfoEncoding encoding = info.ExtractEncoding();
+  CHECK(info.HasInlineInfo(encoding));
+}
+
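+// Eagerly allocate the ProfilingInfo for the given method so its inline caches
+// can be filled before JIT compilation kicks in.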
+static void allocate_profiling_info(jclass cls, const char* method_name) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(method_name, kRuntimePointerSize);
+  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureProfilingInfo566(JNIEnv*, jclass cls) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  allocate_profiling_info(cls, "testInvokeVirtual");
+  allocate_profiling_info(cls, "testInvokeInterface");
+  allocate_profiling_info(cls, "$noinline$testInlineToSameTarget");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJittedAndPolymorphicInline566(JNIEnv*, jclass cls) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  if (kIsDebugBuild) {
+    // A debug build might often compile the methods before their profiling information is filled.
+    return;
+  }
+
+  do_checks(cls, "testInvokeVirtual");
+  do_checks(cls, "testInvokeInterface");
+  do_checks(cls, "testInvokeInterface2");
+  do_checks(cls, "$noinline$testInlineToSameTarget");
+}
+
+}  // namespace art
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..793b85f
--- /dev/null
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Itf {
+  public Class<?> sameInvokeInterface();
+  public Class<?> sameInvokeInterface2();
+  public Class<?> sameInvokeInterface3();
+}
+
+public class Main implements Itf {
+  public static void assertEquals(Object expected, Object actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected  + ", got " + actual);
+    }
+  }
+
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected  + ", got " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Main[] mains = new Main[3];
+    Itf[] itfs = new Itf[3];
+    itfs[0] = mains[0] = new Main();
+    itfs[1] = mains[1] = new Subclass();
+    itfs[2] = mains[2] = new OtherSubclass();
+
+    // Create the profiling infos eagerly to make sure they are filled.
+    ensureProfilingInfo566();
+
+    // Make testInvokeVirtual and testInvokeInterface hot to get them jitted.
+    // We pass Main and Subclass to get polymorphic inlining based on calling
+    // the same method.
+    for (int i = 0; i < 10000; ++i) {
+      testInvokeVirtual(mains[0]);
+      testInvokeVirtual(mains[1]);
+      testInvokeInterface(itfs[0]);
+      testInvokeInterface(itfs[1]);
+      testInvokeInterface2(itfs[0]);
+      testInvokeInterface2(itfs[1]);
+      $noinline$testInlineToSameTarget(mains[0]);
+      $noinline$testInlineToSameTarget(mains[1]);
+    }
+
+    ensureJittedAndPolymorphicInline566();
+
+    // At this point, the JIT should have compiled both methods, and inlined
+    // sameInvokeVirtual and sameInvokeInterface.
+    assertEquals(Main.class, testInvokeVirtual(mains[0]));
+    assertEquals(Main.class, testInvokeVirtual(mains[1]));
+
+    assertEquals(Itf.class, testInvokeInterface(itfs[0]));
+    assertEquals(Itf.class, testInvokeInterface(itfs[1]));
+
+    assertEquals(Itf.class, testInvokeInterface2(itfs[0]));
+    assertEquals(Itf.class, testInvokeInterface2(itfs[1]));
+
+    // This will trigger a deoptimization of the compiled code.
+    assertEquals(OtherSubclass.class, testInvokeVirtual(mains[2]));
+    assertEquals(OtherSubclass.class, testInvokeInterface(itfs[2]));
+    assertEquals(null, testInvokeInterface2(itfs[2]));
+
+    // Run this once to make sure we execute the JITted code.
+    $noinline$testInlineToSameTarget(mains[0]);
+    assertEquals(20001, counter);
+  }
+
+  public Class<?> sameInvokeVirtual() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Main.class;
+  }
+
+  public Class<?> sameInvokeInterface() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
+  public Class<?> sameInvokeInterface2() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
+  public Class<?> sameInvokeInterface3() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
+  public static Class<?> testInvokeInterface(Itf i) {
+    return i.sameInvokeInterface();
+  }
+
+  public static Class<?> testInvokeInterface2(Itf i) {
+    // Make three interface calls that each require a ClassTableGet, so that bogus
+    // code generation for ClassTableGet would crash here.
+    i.sameInvokeInterface();
+    i.sameInvokeInterface2();
+    return i.sameInvokeInterface3();
+  }
+
+  public static Class<?> testInvokeVirtual(Main m) {
+    return m.sameInvokeVirtual();
+  }
+
+  public static void $noinline$testInlineToSameTarget(Main m) {
+    if (doThrow) throw new Error("");
+    m.increment();
+  }
+
+  public Object field = new Object();
+
+  public static native void ensureJittedAndPolymorphicInline566();
+  public static native void ensureProfilingInfo566();
+
+  public void increment() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo
+    counter++;
+  }
+  public static int counter = 0;
+  public static boolean doThrow = false;
+}
+
+class Subclass extends Main {
+}
+
+class OtherSubclass extends Main {
+  public Class<?> sameInvokeVirtual() {
+    return OtherSubclass.class;
+  }
+
+  public Class<?> sameInvokeInterface() {
+    return OtherSubclass.class;
+  }
+
+  public Class<?> sameInvokeInterface2() {
+    return null;
+  }
+  public Class<?> sameInvokeInterface3() {
+    return null;
+  }
+}
diff --git a/test/567-checker-compare/expected.txt b/test/567-checker-compare/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/567-checker-compare/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/567-checker-compare/info.txt b/test/567-checker-compare/info.txt
new file mode 100644
index 0000000..5bac7b1
--- /dev/null
+++ b/test/567-checker-compare/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit compare operations.
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
new file mode 100644
index 0000000..a05bb60
--- /dev/null
+++ b/test/567-checker-compare/src/Main.java
@@ -0,0 +1,952 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static boolean doThrow = false;
+
+  /// CHECK-START: void Main.$opt$noinline$testReplaceInputWithItself(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<ArgX:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Cmp:i\d+>>    InvokeStaticOrDirect [<<ArgX>>,<<Zero>>,<<Method>>] intrinsic:IntegerCompare
+  /// CHECK-DAG:                     GreaterThanOrEqual [<<Cmp>>,<<Zero>>]
+
+  /// CHECK-START: void Main.$opt$noinline$testReplaceInputWithItself(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgX:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                     GreaterThanOrEqual [<<ArgX>>,<<Zero>>]
+
+  public static void $opt$noinline$testReplaceInputWithItself(int x) {
+    if (doThrow) { throw new Error(); }
+
+    // The instruction simplifier first replaces Integer.compare(x, 0) with Compare HIR
+    // and then merges the Compare into the GreaterThanOrEqual. This is a regression
+    // test to check that it is allowed to replace the second input of the
+    // GreaterThanOrEqual, i.e. <<Zero>>, with the very same instruction.
+    if (Integer.compare(x, 0) < 0) {
+      System.out.println("OOOPS");
+    }
+  }
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<PhiX:i\d+>>   Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<PhiY:i\d+>>   Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<PhiX>>,<<PhiY>>,<<Method>>] intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<PhiX:i\d+>>   Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<PhiY:i\d+>>   Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<Result:i\d+>> Compare [<<PhiX>>,<<PhiY>>]
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) select_generator (after)
+  /// CHECK:         <<ArgX:z\d+>>   ParameterValue
+  /// CHECK:         <<ArgY:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<SelX:i\d+>>   Select [<<Zero>>,<<One>>,<<ArgX>>]
+  /// CHECK-DAG:     <<SelY:i\d+>>   Select [<<Zero>>,<<One>>,<<ArgY>>]
+  /// CHECK-DAG:     <<Result:i\d+>> Compare [<<SelX>>,<<SelY>>]
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) select_generator (after)
+  /// CHECK-NOT:                     Phi
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<ArgX:z\d+>>   ParameterValue
+  /// CHECK:         <<ArgY:z\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Result:i\d+>> Compare [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:                     Select
+
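+  // The checks above follow the pipeline: the 0/1 Phis produced by the ternaries
+  // become Selects, and the final simplifier pass removes them so Compare consumes
+  // the boolean parameters directly.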
+  private static int compareBooleans(boolean x, boolean y) {
+    return Integer.compare((x ? 1 : 0), (y ? 1 : 0));
+  }
+
+  /// CHECK-START: int Main.compareBytes(byte, byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareBytes(byte, byte) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareBytes(byte, byte) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int compareBytes(byte x, byte y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareShorts(short, short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShorts(short, short) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShorts(short, short) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int compareShorts(short x, short y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareChars(char, char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareChars(char, char) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareChars(char, char) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int compareChars(char x, char y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareInts(int, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareInts(int, int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareInts(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int compareInts(int x, int y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareLongs(long, long) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareLongs(long, long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareLongs(long, long) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  private static int compareLongs(long x, long y) {
+    return Long.compare(x, y);
+  }
+
+
+  /// CHECK-START: int Main.compareByteShort(byte, short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareByteShort(byte, short) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareByteShort(byte, short) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareByteShort(byte x, short y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareByteChar(byte, char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareByteChar(byte, char) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareByteChar(byte, char) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareByteChar(byte x, char y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareByteInt(byte, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareByteInt(byte, int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareByteInt(byte, int) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareByteInt(byte x, int y) {
+    return Integer.compare(x, y);
+  }
+
+
+  /// CHECK-START: int Main.compareShortByte(short, byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShortByte(short, byte) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShortByte(short, byte) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareShortByte(short x, byte y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareShortChar(short, char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShortChar(short, char) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShortChar(short, char) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareShortChar(short x, char y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareShortInt(short, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShortInt(short, int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareShortInt(short, int) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareShortInt(short x, int y) {
+    return Integer.compare(x, y);
+  }
+
+
+  /// CHECK-START: int Main.compareCharByte(char, byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareCharByte(char, byte) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareCharByte(char, byte) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareCharByte(char x, byte y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareCharShort(char, short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareCharShort(char, short) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareCharShort(char, short) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareCharShort(char x, short y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareCharInt(char, int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareCharInt(char, int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareCharInt(char, int) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareCharInt(char x, int y) {
+    return Integer.compare(x, y);
+  }
+
+
+  /// CHECK-START: int Main.compareIntByte(int, byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareIntByte(int, byte) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareIntByte(int, byte) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareIntByte(int x, byte y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareIntShort(int, short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareIntShort(int, short) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareIntShort(int, short) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareIntShort(int x, short y) {
+    return Integer.compare(x, y);
+  }
+
+  /// CHECK-START: int Main.compareIntChar(int, char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerCompare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareIntChar(int, char) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Result:i\d+>> Compare
+  /// CHECK-DAG:                     Return [<<Result>>]
+
+  /// CHECK-START: int Main.compareIntChar(int, char) instruction_simplifier (after)
+  /// CHECK-NOT:                     InvokeStaticOrDirect
+
+  public static int compareIntChar(int x, char y) {
+    return Integer.compare(x, y);
+  }
+
+
+  public static void testCompareBooleans() {
+    expectEquals(-1, compareBooleans(false, true));
+
+    expectEquals(0, compareBooleans(false, false));
+    expectEquals(0, compareBooleans(true, true));
+
+    expectEquals(1, compareBooleans(true, false));
+  }
+
+  public static void testCompareBytes() {
+    expectEquals(-1, compareBytes(Byte.MIN_VALUE, (byte)(Byte.MIN_VALUE + 1)));
+    expectEquals(-1, compareBytes(Byte.MIN_VALUE, (byte)-1));
+    expectEquals(-1, compareBytes(Byte.MIN_VALUE, (byte)0));
+    expectEquals(-1, compareBytes(Byte.MIN_VALUE, (byte)1));
+    expectEquals(-1, compareBytes(Byte.MIN_VALUE, Byte.MAX_VALUE));
+    expectEquals(-1, compareBytes((byte)-1, (byte)0));
+    expectEquals(-1, compareBytes((byte)-1, (byte)1));
+    expectEquals(-1, compareBytes((byte)0, (byte)1));
+
+    expectEquals(0, compareBytes(Byte.MIN_VALUE, Byte.MIN_VALUE));
+    expectEquals(0, compareBytes((byte)-1, (byte)-1));
+    expectEquals(0, compareBytes((byte)0, (byte)0));
+    expectEquals(0, compareBytes((byte)1, (byte)1));
+    expectEquals(0, compareBytes(Byte.MAX_VALUE, Byte.MAX_VALUE));
+
+    expectEquals(1, compareBytes((byte)0, (byte)-1));
+    expectEquals(1, compareBytes((byte)1, (byte)-1));
+    expectEquals(1, compareBytes((byte)1, (byte)0));
+    expectEquals(1, compareBytes(Byte.MAX_VALUE, Byte.MIN_VALUE));
+    expectEquals(1, compareBytes(Byte.MAX_VALUE, (byte)-1));
+    expectEquals(1, compareBytes(Byte.MAX_VALUE, (byte)0));
+    expectEquals(1, compareBytes(Byte.MAX_VALUE, (byte)1));
+    expectEquals(1, compareBytes(Byte.MAX_VALUE, (byte)(Byte.MAX_VALUE - 1)));
+
+    for (byte i = -11; i <= 11; i++) {
+      for (byte j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareBytes(i, j));
+      }
+    }
+  }
+
+  public static void testCompareShorts() {
+    expectEquals(-1, compareShorts(Short.MIN_VALUE, (short)(Short.MIN_VALUE + 1)));
+    expectEquals(-1, compareShorts(Short.MIN_VALUE, (short)-1));
+    expectEquals(-1, compareShorts(Short.MIN_VALUE, (short)0));
+    expectEquals(-1, compareShorts(Short.MIN_VALUE, (short)1));
+    expectEquals(-1, compareShorts(Short.MIN_VALUE, Short.MAX_VALUE));
+    expectEquals(-1, compareShorts((short)-1, (short)0));
+    expectEquals(-1, compareShorts((short)-1, (short)1));
+    expectEquals(-1, compareShorts((short)0, (short)1));
+
+    expectEquals(0, compareShorts(Short.MIN_VALUE, Short.MIN_VALUE));
+    expectEquals(0, compareShorts((short)-1, (short)-1));
+    expectEquals(0, compareShorts((short)0, (short)0));
+    expectEquals(0, compareShorts((short)1, (short)1));
+    expectEquals(0, compareShorts(Short.MAX_VALUE, Short.MAX_VALUE));
+
+    expectEquals(1, compareShorts((short)0, (short)-1));
+    expectEquals(1, compareShorts((short)1, (short)-1));
+    expectEquals(1, compareShorts((short)1, (short)0));
+    expectEquals(1, compareShorts(Short.MAX_VALUE, Short.MIN_VALUE));
+    expectEquals(1, compareShorts(Short.MAX_VALUE, (short)-1));
+    expectEquals(1, compareShorts(Short.MAX_VALUE, (short)0));
+    expectEquals(1, compareShorts(Short.MAX_VALUE, (short)1));
+    expectEquals(1, compareShorts(Short.MAX_VALUE, (short)(Short.MAX_VALUE - 1)));
+
+    for (short i = -11; i <= 11; i++) {
+      for (short j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareShorts(i, j));
+      }
+    }
+  }
+
+  public static void testCompareChars() {
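+    // char is unsigned, so only non-negative values are exercised here.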
+    expectEquals(-1, compareChars((char)0, Character.MAX_VALUE));
+    expectEquals(-1, compareChars((char)0, (char)1));
+
+    expectEquals(0, compareChars((char)0, (char)0));
+    expectEquals(0, compareChars((char)1, (char)1));
+    expectEquals(0, compareChars(Character.MAX_VALUE, Character.MAX_VALUE));
+
+    expectEquals(1, compareChars((char)1, (char)0));
+    expectEquals(1, compareChars(Character.MAX_VALUE, (char)0));
+    expectEquals(1, compareChars(Character.MAX_VALUE, (char)1));
+    expectEquals(1, compareChars(Character.MAX_VALUE, (char)(Character.MAX_VALUE - 1)));
+
+    for (char i = 0; i <= 11; i++) {
+      for (char j = 0; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareChars(i, j));
+      }
+    }
+  }
+
+  public static void testCompareInts() {
+    expectEquals(-1, compareInts(Integer.MIN_VALUE, Integer.MIN_VALUE + 1));
+    expectEquals(-1, compareInts(Integer.MIN_VALUE, -1));
+    expectEquals(-1, compareInts(Integer.MIN_VALUE, 0));
+    expectEquals(-1, compareInts(Integer.MIN_VALUE, 1));
+    expectEquals(-1, compareInts(Integer.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(-1, compareInts(-1, 0));
+    expectEquals(-1, compareInts(-1, 1));
+    expectEquals(-1, compareInts(0, 1));
+
+    expectEquals(0, compareInts(Integer.MIN_VALUE, Integer.MIN_VALUE));
+    expectEquals(0, compareInts(-1, -1));
+    expectEquals(0, compareInts(0, 0));
+    expectEquals(0, compareInts(1, 1));
+    expectEquals(0, compareInts(Integer.MAX_VALUE, Integer.MAX_VALUE));
+
+    expectEquals(1, compareInts(0, -1));
+    expectEquals(1, compareInts(1, -1));
+    expectEquals(1, compareInts(1, 0));
+    expectEquals(1, compareInts(Integer.MAX_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compareInts(Integer.MAX_VALUE, -1));
+    expectEquals(1, compareInts(Integer.MAX_VALUE, 0));
+    expectEquals(1, compareInts(Integer.MAX_VALUE, 1));
+    expectEquals(1, compareInts(Integer.MAX_VALUE, Integer.MAX_VALUE - 1));
+
+    for (int i = -11; i <= 11; i++) {
+      for (int j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareInts(i, j));
+      }
+    }
+  }
+
+  public static void testCompareLongs() {
+    expectEquals(-1, compareLongs(Long.MIN_VALUE, Long.MIN_VALUE + 1L));
+    expectEquals(-1, compareLongs(Long.MIN_VALUE, -1L));
+    expectEquals(-1, compareLongs(Long.MIN_VALUE, 0L));
+    expectEquals(-1, compareLongs(Long.MIN_VALUE, 1L));
+    expectEquals(-1, compareLongs(Long.MIN_VALUE, Long.MAX_VALUE));
+    expectEquals(-1, compareLongs(-1L, 0L));
+    expectEquals(-1, compareLongs(-1L, 1L));
+    expectEquals(-1, compareLongs(0L, 1L));
+
+    expectEquals(0, compareLongs(Long.MIN_VALUE, Long.MIN_VALUE));
+    expectEquals(0, compareLongs(-1L, -1L));
+    expectEquals(0, compareLongs(0L, 0L));
+    expectEquals(0, compareLongs(1L, 1L));
+    expectEquals(0, compareLongs(Long.MAX_VALUE, Long.MAX_VALUE));
+
+    expectEquals(1, compareLongs(0L, -1L));
+    expectEquals(1, compareLongs(1L, -1L));
+    expectEquals(1, compareLongs(1L, 0L));
+    expectEquals(1, compareLongs(Long.MAX_VALUE, Long.MIN_VALUE));
+    expectEquals(1, compareLongs(Long.MAX_VALUE, -1L));
+    expectEquals(1, compareLongs(Long.MAX_VALUE, 0L));
+    expectEquals(1, compareLongs(Long.MAX_VALUE, 1L));
+    expectEquals(1, compareLongs(Long.MAX_VALUE, Long.MAX_VALUE - 1L));
+
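+    // These constants differ only in the sign bit of the low 32-bit word; a
+    // correct 64-bit compare must not be confused by the negative low word.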
+    expectEquals(-1, compareLongs(0x111111117FFFFFFFL, 0x11111111FFFFFFFFL));
+    expectEquals(0, compareLongs(0x111111117FFFFFFFL, 0x111111117FFFFFFFL));
+    expectEquals(1, compareLongs(0x11111111FFFFFFFFL, 0x111111117FFFFFFFL));
+
+    for (long i = -11L; i <= 11L; i++) {
+      for (long j = -11L; j <= 11L; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareLongs(i, j));
+      }
+    }
+
+    for (long i = Long.MIN_VALUE; i <= Long.MIN_VALUE + 11L; i++) {
+      expectEquals(-1, compareLongs(i, 0));
+    }
+
+    for (long i = Long.MAX_VALUE; i >= Long.MAX_VALUE - 11L; i--) {
+      expectEquals(1, compareLongs(i, 0));
+    }
+  }
+
+
+  public static void testCompareByteShort() {
+    expectEquals(-1, compareByteShort(Byte.MIN_VALUE, (short)-1));
+    expectEquals(-1, compareByteShort(Byte.MIN_VALUE, (short)0));
+    expectEquals(-1, compareByteShort(Byte.MIN_VALUE, (short)1));
+    expectEquals(-1, compareByteShort(Byte.MIN_VALUE, Short.MAX_VALUE));
+    expectEquals(-1, compareByteShort((byte)-1, (short)0));
+    expectEquals(-1, compareByteShort((byte)-1, (short)1));
+    expectEquals(-1, compareByteShort((byte)0, (short)1));
+    expectEquals(-1, compareByteShort(Byte.MAX_VALUE, (short)(Short.MAX_VALUE - 1)));
+    expectEquals(-1, compareByteShort(Byte.MAX_VALUE, Short.MAX_VALUE));
+
+    expectEquals(0, compareByteShort((byte)-1, (short)-1));
+    expectEquals(0, compareByteShort((byte)0, (short)0));
+    expectEquals(0, compareByteShort((byte)1, (short)1));
+
+    expectEquals(1, compareByteShort(Byte.MIN_VALUE, Short.MIN_VALUE));
+    expectEquals(1, compareByteShort(Byte.MIN_VALUE, (short)(Short.MIN_VALUE + 1)));
+    expectEquals(1, compareByteShort((byte)0, (short)-1));
+    expectEquals(1, compareByteShort((byte)1, (short)-1));
+    expectEquals(1, compareByteShort((byte)1, (short)0));
+    expectEquals(1, compareByteShort(Byte.MAX_VALUE, Short.MIN_VALUE));
+    expectEquals(1, compareByteShort(Byte.MAX_VALUE, (short)-1));
+    expectEquals(1, compareByteShort(Byte.MAX_VALUE, (short)0));
+    expectEquals(1, compareByteShort(Byte.MAX_VALUE, (short)1));
+
+    for (byte i = -11; i <= 11; i++) {
+      for (short j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareByteShort(i, j));
+      }
+    }
+  }
+
+  public static void testCompareByteChar() {
+    expectEquals(-1, compareByteChar(Byte.MIN_VALUE, (char)0));
+    expectEquals(-1, compareByteChar(Byte.MIN_VALUE, (char)1));
+    expectEquals(-1, compareByteChar(Byte.MIN_VALUE, Character.MAX_VALUE));
+    expectEquals(-1, compareByteChar((byte)-1, (char)0));
+    expectEquals(-1, compareByteChar((byte)-1, (char)1));
+    expectEquals(-1, compareByteChar((byte)0, (char)1));
+    expectEquals(-1, compareByteChar(Byte.MAX_VALUE, (char)(Character.MAX_VALUE - 1)));
+    expectEquals(-1, compareByteChar(Byte.MAX_VALUE, Character.MAX_VALUE));
+
+    expectEquals(0, compareByteChar((byte)0, (char)0));
+    expectEquals(0, compareByteChar((byte)1, (char)1));
+
+    expectEquals(1, compareByteChar((byte)1, (char)0));
+    expectEquals(1, compareByteChar(Byte.MAX_VALUE, (char)0));
+    expectEquals(1, compareByteChar(Byte.MAX_VALUE, (char)1));
+
+    for (byte i = -11; i <= 11; i++) {
+      for (char j = 0; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareByteChar(i, j));
+      }
+    }
+  }
+
+  public static void testCompareByteInt() {
+    expectEquals(-1, compareByteInt(Byte.MIN_VALUE, -1));
+    expectEquals(-1, compareByteInt(Byte.MIN_VALUE, 0));
+    expectEquals(-1, compareByteInt(Byte.MIN_VALUE, 1));
+    expectEquals(-1, compareByteInt(Byte.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(-1, compareByteInt((byte)-1, 0));
+    expectEquals(-1, compareByteInt((byte)-1, 1));
+    expectEquals(-1, compareByteInt((byte)0, 1));
+    expectEquals(-1, compareByteInt(Byte.MAX_VALUE, Integer.MAX_VALUE - 1));
+    expectEquals(-1, compareByteInt(Byte.MAX_VALUE, Integer.MAX_VALUE));
+
+    expectEquals(0, compareByteInt((byte)-1, -1));
+    expectEquals(0, compareByteInt((byte)0, 0));
+    expectEquals(0, compareByteInt((byte)1, 1));
+
+    expectEquals(1, compareByteInt(Byte.MIN_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compareByteInt(Byte.MIN_VALUE, Integer.MIN_VALUE + 1));
+    expectEquals(1, compareByteInt((byte)0, -1));
+    expectEquals(1, compareByteInt((byte)1, -1));
+    expectEquals(1, compareByteInt((byte)1, 0));
+    expectEquals(1, compareByteInt(Byte.MAX_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compareByteInt(Byte.MAX_VALUE, -1));
+    expectEquals(1, compareByteInt(Byte.MAX_VALUE, 0));
+    expectEquals(1, compareByteInt(Byte.MAX_VALUE, 1));
+
+    for (byte i = -11; i <= 11; i++) {
+      for (int j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareByteInt(i, j));
+      }
+    }
+  }
+
+
+  public static void testCompareShortByte() {
+    expectEquals(-1, compareShortByte(Short.MIN_VALUE, Byte.MIN_VALUE));
+    expectEquals(-1, compareShortByte(Short.MIN_VALUE, (byte)(Byte.MIN_VALUE + 1)));
+    expectEquals(-1, compareShortByte(Short.MIN_VALUE, (byte)-1));
+    expectEquals(-1, compareShortByte(Short.MIN_VALUE, (byte)0));
+    expectEquals(-1, compareShortByte(Short.MIN_VALUE, (byte)1));
+    expectEquals(-1, compareShortByte(Short.MIN_VALUE, Byte.MAX_VALUE));
+    expectEquals(-1, compareShortByte((short)-1, (byte)0));
+    expectEquals(-1, compareShortByte((short)-1, (byte)1));
+    expectEquals(-1, compareShortByte((short)0, (byte)1));
+
+    expectEquals(0, compareShortByte((short)-1, (byte)-1));
+    expectEquals(0, compareShortByte((short)0, (byte)0));
+    expectEquals(0, compareShortByte((short)1, (byte)1));
+
+    expectEquals(1, compareShortByte((short)0, (byte)-1));
+    expectEquals(1, compareShortByte((short)1, (byte)-1));
+    expectEquals(1, compareShortByte((short)1, (byte)0));
+    expectEquals(1, compareShortByte(Short.MAX_VALUE, Byte.MIN_VALUE));
+    expectEquals(1, compareShortByte(Short.MAX_VALUE, (byte)-1));
+    expectEquals(1, compareShortByte(Short.MAX_VALUE, (byte)0));
+    expectEquals(1, compareShortByte(Short.MAX_VALUE, (byte)1));
+    expectEquals(1, compareShortByte(Short.MAX_VALUE, (byte)(Byte.MAX_VALUE - 1)));
+    expectEquals(1, compareShortByte(Short.MAX_VALUE, Byte.MAX_VALUE));
+
+    for (short i = -11; i <= 11; i++) {
+      for (byte j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareShortByte(i, j));
+      }
+    }
+  }
+
+  public static void testCompareShortChar() {
+    expectEquals(-1, compareShortChar(Short.MIN_VALUE, (char)0));
+    expectEquals(-1, compareShortChar(Short.MIN_VALUE, (char)1));
+    expectEquals(-1, compareShortChar(Short.MIN_VALUE, Character.MAX_VALUE));
+    expectEquals(-1, compareShortChar((short)-1, (char)0));
+    expectEquals(-1, compareShortChar((short)-1, (char)1));
+    expectEquals(-1, compareShortChar((short)0, (char)1));
+    expectEquals(-1, compareShortChar(Short.MAX_VALUE, (char)(Character.MAX_VALUE - 1)));
+    expectEquals(-1, compareShortChar(Short.MAX_VALUE, Character.MAX_VALUE));
+
+    expectEquals(0, compareShortChar((short)0, (char)0));
+    expectEquals(0, compareShortChar((short)1, (char)1));
+
+    expectEquals(1, compareShortChar((short)1, (char)0));
+    expectEquals(1, compareShortChar(Short.MAX_VALUE, (char)0));
+    expectEquals(1, compareShortChar(Short.MAX_VALUE, (char)1));
+
+    for (short i = -11; i <= 11; i++) {
+      for (char j = 0; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareShortChar(i, j));
+      }
+    }
+  }
+
+  public static void testCompareShortInt() {
+    expectEquals(-1, compareShortInt(Short.MIN_VALUE, -1));
+    expectEquals(-1, compareShortInt(Short.MIN_VALUE, 0));
+    expectEquals(-1, compareShortInt(Short.MIN_VALUE, 1));
+    expectEquals(-1, compareShortInt(Short.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(-1, compareShortInt((short)-1, 0));
+    expectEquals(-1, compareShortInt((short)-1, 1));
+    expectEquals(-1, compareShortInt((short)0, 1));
+    expectEquals(-1, compareShortInt(Short.MAX_VALUE, Integer.MAX_VALUE - 1));
+    expectEquals(-1, compareShortInt(Short.MAX_VALUE, Integer.MAX_VALUE));
+
+    expectEquals(0, compareShortInt((short)-1, -1));
+    expectEquals(0, compareShortInt((short)0, 0));
+    expectEquals(0, compareShortInt((short)1, 1));
+
+    expectEquals(1, compareShortInt(Short.MIN_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compareShortInt(Short.MIN_VALUE, Integer.MIN_VALUE + 1));
+    expectEquals(1, compareShortInt((short)0, -1));
+    expectEquals(1, compareShortInt((short)1, -1));
+    expectEquals(1, compareShortInt((short)1, 0));
+    expectEquals(1, compareShortInt(Short.MAX_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compareShortInt(Short.MAX_VALUE, -1));
+    expectEquals(1, compareShortInt(Short.MAX_VALUE, 0));
+    expectEquals(1, compareShortInt(Short.MAX_VALUE, 1));
+
+    for (short i = -11; i <= 11; i++) {
+      for (int j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareShortInt(i, j));
+      }
+    }
+  }
+
+
+  public static void testCompareCharByte() {
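+    // Since char is unsigned, any negative byte compares below every char value.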
+    expectEquals(-1, compareCharByte((char)0, (byte)1));
+    expectEquals(-1, compareCharByte((char)0, Byte.MAX_VALUE));
+
+    expectEquals(0, compareCharByte((char)0, (byte)0));
+    expectEquals(0, compareCharByte((char)1, (byte)1));
+
+    expectEquals(1, compareCharByte((char)0, Byte.MIN_VALUE));
+    expectEquals(1, compareCharByte((char)0, (byte)(Byte.MIN_VALUE + 1)));
+    expectEquals(1, compareCharByte((char)0, (byte)-1));
+    expectEquals(1, compareCharByte((char)1, (byte)-1));
+    expectEquals(1, compareCharByte((char)1, (byte)0));
+    expectEquals(1, compareCharByte(Character.MAX_VALUE, Byte.MIN_VALUE));
+    expectEquals(1, compareCharByte(Character.MAX_VALUE, (byte)-1));
+    expectEquals(1, compareCharByte(Character.MAX_VALUE, (byte)0));
+    expectEquals(1, compareCharByte(Character.MAX_VALUE, (byte)1));
+    expectEquals(1, compareCharByte(Character.MAX_VALUE, (byte)(Byte.MAX_VALUE - 1)));
+    expectEquals(1, compareCharByte(Character.MAX_VALUE, Byte.MAX_VALUE));
+
+    for (char i = 0; i <= 11; i++) {
+      for (byte j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareCharByte(i, j));
+      }
+    }
+  }
+
+  public static void testCompareCharShort() {
+    expectEquals(-1, compareCharShort((char)0, (short)1));
+    expectEquals(-1, compareCharShort((char)0, Short.MAX_VALUE));
+
+    expectEquals(0, compareCharShort((char)0, (short)0));
+    expectEquals(0, compareCharShort((char)1, (short)1));
+
+    expectEquals(1, compareCharShort((char)0, Short.MIN_VALUE));
+    expectEquals(1, compareCharShort((char)0, (short)(Short.MIN_VALUE + 1)));
+    expectEquals(1, compareCharShort((char)0, (short)-1));
+    expectEquals(1, compareCharShort((char)1, (short)-1));
+    expectEquals(1, compareCharShort((char)1, (short)0));
+    expectEquals(1, compareCharShort(Character.MAX_VALUE, Short.MIN_VALUE));
+    expectEquals(1, compareCharShort(Character.MAX_VALUE, (short)-1));
+    expectEquals(1, compareCharShort(Character.MAX_VALUE, (short)0));
+    expectEquals(1, compareCharShort(Character.MAX_VALUE, (short)1));
+    expectEquals(1, compareCharShort(Character.MAX_VALUE, (short)(Short.MAX_VALUE - 1)));
+    expectEquals(1, compareCharShort(Character.MAX_VALUE, Short.MAX_VALUE));
+
+    for (char i = 0; i <= 11; i++) {
+      for (short j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareCharShort(i, j));
+      }
+    }
+  }
+
+  public static void testCompareCharInt() {
+    expectEquals(-1, compareCharInt((char)0, 1));
+    expectEquals(-1, compareCharInt((char)0, Integer.MAX_VALUE));
+    expectEquals(-1, compareCharInt(Character.MAX_VALUE, Integer.MAX_VALUE - 1));
+    expectEquals(-1, compareCharInt(Character.MAX_VALUE, Integer.MAX_VALUE));
+
+    expectEquals(0, compareCharInt((char)0, 0));
+    expectEquals(0, compareCharInt((char)1, 1));
+
+    expectEquals(1, compareCharInt((char)0, Integer.MIN_VALUE));
+    expectEquals(1, compareCharInt((char)0, Integer.MIN_VALUE + 1));
+    expectEquals(1, compareCharInt((char)0, -1));
+    expectEquals(1, compareCharInt((char)1, -1));
+    expectEquals(1, compareCharInt((char)1, 0));
+    expectEquals(1, compareCharInt(Character.MAX_VALUE, Integer.MIN_VALUE));
+    expectEquals(1, compareCharInt(Character.MAX_VALUE, -1));
+    expectEquals(1, compareCharInt(Character.MAX_VALUE, 0));
+    expectEquals(1, compareCharInt(Character.MAX_VALUE, 1));
+
+    for (char i = 0; i <= 11; i++) {
+      for (int j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareCharInt(i, j));
+      }
+    }
+  }
+
+
+  public static void testCompareIntByte() {
+    expectEquals(-1, compareIntByte(Integer.MIN_VALUE, Byte.MIN_VALUE));
+    expectEquals(-1, compareIntByte(Integer.MIN_VALUE, (byte)(Byte.MIN_VALUE + 1)));
+    expectEquals(-1, compareIntByte(Integer.MIN_VALUE, (byte)-1));
+    expectEquals(-1, compareIntByte(Integer.MIN_VALUE, (byte)0));
+    expectEquals(-1, compareIntByte(Integer.MIN_VALUE, (byte)1));
+    expectEquals(-1, compareIntByte(Integer.MIN_VALUE, Byte.MAX_VALUE));
+    expectEquals(-1, compareIntByte(-1, (byte)0));
+    expectEquals(-1, compareIntByte(-1, (byte)1));
+    expectEquals(-1, compareIntByte(0, (byte)1));
+
+    expectEquals(0, compareIntByte(-1, (byte)-1));
+    expectEquals(0, compareIntByte(0, (byte)0));
+    expectEquals(0, compareIntByte(1, (byte)1));
+
+    expectEquals(1, compareIntByte(0, (byte)-1));
+    expectEquals(1, compareIntByte(1, (byte)-1));
+    expectEquals(1, compareIntByte(1, (byte)0));
+    expectEquals(1, compareIntByte(Integer.MAX_VALUE, Byte.MIN_VALUE));
+    expectEquals(1, compareIntByte(Integer.MAX_VALUE, (byte)-1));
+    expectEquals(1, compareIntByte(Integer.MAX_VALUE, (byte)0));
+    expectEquals(1, compareIntByte(Integer.MAX_VALUE, (byte)1));
+    expectEquals(1, compareIntByte(Integer.MAX_VALUE, (byte)(Byte.MAX_VALUE - 1)));
+    expectEquals(1, compareIntByte(Integer.MAX_VALUE, Byte.MAX_VALUE));
+
+    for (int i = -11; i <= 11; i++) {
+      for (byte j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareIntByte(i, j));
+      }
+    }
+  }
+
+  public static void testCompareIntShort() {
+    expectEquals(-1, compareIntShort(Integer.MIN_VALUE, Short.MIN_VALUE));
+    expectEquals(-1, compareIntShort(Integer.MIN_VALUE, (short)(Short.MIN_VALUE + 1)));
+    expectEquals(-1, compareIntShort(Integer.MIN_VALUE, (short)-1));
+    expectEquals(-1, compareIntShort(Integer.MIN_VALUE, (short)0));
+    expectEquals(-1, compareIntShort(Integer.MIN_VALUE, (short)1));
+    expectEquals(-1, compareIntShort(Integer.MIN_VALUE, Short.MAX_VALUE));
+    expectEquals(-1, compareIntShort(-1, (short)0));
+    expectEquals(-1, compareIntShort(-1, (short)1));
+    expectEquals(-1, compareIntShort(0, (short)1));
+
+    expectEquals(0, compareIntShort(-1, (short)-1));
+    expectEquals(0, compareIntShort(0, (short)0));
+    expectEquals(0, compareIntShort(1, (short)1));
+
+    expectEquals(1, compareIntShort(0, (short)-1));
+    expectEquals(1, compareIntShort(1, (short)-1));
+    expectEquals(1, compareIntShort(1, (short)0));
+    expectEquals(1, compareIntShort(Integer.MAX_VALUE, Short.MIN_VALUE));
+    expectEquals(1, compareIntShort(Integer.MAX_VALUE, (short)-1));
+    expectEquals(1, compareIntShort(Integer.MAX_VALUE, (short)0));
+    expectEquals(1, compareIntShort(Integer.MAX_VALUE, (short)1));
+    expectEquals(1, compareIntShort(Integer.MAX_VALUE, (short)(Short.MAX_VALUE - 1)));
+    expectEquals(1, compareIntShort(Integer.MAX_VALUE, Short.MAX_VALUE));
+
+    for (int i = -11; i <= 11; i++) {
+      for (short j = -11; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareIntShort(i, j));
+      }
+    }
+  }
+
+  public static void testCompareIntChar() {
+    expectEquals(-1, compareIntChar(Integer.MIN_VALUE, (char)0));
+    expectEquals(-1, compareIntChar(Integer.MIN_VALUE, (char)1));
+    expectEquals(-1, compareIntChar(Integer.MIN_VALUE, Character.MAX_VALUE));
+    expectEquals(-1, compareIntChar(-1, (char)0));
+    expectEquals(-1, compareIntChar(-1, (char)1));
+    expectEquals(-1, compareIntChar(0, (char)1));
+
+    expectEquals(0, compareIntChar(0, (char)0));
+    expectEquals(0, compareIntChar(1, (char)1));
+
+    expectEquals(1, compareIntChar(1, (char)0));
+    expectEquals(1, compareIntChar(Integer.MAX_VALUE, (char)0));
+    expectEquals(1, compareIntChar(Integer.MAX_VALUE, (char)1));
+    expectEquals(1, compareIntChar(Integer.MAX_VALUE, (char)(Character.MAX_VALUE - 1)));
+    expectEquals(1, compareIntChar(Integer.MAX_VALUE, Character.MAX_VALUE));
+
+    for (int i = -11; i <= 11; i++) {
+      for (char j = 0; j <= 11; j++) {
+        int expected = 0;
+        if (i < j) expected = -1;
+        else if (i > j) expected = 1;
+        expectEquals(expected, compareIntChar(i, j));
+      }
+    }
+  }
+
+
+  public static void main(String[] args) {
+    $opt$noinline$testReplaceInputWithItself(42);
+
+    testCompareBooleans();
+    testCompareBytes();
+    testCompareShorts();
+    testCompareChars();
+    testCompareInts();
+    testCompareLongs();
+
+    testCompareByteShort();
+    testCompareByteChar();
+    testCompareByteInt();
+
+    testCompareShortByte();
+    testCompareShortChar();
+    testCompareShortInt();
+
+    testCompareCharByte();
+    testCompareCharShort();
+    testCompareCharInt();
+
+    testCompareIntByte();
+    testCompareIntShort();
+    testCompareIntChar();
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/568-checker-onebit/expected.txt b/test/568-checker-onebit/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/568-checker-onebit/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/568-checker-onebit/info.txt b/test/568-checker-onebit/info.txt
new file mode 100644
index 0000000..c2b5bf8
--- /dev/null
+++ b/test/568-checker-onebit/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit high/low-bit operations.
diff --git a/test/568-checker-onebit/src/Main.java b/test/568-checker-onebit/src/Main.java
new file mode 100644
index 0000000..6ce4ffb
--- /dev/null
+++ b/test/568-checker-onebit/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.hi32(int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerHighestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int hi32(int x) {
+    return Integer.highestOneBit(x);
+  }
+
+  /// CHECK-START: int Main.lo32(int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerLowestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int lo32(int x) {
+    return Integer.lowestOneBit(x);
+  }
+
+  /// CHECK-START: long Main.hi64(long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:LongHighestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long hi64(long x) {
+    return Long.highestOneBit(x);
+  }
+
+  /// CHECK-START: long Main.lo64(long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:LongLowestOneBit
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long lo64(long x) {
+    return Long.lowestOneBit(x);
+  }
+
+  public static void main(String[] args) {
+    // Hidden zeros.
+    int[] xi = new int[32];
+    long[] xj = new long[64];
+
+    expectEquals32(0x00000000, hi32(0x00000000));
+    expectEquals32(0x00000000, lo32(0x00000000));
+    expectEquals32(0x00010000, hi32(0x00010000));
+    expectEquals32(0x00010000, lo32(0x00010000));
+    expectEquals32(0x00800000, hi32(0x00FF0000));
+    expectEquals32(0x00010000, lo32(0x00FF0000));
+    expectEquals32(0x80000000, hi32(0xFFFFFFFF));
+    expectEquals32(0x00000001, lo32(0xFFFFFFFF));
+
+    for (int i = 0; i < 32; i++) {
+      expectEquals32(0, hi32(xi[i]));
+      expectEquals32(0, lo32(xi[i]));
+      expectEquals32(1 << i, hi32(1 << i));
+      expectEquals32(1 << i, lo32(1 << i));
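+      // For i >= 29 the top bits of 0xF shift out, so the highest remaining
+      // set bit is the sign bit.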
+      int expected = i < 29 ? 0x8 << i : 0x80000000;
+      expectEquals32(expected, hi32(0xF << i));
+      expectEquals32(0x1 << i, lo32(0xF << i));
+    }
+
+    expectEquals64(0x0000000000000000L, hi64(0x0000000000000000L));
+    expectEquals64(0x0000000000000000L, lo64(0x0000000000000000L));
+    expectEquals64(0x0000000100000000L, hi64(0x0000000100000000L));
+    expectEquals64(0x0000000100000000L, lo64(0x0000000100000000L));
+    expectEquals64(0x0000008000000000L, hi64(0x000000FF00000000L));
+    expectEquals64(0x0000000100000000L, lo64(0x000000FF00000000L));
+    expectEquals64(0x8000000000000000L, hi64(0xFFFFFFFFFFFFFFFFL));
+    expectEquals64(0x0000000000000001L, lo64(0xFFFFFFFFFFFFFFFFL));
+
+    for (int i = 0; i < 64; i++) {
+      expectEquals64(0L, hi64(xj[i]));
+      expectEquals64(0L, lo64(xj[i]));
+      expectEquals64(1L << i, hi64(1L << i));
+      expectEquals64(1L << i, lo64(1L << i));
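+      // Analogous 64-bit case: for i >= 61 the top bits of 0xFL shift out.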
+      long expected = i < 61 ? 0x8L << i : 0x8000000000000000L;
+      expectEquals64(expected, hi64(0xFL << i));
+      expectEquals64(0x1L << i, lo64(0xFL << i));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/569-checker-pattern-replacement/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/569-checker-pattern-replacement/expected.txt
diff --git a/test/569-checker-pattern-replacement/info.txt b/test/569-checker-pattern-replacement/info.txt
new file mode 100644
index 0000000..4dfa932
--- /dev/null
+++ b/test/569-checker-pattern-replacement/info.txt
@@ -0,0 +1 @@
+Test pattern substitution used when we cannot inline.
diff --git a/test/569-checker-pattern-replacement/multidex.jpp b/test/569-checker-pattern-replacement/multidex.jpp
new file mode 100644
index 0000000..cfc8ad1
--- /dev/null
+++ b/test/569-checker-pattern-replacement/multidex.jpp
@@ -0,0 +1,8 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
+
+BaseInMainDex:
+  @@com.android.jack.annotations.ForceInMainDex
+  class BaseInMainDex
+
diff --git a/test/569-checker-pattern-replacement/run b/test/569-checker-pattern-replacement/run
new file mode 100755
index 0000000..f7e9df2
--- /dev/null
+++ b/test/569-checker-pattern-replacement/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
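+# Prevent inlining from core-oj and the secondary dex file so that calls into
+# them stay as invokes and exercise pattern substitution instead.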
+exec ${RUN} "$@" \
+    -Xcompiler-option --no-inline-from=core-oj,569-checker-pattern-replacement.jar:classes2.dex
diff --git a/test/569-checker-pattern-replacement/src-multidex/Base.java b/test/569-checker-pattern-replacement/src-multidex/Base.java
new file mode 100644
index 0000000..f4d59af
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src-multidex/Base.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Base {
+  Base() {
+    intField = 0;               // Unnecessary IPUT.
+    doubleField = 0.0;          // Unnecessary IPUT.
+    objectField = null;         // Unnecessary IPUT.
+  }
+
+  Base(int intValue) {
+    intField = intValue;
+  }
+
+  Base(String stringValue) {
+    objectField = stringValue;  // Unnecessary IPUT.
+    stringField = stringValue;
+    objectField = null;         // Unnecessary IPUT.
+  }
+
+  Base(double doubleValue, Object objectValue) {
+    doubleField = doubleValue;
+    objectField = objectValue;
+  }
+
+  Base(int intValue, double doubleValue, Object objectValue) {
+    intField = intValue;
+    doubleField = doubleValue;
+    objectField = objectValue;
+  }
+
+  Base(int intValue, double doubleValue, Object objectValue, String stringValue) {
+    // Outside our limit of 3 IPUTs.
+    intField = intValue;
+    doubleField = doubleValue;
+    objectField = objectValue;
+    stringField = stringValue;
+  }
+
+  Base(double doubleValue) {
+    this(doubleValue, null);
+  }
+
+  Base(Object objectValue) {
+    // Unsupported forwarding of a value after a zero.
+    this(0.0, objectValue);
+  }
+
+  Base(int intValue, long dummy) {
+    this(intValue, 0.0, null);
+  }
+
+  public int intField;
+  public double doubleField;
+  public Object objectField;
+  public String stringField;
+}
diff --git a/test/569-checker-pattern-replacement/src-multidex/BaseWithFinalField.java b/test/569-checker-pattern-replacement/src-multidex/BaseWithFinalField.java
new file mode 100644
index 0000000..7a1d591
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src-multidex/BaseWithFinalField.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class BaseWithFinalField {
+  BaseWithFinalField() {
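+    // Required IPUT: a final field must be assigned in every constructor,
+    // even when the value stored is 0.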
+    intField = 0;
+  }
+
+  BaseWithFinalField(int intValue) {
+    intField = intValue;
+  }
+
+  public final int intField;
+}
diff --git a/test/569-checker-pattern-replacement/src-multidex/Derived.java b/test/569-checker-pattern-replacement/src-multidex/Derived.java
new file mode 100644
index 0000000..184563f
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src-multidex/Derived.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Derived extends Base {
+  public Derived() {
+    this(0);
+  }
+
+  public Derived(int intValue) {
+    super(intValue);
+  }
+
+  public Derived(String stringValue) {
+    super(stringValue);
+    stringField = null;   // Clear field set by Base.<init>(String).
+  }
+
+  public Derived(double doubleValue) {
+    super(doubleValue, null);
+  }
+
+  public Derived(int intValue, double doubleValue, Object objectValue) {
+    super(intValue, doubleValue, objectValue);
+    objectField = null;   // Clear field set by Base.<init>(int, double, Object).
+    intField = 0;         // Clear field set by Base.<init>(int, double, Object).
+  }
+
+  Derived(int intValue, double doubleValue, Object objectValue, String stringValue) {
+    super(intValue, doubleValue, objectValue, stringValue);
+    // Clearing fields here doesn't help because the superclass constructor must
+    // satisfy the pattern constraints on its own and it doesn't (it has 4 IPUTs).
+    intField = 0;
+    doubleField = 0.0;
+    objectField = null;
+    stringField = null;
+  }
+
+  public Derived(float floatValue) {
+    super();
+    floatField = floatValue;
+  }
+
+  public Derived(int intValue, double doubleValue, Object objectValue, float floatValue) {
+    super(intValue, doubleValue, objectValue);
+    objectField = null;   // Clear field set by Base.<init>(int, double, Object).
+    floatField = floatValue;
+  }
+
+  public float floatField;
+}
diff --git a/test/569-checker-pattern-replacement/src-multidex/DerivedInSecondDex.java b/test/569-checker-pattern-replacement/src-multidex/DerivedInSecondDex.java
new file mode 100644
index 0000000..50266e8
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src-multidex/DerivedInSecondDex.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class DerivedInSecondDex extends BaseInMainDex {
+  DerivedInSecondDex() {
+    super();
+  }
+
+  DerivedInSecondDex(int intValue) {
+    // Not matched: Superclass in a different dex file has an IPUT.
+    super(intValue);
+  }
+
+  DerivedInSecondDex(long dummy) {
+    // Matched: Superclass in a different dex file has an IPUT that's pruned because we store 0.
+    super(0);
+  }
+}
diff --git a/test/569-checker-pattern-replacement/src-multidex/DerivedWithFinalField.java b/test/569-checker-pattern-replacement/src-multidex/DerivedWithFinalField.java
new file mode 100644
index 0000000..5b39b8a
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src-multidex/DerivedWithFinalField.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class DerivedWithFinalField extends BaseWithFinalField {
+  DerivedWithFinalField() {
+    this(0);
+  }
+
+  DerivedWithFinalField(int intValue) {
+    super(intValue);
+    doubleField = 0.0;
+  }
+
+  DerivedWithFinalField(double doubleValue) {
+    super(0);
+    doubleField = doubleValue;
+  }
+
+  DerivedWithFinalField(int intValue, double doubleValue) {
+    super(intValue);
+    doubleField = doubleValue;
+  }
+
+  public final double doubleField;
+}
diff --git a/test/569-checker-pattern-replacement/src-multidex/Second.java b/test/569-checker-pattern-replacement/src-multidex/Second.java
new file mode 100644
index 0000000..cba1dc8
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src-multidex/Second.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
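+// Helper methods simple enough for the compiler's pattern recognition: no-ops,
+// return-argument, return-constant, and single field getters/setters.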
+public final class Second {
+  public static void staticNop(int unused) { }
+
+  public void nop() { }
+
+  public static Object staticReturnArg2(int unused1, String arg2) {
+    return arg2;
+  }
+
+  public long returnArg1(long arg1) {
+    return arg1;
+  }
+
+  public static int staticReturn9() {
+    return 9;
+  }
+
+  public int return7(Object unused) {
+    return 7;
+  }
+
+  public static String staticReturnNull() {
+    return null;
+  }
+
+  public Object returnNull() {
+    return null;
+  }
+
+  public int getInstanceIntField() {
+    return instanceIntField;
+  }
+
+  public double getInstanceDoubleField(int unused1) {
+    return instanceDoubleField;
+  }
+
+  public Object getInstanceObjectField(long unused1) {
+    return instanceObjectField;
+  }
+
+  public String getInstanceStringField(Object unused1, String unused2, long unused3) {
+    return instanceStringField;
+  }
+
+  public static int staticGetInstanceIntField(Second s) {
+    return s.instanceIntField;
+  }
+
+  public double getInstanceDoubleFieldFromParam(Second s) {
+    return s.instanceDoubleField;
+  }
+
+  public int getStaticIntField() {
+    return staticIntField;
+  }
+
+  public void setInstanceLongField(int ignored, long value) {
+    instanceLongField = value;
+  }
+
+  public int setInstanceLongFieldReturnArg2(long value, int arg2) {
+    instanceLongField = value;
+    return arg2;
+  }
+
+  public static void staticSetInstanceLongField(Second s, long value) {
+    s.instanceLongField = value;
+  }
+
+  public void setInstanceLongFieldThroughParam(Second s, long value) {
+    s.instanceLongField = value;
+  }
+
+  public void setStaticFloatField(float value) {
+    staticFloatField = value;
+  }
+
+  public int instanceIntField = 42;
+  public double instanceDoubleField = -42.0;
+  public Object instanceObjectField = null;
+  public String instanceStringField = "dummy";
+  public long instanceLongField = 0;  // Overwritten by setters.
+
+  public static int staticIntField = 4242;
+  public static float staticFloatField = 0.0f;  // Overwritten by setters.
+}
diff --git a/test/569-checker-pattern-replacement/src/BaseInMainDex.java b/test/569-checker-pattern-replacement/src/BaseInMainDex.java
new file mode 100644
index 0000000..b401540
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src/BaseInMainDex.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class BaseInMainDex {
+  BaseInMainDex() {
+  }
+
+  BaseInMainDex(int intValue) {
+    intField = intValue;
+  }
+
+  public int intField;
+}
diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
new file mode 100644
index 0000000..345e9fd
--- /dev/null
+++ b/test/569-checker-pattern-replacement/src/Main.java
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
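+  // Each test inspects the graph before and after the inliner pass: calls that
+  // match a simple pattern (no-op, return-arg, return-constant, single field
+  // access) are replaced with equivalent code even though inlining is disabled.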
+  /// CHECK-START: void Main.staticNop() inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.staticNop() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  public static void staticNop() {
+    Second.staticNop(11);
+  }
+
+  /// CHECK-START: void Main.nop(Second) inliner (before)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: void Main.nop(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static void nop(Second s) {
+    s.nop();
+  }
+
+  /// CHECK-START: java.lang.Object Main.staticReturnArg2(java.lang.String) inliner (before)
+  /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Ignored:i\d+>>    IntConstant 77
+  /// CHECK-DAG:  <<ClinitCk:l\d+>>   ClinitCheck
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>{{(,[ij]\d+)?}},<<ClinitCk>>]
+  /// CHECK-DAG:                      Return [<<Invoke>>]
+
+  /// CHECK-START: java.lang.Object Main.staticReturnArg2(java.lang.String) inliner (after)
+  /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
+  /// CHECK-DAG:                      Return [<<Value>>]
+
+  /// CHECK-START: java.lang.Object Main.staticReturnArg2(java.lang.String) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  public static Object staticReturnArg2(String value) {
+    return Second.staticReturnArg2(77, value);
+  }
+
+  /// CHECK-START: long Main.returnArg1(Second, long) inliner (before)
+  /// CHECK-DAG:  <<Second:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Value:j\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>     NullCheck [<<Second>>]
+  /// CHECK-DAG:  <<Invoke:j\d+>>     InvokeVirtual [<<NullCk>>,<<Value>>]
+  /// CHECK-DAG:                      Return [<<Invoke>>]
+
+  /// CHECK-START: long Main.returnArg1(Second, long) inliner (after)
+  /// CHECK-DAG:  <<Value:j\d+>>      ParameterValue
+  /// CHECK-DAG:                      Return [<<Value>>]
+
+  /// CHECK-START: long Main.returnArg1(Second, long) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static long returnArg1(Second s, long value) {
+    return s.returnArg1(value);
+  }
+
+  /// CHECK-START: int Main.staticReturn9() inliner (before)
+  /// CHECK:      {{i\d+}}            InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.staticReturn9() inliner (before)
+  /// CHECK-NOT:                      IntConstant 9
+
+  /// CHECK-START: int Main.staticReturn9() inliner (after)
+  /// CHECK-DAG:  <<Const9:i\d+>>     IntConstant 9
+  /// CHECK-DAG:                      Return [<<Const9>>]
+
+  /// CHECK-START: int Main.staticReturn9() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  public static int staticReturn9() {
+    return Second.staticReturn9();
+  }
+
+  /// CHECK-START: int Main.return7(Second) inliner (before)
+  /// CHECK:      {{i\d+}}            InvokeVirtual
+
+  /// CHECK-START: int Main.return7(Second) inliner (before)
+  /// CHECK-NOT:                      IntConstant 7
+
+  /// CHECK-START: int Main.return7(Second) inliner (after)
+  /// CHECK-DAG:  <<Const7:i\d+>>     IntConstant 7
+  /// CHECK-DAG:                      Return [<<Const7>>]
+
+  /// CHECK-START: int Main.return7(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static int return7(Second s) {
+    return s.return7(null);
+  }
+
+  /// CHECK-START: java.lang.String Main.staticReturnNull() inliner (before)
+  /// CHECK:      {{l\d+}}            InvokeStaticOrDirect
+
+  /// CHECK-START: java.lang.String Main.staticReturnNull() inliner (before)
+  /// CHECK-NOT:                      NullConstant
+
+  /// CHECK-START: java.lang.String Main.staticReturnNull() inliner (after)
+  /// CHECK-DAG:  <<Null:l\d+>>       NullConstant
+  /// CHECK-DAG:                      Return [<<Null>>]
+
+  /// CHECK-START: java.lang.String Main.staticReturnNull() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  public static String staticReturnNull() {
+    return Second.staticReturnNull();
+  }
+
+  /// CHECK-START: java.lang.Object Main.returnNull(Second) inliner (before)
+  /// CHECK:      {{l\d+}}            InvokeVirtual
+
+  /// CHECK-START: java.lang.Object Main.returnNull(Second) inliner (before)
+  /// CHECK-NOT:                      NullConstant
+
+  /// CHECK-START: java.lang.Object Main.returnNull(Second) inliner (after)
+  /// CHECK-DAG:  <<Null:l\d+>>       NullConstant
+  /// CHECK-DAG:                      Return [<<Null>>]
+
+  /// CHECK-START: java.lang.Object Main.returnNull(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static Object returnNull(Second s) {
+    return s.returnNull();
+  }
+
+  /// CHECK-START: int Main.getInt(Second) inliner (before)
+  /// CHECK:      {{i\d+}}            InvokeVirtual
+
+  /// CHECK-START: int Main.getInt(Second) inliner (after)
+  /// CHECK:      {{i\d+}}            InstanceFieldGet
+
+  /// CHECK-START: int Main.getInt(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static int getInt(Second s) {
+    return s.getInstanceIntField();
+  }
+
+  /// CHECK-START: double Main.getDouble(Second) inliner (before)
+  /// CHECK:      {{d\d+}}            InvokeVirtual
+
+  /// CHECK-START: double Main.getDouble(Second) inliner (after)
+  /// CHECK:      {{d\d+}}            InstanceFieldGet
+
+  /// CHECK-START: double Main.getDouble(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static double getDouble(Second s) {
+    return s.getInstanceDoubleField(22);
+  }
+
+  /// CHECK-START: java.lang.Object Main.getObject(Second) inliner (before)
+  /// CHECK:      {{l\d+}}            InvokeVirtual
+
+  /// CHECK-START: java.lang.Object Main.getObject(Second) inliner (after)
+  /// CHECK:      {{l\d+}}            InstanceFieldGet
+
+  /// CHECK-START: java.lang.Object Main.getObject(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static Object getObject(Second s) {
+    return s.getInstanceObjectField(-1L);
+  }
+
+  /// CHECK-START: java.lang.String Main.getString(Second) inliner (before)
+  /// CHECK:      {{l\d+}}            InvokeVirtual
+
+  /// CHECK-START: java.lang.String Main.getString(Second) inliner (after)
+  /// CHECK:      {{l\d+}}            InstanceFieldGet
+
+  /// CHECK-START: java.lang.String Main.getString(Second) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static String getString(Second s) {
+    return s.getInstanceStringField(null, "whatever", 1234L);
+  }
+
+  /// CHECK-START: int Main.staticGetInt(Second) inliner (before)
+  /// CHECK:      {{i\d+}}            InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.staticGetInt(Second) inliner (after)
+  /// CHECK:      {{i\d+}}            InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.staticGetInt(Second) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldGet
+
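+  // Not replaced: the getter reads a field of its parameter rather than of
+  // 'this', which the pattern matcher does not handle.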
+  public static int staticGetInt(Second s) {
+    return Second.staticGetInstanceIntField(s);
+  }
+
+  /// CHECK-START: double Main.getDoubleFromParam(Second) inliner (before)
+  /// CHECK:      {{d\d+}}            InvokeVirtual
+
+  /// CHECK-START: double Main.getDoubleFromParam(Second) inliner (after)
+  /// CHECK:      {{d\d+}}            InvokeVirtual
+
+  /// CHECK-START: double Main.getDoubleFromParam(Second) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldGet
+
+  public static double getDoubleFromParam(Second s) {
+    return s.getInstanceDoubleFieldFromParam(s);
+  }
+
+  /// CHECK-START: int Main.getStaticInt(Second) inliner (before)
+  /// CHECK:      {{i\d+}}            InvokeVirtual
+
+  /// CHECK-START: int Main.getStaticInt(Second) inliner (after)
+  /// CHECK:      {{i\d+}}            InvokeVirtual
+
+  /// CHECK-START: int Main.getStaticInt(Second) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldGet
+  /// CHECK-NOT:                      StaticFieldGet
+
+  public static int getStaticInt(Second s) {
+    return s.getStaticIntField();
+  }
+
+  /// CHECK-START: long Main.setLong(Second, long) inliner (before)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: long Main.setLong(Second, long) inliner (after)
+  /// CHECK:                          InstanceFieldSet
+
+  /// CHECK-START: long Main.setLong(Second, long) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static long setLong(Second s, long value) {
+    s.setInstanceLongField(-1, value);
+    return s.instanceLongField;
+  }
+
+  /// CHECK-START: long Main.setLongReturnArg2(Second, long, int) inliner (before)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: long Main.setLongReturnArg2(Second, long, int) inliner (after)
+  /// CHECK-DAG:  <<Second:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Value:j\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Arg2:i\d+>>       ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>     NullCheck [<<Second>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<NullCk>>,<<Value>>]
+  /// CHECK-DAG:  <<NullCk2:l\d+>>    NullCheck [<<Second>>]
+  /// CHECK-DAG:  <<IGet:j\d+>>       InstanceFieldGet [<<NullCk2>>]
+  /// CHECK-DAG:  <<Conv:j\d+>>       TypeConversion [<<Arg2>>]
+  /// CHECK-DAG:  <<Add:j\d+>>        Add [<<IGet>>,<<Conv>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: long Main.setLongReturnArg2(Second, long, int) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
+
+  public static long setLongReturnArg2(Second s, long value, int arg2) {
+    int result = s.setInstanceLongFieldReturnArg2(value, arg2);
+    return s.instanceLongField + result;
+  }
+
+  /// CHECK-START: long Main.staticSetLong(Second, long) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: long Main.staticSetLong(Second, long) inliner (after)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: long Main.staticSetLong(Second, long) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static long staticSetLong(Second s, long value) {
+    Second.staticSetInstanceLongField(s, value);
+    return s.instanceLongField;
+  }
+
+  /// CHECK-START: long Main.setLongThroughParam(Second, long) inliner (before)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: long Main.setLongThroughParam(Second, long) inliner (after)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: long Main.setLongThroughParam(Second, long) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static long setLongThroughParam(Second s, long value) {
+    s.setInstanceLongFieldThroughParam(s, value);
+    return s.instanceLongField;
+  }
+
+  /// CHECK-START: float Main.setStaticFloat(Second, float) inliner (before)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: float Main.setStaticFloat(Second, float) inliner (after)
+  /// CHECK:                          InvokeVirtual
+
+  /// CHECK-START: float Main.setStaticFloat(Second, float) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+  /// CHECK-NOT:                      StaticFieldSet
+
+  public static float setStaticFloat(Second s, float value) {
+    s.setStaticFloatField(value);
+    return s.staticFloatField;
+  }
+
+  /// CHECK-START: java.lang.Object Main.newObject() inliner (before)
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:java.lang.Object.<init>
+
+  /// CHECK-START: java.lang.Object Main.newObject() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  public static Object newObject() {
+    return new Object();
+  }
+
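+  // The constructor tests below check that a recognized constructor is
+  // replaced by the equivalent InstanceFieldSet instructions, and that
+  // stores of default values (0, 0.0, null) are eliminated entirely, as
+  // the *With0* variants demonstrate.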
+  /// CHECK-START: double Main.constructBase() inliner (before)
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase() {
+    Base b = new Base();
+    return b.intField + b.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructBase(int) inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(int) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructBase(int) inliner (after)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+
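+  // A group of CHECK-DAGs followed by a CHECK-NOT for the same instruction
+  // asserts the exact number of occurrences: here, exactly one
+  // InstanceFieldSet remains after inlining.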
+  /// CHECK-START: double Main.constructBase(int) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(int intValue) {
+    Base b = new Base(intValue);
+    return b.intField + b.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructBaseWith0() inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      IntConstant 0
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBaseWith0() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBaseWith0() {
+    Base b = new Base(0);
+    return b.intField + b.doubleField;
+  }
+
+  /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (before)
+  /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
+  /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+
+  /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static String constructBase(String stringValue) {
+    Base b = new Base(stringValue);
+    return b.stringField;
+  }
+
+  /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (before)
+  /// CHECK-DAG:  <<Null:l\d+>>       NullConstant
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Null>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static String constructBaseWithNullString() {
+    String stringValue = null;
+    Base b = new Base(stringValue);
+    return b.stringField;
+  }
+
+  /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (before)
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<DValue>>,<<OValue>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<DValue>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<OValue>>]
+
+  /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(double doubleValue, Object objectValue) {
+    Base b = new Base(doubleValue, objectValue);
+    return (b.objectField != null) ? b.doubleField : -b.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,<<OValue>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<IValue>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<DValue>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<OValue>>]
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(int intValue, double doubleValue, Object objectValue) {
+    Base b = new Base(intValue, doubleValue, objectValue);
+    double tmp = b.intField + b.doubleField;
+    return (b.objectField != null) ? tmp : -tmp;
+  }
+
+  /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     IntConstant 0
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     NullConstant
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,<<OValue>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<DValue>>]
+
+  /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBaseWith0DoubleNull(double doubleValue) {
+    Base b = new Base(0, doubleValue, null);
+    double tmp = b.intField + b.doubleField;
+    return (b.objectField != null) ? tmp : -tmp;
+  }
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object, java.lang.String) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,{{l\d+}},{{l\d+}}{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object, java.lang.String) inliner (after)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,{{l\d+}},{{l\d+}}{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(int, double, java.lang.Object, java.lang.String) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(
+      int intValue, double doubleValue, Object objectValue, String stringValue) {
+    Base b = new Base(intValue, doubleValue, objectValue, stringValue);
+    double tmp = b.intField + b.doubleField;
+    tmp = (b.objectField != null) ? tmp : -tmp;
+    return (b.stringField != null) ? 2.0 * tmp : 0.5 * tmp;
+  }
+
+  /// CHECK-START: double Main.constructBase(double) inliner (before)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(double) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructBase(double) inliner (after)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+
+  /// CHECK-START: double Main.constructBase(double) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(double doubleValue) {
+    Base b = new Base(doubleValue);
+    return b.intField + b.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructBaseWith0d() inliner (before)
+  /// CHECK-DAG:  <<Value:d\d+>>      DoubleConstant
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBaseWith0d() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBaseWith0d() {
+    Base b = new Base(0.0);
+    return b.intField + b.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructBase(java.lang.Object) inliner (before)
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<OValue>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(java.lang.Object) inliner (after)
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<OValue>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(java.lang.Object) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(Object objectValue) {
+    Base b = new Base(objectValue);
+    double tmp = b.intField + b.doubleField;
+    return (b.objectField != null) ? tmp + 1.0 : tmp - 1.0;
+  }
+
+  /// CHECK-START: double Main.constructBase(int, long) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<JValue:j\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<JValue>>{{(,[ij]\d+)?}}] method_name:Base.<init>
+
+  /// CHECK-START: double Main.constructBase(int, long) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructBase(int, long) inliner (after)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<IValue>>]
+
+  /// CHECK-START: double Main.constructBase(int, long) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructBase(int intValue, long dummy) {
+    Base b = new Base(intValue, dummy);
+    return b.intField + b.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerived() inliner (before)
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived() {
+    Derived d = new Derived();
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerived(int) inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(int) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerived(int) inliner (after)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+
+  /// CHECK-START: double Main.constructDerived(int) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived(int intValue) {
+    Derived d = new Derived(intValue);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWith0() inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      IntConstant 0
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerivedWith0() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWith0() {
+    Derived d = new Derived(0);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (before)
+  /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static String constructDerived(String stringValue) {
+    Derived d = new Derived(stringValue);
+    return d.stringField;
+  }
+
+  /// CHECK-START: double Main.constructDerived(double) inliner (before)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(double) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerived(double) inliner (after)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+
+  /// CHECK-START: double Main.constructDerived(double) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived(double doubleValue) {
+    Derived d = new Derived(doubleValue);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWith0d() inliner (before)
+  /// CHECK-DAG:  <<Value:d\d+>>      DoubleConstant
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerivedWith0d() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWith0d() {
+    Derived d = new Derived(0.0);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,<<OValue>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<DValue>>]
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived(int intValue, double doubleValue, Object objectValue) {
+    Derived d = new Derived(intValue, doubleValue, objectValue);
+    double tmp = d.intField + d.doubleField;
+    return (d.objectField != null) ? tmp : -tmp;
+  }
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, java.lang.String) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,{{l\d+}},{{l\d+}}{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, java.lang.String) inliner (after)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,{{l\d+}},{{l\d+}}{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, java.lang.String) inliner (after)
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived(
+      int intValue, double doubleValue, Object objectValue, String stringValue) {
+    Derived d = new Derived(intValue, doubleValue, objectValue, stringValue);
+    double tmp = d.intField + d.doubleField;
+    tmp = (d.objectField != null) ? tmp : -tmp;
+    return (d.stringField != null) ? 2.0 * tmp : 0.5 * tmp;
+  }
+
+  /// CHECK-START: double Main.constructDerived(float) inliner (before)
+  /// CHECK-DAG:  <<Value:f\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(float) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerived(float) inliner (after)
+  /// CHECK-DAG:  <<Value:f\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+
+  /// CHECK-START: double Main.constructDerived(float) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived(float floatValue) {
+    Derived d = new Derived(floatValue);
+    return d.intField + d.doubleField + d.floatField;
+  }
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<OValue:l\d+>>     ParameterValue
+  /// CHECK-DAG:  <<FValue:f\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>,<<OValue>>,<<FValue>>{{(,[ij]\d+)?}}] method_name:Derived.<init>
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<FValue:f\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<IValue>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<DValue>>]
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<FValue>>]
+
+  /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerived(
+      int intValue, double doubleValue, Object objectValue, float floatValue) {
+    Derived d = new Derived(intValue, doubleValue, objectValue, floatValue);
+    double tmp = d.intField + d.doubleField + d.floatField;
+    return (d.objectField != null) ? tmp : -tmp;
+  }
+
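+  // Constructors that write a final field must be followed by a memory
+  // barrier so the object can be safely published; the tests below check
+  // that the barrier (and the store) are also eliminated when only default
+  // values are stored, as in the *With0* variants.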
+  /// CHECK-START: int Main.constructBaseWithFinalField() inliner (before)
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:BaseWithFinalField.<init>
+
+  /// CHECK-START: int Main.constructBaseWithFinalField() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructBaseWithFinalField() {
+    BaseWithFinalField b = new BaseWithFinalField();
+    return b.intField;
+  }
+
+  /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:BaseWithFinalField.<init>
+
+  /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+  /// CHECK-DAG:                      MemoryBarrier
+
+  /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructBaseWithFinalField(int intValue) {
+    BaseWithFinalField b = new BaseWithFinalField(intValue);
+    return b.intField;
+  }
+
+  /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      IntConstant 0
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:BaseWithFinalField.<init>
+
+  /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructBaseWithFinalFieldWith0() {
+    BaseWithFinalField b = new BaseWithFinalField(0);
+    return b.intField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (before)
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWithFinalField() {
+    DerivedWithFinalField d = new DerivedWithFinalField();
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+  /// CHECK-DAG:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWithFinalField(int intValue) {
+    DerivedWithFinalField d = new DerivedWithFinalField(intValue);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      IntConstant 0
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWithFinalFieldWith0() {
+    DerivedWithFinalField d = new DerivedWithFinalField(0);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (before)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+  /// CHECK-DAG:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWithFinalField(double doubleValue) {
+    DerivedWithFinalField d = new DerivedWithFinalField(doubleValue);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (before)
+  /// CHECK-DAG:  <<Value:d\d+>>      DoubleConstant
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWithFinalFieldWith0d() {
+    DerivedWithFinalField d = new DerivedWithFinalField(0.0);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
+  /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
+  /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
+  /// CHECK-DAG:                      MemoryBarrier
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-DAG:                      InstanceFieldSet
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
+  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-NOT:                      MemoryBarrier
+
+  public static double constructDerivedWithFinalField(int intValue, double doubleValue) {
+    DerivedWithFinalField d = new DerivedWithFinalField(intValue, doubleValue);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (before)
+  /// CHECK-DAG:  <<IValue:i\d+>>     IntConstant 0
+  /// CHECK-DAG:  <<DValue:d\d+>>     DoubleConstant
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<IValue>>,<<DValue>>{{(,[ij]\d+)?}}] method_name:DerivedWithFinalField.<init>
+
+  /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static double constructDerivedWithFinalFieldWith0And0d() {
+    DerivedWithFinalField d = new DerivedWithFinalField(0, 0.0);
+    return d.intField + d.doubleField;
+  }
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (before)
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructDerivedInSecondDex() {
+    DerivedInSecondDex d = new DerivedInSecondDex();
+    return d.intField;
+  }
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after)
+  /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after)
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructDerivedInSecondDex(int intValue) {
+    DerivedInSecondDex d = new DerivedInSecondDex(intValue);
+    return d.intField;
+  }
+
+  /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (before)
+  /// CHECK-DAG:  <<Value:i\d+>>      IntConstant 0
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
+
+  /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after)
+  /// CHECK-DAG:  <<Value:i\d+>>      IntConstant 0
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
+
+  /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after)
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructDerivedInSecondDexWith0() {
+    DerivedInSecondDex d = new DerivedInSecondDex(0);
+    return d.intField;
+  }
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (before)
+  /// CHECK-DAG:  <<Value:j\d+>>      ParameterValue
+  /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
+
+  /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      InstanceFieldSet
+
+  public static int constructDerivedInSecondDex(long dummy) {
+    DerivedInSecondDex d = new DerivedInSecondDex(dummy);
+    return d.intField;
+  }
+
+  public static void main(String[] args) throws Exception {
+    Second s = new Second();
+
+    // Replaced NOP pattern.
+    staticNop();
+    nop(s);
+    // Replaced "return arg" pattern.
+    assertEquals("arbitrary string", staticReturnArg2("arbitrary string"));
+    assertEquals(4321L, returnArg1(s, 4321L));
+    // Replaced "return const" pattern.
+    assertEquals(9, staticReturn9());
+    assertEquals(7, return7(s));
+    assertEquals(null, staticReturnNull());
+    assertEquals(null, returnNull(s));
+    // Replaced IGET pattern.
+    assertEquals(42, getInt(s));
+    assertEquals(-42.0, getDouble(s));
+    assertEquals(null, getObject(s));
+    assertEquals("dummy", getString(s));
+    // Not replaced IGET pattern.
+    assertEquals(42, staticGetInt(s));
+    assertEquals(-42.0, getDoubleFromParam(s));
+    // SGET.
+    assertEquals(4242, getStaticInt(s));
+    // Replaced IPUT pattern.
+    assertEquals(111L, setLong(s, 111L));
+    assertEquals(345L, setLongReturnArg2(s, 222L, 123));
+    // Not replaced IPUT pattern.
+    assertEquals(222L, staticSetLong(s, 222L));
+    assertEquals(333L, setLongThroughParam(s, 333L));
+    // SPUT.
+    assertEquals(-11.5f, setStaticFloat(s, -11.5f));
+
+    if (newObject() == null) {
+      throw new AssertionError("new Object() cannot be null.");
+    }
+
+    assertEquals(0.0, constructBase());
+    assertEquals(42.0, constructBase(42));
+    assertEquals(0.0, constructBaseWith0());
+    assertEquals("something", constructBase("something"));
+    assertEquals(null, constructBaseWithNullString());
+    assertEquals(11.0, constructBase(11.0, new Object()));
+    assertEquals(-12.0, constructBase(12.0, null));
+    assertEquals(30.0, constructBase(17, 13.0, new Object()));
+    assertEquals(-34.0, constructBase(19, 15.0, null));
+    assertEquals(-22.5, constructBaseWith0DoubleNull(22.5));
+    assertEquals(-8.0, constructBase(2, 14.0, null, null));
+    assertEquals(-64.0, constructBase(4, 28.0, null, "dummy"));
+    assertEquals(13.0, constructBase(24, 2.0, new Object(), null));
+    assertEquals(30.0, constructBase(11, 4.0, new Object(), "dummy"));
+    assertEquals(43.0, constructBase(43.0));
+    assertEquals(0.0, constructBaseWith0d());
+    assertEquals(1.0, constructBase(new Object()));
+    assertEquals(-1.0, constructBase((Object) null));
+    assertEquals(123.0, constructBase(123, 65L));
+
+    assertEquals(0.0, constructDerived());
+    assertEquals(73.0, constructDerived(73));
+    assertEquals(0.0, constructDerivedWith0());
+    assertEquals(null, constructDerived("something else"));
+    assertEquals(18.0, constructDerived(18.0));
+    assertEquals(0.0, constructDerivedWith0d());
+    assertEquals(-7.0, constructDerived(5, 7.0, new Object()));
+    assertEquals(-4.0, constructDerived(9, 4.0, null));
+    assertEquals(0.0, constructDerived(1, 9.0, null, null));
+    assertEquals(0.0, constructDerived(2, 8.0, null, "dummy"));
+    assertEquals(0.0, constructDerived(3, 7.0, new Object(), null));
+    assertEquals(0.0, constructDerived(4, 6.0, new Object(), "dummy"));
+    assertEquals(17.0, constructDerived(17.0f));
+    assertEquals(-5.5, constructDerived(6, -7.0, new Object(), 6.5f));
+
+    assertEquals(0, constructBaseWithFinalField());
+    assertEquals(77, constructBaseWithFinalField(77));
+    assertEquals(0, constructBaseWithFinalFieldWith0());
+    assertEquals(0.0, constructDerivedWithFinalField());
+    assertEquals(-33.0, constructDerivedWithFinalField(-33));
+    assertEquals(0.0, constructDerivedWithFinalFieldWith0());
+    assertEquals(-44.0, constructDerivedWithFinalField(-44.0));
+    assertEquals(0.0, constructDerivedWithFinalFieldWith0d());
+    assertEquals(88, constructDerivedWithFinalField(22, 66.0));
+    assertEquals(0.0, constructDerivedWithFinalFieldWith0And0d());
+
+    assertEquals(0, constructDerivedInSecondDex());
+    assertEquals(123, constructDerivedInSecondDex(123));
+    assertEquals(0, constructDerivedInSecondDexWith0());
+    assertEquals(0, constructDerivedInSecondDex(7L));
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new AssertionError("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  private static void assertEquals(double expected, double actual) {
+    if (expected != actual) {
+      throw new AssertionError("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  private static void assertEquals(Object expected, Object actual) {
+    if (expected != actual && (expected == null || !expected.equals(actual))) {
+      throw new AssertionError("Wrong result: " + expected + " != " + actual);
+    }
+  }
+}
diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt
new file mode 100644
index 0000000..65447be
--- /dev/null
+++ b/test/570-checker-osr/expected.txt
@@ -0,0 +1,6 @@
+JNI_OnLoad called
+100000
+200000
+300000
+400000
+b28210356 passed.
diff --git a/test/530-checker-loops/expected.txt b/test/570-checker-osr/info.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/570-checker-osr/info.txt
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
new file mode 100644
index 0000000..cf413ba
--- /dev/null
+++ b/test/570-checker-osr/osr.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
+#include "stack_map.h"
+
+namespace art {
+
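+// Walks the managed stack looking for the method named |method_name| and
+// records whether its current frame is executing OSR-compiled code or is
+// still in the interpreter.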
+class OsrVisitor : public StackVisitor {
+ public:
+  explicit OsrVisitor(Thread* thread, const char* method_name)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name),
+        in_osr_method_(false),
+        in_interpreter_(false) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name.compare(method_name_) == 0) {
+      const OatQuickMethodHeader* header =
+          Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
+      if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
+        in_osr_method_ = true;
+      } else if (IsCurrentFrameInInterpreter()) {
+        in_interpreter_ = true;
+      }
+      return false;
+    }
+    return true;
+  }
+
+  const char* const method_name_;
+  bool in_osr_method_;
+  bool in_interpreter_;
+};
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInOsrCode(JNIEnv* env,
+                                                            jclass,
+                                                            jstring method_name) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    // Just return true for non-JIT configurations so the test's wait loop terminates.
+    return JNI_TRUE;
+  }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  OsrVisitor visitor(soa.Self(), chars.c_str());
+  visitor.WalkStack();
+  return visitor.in_osr_method_;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInInterpreter(JNIEnv* env,
+                                                                jclass,
+                                                                jstring method_name) {
+  if (!Runtime::Current()->UseJitCompilation()) {
+    // The return value is irrelevant if we're not using JIT.
+    return false;
+  }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  OsrVisitor visitor(soa.Self(), chars.c_str());
+  visitor.WalkStack();
+  return visitor.in_interpreter_;
+}
+
+class ProfilingInfoVisitor : public StackVisitor {
+ public:
+  explicit ProfilingInfoVisitor(Thread* thread, const char* method_name)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name.compare(method_name_) == 0) {
+      ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
+      return false;
+    }
+    return true;
+  }
+
+  const char* const method_name_;
+};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasProfilingInfo(JNIEnv* env,
+                                                                   jclass,
+                                                                   jstring method_name) {
+  if (!Runtime::Current()->UseJitCompilation()) {
+    return;
+  }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  ProfilingInfoVisitor visitor(soa.Self(), chars.c_str());
+  visitor.WalkStack();
+}
+
+class OsrCheckVisitor : public StackVisitor {
+ public:
+  OsrCheckVisitor(Thread* thread, const char* method_name)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (m_name.compare(method_name_) == 0) {
+      while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
+        // Sleep to yield to the compiler thread.
+        usleep(1000);
+        // CompileMethod either finds the OSR code already present or performs the compilation itself.
+        jit->CompileMethod(m, Thread::Current(), /* osr */ true);
+      }
+      return false;
+    }
+    return true;
+  }
+
+  const char* const method_name_;
+};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasOsrCode(JNIEnv* env,
+                                                             jclass,
+                                                             jstring method_name) {
+  if (!Runtime::Current()->UseJitCompilation()) {
+    return;
+  }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  OsrCheckVisitor visitor(soa.Self(), chars.c_str());
+  visitor.WalkStack();
+}
+
+}  // namespace art
diff --git a/test/570-checker-osr/run b/test/570-checker-osr/run
new file mode 100755
index 0000000..24d69b4
--- /dev/null
+++ b/test/570-checker-osr/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure this test is not subject to code collection.
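+# A large initial JIT code cache (32M) avoids the need for code cache
+# collection, so the OSR-compiled code the test waits for stays alive.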
+exec ${RUN} "$@" --runtime-option -Xjitinitialsize:32M
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
new file mode 100644
index 0000000..6592b7b
--- /dev/null
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LOsr;
+
+.super Ljava/lang/Object;
+
+# Check that blocks containing only nops are not merged when they are loop headers.
+# This ensures we can do on-stack replacement for branches to those nop blocks.
+
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination$final (after)
+## CHECK-DAG:                     SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
+## CHECK-DAG:                     SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
+.method public static simpleLoop(II)I
+   .registers 3
+   const/16 v0, 0
+   :nop_entry
+   nop
+   :loop_entry
+   add-int v0, v0, v0
+   if-eq v0, v1, :loop_entry
+   if-eq v0, v2, :nop_entry
+   return v0
+.end method
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
new file mode 100644
index 0000000..907d133
--- /dev/null
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is a copy of 802-deoptimization/src/DeoptimizationController.java
+// because run-test requires each test to be standalone.
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+/**
+ * Controls deoptimization using dalvik.system.VMDebug class.
+ */
+public class DeoptimizationController {
+  private static final String TEMP_FILE_NAME_PREFIX = "test";
+  private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+
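+  // Try the default temp directory first, then fall back to /data/local/tmp
+  // and finally /sdcard, since the default may not be writable on the device.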
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+
+  public static void startDeoptimization() {
+    File tempFile = null;
+    try {
+      tempFile = createTempFile();
+      String tempFileName = tempFile.getPath();
+
+      VMDebug.startMethodTracing(tempFileName, 0, 0, false, 1000);
+      if (VMDebug.getMethodTracingMode() == 0) {
+        throw new IllegalStateException("Not tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    } finally {
+      if (tempFile != null) {
+        tempFile.delete();
+      }
+    }
+  }
+
+  public static void stopDeoptimization() {
+    try {
+      VMDebug.stopMethodTracing();
+      if (VMDebug.getMethodTracingMode() != 0) {
+        throw new IllegalStateException("Still tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    }
+  }
+
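+  // Reflective wrapper around dalvik.system.VMDebug, which is resolved at
+  // runtime because it is not directly accessible at compile time here.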
+  private static class VMDebug {
+    private static final Method startMethodTracingMethod;
+    private static final Method stopMethodTracingMethod;
+    private static final Method getMethodTracingModeMethod;
+
+    static {
+      try {
+        Class<?> c = Class.forName("dalvik.system.VMDebug");
+        startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+            Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+        stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+        getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void startMethodTracing(String filename, int bufferSize, int flags,
+        boolean samplingEnabled, int intervalUs) throws Exception {
+      startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+          intervalUs);
+    }
+    public static void stopMethodTracing() throws Exception {
+      stopMethodTracingMethod.invoke(null);
+    }
+    public static int getMethodTracingMode() throws Exception {
+      return (int) getMethodTracingModeMethod.invoke(null);
+    }
+  }
+}
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
new file mode 100644
index 0000000..8af3894
--- /dev/null
+++ b/test/570-checker-osr/src/Main.java
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
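+  // Run the actual test on a separate thread with a 20s watchdog, so a
+  // missed OSR transition surfaces as a timeout rather than a hang.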
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    Thread testThread = new Thread() {
+      public void run() {
+        performTest();
+      }
+    };
+    testThread.start();
+    try {
+      testThread.join(20 * 1000);  // 20s timeout.
+    } catch (InterruptedException ie) {
+      System.out.println("Interrupted.");
+      System.exit(1);
+    }
+    Thread.State state = testThread.getState();
+    if (state != Thread.State.TERMINATED) {
+      System.out.println("Test timed out, current state: " + state);
+      System.exit(1);
+    }
+  }
+
+  public static void performTest() {
+    new SubMain();
+    if ($noinline$returnInt() != 53) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnFloat() != 42.2f) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnDouble() != Double.longBitsToDouble(0xF000000000001111L)) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnLong() != 0xFFFF000000001111L) {
+      throw new Error("Unexpected return value");
+    }
+
+    try {
+      $noinline$deopt();
+    } catch (Exception e) {}
+    DeoptimizationController.stopDeoptimization();
+
+    $noinline$inlineCache(new Main(), /* isSecondInvocation */ false);
+    if ($noinline$inlineCache(new SubMain(), /* isSecondInvocation */ true) != SubMain.class) {
+      throw new Error("Unexpected return value");
+    }
+
+    $noinline$inlineCache2(new Main(), /* isSecondInvocation */ false);
+    if ($noinline$inlineCache2(new SubMain(), /* isSecondInvocation */ true) != SubMain.class) {
+      throw new Error("Unexpected return value");
+    }
+
+    // Test polymorphic inline cache to the same target (inlineCache3).
+    $noinline$inlineCache3(new Main(), /* isSecondInvocation */ false);
+    $noinline$inlineCache3(new SubMain(), /* isSecondInvocation */ false);
+    if ($noinline$inlineCache3(new SubMain(), /* isSecondInvocation */ true) != null) {
+      throw new Error("Unexpected return value");
+    }
+
+    $noinline$stackOverflow(new Main(), /* isSecondInvocation */ false);
+    $noinline$stackOverflow(new SubMain(), /* isSecondInvocation */ true);
+
+    $opt$noinline$testOsrInlineLoop(null);
+    System.out.println("b28210356 passed.");
+  }
+
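+  // Each $noinline$return* method below runs a hot counted loop to trigger
+  // OSR compilation, then busy-waits until execution has transferred to the
+  // OSR code before returning the type-specific value checked in performTest.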
+  public static int $noinline$returnInt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000; ++i) {
+    }
+    while (!isInOsrCode("$noinline$returnInt")) {}
+    System.out.println(i);
+    return 53;
+  }
+
+  public static float $noinline$returnFloat() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 200000; ++i) {
+    }
+    while (!isInOsrCode("$noinline$returnFloat")) {}
+    System.out.println(i);
+    return 42.2f;
+  }
+
+  public static double $noinline$returnDouble() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 300000; ++i) {
+    }
+    while (!isInOsrCode("$noinline$returnDouble")) {}
+    System.out.println(i);
+    return Double.longBitsToDouble(0xF000000000001111L);
+  }
+
+  public static long $noinline$returnLong() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 400000; ++i) {
+    }
+    while (!isInOsrCode("$noinline$returnLong")) {}
+    System.out.println(i);
+    return 0xFFFF000000001111L;
+  }
+
+  public static void $noinline$deopt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000; ++i) {
+    }
+    while (!isInOsrCode("$noinline$deopt")) {}
+    DeoptimizationController.startDeoptimization();
+  }
+
+  public static Class<?> $noinline$inlineCache(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!isInInterpreter("$noinline$inlineCache")) {
+      return SubMain.class;
+    }
+
+    ensureHasProfilingInfo("$noinline$inlineCache");
+
+    // Ensure that we have OSR code to jump to.
+    if (isSecondInvocation) {
+      ensureHasOsrCode("$noinline$inlineCache");
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache();
+
+    // Jump to OSR compiled code. The second run of this method will have 'm'
+    // as a SubMain, and the compiled code we are jumping to will have wrongly
+    // optimized 'other' as being a 'Main'.
+    if (isSecondInvocation) {
+      while (!isInOsrCode("$noinline$inlineCache")) {}
+    }
+
+    // We used to wrongly optimize this call and assume 'other' was a 'Main'.
+    return other.returnClass();
+  }
+
+  public static Class<?> $noinline$inlineCache2(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!isInInterpreter("$noinline$inlineCache2")) {
+      return SubMain.class;
+    }
+
+    ensureHasProfilingInfo("$noinline$inlineCache2");
+
+    // Ensure that we have OSR code to jump to.
+    if (isSecondInvocation) {
+      ensureHasOsrCode("$noinline$inlineCache2");
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache2();
+
+    // Jump to OSR compiled code. The second run of this method will have 'm'
+    // as a SubMain, and the compiled code we are jumping to will have wrongly
+    // optimized 'other' as being null.
+    if (isSecondInvocation) {
+      while (!isInOsrCode("$noinline$inlineCache2")) {}
+    }
+
+    // We used to wrongly optimize this code and assume 'other' was always null.
+    return (other == null) ? null : other.returnClass();
+  }
+
+  public static Class<?> $noinline$inlineCache3(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!isInInterpreter("$noinline$inlineCache3")) {
+      return null;
+    }
+
+    ensureHasProfilingInfo("$noinline$inlineCache3");
+
+    // Ensure that we have OSR code to jump to.
+    if (isSecondInvocation) {
+      ensureHasOsrCode("$noinline$inlineCache3");
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache3();
+
+    // Jump to OSR compiled code. The second run of this method will have 'm'
+    // as a SubMain, and the compiled code we are jumping to will have wrongly
+    // optimized 'other' as being null.
+    if (isSecondInvocation) {
+      while (!isInOsrCode("$noinline$inlineCache3")) {}
+    }
+
+    // We used to wrongly optimize this code and assume 'other' was always null.
+    return (other == null) ? null : other.returnClass();
+  }
+
+  public Main inlineCache() {
+    return new Main();
+  }
+
+  public Main inlineCache2() {
+    return null;
+  }
+
+  public Main inlineCache3() {
+    return null;
+  }
+
+  public Class<?> returnClass() {
+    return Main.class;
+  }
+
+  public void otherInlineCache() {
+    return;
+  }
+
+  public static void $noinline$stackOverflow(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!isInInterpreter("$noinline$stackOverflow")) {
+      return;
+    }
+
+    // We need a ProfilingInfo object to populate the 'otherInlineCache' call.
+    ensureHasProfilingInfo("$noinline$stackOverflow");
+
+    if (isSecondInvocation) {
+      // Ensure we have OSR code and jump to it.
+      while (!isInOsrCode("$noinline$stackOverflow")) {}
+    }
+
+    for (int i = 0; i < (isSecondInvocation ? 10000000 : 1); ++i) {
+      // The first invocation of $noinline$stackOverflow will populate the
+      // inline cache with Main. The second invocation of the method will see
+      // a SubMain and will therefore trigger deoptimization.
+      m.otherInlineCache();
+    }
+  }
+
+  public static void $opt$noinline$testOsrInlineLoop(String[] args) {
+    // Regression test for inlining a method with a loop into a method without a loop in OSR mode.
+    if (doThrow) throw new Error();
+    assertIntEquals(12, $opt$inline$testRemoveSuspendCheck(12, 5));
+    // Since we cannot have a loop directly in this method, we need to force the OSR
+    // compilation from native code.
+    ensureHasProfilingInfo("$opt$noinline$testOsrInlineLoop");
+    ensureHasOsrCode("$opt$noinline$testOsrInlineLoop");
+  }
+
+  public static int $opt$inline$testRemoveSuspendCheck(int x, int y) {
+    // For this test we need an inlined loop, and DCE must re-run loop
+    // analysis after inlining.
+    while (y > 0) {
+      while ($opt$inline$inlineFalse() || !$opt$inline$inlineTrue()) {
+        x++;
+      }
+      y--;
+    }
+    return x;
+  }
+
+  public static boolean $opt$inline$inlineTrue() {
+    return true;
+  }
+
+  public static boolean $opt$inline$inlineFalse() {
+    return false;
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static native boolean isInOsrCode(String methodName);
+  public static native boolean isInInterpreter(String methodName);
+  public static native void ensureHasProfilingInfo(String methodName);
+  public static native void ensureHasOsrCode(String methodName);
+
+  public static boolean doThrow = false;
+}
+
+class SubMain extends Main {
+  public Class<?> returnClass() {
+    return SubMain.class;
+  }
+
+  public Main inlineCache() {
+    return new SubMain();
+  }
+
+  public Main inlineCache2() {
+    return new SubMain();
+  }
+
+  public void otherInlineCache() {
+    return;
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/570-checker-select/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/570-checker-select/expected.txt
diff --git a/test/570-checker-select/info.txt b/test/570-checker-select/info.txt
new file mode 100644
index 0000000..6d49532
--- /dev/null
+++ b/test/570-checker-select/info.txt
@@ -0,0 +1 @@
+Tests for HSelect codegens.
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
new file mode 100644
index 0000000..e0a76ca
--- /dev/null
+++ b/test/570-checker-select/src/Main.java
@@ -0,0 +1,650 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  static boolean doThrow = false;
+
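+  // Each method below pairs a ternary expression with Checker assertions:
+  // the architecture-independent `register (after)` pass must contain a
+  // Select, and the per-architecture `disassembly (after)` checks expect the
+  // corresponding conditional select/move instructions (csel/fcsel, cmov).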
+  /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel ne
+
+  /// CHECK-START-X86_64: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntVarCst(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc ne
+
+  /// CHECK-START-X86_64: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_IntVarCst(boolean cond, int x) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? x : 1;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntCstVar(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc eq
+
+  /// CHECK-START-X86_64: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_IntCstVar(boolean cond, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? 1 : y;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongVarVar(boolean, long, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel ne
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  /// CHECK-START-X86: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
+  public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongVarCst(boolean, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc ne
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  /// CHECK-START-X86: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
+  public static long BoolCond_LongVarCst(boolean cond, long x) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? x : 1L;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongCstVar(boolean, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc eq
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  /// CHECK-START-X86: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
+  public static long BoolCond_LongCstVar(boolean cond, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? 1L : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarVar(boolean, float, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: float Main.BoolCond_FloatVarVar(boolean, float, float) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            fcsel ne
+
+  public static float BoolCond_FloatVarVar(boolean cond, float x, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarCst(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: float Main.BoolCond_FloatVarCst(boolean, float) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            fcsel ne
+
+  public static float BoolCond_FloatVarCst(boolean cond, float x) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? x : 1.0f;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatCstVar(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  /// CHECK-START-ARM64: float Main.BoolCond_FloatCstVar(boolean, float) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            fcsel ne
+
+  public static float BoolCond_FloatCstVar(boolean cond, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? 1.0f : y;
+  }
+
+  /// CHECK-START: int Main.IntNonmatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel le
+
+  /// CHECK-START-X86_64: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
+  /// CHECK-START-X86: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
+  public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.IntMatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  /// CHECK-START-ARM64: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
+  /// CHECK-START-X86_64: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
+  /// CHECK-START-X86: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
+  public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: long Main.IntNonmatCond_LongVarVar(int, int, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel le
+
+  /// CHECK-START-X86_64: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+
+  /// CHECK-START-X86: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+
+  public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: long Main.IntMatCond_LongVarVar(int, int, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
+
+  /// CHECK-START-ARM64: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
+  /// CHECK-START-X86_64: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  /// CHECK-START-X86: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
+  public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    long result = (a > b ? x : y);
+    return result + (a > b ? 0L : 1L);
+  }
+
+  /// CHECK-START: long Main.LongNonmatCond_LongVarVar(long, long, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: long Main.LongNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel le
+
+  /// CHECK-START-X86_64: long Main.LongNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+
+  public static long LongNonmatCond_LongVarVar(long a, long b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
+
+  /// CHECK-START-ARM64: long Main.LongMatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
+  /// CHECK-START-X86_64: long Main.LongMatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long LongMatCond_LongVarVar(long a, long b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    long result = (a > b ? x : y);
+    return result + (a > b ? 0L : 1L);
+  }
+
+  /// CHECK-START: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            csel le
+
+  public static int FloatLtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatGtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.FloatGtNonmatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            csel hs
+
+  public static int FloatGtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: float Main.FloatGtNonmatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{f\d+}},{{f\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: float Main.FloatGtNonmatCond_FloatVarVar(float, float, float, float) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            fcsel hs
+
+  public static float FloatGtNonmatCond_FloatVarVar(float a, float b, float x, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatLtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  /// CHECK-START-ARM64: int Main.FloatLtMatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
+  public static int FloatLtMatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.FloatGtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  /// CHECK-START-ARM64: int Main.FloatGtMatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            cset hs
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel hs
+
+  public static int FloatGtMatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    int result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  /// CHECK-START: float Main.FloatGtMatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual
+  /// CHECK-NEXT:       <<Sel:f\d+>>  Select [{{f\d+}},{{f\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     TypeConversion [<<Cond>>]
+
+  /// CHECK-START-ARM64: float Main.FloatGtMatCond_FloatVarVar(float, float, float, float) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            cset hs
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcsel hs
+
+  public static float FloatGtMatCond_FloatVarVar(float a, float b, float x, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    float result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.BoolCond_0_m1(boolean) register (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_0_m1(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmp {{w\d+}}, #0x0 (0)
+  /// CHECK-NEXT:                     csetm {{w\d+}}, eq
+
+  /// CHECK-START-X86_64: int Main.BoolCond_0_m1(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_0_m1(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_0_m1(boolean cond) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? 0 : -1;
+  }
+
+  /// CHECK-START: int Main.BoolCond_m1_0(boolean) register (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_m1_0(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmp {{w\d+}}, #0x0 (0)
+  /// CHECK-NEXT:                     csetm {{w\d+}}, ne
+
+  /// CHECK-START-X86_64: int Main.BoolCond_m1_0(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_m1_0(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_m1_0(boolean cond) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? -1 : 0;
+  }
+
+  public static void assertEqual(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void assertEqual(float expected, float actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEqual(5, BoolCond_IntVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_IntVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_IntVarCst(true, 5));
+    assertEqual(1, BoolCond_IntVarCst(false, 5));
+    assertEqual(1, BoolCond_IntCstVar(true, 7));
+    assertEqual(7, BoolCond_IntCstVar(false, 7));
+
+    assertEqual(5L, BoolCond_LongVarVar(true, 5L, 7L));
+    assertEqual(7L, BoolCond_LongVarVar(false, 5L, 7L));
+    assertEqual(5L, BoolCond_LongVarCst(true, 5L));
+    assertEqual(1L, BoolCond_LongVarCst(false, 5L));
+    assertEqual(1L, BoolCond_LongCstVar(true, 7L));
+    assertEqual(7L, BoolCond_LongCstVar(false, 7L));
+
+    assertEqual(5, BoolCond_FloatVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_FloatVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_FloatVarCst(true, 5));
+    assertEqual(1, BoolCond_FloatVarCst(false, 5));
+    assertEqual(1, BoolCond_FloatCstVar(true, 7));
+    assertEqual(7, BoolCond_FloatCstVar(false, 7));
+
+    assertEqual(5, IntNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, IntNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(5, IntMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, IntMatCond_IntVarVar(2, 3, 5, 7));
+
+    assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatLtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(0, BoolCond_0_m1(true));
+    assertEqual(-1, BoolCond_0_m1(false));
+    assertEqual(-1, BoolCond_m1_0(true));
+    assertEqual(0, BoolCond_m1_0(false));
+  }
+}
diff --git a/test/571-irreducible-loop/expected.txt b/test/571-irreducible-loop/expected.txt
new file mode 100644
index 0000000..3a71184
--- /dev/null
+++ b/test/571-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+5.9E-44
diff --git a/test/571-irreducible-loop/info.txt b/test/571-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/571-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/571-irreducible-loop/smali/IrreducibleLoop.smali b/test/571-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..737a18b
--- /dev/null
+++ b/test/571-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,47 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Check that on x86 we don't crash because irreducible loops
+# disable the constant pool optimization.
+.method public static test1(IF)F
+   .registers 5
+   const/16 v0, 1
+   const/16 v1, 42
+
+   if-nez p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   add-float v2, p1, v1
+   sub-float v2, v2, v1
+   div-float v2, v2, v1
+   mul-float v2, v2, v1
+   :other_loop_entry
+   sub-int p0, p0, v0
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   # In that block, v4 has no live range.
+   :other_loop_pre_entry
+   goto :other_loop_entry
+
+   :exit
+   return v1
+.end method
diff --git a/test/571-irreducible-loop/src/Main.java b/test/571-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..ff22f67
--- /dev/null
+++ b/test/571-irreducible-loop/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
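+  // test1 is declared to return a float but returns the integer constant 42,
+  // whose bit pattern reinterpreted as a float prints as 5.9E-44 (the value
+  // in expected.txt).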
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("test1", int.class, float.class);
+    Object[] arguments = { 42, 31.0f };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/572-checker-array-get-regression/expected.txt b/test/572-checker-array-get-regression/expected.txt
new file mode 100644
index 0000000..f7d1ad4
--- /dev/null
+++ b/test/572-checker-array-get-regression/expected.txt
@@ -0,0 +1 @@
+524287
diff --git a/test/572-checker-array-get-regression/info.txt b/test/572-checker-array-get-regression/info.txt
new file mode 100644
index 0000000..d06feee
--- /dev/null
+++ b/test/572-checker-array-get-regression/info.txt
@@ -0,0 +1,3 @@
+Regression test for the ARM64 Baker's read barrier fast path compiler
+instrumentation of array loads with a large constant index, where we
+used to require too many scratch (temporary) registers.
diff --git a/test/572-checker-array-get-regression/src/Main.java b/test/572-checker-array-get-regression/src/Main.java
new file mode 100644
index 0000000..89b97ed
--- /dev/null
+++ b/test/572-checker-array-get-regression/src/Main.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    System.out.println(test().intValue());
+  }
+
+  /// CHECK-START: java.lang.Integer Main.test() builder (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>    CurrentMethod
+  /// CHECK-DAG:     <<Const2P19:i\d+>>    IntConstant 524288
+  /// CHECK-DAG:     <<ConstM1:i\d+>>      IntConstant -1
+  /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P19>>,<<Method>>]
+  /// CHECK-DAG:     <<Length1:i\d+>>      ArrayLength [<<Array>>]
+  /// CHECK-DAG:     <<Index:i\d+>>        Add [<<Length1>>,<<ConstM1>>]
+  /// CHECK-DAG:     <<Length2:i\d+>>      ArrayLength [<<Array>>]
+  /// CHECK-DAG:     <<BoundsCheck:i\d+>>  BoundsCheck [<<Index>>,<<Length2>>]
+  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<Array>>,<<BoundsCheck>>]
+  /// CHECK-DAG:                           Return [<<LastElement>>]
+
+  /// CHECK-START: java.lang.Integer Main.test() register (before)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>    CurrentMethod
+  /// CHECK-DAG:     <<Const2P19:i\d+>>    IntConstant 524288
+  /// CHECK-DAG:     <<Const2P19M1:i\d+>>  IntConstant 524287
+  /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P19>>,<<Method>>]
+  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<Array>>,<<Const2P19M1>>]
+  /// CHECK-DAG:                           Return [<<LastElement>>]
+
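+  // After constant folding and bounds check elimination, the ArrayGet uses
+  // the constant index 524287 ((1 << 19) - 1), large enough to stress scratch
+  // register allocation in the read barrier instrumentation.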
+  public static Integer test() {
+    Integer[] integers = new Integer[1 << 19];
+    initIntegerArray(integers);
+    // Array load with a large constant index (after constant folding
+    // and bounds check elimination).
+    Integer last_integer = integers[integers.length - 1];
+    return last_integer;
+  }
+
+  public static void initIntegerArray(Integer[] integers) {
+    for (int i = 0; i < integers.length; ++i) {
+      integers[i] = new Integer(i);
+    }
+  }
+
+}
diff --git a/test/573-checker-checkcast-regression/expected.txt b/test/573-checker-checkcast-regression/expected.txt
new file mode 100644
index 0000000..b8626c4
--- /dev/null
+++ b/test/573-checker-checkcast-regression/expected.txt
@@ -0,0 +1 @@
+4
diff --git a/test/573-checker-checkcast-regression/info.txt b/test/573-checker-checkcast-regression/info.txt
new file mode 100644
index 0000000..74a6d6e
--- /dev/null
+++ b/test/573-checker-checkcast-regression/info.txt
@@ -0,0 +1,4 @@
+Regression test for the x86-64 Baker's read barrier fast path compiler
+instrumentation of CheckCasts, where we used to use an
+art::x86_64::NearLabel, the range of which was sometimes too short
+with Baker's read barriers enabled.
diff --git a/test/573-checker-checkcast-regression/src/Main.java b/test/573-checker-checkcast-regression/src/Main.java
new file mode 100644
index 0000000..473a2b1
--- /dev/null
+++ b/test/573-checker-checkcast-regression/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    Object[] array = { new Integer(1), new Integer(2), new Integer(3) };
+    int result = test(array, 0, 2);
+    System.out.println(result);
+  }
+
+  // This test method uses two integers (`index1` and `index2`) to
+  // force the register allocator to use some high registers (R8-R15)
+  // on x86-64 in the code generated for the first CheckCast (which
+  // converts `new_array` to an `Object[]`), so as to produce code
+  // containing a conditional jump whose offset does not fit in a
+  // NearLabel when using Baker's read barrier fast path (because
+  // x86-64 instructions using these high registers have a larger
+  // encoding).
+  //
+  // The intent of this artificial constraint is to ensure the initial
+  // failure is properly tested by this regression test.
+
+  /// CHECK-START: int Main.test(java.lang.Object, int, int) register (after)
+  /// CHECK-DAG:     CheckCast check_kind:array_object_check
+  /// CHECK-DAG:     CheckCast check_kind:exact_check
+  /// CHECK-DAG:     CheckCast check_kind:exact_check
+
+  public static int test(Object new_array, int index1, int index2) {
+    Object[] objectArray = (Object[]) new_array;
+    Integer integer1 = (Integer) objectArray[index1];
+    Integer integer2 = (Integer) objectArray[index2];
+    return integer1.intValue() + integer2.intValue();
+  }
+
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/574-irreducible-and-constant-area/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/574-irreducible-and-constant-area/expected.txt
diff --git a/test/574-irreducible-and-constant-area/info.txt b/test/574-irreducible-and-constant-area/info.txt
new file mode 100644
index 0000000..e957a5a
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/info.txt
@@ -0,0 +1,3 @@
+Regression test for intrinsics on x86, which used to wrongly assume
+an HInvokeStaticOrDirect must have a special input (which does not
+hold for irreducible loops).
diff --git a/test/574-irreducible-and-constant-area/run b/test/574-irreducible-and-constant-area/run
new file mode 100755
index 0000000..ffdbcc9
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Don't do relocation, as it affects this test.
+exec ${RUN} "$@" --no-relocate
diff --git a/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali b/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..d7d4346
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
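+# Call intrinsics (Math.abs/min/max via Main.$inline$foo) from inside an
+# irreducible loop; on x86 the intrinsic used to wrongly assume the invoke
+# carried a constant-area base input.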
+.method public static simpleLoop(I)I
+   .registers 5
+   const/16 v0, 42
+   const/16 v1, 42
+   const-wide/high16 v2, 0x4000000000000000L
+   if-eq p0, v0, :other_loop_entry
+   :loop_entry
+   invoke-static {v1, v1}, LMain;->$inline$foo(FF)V
+   invoke-static {v2, v3, v2, v3}, LMain;->$inline$foo(DD)V
+   if-ne p0, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
diff --git a/test/574-irreducible-and-constant-area/src/Main.java b/test/574-irreducible-and-constant-area/src/Main.java
new file mode 100644
index 0000000..3cdd924
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    m.invoke(null, arguments);
+  }
+
+  public static void $inline$foo(float a, float b) {
+    Math.abs(a);
+    Math.max(a, b);
+    Math.min(a, b);
+  }
+
+  public static void $inline$foo(double a, double b) {
+    Math.abs(a);
+    Math.max(a, b);
+    Math.min(a, b);
+  }
+}
diff --git a/test/575-checker-isnan/expected.txt b/test/575-checker-isnan/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/575-checker-isnan/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/575-checker-isnan/info.txt b/test/575-checker-isnan/info.txt
new file mode 100644
index 0000000..5c48a6a
--- /dev/null
+++ b/test/575-checker-isnan/info.txt
@@ -0,0 +1 @@
+Unit test for float/double isNaN() operation.
diff --git a/test/575-checker-isnan/src/Main.java b/test/575-checker-isnan/src/Main.java
new file mode 100644
index 0000000..cc71e5e
--- /dev/null
+++ b/test/575-checker-isnan/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:z\d+>> InvokeStaticOrDirect
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (after)
+  /// CHECK-DAG: <<Result:z\d+>> NotEqual
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect
+  private static boolean isNaN32(float x) {
+    return Float.isNaN(x);
+  }
+
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:z\d+>> InvokeStaticOrDirect
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (after)
+  /// CHECK-DAG: <<Result:z\d+>> NotEqual
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect
+  private static boolean isNaN64(double x) {
+    return Double.isNaN(x);
+  }
+
+  public static void main(String args[]) {
+    // A few distinct numbers.
+    expectFalse(isNaN32(Float.NEGATIVE_INFINITY));
+    expectFalse(isNaN32(-1.0f));
+    expectFalse(isNaN32(-0.0f));
+    expectFalse(isNaN32(0.0f));
+    expectFalse(isNaN32(1.0f));
+    expectFalse(isNaN32(Float.POSITIVE_INFINITY));
+
+    // A few distinct subnormal numbers.
+    expectFalse(isNaN32(Float.intBitsToFloat(0x00400000)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x80400000)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x00000001)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x80000001)));
+
+    // A few NaN numbers.
+    expectTrue(isNaN32(Float.NaN));
+    expectTrue(isNaN32(0.0f / 0.0f));
+    expectTrue(isNaN32((float)Math.sqrt(-1.0f)));
+    float[] fvals = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectTrue(isNaN32(fvals[i]));
+    }
+
+    // A few distinct numbers.
+    expectFalse(isNaN64(Double.NEGATIVE_INFINITY));
+    expectFalse(isNaN64(-1.0d));
+    expectFalse(isNaN64(-0.0d));
+    expectFalse(isNaN64(0.0d));
+    expectFalse(isNaN64(1.0d));
+    expectFalse(isNaN64(Double.POSITIVE_INFINITY));
+
+    // A few distinct subnormal numbers.
+    expectFalse(isNaN64(Double.longBitsToDouble(0x0008000000000000l)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x8008000000000000l)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x0000000000000001l)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x8000000000000001l)));
+
+    // A few NaN numbers.
+    expectTrue(isNaN64(Double.NaN));
+    expectTrue(isNaN64(0.0d / 0.0d));
+    expectTrue(isNaN64(Math.sqrt(-1.0d)));
+    double[] dvals = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectTrue(isNaN64(dvals[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectTrue(boolean value) {
+    if (!value) {
+      throw new Error("Expected True");
+    }
+  }
+
+  private static void expectFalse(boolean value) {
+    if (value) {
+      throw new Error("Expected False");
+    }
+  }
+}
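
For reference, the rewrite exercised by this test rests on an IEEE-754 identity: NaN is the only value that compares unequal to itself, which is why the intrinsic call can be lowered to a single NotEqual of the argument with itself. A minimal, illustrative sketch (not part of this change) of the semantics the simplifier must preserve:

    // Sketch only: Float.isNaN(x) is equivalent to a self-comparison,
    // because every non-NaN value is equal to itself.
    static boolean isNaNViaSelfCompare(float x) {
      return x != x;  // true exactly for NaN bit patterns
    }
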
diff --git a/test/455-set-vreg/expected.txt b/test/575-checker-string-init-alias/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/575-checker-string-init-alias/expected.txt
diff --git a/test/575-checker-string-init-alias/info.txt b/test/575-checker-string-init-alias/info.txt
new file mode 100644
index 0000000..a91ea64
--- /dev/null
+++ b/test/575-checker-string-init-alias/info.txt
@@ -0,0 +1,2 @@
+Test for the String.<init> change and deoptimization: make
+sure the compiler knows how to handle dex aliases.
diff --git a/test/575-checker-string-init-alias/smali/TestCase.smali b/test/575-checker-string-init-alias/smali/TestCase.smali
new file mode 100644
index 0000000..ff04b27
--- /dev/null
+++ b/test/575-checker-string-init-alias/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.field public static staticField:Ljava/lang/String;
+
+## CHECK-START: void TestCase.testNoAlias(int[], java.lang.String) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+.method public static testNoAlias([ILjava/lang/String;)V
+    .registers 6
+    const v1, 0
+    const v2, 1
+    new-instance v0, Ljava/lang/String;
+
+    # Will deoptimize.
+    aget v3, p0, v1
+
+    # Check that we're being executed by the interpreter.
+    invoke-static {}, LMain;->assertIsInterpreted()V
+
+    invoke-direct {v0, p1}, Ljava/lang/String;-><init>(Ljava/lang/String;)V
+
+    sput-object v0, LTestCase;->staticField:Ljava/lang/String;
+
+    # Will throw AIOOBE.
+    aget v3, p0, v2
+
+    return-void
+.end method
+
+## CHECK-START: void TestCase.testAlias(int[], java.lang.String) register (after)
+## CHECK:         <<New:l\d+>>    NewInstance
+## CHECK:                         Deoptimize env:[[<<New>>,<<New>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+.method public static testAlias([ILjava/lang/String;)V
+    .registers 7
+    const v2, 0
+    const v3, 1
+    new-instance v0, Ljava/lang/String;
+    move-object v1, v0
+
+    # Will deoptimize.
+    aget v4, p0, v2
+
+    # Check that we're being executed by the interpreter.
+    invoke-static {}, LMain;->assertIsInterpreted()V
+
+    invoke-direct {v1, p1}, Ljava/lang/String;-><init>(Ljava/lang/String;)V
+
+    sput-object v1, LTestCase;->staticField:Ljava/lang/String;
+
+    # Will throw AIOOBE.
+    aget v4, p0, v3
+
+    return-void
+.end method
diff --git a/test/575-checker-string-init-alias/src/Main.java b/test/575-checker-string-init-alias/src/Main.java
new file mode 100644
index 0000000..1ab3207
--- /dev/null
+++ b/test/575-checker-string-init-alias/src/Main.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static native void assertIsInterpreted();
+
+  private static void assertEqual(String expected, String actual) {
+    if (!expected.equals(actual)) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    int[] array = new int[1];
+
+    {
+      Method m = c.getMethod("testNoAlias", int[].class, String.class);
+      try {
+        m.invoke(null, new Object[] { array, "foo" });
+        throw new Error("Expected AIOOBE");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw new Error("Expected AIOOBE");
+        }
+        // Ignore
+      }
+      Field field = c.getField("staticField");
+      assertEqual("foo", (String)field.get(null));
+    }
+
+    {
+      Method m = c.getMethod("testAlias", int[].class, String.class);
+      try {
+        m.invoke(null, new Object[] { array, "bar" });
+        throw new Error("Expected AIOOBE");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw new Error("Expected AIOOBE");
+        }
+        // Ignore
+      }
+      Field field = c.getField("staticField");
+      assertEqual("bar", (String)field.get(null));
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/576-polymorphic-inlining/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/576-polymorphic-inlining/expected.txt
diff --git a/test/576-polymorphic-inlining/info.txt b/test/576-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..b3ef0c8
--- /dev/null
+++ b/test/576-polymorphic-inlining/info.txt
@@ -0,0 +1 @@
+Test for polymorphic inlining.
diff --git a/test/576-polymorphic-inlining/src/Main.java b/test/576-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..5763d89
--- /dev/null
+++ b/test/576-polymorphic-inlining/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    for (int i = 0; i < 20000; ++i) {
+      $noinline$testVoid(new Main());
+      $noinline$testVoid(new SubMain());
+      $noinline$testVoid(new SubSubMain());
+
+      $noinline$testWithReturnValue(new Main());
+      $noinline$testWithReturnValue(new SubMain());
+      $noinline$testWithReturnValue(new SubSubMain());
+
+      $noinline$testWithBackEdge(new Main());
+      $noinline$testWithBackEdge(new SubMain());
+      $noinline$testWithBackEdge(new SubSubMain());
+    }
+  }
+
+  public static void assertIdentical(Object expected, Object actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void $noinline$testVoid(Main m) {
+    if (doThrow) throw new Error("");
+    m.willInlineVoid();
+    m.willOnlyInlineForMainVoid();
+  }
+
+  public static void $noinline$testWithReturnValue(Main m) {
+    if (doThrow) throw new Error("");
+    assertIdentical(m.getClass(), m.willInlineWithReturnValue());
+    assertIdentical(m.getClass(), m.willOnlyInlineForMainWithReturnValue());
+  }
+
+  public static void $noinline$testWithBackEdge(Main m) {
+    if (doThrow) throw new Error("");
+    for (int i = 0; i < 10; ++i) {
+      m.willInlineVoid();
+    }
+    for (int i = 0; i < 10; ++i) {
+      m.willOnlyInlineForMainVoid();
+    }
+  }
+
+  public void willInlineVoid() {
+  }
+
+  public void willOnlyInlineForMainVoid() {
+  }
+
+  public Class<?> willInlineWithReturnValue() {
+    return Main.class;
+  }
+
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
+    return Main.class;
+  }
+  public static boolean doThrow;
+}
+
+class SubMain extends Main {
+  public void willOnlyInlineForMainVoid() {
+    if (doThrow) throw new Error("");
+  }
+
+  public void willInlineVoid() {
+  }
+
+  public Class<?> willInlineWithReturnValue() {
+    return SubMain.class;
+  }
+
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
+    return SubMain.class;
+  }
+}
+
+class SubSubMain extends SubMain {
+  public Class<?> willInlineWithReturnValue() {
+    return SubSubMain.class;
+  }
+
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
+    return SubSubMain.class;
+  }
+}
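
Conceptually, polymorphic inlining profiles the receiver classes seen at a virtual call site and inlines the hottest targets behind class-equality guards, falling back to a virtual dispatch otherwise. A hypothetical Java rendering of what this test exercises (illustrative only; the real transformation operates on ART's HIR, and the shape of the guards is an assumption here):

    // Hypothetical sketch of guarded devirtualization for m.willInlineVoid():
    static void dispatchSketch(Main m) {
      Class<?> k = m.getClass();
      if (k == Main.class) {
        // ... inlined body of Main.willInlineVoid() ...
      } else if (k == SubMain.class) {
        // ... inlined body of SubMain.willInlineVoid() ...
      } else {
        m.willInlineVoid();  // fallback: regular virtual dispatch
      }
    }
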
diff --git a/test/577-checker-fp2int/expected.txt b/test/577-checker-fp2int/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/577-checker-fp2int/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/577-checker-fp2int/info.txt b/test/577-checker-fp2int/info.txt
new file mode 100644
index 0000000..d22a0ea
--- /dev/null
+++ b/test/577-checker-fp2int/info.txt
@@ -0,0 +1 @@
+Unit test for float/double to raw bits conversions.
diff --git a/test/577-checker-fp2int/src/Main.java b/test/577-checker-fp2int/src/Main.java
new file mode 100644
index 0000000..ace956d
--- /dev/null
+++ b/test/577-checker-fp2int/src/Main.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.f2int(float) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:FloatFloatToIntBits
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: int Main.f2int(float) instruction_simplifier (after)
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG: <<Raw:i\d+>> InvokeStaticOrDirect [<<Arg:f\d+>>{{(,[ij]\d+)?}}] intrinsic:FloatFloatToRawIntBits
+  /// CHECK-DAG: <<Cond:z\d+>> NotEqual [<<Arg>>,<<Arg>>]
+  /// CHECK-DAG: <<Result:i\d+>> Select [<<Raw>>,{{i\d+}},<<Cond>>]
+  /// CHECK-DAG: Return [<<Result>>]
+  private static int f2int(float f) {
+    return Float.floatToIntBits(f);
+  }
+
+  /// CHECK-START: long Main.d2long(double) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:DoubleDoubleToLongBits
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: long Main.d2long(double) instruction_simplifier (after)
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG: <<Raw:j\d+>> InvokeStaticOrDirect [<<Arg:d\d+>>{{(,[ij]\d+)?}}] intrinsic:DoubleDoubleToRawLongBits
+  /// CHECK-DAG: <<Cond:z\d+>> NotEqual [<<Arg>>,<<Arg>>]
+  /// CHECK-DAG: <<Result:j\d+>> Select [<<Raw>>,{{j\d+}},<<Cond>>]
+  /// CHECK-DAG: Return [<<Result>>]
+  private static long d2long(double d) {
+    return Double.doubleToLongBits(d);
+  }
+
+  public static void main(String args[]) {
+    // A few distinct numbers.
+    expectEquals32(0xff800000, f2int(Float.NEGATIVE_INFINITY));
+    expectEquals32(0xbf800000, f2int(-1.0f));
+    expectEquals32(0x80000000, f2int(-0.0f));
+    expectEquals32(0x00000000, f2int(+0.0f));
+    expectEquals32(0x3f800000, f2int(+1.0f));
+    expectEquals32(0x7f800000, f2int(Float.POSITIVE_INFINITY));
+
+    // A few others.
+    for (int i = 0; i <= 100; i++) {
+      expectEquals32(i, f2int(Float.intBitsToFloat(i)));
+    }
+
+    // A few NaN numbers.
+    float[] fvals = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectEquals32(0x7fc00000, f2int(fvals[i]));
+    }
+
+    // A few distinct numbers.
+    expectEquals64(0xfff0000000000000L, d2long(Double.NEGATIVE_INFINITY));
+    expectEquals64(0xbff0000000000000L, d2long(-1.0d));
+    expectEquals64(0x8000000000000000L, d2long(-0.0d));
+    expectEquals64(0x0000000000000000L, d2long(+0.0d));
+    expectEquals64(0x3ff0000000000000L, d2long(+1.0d));
+    expectEquals64(0x7ff0000000000000L, d2long(Double.POSITIVE_INFINITY));
+
+    // A few others.
+    for (long l = 0; l <= 100; l++) {
+      expectEquals64(l, d2long(Double.longBitsToDouble(l)));
+    }
+
+    // A few NaN numbers.
+    double[] dvals = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectEquals64(0x7ff8000000000000L, d2long(dvals[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: "
+          + Integer.toHexString(expected)
+          + ", found: "
+          + Integer.toHexString(result));
+    }
+  }
+
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: "
+          + Long.toHexString(expected)
+          + ", found: "
+          + Long.toHexString(result));
+    }
+  }
+}
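
The Select form checked above reflects that floatToIntBits differs from floatToRawIntBits only on NaN inputs, which it canonicalizes to 0x7fc00000 (0x7ff8000000000000L for doubles). An illustrative equivalence, sketch only:

    // Sketch only: what the simplifier-generated Select computes.
    static int floatToIntBitsSketch(float f) {
      int raw = Float.floatToRawIntBits(f);
      return (f != f) ? 0x7fc00000 : raw;  // NaN check via self-comparison
    }
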
diff --git a/test/530-checker-loops/expected.txt b/test/577-profile-foreign-dex/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/577-profile-foreign-dex/expected.txt
diff --git a/test/577-profile-foreign-dex/info.txt b/test/577-profile-foreign-dex/info.txt
new file mode 100644
index 0000000..090db3f
--- /dev/null
+++ b/test/577-profile-foreign-dex/info.txt
@@ -0,0 +1 @@
+Check that we record the use of foreign dex files when profiles are enabled.
diff --git a/test/577-profile-foreign-dex/run b/test/577-profile-foreign-dex/run
new file mode 100644
index 0000000..ad57d14
--- /dev/null
+++ b/test/577-profile-foreign-dex/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} \
+  --runtime-option -Xjitsaveprofilinginfo \
+  --runtime-option -Xusejit:true \
+  "${@}"
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/577-profile-foreign-dex/src-ex/OtherDex.java
new file mode 100644
index 0000000..cba73b3
--- /dev/null
+++ b/test/577-profile-foreign-dex/src-ex/OtherDex.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class OtherDex {
+}
diff --git a/test/577-profile-foreign-dex/src/Main.java b/test/577-profile-foreign-dex/src/Main.java
new file mode 100644
index 0000000..ed7a625
--- /dev/null
+++ b/test/577-profile-foreign-dex/src/Main.java
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Constructor;
+import java.util.HashMap;
+
+public class Main {
+
+  private static final String PROFILE_NAME = "primary.prof";
+  private static final String APP_DIR_PREFIX = "app_dir_";
+  private static final String FOREIGN_DEX_PROFILE_DIR = "foreign-dex";
+  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
+  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
+
+  public static void main(String[] args) throws Exception {
+    File tmpFile = null;
+    File appDir = null;
+    File profileFile = null;
+    File foreignDexProfileDir = null;
+
+    try {
+      // Create the necessary files layout.
+      tmpFile = createTempFile();
+      appDir = new File(tmpFile.getParent(), APP_DIR_PREFIX + tmpFile.getName());
+      appDir.mkdir();
+      foreignDexProfileDir = new File(tmpFile.getParent(), FOREIGN_DEX_PROFILE_DIR);
+      foreignDexProfileDir.mkdir();
+      profileFile = createTempFile();
+
+      String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
+
+      // Register the app with the runtime
+      VMRuntime.registerAppInfo(profileFile.getPath(), appDir.getPath(),
+             new String[] { codePath }, foreignDexProfileDir.getPath());
+
+      testMarkerForForeignDex(foreignDexProfileDir);
+      testMarkerForCodePath(foreignDexProfileDir);
+      testMarkerForApplicationDexFile(foreignDexProfileDir, appDir);
+    } finally {
+      if (tmpFile != null) {
+        tmpFile.delete();
+      }
+      if (profileFile != null) {
+        profileFile.delete();
+      }
+      if (foreignDexProfileDir != null) {
+        foreignDexProfileDir.delete();
+      }
+      if (appDir != null) {
+        appDir.delete();
+      }
+    }
+  }
+
+  // Verify we actually create a marker on disk for foreign dex files.
+  private static void testMarkerForForeignDex(File foreignDexProfileDir) throws Exception {
+    String foreignDex = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar";
+    loadDexFile(foreignDex);
+    checkMarker(foreignDexProfileDir, foreignDex, /* exists */ true);
+  }
+
+  // Verify we do not create a marker on disk for dex files that are part of the code path.
+  private static void testMarkerForCodePath(File foreignDexProfileDir) throws Exception {
+    String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
+    loadDexFile(codePath);
+    checkMarker(foreignDexProfileDir, codePath, /* exists */ false);
+  }
+
+  private static void testMarkerForApplicationDexFile(File foreignDexProfileDir, File appDir)
+      throws Exception {
+    // Copy the -ex jar to the application directory and load it from there.
+    // This will record duplicate class conflicts, but we do not care about them for this use case.
+    File foreignDex = new File(System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar");
+    File appDex = new File(appDir, "appDex.jar");
+    try {
+      copyFile(foreignDex, appDex);
+
+      loadDexFile(appDex.getAbsolutePath());
+      checkMarker(foreignDexProfileDir, appDex.getAbsolutePath(), /* exists */ false);
+    } finally {
+      if (appDex != null) {
+        appDex.delete();
+      }
+    }
+  }
+
+  private static void checkMarker(File foreignDexProfileDir, String dexFile, boolean exists) {
+    File marker = new File(foreignDexProfileDir, dexFile.replace('/', '@'));
+    boolean result_ok = exists ? marker.exists() : !marker.exists();
+    if (!result_ok) {
+      throw new RuntimeException("Marker test failed for:" + marker.getPath());
+    }
+  }
+
+  private static void loadDexFile(String dexFile) throws Exception {
+    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+    if (pathClassLoader == null) {
+        throw new RuntimeException("Couldn't find path class loader class");
+    }
+    Constructor<?> constructor =
+        pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+    constructor.newInstance(
+            dexFile, ClassLoader.getSystemClassLoader());
+  }
+
+  private static class VMRuntime {
+    private static final Method registerAppInfoMethod;
+    static {
+      try {
+        Class<?> c = Class.forName("dalvik.system.VMRuntime");
+        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
+            String.class, String.class, String[].class, String.class);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void registerAppInfo(String pkgName, String appDir,
+        String[] codePath, String foreignDexProfileDir) throws Exception {
+      registerAppInfoMethod.invoke(null, pkgName, appDir, codePath, foreignDexProfileDir);
+    }
+  }
+
+  private static void copyFile(File fromFile, File toFile) throws Exception {
+    FileInputStream in = new FileInputStream(fromFile);
+    FileOutputStream out = new FileOutputStream(toFile);
+    try {
+      byte[] buffer = new byte[4096];
+      int bytesRead;
+      while ((bytesRead = in.read(buffer)) >= 0) {
+          out.write(buffer, 0, bytesRead);
+      }
+    } finally {
+      out.flush();
+      try {
+          out.getFD().sync();
+      } catch (IOException e) {
+      }
+      out.close();
+      in.close();
+    }
+  }
+
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+}
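
checkMarker() above encodes the convention this test relies on: the runtime marks use of a foreign dex file by creating a file in the foreign-dex profile directory whose name is the dex path with every '/' replaced by '@'. A small helper restating that assumption (illustrative only, derived from the test itself, not an ART API):

    // Illustrative only: marker location as assumed by checkMarker().
    static java.io.File markerFor(java.io.File profileDir, String dexPath) {
      // e.g. /data/local/tmp/foo.jar -> <profileDir>/@data@local@tmp@foo.jar
      return new java.io.File(profileDir, dexPath.replace('/', '@'));
    }
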
diff --git a/test/578-bce-visit/expected.txt b/test/578-bce-visit/expected.txt
new file mode 100644
index 0000000..28fca2c
--- /dev/null
+++ b/test/578-bce-visit/expected.txt
@@ -0,0 +1,2 @@
+exception caught
+FUZZ result = 1001 16
diff --git a/test/578-bce-visit/info.txt b/test/578-bce-visit/info.txt
new file mode 100644
index 0000000..2462e1b
--- /dev/null
+++ b/test/578-bce-visit/info.txt
@@ -0,0 +1 @@
+Fuzz test that exposed a bug in how bounds check elimination visits blocks.
diff --git a/test/578-bce-visit/src/Main.java b/test/578-bce-visit/src/Main.java
new file mode 100644
index 0000000..b0e920e
--- /dev/null
+++ b/test/578-bce-visit/src/Main.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Automatically generated fuzz test that exposed bug in the way bounds
+ * check elimination visits basic blocks. If, after dynamic bce, the same
+ * block would be visited again, then static length-based bce would incorrectly
+ * feed information back to itself and remove a necessary bounds check.
+ */
+public class Main {
+
+  private static int[][][] mA = new int[10][10][10];
+
+  private static int mX = 17;
+
+  private static int doit() {
+    int l0 = (((++mA[7][2][8]) <= mA[0][1][3]) ? (++mA[9][0][5]) : ((( -mA[0][7][0]) * ((mX == mX) ? 180 : mX)) + (mA[7][8][8]++)));
+    mA[1][0][4] -= mX;
+    int l1 = (((l0 >= ( ~mA[6][7][5])) && ((921 <= l0) && (mA[3][9][6] > l0))) ? mX : (l0--));
+    int l2 = ( -384);
+    for (int i0 = 7 - 1; i0 >= 1; i0--) {
+      mA[6][0][0] -= ((((l0++) == ( -mX)) ? (((mA[3][i0][1] > 503) || (mX <= i0)) ? (--l0) : (l0--)) : mX) - ( ~(mX--)));
+      int l3 = 24;
+      int l4 = ((l2--) & mX);
+      for (int i1 = i0-2 - 1; i1 >= 3; i1--) {
+        for (int i2 = 2; i2 < i0; i2++) {
+          mA[i0][4][l3] >>= 1;
+        }
+      }
+    }
+    return 1;
+  }
+
+  public static void main(String[] args) {
+    int k = 1;
+    for (int i0 = 0; i0 < 10; i0++)
+    for (int i1 = 0; i1 < 10; i1++)
+    for (int i2 = 0; i2 < 10; i2++)
+      mA[i0][i1][i2] = k++;
+    try {
+      k = doit();
+    } catch (Exception e) {
+      System.out.println("exception caught");
+    }
+    System.out.println("FUZZ result = " + k + " " + mX);
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/578-polymorphic-inlining/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/578-polymorphic-inlining/expected.txt
diff --git a/test/578-polymorphic-inlining/info.txt b/test/578-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..77ec49b
--- /dev/null
+++ b/test/578-polymorphic-inlining/info.txt
@@ -0,0 +1,2 @@
+Regression test for polymorphic inlining, which used to wrongly
+propagate the try/catch information of newly created blocks.
diff --git a/test/578-polymorphic-inlining/src/Main.java b/test/578-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..22d33d0
--- /dev/null
+++ b/test/578-polymorphic-inlining/src/Main.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    for (int i = 0; i < 20000; ++i) {
+      $noinline$testInTryCatch(new Main(), i);
+      $noinline$testInTryCatch(new SubMain(), i);
+    }
+  }
+
+  public static void $noinline$testInTryCatch(Main m, int i) {
+    final int value;
+    try {
+      throw new Exception();
+    } catch (Exception e) {
+      // The polymorphic inlining of 'willInlineVoid' used to generate an
+      // incorrect graph, by setting the inlined blocks as catch blocks.
+      m.willInlineVoid(i);
+      return;
+    }
+  }
+
+  public void willInlineVoid(int i) {
+    if (i == 0) {
+      $noinline$foo();
+    } else {
+      $noinline$foo();
+      $noinline$foo();
+    }
+  }
+
+  public static void $noinline$foo() {
+    if (doThrow) throw new Error("");
+  }
+
+  public static boolean doThrow;
+}
+
+class SubMain extends Main {
+  public void willInlineVoid(int i) {
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/579-inline-infinite/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/579-inline-infinite/expected.txt
diff --git a/test/579-inline-infinite/info.txt b/test/579-inline-infinite/info.txt
new file mode 100644
index 0000000..6fb917c
--- /dev/null
+++ b/test/579-inline-infinite/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing.
+Inlining of a method with an infinite loop used to cause a crash.
diff --git a/test/579-inline-infinite/src/Main.java b/test/579-inline-infinite/src/Main.java
new file mode 100644
index 0000000..f214ed4
--- /dev/null
+++ b/test/579-inline-infinite/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Infinite implements Runnable {
+  public int field;
+
+  private final void $noinline$infinite() {
+    while(true) {
+      field++;
+    }
+  }
+
+  public void run() {
+    $noinline$infinite();
+  }
+}
+
+public class Main {
+  public static void main(String[] args) {
+    Thread thr = new Thread(new Infinite());
+    thr.setDaemon(true);
+    thr.start();
+    // This is a compiler test, so just finish.
+  }
+}
diff --git a/test/580-checker-round/expected.txt b/test/580-checker-round/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/580-checker-round/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/580-checker-round/info.txt b/test/580-checker-round/info.txt
new file mode 100644
index 0000000..d6397fd
--- /dev/null
+++ b/test/580-checker-round/info.txt
@@ -0,0 +1 @@
+Unit test for float/double rounding.
diff --git a/test/580-checker-round/src/Main.java b/test/580-checker-round/src/Main.java
new file mode 100644
index 0000000..83bc55c
--- /dev/null
+++ b/test/580-checker-round/src/Main.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.round32(float) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:MathRoundFloat
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int round32(float f) {
+    return Math.round(f);
+  }
+
+  /// CHECK-START: long Main.round64(double) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:MathRoundDouble
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long round64(double d) {
+    return Math.round(d);
+  }
+
+  public static void main(String args[]) {
+    // A few obvious numbers.
+    expectEquals32(-2147483648, round32(Float.NEGATIVE_INFINITY));
+    expectEquals32(-2, round32(-1.51f));
+    expectEquals32(-1, round32(-1.2f));
+    expectEquals32(-1, round32(-1.0f));
+    expectEquals32(-1, round32(-0.5000001f));
+    expectEquals32(0, round32(-0.5f));
+    expectEquals32(0, round32(-0.2f));
+    expectEquals32(0, round32(-0.0f));
+    expectEquals32(0, round32(+0.0f));
+    expectEquals32(0, round32(+0.2f));
+    expectEquals32(1, round32(+0.5f));
+    expectEquals32(1, round32(+1.0f));
+    expectEquals32(1, round32(+1.2f));
+    expectEquals32(2, round32(+1.5f));
+    expectEquals32(2147483647, round32(Float.POSITIVE_INFINITY));
+
+    // Near minint.
+    expectEquals32(-2147483648, round32(Math.nextAfter(-2147483648.0f, Float.NEGATIVE_INFINITY)));
+    expectEquals32(-2147483648, round32(-2147483648.0f));
+    expectEquals32(-2147483520, round32(Math.nextAfter(-2147483648.0f, Float.POSITIVE_INFINITY)));
+
+    // Near maxint.
+    expectEquals32(2147483520, round32(Math.nextAfter(2147483648.0f, Float.NEGATIVE_INFINITY)));
+    expectEquals32(2147483647, round32(2147483648.0f));
+    expectEquals32(2147483647, round32(Math.nextAfter(2147483648.0f, Float.POSITIVE_INFINITY)));
+
+    // Some others.
+    for (int i = -100; i <= 100; ++i) {
+      expectEquals32(i - 1, round32((float) i - 0.51f));
+      expectEquals32(i, round32((float) i - 0.5f));
+      expectEquals32(i, round32((float) i));
+      expectEquals32(i + 1, round32((float) i + 0.5f));
+      expectEquals32(i + 1, round32((float) i + 0.51f));
+    }
+    for (float f = -1.5f; f <= -1.499f; f = Math.nextAfter(f, Float.POSITIVE_INFINITY)) {
+      expectEquals32(-1, round32(f));
+    }
+
+    // Some harder.
+    float[] fvals = {
+      -16777215.5f,
+      -16777215.0f,
+      -0.49999998f,
+      -0.4999999701976776123046875f,
+      0.4999999701976776123046875f,
+      0.49999998f,
+      16777215.0f,
+      16777215.5f
+    };
+    int[] ivals = {
+      -16777216,
+      -16777215,
+      0,
+      0,
+      0,
+      0,
+      16777215,
+      16777216
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectEquals32(ivals[i], round32(fvals[i]));
+    }
+
+    // A few NaN numbers.
+    float[] fnans = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fnans.length; i++) {
+      expectEquals32(0, round32(fnans[i]));
+    }
+
+    // A few obvious numbers.
+    expectEquals64(-9223372036854775808L, round64(Double.NEGATIVE_INFINITY));
+    expectEquals64(-2L, round64(-1.51d));
+    expectEquals64(-1L, round64(-1.2d));
+    expectEquals64(-1L, round64(-1.0d));
+    expectEquals64(-1L, round64(-0.5000001d));
+    expectEquals64(0L, round64(-0.5d));
+    expectEquals64(0L, round64(-0.2d));
+    expectEquals64(0L, round64(-0.0d));
+    expectEquals64(0L, round64(+0.0d));
+    expectEquals64(0L, round64(+0.2d));
+    expectEquals64(1L, round64(+0.5d));
+    expectEquals64(1L, round64(+1.0d));
+    expectEquals64(1L, round64(+1.2d));
+    expectEquals64(2L, round64(+1.5d));
+    expectEquals64(9223372036854775807L, round64(Double.POSITIVE_INFINITY));
+
+    // Near minlong.
+    expectEquals64(-9223372036854775808L,
+        round64(Math.nextAfter(-9223372036854775808.0, Double.NEGATIVE_INFINITY)));
+    expectEquals64(-9223372036854775808L, round64(-9223372036854775808.0));
+    expectEquals64(-9223372036854774784L,
+        round64(Math.nextAfter(-9223372036854775808.0, Double.POSITIVE_INFINITY)));
+
+    // Near maxlong.
+    expectEquals64(9223372036854774784L,
+        round64(Math.nextAfter(9223372036854775808.0, Double.NEGATIVE_INFINITY)));
+    expectEquals64(9223372036854775807L, round64(9223372036854775808.0));
+    expectEquals64(9223372036854775807L,
+        round64(Math.nextAfter(9223372036854775808.0, Double.POSITIVE_INFINITY)));
+
+    // Some others.
+    for (long l = -100; l <= 100; ++l) {
+      expectEquals64(l - 1, round64((double) l - 0.51d));
+      expectEquals64(l, round64((double) l - 0.5d));
+      expectEquals64(l, round64((double) l));
+      expectEquals64(l + 1, round64((double) l + 0.5d));
+      expectEquals64(l + 1, round64((double) l + 0.51d));
+    }
+    for (double d = -1.5d; d <= -1.49999999999d; d = Math.nextAfter(d, Double.POSITIVE_INFINITY)) {
+      expectEquals64(-1L, round64(d));
+    }
+
+    // Some harder.
+    double[] dvals = {
+      -9007199254740991.5d,
+      -9007199254740991.0d,
+      -0.49999999999999997d,
+      -0.49999999999999994d,
+      0.49999999999999994d,
+      0.49999999999999997d,
+      9007199254740991.0d,
+      9007199254740991.5d
+    };
+    long[] lvals = {
+      -9007199254740992L,
+      -9007199254740991L,
+      0L,
+      0L,
+      0L,
+      0L,
+      9007199254740991L,
+      9007199254740992L
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectEquals64(lvals[i], round64(dvals[i]));
+    }
+
+    // A few NaN numbers.
+    double[] dnans = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dnans.length; i++) {
+      expectEquals64(0L, round64(dnans[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
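
The expectations above pin down the round-half-up contract that the MathRoundFloat/MathRoundDouble intrinsics must preserve: NaN maps to zero, infinities and out-of-range values saturate at the integer bounds, and everything else is floor(x + 0.5). A reference sketch for the float case (illustrative only; the double intermediate keeps f + 0.5 exact for every float input):

    // Sketch only: reference semantics for round32().
    static int roundRef(float f) {
      if (f != f) return 0;                      // NaN rounds to zero
      double d = Math.floor((double) f + 0.5d);  // round half up, exactly
      if (d <= Integer.MIN_VALUE) return Integer.MIN_VALUE;  // saturate low
      if (d >= Integer.MAX_VALUE) return Integer.MAX_VALUE;  // saturate high
      return (int) d;
    }
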
diff --git a/test/580-checker-string-factory-intrinsics/expected.txt b/test/580-checker-string-factory-intrinsics/expected.txt
new file mode 100644
index 0000000..86e041d
--- /dev/null
+++ b/test/580-checker-string-factory-intrinsics/expected.txt
@@ -0,0 +1,3 @@
+foo
+bar
+baz
diff --git a/test/580-checker-string-factory-intrinsics/info.txt b/test/580-checker-string-factory-intrinsics/info.txt
new file mode 100644
index 0000000..3d01a19
--- /dev/null
+++ b/test/580-checker-string-factory-intrinsics/info.txt
@@ -0,0 +1 @@
+Ensure java.lang.StringFactory intrinsics are recognized and used.
diff --git a/test/580-checker-string-factory-intrinsics/src/Main.java b/test/580-checker-string-factory-intrinsics/src/Main.java
new file mode 100644
index 0000000..a2e34bf
--- /dev/null
+++ b/test/580-checker-string-factory-intrinsics/src/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: void Main.testNewStringFromBytes() builder (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromBytes intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromBytes() intrinsics_recognition (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromBytes intrinsic:StringNewStringFromBytes
+
+  public static void testNewStringFromBytes() {
+    byte[] bytes = { 'f', 'o', 'o' };
+    String s = StringFactory.newStringFromBytes(bytes, 0, 0, 3);
+    System.out.println(s);
+  }
+
+  // The (native) method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // is recognized as intrinsic StringNewStringFromChars.  However,
+  // because this method is not public, we cannot call it and check
+  // that the compiler actually intrinsifies it (as it does for the
+  // StringNewStringFromBytes and StringNewStringFromString
+  // intrinsics) with Checker.
+  //
+  // We can call a public method such as
+  //
+  //   java.lang.StringFactory.newStringFromChars(char[] data)
+  //
+  // which contains a call to the former (non-public) native method.
+  // However, this call will not be inlined (because it is a method in
+  // another Dex file and contains a call, which needs an
+  // environment), so we cannot use Checker here to ensure the native
+  // call was intrinsified either.
+
+  /// CHECK-START: void Main.testNewStringFromChars() builder (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromChars intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromChars() intrinsics_recognition (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromChars intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromChars() inliner (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromChars intrinsic:None
+
+  public static void testNewStringFromChars() {
+    char[] chars = { 'b', 'a', 'r' };
+    String s = StringFactory.newStringFromChars(chars);
+    System.out.println(s);
+  }
+
+  /// CHECK-START: void Main.testNewStringFromString() builder (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromString intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromString() intrinsics_recognition (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromString intrinsic:StringNewStringFromString
+
+  public static void testNewStringFromString() {
+    String s1 = "baz";
+    String s2 = StringFactory.newStringFromString(s1);
+    System.out.println(s2);
+  }
+
+  public static void main(String[] args) throws Exception {
+    testNewStringFromBytes();
+    testNewStringFromChars();
+    testNewStringFromString();
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/581-rtp/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/581-rtp/expected.txt
diff --git a/test/581-rtp/info.txt b/test/581-rtp/info.txt
new file mode 100644
index 0000000..b57449a
--- /dev/null
+++ b/test/581-rtp/info.txt
@@ -0,0 +1,2 @@
+Regression test for the reference type propagation pass
+of the optimizing compiler that used to break invariants.
diff --git a/test/581-rtp/src/Main.java b/test/581-rtp/src/Main.java
new file mode 100644
index 0000000..09f6f6c
--- /dev/null
+++ b/test/581-rtp/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Main {
+
+  /// CHECK-START: void Main.main(String[]) builder (after)
+  /// CHECK: StaticFieldGet klass:Main[] exact:true
+  /// CHECK: ArrayGet klass:Main exact:true
+  /// CHECK: BoundType klass:Main exact:true
+  public static void main(String[] args) {
+    Object o = null;
+    Main f = a[0];
+    for (int i = 0; i < 2; ++i) {
+      // We used to crash in the fixed point iteration of
+      // the reference type propagation while handling the instanceof:
+      // we were expecting `o` to get the same exact-ness as the
+      // `HBoundType` but the typing of the `ArrayGet` used to not
+      // propagate the exact-ness.
+      if (o instanceof Main) {
+        field = o;
+      }
+      o = f;
+    }
+    if (field != null) {
+      throw new Error("Expected null");
+    }
+  }
+
+  static Main[] a = new Main[1];
+  static Object field;
+}
diff --git a/test/582-checker-bce-length/expected.txt b/test/582-checker-bce-length/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/582-checker-bce-length/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/582-checker-bce-length/info.txt b/test/582-checker-bce-length/info.txt
new file mode 100644
index 0000000..cb826cd
--- /dev/null
+++ b/test/582-checker-bce-length/info.txt
@@ -0,0 +1 @@
+Regression test for deoptimization-based bounds check elimination.
diff --git a/test/582-checker-bce-length/src/Main.java b/test/582-checker-bce-length/src/Main.java
new file mode 100644
index 0000000..3565b6b
--- /dev/null
+++ b/test/582-checker-bce-length/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test on duplicate removal of same bounds check.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doit1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doit1(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doit1(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  public static void doit1(int[] a) {
+    a[a.length-3] = 1;
+    a[a.length-2] = 2;
+    a[a.length-1] = 3;
+    // This introduces a problematic BoundsCheck(x,x) node:
+    // (1) it is certainly out of bounds, so it should be rejected;
+    // (2) it exposed a bug in removing the same bounds check twice when (1) was not done.
+    a[a.length-0] = 4;
+  }
+
+  /// CHECK-START: void Main.doit2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doit2(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.doit2(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void doit2(int[] a) {
+    a[a.length-4] = -101;
+    a[a.length-3] = -102;
+    a[a.length-2] = -103;
+    a[a.length-1] = -104;
+  }
+
+  public static void main(String[] args) {
+    int[] a = new int[4];
+
+    int fail = 0;
+    try {
+      doit1(a);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      fail++;
+    }
+    expectEquals(1, fail);
+    expectEquals(0, a[0]);
+    expectEquals(1, a[1]);
+    expectEquals(2, a[2]);
+    expectEquals(3, a[3]);
+
+    try {
+      doit2(a);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      fail++;
+    }
+    expectEquals(1, fail);
+    expectEquals(-101, a[0]);
+    expectEquals(-102, a[1]);
+    expectEquals(-103, a[2]);
+    expectEquals(-104, a[3]);
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
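
Why the BoundsCheck(x,x) in doit1() can never be eliminated: valid indices run from 0 to a.length - 1, so an access at index a.length - 0 is out of bounds for every array, regardless of its length. A worked example (sketch only):

    int[] a = new int[4];
    a[a.length - 1] = 3;  // index 3: in bounds
    a[a.length - 0] = 4;  // index 4 == length: always throws AIOOBE
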
diff --git a/test/530-checker-loops/expected.txt b/test/583-checker-zero/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/583-checker-zero/expected.txt
diff --git a/test/583-checker-zero/info.txt b/test/583-checker-zero/info.txt
new file mode 100644
index 0000000..8ec5d48
--- /dev/null
+++ b/test/583-checker-zero/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, which used to assume that 0.0 has the
+same bit pattern as -0.0.
diff --git a/test/583-checker-zero/src/Main.java b/test/583-checker-zero/src/Main.java
new file mode 100644
index 0000000..dc9534c
--- /dev/null
+++ b/test/583-checker-zero/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Test that by inlining new Float(-0f), we still keep the store of
+  // -0f to the instance field. We used to remove it due to wrong assumptions
+  // around art::HConstant::IsZero (now replaced with
+  // art::HConstant::IsArithmeticZero and art::HConstant::IsZeroBitPattern).
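+  //
+  // Note: Float.equals compares bit patterns (via Float.floatToIntBits),
+  // so new Float(0f) and new Float(-0f) are not equal, even though
+  // 0f == -0f holds for the primitive values.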
+
+  /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
+  /// CHECK: InstanceFieldSet
+  public static void main(String[] args) {
+    if (new Float(0f).equals(new Float(-0f))) {
+      throw new Error("Expected not equal");
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/584-checker-div-bool/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/584-checker-div-bool/expected.txt
diff --git a/test/584-checker-div-bool/info.txt b/test/584-checker-div-bool/info.txt
new file mode 100644
index 0000000..59650d5
--- /dev/null
+++ b/test/584-checker-div-bool/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, which used to reject an
+HDivZeroCheck taking a boolean input.
diff --git a/test/584-checker-div-bool/src/Main.java b/test/584-checker-div-bool/src/Main.java
new file mode 100644
index 0000000..fadc995
--- /dev/null
+++ b/test/584-checker-div-bool/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    try {
+      foo(intField);
+      throw new Error("Expected ArithmeticException");
+    } catch (ArithmeticException e) {
+      // expected
+    }
+  }
+
+  /// CHECK-START: int Main.foo(int) register (after)
+  /// CHECK: <<BoolField:z\d+>> StaticFieldGet
+  /// CHECK:                    DivZeroCheck [<<BoolField>>]
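+  //
+  // After inlining bar(), the `booleanField ? 1 : 0` select presumably
+  // simplifies down to the boolean field itself, so DivZeroCheck ends up
+  // with a boolean-typed divisor, as asserted by the CHECK lines above.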
+  public static int foo(int a) {
+    return a / bar();
+  }
+
+  public static int bar() {
+    return booleanField ? 1 : 0;
+  }
+
+  public static boolean booleanField;
+  public static int intField;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/585-inline-unresolved/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/585-inline-unresolved/expected.txt
diff --git a/test/585-inline-unresolved/info.txt b/test/585-inline-unresolved/info.txt
new file mode 100644
index 0000000..414f638
--- /dev/null
+++ b/test/585-inline-unresolved/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing that used to crash when inlining
+a method whose return type is unresolved.
diff --git a/test/585-inline-unresolved/smali/TestCase.smali b/test/585-inline-unresolved/smali/TestCase.smali
new file mode 100644
index 0000000..f260092
--- /dev/null
+++ b/test/585-inline-unresolved/smali/TestCase.smali
@@ -0,0 +1,48 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.field static private test1:Z
+
+.method public static topLevel()V
+   .registers 1
+   invoke-static {}, LTestCase;->$inline$foo()LUnresolved;
+   return-void
+.end method
+
+# We need multiple returns to trigger the crash.
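+# (Merging the two return values presumably requires materializing a value
+# of the unresolved return type at the call site, which is what crashed.)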
+.method public static $inline$foo()LUnresolved;
+  .registers 2
+  const v1, 0x0
+  sget-boolean v0, LTestCase;->test1:Z
+  if-eqz v0, :other_return
+  return-object v1
+  :other_return
+  invoke-static {}, LTestCase;->$noinline$bar()LUnresolved;
+  move-result-object v0
+  return-object v0
+.end method
+
+.method public static $noinline$bar()LUnresolved;
+  .registers 2
+  const v1, 0x0
+  sget-boolean v0, LTestCase;->test1:Z
+  if-eqz v0, :return
+  throw v1
+  :return
+  return-object v1
+.end method
diff --git a/test/585-inline-unresolved/src/Main.java b/test/585-inline-unresolved/src/Main.java
new file mode 100644
index 0000000..67ad4d2
--- /dev/null
+++ b/test/585-inline-unresolved/src/Main.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    c.getMethod("topLevel").invoke(null);
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/586-checker-null-array-get/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/586-checker-null-array-get/expected.txt
diff --git a/test/586-checker-null-array-get/info.txt b/test/586-checker-null-array-get/info.txt
new file mode 100644
index 0000000..81b42e9
--- /dev/null
+++ b/test/586-checker-null-array-get/info.txt
@@ -0,0 +1,3 @@
+Regression test for optimizing's load-store elimination, which used
+to merge two array gets that have the same inputs but not the same
+type. Note that this can only happen when the array is null.
diff --git a/test/586-checker-null-array-get/src/Main.java b/test/586-checker-null-array-get/src/Main.java
new file mode 100644
index 0000000..e0782bc
--- /dev/null
+++ b/test/586-checker-null-array-get/src/Main.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Test1 {
+  int[] iarr;
+}
+
+class Test2 {
+  float[] farr;
+}
+
+public class Main {
+  public static Object[] getObjectArray() { return null; }
+  public static long[] getLongArray() { return null; }
+  public static Object getNull() { return null; }
+  public static Test1 getNullTest1() { return null; }
+  public static Test2 getNullTest2() { return null; }
+
+  public static void main(String[] args) {
+    try {
+      foo();
+      throw new Error("Expected NullPointerException");
+    } catch (NullPointerException e) {
+      // Expected.
+    }
+    try {
+      bar();
+      throw new Error("Expected NullPointerException");
+    } catch (NullPointerException e) {
+      // Expected.
+    }
+    try {
+      test1();
+      throw new Error("Expected NullPointerException");
+    } catch (NullPointerException e) {
+      // Expected.
+    }
+  }
+
+  /// CHECK-START: void Main.foo() load_store_elimination (after)
+  /// CHECK-DAG: <<Null:l\d+>>   NullConstant
+  /// CHECK-DAG: <<Check:l\d+>>  NullCheck [<<Null>>]
+  /// CHECK-DAG: <<Get1:j\d+>>   ArrayGet [<<Check>>,{{i\d+}}]
+  /// CHECK-DAG: <<Get2:l\d+>>   ArrayGet [<<Check>>,{{i\d+}}]
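+  //
+  // Both ArrayGets take the same null-checked null constant and an index,
+  // but have different types (j and l); LSE must keep them separate.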
+  public static void foo() {
+    longField = getLongArray()[0];
+    objectField = getObjectArray()[0];
+  }
+
+  /// CHECK-START: void Main.bar() load_store_elimination (after)
+  /// CHECK-DAG: <<Null:l\d+>>       NullConstant
+  /// CHECK-DAG: <<BoundType:l\d+>>  BoundType [<<Null>>]
+  /// CHECK-DAG: <<CheckL:l\d+>>     NullCheck [<<BoundType>>]
+  /// CHECK-DAG: <<GetL0:l\d+>>      ArrayGet [<<CheckL>>,{{i\d+}}]
+  /// CHECK-DAG: <<GetL1:l\d+>>      ArrayGet [<<CheckL>>,{{i\d+}}]
+  /// CHECK-DAG: <<GetL2:l\d+>>      ArrayGet [<<CheckL>>,{{i\d+}}]
+  /// CHECK-DAG: <<GetL3:l\d+>>      ArrayGet [<<CheckL>>,{{i\d+}}]
+  /// CHECK-DAG: <<CheckJ:l\d+>>     NullCheck [<<Null>>]
+  /// CHECK-DAG: <<GetJ0:j\d+>>      ArrayGet [<<CheckJ>>,{{i\d+}}]
+  /// CHECK-DAG: <<GetJ1:j\d+>>      ArrayGet [<<CheckJ>>,{{i\d+}}]
+  /// CHECK-DAG: <<GetJ2:j\d+>>      ArrayGet [<<CheckJ>>,{{i\d+}}]
+  /// CHECK-DAG: <<GetJ3:j\d+>>      ArrayGet [<<CheckJ>>,{{i\d+}}]
+  public static void bar() {
+    // We create multiple accesses that lead the bounds check elimination
+    // pass to add an HDeoptimize. Without the bounds checks, the load store
+    // elimination used to think it could merge two ArrayGets with different
+    // types.
+    String[] array = (String[])getNull();
+    objectField = array[0];
+    objectField = array[1];
+    objectField = array[2];
+    objectField = array[3];
+    long[] longArray = getLongArray();
+    longField = longArray[0];
+    longField = longArray[1];
+    longField = longArray[2];
+    longField = longArray[3];
+  }
+
+  /// CHECK-START: float Main.test1() load_store_elimination (after)
+  /// CHECK-DAG: <<Null:l\d+>>       NullConstant
+  /// CHECK-DAG: <<Check1:l\d+>>     NullCheck [<<Null>>]
+  /// CHECK-DAG: <<FieldGet1:l\d+>>  InstanceFieldGet [<<Check1>>] field_name:Test1.iarr
+  /// CHECK-DAG: <<Check2:l\d+>>     NullCheck [<<FieldGet1>>]
+  /// CHECK-DAG: <<ArrayGet1:i\d+>>  ArrayGet [<<Check2>>,{{i\d+}}]
+  /// CHECK-DAG: <<ArrayGet2:f\d+>>  ArrayGet [<<Check2>>,{{i\d+}}]
+  /// CHECK-DAG:                     Return [<<ArrayGet2>>]
+  public static float test1() {
+    Test1 test1 = getNullTest1();
+    Test2 test2 = getNullTest2();
+    int[] iarr = test1.iarr;
+    float[] farr = test2.farr;
+    iarr[0] = iarr[1];
+    return farr[0];
+  }
+
+  public static long longField;
+  public static Object objectField;
+}
diff --git a/test/530-checker-loops/expected.txt b/test/587-inline-class-error/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/587-inline-class-error/expected.txt
diff --git a/test/587-inline-class-error/info.txt b/test/587-inline-class-error/info.txt
new file mode 100644
index 0000000..7f244f6
--- /dev/null
+++ b/test/587-inline-class-error/info.txt
@@ -0,0 +1,2 @@
+Regression test for the inliner that used to crash while
+trying to find a method for an erroneous class.
diff --git a/test/587-inline-class-error/smali/SuperVerifyError.smali b/test/587-inline-class-error/smali/SuperVerifyError.smali
new file mode 100644
index 0000000..b63cba0
--- /dev/null
+++ b/test/587-inline-class-error/smali/SuperVerifyError.smali
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSuperVerifyError;
+
+.super Ljava/lang/Object;
+
+.method public final foo()V
+  .registers 1
+  return-void
+.end method
+
+.method public bar()V
+  .registers 1
+  return-void
+.end method
diff --git a/test/587-inline-class-error/smali/TestCase.smali b/test/587-inline-class-error/smali/TestCase.smali
new file mode 100644
index 0000000..7c991ed
--- /dev/null
+++ b/test/587-inline-class-error/smali/TestCase.smali
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.method public static topLevel()V
+  .registers 2
+  const v0, 0x1
+  new-array v0, v0, [LVerifyError;
+  invoke-static {v0}, LTestCase;->test([LVerifyError;)V
+  return-void
+.end method
+
+.method public static test([LVerifyError;)V
+   .registers 2
+   const v0, 0x0
+   aget-object v1, v1, v0
+   invoke-virtual {v1}, LSuperVerifyError;->bar()V
+   return-void
+.end method
diff --git a/test/587-inline-class-error/smali/VerifyError.smali b/test/587-inline-class-error/smali/VerifyError.smali
new file mode 100644
index 0000000..b821b71
--- /dev/null
+++ b/test/587-inline-class-error/smali/VerifyError.smali
@@ -0,0 +1,28 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public final LVerifyError;
+
+.super LSuperVerifyError;
+
+# Override a final method to put this class in the error state.
+.method public foo()V
+  .registers 1
+  return-void
+.end method
+
+# Having a static field in the class is needed to get the
+# right initialization for the embedded vtable length of
+# the class.
+.field public static i:I
diff --git a/test/587-inline-class-error/src/Main.java b/test/587-inline-class-error/src/Main.java
new file mode 100644
index 0000000..3402fab
--- /dev/null
+++ b/test/587-inline-class-error/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    try {
+      Class.forName("VerifyError");
+      throw new Error("Expected LinkageError");
+    } catch (LinkageError e) {
+      // expected
+    }
+
+    try {
+      Class.forName("TestCase").getMethod("topLevel").invoke(null);
+      throw new Error("Expected InvocationTargetException");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NullPointerException)) {
+        throw new Error("Expected NullPointerException, got " + e.getCause());
+      }
+    }
+  }
+}
diff --git a/test/588-checker-irreducible-lifetime-hole/expected.txt b/test/588-checker-irreducible-lifetime-hole/expected.txt
new file mode 100644
index 0000000..aab2009
--- /dev/null
+++ b/test/588-checker-irreducible-lifetime-hole/expected.txt
@@ -0,0 +1,2 @@
+42
+1
diff --git a/test/588-checker-irreducible-lifetime-hole/info.txt b/test/588-checker-irreducible-lifetime-hole/info.txt
new file mode 100644
index 0000000..a2861a9
--- /dev/null
+++ b/test/588-checker-irreducible-lifetime-hole/info.txt
@@ -0,0 +1,3 @@
+Regression test for optimizing, which used to have an overly
+strong DCHECK in the presence of a combination of irreducible
+loops and try/catch.
diff --git a/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali b/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..186f0ab
--- /dev/null
+++ b/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
@@ -0,0 +1,118 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop1(int) dead_code_elimination$initial (before)
+## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
+## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
+## CHECK-DAG:                     Goto irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [{{i\d+}},<<Method>>] loop:none
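+#
+# The loop formed by :b34, :b20, :b46 and :b21 below can be entered both at
+# :b34 (from the method entry and the catch handler) and at :b20 (from the
+# fall-through of :b22), which is what makes it irreducible.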
+.method public static simpleLoop1(I)I
+   .registers 3
+   const/16 v0, 42
+   invoke-static {v0}, LIrreducibleLoop;->$noinline$m(I)V
+   if-eqz p0, :b22
+   goto :b34
+
+   :b34
+   goto :b20
+
+   :b20
+   if-nez p0, :b45
+   goto :b46
+
+   :b46
+   goto :b21
+
+   :b21
+   goto :b34
+
+   :b22
+   :try_start
+   div-int v0, v0, v0
+   :try_end
+   .catchall {:try_start .. :try_end} :b34
+   goto :b20
+
+   :b45
+   invoke-static {v0}, LIrreducibleLoop;->$noinline$m(I)V
+   goto :b26
+
+   :b26
+   return v0
+.end method
+
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop2(int) dead_code_elimination$initial (before)
+## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
+## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
+## CHECK-DAG:                     Goto irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [{{i\d+}},<<Method>>] loop:none
+.method public static simpleLoop2(I)I
+   .registers 3
+   const/16 v0, 42
+
+   :try_start1
+   invoke-static {v0}, LIrreducibleLoop;->$noinline$m(I)V
+   div-int v0, v0, v0
+   :try_end1
+   .catchall {:try_start1 .. :try_end1} :b14
+
+   :try_start2
+   invoke-static {v0}, LIrreducibleLoop;->$noinline$m(I)V
+   div-int v0, v0, v0
+   :try_end2
+   .catchall {:try_start2 .. :try_end2} :b45
+   goto :b49
+
+   :b14
+   goto :b15
+
+   :b45
+   goto :b15
+
+   :b15
+   goto :b16
+
+   :b16
+   goto :b49
+
+   :b49
+   invoke-static {v0}, LIrreducibleLoop;->$noinline$m(I)V
+   div-int v0, v0, v0
+   :try_end3
+   .catchall {:b49 .. :try_end3} :b49
+   if-eqz p0, :b16
+   goto :b26
+
+   :b26
+   return v0
+.end method
+
+.method public static $noinline$m(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1, LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Prevent inlining.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
diff --git a/test/588-checker-irreducible-lifetime-hole/src/Main.java b/test/588-checker-irreducible-lifetime-hole/src/Main.java
new file mode 100644
index 0000000..98565b1
--- /dev/null
+++ b/test/588-checker-irreducible-lifetime-hole/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    {
+      Method m = c.getMethod("simpleLoop1", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+    {
+      Method m = c.getMethod("simpleLoop2", int.class);
+      Object[] arguments = { 42 };
+      System.out.println(m.invoke(null, arguments));
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/589-super-imt/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/589-super-imt/expected.txt
diff --git a/test/589-super-imt/info.txt b/test/589-super-imt/info.txt
new file mode 100644
index 0000000..c815dc9
--- /dev/null
+++ b/test/589-super-imt/info.txt
@@ -0,0 +1,2 @@
+Test that the IMT is properly set for a subclass, and that the
+subclass won't use the ImtConflictTable of its super class.
diff --git a/test/589-super-imt/src/Main.java b/test/589-super-imt/src/Main.java
new file mode 100644
index 0000000..e381ca7
--- /dev/null
+++ b/test/589-super-imt/src/Main.java
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Itf {
+  public Class<?> method1();
+  public Class<?> method2();
+  public Class<?> method3();
+  public Class<?> method4();
+  public Class<?> method5();
+  public Class<?> method6();
+  public Class<?> method7();
+  public Class<?> method8();
+  public Class<?> method9();
+  public Class<?> method10();
+  public Class<?> method11();
+  public Class<?> method12();
+  public Class<?> method13();
+  public Class<?> method14();
+  public Class<?> method15();
+  public Class<?> method16();
+  public Class<?> method17();
+  public Class<?> method18();
+  public Class<?> method19();
+  public Class<?> method20();
+  public Class<?> method21();
+  public Class<?> method22();
+  public Class<?> method23();
+  public Class<?> method24();
+  public Class<?> method25();
+  public Class<?> method26();
+  public Class<?> method27();
+  public Class<?> method28();
+  public Class<?> method29();
+  public Class<?> method30();
+  public Class<?> method31();
+  public Class<?> method32();
+  public Class<?> method33();
+  public Class<?> method34();
+  public Class<?> method35();
+  public Class<?> method36();
+  public Class<?> method37();
+  public Class<?> method38();
+  public Class<?> method39();
+  public Class<?> method40();
+  public Class<?> method41();
+  public Class<?> method42();
+  public Class<?> method43();
+  public Class<?> method44();
+  public Class<?> method45();
+  public Class<?> method46();
+  public Class<?> method47();
+  public Class<?> method48();
+  public Class<?> method49();
+  public Class<?> method50();
+  public Class<?> method51();
+  public Class<?> method52();
+  public Class<?> method53();
+  public Class<?> method54();
+  public Class<?> method55();
+  public Class<?> method56();
+  public Class<?> method57();
+  public Class<?> method58();
+  public Class<?> method59();
+  public Class<?> method60();
+  public Class<?> method61();
+  public Class<?> method62();
+  public Class<?> method63();
+  public Class<?> method64();
+  public Class<?> method65();
+  public Class<?> method66();
+  public Class<?> method67();
+  public Class<?> method68();
+  public Class<?> method69();
+  public Class<?> method70();
+  public Class<?> method71();
+  public Class<?> method72();
+  public Class<?> method73();
+  public Class<?> method74();
+  public Class<?> method75();
+  public Class<?> method76();
+  public Class<?> method77();
+  public Class<?> method78();
+  public Class<?> method79();
+}
+
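+// Itf declares 79 methods, more than the IMT has slots, so several IMT
+// entries conflict and are resolved through an ImtConflictTable; the test
+// checks that SubMain gets its own table instead of reusing Main's.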
+public class Main implements Itf {
+  public static Itf main;
+  public static void main(String[] args) {
+    main = new Main();
+    callMains();
+    main = new SubMain();
+    callSubMains();
+  }
+
+  public static void callMains() {
+    // We loop to artificially create branches. The compiler will
+    // not compile this method otherwise.
+    for (int i = 0; i < 2; ++i) {
+      expectEquals(main.method1(), Main.class);
+      expectEquals(main.method2(), Main.class);
+      expectEquals(main.method3(), Main.class);
+      expectEquals(main.method4(), Main.class);
+      expectEquals(main.method5(), Main.class);
+      expectEquals(main.method6(), Main.class);
+      expectEquals(main.method7(), Main.class);
+      expectEquals(main.method8(), Main.class);
+      expectEquals(main.method9(), Main.class);
+      expectEquals(main.method10(), Main.class);
+      expectEquals(main.method11(), Main.class);
+      expectEquals(main.method12(), Main.class);
+      expectEquals(main.method13(), Main.class);
+      expectEquals(main.method14(), Main.class);
+      expectEquals(main.method15(), Main.class);
+      expectEquals(main.method16(), Main.class);
+      expectEquals(main.method17(), Main.class);
+      expectEquals(main.method18(), Main.class);
+      expectEquals(main.method19(), Main.class);
+      expectEquals(main.method20(), Main.class);
+      expectEquals(main.method21(), Main.class);
+      expectEquals(main.method22(), Main.class);
+      expectEquals(main.method23(), Main.class);
+      expectEquals(main.method24(), Main.class);
+      expectEquals(main.method25(), Main.class);
+      expectEquals(main.method26(), Main.class);
+      expectEquals(main.method27(), Main.class);
+      expectEquals(main.method28(), Main.class);
+      expectEquals(main.method29(), Main.class);
+      expectEquals(main.method30(), Main.class);
+      expectEquals(main.method31(), Main.class);
+      expectEquals(main.method32(), Main.class);
+      expectEquals(main.method33(), Main.class);
+      expectEquals(main.method34(), Main.class);
+      expectEquals(main.method35(), Main.class);
+      expectEquals(main.method36(), Main.class);
+      expectEquals(main.method37(), Main.class);
+      expectEquals(main.method38(), Main.class);
+      expectEquals(main.method39(), Main.class);
+      expectEquals(main.method40(), Main.class);
+      expectEquals(main.method41(), Main.class);
+      expectEquals(main.method42(), Main.class);
+      expectEquals(main.method43(), Main.class);
+      expectEquals(main.method44(), Main.class);
+      expectEquals(main.method45(), Main.class);
+      expectEquals(main.method46(), Main.class);
+      expectEquals(main.method47(), Main.class);
+      expectEquals(main.method48(), Main.class);
+      expectEquals(main.method49(), Main.class);
+      expectEquals(main.method50(), Main.class);
+      expectEquals(main.method51(), Main.class);
+      expectEquals(main.method52(), Main.class);
+      expectEquals(main.method53(), Main.class);
+      expectEquals(main.method54(), Main.class);
+      expectEquals(main.method55(), Main.class);
+      expectEquals(main.method56(), Main.class);
+      expectEquals(main.method57(), Main.class);
+      expectEquals(main.method58(), Main.class);
+      expectEquals(main.method59(), Main.class);
+      expectEquals(main.method60(), Main.class);
+      expectEquals(main.method61(), Main.class);
+      expectEquals(main.method62(), Main.class);
+      expectEquals(main.method63(), Main.class);
+      expectEquals(main.method64(), Main.class);
+      expectEquals(main.method65(), Main.class);
+      expectEquals(main.method66(), Main.class);
+      expectEquals(main.method67(), Main.class);
+      expectEquals(main.method68(), Main.class);
+      expectEquals(main.method69(), Main.class);
+      expectEquals(main.method70(), Main.class);
+      expectEquals(main.method71(), Main.class);
+      expectEquals(main.method72(), Main.class);
+      expectEquals(main.method73(), Main.class);
+      expectEquals(main.method74(), Main.class);
+      expectEquals(main.method75(), Main.class);
+      expectEquals(main.method76(), Main.class);
+      expectEquals(main.method77(), Main.class);
+      expectEquals(main.method78(), Main.class);
+      expectEquals(main.method79(), Main.class);
+    }
+  }
+
+  public static void callSubMains() {
+    // We loop to artificially create branches. The compiler will
+    // not compile this method otherwise.
+    for (int i = 0; i < 2; ++i) {
+      expectEquals(main.method1(), SubMain.class);
+      expectEquals(main.method2(), SubMain.class);
+      expectEquals(main.method3(), SubMain.class);
+      expectEquals(main.method4(), SubMain.class);
+      expectEquals(main.method5(), SubMain.class);
+      expectEquals(main.method6(), SubMain.class);
+      expectEquals(main.method7(), SubMain.class);
+      expectEquals(main.method8(), SubMain.class);
+      expectEquals(main.method9(), SubMain.class);
+      expectEquals(main.method10(), SubMain.class);
+      expectEquals(main.method11(), SubMain.class);
+      expectEquals(main.method12(), SubMain.class);
+      expectEquals(main.method13(), SubMain.class);
+      expectEquals(main.method14(), SubMain.class);
+      expectEquals(main.method15(), SubMain.class);
+      expectEquals(main.method16(), SubMain.class);
+      expectEquals(main.method17(), SubMain.class);
+      expectEquals(main.method18(), SubMain.class);
+      expectEquals(main.method19(), SubMain.class);
+      expectEquals(main.method20(), SubMain.class);
+      expectEquals(main.method21(), SubMain.class);
+      expectEquals(main.method22(), SubMain.class);
+      expectEquals(main.method23(), SubMain.class);
+      expectEquals(main.method24(), SubMain.class);
+      expectEquals(main.method25(), SubMain.class);
+      expectEquals(main.method26(), SubMain.class);
+      expectEquals(main.method27(), SubMain.class);
+      expectEquals(main.method28(), SubMain.class);
+      expectEquals(main.method29(), SubMain.class);
+      expectEquals(main.method30(), SubMain.class);
+      expectEquals(main.method31(), SubMain.class);
+      expectEquals(main.method32(), SubMain.class);
+      expectEquals(main.method33(), SubMain.class);
+      expectEquals(main.method34(), SubMain.class);
+      expectEquals(main.method35(), SubMain.class);
+      expectEquals(main.method36(), SubMain.class);
+      expectEquals(main.method37(), SubMain.class);
+      expectEquals(main.method38(), SubMain.class);
+      expectEquals(main.method39(), SubMain.class);
+      expectEquals(main.method40(), SubMain.class);
+      expectEquals(main.method41(), SubMain.class);
+      expectEquals(main.method42(), SubMain.class);
+      expectEquals(main.method43(), SubMain.class);
+      expectEquals(main.method44(), SubMain.class);
+      expectEquals(main.method45(), SubMain.class);
+      expectEquals(main.method46(), SubMain.class);
+      expectEquals(main.method47(), SubMain.class);
+      expectEquals(main.method48(), SubMain.class);
+      expectEquals(main.method49(), SubMain.class);
+      expectEquals(main.method50(), SubMain.class);
+      expectEquals(main.method51(), SubMain.class);
+      expectEquals(main.method52(), SubMain.class);
+      expectEquals(main.method53(), SubMain.class);
+      expectEquals(main.method54(), SubMain.class);
+      expectEquals(main.method55(), SubMain.class);
+      expectEquals(main.method56(), SubMain.class);
+      expectEquals(main.method57(), SubMain.class);
+      expectEquals(main.method58(), SubMain.class);
+      expectEquals(main.method59(), SubMain.class);
+      expectEquals(main.method60(), SubMain.class);
+      expectEquals(main.method61(), SubMain.class);
+      expectEquals(main.method62(), SubMain.class);
+      expectEquals(main.method63(), SubMain.class);
+      expectEquals(main.method64(), SubMain.class);
+      expectEquals(main.method65(), SubMain.class);
+      expectEquals(main.method66(), SubMain.class);
+      expectEquals(main.method67(), SubMain.class);
+      expectEquals(main.method68(), SubMain.class);
+      expectEquals(main.method69(), SubMain.class);
+      expectEquals(main.method70(), SubMain.class);
+      expectEquals(main.method71(), SubMain.class);
+      expectEquals(main.method72(), SubMain.class);
+      expectEquals(main.method73(), SubMain.class);
+      expectEquals(main.method74(), SubMain.class);
+      expectEquals(main.method75(), SubMain.class);
+      expectEquals(main.method76(), SubMain.class);
+      expectEquals(main.method77(), SubMain.class);
+      expectEquals(main.method78(), SubMain.class);
+      expectEquals(main.method79(), SubMain.class);
+    }
+  }
+
+  public static void expectEquals(Object actual, Object expected) {
+    if (!actual.equals(expected)) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public Class<?> method1() { return Main.class; }
+  public Class<?> method2() { return Main.class; }
+  public Class<?> method3() { return Main.class; }
+  public Class<?> method4() { return Main.class; }
+  public Class<?> method5() { return Main.class; }
+  public Class<?> method6() { return Main.class; }
+  public Class<?> method7() { return Main.class; }
+  public Class<?> method8() { return Main.class; }
+  public Class<?> method9() { return Main.class; }
+  public Class<?> method10() { return Main.class; }
+  public Class<?> method11() { return Main.class; }
+  public Class<?> method12() { return Main.class; }
+  public Class<?> method13() { return Main.class; }
+  public Class<?> method14() { return Main.class; }
+  public Class<?> method15() { return Main.class; }
+  public Class<?> method16() { return Main.class; }
+  public Class<?> method17() { return Main.class; }
+  public Class<?> method18() { return Main.class; }
+  public Class<?> method19() { return Main.class; }
+  public Class<?> method20() { return Main.class; }
+  public Class<?> method21() { return Main.class; }
+  public Class<?> method22() { return Main.class; }
+  public Class<?> method23() { return Main.class; }
+  public Class<?> method24() { return Main.class; }
+  public Class<?> method25() { return Main.class; }
+  public Class<?> method26() { return Main.class; }
+  public Class<?> method27() { return Main.class; }
+  public Class<?> method28() { return Main.class; }
+  public Class<?> method29() { return Main.class; }
+  public Class<?> method30() { return Main.class; }
+  public Class<?> method31() { return Main.class; }
+  public Class<?> method32() { return Main.class; }
+  public Class<?> method33() { return Main.class; }
+  public Class<?> method34() { return Main.class; }
+  public Class<?> method35() { return Main.class; }
+  public Class<?> method36() { return Main.class; }
+  public Class<?> method37() { return Main.class; }
+  public Class<?> method38() { return Main.class; }
+  public Class<?> method39() { return Main.class; }
+  public Class<?> method40() { return Main.class; }
+  public Class<?> method41() { return Main.class; }
+  public Class<?> method42() { return Main.class; }
+  public Class<?> method43() { return Main.class; }
+  public Class<?> method44() { return Main.class; }
+  public Class<?> method45() { return Main.class; }
+  public Class<?> method46() { return Main.class; }
+  public Class<?> method47() { return Main.class; }
+  public Class<?> method48() { return Main.class; }
+  public Class<?> method49() { return Main.class; }
+  public Class<?> method50() { return Main.class; }
+  public Class<?> method51() { return Main.class; }
+  public Class<?> method52() { return Main.class; }
+  public Class<?> method53() { return Main.class; }
+  public Class<?> method54() { return Main.class; }
+  public Class<?> method55() { return Main.class; }
+  public Class<?> method56() { return Main.class; }
+  public Class<?> method57() { return Main.class; }
+  public Class<?> method58() { return Main.class; }
+  public Class<?> method59() { return Main.class; }
+  public Class<?> method60() { return Main.class; }
+  public Class<?> method61() { return Main.class; }
+  public Class<?> method62() { return Main.class; }
+  public Class<?> method63() { return Main.class; }
+  public Class<?> method64() { return Main.class; }
+  public Class<?> method65() { return Main.class; }
+  public Class<?> method66() { return Main.class; }
+  public Class<?> method67() { return Main.class; }
+  public Class<?> method68() { return Main.class; }
+  public Class<?> method69() { return Main.class; }
+  public Class<?> method70() { return Main.class; }
+  public Class<?> method71() { return Main.class; }
+  public Class<?> method72() { return Main.class; }
+  public Class<?> method73() { return Main.class; }
+  public Class<?> method74() { return Main.class; }
+  public Class<?> method75() { return Main.class; }
+  public Class<?> method76() { return Main.class; }
+  public Class<?> method77() { return Main.class; }
+  public Class<?> method78() { return Main.class; }
+  public Class<?> method79() { return Main.class; }
+}
+
+class SubMain extends Main {
+  public Class<?> method1() { return SubMain.class; }
+  public Class<?> method2() { return SubMain.class; }
+  public Class<?> method3() { return SubMain.class; }
+  public Class<?> method4() { return SubMain.class; }
+  public Class<?> method5() { return SubMain.class; }
+  public Class<?> method6() { return SubMain.class; }
+  public Class<?> method7() { return SubMain.class; }
+  public Class<?> method8() { return SubMain.class; }
+  public Class<?> method9() { return SubMain.class; }
+  public Class<?> method10() { return SubMain.class; }
+  public Class<?> method11() { return SubMain.class; }
+  public Class<?> method12() { return SubMain.class; }
+  public Class<?> method13() { return SubMain.class; }
+  public Class<?> method14() { return SubMain.class; }
+  public Class<?> method15() { return SubMain.class; }
+  public Class<?> method16() { return SubMain.class; }
+  public Class<?> method17() { return SubMain.class; }
+  public Class<?> method18() { return SubMain.class; }
+  public Class<?> method19() { return SubMain.class; }
+  public Class<?> method20() { return SubMain.class; }
+  public Class<?> method21() { return SubMain.class; }
+  public Class<?> method22() { return SubMain.class; }
+  public Class<?> method23() { return SubMain.class; }
+  public Class<?> method24() { return SubMain.class; }
+  public Class<?> method25() { return SubMain.class; }
+  public Class<?> method26() { return SubMain.class; }
+  public Class<?> method27() { return SubMain.class; }
+  public Class<?> method28() { return SubMain.class; }
+  public Class<?> method29() { return SubMain.class; }
+  public Class<?> method30() { return SubMain.class; }
+  public Class<?> method31() { return SubMain.class; }
+  public Class<?> method32() { return SubMain.class; }
+  public Class<?> method33() { return SubMain.class; }
+  public Class<?> method34() { return SubMain.class; }
+  public Class<?> method35() { return SubMain.class; }
+  public Class<?> method36() { return SubMain.class; }
+  public Class<?> method37() { return SubMain.class; }
+  public Class<?> method38() { return SubMain.class; }
+  public Class<?> method39() { return SubMain.class; }
+  public Class<?> method40() { return SubMain.class; }
+  public Class<?> method41() { return SubMain.class; }
+  public Class<?> method42() { return SubMain.class; }
+  public Class<?> method43() { return SubMain.class; }
+  public Class<?> method44() { return SubMain.class; }
+  public Class<?> method45() { return SubMain.class; }
+  public Class<?> method46() { return SubMain.class; }
+  public Class<?> method47() { return SubMain.class; }
+  public Class<?> method48() { return SubMain.class; }
+  public Class<?> method49() { return SubMain.class; }
+  public Class<?> method50() { return SubMain.class; }
+  public Class<?> method51() { return SubMain.class; }
+  public Class<?> method52() { return SubMain.class; }
+  public Class<?> method53() { return SubMain.class; }
+  public Class<?> method54() { return SubMain.class; }
+  public Class<?> method55() { return SubMain.class; }
+  public Class<?> method56() { return SubMain.class; }
+  public Class<?> method57() { return SubMain.class; }
+  public Class<?> method58() { return SubMain.class; }
+  public Class<?> method59() { return SubMain.class; }
+  public Class<?> method60() { return SubMain.class; }
+  public Class<?> method61() { return SubMain.class; }
+  public Class<?> method62() { return SubMain.class; }
+  public Class<?> method63() { return SubMain.class; }
+  public Class<?> method64() { return SubMain.class; }
+  public Class<?> method65() { return SubMain.class; }
+  public Class<?> method66() { return SubMain.class; }
+  public Class<?> method67() { return SubMain.class; }
+  public Class<?> method68() { return SubMain.class; }
+  public Class<?> method69() { return SubMain.class; }
+  public Class<?> method70() { return SubMain.class; }
+  public Class<?> method71() { return SubMain.class; }
+  public Class<?> method72() { return SubMain.class; }
+  public Class<?> method73() { return SubMain.class; }
+  public Class<?> method74() { return SubMain.class; }
+  public Class<?> method75() { return SubMain.class; }
+  public Class<?> method76() { return SubMain.class; }
+  public Class<?> method77() { return SubMain.class; }
+  public Class<?> method78() { return SubMain.class; }
+  public Class<?> method79() { return SubMain.class; }
+}
diff --git a/test/590-checker-array-set-null-regression/expected.txt b/test/590-checker-array-set-null-regression/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/590-checker-array-set-null-regression/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/590-checker-array-set-null-regression/info.txt b/test/590-checker-array-set-null-regression/info.txt
new file mode 100644
index 0000000..fe173a3
--- /dev/null
+++ b/test/590-checker-array-set-null-regression/info.txt
@@ -0,0 +1,11 @@
+Regression test for art::PrepareForRegisterAllocation, which replaces
+
+  ArraySet[array, index, BoundType[NullConstant]]
+
+with
+
+  ArraySet[array, index, NullConstant]
+
+but used to forget to remove the "need for a type check" bit in the
+ArraySet, thus failing "!may_need_runtime_call_for_type_check"
+assertions in code generators.
diff --git a/test/590-checker-array-set-null-regression/src/Main.java b/test/590-checker-array-set-null-regression/src/Main.java
new file mode 100644
index 0000000..792ee4e
--- /dev/null
+++ b/test/590-checker-array-set-null-regression/src/Main.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    Element[] elements = new Element[51];
+    testArraySetCheckCastNull(elements);
+
+    System.out.println("passed");
+  }
+
+  /// CHECK-START: void Main.testArraySetCheckCastNull(Main$Element[]) builder (after)
+  /// CHECK:         <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         IntConstant 42
+  /// CHECK-DAG:     <<Null:l\d+>>          NullConstant
+  /// CHECK-DAG:     <<Class:l\d+>>         LoadClass
+  /// CHECK-DAG:                            CheckCast [<<Null>>,<<Class>>]
+  /// CHECK-DAG:     <<CheckedValue:l\d+>>  BoundType [<<Null>>] klass:Main$Element can_be_null:true
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>]
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<CheckedValue>>] needs_type_check:true
+
+  /// CHECK-START: void Main.testArraySetCheckCastNull(Main$Element[]) instruction_simplifier (after)
+  /// CHECK-NOT:                            CheckCast
+
+  /// CHECK-START: void Main.testArraySetCheckCastNull(Main$Element[]) prepare_for_register_allocation (before)
+  /// CHECK:         <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         IntConstant 42
+  /// CHECK-DAG:     <<Null:l\d+>>          NullConstant
+  /// CHECK-DAG:     <<Class:l\d+>>         LoadClass
+  /// CHECK-DAG:     <<CheckedValue:l\d+>>  BoundType [<<Null>>]
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>]
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<CheckedValue>>] needs_type_check:true
+
+  /// CHECK-START: void Main.testArraySetCheckCastNull(Main$Element[]) prepare_for_register_allocation (after)
+  /// CHECK:         <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         IntConstant 42
+  /// CHECK-DAG:     <<Null:l\d+>>          NullConstant
+  /// CHECK-DAG:     <<Class:l\d+>>         LoadClass
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<Array>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Null>>] needs_type_check:false
+
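+  // Storing a null that flows through a CheckCast yields
+  // ArraySet[array, index, BoundType[NullConstant]]; as described in
+  // info.txt, PrepareForRegisterAllocation rewrites it to use the
+  // NullConstant directly and must also clear needs_type_check.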
+  static void testArraySetCheckCastNull(Element[] elements) {
+    Object object = null;
+    Element element = (Element) object;
+    elements[42] = element;
+  }
+
+  class Element {}
+
+}
diff --git a/test/530-checker-loops/expected.txt b/test/590-infinite-loop-with-nop/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/590-infinite-loop-with-nop/expected.txt
diff --git a/test/590-infinite-loop-with-nop/info.txt b/test/590-infinite-loop-with-nop/info.txt
new file mode 100644
index 0000000..ee09709
--- /dev/null
+++ b/test/590-infinite-loop-with-nop/info.txt
@@ -0,0 +1 @@
+Regression test for a debug-build check failure on an infinite loop with a NOP.
diff --git a/test/590-infinite-loop-with-nop/smali/TestCase.smali b/test/590-infinite-loop-with-nop/smali/TestCase.smali
new file mode 100644
index 0000000..7ea495d
--- /dev/null
+++ b/test/590-infinite-loop-with-nop/smali/TestCase.smali
@@ -0,0 +1,28 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class LTestCase;
+
+.super Ljava/lang/Object;
+
+.method static public infiniteLoop()V
+  .registers 0
+  :infinite_loop
+  nop
+  goto :infinite_loop
+.end method
+
+# Add a field to work around
+#   Failure to verify dex file '...': Offset(208) should be zero when size is zero for field-ids.
+.field private a:I
diff --git a/test/590-infinite-loop-with-nop/src/Main.java b/test/590-infinite-loop-with-nop/src/Main.java
new file mode 100644
index 0000000..531ff28
--- /dev/null
+++ b/test/590-infinite-loop-with-nop/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+  }
+}
+
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/591-checker-regression-dead-loop/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/591-checker-regression-dead-loop/expected.txt
diff --git a/test/591-checker-regression-dead-loop/info.txt b/test/591-checker-regression-dead-loop/info.txt
new file mode 100644
index 0000000..f192b8d
--- /dev/null
+++ b/test/591-checker-regression-dead-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for Optimizing's dead block elimination which used to remove
+dependencies in the wrong order.
\ No newline at end of file
diff --git a/test/591-checker-regression-dead-loop/src/Main.java b/test/591-checker-regression-dead-loop/src/Main.java
new file mode 100644
index 0000000..19856cf
--- /dev/null
+++ b/test/591-checker-regression-dead-loop/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  private static boolean $inline$false() { return false; }
+
+  /// CHECK-START: void Main.main(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG:     <<Const0:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
+  /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Const0>>,<<Add:i\d+>>] loop:{{B\d+}}
+  /// CHECK-DAG:                     InvokeVirtual [{{l\d+}},<<Phi>>] method_name:java.io.PrintStream.println
+  /// CHECK-DAG:     <<Add>>         Add [<<Phi>>,<<Const1>>]
+
+  public static void main(String[] args) {
+    if ($inline$false()) {
+      int x = 0;
+      while (true) {
+        System.out.println(x++);
+      }
+    }
+  }
+}
diff --git a/test/591-new-instance-string/expected.txt b/test/591-new-instance-string/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/591-new-instance-string/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/591-new-instance-string/info.txt b/test/591-new-instance-string/info.txt
new file mode 100644
index 0000000..16c7b1f
--- /dev/null
+++ b/test/591-new-instance-string/info.txt
@@ -0,0 +1 @@
+Regression test for a new-instance that reaches multiple <init> calls.
diff --git a/test/591-new-instance-string/smali/new-instance.smali b/test/591-new-instance-string/smali/new-instance.smali
new file mode 100644
index 0000000..42559ca
--- /dev/null
+++ b/test/591-new-instance-string/smali/new-instance.smali
@@ -0,0 +1,29 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LNewInstance;
+.super Ljava/lang/Object;
+
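+# Both branches below invoke String.<init> on the same new-instance result,
+# so the compiler sees a single NewInstance reaching two <init> calls.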
+.method public static multipleInit(I)V
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+    if-eqz v1, :Skip
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+    goto :Done
+:Skip
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+:Done
+    return-void
+.end method
diff --git a/test/591-new-instance-string/src/Main.java b/test/591-new-instance-string/src/Main.java
new file mode 100644
index 0000000..bd59b95
--- /dev/null
+++ b/test/591-new-instance-string/src/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+
+  public static void main(String args[]) throws Throwable {
+    Class<?> c = Class.forName("NewInstance");
+    Method m = c.getMethod("multipleInit", int.class);
+    m.invoke(null, 0);
+    m.invoke(null, 1);
+    System.out.println("passed");
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/592-checker-regression-bool-input/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/592-checker-regression-bool-input/expected.txt
diff --git a/test/592-checker-regression-bool-input/info.txt b/test/592-checker-regression-bool-input/info.txt
new file mode 100644
index 0000000..8b97d9d
--- /dev/null
+++ b/test/592-checker-regression-bool-input/info.txt
@@ -0,0 +1,2 @@
+Regression test for Optimizing's GraphChecker which used to verify the internal
+type of a boolean input.
\ No newline at end of file
diff --git a/test/592-checker-regression-bool-input/smali/TestCase.smali b/test/592-checker-regression-bool-input/smali/TestCase.smali
new file mode 100644
index 0000000..56c499d
--- /dev/null
+++ b/test/592-checker-regression-bool-input/smali/TestCase.smali
@@ -0,0 +1,42 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: boolean TestCase.testCase() load_store_elimination (after)
+## CHECK-DAG:     If [{{b\d+}}]
+
+.method public static testCase()Z
+    .registers 6
+
+    sget-boolean v0, LMain;->field0:Z
+    sget-boolean v1, LMain;->field1:Z
+    or-int v2, v0, v1
+    int-to-byte v2, v2
+    sput-boolean v2, LMain;->field2:Z
+
+    # LSE will replace this sget with the type conversion above...
+    sget-boolean v2, LMain;->field2:Z
+
+    # ... and generate an If with a byte-typed condition.
+    if-eqz v2, :else
+    const v0, 0x1
+    return v0
+
+    :else
+    const v0, 0x0
+    return v0
+.end method
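+
+# Note: field2 is declared boolean, but the value stored into it comes
+# from an int-to-byte conversion, so after LSE the If above sees a
+# byte-typed condition. GraphChecker used to reject that input type,
+# which is the regression this test guards against.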
diff --git a/test/592-checker-regression-bool-input/src/Main.java b/test/592-checker-regression-bool-input/src/Main.java
new file mode 100644
index 0000000..35ae59c
--- /dev/null
+++ b/test/592-checker-regression-bool-input/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static boolean field0;
+  public static boolean field1;
+  public static boolean field2;
+
+  public static void assertTrue(boolean result) {
+    if (!result) {
+      throw new Error("Expected true");
+    }
+  }
+
+  public static void assertFalse(boolean result) {
+    if (result) {
+      throw new Error("Expected false");
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase");
+
+    try {
+      field0 = true;
+      field1 = false;
+      assertTrue((Boolean) m.invoke(null, null));
+
+      field0 = true;
+      field1 = true;
+      assertTrue((Boolean) m.invoke(null, null));
+
+      field0 = false;
+      field1 = false;
+      assertFalse((Boolean) m.invoke(null, null));
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+}
diff --git a/test/593-checker-boolean-to-integral-conv/expected.txt b/test/593-checker-boolean-to-integral-conv/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/593-checker-boolean-to-integral-conv/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/593-checker-boolean-to-integral-conv/info.txt b/test/593-checker-boolean-to-integral-conv/info.txt
new file mode 100644
index 0000000..2d883c7
--- /dev/null
+++ b/test/593-checker-boolean-to-integral-conv/info.txt
@@ -0,0 +1 @@
+Regression test for boolean-to-integral-type conversions.
diff --git a/test/593-checker-boolean-to-integral-conv/src/Main.java b/test/593-checker-boolean-to-integral-conv/src/Main.java
new file mode 100644
index 0000000..b4c91c8
--- /dev/null
+++ b/test/593-checker-boolean-to-integral-conv/src/Main.java
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    expectEqualsByte((byte)1, booleanToByte(true));
+    expectEqualsShort((short)1, booleanToShort(true));
+    expectEqualsChar((char)1, booleanToChar(true));
+    expectEqualsInt(1, booleanToInt(true));
+    expectEqualsLong(1L, booleanToLong(true));
+
+    expectEqualsInt(1, longToIntOfBoolean());
+
+    System.out.println("passed");
+  }
+
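+  // The checker blocks below follow each conversion through three
+  // stages: the builder emits an explicit diamond (If/Phi plus a
+  // TypeConversion), select_generator folds the diamond into a Select,
+  // and the last simplifier pass returns the boolean argument directly
+  // (or, for long, a single widening conversion), since ART models
+  // booleans as 0/1 integers internally.
+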
+  /// CHECK-START: byte Main.booleanToByte(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToS:b\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: byte Main.booleanToByte(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToS:b\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: byte Main.booleanToByte(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static byte booleanToByte(boolean b) {
+    return (byte)(b ? 1 : 0);
+  }
+
+  /// CHECK-START: short Main.booleanToShort(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToS:s\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: short Main.booleanToShort(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToS:s\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: short Main.booleanToShort(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static short booleanToShort(boolean b) {
+    return (short)(b ? 1 : 0);
+  }
+
+  /// CHECK-START: char Main.booleanToChar(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToC:c\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToC>>]
+
+  /// CHECK-START: char Main.booleanToChar(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToC:c\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToC>>]
+
+  /// CHECK-START: char Main.booleanToChar(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static char booleanToChar(boolean b) {
+    return (char)(b ? 1 : 0);
+  }
+
+  /// CHECK-START: int Main.booleanToInt(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:                            Return [<<Phi>>]
+
+  /// CHECK-START: int Main.booleanToInt(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:                            Return [<<Sel>>]
+
+  /// CHECK-START: int Main.booleanToInt(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static int booleanToInt(boolean b) {
+    return b ? 1 : 0;
+  }
+
+  /// CHECK-START: long Main.booleanToLong(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToJ>>]
+
+  /// CHECK-START: long Main.booleanToLong(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToJ>>]
+
+  /// CHECK-START: long Main.booleanToLong(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<ZToJ:j\d+>>          TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                            Return [<<ZToJ>>]
+
+  static long booleanToLong(boolean b) {
+    return b ? 1 : 0;
+  }
+
+  /// CHECK-START: int Main.longToIntOfBoolean() builder (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:     <<ZToJ:j\d+>>          InvokeStaticOrDirect [<<Sget>>,<<Method>>]
+  /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<ZToJ>>]
+  /// CHECK-DAG:                            Return [<<JToI>>]
+
+  /// CHECK-START: int Main.longToIntOfBoolean() inliner (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:                            If [<<Sget>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<IToJ>>]
+  /// CHECK-DAG:                            Return [<<JToI>>]
+
+  /// CHECK-START: int Main.longToIntOfBoolean() select_generator (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Sget>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<IToJ>>]
+  /// CHECK-DAG:                            Return [<<JToI>>]
+
+  /// CHECK-START: int Main.longToIntOfBoolean() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:                            Return [<<Sget>>]
+
+  static int longToIntOfBoolean() {
+    long l = booleanToLong(booleanField);
+    return (int) l;
+  }
+
+  private static void expectEqualsByte(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsShort(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsChar(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsInt(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsLong(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean booleanField = true;
+
+}
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/593-checker-long-to-float-regression/expected.txt
similarity index 100%
copy from test/469-condition-materialization-regression/expected.txt
copy to test/593-checker-long-to-float-regression/expected.txt
diff --git a/test/593-checker-long-to-float-regression/info.txt b/test/593-checker-long-to-float-regression/info.txt
new file mode 100644
index 0000000..39402e9
--- /dev/null
+++ b/test/593-checker-long-to-float-regression/info.txt
@@ -0,0 +1,3 @@
+Regression test for x86_64's code generator, which had a bug in the
+long-to-float implementation: it loaded a constant as a 64-bit double
+instead of a 32-bit float.
diff --git a/test/593-checker-long-to-float-regression/src/Main.java b/test/593-checker-long-to-float-regression/src/Main.java
new file mode 100644
index 0000000..9c07f3d
--- /dev/null
+++ b/test/593-checker-long-to-float-regression/src/Main.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  static boolean doThrow = false;
+  static long longValue;
+
+  public static void assertEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEquals(1.0F, $noinline$longToFloat());
+  }
+
+  /// CHECK-START: float Main.$noinline$longToFloat() register (after)
+  /// CHECK-DAG:     <<Const1:j\d+>>   LongConstant 1
+  /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const1>>]
+  /// CHECK-DAG:                       Return [<<Convert>>]
+
+  static float $noinline$longToFloat() {
+    if (doThrow) { throw new Error(); }
+    longValue = $inline$returnConst();
+    return (float) longValue;
+  }
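+
+  // Why a constant matters here (one reading of the bug in info.txt):
+  // the expected result 1.0f has bit pattern 0x3F800000, while the
+  // 64-bit double constant 1.0 is 0x3FF0000000000000; loading the
+  // constant at the wrong width yields a wrong float value, which the
+  // assertEquals(1.0F, ...) in main() would catch.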
+
+  static long $inline$returnConst() {
+    return 1L;
+  }
+}
diff --git a/test/593-checker-shift-and-simplifier/expected.txt b/test/593-checker-shift-and-simplifier/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/593-checker-shift-and-simplifier/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/593-checker-shift-and-simplifier/info.txt b/test/593-checker-shift-and-simplifier/info.txt
new file mode 100644
index 0000000..2f4c7f5
--- /dev/null
+++ b/test/593-checker-shift-and-simplifier/info.txt
@@ -0,0 +1 @@
+Regression test on a pattern that caused a double removal of an AND operation by the ARM64 simplifier.
diff --git a/test/593-checker-shift-and-simplifier/src/Main.java b/test/593-checker-shift-and-simplifier/src/Main.java
new file mode 100644
index 0000000..65e809a
--- /dev/null
+++ b/test/593-checker-shift-and-simplifier/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private static int[] a = { 10 };
+
+  // A very particular set of operations that caused a double removal by the
+  // ARM64 simplifier doing "forward" removals (b/27851582).
+
+  /// CHECK-START-ARM64: int Main.operations() instruction_simplifier_arm64 (before)
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet
+  /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
+  /// CHECK-DAG: <<Shl:i\d+>> Shl [<<Get>>,i{{\d+}}]
+  /// CHECK-DAG:              And [<<Not>>,<<Shl>>]
+  //
+  /// CHECK-START-ARM64: int Main.operations() instruction_simplifier_arm64 (after)
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet
+  /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
+  /// CHECK-DAG:              Arm64DataProcWithShifterOp [<<Not>>,<<Get>>] kind:And+LSL shift:2
+  private static int operations() {
+    int r = a[0];
+    int n = ~r;
+    int s = r << 2;
+    int a = s & n;
+    return a;
+  }
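+
+  // Worked expectation for main() below: r = 10 (0b1010), n = ~10 = -11
+  // (binary ...11110101), s = 10 << 2 = 40 (0b101000), and 40 & -11
+  // keeps only bit 5, giving 0b100000 = 32.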
+
+  public static void main(String[] args) {
+    if (operations() != 32) {
+      System.out.println("failed");
+    } else {
+      System.out.println("passed");
+    }
+  }
+}
diff --git a/test/594-checker-array-alias/expected.txt b/test/594-checker-array-alias/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/594-checker-array-alias/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/594-checker-array-alias/info.txt b/test/594-checker-array-alias/info.txt
new file mode 100644
index 0000000..57c6de5
--- /dev/null
+++ b/test/594-checker-array-alias/info.txt
@@ -0,0 +1 @@
+Tests on array parameters with and without aliasing.
diff --git a/test/594-checker-array-alias/src/Main.java b/test/594-checker-array-alias/src/Main.java
new file mode 100644
index 0000000..5ece2e2
--- /dev/null
+++ b/test/594-checker-array-alias/src/Main.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+//
+// Test on array parameters with or without potential aliasing.
+//
+public class Main {
+
+  //
+  // Cross-over on parameters with potential aliasing.
+  // The arrays a and b may point to the same memory, which (without
+  // further runtime tests) prevents hoisting the seemingly invariant
+  // array reference.
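+  //
+  // For example, with CrossOverLoop1(a, a) the store b[20] = 99 lands in
+  // a[20]; iterations i = 0..20 read 99 and store 92 (i = 20 overwrites
+  // the source itself), after which i = 21..99 read 92 and store 85 --
+  // the (i <= 20 ? 92 : 85) pattern checked in main().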
+  //
+
+  /// CHECK-START: void Main.CrossOverLoop1(int[], int[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop1(int[], int[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop1(int a[], int b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop2(float[], float[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop2(float[], float[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop2(float a[], float b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop3(long[], long[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop3(long[], long[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop3(long a[], long b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop4(double[], double[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop4(double[], double[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop4(double a[], double b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  //
+  // False cross-over on parameters. The parameters have the same width
+  // (which used to cause false type aliasing in an older version of the
+  // compiler), but since the types differ, the arrays cannot alias.
+  // Thus, the invariant array reference can be hoisted.
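+  //
+  // For example, FalseCrossOverLoop1(aI, aF) stores -99 into the float
+  // array only; every iteration then computes (int) b[20] - 7 = -106,
+  // and the ArrayGet is hoisted (loop:none in the checks below).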
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1(int[], float[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop1(int[], float[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop1(int a[], float b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (int) b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2(float[], int[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop2(float[], int[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop2(float a[], int b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3(long[], double[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop3(long[], double[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop3(long a[], double b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (long) b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4(double[], long[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop4(double[], long[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop4(double a[], long b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    int[] aI = new int[100];
+    float[] aF = new float[100];
+    long[] aJ = new long[100];
+    double[] aD = new double[100];
+
+    // Type I.
+    CrossOverLoop1(aI, aI);
+    for (int i = 0; i < aI.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aI[i]);
+    }
+    // Type F.
+    CrossOverLoop2(aF, aF);
+    for (int i = 0; i < aF.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aF[i]);
+    }
+    // Type J.
+    CrossOverLoop3(aJ, aJ);
+    for (int i = 0; i < aJ.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aJ[i]);
+    }
+    // Type D.
+    CrossOverLoop4(aD, aD);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aD[i]);
+    }
+
+    // Type I vs F.
+    FalseCrossOverLoop1(aI, aF);
+    for (int i = 0; i < aI.length; i++) {
+      expectEquals(-106, aI[i]);
+    }
+    // Type F vs I.
+    FalseCrossOverLoop2(aF, aI);
+    for (int i = 0; i < aF.length; i++) {
+      expectEquals(-106, aF[i]);
+    }
+    // Type J vs D.
+    FalseCrossOverLoop3(aJ, aD);
+    for (int i = 0; i < aJ.length; i++) {
+      expectEquals(-106, aJ[i]);
+    }
+    // Type D vs J.
+    FalseCrossOverLoop4(aD, aJ);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals(-106, aD[i]);
+    }
+
+    // Real-world example where incorrect type assignment could introduce a bug.
+    // The library sorting algorithm is heavy on array reads and writes, and
+    // assigning the wrong J/D type to one of these would introduce errors.
+    for (int i = 0; i < aD.length; i++) {
+      aD[i] = aD.length - i - 1;
+    }
+    Arrays.sort(aD);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals((double) i, aD[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/594-checker-irreducible-linorder/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/594-checker-irreducible-linorder/expected.txt
diff --git a/test/594-checker-irreducible-linorder/info.txt b/test/594-checker-irreducible-linorder/info.txt
new file mode 100644
index 0000000..a1783f8
--- /dev/null
+++ b/test/594-checker-irreducible-linorder/info.txt
@@ -0,0 +1,2 @@
+Regression test for a failing DCHECK in SSA liveness analysis in the presence
+of irreducible loops.
diff --git a/test/594-checker-irreducible-linorder/smali/IrreducibleLoop.smali b/test/594-checker-irreducible-linorder/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..ef53ee8
--- /dev/null
+++ b/test/594-checker-irreducible-linorder/smali/IrreducibleLoop.smali
@@ -0,0 +1,123 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+.super Ljava/lang/Object;
+
+# Test case where liveness analysis produces a linear order in which loop
+# blocks are not adjacent.
+
+## CHECK-START: int IrreducibleLoop.liveness(boolean, boolean, boolean, int) builder (after)
+## CHECK-DAG:     Add loop:none
+## CHECK-DAG:     Mul loop:<<Loop:B\d+>>
+## CHECK-DAG:     Not loop:<<Loop>>
+
+## CHECK-START: int IrreducibleLoop.liveness(boolean, boolean, boolean, int) liveness (after)
+## CHECK-DAG:     Add liveness:<<LPreEntry:\d+>>
+## CHECK-DAG:     Mul liveness:<<LHeader:\d+>>
+## CHECK-DAG:     Not liveness:<<LBackEdge:\d+>>
+## CHECK-EVAL:    (<<LHeader>> < <<LPreEntry>>) and (<<LPreEntry>> < <<LBackEdge>>)
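+#
+# Per the CHECK-EVAL above, the expected linear order places the non-loop
+# :pre_entry block between the loop's :header and :back_edge, i.e. the
+# loop blocks are not adjacent -- the situation whose liveness DCHECK
+# this test exercises.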
+
+.method public static liveness(ZZZI)I
+   .registers 10
+   const/16 v0, 42
+
+   if-eqz p0, :header
+
+   :pre_entry
+   add-int/2addr p3, p3
+   invoke-static {v0}, Ljava/lang/System;->exit(I)V
+   goto :body1
+
+   # Trivially dead code to ensure linear order verification skips removed blocks (b/28252537).
+   :dead_code
+   nop
+   goto :dead_code
+
+   :header
+   mul-int/2addr p3, p3
+   if-eqz p1, :body2
+
+   :body1
+   goto :body_merge
+
+   :body2
+   invoke-static {v0}, Ljava/lang/System;->exit(I)V
+   goto :body_merge
+
+   :body_merge
+   if-eqz p2, :exit
+
+   :back_edge
+   not-int p3, p3
+   goto :header
+
+   :exit
+   return p3
+
+.end method
+
+## CHECK-START: int IrreducibleLoop.liveness2(boolean, boolean, boolean, int) builder (after)
+## CHECK-DAG:     Mul loop:<<Loop:B\d+>>
+## CHECK-DAG:     Not loop:<<Loop>>
+
+## CHECK-START: int IrreducibleLoop.liveness2(boolean, boolean, boolean, int) liveness (after)
+## CHECK-DAG:     Mul liveness:<<LPreEntry2:\d+>>
+## CHECK-DAG:     Not liveness:<<LBackEdge1:\d+>>
+## CHECK-EVAL:    <<LBackEdge1>> < <<LPreEntry2>>
+
+.method public liveness2(ZZZI)I
+    .registers 10
+
+    const v1, 1
+
+    :header1
+    if-eqz p0, :body1
+
+    :exit
+    return p3
+
+    :body1
+    # The test will generate an incorrect linear order when the following IF
+    # swaps its successors. To trigger that, load a boolean value and compare
+    # it NotEqual to 1.
+    sget-boolean v2, LIrreducibleLoop;->f:Z
+    const v3, 1
+    if-ne v2, v3, :pre_header2
+
+    :pre_entry2
+    # This constant has a use in a phi in :back_edge2 and a back edge use in
+    # :back_edge1. Because the linear order is wrong, the back edge use has
+    # a lower liveness than the phi use.
+    const v0, 42
+    mul-int/2addr p3, p3
+    goto :back_edge2
+
+    :back_edge2
+    add-int/2addr p3, v0
+    add-int/2addr v0, v1
+    goto :header2
+
+    :header2
+    if-eqz p2, :back_edge2
+
+    :back_edge1
+    not-int p3, p3
+    goto :header1
+
+    :pre_header2
+    const v0, 42
+    goto :header2
+.end method
+
+.field public static f:Z
diff --git a/test/594-checker-irreducible-linorder/src/Main.java b/test/594-checker-irreducible-linorder/src/Main.java
new file mode 100644
index 0000000..38b2ab4
--- /dev/null
+++ b/test/594-checker-irreducible-linorder/src/Main.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {
+    // Nothing to run. This regression test merely makes sure the smali test
+    // case successfully compiles.
+  }
+}
diff --git a/test/594-invoke-super/expected.txt b/test/594-invoke-super/expected.txt
new file mode 100644
index 0000000..de26026
--- /dev/null
+++ b/test/594-invoke-super/expected.txt
@@ -0,0 +1,7 @@
+new A
+I am A's foo
+new B
+I am B's foo
+new A
+new B
+passed
diff --git a/test/594-invoke-super/info.txt b/test/594-invoke-super/info.txt
new file mode 100644
index 0000000..440d8b8
--- /dev/null
+++ b/test/594-invoke-super/info.txt
@@ -0,0 +1 @@
+Invoke-super on various references.
diff --git a/test/594-invoke-super/smali/invoke-super.smali b/test/594-invoke-super/smali/invoke-super.smali
new file mode 100644
index 0000000..6f787dd
--- /dev/null
+++ b/test/594-invoke-super/smali/invoke-super.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LZ;
+.super LA;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LA;-><init>()V
+    return-void
+.end method
+
+.method public foo()V
+.registers 3
+    new-instance v0, LY;
+    invoke-direct {v0}, LY;-><init>()V
+    invoke-super {v0}, LY;->foo()V
+    return-void
+.end method
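+
+# Note: Z extends A, yet foo() above performs invoke-super against
+# LY;->foo() on a fresh Y instance -- a type outside Z's superclass
+# chain. Main.java expects this to fail with a NoSuchMethodError
+# (wrapped in an InvocationTargetException) rather than dispatch to
+# B.foo().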
diff --git a/test/594-invoke-super/src/Main.java b/test/594-invoke-super/src/Main.java
new file mode 100644
index 0000000..53f2bbf
--- /dev/null
+++ b/test/594-invoke-super/src/Main.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+//
+// Two classes A and B with method foo().
+//
+
+class A {
+  A() { System.out.println("new A"); }
+
+  public void foo() { System.out.println("I am A's foo"); }
+
+  // We used to invoke this method with a Y instance, due to
+  // invoke-super's underspecified behavior.
+  public void bar() { System.out.println("I am A's bar"); }
+}
+
+class B {
+  B() { System.out.println("new B"); }
+
+  public void foo() { System.out.println("I am B's foo"); }
+}
+
+//
+// Two subclasses X and Y that call foo() on super.
+//
+
+class X extends A {
+  public void foo() { super.foo(); }
+}
+
+class Y extends B {
+  public void foo() { super.foo(); }
+}
+
+//
+// Driver class.
+//
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    // The normal stuff, X's super goes to A, Y's super goes to B.
+    new X().foo();
+    new Y().foo();
+
+    // And now it gets interesting.
+
+    // In bytecode, we define a class Z that is a subclass of A, and we call
+    // invoke-super on an instance of Y.
+    Class<?> z = Class.forName("Z");
+    Method m = z.getMethod("foo");
+    try {
+      m.invoke(z.newInstance());
+      throw new Error("Expected InvocationTargetException");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+
+    System.out.println("passed");
+  }
+}
diff --git a/test/594-load-string-regression/expected.txt b/test/594-load-string-regression/expected.txt
new file mode 100644
index 0000000..365b0e1
--- /dev/null
+++ b/test/594-load-string-regression/expected.txt
@@ -0,0 +1 @@
+String: ""
diff --git a/test/594-load-string-regression/info.txt b/test/594-load-string-regression/info.txt
new file mode 100644
index 0000000..6a07ace
--- /dev/null
+++ b/test/594-load-string-regression/info.txt
@@ -0,0 +1,2 @@
+Regression test for LoadString listing side effects when it doesn't have any,
+which triggered a DCHECK() failure when merging ClinitCheck into NewInstance.
diff --git a/test/594-load-string-regression/src/Main.java b/test/594-load-string-regression/src/Main.java
new file mode 100644
index 0000000..0b9f7b5
--- /dev/null
+++ b/test/594-load-string-regression/src/Main.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  static boolean doThrow = false;
+
+  // Note: We're not doing checker tests as we cannot do them specifically for a non-PIC
+  // configuration. The check here would be "prepare_for_register_allocation (before)"
+  //     CHECK:         LoadClass
+  //     CHECK-NEXT:    ClinitCheck
+  //     CHECK-NEXT:    LoadString load_kind:BootImageAddress
+  //     CHECK-NEXT:    NewInstance
+  // and "prepare_for_register_allocation (after)"
+  //     CHECK:         LoadString
+  //     CHECK-NEXT:    NewInstance
+  // but the order of instructions for non-PIC mode is different.
+  public static int $noinline$test() {
+    if (doThrow) { throw new Error(); }
+
+    int r = 0x12345678;
+    do {
+      // LICM pulls the LoadClass and ClinitCheck out of the loop, leaves NewInstance in the loop.
+      Helper h = new Helper();
+      // For non-PIC mode, LICM pulls the boot image LoadString out of the loop.
+      // (For PIC mode, the LoadString can throw and will not be moved out of the loop.)
+      String s = "";  // Empty string is known to be in the boot image.
+      r = r ^ (r >> 5);
+      h.$noinline$printString(s);
+      // During DCE after inlining, the loop back-edge disappears and the pre-header is
+      // merged with the body, leaving consecutive LoadClass, ClinitCheck, LoadString
+      // and NewInstance in non-PIC mode. The prepare_for_register_allocation pass
+      // merges the LoadClass and ClinitCheck with the NewInstance and checks that
+      // there are no instructions with side effects in between. This check used to
+      // fail because LoadString was always listing SideEffects::CanTriggerGC() even
+      // when it doesn't really have any side effects, i.e. for direct references to
+      // boot image Strings or for Strings known to be in the dex cache.
+    } while ($inline$shouldContinue());
+    return r;
+  }
+
+  static boolean $inline$shouldContinue() {
+    return false;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(0x12345678 ^ (0x12345678 >> 5), $noinline$test());
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
+
+class Helper {
+  static boolean doThrow = false;
+
+  public void $noinline$printString(String s) {
+    if (doThrow) { throw new Error(); }
+
+    System.out.println("String: \"" + s + "\"");
+  }
+}
diff --git a/test/595-error-class/expected.txt b/test/595-error-class/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/595-error-class/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/595-error-class/info.txt b/test/595-error-class/info.txt
new file mode 100644
index 0000000..a58b8b3
--- /dev/null
+++ b/test/595-error-class/info.txt
@@ -0,0 +1 @@
+Regression test on merging an array type whose component type is in the error state.
diff --git a/test/595-error-class/smali/error.smali b/test/595-error-class/smali/error.smali
new file mode 100644
index 0000000..925c34b
--- /dev/null
+++ b/test/595-error-class/smali/error.smali
@@ -0,0 +1,23 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public final LAnError;
+
+.super LSuperOfAnError;
+
+# Override a final method to put this class in the error state.
+.method public foo()V
+  .registers 1
+  return-void
+.end method
diff --git a/test/595-error-class/smali/merge.smali b/test/595-error-class/smali/merge.smali
new file mode 100644
index 0000000..2f8b415
--- /dev/null
+++ b/test/595-error-class/smali/merge.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMerge;
+.super Ljava/lang/Object;
+
+# Method that selects between new Integer[] and new AnError[].
+# Reference type propagation should correctly see the error in the component type.
+.method public static select(Z)Ljava/lang/Object;
+    .registers 2
+    const/16 v0, 10
+    if-eqz v1, :Skip
+    new-array v0, v0, [LAnError;
+    goto :Done
+:Skip
+    new-array v0, v0, [Ljava/lang/Integer;
+:Done
+    return-object v0
+.end method
diff --git a/test/595-error-class/smali/super.smali b/test/595-error-class/smali/super.smali
new file mode 100644
index 0000000..da7467d
--- /dev/null
+++ b/test/595-error-class/smali/super.smali
@@ -0,0 +1,22 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSuperOfAnError;
+
+.super Ljava/lang/Object;
+
+.method public final foo()V
+  .registers 1
+  return-void
+.end method
diff --git a/test/595-error-class/src/Main.java b/test/595-error-class/src/Main.java
new file mode 100644
index 0000000..655fa43
--- /dev/null
+++ b/test/595-error-class/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+
+  public static void main(String args[]) throws Throwable {
+    Class<?> c = Class.forName("Merge");
+    Method m = c.getMethod("select", boolean.class);
+    Object x = m.invoke(null, true);
+    if (x == null) {
+      throw new Error("Did not get array");
+    }
+    System.out.println("passed");
+  }
+}
diff --git a/test/455-set-vreg/expected.txt b/test/595-profile-saving/expected.txt
similarity index 100%
rename from test/455-set-vreg/expected.txt
rename to test/595-profile-saving/expected.txt
diff --git a/test/595-profile-saving/info.txt b/test/595-profile-saving/info.txt
new file mode 100644
index 0000000..5d318f5
--- /dev/null
+++ b/test/595-profile-saving/info.txt
@@ -0,0 +1 @@
+Check that profile recording works even when JIT compilation is not enabled.
diff --git a/test/595-profile-saving/profile-saving.cc b/test/595-profile-saving/profile-saving.cc
new file mode 100644
index 0000000..0d26f45
--- /dev/null
+++ b/test/595-profile-saving/profile-saving.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file.h"
+
+#include "art_method-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "jit/profile_saver.h"
+#include "jni.h"
+#include "method_reference.h"
+#include "mirror/class-inl.h"
+#include "oat_file_assistant.h"
+#include "oat_file_manager.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
+#include "thread.h"
+
+namespace art {
+namespace {
+
+class CreateProfilingInfoVisitor : public StackVisitor {
+ public:
+  explicit CreateProfilingInfoVisitor(Thread* thread, const char* method_name)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name.compare(method_name_) == 0) {
+      ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
+      method_index_ = m->GetDexMethodIndex();
+      return false;
+    }
+    return true;
+  }
+
+  int method_index_ = -1;
+  const char* const method_name_;
+};
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_ensureProfilingInfo(JNIEnv* env,
+                                                                jclass,
+                                                                jstring method_name) {
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  CreateProfilingInfoVisitor visitor(soa.Self(), chars.c_str());
+  visitor.WalkStack();
+  return visitor.method_index_;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureProfileProcessing(JNIEnv*, jclass) {
+  ProfileSaver::ForceProcessProfiles();
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_presentInProfile(
+      JNIEnv* env, jclass cls, jstring filename, jint method_index) {
+  ScopedUtfChars filename_chars(env, filename);
+  CHECK(filename_chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  const DexFile* dex_file = soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetDexFile();
+  return ProfileSaver::HasSeenMethod(std::string(filename_chars.c_str()),
+                                     dex_file,
+                                     static_cast<uint16_t>(method_index));
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run
new file mode 100644
index 0000000..068ad03
--- /dev/null
+++ b/test/595-profile-saving/run
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use
+# --compiler-filter=interpret-only to make sure that the test is not compiled AOT
+# and to make sure the test is not compiled when loaded (by PathClassLoader),
+# -Xjitsaveprofilinginfo to enable profile saving, and
+# -Xusejit:false to disable the JIT and test only the profiles.
+exec ${RUN} \
+  -Xcompiler-option --compiler-filter=interpret-only \
+  --runtime-option '-Xcompiler-option --compiler-filter=interpret-only' \
+  --runtime-option -Xjitsaveprofilinginfo \
+  --runtime-option -Xusejit:false \
+  "${@}"
diff --git a/test/595-profile-saving/src/Main.java b/test/595-profile-saving/src/Main.java
new file mode 100644
index 0000000..039503f
--- /dev/null
+++ b/test/595-profile-saving/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+
+    File file = null;
+    try {
+      file = createTempFile();
+      // String codePath = getDexBaseLocation();
+      String codePath = System.getenv("DEX_LOCATION") + "/595-profile-saving.jar";
+      VMRuntime.registerAppInfo(file.getPath(),
+                                System.getenv("DEX_LOCATION"),
+                                new String[] {codePath},
+                                /* foreignProfileDir */ null);
+
+      int methodIdx = $opt$noinline$testProfile();
+      ensureProfileProcessing();
+      if (!presentInProfile(file.getPath(), methodIdx)) {
+        throw new RuntimeException("Method with index " + methodIdx + " not in the profile");
+      }
+    } finally {
+      if (file != null) {
+        file.delete();
+      }
+    }
+  }
+
+  public static int $opt$noinline$testProfile() {
+    if (doThrow) throw new Error();
+    // Make sure we have a profile info for this method without the need to loop.
+    return ensureProfilingInfo("$opt$noinline$testProfile");
+  }
+
+  // Return the dex method index.
+  public static native int ensureProfilingInfo(String methodName);
+  // Ensures the profile saver does its usual processing.
+  public static native void ensureProfileProcessing();
+  // Checks whether the profile saver knows about the method.
+  public static native boolean presentInProfile(String profile, int methodIdx);
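+
+  // Overall flow exercised by main(): register the profile file via
+  // VMRuntime.registerAppInfo, force creation of a ProfilingInfo for the
+  // test method, make the saver process pending profiles, then verify
+  // that the method's dex index appears in the saved profile.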
+
+  public static boolean doThrow = false;
+  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
+  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
+
+  static native String getProfileInfoDump(String filename);
+
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+
+  private static class VMRuntime {
+    private static final Method registerAppInfoMethod;
+    static {
+      try {
+        Class<? extends Object> c = Class.forName("dalvik.system.VMRuntime");
+        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
+            String.class, String.class, String[].class, String.class);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void registerAppInfo(String profile, String appDir,
+                                       String[] codePaths, String foreignDir) throws Exception {
+      registerAppInfoMethod.invoke(null, profile, appDir, codePaths, foreignDir);
+    }
+  }
+}
diff --git a/test/596-app-images/app_images.cc b/test/596-app-images/app_images.cc
new file mode 100644
index 0000000..a5bbf5f
--- /dev/null
+++ b/test/596-app-images/app_images.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space-inl.h"
+#include "image.h"
+#include "jni.h"
+#include "mirror/class.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkAppImageLoaded(JNIEnv*, jclass) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (auto* space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      auto* image_space = space->AsImageSpace();
+      const auto& image_header = image_space->GetImageHeader();
+      if (image_header.IsAppImage()) {
+        return JNI_TRUE;
+      }
+    }
+  }
+  return JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkAppImageContains(JNIEnv*, jclass, jclass c) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass_ptr = soa.Decode<mirror::Class*>(c);
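+  // The class counts as part of the app image if its address lies within
+  // an app image space.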
+  for (auto* space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      auto* image_space = space->AsImageSpace();
+      const auto& image_header = image_space->GetImageHeader();
+      if (image_header.IsAppImage()) {
+        if (image_space->HasAddress(klass_ptr)) {
+          return JNI_TRUE;
+        }
+      }
+    }
+  }
+  return JNI_FALSE;
+}
+
+}  // namespace
+
+}  // namespace art
diff --git a/test/455-set-vreg/expected.txt b/test/596-app-images/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/596-app-images/expected.txt
diff --git a/test/596-app-images/info.txt b/test/596-app-images/info.txt
new file mode 100644
index 0000000..a3d5e7e
--- /dev/null
+++ b/test/596-app-images/info.txt
@@ -0,0 +1 @@
+Tests that app-images are loaded and used.
diff --git a/test/596-app-images/src/Main.java b/test/596-app-images/src/Main.java
new file mode 100644
index 0000000..75b31b8
--- /dev/null
+++ b/test/596-app-images/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  static class Inner {
+    public static int abc = 0;
+  }
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
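+    // The two checker methods below are implemented natively in app_images.cc.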
+    if (!checkAppImageLoaded()) {
+      System.out.println("App image is not loaded!");
+    } else if (!checkAppImageContains(Inner.class)) {
+      System.out.println("App image does not contain Inner!");
+    }
+  }
+
+  public static native boolean checkAppImageLoaded();
+  public static native boolean checkAppImageContains(Class<?> klass);
+}
diff --git a/test/596-checker-dead-phi/expected.txt b/test/596-checker-dead-phi/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/596-checker-dead-phi/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/596-checker-dead-phi/info.txt b/test/596-checker-dead-phi/info.txt
new file mode 100644
index 0000000..7f7cf0f
--- /dev/null
+++ b/test/596-checker-dead-phi/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing where we used to replace a dead loop
+phi with its first incoming input.
diff --git a/test/596-checker-dead-phi/smali/IrreducibleLoop.smali b/test/596-checker-dead-phi/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..bab2ba9
--- /dev/null
+++ b/test/596-checker-dead-phi/smali/IrreducibleLoop.smali
@@ -0,0 +1,74 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Test case where liveness analysis produces a linear order in which loop blocks
+# are not adjacent. This revealed a bug in our SSA builder, where a dead loop phi
+# would be replaced by its first incoming input during SsaRedundantPhiElimination.
+
+# Check that the outer loop suspend check environment only has the parameter vreg.
+## CHECK-START: int IrreducibleLoop.liveness(int) builder (after)
+## CHECK-DAG:     <<Phi:i\d+>> Phi reg:4 loop:{{B\d+}} irreducible:false
+## CHECK-DAG:     SuspendCheck env:[[_,_,_,_,<<Phi>>]] loop:{{B\d+}} irreducible:false
+
+# Check that the linear order has non-adjacent loop blocks.
+## CHECK-START: int IrreducibleLoop.liveness(int) liveness (after)
+## CHECK-DAG:     Mul liveness:<<LPreEntry2:\d+>>
+## CHECK-DAG:     Add liveness:<<LBackEdge1:\d+>>
+## CHECK-EVAL:    <<LBackEdge1>> < <<LPreEntry2>>
+
+.method public static liveness(I)I
+    .registers 5
+
+    const-string v1, "MyString"
+
+    :header1
+    if-eqz p0, :body1
+
+    :exit
+    return p0
+
+    :body1
+    # The test will generate an incorrect linear order when the following IF swaps
+    # its successors. To do that, load a boolean value and compare NotEqual to 1.
+    sget-boolean v2, LIrreducibleLoop;->f:Z
+    const v3, 1
+    if-ne v2, v3, :pre_header2
+
+    :pre_entry2
+    # Add a marker on the irreducible loop entry.
+    mul-int/2addr p0, p0
+    goto :back_edge2
+
+    :back_edge2
+    goto :header2
+
+    :header2
+    if-eqz p0, :back_edge2
+
+    :back_edge1
+    # Add a marker on the outer loop back edge.
+    add-int/2addr p0, p0
+    # Set a wide register, to have v1 undefined at the back edge.
+    const-wide/16 v0, 0x1
+    goto :header1
+
+    :pre_header2
+    goto :header2
+.end method
+
+.field public static f:Z
diff --git a/test/596-checker-dead-phi/src/Main.java b/test/596-checker-dead-phi/src/Main.java
new file mode 100644
index 0000000..5a3fffc
--- /dev/null
+++ b/test/596-checker-dead-phi/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    // Note that we don't actually enter the loops in the 'liveness'
+    // method, so this is just a sanity check that part of the code we
+    // generated for that method is correct.
+    Method m = c.getMethod("liveness", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/597-deopt-new-string/deopt.cc b/test/597-deopt-new-string/deopt.cc
new file mode 100644
index 0000000..844a786
--- /dev/null
+++ b/test/597-deopt-new-string/deopt.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+#include "mirror/class-inl.h"
+#include "runtime.h"
+#include "thread_list.h"
+#include "thread_state.h"
+#include "gc/gc_cause.h"
+#include "gc/scoped_gc_critical_section.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_deoptimizeAll(
+    JNIEnv* env,
+    jclass cls ATTRIBUTE_UNUSED) {
+  ScopedObjectAccess soa(env);
+  ScopedThreadSuspension sts(Thread::Current(), kWaitingForDeoptimization);
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseInstrumentation,
+                                  gc::kCollectorTypeInstrumentation);
+  // We need to suspend mutator threads first.
+  ScopedSuspendAll ssa(__FUNCTION__);
+  static bool first = true;
+  if (first) {
+    // We need to enable deoptimization once in order to call DeoptimizeEverything().
+    Runtime::Current()->GetInstrumentation()->EnableDeoptimization();
+    first = false;
+  }
+  Runtime::Current()->GetInstrumentation()->DeoptimizeEverything("test");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_undeoptimizeAll(
+    JNIEnv* env,
+    jclass cls ATTRIBUTE_UNUSED) {
+  ScopedObjectAccess soa(env);
+  ScopedThreadSuspension sts(Thread::Current(), kWaitingForDeoptimization);
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseInstrumentation,
+                                  gc::kCollectorTypeInstrumentation);
+  // We need to suspend mutator threads first.
+  ScopedSuspendAll ssa(__FUNCTION__);
+  Runtime::Current()->GetInstrumentation()->UndeoptimizeEverything("test");
+}
+
+}  // namespace art
diff --git a/test/597-deopt-new-string/expected.txt b/test/597-deopt-new-string/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/597-deopt-new-string/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/597-deopt-new-string/info.txt b/test/597-deopt-new-string/info.txt
new file mode 100644
index 0000000..1bd1f79
--- /dev/null
+++ b/test/597-deopt-new-string/info.txt
@@ -0,0 +1 @@
+Regression test for b/28555675
diff --git a/test/597-deopt-new-string/run b/test/597-deopt-new-string/run
new file mode 100644
index 0000000..9776ab3
--- /dev/null
+++ b/test/597-deopt-new-string/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run in debuggable mode, which keeps the call into StringFactory.newEmptyString().
+exec ${RUN} -Xcompiler-option --debuggable "${@}"
diff --git a/test/597-deopt-new-string/src/Main.java b/test/597-deopt-new-string/src/Main.java
new file mode 100644
index 0000000..e78f0d3
--- /dev/null
+++ b/test/597-deopt-new-string/src/Main.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main implements Runnable {
+    static final int numberOfThreads = 2;
+    static final int totalOperations = 40000;
+    static boolean sFlag = false;
+    static volatile boolean done = false;
+    int threadIndex;
+
+    public static native void deoptimizeAll();
+    public static native void undeoptimizeAll();
+
+    Main(int index) {
+        threadIndex = index;
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+
+        final Thread[] threads = new Thread[numberOfThreads];
+        for (int t = 0; t < threads.length; t++) {
+            threads[t] = new Thread(new Main(t));
+            threads[t].start();
+        }
+        for (Thread t : threads) {
+            t.join();
+        }
+        System.out.println("Finishing");
+    }
+
+    public String $noinline$run0() {
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        char[] arr = {'a', 'b', 'c'};
+        String str = new String(arr, 0, arr.length);
+        if (!str.equals("abc")) {
+            System.out.println("Failure 1! " + str);
+            System.exit(0);
+        }
+        return str;
+    }
+
+    public void run() {
+        if (threadIndex == 0) {
+            // This thread keeps doing deoptimization of all threads.
+            // Hopefully that will trigger one deoptimization when returning from
+            // StringFactory.newEmptyString() in one of the other threads.
+            for (int i = 0; i < totalOperations; ++i) {
+                if (i % 50 == 0) {
+                    deoptimizeAll();
+                }
+                if (i % 50 == 25) {
+                    undeoptimizeAll();
+                }
+            }
+            done = true;
+        } else {
+            // This thread keeps doing new String() from a char array.
+            while (!done) {
+                String str = $noinline$run0();
+                if (!str.equals("abc")) {
+                    System.out.println("Failure 2! " + str);
+                    System.exit(0);
+                }
+            }
+        }
+    }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/598-checker-irreducible-dominance/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/598-checker-irreducible-dominance/expected.txt
diff --git a/test/598-checker-irreducible-dominance/info.txt b/test/598-checker-irreducible-dominance/info.txt
new file mode 100644
index 0000000..8ca4e63
--- /dev/null
+++ b/test/598-checker-irreducible-dominance/info.txt
@@ -0,0 +1,2 @@
+Regression test for HGraphBuilder, which used to compute wrong dominance information
+in the presence of irreducible loops.
\ No newline at end of file
diff --git a/test/598-checker-irreducible-dominance/smali/IrreducibleLoop.smali b/test/598-checker-irreducible-dominance/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..4d8b515
--- /dev/null
+++ b/test/598-checker-irreducible-dominance/smali/IrreducibleLoop.smali
@@ -0,0 +1,52 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+.super Ljava/lang/Object;
+
+# Test case in which `inner_back_edge` is not dominated by `inner_header` and
+# causes `outer_back_edge` to not be dominated by `outer_header`. HGraphBuilder
+# did not do a fix-point iteration and would miss the path to `outer_back_edge`
+# through `inner_back_edge`, incorrectly labeling the outer loop non-irreducible.
+
+## CHECK-START: int IrreducibleLoop.dominance(int) builder (after)
+## CHECK:         Add irreducible:true
+
+.method public static dominance(I)I
+    .registers 2
+
+    if-eqz p0, :outer_header
+    goto :inner_back_edge
+
+    :outer_header
+    if-eqz p0, :inner_header
+
+    :outer_branch_exit
+    if-eqz p0, :outer_merge
+    return p0
+
+    :inner_header
+    goto :outer_merge
+
+    :inner_back_edge
+    goto :inner_header
+
+    :outer_merge
+    if-eqz p0, :inner_back_edge
+
+    :outer_back_edge
+    add-int/2addr p0, p0
+    goto :outer_header
+
+.end method
diff --git a/test/598-checker-irreducible-dominance/src/Main.java b/test/598-checker-irreducible-dominance/src/Main.java
new file mode 100644
index 0000000..38b2ab4
--- /dev/null
+++ b/test/598-checker-irreducible-dominance/src/Main.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {
+    // Nothing to run. This regression test merely makes sure the smali test
+    // case successfully compiles.
+  }
+}
diff --git a/test/599-checker-irreducible-loop/expected.txt b/test/599-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/test/599-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+0
diff --git a/test/599-checker-irreducible-loop/info.txt b/test/599-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/599-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing in the presence of
+an irreducible loop.
diff --git a/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..5331fd6
--- /dev/null
+++ b/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,56 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: int IrreducibleLoop.test(int) GVN (before)
+## CHECK-DAG:                     LoadClass loop:none
+## CHECK-DAG:                     LoadClass loop:{{B\d+}} outer_loop:none
+
+## CHECK-START: int IrreducibleLoop.test(int) GVN (after)
+## CHECK-DAG:                     LoadClass loop:none
+## CHECK-DAG:                     LoadClass loop:{{B\d+}} outer_loop:none
+.method public static test(I)I
+   .registers 2
+
+   sget v0, LIrreducibleLoop;->field1:I
+   sput v0, LIrreducibleLoop;->field2:I
+
+   if-eqz p0, :loop_entry
+   goto :exit
+
+   :loop_entry
+   if-eqz p0, :irreducible_loop_entry
+   sget v0, LIrreducibleLoop;->field2:I
+   sput v0, LIrreducibleLoop;->field1:I
+   if-eqz v0, :exit
+   goto :irreducible_other_loop_entry
+
+   :irreducible_loop_entry
+   if-eqz p0, :loop_back_edge
+   :irreducible_other_loop_entry
+   if-eqz v0, :loop_back_edge
+   goto :irreducible_loop_entry
+
+   :loop_back_edge
+   goto :loop_entry
+
+   :exit
+   return v0
+.end method
+
+.field public static field1:I
+.field public static field2:I
diff --git a/test/599-checker-irreducible-loop/src/Main.java b/test/599-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..b47721f
--- /dev/null
+++ b/test/599-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("test", int.class);
+    Object[] arguments = { 42 };
+    // Invoke the code just for sanity checking.
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/600-verifier-fails/expected.txt b/test/600-verifier-fails/expected.txt
new file mode 100644
index 0000000..974b995
--- /dev/null
+++ b/test/600-verifier-fails/expected.txt
@@ -0,0 +1,6 @@
+passed A
+passed B
+passed C
+passed D
+passed E
+passed F
diff --git a/test/600-verifier-fails/info.txt b/test/600-verifier-fails/info.txt
new file mode 100644
index 0000000..23f3ebc
--- /dev/null
+++ b/test/600-verifier-fails/info.txt
@@ -0,0 +1,23 @@
+The situations in these tests were discovered by running the mutating
+dexfuzz on the DEX files of randomly generated Java tests.
+
+(A) b/28908555:
+    soft verification failure (on the final field modification) should
+    not hide the hard verification failure (on the type mismatch) to
+    avoid compiler crash later on
+(B) b/29070461:
+    hard verification failure (not calling super in constructor) should
+    bail immediately and not allow soft verification failures to pile up
+    behind it to avoid fatal message later on
+(C) b/29068831:
+    access validation on field should occur prior to null reference check
+(D) b/29126870:
+    soft verification failure (cannot access) should not hide the hard
+    verification failure (non-reference type) to avoid a compiler crash
+    later on
+(E) b/29068831:
+    access validation on method should occur prior to null reference check
+(F) b/29758098:
+    new-instance of java.lang.Class should throw an IllegalAccessError to
+    avoid interpreter crash on zero size object later
+
diff --git a/test/600-verifier-fails/smali/class.smali b/test/600-verifier-fails/smali/class.smali
new file mode 100644
index 0000000..b2eb254
--- /dev/null
+++ b/test/600-verifier-fails/smali/class.smali
@@ -0,0 +1,24 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LF;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
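+    # Scenario F (b/29758098): instantiating java.lang.Class must throw an
+    # IllegalAccessError instead of crashing the interpreter on a zero-size object.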
+    new-instance v0, Ljava/lang/Class;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/construct.smali b/test/600-verifier-fails/smali/construct.smali
new file mode 100644
index 0000000..417ced9
--- /dev/null
+++ b/test/600-verifier-fails/smali/construct.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
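+    # Scenario B (b/29070461): one path returns without calling a superclass
+    # constructor, a hard verification failure that must bail out immediately.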
+    if-eqz v0, :bail
+    invoke-direct {v0}, LB;->append(Ljava/lang/String;)V
+:bail
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/iget.smali b/test/600-verifier-fails/smali/iget.smali
new file mode 100644
index 0000000..5c045e6
--- /dev/null
+++ b/test/600-verifier-fails/smali/iget.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LD;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 2
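+    # Scenario D (b/29126870): v0 holds a non-reference value, so the hard
+    # failure must not be hidden by the soft "cannot access" failure.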
+    iget v1, v0, LMain;->privateField:I
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/invoke.smali b/test/600-verifier-fails/smali/invoke.smali
new file mode 100644
index 0000000..616d63c
--- /dev/null
+++ b/test/600-verifier-fails/smali/invoke.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LE;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
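+    # Scenario E (b/29068831): v0 is a null reference; the access check on the
+    # private method must happen before the null reference check.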
+    invoke-virtual {v0}, LMain;->privateMethod()V
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/iput.smali b/test/600-verifier-fails/smali/iput.smali
new file mode 100644
index 0000000..bd8b928
--- /dev/null
+++ b/test/600-verifier-fails/smali/iput.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LC;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
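+    # Scenario C (b/29068831): v0 is a null reference; the access check on the
+    # private field must happen before the null reference check.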
+    iput-object v0, v0, LMain;->staticPrivateField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/sput.smali b/test/600-verifier-fails/smali/sput.smali
new file mode 100644
index 0000000..e8e56ac
--- /dev/null
+++ b/test/600-verifier-fails/smali/sput.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LA;
+.super Ljava/lang/Object;
+
+.method public foo(I)V
+.registers 2
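+    # Scenario A (b/28908555): storing an int into a String field is a hard
+    # type-mismatch failure that must not be hidden by the soft failure on
+    # modifying a static final field.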
+    sput v1, LMain;->staticFinalField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/src/Main.java b/test/600-verifier-fails/src/Main.java
new file mode 100644
index 0000000..1726bc4
--- /dev/null
+++ b/test/600-verifier-fails/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static final String staticFinalField = null;
+
+  private static String staticPrivateField = null;
+
+  private int privateField = 0;
+
+  private void privateMethod() { }
+
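+  // Each class name (A..F) matches one fuzzer-derived scenario described in
+  // info.txt; instantiating it must fail with a LinkageError subclass.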
+  private static void test(String name) throws Exception {
+    try {
+      Class<?> a = Class.forName(name);
+      a.newInstance();
+    } catch (java.lang.LinkageError e) {
+      System.out.println("passed " + name);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    test("A");
+    test("B");
+    test("C");
+    test("D");
+    test("E");
+    test("F");
+  }
+}
diff --git a/test/601-method-access/expected.txt b/test/601-method-access/expected.txt
new file mode 100644
index 0000000..90fbab8
--- /dev/null
+++ b/test/601-method-access/expected.txt
@@ -0,0 +1 @@
+Got expected failure
diff --git a/test/601-method-access/info.txt b/test/601-method-access/info.txt
new file mode 100644
index 0000000..e38a336
--- /dev/null
+++ b/test/601-method-access/info.txt
@@ -0,0 +1 @@
+Regression test for method access checks.
diff --git a/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali b/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali
new file mode 100644
index 0000000..7a896a2
--- /dev/null
+++ b/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSubClassUsingInaccessibleMethod;
+
+.super Lother/PublicClass;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Lother/PublicClass;-><init>()V
+    return-void
+.end method
+
+# Regression test for compiler DCHECK() failure (bogus check) when referencing
+# a package-private method from an indirectly inherited package-private class,
+# using this very class as the declaring class in the MethodId, bug: 28771056.
+.method public test()I
+    .registers 2
+    invoke-virtual {p0}, LSubClassUsingInaccessibleMethod;->otherProtectedClassPackageIntInstanceMethod()I
+    move-result v0
+    return v0
+.end method
diff --git a/test/601-method-access/src/Main.java b/test/601-method-access/src/Main.java
new file mode 100644
index 0000000..9d9e568
--- /dev/null
+++ b/test/601-method-access/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+
+/*
+ * Test method access through reflection.
+ */
+public class Main {
+  public static void main(String[] args) {
+    try {
+      Class<?> c = Class.forName("SubClassUsingInaccessibleMethod");
+      Object o = c.newInstance();
+      c.getMethod("test").invoke(o, null);
+    } catch (InvocationTargetException ite) {
+      if (ite.getCause() instanceof IllegalAccessError) {
+        System.out.println("Got expected failure");
+      } else {
+        System.out.println("Got unexpected failure " + ite.getCause());
+      }
+    } catch (Exception e) {
+      System.out.println("Got unexpected failure " + e);
+    }
+  }
+}
diff --git a/test/601-method-access/src/other/ProtectedClass.java b/test/601-method-access/src/other/ProtectedClass.java
new file mode 100644
index 0000000..9426884
--- /dev/null
+++ b/test/601-method-access/src/other/ProtectedClass.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+// Class that cannot be accessed outside of this package.
+class ProtectedClass {
+ /* package */ int otherProtectedClassPackageIntInstanceMethod() {
+   return 28;
+ }
+}
diff --git a/test/601-method-access/src/other/PublicClass.java b/test/601-method-access/src/other/PublicClass.java
new file mode 100644
index 0000000..d9f7961
--- /dev/null
+++ b/test/601-method-access/src/other/PublicClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+// Class that makes the ProtectedClass sub-classable by classes outside of package other.
+public class PublicClass extends ProtectedClass {
+}
diff --git a/test/602-deoptimizeable/expected.txt b/test/602-deoptimizeable/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/602-deoptimizeable/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/602-deoptimizeable/info.txt b/test/602-deoptimizeable/info.txt
new file mode 100644
index 0000000..d0952f9
--- /dev/null
+++ b/test/602-deoptimizeable/info.txt
@@ -0,0 +1 @@
+Test various cases for full/partial-fragment deoptimization.
diff --git a/test/602-deoptimizeable/src/Main.java b/test/602-deoptimizeable/src/Main.java
new file mode 100644
index 0000000..743a579
--- /dev/null
+++ b/test/602-deoptimizeable/src/Main.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+
+class DummyObject {
+    public static boolean sHashCodeInvoked = false;
+    private int i;
+
+    public DummyObject(int i) {
+        this.i = i;
+    }
+
+    public boolean equals(Object obj) {
+        return (obj instanceof DummyObject) && (i == ((DummyObject)obj).i);
+    }
+
+    public int hashCode() {
+        sHashCodeInvoked = true;
+        Main.assertIsManaged();
+        Main.deoptimizeAll();
+        Main.assertIsInterpreted();
+        Main.assertCallerIsManaged();  // Caller is framework code (HashMap).
+        return i % 64;
+    }
+}
+
+public class Main {
+    static boolean sFlag = false;
+
+    public static native void deoptimizeAll();
+    public static native void undeoptimizeAll();
+    public static native void assertIsInterpreted();
+    public static native void assertIsManaged();
+    public static native void assertCallerIsInterpreted();
+    public static native void assertCallerIsManaged();
+    public static native void disableStackFrameAsserts();
+    public static native boolean hasOatFile();
+    public static native boolean isInterpreted();
+
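+    // Runs the scenario on a fresh thread and waits for completion, so each
+    // test starts from a clean stack.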
+    public static void execute(Runnable runnable) throws Exception {
+      Thread t = new Thread(runnable);
+      t.start();
+      t.join();
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        // Only test stack frames in compiled mode.
+        if (!hasOatFile() || isInterpreted()) {
+          disableStackFrameAsserts();
+        }
+        final HashMap<DummyObject, Long> map = new HashMap<DummyObject, Long>();
+
+        // Single-frame deoptimization that covers partial fragment.
+        execute(new Runnable() {
+            public void run() {
+                int[] arr = new int[3];
+                assertIsManaged();
+                int res = $noinline$run1(arr);
+                assertIsManaged();  // Only single frame is deoptimized.
+                if (res != 79) {
+                    System.out.println("Failure 1!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        // Single-frame deoptimization that covers a full fragment.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    int[] arr = new int[3];
+                    assertIsManaged();
+                    // Use reflection to call $noinline$run2 so that it does
+                    // full-fragment deoptimization since that is an upcall.
+                    Class<?> cls = Class.forName("Main");
+                    Method method = cls.getDeclaredMethod("$noinline$run2", int[].class);
+                    double res = (double)method.invoke(Main.class, arr);
+                    assertIsManaged();  // Only single frame is deoptimized.
+                    if (res != 79.3d) {
+                        System.out.println("Failure 2!");
+                        System.exit(0);
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        // Full-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                assertIsManaged();
+                float res = $noinline$run3B();
+                assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                if (res != 0.034f) {
+                    System.out.println("Failure 3!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        // Partial-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    assertIsManaged();
+                    map.put(new DummyObject(10), Long.valueOf(100));
+                    assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        if (!DummyObject.sHashCodeInvoked) {
+            System.out.println("hashCode() method not invoked!");
+        }
+        if (map.get(new DummyObject(10)) != 100) {
+            System.out.println("Wrong hashmap value!");
+        }
+        System.out.println("Finishing");
+    }
+
+    public static int $noinline$run1(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted(); // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79;
+    }
+
+    public static double $noinline$run2(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted();  // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79.3d;
+    }
+
+    public static float $noinline$run3A() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        // Deoptimize callers.
+        deoptimizeAll();
+        assertIsInterpreted();
+        assertCallerIsInterpreted();  // $noinline$run3B is deoptimizeable.
+        return 0.034f;
+    }
+
+    public static float $noinline$run3B() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        float res = $noinline$run3A();
+        assertIsInterpreted();
+        return res;
+    }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/603-checker-instanceof/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/603-checker-instanceof/expected.txt
diff --git a/test/603-checker-instanceof/info.txt b/test/603-checker-instanceof/info.txt
new file mode 100644
index 0000000..5907abc
--- /dev/null
+++ b/test/603-checker-instanceof/info.txt
@@ -0,0 +1,2 @@
+Regression test for the compiler that used to wrongly optimize
+an instanceof.
diff --git a/test/603-checker-instanceof/src/Main.java b/test/603-checker-instanceof/src/Main.java
new file mode 100644
index 0000000..ddf4b92
--- /dev/null
+++ b/test/603-checker-instanceof/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class SuperClass {
+}
+
+class ChildClass extends SuperClass {
+}
+
+public class Main {
+
+  /// CHECK-START:    void Main.main(java.lang.String[]) builder (after)
+  /// CHECK:          BoundType  klass:SuperClass can_be_null:false exact:false
+
+  /// CHECK-START:    void Main.main(java.lang.String[]) builder (after)
+  /// CHECK-NOT:      BoundType  klass:SuperClass can_be_null:false exact:true
+  public static void main(String[] args) {
+    Object obj = new ChildClass();
+
+    // We need a fixed point iteration to hit the bogus type update
+    // of 'obj' below, so create a loop that updates the type of 'obj'.
+    for (int i = 1; i < 1; i++) {
+      obj = new Object();
+    }
+
+    if (obj instanceof SuperClass) {
+      // We used to wrongly type obj as an exact SuperClass from this point,
+      // meaning we were statically determining that the following instanceof
+      // would always fail.
+      if (!(obj instanceof ChildClass)) {
+        throw new Error("Expected a ChildClass, got " + obj.getClass());
+      }
+    }
+  }
+}
diff --git a/test/604-hot-static-interface/build b/test/604-hot-static-interface/build
new file mode 100755
index 0000000..1ca2daf
--- /dev/null
+++ b/test/604-hot-static-interface/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/455-set-vreg/expected.txt b/test/604-hot-static-interface/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/604-hot-static-interface/expected.txt
diff --git a/test/604-hot-static-interface/info.txt b/test/604-hot-static-interface/info.txt
new file mode 100644
index 0000000..bc00bda
--- /dev/null
+++ b/test/604-hot-static-interface/info.txt
@@ -0,0 +1,2 @@
+Regression test for the JIT that used to crash when compiling
+a static method of an interface.
diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java
new file mode 100644
index 0000000..a26623c
--- /dev/null
+++ b/test/604-hot-static-interface/src/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    // Loop enough to get Itf.foo JITed.
+    for (int i = 0; i < 100000; i++) {
+      Itf.foo(new Object());
+    }
+
+    ensureJitCompiled(Itf.class, "foo");
+
+    if (!Itf.foo(new Object())) {
+      throw new Error("Unexpected result");
+    }
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+}
+
+interface Itf {
+  public static boolean foo(Object o) {
+    return o.equals(o);
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/605-new-string-from-bytes/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/605-new-string-from-bytes/expected.txt
diff --git a/test/605-new-string-from-bytes/info.txt b/test/605-new-string-from-bytes/info.txt
new file mode 100644
index 0000000..be02c43
--- /dev/null
+++ b/test/605-new-string-from-bytes/info.txt
@@ -0,0 +1,2 @@
+Regression test for the newStringFromBytes entrypoint,
+which used to wrongly set up the stack.
diff --git a/test/605-new-string-from-bytes/src/Main.java b/test/605-new-string-from-bytes/src/Main.java
new file mode 100644
index 0000000..5bd6c5d
--- /dev/null
+++ b/test/605-new-string-from-bytes/src/Main.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("java.lang.StringFactory");
+    Method m = c.getDeclaredMethod("newStringFromBytes", byte[].class, int.class);
+
+    // Loop over allocations to get more chances of doing GC while in the
+    // newStringFromBytes intrinsic.
+    for (int i = 0; i < 10; i++) {
+      try {
+        byte[] f = new byte[100000000];
+        f[0] = (byte)i;
+        f[1] = (byte)i;
+        m.invoke(null, f, 0);
+      } catch (InvocationTargetException e) {
+        if (e.getCause() instanceof OutOfMemoryError) {
+          // Ignore, this is a stress test.
+        } else {
+          throw e;
+        }
+      } catch (OutOfMemoryError e) {
+        // Ignore, this is a stress test.
+      }
+    }
+  }
+}
diff --git a/test/530-checker-loops/expected.txt b/test/606-erroneous-class/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/606-erroneous-class/expected.txt
diff --git a/test/606-erroneous-class/info.txt b/test/606-erroneous-class/info.txt
new file mode 100644
index 0000000..42cbb7a
--- /dev/null
+++ b/test/606-erroneous-class/info.txt
@@ -0,0 +1,3 @@
+Regression test for a DCHECK in the DexCache which prevented erroneous classes
+from being stored into it. This was bogus because the status of a class can be
+changed by another thread.
\ No newline at end of file
diff --git a/test/606-erroneous-class/smali-multidex/ClassA.smali b/test/606-erroneous-class/smali-multidex/ClassA.smali
new file mode 100644
index 0000000..f87fcb2
--- /dev/null
+++ b/test/606-erroneous-class/smali-multidex/ClassA.smali
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public final LClassA;
+.super Ljava/lang/Object;
+
+.method public static foo()V
+    .registers 1
+    # Obtain the ErrClass type from the Dex cache of the first Dex file. Note that
+    # because the first Dex file has already been verified, we know the class
+    # is erroneous at this point.
+    sget-object v0, LClassB;->g:LErrClass;
+    # Use the object in a way that will try to store the ErrClass type in
+    # the Dex cache of the second Dex file.
+    invoke-virtual {v0}, LErrClass;->foo()V
+.end method
diff --git a/test/606-erroneous-class/smali/ClassB.smali b/test/606-erroneous-class/smali/ClassB.smali
new file mode 100644
index 0000000..80754c8
--- /dev/null
+++ b/test/606-erroneous-class/smali/ClassB.smali
@@ -0,0 +1,18 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LClassB;
+.super Ljava/lang/Object;
+
+.field public static g:LErrClass;
diff --git a/test/606-erroneous-class/smali/ErrClass.smali b/test/606-erroneous-class/smali/ErrClass.smali
new file mode 100644
index 0000000..740f1e1
--- /dev/null
+++ b/test/606-erroneous-class/smali/ErrClass.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public final LErrClass;
+.super Ljava/lang/Object;
+
+.field public g:Ljava/lang/Object;
+
+.method public foo()V
+    .registers 6
+    # Use a new instance before initializing it => hard verifier error.
+    new-instance v0, LSomeClass;
+    iput-object v0, p0, LErrClass;->g:Ljava/lang/Object;
+    return-void
+.end method
diff --git a/test/606-erroneous-class/src/Main.java b/test/606-erroneous-class/src/Main.java
new file mode 100644
index 0000000..7dbe567
--- /dev/null
+++ b/test/606-erroneous-class/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    // Nothing to run.
+  }
+}
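
For background: once ErrClass fails verification, the runtime marks it erroneous, and later uses must fail without re-running the verifier. A verification failure cannot be provoked from plain Java source, so the minimal sketch below uses the analogous class-initialization failure to show the erroneous-class state the smali files above rely on; it is illustrative only, not part of this patch.

    public class ErroneousStateDemo {
      static class Erroneous {
        // 'if (true)' keeps javac happy: an initializer must be able to
        // complete normally, so an unconditional throw would not compile.
        static { if (true) throw new RuntimeException("simulated failure"); }
        static void touch() {}
      }

      public static void main(String[] args) {
        try {
          Erroneous.touch();  // First use runs <clinit>, which fails.
        } catch (ExceptionInInitializerError e) {
          System.out.println("first use:  " + e);
        }
        try {
          Erroneous.touch();  // The class is now marked erroneous.
        } catch (NoClassDefFoundError e) {
          System.out.println("second use: " + e);
        }
      }
    }
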
diff --git a/test/530-checker-loops/expected.txt b/test/607-daemon-stress/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/607-daemon-stress/expected.txt
diff --git a/test/607-daemon-stress/info.txt b/test/607-daemon-stress/info.txt
new file mode 100644
index 0000000..1047b76
--- /dev/null
+++ b/test/607-daemon-stress/info.txt
@@ -0,0 +1,3 @@
+Stress test for daemon threads stuck in a method that requires the thread list lock
+(for example, Thread.isInterrupted). The shutdown thread used to block those daemons
+from making progress.
diff --git a/test/607-daemon-stress/src/Main.java b/test/607-daemon-stress/src/Main.java
new file mode 100644
index 0000000..56ef410
--- /dev/null
+++ b/test/607-daemon-stress/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main extends Thread {
+  public static void main(String[] args) throws Exception {
+    for (int i = 0; i < 5; i++) {
+      Main m = new Main();
+      m.setDaemon(true);
+      m.start();
+    }
+    // Sleep a while to give some time for the threads to start.
+    Thread.sleep(1000);
+  }
+
+  public void run() {
+    while (!isInterrupted());
+  }
+}
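
The stress scenario depends on daemon-thread semantics: the VM begins shutdown as soon as main() returns, while daemon threads may still be spinning in code that takes the thread list lock (such as Thread.isInterrupted, per the info.txt above). A minimal standalone reminder of those semantics, not part of the patch:

    public class DaemonExitDemo extends Thread {
      public void run() {
        // Busy-spin on the interrupt flag, like the run() loop above.
        while (!isInterrupted()) {}
      }

      public static void main(String[] args) throws Exception {
        Thread daemon = new DaemonExitDemo();
        daemon.setDaemon(true);  // The VM will not wait for this thread.
        daemon.start();
        Thread.sleep(100);
        // main() returns here; shutdown proceeds while the daemon still
        // spins, which is exactly the window the test stresses.
      }
    }
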
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/608-checker-unresolved-lse/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/608-checker-unresolved-lse/expected.txt
diff --git a/test/608-checker-unresolved-lse/info.txt b/test/608-checker-unresolved-lse/info.txt
new file mode 100644
index 0000000..466d5f4
--- /dev/null
+++ b/test/608-checker-unresolved-lse/info.txt
@@ -0,0 +1,3 @@
+Regression test for the load store elimination optimization,
+which used to wrongly remove field stores in the presence of
+unresolved accesses.
diff --git a/test/608-checker-unresolved-lse/run b/test/608-checker-unresolved-lse/run
new file mode 100644
index 0000000..226891f
--- /dev/null
+++ b/test/608-checker-unresolved-lse/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use the --secondary switch to add the secondary dex file to the class path.
+exec ${RUN} "${@}" --secondary
diff --git a/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java b/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java
new file mode 100644
index 0000000..b11b9be
--- /dev/null
+++ b/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class MissingSuperClass {
+}
diff --git a/test/608-checker-unresolved-lse/src/Main.java b/test/608-checker-unresolved-lse/src/Main.java
new file mode 100644
index 0000000..c6f8854
--- /dev/null
+++ b/test/608-checker-unresolved-lse/src/Main.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// We make Main extend an unresolved super class. This will lead to
+// unresolved accesses to Foo's fields, as we won't know if Main can access
+// package-private fields.
+public class Main extends MissingSuperClass {
+
+  public static void main(String[] args) {
+    instanceFieldTest();
+    staticFieldTest();
+    instanceFieldTest2();
+  }
+
+  /// CHECK-START: void Main.instanceFieldTest() inliner (before)
+  /// CHECK-NOT:    InstanceFieldSet
+
+  /// CHECK-START: void Main.instanceFieldTest() inliner (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        UnresolvedInstanceFieldGet
+
+  // Load store elimination used to remove the InstanceFieldSet, thinking
+  // that the UnresolvedInstanceFieldGet was unrelated. However, inlining
+  // can create a situation where the UnresolvedInstanceFieldGet resolves
+  // to the same field as the one in the InstanceFieldSet, so the
+  // InstanceFieldSet must be preserved.
+
+  /// CHECK-START: void Main.instanceFieldTest() load_store_elimination (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        UnresolvedInstanceFieldGet
+  public static void instanceFieldTest() {
+    Foo f = new Foo();
+    if (f.iField != 42) {
+      throw new Error("Expected 42, got " + f.iField);
+    }
+  }
+
+  /// CHECK-START: void Main.instanceFieldTest2() inliner (before)
+  /// CHECK-NOT:    InstanceFieldSet
+  /// CHECK-NOT:    InstanceFieldGet
+
+  /// CHECK-START: void Main.instanceFieldTest2() inliner (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+  /// CHECK:        UnresolvedInstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+
+  // Load store elimination will eliminate the first InstanceFieldGet because
+  // it simply follows an InstanceFieldSet. It must, however, not eliminate the
+  // second InstanceFieldGet, as the UnresolvedInstanceFieldSet might resolve
+  // to the same field.
+
+  /// CHECK-START: void Main.instanceFieldTest2() load_store_elimination (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK-NOT:    InstanceFieldGet
+  /// CHECK:        UnresolvedInstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+  public static void instanceFieldTest2() {
+    Foo f = new Foo();
+    int a = f.$inline$GetInstanceField();
+    f.iField = 43;
+    a = f.$inline$GetInstanceField();
+    if (a != 43) {
+      throw new Error("Expected 43, got " + a);
+    }
+  }
+
+  /// CHECK-START: void Main.staticFieldTest() inliner (before)
+  /// CHECK-NOT:    StaticFieldSet
+
+  /// CHECK-START: void Main.staticFieldTest() inliner (after)
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        UnresolvedStaticFieldGet
+
+  /// CHECK-START: void Main.staticFieldTest() load_store_elimination (after)
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        UnresolvedStaticFieldGet
+  public static void staticFieldTest() {
+    // Ensure Foo is initialized.
+    Foo f = new Foo();
+    f.$inline$StaticSet42();
+    f.$inline$StaticSet43();
+    if (Foo.sField != 43) {
+      throw new Error("Expected 43, got " + Foo.sField);
+    }
+  }
+}
+
+class Foo {
+  // The field needs to be package-private to make the accesses in Main
+  // unresolved.
+  int iField;
+  static int sField;
+
+  public void $inline$StaticSet42() {
+    sField = 42;
+  }
+
+  public void $inline$StaticSet43() {
+    sField = 43;
+  }
+
+  public int $inline$GetInstanceField() {
+    return iField;
+  }
+
+  // Constructor needs to be public to get it resolved in Main.main
+  // and therefore inlined.
+  public Foo() {
+    iField = 42;
+  }
+}
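
The underlying invariant: load store elimination may remove a store (or forward its value to a later load) only if no intervening access can touch the same memory. Unresolved accesses have unknown targets, so they must be treated as potentially aliasing everything. A compiler-agnostic sketch of the two cases, using ordinary resolved fields for illustration (not part of the patch):

    class LseAliasingSketch {
      int field;

      int forwardable() {
        field = 1;
        // Nothing between the store and the load can alias 'field',
        // so the load may be replaced by the constant 1.
        return field;
      }

      int mustReload(LseAliasingSketch other) {
        field = 1;
        other.field = 2;  // Aliases 'this.field' whenever other == this.
        // Forwarding 1 here would be wrong, so both the store above and
        // this load must be kept.
        return field;
      }
    }
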
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/609-checker-inline-interface/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/609-checker-inline-interface/expected.txt
diff --git a/test/609-checker-inline-interface/info.txt b/test/609-checker-inline-interface/info.txt
new file mode 100644
index 0000000..35eee08
--- /dev/null
+++ b/test/609-checker-inline-interface/info.txt
@@ -0,0 +1,2 @@
+Checker test verifying that we inline interface calls and, when we cannot
+inline them, turn them into virtual invokes.
diff --git a/test/609-checker-inline-interface/src/Main.java b/test/609-checker-inline-interface/src/Main.java
new file mode 100644
index 0000000..413f2dd
--- /dev/null
+++ b/test/609-checker-inline-interface/src/Main.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Main implements Interface {
+
+  static void methodWithInvokeInterface(Interface interf) {
+    interf.doCall();
+  }
+
+  public void doCall() {
+    if (doThrow) throw new Error("");
+  }
+
+  public static void main(String[] args) {
+    testInlineInterfaceCall();
+    testInterfaceToVirtualCall();
+  }
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect method_name:Main.methodWithInvokeInterface
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (before)
+  /// CHECK-NOT:                      InvokeInterface
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (after)
+  /// CHECK:                          InvokeInterface method_name:Interface.doCall
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  public static void testInlineInterfaceCall() {
+    methodWithInvokeInterface(itf);
+  }
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect method_name:Main.methodWithInvokeInterface
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (before)
+  /// CHECK-NOT:                      InvokeInterface
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (after)
+  /// CHECK:                          InvokeVirtual method_name:Main.doCall
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeInterface
+  public static void testInterfaceToVirtualCall() {
+    methodWithInvokeInterface(m);
+  }
+
+  static Interface itf = new Main();
+  static Main m = new Main();
+  static boolean doThrow = false;
+}
+
+interface Interface {
+  public void doCall();
+}
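
The devirtualization in testInterfaceToVirtualCall is legal because, after inlining methodWithInvokeInterface with a Main-typed argument, the compiler knows the receiver's exact type. Hand-written Java showing the same strength reduction, as a sketch with hypothetical names rather than compiler output:

    interface Callee {
      void doCall();
    }

    class DevirtSketch implements Callee {
      public void doCall() {}

      static void interfaceDispatch(Callee c) {
        c.doCall();  // invoke-interface: must search the interface table.
      }

      static void virtualDispatch(DevirtSketch c) {
        c.doCall();  // invoke-virtual: a plain vtable call, cheaper to dispatch.
      }
    }
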
diff --git a/test/609-checker-x86-bounds-check/expected.txt b/test/609-checker-x86-bounds-check/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/609-checker-x86-bounds-check/info.txt b/test/609-checker-x86-bounds-check/info.txt
new file mode 100644
index 0000000..c0f26d0
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/info.txt
@@ -0,0 +1 @@
+Checker test that we combine ArrayLength and BoundsCheck on x86/x86_64.
diff --git a/test/609-checker-x86-bounds-check/src/Main.java b/test/609-checker-x86-bounds-check/src/Main.java
new file mode 100644
index 0000000..bfc2be8
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/src/Main.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    int[] array = new int[51];
+    testArrayLengthBoundsCheckX86(array, 10);
+
+    System.out.println("passed");
+  }
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], <<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + <<IndexReg>> * 4 + 12], 9
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  // Test assumes parameter value is in lower 8 registers (it is passed in edx).
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], e<<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + r<<IndexReg>> * 4 + 12], 9
+
+  static void testArrayLengthBoundsCheckX86(int[] array, int index) {
+    array[index] = 9;
+  }
+}
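
What the x86 passes buy, spelled out: after x86_memory_operand_generation the ArrayLength is marked emitted_at_use, so no register is spent holding the length; the disassembly then folds the length load into the compare ([base + 8] holds the length in this layout) and the store into a scaled addressing mode ([base + index * 4 + 12]), matching the CHECK lines above. The unfused sequence in the comments below is a plausible contrast, an assumption for illustration rather than verified compiler output:

    class FusedBoundsCheckSketch {
      // Unfused (hypothetical):        Fused (per the CHECK lines above):
      //   mov lenReg, [arrayReg + 8]     cmp [arrayReg + 8], indexReg
      //   cmp lenReg, indexReg           mov [arrayReg + indexReg * 4 + 12], 9
      //   mov [arrayReg + ...], 9
      static void store(int[] array, int index) {
        array[index] = 9;  // Same shape as testArrayLengthBoundsCheckX86.
      }
    }
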
diff --git a/test/530-checker-loops/expected.txt b/test/610-arraycopy/expected.txt
similarity index 100%
copy from test/530-checker-loops/expected.txt
copy to test/610-arraycopy/expected.txt
diff --git a/test/610-arraycopy/info.txt b/test/610-arraycopy/info.txt
new file mode 100644
index 0000000..a77190d
--- /dev/null
+++ b/test/610-arraycopy/info.txt
@@ -0,0 +1,2 @@
+Regression test for the System.arraycopy intrinsic, which had a bug
+when copying within the same array.
diff --git a/test/610-arraycopy/src/Main.java b/test/610-arraycopy/src/Main.java
new file mode 100644
index 0000000..ee11c8e
--- /dev/null
+++ b/test/610-arraycopy/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    Object[] a = new Object[5];
+    for (int i = 0; i < 5; i++) {
+      a[i] = new Integer(i);
+    }
+    $noinline$callArrayCopy(a, a);
+
+    expectEquals(0, ((Integer)a[0]).intValue());
+    expectEquals(0, ((Integer)a[1]).intValue());
+    expectEquals(1, ((Integer)a[2]).intValue());
+    expectEquals(2, ((Integer)a[3]).intValue());
+    expectEquals(4, ((Integer)a[4]).intValue());
+  }
+
+  public static void expectEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void $noinline$callArrayCopy(Object[] a, Object[] b) {
+    System.arraycopy(a, 0, b, 1, 3);
+    if (doThrow) { throw new Error(); }
+  }
+
+  static boolean doThrow = false;
+}
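
The expected values follow from System.arraycopy's documented contract: when source and destination ranges overlap in the same array, the copy must behave as if the source range were first copied to a temporary buffer (memmove semantics), which is exactly what the broken intrinsic violated. A standalone demo of that contract:

    public class ArrayCopyOverlapDemo {
      public static void main(String[] args) {
        int[] a = {0, 1, 2, 3, 4};
        // Copy [0..2] onto [1..3] within the same array. The contract
        // requires the result of buffering 0,1,2 first: {0, 0, 1, 2, 4}.
        System.arraycopy(a, 0, a, 1, 3);
        System.out.println(java.util.Arrays.toString(a));  // [0, 0, 1, 2, 4]
      }
    }
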
diff --git a/test/611-checker-simplify-if/expected.txt b/test/611-checker-simplify-if/expected.txt
new file mode 100644
index 0000000..3083c4c
--- /dev/null
+++ b/test/611-checker-simplify-if/expected.txt
@@ -0,0 +1,7 @@
+54
+54
+54
+12
+12
+12
+33
diff --git a/test/611-checker-simplify-if/info.txt b/test/611-checker-simplify-if/info.txt
new file mode 100644
index 0000000..b090db8
--- /dev/null
+++ b/test/611-checker-simplify-if/info.txt
@@ -0,0 +1 @@
+Checker tests for the 'if' simplification in the compiler.
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
new file mode 100644
index 0000000..7dac007
--- /dev/null
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    testNoInline(args);
+    System.out.println(staticField);
+    testInline(args);
+    System.out.println(staticField);
+    testNonConstantInputs(args);
+    System.out.println(staticField);
+    testNonConstantEqual(args);
+    System.out.println(staticField);
+    testGreaterCondition(args);
+    System.out.println(staticField);
+    testSwitch(args);
+    System.out.println(staticField);
+    testFP(args);
+    System.out.println(staticField);
+  }
+
+  // Test when a condition is the input of the if.
+
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK: <<Const0:i\d+>>   IntConstant 0
+  /// CHECK:                   If
+  /// CHECK: <<Phi:i\d+>>      Phi
+  /// CHECK: <<Equal:z\d+>>    Equal [<<Phi>>,<<Const0>>]
+  /// CHECK:                   If [<<Equal>>]
+
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK:      If
+  /// CHECK-NOT:  Phi
+  /// CHECK-NOT:  Equal
+  /// CHECK-NOT:  If
+  public static void testNoInline(String[] args) {
+    boolean myVar = false;
+    if (args.length == 42) {
+      myVar = true;
+    } else {
+      staticField = 32;
+      myVar = false;
+    }
+    if (myVar) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test when the phi is the input of the if.
+
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (before)
+  /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                   If
+  /// CHECK-DAG: <<Phi:i\d+>>      Phi
+  /// CHECK-DAG:                   If [<<Phi>>]
+
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (after)
+  /// CHECK:      If
+  /// CHECK-NOT:  Phi
+  /// CHECK-NOT:  If
+  public static void testInline(String[] args) {
+    boolean myVar = $inline$doTest(args);
+    if (myVar) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  public static boolean $inline$doTest(String[] args) {
+    boolean myVar;
+    if (args.length == 42) {
+      myVar = true;
+    } else {
+      staticField = 32;
+      myVar = false;
+    }
+    return myVar;
+  }
+
+  // Test when one input is not a constant. We can only optimize the constant input.
+
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<Const42>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-NOT:                          Phi
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<StaticFieldGet>>,<<Const42>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+  public static void testNonConstantInputs(String[] args) {
+    int a = 42;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = otherStaticField;
+    }
+    if (a == 42) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test with a condition.
+
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const22:i\d+>>         IntConstant 22
+  /// CHECK-DAG: <<Const25:i\d+>>         IntConstant 25
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<Const22>>]
+  /// CHECK-DAG: <<GE:z\d+>>              GreaterThanOrEqual [<<Phi>>,<<Const25>>]
+  /// CHECK-DAG:                          If [<<GE>>]
+
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK-DAG:                          If
+  /// CHECK-NOT:                          Phi
+  /// CHECK-NOT:                          GreaterThanOrEqual
+  /// CHECK-NOT:                          If
+  public static void testGreaterCondition(String[] args) {
+    int a = 42;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = 22;
+    }
+    if (a < 25) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test when comparing non constants.
+
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<StaticFieldGet>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-NOT:                          Phi
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+  public static void testNonConstantEqual(String[] args) {
+    int a = 42;
+    int b = otherStaticField;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = b;
+    }
+    if (a == b) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Make sure we don't "simplify" a loop and potentially turn it into
+  // an irreducible loop. The suspend check at the loop header prevents
+  // us from doing the simplification.
+
+  /// CHECK-START: void Main.testLoop(boolean) disassembly (after)
+  /// CHECK-DAG: SuspendCheck
+  /// CHECK:     irreducible:false
+  /// CHECK-NOT: irreducible:true
+  public static void testLoop(boolean c) {
+    while (true) {
+      if (c) {
+        if ($noinline$foo()) return;
+        c = false;
+      } else {
+        $noinline$foo();
+        c = true;
+      }
+    }
+  }
+
+  static boolean $noinline$foo() {
+    if (doThrow) throw new Error("");
+    return true;
+  }
+
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK:      If
+  /// CHECK:      If
+  /// CHECK:      If
+
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK:      If
+  /// CHECK:      If
+  /// CHECK-NOT:  If
+  public static void testSwitch(String[] args) {
+    boolean cond = false;
+    switch (args.length) {
+      case 42:
+        staticField = 11;
+        cond = true;
+        break;
+      case 43:
+        staticField = 33;
+        cond = true;
+        break;
+      default:
+        cond = false;
+        break;
+    }
+    if (cond) {
+      // Redirect case 42 and 43 here.
+      staticField = 2;
+    }
+    // Redirect default here.
+  }
+
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK:      If
+  /// CHECK:      If
+
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK:      If
+  /// CHECK:      If
+  public static void testFP(String[] args) {
+    float f = 2.2f;
+    float nan = $noinline$getNaN();
+    if (args.length == 42) {
+      f = 4.3f;
+    } else {
+      staticField = 33;
+      f = nan;
+    }
+    if (f == nan) {
+      staticField = 5;
+    }
+  }
+
+  // No inline variant to avoid having the compiler see it's a NaN.
+  static float $noinline$getNaN() {
+    if (doThrow) throw new Error("");
+    return Float.NaN;
+  }
+
+  static boolean doThrow;
+  static int staticField;
+  static int otherStaticField;
+}
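
The shape all of these tests exercise: a boolean or small-integer phi that is immediately consumed by a second If. When a phi input is constant, the second branch is decidable on that path, so the pass redirects the predecessor and drops the phi. Rewriting testNoInline by hand gives the control flow the pass produces; this is a source-level sketch, not actual compiler output:

    class IfSimplifySketch {
      static int staticField;

      // After simplification, the second if of testNoInline is decided
      // per-predecessor and the boolean disappears entirely.
      static void simplified(String[] args) {
        if (args.length == 42) {
          staticField = 12;   // Was: myVar = true, then if (myVar).
        } else {
          staticField = 32;
          staticField = 54;   // Was: myVar = false, then if (myVar).
        }
      }
    }
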
diff --git a/test/455-set-vreg/expected.txt b/test/612-jit-dex-cache/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/612-jit-dex-cache/expected.txt
diff --git a/test/612-jit-dex-cache/info.txt b/test/612-jit-dex-cache/info.txt
new file mode 100644
index 0000000..e80f642
--- /dev/null
+++ b/test/612-jit-dex-cache/info.txt
@@ -0,0 +1,2 @@
+Regression test for the JIT compiler, which used to
+wrongly update the dex cache of a class loader.
diff --git a/test/612-jit-dex-cache/src-ex/B.java b/test/612-jit-dex-cache/src-ex/B.java
new file mode 100644
index 0000000..4da9a1d
--- /dev/null
+++ b/test/612-jit-dex-cache/src-ex/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B {
+}
diff --git a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
new file mode 100644
index 0000000..fcb314d
--- /dev/null
+++ b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class LoadedByAppClassLoader {
+  public static void letMeInlineYou(A a) {
+    a.foo();
+  }
+
+  public static ClassLoader areYouB() {
+    // Ensure letMeInlineYou is JITted and tries to do inlining of A.foo.
+    // The compiler used to wrongly update the dex cache of letMeInlineYou's
+    // class loader.
+    Main.ensureJitCompiled(LoadedByAppClassLoader.class, "letMeInlineYou");
+    return OtherClass.getB().getClassLoader();
+  }
+}
+
+class OtherClass {
+  public static Class<?> getB() {
+    // This used to return the B class of another class loader.
+    return B.class;
+  }
+}
diff --git a/test/612-jit-dex-cache/src/A.java b/test/612-jit-dex-cache/src/A.java
new file mode 100644
index 0000000..415c712
--- /dev/null
+++ b/test/612-jit-dex-cache/src/A.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+  public int foo() {
+    return 42;
+  }
+}
diff --git a/test/612-jit-dex-cache/src/B.java b/test/612-jit-dex-cache/src/B.java
new file mode 100644
index 0000000..46c878b
--- /dev/null
+++ b/test/612-jit-dex-cache/src/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B extends A {
+}
diff --git a/test/612-jit-dex-cache/src/Main.java b/test/612-jit-dex-cache/src/Main.java
new file mode 100644
index 0000000..89ebe09
--- /dev/null
+++ b/test/612-jit-dex-cache/src/Main.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+import dalvik.system.PathClassLoader;
+
+// A class loader that does not delegate to its parent for classes outside the java. package.
+class DelegateLastPathClassLoader extends PathClassLoader {
+
+  public DelegateLastPathClassLoader(String dexPath, ClassLoader parent) {
+    super(dexPath, parent);
+  }
+
+  @Override
+  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
+    if (!name.startsWith("java.")) {
+      try {
+        return findClass(name);
+      } catch (ClassNotFoundException ignore) {
+        // Ignore and fall through to parent class loader.
+      }
+    }
+    return super.loadClass(name, resolve);
+  }
+}
+
+public class Main {
+
+  private static Class<?> classFromDifferentLoader() throws Exception {
+    final String DEX_FILE = System.getenv("DEX_LOCATION") + "/612-jit-dex-cache-ex.jar";
+    ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader());
+    return loader.loadClass("LoadedByAppClassLoader");
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Class<?> cls = classFromDifferentLoader();
+    Method m = cls.getDeclaredMethod("letMeInlineYou", A.class);
+    B b = new B();
+    // Invoke the method enough times to get an inline cache and get JITted.
+    for (int i = 0; i < 10000; ++i) {
+      m.invoke(null, b);
+    }
+    m = cls.getDeclaredMethod("areYouB", null);
+    ClassLoader loader = (ClassLoader) m.invoke(null);
+    if (loader != cls.getClassLoader()) {
+      throw new Error("Wrong class loader");
+    }
+  }
+
+  public static native void ensureJitCompiled(Class<?> cls, String method_name);
+}
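
The test hinges on class-loader lookup order: a stock PathClassLoader delegates to its parent first, so B would always resolve to the parent's copy, while the delegate-last loader above finds its own copy of B first. That yields two distinct B classes with the same name, and the JIT must not cache one under the other's loader. A usage sketch, assuming a jar at a hypothetical path that also defines a class named B:

    public class LoaderOrderDemo {
      public static void main(String[] args) throws Exception {
        ClassLoader parent = LoaderOrderDemo.class.getClassLoader();
        // Hypothetical path; assumes the jar defines its own class "B".
        ClassLoader delegateLast =
            new DelegateLastPathClassLoader("/data/local/tmp/other.jar", parent);
        Class<?> parentB = parent.loadClass("B");
        Class<?> childB = delegateLast.loadClass("B");
        System.out.println(childB != parentB);                        // true
        System.out.println(childB.getClassLoader() == delegateLast);  // true
      }
    }
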
diff --git a/test/455-set-vreg/expected.txt b/test/613-inlining-dex-cache/expected.txt
similarity index 100%
copy from test/455-set-vreg/expected.txt
copy to test/613-inlining-dex-cache/expected.txt
diff --git a/test/613-inlining-dex-cache/info.txt b/test/613-inlining-dex-cache/info.txt
new file mode 100644
index 0000000..e80f642
--- /dev/null
+++ b/test/613-inlining-dex-cache/info.txt
@@ -0,0 +1,2 @@
+Regression test for the JIT compiler, which used to
+wrongly update the dex cache of a class loader.
diff --git a/test/613-inlining-dex-cache/run b/test/613-inlining-dex-cache/run
new file mode 100644
index 0000000..9c1e7aa
--- /dev/null
+++ b/test/613-inlining-dex-cache/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+flags="$@"
+# We need the dex files pre-verified to avoid running the verifier
+# at runtime, which would update the dex cache.
+exec ${RUN} ${flags/verify-at-runtime/interpret-only}
diff --git a/test/613-inlining-dex-cache/src-ex/B.java b/test/613-inlining-dex-cache/src-ex/B.java
new file mode 100644
index 0000000..4da9a1d
--- /dev/null
+++ b/test/613-inlining-dex-cache/src-ex/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B {
+}
diff --git a/test/613-inlining-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/613-inlining-dex-cache/src-ex/LoadedByAppClassLoader.java
new file mode 100644
index 0000000..f4e0f10
--- /dev/null
+++ b/test/613-inlining-dex-cache/src-ex/LoadedByAppClassLoader.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class LoadedByAppClassLoader {
+  public static void letMeInlineYou() {
+    // We used to pass the wrong class loader when trying to inline 'Main.foo'.
+    Main.foo(null);
+  }
+}
diff --git a/test/613-inlining-dex-cache/src/B.java b/test/613-inlining-dex-cache/src/B.java
new file mode 100644
index 0000000..6e7e55d
--- /dev/null
+++ b/test/613-inlining-dex-cache/src/B.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B {
+  public void foo() {
+  }
+}
diff --git a/test/613-inlining-dex-cache/src/Main.java b/test/613-inlining-dex-cache/src/Main.java
new file mode 100644
index 0000000..31ab1d2
--- /dev/null
+++ b/test/613-inlining-dex-cache/src/Main.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+import dalvik.system.PathClassLoader;
+
+// A class loader that does not delegate to its parent for classes outside the java. package.
+class DelegateLastPathClassLoader extends PathClassLoader {
+
+  public DelegateLastPathClassLoader(String dexPath, ClassLoader parent) {
+    super(dexPath, parent);
+  }
+
+  @Override
+  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
+    if (!name.startsWith("java.")) {
+      try {
+        return findClass(name);
+      } catch (ClassNotFoundException ignore) {
+        // Ignore and fall through to parent class loader.
+      }
+    }
+    return super.loadClass(name, resolve);
+  }
+}
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    final String DEX_FILE = System.getenv("DEX_LOCATION") + "/613-inlining-dex-cache-ex.jar";
+    ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader());
+    Class<?> cls = loader.loadClass("LoadedByAppClassLoader");
+    Method m = cls.getDeclaredMethod("letMeInlineYou");
+    // Invoke the method enough times to get JITted.
+    for (int i = 0; i < 10000; ++i) {
+      m.invoke(null);
+    }
+    ensureJitCompiled(cls, "letMeInlineYou");
+    ClassLoader bLoader = areYouB();
+    if (bLoader != Main.class.getClassLoader()) {
+      throw new Error("Wrong class loader");
+    }
+  }
+
+  public static void foo(Main o) {
+    // LoadedByAppClassLoader.letMeInlineYou will try to inline this
+    // method but used to pass the wrong class loader. As a result,
+    // the lookup of B.foo was updating the dex cache with the other
+    // class loader's B class.
+    if (o != null) {
+      o.myField.foo();
+    }
+  }
+
+  public B myField;
+
+  public static ClassLoader areYouB() {
+    return OtherClass.getB().getClassLoader();
+  }
+
+  public static native void ensureJitCompiled(Class<?> cls, String method_name);
+}
+
+class OtherClass {
+  public static Class<?> getB() {
+    // This used to return the B class of another class loader.
+    return B.class;
+  }
+}
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/614-checker-dump-constant-location/expected.txt
similarity index 100%
copy from test/469-condition-materialization-regression/expected.txt
copy to test/614-checker-dump-constant-location/expected.txt
diff --git a/test/614-checker-dump-constant-location/info.txt b/test/614-checker-dump-constant-location/info.txt
new file mode 100644
index 0000000..4a94ffa
--- /dev/null
+++ b/test/614-checker-dump-constant-location/info.txt
@@ -0,0 +1,2 @@
+Test that the graph visualizer outputs useful information for constant
+locations in parallel moves.
diff --git a/test/614-checker-dump-constant-location/src/Main.java b/test/614-checker-dump-constant-location/src/Main.java
new file mode 100644
index 0000000..f6bc063
--- /dev/null
+++ b/test/614-checker-dump-constant-location/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static int array_int[] = { 0 };
+  public static long array_long[] = { 0 };
+  public static float array_float[] = { 0.0f };
+  public static double array_double[] = { 0.0 };
+
+  // The code used to print constant locations in parallel moves is
+  // architecture-independent. We only test for ARM and ARM64, where it is
+  // easy: 'store' instructions only take registers as a source.
+
+  /// CHECK-START-ARM: void Main.store_to_arrays() register (after)
+  /// CHECK:    ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+  /// CHECK-START-ARM64: void Main.store_to_arrays() register (after)
+  /// CHECK:    ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+  public void store_to_arrays() {
+    array_int[0] = 1;
+    array_long[0] = 2;
+    array_float[0] = 3.3f;
+    array_double[0] = 4.4;
+  }
+
+  public static void main(String args[]) {}
+}
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/615-checker-arm64-zr-parallel-move/expected.txt
similarity index 100%
copy from test/469-condition-materialization-regression/expected.txt
copy to test/615-checker-arm64-zr-parallel-move/expected.txt
diff --git a/test/615-checker-arm64-zr-parallel-move/info.txt b/test/615-checker-arm64-zr-parallel-move/info.txt
new file mode 100644
index 0000000..199755d
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/info.txt
@@ -0,0 +1 @@
+Checker test to verify we correctly use wzr and xzr to synthesize zero constants.
diff --git a/test/615-checker-arm64-zr-parallel-move/src/Main.java b/test/615-checker-arm64-zr-parallel-move/src/Main.java
new file mode 100644
index 0000000..5024f28
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static boolean doThrow = false;
+
+  public void $noinline$foo(int in_w1,
+                            int in_w2,
+                            int in_w3,
+                            int in_w4,
+                            int in_w5,
+                            int in_w6,
+                            int in_w7,
+                            int on_stack_int,
+                            long on_stack_long,
+                            float in_s0,
+                            float in_s1,
+                            float in_s2,
+                            float in_s3,
+                            float in_s4,
+                            float in_s5,
+                            float in_s6,
+                            float in_s7,
+                            float on_stack_float,
+                            double on_stack_double) {
+    if (doThrow) throw new Error();
+  }
+
+  // We expect a parallel move that moves the zero constant to four stack locations.
+  /// CHECK-START-ARM64: void Main.bar() register (after)
+  /// CHECK:             ParallelMove {{.*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*}}
+
+  // Those four moves should generate four 'store' instructions that use the zero register directly.
+  /// CHECK-START-ARM64: void Main.bar() disassembly (after)
+  /// CHECK-DAG:         {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+
+  public void bar() {
+    $noinline$foo(1, 2, 3, 4, 5, 6, 7,     // Integral values in registers.
+                  0, 0L,                   // Integral values on the stack.
+                  1, 2, 3, 4, 5, 6, 7, 8,  // Floating-point values in registers.
+                  0.0f, 0.0);              // Floating-point values on the stack.
+  }
+
+  public static void main(String args[]) {}
+}
diff --git a/test/617-clinit-oome/expected.txt b/test/617-clinit-oome/expected.txt
new file mode 100644
index 0000000..c1d33ff
--- /dev/null
+++ b/test/617-clinit-oome/expected.txt
@@ -0,0 +1 @@
+Filling heap
diff --git a/test/617-clinit-oome/info.txt b/test/617-clinit-oome/info.txt
new file mode 100644
index 0000000..ece35b2
--- /dev/null
+++ b/test/617-clinit-oome/info.txt
@@ -0,0 +1 @@
+Regression test for encoded static strings causing OOME (b/30690988).
diff --git a/test/617-clinit-oome/src/Main.java b/test/617-clinit-oome/src/Main.java
new file mode 100644
index 0000000..749a232
--- /dev/null
+++ b/test/617-clinit-oome/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    Class<?> klass = Other.class;
+    Object[] data = new Object[100000];
+    try {
+      System.out.println("Filling heap");
+      int size = 256 * 1024 * 1024;
+      int index = 0;
+      while (true) {
+        try {
+          data[index] = new byte[size];
+          index++;
+        } catch (OutOfMemoryError e) {
+          size /= 2;
+          if (size == 0) {
+            break;
+          }
+        }
+      }
+      // Initialize now that the heap is full.
+      Other.print();
+    } catch (OutOfMemoryError expected) {
+    } catch (Exception e) {
+      System.err.println(e);
+    }
+  }
+}
diff --git a/test/617-clinit-oome/src/Other.java b/test/617-clinit-oome/src/Other.java
new file mode 100644
index 0000000..20306ee
--- /dev/null
+++ b/test/617-clinit-oome/src/Other.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Other {
+    public static final String string1 = "ABCDEFG1";
+    public static final String string2 = "ABCDEFG2";
+    public static final String string3 = "ABCDEFG3";
+    public static final String string4 = "ABCDEFG4";
+    public static final String string5 = "ABCDEFG5";
+    public static final int int1 = 12;
+
+    public static void print() {
+        System.out.println(string2);
+    }
+}
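
One wrinkle this test leans on: when <clinit> fails, ordinary exceptions are wrapped in ExceptionInInitializerError, but Errors such as OutOfMemoryError propagate unwrapped (JLS 12.4.2), which is why Main catches OutOfMemoryError directly around Other.print(). A runnable sketch of the wrapping rule, simulating the error instead of actually exhausting the heap:

    public class ClinitErrorDemo {
      static class Boom {
        // 'if (true)' lets javac accept a throwing initializer.
        static { if (true) throw new OutOfMemoryError("simulated"); }
        static void use() {}
      }

      public static void main(String[] args) {
        try {
          Boom.use();
        } catch (OutOfMemoryError e) {
          // Rethrown as-is: Errors from <clinit> are not wrapped in
          // ExceptionInInitializerError.
          System.out.println("unwrapped: " + e);
        }
      }
    }
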
diff --git a/test/701-easy-div-rem/genMain.py b/test/701-easy-div-rem/genMain.py
index 75eee17..b6c769f 100644
--- a/test/701-easy-div-rem/genMain.py
+++ b/test/701-easy-div-rem/genMain.py
@@ -13,25 +13,27 @@
 # limitations under the License.
 
 upper_bound_int_pow2 = 31
+upper_bound_int_pow2_neg = 32
 upper_bound_long_pow2 = 63
+upper_bound_long_pow2_neg = 64
 upper_bound_constant = 100
 all_tests = [
     ({'@INT@': 'int', '@SUFFIX@':''},
      [('CheckDiv', 'idiv_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
-      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2_neg)]),
       ('CheckDiv', 'idiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckDiv', 'idiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'irem_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
-      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2_neg)]),
       ('CheckRem', 'irem_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'irem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])]),
     ({'@INT@': 'long', '@SUFFIX@': 'l'},
      [('CheckDiv', 'ldiv_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
-      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2_neg)]),
       ('CheckDiv', 'ldiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckDiv', 'ldiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'lrem_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
-      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2_neg)]),
       ('CheckRem', 'lrem_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'lrem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])])
 ]
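The `*_neg` upper bounds above run one step further (32 and 64) than the positive ones (31 and 63) because two's-complement ranges are asymmetric: -2^31 is representable as an int and -2^63 as a long, while +2^31 and +2^63 are not. A minimal Java check of this fact (class name illustrative):

    public class Pow2Bounds {
        public static void main(String[] args) {
            // -2^31 is exactly Integer.MIN_VALUE, so idiv_by_pow2_neg_ can cover i = 31.
            System.out.println((long) Integer.MIN_VALUE == -(1L << 31));  // true
            // +2^31 does not fit: (1 << 31) wraps around to Integer.MIN_VALUE.
            System.out.println((1 << 31) == Integer.MIN_VALUE);           // true
            // The same asymmetry holds for long at 2^63.
            System.out.println((1L << 63) == Long.MIN_VALUE);             // true
        }
    }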
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 17c1f00..3bb3725 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -46,4 +46,28 @@
 b/23300986 (2)
 b/23502994 (if-eqz)
 b/23502994 (check-cast)
+b/25494456
+b/21869691
+b/26143249
+b/26579108
+b/26594149 (1)
+b/26594149 (2)
+b/26594149 (3)
+b/26594149 (4)
+b/26594149 (5)
+b/26594149 (6)
+b/26594149 (7)
+b/26594149 (8)
+b/27148248
+b/26965384
+b/27799205 (1)
+b/27799205 (2)
+b/27799205 (3)
+b/27799205 (4)
+b/27799205 (5)
+b/27799205 (6)
+b/28187158
+b/29778499 (1)
+b/29778499 (2)
+b/30458218
 Done!
diff --git a/test/800-smali/smali/B30458218.smali b/test/800-smali/smali/B30458218.smali
new file mode 100644
index 0000000..67b882a
--- /dev/null
+++ b/test/800-smali/smali/B30458218.smali
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB30458218;
+.super Ljava/io/InterruptedIOException;
+
+.method public static run()V
+    .registers 2
+    new-instance v0, LB30458218;
+    invoke-direct {v0}, LB30458218;-><init>()V
+
+    # IGET used to wrongly cache 'InterruptedIOException' class under the key 'LB30458218;'
+    iget v1, v0, LB30458218;->bytesTransferred:I
+
+    return-void
+.end method
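`bytesTransferred` is declared on `java.io.InterruptedIOException`, not on `LB30458218;`, so resolving the `iget` above must cache the field's declaring class under its own key rather than under the referencing class. The bug is only observable at the dex level, but the shape of the access corresponds roughly to this Java (class name illustrative):

    import java.io.InterruptedIOException;

    public class B30458218Analog extends InterruptedIOException {
        // Reads a public int field inherited from InterruptedIOException; the
        // dex-level iget references it through the subclass, which is what used
        // to poison the dex cache with the wrong class/key pairing.
        public static int run() {
            B30458218Analog e = new B30458218Analog();
            return e.bytesTransferred;
        }
    }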
diff --git a/test/800-smali/smali/b_21869691A.smali b/test/800-smali/smali/b_21869691A.smali
new file mode 100644
index 0000000..a7a6ef4
--- /dev/null
+++ b/test/800-smali/smali/b_21869691A.smali
@@ -0,0 +1,47 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode.
+#
+# When using invoke-interface on a method id that is not from an interface class, we should throw
+# an IncompatibleClassChangeError. FindInterfaceMethod assumes that the given type is an interface,
+# so we can construct a class hierarchy that would have a surprising result:
+#
+#   interface I {
+#     void a();
+#   }
+#
+#   class B implements I {
+#      // either a miranda method for a(), or a() implemented directly.
+#   }
+#
+#   class C extends B {
+#   }
+#
+# Then calling invoke-interface C.a() will go wrong if there is no explicit check: a can't be found
+# in C itself, but it can be found in C's interface table, so we find an interface method and
+# (incorrectly) pass the ICCE checks.
+#
+# If we do this before a correct invoke-virtual C.a(), we poison the dex cache with an incorrect
+# method. In this test, this is done in A (A < B, so processed first). The "real" call is in B.
+
+.class public LB21869691A;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public run()V
+  .registers 3
+  new-instance v0, LB21869691C;
+  invoke-direct {v0}, LB21869691C;-><init>()V
+  invoke-virtual {v2, v0}, LB21869691A;->callinf(LB21869691C;)V
+  return-void
+.end method
+
+.method public callinf(LB21869691C;)V
+  .registers 2
+  invoke-interface {p1}, LB21869691C;->a()V
+  return-void
+.end method
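In Java source, the hierarchy described in the comment looks as follows. The bad `invoke-interface` on `C.a()` cannot be written in Java, because javac compiles this call site as `invoke-virtual`; that is why the test is hand-written in Smali (names below match the comment, not the test classes):

    interface I {
        void a();
    }

    class B implements I {
        public void a() { }  // concrete implementation (or a miranda method in ART terms)
    }

    class C extends B { }

    class Caller {
        // javac emits invoke-virtual here; the Smali test issues the same call
        // as invoke-interface, which must raise IncompatibleClassChangeError.
        static void call(C c) {
            c.a();
        }
    }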
diff --git a/test/800-smali/smali/b_21869691B.smali b/test/800-smali/smali/b_21869691B.smali
new file mode 100644
index 0000000..1172bdb
--- /dev/null
+++ b/test/800-smali/smali/b_21869691B.smali
@@ -0,0 +1,33 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode. See b_21869691A.smali for explanation.
+
+.class public abstract LB21869691B;
+
+.super Ljava/lang/Object;
+.implements LB21869691I;
+
+.method protected constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+# Have an implementation for the interface method.
+.method public a()V
+  .registers 1
+  return-void
+.end method
+
+# Call our own a() with invoke-virtual.
+.method public callB()V
+  .registers 1
+  invoke-virtual {p0}, LB21869691B;->a()V
+  return-void
+.end method
+
+# Call C with invoke-virtual.
+.method public callB(LB21869691C;)V
+  .registers 2
+  invoke-virtual {p1}, LB21869691C;->a()V
+  return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691C.smali b/test/800-smali/smali/b_21869691C.smali
new file mode 100644
index 0000000..4f89a04
--- /dev/null
+++ b/test/800-smali/smali/b_21869691C.smali
@@ -0,0 +1,12 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode. See b_21869691A.smali for explanation.
+
+.class public LB21869691C;
+
+.super LB21869691B;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LB21869691B;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691I.smali b/test/800-smali/smali/b_21869691I.smali
new file mode 100644
index 0000000..72a27dd
--- /dev/null
+++ b/test/800-smali/smali/b_21869691I.smali
@@ -0,0 +1,11 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode.
+#
+# This is the interface class that has an "a" method.
+
+.class public abstract interface LB21869691I;
+
+.super Ljava/lang/Object;
+
+.method public abstract a()V
+.end method
diff --git a/test/800-smali/smali/b_25494456.smali b/test/800-smali/smali/b_25494456.smali
new file mode 100644
index 0000000..0675b27
--- /dev/null
+++ b/test/800-smali/smali/b_25494456.smali
@@ -0,0 +1,14 @@
+.class public LB25494456;
+
+.super Ljava/lang/Object;
+
+# Ensure that a type mismatch (integral/float vs reference) overrides a soft failure (because of
+# an unresolvable type) in return-object.
+
+.method public static run()Lwont/be/Resolvable;
+    .registers 1
+
+    const/4 v0, 1
+    return-object v0
+
+.end method
diff --git a/test/800-smali/smali/b_26143249.smali b/test/800-smali/smali/b_26143249.smali
new file mode 100644
index 0000000..aa69e84
--- /dev/null
+++ b/test/800-smali/smali/b_26143249.smali
@@ -0,0 +1,20 @@
+# Make sure we accept non-abstract classes with abstract members.
+
+.class public LB26143249;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public run()V
+    .registers 1
+    invoke-virtual {p0}, LB26143249;->abs()V
+    return-void
+.end method
+
+.method public abstract abs()V
+.end method
diff --git a/test/800-smali/smali/b_26579108.smali b/test/800-smali/smali/b_26579108.smali
new file mode 100644
index 0000000..dde3825
--- /dev/null
+++ b/test/800-smali/smali/b_26579108.smali
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26579108;
+.super Ljava/lang/Object;
+
+# Ensure that merging uninitialized type and null does not pass verification.
+
+.field public static field:I
+
+.method public static run()Ljava/lang/String;
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+
+    sget v1, LB26579108;->field:I
+    if-eqz v1, :cond_5
+
+    const/4 v0, 0x0
+    :cond_5
+
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+    return-object v0
+  .end method
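Verifier regression tests like this one are exercised reflectively: loading the class and invoking `run()` is what triggers verification, and the harness checks that a `VerifyError` surfaces. A minimal sketch of that driving pattern (the real harness is the `TestCase` list in `test/800-smali/src/Main.java` further below; `expectVerifyError` is an illustrative helper name):

    public class VerifyDriver {
        static void expectVerifyError(String className) throws Exception {
            try {
                Class.forName(className).getMethod("run").invoke(null);
                System.out.println("FAILED: no VerifyError for " + className);
            } catch (VerifyError expected) {
                System.out.println("OK: " + className);
            }
        }

        public static void main(String[] args) throws Exception {
            expectVerifyError("B26579108");
        }
    }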
diff --git a/test/800-smali/smali/b_26594149_1.smali b/test/800-smali/smali/b_26594149_1.smali
new file mode 100644
index 0000000..c465859
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_1.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_1;
+.super Ljava/lang/Object;
+
+.method public static run()V
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+
+    # Illegal operation.
+    instance-of v1, v0, Ljava/lang/String;
+
+    return-void
+  .end method
diff --git a/test/800-smali/smali/b_26594149_2.smali b/test/800-smali/smali/b_26594149_2.smali
new file mode 100644
index 0000000..765afe2
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_2.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_2;
+.super Ljava/lang/Object;
+
+.method public static run()V
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+
+    # Illegal operation.
+    check-cast v0, Ljava/lang/String;
+
+    return-void
+  .end method
diff --git a/test/800-smali/smali/b_26594149_3.smali b/test/800-smali/smali/b_26594149_3.smali
new file mode 100644
index 0000000..42b5675
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_3.smali
@@ -0,0 +1,28 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_3;
+.super Ljava/lang/Object;
+
+.field public static field:Ljava/lang/String;
+
+.method public static run()V
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+
+    # Illegal operation.
+    sput-object v0, LB26594149_3;->field:Ljava/lang/String;
+
+    return-void
+  .end method
diff --git a/test/800-smali/smali/b_26594149_4.smali b/test/800-smali/smali/b_26594149_4.smali
new file mode 100644
index 0000000..5b2f99b
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_4.smali
@@ -0,0 +1,38 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_4;
+.super Ljava/lang/Object;
+
+.field public field:Ljava/lang/String;
+
+.method public constructor <init>()V
+    .registers 4
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+    .registers 4
+
+    new-instance v1, LB26594149_4;
+    invoke-direct {v1}, LB26594149_4;-><init>()V
+
+    new-instance v0, Ljava/lang/String;
+
+    # Illegal operation.
+    iput-object v0, v1, LB26594149_4;->field:Ljava/lang/String;
+
+    return-void
+  .end method
diff --git a/test/800-smali/smali/b_26594149_5.smali b/test/800-smali/smali/b_26594149_5.smali
new file mode 100644
index 0000000..27d6255
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_5.smali
@@ -0,0 +1,28 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_5;
+.super Ljava/lang/Object;
+
+.method public static run()V
+    .registers 4
+
+    new-instance v0, Ljava/lang/Object;
+
+    # Allowed operation on uninitialized objects.
+    monitor-enter v0
+    monitor-exit v0
+
+    return-void
+  .end method
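`monitor-enter`/`monitor-exit` are the dex encoding of a Java `synchronized` block. javac always completes the constructor call before synchronizing, so the uninitialized-receiver case above exists only in hand-written bytecode; the well-formed Java equivalent is simply:

    public class MonitorExample {
        public static void run() {
            Object lock = new Object();  // fully initialized before first use
            synchronized (lock) {
                // compiles to monitor-enter / monitor-exit on 'lock'
            }
        }
    }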
diff --git a/test/800-smali/smali/b_26594149_6.smali b/test/800-smali/smali/b_26594149_6.smali
new file mode 100644
index 0000000..8d26ee8
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_6.smali
@@ -0,0 +1,24 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_6;
+.super Ljava/lang/Object;
+
+.method public static run()V
+    .registers 4
+
+    new-instance v0, Ljava/lang/Exception;
+    throw v0
+
+  .end method
diff --git a/test/800-smali/smali/b_26594149_7.smali b/test/800-smali/smali/b_26594149_7.smali
new file mode 100644
index 0000000..f624d1a
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_7.smali
@@ -0,0 +1,30 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_7;
+.super Ljava/lang/Object;
+
+.method private static foo(Ljava/lang/Object;)V
+    .registers 1
+    return-void
+.end method
+
+.method public static run()V
+    .registers 4
+
+    new-instance v0, Ljava/lang/Object;
+    invoke-static {v0}, LB26594149_7;->foo(Ljava/lang/Object;)V
+    return-void
+
+  .end method
diff --git a/test/800-smali/smali/b_26594149_8.smali b/test/800-smali/smali/b_26594149_8.smali
new file mode 100644
index 0000000..e366de4
--- /dev/null
+++ b/test/800-smali/smali/b_26594149_8.smali
@@ -0,0 +1,24 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26594149_8;
+.super Ljava/lang/Object;
+
+.method public static run()Ljava/lang/Object;
+    .registers 4
+
+    new-instance v0, Ljava/lang/Object;
+    return-object v0
+
+  .end method
diff --git a/test/800-smali/smali/b_26965384.smali b/test/800-smali/smali/b_26965384.smali
new file mode 100644
index 0000000..47ed418
--- /dev/null
+++ b/test/800-smali/smali/b_26965384.smali
@@ -0,0 +1,20 @@
+.class public LB26965384;
+.super LB26965384Super;
+
+.method public constructor <init>()V
+    .locals 1
+    const v0, 0
+    iput v0, p0, LB26965384;->a:I
+    invoke-direct {p0}, LB26965384Super;-><init>()V
+    return-void
+.end method
+
+
+# Just by loading this class we should fail. It doesn't really matter what's in
+# this method.
+.method public static run()V
+    .registers 4
+    new-instance v0, LB26965384;
+    invoke-direct {v0}, LB26965384;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_26965384Super.smali b/test/800-smali/smali/b_26965384Super.smali
new file mode 100644
index 0000000..32faea7
--- /dev/null
+++ b/test/800-smali/smali/b_26965384Super.smali
@@ -0,0 +1,10 @@
+.class public LB26965384Super;
+.super Ljava/lang/Object;
+
+.field public a:I
+
+.method public constructor <init>()V
+    .locals 0
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_27148248.smali b/test/800-smali/smali/b_27148248.smali
new file mode 100644
index 0000000..4601cc6
--- /dev/null
+++ b/test/800-smali/smali/b_27148248.smali
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB27148248;
+
+# Regression test for a dex2oatd crash during compilation of a method
+# that throws with an argument of non-reference type.
+
+.super Ljava/lang/Object;
+
+.method public static run()V
+   .registers 1
+   const v0, 0xbad
+   throw v0
+.end method
+
diff --git a/test/800-smali/smali/b_27799205_1.smali b/test/800-smali/smali/b_27799205_1.smali
new file mode 100644
index 0000000..92bfc80
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_1.smali
@@ -0,0 +1,37 @@
+.class public LB27799205_1;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+.method public static test([Ljava/lang/Object;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 will be the unresolved merge.
+
+       # Test aput: v0[v2] = v1.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_2.smali b/test/800-smali/smali/b_27799205_2.smali
new file mode 100644
index 0000000..e730b1e
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_2.smali
@@ -0,0 +1,37 @@
+.class public LB27799205_2;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+.method public static test([Ljava/lang/Object;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 will be the unresolved merge.
+
+       # Test aput: v0[v2] = v1.
+       aput v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_3.smali b/test/800-smali/smali/b_27799205_3.smali
new file mode 100644
index 0000000..1cb025e
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_3.smali
@@ -0,0 +1,39 @@
+.class public LB27799205_3;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that merging is pro-active.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some non-object non-array input (non-Object because the merge should be Object).
+.method public static test(Ljava/lang/Integer;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 should be Object.
+
+       # Test aput-object: v0[v2] = v1. Should fail for v0 not being an array.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
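The merge under test mirrors Java's join of unrelated reference types: the nearest common supertype of `Integer` and an (unresolvable) array type is taken to be `Object`, and `Object` is not an array, so the subsequent `aput-object` must be rejected. A rough Java analog of the merge itself (the offending store cannot be written, as it would not compile):

    public class MergeExample {
        static Object merge(boolean flag, Integer resolved, Object[] other) {
            // The static type of this conditional is Object: Integer and
            // Object[] have no more specific common supertype.
            Object merged = flag ? (Object) resolved : other;
            // merged[0] = null;  // would not compile: Object is not an array type
            return merged;
        }
    }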
diff --git a/test/800-smali/smali/b_27799205_4.smali b/test/800-smali/smali/b_27799205_4.smali
new file mode 100644
index 0000000..e42951a
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_4.smali
@@ -0,0 +1,39 @@
+.class public LB27799205_4;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that merging is pro-active.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some primitive-type array input.
+.method public static test([I[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 should be Object.
+
+       # Test aput-object: v0[v2] = v1. Should fail for v0 not being an array.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_5.smali b/test/800-smali/smali/b_27799205_5.smali
new file mode 100644
index 0000000..6c7b183
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_5.smali
@@ -0,0 +1,39 @@
+.class public LB27799205_5;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that merging is pro-active.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some non-resolvable non-array type.
+.method public static test(Ldo/not/resolve/L;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 should be Object.
+
+       # Test aput-object: v0[v2] = v1. Should fail for v0 not being an array.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_6.smali b/test/800-smali/smali/b_27799205_6.smali
new file mode 100644
index 0000000..d0154f7
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_6.smali
@@ -0,0 +1,24 @@
+.class public LB27799205_6;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that non-merged types still work.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some non-resolvable array type.
+.method public static test([Ldo/not/resolve/K;)Ldo/not/resolve/K;
+.registers 3
+       const v0, 0
+       const v1, 0
+       # v2 = p0
+
+       # v0 := v2[v1]
+       aget-object v0, v2, v1
+
+       return-object v0
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_helper.smali b/test/800-smali/smali/b_27799205_helper.smali
new file mode 100644
index 0000000..e6d0985
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_helper.smali
@@ -0,0 +1,47 @@
+.class public LB27799205Helper;
+.super Ljava/lang/Object;
+
+# Helper for B27799205. Reflection tries to resolve all types in a method's signature. That's bad
+# for intentionally unresolvable types, as it makes it harder to distinguish what kind of error we got.
+
+.method public static run1()V
+.registers 1
+       invoke-static {}, LB27799205_1;->run()V
+
+       return-void
+.end method
+
+.method public static run2()V
+.registers 1
+       invoke-static {}, LB27799205_2;->run()V
+
+       return-void
+.end method
+
+.method public static run3()V
+.registers 1
+       invoke-static {}, LB27799205_3;->run()V
+
+       return-void
+.end method
+
+.method public static run4()V
+.registers 1
+       invoke-static {}, LB27799205_4;->run()V
+
+       return-void
+.end method
+
+.method public static run5()V
+.registers 1
+       invoke-static {}, LB27799205_5;->run()V
+
+       return-void
+.end method
+
+.method public static run6()V
+.registers 1
+       invoke-static {}, LB27799205_6;->run()V
+
+       return-void
+.end method
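The indirection matters because the harness invokes test methods through reflection, and looking up a `java.lang.reflect.Method` materializes `Class` objects for the parameter and return types in its signature; a method mentioning `do/not/resolve/K` would therefore fail during the lookup itself rather than in verification. A sketch of the distinction (illustrative only):

    public class ReflectionResolution {
        public static void main(String[] args) throws Exception {
            // Safe: run1()V mentions no unresolvable types in its signature.
            Class.forName("B27799205Helper").getMethod("run1").invoke(null);

            // By contrast, looking up test(...) on LB27799205_1; would need the
            // Class object for [Ldo/not/resolve/K;, failing before verification
            // of the method body is ever observed.
        }
    }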
diff --git a/test/800-smali/smali/b_28187158.smali b/test/800-smali/smali/b_28187158.smali
new file mode 100644
index 0000000..47e5ef6
--- /dev/null
+++ b/test/800-smali/smali/b_28187158.smali
@@ -0,0 +1,11 @@
+.class public LB28187158;
+
+# Regression test for iget with wrong classes.
+
+.super Ljava/lang/Object;
+
+.method public static run(Ljava/lang/Integer;)V
+   .registers 2
+   iget v0, p0, Ljava/lang/System;->in:Ljava/io/InputStream;
+   return-void
+.end method
diff --git a/test/800-smali/smali/b_29778499_1.smali b/test/800-smali/smali/b_29778499_1.smali
new file mode 100644
index 0000000..6cc0731
--- /dev/null
+++ b/test/800-smali/smali/b_29778499_1.smali
@@ -0,0 +1,19 @@
+.class public LB29778499_1;
+.super Ljava/lang/Object;
+
+# Test returning an object that doesn't implement the interface declared as the return type.
+
+.method public static run()V
+.registers 2
+       invoke-static {}, LB29778499_1;->test()Ljava/lang/Runnable;
+       move-result-object v0
+       invoke-interface {v0}, Ljava/lang/Runnable;->run()V
+       return-void
+.end method
+
+.method public static test()Ljava/lang/Runnable;
+.registers 1
+       new-instance v0, LB29778499_1;
+       invoke-direct {v0}, LB29778499_1;-><init>()V
+       return-object v0
+.end method
diff --git a/test/800-smali/smali/b_29778499_2.smali b/test/800-smali/smali/b_29778499_2.smali
new file mode 100644
index 0000000..ad24d2f
--- /dev/null
+++ b/test/800-smali/smali/b_29778499_2.smali
@@ -0,0 +1,13 @@
+.class public LB29778499_2;
+.super Ljava/lang/Object;
+
+# Test invoking an interface method on an object that doesn't implement any interface.
+# This is testing an edge case (not implementing any interface) for b/18116999.
+
+.method public static run()V
+.registers 1
+       new-instance v0, Ljava/lang/Object;
+       invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+       invoke-interface {v0}, Ljava/lang/Runnable;->run()V
+       return-void
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index f75747d..34f2580 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -137,6 +137,50 @@
                 new Object[] { new Object() }, null, null));
         testCases.add(new TestCase("b/23502994 (check-cast)", "B23502994", "runCHECKCAST",
                 new Object[] { "abc" }, null, null));
+        testCases.add(new TestCase("b/25494456", "B25494456", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/21869691", "B21869691A", "run", null,
+                new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/26143249", "B26143249", "run", null,
+                new AbstractMethodError(), null));
+        testCases.add(new TestCase("b/26579108", "B26579108", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (1)", "B26594149_1", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (2)", "B26594149_2", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (3)", "B26594149_3", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (4)", "B26594149_4", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (5)", "B26594149_5", "run", null, null, null));
+        testCases.add(new TestCase("b/26594149 (6)", "B26594149_6", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (7)", "B26594149_7", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26594149 (8)", "B26594149_8", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/27148248", "B27148248", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26965384", "B26965384", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/27799205 (1)", "B27799205Helper", "run1", null, null, null));
+        testCases.add(new TestCase("b/27799205 (2)", "B27799205Helper", "run2", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (3)", "B27799205Helper", "run3", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (4)", "B27799205Helper", "run4", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (5)", "B27799205Helper", "run5", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (6)", "B27799205Helper", "run6", null, null, null));
+        testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null },
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/29778499 (1)", "B29778499_1", "run", null,
+                new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/29778499 (2)", "B29778499_2", "run", null,
+                new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/30458218", "B30458218", "run", null, null, null));
     }
 
     public void runTests() {
@@ -182,8 +226,7 @@
                 if (tc.expectedException != null) {
                     errorReturn = new IllegalStateException("Expected an exception in test " +
                                                             tc.testName);
-                }
-                if (tc.expectedReturn == null && retValue != null) {
+                } else if (tc.expectedReturn == null && retValue != null) {
                     errorReturn = new IllegalStateException("Expected a null result in test " +
                                                             tc.testName);
                 } else if (tc.expectedReturn != null &&
@@ -206,7 +249,7 @@
                                                         tc.expectedException.getClass().getName() +
                                                         ", but got " + exc.getClass(), exc);
             } else {
-              // Expected exception, do nothing.
+                // Expected exception, do nothing.
             }
         } finally {
             if (errorReturn != null) {
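The `else if` fix above matters: previously, a test that expected an exception but returned normally would have its error overwritten by the following null-return check. A condensed sketch of the corrected decision chain, under the assumption that the non-null return branch compares with `equals` (only its first line is visible in the hunk):

    static Throwable check(String testName, Throwable expectedException,
                           Object expectedReturn, Object retValue) {
        if (expectedException != null) {
            // The method returned normally although an exception was expected.
            return new IllegalStateException("Expected an exception in test " + testName);
        } else if (expectedReturn == null && retValue != null) {
            return new IllegalStateException("Expected a null result in test " + testName);
        } else if (expectedReturn != null && !expectedReturn.equals(retValue)) {
            return new IllegalStateException("Unexpected return value in test " + testName);
        }
        return null;  // test passed
    }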
diff --git a/test/803-no-super/expected.txt b/test/803-no-super/expected.txt
new file mode 100644
index 0000000..5036991
--- /dev/null
+++ b/test/803-no-super/expected.txt
@@ -0,0 +1,2 @@
+java.lang.ClassNotFoundException: NoSuper1
+Done!
diff --git a/test/803-no-super/info.txt b/test/803-no-super/info.txt
new file mode 100644
index 0000000..0178a44
--- /dev/null
+++ b/test/803-no-super/info.txt
@@ -0,0 +1,3 @@
+Regression test checking that temporary (erroneous) classes don't get conflict tables created.
+
+Obviously needs to run under Dalvik or ART.
diff --git a/test/803-no-super/smali/nosuper1.smali b/test/803-no-super/smali/nosuper1.smali
new file mode 100644
index 0000000..df2eaa5
--- /dev/null
+++ b/test/803-no-super/smali/nosuper1.smali
@@ -0,0 +1,3 @@
+.class public LNoSuper1;
+
+.super LNoClass;
diff --git a/test/803-no-super/src/Main.java b/test/803-no-super/src/Main.java
new file mode 100644
index 0000000..a07e042
--- /dev/null
+++ b/test/803-no-super/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Attempt to load class with no superclass.
+ */
+public class Main {
+    public static void main(String[] args) throws Exception {
+        try {
+            Class<?> c = Class.forName("NoSuper1");
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+        System.out.println("Done!");
+    }
+}
diff --git a/test/804-class-extends-itself/expected.txt b/test/804-class-extends-itself/expected.txt
new file mode 100644
index 0000000..b98f963
--- /dev/null
+++ b/test/804-class-extends-itself/expected.txt
@@ -0,0 +1,2 @@
+Caught ClassCircularityError
+Done!
diff --git a/test/804-class-extends-itself/info.txt b/test/804-class-extends-itself/info.txt
new file mode 100644
index 0000000..c48934c
--- /dev/null
+++ b/test/804-class-extends-itself/info.txt
@@ -0,0 +1 @@
+Exercise class linker check for classes extending themselves (b/28685551).
diff --git a/test/804-class-extends-itself/smali/Main.smali b/test/804-class-extends-itself/smali/Main.smali
new file mode 100644
index 0000000..5c349ed
--- /dev/null
+++ b/test/804-class-extends-itself/smali/Main.smali
@@ -0,0 +1,57 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We cannot implement Main in Java, as this would require running
+# dexmerger (to merge the Dex file produced from Smali code and the
+# Dex file produced from Java code), which loops indefinitely when
+# processing class B28685551, as this class inherits from itself.  As
+# a workaround, implement Main using Smali (we could also have used
+# multidex, but this requires a custom build script).
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .registers 3
+    .param p0, "args"
+
+    invoke-static {}, LMain;->test()V
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Done!"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+.end method
+
+.method static test()V
+    .registers 4
+
+    :try_start
+    const-string v2, "B28685551"
+    invoke-static {v2}, Ljava/lang/Class;->forName(Ljava/lang/String;)Ljava/lang/Class;
+    :try_end
+    .catch Ljava/lang/ClassCircularityError; {:try_start .. :try_end} :catch
+
+    move-result-object v0
+
+    :goto_7
+    return-void
+
+    :catch
+    move-exception v1
+    .local v1, "e":Ljava/lang/ClassCircularityError;
+    sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v3, "Caught ClassCircularityError"
+    invoke-virtual {v2, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    goto :goto_7
+.end method
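For reference, the Java source that `Main.smali` transliterates (unusable here only because dexmerger loops on `B28685551`, as explained above) would be approximately:

    public class Main {
        public static void main(String[] args) throws Exception {
            test();
            System.out.println("Done!");
        }

        static void test() throws ClassNotFoundException {
            try {
                Class.forName("B28685551");
            } catch (ClassCircularityError e) {
                System.out.println("Caught ClassCircularityError");
            }
        }
    }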
diff --git a/test/804-class-extends-itself/smali/b_28685551.smali b/test/804-class-extends-itself/smali/b_28685551.smali
new file mode 100644
index 0000000..d98c6e3
--- /dev/null
+++ b/test/804-class-extends-itself/smali/b_28685551.smali
@@ -0,0 +1,18 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Regression test for a class inheriting from itself.
+
+.class public LB28685551;
+.super LB28685551;
diff --git a/test/900-hello-plugin/build b/test/900-hello-plugin/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/900-hello-plugin/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/900-hello-plugin/expected.txt b/test/900-hello-plugin/expected.txt
new file mode 100644
index 0000000..43db31c
--- /dev/null
+++ b/test/900-hello-plugin/expected.txt
@@ -0,0 +1,8 @@
+ArtPlugin_Initialize called in test 900
+Agent_OnLoad called with options "test_900"
+GetEnvHandler called in test 900
+GetEnvHandler called with version 0x900fffff
+GetEnv returned '900' environment!
+Hello, world!
+Agent_OnUnload called
+ArtPlugin_Deinitialize called in test 900
diff --git a/test/900-hello-plugin/info.txt b/test/900-hello-plugin/info.txt
new file mode 100644
index 0000000..47b15c2
--- /dev/null
+++ b/test/900-hello-plugin/info.txt
@@ -0,0 +1,2 @@
+Tests that agents and plugins are loaded.
+
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
new file mode 100644
index 0000000..a38cc3d
--- /dev/null
+++ b/test/900-hello-plugin/load_unload.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <jni.h>
+#include <stdio.h>
+
+#include "art_method-inl.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "java_vm_ext.h"  // JavaVMExt, AddEnvironmentHook
+#include "runtime.h"      // Runtime::Current
+
+namespace art {
+
+constexpr jint TEST_900_ENV_VERSION_NUMBER = 0x900FFFFF;
+constexpr uintptr_t ENV_VALUE = 900;
+
+// Allow this library to be used as a plugin too so we can test the stack.
+static jint GetEnvHandler(JavaVMExt* vm ATTRIBUTE_UNUSED, void** new_env, jint version) {
+  printf("%s called in test 900\n", __func__);
+  if (version != TEST_900_ENV_VERSION_NUMBER) {
+    return JNI_EVERSION;
+  }
+  printf("GetEnvHandler called with version 0x%x\n", version);
+  *new_env = reinterpret_cast<void*>(ENV_VALUE);
+  return JNI_OK;
+}
+
+extern "C" bool ArtPlugin_Initialize() {
+  printf("%s called in test 900\n", __func__);
+  Runtime::Current()->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
+  return true;
+}
+
+extern "C" bool ArtPlugin_Deinitialize() {
+  printf("%s called in test 900\n", __func__);
+  return true;
+}
+
+extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm,
+                                               char* options,
+                                               void* reserved ATTRIBUTE_UNUSED) {
+  printf("Agent_OnLoad called with options \"%s\"\n", options);
+  uintptr_t env = 0;
+  jint res = vm->GetEnv(reinterpret_cast<void**>(&env), TEST_900_ENV_VERSION_NUMBER);
+  if (res != JNI_OK) {
+    printf("GetEnv(TEST_900_ENV_VERSION_NUMBER) returned non-zero\n");
+  }
+  printf("GetEnv returned '%" PRIdPTR "' environment!\n", env);
+  return 0;
+}
+
+extern "C" JNIEXPORT void JNICALL Agent_OnUnload(JavaVM* vm ATTRIBUTE_UNUSED) {
+  printf("Agent_OnUnload called\n");
+}
+
+}  // namespace art
diff --git a/test/900-hello-plugin/run b/test/900-hello-plugin/run
new file mode 100755
index 0000000..35b0871
--- /dev/null
+++ b/test/900-hello-plugin/run
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libartagentd.so
+if [[ "$@" == *"-O"* ]]; then
+  plugin=libartagent.so
+fi
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${plugin}=test_900 \
+                   --android-runtime-option -Xplugin:${plugin}
diff --git a/test/900-hello-plugin/src/Main.java b/test/900-hello-plugin/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/900-hello-plugin/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc
new file mode 100644
index 0000000..81a1b66
--- /dev/null
+++ b/test/901-hello-ti-agent/basics.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "901-hello-ti-agent/basics.h"
+
+#include <jni.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>  // fsync
+
+#include "base/macros.h"
+#include "openjdkjvmti/jvmti.h"
+
+namespace art {
+namespace Test901HelloTi {
+
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  printf("Loaded Agent for test 901-hello-ti-agent\n");
+  fsync(1);
+  jvmtiEnv* env = nullptr;
+  jvmtiEnv* env2 = nullptr;
+
+#define CHECK_CALL_SUCCESS(c) \
+  do { \
+    if (c != JNI_OK) { \
+      printf("call " #c " did not succeed\n"); \
+      return -1; \
+    } \
+  } while (false)
+
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env), JVMTI_VERSION_1_0));
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env2), JVMTI_VERSION_1_0));
+  if (env == env2) {
+    printf("GetEnv returned same environment twice!\n");
+    return -1;
+  }
+  unsigned char* local_data = nullptr;
+  CHECK_CALL_SUCCESS(env->Allocate(8, &local_data));
+  strcpy(reinterpret_cast<char*>(local_data), "hello!!");
+  CHECK_CALL_SUCCESS(env->SetEnvironmentLocalStorage(local_data));
+  unsigned char* get_data = nullptr;
+  CHECK_CALL_SUCCESS(env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
+  if (get_data != local_data) {
+    printf("Got different data from local storage then what was set!\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env2->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
+  if (get_data != nullptr) {
+    printf("env2 did not have nullptr local storage.\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env->Deallocate(local_data));
+  jint version = 0;
+  CHECK_CALL_SUCCESS(env->GetVersionNumber(&version));
+  if ((version & JVMTI_VERSION_1) != JVMTI_VERSION_1) {
+    printf("Unexpected version number!\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env->DisposeEnvironment());
+  CHECK_CALL_SUCCESS(env2->DisposeEnvironment());
+#undef CHECK_CALL_SUCCESS
+  return JNI_OK;
+}
+
+
+}  // namespace Test901HelloTi
+}  // namespace art
diff --git a/test/901-hello-ti-agent/basics.h b/test/901-hello-ti-agent/basics.h
new file mode 100644
index 0000000..f482950
--- /dev/null
+++ b/test/901-hello-ti-agent/basics.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_901_HELLO_TI_AGENT_BASICS_H_
+#define ART_TEST_901_HELLO_TI_AGENT_BASICS_H_
+
+#include <jni.h>
+
+namespace art {
+namespace Test901HelloTi {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test901HelloTi
+}  // namespace art
+
+#endif  // ART_TEST_901_HELLO_TI_AGENT_BASICS_H_
diff --git a/test/901-hello-ti-agent/build b/test/901-hello-ti-agent/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/901-hello-ti-agent/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt
new file mode 100644
index 0000000..414eb3b
--- /dev/null
+++ b/test/901-hello-ti-agent/expected.txt
@@ -0,0 +1,2 @@
+Loaded Agent for test 901-hello-ti-agent
+Hello, world!
diff --git a/test/901-hello-ti-agent/info.txt b/test/901-hello-ti-agent/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/901-hello-ti-agent/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/901-hello-ti-agent/run b/test/901-hello-ti-agent/run
new file mode 100755
index 0000000..8079a8c
--- /dev/null
+++ b/test/901-hello-ti-agent/run
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+if [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=901-hello-ti-agent \
+                   --android-runtime-option -Xplugin:${plugin}
diff --git a/test/901-hello-ti-agent/src/Main.java b/test/901-hello-ti-agent/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/901-hello-ti-agent/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
deleted file mode 100644
index 16381e4..0000000
--- a/test/955-lambda-smali/expected.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-SanityCheck
-Hello world! (0-args, no closure)
-ABCD Hello world! (4-args, no closure)
-Caught NPE
-(BoxUnbox) Hello boxing world! (0-args, no closure)
-(BoxUnbox) Boxing repeatedly yields referentially-equal objects
-(BoxUnbox) Caught NPE for unbox-lambda
-(BoxUnbox) Caught NPE for box-lambda
-(BoxUnbox) Caught ClassCastException for unbox-lambda
-(MoveResult) testZ success
-(MoveResult) testB success
-(MoveResult) testS success
-(MoveResult) testI success
-(MoveResult) testC success
-(MoveResult) testJ success
-(MoveResult) testF success
-(MoveResult) testD success
-(MoveResult) testL success
-(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true
-(CaptureVariables) (0-args, 1 captured variable 'B'): value is R
-(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂
-(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000
-(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678
-(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742
-(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity
-(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity
-(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity
-(CaptureVariables) Caught NPE
diff --git a/test/955-lambda-smali/info.txt b/test/955-lambda-smali/info.txt
deleted file mode 100644
index aed5e84..0000000
--- a/test/955-lambda-smali/info.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Smali-based tests for experimental lambda instructions.
-
-Obviously needs to run under ART.
diff --git a/test/955-lambda-smali/run b/test/955-lambda-smali/run
deleted file mode 100755
index b754680..0000000
--- a/test/955-lambda-smali/run
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --runtime-option -Xexperimental:lambdas -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:lambdas
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
deleted file mode 100644
index 915de2d..0000000
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ /dev/null
@@ -1,168 +0,0 @@
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LBoxUnbox;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-    .registers 0
-
-    invoke-static {}, LBoxUnbox;->testBox()V
-    invoke-static {}, LBoxUnbox;->testBoxEquality()V
-    invoke-static {}, LBoxUnbox;->testFailures()V
-    invoke-static {}, LBoxUnbox;->testFailures2()V
-    invoke-static {}, LBoxUnbox;->testFailures3()V
-    invoke-static {}, LBoxUnbox;->forceGC()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(J)V
-    .registers 4 # 1 wide parameter, 2 locals
-
-    const-string v0, "(BoxUnbox) Hello boxing world! (0-args, no closure)"
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-# Test boxing and unboxing; the same lambda should be invoked as if there was no box.
-.method private static testBox()V
-    .registers 3
-
-    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
-    box-lambda v2, v0 # v2 = box(v0)
-    unbox-lambda v0, v2, J # v0 = unbox(v2)
-    invoke-lambda v0, {}
-
-    return-void
-.end method
-
-# Test that boxing the same lambda twice yields the same object.
-.method private static testBoxEquality()V
-   .registers 6 # 0 parameters, 6 locals
-
-    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
-    box-lambda v2, v0 # v2 = box(v0)
-    box-lambda v3, v0 # v3 = box(v0)
-
-    # The objects should be non-null, and they should have the same reference
-    if-eqz v2, :is_zero
-    if-ne v2, v3, :is_not_equal
-
-    const-string v4, "(BoxUnbox) Boxing repeatedly yields referentially-equal objects"
-    goto :end
-
-:is_zero
-    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: boxing returned null"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: objects were not same reference"
-    goto :end
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    unbox-lambda v2, v0, J
-    # attempting to unbox a null lambda will throw NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "(BoxUnbox) Caught NPE for unbox-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures2()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    box-lambda v2, v0  # attempting to box a null lambda will throw NPE
-:end
-    return-void
-
-    # TODO: refactor testFailures using a goto
-
-:handler
-    const-string v2, "(BoxUnbox) Caught NPE for box-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures3()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const-string v0, "This is not a boxed lambda"
-:start
-    # TODO: use \FunctionalType; here instead
-    unbox-lambda v2, v0, J
-    # a string can't be used here; a boxed lambda is expected, so this throws ClassCastException.
-:end
-    return-void
-
-    # TODO: refactor testFailures using a goto
-
-:handler
-    const-string v2, "(BoxUnbox) Caught ClassCastException for unbox-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/ClassCastException; {:start .. :end} :handler
-.end method
-
-
-# Force a GC. Used to ensure our weak reference table of boxed lambdas is getting swept.
-.method private static forceGC()V
-    .registers 1
-    invoke-static {}, Ljava/lang/Runtime;->getRuntime()Ljava/lang/Runtime;
-    move-result-object v0
-    invoke-virtual {v0}, Ljava/lang/Runtime;->gc()V
-
-    return-void
-.end method
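
The referential-equality property that BoxUnbox checked — boxing the same closure twice must produce the same object — mirrors the canonicalization java.lang.Integer performs for small values. A rough Java analogue follows as a sketch only; the class name is invented here, since the experimental opcodes had no Java source form:

    // Hypothetical sketch: Integer.valueOf caches small values, so repeated
    // boxing of the same value yields one shared object -- the same guarantee
    // the deleted test demanded of box-lambda.
    public class BoxingEqualitySketch {
        public static void main(String[] args) {
            Integer a = Integer.valueOf(100);
            Integer b = Integer.valueOf(100);
            System.out.println(a == b
                ? "Boxing repeatedly yields referentially-equal objects"
                : "FAILED: objects were not same reference");
        }
    }
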
diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali
deleted file mode 100644
index f18b7ff..0000000
--- a/test/955-lambda-smali/smali/CaptureVariables.smali
+++ /dev/null
@@ -1,311 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LCaptureVariables;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    # Test boolean capture
-    const v2, 1           # v2 = true
-    capture-variable v2, "Z"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test byte capture
-    const v2, 82       # v2 = 82, 'R'
-    capture-variable v2, "B"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test char capture
-    const v2, 0x2202       # v2 = 0x2202, '∂'
-    capture-variable v2, "C"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test short capture
-    const v2, 1000 # v2 = 1000
-    capture-variable v2, "S"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test int capture
-    const v2, 12345678
-    capture-variable v2, "I"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test long capture
-    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
-    capture-variable v2, "J"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test float capture
-    const v2, infinityf
-    capture-variable v2, "F"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test double capture
-    const-wide v2, -infinity
-    capture-variable v2, "D"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    #TODO: capture objects and lambdas once we have support for it
-
-    # Test capturing multiple variables
-    invoke-static {}, LCaptureVariables;->testMultipleCaptures()V
-
-    # Test failures
-    invoke-static {}, LCaptureVariables;->testFailures()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_Z(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "Z"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_B(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "B"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V  # no println(B), use char instead.
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_C(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "C"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_S(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "S"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V  # no println(S), use int instead
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_I(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "I"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_J(J)V
-    .registers 6 # 1 wide parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "J"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_F(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "F"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_D(J)V
-    .registers 6 # 1 wide parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "D"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
-
-    return-void
-.end method
-
-# Test capturing more than one variable.
-.method private static testMultipleCaptures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v2, 1           # v2 = true
-    capture-variable v2, "Z"
-
-    const v2, 82       # v2 = 82, 'R'
-    capture-variable v2, "B"
-
-    const v2, 0x2202       # v2 = 0x2202, '∂'
-    capture-variable v2, "C"
-
-    const v2, 1000 # v2 = 1000
-    capture-variable v2, "S"
-
-    const v2, 12345678
-    capture-variable v2, "I"
-
-    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
-    capture-variable v2, "J"
-
-    const v2, infinityf
-    capture-variable v2, "F"
-
-    const-wide v2, -infinity
-    capture-variable v2, "D"
-
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_ZBCSIJFD(J)V
-    .registers 7 # 1 wide parameter, 5 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is "
-    const-string v4, ","
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "Z"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "B"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "C"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "S"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "I"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "J"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "F"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "D"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
-
-    return-void
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    liberate-variable v0, v2, "Z" # liberating a variable from a null closure shall raise an NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "(CaptureVariables) Caught NPE"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
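
A standard-Java analogue of the capture semantics exercised above — a sketch only, with ordinary lambda capture standing in for the experimental capture-variable/liberate-variable opcodes (the class name is hypothetical):

    // Hypothetical sketch: a lambda closing over effectively-final locals,
    // using the same values the deleted test captures.
    public class CaptureSketch {
        public static void main(String[] args) {
            boolean z = true;
            long j = 3287471278325742L;
            double d = Double.NEGATIVE_INFINITY;
            // The captured values are copied into the closure at creation,
            // mirroring how capture-variable packed them into the closure.
            Runnable r = () -> System.out.println(z + "," + j + "," + d);
            r.run();  // prints "true,3287471278325742,-Infinity"
        }
    }
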
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
deleted file mode 100644
index 9892d61..0000000
--- a/test/955-lambda-smali/smali/Main.smali
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LMain;
-
-.super Ljava/lang/Object;
-
-.method public static main([Ljava/lang/String;)V
-    .registers 2
-
-    invoke-static {}, LSanityCheck;->run()I
-    invoke-static {}, LTrivialHelloWorld;->run()V
-    invoke-static {}, LBoxUnbox;->run()V
-    invoke-static {}, LMoveResult;->run()V
-    invoke-static {}, LCaptureVariables;->run()V
-
-# TODO: add tests when verification fails
-
-    return-void
-.end method
diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali
deleted file mode 100644
index 52f7ba3..0000000
--- a/test/955-lambda-smali/smali/MoveResult.smali
+++ /dev/null
@@ -1,330 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LMoveResult;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    invoke-static {}, LMoveResult;->testZ()V
-    invoke-static {}, LMoveResult;->testB()V
-    invoke-static {}, LMoveResult;->testS()V
-    invoke-static {}, LMoveResult;->testI()V
-    invoke-static {}, LMoveResult;->testC()V
-    invoke-static {}, LMoveResult;->testJ()V
-    invoke-static {}, LMoveResult;->testF()V
-    invoke-static {}, LMoveResult;->testD()V
-    invoke-static {}, LMoveResult;->testL()V
-
-    return-void
-.end method
-
-# Test that booleans are returned correctly via move-result.
-.method public static testZ()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaZ(J)Z
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 1
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testZ success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testZ failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testZ. Always returns "true".
-.method public static lambdaZ(J)Z
-    .registers 3
-
-    const v0, 1
-    return v0
-
-.end method
-
-# Test that bytes are returned correctly via move-result.
-.method public static testB()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaB(J)B
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 15
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testB success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testB failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testB. Always returns "15".
-.method public static lambdaB(J)B
-    .registers 3 # 1 wide parameter, 1 local
-
-    const v0, 15
-    return v0
-
-.end method
-
-# Test that shorts are returned correctly via move-result.
-.method public static testS()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaS(J)S
-    invoke-lambda v0, {}
-    move-result v2
-    const/16 v3, 31000
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testS success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testS failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testS. Always returns "31000".
-.method public static lambdaS(J)S
-    .registers 3
-
-    const/16 v0, 31000
-    return v0
-
-.end method
-
-# Test that ints are returned correctly via move-result.
-.method public static testI()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaI(J)I
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 128000
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testI success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testI failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testI. Always returns "128000".
-.method public static lambdaI(J)I
-    .registers 3
-
-    const v0, 128000
-    return v0
-
-.end method
-
-# Test that chars are returned correctly via move-result.
-.method public static testC()V
-    .registers 7
-
-    create-lambda v0, LMoveResult;->lambdaC(J)C
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 65535
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testC success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testC failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testC. Always returns "65535".
-.method public static lambdaC(J)C
-    .registers 3
-
-    const v0, 65535
-    return v0
-
-.end method
-
-# Test that longs are returned correctly via move-result.
-.method public static testJ()V
-    .registers 9
-
-    create-lambda v0, LMoveResult;->lambdaJ(J)J
-    invoke-lambda v0, {}
-    move-result v2
-    const-wide v4, 0xdeadf00dc0ffeeL
-
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testJ success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testJ failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testC. Always returns "0xdeadf00dc0ffeeL".
-.method public static lambdaJ(J)J
-    .registers 5
-
-    const-wide v0, 0xdeadf00dc0ffeeL
-    return-wide v0
-
-.end method
-
-# Test that floats are returned correctly via move-result.
-.method public static testF()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaF(J)F
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, infinityf
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testF success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testF failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testF. Always returns "infinityf".
-.method public static lambdaF(J)F
-    .registers 4
-
-    const v0, infinityf
-    return v0
-
-.end method
-
-# Test that doubles are returned correctly via move-result.
-.method public static testD()V
-    .registers 8
-
-    create-lambda v0, LMoveResult;->lambdaD(J)D
-    invoke-lambda v0, {}
-    move-result-wide v2
-    const-wide v4, -infinity
-
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testD success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testD failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testD. Always returns "-infinity".
-.method public static lambdaD(J)D
-    .registers 5
-
-    const-wide v0, -infinity
-    return-wide v0
-
-.end method
-
-
-# Test that objects are returned correctly via move-result.
-.method public static testL()V
-    .registers 8
-
-    create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String;
-    invoke-lambda v0, {}
-    move-result-object v2
-    const-string v4, "Interned string"
-
-    # relies on string interning returning identical object references
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testL success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testL failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testL. Always returns "Interned string" (string).
-.method public static lambdaL(J)Ljava/lang/String;
-    .registers 5
-
-    const-string v0, "Interned string"
-    return-object v0
-
-.end method
-
-
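
For comparison, a hypothetical standard-Java sketch of what MoveResult verifies — a value returned through a lambda invocation must round-trip unchanged (only the long and object cases are shown; the class name is invented here):

    // Hypothetical sketch: analogues of the deleted testJ and testL checks.
    import java.util.function.Supplier;

    public class MoveResultSketch {
        public static void main(String[] args) {
            Supplier<Long> lambdaJ = () -> 0xdeadf00dc0ffeeL;
            System.out.println(lambdaJ.get() == 0xdeadf00dc0ffeeL
                ? "(MoveResult) testJ success" : "(MoveResult) testJ failed");

            // String literals are interned, so reference equality holds here,
            // just as the deleted testL relies on.
            Supplier<String> lambdaL = () -> "Interned string";
            System.out.println(lambdaL.get() == "Interned string"
                ? "(MoveResult) testL success" : "(MoveResult) testL failed");
        }
    }
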
diff --git a/test/955-lambda-smali/smali/SanityCheck.smali b/test/955-lambda-smali/smali/SanityCheck.smali
deleted file mode 100644
index 4c807d7..0000000
--- a/test/955-lambda-smali/smali/SanityCheck.smali
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LSanityCheck;
-.super Ljava/lang/Object;
-
-
-.method public constructor <init>()V
-.registers 1
-   invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-   return-void
-.end method
-
-# This test is just here to make sure that we can at least execute basic non-lambda
-# functionality such as printing (when lambdas are enabled in the runtime).
-.method public static run()I
-# Don't use too many registers here, to avoid hitting the Stack::SanityCheck frame < 2KB assert
-.registers 3
-    const-string v0, "SanityCheck"
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    const v2, 123456
-    return v2
-.end method
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
deleted file mode 100644
index 3444b13..0000000
--- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali
+++ /dev/null
@@ -1,94 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LTrivialHelloWorld;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    # Trivial 0-arg hello world
-    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Slightly more interesting 4-arg hello world
-    create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    # TODO: create-lambda should not write to both v2 and v3
-    const-string v4, "A"
-    const-string v5, "B"
-    const-string v6, "C"
-    const-string v7, "D"
-    invoke-lambda v2, {v4, v5, v6, v7}
-
-    invoke-static {}, LTrivialHelloWorld;->testFailures()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of jlong. 
-.method public static doHelloWorld(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "Hello world! (0-args, no closure)"
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of jlong. 
-.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals
-
-    const-string v0, " Hello world! (4-args, no closure)"
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-# Test that exceptions are thrown as expected when opcodes are used incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    invoke-lambda v0, {}  # invoking a null lambda shall raise an NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "Caught NPE"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
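
A minimal Java analogue of the testFailures pattern above, assuming an ordinary null dereference stands in for invoke-lambda on a null closure (the class name is hypothetical):

    // Hypothetical sketch: calling through a null reference raises an NPE,
    // which the deleted test catches and reports.
    public class NullInvokeSketch {
        public static void main(String[] args) {
            Runnable r = null;
            try {
                r.run();  // analogue of invoke-lambda on a null closure
            } catch (NullPointerException e) {
                System.out.println("Caught NPE");
            }
        }
    }
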
diff --git a/test/960-default-smali/build b/test/960-default-smali/build
index c786687..e8f4ed0 100755
--- a/test/960-default-smali/build
+++ b/test/960-default-smali/build
@@ -17,17 +17,14 @@
 # make us exit on a failure
 set -e
 
-# Generate the smali Main.smali file or fail
-./util-src/generate_smali.py ./smali
-
-if [[ $@ == *"--jvm"* ]]; then
-  # Build the Java files if we are running a --jvm test
-  mkdir -p src
-  mkdir -p classes
-  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
-  ${JAVAC} -implicit:none -d classes $(find src -name '*.java')
+if [[ $@ != *"--jvm"* ]]; then
+  # Nothing extra is needed when running with --jvm.
+  # Otherwise, hard-wire the use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
 fi
 
-# Build the smali files and make a dex
-${SMALI} -JXmx256m --experimental --api-level 23 --output classes.dex $(find smali -name '*.smali')
-zip "$TEST_NAME.jar" classes.dex
+# Generate the Main.java file or fail
+${ANDROID_BUILD_TOP}/art/test/utils/python/generate_java_main.py ./src
+
+./default-build "$@" --experimental default-methods
diff --git a/test/960-default-smali/expected.txt b/test/960-default-smali/expected.txt
index 7671eed..f3db93f 100644
--- a/test/960-default-smali/expected.txt
+++ b/test/960-default-smali/expected.txt
@@ -82,3 +82,19 @@
 J-interface   Greeter.SayHiTwice()='Hi Hi '
 J-virtual           J.SayHiTwice()='Hi Hi '
 End testing for type J
+Testing for type K
+K-interface       Foo.bar()='foobar'
+K-virtual           K.bar()='foobar'
+End testing for type K
+Testing for type L
+L-interface       Foo.bar()='foobar'
+L-virtual           K.bar()='foobar'
+L-virtual           L.bar()='foobar'
+End testing for type L
+Testing for type M
+M-interface       Foo.bar()='BAZ!'
+M-interface     Fooer.bar()='BAZ!'
+M-virtual           K.bar()='BAZ!'
+M-virtual           L.bar()='BAZ!'
+M-virtual           M.bar()='BAZ!'
+End testing for type M
diff --git a/test/960-default-smali/info.txt b/test/960-default-smali/info.txt
index eb596e2..9583abb 100644
--- a/test/960-default-smali/info.txt
+++ b/test/960-default-smali/info.txt
@@ -2,15 +2,16 @@
 
 Obviously needs to run under ART or a Java 8 Language runtime and compiler.
 
-When run a Main.smali file will be generated by the util-src/generate_smali.py
-script. If we run with --jvm we will use the tools/extract-embedded-java script to
-turn the smali into equivalent Java using the embedded Java code.
+When run, a Main.smali file will be generated by the
+test/utils/python/generate_smali_main.py script. If we run with --jvm, we will
+use the tools/extract-embedded-java script to turn the smali into equivalent
+Java using the embedded Java code.
 
 When updating be sure to write the equivalent Java code in comments of the smali
 files.
 
-Care should be taken when updating the generate_smali.py script. It must always
-return equivalent output when run multiple times.
+Care should be taken when updating the generate_smali_main.py script. It must
+always return equivalent output when run multiple times.
 
 To update the test files do the following steps:
     <Add new classes/interfaces>
diff --git a/test/960-default-smali/run b/test/960-default-smali/run
deleted file mode 100755
index e378b06..0000000
--- a/test/960-default-smali/run
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if echo $@ | grep -q -- "--jvm"; then
-  ${RUN} "$@"
-else
-  ${RUN} "$@" --runtime-option -Xexperimental:default-methods -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:default-methods
-fi
diff --git a/test/960-default-smali/smali/A.smali b/test/960-default-smali/smali/A.smali
deleted file mode 100644
index e755612..0000000
--- a/test/960-default-smali/smali/A.smali
+++ /dev/null
@@ -1,38 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LA;
-.super Ljava/lang/Object;
-.implements LGreeter;
-
-# class A implements Greeter {
-#     public String SayHi() {
-#         return "Hi ";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public SayHi()Ljava/lang/String;
-    .registers 1
-
-    const-string v0, "Hi "
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/Attendant.smali b/test/960-default-smali/smali/Attendant.smali
deleted file mode 100644
index ab63aee..0000000
--- a/test/960-default-smali/smali/Attendant.smali
+++ /dev/null
@@ -1,53 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public abstract interface LAttendant;
-.super Ljava/lang/Object;
-
-# public interface Attendant {
-#     public default String SayHi() {
-#         return "welcome to " + GetPlace();
-#     }
-#     public default String SayHiTwice() {
-#         return SayHi() + SayHi();
-#     }
-#
-#     public String GetPlace();
-# }
-
-.method public SayHi()Ljava/lang/String;
-    .locals 2
-    const-string v0, "welcome to "
-    invoke-interface {p0}, LAttendant;->GetPlace()Ljava/lang/String;
-    move-result-object v1
-    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    return-object v0
-.end method
-
-.method public SayHiTwice()Ljava/lang/String;
-    .locals 2
-    invoke-interface {p0}, LAttendant;->SayHi()Ljava/lang/String;
-    move-result-object v0
-    invoke-interface {p0}, LAttendant;->SayHi()Ljava/lang/String;
-    move-result-object v1
-    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    return-object v0
-.end method
-
-.method public abstract GetPlace()Ljava/lang/String;
-.end method
diff --git a/test/960-default-smali/smali/B.smali b/test/960-default-smali/smali/B.smali
deleted file mode 100644
index d847dd1..0000000
--- a/test/960-default-smali/smali/B.smali
+++ /dev/null
@@ -1,38 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LB;
-.super Ljava/lang/Object;
-.implements LGreeter2;
-
-# class B implements Greeter2 {
-#     public String SayHi() {
-#         return "Hello ";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public SayHi()Ljava/lang/String;
-    .registers 1
-
-    const-string v0, "Hello "
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/C.smali b/test/960-default-smali/smali/C.smali
deleted file mode 100644
index 08a8508..0000000
--- a/test/960-default-smali/smali/C.smali
+++ /dev/null
@@ -1,37 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LC;
-.super LA;
-
-# class C extends A {
-#     public String SayHiTwice() {
-#         return "You don't control me";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, LA;-><init>()V
-    return-void
-.end method
-
-.method public SayHiTwice()Ljava/lang/String;
-    .registers 1
-
-    const-string v0, "You don't control me"
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/D.smali b/test/960-default-smali/smali/D.smali
deleted file mode 100644
index 32f3b7e..0000000
--- a/test/960-default-smali/smali/D.smali
+++ /dev/null
@@ -1,38 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LD;
-.super Ljava/lang/Object;
-.implements LGreeter3;
-
-# class D implements Greeter3 {
-#     public String GetName() {
-#         return "Alex ";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public GetName()Ljava/lang/String;
-    .registers 1
-
-    const-string v0, "Alex "
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/E.smali b/test/960-default-smali/smali/E.smali
deleted file mode 100644
index bae6250..0000000
--- a/test/960-default-smali/smali/E.smali
+++ /dev/null
@@ -1,38 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LE;
-.super LA;
-.implements LGreeter2;
-
-# class E extends A implements Greeter2 {
-#     public String SayHi() {
-#         return "Hi2 ";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, LA;-><init>()V
-    return-void
-.end method
-
-.method public SayHi()Ljava/lang/String;
-    .registers 1
-
-    const-string v0, "Hi2 "
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/Extension.smali b/test/960-default-smali/smali/Extension.smali
deleted file mode 100644
index 60ffa26..0000000
--- a/test/960-default-smali/smali/Extension.smali
+++ /dev/null
@@ -1,30 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public abstract interface LExtension;
-.super Ljava/lang/Object;
-
-# public interface Extension {
-#     public default String SayHi() {
-#         return "welcome ";
-#     }
-# }
-
-.method public SayHi()Ljava/lang/String;
-    .locals 1
-    const-string v0, "welcome "
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/F.smali b/test/960-default-smali/smali/F.smali
deleted file mode 100644
index 3eaa089..0000000
--- a/test/960-default-smali/smali/F.smali
+++ /dev/null
@@ -1,47 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LF;
-.super LA;
-.implements LAttendant;
-
-# class F extends A implements Attendant {
-#     public String GetPlace() {
-#         return "android";
-#     }
-#     public String SayHiTwice() {
-#         return "We can override both interfaces";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public SayHiTwice()Ljava/lang/String;
-    .registers 1
-
-    const-string v0, "We can override both interfaces"
-    return-object v0
-.end method
-
-.method public GetPlace()Ljava/lang/String;
-    .registers 1
-    const-string v0, "android"
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/G.smali b/test/960-default-smali/smali/G.smali
deleted file mode 100644
index 446f2a4..0000000
--- a/test/960-default-smali/smali/G.smali
+++ /dev/null
@@ -1,37 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LG;
-.super Ljava/lang/Object;
-.implements LAttendant;
-
-# class G implements Attendant {
-#     public String GetPlace() {
-#         return "android";
-#     }
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public GetPlace()Ljava/lang/String;
-    .registers 1
-    const-string v0, "android"
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/Greeter.smali b/test/960-default-smali/smali/Greeter.smali
deleted file mode 100644
index 28530ff..0000000
--- a/test/960-default-smali/smali/Greeter.smali
+++ /dev/null
@@ -1,40 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public abstract interface LGreeter;
-.super Ljava/lang/Object;
-
-# public interface Greeter {
-#     public String SayHi();
-#
-#     public default String SayHiTwice() {
-#         return SayHi() + SayHi();
-#     }
-# }
-
-.method public abstract SayHi()Ljava/lang/String;
-.end method
-
-.method public SayHiTwice()Ljava/lang/String;
-    .locals 2
-    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
-    move-result-object v0
-    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
-    move-result-object v1
-    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/Greeter2.smali b/test/960-default-smali/smali/Greeter2.smali
deleted file mode 100644
index ace1798..0000000
--- a/test/960-default-smali/smali/Greeter2.smali
+++ /dev/null
@@ -1,39 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public abstract interface LGreeter2;
-.super Ljava/lang/Object;
-.implements LGreeter;
-
-# public interface Greeter2 extends Greeter {
-#     public default String SayHiTwice() {
-#         return "I say " + SayHi() + SayHi();
-#     }
-# }
-
-.method public SayHiTwice()Ljava/lang/String;
-    .locals 3
-    const-string v0, "I say "
-    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
-    move-result-object v1
-    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
-    move-result-object v1
-    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    return-object v0
-.end method
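
Taken together, the Java equivalents embedded in the smali comments form a small default-method hierarchy. The sketch below shows the resolution rule the E case relies on; the wrapper class DefaultResolutionSketch is hypothetical, while the interfaces and classes are copied from the comments above:

    // Hypothetical sketch: the most specific superinterface default wins.
    interface Greeter {
        String SayHi();
        default String SayHiTwice() { return SayHi() + SayHi(); }
    }
    interface Greeter2 extends Greeter {
        default String SayHiTwice() { return "I say " + SayHi() + SayHi(); }
    }
    class A implements Greeter {
        public String SayHi() { return "Hi "; }
    }
    class E extends A implements Greeter2 {
        public String SayHi() { return "Hi2 "; }
    }
    public class DefaultResolutionSketch {
        public static void main(String[] args) {
            // Neither E nor A declares SayHiTwice, so Greeter2's default
            // overrides Greeter's as the more specific one:
            System.out.println(new E().SayHiTwice());  // "I say Hi2 Hi2 "
        }
    }
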
diff --git a/test/960-default-smali/smali/Greeter3.smali b/test/960-default-smali/smali/Greeter3.smali
deleted file mode 100644
index 31fc2e7..0000000
--- a/test/960-default-smali/smali/Greeter3.smali
+++ /dev/null
@@ -1,40 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public abstract interface LGreeter3;
-.super Ljava/lang/Object;
-.implements LGreeter;
-
-# public interface Greeter3 extends Greeter {
-#     public String GetName();
-#
-#     public default String SayHi() {
-#         return "Hello " + GetName();
-#     }
-# }
-
-.method public abstract GetName()Ljava/lang/String;
-.end method
-
-.method public SayHi()Ljava/lang/String;
-    .locals 2
-    const-string v0, "Hello "
-    invoke-interface {p0}, LGreeter3;->GetName()Ljava/lang/String;
-    move-result-object v1
-    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    return-object v0
-.end method
diff --git a/test/960-default-smali/smali/H.smali b/test/960-default-smali/smali/H.smali
deleted file mode 100644
index 82065ea..0000000
--- a/test/960-default-smali/smali/H.smali
+++ /dev/null
@@ -1,28 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LH;
-.super Ljava/lang/Object;
-.implements LExtension;
-
-# class H implements Extension {
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
diff --git a/test/960-default-smali/smali/I.smali b/test/960-default-smali/smali/I.smali
deleted file mode 100644
index 72fb58a..0000000
--- a/test/960-default-smali/smali/I.smali
+++ /dev/null
@@ -1,28 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LI;
-.super LA;
-.implements LGreeter2;
-
-# class I extends A implements Greeter2 {
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
diff --git a/test/960-default-smali/smali/J.smali b/test/960-default-smali/smali/J.smali
deleted file mode 100644
index 93f3d62..0000000
--- a/test/960-default-smali/smali/J.smali
+++ /dev/null
@@ -1,29 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LJ;
-.super LA;
-
-# class J extends A {
-# }
-
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, LA;-><init>()V
-    return-void
-.end method
-
diff --git a/test/960-default-smali/smali/classes.xml b/test/960-default-smali/smali/classes.xml
deleted file mode 100644
index 0aa41f7..0000000
--- a/test/960-default-smali/smali/classes.xml
+++ /dev/null
@@ -1,127 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!-- Copyright 2015 The Android Open Source Project
-
-     Licensed under the Apache License, Version 2.0 (the "License");
-     you may not use this file except in compliance with the License.
-     You may obtain a copy of the License at
-
-          http://www.apache.org/licenses/LICENSE-2.0
-
-     Unless required by applicable law or agreed to in writing, software
-     distributed under the License is distributed on an "AS IS" BASIS,
-     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     See the License for the specific language governing permissions and
-     limitations under the License.
--->
-
-<data>
-  <classes>
-    <class name="A" super="java/lang/Object">
-      <implements>
-        <item>Greeter</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="B" super="java/lang/Object">
-      <implements>
-        <item>Greeter2</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="C" super="A">
-      <implements> </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="D" super="java/lang/Object">
-      <implements>
-        <item>Greeter3</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="E" super="A">
-      <implements>
-        <item>Greeter2</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="F" super="A">
-      <implements>
-        <item>Attendant</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="G" super="java/lang/Object">
-      <implements>
-        <item>Attendant</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="H" super="java/lang/Object">
-      <implements>
-        <item>Extension</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="I" super="A">
-      <implements>
-        <item>Greeter2</item>
-      </implements>
-      <methods> </methods>
-    </class>
-
-    <class name="J" super="A">
-      <implements> </implements>
-      <methods> </methods>
-    </class>
-  </classes>
-
-  <interfaces>
-    <interface name="Extension" super="java/lang/Object">
-      <implements> </implements>
-      <methods>
-        <method type="default">SayHi</method>
-      </methods>
-    </interface>
-
-    <interface name="Greeter" super="java/lang/Object">
-      <implements> </implements>
-      <methods>
-        <method type="abstract">SayHi</method>
-        <method type="default">SayHiTwice</method>
-      </methods>
-    </interface>
-
-    <interface name="Greeter2" super="java/lang/Object">
-      <implements>
-        <item>Greeter</item>
-      </implements>
-      <methods> </methods>
-    </interface>
-
-    <interface name="Greeter3" super="java/lang/Object">
-      <implements>
-        <item>Greeter</item>
-      </implements>
-      <methods>
-        <method type="abstract">GetName</method>
-      </methods>
-    </interface>
-
-    <interface name="Attendant" super="java/lang/Object">
-      <implements> </implements>
-      <methods>
-        <method type="default">SayHi</method>
-        <method type="default">SayHiTwice</method>
-        <method type="abstract">GetPlace</method>
-      </methods>
-    </interface>
-  </interfaces>
-</data>
diff --git a/test/960-default-smali/src/A.java b/test/960-default-smali/src/A.java
new file mode 100644
index 0000000..7664a26
--- /dev/null
+++ b/test/960-default-smali/src/A.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class A implements Greeter {
+  public String SayHi() {
+    return "Hi ";
+  }
+}
diff --git a/test/960-default-smali/src/Attendant.java b/test/960-default-smali/src/Attendant.java
new file mode 100644
index 0000000..9f9a58a
--- /dev/null
+++ b/test/960-default-smali/src/Attendant.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Attendant {
+  public default String SayHi() {
+    return "welcome to " + GetPlace();
+  }
+  public default String SayHiTwice() {
+    return SayHi() + SayHi();
+  }
+  public String GetPlace();
+}
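
Attendant's two defaults build on the single abstract GetPlace(), so an
implementer only has to supply the place string. A minimal sketch of how the
defaults compose, assuming a hypothetical implementer Desk (the interface body
mirrors Attendant.java above):

    interface Attendant {
      public default String SayHi() { return "welcome to " + GetPlace(); }
      public default String SayHiTwice() { return SayHi() + SayHi(); }
      public String GetPlace();
    }

    // Hypothetical implementer: only the abstract method needs a body.
    class Desk implements Attendant {
      public String GetPlace() { return "the lobby"; }
    }

    class AttendantDemo {
      public static void main(String[] args) {
        Attendant a = new Desk();
        System.out.println(a.SayHi());      // welcome to the lobby
        System.out.println(a.SayHiTwice()); // welcome to the lobbywelcome to the lobby
      }
    }
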
diff --git a/test/960-default-smali/src/B.java b/test/960-default-smali/src/B.java
new file mode 100644
index 0000000..18aaade
--- /dev/null
+++ b/test/960-default-smali/src/B.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class B implements Greeter2 {
+  public String SayHi() {
+    return "Hello ";
+  }
+}
diff --git a/test/960-default-smali/src/C.java b/test/960-default-smali/src/C.java
new file mode 100644
index 0000000..f0bc185
--- /dev/null
+++ b/test/960-default-smali/src/C.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class C extends A {
+  public String SayHiTwice() {
+    return "You don't control me";
+  }
+}
diff --git a/test/960-default-smali/src/D.java b/test/960-default-smali/src/D.java
new file mode 100644
index 0000000..b1697cd
--- /dev/null
+++ b/test/960-default-smali/src/D.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class D implements Greeter3 {
+  public String GetName() {
+    return "Alex ";
+  }
+}
diff --git a/test/960-default-smali/src/E.java b/test/960-default-smali/src/E.java
new file mode 100644
index 0000000..477cb67
--- /dev/null
+++ b/test/960-default-smali/src/E.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class E extends A implements Greeter2 {
+  public String SayHi() {
+    return "Hi2 ";
+  }
+}
diff --git a/test/960-default-smali/src/Extension.java b/test/960-default-smali/src/Extension.java
new file mode 100644
index 0000000..89617dd
--- /dev/null
+++ b/test/960-default-smali/src/Extension.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Extension {
+  public default String SayHi() {
+    return "welcome ";
+  }
+}
diff --git a/test/960-default-smali/src/F.java b/test/960-default-smali/src/F.java
new file mode 100644
index 0000000..0282de7
--- /dev/null
+++ b/test/960-default-smali/src/F.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class F extends A implements Attendant {
+  public String GetPlace() {
+    return "android";
+  }
+  public String SayHiTwice() {
+    return "We can override both interfaces";
+  }
+}
diff --git a/test/960-default-smali/src/Foo.java b/test/960-default-smali/src/Foo.java
new file mode 100644
index 0000000..ed5b35f
--- /dev/null
+++ b/test/960-default-smali/src/Foo.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+interface Foo {
+  public default String bar() {
+    return "foobar";
+  }
+}
diff --git a/test/960-default-smali/src/Fooer.java b/test/960-default-smali/src/Fooer.java
new file mode 100644
index 0000000..d8a5f61
--- /dev/null
+++ b/test/960-default-smali/src/Fooer.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Fooer extends Foo {
+  public String bar();
+}
diff --git a/test/960-default-smali/src/G.java b/test/960-default-smali/src/G.java
new file mode 100644
index 0000000..86a140a
--- /dev/null
+++ b/test/960-default-smali/src/G.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class G implements Attendant {
+  public String GetPlace() {
+    return "android";
+  }
+}
diff --git a/test/960-default-smali/src/Greeter.java b/test/960-default-smali/src/Greeter.java
new file mode 100644
index 0000000..cee2283
--- /dev/null
+++ b/test/960-default-smali/src/Greeter.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Greeter {
+  public String SayHi();
+  public default String SayHiTwice() {
+    return SayHi() + SayHi();
+  }
+}
diff --git a/test/960-default-smali/src/Greeter2.java b/test/960-default-smali/src/Greeter2.java
new file mode 100644
index 0000000..07f6c53
--- /dev/null
+++ b/test/960-default-smali/src/Greeter2.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Greeter2 extends Greeter {
+  public default String SayHiTwice() {
+    return "I say " + SayHi() + SayHi();
+  }
+}
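
Greeter2's override of the SayHiTwice default composes with whatever SayHi the
concrete class supplies, and a class such as E that sees both the Greeter and
Greeter2 defaults resolves to the more specific Greeter2. A sketch tracing the
expected strings (the class bodies mirror A.java, B.java, and E.java above;
the GreeterDemo wrapper is illustrative only):

    interface Greeter {
      public String SayHi();
      public default String SayHiTwice() { return SayHi() + SayHi(); }
    }

    interface Greeter2 extends Greeter {
      public default String SayHiTwice() { return "I say " + SayHi() + SayHi(); }
    }

    class A implements Greeter  { public String SayHi() { return "Hi "; } }
    class B implements Greeter2 { public String SayHi() { return "Hello "; } }
    class E extends A implements Greeter2 { public String SayHi() { return "Hi2 "; } }

    class GreeterDemo {
      public static void main(String[] args) {
        System.out.println(new A().SayHiTwice()); // "Hi Hi "             (Greeter default)
        System.out.println(new B().SayHiTwice()); // "I say Hello Hello " (Greeter2 default)
        System.out.println(new E().SayHiTwice()); // "I say Hi2 Hi2 "     (Greeter2 beats Greeter)
      }
    }
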
diff --git a/test/960-default-smali/src/Greeter3.java b/test/960-default-smali/src/Greeter3.java
new file mode 100644
index 0000000..bbb7171
--- /dev/null
+++ b/test/960-default-smali/src/Greeter3.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Greeter3 extends Greeter {
+  public String GetName();
+  public default String SayHi() {
+    return "Hello " + GetName();
+  }
+}
diff --git a/test/960-default-smali/src/H.java b/test/960-default-smali/src/H.java
new file mode 100644
index 0000000..d87a6db
--- /dev/null
+++ b/test/960-default-smali/src/H.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class H implements Extension { }
diff --git a/test/960-default-smali/src/I.java b/test/960-default-smali/src/I.java
new file mode 100644
index 0000000..8d6779c
--- /dev/null
+++ b/test/960-default-smali/src/I.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class I extends A implements Greeter2 { }
diff --git a/test/960-default-smali/src/J.java b/test/960-default-smali/src/J.java
new file mode 100644
index 0000000..a365e40
--- /dev/null
+++ b/test/960-default-smali/src/J.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class J extends A { }
diff --git a/test/960-default-smali/src/K.java b/test/960-default-smali/src/K.java
new file mode 100644
index 0000000..4426be7
--- /dev/null
+++ b/test/960-default-smali/src/K.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class K implements Foo { }
diff --git a/test/960-default-smali/src/L.java b/test/960-default-smali/src/L.java
new file mode 100644
index 0000000..c08ab72
--- /dev/null
+++ b/test/960-default-smali/src/L.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class L extends K { }
diff --git a/test/960-default-smali/src/M.java b/test/960-default-smali/src/M.java
new file mode 100644
index 0000000..affe7e9
--- /dev/null
+++ b/test/960-default-smali/src/M.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class M extends L implements Fooer {
+  public String bar() {
+    return "BAZ!";
+  }
+}
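
The K/L/M chain exercises re-abstraction: Fooer extends Foo but declares bar()
abstract again, so Foo's default no longer satisfies the contract, and M,
unlike K and L, is forced to provide a body. A compact sketch of that pattern
(the type bodies mirror the files above; the FooDemo wrapper is illustrative
only):

    interface Foo { public default String bar() { return "foobar"; } }
    interface Fooer extends Foo { public String bar(); } // re-abstracts the default

    class K implements Foo { }           // inherits Foo's default
    class L extends K { }                // still the default, via K
    class M extends L implements Fooer { // must override: bar() is abstract again
      public String bar() { return "BAZ!"; }
    }

    class FooDemo {
      public static void main(String[] args) {
        System.out.println(new K().bar()); // foobar
        System.out.println(new L().bar()); // foobar
        System.out.println(new M().bar()); // BAZ!
      }
    }
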
diff --git a/test/960-default-smali/src/classes.xml b/test/960-default-smali/src/classes.xml
new file mode 100644
index 0000000..f3e50c5
--- /dev/null
+++ b/test/960-default-smali/src/classes.xml
@@ -0,0 +1,165 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright 2015 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+
+<data>
+  <classes>
+    <class name="A" super="java/lang/Object">
+      <implements>
+        <item>Greeter</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="B" super="java/lang/Object">
+      <implements>
+        <item>Greeter2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="C" super="A">
+      <implements> </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="D" super="java/lang/Object">
+      <implements>
+        <item>Greeter3</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="E" super="A">
+      <implements>
+        <item>Greeter2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="F" super="A">
+      <implements>
+        <item>Attendant</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="G" super="java/lang/Object">
+      <implements>
+        <item>Attendant</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="H" super="java/lang/Object">
+      <implements>
+        <item>Extension</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="I" super="A">
+      <implements>
+        <item>Greeter2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="J" super="A">
+      <implements> </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="K" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="L" super="K">
+      <implements> </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="M" super="L">
+      <implements>
+        <item>Fooer</item>
+      </implements>
+      <methods>
+        <method>bar</method>
+      </methods>
+    </class>
+  </classes>
+
+  <interfaces>
+    <interface name="Extension" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <method type="default">SayHi</method>
+      </methods>
+    </interface>
+
+    <interface name="Greeter" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <method type="abstract">SayHi</method>
+        <method type="default">SayHiTwice</method>
+      </methods>
+    </interface>
+
+    <interface name="Greeter2" super="java/lang/Object">
+      <implements>
+        <item>Greeter</item>
+      </implements>
+      <methods> </methods>
+    </interface>
+
+    <interface name="Greeter3" super="java/lang/Object">
+      <implements>
+        <item>Greeter</item>
+      </implements>
+      <methods>
+        <method type="abstract">GetName</method>
+      </methods>
+    </interface>
+
+    <interface name="Attendant" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <method type="default">SayHi</method>
+        <method type="default">SayHiTwice</method>
+        <method type="abstract">GetPlace</method>
+      </methods>
+    </interface>
+
+    <interface name="Foo" super="java/lang/Object">
+      <implements>
+      </implements>
+      <methods>
+        <method type="default">bar</method>
+      </methods>
+    </interface>
+
+    <interface name="Fooer" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods>
+        <method type="abstract">bar</method>
+      </methods>
+    </interface>
+  </interfaces>
+</data>
diff --git a/test/960-default-smali/util-src/generate_smali.py b/test/960-default-smali/util-src/generate_smali.py
deleted file mode 100755
index b2bf1f0..0000000
--- a/test/960-default-smali/util-src/generate_smali.py
+++ /dev/null
@@ -1,376 +0,0 @@
-#!/usr/bin/python3
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Generate Smali Main file for test 960
-"""
-
-import os
-import sys
-from pathlib import Path
-
-BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
-if BUILD_TOP is None:
-  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
-  sys.exit(1)
-
-# Allow us to import utils and mixins.
-sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
-
-from testgen.utils import get_copyright
-import testgen.mixins as mixins
-
-from collections import namedtuple
-import itertools
-import functools
-import xml.etree.ElementTree as ET
-
-class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
-  """
-  A main class and main method for this test.
-  """
-
-  MAIN_CLASS_TEMPLATE = """{copyright}
-.class public LMain;
-.super Ljava/lang/Object;
-
-# class Main {{
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-{test_groups}
-
-{test_funcs}
-
-{main_func}
-
-# }}
-"""
-
-  MAIN_FUNCTION_TEMPLATE = """
-#   public static void main(String[] args) {{
-.method public static main([Ljava/lang/String;)V
-    .locals 2
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    {test_group_invoke}
-
-    return-void
-.end method
-#   }}
-"""
-
-  TEST_GROUP_INVOKE_TEMPLATE = """
-#     {test_name}();
-    invoke-static {{}}, {test_name}()V
-"""
-
-  def __init__(self):
-    """
-    Initialize this MainClass
-    """
-    self.tests = set()
-    self.global_funcs = set()
-
-  def add_instance(self, it):
-    """
-    Add an instance test for the given class
-    """
-    self.tests.add(it)
-
-  def add_func(self, f):
-    """
-    Add a function to the class
-    """
-    self.global_funcs.add(f)
-
-  def get_name(self):
-    """
-    Get the name of this class
-    """
-    return "Main"
-
-  def __str__(self):
-    """
-    Print this class
-    """
-    all_tests = sorted(self.tests)
-    test_invoke = ""
-    test_groups = ""
-    for t in all_tests:
-      test_groups += str(t)
-    for t in sorted(all_tests):
-      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
-    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
-
-    funcs = ""
-    for f in self.global_funcs:
-      funcs += str(f)
-    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
-                                           test_groups=test_groups,
-                                           main_func=main_func, test_funcs=funcs)
-
-
-class InstanceTest(mixins.Named, mixins.NameComparableMixin):
-  """
-  A method that runs tests for a particular concrete type. It calls the test
-  cases that run it in all possible ways.
-  """
-
-  INSTANCE_TEST_TEMPLATE = """
-#   public static void {test_name}() {{
-#     System.out.println("Testing for type {ty}");
-#     String s = "{ty}";
-#     {ty} v = new {ty}();
-.method public static {test_name}()V
-    .locals 3
-    sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    const-string v0, "Testing for type {ty}"
-    invoke-virtual {{v2,v0}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-    const-string v0, "{ty}"
-    new-instance v1, L{ty};
-    invoke-direct {{v1}}, L{ty};-><init>()V
-
-    {invokes}
-
-    const-string v0, "End testing for type {ty}"
-    invoke-virtual {{v2,v0}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-    return-void
-.end method
-#     System.out.println("End testing for type {ty}");
-#   }}
-"""
-
-  TEST_INVOKE_TEMPLATE = """
-#     {fname}(s, v);
-    invoke-static {{v0, v1}}, {fname}(Ljava/lang/String;L{farg};)V
-"""
-
-  def __init__(self, main, ty):
-    """
-    Initialize this test group for the given type
-    """
-    self.ty = ty
-    self.main = main
-    self.funcs = set()
-    self.main.add_instance(self)
-
-  def get_name(self):
-    """
-    Get the name of this test group
-    """
-    return "TEST_NAME_"+self.ty
-
-  def add_func(self, f):
-    """
-    Add a test function to this test group
-    """
-    self.main.add_func(f)
-    self.funcs.add(f)
-
-  def __str__(self):
-    """
-    Returns the smali code for this function
-    """
-    func_invokes = ""
-    for f in sorted(self.funcs, key=lambda a: (a.func, a.farg)):
-      func_invokes += self.TEST_INVOKE_TEMPLATE.format(fname=f.get_name(),
-                                                       farg=f.farg)
-
-    return self.INSTANCE_TEST_TEMPLATE.format(test_name=self.get_name(), ty=self.ty,
-                                              invokes=func_invokes)
-
-class Func(mixins.Named, mixins.NameComparableMixin):
-  """
-  A single test case that attempts to invoke a function on a receiver of a given type.
-  """
-
-  TEST_FUNCTION_TEMPLATE = """
-#   public static void {fname}(String s, {farg} v) {{
-#     try {{
-#       System.out.printf("%s-{invoke_type:<9} {farg:>9}.{callfunc}()='%s'\\n", s, v.{callfunc}());
-#       return;
-#     }} catch (Error e) {{
-#       System.out.printf("%s-{invoke_type} on {farg}: {callfunc}() threw exception!\\n", s);
-#       e.printStackTrace(System.out);
-#     }}
-#   }}
-.method public static {fname}(Ljava/lang/String;L{farg};)V
-    .locals 7
-    :call_{fname}_try_start
-      const/4 v0, 2
-      new-array v1,v0, [Ljava/lang/Object;
-      const/4 v0, 0
-      aput-object p0,v1,v0
-
-      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-      const-string v3, "%s-{invoke_type:<9} {farg:>9}.{callfunc}()='%s'\\n"
-
-      invoke-{invoke_type} {{p1}}, L{farg};->{callfunc}()Ljava/lang/String;
-      move-result-object v4
-      const/4 v0, 1
-      aput-object v4, v1, v0
-
-      invoke-virtual {{v2,v3,v1}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
-      return-void
-    :call_{fname}_try_end
-    .catch Ljava/lang/Error; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
-    :error_{fname}_start
-      move-exception v3
-      const/4 v0, 1
-      new-array v1,v0, [Ljava/lang/Object;
-      const/4 v0, 0
-      aput-object p0, v1, v0
-      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-      const-string v4, "%s-{invoke_type} on {farg}: {callfunc}() threw exception!\\n"
-      invoke-virtual {{v2,v4,v1}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
-      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
-      return-void
-.end method
-"""
-
-  def __init__(self, func, farg, invoke):
-    """
-    Initialize this test function for the given invoke type and argument
-    """
-    self.func = func
-    self.farg = farg
-    self.invoke = invoke
-
-  def get_name(self):
-    """
-    Get the name of this test
-    """
-    return "Test_Func_{}_{}_{}".format(self.func, self.farg, self.invoke)
-
-  def __str__(self):
-    """
-    Get the smali code for this test function
-    """
-    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(),
-                                              farg=self.farg,
-                                              invoke_type=self.invoke,
-                                              callfunc=self.func)
-
-def flatten_classes(classes, c):
-  """
-  Iterate over all the classes 'c' can be used as
-  """
-  while c:
-    yield c
-    c = classes.get(c.super_class)
-
-def flatten_class_methods(classes, c):
-  """
-  Iterate over all the methods 'c' can call
-  """
-  for c1 in flatten_classes(classes, c):
-    yield from c1.methods
-
-def flatten_interfaces(dat, c):
-  """
-  Iterate over all the interfaces 'c' transitively implements
-  """
-  def get_ifaces(cl):
-    for i2 in cl.implements:
-      yield dat.interfaces[i2]
-      yield from get_ifaces(dat.interfaces[i2])
-
-  for cl in flatten_classes(dat.classes, c):
-    yield from get_ifaces(cl)
-
-def flatten_interface_methods(dat, i):
-  """
-  Iterate over all the interface methods 'i' can call
-  """
-  yield from i.methods
-  for i2 in flatten_interfaces(dat, i):
-    yield from i2.methods
-
-def make_main_class(dat):
-  """
-  Creates a Main.smali file that runs all the tests
-  """
-  m = MainClass()
-  for c in dat.classes.values():
-    i = InstanceTest(m, c.name)
-    for clazz in flatten_classes(dat.classes, c):
-      for meth in flatten_class_methods(dat.classes, clazz):
-        i.add_func(Func(meth, clazz.name, 'virtual'))
-      for iface in flatten_interfaces(dat, clazz):
-        for meth in flatten_interface_methods(dat, iface):
-          i.add_func(Func(meth, clazz.name, 'virtual'))
-          i.add_func(Func(meth, iface.name, 'interface'))
-  return m
-
-class TestData(namedtuple("TestData", ['classes', 'interfaces'])):
-  """
-  A class representing the classes.xml document.
-  """
-  pass
-
-class Clazz(namedtuple("Clazz", ["name", "methods", "super_class", "implements"])):
-  """
-  A class representing a class element in the classes.xml document.
-  """
-  pass
-
-class IFace(namedtuple("IFace", ["name", "methods", "super_class", "implements"])):
-  """
-  A class representing an interface element in the classes.xml document.
-  """
-  pass
-
-def parse_xml(xml):
-  """
-  Parse the xml description of this test.
-  """
-  classes = dict()
-  ifaces  = dict()
-  root = ET.fromstring(xml)
-  for iface in root.find("interfaces"):
-    name = iface.attrib['name']
-    implements = [a.text for a in iface.find("implements")]
-    methods = [a.text for a in iface.find("methods")]
-    ifaces[name] = IFace(name = name,
-                         super_class = iface.attrib['super'],
-                         methods = methods,
-                         implements = implements)
-  for clazz in root.find('classes'):
-    name = clazz.attrib['name']
-    implements = [a.text for a in clazz.find("implements")]
-    methods = [a.text for a in clazz.find("methods")]
-    classes[name] = Clazz(name = name,
-                          super_class = clazz.attrib['super'],
-                          methods = methods,
-                          implements = implements)
-  return TestData(classes, ifaces)
-
-def main(argv):
-  smali_dir = Path(argv[1])
-  if not smali_dir.exists() or not smali_dir.is_dir():
-    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
-    sys.exit(1)
-  class_data = parse_xml((smali_dir / "classes.xml").open().read())
-  make_main_class(class_data).dump(smali_dir)
-
-if __name__ == '__main__':
-  main(sys.argv)
diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build
index 707c17e..ccebbe4 100755
--- a/test/961-default-iface-resolution-generated/build
+++ b/test/961-default-iface-resolution-generated/build
@@ -17,8 +17,6 @@
 # make us exit on a failure
 set -e
 
-mkdir -p ./smali
-
 # We will be making more files than the ulimit is set to allow. Remove it temporarily.
 OLD_ULIMIT=`ulimit -S`
 ulimit -S unlimited
@@ -28,20 +26,19 @@
 }
 trap 'restore_ulimit' ERR
 
-# Generate the smali files and expected.txt or fail
-./util-src/generate_smali.py ./smali ./expected.txt
-
-if [[ $@ == *"--jvm"* ]]; then
-  # Build the Java files if we are running a --jvm test
-  mkdir -p src
-  mkdir -p classes
-  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
-  ${JAVAC} -implicit:none -d classes $(find src -name '*.java')
+if [[ $@ != *"--jvm"* ]]; then
+  # Nothing extra is needed for a --jvm run; everywhere else, hard-wire the
+  # use of the experimental jack toolchain.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
 fi
 
-# Build the smali files and make a dex
-${SMALI} -JXmx512m --experimental --api-level 23 --output classes.dex $(find smali -name '*.smali')
-zip $TEST_NAME.jar classes.dex
+mkdir -p ./src
+
+# Generate the Java files and expected.txt or fail
+./util-src/generate_java.py ./src ./expected.txt
+
+./default-build "$@" --experimental default-methods
 
 # Reset the ulimit back to its initial value
 restore_ulimit
diff --git a/test/961-default-iface-resolution-generated/run b/test/961-default-iface-resolution-generated/run
deleted file mode 100755
index e378b06..0000000
--- a/test/961-default-iface-resolution-generated/run
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if echo $@ | grep -q -- "--jvm"; then
-  ${RUN} "$@"
-else
-  ${RUN} "$@" --runtime-option -Xexperimental:default-methods -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:default-methods
-fi
diff --git a/test/961-default-iface-resolution-generated/util-src/generate_java.py b/test/961-default-iface-resolution-generated/util-src/generate_java.py
new file mode 100755
index 0000000..a205cd6
--- /dev/null
+++ b/test/961-default-iface-resolution-generated/util-src/generate_java.py
@@ -0,0 +1,378 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 961.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the type tree can have. Includes the class object in the tree.
+# Increasing this increases the number of generated files significantly. This
+# value was chosen as it is fairly quick to run and very comprehensive, checking
+# every possible interface tree up to 5 layers deep.
+MAX_IFACE_DEPTH = 5
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A Main.java file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+class Main {{
+{test_groups}
+{main_func}
+}}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+  public static void main(String[] args) {{
+    {test_group_invoke}
+  }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+    {test_name}();
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass java code.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("java"),
+                                           test_groups = test_groups,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+  public static void {fname}() {{
+    try {{
+      {farg} v = new {farg}();
+      System.out.printf("%s calls default method on %s\\n",
+                        v.CalledClassName(),
+                        v.CalledInterfaceName());
+      return;
+    }} catch (Error e) {{
+      e.printStackTrace(System.out);
+      return;
+    }}
+  }}
+"""
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def get_expected(self):
+    """
+    Get the expected output calling this function.
+    """
+    return "{tree} calls default method on {iface_tree}".format(
+        tree = self.farg.get_tree(), iface_tree = self.farg.get_called().get_tree())
+
+  def get_name(self):
+    """
+    Get the name of this function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def __str__(self):
+    """
+    Print the java code of this function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(), farg=self.farg.get_name())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.JavaFileMixin):
+  """
+  A class that will be instantiated to test default method resolution order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+public class {class_name} implements {iface_name} {{
+  public String CalledClassName() {{
+    return "{tree}";
+  }}
+}}
+"""
+
+  def __init__(self, iface):
+    """
+    Initialize this test class which implements the given interface
+    """
+    self.iface = iface
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
+                                                iface_tree = self.iface.get_tree())
+
+  def __iter__(self):
+    """
+    Step through all interfaces implemented transitively by this class
+    """
+    yield self.iface
+    yield from self.iface
+
+  def get_called(self):
+    """
+    Get the interface whose default method would be called when calling the
+    CalledInterfaceName function.
+    """
+    all_ifaces = set(iface for iface in self if iface.default)
+    for i in all_ifaces:
+      if all(map(lambda j: i not in j.get_super_types(), all_ifaces)):
+        return i
+    raise Exception("UNREACHABLE! Unable to find default method!")
+
+  def __str__(self):
+    """
+    Print the java code of this class.
+    """
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('java'),
+                                           iface_name = self.iface.get_name(),
+                                           tree = self.get_tree(),
+                                           class_name = self.class_name)
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.JavaFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+public interface {class_name} {extends} {ifaces} {{
+  public String CalledClassName();
+
+{funcs}
+}}
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+  public default String CalledInterfaceName() {{
+    return "{tree}";
+  }}
+"""
+
+  def __init__(self, ifaces, default):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = sorted(ifaces)
+    self.default = default
+    end = "_DEFAULT" if default else ""
+    self.class_name = "INTERFACE_"+gensym()+end
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def __str__(self):
+    """
+    Print the java code of this interface.
+    """
+    j_ifaces = " "
+    for i in self.ifaces:
+      j_ifaces += " {},".format(i.get_name())
+    j_ifaces = j_ifaces[0:-1]
+    if self.default:
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(ifaces = j_ifaces,
+                                                tree = self.get_tree(),
+                                                class_name = self.class_name)
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('java'),
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def print_tree(ifaces):
+  """
+  Return a space-separated listing of the given iface trees
+  """
+  return " ".join(i.get_tree() for i in ifaces)
+
+# The deduplicated output of subtree_sizes for each size up to
+# MAX_IFACE_DEPTH.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_interface_trees():
+  """
+  Return all legal interface trees
+  """
+  def dump_supers(s):
+    """
+    Does depth first traversal of all the interfaces in the list.
+    """
+    for i in s:
+      yield i
+      yield from i
+
+  def create_interface_trees_inner(num, allow_default):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        if sub == 1:
+          ifaces.append([TestInterface([], allow_default)])
+          if allow_default:
+            ifaces[-1].append(TestInterface([], False))
+        else:
+          ifaces.append(list(create_interface_trees_inner(sub, allow_default)))
+      for supers in itertools.product(*ifaces):
+        all_supers = sorted(set(dump_supers(supers)) - set(supers))
+        for i in range(len(all_supers) + 1):
+          for combo in itertools.combinations(all_supers, i):
+            yield TestInterface(list(combo) + list(supers), allow_default)
+      if allow_default:
+        for i in range(len(split)):
+          ifaces = []
+          for sub, cs in zip(split, itertools.count()):
+            if sub == 1:
+              ifaces.append([TestInterface([], i == cs)])
+            else:
+              ifaces.append(list(create_interface_trees_inner(sub, i == cs)))
+          for supers in itertools.product(*ifaces):
+            all_supers = sorted(set(dump_supers(supers)) - set(supers))
+            for i in range(len(all_supers) + 1):
+              for combo in itertools.combinations(all_supers, i):
+                yield TestInterface(list(combo) + list(supers), False)
+
+  for num in range(1, MAX_IFACE_DEPTH):
+    yield from create_interface_trees_inner(num, True)
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for tree in create_interface_trees():
+    classes.add(tree)
+    for i in tree:
+      classes.add(i)
+    test_class = TestClass(tree)
+    mc.add_test(test_class)
+    classes.add(test_class)
+  return mc, classes
+
+def main(argv):
+  java_dir = Path(argv[1])
+  if not java_dir.exists() or not java_dir.is_dir():
+    print("{} is not a valid java dir".format(java_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(java_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
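
The rule encoded by get_called() above (among the default-providing
interfaces, pick the one that is not a supertype of any other provider) is
Java's most-specific-default resolution. A miniature, hedged illustration of
that rule (Base, Left, Right, and Diamond are hypothetical names, not part of
the generated tests):

    interface Base { public default String who() { return "Base"; } }
    interface Left extends Base { public default String who() { return "Left"; } }
    interface Right extends Base { }

    // Base and Left both provide who(), but Base is a supertype of Left, so
    // Left is the most specific provider and wins without ambiguity.
    class Diamond implements Left, Right {
      public static void main(String[] args) {
        System.out.println(new Diamond().who()); // Left
      }
    }
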
diff --git a/test/961-default-iface-resolution-generated/util-src/generate_smali.py b/test/961-default-iface-resolution-generated/util-src/generate_smali.py
deleted file mode 100755
index 921a096..0000000
--- a/test/961-default-iface-resolution-generated/util-src/generate_smali.py
+++ /dev/null
@@ -1,466 +0,0 @@
-#!/usr/bin/python3
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Generate Smali test files for test 961.
-"""
-
-import os
-import sys
-from pathlib import Path
-
-BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
-if BUILD_TOP is None:
-  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
-  sys.exit(1)
-
-# Allow us to import utils and mixins.
-sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
-
-from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
-import testgen.mixins as mixins
-
-from functools import total_ordering
-import itertools
-import string
-
-# The max depth the type tree can have. Includes the class object in the tree.
-# Increasing this increases the number of generated files significantly. This
-# value was chosen as it is fairly quick to run and very comprehensive, checking
-# every possible interface tree up to 5 layers deep.
-MAX_IFACE_DEPTH = 5
-
-class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
-  """
-  A Main.smali file containing the Main class and the main function. It will run
-  all the test functions we have.
-  """
-
-  MAIN_CLASS_TEMPLATE = """{copyright}
-
-.class public LMain;
-.super Ljava/lang/Object;
-
-# class Main {{
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-{test_groups}
-
-{main_func}
-
-# }}
-"""
-
-  MAIN_FUNCTION_TEMPLATE = """
-#   public static void main(String[] args) {{
-.method public static main([Ljava/lang/String;)V
-    .locals 2
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    {test_group_invoke}
-
-    return-void
-.end method
-#   }}
-"""
-
-  TEST_GROUP_INVOKE_TEMPLATE = """
-#     {test_name}();
-    invoke-static {{}}, {test_name}()V
-"""
-
-  def __init__(self):
-    """
-    Initialize this MainClass. We start out with no tests.
-    """
-    self.tests = set()
-
-  def get_expected(self):
-    """
-    Get the expected output of this test.
-    """
-    all_tests = sorted(self.tests)
-    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
-
-  def add_test(self, ty):
-    """
-    Add a test for the concrete type 'ty'
-    """
-    self.tests.add(Func(ty))
-
-  def get_name(self):
-    """
-    Get the name of this class
-    """
-    return "Main"
-
-  def __str__(self):
-    """
-    Print the MainClass smali code.
-    """
-    all_tests = sorted(self.tests)
-    test_invoke = ""
-    test_groups = ""
-    for t in all_tests:
-      test_groups += str(t)
-    for t in all_tests:
-      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
-    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
-
-    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("smali"),
-                                           test_groups = test_groups,
-                                           main_func = main_func)
-
-class Func(mixins.Named, mixins.NameComparableMixin):
-  """
-  A function that tests the functionality of a concrete type. Should only be
-  constructed by MainClass.add_test.
-  """
-
-  TEST_FUNCTION_TEMPLATE = """
-#   public static void {fname}() {{
-#     try {{
-#       {farg} v = new {farg}();
-#       System.out.printf("%s calls default method on %s\\n",
-#                         v.CalledClassName(),
-#                         v.CalledInterfaceName());
-#       return;
-#     }} catch (Error e) {{
-#       e.printStackTrace(System.out);
-#       return;
-#     }}
-#   }}
-.method public static {fname}()V
-    .locals 7
-    :call_{fname}_try_start
-      new-instance v6, L{farg};
-      invoke-direct {{v6}}, L{farg};-><init>()V
-
-      const/4 v0, 2
-      new-array v1,v0, [Ljava/lang/Object;
-      const/4 v0, 0
-      invoke-virtual {{v6}}, L{farg};->CalledClassName()Ljava/lang/String;
-      move-result-object v4
-      aput-object v4,v1,v0
-
-      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-      const-string v3, "%s calls default method on %s\\n"
-
-      invoke-virtual {{v6}}, L{farg};->CalledInterfaceName()Ljava/lang/String;
-      move-result-object v4
-      const/4 v0, 1
-      aput-object v4, v1, v0
-
-      invoke-virtual {{v2,v3,v1}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
-      return-void
-    :call_{fname}_try_end
-    .catch Ljava/lang/Error; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
-    :error_{fname}_start
-      move-exception v3
-      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
-      return-void
-.end method
-"""
-
-  def __init__(self, farg):
-    """
-    Initialize a test function for the given argument
-    """
-    self.farg = farg
-
-  def get_expected(self):
-    """
-    Get the expected output calling this function.
-    """
-    return "{tree} calls default method on {iface_tree}".format(
-        tree = self.farg.get_tree(), iface_tree = self.farg.get_called().get_tree())
-
-  def get_name(self):
-    """
-    Get the name of this function
-    """
-    return "TEST_FUNC_{}".format(self.farg.get_name())
-
-  def __str__(self):
-    """
-    Print the smali code of this function.
-    """
-    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(), farg=self.farg.get_name())
-
-class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
-  """
-  A class that will be instantiated to test default method resolution order.
-  """
-
-  TEST_CLASS_TEMPLATE = """{copyright}
-
-.class public L{class_name};
-.super Ljava/lang/Object;
-.implements L{iface_name};
-
-# public class {class_name} implements {iface_name} {{
-#   public String CalledClassName() {{
-#     return "{tree}";
-#   }}
-# }}
-
-.method public constructor <init>()V
-  .registers 1
-  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
-  return-void
-.end method
-
-.method public CalledClassName()Ljava/lang/String;
-  .locals 1
-  const-string v0, "{tree}"
-  return-object v0
-.end method
-"""
-
-  def __init__(self, iface):
-    """
-    Initialize this test class which implements the given interface
-    """
-    self.iface = iface
-    self.class_name = "CLASS_"+gensym()
-
-  def get_name(self):
-    """
-    Get the name of this class
-    """
-    return self.class_name
-
-  def get_tree(self):
-    """
-    Print out a representation of the type tree of this class
-    """
-    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
-                                                iface_tree = self.iface.get_tree())
-
-  def __iter__(self):
-    """
-    Step through all interfaces implemented transitively by this class
-    """
-    yield self.iface
-    yield from self.iface
-
-  def get_called(self):
-    """
-    Get the interface whose default method would be called when calling the
-    CalledInterfaceName function.
-    """
-    all_ifaces = set(iface for iface in self if iface.default)
-    for i in all_ifaces:
-      if all(map(lambda j: i not in j.get_super_types(), all_ifaces)):
-        return i
-    raise Exception("UNREACHABLE! Unable to find default method!")
-
-  def __str__(self):
-    """
-    Print the smali code of this class.
-    """
-    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
-                                           iface_name = self.iface.get_name(),
-                                           tree = self.get_tree(),
-                                           class_name = self.class_name)
-
-class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
-  """
-  An interface that will be used to test default method resolution order.
-  """
-
-  TEST_INTERFACE_TEMPLATE = """{copyright}
-.class public abstract interface L{class_name};
-.super Ljava/lang/Object;
-{implements_spec}
-
-# public interface {class_name} {extends} {ifaces} {{
-#   public String CalledClassName();
-.method public abstract CalledClassName()Ljava/lang/String;
-.end method
-
-{funcs}
-
-# }}
-"""
-
-  DEFAULT_FUNC_TEMPLATE = """
-#   public default String CalledInterfaceName() {{
-#     return "{tree}";
-#   }}
-.method public CalledInterfaceName()Ljava/lang/String;
-  .locals 1
-  const-string v0, "{tree}"
-  return-object v0
-.end method
-"""
-
-  IMPLEMENTS_TEMPLATE = """
-.implements L{iface_name};
-"""
-
-  def __init__(self, ifaces, default):
-    """
-    Initialize interface with the given super-interfaces
-    """
-    self.ifaces = sorted(ifaces)
-    self.default = default
-    end = "_DEFAULT" if default else ""
-    self.class_name = "INTERFACE_"+gensym()+end
-
-  def get_super_types(self):
-    """
-    Returns a set of all the supertypes of this interface
-    """
-    return set(i2 for i2 in self)
-
-  def get_name(self):
-    """
-    Get the name of this class
-    """
-    return self.class_name
-
-  def get_tree(self):
-    """
-    Print out a representation of the type tree of this class
-    """
-    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
-                                            iftree = print_tree(self.ifaces))
-
-  def __iter__(self):
-    """
-    Performs depth-first traversal of the interface tree this interface is the
-    root of. Does not filter out repeats.
-    """
-    for i in self.ifaces:
-      yield i
-      yield from i
-
-  def __str__(self):
-    """
-    Print the smali code of this interface.
-    """
-    s_ifaces = " "
-    j_ifaces = " "
-    for i in self.ifaces:
-      s_ifaces += self.IMPLEMENTS_TEMPLATE.format(iface_name = i.get_name())
-      j_ifaces += " {},".format(i.get_name())
-    j_ifaces = j_ifaces[0:-1]
-    if self.default:
-      funcs = self.DEFAULT_FUNC_TEMPLATE.format(ifaces = j_ifaces,
-                                                tree = self.get_tree(),
-                                                class_name = self.class_name)
-    else:
-      funcs = ""
-    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
-                                               implements_spec = s_ifaces,
-                                               extends = "extends" if len(self.ifaces) else "",
-                                               ifaces = j_ifaces,
-                                               funcs = funcs,
-                                               tree = self.get_tree(),
-                                               class_name = self.class_name)
-
-def print_tree(ifaces):
-  """
-  Prints a list of iface trees
-  """
-  return " ".join(i.get_tree() for i in  ifaces)
-
-# The deduplicated output of subtree_sizes for each size up to
-# MAX_IFACE_DEPTH.
-SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
-            for i in range(MAX_IFACE_DEPTH + 1)]
-
-def create_interface_trees():
-  """
-  Return all legal interface trees
-  """
-  def dump_supers(s):
-    """
-    Does depth first traversal of all the interfaces in the list.
-    """
-    for i in s:
-      yield i
-      yield from i
-
-  def create_interface_trees_inner(num, allow_default):
-    for split in SUBTREES[num]:
-      ifaces = []
-      for sub in split:
-        if sub == 1:
-          ifaces.append([TestInterface([], allow_default)])
-          if allow_default:
-            ifaces[-1].append(TestInterface([], False))
-        else:
-          ifaces.append(list(create_interface_trees_inner(sub, allow_default)))
-      for supers in itertools.product(*ifaces):
-        all_supers = sorted(set(dump_supers(supers)) - set(supers))
-        for i in range(len(all_supers) + 1):
-          for combo in itertools.combinations(all_supers, i):
-            yield TestInterface(list(combo) + list(supers), allow_default)
-      if allow_default:
-        for i in range(len(split)):
-          ifaces = []
-          for sub, cs in zip(split, itertools.count()):
-            if sub == 1:
-              ifaces.append([TestInterface([], i == cs)])
-            else:
-              ifaces.append(list(create_interface_trees_inner(sub, i == cs)))
-          for supers in itertools.product(*ifaces):
-            all_supers = sorted(set(dump_supers(supers)) - set(supers))
-            for i in range(len(all_supers) + 1):
-              for combo in itertools.combinations(all_supers, i):
-                yield TestInterface(list(combo) + list(supers), False)
-
-  for num in range(1, MAX_IFACE_DEPTH):
-    yield from create_interface_trees_inner(num, True)
-
-def create_all_test_files():
-  """
-  Creates all the objects representing the files in this test. They just need to
-  be dumped.
-  """
-  mc = MainClass()
-  classes = {mc}
-  for tree in create_interface_trees():
-    classes.add(tree)
-    for i in tree:
-      classes.add(i)
-    test_class = TestClass(tree)
-    mc.add_test(test_class)
-    classes.add(test_class)
-  return mc, classes
-
-def main(argv):
-  smali_dir = Path(argv[1])
-  if not smali_dir.exists() or not smali_dir.is_dir():
-    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
-    sys.exit(1)
-  expected_txt = Path(argv[2])
-  mainclass, all_files = create_all_test_files()
-  with expected_txt.open('w') as out:
-    print(mainclass.get_expected(), file=out)
-  for f in all_files:
-    f.dump(smali_dir)
-
-if __name__ == '__main__':
-  main(sys.argv)
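
The subtlest piece of the generator removed above is TestClass.get_called,
which must select the single most-derived default-method interface in the
tree. A minimal standalone sketch of that selection rule, using hypothetical
Iface nodes rather than the test's own classes:

    class Iface:
        """A toy interface node: a name, its direct supers, a default flag."""
        def __init__(self, name, supers, default):
            self.name, self.supers, self.default = name, supers, default

        def all_supers(self):
            # Depth-first walk over every transitive super-interface.
            for s in self.supers:
                yield s
                yield from s.all_supers()

    def called(ifaces):
        # Pick the default-method interface that no other default-method
        # interface lists among its supertypes, i.e. the most derived one.
        defaults = [i for i in ifaces if i.default]
        for i in defaults:
            if all(i not in set(j.all_supers()) for j in defaults):
                return i

    base = Iface("BASE_DEFAULT", [], True)
    child = Iface("CHILD_DEFAULT", [base], True)
    print(called([base, child]).name)  # prints CHILD_DEFAULT

The child interface wins because the base appears among its transitive
supertypes, mirroring how a more-derived default method shadows the one it
inherits.
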
diff --git a/test/962-iface-static/build b/test/962-iface-static/build
index 5ad82f7..0dd8573 100755
--- a/test/962-iface-static/build
+++ b/test/962-iface-static/build
@@ -17,14 +17,11 @@
 # make us exit on a failure
 set -e
 
-if [[ $@ == *"--jvm"* ]]; then
-  # Build the Java files if we are running a --jvm test
-  mkdir -p src
-  mkdir -p classes
-  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
-  ${JAVAC} -implicit:none -d classes $(find src -name '*.java')
+if [[ $@ != *"--jvm"* ]]; then
+  # Not a --jvm run, so hard-wire the use of the experimental Jack toolchain.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
 fi
 
-# Build the smali files and make a dex
-${SMALI} -JXmx512m --experimental --api-level 23 --output classes.dex $(find smali -name '*.smali')
-zip $TEST_NAME.jar classes.dex
+./default-build "$@" --experimental default-methods
diff --git a/test/962-iface-static/run b/test/962-iface-static/run
deleted file mode 100755
index e713708..0000000
--- a/test/962-iface-static/run
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if echo $@ | grep -q -- "--jvm"; then
-  ${RUN} "$@"
-else
-  ${RUN} "$@" --runtime-option -Xexperimental:default-methods -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:default-methods
-fi
diff --git a/test/962-iface-static/smali/Displayer.smali b/test/962-iface-static/smali/Displayer.smali
deleted file mode 100644
index 06bec16..0000000
--- a/test/962-iface-static/smali/Displayer.smali
+++ /dev/null
@@ -1,45 +0,0 @@
-# /*
-#  * Copyright (C) 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-#
-# public class Displayer {
-#   static {
-#       System.out.println("init");
-#   }
-#
-#   public Displayer() {
-#       System.out.println("constructor");
-#   }
-# }
-
-.class public LDisplayer;
-.super Ljava/lang/Object;
-
-.method public static <clinit>()V
-    .locals 3
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    const-string v0, "init"
-    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-    return-void
-.end method
-
-.method public constructor <init>()V
-    .locals 2
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    const-string v0, "constructor"
-    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-    return-void
-.end method
diff --git a/test/962-iface-static/smali/Main.smali b/test/962-iface-static/smali/Main.smali
deleted file mode 100644
index 72fa5e0..0000000
--- a/test/962-iface-static/smali/Main.smali
+++ /dev/null
@@ -1,40 +0,0 @@
-# /*
-#  * Copyright (C) 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-#
-# class Main {
-#   public static void main(String[] args) {
-#       System.out.println(iface.SayHi());
-#   }
-# }
-.class public LMain;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static main([Ljava/lang/String;)V
-    .locals 2
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    invoke-static {}, Liface;->SayHi()Ljava/lang/String;
-    move-result-object v0
-    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-    return-void
-.end method
diff --git a/test/962-iface-static/smali/iface.smali b/test/962-iface-static/smali/iface.smali
deleted file mode 100644
index 441aae6..0000000
--- a/test/962-iface-static/smali/iface.smali
+++ /dev/null
@@ -1,43 +0,0 @@
-# /*
-#  * Copyright (C) 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-#
-# public interface iface {
-#   public static final Displayer f = new Displayer();
-#
-#   public static String SayHi() {
-#       return "Hello";
-#   }
-# }
-
-.class public abstract interface Liface;
-.super Ljava/lang/Object;
-
-.field public final static f:LDisplayer;
-
-.method public static <clinit>()V
-    .locals 3
-    new-instance v1, LDisplayer;
-    invoke-direct {v1}, LDisplayer;-><init>()V
-    sput-object v1, Liface;->f:LDisplayer;
-    return-void
-.end method
-
-.method public static SayHi()Ljava/lang/String;
-    .locals 1
-    const-string v0, "Hello"
-    return-object v0
-.end method
-
diff --git a/test/962-iface-static/src/Displayer.java b/test/962-iface-static/src/Displayer.java
new file mode 100644
index 0000000..5b28b3f
--- /dev/null
+++ b/test/962-iface-static/src/Displayer.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class Displayer {
+  static {
+    System.out.println("init");
+  }
+  public Displayer() {
+    System.out.println("constructor");
+  }
+}
diff --git a/test/962-iface-static/src/Iface.java b/test/962-iface-static/src/Iface.java
new file mode 100644
index 0000000..82c7808
--- /dev/null
+++ b/test/962-iface-static/src/Iface.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public static final Displayer f = new Displayer();
+  public static String SayHi() {
+    return "Hello";
+  }
+}
diff --git a/test/962-iface-static/src/Main.java b/test/962-iface-static/src/Main.java
new file mode 100644
index 0000000..7cb8eb7
--- /dev/null
+++ b/test/962-iface-static/src/Main.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class Main {
+  public static void main(String[] args) {
+    System.out.println(Iface.SayHi());
+  }
+}
diff --git a/test/963-default-range-smali/build b/test/963-default-range-smali/build
index 5ad82f7..0dd8573 100755
--- a/test/963-default-range-smali/build
+++ b/test/963-default-range-smali/build
@@ -17,14 +17,11 @@
 # make us exit on a failure
 set -e
 
-if [[ $@ == *"--jvm"* ]]; then
-  # Build the Java files if we are running a --jvm test
-  mkdir -p src
-  mkdir -p classes
-  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
-  ${JAVAC} -implicit:none -d classes $(find src -name '*.java')
+if [[ $@ != *"--jvm"* ]]; then
+  # Not a --jvm run, so hard-wire the use of the experimental Jack toolchain.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
 fi
 
-# Build the smali files and make a dex
-${SMALI} -JXmx512m --experimental --api-level 23 --output classes.dex $(find smali -name '*.smali')
-zip $TEST_NAME.jar classes.dex
+./default-build "$@" --experimental default-methods
diff --git a/test/963-default-range-smali/run b/test/963-default-range-smali/run
deleted file mode 100755
index e713708..0000000
--- a/test/963-default-range-smali/run
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if echo $@ | grep -q -- "--jvm"; then
-  ${RUN} "$@"
-else
-  ${RUN} "$@" --runtime-option -Xexperimental:default-methods -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:default-methods
-fi
diff --git a/test/963-default-range-smali/smali/A.smali b/test/963-default-range-smali/smali/A.smali
deleted file mode 100644
index b3d91dd..0000000
--- a/test/963-default-range-smali/smali/A.smali
+++ /dev/null
@@ -1,29 +0,0 @@
-# /*
-#  * Copyright 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-
-.class public LA;
-.super Ljava/lang/Object;
-.implements Liface;
-
-# class A implements iface {
-# }
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
diff --git a/test/963-default-range-smali/smali/Main.smali b/test/963-default-range-smali/smali/Main.smali
deleted file mode 100644
index 400fba7..0000000
--- a/test/963-default-range-smali/smali/Main.smali
+++ /dev/null
@@ -1,77 +0,0 @@
-# /*
-#  * Copyright (C) 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-#
-# class Main {
-#   public static void main(String[] args) {
-#       A a = new A();
-#       System.out.println(a.SayHi("a string 0",
-#                                  "a string 1",
-#                                  "a string 2",
-#                                  "a string 3",
-#                                  "a string 4",
-#                                  "a string 5",
-#                                  "a string 6",
-#                                  "a string 7",
-#                                  "a string 8",
-#                                  "a string 9"));
-#       iface b = (iface)a;
-#       System.out.println(b.SayHi("a string 0",
-#                                  "a string 1",
-#                                  "a string 2",
-#                                  "a string 3",
-#                                  "a string 4",
-#                                  "a string 5",
-#                                  "a string 6",
-#                                  "a string 7",
-#                                  "a string 8",
-#                                  "a string 9"));
-#   }
-# }
-.class public LMain;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static main([Ljava/lang/String;)V
-    .locals 15
-    sget-object v12, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    new-instance v1, LA;
-    invoke-direct {v1}, LA;-><init>()V
-    const-string v2, "a string 0"
-    const-string v3, "a string 1"
-    const-string v4, "a string 2"
-    const-string v5, "a string 3"
-    const-string v6, "a string 4"
-    const-string v7, "a string 5"
-    const-string v8, "a string 6"
-    const-string v9, "a string 7"
-    const-string v10, "a string 8"
-    const-string v11, "a string 9"
-    invoke-virtual/range {v1 .. v11}, LA;->SayHi(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    invoke-virtual {v12,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-    invoke-interface/range {v1 .. v11}, Liface;->SayHi(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    invoke-virtual {v12,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-    return-void
-.end method
diff --git a/test/963-default-range-smali/smali/iface.smali b/test/963-default-range-smali/smali/iface.smali
deleted file mode 100644
index c2c3ce6..0000000
--- a/test/963-default-range-smali/smali/iface.smali
+++ /dev/null
@@ -1,40 +0,0 @@
-# /*
-#  * Copyright (C) 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-#
-# public interface iface {
-#   public default String SayHi(String n1,
-#                               String n2,
-#                               String n3,
-#                               String n4,
-#                               String n5,
-#                               String n6,
-#                               String n7,
-#                               String n8,
-#                               String n9,
-#                               String n0) {
-#       return "Hello";
-#   }
-# }
-
-.class public abstract interface Liface;
-.super Ljava/lang/Object;
-
-.method public SayHi(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
-    .locals 1
-    const-string v0, "Hello"
-    return-object v0
-.end method
-
diff --git a/test/963-default-range-smali/src/A.java b/test/963-default-range-smali/src/A.java
new file mode 100644
index 0000000..617eccb
--- /dev/null
+++ b/test/963-default-range-smali/src/A.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class A implements Iface { }
diff --git a/test/963-default-range-smali/src/Iface.java b/test/963-default-range-smali/src/Iface.java
new file mode 100644
index 0000000..7556209
--- /dev/null
+++ b/test/963-default-range-smali/src/Iface.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public default String SayHi(String n1,
+                              String n2,
+                              String n3,
+                              String n4,
+                              String n5,
+                              String n6,
+                              String n7,
+                              String n8,
+                              String n9,
+                              String n0) {
+    return "Hello";
+  }
+}
diff --git a/test/963-default-range-smali/src/Main.java b/test/963-default-range-smali/src/Main.java
new file mode 100644
index 0000000..841842d
--- /dev/null
+++ b/test/963-default-range-smali/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class Main {
+  public static void main(String[] args) {
+    A a = new A();
+    System.out.println(a.SayHi("a string 0",
+                               "a string 1",
+                               "a string 2",
+                               "a string 3",
+                               "a string 4",
+                               "a string 5",
+                               "a string 6",
+                               "a string 7",
+                               "a string 8",
+                               "a string 9"));
+    Iface b = a;
+    System.out.println(b.SayHi("a string 0",
+                               "a string 1",
+                               "a string 2",
+                               "a string 3",
+                               "a string 4",
+                               "a string 5",
+                               "a string 6",
+                               "a string 7",
+                               "a string 8",
+                               "a string 9"));
+  }
+}
diff --git a/test/964-default-iface-init-generated/build b/test/964-default-iface-init-generated/build
index deef803..ccebbe4 100755
--- a/test/964-default-iface-init-generated/build
+++ b/test/964-default-iface-init-generated/build
@@ -26,20 +26,19 @@
 }
 trap 'restore_ulimit' ERR
 
-# Generate the smali files and expected.txt or fail
-./util-src/generate_smali.py ./smali ./expected.txt
-
-if [[ $@ == *"--jvm"* ]]; then
-  # Build the Java files if we are running a --jvm test
-  mkdir -p src
-  mkdir -p classes
-  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
-  ${JAVAC} -implicit:none -d classes $(find src -name '*.java')
+if [[ $@ != *"--jvm"* ]]; then
+  # Not a --jvm run, so hard-wire the use of the experimental Jack toolchain.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
 fi
 
-# Build the smali files and make a dex
-${SMALI} -JXmx512m --experimental --api-level 23 --output classes.dex $(find smali -name '*.smali')
-zip $TEST_NAME.jar classes.dex
+mkdir -p ./src
+
+# Generate the java files and expected.txt or fail
+./util-src/generate_java.py ./src ./expected.txt
+
+./default-build "$@" --experimental default-methods
 
 # Reset the ulimit back to its initial value
 restore_ulimit
diff --git a/test/964-default-iface-init-generated/run b/test/964-default-iface-init-generated/run
deleted file mode 100755
index e378b06..0000000
--- a/test/964-default-iface-init-generated/run
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if echo $@ | grep -q -- "--jvm"; then
-  ${RUN} "$@"
-else
-  ${RUN} "$@" --runtime-option -Xexperimental:default-methods -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:default-methods
-fi
diff --git a/test/964-default-iface-init-generated/smali/Displayer.smali b/test/964-default-iface-init-generated/smali/Displayer.smali
deleted file mode 100644
index 91280a8..0000000
--- a/test/964-default-iface-init-generated/smali/Displayer.smali
+++ /dev/null
@@ -1,45 +0,0 @@
-# /*
-#  * Copyright (C) 2015 The Android Open Source Project
-#  *
-#  * Licensed under the Apache License, Version 2.0 (the "License");
-#  * you may not use this file except in compliance with the License.
-#  * You may obtain a copy of the License at
-#  *
-#  *      http://www.apache.org/licenses/LICENSE-2.0
-#  *
-#  * Unless required by applicable law or agreed to in writing, software
-#  * distributed under the License is distributed on an "AS IS" BASIS,
-#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  * See the License for the specific language governing permissions and
-#  * limitations under the License.
-#  */
-#
-# // This class exists because Java does not allow static {} blocks in interfaces.
-# public class Displayer {
-#   public Displayer(String type) {
-#       System.out.println("initialization of " + type);
-#   }
-#   public void touch() {
-#       return;
-#   }
-# }
-
-.class public LDisplayer;
-.super Ljava/lang/Object;
-
-.method public constructor <init>(Ljava/lang/String;)V
-    .locals 2
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    const-string v0, "initialization of "
-    invoke-virtual {v0, p1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
-    move-result-object v0
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-    return-void
-.end method
-
-.method public touch()V
-    .locals 0
-    return-void
-.end method
-
diff --git a/test/964-default-iface-init-generated/src/Displayer.java b/test/964-default-iface-init-generated/src/Displayer.java
new file mode 100644
index 0000000..4be0ab2
--- /dev/null
+++ b/test/964-default-iface-init-generated/src/Displayer.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// This class exists because Java does not allow static {} blocks in interfaces.
+public class Displayer {
+  public Displayer(String type) {
+    System.out.println("initialization of " + type);
+  }
+  public void touch() {
+    return;
+  }
+}
diff --git a/test/964-default-iface-init-generated/util-src/generate_java.py b/test/964-default-iface-init-generated/util-src/generate_java.py
new file mode 100755
index 0000000..b2df49f
--- /dev/null
+++ b/test/964-default-iface-init-generated/util-src/generate_java.py
@@ -0,0 +1,419 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate java test files for test 964.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A Main.java file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+class Main {{
+{test_groups}
+{main_func}
+}}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+  public static void main(String[] args) {{
+    {test_group_invoke}
+  }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+    {test_name}();
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the java code for this test.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('java'),
+                                           test_groups = test_groups,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+  public static void {fname}() {{
+    try {{
+      System.out.println("About to initialize {tree}");
+      {farg} v = new {farg}();
+      System.out.println("Initialized {tree}");
+      v.touchAll();
+      System.out.println("All of {tree} hierarchy initialized");
+      return;
+    }} catch (Error e) {{
+      e.printStackTrace(System.out);
+      return;
+    }}
+  }}
+"""
+
+  OUTPUT_FORMAT = """
+About to initialize {tree}
+{initialize_output}
+Initialized {tree}
+{touch_output}
+All of {tree} hierarchy initialized
+""".strip()
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def __str__(self):
+    """
+    Print the java code for this test function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(),
+                                              farg=self.farg.get_name(),
+                                              tree = self.farg.get_tree())
+
+  def get_name(self):
+    """
+    Gets the name of this test function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def get_expected(self):
+    """
+    Get the expected output of this function.
+    """
+    return self.OUTPUT_FORMAT.format(
+        tree = self.farg.get_tree(),
+        initialize_output = self.farg.get_initialize_output().strip(),
+        touch_output = self.farg.get_touch_output().strip())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.JavaFileMixin):
+  """
+  A class that will be instantiated to test interface initialization order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+public class {class_name} implements {ifaces} {{
+  public void marker() {{
+    return;
+  }}
+
+  public void touchAll() {{
+{touch_calls}
+  }}
+}}
+"""
+
+  TOUCH_CALL_TEMPLATE = """
+    System.out.println("{class_name} touching {iface_name}");
+    {iface_name}.field.touch();
+"""
+
+  TOUCH_OUTPUT_TEMPLATE = """
+{class_name} touching {iface_name}
+{touch_output}
+""".strip()
+
+  def __init__(self, ifaces):
+    """
+    Initialize this test class which implements the given interfaces
+    """
+    self.ifaces = ifaces
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{fname} {iftree}]".format(fname = self.get_name(), iftree = print_tree(self.ifaces))
+
+  def get_initialize_output(self):
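+    """
+    Returns the expected initialization output of every interface this class
+    transitively implements, in reverse-depth-first order.
+    """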
+    return "\n".join(map(lambda i: i.get_initialize_output().strip(), dump_tree(self.ifaces)))
+
+  def get_touch_output(self):
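+    """
+    Returns the expected output of touchAll(), one block per transitively
+    implemented interface, in sorted order.
+    """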
+    return "\n".join(map(lambda a: self.TOUCH_OUTPUT_TEMPLATE.format(
+                                      class_name = self.class_name,
+                                      iface_name = a.get_name(),
+                                      touch_output = a.get_touch_output()).strip(),
+                         self.get_all_interfaces()))
+
+  def get_all_interfaces(self):
+    """
+    Returns a set of all interfaces this class transitively implements
+    """
+    return sorted(set(dump_tree(self.ifaces)))
+
+  def __str__(self):
+    """
+    Print the java code for this class.
+    """
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    touches  = '\n'.join(map(lambda a: self.TOUCH_CALL_TEMPLATE.format(class_name = self.class_name,
+                                                                       iface_name = a.get_name()),
+                             self.get_all_interfaces()))
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('java'),
+                                           ifaces = j_ifaces,
+                                           class_name = self.class_name,
+                                           touch_calls = touches)
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.JavaFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+public interface {class_name} {extends} {ifaces} {{
+  public static final Displayer field = new Displayer("{tree}");
+  public void marker();
+{funcs}
+}}
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+  public default void {class_name}_DEFAULT_FUNC() {{ return; }}
+"""
+
+  OUTPUT_TEMPLATE = "initialization of {tree}"
+
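+  # The static Displayer field in the template above is what makes
+  # initialization observable: its constructor prints the OUTPUT_TEMPLATE
+  # line when this interface's <clinit> runs.
+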
+  def __init__(self, ifaces, default):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = ifaces
+    self.default = default
+    end = "_DEFAULT" if default else ""
+    self.class_name = "INTERFACE_"+gensym()+end
+    self.cloned = False
+    self.initialized = False
+
+  def clone(self):
+    """
+    Clones this interface, returning a new one with the same structure but
+    different name.
+    """
+    return TestInterface(tuple(map(lambda a: a.clone(), self.ifaces)), self.default)
+
+  def get_name(self):
+    """
+    Gets the name of this interface
+    """
+    return self.class_name
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def get_initialize_output(self):
+    """
+    Returns the expected output upon the class that implements this interface being initialized.
+    """
+    if self.default and not self.initialized:
+      self.initialized = True
+      return self.OUTPUT_TEMPLATE.format(tree = self.get_tree())
+    else:
+      return ""
+
+  def get_touch_output(self):
+    """
+    Returns the expected output upon this interface being touched.
+    """
+    if not self.default and not self.initialized:
+      self.initialized = True
+      return self.OUTPUT_TEMPLATE.format(tree = self.get_tree())
+    else:
+      return ""
+
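+  # Together, get_initialize_output and get_touch_output encode Java's
+  # interface initialization rules: an interface that declares a default
+  # method is initialized when an implementing class is initialized, while
+  # one that does not is initialized only when its static field is first
+  # touched. The 'initialized' flag keeps each line from being expected
+  # twice.
+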
+  def __str__(self):
+    """
+    Print the java code for this interface.
+    """
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    if self.default:
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(class_name = self.class_name)
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('java'),
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def dump_tree(ifaces):
+  """
+  Yields all the interfaces transitively implemented by the set in
+  reverse-depth-first order
+  """
+  for i in ifaces:
+    yield from dump_tree(i.ifaces)
+    yield i
+
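+# For example, if interface A extends B, dump_tree((A,)) yields B and then A:
+# every interface appears after the supers it extends.
+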
+def print_tree(ifaces):
+  """
+  Prints the tree for the given ifaces.
+  """
+  return " ".join(i.get_tree() for i in  ifaces)
+
+def clone_all(l):
+  return tuple(a.clone() for a in l)
+
+# Cached output of subtree_sizes for speed of access.
+SUBTREES = [set(tuple(l) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
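+# For instance, assuming subtree_sizes(3) yields every way of splitting three
+# leaves into subtrees, SUBTREES[3] would hold splits such as (1, 1, 1),
+# (1, 2), (2, 1) and (3,).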
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        ifaces.append(list(create_interface_trees(sub)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(clone_all(supers))
+        for i in range(len(set(dump_tree(supers)) - set(supers))):
+          ns = clone_all(supers)
+          selected = sorted(set(dump_tree(ns)) - set(ns))[i]
+          yield TestClass(tuple([selected] + list(ns)))
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    yield TestInterface(tuple(), False)
+    yield TestInterface(tuple(), True)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    for supers in itertools.product(*ifaces):
+      yield TestInterface(clone_all(supers), False)
+      yield TestInterface(clone_all(supers), True)
+      # TODO: Should also add some interfaces from higher up the tree.
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in dump_tree(clazz.ifaces):
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  java_dir = Path(argv[1])
+  if not java_dir.exists() or not java_dir.is_dir():
+    print("{} is not a valid java dir".format(java_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(java_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/964-default-iface-init-generated/util-src/generate_smali.py b/test/964-default-iface-init-generated/util-src/generate_smali.py
deleted file mode 100755
index be2d3ba..0000000
--- a/test/964-default-iface-init-generated/util-src/generate_smali.py
+++ /dev/null
@@ -1,531 +0,0 @@
-#!/usr/bin/python3
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Generate Smali test files for test 964.
-"""
-
-import os
-import sys
-from pathlib import Path
-
-BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
-if BUILD_TOP is None:
-  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
-  sys.exit(1)
-
-# Allow us to import utils and mixins.
-sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
-
-from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
-import testgen.mixins as mixins
-
-from functools import total_ordering
-import itertools
-import string
-
-# The max depth the tree can have.
-MAX_IFACE_DEPTH = 3
-
-class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
-  """
-  A Main.smali file containing the Main class and the main function. It will run
-  all the test functions we have.
-  """
-
-  MAIN_CLASS_TEMPLATE = """{copyright}
-
-.class public LMain;
-.super Ljava/lang/Object;
-
-# class Main {{
-
-.method public constructor <init>()V
-    .registers 1
-    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-{test_groups}
-
-{main_func}
-
-# }}
-"""
-
-  MAIN_FUNCTION_TEMPLATE = """
-#   public static void main(String[] args) {{
-.method public static main([Ljava/lang/String;)V
-    .locals 2
-
-    {test_group_invoke}
-
-    return-void
-.end method
-#   }}
-"""
-
-  TEST_GROUP_INVOKE_TEMPLATE = """
-#     {test_name}();
-    invoke-static {{}}, {test_name}()V
-"""
-
-  def __init__(self):
-    """
-    Initialize this MainClass. We start out with no tests.
-    """
-    self.tests = set()
-
-  def add_test(self, ty):
-    """
-    Add a test for the concrete type 'ty'
-    """
-    self.tests.add(Func(ty))
-
-  def get_expected(self):
-    """
-    Get the expected output of this test.
-    """
-    all_tests = sorted(self.tests)
-    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
-
-  def get_name(self):
-    """
-    Gets the name of this class
-    """
-    return "Main"
-
-  def __str__(self):
-    """
-    Print the smali code for this test.
-    """
-    all_tests = sorted(self.tests)
-    test_invoke = ""
-    test_groups = ""
-    for t in all_tests:
-      test_groups += str(t)
-    for t in all_tests:
-      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
-    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
-
-    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
-                                           test_groups = test_groups,
-                                           main_func = main_func)
-
-class Func(mixins.Named, mixins.NameComparableMixin):
-  """
-  A function that tests the functionality of a concrete type. Should only be
-  constructed by MainClass.add_test.
-  """
-
-  TEST_FUNCTION_TEMPLATE = """
-#   public static void {fname}() {{
-#     try {{
-#       System.out.println("About to initialize {tree}");
-#       {farg} v = new {farg}();
-#       System.out.println("Initialized {tree}");
-#       v.touchAll();
-#       System.out.println("All of {tree} hierarchy initialized");
-#       return;
-#     }} catch (Error e) {{
-#       e.printStackTrace(System.out);
-#       return;
-#     }}
-#   }}
-.method public static {fname}()V
-    .locals 7
-    :call_{fname}_try_start
-      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-      const-string v3, "About to initialize {tree}"
-      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-      new-instance v6, L{farg};
-      invoke-direct {{v6}}, L{farg};-><init>()V
-
-      const-string v3, "Initialized {tree}"
-      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-      invoke-virtual {{v6}}, L{farg};->touchAll()V
-
-      const-string v3, "All of {tree} hierarchy initialized"
-      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-
-      return-void
-    :call_{fname}_try_end
-    .catch Ljava/lang/Error; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
-    :error_{fname}_start
-      move-exception v3
-      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
-      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
-      return-void
-.end method
-"""
-
-  OUTPUT_FORMAT = """
-About to initialize {tree}
-{initialize_output}
-Initialized {tree}
-{touch_output}
-All of {tree} hierarchy initialized
-""".strip()
-
-  def __init__(self, farg):
-    """
-    Initialize a test function for the given argument
-    """
-    self.farg = farg
-
-  def __str__(self):
-    """
-    Print the smali code for this test function.
-    """
-    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(),
-                                              farg=self.farg.get_name(),
-                                              tree = self.farg.get_tree())
-
-  def get_name(self):
-    """
-    Gets the name of this test function
-    """
-    return "TEST_FUNC_{}".format(self.farg.get_name())
-
-  def get_expected(self):
-    """
-    Get the expected output of this function.
-    """
-    return self.OUTPUT_FORMAT.format(
-        tree = self.farg.get_tree(),
-        initialize_output = self.farg.get_initialize_output().strip(),
-        touch_output = self.farg.get_touch_output().strip())
-
-class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
-  """
-  A class that will be instantiated to test interface initialization order.
-  """
-
-  TEST_CLASS_TEMPLATE = """{copyright}
-
-.class public L{class_name};
-.super Ljava/lang/Object;
-{implements_spec}
-
-# public class {class_name} implements {ifaces} {{
-#
-#   public {class_name}() {{
-#   }}
-.method public constructor <init>()V
-  .locals 2
-  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
-  return-void
-.end method
-
-#   public void marker() {{
-#     return;
-#   }}
-.method public marker()V
-  .locals 0
-  return-void
-.end method
-
-#   public void touchAll() {{
-.method public touchAll()V
-  .locals 2
-  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
-  {touch_calls}
-  return-void
-.end method
-#   }}
-# }}
-"""
-
-  IMPLEMENTS_TEMPLATE = """
-.implements L{iface_name};
-"""
-
-  TOUCH_CALL_TEMPLATE = """
-#     System.out.println("{class_name} touching {iface_name}");
-#     {iface_name}.field.touch();
-      const-string v1, "{class_name} touching {iface_name}"
-      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
-      sget-object v1, L{iface_name};->field:LDisplayer;
-      invoke-virtual {{v1}}, LDisplayer;->touch()V
-"""
-
-  TOUCH_OUTPUT_TEMPLATE = """
-{class_name} touching {iface_name}
-{touch_output}
-""".strip()
-
-  def __init__(self, ifaces):
-    """
-    Initialize this test class which implements the given interfaces
-    """
-    self.ifaces = ifaces
-    self.class_name = "CLASS_"+gensym()
-
-  def get_name(self):
-    """
-    Gets the name of this interface
-    """
-    return self.class_name
-
-  def get_tree(self):
-    """
-    Print out a representation of the type tree of this class
-    """
-    return "[{fname} {iftree}]".format(fname = self.get_name(), iftree = print_tree(self.ifaces))
-
-  def get_initialize_output(self):
-    return "\n".join(map(lambda i: i.get_initialize_output().strip(), dump_tree(self.ifaces)))
-
-  def get_touch_output(self):
-    return "\n".join(map(lambda a: self.TOUCH_OUTPUT_TEMPLATE.format(
-                                      class_name = self.class_name,
-                                      iface_name = a.get_name(),
-                                      touch_output = a.get_touch_output()).strip(),
-                         self.get_all_interfaces()))
-
-  def get_all_interfaces(self):
-    """
-    Returns a set of all interfaces this class transitively implements
-    """
-    return sorted(set(dump_tree(self.ifaces)))
-
-  def __str__(self):
-    """
-    Print the smali code for this class.
-    """
-    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
-                             self.ifaces))
-    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
-    touches  = '\n'.join(map(lambda a: self.TOUCH_CALL_TEMPLATE.format(class_name = self.class_name,
-                                                                       iface_name = a.get_name()),
-                             self.get_all_interfaces()))
-    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
-                                           implements_spec = s_ifaces,
-                                           ifaces = j_ifaces,
-                                           class_name = self.class_name,
-                                           touch_calls = touches)
-
-class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
-  """
-  An interface that will be used to test default method resolution order.
-  """
-
-  TEST_INTERFACE_TEMPLATE = """{copyright}
-.class public abstract interface L{class_name};
-.super Ljava/lang/Object;
-{implements_spec}
-
-# public interface {class_name} {extends} {ifaces} {{
-#   public static final Displayer field = new Displayer("{tree}");
-.field public final static field:LDisplayer;
-
-.method public static constructor <clinit>()V
-    .locals 3
-    const-string v2, "{tree}"
-    new-instance v1, LDisplayer;
-    invoke-direct {{v1, v2}}, LDisplayer;-><init>(Ljava/lang/String;)V
-    sput-object v1, L{class_name};->field:LDisplayer;
-    return-void
-.end method
-
-#   public void marker();
-.method public abstract marker()V
-.end method
-
-{funcs}
-
-# }}
-"""
-
-  DEFAULT_FUNC_TEMPLATE = """
-#   public default void {class_name}_DEFAULT_FUNC() {{
-#     return;
-#   }}
-.method public {class_name}_DEFAULT_FUNC()V
-  .locals 0
-  return-void
-.end method
-"""
-  IMPLEMENTS_TEMPLATE = """
-.implements L{iface_name};
-"""
-
-  OUTPUT_TEMPLATE = "initialization of {tree}"
-
-  def __init__(self, ifaces, default):
-    """
-    Initialize interface with the given super-interfaces
-    """
-    self.ifaces = ifaces
-    self.default = default
-    end = "_DEFAULT" if default else ""
-    self.class_name = "INTERFACE_"+gensym()+end
-    self.cloned = False
-    self.initialized = False
-
-  def clone(self):
-    """
-    Clones this interface, returning a new one with the same structure but
-    different name.
-    """
-    return TestInterface(tuple(map(lambda a: a.clone(), self.ifaces)), self.default)
-
-  def get_name(self):
-    """
-    Gets the name of this interface
-    """
-    return self.class_name
-
-  def __iter__(self):
-    """
-    Performs depth-first traversal of the interface tree this interface is the
-    root of. Does not filter out repeats.
-    """
-    for i in self.ifaces:
-      yield i
-      yield from i
-
-  def get_tree(self):
-    """
-    Print out a representation of the type tree of this class
-    """
-    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
-                                            iftree = print_tree(self.ifaces))
-
-  def get_initialize_output(self):
-    """
-    Returns the expected output upon the class that implements this interface being initialized.
-    """
-    if self.default and not self.initialized:
-      self.initialized = True
-      return self.OUTPUT_TEMPLATE.format(tree = self.get_tree())
-    else:
-      return ""
-
-  def get_touch_output(self):
-    """
-    Returns the expected output upon this interface being touched.
-    """
-    if not self.default and not self.initialized:
-      self.initialized = True
-      return self.OUTPUT_TEMPLATE.format(tree = self.get_tree())
-    else:
-      return ""
-
-  def __str__(self):
-    """
-    Print the smali code for this interface.
-    """
-    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
-                             self.ifaces))
-    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
-    if self.default:
-      funcs = self.DEFAULT_FUNC_TEMPLATE.format(class_name = self.class_name)
-    else:
-      funcs = ""
-    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
-                                               implements_spec = s_ifaces,
-                                               extends = "extends" if len(self.ifaces) else "",
-                                               ifaces = j_ifaces,
-                                               funcs = funcs,
-                                               tree = self.get_tree(),
-                                               class_name = self.class_name)
-
-def dump_tree(ifaces):
-  """
-  Yields all the interfaces transitively implemented by the set in
-  reverse-depth-first order
-  """
-  for i in ifaces:
-    yield from dump_tree(i.ifaces)
-    yield i
-
-def print_tree(ifaces):
-  """
-  Prints the tree for the given ifaces.
-  """
-  return " ".join(i.get_tree() for i in  ifaces)
-
-def clone_all(l):
-  return tuple(a.clone() for a in l)
-
-# Cached output of subtree_sizes for speed of access.
-SUBTREES = [set(tuple(l) for l in subtree_sizes(i))
-            for i in range(MAX_IFACE_DEPTH + 1)]
-
-def create_test_classes():
-  """
-  Yield all the test classes with the different interface trees
-  """
-  for num in range(1, MAX_IFACE_DEPTH + 1):
-    for split in SUBTREES[num]:
-      ifaces = []
-      for sub in split:
-        ifaces.append(list(create_interface_trees(sub)))
-    for supers in itertools.product(*ifaces):
-      yield TestClass(clone_all(supers))
-      for i in range(len(set(dump_tree(supers)) - set(supers))):
-        ns = clone_all(supers)
-        selected = sorted(set(dump_tree(ns)) - set(ns))[i]
-        yield TestClass(tuple([selected] + list(ns)))
-
-def create_interface_trees(num):
-  """
-  Yield all the interface trees up to 'num' depth.
-  """
-  if num == 0:
-    yield TestInterface(tuple(), False)
-    yield TestInterface(tuple(), True)
-    return
-  for split in SUBTREES[num]:
-    ifaces = []
-    for sub in split:
-      ifaces.append(list(create_interface_trees(sub)))
-    for supers in itertools.product(*ifaces):
-      yield TestInterface(clone_all(supers), False)
-      yield TestInterface(clone_all(supers), True)
-      # TODO Should add on some from higher up the tree.
-
-def create_all_test_files():
-  """
-  Creates all the objects representing the files in this test. They just need to
-  be dumped.
-  """
-  mc = MainClass()
-  classes = {mc}
-  for clazz in create_test_classes():
-    classes.add(clazz)
-    for i in dump_tree(clazz.ifaces):
-      classes.add(i)
-    mc.add_test(clazz)
-  return mc, classes
-
-def main(argv):
-  smali_dir = Path(argv[1])
-  if not smali_dir.exists() or not smali_dir.is_dir():
-    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
-    sys.exit(1)
-  expected_txt = Path(argv[2])
-  mainclass, all_files = create_all_test_files()
-  with expected_txt.open('w') as out:
-    print(mainclass.get_expected(), file=out)
-  for f in all_files:
-    f.dump(smali_dir)
-
-if __name__ == '__main__':
-  main(sys.argv)
diff --git a/test/965-default-verify/build b/test/965-default-verify/build
new file mode 100755
index 0000000..0dd8573
--- /dev/null
+++ b/test/965-default-verify/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Only set USE_JACK when not running with --jvm.
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/965-default-verify/expected.txt b/test/965-default-verify/expected.txt
new file mode 100644
index 0000000..b31314f
--- /dev/null
+++ b/test/965-default-verify/expected.txt
@@ -0,0 +1,15 @@
+Create Main instance
+Calling functions on concrete Main
+Calling verifiable function on Main
+Hello
+Calling unverifiable function on Main
+Expected NSME Thrown on Main
+Calling verifiable function on Main
+Hello
+Calling functions on interface Iface
+Calling verifiable function on Iface
+Hello
+Calling unverifiable function on Iface
+Expected NSME Thrown on Iface
+Calling verifiable function on Iface
+Hello
diff --git a/test/965-default-verify/info.txt b/test/965-default-verify/info.txt
new file mode 100644
index 0000000..2ccabf5
--- /dev/null
+++ b/test/965-default-verify/info.txt
@@ -0,0 +1,8 @@
+Smali-based tests for verification interaction with experimental interface
+default methods.
+
+build-src contains java files that are needed if you want to compile with javac,
+since javac is much more proactive than smali about finding likely runtime errors.
+
+To run with --jvm you must export JAVA_HOME to point to a Java 8 installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/965-default-verify/src/Iface.java b/test/965-default-verify/src/Iface.java
new file mode 100644
index 0000000..180fba2
--- /dev/null
+++ b/test/965-default-verify/src/Iface.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public default String sayHi() {
+    return "Hello";
+  }
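+  // Statics.nonexistantFunction() is defined in src/Statics.java but removed
+  // in the src2/Statics.java that is compiled in its place, so this method
+  // soft-fails verification and throws NoSuchMethodError when called (see
+  // expected.txt).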
+  public default void verificationSoftFail() {
+    Statics.nonexistantFunction();
+  }
+}
diff --git a/test/965-default-verify/src/Main.java b/test/965-default-verify/src/Main.java
new file mode 100644
index 0000000..6374cb5
--- /dev/null
+++ b/test/965-default-verify/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class Main implements Iface {
+  public static void main(String[] args) {
+    System.out.println("Create Main instance");
+    Main m = new Main();
+    System.out.println("Calling functions on concrete Main");
+    callMain(m);
+    System.out.println("Calling functions on interface Iface");
+    callIface(m);
+  }
+
+  public static void callMain(Main m) {
+    System.out.println("Calling verifiable function on Main");
+    System.out.println(m.sayHi());
+    System.out.println("Calling unverifiable function on Main");
+    try {
+      m.verificationSoftFail();
+      System.out.println("Unexpected no error Thrown on Main");
+    } catch (NoSuchMethodError e) {
+      System.out.println("Expected NSME Thrown on Main");
+    } catch (Throwable e) {
+      System.out.println("Unexpected Error Thrown on Main");
+      e.printStackTrace(System.out);
+    }
+    System.out.println("Calling verifiable function on Main");
+    System.out.println(m.sayHi());
+    return;
+  }
+
+  public static void callIface(Iface m) {
+    System.out.println("Calling verifiable function on Iface");
+    System.out.println(m.sayHi());
+    System.out.println("Calling unverifiable function on Iface");
+    try {
+      m.verificationSoftFail();
+      System.out.println("Unexpected no error Thrown on Iface");
+    } catch (NoSuchMethodError e) {
+      System.out.println("Expected NSME Thrown on Iface");
+    } catch (Throwable e) {
+      System.out.println("Unexpected Error Thrown on Iface");
+      e.printStackTrace(System.out);
+    }
+    System.out.println("Calling verifiable function on Iface");
+    System.out.println(m.sayHi());
+    return;
+  }
+}
diff --git a/test/965-default-verify/src/Statics.java b/test/965-default-verify/src/Statics.java
new file mode 100644
index 0000000..2e17ba4
--- /dev/null
+++ b/test/965-default-verify/src/Statics.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Statics {
+  public static void nonexistantFunction() {
+    System.out.println("I don't exist");
+  }
+}
+
diff --git a/test/965-default-verify/src2/Statics.java b/test/965-default-verify/src2/Statics.java
new file mode 100644
index 0000000..7899ca9
--- /dev/null
+++ b/test/965-default-verify/src2/Statics.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class Statics {
+  // public static void nonexistantFunction() {
+  //     System.out.println("I don't exist");
+  // }
+}
diff --git a/test/966-default-conflict/build b/test/966-default-conflict/build
new file mode 100755
index 0000000..0dd8573
--- /dev/null
+++ b/test/966-default-conflict/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Only set USE_JACK when not running with --jvm.
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/966-default-conflict/expected.txt b/test/966-default-conflict/expected.txt
new file mode 100644
index 0000000..fad2c25
--- /dev/null
+++ b/test/966-default-conflict/expected.txt
@@ -0,0 +1,18 @@
+Create Main instance
+Calling functions on concrete Main
+Calling non-conflicting function on Main
+CHARGE
+Calling conflicting function on Main
+Expected ICCE Thrown on Main
+Calling non-conflicting function on Main
+CHARGE
+Calling functions on interface Iface
+Calling non-conflicting function on Iface
+CHARGE
+Calling conflicting function on Iface
+Expected ICCE Thrown on Iface
+Calling non-conflicting function on Iface
+CHARGE
+Calling functions on interface Iface2
+Calling conflicting function on Iface2
+Expected ICCE Thrown on Iface2
diff --git a/test/966-default-conflict/info.txt b/test/966-default-conflict/info.txt
new file mode 100644
index 0000000..2b67657
--- /dev/null
+++ b/test/966-default-conflict/info.txt
@@ -0,0 +1,6 @@
+Smali-based tests for experimental interface default methods.
+
+Tests handling of default method conflicts.
+
+To run with --jvm you must export JAVA_HOME to point to a Java 8 installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/966-default-conflict/src/Iface.java b/test/966-default-conflict/src/Iface.java
new file mode 100644
index 0000000..2131ed8
--- /dev/null
+++ b/test/966-default-conflict/src/Iface.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public default String sayHi() {
+    return "Hi";
+  }
+  public default String charge() {
+    return "CHARGE";
+  }
+}
diff --git a/test/966-default-conflict/src/Iface2.java b/test/966-default-conflict/src/Iface2.java
new file mode 100644
index 0000000..8d97df8
--- /dev/null
+++ b/test/966-default-conflict/src/Iface2.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// We extend Iface so that javac will not complain that Iface2 does not declare a sayHi method,
+// or that it would have a soft-conflict on the sayHi method if it did.
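+// The src2/Iface2.java used at run time no longer extends Iface and instead
+// defines its own default sayHi(), so Main then sees two unrelated defaults
+// for sayHi and each call throws IncompatibleClassChangeError (see expected.txt).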
+public interface Iface2 extends Iface {
+  // public default String sayHi() {
+  //   return "hello";
+  // }
+}
+
+
diff --git a/test/966-default-conflict/src/Main.java b/test/966-default-conflict/src/Main.java
new file mode 100644
index 0000000..ce8cb47
--- /dev/null
+++ b/test/966-default-conflict/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class Main implements Iface, Iface2 {
+  public static void main(String[] args) {
+    System.out.println("Create Main instance");
+    Main m = new Main();
+    System.out.println("Calling functions on concrete Main");
+    callMain(m);
+    System.out.println("Calling functions on interface Iface");
+    callIface(m);
+    System.out.println("Calling functions on interface Iface2");
+    callIface2(m);
+  }
+  public static void callMain(Main m) {
+    System.out.println("Calling non-conflicting function on Main");
+    System.out.println(m.charge());
+    System.out.println("Calling conflicting function on Main");
+    try {
+      System.out.println(m.sayHi());
+      System.out.println("Unexpected no error Thrown on Main");
+    } catch (AbstractMethodError e) {
+      System.out.println("Unexpected AME Thrown on Main");
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Expected ICCE Thrown on Main");
+    }
+    System.out.println("Calling non-conflicting function on Main");
+    System.out.println(m.charge());
+    return;
+  }
+  public static void callIface(Iface m) {
+    System.out.println("Calling non-conflicting function on Iface");
+    System.out.println(m.charge());
+    System.out.println("Calling conflicting function on Iface");
+    try {
+      System.out.println(m.sayHi());
+      System.out.println("Unexpected no error Thrown on Iface");
+    } catch (AbstractMethodError e) {
+      System.out.println("Unexpected AME Thrown on Iface");
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Expected ICCE Thrown on Iface");
+    }
+    System.out.println("Calling non-conflicting function on Iface");
+    System.out.println(m.charge());
+    return;
+  }
+  public static void callIface2(Iface2 m) {
+    System.out.println("Calling conflicting function on Iface2");
+    try {
+      System.out.println(m.sayHi());
+      System.out.println("Unexpected no error Thrown on Iface2");
+    } catch (AbstractMethodError e) {
+      System.out.println("Unexpected AME Thrown on Iface2");
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Expected ICCE Thrown on Iface2");
+    }
+    return;
+  }
+}
diff --git a/test/966-default-conflict/src2/Iface2.java b/test/966-default-conflict/src2/Iface2.java
new file mode 100644
index 0000000..d29033c
--- /dev/null
+++ b/test/966-default-conflict/src2/Iface2.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface2 {
+  public default String sayHi() {
+    return "hello";
+  }
+}
diff --git a/test/967-default-ame/build b/test/967-default-ame/build
new file mode 100755
index 0000000..0dd8573
--- /dev/null
+++ b/test/967-default-ame/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Only set USE_JACK when not running with --jvm.
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/967-default-ame/expected.txt b/test/967-default-ame/expected.txt
new file mode 100644
index 0000000..cbd4ad3
--- /dev/null
+++ b/test/967-default-ame/expected.txt
@@ -0,0 +1,18 @@
+Create Main instance
+Calling functions on concrete Main
+Calling non-abstract function on Main
+CHARGE
+Calling abstract function on Main
+Expected AME Thrown on Main
+Calling non-abstract function on Main
+CHARGE
+Calling functions on interface Iface
+Calling non-abstract function on Iface
+CHARGE
+Calling abstract function on Iface
+Expected AME Thrown on Iface
+Calling non-abstract function on Iface
+CHARGE
+Calling functions on interface Iface2
+Calling abstract function on Iface2
+Expected AME Thrown on Iface2
diff --git a/test/967-default-ame/info.txt b/test/967-default-ame/info.txt
new file mode 100644
index 0000000..a346a32
--- /dev/null
+++ b/test/967-default-ame/info.txt
@@ -0,0 +1,6 @@
+Smali-based tests for experimental interface default methods.
+
+Tests handling of default method overrides.
+
+To run with --jvm you must export JAVA_HOME to point to a Java 8 installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/967-default-ame/src/Iface.java b/test/967-default-ame/src/Iface.java
new file mode 100644
index 0000000..2131ed8
--- /dev/null
+++ b/test/967-default-ame/src/Iface.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public default String sayHi() {
+    return "Hi";
+  }
+  public default String charge() {
+    return "CHARGE";
+  }
+}
diff --git a/test/967-default-ame/src/Iface2.java b/test/967-default-ame/src/Iface2.java
new file mode 100644
index 0000000..55b2ac0
--- /dev/null
+++ b/test/967-default-ame/src/Iface2.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
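+// The src2/Iface2.java used at run time uncomments the declaration below,
+// re-abstracting Iface's default sayHi(), so every sayHi call through this
+// hierarchy throws AbstractMethodError (see expected.txt).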
+public interface Iface2 extends Iface {
+  // public String sayHi();
+}
+
+
diff --git a/test/967-default-ame/src/Iface3.java b/test/967-default-ame/src/Iface3.java
new file mode 100644
index 0000000..a6faa45
--- /dev/null
+++ b/test/967-default-ame/src/Iface3.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Iface3 {
+  // public String charge();
+}
diff --git a/test/967-default-ame/src/Main.java b/test/967-default-ame/src/Main.java
new file mode 100644
index 0000000..3e48062
--- /dev/null
+++ b/test/967-default-ame/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+class Main implements Iface, Iface2, Iface3 {
+  public static void main(String[] args) {
+    System.out.println("Create Main instance");
+    Main m = new Main();
+    System.out.println("Calling functions on concrete Main");
+    callMain(m);
+    System.out.println("Calling functions on interface Iface");
+    callIface(m);
+    System.out.println("Calling functions on interface Iface2");
+    callIface2(m);
+  }
+  public static void callMain(Main m) {
+    System.out.println("Calling non-abstract function on Main");
+    System.out.println(m.charge());
+    System.out.println("Calling abstract function on Main");
+    try {
+      System.out.println(m.sayHi());
+      System.out.println("Unexpected no error Thrown on Main");
+    } catch (AbstractMethodError e) {
+      System.out.println("Expected AME Thrown on Main");
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Unexpected ICCE Thrown on Main");
+    }
+    System.out.println("Calling non-abstract function on Main");
+    System.out.println(m.charge());
+    return;
+  }
+  public static void callIface(Iface m) {
+    System.out.println("Calling non-abstract function on Iface");
+    System.out.println(m.charge());
+    System.out.println("Calling abstract function on Iface");
+    try {
+      System.out.println(m.sayHi());
+      System.out.println("Unexpected no error Thrown on Iface");
+    } catch (AbstractMethodError e) {
+      System.out.println("Expected AME Thrown on Iface");
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Unexpected ICCE Thrown on Iface");
+    }
+    System.out.println("Calling non-abstract function on Iface");
+    System.out.println(m.charge());
+    return;
+  }
+  public static void callIface2(Iface2 m) {
+    System.out.println("Calling abstract function on Iface2");
+    try {
+      System.out.println(m.sayHi());
+      System.out.println("Unexpected no error Thrown on Iface2");
+    } catch (AbstractMethodError e) {
+      System.out.println("Expected AME Thrown on Iface2");
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Unexpected ICCE Thrown on Iface2");
+    }
+    return;
+  }
+}
diff --git a/test/967-default-ame/src2/Iface.java b/test/967-default-ame/src2/Iface.java
new file mode 100644
index 0000000..2131ed8
--- /dev/null
+++ b/test/967-default-ame/src2/Iface.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public default String sayHi() {
+    return "Hi";
+  }
+  public default String charge() {
+    return "CHARGE";
+  }
+}
diff --git a/test/967-default-ame/src2/Iface2.java b/test/967-default-ame/src2/Iface2.java
new file mode 100644
index 0000000..0e4fb5f
--- /dev/null
+++ b/test/967-default-ame/src2/Iface2.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface2 extends Iface {
+  public String sayHi();
+}
diff --git a/test/967-default-ame/src2/Iface3.java b/test/967-default-ame/src2/Iface3.java
new file mode 100644
index 0000000..70fc33b
--- /dev/null
+++ b/test/967-default-ame/src2/Iface3.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface3 {
+  public String charge();
+}
diff --git a/test/968-default-partial-compile-generated/build b/test/968-default-partial-compile-generated/build
new file mode 100755
index 0000000..1e9f8aa
--- /dev/null
+++ b/test/968-default-partial-compile-generated/build
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+# TODO: Support running with jack.
+
+if [[ $@ == *"--jvm"* ]]; then
+  # Build the Java files if we are running a --jvm test
+  mkdir -p classes
+  mkdir -p src
+  echo "${JAVAC} \$@" >> ./javac_exec.sh
+  # This will use javac_exec.sh to execute the javac compiler. It will place the
+  # compiled class files in ./classes and the expected values in expected.txt
+  #
+  # After this the src directory will contain the final versions of all files.
+  ./util-src/generate_java.py ./javac_exec.sh ./src ./classes ./expected.txt ./build_log
+else
+  mkdir -p ./smali
+  # Generate the smali files and expected.txt or fail
+  ./util-src/generate_smali.py ./smali ./expected.txt
+  # Use the default build script
+  ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
+fi
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/968-default-partial-compile-generated/expected.txt b/test/968-default-partial-compile-generated/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/968-default-partial-compile-generated/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py do not directly modify!
diff --git a/test/968-default-partial-compile-generated/info.txt b/test/968-default-partial-compile-generated/info.txt
new file mode 100644
index 0000000..bc1c428
--- /dev/null
+++ b/test/968-default-partial-compile-generated/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct in the presence of
+partial compilation/illegal invokes.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
+
+When run, smali test files are generated by the util-src/generate_smali.py
+script.  If run with --jvm, the util-src/generate_java.py script is used
+instead to generate equivalent java code based on the smali code.
+
+Care should be taken when updating the generate_smali.py script. It should always
+return equivalent output when run multiple times and the expected output should
+be valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
diff --git a/test/968-default-partial-compile-generated/util-src/generate_java.py b/test/968-default-partial-compile-generated/util-src/generate_java.py
new file mode 100755
index 0000000..35290ef
--- /dev/null
+++ b/test/968-default-partial-compile-generated/util-src/generate_java.py
@@ -0,0 +1,134 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate java test files for test 968.
+"""
+
+import generate_smali as base
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+import testgen.mixins as mixins
+import functools
+import operator
+import subprocess
+
+class JavaConverter(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A class that can convert a SmaliFile to a JavaFile.
+  """
+  def __init__(self, inner):
+    self.inner = inner
+
+  def get_name(self):
+    """Gets the name of this file."""
+    return self.inner.get_name()
+
+  def __str__(self):
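+    """
+    Convert to Java source: the smali templates embed their Java equivalent in
+    '#' comment lines, so stripping the leading '#' yields a compilable file.
+    """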
+    out = ""
+    for line in str(self.inner).splitlines(keepends = True):
+      if line.startswith("#"):
+        out += line[1:]
+    return out
+
+class Compiler:
+  def __init__(self, sources, javac, temp_dir, classes_dir):
+    self.javac = javac
+    self.temp_dir = temp_dir
+    self.classes_dir = classes_dir
+    self.sources = sources
+
+  def compile_files(self, args, files):
+    """
+    Compile the files given with the arguments given.
+    """
+    args = args.split()
+    files = list(map(str, files))
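+    # self.javac is the wrapper script written by ./build (javac_exec.sh,
+    # which forwards its arguments to ${JAVAC}), so run it through sh with
+    # -e to abort on any compile failure.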
+    cmd = ['sh', '-a', '-e', '--', str(self.javac)] + args + files
+    print("Running compile command: {}".format(cmd))
+    subprocess.check_call(cmd)
+    print("Compiled {} files".format(len(files)))
+
+  def execute(self):
+    """
+    Compiles this test, doing partial compilation as necessary.
+    """
+    # Compile Main and all classes first. Force all interfaces to be default so that there will be
+    # no compiler problems (works since classes only implement 1 interface).
+    for f in self.sources:
+      if isinstance(f, base.TestInterface):
+        JavaConverter(f.get_specific_version(base.InterfaceType.default)).dump(self.temp_dir)
+      else:
+        JavaConverter(f).dump(self.temp_dir)
+    self.compile_files("-d {}".format(self.classes_dir), self.temp_dir.glob("*.java"))
+
+    # Now we compile the interfaces
+    ifaces = set(i for i in self.sources if isinstance(i, base.TestInterface))
+    while len(ifaces) != 0:
+      # Find those ifaces where there are no (uncompiled) interfaces that are subtypes.
+      tops = set(filter(lambda a: not any(map(lambda i: a in i.get_super_types(), ifaces)), ifaces))
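+      # 'tops' are the most-derived interfaces not yet compiled; compiling
+      # them against empty stubs of their supertypes (dumped below) keeps
+      # javac from rejecting the intentionally inconsistent hierarchy.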
+      files = []
+      # Dump these ones, they are getting compiled.
+      for f in tops:
+        out = JavaConverter(f)
+        out.dump(self.temp_dir)
+        files.append(self.temp_dir / out.get_file_name())
+      # Force all superinterfaces of these to be empty so there will be no conflicts
+      overrides = functools.reduce(operator.or_, map(lambda i: i.get_super_types(), tops), set())
+      for overridden in overrides:
+        out = JavaConverter(overridden.get_specific_version(base.InterfaceType.empty))
+        out.dump(self.temp_dir)
+        files.append(self.temp_dir / out.get_file_name())
+      self.compile_files("-d {outdir} -cp {outdir}".format(outdir = self.classes_dir), files)
+      # Remove these from the set of interfaces to be compiled.
+      ifaces -= tops
+    print("Finished compiling all files.")
+    return
+
+def main(argv):
+  javac_exec = Path(argv[1])
+  if not javac_exec.exists() or not javac_exec.is_file():
+    print("{} is not a shell script".format(javac_exec), file=sys.stderr)
+    sys.exit(1)
+  temp_dir = Path(argv[2])
+  if not temp_dir.exists() or not temp_dir.is_dir():
+    print("{} is not a valid source dir".format(temp_dir), file=sys.stderr)
+    sys.exit(1)
+  classes_dir = Path(argv[3])
+  if not classes_dir.exists() or not classes_dir.is_dir():
+    print("{} is not a valid classes directory".format(classes_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[4])
+  mainclass, all_files = base.create_all_test_files()
+
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  print("Wrote expected output")
+
+  Compiler(all_files, javac_exec, temp_dir, classes_dir).execute()
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/968-default-partial-compile-generated/util-src/generate_smali.py b/test/968-default-partial-compile-generated/util-src/generate_smali.py
new file mode 100755
index 0000000..9855bcf
--- /dev/null
+++ b/test/968-default-partial-compile-generated/util-src/generate_smali.py
@@ -0,0 +1,607 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 968.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from enum import Enum
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the type tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_funcs}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 0
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass smali code.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_funcs = ""
+    for t in all_tests:
+      test_funcs += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("smali"),
+                                           test_funcs = test_funcs,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     {farg} v = null;
+#     try {{
+#       v = new {farg}();
+#     }} catch (Throwable e) {{
+#       System.out.println("Unexpected error occurred which creating {farg} instance");
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#     try {{
+#       System.out.printf("{tree} calls %s\\n", v.getName());
+#       return;
+#     }} catch (AbstractMethodError e) {{
+#       System.out.println("{tree} threw AbstractMethodError");
+#     }} catch (NoSuchMethodError e) {{
+#       System.out.println("{tree} threw NoSuchMethodError");
+#     }} catch (IncompatibleClassChangeError e) {{
+#       System.out.println("{tree} threw IncompatibleClassChangeError");
+#     }} catch (Throwable e) {{
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    :new_{fname}_try_start
+      new-instance v0, L{farg};
+      invoke-direct {{v0}}, L{farg};-><init>()V
+      goto :call_{fname}_try_start
+    :new_{fname}_try_end
+    .catch Ljava/lang/Throwable; {{:new_{fname}_try_start .. :new_{fname}_try_end}} :new_error_{fname}_start
+    :new_error_{fname}_start
+      move-exception v6
+      const-string v5, "Unexpected error occurred which creating {farg} instance"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+    :call_{fname}_try_start
+      const/4 v1, 1
+      new-array v2,v1, [Ljava/lang/Object;
+      const/4 v1, 0
+      invoke-virtual {{v0}}, L{farg};->getName()Ljava/lang/String;
+      move-result-object v3
+      aput-object v3,v2,v1
+
+      const-string v5, "{tree} calls %s\\n"
+
+      invoke-virtual {{v4,v5,v2}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/AbstractMethodError; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :AME_{fname}_start
+    .catch Ljava/lang/NoSuchMethodError; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :NSME_{fname}_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :ICCE_{fname}_start
+    .catch Ljava/lang/Throwable; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :AME_{fname}_start
+      const-string v5, "{tree} threw AbstractMethodError"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      return-void
+    :NSME_{fname}_start
+      const-string v5, "{tree} threw NoSuchMethodError"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      return-void
+    :ICCE_{fname}_start
+      const-string v5, "{tree} threw IncompatibleClassChangeError"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      return-void
+    :error_{fname}_start
+      move-exception v6
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  NSME_RESULT_TEMPLATE = "{tree} threw NoSuchMethodError"
+  ICCE_RESULT_TEMPLATE = "{tree} threw IncompatibleClassChangeError"
+  AME_RESULT_TEMPLATE = "{tree} threw AbstractMethodError"
+  NORMAL_RESULT_TEMPLATE = "{tree} calls {result}"
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def get_expected(self):
+    """
+    Get the expected output calling this function.
+    """
+    exp = self.farg.get_called()
+    if exp.is_empty():
+      return self.NSME_RESULT_TEMPLATE.format(tree = self.farg.get_tree())
+    elif exp.is_abstract():
+      return self.AME_RESULT_TEMPLATE.format(tree = self.farg.get_tree())
+    elif exp.is_conflict():
+      return self.ICCE_RESULT_TEMPLATE.format(tree = self.farg.get_tree())
+    else:
+      assert exp.is_default()
+      return self.NORMAL_RESULT_TEMPLATE.format(tree = self.farg.get_tree(),
+                                                result = exp.get_tree())
+
+  def get_name(self):
+    """
+    Get the name of this function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def __str__(self):
+    """
+    Print the smali code of this function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(tree = self.farg.get_tree(),
+                                              fname = self.get_name(),
+                                              farg = self.farg.get_name())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test default method resolution order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+.implements L{iface_name};
+
+# public class {class_name} implements {iface_name} {{
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+{funcs}
+
+# }}
+"""
+
+  def __init__(self, iface):
+    """
+    Initialize this test class which implements the given interface
+    """
+    self.iface = iface
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
+                                                iface_tree = self.iface.get_tree())
+
+  def __iter__(self):
+    """
+    Step through all interfaces implemented transitively by this class
+    """
+    yield self.iface
+    yield from self.iface
+
+  def get_called(self):
+    """
+    Returns the interface that will be called when the method on this class is invoked or
+    CONFLICT_TYPE if there is no interface that will be called.
+    """
+    return self.iface.get_called()
+
+  def __str__(self):
+    """
+    Print the smali code of this class.
+    """
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           iface_name = self.iface.get_name(),
+                                           tree = self.get_tree(),
+                                           class_name = self.class_name,
+                                           funcs = "")
+
+class InterfaceType(Enum):
+  """
+  An enumeration of all the different types of interfaces we can have.
+
+  default: It has a default method
+  abstract: It has a method declared but not defined
+  empty: It does not have the method
+  """
+  default = 0
+  abstract = 1
+  empty = 2
+
+  def get_suffix(self):
+    if self == InterfaceType.default:
+      return "_DEFAULT"
+    elif self == InterfaceType.abstract:
+      return "_ABSTRACT"
+    elif self == InterfaceType.empty:
+      return "_EMPTY"
+    else:
+      raise TypeError("Interface type had illegal value.")
+
+class ConflictInterface:
+  """
+  A singleton representing a conflict of default methods.
+  """
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return True
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return False
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return False
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return False
+
+CONFLICT_TYPE = ConflictInterface()
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+
+{funcs}
+
+# }}
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default String getName() {{
+#     return "{tree}";
+#   }}
+.method public getName()Ljava/lang/String;
+  .locals 1
+  const-string v0, "{tree}"
+  return-object v0
+.end method
+"""
+
+  ABSTRACT_FUNC_TEMPLATE = """
+#   public String getName();
+.method public abstract getName()Ljava/lang/String;
+.end method
+"""
+
+  EMPTY_FUNC_TEMPLATE = """"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, iface_type, full_name = None):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = sorted(ifaces)
+    self.iface_type = iface_type
+    if full_name is None:
+      end = self.iface_type.get_suffix()
+      self.class_name = "INTERFACE_"+gensym()+end
+    else:
+      self.class_name = full_name
+
+  def get_specific_version(self, v):
+    """
+    Returns a copy of this interface of the given type for use in partial compilation.
+    """
+    return TestInterface(self.ifaces, v, full_name = self.class_name)
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return False
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return self.iface_type == InterfaceType.abstract
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return self.iface_type == InterfaceType.empty
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return self.iface_type == InterfaceType.default
+
+  def get_called(self):
+    """
+    Returns the interface that will be called when the method on this interface
+    is invoked, or CONFLICT_TYPE if the invocation would result in a conflict.
+    """
+    if not self.is_empty() or len(self.ifaces) == 0:
+      return self
+    else:
+      best = self
+      for super_iface in self.ifaces:
+        super_best = super_iface.get_called()
+        if super_best.is_conflict():
+          return CONFLICT_TYPE
+        elif best.is_default():
+          if super_best.is_default():
+            return CONFLICT_TYPE
+        elif best.is_abstract():
+          if super_best.is_default():
+            best = super_best
+        else:
+          assert best.is_empty()
+          best = super_best
+      return best
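+    # Illustrative walk (hypothetical tree, not generated output): for an empty
+    # interface whose direct supers are one default iface D and one abstract
+    # iface A, the loop above settles on D regardless of iteration order; a
+    # second default super-interface would yield CONFLICT_TYPE instead.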
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def __str__(self):
+    """
+    Print the smali code of this interface.
+    """
+    s_ifaces = " "
+    j_ifaces = " "
+    for i in self.ifaces:
+      s_ifaces += self.IMPLEMENTS_TEMPLATE.format(iface_name = i.get_name())
+      j_ifaces += " {},".format(i.get_name())
+    j_ifaces = j_ifaces[0:-1]
+    if self.is_default():
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(tree = self.get_tree())
+    elif self.is_abstract():
+      funcs = self.ABSTRACT_FUNC_TEMPLATE.format()
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def print_tree(ifaces):
+  """
+  Prints a list of iface trees
+  """
+  return " ".join(i.get_tree() for i in ifaces)
+
+# The deduplicated output of subtree_sizes for each size up to
+# MAX_IFACE_DEPTH.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for iface in create_interface_trees(num):
+      yield TestClass(iface)
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    for iftype in InterfaceType:
+      yield TestInterface(tuple(), iftype)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    yield TestInterface(tuple(), InterfaceType.default)
+    for supers in itertools.product(*ifaces):
+      for iftype in InterfaceType:
+        if iftype == InterfaceType.default:
+          # We can just stop at defaults. We have other tests checking that a
+          # default can override an abstract, and skipping defaults here cuts
+          # down the number of cases significantly, improving the speed of this test.
+          continue
+        yield TestInterface(supers, iftype)
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in clazz:
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/969-iface-super/build b/test/969-iface-super/build
new file mode 100755
index 0000000..e8f4ed0
--- /dev/null
+++ b/test/969-iface-super/build
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+# Generate the Main.java file or fail
+${ANDROID_BUILD_TOP}/art/test/utils/python/generate_java_main.py ./src
+
+./default-build "$@" --experimental default-methods
diff --git a/test/969-iface-super/expected.txt b/test/969-iface-super/expected.txt
new file mode 100644
index 0000000..f7a63d6
--- /dev/null
+++ b/test/969-iface-super/expected.txt
@@ -0,0 +1,47 @@
+Testing for type A
+A-virtual           A.SayHi()='Hello '
+A-interface     Iface.SayHi()='Hello '
+End testing for type A
+Testing for type B
+B-virtual           B.SayHi()='Hello Hello '
+B-interface     Iface.SayHi()='Hello Hello '
+B-interface    Iface2.SayHi()='Hello Hello '
+End testing for type B
+Testing for type C
+C-virtual           C.SayHi()='Hello  and welcome '
+C-interface     Iface.SayHi()='Hello  and welcome '
+End testing for type C
+Testing for type D
+D-virtual           D.SayHi()='Hello Hello  and welcome '
+D-interface     Iface.SayHi()='Hello Hello  and welcome '
+D-interface    Iface2.SayHi()='Hello Hello  and welcome '
+End testing for type D
+Testing for type E
+E-virtual           E.SayHi()='Hello  there!'
+E-interface     Iface.SayHi()='Hello  there!'
+E-interface    Iface3.SayHi()='Hello  there!'
+End testing for type E
+Testing for type F
+F-virtual           E.SayHi()='Hello  there!'
+F-virtual           F.SayHi()='Hello  there!'
+F-interface     Iface.SayHi()='Hello  there!'
+F-interface    Iface3.SayHi()='Hello  there!'
+F-virtual           F.SaySurprisedHi()='Hello  there!!'
+End testing for type F
+Testing for type G
+G-virtual           E.SayHi()='Hello  there!?'
+G-virtual           F.SayHi()='Hello  there!?'
+G-virtual           G.SayHi()='Hello  there!?'
+G-interface     Iface.SayHi()='Hello  there!?'
+G-interface    Iface3.SayHi()='Hello  there!?'
+G-virtual           F.SaySurprisedHi()='Hello  there!!'
+G-virtual           G.SaySurprisedHi()='Hello  there!!'
+G-virtual           G.SayVerySurprisedHi()='Hello  there!!!'
+End testing for type G
+Testing for type H
+H-virtual           H.SayConfusedHi()='Hello ?!'
+H-virtual           A.SayHi()='Hello ?'
+H-virtual           H.SayHi()='Hello ?'
+H-interface     Iface.SayHi()='Hello ?'
+H-virtual           H.SaySurprisedHi()='Hello !'
+End testing for type H
diff --git a/test/969-iface-super/info.txt b/test/969-iface-super/info.txt
new file mode 100644
index 0000000..c0555d2
--- /dev/null
+++ b/test/969-iface-super/info.txt
@@ -0,0 +1,15 @@
+Smali-based tests for experimental interface default methods.
+
+This tests invoke-super with default methods.
+
+To run with --jvm you must export JAVA_HOME to a Java 8 installation and pass
+--use-java-home to run-test.
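+
+The pattern exercised here is the one visible in src/C.java: a class overrides
+a default method and chains to it with an interface-qualified super call, e.g.
+
+  public class C implements Iface {
+    public String SayHi() {
+      return Iface.super.SayHi() + " and welcome ";
+    }
+  }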
diff --git a/test/969-iface-super/src/A.java b/test/969-iface-super/src/A.java
new file mode 100644
index 0000000..47db14b
--- /dev/null
+++ b/test/969-iface-super/src/A.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class A implements Iface { }
diff --git a/test/969-iface-super/src/B.java b/test/969-iface-super/src/B.java
new file mode 100644
index 0000000..70f63a2
--- /dev/null
+++ b/test/969-iface-super/src/B.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class B implements Iface2 { }
diff --git a/test/969-iface-super/src/C.java b/test/969-iface-super/src/C.java
new file mode 100644
index 0000000..0fa0b92
--- /dev/null
+++ b/test/969-iface-super/src/C.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class C implements Iface {
+  public String SayHi() {
+    return Iface.super.SayHi() + " and welcome ";
+  }
+}
diff --git a/test/969-iface-super/src/D.java b/test/969-iface-super/src/D.java
new file mode 100644
index 0000000..8a607c3
--- /dev/null
+++ b/test/969-iface-super/src/D.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class D implements Iface2 {
+  public String SayHi() {
+    return Iface2.super.SayHi() + " and welcome ";
+  }
+}
diff --git a/test/969-iface-super/src/E.java b/test/969-iface-super/src/E.java
new file mode 100644
index 0000000..d5942b2
--- /dev/null
+++ b/test/969-iface-super/src/E.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class E implements Iface3 {
+  public String SayHi() {
+    return Iface3.super.SayHi() + " there!";
+  }
+}
diff --git a/test/969-iface-super/src/F.java b/test/969-iface-super/src/F.java
new file mode 100644
index 0000000..610bcb1
--- /dev/null
+++ b/test/969-iface-super/src/F.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class F extends E {
+  public String SaySurprisedHi() {
+    return super.SayHi() + "!";
+  }
+}
diff --git a/test/969-iface-super/src/G.java b/test/969-iface-super/src/G.java
new file mode 100644
index 0000000..edaf3a9
--- /dev/null
+++ b/test/969-iface-super/src/G.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class G extends F {
+  public String SayHi() {
+    return super.SayHi() + "?";
+  }
+  public String SayVerySurprisedHi() {
+    return super.SaySurprisedHi() + "!";
+  }
+}
diff --git a/test/969-iface-super/src/H.java b/test/969-iface-super/src/H.java
new file mode 100644
index 0000000..744bda6
--- /dev/null
+++ b/test/969-iface-super/src/H.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class H extends A {
+  public String SayHi() {
+    return super.SayHi() + "?";
+  }
+  public String SaySurprisedHi() {
+    return super.SayHi() + "!";
+  }
+  public String SayConfusedHi() {
+    return SayHi() + "!";
+  }
+}
diff --git a/test/969-iface-super/src/Iface.java b/test/969-iface-super/src/Iface.java
new file mode 100644
index 0000000..ece5e59
--- /dev/null
+++ b/test/969-iface-super/src/Iface.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface {
+  public default String SayHi() {
+    return "Hello ";
+  }
+}
diff --git a/test/969-iface-super/src/Iface2.java b/test/969-iface-super/src/Iface2.java
new file mode 100644
index 0000000..d74ee6d
--- /dev/null
+++ b/test/969-iface-super/src/Iface2.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface2 extends Iface {
+  public default String SayHi() {
+    return Iface.super.SayHi() + Iface.super.SayHi();
+  }
+}
diff --git a/test/969-iface-super/src/Iface3.java b/test/969-iface-super/src/Iface3.java
new file mode 100644
index 0000000..10b010c
--- /dev/null
+++ b/test/969-iface-super/src/Iface3.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface Iface3 extends Iface { }
diff --git a/test/969-iface-super/src/classes.xml b/test/969-iface-super/src/classes.xml
new file mode 100644
index 0000000..4c3dae4
--- /dev/null
+++ b/test/969-iface-super/src/classes.xml
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright 2015 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+
+<data>
+  <classes>
+    <class name="A" super="java/lang/Object">
+      <implements>
+        <item>Iface</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="B" super="java/lang/Object">
+      <implements>
+        <item>Iface2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="C" super="java/lang/Object">
+      <implements>
+        <item>Iface</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="D" super="java/lang/Object">
+      <implements>
+        <item>Iface2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="E" super="java/lang/Object">
+      <implements>
+        <item>Iface3</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="F" super="E">
+      <implements> </implements>
+      <methods>
+        <item>SaySurprisedHi</item>
+      </methods>
+    </class>
+
+    <class name="G" super="F">
+      <implements> </implements>
+      <methods>
+        <item>SayVerySurprisedHi</item>
+      </methods>
+    </class>
+
+    <class name="H" super="A">
+      <implements> </implements>
+      <methods>
+        <item>SaySurprisedHi</item>
+        <item>SayConfusedHi</item>
+      </methods>
+    </class>
+  </classes>
+
+  <interfaces>
+    <interface name="Iface" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <item>SayHi</item>
+      </methods>
+    </interface>
+
+    <interface name="Iface2" super="java/lang/Object">
+      <implements>
+        <item>Iface</item>
+      </implements>
+      <methods> </methods>
+    </interface>
+
+    <interface name="Iface3" super="java/lang/Object">
+      <implements>
+        <item>Iface</item>
+      </implements>
+      <methods> </methods>
+    </interface>
+  </interfaces>
+</data>
diff --git a/test/970-iface-super-resolution-generated/build b/test/970-iface-super-resolution-generated/build
new file mode 100755
index 0000000..fd1b271
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/build
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+# Should we compile with Java source code. By default we will use Smali.
+USES_JAVA_SOURCE="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA_SOURCE="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 -D jack.android.min-api-level=24 2>/dev/null; then
+    USES_JAVA_SOURCE="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
+  # Build the Java files
+  mkdir -p src
+  mkdir -p src2
+  ./util-src/generate_java.py ./src2 ./src ./expected.txt
+else
+  # Generate the smali files and expected.txt or fail
+  mkdir -p smali
+  ./util-src/generate_smali.py ./smali ./expected.txt
+fi
+
+./default-build "$@" --experimental default-methods
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/970-iface-super-resolution-generated/expected.txt b/test/970-iface-super-resolution-generated/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py; do not modify it directly!
diff --git a/test/970-iface-super-resolution-generated/info.txt b/test/970-iface-super-resolution-generated/info.txt
new file mode 100644
index 0000000..2cd2cc7
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/info.txt
@@ -0,0 +1,28 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
+
+When run, smali test files are generated by the util-src/generate_smali.py
+script.  If we run with --jvm, we will use the
+$(ANDROID_BUILD_TOP)/art/tools/extract-embedded-java script to turn the smali
+into equivalent Java using the embedded Java code.
+
+Care should be taken when updating the generate_smali.py script. It should
+always produce equivalent output when run multiple times, and the expected
+output should be valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
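+
+A sketch of the property being generated and checked (names hypothetical, in
+the spirit of the INTERFACE_* names used by the script):
+
+  interface IfaceDefault { default String getCalledInterface() { return "IfaceDefault"; } }
+  interface IfaceEmpty extends IfaceDefault { }
+
+An invoke-super naming IfaceEmpty must resolve to IfaceDefault's
+implementation; when no reachable default exists the call must throw
+NoSuchMethodError, and conflicting defaults must raise
+IncompatibleClassChangeError.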
diff --git a/test/970-iface-super-resolution-generated/util-src/generate_java.py b/test/970-iface-super-resolution-generated/util-src/generate_java.py
new file mode 100755
index 0000000..c12f10d
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/util-src/generate_java.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 970.
+"""
+
+import generate_smali as base
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+import testgen.mixins as mixins
+
+class JavaConverter(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A class that can convert a SmaliFile to a JavaFile.
+  """
+  def __init__(self, inner):
+    self.inner = inner
+
+  def get_name(self):
+    return self.inner.get_name()
+
+  def __str__(self):
+    out = ""
+    for line in str(self.inner).splitlines(keepends = True):
+      if line.startswith("#"):
+        out += line[1:]
+    return out
+
+def main(argv):
+  final_java_dir = Path(argv[1])
+  if not final_java_dir.exists() or not final_java_dir.is_dir():
+    print("{} is not a valid java dir".format(final_java_dir), file=sys.stderr)
+    sys.exit(1)
+  initial_java_dir = Path(argv[2])
+  if not initial_java_dir.exists() or not initial_java_dir.is_dir():
+    print("{} is not a valid java dir".format(initial_java_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[3])
+  mainclass, all_files = base.create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    if f.initial_build_different():
+      JavaConverter(f).dump(final_java_dir)
+      JavaConverter(f.get_initial_build_version()).dump(initial_java_dir)
+    else:
+      JavaConverter(f).dump(initial_java_dir)
+      if isinstance(f, base.TestInterface):
+        JavaConverter(f).dump(final_java_dir)
+
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/970-iface-super-resolution-generated/util-src/generate_smali.py b/test/970-iface-super-resolution-generated/util-src/generate_smali.py
new file mode 100755
index 0000000..cb7b0fa
--- /dev/null
+++ b/test/970-iface-super-resolution-generated/util-src/generate_smali.py
@@ -0,0 +1,618 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 970.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_groups}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def initial_build_different(self):
+    return False
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the smali code for this test.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           test_groups = test_groups,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     try {{
+#       {farg} v = new {farg}();
+#       System.out.println("Testing {tree}");
+#       v.testAll();
+#       System.out.println("Success: testing {tree}");
+#       return;
+#     }} catch (Exception e) {{
+#       System.out.println("Failure: testing {tree}");
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    :call_{fname}_try_start
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+      new-instance v6, L{farg};
+      invoke-direct {{v6}}, L{farg};-><init>()V
+
+      const-string v3, "Testing {tree}"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      invoke-virtual {{v6}}, L{farg};->testAll()V
+
+      const-string v3, "Success: testing {tree}"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Exception; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v3
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      const-string v4, "Failure: testing {tree}"
+      invoke-virtual {{v2, v4}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-virtual {{v3, v2}}, Ljava/lang/Exception;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  OUTPUT_FORMAT = """
+Testing {tree}
+{test_output}
+Success: testing {tree}
+""".strip()
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def __str__(self):
+    """
+    Print the smali code for this test function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname = self.get_name(),
+                                              farg  = self.farg.get_name(),
+                                              tree  = self.farg.get_tree())
+
+  def get_name(self):
+    """
+    Gets the name of this test function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def get_expected(self):
+    """
+    Get the expected output of this function.
+    """
+    return self.OUTPUT_FORMAT.format(
+        tree = self.farg.get_tree(),
+        test_output = self.farg.get_test_output().strip())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test interface method resolution order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public class {class_name} implements {ifaces} {{
+#
+#   public {class_name}() {{
+#   }}
+.method public constructor <init>()V
+  .locals 2
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+#   public String getCalledInterface() {{
+#     throw new Error("Should not be called");
+#   }}
+.method public getCalledInterface()Ljava/lang/String;
+  .locals 2
+  const-string v0, "Should not be called"
+  new-instance v1, Ljava/lang/Error;
+  invoke-direct {{v1, v0}}, Ljava/lang/Error;-><init>(Ljava/lang/String;)V
+  throw v1
+.end method
+
+#   public void testAll() {{
+#     boolean failed = false;
+#     Error exceptions = new Error("Test failures");
+.method public testAll()V
+  .locals 5
+  const/4 v0, 0
+  const-string v1, "Test failures"
+  new-instance v2, Ljava/lang/Error;
+  invoke-direct {{v2, v1}}, Ljava/lang/Error;-><init>(Ljava/lang/String;)V
+
+  {test_calls}
+
+#     if (failed) {{
+  if-eqz v0, :end
+#       throw exceptions;
+    throw v2
+  :end
+#     }}
+  return-void
+#   }}
+.end method
+
+{test_funcs}
+
+# }}
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  TEST_CALL_TEMPLATE = """
+#     try {{
+#       test_{iface}_super();
+#     }} catch (Throwable t) {{
+#       exceptions.addSuppressed(t);
+#       failed = true;
+#     }}
+  :try_{iface}_start
+    invoke-virtual {{p0}}, L{class_name};->test_{iface}_super()V
+    goto :error_{iface}_end
+  :try_{iface}_end
+  .catch Ljava/lang/Throwable; {{:try_{iface}_start .. :try_{iface}_end}} :error_{iface}_start
+  :error_{iface}_start
+    move-exception v3
+    invoke-virtual {{v2, v3}}, Ljava/lang/Throwable;->addSuppressed(Ljava/lang/Throwable;)V
+    const/4 v0, 1
+  :error_{iface}_end
+"""
+
+  TEST_FUNC_TEMPLATE = """
+#   public void test_{iface}_super() {{
+#     try {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): " +
+#                          {iface}.super.getCalledInterface());
+#     }} catch (NoSuchMethodError e) {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): NoSuchMethodError");
+#     }} catch (IncompatibleClassChangeError e) {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): IncompatibleClassChangeError");
+#     }} catch (Throwable t) {{
+#       System.out.println("{class_name} -> {iface}.super.getCalledInterface(): Unknown error occurred");
+#       throw t;
+#     }}
+#   }}
+.method public test_{iface}_super()V
+  .locals 3
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  :try_start
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): "
+    invoke-super {{p0}}, L{iface};->getCalledInterface()Ljava/lang/String;
+    move-result-object v2
+
+    invoke-virtual {{v1, v2}}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v1
+
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+  :try_end
+  .catch Ljava/lang/NoSuchMethodError; {{:try_start .. :try_end}} :AME_catch
+  .catch Ljava/lang/IncompatibleClassChangeError; {{:try_start .. :try_end}} :ICCE_catch
+  .catch Ljava/lang/Throwable; {{:try_start .. :try_end}} :throwable_catch
+  :AME_catch
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): NoSuchMethodError"
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+  :ICCE_catch
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): IncompatibleClassChangeError"
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+  :throwable_catch
+    move-exception v2
+    const-string v1, "{class_name} -> {iface}.super.getCalledInterface(): Unknown error occurred"
+    invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    throw v2
+.end method
+""".strip()
+
+  OUTPUT_TEMPLATE = "{class_name} -> {iface}.super.getCalledInterface(): {result}"
+
+  def __init__(self, ifaces, name = None):
+    """
+    Initialize this test class which implements the given interfaces
+    """
+    self.ifaces = ifaces
+    if name is None:
+      self.class_name = "CLASS_"+gensym()
+    else:
+      self.class_name = name
+
+  def get_initial_build_version(self):
+    """
+    Returns a version of this class that can be used for the initial build (meaning no compiler
+    checks will be triggered).
+    """
+    return TestClass([i.get_initial_build_version() for i in self.ifaces], self.class_name)
+
+  def initial_build_different(self):
+    return False
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{fname} {iftree}]".format(fname = self.get_name(), iftree = print_tree(self.ifaces))
+
+  def get_test_output(self):
+    return '\n'.join(map(lambda a: self.OUTPUT_TEMPLATE.format(class_name = self.get_name(),
+                                                               iface = a.get_name(),
+                                                               result = a.get_output()),
+                         self.ifaces))
+
+  def __str__(self):
+    """
+    Print the smali code for this class.
+    """
+    funcs = '\n'.join(map(lambda a: self.TEST_FUNC_TEMPLATE.format(iface = a.get_name(),
+                                                                   class_name = self.get_name()),
+                          self.ifaces))
+    calls = '\n'.join(map(lambda a: self.TEST_CALL_TEMPLATE.format(iface = a.get_name(),
+                                                                   class_name = self.get_name()),
+                          self.ifaces))
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           implements_spec = s_ifaces,
+                                           ifaces = j_ifaces,
+                                           class_name = self.class_name,
+                                           test_funcs = funcs,
+                                           test_calls = calls)
+
+class IncompatibleClassChangeErrorResult(mixins.Named):
+  def get_name(self):
+    return "IncompatibleClassChangeError"
+
+ICCE = IncompatibleClassChangeErrorResult()
+
+class NoSuchMethodErrorResult(mixins.Named):
+  def get_name(self):
+    return "NoSuchMethodError"
+
+NSME = NoSuchMethodErrorResult()
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+
+{funcs}
+
+# }}
+"""
+
+  ABSTRACT_FUNC_TEMPLATE = """
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default String getCalledInterface() {{
+#     return "{class_name}";
+#   }}
+.method public getCalledInterface()Ljava/lang/String;
+  .locals 1
+  const-string v0, "{class_name}"
+  return-object v0
+.end method
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, default, name = None):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = ifaces
+    self.default = default
+    if name is None:
+      end = "_DEFAULT" if default else ""
+      self.class_name = "INTERFACE_"+gensym()+end
+    else:
+      self.class_name = name
+
+  def get_initial_build_version(self):
+    """
+    Returns a version of this class that can be used for the initial build (meaning no compiler
+    checks will be triggered).
+    """
+    return TestInterface([i.get_initial_build_version() for i in self.ifaces],
+                         True,
+                         self.class_name)
+
+  def initial_build_different(self):
+    return not self.default
+
+  def get_name(self):
+    """
+    Gets the name of this interface
+    """
+    return self.class_name
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def get_called(self):
+    """
+    Get the interface whose default method would be called when invoking
+    getCalledInterface() on this interface.
+    """
+    all_ifaces = set(iface for iface in self if iface.default)
+    for i in all_ifaces:
+      if all(map(lambda j: i not in j.get_super_types(), all_ifaces)):
+        return i
+    return ICCE if any(map(lambda i: i.default, all_ifaces)) else NSME
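+    # E.g. (hypothetical shape): if the transitive supers include a default
+    # iface D and a non-default iface extending D, only D survives the filter
+    # above and is reported as the invoke-super target; with no default super
+    # at all, NSME is reported instead.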
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def get_output(self):
+    if self.default:
+      return self.get_name()
+    else:
+      return self.get_called().get_name()
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+  def __str__(self):
+    """
+    Print the smali code for this interface.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    if self.default:
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(class_name = self.class_name)
+    else:
+      funcs = self.ABSTRACT_FUNC_TEMPLATE
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               class_name = self.class_name)
+
+def dump_tree(ifaces):
+  """
+  Yields all the interfaces transitively implemented by the set in
+  reverse-depth-first order
+  """
+  for i in ifaces:
+    yield from dump_tree(i.ifaces)
+    yield i
+
+def print_tree(ifaces):
+  """
+  Prints the tree for the given ifaces.
+  """
+  return " ".join(i.get_tree() for i in  ifaces)
+
+# Cached output of subtree_sizes for speed of access.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i)) for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        ifaces.append(list(create_interface_trees(sub)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(supers)
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    yield TestInterface(tuple(), False)
+    yield TestInterface(tuple(), True)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    for supers in itertools.product(*ifaces):
+      yield TestInterface(supers, False)
+      yield TestInterface(supers, True)
+      for selected in (set(dump_tree(supers)) - set(supers)):
+        yield TestInterface(tuple([selected] + list(supers)), True)
+        yield TestInterface(tuple([selected] + list(supers)), False)
+      # TODO Should add on some from higher up the tree.
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in dump_tree(clazz.ifaces):
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/971-iface-super/build b/test/971-iface-super/build
new file mode 100755
index 0000000..1e9f8aa
--- /dev/null
+++ b/test/971-iface-super/build
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+# TODO: Support running with jack.
+
+if [[ $@ == *"--jvm"* ]]; then
+  # Build the Java files if we are running a --jvm test
+  mkdir -p classes
+  mkdir -p src
+  echo "${JAVAC} \$@" >> ./javac_exec.sh
+  # This will use javac_exec.sh to execute the javac compiler. It will place the
+  # compiled class files in ./classes and the expected values in expected.txt
+  #
+  # After this the src directory will contain the final versions of all files.
+  ./util-src/generate_java.py ./javac_exec.sh ./src ./classes ./expected.txt ./build_log
+else
+  mkdir -p ./smali
+  # Generate the smali files and expected.txt or fail
+  ./util-src/generate_smali.py ./smali ./expected.txt
+  # Use the default build script
+  ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
+fi
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/971-iface-super/expected.txt b/test/971-iface-super/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/971-iface-super/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py; do not modify it directly!
diff --git a/test/971-iface-super/info.txt b/test/971-iface-super/info.txt
new file mode 100644
index 0000000..bc1c428
--- /dev/null
+++ b/test/971-iface-super/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct in the presence of
+partial compilation/illegal invokes.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
+
+When run, smali test files are generated by the util-src/generate_smali.py
+script.  If we run with --jvm, the util-src/generate_java.py script will
+generate equivalent Java code based on the smali code.
+
+Care should be taken when updating the generate_smali.py script. It should
+always produce equivalent output when run multiple times, and the expected
+output should be valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
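+
+A sketch of the partial-compilation situation exercised (hypothetical shape):
+a class is compiled while a super-interface still provides a default
+implementation of the tested method, so its invoke-super names that interface;
+the interface is then recompiled without the default, and at run time the
+invoke-super must fail with the appropriate error (such as AbstractMethodError
+or NoSuchMethodError) rather than resolving to the wrong method.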
diff --git a/test/971-iface-super/util-src/generate_java.py b/test/971-iface-super/util-src/generate_java.py
new file mode 100755
index 0000000..99b0479
--- /dev/null
+++ b/test/971-iface-super/util-src/generate_java.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 971.
+"""
+
+import generate_smali as base
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+import testgen.mixins as mixins
+import functools
+import operator
+import subprocess
+
+class JavaConverter(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A class that can convert a SmaliFile to a JavaFile.
+  """
+  def __init__(self, inner):
+    self.inner = inner
+
+  def get_name(self):
+    """Gets the name of this file."""
+    return self.inner.get_name()
+
+  def __str__(self):
+    out = ""
+    for line in str(self.inner).splitlines(keepends = True):
+      if line.startswith("#"):
+        out += line[1:]
+    return out
+
+class Compiler:
+  def __init__(self, sources, javac, temp_dir, classes_dir):
+    self.javac = javac
+    self.temp_dir = temp_dir
+    self.classes_dir = classes_dir
+    self.sources = sources
+
+  def compile_files(self, args, files):
+    """
+    Compile the files given with the arguments given.
+    """
+    args = args.split()
+    files = list(map(str, files))
+    cmd = ['sh', '-a', '-e', '--', str(self.javac)] + args + files
+    print("Running compile command: {}".format(cmd))
+    subprocess.check_call(cmd)
+    print("Compiled {} files".format(len(files)))
+
+  def execute(self):
+    """
+    Compiles this test, doing partial compilation as necessary.
+    """
+    # Compile Main and all classes first. Force all interfaces to be default so that there will be
+    # no compiler problems (works since classes only implement 1 interface).
+    for f in self.sources:
+      if isinstance(f, base.TestInterface):
+        JavaConverter(f.get_specific_version(base.InterfaceType.default)).dump(self.temp_dir)
+      else:
+        JavaConverter(f).dump(self.temp_dir)
+    self.compile_files("-d {}".format(self.classes_dir), self.temp_dir.glob("*.java"))
+
+    # Now we compile the interfaces
+    ifaces = set(i for i in self.sources if isinstance(i, base.TestInterface))
+    filters = (lambda a: a.is_default(), lambda a: not a.is_default())
+    converters = (lambda a: JavaConverter(a.get_specific_version(base.InterfaceType.default)),
+                  lambda a: JavaConverter(a.get_specific_version(base.InterfaceType.empty)))
+    while len(ifaces) != 0:
+      for iface_filter, iface_converter in zip(filters, converters):
+        # Find those ifaces where there are no (uncompiled) interfaces that are subtypes.
+        tops = set(filter(lambda a: iface_filter(a) and not any(map(lambda i: a in i.get_super_types(), ifaces)), ifaces))
+        files = []
+        # Dump these ones, they are getting compiled.
+        for f in tops:
+          out = JavaConverter(f)
+          out.dump(self.temp_dir)
+          files.append(self.temp_dir / out.get_file_name())
+        # Force all superinterfaces of these to be empty so there will be no conflicts
+        overrides = functools.reduce(operator.or_, map(lambda i: i.get_super_types(), tops), set())
+        for overridden in overrides:
+          out = iface_converter(overridden)
+          out.dump(self.temp_dir)
+          files.append(self.temp_dir / out.get_file_name())
+        self.compile_files("-d {outdir} -cp {outdir}".format(outdir = self.classes_dir), files)
+        # Remove these from the set of interfaces to be compiled.
+        ifaces -= tops
+    print("Finished compiling all files.")
+    return
+
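+# A minimal sketch of the flow in Compiler.execute above, assuming interfaces
+# B and C where C extends B (hypothetical names): pass 1 dumps everything with
+# default method bodies so Main and the classes compile; the worklist loop then
+# recompiles the leaf-most interfaces ("tops") against stubbed versions of
+# their superinterfaces and removes each compiled top from the set, so every
+# interface is eventually compiled with its real body and the loop terminates
+# once the set is empty.
+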
+def main(argv):
+  javac_exec = Path(argv[1])
+  if not javac_exec.exists() or not javac_exec.is_file():
+    print("{} is not a shell script".format(javac_exec), file=sys.stderr)
+    sys.exit(1)
+  temp_dir = Path(argv[2])
+  if not temp_dir.exists() or not temp_dir.is_dir():
+    print("{} is not a valid source dir".format(temp_dir), file=sys.stderr)
+    sys.exit(1)
+  classes_dir = Path(argv[3])
+  if not classes_dir.exists() or not classes_dir.is_dir():
+    print("{} is not a valid classes directory".format(classes_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[4])
+  mainclass, all_files = base.create_all_test_files()
+
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  print("Wrote expected output")
+
+  Compiler(all_files, javac_exec, temp_dir, classes_dir).execute()
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/971-iface-super/util-src/generate_smali.py b/test/971-iface-super/util-src/generate_smali.py
new file mode 100755
index 0000000..3681411
--- /dev/null
+++ b/test/971-iface-super/util-src/generate_smali.py
@@ -0,0 +1,689 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 971.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from enum import Enum
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the type tree can have.
+MAX_IFACE_DEPTH = 2
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_funcs}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 0
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, LMain;->{test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass smali code.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_funcs = ""
+    for t in all_tests:
+      test_funcs += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("smali"),
+                                           test_funcs = test_funcs,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     {farg} v = null;
+#     try {{
+#       v = new {farg}();
+#     }} catch (Throwable e) {{
+#       System.out.println("Unexpected error occurred which creating {farg} instance");
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#     try {{
+#       v.callSupers();
+#       return;
+#     }} catch (Throwable e) {{
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    :new_{fname}_try_start
+      new-instance v0, L{farg};
+      invoke-direct {{v0}}, L{farg};-><init>()V
+      goto :call_{fname}_try_start
+    :new_{fname}_try_end
+    .catch Ljava/lang/Throwable; {{:new_{fname}_try_start .. :new_{fname}_try_end}} :new_error_{fname}_start
+    :new_error_{fname}_start
+      move-exception v6
+      const-string v5, "Unexpected error occurred which creating {farg} instance"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+    :call_{fname}_try_start
+      invoke-virtual {{v0}}, L{farg};->callSupers()V
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Throwable; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v6
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def get_expected(self):
+    """
+    Get the expected output calling this function.
+    """
+    return "\n".join(self.farg.get_expected())
+
+  def get_name(self):
+    """
+    Get the name of this function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def __str__(self):
+    """
+    Print the smali code of this function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname = self.get_name(),
+                                              farg = self.farg.get_name())
+
+class InterfaceCallResponse(Enum):
+  """
+  An enumeration of all the different types of responses to an interface call we can have
+  """
+  NoError = 0
+  NoSuchMethodError = 1
+  AbstractMethodError = 2
+  IncompatibleClassChangeError = 3
+
+  def get_output_format(self):
+    if self == InterfaceCallResponse.NoError:
+      return "No exception thrown for {iface_name}.super.call() on {tree}\n"
+    elif self == InterfaceCallResponse.AbstractMethodError:
+      return "AbstractMethodError thrown for {iface_name}.super.call() on {tree}\n"
+    elif self == InterfaceCallResponse.NoSuchMethodError:
+      return "NoSuchMethodError thrown for {iface_name}.super.call() on {tree}\n"
+    else:
+      return "IncompatibleClassChangeError thrown for {iface_name}.super.call() on {tree}\n"
+
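+# Note: each InterfaceCallResponse value maps onto one of the lines printed by
+# the generated smali in SUPER_CALL_TEMPLATE below, which is what lets
+# get_expected() predict the runtime output exactly.
+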
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test interface super behavior.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public class {class_name} implements {ifaces} {{
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+#   public void call() {{
+#     throw new Error("{class_name}.call(v) should never get called!");
+#   }}
+.method public call()V
+  .locals 2
+  new-instance v0, Ljava/lang/Error;
+  const-string v1, "{class_name}.call(v) should never get called!"
+  invoke-direct {{v0, v1}}, Ljava/lang/Error;-><init>(Ljava/lang/String;)V
+  throw v0
+.end method
+
+#   public void callSupers() {{
+.method public callSupers()V
+  .locals 4
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+  {super_calls}
+
+  return-void
+.end method
+#   }}
+
+# }}
+"""
+  SUPER_CALL_TEMPLATE = """
+#     try {{
+#       System.out.println("Calling {iface_name}.super.call() on {tree}");
+#       {iface_name}.super.call();
+#       System.out.println("No exception thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (AbstractMethodError ame) {{
+#       System.out.println("AbstractMethodError thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (NoSuchMethodError nsme) {{
+#       System.out.println("NoSuchMethodError thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (IncompatibleClassChangeError icce) {{
+#       System.out.println("IncompatibleClassChangeError thrown for {iface_name}.super.call() on {tree}");
+#     }} catch (Throwable t) {{
+#       System.out.println("Unknown error thrown for {iface_name}.super.call() on {tree}");
+#       throw t;
+#     }}
+    :call_{class_name}_{iface_name}_try_start
+      const-string v1, "Calling {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-super {{p0}}, L{iface_name};->call()V
+      const-string v1, "No exception thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :call_{class_name}_{iface_name}_try_end
+    .catch Ljava/lang/AbstractMethodError; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :AME_{class_name}_{iface_name}_start
+    .catch Ljava/lang/NoSuchMethodError; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :NSME_{class_name}_{iface_name}_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :ICCE_{class_name}_{iface_name}_start
+    .catch Ljava/lang/Throwable; {{:call_{class_name}_{iface_name}_try_start .. :call_{class_name}_{iface_name}_try_end}} :error_{class_name}_{iface_name}_start
+    :AME_{class_name}_{iface_name}_start
+      const-string v1, "AbstractMethodError thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :NSME_{class_name}_{iface_name}_start
+      const-string v1, "NoSuchMethodError thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :ICCE_{class_name}_{iface_name}_start
+      const-string v1, "IncompatibleClassChangeError thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      goto :call_{class_name}_{iface_name}_end
+    :error_{class_name}_{iface_name}_start
+      move-exception v2
+      const-string v1, "Unknown error thrown for {iface_name}.super.call() on {tree}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      throw v2
+    :call_{class_name}_{iface_name}_end
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  OUTPUT_PREFIX = "Calling {iface_name}.super.call() on {tree}\n"
+
+  def __init__(self, ifaces):
+    """
+    Initialize this test class which implements the given interfaces
+    """
+    self.ifaces = ifaces
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
+                                                iface_tree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Step through all interfaces implemented transitively by this class
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def get_expected(self):
+    for iface in self.ifaces:
+      yield self.OUTPUT_PREFIX.format(iface_name = iface.get_name(), tree = self.get_tree())
+      yield from iface.get_expected()
+      yield iface.get_response().get_output_format().format(iface_name = iface.get_name(),
+                                                            tree = self.get_tree())
+
+  def __str__(self):
+    """
+    Print the smali code of this class.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    super_template = self.SUPER_CALL_TEMPLATE
+    super_calls = "\n".join(super_template.format(iface_name = iface.get_name(),
+                                                  class_name = self.get_name(),
+                                                  tree = self.get_tree()) for iface in self.ifaces)
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           ifaces = j_ifaces,
+                                           implements_spec = s_ifaces,
+                                           tree = self.get_tree(),
+                                           class_name = self.class_name,
+                                           super_calls = super_calls)
+
+class InterfaceType(Enum):
+  """
+  An enumeration of all the different types of interfaces we can have.
+
+  default: It has a default method
+  abstract: It has a method declared but not defined
+  empty: It does not have the method
+  """
+  default = 0
+  abstract = 1
+  empty = 2
+
+  def get_suffix(self):
+    if self == InterfaceType.default:
+      return "_DEFAULT"
+    elif self == InterfaceType.abstract:
+      return "_ABSTRACT"
+    elif self == InterfaceType.empty:
+      return "_EMPTY"
+    else:
+      raise TypeError("Interface type had illegal value.")
+
+class ConflictInterface:
+  """
+  A singleton representing a conflict of default methods.
+  """
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return True
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return False
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return False
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return False
+
+  def get_response(self):
+    return InterfaceCallResponse.IncompatibleClassChangeError
+
+CONFLICT_TYPE = ConflictInterface()
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+
+{func}
+
+# }}
+"""
+
+  SUPER_CALL_TEMPLATE = TestClass.SUPER_CALL_TEMPLATE
+  OUTPUT_PREFIX = TestClass.OUTPUT_PREFIX
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default void call() {{
+.method public call()V
+  .locals 4
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+  {super_calls}
+
+  return-void
+.end method
+#   }}
+"""
+
+  ABSTRACT_FUNC_TEMPLATE = """
+#   public void call();
+.method public abstract call()V
+.end method
+"""
+
+  EMPTY_FUNC_TEMPLATE = """"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, iface_type, full_name = None):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = sorted(ifaces)
+    self.iface_type = iface_type
+    if full_name is None:
+      end = self.iface_type.get_suffix()
+      self.class_name = "INTERFACE_"+gensym()+end
+    else:
+      self.class_name = full_name
+
+  def get_specific_version(self, v):
+    """
+    Returns a copy of this interface of the given type for use in partial compilation.
+    """
+    return TestInterface(self.ifaces, v, full_name = self.class_name)
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return False
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return self.iface_type == InterfaceType.abstract
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return self.iface_type == InterfaceType.empty
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return self.iface_type == InterfaceType.default
+
+  def get_expected(self):
+    response = self.get_response()
+    if response == InterfaceCallResponse.NoError:
+      for iface in self.ifaces:
+        if self.is_default():
+          yield self.OUTPUT_PREFIX.format(iface_name = iface.get_name(), tree = self.get_tree())
+        yield from iface.get_expected()
+        if self.is_default():
+          yield iface.get_response().get_output_format().format(iface_name = iface.get_name(),
+                                                                tree = self.get_tree())
+
+  def get_response(self):
+    if self.is_default():
+      return InterfaceCallResponse.NoError
+    elif self.is_abstract():
+      return InterfaceCallResponse.AbstractMethodError
+    elif len(self.ifaces) == 0:
+      return InterfaceCallResponse.NoSuchMethodError
+    else:
+      return self.get_called().get_response()
+
+  def get_called(self):
+    """
+    Returns the interface that will be called when the method on this class is invoked or
+    CONFLICT_TYPE if there is no interface that will be called.
+    """
+    if not self.is_empty() or len(self.ifaces) == 0:
+      return self
+    else:
+      best = self
+      for super_iface in self.ifaces:
+        super_best = super_iface.get_called()
+        if super_best.is_conflict():
+          return CONFLICT_TYPE
+        elif best.is_default():
+          if super_best.is_default():
+            return CONFLICT_TYPE
+        elif best.is_abstract():
+          if super_best.is_default():
+            best = super_best
+        else:
+          assert best.is_empty()
+          best = super_best
+      return best
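+  # Worked example for get_called (hypothetical names): let I be an empty
+  # interface extending A_DEFAULT and B_DEFAULT. best starts as I; A_DEFAULT
+  # makes best default; B_DEFAULT is a second, unrelated default, so the walk
+  # returns CONFLICT_TYPE and get_response() reports
+  # IncompatibleClassChangeError. With A_DEFAULT as the only superinterface,
+  # best would stay A_DEFAULT and the call would succeed (NoError).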
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def __str__(self):
+    """
+    Print the smali code of this interface.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    if self.is_default():
+      super_template = self.SUPER_CALL_TEMPLATE
+      super_calls ="\n".join(super_template.format(iface_name = iface.get_name(),
+                                                   class_name = self.get_name(),
+                                                   tree = self.get_tree()) for iface in self.ifaces)
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(super_calls = super_calls)
+    elif self.is_abstract():
+      funcs = self.ABSTRACT_FUNC_TEMPLATE.format()
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               func = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def print_tree(ifaces):
+  """
+  Prints a list of iface trees
+  """
+  return " ".join(i.get_tree() for i in ifaces)
+
+# The deduplicated output of subtree_sizes for each size up to
+# MAX_IFACE_DEPTH.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
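+# SUBTREES[i] holds the deduplicated splits (as sorted tuples) that
+# subtree_sizes yields for size i; deduplicating here keeps the generators
+# below from producing structurally identical interface trees twice.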
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        ifaces.append(list(create_interface_trees(sub)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(supers)
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    for iftype in InterfaceType:
+      yield TestInterface(tuple(), iftype)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    for supers in itertools.product(*ifaces):
+      for iftype in InterfaceType:
+        yield TestInterface(supers, iftype)
+
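+# With MAX_IFACE_DEPTH = 2, every tree shape from SUBTREES is combined with
+# each of the three InterfaceType variants per interface, so the two
+# generators above enumerate the full cross product of shape and type.
+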
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in clazz:
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/469-condition-materialization-regression/expected.txt b/test/972-default-imt-collision/expected.txt
similarity index 100%
copy from test/469-condition-materialization-regression/expected.txt
copy to test/972-default-imt-collision/expected.txt
diff --git a/test/972-default-imt-collision/info.txt b/test/972-default-imt-collision/info.txt
new file mode 100644
index 0000000..adecee3
--- /dev/null
+++ b/test/972-default-imt-collision/info.txt
@@ -0,0 +1 @@
+Test for interaction of miranda and non-miranda methods on interface dispatch.
diff --git a/test/972-default-imt-collision/smali/Iface1.smali b/test/972-default-imt-collision/smali/Iface1.smali
new file mode 100644
index 0000000..088c29c
--- /dev/null
+++ b/test/972-default-imt-collision/smali/Iface1.smali
@@ -0,0 +1,32 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface1 {
+#   public default void sayHi() {
+#       System.out.println("FAILED: We should never invoke this method!");
+#   }
+# }
+
+.class public abstract interface LIface1;
+.super Ljava/lang/Object;
+
+.method public sayHi()V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "FAILED: We should never invoke this method!"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
diff --git a/test/972-default-imt-collision/smali/Iface2.smali b/test/972-default-imt-collision/smali/Iface2.smali
new file mode 100644
index 0000000..4320e7e
--- /dev/null
+++ b/test/972-default-imt-collision/smali/Iface2.smali
@@ -0,0 +1,277 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface2 {
+#    public void notImplementedMethod0();
+#    public void notImplementedMethod1();
+#    public void notImplementedMethod2();
+#    public void notImplementedMethod3();
+#    public void notImplementedMethod4();
+#    public void notImplementedMethod5();
+#    public void notImplementedMethod6();
+#    public void notImplementedMethod7();
+#    public void notImplementedMethod8();
+#    public void notImplementedMethod9();
+#    public void notImplementedMethod10();
+#    public void notImplementedMethod11();
+#    public void notImplementedMethod12();
+#    public void notImplementedMethod13();
+#    public void notImplementedMethod14();
+#    public void notImplementedMethod15();
+#    public void notImplementedMethod16();
+#    public void notImplementedMethod17();
+#    public void notImplementedMethod18();
+#    public void notImplementedMethod19();
+#    public void notImplementedMethod20();
+#    public void notImplementedMethod21();
+#    public void notImplementedMethod22();
+#    public void notImplementedMethod23();
+#    public void notImplementedMethod24();
+#    public void notImplementedMethod25();
+#    public void notImplementedMethod26();
+#    public void notImplementedMethod27();
+#    public void notImplementedMethod28();
+#    public void notImplementedMethod29();
+#    public void notImplementedMethod30();
+#    public void notImplementedMethod31();
+#    public void notImplementedMethod32();
+#    public void notImplementedMethod33();
+#    public void notImplementedMethod34();
+#    public void notImplementedMethod35();
+#    public void notImplementedMethod36();
+#    public void notImplementedMethod37();
+#    public void notImplementedMethod38();
+#    public void notImplementedMethod39();
+#    public void notImplementedMethod40();
+#    public void notImplementedMethod41();
+#    public void notImplementedMethod42();
+#    public void notImplementedMethod43();
+#    public void notImplementedMethod44();
+#    public void notImplementedMethod45();
+#    public void notImplementedMethod46();
+#    public void notImplementedMethod47();
+#    public void notImplementedMethod48();
+#    public void notImplementedMethod49();
+#    public void notImplementedMethod50();
+#    public void notImplementedMethod51();
+#    public void notImplementedMethod52();
+#    public void notImplementedMethod53();
+#    public void notImplementedMethod54();
+#    public void notImplementedMethod55();
+#    public void notImplementedMethod56();
+#    public void notImplementedMethod57();
+#    public void notImplementedMethod58();
+#    public void notImplementedMethod59();
+#    public void notImplementedMethod60();
+#    public void notImplementedMethod61();
+#    public void notImplementedMethod62();
+#    public void notImplementedMethod63();
+# }
+
+.class public abstract interface LIface2;
+.super Ljava/lang/Object;
+
+.method public abstract notImplementedMethod0()V
+.end method
+
+.method public abstract notImplementedMethod1()V
+.end method
+
+.method public abstract notImplementedMethod2()V
+.end method
+
+.method public abstract notImplementedMethod3()V
+.end method
+
+.method public abstract notImplementedMethod4()V
+.end method
+
+.method public abstract notImplementedMethod5()V
+.end method
+
+.method public abstract notImplementedMethod6()V
+.end method
+
+.method public abstract notImplementedMethod7()V
+.end method
+
+.method public abstract notImplementedMethod8()V
+.end method
+
+.method public abstract notImplementedMethod9()V
+.end method
+
+.method public abstract notImplementedMethod10()V
+.end method
+
+.method public abstract notImplementedMethod11()V
+.end method
+
+.method public abstract notImplementedMethod12()V
+.end method
+
+.method public abstract notImplementedMethod13()V
+.end method
+
+.method public abstract notImplementedMethod14()V
+.end method
+
+.method public abstract notImplementedMethod15()V
+.end method
+
+.method public abstract notImplementedMethod16()V
+.end method
+
+.method public abstract notImplementedMethod17()V
+.end method
+
+.method public abstract notImplementedMethod18()V
+.end method
+
+.method public abstract notImplementedMethod19()V
+.end method
+
+.method public abstract notImplementedMethod20()V
+.end method
+
+.method public abstract notImplementedMethod21()V
+.end method
+
+.method public abstract notImplementedMethod22()V
+.end method
+
+.method public abstract notImplementedMethod23()V
+.end method
+
+.method public abstract notImplementedMethod24()V
+.end method
+
+.method public abstract notImplementedMethod25()V
+.end method
+
+.method public abstract notImplementedMethod26()V
+.end method
+
+.method public abstract notImplementedMethod27()V
+.end method
+
+.method public abstract notImplementedMethod28()V
+.end method
+
+.method public abstract notImplementedMethod29()V
+.end method
+
+.method public abstract notImplementedMethod30()V
+.end method
+
+.method public abstract notImplementedMethod31()V
+.end method
+
+.method public abstract notImplementedMethod32()V
+.end method
+
+.method public abstract notImplementedMethod33()V
+.end method
+
+.method public abstract notImplementedMethod34()V
+.end method
+
+.method public abstract notImplementedMethod35()V
+.end method
+
+.method public abstract notImplementedMethod36()V
+.end method
+
+.method public abstract notImplementedMethod37()V
+.end method
+
+.method public abstract notImplementedMethod38()V
+.end method
+
+.method public abstract notImplementedMethod39()V
+.end method
+
+.method public abstract notImplementedMethod40()V
+.end method
+
+.method public abstract notImplementedMethod41()V
+.end method
+
+.method public abstract notImplementedMethod42()V
+.end method
+
+.method public abstract notImplementedMethod43()V
+.end method
+
+.method public abstract notImplementedMethod44()V
+.end method
+
+.method public abstract notImplementedMethod45()V
+.end method
+
+.method public abstract notImplementedMethod46()V
+.end method
+
+.method public abstract notImplementedMethod47()V
+.end method
+
+.method public abstract notImplementedMethod48()V
+.end method
+
+.method public abstract notImplementedMethod49()V
+.end method
+
+.method public abstract notImplementedMethod50()V
+.end method
+
+.method public abstract notImplementedMethod51()V
+.end method
+
+.method public abstract notImplementedMethod52()V
+.end method
+
+.method public abstract notImplementedMethod53()V
+.end method
+
+.method public abstract notImplementedMethod54()V
+.end method
+
+.method public abstract notImplementedMethod55()V
+.end method
+
+.method public abstract notImplementedMethod56()V
+.end method
+
+.method public abstract notImplementedMethod57()V
+.end method
+
+.method public abstract notImplementedMethod58()V
+.end method
+
+.method public abstract notImplementedMethod59()V
+.end method
+
+.method public abstract notImplementedMethod60()V
+.end method
+
+.method public abstract notImplementedMethod61()V
+.end method
+
+.method public abstract notImplementedMethod62()V
+.end method
+
+.method public abstract notImplementedMethod63()V
+.end method
diff --git a/test/972-default-imt-collision/smali/Klass.smali b/test/972-default-imt-collision/smali/Klass.smali
new file mode 100644
index 0000000..a98f9a7
--- /dev/null
+++ b/test/972-default-imt-collision/smali/Klass.smali
@@ -0,0 +1,993 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *    http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+# public class Klass implements Iface1, Iface2 {
+#    public static void testMe(Iface2 me) {
+#      try {
+#        me.notImplementedMethod0();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod0!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod1();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod1!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod2();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod2!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod3();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod3!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod4();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod4!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod5();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod5!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod6();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod6!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod7();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod7!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod8();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod8!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod9();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod9!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod10();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod10!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod11();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod11!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod12();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod12!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod13();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod13!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod14();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod14!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod15();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod15!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod16();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod16!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod17();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod17!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod18();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod18!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod19();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod19!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod20();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod20!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod21();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod21!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod22();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod22!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod23();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod23!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod24();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod24!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod25();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod25!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod26();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod26!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod27();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod27!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod28();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod28!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod29();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod29!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod30();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod30!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod31();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod31!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod32();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod32!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod33();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod33!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod34();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod34!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod35();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod35!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod36();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod36!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod37();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod37!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod38();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod38!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod39();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod39!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod40();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod40!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod41();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod41!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod42();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod42!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod43();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod43!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod44();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod44!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod45();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod45!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod46();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod46!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod47();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod47!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod48();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod48!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod49();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod49!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod50();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod50!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod51();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod51!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod52();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod52!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod53();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod53!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod54();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod54!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod55();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod55!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod56();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod56!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod57();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod57!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod58();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod58!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod59();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod59!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod60();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod60!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod61();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod61!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod62();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod62!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#      try {
+#        me.notImplementedMethod63();
+#        System.out.println("FAILED TO THROW AbstractMethodError when calling notImplementedMethod63!");
+#      } catch (AbstractMethodError e) { /* do nothing */ }
+#    }
+# }
+
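+# Every block in testMe below follows the same generated shape: invoke one of
+# the abstract Iface2 methods, print a FAILED line if the call unexpectedly
+# returns, and route the expected AbstractMethodError to the shared
+# :catch_N_end label so the test continues with the next method.
+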
+.class public LKlass;
+.super Ljava/lang/Object;
+.implements LIface1;
+.implements LIface2;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static testMe(LIface2;)V
+.locals 5
+    :try_0_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod0()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod0!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_0_end
+    :try_0_end
+    .catch Ljava/lang/AbstractMethodError; {:try_0_start .. :try_0_end} :catch_0_end
+    :catch_0_end
+
+
+    :try_1_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod1()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod1!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_1_end
+    :try_1_end
+    .catch Ljava/lang/AbstractMethodError; {:try_1_start .. :try_1_end} :catch_1_end
+    :catch_1_end
+
+
+    :try_2_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod2()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod2!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_2_end
+    :try_2_end
+    .catch Ljava/lang/AbstractMethodError; {:try_2_start .. :try_2_end} :catch_2_end
+    :catch_2_end
+
+
+    :try_3_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod3()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod3!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_3_end
+    :try_3_end
+    .catch Ljava/lang/AbstractMethodError; {:try_3_start .. :try_3_end} :catch_3_end
+    :catch_3_end
+
+
+    :try_4_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod4()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod4!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_4_end
+    :try_4_end
+    .catch Ljava/lang/AbstractMethodError; {:try_4_start .. :try_4_end} :catch_4_end
+    :catch_4_end
+
+
+    :try_5_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod5()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod5!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_5_end
+    :try_5_end
+    .catch Ljava/lang/AbstractMethodError; {:try_5_start .. :try_5_end} :catch_5_end
+    :catch_5_end
+
+
+    :try_6_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod6()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod6!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_6_end
+    :try_6_end
+    .catch Ljava/lang/AbstractMethodError; {:try_6_start .. :try_6_end} :catch_6_end
+    :catch_6_end
+
+
+    :try_7_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod7()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod7!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_7_end
+    :try_7_end
+    .catch Ljava/lang/AbstractMethodError; {:try_7_start .. :try_7_end} :catch_7_end
+    :catch_7_end
+
+
+    :try_8_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod8()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod8!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_8_end
+    :try_8_end
+    .catch Ljava/lang/AbstractMethodError; {:try_8_start .. :try_8_end} :catch_8_end
+    :catch_8_end
+
+
+    :try_9_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod9()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod9!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_9_end
+    :try_9_end
+    .catch Ljava/lang/AbstractMethodError; {:try_9_start .. :try_9_end} :catch_9_end
+    :catch_9_end
+
+
+    :try_10_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod10()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod10!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_10_end
+    :try_10_end
+    .catch Ljava/lang/AbstractMethodError; {:try_10_start .. :try_10_end} :catch_10_end
+    :catch_10_end
+
+
+    :try_11_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod11()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod11!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_11_end
+    :try_11_end
+    .catch Ljava/lang/AbstractMethodError; {:try_11_start .. :try_11_end} :catch_11_end
+    :catch_11_end
+
+
+    :try_12_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod12()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod12!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_12_end
+    :try_12_end
+    .catch Ljava/lang/AbstractMethodError; {:try_12_start .. :try_12_end} :catch_12_end
+    :catch_12_end
+
+
+    :try_13_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod13()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod13!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_13_end
+    :try_13_end
+    .catch Ljava/lang/AbstractMethodError; {:try_13_start .. :try_13_end} :catch_13_end
+    :catch_13_end
+
+
+    :try_14_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod14()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod14!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_14_end
+    :try_14_end
+    .catch Ljava/lang/AbstractMethodError; {:try_14_start .. :try_14_end} :catch_14_end
+    :catch_14_end
+
+
+    :try_15_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod15()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod15!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_15_end
+    :try_15_end
+    .catch Ljava/lang/AbstractMethodError; {:try_15_start .. :try_15_end} :catch_15_end
+    :catch_15_end
+
+
+    :try_16_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod16()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod16!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_16_end
+    :try_16_end
+    .catch Ljava/lang/AbstractMethodError; {:try_16_start .. :try_16_end} :catch_16_end
+    :catch_16_end
+
+
+    :try_17_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod17()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod17!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_17_end
+    :try_17_end
+    .catch Ljava/lang/AbstractMethodError; {:try_17_start .. :try_17_end} :catch_17_end
+    :catch_17_end
+
+
+    :try_18_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod18()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod18!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_18_end
+    :try_18_end
+    .catch Ljava/lang/AbstractMethodError; {:try_18_start .. :try_18_end} :catch_18_end
+    :catch_18_end
+
+
+    :try_19_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod19()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod19!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_19_end
+    :try_19_end
+    .catch Ljava/lang/AbstractMethodError; {:try_19_start .. :try_19_end} :catch_19_end
+    :catch_19_end
+
+
+    :try_20_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod20()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod20!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_20_end
+    :try_20_end
+    .catch Ljava/lang/AbstractMethodError; {:try_20_start .. :try_20_end} :catch_20_end
+    :catch_20_end
+
+
+    :try_21_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod21()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod21!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_21_end
+    :try_21_end
+    .catch Ljava/lang/AbstractMethodError; {:try_21_start .. :try_21_end} :catch_21_end
+    :catch_21_end
+
+
+    :try_22_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod22()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod22!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_22_end
+    :try_22_end
+    .catch Ljava/lang/AbstractMethodError; {:try_22_start .. :try_22_end} :catch_22_end
+    :catch_22_end
+
+
+    :try_23_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod23()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod23!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_23_end
+    :try_23_end
+    .catch Ljava/lang/AbstractMethodError; {:try_23_start .. :try_23_end} :catch_23_end
+    :catch_23_end
+
+
+    :try_24_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod24()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod24!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_24_end
+    :try_24_end
+    .catch Ljava/lang/AbstractMethodError; {:try_24_start .. :try_24_end} :catch_24_end
+    :catch_24_end
+
+
+    :try_25_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod25()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod25!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_25_end
+    :try_25_end
+    .catch Ljava/lang/AbstractMethodError; {:try_25_start .. :try_25_end} :catch_25_end
+    :catch_25_end
+
+
+    :try_26_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod26()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod26!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_26_end
+    :try_26_end
+    .catch Ljava/lang/AbstractMethodError; {:try_26_start .. :try_26_end} :catch_26_end
+    :catch_26_end
+
+
+    :try_27_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod27()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod27!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_27_end
+    :try_27_end
+    .catch Ljava/lang/AbstractMethodError; {:try_27_start .. :try_27_end} :catch_27_end
+    :catch_27_end
+
+
+    :try_28_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod28()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod28!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_28_end
+    :try_28_end
+    .catch Ljava/lang/AbstractMethodError; {:try_28_start .. :try_28_end} :catch_28_end
+    :catch_28_end
+
+
+    :try_29_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod29()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod29!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_29_end
+    :try_29_end
+    .catch Ljava/lang/AbstractMethodError; {:try_29_start .. :try_29_end} :catch_29_end
+    :catch_29_end
+
+
+    :try_30_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod30()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod30!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_30_end
+    :try_30_end
+    .catch Ljava/lang/AbstractMethodError; {:try_30_start .. :try_30_end} :catch_30_end
+    :catch_30_end
+
+
+    :try_31_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod31()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod31!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_31_end
+    :try_31_end
+    .catch Ljava/lang/AbstractMethodError; {:try_31_start .. :try_31_end} :catch_31_end
+    :catch_31_end
+
+
+    :try_32_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod32()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod32!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_32_end
+    :try_32_end
+    .catch Ljava/lang/AbstractMethodError; {:try_32_start .. :try_32_end} :catch_32_end
+    :catch_32_end
+
+
+    :try_33_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod33()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod33!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_33_end
+    :try_33_end
+    .catch Ljava/lang/AbstractMethodError; {:try_33_start .. :try_33_end} :catch_33_end
+    :catch_33_end
+
+
+    :try_34_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod34()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod34!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_34_end
+    :try_34_end
+    .catch Ljava/lang/AbstractMethodError; {:try_34_start .. :try_34_end} :catch_34_end
+    :catch_34_end
+
+
+    :try_35_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod35()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod35!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_35_end
+    :try_35_end
+    .catch Ljava/lang/AbstractMethodError; {:try_35_start .. :try_35_end} :catch_35_end
+    :catch_35_end
+
+
+    :try_36_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod36()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod36!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_36_end
+    :try_36_end
+    .catch Ljava/lang/AbstractMethodError; {:try_36_start .. :try_36_end} :catch_36_end
+    :catch_36_end
+
+
+    :try_37_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod37()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod37!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_37_end
+    :try_37_end
+    .catch Ljava/lang/AbstractMethodError; {:try_37_start .. :try_37_end} :catch_37_end
+    :catch_37_end
+
+
+    :try_38_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod38()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod38!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_38_end
+    :try_38_end
+    .catch Ljava/lang/AbstractMethodError; {:try_38_start .. :try_38_end} :catch_38_end
+    :catch_38_end
+
+
+    :try_39_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod39()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod39!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_39_end
+    :try_39_end
+    .catch Ljava/lang/AbstractMethodError; {:try_39_start .. :try_39_end} :catch_39_end
+    :catch_39_end
+
+
+    :try_40_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod40()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod40!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_40_end
+    :try_40_end
+    .catch Ljava/lang/AbstractMethodError; {:try_40_start .. :try_40_end} :catch_40_end
+    :catch_40_end
+
+
+    :try_41_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod41()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod41!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_41_end
+    :try_41_end
+    .catch Ljava/lang/AbstractMethodError; {:try_41_start .. :try_41_end} :catch_41_end
+    :catch_41_end
+
+
+    :try_42_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod42()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod42!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_42_end
+    :try_42_end
+    .catch Ljava/lang/AbstractMethodError; {:try_42_start .. :try_42_end} :catch_42_end
+    :catch_42_end
+
+
+    :try_43_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod43()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod43!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_43_end
+    :try_43_end
+    .catch Ljava/lang/AbstractMethodError; {:try_43_start .. :try_43_end} :catch_43_end
+    :catch_43_end
+
+
+    :try_44_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod44()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod44!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_44_end
+    :try_44_end
+    .catch Ljava/lang/AbstractMethodError; {:try_44_start .. :try_44_end} :catch_44_end
+    :catch_44_end
+
+
+    :try_45_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod45()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod45!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_45_end
+    :try_45_end
+    .catch Ljava/lang/AbstractMethodError; {:try_45_start .. :try_45_end} :catch_45_end
+    :catch_45_end
+
+
+    :try_46_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod46()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod46!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_46_end
+    :try_46_end
+    .catch Ljava/lang/AbstractMethodError; {:try_46_start .. :try_46_end} :catch_46_end
+    :catch_46_end
+
+
+    :try_47_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod47()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod47!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_47_end
+    :try_47_end
+    .catch Ljava/lang/AbstractMethodError; {:try_47_start .. :try_47_end} :catch_47_end
+    :catch_47_end
+
+
+    :try_48_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod48()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod48!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_48_end
+    :try_48_end
+    .catch Ljava/lang/AbstractMethodError; {:try_48_start .. :try_48_end} :catch_48_end
+    :catch_48_end
+
+
+    :try_49_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod49()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod49!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_49_end
+    :try_49_end
+    .catch Ljava/lang/AbstractMethodError; {:try_49_start .. :try_49_end} :catch_49_end
+    :catch_49_end
+
+
+    :try_50_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod50()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod50!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_50_end
+    :try_50_end
+    .catch Ljava/lang/AbstractMethodError; {:try_50_start .. :try_50_end} :catch_50_end
+    :catch_50_end
+
+
+    :try_51_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod51()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod51!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_51_end
+    :try_51_end
+    .catch Ljava/lang/AbstractMethodError; {:try_51_start .. :try_51_end} :catch_51_end
+    :catch_51_end
+
+
+    :try_52_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod52()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod52!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_52_end
+    :try_52_end
+    .catch Ljava/lang/AbstractMethodError; {:try_52_start .. :try_52_end} :catch_52_end
+    :catch_52_end
+
+
+    :try_53_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod53()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod53!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_53_end
+    :try_53_end
+    .catch Ljava/lang/AbstractMethodError; {:try_53_start .. :try_53_end} :catch_53_end
+    :catch_53_end
+
+
+    :try_54_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod54()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod54!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_54_end
+    :try_54_end
+    .catch Ljava/lang/AbstractMethodError; {:try_54_start .. :try_54_end} :catch_54_end
+    :catch_54_end
+
+
+    :try_55_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod55()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod55!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_55_end
+    :try_55_end
+    .catch Ljava/lang/AbstractMethodError; {:try_55_start .. :try_55_end} :catch_55_end
+    :catch_55_end
+
+
+    :try_56_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod56()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod56!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_56_end
+    :try_56_end
+    .catch Ljava/lang/AbstractMethodError; {:try_56_start .. :try_56_end} :catch_56_end
+    :catch_56_end
+
+
+    :try_57_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod57()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod57!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_57_end
+    :try_57_end
+    .catch Ljava/lang/AbstractMethodError; {:try_57_start .. :try_57_end} :catch_57_end
+    :catch_57_end
+
+
+    :try_58_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod58()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod58!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_58_end
+    :try_58_end
+    .catch Ljava/lang/AbstractMethodError; {:try_58_start .. :try_58_end} :catch_58_end
+    :catch_58_end
+
+
+    :try_59_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod59()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod59!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_59_end
+    :try_59_end
+    .catch Ljava/lang/AbstractMethodError; {:try_59_start .. :try_59_end} :catch_59_end
+    :catch_59_end
+
+
+    :try_60_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod60()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod60!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_60_end
+    :try_60_end
+    .catch Ljava/lang/AbstractMethodError; {:try_60_start .. :try_60_end} :catch_60_end
+    :catch_60_end
+
+
+    :try_61_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod61()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod61!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_61_end
+    :try_61_end
+    .catch Ljava/lang/AbstractMethodError; {:try_61_start .. :try_61_end} :catch_61_end
+    :catch_61_end
+
+
+    :try_62_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod62()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod62!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_62_end
+    :try_62_end
+    .catch Ljava/lang/AbstractMethodError; {:try_62_start .. :try_62_end} :catch_62_end
+    :catch_62_end
+
+
+    :try_63_start
+        invoke-interface {p0}, LIface2;->notImplementedMethod63()V
+        sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        const-string v1, "FAILED TO THROW AbstractMethodError when calling notImplementedMethod63!"
+        invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :catch_63_end
+    :try_63_end
+    .catch Ljava/lang/AbstractMethodError; {:try_63_start .. :try_63_end} :catch_63_end
+    :catch_63_end
+    return-void
+.end method
diff --git a/test/972-default-imt-collision/src/Main.java b/test/972-default-imt-collision/src/Main.java
new file mode 100644
index 0000000..6819e43
--- /dev/null
+++ b/test/972-default-imt-collision/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.lang.reflect.*;
+
+public class Main {
+  public static void main(String[] args) {
+    try {
+      Class<?> c = Class.forName("Klass");
+      Object o = c.newInstance();
+      Class<?> iface = Class.forName("Iface2");
+      Method test = c.getMethod("testMe", iface);
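+      // testMe is static, so invoke() is given a null receiver and the Klass instance as the argument.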
+      test.invoke(null, o);
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.out.println("FAILED: could not run testMe!");
+    }
+  }
+}
diff --git a/test/972-iface-super-multidex/expected.txt b/test/972-iface-super-multidex/expected.txt
new file mode 100644
index 0000000..a9d31a5
--- /dev/null
+++ b/test/972-iface-super-multidex/expected.txt
@@ -0,0 +1,2 @@
+SuperInterface default method called
+Expected ICCE caught
diff --git a/test/972-iface-super-multidex/info.txt b/test/972-iface-super-multidex/info.txt
new file mode 100644
index 0000000..f7948ad
--- /dev/null
+++ b/test/972-iface-super-multidex/info.txt
@@ -0,0 +1,3 @@
+Smali-based tests for experimental interface default methods.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
diff --git a/test/972-iface-super-multidex/smali-multidex/conflictinterface.smali b/test/972-iface-super-multidex/smali-multidex/conflictinterface.smali
new file mode 100644
index 0000000..2c76213
--- /dev/null
+++ b/test/972-iface-super-multidex/smali-multidex/conflictinterface.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public abstract interface LConflictInterface;
+.super Ljava/lang/Object;
+.implements LOneConflict;
+.implements LTwoConflict;
+
+# public interface ConflictInterface extends OneConflict, TwoConflict {
+# }
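+# Both superinterfaces define runDefault(), so an invoke-super of it through this
+# interface is expected to raise IncompatibleClassChangeError (see expected.txt).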
diff --git a/test/972-iface-super-multidex/smali-multidex/oneconflict.smali b/test/972-iface-super-multidex/smali-multidex/oneconflict.smali
new file mode 100644
index 0000000..7001f02
--- /dev/null
+++ b/test/972-iface-super-multidex/smali-multidex/oneconflict.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public abstract interface LOneConflict;
+.super Ljava/lang/Object;
+
+# public interface OneConflict {
+#     public String runDefault() {
+#         return "OneConflict default method called";
+#     }
+# }
+
+.method public runDefault()Ljava/lang/String;
+.registers 2
+    # Return the string identifying this default method.
+    const-string v0, "OneConflict default method called"
+    return-object v0
+.end method
diff --git a/test/972-iface-super-multidex/smali-multidex/superinterface.smali b/test/972-iface-super-multidex/smali-multidex/superinterface.smali
new file mode 100644
index 0000000..d45ecea
--- /dev/null
+++ b/test/972-iface-super-multidex/smali-multidex/superinterface.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public abstract interface LSuperInterface;
+.super Ljava/lang/Object;
+
+# public interface SuperInterface {
+#     public String runDefault() {
+#         return "SuperInterface default method called";
+#     }
+# }
+
+.method public runDefault()Ljava/lang/String;
+.registers 2
+    # Return the string identifying this default method.
+    const-string v0, "SuperInterface default method called"
+    return-object v0
+.end method
diff --git a/test/972-iface-super-multidex/smali-multidex/twoconflict.smali b/test/972-iface-super-multidex/smali-multidex/twoconflict.smali
new file mode 100644
index 0000000..b971b74
--- /dev/null
+++ b/test/972-iface-super-multidex/smali-multidex/twoconflict.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public abstract interface LTwoConflict;
+.super Ljava/lang/Object;
+
+# public interface TwoConflict {
+#     public String runDefault() {
+#         return "TwoConflict default method called";
+#     }
+# }
+
+.method public runDefault()Ljava/lang/String;
+.registers 2
+    # Return the string identifying this default method.
+    const-string v0, "TwoConflict default method called"
+    return-object v0
+.end method
diff --git a/test/972-iface-super-multidex/smali/concreteclass.smali b/test/972-iface-super-multidex/smali/concreteclass.smali
new file mode 100644
index 0000000..703da94
--- /dev/null
+++ b/test/972-iface-super-multidex/smali/concreteclass.smali
@@ -0,0 +1,62 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LConcreteClass;
+.super Ljava/lang/Object;
+.implements LSuperInterface;
+.implements LConflictInterface;
+
+# public class ConcreteClass implements SuperInterface, ConflictInterface {
+#     public String runReal() {
+#         return SuperInterface.super.runDefault();
+#     }
+#     public String runConflict() {
+#         return ConflictInterface.super.runDefault();
+#     }
+#     public String runDefault() {
+#         return "This is the wrong class to invoke";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public runConflict()Ljava/lang/String;
+.registers 2
+    # invoke-super through ConflictInterface; its conflicting defaults should raise ICCE.
+    invoke-super {p0}, LConflictInterface;->runDefault()Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+
+
+.method public runReal()Ljava/lang/String;
+.registers 2
+    # invoke-super through SuperInterface; this should reach its default implementation.
+    invoke-super {p0}, LSuperInterface;->runDefault()Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public runDefault()Ljava/lang/String;
+.registers 2
+    const-string v0, "This is the wrong class to invoke!"
+    return-object v0
+.end method
diff --git a/test/972-iface-super-multidex/src/Main.java b/test/972-iface-super-multidex/src/Main.java
new file mode 100644
index 0000000..3fb3f45
--- /dev/null
+++ b/test/972-iface-super-multidex/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+public class Main {
+  public static void main(String[] args) {
+    Class<?> c = null;
+    try {
+      c = Class.forName("ConcreteClass");
+    } catch (Exception e) {
+      System.out.println("Could not load class");
+      e.printStackTrace();
+      return;
+    }
+    try {
+      Method m = c.getMethod("runReal");
+      System.out.println((String)m.invoke(c.newInstance(), new Object[0]));
+    } catch (Exception e) {
+      System.out.println("Unknown exception occurred");
+      e.printStackTrace();
+    }
+    try {
+      Method m = c.getMethod("runConflict");
+      try {
+        System.out.println((String)m.invoke(c.newInstance(), new Object[0]));
+      } catch (InvocationTargetException e) {
+        throw e.getCause();
+      }
+    } catch (AbstractMethodError e) {
+      System.out.println("Unexpected AME caught");
+      e.printStackTrace();
+    } catch (NoSuchMethodError e) {
+      System.out.println("Unexpected NSME caught");
+      e.printStackTrace();
+    } catch (IncompatibleClassChangeError e) {
+      System.out.println("Expected ICCE caught");
+    } catch (Throwable e) {
+      System.out.println("Unknown exception caught!");
+      e.printStackTrace();
+    }
+  }
+}
diff --git a/test/973-default-multidex/expected.txt b/test/973-default-multidex/expected.txt
new file mode 100644
index 0000000..b376e81
--- /dev/null
+++ b/test/973-default-multidex/expected.txt
@@ -0,0 +1 @@
+STRING!!!STRING!!!
diff --git a/test/973-default-multidex/info.txt b/test/973-default-multidex/info.txt
new file mode 100644
index 0000000..17c0b7d
--- /dev/null
+++ b/test/973-default-multidex/info.txt
@@ -0,0 +1,5 @@
+Smali-based tests for interface default methods.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
+
+Tests that we correctly handle methods referenced across dex files.
diff --git a/test/973-default-multidex/smali-multidex/iface.smali b/test/973-default-multidex/smali-multidex/iface.smali
new file mode 100644
index 0000000..fa6d27f
--- /dev/null
+++ b/test/973-default-multidex/smali-multidex/iface.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+# public interface Iface {
+#     public default String getTwice() {
+#         return getString() + getString();
+#     }
+#     public String getString();
+# }
+
+.method public getTwice()Ljava/lang/String;
+.locals 2
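+    # Null-check the receiver explicitly; the result of requireNonNull is deliberately ignored.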
+    invoke-static {p0}, Ljava/util/Objects;->requireNonNull(Ljava/lang/Object;)Ljava/lang/Object;
+    invoke-interface {p0}, LIface;->getString()Ljava/lang/String;
+    move-result-object v0
+    invoke-interface {p0}, LIface;->getString()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public abstract getString()Ljava/lang/String;
+.end method
diff --git a/test/973-default-multidex/smali/concreteclass.smali b/test/973-default-multidex/smali/concreteclass.smali
new file mode 100644
index 0000000..e177f26
--- /dev/null
+++ b/test/973-default-multidex/smali/concreteclass.smali
@@ -0,0 +1,47 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LConcreteClass;
+.super Ljava/lang/Object;
+.implements LIface;
+
+# public class ConcreteClass implements Iface {
+#     public String getString() {
+#         return "STRING!!!";
+#     }
+#     public String callMethod() {
+#         return this.getTwice();
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public getString()Ljava/lang/String;
+.registers 2
+    const-string v0, "STRING!!!"
+    return-object v0
+.end method
+
+.method public callMethod()Ljava/lang/String;
+.registers 2
+    invoke-virtual {p0}, LConcreteClass;->getTwice()Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/973-default-multidex/src/Main.java b/test/973-default-multidex/src/Main.java
new file mode 100644
index 0000000..b93265a
--- /dev/null
+++ b/test/973-default-multidex/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+public class Main {
+  public static void main(String[] args) {
+    Class<?> c = null;
+    try {
+      c = Class.forName("ConcreteClass");
+      Method m = c.getMethod("callMethod");
+      System.out.println(m.invoke(c.newInstance(), new Object[0]));
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.out.println("FAILED: Could not call method");
+      return;
+    }
+  }
+}
diff --git a/test/974-verify-interface-super/expected.txt b/test/974-verify-interface-super/expected.txt
new file mode 100644
index 0000000..7ba7491
--- /dev/null
+++ b/test/974-verify-interface-super/expected.txt
@@ -0,0 +1 @@
+OK. No exception before invoke!
diff --git a/test/974-verify-interface-super/info.txt b/test/974-verify-interface-super/info.txt
new file mode 100644
index 0000000..c5ff1f6
--- /dev/null
+++ b/test/974-verify-interface-super/info.txt
@@ -0,0 +1,3 @@
+Test that we do the right thing with invoke-super on interfaces when there are
+verifier errors.
+
diff --git a/test/974-verify-interface-super/smali/base.smali b/test/974-verify-interface-super/smali/base.smali
new file mode 100644
index 0000000..c7875de
--- /dev/null
+++ b/test/974-verify-interface-super/smali/base.smali
@@ -0,0 +1,31 @@
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LBase;
+
+.super La/klass/that/does/not/Exist;
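+# The superclass is deliberately unresolvable, so using Base must fail with a LinkageError.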
+
+.method public static run()V
+    .locals 4
+    new-instance v0, LBase;
+    invoke-direct {v0}, LBase;-><init>()V
+    invoke-virtual {v0}, LBase;->SayHi()V
+    return-void
+.end method
+
+.method public SayHi()V
+.locals 2
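+  # Base does not implement Iface, so the verifier must reject this invoke-super.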
+  invoke-super {p0}, LIface;->SayHi()V
+  return-void
+.end method
diff --git a/test/974-verify-interface-super/smali/iface.smali b/test/974-verify-interface-super/smali/iface.smali
new file mode 100644
index 0000000..89f9c0b
--- /dev/null
+++ b/test/974-verify-interface-super/smali/iface.smali
@@ -0,0 +1,22 @@
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public abstract interface LIface;
+
+.super Ljava/lang/Object;
+
+.method public SayHi()V
+.locals 0
+    return-void
+.end method
diff --git a/test/974-verify-interface-super/smali/main.smali b/test/974-verify-interface-super/smali/main.smali
new file mode 100644
index 0000000..be4016c
--- /dev/null
+++ b/test/974-verify-interface-super/smali/main.smali
@@ -0,0 +1,40 @@
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMain;
+
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .locals 4
+    const-string v0, "OK. No exception before invoke!"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-static {}, LBase;->run()V
+        const-string v0, "FAIL: no exception!"
+        sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :end
+    :try_end
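+    # A LinkageError from the broken Base class is the expected outcome; any other throwable is a failure.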
+    .catch Ljava/lang/LinkageError; {:try_start .. :try_end} :end
+    .catch Ljava/lang/Throwable; {:try_start .. :try_end} :error
+    :error
+        move-exception v0
+        sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+        invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        invoke-virtual {v0}, Ljava/lang/Throwable;->printStackTrace()V
+    :end
+    return-void
+.end method
diff --git a/test/975-iface-private/build b/test/975-iface-private/build
new file mode 100755
index 0000000..14230c2
--- /dev/null
+++ b/test/975-iface-private/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make the script exit on any failure.
+set -e
+
+./default-build "$@" --experimental default-methods
diff --git a/test/975-iface-private/expected.txt b/test/975-iface-private/expected.txt
new file mode 100644
index 0000000..908a8f2
--- /dev/null
+++ b/test/975-iface-private/expected.txt
@@ -0,0 +1,4 @@
+Saying hi from class
+HELLO!
+Saying hi from interface
+HELLO!
diff --git a/test/975-iface-private/info.txt b/test/975-iface-private/info.txt
new file mode 100644
index 0000000..d5a8d3f
--- /dev/null
+++ b/test/975-iface-private/info.txt
@@ -0,0 +1,5 @@
+Smali-based tests for experimental interface private methods.
+
+This test cannot be run with --jvm.
+
+This test checks that synthetic private methods in interfaces work correctly.
diff --git a/test/975-iface-private/smali/Iface.smali b/test/975-iface-private/smali/Iface.smali
new file mode 100644
index 0000000..a9a44d1
--- /dev/null
+++ b/test/975-iface-private/smali/Iface.smali
@@ -0,0 +1,45 @@
+
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#   public default void sayHi() {
+#     System.out.println(getHiWords());
+#   }
+#
+#   // Synthetic method
+#   private String getHiWords() {
+#     return "HELLO!";
+#   }
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public sayHi()V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
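+    # The private helper is dispatched with invoke-direct rather than invoke-interface.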
+    invoke-direct {p0}, LIface;->getHiWords()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method private synthetic getHiWords()Ljava/lang/String;
+    .locals 1
+    const-string v0, "HELLO!"
+    return-object v0
+.end method
diff --git a/test/975-iface-private/smali/Main.smali b/test/975-iface-private/smali/Main.smali
new file mode 100644
index 0000000..dbde203
--- /dev/null
+++ b/test/975-iface-private/smali/Main.smali
@@ -0,0 +1,71 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main implements Iface {
+#   public static void main(String[] args) {
+#     Main m = new Main();
+#     sayHiMain(m);
+#     sayHiIface(m);
+#   }
+#   public static void sayHiMain(Main m) {
+#     System.out.println("Saying hi from class");
+#     m.sayHi();
+#   }
+#   public static void sayHiIface(Iface m) {
+#     System.out.println("Saying hi from interface");
+#     m.sayHi();
+#   }
+# }
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    new-instance v0, LMain;
+    invoke-direct {v0}, LMain;-><init>()V
+
+    invoke-static {v0}, LMain;->sayHiMain(LMain;)V
+    invoke-static {v0}, LMain;->sayHiIface(LIface;)V
+
+    return-void
+.end method
+
+.method public static sayHiMain(LMain;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Saying hi from class"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->sayHi()V
+    return-void
+.end method
+
+.method public static sayHiIface(LIface;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Saying hi from interface"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->sayHi()V
+    return-void
+.end method
diff --git a/test/976-conflict-no-methods/expected.txt b/test/976-conflict-no-methods/expected.txt
new file mode 100644
index 0000000..656dfc5
--- /dev/null
+++ b/test/976-conflict-no-methods/expected.txt
@@ -0,0 +1 @@
+Pass
diff --git a/test/976-conflict-no-methods/info.txt b/test/976-conflict-no-methods/info.txt
new file mode 100644
index 0000000..cdc3149
--- /dev/null
+++ b/test/976-conflict-no-methods/info.txt
@@ -0,0 +1 @@
+Regression test for classes that have conflict tables but no methods. b/28707801
\ No newline at end of file
diff --git a/test/976-conflict-no-methods/smali/Iface.smali b/test/976-conflict-no-methods/smali/Iface.smali
new file mode 100644
index 0000000..aa4ec37
--- /dev/null
+++ b/test/976-conflict-no-methods/smali/Iface.smali
@@ -0,0 +1,281 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#    public void abstractMethod0();
+#    public void abstractMethod1();
+#    public void abstractMethod2();
+#    public void abstractMethod3();
+#    public void abstractMethod4();
+#    public void abstractMethod5();
+#    public void abstractMethod6();
+#    public void abstractMethod7();
+#    public void abstractMethod8();
+#    public void abstractMethod9();
+#    public void abstractMethod10();
+#    public void abstractMethod11();
+#    public void abstractMethod12();
+#    public void abstractMethod13();
+#    public void abstractMethod14();
+#    public void abstractMethod15();
+#    public void abstractMethod16();
+#    public void abstractMethod17();
+#    public void abstractMethod18();
+#    public void abstractMethod19();
+#    public void abstractMethod20();
+#    public void abstractMethod21();
+#    public void abstractMethod22();
+#    public void abstractMethod23();
+#    public void abstractMethod24();
+#    public void abstractMethod25();
+#    public void abstractMethod26();
+#    public void abstractMethod27();
+#    public void abstractMethod28();
+#    public void abstractMethod29();
+#    public void abstractMethod30();
+#    public void abstractMethod31();
+#    public void abstractMethod32();
+#    public void abstractMethod33();
+#    public void abstractMethod34();
+#    public void abstractMethod35();
+#    public void abstractMethod36();
+#    public void abstractMethod37();
+#    public void abstractMethod38();
+#    public void abstractMethod39();
+#    public void abstractMethod40();
+#    public void abstractMethod41();
+#    public void abstractMethod42();
+#    public void abstractMethod43();
+#    public void abstractMethod44();
+#    public void abstractMethod45();
+#    public void abstractMethod46();
+#    public void abstractMethod47();
+#    public void abstractMethod48();
+#    public void abstractMethod49();
+#    public void abstractMethod50();
+#    public void abstractMethod51();
+#    public void abstractMethod52();
+#    public void abstractMethod53();
+#    public void abstractMethod54();
+#    public void abstractMethod55();
+#    public void abstractMethod56();
+#    public void abstractMethod57();
+#    public void abstractMethod58();
+#    public void abstractMethod59();
+#    public void abstractMethod60();
+#    public void abstractMethod61();
+#    public void abstractMethod62();
+#    public void abstractMethod63();
+#    public void abstractMethod64();
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
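+# Declares enough abstract methods (65) to force IMT conflict entries in implementing
+# classes that add no methods of their own; see info.txt and b/28707801.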
+
+.method public abstract abstractMethod0()V
+.end method
+
+.method public abstract abstractMethod1()V
+.end method
+
+.method public abstract abstractMethod2()V
+.end method
+
+.method public abstract abstractMethod3()V
+.end method
+
+.method public abstract abstractMethod4()V
+.end method
+
+.method public abstract abstractMethod5()V
+.end method
+
+.method public abstract abstractMethod6()V
+.end method
+
+.method public abstract abstractMethod7()V
+.end method
+
+.method public abstract abstractMethod8()V
+.end method
+
+.method public abstract abstractMethod9()V
+.end method
+
+.method public abstract abstractMethod10()V
+.end method
+
+.method public abstract abstractMethod11()V
+.end method
+
+.method public abstract abstractMethod12()V
+.end method
+
+.method public abstract abstractMethod13()V
+.end method
+
+.method public abstract abstractMethod14()V
+.end method
+
+.method public abstract abstractMethod15()V
+.end method
+
+.method public abstract abstractMethod16()V
+.end method
+
+.method public abstract abstractMethod17()V
+.end method
+
+.method public abstract abstractMethod18()V
+.end method
+
+.method public abstract abstractMethod19()V
+.end method
+
+.method public abstract abstractMethod20()V
+.end method
+
+.method public abstract abstractMethod21()V
+.end method
+
+.method public abstract abstractMethod22()V
+.end method
+
+.method public abstract abstractMethod23()V
+.end method
+
+.method public abstract abstractMethod24()V
+.end method
+
+.method public abstract abstractMethod25()V
+.end method
+
+.method public abstract abstractMethod26()V
+.end method
+
+.method public abstract abstractMethod27()V
+.end method
+
+.method public abstract abstractMethod28()V
+.end method
+
+.method public abstract abstractMethod29()V
+.end method
+
+.method public abstract abstractMethod30()V
+.end method
+
+.method public abstract abstractMethod31()V
+.end method
+
+.method public abstract abstractMethod32()V
+.end method
+
+.method public abstract abstractMethod33()V
+.end method
+
+.method public abstract abstractMethod34()V
+.end method
+
+.method public abstract abstractMethod35()V
+.end method
+
+.method public abstract abstractMethod36()V
+.end method
+
+.method public abstract abstractMethod37()V
+.end method
+
+.method public abstract abstractMethod38()V
+.end method
+
+.method public abstract abstractMethod39()V
+.end method
+
+.method public abstract abstractMethod40()V
+.end method
+
+.method public abstract abstractMethod41()V
+.end method
+
+.method public abstract abstractMethod42()V
+.end method
+
+.method public abstract abstractMethod43()V
+.end method
+
+.method public abstract abstractMethod44()V
+.end method
+
+.method public abstract abstractMethod45()V
+.end method
+
+.method public abstract abstractMethod46()V
+.end method
+
+.method public abstract abstractMethod47()V
+.end method
+
+.method public abstract abstractMethod48()V
+.end method
+
+.method public abstract abstractMethod49()V
+.end method
+
+.method public abstract abstractMethod50()V
+.end method
+
+.method public abstract abstractMethod51()V
+.end method
+
+.method public abstract abstractMethod52()V
+.end method
+
+.method public abstract abstractMethod53()V
+.end method
+
+.method public abstract abstractMethod54()V
+.end method
+
+.method public abstract abstractMethod55()V
+.end method
+
+.method public abstract abstractMethod56()V
+.end method
+
+.method public abstract abstractMethod57()V
+.end method
+
+.method public abstract abstractMethod58()V
+.end method
+
+.method public abstract abstractMethod59()V
+.end method
+
+.method public abstract abstractMethod60()V
+.end method
+
+.method public abstract abstractMethod61()V
+.end method
+
+.method public abstract abstractMethod62()V
+.end method
+
+.method public abstract abstractMethod63()V
+.end method
+
+.method public abstract abstractMethod64()V
+.end method
diff --git a/test/976-conflict-no-methods/smali/Main.smali b/test/976-conflict-no-methods/smali/Main.smali
new file mode 100644
index 0000000..7dd1160
--- /dev/null
+++ b/test/976-conflict-no-methods/smali/Main.smali
@@ -0,0 +1,358 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Pass"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public abstractMethod0()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod1()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod2()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod3()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod4()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod5()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod6()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod7()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod8()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod9()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod10()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod11()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod12()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod13()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod14()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod15()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod16()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod17()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod18()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod19()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod20()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod21()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod22()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod23()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod24()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod25()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod26()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod27()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod28()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod29()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod30()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod31()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod32()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod33()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod34()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod35()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod36()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod37()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod38()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod39()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod40()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod41()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod42()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod43()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod44()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod45()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod46()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod47()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod48()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod49()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod50()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod51()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod52()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod53()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod54()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod55()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod56()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod57()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod58()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod59()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod60()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod61()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod62()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod63()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod64()V
+    .locals 0
+    return-void
+.end method
diff --git a/test/976-conflict-no-methods/smali/NoMethods.smali b/test/976-conflict-no-methods/smali/NoMethods.smali
new file mode 100644
index 0000000..787e34a
--- /dev/null
+++ b/test/976-conflict-no-methods/smali/NoMethods.smali
@@ -0,0 +1,19 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+
+.class public LNoMethods;
+.super LMain;
diff --git a/test/Android.libartagent.mk b/test/Android.libartagent.mk
new file mode 100644
index 0000000..729de3f
--- /dev/null
+++ b/test/Android.libartagent.mk
@@ -0,0 +1,101 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBARTAGENT_COMMON_SRC_FILES := \
+    900-hello-plugin/load_unload.cc
+
+# $(1): target or host
+# $(2): debug or <empty>
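+# Each $(eval $(call build-libartagent,...)) below expands to the build rules for one
+# configuration; debug builds are published with a "d" suffix (libartagentd).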
+define build-libartagent
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
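+  # Pick the module-name suffix: "d" for debug builds, empty otherwise.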
+  ifneq ($(2),debug)
+    ifneq ($(2),)
+      $$(error expected debug or empty for argument 2, received $(2))
+    endif
+    suffix :=
+  else
+    suffix := d
+  endif
+
+  art_target_or_host := $(1)
+
+  include $(CLEAR_VARS)
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  LOCAL_MODULE := libartagent$$(suffix)
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_MODULE_TAGS := tests
+  endif
+  LOCAL_SRC_FILES := $(LIBARTAGENT_COMMON_SRC_FILES)
+  LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace libnativehelper
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libartagent.mk
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
+    LOCAL_SHARED_LIBRARIES += libdl
+    LOCAL_MULTILIB := both
+    LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_64)
+    LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    ifeq ($$(suffix),d)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
+    endif
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_IS_HOST_MODULE := true
+    LOCAL_MULTILIB := both
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+
+  # Clear locally used variables.
+  art_target_or_host :=
+  suffix :=
+endef
+
+ifeq ($(ART_BUILD_TARGET),true)
+  $(eval $(call build-libartagent,target,))
+  $(eval $(call build-libartagent,target,debug))
+endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libartagent,host,))
+  $(eval $(call build-libartagent,host,debug))
+endif
+
+# Clear locally used variables.
+LOCAL_PATH :=
+LIBARTAGENT_COMMON_SRC_FILES :=
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index bffd0e0..ec5b7d2 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -25,20 +25,29 @@
   004-SignalTest/signaltest.cc \
   004-ReferenceMap/stack_walk_refmap_jni.cc \
   004-StackWalk/stack_walk_jni.cc \
+  004-ThreadStress/thread_stress.cc \
   004-UnsafeTest/unsafe_test.cc \
   044-proxy/native_proxy.cc \
   051-thread/thread_test.cc \
   117-nopatchoat/nopatchoat.cc \
   1337-gc-coverage/gc_coverage.cc \
+  136-daemon-jni-shutdown/daemon_jni_shutdown.cc \
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
+  148-multithread-gc-annotations/gc_coverage.cc \
+  149-suspend-all-stress/suspend_all.cc \
   454-get-vreg/get_vreg_jni.cc \
-  455-set-vreg/set_vreg_jni.cc \
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
-  497-inlining-and-class-loader/clear_dex_cache.cc
+  497-inlining-and-class-loader/clear_dex_cache.cc \
+  543-env-long-ref/env_long_ref.cc \
+  566-polymorphic-inlining/polymorphic_inline.cc \
+  570-checker-osr/osr.cc \
+  595-profile-saving/profile-saving.cc \
+  596-app-images/app_images.cc \
+  597-deopt-new-string/deopt.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
@@ -77,8 +86,12 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libarttest.mk
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
-    $(call set-target-local-cflags-vars,debug)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
     LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
@@ -87,9 +100,16 @@
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
-    LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    ifeq ($$(suffix),d)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
+    endif
+    LOCAL_LDLIBS := -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/test/Android.libnativebridgetest.mk b/test/Android.libnativebridgetest.mk
index e8cc7e4..5c97e4d 100644
--- a/test/Android.libnativebridgetest.mk
+++ b/test/Android.libnativebridgetest.mk
@@ -48,7 +48,7 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libnativebridgetest.mk
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_STATIC_LIBRARIES := libgtest
@@ -60,9 +60,9 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_SHARED_LIBRARIES := libcutils
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_LDLIBS := -ldl -lpthread
     ifeq ($(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
diff --git a/test/Android.libtiagent.mk b/test/Android.libtiagent.mk
new file mode 100644
index 0000000..626dc3b
--- /dev/null
+++ b/test/Android.libtiagent.mk
@@ -0,0 +1,102 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBTIAGENT_COMMON_SRC_FILES := \
+    ti-agent/common_load.cc \
+    901-hello-ti-agent/basics.cc
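+# ti-agent/common_load.cc holds the agent entry points shared by the individual ti-agent tests.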
+
+# $(1): target or host
+# $(2): debug or <empty>
+define build-libtiagent
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
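+  # Debug builds are suffixed with "d" (libtiagentd); non-debug builds use the bare module name.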
+  ifneq ($(2),debug)
+    ifneq ($(2),)
+      $$(error expected debug or empty for argument 2, received $(2))
+    endif
+    suffix :=
+  else
+    suffix := d
+  endif
+
+  art_target_or_host := $(1)
+
+  include $(CLEAR_VARS)
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  LOCAL_MODULE := libtiagent$$(suffix)
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_MODULE_TAGS := tests
+  endif
+  LOCAL_SRC_FILES := $(LIBTIAGENT_COMMON_SRC_FILES)
+  LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace libnativehelper libopenjdkjvmti$$(suffix)
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime art/test
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libtiagent.mk
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
+    LOCAL_SHARED_LIBRARIES += libdl
+    LOCAL_MULTILIB := both
+    LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_64)
+    LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    ifeq ($$(suffix),d)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
+    endif
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_IS_HOST_MODULE := true
+    LOCAL_MULTILIB := both
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+
+  # Clear locally used variables.
+  art_target_or_host :=
+  suffix :=
+endef
+
+ifeq ($(ART_BUILD_TARGET),true)
+  $(eval $(call build-libtiagent,target,))
+  $(eval $(call build-libtiagent,target,debug))
+endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libtiagent,host,))
+  $(eval $(call build-libtiagent,host,debug))
+endif
+
+# Clear locally used variables.
+LOCAL_PATH :=
+LIBTIAGENT_COMMON_SRC_FILES :=
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 5bbbbc1..75c4f34 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -26,7 +26,8 @@
 
 # The path where build only targets will be output, e.g.
 # out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
-art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
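+# Build outputs land in the JAVA_LIBRARIES intermediates and are then staged into the
+# PACKAGING intermediates, which LOCAL_PICKUP_FILES points at below.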
+art_run_tests_build_dir := $(call intermediates-dir-for,JAVA_LIBRARIES,art-run-tests)/DATA
+art_run_tests_install_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
 
 # A generated list of prerequisites that call 'run-test --build-only', the actual prerequisite is
 # an empty file touched in the intermediate directory.
@@ -37,13 +38,10 @@
   $(DX) \
   $(HOST_OUT_EXECUTABLES)/jasmin \
   $(HOST_OUT_EXECUTABLES)/smali \
-  $(HOST_OUT_EXECUTABLES)/dexmerger
+  $(HOST_OUT_EXECUTABLES)/dexmerger \
+  $(JACK)
 
-ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-  TEST_ART_RUN_TEST_DEPENDENCIES += \
-    $(JACK) \
-    $(JILL_JAR)
-endif
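+# setup-jack-server is an order-only prerequisite (it appears after "|" in the rules below):
+# it must have run before tests are built, but it never retriggers a build by itself.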
+TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES := setup-jack-server
 
 ifeq ($(ART_TEST_DEBUG_GC),true)
   ART_TEST_WITH_STRACE := true
@@ -52,27 +50,32 @@
 # Helper to create individual build targets for tests. Must be called with $(eval).
 # $(1): the test number
 define define-build-art-run-test
-  dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch
+  dmart_target := $(art_run_tests_build_dir)/art-run-tests/$(1)/touch
+  dmart_install_target := $(art_run_tests_install_dir)/art-run-tests/$(1)/touch
   run_test_options = --build-only
-  ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-    run_test_options += --build-with-jack
-  else
-    run_test_options += --build-with-javac-dx
+  ifeq ($(ART_TEST_QUIET),true)
+    run_test_options += --quiet
   endif
 $$(dmart_target): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
+$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
-	  JILL_JAR=$(abspath $(JILL_JAR)) \
 	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
 	$(hide) touch $$@
 
-  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_target)
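+# Stage the build-only outputs from the build intermediates into the install directory
+# scanned by the packaging rules.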
+$$(dmart_install_target): $$(dmart_target)
+	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
+	$(hide) cp $$(dir $$<)/* $$(dir $$@)/
+
+  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_install_target)
   dmart_target :=
+  dmart_install_target :=
   run_test_options :=
 endef
 $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-build-art-run-test,$(test))))
@@ -82,12 +85,13 @@
 LOCAL_MODULE := art-run-tests
 LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_BUILD_RULES)
 # The build system uses this flag to pick up files generated by define-build-art-run-test.
-LOCAL_PICKUP_FILES := $(art_run_tests_dir)
+LOCAL_PICKUP_FILES := $(art_run_tests_install_dir)
 
 include $(BUILD_PHONY_PACKAGE)
 
 # Clear temp vars.
-art_run_tests_dir :=
+art_run_tests_build_dir :=
+art_run_tests_install_dir :=
 define-build-art-run-test :=
 TEST_ART_RUN_TEST_BUILD_RULES :=
 
@@ -106,9 +110,6 @@
   PREBUILD_TYPES += no-dex2oat
 endif
 COMPILER_TYPES :=
-ifeq ($(ART_TEST_DEFAULT_COMPILER),true)
-  COMPILER_TYPES += default
-endif
 ifeq ($(ART_TEST_INTERPRETER_ACCESS_CHECKS),true)
   COMPILER_TYPES += interp-ac
 endif
@@ -118,8 +119,14 @@
 ifeq ($(ART_TEST_JIT),true)
   COMPILER_TYPES += jit
 endif
+OPTIMIZING_COMPILER_TYPES :=
 ifeq ($(ART_TEST_OPTIMIZING),true)
   COMPILER_TYPES += optimizing
+  OPTIMIZING_COMPILER_TYPES += optimizing
+endif
+ifeq ($(ART_TEST_OPTIMIZING_GRAPH_COLOR),true)
+  COMPILER_TYPES += regalloc_gc
+  OPTIMIZING_COMPILER_TYPES += regalloc_gc
 endif
 RELOCATE_TYPES := relocate
 ifeq ($(ART_TEST_RUN_TEST_NO_RELOCATE),true)
@@ -150,8 +157,14 @@
 ifeq ($(ART_TEST_RUN_TEST_NO_IMAGE),true)
   IMAGE_TYPES += no-image
 endif
+ifeq ($(ART_TEST_RUN_TEST_MULTI_IMAGE),true)
+  IMAGE_TYPES := multiimage
+endif
 ifeq ($(ART_TEST_PIC_IMAGE),true)
   IMAGE_TYPES += picimage
+  ifeq ($(ART_TEST_RUN_TEST_MULTI_IMAGE),true)
+    IMAGE_TYPES := multipicimage
+  endif
 endif
 PICTEST_TYPES := npictest
 ifeq ($(ART_TEST_PIC_TEST),true)
@@ -205,8 +218,29 @@
 $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_')
 endef  # name-to-var
 
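+# Tests named in ART_TEST_RUN_TEST_SKIP are folded into the known-broken set, so no
+# targets will run them.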
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES))
+
+
+# Disable 149-suspend-all-stress: its output is flaky (b/28988206).
+# Disable 577-profile-foreign-dex (b/27454772).
+# Disable 552-checker-sharpening until the compiler component of the new string dex cache
+# is added (@cwadsworth, @vmarko).
+TEST_ART_BROKEN_ALL_TARGET_TESTS := \
+  149-suspend-all-stress \
+  577-profile-foreign-dex \
+  552-checker-sharpening
+
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+    $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_ALL_TARGET_TESTS), \
+    $(ALL_ADDRESS_SIZES))
+
+TEST_ART_BROKEN_ALL_TARGET_TESTS :=
+
 # Tests that are timing sensitive and flaky on heavily loaded systems.
 TEST_ART_TIMING_SENSITIVE_RUN_TESTS := \
+  002-sleep \
   053-wait-some \
   055-enum-performance \
   133-static-invoke-super
@@ -218,11 +252,30 @@
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
 
+# 147-stripped-dex-fallback isn't supported on device because --strip-dex
+# requires the zip command.
+# 569-checker-pattern-replacement tests behaviour that exists only on the host.
+TEST_ART_BROKEN_TARGET_TESTS := \
+  147-stripped-dex-fallback \
+  569-checker-pattern-replacement
+
+ifneq (,$(filter target,$(TARGET_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TARGET_TESTS), $(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_TARGET_TESTS :=
+
 # Tests that require python3.
 TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS := \
   960-default-smali \
   961-default-iface-resolution-generated \
   964-default-iface-init-generated \
+  968-default-partial-compile-generated \
+  969-iface-super \
+  970-iface-super-resolution-generated \
+  971-iface-super-partial-compile-generated
 
 # Check if we have python3 to run our tests.
 ifeq ($(wildcard /usr/bin/python3),)
@@ -250,8 +303,15 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
+# 529 and 555: b/27784033
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  147-stripped-dex-fallback \
+  554-jit-profile-file \
+  529-checker-unresolved \
+  555-checker-regression-x86const \
+  608-checker-unresolved-lse
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
@@ -266,7 +326,8 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS := \
   117-nopatchoat \
   118-noimage-dex2oat \
-  119-noimage-patchoat
+  119-noimage-patchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -278,17 +339,7 @@
 
 # Temporarily disable some broken tests when forcing access checks in interpreter b/22414682
 TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \
-  004-JniTest \
-  005-annotations \
-  044-proxy \
-  073-mismatched-field \
-  088-monitor-verification \
-  135-MirandaDispatch \
-  137-cfi \
-  412-new-array \
-  471-uninitialized-locals \
-  506-verify-aput \
-  800-smali
+  137-cfi
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -301,12 +352,13 @@
 # Tests that are broken with GC stress.
 # * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
 #   hope the second process got into the expected state. The slowness of gcstress makes this bad.
-# * 961-default-iface-resolution-generated is a very long test that often will take more than the
-#   timeout to run when gcstress is enabled. This is because gcstress slows down allocations
-#   significantly which this test does a lot.
+# * 961-default-iface-resolution-generated and 964-default-iface-init-generated are very long tests
+#   that often will take more than the timeout to run when gcstress is enabled. This is because
+#   gcstress slows down allocations significantly which these tests do a lot.
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
   137-cfi \
-  961-default-iface-resolution-generated
+  961-default-iface-resolution-generated \
+  964-default-iface-init-generated
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -345,13 +397,17 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
+# 147-stripped-dex-fallback is disabled because it requires --prebuild.
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
   119-noimage-patchoat \
   137-cfi \
-  138-duplicate-classes-check2
+  138-duplicate-classes-check2 \
+  147-stripped-dex-fallback \
+  554-jit-profile-file
 
 # This test fails without an image.
 TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
@@ -384,13 +440,14 @@
 
 # 137:
 # This test unrolls and expects managed frames, but tracing means we run the interpreter.
-# 802:
+# 802 and 570-checker-osr:
 # These tests dynamically enable tracing to force a deoptimization. This makes the tests
 # meaningless when already tracing, and writes an error message that we do not want to check for.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
   087-gc-after-link \
   137-cfi \
   141-class-unload \
+  570-checker-osr \
   802-deoptimization
 
 ifneq (,$(filter trace stream,$(TRACE_TYPES)))
@@ -402,7 +459,8 @@
 # Known broken tests for the interpreter.
 # CFI unwinding expects managed frames.
 TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 
 ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -426,32 +484,28 @@
 
 TEST_ART_BROKEN_JIT_RUN_TESTS :=
 
-# Known broken tests for the default compiler (Quick).
-TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  457-regs
+# Known broken tests for the graph coloring register allocator.
+# These tests were based on the linear scan allocator, which makes different decisions than
+# the graph coloring allocator. (These attempt to test for code quality, not correctness.)
+TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR := \
+  570-checker-select \
+  484-checker-register-hints
 
-ifneq (,$(filter default,$(COMPILER_TYPES)))
+ifneq (,$(filter regalloc_gc,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_DEFAULT_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+      regalloc_gc,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+      $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES))
 endif
 
-TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
-
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
-    441-checker-inliner \
-    449-checker-bce \
     510-checker-try-catch \
-    521-checker-array-set-null \
-    529-checker-unresolved \
-    534-checker-bce-deoptimization \
-    536-checker-intrinsic-optimization \
 
 ifeq (mips,$(TARGET_ARCH))
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -459,26 +513,28 @@
 
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
 
-# Known broken tests for the optimizing compiler.
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \
-  455-set-vreg \
+# Known broken tests for the mips64 optimizing compiler backend.
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+ifeq (mips64,$(TARGET_ARCH))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS :=
 
 # Tests that should fail when the optimizing compiler compiles them non-debuggable.
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
   454-get-vreg \
   457-regs \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -487,68 +543,101 @@
 # Tests that should fail when the optimizing compiler compiles them debuggable.
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
-# Tests that should fail in the read barrier configuration.
-# 137: Read barrier forces interpreter. Cannot run this with the interpreter.
-# 141: Class unloading test is flaky with CC since CC seems to occasionally keep class loaders live.
-TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  137-cfi \
-  141-class-unload
+# Tests that should fail in the read barrier configuration with the interpreter.
+TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
+
+# Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
+# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
+#      more parallel moves on x86, thus some Checker assertions may fail.
+# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
+#      instruction yet (b/26601270).
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
+  484-checker-register-hints \
+  527-checker-array-access-split
+
+# Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
+
+# Tests failing in non-Baker read barrier configurations with the Optimizing compiler (AOT).
+# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+#      handled in non-Baker read barrier configurations.
+TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS := \
+  537-checker-arraycopy
+
+# Tests failing in non-Baker read barrier configurations with JIT (Optimizing compiler).
+# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+#      handled in non-Baker read barrier configurations.
+TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS := \
+  537-checker-arraycopy
 
 ifeq ($(ART_USE_READ_BARRIER),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
-
-# Tests that should fail in the heap poisoning configuration with the default (Quick) compiler.
-# 137: Quick punts to the interpreter, and this test cannot run this with the interpreter.
-TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
-# Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
-# 055-enum-performance: Exceeds run time limits due to heap poisoning instrumentation.
-TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
-  055-enum-performance
-# Tests that should fail in the heap poisoning configuration with the interpreter.
-# 137: Cannot run this with the interpreter.
-TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
-
-ifeq ($(ART_HEAP_POISONING),true)
-  ifneq (,$(filter default,$(COMPILER_TYPES)))
+  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
 
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \
+        $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    ifneq ($(ART_READ_BARRIER_TYPE),BAKER)
+      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+          $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \
+          $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+          $(TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    endif
+  endif
+
+  ifneq (,$(filter jit,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    ifneq ($(ART_READ_BARRIER_TYPE),BAKER)
+      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+          $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+          $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+          $(TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    endif
+  endif
+endif
+
+TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS :=
+
+TEST_ART_BROKEN_NPIC_RUN_TESTS := 596-app-images
+ifneq (,$(filter npictest,$(PICTEST_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),npictest,$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_NPIC_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
+# Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
+# 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices).
+TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
+  055-enum-performance
+
+ifeq ($(ART_HEAP_POISONING),true)
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
-
-  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
 endif
 
-TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS :=
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
-TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
 
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
@@ -590,6 +679,22 @@
 # only once).
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUTS)
 
+# Also need libartagent.
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libartagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libartagentd.so
+ifdef TARGET_2ND_ARCH
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libartagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libartagentd.so
+endif
+
+# Also need libtiagent.
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libtiagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libtiagentd.so
+ifdef TARGET_2ND_ARCH
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libtiagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libtiagentd.so
+endif
+
 # Also need libarttest.
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
@@ -608,22 +713,34 @@
 # specific version depending on the compiler.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libtiagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libtiagentd$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libartagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libartagentd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libtiagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libtiagentd$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libartagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libartagentd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
 endif
 
 # Create a rule to build and run a test following the form:
 # test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
-#    {4: interpreter default optimizing jit interp-ac}-
+#    {4: interpreter optimizing regalloc_gc jit interp-ac}-
 #    {5: relocate nrelocate relocate-npatchoat}-
 #    {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
 #    {9: no-image image picimage}-{10: pictest npictest}-
@@ -634,11 +751,6 @@
   test_groups :=
   uc_host_or_target :=
   jack_classpath :=
-  ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-    run_test_options += --build-with-jack
-  else
-    run_test_options += --build-with-javac-dx
-  endif
   ifeq ($(ART_TEST_WITH_STRACE),true)
     run_test_options += --strace
   endif
@@ -690,6 +802,9 @@
   ifeq ($(4),optimizing)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_RULES
     run_test_options += --optimizing
+  else ifeq ($(4),regalloc_gc)
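+    # regalloc_gc runs the same Optimizing compiler, but selects the graph coloring
+    # register allocator via a dex2oat option.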
+    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_GRAPH_COLOR_RULES
+    run_test_options += --optimizing -Xcompiler-option --register-allocation-strategy=graph-color
   else
     ifeq ($(4),interpreter)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES
@@ -698,16 +813,11 @@
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_ACCESS_CHECKS_RULES
       run_test_options += --interpreter --verify-soft-fail
     else
-      ifeq ($(4),default)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEFAULT_RULES
-        run_test_options += --quick
+      ifeq ($(4),jit)
+        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JIT_RULES
+        run_test_options += --jit
       else
-        ifeq ($(4),jit)
-          test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JIT_RULES
-          run_test_options += --jit
-        else
-          $$(error found $(4) expected $(COMPILER_TYPES))
-        endif
+        $$(error found $(4) expected $(COMPILER_TYPES))
       endif
     endif
   endif
@@ -778,6 +888,10 @@
     endif
   endif
   image_suffix := $(4)
+  ifeq ($(4),regalloc_gc)
+    # Graph coloring tests share the image_suffix with optimizing tests.
+    image_suffix := optimizing
+  endif
   ifeq ($(9),no-image)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES
     run_test_options += --no-image
@@ -806,7 +920,27 @@
           prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_$(13))
         endif
       else
-        $$(error found $(9) expected $(IMAGE_TYPES))
+        ifeq ($(9),multiimage)
+          test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES
+          run_test_options += --multi-image
+          ifeq ($(1),host)
+            prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+          else
+            prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+          endif
+        else
+          ifeq ($(9),multipicimage)
+            test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES
+            run_test_options += --pic-image --multi-image
+            ifeq ($(1),host)
+              prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+            else
+              prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+            endif
+          else
+            $$(error found $(9) expected $(IMAGE_TYPES))
+          endif
+        endif
       endif
     endif
   endif
@@ -861,18 +995,22 @@
   ifneq ($(ART_TEST_ANDROID_ROOT),)
     run_test_options := --android-root $(ART_TEST_ANDROID_ROOT) $$(run_test_options)
   endif
+  ifeq ($(ART_TEST_QUIET),true)
+    run_test_options += --quiet
+  endif
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath)
 .PHONY: $$(run_test_rule_name)
-$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule)
+$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  DX=$(abspath $(DX)) \
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
-	    JILL_JAR=$(abspath $(JILL_JAR)) \
 	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(12) \
 	      && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
@@ -1004,5 +1142,10 @@
 RUN_TYPES :=
 DEBUGGABLE_TYPES :=
 
-include $(LOCAL_PATH)/Android.libarttest.mk
-include art/test/Android.libnativebridgetest.mk
+MY_LOCAL_PATH := $(LOCAL_PATH)
+include $(MY_LOCAL_PATH)/Android.libartagent.mk
+include $(MY_LOCAL_PATH)/Android.libtiagent.mk
+include $(MY_LOCAL_PATH)/Android.libarttest.mk
+include $(MY_LOCAL_PATH)/Android.libnativebridgetest.mk
+MY_LOCAL_PATH :=
+LOCAL_PATH :=
diff --git a/test/Lookup/A.java b/test/Lookup/A.java
new file mode 100644
index 0000000..666ba18
--- /dev/null
+++ b/test/Lookup/A.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class A {}
diff --git a/test/Lookup/AB.java b/test/Lookup/AB.java
new file mode 100644
index 0000000..b231708
--- /dev/null
+++ b/test/Lookup/AB.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class AB {}
diff --git a/test/Lookup/C.java b/test/Lookup/C.java
new file mode 100644
index 0000000..5b90069
--- /dev/null
+++ b/test/Lookup/C.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class C {}
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 19c13f7..45cfd0f 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import dalvik.annotation.optimization.FastNative;
+
 class MyClassNatives {
     native void throwException();
     native void foo();
@@ -35,11 +37,11 @@
     static native int getText(long val1, Object obj1, long val2, Object obj2);
     synchronized native Object []getSinkPropertiesNative(String path);
 
-    native Class instanceMethodThatShouldReturnClass();
-    static native Class staticMethodThatShouldReturnClass();
+    native Class<?> instanceMethodThatShouldReturnClass();
+    static native Class<?> staticMethodThatShouldReturnClass();
 
-    native void instanceMethodThatShouldTakeClass(int i, Class c);
-    static native void staticMethodThatShouldTakeClass(int i, Class c);
+    native void instanceMethodThatShouldTakeClass(int i, Class<?> c);
+    static native void staticMethodThatShouldTakeClass(int i, Class<?> c);
 
     native float checkFloats(float f1, float f2);
     native void forceStackParameters(int i1, int i2, int i3, int i4, int i5, int i6, int i8, int i9,
@@ -102,4 +104,9 @@
     static native boolean returnTrue();
     static native boolean returnFalse();
     static native int returnInt();
+
+    // Check for the presence (or absence) of the @FastNative annotation.
+    public static native void normalNative();
+    @FastNative
+    public static native void fastNative();
 }
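
@FastNative (from dalvik.annotation.optimization) requests a trimmed JNI
transition and is meant for short, non-blocking native methods. A minimal
declaration sketch, with FastNativeExample as a hypothetical class that is
not part of this patch:

    import dalvik.annotation.optimization.FastNative;

    class FastNativeExample {  // hypothetical illustration
        // Regular JNI: full managed <-> native thread-state transition per call.
        static native void slowPath();

        // @FastNative: the transition is trimmed, making the call cheaper; the
        // method is expected to stay short-running and avoid blocking.
        @FastNative
        static native void fastPath();
    }
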
diff --git a/test/ProfileTestMultiDex/Main.java b/test/ProfileTestMultiDex/Main.java
new file mode 100644
index 0000000..41532ea
--- /dev/null
+++ b/test/ProfileTestMultiDex/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  public String getA() {
+    return "A";
+  }
+  public String getB() {
+    return "B";
+  }
+  public String getC() {
+    return "C";
+  }
+}
diff --git a/test/ProfileTestMultiDex/Second.java b/test/ProfileTestMultiDex/Second.java
new file mode 100644
index 0000000..4ac5abc
--- /dev/null
+++ b/test/ProfileTestMultiDex/Second.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Second {
+  public String getX() {
+    return "X";
+  }
+  public String getY() {
+    return "Y";
+  }
+  public String getZ() {
+    return "Z";
+  }
+}
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
new file mode 100644
index 0000000..f2e3b4e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -0,0 +1,3 @@
+main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Second
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
new file mode 100644
index 0000000..44ba78e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.list
@@ -0,0 +1 @@
+Main.class
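
main.list is the form dx consumes via --main-dex-list (here pinning Main.class
to the primary classes.dex), and main.jpp plays the analogous role for the Jack
toolchain via its ForceInMainDex annotation. A hedged sketch of a driver that
touches methods from both classes so a recorded profile can contain entries
from both dex files (Driver is hypothetical):

    class Driver {  // hypothetical; Main and Second are the classes added above
        static String exerciseBothDexFiles() {
            // One call into each class, so profile entries can originate
            // from both the primary and the secondary dex file.
            return new Main().getA() + new Second().getX();
        }
    }
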
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 082c9b3..ee2ee1a 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -16,12 +16,17 @@
 
 #include "jni.h"
 
+#include "base/enums.h"
 #include "base/logging.h"
 #include "dex_file-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "nth_caller_visitor.h"
+#include "oat_quick_method_header.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
 #include "stack.h"
 #include "thread-inl.h"
 
@@ -56,7 +61,7 @@
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasImage(JNIEnv* env ATTRIBUTE_UNUSED,
                                                          jclass cls ATTRIBUTE_UNUSED) {
-  return Runtime::Current()->GetHeap()->HasImageSpace();
+  return Runtime::Current()->GetHeap()->HasBootImageSpace();
 }
 
 // public static native boolean isImageDex2OatEnabled();
@@ -116,4 +121,41 @@
   return JNI_TRUE;
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
+                                                             jclass,
+                                                             jclass cls,
+                                                             jstring method_name) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  ArtMethod* method = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+
+    ScopedUtfChars chars(env, method_name);
+    CHECK(chars.c_str() != nullptr);
+    method = soa.Decode<mirror::Class*>(cls)->FindDeclaredDirectMethodByName(
+        chars.c_str(), kRuntimePointerSize);
+  }
+
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  OatQuickMethodHeader* header = nullptr;
+  while (true) {
+    header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
+    if (code_cache->ContainsPc(header->GetCode())) {
+      break;
+    } else {
+      // Sleep to yield to the compiler thread.
+      usleep(1000);
+      ScopedObjectAccess soa(Thread::Current());
+      // Make sure there is profiling info, which the compiler requires.
+      ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+      // This either finds the method already compiled or does the compilation itself.
+      jit->CompileMethod(method, soa.Self(), /* osr */ false);
+    }
+  }
+}
+
 }  // namespace art
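
On the Java side, the new hook corresponds to a Main.ensureJitCompiled(Class<?>,
String) native declaration matching the JNI name above. A minimal usage sketch
under that assumption; hotMethod is a hypothetical stand-in for whatever the
test wants compiled:

    class Main {
        // Matches Java_Main_ensureJitCompiled above: spins until the JIT code
        // cache contains code for the named declared direct method.
        private static native void ensureJitCompiled(Class<?> cls, String methodName);

        static void hotMethod() { /* body under test */ }

        public static void main(String[] args) {
            ensureJitCompiled(Main.class, "hotMethod");  // hypothetical usage
            hotMethod();  // its entry point should now be JIT-compiled code
        }
    }
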
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index 922eae6..85ea1c8 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -37,17 +37,20 @@
   asserts_enabled = false;
 }
 
-
-// public static native boolean isInterpreted();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass) {
+static jboolean IsInterpreted(JNIEnv* env, jclass, size_t level) {
   ScopedObjectAccess soa(env);
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
 }
 
+// public static native boolean isInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 1);
+}
+
 // public static native void assertIsInterpreted();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsInterpreted(JNIEnv* env, jclass klass) {
@@ -56,10 +59,7 @@
   }
 }
 
-
-// public static native boolean isManaged();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+static jboolean IsManaged(JNIEnv* env, jclass cls, size_t level) {
   ScopedObjectAccess soa(env);
 
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
@@ -71,13 +71,19 @@
     return JNI_FALSE;
   }
 
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
 
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
 }
 
+// public static native boolean isManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 1);
+}
+
 // public static native void assertIsManaged();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsManaged(JNIEnv* env, jclass cls) {
@@ -86,4 +92,32 @@
   }
 }
 
+// public static native boolean isCallerInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 2);
+}
+
+// public static native void assertCallerIsInterpreted();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass klass) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerInterpreted(env, klass));
+  }
+}
+
+// public static native boolean isCallerManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 2);
+}
+
+// public static native void assertCallerIsManaged();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerManaged(env, cls));
+  }
+}
+
 }  // namespace art
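
The caller variants pass level 2 to the shared helpers, so instead of classifying
the method that invoked the native call, they classify that method's own caller.
A sketch of the matching Java declarations with a hypothetical use:

    class Main {
        // Declarations matching the JNI entry points above.
        public static native boolean isCallerInterpreted();
        public static native void assertCallerIsInterpreted();
        public static native boolean isCallerManaged();
        public static native void assertCallerIsManaged();

        static void checkedCallee() {
            // Asserts about the frame that invoked checkedCallee(), not about
            // checkedCallee() itself (hypothetical usage).
            assertCallerIsInterpreted();
        }
    }
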
diff --git a/test/dexdump/all.dex b/test/dexdump/all.dex
new file mode 100644
index 0000000..caf678d
--- /dev/null
+++ b/test/dexdump/all.dex
Binary files differ
diff --git a/test/dexdump/all.lst b/test/dexdump/all.lst
new file mode 100644
index 0000000..17ab9ca
--- /dev/null
+++ b/test/dexdump/all.lst
@@ -0,0 +1,21 @@
+#all.dex
+0x0000043c 8 A <init> ()V (none) -1
+0x00000454 58 A arrays ()V (none) -1
+0x000004a0 130 A binary_ops ()V (none) -1
+0x00000534 66 A binary_ops_2addr ()V (none) -1
+0x00000588 34 A binary_ops_lit16 ()V (none) -1
+0x000005bc 46 A binary_ops_lit8 ()V (none) -1
+0x000005fc 22 A compares ()V (none) -1
+0x00000624 50 A conditionals ()V (none) -1
+0x00000668 56 A constants ()V (none) -1
+0x000006b0 108 A misc ()V (none) -1
+0x0000072c 46 A moves ()V (none) -1
+0x0000076c 32 A packed_switch ()V (none) -1
+0x0000079c 2 A return32 ()I (none) -1
+0x000007b0 2 A return64 ()I (none) -1
+0x000007c4 2 A return_object ()Ljava/lang/Object; (none) -1
+0x000007d8 44 A sparse_switch ()V (none) -1
+0x00000814 58 A static_fields ()V (none) -1
+0x00000860 44 A unary_ops ()V (none) -1
+0x0000089c 58 A instance_fields ()V (none) -1
+0x000008e8 30 A invokes ()V (none) -1
diff --git a/test/dexdump/all.txt b/test/dexdump/all.txt
new file mode 100644
index 0000000..af4fb4c
--- /dev/null
+++ b/test/dexdump/all.txt
@@ -0,0 +1,622 @@
+Processing 'all.dex'...
+Opened 'all.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : d5134208
+signature           : 7af6...100f
+file_size           : 2572
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 46
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 10
+type_ids_off        : 296 (0x000128)
+proto_ids_size      : 3
+proto_ids_off       : 336 (0x000150)
+field_ids_size      : 14
+field_ids_off       : 372 (0x000174)
+method_ids_size     : 21
+method_ids_off      : 484 (0x0001e4)
+class_defs_size     : 1
+class_defs_off      : 652 (0x00028c)
+data_size           : 1888
+data_off            : 684 (0x0002ac)
+
+Class #0 header:
+class_idx           : 4
+access_flags        : 1 (0x0001)
+superclass_idx      : 5
+interfaces_off      : 0 (0x000000)
+source_file_idx     : -1
+annotations_off     : 0 (0x000000)
+class_data_off      : 2310 (0x000906)
+static_fields_size  : 7
+instance_fields_size: 7
+direct_methods_size : 18
+virtual_methods_size: 2
+
+Class #0            -
+  Class descriptor  : 'LA;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LA;)
+      name          : 'sB'
+      type          : 'B'
+      access        : 0x000a (PRIVATE STATIC)
+    #1              : (in LA;)
+      name          : 'sC'
+      type          : 'C'
+      access        : 0x000a (PRIVATE STATIC)
+    #2              : (in LA;)
+      name          : 'sI'
+      type          : 'I'
+      access        : 0x000a (PRIVATE STATIC)
+    #3              : (in LA;)
+      name          : 'sJ'
+      type          : 'J'
+      access        : 0x000a (PRIVATE STATIC)
+    #4              : (in LA;)
+      name          : 'sO'
+      type          : 'LA;'
+      access        : 0x000a (PRIVATE STATIC)
+    #5              : (in LA;)
+      name          : 'sS'
+      type          : 'S'
+      access        : 0x000a (PRIVATE STATIC)
+    #6              : (in LA;)
+      name          : 'sZ'
+      type          : 'Z'
+      access        : 0x000a (PRIVATE STATIC)
+  Instance fields   -
+    #0              : (in LA;)
+      name          : 'mB'
+      type          : 'B'
+      access        : 0x0002 (PRIVATE)
+    #1              : (in LA;)
+      name          : 'mC'
+      type          : 'C'
+      access        : 0x0002 (PRIVATE)
+    #2              : (in LA;)
+      name          : 'mI'
+      type          : 'I'
+      access        : 0x0002 (PRIVATE)
+    #3              : (in LA;)
+      name          : 'mJ'
+      type          : 'J'
+      access        : 0x0002 (PRIVATE)
+    #4              : (in LA;)
+      name          : 'mO'
+      type          : 'LA;'
+      access        : 0x0002 (PRIVATE)
+    #5              : (in LA;)
+      name          : 'mS'
+      type          : 'S'
+      access        : 0x0002 (PRIVATE)
+    #6              : (in LA;)
+      name          : 'mZ'
+      type          : 'Z'
+      access        : 0x0002 (PRIVATE)
+  Direct methods    -
+    #0              : (in LA;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+00042c:                                        |[00042c] A.<init>:()V
+00043c: 7010 1400 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0014
+000442: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #1              : (in LA;)
+      name          : 'arrays'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 29 16-bit code units
+000444:                                        |[000444] A.arrays:()V
+000454: 4400 0102                              |0000: aget v0, v1, v2
+000458: 4500 0102                              |0002: aget-wide v0, v1, v2
+00045c: 4600 0102                              |0004: aget-object v0, v1, v2
+000460: 4700 0102                              |0006: aget-boolean v0, v1, v2
+000464: 4800 0102                              |0008: aget-byte v0, v1, v2
+000468: 4900 0102                              |000a: aget-char v0, v1, v2
+00046c: 4a00 0102                              |000c: aget-short v0, v1, v2
+000470: 4b00 0102                              |000e: aput v0, v1, v2
+000474: 4c00 0102                              |0010: aput-wide v0, v1, v2
+000478: 4d00 0102                              |0012: aput-object v0, v1, v2
+00047c: 4e00 0102                              |0014: aput-boolean v0, v1, v2
+000480: 4f00 0102                              |0016: aput-byte v0, v1, v2
+000484: 5000 0102                              |0018: aput-char v0, v1, v2
+000488: 5100 0102                              |001a: aput-short v0, v1, v2
+00048c: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #2              : (in LA;)
+      name          : 'binary_ops'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 65 16-bit code units
+000490:                                        |[000490] A.binary_ops:()V
+0004a0: 9000 0102                              |0000: add-int v0, v1, v2
+0004a4: 9100 0102                              |0002: sub-int v0, v1, v2
+0004a8: 9200 0102                              |0004: mul-int v0, v1, v2
+0004ac: 9300 0102                              |0006: div-int v0, v1, v2
+0004b0: 9400 0102                              |0008: rem-int v0, v1, v2
+0004b4: 9500 0102                              |000a: and-int v0, v1, v2
+0004b8: 9600 0102                              |000c: or-int v0, v1, v2
+0004bc: 9700 0102                              |000e: xor-int v0, v1, v2
+0004c0: 9800 0102                              |0010: shl-int v0, v1, v2
+0004c4: 9900 0102                              |0012: shr-int v0, v1, v2
+0004c8: 9a00 0102                              |0014: ushr-int v0, v1, v2
+0004cc: 9b00 0102                              |0016: add-long v0, v1, v2
+0004d0: 9c00 0102                              |0018: sub-long v0, v1, v2
+0004d4: 9d00 0102                              |001a: mul-long v0, v1, v2
+0004d8: 9e00 0102                              |001c: div-long v0, v1, v2
+0004dc: 9f00 0102                              |001e: rem-long v0, v1, v2
+0004e0: a000 0102                              |0020: and-long v0, v1, v2
+0004e4: a100 0102                              |0022: or-long v0, v1, v2
+0004e8: a200 0102                              |0024: xor-long v0, v1, v2
+0004ec: a300 0102                              |0026: shl-long v0, v1, v2
+0004f0: a400 0102                              |0028: shr-long v0, v1, v2
+0004f4: a500 0102                              |002a: ushr-long v0, v1, v2
+0004f8: a600 0102                              |002c: add-float v0, v1, v2
+0004fc: a700 0102                              |002e: sub-float v0, v1, v2
+000500: a800 0102                              |0030: mul-float v0, v1, v2
+000504: a900 0102                              |0032: div-float v0, v1, v2
+000508: aa00 0102                              |0034: rem-float v0, v1, v2
+00050c: ab00 0102                              |0036: add-double v0, v1, v2
+000510: ac00 0102                              |0038: sub-double v0, v1, v2
+000514: ad00 0102                              |003a: mul-double v0, v1, v2
+000518: ae00 0102                              |003c: div-double v0, v1, v2
+00051c: af00 0102                              |003e: rem-double v0, v1, v2
+000520: 0e00                                   |0040: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #3              : (in LA;)
+      name          : 'binary_ops_2addr'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 33 16-bit code units
+000524:                                        |[000524] A.binary_ops_2addr:()V
+000534: b010                                   |0000: add-int/2addr v0, v1
+000536: b110                                   |0001: sub-int/2addr v0, v1
+000538: b210                                   |0002: mul-int/2addr v0, v1
+00053a: b310                                   |0003: div-int/2addr v0, v1
+00053c: b410                                   |0004: rem-int/2addr v0, v1
+00053e: b510                                   |0005: and-int/2addr v0, v1
+000540: b610                                   |0006: or-int/2addr v0, v1
+000542: b710                                   |0007: xor-int/2addr v0, v1
+000544: b810                                   |0008: shl-int/2addr v0, v1
+000546: b910                                   |0009: shr-int/2addr v0, v1
+000548: ba10                                   |000a: ushr-int/2addr v0, v1
+00054a: bb10                                   |000b: add-long/2addr v0, v1
+00054c: bc10                                   |000c: sub-long/2addr v0, v1
+00054e: bd10                                   |000d: mul-long/2addr v0, v1
+000550: be10                                   |000e: div-long/2addr v0, v1
+000552: bf10                                   |000f: rem-long/2addr v0, v1
+000554: c010                                   |0010: and-long/2addr v0, v1
+000556: c110                                   |0011: or-long/2addr v0, v1
+000558: c210                                   |0012: xor-long/2addr v0, v1
+00055a: c310                                   |0013: shl-long/2addr v0, v1
+00055c: c410                                   |0014: shr-long/2addr v0, v1
+00055e: c510                                   |0015: ushr-long/2addr v0, v1
+000560: c610                                   |0016: add-float/2addr v0, v1
+000562: c710                                   |0017: sub-float/2addr v0, v1
+000564: c810                                   |0018: mul-float/2addr v0, v1
+000566: c910                                   |0019: div-float/2addr v0, v1
+000568: ca10                                   |001a: rem-float/2addr v0, v1
+00056a: cb10                                   |001b: add-double/2addr v0, v1
+00056c: cc10                                   |001c: sub-double/2addr v0, v1
+00056e: cd10                                   |001d: mul-double/2addr v0, v1
+000570: ce10                                   |001e: div-double/2addr v0, v1
+000572: cf10                                   |001f: rem-double/2addr v0, v1
+000574: 0e00                                   |0020: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #4              : (in LA;)
+      name          : 'binary_ops_lit16'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 17 16-bit code units
+000578:                                        |[000578] A.binary_ops_lit16:()V
+000588: d010 3412                              |0000: add-int/lit16 v0, v1, #int 4660 // #1234
+00058c: d110 3412                              |0002: rsub-int v0, v1, #int 4660 // #1234
+000590: d210 3412                              |0004: mul-int/lit16 v0, v1, #int 4660 // #1234
+000594: d310 3412                              |0006: div-int/lit16 v0, v1, #int 4660 // #1234
+000598: d410 3412                              |0008: rem-int/lit16 v0, v1, #int 4660 // #1234
+00059c: d510 3412                              |000a: and-int/lit16 v0, v1, #int 4660 // #1234
+0005a0: d610 3412                              |000c: or-int/lit16 v0, v1, #int 4660 // #1234
+0005a4: d710 3412                              |000e: xor-int/lit16 v0, v1, #int 4660 // #1234
+0005a8: 0e00                                   |0010: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #5              : (in LA;)
+      name          : 'binary_ops_lit8'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 23 16-bit code units
+0005ac:                                        |[0005ac] A.binary_ops_lit8:()V
+0005bc: d800 0112                              |0000: add-int/lit8 v0, v1, #int 18 // #12
+0005c0: d900 0112                              |0002: rsub-int/lit8 v0, v1, #int 18 // #12
+0005c4: da00 0112                              |0004: mul-int/lit8 v0, v1, #int 18 // #12
+0005c8: db00 0112                              |0006: div-int/lit8 v0, v1, #int 18 // #12
+0005cc: dc00 0112                              |0008: rem-int/lit8 v0, v1, #int 18 // #12
+0005d0: dd00 0112                              |000a: and-int/lit8 v0, v1, #int 18 // #12
+0005d4: de00 0112                              |000c: or-int/lit8 v0, v1, #int 18 // #12
+0005d8: df00 0112                              |000e: xor-int/lit8 v0, v1, #int 18 // #12
+0005dc: e000 0112                              |0010: shl-int/lit8 v0, v1, #int 18 // #12
+0005e0: e100 0112                              |0012: shr-int/lit8 v0, v1, #int 18 // #12
+0005e4: e200 0112                              |0014: ushr-int/lit8 v0, v1, #int 18 // #12
+0005e8: 0e00                                   |0016: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #6              : (in LA;)
+      name          : 'compares'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 11 16-bit code units
+0005ec:                                        |[0005ec] A.compares:()V
+0005fc: 2d00 0102                              |0000: cmpl-float v0, v1, v2
+000600: 2e00 0102                              |0002: cmpg-float v0, v1, v2
+000604: 2f00 0102                              |0004: cmpl-double v0, v1, v2
+000608: 3000 0102                              |0006: cmpg-double v0, v1, v2
+00060c: 3100 0102                              |0008: cmp-long v0, v1, v2
+000610: 0e00                                   |000a: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #7              : (in LA;)
+      name          : 'conditionals'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 25 16-bit code units
+000614:                                        |[000614] A.conditionals:()V
+000624: 3210 1800                              |0000: if-eq v0, v1, 0018 // +0018
+000628: 3310 1600                              |0002: if-ne v0, v1, 0018 // +0016
+00062c: 3410 1400                              |0004: if-lt v0, v1, 0018 // +0014
+000630: 3510 1200                              |0006: if-ge v0, v1, 0018 // +0012
+000634: 3610 1000                              |0008: if-gt v0, v1, 0018 // +0010
+000638: 3710 0e00                              |000a: if-le v0, v1, 0018 // +000e
+00063c: 3800 0c00                              |000c: if-eqz v0, 0018 // +000c
+000640: 3900 0a00                              |000e: if-nez v0, 0018 // +000a
+000644: 3a00 0800                              |0010: if-ltz v0, 0018 // +0008
+000648: 3b00 0600                              |0012: if-gez v0, 0018 // +0006
+00064c: 3c00 0400                              |0014: if-gtz v0, 0018 // +0004
+000650: 3d00 0200                              |0016: if-lez v0, 0018 // +0002
+000654: 0e00                                   |0018: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #8              : (in LA;)
+      name          : 'constants'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 28 16-bit code units
+000658:                                        |[000658] A.constants:()V
+000668: 1210                                   |0000: const/4 v0, #int 1 // #1
+00066a: 1300 3412                              |0001: const/16 v0, #int 4660 // #1234
+00066e: 1400 7856 3412                         |0003: const v0, #float 5.69046e-28 // #12345678
+000674: 1500 3412                              |0006: const/high16 v0, #int 305397760 // #1234
+000678: 1600 3412                              |0008: const-wide/16 v0, #int 4660 // #1234
+00067c: 1700 7856 3412                         |000a: const-wide/32 v0, #float 5.69046e-28 // #12345678
+000682: 1800 efcd ab90 7856 3412               |000d: const-wide v0, #double 5.62635e-221 // #1234567890abcdef
+00068c: 1900 3412                              |0012: const-wide/high16 v0, #long 1311673391471656960 // #1234
+000690: 1a00 2c00                              |0014: const-string v0, "string" // string@002c
+000694: 1b00 2c00 0000                         |0016: const-string/jumbo v0, "string" // string@0000002c
+00069a: 1c00 0500                              |0019: const-class v0, Ljava/lang/Object; // type@0005
+00069e: 0e00                                   |001b: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #9              : (in LA;)
+      name          : 'misc'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 5
+      ins           : 0
+      outs          : 0
+      insns size    : 54 16-bit code units
+0006a0:                                        |[0006a0] A.misc:()V
+0006b0: 0000                                   |0000: nop // spacer
+0006b2: 1d00                                   |0001: monitor-enter v0
+0006b4: 1e00                                   |0002: monitor-exit v0
+0006b6: 1f00 0500                              |0003: check-cast v0, Ljava/lang/Object; // type@0005
+0006ba: 2010 0500                              |0005: instance-of v0, v1, Ljava/lang/Object; // type@0005
+0006be: 2110                                   |0007: array-length v0, v1
+0006c0: 2200 0500                              |0008: new-instance v0, Ljava/lang/Object; // type@0005
+0006c4: 2310 0500                              |000a: new-array v0, v1, Ljava/lang/Object; // type@0005
+0006c8: 2454 0900 1032                         |000c: filled-new-array {v0, v1, v2, v3, v4}, [Ljava/lang/Object; // type@0009
+0006ce: 2505 0900 0000                         |000f: filled-new-array/range {v0, v1, v2, v3, v4}, [Ljava/lang/Object; // type@0009
+0006d4: 2600 0c00 0000                         |0012: fill-array-data v0, 0000001e // +0000000c
+0006da: 2700                                   |0015: throw v0
+0006dc: 2806                                   |0016: goto 001c // +0006
+0006de: 2900 0500                              |0017: goto/16 001c // +0005
+0006e2: 2a00 0300 0000                         |0019: goto/32 #00000003
+0006e8: 0e00                                   |001c: return-void
+0006ea: 0000                                   |001d: nop // spacer
+0006ec: 0003 0400 0a00 0000 0100 0000 0200 ... |001e: array-data (24 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #10              : (in LA;)
+      name          : 'moves'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 23 16-bit code units
+00071c:                                        |[00071c] A.moves:()V
+00072c: 0110                                   |0000: move v0, v1
+00072e: 0200 0100                              |0001: move/from16 v0, v1
+000732: 0300 0000 0100                         |0003: move/16 v0, v1
+000738: 0410                                   |0006: move-wide v0, v1
+00073a: 0500 0100                              |0007: move-wide/from16 v0, v1
+00073e: 0600 0000 0100                         |0009: move-wide/16 v0, v1
+000744: 0710                                   |000c: move-object v0, v1
+000746: 0800 0100                              |000d: move-object/from16 v0, v1
+00074a: 0900 0000 0100                         |000f: move-object/16 v0, v1
+000750: 0a00                                   |0012: move-result v0
+000752: 0b00                                   |0013: move-result-wide v0
+000754: 0c00                                   |0014: move-result-object v0
+000756: 0d00                                   |0015: move-exception v0
+000758: 0e00                                   |0016: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #11              : (in LA;)
+      name          : 'packed_switch'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 16 16-bit code units
+00075c:                                        |[00075c] A.packed_switch:()V
+00076c: 2b00 0800 0000                         |0000: packed-switch v0, 00000008 // +00000008
+000772: 0e00                                   |0003: return-void
+000774: 28ff                                   |0004: goto 0003 // -0001
+000776: 28fe                                   |0005: goto 0003 // -0002
+000778: 28fd                                   |0006: goto 0003 // -0003
+00077a: 0000                                   |0007: nop // spacer
+00077c: 0001 0200 feff ff7f 0500 0000 0600 ... |0008: packed-switch-data (8 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #12              : (in LA;)
+      name          : 'return32'
+      type          : '()I'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+00078c:                                        |[00078c] A.return32:()I
+00079c: 0f00                                   |0000: return v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #13              : (in LA;)
+      name          : 'return64'
+      type          : '()I'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+0007a0:                                        |[0007a0] A.return64:()I
+0007b0: 1000                                   |0000: return-wide v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #14              : (in LA;)
+      name          : 'return_object'
+      type          : '()Ljava/lang/Object;'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+0007b4:                                        |[0007b4] A.return_object:()Ljava/lang/Object;
+0007c4: 1100                                   |0000: return-object v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #15              : (in LA;)
+      name          : 'sparse_switch'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 22 16-bit code units
+0007c8:                                        |[0007c8] A.sparse_switch:()V
+0007d8: 2c00 0400 0000                         |0000: sparse-switch v0, 00000004 // +00000004
+0007de: 0e00                                   |0003: return-void
+0007e0: 0002 0400 1111 0000 2222 0000 3333 ... |0004: sparse-switch-data (18 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #16              : (in LA;)
+      name          : 'static_fields'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 29 16-bit code units
+000804:                                        |[000804] A.static_fields:()V
+000814: 6000 0900                              |0000: sget v0, LA;.sI:I // field@0009
+000818: 6100 0a00                              |0002: sget-wide v0, LA;.sJ:J // field@000a
+00081c: 6200 0b00                              |0004: sget-object v0, LA;.sO:LA; // field@000b
+000820: 6300 0d00                              |0006: sget-boolean v0, LA;.sZ:Z // field@000d
+000824: 6400 0700                              |0008: sget-byte v0, LA;.sB:B // field@0007
+000828: 6500 0800                              |000a: sget-char v0, LA;.sC:C // field@0008
+00082c: 6600 0c00                              |000c: sget-short v0, LA;.sS:S // field@000c
+000830: 6700 0900                              |000e: sput v0, LA;.sI:I // field@0009
+000834: 6800 0a00                              |0010: sput-wide v0, LA;.sJ:J // field@000a
+000838: 6900 0b00                              |0012: sput-object v0, LA;.sO:LA; // field@000b
+00083c: 6a00 0d00                              |0014: sput-boolean v0, LA;.sZ:Z // field@000d
+000840: 6b00 0700                              |0016: sput-byte v0, LA;.sB:B // field@0007
+000844: 6c00 0800                              |0018: sput-char v0, LA;.sC:C // field@0008
+000848: 6d00 0500                              |001a: sput-short v0, LA;.mS:S // field@0005
+00084c: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #17              : (in LA;)
+      name          : 'unary_ops'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 22 16-bit code units
+000850:                                        |[000850] A.unary_ops:()V
+000860: 7b10                                   |0000: neg-int v0, v1
+000862: 7c10                                   |0001: not-int v0, v1
+000864: 7d10                                   |0002: neg-long v0, v1
+000866: 7e10                                   |0003: not-long v0, v1
+000868: 7f10                                   |0004: neg-float v0, v1
+00086a: 8010                                   |0005: neg-double v0, v1
+00086c: 8110                                   |0006: int-to-long v0, v1
+00086e: 8210                                   |0007: int-to-float v0, v1
+000870: 8310                                   |0008: int-to-double v0, v1
+000872: 8410                                   |0009: long-to-int v0, v1
+000874: 8510                                   |000a: long-to-float v0, v1
+000876: 8610                                   |000b: long-to-double v0, v1
+000878: 8710                                   |000c: float-to-int v0, v1
+00087a: 8810                                   |000d: float-to-long v0, v1
+00087c: 8910                                   |000e: float-to-double v0, v1
+00087e: 8a10                                   |000f: double-to-int v0, v1
+000880: 8b10                                   |0010: double-to-long v0, v1
+000882: 8c10                                   |0011: double-to-float v0, v1
+000884: 8d10                                   |0012: int-to-byte v0, v1
+000886: 8e10                                   |0013: int-to-char v0, v1
+000888: 8f10                                   |0014: int-to-short v0, v1
+00088a: 0e00                                   |0015: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  Virtual methods   -
+    #0              : (in LA;)
+      name          : 'instance_fields'
+      type          : '()V'
+      access        : 0x0001 (PUBLIC)
+      code          -
+      registers     : 2
+      ins           : 1
+      outs          : 0
+      insns size    : 29 16-bit code units
+00088c:                                        |[00088c] A.instance_fields:()V
+00089c: 5210 0900                              |0000: iget v0, v1, LA;.sI:I // field@0009
+0008a0: 5310 0a00                              |0002: iget-wide v0, v1, LA;.sJ:J // field@000a
+0008a4: 5410 0b00                              |0004: iget-object v0, v1, LA;.sO:LA; // field@000b
+0008a8: 5510 0d00                              |0006: iget-boolean v0, v1, LA;.sZ:Z // field@000d
+0008ac: 5610 0700                              |0008: iget-byte v0, v1, LA;.sB:B // field@0007
+0008b0: 5710 0800                              |000a: iget-char v0, v1, LA;.sC:C // field@0008
+0008b4: 5810 0c00                              |000c: iget-short v0, v1, LA;.sS:S // field@000c
+0008b8: 5910 0900                              |000e: iput v0, v1, LA;.sI:I // field@0009
+0008bc: 5a10 0a00                              |0010: iput-wide v0, v1, LA;.sJ:J // field@000a
+0008c0: 5b10 0b00                              |0012: iput-object v0, v1, LA;.sO:LA; // field@000b
+0008c4: 5c10 0d00                              |0014: iput-boolean v0, v1, LA;.sZ:Z // field@000d
+0008c8: 5d10 0700                              |0016: iput-byte v0, v1, LA;.sB:B // field@0007
+0008cc: 5e10 0800                              |0018: iput-char v0, v1, LA;.sC:C // field@0008
+0008d0: 5f10 0c00                              |001a: iput-short v0, v1, LA;.sS:S // field@000c
+0008d4: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #1              : (in LA;)
+      name          : 'invokes'
+      type          : '()V'
+      access        : 0x0001 (PUBLIC)
+      code          -
+      registers     : 5
+      ins           : 1
+      outs          : 1
+      insns size    : 15 16-bit code units
+0008d8:                                        |[0008d8] A.invokes:()V
+0008e8: 6e54 0a00 1032                         |0000: invoke-virtual {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008ee: 6f54 0a00 1032                         |0003: invoke-super {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008f4: 7054 0a00 1032                         |0006: invoke-direct {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008fa: 7154 0a00 1032                         |0009: invoke-static {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+000900: 7254 0a00 1032                         |000c: invoke-interface {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  source_file_idx   : -1 (unknown)
+
diff --git a/test/dexdump/all.xml b/test/dexdump/all.xml
new file mode 100644
index 0000000..b623ecb
--- /dev/null
+++ b/test/dexdump/all.xml
@@ -0,0 +1,211 @@
+<api>
+<package name=""
+>
+<class name="A"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<constructor name="A"
+ type="A"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+<method name="arrays"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_2addr"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_lit16"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_lit8"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="compares"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="conditionals"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="constants"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="misc"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="moves"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="packed_switch"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return32"
+ return="int"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return64"
+ return="int"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return_object"
+ return="java.lang.Object"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="sparse_switch"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="static_fields"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="unary_ops"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="instance_fields"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="invokes"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</method>
+</class>
+</package>
+</api>
diff --git a/test/dexdump/bytecodes.txt b/test/dexdump/bytecodes.txt
old mode 100755
new mode 100644
index d14c47c..e1a381e
--- a/test/dexdump/bytecodes.txt
+++ b/test/dexdump/bytecodes.txt
@@ -12,8 +12,8 @@
 string_ids_off      : 112 (0x000070)
 type_ids_size       : 42
 type_ids_off        : 724 (0x0002d4)
-proto_ids_size       : 12
-proto_ids_off        : 892 (0x00037c)
+proto_ids_size      : 12
+proto_ids_off       : 892 (0x00037c)
 field_ids_size      : 40
 field_ids_off       : 1036 (0x00040c)
 method_ids_size     : 28
@@ -36,6 +36,11 @@
 direct_methods_size : 0
 virtual_methods_size: 1
 
+Class #0 annotations:
+Annotations on class
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Retention; value=CLASS
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Target; value={ TYPE FIELD METHOD PARAMETER CONSTRUCTOR LOCAL_VARIABLE }
+
 Class #0            -
   Class descriptor  : 'Landroid/annotation/SuppressLint;'
   Access flags      : 0x2601 (PUBLIC INTERFACE ABSTRACT ANNOTATION)
@@ -67,6 +72,11 @@
 direct_methods_size : 0
 virtual_methods_size: 1
 
+Class #1 annotations:
+Annotations on class
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Retention; value=CLASS
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Target; value={ TYPE METHOD CONSTRUCTOR }
+
 Class #1            -
   Class descriptor  : 'Landroid/annotation/TargetApi;'
   Access flags      : 0x2601 (PUBLIC INTERFACE ABSTRACT ANNOTATION)
@@ -144,6 +154,11 @@
 direct_methods_size : 1
 virtual_methods_size: 0
 
+Class #3 annotations:
+Annotations on class
+  VISIBILITY_SYSTEM Ldalvik/annotation/EnclosingClass; value=Lcom/google/android/test/R;
+  VISIBILITY_SYSTEM Ldalvik/annotation/InnerClass; accessFlags=25 name="attr"
+
 Class #3            -
   Class descriptor  : 'Lcom/google/android/test/R$attr;'
   Access flags      : 0x0011 (PUBLIC FINAL)
@@ -186,6 +201,11 @@
 direct_methods_size : 1
 virtual_methods_size: 0
 
+Class #4 annotations:
+Annotations on class
+  VISIBILITY_SYSTEM Ldalvik/annotation/EnclosingClass; value=Lcom/google/android/test/R;
+  VISIBILITY_SYSTEM Ldalvik/annotation/InnerClass; accessFlags=25 name="drawable"
+
 Class #4            -
   Class descriptor  : 'Lcom/google/android/test/R$drawable;'
   Access flags      : 0x0011 (PUBLIC FINAL)
@@ -196,6 +216,7 @@
       name          : 'icon'
       type          : 'I'
       access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 2130837504
   Instance fields   -
   Direct methods    -
     #0              : (in Lcom/google/android/test/R$drawable;)
@@ -232,6 +253,10 @@
 direct_methods_size : 1
 virtual_methods_size: 0
 
+Class #5 annotations:
+Annotations on class
+  VISIBILITY_SYSTEM Ldalvik/annotation/MemberClasses; value={ Lcom/google/android/test/R$attr; Lcom/google/android/test/R$drawable; }
+
 Class #5            -
   Class descriptor  : 'Lcom/google/android/test/R;'
   Access flags      : 0x0011 (PUBLIC FINAL)
@@ -274,6 +299,10 @@
 direct_methods_size : 13
 virtual_methods_size: 2
 
+Class #6 annotations:
+Annotations on method #13 'doit'
+  VISIBILITY_SYSTEM Ldalvik/annotation/Throws; value={ Ljava/lang/Exception; }
+
 Class #6            -
   Class descriptor  : 'Lcom/google/android/test/Test;'
   Access flags      : 0x0001 (PUBLIC)
@@ -417,17 +446,17 @@
 000a02: 6a00 1800                              |0001: sput-boolean v0, Lcom/google/android/test/Test;.sBool:Z // field@0018
 000a06: 1300 1f00                              |0003: const/16 v0, #int 31 // #1f
 000a0a: 6b00 1700                              |0005: sput-byte v0, Lcom/google/android/test/Test;.sB:B // field@0017
-000a0e: 1400 ffff 0000                         |0007: const v0, #float 0.000000 // #0000ffff
+000a0e: 1400 ffff 0000                         |0007: const v0, #float 9.18341e-41 // #0000ffff
 000a14: 6c00 1900                              |000a: sput-char v0, Lcom/google/android/test/Test;.sC:C // field@0019
 000a18: 1300 3412                              |000c: const/16 v0, #int 4660 // #1234
 000a1c: 6d00 1f00                              |000e: sput-short v0, Lcom/google/android/test/Test;.sS:S // field@001f
-000a20: 1400 7856 3412                         |0010: const v0, #float 0.000000 // #12345678
+000a20: 1400 7856 3412                         |0010: const v0, #float 5.69046e-28 // #12345678
 000a26: 6700 1c00                              |0013: sput v0, Lcom/google/android/test/Test;.sI:I // field@001c
-000a2a: 1800 ffff cdab 7956 3412               |0015: const-wide v0, #double 0.000000 // #12345679abcdffff
+000a2a: 1800 ffff cdab 7956 3412               |0015: const-wide v0, #double 5.62635e-221 // #12345679abcdffff
 000a34: 6800 1d00                              |001a: sput-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
-000a38: 1400 00e4 4046                         |001c: const v0, #float 12345.000000 // #4640e400
+000a38: 1400 00e4 4046                         |001c: const v0, #float 12345 // #4640e400
 000a3e: 6700 1b00                              |001f: sput v0, Lcom/google/android/test/Test;.sF:F // field@001b
-000a42: 1800 0000 0000 801c c840               |0021: const-wide v0, #double 12345.000000 // #40c81c8000000000
+000a42: 1800 0000 0000 801c c840               |0021: const-wide v0, #double 12345 // #40c81c8000000000
 000a4c: 6800 1a00                              |0026: sput-wide v0, Lcom/google/android/test/Test;.sD:D // field@001a
 000a50: 1200                                   |0028: const/4 v0, #int 0 // #0
 000a52: 6900 1e00                              |0029: sput-object v0, Lcom/google/android/test/Test;.sO:Ljava/lang/Object; // field@001e
@@ -470,17 +499,17 @@
 000ab4: 5c81 0d00                              |0008: iput-boolean v1, v8, Lcom/google/android/test/Test;.mBool:Z // field@000d
 000ab8: 1301 1f00                              |000a: const/16 v1, #int 31 // #1f
 000abc: 5d81 0c00                              |000c: iput-byte v1, v8, Lcom/google/android/test/Test;.mB:B // field@000c
-000ac0: 1401 ffff 0000                         |000e: const v1, #float 0.000000 // #0000ffff
+000ac0: 1401 ffff 0000                         |000e: const v1, #float 9.18341e-41 // #0000ffff
 000ac6: 5e81 0e00                              |0011: iput-char v1, v8, Lcom/google/android/test/Test;.mC:C // field@000e
 000aca: 1301 3412                              |0013: const/16 v1, #int 4660 // #1234
 000ace: 5f81 1500                              |0015: iput-short v1, v8, Lcom/google/android/test/Test;.mS:S // field@0015
-000ad2: 1401 7856 3412                         |0017: const v1, #float 0.000000 // #12345678
+000ad2: 1401 7856 3412                         |0017: const v1, #float 5.69046e-28 // #12345678
 000ad8: 5981 1100                              |001a: iput v1, v8, Lcom/google/android/test/Test;.mI:I // field@0011
-000adc: 1802 ffff cdab 7956 3412               |001c: const-wide v2, #double 0.000000 // #12345679abcdffff
+000adc: 1802 ffff cdab 7956 3412               |001c: const-wide v2, #double 5.62635e-221 // #12345679abcdffff
 000ae6: 5a82 1200                              |0021: iput-wide v2, v8, Lcom/google/android/test/Test;.mL:J // field@0012
-000aea: 1401 00e4 4046                         |0023: const v1, #float 12345.000000 // #4640e400
+000aea: 1401 00e4 4046                         |0023: const v1, #float 12345 // #4640e400
 000af0: 5981 1000                              |0026: iput v1, v8, Lcom/google/android/test/Test;.mF:F // field@0010
-000af4: 1802 0000 0000 801c c840               |0028: const-wide v2, #double 12345.000000 // #40c81c8000000000
+000af4: 1802 0000 0000 801c c840               |0028: const-wide v2, #double 12345 // #40c81c8000000000
 000afe: 5a82 0f00                              |002d: iput-wide v2, v8, Lcom/google/android/test/Test;.mD:D // field@000f
 000b02: 1201                                   |002f: const/4 v1, #int 0 // #0
 000b04: 5b81 1300                              |0030: iput-object v1, v8, Lcom/google/android/test/Test;.mO:Ljava/lang/Object; // field@0013
@@ -625,7 +654,7 @@
 000cc6: 8d00                                   |0011: int-to-byte v0, v0
 000cc8: 5db0 0c00                              |0012: iput-byte v0, v11, Lcom/google/android/test/Test;.mB:B // field@000c
 000ccc: 57b0 0e00                              |0014: iget-char v0, v11, Lcom/google/android/test/Test;.mC:C // field@000e
-000cd0: 1401 ffff 0000                         |0016: const v1, #float 0.000000 // #0000ffff
+000cd0: 1401 ffff 0000                         |0016: const v1, #float 9.18341e-41 // #0000ffff
 000cd6: b010                                   |0019: add-int/2addr v0, v1
 000cd8: 8e00                                   |001a: int-to-char v0, v0
 000cda: 5eb0 0e00                              |001b: iput-char v0, v11, Lcom/google/android/test/Test;.mC:C // field@000e
@@ -634,7 +663,7 @@
 000ce6: 8f00                                   |0021: int-to-short v0, v0
 000ce8: 5fb0 1500                              |0022: iput-short v0, v11, Lcom/google/android/test/Test;.mS:S // field@0015
 000cec: 52b0 1100                              |0024: iget v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
-000cf0: 1401 7856 3412                         |0026: const v1, #float 0.000000 // #12345678
+000cf0: 1401 7856 3412                         |0026: const v1, #float 5.69046e-28 // #12345678
 000cf6: b010                                   |0029: add-int/2addr v0, v1
 000cf8: 59b0 1100                              |002a: iput v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
 000cfc: 52b0 1100                              |002c: iget v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
@@ -642,7 +671,7 @@
 000d04: b010                                   |0030: add-int/2addr v0, v1
 000d06: 59b0 1100                              |0031: iput v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
 000d0a: 53b0 1200                              |0033: iget-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
-000d0e: 1802 ffff cdab 7956 3412               |0035: const-wide v2, #double 0.000000 // #12345679abcdffff
+000d0e: 1802 ffff cdab 7956 3412               |0035: const-wide v2, #double 5.62635e-221 // #12345679abcdffff
 000d18: bb20                                   |003a: add-long/2addr v0, v2
 000d1a: 5ab0 1200                              |003b: iput-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
 000d1e: 53b0 1200                              |003d: iget-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
@@ -650,7 +679,7 @@
 000d26: bb20                                   |0041: add-long/2addr v0, v2
 000d28: 5ab0 1200                              |0042: iput-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
 000d2c: 52b0 1000                              |0044: iget v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
-000d30: 1401 00e4 4046                         |0046: const v1, #float 12345.000000 // #4640e400
+000d30: 1401 00e4 4046                         |0046: const v1, #float 12345 // #4640e400
 000d36: 52b2 1000                              |0049: iget v2, v11, Lcom/google/android/test/Test;.mF:F // field@0010
 000d3a: 1503 803f                              |004b: const/high16 v3, #int 1065353216 // #3f80
 000d3e: c732                                   |004d: sub-float/2addr v2, v3
@@ -663,7 +692,7 @@
 000d50: c610                                   |0056: add-float/2addr v0, v1
 000d52: 59b0 1000                              |0057: iput v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
 000d56: 53b0 0f00                              |0059: iget-wide v0, v11, Lcom/google/android/test/Test;.mD:D // field@000f
-000d5a: 1802 0000 0000 801c c840               |005b: const-wide v2, #double 12345.000000 // #40c81c8000000000
+000d5a: 1802 0000 0000 801c c840               |005b: const-wide v2, #double 12345 // #40c81c8000000000
 000d64: 53b4 0f00                              |0060: iget-wide v4, v11, Lcom/google/android/test/Test;.mD:D // field@000f
 000d68: 1906 f03f                              |0062: const-wide/high16 v6, #long 4607182418800017408 // #3ff0
 000d6c: cc64                                   |0064: sub-double/2addr v4, v6
@@ -680,7 +709,7 @@
 000d8a: 2d00 0001                              |0073: cmpl-float v0, v0, v1
 000d8e: 3800 2900                              |0075: if-eqz v0, 009e // +0029
 000d92: 52b0 1000                              |0077: iget v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
-000d96: 1401 9a99 993e                         |0079: const v1, #float 0.300000 // #3e99999a
+000d96: 1401 9a99 993e                         |0079: const v1, #float 0.3 // #3e99999a
 000d9c: 2d00 0001                              |007c: cmpl-float v0, v0, v1
 000da0: 3900 2000                              |007e: if-nez v0, 009e // +0020
 000da4: 52b0 1000                              |0080: iget v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
@@ -706,7 +735,7 @@
 000df2: 2f00 0002                              |00a7: cmpl-double v0, v0, v2
 000df6: 3800 2b00                              |00a9: if-eqz v0, 00d4 // +002b
 000dfa: 53b0 0f00                              |00ab: iget-wide v0, v11, Lcom/google/android/test/Test;.mD:D // field@000f
-000dfe: 1802 3333 3333 3333 d33f               |00ad: const-wide v2, #double 0.300000 // #3fd3333333333333
+000dfe: 1802 3333 3333 3333 d33f               |00ad: const-wide v2, #double 0.3 // #3fd3333333333333
 000e08: 2f00 0002                              |00b2: cmpl-double v0, v0, v2
 000e0c: 3900 2000                              |00b4: if-nez v0, 00d4 // +0020
 000e10: 53b0 0f00                              |00b6: iget-wide v0, v11, Lcom/google/android/test/Test;.mD:D // field@000f
@@ -789,7 +818,7 @@
 000eb8: 8d00                                   |000c: int-to-byte v0, v0
 000eba: 6b00 1700                              |000d: sput-byte v0, Lcom/google/android/test/Test;.sB:B // field@0017
 000ebe: 6500 1900                              |000f: sget-char v0, Lcom/google/android/test/Test;.sC:C // field@0019
-000ec2: 1401 ffff 0000                         |0011: const v1, #float 0.000000 // #0000ffff
+000ec2: 1401 ffff 0000                         |0011: const v1, #float 9.18341e-41 // #0000ffff
 000ec8: b010                                   |0014: add-int/2addr v0, v1
 000eca: 8e00                                   |0015: int-to-char v0, v0
 000ecc: 6c00 1900                              |0016: sput-char v0, Lcom/google/android/test/Test;.sC:C // field@0019
@@ -798,7 +827,7 @@
 000ed8: 8f00                                   |001c: int-to-short v0, v0
 000eda: 6d00 1f00                              |001d: sput-short v0, Lcom/google/android/test/Test;.sS:S // field@001f
 000ede: 6000 1c00                              |001f: sget v0, Lcom/google/android/test/Test;.sI:I // field@001c
-000ee2: 1401 7856 3412                         |0021: const v1, #float 0.000000 // #12345678
+000ee2: 1401 7856 3412                         |0021: const v1, #float 5.69046e-28 // #12345678
 000ee8: b010                                   |0024: add-int/2addr v0, v1
 000eea: 6700 1c00                              |0025: sput v0, Lcom/google/android/test/Test;.sI:I // field@001c
 000eee: 6000 1c00                              |0027: sget v0, Lcom/google/android/test/Test;.sI:I // field@001c
@@ -806,7 +835,7 @@
 000ef6: b010                                   |002b: add-int/2addr v0, v1
 000ef8: 6700 1c00                              |002c: sput v0, Lcom/google/android/test/Test;.sI:I // field@001c
 000efc: 6100 1d00                              |002e: sget-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
-000f00: 1802 ffff cdab 7956 3412               |0030: const-wide v2, #double 0.000000 // #12345679abcdffff
+000f00: 1802 ffff cdab 7956 3412               |0030: const-wide v2, #double 5.62635e-221 // #12345679abcdffff
 000f0a: bb20                                   |0035: add-long/2addr v0, v2
 000f0c: 6800 1d00                              |0036: sput-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
 000f10: 6100 1d00                              |0038: sget-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
@@ -814,7 +843,7 @@
 000f18: bb20                                   |003c: add-long/2addr v0, v2
 000f1a: 6800 1d00                              |003d: sput-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
 000f1e: 6000 1b00                              |003f: sget v0, Lcom/google/android/test/Test;.sF:F // field@001b
-000f22: 1401 00e4 4046                         |0041: const v1, #float 12345.000000 // #4640e400
+000f22: 1401 00e4 4046                         |0041: const v1, #float 12345 // #4640e400
 000f28: 6002 1b00                              |0044: sget v2, Lcom/google/android/test/Test;.sF:F // field@001b
 000f2c: 7f22                                   |0046: neg-float v2, v2
 000f2e: 1503 803f                              |0047: const/high16 v3, #int 1065353216 // #3f80
@@ -829,7 +858,7 @@
 000f48: c610                                   |0054: add-float/2addr v0, v1
 000f4a: 6700 1b00                              |0055: sput v0, Lcom/google/android/test/Test;.sF:F // field@001b
 000f4e: 6100 1a00                              |0057: sget-wide v0, Lcom/google/android/test/Test;.sD:D // field@001a
-000f52: 1802 0000 0000 801c c840               |0059: const-wide v2, #double 12345.000000 // #40c81c8000000000
+000f52: 1802 0000 0000 801c c840               |0059: const-wide v2, #double 12345 // #40c81c8000000000
 000f5c: 6104 1a00                              |005e: sget-wide v4, Lcom/google/android/test/Test;.sD:D // field@001a
 000f60: 8044                                   |0060: neg-double v4, v4
 000f62: 1906 f03f                              |0061: const-wide/high16 v6, #long 4607182418800017408 // #3ff0
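[Editor's note] Every changed line in the bytecodes.txt and checkers.txt hunks follows one pattern: dexdump previously pushed the raw 32- or 64-bit payload of a `const`/`const-wide` through fixed-point `%f` formatting, so bit patterns that decode to tiny denormals, or that are really integer masks like `#0000ffff`, printed as `0.000000`; the regenerated expectations use `%g`-style formatting instead. A minimal Java sketch (not dexdump's actual code; the class and method names here are illustrative) reproduces the before/after values:

```java
// Sketch: reinterpret the literal bits of a Dalvik const/const-wide
// payload and format them the old way ("%f") and the new way ("%g").
public class ConstFormatDemo {
    static void show(int bits) {
        float f = Float.intBitsToFloat(bits);          // reinterpret, don't convert
        System.out.printf("#%08x  old: %f  new: %g%n", bits, f, f);
    }

    static void show(long bits) {
        double d = Double.longBitsToDouble(bits);
        System.out.printf("#%016x  old: %f  new: %g%n", bits, d, d);
    }

    public static void main(String[] args) {
        show(0x0000ffff);          // old: 0.000000      new: 9.18341e-41
        show(0x12345678);          // old: 0.000000      new: 5.69046e-28
        show(0x4640e400);          // old: 12345.000000  new: 12345.0
                                   // (C's "%g", as in the expectations, prints "12345")
        show(0x12345679abcdffffL); // old: 0.000000      new: 5.62635e-221
    }
}
```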
diff --git a/test/dexdump/bytecodes.xml b/test/dexdump/bytecodes.xml
index 0581677..d08c2e9 100755
--- a/test/dexdump/bytecodes.xml
+++ b/test/dexdump/bytecodes.xml
@@ -3,6 +3,7 @@
 >
 <class name="SuppressLint"
  extends="java.lang.Object"
+ interface="true"
  abstract="true"
  static="false"
  final="false"
@@ -23,6 +24,7 @@
 </class>
 <class name="TargetApi"
  extends="java.lang.Object"
+ interface="true"
  abstract="true"
  static="false"
  final="false"
@@ -46,6 +48,7 @@
 >
 <class name="BuildConfig"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -70,6 +73,7 @@
 </class>
 <class name="R.attr"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -85,6 +89,7 @@
 </class>
 <class name="R.drawable"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -97,6 +102,7 @@
  static="true"
  final="true"
  visibility="public"
+ value="2130837504"
 >
 </field>
 <constructor name="R.drawable"
@@ -109,6 +115,7 @@
 </class>
 <class name="R"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -124,6 +131,7 @@
 </class>
 <class name="Test"
  extends="android.app.Activity"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
diff --git a/test/dexdump/checkers.txt b/test/dexdump/checkers.txt
old mode 100755
new mode 100644
index 5c8336f..aee6e64
--- a/test/dexdump/checkers.txt
+++ b/test/dexdump/checkers.txt
@@ -12,8 +12,8 @@
 string_ids_off      : 112 (0x000070)
 type_ids_size       : 58
 type_ids_off        : 1404 (0x00057c)
-proto_ids_size       : 88
-proto_ids_off        : 1636 (0x000664)
+proto_ids_size      : 88
+proto_ids_off       : 1636 (0x000664)
 field_ids_size      : 108
 field_ids_off       : 2692 (0x000a84)
 method_ids_size     : 177
@@ -836,7 +836,7 @@
 001c8a: 1300 5829                              |0147: const/16 v0, #int 10584 // #2958
 001c8e: 2300 3600                              |0149: new-array v0, v0, [B // type@0036
 001c92: 6900 6800                              |014b: sput-object v0, Lcom/google/android/checkers/g;.p:[B // field@0068
-001c96: 1400 00c1 0300                         |014d: const v0, #float 0.000000 // #0003c100
+001c96: 1400 00c1 0300                         |014d: const v0, #float 3.44742e-40 // #0003c100
 001c9c: 2300 3600                              |0150: new-array v0, v0, [B // type@0036
 001ca0: 6900 6900                              |0152: sput-object v0, Lcom/google/android/checkers/g;.q:[B // field@0069
 001ca4: 6e10 1100 0a00                         |0154: invoke-virtual {v10}, Landroid/content/Context;.getResources:()Landroid/content/res/Resources; // method@0011
@@ -2044,7 +2044,7 @@
 002bd0: 5433 3b00                              |004c: iget-object v3, v3, Lcom/google/android/checkers/a;.b:[I // field@003b
 002bd4: 4403 0309                              |004e: aget v3, v3, v9
 002bd8: 5983 2a00                              |0050: iput v3, v8, Lcom/google/android/checkers/CheckersView;.x:I // field@002a
-002bdc: 1403 6666 663f                         |0052: const v3, #float 0.900000 // #3f666666
+002bdc: 1403 6666 663f                         |0052: const v3, #float 0.9 // #3f666666
 002be2: 5983 1e00                              |0055: iput v3, v8, Lcom/google/android/checkers/CheckersView;.l:F // field@001e
 002be6: 3800 4500                              |0057: if-eqz v0, 009c // +0045
 002bea: 5483 2200                              |0059: iget-object v3, v8, Lcom/google/android/checkers/CheckersView;.p:Lcom/google/android/checkers/a; // field@0022
@@ -2943,7 +2943,7 @@
 0036f2: 0800 1c00                              |0197: move-object/from16 v0, v28
 0036f6: 5202 1e00                              |0199: iget v2, v0, Lcom/google/android/checkers/CheckersView;.l:F // field@001e
 0036fa: 8922                                   |019b: float-to-double v2, v2
-0036fc: 1804 9a99 9999 9999 a93f               |019c: const-wide v4, #double 0.050000 // #3fa999999999999a
+0036fc: 1804 9a99 9999 9999 a93f               |019c: const-wide v4, #double 0.05 // #3fa999999999999a
 003706: cc42                                   |01a1: sub-double/2addr v2, v4
 003708: 8c22                                   |01a2: double-to-float v2, v2
 00370a: 0800 1c00                              |01a3: move-object/from16 v0, v28
@@ -3568,7 +3568,7 @@
 003f38: 28e9                                   |001e: goto 0007 // -0017
 003f3a: 1300 3075                              |001f: const/16 v0, #int 30000 // #7530
 003f3e: 28e6                                   |0021: goto 0007 // -001a
-003f40: 1400 60ea 0000                         |0022: const v0, #float 0.000000 // #0000ea60
+003f40: 1400 60ea 0000                         |0022: const v0, #float 8.40779e-41 // #0000ea60
 003f46: 28e2                                   |0025: goto 0007 // -001e
 003f48: 0d00                                   |0026: move-exception v0
 003f4a: 1e02                                   |0027: monitor-exit v2
@@ -3811,7 +3811,7 @@
 004024: 1302 0040                              |0046: const/16 v2, #int 16384 // #4000
 004028: 4b02 0001                              |0048: aput v2, v0, v1
 00402c: 1301 1300                              |004a: const/16 v1, #int 19 // #13
-004030: 1402 0080 0000                         |004c: const v2, #float 0.000000 // #00008000
+004030: 1402 0080 0000                         |004c: const v2, #float 4.59177e-41 // #00008000
 004036: 4b02 0001                              |004f: aput v2, v0, v1
 00403a: 1501 0100                              |0051: const/high16 v1, #int 65536 // #1
 00403e: 4b01 0006                              |0053: aput v1, v0, v6
@@ -3931,7 +3931,7 @@
 0041f6: 1302 0040                              |012f: const/16 v2, #int 16384 // #4000
 0041fa: 4b02 0001                              |0131: aput v2, v0, v1
 0041fe: 1301 1200                              |0133: const/16 v1, #int 18 // #12
-004202: 1402 0080 0000                         |0135: const v2, #float 0.000000 // #00008000
+004202: 1402 0080 0000                         |0135: const v2, #float 4.59177e-41 // #00008000
 004208: 4b02 0001                              |0138: aput v2, v0, v1
 00420c: 1301 1400                              |013a: const/16 v1, #int 20 // #14
 004210: 1502 0100                              |013c: const/high16 v2, #int 65536 // #1
@@ -3996,7 +3996,7 @@
 0042fa: 1301 0040                              |01b1: const/16 v1, #int 16384 // #4000
 0042fe: 4b01 0006                              |01b3: aput v1, v0, v6
 004302: 1301 1600                              |01b5: const/16 v1, #int 22 // #16
-004306: 1402 0080 0000                         |01b7: const v2, #float 0.000000 // #00008000
+004306: 1402 0080 0000                         |01b7: const v2, #float 4.59177e-41 // #00008000
 00430c: 4b02 0001                              |01ba: aput v2, v0, v1
 004310: 1301 1800                              |01bc: const/16 v1, #int 24 // #18
 004314: 1502 0200                              |01be: const/high16 v2, #int 131072 // #2
@@ -4045,7 +4045,7 @@
 0043b4: 1301 0040                              |020e: const/16 v1, #int 16384 // #4000
 0043b8: 4b01 0004                              |0210: aput v1, v0, v4
 0043bc: 1301 0b00                              |0212: const/16 v1, #int 11 // #b
-0043c0: 1402 0080 0000                         |0214: const v2, #float 0.000000 // #00008000
+0043c0: 1402 0080 0000                         |0214: const v2, #float 4.59177e-41 // #00008000
 0043c6: 4b02 0001                              |0217: aput v2, v0, v1
 0043ca: 1301 0d00                              |0219: const/16 v1, #int 13 // #d
 0043ce: 1502 0100                              |021b: const/high16 v2, #int 65536 // #1
@@ -4167,7 +4167,7 @@
 004588: 1301 0900                              |02f8: const/16 v1, #int 9 // #9
 00458c: 1302 0040                              |02fa: const/16 v2, #int 16384 // #4000
 004590: 4b02 0001                              |02fc: aput v2, v0, v1
-004594: 1401 0080 0000                         |02fe: const v1, #float 0.000000 // #00008000
+004594: 1401 0080 0000                         |02fe: const v1, #float 4.59177e-41 // #00008000
 00459a: 4b01 0004                              |0301: aput v1, v0, v4
 00459e: 1301 0c00                              |0303: const/16 v1, #int 12 // #c
 0045a2: 1502 0100                              |0305: const/high16 v2, #int 65536 // #1
@@ -4226,7 +4226,7 @@
 00466e: 1302 0040                              |036b: const/16 v2, #int 16384 // #4000
 004672: 4b02 0001                              |036d: aput v2, v0, v1
 004676: 1261                                   |036f: const/4 v1, #int 6 // #6
-004678: 1402 0080 0000                         |0370: const v2, #float 0.000000 // #00008000
+004678: 1402 0080 0000                         |0370: const v2, #float 4.59177e-41 // #00008000
 00467e: 4b02 0001                              |0373: aput v2, v0, v1
 004682: 1301 0800                              |0375: const/16 v1, #int 8 // #8
 004686: 1502 0200                              |0377: const/high16 v2, #int 131072 // #2
@@ -4496,7 +4496,7 @@
 004c16: 3803 3400                              |0047: if-eqz v3, 007b // +0034
 004c1a: 0800 1800                              |0049: move-object/from16 v0, v24
 004c1e: 5203 5100                              |004b: iget v3, v0, Lcom/google/android/checkers/a;.x:I // field@0051
-004c22: 1404 ffff 0f00                         |004d: const v4, #float 0.000000 // #000fffff
+004c22: 1404 ffff 0f00                         |004d: const v4, #float 1.46937e-39 // #000fffff
 004c28: b534                                   |0050: and-int/2addr v4, v3
 004c2a: 0800 1800                              |0051: move-object/from16 v0, v24
 004c2e: 5405 5200                              |0053: iget-object v5, v0, Lcom/google/android/checkers/a;.y:[I // field@0052
@@ -4516,7 +4516,7 @@
 004c66: 5405 5300                              |006f: iget-object v5, v0, Lcom/google/android/checkers/a;.z:[S // field@0053
 004c6a: 4a04 0504                              |0071: aget-short v4, v5, v4
 004c6e: 2c03 8104 0000                         |0073: sparse-switch v3, 000004f4 // +00000481
-004c74: 1403 3f42 0f00                         |0076: const v3, #float 0.000000 // #000f423f
+004c74: 1403 3f42 0f00                         |0076: const v3, #float 1.4013e-39 // #000f423f
 004c7a: 3334 a1ff                              |0079: if-ne v4, v3, 001a // -005f
 004c7e: 0800 1800                              |007b: move-object/from16 v0, v24
 004c82: 0201 1b00                              |007d: move/from16 v1, v27
@@ -4897,7 +4897,7 @@
 0051da: 28c4                                   |0329: goto 02ed // -003c
 0051dc: 0200 1900                              |032a: move/from16 v0, v25
 0051e0: 3704 4afd                              |032c: if-le v4, v0, 0076 // -02b6
-0051e4: 1404 3f42 0f00                         |032e: const v4, #float 0.000000 // #000f423f
+0051e4: 1404 3f42 0f00                         |032e: const v4, #float 1.4013e-39 // #000f423f
 0051ea: 2900 45fd                              |0331: goto/16 0076 // -02bb
 0051ee: 0200 1a00                              |0333: move/from16 v0, v26
 0051f2: 3404 f9ff                              |0335: if-lt v4, v0, 032e // -0007
@@ -5020,7 +5020,7 @@
 0053a2: 3545 bd00                              |040d: if-ge v5, v4, 04ca // +00bd
 0053a6: 0800 1800                              |040f: move-object/from16 v0, v24
 0053aa: 5204 3e00                              |0411: iget v4, v0, Lcom/google/android/checkers/a;.e:I // field@003e
-0053ae: 1405 1100 0088                         |0413: const v5, #float -0.000000 // #88000011
+0053ae: 1405 1100 0088                         |0413: const v5, #float -3.85187e-34 // #88000011
 0053b4: b554                                   |0416: and-int/2addr v4, v5
 0053b6: 3804 0900                              |0417: if-eqz v4, 0420 // +0009
 0053ba: 7110 9e00 0400                         |0419: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5052,11 +5052,11 @@
 005418: 1507 00a0                              |0448: const/high16 v7, #int -1610612736 // #a000
 00541c: 3376 0400                              |044a: if-ne v6, v7, 044e // +0004
 005420: d803 03f4                              |044c: add-int/lit8 v3, v3, #int -12 // #f4
-005424: 1406 0066 6600                         |044e: const v6, #float 0.000000 // #00666600
+005424: 1406 0066 6600                         |044e: const v6, #float 9.40381e-39 // #00666600
 00542a: b564                                   |0451: and-int/2addr v4, v6
 00542c: 7110 9e00 0400                         |0452: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 005432: 0a04                                   |0455: move-result v4
-005434: 1406 0066 6600                         |0456: const v6, #float 0.000000 // #00666600
+005434: 1406 0066 6600                         |0456: const v6, #float 9.40381e-39 // #00666600
 00543a: b565                                   |0459: and-int/2addr v5, v6
 00543c: 7110 9e00 0500                         |045a: invoke-static {v5}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 005442: 0a05                                   |045d: move-result v5
@@ -5064,13 +5064,13 @@
 005446: b043                                   |045f: add-int/2addr v3, v4
 005448: 0800 1800                              |0460: move-object/from16 v0, v24
 00544c: 5204 3d00                              |0462: iget v4, v0, Lcom/google/android/checkers/a;.d:I // field@003d
-005450: 1405 1818 1818                         |0464: const v5, #float 0.000000 // #18181818
+005450: 1405 1818 1818                         |0464: const v5, #float 1.96577e-24 // #18181818
 005456: b554                                   |0467: and-int/2addr v4, v5
 005458: 7110 9e00 0400                         |0468: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 00545e: 0a04                                   |046b: move-result v4
 005460: 0800 1800                              |046c: move-object/from16 v0, v24
 005464: 5205 3f00                              |046e: iget v5, v0, Lcom/google/android/checkers/a;.f:I // field@003f
-005468: 1406 1818 1818                         |0470: const v6, #float 0.000000 // #18181818
+005468: 1406 1818 1818                         |0470: const v6, #float 1.96577e-24 // #18181818
 00546e: b565                                   |0473: and-int/2addr v5, v6
 005470: 7110 9e00 0500                         |0474: invoke-static {v5}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 005476: 0a05                                   |0477: move-result v5
@@ -5078,7 +5078,7 @@
 00547a: b143                                   |0479: sub-int/2addr v3, v4
 00547c: 0800 1800                              |047a: move-object/from16 v0, v24
 005480: 5204 3e00                              |047c: iget v4, v0, Lcom/google/android/checkers/a;.e:I // field@003e
-005484: 1405 0800 0010                         |047e: const v5, #float 0.000000 // #10000008
+005484: 1405 0800 0010                         |047e: const v5, #float 2.52436e-29 // #10000008
 00548a: b554                                   |0481: and-int/2addr v4, v5
 00548c: 3804 0900                              |0482: if-eqz v4, 048b // +0009
 005490: 7110 9e00 0400                         |0484: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5087,7 +5087,7 @@
 00549c: b143                                   |048a: sub-int/2addr v3, v4
 00549e: 0800 1800                              |048b: move-object/from16 v0, v24
 0054a2: 5204 4000                              |048d: iget v4, v0, Lcom/google/android/checkers/a;.g:I // field@0040
-0054a6: 1405 0800 0010                         |048f: const v5, #float 0.000000 // #10000008
+0054a6: 1405 0800 0010                         |048f: const v5, #float 2.52436e-29 // #10000008
 0054ac: b554                                   |0492: and-int/2addr v4, v5
 0054ae: 3804 4c00                              |0493: if-eqz v4, 04df // +004c
 0054b2: 7110 9e00 0400                         |0495: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5124,7 +5124,7 @@
 00551c: 3745 56ff                              |04ca: if-le v5, v4, 0420 // -00aa
 005520: 0800 1800                              |04cc: move-object/from16 v0, v24
 005524: 5204 4000                              |04ce: iget v4, v0, Lcom/google/android/checkers/a;.g:I // field@0040
-005528: 1405 1100 0088                         |04d0: const v5, #float -0.000000 // #88000011
+005528: 1405 1100 0088                         |04d0: const v5, #float -3.85187e-34 // #88000011
 00552e: b554                                   |04d3: and-int/2addr v4, v5
 005530: 3804 4cff                              |04d4: if-eqz v4, 0420 // -00b4
 005534: 7110 9e00 0400                         |04d6: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5407,7 +5407,7 @@
 005868: 0126                                   |0010: move v6, v2
 00586a: 0135                                   |0011: move v5, v3
 00586c: 5240 5100                              |0012: iget v0, v4, Lcom/google/android/checkers/a;.x:I // field@0051
-005870: 1401 ffff 0f00                         |0014: const v1, #float 0.000000 // #000fffff
+005870: 1401 ffff 0f00                         |0014: const v1, #float 1.46937e-39 // #000fffff
 005876: b501                                   |0017: and-int/2addr v1, v0
 005878: 5442 5200                              |0018: iget-object v2, v4, Lcom/google/android/checkers/a;.y:[I // field@0052
 00587c: 4b00 0201                              |001a: aput v0, v2, v1
@@ -5568,19 +5568,19 @@
 005a54: e203 1404                              |0016: ushr-int/lit8 v3, v20, #int 4 // #04
 005a58: b543                                   |0018: and-int/2addr v3, v4
 005a5a: 3803 1200                              |0019: if-eqz v3, 002b // +0012
-005a5e: 1401 e0e0 e0e0                         |001b: const v1, #float -129633581999069331456.000000 // #e0e0e0e0
+005a5e: 1401 e0e0 e0e0                         |001b: const v1, #float -1.29634e+20 // #e0e0e0e0
 005a64: b531                                   |001e: and-int/2addr v1, v3
 005a66: e201 0105                              |001f: ushr-int/lit8 v1, v1, #int 5 // #05
-005a6a: 1405 0007 0707                         |0021: const v5, #float 0.000000 // #07070700
+005a6a: 1405 0007 0707                         |0021: const v5, #float 1.01583e-34 // #07070700
 005a70: b553                                   |0024: and-int/2addr v3, v5
 005a72: e203 0303                              |0025: ushr-int/lit8 v3, v3, #int 3 // #03
 005a76: b631                                   |0027: or-int/2addr v1, v3
 005a78: b521                                   |0028: and-int/2addr v1, v2
 005a7a: de01 0100                              |0029: or-int/lit8 v1, v1, #int 0 // #00
-005a7e: 1403 e0e0 e0e0                         |002b: const v3, #float -129633581999069331456.000000 // #e0e0e0e0
+005a7e: 1403 e0e0 e0e0                         |002b: const v3, #float -1.29634e+20 // #e0e0e0e0
 005a84: 9503 0314                              |002e: and-int v3, v3, v20
 005a88: e203 0305                              |0030: ushr-int/lit8 v3, v3, #int 5 // #05
-005a8c: 1405 0007 0707                         |0032: const v5, #float 0.000000 // #07070700
+005a8c: 1405 0007 0707                         |0032: const v5, #float 1.01583e-34 // #07070700
 005a92: 9505 0514                              |0035: and-int v5, v5, v20
 005a96: e205 0503                              |0037: ushr-int/lit8 v5, v5, #int 3 // #03
 005a9a: b653                                   |0039: or-int/2addr v3, v5
@@ -5597,19 +5597,19 @@
 005abc: 3802 1500                              |004a: if-eqz v2, 005f // +0015
 005ac0: 0800 1100                              |004c: move-object/from16 v0, v17
 005ac4: 5203 3e00                              |004e: iget v3, v0, Lcom/google/android/checkers/a;.e:I // field@003e
-005ac8: 1405 0707 0707                         |0050: const v5, #float 0.000000 // #07070707
+005ac8: 1405 0707 0707                         |0050: const v5, #float 1.01583e-34 // #07070707
 005ace: b525                                   |0053: and-int/2addr v5, v2
 005ad0: e005 0505                              |0054: shl-int/lit8 v5, v5, #int 5 // #05
-005ad4: 1406 e0e0 e000                         |0056: const v6, #float 0.000000 // #00e0e0e0
+005ad4: 1406 e0e0 e000                         |0056: const v6, #float 2.06518e-38 // #00e0e0e0
 005ada: b562                                   |0059: and-int/2addr v2, v6
 005adc: e002 0203                              |005a: shl-int/lit8 v2, v2, #int 3 // #03
 005ae0: b652                                   |005c: or-int/2addr v2, v5
 005ae2: b532                                   |005d: and-int/2addr v2, v3
 005ae4: b621                                   |005e: or-int/2addr v1, v2
-005ae6: 1402 0707 0707                         |005f: const v2, #float 0.000000 // #07070707
+005ae6: 1402 0707 0707                         |005f: const v2, #float 1.01583e-34 // #07070707
 005aec: 9502 0214                              |0062: and-int v2, v2, v20
 005af0: e002 0205                              |0064: shl-int/lit8 v2, v2, #int 5 // #05
-005af4: 1403 e0e0 e000                         |0066: const v3, #float 0.000000 // #00e0e0e0
+005af4: 1403 e0e0 e000                         |0066: const v3, #float 2.06518e-38 // #00e0e0e0
 005afa: 9503 0314                              |0069: and-int v3, v3, v20
 005afe: e003 0303                              |006b: shl-int/lit8 v3, v3, #int 3 // #03
 005b02: b632                                   |006d: or-int/2addr v2, v3
@@ -5802,19 +5802,19 @@
 005dd4: 3803 1600                              |01d6: if-eqz v3, 01ec // +0016
 005dd8: 0800 1100                              |01d8: move-object/from16 v0, v17
 005ddc: 5201 4000                              |01da: iget v1, v0, Lcom/google/android/checkers/a;.g:I // field@0040
-005de0: 1405 e0e0 e0e0                         |01dc: const v5, #float -129633581999069331456.000000 // #e0e0e0e0
+005de0: 1405 e0e0 e0e0                         |01dc: const v5, #float -1.29634e+20 // #e0e0e0e0
 005de6: b535                                   |01df: and-int/2addr v5, v3
 005de8: e205 0505                              |01e0: ushr-int/lit8 v5, v5, #int 5 // #05
-005dec: 1406 0007 0707                         |01e2: const v6, #float 0.000000 // #07070700
+005dec: 1406 0007 0707                         |01e2: const v6, #float 1.01583e-34 // #07070700
 005df2: b563                                   |01e5: and-int/2addr v3, v6
 005df4: e203 0303                              |01e6: ushr-int/lit8 v3, v3, #int 3 // #03
 005df8: b653                                   |01e8: or-int/2addr v3, v5
 005dfa: b531                                   |01e9: and-int/2addr v1, v3
 005dfc: de01 0100                              |01ea: or-int/lit8 v1, v1, #int 0 // #00
-005e00: 1403 e0e0 e0e0                         |01ec: const v3, #float -129633581999069331456.000000 // #e0e0e0e0
+005e00: 1403 e0e0 e0e0                         |01ec: const v3, #float -1.29634e+20 // #e0e0e0e0
 005e06: 9503 0314                              |01ef: and-int v3, v3, v20
 005e0a: e203 0305                              |01f1: ushr-int/lit8 v3, v3, #int 5 // #05
-005e0e: 1405 0007 0707                         |01f3: const v5, #float 0.000000 // #07070700
+005e0e: 1405 0007 0707                         |01f3: const v5, #float 1.01583e-34 // #07070700
 005e14: 9505 0514                              |01f6: and-int v5, v5, v20
 005e18: e205 0503                              |01f8: ushr-int/lit8 v5, v5, #int 3 // #03
 005e1c: b653                                   |01fa: or-int/2addr v3, v5
@@ -5828,19 +5828,19 @@
 005e34: e003 1404                              |0206: shl-int/lit8 v3, v20, #int 4 // #04
 005e38: b543                                   |0208: and-int/2addr v3, v4
 005e3a: 3803 1100                              |0209: if-eqz v3, 021a // +0011
-005e3e: 1405 0707 0707                         |020b: const v5, #float 0.000000 // #07070707
+005e3e: 1405 0707 0707                         |020b: const v5, #float 1.01583e-34 // #07070707
 005e44: b535                                   |020e: and-int/2addr v5, v3
 005e46: e005 0505                              |020f: shl-int/lit8 v5, v5, #int 5 // #05
-005e4a: 1406 e0e0 e000                         |0211: const v6, #float 0.000000 // #00e0e0e0
+005e4a: 1406 e0e0 e000                         |0211: const v6, #float 2.06518e-38 // #00e0e0e0
 005e50: b563                                   |0214: and-int/2addr v3, v6
 005e52: e003 0303                              |0215: shl-int/lit8 v3, v3, #int 3 // #03
 005e56: b653                                   |0217: or-int/2addr v3, v5
 005e58: b523                                   |0218: and-int/2addr v3, v2
 005e5a: b631                                   |0219: or-int/2addr v1, v3
-005e5c: 1403 0707 0707                         |021a: const v3, #float 0.000000 // #07070707
+005e5c: 1403 0707 0707                         |021a: const v3, #float 1.01583e-34 // #07070707
 005e62: 9503 0314                              |021d: and-int v3, v3, v20
 005e66: e003 0305                              |021f: shl-int/lit8 v3, v3, #int 5 // #05
-005e6a: 1405 e0e0 e000                         |0221: const v5, #float 0.000000 // #00e0e0e0
+005e6a: 1405 e0e0 e000                         |0221: const v5, #float 2.06518e-38 // #00e0e0e0
 005e70: 9505 0514                              |0224: and-int v5, v5, v20
 005e74: e005 0503                              |0226: shl-int/lit8 v5, v5, #int 3 // #03
 005e78: b653                                   |0228: or-int/2addr v3, v5
@@ -6423,9 +6423,9 @@
       outs          : 6
       insns size    : 461 16-bit code units
 006604:                                        |[006604] com.google.android.checkers.a.b:(IZI)Z
-006614: 1404 e0e0 e000                         |0000: const v4, #float 0.000000 // #00e0e0e0
+006614: 1404 e0e0 e000                         |0000: const v4, #float 2.06518e-38 // #00e0e0e0
 00661a: 1216                                   |0003: const/4 v6, #int 1 // #1
-00661c: 1403 e0e0 e0e0                         |0004: const v3, #float -129633581999069331456.000000 // #e0e0e0e0
+00661c: 1403 e0e0 e0e0                         |0004: const v3, #float -1.29634e+20 // #e0e0e0e0
 006622: 130a 0008                              |0007: const/16 v10, #int 2048 // #800
 006626: 1309 0002                              |0009: const/16 v9, #int 512 // #200
 00662a: 380d e400                              |000b: if-eqz v13, 00ef // +00e4
@@ -6436,7 +6436,7 @@
 00663e: 9502 0e03                              |0015: and-int v2, v14, v3
 006642: e202 0205                              |0017: ushr-int/lit8 v2, v2, #int 5 // #05
 006646: b621                                   |0019: or-int/2addr v1, v2
-006648: 1402 0007 0707                         |001a: const v2, #float 0.000000 // #07070700
+006648: 1402 0007 0707                         |001a: const v2, #float 1.01583e-34 // #07070700
 00664e: b5e2                                   |001d: and-int/2addr v2, v14
 006650: e202 0203                              |001e: ushr-int/lit8 v2, v2, #int 3 // #03
 006654: b621                                   |0020: or-int/2addr v1, v2
@@ -6453,14 +6453,14 @@
 006676: 9502 0e03                              |0031: and-int v2, v14, v3
 00667a: e202 0205                              |0033: ushr-int/lit8 v2, v2, #int 5 // #05
 00667e: b621                                   |0035: or-int/2addr v1, v2
-006680: 1402 0007 0707                         |0036: const v2, #float 0.000000 // #07070700
+006680: 1402 0007 0707                         |0036: const v2, #float 1.01583e-34 // #07070700
 006686: b5e2                                   |0039: and-int/2addr v2, v14
 006688: e202 0203                              |003a: ushr-int/lit8 v2, v2, #int 3 // #03
 00668c: b621                                   |003c: or-int/2addr v1, v2
 00668e: b510                                   |003d: and-int/2addr v0, v1
 006690: 52b1 3e00                              |003e: iget v1, v11, Lcom/google/android/checkers/a;.e:I // field@003e
 006694: e002 0e04                              |0040: shl-int/lit8 v2, v14, #int 4 // #04
-006698: 1403 0707 0707                         |0042: const v3, #float 0.000000 // #07070707
+006698: 1403 0707 0707                         |0042: const v3, #float 1.01583e-34 // #07070707
 00669e: b5e3                                   |0045: and-int/2addr v3, v14
 0066a0: e003 0305                              |0046: shl-int/lit8 v3, v3, #int 5 // #05
 0066a4: b632                                   |0048: or-int/2addr v2, v3
@@ -6563,7 +6563,7 @@
 0067f6: 3900 5400                              |00f1: if-nez v0, 0145 // +0054
 0067fa: 52b0 3f00                              |00f3: iget v0, v11, Lcom/google/android/checkers/a;.f:I // field@003f
 0067fe: e001 0e04                              |00f5: shl-int/lit8 v1, v14, #int 4 // #04
-006802: 1402 0707 0707                         |00f7: const v2, #float 0.000000 // #07070707
+006802: 1402 0707 0707                         |00f7: const v2, #float 1.01583e-34 // #07070707
 006808: b5e2                                   |00fa: and-int/2addr v2, v14
 00680a: e002 0205                              |00fb: shl-int/lit8 v2, v2, #int 5 // #05
 00680e: b621                                   |00fd: or-int/2addr v1, v2
@@ -6611,7 +6611,7 @@
 0068a2: 52b1 3f00                              |0147: iget v1, v11, Lcom/google/android/checkers/a;.f:I // field@003f
 0068a6: b610                                   |0149: or-int/2addr v0, v1
 0068a8: e001 0e04                              |014a: shl-int/lit8 v1, v14, #int 4 // #04
-0068ac: 1402 0707 0707                         |014c: const v2, #float 0.000000 // #07070707
+0068ac: 1402 0707 0707                         |014c: const v2, #float 1.01583e-34 // #07070707
 0068b2: b5e2                                   |014f: and-int/2addr v2, v14
 0068b4: e002 0205                              |0150: shl-int/lit8 v2, v2, #int 5 // #05
 0068b8: b621                                   |0152: or-int/2addr v1, v2
@@ -6624,7 +6624,7 @@
 0068ce: b5e3                                   |015d: and-int/2addr v3, v14
 0068d0: e203 0305                              |015e: ushr-int/lit8 v3, v3, #int 5 // #05
 0068d4: b632                                   |0160: or-int/2addr v2, v3
-0068d6: 1403 0007 0707                         |0161: const v3, #float 0.000000 // #07070700
+0068d6: 1403 0007 0707                         |0161: const v3, #float 1.01583e-34 // #07070700
 0068dc: b5e3                                   |0164: and-int/2addr v3, v14
 0068de: e203 0303                              |0165: ushr-int/lit8 v3, v3, #int 3 // #03
 0068e2: b632                                   |0167: or-int/2addr v2, v3
diff --git a/test/dexdump/checkers.xml b/test/dexdump/checkers.xml
index 232254f..4e56ea2 100755
--- a/test/dexdump/checkers.xml
+++ b/test/dexdump/checkers.xml
@@ -3,6 +3,7 @@
 >
 <class name="Checkers"
  extends="android.app.Activity"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
@@ -112,6 +113,7 @@
 </class>
 <class name="CheckersView"
  extends="android.view.View"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
@@ -331,6 +333,7 @@
 </class>
 <class name="a"
  extends="java.lang.Thread"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -500,6 +503,7 @@
 </class>
 <class name="g"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
diff --git a/test/dexdump/run-all-tests b/test/dexdump/run-all-tests
index 9cf7ab6..c9976cd 100755
--- a/test/dexdump/run-all-tests
+++ b/test/dexdump/run-all-tests
@@ -39,8 +39,8 @@
 
 # Set up dexdump binary and flags to test.
 DEXD="${ANDROID_HOST_OUT}/bin/dexdump2"
-DEXDFLAGS1="-dfh"
-DEXDFLAGS2="-l xml"
+DEXDFLAGS1="-adfh"
+DEXDFLAGS2="-e -l xml"
 
 # Set up dexlist binary and flags to test.
 DEXL="${ANDROID_HOST_OUT}/bin/dexlist"
diff --git a/test/dexdump/staticfields.dex b/test/dexdump/staticfields.dex
new file mode 100644
index 0000000..a07c46e
--- /dev/null
+++ b/test/dexdump/staticfields.dex
Binary files differ
diff --git a/test/dexdump/staticfields.lst b/test/dexdump/staticfields.lst
new file mode 100644
index 0000000..5375b8e
--- /dev/null
+++ b/test/dexdump/staticfields.lst
@@ -0,0 +1,2 @@
+#staticfields.dex
+0x000001bc 8 StaticFields <init> ()V StaticFields.java 24
diff --git a/test/dexdump/staticfields.txt b/test/dexdump/staticfields.txt
new file mode 100644
index 0000000..f6d8f19
--- /dev/null
+++ b/test/dexdump/staticfields.txt
@@ -0,0 +1,126 @@
+Processing 'staticfields.dex'...
+Opened 'staticfields.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : 52d4fc6d
+signature           : 6e82...2f27
+file_size           : 1264
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 28
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 12
+type_ids_off        : 224 (0x0000e0)
+proto_ids_size      : 1
+proto_ids_off       : 272 (0x000110)
+field_ids_size      : 12
+field_ids_off       : 284 (0x00011c)
+method_ids_size     : 2
+method_ids_off      : 380 (0x00017c)
+class_defs_size     : 1
+class_defs_off      : 396 (0x00018c)
+data_size           : 836
+data_off            : 428 (0x0001ac)
+
+Class #0 header:
+class_idx           : 6
+access_flags        : 1 (0x0001)
+superclass_idx      : 7
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 11
+annotations_off     : 0 (0x000000)
+class_data_off      : 1067 (0x00042b)
+static_fields_size  : 12
+instance_fields_size: 0
+direct_methods_size : 1
+virtual_methods_size: 0
+
+Class #0            -
+  Class descriptor  : 'LStaticFields;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LStaticFields;)
+      name          : 'test00_public_static_final_byte_42'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 42
+    #1              : (in LStaticFields;)
+      name          : 'test01_public_static_final_short_43'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 43
+    #2              : (in LStaticFields;)
+      name          : 'test02_public_static_final_char_X'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 88
+    #3              : (in LStaticFields;)
+      name          : 'test03_public_static_final_int_44'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 44
+    #4              : (in LStaticFields;)
+      name          : 'test04_public_static_final_long_45'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 45
+    #5              : (in LStaticFields;)
+      name          : 'test05_public_static_final_float_46_47'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 46.47
+    #6              : (in LStaticFields;)
+      name          : 'test06_public_static_final_double_48_49'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 48.49
+    #7              : (in LStaticFields;)
+      name          : 'test07_public_static_final_string'
+      type          : 'Ljava/lang/String;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : "abc \\><\"'&\t\r\n"
+    #8              : (in LStaticFields;)
+      name          : 'test08_public_static_final_object_null'
+      type          : 'Ljava/lang/Object;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : null
+    #9              : (in LStaticFields;)
+      name          : 'test09_public_static_final_boolean_true'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : true
+    #10              : (in LStaticFields;)
+      name          : 'test10_private_static_final_int_50'
+      type          : 'I'
+      access        : 0x001a (PRIVATE STATIC FINAL)
+      value         : 50
+    #11              : (in LStaticFields;)
+      name          : 'test99_empty_value'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LStaticFields;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0001ac:                                        |[0001ac] StaticFields.<init>:()V
+0001bc: 7010 0100 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0001
+0001c2: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=24
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LStaticFields; 
+
+  Virtual methods   -
+  source_file_idx   : 11 (StaticFields.java)
+
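[Editor's note] The field names in the new staticfields.txt expectation encode their own values, so the fixture can be read back out of the dump. The sketch below is inferred from the dump; the real StaticFields.java is not part of this diff, so treat it as a reconstruction rather than the actual source. Note that `test99_empty_value` prints no `value` line: the dex encoded static-values array is a prefix, and trailing fields left at their type default (here, zero) are simply omitted from it.

```java
// Reconstructed sketch of the fixture implied by the dump above.
public class StaticFields {
    public static final byte    test00_public_static_final_byte_42      = 42;
    public static final short   test01_public_static_final_short_43     = 43;
    public static final char    test02_public_static_final_char_X       = 'X';  // 88
    public static final int     test03_public_static_final_int_44       = 44;
    public static final long    test04_public_static_final_long_45      = 45;
    public static final float   test05_public_static_final_float_46_47  = 46.47f;
    public static final double  test06_public_static_final_double_48_49 = 48.49;
    public static final String  test07_public_static_final_string       = "abc \\><\"'&\t\r\n";
    public static final Object  test08_public_static_final_object_null  = null;
    public static final boolean test09_public_static_final_boolean_true = true;
    private static final int    test10_private_static_final_int_50      = 50;
    // Default (0) is elided from the encoded static-values array, so the
    // dump shows this field with no "value" line.
    public static final int     test99_empty_value                      = 0;
}
```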
diff --git a/test/dexdump/staticfields.xml b/test/dexdump/staticfields.xml
new file mode 100644
index 0000000..9082f0e
--- /dev/null
+++ b/test/dexdump/staticfields.xml
@@ -0,0 +1,130 @@
+<api>
+<package name=""
+>
+<class name="StaticFields"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<field name="test00_public_static_final_byte_42"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="42"
+>
+</field>
+<field name="test01_public_static_final_short_43"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="43"
+>
+</field>
+<field name="test02_public_static_final_char_X"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="88"
+>
+</field>
+<field name="test03_public_static_final_int_44"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="44"
+>
+</field>
+<field name="test04_public_static_final_long_45"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="45"
+>
+</field>
+<field name="test05_public_static_final_float_46_47"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="46.47"
+>
+</field>
+<field name="test06_public_static_final_double_48_49"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="48.49"
+>
+</field>
+<field name="test07_public_static_final_string"
+ type="java.lang.String"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="abc \&gt;&lt;&quot;'&amp;&#x9;&#xD;&#xA;"
+>
+</field>
+<field name="test08_public_static_final_object_null"
+ type="java.lang.Object"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="null"
+>
+</field>
+<field name="test09_public_static_final_boolean_true"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="true"
+>
+</field>
+<field name="test99_empty_value"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+>
+</field>
+<constructor name="StaticFields"
+ type="StaticFields"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+</class>
+</package>
+</api>
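[Editor's note] The new `value` attribute in staticfields.xml escapes the same string for XML attribute context: `&`, `<`, `>`, and `"` become entities, control characters become numeric character references, and backslash and apostrophe pass through unchanged. A sketch of an escaper (illustrative code, not dexdump's) that reproduces the attribute shown above character for character:

```java
public class XmlAttrEscapeDemo {
    static String escape(String s) {
        StringBuilder out = new StringBuilder();
        for (char c : s.toCharArray()) {
            switch (c) {
                case '&':  out.append("&amp;");  break;
                case '<':  out.append("&lt;");   break;
                case '>':  out.append("&gt;");   break;
                case '"':  out.append("&quot;"); break;
                default:
                    if (c < 0x20) {  // tab, CR, LF, ... as numeric references
                        out.append("&#x")
                           .append(Integer.toHexString(c).toUpperCase())
                           .append(';');
                    } else {
                        out.append(c);  // backslash and apostrophe pass through
                    }
            }
        }
        return out.toString();
    }

    public static void main(String[] args) {
        // Prints: abc \&gt;&lt;&quot;'&amp;&#x9;&#xD;&#xA;
        System.out.println(escape("abc \\><\"'&\t\r\n"));
    }
}
```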
diff --git a/test/dexdump/values.dex b/test/dexdump/values.dex
new file mode 100644
index 0000000..84602d0
--- /dev/null
+++ b/test/dexdump/values.dex
Binary files differ
diff --git a/test/dexdump/values.lst b/test/dexdump/values.lst
new file mode 100644
index 0000000..0dbe3a9
--- /dev/null
+++ b/test/dexdump/values.lst
@@ -0,0 +1,3 @@
+#values.dex
+0x000003bc 8 Test <clinit> ()V Test.java 66
+0x000003d4 8 Test <init> ()V Test.java 1
diff --git a/test/dexdump/values.txt b/test/dexdump/values.txt
new file mode 100644
index 0000000..7f831b1
--- /dev/null
+++ b/test/dexdump/values.txt
@@ -0,0 +1,355 @@
+Processing 'values.dex'...
+Opened 'values.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : 7605eec0
+signature           : c197...a065
+file_size           : 1864
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 70
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 12
+type_ids_off        : 392 (0x000188)
+proto_ids_size      : 1
+proto_ids_off       : 440 (0x0001b8)
+field_ids_size      : 54
+field_ids_off       : 452 (0x0001c4)
+method_ids_size     : 3
+method_ids_off      : 884 (0x000374)
+class_defs_size     : 1
+class_defs_off      : 908 (0x00038c)
+data_size           : 924
+data_off            : 940 (0x0003ac)
+
+Class #0 header:
+class_idx           : 6
+access_flags        : 1 (0x0001)
+superclass_idx      : 7
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 13
+annotations_off     : 0 (0x000000)
+class_data_off      : 1578 (0x00062a)
+static_fields_size  : 54
+instance_fields_size: 0
+direct_methods_size : 2
+virtual_methods_size: 0
+
+Class #0            -
+  Class descriptor  : 'LTest;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LTest;)
+      name          : 'mB0'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #1              : (in LTest;)
+      name          : 'mB1'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 127
+    #2              : (in LTest;)
+      name          : 'mB2'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -128
+    #3              : (in LTest;)
+      name          : 'mB3'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #4              : (in LTest;)
+      name          : 'mC0'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #5              : (in LTest;)
+      name          : 'mC1'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 32767
+    #6              : (in LTest;)
+      name          : 'mC2'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 32768
+    #7              : (in LTest;)
+      name          : 'mC3'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #8              : (in LTest;)
+      name          : 'mC4'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 65520
+    #9              : (in LTest;)
+      name          : 'mC5'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 65535
+    #10              : (in LTest;)
+      name          : 'mD0'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -inf
+    #11              : (in LTest;)
+      name          : 'mD1'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 4.94066e-324
+    #12              : (in LTest;)
+      name          : 'mD2'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -0
+    #13              : (in LTest;)
+      name          : 'mD3'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #14              : (in LTest;)
+      name          : 'mD4'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 1.79769e+308
+    #15              : (in LTest;)
+      name          : 'mD5'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : inf
+    #16              : (in LTest;)
+      name          : 'mD6'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : nan
+    #17              : (in LTest;)
+      name          : 'mF0'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -inf
+    #18              : (in LTest;)
+      name          : 'mF1'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 1.4013e-45
+    #19              : (in LTest;)
+      name          : 'mF2'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -0
+    #20              : (in LTest;)
+      name          : 'mF3'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #21              : (in LTest;)
+      name          : 'mF4'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 3.40282e+38
+    #22              : (in LTest;)
+      name          : 'mF5'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : inf
+    #23              : (in LTest;)
+      name          : 'mF6'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : nan
+    #24              : (in LTest;)
+      name          : 'mI0'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #25              : (in LTest;)
+      name          : 'mI1'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 2147483647
+    #26              : (in LTest;)
+      name          : 'mI2'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -2147483648
+    #27              : (in LTest;)
+      name          : 'mI3'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #28              : (in LTest;)
+      name          : 'mI4'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -16
+    #29              : (in LTest;)
+      name          : 'mI5'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -4096
+    #30              : (in LTest;)
+      name          : 'mI6'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1048576
+    #31              : (in LTest;)
+      name          : 'mI7'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #32              : (in LTest;)
+      name          : 'mJ0'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #33              : (in LTest;)
+      name          : 'mJ1'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 9223372036854775807
+    #34              : (in LTest;)
+      name          : 'mJ2'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -9223372036854775808
+    #35              : (in LTest;)
+      name          : 'mJ3'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #36              : (in LTest;)
+      name          : 'mJ4'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -16
+    #37              : (in LTest;)
+      name          : 'mJ5'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -4096
+    #38              : (in LTest;)
+      name          : 'mJ6'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1048576
+    #39              : (in LTest;)
+      name          : 'mJ7'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -268435456
+    #40              : (in LTest;)
+      name          : 'mJ8'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -68719476736
+    #41              : (in LTest;)
+      name          : 'mJ9'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -17592186044416
+    #42              : (in LTest;)
+      name          : 'mJa'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -4503599627370496
+    #43              : (in LTest;)
+      name          : 'mJb'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #44              : (in LTest;)
+      name          : 'mObject'
+      type          : 'Ljava/lang/Object;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : null
+    #45              : (in LTest;)
+      name          : 'mS0'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #46              : (in LTest;)
+      name          : 'mS1'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 32767
+    #47              : (in LTest;)
+      name          : 'mS2'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -32768
+    #48              : (in LTest;)
+      name          : 'mS3'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #49              : (in LTest;)
+      name          : 'mS4'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -16
+    #50              : (in LTest;)
+      name          : 'mS5'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #51              : (in LTest;)
+      name          : 'mString'
+      type          : 'Ljava/lang/String;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : "<&\"JOHO\"&>\n"
+    #52              : (in LTest;)
+      name          : 'mZ0'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : false
+    #53              : (in LTest;)
+      name          : 'mZ1'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : true
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LTest;)
+      name          : '<clinit>'
+      type          : '()V'
+      access        : 0x10008 (STATIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 4 16-bit code units
+0003ac:                                        |[0003ac] Test.<clinit>:()V
+0003bc: 1200                                   |0000: const/4 v0, #int 0 // #0
+0003be: 6900 2c00                              |0001: sput-object v0, LTest;.mObject:Ljava/lang/Object; // field@002c
+0003c2: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=66
+      locals        : 
+
+    #1              : (in LTest;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0003c4:                                        |[0003c4] Test.<init>:()V
+0003d4: 7010 0200 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0002
+0003da: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=1
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LTest; 
+
+  Virtual methods   -
+  source_file_idx   : 13 (Test.java)
+
diff --git a/test/dexdump/values.xml b/test/dexdump/values.xml
new file mode 100644
index 0000000..d6ba48d
--- /dev/null
+++ b/test/dexdump/values.xml
@@ -0,0 +1,561 @@
+<api>
+<package name=""
+>
+<class name="Test"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<field name="mB0"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mB1"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="127"
+>
+</field>
+<field name="mB2"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-128"
+>
+</field>
+<field name="mB3"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mC0"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mC1"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="32767"
+>
+</field>
+<field name="mC2"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="32768"
+>
+</field>
+<field name="mC3"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mC4"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="65520"
+>
+</field>
+<field name="mC5"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="65535"
+>
+</field>
+<field name="mD0"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-inf"
+>
+</field>
+<field name="mD1"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="4.94066e-324"
+>
+</field>
+<field name="mD2"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-0"
+>
+</field>
+<field name="mD3"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mD4"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="1.79769e+308"
+>
+</field>
+<field name="mD5"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="inf"
+>
+</field>
+<field name="mD6"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="nan"
+>
+</field>
+<field name="mF0"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-inf"
+>
+</field>
+<field name="mF1"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="1.4013e-45"
+>
+</field>
+<field name="mF2"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-0"
+>
+</field>
+<field name="mF3"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mF4"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="3.40282e+38"
+>
+</field>
+<field name="mF5"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="inf"
+>
+</field>
+<field name="mF6"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="nan"
+>
+</field>
+<field name="mI0"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mI1"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="2147483647"
+>
+</field>
+<field name="mI2"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-2147483648"
+>
+</field>
+<field name="mI3"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mI4"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-16"
+>
+</field>
+<field name="mI5"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-4096"
+>
+</field>
+<field name="mI6"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1048576"
+>
+</field>
+<field name="mI7"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mJ0"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mJ1"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="9223372036854775807"
+>
+</field>
+<field name="mJ2"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-9223372036854775808"
+>
+</field>
+<field name="mJ3"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mJ4"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-16"
+>
+</field>
+<field name="mJ5"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-4096"
+>
+</field>
+<field name="mJ6"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1048576"
+>
+</field>
+<field name="mJ7"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-268435456"
+>
+</field>
+<field name="mJ8"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-68719476736"
+>
+</field>
+<field name="mJ9"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-17592186044416"
+>
+</field>
+<field name="mJa"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-4503599627370496"
+>
+</field>
+<field name="mJb"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mObject"
+ type="java.lang.Object"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="null"
+>
+</field>
+<field name="mS0"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mS1"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="32767"
+>
+</field>
+<field name="mS2"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-32768"
+>
+</field>
+<field name="mS3"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mS4"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-16"
+>
+</field>
+<field name="mS5"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mString"
+ type="java.lang.String"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="&lt;&amp;&quot;JOHO&quot;&amp;&gt;&#xA;"
+>
+</field>
+<field name="mZ0"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="false"
+>
+</field>
+<field name="mZ1"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="true"
+>
+</field>
+<constructor name="Test"
+ type="Test"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+</class>
+</package>
+</api>
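
The values.txt and values.xml expectations above are golden outputs for the same dex file rendered in dexdump2's plain and XML layouts. A sketch of how such golden files can be regenerated, assuming the flag sets used by the dexdump test driver (-adfh for the plain dump; -e -l xml for the exported-items XML dump):

    # Regenerate golden outputs for the values test; the exact flag sets are
    # an assumption based on art/test/dexdump/run-all-tests.
    DEXDUMP="${ANDROID_HOST_OUT}/bin/dexdump2"
    "${DEXDUMP}" -adfh values.dex > values.txt
    "${DEXDUMP}" -e -l xml values.dex > values.xml
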
diff --git a/test/etc/default-build b/test/etc/default-build
index c92402b..37ce0f2 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -17,17 +17,101 @@
 # Stop if something fails.
 set -e
 
+# Set default values for directories.
+if [ -d smali ]; then
+  HAS_SMALI=true
+else
+  HAS_SMALI=false
+fi
+
+if [ -d src ]; then
+  HAS_SRC=true
+else
+  HAS_SRC=false
+fi
+
+if [ -d src2 ]; then
+  HAS_SRC2=true
+else
+  HAS_SRC2=false
+fi
+
+if [ -d src-multidex ]; then
+  HAS_SRC_MULTIDEX=true
+else
+  HAS_SRC_MULTIDEX=false
+fi
+
+if [ -d smali-multidex ]; then
+  HAS_SMALI_MULTIDEX=true
+else
+  HAS_SMALI_MULTIDEX=false
+fi
+
+if [ -d src-ex ]; then
+  HAS_SRC_EX=true
+else
+  HAS_SRC_EX=false
+fi
+
+if [ -d src-dex2oat-unresolved ]; then
+  HAS_SRC_DEX2OAT_UNRESOLVED=true
+else
+  HAS_SRC_DEX2OAT_UNRESOLVED=false
+fi
+
 DX_FLAGS=""
 SKIP_DX_MERGER="false"
+EXPERIMENTAL=""
+
+# The key used to look up default arguments when no experimental features are enabled.
+DEFAULT_EXPERIMENT="no-experiment"
+
+# Set up experimental flag mappings in bash associative arrays.
+declare -A JACK_EXPERIMENTAL_ARGS
+JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
+JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
+
+declare -A SMALI_EXPERIMENTAL_ARGS
+SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
+
+declare -A JAVAC_EXPERIMENTAL_ARGS
+JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
+JAVAC_EXPERIMENTAL_ARGS["lambdas"]="-source 1.8 -target 1.8"
+JAVAC_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-source 1.7 -target 1.7"
 
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
-    option="$1"
+    option="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
   elif [ "x$1" = "x--jvm" ]; then
     shift
+  elif [ "x$1" = "x--no-src" ]; then
+    HAS_SRC=false
+    shift
+  elif [ "x$1" = "x--no-src2" ]; then
+    HAS_SRC2=false
+    shift
+  elif [ "x$1" = "x--no-src-multidex" ]; then
+    HAS_SRC_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-smali-multidex" ]; then
+    HAS_SMALI_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-src-ex" ]; then
+    HAS_SRC_EX=false
+    shift
+  elif [ "x$1" = "x--no-smali" ]; then
+    HAS_SMALI=false
+    shift
+  elif [ "x$1" = "x--experimental" ]; then
+    shift
+    # We have a specific experimental configuration, so don't use the default.
+    DEFAULT_EXPERIMENT=""
+    EXPERIMENTAL="${EXPERIMENTAL} $1"
+    shift
   elif expr "x$1" : "x--" >/dev/null 2>&1; then
     echo "unknown $0 option: $1" 1>&2
     exit 1
@@ -36,67 +120,107 @@
   fi
 done
 
+# Be sure to get any default arguments if not doing any experiments.
+EXPERIMENTAL="${EXPERIMENTAL} ${DEFAULT_EXPERIMENT}"
+
+# Add args from the experimental mappings.
+for experiment in ${EXPERIMENTAL}; do
+  JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+  SMALI_ARGS="${SMALI_ARGS} ${SMALI_EXPERIMENTAL_ARGS[${experiment}]}"
+  JAVAC_ARGS="${JAVAC_ARGS} ${JAVAC_EXPERIMENTAL_ARGS[${experiment}]}"
+done
+
 if [ -e classes.dex ]; then
   zip $TEST_NAME.jar classes.dex
   exit 0
 fi
 
-if ! [ -d src ] && ! [ -d src2 ]; then
+if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then
   # No src directory? Then forget about trying to run dx.
   SKIP_DX_MERGER="true"
 fi
 
-if [ -d src-multidex ]; then
-  # Jack does not support this configuration unless we specify how to partition the DEX file
-  # with a .jpp file.
-  USE_JACK="false"
-fi
+if [ ${HAS_SRC_DEX2OAT_UNRESOLVED} = "true" ]; then
+  mkdir classes
+  mkdir classes-ex
+  ${JAVAC} ${JAVAC_ARGS} -implicit:none -sourcepath src-dex2oat-unresolved -d classes `find src -name '*.java'`
+  ${JAVAC} ${JAVAC_ARGS} -implicit:none -sourcepath src -d classes-ex `find src-dex2oat-unresolved -name '*.java'`
+  if [ ${USE_JACK} = "true" ]; then
+    jar cf classes.jill.jar -C classes .
+    jar cf classes-ex.jill.jar -C classes-ex .
 
-if [ ${USE_JACK} = "true" ]; then
-  # Jack toolchain
-  if [ -d src ]; then
-    ${JACK} --output-jack src.jack src
-    imported_jack_files="--import src.jack"
-  fi
-
-  if [ -d src2 ]; then
-    ${JACK} --output-jack src2.jack src2
-    imported_jack_files="--import src2.jack ${imported_jack_files}"
-  fi
-
-  # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
-  # class definitions from src2 first.
-  ${JACK} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
-else
-  # Legacy toolchain with javac+dx
-  if [ -d src ]; then
-    mkdir classes
-    ${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
-  fi
-
-  if [ -d src-multidex ]; then
-    mkdir classes2
-    ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
+    ${JACK} --import classes-ex.jill.jar --output-dex .
+    zip ${TEST_NAME}-ex.jar classes.dex
+    ${JACK} --import classes.jill.jar --output-dex .
+  else
     if [ ${NEED_DEX} = "true" ]; then
-      ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
-        --dump-width=1000 ${DX_FLAGS} classes2
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+      zip ${TEST_NAME}-ex.jar classes.dex
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
     fi
   fi
+else
+  if [ ${USE_JACK} = "true" ]; then
+    # Jack toolchain
+    if [ "${HAS_SRC}" = "true" ]; then
+      if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+        # Compile src and src-multidex in the same .jack file. We will apply multidex partitioning
+        # when creating the output .dex file.
+        ${JACK} ${JACK_ARGS} --output-jack src.jack src src-multidex
+        jack_extra_args="${jack_extra_args} -D jack.dex.output.policy=minimal-multidex"
+        jack_extra_args="${jack_extra_args} -D jack.preprocessor=true"
+        jack_extra_args="${jack_extra_args} -D jack.preprocessor.file=multidex.jpp"
+      else
+        ${JACK} ${JACK_ARGS} --output-jack src.jack src
+      fi
+      jack_extra_args="${jack_extra_args} --import src.jack"
+    fi
 
-  if [ -d src2 ]; then
-    mkdir -p classes
-    ${JAVAC} -d classes `find src2 -name '*.java'`
-  fi
+    if [ "${HAS_SRC2}" = "true" ]; then
+      ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
+      # In case of duplicate classes, we want the definitions from src2 to win. Therefore we
+      # apply the 'keep-first' policy and import src2.jack *before* src.jack.
+      jack_extra_args="${jack_extra_args} -D jack.import.type.policy=keep-first"
+      jack_extra_args="--import src2.jack ${jack_extra_args}"
+    fi
 
-  if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
-    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
-      --dump-width=1000 ${DX_FLAGS} classes
+    # Compile jack files into a DEX file.
+    if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+      ${JACK} ${JACK_ARGS} ${jack_extra_args} --output-dex .
+    fi
+  else
+    # Legacy toolchain with javac+dx
+    if [ "${HAS_SRC}" = "true" ]; then
+      mkdir classes
+      ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+    fi
+
+    if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+      mkdir classes2
+      ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
+      if [ ${NEED_DEX} = "true" ]; then
+        ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
+          --dump-width=1000 ${DX_FLAGS} classes2
+      fi
+    fi
+
+    if [ "${HAS_SRC2}" = "true" ]; then
+      mkdir -p classes
+      ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'`
+    fi
+
+    if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+      if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
+        ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
+          --dump-width=1000 ${DX_FLAGS} classes
+      fi
+    fi
   fi
 fi
 
-if [ -d smali ]; then
+if [ "${HAS_SMALI}" = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx256m --experimental --api-level 23 --output smali_classes.dex `find smali -name '*.smali'`
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${SKIP_DX_MERGER} = "false" ]; then
@@ -106,18 +230,31 @@
   fi
 fi
 
-if [ -d src-ex ]; then
+if [ "${HAS_SMALI_MULTIDEX}" = "true" ]; then
+  # Compile Smali classes
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
+    ${DXMERGER} classes2.dex classes2.dex smali_classes2.dex
+  else
+    mv smali_classes2.dex classes2.dex
+  fi
+fi
+
+
+if [ ${HAS_SRC_EX} = "true" ]; then
   if [ ${USE_JACK} = "true" ]; then
       # Rename previous "classes.dex" so it is not overwritten.
       mv classes.dex classes-1.dex
       #TODO find another way to append src.jack to the jack classpath
-      ${JACK}:src.jack --output-dex . src-ex
+      ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex
       zip $TEST_NAME-ex.jar classes.dex
       # Restore previous "classes.dex" so it can be zipped.
       mv classes-1.dex classes.dex
   else
     mkdir classes-ex
-    ${JAVAC} -d classes-ex -cp classes `find src-ex -name '*.java'`
+    ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'`
     if [ ${NEED_DEX} = "true" ]; then
       ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \
         --dump-width=1000 ${DX_FLAGS} classes-ex
@@ -133,7 +270,7 @@
 fi
 
 # Create a single jar with two dex files for multidex.
-if [ -d src-multidex ]; then
+if [ ${HAS_SRC_MULTIDEX} = "true" ] || [ ${HAS_SMALI_MULTIDEX} = "true" ]; then
   zip $TEST_NAME.jar classes.dex classes2.dex
 elif [ ${NEED_DEX} = "true" ]; then
   zip $TEST_NAME.jar classes.dex
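
The experiment plumbing added to default-build above leans on one bash property: subscripting an associative array with an absent key expands to the empty string, so only JAVAC_EXPERIMENTAL_ARGS needs an explicit "no-experiment" entry to carry default flags. A minimal self-contained sketch (requires bash 4+ for declare -A; the mappings are abbreviated from the script):

    #!/bin/bash
    declare -A JAVAC_EXPERIMENTAL_ARGS
    JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
    JAVAC_EXPERIMENTAL_ARGS["no-experiment"]="-source 1.7 -target 1.7"
    for experiment in default-methods no-experiment lambdas; do
      # "lambdas" is absent from this abbreviated map, so it contributes nothing.
      echo "javac args for ${experiment}: '${JAVAC_EXPERIMENTAL_ARGS[${experiment}]}'"
    done
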
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index fbefa07..d12bd79 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -18,8 +18,10 @@
 DEBUGGER="n"
 DEV_MODE="n"
 DEX2OAT=""
+EXPERIMENTAL=""
 FALSE_BIN="/system/bin/false"
 FLAGS=""
+ANDROID_FLAGS=""
 GDB=""
 GDB_ARGS=""
 GDB_SERVER="gdbserver"
@@ -36,13 +38,14 @@
 PREBUILD="y"
 QUIET="n"
 RELOCATE="y"
+STRIP_DEX="n"
 SECONDARY_DEX=""
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
 if [ "$ART_USE_READ_BARRIER" = "true" ]; then
-  TIME_OUT_VALUE=900  # 15 minutes.
+  TIME_OUT_VALUE=1800  # 30 minutes.
 else
-  TIME_OUT_VALUE=600  # 10 minutes.
+  TIME_OUT_VALUE=1200  # 20 minutes.
 fi
 USE_GDB="n"
 USE_JVM="n"
@@ -57,6 +60,9 @@
     if [ "x$1" = "x--quiet" ]; then
         QUIET="y"
         shift
+    elif [ "x$1" = "x-O" ]; then
+        # Ignore this option.
+        shift
     elif [ "x$1" = "x--lib" ]; then
         shift
         if [ "x$1" = "x" ]; then
@@ -65,6 +71,10 @@
         fi
         LIB="$1"
         shift
+    elif [ "x$1" = "x--gc-stress" ]; then
+        # Give an extra 5 minutes when running under gc-stress.
+        TIME_OUT_VALUE=$((${TIME_OUT_VALUE} + 300))
+        shift
     elif [ "x$1" = "x--testlib" ]; then
         shift
         if [ "x$1" = "x" ]; then
@@ -73,12 +83,25 @@
         fi
         ARGS="${ARGS} $1"
         shift
+    elif [ "x$1" = "x--args" ]; then
+        shift
+        if [ "x$1" = "x" ]; then
+            echo "$0 missing argument to --args" 1>&2
+            exit 1
+        fi
+        ARGS="${ARGS} $1"
+        shift
     elif [ "x$1" = "x-Xcompiler-option" ]; then
         shift
         option="$1"
         FLAGS="${FLAGS} -Xcompiler-option $option"
         COMPILE_FLAGS="${COMPILE_FLAGS} $option"
         shift
+    elif [ "x$1" = "x--android-runtime-option" ]; then
+        shift
+        option="$1"
+        ANDROID_FLAGS="${ANDROID_FLAGS} $option"
+        shift
     elif [ "x$1" = "x--runtime-option" ]; then
         shift
         option="$1"
@@ -105,6 +128,9 @@
     elif [ "x$1" = "x--prebuild" ]; then
         PREBUILD="y"
         shift
+    elif [ "x$1" = "x--strip-dex" ]; then
+        STRIP_DEX="y"
+        shift
     elif [ "x$1" = "x--host" ]; then
         HOST="y"
         ANDROID_ROOT="$ANDROID_HOST_OUT"
@@ -182,6 +208,10 @@
         shift
         INSTRUCTION_SET_FEATURES="$1"
         shift
+    elif [ "x$1" = "x--timeout" ]; then
+        shift
+        TIME_OUT_VALUE="$1"
+        shift
     elif [ "x$1" = "x--" ]; then
         shift
         break
@@ -196,6 +226,13 @@
         FLAGS="${FLAGS} -Xcompiler-option --compile-pic"
         COMPILE_FLAGS="${COMPILE_FLAGS} --compile-pic"
         shift
+    elif [ "x$1" = "x--experimental" ]; then
+        if [ "$#" -lt 2 ]; then
+            echo "missing --experimental option" 1>&2
+            exit 1
+        fi
+        EXPERIMENTAL="$EXPERIMENTAL $2"
+        shift 2
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -204,6 +241,14 @@
     fi
 done
 
+if [ "$USE_JVM" = "n" ]; then
+    FLAGS="${FLAGS} ${ANDROID_FLAGS}"
+    for feature in ${EXPERIMENTAL}; do
+        FLAGS="${FLAGS} -Xexperimental:${feature} -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:${feature}"
+        COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xexperimental:${feature}"
+    done
+fi
+
 if [ "x$1" = "x" ] ; then
   MAIN="Main"
 else
@@ -292,11 +337,14 @@
 if [ "$INTERPRETER" = "y" ]; then
     INT_OPTS="-Xint"
     if [ "$VERIFY" = "y" ] ; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
     elif [ "$VERIFY" = "s" ]; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:softfail"
     else # VERIFY = "n"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
@@ -305,8 +353,10 @@
 if [ "$JIT" = "y" ]; then
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
     else
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
@@ -323,7 +373,7 @@
         # in 512 byte blocks and set it as the ulimit. This should be more than enough
         # room.
         if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
-          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework | tail -1 | cut -f1) || exit 1
+          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework 2>/dev/null | tail -1 | cut -f1) || exit 1
         fi
     fi
 else
@@ -344,14 +394,20 @@
 fi
 
 dex2oat_cmdline="true"
-mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
+mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
+strip_cmdline="true"
+
+# Pick a base that will force the app image to get relocated.
+app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
 
 if [ "$PREBUILD" = "y" ]; then
+  mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/oat/$ISA"
   dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
-                      --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
+                      --oat-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.odex \
+                      ${app_image} \
                       --instruction-set=$ISA"
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
     dex2oat_cmdline="${dex2oat_cmdline} --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
@@ -369,11 +425,24 @@
   fi
 fi
 
+if [ "$STRIP_DEX" = "y" ]; then
+  strip_cmdline="zip --quiet --delete $DEX_LOCATION/$TEST_NAME.jar classes.dex"
+fi
+
 DALVIKVM_ISA_FEATURES_ARGS=""
 if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
   DALVIKVM_ISA_FEATURES_ARGS="-Xcompiler-option --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
 fi
 
+# java.io.tmpdir can only be set at launch time.
+TMP_DIR_OPTION=""
+if [ "$HOST" = "n" ]; then
+  TMP_DIR_OPTION="-Djava.io.tmpdir=/data/local/tmp"
+fi
+
+# We set DumpNativeStackOnSigQuit to false to avoid stressing libunwind.
+# b/27185632
+# b/24664297
 dalvikvm_cmdline="$INVOKE_WITH $GDB $ANDROID_ROOT/bin/$DALVIKVM \
                   $GDB_ARGS \
                   $FLAGS \
@@ -387,6 +456,8 @@
                   $INT_OPTS \
                   $DEBUGGER_OPTS \
                   $DALVIKVM_BOOT_OPT \
+                  $TMP_DIR_OPTION \
+                  -XX:DumpNativeStackOnSigQuit:false \
                   -cp $DEX_LOCATION/$TEST_NAME.jar$SECONDARY_DEX $MAIN $ARGS"
 
 # Remove whitespace.
@@ -408,25 +479,29 @@
       adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
     fi
 
-    LD_LIBRARY_PATH=
+    LD_LIBRARY_PATH=/data/art-test/$ISA
     if [ "$ANDROID_ROOT" != "/system" ]; then
       # Current default installation is dalvikvm 64bits and dex2oat 32bits,
       # so we can only use LD_LIBRARY_PATH when testing on a local
       # installation.
-      LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY
+      LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY:$LD_LIBRARY_PATH
     fi
 
+    PUBLIC_LIBS=libart.so:libartd.so
+
     # Create a script with the command. The command can get longer than the longest
     # allowed adb command and there is no way to get the exit status from a adb shell
     # command.
     cmdline="cd $DEX_LOCATION && \
              export ANDROID_DATA=$DEX_LOCATION && \
+             export ANDROID_ADDITIONAL_PUBLIC_LIBRARIES=$PUBLIC_LIBS && \
              export DEX_LOCATION=$DEX_LOCATION && \
              export ANDROID_ROOT=$ANDROID_ROOT && \
-             $mkdir_cmdline && \
+             mkdir -p ${mkdir_locations} && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
+             $strip_cmdline && \
              $dalvikvm_cmdline"
 
     cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
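
The assembled command is written to a script because it can exceed the longest command line adb accepts, and because the exit status of a plain adb shell invocation is not reported back to the host. A reduced, hypothetical version of the pattern (the push destination and the final runner are placeholders):

    # Hypothetical sketch: ship an over-long command to the device as a script.
    cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
    echo "$cmdline" > "$cmdfile"
    adb push "$cmdfile" /data/local/tmp/cmdline.sh
    adb shell sh /data/local/tmp/cmdline.sh
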
@@ -488,28 +563,22 @@
     if [ "$TIME_OUT" = "timeout" ]; then
       # Add timeout command if time out is desired.
       #
-      # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which
-      #       will induce a full thread dump before abort. However, dumping threads might deadlock,
-      #       so the outer timeout sends the regular SIGTERM after an additional minute to ensure
-      #       termination (without dumping all threads).
-      TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 60))
-      cmdline="timeout ${TIME_PLUS_ONE}s timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
+      # Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump
+      #       before aborting. However, dumping threads might deadlock, so we also pass the "-k"
+      #       option so that the child is killed unconditionally after the grace period.
+      cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      if [ "$PREBUILD" = "y" ]; then
-        echo "$mkdir_cmdline && $dex2oat_cmdline && $cmdline"
-      elif [ "$RELOCATE" = "y" ]; then
-        echo "$mkdir_cmdline && $cmdline"
-      else
-        echo $cmdline
-      fi
+      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $strip_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
 
-    $mkdir_cmdline || exit 1
+    rm -rf ${DEX_LOCATION}/dalvik-cache/
+    mkdir -p ${mkdir_locations} || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
+    $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
 
     # For running, we must turn off logging when dex2oat or patchoat are missing. Otherwise we use
     # the same defaults as for prebuilt: everything when --dev, otherwise errors and above only.
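
The timeout change above replaces two nested timeout processes with a single invocation using coreutils' kill-after option. A standalone illustration with a placeholder command (signal and grace period as in the script):

    # After TIME_OUT_VALUE seconds, send SIGRTMIN+2 so ART dumps all threads
    # before aborting; if the process survives another 120s (e.g. the dump
    # deadlocks), timeout escalates to SIGKILL.
    timeout -k 120s -s SIGRTMIN+2 "${TIME_OUT_VALUE}s" dalvikvm -cp test.jar Main
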
diff --git a/test/run-all-tests b/test/run-all-tests
index 76283b7..402c299 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -44,12 +44,42 @@
     elif [ "x$1" = "x--use-java-home" ]; then
         run_args="${run_args} --use-java-home"
         shift
+    elif [ "x$1" = "x--no-image" ]; then
+        run_args="${run_args} --no-image"
+        shift
+    elif [ "x$1" = "x--optimizing" ]; then
+        run_args="${run_args} --optimizing"
+        shift
+    elif [ "x$1" = "x--image" ]; then
+        run_args="${run_args} --image"
+        shift
+    elif [ "x$1" = "x--never-clean" ]; then
+        run_args="${run_args} --never-clean"
+        shift
     elif [ "x$1" = "x--jvm" ]; then
         run_args="${run_args} --jvm"
         shift
     elif [ "x$1" = "x--debug" ]; then
         run_args="${run_args} --debug"
         shift
+    elif [ "x$1" = "x--build-only" ]; then
+        run_args="${run_args} --build-only"
+        shift
+    elif [ "x$1" = "x--build-with-jack" ]; then
+        run_args="${run_args} --build-with-jack"
+        shift
+    elif [ "x$1" = "x--build-with-javac-dx" ]; then
+        run_args="${run_args} --build-with-javac-dx"
+        shift
+    elif [ "x$1" = "x--dex2oat-swap" ]; then
+        run_args="${run_args} --dex2oat-swap"
+        shift
+    elif [ "x$1" = "x--dalvik" ]; then
+        run_args="${run_args} --dalvik"
+        shift
+    elif [ "x$1" = "x--debuggable" ]; then
+        run_args="${run_args} --debuggable"
+        shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
         shift
@@ -59,15 +89,15 @@
     elif [ "x$1" = "x--jit" ]; then
         run_args="${run_args} --jit"
         shift
+    elif [ "x$1" = "x--verify-soft-fail" ]; then
+        run_args="${run_args} --verify-soft-fail"
+        shift
     elif [ "x$1" = "x--no-verify" ]; then
         run_args="${run_args} --no-verify"
         shift
     elif [ "x$1" = "x--no-optimize" ]; then
         run_args="${run_args} --no-optimize"
         shift
-    elif [ "x$1" = "x--valgrind" ]; then
-        run_args="${run_args} --valgrind"
-        shift
     elif [ "x$1" = "x--dev" ]; then
         run_args="${run_args} --dev"
         shift
@@ -116,6 +146,15 @@
     elif [ "x$1" = "x--always-clean" ]; then
         run_args="${run_args} --always-clean"
         shift
+    elif [ "x$1" = "x--pic-test" ]; then
+        run_args="${run_args} --pic-test"
+        shift
+    elif [ "x$1" = "x--pic-image" ]; then
+        run_args="${run_args} --pic-image"
+        shift
+    elif [ "x$1" = "x--strace" ]; then
+        run_args="${run_args} --strace"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -134,9 +173,13 @@
         echo "  Options are all passed to run-test; refer to that for " \
              "further documentation:"
         echo "    --debug --dev --host --interpreter --jit --jvm --no-optimize"
-        echo "    --no-verify -O --update --valgrind --zygote --64 --relocate"
-        echo "    --prebuild --always-clean --gcstress --gcverify --trace"
-        echo "    --no-patchoat --no-dex2oat --use-java-home"
+        echo "    --no-verify --verify-soft-fail -O --update --zygote --64"
+        echo "    --relocate --prebuild --always-clean --gcstress --gcverify"
+        echo "    --trace --no-patchoat --no-dex2oat --use-java-home --pic-image"
+        echo "    --pic-test --strace --debuggable --dalvik --dex2oat-swap"
+        echo "    --build-only --build-with-jack --build-with-javac-dx"
+        echo "    --never-clean --image --no-image --optimizing"
+        echo "    --no-relocate --no-prebuild"
         echo "  Specific Runtime Options:"
         echo "    --seq                Run tests one-by-one, avoiding failures caused by busy CPU"
     ) 1>&2
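
With the forwarding added above, run-all-tests accepts the same configuration knobs as run-test itself. A hypothetical invocation exercising some of the newly forwarded options:

    # Run the whole suite on the host with the Optimizing backend, keeping
    # per-test directories around for inspection.
    ./test/run-all-tests --host --optimizing --never-clean
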
diff --git a/test/run-test b/test/run-test
index 293779f..4c29420 100755
--- a/test/run-test
+++ b/test/run-test
@@ -37,15 +37,16 @@
 if [ -z "$TMPDIR" ]; then
   tmp_dir="/tmp/$USER/${test_dir}"
 else
-  tmp_dir="${TMPDIR}/$USER/${test_dir}"
+  tmp_dir="${TMPDIR}/${test_dir}"
 fi
 checker="${progdir}/../tools/checker/checker.py"
 export JAVA="java"
-export JAVAC="javac -g"
+export JAVAC="javac -g -Xlint:-options"
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
-export USE_JACK="false"
+export USE_JACK="true"
+export SMALI_ARGS="--experimental"
 
 # If dx was not set by the environment variable, assume it is in the path.
 if [ -z "$DX" ]; then
@@ -72,28 +73,22 @@
   export JACK="jack"
 fi
 
-# If the tree is compiled with Jack, build test with Jack by default.
-if [ "$ANDROID_COMPILE_WITH_JACK" = "true" ]; then
-  USE_JACK="true"
-fi
-
 # ANDROID_BUILD_TOP is not set in a build environment.
 if [ -z "$ANDROID_BUILD_TOP" ]; then
     export ANDROID_BUILD_TOP=$oldwd
 fi
 
-# If JACK_CLASSPATH is not set, assume it only contains core-libart.
-if [ -z "$JACK_CLASSPATH" ]; then
-  export JACK_CLASSPATH="$ANDROID_BUILD_TOP/out/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack"
+# ANDROID_HOST_OUT is not set in a build environment.
+if [ -z "$ANDROID_HOST_OUT" ]; then
+    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
 fi
 
-# If JILL_JAR is not set, assume it is located in the prebuilts directory.
-if [ -z "$JILL_JAR" ]; then
-  export JILL_JAR="$ANDROID_BUILD_TOP/prebuilts/sdk/tools/jill.jar"
+# If JACK_CLASSPATH is not set, assume it only contains core-libart.
+if [ -z "$JACK_CLASSPATH" ]; then
+  export JACK_CLASSPATH="${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
 export JACK="$JACK -g -cp $JACK_CLASSPATH"
-export JILL="java -jar $JILL_JAR"
 
 info="info.txt"
 build="build"
@@ -109,6 +104,7 @@
 run_args="--quiet"
 build_args=""
 
+quiet="no"
 debuggable="no"
 prebuild_mode="yes"
 target_mode="yes"
@@ -131,9 +127,12 @@
 have_dex2oat="yes"
 have_patchoat="yes"
 have_image="yes"
-image_suffix=""
 pic_image_suffix=""
+multi_image_suffix=""
 android_root="/system"
+# By default we will use optimizing.
+image_args=""
+image_suffix="-optimizing"
 
 while true; do
     if [ "x$1" = "x--host" ]; then
@@ -141,6 +140,9 @@
         DEX_LOCATION=$tmp_dir
         run_args="${run_args} --host"
         shift
+    elif [ "x$1" = "x--quiet" ]; then
+        quiet="yes"
+        shift
     elif [ "x$1" = "x--use-java-home" ]; then
         if [ -n "${JAVA_HOME}" ]; then
           export JAVA="${JAVA_HOME}/bin/java"
@@ -153,6 +155,7 @@
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
         runtime="jvm"
+        image_args=""
         prebuild_mode="no"
         NEED_DEX="false"
         USE_JACK="false"
@@ -162,6 +165,7 @@
     elif [ "x$1" = "x-O" ]; then
         lib="libart.so"
         testlib="arttest"
+        run_args="${run_args} -O"
         shift
     elif [ "x$1" = "x--dalvik" ]; then
         lib="libdvm.so"
@@ -179,6 +183,9 @@
     elif [ "x$1" = "x--pic-image" ]; then
         pic_image_suffix="-pic"
         shift
+    elif [ "x$1" = "x--multi-image" ]; then
+        multi_image_suffix="-multi"
+        shift
     elif [ "x$1" = "x--pic-test" ]; then
         run_args="${run_args} --pic-test"
         shift
@@ -192,6 +199,9 @@
         run_args="${run_args} --prebuild"
         prebuild_mode="yes"
         shift;
+    elif [ "x$1" = "x--strip-dex" ]; then
+        run_args="${run_args} --strip-dex"
+        shift;
     elif [ "x$1" = "x--debuggable" ]; then
         run_args="${run_args} -Xcompiler-option --debuggable"
         debuggable="yes"
@@ -237,7 +247,7 @@
         shift
     elif [ "x$1" = "x--strace" ]; then
         strace="yes"
-        run_args="${run_args} --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
+        run_args="${run_args} --timeout 1800 --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
         shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
@@ -247,21 +257,18 @@
         image_suffix="-interpreter"
         shift
     elif [ "x$1" = "x--jit" ]; then
-        run_args="${run_args} --jit"
+        image_args="--jit"
         image_suffix="-jit"
         shift
     elif [ "x$1" = "x--optimizing" ]; then
-        run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing"
+        image_args="-Xcompiler-option --compiler-backend=Optimizing"
         image_suffix="-optimizing"
         shift
-    elif [ "x$1" = "x--quick" ]; then
-        run_args="${run_args} -Xcompiler-option --compiler-backend=Quick"
-        shift
     elif [ "x$1" = "x--no-verify" ]; then
         run_args="${run_args} --no-verify"
         shift
     elif [ "x$1" = "x--verify-soft-fail" ]; then
-        run_args="${run_args} --verify-soft-fail"
+        image_args="--verify-soft-fail"
         image_suffix="-interp-ac"
         shift
     elif [ "x$1" = "x--no-optimize" ]; then
@@ -350,6 +357,30 @@
     fi
 done
 
+run_args="${run_args} ${image_args}"
+# Allocate file descriptor real_stderr and redirect it to the shell's error
+# output (fd 2).
+if [ ${BASH_VERSINFO[0]} -gt 4 ] || [ ${BASH_VERSINFO[0]} -eq 4 -a ${BASH_VERSINFO[1]} -ge 1 ]; then
+  exec {real_stderr}>&2
+else
+  # In bash before version 4.1 we need to do a manual search for free file
+  # descriptors.
+  FD=3
+  while [ -e /dev/fd/$FD ]; do FD=$((FD + 1)); done
+  real_stderr=$FD
+  eval "exec ${real_stderr}>&2"
+fi
+if [ "$quiet" = "yes" ]; then
+  # Force the default standard output and error to go to /dev/null so we will
+  # not print them.
+  exec 1>/dev/null
+  exec 2>/dev/null
+fi
+
+function err_echo() {
+  echo "$@" 1>&${real_stderr}
+}
+
 # tmp_dir may be relative, resolve.
 #
 # Cannot use realpath, as it does not exist on Mac.
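
The brace form exec {real_stderr}>&2 asks bash (4.1 and later) to pick a free file descriptor itself and store its number in real_stderr; older shells fall back to the manual /dev/fd probe. A self-contained sketch of the resulting quiet-mode behavior:

    #!/bin/bash
    # Assumes bash >= 4.1. Duplicate stderr onto a shell-chosen descriptor,
    # then silence fd 2; err_echo still reaches the original stderr.
    exec {real_stderr}>&2
    err_echo() { echo "$@" 1>&${real_stderr}; }
    exec 2>/dev/null
    echo "suppressed diagnostic" 1>&2
    err_echo "still-visible failure message"
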
@@ -368,7 +399,7 @@
   run_args="${run_args} --runtime-option -Xgc:preverify_rosalloc --runtime-option -Xgc:postverify_rosalloc"
 fi
 if [ "$gc_stress" = "true" ]; then
-  run_args="${run_args} --runtime-option -Xgc:SS,gcstress --runtime-option -Xms2m --runtime-option -Xmx16m"
+  run_args="${run_args} --gc-stress --runtime-option -Xgc:gcstress --runtime-option -Xms2m --runtime-option -Xmx16m"
 fi
 if [ "$trace" = "true" ]; then
     run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file-size:2000000"
@@ -382,7 +413,7 @@
         run_args="${run_args} --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin"
     fi
 elif [ "$trace_stream" = "true" ]; then
-    echo "Cannot use --stream without --trace."
+    err_echo "Cannot use --stream without --trace."
     exit 1
 fi
 
@@ -409,7 +440,7 @@
 if [ "$target_mode" = "no" ]; then
     if [ "$runtime" = "jvm" ]; then
         if [ "$prebuild_mode" = "yes" ]; then
-            echo "--prebuild with --jvm is unsupported";
+            err_echo "--prebuild with --jvm is unsupported"
             exit 1;
         fi
     fi
@@ -430,24 +461,20 @@
 if [ "$runtime" = "dalvik" ]; then
     if [ "$target_mode" = "no" ]; then
         framework="${ANDROID_PRODUCT_OUT}/system/framework"
-        bpath="${framework}/core.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
-        run_args="${run_args} --boot -Xbootclasspath:${bpath}"
+        bpath="${framework}/core-libart.jar:${framework}/core-oj.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
+        run_args="${run_args} --boot --runtime-option -Xbootclasspath:${bpath}"
     else
         true # defaults to using target BOOTCLASSPATH
     fi
 elif [ "$runtime" = "art" ]; then
     if [ "$target_mode" = "no" ]; then
-        # ANDROID_HOST_OUT is not set in a build environment.
-        if [ -z "$ANDROID_HOST_OUT" ]; then
-            export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
-        fi
         guess_host_arch_name
-        run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art"
+        run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
     else
         guess_target_arch_name
-        run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}"
-        run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art"
+        run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}:/system/lib${suffix64}"
+        run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
     fi
     if [ "$relocate" = "yes" ]; then
       run_args="${run_args} --relocate"
@@ -461,7 +488,7 @@
 
 if [ "$have_image" = "no" ]; then
     if [ "$runtime" != "art" ]; then
-        echo "--no-image is only supported on the art runtime"
+        err_echo "--no-image is only supported on the art runtime"
         exit 1
     fi
     if [ "$target_mode" = "no" ]; then
@@ -474,9 +501,9 @@
     # TODO If the target was compiled WITH_DEXPREOPT=true then these tests will
     # fail since these jar files will be stripped.
     bpath="${framework}/core-libart${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/core-junit${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
     # Pass down the bootclasspath
     run_args="${run_args} --runtime-option -Xbootclasspath:${bpath}"
@@ -484,7 +511,12 @@
 fi
 
 if [ "$dev_mode" = "yes" -a "$update_mode" = "yes" ]; then
-    echo "--dev and --update are mutually exclusive" 1>&2
+    err_echo "--dev and --update are mutually exclusive"
+    usage="yes"
+fi
+
+if [ "$dev_mode" = "yes" -a "$quiet" = "yes" ]; then
+    err_echo "--dev and --quiet are mutually exclusive"
     usage="yes"
 fi
 
@@ -498,7 +530,7 @@
     if [ '!' -d "$test_dir" ]; then
         td2=`echo ${test_dir}-*`
         if [ '!' -d "$td2" ]; then
-            echo "${test_dir}: no such test directory" 1>&2
+            err_echo "${test_dir}: no such test directory"
             usage="yes"
         fi
         test_dir="$td2"
@@ -527,13 +559,13 @@
         echo "    --debug               Wait for a debugger to attach."
         echo "    --debuggable          Whether to compile Java code for a debugger."
         echo "    --gdb                 Run under gdb; incompatible with some tests."
+        echo "    --gdb-arg             Pass an option to gdb."
         echo "    --build-only          Build test files only (off by default)."
         echo "    --build-with-javac-dx Build test files with javac and dx (on by default)."
         echo "    --build-with-jack     Build test files with jack and jill (off by default)."
         echo "    --interpreter         Enable interpreter only mode (off by default)."
         echo "    --jit                 Enable jit (off by default)."
         echo "    --optimizing          Enable optimizing compiler (default)."
-        echo "    --quick               Use Quick compiler (off by default)."
         echo "    --no-verify           Turn off verification (on by default)."
         echo "    --verify-soft-fail    Force soft fail verification (off by default)."
         echo "                          Verification is enabled if neither --no-verify"
@@ -548,10 +580,13 @@
         echo "    --prebuild            Run dex2oat on the files before starting test. (default)"
         echo "    --no-prebuild         Do not run dex2oat on the files before starting"
         echo "                          the test."
+        echo "    --strip-dex           Strip the dex files before starting test."
         echo "    --relocate            Force the use of relocating in the test, making"
         echo "                          the image and oat files be relocated to a random"
         echo "                          address before running. (default)"
         echo "    --no-relocate         Force the use of no relocating in the test"
+        echo "    --image               Run the test using a precompiled boot image. (default)"
+        echo "    --no-image            Run the test without a precompiled boot image."
         echo "    --host                Use the host-mode virtual machine."
         echo "    --invoke-with         Pass --invoke-with option to runtime."
         echo "    --dalvik              Use Dalvik (off by default)."
@@ -563,6 +598,7 @@
              "files."
         echo "    --64                  Run the test in 64-bit mode"
         echo "    --trace               Run with method tracing"
+        echo "    --strace              Run with syscall tracing from strace."
         echo "    --stream              Run method tracing in streaming mode (requires --trace)"
         echo "    --gcstress            Run with gc stress testing"
         echo "    --gcverify            Run with gc verification"
@@ -572,7 +608,13 @@
         echo "    --dex2oat-swap        Use a dex2oat swap file."
         echo "    --instruction-set-features [string]"
         echo "                          Set instruction-set-features for compilation."
-    ) 1>&2
+        echo "    --pic-image           Use an image compiled with position independent code for the"
+        echo "                          boot class path."
+        echo "    --multi-image         Use a set of images compiled with dex2oat multi-image for"
+        echo "                          the boot class path."
+        echo "    --pic-test            Compile the test code position independent."
+        echo "    --quiet               Don't print anything except failure messages"
+    ) 1>&2  # Direct to stderr so usage is not printed if --quiet is set.
     exit 1
 fi
 
@@ -583,12 +625,12 @@
 td_expected="${test_dir}/${expected}"
 
 if [ ! -r $td_info ]; then
-    echo "${test_dir}: missing file $td_info" 1>&2
+    err_echo "${test_dir}: missing file $td_info"
     exit 1
 fi
 
 if [ ! -r $td_expected ]; then
-    echo "${test_dir}: missing file $td_expected" 1>&2
+    err_echo "${test_dir}: missing file $td_expected"
     exit 1
 fi
 
@@ -624,22 +666,37 @@
 
 export TEST_NAME=`basename ${test_dir}`
 
+# arch_supports_read_barrier ARCH
+# -------------------------------
+# Return whether the Optimizing compiler has read barrier support for ARCH.
+function arch_supports_read_barrier() {
+  # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the
+  # moment.
+  [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+}
+
 # Tests named '<number>-checker-*' will also have their CFGs verified with
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
-  # Build Checker DEX files without dx's optimizations so the input to dex2oat
-  # better resembles the Java source. We always build the DEX the same way, even
-  # if Checker is not invoked and the test only runs the program.
-  build_args="${build_args} --dx-option --no-optimize"
-
-  # Jack does not necessarily generate the same DEX output than dx. Because these tests depend
-  # on a particular DEX output, keep building them with dx for now (b/19467889).
-  USE_JACK="false"
-
-  if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" ]; then
+  if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" -a "$USE_JACK" = "true" ]; then
+    # Optimizing has read barrier support for certain architectures
+    # only. On other architectures, compiling is disabled when read
+    # barriers are enabled, meaning that we do not produce a CFG file
+    # as a side-effect of compilation, thus the Checker assertions
+    # cannot be checked. Disable Checker for those cases.
+    #
+    # TODO: Enable Checker when read barrier support is added to more
+    # architectures (b/12687968).
+    if [ "x$ART_USE_READ_BARRIER" = xtrue ]                    \
+       && (([ "x$host_mode" = "xyes" ]                         \
+            && ! arch_supports_read_barrier "$host_arch_name") \
+           || ([ "x$target_mode" = "xyes" ]                    \
+               && ! arch_supports_read_barrier "$target_arch_name")); then
+      run_checker="no"
     # In no-prebuild mode, the compiler is only invoked if both dex2oat and
     # patchoat are available. Disable Checker otherwise (b/22552692).
-    if [ "$prebuild_mode" = "yes" ] || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
+    elif [ "$prebuild_mode" = "yes" ] \
+         || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
       run_checker="yes"
 
       if [ "$target_mode" = "no" ]; then
@@ -667,23 +724,23 @@
 # To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
 build_file_size_limit=2048
 run_file_size_limit=2048
-if echo "$test_dir" | grep 089; then
-  build_file_size_limit=5120
-  run_file_size_limit=5120
-elif echo "$test_dir" | grep 083; then
+
+# Add tests requiring a higher ulimit to this list. Ulimits might need to be raised to deal with
+# large amounts of expected output or large generated files.
+if echo "$test_dir" | grep -Eq "(083|089|961|964|971)" > /dev/null; then
   build_file_size_limit=5120
   run_file_size_limit=5120
 fi
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=16384
-  run_file_size_limit=16384
+  build_file_size_limit=24576
+  run_file_size_limit=24576
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
   if ! ulimit -S "$build_file_size_limit"; then
-    echo "ulimit file size setting failed"
+    err_echo "ulimit file size setting failed"
   fi
 fi
 
@@ -696,7 +753,7 @@
     echo "build exit status: $build_exit" 1>&2
     if [ "$build_exit" = '0' ]; then
         if ! ulimit -S "$run_file_size_limit"; then
-          echo "ulimit file size setting failed"
+          err_echo "ulimit file size setting failed"
         fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" 2>&1
@@ -712,7 +769,7 @@
                 if [ "$checker_exit" = "0" ]; then
                     good="yes"
                 fi
-                echo "checker exit status: $checker_exit" 1>&2
+                err_echo "checker exit status: $checker_exit"
             else
                 good="yes"
             fi
@@ -724,7 +781,7 @@
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
         if ! ulimit -S "$run_file_size_limit"; then
-          echo "ulimit file size setting failed"
+          err_echo "ulimit file size setting failed"
         fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
@@ -737,8 +794,8 @@
         sed -e 's/[[:cntrl:]]$//g' < "$output" >"${td_expected}"
         good="yes"
     else
-        cat "$build_output" 1>&2
-        echo "build exit status: $build_exit" 1>&2
+        cat "$build_output" 1>&${real_stderr} 1>&2
+        err_echo "build exit status: $build_exit"
     fi
 elif [ "$build_only" = "yes" ]; then
     good="yes"
@@ -750,7 +807,7 @@
         diff --strip-trailing-cr -q "$expected" "$output" >/dev/null
         if [ "$?" '!=' "0" ]; then
             good="no"
-            echo "BUILD FAILED For ${TEST_NAME}"
+            err_echo "BUILD FAILED For ${TEST_NAME}"
         fi
     fi
     # Clean up extraneous files that are not used by tests.
@@ -761,13 +818,13 @@
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
         if ! ulimit -S "$run_file_size_limit"; then
-          echo "ulimit file size setting failed"
+          err_echo "ulimit file size setting failed"
         fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
         run_exit="$?"
         if [ "$run_exit" != "0" ]; then
-            echo "run exit status: $run_exit" 1>&2
+            err_echo "run exit status: $run_exit"
             good_run="no"
         elif [ "$run_checker" = "yes" ]; then
             if [ "$target_mode" = "yes" ]; then
@@ -776,7 +833,7 @@
             "$checker" -q $checker_args "$cfg_output" "$tmp_dir" >> "$output" 2>&1
             checker_exit="$?"
             if [ "$checker_exit" != "0" ]; then
-                echo "checker exit status: $checker_exit" 1>&2
+                err_echo "checker exit status: $checker_exit"
                 good_run="no"
             else
                 good_run="yes"
@@ -823,7 +880,7 @@
         echo ' '
     fi
 
-) 1>&2
+) 2>&${real_stderr} 1>&2
 
 # Clean up test files.
 if [ "$always_clean" = "yes" -o "$good" = "yes" ] && [ "$never_clean" = "no" ]; then
@@ -851,6 +908,10 @@
         fi
     fi
 
-) 1>&2
+) 2>&${real_stderr} 1>&2
 
-exit 1
+if [ "$never_clean" = "yes" ] && [ "$good" = "yes" ]; then
+  exit 0
+else
+  exit 1
+fi
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
new file mode 100644
index 0000000..ed280e4
--- /dev/null
+++ b/test/ti-agent/common_load.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <stdio.h>
+#include <string.h>
+// TODO: Figure out the proper include path for the JVMTI header.
+#include "openjdkjvmti/jvmti.h"
+
+#include "art_method-inl.h"
+#include "base/logging.h"
+#include "base/macros.h"
+
+#include "901-hello-ti-agent/basics.h"
+
+namespace art {
+
+using OnLoad   = jint (*)(JavaVM* vm, char* options, void* reserved);
+using OnAttach = jint (*)(JavaVM* vm, char* options, void* reserved);
+
+struct AgentLib {
+  const char* name;
+  OnLoad load;
+  OnAttach attach;
+};
+
+// A list of all the agents we have for testing.
+AgentLib agents[] = {
+  { "901-hello-ti-agent", Test901HelloTi::OnLoad, nullptr },
+};
+
+static AgentLib* FindAgent(char* name) {
+  for (AgentLib& l : agents) {
+    if (strncmp(l.name, name, strlen(l.name)) == 0) {
+      return &l;
+    }
+  }
+  return nullptr;
+}
+
+static bool FindAgentNameAndOptions(char* options,
+                                    /*out*/char** name,
+                                    /*out*/char** other_options) {
+  // The agent name is everything up to the first ',' (or the whole string
+  // if there is no comma).
+  *name = options;
+  char* rest = options;
+  while (*rest != '\0' && *rest != ',') {
+    rest++;
+  }
+  if (*rest == ',') {
+    *rest = '\0';
+    rest++;
+  }
+  *other_options = rest;
+  return true;
+}
+
+extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
+  char* remaining_options = nullptr;
+  char* name_option = nullptr;
+  if (!FindAgentNameAndOptions(options, &name_option, &remaining_options)) {
+    printf("Unable to find agent name in options: %s\n", options);
+    return -1;
+  }
+  AgentLib* lib = FindAgent(name_option);
+  if (lib == nullptr) {
+    printf("Unable to find agent named: %s, add it to the list in test/ti-agent/common_load.cc\n",
+           name_option);
+    return -2;
+  }
+  if (lib->load == nullptr) {
+    printf("agent: %s does not include an OnLoad method.\n", name_option);
+    return -3;
+  }
+  return lib->load(vm, remaining_options, reserved);
+}
+
+
+extern "C" JNIEXPORT jint JNICALL Agent_OnAttach(JavaVM* vm, char* options, void* reserved) {
+  char* remaining_options = nullptr;
+  char* name_option = nullptr;
+  if (!FindAgentNameAndOptions(options, &name_option, &remaining_options)) {
+    printf("Unable to find agent name in options: %s\n", options);
+    return -1;
+  }
+  AgentLib* lib = FindAgent(name_option);
+  if (lib == nullptr) {
+    printf("Unable to find agent named: %s, add it to the list in test/ti-agent/common_load.cc\n",
+           name_option);
+    return -2;
+  }
+  if (lib->attach == nullptr) {
+    printf("agent: %s does not include an OnAttach method.\n", name_option);
+    return -3;
+  }
+  return lib->attach(vm, remaining_options, reserved);
+}
+
+}  // namespace art
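
The dispatch above splits the agent option string on its first comma: the text
before the comma names the agent (matched by prefix in FindAgent), and the
remainder is forwarded untouched to that agent's OnLoad/OnAttach hook. A
minimal Python sketch of the same splitting, for illustration only (the
function name is made up; the real logic is FindAgentNameAndOptions above):

    def split_agent_options(options):
        # The agent name runs up to the first ','; the rest is passed through.
        name, _, rest = options.partition(',')
        return name, rest

    assert split_agent_options("901-hello-ti-agent,foo=bar") == ("901-hello-ti-agent", "foo=bar")
    assert split_agent_options("901-hello-ti-agent") == ("901-hello-ti-agent", "")
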
diff --git a/test/utils/python/generate_java_main.py b/test/utils/python/generate_java_main.py
new file mode 100755
index 0000000..f66d0dd
--- /dev/null
+++ b/test/utils/python/generate_java_main.py
@@ -0,0 +1,309 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate a Java Main.java file from a classes.xml file.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright
+import testgen.mixins as mixins
+
+from collections import namedtuple
+import itertools
+import functools
+import xml.etree.ElementTree as ET
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  The main class and main method for this test.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+class Main {{
+{test_groups}
+{test_funcs}
+{main_func}
+}}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+  public static void main(String[] args) {{
+    {test_group_invoke}
+  }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+    {test_name}();
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass
+    """
+    self.tests = set()
+    self.global_funcs = set()
+
+  def add_instance(self, it):
+    """
+    Add an instance test for the given class
+    """
+    self.tests.add(it)
+
+  def add_func(self, f):
+    """
+    Add a function to the class
+    """
+    self.global_funcs.add(f)
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print this class
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    funcs = ""
+    for f in self.global_funcs:
+      funcs += str(f)
+    return self.MAIN_CLASS_TEMPLATE.format(copyright=get_copyright('java'),
+                                           test_groups=test_groups,
+                                           main_func=main_func, test_funcs=funcs)
+
+
+class InstanceTest(mixins.Named, mixins.NameComparableMixin):
+  """
+  A method that runs tests for a particular concrete type. It calls the test
+  cases that invoke methods on that type in all possible ways.
+  """
+
+  INSTANCE_TEST_TEMPLATE = """
+  public static void {test_name}() {{
+    System.out.println("Testing for type {ty}");
+    String s = "{ty}";
+    {ty} v = new {ty}();
+
+    {invokes}
+
+    System.out.println("End testing for type {ty}");
+  }}
+"""
+
+  TEST_INVOKE_TEMPLATE = """
+    {fname}(s, v);
+"""
+
+  def __init__(self, main, ty):
+    """
+    Initialize this test group for the given type
+    """
+    self.ty = ty
+    self.main = main
+    self.funcs = set()
+    self.main.add_instance(self)
+
+  def get_name(self):
+    """
+    Get the name of this test group
+    """
+    return "TEST_NAME_"+self.ty
+
+  def add_func(self, f):
+    """
+    Add a test function to this test group
+    """
+    self.main.add_func(f)
+    self.funcs.add(f)
+
+  def __str__(self):
+    """
+    Return the Java code for this test group
+    """
+    func_invokes = ""
+    for f in sorted(self.funcs, key=lambda a: (a.func, a.farg)):
+      func_invokes += self.TEST_INVOKE_TEMPLATE.format(fname=f.get_name())
+
+    return self.INSTANCE_TEST_TEMPLATE.format(test_name=self.get_name(), ty=self.ty,
+                                              invokes=func_invokes)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A single test case that attempts to invoke a function on a receiver of a given type.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+  public static void {fname}(String s, {farg} v) {{
+    try {{
+      System.out.printf("%s-{invoke_type:<9} {farg:>9}.{callfunc}()='%s'\\n", s, v.{callfunc}());
+      return;
+    }} catch (Error e) {{
+      System.out.printf("%s-{invoke_type} on {farg}: {callfunc}() threw exception!\\n", s);
+      e.printStackTrace(System.out);
+    }}
+  }}
+"""
+
+  def __init__(self, func, farg, invoke):
+    """
+    Initialize this test function for the given invoke type and argument
+    """
+    self.func = func
+    self.farg = farg
+    self.invoke = invoke
+
+  def get_name(self):
+    """
+    Get the name of this test
+    """
+    return "Test_Func_{}_{}_{}".format(self.func, self.farg, self.invoke)
+
+  def __str__(self):
+    """
+    Get the Java code for this test function
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(),
+                                              farg=self.farg,
+                                              invoke_type=self.invoke,
+                                              callfunc=self.func)
+
+def flatten_classes(classes, c):
+  """
+  Iterate over all the classes 'c' can be used as
+  """
+  while c:
+    yield c
+    c = classes.get(c.super_class)
+
+def flatten_class_methods(classes, c):
+  """
+  Iterate over all the methods 'c' can call
+  """
+  for c1 in flatten_classes(classes, c):
+    yield from c1.methods
+
+def flatten_interfaces(dat, c):
+  """
+  Iterate over all the interfaces 'c' transitively implements
+  """
+  def get_ifaces(cl):
+    for i2 in cl.implements:
+      yield dat.interfaces[i2]
+      yield from get_ifaces(dat.interfaces[i2])
+
+  for cl in flatten_classes(dat.classes, c):
+    yield from get_ifaces(cl)
+
+def flatten_interface_methods(dat, i):
+  """
+  Iterate over all the interface methods 'i' can call
+  """
+  yield from i.methods
+  for i2 in flatten_interfaces(dat, i):
+    yield from i2.methods
+
+def make_main_class(dat):
+  """
+  Creates a Main.java file that runs all the tests
+  """
+  m = MainClass()
+  for c in dat.classes.values():
+    i = InstanceTest(m, c.name)
+    for clazz in flatten_classes(dat.classes, c):
+      for meth in flatten_class_methods(dat.classes, clazz):
+        i.add_func(Func(meth, clazz.name, 'virtual'))
+      for iface in flatten_interfaces(dat, clazz):
+        for meth in flatten_interface_methods(dat, iface):
+          i.add_func(Func(meth, clazz.name, 'virtual'))
+          i.add_func(Func(meth, iface.name, 'interface'))
+  return m
+
+class TestData(namedtuple("TestData", ['classes', 'interfaces'])):
+  """
+  A class representing the classes.xml document.
+  """
+  pass
+
+class Clazz(namedtuple("Clazz", ["name", "methods", "super_class", "implements"])):
+  """
+  A class representing a class element in the classes.xml document.
+  """
+  pass
+
+class IFace(namedtuple("IFace", ["name", "methods", "super_class", "implements"])):
+  """
+  A class representing an interface element in the classes.xml document.
+  """
+  pass
+
+def parse_xml(xml):
+  """
+  Parse the xml description of this test.
+  """
+  classes = dict()
+  ifaces  = dict()
+  root = ET.fromstring(xml)
+  for iface in root.find("interfaces"):
+    name = iface.attrib['name']
+    implements = [a.text for a in iface.find("implements")]
+    methods = [a.text for a in iface.find("methods")]
+    ifaces[name] = IFace(name = name,
+                         super_class = iface.attrib['super'],
+                         methods = methods,
+                         implements = implements)
+  for clazz in root.find('classes'):
+    name = clazz.attrib['name']
+    implements = [a.text for a in clazz.find("implements")]
+    methods = [a.text for a in clazz.find("methods")]
+    classes[name] = Clazz(name = name,
+                          super_class = clazz.attrib['super'],
+                          methods = methods,
+                          implements = implements)
+  return TestData(classes, ifaces)
+
+def main(argv):
+  java_dir = Path(argv[1])
+  if not java_dir.exists() or not java_dir.is_dir():
+    print("{} is not a valid java dir".format(java_dir), file=sys.stderr)
+    sys.exit(1)
+  class_data = parse_xml((java_dir / "classes.xml").open().read())
+  make_main_class(class_data).dump(java_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
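
For reference, parse_xml above relies only on the element and attribute names
it looks up: top-level "interfaces" and "classes" containers whose children
carry "name" and "super" attributes plus "implements" and "methods" child
lists. A small, hypothetical classes.xml and its parse, assuming
ANDROID_BUILD_TOP is set so the module-level imports succeed (the tag names of
the list items are illustrative, since parse_xml reads only their text):

    from generate_java_main import parse_xml

    xml = """
    <data>
      <interfaces>
        <iface name="Greeter" super="null">
          <implements></implements>
          <methods><item>greet</item></methods>
        </iface>
      </interfaces>
      <classes>
        <cls name="Hello" super="null">
          <implements><item>Greeter</item></implements>
          <methods><item>greet</item></methods>
        </cls>
      </classes>
    </data>
    """
    data = parse_xml(xml)
    assert data.classes["Hello"].methods == ["greet"]
    assert data.interfaces["Greeter"].methods == ["greet"]
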
diff --git a/test/utils/python/testgen/mixins.py b/test/utils/python/testgen/mixins.py
index 085e51d..aa8943b 100644
--- a/test/utils/python/testgen/mixins.py
+++ b/test/utils/python/testgen/mixins.py
@@ -79,6 +79,12 @@
   """
   pass
 
+class JavaFileMixin(get_file_extension_mixin(".java")):
+  """
+  A mixin declaring that the file this class belongs to is named get_name() + ".java".
+  """
+  pass
+
 class NameComparableMixin(object):
   """
   A mixin that defines the object comparison and related functionality in terms
diff --git a/test/valgrind-suppressions.txt b/test/valgrind-suppressions.txt
new file mode 100644
index 0000000..acab6e5
--- /dev/null
+++ b/test/valgrind-suppressions.txt
@@ -0,0 +1,15 @@
+{
+   b/27596582
+   Memcheck:Cond
+   fun:index
+   fun:expand_dynamic_string_token
+   fun:_dl_map_object
+   fun:map_doit
+   fun:_dl_catch_error
+   fun:do_preload
+   fun:dl_main
+   fun:_dl_sysdep_start
+   fun:_dl_start_final
+   fun:_dl_start
+   obj:/lib/x86_64-linux-gnu/ld-2.19.so
+}
diff --git a/test/valgrind-target-suppressions.txt b/test/valgrind-target-suppressions.txt
new file mode 100644
index 0000000..7ae6d53
--- /dev/null
+++ b/test/valgrind-target-suppressions.txt
@@ -0,0 +1,52 @@
+# Valgrind does not recognize the ashmem ioctl() calls on ARM64, so it assumes that a size
+# parameter is a pointer.
+{
+   ashmem ioctl
+   Memcheck:Param
+   ioctl(generic)
+   ...
+   fun:ioctl
+   fun:ashmem_create_region
+}
+
+# It seems that on ARM64 Valgrind considers the canary value used by the Clang stack protector to
+# be an uninitialized value.
+{
+   jemalloc chunk_alloc_cache
+   Memcheck:Cond
+   fun:je_chunk_alloc_cache
+}
+
+# The VectorImpl class does not hold a pointer to the allocated SharedBuffer structure, but to the
+# beginning of the data, which is effectively an interior pointer. Valgrind has limitations when
+# dealing with interior pointers.
+{
+   VectorImpl
+   Memcheck:Leak
+   match-leak-kinds: possible
+   fun:malloc
+   # The wildcards make this rule work both for 32-bit and 64-bit environments.
+   fun:_ZN7android12SharedBuffer5allocE?
+   fun:_ZN7android10VectorImpl5_growE??
+}
+
+# Clang/LLVM uses memcpy for *x = *y, even though x == y (which is undefined behavior). Ignore.
+# b/29279679, https://llvm.org/bugs/show_bug.cgi?id=11763
+{
+   MemCpySelfAssign
+   Memcheck:Overlap
+   fun:memcpy
+   fun:je_tsd_set
+   fun:je_tsd_fetch
+   fun:je_malloc_tsd_boot0
+}
+
+# setenv is known to leak when overwriting existing mappings. This is triggered by re-initializing
+# ANDROID_DATA. Ignore all setenv leaks.
+{
+   SetenvAndroidDataReinit
+   Memcheck:Leak
+   match-leak-kinds: definite
+   fun:malloc
+   fun:setenv
+}
diff --git a/tools/Android.mk b/tools/Android.mk
index 9a96f7a..bc2fd8c 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -19,21 +19,14 @@
 # Copy the art shell script to the host's bin directory
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
-LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE := art
-include $(BUILD_SYSTEM)/base_rules.mk
-$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/art $(ACP)
-	@echo "Copy: $(PRIVATE_MODULE) ($@)"
-	$(copy-file-to-new-target)
-	$(hide) chmod 755 $@
+LOCAL_SRC_FILES := art
+include $(BUILD_PREBUILT)
 
 # Copy the art shell script to the target's bin directory
 include $(CLEAR_VARS)
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE := art
-include $(BUILD_SYSTEM)/base_rules.mk
-$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/art $(ACP)
-	@echo "Copy: $(PRIVATE_MODULE) ($@)"
-	$(copy-file-to-new-target)
-	$(hide) chmod 755 $@
+LOCAL_SRC_FILES := art
+include $(BUILD_PREBUILT)
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index 6869b04..60e0cd8 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -30,21 +30,19 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE := ahat
+
+# Let users with Java 7 run ahat (b/28303627)
+LOCAL_JAVA_LANGUAGE_VERSION := 1.7
+
 include $(BUILD_HOST_JAVA_LIBRARY)
 
 # --- ahat script ----------------
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
-LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE := ahat
-include $(BUILD_SYSTEM)/base_rules.mk
-$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/ahat $(ACP)
-	@echo "Copy: $(PRIVATE_MODULE) ($@)"
-	$(copy-file-to-new-target)
-	$(hide) chmod 755 $@
-
-ahat: $(LOCAL_BUILT_MODULE)
+LOCAL_SRC_FILES := ahat
+include $(BUILD_PREBUILT)
 
 # --- ahat-tests.jar --------------
 include $(CLEAR_VARS)
@@ -72,9 +70,10 @@
 
 # Run ahat-test-dump.jar to generate test-dump.hprof
 AHAT_TEST_DUMP_DEPENDENCIES := \
-	$(ART_HOST_EXECUTABLES) \
-	$(HOST_OUT_EXECUTABLES)/art \
-	$(HOST_CORE_IMG_OUT_BASE)-optimizing-pic$(CORE_IMG_SUFFIX)
+  $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_SHARED_LIBRARY_DEPENDENCIES) \
+  $(HOST_OUT_EXECUTABLES)/art \
+  $(HOST_CORE_IMG_OUT_BASE)-optimizing-pic$(CORE_IMG_SUFFIX)
 
 $(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_ART := $(HOST_OUT_EXECUTABLES)/art
 $(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_DUMP_JAR := $(AHAT_TEST_DUMP_JAR)
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 5615f8f..ecf9e53 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -9,41 +9,28 @@
        Serve pages on the given port. Defaults to 7100.
 
 TODO:
+ * Show GC Root paths.
+ * Have a way to diff two heap dumps.
+
  * Add more tips to the help page.
    - Recommend how to start looking at a heap dump.
    - Say how to enable allocation sites.
    - Where to submit feedback, questions, and bug reports.
- * Submit perflib fix for getting stack traces, then uncomment that code in
-   AhatSnapshot to use that.
  * Dim 'image' and 'zygote' heap sizes slightly? Why do we even show these?
- * Filter out RootObjs in mSnapshot.getGCRoots, not RootsHandler.
  * Let user re-sort sites objects info by clicking column headers.
  * Let user re-sort "Objects" list.
  * Show site context and heap and class filter in "Objects" view?
  * Have a menu at the top of an object view with links to the sections?
  * Include ahat version and hprof file in the menu at the top of the page?
- * Show root types.
 * HeapTable
    - Make sortable by clicking on headers.
-   - Use consistent order for heap columns.
-      Sometimes I see "app" first, sometimes last (from one heap dump to
-      another) How about, always sort by name?
- * For long strings, limit the string length shown in the summary view to
-   something reasonable.  Say 50 chars, then add a "..." at the end.
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
- * Should help be part of an AhatHandler, that automatically gets the menu and
-   stylesheet link rather than duplicating that?
  * Show version number with --version.
  * Show somewhere where to send bugs.
- * /objects query takes a long time to load without parameters.
  * Include a link to /objects in the overview and menu?
  * Turn on LOCAL_JAVACFLAGS := -Xlint:unchecked -Werror
  * Use hex for object ids in URLs?
- * In general, all tables and descriptions should show a limited amount to
-   start, and only show more when requested by the user.
- * Don't have handlers inherit from HttpHandler
-   - because they should be independent from http.
 
  * [low priority] by site allocations won't line up if the stack has been
    truncated. Is there any way to manually line them up in that case?
@@ -52,29 +39,21 @@
    ignored or not?  Is there any interest in what's unreachable, or is it only
    reachable objects that people care about?
 
- * [low priority] Have a way to diff two heap dumps by site.
-   This should be pretty easy to do, actually. The interface is the real
-   question. Maybe: augment each byte count field on every page with the diff
-   if a baseline has been provided, and allow the user to sort by the diff.
-
 Things to Test:
  * That we can open a hprof without an 'app' heap and show a tabulation of
    objects normally sorted by 'app' heap by default.
  * Visit /objects without parameters and verify it doesn't throw an exception.
  * Visit /objects with an invalid site, verify it doesn't throw an exception.
- * That we can view an array with 3 million elements in a reasonably short
-   amount of time (not more than 1 second?)
  * That we can view the list of all objects in a reasonably short amount of
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
    showing all the instances.
  * That InstanceUtils.asString properly takes into account "offset" and
    "count" fields, if they are present.
+ * InstanceUtils.getDexCacheLocation
 
 Reported Issues:
  * Request to be able to sort tables by size.
- * Hangs on showing large arrays, where hat does not hang.
-   - Solution is probably to not show all the array elements by default.
 
 Perflib Requests:
  * Class objects should have java.lang.Class as their class object, not null.
@@ -93,8 +72,31 @@
  * Extracting bitmap data from bitmap instances.
  * Adding up allocations by stack frame.
  * Computing, for each instance, the other instances it dominates.
+ * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
+ 0.8 Pending
+
+ 0.7 Aug 16, 2016
+   Launch ahat server before processing the heap dump.
+   Target Java 1.7.
+
+ 0.6 Jun 21, 2016
+   Add support for ProGuard deobfuscation (pending AOSP push of perflib).
+
+ 0.5 Apr 19, 2016
+   Update perflib to perflib-25.0.0 to improve processing performance.
+
+ 0.4 Feb 23, 2016
+   Annotate char[] objects with their string values.
+   Show registered native allocations for heap dumps that support it.
+
+ 0.3 Dec 15, 2015
+   Fix page loading performance by showing a limited number of entries by default.
+   Fix mismatch between overview and "roots" totals.
+   Annotate root objects and show their types.
+   Annotate references with their referents.
+
  0.2 Oct 20, 2015
    Take into account 'count' and 'offset' when displaying strings.
 
diff --git a/tools/ahat/src/AhatHandler.java b/tools/ahat/src/AhatHandler.java
index 2da02f8..d4b4d1b 100644
--- a/tools/ahat/src/AhatHandler.java
+++ b/tools/ahat/src/AhatHandler.java
@@ -16,51 +16,17 @@
 
 package com.android.ahat;
 
-import com.sun.net.httpserver.HttpExchange;
-import com.sun.net.httpserver.HttpHandler;
 import java.io.IOException;
-import java.io.PrintStream;
 
 /**
  * AhatHandler.
  *
- * Common base class of all the ahat HttpHandlers.
+ * Interface for an ahat page handler.
  */
-abstract class AhatHandler implements HttpHandler {
+interface AhatHandler {
 
-  protected AhatSnapshot mSnapshot;
-
-  public AhatHandler(AhatSnapshot snapshot) {
-    mSnapshot = snapshot;
-  }
-
-  public abstract void handle(Doc doc, Query query) throws IOException;
-
-  @Override
-  public void handle(HttpExchange exchange) throws IOException {
-    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
-    exchange.sendResponseHeaders(200, 0);
-    PrintStream ps = new PrintStream(exchange.getResponseBody());
-    try {
-      HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
-      DocString menu = new DocString();
-      menu.appendLink(DocString.uri("/"), DocString.text("overview"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("roots"), DocString.text("roots"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("sites"), DocString.text("allocations"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("help"), DocString.text("help"));
-      doc.menu(menu);
-      handle(doc, new Query(exchange.getRequestURI()));
-      doc.close();
-    } catch (RuntimeException e) {
-      // Print runtime exceptions to standard error for debugging purposes,
-      // because otherwise they are swallowed and not reported.
-      System.err.println("Exception when handling " + exchange.getRequestURI() + ": ");
-      e.printStackTrace();
-      throw e;
-    }
-    ps.close();
-  }
+  /**
+   * Handle the given query, rendering the page to the given document.
+   */
+  void handle(Doc doc, Query query) throws IOException;
 }
diff --git a/tools/ahat/src/AhatHttpHandler.java b/tools/ahat/src/AhatHttpHandler.java
new file mode 100644
index 0000000..1d05a66
--- /dev/null
+++ b/tools/ahat/src/AhatHttpHandler.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import java.io.IOException;
+import java.io.PrintStream;
+
+/**
+ * AhatHttpHandler.
+ *
+ * HttpHandler for AhatHandlers.
+ */
+class AhatHttpHandler implements HttpHandler {
+
+  private AhatHandler mAhatHandler;
+
+  public AhatHttpHandler(AhatHandler handler) {
+    mAhatHandler = handler;
+  }
+
+  @Override
+  public void handle(HttpExchange exchange) throws IOException {
+    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
+    exchange.sendResponseHeaders(200, 0);
+    PrintStream ps = new PrintStream(exchange.getResponseBody());
+    try {
+      HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
+      doc.menu(Menu.getMenu());
+      mAhatHandler.handle(doc, new Query(exchange.getRequestURI()));
+      doc.close();
+    } catch (RuntimeException e) {
+      // Print runtime exceptions to standard error for debugging purposes,
+      // because otherwise they are swallowed and not reported.
+      System.err.println("Exception when handling " + exchange.getRequestURI() + ": ");
+      e.printStackTrace();
+      throw e;
+    }
+    ps.close();
+  }
+}
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 43658f3..a8205c7 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -16,16 +16,20 @@
 
 package com.android.ahat;
 
+import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.RootType;
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
-import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
-import com.google.common.collect.Iterables;
+
 import com.google.common.collect.Lists;
+
+import gnu.trove.TObjectProcedure;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -33,6 +37,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
@@ -41,14 +46,27 @@
  * ahat.
  */
 class AhatSnapshot {
-  private Snapshot mSnapshot;
-  private List<Heap> mHeaps;
+  private final Snapshot mSnapshot;
+  private final List<Heap> mHeaps;
 
   // Map from Instance to the list of Instances it immediately dominates.
-  private Map<Instance, List<Instance>> mDominated;
+  private final Map<Instance, List<Instance>> mDominated
+    = new HashMap<Instance, List<Instance>>();
 
-  private Site mRootSite;
-  private Map<Heap, Long> mHeapSizes;
+  // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
+  private final List<Instance> mRooted = new ArrayList<Instance>();
+
+  // Map from roots to their types.
+  // Instances are only included if they are roots, and the collection of root
+  // types is guaranteed to be non-empty.
+  private final Map<Instance, Collection<RootType>> mRoots
+    = new HashMap<Instance, Collection<RootType>>();
+
+  private final Site mRootSite = new Site("ROOT");
+  private final Map<Heap, Long> mHeapSizes = new HashMap<Heap, Long>();
+
+  private final List<InstanceUtils.NativeAllocation> mNativeAllocations
+    = new ArrayList<InstanceUtils.NativeAllocation>();
 
   /**
    * Create an AhatSnapshot from an hprof file.
@@ -67,45 +85,73 @@
   private AhatSnapshot(Snapshot snapshot) {
     mSnapshot = snapshot;
     mHeaps = new ArrayList<Heap>(mSnapshot.getHeaps());
-    mDominated = new HashMap<Instance, List<Instance>>();
-    mRootSite = new Site("ROOT");
-    mHeapSizes = new HashMap<Heap, Long>();
 
-    ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
+    final ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
-      long total = 0;
-      for (Instance inst : Iterables.concat(heap.getClasses(), heap.getInstances())) {
-        Instance dominator = inst.getImmediateDominator();
-        if (dominator != null) {
-          total += inst.getSize();
+      // Use a single-element array for the total to act as a reference to a
+      // long.
+      final long[] total = new long[]{0};
+      TObjectProcedure<Instance> processInstance = new TObjectProcedure<Instance>() {
+        @Override
+        public boolean execute(Instance inst) {
+          Instance dominator = inst.getImmediateDominator();
+          if (dominator != null) {
+            total[0] += inst.getSize();
 
-          // Properly label the class of a class object.
-          if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
-              inst.setClassId(javaLangClass.getId());
-          }
+            if (dominator == Snapshot.SENTINEL_ROOT) {
+              mRooted.add(inst);
+            }
 
-          // Update dominated instances.
-          List<Instance> instances = mDominated.get(dominator);
-          if (instances == null) {
-            instances = new ArrayList<Instance>();
-            mDominated.put(dominator, instances);
-          }
-          instances.add(inst);
+            // Properly label the class of a class object.
+            if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
+                inst.setClassId(javaLangClass.getId());
+            }
 
-          // Update sites.
-          List<StackFrame> path = Collections.emptyList();
-          StackTrace stack = getStack(inst);
-          int stackId = getStackTraceSerialNumber(stack);
-          if (stack != null) {
-            StackFrame[] frames = getStackFrames(stack);
-            if (frames != null && frames.length > 0) {
-              path = Lists.reverse(Arrays.asList(frames));
+            // Update dominated instances.
+            List<Instance> instances = mDominated.get(dominator);
+            if (instances == null) {
+              instances = new ArrayList<Instance>();
+              mDominated.put(dominator, instances);
+            }
+            instances.add(inst);
+
+            // Update sites.
+            List<StackFrame> path = Collections.emptyList();
+            StackTrace stack = getStack(inst);
+            int stackId = getStackTraceSerialNumber(stack);
+            if (stack != null) {
+              StackFrame[] frames = getStackFrames(stack);
+              if (frames != null && frames.length > 0) {
+                path = Lists.reverse(Arrays.asList(frames));
+              }
+            }
+            mRootSite.add(stackId, 0, path.iterator(), inst);
+
+            // Update native allocations.
+            InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
+            if (alloc != null) {
+              mNativeAllocations.add(alloc);
             }
           }
-          mRootSite.add(stackId, 0, path.iterator(), inst);
+          return true;
         }
+      };
+      for (Instance instance : heap.getClasses()) {
+        processInstance.execute(instance);
       }
-      mHeapSizes.put(heap, total);
+      heap.forEachInstance(processInstance);
+      mHeapSizes.put(heap, total[0]);
+    }
+
+    // Record the roots and their types.
+    for (RootObj root : snapshot.getGCRoots()) {
+      Instance inst = root.getReferredInstance();
+      Collection<RootType> types = mRoots.get(inst);
+      if (types == null) {
+        types = new HashSet<RootType>();
+        mRoots.put(inst, types);
+      }
+      types.add(root.getRootType());
     }
   }
 
@@ -126,8 +172,27 @@
     return mSnapshot.getHeap(name);
   }
 
-  public Collection<RootObj> getGCRoots() {
-    return mSnapshot.getGCRoots();
+  /**
+   * Returns a collection of instances whose immediate dominator is the
+   * SENTINEL_ROOT.
+   */
+  public List<Instance> getRooted() {
+    return mRooted;
+  }
+
+  /**
+   * Returns true if the given instance is a root.
+   */
+  public boolean isRoot(Instance inst) {
+    return mRoots.containsKey(inst);
+  }
+
+  /**
+   * Returns the list of root types for the given instance, or null if the
+   * instance is not a root.
+   */
+  public Collection<RootType> getRootTypes(Instance inst) {
+    return mRoots.get(inst);
   }
 
   public List<Heap> getHeaps() {
@@ -213,4 +278,9 @@
     }
     return site;
   }
+
+  // Return a list of known native allocations in the snapshot.
+  public List<InstanceUtils.NativeAllocation> getNativeAllocations() {
+    return mNativeAllocations;
+  }
 }
diff --git a/tools/ahat/src/Doc.java b/tools/ahat/src/Doc.java
index 7fa70de..5a70c4c 100644
--- a/tools/ahat/src/Doc.java
+++ b/tools/ahat/src/Doc.java
@@ -25,27 +25,27 @@
   /**
    * Output the title of the page.
    */
-  public void title(String format, Object... args);
+  void title(String format, Object... args);
 
   /**
    * Print a line of text for a page menu.
    */
-  public void menu(DocString string);
+  void menu(DocString string);
 
   /**
    * Start a new section with the given title.
    */
-  public void section(String title);
+  void section(String title);
 
   /**
    * Print a line of text in a normal font.
    */
-  public void println(DocString string);
+  void println(DocString string);
 
   /**
    * Print a line of text in a large font that is easy to see and click on.
    */
-  public void big(DocString string);
+  void big(DocString string);
 
   /**
    * Start a table with the given columns.
@@ -55,7 +55,7 @@
    * This should be followed by calls to the 'row' method to fill in the table
    * contents and the 'end' method to end the table.
    */
-  public void table(Column... columns);
+  void table(Column... columns);
 
   /**
    * Start a table with the following heading structure:
@@ -68,14 +68,14 @@
    * This should be followed by calls to the 'row' method to fill in the table
    * contents and the 'end' method to end the table.
    */
-  public void table(DocString description, List<Column> subcols, List<Column> cols);
+  void table(DocString description, List<Column> subcols, List<Column> cols);
 
   /**
    * Add a row to the currently active table.
    * The number of values must match the number of columns provided for the
    * currently active table.
    */
-  public void row(DocString... values);
+  void row(DocString... values);
 
   /**
    * Start a new description list.
@@ -83,15 +83,15 @@
    * This should be followed by calls to description() and finally a call to
    * end().
    */
-  public void descriptions();
+  void descriptions();
 
   /**
    * Add a description to the currently active description list.
    */
-  public void description(DocString key, DocString value);
+  void description(DocString key, DocString value);
 
   /**
    * End the currently active table or description list.
    */
-  public void end();
+  void end();
 }
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index 123d8be..7a673f5 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -21,71 +21,35 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * Class for rendering a list of instances dominated by a single instance in a
  * pretty way.
  */
 class DominatedList {
-  private static final int kIncrAmount = 100;
-  private static final int kDefaultShown = 100;
-
   /**
    * Render a table to the given HtmlWriter showing a pretty list of
    * instances.
    *
-   * Rather than show all of the instances (which may be very many), we use
-   * the query parameter "dominated" to specify a limited number of
-   * instances to show. The 'uri' parameter should be the current page URI, so
-   * that we can add links to "show more" and "show less" objects that go to
-   * the same page with only the number of objects adjusted.
+   * @param snapshot  the snapshot where the instances reside
+   * @param doc       the document to render the dominated list to
+   * @param query     the current page query
+   * @param id        a unique identifier to use for the dominated list in the current page
+   * @param instances the collection of instances to generate a list for
    */
-  public static void render(final AhatSnapshot snapshot, Doc doc,
-      Collection<Instance> instances, Query query) {
+  public static void render(final AhatSnapshot snapshot,
+      Doc doc, Query query, String id, Collection<Instance> instances) {
     List<Instance> insts = new ArrayList<Instance>(instances);
     Collections.sort(insts, Sort.defaultInstanceCompare(snapshot));
-
-    int numInstancesToShow = getNumInstancesToShow(query, insts.size());
-    List<Instance> shown = new ArrayList<Instance>(insts.subList(0, numInstancesToShow));
-    List<Instance> hidden = insts.subList(numInstancesToShow, insts.size());
-
-    // Add 'null' as a marker for "all the rest of the objects".
-    if (!hidden.isEmpty()) {
-      shown.add(null);
-    }
-    HeapTable.render(doc, new TableConfig(snapshot, hidden), snapshot, shown);
-
-    if (insts.size() > kDefaultShown) {
-      printMenu(doc, query, numInstancesToShow, insts.size());
-    }
+    HeapTable.render(doc, query, id, new TableConfig(snapshot), snapshot, insts);
   }
 
   private static class TableConfig implements HeapTable.TableConfig<Instance> {
     AhatSnapshot mSnapshot;
 
-    // Map from heap name to the total size of the instances not shown in the
-    // table.
-    Map<Heap, Long> mHiddenSizes;
-
-    public TableConfig(AhatSnapshot snapshot, List<Instance> hidden) {
+    public TableConfig(AhatSnapshot snapshot) {
       mSnapshot = snapshot;
-      mHiddenSizes = new HashMap<Heap, Long>();
-      for (Heap heap : snapshot.getHeaps()) {
-        mHiddenSizes.put(heap, 0L);
-      }
-
-      if (!hidden.isEmpty()) {
-        for (Instance inst : hidden) {
-          for (Heap heap : snapshot.getHeaps()) {
-            int index = snapshot.getHeapIndex(heap);
-            long size = inst.getRetainedSize(index);
-            mHiddenSizes.put(heap, mHiddenSizes.get(heap) + size);
-          }
-        }
-      }
     }
 
     @Override
@@ -95,9 +59,6 @@
 
     @Override
     public long getSize(Instance element, Heap heap) {
-      if (element == null) {
-        return mHiddenSizes.get(heap);
-      }
       int index = mSnapshot.getHeapIndex(heap);
       return element.getRetainedSize(index);
     }
@@ -110,56 +71,10 @@
         }
 
         public DocString render(Instance element) {
-          if (element == null) {
-            return DocString.text("...");
-          } else {
-            return Value.render(element);
-          }
+          return Value.render(mSnapshot, element);
         }
       };
       return Collections.singletonList(value);
     }
   }
-
-  // Figure out how many objects to show based on the query parameter.
-  // The resulting value is guaranteed to be at least zero, and no greater
-  // than the number of total objects.
-  private static int getNumInstancesToShow(Query query, int totalNumInstances) {
-    String value = query.get("dominated", null);
-    try {
-      int count = Math.min(totalNumInstances, Integer.parseInt(value));
-      return Math.max(0, count);
-    } catch (NumberFormatException e) {
-      // We can't parse the value as a number. Ignore it.
-    }
-    return Math.min(kDefaultShown, totalNumInstances);
-  }
-
-  // Print a menu line after the table to control how many objects are shown.
-  // It has the form:
-  //  (showing X of Y objects - show none - show less - show more - show all)
-  private static void printMenu(Doc doc, Query query, int shown, int all) {
-    DocString menu = new DocString();
-    menu.appendFormat("(%d of %d objects shown - ", shown, all);
-    if (shown > 0) {
-      int less = Math.max(0, shown - kIncrAmount);
-      menu.appendLink(query.with("dominated", 0), DocString.text("show none"));
-      menu.append(" - ");
-      menu.appendLink(query.with("dominated", less), DocString.text("show less"));
-      menu.append(" - ");
-    } else {
-      menu.append("show none - show less - ");
-    }
-    if (shown < all) {
-      int more = Math.min(shown + kIncrAmount, all);
-      menu.appendLink(query.with("dominated", more), DocString.text("show more"));
-      menu.append(" - ");
-      menu.appendLink(query.with("dominated", all), DocString.text("show all"));
-      menu.append(")");
-    } else {
-      menu.append("show more - show all)");
-    }
-    doc.println(menu);
-  }
 }
-
diff --git a/tools/ahat/src/HeapTable.java b/tools/ahat/src/HeapTable.java
index 37d5816..ed11d17 100644
--- a/tools/ahat/src/HeapTable.java
+++ b/tools/ahat/src/HeapTable.java
@@ -18,7 +18,9 @@
 
 import com.android.tools.perflib.heap.Heap;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Class for rendering a table that includes sizes of some kind for each heap.
@@ -27,22 +29,27 @@
   /**
    * Configuration for a value column of a heap table.
    */
-  public static interface ValueConfig<T> {
-    public String getDescription();
-    public DocString render(T element);
+  public interface ValueConfig<T> {
+    String getDescription();
+    DocString render(T element);
   }
 
   /**
    * Configuration for the HeapTable.
    */
-  public static interface TableConfig<T> {
-    public String getHeapsDescription();
-    public long getSize(T element, Heap heap);
-    public List<ValueConfig<T>> getValueConfigs();
+  public interface TableConfig<T> {
+    String getHeapsDescription();
+    long getSize(T element, Heap heap);
+    List<ValueConfig<T>> getValueConfigs();
   }
 
-  public static <T> void render(Doc doc, TableConfig<T> config,
-      AhatSnapshot snapshot, List<T> elements) {
+  /**
+   * Render the table to the given document.
+   * @param query - The page query.
+   * @param id - A unique identifier for the table on the page.
+   */
+  public static <T> void render(Doc doc, Query query, String id,
+      TableConfig<T> config, AhatSnapshot snapshot, List<T> elements) {
     // Only show the heaps that have non-zero entries.
     List<Heap> heaps = new ArrayList<Heap>();
     for (Heap heap : snapshot.getHeaps()) {
@@ -68,9 +75,10 @@
     }
     doc.table(DocString.text(config.getHeapsDescription()), subcols, cols);
 
-    // Print the entries.
+    // Print the entries up to the selected limit.
+    SubsetSelector<T> selector = new SubsetSelector<T>(query, id, elements);
     ArrayList<DocString> vals = new ArrayList<DocString>();
-    for (T elem : elements) {
+    for (T elem : selector.selected()) {
       vals.clear();
       long total = 0;
       for (Heap heap : heaps) {
@@ -87,7 +95,39 @@
       }
       doc.row(vals.toArray(new DocString[0]));
     }
+
+    // Print a summary of the remaining entries if there are any.
+    List<T> remaining = selector.remaining();
+    if (!remaining.isEmpty()) {
+      Map<Heap, Long> summary = new HashMap<Heap, Long>();
+      for (Heap heap : heaps) {
+        summary.put(heap, 0L);
+      }
+
+      for (T elem : remaining) {
+        for (Heap heap : heaps) {
+          summary.put(heap, summary.get(heap) + config.getSize(elem, heap));
+        }
+      }
+
+      vals.clear();
+      long total = 0;
+      for (Heap heap : heaps) {
+        long size = summary.get(heap);
+        total += size;
+        vals.add(DocString.format("%,14d", size));
+      }
+      if (showTotal) {
+        vals.add(DocString.format("%,14d", total));
+      }
+
+      for (ValueConfig<T> value : values) {
+        vals.add(DocString.text("..."));
+      }
+      doc.row(vals.toArray(new DocString[0]));
+    }
     doc.end();
+    selector.render(doc);
   }
 
   // Returns true if the given heap has a non-zero size entry.
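
The summary row added above folds every element past the subset limit into a
single line: one accumulated size per heap column, a grand total, and a "..."
placeholder per value column. A minimal Python sketch of that per-heap folding
(the heap names, sizes, and helper name are made up for illustration):

    def summarize_remaining(remaining, heaps, get_size):
        # Accumulate one size per heap, in column order, plus a grand total.
        sums = {heap: 0 for heap in heaps}
        for elem in remaining:
            for heap in heaps:
                sums[heap] += get_size(elem, heap)
        return [sums[heap] for heap in heaps], sum(sums.values())

    sizes = {("a", "app"): 100, ("a", "zygote"): 20, ("b", "app"): 5, ("b", "zygote"): 0}
    cols, total = summarize_remaining(["a", "b"], ["app", "zygote"], lambda e, h: sizes[(e, h)])
    assert cols == [105, 20] and total == 125
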
diff --git a/tools/ahat/src/HelpHandler.java b/tools/ahat/src/HelpHandler.java
new file mode 100644
index 0000000..8de3c85
--- /dev/null
+++ b/tools/ahat/src/HelpHandler.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.google.common.io.ByteStreams;
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+
+/**
+ * HelpHandler.
+ *
+ * HttpHandler to show the help page.
+ */
+class HelpHandler implements HttpHandler {
+
+  @Override
+  public void handle(HttpExchange exchange) throws IOException {
+    ClassLoader loader = HelpHandler.class.getClassLoader();
+    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
+    exchange.sendResponseHeaders(200, 0);
+    PrintStream ps = new PrintStream(exchange.getResponseBody());
+    HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
+    doc.menu(Menu.getMenu());
+
+    InputStream is = loader.getResourceAsStream("help.html");
+    if (is == null) {
+      ps.println("No help available.");
+    } else {
+      ByteStreams.copy(is, ps);
+    }
+
+    doc.close();
+    ps.close();
+  }
+}
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index eb9e363..8769d11 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -19,8 +19,10 @@
 import com.android.tools.perflib.heap.ArrayInstance;
 import com.android.tools.perflib.heap.ClassInstance;
 import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Type;
+
 import java.awt.image.BufferedImage;
 
 /**
@@ -31,7 +33,7 @@
    * Returns true if the given instance is an instance of a class with the
    * given name.
    */
-  public static boolean isInstanceOfClass(Instance inst, String className) {
+  private static boolean isInstanceOfClass(Instance inst, String className) {
     ClassObj cls = (inst == null) ? null : inst.getClassObj();
     return (cls != null && className.equals(cls.getClassName()));
   }
@@ -41,11 +43,11 @@
    * Returns null if the instance is not a byte array.
    */
   private static byte[] asByteArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
+    if (!(inst instanceof ArrayInstance)) {
       return null;
     }
 
-    ArrayInstance array = (ArrayInstance)inst;
+    ArrayInstance array = (ArrayInstance) inst;
     if (array.getArrayType() != Type.BYTE) {
       return null;
     }
@@ -53,21 +55,37 @@
     Object[] objs = array.getValues();
     byte[] bytes = new byte[objs.length];
     for (int i = 0; i < objs.length; i++) {
-      Byte b = (Byte)objs[i];
+      Byte b = (Byte) objs[i];
       bytes[i] = b.byteValue();
     }
     return bytes;
   }
 
 
-  // Read the string value from an hprof Instance.
-  // Returns null if the object can't be interpreted as a string.
+  /**
+   * Read the string value from an hprof Instance.
+   * Returns null if the object can't be interpreted as a string.
+   */
   public static String asString(Instance inst) {
-    if (!isInstanceOfClass(inst, "java.lang.String")) {
-      return null;
+    return asString(inst, -1);
+  }
+
+  /**
+   * Read the string value from an hprof Instance.
+   * Returns null if the object can't be interpreted as a string.
+   * The returned string is truncated to maxChars characters.
+   * If maxChars is negative, the returned string is not truncated.
+   */
+  public static String asString(Instance inst, int maxChars) {
+    // The inst object could either be a java.lang.String or a char[]. If it
+    // is a char[], use that directly as the value, otherwise use the value
+    // field of the string object. The field accesses for count and offset
+    // later on will work okay regardless of what type the inst object is.
+    Object value = inst;
+    if (isInstanceOfClass(inst, "java.lang.String")) {
+      value = getField(inst, "value");
     }
 
-    Object value = getField(inst, "value");
     if (!(value instanceof ArrayInstance)) {
       return null;
     }
@@ -77,17 +95,17 @@
       return null;
     }
 
-    // TODO: When perflib provides a better way to get the length of the
-    // array, we should use that here.
-    int numChars = chars.getValues().length;
+    int numChars = chars.getLength();
     int count = getIntField(inst, "count", numChars);
-    int offset = getIntField(inst, "offset", 0);
-    int end = offset + count - 1;
-
     if (count == 0) {
       return "";
     }
+    if (0 <= maxChars && maxChars < count) {
+      count = maxChars;
+    }
 
+    int offset = getIntField(inst, "offset", 0);
+    int end = offset + count - 1;
     if (offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
       return new String(chars.asCharArray(offset, count));
     }
@@ -104,12 +122,12 @@
       return null;
     }
 
-    Integer width = getIntField(inst, "mWidth");
+    Integer width = getIntField(inst, "mWidth", null);
     if (width == null) {
       return null;
     }
 
-    Integer height = getIntField(inst, "mHeight");
+    Integer height = getIntField(inst, "mHeight", null);
     if (height == null) {
       return null;
     }
@@ -124,10 +142,10 @@
     int[] abgr = new int[height * width];
     for (int i = 0; i < abgr.length; i++) {
       abgr[i] = (
-          (((int)buffer[i * 4 + 3] & 0xFF) << 24) +
-          (((int)buffer[i * 4 + 0] & 0xFF) << 16) +
-          (((int)buffer[i * 4 + 1] & 0xFF) << 8) +
-          ((int)buffer[i * 4 + 2] & 0xFF));
+          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
+          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
+          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
+          + ((int) buffer[i * 4 + 2] & 0xFF));
     }
 
     BufferedImage bitmap = new BufferedImage(
@@ -166,29 +184,35 @@
     if (!(value instanceof Instance)) {
       return null;
     }
-    return (Instance)value;
+    return (Instance) value;
   }
 
   /**
    * Read an int field of an instance.
    * The field is assumed to be an int type.
-   * Returns null if the field value is not an int or could not be read.
+   * Returns <code>def</code> if the field value is not an int or could not be
+   * read.
    */
-  private static Integer getIntField(Instance inst, String fieldName) {
+  private static Integer getIntField(Instance inst, String fieldName, Integer def) {
     Object value = getField(inst, fieldName);
     if (!(value instanceof Integer)) {
-      return null;
+      return def;
     }
-    return (Integer)value;
+    return (Integer) value;
   }
 
   /**
-   * Read an int field of an instance, returning a default value if the field
-   * was not an int or could not be read.
+   * Read a long field of an instance.
+   * The field is assumed to be a long type.
+   * Returns <code>def</code> if the field value is not a long or could not
+   * be read.
    */
-  private static int getIntField(Instance inst, String fieldName, int def) {
-    Integer value = getIntField(inst, fieldName);
-    return value == null ? def : value;
+  private static Long getLongField(Instance inst, String fieldName, Long def) {
+    Object value = getField(inst, fieldName);
+    if (!(value instanceof Long)) {
+      return def;
+    }
+    return (Long) value;
   }
 
   /**
@@ -201,7 +225,7 @@
     if (!(value instanceof Instance)) {
       return null;
     }
-    return asByteArray((Instance)value);
+    return asByteArray((Instance) value);
   }
 
   // Return the bitmap instance associated with this object, or null if there
@@ -218,9 +242,9 @@
     }
 
     if (inst instanceof ArrayInstance) {
-      ArrayInstance array = (ArrayInstance)inst;
-      if (array.getArrayType() == Type.BYTE && inst.getHardReferences().size() == 1) {
-        Instance ref = inst.getHardReferences().get(0);
+      ArrayInstance array = (ArrayInstance) inst;
+      if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
+        Instance ref = inst.getHardReverseReferences().get(0);
         ClassObj clsref = ref.getClassObj();
         if (clsref != null && "android.graphics.Bitmap".equals(clsref.getClassName())) {
           return ref;
@@ -230,18 +254,107 @@
     return null;
   }
 
+  private static boolean isJavaLangRefReference(Instance inst) {
+    ClassObj cls = (inst == null) ? null : inst.getClassObj();
+    while (cls != null) {
+      if ("java.lang.ref.Reference".equals(cls.getClassName())) {
+        return true;
+      }
+      cls = cls.getSuperClassObj();
+    }
+    return false;
+  }
+
+  public static Instance getReferent(Instance inst) {
+    if (isJavaLangRefReference(inst)) {
+      return getRefField(inst, "referent");
+    }
+    return null;
+  }
+
   /**
    * Assuming inst represents a DexCache object, return the dex location for
    * that dex cache. Returns null if the given instance doesn't represent a
    * DexCache object or the location could not be found.
+   * If maxChars is non-negative, the returned location is truncated to
+   * maxChars in length.
    */
-  public static String getDexCacheLocation(Instance inst) {
+  public static String getDexCacheLocation(Instance inst, int maxChars) {
     if (isInstanceOfClass(inst, "java.lang.DexCache")) {
       Instance location = getRefField(inst, "location");
       if (location != null) {
-        return asString(location);
+        return asString(location, maxChars);
       }
     }
     return null;
   }
+
+  public static class NativeAllocation {
+    public long size;
+    public Heap heap;
+    public long pointer;
+    public Instance referent;
+
+    public NativeAllocation(long size, Heap heap, long pointer, Instance referent) {
+      this.size = size;
+      this.heap = heap;
+      this.pointer = pointer;
+      this.referent = referent;
+    }
+  }
+
+  /**
+   * Assuming inst represents a NativeAllocation, return information about the
+   * native allocation. Returns null if the given instance doesn't represent a
+   * native allocation.
+   */
+  public static NativeAllocation getNativeAllocation(Instance inst) {
+    if (!isInstanceOfClass(inst, "libcore.util.NativeAllocationRegistry$CleanerThunk")) {
+      return null;
+    }
+
+    Long pointer = InstanceUtils.getLongField(inst, "nativePtr", null);
+    if (pointer == null) {
+      return null;
+    }
+
+    // Search for the registry field of inst.
+    // Note: We know inst is an instance of ClassInstance because we already
+    // read the nativePtr field from it.
+    Instance registry = null;
+    for (ClassInstance.FieldValue field : ((ClassInstance) inst).getValues()) {
+      Object fieldValue = field.getValue();
+      if (fieldValue instanceof Instance) {
+        Instance fieldInst = (Instance) fieldValue;
+        if (isInstanceOfClass(fieldInst, "libcore.util.NativeAllocationRegistry")) {
+          registry = fieldInst;
+          break;
+        }
+      }
+    }
+
+    if (registry == null) {
+      return null;
+    }
+
+    Long size = InstanceUtils.getLongField(registry, "size", null);
+    if (size == null) {
+      return null;
+    }
+
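+    // A registered native allocation is cleaned up by a sun.misc.Cleaner
+    // whose referent is the Java object responsible for the native
+    // allocation. Find that Cleaner among the references to this thunk.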
+    Instance referent = null;
+    for (Instance ref : inst.getHardReverseReferences()) {
+      if (isInstanceOfClass(ref, "sun.misc.Cleaner")) {
+        referent = InstanceUtils.getReferent(ref);
+        if (referent != null) {
+          break;
+        }
+      }
+    }
+
+    if (referent == null) {
+      return null;
+    }
+    return new NativeAllocation(size, inst.getHeap(), pointer, referent);
+  }
 }
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index 1563aa0..fdc5a86 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -67,19 +67,22 @@
       return;
     }
 
-    System.out.println("Processing hprof file...");
-    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
-
+    // Launch the server before parsing the hprof file so we get
+    // BindExceptions quickly.
     InetAddress loopback = InetAddress.getLoopbackAddress();
     InetSocketAddress addr = new InetSocketAddress(loopback, port);
     HttpServer server = HttpServer.create(addr, 0);
-    server.createContext("/", new OverviewHandler(ahat, hprof));
-    server.createContext("/roots", new RootsHandler(ahat));
-    server.createContext("/object", new ObjectHandler(ahat));
-    server.createContext("/objects", new ObjectsHandler(ahat));
-    server.createContext("/site", new SiteHandler(ahat));
+
+    System.out.println("Processing hprof file...");
+    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
+    server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
+    server.createContext("/rooted", new AhatHttpHandler(new RootedHandler(ahat)));
+    server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
+    server.createContext("/objects", new AhatHttpHandler(new ObjectsHandler(ahat)));
+    server.createContext("/site", new AhatHttpHandler(new SiteHandler(ahat)));
+    server.createContext("/native", new AhatHttpHandler(new NativeAllocationsHandler(ahat)));
     server.createContext("/bitmap", new BitmapHandler(ahat));
-    server.createContext("/help", new StaticHandler("help.html", "text/html"));
+    server.createContext("/help", new HelpHandler());
     server.createContext("/style.css", new StaticHandler("style.css", "text/css"));
     server.setExecutor(Executors.newFixedThreadPool(1));
     System.out.println("Server started on localhost:" + port);
diff --git a/tools/ahat/src/Menu.java b/tools/ahat/src/Menu.java
new file mode 100644
index 0000000..232b849
--- /dev/null
+++ b/tools/ahat/src/Menu.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+/**
+ * A menu shown in the UI that can be used to jump to common pages.
+ */
+class Menu {
+  private static DocString mMenu =
+    DocString.link(DocString.uri("/"), DocString.text("overview"))
+      .append(" - ")
+      .appendLink(DocString.uri("rooted"), DocString.text("rooted"))
+      .append(" - ")
+      .appendLink(DocString.uri("sites"), DocString.text("allocations"))
+      .append(" - ")
+      .appendLink(DocString.uri("native"), DocString.text("native"))
+      .append(" - ")
+      .appendLink(DocString.uri("help"), DocString.text("help"));
+
+  /**
+   * Returns the menu as a DocString.
+   */
+  public static DocString getMenu() {
+    return mMenu;
+  }
+}
diff --git a/tools/ahat/src/NativeAllocationsHandler.java b/tools/ahat/src/NativeAllocationsHandler.java
new file mode 100644
index 0000000..17407e1
--- /dev/null
+++ b/tools/ahat/src/NativeAllocationsHandler.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+class NativeAllocationsHandler implements AhatHandler {
+  private static final String ALLOCATIONS_ID = "allocations";
+
+  private AhatSnapshot mSnapshot;
+
+  public NativeAllocationsHandler(AhatSnapshot snapshot) {
+    mSnapshot = snapshot;
+  }
+
+  @Override
+  public void handle(Doc doc, Query query) throws IOException {
+    List<InstanceUtils.NativeAllocation> allocs = mSnapshot.getNativeAllocations();
+
+    doc.title("Registered Native Allocations");
+
+    doc.section("Overview");
+    long totalSize = 0;
+    for (InstanceUtils.NativeAllocation alloc : allocs) {
+      totalSize += alloc.size;
+    }
+    doc.descriptions();
+    doc.description(DocString.text("Number of Registered Native Allocations"),
+        DocString.format("%,14d", allocs.size()));
+    doc.description(DocString.text("Total Size of Registered Native Allocations"),
+        DocString.format("%,14d", totalSize));
+    doc.end();
+
+    doc.section("List of Allocations");
+    if (allocs.isEmpty()) {
+      doc.println(DocString.text("(none)"));
+    } else {
+      doc.table(
+          new Column("Size", Column.Align.RIGHT),
+          new Column("Heap"),
+          new Column("Native Pointer"),
+          new Column("Referent"));
+      Comparator<InstanceUtils.NativeAllocation> compare
+        = new Sort.WithPriority<InstanceUtils.NativeAllocation>(
+            new Sort.NativeAllocationByHeapName(),
+            new Sort.NativeAllocationBySize());
+      Collections.sort(allocs, compare);
+      SubsetSelector<InstanceUtils.NativeAllocation> selector
+        = new SubsetSelector(query, ALLOCATIONS_ID, allocs);
+      for (InstanceUtils.NativeAllocation alloc : selector.selected()) {
+        doc.row(
+            DocString.format("%,14d", alloc.size),
+            DocString.text(alloc.heap.getName()),
+            DocString.format("0x%x", alloc.pointer),
+            Value.render(mSnapshot, alloc.referent));
+      }
+
+      // Print a summary of the remaining entries if there are any.
+      List<InstanceUtils.NativeAllocation> remaining = selector.remaining();
+      if (!remaining.isEmpty()) {
+        long total = 0;
+        for (InstanceUtils.NativeAllocation alloc : remaining) {
+          total += alloc.size;
+        }
+
+        doc.row(
+            DocString.format("%,14d", total),
+            DocString.text("..."),
+            DocString.text("..."),
+            DocString.text("..."));
+      }
+
+      doc.end();
+      selector.render(doc);
+    }
+  }
+}
+
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 5e321e2..4df1be5 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -23,15 +23,30 @@
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.RootType;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-class ObjectHandler extends AhatHandler {
+class ObjectHandler implements AhatHandler {
+
+  private static final String ARRAY_ELEMENTS_ID = "elements";
+  private static final String DOMINATOR_PATH_ID = "dompath";
+  private static final String ALLOCATION_SITE_ID = "frames";
+  private static final String DOMINATED_OBJECTS_ID = "dominated";
+  private static final String INSTANCE_FIELDS_ID = "ifields";
+  private static final String STATIC_FIELDS_ID = "sfields";
+  private static final String HARD_REFS_ID = "refs";
+  private static final String SOFT_REFS_ID = "srefs";
+
+  private AhatSnapshot mSnapshot;
+
   public ObjectHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -44,100 +59,137 @@
     }
 
     doc.title("Object %08x", inst.getUniqueId());
-    doc.big(Value.render(inst));
+    doc.big(Value.render(mSnapshot, inst));
 
-    printAllocationSite(doc, inst);
-    printDominatorPath(doc, inst);
+    printAllocationSite(doc, query, inst);
+    printDominatorPath(doc, query, inst);
 
     doc.section("Object Info");
     ClassObj cls = inst.getClassObj();
     doc.descriptions();
-    doc.description(DocString.text("Class"), Value.render(cls));
+    doc.description(DocString.text("Class"), Value.render(mSnapshot, cls));
     doc.description(DocString.text("Size"), DocString.format("%d", inst.getSize()));
     doc.description(
         DocString.text("Retained Size"),
         DocString.format("%d", inst.getTotalRetainedSize()));
     doc.description(DocString.text("Heap"), DocString.text(inst.getHeap().getName()));
+
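+    // If this object is a GC root, list the root types that keep it rooted.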
+    Collection<RootType> rootTypes = mSnapshot.getRootTypes(inst);
+    if (rootTypes != null) {
+      DocString types = new DocString();
+      String comma = "";
+      for (RootType type : rootTypes) {
+        types.append(comma);
+        types.append(type.getName());
+        comma = ", ";
+      }
+      doc.description(DocString.text("Root Types"), types);
+    }
+
     doc.end();
 
     printBitmap(doc, inst);
     if (inst instanceof ClassInstance) {
-      printClassInstanceFields(doc, (ClassInstance)inst);
+      printClassInstanceFields(doc, query, mSnapshot, (ClassInstance)inst);
     } else if (inst instanceof ArrayInstance) {
-      printArrayElements(doc, (ArrayInstance)inst);
+      printArrayElements(doc, query, mSnapshot, (ArrayInstance)inst);
     } else if (inst instanceof ClassObj) {
-      printClassInfo(doc, (ClassObj)inst);
+      printClassInfo(doc, query, mSnapshot, (ClassObj)inst);
     }
-    printReferences(doc, inst);
+    printReferences(doc, query, mSnapshot, inst);
     printDominatedObjects(doc, query, inst);
   }
 
-  private static void printClassInstanceFields(Doc doc, ClassInstance inst) {
+  private static void printClassInstanceFields(
+      Doc doc, Query query, AhatSnapshot snapshot, ClassInstance inst) {
     doc.section("Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    for (ClassInstance.FieldValue field : inst.getValues()) {
+    SubsetSelector<ClassInstance.FieldValue> selector
+      = new SubsetSelector(query, INSTANCE_FIELDS_ID, inst.getValues());
+    for (ClassInstance.FieldValue field : selector.selected()) {
       doc.row(
           DocString.text(field.getField().getType().toString()),
           DocString.text(field.getField().getName()),
-          Value.render(field.getValue()));
+          Value.render(snapshot, field.getValue()));
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printArrayElements(Doc doc, ArrayInstance array) {
+  private static void printArrayElements(
+      Doc doc, Query query, AhatSnapshot snapshot, ArrayInstance array) {
     doc.section("Array Elements");
     doc.table(new Column("Index", Column.Align.RIGHT), new Column("Value"));
-    Object[] elements = array.getValues();
-    for (int i = 0; i < elements.length; i++) {
-      doc.row(DocString.format("%d", i), Value.render(elements[i]));
+    List<Object> elements = Arrays.asList(array.getValues());
+    SubsetSelector<Object> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
+    int i = 0;
+    for (Object elem : selector.selected()) {
+      doc.row(DocString.format("%d", i), Value.render(snapshot, elem));
+      i++;
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printClassInfo(Doc doc, ClassObj clsobj) {
+  private static void printClassInfo(
+      Doc doc, Query query, AhatSnapshot snapshot, ClassObj clsobj) {
     doc.section("Class Info");
     doc.descriptions();
-    doc.description(DocString.text("Super Class"), Value.render(clsobj.getSuperClassObj()));
-    doc.description(DocString.text("Class Loader"), Value.render(clsobj.getClassLoader()));
+    doc.description(DocString.text("Super Class"),
+        Value.render(snapshot, clsobj.getSuperClassObj()));
+    doc.description(DocString.text("Class Loader"),
+        Value.render(snapshot, clsobj.getClassLoader()));
     doc.end();
 
     doc.section("Static Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    for (Map.Entry<Field, Object> field : clsobj.getStaticFieldValues().entrySet()) {
+    List<Map.Entry<Field, Object>> fields
+      = new ArrayList<Map.Entry<Field, Object>>(clsobj.getStaticFieldValues().entrySet());
+    SubsetSelector<Map.Entry<Field, Object>> selector
+      = new SubsetSelector(query, STATIC_FIELDS_ID, fields);
+    for (Map.Entry<Field, Object> field : selector.selected()) {
       doc.row(
           DocString.text(field.getKey().getType().toString()),
           DocString.text(field.getKey().getName()),
-          Value.render(field.getValue()));
+          Value.render(snapshot, field.getValue()));
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printReferences(Doc doc, Instance inst) {
+  private static void printReferences(
+      Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
     doc.section("Objects with References to this Object");
-    if (inst.getHardReferences().isEmpty()) {
+    if (inst.getHardReverseReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      for (Instance ref : inst.getHardReferences()) {
-        doc.row(Value.render(ref));
+      List<Instance> references = inst.getHardReverseReferences();
+      SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
+      for (Instance ref : selector.selected()) {
+        doc.row(Value.render(snapshot, ref));
       }
       doc.end();
+      selector.render(doc);
     }
 
-    if (inst.getSoftReferences() != null) {
+    if (inst.getSoftReverseReferences() != null) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      for (Instance ref : inst.getSoftReferences()) {
-        doc.row(Value.render(inst));
+      List<Instance> references = inst.getSoftReverseReferences();
+      SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
+      for (Instance ref : selector.selected()) {
+        doc.row(Value.render(snapshot, ref));
       }
       doc.end();
+      selector.render(doc);
     }
   }
 
-  private void printAllocationSite(Doc doc, Instance inst) {
+  private void printAllocationSite(Doc doc, Query query, Instance inst) {
     doc.section("Allocation Site");
     Site site = mSnapshot.getSiteForInstance(inst);
-    SitePrinter.printSite(doc, mSnapshot, site);
+    SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
   }
 
   // Draw the bitmap corresponding to this instance if there is one.
@@ -150,7 +202,7 @@
     }
   }
 
-  private void printDominatorPath(Doc doc, Instance inst) {
+  private void printDominatorPath(Doc doc, Query query, Instance inst) {
     doc.section("Dominator Path from Root");
     List<Instance> path = new ArrayList<Instance>();
     for (Instance parent = inst;
@@ -184,23 +236,23 @@
 
           public DocString render(Instance element) {
             if (element == null) {
-              return DocString.link(DocString.uri("roots"), DocString.text("ROOT"));
+              return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
             } else {
-              return DocString.text("→ ").append(Value.render(element));
+              return DocString.text("→ ").append(Value.render(mSnapshot, element));
             }
           }
         };
         return Collections.singletonList(value);
       }
     };
-    HeapTable.render(doc, table, mSnapshot, path);
+    HeapTable.render(doc, query, DOMINATOR_PATH_ID, table, mSnapshot, path);
   }
 
   public void printDominatedObjects(Doc doc, Query query, Instance inst) {
     doc.section("Immediately Dominated Objects");
     List<Instance> instances = mSnapshot.getDominated(inst);
     if (instances != null) {
-      DominatedList.render(mSnapshot, doc, instances, query);
+      DominatedList.render(mSnapshot, doc, query, DOMINATED_OBJECTS_ID, instances);
     } else {
       doc.println(DocString.text("(none)"));
     }
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 4e9c42e..4cfb0a5 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -22,9 +22,13 @@
 import java.util.Collections;
 import java.util.List;
 
-class ObjectsHandler extends AhatHandler {
+class ObjectsHandler implements AhatHandler {
+  private static final String OBJECTS_ID = "objects";
+
+  private AhatSnapshot mSnapshot;
+
   public ObjectsHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -51,13 +55,15 @@
         new Column("Size", Column.Align.RIGHT),
         new Column("Heap"),
         new Column("Object"));
-    for (Instance inst : insts) {
+    SubsetSelector<Instance> selector = new SubsetSelector(query, OBJECTS_ID, insts);
+    for (Instance inst : selector.selected()) {
       doc.row(
           DocString.format("%,d", inst.getSize()),
           DocString.text(inst.getHeap().getName()),
-          Value.render(inst));
+          Value.render(mSnapshot, inst));
     }
     doc.end();
+    selector.render(doc);
   }
 }
 
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index f49c009..0dbad7e 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -22,11 +22,15 @@
 import java.util.Collections;
 import java.util.List;
 
-class OverviewHandler extends AhatHandler {
+class OverviewHandler implements AhatHandler {
+
+  private static final String OVERVIEW_ID = "overview";
+
+  private AhatSnapshot mSnapshot;
   private File mHprof;
 
   public OverviewHandler(AhatSnapshot snapshot, File hprof) {
-    super(snapshot);
+    mSnapshot = snapshot;
     mHprof = hprof;
   }
 
@@ -43,18 +47,27 @@
     doc.end();
 
     doc.section("Heap Sizes");
-    printHeapSizes(doc);
+    printHeapSizes(doc, query);
 
-    DocString menu = new DocString();
-    menu.appendLink(DocString.uri("roots"), DocString.text("Roots"));
-    menu.append(" - ");
-    menu.appendLink(DocString.uri("site"), DocString.text("Allocations"));
-    menu.append(" - ");
-    menu.appendLink(DocString.uri("help"), DocString.text("Help"));
-    doc.big(menu);
+    List<InstanceUtils.NativeAllocation> allocs = mSnapshot.getNativeAllocations();
+    if (!allocs.isEmpty()) {
+      doc.section("Registered Native Allocations");
+      long totalSize = 0;
+      for (InstanceUtils.NativeAllocation alloc : allocs) {
+        totalSize += alloc.size;
+      }
+      doc.descriptions();
+      doc.description(DocString.text("Number of Registered Native Allocations"),
+          DocString.format("%,14d", allocs.size()));
+      doc.description(DocString.text("Total Size of Registered Native Allocations"),
+          DocString.format("%,14d", totalSize));
+      doc.end();
+    }
+
+    doc.big(Menu.getMenu());
   }
 
-  private void printHeapSizes(Doc doc) {
+  private void printHeapSizes(Doc doc, Query query) {
     List<Object> dummy = Collections.singletonList(null);
 
     HeapTable.TableConfig<Object> table = new HeapTable.TableConfig<Object>() {
@@ -70,7 +83,7 @@
         return Collections.emptyList();
       }
     };
-    HeapTable.render(doc, table, mSnapshot, dummy);
+    HeapTable.render(doc, query, OVERVIEW_ID, table, mSnapshot, dummy);
   }
 }
 
diff --git a/tools/ahat/src/RootedHandler.java b/tools/ahat/src/RootedHandler.java
new file mode 100644
index 0000000..ec3272f
--- /dev/null
+++ b/tools/ahat/src/RootedHandler.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.io.IOException;
+
+class RootedHandler implements AhatHandler {
+
+  private static final String ROOTED_ID = "rooted";
+
+  private AhatSnapshot mSnapshot;
+
+  public RootedHandler(AhatSnapshot snapshot) {
+    mSnapshot = snapshot;
+  }
+
+  @Override
+  public void handle(Doc doc, Query query) throws IOException {
+    doc.title("Rooted");
+    DominatedList.render(mSnapshot, doc, query, ROOTED_ID, mSnapshot.getRooted());
+  }
+}
diff --git a/tools/ahat/src/RootsHandler.java b/tools/ahat/src/RootsHandler.java
deleted file mode 100644
index 185b9bf..0000000
--- a/tools/ahat/src/RootsHandler.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.RootObj;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-class RootsHandler extends AhatHandler {
-  public RootsHandler(AhatSnapshot snapshot) {
-    super(snapshot);
-  }
-
-  @Override
-  public void handle(Doc doc, Query query) throws IOException {
-    doc.title("Roots");
-
-    Set<Instance> rootset = new HashSet<Instance>();
-    for (RootObj root : mSnapshot.getGCRoots()) {
-      Instance inst = root.getReferredInstance();
-      if (inst != null) {
-        rootset.add(inst);
-      }
-    }
-
-    List<Instance> roots = new ArrayList<Instance>();
-    for (Instance inst : rootset) {
-      roots.add(inst);
-    }
-    DominatedList.render(mSnapshot, doc, roots, query);
-  }
-}
-
diff --git a/tools/ahat/src/Site.java b/tools/ahat/src/Site.java
index d504096..dbb84f6 100644
--- a/tools/ahat/src/Site.java
+++ b/tools/ahat/src/Site.java
@@ -20,6 +20,7 @@
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.StackFrame;
+
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index 0a9381e..839e220 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -22,9 +22,15 @@
 import java.util.Comparator;
 import java.util.List;
 
-class SiteHandler extends AhatHandler {
+class SiteHandler implements AhatHandler {
+  private static final String ALLOCATION_SITE_ID = "frames";
+  private static final String SITES_CALLED_ID = "called";
+  private static final String OBJECTS_ALLOCATED_ID = "objects";
+
+  private AhatSnapshot mSnapshot;
+
   public SiteHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -35,7 +41,7 @@
 
     doc.title("Site %s", site.getName());
     doc.section("Allocation Site");
-    SitePrinter.printSite(doc, mSnapshot, site);
+    SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
 
     doc.section("Sites Called from Here");
     List<Site> children = site.getChildren();
@@ -69,7 +75,7 @@
           return Collections.singletonList(value);
         }
       };
-      HeapTable.render(doc, table, mSnapshot, children);
+      HeapTable.render(doc, query, SITES_CALLED_ID, table, mSnapshot, children);
     }
 
     doc.section("Objects Allocated");
@@ -84,7 +90,9 @@
         new Sort.ObjectsInfoBySize(),
         new Sort.ObjectsInfoByClassName());
     Collections.sort(infos, compare);
-    for (Site.ObjectsInfo info : infos) {
+    SubsetSelector<Site.ObjectsInfo> selector
+      = new SubsetSelector(query, OBJECTS_ALLOCATED_ID, infos);
+    for (Site.ObjectsInfo info : selector.selected()) {
       String className = AhatSnapshot.getClassName(info.classObj);
       doc.row(
           DocString.format("%,14d", info.numBytes),
@@ -93,9 +101,10 @@
                 site.getStackId(), site.getStackDepth(), info.heap.getName(), className),
             DocString.format("%,14d", info.numInstances)),
           DocString.text(info.heap.getName()),
-          Value.render(info.classObj));
+          Value.render(mSnapshot, info.classObj));
     }
     doc.end();
+    selector.render(doc);
   }
 }
 
diff --git a/tools/ahat/src/SitePrinter.java b/tools/ahat/src/SitePrinter.java
index be87032..2c06b47 100644
--- a/tools/ahat/src/SitePrinter.java
+++ b/tools/ahat/src/SitePrinter.java
@@ -22,7 +22,7 @@
 import java.util.List;
 
 class SitePrinter {
-  public static void printSite(Doc doc, AhatSnapshot snapshot, Site site) {
+  public static void printSite(AhatSnapshot snapshot, Doc doc, Query query, String id, Site site) {
     List<Site> path = new ArrayList<Site>();
     for (Site parent = site; parent != null; parent = parent.getParent()) {
       path.add(parent);
@@ -60,6 +60,6 @@
         return Collections.singletonList(value);
       }
     };
-    HeapTable.render(doc, table, snapshot, path);
+    HeapTable.render(doc, query, id, table, snapshot, path);
   }
 }
diff --git a/tools/ahat/src/Sort.java b/tools/ahat/src/Sort.java
index 3b79166..8a3d9f2 100644
--- a/tools/ahat/src/Sort.java
+++ b/tools/ahat/src/Sort.java
@@ -16,13 +16,14 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.List;
 import java.util.Iterator;
+import java.util.List;
 
 /**
  * Provides Comparators and helper functions for sorting Instances, Sites, and
@@ -177,5 +178,31 @@
       return aName.compareTo(bName);
     }
   }
+
+  /**
+   * Compare InstanceUtils.NativeAllocation by heap name.
+   * Different allocations with the same heap name are considered equal for
+   * the purposes of comparison.
+   */
+  public static class NativeAllocationByHeapName
+      implements Comparator<InstanceUtils.NativeAllocation> {
+    @Override
+    public int compare(InstanceUtils.NativeAllocation a, InstanceUtils.NativeAllocation b) {
+      return a.heap.getName().compareTo(b.heap.getName());
+    }
+  }
+
+  /**
+   * Compare InstanceUtils.NativeAllocation objects by size.
+   * Different allocations with the same size are considered equal for the
+   * purposes of comparison.
+   * This sorts allocations from larger size to smaller size.
+   */
+  public static class NativeAllocationBySize implements Comparator<InstanceUtils.NativeAllocation> {
+    @Override
+    public int compare(InstanceUtils.NativeAllocation a, InstanceUtils.NativeAllocation b) {
+      return Long.compare(b.size, a.size);
+    }
+  }
 }
 
diff --git a/tools/ahat/src/SubsetSelector.java b/tools/ahat/src/SubsetSelector.java
new file mode 100644
index 0000000..79399c1
--- /dev/null
+++ b/tools/ahat/src/SubsetSelector.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.util.List;
+
+/**
+ * The SubsetSelector is a widget that can be added to a page that lets the
+ * user select a limited number of elements to show.
+ * This is used to limit the number of elements shown on a page by default,
+ * requiring the user to explicitly request more, so users not interested in
+ * more don't have to wait for everything to render.
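+ *
+ * Typical use (an illustrative sketch of the pattern used by the handlers
+ * in this change):
+ *   SubsetSelector<T> selector = new SubsetSelector(query, id, elements);
+ *   for (T element : selector.selected()) { ... render element ... }
+ *   selector.render(doc);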
+ */
+class SubsetSelector<T> {
+  private static final int kIncrAmount = 1000;
+  private static final int kDefaultShown = 1000;
+
+  private Query mQuery;
+  private String mId;
+  private int mLimit;
+  private List<T> mElements;
+
+  /**
+   * @param query - the query for the current page. This is required so the
+   * SubsetSelector can add a link to the same page with a modified limit
+   * selection.
+   * @param id - the name of the query parameter key that holds the
+   * selector's selected limit.
+   * @param elements - the elements to select from. The collection of elements
+   * should not be modified during the lifetime of the SubsetSelector object.
+   */
+  public SubsetSelector(Query query, String id, List<T> elements) {
+    mQuery = query;
+    mId = id;
+    mLimit = getSelectedLimit(query, id, elements.size());
+    mElements = elements;
+  }
+
+  // Return the list of elements included in the selected subset.
+  public List<T> selected() {
+    return mElements.subList(0, mLimit);
+  }
+
+  // Return the list of remaining elements not included in the selected subset.
+  public List<T> remaining() {
+    return mElements.subList(mLimit, mElements.size());
+  }
+
+  /**
+   * Returns the currently selected limit.
+   * @param query the current page query
+   * @param id the query parameter key that holds the selected limit
+   * @param size the total number of elements to select from
+   * @return the number of selected elements
+   */
+  private static int getSelectedLimit(Query query, String id, int size) {
+    String value = query.get(id, null);
+    try {
+      int ivalue = Math.min(size, Integer.parseInt(value));
+      return Math.max(0, ivalue);
+    } catch (NumberFormatException e) {
+      // We can't parse the value as a number. Ignore it.
+    }
+    return Math.min(kDefaultShown, size);
+  }
+
+  // Render the limit selector to the given doc.
+  // It has the form:
+  //  (X of Y elements shown - show none - show less - show more - show all)
+  public void render(Doc doc) {
+    int all = mElements.size();
+    if (all > kDefaultShown) {
+      DocString menu = new DocString();
+      menu.appendFormat("(%d of %d elements shown - ", mLimit, all);
+      if (mLimit > 0) {
+        int less = Math.max(0, mLimit - kIncrAmount);
+        menu.appendLink(mQuery.with(mId, 0), DocString.text("show none"));
+        menu.append(" - ");
+        menu.appendLink(mQuery.with(mId, less), DocString.text("show less"));
+        menu.append(" - ");
+      } else {
+        menu.append("show none - show less - ");
+      }
+      if (mLimit < all) {
+        int more = Math.min(mLimit + kIncrAmount, all);
+        menu.appendLink(mQuery.with(mId, more), DocString.text("show more"));
+        menu.append(" - ");
+        menu.appendLink(mQuery.with(mId, all), DocString.text("show all"));
+        menu.append(")");
+      } else {
+        menu.append("show more - show all)");
+      }
+      doc.println(menu);
+    }
+  }
+}
diff --git a/tools/ahat/src/Value.java b/tools/ahat/src/Value.java
index 9b483fa..847692b 100644
--- a/tools/ahat/src/Value.java
+++ b/tools/ahat/src/Value.java
@@ -25,37 +25,64 @@
  */
 class Value {
 
+  // For string values, we limit the number of characters we show to
+  // kMaxChars in case the string is really long.
+  private static int kMaxChars = 200;
+
   /**
    * Create a DocString representing a summary of the given instance.
    */
-  private static DocString renderInstance(Instance inst) {
-    DocString link = new DocString();
+  private static DocString renderInstance(AhatSnapshot snapshot, Instance inst) {
+    DocString formatted = new DocString();
     if (inst == null) {
-      link.append("(null)");
-      return link;
+      formatted.append("(null)");
+      return formatted;
     }
 
+    // Annotate roots as roots.
+    if (snapshot.isRoot(inst)) {
+      formatted.append("(root) ");
+    }
+
+
+    DocString link = new DocString();
     if (inst instanceof ClassObj) {
       link.append("class ");
     }
 
     link.append(inst.toString());
 
+    URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
+    formatted.appendLink(objTarget, link);
+
     // Annotate Strings with their values.
-    String stringValue = InstanceUtils.asString(inst);
+    String stringValue = InstanceUtils.asString(inst, kMaxChars);
     if (stringValue != null) {
-      link.appendFormat("\"%s\"", stringValue);
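+      // If the string is exactly kMaxChars long, it may have been truncated
+      // by asString, so show "..." in place of the closing quote.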
+      formatted.appendFormat(" \"%s", stringValue);
+      formatted.append(kMaxChars == stringValue.length() ? "..." : "\"");
+    }
+
+    // Annotate Reference with its referent
+    Instance referent = InstanceUtils.getReferent(inst);
+    if (referent != null) {
+      formatted.append(" for ");
+
+      // It should not be possible for a referent to refer back to the
+      // reference object, even indirectly, so there shouldn't be any issues
+      // with infinite recursion here.
+      formatted.append(renderInstance(snapshot, referent));
     }
 
     // Annotate DexCache with its location.
-    String dexCacheLocation = InstanceUtils.getDexCacheLocation(inst);
+    String dexCacheLocation = InstanceUtils.getDexCacheLocation(inst, kMaxChars);
     if (dexCacheLocation != null) {
-      link.append(" for " + dexCacheLocation);
+      formatted.appendFormat(" for %s", dexCacheLocation);
+      if (kMaxChars == dexCacheLocation.length()) {
+        formatted.append("...");
+      }
     }
 
-    URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
-    DocString formatted = DocString.link(objTarget, link);
 
     // Annotate bitmaps with a thumbnail.
     Instance bitmap = InstanceUtils.getAssociatedBitmapInstance(inst);
@@ -70,9 +97,9 @@
   /**
    * Create a DocString summarizing the given value.
    */
-  public static DocString render(Object val) {
+  public static DocString render(AhatSnapshot snapshot, Object val) {
     if (val instanceof Instance) {
-      return renderInstance((Instance)val);
+      return renderInstance(snapshot, (Instance)val);
     } else {
       return DocString.format("%s", val);
     }
diff --git a/tools/ahat/src/help.html b/tools/ahat/src/help.html
index b7ae2ce..ff04ad2 100644
--- a/tools/ahat/src/help.html
+++ b/tools/ahat/src/help.html
@@ -14,22 +14,11 @@
 limitations under the License.
 -->
 
-<head>
-<link rel="stylesheet" type="text/css" href="style.css">
-</head>
-
-<div class="menu">
-  <a href="/">overview</a> -
-  <a href="roots">roots</a> -
-  <a href="sites">allocations</a> -
-  <a href="help">help</a>
-</div>
-
 <h1>Help</h1>
 <h2>Information shown by ahat:</h2>
 <ul>
   <li><a href="/">The total bytes retained by heap.</a></li>
-  <li><a href="/roots">A list of root objects and their retained sizes for each heap.</a></li>
+  <li><a href="/rooted">A list of rooted objects and their retained sizes for each heap.</a></li>
   <li>Information about each allocated object:
     <ul>
       <li>The allocation site (stack trace) of the object (if available).</li>
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index 7efb1a7..cac53c5 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 0.2
+Implementation-Version: 0.7
 Main-Class: com.android.ahat.Main
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index cea1dc1..3936f29 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -16,6 +16,10 @@
 
 import dalvik.system.VMDebug;
 import java.io.IOException;
+import java.lang.ref.PhantomReference;
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import libcore.util.NativeAllocationRegistry;
 
 /**
  * Program used to create a heap dump for test purposes.
@@ -31,8 +35,25 @@
   // class and reading the desired field.
   public static class DumpedStuff {
     public String basicString = "hello, world";
+    public char[] charArray = "char thing".toCharArray();
     public String nullString = null;
     public Object anObject = new Object();
+    public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
+    public PhantomReference aPhantomReference = new PhantomReference(anObject, referenceQueue);
+    public WeakReference aWeakReference = new WeakReference(anObject, referenceQueue);
+    public byte[] bigArray;
+
+    DumpedStuff() {
+      int N = 1000000;
+      bigArray = new byte[N];
+      for (int i = 0; i < N; i++) {
+        bigArray[i] = (byte)((i*i) & 0xFF);
+      }
+
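+      // Register a 42-byte native allocation at pointer 0xABCDABCD with
+      // anObject as the referent, so tests can check how ahat reports
+      // registered native allocations.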
+      NativeAllocationRegistry registry = new NativeAllocationRegistry(
+          Main.class.getClassLoader(), 0x12345, 42);
+      registry.registerNativeAllocation(anObject, 0xABCDABCD);
+    }
   }
 
   public static void main(String[] args) throws IOException {
diff --git a/tools/ahat/test/InstanceUtilsTest.java b/tools/ahat/test/InstanceUtilsTest.java
index 7613df4..59b1c90 100644
--- a/tools/ahat/test/InstanceUtilsTest.java
+++ b/tools/ahat/test/InstanceUtilsTest.java
@@ -25,24 +25,102 @@
 
 public class InstanceUtilsTest {
   @Test
-  public void basicString() throws IOException {
+  public void asStringBasic() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
     assertEquals("hello, world", InstanceUtils.asString(str));
   }
 
   @Test
-  public void nullString() throws IOException {
+  public void asStringCharArray() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str));
+  }
+
+  @Test
+  public void asStringTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello", InstanceUtils.asString(str, 5));
+  }
+
+  @Test
+  public void asStringCharArrayTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char ", InstanceUtils.asString(str, 5));
+  }
+
+  @Test
+  public void asStringExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello, world", InstanceUtils.asString(str, 12));
+  }
+
+  @Test
+  public void asStringCharArrayExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, 10));
+  }
+
+  @Test
+  public void asStringNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello, world", InstanceUtils.asString(str, 50));
+  }
+
+  @Test
+  public void asStringCharArrayNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, 50));
+  }
+
+  @Test
+  public void asStringNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello, world", InstanceUtils.asString(str, -3));
+  }
+
+  @Test
+  public void asStringCharArrayNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, -3));
+  }
+
+  @Test
+  public void asStringNull() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance obj = (Instance)dump.getDumpedThing("nullString");
     assertNull(InstanceUtils.asString(obj));
   }
 
   @Test
-  public void notString() throws IOException {
+  public void asStringNotString() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance obj = (Instance)dump.getDumpedThing("anObject");
     assertNotNull(obj);
     assertNull(InstanceUtils.asString(obj));
   }
+
+  @Test
+  public void basicReference() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    Instance pref = (Instance)dump.getDumpedThing("aPhantomReference");
+    Instance wref = (Instance)dump.getDumpedThing("aWeakReference");
+    Instance referent = (Instance)dump.getDumpedThing("anObject");
+    assertNotNull(pref);
+    assertNotNull(wref);
+    assertNotNull(referent);
+    assertEquals(referent, InstanceUtils.getReferent(pref));
+    assertEquals(referent, InstanceUtils.getReferent(wref));
+    assertNull(InstanceUtils.getReferent(referent));
+  }
 }
diff --git a/tools/ahat/test/NativeAllocationTest.java b/tools/ahat/test/NativeAllocationTest.java
new file mode 100644
index 0000000..7ad4c1d
--- /dev/null
+++ b/tools/ahat/test/NativeAllocationTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.tools.perflib.heap.Instance;
+import java.io.IOException;
+import static org.junit.Assert.fail;
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+
+public class NativeAllocationTest {
+
+  @Test
+  public void nativeAllocation() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    Instance referent = (Instance)dump.getDumpedThing("anObject");
+    for (InstanceUtils.NativeAllocation alloc : snapshot.getNativeAllocations()) {
+      if (alloc.referent == referent) {
+        assertEquals(42, alloc.size);
+        assertEquals(referent.getHeap(), alloc.heap);
+        assertEquals(0xABCDABCD, alloc.pointer);
+        return;
+      }
+    }
+    fail("No native allocation found with anObject as the referent");
+  }
+}
+
diff --git a/tools/ahat/test/PerformanceTest.java b/tools/ahat/test/PerformanceTest.java
new file mode 100644
index 0000000..6e46800
--- /dev/null
+++ b/tools/ahat/test/PerformanceTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.tools.perflib.heap.Instance;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+public class PerformanceTest {
+  private static class NullOutputStream extends OutputStream {
+    public void write(int b) throws IOException {
+    }
+  }
+
+  @Test
+  public void bigArray() throws IOException {
+    // It should not take more than 1 second to load the default object view
+    // for any object, including big arrays.
+    TestDump dump = TestDump.getTestDump();
+
+    Instance bigArray = (Instance)dump.getDumpedThing("bigArray");
+    assertNotNull(bigArray);
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    AhatHandler handler = new ObjectHandler(snapshot);
+
+    PrintStream ps = new PrintStream(new NullOutputStream());
+    HtmlDoc doc = new HtmlDoc(ps, DocString.text("bigArray test"), DocString.uri("style.css"));
+    String uri = "http://localhost:7100/object?id=" + bigArray.getId();
+    Query query = new Query(DocString.uri(uri));
+
+    long start = System.currentTimeMillis();
+    handler.handle(doc, query);
+    long time = System.currentTimeMillis() - start;
+    assertTrue("bigArray took too long: " + time + "ms", time < 1000);
+  }
+}
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index bab7121..3291470 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -23,8 +23,10 @@
     if (args.length == 0) {
       args = new String[]{
         "com.android.ahat.InstanceUtilsTest",
+        "com.android.ahat.NativeAllocationTest",
+        "com.android.ahat.PerformanceTest",
         "com.android.ahat.QueryTest",
-        "com.android.ahat.SortTest"
+        "com.android.ahat.SortTest",
       };
     }
     JUnitCore.main(args);
diff --git a/tools/art b/tools/art
index 304a9d0..d91b451 100644
--- a/tools/art
+++ b/tools/art
@@ -75,6 +75,7 @@
 ANDROID_ROOT=$PROG_DIR/..
 LIBDIR=$(find_libdir)
 LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBDIR
+DEBUG_OPTION=""
 
 DELETE_ANDROID_DATA=false
 # If ANDROID_DATA is the system ANDROID_DATA or is not set, use our own,
@@ -87,6 +88,7 @@
 
 if [ z"$PERF" != z ]; then
   invoke_with="perf record -o $ANDROID_DATA/perf.data -e cycles:u $invoke_with"
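+  # Compile with debug info so perf can attribute samples to compiled code.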
+  DEBUG_OPTION="-Xcompiler-option --generate-debug-info"
 fi
 
 # We use the PIC core image to work with perf.
@@ -99,7 +101,7 @@
     -XXlib:$LIBART \
     -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core-optimizing-pic.art \
-    -Xcompiler-option --generate-debug-info \
+    $DEBUG_OPTION \
     "$@"
 
 EXIT_STATUS=$?
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 631e0a0..12e0338 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar vogar.jar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
+common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
@@ -46,9 +46,14 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg $showcommands build-art-host-tests $common_targets ${out_dir}/host/linux-x86/lib/libjavacoretests.so ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets"
+  make_command+=" ${out_dir}/host/linux-x86/lib/libjavacoretests.so "
+  make_command+=" ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb"
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets"
+  make_command+=" libjavacrypto libjavacoretests libnetd_client linker toybox toolbox sh"
+  make_command+=" ${out_dir}/host/linux-x86/bin/adb libstdc++ "
+  make_command+=" ${out_dir}/target/product/${TARGET_PRODUCT}/system/etc/public.libraries.txt"
 fi
 
 echo "Executing $make_command"
diff --git a/tools/checker/common/logger.py b/tools/checker/common/logger.py
index 28bb458..f13eaf6 100644
--- a/tools/checker/common/logger.py
+++ b/tools/checker/common/logger.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from __future__ import print_function
+import collections
 import sys
 
 class Logger(object):
@@ -21,7 +22,7 @@
     NoOutput, Error, Info = range(3)
 
   class Color(object):
-    Default, Blue, Gray, Purple, Red = range(5)
+    Default, Blue, Gray, Purple, Red, Green = range(6)
 
     @staticmethod
     def terminalCode(color, out=sys.stdout):
@@ -35,6 +36,8 @@
         return '\033[95m'
       elif color == Logger.Color.Red:
         return '\033[91m'
+      elif color == Logger.Color.Green:
+        return '\033[32m'
       else:
         return '\033[0m'
 
@@ -52,19 +55,34 @@
       out.flush()
 
   @staticmethod
-  def fail(msg, file=None, line=-1):
-    location = ""
-    if file:
-      location += file + ":"
-    if line > 0:
-      location += str(line) + ":"
-    if location:
-      location += " "
-
-    Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
+  def fail(msg, file=None, line=-1, lineText=None, variables=None):
     Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
     Logger.log(msg, Logger.Level.Error, out=sys.stderr)
-    sys.exit(msg)
+
+    if lineText:
+      loc = ""
+      if file:
+        loc += file + ":"
+      if line > 0:
+        loc += str(line) + ":"
+      if loc:
+        loc += " "
+      Logger.log(loc, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
+      Logger.log(lineText, Logger.Level.Error, out=sys.stderr)
+
+    if variables:
+      longestName = 0
+      for var in variables:
+        longestName = max(longestName, len(var))
+
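+      # Print the variables sorted by name, padding the names so that the
+      # values line up.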
+      for var in collections.OrderedDict(sorted(variables.items())):
+        padding = ' ' * (longestName - len(var))
+        Logger.log(var, Logger.Level.Error, color=Logger.Color.Green, newLine=False, out=sys.stderr)
+        Logger.log(padding, Logger.Level.Error, newLine=False, out=sys.stderr)
+        Logger.log(" = ", Logger.Level.Error, newLine=False, out=sys.stderr)
+        Logger.log(variables[var], Logger.Level.Error, out=sys.stderr)
+
+    sys.exit(1)
 
   @staticmethod
   def startTest(name):
@@ -76,6 +94,6 @@
     Logger.log("PASS", color=Logger.Color.Blue)
 
   @staticmethod
-  def testFailed(msg, file=None, line=-1):
+  def testFailed(msg, assertion, variables):
     Logger.log("FAIL", color=Logger.Color.Red)
-    Logger.fail(msg, file, line)
+    Logger.fail(msg, assertion.fileName, assertion.lineNo, assertion.originalText, variables)
diff --git a/tools/checker/match/file.py b/tools/checker/match/file.py
index 3ded074..520c4ae 100644
--- a/tools/checker/match/file.py
+++ b/tools/checker/match/file.py
@@ -23,9 +23,10 @@
 MatchInfo = namedtuple("MatchInfo", ["scope", "variables"])
 
 class MatchFailedException(Exception):
-  def __init__(self, assertion, lineNo):
+  def __init__(self, assertion, lineNo, variables):
     self.assertion = assertion
     self.lineNo = lineNo
+    self.variables = variables
 
 def splitIntoGroups(assertions):
   """ Breaks up a list of assertions, grouping instructions which should be
@@ -58,7 +59,7 @@
     newVariables = MatchLines(assertion, c1Pass.body[i], variables)
     if newVariables is not None:
       return MatchInfo(MatchScope(i, i), newVariables)
-  raise MatchFailedException(assertion, scope.start)
+  raise MatchFailedException(assertion, scope.start, variables)
 
 def matchDagGroup(assertions, c1Pass, scope, variables):
   """ Attempts to find matching `c1Pass` lines for a group of DAG assertions.
@@ -92,12 +93,12 @@
     for assertion in assertions:
       assert assertion.variant == TestAssertion.Variant.Not
       if MatchLines(assertion, line, variables) is not None:
-        raise MatchFailedException(assertion, i)
+        raise MatchFailedException(assertion, i, variables)
 
 def testEvalGroup(assertions, scope, variables):
   for assertion in assertions:
     if not EvaluateLine(assertion, variables):
-      raise MatchFailedException(assertion, scope.start)
+      raise MatchFailedException(assertion, scope.start, variables)
 
 def MatchTestCase(testCase, c1Pass):
   """ Runs a test case against a C1visualizer graph dump.
@@ -171,8 +172,8 @@
     # match a check group against the first output group of the same name.
     c1Pass = c1File.findPass(testCase.name)
     if c1Pass is None:
-      Logger.fail("Test case \"{}\" not found in the CFG file".format(testCase.name),
-                  testCase.fileName, testCase.startLineNo)
+      Logger.fail("Test case not found in the CFG file",
+                  testCase.fileName, testCase.startLineNo, testCase.name)
 
     Logger.startTest(testCase.name)
     try:
@@ -181,8 +182,8 @@
     except MatchFailedException as e:
       lineNo = c1Pass.startLineNo + e.lineNo
       if e.assertion.variant == TestAssertion.Variant.Not:
-        Logger.testFailed("NOT assertion matched line {}".format(lineNo),
-                          e.assertion.fileName, e.assertion.lineNo)
+        msg = "NOT assertion matched line {}"
       else:
-        Logger.testFailed("Assertion could not be matched starting from line {}".format(lineNo),
-                          e.assertion.fileName, e.assertion.lineNo)
+        msg = "Assertion could not be matched starting from line {}"
+      msg = msg.format(lineNo)
+      Logger.testFailed(msg, e.assertion, e.variables)
diff --git a/tools/checker/match/line.py b/tools/checker/match/line.py
index 08f001f..ed48a53 100644
--- a/tools/checker/match/line.py
+++ b/tools/checker/match/line.py
@@ -35,15 +35,13 @@
   if name in variables:
     return variables[name]
   else:
-    Logger.testFailed("Missing definition of variable \"{}\"".format(name),
-                      pos.fileName, pos.lineNo)
+    Logger.testFailed("Missing definition of variable \"{}\"".format(name), pos, variables)
 
 def setVariable(name, value, variables, pos):
   if name not in variables:
     return variables.copyWith(name, value)
   else:
-    Logger.testFailed("Multiple definitions of variable \"{}\"".format(name),
-                      pos.fileName, pos.lineNo)
+    Logger.testFailed("Multiple definitions of variable \"{}\"".format(name), pos, variables)
 
 def matchWords(checkerWord, stringWord, variables, pos):
   """ Attempts to match a list of TestExpressions against a string.
diff --git a/tools/cpp-define-generator/Android.mk b/tools/cpp-define-generator/Android.mk
new file mode 100644
index 0000000..6ba643c
--- /dev/null
+++ b/tools/cpp-define-generator/Android.mk
@@ -0,0 +1,34 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.executable.mk
+
+CPP_DEFINE_GENERATOR_SRC_FILES := \
+  main.cc
+
+CPP_DEFINE_GENERATOR_EXTRA_SHARED_LIBRARIES :=
+CPP_DEFINE_GENERATOR_EXTRA_INCLUDE :=
+CPP_DEFINE_GENERATOR_MULTILIB :=
+
+# Build a "data" binary which will hold all the symbol values that will be parsed by the other scripts.
+#
+# Builds are for host only; target-specific define generation is possible but trickier and would need extra tooling.
+#
+# In the future we may wish to parameterize this on (32,64)x(read_barrier,no_read_barrier).
+$(eval $(call build-art-executable,cpp-define-generator-data,$(CPP_DEFINE_GENERATOR_SRC_FILES),$(CPP_DEFINE_GENERATOR_EXTRA_SHARED_LIBRARIES),$(CPP_DEFINE_GENERATOR_EXTRA_INCLUDE),host,debug,$(CPP_DEFINE_GENERATOR_MULTILIB),shared))
+
diff --git a/tools/cpp-define-generator/common.def b/tools/cpp-define-generator/common.def
new file mode 100644
index 0000000..76c64c9
--- /dev/null
+++ b/tools/cpp-define-generator/common.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Convenience macro to define an offset expression.
+
+#ifndef DEFINE_OFFSET_EXPR
+#define DEFINE_OFFSET_EXPR(holder_type, field_name, field_type, expr) \
+  DEFINE_EXPR(holder_type ## _ ## field_name ## _OFFSET, field_type, expr)
+#define DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#endif
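+
+// For example, DEFINE_OFFSET_EXPR(Thread, FLAGS, int32_t, expr) expands to
+// DEFINE_EXPR(Thread_FLAGS_OFFSET, int32_t, expr) when the standard
+// definition above is in effect.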
+
diff --git a/tools/cpp-define-generator/common_undef.def b/tools/cpp-define-generator/common_undef.def
new file mode 100644
index 0000000..c44aba7
--- /dev/null
+++ b/tools/cpp-define-generator/common_undef.def
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#undef DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#undef DEFINE_OFFSET_EXPR
+#endif
diff --git a/tools/cpp-define-generator/constant_class.def b/tools/cpp-define-generator/constant_class.def
new file mode 100644
index 0000000..58372f9
--- /dev/null
+++ b/tools/cpp-define-generator/constant_class.def
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/class.h"         // kStatusInitialized
+#include "modifiers.h"            // kAccClassIsFinalizable
+#include "base/bit_utils.h"       // MostSignificantBit
+#endif
+
+#define DEFINE_FLAG_OFFSET(type_name, field_name, expr) \
+  DEFINE_EXPR(type_name ## _ ## field_name, uint32_t, (expr))
+
+DEFINE_FLAG_OFFSET(MIRROR_CLASS, STATUS_INITIALIZED,       art::mirror::Class::kStatusInitialized)
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE,     art::kAccClassIsFinalizable)
+// TODO: We should really have a BitPosition which also checks it's a power of 2.
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE_BIT, art::MostSignificantBit(art::kAccClassIsFinalizable))
+
+#undef DEFINE_FLAG_OFFSET
diff --git a/tools/cpp-define-generator/constant_dexcache.def b/tools/cpp-define-generator/constant_dexcache.def
new file mode 100644
index 0000000..fd197f2
--- /dev/null
+++ b/tools/cpp-define-generator/constant_dexcache.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/dex_cache.h"   // art::mirror::DexCache, StringDexCachePair
+#endif
+
+DEFINE_EXPR(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT,       int32_t, art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))
+DEFINE_EXPR(STRING_DEX_CACHE_SIZE_MINUS_ONE,           int32_t, art::mirror::DexCache::kDexCacheStringCacheSize - 1)
+DEFINE_EXPR(STRING_DEX_CACHE_HASH_BITS,                int32_t,
+    art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))
\ No newline at end of file
diff --git a/tools/cpp-define-generator/constant_globals.def b/tools/cpp-define-generator/constant_globals.def
new file mode 100644
index 0000000..a3ccc72
--- /dev/null
+++ b/tools/cpp-define-generator/constant_globals.def
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export global values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "globals.h"         // art::kObjectAlignment
+#endif
+
+#define DEFINE_OBJECT_EXPR(macro_name, type, constant_field_name) \
+  DEFINE_EXPR(OBJECT_ ## macro_name, type, constant_field_name)
+
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK,         size_t,   art::kObjectAlignment - 1)
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED, uint32_t, ~static_cast<uint32_t>(art::kObjectAlignment - 1))
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED64, uint64_t, ~static_cast<uint64_t>(art::kObjectAlignment - 1))
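+
+// Illustration (not generated output): with an object alignment of 8,
+// OBJECT_ALIGNMENT_MASK is 0x7 and OBJECT_ALIGNMENT_MASK_TOGGLED is 0xfffffff8.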
+
+#undef DEFINE_OBJECT_EXPR
+
diff --git a/tools/cpp-define-generator/constant_heap.def b/tools/cpp-define-generator/constant_heap.def
new file mode 100644
index 0000000..dc76736
--- /dev/null
+++ b/tools/cpp-define-generator/constant_heap.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export heap values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/heap.h"
+#endif
+
+// Minimum object size for the large object space.
+DEFINE_EXPR(MIN_LARGE_OBJECT_THRESHOLD, size_t, art::gc::Heap::kMinLargeObjectThreshold)
+
diff --git a/tools/cpp-define-generator/constant_jit.def b/tools/cpp-define-generator/constant_jit.def
new file mode 100644
index 0000000..5fa5194
--- /dev/null
+++ b/tools/cpp-define-generator/constant_jit.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within jit.h.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "jit/jit.h"   // art::kSuspendRequest, etc.
+#endif
+
+#define DEFINE_JIT_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(JIT_ ## macro_name, type, (expr))
+
+DEFINE_JIT_CONSTANT(CHECK_OSR,       int16_t, art::jit::kJitCheckForOSR)
+DEFINE_JIT_CONSTANT(HOTNESS_DISABLE, int16_t, art::jit::kJitHotnessDisabled)
+
+#undef DEFINE_JIT_CONSTANT
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
new file mode 100644
index 0000000..67ed5b5
--- /dev/null
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export lockword values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "lock_word.h"         // art::LockWord
+#endif
+
+#define DEFINE_LOCK_WORD_EXPR(macro_name, type, constant_field_name) \
+  DEFINE_EXPR(LOCK_WORD_ ## macro_name, type, art::LockWord::constant_field_name)
+
+DEFINE_LOCK_WORD_EXPR(STATE_SHIFT,                     int32_t,  kStateShift)
+DEFINE_LOCK_WORD_EXPR(STATE_MASK,                      uint32_t, kStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_SHIFT,        int32_t,  kReadBarrierStateShift)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK,         uint32_t, kReadBarrierStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,             int32_t,  kThinLockCountOne)
+
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED,           uint32_t, kGCStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED,   uint32_t, kGCStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT,                  int32_t,  kGCStateShift)
+
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_SHIFT,                  int32_t,  kMarkBitStateShift)
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_MASK_SHIFTED,           uint32_t, kMarkBitStateMaskShifted)
+
+#undef DEFINE_LOCK_WORD_EXPR
+
diff --git a/tools/cpp-define-generator/constant_reference.def b/tools/cpp-define-generator/constant_reference.def
new file mode 100644
index 0000000..d312f76
--- /dev/null
+++ b/tools/cpp-define-generator/constant_reference.def
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/object.h"            // mirror::Object
+#include "stack.h"                    // StackReference
+#include "mirror/object_reference.h"  // mirror::CompressedReference
+#include "base/bit_utils.h"           // WhichPowerOf2
+#endif
+
+// Size of references to the heap on the stack.
+DEFINE_EXPR(STACK_REFERENCE_SIZE,            size_t, sizeof(art::StackReference<art::mirror::Object>))
+// Size of heap references.
+DEFINE_EXPR(COMPRESSED_REFERENCE_SIZE,       size_t, sizeof(art::mirror::CompressedReference<art::mirror::Object>))
+DEFINE_EXPR(COMPRESSED_REFERENCE_SIZE_SHIFT, size_t, art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))
+
+#undef DEFINE_REFERENCE_OFFSET
diff --git a/tools/cpp-define-generator/constant_rosalloc.def b/tools/cpp-define-generator/constant_rosalloc.def
new file mode 100644
index 0000000..2007cef
--- /dev/null
+++ b/tools/cpp-define-generator/constant_rosalloc.def
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within RosAlloc.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/allocator/rosalloc.h"   // art::gc::allocator::RosAlloc
+#endif
+
+#define DEFINE_ROSALLOC_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(ROSALLOC_ ## macro_name, type, (expr))
+
+DEFINE_ROSALLOC_CONSTANT(MAX_THREAD_LOCAL_BRACKET_SIZE, int32_t, art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize)
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_SHIFT,    int32_t, art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift)
+// TODO: This should be a BitUtils helper, e.g. BitMaskFromSize or something like that.
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK,     int32_t, static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK_TOGGLED32,\
+                                                        uint32_t, ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK_TOGGLED64,\
+                                                        uint64_t, ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_OFFSET,          int32_t, art::gc::allocator::RosAlloc::RunFreeListOffset())
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_HEAD_OFFSET,     int32_t, art::gc::allocator::RosAlloc::RunFreeListHeadOffset())
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_SIZE_OFFSET,     int32_t, art::gc::allocator::RosAlloc::RunFreeListSizeOffset())
+DEFINE_ROSALLOC_CONSTANT(SLOT_NEXT_OFFSET,              int32_t, art::gc::allocator::RosAlloc::RunSlotNextOffset())
+
+
+#undef DEFINE_ROSALLOC_CONSTANT
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
new file mode 100644
index 0000000..af5ca21
--- /dev/null
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within thread.h.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "thread.h"   // art::kSuspendRequest, etc.
+#endif
+
+#define DEFINE_THREAD_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(THREAD_ ## macro_name, type, (expr))
+
+DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST,    int32_t, art::kSuspendRequest)
+DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
+
+#undef DEFINE_THREAD_CONSTANT
diff --git a/tools/cpp-define-generator/generate-asm-support b/tools/cpp-define-generator/generate-asm-support
new file mode 100755
index 0000000..f95648b
--- /dev/null
+++ b/tools/cpp-define-generator/generate-asm-support
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Generates asm_support_gen.h
+# - This must be run after a build since it uses cpp-define-generator-data
+
+[[ -z ${ANDROID_BUILD_TOP+x} ]] && { echo "Run source build/envsetup.sh first" >&2; exit 1; }
+
+cpp-define-generator-datad > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
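+
+# Typical usage, after lunch/envsetup and a host build:
+#   ${ANDROID_BUILD_TOP}/art/tools/cpp-define-generator/generate-asm-support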
diff --git a/tools/cpp-define-generator/main.cc b/tools/cpp-define-generator/main.cc
new file mode 100644
index 0000000..a1b463a
--- /dev/null
+++ b/tools/cpp-define-generator/main.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <type_traits>
+#include <ios>
+#include <algorithm>
+#include <string>
+
+// ART offset file dependencies.
+#define DEFINE_INCLUDE_DEPENDENCIES
+#include "offsets_all.def"
+
+std::string to_upper(std::string input) {
+  std::transform(input.begin(), input.end(), input.begin(), ::toupper);
+  return input;
+}
+
+template <typename T, typename = void>
+typename std::enable_if<!std::is_signed<T>::value, std::string>::type
+pretty_format(T value) {
+  // Print most values as hex.
+  std::stringstream ss;
+  ss << std::showbase << std::hex << value;
+  return ss.str();
+}
+
+template <typename T, typename = void>
+typename std::enable_if<std::is_signed<T>::value, std::string>::type
+pretty_format(T value) {
+  // Print "signed" values as decimal so that the negativity doesn't get lost.
+  std::stringstream ss;
+
+  // For negative values add a (). Omit it from positive values for conciseness.
+  if (value < 0) {
+    ss << "(";
+  }
+
+  ss << value;
+
+  if (value < 0) {
+    ss << ")";
+  }
+  return ss.str();
+}
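+
+// Sample output of the formatters above: an unsigned 7 prints as "0x7" and a
+// signed -1 prints as "(-1)".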
+
+template <typename T>
+void cpp_define(std::string name, T value) {
+  std::cout << "#define " << name << " " << pretty_format(value) << std::endl;
+}
+
+template <typename T>
+void emit_check_eq(T value, std::string expr) {
+  std::cout << "DEFINE_CHECK_EQ(" << value << ", (" << expr << "))" << std::endl;
+}
+
+const char *kFileHeader = /* // NOLINT [readability/multiline_string] [5] */ R"L1C3NS3(
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+#define ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
+// This file has been auto-generated by cpp-define-generator; do not edit directly.
+)L1C3NS3";  // NOLINT [readability/multiline_string] [5]
+
+const char *kFileFooter = /* // NOLINT [readability/multiline_string] [5] */ R"F00T3R(
+#endif  // ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+)F00T3R";  // NOLINT [readability/multiline_string] [5]
+
+#define MACROIZE(holder_type, field_name) to_upper(#holder_type "_" #field_name "_OFFSET")
+
+int main() {
+  std::cout << kFileHeader << std::endl;
+
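+  // Note: z forces std::string concatenation (rather than char* addition) in the macro below.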
+  std::string z = "";
+
+  // Print every constant expression to stdout as a #define or a CHECK_EQ
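+  // For example, a (hypothetical) entry DEFINE_EXPR(SOME_NAME, int32_t, expr) produces:
+  //   #define SOME_NAME <formatted value>
+  //   DEFINE_CHECK_EQ(static_cast<int32_t>(SOME_NAME), (static_cast<int32_t>(expr)))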
+#define DEFINE_EXPR(macro_name, field_type, expr) \
+  cpp_define(to_upper(#macro_name), static_cast<field_type>(expr)); \
+  emit_check_eq(z + "static_cast<" #field_type ">(" + to_upper(#macro_name) + ")", \
+                "static_cast<" #field_type ">(" #expr ")");
+#include "offsets_all.def"
+
+  std::cout << kFileFooter << std::endl;
+  return 0;
+}
diff --git a/tools/cpp-define-generator/offset_codeitem.def b/tools/cpp-define-generator/offset_codeitem.def
new file mode 100644
index 0000000..e5acd1d
--- /dev/null
+++ b/tools/cpp-define-generator/offset_codeitem.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within CodeItem.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include <cstddef>      // offsetof
+#include "dex_file.h"   // art::DexFile
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_CODEITEM_OFFSET(field_name) \
+  DEFINE_OFFSET_EXPR(CodeItem, field_name, int32_t, offsetof(art::DexFile::CodeItem, field_name ## _))
+
+//                     Field Name
+DEFINE_CODEITEM_OFFSET(insns)
+
+#undef DEFINE_CODEITEM_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_dexcache.def b/tools/cpp-define-generator/offset_dexcache.def
new file mode 100644
index 0000000..4b9d481
--- /dev/null
+++ b/tools/cpp-define-generator/offset_dexcache.def
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::ArtMethod.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "art_method.h"         // art::ArtMethod
+#include "base/enums.h"         // PointerSize
+#include "mirror/dex_cache.h"   // art::DexCache
+#endif
+
+#define DEFINE_ART_METHOD_OFFSET_SIZED(field_name, method_name) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_32, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k32).Int32Value()) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_64, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k64).Int32Value())
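+
+// For example, DEFINE_ART_METHOD_OFFSET_SIZED(JNI, EntryPointFromJni) defines
+// both ART_METHOD_JNI_OFFSET_32 and ART_METHOD_JNI_OFFSET_64.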
+
+#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET, int32_t, art::ArtMethod::method_name##Offset().Int32Value())
+
+#define DEFINE_DECLARING_CLASS_OFFSET(field_name, method_name) \
+  DEFINE_EXPR(DECLARING_CLASS_ ## field_name ## _OFFSET, int32_t, art::mirror::Class::method_name##Offset().Int32Value())
+
+//                         New macro suffix          Method Name (of the Offset method)
+DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_METHODS,    DexCacheResolvedMethods)
+DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_TYPES,      DexCacheResolvedTypes)
+DEFINE_ART_METHOD_OFFSET_SIZED(JNI,                  EntryPointFromJni)
+DEFINE_ART_METHOD_OFFSET_SIZED(QUICK_CODE,           EntryPointFromQuickCompiledCode)
+DEFINE_ART_METHOD_OFFSET(DECLARING_CLASS,            DeclaringClass)
+DEFINE_DECLARING_CLASS_OFFSET(DEX_CACHE_STRINGS,     DexCacheStrings)
+
+#undef DEFINE_ART_METHOD_OFFSET
+#undef DEFINE_ART_METHOD_OFFSET_SIZED
+#undef DEFINE_DECLARING_CLASS_OFFSET
diff --git a/tools/cpp-define-generator/offset_mirror_object.def b/tools/cpp-define-generator/offset_mirror_object.def
new file mode 100644
index 0000000..9b99634
--- /dev/null
+++ b/tools/cpp-define-generator/offset_mirror_object.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within java.lang.Object (mirror::Object).
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/object.h"         // art::mirror::Object
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_MIRROR_OBJECT_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(MIRROR_OBJECT, field_name, int32_t, art::mirror::Object::method_name##Offset().Int32Value())
+
+//                          New macro suffix            Method Name (of the Offset method)
+DEFINE_MIRROR_OBJECT_OFFSET(CLASS,                      Class)
+DEFINE_MIRROR_OBJECT_OFFSET(LOCK_WORD,                  Monitor)
+
+#undef DEFINE_MIRROR_OBJECT_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
new file mode 100644
index 0000000..17167a0
--- /dev/null
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::Runtime.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "runtime.h"         // art::Runtime
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+// Note: these callee save method loads require read barriers.
+
+#define DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(field_name, constant_name) \
+  DEFINE_OFFSET_EXPR(Runtime, field_name ## _METHOD, size_t, art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: constant_name))
+
+//                                Macro substring          Constant name
+// Offset of field Runtime::callee_save_methods_[kSaveAllCalleeSaves]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL_CALLEE_SAVES, kSaveAllCalleeSaves)
+// Offset of field Runtime::callee_save_methods_[kSaveRefsOnly]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_ONLY, kSaveRefsOnly)
+// Offset of field Runtime::callee_save_methods_[kSaveRefsAndArgs]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_AND_ARGS, kSaveRefsAndArgs)
+// Offset of field Runtime::callee_save_methods_[kSaveEverything]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, kSaveEverything)
+
+#undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_shadow_frame.def b/tools/cpp-define-generator/offset_shadow_frame.def
new file mode 100644
index 0000000..b49a340
--- /dev/null
+++ b/tools/cpp-define-generator/offset_shadow_frame.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within ShadowFrame.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "stack.h"         // art::ShadowFrame
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_SHADOW_FRAME_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(ShadowFrame, field_name, int32_t, art::ShadowFrame::method_name##Offset())
+
+//                         New macro suffix            Method Name (of the Offset method)
+DEFINE_SHADOW_FRAME_OFFSET(LINK,                       Link)
+DEFINE_SHADOW_FRAME_OFFSET(METHOD,                     Method)
+DEFINE_SHADOW_FRAME_OFFSET(RESULT_REGISTER,            ResultRegister)
+DEFINE_SHADOW_FRAME_OFFSET(DEX_PC_PTR,                 DexPCPtr)
+DEFINE_SHADOW_FRAME_OFFSET(CODE_ITEM,                  CodeItem)
+DEFINE_SHADOW_FRAME_OFFSET(LOCK_COUNT_DATA,            LockCountData)
+DEFINE_SHADOW_FRAME_OFFSET(NUMBER_OF_VREGS,            NumberOfVRegs)
+DEFINE_SHADOW_FRAME_OFFSET(DEX_PC,                     DexPC)
+DEFINE_SHADOW_FRAME_OFFSET(CACHED_HOTNESS_COUNTDOWN,   CachedHotnessCountdown)
+DEFINE_SHADOW_FRAME_OFFSET(VREGS,                      VRegs)
+
+#undef DEFINE_SHADOW_FRAME_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_thread.def b/tools/cpp-define-generator/offset_thread.def
new file mode 100644
index 0000000..6f94d38
--- /dev/null
+++ b/tools/cpp-define-generator/offset_thread.def
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::Thread.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "base/enums.h"    // PointerSize
+#include "stack.h"         // art::ShadowFrame
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_THREAD_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(Thread, field_name, int32_t, art::Thread::method_name##Offset<art::kRuntimePointerSize>().Int32Value())
+
+//                   New macro suffix            Method Name (of the Offset method)
+DEFINE_THREAD_OFFSET(FLAGS,                      ThreadFlags)
+DEFINE_THREAD_OFFSET(ID,                         ThinLockId)
+DEFINE_THREAD_OFFSET(IS_GC_MARKING,              IsGcMarking)
+DEFINE_THREAD_OFFSET(CARD_TABLE,                 CardTable)
+
+// TODO: The rest of the offsets
+// are dependent on __SIZEOF_POINTER__
+
+#undef DEFINE_THREAD_OFFSET
+
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
new file mode 100644
index 0000000..13371a1
--- /dev/null
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Includes every offset file in ART.
+// Useful for processing all offsets together.
+
+// Usage:
+// #define DEFINE_INCLUDE_DEPENDENCIES
+// #include "offsets_all.def"
+// to automatically include each def file's header dependencies.
+//
+// Afterwards,
+// #define DEFINE_EXPR(define_name, field_type, expr) ...
+// #include "offsets_all.def"
+// to process each offset however one wants.
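+//
+// A consumer sketch (hypothetical; main.cc is the real consumer):
+//   #define DEFINE_EXPR(name, type, expr) /* use #name, #type, (expr) */
+//   #include "offsets_all.def"
+//   #undef DEFINE_EXPR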
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#define DEFINE_EXPR(define_name, field_type, expr)
+#endif
+
+#if !defined(DEFINE_EXPR)
+#error "Either DEFINE_INCLUDE_DEPENDENCIES or DEFINE_EXPR must be defined"
+#endif
+
+#include "constant_reference.def"
+#include "offset_runtime.def"
+// TODO: rest of THREAD_ offsets (depends on __SIZEOF_POINTER__).
+#include "offset_thread.def"
+// TODO: SHADOW_FRAME depends on __SIZEOF_POINTER__
+// #include "offset_shadow_frame.def"
+#include "offset_codeitem.def"
+// TODO: MIRROR_OBJECT_HEADER_SIZE (depends on #ifdef read barrier)
+// TODO: MIRROR_CLASS offsets (see above)
+#include "offset_mirror_object.def"
+#include "constant_class.def"
+// TODO: MIRROR_*_ARRAY offsets (depends on header size)
+// TODO: MIRROR_STRING offsets (depends on header size)
+#include "offset_dexcache.def"
+#include "constant_dexcache.def"
+#include "constant_heap.def"
+#include "constant_lockword.def"
+#include "constant_globals.def"
+#include "constant_rosalloc.def"
+#include "constant_thread.def"
+#include "constant_jit.def"
+
+// TODO: MIRROR_OBJECT_HEADER_SIZE #ifdef depends on read barriers
+// TODO: Array offsets (depends on MIRROR_OBJECT_HEADER_SIZE)
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#undef DEFINE_EXPR
+#undef DEFINE_INCLUDE_DEPENDENCIES
+#endif
+
+
diff --git a/tools/cpplint.py b/tools/cpplint.py
index 4f063d9..308dd8c 100755
--- a/tools/cpplint.py
+++ b/tools/cpplint.py
@@ -90,6 +90,7 @@
 _USAGE = """
 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                    [--counting=total|toplevel|detailed]
+                   [--quiet]
         <file> [file] ...
 
   The style guidelines this tries to follow are those in
@@ -115,6 +116,9 @@
     verbose=#
       Specify a number 0-5 to restrict errors to certain verbosity levels.
 
+    quiet
+      Don't print anything if no errors are found.
+
     filter=-x,+y,...
       Specify a comma-separated list of category-filters to apply: only
       error messages whose category names pass the filters will be printed.
@@ -558,6 +562,9 @@
     self.filters = _DEFAULT_FILTERS[:]
     self.counting = 'total'  # In what way are we counting errors?
     self.errors_by_category = {}  # string to int dict storing error counts
+    # BEGIN android-added
+    self.quiet = False      # global setting.
+    # END android-added
 
     # output format:
     # "emacs" - format that emacs can parse (default)
@@ -568,6 +575,14 @@
     """Sets the output format for errors."""
     self.output_format = output_format
 
+  # BEGIN android-added
+  def SetQuiet(self, level):
+    """Sets the module's quiet setting, and returns the previous setting."""
+    last_quiet = self.quiet
+    self.quiet = level
+    return last_quiet
+  # END android-added
+
   def SetVerboseLevel(self, level):
     """Sets the module's verbosity, and returns the previous setting."""
     last_verbose_level = self.verbose_level
@@ -638,6 +653,17 @@
   _cpplint_state.SetOutputFormat(output_format)
 
 
+# BEGIN android-added
+def _Quiet():
+  """Returns the module's quiet setting."""
+  return _cpplint_state.quiet
+
+
+def _SetQuiet(level):
+  """Sets the module's quiet status, and returns the previous setting."""
+  return _cpplint_state.SetQuiet(level)
+# END android-added
+
 def _VerboseLevel():
   """Returns the module's verbosity setting."""
   return _cpplint_state.verbose_level
@@ -3888,6 +3914,9 @@
   """
 
   _SetVerboseLevel(vlevel)
+# BEGIN android-added
+  old_errors = _cpplint_state.error_count
+# END android-added
 
   try:
     # Support the UNIX convention of using "-" for stdin.  Note that
@@ -3938,8 +3967,11 @@
             'One or more unexpected \\r (^M) found;'
             'better to use only a \\n')
 
-  sys.stderr.write('Done processing %s\n' % filename)
-
+# BEGIN android-changed
+  # sys.stderr.write('Done processing %s\n' % filename)
+  if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
+    sys.stderr.write('Done processing %s\n' % filename)
+# END android-changed
 
 def PrintUsage(message):
   """Prints a brief usage string and exits, optionally with an error message.
@@ -3977,6 +4009,9 @@
   try:
     (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                  'stdout', # TODO(enh): added --stdout
+                                                 # BEGIN android-added
+                                                 'quiet',
+                                                 # END android-added
                                                  'counting=',
                                                  'filter=',
                                                  'root='])
@@ -3987,6 +4022,9 @@
   output_format = _OutputFormat()
   output_stream = sys.stderr # TODO(enh): added --stdout
   filters = ''
+  # BEGIN android-added
+  quiet = _Quiet()
+  # END android-added
   counting_style = ''
 
   for (opt, val) in opts:
@@ -3994,6 +4032,10 @@
       PrintUsage(None)
     elif opt == '--stdout': # TODO(enh): added --stdout
       output_stream = sys.stdout # TODO(enh): added --stdout
+    # BEGIN android-added
+    elif opt == '--quiet':
+      quiet = True
+    # END android-added
     elif opt == '--output':
       if not val in ('emacs', 'vs7', 'eclipse'):
         PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
@@ -4019,6 +4061,9 @@
   _SetVerboseLevel(verbosity)
   _SetFilters(filters)
   _SetCountingStyle(counting_style)
+  # BEGIN android-added
+  _SetQuiet(quiet)
+  # END android-added
   sys.stderr = output_stream # TODO(enh): added --stdout
 
   return filenames
@@ -4037,7 +4082,11 @@
   _cpplint_state.ResetErrorCounts()
   for filename in filenames:
     ProcessFile(filename, _cpplint_state.verbose_level)
-  _cpplint_state.PrintErrorCounts()
+  # BEGIN android-changed
+  # _cpplint_state.PrintErrorCounts()
+  if not _cpplint_state.quiet or _cpplint_state.error_count > 0:
+    _cpplint_state.PrintErrorCounts()
+  # END android-changed
 
   sys.exit(_cpplint_state.error_count > 0)
 
diff --git a/tools/dexfuzz/Android.mk b/tools/dexfuzz/Android.mk
index 1580bc3..473f6de 100644
--- a/tools/dexfuzz/Android.mk
+++ b/tools/dexfuzz/Android.mk
@@ -27,14 +27,10 @@
 # --- dexfuzz script ----------------
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
-LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE := dexfuzz
-include $(BUILD_SYSTEM)/base_rules.mk
-$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/dexfuzz $(ACP)
-	@echo "Copy: $(PRIVATE_MODULE) ($@)"
-	$(copy-file-to-new-target)
-	$(hide) chmod 755 $@
+LOCAL_SRC_FILES := dexfuzz
+include $(BUILD_PREBUILT)
 
 # --- dexfuzz script with core image dependencies ----------------
 fuzzer: $(LOCAL_BUILT_MODULE) $(HOST_CORE_IMG_OUTS)
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Device.java b/tools/dexfuzz/src/dexfuzz/executors/Device.java
index 4a53957..45538fe 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Device.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Device.java
@@ -68,7 +68,13 @@
     return envVars.get(key);
   }
 
-  private String getHostCoreImagePath() {
+  private String getHostCoreImagePathWithArch() {
+    // TODO: Using host currently implies x86 (see Options.java), change this when generalized.
+    assert(Options.useArchX86);
+    return androidHostOut + "/framework/x86/core.art";
+  }
+
+  private String getHostCoreImagePathNoArch() {
     return androidHostOut + "/framework/core.art";
   }
 
@@ -80,7 +86,7 @@
     androidHostOut = checkForEnvVar(envVars, "ANDROID_HOST_OUT");
 
     if (Options.executeOnHost) {
-      File coreImage = new File(getHostCoreImagePath());
+      File coreImage = new File(getHostCoreImagePathWithArch());
       if (!coreImage.exists()) {
         Log.errorAndQuit("Host core image not found at " + coreImage.getPath()
             + ". Did you forget to build it?");
@@ -156,7 +162,7 @@
    * Get any extra flags required to execute ART on the host.
    */
   public String getHostExecutionFlags() {
-    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePath());
+    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePathNoArch());
   }
 
   public String getAndroidHostOut() {
diff --git a/tools/dmtracedump/tracedump.cc b/tools/dmtracedump/tracedump.cc
index f70e2c2..3afee6f 100644
--- a/tools/dmtracedump/tracedump.cc
+++ b/tools/dmtracedump/tracedump.cc
@@ -512,10 +512,10 @@
 void freeDataKeys(DataKeys* pKeys) {
   if (pKeys == nullptr) return;
 
-  free(pKeys->fileData);
-  free(pKeys->threads);
-  free(pKeys->methods);
-  free(pKeys);
+  delete[] pKeys->fileData;
+  delete[] pKeys->threads;
+  delete[] pKeys->methods;
+  delete pKeys;
 }
 
 /*
@@ -822,8 +822,8 @@
 DataKeys* parseKeys(FILE* fp, int32_t verbose) {
   int64_t offset;
   DataKeys* pKeys = new DataKeys();
-  memset(pKeys, 0, sizeof(DataKeys));
   if (pKeys == nullptr) return nullptr;
+  memset(pKeys, 0, sizeof(DataKeys));
 
   /*
    * We load the entire file into memory.  We do this, rather than memory-
@@ -865,9 +865,13 @@
     return nullptr;
   }
 
-  /* Reduce our allocation now that we know where the end of the key section is. */
-  pKeys->fileData = reinterpret_cast<char*>(realloc(pKeys->fileData, offset));
-  pKeys->fileLen = offset;
+  /*
+   * Although it is tempting to reduce our allocation now that we know where the
+   * end of the key section is, there is a pitfall. The method names and
+   * signatures in the method list contain pointers into the fileData area.
+   * Realloc or free will result in corruption.
+   */
+
   /* Leave fp pointing to the beginning of the data section. */
   fseek(fp, offset, SEEK_SET);
 
@@ -2607,7 +2611,7 @@
     if (gOptions.graphFileName != nullptr) {
       createInclusiveProfileGraphNew(dataKeys);
     }
-    free(methods);
+    delete[] methods;
   }
 
   freeDataKeys(dataKeys);
diff --git a/tools/javafuzz/Android.mk b/tools/javafuzz/Android.mk
new file mode 100644
index 0000000..63db57a
--- /dev/null
+++ b/tools/javafuzz/Android.mk
@@ -0,0 +1,25 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Java fuzzer tool.
+
+LOCAL_PATH:= $(call my-dir)
+
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := cc
+LOCAL_SRC_FILES := javafuzz.cc
+LOCAL_CFLAGS += -O0 -g -Wall
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_MODULE := javafuzz
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/tools/javafuzz/README.md b/tools/javafuzz/README.md
new file mode 100644
index 0000000..68fc171
--- /dev/null
+++ b/tools/javafuzz/README.md
@@ -0,0 +1,83 @@
+JavaFuzz
+========
+
+JavaFuzz is a tool for generating random Java programs with the objective
+of fuzz testing the ART infrastructure. Each randomly generated Java program
+can be run under various modes of execution, such as using the interpreter,
+using the optimizing compiler, using an external reference implementation,
+or using various target architectures. Any difference between the outputs
+(**divergence**) may indicate a bug in one of the execution modes.
+
+JavaFuzz can be combined with dexfuzz to get multi-layered fuzz testing.
+
+How to run JavaFuzz
+===================
+
+    javafuzz [-s seed] [-d expr-depth] [-l stmt-length]
+             [-i if-nest] [-n loop-nest]
+
+where
+
+    -s : defines a deterministic random seed
+         (randomized using time by default)
+    -d : defines a fuzzing depth for expressions
+         (higher values yield deeper expressions)
+    -l : defines a fuzzing length for statement lists
+         (higher values yield longer statement sequences)
+    -i : defines a fuzzing nest for if/switch statements
+         (higher values yield deeper nested conditionals)
+    -n : defines a fuzzing nest for for/while/do-while loops
+         (higher values yield deeper nested loops)
+
+The current version of JavaFuzz sends all output to stdout, and uses
+a fixed testing class named Test. So a typical test run looks as follows.
+
+    javafuzz > Test.java
+    jack -cp ${JACK_CLASSPATH} --output-dex . Test.java
+    art -classpath classes.dex Test
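+
+Since a given version of JavaFuzz generates the same program for the same
+seed, a particular test program can be regenerated by passing its seed, e.g.
+
+    javafuzz -s 12345 > Test.java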
+
+How to start the JavaFuzz tests
+===============================
+
+    run_java_fuzz_test.py [--num_tests=num]
+                          [--device=serial]
+                          [--mode1=mode] [--mode2=mode]
+
+where
+
+    --num_tests : number of tests to run (10000 by default)
+    --device    : target device serial number (passed to adb -s)
+    --mode1     : m1
+    --mode2     : m2, with m1 != m2, and values one of
+      ri   = reference implementation on host (default for m1)
+      hint = Art interpreter on host
+      hopt = Art optimizing on host (default for m2)
+      tint = Art interpreter on target
+      topt = Art optimizing on target
+
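+For example, the following compares the reference implementation on the host
+against the Art interpreter on the target (modes as listed above):
+
+    run_java_fuzz_test.py --mode1=ri --mode2=tint
+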
+Background
+==========
+
+Although test suites are extremely useful to validate the correctness of a
+system and to ensure that no regressions occur, any test suite is necessarily
+finite in size and scope. Tests typically focus on validating particular
+features by means of code sequences most programmers would expect. Regression
+tests often use slightly less idiomatic code sequences, since they reflect
+problems that were not anticipated originally, but occurred “in the field”.
+Still, any test suite leaves the developer wondering whether undetected bugs
+and flaws still linger in the system.
+
+Over the years, fuzz testing has gained popularity as a testing technique for
+discovering such lingering bugs, including bugs that can bring down a system
+in an unexpected way. Fuzzing refers to feeding a large amount of random data
+as input to a system in an attempt to find bugs or make it crash.
+Generation-based fuzz testing constructs random, but properly formatted input
+data. Mutation-based fuzz testing applies small random changes to existing
+inputs in order to detect shortcomings in a system. Profile-guided or
+coverage-guided fuzzing adds a direction to the way these random changes are
+applied. Multi-layered approaches generate random inputs that are subsequently
+mutated at various stages of execution.
+
+The randomness of fuzz testing implies that the size and scope of testing is no
+longer bounded. Every new run can potentially discover bugs and crashes that
+were heretofore undetected.
diff --git a/tools/javafuzz/javafuzz.cc b/tools/javafuzz/javafuzz.cc
new file mode 100644
index 0000000..161ae0a
--- /dev/null
+++ b/tools/javafuzz/javafuzz.cc
@@ -0,0 +1,1108 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <random>
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+
+namespace {
+
+/*
+ * Java operators.
+ */
+
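+// Emits one entry of the given operator table, chosen uniformly at random
+// (random0(n), defined elsewhere in this class, is assumed to return a value in [0,n)).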
+#define EMIT(x) fputs((x)[random0(sizeof(x)/sizeof(const char*))], out_);
+
+static constexpr const char* kIncDecOps[]   = { "++", "--" };
+static constexpr const char* kIntUnaryOps[] = { "+", "-", "~" };
+static constexpr const char* kFpUnaryOps[]  = { "+", "-" };
+
+static constexpr const char* kBoolBinOps[] = { "&&", "||", "&", "|", "^" };  // includes a few less common ones
+static constexpr const char* kIntBinOps[]  = { "+", "-", "*", "/", "%",
+                                               ">>", ">>>", "<<", "&", "|", "^" };
+static constexpr const char* kFpBinOps[]   = { "+", "-", "*", "/" };
+
+static constexpr const char* kBoolAssignOps[] = { "=", "&=", "|=", "^=" };  // includes a few less common ones
+static constexpr const char* kIntAssignOps[]  = { "=", "+=", "-=", "*=", "/=", "%=",
+                                                  ">>=", ">>>=", "<<=", "&=", "|=", "^=" };
+static constexpr const char* kFpAssignOps[]   = { "=", "+=", "-=", "*=", "/=" };
+
+static constexpr const char* kBoolRelOps[] = { "==", "!=" };
+static constexpr const char* kRelOps[]     = { "==", "!=", ">", ">=", "<", "<=" };
+
+/*
+ * Version of JavaFuzz. Increase this each time changes are made to the program
+ * to preserve the property that a given version of JavaFuzz yields the same
+ * fuzzed Java program for a deterministic random seed.
+ */
+const char* VERSION = "1.1";
+
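+// Maximum array size per number of dimensions (index 0 is unused).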
+static const uint32_t MAX_DIMS[11] = { 0, 1000, 32, 10, 6, 4, 3, 3, 2, 2, 2 };
+
+/**
+ * A class that generates a random Java program that compiles correctly. The program
+ * is generated using rules that generate various programming constructs. Each rule
+ * has a fixed probability to "fire". Running a generated program yields deterministic
+ * output, making it well suited to testing various modes of execution (e.g. an
+ * interpreter vs. a compiler, or two different runtimes) for divergences.
+ *
+ * TODO: Due to the original scope of this project, the generated Java program is heavy
+ *       on loops, arrays, and basic operations; fuzzing other aspects of Java programs,
+ *       like elaborate typing, class hierarchies, and interfaces is still TBD.
+ */
+class JavaFuzz {
+ public:
+  JavaFuzz(FILE* out,
+           uint32_t seed,
+           uint32_t expr_depth,
+           uint32_t stmt_length,
+           uint32_t if_nest,
+           uint32_t loop_nest)
+      : out_(out),
+        fuzz_random_engine_(seed),
+        fuzz_seed_(seed),
+        fuzz_expr_depth_(expr_depth),
+        fuzz_stmt_length_(stmt_length),
+        fuzz_if_nest_(if_nest),
+        fuzz_loop_nest_(loop_nest),
+        return_type_(randomType()),
+        array_type_(randomType()),
+        array_dim_(random1(10)),
+        array_size_(random1(MAX_DIMS[array_dim_])),
+        indentation_(0),
+        expr_depth_(0),
+        stmt_length_(0),
+        if_nest_(0),
+        loop_nest_(0),
+        switch_nest_(0),
+        do_nest_(0),
+        boolean_local_(0),
+        int_local_(0),
+        long_local_(0),
+        float_local_(0),
+        double_local_(0) { }
+
+  ~JavaFuzz() { }
+
+  void emitProgram() {
+    emitHeader();
+    emitTestClassWithMain();
+  }
+
+ private:
+  //
+  // Types.
+  //
+
+  // Current type of each expression during generation.
+  enum Type {
+    kBoolean,
+    kInt,
+    kLong,
+    kFloat,
+    kDouble
+  };
+
+  // Test for an integral type.
+  static bool isInteger(Type tp) {
+    return tp == kInt || tp == kLong;
+  }
+
+  // Test for a floating-point type.
+  static bool isFP(Type tp) {
+    return tp == kFloat || tp == kDouble;
+  }
+
+  // Emit type.
+  void emitType(Type tp) const {
+    switch (tp) {
+      case kBoolean: fputs("boolean", out_); break;
+      case kInt:     fputs("int",     out_); break;
+      case kLong:    fputs("long",    out_); break;
+      case kFloat:   fputs("float",   out_); break;
+      case kDouble:  fputs("double",  out_); break;
+    }
+  }
+
+  // Emit type class.
+  void emitTypeClass(Type tp) const {
+    switch (tp) {
+      case kBoolean: fputs("Boolean", out_); break;
+      case kInt:     fputs("Integer", out_); break;
+      case kLong:    fputs("Long",    out_); break;
+      case kFloat:   fputs("Float",   out_); break;
+      case kDouble:  fputs("Double",  out_); break;
+    }
+  }
+
+  // Return a random type.
+  Type randomType() {
+    switch (random1(5)) {
+      case 1:  return kBoolean;
+      case 2:  return kInt;
+      case 3:  return kLong;
+      case 4:  return kFloat;
+      default: return kDouble;
+    }
+  }
+
+  //
+  // Expressions.
+  //
+
+  // Emit a unary operator (same type in-out).
+  void emitUnaryOp(Type tp) {
+    if (tp == kBoolean) {
+      fputc('!', out_);
+    } else if (isInteger(tp)) {
+      EMIT(kIntUnaryOps);
+    } else {  // isFP(tp)
+      EMIT(kFpUnaryOps);
+    }
+  }
+
+  // Emit a pre/post-increment/decrement operator (same type in-out).
+  void emitIncDecOp(Type tp) {
+    if (tp == kBoolean) {
+      // Not applicable, just leave "as is".
+    } else {  // isInteger(tp) || isFP(tp)
+      EMIT(kIncDecOps);
+    }
+  }
+
+  // Emit a binary operator (same type in-out).
+  void emitBinaryOp(Type tp) {
+    if (tp == kBoolean) {
+      EMIT(kBoolBinOps);
+    } else if (isInteger(tp)) {
+      EMIT(kIntBinOps);
+    } else {  // isFP(tp)
+      EMIT(kFpBinOps);
+    }
+  }
+
+  // Emit an assignment operator (same type in-out).
+  void emitAssignmentOp(Type tp) {
+    if (tp == kBoolean) {
+      EMIT(kBoolAssignOps);
+    } else if (isInteger(tp)) {
+      EMIT(kIntAssignOps);
+    } else {  // isFP(tp)
+      EMIT(kFpAssignOps);
+    }
+  }
+
+  // Emit a relational operator (one type in, boolean out).
+  void emitRelationalOp(Type tp) {
+    if (tp == kBoolean) {
+      EMIT(kBoolRelOps);
+    } else {  // isInteger(tp) || isFP(tp)
+      EMIT(kRelOps);
+    }
+  }
+
+  // Emit a type conversion operator sequence (out type given, new suitable in type picked).
+  Type emitTypeConversionOp(Type tp) {
+    if (tp == kInt) {
+      switch (random1(5)) {
+        case 1: fputs("(int)", out_); return kLong;
+        case 2: fputs("(int)", out_); return kFloat;
+        case 3: fputs("(int)", out_); return kDouble;
+        // Narrowing-widening.
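+        // (A chain like (int)(byte)(int) truncates to the low 8 bits and
+        // sign-extends back to int, exercising narrowing conversions.)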
+        case 4: fputs("(int)(byte)(int)",  out_); return kInt;
+        case 5: fputs("(int)(short)(int)", out_); return kInt;
+      }
+    } else if (tp == kLong) {
+      switch (random1(6)) {
+        case 1: /* implicit */         return kInt;
+        case 2: fputs("(long)", out_); return kFloat;
+        case 3: fputs("(long)", out_); return kDouble;
+        // Narrowing-widening.
+        case 4: fputs("(long)(byte)(long)",  out_); return kLong;
+        case 5: fputs("(long)(short)(long)", out_); return kLong;
+        case 6: fputs("(long)(int)(long)",   out_); return kLong;
+      }
+    } else if (tp == kFloat) {
+      switch (random1(4)) {
+        case 1: fputs("(float)", out_); return kInt;
+        case 2: fputs("(float)", out_); return kLong;
+        case 3: fputs("(float)", out_); return kDouble;
+        // Narrowing-widening.
+        case 4: fputs("(float)(int)(float)", out_); return kFloat;
+      }
+    } else if (tp == kDouble) {
+      switch (random1(5)) {
+        case 1: fputs("(double)", out_); return kInt;
+        case 2: fputs("(double)", out_); return kLong;
+        case 3: fputs("(double)", out_); return kFloat;
+        // Narrowing-widening.
+        case 4: fputs("(double)(int)(double)",   out_); return kDouble;
+        case 5: fputs("(double)(float)(double)", out_); return kDouble;
+      }
+    }
+    return tp;  // nothing suitable, just keep type
+  }
+
+  // Emit a type conversion (out type given, new suitable in type picked).
+  void emitTypeConversion(Type tp) {
+    if (tp == kBoolean) {
+      Type new_tp = randomType();  // fresh operand type (avoids shadowing tp)
+      emitExpression(new_tp);
+      fputc(' ', out_);
+      emitRelationalOp(new_tp);
+      fputc(' ', out_);
+      emitExpression(new_tp);
+    } else {
+      tp = emitTypeConversionOp(tp);
+      fputc(' ', out_);
+      emitExpression(tp);
+    }
+  }
+
+  // Emit an unary intrinsic (out type given, new suitable in type picked).
+  Type emitIntrinsic1(Type tp) {
+    if (tp == kBoolean) {
+      switch (random1(6)) {
+        case 1: fputs("Float.isNaN",       out_); return kFloat;
+        case 2: fputs("Float.isFinite",    out_); return kFloat;
+        case 3: fputs("Float.isInfinite",  out_); return kFloat;
+        case 4: fputs("Double.isNaN",      out_); return kDouble;
+        case 5: fputs("Double.isFinite",   out_); return kDouble;
+        case 6: fputs("Double.isInfinite", out_); return kDouble;
+      }
+    } else if (isInteger(tp)) {
+      const char* prefix = tp == kLong ? "Long" : "Integer";
+      switch (random1(13)) {
+        case 1: fprintf(out_, "%s.highestOneBit",         prefix); break;
+        case 2: fprintf(out_, "%s.lowestOneBit",          prefix); break;
+        case 3: fprintf(out_, "%s.numberOfLeadingZeros",  prefix); break;
+        case 4: fprintf(out_, "%s.numberOfTrailingZeros", prefix); break;
+        case 5: fprintf(out_, "%s.bitCount",              prefix); break;
+        case 6: fprintf(out_, "%s.signum",                prefix); break;
+        case 7: fprintf(out_, "%s.reverse",               prefix); break;
+        case 8: fprintf(out_, "%s.reverseBytes",          prefix); break;
+        case 9:  fputs("Math.incrementExact", out_); break;
+        case 10: fputs("Math.decrementExact", out_); break;
+        case 11: fputs("Math.negateExact",    out_); break;
+        case 12: fputs("Math.abs",            out_); break;
+        case 13: fputs("Math.round", out_);
+                 return tp == kLong ? kDouble : kFloat;
+      }
+    } else {  // isFP(tp)
+      switch (random1(6)) {
+        case 1: fputs("Math.abs",      out_); break;
+        case 2: fputs("Math.ulp",      out_); break;
+        case 3: fputs("Math.signum",   out_); break;
+        case 4: fputs("Math.nextUp",   out_); break;
+        case 5: fputs("Math.nextDown", out_); break;
+        case 6: if (tp == kDouble) {
+                  fputs("Double.longBitsToDouble", out_);
+                  return kLong;
+                } else {
+                  fputs("Float.intBitsToFloat", out_);
+                  return kInt;
+                }
+      }
+    }
+    return tp;  // same type in-out
+  }
+
+  // Emit a binary intrinsic (out type given, new suitable in type picked).
+  Type emitIntrinsic2(Type tp) {
+    if (tp == kBoolean) {
+      switch (random1(3)) {
+        case 1: fputs("Boolean.logicalAnd", out_); break;
+        case 2: fputs("Boolean.logicalOr",  out_); break;
+        case 3: fputs("Boolean.logicalXor", out_); break;
+      }
+    } else if (isInteger(tp)) {
+      const char* prefix = tp == kLong ? "Long" : "Integer";
+      switch (random1(11)) {
+        case 1: fprintf(out_, "%s.compare", prefix); break;
+        case 2: fprintf(out_, "%s.sum",     prefix); break;
+        case 3: fprintf(out_, "%s.min",     prefix); break;
+        case 4: fprintf(out_, "%s.max",     prefix); break;
+        case 5:  fputs("Math.min",           out_); break;
+        case 6:  fputs("Math.max",           out_); break;
+        case 7:  fputs("Math.floorDiv",      out_); break;
+        case 8:  fputs("Math.floorMod",      out_); break;
+        case 9:  fputs("Math.addExact",      out_); break;
+        case 10: fputs("Math.subtractExact", out_); break;
+        case 11: fputs("Math.multiplyExact", out_); break;
+      }
+    } else {  // isFP(tp)
+      const char* prefix = tp == kDouble ? "Double" : "Float";
+      switch (random1(5)) {
+        case 1: fprintf(out_, "%s.sum", prefix); break;
+        case 2: fprintf(out_, "%s.min", prefix); break;
+        case 3: fprintf(out_, "%s.max", prefix); break;
+        case 4: fputs("Math.min", out_); break;
+        case 5: fputs("Math.max", out_); break;
+      }
+    }
+    return tp;  // same type in-out
+  }
+
+  // Emit an intrinsic (out type given, new suitable in type picked).
+  void emitIntrinsic(Type tp) {
+    if (random1(2) == 1) {
+      tp = emitIntrinsic1(tp);
+      fputc('(', out_);
+      emitExpression(tp);
+      fputc(')', out_);
+    } else {
+      tp = emitIntrinsic2(tp);
+      fputc('(', out_);
+      emitExpression(tp);
+      fputs(", ", out_);
+      emitExpression(tp);
+      fputc(')', out_);
+    }
+  }
+
+  // Emit unboxing boxed object.
+  void emitUnbox(Type tp) {
+    fputc('(', out_);
+    emitType(tp);
+    fputs(") new ", out_);
+    emitTypeClass(tp);
+    fputc('(', out_);
+    emitExpression(tp);
+    fputc(')', out_);
+  }
+
+  // Emit miscellaneous constructs.
+  void emitMisc(Type tp) {
+    if (tp == kBoolean) {
+      fputs("this instanceof Test", out_);
+    } else if (isInteger(tp)) {
+      const char* prefix = tp == kLong ? "Long" : "Integer";
+      switch (random1(2)) {
+        case 1: fprintf(out_, "%s.MIN_VALUE", prefix); break;
+        case 2: fprintf(out_, "%s.MAX_VALUE", prefix); break;
+      }
+    } else {  // isFP(tp)
+      const char* prefix = tp == kDouble ? "Double" : "Float";
+      switch (random1(6)) {
+        case 1: fprintf(out_, "%s.MIN_NORMAL", prefix);        break;
+        case 2: fprintf(out_, "%s.MIN_VALUE", prefix);         break;
+        case 3: fprintf(out_, "%s.MAX_VALUE", prefix);         break;
+        case 4: fprintf(out_, "%s.POSITIVE_INFINITY", prefix); break;
+        case 5: fprintf(out_, "%s.NEGATIVE_INFINITY", prefix); break;
+        case 6: fprintf(out_, "%s.NaN", prefix);               break;
+      }
+    }
+  }
+
+  // Adjust local of given type and return adjusted value.
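+  // Passing a == 0 leaves the count unchanged and merely reads it back.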
+  uint32_t adjustLocal(Type tp, int32_t a) {
+    switch (tp) {
+      case kBoolean: boolean_local_ += a; return boolean_local_;
+      case kInt:     int_local_     += a; return int_local_;
+      case kLong:    long_local_    += a; return long_local_;
+      case kFloat:   float_local_   += a; return float_local_;
+      default:       double_local_  += a; return double_local_;
+    }
+  }
+
+  // Emit an expression that is a strict upper bound for an array index.
+  void emitUpperBound() {
+    if (random1(8) == 1) {
+      fputs("mArray.length", out_);
+    } else if (random1(8) == 1) {
+      fprintf(out_, "%u", random1(array_size_));  // random in range
+    } else {
+      fprintf(out_, "%u", array_size_);
+    }
+  }
+
+  // Emit an array index, usually within proper range.
+  void emitArrayIndex() {
+    if (loop_nest_ > 0 && random1(2) == 1) {
+      fprintf(out_, "i%u", random0(loop_nest_));
+    } else if (random1(8) == 1) {
+      fputs("mArray.length - 1", out_);
+    } else {
+      fprintf(out_, "%u", random0(array_size_));  // random in range
+    }
+    // Introduce potential off by one errors with low probability.
+    if (random1(100) == 1) {
+      if (random1(2) == 1) {
+        fputs(" - 1", out_);
+      } else {
+        fputs(" + 1", out_);
+      }
+    }
+  }
+
+  // Emit a literal.
+  void emitLiteral(Type tp) {
+    switch (tp) {
+      case kBoolean: fputs(random1(2) == 1 ? "true" : "false", out_); break;
+      case kInt:     fprintf(out_, "%d",    random()); break;
+      case kLong:    fprintf(out_, "%dL",   random()); break;
+      case kFloat:   fprintf(out_, "%d.0f", random()); break;
+      case kDouble:  fprintf(out_, "%d.0",  random()); break;
+    }
+  }
+
+  // Emit array variable, if available.
+  bool emitArrayVariable(Type tp) {
+    if (tp == array_type_) {
+      fputs("mArray", out_);
+      for (uint32_t i = 0; i < array_dim_; i++) {
+        fputc('[', out_);
+        emitArrayIndex();
+        fputc(']', out_);
+      }
+      return true;
+    }
+    return false;
+  }
+
+  // Emit a local variable, if available.
+  bool emitLocalVariable(Type tp) {
+    uint32_t locals = adjustLocal(tp, 0);
+    if (locals > 0) {
+      uint32_t local = random0(locals);
+      switch (tp) {
+        case kBoolean: fprintf(out_, "lZ%u", local); break;
+        case kInt:     fprintf(out_, "lI%u", local); break;
+        case kLong:    fprintf(out_, "lJ%u", local); break;
+        case kFloat:   fprintf(out_, "lF%u", local); break;
+        case kDouble:  fprintf(out_, "lD%u", local); break;
+      }
+      return true;
+    }
+    return false;
+  }
+
+  // Emit a field variable.
+  void emitFieldVariable(Type tp) {
+    switch (tp) {
+      case kBoolean:fputs("mZ", out_); break;
+      case kInt:    fputs("mI", out_); break;
+      case kLong:   fputs("mJ", out_); break;
+      case kFloat:  fputs("mF", out_); break;
+      case kDouble: fputs("mD", out_); break;
+    }
+  }
+
+  // Emit a variable.
+  void emitVariable(Type tp) {
+    switch (random1(4)) {
+      case 1:
+        if (emitArrayVariable(tp))
+          return;
+        // FALL-THROUGH
+      case 2:
+        if (emitLocalVariable(tp))
+          return;
+        // FALL-THROUGH
+      default:
+        emitFieldVariable(tp);
+        break;
+    }
+  }
+
+  // Emit an expression.
+  void emitExpression(Type tp) {
+    // Continuing expression becomes less likely as the depth grows.
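+    // (random1(expr_depth_ + 1) is uniform in [1, expr_depth_ + 1], so a leaf
+    // cannot be chosen while expr_depth_ < fuzz_expr_depth_ and becomes ever
+    // more likely beyond that depth.)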
+    if (random1(expr_depth_ + 1) > fuzz_expr_depth_) {
+      if (random1(2) == 1) {
+        emitLiteral(tp);
+      } else {
+        emitVariable(tp);
+      }
+      return;
+    }
+
+    expr_depth_++;
+
+    fputc('(', out_);
+    switch (random1(12)) {  // favor binary operations
+      case 1:
+        // Unary operator: ~ x
+        emitUnaryOp(tp);
+        fputc(' ', out_);
+        emitExpression(tp);
+        break;
+      case 2:
+        // Pre-increment: ++x
+        emitIncDecOp(tp);
+        emitVariable(tp);
+        break;
+      case 3:
+        // Post-increment: x++
+        emitVariable(tp);
+        emitIncDecOp(tp);
+        break;
+      case 4:
+        // Ternary operator: b ? x : y
+        emitExpression(kBoolean);
+        fputs(" ? ", out_);
+        emitExpression(tp);
+        fputs(" : ", out_);
+        emitExpression(tp);
+        break;
+      case 5:
+        // Type conversion: (float) x
+        emitTypeConversion(tp);
+        break;
+      case 6:
+        // Intrinsic: foo(x)
+        emitIntrinsic(tp);
+        break;
+      case 7:
+        // Emit unboxing of a boxed value: (int) new Integer(x)
+        emitUnbox(tp);
+        break;
+      case 8:
+        // Miscellaneous constructs: Integer.MIN_VALUE, this instanceof Test, etc.
+        emitMisc(tp);
+        break;
+      default:
+        // Binary operator: x + y
+        emitExpression(tp);
+        fputc(' ', out_);
+        emitBinaryOp(tp);
+        fputc(' ', out_);
+        emitExpression(tp);
+        break;
+    }
+    fputc(')', out_);
+
+    --expr_depth_;
+  }
+
+  //
+  // Statements.
+  //
+
+  // Emit current indentation.
+  void emitIndentation() const {
+    for (uint32_t i = 0; i < indentation_; i++) {
+      fputc(' ', out_);
+    }
+  }
+
+  // Emit a return statement.
+  bool emitReturn(bool mustEmit) {
+    // Only emit when we must, or with low probability inside ifs/loops,
+    // but outside do-while loops to avoid confusing the may-follow status.
+    if (mustEmit || ((if_nest_ + loop_nest_) > 0 && do_nest_ == 0 && random1(10) == 1)) {
+      fputs("return ", out_);
+      emitExpression(return_type_);
+      fputs(";\n", out_);
+      return false;
+    }
+    // Fall back to assignment.
+    return emitAssignment();
+  }
+
+  // Emit a continue statement.
+  bool emitContinue() {
+    // Only emit with low probability inside loops.
+    if (loop_nest_ > 0 && random1(10) == 1) {
+      fputs("continue;\n", out_);
+      return false;
+    }
+    // Fall back to assignment.
+    return emitAssignment();
+  }
+
+  // Emit a break statement.
+  bool emitBreak() {
+    // Only emit with low probability inside loops, but outside switches
+    // to avoid confusing the may-follow status.
+    if (loop_nest_ > 0 && switch_nest_ == 0 && random1(10) == 1) {
+      fputs("break;\n", out_);
+      return false;
+    }
+    // Fall back to assignment.
+    return emitAssignment();
+  }
+
+  // Emit a new scope with a local variable declaration statement.
+  bool emitScope() {
+    Type tp = randomType();
+    fputs("{\n", out_);
+    indentation_ += 2;
+    emitIndentation();
+    emitType(tp);
+    switch (tp) {
+      case kBoolean: fprintf(out_, " lZ%u = ", boolean_local_); break;
+      case kInt:     fprintf(out_, " lI%u = ", int_local_);     break;
+      case kLong:    fprintf(out_, " lJ%u = ", long_local_);    break;
+      case kFloat:   fprintf(out_, " lF%u = ", float_local_);   break;
+      case kDouble:  fprintf(out_, " lD%u = ", double_local_);  break;
+    }
+    emitExpression(tp);
+    fputs(";\n", out_);
+
+    adjustLocal(tp, 1);  // local now visible
+
+    bool mayFollow = emitStatementList();
+
+    adjustLocal(tp, -1);  // local no longer visible
+
+    indentation_ -= 2;
+    emitIndentation();
+    fputs("}\n", out_);
+    return mayFollow;
+  }
+
+  // Emit a for loop.
+  bool emitForLoop() {
+    // Continuing loop nest becomes less likely as the depth grows.
+    if (random1(loop_nest_ + 1) > fuzz_loop_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    bool goesUp = random1(2) == 1;
+    fprintf(out_, "for (int i%u = ", loop_nest_);
+    if (goesUp) {
+      fprintf(out_, "0; i%u < ", loop_nest_);
+      emitUpperBound();
+      fprintf(out_, "; i%u++) {\n", loop_nest_);
+    } else {
+      emitUpperBound();
+      fprintf(out_, " - 1; i%d >= 0", loop_nest_);
+      fprintf(out_, "; i%d--) {\n", loop_nest_);
+    }
+
+    ++loop_nest_;  // now in loop
+
+    indentation_ += 2;
+    emitStatementList();
+
+    --loop_nest_;  // no longer in loop
+
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "}\n");
+    return true;  // loop-body does not block flow
+  }
+
+  // Emit a while or do-while loop.
+  bool emitDoLoop() {
+    // Continuing loop nest becomes less likely as the depth grows.
+    if (random1(loop_nest_ + 1) > fuzz_loop_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    // TODO: remove this
+    // The jack bug b/28862040 prevents generating while/do-while loops, because otherwise
+    // we get dozens of reports on the same issue per nightly run.
+    if (true) {
+      return emitAssignment();
+    }
+
+    bool isWhile = random1(2) == 1;
+    fputs("{\n", out_);
+    indentation_ += 2;
+    emitIndentation();
+    fprintf(out_, "int i%u = %d;", loop_nest_, isWhile ? -1 : 0);
+    emitIndentation();
+    if (isWhile) {
+      fprintf(out_, "while (++i%u < ", loop_nest_);
+      emitUpperBound();
+      fputs(") {\n", out_);
+    } else {
+      fputs("do {\n", out_);
+      do_nest_++;
+    }
+
+    ++loop_nest_;  // now in loop
+
+    indentation_ += 2;
+    emitStatementList();
+
+    --loop_nest_;  // no longer in loop
+
+    indentation_ -= 2;
+    emitIndentation();
+    if (isWhile) {
+      fputs("}\n", out_);
+    } else {
+      fprintf(out_, "} while (++i%u < ", loop_nest_);
+      emitUpperBound();
+      fputs(");\n", out_);
+      --do_nest_;
+    }
+    indentation_ -= 2;
+    emitIndentation();
+    fputs("}\n", out_);
+    return true;  // loop-body does not block flow
+  }
+
+  // Emit an if statement.
+  bool emitIfStmt() {
+    // Continuing if nest becomes less likely as the depth grows.
+    if (random1(if_nest_ + 1) > fuzz_if_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    fputs("if (", out_);
+    emitExpression(kBoolean);
+    fputs(") {\n", out_);
+
+    ++if_nest_;  // now in if
+
+    indentation_ += 2;
+    bool mayFollowTrue = emitStatementList();
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "} else {\n");
+    indentation_ += 2;
+    bool mayFollowFalse = emitStatementList();
+
+    --if_nest_;  // no longer in if
+
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "}\n");
+    return mayFollowTrue || mayFollowFalse;
+  }
+
+  // Emit a switch statement.
+  bool emitSwitch() {
+    // Continuing if nest becomes less likely as the depth grows.
+    if (random1(if_nest_ + 1) > fuzz_if_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    bool mayFollow = false;
+    fputs("switch (", out_);
+    emitArrayIndex();  // restrict its range
+    fputs(") {\n", out_);
+
+    ++if_nest_;
+    ++switch_nest_;  // now in switch
+
+    indentation_ += 2;
+    for (uint32_t i = 0; i < 2; i++) {
+      emitIndentation();
+      if (i == 0) {
+        fprintf(out_, "case %u: {\n", random0(array_size_));
+      } else {
+        fprintf(out_, "default: {\n");
+      }
+      indentation_ += 2;
+      if (emitStatementList()) {
+        // Must end with break.
+        emitIndentation();
+        fputs("break;\n", out_);
+        mayFollow = true;
+      }
+      indentation_ -= 2;
+      emitIndentation();
+      fputs("}\n", out_);
+    }
+
+    --if_nest_;
+    --switch_nest_;  // no longer in switch
+
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "}\n");
+    return mayFollow;
+  }
+
+  // Emit an assignment statement.
+  bool emitAssignment() {
+    Type tp = randomType();
+    emitVariable(tp);
+    fputc(' ', out_);
+    emitAssignmentOp(tp);
+    fputc(' ', out_);
+    emitExpression(tp);
+    fputs(";\n", out_);
+    return true;
+  }
+
+  // Emit a single statement. Returns true if statements may follow.
+  bool emitStatement() {
+    switch (random1(16)) {  // favor assignments
+      case 1:  return emitReturn(false);
+      case 2:  return emitContinue();
+      case 3:  return emitBreak();
+      case 4:  return emitScope();
+      case 5:  return emitForLoop();
+      case 6:  return emitDoLoop();
+      case 7:  return emitIfStmt();
+      case 8:  return emitSwitch();
+      default: return emitAssignment();
+    }
+  }
+
+  // Emit a statement list. Returns true if statements may follow.
+  bool emitStatementList() {
+    while (stmt_length_ < 1000) {  // avoid run-away
+      stmt_length_++;
+      emitIndentation();
+      if (!emitStatement()) {
+        return false;  // rest would be dead code
+      }
+      // Continuing this list becomes less likely as the total statement list grows.
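+      // (random1(stmt_length_) is uniform in [1, stmt_length_], so the list
+      // always continues while stmt_length_ <= fuzz_stmt_length_.)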
+      if (random1(stmt_length_) > fuzz_stmt_length_) {
+        break;
+      }
+    }
+    return true;
+  }
+
+  // Emit field declarations.
+  void emitFieldDecls() {
+    fputs("  private boolean mZ = false;\n", out_);
+    fputs("  private int     mI = 0;\n", out_);
+    fputs("  private long    mJ = 0;\n", out_);
+    fputs("  private float   mF = 0;\n", out_);
+    fputs("  private double  mD = 0;\n\n", out_);
+  }
+
+  // Emit array declaration.
+  void emitArrayDecl() {
+    fputs("  private ", out_);
+    emitType(array_type_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      fputs("[]", out_);
+    }
+    fputs(" mArray = new ", out_);
+    emitType(array_type_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      fprintf(out_, "[%d]", array_size_);
+    }
+    fputs(";\n\n", out_);
+  }
+
+  // Emit test constructor.
+  void emitTestConstructor() {
+    fputs("  private Test() {\n", out_);
+    indentation_ += 2;
+    emitIndentation();
+    emitType(array_type_);
+    fputs(" a = ", out_);
+    emitLiteral(array_type_);
+    fputs(";\n", out_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      emitIndentation();
+      fprintf(out_, "for (int i%u = 0; i%u < %u; i%u++) {\n", i, i, array_size_, i);
+      indentation_ += 2;
+    }
+    emitIndentation();
+    fputs("mArray", out_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      fprintf(out_, "[i%u]", i);
+    }
+    fputs(" = a;\n", out_);
+    emitIndentation();
+    if (array_type_ == kBoolean) {
+      fputs("a = !a;\n", out_);
+    } else {
+      fputs("a++;\n", out_);
+    }
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      indentation_ -= 2;
+      emitIndentation();
+      fputs("}\n", out_);
+    }
+    indentation_ -= 2;
+    fputs("  }\n\n", out_);
+  }
+
+  // Emit test method.
+  void emitTestMethod() {
+    fputs("  private ", out_);
+    emitType(return_type_);
+    fputs(" testMethod() {\n", out_);
+    indentation_ += 2;
+    if (emitStatementList()) {
+      // Must end with return.
+      emitIndentation();
+      emitReturn(true);
+    }
+    indentation_ -= 2;
+    fputs("  }\n\n", out_);
+  }
+
+  // Emit main method driver.
+  void emitMainMethod() {
+    fputs("  public static void main(String[] args) {\n", out_);
+    indentation_ += 2;
+    fputs("    Test t = new Test();\n    ", out_);
+    emitType(return_type_);
+    fputs(" r = ", out_);
+    emitLiteral(return_type_);
+    fputs(";\n", out_);
+    fputs("    try {\n", out_);
+    fputs("      r = t.testMethod();\n", out_);
+    fputs("    } catch (Exception e) {\n", out_);
+    fputs("      // Arithmetic, null pointer, index out of bounds, etc.\n", out_);
+    fputs("      System.out.println(\"An exception was caught.\");\n", out_);
+    fputs("    }\n", out_);
+    fputs("    System.out.println(\"r  = \" + r);\n",    out_);
+    fputs("    System.out.println(\"mZ = \" + t.mZ);\n", out_);
+    fputs("    System.out.println(\"mI = \" + t.mI);\n", out_);
+    fputs("    System.out.println(\"mJ = \" + t.mJ);\n", out_);
+    fputs("    System.out.println(\"mF = \" + t.mF);\n", out_);
+    fputs("    System.out.println(\"mD = \" + t.mD);\n", out_);
+    fputs("    System.out.println(\"mArray = \" + ", out_);
+    if (array_dim_ == 1) {
+      fputs("Arrays.toString(t.mArray)", out_);
+    } else {
+      fputs("Arrays.deepToString(t.mArray)", out_);
+    }
+    fputs(");\n", out_);
+    indentation_ -= 2;
+    fputs("  }\n", out_);
+  }
+
+  // Emit program header. Emit command line options in the comments.
+  void emitHeader() {
+    fputs("\n/**\n * AOSP Java Fuzz Tester.\n", out_);
+    fputs(" * Automatically generated Java program.\n", out_);
+    fprintf(out_,
+            " * javafuzz -s %u -d %u -l %u -i %u -n %u (version %s)\n */\n\n",
+            fuzz_seed_,
+            fuzz_expr_depth_,
+            fuzz_stmt_length_,
+            fuzz_if_nest_,
+            fuzz_loop_nest_,
+            VERSION);
+    fputs("import java.util.Arrays;\n\n", out_);
+  }
+
+  // Emit single test class with main driver.
+  void emitTestClassWithMain() {
+    fputs("public class Test {\n\n", out_);
+    indentation_ += 2;
+    emitFieldDecls();
+    emitArrayDecl();
+    emitTestConstructor();
+    emitTestMethod();
+    emitMainMethod();
+    indentation_ -= 2;
+    fputs("}\n\n", out_);
+  }
+
+  //
+  // Random integers.
+  //
+
+  // Return random integer.
+  int32_t random() {
+    return fuzz_random_engine_();
+  }
+
+  // Return random integer in range [0,max).
+  uint32_t random0(uint32_t max) {
+    std::uniform_int_distribution<uint32_t> gen(0, max - 1);
+    return gen(fuzz_random_engine_);
+  }
+
+  // Return random integer in range [1,max].
+  uint32_t random1(uint32_t max) {
+    std::uniform_int_distribution<uint32_t> gen(1, max);
+    return gen(fuzz_random_engine_);
+  }
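+
+  // Note that random(), random0() and random1() all draw from
+  // fuzz_random_engine_, which is what makes generation deterministic
+  // for a given seed (cf. the VERSION comment above).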
+
+  // Fuzzing parameters.
+  FILE* out_;
+  std::mt19937 fuzz_random_engine_;
+  const uint32_t fuzz_seed_;
+  const uint32_t fuzz_expr_depth_;
+  const uint32_t fuzz_stmt_length_;
+  const uint32_t fuzz_if_nest_;
+  const uint32_t fuzz_loop_nest_;
+
+  // Return type and array setup.
+  const Type return_type_;
+  const Type array_type_;
+  const uint32_t array_dim_;
+  const uint32_t array_size_;
+
+  // Current context.
+  uint32_t indentation_;
+  uint32_t expr_depth_;
+  uint32_t stmt_length_;
+  uint32_t if_nest_;
+  uint32_t loop_nest_;
+  uint32_t switch_nest_;
+  uint32_t do_nest_;
+  uint32_t boolean_local_;
+  uint32_t int_local_;
+  uint32_t long_local_;
+  uint32_t float_local_;
+  uint32_t double_local_;
+};
+
+}  // anonymous namespace
+
+int32_t main(int32_t argc, char** argv) {
+  // Defaults.
+  uint32_t seed = time(NULL);
+  uint32_t expr_depth = 1;
+  uint32_t stmt_length = 8;
+  uint32_t if_nest = 2;
+  uint32_t loop_nest = 3;
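+
+  // Example (hypothetical invocation): "javafuzz -s 12345 -d 2" fixes the
+  // random seed and raises the expression-depth parameter; unspecified
+  // options keep the defaults above.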
+
+  // Parse options.
+  while (1) {
+    int32_t option = getopt(argc, argv, "s:d:l:i:n:h");
+    if (option < 0) {
+      break;  // done
+    }
+    switch (option) {
+      case 's':
+        seed = strtoul(optarg, nullptr, 0);  // deterministic seed
+        break;
+      case 'd':
+        expr_depth = strtoul(optarg, nullptr, 0);
+        break;
+      case 'l':
+        stmt_length = strtoul(optarg, nullptr, 0);
+        break;
+      case 'i':
+        if_nest = strtoul(optarg, nullptr, 0);
+        break;
+      case 'n':
+        loop_nest = strtoul(optarg, nullptr, 0);
+        break;
+      case 'h':
+      default:
+        fprintf(stderr,
+                "usage: %s [-s seed] "
+                "[-d expr-depth] [-l stmt-length] "
+                "[-i if-nest] [-n loop-nest] [-h]\n",
+                argv[0]);
+        return 1;
+    }
+  }
+
+  // Seed global random generator.
+  srand(seed);
+
+  // Generate fuzzed Java program.
+  JavaFuzz fuzz(stdout, seed, expr_depth, stmt_length, if_nest, loop_nest);
+  fuzz.emitProgram();
+  return 0;
+}
diff --git a/tools/javafuzz/run_java_fuzz_test.py b/tools/javafuzz/run_java_fuzz_test.py
new file mode 100755
index 0000000..5f527b8
--- /dev/null
+++ b/tools/javafuzz/run_java_fuzz_test.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import argparse
+import subprocess
+import sys
+import os
+
+from tempfile import mkdtemp
+from threading import Timer
+
+# Normalized return codes.
+EXIT_SUCCESS = 0
+EXIT_TIMEOUT = 1
+EXIT_NOTCOMPILED = 2
+EXIT_NOTRUN = 3
+
+#
+# Utility methods.
+#
+
+def RunCommand(cmd, args, out, err, timeout=5):
+  """Executes a command, and returns its return code.
+
+  Args:
+    cmd: string, a command to execute
+    args: string, arguments to pass to command (or None)
+    out: string, file name to open for stdout (or None)
+    err: string, file name to open for stderr (or None)
+    timeout: int, timeout in seconds
+  Returns:
+    return code of running command (forced EXIT_TIMEOUT on timeout)
+  """
+  cmd = 'exec ' + cmd  # exec replaces the shell, so proc.kill() reaches the command
+  if args is not None:
+    cmd = cmd + ' ' + args
+  outf = None
+  if out is not None:
+    outf = open(out, mode='w')
+  errf = None
+  if err is not None:
+    errf = open(err, mode='w')
+  proc = subprocess.Popen(cmd, stdout=outf, stderr=errf, shell=True)
+  timer = Timer(timeout, proc.kill)  # enforces timeout
+  timer.start()
+  proc.communicate()
+  if timer.is_alive():
+    timer.cancel()
+    returncode = proc.returncode
+  else:
+    returncode = EXIT_TIMEOUT
+  if outf is not None:
+    outf.close()
+  if errf is not None:
+    errf.close()
+  return returncode
+
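+# Example (this exact call is made by the reference-implementation runner
+# below):
+#   retc = RunCommand('javac', 'Test.java', out=None, err=None, timeout=30)
+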
+def GetJackClassPath():
+  """Returns Jack's classpath."""
+  top = os.environ.get('ANDROID_BUILD_TOP')
+  if top is None:
+    raise FatalError('Cannot find AOSP build top')
+  libdir = top + '/out/host/common/obj/JAVA_LIBRARIES'
+  return libdir + '/core-libart-hostdex_intermediates/classes.jack:' \
+       + libdir + '/core-oj-hostdex_intermediates/classes.jack'
+
+def GetExecutionModeRunner(device, mode):
+  """Returns a runner for the given execution mode.
+
+  Args:
+    device: string, target device serial number (or None)
+    mode: string, execution mode
+  Returns:
+    TestRunner with given execution mode
+  Raises:
+    FatalError: error for unknown execution mode
+  """
+  if mode == 'ri':
+    return TestRunnerRIOnHost()
+  if mode == 'hint':
+    return TestRunnerArtOnHost(True)
+  if mode == 'hopt':
+    return TestRunnerArtOnHost(False)
+  if mode == 'tint':
+    return TestRunnerArtOnTarget(device, True)
+  if mode == 'topt':
+    return TestRunnerArtOnTarget(device, False)
+  raise FatalError('Unknown execution mode')
+
+def GetReturnCode(retc):
+  """Returns a string representation of the given normalized return code.
+  Args:
+    retc: int, normalized return code
+  Returns:
+    string representation of normalized return code
+  Raises:
+    FatalError: error for unknown normalized return code
+  """
+  if retc == EXIT_SUCCESS:
+    return 'SUCCESS'
+  if retc == EXIT_TIMEOUT:
+    return 'TIMED-OUT'
+  if retc == EXIT_NOTCOMPILED:
+    return 'NOT-COMPILED'
+  if retc == EXIT_NOTRUN:
+    return 'NOT-RUN'
+  raise FatalError('Unknown normalized return code')
+
+#
+# Execution mode classes.
+#
+
+class TestRunner(object):
+  """Abstraction for running a test in a particular execution mode."""
+  __metaclass__ = abc.ABCMeta
+
+  def GetDescription(self):
+    """Returns a description string of the execution mode."""
+    return self._description
+
+  def GetId(self):
+    """Returns a short string that uniquely identifies the execution mode."""
+    return self._id
+
+  @abc.abstractmethod
+  def CompileAndRunTest(self):
+    """Compile and run the generated test.
+
+    Ensures that the current Test.java in the temporary directory is compiled
+    and executed under the current execution mode. On success, transfers the
+    generated output to the file GetId()_run_out.txt in the temporary directory.
+    Cleans up after itself.
+
+    Most nonzero return codes are assumed non-divergent, since systems may
+    exit in different ways. This is enforced by normalizing return codes.
+
+    Returns:
+      normalized return code
+    """
+    pass
+
+class TestRunnerRIOnHost(TestRunner):
+  """Concrete test runner of the reference implementation on host."""
+
+  def __init__(self):
+    """Constructor for the RI tester."""
+    self._description = 'RI on host'
+    self._id = 'RI'
+
+  def CompileAndRunTest(self):
+    if RunCommand('javac', 'Test.java',
+                  out=None, err=None, timeout=30) == EXIT_SUCCESS:
+      retc = RunCommand('java', 'Test', 'RI_run_out.txt', err=None)
+      if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+        retc = EXIT_NOTRUN
+    else:
+      retc = EXIT_NOTCOMPILED
+    # Cleanup and return.
+    RunCommand('rm', '-f Test.class', out=None, err=None)
+    return retc
+
+class TestRunnerArtOnHost(TestRunner):
+  """Concrete test runner of Art on host (interpreter or optimizing)."""
+
+  def __init__(self, interpreter):
+    """Constructor for the Art on host tester.
+
+    Args:
+      interpreter: boolean, selects between interpreter or optimizing
+    """
+    self._art_args = '-cp classes.dex Test'
+    if interpreter:
+      self._description = 'Art interpreter on host'
+      self._id = 'HInt'
+      self._art_args = '-Xint ' + self._art_args
+    else:
+      self._description = 'Art optimizing on host'
+      self._id = 'HOpt'
+    self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
+
+  def CompileAndRunTest(self):
+    if RunCommand('jack', self._jack_args,
+                  out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
+      out = self.GetId() + '_run_out.txt'
+      retc = RunCommand('art', self._art_args, out, 'arterr.txt')
+      if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+        retc = EXIT_NOTRUN
+    else:
+      retc = EXIT_NOTCOMPILED
+    # Cleanup and return.
+    RunCommand('rm', '-rf classes.dex jackerr.txt arterr.txt android-data*',
+               out=None, err=None)
+    return retc
+
+# TODO: very rough first version without proper cache,
+#       reuse staszkiewicz' module for properly setting up dalvikvm on target.
+class TestRunnerArtOnTarget(TestRunner):
+  """Concrete test runner of Art on target (interpreter or optimizing)."""
+
+  def __init__(self, device, interpreter):
+    """Constructor for the Art on target tester.
+
+    Args:
+      device: string, target device serial number (or None)
+      interpreter: boolean, selects between interpreter or optimizing
+    """
+    # VM flags must follow 'dalvikvm', not 'adb', so the command prefix is
+    # prepended only after -Xint has (optionally) been added.
+    self._dalvik_args = '-cp /data/local/tmp/classes.dex Test'
+    if interpreter:
+      self._description = 'Art interpreter on target'
+      self._id = 'TInt'
+      self._dalvik_args = '-Xint ' + self._dalvik_args
+    else:
+      self._description = 'Art optimizing on target'
+      self._id = 'TOpt'
+    self._dalvik_args = 'shell dalvikvm ' + self._dalvik_args
+    self._adb = 'adb'
+    if device is not None:
+      self._adb = self._adb + ' -s ' + device
+    self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
+
+  def CompileAndRunTest(self):
+    if RunCommand('jack', self._jack_args,
+                  out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
+      if RunCommand(self._adb, 'push classes.dex /data/local/tmp/',
+                    'adb.txt', err=None) != EXIT_SUCCESS:
+        raise FatalError('Cannot push to target device')
+      out = self.GetId() + '_run_out.txt'
+      retc = RunCommand(self._adb, self._dalvik_args, out, err=None)
+      if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+        retc = EXIT_NOTRUN
+    else:
+      retc = EXIT_NOTCOMPILED
+    # Cleanup and return.
+    RunCommand('rm', '-f classes.dex jackerr.txt adb.txt',
+               out=None, err=None)
+    RunCommand(self._adb, 'shell rm -f /data/local/tmp/classes.dex',
+               out=None, err=None)
+    return retc
+
+#
+# Tester classes.
+#
+
+class FatalError(Exception):
+  """Fatal error in the tester."""
+  pass
+
+class JavaFuzzTester(object):
+  """Tester that runs JavaFuzz many times and report divergences."""
+
+  def __init__(self, num_tests, device, mode1, mode2):
+    """Constructor for the tester.
+
+    Args:
+      num_tests: int, number of tests to run
+      device: string, target device serial number (or None)
+      mode1: string, execution mode for first runner
+      mode2: string, execution mode for second runner
+    """
+    self._num_tests = num_tests
+    self._device = device
+    self._runner1 = GetExecutionModeRunner(device, mode1)
+    self._runner2 = GetExecutionModeRunner(device, mode2)
+    self._save_dir = None
+    self._tmp_dir = None
+    # Statistics.
+    self._test = 0
+    self._num_success = 0
+    self._num_not_compiled = 0
+    self._num_not_run = 0
+    self._num_timed_out = 0
+    self._num_divergences = 0
+
+  def __enter__(self):
+    """On entry, enters new temp directory after saving current directory.
+
+    Raises:
+      FatalError: error when temp directory cannot be constructed
+    """
+    self._save_dir = os.getcwd()
+    self._tmp_dir = mkdtemp(dir="/tmp/")
+    if self._tmp_dir is None:
+      raise FatalError('Cannot obtain temp directory')
+    os.chdir(self._tmp_dir)
+    return self
+
+  def __exit__(self, etype, evalue, etraceback):
+    """On exit, re-enters previously saved current directory and cleans up."""
+    os.chdir(self._save_dir)
+    if self._num_divergences == 0:
+      RunCommand('rm', '-rf ' + self._tmp_dir, out=None, err=None)
+
+  def Run(self):
+    """Runs JavaFuzz many times and report divergences."""
+    print
+    print '**\n**** JavaFuzz Testing\n**'
+    print
+    print '#Tests    :', self._num_tests
+    print 'Device    :', self._device
+    print 'Directory :', self._tmp_dir
+    print 'Exec-mode1:', self._runner1.GetDescription()
+    print 'Exec-mode2:', self._runner2.GetDescription()
+    print
+    self.ShowStats()
+    for self._test in range(1, self._num_tests + 1):
+      self.RunJavaFuzzTest()
+      self.ShowStats()
+    if self._num_divergences == 0:
+      print '\n\nsuccess (no divergences)\n'
+    else:
+      print '\n\nfailure (divergences)\n'
+
+  def ShowStats(self):
+    """Shows current statistics (on same line) while tester is running."""
+    print '\rTests:', self._test, \
+        'Success:', self._num_success, \
+        'Not-compiled:', self._num_not_compiled, \
+        'Not-run:', self._num_not_run, \
+        'Timed-out:', self._num_timed_out, \
+        'Divergences:', self._num_divergences,
+    sys.stdout.flush()
+
+  def RunJavaFuzzTest(self):
+    """Runs a single JavaFuzz test, comparing two execution modes."""
+    self.ConstructTest()
+    retc1 = self._runner1.CompileAndRunTest()
+    retc2 = self._runner2.CompileAndRunTest()
+    self.CheckForDivergence(retc1, retc2)
+    self.CleanupTest()
+
+  def ConstructTest(self):
+    """Use JavaFuzz to generate next Test.java test.
+
+    Raises:
+      FatalError: error when javafuzz fails
+    """
+    if RunCommand('javafuzz', args=None,
+                  out='Test.java', err=None) != EXIT_SUCCESS:
+      raise FatalError('Unexpected error while running JavaFuzz')
+
+  def CheckForDivergence(self, retc1, retc2):
+    """Checks for divergences and updates statistics.
+
+    Args:
+      retc1: int, normalized return code of first runner
+      retc2: int, normalized return code of second runner
+    """
+    if retc1 == retc2:
+      # Non-divergent in return code.
+      if retc1 == EXIT_SUCCESS:
+        # Both compilations and runs were successful, inspect generated output.
+        args = self._runner1.GetId() + '_run_out.txt ' \
+            + self._runner2.GetId() + '_run_out.txt'
+        if RunCommand('diff', args, out=None, err=None) != EXIT_SUCCESS:
+          self.ReportDivergence('divergence in output')
+        else:
+          self._num_success += 1
+      elif retc1 == EXIT_TIMEOUT:
+        self._num_timed_out += 1
+      elif retc1 == EXIT_NOTCOMPILED:
+        self._num_not_compiled += 1
+      else:
+        self._num_not_run += 1
+    else:
+      # Divergent in return code.
+      self.ReportDivergence('divergence in return code: ' +
+                            GetReturnCode(retc1) + ' vs. ' +
+                            GetReturnCode(retc2))
+
+  def ReportDivergence(self, reason):
+    """Reports and saves a divergence."""
+    self._num_divergences += 1
+    print '\n', self._test, reason
+    # Save.
+    ddir = 'divergence' + str(self._test)
+    RunCommand('mkdir', ddir, out=None, err=None)
+    RunCommand('mv', 'Test.java *.txt ' + ddir, out=None, err=None)
+
+  def CleanupTest(self):
+    """Cleans up after a single test run."""
+    RunCommand('rm', '-f Test.java *.txt', out=None, err=None)
+
+
+def main():
+  # Handle arguments.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--num_tests', default=10000,
+                      type=int, help='number of tests to run')
+  parser.add_argument('--device', help='target device serial number')
+  parser.add_argument('--mode1', default='ri',
+                      help='execution mode 1 (default: ri)')
+  parser.add_argument('--mode2', default='hopt',
+                      help='execution mode 2 (default: hopt)')
+  args = parser.parse_args()
+  if args.mode1 == args.mode2:
+    raise FatalError("Identical execution modes given")
+  # Run the JavaFuzz tester.
+  with JavaFuzzTester(args.num_tests, args.device,
+                      args.mode1, args.mode2) as fuzzer:
+    fuzzer.Run()
+
+if __name__ == "__main__":
+  main()
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 81ea79a..cbb6e1d 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -36,6 +36,15 @@
   names: ["libcore.io.OsTest#testUnixDomainSockets_in_file_system"]
 },
 {
+  description: "TCP_USER_TIMEOUT is not defined on host's tcp.h (glibc-2.15-4.8).",
+  result: EXEC_FAILED,
+  modes: [host],
+  names: ["libcore.android.system.OsConstantsTest#testTcpUserTimeoutIsDefined",
+          "libcore.io.OsTest#test_socket_tcpUserTimeout_setAndGet",
+          "libcore.io.OsTest#test_socket_tcpUserTimeout_doesNotWorkOnDatagramSocket"],
+  bug: 30402085
+},
+{
   description: "Issue with incorrect device time (1970)",
   result: EXEC_FAILED,
   modes: [device],
@@ -66,8 +75,7 @@
           "libcore.java.text.SimpleDateFormatTest#testDstZoneNameWithNonDstTimestamp",
           "libcore.java.text.SimpleDateFormatTest#testDstZoneWithNonDstTimestampForNonHourDstZone",
           "libcore.java.text.SimpleDateFormatTest#testNonDstZoneNameWithDstTimestamp",
-          "libcore.java.text.SimpleDateFormatTest#testNonDstZoneWithDstTimestampForNonHourDstZone",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition"]
+          "libcore.java.text.SimpleDateFormatTest#testNonDstZoneWithDstTimestampForNonHourDstZone"]
 },
 {
   description: "Failing due to missing localhost on hammerhead and volantis.",
@@ -159,17 +167,89 @@
   bug: 22786792
 },
 {
-  description: "Formatting failures",
-  result: EXEC_FAILED,
-  names: ["libcore.java.text.NumberFormatTest#test_currencyFromLocale",
-          "libcore.java.text.NumberFormatTest#test_currencyWithPatternDigits"],
-  bug: 25136848
-},
-{
   description: "Lack of IPv6 on some buildbot slaves",
   result: EXEC_FAILED,
   names: ["libcore.io.OsTest#test_byteBufferPositions_sendto_recvfrom_af_inet6",
           "libcore.io.OsTest#test_sendtoSocketAddress_af_inet6"],
   bug: 25178637
+},
+{
+  description: "Non-deterministic test because of a dependency on weak ref collection.",
+  result: EXEC_FAILED,
+  names: ["org.apache.harmony.tests.java.util.WeakHashMapTest#test_keySet"],
+  bug: 25437292
+},
+{
+  description: "Missing resource in classpath",
+  result: EXEC_FAILED,
+  modes: [device],
+  names: ["libcore.java.util.prefs.OldAbstractPreferencesTest#testClear",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testExportNode",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testExportSubtree",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGet",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetBoolean",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetByteArray",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetDouble",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetFloat",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetInt",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetLong",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testKeys",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testNodeExists",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPut",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutBoolean",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutByteArray",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutDouble",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutFloat",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutInt",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutLong",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testRemove",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testRemoveNode",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testSync",
+          "libcore.java.util.prefs.PreferencesTest#testHtmlEncoding",
+          "libcore.java.util.prefs.PreferencesTest#testPreferencesClobbersExistingFiles",
+          "org.apache.harmony.tests.java.util.PropertiesTest#test_storeToXMLLjava_io_OutputStreamLjava_lang_StringLjava_lang_String",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testExportNode",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testExportSubtree",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testFlush",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testSync",
+          "org.apache.harmony.tests.java.util.prefs.FilePreferencesImplTest#testPutGet"]
+},
+{
+  description: "Only work with --mode=activity",
+  result: EXEC_FAILED,
+  names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
+},
+{
+  description: "Made for extending, shouldn't be run",
+  result: EXEC_FAILED,
+  names: ["jsr166.CollectionTest#testEmptyMeansEmpty",
+          "jsr166.Collection8Test#testForEach",
+          "jsr166.Collection8Test#testForEachConcurrentStressTest"]
+},
+{
+  description: "Flaky test",
+  result: EXEC_FAILED,
+  bug: 30107038,
+  modes: [device],
+  names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
+},
+{
+  description: "Flaky failure, native crash in the runtime.
+                Unclear if this relates to the tests running sh as a child process.",
+  result: EXEC_FAILED,
+  bug: 30657148,
+  modes: [device],
+  names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit",
+          "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
+},
+{
+  description: "BigInteger failures due to the BoringSSL upgrade",
+  result: EXEC_FAILED,
+  bug: 30917411,
+  names: [
+    "org.apache.harmony.tests.java.math.BigIntegerModPowTest#testModPowNegExp",
+    "org.apache.harmony.tests.java.math.BigIntegerModPowTest#testmodInversePos1",
+    "org.apache.harmony.tests.java.math.BigIntegerTest#test_modInverseLjava_math_BigInteger"
+  ]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
new file mode 100644
index 0000000..95f0c2d
--- /dev/null
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -0,0 +1,20 @@
+/*
+ * This file contains expectations for ART's buildbot's concurrent collector
+ * configurations. The purpose of this file is to temporarily and quickly list
+ * failing tests without breaking the bots on the CC configurations, until they
+ * are fixed or until the libcore expectation files get properly updated. The
+ * script that uses this file is art/tools/run-libcore-tests.sh.
+ *
+ * It is also used to enable AOSP experiments without messing up CTS's
+ * expectations.
+ */
+
+[
+{
+  description: "Assertion failing on the concurrent collector configuration.",
+  result: EXEC_FAILED,
+  names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
+          "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
+  bug: 25883050
+}
+]
diff --git a/tools/public.libraries.buildbot.txt b/tools/public.libraries.buildbot.txt
new file mode 100644
index 0000000..4b01796
--- /dev/null
+++ b/tools/public.libraries.buildbot.txt
@@ -0,0 +1,8 @@
+libart.so
+libartd.so
+libbacktrace.so
+libc.so
+libc++.so
+libdl.so
+libm.so
+libnativehelper.so
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index edec362..bdb2d4b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,19 +19,21 @@
   exit 1
 fi
 
-# Jar containing all the tests.
-test_jar=${OUT_DIR-out}/host/linux-x86/framework/apache-harmony-jdwp-tests-hostdex.jar
+if [ -z "$ANDROID_HOST_OUT" ] ; then
+  ANDROID_HOST_OUT=${OUT_DIR-$ANDROID_BUILD_TOP/out}/host/linux-x86
+fi
 
-if [ ! -f $test_jar ]; then
+# Jar containing all the tests.
+test_jack=${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
+
+if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
-       "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
+       "make apache-harmony-jdwp-tests-hostdex vogar"
   exit 1
 fi
 
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
-# We use Quick's image on target because optimizing's image is not compiled debuggable.
-image="-Ximage:/data/art-test/core.art"
 args=$@
 debuggee_args="-Xcompiler-option --debuggable"
 device_dir="--device-dir=/data/local/tmp"
@@ -41,28 +43,36 @@
 image_compiler_option=""
 debug="no"
 verbose="no"
+image="-Ximage:/data/art-test/core-optimizing-pic.art"
+vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
+host="no"
+# Use JIT compiling by default.
+use_jit=true
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
+    host="yes"
     # Specify bash explicitly since the art script cannot, since it has to run on the device
     # with mksh.
     art="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
     art_debugee="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
     # We force generation of a new image to avoid build-time and run-time classpath differences.
-    image="-Ximage:/system/non/existent"
+    image="-Ximage:/system/non/existent/vogar.art"
     # We do not need a device directory on host.
     device_dir=""
     # Vogar knows which VM to use on host.
     vm_command=""
-    # We only compile the image on the host. Note that not providing this option
-    # for target testing puts us below the adb command limit for vogar.
-    image_compiler_option="--vm-arg -Ximage-compiler-option --vm-arg --debuggable"
     shift
   elif [[ $1 == -Ximage:* ]]; then
     image="$1"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    use_jit=false
+    # Remove the --no-jit from the arguments.
+    args=${args/$1}
+    shift
   elif [[ $1 == "--debug" ]]; then
     debug="yes"
     # Remove the --debug from the arguments.
@@ -88,7 +98,15 @@
   fi
 done
 
-vm_args="--vm-arg $image"
+if [[ "$image" != "" ]]; then
+  vm_args="--vm-arg $image"
+fi
+if $use_jit; then
+  vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+  debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=interpret-only"
+fi
+vm_args="$vm_args --vm-arg -Xusejit:$use_jit"
+debuggee_args="$debuggee_args -Xusejit:$use_jit"
 if [[ $debug == "yes" ]]; then
   art="$art -d"
   art_debugee="$art_debugee -d"
@@ -108,9 +126,21 @@
       $image_compiler_option \
       --timeout 800 \
       --vm-arg -Djpda.settings.verbose=true \
-      --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
-      --classpath $test_jar \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
+      --classpath $test_jack \
+      --toolchain jack --language JN \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test
+
+vogar_exit_status=$?
+
+echo "Killing stalled dalvikvm processes..."
+if [[ $host == "yes" ]]; then
+  pkill -9 -f /bin/dalvikvm
+else
+  adb shell pkill -9 -f /bin/dalvikvm
+fi
+echo "Done."
+
+exit $vogar_exit_status
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 80f7a37..2a6e172 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -19,17 +19,33 @@
   exit 1
 fi
 
-# Jar containing jsr166 tests.
-jsr166_test_jar=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/javalib.jar
+if [ -z "$ANDROID_PRODUCT_OUT" ] ; then
+  JAVA_LIBRARIES=out/target/common/obj/JAVA_LIBRARIES
+else
+  JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
+fi
 
-# Jar containing all the other tests.
-test_jar=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/javalib.jar
+function cparg {
+  for var
+  do
+    printf -- "--classpath ${JAVA_LIBRARIES}/${var}_intermediates/classes.jack ";
+  done
+}
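+# For example, `cparg core-tests` expands to
+# "--classpath ${JAVA_LIBRARIES}/core-tests_intermediates/classes.jack ".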
 
+DEPS="core-tests jsr166-tests mockito-target"
 
-if [ ! -f $test_jar ]; then
-  echo "Before running, you must build core-tests, jsr166-tests and vogar: \
-        make core-tests jsr166-tests vogar vogar.jar"
-  exit 1
+for lib in $DEPS
+do
+  if [ ! -f "${JAVA_LIBRARIES}/${lib}_intermediates/classes.jack" ]; then
+    echo "${lib} is missing. Before running, you must run art/tools/buildbot-build.sh"
+    exit 1
+  fi
+done
+
+expectations="--expectations art/tools/libcore_failures.txt"
+if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
+  # Tolerate some more failures on the concurrent collector configurations.
+  expectations="$expectations --expectations art/tools/libcore_failures_concurrent_collector.txt"
 fi
 
 emulator="no"
@@ -37,6 +53,9 @@
   emulator="yes"
 fi
 
+# Use JIT compiling by default.
+use_jit=true
+
 # Packages that currently work correctly with the expectation files.
 working_packages=("dalvik.system"
                   "libcore.icu"
@@ -57,7 +76,6 @@
                   "org.apache.harmony.luni"
                   "org.apache.harmony.nio"
                   "org.apache.harmony.regex"
-                  "org.apache.harmony.security"
                   "org.apache.harmony.testframework"
                   "org.apache.harmony.tests.java.io"
                   "org.apache.harmony.tests.java.lang"
@@ -68,6 +86,10 @@
                   "tests.java.lang.String"
                   "jsr166")
 
+# List of packages we could run, but for which we lack the rights to
+# revert changes in case of failures.
+# "org.apache.harmony.security"
+
 vogar_args=$@
 while true; do
   if [[ "$1" == "--mode=device" ]]; then
@@ -80,7 +102,12 @@
     # will create a boot image with the default compiler. Note that
     # giving an existing image on host does not work because of
     # classpath/resources differences when compiling the boot image.
-    vogar_args="$vogar_args --vm-arg -Ximage:/non/existent"
+    vogar_args="$vogar_args --vm-arg -Ximage:/non/existent/vogar.art"
+    shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    # Remove the --no-jit from the arguments.
+    vogar_args=${vogar_args/$1}
+    use_jit=false
     shift
   elif [[ "$1" == "--debug" ]]; then
     # Remove the --debug from the arguments.
@@ -99,7 +126,16 @@
 # the default timeout.
 vogar_args="$vogar_args --timeout 480"
 
+# Use Jack with "1.8" configuration.
+vogar_args="$vogar_args --toolchain jack --language JN"
+
+# JIT settings.
+if $use_jit; then
+  vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+fi
+vogar_args="$vogar_args --vm-arg -Xusejit:$use_jit"
+
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
+vogar $vogar_args $expectations $(cparg $DEPS) ${working_packages[@]}
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 7faf86e..1e9c763 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -30,3 +30,29 @@
 
 echo -e "${green}List properties${nc}"
 adb shell getprop
+
+echo -e "${green}Uptime${nc}"
+adb shell uptime
+
+echo -e "${green}Battery info${nc}"
+adb shell dumpsys battery
+
+echo -e "${green}Setting adb buffer size to 32MB${nc}"
+adb logcat -G 32M
+adb logcat -g
+
+echo -e "${green}Removing adb spam filter${nc}"
+adb logcat -P ""
+adb logcat -p
+
+echo -e "${green}Kill stalled dalvikvm processes${nc}"
+# 'ps' on M can sometimes hang.
+timeout 2s adb shell "ps"
+if [ $? = 124 ]; then
+  echo -e "${green}Rebooting device to fix 'ps'${nc}"
+  adb reboot
+  adb wait-for-device root
+else
+  processes=$(adb shell "ps" | grep dalvikvm | awk '{print $2}')
+  for i in $processes; do adb shell kill -9 $i; done
+fi
diff --git a/tools/symbolize-buildbot-crashes.sh b/tools/symbolize-buildbot-crashes.sh
new file mode 100755
index 0000000..8dc4e27
--- /dev/null
+++ b/tools/symbolize-buildbot-crashes.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We push art and its dependencies to '/data/local/tmp', but the 'stack'
+# script expects things to be in '/'. So we just remove the
+# '/data/local/tmp' prefix.
+adb logcat -d | sed 's,/data/local/tmp,,g' | development/scripts/stack
+
+# Always return 0 to avoid having the buildbot complain about wrong stacks.
+exit 0